summaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/00-INDEX489
-rw-r--r--Documentation/ABI/README87
-rw-r--r--Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads5
-rw-r--r--Documentation/ABI/obsolete/sysfs-block-zram119
-rw-r--r--Documentation/ABI/obsolete/sysfs-bus-usb31
-rw-r--r--Documentation/ABI/obsolete/sysfs-class-rfkill29
-rw-r--r--Documentation/ABI/obsolete/sysfs-driver-hid-roccat-koneplus48
-rw-r--r--Documentation/ABI/obsolete/sysfs-driver-hid-roccat-kovaplus66
-rw-r--r--Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra73
-rw-r--r--Documentation/ABI/removed/devfs12
-rw-r--r--Documentation/ABI/removed/dv139414
-rw-r--r--Documentation/ABI/removed/ip_queue9
-rw-r--r--Documentation/ABI/removed/net_dma8
-rw-r--r--Documentation/ABI/removed/o2cb10
-rw-r--r--Documentation/ABI/removed/raw139415
-rw-r--r--Documentation/ABI/removed/video139416
-rw-r--r--Documentation/ABI/stable/firewire-cdev103
-rw-r--r--Documentation/ABI/stable/o2cb10
-rw-r--r--Documentation/ABI/stable/syscalls10
-rw-r--r--Documentation/ABI/stable/sysfs-acpi-pmprofile22
-rw-r--r--Documentation/ABI/stable/sysfs-bus-firewire133
-rw-r--r--Documentation/ABI/stable/sysfs-bus-usb140
-rw-r--r--Documentation/ABI/stable/sysfs-bus-xen-backend75
-rw-r--r--Documentation/ABI/stable/sysfs-class-backlight56
-rw-r--r--Documentation/ABI/stable/sysfs-class-rfkill67
-rw-r--r--Documentation/ABI/stable/sysfs-class-tpm185
-rw-r--r--Documentation/ABI/stable/sysfs-class-ubi212
-rw-r--r--Documentation/ABI/stable/sysfs-class-udc93
-rw-r--r--Documentation/ABI/stable/sysfs-devices10
-rw-r--r--Documentation/ABI/stable/sysfs-devices-node93
-rw-r--r--Documentation/ABI/stable/sysfs-devices-system-cpu25
-rw-r--r--Documentation/ABI/stable/sysfs-devices-system-xen_memory77
-rw-r--r--Documentation/ABI/stable/sysfs-driver-ib_srp189
-rw-r--r--Documentation/ABI/stable/sysfs-driver-qla2xxx8
-rw-r--r--Documentation/ABI/stable/sysfs-driver-usb-usbtmc62
-rw-r--r--Documentation/ABI/stable/sysfs-driver-w1_ds28e0415
-rw-r--r--Documentation/ABI/stable/sysfs-firmware-efi-vars75
-rw-r--r--Documentation/ABI/stable/sysfs-firmware-opal-dump41
-rw-r--r--Documentation/ABI/stable/sysfs-firmware-opal-elog60
-rw-r--r--Documentation/ABI/stable/sysfs-module34
-rw-r--r--Documentation/ABI/stable/sysfs-transport-srp58
-rw-r--r--Documentation/ABI/stable/thermal-notification4
-rw-r--r--Documentation/ABI/stable/vdso27
-rw-r--r--Documentation/ABI/testing/configfs-spear-pcie-gadget31
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget126
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-acm8
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-ecm16
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-eem14
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-ffs9
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-hid11
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-loopback8
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-mass-storage31
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-midi12
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-ncm15
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-obex9
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-phonet8
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-printer9
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-rndis14
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-serial9
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-sourcesink12
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-subset14
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-uac112
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-uac212
-rw-r--r--Documentation/ABI/testing/configfs-usb-gadget-uvc265
-rw-r--r--Documentation/ABI/testing/debugfs-aufs50
-rw-r--r--Documentation/ABI/testing/debugfs-driver-genwqe91
-rw-r--r--Documentation/ABI/testing/debugfs-ec20
-rw-r--r--Documentation/ABI/testing/debugfs-ideapad19
-rw-r--r--Documentation/ABI/testing/debugfs-olpc16
-rw-r--r--Documentation/ABI/testing/debugfs-pfo-nx-crypto45
-rw-r--r--Documentation/ABI/testing/debugfs-pktcdvd19
-rw-r--r--Documentation/ABI/testing/dev-kmsg101
-rw-r--r--Documentation/ABI/testing/evm23
-rw-r--r--Documentation/ABI/testing/ima_policy97
-rw-r--r--Documentation/ABI/testing/procfs-diskstats22
-rw-r--r--Documentation/ABI/testing/pstore47
-rw-r--r--Documentation/ABI/testing/sysfs-ata110
-rw-r--r--Documentation/ABI/testing/sysfs-aufs31
-rw-r--r--Documentation/ABI/testing/sysfs-block230
-rw-r--r--Documentation/ABI/testing/sysfs-block-bcache156
-rw-r--r--Documentation/ABI/testing/sysfs-block-dm47
-rw-r--r--Documentation/ABI/testing/sysfs-block-rssd5
-rw-r--r--Documentation/ABI/testing/sysfs-block-zram168
-rw-r--r--Documentation/ABI/testing/sysfs-bus-acpi58
-rw-r--r--Documentation/ABI/testing/sysfs-bus-amba20
-rw-r--r--Documentation/ABI/testing/sysfs-bus-bcma31
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-etb1024
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x253
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel12
-rw-r--r--Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc8
-rw-r--r--Documentation/ABI/testing/sysfs-bus-css35
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-events94
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-format20
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x745
-rw-r--r--Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci43
-rw-r--r--Documentation/ABI/testing/sysfs-bus-fcoe116
-rw-r--r--Documentation/ABI/testing/sysfs-bus-hsi19
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa948021
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i2c-devices-hm635221
-rw-r--r--Documentation/ABI/testing/sysfs-bus-i2c-devices-lm353315
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio1364
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-accel-bmc1507
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-frequency-ad952329
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-frequency-adf435021
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-gyro-bmg1607
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-light-lm3533-als61
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-mpu605013
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-proximity-as393516
-rw-r--r--Documentation/ABI/testing/sysfs-bus-iio-trigger-sysfs11
-rw-r--r--Documentation/ABI/testing/sysfs-bus-mdio29
-rw-r--r--Documentation/ABI/testing/sysfs-bus-media6
-rw-r--r--Documentation/ABI/testing/sysfs-bus-mei7
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci296
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-devices-cciss80
-rw-r--r--Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd46
-rw-r--r--Documentation/ABI/testing/sysfs-bus-platform20
-rw-r--r--Documentation/ABI/testing/sysfs-bus-rbd98
-rw-r--r--Documentation/ABI/testing/sysfs-bus-rpmsg75
-rw-r--r--Documentation/ABI/testing/sysfs-bus-umc28
-rw-r--r--Documentation/ABI/testing/sysfs-bus-usb181
-rw-r--r--Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg43
-rw-r--r--Documentation/ABI/testing/sysfs-bus-usb-lvstest47
-rw-r--r--Documentation/ABI/testing/sysfs-c2port88
-rw-r--r--Documentation/ABI/testing/sysfs-cfq-target-latency8
-rw-r--r--Documentation/ABI/testing/sysfs-class16
-rw-r--r--Documentation/ABI/testing/sysfs-class-backlight-driver-adp887056
-rw-r--r--Documentation/ABI/testing/sysfs-class-backlight-driver-lm353348
-rw-r--r--Documentation/ABI/testing/sysfs-class-bdi55
-rw-r--r--Documentation/ABI/testing/sysfs-class-cxl192
-rw-r--r--Documentation/ABI/testing/sysfs-class-devfreq100
-rw-r--r--Documentation/ABI/testing/sysfs-class-extcon97
-rw-r--r--Documentation/ABI/testing/sysfs-class-iommu17
-rw-r--r--Documentation/ABI/testing/sysfs-class-iommu-amd-iommu14
-rw-r--r--Documentation/ABI/testing/sysfs-class-iommu-intel-iommu32
-rw-r--r--Documentation/ABI/testing/sysfs-class-lcd23
-rw-r--r--Documentation/ABI/testing/sysfs-class-led37
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-driver-lm353365
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-flash80
-rw-r--r--Documentation/ABI/testing/sysfs-class-leds-gt683r16
-rw-r--r--Documentation/ABI/testing/sysfs-class-mei31
-rw-r--r--Documentation/ABI/testing/sysfs-class-mic.txt157
-rw-r--r--Documentation/ABI/testing/sysfs-class-mtd234
-rw-r--r--Documentation/ABI/testing/sysfs-class-net234
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-batman-adv15
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-cdc_ncm149
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-grcan35
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-mesh108
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-queues87
-rw-r--r--Documentation/ABI/testing/sysfs-class-net-statistics201
-rw-r--r--Documentation/ABI/testing/sysfs-class-pktcdvd72
-rw-r--r--Documentation/ABI/testing/sysfs-class-power76
-rw-r--r--Documentation/ABI/testing/sysfs-class-powercap152
-rw-r--r--Documentation/ABI/testing/sysfs-class-pwm79
-rw-r--r--Documentation/ABI/testing/sysfs-class-rc111
-rw-r--r--Documentation/ABI/testing/sysfs-class-regulator372
-rw-r--r--Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration12
-rw-r--r--Documentation/ABI/testing/sysfs-class-scsi_host29
-rw-r--r--Documentation/ABI/testing/sysfs-class-uwb_rc159
-rw-r--r--Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc57
-rw-r--r--Documentation/ABI/testing/sysfs-dev20
-rw-r--r--Documentation/ABI/testing/sysfs-devices25
-rw-r--r--Documentation/ABI/testing/sysfs-devices-edac140
-rw-r--r--Documentation/ABI/testing/sysfs-devices-firmware_node17
-rw-r--r--Documentation/ABI/testing/sysfs-devices-lpss_ltr44
-rw-r--r--Documentation/ABI/testing/sysfs-devices-memory93
-rw-r--r--Documentation/ABI/testing/sysfs-devices-mmc21
-rw-r--r--Documentation/ABI/testing/sysfs-devices-online20
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget21
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-docg334
-rw-r--r--Documentation/ABI/testing/sysfs-devices-platform-sh_mobile_lcdc_fb44
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power276
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power_resources_D013
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power_resources_D114
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power_resources_D214
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power_resources_D3hot14
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power_resources_wakeup13
-rw-r--r--Documentation/ABI/testing/sysfs-devices-power_state20
-rw-r--r--Documentation/ABI/testing/sysfs-devices-real_power_state23
-rw-r--r--Documentation/ABI/testing/sysfs-devices-resource_in_use12
-rw-r--r--Documentation/ABI/testing/sysfs-devices-soc58
-rw-r--r--Documentation/ABI/testing/sysfs-devices-sun14
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu273
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-ibm-rtl22
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-xen_cpu20
-rw-r--r--Documentation/ABI/testing/sysfs-driver-genwqe71
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid20
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-lenovo50
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff52
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-multitouch9
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-picolcd43
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-prodikeys29
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-arvo53
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-isku153
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-kone106
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus96
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-konepure105
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-kovaplus49
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-lua7
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-pyra49
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-ryos178
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-roccat-savu76
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-srws121
-rw-r--r--Documentation/ABI/testing/sysfs-driver-hid-wiimote77
-rw-r--r--Documentation/ABI/testing/sysfs-driver-input-axp-pek11
-rw-r--r--Documentation/ABI/testing/sysfs-driver-intel-rapid-start21
-rw-r--r--Documentation/ABI/testing/sysfs-driver-pciback13
-rw-r--r--Documentation/ABI/testing/sysfs-driver-ppi70
-rw-r--r--Documentation/ABI/testing/sysfs-driver-samsung-laptop45
-rw-r--r--Documentation/ABI/testing/sysfs-driver-sunxi-sid22
-rw-r--r--Documentation/ABI/testing/sysfs-driver-tegra-fuse11
-rw-r--r--Documentation/ABI/testing/sysfs-driver-toshiba_acpi181
-rw-r--r--Documentation/ABI/testing/sysfs-driver-wacom79
-rw-r--r--Documentation/ABI/testing/sysfs-driver-xen-blkback17
-rw-r--r--Documentation/ABI/testing/sysfs-driver-xen-blkfront10
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-acpi206
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-dmi110
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-efi20
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-efi-runtime-map34
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-gsmi58
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-log7
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-memmap71
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-ofw28
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-sfi15
-rw-r--r--Documentation/ABI/testing/sysfs-firmware-sgi_uv27
-rw-r--r--Documentation/ABI/testing/sysfs-fs-ext4111
-rw-r--r--Documentation/ABI/testing/sysfs-fs-f2fs82
-rw-r--r--Documentation/ABI/testing/sysfs-fs-nilfs2269
-rw-r--r--Documentation/ABI/testing/sysfs-fs-xfs39
-rw-r--r--Documentation/ABI/testing/sysfs-gpio27
-rw-r--r--Documentation/ABI/testing/sysfs-i2c-bmp08531
-rw-r--r--Documentation/ABI/testing/sysfs-ibft23
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-boot_params38
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-fscaps8
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-iommu_groups14
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-livepatch44
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm6
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-hugepages15
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-mm-ksm52
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-slab490
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-uids14
-rw-r--r--Documentation/ABI/testing/sysfs-kernel-vmcoreinfo14
-rw-r--r--Documentation/ABI/testing/sysfs-memory-page-offline44
-rw-r--r--Documentation/ABI/testing/sysfs-module52
-rw-r--r--Documentation/ABI/testing/sysfs-ocfs289
-rw-r--r--Documentation/ABI/testing/sysfs-platform-asus-laptop66
-rw-r--r--Documentation/ABI/testing/sysfs-platform-asus-wmi38
-rw-r--r--Documentation/ABI/testing/sysfs-platform-at9125
-rw-r--r--Documentation/ABI/testing/sysfs-platform-brcmstb-gisb-arb8
-rw-r--r--Documentation/ABI/testing/sysfs-platform-chipidea-usb-otg56
-rw-r--r--Documentation/ABI/testing/sysfs-platform-dell-laptop69
-rw-r--r--Documentation/ABI/testing/sysfs-platform-eeepc-laptop50
-rw-r--r--Documentation/ABI/testing/sysfs-platform-ideapad-laptop19
-rw-r--r--Documentation/ABI/testing/sysfs-platform-kim48
-rw-r--r--Documentation/ABI/testing/sysfs-platform-msi-laptop83
-rw-r--r--Documentation/ABI/testing/sysfs-platform-tahvo-usb16
-rw-r--r--Documentation/ABI/testing/sysfs-platform-ts550054
-rw-r--r--Documentation/ABI/testing/sysfs-power258
-rw-r--r--Documentation/ABI/testing/sysfs-pps73
-rw-r--r--Documentation/ABI/testing/sysfs-profiling13
-rw-r--r--Documentation/ABI/testing/sysfs-ptp122
-rw-r--r--Documentation/ABI/testing/sysfs-tty156
-rw-r--r--Documentation/ABI/testing/sysfs-wusb_cbaf100
-rw-r--r--Documentation/BUG-HUNTING246
-rw-r--r--Documentation/Changes395
-rw-r--r--Documentation/CodeOfConflict27
-rw-r--r--Documentation/CodingStyle948
-rw-r--r--Documentation/DMA-API-HOWTO.txt974
-rw-r--r--Documentation/DMA-API.txt704
-rw-r--r--Documentation/DMA-ISA-LPC.txt151
-rw-r--r--Documentation/DMA-attributes.txt102
-rw-r--r--Documentation/DocBook/.gitignore15
-rw-r--r--Documentation/DocBook/80211.tmpl582
-rw-r--r--Documentation/DocBook/Makefile232
-rw-r--r--Documentation/DocBook/alsa-driver-api.tmpl140
-rw-r--r--Documentation/DocBook/crypto-API.tmpl2113
-rw-r--r--Documentation/DocBook/debugobjects.tmpl441
-rw-r--r--Documentation/DocBook/device-drivers.tmpl458
-rw-r--r--Documentation/DocBook/deviceiobook.tmpl323
-rw-r--r--Documentation/DocBook/drm.tmpl4231
-rw-r--r--Documentation/DocBook/filesystems.tmpl425
-rw-r--r--Documentation/DocBook/gadget.tmpl793
-rw-r--r--Documentation/DocBook/genericirq.tmpl520
-rw-r--r--Documentation/DocBook/kernel-api.tmpl331
-rw-r--r--Documentation/DocBook/kernel-hacking.tmpl1310
-rw-r--r--Documentation/DocBook/kernel-locking.tmpl2151
-rw-r--r--Documentation/DocBook/kgdb.tmpl918
-rw-r--r--Documentation/DocBook/libata.tmpl1625
-rw-r--r--Documentation/DocBook/librs.tmpl289
-rw-r--r--Documentation/DocBook/lsm.tmpl265
-rw-r--r--Documentation/DocBook/media/Makefile388
-rw-r--r--Documentation/DocBook/media/bayer.png.b64171
-rw-r--r--Documentation/DocBook/media/constraints.png.b6459
-rw-r--r--Documentation/DocBook/media/crop.gif.b64105
-rw-r--r--Documentation/DocBook/media/dvb/.gitignore1
-rw-r--r--Documentation/DocBook/media/dvb/audio.xml1314
-rw-r--r--Documentation/DocBook/media/dvb/ca.xml582
-rw-r--r--Documentation/DocBook/media/dvb/demux.xml1145
-rw-r--r--Documentation/DocBook/media/dvb/dvbapi.xml141
-rw-r--r--Documentation/DocBook/media/dvb/dvbproperty.xml1317
-rw-r--r--Documentation/DocBook/media/dvb/examples.xml365
-rw-r--r--Documentation/DocBook/media/dvb/frontend.xml1548
-rw-r--r--Documentation/DocBook/media/dvb/intro.xml212
-rw-r--r--Documentation/DocBook/media/dvb/kdapi.xml2309
-rw-r--r--Documentation/DocBook/media/dvb/net.xml156
-rw-r--r--Documentation/DocBook/media/dvb/video.xml1968
-rw-r--r--Documentation/DocBook/media/dvbstb.png.b64398
-rw-r--r--Documentation/DocBook/media/fieldseq_bt.gif.b64447
-rw-r--r--Documentation/DocBook/media/fieldseq_tb.gif.b64445
-rw-r--r--Documentation/DocBook/media/nv12mt.gif.b6437
-rw-r--r--Documentation/DocBook/media/nv12mt_example.gif.b64121
-rw-r--r--Documentation/DocBook/media/pipeline.png.b64213
-rw-r--r--Documentation/DocBook/media/selection.png.b64206
-rw-r--r--Documentation/DocBook/media/v4l/.gitignore1
-rw-r--r--Documentation/DocBook/media/v4l/biblio.xml353
-rw-r--r--Documentation/DocBook/media/v4l/capture.c.xml659
-rw-r--r--Documentation/DocBook/media/v4l/common.xml1102
-rw-r--r--Documentation/DocBook/media/v4l/compat.xml2741
-rw-r--r--Documentation/DocBook/media/v4l/controls.xml5464
-rw-r--r--Documentation/DocBook/media/v4l/dev-capture.xml110
-rw-r--r--Documentation/DocBook/media/v4l/dev-codec.xml27
-rw-r--r--Documentation/DocBook/media/v4l/dev-effect.xml17
-rw-r--r--Documentation/DocBook/media/v4l/dev-event.xml43
-rw-r--r--Documentation/DocBook/media/v4l/dev-osd.xml149
-rw-r--r--Documentation/DocBook/media/v4l/dev-output.xml106
-rw-r--r--Documentation/DocBook/media/v4l/dev-overlay.xml368
-rw-r--r--Documentation/DocBook/media/v4l/dev-radio.xml49
-rw-r--r--Documentation/DocBook/media/v4l/dev-raw-vbi.xml345
-rw-r--r--Documentation/DocBook/media/v4l/dev-rds.xml196
-rw-r--r--Documentation/DocBook/media/v4l/dev-sdr.xml120
-rw-r--r--Documentation/DocBook/media/v4l/dev-sliced-vbi.xml706
-rw-r--r--Documentation/DocBook/media/v4l/dev-subdev.xml484
-rw-r--r--Documentation/DocBook/media/v4l/dev-teletext.xml29
-rw-r--r--Documentation/DocBook/media/v4l/driver.xml200
-rw-r--r--Documentation/DocBook/media/v4l/fdl-appendix.xml671
-rw-r--r--Documentation/DocBook/media/v4l/func-close.xml62
-rw-r--r--Documentation/DocBook/media/v4l/func-ioctl.xml71
-rw-r--r--Documentation/DocBook/media/v4l/func-mmap.xml183
-rw-r--r--Documentation/DocBook/media/v4l/func-munmap.xml76
-rw-r--r--Documentation/DocBook/media/v4l/func-open.xml113
-rw-r--r--Documentation/DocBook/media/v4l/func-poll.xml142
-rw-r--r--Documentation/DocBook/media/v4l/func-read.xml181
-rw-r--r--Documentation/DocBook/media/v4l/func-select.xml130
-rw-r--r--Documentation/DocBook/media/v4l/func-write.xml128
-rw-r--r--Documentation/DocBook/media/v4l/gen-errors.xml77
-rw-r--r--Documentation/DocBook/media/v4l/io.xml1533
-rw-r--r--Documentation/DocBook/media/v4l/keytable.c.xml172
-rw-r--r--Documentation/DocBook/media/v4l/libv4l.xml160
-rw-r--r--Documentation/DocBook/media/v4l/lirc_device_interface.xml255
-rw-r--r--Documentation/DocBook/media/v4l/media-controller.xml89
-rw-r--r--Documentation/DocBook/media/v4l/media-func-close.xml59
-rw-r--r--Documentation/DocBook/media/v4l/media-func-ioctl.xml73
-rw-r--r--Documentation/DocBook/media/v4l/media-func-open.xml94
-rw-r--r--Documentation/DocBook/media/v4l/media-ioc-device-info.xml132
-rw-r--r--Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml280
-rw-r--r--Documentation/DocBook/media/v4l/media-ioc-enum-links.xml216
-rw-r--r--Documentation/DocBook/media/v4l/media-ioc-setup-link.xml84
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-grey.xml62
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-m420.xml139
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv12.xml143
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv12m.xml153
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv12mt.xml66
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv16.xml166
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv16m.xml170
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-nv24.xml121
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml937
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-packed-yuv.xml236
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sbggr16.xml83
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sbggr8.xml67
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sdr-cs08.xml44
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sdr-cs14le.xml47
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sdr-cu08.xml44
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sdr-cu16le.xml46
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sdr-ru12le.xml40
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sgbrg8.xml67
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-sgrbg8.xml67
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb10.xml90
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml34
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb10dpcm8.xml28
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb10p.xml99
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb12.xml90
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-srggb8.xml67
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-uv8.xml62
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-uyvy.xml120
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-vyuy.xml120
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-y10.xml79
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-y10b.xml43
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-y12.xml79
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-y16.xml81
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-y41p.xml149
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuv410.xml133
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuv411p.xml147
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuv420.xml149
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuv420m.xml154
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuv422p.xml153
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yuyv.xml120
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yvu420m.xml154
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt-yvyu.xml120
-rw-r--r--Documentation/DocBook/media/v4l/pixfmt.xml1801
-rw-r--r--Documentation/DocBook/media/v4l/planar-apis.xml62
-rw-r--r--Documentation/DocBook/media/v4l/remote_controllers.xml320
-rw-r--r--Documentation/DocBook/media/v4l/selection-api.xml324
-rw-r--r--Documentation/DocBook/media/v4l/selections-common.xml180
-rw-r--r--Documentation/DocBook/media/v4l/subdev-formats.xml4038
-rw-r--r--Documentation/DocBook/media/v4l/subdev-image-processing-crop.dia614
-rw-r--r--Documentation/DocBook/media/v4l/subdev-image-processing-full.dia1588
-rw-r--r--Documentation/DocBook/media/v4l/subdev-image-processing-scaling-multi-source.dia1152
-rw-r--r--Documentation/DocBook/media/v4l/v4l2.xml709
-rw-r--r--Documentation/DocBook/media/v4l/v4l2grab.c.xml164
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-create-bufs.xml165
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-cropcap.xml166
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml207
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml227
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-decoder-cmd.xml249
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-dqevent.xml468
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-dv-timings-cap.xml214
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-encoder-cmd.xml189
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-dv-timings.xml133
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-fmt.xml159
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-frameintervals.xml259
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-framesizes.xml264
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enum-freq-bands.xml181
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enumaudio.xml76
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enumaudioout.xml79
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enuminput.xml316
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enumoutput.xml201
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-enumstd.xml389
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-expbuf.xml210
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-audio.xml172
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-audioout.xml138
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-crop.xml129
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-ctrl.xml133
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-dv-timings.xml341
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-edid.xml162
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-enc-index.xml189
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-ext-ctrls.xml427
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml459
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-fmt.xml204
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-frequency.xml148
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-input.xml83
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-jpegcomp.xml175
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-modulator.xml240
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-output.xml85
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-parm.xml314
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-priority.xml135
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-selection.xml239
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-sliced-vbi-cap.xml255
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-std.xml98
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-g-tuner.xml578
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-log-status.xml41
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-overlay.xml74
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-prepare-buf.xml94
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-qbuf.xml192
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-query-dv-timings.xml110
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-querybuf.xml105
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-querycap.xml344
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-queryctrl.xml650
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-querystd.xml75
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-reqbufs.xml137
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-s-hw-freq-seek.xml188
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-streamon.xml128
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-interval.xml157
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-size.xml159
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-enum-mbus-code.xml124
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-crop.xml158
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-fmt.xml183
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-frame-interval.xml141
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subdev-g-selection.xml165
-rw-r--r--Documentation/DocBook/media/v4l/vidioc-subscribe-event.xml129
-rw-r--r--Documentation/DocBook/media/vbi_525.gif.b6484
-rw-r--r--Documentation/DocBook/media/vbi_625.gif.b6490
-rw-r--r--Documentation/DocBook/media/vbi_hsync.gif.b6443
-rw-r--r--Documentation/DocBook/media_api.tmpl88
-rw-r--r--Documentation/DocBook/mtdnand.tmpl1292
-rw-r--r--Documentation/DocBook/networking.tmpl111
-rw-r--r--Documentation/DocBook/rapidio.tmpl158
-rw-r--r--Documentation/DocBook/regulator.tmpl304
-rw-r--r--Documentation/DocBook/s390-drivers.tmpl161
-rw-r--r--Documentation/DocBook/scsi.tmpl409
-rw-r--r--Documentation/DocBook/sh.tmpl105
-rw-r--r--Documentation/DocBook/stylesheet.xsl10
-rw-r--r--Documentation/DocBook/tracepoint.tmpl112
-rw-r--r--Documentation/DocBook/uio-howto.tmpl1050
-rw-r--r--Documentation/DocBook/usb.tmpl980
-rw-r--r--Documentation/DocBook/w1.tmpl101
-rw-r--r--Documentation/DocBook/writing-an-alsa-driver.tmpl6221
-rw-r--r--Documentation/DocBook/writing_musb_glue_layer.tmpl873
-rw-r--r--Documentation/DocBook/writing_usb_driver.tmpl412
-rw-r--r--Documentation/DocBook/z8530book.tmpl371
-rw-r--r--Documentation/EDID/1024x768.S44
-rw-r--r--Documentation/EDID/1280x1024.S44
-rw-r--r--Documentation/EDID/1600x1200.S44
-rw-r--r--Documentation/EDID/1680x1050.S44
-rw-r--r--Documentation/EDID/1920x1080.S44
-rw-r--r--Documentation/EDID/800x600.S41
-rw-r--r--Documentation/EDID/HOWTO.txt58
-rw-r--r--Documentation/EDID/Makefile26
-rw-r--r--Documentation/EDID/edid.S272
-rw-r--r--Documentation/EDID/hex1
-rw-r--r--Documentation/HOWTO609
-rw-r--r--Documentation/IPMI.txt722
-rw-r--r--Documentation/IRQ-affinity.txt65
-rw-r--r--Documentation/IRQ-domain.txt223
-rw-r--r--Documentation/IRQ.txt22
-rw-r--r--Documentation/Intel-IOMMU.txt111
-rw-r--r--Documentation/Makefile4
-rw-r--r--Documentation/ManagementStyle276
-rw-r--r--Documentation/PCI/00-INDEX14
-rw-r--r--Documentation/PCI/MSI-HOWTO.txt587
-rw-r--r--Documentation/PCI/PCIEBUS-HOWTO.txt217
-rw-r--r--Documentation/PCI/pci-error-recovery.txt431
-rw-r--r--Documentation/PCI/pci-iov-howto.txt135
-rw-r--r--Documentation/PCI/pci.txt635
-rw-r--r--Documentation/PCI/pcieaer-howto.txt270
-rw-r--r--Documentation/RCU/00-INDEX36
-rw-r--r--Documentation/RCU/NMI-RCU.txt120
-rw-r--r--Documentation/RCU/RTFP.txt2812
-rw-r--r--Documentation/RCU/UP.txt135
-rw-r--r--Documentation/RCU/arrayRCU.txt141
-rw-r--r--Documentation/RCU/checklist.txt437
-rw-r--r--Documentation/RCU/listRCU.txt315
-rw-r--r--Documentation/RCU/lockdep-splat.txt110
-rw-r--r--Documentation/RCU/lockdep.txt112
-rw-r--r--Documentation/RCU/rcu.txt96
-rw-r--r--Documentation/RCU/rcu_dereference.txt371
-rw-r--r--Documentation/RCU/rcubarrier.txt321
-rw-r--r--Documentation/RCU/rculist_nulls.txt172
-rw-r--r--Documentation/RCU/rcuref.txt132
-rw-r--r--Documentation/RCU/stallwarn.txt250
-rw-r--r--Documentation/RCU/torture.txt334
-rw-r--r--Documentation/RCU/trace.txt554
-rw-r--r--Documentation/RCU/whatisRCU.txt1031
-rw-r--r--Documentation/SAK.txt88
-rw-r--r--Documentation/SM501.txt71
-rw-r--r--Documentation/SecurityBugs38
-rw-r--r--Documentation/SubmitChecklist109
-rw-r--r--Documentation/SubmittingDrivers163
-rw-r--r--Documentation/SubmittingPatches806
-rw-r--r--Documentation/VGA-softcursor.txt39
-rw-r--r--Documentation/accounting/.gitignore1
-rw-r--r--Documentation/accounting/Makefile7
-rw-r--r--Documentation/accounting/cgroupstats.txt27
-rw-r--r--Documentation/accounting/delay-accounting.txt117
-rw-r--r--Documentation/accounting/getdelays.c546
-rw-r--r--Documentation/accounting/taskstats-struct.txt180
-rw-r--r--Documentation/accounting/taskstats.txt181
-rw-r--r--Documentation/acpi/apei/einj.txt177
-rw-r--r--Documentation/acpi/apei/output_format.txt147
-rw-r--r--Documentation/acpi/debug.txt148
-rw-r--r--Documentation/acpi/dsdt-override.txt7
-rw-r--r--Documentation/acpi/enumeration.txt361
-rw-r--r--Documentation/acpi/gpio-properties.txt96
-rw-r--r--Documentation/acpi/initrd_table_override.txt94
-rw-r--r--Documentation/acpi/method-customizing.txt73
-rw-r--r--Documentation/acpi/method-tracing.txt26
-rw-r--r--Documentation/acpi/namespace.txt388
-rw-r--r--Documentation/acpi/scan_handlers.txt77
-rw-r--r--Documentation/acpi/video_extension.txt106
-rw-r--r--Documentation/aoe/aoe.txt143
-rw-r--r--Documentation/aoe/autoload.sh17
-rw-r--r--Documentation/aoe/status.sh30
-rw-r--r--Documentation/aoe/todo.txt14
-rw-r--r--Documentation/aoe/udev-install.sh33
-rw-r--r--Documentation/aoe/udev.txt26
-rw-r--r--Documentation/applying-patches.txt454
-rw-r--r--Documentation/arm/00-INDEX52
-rw-r--r--Documentation/arm/Atmel/README124
-rw-r--r--Documentation/arm/Booting218
-rw-r--r--Documentation/arm/CCN.txt52
-rw-r--r--Documentation/arm/IXP4xx168
-rw-r--r--Documentation/arm/Interrupts167
-rw-r--r--Documentation/arm/Marvell/README284
-rw-r--r--Documentation/arm/Netwinder78
-rw-r--r--Documentation/arm/OMAP/DSS362
-rw-r--r--Documentation/arm/OMAP/omap_pm154
-rw-r--r--Documentation/arm/Porting135
-rw-r--r--Documentation/arm/README204
-rw-r--r--Documentation/arm/SA1100/ADSBitsy43
-rw-r--r--Documentation/arm/SA1100/Assabet300
-rw-r--r--Documentation/arm/SA1100/Brutus66
-rw-r--r--Documentation/arm/SA1100/CERF29
-rw-r--r--Documentation/arm/SA1100/FreeBird21
-rw-r--r--Documentation/arm/SA1100/GraphicsClient98
-rw-r--r--Documentation/arm/SA1100/GraphicsMaster53
-rw-r--r--Documentation/arm/SA1100/HUW_WEBPANEL17
-rw-r--r--Documentation/arm/SA1100/Itsy39
-rw-r--r--Documentation/arm/SA1100/LART14
-rw-r--r--Documentation/arm/SA1100/PLEB11
-rw-r--r--Documentation/arm/SA1100/Pangolin23
-rw-r--r--Documentation/arm/SA1100/Tifon7
-rw-r--r--Documentation/arm/SA1100/Victor16
-rw-r--r--Documentation/arm/SA1100/Yopy2
-rw-r--r--Documentation/arm/SA1100/empeg2
-rw-r--r--Documentation/arm/SA1100/nanoEngine11
-rw-r--r--Documentation/arm/SA1100/serial_UART47
-rw-r--r--Documentation/arm/SH-Mobile/.gitignore1
-rw-r--r--Documentation/arm/SPEAr/overview.txt63
-rw-r--r--Documentation/arm/Samsung-S3C24XX/CPUfreq.txt75
-rw-r--r--Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt58
-rw-r--r--Documentation/arm/Samsung-S3C24XX/GPIO.txt171
-rw-r--r--Documentation/arm/Samsung-S3C24XX/H1940.txt40
-rw-r--r--Documentation/arm/Samsung-S3C24XX/NAND.txt30
-rw-r--r--Documentation/arm/Samsung-S3C24XX/Overview.txt318
-rw-r--r--Documentation/arm/Samsung-S3C24XX/S3C2412.txt120
-rw-r--r--Documentation/arm/Samsung-S3C24XX/S3C2413.txt21
-rw-r--r--Documentation/arm/Samsung-S3C24XX/SMDK2440.txt56
-rw-r--r--Documentation/arm/Samsung-S3C24XX/Suspend.txt137
-rw-r--r--Documentation/arm/Samsung-S3C24XX/USB-Host.txt93
-rw-r--r--Documentation/arm/Samsung/GPIO.txt40
-rw-r--r--Documentation/arm/Samsung/Overview.txt86
-rwxr-xr-xDocumentation/arm/Samsung/clksrc-change-registers.awk166
-rw-r--r--Documentation/arm/Setup129
-rw-r--r--Documentation/arm/VFP/release-notes.txt55
-rw-r--r--Documentation/arm/cluster-pm-race-avoidance.txt498
-rw-r--r--Documentation/arm/firmware.txt70
-rw-r--r--Documentation/arm/kernel_mode_neon.txt121
-rw-r--r--Documentation/arm/kernel_user_helpers.txt267
-rw-r--r--Documentation/arm/mem_alignment58
-rw-r--r--Documentation/arm/memory.txt88
-rw-r--r--Documentation/arm/nwfpe/NOTES29
-rw-r--r--Documentation/arm/nwfpe/README70
-rw-r--r--Documentation/arm/nwfpe/README.FPE156
-rw-r--r--Documentation/arm/nwfpe/TODO67
-rw-r--r--Documentation/arm/pxa/mfp.txt286
-rw-r--r--Documentation/arm/sti/overview.txt33
-rw-r--r--Documentation/arm/sti/stih407-overview.txt18
-rw-r--r--Documentation/arm/sti/stih415-overview.txt12
-rw-r--r--Documentation/arm/sti/stih416-overview.txt12
-rw-r--r--Documentation/arm/sti/stih418-overview.txt20
-rw-r--r--Documentation/arm/sunxi/README61
-rw-r--r--Documentation/arm/sunxi/clocks.txt56
-rw-r--r--Documentation/arm/swp_emulation27
-rw-r--r--Documentation/arm/tcm.txt155
-rw-r--r--Documentation/arm/uefi.txt64
-rw-r--r--Documentation/arm/vlocks.txt211
-rw-r--r--Documentation/arm64/acpi_object_usage.txt593
-rw-r--r--Documentation/arm64/arm-acpi.txt505
-rw-r--r--Documentation/arm64/booting.txt224
-rw-r--r--Documentation/arm64/legacy_instructions.txt57
-rw-r--r--Documentation/arm64/memory.txt94
-rw-r--r--Documentation/arm64/tagged-pointers.txt34
-rw-r--r--Documentation/assoc_array.txt574
-rw-r--r--Documentation/atomic_ops.txt634
-rw-r--r--Documentation/auxdisplay/.gitignore1
-rw-r--r--Documentation/auxdisplay/Makefile7
-rw-r--r--Documentation/auxdisplay/cfag12864b105
-rw-r--r--Documentation/auxdisplay/cfag12864b-example.c281
-rw-r--r--Documentation/auxdisplay/ks010855
-rw-r--r--Documentation/backlight/lp855x-driver.txt66
-rw-r--r--Documentation/bad_memory.txt45
-rw-r--r--Documentation/basic_profiling.txt56
-rw-r--r--Documentation/bcache.txt448
-rw-r--r--Documentation/binfmt_misc.txt124
-rw-r--r--Documentation/blackfin/00-INDEX10
-rw-r--r--Documentation/blackfin/Makefile5
-rw-r--r--Documentation/blackfin/bfin-gpio-notes.txt71
-rw-r--r--Documentation/blackfin/bfin-spi-notes.txt16
-rw-r--r--Documentation/blackfin/gptimers-example.c83
-rw-r--r--Documentation/block/00-INDEX28
-rw-r--r--Documentation/block/biodoc.txt1180
-rw-r--r--Documentation/block/biovecs.txt111
-rw-r--r--Documentation/block/capability.txt15
-rw-r--r--Documentation/block/cfq-iosched.txt292
-rw-r--r--Documentation/block/cmdline-partition.txt39
-rw-r--r--Documentation/block/data-integrity.txt283
-rw-r--r--Documentation/block/deadline-iosched.txt75
-rw-r--r--Documentation/block/ioprio.txt183
-rw-r--r--Documentation/block/null_blk.txt72
-rw-r--r--Documentation/block/queue-sysfs.txt138
-rw-r--r--Documentation/block/request.txt88
-rw-r--r--Documentation/block/stat.txt82
-rw-r--r--Documentation/block/switching-sched.txt37
-rw-r--r--Documentation/block/writeback_cache_control.txt86
-rw-r--r--Documentation/blockdev/00-INDEX18
-rw-r--r--Documentation/blockdev/README.DAC960756
-rw-r--r--Documentation/blockdev/cciss.txt194
-rw-r--r--Documentation/blockdev/cpqarray.txt93
-rw-r--r--Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg588
-rw-r--r--Documentation/blockdev/drbd/DRBD-data-packets.svg459
-rw-r--r--Documentation/blockdev/drbd/README.txt16
-rw-r--r--Documentation/blockdev/drbd/conn-states-8.dot18
-rw-r--r--Documentation/blockdev/drbd/data-structure-v9.txt38
-rw-r--r--Documentation/blockdev/drbd/disk-states-8.dot16
-rw-r--r--Documentation/blockdev/drbd/drbd-connection-state-overview.dot85
-rw-r--r--Documentation/blockdev/drbd/node-states-8.dot14
-rw-r--r--Documentation/blockdev/floppy.txt245
-rw-r--r--Documentation/blockdev/mflash.txt84
-rw-r--r--Documentation/blockdev/nbd.txt31
-rw-r--r--Documentation/blockdev/paride.txt417
-rw-r--r--Documentation/blockdev/ramdisk.txt174
-rw-r--r--Documentation/blockdev/zram.txt189
-rw-r--r--Documentation/braille-console.txt34
-rw-r--r--Documentation/bt8xxgpio.txt67
-rw-r--r--Documentation/btmrvl.txt111
-rw-r--r--Documentation/bus-devices/ti-gpmc.txt122
-rw-r--r--Documentation/bus-virt-phys-mapping.txt208
-rw-r--r--Documentation/cachetlb.txt403
-rw-r--r--Documentation/cdrom/00-INDEX11
-rw-r--r--Documentation/cdrom/Makefile21
-rw-r--r--Documentation/cdrom/cdrom-standard.tex1022
-rw-r--r--Documentation/cdrom/ide-cd538
-rw-r--r--Documentation/cdrom/packet-writing.txt132
-rw-r--r--Documentation/cgroups/00-INDEX28
-rw-r--r--Documentation/cgroups/blkio-controller.txt394
-rw-r--r--Documentation/cgroups/cgroups.txt678
-rw-r--r--Documentation/cgroups/cpuacct.txt49
-rw-r--r--Documentation/cgroups/cpusets.txt839
-rw-r--r--Documentation/cgroups/devices.txt116
-rw-r--r--Documentation/cgroups/freezer-subsystem.txt123
-rw-r--r--Documentation/cgroups/hugetlb.txt45
-rw-r--r--Documentation/cgroups/memcg_test.txt280
-rw-r--r--Documentation/cgroups/memory.txt875
-rw-r--r--Documentation/cgroups/net_cls.txt39
-rw-r--r--Documentation/cgroups/net_prio.txt55
-rw-r--r--Documentation/cgroups/unified-hierarchy.txt461
-rw-r--r--Documentation/circular-buffers.txt243
-rw-r--r--Documentation/clk.txt299
-rw-r--r--Documentation/cma/debugfs.txt21
-rw-r--r--Documentation/coccinelle.txt324
-rw-r--r--Documentation/connector/.gitignore1
-rw-r--r--Documentation/connector/Makefile16
-rw-r--r--Documentation/connector/cn_test.c201
-rw-r--r--Documentation/connector/connector.txt188
-rw-r--r--Documentation/connector/ucon.c250
-rw-r--r--Documentation/console/console.txt144
-rw-r--r--Documentation/cpu-freq/amd-powernow.txt38
-rw-r--r--Documentation/cpu-freq/boost.txt93
-rw-r--r--Documentation/cpu-freq/core.txt123
-rw-r--r--Documentation/cpu-freq/cpu-drivers.txt274
-rw-r--r--Documentation/cpu-freq/cpufreq-nforce2.txt19
-rw-r--r--Documentation/cpu-freq/cpufreq-stats.txt128
-rw-r--r--Documentation/cpu-freq/governors.txt269
-rw-r--r--Documentation/cpu-freq/index.txt54
-rw-r--r--Documentation/cpu-freq/intel-pstate.txt64
-rw-r--r--Documentation/cpu-freq/pcc-cpufreq.txt207
-rw-r--r--Documentation/cpu-freq/user-guide.txt224
-rw-r--r--Documentation/cpu-hotplug.txt452
-rw-r--r--Documentation/cpu-load.txt113
-rw-r--r--Documentation/cpuidle/core.txt23
-rw-r--r--Documentation/cpuidle/driver.txt37
-rw-r--r--Documentation/cpuidle/governor.txt28
-rw-r--r--Documentation/cpuidle/sysfs.txt92
-rw-r--r--Documentation/cputopology.txt111
-rw-r--r--Documentation/crc32.txt182
-rw-r--r--Documentation/cris/README195
-rw-r--r--Documentation/crypto/api-intro.txt248
-rw-r--r--Documentation/crypto/asymmetric-keys.txt312
-rw-r--r--Documentation/crypto/async-tx-api.txt225
-rw-r--r--Documentation/crypto/descore-readme.txt352
-rw-r--r--Documentation/dcdbas.txt91
-rw-r--r--Documentation/debugging-modules.txt22
-rw-r--r--Documentation/debugging-via-ohci1394.txt184
-rw-r--r--Documentation/dell_rbu.txt100
-rw-r--r--Documentation/development-process/1.Intro274
-rw-r--r--Documentation/development-process/2.Process478
-rw-r--r--Documentation/development-process/3.Early-stage212
-rw-r--r--Documentation/development-process/4.Coding399
-rw-r--r--Documentation/development-process/5.Posting307
-rw-r--r--Documentation/development-process/6.Followthrough206
-rw-r--r--Documentation/development-process/7.AdvancedTopics173
-rw-r--r--Documentation/development-process/8.Conclusion70
-rw-r--r--Documentation/device-mapper/cache-policies.txt97
-rw-r--r--Documentation/device-mapper/cache.txt298
-rw-r--r--Documentation/device-mapper/delay.txt26
-rw-r--r--Documentation/device-mapper/dm-crypt.txt96
-rw-r--r--Documentation/device-mapper/dm-flakey.txt53
-rw-r--r--Documentation/device-mapper/dm-io.txt75
-rw-r--r--Documentation/device-mapper/dm-log.txt54
-rw-r--r--Documentation/device-mapper/dm-queue-length.txt39
-rw-r--r--Documentation/device-mapper/dm-raid.txt226
-rw-r--r--Documentation/device-mapper/dm-service-time.txt91
-rw-r--r--Documentation/device-mapper/dm-uevent.txt97
-rw-r--r--Documentation/device-mapper/era.txt108
-rw-r--r--Documentation/device-mapper/kcopyd.txt47
-rw-r--r--Documentation/device-mapper/linear.txt61
-rw-r--r--Documentation/device-mapper/log-writes.txt140
-rw-r--r--Documentation/device-mapper/persistent-data.txt84
-rw-r--r--Documentation/device-mapper/snapshot.txt168
-rw-r--r--Documentation/device-mapper/statistics.txt186
-rw-r--r--Documentation/device-mapper/striped.txt57
-rw-r--r--Documentation/device-mapper/switch.txt138
-rw-r--r--Documentation/device-mapper/thin-provisioning.txt389
-rw-r--r--Documentation/device-mapper/verity.txt172
-rw-r--r--Documentation/device-mapper/zero.txt37
-rw-r--r--Documentation/devices.txt3355
-rw-r--r--Documentation/devicetree/00-INDEX12
-rw-r--r--Documentation/devicetree/bindings/ABI.txt39
-rw-r--r--Documentation/devicetree/bindings/arc/interrupts.txt24
-rw-r--r--Documentation/devicetree/bindings/arc/pct.txt20
-rw-r--r--Documentation/devicetree/bindings/arm/adapteva.txt7
-rw-r--r--Documentation/devicetree/bindings/arm/al,alpine.txt88
-rw-r--r--Documentation/devicetree/bindings/arm/altera.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.txt11
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/altera/socfpga-system.txt13
-rw-r--r--Documentation/devicetree/bindings/arm/amlogic.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/arch_timer.txt96
-rw-r--r--Documentation/devicetree/bindings/arm/arm-boards159
-rw-r--r--Documentation/devicetree/bindings/arm/armada-370-xp-pmsu.txt21
-rw-r--r--Documentation/devicetree/bindings/arm/armada-370-xp.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/armada-375.txt9
-rw-r--r--Documentation/devicetree/bindings/arm/armada-380-mpcore-soc-ctrl.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/armada-38x.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/armada-39x.txt20
-rw-r--r--Documentation/devicetree/bindings/arm/armada-cpu-reset.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/armadeus.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-at91.txt157
-rw-r--r--Documentation/devicetree/bindings/arm/atmel-pmc.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/axxia.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351-cpu-method.txt36
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.txt10
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt9
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt97
-rw-r--r--Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.txt31
-rw-r--r--Documentation/devicetree/bindings/arm/calxeda.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/calxeda/combophy.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/calxeda/l2ecc.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/calxeda/mem-ctrlr.txt16
-rw-r--r--Documentation/devicetree/bindings/arm/cavium-thunder.txt10
-rw-r--r--Documentation/devicetree/bindings/arm/cci.txt227
-rw-r--r--Documentation/devicetree/bindings/arm/ccn.txt21
-rw-r--r--Documentation/devicetree/bindings/arm/coherency-fabric.txt43
-rw-r--r--Documentation/devicetree/bindings/arm/coresight.txt199
-rw-r--r--Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp52
-rw-r--r--Documentation/devicetree/bindings/arm/cpu-enable-method/marvell,berlin-smp41
-rw-r--r--Documentation/devicetree/bindings/arm/cpus.txt437
-rw-r--r--Documentation/devicetree/bindings/arm/davinci.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/davinci/cp-intc.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/digicolor.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/exynos/power_domain.txt52
-rw-r--r--Documentation/devicetree/bindings/arm/exynos/smp-sysram.txt38
-rw-r--r--Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.txt20
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt33
-rw-r--r--Documentation/devicetree/bindings/arm/fsl.txt134
-rw-r--r--Documentation/devicetree/bindings/arm/fw-cfg.txt72
-rw-r--r--Documentation/devicetree/bindings/arm/gic-v3.txt118
-rw-r--r--Documentation/devicetree/bindings/arm/gic.txt152
-rw-r--r--Documentation/devicetree/bindings/arm/global_timer.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt105
-rw-r--r--Documentation/devicetree/bindings/arm/idle-states.txt699
-rw-r--r--Documentation/devicetree/bindings/arm/insignal-boards.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/keystone/keystone.txt20
-rw-r--r--Documentation/devicetree/bindings/arm/kirkwood.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/l2cc.txt82
-rw-r--r--Documentation/devicetree/bindings/arm/lpc32xx-mic.txt38
-rw-r--r--Documentation/devicetree/bindings/arm/lpc32xx.txt8
-rw-r--r--Documentation/devicetree/bindings/arm/marvell,berlin.txt152
-rw-r--r--Documentation/devicetree/bindings/arm/marvell,dove.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/marvell,kirkwood.txt98
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek.txt31
-rw-r--r--Documentation/devicetree/bindings/arm/mediatek/mediatek,sysirq.txt30
-rw-r--r--Documentation/devicetree/bindings/arm/moxart.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/mrvl/feroceon.txt16
-rw-r--r--Documentation/devicetree/bindings/arm/mrvl/intc.txt60
-rw-r--r--Documentation/devicetree/bindings/arm/mrvl/mrvl.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/mrvl/tauros2.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/mrvl/timer.txt13
-rw-r--r--Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt84
-rw-r--r--Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt30
-rw-r--r--Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt57
-rw-r--r--Documentation/devicetree/bindings/arm/msm/ssbi.txt18
-rw-r--r--Documentation/devicetree/bindings/arm/msm/timer.txt47
-rw-r--r--Documentation/devicetree/bindings/arm/mvebu-system-controller.txt18
-rw-r--r--Documentation/devicetree/bindings/arm/nspire.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/olimex.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/omap/counter.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/omap/crossbar.txt55
-rw-r--r--Documentation/devicetree/bindings/arm/omap/ctrl.txt79
-rw-r--r--Documentation/devicetree/bindings/arm/omap/dmm.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/omap/dsp.txt14
-rw-r--r--Documentation/devicetree/bindings/arm/omap/intc.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/omap/iva.txt19
-rw-r--r--Documentation/devicetree/bindings/arm/omap/l3-noc.txt23
-rw-r--r--Documentation/devicetree/bindings/arm/omap/l4.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/omap/mpu.txt38
-rw-r--r--Documentation/devicetree/bindings/arm/omap/omap.txt154
-rw-r--r--Documentation/devicetree/bindings/arm/omap/prcm.txt63
-rw-r--r--Documentation/devicetree/bindings/arm/omap/timer.txt44
-rw-r--r--Documentation/devicetree/bindings/arm/picoxcell.txt24
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.txt44
-rw-r--r--Documentation/devicetree/bindings/arm/primecell.txt46
-rw-r--r--Documentation/devicetree/bindings/arm/psci.txt102
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip.txt28
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip/pmu-sram.txt16
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip/pmu.txt16
-rw-r--r--Documentation/devicetree/bindings/arm/rockchip/smp-sram.txt30
-rw-r--r--Documentation/devicetree/bindings/arm/rtsm-dcscb.txt19
-rw-r--r--Documentation/devicetree/bindings/arm/samsung-boards.txt27
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt100
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/interrupt-combiner.txt52
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/pmu.txt69
-rw-r--r--Documentation/devicetree/bindings/arm/samsung/sysreg.txt19
-rw-r--r--Documentation/devicetree/bindings/arm/shmobile.txt63
-rw-r--r--Documentation/devicetree/bindings/arm/sirf.txt11
-rw-r--r--Documentation/devicetree/bindings/arm/spear-misc.txt9
-rw-r--r--Documentation/devicetree/bindings/arm/spear-timer.txt18
-rw-r--r--Documentation/devicetree/bindings/arm/spear.txt26
-rw-r--r--Documentation/devicetree/bindings/arm/spear/shirq.txt48
-rw-r--r--Documentation/devicetree/bindings/arm/sprd.txt11
-rw-r--r--Documentation/devicetree/bindings/arm/ste-nomadik.txt38
-rw-r--r--Documentation/devicetree/bindings/arm/ste-u300.txt46
-rw-r--r--Documentation/devicetree/bindings/arm/sti.txt23
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/tegra.txt41
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt17
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-emc.txt100
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt12
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt16
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt115
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt32
-rw-r--r--Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-mc.txt18
-rw-r--r--Documentation/devicetree/bindings/arm/topology.txt475
-rw-r--r--Documentation/devicetree/bindings/arm/twd.txt48
-rw-r--r--Documentation/devicetree/bindings/arm/ux500/power_domain.txt35
-rw-r--r--Documentation/devicetree/bindings/arm/versatile-fpga-irq.txt36
-rw-r--r--Documentation/devicetree/bindings/arm/versatile-sysreg.txt10
-rw-r--r--Documentation/devicetree/bindings/arm/vexpress-scc.txt33
-rw-r--r--Documentation/devicetree/bindings/arm/vexpress-sysreg.txt103
-rw-r--r--Documentation/devicetree/bindings/arm/vexpress.txt229
-rw-r--r--Documentation/devicetree/bindings/arm/vic.txt41
-rw-r--r--Documentation/devicetree/bindings/arm/vt8500.txt22
-rw-r--r--Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt16
-rw-r--r--Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt13
-rw-r--r--Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt15
-rw-r--r--Documentation/devicetree/bindings/arm/xen.txt25
-rw-r--r--Documentation/devicetree/bindings/arm/xilinx.txt7
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-platform.txt80
-rw-r--r--Documentation/devicetree/bindings/ata/ahci-st.txt50
-rw-r--r--Documentation/devicetree/bindings/ata/apm-xgene.txt79
-rw-r--r--Documentation/devicetree/bindings/ata/atmel-at91_cf.txt19
-rw-r--r--Documentation/devicetree/bindings/ata/cavium-compact-flash.txt30
-rw-r--r--Documentation/devicetree/bindings/ata/exynos-sata.txt30
-rw-r--r--Documentation/devicetree/bindings/ata/fsl-sata.txt29
-rw-r--r--Documentation/devicetree/bindings/ata/imx-pata.txt17
-rw-r--r--Documentation/devicetree/bindings/ata/imx-sata.txt36
-rw-r--r--Documentation/devicetree/bindings/ata/marvell.txt22
-rw-r--r--Documentation/devicetree/bindings/ata/pata-arasan.txt39
-rw-r--r--Documentation/devicetree/bindings/ata/qcom-sata.txt48
-rw-r--r--Documentation/devicetree/bindings/ata/sata_highbank.txt44
-rw-r--r--Documentation/devicetree/bindings/ata/sata_rcar.txt23
-rw-r--r--Documentation/devicetree/bindings/ata/tegra-sata.txt32
-rw-r--r--Documentation/devicetree/bindings/btmrvl.txt29
-rw-r--r--Documentation/devicetree/bindings/bus/brcm,bus-axi.txt53
-rw-r--r--Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt34
-rw-r--r--Documentation/devicetree/bindings/bus/imx-weim.txt82
-rw-r--r--Documentation/devicetree/bindings/bus/mvebu-mbus.txt279
-rw-r--r--Documentation/devicetree/bindings/bus/omap-ocp2scp.txt29
-rw-r--r--Documentation/devicetree/bindings/bus/renesas,bsc.txt46
-rw-r--r--Documentation/devicetree/bindings/bus/simple-pm-bus.txt44
-rw-r--r--Documentation/devicetree/bindings/bus/ti-gpmc.txt130
-rw-r--r--Documentation/devicetree/bindings/c6x/clocks.txt40
-rw-r--r--Documentation/devicetree/bindings/c6x/dscr.txt127
-rw-r--r--Documentation/devicetree/bindings/c6x/emifa.txt62
-rw-r--r--Documentation/devicetree/bindings/c6x/interrupt.txt104
-rw-r--r--Documentation/devicetree/bindings/c6x/soc.txt28
-rw-r--r--Documentation/devicetree/bindings/c6x/timer64.txt26
-rw-r--r--Documentation/devicetree/bindings/chosen.txt46
-rw-r--r--Documentation/devicetree/bindings/clock/alphascale,acc.txt115
-rw-r--r--Documentation/devicetree/bindings/clock/altr_socfpga.txt30
-rw-r--r--Documentation/devicetree/bindings/clock/arm-integrator.txt34
-rw-r--r--Documentation/devicetree/bindings/clock/at91-clock.txt463
-rw-r--r--Documentation/devicetree/bindings/clock/axi-clkgen.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/bcm-cygnus-clock.txt34
-rw-r--r--Documentation/devicetree/bindings/clock/brcm,kona-ccu.txt139
-rw-r--r--Documentation/devicetree/bindings/clock/calxeda.txt17
-rw-r--r--Documentation/devicetree/bindings/clock/clk-exynos-audss.txt95
-rw-r--r--Documentation/devicetree/bindings/clock/clk-palmas-clk32kg-clocks.txt35
-rw-r--r--Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt53
-rw-r--r--Documentation/devicetree/bindings/clock/clock-bindings.txt169
-rw-r--r--Documentation/devicetree/bindings/clock/clps711x-clock.txt19
-rw-r--r--Documentation/devicetree/bindings/clock/efm32-clock.txt11
-rw-r--r--Documentation/devicetree/bindings/clock/emev2-clock.txt98
-rw-r--r--Documentation/devicetree/bindings/clock/exynos3250-clock.txt57
-rw-r--r--Documentation/devicetree/bindings/clock/exynos4-clock.txt43
-rw-r--r--Documentation/devicetree/bindings/clock/exynos4415-clock.txt38
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5250-clock.txt41
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5260-clock.txt190
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5410-clock.txt45
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5420-clock.txt42
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5433-clock.txt462
-rw-r--r--Documentation/devicetree/bindings/clock/exynos5440-clock.txt28
-rw-r--r--Documentation/devicetree/bindings/clock/exynos7-clock.txt108
-rw-r--r--Documentation/devicetree/bindings/clock/fixed-clock.txt23
-rw-r--r--Documentation/devicetree/bindings/clock/fixed-factor-clock.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/fujitsu,mb86s70-crg11.txt26
-rw-r--r--Documentation/devicetree/bindings/clock/gpio-gate-clock.txt21
-rw-r--r--Documentation/devicetree/bindings/clock/hi3620-clock.txt20
-rw-r--r--Documentation/devicetree/bindings/clock/hix5hd2-clock.txt31
-rw-r--r--Documentation/devicetree/bindings/clock/imx1-clock.txt26
-rw-r--r--Documentation/devicetree/bindings/clock/imx21-clock.txt28
-rw-r--r--Documentation/devicetree/bindings/clock/imx23-clock.txt71
-rw-r--r--Documentation/devicetree/bindings/clock/imx25-clock.txt161
-rw-r--r--Documentation/devicetree/bindings/clock/imx27-clock.txt28
-rw-r--r--Documentation/devicetree/bindings/clock/imx28-clock.txt94
-rw-r--r--Documentation/devicetree/bindings/clock/imx31-clock.txt91
-rw-r--r--Documentation/devicetree/bindings/clock/imx35-clock.txt113
-rw-r--r--Documentation/devicetree/bindings/clock/imx5-clock.txt29
-rw-r--r--Documentation/devicetree/bindings/clock/imx6q-clock.txt31
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sl-clock.txt10
-rw-r--r--Documentation/devicetree/bindings/clock/imx6sx-clock.txt13
-rw-r--r--Documentation/devicetree/bindings/clock/keystone-gate.txt29
-rw-r--r--Documentation/devicetree/bindings/clock/keystone-pll.txt84
-rw-r--r--Documentation/devicetree/bindings/clock/lsi,axm5516-clks.txt29
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,mmp2.txt21
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,pxa168.txt21
-rw-r--r--Documentation/devicetree/bindings/clock/marvell,pxa910.txt21
-rw-r--r--Documentation/devicetree/bindings/clock/maxim,max77686.txt46
-rw-r--r--Documentation/devicetree/bindings/clock/maxim,max77802.txt44
-rw-r--r--Documentation/devicetree/bindings/clock/moxa,moxart-clock.txt48
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-core-clock.txt78
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-corediv-clock.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-cpu-clock.txt22
-rw-r--r--Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt191
-rw-r--r--Documentation/devicetree/bindings/clock/nspire-clock.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra114-car.txt63
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra124-car.txt65
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra20-car.txt63
-rw-r--r--Documentation/devicetree/bindings/clock/nvidia,tegra30-car.txt63
-rw-r--r--Documentation/devicetree/bindings/clock/pistachio-clock.txt123
-rw-r--r--Documentation/devicetree/bindings/clock/prima2-clock.txt73
-rw-r--r--Documentation/devicetree/bindings/clock/pwm-clock.txt26
-rw-r--r--Documentation/devicetree/bindings/clock/pxa-clock.txt16
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,gcc.txt27
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,lcc.txt21
-rw-r--r--Documentation/devicetree/bindings/clock/qcom,mmcc.txt23
-rw-r--r--Documentation/devicetree/bindings/clock/qoriq-clock.txt153
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt34
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt57
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r8a73a4-cpg-clocks.txt33
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r8a7740-cpg-clocks.txt41
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt25
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt27
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt37
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt29
-rw-r--r--Documentation/devicetree/bindings/clock/renesas,sh73a0-cpg-clocks.txt35
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.txt61
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt61
-rw-r--r--Documentation/devicetree/bindings/clock/rockchip.txt77
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c2410-clock.txt50
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c2412-clock.txt50
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c2443-clock.txt56
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt77
-rw-r--r--Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.txt78
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si5351.txt121
-rw-r--r--Documentation/devicetree/bindings/clock/silabs,si570.txt39
-rw-r--r--Documentation/devicetree/bindings/clock/st,nomadik.txt104
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,clkgen-divmux.txt49
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,clkgen-mux.txt36
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt51
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,clkgen-prediv.txt36
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,clkgen-vcc.txt61
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,clkgen.txt100
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,flexgen.txt119
-rw-r--r--Documentation/devicetree/bindings/clock/st/st,quadfs.txt48
-rw-r--r--Documentation/devicetree/bindings/clock/ste-u300-syscon-clock.txt80
-rw-r--r--Documentation/devicetree/bindings/clock/sunxi.txt204
-rw-r--r--Documentation/devicetree/bindings/clock/ti,cdce706.txt42
-rw-r--r--Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt20
-rw-r--r--Documentation/devicetree/bindings/clock/ti/apll.txt45
-rw-r--r--Documentation/devicetree/bindings/clock/ti/autoidle.txt39
-rw-r--r--Documentation/devicetree/bindings/clock/ti/clockdomain.txt24
-rw-r--r--Documentation/devicetree/bindings/clock/ti/composite.txt54
-rw-r--r--Documentation/devicetree/bindings/clock/ti/divider.txt114
-rw-r--r--Documentation/devicetree/bindings/clock/ti/dpll.txt85
-rw-r--r--Documentation/devicetree/bindings/clock/ti/dra7-atl.txt96
-rw-r--r--Documentation/devicetree/bindings/clock/ti/fapll.txt33
-rw-r--r--Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt43
-rw-r--r--Documentation/devicetree/bindings/clock/ti/gate.txt106
-rw-r--r--Documentation/devicetree/bindings/clock/ti/interface.txt56
-rw-r--r--Documentation/devicetree/bindings/clock/ti/mux.txt76
-rw-r--r--Documentation/devicetree/bindings/clock/vf610-clock.txt41
-rw-r--r--Documentation/devicetree/bindings/clock/vt8500.txt74
-rw-r--r--Documentation/devicetree/bindings/clock/xgene.txt111
-rw-r--r--Documentation/devicetree/bindings/clock/zynq-7000.txt110
-rw-r--r--Documentation/devicetree/bindings/common-properties.txt60
-rw-r--r--Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt65
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt64
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-exynos5440.txt28
-rw-r--r--Documentation/devicetree/bindings/cpufreq/cpufreq-spear.txt42
-rw-r--r--Documentation/devicetree/bindings/cris/axis.txt9
-rw-r--r--Documentation/devicetree/bindings/cris/boards.txt8
-rw-r--r--Documentation/devicetree/bindings/cris/interrupts.txt23
-rw-r--r--Documentation/devicetree/bindings/crypto/amd-ccp.txt19
-rw-r--r--Documentation/devicetree/bindings/crypto/atmel-crypto.txt68
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-dcp.txt17
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt15
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec2.txt68
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec4.txt455
-rw-r--r--Documentation/devicetree/bindings/crypto/fsl-sec6.txt157
-rw-r--r--Documentation/devicetree/bindings/crypto/img-hash.txt27
-rw-r--r--Documentation/devicetree/bindings/crypto/mv_cesa.txt20
-rw-r--r--Documentation/devicetree/bindings/crypto/omap-aes.txt31
-rw-r--r--Documentation/devicetree/bindings/crypto/omap-des.txt30
-rw-r--r--Documentation/devicetree/bindings/crypto/omap-sham.txt28
-rw-r--r--Documentation/devicetree/bindings/crypto/picochip-spacc.txt23
-rw-r--r--Documentation/devicetree/bindings/crypto/qcom-qce.txt25
-rw-r--r--Documentation/devicetree/bindings/crypto/samsung-sss.txt34
-rw-r--r--Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt110
-rw-r--r--Documentation/devicetree/bindings/dma/apm-xgene-dma.txt47
-rw-r--r--Documentation/devicetree/bindings/dma/arm-pl330.txt44
-rw-r--r--Documentation/devicetree/bindings/dma/atmel-dma.txt42
-rw-r--r--Documentation/devicetree/bindings/dma/atmel-xdma.txt54
-rw-r--r--Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt57
-rw-r--r--Documentation/devicetree/bindings/dma/dma.txt81
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-edma.txt76
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-imx-dma.txt48
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt85
-rw-r--r--Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt60
-rw-r--r--Documentation/devicetree/bindings/dma/img-mdc-dma.txt57
-rw-r--r--Documentation/devicetree/bindings/dma/jz4780-dma.txt56
-rw-r--r--Documentation/devicetree/bindings/dma/k3dma.txt46
-rw-r--r--Documentation/devicetree/bindings/dma/mmp-dma.txt77
-rw-r--r--Documentation/devicetree/bindings/dma/moxa,moxart-dma.txt45
-rw-r--r--Documentation/devicetree/bindings/dma/mpc512x-dma.txt29
-rw-r--r--Documentation/devicetree/bindings/dma/mv-xor.txt40
-rw-r--r--Documentation/devicetree/bindings/dma/nbpfaxi.txt61
-rw-r--r--Documentation/devicetree/bindings/dma/qcom_adm.txt62
-rw-r--r--Documentation/devicetree/bindings/dma/qcom_bam_dma.txt44
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt95
-rw-r--r--Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt37
-rw-r--r--Documentation/devicetree/bindings/dma/shdma.txt84
-rw-r--r--Documentation/devicetree/bindings/dma/sirfsoc-dma.txt43
-rw-r--r--Documentation/devicetree/bindings/dma/snps-dma.txt63
-rw-r--r--Documentation/devicetree/bindings/dma/ste-coh901318.txt32
-rw-r--r--Documentation/devicetree/bindings/dma/ste-dma40.txt139
-rw-r--r--Documentation/devicetree/bindings/dma/sun6i-dma.txt45
-rw-r--r--Documentation/devicetree/bindings/dma/tegra20-apbdma.txt44
-rw-r--r--Documentation/devicetree/bindings/dma/ti-edma.txt35
-rw-r--r--Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt65
-rw-r--r--Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt75
-rw-r--r--Documentation/devicetree/bindings/drm/armada/marvell,dove-lcd.txt30
-rw-r--r--Documentation/devicetree/bindings/drm/atmel/hlcdc-dc.txt53
-rw-r--r--Documentation/devicetree/bindings/drm/bridge/dw_hdmi.txt50
-rw-r--r--Documentation/devicetree/bindings/drm/i2c/tda998x.txt29
-rw-r--r--Documentation/devicetree/bindings/drm/imx/fsl-imx-drm.txt83
-rw-r--r--Documentation/devicetree/bindings/drm/imx/hdmi.txt58
-rw-r--r--Documentation/devicetree/bindings/drm/imx/ldb.txt146
-rw-r--r--Documentation/devicetree/bindings/drm/msm/gpu.txt52
-rw-r--r--Documentation/devicetree/bindings/drm/msm/hdmi.txt48
-rw-r--r--Documentation/devicetree/bindings/drm/msm/mdp.txt48
-rw-r--r--Documentation/devicetree/bindings/drm/tilcdc/panel.txt66
-rw-r--r--Documentation/devicetree/bindings/drm/tilcdc/slave.txt18
-rw-r--r--Documentation/devicetree/bindings/drm/tilcdc/tfp410.txt21
-rw-r--r--Documentation/devicetree/bindings/drm/tilcdc/tilcdc.txt29
-rw-r--r--Documentation/devicetree/bindings/eeprom.txt28
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-palmas.txt19
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-rt8973a.txt25
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-sm5502.txt23
-rw-r--r--Documentation/devicetree/bindings/extcon/extcon-usb-gpio.txt18
-rw-r--r--Documentation/devicetree/bindings/fb/mxsfb.txt49
-rw-r--r--Documentation/devicetree/bindings/fb/sm501fb.txt34
-rw-r--r--Documentation/devicetree/bindings/fpga/altera-socfpga-fpga-mgr.txt17
-rw-r--r--Documentation/devicetree/bindings/fuse/nvidia,tegra20-fuse.txt40
-rw-r--r--Documentation/devicetree/bindings/gpio/8xxx_gpio.txt74
-rw-r--r--Documentation/devicetree/bindings/gpio/abilis,tb10x-gpio.txt36
-rw-r--r--Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt52
-rw-r--r--Documentation/devicetree/bindings/gpio/cavium-octeon-gpio.txt49
-rw-r--r--Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.txt17
-rw-r--r--Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt32
-rw-r--r--Documentation/devicetree/bindings/gpio/fujitsu,mb86s70-gpio.txt20
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-74x164.txt22
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-74xx-mmio.txt30
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-adnp.txt34
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-altera.txt43
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-clps711x.txt28
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-davinci.txt62
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-dsp-keystone.txt39
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-fan.txt40
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-grgpio.txt26
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-lp3943.txt37
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-max732x.txt59
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mcp23s08.txt85
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mm-lantiq.txt38
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-msm.txt26
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mvebu.txt53
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-mxs.txt88
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-nmk.txt31
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-omap.txt39
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-palmas.txt27
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pca953x.txt39
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-pcf857x.txt71
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-poweroff.txt36
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-restart.txt54
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-samsung.txt41
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-stericsson-coh901.txt7
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-stmpe.txt18
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-stp-xway.txt42
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-sx150x.txt40
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-twl4030.txt29
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-tz1090-pdc.txt45
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-tz1090.txt88
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-vf610.txt55
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xgene-sb.txt32
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xgene.txt22
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-xilinx.txt48
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-zevio.txt16
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio-zynq.txt26
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio.txt242
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_atmel.txt25
-rw-r--r--Documentation/devicetree/bindings/gpio/gpio_lpc32xx.txt43
-rw-r--r--Documentation/devicetree/bindings/gpio/moxa,moxart-gpio.txt19
-rw-r--r--Documentation/devicetree/bindings/gpio/mrvl-gpio.txt69
-rw-r--r--Documentation/devicetree/bindings/gpio/nvidia,tegra20-gpio.txt40
-rw-r--r--Documentation/devicetree/bindings/gpio/pl061-gpio.txt10
-rw-r--r--Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt63
-rw-r--r--Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt60
-rw-r--r--Documentation/devicetree/bindings/gpio/sodaville.txt48
-rw-r--r--Documentation/devicetree/bindings/gpio/spear_spics.txt50
-rw-r--r--Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt43
-rw-r--r--Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt378
-rw-r--r--Documentation/devicetree/bindings/gpu/samsung-g2d.txt28
-rw-r--r--Documentation/devicetree/bindings/gpu/samsung-rotator.txt27
-rw-r--r--Documentation/devicetree/bindings/gpu/st,stih4xx.txt243
-rw-r--r--Documentation/devicetree/bindings/graph.txt129
-rw-r--r--Documentation/devicetree/bindings/hid/hid-over-i2c.txt28
-rw-r--r--Documentation/devicetree/bindings/hsi/client-devices.txt44
-rw-r--r--Documentation/devicetree/bindings/hsi/nokia-modem.txt57
-rw-r--r--Documentation/devicetree/bindings/hsi/omap-ssi.txt97
-rw-r--r--Documentation/devicetree/bindings/hwmon/ads1015.txt73
-rw-r--r--Documentation/devicetree/bindings/hwmon/g762.txt47
-rw-r--r--Documentation/devicetree/bindings/hwmon/ibmpowernv.txt23
-rw-r--r--Documentation/devicetree/bindings/hwmon/lm90.txt44
-rw-r--r--Documentation/devicetree/bindings/hwmon/ltc2978.txt39
-rw-r--r--Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt41
-rw-r--r--Documentation/devicetree/bindings/hwmon/pwm-fan.txt12
-rw-r--r--Documentation/devicetree/bindings/hwmon/vexpress.txt23
-rw-r--r--Documentation/devicetree/bindings/hwrng/atmel-trng.txt16
-rw-r--r--Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt12
-rw-r--r--Documentation/devicetree/bindings/hwrng/omap_rng.txt22
-rw-r--r--Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt18
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt20
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,iproc-i2c.txt37
-rw-r--r--Documentation/devicetree/bindings/i2c/brcm,kona-i2c.txt35
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt86
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-at91.txt34
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-axxia.txt30
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-cadence.txt24
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt27
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-cros-ec-tunnel.txt39
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-davinci.txt31
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-designware.txt45
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-digicolor.txt25
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-efm32.txt34
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-exynos5.txt53
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-gpio.txt32
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-hix5hd2.txt24
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-img-scb.txt26
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-imx.txt40
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-jz4780.txt35
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-meson.txt24
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mpc.txt64
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt81
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt53
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt93
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mux.txt60
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt44
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-mxs.txt25
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-nomadik.txt23
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-ocores.txt69
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-octeon.txt34
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-omap.txt31
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-opal.txt37
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-pnx.txt36
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-pxa-pci-ce4100.txt93
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-pxa.txt33
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-rcar.txt32
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-riic.txt29
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-rk3x.txt56
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt60
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt40
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-sirf.txt19
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt15
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-st.txt41
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt41
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-versatile.txt10
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-vt8500.txt24
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-xiic.txt22
-rw-r--r--Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt22
-rw-r--r--Documentation/devicetree/bindings/i2c/ina209.txt18
-rw-r--r--Documentation/devicetree/bindings/i2c/ina2xx.txt22
-rw-r--r--Documentation/devicetree/bindings/i2c/max6697.txt64
-rw-r--r--Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.txt75
-rw-r--r--Documentation/devicetree/bindings/i2c/qcom,i2c-qup.txt40
-rw-r--r--Documentation/devicetree/bindings/i2c/ti,bq32k.txt18
-rw-r--r--Documentation/devicetree/bindings/i2c/trivial-devices.txt103
-rw-r--r--Documentation/devicetree/bindings/iio/accel/bma180.txt24
-rw-r--r--Documentation/devicetree/bindings/iio/adc/at91_adc.txt87
-rw-r--r--Documentation/devicetree/bindings/iio/adc/cc10001_adc.txt22
-rw-r--r--Documentation/devicetree/bindings/iio/adc/da9150-gpadc.txt16
-rw-r--r--Documentation/devicetree/bindings/iio/adc/max1027-adc.txt22
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mcp320x.txt30
-rw-r--r--Documentation/devicetree/bindings/iio/adc/mcp3422.txt17
-rw-r--r--Documentation/devicetree/bindings/iio/adc/nuvoton-nau7802.txt18
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.txt46
-rw-r--r--Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.txt129
-rw-r--r--Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt24
-rw-r--r--Documentation/devicetree/bindings/iio/adc/ti-adc128s052.txt18
-rw-r--r--Documentation/devicetree/bindings/iio/adc/twl4030-madc.txt24
-rw-r--r--Documentation/devicetree/bindings/iio/adc/vf610-adc.txt22
-rw-r--r--Documentation/devicetree/bindings/iio/adc/xilinx-xadc.txt113
-rw-r--r--Documentation/devicetree/bindings/iio/dac/ad7303.txt23
-rw-r--r--Documentation/devicetree/bindings/iio/dac/max5821.txt14
-rw-r--r--Documentation/devicetree/bindings/iio/frequency/adf4350.txt86
-rw-r--r--Documentation/devicetree/bindings/iio/humidity/dht11.txt14
-rw-r--r--Documentation/devicetree/bindings/iio/iio-bindings.txt97
-rw-r--r--Documentation/devicetree/bindings/iio/light/apds9300.txt22
-rw-r--r--Documentation/devicetree/bindings/iio/light/cm36651.txt26
-rw-r--r--Documentation/devicetree/bindings/iio/light/gp2ap020a00f.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/light/tsl2563.txt19
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/ak8975.txt18
-rw-r--r--Documentation/devicetree/bindings/iio/magnetometer/hmc5843.txt21
-rw-r--r--Documentation/devicetree/bindings/iio/proximity/as3935.txt28
-rw-r--r--Documentation/devicetree/bindings/iio/sensorhub.txt25
-rw-r--r--Documentation/devicetree/bindings/iio/st-sensors.txt55
-rw-r--r--Documentation/devicetree/bindings/input/ads7846.txt91
-rw-r--r--Documentation/devicetree/bindings/input/atmel,maxtouch.txt32
-rw-r--r--Documentation/devicetree/bindings/input/brcm,bcm-keypad.txt108
-rw-r--r--Documentation/devicetree/bindings/input/cap11xx.txt59
-rw-r--r--Documentation/devicetree/bindings/input/clps711x-keypad.txt27
-rw-r--r--Documentation/devicetree/bindings/input/cros-ec-keyb.txt72
-rw-r--r--Documentation/devicetree/bindings/input/e3x0-button.txt25
-rw-r--r--Documentation/devicetree/bindings/input/elan_i2c.txt34
-rw-r--r--Documentation/devicetree/bindings/input/elants_i2c.txt33
-rw-r--r--Documentation/devicetree/bindings/input/fsl-mma8450.txt12
-rw-r--r--Documentation/devicetree/bindings/input/gpio-beeper.txt13
-rw-r--r--Documentation/devicetree/bindings/input/gpio-keys-polled.txt38
-rw-r--r--Documentation/devicetree/bindings/input/gpio-keys.txt48
-rw-r--r--Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt46
-rw-r--r--Documentation/devicetree/bindings/input/imx-keypad.txt53
-rw-r--r--Documentation/devicetree/bindings/input/input-reset.txt33
-rw-r--r--Documentation/devicetree/bindings/input/lpc32xx-key.txt31
-rw-r--r--Documentation/devicetree/bindings/input/matrix-keymap.txt27
-rw-r--r--Documentation/devicetree/bindings/input/nvidia,tegra20-kbc.txt54
-rw-r--r--Documentation/devicetree/bindings/input/omap-keypad.txt28
-rw-r--r--Documentation/devicetree/bindings/input/ps2keyb-mouse-apbps2.txt16
-rw-r--r--Documentation/devicetree/bindings/input/pwm-beeper.txt7
-rw-r--r--Documentation/devicetree/bindings/input/pxa27x-keypad.txt60
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt43
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8xxx-keypad.txt89
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8xxx-pwrkey.txt46
-rw-r--r--Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.txt22
-rw-r--r--Documentation/devicetree/bindings/input/regulator-haptic.txt21
-rw-r--r--Documentation/devicetree/bindings/input/rotary-encoder.txt36
-rw-r--r--Documentation/devicetree/bindings/input/samsung-keypad.txt74
-rw-r--r--Documentation/devicetree/bindings/input/spear-keyboard.txt20
-rw-r--r--Documentation/devicetree/bindings/input/st-keyscan.txt60
-rw-r--r--Documentation/devicetree/bindings/input/stmpe-keypad.txt41
-rw-r--r--Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt62
-rw-r--r--Documentation/devicetree/bindings/input/tca8418_keypad.txt10
-rw-r--r--Documentation/devicetree/bindings/input/ti,drv260x.txt50
-rw-r--r--Documentation/devicetree/bindings/input/ti,drv2667.txt17
-rw-r--r--Documentation/devicetree/bindings/input/ti,nspire-keypad.txt60
-rw-r--r--Documentation/devicetree/bindings/input/ti,palmas-pwrbutton.txt36
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/auo_pixcir_ts.txt30
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/brcm,iproc-touchscreen.txt76
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/bu21013.txt28
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt46
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt55
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt19
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/goodix.txt29
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/lpc32xx-tsc.txt16
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/mms114.txt34
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt26
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/sitronix-st1232.txt24
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/stmpe.txt43
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/sun4i.txt38
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/sx8654.txt16
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/ti-tsc-adc.txt59
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt29
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt42
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/tsc2007.txt41
-rw-r--r--Documentation/devicetree/bindings/input/touchscreen/zforce_ts.txt34
-rw-r--r--Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt17
-rw-r--r--Documentation/devicetree/bindings/input/twl4030-keypad.txt27
-rw-r--r--Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt21
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/abilis,tb10x-ictl.txt38
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-ic.txt18
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt27
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt42
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt110
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm3380-l2-intc.txt41
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7038-l1-intc.txt52
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.txt90
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.txt29
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-mx.txt18
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-pic.txt25
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/cirrus,clps711x-intc.txt41
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/digicolor-ic.txt21
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/interrupts.txt110
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt18
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,armada-370-xp-mpic.txt38
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/marvell,orion-intc.txt48
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt60
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra-ictlr.txt43
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/opencores,or1k-pic.txt23
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,intc-irqpin.txt27
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt34
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/samsung,s3c24xx-irq.txt53
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/snps,dw-apb-ictl.txt32
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt35
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,keystone-irq.txt36
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap-intc-irq.txt28
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu33
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.txt71
-rw-r--r--Documentation/devicetree/bindings/iommu/iommu.txt182
-rw-r--r--Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt14
-rw-r--r--Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt21
-rw-r--r--Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt41
-rw-r--r--Documentation/devicetree/bindings/iommu/rockchip,iommu.txt26
-rw-r--r--Documentation/devicetree/bindings/iommu/samsung,sysmmu.txt70
-rw-r--r--Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt26
-rw-r--r--Documentation/devicetree/bindings/leds/common.txt55
-rw-r--r--Documentation/devicetree/bindings/leds/leds-gpio.txt66
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lp55xx.txt228
-rw-r--r--Documentation/devicetree/bindings/leds/leds-lp8860.txt29
-rw-r--r--Documentation/devicetree/bindings/leds/leds-ns2.txt26
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pm8941-wled.txt43
-rw-r--r--Documentation/devicetree/bindings/leds/leds-pwm.txt50
-rw-r--r--Documentation/devicetree/bindings/leds/pca963x.txt48
-rw-r--r--Documentation/devicetree/bindings/leds/register-bit-led.txt99
-rw-r--r--Documentation/devicetree/bindings/leds/tca6507.txt49
-rw-r--r--Documentation/devicetree/bindings/lpddr2/lpddr2-timings.txt52
-rw-r--r--Documentation/devicetree/bindings/lpddr2/lpddr2.txt102
-rw-r--r--Documentation/devicetree/bindings/mailbox/altera-mailbox.txt49
-rw-r--r--Documentation/devicetree/bindings/mailbox/arm-mhu.txt43
-rw-r--r--Documentation/devicetree/bindings/mailbox/mailbox.txt38
-rw-r--r--Documentation/devicetree/bindings/mailbox/omap-mailbox.txt131
-rw-r--r--Documentation/devicetree/bindings/marvell.txt516
-rw-r--r--Documentation/devicetree/bindings/media/atmel-isi.txt51
-rw-r--r--Documentation/devicetree/bindings/media/coda.txt30
-rw-r--r--Documentation/devicetree/bindings/media/exynos-fimc-lite.txt16
-rw-r--r--Documentation/devicetree/bindings/media/exynos-jpeg-codec.txt15
-rw-r--r--Documentation/devicetree/bindings/media/exynos4-fimc-is.txt49
-rw-r--r--Documentation/devicetree/bindings/media/exynos5-gsc.txt30
-rw-r--r--Documentation/devicetree/bindings/media/gpio-ir-receiver.txt16
-rw-r--r--Documentation/devicetree/bindings/media/hix5hd2-ir.txt25
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adv7343.txt48
-rw-r--r--Documentation/devicetree/bindings/media/i2c/adv7604.txt70
-rw-r--r--Documentation/devicetree/bindings/media/i2c/mt9m111.txt28
-rw-r--r--Documentation/devicetree/bindings/media/i2c/mt9p031.txt40
-rw-r--r--Documentation/devicetree/bindings/media/i2c/mt9v032.txt39
-rw-r--r--Documentation/devicetree/bindings/media/i2c/nokia,smia.txt63
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov2640.txt46
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ov2659.txt38
-rw-r--r--Documentation/devicetree/bindings/media/i2c/ths8200.txt19
-rw-r--r--Documentation/devicetree/bindings/media/i2c/tvp514x.txt44
-rw-r--r--Documentation/devicetree/bindings/media/i2c/tvp7002.txt53
-rw-r--r--Documentation/devicetree/bindings/media/img-ir-rev1.txt34
-rw-r--r--Documentation/devicetree/bindings/media/meson-ir.txt14
-rw-r--r--Documentation/devicetree/bindings/media/pxa-camera.txt43
-rw-r--r--Documentation/devicetree/bindings/media/rcar_vin.txt88
-rw-r--r--Documentation/devicetree/bindings/media/renesas,vsp1.txt43
-rw-r--r--Documentation/devicetree/bindings/media/s5p-mfc.txt51
-rw-r--r--Documentation/devicetree/bindings/media/samsung-fimc.txt211
-rw-r--r--Documentation/devicetree/bindings/media/samsung-mipi-csis.txt81
-rw-r--r--Documentation/devicetree/bindings/media/samsung-s5c73m3.txt97
-rw-r--r--Documentation/devicetree/bindings/media/samsung-s5k5baf.txt58
-rw-r--r--Documentation/devicetree/bindings/media/samsung-s5k6a3.txt33
-rw-r--r--Documentation/devicetree/bindings/media/sh_mobile_ceu.txt18
-rw-r--r--Documentation/devicetree/bindings/media/si4713.txt30
-rw-r--r--Documentation/devicetree/bindings/media/st-rc.txt29
-rw-r--r--Documentation/devicetree/bindings/media/sunxi-ir.txt25
-rw-r--r--Documentation/devicetree/bindings/media/ti,omap3isp.txt71
-rw-r--r--Documentation/devicetree/bindings/media/ti-am437x-vpfe.txt61
-rw-r--r--Documentation/devicetree/bindings/media/video-interfaces.txt239
-rw-r--r--Documentation/devicetree/bindings/media/xilinx/video.txt35
-rw-r--r--Documentation/devicetree/bindings/media/xilinx/xlnx,v-tc.txt33
-rw-r--r--Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt71
-rw-r--r--Documentation/devicetree/bindings/media/xilinx/xlnx,video.txt55
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt79
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ingenic,jz4780-nemc.txt75
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mvebu-devbus.txt178
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/mvebu-sdram-controller.txt21
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra-mc.txt36
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/renesas-memory-controllers.txt44
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/synopsys.txt11
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ti-aemif.txt210
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/ti/emif.txt55
-rw-r--r--Documentation/devicetree/bindings/metag/meta-intc.txt82
-rw-r--r--Documentation/devicetree/bindings/metag/meta.txt30
-rw-r--r--Documentation/devicetree/bindings/metag/pdc-intc.txt105
-rw-r--r--Documentation/devicetree/bindings/mfd/88pm860x.txt85
-rw-r--r--Documentation/devicetree/bindings/mfd/ab8500.txt159
-rw-r--r--Documentation/devicetree/bindings/mfd/arizona.txt95
-rw-r--r--Documentation/devicetree/bindings/mfd/as3711.txt73
-rw-r--r--Documentation/devicetree/bindings/mfd/as3722.txt213
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-gpbr.txt15
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt51
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-matrix.txt24
-rw-r--r--Documentation/devicetree/bindings/mfd/atmel-smc.txt19
-rw-r--r--Documentation/devicetree/bindings/mfd/axp20x.txt96
-rw-r--r--Documentation/devicetree/bindings/mfd/bfticu.txt25
-rw-r--r--Documentation/devicetree/bindings/mfd/brcm,bcm59056.txt39
-rw-r--r--Documentation/devicetree/bindings/mfd/cros-ec.txt65
-rw-r--r--Documentation/devicetree/bindings/mfd/da9052-i2c.txt60
-rw-r--r--Documentation/devicetree/bindings/mfd/da9055.txt72
-rw-r--r--Documentation/devicetree/bindings/mfd/da9063.txt93
-rw-r--r--Documentation/devicetree/bindings/mfd/da9150.txt43
-rw-r--r--Documentation/devicetree/bindings/mfd/hi6421.txt38
-rw-r--r--Documentation/devicetree/bindings/mfd/lp3943.txt33
-rw-r--r--Documentation/devicetree/bindings/mfd/max14577.txt146
-rw-r--r--Documentation/devicetree/bindings/mfd/max77686.txt82
-rw-r--r--Documentation/devicetree/bindings/mfd/max77693.txt121
-rw-r--r--Documentation/devicetree/bindings/mfd/max8925.txt64
-rw-r--r--Documentation/devicetree/bindings/mfd/max8998.txt119
-rw-r--r--Documentation/devicetree/bindings/mfd/mc13xxx.txt158
-rw-r--r--Documentation/devicetree/bindings/mfd/mt6397.txt70
-rw-r--r--Documentation/devicetree/bindings/mfd/omap-usb-host.txt103
-rw-r--r--Documentation/devicetree/bindings/mfd/omap-usb-tll.txt27
-rw-r--r--Documentation/devicetree/bindings/mfd/palmas.txt53
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.txt75
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom,tcsr.txt22
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom-pm8xxx.txt97
-rw-r--r--Documentation/devicetree/bindings/mfd/qcom-rpm.txt264
-rw-r--r--Documentation/devicetree/bindings/mfd/qriox.txt17
-rw-r--r--Documentation/devicetree/bindings/mfd/rk808.txt177
-rw-r--r--Documentation/devicetree/bindings/mfd/rn5t618.txt36
-rw-r--r--Documentation/devicetree/bindings/mfd/s2mpa01.txt90
-rw-r--r--Documentation/devicetree/bindings/mfd/s2mps11.txt139
-rw-r--r--Documentation/devicetree/bindings/mfd/sky81452.txt35
-rw-r--r--Documentation/devicetree/bindings/mfd/stmpe.txt29
-rw-r--r--Documentation/devicetree/bindings/mfd/sun6i-prcm.txt59
-rw-r--r--Documentation/devicetree/bindings/mfd/syscon.txt20
-rw-r--r--Documentation/devicetree/bindings/mfd/tc3589x.txt107
-rw-r--r--Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt19
-rwxr-xr-xDocumentation/devicetree/bindings/mfd/tps6507x.txt91
-rw-r--r--Documentation/devicetree/bindings/mfd/tps65910.txt201
-rw-r--r--Documentation/devicetree/bindings/mfd/twl-familly.txt47
-rw-r--r--Documentation/devicetree/bindings/mfd/twl4030-audio.txt46
-rw-r--r--Documentation/devicetree/bindings/mfd/twl4030-power.txt48
-rw-r--r--Documentation/devicetree/bindings/mfd/twl6040.txt67
-rw-r--r--Documentation/devicetree/bindings/mipi/dsi/mipi-dsi-bus.txt98
-rw-r--r--Documentation/devicetree/bindings/mipi/nvidia,tegra114-mipi.txt41
-rw-r--r--Documentation/devicetree/bindings/mips/brcm/brcm,bmips.txt8
-rw-r--r--Documentation/devicetree/bindings/mips/brcm/soc.txt12
-rw-r--r--Documentation/devicetree/bindings/mips/cavium/bootbus.txt126
-rw-r--r--Documentation/devicetree/bindings/mips/cavium/cib.txt43
-rw-r--r--Documentation/devicetree/bindings/mips/cavium/ciu.txt26
-rw-r--r--Documentation/devicetree/bindings/mips/cavium/ciu2.txt27
-rw-r--r--Documentation/devicetree/bindings/mips/cavium/dma-engine.txt21
-rw-r--r--Documentation/devicetree/bindings/mips/cavium/uctl.txt46
-rw-r--r--Documentation/devicetree/bindings/mips/cpu_irq.txt47
-rw-r--r--Documentation/devicetree/bindings/mips/img/pistachio.txt42
-rw-r--r--Documentation/devicetree/bindings/mips/ralink.txt17
-rw-r--r--Documentation/devicetree/bindings/misc/allwinner,sunxi-sid.txt17
-rw-r--r--Documentation/devicetree/bindings/misc/arm-charlcd.txt18
-rw-r--r--Documentation/devicetree/bindings/misc/at25.txt35
-rw-r--r--Documentation/devicetree/bindings/misc/atmel-ssc.txt49
-rw-r--r--Documentation/devicetree/bindings/misc/bmp085.txt24
-rw-r--r--Documentation/devicetree/bindings/misc/brcm,kona-smc.txt15
-rw-r--r--Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt40
-rw-r--r--Documentation/devicetree/bindings/misc/ifm-csi.txt41
-rw-r--r--Documentation/devicetree/bindings/misc/lis302.txt119
-rw-r--r--Documentation/devicetree/bindings/misc/nvidia,tegra20-apbmisc.txt12
-rw-r--r--Documentation/devicetree/bindings/misc/sram.txt51
-rw-r--r--Documentation/devicetree/bindings/misc/ti,dac7512.txt20
-rw-r--r--Documentation/devicetree/bindings/mmc/arasan,sdhci.txt27
-rw-r--r--Documentation/devicetree/bindings/mmc/atmel-hsmci.txt73
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhci.txt18
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt21
-rw-r--r--Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt23
-rw-r--r--Documentation/devicetree/bindings/mmc/davinci_mmc.txt33
-rw-r--r--Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt93
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-esdhc.txt36
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt41
-rw-r--r--Documentation/devicetree/bindings/mmc/fsl-imx-mmc.txt24
-rw-r--r--Documentation/devicetree/bindings/mmc/img-dw-mshc.txt29
-rw-r--r--Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt44
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-card.txt31
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.txt25
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt25
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt31
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc.txt140
-rw-r--r--Documentation/devicetree/bindings/mmc/mmci.txt61
-rw-r--r--Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt30
-rw-r--r--Documentation/devicetree/bindings/mmc/mxs-mmc.txt27
-rw-r--r--Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt38
-rw-r--r--Documentation/devicetree/bindings/mmc/orion-sdio.txt17
-rw-r--r--Documentation/devicetree/bindings/mmc/pxa-mmc.txt25
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,mmcif.txt32
-rw-r--r--Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt25
-rw-r--r--Documentation/devicetree/bindings/mmc/samsung-sdhci.txt32
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-dove.txt14
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-fujitsu.txt30
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.txt55
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-pxa.txt50
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-sirf.txt18
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-spear.txt18
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-st.txt113
-rw-r--r--Documentation/devicetree/bindings/mmc/socfpga-dw-mshc.txt23
-rw-r--r--Documentation/devicetree/bindings/mmc/sunxi-mmc.txt43
-rw-r--r--Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt109
-rw-r--r--Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt112
-rw-r--r--Documentation/devicetree/bindings/mmc/ti-omap.txt54
-rw-r--r--Documentation/devicetree/bindings/mmc/tmio_mmc.txt27
-rw-r--r--Documentation/devicetree/bindings/mmc/usdhi6rol0.txt33
-rw-r--r--Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt23
-rw-r--r--Documentation/devicetree/bindings/mtd/arm-versatile.txt8
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel-dataflash.txt17
-rw-r--r--Documentation/devicetree/bindings/mtd/atmel-nand.txt111
-rw-r--r--Documentation/devicetree/bindings/mtd/davinci-nand.txt94
-rw-r--r--Documentation/devicetree/bindings/mtd/denali-nand.txt23
-rw-r--r--Documentation/devicetree/bindings/mtd/diskonchip.txt15
-rw-r--r--Documentation/devicetree/bindings/mtd/elm.txt16
-rw-r--r--Documentation/devicetree/bindings/mtd/flctl-nand.txt49
-rw-r--r--Documentation/devicetree/bindings/mtd/fsl-quadspi.txt35
-rw-r--r--Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt67
-rw-r--r--Documentation/devicetree/bindings/mtd/fsmc-nand.txt54
-rw-r--r--Documentation/devicetree/bindings/mtd/gpio-control-nand.txt47
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmc-nand.txt137
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmc-nor.txt98
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmc-onenand.txt46
-rw-r--r--Documentation/devicetree/bindings/mtd/gpmi-nand.txt58
-rw-r--r--Documentation/devicetree/bindings/mtd/hisi504-nand.txt47
-rw-r--r--Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt32
-rw-r--r--Documentation/devicetree/bindings/mtd/lpc32xx-mlc.txt50
-rw-r--r--Documentation/devicetree/bindings/mtd/lpc32xx-slc.txt52
-rw-r--r--Documentation/devicetree/bindings/mtd/mtd-physmap.txt89
-rw-r--r--Documentation/devicetree/bindings/mtd/mxc-nand.txt19
-rw-r--r--Documentation/devicetree/bindings/mtd/nand.txt21
-rw-r--r--Documentation/devicetree/bindings/mtd/orion-nand.txt50
-rw-r--r--Documentation/devicetree/bindings/mtd/partition.txt71
-rw-r--r--Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt43
-rw-r--r--Documentation/devicetree/bindings/mtd/spear_smi.txt31
-rw-r--r--Documentation/devicetree/bindings/mtd/st-fsm.txt26
-rw-r--r--Documentation/devicetree/bindings/mtd/sunxi-nand.txt45
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt19
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt27
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt27
-rw-r--r--Documentation/devicetree/bindings/net/altera_tse.txt114
-rw-r--r--Documentation/devicetree/bindings/net/amd-xgbe-phy.txt48
-rw-r--r--Documentation/devicetree/bindings/net/amd-xgbe.txt47
-rw-r--r--Documentation/devicetree/bindings/net/apm-xgene-enet.txt76
-rw-r--r--Documentation/devicetree/bindings/net/arc_emac.txt39
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt78
-rw-r--r--Documentation/devicetree/bindings/net/brcm,bcmgenet.txt121
-rw-r--r--Documentation/devicetree/bindings/net/brcm,systemport.txt30
-rw-r--r--Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt39
-rw-r--r--Documentation/devicetree/bindings/net/broadcom-bcm87xx.txt29
-rw-r--r--Documentation/devicetree/bindings/net/calxeda-xgmac.txt18
-rw-r--r--Documentation/devicetree/bindings/net/can/atmel-can.txt14
-rw-r--r--Documentation/devicetree/bindings/net/can/c_can.txt54
-rw-r--r--Documentation/devicetree/bindings/net/can/cc770.txt53
-rw-r--r--Documentation/devicetree/bindings/net/can/fsl-flexcan.txt29
-rw-r--r--Documentation/devicetree/bindings/net/can/grcan.txt28
-rw-r--r--Documentation/devicetree/bindings/net/can/m_can.txt67
-rw-r--r--Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt25
-rw-r--r--Documentation/devicetree/bindings/net/can/mpc5xxx-mscan.txt53
-rw-r--r--Documentation/devicetree/bindings/net/can/rcar_can.txt43
-rw-r--r--Documentation/devicetree/bindings/net/can/sja1000.txt57
-rw-r--r--Documentation/devicetree/bindings/net/can/xilinx_can.txt44
-rw-r--r--Documentation/devicetree/bindings/net/cavium-mdio.txt27
-rw-r--r--Documentation/devicetree/bindings/net/cavium-mix.txt34
-rw-r--r--Documentation/devicetree/bindings/net/cavium-pip.txt93
-rw-r--r--Documentation/devicetree/bindings/net/cdns-emac.txt20
-rw-r--r--Documentation/devicetree/bindings/net/cpsw-phy-sel.txt30
-rw-r--r--Documentation/devicetree/bindings/net/cpsw.txt104
-rw-r--r--Documentation/devicetree/bindings/net/davicom-dm9000.txt28
-rw-r--r--Documentation/devicetree/bindings/net/davinci-mdio.txt33
-rw-r--r--Documentation/devicetree/bindings/net/davinci_emac.txt41
-rw-r--r--Documentation/devicetree/bindings/net/dsa/dsa.txt117
-rw-r--r--Documentation/devicetree/bindings/net/emac_rockchip.txt50
-rw-r--r--Documentation/devicetree/bindings/net/ethernet.txt31
-rw-r--r--Documentation/devicetree/bindings/net/fixed-link.txt42
-rw-r--r--Documentation/devicetree/bindings/net/fsl-fec.txt62
-rw-r--r--Documentation/devicetree/bindings/net/fsl-tsec-phy.txt147
-rw-r--r--Documentation/devicetree/bindings/net/gpmc-eth.txt97
-rw-r--r--Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt88
-rw-r--r--Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt36
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt27
-rw-r--r--Documentation/devicetree/bindings/net/ieee802154/cc2520.txt33
-rw-r--r--Documentation/devicetree/bindings/net/keystone-netcp.txt217
-rw-r--r--Documentation/devicetree/bindings/net/lpc-eth.txt23
-rw-r--r--Documentation/devicetree/bindings/net/macb.txt30
-rw-r--r--Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt21
-rw-r--r--Documentation/devicetree/bindings/net/marvell-orion-mdio.txt39
-rw-r--r--Documentation/devicetree/bindings/net/marvell-orion-net.txt82
-rw-r--r--Documentation/devicetree/bindings/net/marvell-pp2.txt61
-rw-r--r--Documentation/devicetree/bindings/net/marvell-pxa168.txt36
-rw-r--r--Documentation/devicetree/bindings/net/mdio-gpio.txt26
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux-gpio.txt127
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt75
-rw-r--r--Documentation/devicetree/bindings/net/mdio-mux.txt136
-rw-r--r--Documentation/devicetree/bindings/net/meson-dwmac.txt25
-rw-r--r--Documentation/devicetree/bindings/net/micrel-ks8851.txt18
-rw-r--r--Documentation/devicetree/bindings/net/micrel-ksz90x1.txt83
-rw-r--r--Documentation/devicetree/bindings/net/micrel.txt37
-rw-r--r--Documentation/devicetree/bindings/net/moxa,moxart-mac.txt21
-rw-r--r--Documentation/devicetree/bindings/net/nfc/nxp-nci.txt35
-rw-r--r--Documentation/devicetree/bindings/net/nfc/pn544.txt35
-rw-r--r--Documentation/devicetree/bindings/net/nfc/st21nfca.txt40
-rw-r--r--Documentation/devicetree/bindings/net/nfc/st21nfcb.txt33
-rw-r--r--Documentation/devicetree/bindings/net/nfc/trf7970a.txt44
-rw-r--r--Documentation/devicetree/bindings/net/opencores-ethoc.txt22
-rw-r--r--Documentation/devicetree/bindings/net/phy.txt40
-rw-r--r--Documentation/devicetree/bindings/net/qca-qca7000-spi.txt47
-rw-r--r--Documentation/devicetree/bindings/net/rockchip-dwmac.txt68
-rw-r--r--Documentation/devicetree/bindings/net/samsung-sxgbe.txt52
-rw-r--r--Documentation/devicetree/bindings/net/sh_eth.txt57
-rw-r--r--Documentation/devicetree/bindings/net/smsc-lan91c111.txt15
-rw-r--r--Documentation/devicetree/bindings/net/smsc911x.txt35
-rw-r--r--Documentation/devicetree/bindings/net/socfpga-dwmac.txt31
-rw-r--r--Documentation/devicetree/bindings/net/sti-dwmac.txt61
-rw-r--r--Documentation/devicetree/bindings/net/stmmac.txt68
-rw-r--r--Documentation/devicetree/bindings/net/via-rhine.txt17
-rw-r--r--Documentation/devicetree/bindings/net/via-velocity.txt20
-rw-r--r--Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt41
-rw-r--r--Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt30
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt39
-rw-r--r--Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt47
-rw-r--r--Documentation/devicetree/bindings/nios2/nios2.txt62
-rw-r--r--Documentation/devicetree/bindings/nios2/timer.txt19
-rw-r--r--Documentation/devicetree/bindings/nvec/nvidia,nvec.txt21
-rw-r--r--Documentation/devicetree/bindings/open-pic.txt98
-rw-r--r--Documentation/devicetree/bindings/panel/ampire,am800480r3tmqwa1h.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/auo,b101aw03.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/auo,b101ean01.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/auo,b101xtn01.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/auo,b116xw03.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/auo,b133htn01.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/auo,b133xtn01.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/avic,tm070ddh03.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/chunghwa,claa101wa01a.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/chunghwa,claa101wb03.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/edt,et057090dhu.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/edt,et070080dh6.txt10
-rw-r--r--Documentation/devicetree/bindings/panel/edt,etm0700g0dh6.txt10
-rw-r--r--Documentation/devicetree/bindings/panel/foxlink,fl500wvr00-a0t.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/giantplus,gpg482739qs5.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/hannstar,hsd070pww1.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/hit,tx23d38vm0caa.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/innolux,at043tn24.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/innolux,g121i1-l01.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/innolux,n116bge.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/innolux,n156bge-l21.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/innolux,zj070na-01p.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/lg,ld070wx3-sl01.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/lg,lh500wx1-sd03.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/lg,lp129qe.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/ortustech,com43h4m85ulc.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/panasonic,vvx10f004b00.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/samsung,ld9040.txt66
-rw-r--r--Documentation/devicetree/bindings/panel/samsung,ltn101nt05.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/samsung,ltn140at29-301.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/samsung,s6e8aa0.txt56
-rw-r--r--Documentation/devicetree/bindings/panel/sharp,lq101r1sx01.txt49
-rw-r--r--Documentation/devicetree/bindings/panel/shelly,sca07010-bfn-lnn.txt7
-rw-r--r--Documentation/devicetree/bindings/panel/simple-panel.txt21
-rw-r--r--Documentation/devicetree/bindings/pci/83xx-512x-pci.txt40
-rw-r--r--Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt63
-rw-r--r--Documentation/devicetree/bindings/pci/designware-pcie.txt28
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt40
-rw-r--r--Documentation/devicetree/bindings/pci/fsl,pci.txt27
-rw-r--r--Documentation/devicetree/bindings/pci/host-generic-pci.txt100
-rw-r--r--Documentation/devicetree/bindings/pci/layerscape-pci.txt42
-rw-r--r--Documentation/devicetree/bindings/pci/mvebu-pci.txt304
-rw-r--r--Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt224
-rw-r--r--Documentation/devicetree/bindings/pci/pci-keystone.txt63
-rw-r--r--Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt66
-rw-r--r--Documentation/devicetree/bindings/pci/pci.txt20
-rw-r--r--Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt190
-rw-r--r--Documentation/devicetree/bindings/pci/rcar-pci.txt47
-rw-r--r--Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt65
-rw-r--r--Documentation/devicetree/bindings/pci/spear13xx-pcie.txt14
-rw-r--r--Documentation/devicetree/bindings/pci/ti-pci.txt59
-rw-r--r--Documentation/devicetree/bindings/pci/v3-v360epc-pci.txt15
-rw-r--r--Documentation/devicetree/bindings/pci/versatile.txt59
-rw-r--r--Documentation/devicetree/bindings/pci/xgene-pci.txt57
-rw-r--r--Documentation/devicetree/bindings/pci/xilinx-pcie.txt62
-rw-r--r--Documentation/devicetree/bindings/phy/apm-xgene-phy.txt79
-rw-r--r--Documentation/devicetree/bindings/phy/berlin-sata-phy.txt36
-rw-r--r--Documentation/devicetree/bindings/phy/berlin-usb-phy.txt16
-rw-r--r--Documentation/devicetree/bindings/phy/brcm,kona-usb2-phy.txt15
-rw-r--r--Documentation/devicetree/bindings/phy/dm816x-phy.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/hix5hd2-phy.txt22
-rw-r--r--Documentation/devicetree/bindings/phy/phy-bindings.txt70
-rw-r--r--Documentation/devicetree/bindings/phy/phy-miphy28lp.txt117
-rw-r--r--Documentation/devicetree/bindings/phy/phy-miphy365x.txt77
-rw-r--r--Documentation/devicetree/bindings/phy/phy-mvebu.txt43
-rw-r--r--Documentation/devicetree/bindings/phy/phy-stih407-usb.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/phy-stih41x-usb.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-apq8064-sata-phy.txt24
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-dwc3-usb-phy.txt39
-rw-r--r--Documentation/devicetree/bindings/phy/qcom-ipq806x-sata-phy.txt23
-rw-r--r--Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt51
-rw-r--r--Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt37
-rw-r--r--Documentation/devicetree/bindings/phy/samsung-phy.txt176
-rw-r--r--Documentation/devicetree/bindings/phy/st-spear-miphy.txt15
-rw-r--r--Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt37
-rw-r--r--Documentation/devicetree/bindings/phy/sun9i-usb-phy.txt38
-rw-r--r--Documentation/devicetree/bindings/phy/ti-phy.txt102
-rw-r--r--Documentation/devicetree/bindings/pinctrl/abilis,tb10x-iomux.txt80
-rw-r--r--Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt68
-rw-r--r--Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt150
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt461
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt74
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,cygnus-gpio.txt98
-rw-r--r--Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt132
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx-pinctrl.txt94
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx25-pinctrl.txt23
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx27-pinctrl.txt121
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx35-pinctrl.txt33
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx51-pinctrl.txt32
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx53-pinctrl.txt32
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6dl-pinctrl.txt38
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt38
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6sl-pinctrl.txt39
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,imx6sx-pinctrl.txt36
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,mxs-pinctrl.txt127
-rw-r--r--Documentation/devicetree/bindings/pinctrl/fsl,vf610-pinctrl.txt41
-rw-r--r--Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt127
-rw-r--r--Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt227
-rw-r--r--Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt83
-rw-r--r--Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt97
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt96
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt82
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt80
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,armada-39x-pinctrl.txt78
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt95
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,dove-pinctrl.txt90
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,kirkwood-pinctrl.txt319
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,mvebu-pinctrl.txt46
-rw-r--r--Documentation/devicetree/bindings/pinctrl/marvell,orion-pinctrl.txt91
-rw-r--r--Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt96
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra114-pinmux.txt131
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-pinmux.txt153
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt129
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt143
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra210-pinmux.txt166
-rw-r--r--Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.txt144
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt236
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt145
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-palmas.txt96
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt252
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt47
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-st.txt171
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-vt8500.txt57
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt155
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt88
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt179
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt95
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.txt186
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt181
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt112
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt217
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt164
-rw-r--r--Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt167
-rw-r--r--Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt157
-rw-r--r--Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt356
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ste,abx500.txt318
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ste,nomadik.txt145
-rw-r--r--Documentation/devicetree/bindings/pinctrl/ti,omap-pinctrl.txt13
-rw-r--r--Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.txt104
-rw-r--r--Documentation/devicetree/bindings/power/bq2415x.txt47
-rw-r--r--Documentation/devicetree/bindings/power/da9150-charger.txt26
-rw-r--r--Documentation/devicetree/bindings/power/fsl,imx-gpc.txt59
-rw-r--r--Documentation/devicetree/bindings/power/isp1704.txt17
-rw-r--r--Documentation/devicetree/bindings/power/ltc2941.txt27
-rw-r--r--Documentation/devicetree/bindings/power/opp.txt25
-rw-r--r--Documentation/devicetree/bindings/power/power-controller.txt18
-rw-r--r--Documentation/devicetree/bindings/power/power_domain.txt78
-rw-r--r--Documentation/devicetree/bindings/power/renesas,sysc-rmobile.txt100
-rw-r--r--Documentation/devicetree/bindings/power/reset/keystone-reset.txt67
-rw-r--r--Documentation/devicetree/bindings/power/reset/ltc2952-poweroff.txt29
-rw-r--r--Documentation/devicetree/bindings/power/reset/st-reset.txt11
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt23
-rw-r--r--Documentation/devicetree/bindings/power/reset/syscon-reboot.txt23
-rw-r--r--Documentation/devicetree/bindings/power/rockchip-io-domain.txt83
-rw-r--r--Documentation/devicetree/bindings/power/rx51-battery.txt25
-rw-r--r--Documentation/devicetree/bindings/power/twl-charger.txt20
-rw-r--r--Documentation/devicetree/bindings/power_supply/ab8500/btemp.txt16
-rw-r--r--Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt16
-rw-r--r--Documentation/devicetree/bindings/power_supply/ab8500/charger.txt25
-rw-r--r--Documentation/devicetree/bindings/power_supply/ab8500/fg.txt58
-rw-r--r--Documentation/devicetree/bindings/power_supply/axxia-reset.txt20
-rw-r--r--Documentation/devicetree/bindings/power_supply/charger-manager.txt81
-rw-r--r--Documentation/devicetree/bindings/power_supply/gpio-charger.txt27
-rw-r--r--Documentation/devicetree/bindings/power_supply/imx-snvs-poweroff.txt23
-rw-r--r--Documentation/devicetree/bindings/power_supply/lp8727_charger.txt44
-rw-r--r--Documentation/devicetree/bindings/power_supply/max17042_battery.txt18
-rw-r--r--Documentation/devicetree/bindings/power_supply/max8925_batter.txt18
-rw-r--r--Documentation/devicetree/bindings/power_supply/msm-poweroff.txt17
-rw-r--r--Documentation/devicetree/bindings/power_supply/olpc_battery.txt5
-rw-r--r--Documentation/devicetree/bindings/power_supply/power_supply.txt23
-rw-r--r--Documentation/devicetree/bindings/power_supply/qnap-poweroff.txt16
-rw-r--r--Documentation/devicetree/bindings/power_supply/restart-poweroff.txt8
-rw-r--r--Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt23
-rw-r--r--Documentation/devicetree/bindings/power_supply/ti,bq24735.txt32
-rw-r--r--Documentation/devicetree/bindings/power_supply/tps65090.txt17
-rw-r--r--Documentation/devicetree/bindings/powerpc/4xx/akebono.txt54
-rw-r--r--Documentation/devicetree/bindings/powerpc/4xx/cpm.txt52
-rw-r--r--Documentation/devicetree/bindings/powerpc/4xx/emac.txt148
-rw-r--r--Documentation/devicetree/bindings/powerpc/4xx/hsta.txt19
-rw-r--r--Documentation/devicetree/bindings/powerpc/4xx/ndfc.txt39
-rw-r--r--Documentation/devicetree/bindings/powerpc/4xx/ppc440spe-adma.txt93
-rw-r--r--Documentation/devicetree/bindings/powerpc/4xx/reboot.txt18
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/board.txt102
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cache_sram.txt20
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/ccf.txt46
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm.txt67
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/brg.txt21
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/i2c.txt41
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/pic.txt18
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/usb.txt15
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/gpio.txt38
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt43
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt115
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/firmware.txt24
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/par_io.txt51
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/pincfg.txt57
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/ucc.txt70
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/usb.txt37
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/serial.txt32
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/cpus.txt33
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/dcsr.txt395
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/diu.txt34
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/dma.txt208
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/ecm.txt64
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/fman.txt604
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/gtm.txt31
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/guts.txt36
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/interlaken-lac.txt309
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt23
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/lbc.txt43
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mcm.txt64
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mcu-mpc8349emitx.txt17
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mem-ctrlr.txt27
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpc5121-psc.txt70
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt198
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpic-msgr.txt63
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpic-timer.txt38
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/mpic.txt231
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt116
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/pamu.txt150
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/pmc.txt63
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/raideng.txt81
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt163
-rw-r--r--Documentation/devicetree/bindings/powerpc/fsl/srio.txt103
-rw-r--r--Documentation/devicetree/bindings/powerpc/nintendo/gamecube.txt109
-rw-r--r--Documentation/devicetree/bindings/powerpc/nintendo/wii.txt184
-rw-r--r--Documentation/devicetree/bindings/pps/pps-gpio.txt20
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt29
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel-pwm.txt33
-rw-r--r--Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt16
-rw-r--r--Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt21
-rw-r--r--Documentation/devicetree/bindings/pwm/cirrus,clps711x-pwm.txt16
-rw-r--r--Documentation/devicetree/bindings/pwm/img-pwm.txt24
-rw-r--r--Documentation/devicetree/bindings/pwm/imx-pwm.txt27
-rw-r--r--Documentation/devicetree/bindings/pwm/lpc32xx-pwm.txt12
-rw-r--r--Documentation/devicetree/bindings/pwm/mxs-pwm.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt27
-rw-r--r--Documentation/devicetree/bindings/pwm/nxp,pca9685-pwm.txt27
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt30
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt52
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-lp3943.txt58
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-rockchip.txt20
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-samsung.txt51
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-st.txt41
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-sun4i.txt20
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-tiecap.txt29
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt29
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm-tipwmss.txt31
-rw-r--r--Documentation/devicetree/bindings/pwm/pwm.txt69
-rw-r--r--Documentation/devicetree/bindings/pwm/pxa-pwm.txt30
-rw-r--r--Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt28
-rw-r--r--Documentation/devicetree/bindings/pwm/spear-pwm.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt17
-rw-r--r--Documentation/devicetree/bindings/pwm/vt8500-pwm.txt18
-rw-r--r--Documentation/devicetree/bindings/regmap/regmap.txt47
-rw-r--r--Documentation/devicetree/bindings/regulator/88pm800.txt38
-rw-r--r--Documentation/devicetree/bindings/regulator/88pm860x.txt30
-rw-r--r--Documentation/devicetree/bindings/regulator/act8865-regulator.txt92
-rw-r--r--Documentation/devicetree/bindings/regulator/anatop-regulator.txt37
-rw-r--r--Documentation/devicetree/bindings/regulator/as3722-regulator.txt91
-rw-r--r--Documentation/devicetree/bindings/regulator/da9210.txt21
-rw-r--r--Documentation/devicetree/bindings/regulator/da9211.txt68
-rw-r--r--Documentation/devicetree/bindings/regulator/fan53555.txt23
-rw-r--r--Documentation/devicetree/bindings/regulator/fixed-regulator.txt34
-rw-r--r--Documentation/devicetree/bindings/regulator/gpio-regulator.txt41
-rw-r--r--Documentation/devicetree/bindings/regulator/isl9305.txt36
-rw-r--r--Documentation/devicetree/bindings/regulator/lp872x.txt160
-rw-r--r--Documentation/devicetree/bindings/regulator/ltc3589.txt99
-rw-r--r--Documentation/devicetree/bindings/regulator/max1586-regulator.txt28
-rw-r--r--Documentation/devicetree/bindings/regulator/max77802.txt88
-rw-r--r--Documentation/devicetree/bindings/regulator/max8660.txt47
-rw-r--r--Documentation/devicetree/bindings/regulator/max8907.txt69
-rw-r--r--Documentation/devicetree/bindings/regulator/max8925-regulator.txt40
-rw-r--r--Documentation/devicetree/bindings/regulator/max8952.txt52
-rw-r--r--Documentation/devicetree/bindings/regulator/max8973-regulator.txt21
-rw-r--r--Documentation/devicetree/bindings/regulator/max8997-regulator.txt146
-rw-r--r--Documentation/devicetree/bindings/regulator/mt6397-regulator.txt217
-rw-r--r--Documentation/devicetree/bindings/regulator/palmas-pmic.txt82
-rw-r--r--Documentation/devicetree/bindings/regulator/pbias-regulator.txt27
-rw-r--r--Documentation/devicetree/bindings/regulator/pfuze100.txt299
-rw-r--r--Documentation/devicetree/bindings/regulator/pwm-regulator.txt27
-rw-r--r--Documentation/devicetree/bindings/regulator/regulator.txt92
-rw-r--r--Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt163
-rw-r--r--Documentation/devicetree/bindings/regulator/sky81452-regulator.txt18
-rw-r--r--Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt132
-rw-r--r--Documentation/devicetree/bindings/regulator/tps51632-regulator.txt27
-rw-r--r--Documentation/devicetree/bindings/regulator/tps62360-regulator.txt44
-rw-r--r--Documentation/devicetree/bindings/regulator/tps65090.txt126
-rw-r--r--Documentation/devicetree/bindings/regulator/tps65217.txt78
-rw-r--r--Documentation/devicetree/bindings/regulator/tps65218.txt23
-rw-r--r--Documentation/devicetree/bindings/regulator/tps6586x.txt135
-rw-r--r--Documentation/devicetree/bindings/regulator/twl-regulator.txt67
-rw-r--r--Documentation/devicetree/bindings/regulator/vexpress.txt32
-rw-r--r--Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt133
-rw-r--r--Documentation/devicetree/bindings/reset/allwinner,sunxi-clock-reset.txt21
-rw-r--r--Documentation/devicetree/bindings/reset/brcm,bcm21664-resetmgr.txt14
-rw-r--r--Documentation/devicetree/bindings/reset/fsl,imx-src.txt49
-rw-r--r--Documentation/devicetree/bindings/reset/reset.txt75
-rw-r--r--Documentation/devicetree/bindings/reset/sirf,rstc.txt42
-rw-r--r--Documentation/devicetree/bindings/reset/socfpga-reset.txt13
-rw-r--r--Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt42
-rw-r--r--Documentation/devicetree/bindings/reset/st,sti-powerdown.txt47
-rw-r--r--Documentation/devicetree/bindings/reset/st,sti-softreset.txt46
-rw-r--r--Documentation/devicetree/bindings/resource-names.txt54
-rw-r--r--Documentation/devicetree/bindings/rng/apm,rng.txt17
-rw-r--r--Documentation/devicetree/bindings/rng/brcm,bcm2835.txt13
-rw-r--r--Documentation/devicetree/bindings/rng/qcom,prng.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/abracon,abx80x.txt30
-rw-r--r--Documentation/devicetree/bindings/rtc/armada-380-rtc.txt22
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/rtc/atmel,at91sam9-rtc.txt23
-rw-r--r--Documentation/devicetree/bindings/rtc/dallas,ds1339.txt18
-rw-r--r--Documentation/devicetree/bindings/rtc/digicolor-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/dw-apb.txt32
-rw-r--r--Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt30
-rw-r--r--Documentation/devicetree/bindings/rtc/imxdi-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/isil,isl12057.txt78
-rw-r--r--Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/rtc/maxim,ds1742.txt12
-rw-r--r--Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.txt24
-rw-r--r--Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt16
-rw-r--r--Documentation/devicetree/bindings/rtc/olpc-xo1-rtc.txt5
-rw-r--r--Documentation/devicetree/bindings/rtc/orion-rtc.txt18
-rw-r--r--Documentation/devicetree/bindings/rtc/pxa-rtc.txt14
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-cmos.txt28
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-omap.txt28
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-opal.txt16
-rw-r--r--Documentation/devicetree/bindings/rtc/rtc-palmas.txt33
-rw-r--r--Documentation/devicetree/bindings/rtc/s3c-rtc.txt23
-rw-r--r--Documentation/devicetree/bindings/rtc/sa1100-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/snvs-rtc.txt1
-rw-r--r--Documentation/devicetree/bindings/rtc/spear-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/stmp3xxx-rtc.txt21
-rw-r--r--Documentation/devicetree/bindings/rtc/sun6i-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/sunxi-rtc.txt17
-rw-r--r--Documentation/devicetree/bindings/rtc/twl-rtc.txt12
-rw-r--r--Documentation/devicetree/bindings/rtc/via,vt8500-rtc.txt15
-rw-r--r--Documentation/devicetree/bindings/rtc/xgene-rtc.txt28
-rw-r--r--Documentation/devicetree/bindings/security/tpm/st33zp24-i2c.txt36
-rw-r--r--Documentation/devicetree/bindings/security/tpm/st33zp24-spi.txt34
-rw-r--r--Documentation/devicetree/bindings/serial/8250.txt66
-rw-r--r--Documentation/devicetree/bindings/serial/altera_jtaguart.txt5
-rw-r--r--Documentation/devicetree/bindings/serial/altera_uart.txt8
-rw-r--r--Documentation/devicetree/bindings/serial/arc-uart.txt26
-rw-r--r--Documentation/devicetree/bindings/serial/atmel-usart.txt60
-rw-r--r--Documentation/devicetree/bindings/serial/axis,etraxfs-uart.txt19
-rw-r--r--Documentation/devicetree/bindings/serial/brcm,bcm6345-uart.txt30
-rw-r--r--Documentation/devicetree/bindings/serial/cavium-uart.txt19
-rw-r--r--Documentation/devicetree/bindings/serial/cdns,uart.txt20
-rw-r--r--Documentation/devicetree/bindings/serial/cirrus,clps711x-uart.txt31
-rw-r--r--Documentation/devicetree/bindings/serial/digicolor-usart.txt27
-rw-r--r--Documentation/devicetree/bindings/serial/efm32-uart.txt20
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-imx-uart.txt29
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-lpuart.txt31
-rw-r--r--Documentation/devicetree/bindings/serial/fsl-mxs-auart.txt45
-rw-r--r--Documentation/devicetree/bindings/serial/lantiq_asc.txt16
-rw-r--r--Documentation/devicetree/bindings/serial/maxim,max310x.txt36
-rw-r--r--Documentation/devicetree/bindings/serial/mrvl,pxa-ssp.txt65
-rw-r--r--Documentation/devicetree/bindings/serial/mrvl-serial.txt4
-rw-r--r--Documentation/devicetree/bindings/serial/mtk-uart.txt26
-rw-r--r--Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.txt37
-rw-r--r--Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt33
-rw-r--r--Documentation/devicetree/bindings/serial/nxp-lpc32xx-hsuart.txt14
-rw-r--r--Documentation/devicetree/bindings/serial/omap_serial.txt30
-rw-r--r--Documentation/devicetree/bindings/serial/pl011.txt51
-rw-r--r--Documentation/devicetree/bindings/serial/qca,ar9330-uart.txt34
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,msm-uart.txt25
-rw-r--r--Documentation/devicetree/bindings/serial/qcom,msm-uartdm.txt78
-rw-r--r--Documentation/devicetree/bindings/serial/renesas,sci-serial.txt59
-rw-r--r--Documentation/devicetree/bindings/serial/rs485.txt31
-rw-r--r--Documentation/devicetree/bindings/serial/samsung_uart.txt58
-rw-r--r--Documentation/devicetree/bindings/serial/sirf-uart.txt47
-rw-r--r--Documentation/devicetree/bindings/serial/snps-dw-apb-uart.txt73
-rw-r--r--Documentation/devicetree/bindings/serial/sprd-uart.txt7
-rw-r--r--Documentation/devicetree/bindings/serial/st-asc.txt18
-rw-r--r--Documentation/devicetree/bindings/serial/vt8500-uart.txt27
-rw-r--r--Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt23
-rw-r--r--Documentation/devicetree/bindings/serio/altera_ps2.txt5
-rw-r--r--Documentation/devicetree/bindings/serio/olpc,ap-sp.txt13
-rw-r--r--Documentation/devicetree/bindings/serio/snps-arc_ps2.txt16
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/bman-portals.txt56
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/bman.txt135
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/qman-portals.txt154
-rw-r--r--Documentation/devicetree/bindings/soc/fsl/qman.txt175
-rw-r--r--Documentation/devicetree/bindings/soc/mediatek/pwrap.txt58
-rw-r--r--Documentation/devicetree/bindings/soc/qcom/qcom,gsbi.txt88
-rw-r--r--Documentation/devicetree/bindings/soc/ti/keystone-navigator-dma.txt111
-rw-r--r--Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt232
-rw-r--r--Documentation/devicetree/bindings/sound/adi,adau1701.txt35
-rw-r--r--Documentation/devicetree/bindings/sound/adi,axi-i2s.txt31
-rw-r--r--Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt30
-rw-r--r--Documentation/devicetree/bindings/sound/adi,ssm2602.txt19
-rw-r--r--Documentation/devicetree/bindings/sound/ak4104.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/ak4554.c11
-rw-r--r--Documentation/devicetree/bindings/sound/ak4642.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/ak5386.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/alc5623.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/alc5632.txt43
-rw-r--r--Documentation/devicetree/bindings/sound/armada-370db-audio.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/arndale.txt24
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-at91sam9g20ek-wm8731-audio.txt26
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-sam9x5-wm8731-audio.txt35
-rw-r--r--Documentation/devicetree/bindings/sound/atmel-wm8904.txt55
-rw-r--r--Documentation/devicetree/bindings/sound/atmel_ac97c.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/brcm,bcm2835-i2s.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/cdns,xtfpga-i2s.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/cs35l32.txt62
-rw-r--r--Documentation/devicetree/bindings/sound/cs4265.txt29
-rw-r--r--Documentation/devicetree/bindings/sound/cs4270.txt21
-rw-r--r--Documentation/devicetree/bindings/sound/cs4271.txt50
-rw-r--r--Documentation/devicetree/bindings/sound/cs42l52.txt46
-rw-r--r--Documentation/devicetree/bindings/sound/cs42l56.txt63
-rw-r--r--Documentation/devicetree/bindings/sound/cs42l73.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/cs42xx8.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/da9055.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/davinci-evm-audio.txt49
-rw-r--r--Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt60
-rw-r--r--Documentation/devicetree/bindings/sound/designware-i2s.txt31
-rw-r--r--Documentation/devicetree/bindings/sound/es8328.txt38
-rw-r--r--Documentation/devicetree/bindings/sound/eukrea-tlv320.txt26
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,asrc.txt60
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,esai.txt58
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,spdif.txt58
-rw-r--r--Documentation/devicetree/bindings/sound/fsl,ssi.txt87
-rw-r--r--Documentation/devicetree/bindings/sound/fsl-asoc-card.txt82
-rw-r--r--Documentation/devicetree/bindings/sound/fsl-sai.txt73
-rw-r--r--Documentation/devicetree/bindings/sound/hdmi.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-es8328.txt60
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-sgtl5000.txt56
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-spdif.txt36
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audio-wm8962.txt53
-rw-r--r--Documentation/devicetree/bindings/sound/imx-audmux.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/ingenic,jz4740-i2s.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/max98090.txt51
-rw-r--r--Documentation/devicetree/bindings/sound/max98095.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/max98357a.txt14
-rw-r--r--Documentation/devicetree/bindings/sound/max98925.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/mrvl,pxa-ssp.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/mrvl,pxa2xx-pcm.txt15
-rw-r--r--Documentation/devicetree/bindings/sound/mvebu-audio.txt34
-rw-r--r--Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/mxs-saif.txt41
-rw-r--r--Documentation/devicetree/bindings/sound/nokia,rx51.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-alc5632.txt48
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max98090.txt53
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5640.txt52
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5677.txt67
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-trimslice.txt21
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8753.txt40
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8903.txt60
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm9712.txt60
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-ac97.txt36
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-das.txt12
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra20-i2s.txt30
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-ahub.txt88
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.txt30
-rw-r--r--Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt91
-rw-r--r--Documentation/devicetree/bindings/sound/omap-dmic.txt21
-rw-r--r--Documentation/devicetree/bindings/sound/omap-mcbsp.txt37
-rw-r--r--Documentation/devicetree/bindings/sound/omap-mcpdm.txt21
-rw-r--r--Documentation/devicetree/bindings/sound/omap-twl4030.txt62
-rw-r--r--Documentation/devicetree/bindings/sound/pcm1792a.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/pcm512x.txt52
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-cpu.txt43
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,fsi.txt31
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,rsnd.txt216
-rw-r--r--Documentation/devicetree/bindings/sound/renesas,rsrc-card.txt67
-rw-r--r--Documentation/devicetree/bindings/sound/rockchip-i2s.txt37
-rw-r--r--Documentation/devicetree/bindings/sound/rt5631.txt48
-rw-r--r--Documentation/devicetree/bindings/sound/rt5640.txt53
-rw-r--r--Documentation/devicetree/bindings/sound/rt5677.txt76
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,odroidx2-max98090.txt35
-rw-r--r--Documentation/devicetree/bindings/sound/samsung,smdk-wm8994.txt14
-rw-r--r--Documentation/devicetree/bindings/sound/samsung-i2s.txt82
-rw-r--r--Documentation/devicetree/bindings/sound/sgtl5000.txt39
-rw-r--r--Documentation/devicetree/bindings/sound/simple-card.txt160
-rw-r--r--Documentation/devicetree/bindings/sound/sirf-audio-codec.txt17
-rw-r--r--Documentation/devicetree/bindings/sound/sirf-audio-port.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/sirf-audio.txt41
-rw-r--r--Documentation/devicetree/bindings/sound/sirf-usp.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/snow.txt22
-rw-r--r--Documentation/devicetree/bindings/sound/soc-ac97link.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/spdif-receiver.txt10
-rw-r--r--Documentation/devicetree/bindings/sound/spdif-transmitter.txt10
-rw-r--r--Documentation/devicetree/bindings/sound/ssm2518.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/ssm4567.txt15
-rw-r--r--Documentation/devicetree/bindings/sound/st,sta32x.txt92
-rw-r--r--Documentation/devicetree/bindings/sound/st,sta350.txt131
-rw-r--r--Documentation/devicetree/bindings/sound/storm.txt23
-rw-r--r--Documentation/devicetree/bindings/sound/tas2552.txt26
-rw-r--r--Documentation/devicetree/bindings/sound/tdm-slot.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/ti,pcm1681.txt15
-rw-r--r--Documentation/devicetree/bindings/sound/ti,tas5086.txt48
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320aic31xx.txt61
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320aic32x4.txt30
-rw-r--r--Documentation/devicetree/bindings/sound/tlv320aic3x.txt67
-rw-r--r--Documentation/devicetree/bindings/sound/tpa6130a2.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/ts3a227e.txt31
-rw-r--r--Documentation/devicetree/bindings/sound/ux500-mop500.txt39
-rw-r--r--Documentation/devicetree/bindings/sound/ux500-msp.txt43
-rw-r--r--Documentation/devicetree/bindings/sound/widgets.txt20
-rw-r--r--Documentation/devicetree/bindings/sound/wm8510.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8523.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/wm8580.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/wm8711.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8728.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8731.txt27
-rw-r--r--Documentation/devicetree/bindings/sound/wm8737.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8741.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8750.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8753.txt40
-rw-r--r--Documentation/devicetree/bindings/sound/wm8770.txt16
-rw-r--r--Documentation/devicetree/bindings/sound/wm8776.txt18
-rw-r--r--Documentation/devicetree/bindings/sound/wm8804.txt25
-rw-r--r--Documentation/devicetree/bindings/sound/wm8903.txt69
-rw-r--r--Documentation/devicetree/bindings/sound/wm8904.txt33
-rw-r--r--Documentation/devicetree/bindings/sound/wm8960.txt31
-rw-r--r--Documentation/devicetree/bindings/sound/wm8962.txt39
-rw-r--r--Documentation/devicetree/bindings/sound/wm8994.txt78
-rw-r--r--Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt22
-rw-r--r--Documentation/devicetree/bindings/spi/efm32-spi.txt41
-rw-r--r--Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt37
-rw-r--r--Documentation/devicetree/bindings/spi/fsl-spi.txt60
-rw-r--r--Documentation/devicetree/bindings/spi/mxs-spi.txt26
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt42
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt38
-rw-r--r--Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt38
-rw-r--r--Documentation/devicetree/bindings/spi/omap-spi.txt47
-rw-r--r--Documentation/devicetree/bindings/spi/qcom,spi-qup.txt103
-rw-r--r--Documentation/devicetree/bindings/spi/sh-hspi.txt29
-rw-r--r--Documentation/devicetree/bindings/spi/sh-msiof.txt71
-rw-r--r--Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt28
-rw-r--r--Documentation/devicetree/bindings/spi/spi-bus.txt94
-rw-r--r--Documentation/devicetree/bindings/spi/spi-cadence.txt31
-rw-r--r--Documentation/devicetree/bindings/spi/spi-davinci.txt88
-rw-r--r--Documentation/devicetree/bindings/spi/spi-dw.txt24
-rw-r--r--Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt57
-rw-r--r--Documentation/devicetree/bindings/spi/spi-gpio.txt31
-rw-r--r--Documentation/devicetree/bindings/spi/spi-img-spfi.txt38
-rw-r--r--Documentation/devicetree/bindings/spi/spi-meson.txt22
-rw-r--r--Documentation/devicetree/bindings/spi/spi-octeon.txt33
-rw-r--r--Documentation/devicetree/bindings/spi/spi-orion.txt19
-rw-r--r--Documentation/devicetree/bindings/spi/spi-rockchip.txt45
-rw-r--r--Documentation/devicetree/bindings/spi/spi-rspi.txt69
-rw-r--r--Documentation/devicetree/bindings/spi/spi-samsung.txt109
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sc18is602.txt23
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sirf.txt41
-rw-r--r--Documentation/devicetree/bindings/spi/spi-st-ssc.txt40
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sun4i.txt24
-rw-r--r--Documentation/devicetree/bindings/spi/spi-sun6i.txt24
-rw-r--r--Documentation/devicetree/bindings/spi/spi-xtensa-xtfpga.txt9
-rw-r--r--Documentation/devicetree/bindings/spi/spi_altera.txt5
-rw-r--r--Documentation/devicetree/bindings/spi/spi_atmel.txt31
-rw-r--r--Documentation/devicetree/bindings/spi/spi_oc_tiny.txt12
-rw-r--r--Documentation/devicetree/bindings/spi/spi_pl022.txt70
-rw-r--r--Documentation/devicetree/bindings/spi/ti_qspi.txt28
-rw-r--r--Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.txt65
-rw-r--r--Documentation/devicetree/bindings/spmi/spmi.txt41
-rw-r--r--Documentation/devicetree/bindings/staging/iio/adc/lpc32xx-adc.txt16
-rw-r--r--Documentation/devicetree/bindings/staging/iio/adc/mxs-lradc.txt47
-rw-r--r--Documentation/devicetree/bindings/staging/iio/adc/spear-adc.txt26
-rw-r--r--Documentation/devicetree/bindings/submitting-patches.txt67
-rw-r--r--Documentation/devicetree/bindings/thermal/armada-thermal.txt24
-rw-r--r--Documentation/devicetree/bindings/thermal/db8500-thermal.txt44
-rw-r--r--Documentation/devicetree/bindings/thermal/dove-thermal.txt18
-rw-r--r--Documentation/devicetree/bindings/thermal/exynos-thermal.txt121
-rw-r--r--Documentation/devicetree/bindings/thermal/imx-thermal.txt24
-rw-r--r--Documentation/devicetree/bindings/thermal/kirkwood-thermal.txt15
-rw-r--r--Documentation/devicetree/bindings/thermal/rcar-thermal.txt38
-rw-r--r--Documentation/devicetree/bindings/thermal/rockchip-thermal.txt68
-rw-r--r--Documentation/devicetree/bindings/thermal/spear-thermal.txt14
-rw-r--r--Documentation/devicetree/bindings/thermal/st-thermal.txt42
-rw-r--r--Documentation/devicetree/bindings/thermal/tegra-soctherm.txt55
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal.txt595
-rw-r--r--Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt74
-rw-r--r--Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt26
-rw-r--r--Documentation/devicetree/bindings/timer/amlogic,meson6-timer.txt15
-rw-r--r--Documentation/devicetree/bindings/timer/arm,sp804.txt29
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,bcm2835-system-timer.txt22
-rw-r--r--Documentation/devicetree/bindings/timer/brcm,kona-timer.txt25
-rw-r--r--Documentation/devicetree/bindings/timer/cadence,ttc-timer.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.txt29
-rw-r--r--Documentation/devicetree/bindings/timer/digicolor-timer.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/energymicro,efm32-timer.txt23
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,ftm-timer.txt31
-rw-r--r--Documentation/devicetree/bindings/timer/fsl,imxgpt.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt33
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt44
-rw-r--r--Documentation/devicetree/bindings/timer/marvell,orion-timer.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt17
-rw-r--r--Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt24
-rw-r--r--Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt28
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,cmt.txt79
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,mtu2.txt42
-rw-r--r--Documentation/devicetree/bindings/timer/renesas,tmu.txt44
-rw-r--r--Documentation/devicetree/bindings/timer/rockchip,rk3288-timer.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.txt88
-rw-r--r--Documentation/devicetree/bindings/timer/stericsson-u300-apptimer.txt18
-rw-r--r--Documentation/devicetree/bindings/timer/ti,keystone-timer.txt29
-rw-r--r--Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt57
-rw-r--r--Documentation/devicetree/bindings/unittest.txt71
-rw-r--r--Documentation/devicetree/bindings/usb/am33xx-usb.txt197
-rw-r--r--Documentation/devicetree/bindings/usb/atmel-usb.txt142
-rw-r--r--Documentation/devicetree/bindings/usb/brcm,bcm3384-usb.txt11
-rw-r--r--Documentation/devicetree/bindings/usb/ci-hdrc-imx.txt35
-rw-r--r--Documentation/devicetree/bindings/usb/ci-hdrc-qcom.txt17
-rw-r--r--Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt24
-rw-r--r--Documentation/devicetree/bindings/usb/ci-hdrc-zevio.txt17
-rw-r--r--Documentation/devicetree/bindings/usb/dwc2.txt38
-rw-r--r--Documentation/devicetree/bindings/usb/dwc3-st.txt68
-rw-r--r--Documentation/devicetree/bindings/usb/dwc3.txt50
-rw-r--r--Documentation/devicetree/bindings/usb/ehci-omap.txt32
-rw-r--r--Documentation/devicetree/bindings/usb/ehci-orion.txt20
-rw-r--r--Documentation/devicetree/bindings/usb/ehci-st.txt39
-rw-r--r--Documentation/devicetree/bindings/usb/exynos-usb.txt117
-rw-r--r--Documentation/devicetree/bindings/usb/fsl-usb.txt83
-rw-r--r--Documentation/devicetree/bindings/usb/generic.txt24
-rw-r--r--Documentation/devicetree/bindings/usb/gr-udc.txt34
-rw-r--r--Documentation/devicetree/bindings/usb/isp1301.txt25
-rw-r--r--Documentation/devicetree/bindings/usb/keystone-phy.txt20
-rw-r--r--Documentation/devicetree/bindings/usb/keystone-usb.txt42
-rw-r--r--Documentation/devicetree/bindings/usb/lpc32xx-udc.txt28
-rw-r--r--Documentation/devicetree/bindings/usb/msm-hsusb.txt95
-rw-r--r--Documentation/devicetree/bindings/usb/mxs-phy.txt21
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra20-ehci.txt23
-rw-r--r--Documentation/devicetree/bindings/usb/nvidia,tegra20-usb-phy.txt72
-rw-r--r--Documentation/devicetree/bindings/usb/ohci-nxp.txt24
-rw-r--r--Documentation/devicetree/bindings/usb/ohci-omap3.txt15
-rw-r--r--Documentation/devicetree/bindings/usb/ohci-st.txt37
-rw-r--r--Documentation/devicetree/bindings/usb/omap-usb.txt80
-rw-r--r--Documentation/devicetree/bindings/usb/pxa-usb.txt53
-rw-r--r--Documentation/devicetree/bindings/usb/qcom,dwc3.txt66
-rw-r--r--Documentation/devicetree/bindings/usb/renesas_usbhs.txt27
-rw-r--r--Documentation/devicetree/bindings/usb/samsung-hsotg.txt40
-rw-r--r--Documentation/devicetree/bindings/usb/samsung-usbphy.txt117
-rw-r--r--Documentation/devicetree/bindings/usb/spear-usb.txt39
-rw-r--r--Documentation/devicetree/bindings/usb/twlxxxx-usb.txt40
-rw-r--r--Documentation/devicetree/bindings/usb/udc-xilinx.txt18
-rw-r--r--Documentation/devicetree/bindings/usb/usb-ehci.txt38
-rw-r--r--Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt41
-rw-r--r--Documentation/devicetree/bindings/usb/usb-ohci.txt28
-rw-r--r--Documentation/devicetree/bindings/usb/usb-uhci.txt15
-rw-r--r--Documentation/devicetree/bindings/usb/usb-xhci.txt21
-rw-r--r--Documentation/devicetree/bindings/usb/usb3503.txt36
-rw-r--r--Documentation/devicetree/bindings/usb/usbmisc-imx.txt16
-rw-r--r--Documentation/devicetree/bindings/usb/ux500-usb.txt50
-rw-r--r--Documentation/devicetree/bindings/vendor-prefixes.txt213
-rw-r--r--Documentation/devicetree/bindings/video/adi,adv7123.txt50
-rw-r--r--Documentation/devicetree/bindings/video/adi,adv7511.txt88
-rw-r--r--Documentation/devicetree/bindings/video/analog-tv-connector.txt25
-rw-r--r--Documentation/devicetree/bindings/video/arm,pl11x.txt109
-rw-r--r--Documentation/devicetree/bindings/video/atmel,lcdc.txt89
-rw-r--r--Documentation/devicetree/bindings/video/backlight/88pm860x.txt15
-rw-r--r--Documentation/devicetree/bindings/video/backlight/gpio-backlight.txt16
-rw-r--r--Documentation/devicetree/bindings/video/backlight/lp855x.txt70
-rw-r--r--Documentation/devicetree/bindings/video/backlight/max8925-backlight.txt10
-rw-r--r--Documentation/devicetree/bindings/video/backlight/pwm-backlight.txt35
-rw-r--r--Documentation/devicetree/bindings/video/backlight/sky81452-backlight.txt29
-rw-r--r--Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt27
-rw-r--r--Documentation/devicetree/bindings/video/bridge/ps8622.txt31
-rw-r--r--Documentation/devicetree/bindings/video/bridge/ptn3460.txt39
-rw-r--r--Documentation/devicetree/bindings/video/cirrus,clps711x-fb.txt47
-rw-r--r--Documentation/devicetree/bindings/video/display-timing.txt110
-rw-r--r--Documentation/devicetree/bindings/video/dvi-connector.txt35
-rw-r--r--Documentation/devicetree/bindings/video/dw_hdmi-rockchip.txt46
-rw-r--r--Documentation/devicetree/bindings/video/exynos7-decon.txt68
-rw-r--r--Documentation/devicetree/bindings/video/exynos_dp.txt120
-rw-r--r--Documentation/devicetree/bindings/video/exynos_dsim.txt84
-rw-r--r--Documentation/devicetree/bindings/video/exynos_hdmi.txt43
-rw-r--r--Documentation/devicetree/bindings/video/exynos_hdmiddc.txt15
-rw-r--r--Documentation/devicetree/bindings/video/exynos_hdmiphy.txt15
-rw-r--r--Documentation/devicetree/bindings/video/exynos_mixer.txt26
-rw-r--r--Documentation/devicetree/bindings/video/fsl,imx-fb.txt55
-rw-r--r--Documentation/devicetree/bindings/video/hdmi-connector.txt29
-rw-r--r--Documentation/devicetree/bindings/video/lgphilips,lb035q02.txt33
-rw-r--r--Documentation/devicetree/bindings/video/panel-dpi.txt45
-rw-r--r--Documentation/devicetree/bindings/video/panel-dsi-cm.txt29
-rw-r--r--Documentation/devicetree/bindings/video/renesas,du.txt88
-rw-r--r--Documentation/devicetree/bindings/video/rockchip-drm.txt19
-rw-r--r--Documentation/devicetree/bindings/video/rockchip-vop.txt58
-rw-r--r--Documentation/devicetree/bindings/video/samsung-fimd.txt110
-rw-r--r--Documentation/devicetree/bindings/video/sharp,ls037v7dw01.txt43
-rw-r--r--Documentation/devicetree/bindings/video/simple-framebuffer-sunxi.txt33
-rw-r--r--Documentation/devicetree/bindings/video/simple-framebuffer.txt86
-rw-r--r--Documentation/devicetree/bindings/video/sony,acx565akm.txt30
-rw-r--r--Documentation/devicetree/bindings/video/ssd1289fb.txt13
-rw-r--r--Documentation/devicetree/bindings/video/ssd1307fb.txt28
-rw-r--r--Documentation/devicetree/bindings/video/thine,thc63lvdm83d50
-rw-r--r--Documentation/devicetree/bindings/video/ti,dra7-dss.txt69
-rw-r--r--Documentation/devicetree/bindings/video/ti,omap-dss.txt211
-rw-r--r--Documentation/devicetree/bindings/video/ti,omap2-dss.txt54
-rw-r--r--Documentation/devicetree/bindings/video/ti,omap3-dss.txt83
-rw-r--r--Documentation/devicetree/bindings/video/ti,omap4-dss.txt115
-rw-r--r--Documentation/devicetree/bindings/video/ti,omap5-dss.txt96
-rw-r--r--Documentation/devicetree/bindings/video/ti,opa362.txt38
-rw-r--r--Documentation/devicetree/bindings/video/ti,tfp410.txt41
-rw-r--r--Documentation/devicetree/bindings/video/ti,tpd12s015.txt44
-rw-r--r--Documentation/devicetree/bindings/video/toppoly,td028ttec1.txt30
-rw-r--r--Documentation/devicetree/bindings/video/tpo,td043mtea1.txt33
-rw-r--r--Documentation/devicetree/bindings/video/vga-connector.txt36
-rw-r--r--Documentation/devicetree/bindings/video/via,vt8500-fb.txt36
-rw-r--r--Documentation/devicetree/bindings/video/wm,prizm-ge-rops.txt13
-rw-r--r--Documentation/devicetree/bindings/video/wm,wm8505-fb.txt33
-rw-r--r--Documentation/devicetree/bindings/virtio/mmio.txt17
-rw-r--r--Documentation/devicetree/bindings/w1/fsl-imx-owire.txt19
-rw-r--r--Documentation/devicetree/bindings/w1/omap-hdq.txt17
-rw-r--r--Documentation/devicetree/bindings/w1/w1-gpio.txt22
-rw-r--r--Documentation/devicetree/bindings/watchdog/atmel-at91rm9200-wdt.txt9
-rw-r--r--Documentation/devicetree/bindings/watchdog/atmel-wdt.txt50
-rw-r--r--Documentation/devicetree/bindings/watchdog/brcm,bcm2835-pm-wdog.txt18
-rw-r--r--Documentation/devicetree/bindings/watchdog/brcm,kona-wdt.txt15
-rw-r--r--Documentation/devicetree/bindings/watchdog/cadence-wdt.txt24
-rw-r--r--Documentation/devicetree/bindings/watchdog/davinci-wdt.txt24
-rw-r--r--Documentation/devicetree/bindings/watchdog/dw_wdt.txt21
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt20
-rw-r--r--Documentation/devicetree/bindings/watchdog/gpio-wdt.txt28
-rw-r--r--Documentation/devicetree/bindings/watchdog/imgpdc-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt12
-rw-r--r--Documentation/devicetree/bindings/watchdog/marvel.txt46
-rw-r--r--Documentation/devicetree/bindings/watchdog/men-a021-wdt.txt25
-rw-r--r--Documentation/devicetree/bindings/watchdog/meson6-wdt.txt13
-rw-r--r--Documentation/devicetree/bindings/watchdog/moxa,moxart-watchdog.txt15
-rw-r--r--Documentation/devicetree/bindings/watchdog/mtk-wdt.txt13
-rw-r--r--Documentation/devicetree/bindings/watchdog/of-xilinx-wdt.txt23
-rw-r--r--Documentation/devicetree/bindings/watchdog/omap-wdt.txt14
-rw-r--r--Documentation/devicetree/bindings/watchdog/pnx4008-wdt.txt17
-rw-r--r--Documentation/devicetree/bindings/watchdog/qca-ar7130-wdt.txt13
-rw-r--r--Documentation/devicetree/bindings/watchdog/qcom-wdt.txt24
-rw-r--r--Documentation/devicetree/bindings/watchdog/rt2880-wdt.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/samsung-wdt.txt34
-rw-r--r--Documentation/devicetree/bindings/watchdog/sirfsoc_wdt.txt14
-rw-r--r--Documentation/devicetree/bindings/watchdog/stericsson-coh901327.txt19
-rw-r--r--Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt14
-rw-r--r--Documentation/devicetree/bindings/watchdog/twl4030-wdt.txt10
-rw-r--r--Documentation/devicetree/bindings/x86/ce4100.txt38
-rw-r--r--Documentation/devicetree/bindings/x86/interrupt.txt26
-rw-r--r--Documentation/devicetree/bindings/x86/timer.txt6
-rw-r--r--Documentation/devicetree/bindings/xilinx.txt306
-rw-r--r--Documentation/devicetree/bindings/xillybus/xillybus.txt20
-rw-r--r--Documentation/devicetree/booting-without-of.txt1529
-rw-r--r--Documentation/devicetree/changesets.txt40
-rw-r--r--Documentation/devicetree/dynamic-resolution-notes.txt25
-rw-r--r--Documentation/devicetree/of_unittest.txt197
-rw-r--r--Documentation/devicetree/overlay-notes.txt133
-rw-r--r--Documentation/devicetree/todo.txt10
-rw-r--r--Documentation/devicetree/usage-model.txt415
-rw-r--r--Documentation/digsig.txt96
-rw-r--r--Documentation/dma-buf-sharing.txt465
-rw-r--r--Documentation/dmaengine/00-INDEX8
-rw-r--r--Documentation/dmaengine/client.txt199
-rw-r--r--Documentation/dmaengine/dmatest.txt92
-rw-r--r--Documentation/dmaengine/provider.txt379
-rw-r--r--Documentation/dontdiff260
-rw-r--r--Documentation/driver-model/binding.txt98
-rw-r--r--Documentation/driver-model/bus.txt143
-rw-r--r--Documentation/driver-model/class.txt147
-rw-r--r--Documentation/driver-model/design-patterns.txt116
-rw-r--r--Documentation/driver-model/device.txt106
-rw-r--r--Documentation/driver-model/devres.txt341
-rw-r--r--Documentation/driver-model/driver.txt214
-rw-r--r--Documentation/driver-model/overview.txt123
-rw-r--r--Documentation/driver-model/platform.txt230
-rw-r--r--Documentation/driver-model/porting.txt445
-rw-r--r--Documentation/dvb/README.dvb-usb232
-rw-r--r--Documentation/dvb/avermedia.txt299
-rw-r--r--Documentation/dvb/bt8xx.txt98
-rw-r--r--Documentation/dvb/cards.txt123
-rw-r--r--Documentation/dvb/ci.txt212
-rw-r--r--Documentation/dvb/contributors.txt96
-rw-r--r--Documentation/dvb/faq.txt159
-rw-r--r--Documentation/dvb/opera-firmware.txt8
-rw-r--r--Documentation/dvb/readme.txt62
-rw-r--r--Documentation/dvb/technisat.txt78
-rw-r--r--Documentation/dvb/ttusb-dec.txt40
-rw-r--r--Documentation/dvb/udev.txt46
-rw-r--r--Documentation/dynamic-debug-howto.txt340
-rw-r--r--Documentation/early-userspace/README151
-rw-r--r--Documentation/early-userspace/buffer-format.txt112
-rw-r--r--Documentation/edac.txt775
-rw-r--r--Documentation/efi-stub.txt84
-rw-r--r--Documentation/eisa.txt203
-rw-r--r--Documentation/email-clients.txt263
-rw-r--r--Documentation/extcon/porting-android-switch-class123
-rw-r--r--Documentation/fault-injection/fault-injection.txt269
-rw-r--r--Documentation/fault-injection/notifier-error-inject.txt99
-rw-r--r--Documentation/fault-injection/provoke-crashes.txt38
-rw-r--r--Documentation/fb/00-INDEX75
-rw-r--r--Documentation/fb/api.txt306
-rw-r--r--Documentation/fb/arkfb.txt68
-rw-r--r--Documentation/fb/aty128fb.txt72
-rw-r--r--Documentation/fb/cirrusfb.txt97
-rw-r--r--Documentation/fb/cmap_xfbdev.txt53
-rw-r--r--Documentation/fb/deferred_io.txt75
-rw-r--r--Documentation/fb/efifb.txt31
-rw-r--r--Documentation/fb/ep93xx-fb.txt135
-rw-r--r--Documentation/fb/fbcon.txt324
-rw-r--r--Documentation/fb/framebuffer.txt343
-rw-r--r--Documentation/fb/gxfb.txt52
-rw-r--r--Documentation/fb/intel810.txt278
-rw-r--r--Documentation/fb/intelfb.txt149
-rw-r--r--Documentation/fb/internals.txt82
-rw-r--r--Documentation/fb/lxfb.txt52
-rw-r--r--Documentation/fb/matroxfb.txt413
-rw-r--r--Documentation/fb/metronomefb.txt36
-rw-r--r--Documentation/fb/modedb.txt151
-rw-r--r--Documentation/fb/pvr2fb.txt65
-rw-r--r--Documentation/fb/pxafb.txt142
-rw-r--r--Documentation/fb/s3fb.txt82
-rw-r--r--Documentation/fb/sa1100fb.txt39
-rw-r--r--Documentation/fb/sh7760fb.txt131
-rw-r--r--Documentation/fb/sisfb.txt158
-rw-r--r--Documentation/fb/sm501.txt10
-rw-r--r--Documentation/fb/sstfb.txt174
-rw-r--r--Documentation/fb/tgafb.txt69
-rw-r--r--Documentation/fb/tridentfb.txt70
-rw-r--r--Documentation/fb/udlfb.txt159
-rw-r--r--Documentation/fb/uvesafb.txt183
-rw-r--r--Documentation/fb/vesafb.txt181
-rw-r--r--Documentation/fb/viafb.modes870
-rw-r--r--Documentation/fb/viafb.txt252
-rw-r--r--Documentation/fb/vt8623fb.txt64
-rw-r--r--Documentation/filesystems/.gitignore1
-rw-r--r--Documentation/filesystems/00-INDEX159
-rw-r--r--Documentation/filesystems/9p.txt161
-rw-r--r--Documentation/filesystems/Locking578
-rw-r--r--Documentation/filesystems/Makefile7
-rw-r--r--Documentation/filesystems/adfs.txt75
-rw-r--r--Documentation/filesystems/affs.txt222
-rw-r--r--Documentation/filesystems/afs.txt247
-rw-r--r--Documentation/filesystems/aufs/README383
-rw-r--r--Documentation/filesystems/aufs/design/01intro.txt170
-rw-r--r--Documentation/filesystems/aufs/design/02struct.txt258
-rw-r--r--Documentation/filesystems/aufs/design/03atomic_open.txt85
-rw-r--r--Documentation/filesystems/aufs/design/03lookup.txt113
-rw-r--r--Documentation/filesystems/aufs/design/04branch.txt74
-rw-r--r--Documentation/filesystems/aufs/design/05wbr_policy.txt64
-rw-r--r--Documentation/filesystems/aufs/design/06fhsm.txt120
-rw-r--r--Documentation/filesystems/aufs/design/06mmap.txt72
-rw-r--r--Documentation/filesystems/aufs/design/06xattr.txt96
-rw-r--r--Documentation/filesystems/aufs/design/07export.txt58
-rw-r--r--Documentation/filesystems/aufs/design/08shwh.txt52
-rw-r--r--Documentation/filesystems/aufs/design/10dynop.txt47
-rw-r--r--Documentation/filesystems/autofs4-mount-control.txt393
-rw-r--r--Documentation/filesystems/autofs4.txt520
-rw-r--r--Documentation/filesystems/automount-support.txt118
-rw-r--r--Documentation/filesystems/befs.txt117
-rw-r--r--Documentation/filesystems/bfs.txt57
-rw-r--r--Documentation/filesystems/btrfs.txt270
-rw-r--r--Documentation/filesystems/caching/backend-api.txt703
-rw-r--r--Documentation/filesystems/caching/cachefiles.txt501
-rw-r--r--Documentation/filesystems/caching/fscache.txt443
-rw-r--r--Documentation/filesystems/caching/netfs-api.txt915
-rw-r--r--Documentation/filesystems/caching/object.txt320
-rw-r--r--Documentation/filesystems/caching/operations.txt213
-rw-r--r--Documentation/filesystems/ceph.txt148
-rw-r--r--Documentation/filesystems/cifs/AUTHORS57
-rw-r--r--Documentation/filesystems/cifs/CHANGES1065
-rw-r--r--Documentation/filesystems/cifs/README753
-rw-r--r--Documentation/filesystems/cifs/TODO104
-rw-r--r--Documentation/filesystems/cifs/cifs.txt31
-rwxr-xr-xDocumentation/filesystems/cifs/winucase_convert.pl62
-rw-r--r--Documentation/filesystems/coda.txt1673
-rw-r--r--Documentation/filesystems/configfs/Makefile3
-rw-r--r--Documentation/filesystems/configfs/configfs.txt484
-rw-r--r--Documentation/filesystems/configfs/configfs_example_explicit.c483
-rw-r--r--Documentation/filesystems/configfs/configfs_example_macros.c446
-rw-r--r--Documentation/filesystems/cramfs.txt76
-rw-r--r--Documentation/filesystems/dax.txt94
-rw-r--r--Documentation/filesystems/debugfs.txt191
-rw-r--r--Documentation/filesystems/devpts.txt132
-rw-r--r--Documentation/filesystems/directory-locking127
-rw-r--r--Documentation/filesystems/dlmfs.txt130
-rw-r--r--Documentation/filesystems/dnotify.txt70
-rw-r--r--Documentation/filesystems/dnotify_test.c34
-rw-r--r--Documentation/filesystems/ecryptfs.txt77
-rw-r--r--Documentation/filesystems/efivarfs.txt16
-rw-r--r--Documentation/filesystems/exofs.txt185
-rw-r--r--Documentation/filesystems/ext2.txt384
-rw-r--r--Documentation/filesystems/ext3.txt215
-rw-r--r--Documentation/filesystems/ext4.txt629
-rw-r--r--Documentation/filesystems/f2fs.txt576
-rw-r--r--Documentation/filesystems/fiemap.txt229
-rw-r--r--Documentation/filesystems/files.txt123
-rw-r--r--Documentation/filesystems/fuse.txt423
-rw-r--r--Documentation/filesystems/gfs2-glocks.txt231
-rw-r--r--Documentation/filesystems/gfs2-uevents.txt100
-rw-r--r--Documentation/filesystems/gfs2.txt45
-rw-r--r--Documentation/filesystems/hfs.txt82
-rw-r--r--Documentation/filesystems/hfsplus.txt59
-rw-r--r--Documentation/filesystems/hpfs.txt296
-rw-r--r--Documentation/filesystems/inotify.txt79
-rw-r--r--Documentation/filesystems/isofs.txt48
-rw-r--r--Documentation/filesystems/jfs.txt52
-rw-r--r--Documentation/filesystems/locks.txt68
-rw-r--r--Documentation/filesystems/logfs.txt241
-rw-r--r--Documentation/filesystems/mandatory-locking.txt171
-rw-r--r--Documentation/filesystems/ncpfs.txt12
-rw-r--r--Documentation/filesystems/nfs/00-INDEX26
-rw-r--r--Documentation/filesystems/nfs/Exporting149
-rw-r--r--Documentation/filesystems/nfs/fault_injection.txt69
-rw-r--r--Documentation/filesystems/nfs/idmapper.txt75
-rw-r--r--Documentation/filesystems/nfs/knfsd-stats.txt159
-rw-r--r--Documentation/filesystems/nfs/nfs-rdma.txt276
-rw-r--r--Documentation/filesystems/nfs/nfs.txt136
-rw-r--r--Documentation/filesystems/nfs/nfs41-server.txt173
-rw-r--r--Documentation/filesystems/nfs/nfsd-admin-interfaces.txt41
-rw-r--r--Documentation/filesystems/nfs/nfsroot.txt302
-rw-r--r--Documentation/filesystems/nfs/pnfs-block-server.txt37
-rw-r--r--Documentation/filesystems/nfs/pnfs.txt110
-rw-r--r--Documentation/filesystems/nfs/rpc-cache.txt202
-rw-r--r--Documentation/filesystems/nfs/rpc-server-gss.txt91
-rw-r--r--Documentation/filesystems/nilfs2.txt270
-rw-r--r--Documentation/filesystems/ntfs.txt451
-rw-r--r--Documentation/filesystems/ocfs2.txt106
-rw-r--r--Documentation/filesystems/omfs.txt106
-rw-r--r--Documentation/filesystems/overlayfs.txt226
-rw-r--r--Documentation/filesystems/path-lookup.txt382
-rw-r--r--Documentation/filesystems/pohmelfs/design_notes.txt72
-rw-r--r--Documentation/filesystems/pohmelfs/info.txt99
-rw-r--r--Documentation/filesystems/pohmelfs/network_protocol.txt227
-rw-r--r--Documentation/filesystems/porting485
-rw-r--r--Documentation/filesystems/proc.txt1869
-rw-r--r--Documentation/filesystems/qnx6.txt174
-rw-r--r--Documentation/filesystems/quota.txt65
-rw-r--r--Documentation/filesystems/ramfs-rootfs-initramfs.txt359
-rw-r--r--Documentation/filesystems/relay.txt494
-rw-r--r--Documentation/filesystems/romfs.txt186
-rw-r--r--Documentation/filesystems/seq_file.txt338
-rw-r--r--Documentation/filesystems/sharedsubtree.txt939
-rw-r--r--Documentation/filesystems/spufs.txt521
-rw-r--r--Documentation/filesystems/squashfs.txt259
-rw-r--r--Documentation/filesystems/sysfs-pci.txt120
-rw-r--r--Documentation/filesystems/sysfs-tagging.txt42
-rw-r--r--Documentation/filesystems/sysfs.txt380
-rw-r--r--Documentation/filesystems/sysv-fs.txt197
-rw-r--r--Documentation/filesystems/tmpfs.txt148
-rw-r--r--Documentation/filesystems/ubifs.txt119
-rw-r--r--Documentation/filesystems/udf.txt82
-rw-r--r--Documentation/filesystems/ufs.txt60
-rw-r--r--Documentation/filesystems/vfat.txt336
-rw-r--r--Documentation/filesystems/vfs.txt1171
-rw-r--r--Documentation/filesystems/xfs-delayed-logging-design.txt793
-rw-r--r--Documentation/filesystems/xfs-self-describing-metadata.txt350
-rw-r--r--Documentation/filesystems/xfs.txt350
-rw-r--r--Documentation/firmware_class/README128
-rw-r--r--Documentation/firmware_class/hotplug-script17
-rw-r--r--Documentation/flexible-arrays.txt120
-rw-r--r--Documentation/fmc/00-INDEX38
-rw-r--r--Documentation/fmc/API.txt47
-rw-r--r--Documentation/fmc/FMC-and-SDB.txt88
-rw-r--r--Documentation/fmc/carrier.txt311
-rw-r--r--Documentation/fmc/fmc-chardev.txt64
-rw-r--r--Documentation/fmc/fmc-fakedev.txt36
-rw-r--r--Documentation/fmc/fmc-trivial.txt17
-rw-r--r--Documentation/fmc/fmc-write-eeprom.txt98
-rw-r--r--Documentation/fmc/identifiers.txt168
-rw-r--r--Documentation/fmc/mezzanine.txt123
-rw-r--r--Documentation/fmc/parameters.txt56
-rw-r--r--Documentation/frv/README.txt51
-rw-r--r--Documentation/frv/atomic-ops.txt134
-rw-r--r--Documentation/frv/booting.txt182
-rw-r--r--Documentation/frv/clock.txt65
-rw-r--r--Documentation/frv/configuring.txt125
-rw-r--r--Documentation/frv/features.txt310
-rw-r--r--Documentation/frv/gdbinit102
-rw-r--r--Documentation/frv/gdbstub.txt130
-rw-r--r--Documentation/frv/kernel-ABI.txt262
-rw-r--r--Documentation/frv/mmu-layout.txt306
-rw-r--r--Documentation/futex-requeue-pi.txt131
-rw-r--r--Documentation/gcov.txt257
-rw-r--r--Documentation/gdb-kernel-debugging.txt160
-rw-r--r--Documentation/gpio/00-INDEX14
-rw-r--r--Documentation/gpio/board.txt154
-rw-r--r--Documentation/gpio/consumer.txt340
-rw-r--r--Documentation/gpio/driver.txt192
-rw-r--r--Documentation/gpio/gpio-legacy.txt775
-rw-r--r--Documentation/gpio/gpio.txt119
-rw-r--r--Documentation/gpio/sysfs.txt155
-rw-r--r--Documentation/hid/hid-sensor.txt224
-rw-r--r--Documentation/hid/hid-transport.txt317
-rw-r--r--Documentation/hid/hiddev.txt205
-rw-r--r--Documentation/hid/hidraw.txt119
-rw-r--r--Documentation/hid/uhid.txt187
-rw-r--r--Documentation/highuid.txt77
-rw-r--r--Documentation/hsi.txt75
-rw-r--r--Documentation/hw_random.txt90
-rw-r--r--Documentation/hwmon/ab850022
-rw-r--r--Documentation/hwmon/abituguru92
-rw-r--r--Documentation/hwmon/abituguru-datasheet312
-rw-r--r--Documentation/hwmon/abituguru365
-rw-r--r--Documentation/hwmon/abx50028
-rw-r--r--Documentation/hwmon/acpi_power_meter51
-rw-r--r--Documentation/hwmon/ad731425
-rw-r--r--Documentation/hwmon/adc128d81847
-rw-r--r--Documentation/hwmon/adm1021113
-rw-r--r--Documentation/hwmon/adm102551
-rw-r--r--Documentation/hwmon/adm102693
-rw-r--r--Documentation/hwmon/adm103135
-rw-r--r--Documentation/hwmon/adm127592
-rw-r--r--Documentation/hwmon/adm9240177
-rw-r--r--Documentation/hwmon/ads101576
-rw-r--r--Documentation/hwmon/ads782858
-rw-r--r--Documentation/hwmon/adt741073
-rw-r--r--Documentation/hwmon/adt741142
-rw-r--r--Documentation/hwmon/adt746267
-rw-r--r--Documentation/hwmon/adt747073
-rw-r--r--Documentation/hwmon/adt7475117
-rw-r--r--Documentation/hwmon/amc6821102
-rw-r--r--Documentation/hwmon/asb10072
-rw-r--r--Documentation/hwmon/asc7621296
-rw-r--r--Documentation/hwmon/coretemp181
-rw-r--r--Documentation/hwmon/da905261
-rw-r--r--Documentation/hwmon/da905547
-rw-r--r--Documentation/hwmon/dme1737328
-rw-r--r--Documentation/hwmon/ds1621187
-rw-r--r--Documentation/hwmon/ds62034
-rw-r--r--Documentation/hwmon/emc140359
-rw-r--r--Documentation/hwmon/emc210333
-rw-r--r--Documentation/hwmon/emc6w20142
-rw-r--r--Documentation/hwmon/f71805f167
-rw-r--r--Documentation/hwmon/f71882fg138
-rw-r--r--Documentation/hwmon/fam15h_power37
-rw-r--r--Documentation/hwmon/g760a36
-rw-r--r--Documentation/hwmon/g76265
-rw-r--r--Documentation/hwmon/gl518sm73
-rw-r--r--Documentation/hwmon/hih613037
-rw-r--r--Documentation/hwmon/htu2146
-rw-r--r--Documentation/hwmon/hwmon-kernel-api.txt107
-rw-r--r--Documentation/hwmon/ibmaem38
-rw-r--r--Documentation/hwmon/ibmpowernv41
-rw-r--r--Documentation/hwmon/ina20993
-rw-r--r--Documentation/hwmon/ina2xx64
-rw-r--r--Documentation/hwmon/it87262
-rw-r--r--Documentation/hwmon/jc42102
-rw-r--r--Documentation/hwmon/k10temp77
-rw-r--r--Documentation/hwmon/k8temp55
-rw-r--r--Documentation/hwmon/lineage-pem77
-rw-r--r--Documentation/hwmon/lm25066120
-rw-r--r--Documentation/hwmon/lm6377
-rw-r--r--Documentation/hwmon/lm7047
-rw-r--r--Documentation/hwmon/lm7390
-rw-r--r--Documentation/hwmon/lm7592
-rw-r--r--Documentation/hwmon/lm7738
-rw-r--r--Documentation/hwmon/lm7868
-rw-r--r--Documentation/hwmon/lm8063
-rw-r--r--Documentation/hwmon/lm8385
-rw-r--r--Documentation/hwmon/lm85230
-rw-r--r--Documentation/hwmon/lm8777
-rw-r--r--Documentation/hwmon/lm90275
-rw-r--r--Documentation/hwmon/lm9237
-rw-r--r--Documentation/hwmon/lm93309
-rw-r--r--Documentation/hwmon/lm9523441
-rw-r--r--Documentation/hwmon/lm9524541
-rw-r--r--Documentation/hwmon/ltc294584
-rw-r--r--Documentation/hwmon/ltc2978157
-rw-r--r--Documentation/hwmon/ltc415147
-rw-r--r--Documentation/hwmon/ltc421551
-rw-r--r--Documentation/hwmon/ltc4245102
-rw-r--r--Documentation/hwmon/ltc426056
-rw-r--r--Documentation/hwmon/ltc426163
-rw-r--r--Documentation/hwmon/max1606466
-rw-r--r--Documentation/hwmon/max16065105
-rw-r--r--Documentation/hwmon/max161929
-rw-r--r--Documentation/hwmon/max166860
-rw-r--r--Documentation/hwmon/max19760
-rw-r--r--Documentation/hwmon/max34440127
-rw-r--r--Documentation/hwmon/max663949
-rw-r--r--Documentation/hwmon/max664221
-rw-r--r--Documentation/hwmon/max665064
-rw-r--r--Documentation/hwmon/max669758
-rw-r--r--Documentation/hwmon/max868875
-rw-r--r--Documentation/hwmon/mc13783-adc74
-rw-r--r--Documentation/hwmon/mcp302129
-rw-r--r--Documentation/hwmon/menf21bmc50
-rw-r--r--Documentation/hwmon/nct668357
-rw-r--r--Documentation/hwmon/nct6775200
-rw-r--r--Documentation/hwmon/nct780232
-rw-r--r--Documentation/hwmon/nct790460
-rw-r--r--Documentation/hwmon/ntc_thermistor98
-rw-r--r--Documentation/hwmon/pc87360184
-rw-r--r--Documentation/hwmon/pc8742759
-rw-r--r--Documentation/hwmon/pcf859190
-rw-r--r--Documentation/hwmon/pmbus212
-rw-r--r--Documentation/hwmon/pmbus-core283
-rw-r--r--Documentation/hwmon/powr122045
-rw-r--r--Documentation/hwmon/pwm-fan17
-rw-r--r--Documentation/hwmon/sch562727
-rw-r--r--Documentation/hwmon/sch563634
-rw-r--r--Documentation/hwmon/sht1574
-rw-r--r--Documentation/hwmon/sht2149
-rw-r--r--Documentation/hwmon/shtc143
-rw-r--r--Documentation/hwmon/sis5595106
-rw-r--r--Documentation/hwmon/smm665157
-rw-r--r--Documentation/hwmon/smsc47b397163
-rw-r--r--Documentation/hwmon/smsc47m163
-rw-r--r--Documentation/hwmon/smsc47m192103
-rw-r--r--Documentation/hwmon/submitting-patches110
-rw-r--r--Documentation/hwmon/sysfs-interface760
-rw-r--r--Documentation/hwmon/thmc5074
-rw-r--r--Documentation/hwmon/tmp10226
-rw-r--r--Documentation/hwmon/tmp10328
-rw-r--r--Documentation/hwmon/tmp40157
-rw-r--r--Documentation/hwmon/tmp42144
-rw-r--r--Documentation/hwmon/tps4042264
-rw-r--r--Documentation/hwmon/twl4030-madc-hwmon45
-rw-r--r--Documentation/hwmon/ucd9000110
-rw-r--r--Documentation/hwmon/ucd9200112
-rw-r--r--Documentation/hwmon/userspace-tools40
-rw-r--r--Documentation/hwmon/vexpress34
-rw-r--r--Documentation/hwmon/via686a78
-rw-r--r--Documentation/hwmon/vt1211206
-rw-r--r--Documentation/hwmon/w83627ehf190
-rw-r--r--Documentation/hwmon/w83627hf115
-rw-r--r--Documentation/hwmon/w83781d453
-rw-r--r--Documentation/hwmon/w83791d161
-rw-r--r--Documentation/hwmon/w83792d173
-rw-r--r--Documentation/hwmon/w83793106
-rw-r--r--Documentation/hwmon/w83795127
-rw-r--r--Documentation/hwmon/w83l785ts40
-rw-r--r--Documentation/hwmon/w83l786ng54
-rw-r--r--Documentation/hwmon/wm831x37
-rw-r--r--Documentation/hwmon/wm835026
-rw-r--r--Documentation/hwmon/zl6100160
-rw-r--r--Documentation/hwspinlock.txt307
-rw-r--r--Documentation/i2c/busses/i2c-ali153542
-rw-r--r--Documentation/i2c/busses/i2c-ali156327
-rw-r--r--Documentation/i2c/busses/i2c-ali15x3112
-rw-r--r--Documentation/i2c/busses/i2c-amd75625
-rw-r--r--Documentation/i2c/busses/i2c-amd811141
-rw-r--r--Documentation/i2c/busses/i2c-diolan-u2c26
-rw-r--r--Documentation/i2c/busses/i2c-i801164
-rw-r--r--Documentation/i2c/busses/i2c-ismt36
-rw-r--r--Documentation/i2c/busses/i2c-nforce250
-rw-r--r--Documentation/i2c/busses/i2c-ocores68
-rw-r--r--Documentation/i2c/busses/i2c-parport177
-rw-r--r--Documentation/i2c/busses/i2c-parport-light22
-rw-r--r--Documentation/i2c/busses/i2c-pca-isa23
-rw-r--r--Documentation/i2c/busses/i2c-piix4110
-rw-r--r--Documentation/i2c/busses/i2c-sis559559
-rw-r--r--Documentation/i2c/busses/i2c-sis63058
-rw-r--r--Documentation/i2c/busses/i2c-sis96x73
-rw-r--r--Documentation/i2c/busses/i2c-taos-evm46
-rw-r--r--Documentation/i2c/busses/i2c-via34
-rw-r--r--Documentation/i2c/busses/i2c-viapro73
-rw-r--r--Documentation/i2c/busses/scx200_acb32
-rw-r--r--Documentation/i2c/dev-interface214
-rw-r--r--Documentation/i2c/fault-codes124
-rw-r--r--Documentation/i2c/functionality148
-rw-r--r--Documentation/i2c/i2c-protocol88
-rw-r--r--Documentation/i2c/i2c-stub64
-rw-r--r--Documentation/i2c/instantiating-devices248
-rw-r--r--Documentation/i2c/muxes/i2c-mux-gpio83
-rw-r--r--Documentation/i2c/old-module-parameters44
-rw-r--r--Documentation/i2c/slave-eeprom-backend14
-rw-r--r--Documentation/i2c/slave-interface179
-rw-r--r--Documentation/i2c/smbus-protocol273
-rw-r--r--Documentation/i2c/summary43
-rw-r--r--Documentation/i2c/ten-bit-addresses24
-rw-r--r--Documentation/i2c/upgrading-clients279
-rw-r--r--Documentation/i2c/writing-clients403
-rw-r--r--Documentation/ia64/.gitignore1
-rw-r--r--Documentation/ia64/IRQ-redir.txt69
-rw-r--r--Documentation/ia64/Makefile5
-rw-r--r--Documentation/ia64/README43
-rw-r--r--Documentation/ia64/aliasing-test.c263
-rw-r--r--Documentation/ia64/aliasing.txt221
-rw-r--r--Documentation/ia64/efirtc.txt128
-rw-r--r--Documentation/ia64/err_inject.txt1068
-rw-r--r--Documentation/ia64/fsys.txt286
-rw-r--r--Documentation/ia64/mca.txt194
-rw-r--r--Documentation/ia64/serial.txt151
-rw-r--r--Documentation/ia64/xen.txt183
-rw-r--r--Documentation/ide/00-INDEX14
-rw-r--r--Documentation/ide/ChangeLog.ide-cd.1994-2004268
-rw-r--r--Documentation/ide/ChangeLog.ide-floppy.1996-200263
-rw-r--r--Documentation/ide/ChangeLog.ide-tape.1995-2002257
-rw-r--r--Documentation/ide/ide-tape.txt65
-rw-r--r--Documentation/ide/ide.txt256
-rw-r--r--Documentation/ide/warm-plug-howto.txt18
-rw-r--r--Documentation/infiniband/core_locking.txt114
-rw-r--r--Documentation/infiniband/ipoib.txt105
-rw-r--r--Documentation/infiniband/sysfs.txt66
-rw-r--r--Documentation/infiniband/user_mad.txt153
-rw-r--r--Documentation/infiniband/user_verbs.txt69
-rw-r--r--Documentation/init.txt49
-rw-r--r--Documentation/initrd.txt366
-rw-r--r--Documentation/input/alps.txt319
-rw-r--r--Documentation/input/amijoy.txt184
-rw-r--r--Documentation/input/appletouch.txt85
-rw-r--r--Documentation/input/atarikbd.txt709
-rw-r--r--Documentation/input/bcm5974.txt65
-rw-r--r--Documentation/input/cd32.txt19
-rw-r--r--Documentation/input/cma3000_d0x.txt115
-rw-r--r--Documentation/input/cs461x.txt45
-rw-r--r--Documentation/input/edt-ft5x06.txt54
-rw-r--r--Documentation/input/elantech.txt817
-rw-r--r--Documentation/input/event-codes.txt348
-rw-r--r--Documentation/input/ff.txt221
-rw-r--r--Documentation/input/gamepad.txt159
-rw-r--r--Documentation/input/gameport-programming.txt187
-rw-r--r--Documentation/input/gpio-tilt.txt103
-rw-r--r--Documentation/input/iforce-protocol.txt258
-rw-r--r--Documentation/input/input-programming.txt302
-rw-r--r--Documentation/input/input.txt290
-rw-r--r--Documentation/input/interactive.fig42
-rw-r--r--Documentation/input/joystick-api.txt314
-rw-r--r--Documentation/input/joystick-parport.txt542
-rw-r--r--Documentation/input/joystick.txt586
-rw-r--r--Documentation/input/multi-touch-protocol.txt418
-rw-r--r--Documentation/input/notifier.txt52
-rw-r--r--Documentation/input/ntrig.txt126
-rw-r--r--Documentation/input/rotary-encoder.txt121
-rw-r--r--Documentation/input/sentelic.txt873
-rw-r--r--Documentation/input/shape.fig65
-rw-r--r--Documentation/input/walkera0701.txt109
-rw-r--r--Documentation/input/xpad.txt226
-rw-r--r--Documentation/input/yealink.txt216
-rw-r--r--Documentation/intel_txt.txt210
-rw-r--r--Documentation/io-mapping.txt82
-rw-r--r--Documentation/io_ordering.txt47
-rw-r--r--Documentation/ioctl/00-INDEX12
-rw-r--r--Documentation/ioctl/botching-up-ioctls.txt219
-rw-r--r--Documentation/ioctl/cdrom.txt966
-rw-r--r--Documentation/ioctl/hdio.txt1071
-rw-r--r--Documentation/ioctl/ioctl-decoding.txt24
-rw-r--r--Documentation/ioctl/ioctl-number.txt332
-rw-r--r--Documentation/iostats.txt164
-rw-r--r--Documentation/irqflags-tracing.txt50
-rw-r--r--Documentation/isapnp.txt14
-rw-r--r--Documentation/isdn/00-INDEX50
-rw-r--r--Documentation/isdn/CREDITS70
-rw-r--r--Documentation/isdn/HiSax.cert96
-rw-r--r--Documentation/isdn/INTERFACE759
-rw-r--r--Documentation/isdn/INTERFACE.CAPI355
-rw-r--r--Documentation/isdn/INTERFACE.fax163
-rw-r--r--Documentation/isdn/README599
-rw-r--r--Documentation/isdn/README.FAQ26
-rw-r--r--Documentation/isdn/README.HiSax659
-rw-r--r--Documentation/isdn/README.act2000104
-rw-r--r--Documentation/isdn/README.audio138
-rw-r--r--Documentation/isdn/README.avmb1187
-rw-r--r--Documentation/isdn/README.concap259
-rw-r--r--Documentation/isdn/README.diversion127
-rw-r--r--Documentation/isdn/README.fax45
-rw-r--r--Documentation/isdn/README.gigaset421
-rw-r--r--Documentation/isdn/README.hfc-pci41
-rw-r--r--Documentation/isdn/README.hysdn195
-rw-r--r--Documentation/isdn/README.icn148
-rw-r--r--Documentation/isdn/README.mISDN6
-rw-r--r--Documentation/isdn/README.pcbit40
-rw-r--r--Documentation/isdn/README.sc281
-rw-r--r--Documentation/isdn/README.syncppp58
-rw-r--r--Documentation/isdn/README.x25184
-rw-r--r--Documentation/isdn/syncPPP.FAQ224
-rw-r--r--Documentation/ja_JP/HOWTO644
-rw-r--r--Documentation/ja_JP/SubmitChecklist111
-rw-r--r--Documentation/ja_JP/SubmittingPatches719
-rw-r--r--Documentation/ja_JP/stable_api_nonsense.txt263
-rw-r--r--Documentation/ja_JP/stable_kernel_rules.txt84
-rw-r--r--Documentation/java.txt404
-rw-r--r--Documentation/kasan.txt172
-rw-r--r--Documentation/kbuild/00-INDEX14
-rw-r--r--Documentation/kbuild/headers_install.txt47
-rw-r--r--Documentation/kbuild/kbuild.txt235
-rw-r--r--Documentation/kbuild/kconfig-language.txt395
-rw-r--r--Documentation/kbuild/kconfig.txt237
-rw-r--r--Documentation/kbuild/makefiles.txt1410
-rw-r--r--Documentation/kbuild/modules.txt541
-rw-r--r--Documentation/kdbus/.gitignore2
-rw-r--r--Documentation/kdbus/Makefile44
-rw-r--r--Documentation/kdbus/kdbus.bus.xml344
-rw-r--r--Documentation/kdbus/kdbus.connection.xml1244
-rw-r--r--Documentation/kdbus/kdbus.endpoint.xml429
-rw-r--r--Documentation/kdbus/kdbus.fs.xml124
-rw-r--r--Documentation/kdbus/kdbus.item.xml839
-rw-r--r--Documentation/kdbus/kdbus.match.xml555
-rw-r--r--Documentation/kdbus/kdbus.message.xml1276
-rw-r--r--Documentation/kdbus/kdbus.name.xml711
-rw-r--r--Documentation/kdbus/kdbus.policy.xml406
-rw-r--r--Documentation/kdbus/kdbus.pool.xml326
-rw-r--r--Documentation/kdbus/kdbus.xml1012
-rw-r--r--Documentation/kdbus/stylesheet.xsl16
-rw-r--r--Documentation/kdump/gdbmacros.txt201
-rw-r--r--Documentation/kdump/kdump.txt487
-rw-r--r--Documentation/kernel-doc-nano-HOWTO.txt362
-rw-r--r--Documentation/kernel-docs.txt724
-rw-r--r--Documentation/kernel-parameters.txt4062
-rw-r--r--Documentation/kernel-per-CPU-kthreads.txt273
-rw-r--r--Documentation/kmemcheck.txt754
-rw-r--r--Documentation/kmemleak.txt203
-rw-r--r--Documentation/ko_KR/HOWTO588
-rw-r--r--Documentation/ko_KR/stable_api_nonsense.txt195
-rw-r--r--Documentation/kobject.txt415
-rw-r--r--Documentation/kprobes.txt731
-rw-r--r--Documentation/kref.txt303
-rw-r--r--Documentation/kselftest.txt69
-rw-r--r--Documentation/laptops/.gitignore2
-rw-r--r--Documentation/laptops/00-INDEX22
-rw-r--r--Documentation/laptops/Makefile5
-rw-r--r--Documentation/laptops/asus-laptop.txt257
-rw-r--r--Documentation/laptops/disk-shock-protection.txt149
-rw-r--r--Documentation/laptops/dslm.c166
-rw-r--r--Documentation/laptops/freefall.c174
-rw-r--r--Documentation/laptops/laptop-mode.txt782
-rw-r--r--Documentation/laptops/sony-laptop.txt144
-rw-r--r--Documentation/laptops/sonypi.txt152
-rw-r--r--Documentation/laptops/thinkpad-acpi.txt1479
-rw-r--r--Documentation/laptops/toshiba_haps.txt76
-rw-r--r--Documentation/ldm.txt109
-rw-r--r--Documentation/leds/00-INDEX22
-rw-r--r--Documentation/leds/leds-blinkm.txt80
-rw-r--r--Documentation/leds/leds-class-flash.txt22
-rw-r--r--Documentation/leds/leds-class.txt97
-rw-r--r--Documentation/leds/leds-lm3556.txt85
-rw-r--r--Documentation/leds/leds-lp3944.txt50
-rw-r--r--Documentation/leds/leds-lp5521.txt101
-rw-r--r--Documentation/leds/leds-lp5523.txt100
-rw-r--r--Documentation/leds/leds-lp5562.txt120
-rw-r--r--Documentation/leds/leds-lp55xx.txt194
-rw-r--r--Documentation/leds/ledtrig-oneshot.txt59
-rw-r--r--Documentation/leds/ledtrig-transient.txt152
-rw-r--r--Documentation/local_ops.txt191
-rw-r--r--Documentation/locking/00-INDEX16
-rw-r--r--Documentation/locking/lglock.txt166
-rw-r--r--Documentation/locking/lockdep-design.txt286
-rw-r--r--Documentation/locking/lockstat.txt183
-rw-r--r--Documentation/locking/locktorture.txt147
-rw-r--r--Documentation/locking/mutex-design.txt157
-rw-r--r--Documentation/locking/rt-mutex-design.txt781
-rw-r--r--Documentation/locking/rt-mutex.txt79
-rw-r--r--Documentation/locking/spinlocks.txt167
-rw-r--r--Documentation/locking/ww-mutex-design.txt344
-rw-r--r--Documentation/lockup-watchdogs.txt63
-rw-r--r--Documentation/logo.gifbin0 -> 16335 bytes
-rw-r--r--Documentation/logo.txt13
-rw-r--r--Documentation/lzo.txt164
-rw-r--r--Documentation/m68k/00-INDEX7
-rw-r--r--Documentation/m68k/README.buddha210
-rw-r--r--Documentation/m68k/kernel-options.txt884
-rw-r--r--Documentation/magic-number.txt160
-rw-r--r--Documentation/mailbox.txt122
-rw-r--r--Documentation/md-cluster.txt176
-rw-r--r--Documentation/md.txt613
-rw-r--r--Documentation/media-framework.txt372
-rw-r--r--Documentation/memory-barriers.txt3064
-rw-r--r--Documentation/memory-devices/ti-emif.txt57
-rw-r--r--Documentation/memory-hotplug.txt450
-rw-r--r--Documentation/metag/00-INDEX4
-rw-r--r--Documentation/metag/kernel-ABI.txt256
-rw-r--r--Documentation/mic/Makefile1
-rw-r--r--Documentation/mic/mic_overview.txt66
-rw-r--r--Documentation/mic/mpssd/.gitignore1
-rw-r--r--Documentation/mic/mpssd/Makefile19
-rwxr-xr-xDocumentation/mic/mpssd/micctrl173
-rwxr-xr-xDocumentation/mic/mpssd/mpss202
-rw-r--r--Documentation/mic/mpssd/mpssd.c1728
-rw-r--r--Documentation/mic/mpssd/mpssd.h102
-rw-r--r--Documentation/mic/mpssd/sysfs.c102
-rw-r--r--Documentation/mips/00-INDEX4
-rw-r--r--Documentation/mips/AU1xxx_IDE.README122
-rw-r--r--Documentation/misc-devices/Makefile1
-rw-r--r--Documentation/misc-devices/ad525x_dpot.txt57
-rw-r--r--Documentation/misc-devices/apds990x.txt111
-rw-r--r--Documentation/misc-devices/bh1770glc.txt116
-rw-r--r--Documentation/misc-devices/c2port.txt90
-rw-r--r--Documentation/misc-devices/eeprom96
-rw-r--r--Documentation/misc-devices/ics932s40131
-rw-r--r--Documentation/misc-devices/isl2900362
-rw-r--r--Documentation/misc-devices/lis3lv02d93
-rw-r--r--Documentation/misc-devices/max6875110
-rw-r--r--Documentation/misc-devices/mei/.gitignore1
-rw-r--r--Documentation/misc-devices/mei/Makefile5
-rw-r--r--Documentation/misc-devices/mei/TODO2
-rw-r--r--Documentation/misc-devices/mei/mei-amt-version.c479
-rw-r--r--Documentation/misc-devices/mei/mei-client-bus.txt141
-rw-r--r--Documentation/misc-devices/mei/mei.txt223
-rw-r--r--Documentation/misc-devices/spear-pcie-gadget.txt130
-rw-r--r--Documentation/mmc/00-INDEX8
-rw-r--r--Documentation/mmc/mmc-async-req.txt87
-rw-r--r--Documentation/mmc/mmc-dev-attrs.txt84
-rw-r--r--Documentation/mmc/mmc-dev-parts.txt40
-rw-r--r--Documentation/mn10300/ABI.txt149
-rw-r--r--Documentation/mn10300/compartmentalisation.txt60
-rw-r--r--Documentation/module-signing.txt241
-rw-r--r--Documentation/mono.txt66
-rw-r--r--Documentation/mtd/nand/pxa3xx-nand.txt113
-rw-r--r--Documentation/mtd/nand_ecc.txt714
-rw-r--r--Documentation/mtd/spi-nor.txt62
-rw-r--r--Documentation/namespaces/compatibility-list.txt39
-rw-r--r--Documentation/namespaces/resource-control.txt14
-rw-r--r--Documentation/netlabel/00-INDEX10
-rw-r--r--Documentation/netlabel/cipso_ipv4.txt48
-rw-r--r--Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt791
-rw-r--r--Documentation/netlabel/introduction.txt46
-rw-r--r--Documentation/netlabel/lsm_interface.txt47
-rw-r--r--Documentation/networking/00-INDEX238
-rw-r--r--Documentation/networking/3c509.txt213
-rw-r--r--Documentation/networking/6pack.txt175
-rw-r--r--Documentation/networking/LICENSE.qla3xxx46
-rw-r--r--Documentation/networking/LICENSE.qlcnic288
-rw-r--r--Documentation/networking/LICENSE.qlge288
-rw-r--r--Documentation/networking/Makefile1
-rw-r--r--Documentation/networking/PLIP.txt215
-rw-r--r--Documentation/networking/README.ipw2100293
-rw-r--r--Documentation/networking/README.ipw2200472
-rw-r--r--Documentation/networking/README.sb1000207
-rw-r--r--Documentation/networking/alias.txt40
-rw-r--r--Documentation/networking/altera_tse.txt263
-rw-r--r--Documentation/networking/arcnet-hardware.txt3133
-rw-r--r--Documentation/networking/arcnet.txt556
-rw-r--r--Documentation/networking/atm.txt8
-rw-r--r--Documentation/networking/ax25.txt10
-rw-r--r--Documentation/networking/batman-adv.txt202
-rw-r--r--Documentation/networking/baycom.txt158
-rw-r--r--Documentation/networking/bonding.txt2743
-rw-r--r--Documentation/networking/bridge.txt15
-rw-r--r--Documentation/networking/caif/Linux-CAIF.txt175
-rw-r--r--Documentation/networking/caif/README109
-rw-r--r--Documentation/networking/caif/spi_porting.txt208
-rw-r--r--Documentation/networking/can.txt1216
-rw-r--r--Documentation/networking/cdc_mbim.txt339
-rw-r--r--Documentation/networking/cops.txt63
-rw-r--r--Documentation/networking/cs89x0.txt624
-rw-r--r--Documentation/networking/cxacru-cf.py48
-rw-r--r--Documentation/networking/cxacru.txt100
-rw-r--r--Documentation/networking/cxgb.txt352
-rw-r--r--Documentation/networking/dccp.txt207
-rw-r--r--Documentation/networking/dctcp.txt43
-rw-r--r--Documentation/networking/de4x5.txt178
-rw-r--r--Documentation/networking/decnet.txt232
-rw-r--r--Documentation/networking/dl2k.txt282
-rw-r--r--Documentation/networking/dm9000.txt167
-rw-r--r--Documentation/networking/dmfe.txt66
-rw-r--r--Documentation/networking/dns_resolver.txt157
-rw-r--r--Documentation/networking/driver.txt93
-rw-r--r--Documentation/networking/e100.txt197
-rw-r--r--Documentation/networking/e1000.txt461
-rw-r--r--Documentation/networking/e1000e.txt312
-rw-r--r--Documentation/networking/eql.txt528
-rw-r--r--Documentation/networking/fib_trie.txt145
-rw-r--r--Documentation/networking/filter.txt1297
-rw-r--r--Documentation/networking/fore200e.txt39
-rw-r--r--Documentation/networking/framerelay.txt39
-rw-r--r--Documentation/networking/gen_stats.txt117
-rw-r--r--Documentation/networking/generic-hdlc.txt132
-rw-r--r--Documentation/networking/generic_netlink.txt3
-rw-r--r--Documentation/networking/gianfar.txt42
-rw-r--r--Documentation/networking/i40e.txt118
-rw-r--r--Documentation/networking/i40evf.txt47
-rw-r--r--Documentation/networking/ieee802154.txt151
-rw-r--r--Documentation/networking/igb.txt129
-rw-r--r--Documentation/networking/igbvf.txt80
-rw-r--r--Documentation/networking/ip-sysctl.txt1858
-rw-r--r--Documentation/networking/ip_dynaddr.txt29
-rw-r--r--Documentation/networking/ipddp.txt73
-rw-r--r--Documentation/networking/iphase.txt158
-rw-r--r--Documentation/networking/ipsec.txt38
-rw-r--r--Documentation/networking/ipv6.txt72
-rw-r--r--Documentation/networking/ipvlan.txt107
-rw-r--r--Documentation/networking/ipvs-sysctl.txt232
-rw-r--r--Documentation/networking/irda.txt10
-rw-r--r--Documentation/networking/ixgb.txt433
-rw-r--r--Documentation/networking/ixgbe.txt349
-rw-r--r--Documentation/networking/ixgbevf.txt52
-rw-r--r--Documentation/networking/l2tp.txt348
-rw-r--r--Documentation/networking/lapb-module.txt263
-rw-r--r--Documentation/networking/ltpc.txt131
-rw-r--r--Documentation/networking/mac80211-auth-assoc-deauth.txt95
-rw-r--r--Documentation/networking/mac80211-injection.txt67
-rw-r--r--Documentation/networking/mac80211_hwsim/README68
-rw-r--r--Documentation/networking/mac80211_hwsim/hostapd.conf11
-rw-r--r--Documentation/networking/mac80211_hwsim/wpa_supplicant.conf10
-rw-r--r--Documentation/networking/mpls-sysctl.txt29
-rw-r--r--Documentation/networking/multiqueue.txt79
-rw-r--r--Documentation/networking/netconsole.txt177
-rw-r--r--Documentation/networking/netdev-FAQ.txt224
-rw-r--r--Documentation/networking/netdev-features.txt167
-rw-r--r--Documentation/networking/netdevices.txt107
-rw-r--r--Documentation/networking/netif-msg.txt79
-rw-r--r--Documentation/networking/netlink_mmap.txt332
-rw-r--r--Documentation/networking/nf_conntrack-sysctl.txt177
-rw-r--r--Documentation/networking/nfc.txt128
-rw-r--r--Documentation/networking/openvswitch.txt248
-rw-r--r--Documentation/networking/operstates.txt162
-rw-r--r--Documentation/networking/packet_mmap.txt1068
-rw-r--r--Documentation/networking/phonet.txt214
-rw-r--r--Documentation/networking/phy.txt339
-rw-r--r--Documentation/networking/pktgen.txt323
-rw-r--r--Documentation/networking/policy-routing.txt150
-rw-r--r--Documentation/networking/ppp_generic.txt432
-rw-r--r--Documentation/networking/proc_net_tcp.txt48
-rw-r--r--Documentation/networking/radiotap-headers.txt152
-rw-r--r--Documentation/networking/ray_cs.txt150
-rw-r--r--Documentation/networking/rds.txt355
-rw-r--r--Documentation/networking/regulatory.txt214
-rw-r--r--Documentation/networking/rxrpc.txt947
-rw-r--r--Documentation/networking/s2io.txt141
-rw-r--r--Documentation/networking/scaling.txt445
-rw-r--r--Documentation/networking/sctp.txt35
-rw-r--r--Documentation/networking/secid.txt14
-rw-r--r--Documentation/networking/skfp.txt220
-rw-r--r--Documentation/networking/smc9.txt42
-rw-r--r--Documentation/networking/spider_net.txt204
-rw-r--r--Documentation/networking/stmmac.txt369
-rw-r--r--Documentation/networking/switchdev.txt59
-rw-r--r--Documentation/networking/tc-actions-env-rules.txt30
-rw-r--r--Documentation/networking/tcp-thin.txt47
-rw-r--r--Documentation/networking/tcp.txt106
-rw-r--r--Documentation/networking/team.txt2
-rw-r--r--Documentation/networking/timestamping.txt457
-rw-r--r--Documentation/networking/timestamping/.gitignore3
-rw-r--r--Documentation/networking/timestamping/Makefile14
-rw-r--r--Documentation/networking/timestamping/hwtstamp_config.c134
-rw-r--r--Documentation/networking/timestamping/timestamping.c528
-rw-r--r--Documentation/networking/timestamping/txtimestamp.c549
-rw-r--r--Documentation/networking/tlan.txt117
-rw-r--r--Documentation/networking/tproxy.txt84
-rw-r--r--Documentation/networking/tuntap.txt227
-rw-r--r--Documentation/networking/udplite.txt278
-rw-r--r--Documentation/networking/vortex.txt448
-rw-r--r--Documentation/networking/vxge.txt93
-rw-r--r--Documentation/networking/vxlan.txt47
-rw-r--r--Documentation/networking/x25-iface.txt123
-rw-r--r--Documentation/networking/x25.txt44
-rw-r--r--Documentation/networking/xfrm_proc.txt74
-rw-r--r--Documentation/networking/xfrm_sync.txt169
-rw-r--r--Documentation/networking/xfrm_sysctl.txt4
-rw-r--r--Documentation/networking/z8530drv.txt657
-rw-r--r--Documentation/nfc/nfc-hci.txt290
-rw-r--r--Documentation/nfc/nfc-pn544.txt32
-rw-r--r--Documentation/nios2/README23
-rw-r--r--Documentation/nommu-mmap.txt291
-rw-r--r--Documentation/numastat.txt27
-rw-r--r--Documentation/oops-tracing.txt279
-rw-r--r--Documentation/padata.txt160
-rw-r--r--Documentation/parisc/00-INDEX6
-rw-r--r--Documentation/parisc/debugging39
-rw-r--r--Documentation/parisc/registers129
-rw-r--r--Documentation/parport-lowlevel.txt1471
-rw-r--r--Documentation/parport.txt267
-rw-r--r--Documentation/pcmcia/.gitignore1
-rw-r--r--Documentation/pcmcia/Makefile7
-rw-r--r--Documentation/pcmcia/crc32hash.c32
-rw-r--r--Documentation/pcmcia/devicetable.txt33
-rw-r--r--Documentation/pcmcia/driver-changes.txt152
-rw-r--r--Documentation/pcmcia/driver.txt30
-rw-r--r--Documentation/pcmcia/locking.txt118
-rw-r--r--Documentation/percpu-rw-semaphore.txt27
-rw-r--r--Documentation/phy.txt152
-rw-r--r--Documentation/phy/samsung-usb2.txt135
-rw-r--r--Documentation/pi-futex.txt121
-rw-r--r--Documentation/pinctrl.txt1408
-rw-r--r--Documentation/platform/x86-laptop-drivers.txt18
-rw-r--r--Documentation/pnp.txt251
-rw-r--r--Documentation/power/00-INDEX46
-rw-r--r--Documentation/power/apm-acpi.txt32
-rw-r--r--Documentation/power/basic-pm-debugging.txt229
-rw-r--r--Documentation/power/charger-manager.txt200
-rw-r--r--Documentation/power/devices.txt697
-rw-r--r--Documentation/power/drivers-testing.txt46
-rw-r--r--Documentation/power/freezing-of-tasks.txt230
-rw-r--r--Documentation/power/interface.txt75
-rw-r--r--Documentation/power/notifiers.txt55
-rw-r--r--Documentation/power/opp.txt362
-rw-r--r--Documentation/power/pci.txt1025
-rw-r--r--Documentation/power/pm_qos_interface.txt224
-rw-r--r--Documentation/power/power_supply_class.txt214
-rw-r--r--Documentation/power/powercap/powercap.txt236
-rw-r--r--Documentation/power/regulator/consumer.txt218
-rw-r--r--Documentation/power/regulator/design.txt33
-rw-r--r--Documentation/power/regulator/machine.txt100
-rw-r--r--Documentation/power/regulator/overview.txt171
-rw-r--r--Documentation/power/regulator/regulator.txt30
-rw-r--r--Documentation/power/runtime_pm.txt912
-rw-r--r--Documentation/power/s2ram.txt85
-rw-r--r--Documentation/power/states.txt109
-rw-r--r--Documentation/power/suspend-and-cpuhotplug.txt275
-rw-r--r--Documentation/power/suspend-and-interrupts.txt135
-rw-r--r--Documentation/power/swsusp-and-swap-files.txt60
-rw-r--r--Documentation/power/swsusp-dmcrypt.txt138
-rw-r--r--Documentation/power/swsusp.txt429
-rw-r--r--Documentation/power/tricks.txt27
-rw-r--r--Documentation/power/tuxonice-internals.txt532
-rw-r--r--Documentation/power/tuxonice.txt948
-rw-r--r--Documentation/power/userland-swsusp.txt170
-rw-r--r--Documentation/power/video.txt185
-rw-r--r--Documentation/powerpc/00-INDEX32
-rw-r--r--Documentation/powerpc/bootwrapper.txt141
-rw-r--r--Documentation/powerpc/cpu_families.txt221
-rw-r--r--Documentation/powerpc/cpu_features.txt56
-rw-r--r--Documentation/powerpc/cxl.txt379
-rw-r--r--Documentation/powerpc/eeh-pci-error-recovery.txt334
-rw-r--r--Documentation/powerpc/firmware-assisted-dump.txt270
-rw-r--r--Documentation/powerpc/hvcs.txt567
-rw-r--r--Documentation/powerpc/mpc52xx.txt39
-rw-r--r--Documentation/powerpc/pci_iov_resource_on_powernv.txt301
-rw-r--r--Documentation/powerpc/pmu-ebb.txt137
-rw-r--r--Documentation/powerpc/ptrace.txt151
-rw-r--r--Documentation/powerpc/qe_firmware.txt295
-rw-r--r--Documentation/powerpc/transactional_memory.txt198
-rw-r--r--Documentation/pps/pps.txt233
-rw-r--r--Documentation/prctl/.gitignore3
-rw-r--r--Documentation/prctl/Makefile8
-rw-r--r--Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c97
-rw-r--r--Documentation/prctl/disable-tsc-on-off-stress-test.c96
-rw-r--r--Documentation/prctl/disable-tsc-test.c95
-rw-r--r--Documentation/prctl/no_new_privs.txt57
-rw-r--r--Documentation/prctl/seccomp_filter.txt225
-rw-r--r--Documentation/preempt-locking.txt135
-rw-r--r--Documentation/printk-formats.txt291
-rw-r--r--Documentation/pti/pti_intel_mid.txt99
-rw-r--r--Documentation/ptp/.gitignore1
-rw-r--r--Documentation/ptp/Makefile8
-rw-r--r--Documentation/ptp/ptp.txt89
-rw-r--r--Documentation/ptp/testptp.c519
-rw-r--r--Documentation/ptp/testptp.mk33
-rw-r--r--Documentation/pwm.txt122
-rw-r--r--Documentation/ramoops.txt128
-rw-r--r--Documentation/rapidio/rapidio.txt351
-rw-r--r--Documentation/rapidio/sysfs.txt158
-rw-r--r--Documentation/rapidio/tsi721.txt62
-rw-r--r--Documentation/rbtree.txt392
-rw-r--r--Documentation/remoteproc.txt299
-rw-r--r--Documentation/rfkill.txt123
-rw-r--r--Documentation/robust-futex-ABI.txt182
-rw-r--r--Documentation/robust-futexes.txt218
-rw-r--r--Documentation/rpmsg.txt293
-rw-r--r--Documentation/rtc.txt207
-rw-r--r--Documentation/s390/00-INDEX28
-rw-r--r--Documentation/s390/3270.ChangeLog44
-rw-r--r--Documentation/s390/3270.txt271
-rw-r--r--Documentation/s390/CommonIO125
-rw-r--r--Documentation/s390/DASD73
-rw-r--r--Documentation/s390/Debugging390.txt2142
-rw-r--r--Documentation/s390/cds.txt472
-rw-r--r--Documentation/s390/config3270.sh76
-rw-r--r--Documentation/s390/driver-model.txt287
-rw-r--r--Documentation/s390/kvm.txt125
-rw-r--r--Documentation/s390/monreader.txt197
-rw-r--r--Documentation/s390/qeth.txt50
-rw-r--r--Documentation/s390/s390dbf.txt667
-rw-r--r--Documentation/s390/zfcpdump.txt52
-rw-r--r--Documentation/scheduler/00-INDEX18
-rw-r--r--Documentation/scheduler/completion.txt248
-rw-r--r--Documentation/scheduler/sched-BFS.txt347
-rw-r--r--Documentation/scheduler/sched-arch.txt74
-rw-r--r--Documentation/scheduler/sched-bwc.txt122
-rw-r--r--Documentation/scheduler/sched-deadline.txt525
-rw-r--r--Documentation/scheduler/sched-design-CFS.txt242
-rw-r--r--Documentation/scheduler/sched-domains.txt77
-rw-r--r--Documentation/scheduler/sched-nice-design.txt108
-rw-r--r--Documentation/scheduler/sched-rt-group.txt183
-rw-r--r--Documentation/scheduler/sched-stats.txt154
-rw-r--r--Documentation/scsi/00-INDEX110
-rw-r--r--Documentation/scsi/53c700.txt135
-rw-r--r--Documentation/scsi/BusLogic.txt566
-rw-r--r--Documentation/scsi/ChangeLog.1992-19972023
-rw-r--r--Documentation/scsi/ChangeLog.arcmsr118
-rw-r--r--Documentation/scsi/ChangeLog.ips122
-rw-r--r--Documentation/scsi/ChangeLog.lpfc1865
-rw-r--r--Documentation/scsi/ChangeLog.megaraid614
-rw-r--r--Documentation/scsi/ChangeLog.megaraid_sas641
-rw-r--r--Documentation/scsi/ChangeLog.ncr53c8xx495
-rw-r--r--Documentation/scsi/ChangeLog.sym53c8xx593
-rw-r--r--Documentation/scsi/ChangeLog.sym53c8xx_2144
-rw-r--r--Documentation/scsi/FlashPoint.txt163
-rw-r--r--Documentation/scsi/LICENSE.FlashPoint60
-rw-r--r--Documentation/scsi/LICENSE.qla2xxx290
-rw-r--r--Documentation/scsi/LICENSE.qla4xxx289
-rw-r--r--Documentation/scsi/Mylex.txt5
-rw-r--r--Documentation/scsi/NinjaSCSI.txt128
-rw-r--r--Documentation/scsi/aacraid.txt150
-rw-r--r--Documentation/scsi/advansys.txt243
-rw-r--r--Documentation/scsi/aha152x.txt183
-rw-r--r--Documentation/scsi/aic79xx.txt497
-rw-r--r--Documentation/scsi/aic7xxx.txt394
-rw-r--r--Documentation/scsi/arcmsr_spec.txt574
-rw-r--r--Documentation/scsi/bfa.txt82
-rw-r--r--Documentation/scsi/bnx2fc.txt75
-rw-r--r--Documentation/scsi/cxgb3i.txt84
-rw-r--r--Documentation/scsi/dc395x.txt102
-rw-r--r--Documentation/scsi/dpti.txt83
-rw-r--r--Documentation/scsi/dtc3x80.txt43
-rw-r--r--Documentation/scsi/g_NCR5380.txt63
-rw-r--r--Documentation/scsi/hpsa.txt130
-rw-r--r--Documentation/scsi/hptiop.txt184
-rw-r--r--Documentation/scsi/in2000.txt202
-rw-r--r--Documentation/scsi/libsas.txt395
-rw-r--r--Documentation/scsi/link_power_management_policy.txt22
-rw-r--r--Documentation/scsi/lpfc.txt83
-rw-r--r--Documentation/scsi/megaraid.txt70
-rw-r--r--Documentation/scsi/ncr53c8xx.txt1824
-rw-r--r--Documentation/scsi/osd.txt197
-rw-r--r--Documentation/scsi/osst.txt218
-rw-r--r--Documentation/scsi/ppa.txt14
-rw-r--r--Documentation/scsi/qlogicfas.txt78
-rw-r--r--Documentation/scsi/scsi-changer.txt180
-rw-r--r--Documentation/scsi/scsi-generic.txt101
-rw-r--r--Documentation/scsi/scsi-parameters.txt133
-rw-r--r--Documentation/scsi/scsi.txt44
-rw-r--r--Documentation/scsi/scsi_eh.txt488
-rw-r--r--Documentation/scsi/scsi_fc_transport.txt496
-rw-r--r--Documentation/scsi/scsi_mid_low_api.txt1401
-rw-r--r--Documentation/scsi/scsi_transport_srp/Makefile7
-rw-r--r--Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot26
-rw-r--r--Documentation/scsi/st.txt526
-rw-r--r--Documentation/scsi/sym53c500_cs.txt23
-rw-r--r--Documentation/scsi/sym53c8xx_2.txt1048
-rw-r--r--Documentation/scsi/tmscsim.txt443
-rw-r--r--Documentation/scsi/ufs.txt133
-rw-r--r--Documentation/scsi/wd719x.txt4
-rw-r--r--Documentation/security/00-INDEX26
-rw-r--r--Documentation/security/IMA-templates.txt88
-rw-r--r--Documentation/security/LSM.txt34
-rw-r--r--Documentation/security/SELinux.txt27
-rw-r--r--Documentation/security/Smack.txt717
-rw-r--r--Documentation/security/Yama.txt73
-rw-r--r--Documentation/security/apparmor.txt39
-rw-r--r--Documentation/security/credentials.txt581
-rw-r--r--Documentation/security/keys-ecryptfs.txt68
-rw-r--r--Documentation/security/keys-request-key.txt202
-rw-r--r--Documentation/security/keys-trusted-encrypted.txt160
-rw-r--r--Documentation/security/keys.txt1433
-rw-r--r--Documentation/security/tomoyo.txt55
-rw-r--r--Documentation/serial-console.txt109
-rw-r--r--Documentation/serial/00-INDEX16
-rw-r--r--Documentation/serial/README.cycladesZ8
-rw-r--r--Documentation/serial/driver460
-rw-r--r--Documentation/serial/moxa-smartio523
-rw-r--r--Documentation/serial/n_gsm.txt89
-rw-r--r--Documentation/serial/rocket.txt189
-rw-r--r--Documentation/serial/serial-rs485.txt136
-rw-r--r--Documentation/serial/tty.txt304
-rw-r--r--Documentation/sgi-ioc4.txt45
-rw-r--r--Documentation/sh/new-machine.txt278
-rw-r--r--Documentation/sh/register-banks.txt33
-rw-r--r--Documentation/smsc_ece1099.txt56
-rw-r--r--Documentation/sound/alsa/ALSA-Configuration.txt2312
-rw-r--r--Documentation/sound/alsa/Audigy-mixer.txt345
-rw-r--r--Documentation/sound/alsa/Audiophile-Usb.txt442
-rw-r--r--Documentation/sound/alsa/Bt87x.txt78
-rw-r--r--Documentation/sound/alsa/CMIPCI.txt254
-rw-r--r--Documentation/sound/alsa/Channel-Mapping-API.txt153
-rw-r--r--Documentation/sound/alsa/ControlNames.txt107
-rw-r--r--Documentation/sound/alsa/HD-Audio-Controls.txt116
-rw-r--r--Documentation/sound/alsa/HD-Audio-Models.txt314
-rw-r--r--Documentation/sound/alsa/HD-Audio.txt863
-rw-r--r--Documentation/sound/alsa/Joystick.txt86
-rw-r--r--Documentation/sound/alsa/MIXART.txt100
-rw-r--r--Documentation/sound/alsa/OSS-Emulation.txt305
-rw-r--r--Documentation/sound/alsa/Procfile.txt234
-rw-r--r--Documentation/sound/alsa/README.maya44163
-rw-r--r--Documentation/sound/alsa/SB-Live-mixer.txt356
-rw-r--r--Documentation/sound/alsa/VIA82xx-mixer.txt8
-rw-r--r--Documentation/sound/alsa/alsa-parameters.txt135
-rw-r--r--Documentation/sound/alsa/compress_offload.txt234
-rw-r--r--Documentation/sound/alsa/emu10k1-jack.txt74
-rw-r--r--Documentation/sound/alsa/hda_codec.txt322
-rw-r--r--Documentation/sound/alsa/hdspm.txt362
-rw-r--r--Documentation/sound/alsa/powersave.txt41
-rw-r--r--Documentation/sound/alsa/seq_oss.html409
-rw-r--r--Documentation/sound/alsa/serial-u16550.txt88
-rw-r--r--Documentation/sound/alsa/soc/DAI.txt56
-rw-r--r--Documentation/sound/alsa/soc/DPCM.txt380
-rw-r--r--Documentation/sound/alsa/soc/clocking.txt51
-rw-r--r--Documentation/sound/alsa/soc/codec.txt179
-rw-r--r--Documentation/sound/alsa/soc/dapm.txt305
-rw-r--r--Documentation/sound/alsa/soc/jack.txt71
-rw-r--r--Documentation/sound/alsa/soc/machine.txt93
-rw-r--r--Documentation/sound/alsa/soc/overview.txt95
-rw-r--r--Documentation/sound/alsa/soc/platform.txt79
-rw-r--r--Documentation/sound/alsa/soc/pops_clicks.txt52
-rw-r--r--Documentation/sound/alsa/timestamping.txt200
-rw-r--r--Documentation/sound/oss/ALS66
-rw-r--r--Documentation/sound/oss/AudioExcelDSP16101
-rw-r--r--Documentation/sound/oss/CMI8330152
-rw-r--r--Documentation/sound/oss/ESS34
-rw-r--r--Documentation/sound/oss/ESS186855
-rw-r--r--Documentation/sound/oss/Introduction459
-rw-r--r--Documentation/sound/oss/MultiSound1065
-rw-r--r--Documentation/sound/oss/OPL36
-rw-r--r--Documentation/sound/oss/Opti218
-rw-r--r--Documentation/sound/oss/PAS16162
-rw-r--r--Documentation/sound/oss/PSS34
-rw-r--r--Documentation/sound/oss/PSS-updates82
-rw-r--r--Documentation/sound/oss/README.OSS1425
-rw-r--r--Documentation/sound/oss/README.modules106
-rw-r--r--Documentation/sound/oss/README.ymfsb107
-rw-r--r--Documentation/sound/oss/SoundPro105
-rw-r--r--Documentation/sound/oss/Soundblaster53
-rw-r--r--Documentation/sound/oss/Tropez+26
-rw-r--r--Documentation/sound/oss/VIBRA1680
-rw-r--r--Documentation/sound/oss/WaveArtist170
-rw-r--r--Documentation/sound/oss/btaudio92
-rw-r--r--Documentation/sound/oss/mwave185
-rw-r--r--Documentation/sound/oss/oss-parameters.txt51
-rw-r--r--Documentation/sound/oss/ultrasound30
-rw-r--r--Documentation/sparse.txt108
-rw-r--r--Documentation/spi/.gitignore2
-rw-r--r--Documentation/spi/00-INDEX22
-rw-r--r--Documentation/spi/Makefile8
-rw-r--r--Documentation/spi/butterfly68
-rw-r--r--Documentation/spi/ep93xx_spi105
-rw-r--r--Documentation/spi/pxa2xx241
-rw-r--r--Documentation/spi/spi-lm70llp79
-rw-r--r--Documentation/spi/spi-sc18is60236
-rw-r--r--Documentation/spi/spi-summary612
-rw-r--r--Documentation/spi/spidev149
-rw-r--r--Documentation/spi/spidev_fdx.c158
-rw-r--r--Documentation/spi/spidev_test.c318
-rw-r--r--Documentation/stable_api_nonsense.txt190
-rw-r--r--Documentation/stable_kernel_rules.txt132
-rw-r--r--Documentation/static-keys.txt286
-rw-r--r--Documentation/svga.txt276
-rw-r--r--Documentation/sysctl/00-INDEX16
-rw-r--r--Documentation/sysctl/README75
-rw-r--r--Documentation/sysctl/abi.txt54
-rw-r--r--Documentation/sysctl/fs.txt300
-rw-r--r--Documentation/sysctl/kernel.txt961
-rw-r--r--Documentation/sysctl/net.txt304
-rw-r--r--Documentation/sysctl/sunrpc.txt20
-rw-r--r--Documentation/sysctl/vm.txt815
-rw-r--r--Documentation/sysfs-rules.txt184
-rw-r--r--Documentation/sysrq.txt255
-rwxr-xr-xDocumentation/target/tcm_mod_builder.py968
-rw-r--r--Documentation/target/tcm_mod_builder.txt145
-rw-r--r--Documentation/target/tcmu-design.txt372
-rw-r--r--Documentation/thermal/cpu-cooling-api.txt43
-rw-r--r--Documentation/thermal/exynos_thermal77
-rw-r--r--Documentation/thermal/exynos_thermal_emulation53
-rw-r--r--Documentation/thermal/intel_powerclamp.txt307
-rw-r--r--Documentation/thermal/nouveau_thermal82
-rw-r--r--Documentation/thermal/sysfs-api.txt401
-rw-r--r--Documentation/thermal/x86_pkg_temperature_thermal47
-rw-r--r--Documentation/this_cpu_ops.txt332
-rw-r--r--Documentation/timers/.gitignore1
-rw-r--r--Documentation/timers/00-INDEX20
-rw-r--r--Documentation/timers/Makefile5
-rw-r--r--Documentation/timers/NO_HZ.txt348
-rw-r--r--Documentation/timers/highres.txt249
-rw-r--r--Documentation/timers/hpet.txt30
-rw-r--r--Documentation/timers/hpet_example.c294
-rw-r--r--Documentation/timers/hrtimers.txt178
-rw-r--r--Documentation/timers/timekeeping.txt179
-rw-r--r--Documentation/timers/timer_stats.txt73
-rw-r--r--Documentation/timers/timers-howto.txt105
-rw-r--r--Documentation/tp_smapi.txt267
-rw-r--r--Documentation/tpm/xen-tpmfront.txt113
-rw-r--r--Documentation/trace/coresight.txt299
-rw-r--r--Documentation/trace/events-kmem.txt107
-rw-r--r--Documentation/trace/events-nmi.txt43
-rw-r--r--Documentation/trace/events-power.txt96
-rw-r--r--Documentation/trace/events.txt496
-rw-r--r--Documentation/trace/ftrace-design.txt382
-rw-r--r--Documentation/trace/ftrace.txt2846
-rw-r--r--Documentation/trace/function-graph-fold.vim42
-rw-r--r--Documentation/trace/kprobetrace.txt172
-rw-r--r--Documentation/trace/mmiotrace.txt164
-rw-r--r--Documentation/trace/postprocess/trace-pagealloc-postprocess.pl418
-rw-r--r--Documentation/trace/postprocess/trace-vmscan-postprocess.pl757
-rw-r--r--Documentation/trace/ring-buffer-design.txt955
-rw-r--r--Documentation/trace/tracepoint-analysis.txt327
-rw-r--r--Documentation/trace/tracepoints.txt145
-rw-r--r--Documentation/trace/uprobetracer.txt159
-rw-r--r--Documentation/unaligned-memory-access.txt262
-rw-r--r--Documentation/unicode.txt175
-rw-r--r--Documentation/unshare.txt295
-rw-r--r--Documentation/usb/CREDITS175
-rw-r--r--Documentation/usb/URB.txt261
-rw-r--r--Documentation/usb/WUSB-Design-overview.txt439
-rw-r--r--Documentation/usb/acm.txt128
-rw-r--r--Documentation/usb/anchors.txt79
-rw-r--r--Documentation/usb/authorization.txt92
-rw-r--r--Documentation/usb/bulk-streams.txt78
-rw-r--r--Documentation/usb/callbacks.txt134
-rw-r--r--Documentation/usb/chipidea.txt92
-rw-r--r--Documentation/usb/dma.txt133
-rw-r--r--Documentation/usb/dwc3.txt45
-rw-r--r--Documentation/usb/ehci.txt214
-rw-r--r--Documentation/usb/error-codes.txt175
-rw-r--r--Documentation/usb/functionfs.txt67
-rw-r--r--Documentation/usb/gadget-testing.txt775
-rw-r--r--Documentation/usb/gadget_configfs.txt384
-rw-r--r--Documentation/usb/gadget_hid.txt452
-rw-r--r--Documentation/usb/gadget_multi.txt150
-rw-r--r--Documentation/usb/gadget_printer.txt510
-rw-r--r--Documentation/usb/gadget_serial.txt286
-rw-r--r--Documentation/usb/hotplug.txt148
-rw-r--r--Documentation/usb/iuu_phoenix.txt84
-rw-r--r--Documentation/usb/linux-cdc-acm.inf107
-rw-r--r--Documentation/usb/linux.inf66
-rw-r--r--Documentation/usb/mass-storage.txt225
-rw-r--r--Documentation/usb/misc_usbsevseg.txt46
-rw-r--r--Documentation/usb/mtouchusb.txt72
-rw-r--r--Documentation/usb/ohci.txt32
-rw-r--r--Documentation/usb/persist.txt165
-rw-r--r--Documentation/usb/power-management.txt760
-rw-r--r--Documentation/usb/proc_usb_info.txt390
-rw-r--r--Documentation/usb/rio.txt138
-rw-r--r--Documentation/usb/usb-help.txt16
-rw-r--r--Documentation/usb/usb-serial.txt493
-rw-r--r--Documentation/usb/usbmon.txt355
-rw-r--r--Documentation/usb/wusb-cbaf130
-rw-r--r--Documentation/vDSO/.gitignore2
-rw-r--r--Documentation/vDSO/Makefile15
-rw-r--r--Documentation/vDSO/parse_vdso.c269
-rw-r--r--Documentation/vDSO/vdso_standalone_test_x86.c128
-rw-r--r--Documentation/vDSO/vdso_test.c52
-rw-r--r--Documentation/vfio.txt458
-rw-r--r--Documentation/vgaarbiter.txt192
-rw-r--r--Documentation/video-output.txt34
-rw-r--r--Documentation/video4linux/4CCs.txt32
-rw-r--r--Documentation/video4linux/API.html27
-rw-r--r--Documentation/video4linux/CARDLIST.au08286
-rw-r--r--Documentation/video4linux/CARDLIST.bttv167
-rw-r--r--Documentation/video4linux/CARDLIST.cx2388547
-rw-r--r--Documentation/video4linux/CARDLIST.cx8891
-rw-r--r--Documentation/video4linux/CARDLIST.em28xx96
-rw-r--r--Documentation/video4linux/CARDLIST.ivtv24
-rw-r--r--Documentation/video4linux/CARDLIST.saa7134194
-rw-r--r--Documentation/video4linux/CARDLIST.saa716411
-rw-r--r--Documentation/video4linux/CARDLIST.tm600016
-rw-r--r--Documentation/video4linux/CARDLIST.tuner91
-rw-r--r--Documentation/video4linux/CARDLIST.usbvision67
-rw-r--r--Documentation/video4linux/Makefile1
-rw-r--r--Documentation/video4linux/README.cpia2130
-rw-r--r--Documentation/video4linux/README.cx8867
-rw-r--r--Documentation/video4linux/README.davinci-vpbe93
-rw-r--r--Documentation/video4linux/README.ir72
-rw-r--r--Documentation/video4linux/README.ivtv186
-rw-r--r--Documentation/video4linux/README.pvrusb2212
-rw-r--r--Documentation/video4linux/README.saa713482
-rw-r--r--Documentation/video4linux/Zoran510
-rw-r--r--Documentation/video4linux/bttv/CONTRIBUTORS25
-rw-r--r--Documentation/video4linux/bttv/Cards960
-rw-r--r--Documentation/video4linux/bttv/ICs37
-rw-r--r--Documentation/video4linux/bttv/Insmod-options172
-rw-r--r--Documentation/video4linux/bttv/MAKEDEV27
-rw-r--r--Documentation/video4linux/bttv/Modprobe.conf11
-rw-r--r--Documentation/video4linux/bttv/Modules.conf14
-rw-r--r--Documentation/video4linux/bttv/PROBLEMS62
-rw-r--r--Documentation/video4linux/bttv/README86
-rw-r--r--Documentation/video4linux/bttv/README.WINVIEW33
-rw-r--r--Documentation/video4linux/bttv/README.freeze74
-rw-r--r--Documentation/video4linux/bttv/README.quirks83
-rw-r--r--Documentation/video4linux/bttv/Sound-FAQ148
-rw-r--r--Documentation/video4linux/bttv/Specs3
-rw-r--r--Documentation/video4linux/bttv/THANKS24
-rw-r--r--Documentation/video4linux/bttv/Tuners115
-rw-r--r--Documentation/video4linux/cafe_ccic54
-rw-r--r--Documentation/video4linux/cpia2_overview.txt38
-rw-r--r--Documentation/video4linux/cx18.txt30
-rw-r--r--Documentation/video4linux/cx2341x/README.hm12120
-rw-r--r--Documentation/video4linux/cx2341x/README.vbi45
-rw-r--r--Documentation/video4linux/cx2341x/fw-calling.txt69
-rw-r--r--Documentation/video4linux/cx2341x/fw-decoder-api.txt297
-rw-r--r--Documentation/video4linux/cx2341x/fw-decoder-regs.txt817
-rw-r--r--Documentation/video4linux/cx2341x/fw-dma.txt96
-rw-r--r--Documentation/video4linux/cx2341x/fw-encoder-api.txt709
-rw-r--r--Documentation/video4linux/cx2341x/fw-memory.txt139
-rw-r--r--Documentation/video4linux/cx2341x/fw-osd-api.txt350
-rw-r--r--Documentation/video4linux/cx2341x/fw-upload.txt49
-rw-r--r--Documentation/video4linux/cx88/hauppauge-wintv-cx88-ir.txt54
-rw-r--r--Documentation/video4linux/fimc.txt148
-rw-r--r--Documentation/video4linux/gspca.txt408
-rw-r--r--Documentation/video4linux/hauppauge-wintv-cx88-ir.txt54
-rw-r--r--Documentation/video4linux/lifeview.txt42
-rw-r--r--Documentation/video4linux/meye.txt123
-rw-r--r--Documentation/video4linux/not-in-cx2388x-datasheet.txt41
-rw-r--r--Documentation/video4linux/omap3isp.txt279
-rw-r--r--Documentation/video4linux/omap4_camera.txt60
-rw-r--r--Documentation/video4linux/pxa_camera.txt174
-rw-r--r--Documentation/video4linux/radiotrack.txt147
-rw-r--r--Documentation/video4linux/sh_mobile_ceu_camera.txt139
-rw-r--r--Documentation/video4linux/si470x.txt129
-rw-r--r--Documentation/video4linux/si4713.txt176
-rw-r--r--Documentation/video4linux/si476x.txt187
-rw-r--r--Documentation/video4linux/soc-camera.txt164
-rw-r--r--Documentation/video4linux/uvcvideo.txt239
-rw-r--r--Documentation/video4linux/v4l2-controls.txt752
-rw-r--r--Documentation/video4linux/v4l2-framework.txt1156
-rw-r--r--Documentation/video4linux/v4l2-pci-skeleton.c924
-rw-r--r--Documentation/video4linux/videobuf355
-rw-r--r--Documentation/video4linux/vivid.txt1129
-rw-r--r--Documentation/video4linux/zr364xx.txt69
-rw-r--r--Documentation/virtual/00-INDEX11
-rw-r--r--Documentation/virtual/kvm/00-INDEX26
-rw-r--r--Documentation/virtual/kvm/api.txt3592
-rw-r--r--Documentation/virtual/kvm/cpuid.txt60
-rw-r--r--Documentation/virtual/kvm/devices/README1
-rw-r--r--Documentation/virtual/kvm/devices/arm-vgic.txt116
-rw-r--r--Documentation/virtual/kvm/devices/mpic.txt53
-rw-r--r--Documentation/virtual/kvm/devices/s390_flic.txt94
-rw-r--r--Documentation/virtual/kvm/devices/vfio.txt22
-rw-r--r--Documentation/virtual/kvm/devices/vm.txt85
-rw-r--r--Documentation/virtual/kvm/devices/xics.txt66
-rw-r--r--Documentation/virtual/kvm/hypercalls.txt83
-rw-r--r--Documentation/virtual/kvm/locking.txt168
-rw-r--r--Documentation/virtual/kvm/mmu.txt458
-rw-r--r--Documentation/virtual/kvm/msr.txt266
-rw-r--r--Documentation/virtual/kvm/nested-vmx.txt251
-rw-r--r--Documentation/virtual/kvm/ppc-pv.txt212
-rw-r--r--Documentation/virtual/kvm/review-checklist.txt38
-rw-r--r--Documentation/virtual/kvm/s390-diag.txt82
-rw-r--r--Documentation/virtual/kvm/timekeeping.txt612
-rw-r--r--Documentation/virtual/paravirt_ops.txt32
-rw-r--r--Documentation/virtual/uml/UserModeLinux-HOWTO.txt4589
-rw-r--r--Documentation/vm/.gitignore2
-rw-r--r--Documentation/vm/00-INDEX42
-rw-r--r--Documentation/vm/active_mm.txt83
-rw-r--r--Documentation/vm/balance93
-rw-r--r--Documentation/vm/cleancache.txt277
-rw-r--r--Documentation/vm/frontswap.txt278
-rw-r--r--Documentation/vm/highmem.txt162
-rw-r--r--Documentation/vm/hugetlbpage.txt335
-rw-r--r--Documentation/vm/hwpoison.txt187
-rw-r--r--Documentation/vm/ksm.txt97
-rw-r--r--Documentation/vm/numa149
-rw-r--r--Documentation/vm/numa_memory_policy.txt452
-rw-r--r--Documentation/vm/overcommit-accounting80
-rw-r--r--Documentation/vm/page_migration149
-rw-r--r--Documentation/vm/page_owner.txt81
-rw-r--r--Documentation/vm/pagemap.txt161
-rw-r--r--Documentation/vm/remap_file_pages.txt27
-rw-r--r--Documentation/vm/slub.txt283
-rw-r--r--Documentation/vm/soft-dirty.txt43
-rw-r--r--Documentation/vm/split_page_table_lock94
-rw-r--r--Documentation/vm/transhuge.txt387
-rw-r--r--Documentation/vm/uksm.txt59
-rw-r--r--Documentation/vm/unevictable-lru.txt688
-rw-r--r--Documentation/vm/zsmalloc.txt70
-rw-r--r--Documentation/vm/zswap.txt68
-rw-r--r--Documentation/vme_api.txt406
-rw-r--r--Documentation/volatile-considered-harmful.txt119
-rw-r--r--Documentation/w1/00-INDEX10
-rw-r--r--Documentation/w1/masters/00-INDEX12
-rw-r--r--Documentation/w1/masters/ds248231
-rw-r--r--Documentation/w1/masters/ds249068
-rw-r--r--Documentation/w1/masters/mxc-w112
-rw-r--r--Documentation/w1/masters/omap-hdq46
-rw-r--r--Documentation/w1/masters/w1-gpio33
-rw-r--r--Documentation/w1/slaves/00-INDEX8
-rw-r--r--Documentation/w1/slaves/w1_ds240625
-rw-r--r--Documentation/w1/slaves/w1_ds242347
-rw-r--r--Documentation/w1/slaves/w1_ds28e0436
-rw-r--r--Documentation/w1/slaves/w1_therm50
-rw-r--r--Documentation/w1/w1.generic113
-rw-r--r--Documentation/w1/w1.netlink189
-rw-r--r--Documentation/watchdog/Makefile1
-rw-r--r--Documentation/watchdog/convert_drivers_to_kernel_api.txt218
-rw-r--r--Documentation/watchdog/hpwdt.txt95
-rw-r--r--Documentation/watchdog/pcwd-watchdog.txt66
-rw-r--r--Documentation/watchdog/src/.gitignore2
-rw-r--r--Documentation/watchdog/src/Makefile5
-rw-r--r--Documentation/watchdog/src/watchdog-simple.c24
-rw-r--r--Documentation/watchdog/src/watchdog-test.c86
-rw-r--r--Documentation/watchdog/watchdog-api.txt237
-rw-r--r--Documentation/watchdog/watchdog-kernel-api.txt226
-rw-r--r--Documentation/watchdog/watchdog-parameters.txt399
-rw-r--r--Documentation/watchdog/wdt.txt50
-rw-r--r--Documentation/wimax/README.i2400m237
-rw-r--r--Documentation/wimax/README.wimax81
-rw-r--r--Documentation/workqueue.txt388
-rw-r--r--Documentation/x86/00-INDEX20
-rw-r--r--Documentation/x86/boot.txt1131
-rw-r--r--Documentation/x86/early-microcode.txt42
-rw-r--r--Documentation/x86/earlyprintk.txt136
-rw-r--r--Documentation/x86/entry_64.txt104
-rw-r--r--Documentation/x86/exception-tables.txt292
-rw-r--r--Documentation/x86/i386/IO-APIC.txt119
-rw-r--r--Documentation/x86/intel_mpx.txt244
-rw-r--r--Documentation/x86/mtrr.txt305
-rw-r--r--Documentation/x86/pat.txt160
-rw-r--r--Documentation/x86/tlb.txt75
-rw-r--r--Documentation/x86/usb-legacy-support.txt44
-rw-r--r--Documentation/x86/x86_64/00-INDEX16
-rw-r--r--Documentation/x86/x86_64/boot-options.txt284
-rw-r--r--Documentation/x86/x86_64/cpu-hotplug-spec21
-rw-r--r--Documentation/x86/x86_64/fake-numa-for-cpusets67
-rw-r--r--Documentation/x86/x86_64/kernel-stacks101
-rw-r--r--Documentation/x86/x86_64/machinecheck83
-rw-r--r--Documentation/x86/x86_64/mm.txt42
-rw-r--r--Documentation/x86/x86_64/uefi.txt42
-rw-r--r--Documentation/x86/zero-page.txt37
-rw-r--r--Documentation/xillybus.txt380
-rw-r--r--Documentation/xtensa/atomctl.txt44
-rw-r--r--Documentation/xtensa/mmu.txt64
-rw-r--r--Documentation/xz.txt121
-rw-r--r--Documentation/zh_CN/CodingStyle694
-rw-r--r--Documentation/zh_CN/HOWTO538
-rw-r--r--Documentation/zh_CN/IRQ.txt39
-rw-r--r--Documentation/zh_CN/SecurityBugs50
-rw-r--r--Documentation/zh_CN/SubmittingDrivers164
-rw-r--r--Documentation/zh_CN/SubmittingPatches412
-rw-r--r--Documentation/zh_CN/arm/Booting175
-rw-r--r--Documentation/zh_CN/arm/kernel_user_helpers.txt284
-rw-r--r--Documentation/zh_CN/arm64/booting.txt219
-rw-r--r--Documentation/zh_CN/arm64/legacy_instructions.txt72
-rw-r--r--Documentation/zh_CN/arm64/memory.txt114
-rw-r--r--Documentation/zh_CN/arm64/tagged-pointers.txt52
-rw-r--r--Documentation/zh_CN/basic_profiling.txt71
-rw-r--r--Documentation/zh_CN/email-clients.txt210
-rw-r--r--Documentation/zh_CN/filesystems/sysfs.txt372
-rw-r--r--Documentation/zh_CN/gpio.txt658
-rw-r--r--Documentation/zh_CN/io_ordering.txt67
-rw-r--r--Documentation/zh_CN/magic-number.txt155
-rw-r--r--Documentation/zh_CN/oops-tracing.txt212
-rw-r--r--Documentation/zh_CN/sparse.txt100
-rw-r--r--Documentation/zh_CN/stable_api_nonsense.txt157
-rw-r--r--Documentation/zh_CN/stable_kernel_rules.txt66
-rw-r--r--Documentation/zh_CN/video4linux/omap3isp.txt277
-rw-r--r--Documentation/zh_CN/video4linux/v4l2-framework.txt976
-rw-r--r--Documentation/zh_CN/volatile-considered-harmful.txt113
-rw-r--r--Documentation/zorro.txt103
3837 files changed, 562246 insertions, 0 deletions
diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX
new file mode 100644
index 000000000..cd077ca0e
--- /dev/null
+++ b/Documentation/00-INDEX
@@ -0,0 +1,489 @@
+
+This is a brief list of all the files in ./linux/Documentation and what
+they contain. If you add a documentation file, please list it here in
+alphabetical order as well, or risk being hunted down like a rabid dog.
+Please keep the descriptions small enough to fit on one line.
+ Thanks -- Paul G.
+
+Following translations are available on the WWW:
+
+ - Japanese, maintained by the JF Project (jf@listserv.linux.or.jp), at
+ http://linuxjf.sourceforge.jp/
+
+00-INDEX
+ - this file.
+ABI/
+ - info on kernel <-> userspace ABI and relative interface stability.
+
+BUG-HUNTING
+ - brute force method of doing binary search of patches to find bug.
+Changes
+ - list of changes that break older software packages.
+CodingStyle
+ - how the maintainers expect the C code in the kernel to look.
+DMA-API.txt
+ - DMA API, pci_ API & extensions for non-consistent memory machines.
+DMA-API-HOWTO.txt
+ - Dynamic DMA mapping Guide
+DMA-ISA-LPC.txt
+ - How to do DMA with ISA (and LPC) devices.
+DMA-attributes.txt
+ - listing of the various possible attributes a DMA region can have
+DocBook/
+ - directory with DocBook templates etc. for kernel documentation.
+EDID/
+ - directory with info on customizing EDID for broken gfx/displays.
+HOWTO
+ - the process and procedures of how to do Linux kernel development.
+IPMI.txt
+ - info on Linux Intelligent Platform Management Interface (IPMI) Driver.
+IRQ-affinity.txt
+ - how to select which CPU(s) handle which interrupt events on SMP.
+IRQ-domain.txt
+ - info on interrupt numbering and setting up IRQ domains.
+IRQ.txt
+ - description of what an IRQ is.
+Intel-IOMMU.txt
+ - basic info on the Intel IOMMU virtualization support.
+Makefile
+ - some files in Documentation dir are actually sample code to build
+ManagementStyle
+ - how to (attempt to) manage kernel hackers.
+RCU/
+ - directory with info on RCU (read-copy update).
+SAK.txt
+ - info on Secure Attention Keys.
+SM501.txt
+ - Silicon Motion SM501 multimedia companion chip
+SecurityBugs
+ - procedure for reporting security bugs found in the kernel.
+SubmitChecklist
+ - Linux kernel patch submission checklist.
+SubmittingDrivers
+ - procedure to get a new driver source included into the kernel tree.
+SubmittingPatches
+ - procedure to get a source patch included into the kernel tree.
+VGA-softcursor.txt
+ - how to change your VGA cursor from a blinking underscore.
+accounting/
+ - documentation on accounting and taskstats.
+acpi/
+ - info on ACPI-specific hooks in the kernel.
+aoe/
+ - description of AoE (ATA over Ethernet) along with config examples.
+applying-patches.txt
+ - description of various trees and how to apply their patches.
+arm/
+ - directory with info about Linux on the ARM architecture.
+arm64/
+ - directory with info about Linux on the 64 bit ARM architecture.
+assoc_array.txt
+ - generic associative array intro.
+atomic_ops.txt
+ - semantics and behavior of atomic and bitmask operations.
+auxdisplay/
+ - misc. LCD driver documentation (cfag12864b, ks0108).
+backlight/
+ - directory with info on controlling backlights in flat panel displays
+bad_memory.txt
+ - how to use kernel parameters to exclude bad RAM regions.
+basic_profiling.txt
+ - basic instructions for those who wants to profile Linux kernel.
+bcache.txt
+ - Block-layer cache on fast SSDs to improve slow (raid) I/O performance.
+binfmt_misc.txt
+ - info on the kernel support for extra binary formats.
+blackfin/
+ - directory with documentation for the Blackfin arch.
+block/
+ - info on the Block I/O (BIO) layer.
+blockdev/
+ - info on block devices & drivers
+braille-console.txt
+ - info on how to use serial devices for Braille support.
+bt8xxgpio.txt
+ - info on how to modify a bt8xx video card for GPIO usage.
+btmrvl.txt
+ - info on Marvell Bluetooth driver usage.
+bus-devices/
+ - directory with info on TI GPMC (General Purpose Memory Controller)
+bus-virt-phys-mapping.txt
+ - how to access I/O mapped memory from within device drivers.
+cachetlb.txt
+ - describes the cache/TLB flushing interfaces Linux uses.
+cdrom/
+ - directory with information on the CD-ROM drivers that Linux has.
+cgroups/
+ - cgroups features, including cpusets and memory controller.
+circular-buffers.txt
+ - how to make use of the existing circular buffer infrastructure
+clk.txt
+ - info on the common clock framework
+coccinelle.txt
+ - info on how to get and use the Coccinelle code checking tool.
+connector/
+ - docs on the netlink based userspace<->kernel space communication mod.
+console/
+ - documentation on Linux console drivers.
+cpu-freq/
+ - info on CPU frequency and voltage scaling.
+cpu-hotplug.txt
+ - document describing CPU hotplug support in the Linux kernel.
+cpu-load.txt
+ - document describing how CPU load statistics are collected.
+cpuidle/
+ - info on CPU_IDLE, CPU idle state management subsystem.
+cputopology.txt
+ - documentation on how CPU topology info is exported via sysfs.
+crc32.txt
+ - brief tutorial on CRC computation
+cris/
+ - directory with info about Linux on CRIS architecture.
+crypto/
+ - directory with info on the Crypto API.
+dcdbas.txt
+ - information on the Dell Systems Management Base Driver.
+debugging-modules.txt
+ - some notes on debugging modules after Linux 2.6.3.
+debugging-via-ohci1394.txt
+ - how to use firewire like a hardware debugger memory reader.
+dell_rbu.txt
+ - document demonstrating the use of the Dell Remote BIOS Update driver.
+development-process/
+ - how to work with the mainline kernel development process.
+device-mapper/
+ - directory with info on Device Mapper.
+devices.txt
+ - plain ASCII listing of all the nodes in /dev/ with major minor #'s.
+devicetree/
+ - directory with info on device tree files used by OF/PowerPC/ARM
+digsig.txt
+ -info on the Digital Signature Verification API
+dma-buf-sharing.txt
+ - the DMA Buffer Sharing API Guide
+dontdiff
+ - file containing a list of files that should never be diff'ed.
+driver-model/
+ - directory with info about Linux driver model.
+dvb/
+ - info on Linux Digital Video Broadcast (DVB) subsystem.
+dynamic-debug-howto.txt
+ - how to use the dynamic debug (dyndbg) feature.
+early-userspace/
+ - info about initramfs, klibc, and userspace early during boot.
+edac.txt
+ - information on EDAC - Error Detection And Correction
+efi-stub.txt
+ - How to use the EFI boot stub to bypass GRUB or elilo on EFI systems.
+eisa.txt
+ - info on EISA bus support.
+email-clients.txt
+ - info on how to use e-mail to send un-mangled (git) patches.
+extcon/
+ - directory with porting guide for Android kernel switch driver.
+fault-injection/
+ - dir with docs about the fault injection capabilities infrastructure.
+fb/
+ - directory with info on the frame buffer graphics abstraction layer.
+filesystems/
+ - info on the vfs and the various filesystems that Linux supports.
+firmware_class/
+ - request_firmware() hotplug interface info.
+flexible-arrays.txt
+ - how to make use of flexible sized arrays in linux
+fmc/
+ - information about the FMC bus abstraction
+frv/
+ - Fujitsu FR-V Linux documentation.
+futex-requeue-pi.txt
+ - info on requeueing of tasks from a non-PI futex to a PI futex
+gcov.txt
+ - use of GCC's coverage testing tool "gcov" with the Linux kernel
+gpio/
+ - gpio related documentation
+hid/
+ - directory with information on human interface devices
+highuid.txt
+ - notes on the change from 16 bit to 32 bit user/group IDs.
+hsi.txt
+ - HSI subsystem overview.
+hwspinlock.txt
+ - hardware spinlock provides hardware assistance for synchronization
+timers/
+ - info on the timer related topics
+hw_random.txt
+ - info on Linux support for random number generator in i8xx chipsets.
+hwmon/
+ - directory with docs on various hardware monitoring drivers.
+i2c/
+ - directory with info about the I2C bus/protocol (2 wire, kHz speed).
+i2o/
+ - directory with info about the Linux I2O subsystem.
+x86/i386/
+ - directory with info about Linux on Intel 32 bit architecture.
+ia64/
+ - directory with info about Linux on Intel 64 bit architecture.
+infiniband/
+ - directory with documents concerning Linux InfiniBand support.
+init.txt
+ - what to do when the kernel can't find the 1st process to run.
+initrd.txt
+ - how to use the RAM disk as an initial/temporary root filesystem.
+input/
+ - info on Linux input device support.
+intel_txt.txt
+ - info on intel Trusted Execution Technology (intel TXT).
+io-mapping.txt
+ - description of io_mapping functions in linux/io-mapping.h
+io_ordering.txt
+ - info on ordering I/O writes to memory-mapped addresses.
+ioctl/
+ - directory with documents describing various IOCTL calls.
+iostats.txt
+ - info on I/O statistics Linux kernel provides.
+irqflags-tracing.txt
+ - how to use the irq-flags tracing feature.
+isapnp.txt
+ - info on Linux ISA Plug & Play support.
+isdn/
+ - directory with info on the Linux ISDN support, and supported cards.
+java.txt
+ - info on the in-kernel binary support for Java(tm).
+ja_JP/
+ - directory with Japanese translations of various documents
+kbuild/
+ - directory with info about the kernel build process.
+kdump/
+ - directory with mini HowTo on getting the crash dump code to work.
+kernel-doc-nano-HOWTO.txt
+ - mini HowTo on generation and location of kernel documentation files.
+kernel-docs.txt
+ - listing of various WWW + books that document kernel internals.
+kernel-parameters.txt
+ - summary listing of command line / boot prompt args for the kernel.
+kernel-per-CPU-kthreads.txt
+ - List of all per-CPU kthreads and how they introduce jitter.
+kmemcheck.txt
+ - info on dynamic checker that detects uses of uninitialized memory.
+kmemleak.txt
+ - info on how to make use of the kernel memory leak detection system
+ko_KR/
+ - directory with Korean translations of various documents
+kobject.txt
+ - info of the kobject infrastructure of the Linux kernel.
+kprobes.txt
+ - documents the kernel probes debugging feature.
+kref.txt
+ - docs on adding reference counters (krefs) to kernel objects.
+kselftest.txt
+ - small unittests for (some) individual codepaths in the kernel.
+laptops/
+ - directory with laptop related info and laptop driver documentation.
+ldm.txt
+ - a brief description of LDM (Windows Dynamic Disks).
+leds/
+ - directory with info about LED handling under Linux.
+local_ops.txt
+ - semantics and behavior of local atomic operations.
+locking/
+ - directory with info about kernel locking primitives
+lockup-watchdogs.txt
+ - info on soft and hard lockup detectors (aka nmi_watchdog).
+logo.gif
+ - full colour GIF image of Linux logo (penguin - Tux).
+logo.txt
+ - info on creator of above logo & site to get additional images from.
+lzo.txt
+ - kernel LZO decompressor input formats
+m68k/
+ - directory with info about Linux on Motorola 68k architecture.
+magic-number.txt
+ - list of magic numbers used to mark/protect kernel data structures.
+mailbox.txt
+ - How to write drivers for the common mailbox framework (IPC).
+md.txt
+ - info on boot arguments for the multiple devices driver.
+media-framework.txt
+ - info on media framework, its data structures, functions and usage.
+memory-barriers.txt
+ - info on Linux kernel memory barriers.
+memory-devices/
+ - directory with info on parts like the Texas Instruments EMIF driver
+memory-hotplug.txt
+ - Hotpluggable memory support, how to use and current status.
+metag/
+ - directory with info about Linux on Meta architecture.
+mips/
+ - directory with info about Linux on MIPS architecture.
+misc-devices/
+ - directory with info about devices using the misc dev subsystem
+mmc/
+ - directory with info about the MMC subsystem
+mn10300/
+ - directory with info about the mn10300 architecture port
+module-signing.txt
+ - Kernel module signing for increased security when loading modules.
+mtd/
+ - directory with info about memory technology devices (flash)
+mono.txt
+ - how to execute Mono-based .NET binaries with the help of BINFMT_MISC.
+namespaces/
+ - directory with various information about namespaces
+netlabel/
+ - directory with information on the NetLabel subsystem.
+networking/
+ - directory with info on various aspects of networking with Linux.
+nfc/
+ - directory relating info about Near Field Communications support.
+nommu-mmap.txt
+ - documentation about no-mmu memory mapping support.
+numastat.txt
+ - info on how to read Numa policy hit/miss statistics in sysfs.
+oops-tracing.txt
+ - how to decode those nasty internal kernel error dump messages.
+padata.txt
+ - An introduction to the "padata" parallel execution API
+parisc/
+ - directory with info on using Linux on PA-RISC architecture.
+parport.txt
+ - how to use the parallel-port driver.
+parport-lowlevel.txt
+ - description and usage of the low level parallel port functions.
+pcmcia/
+ - info on the Linux PCMCIA driver.
+percpu-rw-semaphore.txt
+ - RCU based read-write semaphore optimized for locking for reading
+phy.txt
+ - Description of the generic PHY framework.
+pi-futex.txt
+ - documentation on lightweight priority inheritance futexes.
+pinctrl.txt
+ - info on pinctrl subsystem and the PINMUX/PINCONF and drivers
+pnp.txt
+ - Linux Plug and Play documentation.
+power/
+ - directory with info on Linux PCI power management.
+powerpc/
+ - directory with info on using Linux with the PowerPC.
+prctl/
+ - directory with info on the priveledge control subsystem
+preempt-locking.txt
+ - info on locking under a preemptive kernel.
+printk-formats.txt
+ - how to get printk format specifiers right
+pps/
+ - directory with information on the pulse-per-second support
+ptp/
+ - directory with info on support for IEEE 1588 PTP clocks in Linux.
+pwm.txt
+ - info on the pulse width modulation driver subsystem
+ramoops.txt
+ - documentation of the ramoops oops/panic logging module.
+rapidio/
+ - directory with info on RapidIO packet-based fabric interconnect
+rbtree.txt
+ - info on what red-black trees are and what they are for.
+remoteproc.txt
+ - info on how to handle remote processor (e.g. AMP) offloads/usage.
+rfkill.txt
+ - info on the radio frequency kill switch subsystem/support.
+robust-futex-ABI.txt
+ - documentation of the robust futex ABI.
+robust-futexes.txt
+ - a description of what robust futexes are.
+rpmsg.txt
+ - info on the Remote Processor Messaging (rpmsg) Framework
+rtc.txt
+ - notes on how to use the Real Time Clock (aka CMOS clock) driver.
+s390/
+ - directory with info on using Linux on the IBM S390.
+scheduler/
+ - directory with info on the scheduler.
+scsi/
+ - directory with info on Linux scsi support.
+security/
+ - directory that contains security-related info
+serial/
+ - directory with info on the low level serial API.
+serial-console.txt
+ - how to set up Linux with a serial line console as the default.
+sgi-ioc4.txt
+ - description of the SGI IOC4 PCI (multi function) device.
+sh/
+ - directory with info on porting Linux to a new architecture.
+smsc_ece1099.txt
+ -info on the smsc Keyboard Scan Expansion/GPIO Expansion device.
+sound/
+ - directory with info on sound card support.
+sparse.txt
+ - info on how to obtain and use the sparse tool for typechecking.
+spi/
+ - overview of Linux kernel Serial Peripheral Interface (SPI) support.
+stable_api_nonsense.txt
+ - info on why the kernel does not have a stable in-kernel api or abi.
+stable_kernel_rules.txt
+ - rules and procedures for the -stable kernel releases.
+static-keys.txt
+ - info on how static keys allow debug code in hotpaths via patching
+svga.txt
+ - short guide on selecting video modes at boot via VGA BIOS.
+sysfs-rules.txt
+ - How not to use sysfs.
+sysctl/
+ - directory with info on the /proc/sys/* files.
+sysrq.txt
+ - info on the magic SysRq key.
+target/
+ - directory with info on generating TCM v4 fabric .ko modules
+this_cpu_ops.txt
+ - List rationale behind and the way to use this_cpu operations.
+thermal/
+ - directory with information on managing thermal issues (CPU/temp)
+trace/
+ - directory with info on tracing technologies within linux
+unaligned-memory-access.txt
+ - info on how to avoid arch breaking unaligned memory access in code.
+unicode.txt
+ - info on the Unicode character/font mapping used in Linux.
+unshare.txt
+ - description of the Linux unshare system call.
+usb/
+ - directory with info regarding the Universal Serial Bus.
+vDSO/
+ - directory with info regarding virtual dynamic shared objects
+vfio.txt
+ - info on Virtual Function I/O used in guest/hypervisor instances.
+vgaarbiter.txt
+ - info on enable/disable the legacy decoding on different VGA devices
+video-output.txt
+ - sysfs class driver interface to enable/disable a video output device.
+video4linux/
+ - directory with info regarding video/TV/radio cards and linux.
+virtual/
+ - directory with information on the various linux virtualizations.
+vm/
+ - directory with info on the Linux vm code.
+vme_api.txt
+ - file relating info on the VME bus API in linux
+volatile-considered-harmful.txt
+ - Why the "volatile" type class should not be used
+w1/
+ - directory with documents regarding the 1-wire (w1) subsystem.
+watchdog/
+ - how to auto-reboot Linux if it has "fallen and can't get up". ;-)
+wimax/
+ - directory with info about Intel Wireless Wimax Connections
+workqueue.txt
+ - information on the Concurrency Managed Workqueue implementation
+x86/x86_64/
+ - directory with info on Linux support for AMD x86-64 (Hammer) machines.
+xillybus.txt
+ - Overview and basic ui of xillybus driver
+xtensa/
+ - directory with documents relating to arch/xtensa port/implementation
+xz.txt
+ - how to make use of the XZ data compression within linux kernel
+zh_CN/
+ - directory with Chinese translations of various documents
+zorro.txt
+ - info on writing drivers for Zorro bus devices found on Amigas.
diff --git a/Documentation/ABI/README b/Documentation/ABI/README
new file mode 100644
index 000000000..1fafc4b07
--- /dev/null
+++ b/Documentation/ABI/README
@@ -0,0 +1,87 @@
+This directory attempts to document the ABI between the Linux kernel and
+userspace, and the relative stability of these interfaces. Due to the
+everchanging nature of Linux, and the differing maturity levels, these
+interfaces should be used by userspace programs in different ways.
+
+We have four different levels of ABI stability, as shown by the four
+different subdirectories in this location. Interfaces may change levels
+of stability according to the rules described below.
+
+The different levels of stability are:
+
+ stable/
+ This directory documents the interfaces that the developer has
+ defined to be stable. Userspace programs are free to use these
+ interfaces with no restrictions, and backward compatibility for
+ them will be guaranteed for at least 2 years. Most interfaces
+ (like syscalls) are expected to never change and always be
+ available.
+
+ testing/
+ This directory documents interfaces that are felt to be stable,
+ as the main development of this interface has been completed.
+ The interface can be changed to add new features, but the
+ current interface will not break by doing this, unless grave
+ errors or security problems are found in them. Userspace
+ programs can start to rely on these interfaces, but they must be
+ aware of changes that can occur before these interfaces move to
+ be marked stable. Programs that use these interfaces are
+ strongly encouraged to add their name to the description of
+ these interfaces, so that the kernel developers can easily
+ notify them if any changes occur (see the description of the
+ layout of the files below for details on how to do this.)
+
+ obsolete/
+ This directory documents interfaces that are still remaining in
+ the kernel, but are marked to be removed at some later point in
+ time. The description of the interface will document the reason
+ why it is obsolete and when it can be expected to be removed.
+
+ removed/
+ This directory contains a list of the old interfaces that have
+ been removed from the kernel.
+
+Every file in these directories will contain the following information:
+
+What: Short description of the interface
+Date: Date created
+KernelVersion: Kernel version this feature first showed up in.
+Contact: Primary contact for this interface (may be a mailing list)
+Description: Long description of the interface and how to use it.
+Users: All users of this interface who wish to be notified when
+ it changes. This is very important for interfaces in
+ the "testing" stage, so that kernel developers can work
+ with userspace developers to ensure that things do not
+ break in ways that are unacceptable. It is also
+ important to get feedback for these interfaces to make
+ sure they are working in a proper way and do not need to
+ be changed further.
+
+
+How things move between levels:
+
+Interfaces in stable may move to obsolete, as long as the proper
+notification is given.
+
+Interfaces may be removed from obsolete and the kernel as long as the
+documented amount of time has gone by.
+
+Interfaces in the testing state can move to the stable state when the
+developers feel they are finished. They cannot be removed from the
+kernel tree without going through the obsolete state first.
+
+It's up to the developer to place their interfaces in the category they
+wish for it to start out in.
+
+
+Notable bits of non-ABI, which should not under any circumstances be considered
+stable:
+
+- Kconfig. Userspace should not rely on the presence or absence of any
+ particular Kconfig symbol, in /proc/config.gz, in the copy of .config
+ commonly installed to /boot, or in any invocation of the kernel build
+ process.
+
+- Kernel-internal symbols. Do not rely on the presence, absence, location, or
+ type of any kernel symbol, either in System.map files or the kernel binary
+ itself. See Documentation/stable_api_nonsense.txt.
diff --git a/Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads b/Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads
new file mode 100644
index 000000000..b0b0eeb20
--- /dev/null
+++ b/Documentation/ABI/obsolete/proc-sys-vm-nr_pdflush_threads
@@ -0,0 +1,5 @@
+What: /proc/sys/vm/nr_pdflush_threads
+Date: June 2012
+Contact: Wanpeng Li <liwp@linux.vnet.ibm.com>
+Description: Since pdflush is replaced by per-BDI flusher, the interface of old pdflush
+ exported in /proc/sys/vm/ should be removed.
diff --git a/Documentation/ABI/obsolete/sysfs-block-zram b/Documentation/ABI/obsolete/sysfs-block-zram
new file mode 100644
index 000000000..720ea92cf
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-block-zram
@@ -0,0 +1,119 @@
+What: /sys/block/zram<id>/num_reads
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The num_reads file is read-only and specifies the number of
+ reads (failed or successful) done on this device.
+ Now accessible via zram<id>/stat node.
+
+What: /sys/block/zram<id>/num_writes
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The num_writes file is read-only and specifies the number of
+ writes (failed or successful) done on this device.
+ Now accessible via zram<id>/stat node.
+
+What: /sys/block/zram<id>/invalid_io
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The invalid_io file is read-only and specifies the number of
+ non-page-size-aligned I/O requests issued to this device.
+ Now accessible via zram<id>/io_stat node.
+
+What: /sys/block/zram<id>/failed_reads
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The failed_reads file is read-only and specifies the number of
+ failed reads happened on this device.
+ Now accessible via zram<id>/io_stat node.
+
+What: /sys/block/zram<id>/failed_writes
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The failed_writes file is read-only and specifies the number of
+ failed writes happened on this device.
+ Now accessible via zram<id>/io_stat node.
+
+What: /sys/block/zram<id>/notify_free
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The notify_free file is read-only. Depending on device usage
+ scenario it may account a) the number of pages freed because
+ of swap slot free notifications or b) the number of pages freed
+ because of REQ_DISCARD requests sent by bio. The former ones
+ are sent to a swap block device when a swap slot is freed, which
+ implies that this disk is being used as a swap disk. The latter
+ ones are sent by filesystem mounted with discard option,
+ whenever some data blocks are getting discarded.
+ Now accessible via zram<id>/io_stat node.
+
+What: /sys/block/zram<id>/zero_pages
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The zero_pages file is read-only and specifies number of zero
+ filled pages written to this disk. No memory is allocated for
+ such pages.
+ Now accessible via zram<id>/mm_stat node.
+
+What: /sys/block/zram<id>/orig_data_size
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The orig_data_size file is read-only and specifies uncompressed
+ size of data stored in this disk. This excludes zero-filled
+ pages (zero_pages) since no memory is allocated for them.
+ Unit: bytes
+ Now accessible via zram<id>/mm_stat node.
+
+What: /sys/block/zram<id>/compr_data_size
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The compr_data_size file is read-only and specifies compressed
+ size of data stored in this disk. So, compression ratio can be
+ calculated using orig_data_size and this statistic.
+ Unit: bytes
+ Now accessible via zram<id>/mm_stat node.
+
+What: /sys/block/zram<id>/mem_used_total
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The mem_used_total file is read-only and specifies the amount
+ of memory, including allocator fragmentation and metadata
+ overhead, allocated for this disk. So, allocator space
+ efficiency can be calculated using compr_data_size and this
+ statistic.
+ Unit: bytes
+ Now accessible via zram<id>/mm_stat node.
+
+What: /sys/block/zram<id>/mem_used_max
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The mem_used_max file is read/write and specifies the amount
+ of maximum memory zram have consumed to store compressed data.
+ For resetting the value, you should write "0". Otherwise,
+ you could see -EINVAL.
+ Unit: bytes
+ Downgraded to write-only node: so it's possible to set new
+ value only; its current value is stored in zram<id>/mm_stat
+ node.
+
+What: /sys/block/zram<id>/mem_limit
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The mem_limit file is read/write and specifies the maximum
+ amount of memory ZRAM can use to store the compressed data.
+ The limit could be changed in run time and "0" means disable
+ the limit. No limit is the initial state. Unit: bytes
+ Downgraded to write-only node: so it's possible to set new
+ value only; its current value is stored in zram<id>/mm_stat
+ node.
diff --git a/Documentation/ABI/obsolete/sysfs-bus-usb b/Documentation/ABI/obsolete/sysfs-bus-usb
new file mode 100644
index 000000000..bd096d33f
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-bus-usb
@@ -0,0 +1,31 @@
+What: /sys/bus/usb/devices/.../power/level
+Date: March 2007
+KernelVersion: 2.6.21
+Contact: Alan Stern <stern@rowland.harvard.edu>
+Description:
+ Each USB device directory will contain a file named
+ power/level. This file holds a power-level setting for
+ the device, either "on" or "auto".
+
+ "on" means that the device is not allowed to autosuspend,
+ although normal suspends for system sleep will still
+ be honored. "auto" means the device will autosuspend
+ and autoresume in the usual manner, according to the
+ capabilities of its driver.
+
+ During normal use, devices should be left in the "auto"
+ level. The "on" level is meant for administrative uses.
+ If you want to suspend a device immediately but leave it
+ free to wake up in response to I/O requests, you should
+ write "0" to power/autosuspend.
+
+ Device not capable of proper suspend and resume should be
+ left in the "on" level. Although the USB spec requires
+ devices to support suspend/resume, many of them do not.
+ In fact so many don't that by default, the USB core
+ initializes all non-hub devices in the "on" level. Some
+ drivers may change this setting when they are bound.
+
+ This file is deprecated and will be removed after 2010.
+ Use the power/control file instead; it does exactly the
+ same thing.
diff --git a/Documentation/ABI/obsolete/sysfs-class-rfkill b/Documentation/ABI/obsolete/sysfs-class-rfkill
new file mode 100644
index 000000000..ff60ad9ec
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-class-rfkill
@@ -0,0 +1,29 @@
+rfkill - radio frequency (RF) connector kill switch support
+
+For details to this subsystem look at Documentation/rfkill.txt.
+
+What: /sys/class/rfkill/rfkill[0-9]+/state
+Date: 09-Jul-2007
+KernelVersion v2.6.22
+Contact: linux-wireless@vger.kernel.org
+Description: Current state of the transmitter.
+ This file is deprecated and scheduled to be removed in 2014,
+ because its not possible to express the 'soft and hard block'
+ state of the rfkill driver.
+Values: A numeric value.
+ 0: RFKILL_STATE_SOFT_BLOCKED
+ transmitter is turned off by software
+ 1: RFKILL_STATE_UNBLOCKED
+ transmitter is (potentially) active
+ 2: RFKILL_STATE_HARD_BLOCKED
+ transmitter is forced off by something outside of
+ the driver's control.
+
+What: /sys/class/rfkill/rfkill[0-9]+/claim
+Date: 09-Jul-2007
+KernelVersion v2.6.22
+Contact: linux-wireless@vger.kernel.org
+Description: This file is deprecated because there no longer is a way to
+ claim just control over a single rfkill instance.
+ This file is scheduled to be removed in 2012.
+Values: 0: Kernel handles events
diff --git a/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-koneplus b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-koneplus
new file mode 100644
index 000000000..833fd5992
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-koneplus
@@ -0,0 +1,48 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/startup_profile
+Date: October 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 0-4.
+ When read, this attribute returns the number of the actual
+ profile. This value is persistent, so its equivalent to the
+ profile that's active when the mouse is powered on next time.
+ When written, this file sets the number of the startup profile
+ and the mouse activates this profile immediately.
+ Please use actual_profile, it does the same thing.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/firmware_version
+Date: October 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the raw integer version number of the
+ firmware reported by the mouse. Using the integer value eases
+ further usage in other programs. To receive the real version
+ number the decimal point has to be shifted 2 positions to the
+ left. E.g. a returned value of 121 means 1.21
+ This file is readonly.
+ Please read binary attribute info which contains firmware version.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/profile[1-5]_buttons
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_buttons holds information about button layout.
+ When read, these files return the respective profile buttons.
+ The returned data is 77 bytes in size.
+ This file is readonly.
+ Write control to select profile and read profile_buttons instead.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/profile[1-5]_settings
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_settings holds information like resolution, sensitivity
+ and light effects.
+ When read, these files return the respective profile settings.
+ The returned data is 43 bytes in size.
+ This file is readonly.
+ Write control to select profile and read profile_settings instead.
+Users: http://roccat.sourceforge.net \ No newline at end of file
diff --git a/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-kovaplus b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-kovaplus
new file mode 100644
index 000000000..4a98e02b6
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-kovaplus
@@ -0,0 +1,66 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kovaplus/roccatkovaplus<minor>/actual_cpi
+Date: January 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 1-4.
+ When read, this attribute returns the number of the active
+ cpi level.
+ This file is readonly.
+ Has never been used. If bookkeeping is done, it's done in userland tools.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kovaplus/roccatkovaplus<minor>/actual_sensitivity_x
+Date: January 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 1-10.
+ When read, this attribute returns the number of the actual
+ sensitivity in x direction.
+ This file is readonly.
+ Has never been used. If bookkeeping is done, it's done in userland tools.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kovaplus/roccatkovaplus<minor>/actual_sensitivity_y
+Date: January 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 1-10.
+ When read, this attribute returns the number of the actual
+ sensitivity in y direction.
+ This file is readonly.
+ Has never been used. If bookkeeping is done, it's done in userland tools.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kovaplus/roccatkovaplus<minor>/firmware_version
+Date: January 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the raw integer version number of the
+ firmware reported by the mouse. Using the integer value eases
+ further usage in other programs. To receive the real version
+ number the decimal point has to be shifted 2 positions to the
+ left. E.g. a returned value of 121 means 1.21
+ This file is readonly.
+ Obsoleted by binary sysfs attribute "info".
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kovaplus/roccatkovaplus<minor>/profile[1-5]_buttons
+Date: January 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_buttons holds information about button layout.
+ When read, these files return the respective profile buttons.
+ The returned data is 23 bytes in size.
+ This file is readonly.
+ Write control to select profile and read profile_buttons instead.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kovaplus/roccatkovaplus<minor>/profile[1-5]_settings
+Date: January 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_settings holds information like resolution, sensitivity
+ and light effects.
+ When read, these files return the respective profile settings.
+ The returned data is 16 bytes in size.
+ This file is readonly.
+ Write control to select profile and read profile_settings instead.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra
new file mode 100644
index 000000000..87ac87e95
--- /dev/null
+++ b/Documentation/ABI/obsolete/sysfs-driver-hid-roccat-pyra
@@ -0,0 +1,73 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/pyra/roccatpyra<minor>/actual_cpi
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: It is possible to switch the cpi setting of the mouse with the
+ press of a button.
+ When read, this file returns the raw number of the actual cpi
+ setting reported by the mouse. This number has to be further
+ processed to receive the real dpi value.
+
+ VALUE DPI
+ 1 400
+ 2 800
+ 4 1600
+
+ This file is readonly.
+ Has never been used. If bookkeeping is done, it's done in userland tools.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/pyra/roccatpyra<minor>/actual_profile
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the number of the actual profile in
+ range 0-4.
+ This file is readonly.
+ Please use binary attribute "settings" which provides this information.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/pyra/roccatpyra<minor>/firmware_version
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the raw integer version number of the
+ firmware reported by the mouse. Using the integer value eases
+ further usage in other programs. To receive the real version
+ number the decimal point has to be shifted 2 positions to the
+ left. E.g. a returned value of 138 means 1.38
+ This file is readonly.
+ Please use binary attribute "info" which provides this information.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/pyra/roccatpyra<minor>/profile[1-5]_buttons
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_buttons holds information about button layout.
+ When read, these files return the respective profile buttons.
+ The returned data is 19 bytes in size.
+ This file is readonly.
+ Write control to select profile and read profile_buttons instead.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/pyra/roccatpyra<minor>/profile[1-5]_settings
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_settings holds information like resolution, sensitivity
+ and light effects.
+ When read, these files return the respective profile settings.
+ The returned data is 13 bytes in size.
+ This file is readonly.
+ Write control to select profile and read profile_settings instead.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/pyra/roccatpyra<minor>/startup_profile
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 0-4.
+ When read, this attribute returns the number of the profile
+ that's active when the mouse is powered on.
+ This file is readonly.
+ Please use binary attribute "settings" which provides this information.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/removed/devfs b/Documentation/ABI/removed/devfs
new file mode 100644
index 000000000..0020c4993
--- /dev/null
+++ b/Documentation/ABI/removed/devfs
@@ -0,0 +1,12 @@
+What: devfs
+Date: July 2005 (scheduled), finally removed in kernel v2.6.18
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description:
+ devfs has been unmaintained for a number of years, has unfixable
+ races, contains a naming policy within the kernel that is
+ against the LSB, and can be replaced by using udev.
+ The files fs/devfs/*, include/linux/devfs_fs*.h were removed,
+ along with the assorted devfs function calls throughout the
+ kernel tree.
+
+Users:
diff --git a/Documentation/ABI/removed/dv1394 b/Documentation/ABI/removed/dv1394
new file mode 100644
index 000000000..c2310b667
--- /dev/null
+++ b/Documentation/ABI/removed/dv1394
@@ -0,0 +1,14 @@
+What: dv1394 (a.k.a. "OHCI-DV I/O support" for FireWire)
+Date: May 2010 (scheduled), finally removed in kernel v2.6.37
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ /dev/dv1394/* were character device files, one for each FireWire
+ controller and for NTSC and PAL respectively, from which DV data
+ could be received by read() or transmitted by write(). A few
+ ioctl()s allowed limited control.
+ This special-purpose interface has been superseded by libraw1394 +
+ libiec61883 which are functionally equivalent, support HDV, and
+ transparently work on top of the newer firewire kernel drivers.
+
+Users:
+ ffmpeg/libavformat (if configured for DV1394)
diff --git a/Documentation/ABI/removed/ip_queue b/Documentation/ABI/removed/ip_queue
new file mode 100644
index 000000000..3243613bc
--- /dev/null
+++ b/Documentation/ABI/removed/ip_queue
@@ -0,0 +1,9 @@
+What: ip_queue
+Date: finally removed in kernel v3.5.0
+Contact: Pablo Neira Ayuso <pablo@netfilter.org>
+Description:
+ ip_queue has been replaced by nfnetlink_queue which provides
+ more advanced queueing mechanism to user-space. The ip_queue
+ module was already announced to become obsolete years ago.
+
+Users:
diff --git a/Documentation/ABI/removed/net_dma b/Documentation/ABI/removed/net_dma
new file mode 100644
index 000000000..a173aecc2
--- /dev/null
+++ b/Documentation/ABI/removed/net_dma
@@ -0,0 +1,8 @@
+What: tcp_dma_copybreak sysctl
+Date: Removed in kernel v3.13
+Contact: Dan Williams <dan.j.williams@intel.com>
+Description:
+ Formerly the lower limit, in bytes, of the size of socket reads
+ that will be offloaded to a DMA copy engine. Removed due to
+ coherency issues of the cpu potentially touching the buffers
+ while dma is in flight.
diff --git a/Documentation/ABI/removed/o2cb b/Documentation/ABI/removed/o2cb
new file mode 100644
index 000000000..20c91adca
--- /dev/null
+++ b/Documentation/ABI/removed/o2cb
@@ -0,0 +1,10 @@
+What: /sys/o2cb symlink
+Date: May 2011
+KernelVersion: 3.0
+Contact: ocfs2-devel@oss.oracle.com
+Description: This is a symlink: /sys/o2cb to /sys/fs/o2cb. The symlink is
+ removed when new versions of ocfs2-tools which know to look
+ in /sys/fs/o2cb are sufficiently prevalent. Don't code new
+ software to look here, it should try /sys/fs/o2cb instead.
+Users: ocfs2-tools. It's sufficient to mail proposed changes to
+ ocfs2-devel@oss.oracle.com.
diff --git a/Documentation/ABI/removed/raw1394 b/Documentation/ABI/removed/raw1394
new file mode 100644
index 000000000..ec333e676
--- /dev/null
+++ b/Documentation/ABI/removed/raw1394
@@ -0,0 +1,15 @@
+What: raw1394 (a.k.a. "Raw IEEE1394 I/O support" for FireWire)
+Date: May 2010 (scheduled), finally removed in kernel v2.6.37
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ /dev/raw1394 was a character device file that allowed low-level
+ access to FireWire buses. Its major drawbacks were its inability
+ to implement sensible device security policies, and its low level
+ of abstraction that required userspace clients to duplicate much
+ of the kernel's ieee1394 core functionality.
+ Replaced by /dev/fw*, i.e. the <linux/firewire-cdev.h> ABI of
+ firewire-core.
+
+Users:
+ libraw1394 (works with firewire-cdev too, transparent to library ABI
+ users)
diff --git a/Documentation/ABI/removed/video1394 b/Documentation/ABI/removed/video1394
new file mode 100644
index 000000000..c39c25aee
--- /dev/null
+++ b/Documentation/ABI/removed/video1394
@@ -0,0 +1,16 @@
+What: video1394 (a.k.a. "OHCI-1394 Video support" for FireWire)
+Date: May 2010 (scheduled), finally removed in kernel v2.6.37
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ /dev/video1394/* were character device files, one for each FireWire
+ controller, which were used for isochronous I/O. It was added as an
+ alternative to raw1394's isochronous I/O functionality which had
+ performance issues in its first generation. Any video1394 user had
+ to use raw1394 + libraw1394 too because video1394 did not provide
+ asynchronous I/O for device discovery and configuration.
+ Replaced by /dev/fw*, i.e. the <linux/firewire-cdev.h> ABI of
+ firewire-core.
+
+Users:
+ libdc1394 (works with firewire-cdev too, transparent to library ABI
+ users)
diff --git a/Documentation/ABI/stable/firewire-cdev b/Documentation/ABI/stable/firewire-cdev
new file mode 100644
index 000000000..16d030827
--- /dev/null
+++ b/Documentation/ABI/stable/firewire-cdev
@@ -0,0 +1,103 @@
+What: /dev/fw[0-9]+
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ The character device files /dev/fw* are the interface between
+ firewire-core and IEEE 1394 device drivers implemented in
+ userspace. The ioctl(2)- and read(2)-based ABI is defined and
+ documented in <linux/firewire-cdev.h>.
+
+ This ABI offers most of the features which firewire-core also
+ exposes to kernelspace IEEE 1394 drivers.
+
+ Each /dev/fw* is associated with one IEEE 1394 node, which can
+ be remote or local nodes. Operations on a /dev/fw* file have
+ different scope:
+ - The 1394 node which is associated with the file:
+ - Asynchronous request transmission
+ - Get the Configuration ROM
+ - Query node ID
+ - Query maximum speed of the path between this node
+ and local node
+ - The 1394 bus (i.e. "card") to which the node is attached to:
+ - Isochronous stream transmission and reception
+ - Asynchronous stream transmission and reception
+ - Asynchronous broadcast request transmission
+ - PHY packet transmission and reception
+ - Allocate, reallocate, deallocate isochronous
+ resources (channels, bandwidth) at the bus's IRM
+ - Query node IDs of local node, root node, IRM, bus
+ manager
+ - Query cycle time
+ - Bus reset initiation, bus reset event reception
+ - All 1394 buses:
+ - Allocation of IEEE 1212 address ranges on the local
+ link layers, reception of inbound requests to such
+ an address range, asynchronous response transmission
+ to inbound requests
+ - Addition of descriptors or directories to the local
+ nodes' Configuration ROM
+
+ Due to the different scope of operations and in order to let
+ userland implement different access permission models, some
+ operations are restricted to /dev/fw* files that are associated
+ with a local node:
+ - Addition of descriptors or directories to the local
+ nodes' Configuration ROM
+ - PHY packet transmission and reception
+
+ A /dev/fw* file remains associated with one particular node
+ during its entire life time. Bus topology changes, and hence
+ node ID changes, are tracked by firewire-core. ABI users do not
+ need to be aware of topology.
+
+ The following file operations are supported:
+
+ open(2)
+ Currently the only useful flags are O_RDWR.
+
+ ioctl(2)
+ Initiate various actions. Some take immediate effect, others
+ are performed asynchronously while or after the ioctl returns.
+ See the inline documentation in <linux/firewire-cdev.h> for
+ descriptions of all ioctls.
+
+ poll(2), select(2), epoll_wait(2) etc.
+ Watch for events to become available to be read.
+
+ read(2)
+ Receive various events. There are solicited events like
+ outbound asynchronous transaction completion or isochronous
+ buffer completion, and unsolicited events such as bus resets,
+ request reception, or PHY packet reception. Always use a read
+ buffer which is large enough to receive the largest event that
+ could ever arrive. See <linux/firewire-cdev.h> for descriptions
+ of all event types and for which ioctls affect reception of
+ events.
+
+ mmap(2)
+ Allocate a DMA buffer for isochronous reception or transmission
+ and map it into the process address space. The arguments should
+ be used as follows: addr = NULL, length = the desired buffer
+ size, i.e. number of packets times size of largest packet,
+ prot = at least PROT_READ for reception and at least PROT_WRITE
+ for transmission, flags = MAP_SHARED, fd = the handle to the
+ /dev/fw*, offset = 0.
+
+ Isochronous reception works in packet-per-buffer fashion except
+ for multichannel reception which works in buffer-fill mode.
+
+ munmap(2)
+ Unmap the isochronous I/O buffer from the process address space.
+
+ close(2)
+ Besides stopping and freeing I/O contexts that were associated
+ with the file descriptor, back out any changes to the local
+ nodes' Configuration ROM. Deallocate isochronous channels and
+ bandwidth at the IRM that were marked for kernel-assisted
+ re- and deallocation.
+
+Users: libraw1394
+ libdc1394
+ tools like jujuutils, fwhack, ...
diff --git a/Documentation/ABI/stable/o2cb b/Documentation/ABI/stable/o2cb
new file mode 100644
index 000000000..5eb1545e0
--- /dev/null
+++ b/Documentation/ABI/stable/o2cb
@@ -0,0 +1,10 @@
+What: /sys/fs/o2cb/ (was /sys/o2cb)
+Date: Dec 2005
+KernelVersion: 2.6.16
+Contact: ocfs2-devel@oss.oracle.com
+Description: Ocfs2-tools looks at 'interface-revision' for versioning
+ information. Each logmask/ file controls a set of debug prints
+ and can be written into with the strings "allow", "deny", or
+ "off". Reading the file returns the current state.
+Users: ocfs2-tools. It's sufficient to mail proposed changes to
+ ocfs2-devel@oss.oracle.com.
diff --git a/Documentation/ABI/stable/syscalls b/Documentation/ABI/stable/syscalls
new file mode 100644
index 000000000..c3ae3e7d6
--- /dev/null
+++ b/Documentation/ABI/stable/syscalls
@@ -0,0 +1,10 @@
+What: The kernel syscall interface
+Description:
+ This interface matches much of the POSIX interface and is based
+ on it and other Unix based interfaces. It will only be added to
+ over time, and not have things removed from it.
+
+ Note that this interface is different for every architecture
+ that Linux supports. Please see the architecture-specific
+ documentation for details on the syscall numbers that are to be
+ mapped to each syscall.
diff --git a/Documentation/ABI/stable/sysfs-acpi-pmprofile b/Documentation/ABI/stable/sysfs-acpi-pmprofile
new file mode 100644
index 000000000..964c7a8af
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-acpi-pmprofile
@@ -0,0 +1,22 @@
+What: /sys/firmware/acpi/pm_profile
+Date: 03-Nov-2011
+KernelVersion: v3.2
+Contact: linux-acpi@vger.kernel.org
+Description: The ACPI pm_profile sysfs interface exports the platform
+ power management (and performance) requirement expectations
+ as provided by BIOS. The integer value is directly passed as
+ retrieved from the FADT ACPI table.
+Values: For possible values see ACPI specification:
+ 5.2.9 Fixed ACPI Description Table (FADT)
+ Field: Preferred_PM_Profile
+
+ Currently these values are defined by spec:
+ 0 Unspecified
+ 1 Desktop
+ 2 Mobile
+ 3 Workstation
+ 4 Enterprise Server
+ 5 SOHO Server
+ 6 Appliance PC
+ 7 Performance Server
+ >7 Reserved
diff --git a/Documentation/ABI/stable/sysfs-bus-firewire b/Documentation/ABI/stable/sysfs-bus-firewire
new file mode 100644
index 000000000..41e5a0cd1
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-bus-firewire
@@ -0,0 +1,133 @@
+What: /sys/bus/firewire/devices/fw[0-9]+/
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ IEEE 1394 node device attributes.
+ Read-only. Mutable during the node device's lifetime.
+ See IEEE 1212 for semantic definitions.
+
+ config_rom
+ Contents of the Configuration ROM register.
+ Binary attribute; an array of host-endian u32.
+
+ guid
+ The node's EUI-64 in the bus information block of
+ Configuration ROM.
+ Hexadecimal string representation of an u64.
+
+
+What: /sys/bus/firewire/devices/fw[0-9]+/units
+Date: June 2009
+KernelVersion: 2.6.31
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ IEEE 1394 node device attribute.
+ Read-only. Mutable during the node device's lifetime.
+ See IEEE 1212 for semantic definitions.
+
+ units
+ Summary of all units present in an IEEE 1394 node.
+ Contains space-separated tuples of specifier_id and
+ version of each unit present in the node. Specifier_id
+ and version are hexadecimal string representations of
+ u24 of the respective unit directory entries.
+ Specifier_id and version within each tuple are separated
+ by a colon.
+
+Users: udev rules to set ownership and access permissions or ACLs of
+ /dev/fw[0-9]+ character device files
+
+
+What: /sys/bus/firewire/devices/fw[0-9]+/is_local
+Date: July 2012
+KernelVersion: 3.6
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ IEEE 1394 node device attribute.
+ Read-only and immutable.
+Values: 1: The sysfs entry represents a local node (a controller card).
+ 0: The sysfs entry represents a remote node.
+
+
+What: /sys/bus/firewire/devices/fw[0-9]+[.][0-9]+/
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ IEEE 1394 unit device attributes.
+ Read-only. Immutable during the unit device's lifetime.
+ See IEEE 1212 for semantic definitions.
+
+ modalias
+ Same as MODALIAS in the uevent at device creation.
+
+ rom_index
+ Offset of the unit directory within the parent device's
+ (node device's) Configuration ROM, in quadlets.
+ Decimal string representation.
+
+
+What: /sys/bus/firewire/devices/*/
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ Attributes common to IEEE 1394 node devices and unit devices.
+ Read-only. Mutable during the node device's lifetime.
+ Immutable during the unit device's lifetime.
+ See IEEE 1212 for semantic definitions.
+
+ These attributes are only created if the root directory of an
+ IEEE 1394 node or the unit directory of an IEEE 1394 unit
+ actually contains according entries.
+
+ hardware_version
+ Hexadecimal string representation of an u24.
+
+ hardware_version_name
+ Contents of a respective textual descriptor leaf.
+
+ model
+ Hexadecimal string representation of an u24.
+
+ model_name
+ Contents of a respective textual descriptor leaf.
+
+ specifier_id
+ Hexadecimal string representation of an u24.
+ Mandatory in unit directories according to IEEE 1212.
+
+ vendor
+ Hexadecimal string representation of an u24.
+ Mandatory in the root directory according to IEEE 1212.
+
+ vendor_name
+ Contents of a respective textual descriptor leaf.
+
+ version
+ Hexadecimal string representation of an u24.
+ Mandatory in unit directories according to IEEE 1212.
+
+
+What: /sys/bus/firewire/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id
+ formerly
+ /sys/bus/ieee1394/drivers/sbp2/fw*/host*/target*/*:*:*:*/ieee1394_id
+Date: Feb 2004
+KernelVersion: 2.6.4
+Contact: linux1394-devel@lists.sourceforge.net
+Description:
+ SCSI target port identifier and logical unit identifier of a
+ logical unit of an SBP-2 target. The identifiers are specified
+ in SAM-2...SAM-4 annex A. They are persistent and world-wide
+ unique properties the SBP-2 attached target.
+
+ Read-only attribute, immutable during the target's lifetime.
+ Format, as exposed by firewire-sbp2 since 2.6.22, May 2007:
+ Colon-separated hexadecimal string representations of
+ u64 EUI-64 : u24 directory_ID : u16 LUN
+ without 0x prefixes, without whitespace. The former sbp2 driver
+ (removed in 2.6.37 after being superseded by firewire-sbp2) used
+ a somewhat shorter format which was not as close to SAM.
+
+Users: udev rules to create /dev/disk/by-id/ symlinks
diff --git a/Documentation/ABI/stable/sysfs-bus-usb b/Documentation/ABI/stable/sysfs-bus-usb
new file mode 100644
index 000000000..831f15d96
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-bus-usb
@@ -0,0 +1,140 @@
+What: /sys/bus/usb/devices/.../power/persist
+Date: May 2007
+KernelVersion: 2.6.23
+Contact: Alan Stern <stern@rowland.harvard.edu>
+Description:
+ USB device directories can contain a file named power/persist.
+ The file holds a boolean value (0 or 1) indicating whether or
+ not the "USB-Persist" facility is enabled for the device. For
+ hubs this facility is always enabled and their device
+ directories will not contain this file.
+
+ For more information, see Documentation/usb/persist.txt.
+
+What: /sys/bus/usb/devices/.../power/autosuspend
+Date: March 2007
+KernelVersion: 2.6.21
+Contact: Alan Stern <stern@rowland.harvard.edu>
+Description:
+ Each USB device directory will contain a file named
+ power/autosuspend. This file holds the time (in seconds)
+ the device must be idle before it will be autosuspended.
+ 0 means the device will be autosuspended as soon as
+ possible. Negative values will prevent the device from
+ being autosuspended at all, and writing a negative value
+ will resume the device if it is already suspended.
+
+ The autosuspend delay for newly-created devices is set to
+ the value of the usbcore.autosuspend module parameter.
+
+What: /sys/bus/usb/device/.../power/connected_duration
+Date: January 2008
+KernelVersion: 2.6.25
+Contact: Sarah Sharp <sarah.a.sharp@intel.com>
+Description:
+ If CONFIG_PM is enabled, then this file is present. When read,
+ it returns the total time (in msec) that the USB device has been
+ connected to the machine. This file is read-only.
+Users:
+ PowerTOP <powertop@lists.01.org>
+ https://01.org/powertop/
+
+What: /sys/bus/usb/device/.../power/active_duration
+Date: January 2008
+KernelVersion: 2.6.25
+Contact: Sarah Sharp <sarah.a.sharp@intel.com>
+Description:
+ If CONFIG_PM is enabled, then this file is present. When read,
+ it returns the total time (in msec) that the USB device has been
+ active, i.e. not in a suspended state. This file is read-only.
+
+ Tools can use this file and the connected_duration file to
+ compute the percentage of time that a device has been active.
+ For example,
+ echo $((100 * `cat active_duration` / `cat connected_duration`))
+ will give an integer percentage. Note that this does not
+ account for counter wrap.
+Users:
+ PowerTOP <powertop@lists.01.org>
+ https://01.org/powertop/
+
+What: /sys/bus/usb/devices/<busnum>-<port[.port]>...:<config num>-<interface num>/supports_autosuspend
+Date: January 2008
+KernelVersion: 2.6.27
+Contact: Sarah Sharp <sarah.a.sharp@intel.com>
+Description:
+ When read, this file returns 1 if the interface driver
+ for this interface supports autosuspend. It also
+ returns 1 if no driver has claimed this interface, as an
+ unclaimed interface will not stop the device from being
+ autosuspended if all other interface drivers are idle.
+ The file returns 0 if autosuspend support has not been
+ added to the driver.
+Users:
+ USB PM tool
+ git://git.moblin.org/users/sarah/usb-pm-tool/
+
+What: /sys/bus/usb/device/.../avoid_reset_quirk
+Date: December 2009
+Contact: Oliver Neukum <oliver@neukum.org>
+Description:
+ Writing 1 to this file tells the kernel that this
+ device will morph into another mode when it is reset.
+ Drivers will not use reset for error handling for
+ such devices.
+Users:
+ usb_modeswitch
+
+What: /sys/bus/usb/devices/.../devnum
+KernelVersion: since at least 2.6.18
+Description:
+ Device address on the USB bus.
+Users:
+ libusb
+
+What: /sys/bus/usb/devices/.../bConfigurationValue
+KernelVersion: since at least 2.6.18
+Description:
+ bConfigurationValue of the *active* configuration for the
+ device. Writing 0 or -1 to bConfigurationValue will reset the
+ active configuration (unconfigure the device). Writing
+ another value will change the active configuration.
+
+ Note that some devices, in violation of the USB spec, have a
+ configuration with a value equal to 0. Writing 0 to
+ bConfigurationValue for these devices will install that
+ configuration, rather then unconfigure the device.
+
+ Writing -1 will always unconfigure the device.
+Users:
+ libusb
+
+What: /sys/bus/usb/devices/.../busnum
+KernelVersion: 2.6.22
+Description:
+ Bus-number of the USB-bus the device is connected to.
+Users:
+ libusb
+
+What: /sys/bus/usb/devices/.../descriptors
+KernelVersion: 2.6.26
+Description:
+ Binary file containing cached descriptors of the device. The
+ binary data consists of the device descriptor followed by the
+ descriptors for each configuration of the device.
+ Note that the wTotalLength of the config descriptors can not
+ be trusted, as the device may have a smaller config descriptor
+ than it advertises. The bLength field of each (sub) descriptor
+ can be trusted, and can be used to seek forward one (sub)
+ descriptor at a time until the next config descriptor is found.
+ All descriptors read from this file are in bus-endian format
+Users:
+ libusb
+
+What: /sys/bus/usb/devices/.../speed
+KernelVersion: since at least 2.6.18
+Description:
+ Speed the device is connected with to the usb-host in
+ Mbit / second. IE one of 1.5 / 12 / 480 / 5000.
+Users:
+ libusb
diff --git a/Documentation/ABI/stable/sysfs-bus-xen-backend b/Documentation/ABI/stable/sysfs-bus-xen-backend
new file mode 100644
index 000000000..3d5951c8b
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-bus-xen-backend
@@ -0,0 +1,75 @@
+What: /sys/bus/xen-backend/devices/*/devtype
+Date: Feb 2009
+KernelVersion: 2.6.38
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The type of the device. e.g., one of: 'vbd' (block),
+ 'vif' (network), or 'vfb' (framebuffer).
+
+What: /sys/bus/xen-backend/devices/*/nodename
+Date: Feb 2009
+KernelVersion: 2.6.38
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ XenStore node (under /local/domain/NNN/) for this
+ backend device.
+
+What: /sys/bus/xen-backend/devices/vbd-*/physical_device
+Date: April 2011
+KernelVersion: 3.0
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The major:minor number (in hexidecimal) of the
+ physical device providing the storage for this backend
+ block device.
+
+What: /sys/bus/xen-backend/devices/vbd-*/mode
+Date: April 2011
+KernelVersion: 3.0
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Whether the block device is read-only ('r') or
+ read-write ('w').
+
+What: /sys/bus/xen-backend/devices/vbd-*/statistics/f_req
+Date: April 2011
+KernelVersion: 3.0
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Number of flush requests from the frontend.
+
+What: /sys/bus/xen-backend/devices/vbd-*/statistics/oo_req
+Date: April 2011
+KernelVersion: 3.0
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Number of requests delayed because the backend was too
+ busy processing previous requests.
+
+What: /sys/bus/xen-backend/devices/vbd-*/statistics/rd_req
+Date: April 2011
+KernelVersion: 3.0
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Number of read requests from the frontend.
+
+What: /sys/bus/xen-backend/devices/vbd-*/statistics/rd_sect
+Date: April 2011
+KernelVersion: 3.0
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Number of sectors read by the frontend.
+
+What: /sys/bus/xen-backend/devices/vbd-*/statistics/wr_req
+Date: April 2011
+KernelVersion: 3.0
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Number of write requests from the frontend.
+
+What: /sys/bus/xen-backend/devices/vbd-*/statistics/wr_sect
+Date: April 2011
+KernelVersion: 3.0
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Number of sectors written by the frontend.
diff --git a/Documentation/ABI/stable/sysfs-class-backlight b/Documentation/ABI/stable/sysfs-class-backlight
new file mode 100644
index 000000000..70302f370
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-class-backlight
@@ -0,0 +1,56 @@
+What: /sys/class/backlight/<backlight>/bl_power
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Control BACKLIGHT power, values are FB_BLANK_* from fb.h
+ - FB_BLANK_UNBLANK (0) : power on.
+ - FB_BLANK_POWERDOWN (4) : power off
+Users: HAL
+
+What: /sys/class/backlight/<backlight>/brightness
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Control the brightness for this <backlight>. Values
+ are between 0 and max_brightness. This file will also
+ show the brightness level stored in the driver, which
+ may not be the actual brightness (see actual_brightness).
+Users: HAL
+
+What: /sys/class/backlight/<backlight>/actual_brightness
+Date: March 2006
+KernelVersion: 2.6.17
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Show the actual brightness by querying the hardware.
+Users: HAL
+
+What: /sys/class/backlight/<backlight>/max_brightness
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Maximum brightness for <backlight>.
+Users: HAL
+
+What: /sys/class/backlight/<backlight>/type
+Date: September 2010
+KernelVersion: 2.6.37
+Contact: Matthew Garrett <mjg@redhat.com>
+Description:
+ The type of interface controlled by <backlight>.
+ "firmware": The driver uses a standard firmware interface
+ "platform": The driver uses a platform-specific interface
+ "raw": The driver controls hardware registers directly
+
+ In the general case, when multiple backlight
+ interfaces are available for a single device, firmware
+ control should be preferred to platform control should
+ be preferred to raw control. Using a firmware
+ interface reduces the probability of confusion with
+ the hardware and the OS independently updating the
+ backlight state. Platform interfaces are mostly a
+ holdover from pre-standardisation of firmware
+ interfaces.
diff --git a/Documentation/ABI/stable/sysfs-class-rfkill b/Documentation/ABI/stable/sysfs-class-rfkill
new file mode 100644
index 000000000..097f522c3
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-class-rfkill
@@ -0,0 +1,67 @@
+rfkill - radio frequency (RF) connector kill switch support
+
+For details to this subsystem look at Documentation/rfkill.txt.
+
+For the deprecated /sys/class/rfkill/*/state and
+/sys/class/rfkill/*/claim knobs of this interface look in
+Documentation/ABI/obsolete/sysfs-class-rfkill.
+
+What: /sys/class/rfkill
+Date: 09-Jul-2007
+KernelVersion: v2.6.22
+Contact: linux-wireless@vger.kernel.org,
+Description: The rfkill class subsystem folder.
+ Each registered rfkill driver is represented by an rfkillX
+ subfolder (X being an integer > 0).
+
+
+What: /sys/class/rfkill/rfkill[0-9]+/name
+Date: 09-Jul-2007
+KernelVersion v2.6.22
+Contact: linux-wireless@vger.kernel.org
+Description: Name assigned by driver to this key (interface or driver name).
+Values: arbitrary string.
+
+
+What: /sys/class/rfkill/rfkill[0-9]+/type
+Date: 09-Jul-2007
+KernelVersion v2.6.22
+Contact: linux-wireless@vger.kernel.org
+Description: Driver type string ("wlan", "bluetooth", etc).
+Values: See include/linux/rfkill.h.
+
+
+What: /sys/class/rfkill/rfkill[0-9]+/persistent
+Date: 09-Jul-2007
+KernelVersion v2.6.22
+Contact: linux-wireless@vger.kernel.org
+Description: Whether the soft blocked state is initialised from non-volatile
+ storage at startup.
+Values: A numeric value.
+ 0: false
+ 1: true
+
+
+What: /sys/class/rfkill/rfkill[0-9]+/hard
+Date: 12-March-2010
+KernelVersion v2.6.34
+Contact: linux-wireless@vger.kernel.org
+Description: Current hardblock state. This file is read only.
+Values: A numeric value.
+ 0: inactive
+ The transmitter is (potentially) active.
+ 1: active
+ The transmitter is forced off by something outside of
+ the driver's control.
+
+
+What: /sys/class/rfkill/rfkill[0-9]+/soft
+Date: 12-March-2010
+KernelVersion v2.6.34
+Contact: linux-wireless@vger.kernel.org
+Description: Current softblock state. This file is read and write.
+Values: A numeric value.
+ 0: inactive
+ The transmitter is (potentially) active.
+ 1: active
+ The transmitter is turned off by software.
diff --git a/Documentation/ABI/stable/sysfs-class-tpm b/Documentation/ABI/stable/sysfs-class-tpm
new file mode 100644
index 000000000..9f790eebb
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-class-tpm
@@ -0,0 +1,185 @@
+What: /sys/class/tpm/tpmX/device/
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: tpmdd-devel@lists.sf.net
+Description: The device/ directory under a specific TPM instance exposes
+ the properties of that TPM chip
+
+
+What: /sys/class/tpm/tpmX/device/active
+Date: April 2006
+KernelVersion: 2.6.17
+Contact: tpmdd-devel@lists.sf.net
+Description: The "active" property prints a '1' if the TPM chip is accepting
+ commands. An inactive TPM chip still contains all the state of
+ an active chip (Storage Root Key, NVRAM, etc), and can be
+ visible to the OS, but will only accept a restricted set of
+ commands. See the TPM Main Specification part 2, Structures,
+ section 17 for more information on which commands are
+ available.
+
+What: /sys/class/tpm/tpmX/device/cancel
+Date: June 2005
+KernelVersion: 2.6.13
+Contact: tpmdd-devel@lists.sf.net
+Description: The "cancel" property allows you to cancel the currently
+ pending TPM command. Writing any value to cancel will call the
+ TPM vendor specific cancel operation.
+
+What: /sys/class/tpm/tpmX/device/caps
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: tpmdd-devel@lists.sf.net
+Description: The "caps" property contains TPM manufacturer and version info.
+
+ Example output:
+
+ Manufacturer: 0x53544d20
+ TCG version: 1.2
+ Firmware version: 8.16
+
+ Manufacturer is a hex dump of the 4 byte manufacturer info
+ space in a TPM. TCG version shows the TCG TPM spec level that
+ the chip supports. Firmware version is that of the chip and
+ is manufacturer specific.
+
+What: /sys/class/tpm/tpmX/device/durations
+Date: March 2011
+KernelVersion: 3.1
+Contact: tpmdd-devel@lists.sf.net
+Description: The "durations" property shows the 3 vendor-specific values
+ used to wait for a short, medium and long TPM command. All
+ TPM commands are categorized as short, medium or long in
+ execution time, so that the driver doesn't have to wait
+ any longer than necessary before starting to poll for a
+ result.
+
+ Example output:
+
+ 3015000 4508000 180995000 [original]
+
+ Here the short, medium and long durations are displayed in
+ usecs. "[original]" indicates that the values are displayed
+ unmodified from when they were queried from the chip.
+ Durations can be modified in the case where a buggy chip
+ reports them in msec instead of usec and they need to be
+ scaled to be displayed in usecs. In this case "[adjusted]"
+ will be displayed in place of "[original]".
+
+What: /sys/class/tpm/tpmX/device/enabled
+Date: April 2006
+KernelVersion: 2.6.17
+Contact: tpmdd-devel@lists.sf.net
+Description: The "enabled" property prints a '1' if the TPM chip is enabled,
+ meaning that it should be visible to the OS. This property
+ may be visible but produce a '0' after some operation that
+ disables the TPM.
+
+What: /sys/class/tpm/tpmX/device/owned
+Date: April 2006
+KernelVersion: 2.6.17
+Contact: tpmdd-devel@lists.sf.net
+Description: The "owned" property produces a '1' if the TPM_TakeOwnership
+ ordinal has been executed successfully in the chip. A '0'
+ indicates that ownership hasn't been taken.
+
+What: /sys/class/tpm/tpmX/device/pcrs
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: tpmdd-devel@lists.sf.net
+Description: The "pcrs" property will dump the current value of all Platform
+ Configuration Registers in the TPM. Note that since these
+ values may be constantly changing, the output is only valid
+ for a snapshot in time.
+
+ Example output:
+
+ PCR-00: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
+ PCR-01: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
+ PCR-02: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
+ PCR-03: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
+ PCR-04: 3A 3F 78 0F 11 A4 B4 99 69 FC AA 80 CD 6E 39 57 C3 3B 22 75
+ ...
+
+ The number of PCRs and hex bytes needed to represent a PCR
+ value will vary depending on TPM chip version. For TPM 1.1 and
+ 1.2 chips, PCRs represent SHA-1 hashes, which are 20 bytes
+ long. Use the "caps" property to determine TPM version.
+
+What: /sys/class/tpm/tpmX/device/pubek
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: tpmdd-devel@lists.sf.net
+Description: The "pubek" property will return the TPM's public endorsement
+ key if possible. If the TPM has had ownership established and
+ is version 1.2, the pubek will not be available without the
+ owner's authorization. Since the TPM driver doesn't store any
+ secrets, it can't authorize its own request for the pubek,
+ making it unaccessible. The public endorsement key is gener-
+ ated at TPM menufacture time and exists for the life of the
+ chip.
+
+ Example output:
+
+ Algorithm: 00 00 00 01
+ Encscheme: 00 03
+ Sigscheme: 00 01
+ Parameters: 00 00 08 00 00 00 00 02 00 00 00 00
+ Modulus length: 256
+ Modulus:
+ B4 76 41 82 C9 20 2C 10 18 40 BC 8B E5 44 4C 6C
+ 3A B2 92 0C A4 9B 2A 83 EB 5C 12 85 04 48 A0 B6
+ 1E E4 81 84 CE B2 F2 45 1C F0 85 99 61 02 4D EB
+ 86 C4 F7 F3 29 60 52 93 6B B2 E5 AB 8B A9 09 E3
+ D7 0E 7D CA 41 BF 43 07 65 86 3C 8C 13 7A D0 8B
+ 82 5E 96 0B F8 1F 5F 34 06 DA A2 52 C1 A9 D5 26
+ 0F F4 04 4B D9 3F 2D F2 AC 2F 74 64 1F 8B CD 3E
+ 1E 30 38 6C 70 63 69 AB E2 50 DF 49 05 2E E1 8D
+ 6F 78 44 DA 57 43 69 EE 76 6C 38 8A E9 8E A3 F0
+ A7 1F 3C A8 D0 12 15 3E CA 0E BD FA 24 CD 33 C6
+ 47 AE A4 18 83 8E 22 39 75 93 86 E6 FD 66 48 B6
+ 10 AD 94 14 65 F9 6A 17 78 BD 16 53 84 30 BF 70
+ E0 DC 65 FD 3C C6 B0 1E BF B9 C1 B5 6C EF B1 3A
+ F8 28 05 83 62 26 11 DC B4 6B 5A 97 FF 32 26 B6
+ F7 02 71 CF 15 AE 16 DD D1 C1 8E A8 CF 9B 50 7B
+ C3 91 FF 44 1E CF 7C 39 FE 17 77 21 20 BD CE 9B
+
+ Possible values:
+
+ Algorithm: TPM_ALG_RSA (1)
+ Encscheme: TPM_ES_RSAESPKCSv15 (2)
+ TPM_ES_RSAESOAEP_SHA1_MGF1 (3)
+ Sigscheme: TPM_SS_NONE (1)
+ Parameters, a byte string of 3 u32 values:
+ Key Length (bits): 00 00 08 00 (2048)
+ Num primes: 00 00 00 02 (2)
+ Exponent Size: 00 00 00 00 (0 means the
+ default exp)
+ Modulus Length: 256 (bytes)
+ Modulus: The 256 byte Endorsement Key modulus
+
+What: /sys/class/tpm/tpmX/device/temp_deactivated
+Date: April 2006
+KernelVersion: 2.6.17
+Contact: tpmdd-devel@lists.sf.net
+Description: The "temp_deactivated" property returns a '1' if the chip has
+ been temporarily dectivated, usually until the next power
+ cycle. Whether a warm boot (reboot) will clear a TPM chip
+ from a temp_deactivated state is platform specific.
+
+What: /sys/class/tpm/tpmX/device/timeouts
+Date: March 2011
+KernelVersion: 3.1
+Contact: tpmdd-devel@lists.sf.net
+Description: The "timeouts" property shows the 4 vendor-specific values
+ for the TPM's interface spec timeouts. The use of these
+ timeouts is defined by the TPM interface spec that the chip
+ conforms to.
+
+ Example output:
+
+ 750000 750000 750000 750000 [original]
+
+ The four timeout values are shown in usecs, with a trailing
+ "[original]" or "[adjusted]" depending on whether the values
+ were scaled by the driver to be reported in usec from msecs.
diff --git a/Documentation/ABI/stable/sysfs-class-ubi b/Documentation/ABI/stable/sysfs-class-ubi
new file mode 100644
index 000000000..18d471d9f
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-class-ubi
@@ -0,0 +1,212 @@
+What: /sys/class/ubi/
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ The ubi/ class sub-directory belongs to the UBI subsystem and
+ provides general UBI information, per-UBI device information
+ and per-UBI volume information.
+
+What: /sys/class/ubi/version
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ This file contains version of the latest supported UBI on-media
+ format. Currently it is 1, and there is no plan to change this.
+ However, if in the future UBI needs on-flash format changes
+ which cannot be done in a compatible manner, a new format
+ version will be added. So this is a mechanism for possible
+ future backward-compatible (but forward-incompatible)
+ improvements.
+
+What: /sys/class/ubiX/
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ The /sys/class/ubi0, /sys/class/ubi1, etc directories describe
+ UBI devices (UBI device 0, 1, etc). They contain general UBI
+ device information and per UBI volume information (each UBI
+ device may have many UBI volumes)
+
+What: /sys/class/ubi/ubiX/avail_eraseblocks
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Amount of available logical eraseblock. For example, one may
+ create a new UBI volume which has this amount of logical
+ eraseblocks.
+
+What: /sys/class/ubi/ubiX/bad_peb_count
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Count of bad physical eraseblocks on the underlying MTD device.
+
+What: /sys/class/ubi/ubiX/bgt_enabled
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Contains ASCII "0\n" if the UBI background thread is disabled,
+ and ASCII "1\n" if it is enabled.
+
+What: /sys/class/ubi/ubiX/dev
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Major and minor numbers of the character device corresponding
+ to this UBI device (in <major>:<minor> format).
+
+What: /sys/class/ubi/ubiX/eraseblock_size
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Maximum logical eraseblock size this UBI device may provide. UBI
+ volumes may have smaller logical eraseblock size because of their
+ alignment.
+
+What: /sys/class/ubi/ubiX/max_ec
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Maximum physical eraseblock erase counter value.
+
+What: /sys/class/ubi/ubiX/max_vol_count
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Maximum number of volumes which this UBI device may have.
+
+What: /sys/class/ubi/ubiX/min_io_size
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Minimum input/output unit size. All the I/O may only be done
+ in fractions of the contained number.
+
+What: /sys/class/ubi/ubiX/mtd_num
+Date: January 2008
+KernelVersion: 2.6.25
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Number of the underlying MTD device.
+
+What: /sys/class/ubi/ubiX/reserved_for_bad
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Number of physical eraseblocks reserved for bad block handling.
+
+What: /sys/class/ubi/ubiX/total_eraseblocks
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Total number of good (not marked as bad) physical eraseblocks on
+ the underlying MTD device.
+
+What: /sys/class/ubi/ubiX/volumes_count
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Count of volumes on this UBI device.
+
+What: /sys/class/ubi/ubiX/ubiX_Y/
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ The /sys/class/ubi/ubiX/ubiX_0/, /sys/class/ubi/ubiX/ubiX_1/,
+ etc directories describe UBI volumes on UBI device X (volumes
+ 0, 1, etc).
+
+What: /sys/class/ubi/ubiX/ubiX_Y/alignment
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Volume alignment - the value the logical eraseblock size of
+ this volume has to be aligned on. For example, 2048 means that
+ logical eraseblock size is multiple of 2048. In other words,
+ volume logical eraseblock size is UBI device logical eraseblock
+ size aligned to the alignment value.
+
+What: /sys/class/ubi/ubiX/ubiX_Y/corrupted
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Contains ASCII "0\n" if the UBI volume is OK, and ASCII "1\n"
+ if it is corrupted (e.g., due to an interrupted volume update).
+
+What: /sys/class/ubi/ubiX/ubiX_Y/data_bytes
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ The amount of data this volume contains. This value makes sense
+ only for static volumes, and for dynamic volume it equivalent
+ to the total volume size in bytes.
+
+What: /sys/class/ubi/ubiX/ubiX_Y/dev
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Major and minor numbers of the character device corresponding
+ to this UBI volume (in <major>:<minor> format).
+
+What: /sys/class/ubi/ubiX/ubiX_Y/name
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Volume name.
+
+What: /sys/class/ubi/ubiX/ubiX_Y/reserved_ebs
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Count of physical eraseblock reserved for this volume.
+ Equivalent to the volume size in logical eraseblocks.
+
+What: /sys/class/ubi/ubiX/ubiX_Y/type
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Volume type. Contains ASCII "dynamic\n" for dynamic volumes and
+ "static\n" for static volumes.
+
+What: /sys/class/ubi/ubiX/ubiX_Y/upd_marker
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Contains ASCII "0\n" if the update marker is not set for this
+ volume, and "1\n" if it is set. The update marker is set when
+ volume update starts, and cleaned when it ends. So the presence
+ of the update marker indicates that the volume is being updated
+ at the moment of the update was interrupted. The later may be
+ checked using the "corrupted" sysfs file.
+
+What: /sys/class/ubi/ubiX/ubiX_Y/usable_eb_size
+Date: July 2006
+KernelVersion: 2.6.22
+Contact: Artem Bityutskiy <dedekind@infradead.org>
+Description:
+ Logical eraseblock size of this volume. Equivalent to logical
+ eraseblock size of the device aligned on the volume alignment
+ value.
diff --git a/Documentation/ABI/stable/sysfs-class-udc b/Documentation/ABI/stable/sysfs-class-udc
new file mode 100644
index 000000000..85d3dac2e
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-class-udc
@@ -0,0 +1,93 @@
+What: /sys/class/udc/<udc>/a_alt_hnp_support
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Indicates if an OTG A-Host supports HNP at an alternate port.
+Users:
+
+What: /sys/class/udc/<udc>/a_hnp_support
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Indicates if an OTG A-Host supports HNP at this port.
+Users:
+
+What: /sys/class/udc/<udc>/b_hnp_enable
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Indicates if an OTG A-Host enabled HNP support.
+Users:
+
+What: /sys/class/udc/<udc>/current_speed
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Indicates the current negotiated speed at this port.
+Users:
+
+What: /sys/class/udc/<udc>/is_a_peripheral
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Indicates that this port is the default Host on an OTG session
+ but HNP was used to switch roles.
+Users:
+
+What: /sys/class/udc/<udc>/is_otg
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Indicates that this port support OTG.
+Users:
+
+What: /sys/class/udc/<udc>/maximum_speed
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Indicates the maximum USB speed supported by this port.
+Users:
+
+What: /sys/class/udc/<udc>/maximum_speed
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Indicates the maximum USB speed supported by this port.
+Users:
+
+What: /sys/class/udc/<udc>/soft_connect
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Allows users to disconnect data pullup resistors thus causing a
+ logical disconnection from the USB Host.
+Users:
+
+What: /sys/class/udc/<udc>/srp
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Allows users to manually start Session Request Protocol.
+Users:
+
+What: /sys/class/udc/<udc>/state
+Date: June 2011
+KernelVersion: 3.1
+Contact: Felipe Balbi <balbi@kernel.org>
+Description:
+ Indicates current state of the USB Device Controller. Valid
+ states are: 'not-attached', 'attached', 'powered',
+ 'reconnecting', 'unauthenticated', 'default', 'addressed',
+ 'configured', and 'suspended'; however not all USB Device
+ Controllers support reporting all states.
+Users:
diff --git a/Documentation/ABI/stable/sysfs-devices b/Documentation/ABI/stable/sysfs-devices
new file mode 100644
index 000000000..43f78b88d
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-devices
@@ -0,0 +1,10 @@
+# Note: This documents additional properties of any device beyond what
+# is documented in Documentation/sysfs-rules.txt
+
+What: /sys/devices/*/of_path
+Date: February 2015
+Contact: Device Tree mailing list <devicetree@vger.kernel.org>
+Description:
+ Any device associated with a device-tree node will have
+ an of_path symlink pointing to the corresponding device
+ node in /sys/firmware/devicetree/
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
new file mode 100644
index 000000000..5b2d0f088
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -0,0 +1,93 @@
+What: /sys/devices/system/node/possible
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ Nodes that could be possibly become online at some point.
+
+What: /sys/devices/system/node/online
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ Nodes that are online.
+
+What: /sys/devices/system/node/has_normal_memory
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ Nodes that have regular memory.
+
+What: /sys/devices/system/node/has_cpu
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ Nodes that have one or more CPUs.
+
+What: /sys/devices/system/node/has_high_memory
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ Nodes that have regular or high memory.
+ Depends on CONFIG_HIGHMEM.
+
+What: /sys/devices/system/node/nodeX
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ When CONFIG_NUMA is enabled, this is a directory containing
+ information on node X such as what CPUs are local to the
+ node. Each file is detailed next.
+
+What: /sys/devices/system/node/nodeX/cpumap
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ The node's cpumap.
+
+What: /sys/devices/system/node/nodeX/cpulist
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ The CPUs associated to the node.
+
+What: /sys/devices/system/node/nodeX/meminfo
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ Provides information about the node's distribution and memory
+ utilization. Similar to /proc/meminfo, see Documentation/filesystems/proc.txt
+
+What: /sys/devices/system/node/nodeX/numastat
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ The node's hit/miss statistics, in units of pages.
+ See Documentation/numastat.txt
+
+What: /sys/devices/system/node/nodeX/distance
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ Distance between the node and all the other nodes
+ in the system.
+
+What: /sys/devices/system/node/nodeX/vmstat
+Date: October 2002
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ The node's zoned virtual memory statistics.
+ This is a superset of numastat.
+
+What: /sys/devices/system/node/nodeX/compact
+Date: February 2010
+Contact: Mel Gorman <mel@csn.ul.ie>
+Description:
+ When this file is written to, all memory within that node
+ will be compacted. When it completes, memory will be freed
+ into blocks which have as many contiguous pages as possible
+
+What: /sys/devices/system/node/nodeX/hugepages/hugepages-<size>/
+Date: December 2009
+Contact: Lee Schermerhorn <lee.schermerhorn@hp.com>
+Description:
+ The node's huge page size control/query attributes.
+ See Documentation/vm/hugetlbpage.txt \ No newline at end of file
diff --git a/Documentation/ABI/stable/sysfs-devices-system-cpu b/Documentation/ABI/stable/sysfs-devices-system-cpu
new file mode 100644
index 000000000..33c133e2a
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-devices-system-cpu
@@ -0,0 +1,25 @@
+What: /sys/devices/system/cpu/dscr_default
+Date: 13-May-2014
+KernelVersion: v3.15.0
+Contact:
+Description: Writes are equivalent to writing to
+ /sys/devices/system/cpu/cpuN/dscr on all CPUs.
+ Reads return the last written value or 0.
+ This value is not a global default: it is a way to set
+ all per-CPU defaults at the same time.
+Values: 64 bit unsigned integer (bit field)
+
+What: /sys/devices/system/cpu/cpu[0-9]+/dscr
+Date: 13-May-2014
+KernelVersion: v3.15.0
+Contact:
+Description: Default value for the Data Stream Control Register (DSCR) on
+ a CPU.
+ This default value is used when the kernel is executing and
+ for any process that has not set the DSCR itself.
+ If a process ever sets the DSCR (via direct access to the
+ SPR) that value will be persisted for that process and used
+ on any CPU where it executes (overriding the value described
+ here).
+ If set by a process it will be inherited by child processes.
+Values: 64 bit unsigned integer (bit field)
diff --git a/Documentation/ABI/stable/sysfs-devices-system-xen_memory b/Documentation/ABI/stable/sysfs-devices-system-xen_memory
new file mode 100644
index 000000000..caa311d59
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-devices-system-xen_memory
@@ -0,0 +1,77 @@
+What: /sys/devices/system/xen_memory/xen_memory0/max_retry_count
+Date: May 2011
+KernelVersion: 2.6.39
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The maximum number of times the balloon driver will
+ attempt to increase the balloon before giving up. See
+ also 'retry_count' below.
+ A value of zero means retry forever and is the default one.
+
+What: /sys/devices/system/xen_memory/xen_memory0/max_schedule_delay
+Date: May 2011
+KernelVersion: 2.6.39
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The limit that 'schedule_delay' (see below) will be
+ increased to. The default value is 32 seconds.
+
+What: /sys/devices/system/xen_memory/xen_memory0/retry_count
+Date: May 2011
+KernelVersion: 2.6.39
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The current number of times that the balloon driver
+ has attempted to increase the size of the balloon.
+ The default value is one. With max_retry_count being
+ zero (unlimited), this means that the driver will attempt
+ to retry with a 'schedule_delay' delay.
+
+What: /sys/devices/system/xen_memory/xen_memory0/schedule_delay
+Date: May 2011
+KernelVersion: 2.6.39
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The time (in seconds) to wait between attempts to
+ increase the balloon. Each time the balloon cannot be
+ increased, 'schedule_delay' is increased (until
+ 'max_schedule_delay' is reached at which point it
+ will use the max value).
+
+What: /sys/devices/system/xen_memory/xen_memory0/target
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ The target number of pages to adjust this domain's
+ memory reservation to.
+
+What: /sys/devices/system/xen_memory/xen_memory0/target_kb
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ As target above, except the value is in KiB.
+
+What: /sys/devices/system/xen_memory/xen_memory0/info/current_kb
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Current size (in KiB) of this domain's memory
+ reservation.
+
+What: /sys/devices/system/xen_memory/xen_memory0/info/high_kb
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Amount (in KiB) of high memory in the balloon.
+
+What: /sys/devices/system/xen_memory/xen_memory0/info/low_kb
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Amount (in KiB) of low (or normal) memory in the
+ balloon.
diff --git a/Documentation/ABI/stable/sysfs-driver-ib_srp b/Documentation/ABI/stable/sysfs-driver-ib_srp
new file mode 100644
index 000000000..7049a2b50
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-ib_srp
@@ -0,0 +1,189 @@
+What: /sys/class/infiniband_srp/srp-<hca>-<port_number>/add_target
+Date: January 2, 2006
+KernelVersion: 2.6.15
+Contact: linux-rdma@vger.kernel.org
+Description: Interface for making ib_srp connect to a new target.
+ One can request ib_srp to connect to a new target by writing
+ a comma-separated list of login parameters to this sysfs
+ attribute. The supported parameters are:
+ * id_ext, a 16-digit hexadecimal number specifying the eight
+ byte identifier extension in the 16-byte SRP target port
+ identifier. The target port identifier is sent by ib_srp
+ to the target in the SRP_LOGIN_REQ request.
+ * ioc_guid, a 16-digit hexadecimal number specifying the eight
+ byte I/O controller GUID portion of the 16-byte target port
+ identifier.
+ * dgid, a 32-digit hexadecimal number specifying the
+ destination GID.
+ * pkey, a four-digit hexadecimal number specifying the
+ InfiniBand partition key.
+ * service_id, a 16-digit hexadecimal number specifying the
+ InfiniBand service ID used to establish communication with
+ the SRP target. How to find out the value of the service ID
+ is specified in the documentation of the SRP target.
+ * max_sect, a decimal number specifying the maximum number of
+ 512-byte sectors to be transferred via a single SCSI command.
+ * max_cmd_per_lun, a decimal number specifying the maximum
+ number of outstanding commands for a single LUN.
+ * io_class, a hexadecimal number specifying the SRP I/O class.
+ Must be either 0xff00 (rev 10) or 0x0100 (rev 16a). The I/O
+ class defines the format of the SRP initiator and target
+ port identifiers.
+ * initiator_ext, a 16-digit hexadecimal number specifying the
+ identifier extension portion of the SRP initiator port
+ identifier. This data is sent by the initiator to the target
+ in the SRP_LOGIN_REQ request.
+ * cmd_sg_entries, a number in the range 1..255 that specifies
+ the maximum number of data buffer descriptors stored in the
+ SRP_CMD information unit itself. With allow_ext_sg=0 the
+ parameter cmd_sg_entries defines the maximum S/G list length
+ for a single SRP_CMD, and commands whose S/G list length
+ exceeds this limit after S/G list collapsing will fail.
+ * allow_ext_sg, whether ib_srp is allowed to include a partial
+ memory descriptor list in an SRP_CMD instead of the entire
+ list. If a partial memory descriptor list has been included
+ in an SRP_CMD the remaining memory descriptors are
+ communicated from initiator to target via an additional RDMA
+ transfer. Setting allow_ext_sg to 1 increases the maximum
+ amount of data that can be transferred between initiator and
+ target via a single SCSI command. Since not all SRP target
+ implementations support partial memory descriptor lists the
+ default value for this option is 0.
+ * sg_tablesize, a number in the range 1..2048 specifying the
+ maximum S/G list length the SCSI layer is allowed to pass to
+ ib_srp. Specifying a value that exceeds cmd_sg_entries is
+ only safe with partial memory descriptor list support enabled
+ (allow_ext_sg=1).
+ * comp_vector, a number in the range 0..n-1 specifying the
+ MSI-X completion vector of the first RDMA channel. Some
+ HCA's allocate multiple (n) MSI-X vectors per HCA port. If
+ the IRQ affinity masks of these interrupts have been
+ configured such that each MSI-X interrupt is handled by a
+ different CPU then the comp_vector parameter can be used to
+ spread the SRP completion workload over multiple CPU's.
+ * tl_retry_count, a number in the range 2..7 specifying the
+ IB RC retry count.
+ * queue_size, the maximum number of commands that the
+ initiator is allowed to queue per SCSI host. The default
+ value for this parameter is 62. The lowest supported value
+ is 2.
+
+What: /sys/class/infiniband_srp/srp-<hca>-<port_number>/ibdev
+Date: January 2, 2006
+KernelVersion: 2.6.15
+Contact: linux-rdma@vger.kernel.org
+Description: HCA name (<hca>).
+
+What: /sys/class/infiniband_srp/srp-<hca>-<port_number>/port
+Date: January 2, 2006
+KernelVersion: 2.6.15
+Contact: linux-rdma@vger.kernel.org
+Description: HCA port number (<port_number>).
+
+What: /sys/class/scsi_host/host<n>/allow_ext_sg
+Date: May 19, 2011
+KernelVersion: 2.6.39
+Contact: linux-rdma@vger.kernel.org
+Description: Whether ib_srp is allowed to include a partial memory
+ descriptor list in an SRP_CMD when communicating with an SRP
+ target.
+
+What: /sys/class/scsi_host/host<n>/ch_count
+Date: April 1, 2015
+KernelVersion: 3.19
+Contact: linux-rdma@vger.kernel.org
+Description: Number of RDMA channels used for communication with the SRP
+ target.
+
+What: /sys/class/scsi_host/host<n>/cmd_sg_entries
+Date: May 19, 2011
+KernelVersion: 2.6.39
+Contact: linux-rdma@vger.kernel.org
+Description: Maximum number of data buffer descriptors that may be sent to
+ the target in a single SRP_CMD request.
+
+What: /sys/class/scsi_host/host<n>/comp_vector
+Date: September 2, 2013
+KernelVersion: 3.11
+Contact: linux-rdma@vger.kernel.org
+Description: Completion vector used for the first RDMA channel.
+
+What: /sys/class/scsi_host/host<n>/dgid
+Date: June 17, 2006
+KernelVersion: 2.6.17
+Contact: linux-rdma@vger.kernel.org
+Description: InfiniBand destination GID used for communication with the SRP
+ target. Differs from orig_dgid if port redirection has happened.
+
+What: /sys/class/scsi_host/host<n>/id_ext
+Date: June 17, 2006
+KernelVersion: 2.6.17
+Contact: linux-rdma@vger.kernel.org
+Description: Eight-byte identifier extension portion of the 16-byte target
+ port identifier.
+
+What: /sys/class/scsi_host/host<n>/ioc_guid
+Date: June 17, 2006
+KernelVersion: 2.6.17
+Contact: linux-rdma@vger.kernel.org
+Description: Eight-byte I/O controller GUID portion of the 16-byte target
+ port identifier.
+
+What: /sys/class/scsi_host/host<n>/local_ib_device
+Date: November 29, 2006
+KernelVersion: 2.6.19
+Contact: linux-rdma@vger.kernel.org
+Description: Name of the InfiniBand HCA used for communicating with the
+ SRP target.
+
+What: /sys/class/scsi_host/host<n>/local_ib_port
+Date: November 29, 2006
+KernelVersion: 2.6.19
+Contact: linux-rdma@vger.kernel.org
+Description: Number of the HCA port used for communicating with the
+ SRP target.
+
+What: /sys/class/scsi_host/host<n>/orig_dgid
+Date: June 17, 2006
+KernelVersion: 2.6.17
+Contact: linux-rdma@vger.kernel.org
+Description: InfiniBand destination GID specified in the parameters
+ written to the add_target sysfs attribute.
+
+What: /sys/class/scsi_host/host<n>/pkey
+Date: June 17, 2006
+KernelVersion: 2.6.17
+Contact: linux-rdma@vger.kernel.org
+Description: A 16-bit number representing the InfiniBand partition key used
+ for communication with the SRP target.
+
+What: /sys/class/scsi_host/host<n>/req_lim
+Date: October 20, 2010
+KernelVersion: 2.6.36
+Contact: linux-rdma@vger.kernel.org
+Description: Number of requests ib_srp can send to the target before it has
+ to wait for more credits. For more information see also the
+ SRP credit algorithm in the SRP specification.
+
+What: /sys/class/scsi_host/host<n>/service_id
+Date: June 17, 2006
+KernelVersion: 2.6.17
+Contact: linux-rdma@vger.kernel.org
+Description: InfiniBand service ID used for establishing communication with
+ the SRP target.
+
+What: /sys/class/scsi_host/host<n>/sgid
+Date: February 1, 2014
+KernelVersion: 3.13
+Contact: linux-rdma@vger.kernel.org
+Description: InfiniBand GID of the source port used for communication with
+ the SRP target.
+
+What: /sys/class/scsi_host/host<n>/zero_req_lim
+Date: September 20, 2006
+KernelVersion: 2.6.18
+Contact: linux-rdma@vger.kernel.org
+Description: Number of times the initiator had to wait before sending a
+ request to the target because it ran out of credits. For more
+ information see also the SRP credit algorithm in the SRP
+ specification.
diff --git a/Documentation/ABI/stable/sysfs-driver-qla2xxx b/Documentation/ABI/stable/sysfs-driver-qla2xxx
new file mode 100644
index 000000000..9a59d8449
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-qla2xxx
@@ -0,0 +1,8 @@
+What: /sys/bus/pci/drivers/qla2xxx/.../devices/*
+Date: September 2009
+Contact: QLogic Linux Driver <linux-driver@qlogic.com>
+Description: qla2xxx-udev.sh currently looks for uevent CHANGE events to
+ signal a firmware-dump has been generated by the driver and is
+ ready for retrieval.
+Users: qla2xxx-udev.sh. Proposed changes should be mailed to
+ linux-driver@qlogic.com
diff --git a/Documentation/ABI/stable/sysfs-driver-usb-usbtmc b/Documentation/ABI/stable/sysfs-driver-usb-usbtmc
new file mode 100644
index 000000000..e960cd027
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-usb-usbtmc
@@ -0,0 +1,62 @@
+What: /sys/bus/usb/drivers/usbtmc/*/interface_capabilities
+What: /sys/bus/usb/drivers/usbtmc/*/device_capabilities
+Date: August 2008
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description:
+ These files show the various USB TMC capabilities as described
+ by the device itself. The full description of the bitfields
+ can be found in the USB TMC documents from the USB-IF entitled
+ "Universal Serial Bus Test and Measurement Class Specification
+ (USBTMC) Revision 1.0" section 4.2.1.8.
+
+ The files are read only.
+
+
+What: /sys/bus/usb/drivers/usbtmc/*/usb488_interface_capabilities
+What: /sys/bus/usb/drivers/usbtmc/*/usb488_device_capabilities
+Date: August 2008
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description:
+ These files show the various USB TMC capabilities as described
+ by the device itself. The full description of the bitfields
+ can be found in the USB TMC documents from the USB-IF entitled
+ "Universal Serial Bus Test and Measurement Class, Subclass
+ USB488 Specification (USBTMC-USB488) Revision 1.0" section
+ 4.2.2.
+
+ The files are read only.
+
+
+What: /sys/bus/usb/drivers/usbtmc/*/TermChar
+Date: August 2008
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description:
+ This file is the TermChar value to be sent to the USB TMC
+ device as described by the document, "Universal Serial Bus Test
+ and Measurement Class Specification
+ (USBTMC) Revision 1.0" as published by the USB-IF.
+
+ Note that the TermCharEnabled file determines if this value is
+ sent to the device or not.
+
+
+What: /sys/bus/usb/drivers/usbtmc/*/TermCharEnabled
+Date: August 2008
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description:
+ This file determines if the TermChar is to be sent to the
+ device on every transaction or not. For more details about
+ this, please see the document, "Universal Serial Bus Test and
+ Measurement Class Specification (USBTMC) Revision 1.0" as
+ published by the USB-IF.
+
+
+What: /sys/bus/usb/drivers/usbtmc/*/auto_abort
+Date: August 2008
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description:
+ This file determines if the transaction of the USB TMC
+ device is to be automatically aborted if there is any error.
+ For more details about this, please see the document,
+ "Universal Serial Bus Test and Measurement Class Specification
+ (USBTMC) Revision 1.0" as published by the USB-IF.
diff --git a/Documentation/ABI/stable/sysfs-driver-w1_ds28e04 b/Documentation/ABI/stable/sysfs-driver-w1_ds28e04
new file mode 100644
index 000000000..26579ee86
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-driver-w1_ds28e04
@@ -0,0 +1,15 @@
+What: /sys/bus/w1/devices/.../pio
+Date: May 2012
+Contact: Markus Franke <franm@hrz.tu-chemnitz.de>
+Description: read/write the contents of the two PIO's of the DS28E04-100
+ see Documentation/w1/slaves/w1_ds28e04 for detailed information
+Users: any user space application which wants to communicate with DS28E04-100
+
+
+
+What: /sys/bus/w1/devices/.../eeprom
+Date: May 2012
+Contact: Markus Franke <franm@hrz.tu-chemnitz.de>
+Description: read/write the contents of the EEPROM memory of the DS28E04-100
+ see Documentation/w1/slaves/w1_ds28e04 for detailed information
+Users: any user space application which wants to communicate with DS28E04-100
diff --git a/Documentation/ABI/stable/sysfs-firmware-efi-vars b/Documentation/ABI/stable/sysfs-firmware-efi-vars
new file mode 100644
index 000000000..5def20b90
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-firmware-efi-vars
@@ -0,0 +1,75 @@
+What: /sys/firmware/efi/vars
+Date: April 2004
+Contact: Matt Domsch <Matt_Domsch@dell.com>
+Description:
+ This directory exposes interfaces for interactive with
+ EFI variables. For more information on EFI variables,
+ see 'Variable Services' in the UEFI specification
+ (section 7.2 in specification version 2.3 Errata D).
+
+ In summary, EFI variables are named, and are classified
+ into separate namespaces through the use of a vendor
+ GUID. They also have an arbitrary binary value
+ associated with them.
+
+ The efivars module enumerates these variables and
+ creates a separate directory for each one found. Each
+ directory has a name of the form "<key>-<vendor guid>"
+ and contains the following files:
+
+ attributes: A read-only text file enumerating the
+ EFI variable flags. Potential values
+ include:
+
+ EFI_VARIABLE_NON_VOLATILE
+ EFI_VARIABLE_BOOTSERVICE_ACCESS
+ EFI_VARIABLE_RUNTIME_ACCESS
+ EFI_VARIABLE_HARDWARE_ERROR_RECORD
+ EFI_VARIABLE_AUTHENTICATED_WRITE_ACCESS
+
+ See the EFI documentation for an
+ explanation of each of these variables.
+
+ data: A read-only binary file that can be read
+ to attain the value of the EFI variable
+
+ guid: The vendor GUID of the variable. This
+ should always match the GUID in the
+ variable's name.
+
+ raw_var: A binary file that can be read to obtain
+ a structure that contains everything
+ there is to know about the variable.
+ For structure definition see "struct
+ efi_variable" in the kernel sources.
+
+ This file can also be written to in
+ order to update the value of a variable.
+ For this to work however, all fields of
+ the "struct efi_variable" passed must
+ match byte for byte with the structure
+ read out of the file, save for the value
+ portion.
+
+ **Note** the efi_variable structure
+ read/written with this file contains a
+ 'long' type that may change widths
+ depending on your underlying
+ architecture.
+
+ size: As ASCII representation of the size of
+ the variable's value.
+
+
+ In addition, two other magic binary files are provided
+ in the top-level directory and are used for adding and
+ removing variables:
+
+ new_var: Takes a "struct efi_variable" and
+ instructs the EFI firmware to create a
+ new variable.
+
+ del_var: Takes a "struct efi_variable" and
+ instructs the EFI firmware to remove any
+ variable that has a matching vendor GUID
+ and variable key name.
diff --git a/Documentation/ABI/stable/sysfs-firmware-opal-dump b/Documentation/ABI/stable/sysfs-firmware-opal-dump
new file mode 100644
index 000000000..32fe7f5c4
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-firmware-opal-dump
@@ -0,0 +1,41 @@
+What: /sys/firmware/opal/dump
+Date: Feb 2014
+Contact: Stewart Smith <stewart@linux.vnet.ibm.com>
+Description:
+ This directory exposes interfaces for interacting with
+ the FSP and platform dumps through OPAL firmware interface.
+
+ This is only for the powerpc/powernv platform.
+
+ initiate_dump: When '1' is written to it,
+ we will initiate a dump.
+ Read this file for supported commands.
+
+ 0xXX-0xYYYY: A directory for dump of type 0xXX and
+ id 0xYYYY (in hex). The name of this
+ directory should not be relied upon to
+ be in this format, only that it's unique
+ among all dumps. For determining the type
+ and ID of the dump, use the id and type files.
+ Do not rely on any particular size of dump
+ type or dump id.
+
+ Each dump has the following files:
+ id: An ASCII representation of the dump ID
+ in hex (e.g. '0x01')
+ type: An ASCII representation of the type of
+ dump in the format "0x%x %s" with the ID
+ in hex and a description of the dump type
+ (or 'unknown').
+ Type '0xffffffff unknown' is used when
+ we could not get the type from firmware.
+ e.g. '0x02 System/Platform Dump'
+ dump: A binary file containing the dump.
+ The size of the dump is the size of this file.
+ acknowledge: When 'ack' is written to this, we will
+ acknowledge that we've retrieved the
+ dump to the service processor. It will
+ then remove it, making the dump
+ inaccessible.
+ Reading this file will get a list of
+ supported actions.
diff --git a/Documentation/ABI/stable/sysfs-firmware-opal-elog b/Documentation/ABI/stable/sysfs-firmware-opal-elog
new file mode 100644
index 000000000..e1f3058f5
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-firmware-opal-elog
@@ -0,0 +1,60 @@
+What: /sys/firmware/opal/elog
+Date: Feb 2014
+Contact: Stewart Smith <stewart@linux.vnet.ibm.com>
+Description:
+ This directory exposes error log entries retrieved
+ through the OPAL firmware interface.
+
+ Each error log is identified by a unique ID and will
+ exist until explicitly acknowledged to firmware.
+
+ Each log entry has a directory in /sys/firmware/opal/elog.
+
+ Log entries may be purged by the service processor
+ before retrieved by firmware or retrieved/acknowledged by
+ Linux if there is no room for more log entries.
+
+ In the event that Linux has retrieved the log entries
+ but not explicitly acknowledged them to firmware and
+ the service processor needs more room for log entries,
+ the only remaining copy of a log message may be in
+ Linux.
+
+ Typically, a user space daemon will monitor for new
+ entries, read them out and acknowledge them.
+
+ The service processor may be able to store more log
+ entries than firmware can, so after you acknowledge
+ an event from Linux you may instantly get another one
+ from the queue that was generated some time in the past.
+
+ The raw log format is a binary format. We currently
+ do not parse this at all in kernel, leaving it up to
+ user space to solve the problem. In future, we may
+ do more parsing in kernel and add more files to make
+ it easier for simple user space processes to extract
+ more information.
+
+ For each log entry (directory), there are the following
+ files:
+
+ id: An ASCII representation of the ID of the
+ error log, in hex - e.g. "0x01".
+
+ type: An ASCII representation of the type id and
+ description of the type of error log.
+ Currently just "0x00 PEL" - platform error log.
+ In the future there may be additional types.
+
+ raw: A read-only binary file that can be read
+ to get the raw log entry. These are
+ <16kb, often just hundreds of bytes and
+ "average" 2kb.
+
+ acknowledge: Writing 'ack' to this file will acknowledge
+ the error log to firmware (and in turn
+ the service processor, if applicable).
+ Shortly after acknowledging it, the log
+ entry will be removed from sysfs.
+ Reading this file will list the supported
+ operations (curently just acknowledge). \ No newline at end of file
diff --git a/Documentation/ABI/stable/sysfs-module b/Documentation/ABI/stable/sysfs-module
new file mode 100644
index 000000000..6272ae5fb
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-module
@@ -0,0 +1,34 @@
+What: /sys/module
+Description:
+ The /sys/module tree consists of the following structure:
+
+ /sys/module/MODULENAME
+ The name of the module that is in the kernel. This
+ module name will always show up if the module is loaded as a
+ dynamic module. If it is built directly into the kernel, it
+ will only show up if it has a version or at least one
+ parameter.
+
+ Note: The conditions of creation in the built-in case are not
+ by design and may be removed in the future.
+
+ /sys/module/MODULENAME/parameters
+ This directory contains individual files that are each
+ individual parameters of the module that are able to be
+ changed at runtime. See the individual module
+ documentation as to the contents of these parameters and
+ what they accomplish.
+
+ Note: The individual parameter names and values are not
+ considered stable, only the fact that they will be
+ placed in this location within sysfs. See the
+ individual driver documentation for details as to the
+ stability of the different parameters.
+
+ /sys/module/MODULENAME/refcnt
+ If the module is able to be unloaded from the kernel, this file
+ will contain the current reference count of the module.
+
+ Note: If the module is built into the kernel, or if the
+ CONFIG_MODULE_UNLOAD kernel configuration value is not enabled,
+ this file will not be present.
diff --git a/Documentation/ABI/stable/sysfs-transport-srp b/Documentation/ABI/stable/sysfs-transport-srp
new file mode 100644
index 000000000..ec7af69fe
--- /dev/null
+++ b/Documentation/ABI/stable/sysfs-transport-srp
@@ -0,0 +1,58 @@
+What: /sys/class/srp_remote_ports/port-<h>:<n>/delete
+Date: June 1, 2012
+KernelVersion: 3.7
+Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description: Instructs an SRP initiator to disconnect from a target and to
+ remove all LUNs imported from that target.
+
+What: /sys/class/srp_remote_ports/port-<h>:<n>/dev_loss_tmo
+Date: February 1, 2014
+KernelVersion: 3.13
+Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description: Number of seconds the SCSI layer will wait after a transport
+ layer error has been observed before removing a target port.
+ Zero means immediate removal. Setting this attribute to "off"
+ will disable the dev_loss timer.
+
+What: /sys/class/srp_remote_ports/port-<h>:<n>/fast_io_fail_tmo
+Date: February 1, 2014
+KernelVersion: 3.13
+Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description: Number of seconds the SCSI layer will wait after a transport
+ layer error has been observed before failing I/O. Zero means
+ failing I/O immediately. Setting this attribute to "off" will
+ disable the fast_io_fail timer.
+
+What: /sys/class/srp_remote_ports/port-<h>:<n>/port_id
+Date: June 27, 2007
+KernelVersion: 2.6.24
+Contact: linux-scsi@vger.kernel.org
+Description: 16-byte local SRP port identifier in hexadecimal format. An
+ example: 4c:49:4e:55:58:20:56:49:4f:00:00:00:00:00:00:00.
+
+What: /sys/class/srp_remote_ports/port-<h>:<n>/reconnect_delay
+Date: February 1, 2014
+KernelVersion: 3.13
+Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description: Number of seconds the SCSI layer will wait after a reconnect
+ attempt failed before retrying. Setting this attribute to
+ "off" will disable time-based reconnecting.
+
+What: /sys/class/srp_remote_ports/port-<h>:<n>/roles
+Date: June 27, 2007
+KernelVersion: 2.6.24
+Contact: linux-scsi@vger.kernel.org
+Description: Role of the remote port. Either "SRP Initiator" or "SRP Target".
+
+What: /sys/class/srp_remote_ports/port-<h>:<n>/state
+Date: February 1, 2014
+KernelVersion: 3.13
+Contact: linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org
+Description: State of the transport layer used for communication with the
+ remote port. "running" if the transport layer is operational;
+ "blocked" if a transport layer error has been encountered but
+ the fast_io_fail_tmo timer has not yet fired; "fail-fast"
+ after the fast_io_fail_tmo timer has fired and before the
+ "dev_loss_tmo" timer has fired; "lost" after the
+ "dev_loss_tmo" timer has fired and before the port is finally
+ removed.
diff --git a/Documentation/ABI/stable/thermal-notification b/Documentation/ABI/stable/thermal-notification
new file mode 100644
index 000000000..9723e8b7a
--- /dev/null
+++ b/Documentation/ABI/stable/thermal-notification
@@ -0,0 +1,4 @@
+What: A notification mechanism for thermal related events
+Description:
+ This interface enables notification for thermal related events.
+ The notification is in the form of a netlink event.
diff --git a/Documentation/ABI/stable/vdso b/Documentation/ABI/stable/vdso
new file mode 100644
index 000000000..7cdfc28cc
--- /dev/null
+++ b/Documentation/ABI/stable/vdso
@@ -0,0 +1,27 @@
+On some architectures, when the kernel loads any userspace program it
+maps an ELF DSO into that program's address space. This DSO is called
+the vDSO and it often contains useful and highly-optimized alternatives
+to real syscalls.
+
+These functions are called just like ordinary C function according to
+your platform's ABI. Call them from a sensible context. (For example,
+if you set CS on x86 to something strange, the vDSO functions are
+within their rights to crash.) In addition, if you pass a bad
+pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT.
+
+To find the DSO, parse the auxiliary vector passed to the program's
+entry point. The AT_SYSINFO_EHDR entry will point to the vDSO.
+
+The vDSO uses symbol versioning; whenever you request a symbol from the
+vDSO, specify the version you are expecting.
+
+Programs that dynamically link to glibc will use the vDSO automatically.
+Otherwise, you can use the reference parser in Documentation/vDSO/parse_vdso.c.
+
+Unless otherwise noted, the set of symbols with any given version and the
+ABI of those symbols is considered stable. It may vary across architectures,
+though.
+
+(As of this writing, this ABI documentation as been confirmed for x86_64.
+ The maintainers of the other vDSO-using architectures should confirm
+ that it is correct for their architecture.)
diff --git a/Documentation/ABI/testing/configfs-spear-pcie-gadget b/Documentation/ABI/testing/configfs-spear-pcie-gadget
new file mode 100644
index 000000000..875988146
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-spear-pcie-gadget
@@ -0,0 +1,31 @@
+What: /config/pcie-gadget
+Date: Feb 2011
+KernelVersion: 2.6.37
+Contact: Pratyush Anand <pratyush.anand@st.com>
+Description:
+
+ Interface is used to configure selected dual mode PCIe controller
+ as device and then program its various registers to configure it
+ as a particular device type.
+ This interfaces can be used to show spear's PCIe device capability.
+
+ Nodes are only visible when configfs is mounted. To mount configfs
+ in /config directory use:
+ # mount -t configfs none /config/
+
+ For nth PCIe Device Controller
+ /config/pcie-gadget.n/
+ link ... used to enable ltssm and read its status.
+ int_type ...used to configure and read type of supported
+ interrupt
+ no_of_msi ... used to configure number of MSI vector needed and
+ to read no of MSI granted.
+ inta ... write 1 to assert INTA and 0 to de-assert.
+ send_msi ... write MSI vector to be sent.
+ vendor_id ... used to write and read vendor id (hex)
+ device_id ... used to write and read device id (hex)
+ bar0_size ... used to write and read bar0_size
+ bar0_address ... used to write and read bar0 mapped area in hex.
+ bar0_rw_offset ... used to write and read offset of bar0 where
+ bar0_data will be written or read.
+ bar0_data ... used to write and read data at bar0_rw_offset.
diff --git a/Documentation/ABI/testing/configfs-usb-gadget b/Documentation/ABI/testing/configfs-usb-gadget
new file mode 100644
index 000000000..95a36589a
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget
@@ -0,0 +1,126 @@
+What: /config/usb-gadget
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ This group contains sub-groups corresponding to created
+ USB gadgets.
+
+What: /config/usb-gadget/gadget
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+
+ The attributes of a gadget:
+
+ UDC - bind a gadget to UDC/unbind a gadget;
+ write UDC's name found in /sys/class/udc/*
+ to bind a gadget, empty string "" to unbind.
+
+ bDeviceClass - USB device class code
+ bDeviceSubClass - USB device subclass code
+ bDeviceProtocol - USB device protocol code
+ bMaxPacketSize0 - maximum endpoint 0 packet size
+ bcdDevice - bcd device release number
+ bcdUSB - bcd USB specification version number
+ idProduct - product ID
+ idVendor - vendor ID
+
+What: /config/usb-gadget/gadget/configs
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ This group contains a USB gadget's configurations
+
+What: /config/usb-gadget/gadget/configs/config
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ The attributes of a configuration:
+
+ bmAttributes - configuration characteristics
+ MaxPower - maximum power consumption from the bus
+
+What: /config/usb-gadget/gadget/configs/config/strings
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ This group contains subdirectories for language-specific
+ strings for this configuration.
+
+What: /config/usb-gadget/gadget/configs/config/strings/language
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ The attributes:
+
+ configuration - configuration description
+
+
+What: /config/usb-gadget/gadget/functions
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ This group contains functions available to this USB gadget.
+
+What: /config/usb-gadget/gadget/functions/<func>.<inst>/interface.<n>
+Date: May 2014
+KernelVersion: 3.16
+Description:
+ This group contains "Feature Descriptors" specific for one
+ gadget's USB interface or one interface group described
+ by an IAD.
+
+ The attributes:
+
+ compatible_id - 8-byte string for "Compatible ID"
+ sub_compatible_id - 8-byte string for "Sub Compatible ID"
+
+What: /config/usb-gadget/gadget/functions/<func>.<inst>/interface.<n>/<property>
+Date: May 2014
+KernelVersion: 3.16
+Description:
+ This group contains "Extended Property Descriptors" specific for one
+ gadget's USB interface or one interface group described
+ by an IAD.
+
+ The attributes:
+
+ type - value 1..7 for interpreting the data
+ 1: unicode string
+ 2: unicode string with environment variable
+ 3: binary
+ 4: little-endian 32-bit
+ 5: big-endian 32-bit
+ 6: unicode string with a symbolic link
+ 7: multiple unicode strings
+ data - blob of data to be interpreted depending on
+ type
+
+What: /config/usb-gadget/gadget/strings
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ This group contains subdirectories for language-specific
+ strings for this gadget.
+
+What: /config/usb-gadget/gadget/strings/language
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ The attributes:
+
+ serialnumber - gadget's serial number (string)
+ product - gadget's product description
+ manufacturer - gadget's manufacturer description
+
+What: /config/usb-gadget/gadget/os_desc
+Date: May 2014
+KernelVersion: 3.16
+Description:
+ This group contains "OS String" extension handling attributes.
+
+ use - flag turning "OS Desctiptors" support on/off
+ b_vendor_code - one-byte value used for custom per-device and
+ per-interface requests
+ qw_sign - an identifier to be reported as "OS String"
+ proper
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-acm b/Documentation/ABI/testing/configfs-usb-gadget-acm
new file mode 100644
index 000000000..d21092d75
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-acm
@@ -0,0 +1,8 @@
+What: /config/usb-gadget/gadget/functions/acm.name
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+
+ This item contains just one readonly attribute: port_num.
+ It contains the port number of the /dev/ttyGS<n> device
+ associated with acm function's instance "name".
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-ecm b/Documentation/ABI/testing/configfs-usb-gadget-ecm
new file mode 100644
index 000000000..0addf7704
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-ecm
@@ -0,0 +1,16 @@
+What: /config/usb-gadget/gadget/functions/ecm.name
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ The attributes:
+
+ ifname - network device interface name associated with
+ this function instance
+ qmult - queue length multiplier for high and
+ super speed
+ host_addr - MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr - MAC address of device's end of this
+ Ethernet over USB link
+
+
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-eem b/Documentation/ABI/testing/configfs-usb-gadget-eem
new file mode 100644
index 000000000..a4c57158f
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-eem
@@ -0,0 +1,14 @@
+What: /config/usb-gadget/gadget/functions/eem.name
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ The attributes:
+
+ ifname - network device interface name associated with
+ this function instance
+ qmult - queue length multiplier for high and
+ super speed
+ host_addr - MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr - MAC address of device's end of this
+ Ethernet over USB link
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-ffs b/Documentation/ABI/testing/configfs-usb-gadget-ffs
new file mode 100644
index 000000000..e39b27653
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-ffs
@@ -0,0 +1,9 @@
+What: /config/usb-gadget/gadget/functions/ffs.name
+Date: Nov 2013
+KernelVersion: 3.13
+Description: The purpose of this directory is to create and remove it.
+
+ A corresponding USB function instance is created/removed.
+ There are no attributes here.
+
+ All parameters are set through FunctionFS.
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-hid b/Documentation/ABI/testing/configfs-usb-gadget-hid
new file mode 100644
index 000000000..f12e00e6b
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-hid
@@ -0,0 +1,11 @@
+What: /config/usb-gadget/gadget/functions/hid.name
+Date: Nov 2014
+KernelVersion: 3.19
+Description:
+ The attributes:
+
+ protocol - HID protocol to use
+ report_desc - blob corresponding to HID report descriptors
+ except the data passed through /dev/hidg<N>
+ report_length - HID report length
+ subclass - HID device subclass to use
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-loopback b/Documentation/ABI/testing/configfs-usb-gadget-loopback
new file mode 100644
index 000000000..9aae5bfb9
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-loopback
@@ -0,0 +1,8 @@
+What: /config/usb-gadget/gadget/functions/Loopback.name
+Date: Nov 2013
+KernelVersion: 3.13
+Description:
+ The attributes:
+
+ qlen - depth of loopback queue
+ bulk_buflen - buffer length
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-mass-storage b/Documentation/ABI/testing/configfs-usb-gadget-mass-storage
new file mode 100644
index 000000000..9931fb0d6
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-mass-storage
@@ -0,0 +1,31 @@
+What: /config/usb-gadget/gadget/functions/mass_storage.name
+Date: Oct 2013
+KernelVersion: 3.13
+Description:
+ The attributes:
+
+ stall - Set to permit function to halt bulk endpoints.
+ Disabled on some USB devices known not to work
+ correctly. You should set it to true.
+ num_buffers - Number of pipeline buffers. Valid numbers
+ are 2..4. Available only if
+ CONFIG_USB_GADGET_DEBUG_FILES is set.
+
+What: /config/usb-gadget/gadget/functions/mass_storage.name/lun.name
+Date: Oct 2013
+KernelVersion: 3.13
+Description:
+ The attributes:
+
+ file - The path to the backing file for the LUN.
+ Required if LUN is not marked as removable.
+ ro - Flag specifying access to the LUN shall be
+ read-only. This is implied if CD-ROM emulation
+ is enabled as well as when it was impossible
+ to open "filename" in R/W mode.
+ removable - Flag specifying that LUN shall be indicated as
+ being removable.
+ cdrom - Flag specifying that LUN shall be reported as
+ being a CD-ROM.
+ nofua - Flag specifying that FUA flag
+ in SCSI WRITE(10,12)
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-midi b/Documentation/ABI/testing/configfs-usb-gadget-midi
new file mode 100644
index 000000000..6b341df72
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-midi
@@ -0,0 +1,12 @@
+What: /config/usb-gadget/gadget/functions/midi.name
+Date: Nov 2014
+KernelVersion: 3.19
+Description:
+ The attributes:
+
+ index - index value for the USB MIDI adapter
+ id - ID string for the USB MIDI adapter
+ buflen - MIDI buffer length
+ qlen - USB read request queue length
+ in_ports - number of MIDI input ports
+ out_ports - number of MIDI output ports
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-ncm b/Documentation/ABI/testing/configfs-usb-gadget-ncm
new file mode 100644
index 000000000..6fe723eff
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-ncm
@@ -0,0 +1,15 @@
+What: /config/usb-gadget/gadget/functions/ncm.name
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ The attributes:
+
+ ifname - network device interface name associated with
+ this function instance
+ qmult - queue length multiplier for high and
+ super speed
+ host_addr - MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr - MAC address of device's end of this
+ Ethernet over USB link
+
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-obex b/Documentation/ABI/testing/configfs-usb-gadget-obex
new file mode 100644
index 000000000..a6a9327ed
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-obex
@@ -0,0 +1,9 @@
+What: /config/usb-gadget/gadget/functions/obex.name
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+
+ This item contains just one readonly attribute: port_num.
+ It contains the port number of the /dev/ttyGS<n> device
+ associated with obex function's instance "name".
+
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-phonet b/Documentation/ABI/testing/configfs-usb-gadget-phonet
new file mode 100644
index 000000000..7037a358e
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-phonet
@@ -0,0 +1,8 @@
+What: /config/usb-gadget/gadget/functions/phonet.name
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+
+ This item contains just one readonly attribute: ifname.
+ It contains the network interface name assigned during
+ network device registration.
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-printer b/Documentation/ABI/testing/configfs-usb-gadget-printer
new file mode 100644
index 000000000..6b0714e3c
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-printer
@@ -0,0 +1,9 @@
+What: /config/usb-gadget/gadget/functions/printer.name
+Date: Apr 2015
+KernelVersion: 4.1
+Description:
+ The attributes:
+
+ pnp_string - Data to be passed to the host in pnp string
+ q_len - Number of requests per endpoint
+
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-rndis b/Documentation/ABI/testing/configfs-usb-gadget-rndis
new file mode 100644
index 000000000..e32879b84
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-rndis
@@ -0,0 +1,14 @@
+What: /config/usb-gadget/gadget/functions/rndis.name
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ The attributes:
+
+ ifname - network device interface name associated with
+ this function instance
+ qmult - queue length multiplier for high and
+ super speed
+ host_addr - MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr - MAC address of device's end of this
+ Ethernet over USB link
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-serial b/Documentation/ABI/testing/configfs-usb-gadget-serial
new file mode 100644
index 000000000..474d249f7
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-serial
@@ -0,0 +1,9 @@
+What: /config/usb-gadget/gadget/functions/gser.name
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+
+ This item contains just one readonly attribute: port_num.
+ It contains the port number of the /dev/ttyGS<n> device
+ associated with gser function's instance "name".
+
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-sourcesink b/Documentation/ABI/testing/configfs-usb-gadget-sourcesink
new file mode 100644
index 000000000..29477c319
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-sourcesink
@@ -0,0 +1,12 @@
+What: /config/usb-gadget/gadget/functions/SourceSink.name
+Date: Nov 2013
+KernelVersion: 3.13
+Description:
+ The attributes:
+
+ pattern - 0 (all zeros), 1 (mod63), 2 (none)
+ isoc_interval - 1..16
+ isoc_maxpacket - 0 - 1023 (fs), 0 - 1024 (hs/ss)
+ isoc_mult - 0..2 (hs/ss only)
+ isoc_maxburst - 0..15 (ss only)
+ qlen - buffer length
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-subset b/Documentation/ABI/testing/configfs-usb-gadget-subset
new file mode 100644
index 000000000..9373e2c51
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-subset
@@ -0,0 +1,14 @@
+What: /config/usb-gadget/gadget/functions/geth.name
+Date: Jun 2013
+KernelVersion: 3.11
+Description:
+ The attributes:
+
+ ifname - network device interface name associated with
+ this function instance
+ qmult - queue length multiplier for high and
+ super speed
+ host_addr - MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr - MAC address of device's end of this
+ Ethernet over USB link
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uac1 b/Documentation/ABI/testing/configfs-usb-gadget-uac1
new file mode 100644
index 000000000..8ba9a1233
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uac1
@@ -0,0 +1,12 @@
+What: /config/usb-gadget/gadget/functions/uac1.name
+Date: Sep 2014
+KernelVersion: 3.18
+Description:
+ The attributes:
+
+ audio_buf_size - audio buffer size
+ fn_cap - capture pcm device file name
+ fn_cntl - control device file name
+ fn_play - playback pcm device file name
+ req_buf_size - ISO OUT endpoint request buffer size
+ req_count - ISO OUT endpoint request count
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uac2 b/Documentation/ABI/testing/configfs-usb-gadget-uac2
new file mode 100644
index 000000000..2bfdd4efa
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uac2
@@ -0,0 +1,12 @@
+What: /config/usb-gadget/gadget/functions/uac2.name
+Date: Sep 2014
+KernelVersion: 3.18
+Description:
+ The attributes:
+
+ c_chmask - capture channel mask
+ c_srate - capture sampling rate
+ c_ssize - capture sample size (bytes)
+ p_chmask - playback channel mask
+ p_srate - playback sampling rate
+ p_ssize - playback sample size (bytes)
diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc
new file mode 100644
index 000000000..2f4a0051b
--- /dev/null
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@@ -0,0 +1,265 @@
+What: /config/usb-gadget/gadget/functions/uvc.name
+Date: Dec 2014
+KernelVersion: 3.20
+Description: UVC function directory
+
+ streaming_maxburst - 0..15 (ss only)
+ streaming_maxpacket - 1..1023 (fs), 1..3072 (hs/ss)
+ streaming_interval - 1..16
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Control descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/class
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Class descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/class/ss
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Super speed control class descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/class/fs
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Full speed control class descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Terminal descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal/output
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Output terminal descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal/output/default
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Default output terminal descriptors
+
+ All attributes read only:
+ iTerminal - index of string descriptor
+ bSourceID - id of the terminal to which this terminal
+ is connected
+ bAssocTerminal - id of the input terminal to which this output
+ terminal is associated
+ wTerminalType - terminal type
+ bTerminalID - a non-zero id of this terminal
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal/camera
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Camera terminal descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/terminal/camera/default
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Default camera terminal descriptors
+
+ All attributes read only:
+ bmControls - bitmap specifying which controls are
+ supported for the video stream
+ wOcularFocalLength - the value of Locular
+ wObjectiveFocalLengthMax- the value of Lmin
+ wObjectiveFocalLengthMin- the value of Lmax
+ iTerminal - index of string descriptor
+ bAssocTerminal - id of the output terminal to which
+ this terminal is connected
+ wTerminalType - terminal type
+ bTerminalID - a non-zero id of this terminal
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/processing
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Processing unit descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/processing/default
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Default processing unit descriptors
+
+ All attributes read only:
+ iProcessing - index of string descriptor
+ bmControls - bitmap specifying which controls are
+ supported for the video stream
+ wMaxMultiplier - maximum digital magnification x100
+ bSourceID - id of the terminal to which this unit is
+ connected
+ bUnitID - a non-zero id of this unit
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/header
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Control header descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/control/header/name
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Specific control header descriptors
+
+dwClockFrequency
+bcdUVC
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Streaming descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/class
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Streaming class descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/class/ss
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Super speed streaming class descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/class/hs
+Date: Dec 2014
+KernelVersion: 3.20
+Description: High speed streaming class descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/class/fs
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Full speed streaming class descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/color_matching
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Color matching descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/color_matching/default
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Default color matching descriptors
+
+ All attributes read only:
+ bMatrixCoefficients - matrix used to compute luma and
+ chroma values from the color primaries
+ bTransferCharacteristics- optoelectronic transfer
+ characteristic of the source picutre,
+ also called the gamma function
+ bColorPrimaries - color primaries and the reference
+ white
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg
+Date: Dec 2014
+KernelVersion: 3.20
+Description: MJPEG format descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg/name
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Specific MJPEG format descriptors
+
+ All attributes read only,
+ except bmaControls and bDefaultFrameIndex:
+ bmaControls - this format's data for bmaControls in
+ the streaming header
+ bmInterfaceFlags - specifies interlace information,
+ read-only
+ bAspectRatioY - the X dimension of the picture aspect
+ ratio, read-only
+ bAspectRatioX - the Y dimension of the picture aspect
+ ratio, read-only
+ bmFlags - characteristics of this format,
+ read-only
+ bDefaultFrameIndex - optimum frame index for this stream
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/mjpeg/name/name
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Specific MJPEG frame descriptors
+
+ dwFrameInterval - indicates how frame interval can be
+ programmed; a number of values
+ separated by newline can be specified
+ dwDefaultFrameInterval - the frame interval the device would
+ like to use as default
+ dwMaxVideoFrameBufferSize- the maximum number of bytes the
+ compressor will produce for a video
+ frame or still image
+ dwMaxBitRate - the maximum bit rate at the shortest
+ frame interval in bps
+ dwMinBitRate - the minimum bit rate at the longest
+ frame interval in bps
+ wHeight - height of decoded bitmap frame in px
+ wWidth - width of decoded bitmam frame in px
+ bmCapabilities - still image support, fixed frame-rate
+ support
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Uncompressed format descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed/name
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Specific uncompressed format descriptors
+
+ bmaControls - this format's data for bmaControls in
+ the streaming header
+ bmInterfaceFlags - specifies interlace information,
+ read-only
+ bAspectRatioY - the X dimension of the picture aspect
+ ratio, read-only
+ bAspectRatioX - the Y dimension of the picture aspect
+ ratio, read-only
+ bDefaultFrameIndex - optimum frame index for this stream
+ bBitsPerPixel - number of bits per pixel used to
+ specify color in the decoded video
+ frame
+ guidFormat - globally unique id used to identify
+ stream-encoding format
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/uncompressed/name/name
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Specific uncompressed frame descriptors
+
+ dwFrameInterval - indicates how frame interval can be
+ programmed; a number of values
+ separated by newline can be specified
+ dwDefaultFrameInterval - the frame interval the device would
+ like to use as default
+ dwMaxVideoFrameBufferSize- the maximum number of bytes the
+ compressor will produce for a video
+ frame or still image
+ dwMaxBitRate - the maximum bit rate at the shortest
+ frame interval in bps
+ dwMinBitRate - the minimum bit rate at the longest
+ frame interval in bps
+ wHeight - height of decoded bitmap frame in px
+ wWidth - width of decoded bitmam frame in px
+ bmCapabilities - still image support, fixed frame-rate
+ support
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/header
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Streaming header descriptors
+
+What: /config/usb-gadget/gadget/functions/uvc.name/streaming/header/name
+Date: Dec 2014
+KernelVersion: 3.20
+Description: Specific streaming header descriptors
+
+ All attributes read only:
+ bTriggerUsage - how the host software will respond to
+ a hardware trigger interrupt event
+ bTriggerSupport - flag specifying if hardware
+ triggering is supported
+ bStillCaptureMethod - method of still image caputre
+ supported
+ bTerminalLink - id of the output terminal to which
+ the video endpoint of this interface
+ is connected
+ bmInfo - capabilities of this video streaming
+ interface
diff --git a/Documentation/ABI/testing/debugfs-aufs b/Documentation/ABI/testing/debugfs-aufs
new file mode 100644
index 000000000..99642d105
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-aufs
@@ -0,0 +1,50 @@
+What: /debug/aufs/si_<id>/
+Date: March 2009
+Contact: J. R. Okajima <hooanon05g@gmail.com>
+Description:
+ Under /debug/aufs, a directory named si_<id> is created
+ per aufs mount, where <id> is a unique id generated
+ internally.
+
+What: /debug/aufs/si_<id>/plink
+Date: Apr 2013
+Contact: J. R. Okajima <hooanon05g@gmail.com>
+Description:
+ It has three lines and shows the information about the
+ pseudo-link. The first line is a single number
+ representing a number of buckets. The second line is a
+ number of pseudo-links per buckets (separated by a
+ blank). The last line is a single number representing a
+ total number of psedo-links.
+ When the aufs mount option 'noplink' is specified, it
+ will show "1\n0\n0\n".
+
+What: /debug/aufs/si_<id>/xib
+Date: March 2009
+Contact: J. R. Okajima <hooanon05g@gmail.com>
+Description:
+ It shows the consumed blocks by xib (External Inode Number
+ Bitmap), its block size and file size.
+ When the aufs mount option 'noxino' is specified, it
+ will be empty. About XINO files, see the aufs manual.
+
+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
+Date: March 2009
+Contact: J. R. Okajima <hooanon05g@gmail.com>
+Description:
+ It shows the consumed blocks by xino (External Inode Number
+ Translation Table), its link count, block size and file
+ size.
+ When the aufs mount option 'noxino' is specified, it
+ will be empty. About XINO files, see the aufs manual.
+
+What: /debug/aufs/si_<id>/xigen
+Date: March 2009
+Contact: J. R. Okajima <hooanon05g@gmail.com>
+Description:
+ It shows the consumed blocks by xigen (External Inode
+ Generation Table), its block size and file size.
+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
+ be created.
+ When the aufs mount option 'noxino' is specified, it
+ will be empty. About XINO files, see the aufs manual.
diff --git a/Documentation/ABI/testing/debugfs-driver-genwqe b/Documentation/ABI/testing/debugfs-driver-genwqe
new file mode 100644
index 000000000..1c2f25674
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-driver-genwqe
@@ -0,0 +1,91 @@
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/ddcb_info
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: DDCB queue dump used for debugging queueing problems.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/curr_regs
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Dump of the current error registers.
+ Only available for PF.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/curr_dbg_uid0
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Internal chip state of UID0 (unit id 0).
+ Only available for PF.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/curr_dbg_uid1
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Internal chip state of UID1.
+ Only available for PF.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/curr_dbg_uid2
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Internal chip state of UID2.
+ Only available for PF.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/prev_regs
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Dump of the error registers before the last reset of
+ the card occured.
+ Only available for PF.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/prev_dbg_uid0
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Internal chip state of UID0 before card was reset.
+ Only available for PF.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/prev_dbg_uid1
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Internal chip state of UID1 before card was reset.
+ Only available for PF.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/prev_dbg_uid2
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Internal chip state of UID2 before card was reset.
+ Only available for PF.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/info
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Comprehensive summary of bitstream version and software
+ version. Used bitstream and bitstream clocking information.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/err_inject
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Possibility to inject error cases to ensure that the drivers
+ error handling code works well.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/vf<0..14>_jobtimeout_msec
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Default VF timeout 250ms. Testing might require 1000ms.
+ Using 0 will use the cards default value (whatever that is).
+
+ The timeout depends on the max number of available cards
+ in the system and the maximum allowed queue size.
+
+ The driver ensures that the settings are done just before
+ the VFs get enabled. Changing the timeouts in flight is not
+ possible.
+ Only available for PF.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/jobtimer
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Dump job timeout register values for PF and VFs.
+ Only available for PF.
+
+What: /sys/kernel/debug/genwqe/genwqe<n>_card/queue_working_time
+Date: Dec 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Dump queue working time register values for PF and VFs.
+ Only available for PF.
diff --git a/Documentation/ABI/testing/debugfs-ec b/Documentation/ABI/testing/debugfs-ec
new file mode 100644
index 000000000..6546115a9
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-ec
@@ -0,0 +1,20 @@
+What: /sys/kernel/debug/ec/*/{gpe,use_global_lock,io}
+Date: July 2010
+Contact: Thomas Renninger <trenn@suse.de>
+Description:
+
+General information like which GPE is assigned to the EC and whether
+the global lock should get used.
+Knowing the EC GPE one can watch the amount of HW events related to
+the EC here (XY -> GPE number from /sys/kernel/debug/ec/*/gpe):
+/sys/firmware/acpi/interrupts/gpeXY
+
+The io file is binary and a userspace tool located here:
+ftp://ftp.suse.com/pub/people/trenn/sources/ec/
+should get used to read out the 256 Embedded Controller registers
+or writing to them.
+
+CAUTION: Do not write to the Embedded Controller if you don't know
+what you are doing! Rebooting afterwards also is a good idea.
+This can influence the way your machine is cooled and fans may
+not get switched on again after you did a wrong write.
diff --git a/Documentation/ABI/testing/debugfs-ideapad b/Documentation/ABI/testing/debugfs-ideapad
new file mode 100644
index 000000000..7079c0b21
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-ideapad
@@ -0,0 +1,19 @@
+What: /sys/kernel/debug/ideapad/cfg
+Date: Sep 2011
+KernelVersion: 3.2
+Contact: Ike Panhc <ike.pan@canonical.com>
+Description:
+
+cfg shows the return value of _CFG method in VPC2004 device. It tells machine
+capability and what graphic component within the machine.
+
+
+What: /sys/kernel/debug/ideapad/status
+Date: Sep 2011
+KernelVersion: 3.2
+Contact: Ike Panhc <ike.pan@canonical.com>
+Description:
+
+status shows infos we can read and tells its meaning and value.
+
+
diff --git a/Documentation/ABI/testing/debugfs-olpc b/Documentation/ABI/testing/debugfs-olpc
new file mode 100644
index 000000000..bd76cc6d5
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-olpc
@@ -0,0 +1,16 @@
+What: /sys/kernel/debug/olpc-ec/cmd
+Date: Dec 2011
+KernelVersion: 3.4
+Contact: devel@lists.laptop.org
+Description:
+
+A generic interface for executing OLPC Embedded Controller commands and
+reading their responses.
+
+To execute a command, write data with the format: CC:N A A A A
+CC is the (hex) command, N is the count of expected reply bytes, and A A A A
+are optional (hex) arguments.
+
+To read the response (if any), read from the generic node after executing
+a command. Hex reply bytes will be returned, *whether or not* they came from
+the immediately previous command.
diff --git a/Documentation/ABI/testing/debugfs-pfo-nx-crypto b/Documentation/ABI/testing/debugfs-pfo-nx-crypto
new file mode 100644
index 000000000..685d5a448
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-pfo-nx-crypto
@@ -0,0 +1,45 @@
+What: /sys/kernel/debug/nx-crypto/*
+Date: March 2012
+KernelVersion: 3.4
+Contact: Kent Yoder <key@linux.vnet.ibm.com>
+Description:
+
+ These debugfs interfaces are built by the nx-crypto driver, built in
+arch/powerpc/crypto/nx.
+
+Error Detection
+===============
+
+errors:
+- A u32 providing a total count of errors since the driver was loaded. The
+only errors counted here are those returned from the hcall, H_COP_OP.
+
+last_error:
+- The most recent non-zero return code from the H_COP_OP hcall. -EBUSY is not
+recorded here (the hcall will retry until -EBUSY goes away).
+
+last_error_pid:
+- The process ID of the process who received the most recent error from the
+hcall.
+
+Device Use
+==========
+
+aes_bytes:
+- The total number of bytes encrypted using AES in any of the driver's
+supported modes.
+
+aes_ops:
+- The total number of AES operations submitted to the hardware.
+
+sha256_bytes:
+- The total number of bytes hashed by the hardware using SHA-256.
+
+sha256_ops:
+- The total number of SHA-256 operations submitted to the hardware.
+
+sha512_bytes:
+- The total number of bytes hashed by the hardware using SHA-512.
+
+sha512_ops:
+- The total number of SHA-512 operations submitted to the hardware.
diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd
new file mode 100644
index 000000000..cf11736ac
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-pktcdvd
@@ -0,0 +1,19 @@
+What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7]
+Date: Oct. 2006
+KernelVersion: 2.6.20
+Contact: Thomas Maier <balagi@justmail.de>
+Description:
+
+debugfs interface
+-----------------
+
+The pktcdvd module (packet writing driver) creates
+these files in debugfs:
+
+/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
+ info (0444) Lots of driver statistics and infos.
+
+Example:
+-------
+
+cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
diff --git a/Documentation/ABI/testing/dev-kmsg b/Documentation/ABI/testing/dev-kmsg
new file mode 100644
index 000000000..bb820be48
--- /dev/null
+++ b/Documentation/ABI/testing/dev-kmsg
@@ -0,0 +1,101 @@
+What: /dev/kmsg
+Date: Mai 2012
+KernelVersion: 3.5
+Contact: Kay Sievers <kay@vrfy.org>
+Description: The /dev/kmsg character device node provides userspace access
+ to the kernel's printk buffer.
+
+ Injecting messages:
+ Every write() to the opened device node places a log entry in
+ the kernel's printk buffer.
+
+ The logged line can be prefixed with a <N> syslog prefix, which
+ carries the syslog priority and facility. The single decimal
+ prefix number is composed of the 3 lowest bits being the syslog
+ priority and the higher bits the syslog facility number.
+
+ If no prefix is given, the priority number is the default kernel
+ log priority and the facility number is set to LOG_USER (1). It
+ is not possible to inject messages from userspace with the
+ facility number LOG_KERN (0), to make sure that the origin of
+ the messages can always be reliably determined.
+
+ Accessing the buffer:
+ Every read() from the opened device node receives one record
+ of the kernel's printk buffer.
+
+ The first read() directly following an open() always returns
+ first message in the buffer; there is no kernel-internal
+ persistent state; many readers can concurrently open the device
+ and read from it, without affecting other readers.
+
+ Every read() will receive the next available record. If no more
+ records are available read() will block, or if O_NONBLOCK is
+ used -EAGAIN returned.
+
+ Messages in the record ring buffer get overwritten as whole,
+ there are never partial messages received by read().
+
+ In case messages get overwritten in the circular buffer while
+ the device is kept open, the next read() will return -EPIPE,
+ and the seek position be updated to the next available record.
+ Subsequent reads() will return available records again.
+
+ Unlike the classic syslog() interface, the 64 bit record
+ sequence numbers allow to calculate the amount of lost
+ messages, in case the buffer gets overwritten. And they allow
+ to reconnect to the buffer and reconstruct the read position
+ if needed, without limiting the interface to a single reader.
+
+ The device supports seek with the following parameters:
+ SEEK_SET, 0
+ seek to the first entry in the buffer
+ SEEK_END, 0
+ seek after the last entry in the buffer
+ SEEK_DATA, 0
+ seek after the last record available at the time
+ the last SYSLOG_ACTION_CLEAR was issued.
+
+ The output format consists of a prefix carrying the syslog
+ prefix including priority and facility, the 64 bit message
+ sequence number and the monotonic timestamp in microseconds,
+ and a flag field. All fields are separated by a ','.
+
+ Future extensions might add more comma separated values before
+ the terminating ';'. Unknown fields and values should be
+ gracefully ignored.
+
+ The human readable text string starts directly after the ';'
+ and is terminated by a '\n'. Untrusted values derived from
+ hardware or other facilities are printed, therefore
+ all non-printable characters and '\' itself in the log message
+ are escaped by "\x00" C-style hex encoding.
+
+ A line starting with ' ', is a continuation line, adding
+ key/value pairs to the log message, which provide the machine
+ readable context of the message, for reliable processing in
+ userspace.
+
+ Example:
+ 7,160,424069,-;pci_root PNP0A03:00: host bridge window [io 0x0000-0x0cf7] (ignored)
+ SUBSYSTEM=acpi
+ DEVICE=+acpi:PNP0A03:00
+ 6,339,5140900,-;NET: Registered protocol family 10
+ 30,340,5690716,-;udevd[80]: starting version 181
+
+ The DEVICE= key uniquely identifies devices the following way:
+ b12:8 - block dev_t
+ c127:3 - char dev_t
+ n8 - netdev ifindex
+ +sound:card0 - subsystem:devname
+
+ The flags field carries '-' by default. A 'c' indicates a
+ fragment of a line. All following fragments are flagged with
+ '+'. Note, that these hints about continuation lines are not
+ necessarily correct, and the stream could be interleaved with
+ unrelated messages, but merging the lines in the output
+ usually produces better human readable results. A similar
+ logic is used internally when messages are printed to the
+ console, /proc/kmsg or the syslog() syscall.
+
+Users: dmesg(1), userspace kernel log consumers
diff --git a/Documentation/ABI/testing/evm b/Documentation/ABI/testing/evm
new file mode 100644
index 000000000..8374d4557
--- /dev/null
+++ b/Documentation/ABI/testing/evm
@@ -0,0 +1,23 @@
+What: security/evm
+Date: March 2011
+Contact: Mimi Zohar <zohar@us.ibm.com>
+Description:
+ EVM protects a file's security extended attributes(xattrs)
+ against integrity attacks. The initial method maintains an
+ HMAC-sha1 value across the extended attributes, storing the
+ value as the extended attribute 'security.evm'.
+
+ EVM depends on the Kernel Key Retention System to provide it
+ with a trusted/encrypted key for the HMAC-sha1 operation.
+ The key is loaded onto the root's keyring using keyctl. Until
+ EVM receives notification that the key has been successfully
+ loaded onto the keyring (echo 1 > <securityfs>/evm), EVM
+ can not create or validate the 'security.evm' xattr, but
+ returns INTEGRITY_UNKNOWN. Loading the key and signaling EVM
+ should be done as early as possible. Normally this is done
+ in the initramfs, which has already been measured as part
+ of the trusted boot. For more information on creating and
+ loading existing trusted/encrypted keys, refer to:
+ Documentation/keys-trusted-encrypted.txt. (A sample dracut
+ patch, which loads the trusted/encrypted key and enables
+ EVM, is available from http://linux-ima.sourceforge.net/#EVM.)
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
new file mode 100644
index 000000000..0a378a882
--- /dev/null
+++ b/Documentation/ABI/testing/ima_policy
@@ -0,0 +1,97 @@
+What: security/ima/policy
+Date: May 2008
+Contact: Mimi Zohar <zohar@us.ibm.com>
+Description:
+ The Trusted Computing Group(TCG) runtime Integrity
+ Measurement Architecture(IMA) maintains a list of hash
+ values of executables and other sensitive system files
+ loaded into the run-time of this system. At runtime,
+ the policy can be constrained based on LSM specific data.
+ Policies are loaded into the securityfs file ima/policy
+ by opening the file, writing the rules one at a time and
+ then closing the file. The new policy takes effect after
+ the file ima/policy is closed.
+
+ IMA appraisal, if configured, uses these file measurements
+ for local measurement appraisal.
+
+ rule format: action [condition ...]
+
+ action: measure | dont_measure | appraise | dont_appraise | audit
+ condition:= base | lsm [option]
+ base: [[func=] [mask=] [fsmagic=] [fsuuid=] [uid=]
+ [euid=] [fowner=]]
+ lsm: [[subj_user=] [subj_role=] [subj_type=]
+ [obj_user=] [obj_role=] [obj_type=]]
+ option: [[appraise_type=]] [permit_directio]
+
+ base: func:= [BPRM_CHECK][MMAP_CHECK][FILE_CHECK][MODULE_CHECK]
+ [FIRMWARE_CHECK]
+ mask:= [[^]MAY_READ] [[^]MAY_WRITE] [[^]MAY_APPEND]
+ [[^]MAY_EXEC]
+ fsmagic:= hex value
+ fsuuid:= file system UUID (e.g 8bcbe394-4f13-4144-be8e-5aa9ea2ce2f6)
+ uid:= decimal value
+ euid:= decimal value
+ fowner:=decimal value
+ lsm: are LSM specific
+ option: appraise_type:= [imasig]
+
+ default policy:
+ # PROC_SUPER_MAGIC
+ dont_measure fsmagic=0x9fa0
+ dont_appraise fsmagic=0x9fa0
+ # SYSFS_MAGIC
+ dont_measure fsmagic=0x62656572
+ dont_appraise fsmagic=0x62656572
+ # DEBUGFS_MAGIC
+ dont_measure fsmagic=0x64626720
+ dont_appraise fsmagic=0x64626720
+ # TMPFS_MAGIC
+ dont_measure fsmagic=0x01021994
+ dont_appraise fsmagic=0x01021994
+ # RAMFS_MAGIC
+ dont_appraise fsmagic=0x858458f6
+ # DEVPTS_SUPER_MAGIC
+ dont_measure fsmagic=0x1cd1
+ dont_appraise fsmagic=0x1cd1
+ # BINFMTFS_MAGIC
+ dont_measure fsmagic=0x42494e4d
+ dont_appraise fsmagic=0x42494e4d
+ # SECURITYFS_MAGIC
+ dont_measure fsmagic=0x73636673
+ dont_appraise fsmagic=0x73636673
+ # SELINUX_MAGIC
+ dont_measure fsmagic=0xf97cff8c
+ dont_appraise fsmagic=0xf97cff8c
+ # CGROUP_SUPER_MAGIC
+ dont_measure fsmagic=0x27e0eb
+ dont_appraise fsmagic=0x27e0eb
+ # NSFS_MAGIC
+ dont_measure fsmagic=0x6e736673
+ dont_appraise fsmagic=0x6e736673
+
+ measure func=BPRM_CHECK
+ measure func=FILE_MMAP mask=MAY_EXEC
+ measure func=FILE_CHECK mask=MAY_READ uid=0
+ measure func=MODULE_CHECK
+ measure func=FIRMWARE_CHECK
+ appraise fowner=0
+
+ The default policy measures all executables in bprm_check,
+ all files mmapped executable in file_mmap, and all files
+ open for read by root in do_filp_open. The default appraisal
+ policy appraises all files owned by root.
+
+ Examples of LSM specific definitions:
+
+ SELinux:
+ dont_measure obj_type=var_log_t
+ dont_appraise obj_type=var_log_t
+ dont_measure obj_type=auditd_log_t
+ dont_appraise obj_type=auditd_log_t
+ measure subj_user=system_u func=FILE_CHECK mask=MAY_READ
+ measure subj_role=system_r func=FILE_CHECK mask=MAY_READ
+
+ Smack:
+ measure subj_user=_ func=FILE_CHECK mask=MAY_READ
diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats
new file mode 100644
index 000000000..f91a973a3
--- /dev/null
+++ b/Documentation/ABI/testing/procfs-diskstats
@@ -0,0 +1,22 @@
+What: /proc/diskstats
+Date: February 2008
+Contact: Jerome Marchand <jmarchan@redhat.com>
+Description:
+ The /proc/diskstats file displays the I/O statistics
+ of block devices. Each line contains the following 14
+ fields:
+ 1 - major number
+ 2 - minor mumber
+ 3 - device name
+ 4 - reads completed successfully
+ 5 - reads merged
+ 6 - sectors read
+ 7 - time spent reading (ms)
+ 8 - writes completed
+ 9 - writes merged
+ 10 - sectors written
+ 11 - time spent writing (ms)
+ 12 - I/Os currently in progress
+ 13 - time spent doing I/Os (ms)
+ 14 - weighted time spent doing I/Os (ms)
+ For more details refer to Documentation/iostats.txt
diff --git a/Documentation/ABI/testing/pstore b/Documentation/ABI/testing/pstore
new file mode 100644
index 000000000..5fca9f5e1
--- /dev/null
+++ b/Documentation/ABI/testing/pstore
@@ -0,0 +1,47 @@
+Where: /sys/fs/pstore/... (or /dev/pstore/...)
+Date: March 2011
+Kernel Version: 2.6.39
+Contact: tony.luck@intel.com
+Description: Generic interface to platform dependent persistent storage.
+
+ Platforms that provide a mechanism to preserve some data
+ across system reboots can register with this driver to
+ provide a generic interface to show records captured in
+ the dying moments. In the case of a panic the last part
+ of the console log is captured, but other interesting
+ data can also be saved.
+
+ # mount -t pstore -o kmsg_bytes=8000 - /sys/fs/pstore
+
+ $ ls -l /sys/fs/pstore/
+ total 0
+ -r--r--r-- 1 root root 7896 Nov 30 15:38 dmesg-erst-1
+
+ Different users of this interface will result in different
+ filename prefixes. Currently two are defined:
+
+ "dmesg" - saved console log
+ "mce" - architecture dependent data from fatal h/w error
+
+ Once the information in a file has been read, removing
+ the file will signal to the underlying persistent storage
+ device that it can reclaim the space for later re-use.
+
+ $ rm /sys/fs/pstore/dmesg-erst-1
+
+ The expectation is that all files in /sys/fs/pstore/
+ will be saved elsewhere and erased from persistent store
+ soon after boot to free up space ready for the next
+ catastrophe.
+
+ The 'kmsg_bytes' mount option changes the target amount of
+ data saved on each oops/panic. Pstore saves (possibly
+ multiple) files based on the record size of the underlying
+ persistent storage until at least this amount is reached.
+ Default is 10 Kbytes.
+
+ Pstore only supports one backend at a time. If multiple
+ backends are available, the preferred backend may be
+ set by passing the pstore.backend= argument to the kernel at
+ boot time.
+
diff --git a/Documentation/ABI/testing/sysfs-ata b/Documentation/ABI/testing/sysfs-ata
new file mode 100644
index 000000000..9231daef3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-ata
@@ -0,0 +1,110 @@
+What: /sys/class/ata_...
+Date: August 2008
+Contact: Gwendal Grignou<gwendal@google.com>
+Description:
+
+Provide a place in sysfs for storing the ATA topology of the system. This allows
+retrieving various information about ATA objects.
+
+Files under /sys/class/ata_port
+-------------------------------
+
+ For each port, a directory ataX is created where X is the ata_port_id of
+ the port. The device parent is the ata host device.
+
+idle_irq (read)
+
+ Number of IRQ received by the port while idle [some ata HBA only].
+
+nr_pmp_links (read)
+
+ If a SATA Port Multiplier (PM) is connected, number of link behind it.
+
+Files under /sys/class/ata_link
+-------------------------------
+
+ Behind each port, there is a ata_link. If there is a SATA PM in the
+ topology, 15 ata_link objects are created.
+
+ If a link is behind a port, the directory name is linkX, where X is
+ ata_port_id of the port.
+ If a link is behind a PM, its name is linkX.Y where X is ata_port_id
+ of the parent port and Y the PM port.
+
+hw_sata_spd_limit
+
+ Maximum speed supported by the connected SATA device.
+
+sata_spd_limit
+
+ Maximum speed imposed by libata.
+
+sata_spd
+
+ Current speed of the link [1.5, 3Gps,...].
+
+Files under /sys/class/ata_device
+---------------------------------
+
+ Behind each link, up to two ata device are created.
+ The name of the directory is devX[.Y].Z where:
+ - X is ata_port_id of the port where the device is connected,
+ - Y the port of the PM if any, and
+ - Z the device id: for PATA, there is usually 2 devices [0,1],
+ only 1 for SATA.
+
+class
+ Device class. Can be "ata" for disk, "atapi" for packet device,
+ "pmp" for PM, or "none" if no device was found behind the link.
+
+dma_mode
+
+ Transfer modes supported by the device when in DMA mode.
+ Mostly used by PATA device.
+
+pio_mode
+
+ Transfer modes supported by the device when in PIO mode.
+ Mostly used by PATA device.
+
+xfer_mode
+
+ Current transfer mode.
+
+id
+
+ Cached result of IDENTIFY command, as described in ATA8 7.16 and 7.17.
+ Only valid if the device is not a PM.
+
+gscr
+
+ Cached result of the dump of PM GSCR register.
+ Valid registers are:
+ 0: SATA_PMP_GSCR_PROD_ID,
+ 1: SATA_PMP_GSCR_REV,
+ 2: SATA_PMP_GSCR_PORT_INFO,
+ 32: SATA_PMP_GSCR_ERROR,
+ 33: SATA_PMP_GSCR_ERROR_EN,
+ 64: SATA_PMP_GSCR_FEAT,
+ 96: SATA_PMP_GSCR_FEAT_EN,
+ 130: SATA_PMP_GSCR_SII_GPIO
+ Only valid if the device is a PM.
+
+trim
+
+ Shows the DSM TRIM mode currently used by the device. Valid
+ values are:
+ unsupported: Drive does not support DSM TRIM
+ unqueued: Drive supports unqueued DSM TRIM only
+ queued: Drive supports queued DSM TRIM
+ forced_unqueued: Drive's unqueued DSM support is known to be
+ buggy and only unqueued TRIM commands
+ are sent
+
+spdn_cnt
+
+ Number of time libata decided to lower the speed of link due to errors.
+
+ering
+
+ Formatted output of the error ring of the device.
diff --git a/Documentation/ABI/testing/sysfs-aufs b/Documentation/ABI/testing/sysfs-aufs
new file mode 100644
index 000000000..82f951849
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-aufs
@@ -0,0 +1,31 @@
+What: /sys/fs/aufs/si_<id>/
+Date: March 2009
+Contact: J. R. Okajima <hooanon05g@gmail.com>
+Description:
+ Under /sys/fs/aufs, a directory named si_<id> is created
+ per aufs mount, where <id> is a unique id generated
+ internally.
+
+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
+Date: March 2009
+Contact: J. R. Okajima <hooanon05g@gmail.com>
+Description:
+ It shows the abolute path of a member directory (which
+ is called branch) in aufs, and its permission.
+
+What: /sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
+Date: July 2013
+Contact: J. R. Okajima <hooanon05g@gmail.com>
+Description:
+ It shows the id of a member directory (which is called
+ branch) in aufs.
+
+What: /sys/fs/aufs/si_<id>/xi_path
+Date: March 2009
+Contact: J. R. Okajima <hooanon05g@gmail.com>
+Description:
+ It shows the abolute path of XINO (External Inode Number
+ Bitmap, Translation Table and Generation Table) file
+ even if it is the default path.
+ When the aufs mount option 'noxino' is specified, it
+ will be empty. About XINO files, see the aufs manual.
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
new file mode 100644
index 000000000..8df003963
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-block
@@ -0,0 +1,230 @@
+What: /sys/block/<disk>/stat
+Date: February 2008
+Contact: Jerome Marchand <jmarchan@redhat.com>
+Description:
+ The /sys/block/<disk>/stat files displays the I/O
+ statistics of disk <disk>. They contain 11 fields:
+ 1 - reads completed successfully
+ 2 - reads merged
+ 3 - sectors read
+ 4 - time spent reading (ms)
+ 5 - writes completed
+ 6 - writes merged
+ 7 - sectors written
+ 8 - time spent writing (ms)
+ 9 - I/Os currently in progress
+ 10 - time spent doing I/Os (ms)
+ 11 - weighted time spent doing I/Os (ms)
+ For more details refer Documentation/iostats.txt
+
+
+What: /sys/block/<disk>/<part>/stat
+Date: February 2008
+Contact: Jerome Marchand <jmarchan@redhat.com>
+Description:
+ The /sys/block/<disk>/<part>/stat files display the
+ I/O statistics of partition <part>. The format is the
+ same as the above-written /sys/block/<disk>/stat
+ format.
+
+
+What: /sys/block/<disk>/integrity/format
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Metadata format for integrity capable block device.
+ E.g. T10-DIF-TYPE1-CRC.
+
+
+What: /sys/block/<disk>/integrity/read_verify
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Indicates whether the block layer should verify the
+ integrity of read requests serviced by devices that
+ support sending integrity metadata.
+
+
+What: /sys/block/<disk>/integrity/tag_size
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Number of bytes of integrity tag space available per
+ 512 bytes of data.
+
+
+What: /sys/block/<disk>/integrity/device_is_integrity_capable
+Date: July 2014
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Indicates whether a storage device is capable of storing
+ integrity metadata. Set if the device is T10 PI-capable.
+
+
+What: /sys/block/<disk>/integrity/write_generate
+Date: June 2008
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Indicates whether the block layer should automatically
+ generate checksums for write requests bound for
+ devices that support receiving integrity metadata.
+
+What: /sys/block/<disk>/alignment_offset
+Date: April 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Storage devices may report a physical block size that is
+ bigger than the logical block size (for instance a drive
+ with 4KB physical sectors exposing 512-byte logical
+ blocks to the operating system). This parameter
+ indicates how many bytes the beginning of the device is
+ offset from the disk's natural alignment.
+
+What: /sys/block/<disk>/<partition>/alignment_offset
+Date: April 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Storage devices may report a physical block size that is
+ bigger than the logical block size (for instance a drive
+ with 4KB physical sectors exposing 512-byte logical
+ blocks to the operating system). This parameter
+ indicates how many bytes the beginning of the partition
+ is offset from the disk's natural alignment.
+
+What: /sys/block/<disk>/queue/logical_block_size
+Date: May 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ This is the smallest unit the storage device can
+ address. It is typically 512 bytes.
+
+What: /sys/block/<disk>/queue/physical_block_size
+Date: May 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ This is the smallest unit a physical storage device can
+ write atomically. It is usually the same as the logical
+ block size but may be bigger. One example is SATA
+ drives with 4KB sectors that expose a 512-byte logical
+ block size to the operating system. For stacked block
+ devices the physical_block_size variable contains the
+ maximum physical_block_size of the component devices.
+
+What: /sys/block/<disk>/queue/minimum_io_size
+Date: April 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Storage devices may report a granularity or preferred
+ minimum I/O size which is the smallest request the
+ device can perform without incurring a performance
+ penalty. For disk drives this is often the physical
+ block size. For RAID arrays it is often the stripe
+ chunk size. A properly aligned multiple of
+ minimum_io_size is the preferred request size for
+ workloads where a high number of I/O operations is
+ desired.
+
+What: /sys/block/<disk>/queue/optimal_io_size
+Date: April 2009
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Storage devices may report an optimal I/O size, which is
+ the device's preferred unit for sustained I/O. This is
+ rarely reported for disk drives. For RAID arrays it is
+ usually the stripe width or the internal track size. A
+ properly aligned multiple of optimal_io_size is the
+ preferred request size for workloads where sustained
+ throughput is desired. If no optimal I/O size is
+ reported this file contains 0.
+
+What: /sys/block/<disk>/queue/nomerges
+Date: January 2010
+Contact:
+Description:
+ Standard I/O elevator operations include attempts to
+ merge contiguous I/Os. For known random I/O loads these
+ attempts will always fail and result in extra cycles
+ being spent in the kernel. This allows one to turn off
+ this behavior on one of two ways: When set to 1, complex
+ merge checks are disabled, but the simple one-shot merges
+ with the previous I/O request are enabled. When set to 2,
+ all merge tries are disabled. The default value is 0 -
+ which enables all types of merge tries.
+
+What: /sys/block/<disk>/discard_alignment
+Date: May 2011
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Devices that support discard functionality may
+ internally allocate space in units that are bigger than
+ the exported logical block size. The discard_alignment
+ parameter indicates how many bytes the beginning of the
+ device is offset from the internal allocation unit's
+ natural alignment.
+
+What: /sys/block/<disk>/<partition>/discard_alignment
+Date: May 2011
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Devices that support discard functionality may
+ internally allocate space in units that are bigger than
+ the exported logical block size. The discard_alignment
+ parameter indicates how many bytes the beginning of the
+ partition is offset from the internal allocation unit's
+ natural alignment.
+
+What: /sys/block/<disk>/queue/discard_granularity
+Date: May 2011
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Devices that support discard functionality may
+ internally allocate space using units that are bigger
+ than the logical block size. The discard_granularity
+ parameter indicates the size of the internal allocation
+ unit in bytes if reported by the device. Otherwise the
+ discard_granularity will be set to match the device's
+ physical block size. A discard_granularity of 0 means
+ that the device does not support discard functionality.
+
+What: /sys/block/<disk>/queue/discard_max_bytes
+Date: May 2011
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Devices that support discard functionality may have
+ internal limits on the number of bytes that can be
+ trimmed or unmapped in a single operation. Some storage
+ protocols also have inherent limits on the number of
+ blocks that can be described in a single command. The
+ discard_max_bytes parameter is set by the device driver
+ to the maximum number of bytes that can be discarded in
+ a single operation. Discard requests issued to the
+ device must not exceed this limit. A discard_max_bytes
+ value of 0 means that the device does not support
+ discard functionality.
+
+What: /sys/block/<disk>/queue/discard_zeroes_data
+Date: May 2011
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Devices that support discard functionality may return
+ stale or random data when a previously discarded block
+ is read back. This can cause problems if the filesystem
+ expects discarded blocks to be explicitly cleared. If a
+ device reports that it deterministically returns zeroes
+ when a discarded area is read the discard_zeroes_data
+ parameter will be set to one. Otherwise it will be 0 and
+ the result of reading a discarded area is undefined.
+
+What: /sys/block/<disk>/queue/write_same_max_bytes
+Date: January 2012
+Contact: Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+ Some devices support a write same operation in which a
+ single data block can be written to a range of several
+ contiguous blocks on storage. This can be used to wipe
+ areas on disk or to initialize drives in a RAID
+ configuration. write_same_max_bytes indicates how many
+ bytes can be written in a single write same command. If
+ write_same_max_bytes is 0, write same is not supported
+ by the device.
+
diff --git a/Documentation/ABI/testing/sysfs-block-bcache b/Documentation/ABI/testing/sysfs-block-bcache
new file mode 100644
index 000000000..9e4bbc5d5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-block-bcache
@@ -0,0 +1,156 @@
+What: /sys/block/<disk>/bcache/unregister
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ A write to this file causes the backing device or cache to be
+ unregistered. If a backing device had dirty data in the cache,
+ writeback mode is automatically disabled and all dirty data is
+ flushed before the device is unregistered. Caches unregister
+ all associated backing devices before unregistering themselves.
+
+What: /sys/block/<disk>/bcache/clear_stats
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ Writing to this file resets all the statistics for the device.
+
+What: /sys/block/<disk>/bcache/cache
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For a backing device that has cache, a symlink to
+ the bcache/ dir of that cache.
+
+What: /sys/block/<disk>/bcache/cache_hits
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For backing devices: integer number of full cache hits,
+ counted per bio. A partial cache hit counts as a miss.
+
+What: /sys/block/<disk>/bcache/cache_misses
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For backing devices: integer number of cache misses.
+
+What: /sys/block/<disk>/bcache/cache_hit_ratio
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For backing devices: cache hits as a percentage.
+
+What: /sys/block/<disk>/bcache/sequential_cutoff
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For backing devices: Threshold past which sequential IO will
+ skip the cache. Read and written as bytes in human readable
+ units (i.e. echo 10M > sequntial_cutoff).
+
+What: /sys/block/<disk>/bcache/bypassed
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ Sum of all reads and writes that have bypassed the cache (due
+ to the sequential cutoff). Expressed as bytes in human
+ readable units.
+
+What: /sys/block/<disk>/bcache/writeback
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For backing devices: When on, writeback caching is enabled and
+ writes will be buffered in the cache. When off, caching is in
+ writethrough mode; reads and writes will be added to the
+ cache but no write buffering will take place.
+
+What: /sys/block/<disk>/bcache/writeback_running
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For backing devices: when off, dirty data will not be written
+ from the cache to the backing device. The cache will still be
+ used to buffer writes until it is mostly full, at which point
+ writes transparently revert to writethrough mode. Intended only
+ for benchmarking/testing.
+
+What: /sys/block/<disk>/bcache/writeback_delay
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For backing devices: In writeback mode, when dirty data is
+ written to the cache and the cache held no dirty data for that
+ backing device, writeback from cache to backing device starts
+ after this delay, expressed as an integer number of seconds.
+
+What: /sys/block/<disk>/bcache/writeback_percent
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For backing devices: If nonzero, writeback from cache to
+ backing device only takes place when more than this percentage
+ of the cache is used, allowing more write coalescing to take
+ place and reducing total number of writes sent to the backing
+ device. Integer between 0 and 40.
+
+What: /sys/block/<disk>/bcache/synchronous
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For a cache, a boolean that allows synchronous mode to be
+ switched on and off. In synchronous mode all writes are ordered
+ such that the cache can reliably recover from unclean shutdown;
+ if disabled bcache will not generally wait for writes to
+ complete but if the cache is not shut down cleanly all data
+ will be discarded from the cache. Should not be turned off with
+ writeback caching enabled.
+
+What: /sys/block/<disk>/bcache/discard
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For a cache, a boolean allowing discard/TRIM to be turned off
+ or back on if the device supports it.
+
+What: /sys/block/<disk>/bcache/bucket_size
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For a cache, bucket size in human readable units, as set at
+ cache creation time; should match the erase block size of the
+ SSD for optimal performance.
+
+What: /sys/block/<disk>/bcache/nbuckets
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For a cache, the number of usable buckets.
+
+What: /sys/block/<disk>/bcache/tree_depth
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For a cache, height of the btree excluding leaf nodes (i.e. a
+ one node tree will have a depth of 0).
+
+What: /sys/block/<disk>/bcache/btree_cache_size
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ Number of btree buckets/nodes that are currently cached in
+ memory; cache dynamically grows and shrinks in response to
+ memory pressure from the rest of the system.
+
+What: /sys/block/<disk>/bcache/written
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For a cache, total amount of data in human readable units
+ written to the cache, excluding all metadata.
+
+What: /sys/block/<disk>/bcache/btree_written
+Date: November 2010
+Contact: Kent Overstreet <kent.overstreet@gmail.com>
+Description:
+ For a cache, sum of all btree writes in human readable units.
diff --git a/Documentation/ABI/testing/sysfs-block-dm b/Documentation/ABI/testing/sysfs-block-dm
new file mode 100644
index 000000000..f9f2339b9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-block-dm
@@ -0,0 +1,47 @@
+What: /sys/block/dm-<num>/dm/name
+Date: January 2009
+KernelVersion: 2.6.29
+Contact: dm-devel@redhat.com
+Description: Device-mapper device name.
+ Read-only string containing mapped device name.
+Users: util-linux, device-mapper udev rules
+
+What: /sys/block/dm-<num>/dm/uuid
+Date: January 2009
+KernelVersion: 2.6.29
+Contact: dm-devel@redhat.com
+Description: Device-mapper device UUID.
+ Read-only string containing DM-UUID or empty string
+ if DM-UUID is not set.
+Users: util-linux, device-mapper udev rules
+
+What: /sys/block/dm-<num>/dm/suspended
+Date: June 2009
+KernelVersion: 2.6.31
+Contact: dm-devel@redhat.com
+Description: Device-mapper device suspend state.
+ Contains the value 1 while the device is suspended.
+ Otherwise it contains 0. Read-only attribute.
+Users: util-linux, device-mapper udev rules
+
+What: /sys/block/dm-<num>/dm/rq_based_seq_io_merge_deadline
+Date: March 2015
+KernelVersion: 4.1
+Contact: dm-devel@redhat.com
+Description: Allow control over how long a request that is a
+ reasonable merge candidate can be queued on the request
+ queue. The resolution of this deadline is in
+ microseconds (ranging from 1 to 100000 usecs).
+ Setting this attribute to 0 (the default) will disable
+ request-based DM's merge heuristic and associated extra
+ accounting. This attribute is not applicable to
+ bio-based DM devices so it will only ever report 0 for
+ them.
+
+What: /sys/block/dm-<num>/dm/use_blk_mq
+Date: March 2015
+KernelVersion: 4.1
+Contact: dm-devel@redhat.com
+Description: Request-based Device-mapper blk-mq I/O path mode.
+ Contains the value 1 if the device is using blk-mq.
+ Otherwise it contains 0. Read-only attribute.
diff --git a/Documentation/ABI/testing/sysfs-block-rssd b/Documentation/ABI/testing/sysfs-block-rssd
new file mode 100644
index 000000000..beef30c04
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-block-rssd
@@ -0,0 +1,5 @@
+What: /sys/block/rssd*/status
+Date: April 2012
+KernelVersion: 3.4
+Contact: Asai Thambi S P <asamymuthupa@micron.com>
+Description: This is a read-only file. Indicates the status of the device.
diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
new file mode 100644
index 000000000..2e69e83bf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -0,0 +1,168 @@
+What: /sys/block/zram<id>/disksize
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The disksize file is read-write and specifies the disk size
+ which represents the limit on the *uncompressed* worth of data
+ that can be stored in this disk.
+ Unit: bytes
+
+What: /sys/block/zram<id>/initstate
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The initstate file is read-only and shows the initialization
+ state of the device.
+
+What: /sys/block/zram<id>/reset
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The reset file is write-only and allows resetting the
+ device. The reset operation frees all the memory associated
+ with this device.
+
+What: /sys/block/zram<id>/num_reads
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The num_reads file is read-only and specifies the number of
+ reads (failed or successful) done on this device.
+
+What: /sys/block/zram<id>/num_writes
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The num_writes file is read-only and specifies the number of
+ writes (failed or successful) done on this device.
+
+What: /sys/block/zram<id>/invalid_io
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The invalid_io file is read-only and specifies the number of
+ non-page-size-aligned I/O requests issued to this device.
+
+What: /sys/block/zram<id>/failed_reads
+Date: February 2014
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The failed_reads file is read-only and specifies the number of
+ failed reads happened on this device.
+
+What: /sys/block/zram<id>/failed_writes
+Date: February 2014
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The failed_writes file is read-only and specifies the number of
+ failed writes happened on this device.
+
+What: /sys/block/zram<id>/max_comp_streams
+Date: February 2014
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The max_comp_streams file is read-write and specifies the
+ number of backend's zcomp_strm compression streams (number of
+ concurrent compress operations).
+
+What: /sys/block/zram<id>/comp_algorithm
+Date: February 2014
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The comp_algorithm file is read-write and lets to show
+ available and selected compression algorithms, change
+ compression algorithm selection.
+
+What: /sys/block/zram<id>/notify_free
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The notify_free file is read-only. Depending on device usage
+ scenario it may account a) the number of pages freed because
+ of swap slot free notifications or b) the number of pages freed
+ because of REQ_DISCARD requests sent by bio. The former ones
+ are sent to a swap block device when a swap slot is freed, which
+ implies that this disk is being used as a swap disk. The latter
+ ones are sent by filesystem mounted with discard option,
+ whenever some data blocks are getting discarded.
+
+What: /sys/block/zram<id>/zero_pages
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The zero_pages file is read-only and specifies number of zero
+ filled pages written to this disk. No memory is allocated for
+ such pages.
+
+What: /sys/block/zram<id>/orig_data_size
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The orig_data_size file is read-only and specifies uncompressed
+ size of data stored in this disk. This excludes zero-filled
+ pages (zero_pages) since no memory is allocated for them.
+ Unit: bytes
+
+What: /sys/block/zram<id>/compr_data_size
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The compr_data_size file is read-only and specifies compressed
+ size of data stored in this disk. So, compression ratio can be
+ calculated using orig_data_size and this statistic.
+ Unit: bytes
+
+What: /sys/block/zram<id>/mem_used_total
+Date: August 2010
+Contact: Nitin Gupta <ngupta@vflare.org>
+Description:
+ The mem_used_total file is read-only and specifies the amount
+ of memory, including allocator fragmentation and metadata
+ overhead, allocated for this disk. So, allocator space
+ efficiency can be calculated using compr_data_size and this
+ statistic.
+ Unit: bytes
+
+What: /sys/block/zram<id>/mem_used_max
+Date: August 2014
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ The mem_used_max file is read/write and specifies the amount
+ of maximum memory zram have consumed to store compressed data.
+ For resetting the value, you should write "0". Otherwise,
+ you could see -EINVAL.
+ Unit: bytes
+
+What: /sys/block/zram<id>/mem_limit
+Date: August 2014
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ The mem_limit file is read/write and specifies the maximum
+ amount of memory ZRAM can use to store the compressed data. The
+ limit could be changed in run time and "0" means disable the
+ limit. No limit is the initial state. Unit: bytes
+
+What: /sys/block/zram<id>/compact
+Date: August 2015
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ The compact file is write-only and trigger compaction for
+ allocator zrm uses. The allocator moves some objects so that
+ it could free fragment space.
+
+What: /sys/block/zram<id>/io_stat
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The io_stat file is read-only and accumulates device's I/O
+ statistics not accounted by block layer. For example,
+ failed_reads, failed_writes, etc. File format is similar to
+ block layer statistics file format.
+
+What: /sys/block/zram<id>/mm_stat
+Date: August 2015
+Contact: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Description:
+ The mm_stat file is read-only and represents device's mm
+ statistics (orig_data_size, compr_data_size, etc.) in a format
+ similar to block layer statistics file format.
diff --git a/Documentation/ABI/testing/sysfs-bus-acpi b/Documentation/ABI/testing/sysfs-bus-acpi
new file mode 100644
index 000000000..7fa9cbc75
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-acpi
@@ -0,0 +1,58 @@
+What: /sys/bus/acpi/devices/.../path
+Date: December 2006
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ This attribute indicates the full path of ACPI namespace
+ object associated with the device object. For example,
+ \_SB_.PCI0.
+ This file is not present for device objects representing
+ fixed ACPI hardware features (like power and sleep
+ buttons).
+
+What: /sys/bus/acpi/devices/.../modalias
+Date: July 2007
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ This attribute indicates the PNP IDs of the device object.
+ That is acpi:HHHHHHHH:[CCCCCCC:]. Where each HHHHHHHH or
+ CCCCCCCC contains device object's PNPID (_HID or _CID).
+
+What: /sys/bus/acpi/devices/.../hid
+Date: April 2005
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ This attribute indicates the hardware ID (_HID) of the
+ device object. For example, PNP0103.
+ This file is present for device objects having the _HID
+ control method.
+
+What: /sys/bus/acpi/devices/.../description
+Date: October 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ This attribute contains the output of the device object's
+ _STR control method, if present.
+
+What: /sys/bus/acpi/devices/.../adr
+Date: October 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ This attribute contains the output of the device object's
+ _ADR control method, which is present for ACPI device
+ objects representing devices having standard enumeration
+ algorithms, such as PCI.
+
+What: /sys/bus/acpi/devices/.../uid
+Date: October 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ This attribute contains the output of the device object's
+ _UID control method, if present.
+
+What: /sys/bus/acpi/devices/.../eject
+Date: December 2006
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ Writing 1 to this attribute will trigger hot removal of
+ this device object. This file exists for every device
+ object that has _EJ0 method.
diff --git a/Documentation/ABI/testing/sysfs-bus-amba b/Documentation/ABI/testing/sysfs-bus-amba
new file mode 100644
index 000000000..e7b54677c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-amba
@@ -0,0 +1,20 @@
+What: /sys/bus/amba/devices/.../driver_override
+Date: September 2014
+Contact: Antonios Motakis <a.motakis@virtualopensystems.com>
+Description:
+ This file allows the driver for a device to be specified which
+ will override standard OF, ACPI, ID table, and name matching.
+ When specified, only a driver with a name matching the value
+ written to driver_override will have an opportunity to bind to
+ the device. The override is specified by writing a string to the
+ driver_override file (echo vfio-amba > driver_override) and may
+ be cleared with an empty string (echo > driver_override).
+ This returns the device to standard matching rules binding.
+ Writing to driver_override does not automatically unbind the
+ device from its current driver or make any attempt to
+ automatically load the specified driver. If no driver with a
+ matching name is currently loaded in the kernel, the device will
+ not bind to any driver. This also allows devices to opt-out of
+ driver binding using a driver_override name such as "none".
+ Only a single driver may be specified in the override, there is
+ no support for parsing delimiters.
diff --git a/Documentation/ABI/testing/sysfs-bus-bcma b/Documentation/ABI/testing/sysfs-bus-bcma
new file mode 100644
index 000000000..721b4aea3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-bcma
@@ -0,0 +1,31 @@
+What: /sys/bus/bcma/devices/.../manuf
+Date: May 2011
+KernelVersion: 3.0
+Contact: Rafał Miłecki <zajec5@gmail.com>
+Description:
+ Each BCMA core has it's manufacturer id. See
+ include/linux/bcma/bcma.h for possible values.
+
+What: /sys/bus/bcma/devices/.../id
+Date: May 2011
+KernelVersion: 3.0
+Contact: Rafał Miłecki <zajec5@gmail.com>
+Description:
+ There are a few types of BCMA cores, they can be identified by
+ id field.
+
+What: /sys/bus/bcma/devices/.../rev
+Date: May 2011
+KernelVersion: 3.0
+Contact: Rafał Miłecki <zajec5@gmail.com>
+Description:
+ BCMA cores of the same type can still slightly differ depending
+ on their revision. Use it for detailed programming.
+
+What: /sys/bus/bcma/devices/.../class
+Date: May 2011
+KernelVersion: 3.0
+Contact: Rafał Miłecki <zajec5@gmail.com>
+Description:
+ Each BCMA core is identified by few fields, including class it
+ belongs to. See include/linux/bcma/bcma.h for possible values.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10 b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10
new file mode 100644
index 000000000..4b8d6ec92
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etb10
@@ -0,0 +1,24 @@
+What: /sys/bus/coresight/devices/<memory_map>.etb/enable_sink
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Add/remove a sink from a trace path. There can be multiple
+ source for a single sink.
+ ex: echo 1 > /sys/bus/coresight/devices/20010000.etb/enable_sink
+
+What: /sys/bus/coresight/devices/<memory_map>.etb/status
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (R) List various control and status registers. The specific
+ layout and content is driver specific.
+
+What: /sys/bus/coresight/devices/<memory_map>.etb/trigger_cntr
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Disables write access to the Trace RAM by stopping the
+ formatter after a defined number of words have been stored
+ following the trigger event. The number of 32-bit words written
+ into the Trace RAM following the trigger event is equal to the
+ value stored in this register+1 (from ARM ETB-TRM).
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x
new file mode 100644
index 000000000..b4d0b99af
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-etm3x
@@ -0,0 +1,253 @@
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/enable_source
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Enable/disable tracing on this specific trace entiry.
+ Enabling a source implies the source has been configured
+ properly and a sink has been identidifed for it. The path
+ of coresight components linking the source to the sink is
+ configured and managed automatically by the coresight framework.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/status
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (R) List various control and status registers. The specific
+ layout and content is driver specific.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/addr_idx
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: Select which address comparator or pair (of comparators) to
+ work with.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/addr_acctype
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Used in conjunction with @addr_idx. Specifies
+ characteristics about the address comparator being configure,
+ for example the access type, the kind of instruction to trace,
+ processor contect ID to trigger on, etc. Individual fields in
+ the access type register may vary on the version of the trace
+ entity.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/addr_range
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Used in conjunction with @addr_idx. Specifies the range of
+ addresses to trigger on. Inclusion or exclusion is specificed
+ in the corresponding access type register.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/addr_single
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Used in conjunction with @addr_idx. Specifies the single
+ address to trigger on, highly influenced by the configuration
+ options of the corresponding access type register.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/addr_start
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Used in conjunction with @addr_idx. Specifies the single
+ address to start tracing on, highly influenced by the
+ configuration options of the corresponding access type register.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/addr_stop
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Used in conjunction with @addr_idx. Specifies the single
+ address to stop tracing on, highly influenced by the
+ configuration options of the corresponding access type register.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/cntr_idx
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Specifies the counter to work on.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/cntr_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Used in conjunction with cntr_idx, give access to the
+ counter event register.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/cntr_val
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Used in conjunction with cntr_idx, give access to the
+ counter value register.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/cntr_rld_val
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Used in conjunction with cntr_idx, give access to the
+ counter reload value register.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/cntr_rld_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Used in conjunction with cntr_idx, give access to the
+ counter reload event register.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/ctxid_idx
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Specifies the index of the context ID register to be
+ selected.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/ctxid_mask
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Mask to apply to all the context ID comparator.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/ctxid_val
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Used with the ctxid_idx, specify with context ID to trigger
+ on.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/enable_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Defines which event triggers a trace.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/etmsr
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Gives access to the ETM status register, which holds
+ programming information and status on certains events.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/fifofull_level
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Number of byte left in the fifo before considering it full.
+ Depending on the tracer's version, can also hold threshold for
+ data suppression.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/mode
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Interface with the driver's 'mode' field, controlling
+ various aspect of the trace entity such as time stamping,
+ context ID size and cycle accurate tracing. Driver specific
+ and bound to change depending on the driver.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/nr_addr_cmp
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (R) Provides the number of address comparators pairs accessible
+ on a trace unit, as specified by bit 3:0 of register ETMCCR.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/nr_cntr
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (R) Provides the number of counters accessible on a trace unit,
+ as specified by bit 15:13 of register ETMCCR.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/nr_ctxid_cmp
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (R) Provides the number of context ID comparator available on a
+ trace unit, as specified by bit 25:24 of register ETMCCR.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/reset
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (W) Cancels all configuration on a trace unit and set it back
+ to its boot configuration.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/seq_12_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Defines the event that causes the sequencer to transition
+ from state 1 to state 2.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/seq_13_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Defines the event that causes the sequencer to transition
+ from state 1 to state 3.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/seq_21_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Defines the event that causes the sequencer to transition
+ from state 2 to state 1.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/seq_23_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Defines the event that causes the sequencer to transition
+ from state 2 to state 3.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/seq_31_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Defines the event that causes the sequencer to transition
+ from state 3 to state 1.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/seq_32_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Defines the event that causes the sequencer to transition
+ from state 3 to state 2.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/curr_seq_state
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (R) Holds the current state of the sequencer.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/sync_freq
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Holds the trace synchronization frequency value - must be
+ programmed with the various implementation behavior in mind.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/timestamp_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Defines an event that requests the insertion of a timestamp
+ into the trace stream.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/traceid
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Holds the trace ID that will appear in the trace stream
+ coming from this trace entity.
+
+What: /sys/bus/coresight/devices/<memory_map>.[etm|ptm]/trigger_event
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Define the event that controls the trigger.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel b/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel
new file mode 100644
index 000000000..d75acda5e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-funnel
@@ -0,0 +1,12 @@
+What: /sys/bus/coresight/devices/<memory_map>.funnel/funnel_ctrl
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Enables the slave ports and defines the hold time of the
+ slave ports.
+
+What: /sys/bus/coresight/devices/<memory_map>.funnel/priority
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Defines input port priority order.
diff --git a/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
new file mode 100644
index 000000000..f38cded5f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coresight-devices-tmc
@@ -0,0 +1,8 @@
+What: /sys/bus/coresight/devices/<memory_map>.tmc/trigger_cntr
+Date: November 2014
+KernelVersion: 3.19
+Contact: Mathieu Poirier <mathieu.poirier@linaro.org>
+Description: (RW) Disables write access to the Trace RAM by stopping the
+ formatter after a defined number of words have been stored
+ following the trigger event. Additional interface for this
+ driver are expected to be added as it matures.
diff --git a/Documentation/ABI/testing/sysfs-bus-css b/Documentation/ABI/testing/sysfs-bus-css
new file mode 100644
index 000000000..2979c40c1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-css
@@ -0,0 +1,35 @@
+What: /sys/bus/css/devices/.../type
+Date: March 2008
+Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
+ linux-s390@vger.kernel.org
+Description: Contains the subchannel type, as reported by the hardware.
+ This attribute is present for all subchannel types.
+
+What: /sys/bus/css/devices/.../modalias
+Date: March 2008
+Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
+ linux-s390@vger.kernel.org
+Description: Contains the module alias as reported with uevents.
+ It is of the format css:t<type> and present for all
+ subchannel types.
+
+What: /sys/bus/css/drivers/io_subchannel/.../chpids
+Date: December 2002
+Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
+ linux-s390@vger.kernel.org
+Description: Contains the ids of the channel paths used by this
+ subchannel, as reported by the channel subsystem
+ during subchannel recognition.
+ Note: This is an I/O-subchannel specific attribute.
+Users: s390-tools, HAL
+
+What: /sys/bus/css/drivers/io_subchannel/.../pimpampom
+Date: December 2002
+Contact: Cornelia Huck <cornelia.huck@de.ibm.com>
+ linux-s390@vger.kernel.org
+Description: Contains the PIM/PAM/POM values, as reported by the
+ channel subsystem when last queried by the common I/O
+ layer (this implies that this attribute is not necessarily
+ in sync with the values current in the channel subsystem).
+ Note: This is an I/O-subchannel specific attribute.
+Users: s390-tools, HAL
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
new file mode 100644
index 000000000..505f080d2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -0,0 +1,94 @@
+What: /sys/devices/cpu/events/
+ /sys/devices/cpu/events/branch-misses
+ /sys/devices/cpu/events/cache-references
+ /sys/devices/cpu/events/cache-misses
+ /sys/devices/cpu/events/stalled-cycles-frontend
+ /sys/devices/cpu/events/branch-instructions
+ /sys/devices/cpu/events/stalled-cycles-backend
+ /sys/devices/cpu/events/instructions
+ /sys/devices/cpu/events/cpu-cycles
+
+Date: 2013/01/08
+
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+
+Description: Generic performance monitoring events
+
+ A collection of performance monitoring events that may be
+ supported by many/most CPUs. These events can be monitored
+ using the 'perf(1)' tool.
+
+ The contents of each file would look like:
+
+ event=0xNNNN
+
+ where 'N' is a hex digit and the number '0xNNNN' shows the
+ "raw code" for the perf event identified by the file's
+ "basename".
+
+
+What: /sys/bus/event_source/devices/<pmu>/events/<event>
+Date: 2014/02/24
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Per-pmu performance monitoring events specific to the running system
+
+ Each file (except for some of those with a '.' in them, '.unit'
+ and '.scale') in the 'events' directory describes a single
+ performance monitoring event supported by the <pmu>. The name
+ of the file is the name of the event.
+
+ File contents:
+
+ <term>[=<value>][,<term>[=<value>]]...
+
+ Where <term> is one of the terms listed under
+ /sys/bus/event_source/devices/<pmu>/format/ and <value> is
+ a number is base-16 format with a '0x' prefix (lowercase only).
+ If a <term> is specified alone (without an assigned value), it
+ is implied that 0x1 is assigned to that <term>.
+
+ Examples (each of these lines would be in a seperate file):
+
+ event=0x2abc
+ event=0x423,inv,cmask=0x3
+ domain=0x1,offset=0x8,starting_index=0xffff
+ domain=0x1,offset=0x8,core=?
+
+ Each of the assignments indicates a value to be assigned to a
+ particular set of bits (as defined by the format file
+ corresponding to the <term>) in the perf_event structure passed
+ to the perf_open syscall.
+
+ In the case of the last example, a value replacing "?" would
+ need to be provided by the user selecting the particular event.
+ This is referred to as "event parameterization". Event
+ parameters have the format 'param=?'.
+
+What: /sys/bus/event_source/devices/<pmu>/events/<event>.unit
+Date: 2014/02/24
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Perf event units
+
+ A string specifying the English plural numerical unit that <event>
+ (once multiplied by <event>.scale) represents.
+
+ Example:
+
+ Joules
+
+What: /sys/bus/event_source/devices/<pmu>/events/<event>.scale
+Date: 2014/02/24
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Perf event scaling factors
+
+ A string representing a floating point value expressed in
+ scientific notation to be multiplied by the event count
+ recieved from the kernel to match the unit specified in the
+ <event>.unit file.
+
+ Example:
+
+ 2.3283064365386962890625e-10
+
+ This is provided to avoid performing floating point arithmetic
+ in the kernel.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
new file mode 100644
index 000000000..77f47ff5e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format
@@ -0,0 +1,20 @@
+Where: /sys/bus/event_source/devices/<dev>/format
+Date: January 2012
+Kernel Version: 3.3
+Contact: Jiri Olsa <jolsa@redhat.com>
+Description:
+ Attribute group to describe the magic bits that go into
+ perf_event_attr::config[012] for a particular pmu.
+ Each attribute of this group defines the 'hardware' bitmask
+ we want to export, so that userspace can deal with sane
+ name/value pairs.
+
+ Userspace must be prepared for the possibility that attributes
+ define overlapping bit ranges. For example:
+ attr1 = 'config:0-23'
+ attr2 = 'config:0-7'
+ attr3 = 'config:12-35'
+
+ Example: 'config1:1,6-10,44'
+ Defines contents of attribute that occupies bits 1,6-10,44 of
+ perf_event_attr::config1.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
new file mode 100644
index 000000000..f89333757
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
@@ -0,0 +1,45 @@
+What: /sys/bus/event_source/devices/hv_24x7/interface/catalog
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ Provides access to the binary "24x7 catalog" provided by the
+ hypervisor on POWER7 and 8 systems. This catalog lists events
+ avaliable from the powerpc "hv_24x7" pmu. Its format is
+ documented here:
+ https://raw.githubusercontent.com/jmesmon/catalog-24x7/master/hv-24x7-catalog.h
+
+What: /sys/bus/event_source/devices/hv_24x7/interface/catalog_length
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ A number equal to the length in bytes of the catalog. This is
+ also extractable from the provided binary "catalog" sysfs entry.
+
+What: /sys/bus/event_source/devices/hv_24x7/interface/catalog_version
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ Exposes the "version" field of the 24x7 catalog. This is also
+ extractable from the provided binary "catalog" sysfs entry.
+
+What: /sys/bus/event_source/devices/hv_24x7/event_descs/<event-name>
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ Provides the description of a particular event as provided by
+ the firmware. If firmware does not provide a description, no
+ file will be created.
+
+ Note that the event-name lacks the domain suffix appended for
+ events in the events/ dir.
+
+What: /sys/bus/event_source/devices/hv_24x7/event_long_descs/<event-name>
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ Provides the "long" description of a particular event as
+ provided by the firmware. If firmware does not provide a
+ description, no file will be created.
+
+ Note that the event-name lacks the domain suffix appended for
+ events in the events/ dir.
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
new file mode 100644
index 000000000..3ca4e554d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci
@@ -0,0 +1,43 @@
+What: /sys/bus/event_source/devices/hv_gpci/interface/collect_privileged
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ '0' if the hypervisor is configured to forbid access to event
+ counters being accumulated by other guests and to physical
+ domain event counters.
+ '1' if that access is allowed.
+
+What: /sys/bus/event_source/devices/hv_gpci/interface/ga
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ 0 or 1. Indicates whether we have access to "GA" events (listed
+ in arch/powerpc/perf/hv-gpci.h).
+
+What: /sys/bus/event_source/devices/hv_gpci/interface/expanded
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ 0 or 1. Indicates whether we have access to "EXPANDED" events (listed
+ in arch/powerpc/perf/hv-gpci.h).
+
+What: /sys/bus/event_source/devices/hv_gpci/interface/lab
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ 0 or 1. Indicates whether we have access to "LAB" events (listed
+ in arch/powerpc/perf/hv-gpci.h).
+
+What: /sys/bus/event_source/devices/hv_gpci/interface/version
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ A number indicating the version of the gpci interface that the
+ hypervisor reports supporting.
+
+What: /sys/bus/event_source/devices/hv_gpci/interface/kernel_version
+Date: February 2014
+Contact: Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:
+ A number indicating the latest version of the gpci interface
+ that the kernel is aware of.
diff --git a/Documentation/ABI/testing/sysfs-bus-fcoe b/Documentation/ABI/testing/sysfs-bus-fcoe
new file mode 100644
index 000000000..21640eaad
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-fcoe
@@ -0,0 +1,116 @@
+What: /sys/bus/fcoe/
+Date: August 2012
+KernelVersion: TBD
+Contact: Robert Love <robert.w.love@intel.com>, devel@open-fcoe.org
+Description: The FCoE bus. Attributes in this directory are control interfaces.
+Attributes:
+
+ ctlr_create: 'FCoE Controller' instance creation interface. Writing an
+ <ifname> to this file will allocate and populate sysfs with a
+ fcoe_ctlr_device (ctlr_X). The user can then configure any
+ per-port settings and finally write to the fcoe_ctlr_device's
+ 'start' attribute to begin the kernel's discovery and login
+ process.
+
+ ctlr_destroy: 'FCoE Controller' instance removal interface. Writing a
+ fcoe_ctlr_device's sysfs name to this file will log the
+ fcoe_ctlr_device out of the fabric or otherwise connected
+ FCoE devices. It will also free all kernel memory allocated
+ for this fcoe_ctlr_device and any structures associated
+ with it, this includes the scsi_host.
+
+What: /sys/bus/fcoe/devices/ctlr_X
+Date: March 2012
+KernelVersion: TBD
+Contact: Robert Love <robert.w.love@intel.com>, devel@open-fcoe.org
+Description: 'FCoE Controller' instances on the fcoe bus.
+ The FCoE Controller now has a three stage creation process.
+ 1) Write interface name to ctlr_create 2) Configure the FCoE
+ Controller (ctlr_X) 3) Enable the FCoE Controller to begin
+ discovery and login. The FCoE Controller is destroyed by
+ writing it's name, i.e. ctlr_X to the ctlr_delete file.
+
+Attributes:
+
+ fcf_dev_loss_tmo: Device loss timeout peroid (see below). Changing
+ this value will change the dev_loss_tmo for all
+ FCFs discovered by this controller.
+
+ mode: Display or change the FCoE Controller's mode. Possible
+ modes are 'Fabric' and 'VN2VN'. If a FCoE Controller
+ is started in 'Fabric' mode then FIP FCF discovery is
+ initiated and ultimately a fabric login is attempted.
+ If a FCoE Controller is started in 'VN2VN' mode then
+ FIP VN2VN discovery and login is performed. A FCoE
+ Controller only supports one mode at a time.
+
+ enabled: Whether an FCoE controller is enabled or disabled.
+ 0 if disabled, 1 if enabled. Writing either 0 or 1
+ to this file will enable or disable the FCoE controller.
+
+ lesb/link_fail: Link Error Status Block (LESB) link failure count.
+
+ lesb/vlink_fail: Link Error Status Block (LESB) virtual link
+ failure count.
+
+ lesb/miss_fka: Link Error Status Block (LESB) missed FCoE
+ Initialization Protocol (FIP) Keep-Alives (FKA).
+
+ lesb/symb_err: Link Error Status Block (LESB) symbolic error count.
+
+ lesb/err_block: Link Error Status Block (LESB) block error count.
+
+ lesb/fcs_error: Link Error Status Block (LESB) Fibre Channel
+ Serivces error count.
+
+Notes: ctlr_X (global increment starting at 0)
+
+What: /sys/bus/fcoe/devices/fcf_X
+Date: March 2012
+KernelVersion: TBD
+Contact: Robert Love <robert.w.love@intel.com>, devel@open-fcoe.org
+Description: 'FCoE FCF' instances on the fcoe bus. A FCF is a Fibre Channel
+ Forwarder, which is a FCoE switch that can accept FCoE
+ (Ethernet) packets, unpack them, and forward the embedded
+ Fibre Channel frames into a FC fabric. It can also take
+ outbound FC frames and pack them in Ethernet packets to
+ be sent to their destination on the Ethernet segment.
+Attributes:
+
+ fabric_name: Identifies the fabric that the FCF services.
+
+ switch_name: Identifies the FCF.
+
+ priority: The switch's priority amongst other FCFs on the same
+ fabric.
+
+ selected: 1 indicates that the switch has been selected for use;
+ 0 indicates that the swich will not be used.
+
+ fc_map: The Fibre Channel MAP
+
+ vfid: The Virtual Fabric ID
+
+ mac: The FCF's MAC address
+
+ fka_peroid: The FIP Keep-Alive peroid
+
+ fabric_state: The internal kernel state
+ "Unknown" - Initialization value
+ "Disconnected" - No link to the FCF/fabric
+ "Connected" - Host is connected to the FCF
+ "Deleted" - FCF is being removed from the system
+
+ dev_loss_tmo: The device loss timeout peroid for this FCF.
+
+Notes: A device loss infrastructre similar to the FC Transport's
+ is present in fcoe_sysfs. It is nice to have so that a
+ link flapping adapter doesn't continually advance the count
+ used to identify the discovered FCF. FCFs will exist in a
+ "Disconnected" state until either the timer expires and the
+ FCF becomes "Deleted" or the FCF is rediscovered and becomes
+ "Connected."
+
+
+Users: The first user of this interface will be the fcoeadm application,
+ which is commonly packaged in the fcoe-utils package.
diff --git a/Documentation/ABI/testing/sysfs-bus-hsi b/Documentation/ABI/testing/sysfs-bus-hsi
new file mode 100644
index 000000000..1b1b282a9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-hsi
@@ -0,0 +1,19 @@
+What: /sys/bus/hsi
+Date: April 2012
+KernelVersion: 3.4
+Contact: Carlos Chinea <carlos.chinea@nokia.com>
+Description:
+ High Speed Synchronous Serial Interface (HSI) is a
+ serial interface mainly used for connecting application
+ engines (APE) with cellular modem engines (CMT) in cellular
+ handsets.
+ The bus will be populated with devices (hsi_clients) representing
+ the protocols available in the system. Bus drivers implement
+ those protocols.
+
+What: /sys/bus/hsi/devices/.../modalias
+Date: April 2012
+KernelVersion: 3.4
+Contact: Carlos Chinea <carlos.chinea@nokia.com>
+Description: Stores the same MODALIAS value emitted by uevent
+ Format: hsi:<hsi_client device name>
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480
new file mode 100644
index 000000000..9de269bb0
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-fsa9480
@@ -0,0 +1,21 @@
+What: /sys/bus/i2c/devices/.../device
+Date: February 2011
+Contact: Minkyu Kang <mk7.kang@samsung.com>
+Description:
+ show what device is attached
+ NONE - no device
+ USB - USB device is attached
+ UART - UART is attached
+ CHARGER - Charger is attaced
+ JIG - JIG is attached
+
+What: /sys/bus/i2c/devices/.../switch
+Date: February 2011
+Contact: Minkyu Kang <mk7.kang@samsung.com>
+Description:
+ show or set the state of manual switch
+ VAUDIO - switch to VAUDIO path
+ UART - switch to UART path
+ AUDIO - switch to AUDIO path
+ DHOST - switch to DHOST path
+ AUTO - switch automatically by device
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
new file mode 100644
index 000000000..feb2e4a87
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-hm6352
@@ -0,0 +1,21 @@
+Where: /sys/bus/i2c/devices/.../heading0_input
+Date: April 2010
+Kernel Version: 2.6.36?
+Contact: alan.cox@intel.com
+Description: Reports the current heading from the compass as a floating
+ point value in degrees.
+
+Where: /sys/bus/i2c/devices/.../power_state
+Date: April 2010
+Kernel Version: 2.6.36?
+Contact: alan.cox@intel.com
+Description: Sets the power state of the device. 0 sets the device into
+ sleep mode, 1 wakes it up.
+
+Where: /sys/bus/i2c/devices/.../calibration
+Date: April 2010
+Kernel Version: 2.6.36?
+Contact: alan.cox@intel.com
+Description: Sets the calibration on or off (1 = on, 0 = off). See the
+ chip data sheet.
+
diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-lm3533 b/Documentation/ABI/testing/sysfs-bus-i2c-devices-lm3533
new file mode 100644
index 000000000..1b62230b3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-lm3533
@@ -0,0 +1,15 @@
+What: /sys/bus/i2c/devices/.../output_hvled[n]
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Set the controlling backlight device for high-voltage current
+ sink HVLED[n] (n = 1, 2) (0, 1).
+
+What: /sys/bus/i2c/devices/.../output_lvled[n]
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Set the controlling led device for low-voltage current sink
+ LVLED[n] (n = 1..5) (0..3).
diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
new file mode 100644
index 000000000..1fbdd79d1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -0,0 +1,1364 @@
+What: /sys/bus/iio/devices/iio:deviceX
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Hardware chip or device accessed by one communication port.
+ Corresponds to a grouping of sensor channels. X is the IIO
+ index of the device.
+
+What: /sys/bus/iio/devices/triggerX
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ An event driven driver of data capture to an in kernel buffer.
+ May be provided by a device driver that also has an IIO device
+ based on hardware generated events (e.g. data ready) or
+ provided by a separate driver for other hardware (e.g.
+ periodic timer, GPIO or high resolution timer).
+ Contains trigger type specific elements. These do not
+ generalize well and hence are not documented in this file.
+ X is the IIO index of the trigger.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Directory of attributes relating to the buffer for the device.
+
+What: /sys/bus/iio/devices/iio:deviceX/name
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Description of the physical chip / device for device X.
+ Typically a part number.
+
+What: /sys/bus/iio/devices/iio:deviceX/sampling_frequency
+What: /sys/bus/iio/devices/iio:deviceX/buffer/sampling_frequency
+What: /sys/bus/iio/devices/triggerX/sampling_frequency
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Some devices have internal clocks. This parameter sets the
+ resulting sampling frequency. In many devices this
+ parameter has an effect on input filters etc. rather than
+ simply controlling when the input is sampled. As this
+ effects data ready triggers, hardware buffers and the sysfs
+ direct access interfaces, it may be found in any of the
+ relevant directories. If it effects all of the above
+ then it is to be found in the base device directory.
+
+What: /sys/bus/iio/devices/iio:deviceX/sampling_frequency_available
+What: /sys/.../iio:deviceX/buffer/sampling_frequency_available
+What: /sys/bus/iio/devices/triggerX/sampling_frequency_available
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ When the internal sampling clock can only take a small
+ discrete set of values, this file lists those available.
+
+What: /sys/bus/iio/devices/iio:deviceX/oversampling_ratio
+KernelVersion: 2.6.38
+Contact: linux-iio@vger.kernel.org
+Description:
+ Hardware dependent ADC oversampling. Controls the sampling ratio
+ of the digital filter if available.
+
+What: /sys/bus/iio/devices/iio:deviceX/oversampling_ratio_available
+KernelVersion: 2.6.38
+Contact: linux-iio@vger.kernel.org
+Description:
+ Hardware dependent values supported by the oversampling filter.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_raw
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw (unscaled no bias removal etc.) voltage measurement from
+ channel Y. In special cases where the channel does not
+ correspond to externally available input one of the named
+ versions may be used. The number must always be specified and
+ unique to allow association with event codes. Units after
+ application of scale and offset are millivolts.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_raw
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw (unscaled) differential voltage measurement equivalent to
+ channel Y - channel Z where these channel numbers apply to the
+ physically equivalent inputs when non differential readings are
+ separately available. In differential only parts, then all that
+ is required is a consistent labeling. Units after application
+ of scale and offset are millivolts.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_raw
+KernelVersion: 3.17
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw (unscaled no bias removal etc.) current measurement from
+ channel Y. In special cases where the channel does not
+ correspond to externally available input one of the named
+ versions may be used. The number must always be specified and
+ unique to allow association with event codes. Units after
+ application of scale and offset are milliamps.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_capacitanceY_raw
+KernelVersion: 3.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw capacitance measurement from channel Y. Units after
+ application of scale and offset are nanofarads.
+
+What: /sys/.../iio:deviceX/in_capacitanceY-in_capacitanceZ_raw
+KernelVersion: 3.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw differential capacitance measurement equivalent to
+ channel Y - channel Z where these channel numbers apply to the
+ physically equivalent inputs when non differential readings are
+ separately available. In differential only parts, then all that
+ is required is a consistent labeling. Units after application
+ of scale and offset are nanofarads.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_tempX_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_x_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_y_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_ambient_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_object_raw
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw (unscaled no bias removal etc.) temperature measurement.
+ If an axis is specified it generally means that the temperature
+ sensor is associated with one part of a compound device (e.g.
+ a gyroscope axis). The ambient and object modifiers distinguish
+ between ambient (reference) and distant temperature for contact-
+ less measurements. Units after application of scale and offset
+ are milli degrees Celsius.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_tempX_input
+KernelVersion: 2.6.38
+Contact: linux-iio@vger.kernel.org
+Description:
+ Scaled temperature measurement in milli degrees Celsius.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_raw
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Acceleration in direction x, y or z (may be arbitrarily assigned
+ but should match other such assignments on device).
+ Has all of the equivalent parameters as per voltageY. Units
+ after application of scale and offset are m/s^2.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_raw
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Angular velocity about axis x, y or z (may be arbitrarily
+ assigned). Has all the equivalent parameters as per voltageY.
+ Units after application of scale and offset are radians per
+ second.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_incli_x_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_incli_y_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_incli_z_raw
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Inclination raw reading about axis x, y or z (may be
+ arbitrarily assigned). Data converted by application of offset
+ and scale to degrees.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_raw
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Magnetic field along axis x, y or z (may be arbitrarily
+ assigned). Data converted by application of offset
+ then scale to Gauss.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_peak_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_peak_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_peak_raw
+KernelVersion: 2.6.36
+Contact: linux-iio@vger.kernel.org
+Description:
+ Highest value since some reset condition. These
+ attributes allow access to this and are otherwise
+ the direct equivalent of the <type>Y[_name]_raw attributes.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_xyz_squared_peak_raw
+KernelVersion: 2.6.36
+Contact: linux-iio@vger.kernel.org
+Description:
+ A computed peak value based on the sum squared magnitude of
+ the underlying value in the specified directions.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_pressure_raw
+KernelVersion: 3.8
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw pressure measurement from channel Y. Units after
+ application of scale and offset are kilopascal.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_input
+What: /sys/bus/iio/devices/iio:deviceX/in_pressure_input
+KernelVersion: 3.8
+Contact: linux-iio@vger.kernel.org
+Description:
+ Scaled pressure measurement from channel Y, in kilopascal.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_raw
+KernelVersion: 3.14
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw humidity measurement of air. Units after application of
+ scale and offset are milli percent.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_input
+KernelVersion: 3.14
+Contact: linux-iio@vger.kernel.org
+Description:
+ Scaled humidity measurement in milli percent.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_X_mean_raw
+KernelVersion: 3.5
+Contact: linux-iio@vger.kernel.org
+Description:
+ Averaged raw measurement from channel X. The number of values
+ used for averaging is device specific. The converting rules for
+ normal raw values also applies to the averaged raw values.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_current_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_tempY_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_temp_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_pressure_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_offset
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_offset
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ If known for a device, offset to be added to <type>[Y]_raw prior
+ to scaling by <type>[Y]_scale in order to obtain value in the
+ <type> units as specified in <type>[Y]_raw documentation.
+ Not present if the offset is always 0 or unknown. If Y or
+ axis <x|y|z> is not present, then the offset applies to all
+ in channels of <type>.
+ May be writable if a variable offset can be applied on the
+ device. Note that this is different to calibbias which
+ is for devices (or drivers) that apply offsets to compensate
+ for variation between different instances of the part, typically
+ adjusted by using some hardware supported calibration procedure.
+ Calibbias is applied internally, offset is applied in userspace
+ to the _raw output.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_scale
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_scale
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_currentY_supply_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_current_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_peak_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_energy_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_distance_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_x_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_y_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_z_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_from_north_magnetic_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_from_north_true_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_from_north_magnetic_tilt_comp_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_from_north_true_tilt_comp_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_pressure_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_humidityrelative_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_scale
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_scale
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ If known for a device, scale to be applied to <type>Y[_name]_raw
+ post addition of <type>[Y][_name]_offset in order to obtain the
+ measured value in <type> units as specified in
+ <type>[Y][_name]_raw documentation. If shared across all in
+ channels then Y and <x|y|z> are not present and the value is
+ called <type>[Y][_name]_scale. The peak modifier means this
+ value is applied to <type>Y[_name]_peak_raw values.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibbias
+What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibbias
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Hardware applied calibration offset (assumed to fix production
+ inaccuracies).
+
+What /sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
+What /sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
+What /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
+What /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
+What /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
+What /sys/bus/iio/devices/iio:deviceX/in_accel_z_calibscale
+What /sys/bus/iio/devices/iio:deviceX/in_anglvel_x_calibscale
+What /sys/bus/iio/devices/iio:deviceX/in_anglvel_y_calibscale
+What /sys/bus/iio/devices/iio:deviceX/in_anglvel_z_calibscale
+what /sys/bus/iio/devices/iio:deviceX/in_illuminance0_calibscale
+what /sys/bus/iio/devices/iio:deviceX/in_proximity0_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_pressureY_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_pressure_calibscale
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance_calibscale
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Hardware applied calibration scale factor (assumed to fix
+ production inaccuracies). If shared across all channels,
+ <type>_calibscale is used.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_activity_calibgender
+What: /sys/bus/iio/devices/iio:deviceX/in_energy_calibgender
+What: /sys/bus/iio/devices/iio:deviceX/in_distance_calibgender
+What: /sys/bus/iio/devices/iio:deviceX/in_velocity_calibgender
+KernelVersion: 4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Gender of the user (e.g.: male, female) used by some pedometers
+ to compute the stride length, distance, speed and activity
+ type.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_activity_calibgender_available
+What: /sys/bus/iio/devices/iio:deviceX/in_energy_calibgender_available
+What: /sys/bus/iio/devices/iio:deviceX/in_distance_calibgender_available
+What: /sys/bus/iio/devices/iio:deviceX/in_velocity_calibgender_available
+KernelVersion: 4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Lists all available gender values (e.g.: male, female).
+
+What: /sys/bus/iio/devices/iio:deviceX/in_activity_calibheight
+What: /sys/bus/iio/devices/iio:deviceX/in_energy_calibheight
+What: /sys/bus/iio/devices/iio:deviceX/in_distance_calibheight
+What: /sys/bus/iio/devices/iio:deviceX/in_velocity_calibheight
+KernelVersion: 3.19
+Contact: linux-iio@vger.kernel.org
+Description:
+ Height of the user (in meters) used by some pedometers
+ to compute the stride length, distance, speed and activity
+ type.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_energy_calibweight
+KernelVersion: 4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Weight of the user (in kg). It is needed by some pedometers
+ to compute the calories burnt by the user.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_scale_available
+What: /sys/.../iio:deviceX/in_voltageX_scale_available
+What: /sys/.../iio:deviceX/in_voltage-voltage_scale_available
+What: /sys/.../iio:deviceX/out_voltageX_scale_available
+What: /sys/.../iio:deviceX/out_altvoltageX_scale_available
+What: /sys/.../iio:deviceX/in_capacitance_scale_available
+What: /sys/.../iio:deviceX/in_pressure_scale_available
+What: /sys/.../iio:deviceX/in_pressureY_scale_available
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ If a discrete set of scale values is available, they
+ are listed in this attribute.
+
+What /sys/bus/iio/devices/iio:deviceX/out_voltageY_hardwaregain
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_red_hardwaregain
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_green_hardwaregain
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_blue_hardwaregain
+What: /sys/bus/iio/devices/iio:deviceX/in_intensity_clear_hardwaregain
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Hardware applied gain factor. If shared across all channels,
+ <type>_hardwaregain is used.
+
+What: /sys/.../in_accel_filter_low_pass_3db_frequency
+What: /sys/.../in_magn_filter_low_pass_3db_frequency
+What: /sys/.../in_anglvel_filter_low_pass_3db_frequency
+KernelVersion: 3.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ If a known or controllable low pass filter is applied
+ to the underlying data channel, then this parameter
+ gives the 3dB frequency of the filter in Hz.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_raw
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw (unscaled, no bias etc.) output voltage for
+ channel Y. The number must always be specified and
+ unique if the output corresponds to a single channel.
+ While DAC like devices typically use out_voltage,
+ a continuous frequency generating device, such as
+ a DDS or PLL should use out_altvoltage.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY&Z_raw
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY&Z_raw
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw (unscaled, no bias etc.) output voltage for an aggregate of
+ channel Y, channel Z, etc. This interface is available in cases
+ where a single output sets the value for multiple channels
+ simultaneously.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_powerdown_mode
+What: /sys/bus/iio/devices/iio:deviceX/out_voltage_powerdown_mode
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_powerdown_mode
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltage_powerdown_mode
+KernelVersion: 2.6.38
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the output powerdown mode.
+ DAC output stage is disconnected from the amplifier and
+ 1kohm_to_gnd: connected to ground via an 1kOhm resistor,
+ 6kohm_to_gnd: connected to ground via a 6kOhm resistor,
+ 20kohm_to_gnd: connected to ground via a 20kOhm resistor,
+ 100kohm_to_gnd: connected to ground via an 100kOhm resistor,
+ 500kohm_to_gnd: connected to ground via a 500kOhm resistor,
+ three_state: left floating.
+ For a list of available output power down options read
+ outX_powerdown_mode_available. If Y is not present the
+ mode is shared across all outputs.
+
+What: /sys/.../iio:deviceX/out_votlageY_powerdown_mode_available
+What: /sys/.../iio:deviceX/out_voltage_powerdown_mode_available
+What: /sys/.../iio:deviceX/out_altvotlageY_powerdown_mode_available
+What: /sys/.../iio:deviceX/out_altvoltage_powerdown_mode_available
+KernelVersion: 2.6.38
+Contact: linux-iio@vger.kernel.org
+Description:
+ Lists all available output power down modes.
+ If Y is not present the mode is shared across all outputs.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_powerdown
+What: /sys/bus/iio/devices/iio:deviceX/out_voltage_powerdown
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_powerdown
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltage_powerdown
+KernelVersion: 2.6.38
+Contact: linux-iio@vger.kernel.org
+Description:
+ Writing 1 causes output Y to enter the power down mode specified
+ by the corresponding outY_powerdown_mode. DAC output stage is
+ disconnected from the amplifier. Clearing returns to normal
+ operation. Y may be suppressed if all outputs are controlled
+ together.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_frequency
+KernelVersion: 3.4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Output frequency for channel Y in Hz. The number must always be
+ specified and unique if the output corresponds to a single
+ channel.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_phase
+KernelVersion: 3.4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Phase in radians of one frequency/clock output Y
+ (out_altvoltageY) relative to another frequency/clock output
+ (out_altvoltageZ) of the device X. The number must always be
+ specified and unique if the output corresponds to a single
+ channel.
+
+What: /sys/bus/iio/devices/iio:deviceX/events
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Configuration of which hardware generated events are passed up
+ to user-space.
+
+What: /sys/.../iio:deviceX/events/in_accel_x_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_x_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_accel_y_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_y_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_accel_z_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_z_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_anglvel_x_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_anglvel_x_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_anglvel_y_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_anglvel_y_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_anglvel_z_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_anglvel_z_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_magn_x_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_magn_x_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_magn_y_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_magn_y_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_magn_z_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_magn_z_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_magnetic_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_magnetic_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_true_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_true_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_magnetic_tilt_comp_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_magnetic_tilt_comp_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_true_tilt_comp_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_true_tilt_comp_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_voltageY_supply_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_voltageY_supply_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_voltageY_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_voltageY_thresh_falling_en
+What: /sys/.../iio:deviceX/events/in_tempY_thresh_rising_en
+What: /sys/.../iio:deviceX/events/in_tempY_thresh_falling_en
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Event generated when channel passes a threshold in the specified
+ (_rising|_falling) direction. If the direction is not specified,
+ then either the device will report an event which ever direction
+ a single threshold value is passed in (e.g.
+ <type>[Y][_name]_<raw|input>_thresh_value) or
+ <type>[Y][_name]_<raw|input>_thresh_rising_value and
+ <type>[Y][_name]_<raw|input>_thresh_falling_value may take
+ different values, but the device can only enable both thresholds
+ or neither.
+ Note the driver will assume the last p events requested are
+ to be enabled where p is how many it supports (which may vary
+ depending on the exact set requested. So if you want to be
+ sure you have set what you think you have, check the contents of
+ these attributes after everything is configured. Drivers may
+ have to buffer any parameters so that they are consistent when
+ a given event type is enabled at a future point (and not those for
+ whatever event was previously enabled).
+
+What: /sys/.../iio:deviceX/events/in_accel_x_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_x_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_accel_y_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_y_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_accel_z_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_z_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_anglvel_x_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_anglvel_x_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_anglvel_y_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_anglvel_y_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_anglvel_z_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_anglvel_z_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_magn_x_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_magn_x_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_magn_y_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_magn_y_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_magn_z_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_magn_z_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_magnetic_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_magnetic_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_true_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_true_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_magnetic_tilt_comp_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_magnetic_tilt_comp_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_true_tilt_comp_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_rot_from_north_true_tilt_comp_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_voltageY_supply_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_voltageY_supply_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_voltageY_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_voltageY_roc_falling_en
+What: /sys/.../iio:deviceX/events/in_tempY_roc_rising_en
+What: /sys/.../iio:deviceX/events/in_tempY_roc_falling_en
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Event generated when channel passes a threshold on the rate of
+ change (1st differential) in the specified (_rising|_falling)
+ direction. If the direction is not specified, then either the
+ device will report an event which ever direction a single
+ threshold value is passed in (e.g.
+ <type>[Y][_name]_<raw|input>_roc_value) or
+ <type>[Y][_name]_<raw|input>_roc_rising_value and
+ <type>[Y][_name]_<raw|input>_roc_falling_value may take
+ different values, but the device can only enable both rate of
+ change thresholds or neither.
+ Note the driver will assume the last p events requested are
+ to be enabled where p is however many it supports (which may
+ vary depending on the exact set requested. So if you want to be
+ sure you have set what you think you have, check the contents of
+ these attributes after everything is configured. Drivers may
+ have to buffer any parameters so that they are consistent when
+ a given event type is enabled a future point (and not those for
+ whatever event was previously enabled).
+
+What: /sys/.../events/in_accel_thresh_rising_value
+What: /sys/.../events/in_accel_thresh_falling_value
+What: /sys/.../events/in_accel_x_raw_thresh_rising_value
+What: /sys/.../events/in_accel_x_raw_thresh_falling_value
+What: /sys/.../events/in_accel_y_raw_thresh_rising_value
+What: /sys/.../events/in_accel_y_raw_thresh_falling_value
+What: /sys/.../events/in_accel_z_raw_thresh_rising_value
+What: /sys/.../events/in_accel_z_raw_thresh_falling_value
+What: /sys/.../events/in_anglvel_x_raw_thresh_rising_value
+What: /sys/.../events/in_anglvel_x_raw_thresh_falling_value
+What: /sys/.../events/in_anglvel_y_raw_thresh_rising_value
+What: /sys/.../events/in_anglvel_y_raw_thresh_falling_value
+What: /sys/.../events/in_anglvel_z_raw_thresh_rising_value
+What: /sys/.../events/in_anglvel_z_raw_thresh_falling_value
+What: /sys/.../events/in_magn_x_raw_thresh_rising_value
+What: /sys/.../events/in_magn_x_raw_thresh_falling_value
+What: /sys/.../events/in_magn_y_raw_thresh_rising_value
+What: /sys/.../events/in_magn_y_raw_thresh_falling_value
+What: /sys/.../events/in_magn_z_raw_thresh_rising_value
+What: /sys/.../events/in_magn_z_raw_thresh_falling_value
+What: /sys/.../events/in_rot_from_north_magnetic_raw_thresh_rising_value
+What: /sys/.../events/in_rot_from_north_magnetic_raw_thresh_falling_value
+What: /sys/.../events/in_rot_from_north_true_raw_thresh_rising_value
+What: /sys/.../events/in_rot_from_north_true_raw_thresh_falling_value
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_raw_thresh_rising_value
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_raw_thresh_falling_value
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_raw_thresh_rising_value
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_raw_thresh_falling_value
+What: /sys/.../events/in_voltageY_supply_raw_thresh_rising_value
+What: /sys/.../events/in_voltageY_supply_raw_thresh_falling_value
+What: /sys/.../events/in_voltageY_raw_thresh_rising_value
+What: /sys/.../events/in_voltageY_raw_thresh_falling_value
+What: /sys/.../events/in_tempY_raw_thresh_rising_value
+What: /sys/.../events/in_tempY_raw_thresh_falling_value
+What: /sys/.../events/in_illuminance0_thresh_falling_value
+what: /sys/.../events/in_illuminance0_thresh_rising_value
+what: /sys/.../events/in_proximity0_thresh_falling_value
+what: /sys/.../events/in_proximity0_thresh_rising_value
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the value of threshold that the device is comparing
+ against for the events enabled by
+ <type>Y[_name]_thresh[_rising|falling]_en.
+ If separate attributes exist for the two directions, but
+ direction is not specified for this attribute, then a single
+ threshold value applies to both directions.
+ The raw or input element of the name indicates whether the
+ value is in raw device units or in processed units (as _raw
+ and _input do on sysfs direct channel read attributes).
+
+What: /sys/.../events/in_accel_scale
+What: /sys/.../events/in_accel_peak_scale
+What: /sys/.../events/in_anglvel_scale
+What: /sys/.../events/in_magn_scale
+What: /sys/.../events/in_rot_from_north_magnetic_scale
+What: /sys/.../events/in_rot_from_north_true_scale
+What: /sys/.../events/in_voltage_scale
+What: /sys/.../events/in_voltage_supply_scale
+What: /sys/.../events/in_temp_scale
+What: /sys/.../events/in_illuminance_scale
+What: /sys/.../events/in_proximity_scale
+KernelVersion: 3.21
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the conversion factor from the standard units
+ to device specific units used to set the event trigger
+ threshold.
+
+What: /sys/.../events/in_accel_x_thresh_rising_hysteresis
+What: /sys/.../events/in_accel_x_thresh_falling_hysteresis
+What: /sys/.../events/in_accel_x_thresh_either_hysteresis
+What: /sys/.../events/in_accel_y_thresh_rising_hysteresis
+What: /sys/.../events/in_accel_y_thresh_falling_hysteresis
+What: /sys/.../events/in_accel_y_thresh_either_hysteresis
+What: /sys/.../events/in_accel_z_thresh_rising_hysteresis
+What: /sys/.../events/in_accel_z_thresh_falling_hysteresis
+What: /sys/.../events/in_accel_z_thresh_either_hysteresis
+What: /sys/.../events/in_anglvel_x_thresh_rising_hysteresis
+What: /sys/.../events/in_anglvel_x_thresh_falling_hysteresis
+What: /sys/.../events/in_anglvel_x_thresh_either_hysteresis
+What: /sys/.../events/in_anglvel_y_thresh_rising_hysteresis
+What: /sys/.../events/in_anglvel_y_thresh_falling_hysteresis
+What: /sys/.../events/in_anglvel_y_thresh_either_hysteresis
+What: /sys/.../events/in_anglvel_z_thresh_rising_hysteresis
+What: /sys/.../events/in_anglvel_z_thresh_falling_hysteresis
+What: /sys/.../events/in_anglvel_z_thresh_either_hysteresis
+What: /sys/.../events/in_magn_x_thresh_rising_hysteresis
+What: /sys/.../events/in_magn_x_thresh_falling_hysteresis
+What: /sys/.../events/in_magn_x_thresh_either_hysteresis
+What: /sys/.../events/in_magn_y_thresh_rising_hysteresis
+What: /sys/.../events/in_magn_y_thresh_falling_hysteresis
+What: /sys/.../events/in_magn_y_thresh_either_hysteresis
+What: /sys/.../events/in_magn_z_thresh_rising_hysteresis
+What: /sys/.../events/in_magn_z_thresh_falling_hysteresis
+What: /sys/.../events/in_magn_z_thresh_either_hysteresis
+What: /sys/.../events/in_rot_from_north_magnetic_thresh_rising_hysteresis
+What: /sys/.../events/in_rot_from_north_magnetic_thresh_falling_hysteresis
+What: /sys/.../events/in_rot_from_north_magnetic_thresh_either_hysteresis
+What: /sys/.../events/in_rot_from_north_true_thresh_rising_hysteresis
+What: /sys/.../events/in_rot_from_north_true_thresh_falling_hysteresis
+What: /sys/.../events/in_rot_from_north_true_thresh_either_hysteresis
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_thresh_rising_hysteresis
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_thresh_falling_hysteresis
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_thresh_either_hysteresis
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_thresh_rising_hysteresis
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_thresh_falling_hysteresis
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_thresh_either_hysteresis
+What: /sys/.../events/in_voltageY_thresh_rising_hysteresis
+What: /sys/.../events/in_voltageY_thresh_falling_hysteresis
+What: /sys/.../events/in_voltageY_thresh_either_hysteresis
+What: /sys/.../events/in_tempY_thresh_rising_hysteresis
+What: /sys/.../events/in_tempY_thresh_falling_hysteresis
+What: /sys/.../events/in_tempY_thresh_either_hysteresis
+What: /sys/.../events/in_illuminance0_thresh_falling_hysteresis
+what: /sys/.../events/in_illuminance0_thresh_rising_hysteresis
+what: /sys/.../events/in_illuminance0_thresh_either_hysteresis
+what: /sys/.../events/in_proximity0_thresh_falling_hysteresis
+what: /sys/.../events/in_proximity0_thresh_rising_hysteresis
+what: /sys/.../events/in_proximity0_thresh_either_hysteresis
+KernelVersion: 3.13
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the hysteresis of threshold that the device is comparing
+ against for the events enabled by
+ <type>Y[_name]_thresh[_(rising|falling)]_hysteresis.
+ If separate attributes exist for the two directions, but
+ direction is not specified for this attribute, then a single
+ hysteresis value applies to both directions.
+ For falling events the hysteresis is added to the _value attribute for
+ this event to get the upper threshold for when the event goes back to
+ normal, for rising events the hysteresis is subtracted from the _value
+ attribute. E.g. if in_voltage0_raw_thresh_rising_value is set to 1200
+ and in_voltage0_raw_thresh_rising_hysteresis is set to 50. The event
+ will get activated once in_voltage0_raw goes above 1200 and will become
+ deactived again once the value falls below 1150.
+
+What: /sys/.../events/in_accel_x_raw_roc_rising_value
+What: /sys/.../events/in_accel_x_raw_roc_falling_value
+What: /sys/.../events/in_accel_y_raw_roc_rising_value
+What: /sys/.../events/in_accel_y_raw_roc_falling_value
+What: /sys/.../events/in_accel_z_raw_roc_rising_value
+What: /sys/.../events/in_accel_z_raw_roc_falling_value
+What: /sys/.../events/in_anglvel_x_raw_roc_rising_value
+What: /sys/.../events/in_anglvel_x_raw_roc_falling_value
+What: /sys/.../events/in_anglvel_y_raw_roc_rising_value
+What: /sys/.../events/in_anglvel_y_raw_roc_falling_value
+What: /sys/.../events/in_anglvel_z_raw_roc_rising_value
+What: /sys/.../events/in_anglvel_z_raw_roc_falling_value
+What: /sys/.../events/in_magn_x_raw_roc_rising_value
+What: /sys/.../events/in_magn_x_raw_roc_falling_value
+What: /sys/.../events/in_magn_y_raw_roc_rising_value
+What: /sys/.../events/in_magn_y_raw_roc_falling_value
+What: /sys/.../events/in_magn_z_raw_roc_rising_value
+What: /sys/.../events/in_magn_z_raw_roc_falling_value
+What: /sys/.../events/in_rot_from_north_magnetic_raw_roc_rising_value
+What: /sys/.../events/in_rot_from_north_magnetic_raw_roc_falling_value
+What: /sys/.../events/in_rot_from_north_true_raw_roc_rising_value
+What: /sys/.../events/in_rot_from_north_true_raw_roc_falling_value
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_raw_roc_rising_value
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_raw_roc_falling_value
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_raw_roc_rising_value
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_raw_roc_falling_value
+What: /sys/.../events/in_voltageY_supply_raw_roc_rising_value
+What: /sys/.../events/in_voltageY_supply_raw_roc_falling_value
+What: /sys/.../events/in_voltageY_raw_roc_rising_value
+What: /sys/.../events/in_voltageY_raw_roc_falling_value
+What: /sys/.../events/in_tempY_raw_roc_rising_value
+What: /sys/.../events/in_tempY_raw_roc_falling_value
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the value of rate of change threshold that the
+ device is comparing against for the events enabled by
+ <type>[Y][_name]_roc[_rising|falling]_en.
+ If separate attributes exist for the two directions,
+ but direction is not specified for this attribute,
+ then a single threshold value applies to both directions.
+ The raw or input element of the name indicates whether the
+ value is in raw device units or in processed units (as _raw
+ and _input do on sysfs direct channel read attributes).
+
+What: /sys/.../events/in_accel_x_thresh_rising_period
+What: /sys/.../events/in_accel_x_thresh_falling_period
+What: /sys/.../events/in_accel_x_roc_rising_period
+What: /sys/.../events/in_accel_x_roc_falling_period
+What: /sys/.../events/in_accel_y_thresh_rising_period
+What: /sys/.../events/in_accel_y_thresh_falling_period
+What: /sys/.../events/in_accel_y_roc_rising_period
+What: /sys/.../events/in_accel_y_roc_falling_period
+What: /sys/.../events/in_accel_z_thresh_rising_period
+What: /sys/.../events/in_accel_z_thresh_falling_period
+What: /sys/.../events/in_accel_z_roc_rising_period
+What: /sys/.../events/in_accel_z_roc_falling_period
+What: /sys/.../events/in_anglvel_x_thresh_rising_period
+What: /sys/.../events/in_anglvel_x_thresh_falling_period
+What: /sys/.../events/in_anglvel_x_roc_rising_period
+What: /sys/.../events/in_anglvel_x_roc_falling_period
+What: /sys/.../events/in_anglvel_y_thresh_rising_period
+What: /sys/.../events/in_anglvel_y_thresh_falling_period
+What: /sys/.../events/in_anglvel_y_roc_rising_period
+What: /sys/.../events/in_anglvel_y_roc_falling_period
+What: /sys/.../events/in_anglvel_z_thresh_rising_period
+What: /sys/.../events/in_anglvel_z_thresh_falling_period
+What: /sys/.../events/in_anglvel_z_roc_rising_period
+What: /sys/.../events/in_anglvel_z_roc_falling_period
+What: /sys/.../events/in_magn_x_thresh_rising_period
+What: /sys/.../events/in_magn_x_thresh_falling_period
+What: /sys/.../events/in_magn_x_roc_rising_period
+What: /sys/.../events/in_magn_x_roc_falling_period
+What: /sys/.../events/in_magn_y_thresh_rising_period
+What: /sys/.../events/in_magn_y_thresh_falling_period
+What: /sys/.../events/in_magn_y_roc_rising_period
+What: /sys/.../events/in_magn_y_roc_falling_period
+What: /sys/.../events/in_magn_z_thresh_rising_period
+What: /sys/.../events/in_magn_z_thresh_falling_period
+What: /sys/.../events/in_magn_z_roc_rising_period
+What: /sys/.../events/in_magn_z_roc_falling_period
+What: /sys/.../events/in_rot_from_north_magnetic_thresh_rising_period
+What: /sys/.../events/in_rot_from_north_magnetic_thresh_falling_period
+What: /sys/.../events/in_rot_from_north_magnetic_roc_rising_period
+What: /sys/.../events/in_rot_from_north_magnetic_roc_falling_period
+What: /sys/.../events/in_rot_from_north_true_thresh_rising_period
+What: /sys/.../events/in_rot_from_north_true_thresh_falling_period
+What: /sys/.../events/in_rot_from_north_true_roc_rising_period
+What: /sys/.../events/in_rot_from_north_true_roc_falling_period
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_thresh_rising_period
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_thresh_falling_period
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_roc_rising_period
+What: /sys/.../events/in_rot_from_north_magnetic_tilt_comp_roc_falling_period
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_thresh_rising_period
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_thresh_falling_period
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_roc_rising_period
+What: /sys/.../events/in_rot_from_north_true_tilt_comp_roc_falling_period
+What: /sys/.../events/in_voltageY_supply_thresh_rising_period
+What: /sys/.../events/in_voltageY_supply_thresh_falling_period
+What: /sys/.../events/in_voltageY_supply_roc_rising_period
+What: /sys/.../events/in_voltageY_supply_roc_falling_period
+What: /sys/.../events/in_voltageY_thresh_rising_period
+What: /sys/.../events/in_voltageY_thresh_falling_period
+What: /sys/.../events/in_voltageY_roc_rising_period
+What: /sys/.../events/in_voltageY_roc_falling_period
+What: /sys/.../events/in_tempY_thresh_rising_period
+What: /sys/.../events/in_tempY_thresh_falling_period
+What: /sys/.../events/in_tempY_roc_rising_period
+What: /sys/.../events/in_tempY_roc_falling_period
+What: /sys/.../events/in_accel_x&y&z_mag_falling_period
+What: /sys/.../events/in_intensity0_thresh_period
+What: /sys/.../events/in_proximity0_thresh_period
+What: /sys/.../events/in_activity_still_thresh_rising_period
+What: /sys/.../events/in_activity_still_thresh_falling_period
+What: /sys/.../events/in_activity_walking_thresh_rising_period
+What: /sys/.../events/in_activity_walking_thresh_falling_period
+What: /sys/.../events/in_activity_jogging_thresh_rising_period
+What: /sys/.../events/in_activity_jogging_thresh_falling_period
+What: /sys/.../events/in_activity_running_thresh_rising_period
+What: /sys/.../events/in_activity_running_thresh_falling_period
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Period of time (in seconds) for which the condition must be
+ met before an event is generated. If direction is not
+ specified then this period applies to both directions.
+
+What: /sys/.../events/in_activity_still_thresh_rising_en
+What: /sys/.../events/in_activity_still_thresh_falling_en
+What: /sys/.../events/in_activity_walking_thresh_rising_en
+What: /sys/.../events/in_activity_walking_thresh_falling_en
+What: /sys/.../events/in_activity_jogging_thresh_rising_en
+What: /sys/.../events/in_activity_jogging_thresh_falling_en
+What: /sys/.../events/in_activity_running_thresh_rising_en
+What: /sys/.../events/in_activity_running_thresh_falling_en
+KernelVersion: 3.19
+Contact: linux-iio@vger.kernel.org
+Description:
+ Enables or disables activitity events. Depending on direction
+ an event is generated when sensor ENTERS or LEAVES a given state.
+
+What: /sys/.../events/in_activity_still_thresh_rising_value
+What: /sys/.../events/in_activity_still_thresh_falling_value
+What: /sys/.../events/in_activity_walking_thresh_rising_value
+What: /sys/.../events/in_activity_walking_thresh_falling_value
+What: /sys/.../events/in_activity_jogging_thresh_rising_value
+What: /sys/.../events/in_activity_jogging_thresh_falling_value
+What: /sys/.../events/in_activity_running_thresh_rising_value
+What: /sys/.../events/in_activity_running_thresh_falling_value
+KernelVersion: 3.19
+Contact: linux-iio@vger.kernel.org
+Description:
+ Confidence value (in units as percentage) to be used
+ for deciding when an event should be generated. E.g for
+ running: If the confidence value reported by the sensor
+ is greater than in_activity_running_thresh_rising_value
+ then the sensor ENTERS running state. Conversely, if the
+ confidence value reported by the sensor is lower than
+ in_activity_running_thresh_falling_value then the sensor
+ is LEAVING running state.
+
+What: /sys/.../iio:deviceX/events/in_accel_mag_en
+What: /sys/.../iio:deviceX/events/in_accel_mag_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_mag_falling_en
+What: /sys/.../iio:deviceX/events/in_accel_x_mag_en
+What: /sys/.../iio:deviceX/events/in_accel_x_mag_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_x_mag_falling_en
+What: /sys/.../iio:deviceX/events/in_accel_y_mag_en
+What: /sys/.../iio:deviceX/events/in_accel_y_mag_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_y_mag_falling_en
+What: /sys/.../iio:deviceX/events/in_accel_z_mag_en
+What: /sys/.../iio:deviceX/events/in_accel_z_mag_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_z_mag_falling_en
+What: /sys/.../iio:deviceX/events/in_accel_x&y&z_mag_rising_en
+What: /sys/.../iio:deviceX/events/in_accel_x&y&z_mag_falling_en
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Similar to in_accel_x_thresh[_rising|_falling]_en, but here the
+ magnitude of the channel is compared to the threshold, not its
+ signed value.
+
+What: /sys/.../events/in_accel_raw_mag_value
+What: /sys/.../events/in_accel_x_raw_mag_rising_value
+What: /sys/.../events/in_accel_y_raw_mag_rising_value
+What: /sys/.../events/in_accel_z_raw_mag_rising_value
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ The value to which the magnitude of the channel is compared. If
+ number or direction is not specified, applies to all channels of
+ this type.
+
+What: /sys/.../events/in_steps_change_en
+KernelVersion: 4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Event generated when channel passes a threshold on the absolute
+ change in value. E.g. for steps: a step change event is
+ generated each time the user takes N steps, where N is set using
+ in_steps_change_value.
+
+What: /sys/.../events/in_steps_change_value
+KernelVersion: 4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the value of change threshold that the
+ device is comparing against for the events enabled by
+ <type>[Y][_name]_roc[_rising|falling|]_en. E.g. for steps:
+ if set to 3, a step change event will be generated every 3
+ steps.
+
+What: /sys/bus/iio/devices/iio:deviceX/trigger/current_trigger
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ The name of the trigger source being used, as per string given
+ in /sys/class/iio/triggerY/name.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/length
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Number of scans contained by the buffer.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/bytes_per_datum
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Bytes per scan. Due to alignment fun, the scan may be larger
+ than implied directly by the scan_element parameters.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/enable
+KernelVersion: 2.6.35
+Contact: linux-iio@vger.kernel.org
+Description:
+ Actually start the buffer capture up. Will start trigger
+ if first device and appropriate.
+
+What: /sys/bus/iio/devices/iio:deviceX/scan_elements
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Directory containing interfaces for elements that will be
+ captured for a single triggered sample set in the buffer.
+
+What: /sys/.../iio:deviceX/scan_elements/in_accel_x_en
+What: /sys/.../iio:deviceX/scan_elements/in_accel_y_en
+What: /sys/.../iio:deviceX/scan_elements/in_accel_z_en
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_x_en
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_y_en
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_z_en
+What: /sys/.../iio:deviceX/scan_elements/in_magn_x_en
+What: /sys/.../iio:deviceX/scan_elements/in_magn_y_en
+What: /sys/.../iio:deviceX/scan_elements/in_magn_z_en
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_en
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_en
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_tilt_comp_en
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_tilt_comp_en
+What: /sys/.../iio:deviceX/scan_elements/in_timestamp_en
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_en
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_en
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY-voltageZ_en
+What: /sys/.../iio:deviceX/scan_elements/in_incli_x_en
+What: /sys/.../iio:deviceX/scan_elements/in_incli_y_en
+What: /sys/.../iio:deviceX/scan_elements/in_pressureY_en
+What: /sys/.../iio:deviceX/scan_elements/in_pressure_en
+What: /sys/.../iio:deviceX/scan_elements/in_rot_quaternion_en
+What: /sys/.../iio:deviceX/scan_elements/in_proximity_en
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Scan element control for triggered data capture.
+
+What: /sys/.../iio:deviceX/scan_elements/in_accel_type
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_type
+What: /sys/.../iio:deviceX/scan_elements/in_magn_type
+What: /sys/.../iio:deviceX/scan_elements/in_incli_type
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_type
+What: /sys/.../iio:deviceX/scan_elements/in_voltage_type
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_type
+What: /sys/.../iio:deviceX/scan_elements/in_timestamp_type
+What: /sys/.../iio:deviceX/scan_elements/in_pressureY_type
+What: /sys/.../iio:deviceX/scan_elements/in_pressure_type
+What: /sys/.../iio:deviceX/scan_elements/in_rot_quaternion_type
+What: /sys/.../iio:deviceX/scan_elements/in_proximity_type
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ Description of the scan element data storage within the buffer
+ and hence the form in which it is read from user-space.
+ Form is [be|le]:[s|u]bits/storagebits[>>shift].
+ be or le specifies big or little endian. s or u specifies if
+ signed (2's complement) or unsigned. bits is the number of bits
+ of data and storagebits is the space (after padding) that it
+ occupies in the buffer. shift if specified, is the shift that
+ needs to be applied prior to masking out unused bits. Some
+ devices put their data in the middle of the transferred elements
+ with additional information on both sides. Note that some
+ devices will have additional information in the unused bits
+ so to get a clean value, the bits value must be used to mask
+ the buffer output value appropriately. The storagebits value
+ also specifies the data alignment. So s48/64>>2 will be a
+ signed 48 bit integer stored in a 64 bit location aligned to
+ a 64 bit boundary. To obtain the clean value, shift right 2
+ and apply a mask to zero the top 16 bits of the result.
+ For other storage combinations this attribute will be extended
+ appropriately.
+
+What: /sys/.../iio:deviceX/scan_elements/in_accel_type_available
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ If the type parameter can take one of a small set of values,
+ this attribute lists them.
+
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_index
+What: /sys/.../iio:deviceX/scan_elements/in_voltageY_supply_index
+What: /sys/.../iio:deviceX/scan_elements/in_accel_x_index
+What: /sys/.../iio:deviceX/scan_elements/in_accel_y_index
+What: /sys/.../iio:deviceX/scan_elements/in_accel_z_index
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_x_index
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_y_index
+What: /sys/.../iio:deviceX/scan_elements/in_anglvel_z_index
+What: /sys/.../iio:deviceX/scan_elements/in_magn_x_index
+What: /sys/.../iio:deviceX/scan_elements/in_magn_y_index
+What: /sys/.../iio:deviceX/scan_elements/in_magn_z_index
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_index
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_index
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_magnetic_tilt_comp_index
+What: /sys/.../iio:deviceX/scan_elements/in_rot_from_north_true_tilt_comp_index
+What: /sys/.../iio:deviceX/scan_elements/in_incli_x_index
+What: /sys/.../iio:deviceX/scan_elements/in_incli_y_index
+What: /sys/.../iio:deviceX/scan_elements/in_timestamp_index
+What: /sys/.../iio:deviceX/scan_elements/in_pressureY_index
+What: /sys/.../iio:deviceX/scan_elements/in_pressure_index
+What: /sys/.../iio:deviceX/scan_elements/in_rot_quaternion_index
+What: /sys/.../iio:deviceX/scan_elements/in_proximity_index
+KernelVersion: 2.6.37
+Contact: linux-iio@vger.kernel.org
+Description:
+ A single positive integer specifying the position of this
+ scan element in the buffer. Note these are not dependent on
+ what is enabled and may not be contiguous. Thus for user-space
+ to establish the full layout these must be used in conjunction
+ with all _en attributes to establish which channels are present,
+ and the relevant _type attributes to establish the data storage
+ format.
+
+What: /sys/.../iio:deviceX/in_activity_still_input
+What: /sys/.../iio:deviceX/in_activity_walking_input
+What: /sys/.../iio:deviceX/in_activity_jogging_input
+What: /sys/.../iio:deviceX/in_activity_running_input
+KernelVersion: 3.19
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute is used to read the confidence for an activity
+ expressed in units as percentage.
+
+What: /sys/.../iio:deviceX/in_anglvel_z_quadrature_correction_raw
+KernelVersion: 2.6.38
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute is used to read the amount of quadrature error
+ present in the device at a given time.
+
+What: /sys/.../iio:deviceX/in_accelX_power_mode
+KernelVersion: 3.11
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the chip power mode.
+ low_noise: reduce noise level from ADC,
+ low_power: enable low current consumption.
+ For a list of available output power modes read
+ in_accel_power_mode_available.
+
+What: /sys/.../iio:deviceX/in_energy_input
+What: /sys/.../iio:deviceX/in_energy_raw
+KernelVersion: 4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute is used to read the energy value reported by the
+ device (e.g.: human activity sensors report energy burnt by the
+ user). Units after application of scale are Joules.
+
+What: /sys/.../iio:deviceX/in_distance_input
+What: /sys/.../iio:deviceX/in_distance_raw
+KernelVersion: 4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute is used to read the distance covered by the user
+ since the last reboot while activated. Units after application
+ of scale are meters.
+
+What: /sys/bus/iio/devices/iio:deviceX/store_eeprom
+KernelVersion: 3.4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Writing '1' stores the current device configuration into
+ on-chip EEPROM. After power-up or chip reset the device will
+ automatically load the saved configuration.
+
+What: /sys/.../iio:deviceX/in_proximity_raw
+What: /sys/.../iio:deviceX/in_proximity_input
+What: /sys/.../iio:deviceX/in_proximityY_raw
+KernelVersion: 3.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ Proximity measurement indicating that some
+ object is near the sensor, usually be observing
+ reflectivity of infrared or ultrasound emitted.
+ Often these sensors are unit less and as such conversion
+ to SI units is not possible. Higher proximity measurements
+ indicate closer objects, and vice versa.
+
+What: /sys/.../iio:deviceX/in_illuminance_input
+What: /sys/.../iio:deviceX/in_illuminance_raw
+What: /sys/.../iio:deviceX/in_illuminanceY_input
+What: /sys/.../iio:deviceX/in_illuminanceY_raw
+What: /sys/.../iio:deviceX/in_illuminanceY_mean_raw
+What: /sys/.../iio:deviceX/in_illuminance_ir_raw
+What: /sys/.../iio:deviceX/in_illuminance_clear_raw
+KernelVersion: 3.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ Illuminance measurement, units after application of scale
+ and offset are lux.
+
+What: /sys/.../iio:deviceX/in_intensityY_raw
+What: /sys/.../iio:deviceX/in_intensityY_ir_raw
+What: /sys/.../iio:deviceX/in_intensityY_both_raw
+KernelVersion: 3.4
+Contact: linux-iio@vger.kernel.org
+Description:
+ Unit-less light intensity. Modifiers both and ir indicate
+ that measurements contains visible and infrared light
+ components or just infrared light, respectively.
+
+What: /sys/.../iio:deviceX/in_intensity_red_integration_time
+What: /sys/.../iio:deviceX/in_intensity_green_integration_time
+What: /sys/.../iio:deviceX/in_intensity_blue_integration_time
+What: /sys/.../iio:deviceX/in_intensity_clear_integration_time
+What: /sys/.../iio:deviceX/in_illuminance_integration_time
+KernelVersion: 3.12
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute is used to get/set the integration time in
+ seconds.
+
+What: /sys/.../iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_integration_time
+KernelVersion: 4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Number of seconds in which to compute speed.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_quaternion_raw
+KernelVersion: 3.15
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw value of quaternion components using a format
+ x y z w. Here x, y, and z component represents the axis about
+ which a rotation will occur and w component represents the
+ amount of rotation.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_from_north_magnetic_tilt_comp_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_from_north_true_tilt_comp_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_from_north_magnetic_raw
+What: /sys/bus/iio/devices/iio:deviceX/in_rot_from_north_true_raw
+KernelVersion: 3.15
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw value of rotation from true/magnetic north measured with
+ or without compensation from tilt sensors.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_currentX_raw
+KernelVersion: 3.18
+Contact: linux-iio@vger.kernel.org
+Description:
+ Raw current measurement from channel X. Units are in milliamps
+ after application of scale and offset. If no offset or scale is
+ present, output should be considered as processed with the
+ unit in milliamps.
+
+What: /sys/.../iio:deviceX/in_energy_en
+What: /sys/.../iio:deviceX/in_distance_en
+What: /sys/.../iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_en
+What: /sys/.../iio:deviceX/in_steps_en
+KernelVersion: 3.19
+Contact: linux-iio@vger.kernel.org
+Description:
+ Activates a device feature that runs in firmware/hardware.
+ E.g. for steps: the pedometer saves power while not used;
+ when activated, it will count the steps taken by the user in
+ firmware and export them through in_steps_input.
+
+What: /sys/.../iio:deviceX/in_steps_input
+KernelVersion: 3.19
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute is used to read the number of steps taken by the user
+ since the last reboot while activated.
+
+What: /sys/.../iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_input
+What: /sys/.../iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_raw
+KernelVersion: 3.19
+Contact: linux-iio@vger.kernel.org
+Description:
+ This attribute is used to read the current speed value of the
+ user (which is the norm or magnitude of the velocity vector).
+ Units after application of scale are m/s.
+
+What: /sys/.../iio:deviceX/in_steps_debounce_count
+KernelVersion: 4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies the number of steps that must occur within
+ in_steps_filter_debounce_time for the pedometer to decide the
+ consumer is making steps.
+
+What: /sys/.../iio:deviceX/in_steps_debounce_time
+KernelVersion: 4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Specifies number of seconds in which we compute the steps
+ that occur in order to decide if the consumer is making steps.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/watermark
+KernelVersion: 4.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ A single positive integer specifying the maximum number of scan
+ elements to wait for.
+ Poll will block until the watermark is reached.
+ Blocking read will wait until the minimum between the requested
+ read amount or the low water mark is available.
+ Non-blocking read will retrieve the available samples from the
+ buffer even if there are less samples then watermark level. This
+ allows the application to block on poll with a timeout and read
+ the available samples after the timeout expires and thus have a
+ maximum delay guarantee.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/hwfifo_enabled
+KernelVersion: 4.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ A read-only boolean value that indicates if the hardware fifo is
+ currently enabled or disabled. If the device does not have a
+ hardware fifo this entry is not present.
+ The hardware fifo is enabled when the buffer is enabled if the
+ current hardware fifo watermark level is set and other current
+ device settings allows it (e.g. if a trigger is set that samples
+ data differently that the hardware fifo does then hardware fifo
+ will not enabled).
+ If the hardware fifo is enabled and the level of the hardware
+ fifo reaches the hardware fifo watermark level the device will
+ flush its hardware fifo to the device buffer. Doing a non
+ blocking read on the device when no samples are present in the
+ device buffer will also force a flush.
+ When the hardware fifo is enabled there is no need to use a
+ trigger to use buffer mode since the watermark settings
+ guarantees that the hardware fifo is flushed to the device
+ buffer.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/hwfifo_watermark
+KernelVersion: 4.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ Read-only entry that contains a single integer specifying the
+ current watermark level for the hardware fifo. If the device
+ does not have a hardware fifo this entry is not present.
+ The watermark level for the hardware fifo is set by the driver
+ based on the value set by the user in buffer/watermark but
+ taking into account hardware limitations (e.g. most hardware
+ buffers are limited to 32-64 samples, some hardware buffers
+ watermarks are fixed or have minimum levels). A value of 0
+ means that the hardware watermark is unset.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/hwfifo_watermark_min
+KernelVersion: 4.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ A single positive integer specifying the minimum watermark level
+ for the hardware fifo of this device. If the device does not
+ have a hardware fifo this entry is not present.
+ If the user sets buffer/watermark to a value less than this one,
+ then the hardware watermark will remain unset.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/hwfifo_watermark_max
+KernelVersion: 4.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ A single positive integer specifying the maximum watermark level
+ for the hardware fifo of this device. If the device does not
+ have a hardware fifo this entry is not present.
+ If the user sets buffer/watermark to a value greater than this
+ one, then the hardware watermark will be capped at this value.
+
+What: /sys/bus/iio/devices/iio:deviceX/buffer/hwfifo_watermark_available
+KernelVersion: 4.2
+Contact: linux-iio@vger.kernel.org
+Description:
+ A list of positive integers specifying the available watermark
+ levels for the hardware fifo. This entry is optional and if it
+ is not present it means that all the values between
+ hwfifo_watermark_min and hwfifo_watermark_max are supported.
+ If the user sets buffer/watermark to a value greater than
+ hwfifo_watermak_min but not equal to any of the values in this
+ list, the driver will chose an appropriate value for the
+ hardware fifo watermark level.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-accel-bmc150 b/Documentation/ABI/testing/sysfs-bus-iio-accel-bmc150
new file mode 100644
index 000000000..99847a913
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-accel-bmc150
@@ -0,0 +1,7 @@
+What: /sys/bus/iio/devices/triggerX/name = "bmc150_accel-any-motion-devX"
+KernelVersion: 3.17
+Contact: linux-iio@vger.kernel.org
+Description:
+ The BMC150 accelerometer kernel module provides an additional trigger,
+ which sets driver in a mode, where data is pushed to the buffer
+ only when there is any motion.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-frequency-ad9523 b/Documentation/ABI/testing/sysfs-bus-iio-frequency-ad9523
new file mode 100644
index 000000000..a91aeabe7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-frequency-ad9523
@@ -0,0 +1,29 @@
+What: /sys/bus/iio/devices/iio:deviceX/pll2_feedback_clk_present
+What: /sys/bus/iio/devices/iio:deviceX/pll2_reference_clk_present
+What: /sys/bus/iio/devices/iio:deviceX/pll1_reference_clk_a_present
+What: /sys/bus/iio/devices/iio:deviceX/pll1_reference_clk_b_present
+What: /sys/bus/iio/devices/iio:deviceX/pll1_reference_clk_test_present
+What: /sys/bus/iio/devices/iio:deviceX/vcxo_clk_present
+KernelVersion: 3.4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns either '1' or '0'.
+ '1' means that the clock in question is present.
+ '0' means that the clock is missing.
+
+What: /sys/bus/iio/devices/iio:deviceX/pllY_locked
+KernelVersion: 3.4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Reading returns either '1' or '0'. '1' means that the
+ pllY is locked.
+
+What: /sys/bus/iio/devices/iio:deviceX/sync_dividers
+KernelVersion: 3.4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Writing '1' triggers the clock distribution synchronization
+ functionality. All dividers are reset and the channels start
+ with their predefined phase offsets (out_altvoltageY_phase).
+ Writing this file has the effect as driving the external
+ /SYNC pin low.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4350 b/Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4350
new file mode 100644
index 000000000..1254457a7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-frequency-adf4350
@@ -0,0 +1,21 @@
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_frequency_resolution
+KernelVersion: 3.4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Stores channel Y frequency resolution/channel spacing in Hz.
+ The value given directly influences the MODULUS used by
+ the fractional-N PLL. It is assumed that the algorithm
+ that is used to compute the various dividers, is able to
+ generate proper values for multiples of channel spacing.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_altvoltageY_refin_frequency
+KernelVersion: 3.4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ Sets channel Y REFin frequency in Hz. In some clock chained
+ applications, the reference frequency used by the PLL may
+ change during runtime. This attribute allows the user to
+ adjust the reference frequency accordingly.
+ The value written has no effect until out_altvoltageY_frequency
+ is updated. Consider to use out_altvoltageY_powerdown to power
+ down the PLL and its RFOut buffers during REFin changes.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-gyro-bmg160 b/Documentation/ABI/testing/sysfs-bus-iio-gyro-bmg160
new file mode 100644
index 000000000..e98209c91
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-gyro-bmg160
@@ -0,0 +1,7 @@
+What: /sys/bus/iio/devices/triggerX/name = "bmg160-any-motion-devX"
+KernelVersion: 3.17
+Contact: linux-iio@vger.kernel.org
+Description:
+ The BMG160 gyro kernel module provides an additional trigger,
+ which sets driver in a mode, where data is pushed to the buffer
+ only when there is any motion.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-light-lm3533-als b/Documentation/ABI/testing/sysfs-bus-iio-light-lm3533-als
new file mode 100644
index 000000000..22c5ea670
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-light-lm3533-als
@@ -0,0 +1,61 @@
+What: /sys/.../events/in_illuminance0_thresh_either_en
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Event generated when channel passes one of the four thresholds
+ in each direction (rising|falling) and a zone change occurs.
+ The corresponding light zone can be read from
+ in_illuminance0_zone.
+
+What: /sys/.../events/in_illuminance0_threshY_hysteresis
+Date: May 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Get the hysteresis for thresholds Y, that is,
+ threshY_hysteresis = threshY_raising - threshY_falling
+
+What: /sys/.../events/illuminance_threshY_falling_value
+What: /sys/.../events/illuminance_threshY_raising_value
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Specifies the value of threshold that the device is comparing
+ against for the events enabled by
+ in_illuminance0_thresh_either_en (0..255), where Y in 0..3.
+
+ Note that threshY_falling must be less than or equal to
+ threshY_raising.
+
+ These thresholds correspond to the eight zone-boundary
+ registers (boundaryY_{low,high}) and define the five light
+ zones.
+
+What: /sys/bus/iio/devices/iio:deviceX/in_illuminance0_zone
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Get the current light zone (0..4) as defined by the
+ in_illuminance0_threshY_{falling,rising} thresholds.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_raw
+Date: May 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Get output current for channel Y (0..255), that is,
+ out_currentY_currentZ_raw, where Z is the current zone.
+
+What: /sys/bus/iio/devices/iio:deviceX/out_currentY_currentZ_raw
+Date: May 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Set the output current for channel out_currentY when in zone
+ Z (0..255), where Y in 0..2 and Z in 0..4.
+
+ These values correspond to the ALS-mapper target registers for
+ ALS-mapper Y + 1.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-mpu6050 b/Documentation/ABI/testing/sysfs-bus-iio-mpu6050
new file mode 100644
index 000000000..cb53737aa
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-mpu6050
@@ -0,0 +1,13 @@
+What: /sys/bus/iio/devices/iio:deviceX/in_gyro_matrix
+What: /sys/bus/iio/devices/iio:deviceX/in_accel_matrix
+What: /sys/bus/iio/devices/iio:deviceX/in_magn_matrix
+KernelVersion: 3.4.0
+Contact: linux-iio@vger.kernel.org
+Description:
+ This is mounting matrix for motion sensors. Mounting matrix
+ is a 3x3 unitary matrix. A typical mounting matrix would look like
+ [0, 1, 0; 1, 0, 0; 0, 0, -1]. Using this information, it would be
+ easy to tell the relative positions among sensors as well as their
+ positions relative to the board that holds these sensors. Identity matrix
+ [1, 0, 0; 0, 1, 0; 0, 0, 1] means sensor chip and device are perfectly
+ aligned with each other. All axes are exactly the same.
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935 b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
new file mode 100644
index 000000000..6708c5e26
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-proximity-as3935
@@ -0,0 +1,16 @@
+What /sys/bus/iio/devices/iio:deviceX/in_proximity_raw
+Date: March 2014
+KernelVersion: 3.15
+Contact: Matt Ranostay <mranostay@gmail.com>
+Description:
+ Get the current distance in meters of storm (1km steps)
+ 1000-40000 = distance in meters
+
+What /sys/bus/iio/devices/iio:deviceX/sensor_sensitivity
+Date: March 2014
+KernelVersion: 3.15
+Contact: Matt Ranostay <mranostay@gmail.com>
+Description:
+ Show or set the gain boost of the amp, from 0-31 range.
+ 18 = indoors (default)
+ 14 = outdoors
diff --git a/Documentation/ABI/testing/sysfs-bus-iio-trigger-sysfs b/Documentation/ABI/testing/sysfs-bus-iio-trigger-sysfs
new file mode 100644
index 000000000..5235e6c74
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-trigger-sysfs
@@ -0,0 +1,11 @@
+What: /sys/bus/iio/devices/triggerX/trigger_now
+KernelVersion: 2.6.38
+Contact: linux-iio@vger.kernel.org
+Description:
+ This file is provided by the iio-trig-sysfs stand-alone trigger
+ driver. Writing this file with any value triggers an event
+ driven driver, associated with this trigger, to capture data
+ into an in kernel buffer. This approach can be valuable during
+ automated testing or in situations, where other trigger methods
+ are not applicable. For example no RTC or spare GPIOs.
+ X is the IIO index of the trigger.
diff --git a/Documentation/ABI/testing/sysfs-bus-mdio b/Documentation/ABI/testing/sysfs-bus-mdio
new file mode 100644
index 000000000..491baaf42
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-mdio
@@ -0,0 +1,29 @@
+What: /sys/bus/mdio_bus/devices/.../phy_id
+Date: November 2012
+KernelVersion: 3.8
+Contact: netdev@vger.kernel.org
+Description:
+ This attribute contains the 32-bit PHY Identifier as reported
+ by the device during bus enumeration, encoded in hexadecimal.
+ This ID is used to match the device with the appropriate
+ driver.
+
+What: /sys/bus/mdio_bus/devices/.../phy_interface
+Date: February 2014
+KernelVersion: 3.15
+Contact: netdev@vger.kernel.org
+Description:
+ This attribute contains the PHY interface as configured by the
+ Ethernet driver during bus enumeration, encoded in string.
+ This interface mode is used to configure the Ethernet MAC with the
+ appropriate mode for its data lines to the PHY hardware.
+
+What: /sys/bus/mdio_bus/devices/.../phy_has_fixups
+Date: February 2014
+KernelVersion: 3.15
+Contact: netdev@vger.kernel.org
+Description:
+ This attribute contains the boolean value whether a given PHY
+ device has had any "fixup" workaround running on it, encoded as
+ a boolean. This information is provided to help troubleshooting
+ PHY configurations.
diff --git a/Documentation/ABI/testing/sysfs-bus-media b/Documentation/ABI/testing/sysfs-bus-media
new file mode 100644
index 000000000..7057e5741
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-media
@@ -0,0 +1,6 @@
+What: /sys/bus/media/devices/.../model
+Date: January 2011
+Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ linux-media@vger.kernel.org
+Description: Contains the device model name in UTF-8. The device version is
+ is not be appended to the model name.
diff --git a/Documentation/ABI/testing/sysfs-bus-mei b/Documentation/ABI/testing/sysfs-bus-mei
new file mode 100644
index 000000000..2066f0bbd
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-mei
@@ -0,0 +1,7 @@
+What: /sys/bus/mei/devices/.../modalias
+Date: March 2013
+KernelVersion: 3.10
+Contact: Samuel Ortiz <sameo@linux.intel.com>
+ linux-mei@linux.intel.com
+Description: Stores the same MODALIAS value emitted by uevent
+ Format: mei:<mei device name>
diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci
new file mode 100644
index 000000000..b3bc50f65
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci
@@ -0,0 +1,296 @@
+What: /sys/bus/pci/drivers/.../bind
+Date: December 2003
+Contact: linux-pci@vger.kernel.org
+Description:
+ Writing a device location to this file will cause
+ the driver to attempt to bind to the device found at
+ this location. This is useful for overriding default
+ bindings. The format for the location is: DDDD:BB:DD.F.
+ That is Domain:Bus:Device.Function and is the same as
+ found in /sys/bus/pci/devices/. For example:
+ # echo 0000:00:19.0 > /sys/bus/pci/drivers/foo/bind
+ (Note: kernels before 2.6.28 may require echo -n).
+
+What: /sys/bus/pci/drivers/.../unbind
+Date: December 2003
+Contact: linux-pci@vger.kernel.org
+Description:
+ Writing a device location to this file will cause the
+ driver to attempt to unbind from the device found at
+ this location. This may be useful when overriding default
+ bindings. The format for the location is: DDDD:BB:DD.F.
+ That is Domain:Bus:Device.Function and is the same as
+ found in /sys/bus/pci/devices/. For example:
+ # echo 0000:00:19.0 > /sys/bus/pci/drivers/foo/unbind
+ (Note: kernels before 2.6.28 may require echo -n).
+
+What: /sys/bus/pci/drivers/.../new_id
+Date: December 2003
+Contact: linux-pci@vger.kernel.org
+Description:
+ Writing a device ID to this file will attempt to
+ dynamically add a new device ID to a PCI device driver.
+ This may allow the driver to support more hardware than
+ was included in the driver's static device ID support
+ table at compile time. The format for the device ID is:
+ VVVV DDDD SVVV SDDD CCCC MMMM PPPP. That is Vendor ID,
+ Device ID, Subsystem Vendor ID, Subsystem Device ID,
+ Class, Class Mask, and Private Driver Data. The Vendor ID
+ and Device ID fields are required, the rest are optional.
+ Upon successfully adding an ID, the driver will probe
+ for the device and attempt to bind to it. For example:
+ # echo "8086 10f5" > /sys/bus/pci/drivers/foo/new_id
+
+What: /sys/bus/pci/drivers/.../remove_id
+Date: February 2009
+Contact: Chris Wright <chrisw@sous-sol.org>
+Description:
+ Writing a device ID to this file will remove an ID
+ that was dynamically added via the new_id sysfs entry.
+ The format for the device ID is:
+ VVVV DDDD SVVV SDDD CCCC MMMM. That is Vendor ID, Device
+ ID, Subsystem Vendor ID, Subsystem Device ID, Class,
+ and Class Mask. The Vendor ID and Device ID fields are
+ required, the rest are optional. After successfully
+ removing an ID, the driver will no longer support the
+ device. This is useful to ensure auto probing won't
+ match the driver to the device. For example:
+ # echo "8086 10f5" > /sys/bus/pci/drivers/foo/remove_id
+
+What: /sys/bus/pci/rescan
+Date: January 2009
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a non-zero value to this attribute will
+ force a rescan of all PCI buses in the system, and
+ re-discover previously removed devices.
+
+What: /sys/bus/pci/devices/.../msi_bus
+Date: September 2014
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a zero value to this attribute disallows MSI and
+ MSI-X for any future drivers of the device. If the device
+ is a bridge, MSI and MSI-X will be disallowed for future
+ drivers of all child devices under the bridge. Drivers
+ must be reloaded for the new setting to take effect.
+
+What: /sys/bus/pci/devices/.../msi_irqs/
+Date: September, 2011
+Contact: Neil Horman <nhorman@tuxdriver.com>
+Description:
+ The /sys/devices/.../msi_irqs directory contains a variable set
+ of files, with each file being named after a corresponding msi
+ irq vector allocated to that device.
+
+What: /sys/bus/pci/devices/.../msi_irqs/<N>
+Date: September 2011
+Contact: Neil Horman <nhorman@tuxdriver.com>
+Description:
+ This attribute indicates the mode that the irq vector named by
+ the file is in (msi vs. msix)
+
+What: /sys/bus/pci/devices/.../remove
+Date: January 2009
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a non-zero value to this attribute will
+ hot-remove the PCI device and any of its children.
+
+What: /sys/bus/pci/devices/.../pci_bus/.../rescan
+Date: May 2011
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a non-zero value to this attribute will
+ force a rescan of the bus and all child buses,
+ and re-discover devices removed earlier from this
+ part of the device tree.
+
+What: /sys/bus/pci/devices/.../rescan
+Date: January 2009
+Contact: Linux PCI developers <linux-pci@vger.kernel.org>
+Description:
+ Writing a non-zero value to this attribute will
+ force a rescan of the device's parent bus and all
+ child buses, and re-discover devices removed earlier
+ from this part of the device tree.
+
+What: /sys/bus/pci/devices/.../reset
+Date: July 2009
+Contact: Michael S. Tsirkin <mst@redhat.com>
+Description:
+ Some devices allow an individual function to be reset
+ without affecting other functions in the same device.
+ For devices that have this support, a file named reset
+ will be present in sysfs. Writing 1 to this file
+ will perform reset.
+
+What: /sys/bus/pci/devices/.../vpd
+Date: February 2008
+Contact: Ben Hutchings <bwh@kernel.org>
+Description:
+ A file named vpd in a device directory will be a
+ binary file containing the Vital Product Data for the
+ device. It should follow the VPD format defined in
+ PCI Specification 2.1 or 2.2, but users should consider
+ that some devices may have malformatted data. If the
+ underlying VPD has a writable section then the
+ corresponding section of this file will be writable.
+
+What: /sys/bus/pci/devices/.../virtfnN
+Date: March 2009
+Contact: Yu Zhao <yu.zhao@intel.com>
+Description:
+ This symbolic link appears when hardware supports the SR-IOV
+ capability and the Physical Function driver has enabled it.
+ The symbolic link points to the PCI device sysfs entry of the
+ Virtual Function whose index is N (0...MaxVFs-1).
+
+What: /sys/bus/pci/devices/.../dep_link
+Date: March 2009
+Contact: Yu Zhao <yu.zhao@intel.com>
+Description:
+ This symbolic link appears when hardware supports the SR-IOV
+ capability and the Physical Function driver has enabled it,
+ and this device has vendor specific dependencies with others.
+ The symbolic link points to the PCI device sysfs entry of
+ Physical Function this device depends on.
+
+What: /sys/bus/pci/devices/.../physfn
+Date: March 2009
+Contact: Yu Zhao <yu.zhao@intel.com>
+Description:
+ This symbolic link appears when a device is a Virtual Function.
+ The symbolic link points to the PCI device sysfs entry of the
+ Physical Function this device associates with.
+
+What: /sys/bus/pci/slots/.../module
+Date: June 2009
+Contact: linux-pci@vger.kernel.org
+Description:
+ This symbolic link points to the PCI hotplug controller driver
+ module that manages the hotplug slot.
+
+What: /sys/bus/pci/devices/.../label
+Date: July 2010
+Contact: Narendra K <narendra_k@dell.com>, linux-bugs@dell.com
+Description:
+ Reading this attribute will provide the firmware
+ given name (SMBIOS type 41 string or ACPI _DSM string) of
+ the PCI device. The attribute will be created only
+ if the firmware has given a name to the PCI device.
+ ACPI _DSM string name will be given priority if the
+ system firmware provides SMBIOS type 41 string also.
+Users:
+ Userspace applications interested in knowing the
+ firmware assigned name of the PCI device.
+
+What: /sys/bus/pci/devices/.../index
+Date: July 2010
+Contact: Narendra K <narendra_k@dell.com>, linux-bugs@dell.com
+Description:
+ Reading this attribute will provide the firmware
+ given instance (SMBIOS type 41 device type instance) of the
+ PCI device. The attribute will be created only if the firmware
+ has given an instance number to the PCI device.
+Users:
+ Userspace applications interested in knowing the
+ firmware assigned device type instance of the PCI
+ device that can help in understanding the firmware
+ intended order of the PCI device.
+
+What: /sys/bus/pci/devices/.../acpi_index
+Date: July 2010
+Contact: Narendra K <narendra_k@dell.com>, linux-bugs@dell.com
+Description:
+ Reading this attribute will provide the firmware
+ given instance (ACPI _DSM instance number) of the PCI device.
+ The attribute will be created only if the firmware has given
+ an instance number to the PCI device. ACPI _DSM instance number
+ will be given priority if the system firmware provides SMBIOS
+ type 41 device type instance also.
+Users:
+ Userspace applications interested in knowing the
+ firmware assigned instance number of the PCI
+ device that can help in understanding the firmware
+ intended order of the PCI device.
+
+What: /sys/bus/pci/devices/.../d3cold_allowed
+Date: July 2012
+Contact: Huang Ying <ying.huang@intel.com>
+Description:
+ d3cold_allowed is bit to control whether the corresponding PCI
+ device can be put into D3Cold state. If it is cleared, the
+ device will never be put into D3Cold state. If it is set, the
+ device may be put into D3Cold state if other requirements are
+ satisfied too. Reading this attribute will show the current
+ value of d3cold_allowed bit. Writing this attribute will set
+ the value of d3cold_allowed bit.
+
+What: /sys/bus/pci/devices/.../sriov_totalvfs
+Date: November 2012
+Contact: Donald Dutile <ddutile@redhat.com>
+Description:
+ This file appears when a physical PCIe device supports SR-IOV.
+ Userspace applications can read this file to determine the
+ maximum number of Virtual Functions (VFs) a PCIe physical
+ function (PF) can support. Typically, this is the value reported
+ in the PF's SR-IOV extended capability structure's TotalVFs
+ element. Drivers have the ability at probe time to reduce the
+ value read from this file via the pci_sriov_set_totalvfs()
+ function.
+
+What: /sys/bus/pci/devices/.../sriov_numvfs
+Date: November 2012
+Contact: Donald Dutile <ddutile@redhat.com>
+Description:
+ This file appears when a physical PCIe device supports SR-IOV.
+ Userspace applications can read and write to this file to
+ determine and control the enablement or disablement of Virtual
+ Functions (VFs) on the physical function (PF). A read of this
+ file will return the number of VFs that are enabled on this PF.
+ A number written to this file will enable the specified
+ number of VFs. A userspace application would typically read the
+ file and check that the value is zero, and then write the number
+ of VFs that should be enabled on the PF; the value written
+ should be less than or equal to the value in the sriov_totalvfs
+ file. A userspace application wanting to disable the VFs would
+ write a zero to this file. The core ensures that valid values
+ are written to this file, and returns errors when values are not
+ valid. For example, writing a 2 to this file when sriov_numvfs
+ is not 0 and not 2 already will return an error. Writing a 10
+ when the value of sriov_totalvfs is 8 will return an error.
+
+What: /sys/bus/pci/devices/.../driver_override
+Date: April 2014
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description:
+ This file allows the driver for a device to be specified which
+ will override standard static and dynamic ID matching. When
+ specified, only a driver with a name matching the value written
+ to driver_override will have an opportunity to bind to the
+ device. The override is specified by writing a string to the
+ driver_override file (echo pci-stub > driver_override) and
+ may be cleared with an empty string (echo > driver_override).
+ This returns the device to standard matching rules binding.
+ Writing to driver_override does not automatically unbind the
+ device from its current driver or make any attempt to
+ automatically load the specified driver. If no driver with a
+ matching name is currently loaded in the kernel, the device
+ will not bind to any driver. This also allows devices to
+ opt-out of driver binding using a driver_override name such as
+ "none". Only a single driver may be specified in the override,
+ there is no support for parsing delimiters.
+
+What: /sys/bus/pci/devices/.../numa_node
+Date: Oct 2014
+Contact: Prarit Bhargava <prarit@redhat.com>
+Description:
+ This file contains the NUMA node to which the PCI device is
+ attached, or -1 if the node is unknown. The initial value
+ comes from an ACPI _PXM method or a similar firmware
+ source. If that is missing or incorrect, this file can be
+ written to override the node. In that case, please report
+ a firmware bug to the system vendor. Writing to this file
+ taints the kernel with TAINT_FIRMWARE_WORKAROUND, which
+ reduces the supportability of your system.
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
new file mode 100644
index 000000000..53d99edd1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-cciss
@@ -0,0 +1,80 @@
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/model
+Date: March 2009
+Kernel Version: 2.6.30
+Contact: iss_storagedev@hp.com
+Description: Displays the SCSI INQUIRY page 0 model for logical drive
+ Y of controller X.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/rev
+Date: March 2009
+Kernel Version: 2.6.30
+Contact: iss_storagedev@hp.com
+Description: Displays the SCSI INQUIRY page 0 revision for logical
+ drive Y of controller X.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/unique_id
+Date: March 2009
+Kernel Version: 2.6.30
+Contact: iss_storagedev@hp.com
+Description: Displays the SCSI INQUIRY page 83 serial number for logical
+ drive Y of controller X.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/vendor
+Date: March 2009
+Kernel Version: 2.6.30
+Contact: iss_storagedev@hp.com
+Description: Displays the SCSI INQUIRY page 0 vendor for logical drive
+ Y of controller X.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/block:cciss!cXdY
+Date: March 2009
+Kernel Version: 2.6.30
+Contact: iss_storagedev@hp.com
+Description: A symbolic link to /sys/block/cciss!cXdY
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/rescan
+Date: August 2009
+Kernel Version: 2.6.31
+Contact: iss_storagedev@hp.com
+Description: Kicks of a rescan of the controller to discover logical
+ drive topology changes.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/lunid
+Date: August 2009
+Kernel Version: 2.6.31
+Contact: iss_storagedev@hp.com
+Description: Displays the 8-byte LUN ID used to address logical
+ drive Y of controller X.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/raid_level
+Date: August 2009
+Kernel Version: 2.6.31
+Contact: iss_storagedev@hp.com
+Description: Displays the RAID level of logical drive Y of
+ controller X.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/cXdY/usage_count
+Date: August 2009
+Kernel Version: 2.6.31
+Contact: iss_storagedev@hp.com
+Description: Displays the usage count (number of opens) of logical drive Y
+ of controller X.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/resettable
+Date: February 2011
+Kernel Version: 2.6.38
+Contact: iss_storagedev@hp.com
+Description: Value of 1 indicates the controller can honor the reset_devices
+ kernel parameter. Value of 0 indicates reset_devices cannot be
+ honored. This is to allow, for example, kexec tools to be able
+ to warn the user if they designate an unresettable device as
+ a dump device, as kdump requires resetting the device in order
+ to work reliably.
+
+Where: /sys/bus/pci/devices/<dev>/ccissX/transport_mode
+Date: July 2011
+Kernel Version: 3.0
+Contact: iss_storagedev@hp.com
+Description: Value of "simple" indicates that the controller has been placed
+ in "simple mode". Value of "performant" indicates that the
+ controller has been placed in "performant mode".
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd b/Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd
new file mode 100644
index 000000000..60c60fa62
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-pci-drivers-ehci_hcd
@@ -0,0 +1,46 @@
+What: /sys/bus/pci/drivers/ehci_hcd/.../companion
+ /sys/bus/usb/devices/usbN/../companion
+Date: January 2007
+KernelVersion: 2.6.21
+Contact: Alan Stern <stern@rowland.harvard.edu>
+Description:
+ PCI-based EHCI USB controllers (i.e., high-speed USB-2.0
+ controllers) are often implemented along with a set of
+ "companion" full/low-speed USB-1.1 controllers. When a
+ high-speed device is plugged in, the connection is routed
+ to the EHCI controller; when a full- or low-speed device
+ is plugged in, the connection is routed to the companion
+ controller.
+
+ Sometimes you want to force a high-speed device to connect
+ at full speed, which can be accomplished by forcing the
+ connection to be routed to the companion controller.
+ That's what this file does. Writing a port number to the
+ file causes connections on that port to be routed to the
+ companion controller, and writing the negative of a port
+ number returns the port to normal operation.
+
+ For example: To force the high-speed device attached to
+ port 4 on bus 2 to run at full speed:
+
+ echo 4 >/sys/bus/usb/devices/usb2/../companion
+
+ To return the port to high-speed operation:
+
+ echo -4 >/sys/bus/usb/devices/usb2/../companion
+
+ Reading the file gives the list of ports currently forced
+ to the companion controller.
+
+ Note: Some EHCI controllers do not have companions; they
+ may contain an internal "transaction translator" or they
+ may be attached directly to a "rate-matching hub". This
+ mechanism will not work with such controllers. Also, it
+ cannot be used to force a port on a high-speed hub to
+ connect at full speed.
+
+ Note: When this file was first added, it appeared in a
+ different sysfs directory. The location given above is
+ correct for 2.6.35 (and probably several earlier kernel
+ versions as well).
+
diff --git a/Documentation/ABI/testing/sysfs-bus-platform b/Documentation/ABI/testing/sysfs-bus-platform
new file mode 100644
index 000000000..5172a6124
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-platform
@@ -0,0 +1,20 @@
+What: /sys/bus/platform/devices/.../driver_override
+Date: April 2014
+Contact: Kim Phillips <kim.phillips@freescale.com>
+Description:
+ This file allows the driver for a device to be specified which
+ will override standard OF, ACPI, ID table, and name matching.
+ When specified, only a driver with a name matching the value
+ written to driver_override will have an opportunity to bind
+ to the device. The override is specified by writing a string
+ to the driver_override file (echo vfio-platform > \
+ driver_override) and may be cleared with an empty string
+ (echo > driver_override). This returns the device to standard
+ matching rules binding. Writing to driver_override does not
+ automatically unbind the device from its current driver or make
+ any attempt to automatically load the specified driver. If no
+ driver with a matching name is currently loaded in the kernel,
+ the device will not bind to any driver. This also allows
+ devices to opt-out of driver binding using a driver_override
+ name such as "none". Only a single driver may be specified in
+ the override, there is no support for parsing delimiters.
diff --git a/Documentation/ABI/testing/sysfs-bus-rbd b/Documentation/ABI/testing/sysfs-bus-rbd
new file mode 100644
index 000000000..2ddd68092
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-rbd
@@ -0,0 +1,98 @@
+What: /sys/bus/rbd/
+Date: November 2010
+Contact: Yehuda Sadeh <yehuda@newdream.net>,
+ Sage Weil <sage@newdream.net>
+Description:
+
+Being used for adding and removing rbd block devices.
+
+Usage: <mon ip addr> <options> <pool name> <rbd image name> [snap name]
+
+ $ echo "192.168.0.1 name=admin rbd foo" > /sys/bus/rbd/add
+
+The snapshot name can be "-" or omitted to map the image read/write. A <dev-id>
+will be assigned for any registered block device. If snapshot is used, it will
+be mapped read-only.
+
+Removal of a device:
+
+ $ echo <dev-id> > /sys/bus/rbd/remove
+
+What: /sys/bus/rbd/add_single_major
+Date: December 2013
+KernelVersion: 3.14
+Contact: Sage Weil <sage@inktank.com>
+Description: Available only if rbd module is inserted with single_major
+ parameter set to true.
+ Usage is the same as for /sys/bus/rbd/add. If present,
+ should be used instead of the latter: any attempts to use
+ /sys/bus/rbd/add if /sys/bus/rbd/add_single_major is
+ available will fail for backwards compatibility reasons.
+
+What: /sys/bus/rbd/remove_single_major
+Date: December 2013
+KernelVersion: 3.14
+Contact: Sage Weil <sage@inktank.com>
+Description: Available only if rbd module is inserted with single_major
+ parameter set to true.
+ Usage is the same as for /sys/bus/rbd/remove. If present,
+ should be used instead of the latter: any attempts to use
+ /sys/bus/rbd/remove if /sys/bus/rbd/remove_single_major is
+ available will fail for backwards compatibility reasons.
+
+Entries under /sys/bus/rbd/devices/<dev-id>/
+--------------------------------------------
+
+client_id
+
+ The ceph unique client id that was assigned for this specific session.
+
+features
+
+ A hexadecimal encoding of the feature bits for this image.
+
+major
+
+ The block device major number.
+
+minor
+
+ The block device minor number. (December 2013, since 3.14.)
+
+name
+
+ The name of the rbd image.
+
+image_id
+
+ The unique id for the rbd image. (For rbd image format 1
+ this is empty.)
+
+pool
+
+ The name of the storage pool where this rbd image resides.
+ An rbd image name is unique within its pool.
+
+pool_id
+
+ The unique identifier for the rbd image's pool. This is
+ a permanent attribute of the pool. A pool's id will never
+ change.
+
+size
+
+ The size (in bytes) of the mapped block device.
+
+refresh
+
+ Writing to this file will reread the image header data and set
+ all relevant datastructures accordingly.
+
+current_snap
+
+ The current snapshot for which the device is mapped.
+
+parent
+
+ Information identifying the chain of parent images in a layered rbd
+ image. Entries are separated by empty lines.
diff --git a/Documentation/ABI/testing/sysfs-bus-rpmsg b/Documentation/ABI/testing/sysfs-bus-rpmsg
new file mode 100644
index 000000000..189e419a5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-rpmsg
@@ -0,0 +1,75 @@
+What: /sys/bus/rpmsg/devices/.../name
+Date: June 2011
+KernelVersion: 3.3
+Contact: Ohad Ben-Cohen <ohad@wizery.com>
+Description:
+ Every rpmsg device is a communication channel with a remote
+ processor. Channels are identified with a (textual) name,
+ which is maximum 32 bytes long (defined as RPMSG_NAME_SIZE in
+ rpmsg.h).
+
+ This sysfs entry contains the name of this channel.
+
+What: /sys/bus/rpmsg/devices/.../src
+Date: June 2011
+KernelVersion: 3.3
+Contact: Ohad Ben-Cohen <ohad@wizery.com>
+Description:
+ Every rpmsg device is a communication channel with a remote
+ processor. Channels have a local ("source") rpmsg address,
+ and remote ("destination") rpmsg address. When an entity
+ starts listening on one end of a channel, it assigns it with
+ a unique rpmsg address (a 32 bits integer). This way when
+ inbound messages arrive to this address, the rpmsg core
+ dispatches them to the listening entity (a kernel driver).
+
+ This sysfs entry contains the src (local) rpmsg address
+ of this channel. If it contains 0xffffffff, then an address
+ wasn't assigned (can happen if no driver exists for this
+ channel).
+
+What: /sys/bus/rpmsg/devices/.../dst
+Date: June 2011
+KernelVersion: 3.3
+Contact: Ohad Ben-Cohen <ohad@wizery.com>
+Description:
+ Every rpmsg device is a communication channel with a remote
+ processor. Channels have a local ("source") rpmsg address,
+ and remote ("destination") rpmsg address. When an entity
+ starts listening on one end of a channel, it assigns it with
+ a unique rpmsg address (a 32 bits integer). This way when
+ inbound messages arrive to this address, the rpmsg core
+ dispatches them to the listening entity.
+
+ This sysfs entry contains the dst (remote) rpmsg address
+ of this channel. If it contains 0xffffffff, then an address
+ wasn't assigned (can happen if the kernel driver that
+ is attached to this channel is exposing a service to the
+ remote processor. This make it a local rpmsg server,
+ and it is listening for inbound messages that may be sent
+ from any remote rpmsg client; it is not bound to a single
+ remote entity).
+
+What: /sys/bus/rpmsg/devices/.../announce
+Date: June 2011
+KernelVersion: 3.3
+Contact: Ohad Ben-Cohen <ohad@wizery.com>
+Description:
+ Every rpmsg device is a communication channel with a remote
+ processor. Channels are identified by a textual name (see
+ /sys/bus/rpmsg/devices/.../name above) and have a local
+ ("source") rpmsg address, and remote ("destination") rpmsg
+ address.
+
+ A channel is first created when an entity, whether local
+ or remote, starts listening on it for messages (and is thus
+ called an rpmsg server).
+
+ When that happens, a "name service" announcement is sent
+ to the other processor, in order to let it know about the
+ creation of the channel (this way remote clients know they
+ can start sending messages).
+
+ This sysfs entry tells us whether the channel is a local
+ server channel that is announced (values are either
+ true or false).
diff --git a/Documentation/ABI/testing/sysfs-bus-umc b/Documentation/ABI/testing/sysfs-bus-umc
new file mode 100644
index 000000000..948fec412
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-umc
@@ -0,0 +1,28 @@
+What: /sys/bus/umc/
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ The Wireless Host Controller Interface (WHCI)
+ specification describes a PCI-based device with
+ multiple capabilities; the UWB Multi-interface
+ Controller (UMC).
+
+ The umc bus presents each of the individual
+ capabilties as a device.
+
+What: /sys/bus/umc/devices/.../capability_id
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ The ID of this capability, with 0 being the radio
+ controller capability.
+
+What: /sys/bus/umc/devices/.../version
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ The specification version this capability's hardware
+ interface complies with.
diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb
new file mode 100644
index 000000000..e5cc7633d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -0,0 +1,181 @@
+What: /sys/bus/usb/device/.../authorized
+Date: July 2008
+KernelVersion: 2.6.26
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ Authorized devices are available for use by device
+ drivers, non-authorized one are not. By default, wired
+ USB devices are authorized.
+
+ Certified Wireless USB devices are not authorized
+ initially and should be (by writing 1) after the
+ device has been authenticated.
+
+What: /sys/bus/usb/device/.../wusb_cdid
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ For Certified Wireless USB devices only.
+
+ A devices's CDID, as 16 space-separated hex octets.
+
+What: /sys/bus/usb/device/.../wusb_ck
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ For Certified Wireless USB devices only.
+
+ Write the device's connection key (CK) to start the
+ authentication of the device. The CK is 16
+ space-separated hex octets.
+
+What: /sys/bus/usb/device/.../wusb_disconnect
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ For Certified Wireless USB devices only.
+
+ Write a 1 to force the device to disconnect
+ (equivalent to unplugging a wired USB device).
+
+What: /sys/bus/usb/drivers/.../new_id
+Date: October 2011
+Contact: linux-usb@vger.kernel.org
+Description:
+ Writing a device ID to this file will attempt to
+ dynamically add a new device ID to a USB device driver.
+ This may allow the driver to support more hardware than
+ was included in the driver's static device ID support
+ table at compile time. The format for the device ID is:
+ idVendor idProduct bInterfaceClass RefIdVendor RefIdProduct
+ The vendor ID and device ID fields are required, the
+ rest is optional. The Ref* tuple can be used to tell the
+ driver to use the same driver_data for the new device as
+ it is used for the reference device.
+ Upon successfully adding an ID, the driver will probe
+ for the device and attempt to bind to it. For example:
+ # echo "8086 10f5" > /sys/bus/usb/drivers/foo/new_id
+
+ Here add a new device (0458:7045) using driver_data from
+ an already supported device (0458:704c):
+ # echo "0458 7045 0 0458 704c" > /sys/bus/usb/drivers/foo/new_id
+
+ Reading from this file will list all dynamically added
+ device IDs in the same format, with one entry per
+ line. For example:
+ # cat /sys/bus/usb/drivers/foo/new_id
+ 8086 10f5
+ dead beef 06
+ f00d cafe
+
+ The list will be truncated at PAGE_SIZE bytes due to
+ sysfs restrictions.
+
+What: /sys/bus/usb-serial/drivers/.../new_id
+Date: October 2011
+Contact: linux-usb@vger.kernel.org
+Description:
+ For serial USB drivers, this attribute appears under the
+ extra bus folder "usb-serial" in sysfs; apart from that
+ difference, all descriptions from the entry
+ "/sys/bus/usb/drivers/.../new_id" apply.
+
+What: /sys/bus/usb/drivers/.../remove_id
+Date: November 2009
+Contact: CHENG Renquan <rqcheng@smu.edu.sg>
+Description:
+ Writing a device ID to this file will remove an ID
+ that was dynamically added via the new_id sysfs entry.
+ The format for the device ID is:
+ idVendor idProduct. After successfully
+ removing an ID, the driver will no longer support the
+ device. This is useful to ensure auto probing won't
+ match the driver to the device. For example:
+ # echo "046d c315" > /sys/bus/usb/drivers/foo/remove_id
+
+ Reading from this file will list the dynamically added
+ device IDs, exactly like reading from the entry
+ "/sys/bus/usb/drivers/.../new_id"
+
+What: /sys/bus/usb/devices/.../power/usb2_hardware_lpm
+Date: September 2011
+Contact: Andiry Xu <andiry.xu@amd.com>
+Description:
+ If CONFIG_PM is set and a USB 2.0 lpm-capable device is plugged
+ in to a xHCI host which support link PM, it will perform a LPM
+ test; if the test is passed and host supports USB2 hardware LPM
+ (xHCI 1.0 feature), USB2 hardware LPM will be enabled for the
+ device and the USB device directory will contain a file named
+ power/usb2_hardware_lpm. The file holds a string value (enable
+ or disable) indicating whether or not USB2 hardware LPM is
+ enabled for the device. Developer can write y/Y/1 or n/N/0 to
+ the file to enable/disable the feature.
+
+What: /sys/bus/usb/devices/.../removable
+Date: February 2012
+Contact: Matthew Garrett <mjg@redhat.com>
+Description:
+ Some information about whether a given USB device is
+ physically fixed to the platform can be inferred from a
+ combination of hub descriptor bits and platform-specific data
+ such as ACPI. This file will read either "removable" or
+ "fixed" if the information is available, and "unknown"
+ otherwise.
+
+What: /sys/bus/usb/devices/.../ltm_capable
+Date: July 2012
+Contact: Sarah Sharp <sarah.a.sharp@linux.intel.com>
+Description:
+ USB 3.0 devices may optionally support Latency Tolerance
+ Messaging (LTM). They indicate their support by setting a bit
+ in the bmAttributes field of their SuperSpeed BOS descriptors.
+ If that bit is set for the device, ltm_capable will read "yes".
+ If the device doesn't support LTM, the file will read "no".
+ The file will be present for all speeds of USB devices, and will
+ always read "no" for USB 1.1 and USB 2.0 devices.
+
+What: /sys/bus/usb/devices/.../(hub interface)/portX
+Date: August 2012
+Contact: Lan Tianyu <tianyu.lan@intel.com>
+Description:
+ The /sys/bus/usb/devices/.../(hub interface)/portX
+ is usb port device's sysfs directory.
+
+What: /sys/bus/usb/devices/.../(hub interface)/portX/connect_type
+Date: January 2013
+Contact: Lan Tianyu <tianyu.lan@intel.com>
+Description:
+ Some platforms provide usb port connect types through ACPI.
+ This attribute is to expose these information to user space.
+ The file will read "hotplug", "wired" and "not used" if the
+ information is available, and "unknown" otherwise.
+
+What: /sys/bus/usb/devices/.../power/usb2_lpm_l1_timeout
+Date: May 2013
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ USB 2.0 devices may support hardware link power management (LPM)
+ L1 sleep state. The usb2_lpm_l1_timeout attribute allows
+ tuning the timeout for L1 inactivity timer (LPM timer), e.g.
+ needed inactivity time before host requests the device to go to L1 sleep.
+ Useful for power management tuning.
+ Supported values are 0 - 65535 microseconds.
+
+What: /sys/bus/usb/devices/.../power/usb2_lpm_besl
+Date: May 2013
+Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
+Description:
+ USB 2.0 devices that support hardware link power management (LPM)
+ L1 sleep state now use a best effort service latency value (BESL) to
+ indicate the best effort to resumption of service to the device after the
+ initiation of the resume event.
+ If the device does not have a preferred besl value then the host can select
+ one instead. This usb2_lpm_besl attribute allows to tune the host selected besl
+ value in order to tune power saving and service latency.
+
+ Supported values are 0 - 15.
+ More information on how besl values map to microseconds can be found in
+ USB 2.0 ECN Errata for Link Power Management, section 4.10)
diff --git a/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg b/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
new file mode 100644
index 000000000..70d00dfa4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-usb-devices-usbsevseg
@@ -0,0 +1,43 @@
+Where: /sys/bus/usb/.../powered
+Date: August 2008
+Kernel Version: 2.6.26
+Contact: Harrison Metzger <harrisonmetz@gmail.com>
+Description: Controls whether the device's display will powered.
+ A value of 0 is off and a non-zero value is on.
+
+Where: /sys/bus/usb/.../mode_msb
+Where: /sys/bus/usb/.../mode_lsb
+Date: August 2008
+Kernel Version: 2.6.26
+Contact: Harrison Metzger <harrisonmetz@gmail.com>
+Description: Controls the devices display mode.
+ For a 6 character display the values are
+ MSB 0x06; LSB 0x3F, and
+ for an 8 character display the values are
+ MSB 0x08; LSB 0xFF.
+
+Where: /sys/bus/usb/.../textmode
+Date: August 2008
+Kernel Version: 2.6.26
+Contact: Harrison Metzger <harrisonmetz@gmail.com>
+Description: Controls the way the device interprets its text buffer.
+ raw: each character controls its segment manually
+ hex: each character is between 0-15
+ ascii: each character is between '0'-'9' and 'A'-'F'.
+
+Where: /sys/bus/usb/.../text
+Date: August 2008
+Kernel Version: 2.6.26
+Contact: Harrison Metzger <harrisonmetz@gmail.com>
+Description: The text (or data) for the device to display
+
+Where: /sys/bus/usb/.../decimals
+Date: August 2008
+Kernel Version: 2.6.26
+Contact: Harrison Metzger <harrisonmetz@gmail.com>
+Description: Controls the decimal places on the device.
+ To set the nth decimal place, give this field
+ the value of 10 ** n. Assume this field has
+ the value k and has 1 or more decimal places set,
+ to set the mth place (where m is not already set),
+ change this fields value to k + 10 ** m.
diff --git a/Documentation/ABI/testing/sysfs-bus-usb-lvstest b/Documentation/ABI/testing/sysfs-bus-usb-lvstest
new file mode 100644
index 000000000..aae68fc2d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-usb-lvstest
@@ -0,0 +1,47 @@
+Link Layer Validation Device is a standard device for testing of Super
+Speed Link Layer tests. These nodes are available in sysfs only when lvs
+driver is bound with root hub device.
+
+What: /sys/bus/usb/devices/.../get_dev_desc
+Date: March 2014
+Contact: Pratyush Anand <pratyush.anand@st.com>
+Description:
+ Write to this node to issue "Get Device Descriptor"
+ for Link Layer Validation device. It is needed for TD.7.06.
+
+What: /sys/bus/usb/devices/.../u1_timeout
+Date: March 2014
+Contact: Pratyush Anand <pratyush.anand@st.com>
+Description:
+ Set "U1 timeout" for the downstream port where Link Layer
+ Validation device is connected. Timeout value must be between 0
+ and 127. It is needed for TD.7.18, TD.7.19, TD.7.20 and TD.7.21.
+
+What: /sys/bus/usb/devices/.../u2_timeout
+Date: March 2014
+Contact: Pratyush Anand <pratyush.anand@st.com>
+Description:
+ Set "U2 timeout" for the downstream port where Link Layer
+ Validation device is connected. Timeout value must be between 0
+ and 127. It is needed for TD.7.18, TD.7.19, TD.7.20 and TD.7.21.
+
+What: /sys/bus/usb/devices/.../hot_reset
+Date: March 2014
+Contact: Pratyush Anand <pratyush.anand@st.com>
+Description:
+ Write to this node to issue "Reset" for Link Layer Validation
+ device. It is needed for TD.7.29, TD.7.31, TD.7.34 and TD.7.35.
+
+What: /sys/bus/usb/devices/.../u3_entry
+Date: March 2014
+Contact: Pratyush Anand <pratyush.anand@st.com>
+Description:
+ Write to this node to issue "U3 entry" for Link Layer
+ Validation device. It is needed for TD.7.35 and TD.7.36.
+
+What: /sys/bus/usb/devices/.../u3_exit
+Date: March 2014
+Contact: Pratyush Anand <pratyush.anand@st.com>
+Description:
+ Write to this node to issue "U3 exit" for Link Layer
+ Validation device. It is needed for TD.7.36.
diff --git a/Documentation/ABI/testing/sysfs-c2port b/Documentation/ABI/testing/sysfs-c2port
new file mode 100644
index 000000000..716cffc45
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-c2port
@@ -0,0 +1,88 @@
+What: /sys/class/c2port/
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/ directory will contain files and
+ directories that will provide a unified interface to
+ the C2 port interface.
+
+What: /sys/class/c2port/c2portX
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/ directory is related to X-th
+ C2 port into the system. Each directory will contain files to
+ manage and control its C2 port.
+
+What: /sys/class/c2port/c2portX/access
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/access file enable the access
+ to the C2 port from the system. No commands can be sent
+ till this entry is set to 0.
+
+What: /sys/class/c2port/c2portX/dev_id
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/dev_id file show the device ID
+ of the connected micro.
+
+What: /sys/class/c2port/c2portX/flash_access
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/flash_access file enable the
+ access to the on-board flash of the connected micro.
+ No commands can be sent till this entry is set to 0.
+
+What: /sys/class/c2port/c2portX/flash_block_size
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/flash_block_size file show
+ the on-board flash block size of the connected micro.
+
+What: /sys/class/c2port/c2portX/flash_blocks_num
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/flash_blocks_num file show
+ the on-board flash blocks number of the connected micro.
+
+What: /sys/class/c2port/c2portX/flash_data
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/flash_data file export
+ the content of the on-board flash of the connected micro.
+
+What: /sys/class/c2port/c2portX/flash_erase
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/flash_erase file execute
+ the "erase" command on the on-board flash of the connected
+ micro.
+
+What: /sys/class/c2port/c2portX/flash_erase
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/flash_erase file show the
+ on-board flash size of the connected micro.
+
+What: /sys/class/c2port/c2portX/reset
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/reset file execute a "reset"
+ command on the connected micro.
+
+What: /sys/class/c2port/c2portX/rev_id
+Date: October 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/c2port/c2portX/rev_id file show the revision ID
+ of the connected micro.
diff --git a/Documentation/ABI/testing/sysfs-cfq-target-latency b/Documentation/ABI/testing/sysfs-cfq-target-latency
new file mode 100644
index 000000000..df0f7828c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-cfq-target-latency
@@ -0,0 +1,8 @@
+What: /sys/block/<device>/iosched/target_latency
+Date: March 2012
+contact: Tao Ma <boyu.mt@taobao.com>
+Description:
+ The /sys/block/<device>/iosched/target_latency only exists
+ when the user sets cfq to /sys/block/<device>/scheduler.
+ It contains an estimated latency time for the cfq. cfq will
+ use it to calculate the time slice used for every task.
diff --git a/Documentation/ABI/testing/sysfs-class b/Documentation/ABI/testing/sysfs-class
new file mode 100644
index 000000000..676530fcf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class
@@ -0,0 +1,16 @@
+What: /sys/class/
+Date: Febuary 2006
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description:
+ The /sys/class directory will consist of a group of
+ subdirectories describing individual classes of devices
+ in the kernel. The individual directories will consist
+ of either subdirectories, or symlinks to other
+ directories.
+
+ All programs that use this directory tree must be able
+ to handle both subdirectories or symlinks in order to
+ work properly.
+
+Users:
+ udev <linux-hotplug-devel@lists.sourceforge.net>
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870 b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870
new file mode 100644
index 000000000..33e648808
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-adp8870
@@ -0,0 +1,56 @@
+What: /sys/class/backlight/<backlight>/<ambient light zone>_max
+What: /sys/class/backlight/<backlight>/l1_daylight_max
+What: /sys/class/backlight/<backlight>/l2_bright_max
+What: /sys/class/backlight/<backlight>/l3_office_max
+What: /sys/class/backlight/<backlight>/l4_indoor_max
+What: /sys/class/backlight/<backlight>/l5_dark_max
+Date: May 2011
+KernelVersion: 3.0
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Control the maximum brightness for <ambient light zone>
+ on this <backlight>. Values are between 0 and 127. This file
+ will also show the brightness level stored for this
+ <ambient light zone>.
+
+What: /sys/class/backlight/<backlight>/<ambient light zone>_dim
+What: /sys/class/backlight/<backlight>/l2_bright_dim
+What: /sys/class/backlight/<backlight>/l3_office_dim
+What: /sys/class/backlight/<backlight>/l4_indoor_dim
+What: /sys/class/backlight/<backlight>/l5_dark_dim
+Date: May 2011
+KernelVersion: 3.0
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Control the dim brightness for <ambient light zone>
+ on this <backlight>. Values are between 0 and 127, typically
+ set to 0. Full off when the backlight is disabled.
+ This file will also show the dim brightness level stored for
+ this <ambient light zone>.
+
+What: /sys/class/backlight/<backlight>/ambient_light_level
+Date: May 2011
+KernelVersion: 3.0
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Get conversion value of the light sensor.
+ This value is updated every 80 ms (when the light sensor
+ is enabled). Returns integer between 0 (dark) and
+ 8000 (max ambient brightness)
+
+What: /sys/class/backlight/<backlight>/ambient_light_zone
+Date: May 2011
+KernelVersion: 3.0
+Contact: device-drivers-devel@blackfin.uclinux.org
+Description:
+ Get/Set current ambient light zone. Reading returns
+ integer between 1..5 (1 = daylight, 2 = bright, ..., 5 = dark).
+ Writing a value between 1..5 forces the backlight controller
+ to enter the corresponding ambient light zone.
+ Writing 0 returns to normal/automatic ambient light level
+ operation. The ambient light sensing feature on these devices
+ is an extension to the API documented in
+ Documentation/ABI/stable/sysfs-class-backlight.
+ It can be enabled by writing the value stored in
+ /sys/class/backlight/<backlight>/max_brightness to
+ /sys/class/backlight/<backlight>/brightness.
diff --git a/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533 b/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533
new file mode 100644
index 000000000..77cf7ac94
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-backlight-driver-lm3533
@@ -0,0 +1,48 @@
+What: /sys/class/backlight/<backlight>/als_channel
+Date: May 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Get the ALS output channel used as input in
+ ALS-current-control mode (0, 1), where
+
+ 0 - out_current0 (backlight 0)
+ 1 - out_current1 (backlight 1)
+
+What: /sys/class/backlight/<backlight>/als_en
+Date: May 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Enable ALS-current-control mode (0, 1).
+
+What: /sys/class/backlight/<backlight>/id
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Get the id of this backlight (0, 1).
+
+What: /sys/class/backlight/<backlight>/linear
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Set the brightness-mapping mode (0, 1), where
+
+ 0 - exponential mode
+ 1 - linear mode
+
+What: /sys/class/backlight/<backlight>/pwm
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Set the PWM-input control mask (5 bits), where
+
+ bit 5 - PWM-input enabled in Zone 4
+ bit 4 - PWM-input enabled in Zone 3
+ bit 3 - PWM-input enabled in Zone 2
+ bit 2 - PWM-input enabled in Zone 1
+ bit 1 - PWM-input enabled in Zone 0
+ bit 0 - PWM-input enabled
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
new file mode 100644
index 000000000..d773d5697
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -0,0 +1,55 @@
+What: /sys/class/bdi/<bdi>/
+Date: January 2008
+Contact: Peter Zijlstra <a.p.zijlstra@chello.nl>
+Description:
+
+Provide a place in sysfs for the backing_dev_info object. This allows
+setting and retrieving various BDI specific variables.
+
+The <bdi> identifier can be either of the following:
+
+MAJOR:MINOR
+
+ Device number for block devices, or value of st_dev on
+ non-block filesystems which provide their own BDI, such as NFS
+ and FUSE.
+
+MAJOR:MINOR-fuseblk
+
+ Value of st_dev on fuseblk filesystems.
+
+default
+
+ The default backing dev, used for non-block device backed
+ filesystems which do not provide their own BDI.
+
+Files under /sys/class/bdi/<bdi>/
+---------------------------------
+
+read_ahead_kb (read-write)
+
+ Size of the read-ahead window in kilobytes
+
+min_ratio (read-write)
+
+ Under normal circumstances each device is given a part of the
+ total write-back cache that relates to its current average
+ writeout speed in relation to the other devices.
+
+ The 'min_ratio' parameter allows assigning a minimum
+ percentage of the write-back cache to a particular device.
+ For example, this is useful for providing a minimum QoS.
+
+max_ratio (read-write)
+
+ Allows limiting a particular device to use not more than the
+ given percentage of the write-back cache. This is useful in
+ situations where we want to avoid one device taking all or
+ most of the write-back cache. For example in case of an NFS
+ mount that is prone to get stuck, or a FUSE mount which cannot
+ be trusted to play fair.
+
+stable_pages_required (read-only)
+
+ If set, the backing device requires that all pages comprising a write
+ request must not be changed until writeout is complete.
diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl
new file mode 100644
index 000000000..d46bba801
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-cxl
@@ -0,0 +1,192 @@
+Note: Attributes that are shared between devices are stored in the directory
+pointed to by the symlink device/.
+Example: The real path of the attribute /sys/class/cxl/afu0.0s/irqs_max is
+/sys/class/cxl/afu0.0s/device/irqs_max, i.e. /sys/class/cxl/afu0.0/irqs_max.
+
+
+Slave contexts (eg. /sys/class/cxl/afu0.0s):
+
+What: /sys/class/cxl/<afu>/irqs_max
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read/write
+ Decimal value of maximum number of interrupts that can be
+ requested by userspace. The default on probe is the maximum
+ that hardware can support (eg. 2037). Write values will limit
+ userspace applications to that many userspace interrupts. Must
+ be >= irqs_min.
+
+What: /sys/class/cxl/<afu>/irqs_min
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Decimal value of the minimum number of interrupts that
+ userspace must request on a CXL_START_WORK ioctl. Userspace may
+ omit the num_interrupts field in the START_WORK IOCTL to get
+ this minimum automatically.
+
+What: /sys/class/cxl/<afu>/mmio_size
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Decimal value of the size of the MMIO space that may be mmaped
+ by userspace.
+
+What: /sys/class/cxl/<afu>/modes_supported
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ List of the modes this AFU supports. One per line.
+ Valid entries are: "dedicated_process" and "afu_directed"
+
+What: /sys/class/cxl/<afu>/mode
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read/write
+ The current mode the AFU is using. Will be one of the modes
+ given in modes_supported. Writing will change the mode
+ provided that no user contexts are attached.
+
+
+What: /sys/class/cxl/<afu>/prefault_mode
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read/write
+ Set the mode for prefaulting in segments into the segment table
+ when performing the START_WORK ioctl. Possible values:
+ none: No prefaulting (default)
+ work_element_descriptor: Treat the work element
+ descriptor as an effective address and
+ prefault what it points to.
+ all: all segments process calling START_WORK maps.
+
+What: /sys/class/cxl/<afu>/reset
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: write only
+ Writing 1 here will reset the AFU provided there are not
+ contexts active on the AFU.
+
+What: /sys/class/cxl/<afu>/api_version
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Decimal value of the current version of the kernel/user API.
+
+What: /sys/class/cxl/<afu>/api_version_compatible
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Decimal value of the the lowest version of the userspace API
+ this this kernel supports.
+
+
+AFU configuration records (eg. /sys/class/cxl/afu0.0/cr0):
+
+An AFU may optionally export one or more PCIe like configuration records, known
+as AFU configuration records, which will show up here (if present).
+
+What: /sys/class/cxl/<afu>/cr<config num>/vendor
+Date: February 2015
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Hexadecimal value of the vendor ID found in this AFU
+ configuration record.
+
+What: /sys/class/cxl/<afu>/cr<config num>/device
+Date: February 2015
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Hexadecimal value of the device ID found in this AFU
+ configuration record.
+
+What: /sys/class/cxl/<afu>/cr<config num>/class
+Date: February 2015
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Hexadecimal value of the class code found in this AFU
+ configuration record.
+
+What: /sys/class/cxl/<afu>/cr<config num>/config
+Date: February 2015
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ This binary file provides raw access to the AFU configuration
+ record. The format is expected to match the either the standard
+ or extended configuration space defined by the PCIe
+ specification.
+
+
+
+Master contexts (eg. /sys/class/cxl/afu0.0m)
+
+What: /sys/class/cxl/<afu>m/mmio_size
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Decimal value of the size of the MMIO space that may be mmaped
+ by userspace. This includes all slave contexts space also.
+
+What: /sys/class/cxl/<afu>m/pp_mmio_len
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Decimal value of the Per Process MMIO space length.
+
+What: /sys/class/cxl/<afu>m/pp_mmio_off
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Decimal value of the Per Process MMIO space offset.
+
+
+Card info (eg. /sys/class/cxl/card0)
+
+What: /sys/class/cxl/<card>/caia_version
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Identifies the CAIA Version the card implements.
+
+What: /sys/class/cxl/<card>/psl_revision
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Identifies the revision level of the PSL.
+
+What: /sys/class/cxl/<card>/base_image
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Identifies the revision level of the base image for devices
+ that support loadable PSLs. For FPGAs this field identifies
+ the image contained in the on-adapter flash which is loaded
+ during the initial program load.
+
+What: /sys/class/cxl/<card>/image_loaded
+Date: September 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read only
+ Will return "user" or "factory" depending on the image loaded
+ onto the card.
+
+What: /sys/class/cxl/<card>/load_image_on_perst
+Date: December 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: read/write
+ Valid entries are "none", "user", and "factory".
+ "none" means PERST will not cause image to be loaded to the
+ card. A power cycle is required to load the image.
+ "none" could be useful for debugging because the trace arrays
+ are preserved.
+ "user" and "factory" means PERST will cause either the user or
+ user or factory image to be loaded.
+ Default is to reload on PERST whichever image the card has
+ loaded.
+
+What: /sys/class/cxl/<card>/reset
+Date: October 2014
+Contact: linuxppc-dev@lists.ozlabs.org
+Description: write only
+ Writing 1 will issue a PERST to card which may cause the card
+ to reload the FPGA depending on load_image_on_perst.
diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq
new file mode 100644
index 000000000..ee39acacf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-devfreq
@@ -0,0 +1,100 @@
+What: /sys/class/devfreq/.../
+Date: September 2011
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ Provide a place in sysfs for the devfreq objects.
+ This allows accessing various devfreq specific variables.
+ The name of devfreq object denoted as ... is same as the
+ name of device using devfreq.
+
+What: /sys/class/devfreq/.../governor
+Date: September 2011
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/devfreq/.../governor show or set the name of the
+ governor used by the corresponding devfreq object.
+
+What: /sys/class/devfreq/.../cur_freq
+Date: September 2011
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/devfreq/.../cur_freq shows the current
+ frequency of the corresponding devfreq object. Same as
+ target_freq when get_cur_freq() is not implemented by
+ devfreq driver.
+
+What: /sys/class/devfreq/.../target_freq
+Date: September 2012
+Contact: Rajagopal Venkat <rajagopal.venkat@linaro.org>
+Description:
+ The /sys/class/devfreq/.../target_freq shows the next governor
+ predicted target frequency of the corresponding devfreq object.
+
+What: /sys/class/devfreq/.../polling_interval
+Date: September 2011
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/devfreq/.../polling_interval shows and sets
+ the requested polling interval of the corresponding devfreq
+ object. The values are represented in ms. If the value is
+ less than 1 jiffy, it is considered to be 0, which means
+ no polling. This value is meaningless if the governor is
+ not polling; thus. If the governor is not using
+ devfreq-provided central polling
+ (/sys/class/devfreq/.../central_polling is 0), this value
+ may be useless.
+
+What: /sys/class/devfreq/.../trans_stat
+Date: October 2012
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Descrtiption:
+ This ABI shows the statistics of devfreq behavior on a
+ specific device. It shows the time spent in each state and
+ the number of transitions between states.
+ In order to activate this ABI, the devfreq target device
+ driver should provide the list of available frequencies
+ with its profile.
+
+What: /sys/class/devfreq/.../userspace/set_freq
+Date: September 2011
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/devfreq/.../userspace/set_freq shows and
+ sets the requested frequency for the devfreq object if
+ userspace governor is in effect.
+
+What: /sys/class/devfreq/.../available_frequencies
+Date: October 2012
+Contact: Nishanth Menon <nm@ti.com>
+Description:
+ The /sys/class/devfreq/.../available_frequencies shows
+ the available frequencies of the corresponding devfreq object.
+ This is a snapshot of available frequencies and not limited
+ by the min/max frequency restrictions.
+
+What: /sys/class/devfreq/.../available_governors
+Date: October 2012
+Contact: Nishanth Menon <nm@ti.com>
+Description:
+ The /sys/class/devfreq/.../available_governors shows
+ currently available governors in the system.
+
+What: /sys/class/devfreq/.../min_freq
+Date: January 2013
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/devfreq/.../min_freq shows and stores
+ the minimum frequency requested by users. It is 0 if
+ the user does not care. min_freq overrides the
+ frequency requested by governors.
+
+What: /sys/class/devfreq/.../max_freq
+Date: January 2013
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/devfreq/.../max_freq shows and stores
+ the maximum frequency requested by users. It is 0 if
+ the user does not care. max_freq overrides the
+ frequency requested by governors and min_freq.
+ The max_freq overrides min_freq because max_freq may be
+ used to throttle devices to avoid overheating.
diff --git a/Documentation/ABI/testing/sysfs-class-extcon b/Documentation/ABI/testing/sysfs-class-extcon
new file mode 100644
index 000000000..57a726232
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-extcon
@@ -0,0 +1,97 @@
+What: /sys/class/extcon/.../
+Date: February 2012
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ Provide a place in sysfs for the extcon objects.
+ This allows accessing extcon specific variables.
+ The name of extcon object denoted as ... is the name given
+ with extcon_dev_register.
+
+ One extcon device denotes a single external connector
+ port. An external connector may have multiple cables
+ attached simultaneously. Many of docks, cradles, and
+ accessory cables have such capability. For example,
+ the 30-pin port of Nuri board (/arch/arm/mach-exynos)
+ may have both HDMI and Charger attached, or analog audio,
+ video, and USB cables attached simultaneously.
+
+ If there are cables mutually exclusive with each other,
+ such binary relations may be expressed with extcon_dev's
+ mutually_exclusive array.
+
+What: /sys/class/extcon/.../name
+Date: February 2012
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/extcon/.../name shows the name of the extcon
+ object. If the extcon object has an optional callback
+ "show_name" defined, the callback will provide the name with
+ this sysfs node.
+
+What: /sys/class/extcon/.../state
+Date: February 2012
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/extcon/.../state shows and stores the cable
+ attach/detach information of the corresponding extcon object.
+ If the extcon object has an optional callback "show_state"
+ defined, the showing function is overridden with the optional
+ callback.
+
+ If the default callback for showing function is used, the
+ format is like this:
+ # cat state
+ USB_OTG=1
+ HDMI=0
+ TA=1
+ EAR_JACK=0
+ #
+ In this example, the extcon device has USB_OTG and TA
+ cables attached and HDMI and EAR_JACK cables detached.
+
+ In order to update the state of an extcon device, enter a hex
+ state number starting with 0x:
+ # echo 0xHEX > state
+
+ This updates the whole state of the extcon device.
+ Inputs of all the methods are required to meet the
+ mutually_exclusive conditions if they exist.
+
+ It is recommended to use this "global" state interface if
+ you need to set the value atomically. The later state
+ interface associated with each cable cannot update
+ multiple cable states of an extcon device simultaneously.
+
+What: /sys/class/extcon/.../cable.x/name
+Date: February 2012
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/extcon/.../cable.x/name shows the name of cable
+ "x" (integer between 0 and 31) of an extcon device.
+
+What: /sys/class/extcon/.../cable.x/state
+Date: February 2012
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ The /sys/class/extcon/.../cable.x/state shows and stores the
+ state of cable "x" (integer between 0 and 31) of an extcon
+ device. The state value is either 0 (detached) or 1
+ (attached).
+
+What: /sys/class/extcon/.../mutually_exclusive/...
+Date: December 2011
+Contact: MyungJoo Ham <myungjoo.ham@samsung.com>
+Description:
+ Shows the relations of mutually exclusiveness. For example,
+ if the mutually_exclusive array of extcon device is
+ {0x3, 0x5, 0xC, 0x0}, then the output is:
+ # ls mutually_exclusive/
+ 0x3
+ 0x5
+ 0xc
+ #
+
+ Note that mutually_exclusive is a sub-directory of the extcon
+ device and the file names under the mutually_exclusive
+ directory show the mutually-exclusive sets, not the contents
+ of the files.
diff --git a/Documentation/ABI/testing/sysfs-class-iommu b/Documentation/ABI/testing/sysfs-class-iommu
new file mode 100644
index 000000000..6d0a1b4be
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-iommu
@@ -0,0 +1,17 @@
+What: /sys/class/iommu/<iommu>/devices/
+Date: June 2014
+KernelVersion: 3.17
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description:
+ IOMMU drivers are able to link devices managed by a
+ given IOMMU here to allow association of IOMMU to
+ device.
+
+What: /sys/devices/.../iommu
+Date: June 2014
+KernelVersion: 3.17
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description:
+ IOMMU drivers are able to link the IOMMU for a
+ given device here to allow association of device to
+ IOMMU.
diff --git a/Documentation/ABI/testing/sysfs-class-iommu-amd-iommu b/Documentation/ABI/testing/sysfs-class-iommu-amd-iommu
new file mode 100644
index 000000000..d6ba8e8a4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-iommu-amd-iommu
@@ -0,0 +1,14 @@
+What: /sys/class/iommu/<iommu>/amd-iommu/cap
+Date: June 2014
+KernelVersion: 3.17
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description:
+ IOMMU capability header as documented in the AMD IOMMU
+ specification. Format: %x
+
+What: /sys/class/iommu/<iommu>/amd-iommu/features
+Date: June 2014
+KernelVersion: 3.17
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description:
+ Extended features of the IOMMU. Format: %llx
diff --git a/Documentation/ABI/testing/sysfs-class-iommu-intel-iommu b/Documentation/ABI/testing/sysfs-class-iommu-intel-iommu
new file mode 100644
index 000000000..258cc246d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-iommu-intel-iommu
@@ -0,0 +1,32 @@
+What: /sys/class/iommu/<iommu>/intel-iommu/address
+Date: June 2014
+KernelVersion: 3.17
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description:
+ Physical address of the VT-d DRHD for this IOMMU.
+ Format: %llx. This allows association of a sysfs
+ intel-iommu with a DMAR DRHD table entry.
+
+What: /sys/class/iommu/<iommu>/intel-iommu/cap
+Date: June 2014
+KernelVersion: 3.17
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description:
+ The cached hardware capability register value
+ of this DRHD unit. Format: %llx.
+
+What: /sys/class/iommu/<iommu>/intel-iommu/ecap
+Date: June 2014
+KernelVersion: 3.17
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description:
+ The cached hardware extended capability register
+ value of this DRHD unit. Format: %llx.
+
+What: /sys/class/iommu/<iommu>/intel-iommu/version
+Date: June 2014
+KernelVersion: 3.17
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description:
+ The architecture version as reported from the
+ VT-d VER_REG. Format: %d:%d, major:minor
diff --git a/Documentation/ABI/testing/sysfs-class-lcd b/Documentation/ABI/testing/sysfs-class-lcd
new file mode 100644
index 000000000..35906bf7a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-lcd
@@ -0,0 +1,23 @@
+What: /sys/class/lcd/<lcd>/lcd_power
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Control LCD power, values are FB_BLANK_* from fb.h
+ - FB_BLANK_UNBLANK (0) : power on.
+ - FB_BLANK_POWERDOWN (4) : power off
+
+What: /sys/class/lcd/<lcd>/contrast
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Current contrast of this LCD device. Value is between 0 and
+ /sys/class/lcd/<lcd>/max_contrast.
+
+What: /sys/class/lcd/<lcd>/max_contrast
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Maximum contrast for this LCD device.
diff --git a/Documentation/ABI/testing/sysfs-class-led b/Documentation/ABI/testing/sysfs-class-led
new file mode 100644
index 000000000..3646ec85d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led
@@ -0,0 +1,37 @@
+What: /sys/class/leds/<led>/brightness
+Date: March 2006
+KernelVersion: 2.6.17
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Set the brightness of the LED. Most LEDs don't
+ have hardware brightness support so will just be turned on for
+ non-zero brightness settings. The value is between 0 and
+ /sys/class/leds/<led>/max_brightness.
+
+What: /sys/class/leds/<led>/max_brightness
+Date: March 2006
+KernelVersion: 2.6.17
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Maximum brightness level for this led, default is 255 (LED_FULL).
+
+What: /sys/class/leds/<led>/trigger
+Date: March 2006
+KernelVersion: 2.6.17
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Set the trigger for this LED. A trigger is a kernel based source
+ of led events.
+ You can change triggers in a similar manner to the way an IO
+ scheduler is chosen. Trigger specific parameters can appear in
+ /sys/class/leds/<led> once a given trigger is selected.
+
+What: /sys/class/leds/<led>/inverted
+Date: January 2011
+KernelVersion: 2.6.38
+Contact: Richard Purdie <rpurdie@rpsys.net>
+Description:
+ Invert the LED on/off state. This parameter is specific to
+ gpio and backlight triggers. In case of the backlight trigger,
+ it is useful when driving a LED which is intended to indicate
+ a device in a standby like state.
diff --git a/Documentation/ABI/testing/sysfs-class-led-driver-lm3533 b/Documentation/ABI/testing/sysfs-class-led-driver-lm3533
new file mode 100644
index 000000000..620ebb3b9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-driver-lm3533
@@ -0,0 +1,65 @@
+What: /sys/class/leds/<led>/als_channel
+Date: May 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Set the ALS output channel to use as input in
+ ALS-current-control mode (1, 2), where
+
+ 1 - out_current1
+ 2 - out_current2
+
+What: /sys/class/leds/<led>/als_en
+Date: May 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Enable ALS-current-control mode (0, 1).
+
+What: /sys/class/leds/<led>/falltime
+What: /sys/class/leds/<led>/risetime
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Set the pattern generator fall and rise times (0..7), where
+
+ 0 - 2048 us
+ 1 - 262 ms
+ 2 - 524 ms
+ 3 - 1.049 s
+ 4 - 2.097 s
+ 5 - 4.194 s
+ 6 - 8.389 s
+ 7 - 16.78 s
+
+What: /sys/class/leds/<led>/id
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Get the id of this led (0..3).
+
+What: /sys/class/leds/<led>/linear
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Set the brightness-mapping mode (0, 1), where
+
+ 0 - exponential mode
+ 1 - linear mode
+
+What: /sys/class/leds/<led>/pwm
+Date: April 2012
+KernelVersion: 3.5
+Contact: Johan Hovold <jhovold@gmail.com>
+Description:
+ Set the PWM-input control mask (5 bits), where
+
+ bit 5 - PWM-input enabled in Zone 4
+ bit 4 - PWM-input enabled in Zone 3
+ bit 3 - PWM-input enabled in Zone 2
+ bit 2 - PWM-input enabled in Zone 1
+ bit 1 - PWM-input enabled in Zone 0
+ bit 0 - PWM-input enabled
diff --git a/Documentation/ABI/testing/sysfs-class-led-flash b/Documentation/ABI/testing/sysfs-class-led-flash
new file mode 100644
index 000000000..220a0270b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-led-flash
@@ -0,0 +1,80 @@
+What: /sys/class/leds/<led>/flash_brightness
+Date: March 2015
+KernelVersion: 4.0
+Contact: Jacek Anaszewski <j.anaszewski@samsung.com>
+Description: read/write
+ Set the brightness of this LED in the flash strobe mode, in
+ microamperes. The file is created only for the flash LED devices
+ that support setting flash brightness.
+
+ The value is between 0 and
+ /sys/class/leds/<led>/max_flash_brightness.
+
+What: /sys/class/leds/<led>/max_flash_brightness
+Date: March 2015
+KernelVersion: 4.0
+Contact: Jacek Anaszewski <j.anaszewski@samsung.com>
+Description: read only
+ Maximum brightness level for this LED in the flash strobe mode,
+ in microamperes.
+
+What: /sys/class/leds/<led>/flash_timeout
+Date: March 2015
+KernelVersion: 4.0
+Contact: Jacek Anaszewski <j.anaszewski@samsung.com>
+Description: read/write
+ Hardware timeout for flash, in microseconds. The flash strobe
+ is stopped after this period of time has passed from the start
+ of the strobe. The file is created only for the flash LED
+ devices that support setting flash timeout.
+
+What: /sys/class/leds/<led>/max_flash_timeout
+Date: March 2015
+KernelVersion: 4.0
+Contact: Jacek Anaszewski <j.anaszewski@samsung.com>
+Description: read only
+ Maximum flash timeout for this LED, in microseconds.
+
+What: /sys/class/leds/<led>/flash_strobe
+Date: March 2015
+KernelVersion: 4.0
+Contact: Jacek Anaszewski <j.anaszewski@samsung.com>
+Description: read/write
+ Flash strobe state. When written with 1 it triggers flash strobe
+ and when written with 0 it turns the flash off.
+
+ On read 1 means that flash is currently strobing and 0 means
+ that flash is off.
+
+What: /sys/class/leds/<led>/flash_fault
+Date: March 2015
+KernelVersion: 4.0
+Contact: Jacek Anaszewski <j.anaszewski@samsung.com>
+Description: read only
+ Space separated list of flash faults that may have occurred.
+ Flash faults are re-read after strobing the flash. Possible
+ flash faults:
+
+ * led-over-voltage - flash controller voltage to the flash LED
+ has exceeded the limit specific to the flash controller
+ * flash-timeout-exceeded - the flash strobe was still on when
+ the timeout set by the user has expired; not all flash
+ controllers may set this in all such conditions
+ * controller-over-temperature - the flash controller has
+ overheated
+ * controller-short-circuit - the short circuit protection
+ of the flash controller has been triggered
+ * led-power-supply-over-current - current in the LED power
+ supply has exceeded the limit specific to the flash
+ controller
+ * indicator-led-fault - the flash controller has detected
+ a short or open circuit condition on the indicator LED
+ * led-under-voltage - flash controller voltage to the flash
+ LED has been below the minimum limit specific to
+ the flash
+ * controller-under-voltage - the input voltage of the flash
+ controller is below the limit under which strobing the
+ flash at full current will not be possible;
+ the condition persists until this flag is no longer set
+ * led-over-temperature - the temperature of the LED has exceeded
+ its allowed upper limit
diff --git a/Documentation/ABI/testing/sysfs-class-leds-gt683r b/Documentation/ABI/testing/sysfs-class-leds-gt683r
new file mode 100644
index 000000000..e4fae6026
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-leds-gt683r
@@ -0,0 +1,16 @@
+What: /sys/class/leds/<led>/gt683r/mode
+Date: Jun 2014
+KernelVersion: 3.17
+Contact: Janne Kanniainen <janne.kanniainen@gmail.com>
+Description:
+ Set the mode of LEDs. You should notice that changing the mode
+ of one LED will update the mode of its two sibling devices as
+ well.
+
+ 0 - normal
+ 1 - audio
+ 2 - breathing
+
+ Normal: LEDs are fully on when enabled
+ Audio: LEDs brightness depends on sound level
+ Breathing: LEDs brightness varies at human breathing rate \ No newline at end of file
diff --git a/Documentation/ABI/testing/sysfs-class-mei b/Documentation/ABI/testing/sysfs-class-mei
new file mode 100644
index 000000000..80d9888a8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-mei
@@ -0,0 +1,31 @@
+What: /sys/class/mei/
+Date: May 2014
+KernelVersion: 3.17
+Contact: Tomas Winkler <tomas.winkler@intel.com>
+Description:
+ The mei/ class sub-directory belongs to mei device class
+
+
+What: /sys/class/mei/meiN/
+Date: May 2014
+KernelVersion: 3.17
+Contact: Tomas Winkler <tomas.winkler@intel.com>
+Description:
+ The /sys/class/mei/meiN directory is created for
+ each probed mei device
+
+What: /sys/class/mei/meiN/fw_status
+Date: Nov 2014
+KernelVersion: 3.19
+Contact: Tomas Winkler <tomas.winkler@intel.com>
+Description: Display fw status registers content
+
+ The ME FW writes its status information into fw status
+ registers for BIOS and OS to monitor fw health.
+
+ The register contains running state, power management
+ state, error codes, and others. The way the registers
+ are decoded depends on PCH or SoC generation.
+ Also number of registers varies between 1 and 6
+ depending on generation.
+
diff --git a/Documentation/ABI/testing/sysfs-class-mic.txt b/Documentation/ABI/testing/sysfs-class-mic.txt
new file mode 100644
index 000000000..13f48afc5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-mic.txt
@@ -0,0 +1,157 @@
+What: /sys/class/mic/
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ The mic class directory belongs to Intel MIC devices and
+ provides information per MIC device. An Intel MIC device is a
+ PCIe form factor add-in Coprocessor card based on the Intel Many
+ Integrated Core (MIC) architecture that runs a Linux OS.
+
+What: /sys/class/mic/mic(x)
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ The directories /sys/class/mic/mic0, /sys/class/mic/mic1 etc.,
+ represent MIC devices (0,1,..etc). Each directory has
+ information specific to that MIC device.
+
+What: /sys/class/mic/mic(x)/family
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ Provides information about the Coprocessor family for an Intel
+ MIC device. For example - "x100"
+
+What: /sys/class/mic/mic(x)/stepping
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ Provides information about the silicon stepping for an Intel
+ MIC device. For example - "A0" or "B0"
+
+What: /sys/class/mic/mic(x)/state
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ When read, this entry provides the current state of an Intel
+ MIC device in the context of the card OS. Possible values that
+ will be read are:
+ "offline" - The MIC device is ready to boot the card OS. On
+ reading this entry after an OSPM resume, a "boot" has to be
+ written to this entry if the card was previously shutdown
+ during OSPM suspend.
+ "online" - The MIC device has initiated booting a card OS.
+ "shutting_down" - The card OS is shutting down.
+ "reset_failed" - The MIC device has failed to reset.
+ "suspending" - The MIC device is currently being prepared for
+ suspend. On reading this entry, a "suspend" has to be written
+ to the state sysfs entry to ensure the card is shutdown during
+ OSPM suspend.
+ "suspended" - The MIC device has been suspended.
+
+ When written, this sysfs entry triggers different state change
+ operations depending upon the current state of the card OS.
+ Acceptable values are:
+ "boot" - Boot the card OS image specified by the combination
+ of firmware, ramdisk, cmdline and bootmode
+ sysfs entries.
+ "reset" - Initiates device reset.
+ "shutdown" - Initiates card OS shutdown.
+ "suspend" - Initiates card OS shutdown and also marks the card
+ as suspended.
+
+What: /sys/class/mic/mic(x)/shutdown_status
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ An Intel MIC device runs a Linux OS during its operation. This
+ OS can shutdown because of various reasons. When read, this
+ entry provides the status on why the card OS was shutdown.
+ Possible values are:
+ "nop" - shutdown status is not applicable, when the card OS is
+ "online"
+ "crashed" - Shutdown because of a HW or SW crash.
+ "halted" - Shutdown because of a halt command.
+ "poweroff" - Shutdown because of a poweroff command.
+ "restart" - Shutdown because of a restart command.
+
+What: /sys/class/mic/mic(x)/cmdline
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ An Intel MIC device runs a Linux OS during its operation. Before
+ booting this card OS, it is possible to pass kernel command line
+ options to configure various features in it, similar to
+ self-bootable machines. When read, this entry provides
+ information about the current kernel command line options set to
+ boot the card OS. This entry can be written to change the
+ existing kernel command line options. Typically, the user would
+ want to read the current command line options, append new ones
+ or modify existing ones and then write the whole kernel command
+ line back to this entry.
+
+What: /sys/class/mic/mic(x)/firmware
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ When read, this sysfs entry provides the path name under
+ /lib/firmware/ where the firmware image to be booted on the
+ card can be found. The entry can be written to change the
+ firmware image location under /lib/firmware/.
+
+What: /sys/class/mic/mic(x)/ramdisk
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ When read, this sysfs entry provides the path name under
+ /lib/firmware/ where the ramdisk image to be used during card
+ OS boot can be found. The entry can be written to change
+ the ramdisk image location under /lib/firmware/.
+
+What: /sys/class/mic/mic(x)/bootmode
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ When read, this sysfs entry provides the current bootmode for
+ the card. This sysfs entry can be written with the following
+ valid strings:
+ a) linux - Boot a Linux image.
+ b) elf - Boot an elf image for flash updates.
+
+What: /sys/class/mic/mic(x)/log_buf_addr
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ An Intel MIC device runs a Linux OS during its operation. For
+ debugging purpose and early kernel boot messages, the user can
+ access the card OS log buffer via debugfs. When read, this entry
+ provides the kernel virtual address of the buffer where the card
+ OS log buffer can be read. This entry is written by the host
+ configuration daemon to set the log buffer address. The correct
+ log buffer address to be written can be found in the System.map
+ file of the card OS.
+
+What: /sys/class/mic/mic(x)/log_buf_len
+Date: October 2013
+KernelVersion: 3.13
+Contact: Sudeep Dutt <sudeep.dutt@intel.com>
+Description:
+ An Intel MIC device runs a Linux OS during its operation. For
+ debugging purpose and early kernel boot messages, the user can
+ access the card OS log buffer via debugfs. When read, this entry
+ provides the kernel virtual address where the card OS log buffer
+ length can be read. This entry is written by host configuration
+ daemon to set the log buffer length address. The correct log
+ buffer length address to be written can be found in the
+ System.map file of the card OS.
diff --git a/Documentation/ABI/testing/sysfs-class-mtd b/Documentation/ABI/testing/sysfs-class-mtd
new file mode 100644
index 000000000..3b5c3bca9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-mtd
@@ -0,0 +1,234 @@
+What: /sys/class/mtd/
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ The mtd/ class subdirectory belongs to the MTD subsystem
+ (MTD core).
+
+What: /sys/class/mtd/mtdX/
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ The /sys/class/mtd/mtd{0,1,2,3,...} directories correspond
+ to each /dev/mtdX character device. These may represent
+ physical/simulated flash devices, partitions on a flash
+ device, or concatenated flash devices.
+
+What: /sys/class/mtd/mtdXro/
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ These directories provide the corresponding read-only device
+ nodes for /sys/class/mtd/mtdX/ .
+
+What: /sys/class/mtd/mtdX/dev
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ Major and minor numbers of the character device corresponding
+ to this MTD device (in <major>:<minor> format). This is the
+ read-write device so <minor> will be even.
+
+What: /sys/class/mtd/mtdXro/dev
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ Major and minor numbers of the character device corresponding
+ to the read-only variant of thie MTD device (in
+ <major>:<minor> format). In this case <minor> will be odd.
+
+What: /sys/class/mtd/mtdX/erasesize
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ "Major" erase size for the device. If numeraseregions is
+ zero, this is the eraseblock size for the entire device.
+ Otherwise, the MEMGETREGIONCOUNT/MEMGETREGIONINFO ioctls
+ can be used to determine the actual eraseblock layout.
+
+What: /sys/class/mtd/mtdX/flags
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ A hexadecimal value representing the device flags, ORed
+ together:
+
+ 0x0400: MTD_WRITEABLE - device is writable
+ 0x0800: MTD_BIT_WRITEABLE - single bits can be flipped
+ 0x1000: MTD_NO_ERASE - no erase necessary
+ 0x2000: MTD_POWERUP_LOCK - always locked after reset
+
+What: /sys/class/mtd/mtdX/name
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ A human-readable ASCII name for the device or partition.
+ This will match the name in /proc/mtd .
+
+What: /sys/class/mtd/mtdX/numeraseregions
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ For devices that have variable eraseblock sizes, this
+ provides the total number of erase regions. Otherwise,
+ it will read back as zero.
+
+What: /sys/class/mtd/mtdX/oobsize
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ Number of OOB bytes per page.
+
+What: /sys/class/mtd/mtdX/size
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ Total size of the device/partition, in bytes.
+
+What: /sys/class/mtd/mtdX/type
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ One of the following ASCII strings, representing the device
+ type:
+
+ absent, ram, rom, nor, nand, mlc-nand, dataflash, ubi, unknown
+
+What: /sys/class/mtd/mtdX/writesize
+Date: April 2009
+KernelVersion: 2.6.29
+Contact: linux-mtd@lists.infradead.org
+Description:
+ Minimal writable flash unit size. This will always be
+ a positive integer.
+
+ In the case of NOR flash it is 1 (even though individual
+ bits can be cleared).
+
+ In the case of NAND flash it is one NAND page (or a
+ half page, or a quarter page).
+
+ In the case of ECC NOR, it is the ECC block size.
+
+What: /sys/class/mtd/mtdX/ecc_strength
+Date: April 2012
+KernelVersion: 3.4
+Contact: linux-mtd@lists.infradead.org
+Description:
+ Maximum number of bit errors that the device is capable of
+ correcting within each region covering an ECC step (see
+ ecc_step_size). This will always be a non-negative integer.
+
+ In the case of devices lacking any ECC capability, it is 0.
+
+What: /sys/class/mtd/mtdX/bitflip_threshold
+Date: April 2012
+KernelVersion: 3.4
+Contact: linux-mtd@lists.infradead.org
+Description:
+ This allows the user to examine and adjust the criteria by which
+ mtd returns -EUCLEAN from mtd_read() and mtd_read_oob(). If the
+ maximum number of bit errors that were corrected on any single
+ region comprising an ecc step (as reported by the driver) equals
+ or exceeds this value, -EUCLEAN is returned. Otherwise, absent
+ an error, 0 is returned. Higher layers (e.g., UBI) use this
+ return code as an indication that an erase block may be
+ degrading and should be scrutinized as a candidate for being
+ marked as bad.
+
+ The initial value may be specified by the flash device driver.
+ If not, then the default value is ecc_strength.
+
+ The introduction of this feature brings a subtle change to the
+ meaning of the -EUCLEAN return code. Previously, it was
+ interpreted to mean simply "one or more bit errors were
+ corrected". Its new interpretation can be phrased as "a
+ dangerously high number of bit errors were corrected on one or
+ more regions comprising an ecc step". The precise definition of
+ "dangerously high" can be adjusted by the user with
+ bitflip_threshold. Users are discouraged from doing this,
+ however, unless they know what they are doing and have intimate
+ knowledge of the properties of their device. Broadly speaking,
+ bitflip_threshold should be low enough to detect genuine erase
+ block degradation, but high enough to avoid the consequences of
+ a persistent return value of -EUCLEAN on devices where sticky
+ bitflips occur. Note that if bitflip_threshold exceeds
+ ecc_strength, -EUCLEAN is never returned by the read operations.
+ Conversely, if bitflip_threshold is zero, -EUCLEAN is always
+ returned, absent a hard error.
+
+ This is generally applicable only to NAND flash devices with ECC
+ capability. It is ignored on devices lacking ECC capability;
+ i.e., devices for which ecc_strength is zero.
+
+What: /sys/class/mtd/mtdX/ecc_step_size
+Date: May 2013
+KernelVersion: 3.10
+Contact: linux-mtd@lists.infradead.org
+Description:
+ The size of a single region covered by ECC, known as the ECC
+ step. Devices may have several equally sized ECC steps within
+ each writesize region.
+
+ It will always be a non-negative integer. In the case of
+ devices lacking any ECC capability, it is 0.
+
+What: /sys/class/mtd/mtdX/ecc_failures
+Date: June 2014
+KernelVersion: 3.17
+Contact: linux-mtd@lists.infradead.org
+Description:
+ The number of failures reported by this device's ECC. Typically,
+ these failures are associated with failed read operations.
+
+ It will always be a non-negative integer. In the case of
+ devices lacking any ECC capability, it is 0.
+
+What: /sys/class/mtd/mtdX/corrected_bits
+Date: June 2014
+KernelVersion: 3.17
+Contact: linux-mtd@lists.infradead.org
+Description:
+ The number of bits that have been corrected by means of the
+ device's ECC.
+
+ It will always be a non-negative integer. In the case of
+ devices lacking any ECC capability, it is 0.
+
+What: /sys/class/mtd/mtdX/bad_blocks
+Date: June 2014
+KernelVersion: 3.17
+Contact: linux-mtd@lists.infradead.org
+Description:
+ The number of blocks marked as bad, if any, in this partition.
+
+What: /sys/class/mtd/mtdX/bbt_blocks
+Date: June 2014
+KernelVersion: 3.17
+Contact: linux-mtd@lists.infradead.org
+Description:
+ The number of blocks that are marked as reserved, if any, in
+ this partition. These are typically used to store the in-flash
+ bad block table (BBT).
+
+What: /sys/class/mtd/mtdX/offset
+Date: March 2015
+KernelVersion: 4.1
+Contact: linux-mtd@lists.infradead.org
+Description:
+ For a partition, the offset of that partition from the start
+ of the master device in bytes. This attribute is absent on
+ main devices, so it can be used to distinguish between
+ partitions and devices that aren't partitions.
diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
new file mode 100644
index 000000000..5ecfd72ba
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -0,0 +1,234 @@
+What: /sys/class/net/<iface>/name_assign_type
+Date: July 2014
+KernelVersion: 3.17
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the name assignment type. Possible values are:
+ 1: enumerated by the kernel, possibly in an unpredictable way
+ 2: predictably named by the kernel
+ 3: named by userspace
+ 4: renamed
+
+What: /sys/class/net/<iface>/addr_assign_type
+Date: July 2010
+KernelVersion: 3.2
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the address assignment type. Possible values are:
+ 0: permanent address
+ 1: randomly generated
+ 2: stolen from another device
+ 3: set using dev_set_mac_address
+
+What: /sys/class/net/<iface>/addr_len
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the hardware address size in bytes.
+ Values vary based on the lower-level protocol used by the
+ interface (Ethernet, FDDI, ATM, IEEE 802.15.4...). See
+ include/uapi/linux/if_*.h for actual values.
+
+What: /sys/class/net/<iface>/address
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Hardware address currently assigned to this interface.
+ Format is a string, e.g: 00:11:22:33:44:55 for an Ethernet MAC
+ address.
+
+What: /sys/class/net/<iface>/broadcast
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Hardware broadcast address for this interface. Format is a
+ string, e.g: ff:ff:ff:ff:ff:ff for an Ethernet broadcast MAC
+ address.
+
+What: /sys/class/net/<iface>/carrier
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the current physical link state of the interface.
+ Posssible values are:
+ 0: physical link is down
+ 1: physical link is up
+
+ Note: some special devices, e.g: bonding and team drivers will
+ allow this attribute to be written to force a link state for
+ operating correctly and designating another fallback interface.
+
+What: /sys/class/net/<iface>/dev_id
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the device unique identifier. Format is an hexadecimal
+ value. This is used to disambiguate interfaces which might be
+ stacked (e.g: VLAN interfaces) but still have the same MAC
+ address as their parent device.
+
+What: /sys/class/net/<iface>/dormant
+Date: March 2006
+KernelVersion: 2.6.17
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates whether the interface is in dormant state. Possible
+ values are:
+ 0: interface is not dormant
+ 1: interface is dormant
+
+ This attribute can be used by supplicant software to signal that
+ the device is not usable unless some supplicant-based
+ authentication is performed (e.g: 802.1x). 'link_mode' attribute
+ will also reflect the dormant state.
+
+What: /sys/clas/net/<iface>/duplex
+Date: October 2009
+KernelVersion: 2.6.33
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface latest or current duplex value. Possible
+ values are:
+ half: half duplex
+ full: full duplex
+
+ Note: This attribute is only valid for interfaces that implement
+ the ethtool get_settings method (mostly Ethernet).
+
+What: /sys/class/net/<iface>/flags
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface flags as a bitmask in hexadecimal. See
+ include/uapi/linux/if.h for a list of all possible values and
+ the flags semantics.
+
+What: /sys/class/net/<iface>/ifalias
+Date: September 2008
+KernelVersion: 2.6.28
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates/stores an interface alias name as a string. This can
+ be used for system management purposes.
+
+What: /sys/class/net/<iface>/ifindex
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the system-wide interface unique index identifier as a
+ decimal number. This attribute is used for mapping an interface
+ identifier to an interface name. It is used throughout the
+ networking stack for specifying the interface specific
+ requests/events.
+
+What: /sys/class/net/<iface>/iflink
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the system-wide interface unique index identifier a
+ the interface is linked to. Format is decimal. This attribute is
+ used to resolve interfaces chaining, linking and stacking.
+ Physical interfaces have the same 'ifindex' and 'iflink' values.
+
+What: /sys/class/net/<iface>/link_mode
+Date: March 2006
+KernelVersion: 2.6.17
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface link mode, as a decimal number. This
+ attribute should be used in conjunction with 'dormant' attribute
+ to determine the interface usability. Possible values:
+ 0: default link mode
+ 1: dormant link mode
+
+What: /sys/class/net/<iface>/mtu
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface currently configured MTU value, in
+ bytes, and in decimal format. Specific values depends on the
+ lower-level interface protocol used. Ethernet devices will show
+ a 'mtu' attribute value of 1500 unless changed.
+
+What: /sys/class/net/<iface>/netdev_group
+Date: January 2011
+KernelVersion: 2.6.39
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface network device group, as a decimal
+ integer. Default value is 0 which corresponds to the initial
+ network devices group. The group can be changed to affect
+ routing decisions (see: net/ipv4/fib_rules and
+ net/ipv6/fib6_rules.c).
+
+What: /sys/class/net/<iface>/operstate
+Date: March 2006
+KernelVersion: 2.6.17
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface RFC2863 operational state as a string.
+ Possible values are:
+ "unknown", "notpresent", "down", "lowerlayerdown", "testing",
+ "dormant", "up".
+
+What: /sys/class/net/<iface>/phys_port_id
+Date: July 2013
+KernelVersion: 3.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface unique physical port identifier within
+ the NIC, as a string.
+
+What: /sys/class/net/<iface>/phys_port_name
+Date: March 2015
+KernelVersion: 4.0
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface physical port name within the NIC,
+ as a string.
+
+What: /sys/class/net/<iface>/speed
+Date: October 2009
+KernelVersion: 2.6.33
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface latest or current speed value. Value is
+ an integer representing the link speed in Mbits/sec.
+
+ Note: this attribute is only valid for interfaces that implement
+ the ethtool get_settings method (mostly Ethernet ).
+
+What: /sys/class/net/<iface>/tx_queue_len
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface transmit queue len in number of packets,
+ as an integer value. Value depend on the type of interface,
+ Ethernet network adapters have a default value of 1000 unless
+ configured otherwise
+
+What: /sys/class/net/<iface>/type
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the interface protocol type as a decimal value. See
+ include/uapi/linux/if_arp.h for all possible values.
+
+What: /sys/class/net/<iface>/phys_switch_id
+Date: November 2014
+KernelVersion: 3.19
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the unique physical switch identifier of a switch this
+ port belongs to, as a string.
diff --git a/Documentation/ABI/testing/sysfs-class-net-batman-adv b/Documentation/ABI/testing/sysfs-class-net-batman-adv
new file mode 100644
index 000000000..7f34a95bb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-batman-adv
@@ -0,0 +1,15 @@
+
+What: /sys/class/net/<iface>/batman-adv/iface_status
+Date: May 2010
+Contact: Marek Lindner <mareklindner@neomailbox.ch>
+Description:
+ Indicates the status of <iface> as it is seen by batman.
+
+What: /sys/class/net/<iface>/batman-adv/mesh_iface
+Date: May 2010
+Contact: Marek Lindner <mareklindner@neomailbox.ch>
+Description:
+ The /sys/class/net/<iface>/batman-adv/mesh_iface file
+ displays the batman mesh interface this <iface>
+ currently is associated with.
+
diff --git a/Documentation/ABI/testing/sysfs-class-net-cdc_ncm b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm
new file mode 100644
index 000000000..5cedf72df
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-cdc_ncm
@@ -0,0 +1,149 @@
+What: /sys/class/net/<iface>/cdc_ncm/min_tx_pkt
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ The driver will pad NCM Transfer Blocks (NTBs) longer
+ than this to tx_max, allowing the device to receive
+ tx_max sized frames with no terminating short
+ packet. NTBs shorter than this limit are transmitted
+ as-is, without any padding, and are terminated with a
+ short USB packet.
+
+ Padding to tx_max allows the driver to transmit NTBs
+ back-to-back without any interleaving short USB
+ packets. This reduces the number of short packet
+ interrupts in the device, and represents a tradeoff
+ between USB bus bandwidth and device DMA optimization.
+
+ Set to 0 to pad all frames. Set greater than tx_max to
+ disable all padding.
+
+What: /sys/class/net/<iface>/cdc_ncm/rx_max
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ The maximum NTB size for RX. Cannot exceed the
+ maximum value supported by the device. Must allow at
+ least one max sized datagram plus headers.
+
+ The actual limits are device dependent. See
+ dwNtbInMaxSize.
+
+ Note: Some devices will silently ignore changes to
+ this value, resulting in oversized NTBs and
+ corresponding framing errors.
+
+What: /sys/class/net/<iface>/cdc_ncm/tx_max
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ The maximum NTB size for TX. Cannot exceed the
+ maximum value supported by the device. Must allow at
+ least one max sized datagram plus headers.
+
+ The actual limits are device dependent. See
+ dwNtbOutMaxSize.
+
+What: /sys/class/net/<iface>/cdc_ncm/tx_timer_usecs
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ Datagram aggregation timeout in µs. The driver will
+ wait up to 3 times this timeout for more datagrams to
+ aggregate before transmitting an NTB frame.
+
+ Valid range: 5 to 4000000
+
+ Set to 0 to disable aggregation.
+
+The following read-only attributes all represent fields of the
+structure defined in section 6.2.1 "GetNtbParameters" of "Universal
+Serial Bus Communications Class Subclass Specifications for Network
+Control Model Devices" (CDC NCM), Revision 1.0 (Errata 1), November
+24, 2010 from USB Implementers Forum, Inc. The descriptions are
+quoted from table 6-3 of CDC NCM: "NTB Parameter Structure".
+
+What: /sys/class/net/<iface>/cdc_ncm/bmNtbFormatsSupported
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ Bit 0: 16-bit NTB supported (set to 1)
+ Bit 1: 32-bit NTB supported
+ Bits 2 – 15: reserved (reset to zero; must be ignored by host)
+
+What: /sys/class/net/<iface>/cdc_ncm/dwNtbInMaxSize
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ IN NTB Maximum Size in bytes
+
+What: /sys/class/net/<iface>/cdc_ncm/wNdpInDivisor
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ Divisor used for IN NTB Datagram payload alignment
+
+What: /sys/class/net/<iface>/cdc_ncm/wNdpInPayloadRemainder
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ Remainder used to align input datagram payload within
+ the NTB: (Payload Offset) mod (wNdpInDivisor) =
+ wNdpInPayloadRemainder
+
+What: /sys/class/net/<iface>/cdc_ncm/wNdpInAlignment
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ NDP alignment modulus for NTBs on the IN pipe. Shall
+ be a power of 2, and shall be at least 4.
+
+What: /sys/class/net/<iface>/cdc_ncm/dwNtbOutMaxSize
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ OUT NTB Maximum Size
+
+What: /sys/class/net/<iface>/cdc_ncm/wNdpOutDivisor
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ OUT NTB Datagram alignment modulus
+
+What: /sys/class/net/<iface>/cdc_ncm/wNdpOutPayloadRemainder
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ Remainder used to align output datagram payload
+ offsets within the NTB: Padding, shall be transmitted
+ as zero by function, and ignored by host. (Payload
+ Offset) mod (wNdpOutDivisor) = wNdpOutPayloadRemainder
+
+What: /sys/class/net/<iface>/cdc_ncm/wNdpOutAlignment
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ NDP alignment modulus for use in NTBs on the OUT
+ pipe. Shall be a power of 2, and shall be at least 4.
+
+What: /sys/class/net/<iface>/cdc_ncm/wNtbOutMaxDatagrams
+Date: May 2014
+KernelVersion: 3.16
+Contact: Bjørn Mork <bjorn@mork.no>
+Description:
+ Maximum number of datagrams that the host may pack
+ into a single OUT NTB. Zero means that the device
+ imposes no limit.
diff --git a/Documentation/ABI/testing/sysfs-class-net-grcan b/Documentation/ABI/testing/sysfs-class-net-grcan
new file mode 100644
index 000000000..f418c92ca
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-grcan
@@ -0,0 +1,35 @@
+
+What: /sys/class/net/<iface>/grcan/enable0
+Date: October 2012
+KernelVersion: 3.8
+Contact: Andreas Larsson <andreas@gaisler.com>
+Description:
+ Hardware configuration of physical interface 0. This file reads
+ and writes the "Enable 0" bit of the configuration register.
+ Possible values: 0 or 1. See the GRCAN chapter of the GRLIB IP
+ core library documentation for details. The default value is 0
+ or set by the module parameter grcan.enable0 and can be read at
+ /sys/module/grcan/parameters/enable0.
+
+What: /sys/class/net/<iface>/grcan/enable1
+Date: October 2012
+KernelVersion: 3.8
+Contact: Andreas Larsson <andreas@gaisler.com>
+Description:
+ Hardware configuration of physical interface 1. This file reads
+ and writes the "Enable 1" bit of the configuration register.
+ Possible values: 0 or 1. See the GRCAN chapter of the GRLIB IP
+ core library documentation for details. The default value is 0
+ or set by the module parameter grcan.enable1 and can be read at
+ /sys/module/grcan/parameters/enable1.
+
+What: /sys/class/net/<iface>/grcan/select
+Date: October 2012
+KernelVersion: 3.8
+Contact: Andreas Larsson <andreas@gaisler.com>
+Description:
+ Configuration of which physical interface to be used. Possible
+ values: 0 or 1. See the GRCAN chapter of the GRLIB IP core
+ library documentation for details. The default value is 0 or is
+ set by the module parameter grcan.select and can be read at
+ /sys/module/grcan/parameters/select.
diff --git a/Documentation/ABI/testing/sysfs-class-net-mesh b/Documentation/ABI/testing/sysfs-class-net-mesh
new file mode 100644
index 000000000..c46406296
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-mesh
@@ -0,0 +1,108 @@
+
+What: /sys/class/net/<mesh_iface>/mesh/aggregated_ogms
+Date: May 2010
+Contact: Marek Lindner <mareklindner@neomailbox.ch>
+Description:
+ Indicates whether the batman protocol messages of the
+ mesh <mesh_iface> shall be aggregated or not.
+
+What: /sys/class/net/<mesh_iface>/mesh/<vlan_subdir>/ap_isolation
+Date: May 2011
+Contact: Antonio Quartulli <antonio@meshcoding.com>
+Description:
+ Indicates whether the data traffic going from a
+ wireless client to another wireless client will be
+ silently dropped. <vlan_subdir> is empty when referring
+ to the untagged lan.
+
+What: /sys/class/net/<mesh_iface>/mesh/bonding
+Date: June 2010
+Contact: Simon Wunderlich <sw@simonwunderlich.de>
+Description:
+ Indicates whether the data traffic going through the
+ mesh will be sent using multiple interfaces at the
+ same time (if available).
+
+What: /sys/class/net/<mesh_iface>/mesh/bridge_loop_avoidance
+Date: November 2011
+Contact: Simon Wunderlich <sw@simonwunderlich.de>
+Description:
+ Indicates whether the bridge loop avoidance feature
+ is enabled. This feature detects and avoids loops
+ between the mesh and devices bridged with the soft
+ interface <mesh_iface>.
+
+What: /sys/class/net/<mesh_iface>/mesh/fragmentation
+Date: October 2010
+Contact: Andreas Langer <an.langer@gmx.de>
+Description:
+ Indicates whether the data traffic going through the
+ mesh will be fragmented or silently discarded if the
+ packet size exceeds the outgoing interface MTU.
+
+What: /sys/class/net/<mesh_iface>/mesh/gw_bandwidth
+Date: October 2010
+Contact: Marek Lindner <mareklindner@neomailbox.ch>
+Description:
+ Defines the bandwidth which is propagated by this
+ node if gw_mode was set to 'server'.
+
+What: /sys/class/net/<mesh_iface>/mesh/gw_mode
+Date: October 2010
+Contact: Marek Lindner <mareklindner@neomailbox.ch>
+Description:
+ Defines the state of the gateway features. Can be
+ either 'off', 'client' or 'server'.
+
+What: /sys/class/net/<mesh_iface>/mesh/gw_sel_class
+Date: October 2010
+Contact: Marek Lindner <mareklindner@neomailbox.ch>
+Description:
+ Defines the selection criteria this node will use
+ to choose a gateway if gw_mode was set to 'client'.
+
+What: /sys/class/net/<mesh_iface>/mesh/hop_penalty
+Date: Oct 2010
+Contact: Linus Lüssing <linus.luessing@web.de>
+Description:
+ Defines the penalty which will be applied to an
+ originator message's tq-field on every hop.
+
+What: /sys/class/net/<mesh_iface>/mesh/isolation_mark
+Date: Nov 2013
+Contact: Antonio Quartulli <antonio@meshcoding.com>
+Description:
+ Defines the isolation mark (and its bitmask) which
+ is used to classify clients as "isolated" by the
+ Extended Isolation feature.
+
+What: /sys/class/net/<mesh_iface>/mesh/multicast_mode
+Date: Feb 2014
+Contact: Linus Lüssing <linus.luessing@web.de>
+Description:
+ Indicates whether multicast optimizations are enabled
+ or disabled. If set to zero then all nodes in the
+ mesh are going to use classic flooding for any
+ multicast packet with no optimizations.
+
+What: /sys/class/net/<mesh_iface>/mesh/network_coding
+Date: Nov 2012
+Contact: Martin Hundeboll <martin@hundeboll.net>
+Description:
+ Controls whether Network Coding (using some magic
+ to send fewer wifi packets but still the same
+ content) is enabled or not.
+
+What: /sys/class/net/<mesh_iface>/mesh/orig_interval
+Date: May 2010
+Contact: Marek Lindner <mareklindner@neomailbox.ch>
+Description:
+ Defines the interval in milliseconds in which batman
+ sends its protocol messages.
+
+What: /sys/class/net/<mesh_iface>/mesh/routing_algo
+Date: Dec 2011
+Contact: Marek Lindner <mareklindner@neomailbox.ch>
+Description:
+ Defines the routing procotol this mesh instance
+ uses to find the optimal paths through the mesh.
diff --git a/Documentation/ABI/testing/sysfs-class-net-queues b/Documentation/ABI/testing/sysfs-class-net-queues
new file mode 100644
index 000000000..0c0df91b1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-queues
@@ -0,0 +1,87 @@
+What: /sys/class/<iface>/queues/rx-<queue>/rps_cpus
+Date: March 2010
+KernelVersion: 2.6.35
+Contact: netdev@vger.kernel.org
+Description:
+ Mask of the CPU(s) currently enabled to participate into the
+ Receive Packet Steering packet processing flow for this
+ network device queue. Possible values depend on the number
+ of available CPU(s) in the system.
+
+What: /sys/class/<iface>/queues/rx-<queue>/rps_flow_cnt
+Date: April 2010
+KernelVersion: 2.6.35
+Contact: netdev@vger.kernel.org
+Description:
+ Number of Receive Packet Steering flows being currently
+ processed by this particular network device receive queue.
+
+What: /sys/class/<iface>/queues/tx-<queue>/tx_timeout
+Date: November 2011
+KernelVersion: 3.3
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of transmit timeout events seen by this
+ network interface transmit queue.
+
+What: /sys/class/<iface>/queues/tx-<queue>/tx_maxrate
+Date: March 2015
+KernelVersion: 4.1
+Contact: netdev@vger.kernel.org
+Description:
+ A Mbps max-rate set for the queue, a value of zero means disabled,
+ default is disabled.
+
+What: /sys/class/<iface>/queues/tx-<queue>/xps_cpus
+Date: November 2010
+KernelVersion: 2.6.38
+Contact: netdev@vger.kernel.org
+Description:
+ Mask of the CPU(s) currently enabled to participate into the
+ Transmit Packet Steering packet processing flow for this
+ network device transmit queue. Possible vaules depend on the
+ number of available CPU(s) in the system.
+
+What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/hold_time
+Date: November 2011
+KernelVersion: 3.3
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the hold time in milliseconds to measure the slack
+ of this particular network device transmit queue.
+ Default value is 1000.
+
+What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/inflight
+Date: November 2011
+KernelVersion: 3.3
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of bytes (objects) in flight on this
+ network device transmit queue.
+
+What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit
+Date: November 2011
+KernelVersion: 3.3
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the current limit of bytes allowed to be queued
+ on this network device transmit queue. This value is clamped
+ to be within the bounds defined by limit_max and limit_min.
+
+What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_max
+Date: November 2011
+KernelVersion: 3.3
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the absolute maximum limit of bytes allowed to be
+ queued on this network device transmit queue. See
+ include/linux/dynamic_queue_limits.h for the default value.
+
+What: /sys/class/<iface>/queues/tx-<queue>/byte_queue_limits/limit_min
+Date: November 2011
+KernelVersion: 3.3
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the absolute minimum limit of bytes allowed to be
+ queued on this network device transmit queue. Default value is
+ 0.
diff --git a/Documentation/ABI/testing/sysfs-class-net-statistics b/Documentation/ABI/testing/sysfs-class-net-statistics
new file mode 100644
index 000000000..397118de7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-net-statistics
@@ -0,0 +1,201 @@
+What: /sys/class/<iface>/statistics/collisions
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of collisions seen by this network device.
+ This value might not be relevant with all MAC layers.
+
+What: /sys/class/<iface>/statistics/multicast
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of multicast packets received by this
+ network device.
+
+What: /sys/class/<iface>/statistics/rx_bytes
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of bytes received by this network device.
+ See the network driver for the exact meaning of when this
+ value is incremented.
+
+What: /sys/class/<iface>/statistics/rx_compressed
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of compressed packets received by this
+ network device. This value might only be relevant for interfaces
+ that support packet compression (e.g: PPP).
+
+What: /sys/class/<iface>/statistics/rx_crc_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of packets received with a CRC (FCS) error
+ by this network device. Note that the specific meaning might
+ depend on the MAC layer used by the interface.
+
+What: /sys/class/<iface>/statistics/rx_dropped
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of packets received by the network device
+ but dropped, that are not forwarded to the upper layers for
+ packet processing. See the network driver for the exact
+ meaning of this value.
+
+What: /sys/class/<iface>/statistics/rx_fifo_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of receive FIFO errors seen by this
+ network device. See the network driver for the exact
+ meaning of this value.
+
+What: /sys/class/<iface>/statistics/rx_frame_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of received frames with error, such as
+ alignment errors. Note that the specific meaning depends on
+ on the MAC layer protocol used. See the network driver for
+ the exact meaning of this value.
+
+What: /sys/class/<iface>/statistics/rx_length_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of received error packet with a length
+ error, oversized or undersized. See the network driver for the
+ exact meaning of this value.
+
+What: /sys/class/<iface>/statistics/rx_missed_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of received packets that have been missed
+ due to lack of capacity in the receive side. See the network
+ driver for the exact meaning of this value.
+
+What: /sys/class/<iface>/statistics/rx_over_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of received packets that are oversized
+ compared to what the network device is configured to accept
+ (e.g: larger than MTU). See the network driver for the exact
+ meaning of this value.
+
+What: /sys/class/<iface>/statistics/rx_packets
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the total number of good packets received by this
+ network device.
+
+What: /sys/class/<iface>/statistics/tx_aborted_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of packets that have been aborted
+ during transmission by a network device (e.g: because of
+ a medium collision). See the network driver for the exact
+ meaning of this value.
+
+What: /sys/class/<iface>/statistics/tx_bytes
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of bytes transmitted by a network
+ device. See the network driver for the exact meaning of this
+ value, in particular whether this accounts for all successfully
+ transmitted packets or all packets that have been queued for
+ transmission.
+
+What: /sys/class/<iface>/statistics/tx_carrier_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of packets that could not be transmitted
+ because of carrier errors (e.g: physical link down). See the
+ network driver for the exact meaning of this value.
+
+What: /sys/class/<iface>/statistics/tx_compressed
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of transmitted compressed packets. Note
+ this might only be relevant for devices that support
+ compression (e.g: PPP).
+
+What: /sys/class/<iface>/statistics/tx_dropped
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of packets dropped during transmission.
+ See the driver for the exact reasons as to why the packets were
+ dropped.
+
+What: /sys/class/<iface>/statistics/tx_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of packets in error during transmission by
+ a network device. See the driver for the exact reasons as to
+ why the packets were dropped.
+
+What: /sys/class/<iface>/statistics/tx_fifo_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of packets having caused a transmit
+ FIFO error. See the driver for the exact reasons as to why the
+ packets were dropped.
+
+What: /sys/class/<iface>/statistics/tx_heartbeat_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of packets transmitted that have been
+ reported as heartbeat errors. See the driver for the exact
+ reasons as to why the packets were dropped.
+
+What: /sys/class/<iface>/statistics/tx_packets
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of packets transmitted by a network
+ device. See the driver for whether this reports the number of all
+ attempted or successful transmissions.
+
+What: /sys/class/<iface>/statistics/tx_window_errors
+Date: April 2005
+KernelVersion: 2.6.12
+Contact: netdev@vger.kernel.org
+Description:
+ Indicates the number of packets not successfully transmitted
+ due to a window collision. The specific meaning depends on the
+ MAC layer used. On Ethernet this is usually used to report
+ late collisions errors.
diff --git a/Documentation/ABI/testing/sysfs-class-pktcdvd b/Documentation/ABI/testing/sysfs-class-pktcdvd
new file mode 100644
index 000000000..b1c3f0263
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-pktcdvd
@@ -0,0 +1,72 @@
+What: /sys/class/pktcdvd/
+Date: Oct. 2006
+KernelVersion: 2.6.20
+Contact: Thomas Maier <balagi@justmail.de>
+Description:
+
+sysfs interface
+---------------
+
+The pktcdvd module (packet writing driver) creates
+these files in the sysfs:
+(<devid> is in format major:minor )
+
+/sys/class/pktcdvd/
+ add (0200) Write a block device id (major:minor)
+ to create a new pktcdvd device and map
+ it to the block device.
+
+ remove (0200) Write the pktcdvd device id (major:minor)
+ to it to remove the pktcdvd device.
+
+ device_map (0444) Shows the device mapping in format:
+ pktcdvd[0-7] <pktdevid> <blkdevid>
+
+/sys/class/pktcdvd/pktcdvd[0-7]/
+ dev (0444) Device id
+ uevent (0200) To send an uevent.
+
+/sys/class/pktcdvd/pktcdvd[0-7]/stat/
+ packets_started (0444) Number of started packets.
+ packets_finished (0444) Number of finished packets.
+
+ kb_written (0444) kBytes written.
+ kb_read (0444) kBytes read.
+ kb_read_gather (0444) kBytes read to fill write packets.
+
+ reset (0200) Write any value to it to reset
+ pktcdvd device statistic values, like
+ bytes read/written.
+
+/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/
+ size (0444) Contains the size of the bio write
+ queue.
+
+ congestion_off (0644) If bio write queue size is below
+ this mark, accept new bio requests
+ from the block layer.
+
+ congestion_on (0644) If bio write queue size is higher
+ as this mark, do no longer accept
+ bio write requests from the block
+ layer and wait till the pktcdvd
+ device has processed enough bio's
+ so that bio write queue size is
+ below congestion off mark.
+ A value of <= 0 disables congestion
+ control.
+
+
+Example:
+--------
+To use the pktcdvd sysfs interface directly, you can do:
+
+# create a new pktcdvd device mapped to /dev/hdc
+echo "22:0" >/sys/class/pktcdvd/add
+cat /sys/class/pktcdvd/device_map
+# assuming device pktcdvd0 was created, look at stat's
+cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written
+# print the device id of the mapped block device
+fgrep pktcdvd0 /sys/class/pktcdvd/device_map
+# remove device, using pktcdvd0 device id 253:0
+echo "253:0" >/sys/class/pktcdvd/remove
diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power
new file mode 100644
index 000000000..369d2a2d7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-power
@@ -0,0 +1,76 @@
+What: /sys/class/power/ds2760-battery.*/charge_now
+Date: May 2010
+KernelVersion: 2.6.35
+Contact: Daniel Mack <daniel@caiaq.de>
+Description:
+ This file is writeable and can be used to set the current
+ coloumb counter value inside the battery monitor chip. This
+ is needed for unavoidable corrections of aging batteries.
+ A userspace daemon can monitor the battery charging logic
+ and once the counter drops out of considerable bounds, take
+ appropriate action.
+
+What: /sys/class/power/ds2760-battery.*/charge_full
+Date: May 2010
+KernelVersion: 2.6.35
+Contact: Daniel Mack <daniel@caiaq.de>
+Description:
+ This file is writeable and can be used to set the assumed
+ battery 'full level'. As batteries age, this value has to be
+ amended over time.
+
+What: /sys/class/power_supply/max14577-charger/device/fast_charge_timer
+Date: October 2014
+KernelVersion: 3.18.0
+Contact: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Description:
+ This entry shows and sets the maximum time the max14577
+ charger operates in fast-charge mode. When the timer expires
+ the device will terminate fast-charge mode (charging current
+ will drop to 0 A) and will trigger interrupt.
+
+ Valid values:
+ - 5, 6 or 7 (hours),
+ - 0: disabled.
+
+What: /sys/class/power_supply/max77693-charger/device/fast_charge_timer
+Date: January 2015
+KernelVersion: 3.19.0
+Contact: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Description:
+ This entry shows and sets the maximum time the max77693
+ charger operates in fast-charge mode. When the timer expires
+ the device will terminate fast-charge mode (charging current
+ will drop to 0 A) and will trigger interrupt.
+
+ Valid values:
+ - 4 - 16 (hours), step by 2 (rounded down)
+ - 0: disabled.
+
+What: /sys/class/power_supply/max77693-charger/device/top_off_threshold_current
+Date: January 2015
+KernelVersion: 3.19.0
+Contact: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Description:
+ This entry shows and sets the charging current threshold for
+ entering top-off charging mode. When charging current in fast
+ charge mode drops below this value, the charger will trigger
+ interrupt and start top-off charging mode.
+
+ Valid values:
+ - 100000 - 200000 (microamps), step by 25000 (rounded down)
+ - 200000 - 350000 (microamps), step by 50000 (rounded down)
+ - 0: disabled.
+
+What: /sys/class/power_supply/max77693-charger/device/top_off_timer
+Date: January 2015
+KernelVersion: 3.19.0
+Contact: Krzysztof Kozlowski <k.kozlowski@samsung.com>
+Description:
+ This entry shows and sets the maximum time the max77693
+ charger operates in top-off charge mode. When the timer expires
+ the device will terminate top-off charge mode (charging current
+ will drop to 0 A) and will trigger interrupt.
+
+ Valid values:
+ - 0 - 70 (minutes), step by 10 (rounded down)
diff --git a/Documentation/ABI/testing/sysfs-class-powercap b/Documentation/ABI/testing/sysfs-class-powercap
new file mode 100644
index 000000000..db3b3ff70
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-powercap
@@ -0,0 +1,152 @@
+What: /sys/class/powercap/
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ The powercap/ class sub directory belongs to the power cap
+ subsystem. Refer to
+ Documentation/power/powercap/powercap.txt for details.
+
+What: /sys/class/powercap/<control type>
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ A <control type> is a unique name under /sys/class/powercap.
+ Here <control type> determines how the power is going to be
+ controlled. A <control type> can contain multiple power zones.
+
+What: /sys/class/powercap/<control type>/enabled
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ This allows to enable/disable power capping for a "control type".
+ This status affects every power zone using this "control_type.
+
+What: /sys/class/powercap/<control type>/<power zone>
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ A power zone is a single or a collection of devices, which can
+ be independently monitored and controlled. A power zone sysfs
+ entry is qualified with the name of the <control type>.
+ E.g. intel-rapl:0:1:1.
+
+What: /sys/class/powercap/<control type>/<power zone>/<child power zone>
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Power zones may be organized in a hierarchy in which child
+ power zones provide monitoring and control for a subset of
+ devices under the parent. For example, if there is a parent
+ power zone for a whole CPU package, each CPU core in it can
+ be a child power zone.
+
+What: /sys/class/powercap/.../<power zone>/name
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Specifies the name of this power zone.
+
+What: /sys/class/powercap/.../<power zone>/energy_uj
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Current energy counter in micro-joules. Write "0" to reset.
+ If the counter can not be reset, then this attribute is
+ read-only.
+
+What: /sys/class/powercap/.../<power zone>/max_energy_range_uj
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Range of the above energy counter in micro-joules.
+
+
+What: /sys/class/powercap/.../<power zone>/power_uw
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Current power in micro-watts.
+
+What: /sys/class/powercap/.../<power zone>/max_power_range_uw
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Range of the above power value in micro-watts.
+
+What: /sys/class/powercap/.../<power zone>/constraint_X_name
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Each power zone can define one or more constraints. Each
+ constraint can have an optional name. Here "X" can have values
+ from 0 to max integer.
+
+What: /sys/class/powercap/.../<power zone>/constraint_X_power_limit_uw
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Power limit in micro-watts should be applicable for
+ the time window specified by "constraint_X_time_window_us".
+ Here "X" can have values from 0 to max integer.
+
+What: /sys/class/powercap/.../<power zone>/constraint_X_time_window_us
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Time window in micro seconds. This is used along with
+ constraint_X_power_limit_uw to define a power constraint.
+ Here "X" can have values from 0 to max integer.
+
+
+What: /sys/class/powercap/<control type>/.../constraint_X_max_power_uw
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Maximum allowed power in micro watts for this constraint.
+ Here "X" can have values from 0 to max integer.
+
+What: /sys/class/powercap/<control type>/.../constraint_X_min_power_uw
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Minimum allowed power in micro watts for this constraint.
+ Here "X" can have values from 0 to max integer.
+
+What: /sys/class/powercap/.../<power zone>/constraint_X_max_time_window_us
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Maximum allowed time window in micro seconds for this
+ constraint. Here "X" can have values from 0 to max integer.
+
+What: /sys/class/powercap/.../<power zone>/constraint_X_min_time_window_us
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description:
+ Minimum allowed time window in micro seconds for this
+ constraint. Here "X" can have values from 0 to max integer.
+
+What: /sys/class/powercap/.../<power zone>/enabled
+Date: September 2013
+KernelVersion: 3.13
+Contact: linux-pm@vger.kernel.org
+Description
+ This allows to enable/disable power capping at power zone level.
+ This applies to current power zone and its children.
diff --git a/Documentation/ABI/testing/sysfs-class-pwm b/Documentation/ABI/testing/sysfs-class-pwm
new file mode 100644
index 000000000..c479d77b6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-pwm
@@ -0,0 +1,79 @@
+What: /sys/class/pwm/
+Date: May 2013
+KernelVersion: 3.11
+Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
+Description:
+ The pwm/ class sub-directory belongs to the Generic PWM
+ Framework and provides a sysfs interface for using PWM
+ channels.
+
+What: /sys/class/pwm/pwmchipN/
+Date: May 2013
+KernelVersion: 3.11
+Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
+Description:
+ A /sys/class/pwm/pwmchipN directory is created for each
+ probed PWM controller/chip where N is the base of the
+ PWM chip.
+
+What: /sys/class/pwm/pwmchipN/npwm
+Date: May 2013
+KernelVersion: 3.11
+Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
+Description:
+ The number of PWM channels supported by the PWM chip.
+
+What: /sys/class/pwm/pwmchipN/export
+Date: May 2013
+KernelVersion: 3.11
+Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
+Description:
+ Exports a PWM channel from the PWM chip for sysfs control.
+ Value is between 0 and /sys/class/pwm/pwmchipN/npwm - 1.
+
+What: /sys/class/pwm/pwmchipN/unexport
+Date: May 2013
+KernelVersion: 3.11
+Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
+Description:
+ Unexports a PWM channel.
+
+What: /sys/class/pwm/pwmchipN/pwmX
+Date: May 2013
+KernelVersion: 3.11
+Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
+Description:
+ A /sys/class/pwm/pwmchipN/pwmX directory is created for
+ each exported PWM channel where X is the exported PWM
+ channel number.
+
+What: /sys/class/pwm/pwmchipN/pwmX/period
+Date: May 2013
+KernelVersion: 3.11
+Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
+Description:
+ Sets the PWM signal period in nanoseconds.
+
+What: /sys/class/pwm/pwmchipN/pwmX/duty_cycle
+Date: May 2013
+KernelVersion: 3.11
+Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
+Description:
+ Sets the PWM signal duty cycle in nanoseconds.
+
+What: /sys/class/pwm/pwmchipN/pwmX/polarity
+Date: May 2013
+KernelVersion: 3.11
+Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
+Description:
+ Sets the output polarity of the PWM signal to "normal" or
+ "inversed".
+
+What: /sys/class/pwm/pwmchipN/pwmX/enable
+Date: May 2013
+KernelVersion: 3.11
+Contact: H Hartley Sweeten <hsweeten@visionengravers.com>
+Description:
+ Enable/disable the PWM signal.
+ 0 is disabled
+ 1 is enabled
diff --git a/Documentation/ABI/testing/sysfs-class-rc b/Documentation/ABI/testing/sysfs-class-rc
new file mode 100644
index 000000000..b65674da4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-rc
@@ -0,0 +1,111 @@
+What: /sys/class/rc/
+Date: Apr 2010
+KernelVersion: 2.6.35
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+Description:
+ The rc/ class sub-directory belongs to the Remote Controller
+ core and provides a sysfs interface for configuring infrared
+ remote controller receivers.
+
+What: /sys/class/rc/rcN/
+Date: Apr 2010
+KernelVersion: 2.6.35
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+Description:
+ A /sys/class/rc/rcN directory is created for each remote
+ control receiver device where N is the number of the receiver.
+
+What: /sys/class/rc/rcN/protocols
+Date: Jun 2010
+KernelVersion: 2.6.36
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+Description:
+ Reading this file returns a list of available protocols,
+ something like:
+ "rc5 [rc6] nec jvc [sony]"
+ Enabled protocols are shown in [] brackets.
+ Writing "+proto" will add a protocol to the list of enabled
+ protocols.
+ Writing "-proto" will remove a protocol from the list of enabled
+ protocols.
+ Writing "proto" will enable only "proto".
+ Writing "none" will disable all protocols.
+ Write fails with EINVAL if an invalid protocol combination or
+ unknown protocol name is used.
+
+What: /sys/class/rc/rcN/filter
+Date: Jan 2014
+KernelVersion: 3.15
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+Description:
+ Sets the scancode filter expected value.
+ Use in combination with /sys/class/rc/rcN/filter_mask to set the
+ expected value of the bits set in the filter mask.
+ If the hardware supports it then scancodes which do not match
+ the filter will be ignored. Otherwise the write will fail with
+ an error.
+ This value may be reset to 0 if the current protocol is altered.
+
+What: /sys/class/rc/rcN/filter_mask
+Date: Jan 2014
+KernelVersion: 3.15
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+Description:
+ Sets the scancode filter mask of bits to compare.
+ Use in combination with /sys/class/rc/rcN/filter to set the bits
+ of the scancode which should be compared against the expected
+ value. A value of 0 disables the filter to allow all valid
+ scancodes to be processed.
+ If the hardware supports it then scancodes which do not match
+ the filter will be ignored. Otherwise the write will fail with
+ an error.
+ This value may be reset to 0 if the current protocol is altered.
+
+What: /sys/class/rc/rcN/wakeup_protocols
+Date: Feb 2014
+KernelVersion: 3.15
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+Description:
+ Reading this file returns a list of available protocols to use
+ for the wakeup filter, something like:
+ "rc5 rc6 nec jvc [sony]"
+ The enabled wakeup protocol is shown in [] brackets.
+ Writing "+proto" will add a protocol to the list of enabled
+ wakeup protocols.
+ Writing "-proto" will remove a protocol from the list of enabled
+ wakeup protocols.
+ Writing "proto" will use "proto" for wakeup events.
+ Writing "none" will disable wakeup.
+ Write fails with EINVAL if an invalid protocol combination or
+ unknown protocol name is used, or if wakeup is not supported by
+ the hardware.
+
+What: /sys/class/rc/rcN/wakeup_filter
+Date: Jan 2014
+KernelVersion: 3.15
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+Description:
+ Sets the scancode wakeup filter expected value.
+ Use in combination with /sys/class/rc/rcN/wakeup_filter_mask to
+ set the expected value of the bits set in the wakeup filter mask
+ to trigger a system wake event.
+ If the hardware supports it and wakeup_filter_mask is not 0 then
+ scancodes which match the filter will wake the system from e.g.
+ suspend to RAM or power off.
+ Otherwise the write will fail with an error.
+ This value may be reset to 0 if the wakeup protocol is altered.
+
+What: /sys/class/rc/rcN/wakeup_filter_mask
+Date: Jan 2014
+KernelVersion: 3.15
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+Description:
+ Sets the scancode wakeup filter mask of bits to compare.
+ Use in combination with /sys/class/rc/rcN/wakeup_filter to set
+ the bits of the scancode which should be compared against the
+ expected value to trigger a system wake event.
+ If the hardware supports it and wakeup_filter_mask is not 0 then
+ scancodes which match the filter will wake the system from e.g.
+ suspend to RAM or power off.
+ Otherwise the write will fail with an error.
+ This value may be reset to 0 if the wakeup protocol is altered.
diff --git a/Documentation/ABI/testing/sysfs-class-regulator b/Documentation/ABI/testing/sysfs-class-regulator
new file mode 100644
index 000000000..bc578bc60
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-regulator
@@ -0,0 +1,372 @@
+What: /sys/class/regulator/.../state
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ state. This reports the regulator enable control, for
+ regulators which can report that input value.
+
+ This will be one of the following strings:
+
+ 'enabled'
+ 'disabled'
+ 'unknown'
+
+ 'enabled' means the regulator output is ON and is supplying
+ power to the system (assuming no error prevents it).
+
+ 'disabled' means the regulator output is OFF and is not
+ supplying power to the system (unless some non-Linux
+ control has enabled it).
+
+ 'unknown' means software cannot determine the state, or
+ the reported state is invalid.
+
+ NOTE: this field can be used in conjunction with microvolts
+ or microamps to determine configured regulator output levels.
+
+
+What: /sys/class/regulator/.../status
+Description:
+ Some regulator directories will contain a field called
+ "status". This reports the current regulator status, for
+ regulators which can report that output value.
+
+ This will be one of the following strings:
+
+ off
+ on
+ error
+ fast
+ normal
+ idle
+ standby
+
+ "off" means the regulator is not supplying power to the
+ system.
+
+ "on" means the regulator is supplying power to the system,
+ and the regulator can't report a detailed operation mode.
+
+ "error" indicates an out-of-regulation status such as being
+ disabled due to thermal shutdown, or voltage being unstable
+ because of problems with the input power supply.
+
+ "fast", "normal", "idle", and "standby" are all detailed
+ regulator operation modes (described elsewhere). They
+ imply "on", but provide more detail.
+
+ Note that regulator status is a function of many inputs,
+ not limited to control inputs from Linux. For example,
+ the actual load presented may trigger "error" status; or
+ a regulator may be enabled by another user, even though
+ Linux did not enable it.
+
+
+What: /sys/class/regulator/.../type
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Each regulator directory will contain a field called
+ type. This holds the regulator type.
+
+ This will be one of the following strings:
+
+ 'voltage'
+ 'current'
+ 'unknown'
+
+ 'voltage' means the regulator output voltage can be controlled
+ by software.
+
+ 'current' means the regulator output current limit can be
+ controlled by software.
+
+ 'unknown' means software cannot control either voltage or
+ current limit.
+
+
+What: /sys/class/regulator/.../microvolts
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ microvolts. This holds the regulator output voltage setting
+ measured in microvolts (i.e. E-6 Volts), for regulators
+ which can report the control input for voltage.
+
+ NOTE: This value should not be used to determine the regulator
+ output voltage level as this value is the same regardless of
+ whether the regulator is enabled or disabled.
+
+
+What: /sys/class/regulator/.../microamps
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ microamps. This holds the regulator output current limit
+ setting measured in microamps (i.e. E-6 Amps), for regulators
+ which can report the control input for a current limit.
+
+ NOTE: This value should not be used to determine the regulator
+ output current level as this value is the same regardless of
+ whether the regulator is enabled or disabled.
+
+
+What: /sys/class/regulator/.../opmode
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ opmode. This holds the current regulator operating mode,
+ for regulators which can report that control input value.
+
+ The opmode value can be one of the following strings:
+
+ 'fast'
+ 'normal'
+ 'idle'
+ 'standby'
+ 'unknown'
+
+ The modes are described in include/linux/regulator/consumer.h
+
+ NOTE: This value should not be used to determine the regulator
+ output operating mode as this value is the same regardless of
+ whether the regulator is enabled or disabled. A "status"
+ attribute may be available to determine the actual mode.
+
+
+What: /sys/class/regulator/.../min_microvolts
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ min_microvolts. This holds the minimum safe working regulator
+ output voltage setting for this domain measured in microvolts,
+ for regulators which support voltage constraints.
+
+ NOTE: this will return the string 'constraint not defined' if
+ the power domain has no min microvolts constraint defined by
+ platform code.
+
+
+What: /sys/class/regulator/.../max_microvolts
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ max_microvolts. This holds the maximum safe working regulator
+ output voltage setting for this domain measured in microvolts,
+ for regulators which support voltage constraints.
+
+ NOTE: this will return the string 'constraint not defined' if
+ the power domain has no max microvolts constraint defined by
+ platform code.
+
+
+What: /sys/class/regulator/.../min_microamps
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ min_microamps. This holds the minimum safe working regulator
+ output current limit setting for this domain measured in
+ microamps, for regulators which support current constraints.
+
+ NOTE: this will return the string 'constraint not defined' if
+ the power domain has no min microamps constraint defined by
+ platform code.
+
+
+What: /sys/class/regulator/.../max_microamps
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ max_microamps. This holds the maximum safe working regulator
+ output current limit setting for this domain measured in
+ microamps, for regulators which support current constraints.
+
+ NOTE: this will return the string 'constraint not defined' if
+ the power domain has no max microamps constraint defined by
+ platform code.
+
+
+What: /sys/class/regulator/.../name
+Date: October 2008
+KernelVersion: 2.6.28
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Each regulator directory will contain a field called
+ name. This holds a string identifying the regulator for
+ display purposes.
+
+ NOTE: this will be empty if no suitable name is provided
+ by platform or regulator drivers.
+
+
+What: /sys/class/regulator/.../num_users
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Each regulator directory will contain a field called
+ num_users. This holds the number of consumer devices that
+ have called regulator_enable() on this regulator.
+
+
+What: /sys/class/regulator/.../requested_microamps
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ requested_microamps. This holds the total requested load
+ current in microamps for this regulator from all its consumer
+ devices.
+
+
+What: /sys/class/regulator/.../parent
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a link called parent.
+ This points to the parent or supply regulator if one exists.
+
+What: /sys/class/regulator/.../suspend_mem_microvolts
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ suspend_mem_microvolts. This holds the regulator output
+ voltage setting for this domain measured in microvolts when
+ the system is suspended to memory, for voltage regulators
+ implementing suspend voltage configuration constraints.
+
+What: /sys/class/regulator/.../suspend_disk_microvolts
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ suspend_disk_microvolts. This holds the regulator output
+ voltage setting for this domain measured in microvolts when
+ the system is suspended to disk, for voltage regulators
+ implementing suspend voltage configuration constraints.
+
+What: /sys/class/regulator/.../suspend_standby_microvolts
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ suspend_standby_microvolts. This holds the regulator output
+ voltage setting for this domain measured in microvolts when
+ the system is suspended to standby, for voltage regulators
+ implementing suspend voltage configuration constraints.
+
+What: /sys/class/regulator/.../suspend_mem_mode
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ suspend_mem_mode. This holds the regulator operating mode
+ setting for this domain when the system is suspended to
+ memory, for regulators implementing suspend mode
+ configuration constraints.
+
+What: /sys/class/regulator/.../suspend_disk_mode
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ suspend_disk_mode. This holds the regulator operating mode
+ setting for this domain when the system is suspended to disk,
+ for regulators implementing suspend mode configuration
+ constraints.
+
+What: /sys/class/regulator/.../suspend_standby_mode
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ suspend_standby_mode. This holds the regulator operating mode
+ setting for this domain when the system is suspended to
+ standby, for regulators implementing suspend mode
+ configuration constraints.
+
+What: /sys/class/regulator/.../suspend_mem_state
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ suspend_mem_state. This holds the regulator operating state
+ when suspended to memory, for regulators implementing suspend
+ configuration constraints.
+
+ This will be one of the same strings reported by
+ the "state" attribute.
+
+What: /sys/class/regulator/.../suspend_disk_state
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ suspend_disk_state. This holds the regulator operating state
+ when suspended to disk, for regulators implementing
+ suspend configuration constraints.
+
+ This will be one of the same strings reported by
+ the "state" attribute.
+
+What: /sys/class/regulator/.../suspend_standby_state
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: Liam Girdwood <lrg@slimlogic.co.uk>
+Description:
+ Some regulator directories will contain a field called
+ suspend_standby_state. This holds the regulator operating
+ state when suspended to standby, for regulators implementing
+ suspend configuration constraints.
+
+ This will be one of the same strings reported by
+ the "state" attribute.
+
+What: /sys/class/regulator/.../bypass
+Date: September 2012
+KernelVersion: 3.7
+Contact: Mark Brown <broonie@opensource.wolfsonmicro.com>
+Description:
+ Some regulator directories will contain a field called
+ bypass. This indicates if the device is in bypass mode.
+
+ This will be one of the following strings:
+
+ 'enabled'
+ 'disabled'
+ 'unknown'
+
+ 'enabled' means the regulator is in bypass mode.
+
+ 'disabled' means that the regulator is regulating.
+
+ 'unknown' means software cannot determine the state, or
+ the reported state is invalid.
diff --git a/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration b/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration
new file mode 100644
index 000000000..4cf1e7222
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration
@@ -0,0 +1,12 @@
+What: Attribute for calibrating ST-Ericsson AB8500 Real Time Clock
+Date: Oct 2011
+KernelVersion: 3.0
+Contact: Mark Godfrey <mark.godfrey@stericsson.com>
+Description: The rtc_calibration attribute allows the userspace to
+ calibrate the AB8500.s 32KHz Real Time Clock.
+ Every 60 seconds the AB8500 will correct the RTC's value
+ by adding to it the value of this attribute.
+ The range of the attribute is -127 to +127 in units of
+ 30.5 micro-seconds (half-parts-per-million of the 32KHz clock)
+Users: The /vendor/st-ericsson/base_utilities/core/rtc_calibration
+ daemon uses this interface.
diff --git a/Documentation/ABI/testing/sysfs-class-scsi_host b/Documentation/ABI/testing/sysfs-class-scsi_host
new file mode 100644
index 000000000..0eb255e7d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-scsi_host
@@ -0,0 +1,29 @@
+What: /sys/class/scsi_host/hostX/isci_id
+Date: June 2011
+Contact: Dave Jiang <dave.jiang@intel.com>
+Description:
+ This file contains the enumerated host ID for the Intel
+ SCU controller. The Intel(R) C600 Series Chipset SATA/SAS
+ Storage Control Unit embeds up to two 4-port controllers in
+ a single PCI device. The controllers are enumerated in order
+ which usually means the lowest number scsi_host corresponds
+ with the first controller, but this association is not
+ guaranteed. The 'isci_id' attribute unambiguously identifies
+ the controller index: '0' for the first controller,
+ '1' for the second.
+
+What: /sys/class/scsi_host/hostX/acciopath_status
+Date: November 2013
+Contact: Stephen M. Cameron <scameron@beardog.cce.hp.com>
+Description: This file contains the current status of the "SSD Smart Path"
+ feature of HP Smart Array RAID controllers using the hpsa
+ driver. SSD Smart Path, when enabled permits the driver to
+ send i/o requests directly to physical devices that are part
+ of a logical drive, bypassing the controllers firmware RAID
+ stack for a performance advantage when possible. A value of
+ '1' indicates the feature is enabled, and the controller may
+ use the direct i/o path to physical devices. A value of zero
+ means the feature is disabled and the controller may not use
+ the direct i/o path to physical devices. This setting is
+ controller wide, affecting all configured logical drives on the
+ controller. This file is readable and writable.
diff --git a/Documentation/ABI/testing/sysfs-class-uwb_rc b/Documentation/ABI/testing/sysfs-class-uwb_rc
new file mode 100644
index 000000000..85f4875d1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-uwb_rc
@@ -0,0 +1,159 @@
+What: /sys/class/uwb_rc
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ Interfaces for WiMedia Ultra Wideband Common Radio
+ Platform (UWB) radio controllers.
+
+ Familiarity with the ECMA-368 'High Rate Ultra
+ Wideband MAC and PHY Specification' is assumed.
+
+What: /sys/class/uwb_rc/beacon_timeout_ms
+Date: July 2008
+KernelVersion: 2.6.27
+Description:
+ If no beacons are received from a device for at least
+ this time, the device will be considered to have gone
+ and it will be removed. The default is 3 superframes
+ (~197 ms) as required by the specification.
+
+What: /sys/class/uwb_rc/uwbN/
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ An individual UWB radio controller.
+
+What: /sys/class/uwb_rc/uwbN/beacon
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ Write:
+
+ <channel>
+
+ to force a specific channel to be used when beaconing,
+ or, if <channel> is -1, to prohibit beaconing. If
+ <channel> is 0, then the default channel selection
+ algorithm will be used. Valid channels depends on the
+ radio controller's supported band groups.
+
+ Reading returns the currently active channel, or -1 if
+ the radio controller is not beaconing.
+
+What: /sys/class/uwb_rc/uwbN/ASIE
+Date: August 2014
+KernelVersion: 3.18
+Contact: linux-usb@vger.kernel.org
+Description:
+
+ The application-specific information element (ASIE)
+ included in this device's beacon, in space separated
+ hex octets.
+
+ Reading returns the current ASIE. Writing replaces
+ the current ASIE with the one written.
+
+What: /sys/class/uwb_rc/uwbN/scan
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ Write:
+
+ <channel> <type> [<bpst offset>]
+
+ to start (or stop) scanning on a channel. <type> is one of:
+ 0 - scan
+ 1 - scan outside BP
+ 2 - scan while inactive
+ 3 - scanning disabled
+ 4 - scan (with start time of <bpst offset>)
+
+What: /sys/class/uwb_rc/uwbN/mac_address
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ The EUI-48, in colon-separated hex octets, for this
+ radio controller. A write will change the radio
+ controller's EUI-48 but only do so while the device is
+ not beaconing or scanning.
+
+What: /sys/class/uwb_rc/uwbN/wusbhc
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ A symlink to the device (if any) of the WUSB Host
+ Controller PAL using this radio controller.
+
+What: /sys/class/uwb_rc/uwbN/<EUI-48>/
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ A neighbour UWB device that has either been detected
+ as part of a scan or is a member of the radio
+ controllers beacon group.
+
+What: /sys/class/uwb_rc/uwbN/<EUI-48>/BPST
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ The time (using the radio controllers internal 1 ms
+ interval superframe timer) of the last beacon from
+ this device was received.
+
+What: /sys/class/uwb_rc/uwbN/<EUI-48>/DevAddr
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ The current DevAddr of this device in colon separated
+ hex octets.
+
+What: /sys/class/uwb_rc/uwbN/<EUI-48>/EUI_48
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+
+ The EUI-48 of this device in colon separated hex
+ octets.
+
+What: /sys/class/uwb_rc/uwbN/<EUI-48>/BPST
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+
+What: /sys/class/uwb_rc/uwbN/<EUI-48>/IEs
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ The latest IEs included in this device's beacon, in
+ space separated hex octets with one IE per line.
+
+What: /sys/class/uwb_rc/uwbN/<EUI-48>/LQE
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ Link Quality Estimate - the Signal to Noise Ratio
+ (SNR) of all packets received from this device in dB.
+ This gives an estimate on a suitable PHY rate. Refer
+ to [ECMA-368] section 13.3 for more details.
+
+What: /sys/class/uwb_rc/uwbN/<EUI-48>/RSSI
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: linux-usb@vger.kernel.org
+Description:
+ Received Signal Strength Indication - the strength of
+ the received signal in dB. LQE is a more useful
+ measure of the radio link quality.
diff --git a/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc
new file mode 100644
index 000000000..5977e2875
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-uwb_rc-wusbhc
@@ -0,0 +1,57 @@
+What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_chid
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ Write the CHID (16 space-separated hex octets) for this host controller.
+ This starts the host controller, allowing it to accept connection from
+ WUSB devices.
+
+ Set an all zero CHID to stop the host controller.
+
+What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_trust_timeout
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ Devices that haven't sent a WUSB packet to the host
+ within 'wusb_trust_timeout' ms are considered to have
+ disconnected and are removed. The default value of
+ 4000 ms is the value required by the WUSB
+ specification.
+
+ Since this relates to security (specifically, the
+ lifetime of PTKs and GTKs) it should not be changed
+ from the default.
+
+What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_phy_rate
+Date: August 2009
+KernelVersion: 2.6.32
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ The maximum PHY rate to use for all connected devices.
+ This is only of limited use for testing and
+ development as the hardware's automatic rate
+ adaptation is better then this simple control.
+
+ Refer to [ECMA-368] section 10.3.1.1 for the value to
+ use.
+
+What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_dnts
+Date: June 2013
+KernelVersion: 3.11
+Contact: Thomas Pugliese <thomas.pugliese@gmail.com>
+Description:
+ The device notification time slot (DNTS) count and inverval in
+ milliseconds that the WUSB host should use. This controls how
+ often the devices will have the opportunity to send
+ notifications to the host.
+
+What: /sys/class/uwb_rc/uwbN/wusbhc/wusb_retry_count
+Date: June 2013
+KernelVersion: 3.11
+Contact: Thomas Pugliese <thomas.pugliese@gmail.com>
+Description:
+ The number of retries that the WUSB host should attempt
+ before reporting an error for a bus transaction. The range of
+ valid values is [0..15], where 0 indicates infinite retries.
diff --git a/Documentation/ABI/testing/sysfs-dev b/Documentation/ABI/testing/sysfs-dev
new file mode 100644
index 000000000..a9f2b8b05
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-dev
@@ -0,0 +1,20 @@
+What: /sys/dev
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Dan Williams <dan.j.williams@intel.com>
+Description: The /sys/dev tree provides a method to look up the sysfs
+ path for a device using the information returned from
+ stat(2). There are two directories, 'block' and 'char',
+ beneath /sys/dev containing symbolic links with names of
+ the form "<major>:<minor>". These links point to the
+ corresponding sysfs path for the given device.
+
+ Example:
+ $ readlink /sys/dev/block/8:32
+ ../../block/sdc
+
+ Entries in /sys/dev/char and /sys/dev/block will be
+ dynamically created and destroyed as devices enter and
+ leave the system.
+
+Users: mdadm <linux-raid@vger.kernel.org>
diff --git a/Documentation/ABI/testing/sysfs-devices b/Documentation/ABI/testing/sysfs-devices
new file mode 100644
index 000000000..5fcc94358
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices
@@ -0,0 +1,25 @@
+What: /sys/devices
+Date: February 2006
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description:
+ The /sys/devices tree contains a snapshot of the
+ internal state of the kernel device tree. Devices will
+ be added and removed dynamically as the machine runs,
+ and between different kernel versions, the layout of the
+ devices within this tree will change.
+
+ Please do not rely on the format of this tree because of
+ this. If a program wishes to find different things in
+ the tree, please use the /sys/class structure and rely
+ on the symlinks there to point to the proper location
+ within the /sys/devices tree of the individual devices.
+ Or rely on the uevent messages to notify programs of
+ devices being added and removed from this tree to find
+ the location of those devices.
+
+ Note that sometimes not all devices along the directory
+ chain will have emitted uevent messages, so userspace
+ programs must be able to handle such occurrences.
+
+Users:
+ udev <linux-hotplug-devel@lists.sourceforge.net>
diff --git a/Documentation/ABI/testing/sysfs-devices-edac b/Documentation/ABI/testing/sysfs-devices-edac
new file mode 100644
index 000000000..6568e0010
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-edac
@@ -0,0 +1,140 @@
+What: /sys/devices/system/edac/mc/mc*/reset_counters
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This write-only control file will zero all the statistical
+ counters for UE and CE errors on the given memory controller.
+ Zeroing the counters will also reset the timer indicating how
+ long since the last counter were reset. This is useful for
+ computing errors/time. Since the counters are always reset
+ at driver initialization time, no module/kernel parameter
+ is available.
+
+What: /sys/devices/system/edac/mc/mc*/seconds_since_reset
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays how many seconds have elapsed
+ since the last counter reset. This can be used with the error
+ counters to measure error rates.
+
+What: /sys/devices/system/edac/mc/mc*/mc_name
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays the type of memory controller
+ that is being utilized.
+
+What: /sys/devices/system/edac/mc/mc*/size_mb
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays, in count of megabytes, of memory
+ that this memory controller manages.
+
+What: /sys/devices/system/edac/mc/mc*/ue_count
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays the total count of uncorrectable
+ errors that have occurred on this memory controller. If
+ panic_on_ue is set, this counter will not have a chance to
+ increment, since EDAC will panic the system
+
+What: /sys/devices/system/edac/mc/mc*/ue_noinfo_count
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays the number of UEs that have
+ occurred on this memory controller with no information as to
+ which DIMM slot is having errors.
+
+What: /sys/devices/system/edac/mc/mc*/ce_count
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays the total count of correctable
+ errors that have occurred on this memory controller. This
+ count is very important to examine. CEs provide early
+ indications that a DIMM is beginning to fail. This count
+ field should be monitored for non-zero values and report
+ such information to the system administrator.
+
+What: /sys/devices/system/edac/mc/mc*/ce_noinfo_count
+Date: January 2006
+Contact: linux-edac@vger.kernel.org
+Description: This attribute file displays the number of CEs that
+ have occurred on this memory controller wherewith no
+ information as to which DIMM slot is having errors. Memory is
+ handicapped, but operational, yet no information is available
+ to indicate which slot the failing memory is in. This count
+ field should be also be monitored for non-zero values.
+
+What: /sys/devices/system/edac/mc/mc*/sdram_scrub_rate
+Date: February 2007
+Contact: linux-edac@vger.kernel.org
+Description: Read/Write attribute file that controls memory scrubbing.
+ The scrubbing rate used by the memory controller is set by
+ writing a minimum bandwidth in bytes/sec to the attribute file.
+ The rate will be translated to an internal value that gives at
+ least the specified rate.
+ Reading the file will return the actual scrubbing rate employed.
+ If configuration fails or memory scrubbing is not implemented,
+ the value of the attribute file will be -1.
+
+What: /sys/devices/system/edac/mc/mc*/max_location
+Date: April 2012
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file displays the information about the last
+ available memory slot in this memory controller. It is used by
+ userspace tools in order to display the memory filling layout.
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/size
+Date: April 2012
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file will display the size of dimm or rank.
+ For dimm*/size, this is the size, in MB of the DIMM memory
+ stick. For rank*/size, this is the size, in MB for one rank
+ of the DIMM memory stick. On single rank memories (1R), this
+ is also the total size of the dimm. On dual rank (2R) memories,
+ this is half the size of the total DIMM memories.
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/dimm_dev_type
+Date: April 2012
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file will display what type of DRAM device is
+ being utilized on this DIMM (x1, x2, x4, x8, ...).
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/dimm_edac_mode
+Date: April 2012
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file will display what type of Error detection
+ and correction is being utilized. For example: S4ECD4ED would
+ mean a Chipkill with x4 DRAM.
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/dimm_label
+Date: April 2012
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+ linux-edac@vger.kernel.org
+Description: This control file allows this DIMM to have a label assigned
+ to it. With this label in the module, when errors occur
+ the output can provide the DIMM label in the system log.
+ This becomes vital for panic events to isolate the
+ cause of the UE event.
+ DIMM Labels must be assigned after booting, with information
+ that correctly identifies the physical slot with its
+ silk screen label. This information is currently very
+ motherboard specific and determination of this information
+ must occur in userland at this time.
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/dimm_location
+Date: April 2012
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file will display the location (csrow/channel,
+ branch/channel/slot or channel/slot) of the dimm or rank.
+
+What: /sys/devices/system/edac/mc/mc*/(dimm|rank)*/dimm_mem_type
+Date: April 2012
+Contact: Mauro Carvalho Chehab <m.chehab@samsung.com>
+ linux-edac@vger.kernel.org
+Description: This attribute file will display what type of memory is
+ currently on this csrow. Normally, either buffered or
+ unbuffered memory (for example, Unbuffered-DDR3).
diff --git a/Documentation/ABI/testing/sysfs-devices-firmware_node b/Documentation/ABI/testing/sysfs-devices-firmware_node
new file mode 100644
index 000000000..46badc9ea
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-firmware_node
@@ -0,0 +1,17 @@
+What: /sys/devices/.../firmware_node/
+Date: September 2012
+Contact: <>
+Description:
+ The /sys/devices/.../firmware_node directory contains attributes
+ allowing the user space to check and modify some firmware
+ related properties of given device.
+
+What: /sys/devices/.../firmware_node/description
+Date: September 2012
+Contact: Lance Ortiz <lance.ortiz@hp.com>
+Description:
+ The /sys/devices/.../firmware/description attribute contains a string
+ that describes the device as provided by the _STR method in the ACPI
+ namespace. This attribute is read-only. If the device does not have
+ an _STR method associated with it in the ACPI namespace, this
+ attribute is not present.
diff --git a/Documentation/ABI/testing/sysfs-devices-lpss_ltr b/Documentation/ABI/testing/sysfs-devices-lpss_ltr
new file mode 100644
index 000000000..ea9298d9b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-lpss_ltr
@@ -0,0 +1,44 @@
+What: /sys/devices/.../lpss_ltr/
+Date: March 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../lpss_ltr/ directory is only present for
+ devices included into the Intel Lynxpoint Low Power Subsystem
+ (LPSS). If present, it contains attributes containing the LTR
+ mode and the values of LTR registers of the device.
+
+What: /sys/devices/.../lpss_ltr/ltr_mode
+Date: March 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../lpss_ltr/ltr_mode attribute contains an
+ integer number (0 or 1) indicating whether or not the devices'
+ LTR functionality is working in the software mode (1).
+
+ This attribute is read-only. If the device's runtime PM status
+ is not "active", attempts to read from this attribute cause
+ -EAGAIN to be returned.
+
+What: /sys/devices/.../lpss_ltr/auto_ltr
+Date: March 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../lpss_ltr/auto_ltr attribute contains the
+ current value of the device's AUTO_LTR register (raw)
+ represented as an 8-digit hexadecimal number.
+
+ This attribute is read-only. If the device's runtime PM status
+ is not "active", attempts to read from this attribute cause
+ -EAGAIN to be returned.
+
+What: /sys/devices/.../lpss_ltr/sw_ltr
+Date: March 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../lpss_ltr/auto_ltr attribute contains the
+ current value of the device's SW_LTR register (raw) represented
+ as an 8-digit hexadecimal number.
+
+ This attribute is read-only. If the device's runtime PM status
+ is not "active", attempts to read from this attribute cause
+ -EAGAIN to be returned.
diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
new file mode 100644
index 000000000..deef3b572
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -0,0 +1,93 @@
+What: /sys/devices/system/memory
+Date: June 2008
+Contact: Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+ The /sys/devices/system/memory contains a snapshot of the
+ internal state of the kernel memory blocks. Files could be
+ added or removed dynamically to represent hot-add/remove
+ operations.
+Users: hotplug memory add/remove tools
+ http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
+
+What: /sys/devices/system/memory/memoryX/removable
+Date: June 2008
+Contact: Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+ The file /sys/devices/system/memory/memoryX/removable
+ indicates whether this memory block is removable or not.
+ This is useful for a user-level agent to determine
+ identify removable sections of the memory before attempting
+ potentially expensive hot-remove memory operation
+Users: hotplug memory remove tools
+ http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
+
+What: /sys/devices/system/memory/memoryX/phys_device
+Date: September 2008
+Contact: Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+ The file /sys/devices/system/memory/memoryX/phys_device
+ is read-only and is designed to show the name of physical
+ memory device. Implementation is currently incomplete.
+
+What: /sys/devices/system/memory/memoryX/phys_index
+Date: September 2008
+Contact: Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+ The file /sys/devices/system/memory/memoryX/phys_index
+ is read-only and contains the section ID in hexadecimal
+ which is equivalent to decimal X contained in the
+ memory section directory name.
+
+What: /sys/devices/system/memory/memoryX/state
+Date: September 2008
+Contact: Badari Pulavarty <pbadari@us.ibm.com>
+Description:
+ The file /sys/devices/system/memory/memoryX/state
+ is read-write. When read, its contents show the
+ online/offline state of the memory section. When written,
+ root can toggle the the online/offline state of a removable
+ memory section (see removable file description above)
+ using the following commands.
+ # echo online > /sys/devices/system/memory/memoryX/state
+ # echo offline > /sys/devices/system/memory/memoryX/state
+
+ For example, if /sys/devices/system/memory/memory22/removable
+ contains a value of 1 and
+ /sys/devices/system/memory/memory22/state contains the
+ string "online" the following command can be executed by
+ by root to offline that section.
+ # echo offline > /sys/devices/system/memory/memory22/state
+Users: hotplug memory remove tools
+ http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
+
+
+What: /sys/devices/system/memory/memoryX/valid_zones
+Date: July 2014
+Contact: Zhang Zhen <zhenzhang.zhang@huawei.com>
+Description:
+ The file /sys/devices/system/memory/memoryX/valid_zones is
+ read-only and is designed to show which zone this memory
+ block can be onlined to.
+
+What: /sys/devices/system/memoryX/nodeY
+Date: October 2009
+Contact: Linux Memory Management list <linux-mm@kvack.org>
+Description:
+ When CONFIG_NUMA is enabled, a symbolic link that
+ points to the corresponding NUMA node directory.
+
+ For example, the following symbolic link is created for
+ memory section 9 on node0:
+ /sys/devices/system/memory/memory9/node0 -> ../../node/node0
+
+
+What: /sys/devices/system/node/nodeX/memoryY
+Date: September 2008
+Contact: Gary Hade <garyhade@us.ibm.com>
+Description:
+ When CONFIG_NUMA is enabled
+ /sys/devices/system/node/nodeX/memoryY is a symbolic link that
+ points to the corresponding /sys/devices/system/memory/memoryY
+ memory section directory. For example, the following symbolic
+ link is created for memory section 9 on node0.
+ /sys/devices/system/node/node0/memory9 -> ../../memory/memory9
diff --git a/Documentation/ABI/testing/sysfs-devices-mmc b/Documentation/ABI/testing/sysfs-devices-mmc
new file mode 100644
index 000000000..5a50ab655
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-mmc
@@ -0,0 +1,21 @@
+What: /sys/devices/.../mmc_host/mmcX/mmcX:XXXX/enhanced_area_offset
+Date: January 2011
+Contact: Chuanxiao Dong <chuanxiao.dong@intel.com>
+Description:
+ Enhanced area is a new feature defined in eMMC4.4 standard.
+ eMMC4.4 or later card can support such feature. This kind of
+ area can help to improve the card performance. If the feature
+ is enabled, this attribute will indicate the start address of
+ enhanced data area. If not, this attribute will be -EINVAL.
+ Unit Byte. Format decimal.
+
+What: /sys/devices/.../mmc_host/mmcX/mmcX:XXXX/enhanced_area_size
+Date: January 2011
+Contact: Chuanxiao Dong <chuanxiao.dong@intel.com>
+Description:
+ Enhanced area is a new feature defined in eMMC4.4 standard.
+ eMMC4.4 or later card can support such feature. This kind of
+ area can help to improve the card performance. If the feature
+ is enabled, this attribute will indicate the size of enhanced
+ data area. If not, this attribute will be -EINVAL.
+ Unit KByte. Format decimal.
diff --git a/Documentation/ABI/testing/sysfs-devices-online b/Documentation/ABI/testing/sysfs-devices-online
new file mode 100644
index 000000000..f990026c0
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-online
@@ -0,0 +1,20 @@
+What: /sys/devices/.../online
+Date: April 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../online attribute is only present for
+ devices whose bus types provide .online() and .offline()
+ callbacks. The number read from it (0 or 1) reflects the value
+ of the device's 'offline' field. If that number is 1 and '0'
+ (or 'n', or 'N') is written to this file, the device bus type's
+ .offline() callback is executed for the device and (if
+ successful) its 'offline' field is updated accordingly. In
+ turn, if that number is 0 and '1' (or 'y', or 'Y') is written to
+ this file, the device bus type's .online() callback is executed
+ for the device and (if successful) its 'offline' field is
+ updated as appropriate.
+
+ After a successful execution of the bus type's .offline()
+ callback the device cannot be used for any purpose until either
+ it is removed (i.e. device_del() is called for it), or its bus
+ type's .online() is exeucted successfully.
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget b/Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget
new file mode 100644
index 000000000..d548eaac2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget
@@ -0,0 +1,21 @@
+What: /sys/devices/platform/_UDC_/gadget/suspended
+Date: April 2010
+Contact: Fabien Chouteau <fabien.chouteau@barco.com>
+Description:
+ Show the suspend state of an USB composite gadget.
+ 1 -> suspended
+ 0 -> resumed
+
+ (_UDC_ is the name of the USB Device Controller driver)
+
+What: /sys/devices/platform/_UDC_/gadget/gadget-lunX/nofua
+Date: July 2010
+Contact: Andy Shevchenko <andy.shevchenko@gmail.com>
+Description:
+ Show or set the reaction on the FUA (Force Unit Access) bit in
+ the SCSI WRITE(10,12) commands when a gadget in USB Mass
+ Storage mode.
+
+ Possible values are:
+ 1 -> ignore the FUA flag
+ 0 -> obey the FUA flag
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-docg3 b/Documentation/ABI/testing/sysfs-devices-platform-docg3
new file mode 100644
index 000000000..8aa367168
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-docg3
@@ -0,0 +1,34 @@
+What: /sys/devices/platform/docg3/f[0-3]_dps[01]_is_keylocked
+Date: November 2011
+KernelVersion: 3.3
+Contact: Robert Jarzmik <robert.jarzmik@free.fr>
+Description:
+ Show whether the floor (0 to 4), protection area (0 or 1) is
+ keylocked. Each docg3 chip (or floor) has 2 protection areas,
+ which can cover any part of it, block aligned, called DPS.
+ The protection has information embedded whether it blocks reads,
+ writes or both.
+ The result is:
+ 0 -> the DPS is not keylocked
+ 1 -> the DPS is keylocked
+Users: None identified so far.
+
+What: /sys/devices/platform/docg3/f[0-3]_dps[01]_protection_key
+Date: November 2011
+KernelVersion: 3.3
+Contact: Robert Jarzmik <robert.jarzmik@free.fr>
+Description:
+ Enter the protection key for the floor (0 to 4), protection area
+ (0 or 1). Each docg3 chip (or floor) has 2 protection areas,
+ which can cover any part of it, block aligned, called DPS.
+ The protection has information embedded whether it blocks reads,
+ writes or both.
+ The protection key is a string of 8 bytes (value 0-255).
+ Entering the correct value toggle the lock, and can be observed
+ through f[0-3]_dps[01]_is_keylocked.
+ Possible values are:
+ - 8 bytes
+ Typical values are:
+ - "00000000"
+ - "12345678"
+Users: None identified so far.
diff --git a/Documentation/ABI/testing/sysfs-devices-platform-sh_mobile_lcdc_fb b/Documentation/ABI/testing/sysfs-devices-platform-sh_mobile_lcdc_fb
new file mode 100644
index 000000000..210708242
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-sh_mobile_lcdc_fb
@@ -0,0 +1,44 @@
+What: /sys/devices/platform/sh_mobile_lcdc_fb.[0-3]/graphics/fb[0-9]/ovl_alpha
+Date: May 2012
+Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Description:
+ This file is only available on fb[0-9] devices corresponding
+ to overlay planes.
+
+ Stores the alpha blending value for the overlay. Values range
+ from 0 (transparent) to 255 (opaque). The value is ignored if
+ the mode is not set to Alpha Blending.
+
+What: /sys/devices/platform/sh_mobile_lcdc_fb.[0-3]/graphics/fb[0-9]/ovl_mode
+Date: May 2012
+Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Description:
+ This file is only available on fb[0-9] devices corresponding
+ to overlay planes.
+
+ Selects the composition mode for the overlay. Possible values
+ are
+
+ 0 - Alpha Blending
+ 1 - ROP3
+
+What: /sys/devices/platform/sh_mobile_lcdc_fb.[0-3]/graphics/fb[0-9]/ovl_position
+Date: May 2012
+Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Description:
+ This file is only available on fb[0-9] devices corresponding
+ to overlay planes.
+
+ Stores the x,y overlay position on the display in pixels. The
+ position format is `[0-9]+,[0-9]+'.
+
+What: /sys/devices/platform/sh_mobile_lcdc_fb.[0-3]/graphics/fb[0-9]/ovl_rop3
+Date: May 2012
+Contact: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+Description:
+ This file is only available on fb[0-9] devices corresponding
+ to overlay planes.
+
+ Stores the raster operation (ROP3) for the overlay. Values
+ range from 0 to 255. The value is ignored if the mode is not
+ set to ROP3.
diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power
new file mode 100644
index 000000000..676fdf5f2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-power
@@ -0,0 +1,276 @@
+What: /sys/devices/.../power/
+Date: January 2009
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../power directory contains attributes
+ allowing the user space to check and modify some power
+ management related properties of given device.
+
+What: /sys/devices/.../power/wakeup
+Date: January 2009
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../power/wakeup attribute allows the user
+ space to check if the device is enabled to wake up the system
+ from sleep states, such as the memory sleep state (suspend to
+ RAM) and hibernation (suspend to disk), and to enable or disable
+ it to do that as desired.
+
+ Some devices support "wakeup" events, which are hardware signals
+ used to activate the system from a sleep state. Such devices
+ have one of the following two values for the sysfs power/wakeup
+ file:
+
+ + "enabled\n" to issue the events;
+ + "disabled\n" not to do so;
+
+ In that cases the user space can change the setting represented
+ by the contents of this file by writing either "enabled", or
+ "disabled" to it.
+
+ For the devices that are not capable of generating system wakeup
+ events this file is not present. In that case the device cannot
+ be enabled to wake up the system from sleep states.
+
+What: /sys/devices/.../power/control
+Date: January 2009
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../power/control attribute allows the user
+ space to control the run-time power management of the device.
+
+ All devices have one of the following two values for the
+ power/control file:
+
+ + "auto\n" to allow the device to be power managed at run time;
+ + "on\n" to prevent the device from being power managed;
+
+ The default for all devices is "auto", which means that they may
+ be subject to automatic power management, depending on their
+ drivers. Changing this attribute to "on" prevents the driver
+ from power managing the device at run time. Doing that while
+ the device is suspended causes it to be woken up.
+
+What: /sys/devices/.../power/async
+Date: January 2009
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../async attribute allows the user space to
+ enable or diasble the device's suspend and resume callbacks to
+ be executed asynchronously (ie. in separate threads, in parallel
+ with the main suspend/resume thread) during system-wide power
+ transitions (eg. suspend to RAM, hibernation).
+
+ All devices have one of the following two values for the
+ power/async file:
+
+ + "enabled\n" to permit the asynchronous suspend/resume;
+ + "disabled\n" to forbid it;
+
+ The value of this attribute may be changed by writing either
+ "enabled", or "disabled" to it.
+
+ It generally is unsafe to permit the asynchronous suspend/resume
+ of a device unless it is certain that all of the PM dependencies
+ of the device are known to the PM core. However, for some
+ devices this attribute is set to "enabled" by bus type code or
+ device drivers and in that cases it should be safe to leave the
+ default value.
+
+What: /sys/devices/.../power/wakeup_count
+Date: September 2010
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../wakeup_count attribute contains the number
+ of signaled wakeup events associated with the device. This
+ attribute is read-only. If the device is not capable to wake up
+ the system from sleep states, this attribute is not present.
+ If the device is not enabled to wake up the system from sleep
+ states, this attribute is empty.
+
+What: /sys/devices/.../power/wakeup_active_count
+Date: September 2010
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../wakeup_active_count attribute contains the
+ number of times the processing of wakeup events associated with
+ the device was completed (at the kernel level). This attribute
+ is read-only. If the device is not capable to wake up the
+ system from sleep states, this attribute is not present. If
+ the device is not enabled to wake up the system from sleep
+ states, this attribute is empty.
+
+What: /sys/devices/.../power/wakeup_abort_count
+Date: February 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../wakeup_abort_count attribute contains the
+ number of times the processing of a wakeup event associated with
+ the device might have aborted system transition into a sleep
+ state in progress. This attribute is read-only. If the device
+ is not capable to wake up the system from sleep states, this
+ attribute is not present. If the device is not enabled to wake
+ up the system from sleep states, this attribute is empty.
+
+What: /sys/devices/.../power/wakeup_expire_count
+Date: February 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../wakeup_expire_count attribute contains the
+ number of times a wakeup event associated with the device has
+ been reported with a timeout that expired. This attribute is
+ read-only. If the device is not capable to wake up the system
+ from sleep states, this attribute is not present. If the
+ device is not enabled to wake up the system from sleep states,
+ this attribute is empty.
+
+What: /sys/devices/.../power/wakeup_active
+Date: September 2010
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../wakeup_active attribute contains either 1,
+ or 0, depending on whether or not a wakeup event associated with
+ the device is being processed (1). This attribute is read-only.
+ If the device is not capable to wake up the system from sleep
+ states, this attribute is not present. If the device is not
+ enabled to wake up the system from sleep states, this attribute
+ is empty.
+
+What: /sys/devices/.../power/wakeup_total_time_ms
+Date: September 2010
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../wakeup_total_time_ms attribute contains
+ the total time of processing wakeup events associated with the
+ device, in milliseconds. This attribute is read-only. If the
+ device is not capable to wake up the system from sleep states,
+ this attribute is not present. If the device is not enabled to
+ wake up the system from sleep states, this attribute is empty.
+
+What: /sys/devices/.../power/wakeup_max_time_ms
+Date: September 2010
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../wakeup_max_time_ms attribute contains
+ the maximum time of processing a single wakeup event associated
+ with the device, in milliseconds. This attribute is read-only.
+ If the device is not capable to wake up the system from sleep
+ states, this attribute is not present. If the device is not
+ enabled to wake up the system from sleep states, this attribute
+ is empty.
+
+What: /sys/devices/.../power/wakeup_last_time_ms
+Date: September 2010
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../wakeup_last_time_ms attribute contains
+ the value of the monotonic clock corresponding to the time of
+ signaling the last wakeup event associated with the device, in
+ milliseconds. This attribute is read-only. If the device is
+ not enabled to wake up the system from sleep states, this
+ attribute is not present. If the device is not enabled to wake
+ up the system from sleep states, this attribute is empty.
+
+What: /sys/devices/.../power/wakeup_prevent_sleep_time_ms
+Date: February 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../wakeup_prevent_sleep_time_ms attribute
+ contains the total time the device has been preventing
+ opportunistic transitions to sleep states from occurring.
+ This attribute is read-only. If the device is not capable to
+ wake up the system from sleep states, this attribute is not
+ present. If the device is not enabled to wake up the system
+ from sleep states, this attribute is empty.
+
+What: /sys/devices/.../power/autosuspend_delay_ms
+Date: September 2010
+Contact: Alan Stern <stern@rowland.harvard.edu>
+Description:
+ The /sys/devices/.../power/autosuspend_delay_ms attribute
+ contains the autosuspend delay value (in milliseconds). Some
+ drivers do not want their device to suspend as soon as it
+ becomes idle at run time; they want the device to remain
+ inactive for a certain minimum period of time first. That
+ period is called the autosuspend delay. Negative values will
+ prevent the device from being suspended at run time (similar
+ to writing "on" to the power/control attribute). Values >=
+ 1000 will cause the autosuspend timer expiration to be rounded
+ up to the nearest second.
+
+ Not all drivers support this attribute. If it isn't supported,
+ attempts to read or write it will yield I/O errors.
+
+What: /sys/devices/.../power/pm_qos_resume_latency_us
+Date: March 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../power/pm_qos_resume_latency_us attribute
+ contains the PM QoS resume latency limit for the given device,
+ which is the maximum allowed time it can take to resume the
+ device, after it has been suspended at run time, from a resume
+ request to the moment the device will be ready to process I/O,
+ in microseconds. If it is equal to 0, however, this means that
+ the PM QoS resume latency may be arbitrary.
+
+ Not all drivers support this attribute. If it isn't supported,
+ it is not present.
+
+ This attribute has no effect on system-wide suspend/resume and
+ hibernation.
+
+What: /sys/devices/.../power/pm_qos_latency_tolerance_us
+Date: January 2014
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../power/pm_qos_latency_tolerance_us attribute
+ contains the PM QoS active state latency tolerance limit for the
+ given device in microseconds. That is the maximum memory access
+ latency the device can suffer without any visible adverse
+ effects on user space functionality. If that value is the
+ string "any", the latency does not matter to user space at all,
+ but hardware should not be allowed to set the latency tolerance
+ for the device automatically.
+
+ Reading "auto" from this file means that the maximum memory
+ access latency for the device may be determined automatically
+ by the hardware as needed. Writing "auto" to it allows the
+ hardware to be switched to this mode if there are no other
+ latency tolerance requirements from the kernel side.
+
+ This attribute is only present if the feature controlled by it
+ is supported by the hardware.
+
+ This attribute has no effect on runtime suspend and resume of
+ devices and on system-wide suspend/resume and hibernation.
+
+What: /sys/devices/.../power/pm_qos_no_power_off
+Date: September 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../power/pm_qos_no_power_off attribute
+ is used for manipulating the PM QoS "no power off" flag. If
+ set, this flag indicates to the kernel that power should not
+ be removed entirely from the device.
+
+ Not all drivers support this attribute. If it isn't supported,
+ it is not present.
+
+ This attribute has no effect on system-wide suspend/resume and
+ hibernation.
+
+What: /sys/devices/.../power/pm_qos_remote_wakeup
+Date: September 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/devices/.../power/pm_qos_remote_wakeup attribute
+ is used for manipulating the PM QoS "remote wakeup required"
+ flag. If set, this flag indicates to the kernel that the
+ device is a source of user events that have to be signaled from
+ its low-power states.
+
+ Not all drivers support this attribute. If it isn't supported,
+ it is not present.
+
+ This attribute has no effect on system-wide suspend/resume and
+ hibernation.
diff --git a/Documentation/ABI/testing/sysfs-devices-power_resources_D0 b/Documentation/ABI/testing/sysfs-devices-power_resources_D0
new file mode 100644
index 000000000..73b77a6be
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-power_resources_D0
@@ -0,0 +1,13 @@
+What: /sys/devices/.../power_resources_D0/
+Date: January 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../power_resources_D0/ directory is only
+ present for device objects representing ACPI device nodes that
+ use ACPI power resources for power management.
+
+ If present, it contains symbolic links to device directories
+ representing ACPI power resources that need to be turned on for
+ the given device node to be in ACPI power state D0. The names
+ of the links are the same as the names of the directories they
+ point to.
diff --git a/Documentation/ABI/testing/sysfs-devices-power_resources_D1 b/Documentation/ABI/testing/sysfs-devices-power_resources_D1
new file mode 100644
index 000000000..30c20703f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-power_resources_D1
@@ -0,0 +1,14 @@
+What: /sys/devices/.../power_resources_D1/
+Date: January 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../power_resources_D1/ directory is only
+ present for device objects representing ACPI device nodes that
+ use ACPI power resources for power management and support ACPI
+ power state D1.
+
+ If present, it contains symbolic links to device directories
+ representing ACPI power resources that need to be turned on for
+ the given device node to be in ACPI power state D1. The names
+ of the links are the same as the names of the directories they
+ point to.
diff --git a/Documentation/ABI/testing/sysfs-devices-power_resources_D2 b/Documentation/ABI/testing/sysfs-devices-power_resources_D2
new file mode 100644
index 000000000..fd9d84b42
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-power_resources_D2
@@ -0,0 +1,14 @@
+What: /sys/devices/.../power_resources_D2/
+Date: January 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../power_resources_D2/ directory is only
+ present for device objects representing ACPI device nodes that
+ use ACPI power resources for power management and support ACPI
+ power state D2.
+
+ If present, it contains symbolic links to device directories
+ representing ACPI power resources that need to be turned on for
+ the given device node to be in ACPI power state D2. The names
+ of the links are the same as the names of the directories they
+ point to.
diff --git a/Documentation/ABI/testing/sysfs-devices-power_resources_D3hot b/Documentation/ABI/testing/sysfs-devices-power_resources_D3hot
new file mode 100644
index 000000000..3df32c20a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-power_resources_D3hot
@@ -0,0 +1,14 @@
+What: /sys/devices/.../power_resources_D3hot/
+Date: January 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../power_resources_D3hot/ directory is only
+ present for device objects representing ACPI device nodes that
+ use ACPI power resources for power management and support ACPI
+ power state D3hot.
+
+ If present, it contains symbolic links to device directories
+ representing ACPI power resources that need to be turned on for
+ the given device node to be in ACPI power state D3hot. The
+ names of the links are the same as the names of the directories
+ they point to.
diff --git a/Documentation/ABI/testing/sysfs-devices-power_resources_wakeup b/Documentation/ABI/testing/sysfs-devices-power_resources_wakeup
new file mode 100644
index 000000000..e0588feeb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-power_resources_wakeup
@@ -0,0 +1,13 @@
+What: /sys/devices/.../power_resources_wakeup/
+Date: April 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../power_resources_wakeup/ directory is only
+ present for device objects representing ACPI device nodes that
+ require ACPI power resources for wakeup signaling.
+
+ If present, it contains symbolic links to device directories
+ representing ACPI power resources that need to be turned on for
+ the given device node to be able to signal wakeup. The names of
+ the links are the same as the names of the directories they
+ point to.
diff --git a/Documentation/ABI/testing/sysfs-devices-power_state b/Documentation/ABI/testing/sysfs-devices-power_state
new file mode 100644
index 000000000..7ad954674
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-power_state
@@ -0,0 +1,20 @@
+What: /sys/devices/.../power_state
+Date: January 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../power_state attribute is only present for
+ device objects representing ACPI device nodes that provide power
+ management methods.
+
+ If present, it contains a string representing the current ACPI
+ power state of the given device node. Its possible values,
+ "D0", "D1", "D2", "D3hot", and "D3cold", reflect the power state
+ names defined by the ACPI specification (ACPI 4 and above).
+
+ If the device node uses shared ACPI power resources, this state
+ determines a list of power resources required not to be turned
+ off. However, some power resources needed by the device node in
+ higher-power (lower-number) states may also be ON because of
+ some other devices using them at the moment.
+
+ This attribute is read-only.
diff --git a/Documentation/ABI/testing/sysfs-devices-real_power_state b/Documentation/ABI/testing/sysfs-devices-real_power_state
new file mode 100644
index 000000000..8b3527c82
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-real_power_state
@@ -0,0 +1,23 @@
+What: /sys/devices/.../real_power_state
+Date: January 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../real_power_state attribute is only present
+ for device objects representing ACPI device nodes that provide
+ power management methods and use ACPI power resources for power
+ management.
+
+ If present, it contains a string representing the real ACPI
+ power state of the given device node as returned by the _PSC
+ control method or inferred from the configuration of power
+ resources. Its possible values, "D0", "D1", "D2", "D3hot", and
+ "D3cold", reflect the power state names defined by the ACPI
+ specification (ACPI 4 and above).
+
+ In some situations the value of this attribute may be different
+ from the value of the /sys/devices/.../power_state attribute for
+ the same device object. If that happens, some shared power
+ resources used by the device node are only ON because of some
+ other devices using them at the moment.
+
+ This attribute is read-only.
diff --git a/Documentation/ABI/testing/sysfs-devices-resource_in_use b/Documentation/ABI/testing/sysfs-devices-resource_in_use
new file mode 100644
index 000000000..b4a3bc592
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-resource_in_use
@@ -0,0 +1,12 @@
+What: /sys/devices/.../resource_in_use
+Date: January 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The /sys/devices/.../resource_in_use attribute is only present
+ for device objects representing ACPI power resources.
+
+ If present, it contains a number (0 or 1) representing the
+ current status of the given power resource (0 means that the
+ resource is not in use and therefore it has been turned off).
+
+ This attribute is read-only.
diff --git a/Documentation/ABI/testing/sysfs-devices-soc b/Documentation/ABI/testing/sysfs-devices-soc
new file mode 100644
index 000000000..6d9cc253f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-soc
@@ -0,0 +1,58 @@
+What: /sys/devices/socX
+Date: January 2012
+contact: Lee Jones <lee.jones@linaro.org>
+Description:
+ The /sys/devices/ directory contains a sub-directory for each
+ System-on-Chip (SoC) device on a running platform. Information
+ regarding each SoC can be obtained by reading sysfs files. This
+ functionality is only available if implemented by the platform.
+
+ The directory created for each SoC will also house information
+ about devices which are commonly contained in /sys/devices/platform.
+ It has been agreed that if an SoC device exists, its supported
+ devices would be better suited to appear as children of that SoC.
+
+What: /sys/devices/socX/machine
+Date: January 2012
+contact: Lee Jones <lee.jones@linaro.org>
+Description:
+ Read-only attribute common to all SoCs. Contains the SoC machine
+ name (e.g. Ux500).
+
+What: /sys/devices/socX/family
+Date: January 2012
+contact: Lee Jones <lee.jones@linaro.org>
+Description:
+ Read-only attribute common to all SoCs. Contains SoC family name
+ (e.g. DB8500).
+
+What: /sys/devices/socX/soc_id
+Date: January 2012
+contact: Lee Jones <lee.jones@linaro.org>
+Description:
+ Read-only attribute supported by most SoCs. In the case of
+ ST-Ericsson's chips this contains the SoC serial number.
+
+What: /sys/devices/socX/revision
+Date: January 2012
+contact: Lee Jones <lee.jones@linaro.org>
+Description:
+ Read-only attribute supported by most SoCs. Contains the SoC's
+ manufacturing revision number.
+
+What: /sys/devices/socX/process
+Date: January 2012
+contact: Lee Jones <lee.jones@linaro.org>
+Description:
+ Read-only attribute supported ST-Ericsson's silicon. Contains the
+ the process by which the silicon chip was manufactured.
+
+What: /sys/bus/soc
+Date: January 2012
+contact: Lee Jones <lee.jones@linaro.org>
+Description:
+ The /sys/bus/soc/ directory contains the usual sub-folders
+ expected under most buses. /sys/bus/soc/devices is of particular
+ interest, as it contains a symlink for each SoC device found on
+ the system. Each symlink points back into the aforementioned
+ /sys/devices/socX devices.
diff --git a/Documentation/ABI/testing/sysfs-devices-sun b/Documentation/ABI/testing/sysfs-devices-sun
new file mode 100644
index 000000000..625ce4b63
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-sun
@@ -0,0 +1,14 @@
+What: /sys/devices/.../sun
+Date: October 2012
+Contact: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
+Description:
+ The file contains a Slot-unique ID which provided by the _SUN
+ method in the ACPI namespace. The value is written in Advanced
+ Configuration and Power Interface Specification as follows:
+
+ "The _SUN value is required to be unique among the slots of
+ the same type. It is also recommended that this number match
+ the slot number printed on the physical slot whenever possible."
+
+ So reading the sysfs file, we can identify a physical position
+ of the slot in the system.
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
new file mode 100644
index 000000000..da9551357
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -0,0 +1,273 @@
+What: /sys/devices/system/cpu/
+Date: pre-git history
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description:
+ A collection of both global and individual CPU attributes
+
+ Individual CPU attributes are contained in subdirectories
+ named by the kernel's logical CPU number, e.g.:
+
+ /sys/devices/system/cpu/cpu#/
+
+What: /sys/devices/system/cpu/kernel_max
+ /sys/devices/system/cpu/offline
+ /sys/devices/system/cpu/online
+ /sys/devices/system/cpu/possible
+ /sys/devices/system/cpu/present
+Date: December 2008
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: CPU topology files that describe kernel limits related to
+ hotplug. Briefly:
+
+ kernel_max: the maximum cpu index allowed by the kernel
+ configuration.
+
+ offline: cpus that are not online because they have been
+ HOTPLUGGED off or exceed the limit of cpus allowed by the
+ kernel configuration (kernel_max above).
+
+ online: cpus that are online and being scheduled.
+
+ possible: cpus that have been allocated resources and can be
+ brought online if they are present.
+
+ present: cpus that have been identified as being present in
+ the system.
+
+ See Documentation/cputopology.txt for more information.
+
+
+What: /sys/devices/system/cpu/probe
+ /sys/devices/system/cpu/release
+Date: November 2009
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Dynamic addition and removal of CPU's. This is not hotplug
+ removal, this is meant complete removal/addition of the CPU
+ from the system.
+
+ probe: writes to this file will dynamically add a CPU to the
+ system. Information written to the file to add CPU's is
+ architecture specific.
+
+ release: writes to this file dynamically remove a CPU from
+ the system. Information writtento the file to remove CPU's
+ is architecture specific.
+
+What: /sys/devices/system/cpu/cpu#/node
+Date: October 2009
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Discover NUMA node a CPU belongs to
+
+ When CONFIG_NUMA is enabled, a symbolic link that points
+ to the corresponding NUMA node directory.
+
+ For example, the following symlink is created for cpu42
+ in NUMA node 2:
+
+ /sys/devices/system/cpu/cpu42/node2 -> ../../node/node2
+
+
+What: /sys/devices/system/cpu/cpu#/topology/core_id
+ /sys/devices/system/cpu/cpu#/topology/core_siblings
+ /sys/devices/system/cpu/cpu#/topology/core_siblings_list
+ /sys/devices/system/cpu/cpu#/topology/physical_package_id
+ /sys/devices/system/cpu/cpu#/topology/thread_siblings
+ /sys/devices/system/cpu/cpu#/topology/thread_siblings_list
+Date: December 2008
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: CPU topology files that describe a logical CPU's relationship
+ to other cores and threads in the same physical package.
+
+ One cpu# directory is created per logical CPU in the system,
+ e.g. /sys/devices/system/cpu/cpu42/.
+
+ Briefly, the files above are:
+
+ core_id: the CPU core ID of cpu#. Typically it is the
+ hardware platform's identifier (rather than the kernel's).
+ The actual value is architecture and platform dependent.
+
+ core_siblings: internal kernel map of cpu#'s hardware threads
+ within the same physical_package_id.
+
+ core_siblings_list: human-readable list of the logical CPU
+ numbers within the same physical_package_id as cpu#.
+
+ physical_package_id: physical package id of cpu#. Typically
+ corresponds to a physical socket number, but the actual value
+ is architecture and platform dependent.
+
+ thread_siblings: internel kernel map of cpu#'s hardware
+ threads within the same core as cpu#
+
+ thread_siblings_list: human-readable list of cpu#'s hardware
+ threads within the same core as cpu#
+
+ See Documentation/cputopology.txt for more information.
+
+
+What: /sys/devices/system/cpu/cpuidle/current_driver
+ /sys/devices/system/cpu/cpuidle/current_governer_ro
+Date: September 2007
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Discover cpuidle policy and mechanism
+
+ Various CPUs today support multiple idle levels that are
+ differentiated by varying exit latencies and power
+ consumption during idle.
+
+ Idle policy (governor) is differentiated from idle mechanism
+ (driver)
+
+ current_driver: displays current idle mechanism
+
+ current_governor_ro: displays current idle policy
+
+ See files in Documentation/cpuidle/ for more information.
+
+
+What: /sys/devices/system/cpu/cpu#/cpufreq/*
+Date: pre-git history
+Contact: linux-pm@vger.kernel.org
+Description: Discover and change clock speed of CPUs
+
+ Clock scaling allows you to change the clock speed of the
+ CPUs on the fly. This is a nice method to save battery
+ power, because the lower the clock speed, the less power
+ the CPU consumes.
+
+ There are many knobs to tweak in this directory.
+
+ See files in Documentation/cpu-freq/ for more information.
+
+ In particular, read Documentation/cpu-freq/user-guide.txt
+ to learn how to control the knobs.
+
+
+What: /sys/devices/system/cpu/cpu#/cpufreq/freqdomain_cpus
+Date: June 2013
+Contact: linux-pm@vger.kernel.org
+Description: Discover CPUs in the same CPU frequency coordination domain
+
+ freqdomain_cpus is the list of CPUs (online+offline) that share
+ the same clock/freq domain (possibly at the hardware level).
+ That information may be hidden from the cpufreq core and the
+ value of related_cpus may be different from freqdomain_cpus. This
+ attribute is useful for user space DVFS controllers to get better
+ power/performance results for platforms using acpi-cpufreq.
+
+ This file is only present if the acpi-cpufreq driver is in use.
+
+
+What: /sys/devices/system/cpu/cpu*/cache/index3/cache_disable_{0,1}
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Disable L3 cache indices
+
+ These files exist in every CPU's cache/index3 directory. Each
+ cache_disable_{0,1} file corresponds to one disable slot which
+ can be used to disable a cache index. Reading from these files
+ on a processor with this functionality will return the currently
+ disabled index for that node. There is one L3 structure per
+ node, or per internal node on MCM machines. Writing a valid
+ index to one of these files will cause the specificed cache
+ index to be disabled.
+
+ All AMD processors with L3 caches provide this functionality.
+ For details, see BKDGs at
+ http://developer.amd.com/documentation/guides/Pages/default.aspx
+
+
+What: /sys/devices/system/cpu/cpufreq/boost
+Date: August 2012
+Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Processor frequency boosting control
+
+ This switch controls the boost setting for the whole system.
+ Boosting allows the CPU and the firmware to run at a frequency
+ beyound it's nominal limit.
+ More details can be found in Documentation/cpu-freq/boost.txt
+
+
+What: /sys/devices/system/cpu/cpu#/crash_notes
+ /sys/devices/system/cpu/cpu#/crash_notes_size
+Date: April 2013
+Contact: kexec@lists.infradead.org
+Description: address and size of the percpu note.
+
+ crash_notes: the physical address of the memory that holds the
+ note of cpu#.
+
+ crash_notes_size: size of the note of cpu#.
+
+
+What: /sys/devices/system/cpu/intel_pstate/max_perf_pct
+ /sys/devices/system/cpu/intel_pstate/min_perf_pct
+ /sys/devices/system/cpu/intel_pstate/no_turbo
+Date: February 2013
+Contact: linux-pm@vger.kernel.org
+Description: Parameters for the Intel P-state driver
+
+ Logic for selecting the current P-state in Intel
+ Sandybridge+ processors. The three knobs control
+ limits for the P-state that will be requested by the
+ driver.
+
+ max_perf_pct: limits the maximum P state that will be requested by
+ the driver stated as a percentage of the available performance.
+
+ min_perf_pct: limits the minimum P state that will be requested by
+ the driver stated as a percentage of the available performance.
+
+ no_turbo: limits the driver to selecting P states below the turbo
+ frequency range.
+
+ More details can be found in Documentation/cpu-freq/intel-pstate.txt
+
+What: /sys/devices/system/cpu/cpu*/cache/index*/<set_of_attributes_mentioned_below>
+Date: July 2014(documented, existed before August 2008)
+Contact: Sudeep Holla <sudeep.holla@arm.com>
+ Linux kernel mailing list <linux-kernel@vger.kernel.org>
+Description: Parameters for the CPU cache attributes
+
+ allocation_policy:
+ - WriteAllocate: allocate a memory location to a cache line
+ on a cache miss because of a write
+ - ReadAllocate: allocate a memory location to a cache line
+ on a cache miss because of a read
+ - ReadWriteAllocate: both writeallocate and readallocate
+
+ attributes: LEGACY used only on IA64 and is same as write_policy
+
+ coherency_line_size: the minimum amount of data in bytes that gets
+ transferred from memory to cache
+
+ level: the cache hierarcy in the multi-level cache configuration
+
+ number_of_sets: total number of sets in the cache, a set is a
+ collection of cache lines with the same cache index
+
+ physical_line_partition: number of physical cache line per cache tag
+
+ shared_cpu_list: the list of logical cpus sharing the cache
+
+ shared_cpu_map: logical cpu mask containing the list of cpus sharing
+ the cache
+
+ size: the total cache size in kB
+
+ type:
+ - Instruction: cache that only holds instructions
+ - Data: cache that only caches data
+ - Unified: cache that holds both data and instructions
+
+ ways_of_associativity: degree of freedom in placing a particular block
+ of memory in the cache
+
+ write_policy:
+ - WriteThrough: data is written to both the cache line
+ and to the block in the lower-level memory
+ - WriteBack: data is written only to the cache line and
+ the modified cache line is written to main
+ memory only when it is replaced
diff --git a/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl b/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl
new file mode 100644
index 000000000..b82deeaec
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-system-ibm-rtl
@@ -0,0 +1,22 @@
+What: state
+Date: Sep 2010
+KernelVersion: 2.6.37
+Contact: Vernon Mauery <vernux@us.ibm.com>
+Description: The state file allows a means by which to change in and
+ out of Premium Real-Time Mode (PRTM), as well as the
+ ability to query the current state.
+ 0 => PRTM off
+ 1 => PRTM enabled
+Users: The ibm-prtm userspace daemon uses this interface.
+
+
+What: version
+Date: Sep 2010
+KernelVersion: 2.6.37
+Contact: Vernon Mauery <vernux@us.ibm.com>
+Description: The version file provides a means by which to query
+ the RTL table version that lives in the Extended
+ BIOS Data Area (EBDA).
+Users: The ibm-prtm userspace daemon uses this interface.
+
+
diff --git a/Documentation/ABI/testing/sysfs-devices-system-xen_cpu b/Documentation/ABI/testing/sysfs-devices-system-xen_cpu
new file mode 100644
index 000000000..9ca02fb2d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-system-xen_cpu
@@ -0,0 +1,20 @@
+What: /sys/devices/system/xen_cpu/
+Date: May 2012
+Contact: Liu, Jinsong <jinsong.liu@intel.com>
+Description:
+ A collection of global/individual Xen physical cpu attributes
+
+ Individual physical cpu attributes are contained in
+ subdirectories named by the Xen's logical cpu number, e.g.:
+ /sys/devices/system/xen_cpu/xen_cpu#/
+
+
+What: /sys/devices/system/xen_cpu/xen_cpu#/online
+Date: May 2012
+Contact: Liu, Jinsong <jinsong.liu@intel.com>
+Description:
+ Interface to online/offline Xen physical cpus
+
+ When running under Xen platform, it provide user interface
+ to online/offline physical cpus, except cpu0 due to several
+ logic restrictions and assumptions.
diff --git a/Documentation/ABI/testing/sysfs-driver-genwqe b/Documentation/ABI/testing/sysfs-driver-genwqe
new file mode 100644
index 000000000..64ac6d567
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-genwqe
@@ -0,0 +1,71 @@
+What: /sys/class/genwqe/genwqe<n>_card/version
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Unique bitstream identification e.g.
+ '0000000330336283.00000000475a4950'.
+
+What: /sys/class/genwqe/genwqe<n>_card/appid
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Identifies the currently active card application e.g. 'GZIP'
+ for compression/decompression.
+
+What: /sys/class/genwqe/genwqe<n>_card/type
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Type of the card e.g. 'GenWQE5-A7'.
+
+What: /sys/class/genwqe/genwqe<n>_card/curr_bitstream
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Currently active bitstream. 1 is default, 0 is backup.
+
+What: /sys/class/genwqe/genwqe<n>_card/next_bitstream
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Interface to set the next bitstream to be used.
+
+What: /sys/class/genwqe/genwqe<n>_card/reload_bitstream
+Date: May 2014
+Contact: klebers@linux.vnet.ibm.com
+Description: Interface to trigger a PCIe card reset to reload the bitstream.
+ sudo sh -c 'echo 1 > \
+ /sys/class/genwqe/genwqe0_card/reload_bitstream'
+ If successfully, the card will come back with the bitstream set
+ on 'next_bitstream'.
+
+What: /sys/class/genwqe/genwqe<n>_card/tempsens
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Interface to read the cards temperature sense register.
+
+What: /sys/class/genwqe/genwqe<n>_card/freerunning_timer
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Interface to read the cards free running timer.
+ Used for performance and utilization measurements.
+
+What: /sys/class/genwqe/genwqe<n>_card/queue_working_time
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Interface to read queue working time.
+ Used for performance and utilization measurements.
+
+What: /sys/class/genwqe/genwqe<n>_card/state
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: State of the card: "unused", "used", "error".
+
+What: /sys/class/genwqe/genwqe<n>_card/base_clock
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Base clock frequency of the card.
+
+What: /sys/class/genwqe/genwqe<n>_card/device/sriov_numvfs
+Date: Oct 2013
+Contact: haver@linux.vnet.ibm.com
+Description: Enable VFs (1..15):
+ sudo sh -c 'echo 15 > \
+ /sys/bus/pci/devices/0000\:1b\:00.0/sriov_numvfs'
+ Disable VFs:
+ Write a 0 into the same sysfs entry.
diff --git a/Documentation/ABI/testing/sysfs-driver-hid b/Documentation/ABI/testing/sysfs-driver-hid
new file mode 100644
index 000000000..48942cacb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid
@@ -0,0 +1,20 @@
+What: For USB devices : /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/report_descriptor
+ For BT devices : /sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/report_descriptor
+ Symlink : /sys/class/hidraw/hidraw<num>/device/report_descriptor
+Date: Jan 2011
+KernelVersion: 2.0.39
+Contact: Alan Ott <alan@signal11.us>
+Description: When read, this file returns the device's raw binary HID
+ report descriptor.
+ This file cannot be written.
+Users: HIDAPI library (http://www.signal11.us/oss/hidapi)
+
+What: For USB devices : /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
+ For BT devices : /sys/class/bluetooth/hci<addr>/<hid-bus>:<vendor-id>:<product-id>.<num>/country
+ Symlink : /sys/class/hidraw/hidraw<num>/device/country
+Date: February 2015
+KernelVersion: 3.19
+Contact: Olivier Gay <ogay@logitech.com>
+Description: When read, this file returns the hex integer value in ASCII
+ of the device's HID country code (e.g. 21 for US).
+ This file cannot be written.
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-lenovo b/Documentation/ABI/testing/sysfs-driver-hid-lenovo
new file mode 100644
index 000000000..53a072596
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-lenovo
@@ -0,0 +1,50 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/press_to_select
+Date: July 2011
+Contact: linux-input@vger.kernel.org
+Description: This controls if mouse clicks should be generated if the trackpoint is quickly pressed. How fast this press has to be
+ is being controlled by press_speed.
+ Values are 0 or 1.
+ Applies to Thinkpad USB Keyboard with TrackPoint.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/dragging
+Date: July 2011
+Contact: linux-input@vger.kernel.org
+Description: If this setting is enabled, it is possible to do dragging by pressing the trackpoint. This requires press_to_select to be enabled.
+ Values are 0 or 1.
+ Applies to Thinkpad USB Keyboard with TrackPoint.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/release_to_select
+Date: July 2011
+Contact: linux-input@vger.kernel.org
+Description: For details regarding this setting please refer to http://www.pc.ibm.com/ww/healthycomputing/trkpntb.html
+ Values are 0 or 1.
+ Applies to Thinkpad USB Keyboard with TrackPoint.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/select_right
+Date: July 2011
+Contact: linux-input@vger.kernel.org
+Description: This setting controls if the mouse click events generated by pressing the trackpoint (if press_to_select is enabled) generate
+ a left or right mouse button click.
+ Values are 0 or 1.
+ Applies to Thinkpad USB Keyboard with TrackPoint.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/sensitivity
+Date: July 2011
+Contact: linux-input@vger.kernel.org
+Description: This file contains the trackpoint sensitivity.
+ Values are decimal integers from 1 (lowest sensitivity) to 255 (highest sensitivity).
+ Applies to Thinkpad USB Keyboard with TrackPoint.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/press_speed
+Date: July 2011
+Contact: linux-input@vger.kernel.org
+Description: This setting controls how fast the trackpoint needs to be pressed to generate a mouse click if press_to_select is enabled.
+ Values are decimal integers from 1 (slowest) to 255 (fastest).
+ Applies to Thinkpad USB Keyboard with TrackPoint.
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/fn_lock
+Date: July 2014
+Contact: linux-input@vger.kernel.org
+Description: This setting controls whether Fn Lock is enabled on the keyboard (i.e. if F1 is Mute or F1)
+ Values are 0 or 1
+ Applies to ThinkPad Compact (USB|Bluetooth) Keyboard with TrackPoint.
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
new file mode 100644
index 000000000..b3f6a2ac5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-logitech-lg4ff
@@ -0,0 +1,52 @@
+What: /sys/module/hid_logitech/drivers/hid:logitech/<dev>/range.
+Date: July 2011
+KernelVersion: 3.2
+Contact: Michal Malý <madcatxster@gmail.com>
+Description: Display minimum, maximum and current range of the steering
+ wheel. Writing a value within min and max boundaries sets the
+ range of the wheel.
+
+What: /sys/bus/hid/drivers/logitech/<dev>/alternate_modes
+Date: Feb 2015
+KernelVersion: 4.1
+Contact: Michal Malý <madcatxster@gmail.com>
+Description: Displays a set of alternate modes supported by a wheel. Each
+ mode is listed as follows:
+ Tag: Mode Name
+ Currently active mode is marked with an asterisk. List also
+ contains an abstract item "native" which always denotes the
+ native mode of the wheel. Echoing the mode tag switches the
+ wheel into the corresponding mode. Depending on the exact model
+ of the wheel not all listed modes might always be selectable.
+ If a wheel cannot be switched into the desired mode, -EINVAL
+ is returned accompanied with an explanatory message in the
+ kernel log.
+ This entry is not created for devices that have only one mode.
+
+ Currently supported mode switches:
+ Driving Force Pro:
+ DF-EX --> DFP
+
+ G25:
+ DF-EX --> DFP --> G25
+
+ G27:
+ DF-EX <*> DFP <-> G25 <-> G27
+ DF-EX <*--------> G25 <-> G27
+ DF-EX <*----------------> G27
+
+ DFGT:
+ DF-EX <*> DFP <-> DFGT
+ DF-EX <*--------> DFGT
+
+ * hid_logitech module must be loaded with lg4ff_no_autoswitch=1
+ parameter set in order for the switch to DF-EX mode to work.
+
+What: /sys/bus/hid/drivers/logitech/<dev>/real_id
+Date: Feb 2015
+KernelVersion: 4.1
+Contact: Michal Malý <madcatxster@gmail.com>
+Description: Displays the real model of the wheel regardless of any
+ alternate mode the wheel might be switched to.
+ It is a read-only value.
+ This entry is not created for devices that have only one mode.
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-multitouch b/Documentation/ABI/testing/sysfs-driver-hid-multitouch
new file mode 100644
index 000000000..f79839d1a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-multitouch
@@ -0,0 +1,9 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/quirks
+Date: November 2011
+Contact: Benjamin Tissoires <benjamin.tissoires@gmail.com>
+Description: The integer value of this attribute corresponds to the
+ quirks actually in place to handle the device's protocol.
+ When read, this attribute returns the current settings (see
+ MT_QUIRKS_* in hid-multitouch.c).
+ When written this attribute change on the fly the quirks, then
+ the protocol to handle the device.
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-picolcd b/Documentation/ABI/testing/sysfs-driver-hid-picolcd
new file mode 100644
index 000000000..08579e7e1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-picolcd
@@ -0,0 +1,43 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/operation_mode
+Date: March 2010
+Contact: Bruno Prémont <bonbons@linux-vserver.org>
+Description: Make it possible to switch the PicoLCD device between LCD
+ (firmware) and bootloader (flasher) operation modes.
+
+ Reading: returns list of available modes, the active mode being
+ enclosed in brackets ('[' and ']')
+
+ Writing: causes operation mode switch. Permitted values are
+ the non-active mode names listed when read.
+
+ Note: when switching mode the current PicoLCD HID device gets
+ disconnected and reconnects after above delay (see attribute
+ operation_mode_delay for its value).
+
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/operation_mode_delay
+Date: April 2010
+Contact: Bruno Prémont <bonbons@linux-vserver.org>
+Description: Delay PicoLCD waits before restarting in new mode when
+ operation_mode has changed.
+
+ Reading/Writing: It is expressed in ms and permitted range is
+ 0..30000ms.
+
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/fb_update_rate
+Date: March 2010
+Contact: Bruno Prémont <bonbons@linux-vserver.org>
+Description: Make it possible to adjust defio refresh rate.
+
+ Reading: returns list of available refresh rates (expressed in Hz),
+ the active refresh rate being enclosed in brackets ('[' and ']')
+
+ Writing: accepts new refresh rate expressed in integer Hz
+ within permitted rates.
+
+ Note: As device can barely do 2 complete refreshes a second
+ it only makes sense to adjust this value if only one or two
+ tiles get changed and it's not appropriate to expect the application
+ to flush it's tiny changes explicitely at higher than default rate.
+
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-prodikeys b/Documentation/ABI/testing/sysfs-driver-hid-prodikeys
new file mode 100644
index 000000000..05d988c29
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-prodikeys
@@ -0,0 +1,29 @@
+What: /sys/bus/hid/drivers/prodikeys/.../channel
+Date: April 2010
+KernelVersion: 2.6.34
+Contact: Don Prince <dhprince.devel@yahoo.co.uk>
+Description:
+ Allows control (via software) the midi channel to which
+ that the pc-midi keyboard will output.midi data.
+ Range: 0..15
+ Type: Read/write
+What: /sys/bus/hid/drivers/prodikeys/.../sustain
+Date: April 2010
+KernelVersion: 2.6.34
+Contact: Don Prince <dhprince.devel@yahoo.co.uk>
+Description:
+ Allows control (via software) the sustain duration of a
+ note held by the pc-midi driver.
+ 0 means sustain mode is disabled.
+ Range: 0..5000 (milliseconds)
+ Type: Read/write
+What: /sys/bus/hid/drivers/prodikeys/.../octave
+Date: April 2010
+KernelVersion: 2.6.34
+Contact: Don Prince <dhprince.devel@yahoo.co.uk>
+Description:
+ Controls the octave shift modifier in the pc-midi driver.
+ The octave can be shifted via software up/down 2 octaves.
+ 0 means the no ocatve shift.
+ Range: -2..2 (minus 2 to plus 2)
+ Type: Read/Write
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-arvo b/Documentation/ABI/testing/sysfs-driver-hid-roccat-arvo
new file mode 100644
index 000000000..55e281b00
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-arvo
@@ -0,0 +1,53 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/arvo/roccatarvo<minor>/actual_profile
+Date: Januar 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 1-5.
+ When read, this attribute returns the number of the actual
+ profile which is also the profile that's active on device startup.
+ When written this attribute activates the selected profile
+ immediately.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/arvo/roccatarvo<minor>/button
+Date: Januar 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The keyboard can store short macros with consist of 1 button with
+ several modifier keys internally.
+ When written, this file lets one set the sequence for a specific
+ button for a specific profile. Button and profile numbers are
+ included in written data. The data has to be 24 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/arvo/roccatarvo<minor>/info
+Date: Januar 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns some info about the device like the
+ installed firmware version.
+ The size of the data is 8 bytes in size.
+ This file is readonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/arvo/roccatarvo<minor>/key_mask
+Date: Januar 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The keyboard lets the user deactivate 5 certain keys like the
+ windows and application keys, to protect the user from the outcome
+ of accidentally pressing them.
+ The integer value of this attribute has bits 0-4 set depending
+ on the state of the corresponding key.
+ When read, this file returns the current state of the buttons.
+ When written, the given buttons are activated/deactivated
+ immediately.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/arvo/roccatarvo<minor>/mode_key
+Date: Januar 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The keyboard has a condensed layout without num-lock key.
+ Instead it uses a mode-key which activates a gaming mode where
+ the assignment of the number block changes.
+ The integer value of this attribute ranges from 0 (OFF) to 1 (ON).
+ When read, this file returns the actual state of the key.
+ When written, the key is activated/deactivated immediately.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-isku b/Documentation/ABI/testing/sysfs-driver-hid-roccat-isku
new file mode 100644
index 000000000..c601d0f2a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-isku
@@ -0,0 +1,153 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/actual_profile
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 0-4.
+ When read, this attribute returns the number of the actual
+ profile. This value is persistent, so its equivalent to the
+ profile that's active when the device is powered on next time.
+ When written, this file sets the number of the startup profile
+ and the device activates this profile immediately.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/info
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns general data like firmware version.
+ The data is 6 bytes long.
+ This file is readonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/key_mask
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one deactivate certain keys like
+ windows and application keys, to prevent accidental presses.
+ Profile number for which this settings occur is included in
+ written data. The data has to be 6 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_capslock
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the
+ capslock key for a specific profile. Profile number is included
+ in written data. The data has to be 6 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_easyzone
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the
+ easyzone keys for a specific profile. Profile number is included
+ in written data. The data has to be 65 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_function
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the
+ function keys for a specific profile. Profile number is included
+ in written data. The data has to be 41 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_macro
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the macro
+ keys for a specific profile. Profile number is included in
+ written data. The data has to be 35 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_media
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the media
+ keys for a specific profile. Profile number is included in
+ written data. The data has to be 29 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/keys_thumbster
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the
+ thumbster keys for a specific profile. Profile number is included
+ in written data. The data has to be 23 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/last_set
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the time in secs since
+ epoch in which the last configuration took place.
+ The data has to be 20 bytes long.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/light
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the backlight intensity for
+ a specific profile. Profile number is included in written data.
+ The data has to be 10 bytes long for Isku, IskuFX needs 16 bytes
+ of data.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/macro
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one store macros with max 500
+ keystrokes for a specific button for a specific profile.
+ Button and profile numbers are included in written data.
+ The data has to be 2083 bytes long.
+ Before reading this file, control has to be written to select
+ which profile and key to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/reset
+Date: November 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one reset the device.
+ The data has to be 3 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/control
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one select which data from which
+ profile will be read next. The data has to be 3 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/talk
+Date: June 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one trigger easyshift functionality
+ from the host.
+ The data has to be 16 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/isku/roccatisku<minor>/talkfx
+Date: February 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one trigger temporary color schemes
+ from the host.
+ The data has to be 16 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone b/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone
new file mode 100644
index 000000000..3ca397110
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-kone
@@ -0,0 +1,106 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kone/roccatkone<minor>/actual_dpi
+Date: March 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: It is possible to switch the dpi setting of the mouse with the
+ press of a button.
+ When read, this file returns the raw number of the actual dpi
+ setting reported by the mouse. This number has to be further
+ processed to receive the real dpi value.
+
+ VALUE DPI
+ 1 800
+ 2 1200
+ 3 1600
+ 4 2000
+ 5 2400
+ 6 3200
+
+ This file is readonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kone/roccatkone<minor>/actual_profile
+Date: March 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the number of the actual profile.
+ This file is readonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kone/roccatkone<minor>/firmware_version
+Date: March 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the raw integer version number of the
+ firmware reported by the mouse. Using the integer value eases
+ further usage in other programs. To receive the real version
+ number the decimal point has to be shifted 2 positions to the
+ left. E.g. a returned value of 138 means 1.38
+ This file is readonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kone/roccatkone<minor>/profile[1-5]
+Date: March 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile holds information like button
+ mappings, sensitivity, the colors of the 5 leds and light
+ effects.
+ When read, these files return the respective profile. The
+ returned data is 975 bytes in size.
+ When written, this file lets one write the respective profile
+ data back to the mouse. The data has to be 975 bytes long.
+ The mouse will reject invalid data, whereas the profile number
+ stored in the profile doesn't need to fit the number of the
+ store.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kone/roccatkone<minor>/settings
+Date: March 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the settings stored in the mouse.
+ The size of the data is 36 bytes and holds information like the
+ startup_profile, tcu state and calibration_data.
+ When written, this file lets write settings back to the mouse.
+ The data has to be 36 bytes long. The mouse will reject invalid
+ data.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kone/roccatkone<minor>/startup_profile
+Date: March 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 1 to 5.
+ When read, this attribute returns the number of the profile
+ that's active when the mouse is powered on.
+ When written, this file sets the number of the startup profile
+ and the mouse activates this profile immediately.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kone/roccatkone<minor>/tcu
+Date: March 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse has a "Tracking Control Unit" which lets the user
+ calibrate the laser power to fit the mousepad surface.
+ When read, this file returns the current state of the TCU,
+ where 0 means off and 1 means on.
+ Writing 0 in this file will switch the TCU off.
+ Writing 1 in this file will start the calibration which takes
+ around 6 seconds to complete and activates the TCU.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kone/roccatkone<minor>/weight
+Date: March 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can be equipped with one of four supplied weights
+ ranging from 5 to 20 grams which are recognized by the mouse
+ and its value can be read out. When read, this file returns the
+ raw value returned by the mouse which eases further processing
+ in other software.
+ The values map to the weights as follows:
+
+ VALUE WEIGHT
+ 0 none
+ 1 5g
+ 2 10g
+ 3 15g
+ 4 20g
+
+ This file is readonly.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus b/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus
new file mode 100644
index 000000000..7bd776f9c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-koneplus
@@ -0,0 +1,96 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/actual_profile
+Date: October 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 0-4.
+ When read, this attribute returns the number of the actual
+ profile. This value is persistent, so its equivalent to the
+ profile that's active when the mouse is powered on next time.
+ When written, this file sets the number of the startup profile
+ and the mouse activates this profile immediately.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/info
+Date: November 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns general data like firmware version.
+ When written, the device can be reset.
+ The data is 8 bytes long.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/macro
+Date: October 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store a macro with max 500 key/button strokes
+ internally.
+ When written, this file lets one set the sequence for a specific
+ button for a specific profile. Button and profile numbers are
+ included in written data. The data has to be 2082 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/profile_buttons
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_buttons holds information about button layout.
+ When written, this file lets one write the respective profile
+ buttons back to the mouse. The data has to be 77 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/profile_settings
+Date: October 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_settings holds information like resolution, sensitivity
+ and light effects.
+ When written, this file lets one write the respective profile
+ settings back to the mouse. The data has to be 43 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/sensor
+Date: October 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse has a tracking- and a distance-control-unit. These
+ can be activated/deactivated and the lift-off distance can be
+ set. The data has to be 6 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/talk
+Date: May 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: Used to active some easy* functions of the mouse from outside.
+ The data has to be 16 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/tcu
+Date: October 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written a calibration process for the tracking control unit
+ can be initiated/cancelled. Also lets one read/write sensor
+ registers.
+ The data has to be 4 bytes long.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/koneplus/roccatkoneplus<minor>/tcu_image
+Date: October 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read the mouse returns a 30x30 pixel image of the
+ sampled underground. This works only in the course of a
+ calibration process initiated with tcu.
+ The returned data is 1028 bytes in size.
+ This file is readonly.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-konepure b/Documentation/ABI/testing/sysfs-driver-hid-roccat-konepure
new file mode 100644
index 000000000..41a9b7fbf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-konepure
@@ -0,0 +1,105 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/konepure/roccatkonepure<minor>/actual_profile
+Date: December 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. actual_profile holds number of actual profile.
+ This value is persistent, so its value determines the profile
+ that's active when the mouse is powered on next time.
+ When written, the mouse activates the set profile immediately.
+ The data has to be 3 bytes long.
+ The mouse will reject invalid data.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/konepure/roccatkonepure<minor>/control
+Date: December 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one select which data from which
+ profile will be read next. The data has to be 3 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/konepure/roccatkonepure<minor>/info
+Date: December 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns general data like firmware version.
+ When written, the device can be reset.
+ The data is 6 bytes long.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/konepure/roccatkonepure<minor>/macro
+Date: December 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store a macro with max 500 key/button strokes
+ internally.
+ When written, this file lets one set the sequence for a specific
+ button for a specific profile. Button and profile numbers are
+ included in written data. The data has to be 2082 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/konepure/roccatkonepure<minor>/profile_buttons
+Date: December 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_buttons holds information about button layout.
+ When written, this file lets one write the respective profile
+ buttons back to the mouse. The data has to be 59 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/konepure/roccatkonepure<minor>/profile_settings
+Date: December 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_settings holds information like resolution, sensitivity
+ and light effects.
+ When written, this file lets one write the respective profile
+ settings back to the mouse. The data has to be 31 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/konepure/roccatkonepure<minor>/sensor
+Date: December 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse has a tracking- and a distance-control-unit. These
+ can be activated/deactivated and the lift-off distance can be
+ set. The data has to be 6 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/konepure/roccatkonepure<minor>/talk
+Date: December 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: Used to active some easy* functions of the mouse from outside.
+ The data has to be 16 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/konepure/roccatkonepure<minor>/tcu
+Date: December 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written a calibration process for the tracking control unit
+ can be initiated/cancelled. Also lets one read/write sensor
+ registers.
+ The data has to be 4 bytes long.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/konepure/roccatkonepure<minor>/tcu_image
+Date: December 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read the mouse returns a 30x30 pixel image of the
+ sampled underground. This works only in the course of a
+ calibration process initiated with tcu.
+ The returned data is 1028 bytes in size.
+ This file is readonly.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-kovaplus b/Documentation/ABI/testing/sysfs-driver-hid-roccat-kovaplus
new file mode 100644
index 000000000..a10404f15
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-kovaplus
@@ -0,0 +1,49 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kovaplus/roccatkovaplus<minor>/actual_profile
+Date: January 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The integer value of this attribute ranges from 0-4.
+ When read, this attribute returns the number of the active
+ profile.
+ When written, the mouse activates this profile immediately.
+ The profile that's active when powered down is the same that's
+ active when the mouse is powered on.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kovaplus/roccatkovaplus<minor>/info
+Date: November 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns general data like firmware version.
+ When written, the device can be reset.
+ The data is 6 bytes long.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kovaplus/roccatkovaplus<minor>/profile_buttons
+Date: January 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_buttons holds information about button layout.
+ When written, this file lets one write the respective profile
+ buttons back to the mouse. The data has to be 23 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/kovaplus/roccatkovaplus<minor>/profile_settings
+Date: January 2011
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_settings holds information like resolution, sensitivity
+ and light effects.
+ When written, this file lets one write the respective profile
+ settings back to the mouse. The data has to be 16 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-lua b/Documentation/ABI/testing/sysfs-driver-hid-roccat-lua
new file mode 100644
index 000000000..31c6c4c8b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-lua
@@ -0,0 +1,7 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/control
+Date: October 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, cpi, button and light settings can be configured.
+ When read, actual cpi setting and sensor data are returned.
+ The data has to be 8 bytes long.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-pyra b/Documentation/ABI/testing/sysfs-driver-hid-roccat-pyra
new file mode 100644
index 000000000..9fa9de30d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-pyra
@@ -0,0 +1,49 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/pyra/roccatpyra<minor>/info
+Date: November 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns general data like firmware version.
+ When written, the device can be reset.
+ The data is 6 bytes long.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/pyra/roccatpyra<minor>/profile_settings
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_settings holds information like resolution, sensitivity
+ and light effects.
+ When written, this file lets one write the respective profile
+ settings back to the mouse. The data has to be 13 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/pyra/roccatpyra<minor>/profile_buttons
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split in settings and buttons.
+ profile_buttons holds information about button layout.
+ When written, this file lets one write the respective profile
+ buttons back to the mouse. The data has to be 19 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/pyra/roccatpyra<minor>/settings
+Date: August 2010
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns the settings stored in the mouse.
+ The size of the data is 3 bytes and holds information on the
+ startup_profile.
+ When written, this file lets write settings back to the mouse.
+ The data has to be 3 bytes long. The mouse will reject invalid
+ data.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-ryos b/Documentation/ABI/testing/sysfs-driver-hid-roccat-ryos
new file mode 100644
index 000000000..1d6a8cf9d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-ryos
@@ -0,0 +1,178 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/control
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one select which data from which
+ profile will be read next. The data has to be 3 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/profile
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. profile holds index of actual profile.
+ This value is persistent, so its value determines the profile
+ that's active when the device is powered on next time.
+ When written, the device activates the set profile immediately.
+ The data has to be 3 bytes long.
+ The device will reject invalid data.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/keys_primary
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the default of all keys for
+ a specific profile. Profile index is included in written data.
+ The data has to be 125 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/keys_function
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the
+ function keys for a specific profile. Profile index is included
+ in written data. The data has to be 95 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/keys_macro
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the macro
+ keys for a specific profile. Profile index is included in
+ written data. The data has to be 35 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/keys_thumbster
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the
+ thumbster keys for a specific profile. Profile index is included
+ in written data. The data has to be 23 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/keys_extra
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the
+ capslock and function keys for a specific profile. Profile index
+ is included in written data. The data has to be 8 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/keys_easyzone
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the function of the
+ easyzone keys for a specific profile. Profile index is included
+ in written data. The data has to be 294 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/key_mask
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one deactivate certain keys like
+ windows and application keys, to prevent accidental presses.
+ Profile index for which this settings occur is included in
+ written data. The data has to be 6 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/light
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the backlight intensity for
+ a specific profile. Profile index is included in written data.
+ This attribute is only valid for the glow and pro variant.
+ The data has to be 16 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/macro
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one store macros with max 480
+ keystrokes for a specific button for a specific profile.
+ Button and profile indexes are included in written data.
+ The data has to be 2002 bytes long.
+ Before reading this file, control has to be written to select
+ which profile and key to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/info
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns general data like firmware version.
+ The data is 8 bytes long.
+ This file is readonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/reset
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one reset the device.
+ The data has to be 3 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/talk
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one trigger easyshift functionality
+ from the host.
+ The data has to be 16 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/light_control
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one switch between stored and custom
+ light settings.
+ This attribute is only valid for the pro variant.
+ The data has to be 8 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/stored_lights
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set per-key lighting for different
+ layers.
+ This attribute is only valid for the pro variant.
+ The data has to be 1382 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/custom_lights
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set the actual per-key lighting.
+ This attribute is only valid for the pro variant.
+ The data has to be 20 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/ryos/roccatryos<minor>/light_macro
+Date: October 2013
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one set a light macro that is looped
+ whenever the device gets in dimness mode.
+ This attribute is only valid for the pro variant.
+ The data has to be 2002 bytes long.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-roccat-savu b/Documentation/ABI/testing/sysfs-driver-hid-roccat-savu
new file mode 100644
index 000000000..f1e02a98b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-roccat-savu
@@ -0,0 +1,76 @@
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/savu/roccatsavu<minor>/buttons
+Date: Mai 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split into general settings and
+ button settings. buttons holds informations about button layout.
+ When written, this file lets one write the respective profile
+ buttons to the mouse. The data has to be 47 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ Before reading this file, control has to be written to select
+ which profile to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/savu/roccatsavu<minor>/control
+Date: Mai 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one select which data from which
+ profile will be read next. The data has to be 3 bytes long.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/savu/roccatsavu<minor>/general
+Date: Mai 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. A profile is split into general settings and
+ button settings. profile holds informations like resolution, sensitivity
+ and light effects.
+ When written, this file lets one write the respective profile
+ settings back to the mouse. The data has to be 43 bytes long.
+ The mouse will reject invalid data.
+ Which profile to write is determined by the profile number
+ contained in the data.
+ This file is writeonly.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/savu/roccatsavu<minor>/info
+Date: Mai 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When read, this file returns general data like firmware version.
+ When written, the device can be reset.
+ The data is 8 bytes long.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/savu/roccatsavu<minor>/macro
+Date: Mai 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: When written, this file lets one store macros with max 500
+ keystrokes for a specific button for a specific profile.
+ Button and profile numbers are included in written data.
+ The data has to be 2083 bytes long.
+ Before reading this file, control has to be written to select
+ which profile and key to read.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/savu/roccatsavu<minor>/profile
+Date: Mai 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse can store 5 profiles which can be switched by the
+ press of a button. profile holds number of actual profile.
+ This value is persistent, so its value determines the profile
+ that's active when the mouse is powered on next time.
+ When written, the mouse activates the set profile immediately.
+ The data has to be 3 bytes long.
+ The mouse will reject invalid data.
+Users: http://roccat.sourceforge.net
+
+What: /sys/bus/usb/devices/<busnum>-<devnum>:<config num>.<interface num>/<hid-bus>:<vendor-id>:<product-id>.<num>/savu/roccatsavu<minor>/sensor
+Date: July 2012
+Contact: Stefan Achatz <erazor_de@users.sourceforge.net>
+Description: The mouse has a Avago ADNS-3090 sensor.
+ This file allows reading and writing of the mouse sensors registers.
+ The data has to be 4 bytes long.
+Users: http://roccat.sourceforge.net
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-srws1 b/Documentation/ABI/testing/sysfs-driver-hid-srws1
new file mode 100644
index 000000000..d0eba70c7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-srws1
@@ -0,0 +1,21 @@
+What: /sys/class/leds/SRWS1::<serial>::RPM1
+What: /sys/class/leds/SRWS1::<serial>::RPM2
+What: /sys/class/leds/SRWS1::<serial>::RPM3
+What: /sys/class/leds/SRWS1::<serial>::RPM4
+What: /sys/class/leds/SRWS1::<serial>::RPM5
+What: /sys/class/leds/SRWS1::<serial>::RPM6
+What: /sys/class/leds/SRWS1::<serial>::RPM7
+What: /sys/class/leds/SRWS1::<serial>::RPM8
+What: /sys/class/leds/SRWS1::<serial>::RPM9
+What: /sys/class/leds/SRWS1::<serial>::RPM10
+What: /sys/class/leds/SRWS1::<serial>::RPM11
+What: /sys/class/leds/SRWS1::<serial>::RPM12
+What: /sys/class/leds/SRWS1::<serial>::RPM13
+What: /sys/class/leds/SRWS1::<serial>::RPM14
+What: /sys/class/leds/SRWS1::<serial>::RPM15
+What: /sys/class/leds/SRWS1::<serial>::RPMALL
+Date: Jan 2013
+KernelVersion: 3.9
+Contact: Simon Wood <simon@mungewell.org>
+Description: Provides a control for turning on/off the LEDs which form
+ an RPM meter on the front of the controller
diff --git a/Documentation/ABI/testing/sysfs-driver-hid-wiimote b/Documentation/ABI/testing/sysfs-driver-hid-wiimote
new file mode 100644
index 000000000..39dfa5cb1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-hid-wiimote
@@ -0,0 +1,77 @@
+What: /sys/bus/hid/drivers/wiimote/<dev>/led1
+What: /sys/bus/hid/drivers/wiimote/<dev>/led2
+What: /sys/bus/hid/drivers/wiimote/<dev>/led3
+What: /sys/bus/hid/drivers/wiimote/<dev>/led4
+Date: July 2011
+KernelVersion: 3.1
+Contact: David Herrmann <dh.herrmann@googlemail.com>
+Description: Make it possible to set/get current led state. Reading from it
+ returns 0 if led is off and 1 if it is on. Writing 0 to it
+ disables the led, writing 1 enables it.
+
+What: /sys/bus/hid/drivers/wiimote/<dev>/extension
+Date: August 2011
+KernelVersion: 3.2
+Contact: David Herrmann <dh.herrmann@gmail.com>
+Description: This file contains the currently connected and initialized
+ extensions. It can be one of: none, motionp, nunchuck, classic,
+ motionp+nunchuck, motionp+classic
+ motionp is the official Nintendo Motion+ extension, nunchuck is
+ the official Nintendo Nunchuck extension and classic is the
+ Nintendo Classic Controller extension. The motionp extension can
+ be combined with the other two.
+ Starting with kernel-version 3.11 Motion Plus hotplugging is
+ supported and if detected, it's no longer reported as static
+ extension. You will get uevent notifications for the motion-plus
+ device then.
+
+What: /sys/bus/hid/drivers/wiimote/<dev>/devtype
+Date: May 2013
+KernelVersion: 3.11
+Contact: David Herrmann <dh.herrmann@gmail.com>
+Description: While a device is initialized by the wiimote driver, we perform
+ a device detection and signal a "change" uevent after it is
+ done. This file shows the detected device type. "pending" means
+ that the detection is still ongoing, "unknown" means, that the
+ device couldn't be detected or loaded. "generic" means, that the
+ device couldn't be detected but supports basic Wii Remote
+ features and can be used.
+ Other strings for each device-type are available and may be
+ added if new device-specific detections are added.
+ Currently supported are:
+ gen10: First Wii Remote generation
+ gen20: Second Wii Remote Plus generation (builtin MP)
+ balanceboard: Wii Balance Board
+
+What: /sys/bus/hid/drivers/wiimote/<dev>/bboard_calib
+Date: May 2013
+KernelVersion: 3.11
+Contact: David Herrmann <dh.herrmann@gmail.com>
+Description: This attribute is only provided if the device was detected as a
+ balance board. It provides a single line with 3 calibration
+ values for all 4 sensors. The values are separated by colons and
+ are each 2 bytes long (encoded as 4 digit hexadecimal value).
+ First, 0kg values for all 4 sensors are written, followed by the
+ 17kg values for all 4 sensors and last the 34kg values for all 4
+ sensors.
+ Calibration data is already applied by the kernel to all input
+ values but may be used by user-space to perform other
+ transformations.
+
+What: /sys/bus/hid/drivers/wiimote/<dev>/pro_calib
+Date: October 2013
+KernelVersion: 3.13
+Contact: David Herrmann <dh.herrmann@gmail.com>
+Description: This attribute is only provided if the device was detected as a
+ pro-controller. It provides a single line with 4 calibration
+ values for all 4 analog sticks. Format is: "x1:y1 x2:y2". Data
+ is prefixed with a +/-. Each value is a signed 16bit number.
+ Data is encoded as decimal numbers and specifies the offsets of
+ the analog sticks of the pro-controller.
+ Calibration data is already applied by the kernel to all input
+ values but may be used by user-space to perform other
+ transformations.
+ Calibration data is detected by the kernel during device setup.
+ You can write "scan\n" into this file to re-trigger calibration.
+ You can also write data directly in the form "x1:y1 x2:y2" to
+ set the calibration values manually.
diff --git a/Documentation/ABI/testing/sysfs-driver-input-axp-pek b/Documentation/ABI/testing/sysfs-driver-input-axp-pek
new file mode 100644
index 000000000..a5e671b9f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-input-axp-pek
@@ -0,0 +1,11 @@
+What: /sys/class/input/input(x)/device/startup
+Date: March 2014
+Contact: Carlo Caione <carlo@caione.org>
+Description: Startup time in us. Board is powered on if the button is pressed
+ for more than <startup_time>
+
+What: /sys/class/input/input(x)/device/shutdown
+Date: March 2014
+Contact: Carlo Caione <carlo@caione.org>
+Description: Shutdown time in us. Board is powered off if the button is pressed
+ for more than <shutdown_time>
diff --git a/Documentation/ABI/testing/sysfs-driver-intel-rapid-start b/Documentation/ABI/testing/sysfs-driver-intel-rapid-start
new file mode 100644
index 000000000..5a7d2e217
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-intel-rapid-start
@@ -0,0 +1,21 @@
+What: /sys/bus/acpi/intel-rapid-start/wakeup_events
+Date: July 2, 2013
+KernelVersion: 3.11
+Contact: Matthew Garrett <mjg59@srcf.ucam.org>
+Description: An integer representing a set of wakeup events as follows:
+ 1: Wake to enter hibernation when the wakeup timer expires
+ 2: Wake to enter hibernation when the battery reaches a
+ critical level
+
+ These values are ORed together. For example, a value of 3
+ indicates that the system will wake to enter hibernation when
+ either the wakeup timer expires or the battery reaches a
+ critical level.
+
+What: /sys/bus/acpi/intel-rapid-start/wakeup_time
+Date: July 2, 2013
+KernelVersion: 3.11
+Contact: Matthew Garrett <mjg59@srcf.ucam.org>
+Description: An integer representing the length of time the system will
+ remain asleep before waking up to enter hibernation.
+ This value is in minutes.
diff --git a/Documentation/ABI/testing/sysfs-driver-pciback b/Documentation/ABI/testing/sysfs-driver-pciback
new file mode 100644
index 000000000..6a733bfa3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-pciback
@@ -0,0 +1,13 @@
+What: /sys/bus/pci/drivers/pciback/quirks
+Date: Oct 2011
+KernelVersion: 3.1
+Contact: xen-devel@lists.xenproject.org
+Description:
+ If the permissive attribute is set, then writing a string in
+ the format of DDDD:BB:DD.F-REG:SIZE:MASK will allow the guest
+ to write and read from the PCI device. That is Domain:Bus:
+ Device.Function-Register:Size:Mask (Domain is optional).
+ For example:
+ #echo 00:19.0-E0:2:FF > /sys/bus/pci/drivers/pciback/quirks
+ will allow the guest to read and write to the configuration
+ register 0x0E.
diff --git a/Documentation/ABI/testing/sysfs-driver-ppi b/Documentation/ABI/testing/sysfs-driver-ppi
new file mode 100644
index 000000000..7d1435bc9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-ppi
@@ -0,0 +1,70 @@
+What: /sys/devices/pnp0/<bus-num>/ppi/
+Date: August 2012
+Kernel Version: 3.6
+Contact: xiaoyan.zhang@intel.com
+Description:
+ This folder includes the attributes related with PPI (Physical
+ Presence Interface). Only if TPM is supported by BIOS, this
+ folder makes sense. The folder path can be got by command
+ 'find /sys/ -name 'pcrs''. For the detail information of PPI,
+ please refer to the PPI specification from
+ http://www.trustedcomputinggroup.org/
+
+What: /sys/devices/pnp0/<bus-num>/ppi/version
+Date: August 2012
+Contact: xiaoyan.zhang@intel.com
+Description:
+ This attribute shows the version of the PPI supported by the
+ platform.
+ This file is readonly.
+
+What: /sys/devices/pnp0/<bus-num>/ppi/request
+Date: August 2012
+Contact: xiaoyan.zhang@intel.com
+Description:
+ This attribute shows the request for an operation to be
+ executed in the pre-OS environment. It is the only input from
+ the OS to the pre-OS environment. The request should be an
+ integer value range from 1 to 160, and 0 means no request.
+ This file can be read and written.
+
+What: /sys/devices/pnp0/00:<bus-num>/ppi/response
+Date: August 2012
+Contact: xiaoyan.zhang@intel.com
+Description:
+ This attribute shows the response to the most recent operation
+ request it acted upon. The format is "<request> <response num>
+ : <response description>".
+ This file is readonly.
+
+What: /sys/devices/pnp0/<bus-num>/ppi/transition_action
+Date: August 2012
+Contact: xiaoyan.zhang@intel.com
+Description:
+ This attribute shows the platform-specific action that should
+ take place in order to transition to the BIOS for execution of
+ a requested operation. The format is "<action num>: <action
+ description>".
+ This file is readonly.
+
+What: /sys/devices/pnp0/<bus-num>/ppi/tcg_operations
+Date: August 2012
+Contact: xiaoyan.zhang@intel.com
+Description:
+ This attribute shows whether it is allowed to request an
+ operation to be executed in the pre-OS environment by the BIOS
+ for the requests defined by TCG, i.e. requests from 1 to 22.
+ The format is "<request> <status num>: <status description>".
+ This attribute is only supported by PPI version 1.2+.
+ This file is readonly.
+
+What: /sys/devices/pnp0/<bus-num>/ppi/vs_operations
+Date: August 2012
+Contact: xiaoyan.zhang@intel.com
+Description:
+ This attribute shows whether it is allowed to request an
+ operation to be executed in the pre-OS environment by the BIOS
+ for the verdor specific requests, i.e. requests from 128 to
+ 255. The format is same with tcg_operations. This attribute
+ is also only supported by PPI version 1.2+.
+ This file is readonly.
diff --git a/Documentation/ABI/testing/sysfs-driver-samsung-laptop b/Documentation/ABI/testing/sysfs-driver-samsung-laptop
new file mode 100644
index 000000000..63c1ad021
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-samsung-laptop
@@ -0,0 +1,45 @@
+What: /sys/devices/platform/samsung/performance_level
+Date: January 1, 2010
+KernelVersion: 2.6.33
+Contact: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Description: Some Samsung laptops have different "performance levels"
+ that are can be modified by a function key, and by this
+ sysfs file. These values don't always make a whole lot
+ of sense, but some users like to modify them to keep
+ their fans quiet at all costs. Reading from this file
+ will show the current performance level. Writing to the
+ file can change this value.
+ Valid options:
+ "silent"
+ "normal"
+ "overclock"
+ Note that not all laptops support all of these options.
+ Specifically, not all support the "overclock" option,
+ and it's still unknown if this value even changes
+ anything, other than making the user feel a bit better.
+
+What: /sys/devices/platform/samsung/battery_life_extender
+Date: December 1, 2011
+KernelVersion: 3.3
+Contact: Corentin Chary <corentin.chary@gmail.com>
+Description: Max battery charge level can be modified, battery cycle
+ life can be extended by reducing the max battery charge
+ level.
+ 0 means normal battery mode (100% charge)
+ 1 means battery life extender mode (80% charge)
+
+What: /sys/devices/platform/samsung/usb_charge
+Date: December 1, 2011
+KernelVersion: 3.3
+Contact: Corentin Chary <corentin.chary@gmail.com>
+Description: Use your USB ports to charge devices, even
+ when your laptop is powered off.
+ 1 means enabled, 0 means disabled.
+
+What: /sys/devices/platform/samsung/lid_handling
+Date: December 11, 2014
+KernelVersion: 3.19
+Contact: Julijonas Kikutis <julijonas.kikutis@gmail.com>
+Description: Some Samsung laptops handle lid closing quicker and
+ only handle lid opening with this mode enabled.
+ 1 means enabled, 0 means disabled.
diff --git a/Documentation/ABI/testing/sysfs-driver-sunxi-sid b/Documentation/ABI/testing/sysfs-driver-sunxi-sid
new file mode 100644
index 000000000..ffb9536f6
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-sunxi-sid
@@ -0,0 +1,22 @@
+What: /sys/devices/*/<our-device>/eeprom
+Date: August 2013
+Contact: Oliver Schinagl <oliver@schinagl.nl>
+Description: read-only access to the SID (Security-ID) on current
+ A-series SoC's from Allwinner. Currently supports A10, A10s, A13
+ and A20 CPU's. The earlier A1x series of SoCs exports 16 bytes,
+ whereas the newer A20 SoC exposes 512 bytes split into sections.
+ Besides the 16 bytes of SID, there's also an SJTAG area,
+ HDMI-HDCP key and some custom keys. Below a quick overview, for
+ details see the user manual:
+ 0x000 128 bit root-key (sun[457]i)
+ 0x010 128 bit boot-key (sun7i)
+ 0x020 64 bit security-jtag-key (sun7i)
+ 0x028 16 bit key configuration (sun7i)
+ 0x02b 16 bit custom-vendor-key (sun7i)
+ 0x02c 320 bit low general key (sun7i)
+ 0x040 32 bit read-control access (sun7i)
+ 0x064 224 bit low general key (sun7i)
+ 0x080 2304 bit HDCP-key (sun7i)
+ 0x1a0 768 bit high general key (sun7i)
+Users: any user space application which wants to read the SID on
+ Allwinner's A-series of CPU's.
diff --git a/Documentation/ABI/testing/sysfs-driver-tegra-fuse b/Documentation/ABI/testing/sysfs-driver-tegra-fuse
new file mode 100644
index 000000000..69f5af632
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-tegra-fuse
@@ -0,0 +1,11 @@
+What: /sys/devices/*/<our-device>/fuse
+Date: February 2014
+Contact: Peter De Schrijver <pdeschrijver@nvidia.com>
+Description: read-only access to the efuses on Tegra20, Tegra30, Tegra114
+ and Tegra124 SoC's from NVIDIA. The efuses contain write once
+ data programmed at the factory. The data is layed out in 32bit
+ words in LSB first format. Each bit represents a single value
+ as decoded from the fuse registers. Bits order/assignment
+ exactly matches the HW registers, including any unused bits.
+Users: any user space application which wants to read the efuses on
+ Tegra SoC's
diff --git a/Documentation/ABI/testing/sysfs-driver-toshiba_acpi b/Documentation/ABI/testing/sysfs-driver-toshiba_acpi
new file mode 100644
index 000000000..eed922ef4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-toshiba_acpi
@@ -0,0 +1,181 @@
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/kbd_backlight_mode
+Date: June 8, 2014
+KernelVersion: 3.15
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file controls the keyboard backlight operation mode, valid
+ values are:
+ * 0x1 -> FN-Z
+ * 0x2 -> AUTO (also called TIMER)
+ * 0x8 -> ON
+ * 0x10 -> OFF
+ Note that from kernel 3.16 onwards this file accepts all listed
+ parameters, kernel 3.15 only accepts the first two (FN-Z and
+ AUTO).
+ Also note that toggling this value on type 1 devices, requires
+ a reboot for changes to take effect.
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/kbd_backlight_timeout
+Date: June 8, 2014
+KernelVersion: 3.15
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file controls the timeout of the keyboard backlight
+ whenever the operation mode is set to AUTO (or TIMER),
+ valid values range from 0-60.
+ Note that the kernel 3.15 only had support for the first
+ keyboard type, the kernel 3.16 added support for the second
+ type and the range accepted for type 2 is 1-60.
+ See the entry named "kbd_type"
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/position
+Date: June 8, 2014
+KernelVersion: 3.15
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file shows the absolute position of the built-in
+ accelereometer.
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/touchpad
+Date: June 8, 2014
+KernelVersion: 3.15
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This files controls the status of the touchpad and pointing
+ stick (if available), valid values are:
+ * 0 -> OFF
+ * 1 -> ON
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/available_kbd_modes
+Date: August 3, 2014
+KernelVersion: 3.16
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file shows the supported keyboard backlight modes
+ the system supports, which can be:
+ * 0x1 -> FN-Z
+ * 0x2 -> AUTO (also called TIMER)
+ * 0x8 -> ON
+ * 0x10 -> OFF
+ Note that not all keyboard types support the listed modes.
+ See the entry named "available_kbd_modes"
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/kbd_type
+Date: August 3, 2014
+KernelVersion: 3.16
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file shows the current keyboard backlight type,
+ which can be:
+ * 1 -> Type 1, supporting modes FN-Z and AUTO
+ * 2 -> Type 2, supporting modes TIMER, ON and OFF
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_sleep_charge
+Date: January 23, 2015
+KernelVersion: 4.0
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file controls the USB Sleep & Charge charging mode, which
+ can be:
+ * 0 -> Disabled (0x00)
+ * 1 -> Alternate (0x09)
+ * 2 -> Auto (0x21)
+ * 3 -> Typical (0x11)
+ Note that from kernel 4.1 onwards this file accepts all listed
+ values, kernel 4.0 only supports the first three.
+ Note that this feature only works when connected to power, if
+ you want to use it under battery, see the entry named
+ "sleep_functions_on_battery"
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/sleep_functions_on_battery
+Date: January 23, 2015
+KernelVersion: 4.0
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file controls the USB Sleep Functions under battery, and
+ set the level at which point they will be disabled, accepted
+ values can be:
+ * 0 -> Disabled
+ * 1-100 -> Battery level to disable sleep functions
+ Currently it prints two values, the first one indicates if the
+ feature is enabled or disabled, while the second one shows the
+ current battery level set.
+ Note that when the value is set to disabled, the sleep function
+ will only work when connected to power.
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_rapid_charge
+Date: January 23, 2015
+KernelVersion: 4.0
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file controls the USB Rapid Charge state, which can be:
+ * 0 -> Disabled
+ * 1 -> Enabled
+ Note that toggling this value requires a reboot for changes to
+ take effect.
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_sleep_music
+Date: January 23, 2015
+KernelVersion: 4.0
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file controls the Sleep & Music state, which values can be:
+ * 0 -> Disabled
+ * 1 -> Enabled
+ Note that this feature only works when connected to power, if
+ you want to use it under battery, see the entry named
+ "sleep_functions_on_battery"
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/version
+Date: February 12, 2015
+KernelVersion: 4.0
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file shows the current version of the driver
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/fan
+Date: February 12, 2015
+KernelVersion: 4.0
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file controls the state of the internal fan, valid
+ values are:
+ * 0 -> OFF
+ * 1 -> ON
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/kbd_function_keys
+Date: February 12, 2015
+KernelVersion: 4.0
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file controls the Special Functions (hotkeys) operation
+ mode, valid values are:
+ * 0 -> Normal Operation
+ * 1 -> Special Functions
+ In the "Normal Operation" mode, the F{1-12} keys are as usual
+ and the hotkeys are accessed via FN-F{1-12}.
+ In the "Special Functions" mode, the F{1-12} keys trigger the
+ hotkey and the F{1-12} keys are accessed via FN-F{1-12}.
+ Note that toggling this value requires a reboot for changes to
+ take effect.
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/panel_power_on
+Date: February 12, 2015
+KernelVersion: 4.0
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file controls whether the laptop should turn ON whenever
+ the LID is opened, valid values are:
+ * 0 -> Disabled
+ * 1 -> Enabled
+ Note that toggling this value requires a reboot for changes to
+ take effect.
+Users: KToshiba
+
+What: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS{1900,620{0,7,8}}:00/usb_three
+Date: February 12, 2015
+KernelVersion: 4.0
+Contact: Azael Avalos <coproscefalo@gmail.com>
+Description: This file controls the USB 3 functionality, valid values are:
+ * 0 -> Disabled (Acts as a regular USB 2)
+ * 1 -> Enabled (Full USB 3 functionality)
+ Note that toggling this value requires a reboot for changes to
+ take effect.
+Users: KToshiba
diff --git a/Documentation/ABI/testing/sysfs-driver-wacom b/Documentation/ABI/testing/sysfs-driver-wacom
new file mode 100644
index 000000000..c4f0fed64
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-wacom
@@ -0,0 +1,79 @@
+What: /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/speed
+Date: April 2010
+Kernel Version: 2.6.35
+Contact: linux-bluetooth@vger.kernel.org
+Description:
+ The /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/speed file
+ controls reporting speed of Wacom bluetooth tablet. Reading
+ from this file returns 1 if tablet reports in high speed mode
+ or 0 otherwise. Writing to this file one of these values
+ switches reporting speed.
+
+What: /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/wacom_led/led
+Date: August 2014
+Contact: linux-input@vger.kernel.org
+Description:
+ Attribute group for control of the status LEDs and the OLEDs.
+ This attribute group is only available for Intuos 4 M, L,
+ and XL (with LEDs and OLEDs), Intuos 4 WL, Intuos 5 (LEDs only),
+ Intuos Pro (LEDs only) and Cintiq 21UX2 and Cintiq 24HD
+ (LEDs only). Therefore its presence implicitly signifies the
+ presence of said LEDs and OLEDs on the tablet device.
+
+What: /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/wacom_led/status0_luminance
+Date: August 2014
+Contact: linux-input@vger.kernel.org
+Description:
+ Writing to this file sets the status LED luminance (1..127)
+ when the stylus does not touch the tablet surface, and no
+ button is pressed on the stylus. This luminance level is
+ normally lower than the level when a button is pressed.
+
+What: /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/wacom_led/status1_luminance
+Date: August 2014
+Contact: linux-input@vger.kernel.org
+Description:
+ Writing to this file sets the status LED luminance (1..127)
+ when the stylus touches the tablet surface, or any button is
+ pressed on the stylus.
+
+What: /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/wacom_led/status_led0_select
+Date: August 2014
+Contact: linux-input@vger.kernel.org
+Description:
+ Writing to this file sets which one of the four (for Intuos 4
+ and Intuos 5) or of the right four (for Cintiq 21UX2 and Cintiq
+ 24HD) status LEDs is active (0..3). The other three LEDs on the
+ same side are always inactive.
+
+What: /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/wacom_led/status_led1_select
+Date: August 2014
+Contact: linux-input@vger.kernel.org
+Description:
+ Writing to this file sets which one of the left four (for Cintiq 21UX2
+ and Cintiq 24HD) status LEDs is active (0..3). The other three LEDs on
+ the left are always inactive.
+
+What: /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/wacom_led/buttons_luminance
+Date: August 2014
+Contact: linux-input@vger.kernel.org
+Description:
+ Writing to this file sets the overall luminance level (0..15)
+ of all eight button OLED displays.
+
+What: /sys/bus/hid/devices/<bus>:<vid>:<pid>.<n>/wacom_led/button<n>_rawimg
+Date: August 2014
+Contact: linux-input@vger.kernel.org
+Description:
+ When writing a 1024 byte raw image in Wacom Intuos 4
+ interleaving format to the file, the image shows up on Button N
+ of the device. The image is a 64x32 pixel 4-bit gray image. The
+ 1024 byte binary is split up into 16x 64 byte chunks. Each 64
+ byte chunk encodes the image data for two consecutive lines on
+ the display. The low nibble of each byte contains the first
+ line, and the high nibble contains the second line.
+ When the Wacom Intuos 4 is connected over Bluetooth, the
+ image has to contain 256 bytes (64x32 px 1 bit colour).
+ The format is also scrambled, like in the USB mode, and it can
+ be summarized by converting 76543210 into GECA6420.
+ HGFEDCBA HFDB7531
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback
new file mode 100644
index 000000000..8bb43b66e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback
@@ -0,0 +1,17 @@
+What: /sys/module/xen_blkback/parameters/max_buffer_pages
+Date: March 2013
+KernelVersion: 3.11
+Contact: Roger Pau Monné <roger.pau@citrix.com>
+Description:
+ Maximum number of free pages to keep in each block
+ backend buffer.
+
+What: /sys/module/xen_blkback/parameters/max_persistent_grants
+Date: March 2013
+KernelVersion: 3.11
+Contact: Roger Pau Monné <roger.pau@citrix.com>
+Description:
+ Maximum number of grants to map persistently in
+ blkback. If the frontend tries to use more than
+ max_persistent_grants, the LRU kicks in and starts
+ removing 5% of max_persistent_grants every 100ms.
diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
new file mode 100644
index 000000000..c0a6cb7eb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront
@@ -0,0 +1,10 @@
+What: /sys/module/xen_blkfront/parameters/max
+Date: June 2013
+KernelVersion: 3.11
+Contact: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Description:
+ Maximum number of segments that the frontend will negotiate
+ with the backend for indirect descriptors. The default value
+ is 32 - higher value means more potential throughput but more
+ memory usage. The backend picks the minimum of the frontend
+ and its default backend value.
diff --git a/Documentation/ABI/testing/sysfs-firmware-acpi b/Documentation/ABI/testing/sysfs-firmware-acpi
new file mode 100644
index 000000000..b4436cca9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-acpi
@@ -0,0 +1,206 @@
+What: /sys/firmware/acpi/bgrt/
+Date: January 2012
+Contact: Matthew Garrett <mjg@redhat.com>
+Description:
+ The BGRT is an ACPI 5.0 feature that allows the OS
+ to obtain a copy of the firmware boot splash and
+ some associated metadata. This is intended to be used
+ by boot splash applications in order to interact with
+ the firmware boot splash in order to avoid jarring
+ transitions.
+
+ image: The image bitmap. Currently a 32-bit BMP.
+ status: 1 if the image is valid, 0 if firmware invalidated it.
+ type: 0 indicates image is in BMP format.
+ version: The version of the BGRT. Currently 1.
+ xoffset: The number of pixels between the left of the screen
+ and the left edge of the image.
+ yoffset: The number of pixels between the top of the screen
+ and the top edge of the image.
+
+What: /sys/firmware/acpi/hotplug/
+Date: February 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ There are separate hotplug profiles for different classes of
+ devices supported by ACPI, such as containers, memory modules,
+ processors, PCI root bridges etc. A hotplug profile for a given
+ class of devices is a collection of settings defining the way
+ that class of devices will be handled by the ACPI core hotplug
+ code. Those profiles are represented in sysfs as subdirectories
+ of /sys/firmware/acpi/hotplug/.
+
+ The following setting is available to user space for each
+ hotplug profile:
+
+ enabled: If set, the ACPI core will handle notifications of
+ hotplug events associated with the given class of
+ devices and will allow those devices to be ejected with
+ the help of the _EJ0 control method. Unsetting it
+ effectively disables hotplug for the correspoinding
+ class of devices.
+
+ The value of the above attribute is an integer number: 1 (set)
+ or 0 (unset). Attempts to write any other values to it will
+ cause -EINVAL to be returned.
+
+What: /sys/firmware/acpi/hotplug/force_remove
+Date: May 2013
+Contact: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Description:
+ The number in this file (0 or 1) determines whether (1) or not
+ (0) the ACPI subsystem will allow devices to be hot-removed even
+ if they cannot be put offline gracefully (from the kernel's
+ viewpoint). That number can be changed by writing a boolean
+ value to this file.
+
+What: /sys/firmware/acpi/interrupts/
+Date: February 2008
+Contact: Len Brown <lenb@kernel.org>
+Description:
+ All ACPI interrupts are handled via a single IRQ,
+ the System Control Interrupt (SCI), which appears
+ as "acpi" in /proc/interrupts.
+
+ However, one of the main functions of ACPI is to make
+ the platform understand random hardware without
+ special driver support. So while the SCI handles a few
+ well known (fixed feature) interrupts sources, such
+ as the power button, it can also handle a variable
+ number of a "General Purpose Events" (GPE).
+
+ A GPE vectors to a specified handler in AML, which
+ can do a anything the BIOS writer wants from
+ OS context. GPE 0x12, for example, would vector
+ to a level or edge handler called _L12 or _E12.
+ The handler may do its business and return.
+ Or the handler may send send a Notify event
+ to a Linux device driver registered on an ACPI device,
+ such as a battery, or a processor.
+
+ To figure out where all the SCI's are coming from,
+ /sys/firmware/acpi/interrupts contains a file listing
+ every possible source, and the count of how many
+ times it has triggered.
+
+ $ cd /sys/firmware/acpi/interrupts
+ $ grep . *
+ error: 0
+ ff_gbl_lock: 0 enable
+ ff_pmtimer: 0 invalid
+ ff_pwr_btn: 0 enable
+ ff_rt_clk: 2 disable
+ ff_slp_btn: 0 invalid
+ gpe00: 0 invalid
+ gpe01: 0 enable
+ gpe02: 108 enable
+ gpe03: 0 invalid
+ gpe04: 0 invalid
+ gpe05: 0 invalid
+ gpe06: 0 enable
+ gpe07: 0 enable
+ gpe08: 0 invalid
+ gpe09: 0 invalid
+ gpe0A: 0 invalid
+ gpe0B: 0 invalid
+ gpe0C: 0 invalid
+ gpe0D: 0 invalid
+ gpe0E: 0 invalid
+ gpe0F: 0 invalid
+ gpe10: 0 invalid
+ gpe11: 0 invalid
+ gpe12: 0 invalid
+ gpe13: 0 invalid
+ gpe14: 0 invalid
+ gpe15: 0 invalid
+ gpe16: 0 invalid
+ gpe17: 1084 enable
+ gpe18: 0 enable
+ gpe19: 0 invalid
+ gpe1A: 0 invalid
+ gpe1B: 0 invalid
+ gpe1C: 0 invalid
+ gpe1D: 0 invalid
+ gpe1E: 0 invalid
+ gpe1F: 0 invalid
+ gpe_all: 1192
+ sci: 1194
+ sci_not: 0
+
+ sci - The number of times the ACPI SCI
+ has been called and claimed an interrupt.
+
+ sci_not - The number of times the ACPI SCI
+ has been called and NOT claimed an interrupt.
+
+ gpe_all - count of SCI caused by GPEs.
+
+ gpeXX - count for individual GPE source
+
+ ff_gbl_lock - Global Lock
+
+ ff_pmtimer - PM Timer
+
+ ff_pwr_btn - Power Button
+
+ ff_rt_clk - Real Time Clock
+
+ ff_slp_btn - Sleep Button
+
+ error - an interrupt that can't be accounted for above.
+
+ invalid: it's either a GPE or a Fixed Event that
+ doesn't have an event handler.
+
+ disable: the GPE/Fixed Event is valid but disabled.
+
+ enable: the GPE/Fixed Event is valid and enabled.
+
+ Root has permission to clear any of these counters. Eg.
+ # echo 0 > gpe11
+
+ All counters can be cleared by clearing the total "sci":
+ # echo 0 > sci
+
+ None of these counters has an effect on the function
+ of the system, they are simply statistics.
+
+ Besides this, user can also write specific strings to these files
+ to enable/disable/clear ACPI interrupts in user space, which can be
+ used to debug some ACPI interrupt storm issues.
+
+ Note that only writting to VALID GPE/Fixed Event is allowed,
+ i.e. user can only change the status of runtime GPE and
+ Fixed Event with event handler installed.
+
+ Let's take power button fixed event for example, please kill acpid
+ and other user space applications so that the machine won't shutdown
+ when pressing the power button.
+ # cat ff_pwr_btn
+ 0 enabled
+ # press the power button for 3 times;
+ # cat ff_pwr_btn
+ 3 enabled
+ # echo disable > ff_pwr_btn
+ # cat ff_pwr_btn
+ 3 disabled
+ # press the power button for 3 times;
+ # cat ff_pwr_btn
+ 3 disabled
+ # echo enable > ff_pwr_btn
+ # cat ff_pwr_btn
+ 4 enabled
+ /*
+ * this is because the status bit is set even if the enable bit is cleared,
+ * and it triggers an ACPI fixed event when the enable bit is set again
+ */
+ # press the power button for 3 times;
+ # cat ff_pwr_btn
+ 7 enabled
+ # echo disable > ff_pwr_btn
+ # press the power button for 3 times;
+ # echo clear > ff_pwr_btn /* clear the status bit */
+ # echo disable > ff_pwr_btn
+ # cat ff_pwr_btn
+ 7 enabled
+
diff --git a/Documentation/ABI/testing/sysfs-firmware-dmi b/Documentation/ABI/testing/sysfs-firmware-dmi
new file mode 100644
index 000000000..c78f9ab01
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-dmi
@@ -0,0 +1,110 @@
+What: /sys/firmware/dmi/
+Date: February 2011
+Contact: Mike Waychison <mikew@google.com>
+Description:
+ Many machines' firmware (x86 and ia64) export DMI /
+ SMBIOS tables to the operating system. Getting at this
+ information is often valuable to userland, especially in
+ cases where there are OEM extensions used.
+
+ The kernel itself does not rely on the majority of the
+ information in these tables being correct. It equally
+ cannot ensure that the data as exported to userland is
+ without error either.
+
+ DMI is structured as a large table of entries, where
+ each entry has a common header indicating the type and
+ length of the entry, as well as a firmware-provided
+ 'handle' that is supposed to be unique amongst all
+ entries.
+
+ Some entries are required by the specification, but many
+ others are optional. In general though, users should
+ never expect to find a specific entry type on their
+ system unless they know for certain what their firmware
+ is doing. Machine to machine experiences will vary.
+
+ Multiple entries of the same type are allowed. In order
+ to handle these duplicate entry types, each entry is
+ assigned by the operating system an 'instance', which is
+ derived from an entry type's ordinal position. That is
+ to say, if there are 'N' multiple entries with the same type
+ 'T' in the DMI tables (adjacent or spread apart, it
+ doesn't matter), they will be represented in sysfs as
+ entries "T-0" through "T-(N-1)":
+
+ Example entry directories:
+
+ /sys/firmware/dmi/entries/17-0
+ /sys/firmware/dmi/entries/17-1
+ /sys/firmware/dmi/entries/17-2
+ /sys/firmware/dmi/entries/17-3
+ ...
+
+ Instance numbers are used in lieu of the firmware
+ assigned entry handles as the kernel itself makes no
+ guarantees that handles as exported are unique, and
+ there are likely firmware images that get this wrong in
+ the wild.
+
+ Each DMI entry in sysfs has the common header values
+ exported as attributes:
+
+ handle : The 16bit 'handle' that is assigned to this
+ entry by the firmware. This handle may be
+ referred to by other entries.
+ length : The length of the entry, as presented in the
+ entry itself. Note that this is _not the
+ total count of bytes associated with the
+ entry_. This value represents the length of
+ the "formatted" portion of the entry. This
+ "formatted" region is sometimes followed by
+ the "unformatted" region composed of nul
+ terminated strings, with termination signalled
+ by a two nul characters in series.
+ raw : The raw bytes of the entry. This includes the
+ "formatted" portion of the entry, the
+ "unformatted" strings portion of the entry,
+ and the two terminating nul characters.
+ type : The type of the entry. This value is the same
+ as found in the directory name. It indicates
+ how the rest of the entry should be interpreted.
+ instance: The instance ordinal of the entry for the
+ given type. This value is the same as found
+ in the parent directory name.
+ position: The ordinal position (zero-based) of the entry
+ within the entirety of the DMI entry table.
+
+ === Entry Specialization ===
+
+ Some entry types may have other information available in
+ sysfs. Not all types are specialized.
+
+ --- Type 15 - System Event Log ---
+
+ This entry allows the firmware to export a log of
+ events the system has taken. This information is
+ typically backed by nvram, but the implementation
+ details are abstracted by this table. This entry's data
+ is exported in the directory:
+
+ /sys/firmware/dmi/entries/15-0/system_event_log
+
+ and has the following attributes (documented in the
+ SMBIOS / DMI specification under "System Event Log (Type 15)":
+
+ area_length
+ header_start_offset
+ data_start_offset
+ access_method
+ status
+ change_token
+ access_method_address
+ header_format
+ per_log_type_descriptor_length
+ type_descriptors_supported_count
+
+ As well, the kernel exports the binary attribute:
+
+ raw_event_log : The raw binary bits of the event log
+ as described by the DMI entry.
diff --git a/Documentation/ABI/testing/sysfs-firmware-efi b/Documentation/ABI/testing/sysfs-firmware-efi
new file mode 100644
index 000000000..05874da7c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-efi
@@ -0,0 +1,20 @@
+What: /sys/firmware/efi/fw_vendor
+Date: December 2013
+Contact: Dave Young <dyoung@redhat.com>
+Description: It shows the physical address of firmware vendor field in the
+ EFI system table.
+Users: Kexec
+
+What: /sys/firmware/efi/runtime
+Date: December 2013
+Contact: Dave Young <dyoung@redhat.com>
+Description: It shows the physical address of runtime service table entry in
+ the EFI system table.
+Users: Kexec
+
+What: /sys/firmware/efi/config_table
+Date: December 2013
+Contact: Dave Young <dyoung@redhat.com>
+Description: It shows the physical address of config table entry in the EFI
+ system table.
+Users: Kexec
diff --git a/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map
new file mode 100644
index 000000000..c61b9b348
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-efi-runtime-map
@@ -0,0 +1,34 @@
+What: /sys/firmware/efi/runtime-map/
+Date: December 2013
+Contact: Dave Young <dyoung@redhat.com>
+Description: Switching efi runtime services to virtual mode requires
+ that all efi memory ranges which have the runtime attribute
+ bit set to be mapped to virtual addresses.
+
+ The efi runtime services can only be switched to virtual
+ mode once without rebooting. The kexec kernel must maintain
+ the same physical to virtual address mappings as the first
+ kernel. The mappings are exported to sysfs so userspace tools
+ can reassemble them and pass them into the kexec kernel.
+
+ /sys/firmware/efi/runtime-map/ is the directory the kernel
+ exports that information in.
+
+ subdirectories are named with the number of the memory range:
+
+ /sys/firmware/efi/runtime-map/0
+ /sys/firmware/efi/runtime-map/1
+ /sys/firmware/efi/runtime-map/2
+ /sys/firmware/efi/runtime-map/3
+ ...
+
+ Each subdirectory contains five files:
+
+ attribute : The attributes of the memory range.
+ num_pages : The size of the memory range in pages.
+ phys_addr : The physical address of the memory range.
+ type : The type of the memory range.
+ virt_addr : The virtual address of the memory range.
+
+ Above values are all hexadecimal numbers with the '0x' prefix.
+Users: Kexec
diff --git a/Documentation/ABI/testing/sysfs-firmware-gsmi b/Documentation/ABI/testing/sysfs-firmware-gsmi
new file mode 100644
index 000000000..0faa0aaf4
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-gsmi
@@ -0,0 +1,58 @@
+What: /sys/firmware/gsmi
+Date: March 2011
+Contact: Mike Waychison <mikew@google.com>
+Description:
+ Some servers used internally at Google have firmware
+ that provides callback functionality via explicit SMI
+ triggers. Some of the callbacks are similar to those
+ provided by the EFI runtime services page, but due to
+ historical reasons this different entry-point has been
+ used.
+
+ The gsmi driver implements the kernel's abstraction for
+ these firmware callbacks. Currently, this functionality
+ is limited to handling the system event log and getting
+ access to EFI-style variables stored in nvram.
+
+ Layout:
+
+ /sys/firmware/gsmi/vars:
+
+ This directory has the same layout (and
+ underlying implementation as /sys/firmware/efi/vars.
+ See Documentation/ABI/*/sysfs-firmware-efi-vars
+ for more information on how to interact with
+ this structure.
+
+ /sys/firmware/gsmi/append_to_eventlog - write-only:
+
+ This file takes a binary blob and passes it onto
+ the firmware to be timestamped and appended to
+ the system eventlog. The binary format is
+ interpreted by the firmware and may change from
+ platform to platform. The only kernel-enforced
+ requirement is that the blob be prefixed with a
+ 32bit host-endian type used as part of the
+ firmware call.
+
+ /sys/firmware/gsmi/clear_config - write-only:
+
+ Writing any value to this file will cause the
+ entire firmware configuration to be reset to
+ "factory defaults". Callers should assume that
+ a reboot is required for the configuration to be
+ cleared.
+
+ /sys/firmware/gsmi/clear_eventlog - write-only:
+
+ This file is used to clear out a portion/the
+ whole of the system event log. Values written
+ should be values between 1 and 100 inclusive (in
+ ASCII) representing the fraction of the log to
+ clear. Not all platforms support fractional
+ clearing though, and this writes to this file
+ will error out if the firmware doesn't like your
+ submitted fraction.
+
+ Callers should assume that a reboot is needed
+ for this operation to complete.
diff --git a/Documentation/ABI/testing/sysfs-firmware-log b/Documentation/ABI/testing/sysfs-firmware-log
new file mode 100644
index 000000000..9b58e7c53
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-log
@@ -0,0 +1,7 @@
+What: /sys/firmware/log
+Date: February 2011
+Contact: Mike Waychison <mikew@google.com>
+Description:
+ The /sys/firmware/log is a binary file that represents a
+ read-only copy of the firmware's log if one is
+ available.
diff --git a/Documentation/ABI/testing/sysfs-firmware-memmap b/Documentation/ABI/testing/sysfs-firmware-memmap
new file mode 100644
index 000000000..eca0d6508
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-memmap
@@ -0,0 +1,71 @@
+What: /sys/firmware/memmap/
+Date: June 2008
+Contact: Bernhard Walle <bernhard.walle@gmx.de>
+Description:
+ On all platforms, the firmware provides a memory map which the
+ kernel reads. The resources from that memory map are registered
+ in the kernel resource tree and exposed to userspace via
+ /proc/iomem (together with other resources).
+
+ However, on most architectures that firmware-provided memory
+ map is modified afterwards by the kernel itself, either because
+ the kernel merges that memory map with other information or
+ just because the user overwrites that memory map via command
+ line.
+
+ kexec needs the raw firmware-provided memory map to setup the
+ parameter segment of the kernel that should be booted with
+ kexec. Also, the raw memory map is useful for debugging. For
+ that reason, /sys/firmware/memmap is an interface that provides
+ the raw memory map to userspace.
+
+ The structure is as follows: Under /sys/firmware/memmap there
+ are subdirectories with the number of the entry as their name:
+
+ /sys/firmware/memmap/0
+ /sys/firmware/memmap/1
+ /sys/firmware/memmap/2
+ /sys/firmware/memmap/3
+ ...
+
+ The maximum depends on the number of memory map entries provided
+ by the firmware. The order is just the order that the firmware
+ provides.
+
+ Each directory contains three files:
+
+ start : The start address (as hexadecimal number with the
+ '0x' prefix).
+ end : The end address, inclusive (regardless whether the
+ firmware provides inclusive or exclusive ranges).
+ type : Type of the entry as string. See below for a list of
+ valid types.
+
+ So, for example:
+
+ /sys/firmware/memmap/0/start
+ /sys/firmware/memmap/0/end
+ /sys/firmware/memmap/0/type
+ /sys/firmware/memmap/1/start
+ ...
+
+ Currently following types exist:
+
+ - System RAM
+ - ACPI Tables
+ - ACPI Non-volatile Storage
+ - reserved
+
+ Following shell snippet can be used to display that memory
+ map in a human-readable format:
+
+ -------------------- 8< ----------------------------------------
+ #!/bin/bash
+ cd /sys/firmware/memmap
+ for dir in * ; do
+ start=$(cat $dir/start)
+ end=$(cat $dir/end)
+ type=$(cat $dir/type)
+ printf "%016x-%016x (%s)\n" $start $[ $end +1] "$type"
+ done
+ -------------------- >8 ----------------------------------------
diff --git a/Documentation/ABI/testing/sysfs-firmware-ofw b/Documentation/ABI/testing/sysfs-firmware-ofw
new file mode 100644
index 000000000..f562b188e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-ofw
@@ -0,0 +1,28 @@
+What: /sys/firmware/devicetree/*
+Date: November 2013
+Contact: Grant Likely <grant.likely@linaro.org>
+Description:
+ When using OpenFirmware or a Flattened Device Tree to enumerate
+ hardware, the device tree structure will be exposed in this
+ directory.
+
+ It is possible for multiple device-tree directories to exist.
+ Some device drivers use a separate detached device tree which
+ have no attachment to the system tree and will appear in a
+ different subdirectory under /sys/firmware/devicetree.
+
+ Userspace must not use the /sys/firmware/devicetree/base
+ path directly, but instead should follow /proc/device-tree
+ symlink. It is possible that the absolute path will change
+ in the future, but the symlink is the stable ABI.
+
+ The /proc/device-tree symlink replaces the devicetree /proc
+ filesystem support, and has largely the same semantics and
+ should be compatible with existing userspace.
+
+ The contents of /sys/firmware/devicetree/ is a
+ hierarchy of directories, one per device tree node. The
+ directory name is the resolved path component name (node
+ name plus address). Properties are represented as files
+ in the directory. The contents of each file is the exact
+ binary data from the device tree.
diff --git a/Documentation/ABI/testing/sysfs-firmware-sfi b/Documentation/ABI/testing/sysfs-firmware-sfi
new file mode 100644
index 000000000..4be7d44ae
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-sfi
@@ -0,0 +1,15 @@
+What: /sys/firmware/sfi/tables/
+Date: May 2010
+Contact: Len Brown <lenb@kernel.org>
+Description:
+ SFI defines a number of small static memory tables
+ so the kernel can get platform information from firmware.
+
+ The tables are defined in the latest SFI specification:
+ http://simplefirmware.org/documentation
+
+ While the tables are used by the kernel, user-space
+ can observe them this way:
+
+ # cd /sys/firmware/sfi/tables
+ # cat $TABLENAME > $TABLENAME.bin
diff --git a/Documentation/ABI/testing/sysfs-firmware-sgi_uv b/Documentation/ABI/testing/sysfs-firmware-sgi_uv
new file mode 100644
index 000000000..4573fd4b7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-sgi_uv
@@ -0,0 +1,27 @@
+What: /sys/firmware/sgi_uv/
+Date: August 2008
+Contact: Russ Anderson <rja@sgi.com>
+Description:
+ The /sys/firmware/sgi_uv directory contains information
+ about the SGI UV platform.
+
+ Under that directory are a number of files:
+
+ partition_id
+ coherence_id
+
+ The partition_id entry contains the partition id.
+ SGI UV systems can be partitioned into multiple physical
+ machines, which each partition running a unique copy
+ of the operating system. Each partition will have a unique
+ partition id. To display the partition id, use the command:
+
+ cat /sys/firmware/sgi_uv/partition_id
+
+ The coherence_id entry contains the coherence id.
+ A partitioned SGI UV system can have one or more coherence
+ domain. The coherence id indicates which coherence domain
+ this partition is in. To display the coherence id, use the
+ command:
+
+ cat /sys/firmware/sgi_uv/coherence_id
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
new file mode 100644
index 000000000..c631253cf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-ext4
@@ -0,0 +1,111 @@
+What: /sys/fs/ext4/<disk>/mb_stats
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ Controls whether the multiblock allocator should
+ collect statistics, which are shown during the unmount.
+ 1 means to collect statistics, 0 means not to collect
+ statistics
+
+What: /sys/fs/ext4/<disk>/mb_group_prealloc
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ The multiblock allocator will round up allocation
+ requests to a multiple of this tuning parameter if the
+ stripe size is not set in the ext4 superblock
+
+What: /sys/fs/ext4/<disk>/mb_max_to_scan
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ The maximum number of extents the multiblock allocator
+ will search to find the best extent
+
+What: /sys/fs/ext4/<disk>/mb_min_to_scan
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ The minimum number of extents the multiblock allocator
+ will search to find the best extent
+
+What: /sys/fs/ext4/<disk>/mb_order2_req
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ Tuning parameter which controls the minimum size for
+ requests (as a power of 2) where the buddy cache is
+ used
+
+What: /sys/fs/ext4/<disk>/mb_stream_req
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ Files which have fewer blocks than this tunable
+ parameter will have their blocks allocated out of a
+ block group specific preallocation pool, so that small
+ files are packed closely together. Each large file
+ will have its blocks allocated out of its own unique
+ preallocation pool.
+
+What: /sys/fs/ext4/<disk>/inode_readahead_blks
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ Tuning parameter which controls the maximum number of
+ inode table blocks that ext4's inode table readahead
+ algorithm will pre-read into the buffer cache
+
+What: /sys/fs/ext4/<disk>/delayed_allocation_blocks
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ This file is read-only and shows the number of blocks
+ that are dirty in the page cache, but which do not
+ have their location in the filesystem allocated yet.
+
+What: /sys/fs/ext4/<disk>/lifetime_write_kbytes
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ This file is read-only and shows the number of kilobytes
+ of data that have been written to this filesystem since it was
+ created.
+
+What: /sys/fs/ext4/<disk>/session_write_kbytes
+Date: March 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ This file is read-only and shows the number of
+ kilobytes of data that have been written to this
+ filesystem since it was mounted.
+
+What: /sys/fs/ext4/<disk>/inode_goal
+Date: June 2008
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ Tuning parameter which (if non-zero) controls the goal
+ inode used by the inode allocator in preference to
+ all other allocation heuristics. This is intended for
+ debugging use only, and should be 0 on production
+ systems.
+
+What: /sys/fs/ext4/<disk>/max_writeback_mb_bump
+Date: September 2009
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ The maximum number of megabytes the writeback code will
+ try to write out before move on to another inode.
+
+What: /sys/fs/ext4/<disk>/extent_max_zeroout_kb
+Date: August 2012
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ The maximum number of kilobytes which will be zeroed
+ out in preference to creating a new uninitialized
+ extent when manipulating an inode's extent tree. Note
+ that using a larger value will increase the
+ variability of time necessary to complete a random
+ write operation (since a 4k random write might turn
+ into a much larger write due to the zeroout
+ operation).
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
new file mode 100644
index 000000000..2c4cc4200
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -0,0 +1,82 @@
+What: /sys/fs/f2fs/<disk>/gc_max_sleep_time
+Date: July 2013
+Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
+Description:
+ Controls the maximun sleep time for gc_thread. Time
+ is in milliseconds.
+
+What: /sys/fs/f2fs/<disk>/gc_min_sleep_time
+Date: July 2013
+Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
+Description:
+ Controls the minimum sleep time for gc_thread. Time
+ is in milliseconds.
+
+What: /sys/fs/f2fs/<disk>/gc_no_gc_sleep_time
+Date: July 2013
+Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
+Description:
+ Controls the default sleep time for gc_thread. Time
+ is in milliseconds.
+
+What: /sys/fs/f2fs/<disk>/gc_idle
+Date: July 2013
+Contact: "Namjae Jeon" <namjae.jeon@samsung.com>
+Description:
+ Controls the victim selection policy for garbage collection.
+
+What: /sys/fs/f2fs/<disk>/reclaim_segments
+Date: October 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the issue rate of segment discard commands.
+
+What: /sys/fs/f2fs/<disk>/ipu_policy
+Date: November 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the in-place-update policy.
+
+What: /sys/fs/f2fs/<disk>/min_ipu_util
+Date: November 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the FS utilization condition for the in-place-update
+ policies.
+
+What: /sys/fs/f2fs/<disk>/min_fsync_blocks
+Date: September 2014
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the dirty page count condition for the in-place-update
+ policies.
+
+What: /sys/fs/f2fs/<disk>/max_small_discards
+Date: November 2013
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the issue rate of small discard commands.
+
+What: /sys/fs/f2fs/<disk>/max_victim_search
+Date: January 2014
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the number of trials to find a victim segment.
+
+What: /sys/fs/f2fs/<disk>/dir_level
+Date: March 2014
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the directory level for large directory.
+
+What: /sys/fs/f2fs/<disk>/ram_thresh
+Date: March 2014
+Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
+Description:
+ Controls the memory footprint used by f2fs.
+
+What: /sys/fs/f2fs/<disk>/trim_sections
+Date: February 2015
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description:
+ Controls the trimming rate in batch mode.
diff --git a/Documentation/ABI/testing/sysfs-fs-nilfs2 b/Documentation/ABI/testing/sysfs-fs-nilfs2
new file mode 100644
index 000000000..304ba84a9
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-nilfs2
@@ -0,0 +1,269 @@
+
+What: /sys/fs/nilfs2/features/revision
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show current revision of NILFS file system driver.
+ This value informs about file system revision that
+ driver is ready to support.
+
+What: /sys/fs/nilfs2/features/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/features group.
+
+What: /sys/fs/nilfs2/<device>/revision
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show NILFS file system revision on volume.
+ This value informs about metadata structures'
+ revision on mounted volume.
+
+What: /sys/fs/nilfs2/<device>/blocksize
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's block size in bytes.
+
+What: /sys/fs/nilfs2/<device>/device_size
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume size in bytes.
+
+What: /sys/fs/nilfs2/<device>/free_blocks
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of free blocks on volume.
+
+What: /sys/fs/nilfs2/<device>/uuid
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's UUID (Universally Unique Identifier).
+
+What: /sys/fs/nilfs2/<device>/volume_name
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show volume's label.
+
+What: /sys/fs/nilfs2/<device>/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device> group.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show last write time of super block in human-readable
+ format.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show last write time of super block in seconds.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_write_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show current write count of super block.
+
+What: /sys/fs/nilfs2/<device>/superblock/sb_update_frequency
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show/Set interval of periodical update of superblock
+ (in seconds).
+
+What: /sys/fs/nilfs2/<device>/superblock/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/superblock
+ group.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_pseg_block
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show start block number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_sequence
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show sequence value of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show checkpoint number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/current_seg_sequence
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show segment sequence counter.
+
+What: /sys/fs/nilfs2/<device>/segctor/current_last_full_seg
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show index number of the latest full segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_full_seg
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show index number of the full segment index
+ to be used next.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_pseg_offset
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show offset of next partial segment in the current
+ full segment.
+
+What: /sys/fs/nilfs2/<device>/segctor/next_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show next checkpoint number.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment in
+ human-readable format.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_seg_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment in seconds.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_nongc_write_time
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment not for cleaner
+ operation in human-readable format.
+
+What: /sys/fs/nilfs2/<device>/segctor/last_nongc_write_time_secs
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show write time of the last segment not for cleaner
+ operation in seconds.
+
+What: /sys/fs/nilfs2/<device>/segctor/dirty_data_blocks_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of dirty data blocks.
+
+What: /sys/fs/nilfs2/<device>/segctor/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/segctor
+ group.
+
+What: /sys/fs/nilfs2/<device>/segments/segments_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of segments on a volume.
+
+What: /sys/fs/nilfs2/<device>/segments/blocks_per_segment
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of blocks in segment.
+
+What: /sys/fs/nilfs2/<device>/segments/clean_segments
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of clean segments.
+
+What: /sys/fs/nilfs2/<device>/segments/dirty_segments
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show count of dirty segments.
+
+What: /sys/fs/nilfs2/<device>/segments/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/segments
+ group.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/checkpoints_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of checkpoints on volume.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/snapshots_number
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of snapshots on volume.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/last_seg_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show checkpoint number of the latest segment.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/next_checkpoint
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show next checkpoint number.
+
+What: /sys/fs/nilfs2/<device>/checkpoints/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/checkpoints
+ group.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe content of /sys/fs/nilfs2/<device>/mounted_snapshots
+ group.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/inodes_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of inodes for snapshot.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/blocks_count
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Show number of blocks for snapshot.
+
+What: /sys/fs/nilfs2/<device>/mounted_snapshots/<id>/README
+Date: April 2014
+Contact: "Vyacheslav Dubeyko" <slava@dubeyko.com>
+Description:
+ Describe attributes of /sys/fs/nilfs2/<device>/mounted_snapshots/<id>
+ group.
diff --git a/Documentation/ABI/testing/sysfs-fs-xfs b/Documentation/ABI/testing/sysfs-fs-xfs
new file mode 100644
index 000000000..ea0cc8c42
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-fs-xfs
@@ -0,0 +1,39 @@
+What: /sys/fs/xfs/<disk>/log/log_head_lsn
+Date: July 2014
+KernelVersion: 3.17
+Contact: xfs@oss.sgi.com
+Description:
+ The log sequence number (LSN) of the current head of the
+ log. The LSN is exported in "cycle:basic block" format.
+Users: xfstests
+
+What: /sys/fs/xfs/<disk>/log/log_tail_lsn
+Date: July 2014
+KernelVersion: 3.17
+Contact: xfs@oss.sgi.com
+Description:
+ The log sequence number (LSN) of the current tail of the
+ log. The LSN is exported in "cycle:basic block" format.
+
+What: /sys/fs/xfs/<disk>/log/reserve_grant_head
+Date: July 2014
+KernelVersion: 3.17
+Contact: xfs@oss.sgi.com
+Description:
+ The current state of the log reserve grant head. It
+ represents the total log reservation of all currently
+ outstanding transactions. The grant head is exported in
+ "cycle:bytes" format.
+Users: xfstests
+
+What: /sys/fs/xfs/<disk>/log/write_grant_head
+Date: July 2014
+KernelVersion: 3.17
+Contact: xfs@oss.sgi.com
+Description:
+ The current state of the log write grant head. It
+ represents the total log reservation of all currently
+ oustanding transactions, including regrants due to
+ rolling transactions. The grant head is exported in
+ "cycle:bytes" format.
+Users: xfstests
diff --git a/Documentation/ABI/testing/sysfs-gpio b/Documentation/ABI/testing/sysfs-gpio
new file mode 100644
index 000000000..80f4c94c7
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-gpio
@@ -0,0 +1,27 @@
+What: /sys/class/gpio/
+Date: July 2008
+KernelVersion: 2.6.27
+Contact: David Brownell <dbrownell@users.sourceforge.net>
+Description:
+
+ As a Kconfig option, individual GPIO signals may be accessed from
+ userspace. GPIOs are only made available to userspace by an explicit
+ "export" operation. If a given GPIO is not claimed for use by
+ kernel code, it may be exported by userspace (and unexported later).
+ Kernel code may export it for complete or partial access.
+
+ GPIOs are identified as they are inside the kernel, using integers in
+ the range 0..INT_MAX. See Documentation/gpio.txt for more information.
+
+ /sys/class/gpio
+ /export ... asks the kernel to export a GPIO to userspace
+ /unexport ... to return a GPIO to the kernel
+ /gpioN ... for each exported GPIO #N
+ /value ... always readable, writes fail for input GPIOs
+ /direction ... r/w as: in, out (default low); write: high, low
+ /edge ... r/w as: none, falling, rising, both
+ /gpiochipN ... for each gpiochip; #N is its first GPIO
+ /base ... (r/o) same as N
+ /label ... (r/o) descriptive, not necessarily unique
+ /ngpio ... (r/o) number of GPIOs; numbered N to N + (ngpio - 1)
+
diff --git a/Documentation/ABI/testing/sysfs-i2c-bmp085 b/Documentation/ABI/testing/sysfs-i2c-bmp085
new file mode 100644
index 000000000..585962ad0
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-i2c-bmp085
@@ -0,0 +1,31 @@
+What: /sys/bus/i2c/devices/<busnum>-<devaddr>/pressure0_input
+Date: June 2010
+Contact: Christoph Mair <christoph.mair@gmail.com>
+Description: Start a pressure measurement and read the result. Values
+ represent the ambient air pressure in pascal (0.01 millibar).
+
+ Reading: returns the current air pressure.
+
+
+What: /sys/bus/i2c/devices/<busnum>-<devaddr>/temp0_input
+Date: June 2010
+Contact: Christoph Mair <christoph.mair@gmail.com>
+Description: Measure the ambient temperature. The returned value represents
+ the ambient temperature in units of 0.1 degree celsius.
+
+ Reading: returns the current temperature.
+
+
+What: /sys/bus/i2c/devices/<busnum>-<devaddr>/oversampling
+Date: June 2010
+Contact: Christoph Mair <christoph.mair@gmail.com>
+Description: Tell the bmp085 to use more samples to calculate a pressure
+ value. When writing to this file the chip will use 2^x samples
+ to calculate the next pressure value with x being the value
+ written. Using this feature will decrease RMS noise and
+ increase the measurement time.
+
+ Reading: returns the current oversampling setting.
+
+ Writing: sets a new oversampling setting.
+ Accepted values: 0..3.
diff --git a/Documentation/ABI/testing/sysfs-ibft b/Documentation/ABI/testing/sysfs-ibft
new file mode 100644
index 000000000..cac3930bd
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-ibft
@@ -0,0 +1,23 @@
+What: /sys/firmware/ibft/initiator
+Date: November 2007
+Contact: Konrad Rzeszutek <ketuzsezr@darnok.org>
+Description: The /sys/firmware/ibft/initiator directory will contain
+ files that expose the iSCSI Boot Firmware Table initiator data.
+ Usually this contains the Initiator name.
+
+What: /sys/firmware/ibft/targetX
+Date: November 2007
+Contact: Konrad Rzeszutek <ketuzsezr@darnok.org>
+Description: The /sys/firmware/ibft/targetX directory will contain
+ files that expose the iSCSI Boot Firmware Table target data.
+ Usually this contains the target's IP address, boot LUN,
+ target name, and what NIC it is associated with. It can also
+ contain the CHAP name (and password), the reverse CHAP
+ name (and password)
+
+What: /sys/firmware/ibft/ethernetX
+Date: November 2007
+Contact: Konrad Rzeszutek <ketuzsezr@darnok.org>
+Description: The /sys/firmware/ibft/ethernetX directory will contain
+ files that expose the iSCSI Boot Firmware Table NIC data.
+ Usually this contains the IP address, MAC, and gateway of the NIC.
diff --git a/Documentation/ABI/testing/sysfs-kernel-boot_params b/Documentation/ABI/testing/sysfs-kernel-boot_params
new file mode 100644
index 000000000..eca38ce28
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-boot_params
@@ -0,0 +1,38 @@
+What: /sys/kernel/boot_params
+Date: December 2013
+Contact: Dave Young <dyoung@redhat.com>
+Description: The /sys/kernel/boot_params directory contains two
+ files: "data" and "version" and one subdirectory "setup_data".
+ It is used to export the kernel boot parameters of an x86
+ platform to userspace for kexec and debugging purpose.
+
+ If there's no setup_data in boot_params the subdirectory will
+ not be created.
+
+ "data" file is the binary representation of struct boot_params.
+
+ "version" file is the string representation of boot
+ protocol version.
+
+ "setup_data" subdirectory contains the setup_data data
+ structure in boot_params. setup_data is maintained in kernel
+ as a link list. In "setup_data" subdirectory there's one
+ subdirectory for each link list node named with the number
+ of the list nodes. The list node subdirectory contains two
+ files "type" and "data". "type" file is the string
+ representation of setup_data type. "data" file is the binary
+ representation of setup_data payload.
+
+ The whole boot_params directory structure is like below:
+ /sys/kernel/boot_params
+ |__ data
+ |__ setup_data
+ | |__ 0
+ | | |__ data
+ | | |__ type
+ | |__ 1
+ | |__ data
+ | |__ type
+ |__ version
+
+Users: Kexec
diff --git a/Documentation/ABI/testing/sysfs-kernel-fscaps b/Documentation/ABI/testing/sysfs-kernel-fscaps
new file mode 100644
index 000000000..50a3033b5
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-fscaps
@@ -0,0 +1,8 @@
+What: /sys/kernel/fscaps
+Date: February 2011
+KernelVersion: 2.6.38
+Contact: Ludwig Nussel <ludwig.nussel@suse.de>
+Description
+ Shows whether file system capabilities are honored
+ when executing a binary
+
diff --git a/Documentation/ABI/testing/sysfs-kernel-iommu_groups b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
new file mode 100644
index 000000000..9b31556cf
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-iommu_groups
@@ -0,0 +1,14 @@
+What: /sys/kernel/iommu_groups/
+Date: May 2012
+KernelVersion: v3.5
+Contact: Alex Williamson <alex.williamson@redhat.com>
+Description: /sys/kernel/iommu_groups/ contains a number of sub-
+ directories, each representing an IOMMU group. The
+ name of the sub-directory matches the iommu_group_id()
+ for the group, which is an integer value. Within each
+ subdirectory is another directory named "devices" with
+ links to the sysfs devices contained in this group.
+ The group directory also optionally contains a "name"
+ file if the IOMMU driver has chosen to register a more
+ common name for the group.
+Users:
diff --git a/Documentation/ABI/testing/sysfs-kernel-livepatch b/Documentation/ABI/testing/sysfs-kernel-livepatch
new file mode 100644
index 000000000..5bf42a840
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-livepatch
@@ -0,0 +1,44 @@
+What: /sys/kernel/livepatch
+Date: Nov 2014
+KernelVersion: 3.19.0
+Contact: live-patching@vger.kernel.org
+Description:
+ Interface for kernel live patching
+
+ The /sys/kernel/livepatch directory contains subdirectories for
+ each loaded live patch module.
+
+What: /sys/kernel/livepatch/<patch>
+Date: Nov 2014
+KernelVersion: 3.19.0
+Contact: live-patching@vger.kernel.org
+Description:
+ The patch directory contains subdirectories for each kernel
+ object (vmlinux or a module) in which it patched functions.
+
+What: /sys/kernel/livepatch/<patch>/enabled
+Date: Nov 2014
+KernelVersion: 3.19.0
+Contact: live-patching@vger.kernel.org
+Description:
+ A writable attribute that indicates whether the patched
+ code is currently applied. Writing 0 will disable the patch
+ while writing 1 will re-enable the patch.
+
+What: /sys/kernel/livepatch/<patch>/<object>
+Date: Nov 2014
+KernelVersion: 3.19.0
+Contact: live-patching@vger.kernel.org
+Description:
+ The object directory contains subdirectories for each function
+ that is patched within the object.
+
+What: /sys/kernel/livepatch/<patch>/<object>/<function>
+Date: Nov 2014
+KernelVersion: 3.19.0
+Contact: live-patching@vger.kernel.org
+Description:
+ The function directory contains attributes regarding the
+ properties and state of the patched function.
+
+ There are currently no such attributes.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm b/Documentation/ABI/testing/sysfs-kernel-mm
new file mode 100644
index 000000000..190d523ac
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm
@@ -0,0 +1,6 @@
+What: /sys/kernel/mm
+Date: July 2008
+Contact: Nishanth Aravamudan <nacc@us.ibm.com>, VM maintainers
+Description:
+ /sys/kernel/mm/ should contain any and all VM
+ related information in /sys/kernel/.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-hugepages b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
new file mode 100644
index 000000000..e21c00571
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-hugepages
@@ -0,0 +1,15 @@
+What: /sys/kernel/mm/hugepages/
+Date: June 2008
+Contact: Nishanth Aravamudan <nacc@us.ibm.com>, hugetlb maintainers
+Description:
+ /sys/kernel/mm/hugepages/ contains a number of subdirectories
+ of the form hugepages-<size>kB, where <size> is the page size
+ of the hugepages supported by the kernel/CPU combination.
+
+ Under these directories are a number of files:
+ nr_hugepages
+ nr_overcommit_hugepages
+ free_hugepages
+ surplus_hugepages
+ resv_hugepages
+ See Documentation/vm/hugetlbpage.txt for details.
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-ksm b/Documentation/ABI/testing/sysfs-kernel-mm-ksm
new file mode 100644
index 000000000..73e653ee2
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-ksm
@@ -0,0 +1,52 @@
+What: /sys/kernel/mm/ksm
+Date: September 2009
+KernelVersion: 2.6.32
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Interface for Kernel Samepage Merging (KSM)
+
+What: /sys/kernel/mm/ksm/full_scans
+What: /sys/kernel/mm/ksm/pages_shared
+What: /sys/kernel/mm/ksm/pages_sharing
+What: /sys/kernel/mm/ksm/pages_to_scan
+What: /sys/kernel/mm/ksm/pages_unshared
+What: /sys/kernel/mm/ksm/pages_volatile
+What: /sys/kernel/mm/ksm/run
+What: /sys/kernel/mm/ksm/sleep_millisecs
+Date: September 2009
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Kernel Samepage Merging daemon sysfs interface
+
+ full_scans: how many times all mergeable areas have been
+ scanned.
+
+ pages_shared: how many shared pages are being used.
+
+ pages_sharing: how many more sites are sharing them i.e. how
+ much saved.
+
+ pages_to_scan: how many present pages to scan before ksmd goes
+ to sleep.
+
+ pages_unshared: how many pages unique but repeatedly checked
+ for merging.
+
+ pages_volatile: how many pages changing too fast to be placed
+ in a tree.
+
+ run: write 0 to disable ksm, read 0 while ksm is disabled.
+ write 1 to run ksm, read 1 while ksm is running.
+ write 2 to disable ksm and unmerge all its pages.
+
+ sleep_millisecs: how many milliseconds ksm should sleep between
+ scans.
+
+ See Documentation/vm/ksm.txt for more information.
+
+What: /sys/kernel/mm/ksm/merge_across_nodes
+Date: January 2013
+KernelVersion: 3.9
+Contact: Linux memory management mailing list <linux-mm@kvack.org>
+Description: Control merging pages across different NUMA nodes.
+
+ When it is set to 0 only pages from the same node are merged,
+ otherwise pages from all nodes can be merged together (default).
diff --git a/Documentation/ABI/testing/sysfs-kernel-slab b/Documentation/ABI/testing/sysfs-kernel-slab
new file mode 100644
index 000000000..91bd6ca54
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-slab
@@ -0,0 +1,490 @@
+What: /sys/kernel/slab
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The /sys/kernel/slab directory contains a snapshot of the
+ internal state of the SLUB allocator for each cache. Certain
+ files may be modified to change the behavior of the cache (and
+ any cache it aliases, if any).
+Users: kernel memory tuning tools
+
+What: /sys/kernel/slab/cache/aliases
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The aliases file is read-only and specifies how many caches
+ have merged into this cache.
+
+What: /sys/kernel/slab/cache/align
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The align file is read-only and specifies the cache's object
+ alignment in bytes.
+
+What: /sys/kernel/slab/cache/alloc_calls
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The alloc_calls file is read-only and lists the kernel code
+ locations from which allocations for this cache were performed.
+ The alloc_calls file only contains information if debugging is
+ enabled for that cache (see Documentation/vm/slub.txt).
+
+What: /sys/kernel/slab/cache/alloc_fastpath
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The alloc_fastpath file shows how many objects have been
+ allocated using the fast path. It can be written to clear the
+ current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/alloc_from_partial
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The alloc_from_partial file shows how many times a cpu slab has
+ been full and it has been refilled by using a slab from the list
+ of partially used slabs. It can be written to clear the current
+ count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/alloc_refill
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The alloc_refill file shows how many times the per-cpu freelist
+ was empty but there were objects available as the result of
+ remote cpu frees. It can be written to clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/alloc_slab
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The alloc_slab file is shows how many times a new slab had to
+ be allocated from the page allocator. It can be written to
+ clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/alloc_slowpath
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The alloc_slowpath file shows how many objects have been
+ allocated using the slow path because of a refill or
+ allocation from a partial or new slab. It can be written to
+ clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/cache_dma
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The cache_dma file is read-only and specifies whether objects
+ are from ZONE_DMA.
+ Available when CONFIG_ZONE_DMA is enabled.
+
+What: /sys/kernel/slab/cache/cpu_slabs
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The cpu_slabs file is read-only and displays how many cpu slabs
+ are active and their NUMA locality.
+
+What: /sys/kernel/slab/cache/cpuslab_flush
+Date: April 2009
+KernelVersion: 2.6.31
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The file cpuslab_flush shows how many times a cache's cpu slabs
+ have been flushed as the result of destroying or shrinking a
+ cache, a cpu going offline, or as the result of forcing an
+ allocation from a certain node. It can be written to clear the
+ current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/ctor
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The ctor file is read-only and specifies the cache's object
+ constructor function, which is invoked for each object when a
+ new slab is allocated.
+
+What: /sys/kernel/slab/cache/deactivate_empty
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The deactivate_empty file shows how many times an empty cpu slab
+ was deactivated. It can be written to clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/deactivate_full
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The deactivate_full file shows how many times a full cpu slab
+ was deactivated. It can be written to clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/deactivate_remote_frees
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The deactivate_remote_frees file shows how many times a cpu slab
+ has been deactivated and contained free objects that were freed
+ remotely. It can be written to clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/deactivate_to_head
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The deactivate_to_head file shows how many times a partial cpu
+ slab was deactivated and added to the head of its node's partial
+ list. It can be written to clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/deactivate_to_tail
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The deactivate_to_tail file shows how many times a partial cpu
+ slab was deactivated and added to the tail of its node's partial
+ list. It can be written to clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/destroy_by_rcu
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The destroy_by_rcu file is read-only and specifies whether
+ slabs (not objects) are freed by rcu.
+
+What: /sys/kernel/slab/cache/free_add_partial
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The free_add_partial file shows how many times an object has
+ been freed in a full slab so that it had to added to its node's
+ partial list. It can be written to clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/free_calls
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The free_calls file is read-only and lists the locations of
+ object frees if slab debugging is enabled (see
+ Documentation/vm/slub.txt).
+
+What: /sys/kernel/slab/cache/free_fastpath
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The free_fastpath file shows how many objects have been freed
+ using the fast path because it was an object from the cpu slab.
+ It can be written to clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/free_frozen
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The free_frozen file shows how many objects have been freed to
+ a frozen slab (i.e. a remote cpu slab). It can be written to
+ clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/free_remove_partial
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The free_remove_partial file shows how many times an object has
+ been freed to a now-empty slab so that it had to be removed from
+ its node's partial list. It can be written to clear the current
+ count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/free_slab
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The free_slab file shows how many times an empty slab has been
+ freed back to the page allocator. It can be written to clear
+ the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/free_slowpath
+Date: February 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The free_slowpath file shows how many objects have been freed
+ using the slow path (i.e. to a full or partial slab). It can
+ be written to clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/hwcache_align
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The hwcache_align file is read-only and specifies whether
+ objects are aligned on cachelines.
+
+What: /sys/kernel/slab/cache/min_partial
+Date: February 2009
+KernelVersion: 2.6.30
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ David Rientjes <rientjes@google.com>
+Description:
+ The min_partial file specifies how many empty slabs shall
+ remain on a node's partial list to avoid the overhead of
+ allocating new slabs. Such slabs may be reclaimed by utilizing
+ the shrink file.
+
+What: /sys/kernel/slab/cache/object_size
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The object_size file is read-only and specifies the cache's
+ object size.
+
+What: /sys/kernel/slab/cache/objects
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The objects file is read-only and displays how many objects are
+ active and from which nodes they are from.
+
+What: /sys/kernel/slab/cache/objects_partial
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The objects_partial file is read-only and displays how many
+ objects are on partial slabs and from which nodes they are
+ from.
+
+What: /sys/kernel/slab/cache/objs_per_slab
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The file objs_per_slab is read-only and specifies how many
+ objects may be allocated from a single slab of the order
+ specified in /sys/kernel/slab/cache/order.
+
+What: /sys/kernel/slab/cache/order
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The order file specifies the page order at which new slabs are
+ allocated. It is writable and can be changed to increase the
+ number of objects per slab. If a slab cannot be allocated
+ because of fragmentation, SLUB will retry with the minimum order
+ possible depending on its characteristics.
+ When debug_guardpage_minorder=N (N > 0) parameter is specified
+ (see Documentation/kernel-parameters.txt), the minimum possible
+ order is used and this sysfs entry can not be used to change
+ the order at run time.
+
+What: /sys/kernel/slab/cache/order_fallback
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The order_fallback file shows how many times an allocation of a
+ new slab has not been possible at the cache's order and instead
+ fallen back to its minimum possible order. It can be written to
+ clear the current count.
+ Available when CONFIG_SLUB_STATS is enabled.
+
+What: /sys/kernel/slab/cache/partial
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The partial file is read-only and displays how long many
+ partial slabs there are and how long each node's list is.
+
+What: /sys/kernel/slab/cache/poison
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The poison file specifies whether objects should be poisoned
+ when a new slab is allocated.
+
+What: /sys/kernel/slab/cache/reclaim_account
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The reclaim_account file specifies whether the cache's objects
+ are reclaimable (and grouped by their mobility).
+
+What: /sys/kernel/slab/cache/red_zone
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The red_zone file specifies whether the cache's objects are red
+ zoned.
+
+What: /sys/kernel/slab/cache/remote_node_defrag_ratio
+Date: January 2008
+KernelVersion: 2.6.25
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The file remote_node_defrag_ratio specifies the percentage of
+ times SLUB will attempt to refill the cpu slab with a partial
+ slab from a remote node as opposed to allocating a new slab on
+ the local node. This reduces the amount of wasted memory over
+ the entire system but can be expensive.
+ Available when CONFIG_NUMA is enabled.
+
+What: /sys/kernel/slab/cache/sanity_checks
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The sanity_checks file specifies whether expensive checks
+ should be performed on free and, at minimum, enables double free
+ checks. Caches that enable sanity_checks cannot be merged with
+ caches that do not.
+
+What: /sys/kernel/slab/cache/shrink
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The shrink file is written when memory should be reclaimed from
+ a cache. Empty partial slabs are freed and the partial list is
+ sorted so the slabs with the fewest available objects are used
+ first.
+
+What: /sys/kernel/slab/cache/slab_size
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The slab_size file is read-only and specifies the object size
+ with metadata (debugging information and alignment) in bytes.
+
+What: /sys/kernel/slab/cache/slabs
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The slabs file is read-only and displays how long many slabs
+ there are (both cpu and partial) and from which nodes they are
+ from.
+
+What: /sys/kernel/slab/cache/store_user
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The store_user file specifies whether the location of
+ allocation or free should be tracked for a cache.
+
+What: /sys/kernel/slab/cache/total_objects
+Date: April 2008
+KernelVersion: 2.6.26
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The total_objects file is read-only and displays how many total
+ objects a cache has and from which nodes they are from.
+
+What: /sys/kernel/slab/cache/trace
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ The trace file specifies whether object allocations and frees
+ should be traced.
+
+What: /sys/kernel/slab/cache/validate
+Date: May 2007
+KernelVersion: 2.6.22
+Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
+ Christoph Lameter <cl@linux-foundation.org>
+Description:
+ Writing to the validate file causes SLUB to traverse all of its
+ cache's objects and check the validity of metadata.
diff --git a/Documentation/ABI/testing/sysfs-kernel-uids b/Documentation/ABI/testing/sysfs-kernel-uids
new file mode 100644
index 000000000..28f14695a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-uids
@@ -0,0 +1,14 @@
+What: /sys/kernel/uids/<uid>/cpu_shares
+Date: December 2007
+Contact: Dhaval Giani <dhaval@linux.vnet.ibm.com>
+ Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
+Description:
+ The /sys/kernel/uids/<uid>/cpu_shares tunable is used
+ to set the cpu bandwidth a user is allowed. This is a
+ propotional value. What that means is that if there
+ are two users logged in, each with an equal number of
+ shares, then they will get equal CPU bandwidth. Another
+ example would be, if User A has shares = 1024 and user
+ B has shares = 2048, User B will get twice the CPU
+ bandwidth user A will. For more details refer
+ Documentation/scheduler/sched-design-CFS.txt
diff --git a/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo b/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
new file mode 100644
index 000000000..7bd81168e
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-vmcoreinfo
@@ -0,0 +1,14 @@
+What: /sys/kernel/vmcoreinfo
+Date: October 2007
+KernelVersion: 2.6.24
+Contact: Ken'ichi Ohmichi <oomichi@mxs.nes.nec.co.jp>
+ Kexec Mailing List <kexec@lists.infradead.org>
+ Vivek Goyal <vgoyal@redhat.com>
+Description
+ Shows physical address and size of vmcoreinfo ELF note.
+ First value contains physical address of note in hex and
+ second value contains the size of note in hex. This ELF
+ note info is parsed by second kernel and exported to user
+ space as part of ELF note in /proc/vmcore file. This note
+ contains various information like struct size, symbol
+ values, page size etc.
diff --git a/Documentation/ABI/testing/sysfs-memory-page-offline b/Documentation/ABI/testing/sysfs-memory-page-offline
new file mode 100644
index 000000000..e14703f12
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-memory-page-offline
@@ -0,0 +1,44 @@
+What: /sys/devices/system/memory/soft_offline_page
+Date: Sep 2009
+KernelVersion: 2.6.33
+Contact: andi@firstfloor.org
+Description:
+ Soft-offline the memory page containing the physical address
+ written into this file. Input is a hex number specifying the
+ physical address of the page. The kernel will then attempt
+ to soft-offline it, by moving the contents elsewhere or
+ dropping it if possible. The kernel will then be placed
+ on the bad page list and never be reused.
+
+ The offlining is done in kernel specific granuality.
+ Normally it's the base page size of the kernel, but
+ this might change.
+
+ The page must be still accessible, not poisoned. The
+ kernel will never kill anything for this, but rather
+ fail the offline. Return value is the size of the
+ number, or a error when the offlining failed. Reading
+ the file is not allowed.
+
+What: /sys/devices/system/memory/hard_offline_page
+Date: Sep 2009
+KernelVersion: 2.6.33
+Contact: andi@firstfloor.org
+Description:
+ Hard-offline the memory page containing the physical
+ address written into this file. Input is a hex number
+ specifying the physical address of the page. The
+ kernel will then attempt to hard-offline the page, by
+ trying to drop the page or killing any owner or
+ triggering IO errors if needed. Note this may kill
+ any processes owning the page. The kernel will avoid
+ to access this page assuming it's poisoned by the
+ hardware.
+
+ The offlining is done in kernel specific granuality.
+ Normally it's the base page size of the kernel, but
+ this might change.
+
+ Return value is the size of the number, or a error when
+ the offlining failed.
+ Reading the file is not allowed.
diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module
new file mode 100644
index 000000000..0aac02e7f
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-module
@@ -0,0 +1,52 @@
+What: /sys/module/pch_phub/drivers/.../pch_mac
+Date: August 2010
+KernelVersion: 2.6.35
+Contact: masa-korg@dsn.okisemi.com
+Description: Write/read GbE MAC address.
+
+What: /sys/module/pch_phub/drivers/.../pch_firmware
+Date: August 2010
+KernelVersion: 2.6.35
+Contact: masa-korg@dsn.okisemi.com
+Description: Write/read Option ROM data.
+
+
+What: /sys/module/ehci_hcd/drivers/.../uframe_periodic_max
+Date: July 2011
+KernelVersion: 3.1
+Contact: Kirill Smelkov <kirr@mns.spb.ru>
+Description: Maximum time allowed for periodic transfers per microframe (μs)
+
+ [ USB 2.0 sets maximum allowed time for periodic transfers per
+ microframe to be 80%, that is 100 microseconds out of 125
+ microseconds (full microframe).
+
+ However there are cases, when 80% max isochronous bandwidth is
+ too limiting. For example two video streams could require 110
+ microseconds of isochronous bandwidth per microframe to work
+ together. ]
+
+ Through this setting it is possible to raise the limit so that
+ the host controller would allow allocating more than 100
+ microseconds of periodic bandwidth per microframe.
+
+ Beware, non-standard modes are usually not thoroughly tested by
+ hardware designers, and the hardware can malfunction when this
+ setting differ from default 100.
+
+What: /sys/module/*/{coresize,initsize}
+Date: Jan 2012
+KernelVersion:»·3.3
+Contact: Kay Sievers <kay.sievers@vrfy.org>
+Description: Module size in bytes.
+
+What: /sys/module/*/taint
+Date: Jan 2012
+KernelVersion:»·3.3
+Contact: Kay Sievers <kay.sievers@vrfy.org>
+Description: Module taint flags:
+ P - proprietary module
+ O - out-of-tree module
+ F - force-loaded module
+ C - staging driver module
+ E - unsigned module
diff --git a/Documentation/ABI/testing/sysfs-ocfs2 b/Documentation/ABI/testing/sysfs-ocfs2
new file mode 100644
index 000000000..b7cc516a8
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-ocfs2
@@ -0,0 +1,89 @@
+What: /sys/fs/ocfs2/
+Date: April 2008
+Contact: ocfs2-devel@oss.oracle.com
+Description:
+ The /sys/fs/ocfs2 directory contains knobs used by the
+ ocfs2-tools to interact with the filesystem.
+
+What: /sys/fs/ocfs2/max_locking_protocol
+Date: April 2008
+Contact: ocfs2-devel@oss.oracle.com
+Description:
+ The /sys/fs/ocfs2/max_locking_protocol file displays version
+ of ocfs2 locking supported by the filesystem. This version
+ covers how ocfs2 uses distributed locking between cluster
+ nodes.
+
+ The protocol version has a major and minor number. Two
+ cluster nodes can interoperate if they have an identical
+ major number and an overlapping minor number - thus,
+ a node with version 1.10 can interoperate with a node
+ sporting version 1.8, as long as both use the 1.8 protocol.
+
+ Reading from this file returns a single line, the major
+ number and minor number joined by a period, eg "1.10".
+
+ This file is read-only. The value is compiled into the
+ driver.
+
+What: /sys/fs/ocfs2/loaded_cluster_plugins
+Date: April 2008
+Contact: ocfs2-devel@oss.oracle.com
+Description:
+ The /sys/fs/ocfs2/loaded_cluster_plugins file describes
+ the available plugins to support ocfs2 cluster operation.
+ A cluster plugin is required to use ocfs2 in a cluster.
+ There are currently two available plugins:
+
+ * 'o2cb' - The classic o2cb cluster stack that ocfs2 has
+ used since its inception.
+ * 'user' - A plugin supporting userspace cluster software
+ in conjunction with fs/dlm.
+
+ Reading from this file returns the names of all loaded
+ plugins, one per line.
+
+ This file is read-only. Its contents may change as
+ plugins are loaded or removed.
+
+What: /sys/fs/ocfs2/active_cluster_plugin
+Date: April 2008
+Contact: ocfs2-devel@oss.oracle.com
+Description:
+ The /sys/fs/ocfs2/active_cluster_plugin displays which
+ cluster plugin is currently in use by the filesystem.
+ The active plugin will appear in the loaded_cluster_plugins
+ file as well. Only one plugin can be used at a time.
+
+ Reading from this file returns the name of the active plugin
+ on a single line.
+
+ This file is read-only. Which plugin is active depends on
+ the cluster stack in use. The contents may change
+ when all filesystems are unmounted and the cluster stack
+ is changed.
+
+What: /sys/fs/ocfs2/cluster_stack
+Date: April 2008
+Contact: ocfs2-devel@oss.oracle.com
+Description:
+ The /sys/fs/ocfs2/cluster_stack file contains the name
+ of current ocfs2 cluster stack. This value is set by
+ userspace tools when bringing the cluster stack online.
+
+ Cluster stack names are 4 characters in length.
+
+ When the 'o2cb' cluster stack is used, the 'o2cb' cluster
+ plugin is active. All other cluster stacks use the 'user'
+ cluster plugin.
+
+ Reading from this file returns the name of the current
+ cluster stack on a single line.
+
+ Writing a new stack name to this file changes the current
+ cluster stack unless there are mounted ocfs2 filesystems.
+ If there are mounted filesystems, attempts to change the
+ stack return an error.
+
+Users:
+ ocfs2-tools <ocfs2-tools-devel@oss.oracle.com>
diff --git a/Documentation/ABI/testing/sysfs-platform-asus-laptop b/Documentation/ABI/testing/sysfs-platform-asus-laptop
new file mode 100644
index 000000000..cd9d667c3
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-asus-laptop
@@ -0,0 +1,66 @@
+What: /sys/devices/platform/asus_laptop/display
+Date: January 2007
+KernelVersion: 2.6.20
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ This file allows display switching. The value
+ is composed by 4 bits and defined as follow:
+ 4321
+ |||`- LCD
+ ||`-- CRT
+ |`--- TV
+ `---- DVI
+ Ex: - 0 (0000b) means no display
+ - 3 (0011b) CRT+LCD.
+
+What: /sys/devices/platform/asus_laptop/gps
+Date: January 2007
+KernelVersion: 2.6.20
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Control the gps device. 1 means on, 0 means off.
+Users: Lapsus
+
+What: /sys/devices/platform/asus_laptop/ledd
+Date: January 2007
+KernelVersion: 2.6.20
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Some models like the W1N have a LED display that can be
+ used to display several items of information.
+ To control the LED display, use the following :
+ echo 0x0T000DDD > /sys/devices/platform/asus_laptop/
+ where T control the 3 letters display, and DDD the 3 digits display.
+ The DDD table can be found in Documentation/laptops/asus-laptop.txt
+
+What: /sys/devices/platform/asus_laptop/bluetooth
+Date: January 2007
+KernelVersion: 2.6.20
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Control the bluetooth device. 1 means on, 0 means off.
+ This may control the led, the device or both.
+Users: Lapsus
+
+What: /sys/devices/platform/asus_laptop/wlan
+Date: January 2007
+KernelVersion: 2.6.20
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Control the wlan device. 1 means on, 0 means off.
+ This may control the led, the device or both.
+Users: Lapsus
+
+What: /sys/devices/platform/asus_laptop/wimax
+Date: October 2010
+KernelVersion: 2.6.37
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Control the wimax device. 1 means on, 0 means off.
+
+What: /sys/devices/platform/asus_laptop/wwan
+Date: October 2010
+KernelVersion: 2.6.37
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Control the wwan (3G) device. 1 means on, 0 means off.
diff --git a/Documentation/ABI/testing/sysfs-platform-asus-wmi b/Documentation/ABI/testing/sysfs-platform-asus-wmi
new file mode 100644
index 000000000..019e1e293
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-asus-wmi
@@ -0,0 +1,38 @@
+What: /sys/devices/platform/<platform>/cpufv
+Date: Oct 2010
+KernelVersion: 2.6.37
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Change CPU clock configuration (write-only).
+ There are three available clock configuration:
+ * 0 -> Super Performance Mode
+ * 1 -> High Performance Mode
+ * 2 -> Power Saving Mode
+
+What: /sys/devices/platform/<platform>/camera
+Date: Jan 2010
+KernelVersion: 2.6.39
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Control the camera. 1 means on, 0 means off.
+
+What: /sys/devices/platform/<platform>/cardr
+Date: Jan 2010
+KernelVersion: 2.6.39
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Control the card reader. 1 means on, 0 means off.
+
+What: /sys/devices/platform/<platform>/touchpad
+Date: Jan 2010
+KernelVersion: 2.6.39
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Control the card touchpad. 1 means on, 0 means off.
+
+What: /sys/devices/platform/<platform>/lid_resume
+Date: May 2012
+KernelVersion: 3.5
+Contact: "AceLan Kao" <acelan.kao@canonical.com>
+Description:
+ Resume on lid open. 1 means on, 0 means off.
diff --git a/Documentation/ABI/testing/sysfs-platform-at91 b/Documentation/ABI/testing/sysfs-platform-at91
new file mode 100644
index 000000000..4cc6a865a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-at91
@@ -0,0 +1,25 @@
+What: /sys/devices/platform/at91_can/net/<iface>/mb0_id
+Date: January 2011
+KernelVersion: 2.6.38
+Contact: Marc Kleine-Budde <kernel@pengutronix.de>
+Description:
+ Value representing the can_id of mailbox 0.
+
+ Default: 0x7ff (standard frame)
+
+ Due to a chip bug (errata 50.2.6.3 & 50.3.5.3 in
+ "AT91SAM9263 Preliminary 6249H-ATARM-27-Jul-09") the
+ contents of mailbox 0 may be send under certain
+ conditions (even if disabled or in rx mode).
+
+ The workaround in the errata suggests not to use the
+ mailbox and load it with an unused identifier.
+
+ In order to use an extended can_id add the
+ CAN_EFF_FLAG (0x80000000U) to the can_id. Example:
+
+ - standard id 0x7ff:
+ echo 0x7ff > /sys/class/net/can0/mb0_id
+
+ - extended id 0x1fffffff:
+ echo 0x9fffffff > /sys/class/net/can0/mb0_id
diff --git a/Documentation/ABI/testing/sysfs-platform-brcmstb-gisb-arb b/Documentation/ABI/testing/sysfs-platform-brcmstb-gisb-arb
new file mode 100644
index 000000000..f1bad92bb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-brcmstb-gisb-arb
@@ -0,0 +1,8 @@
+What: /sys/devices/../../gisb_arb_timeout
+Date: May 2014
+KernelVersion: 3.17
+Contact: Florian Fainelli <f.fainelli@gmail.com>
+Description:
+ Returns the currently configured raw timeout value of the
+ Broadcom Set Top Box internal GISB bus arbiter. Minimum value
+ is 1, and maximum value is 0xffffffff.
diff --git a/Documentation/ABI/testing/sysfs-platform-chipidea-usb-otg b/Documentation/ABI/testing/sysfs-platform-chipidea-usb-otg
new file mode 100644
index 000000000..151c59578
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-chipidea-usb-otg
@@ -0,0 +1,56 @@
+What: /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_req
+Date: Feb 2014
+Contact: Li Jun <b47624@freescale.com>
+Description:
+ Can be set and read.
+ Set a_bus_req(A-device bus request) input to be 1 if
+ the application running on the A-device wants to use the bus,
+ and to be 0 when the application no longer wants to use
+ the bus(or wants to work as peripheral). a_bus_req can also
+ be set to 1 by kernel in response to remote wakeup signaling
+ from the B-device, the A-device should decide to resume the bus.
+
+ Valid values are "1" and "0".
+
+ Reading: returns 1 if the application running on the A-device
+ is using the bus as host role, otherwise 0.
+
+What: /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_drop
+Date: Feb 2014
+Contact: Li Jun <b47624@freescale.com>
+Description:
+ Can be set and read
+ The a_bus_drop(A-device bus drop) input is 1 when the
+ application running on the A-device wants to power down
+ the bus, and is 0 otherwise, When a_bus_drop is 1, then
+ the a_bus_req shall be 0.
+
+ Valid values are "1" and "0".
+
+ Reading: returns 1 if the bus is off(vbus is turned off) by
+ A-device, otherwise 0.
+
+What: /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
+Date: Feb 2014
+Contact: Li Jun <b47624@freescale.com>
+Description:
+ Can be set and read.
+ The b_bus_req(B-device bus request) input is 1 during the time
+ that the application running on the B-device wants to use the
+ bus as host, and is 0 when the application no longer wants to
+ work as host and decides to switch back to be peripheral.
+
+ Valid values are "1" and "0".
+
+ Reading: returns if the application running on the B device
+ is using the bus as host role, otherwise 0.
+
+What: /sys/bus/platform/devices/ci_hdrc.0/inputs/a_clr_err
+Date: Feb 2014
+Contact: Li Jun <b47624@freescale.com>
+Description:
+ Only can be set.
+ The a_clr_err(A-device Vbus error clear) input is used to clear
+ vbus error, then A-device will power down the bus.
+
+ Valid value is "1"
diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop
new file mode 100644
index 000000000..8c6a0b8e1
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop
@@ -0,0 +1,69 @@
+What: /sys/class/leds/dell::kbd_backlight/als_enabled
+Date: December 2014
+KernelVersion: 3.19
+Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+ Pali Rohár <pali.rohar@gmail.com>
+Description:
+ This file allows to control the automatic keyboard
+ illumination mode on some systems that have an ambient
+ light sensor. Write 1 to this file to enable the auto
+ mode, 0 to disable it.
+
+What: /sys/class/leds/dell::kbd_backlight/als_setting
+Date: December 2014
+KernelVersion: 3.19
+Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+ Pali Rohár <pali.rohar@gmail.com>
+Description:
+ This file allows to specifiy the on/off threshold value,
+ as reported by the ambient light sensor.
+
+What: /sys/class/leds/dell::kbd_backlight/start_triggers
+Date: December 2014
+KernelVersion: 3.19
+Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+ Pali Rohár <pali.rohar@gmail.com>
+Description:
+ This file allows to control the input triggers that
+ turn on the keyboard backlight illumination that is
+ disabled because of inactivity.
+ Read the file to see the triggers available. The ones
+ enabled are preceded by '+', those disabled by '-'.
+
+ To enable a trigger, write its name preceded by '+' to
+ this file. To disable a trigger, write its name preceded
+ by '-' instead.
+
+ For example, to enable the keyboard as trigger run:
+ echo +keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
+ To disable it:
+ echo -keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
+
+ Note that not all the available triggers can be configured.
+
+What: /sys/class/leds/dell::kbd_backlight/stop_timeout
+Date: December 2014
+KernelVersion: 3.19
+Contact: Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+ Pali Rohár <pali.rohar@gmail.com>
+Description:
+ This file allows to specify the interval after which the
+ keyboard illumination is disabled because of inactivity.
+ The timeouts are expressed in seconds, minutes, hours and
+ days, for which the symbols are 's', 'm', 'h' and 'd'
+ respectively.
+
+ To configure the timeout, write to this file a value along
+ with any the above units. If no unit is specified, the value
+ is assumed to be expressed in seconds.
+
+ For example, to set the timeout to 10 minutes run:
+ echo 10m > /sys/class/leds/dell::kbd_backlight/stop_timeout
+
+ Note that when this file is read, the returned value might be
+ expressed in a different unit than the one used when the timeout
+ was set.
+
+ Also note that only some timeouts are supported and that
+ some systems might fall back to a specific timeout in case
+ an invalid timeout is written to this file.
diff --git a/Documentation/ABI/testing/sysfs-platform-eeepc-laptop b/Documentation/ABI/testing/sysfs-platform-eeepc-laptop
new file mode 100644
index 000000000..5b026c695
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-eeepc-laptop
@@ -0,0 +1,50 @@
+What: /sys/devices/platform/eeepc/disp
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ This file allows display switching.
+ - 1 = LCD
+ - 2 = CRT
+ - 3 = LCD+CRT
+ If you run X11, you should use xrandr instead.
+
+What: /sys/devices/platform/eeepc/camera
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Control the camera. 1 means on, 0 means off.
+
+What: /sys/devices/platform/eeepc/cardr
+Date: May 2008
+KernelVersion: 2.6.26
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Control the card reader. 1 means on, 0 means off.
+
+What: /sys/devices/platform/eeepc/cpufv
+Date: Jun 2009
+KernelVersion: 2.6.31
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ Change CPU clock configuration.
+ On the Eee PC 1000H there are three available clock configuration:
+ * 0 -> Super Performance Mode
+ * 1 -> High Performance Mode
+ * 2 -> Power Saving Mode
+ On Eee PC 701 there is only 2 available clock configurations.
+ Available configuration are listed in available_cpufv file.
+ Reading this file will show the raw hexadecimal value which
+ is defined as follow:
+ | 8 bit | 8 bit |
+ | `---- Current mode
+ `------------ Availables modes
+ For example, 0x301 means: mode 1 selected, 3 available modes.
+
+What: /sys/devices/platform/eeepc/available_cpufv
+Date: Jun 2009
+KernelVersion: 2.6.31
+Contact: "Corentin Chary" <corentincj@iksaif.net>
+Description:
+ List available cpufv modes.
diff --git a/Documentation/ABI/testing/sysfs-platform-ideapad-laptop b/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
new file mode 100644
index 000000000..b31e782bd
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-ideapad-laptop
@@ -0,0 +1,19 @@
+What: /sys/devices/platform/ideapad/camera_power
+Date: Dec 2010
+KernelVersion: 2.6.37
+Contact: "Ike Panhc <ike.pan@canonical.com>"
+Description:
+ Control the power of camera module. 1 means on, 0 means off.
+
+What: /sys/devices/platform/ideapad/fan_mode
+Date: June 2012
+KernelVersion: 3.6
+Contact: "Maxim Mikityanskiy <maxtram95@gmail.com>"
+Description:
+ Change fan mode
+ There are four available modes:
+ * 0 -> Super Silent Mode
+ * 1 -> Standard Mode
+ * 2 -> Dust Cleaning
+ * 4 -> Efficient Thermal Dissipation Mode
+
diff --git a/Documentation/ABI/testing/sysfs-platform-kim b/Documentation/ABI/testing/sysfs-platform-kim
new file mode 100644
index 000000000..c16532718
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-kim
@@ -0,0 +1,48 @@
+What: /sys/devices/platform/kim/dev_name
+Date: January 2010
+KernelVersion: 2.6.38
+Contact: "Pavan Savoy" <pavan_savoy@ti.com>
+Description:
+ Name of the UART device at which the WL128x chip
+ is connected. example: "/dev/ttyS0".
+ The device name flows down to architecture specific board
+ initialization file from the SFI/ATAGS bootloader
+ firmware. The name exposed is read from the user-space
+ dameon and opens the device when install is requested.
+
+What: /sys/devices/platform/kim/baud_rate
+Date: January 2010
+KernelVersion: 2.6.38
+Contact: "Pavan Savoy" <pavan_savoy@ti.com>
+Description:
+ The maximum reliable baud-rate the host can support.
+ Different platforms tend to have different high-speed
+ UART configurations, so the baud-rate needs to be set
+ locally and also sent across to the WL128x via a HCI-VS
+ command. The entry is read and made use by the user-space
+ daemon when the ldisc install is requested.
+
+What: /sys/devices/platform/kim/flow_cntrl
+Date: January 2010
+KernelVersion: 2.6.38
+Contact: "Pavan Savoy" <pavan_savoy@ti.com>
+Description:
+ The WL128x makes use of flow control mechanism, and this
+ entry most often should be 1, the host's UART is required
+ to have the capability of flow-control, or else this
+ entry can be made use of for exceptions.
+
+What: /sys/devices/platform/kim/install
+Date: January 2010
+KernelVersion: 2.6.38
+Contact: "Pavan Savoy" <pavan_savoy@ti.com>
+Description:
+ When one of the protocols Bluetooth, FM or GPS wants to make
+ use of the shared UART transport, it registers to the shared
+ transport driver, which will signal the user-space for opening,
+ configuring baud and install line discipline via this sysfs
+ entry. This entry would be polled upon by the user-space
+ daemon managing the UART, and is notified about the change
+ by the sysfs_notify. The value would be '1' when UART needs
+ to be opened/ldisc installed, and would be '0' when UART
+ is no more required and needs to be closed.
diff --git a/Documentation/ABI/testing/sysfs-platform-msi-laptop b/Documentation/ABI/testing/sysfs-platform-msi-laptop
new file mode 100644
index 000000000..307a247ba
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-msi-laptop
@@ -0,0 +1,83 @@
+What: /sys/devices/platform/msi-laptop-pf/lcd_level
+Date: Oct 2006
+KernelVersion: 2.6.19
+Contact: "Lennart Poettering <mzxreary@0pointer.de>"
+Description:
+ Screen brightness: contains a single integer in the range 0..8.
+
+What: /sys/devices/platform/msi-laptop-pf/auto_brightness
+Date: Oct 2006
+KernelVersion: 2.6.19
+Contact: "Lennart Poettering <mzxreary@0pointer.de>"
+Description:
+ Enable automatic brightness control: contains either 0 or 1. If
+ set to 1 the hardware adjusts the screen brightness
+ automatically when the power cord is plugged/unplugged.
+
+What: /sys/devices/platform/msi-laptop-pf/wlan
+Date: Oct 2006
+KernelVersion: 2.6.19
+Contact: "Lennart Poettering <mzxreary@0pointer.de>"
+Description:
+ WLAN subsystem enabled: contains either 0 or 1.
+
+What: /sys/devices/platform/msi-laptop-pf/bluetooth
+Date: Oct 2006
+KernelVersion: 2.6.19
+Contact: "Lennart Poettering <mzxreary@0pointer.de>"
+Description:
+ Bluetooth subsystem enabled: contains either 0 or 1. Please
+ note that this file is constantly 0 if no Bluetooth hardware is
+ available.
+
+What: /sys/devices/platform/msi-laptop-pf/touchpad
+Date: Nov 2012
+KernelVersion: 3.8
+Contact: "Maxim Mikityanskiy <maxtram95@gmail.com>"
+Description:
+ Contains either 0 or 1 and indicates if touchpad is turned on.
+ Touchpad state can only be toggled by pressing Fn+F3.
+
+What: /sys/devices/platform/msi-laptop-pf/turbo_mode
+Date: Nov 2012
+KernelVersion: 3.8
+Contact: "Maxim Mikityanskiy <maxtram95@gmail.com>"
+Description:
+ Contains either 0 or 1 and indicates if turbo mode is turned
+ on. In turbo mode power LED is orange and processor is
+ overclocked. Turbo mode is available only if charging. It is
+ only possible to toggle turbo mode state by pressing Fn+F10,
+ and there is a few seconds cooldown between subsequent toggles.
+ If user presses Fn+F10 too frequent, turbo mode state is not
+ changed.
+
+What: /sys/devices/platform/msi-laptop-pf/eco_mode
+Date: Nov 2012
+KernelVersion: 3.8
+Contact: "Maxim Mikityanskiy <maxtram95@gmail.com>"
+Description:
+ Contains either 0 or 1 and indicates if ECO mode is turned on.
+ In ECO mode power LED is green and userspace should do some
+ powersaving actions. ECO mode is available only on battery
+ power. ECO mode can only be toggled by pressing Fn+F10.
+
+What: /sys/devices/platform/msi-laptop-pf/turbo_cooldown
+Date: Nov 2012
+KernelVersion: 3.8
+Contact: "Maxim Mikityanskiy <maxtram95@gmail.com>"
+Description:
+ Contains value in range 0..3:
+ * 0 -> Turbo mode is off
+ * 1 -> Turbo mode is on, cannot be turned off yet
+ * 2 -> Turbo mode is off, cannot be turned on yet
+ * 3 -> Turbo mode is on
+
+What: /sys/devices/platform/msi-laptop-pf/auto_fan
+Date: Nov 2012
+KernelVersion: 3.8
+Contact: "Maxim Mikityanskiy <maxtram95@gmail.com>"
+Description:
+ Contains either 0 or 1 and indicates if fan speed is controlled
+ automatically (1) or fan runs at maximal speed (0). Can be
+ toggled in software.
+
diff --git a/Documentation/ABI/testing/sysfs-platform-tahvo-usb b/Documentation/ABI/testing/sysfs-platform-tahvo-usb
new file mode 100644
index 000000000..f6e20ce4b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-tahvo-usb
@@ -0,0 +1,16 @@
+What: /sys/bus/platform/devices/tahvo-usb/otg_mode
+Date: December 2013
+Contact: Aaro Koskinen <aaro.koskinen@iki.fi>
+Description:
+ Set or read the current OTG mode. Valid values are "host" and
+ "peripheral".
+
+ Reading: returns the current mode.
+
+What: /sys/bus/platform/devices/tahvo-usb/vbus
+Date: December 2013
+Contact: Aaro Koskinen <aaro.koskinen@iki.fi>
+Description:
+ Read the current VBUS state.
+
+ Reading: returns "on" or "off".
diff --git a/Documentation/ABI/testing/sysfs-platform-ts5500 b/Documentation/ABI/testing/sysfs-platform-ts5500
new file mode 100644
index 000000000..e685957ca
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-ts5500
@@ -0,0 +1,54 @@
+What: /sys/devices/platform/ts5500/adc
+Date: January 2013
+KernelVersion: 3.7
+Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+ Indicates the presence of an A/D Converter. If it is present,
+ it will display "1", otherwise "0".
+
+What: /sys/devices/platform/ts5500/ereset
+Date: January 2013
+KernelVersion: 3.7
+Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+ Indicates the presence of an external reset. If it is present,
+ it will display "1", otherwise "0".
+
+What: /sys/devices/platform/ts5500/id
+Date: January 2013
+KernelVersion: 3.7
+Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+ Product ID of the TS board. TS-5500 ID is 0x60.
+
+What: /sys/devices/platform/ts5500/jumpers
+Date: January 2013
+KernelVersion: 3.7
+Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+ Bitfield showing the jumpers' state. If a jumper is present,
+ the corresponding bit is set. For instance, 0x0e means jumpers
+ 2, 3 and 4 are set.
+
+What: /sys/devices/platform/ts5500/name
+Date: July 2014
+KernelVersion: 3.16
+Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+ Model name of the TS board, e.g. "TS-5500".
+
+What: /sys/devices/platform/ts5500/rs485
+Date: January 2013
+KernelVersion: 3.7
+Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+ Indicates the presence of the RS485 option. If it is present,
+ it will display "1", otherwise "0".
+
+What: /sys/devices/platform/ts5500/sram
+Date: January 2013
+KernelVersion: 3.7
+Contact: "Savoir-faire Linux Inc." <kernel@savoirfairelinux.com>
+Description:
+ Indicates the presence of the SRAM option. If it is present,
+ it will display "1", otherwise "0".
diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
new file mode 100644
index 000000000..f45518163
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-power
@@ -0,0 +1,258 @@
+What: /sys/power/
+Date: August 2006
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power directory will contain files that will
+ provide a unified interface to the power management
+ subsystem.
+
+What: /sys/power/state
+Date: May 2014
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power/state file controls system sleep states.
+ Reading from this file returns the available sleep state
+ labels, which may be "mem", "standby", "freeze" and "disk"
+ (hibernation). The meanings of the first three labels depend on
+ the relative_sleep_states command line argument as follows:
+ 1) relative_sleep_states = 1
+ "mem", "standby", "freeze" represent non-hibernation sleep
+ states from the deepest ("mem", always present) to the
+ shallowest ("freeze"). "standby" and "freeze" may or may
+ not be present depending on the capabilities of the
+ platform. "freeze" can only be present if "standby" is
+ present.
+ 2) relative_sleep_states = 0 (default)
+ "mem" - "suspend-to-RAM", present if supported.
+ "standby" - "power-on suspend", present if supported.
+ "freeze" - "suspend-to-idle", always present.
+
+ Writing to this file one of these strings causes the system to
+ transition into the corresponding state, if available. See
+ Documentation/power/states.txt for a description of what
+ "suspend-to-RAM", "power-on suspend" and "suspend-to-idle" mean.
+
+What: /sys/power/disk
+Date: September 2006
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power/disk file controls the operating mode of the
+ suspend-to-disk mechanism. Reading from this file returns
+ the name of the method by which the system will be put to
+ sleep on the next suspend. There are four methods supported:
+ 'firmware' - means that the memory image will be saved to disk
+ by some firmware, in which case we also assume that the
+ firmware will handle the system suspend.
+ 'platform' - the memory image will be saved by the kernel and
+ the system will be put to sleep by the platform driver (e.g.
+ ACPI or other PM registers).
+ 'shutdown' - the memory image will be saved by the kernel and
+ the system will be powered off.
+ 'reboot' - the memory image will be saved by the kernel and
+ the system will be rebooted.
+
+ Additionally, /sys/power/disk can be used to turn on one of the
+ two testing modes of the suspend-to-disk mechanism: 'testproc'
+ or 'test'. If the suspend-to-disk mechanism is in the
+ 'testproc' mode, writing 'disk' to /sys/power/state will cause
+ the kernel to disable nonboot CPUs and freeze tasks, wait for 5
+ seconds, unfreeze tasks and enable nonboot CPUs. If it is in
+ the 'test' mode, writing 'disk' to /sys/power/state will cause
+ the kernel to disable nonboot CPUs and freeze tasks, shrink
+ memory, suspend devices, wait for 5 seconds, resume devices,
+ unfreeze tasks and enable nonboot CPUs. Then, we are able to
+ look in the log messages and work out, for example, which code
+ is being slow and which device drivers are misbehaving.
+
+ The suspend-to-disk method may be chosen by writing to this
+ file one of the accepted strings:
+
+ 'firmware'
+ 'platform'
+ 'shutdown'
+ 'reboot'
+ 'testproc'
+ 'test'
+
+ It will only change to 'firmware' or 'platform' if the system
+ supports that.
+
+What: /sys/power/image_size
+Date: August 2006
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power/image_size file controls the size of the image
+ created by the suspend-to-disk mechanism. It can be written a
+ string representing a non-negative integer that will be used
+ as an upper limit of the image size, in bytes. The kernel's
+ suspend-to-disk code will do its best to ensure the image size
+ will not exceed this number. However, if it turns out to be
+ impossible, the kernel will try to suspend anyway using the
+ smallest image possible. In particular, if "0" is written to
+ this file, the suspend image will be as small as possible.
+
+ Reading from this file will display the current image size
+ limit, which is set to 500 MB by default.
+
+What: /sys/power/pm_trace
+Date: August 2006
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power/pm_trace file controls the code which saves the
+ last PM event point in the RTC across reboots, so that you can
+ debug a machine that just hangs during suspend (or more
+ commonly, during resume). Namely, the RTC is only used to save
+ the last PM event point if this file contains '1'. Initially
+ it contains '0' which may be changed to '1' by writing a
+ string representing a nonzero integer into it.
+
+ To use this debugging feature you should attempt to suspend
+ the machine, then reboot it and run
+
+ dmesg -s 1000000 | grep 'hash matches'
+
+ If you do not get any matches (or they appear to be false
+ positives), it is possible that the last PM event point
+ referred to a device created by a loadable kernel module. In
+ this case cat /sys/power/pm_trace_dev_match (see below) after
+ your system is started up and the kernel modules are loaded.
+
+ CAUTION: Using it will cause your machine's real-time (CMOS)
+ clock to be set to a random invalid time after a resume.
+
+What; /sys/power/pm_trace_dev_match
+Date: October 2010
+Contact: James Hogan <james@albanarts.com>
+Description:
+ The /sys/power/pm_trace_dev_match file contains the name of the
+ device associated with the last PM event point saved in the RTC
+ across reboots when pm_trace has been used. More precisely it
+ contains the list of current devices (including those
+ registered by loadable kernel modules since boot) which match
+ the device hash in the RTC at boot, with a newline after each
+ one.
+
+ The advantage of this file over the hash matches printed to the
+ kernel log (see /sys/power/pm_trace), is that it includes
+ devices created after boot by loadable kernel modules.
+
+ Due to the small hash size necessary to fit in the RTC, it is
+ possible that more than one device matches the hash, in which
+ case further investigation is required to determine which
+ device is causing the problem. Note that genuine RTC clock
+ values (such as when pm_trace has not been used), can still
+ match a device and output it's name here.
+
+What: /sys/power/pm_async
+Date: January 2009
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power/pm_async file controls the switch allowing the
+ user space to enable or disable asynchronous suspend and resume
+ of devices. If enabled, this feature will cause some device
+ drivers' suspend and resume callbacks to be executed in parallel
+ with each other and with the main suspend thread. It is enabled
+ if this file contains "1", which is the default. It may be
+ disabled by writing "0" to this file, in which case all devices
+ will be suspended and resumed synchronously.
+
+What: /sys/power/wakeup_count
+Date: July 2010
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power/wakeup_count file allows user space to put the
+ system into a sleep state while taking into account the
+ concurrent arrival of wakeup events. Reading from it returns
+ the current number of registered wakeup events and it blocks if
+ some wakeup events are being processed at the time the file is
+ read from. Writing to it will only succeed if the current
+ number of wakeup events is equal to the written value and, if
+ successful, will make the kernel abort a subsequent transition
+ to a sleep state if any wakeup events are reported after the
+ write has returned.
+
+What: /sys/power/reserved_size
+Date: May 2011
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power/reserved_size file allows user space to control
+ the amount of memory reserved for allocations made by device
+ drivers during the "device freeze" stage of hibernation. It can
+ be written a string representing a non-negative integer that
+ will be used as the amount of memory to reserve for allocations
+ made by device drivers' "freeze" callbacks, in bytes.
+
+ Reading from this file will display the current value, which is
+ set to 1 MB by default.
+
+What: /sys/power/autosleep
+Date: April 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power/autosleep file can be written one of the strings
+ returned by reads from /sys/power/state. If that happens, a
+ work item attempting to trigger a transition of the system to
+ the sleep state represented by that string is queued up. This
+ attempt will only succeed if there are no active wakeup sources
+ in the system at that time. After every execution, regardless
+ of whether or not the attempt to put the system to sleep has
+ succeeded, the work item requeues itself until user space
+ writes "off" to /sys/power/autosleep.
+
+ Reading from this file causes the last string successfully
+ written to it to be returned.
+
+What: /sys/power/wake_lock
+Date: February 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power/wake_lock file allows user space to create
+ wakeup source objects and activate them on demand (if one of
+ those wakeup sources is active, reads from the
+ /sys/power/wakeup_count file block or return false). When a
+ string without white space is written to /sys/power/wake_lock,
+ it will be assumed to represent a wakeup source name. If there
+ is a wakeup source object with that name, it will be activated
+ (unless active already). Otherwise, a new wakeup source object
+ will be registered, assigned the given name and activated.
+ If a string written to /sys/power/wake_lock contains white
+ space, the part of the string preceding the white space will be
+ regarded as a wakeup source name and handled as descrived above.
+ The other part of the string will be regarded as a timeout (in
+ nanoseconds) such that the wakeup source will be automatically
+ deactivated after it has expired. The timeout, if present, is
+ set regardless of the current state of the wakeup source object
+ in question.
+
+ Reads from this file return a string consisting of the names of
+ wakeup sources created with the help of it that are active at
+ the moment, separated with spaces.
+
+
+What: /sys/power/wake_unlock
+Date: February 2012
+Contact: Rafael J. Wysocki <rjw@rjwysocki.net>
+Description:
+ The /sys/power/wake_unlock file allows user space to deactivate
+ wakeup sources created with the help of /sys/power/wake_lock.
+ When a string is written to /sys/power/wake_unlock, it will be
+ assumed to represent the name of a wakeup source to deactivate.
+ If a wakeup source object of that name exists and is active at
+ the moment, it will be deactivated.
+
+ Reads from this file return a string consisting of the names of
+ wakeup sources created with the help of /sys/power/wake_lock
+ that are inactive at the moment, separated with spaces.
+
+What: /sys/power/pm_print_times
+Date: May 2012
+Contact: Sameer Nanda <snanda@chromium.org>
+Description:
+ The /sys/power/pm_print_times file allows user space to
+ control whether the time taken by devices to suspend and
+ resume is printed. These prints are useful for hunting down
+ devices that take too long to suspend or resume.
+
+ Writing a "1" enables this printing while writing a "0"
+ disables it. The default value is "0". Reading from this file
+ will display the current value.
diff --git a/Documentation/ABI/testing/sysfs-pps b/Documentation/ABI/testing/sysfs-pps
new file mode 100644
index 000000000..25028c7bc
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-pps
@@ -0,0 +1,73 @@
+What: /sys/class/pps/
+Date: February 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/pps/ directory will contain files and
+ directories that will provide a unified interface to
+ the PPS sources.
+
+What: /sys/class/pps/ppsX/
+Date: February 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/pps/ppsX/ directory is related to X-th
+ PPS source into the system. Each directory will
+ contain files to manage and control its PPS source.
+
+What: /sys/class/pps/ppsX/assert
+Date: February 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/pps/ppsX/assert file reports the assert events
+ and the assert sequence number of the X-th source in the form:
+
+ <secs>.<nsec>#<sequence>
+
+ If the source has no assert events the content of this file
+ is empty.
+
+What: /sys/class/pps/ppsX/clear
+Date: February 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/pps/ppsX/clear file reports the clear events
+ and the clear sequence number of the X-th source in the form:
+
+ <secs>.<nsec>#<sequence>
+
+ If the source has no clear events the content of this file
+ is empty.
+
+What: /sys/class/pps/ppsX/mode
+Date: February 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/pps/ppsX/mode file reports the functioning
+ mode of the X-th source in hexadecimal encoding.
+
+ Please, refer to linux/include/linux/pps.h for further
+ info.
+
+What: /sys/class/pps/ppsX/echo
+Date: February 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/pps/ppsX/echo file reports if the X-th does
+ or does not support an "echo" function.
+
+What: /sys/class/pps/ppsX/name
+Date: February 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/pps/ppsX/name file reports the name of the
+ X-th source.
+
+What: /sys/class/pps/ppsX/path
+Date: February 2008
+Contact: Rodolfo Giometti <giometti@linux.it>
+Description:
+ The /sys/class/pps/ppsX/path file reports the path name of
+ the device connected with the X-th source.
+
+ If the source is not connected with any device the content
+ of this file is empty.
diff --git a/Documentation/ABI/testing/sysfs-profiling b/Documentation/ABI/testing/sysfs-profiling
new file mode 100644
index 000000000..8a8e466eb
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-profiling
@@ -0,0 +1,13 @@
+What: /sys/kernel/profiling
+Date: September 2008
+Contact: Dave Hansen <dave@linux.vnet.ibm.com>
+Description:
+ /sys/kernel/profiling is the runtime equivalent
+ of the boot-time profile= option.
+
+ You can get the same effect running:
+
+ echo 2 > /sys/kernel/profiling
+
+ as you would by issuing profile=2 on the boot
+ command line.
diff --git a/Documentation/ABI/testing/sysfs-ptp b/Documentation/ABI/testing/sysfs-ptp
new file mode 100644
index 000000000..44806a678
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-ptp
@@ -0,0 +1,122 @@
+What: /sys/class/ptp/
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This directory contains files and directories
+ providing a standardized interface to the ancillary
+ features of PTP hardware clocks.
+
+What: /sys/class/ptp/ptpN/
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This directory contains the attributes of the Nth PTP
+ hardware clock registered into the PTP class driver
+ subsystem.
+
+What: /sys/class/ptp/ptpN/clock_name
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This file contains the name of the PTP hardware clock
+ as a human readable string. The purpose of this
+ attribute is to provide the user with a "friendly
+ name" and to help distinguish PHY based devices from
+ MAC based ones. The string does not necessarily have
+ to be any kind of unique id.
+
+What: /sys/class/ptp/ptpN/max_adjustment
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This file contains the PTP hardware clock's maximum
+ frequency adjustment value (a positive integer) in
+ parts per billion.
+
+What: /sys/class/ptp/ptpN/n_alarms
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This file contains the number of periodic or one shot
+ alarms offer by the PTP hardware clock.
+
+What: /sys/class/ptp/ptpN/n_external_timestamps
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This file contains the number of external timestamp
+ channels offered by the PTP hardware clock.
+
+What: /sys/class/ptp/ptpN/n_periodic_outputs
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This file contains the number of programmable periodic
+ output channels offered by the PTP hardware clock.
+
+What: /sys/class/ptp/ptpN/n_pins
+Date: March 2014
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This file contains the number of programmable pins
+ offered by the PTP hardware clock.
+
+What: /sys/class/ptp/ptpN/pins
+Date: March 2014
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This directory contains one file for each programmable
+ pin offered by the PTP hardware clock. The file name
+ is the hardware dependent pin name. Reading from this
+ file produces two numbers, the assigned function (see
+ the PTP_PF_ enumeration values in linux/ptp_clock.h)
+ and the channel number. The function and channel
+ assignment may be changed by two writing numbers into
+ the file.
+
+What: /sys/class/ptp/ptpN/pps_avaiable
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This file indicates whether the PTP hardware clock
+ supports a Pulse Per Second to the host CPU. Reading
+ "1" means that the PPS is supported, while "0" means
+ not supported.
+
+What: /sys/class/ptp/ptpN/extts_enable
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This write-only file enables or disables external
+ timestamps. To enable external timestamps, write the
+ channel index followed by a "1" into the file.
+ To disable external timestamps, write the channel
+ index followed by a "0" into the file.
+
+What: /sys/class/ptp/ptpN/fifo
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This file provides timestamps on external events, in
+ the form of three integers: channel index, seconds,
+ and nanoseconds.
+
+What: /sys/class/ptp/ptpN/period
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This write-only file enables or disables periodic
+ outputs. To enable a periodic output, write five
+ integers into the file: channel index, start time
+ seconds, start time nanoseconds, period seconds, and
+ period nanoseconds. To disable a periodic output, set
+ all the seconds and nanoseconds values to zero.
+
+What: /sys/class/ptp/ptpN/pps_enable
+Date: September 2010
+Contact: Richard Cochran <richardcochran@gmail.com>
+Description:
+ This write-only file enables or disables delivery of
+ PPS events to the Linux PPS subsystem. To enable PPS
+ events, write a "1" into the file. To disable events,
+ write a "0" into the file.
diff --git a/Documentation/ABI/testing/sysfs-tty b/Documentation/ABI/testing/sysfs-tty
new file mode 100644
index 000000000..9eb3c2b6b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-tty
@@ -0,0 +1,156 @@
+What: /sys/class/tty/console/active
+Date: Nov 2010
+Contact: Kay Sievers <kay.sievers@vrfy.org>
+Description:
+ Shows the list of currently configured
+ console devices, like 'tty1 ttyS0'.
+ The last entry in the file is the active
+ device connected to /dev/console.
+ The file supports poll() to detect virtual
+ console switches.
+
+What: /sys/class/tty/tty0/active
+Date: Nov 2010
+Contact: Kay Sievers <kay.sievers@vrfy.org>
+Description:
+ Shows the currently active virtual console
+ device, like 'tty1'.
+ The file supports poll() to detect virtual
+ console switches.
+
+What: /sys/class/tty/ttyS0/uartclk
+Date: Sep 2012
+Contact: Tomas Hlavacek <tmshlvck@gmail.com>
+Description:
+ Shows the current uartclk value associated with the
+ UART port in serial_core, that is bound to TTY like ttyS0.
+ uartclk = 16 * baud_base
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/type
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Shows the current tty type for this port.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/line
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Shows the current tty line number for this port.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/port
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Shows the current tty port I/O address for this port.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/irq
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Shows the current primary interrupt for this port.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/flags
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Show the tty port status flags for this port.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/xmit_fifo_size
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Show the transmit FIFO size for this port.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/close_delay
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Show the closing delay time for this port in ms.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/closing_wait
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Show the close wait time for this port in ms.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/custom_divisor
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Show the custom divisor if any that is set on this port.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/io_type
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Show the I/O type that is to be used with the iomem base
+ address.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/iomem_base
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ The I/O memory base for this port.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/iomem_reg_shift
+Date: October 2012
+Contact: Alan Cox <alan@linux.intel.com>
+Description:
+ Show the register shift indicating the spacing to be used
+ for accesses on this iomem address.
+
+ These sysfs values expose the TIOCGSERIAL interface via
+ sysfs rather than via ioctls.
+
+What: /sys/class/tty/ttyS0/rx_trig_bytes
+Date: May 2014
+Contact: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
+Description:
+ Shows current RX interrupt trigger bytes or sets the
+ user specified value to change it for the FIFO buffer.
+ Users can show or set this value regardless of opening the
+ serial device file or not.
+
+ The RX trigger can be set one of four kinds of values for UART
+ serials. When users input a meaning less value to this I/F,
+ the RX trigger is changed to the nearest lower value for the
+ device specification. For example, when user sets 7bytes on
+ 16550A, which has 1/4/8/14 bytes trigger, the RX trigger is
+ automatically changed to 4 bytes.
diff --git a/Documentation/ABI/testing/sysfs-wusb_cbaf b/Documentation/ABI/testing/sysfs-wusb_cbaf
new file mode 100644
index 000000000..a99c5f86a
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-wusb_cbaf
@@ -0,0 +1,100 @@
+What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_*
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ Various files for managing Cable Based Association of
+ (wireless) USB devices.
+
+ The sequence of operations should be:
+
+ 1. Device is plugged in.
+
+ 2. The connection manager (CM) sees a device with CBA capability.
+ (the wusb_chid etc. files in /sys/devices/blah/OURDEVICE).
+
+ 3. The CM writes the host name, supported band groups,
+ and the CHID (host ID) into the wusb_host_name,
+ wusb_host_band_groups and wusb_chid files. These
+ get sent to the device and the CDID (if any) for
+ this host is requested.
+
+ 4. The CM can verify that the device's supported band
+ groups (wusb_device_band_groups) are compatible
+ with the host.
+
+ 5. The CM reads the wusb_cdid file.
+
+ 6. The CM looks it up its database.
+
+ - If it has a matching CHID,CDID entry, the device
+ has been authorized before and nothing further
+ needs to be done.
+
+ - If the CDID is zero (or the CM doesn't find a
+ matching CDID in its database), the device is
+ assumed to be not known. The CM may associate
+ the host with device by: writing a randomly
+ generated CDID to wusb_cdid and then a random CK
+ to wusb_ck (this uploads the new CC to the
+ device).
+
+ CMD may choose to prompt the user before
+ associating with a new device.
+
+ 7. Device is unplugged.
+
+ References:
+ [WUSB-AM] Association Models Supplement to the
+ Certified Wireless Universal Serial Bus
+ Specification, version 1.0.
+
+What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_chid
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ The CHID of the host formatted as 16 space-separated
+ hex octets.
+
+ Writes fetches device's supported band groups and the
+ the CDID for any existing association with this host.
+
+What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_host_name
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ A friendly name for the host as a UTF-8 encoded string.
+
+What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_host_band_groups
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ The band groups supported by the host, in the format
+ defined in [WUSB-AM].
+
+What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_device_band_groups
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ The band groups supported by the device, in the format
+ defined in [WUSB-AM].
+
+What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_cdid
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ The device's CDID formatted as 16 space-separated hex
+ octets.
+
+What: /sys/bus/usb/drivers/wusb_cbaf/.../wusb_ck
+Date: August 2008
+KernelVersion: 2.6.27
+Contact: David Vrabel <david.vrabel@csr.com>
+Description:
+ Write 16 space-separated random, hex octets to
+ associate with the device.
diff --git a/Documentation/BUG-HUNTING b/Documentation/BUG-HUNTING
new file mode 100644
index 000000000..65022a87b
--- /dev/null
+++ b/Documentation/BUG-HUNTING
@@ -0,0 +1,246 @@
+Table of contents
+=================
+
+Last updated: 20 December 2005
+
+Contents
+========
+
+- Introduction
+- Devices not appearing
+- Finding patch that caused a bug
+-- Finding using git-bisect
+-- Finding it the old way
+- Fixing the bug
+
+Introduction
+============
+
+Always try the latest kernel from kernel.org and build from source. If you are
+not confident in doing that please report the bug to your distribution vendor
+instead of to a kernel developer.
+
+Finding bugs is not always easy. Have a go though. If you can't find it don't
+give up. Report as much as you have found to the relevant maintainer. See
+MAINTAINERS for who that is for the subsystem you have worked on.
+
+Before you submit a bug report read REPORTING-BUGS.
+
+Devices not appearing
+=====================
+
+Often this is caused by udev. Check that first before blaming it on the
+kernel.
+
+Finding patch that caused a bug
+===============================
+
+
+
+Finding using git-bisect
+------------------------
+
+Using the provided tools with git makes finding bugs easy provided the bug is
+reproducible.
+
+Steps to do it:
+- start using git for the kernel source
+- read the man page for git-bisect
+- have fun
+
+Finding it the old way
+----------------------
+
+[Sat Mar 2 10:32:33 PST 1996 KERNEL_BUG-HOWTO lm@sgi.com (Larry McVoy)]
+
+This is how to track down a bug if you know nothing about kernel hacking.
+It's a brute force approach but it works pretty well.
+
+You need:
+
+ . A reproducible bug - it has to happen predictably (sorry)
+ . All the kernel tar files from a revision that worked to the
+ revision that doesn't
+
+You will then do:
+
+ . Rebuild a revision that you believe works, install, and verify that.
+ . Do a binary search over the kernels to figure out which one
+ introduced the bug. I.e., suppose 1.3.28 didn't have the bug, but
+ you know that 1.3.69 does. Pick a kernel in the middle and build
+ that, like 1.3.50. Build & test; if it works, pick the mid point
+ between .50 and .69, else the mid point between .28 and .50.
+ . You'll narrow it down to the kernel that introduced the bug. You
+ can probably do better than this but it gets tricky.
+
+ . Narrow it down to a subdirectory
+
+ - Copy kernel that works into "test". Let's say that 3.62 works,
+ but 3.63 doesn't. So you diff -r those two kernels and come
+ up with a list of directories that changed. For each of those
+ directories:
+
+ Copy the non-working directory next to the working directory
+ as "dir.63".
+ One directory at time, try moving the working directory to
+ "dir.62" and mv dir.63 dir"time, try
+
+ mv dir dir.62
+ mv dir.63 dir
+ find dir -name '*.[oa]' -print | xargs rm -f
+
+ And then rebuild and retest. Assuming that all related
+ changes were contained in the sub directory, this should
+ isolate the change to a directory.
+
+ Problems: changes in header files may have occurred; I've
+ found in my case that they were self explanatory - you may
+ or may not want to give up when that happens.
+
+ . Narrow it down to a file
+
+ - You can apply the same technique to each file in the directory,
+ hoping that the changes in that file are self contained.
+
+ . Narrow it down to a routine
+
+ - You can take the old file and the new file and manually create
+ a merged file that has
+
+ #ifdef VER62
+ routine()
+ {
+ ...
+ }
+ #else
+ routine()
+ {
+ ...
+ }
+ #endif
+
+ And then walk through that file, one routine at a time and
+ prefix it with
+
+ #define VER62
+ /* both routines here */
+ #undef VER62
+
+ Then recompile, retest, move the ifdefs until you find the one
+ that makes the difference.
+
+Finally, you take all the info that you have, kernel revisions, bug
+description, the extent to which you have narrowed it down, and pass
+that off to whomever you believe is the maintainer of that section.
+A post to linux.dev.kernel isn't such a bad idea if you've done some
+work to narrow it down.
+
+If you get it down to a routine, you'll probably get a fix in 24 hours.
+
+My apologies to Linus and the other kernel hackers for describing this
+brute force approach, it's hardly what a kernel hacker would do. However,
+it does work and it lets non-hackers help fix bugs. And it is cool
+because Linux snapshots will let you do this - something that you can't
+do with vendor supplied releases.
+
+Fixing the bug
+==============
+
+Nobody is going to tell you how to fix bugs. Seriously. You need to work it
+out. But below are some hints on how to use the tools.
+
+To debug a kernel, use objdump and look for the hex offset from the crash
+output to find the valid line of code/assembler. Without debug symbols, you
+will see the assembler code for the routine shown, but if your kernel has
+debug symbols the C code will also be available. (Debug symbols can be enabled
+in the kernel hacking menu of the menu configuration.) For example:
+
+ objdump -r -S -l --disassemble net/dccp/ipv4.o
+
+NB.: you need to be at the top level of the kernel tree for this to pick up
+your C files.
+
+If you don't have access to the code you can also debug on some crash dumps
+e.g. crash dump output as shown by Dave Miller.
+
+> EIP is at ip_queue_xmit+0x14/0x4c0
+> ...
+> Code: 44 24 04 e8 6f 05 00 00 e9 e8 fe ff ff 8d 76 00 8d bc 27 00 00
+> 00 00 55 57 56 53 81 ec bc 00 00 00 8b ac 24 d0 00 00 00 8b 5d 08
+> <8b> 83 3c 01 00 00 89 44 24 14 8b 45 28 85 c0 89 44 24 18 0f 85
+>
+> Put the bytes into a "foo.s" file like this:
+>
+> .text
+> .globl foo
+> foo:
+> .byte .... /* bytes from Code: part of OOPS dump */
+>
+> Compile it with "gcc -c -o foo.o foo.s" then look at the output of
+> "objdump --disassemble foo.o".
+>
+> Output:
+>
+> ip_queue_xmit:
+> push %ebp
+> push %edi
+> push %esi
+> push %ebx
+> sub $0xbc, %esp
+> mov 0xd0(%esp), %ebp ! %ebp = arg0 (skb)
+> mov 0x8(%ebp), %ebx ! %ebx = skb->sk
+> mov 0x13c(%ebx), %eax ! %eax = inet_sk(sk)->opt
+
+In addition, you can use GDB to figure out the exact file and line
+number of the OOPS from the vmlinux file. If you have
+CONFIG_DEBUG_INFO enabled, you can simply copy the EIP value from the
+OOPS:
+
+ EIP: 0060:[<c021e50e>] Not tainted VLI
+
+And use GDB to translate that to human-readable form:
+
+ gdb vmlinux
+ (gdb) l *0xc021e50e
+
+If you don't have CONFIG_DEBUG_INFO enabled, you use the function
+offset from the OOPS:
+
+ EIP is at vt_ioctl+0xda8/0x1482
+
+And recompile the kernel with CONFIG_DEBUG_INFO enabled:
+
+ make vmlinux
+ gdb vmlinux
+ (gdb) p vt_ioctl
+ (gdb) l *(0x<address of vt_ioctl> + 0xda8)
+or, as one command
+ (gdb) l *(vt_ioctl + 0xda8)
+
+If you have a call trace, such as :-
+>Call Trace:
+> [<ffffffff8802c8e9>] :jbd:log_wait_commit+0xa3/0xf5
+> [<ffffffff810482d9>] autoremove_wake_function+0x0/0x2e
+> [<ffffffff8802770b>] :jbd:journal_stop+0x1be/0x1ee
+> ...
+this shows the problem in the :jbd: module. You can load that module in gdb
+and list the relevant code.
+ gdb fs/jbd/jbd.ko
+ (gdb) p log_wait_commit
+ (gdb) l *(0x<address> + 0xa3)
+or
+ (gdb) l *(log_wait_commit + 0xa3)
+
+
+Another very useful option of the Kernel Hacking section in menuconfig is
+Debug memory allocations. This will help you see whether data has been
+initialised and not set before use etc. To see the values that get assigned
+with this look at mm/slab.c and search for POISON_INUSE. When using this an
+Oops will often show the poisoned data instead of zero which is the default.
+
+Once you have worked out a fix please submit it upstream. After all open
+source is about sharing what you do and don't you want to be recognised for
+your genius?
+
+Please do read Documentation/SubmittingPatches though to help your code get
+accepted.
diff --git a/Documentation/Changes b/Documentation/Changes
new file mode 100644
index 000000000..646cdaa6e
--- /dev/null
+++ b/Documentation/Changes
@@ -0,0 +1,395 @@
+Intro
+=====
+
+This document is designed to provide a list of the minimum levels of
+software necessary to run the 3.0 kernels.
+
+This document is originally based on my "Changes" file for 2.0.x kernels
+and therefore owes credit to the same people as that file (Jared Mauch,
+Axel Boldt, Alessandro Sigala, and countless other users all over the
+'net).
+
+Current Minimal Requirements
+============================
+
+Upgrade to at *least* these software revisions before thinking you've
+encountered a bug! If you're unsure what version you're currently
+running, the suggested command should tell you.
+
+Again, keep in mind that this list assumes you are already functionally
+running a Linux kernel. Also, not all tools are necessary on all
+systems; obviously, if you don't have any ISDN hardware, for example,
+you probably needn't concern yourself with isdn4k-utils.
+
+o GNU C 3.2 # gcc --version
+o GNU make 3.80 # make --version
+o binutils 2.12 # ld -v
+o util-linux 2.10o # fdformat --version
+o module-init-tools 0.9.10 # depmod -V
+o e2fsprogs 1.41.4 # e2fsck -V
+o jfsutils 1.1.3 # fsck.jfs -V
+o reiserfsprogs 3.6.3 # reiserfsck -V
+o xfsprogs 2.6.0 # xfs_db -V
+o squashfs-tools 4.0 # mksquashfs -version
+o btrfs-progs 0.18 # btrfsck
+o pcmciautils 004 # pccardctl -V
+o quota-tools 3.09 # quota -V
+o PPP 2.4.0 # pppd --version
+o isdn4k-utils 3.1pre1 # isdnctrl 2>&1|grep version
+o nfs-utils 1.0.5 # showmount --version
+o procps 3.2.0 # ps --version
+o oprofile 0.9 # oprofiled --version
+o udev 081 # udevd --version
+o grub 0.93 # grub --version || grub-install --version
+o mcelog 0.6 # mcelog --version
+o iptables 1.4.2 # iptables -V
+
+
+Kernel compilation
+==================
+
+GCC
+---
+
+The gcc version requirements may vary depending on the type of CPU in your
+computer.
+
+Make
+----
+
+You will need GNU make 3.80 or later to build the kernel.
+
+Binutils
+--------
+
+Linux on IA-32 has recently switched from using as86 to using gas for
+assembling the 16-bit boot code, removing the need for as86 to compile
+your kernel. This change does, however, mean that you need a recent
+release of binutils.
+
+Perl
+----
+
+You will need perl 5 and the following modules: Getopt::Long, Getopt::Std,
+File::Basename, and File::Find to build the kernel.
+
+BC
+--
+
+You will need bc to build kernels 3.10 and higher
+
+
+System utilities
+================
+
+Architectural changes
+---------------------
+
+DevFS has been obsoleted in favour of udev
+(http://www.kernel.org/pub/linux/utils/kernel/hotplug/)
+
+32-bit UID support is now in place. Have fun!
+
+Linux documentation for functions is transitioning to inline
+documentation via specially-formatted comments near their
+definitions in the source. These comments can be combined with the
+SGML templates in the Documentation/DocBook directory to make DocBook
+files, which can then be converted by DocBook stylesheets to PostScript,
+HTML, PDF files, and several other formats. In order to convert from
+DocBook format to a format of your choice, you'll need to install Jade as
+well as the desired DocBook stylesheets.
+
+Util-linux
+----------
+
+New versions of util-linux provide *fdisk support for larger disks,
+support new options to mount, recognize more supported partition
+types, have a fdformat which works with 2.4 kernels, and similar goodies.
+You'll probably want to upgrade.
+
+Ksymoops
+--------
+
+If the unthinkable happens and your kernel oopses, you may need the
+ksymoops tool to decode it, but in most cases you don't.
+It is generally preferred to build the kernel with CONFIG_KALLSYMS so
+that it produces readable dumps that can be used as-is (this also
+produces better output than ksymoops). If for some reason your kernel
+is not build with CONFIG_KALLSYMS and you have no way to rebuild and
+reproduce the Oops with that option, then you can still decode that Oops
+with ksymoops.
+
+Module-Init-Tools
+-----------------
+
+A new module loader is now in the kernel that requires module-init-tools
+to use. It is backward compatible with the 2.4.x series kernels.
+
+Mkinitrd
+--------
+
+These changes to the /lib/modules file tree layout also require that
+mkinitrd be upgraded.
+
+E2fsprogs
+---------
+
+The latest version of e2fsprogs fixes several bugs in fsck and
+debugfs. Obviously, it's a good idea to upgrade.
+
+JFSutils
+--------
+
+The jfsutils package contains the utilities for the file system.
+The following utilities are available:
+o fsck.jfs - initiate replay of the transaction log, and check
+ and repair a JFS formatted partition.
+o mkfs.jfs - create a JFS formatted partition.
+o other file system utilities are also available in this package.
+
+Reiserfsprogs
+-------------
+
+The reiserfsprogs package should be used for reiserfs-3.6.x
+(Linux kernels 2.4.x). It is a combined package and contains working
+versions of mkreiserfs, resize_reiserfs, debugreiserfs and
+reiserfsck. These utils work on both i386 and alpha platforms.
+
+Xfsprogs
+--------
+
+The latest version of xfsprogs contains mkfs.xfs, xfs_db, and the
+xfs_repair utilities, among others, for the XFS filesystem. It is
+architecture independent and any version from 2.0.0 onward should
+work correctly with this version of the XFS kernel code (2.6.0 or
+later is recommended, due to some significant improvements).
+
+PCMCIAutils
+-----------
+
+PCMCIAutils replaces pcmcia-cs. It properly sets up
+PCMCIA sockets at system startup and loads the appropriate modules
+for 16-bit PCMCIA devices if the kernel is modularized and the hotplug
+subsystem is used.
+
+Quota-tools
+-----------
+
+Support for 32 bit uid's and gid's is required if you want to use
+the newer version 2 quota format. Quota-tools version 3.07 and
+newer has this support. Use the recommended version or newer
+from the table above.
+
+Intel IA32 microcode
+--------------------
+
+A driver has been added to allow updating of Intel IA32 microcode,
+accessible as a normal (misc) character device. If you are not using
+udev you may need to:
+
+mkdir /dev/cpu
+mknod /dev/cpu/microcode c 10 184
+chmod 0644 /dev/cpu/microcode
+
+as root before you can use this. You'll probably also want to
+get the user-space microcode_ctl utility to use with this.
+
+udev
+----
+udev is a userspace application for populating /dev dynamically with
+only entries for devices actually present. udev replaces the basic
+functionality of devfs, while allowing persistent device naming for
+devices.
+
+FUSE
+----
+
+Needs libfuse 2.4.0 or later. Absolute minimum is 2.3.0 but mount
+options 'direct_io' and 'kernel_cache' won't work.
+
+Networking
+==========
+
+General changes
+---------------
+
+If you have advanced network configuration needs, you should probably
+consider using the network tools from ip-route2.
+
+Packet Filter / NAT
+-------------------
+The packet filtering and NAT code uses the same tools like the previous 2.4.x
+kernel series (iptables). It still includes backwards-compatibility modules
+for 2.2.x-style ipchains and 2.0.x-style ipfwadm.
+
+PPP
+---
+
+The PPP driver has been restructured to support multilink and to
+enable it to operate over diverse media layers. If you use PPP,
+upgrade pppd to at least 2.4.0.
+
+If you are not using udev, you must have the device file /dev/ppp
+which can be made by:
+
+mknod /dev/ppp c 108 0
+
+as root.
+
+Isdn4k-utils
+------------
+
+Due to changes in the length of the phone number field, isdn4k-utils
+needs to be recompiled or (preferably) upgraded.
+
+NFS-utils
+---------
+
+In ancient (2.4 and earlier) kernels, the nfs server needed to know
+about any client that expected to be able to access files via NFS. This
+information would be given to the kernel by "mountd" when the client
+mounted the filesystem, or by "exportfs" at system startup. exportfs
+would take information about active clients from /var/lib/nfs/rmtab.
+
+This approach is quite fragile as it depends on rmtab being correct
+which is not always easy, particularly when trying to implement
+fail-over. Even when the system is working well, rmtab suffers from
+getting lots of old entries that never get removed.
+
+With modern kernels we have the option of having the kernel tell mountd
+when it gets a request from an unknown host, and mountd can give
+appropriate export information to the kernel. This removes the
+dependency on rmtab and means that the kernel only needs to know about
+currently active clients.
+
+To enable this new functionality, you need to:
+
+ mount -t nfsd nfsd /proc/fs/nfsd
+
+before running exportfs or mountd. It is recommended that all NFS
+services be protected from the internet-at-large by a firewall where
+that is possible.
+
+mcelog
+------
+
+On x86 kernels the mcelog utility is needed to process and log machine check
+events when CONFIG_X86_MCE is enabled. Machine check events are errors reported
+by the CPU. Processing them is strongly encouraged.
+
+Getting updated software
+========================
+
+Kernel compilation
+******************
+
+gcc
+---
+o <ftp://ftp.gnu.org/gnu/gcc/>
+
+Make
+----
+o <ftp://ftp.gnu.org/gnu/make/>
+
+Binutils
+--------
+o <ftp://ftp.kernel.org/pub/linux/devel/binutils/>
+
+System utilities
+****************
+
+Util-linux
+----------
+o <ftp://ftp.kernel.org/pub/linux/utils/util-linux/>
+
+Ksymoops
+--------
+o <ftp://ftp.kernel.org/pub/linux/utils/kernel/ksymoops/v2.4/>
+
+Module-Init-Tools
+-----------------
+o <ftp://ftp.kernel.org/pub/linux/kernel/people/rusty/modules/>
+
+Mkinitrd
+--------
+o <https://code.launchpad.net/initrd-tools/main>
+
+E2fsprogs
+---------
+o <http://prdownloads.sourceforge.net/e2fsprogs/e2fsprogs-1.29.tar.gz>
+
+JFSutils
+--------
+o <http://jfs.sourceforge.net/>
+
+Reiserfsprogs
+-------------
+o <http://www.kernel.org/pub/linux/utils/fs/reiserfs/>
+
+Xfsprogs
+--------
+o <ftp://oss.sgi.com/projects/xfs/>
+
+Pcmciautils
+-----------
+o <ftp://ftp.kernel.org/pub/linux/utils/kernel/pcmcia/>
+
+Quota-tools
+----------
+o <http://sourceforge.net/projects/linuxquota/>
+
+DocBook Stylesheets
+-------------------
+o <http://sourceforge.net/projects/docbook/files/docbook-dsssl/>
+
+XMLTO XSLT Frontend
+-------------------
+o <http://cyberelk.net/tim/xmlto/>
+
+Intel P6 microcode
+------------------
+o <https://downloadcenter.intel.com/>
+
+udev
+----
+o <http://www.freedesktop.org/software/systemd/man/udev.html>
+
+FUSE
+----
+o <http://sourceforge.net/projects/fuse>
+
+mcelog
+------
+o <http://www.mcelog.org/>
+
+Networking
+**********
+
+PPP
+---
+o <ftp://ftp.samba.org/pub/ppp/>
+
+Isdn4k-utils
+------------
+o <ftp://ftp.isdn4linux.de/pub/isdn4linux/utils/>
+
+NFS-utils
+---------
+o <http://sourceforge.net/project/showfiles.php?group_id=14>
+
+Iptables
+--------
+o <http://www.iptables.org/downloads.html>
+
+Ip-route2
+---------
+o <https://www.kernel.org/pub/linux/utils/net/iproute2/>
+
+OProfile
+--------
+o <http://oprofile.sf.net/download/>
+
+NFS-Utils
+---------
+o <http://nfs.sourceforge.net/>
+
diff --git a/Documentation/CodeOfConflict b/Documentation/CodeOfConflict
new file mode 100644
index 000000000..1684d0b4e
--- /dev/null
+++ b/Documentation/CodeOfConflict
@@ -0,0 +1,27 @@
+Code of Conflict
+----------------
+
+The Linux kernel development effort is a very personal process compared
+to "traditional" ways of developing software. Your code and ideas
+behind it will be carefully reviewed, often resulting in critique and
+criticism. The review will almost always require improvements to the
+code before it can be included in the kernel. Know that this happens
+because everyone involved wants to see the best possible solution for
+the overall success of Linux. This development process has been proven
+to create the most robust operating system kernel ever, and we do not
+want to do anything to cause the quality of submission and eventual
+result to ever decrease.
+
+If however, anyone feels personally abused, threatened, or otherwise
+uncomfortable due to this process, that is not acceptable. If so,
+please contact the Linux Foundation's Technical Advisory Board at
+<tab@lists.linux-foundation.org>, or the individual members, and they
+will work to resolve the issue to the best of their ability. For more
+information on who is on the Technical Advisory Board and what their
+role is, please see:
+ http://www.linuxfoundation.org/programs/advisory-councils/tab
+
+As a reviewer of code, please strive to keep things civil and focused on
+the technical issues involved. We are all humans, and frustrations can
+be high on both sides of the process. Try to keep in mind the immortal
+words of Bill and Ted, "Be excellent to each other."
diff --git a/Documentation/CodingStyle b/Documentation/CodingStyle
new file mode 100644
index 000000000..f4b78eafd
--- /dev/null
+++ b/Documentation/CodingStyle
@@ -0,0 +1,948 @@
+
+ Linux kernel coding style
+
+This is a short document describing the preferred coding style for the
+linux kernel. Coding style is very personal, and I won't _force_ my
+views on anybody, but this is what goes for anything that I have to be
+able to maintain, and I'd prefer it for most other things too. Please
+at least consider the points made here.
+
+First off, I'd suggest printing out a copy of the GNU coding standards,
+and NOT read it. Burn them, it's a great symbolic gesture.
+
+Anyway, here goes:
+
+
+ Chapter 1: Indentation
+
+Tabs are 8 characters, and thus indentations are also 8 characters.
+There are heretic movements that try to make indentations 4 (or even 2!)
+characters deep, and that is akin to trying to define the value of PI to
+be 3.
+
+Rationale: The whole idea behind indentation is to clearly define where
+a block of control starts and ends. Especially when you've been looking
+at your screen for 20 straight hours, you'll find it a lot easier to see
+how the indentation works if you have large indentations.
+
+Now, some people will claim that having 8-character indentations makes
+the code move too far to the right, and makes it hard to read on a
+80-character terminal screen. The answer to that is that if you need
+more than 3 levels of indentation, you're screwed anyway, and should fix
+your program.
+
+In short, 8-char indents make things easier to read, and have the added
+benefit of warning you when you're nesting your functions too deep.
+Heed that warning.
+
+The preferred way to ease multiple indentation levels in a switch statement is
+to align the "switch" and its subordinate "case" labels in the same column
+instead of "double-indenting" the "case" labels. E.g.:
+
+ switch (suffix) {
+ case 'G':
+ case 'g':
+ mem <<= 30;
+ break;
+ case 'M':
+ case 'm':
+ mem <<= 20;
+ break;
+ case 'K':
+ case 'k':
+ mem <<= 10;
+ /* fall through */
+ default:
+ break;
+ }
+
+Don't put multiple statements on a single line unless you have
+something to hide:
+
+ if (condition) do_this;
+ do_something_everytime;
+
+Don't put multiple assignments on a single line either. Kernel coding style
+is super simple. Avoid tricky expressions.
+
+Outside of comments, documentation and except in Kconfig, spaces are never
+used for indentation, and the above example is deliberately broken.
+
+Get a decent editor and don't leave whitespace at the end of lines.
+
+
+ Chapter 2: Breaking long lines and strings
+
+Coding style is all about readability and maintainability using commonly
+available tools.
+
+The limit on the length of lines is 80 columns and this is a strongly
+preferred limit.
+
+Statements longer than 80 columns will be broken into sensible chunks, unless
+exceeding 80 columns significantly increases readability and does not hide
+information. Descendants are always substantially shorter than the parent and
+are placed substantially to the right. The same applies to function headers
+with a long argument list. However, never break user-visible strings such as
+printk messages, because that breaks the ability to grep for them.
+
+
+ Chapter 3: Placing Braces and Spaces
+
+The other issue that always comes up in C styling is the placement of
+braces. Unlike the indent size, there are few technical reasons to
+choose one placement strategy over the other, but the preferred way, as
+shown to us by the prophets Kernighan and Ritchie, is to put the opening
+brace last on the line, and put the closing brace first, thusly:
+
+ if (x is true) {
+ we do y
+ }
+
+This applies to all non-function statement blocks (if, switch, for,
+while, do). E.g.:
+
+ switch (action) {
+ case KOBJ_ADD:
+ return "add";
+ case KOBJ_REMOVE:
+ return "remove";
+ case KOBJ_CHANGE:
+ return "change";
+ default:
+ return NULL;
+ }
+
+However, there is one special case, namely functions: they have the
+opening brace at the beginning of the next line, thus:
+
+ int function(int x)
+ {
+ body of function
+ }
+
+Heretic people all over the world have claimed that this inconsistency
+is ... well ... inconsistent, but all right-thinking people know that
+(a) K&R are _right_ and (b) K&R are right. Besides, functions are
+special anyway (you can't nest them in C).
+
+Note that the closing brace is empty on a line of its own, _except_ in
+the cases where it is followed by a continuation of the same statement,
+ie a "while" in a do-statement or an "else" in an if-statement, like
+this:
+
+ do {
+ body of do-loop
+ } while (condition);
+
+and
+
+ if (x == y) {
+ ..
+ } else if (x > y) {
+ ...
+ } else {
+ ....
+ }
+
+Rationale: K&R.
+
+Also, note that this brace-placement also minimizes the number of empty
+(or almost empty) lines, without any loss of readability. Thus, as the
+supply of new-lines on your screen is not a renewable resource (think
+25-line terminal screens here), you have more empty lines to put
+comments on.
+
+Do not unnecessarily use braces where a single statement will do.
+
+ if (condition)
+ action();
+
+and
+
+ if (condition)
+ do_this();
+ else
+ do_that();
+
+This does not apply if only one branch of a conditional statement is a single
+statement; in the latter case use braces in both branches:
+
+ if (condition) {
+ do_this();
+ do_that();
+ } else {
+ otherwise();
+ }
+
+ 3.1: Spaces
+
+Linux kernel style for use of spaces depends (mostly) on
+function-versus-keyword usage. Use a space after (most) keywords. The
+notable exceptions are sizeof, typeof, alignof, and __attribute__, which look
+somewhat like functions (and are usually used with parentheses in Linux,
+although they are not required in the language, as in: "sizeof info" after
+"struct fileinfo info;" is declared).
+
+So use a space after these keywords:
+
+ if, switch, case, for, do, while
+
+but not with sizeof, typeof, alignof, or __attribute__. E.g.,
+
+ s = sizeof(struct file);
+
+Do not add spaces around (inside) parenthesized expressions. This example is
+*bad*:
+
+ s = sizeof( struct file );
+
+When declaring pointer data or a function that returns a pointer type, the
+preferred use of '*' is adjacent to the data name or function name and not
+adjacent to the type name. Examples:
+
+ char *linux_banner;
+ unsigned long long memparse(char *ptr, char **retptr);
+ char *match_strdup(substring_t *s);
+
+Use one space around (on each side of) most binary and ternary operators,
+such as any of these:
+
+ = + - < > * / % | & ^ <= >= == != ? :
+
+but no space after unary operators:
+
+ & * + - ~ ! sizeof typeof alignof __attribute__ defined
+
+no space before the postfix increment & decrement unary operators:
+
+ ++ --
+
+no space after the prefix increment & decrement unary operators:
+
+ ++ --
+
+and no space around the '.' and "->" structure member operators.
+
+Do not leave trailing whitespace at the ends of lines. Some editors with
+"smart" indentation will insert whitespace at the beginning of new lines as
+appropriate, so you can start typing the next line of code right away.
+However, some such editors do not remove the whitespace if you end up not
+putting a line of code there, such as if you leave a blank line. As a result,
+you end up with lines containing trailing whitespace.
+
+Git will warn you about patches that introduce trailing whitespace, and can
+optionally strip the trailing whitespace for you; however, if applying a series
+of patches, this may make later patches in the series fail by changing their
+context lines.
+
+
+ Chapter 4: Naming
+
+C is a Spartan language, and so should your naming be. Unlike Modula-2
+and Pascal programmers, C programmers do not use cute names like
+ThisVariableIsATemporaryCounter. A C programmer would call that
+variable "tmp", which is much easier to write, and not the least more
+difficult to understand.
+
+HOWEVER, while mixed-case names are frowned upon, descriptive names for
+global variables are a must. To call a global function "foo" is a
+shooting offense.
+
+GLOBAL variables (to be used only if you _really_ need them) need to
+have descriptive names, as do global functions. If you have a function
+that counts the number of active users, you should call that
+"count_active_users()" or similar, you should _not_ call it "cntusr()".
+
+Encoding the type of a function into the name (so-called Hungarian
+notation) is brain damaged - the compiler knows the types anyway and can
+check those, and it only confuses the programmer. No wonder MicroSoft
+makes buggy programs.
+
+LOCAL variable names should be short, and to the point. If you have
+some random integer loop counter, it should probably be called "i".
+Calling it "loop_counter" is non-productive, if there is no chance of it
+being mis-understood. Similarly, "tmp" can be just about any type of
+variable that is used to hold a temporary value.
+
+If you are afraid to mix up your local variable names, you have another
+problem, which is called the function-growth-hormone-imbalance syndrome.
+See chapter 6 (Functions).
+
+
+ Chapter 5: Typedefs
+
+Please don't use things like "vps_t".
+It's a _mistake_ to use typedef for structures and pointers. When you see a
+
+ vps_t a;
+
+in the source, what does it mean?
+In contrast, if it says
+
+ struct virtual_container *a;
+
+you can actually tell what "a" is.
+
+Lots of people think that typedefs "help readability". Not so. They are
+useful only for:
+
+ (a) totally opaque objects (where the typedef is actively used to _hide_
+ what the object is).
+
+ Example: "pte_t" etc. opaque objects that you can only access using
+ the proper accessor functions.
+
+ NOTE! Opaqueness and "accessor functions" are not good in themselves.
+ The reason we have them for things like pte_t etc. is that there
+ really is absolutely _zero_ portably accessible information there.
+
+ (b) Clear integer types, where the abstraction _helps_ avoid confusion
+ whether it is "int" or "long".
+
+ u8/u16/u32 are perfectly fine typedefs, although they fit into
+ category (d) better than here.
+
+ NOTE! Again - there needs to be a _reason_ for this. If something is
+ "unsigned long", then there's no reason to do
+
+ typedef unsigned long myflags_t;
+
+ but if there is a clear reason for why it under certain circumstances
+ might be an "unsigned int" and under other configurations might be
+ "unsigned long", then by all means go ahead and use a typedef.
+
+ (c) when you use sparse to literally create a _new_ type for
+ type-checking.
+
+ (d) New types which are identical to standard C99 types, in certain
+ exceptional circumstances.
+
+ Although it would only take a short amount of time for the eyes and
+ brain to become accustomed to the standard types like 'uint32_t',
+ some people object to their use anyway.
+
+ Therefore, the Linux-specific 'u8/u16/u32/u64' types and their
+ signed equivalents which are identical to standard types are
+ permitted -- although they are not mandatory in new code of your
+ own.
+
+ When editing existing code which already uses one or the other set
+ of types, you should conform to the existing choices in that code.
+
+ (e) Types safe for use in userspace.
+
+ In certain structures which are visible to userspace, we cannot
+ require C99 types and cannot use the 'u32' form above. Thus, we
+ use __u32 and similar types in all structures which are shared
+ with userspace.
+
+Maybe there are other cases too, but the rule should basically be to NEVER
+EVER use a typedef unless you can clearly match one of those rules.
+
+In general, a pointer, or a struct that has elements that can reasonably
+be directly accessed should _never_ be a typedef.
+
+
+ Chapter 6: Functions
+
+Functions should be short and sweet, and do just one thing. They should
+fit on one or two screenfuls of text (the ISO/ANSI screen size is 80x24,
+as we all know), and do one thing and do that well.
+
+The maximum length of a function is inversely proportional to the
+complexity and indentation level of that function. So, if you have a
+conceptually simple function that is just one long (but simple)
+case-statement, where you have to do lots of small things for a lot of
+different cases, it's OK to have a longer function.
+
+However, if you have a complex function, and you suspect that a
+less-than-gifted first-year high-school student might not even
+understand what the function is all about, you should adhere to the
+maximum limits all the more closely. Use helper functions with
+descriptive names (you can ask the compiler to in-line them if you think
+it's performance-critical, and it will probably do a better job of it
+than you would have done).
+
+Another measure of the function is the number of local variables. They
+shouldn't exceed 5-10, or you're doing something wrong. Re-think the
+function, and split it into smaller pieces. A human brain can
+generally easily keep track of about 7 different things, anything more
+and it gets confused. You know you're brilliant, but maybe you'd like
+to understand what you did 2 weeks from now.
+
+In source files, separate functions with one blank line. If the function is
+exported, the EXPORT* macro for it should follow immediately after the closing
+function brace line. E.g.:
+
+ int system_is_up(void)
+ {
+ return system_state == SYSTEM_RUNNING;
+ }
+ EXPORT_SYMBOL(system_is_up);
+
+In function prototypes, include parameter names with their data types.
+Although this is not required by the C language, it is preferred in Linux
+because it is a simple way to add valuable information for the reader.
+
+
+ Chapter 7: Centralized exiting of functions
+
+Albeit deprecated by some people, the equivalent of the goto statement is
+used frequently by compilers in form of the unconditional jump instruction.
+
+The goto statement comes in handy when a function exits from multiple
+locations and some common work such as cleanup has to be done. If there is no
+cleanup needed then just return directly.
+
+Choose label names which say what the goto does or why the goto exists. An
+example of a good name could be "out_buffer:" if the goto frees "buffer". Avoid
+using GW-BASIC names like "err1:" and "err2:". Also don't name them after the
+goto location like "err_kmalloc_failed:"
+
+The rationale for using gotos is:
+
+- unconditional statements are easier to understand and follow
+- nesting is reduced
+- errors by not updating individual exit points when making
+ modifications are prevented
+- saves the compiler work to optimize redundant code away ;)
+
+ int fun(int a)
+ {
+ int result = 0;
+ char *buffer;
+
+ buffer = kmalloc(SIZE, GFP_KERNEL);
+ if (!buffer)
+ return -ENOMEM;
+
+ if (condition1) {
+ while (loop1) {
+ ...
+ }
+ result = 1;
+ goto out_buffer;
+ }
+ ...
+ out_buffer:
+ kfree(buffer);
+ return result;
+ }
+
+A common type of bug to be aware of it "one err bugs" which look like this:
+
+ err:
+ kfree(foo->bar);
+ kfree(foo);
+ return ret;
+
+The bug in this code is that on some exit paths "foo" is NULL. Normally the
+fix for this is to split it up into two error labels "err_bar:" and "err_foo:".
+
+
+ Chapter 8: Commenting
+
+Comments are good, but there is also a danger of over-commenting. NEVER
+try to explain HOW your code works in a comment: it's much better to
+write the code so that the _working_ is obvious, and it's a waste of
+time to explain badly written code.
+
+Generally, you want your comments to tell WHAT your code does, not HOW.
+Also, try to avoid putting comments inside a function body: if the
+function is so complex that you need to separately comment parts of it,
+you should probably go back to chapter 6 for a while. You can make
+small comments to note or warn about something particularly clever (or
+ugly), but try to avoid excess. Instead, put the comments at the head
+of the function, telling people what it does, and possibly WHY it does
+it.
+
+When commenting the kernel API functions, please use the kernel-doc format.
+See the files Documentation/kernel-doc-nano-HOWTO.txt and scripts/kernel-doc
+for details.
+
+Linux style for comments is the C89 "/* ... */" style.
+Don't use C99-style "// ..." comments.
+
+The preferred style for long (multi-line) comments is:
+
+ /*
+ * This is the preferred style for multi-line
+ * comments in the Linux kernel source code.
+ * Please use it consistently.
+ *
+ * Description: A column of asterisks on the left side,
+ * with beginning and ending almost-blank lines.
+ */
+
+For files in net/ and drivers/net/ the preferred style for long (multi-line)
+comments is a little different.
+
+ /* The preferred comment style for files in net/ and drivers/net
+ * looks like this.
+ *
+ * It is nearly the same as the generally preferred comment style,
+ * but there is no initial almost-blank line.
+ */
+
+It's also important to comment data, whether they are basic types or derived
+types. To this end, use just one data declaration per line (no commas for
+multiple data declarations). This leaves you room for a small comment on each
+item, explaining its use.
+
+
+ Chapter 9: You've made a mess of it
+
+That's OK, we all do. You've probably been told by your long-time Unix
+user helper that "GNU emacs" automatically formats the C sources for
+you, and you've noticed that yes, it does do that, but the defaults it
+uses are less than desirable (in fact, they are worse than random
+typing - an infinite number of monkeys typing into GNU emacs would never
+make a good program).
+
+So, you can either get rid of GNU emacs, or change it to use saner
+values. To do the latter, you can stick the following in your .emacs file:
+
+(defun c-lineup-arglist-tabs-only (ignored)
+ "Line up argument lists by tabs, not spaces"
+ (let* ((anchor (c-langelem-pos c-syntactic-element))
+ (column (c-langelem-2nd-pos c-syntactic-element))
+ (offset (- (1+ column) anchor))
+ (steps (floor offset c-basic-offset)))
+ (* (max steps 1)
+ c-basic-offset)))
+
+(add-hook 'c-mode-common-hook
+ (lambda ()
+ ;; Add kernel style
+ (c-add-style
+ "linux-tabs-only"
+ '("linux" (c-offsets-alist
+ (arglist-cont-nonempty
+ c-lineup-gcc-asm-reg
+ c-lineup-arglist-tabs-only))))))
+
+(add-hook 'c-mode-hook
+ (lambda ()
+ (let ((filename (buffer-file-name)))
+ ;; Enable kernel mode for the appropriate files
+ (when (and filename
+ (string-match (expand-file-name "~/src/linux-trees")
+ filename))
+ (setq indent-tabs-mode t)
+ (setq show-trailing-whitespace t)
+ (c-set-style "linux-tabs-only")))))
+
+This will make emacs go better with the kernel coding style for C
+files below ~/src/linux-trees.
+
+But even if you fail in getting emacs to do sane formatting, not
+everything is lost: use "indent".
+
+Now, again, GNU indent has the same brain-dead settings that GNU emacs
+has, which is why you need to give it a few command line options.
+However, that's not too bad, because even the makers of GNU indent
+recognize the authority of K&R (the GNU people aren't evil, they are
+just severely misguided in this matter), so you just give indent the
+options "-kr -i8" (stands for "K&R, 8 character indents"), or use
+"scripts/Lindent", which indents in the latest style.
+
+"indent" has a lot of options, and especially when it comes to comment
+re-formatting you may want to take a look at the man page. But
+remember: "indent" is not a fix for bad programming.
+
+
+ Chapter 10: Kconfig configuration files
+
+For all of the Kconfig* configuration files throughout the source tree,
+the indentation is somewhat different. Lines under a "config" definition
+are indented with one tab, while help text is indented an additional two
+spaces. Example:
+
+config AUDIT
+ bool "Auditing support"
+ depends on NET
+ help
+ Enable auditing infrastructure that can be used with another
+ kernel subsystem, such as SELinux (which requires this for
+ logging of avc messages output). Does not do system-call
+ auditing without CONFIG_AUDITSYSCALL.
+
+Seriously dangerous features (such as write support for certain
+filesystems) should advertise this prominently in their prompt string:
+
+config ADFS_FS_RW
+ bool "ADFS write support (DANGEROUS)"
+ depends on ADFS_FS
+ ...
+
+For full documentation on the configuration files, see the file
+Documentation/kbuild/kconfig-language.txt.
+
+
+ Chapter 11: Data structures
+
+Data structures that have visibility outside the single-threaded
+environment they are created and destroyed in should always have
+reference counts. In the kernel, garbage collection doesn't exist (and
+outside the kernel garbage collection is slow and inefficient), which
+means that you absolutely _have_ to reference count all your uses.
+
+Reference counting means that you can avoid locking, and allows multiple
+users to have access to the data structure in parallel - and not having
+to worry about the structure suddenly going away from under them just
+because they slept or did something else for a while.
+
+Note that locking is _not_ a replacement for reference counting.
+Locking is used to keep data structures coherent, while reference
+counting is a memory management technique. Usually both are needed, and
+they are not to be confused with each other.
+
+Many data structures can indeed have two levels of reference counting,
+when there are users of different "classes". The subclass count counts
+the number of subclass users, and decrements the global count just once
+when the subclass count goes to zero.
+
+Examples of this kind of "multi-level-reference-counting" can be found in
+memory management ("struct mm_struct": mm_users and mm_count), and in
+filesystem code ("struct super_block": s_count and s_active).
+
+Remember: if another thread can find your data structure, and you don't
+have a reference count on it, you almost certainly have a bug.
+
+
+ Chapter 12: Macros, Enums and RTL
+
+Names of macros defining constants and labels in enums are capitalized.
+
+ #define CONSTANT 0x12345
+
+Enums are preferred when defining several related constants.
+
+CAPITALIZED macro names are appreciated but macros resembling functions
+may be named in lower case.
+
+Generally, inline functions are preferable to macros resembling functions.
+
+Macros with multiple statements should be enclosed in a do - while block:
+
+ #define macrofun(a, b, c) \
+ do { \
+ if (a == 5) \
+ do_this(b, c); \
+ } while (0)
+
+Things to avoid when using macros:
+
+1) macros that affect control flow:
+
+ #define FOO(x) \
+ do { \
+ if (blah(x) < 0) \
+ return -EBUGGERED; \
+ } while(0)
+
+is a _very_ bad idea. It looks like a function call but exits the "calling"
+function; don't break the internal parsers of those who will read the code.
+
+2) macros that depend on having a local variable with a magic name:
+
+ #define FOO(val) bar(index, val)
+
+might look like a good thing, but it's confusing as hell when one reads the
+code and it's prone to breakage from seemingly innocent changes.
+
+3) macros with arguments that are used as l-values: FOO(x) = y; will
+bite you if somebody e.g. turns FOO into an inline function.
+
+4) forgetting about precedence: macros defining constants using expressions
+must enclose the expression in parentheses. Beware of similar issues with
+macros using parameters.
+
+ #define CONSTANT 0x4000
+ #define CONSTEXP (CONSTANT | 3)
+
+5) namespace collisions when defining local variables in macros resembling
+functions:
+
+#define FOO(x) \
+({ \
+ typeof(x) ret; \
+ ret = calc_ret(x); \
+ (ret); \
+)}
+
+ret is a common name for a local variable - __foo_ret is less likely
+to collide with an existing variable.
+
+The cpp manual deals with macros exhaustively. The gcc internals manual also
+covers RTL which is used frequently with assembly language in the kernel.
+
+
+ Chapter 13: Printing kernel messages
+
+Kernel developers like to be seen as literate. Do mind the spelling
+of kernel messages to make a good impression. Do not use crippled
+words like "dont"; use "do not" or "don't" instead. Make the messages
+concise, clear, and unambiguous.
+
+Kernel messages do not have to be terminated with a period.
+
+Printing numbers in parentheses (%d) adds no value and should be avoided.
+
+There are a number of driver model diagnostic macros in <linux/device.h>
+which you should use to make sure messages are matched to the right device
+and driver, and are tagged with the right level: dev_err(), dev_warn(),
+dev_info(), and so forth. For messages that aren't associated with a
+particular device, <linux/printk.h> defines pr_notice(), pr_info(),
+pr_warn(), pr_err(), etc.
+
+Coming up with good debugging messages can be quite a challenge; and once
+you have them, they can be a huge help for remote troubleshooting. However
+debug message printing is handled differently than printing other non-debug
+messages. While the other pr_XXX() functions print unconditionally,
+pr_debug() does not; it is compiled out by default, unless either DEBUG is
+defined or CONFIG_DYNAMIC_DEBUG is set. That is true for dev_dbg() also,
+and a related convention uses VERBOSE_DEBUG to add dev_vdbg() messages to
+the ones already enabled by DEBUG.
+
+Many subsystems have Kconfig debug options to turn on -DDEBUG in the
+corresponding Makefile; in other cases specific files #define DEBUG. And
+when a debug message should be unconditionally printed, such as if it is
+already inside a debug-related #ifdef section, printk(KERN_DEBUG ...) can be
+used.
+
+
+ Chapter 14: Allocating memory
+
+The kernel provides the following general purpose memory allocators:
+kmalloc(), kzalloc(), kmalloc_array(), kcalloc(), vmalloc(), and
+vzalloc(). Please refer to the API documentation for further information
+about them.
+
+The preferred form for passing a size of a struct is the following:
+
+ p = kmalloc(sizeof(*p), ...);
+
+The alternative form where struct name is spelled out hurts readability and
+introduces an opportunity for a bug when the pointer variable type is changed
+but the corresponding sizeof that is passed to a memory allocator is not.
+
+Casting the return value which is a void pointer is redundant. The conversion
+from void pointer to any other pointer type is guaranteed by the C programming
+language.
+
+The preferred form for allocating an array is the following:
+
+ p = kmalloc_array(n, sizeof(...), ...);
+
+The preferred form for allocating a zeroed array is the following:
+
+ p = kcalloc(n, sizeof(...), ...);
+
+Both forms check for overflow on the allocation size n * sizeof(...),
+and return NULL if that occurred.
+
+
+ Chapter 15: The inline disease
+
+There appears to be a common misperception that gcc has a magic "make me
+faster" speedup option called "inline". While the use of inlines can be
+appropriate (for example as a means of replacing macros, see Chapter 12), it
+very often is not. Abundant use of the inline keyword leads to a much bigger
+kernel, which in turn slows the system as a whole down, due to a bigger
+icache footprint for the CPU and simply because there is less memory
+available for the pagecache. Just think about it; a pagecache miss causes a
+disk seek, which easily takes 5 milliseconds. There are a LOT of cpu cycles
+that can go into these 5 milliseconds.
+
+A reasonable rule of thumb is to not put inline at functions that have more
+than 3 lines of code in them. An exception to this rule are the cases where
+a parameter is known to be a compiletime constant, and as a result of this
+constantness you *know* the compiler will be able to optimize most of your
+function away at compile time. For a good example of this later case, see
+the kmalloc() inline function.
+
+Often people argue that adding inline to functions that are static and used
+only once is always a win since there is no space tradeoff. While this is
+technically correct, gcc is capable of inlining these automatically without
+help, and the maintenance issue of removing the inline when a second user
+appears outweighs the potential value of the hint that tells gcc to do
+something it would have done anyway.
+
+
+ Chapter 16: Function return values and names
+
+Functions can return values of many different kinds, and one of the
+most common is a value indicating whether the function succeeded or
+failed. Such a value can be represented as an error-code integer
+(-Exxx = failure, 0 = success) or a "succeeded" boolean (0 = failure,
+non-zero = success).
+
+Mixing up these two sorts of representations is a fertile source of
+difficult-to-find bugs. If the C language included a strong distinction
+between integers and booleans then the compiler would find these mistakes
+for us... but it doesn't. To help prevent such bugs, always follow this
+convention:
+
+ If the name of a function is an action or an imperative command,
+ the function should return an error-code integer. If the name
+ is a predicate, the function should return a "succeeded" boolean.
+
+For example, "add work" is a command, and the add_work() function returns 0
+for success or -EBUSY for failure. In the same way, "PCI device present" is
+a predicate, and the pci_dev_present() function returns 1 if it succeeds in
+finding a matching device or 0 if it doesn't.
+
+All EXPORTed functions must respect this convention, and so should all
+public functions. Private (static) functions need not, but it is
+recommended that they do.
+
+Functions whose return value is the actual result of a computation, rather
+than an indication of whether the computation succeeded, are not subject to
+this rule. Generally they indicate failure by returning some out-of-range
+result. Typical examples would be functions that return pointers; they use
+NULL or the ERR_PTR mechanism to report failure.
+
+
+ Chapter 17: Don't re-invent the kernel macros
+
+The header file include/linux/kernel.h contains a number of macros that
+you should use, rather than explicitly coding some variant of them yourself.
+For example, if you need to calculate the length of an array, take advantage
+of the macro
+
+ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+Similarly, if you need to calculate the size of some structure member, use
+
+ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+
+There are also min() and max() macros that do strict type checking if you
+need them. Feel free to peruse that header file to see what else is already
+defined that you shouldn't reproduce in your code.
+
+
+ Chapter 18: Editor modelines and other cruft
+
+Some editors can interpret configuration information embedded in source files,
+indicated with special markers. For example, emacs interprets lines marked
+like this:
+
+ -*- mode: c -*-
+
+Or like this:
+
+ /*
+ Local Variables:
+ compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c"
+ End:
+ */
+
+Vim interprets markers that look like this:
+
+ /* vim:set sw=8 noet */
+
+Do not include any of these in source files. People have their own personal
+editor configurations, and your source files should not override them. This
+includes markers for indentation and mode configuration. People may use their
+own custom mode, or may have some other magic method for making indentation
+work correctly.
+
+
+ Chapter 19: Inline assembly
+
+In architecture-specific code, you may need to use inline assembly to interface
+with CPU or platform functionality. Don't hesitate to do so when necessary.
+However, don't use inline assembly gratuitously when C can do the job. You can
+and should poke hardware from C when possible.
+
+Consider writing simple helper functions that wrap common bits of inline
+assembly, rather than repeatedly writing them with slight variations. Remember
+that inline assembly can use C parameters.
+
+Large, non-trivial assembly functions should go in .S files, with corresponding
+C prototypes defined in C header files. The C prototypes for assembly
+functions should use "asmlinkage".
+
+You may need to mark your asm statement as volatile, to prevent GCC from
+removing it if GCC doesn't notice any side effects. You don't always need to
+do so, though, and doing so unnecessarily can limit optimization.
+
+When writing a single inline assembly statement containing multiple
+instructions, put each instruction on a separate line in a separate quoted
+string, and end each string except the last with \n\t to properly indent the
+next instruction in the assembly output:
+
+ asm ("magic %reg1, #42\n\t"
+ "more_magic %reg2, %reg3"
+ : /* outputs */ : /* inputs */ : /* clobbers */);
+
+
+ Chapter 20: Conditional Compilation
+
+Wherever possible, don't use preprocessor conditionals (#if, #ifdef) in .c
+files; doing so makes code harder to read and logic harder to follow. Instead,
+use such conditionals in a header file defining functions for use in those .c
+files, providing no-op stub versions in the #else case, and then call those
+functions unconditionally from .c files. The compiler will avoid generating
+any code for the stub calls, producing identical results, but the logic will
+remain easy to follow.
+
+Prefer to compile out entire functions, rather than portions of functions or
+portions of expressions. Rather than putting an ifdef in an expression, factor
+out part or all of the expression into a separate helper function and apply the
+conditional to that function.
+
+If you have a function or variable which may potentially go unused in a
+particular configuration, and the compiler would warn about its definition
+going unused, mark the definition as __maybe_unused rather than wrapping it in
+a preprocessor conditional. (However, if a function or variable *always* goes
+unused, delete it.)
+
+Within code, where possible, use the IS_ENABLED macro to convert a Kconfig
+symbol into a C boolean expression, and use it in a normal C conditional:
+
+ if (IS_ENABLED(CONFIG_SOMETHING)) {
+ ...
+ }
+
+The compiler will constant-fold the conditional away, and include or exclude
+the block of code just as with an #ifdef, so this will not add any runtime
+overhead. However, this approach still allows the C compiler to see the code
+inside the block, and check it for correctness (syntax, types, symbol
+references, etc). Thus, you still have to use an #ifdef if the code inside the
+block references symbols that will not exist if the condition is not met.
+
+At the end of any non-trivial #if or #ifdef block (more than a few lines),
+place a comment after the #endif on the same line, noting the conditional
+expression used. For instance:
+
+ #ifdef CONFIG_SOMETHING
+ ...
+ #endif /* CONFIG_SOMETHING */
+
+
+ Appendix I: References
+
+The C Programming Language, Second Edition
+by Brian W. Kernighan and Dennis M. Ritchie.
+Prentice Hall, Inc., 1988.
+ISBN 0-13-110362-8 (paperback), 0-13-110370-9 (hardback).
+URL: http://cm.bell-labs.com/cm/cs/cbook/
+
+The Practice of Programming
+by Brian W. Kernighan and Rob Pike.
+Addison-Wesley, Inc., 1999.
+ISBN 0-201-61586-X.
+URL: http://cm.bell-labs.com/cm/cs/tpop/
+
+GNU manuals - where in compliance with K&R and this text - for cpp, gcc,
+gcc internals and indent, all available from http://www.gnu.org/manual/
+
+WG14 is the international standardization working group for the programming
+language C, URL: http://www.open-std.org/JTC1/SC22/WG14/
+
+Kernel CodingStyle, by greg@kroah.com at OLS 2002:
+http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
+
diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
new file mode 100644
index 000000000..aef8cc5a6
--- /dev/null
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -0,0 +1,974 @@
+ Dynamic DMA mapping Guide
+ =========================
+
+ David S. Miller <davem@redhat.com>
+ Richard Henderson <rth@cygnus.com>
+ Jakub Jelinek <jakub@redhat.com>
+
+This is a guide to device driver writers on how to use the DMA API
+with example pseudo-code. For a concise description of the API, see
+DMA-API.txt.
+
+ CPU and DMA addresses
+
+There are several kinds of addresses involved in the DMA API, and it's
+important to understand the differences.
+
+The kernel normally uses virtual addresses. Any address returned by
+kmalloc(), vmalloc(), and similar interfaces is a virtual address and can
+be stored in a "void *".
+
+The virtual memory system (TLB, page tables, etc.) translates virtual
+addresses to CPU physical addresses, which are stored as "phys_addr_t" or
+"resource_size_t". The kernel manages device resources like registers as
+physical addresses. These are the addresses in /proc/iomem. The physical
+address is not directly useful to a driver; it must use ioremap() to map
+the space and produce a virtual address.
+
+I/O devices use a third kind of address: a "bus address". If a device has
+registers at an MMIO address, or if it performs DMA to read or write system
+memory, the addresses used by the device are bus addresses. In some
+systems, bus addresses are identical to CPU physical addresses, but in
+general they are not. IOMMUs and host bridges can produce arbitrary
+mappings between physical and bus addresses.
+
+From a device's point of view, DMA uses the bus address space, but it may
+be restricted to a subset of that space. For example, even if a system
+supports 64-bit addresses for main memory and PCI BARs, it may use an IOMMU
+so devices only need to use 32-bit DMA addresses.
+
+Here's a picture and some examples:
+
+ CPU CPU Bus
+ Virtual Physical Address
+ Address Address Space
+ Space Space
+
+ +-------+ +------+ +------+
+ | | |MMIO | Offset | |
+ | | Virtual |Space | applied | |
+ C +-------+ --------> B +------+ ----------> +------+ A
+ | | mapping | | by host | |
+ +-----+ | | | | bridge | | +--------+
+ | | | | +------+ | | | |
+ | CPU | | | | RAM | | | | Device |
+ | | | | | | | | | |
+ +-----+ +-------+ +------+ +------+ +--------+
+ | | Virtual |Buffer| Mapping | |
+ X +-------+ --------> Y +------+ <---------- +------+ Z
+ | | mapping | RAM | by IOMMU
+ | | | |
+ | | | |
+ +-------+ +------+
+
+During the enumeration process, the kernel learns about I/O devices and
+their MMIO space and the host bridges that connect them to the system. For
+example, if a PCI device has a BAR, the kernel reads the bus address (A)
+from the BAR and converts it to a CPU physical address (B). The address B
+is stored in a struct resource and usually exposed via /proc/iomem. When a
+driver claims a device, it typically uses ioremap() to map physical address
+B at a virtual address (C). It can then use, e.g., ioread32(C), to access
+the device registers at bus address A.
+
+If the device supports DMA, the driver sets up a buffer using kmalloc() or
+a similar interface, which returns a virtual address (X). The virtual
+memory system maps X to a physical address (Y) in system RAM. The driver
+can use virtual address X to access the buffer, but the device itself
+cannot because DMA doesn't go through the CPU virtual memory system.
+
+In some simple systems, the device can do DMA directly to physical address
+Y. But in many others, there is IOMMU hardware that translates DMA
+addresses to physical addresses, e.g., it translates Z to Y. This is part
+of the reason for the DMA API: the driver can give a virtual address X to
+an interface like dma_map_single(), which sets up any required IOMMU
+mapping and returns the DMA address Z. The driver then tells the device to
+do DMA to Z, and the IOMMU maps it to the buffer at address Y in system
+RAM.
+
+So that Linux can use the dynamic DMA mapping, it needs some help from the
+drivers, namely it has to take into account that DMA addresses should be
+mapped only for the time they are actually used and unmapped after the DMA
+transfer.
+
+The following API will work of course even on platforms where no such
+hardware exists.
+
+Note that the DMA API works with any bus independent of the underlying
+microprocessor architecture. You should use the DMA API rather than the
+bus-specific DMA API, i.e., use the dma_map_*() interfaces rather than the
+pci_map_*() interfaces.
+
+First of all, you should make sure
+
+#include <linux/dma-mapping.h>
+
+is in your driver, which provides the definition of dma_addr_t. This type
+can hold any valid DMA address for the platform and should be used
+everywhere you hold a DMA address returned from the DMA mapping functions.
+
+ What memory is DMA'able?
+
+The first piece of information you must know is what kernel memory can
+be used with the DMA mapping facilities. There has been an unwritten
+set of rules regarding this, and this text is an attempt to finally
+write them down.
+
+If you acquired your memory via the page allocator
+(i.e. __get_free_page*()) or the generic memory allocators
+(i.e. kmalloc() or kmem_cache_alloc()) then you may DMA to/from
+that memory using the addresses returned from those routines.
+
+This means specifically that you may _not_ use the memory/addresses
+returned from vmalloc() for DMA. It is possible to DMA to the
+_underlying_ memory mapped into a vmalloc() area, but this requires
+walking page tables to get the physical addresses, and then
+translating each of those pages back to a kernel address using
+something like __va(). [ EDIT: Update this when we integrate
+Gerd Knorr's generic code which does this. ]
+
+This rule also means that you may use neither kernel image addresses
+(items in data/text/bss segments), nor module image addresses, nor
+stack addresses for DMA. These could all be mapped somewhere entirely
+different than the rest of physical memory. Even if those classes of
+memory could physically work with DMA, you'd need to ensure the I/O
+buffers were cacheline-aligned. Without that, you'd see cacheline
+sharing problems (data corruption) on CPUs with DMA-incoherent caches.
+(The CPU could write to one word, DMA would write to a different one
+in the same cache line, and one of them could be overwritten.)
+
+Also, this means that you cannot take the return of a kmap()
+call and DMA to/from that. This is similar to vmalloc().
+
+What about block I/O and networking buffers? The block I/O and
+networking subsystems make sure that the buffers they use are valid
+for you to DMA from/to.
+
+ DMA addressing limitations
+
+Does your device have any DMA addressing limitations? For example, is
+your device only capable of driving the low order 24-bits of address?
+If so, you need to inform the kernel of this fact.
+
+By default, the kernel assumes that your device can address the full
+32-bits. For a 64-bit capable device, this needs to be increased.
+And for a device with limitations, as discussed in the previous
+paragraph, it needs to be decreased.
+
+Special note about PCI: PCI-X specification requires PCI-X devices to
+support 64-bit addressing (DAC) for all transactions. And at least
+one platform (SGI SN2) requires 64-bit consistent allocations to
+operate correctly when the IO bus is in PCI-X mode.
+
+For correct operation, you must interrogate the kernel in your device
+probe routine to see if the DMA controller on the machine can properly
+support the DMA addressing limitation your device has. It is good
+style to do this even if your device holds the default setting,
+because this shows that you did think about these issues wrt. your
+device.
+
+The query is performed via a call to dma_set_mask_and_coherent():
+
+ int dma_set_mask_and_coherent(struct device *dev, u64 mask);
+
+which will query the mask for both streaming and coherent APIs together.
+If you have some special requirements, then the following two separate
+queries can be used instead:
+
+ The query for streaming mappings is performed via a call to
+ dma_set_mask():
+
+ int dma_set_mask(struct device *dev, u64 mask);
+
+ The query for consistent allocations is performed via a call
+ to dma_set_coherent_mask():
+
+ int dma_set_coherent_mask(struct device *dev, u64 mask);
+
+Here, dev is a pointer to the device struct of your device, and mask
+is a bit mask describing which bits of an address your device
+supports. It returns zero if your card can perform DMA properly on
+the machine given the address mask you provided. In general, the
+device struct of your device is embedded in the bus-specific device
+struct of your device. For example, &pdev->dev is a pointer to the
+device struct of a PCI device (pdev is a pointer to the PCI device
+struct of your device).
+
+If it returns non-zero, your device cannot perform DMA properly on
+this platform, and attempting to do so will result in undefined
+behavior. You must either use a different mask, or not use DMA.
+
+This means that in the failure case, you have three options:
+
+1) Use another DMA mask, if possible (see below).
+2) Use some non-DMA mode for data transfer, if possible.
+3) Ignore this device and do not initialize it.
+
+It is recommended that your driver print a kernel KERN_WARNING message
+when you end up performing either #2 or #3. In this manner, if a user
+of your driver reports that performance is bad or that the device is not
+even detected, you can ask them for the kernel messages to find out
+exactly why.
+
+The standard 32-bit addressing device would do something like this:
+
+ if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
+ dev_warn(dev, "mydev: No suitable DMA available\n");
+ goto ignore_this_device;
+ }
+
+Another common scenario is a 64-bit capable device. The approach here
+is to try for 64-bit addressing, but back down to a 32-bit mask that
+should not fail. The kernel may fail the 64-bit mask not because the
+platform is not capable of 64-bit addressing. Rather, it may fail in
+this case simply because 32-bit addressing is done more efficiently
+than 64-bit addressing. For example, Sparc64 PCI SAC addressing is
+more efficient than DAC addressing.
+
+Here is how you would handle a 64-bit capable device which can drive
+all 64-bits when accessing streaming DMA:
+
+ int using_dac;
+
+ if (!dma_set_mask(dev, DMA_BIT_MASK(64))) {
+ using_dac = 1;
+ } else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) {
+ using_dac = 0;
+ } else {
+ dev_warn(dev, "mydev: No suitable DMA available\n");
+ goto ignore_this_device;
+ }
+
+If a card is capable of using 64-bit consistent allocations as well,
+the case would look like this:
+
+ int using_dac, consistent_using_dac;
+
+ if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) {
+ using_dac = 1;
+ consistent_using_dac = 1;
+ } else if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
+ using_dac = 0;
+ consistent_using_dac = 0;
+ } else {
+ dev_warn(dev, "mydev: No suitable DMA available\n");
+ goto ignore_this_device;
+ }
+
+The coherent mask will always be able to set the same or a smaller mask as
+the streaming mask. However for the rare case that a device driver only
+uses consistent allocations, one would have to check the return value from
+dma_set_coherent_mask().
+
+Finally, if your device can only drive the low 24-bits of
+address you might do something like:
+
+ if (dma_set_mask(dev, DMA_BIT_MASK(24))) {
+ dev_warn(dev, "mydev: 24-bit DMA addressing not available\n");
+ goto ignore_this_device;
+ }
+
+When dma_set_mask() or dma_set_mask_and_coherent() is successful, and
+returns zero, the kernel saves away this mask you have provided. The
+kernel will use this information later when you make DMA mappings.
+
+There is a case which we are aware of at this time, which is worth
+mentioning in this documentation. If your device supports multiple
+functions (for example a sound card provides playback and record
+functions) and the various different functions have _different_
+DMA addressing limitations, you may wish to probe each mask and
+only provide the functionality which the machine can handle. It
+is important that the last call to dma_set_mask() be for the
+most specific mask.
+
+Here is pseudo-code showing how this might be done:
+
+ #define PLAYBACK_ADDRESS_BITS DMA_BIT_MASK(32)
+ #define RECORD_ADDRESS_BITS DMA_BIT_MASK(24)
+
+ struct my_sound_card *card;
+ struct device *dev;
+
+ ...
+ if (!dma_set_mask(dev, PLAYBACK_ADDRESS_BITS)) {
+ card->playback_enabled = 1;
+ } else {
+ card->playback_enabled = 0;
+ dev_warn(dev, "%s: Playback disabled due to DMA limitations\n",
+ card->name);
+ }
+ if (!dma_set_mask(dev, RECORD_ADDRESS_BITS)) {
+ card->record_enabled = 1;
+ } else {
+ card->record_enabled = 0;
+ dev_warn(dev, "%s: Record disabled due to DMA limitations\n",
+ card->name);
+ }
+
+A sound card was used as an example here because this genre of PCI
+devices seems to be littered with ISA chips given a PCI front end,
+and thus retaining the 16MB DMA addressing limitations of ISA.
+
+ Types of DMA mappings
+
+There are two types of DMA mappings:
+
+- Consistent DMA mappings which are usually mapped at driver
+ initialization, unmapped at the end and for which the hardware should
+ guarantee that the device and the CPU can access the data
+ in parallel and will see updates made by each other without any
+ explicit software flushing.
+
+ Think of "consistent" as "synchronous" or "coherent".
+
+ The current default is to return consistent memory in the low 32
+ bits of the DMA space. However, for future compatibility you should
+ set the consistent mask even if this default is fine for your
+ driver.
+
+ Good examples of what to use consistent mappings for are:
+
+ - Network card DMA ring descriptors.
+ - SCSI adapter mailbox command data structures.
+ - Device firmware microcode executed out of
+ main memory.
+
+ The invariant these examples all require is that any CPU store
+ to memory is immediately visible to the device, and vice
+ versa. Consistent mappings guarantee this.
+
+ IMPORTANT: Consistent DMA memory does not preclude the usage of
+ proper memory barriers. The CPU may reorder stores to
+ consistent memory just as it may normal memory. Example:
+ if it is important for the device to see the first word
+ of a descriptor updated before the second, you must do
+ something like:
+
+ desc->word0 = address;
+ wmb();
+ desc->word1 = DESC_VALID;
+
+ in order to get correct behavior on all platforms.
+
+ Also, on some platforms your driver may need to flush CPU write
+ buffers in much the same way as it needs to flush write buffers
+ found in PCI bridges (such as by reading a register's value
+ after writing it).
+
+- Streaming DMA mappings which are usually mapped for one DMA
+ transfer, unmapped right after it (unless you use dma_sync_* below)
+ and for which hardware can optimize for sequential accesses.
+
+ This of "streaming" as "asynchronous" or "outside the coherency
+ domain".
+
+ Good examples of what to use streaming mappings for are:
+
+ - Networking buffers transmitted/received by a device.
+ - Filesystem buffers written/read by a SCSI device.
+
+ The interfaces for using this type of mapping were designed in
+ such a way that an implementation can make whatever performance
+ optimizations the hardware allows. To this end, when using
+ such mappings you must be explicit about what you want to happen.
+
+Neither type of DMA mapping has alignment restrictions that come from
+the underlying bus, although some devices may have such restrictions.
+Also, systems with caches that aren't DMA-coherent will work better
+when the underlying buffers don't share cache lines with other data.
+
+
+ Using Consistent DMA mappings.
+
+To allocate and map large (PAGE_SIZE or so) consistent DMA regions,
+you should do:
+
+ dma_addr_t dma_handle;
+
+ cpu_addr = dma_alloc_coherent(dev, size, &dma_handle, gfp);
+
+where device is a struct device *. This may be called in interrupt
+context with the GFP_ATOMIC flag.
+
+Size is the length of the region you want to allocate, in bytes.
+
+This routine will allocate RAM for that region, so it acts similarly to
+__get_free_pages() (but takes size instead of a page order). If your
+driver needs regions sized smaller than a page, you may prefer using
+the dma_pool interface, described below.
+
+The consistent DMA mapping interfaces, for non-NULL dev, will by
+default return a DMA address which is 32-bit addressable. Even if the
+device indicates (via DMA mask) that it may address the upper 32-bits,
+consistent allocation will only return > 32-bit addresses for DMA if
+the consistent DMA mask has been explicitly changed via
+dma_set_coherent_mask(). This is true of the dma_pool interface as
+well.
+
+dma_alloc_coherent() returns two values: the virtual address which you
+can use to access it from the CPU and dma_handle which you pass to the
+card.
+
+The CPU virtual address and the DMA address are both
+guaranteed to be aligned to the smallest PAGE_SIZE order which
+is greater than or equal to the requested size. This invariant
+exists (for example) to guarantee that if you allocate a chunk
+which is smaller than or equal to 64 kilobytes, the extent of the
+buffer you receive will not cross a 64K boundary.
+
+To unmap and free such a DMA region, you call:
+
+ dma_free_coherent(dev, size, cpu_addr, dma_handle);
+
+where dev, size are the same as in the above call and cpu_addr and
+dma_handle are the values dma_alloc_coherent() returned to you.
+This function may not be called in interrupt context.
+
+If your driver needs lots of smaller memory regions, you can write
+custom code to subdivide pages returned by dma_alloc_coherent(),
+or you can use the dma_pool API to do that. A dma_pool is like
+a kmem_cache, but it uses dma_alloc_coherent(), not __get_free_pages().
+Also, it understands common hardware constraints for alignment,
+like queue heads needing to be aligned on N byte boundaries.
+
+Create a dma_pool like this:
+
+ struct dma_pool *pool;
+
+ pool = dma_pool_create(name, dev, size, align, boundary);
+
+The "name" is for diagnostics (like a kmem_cache name); dev and size
+are as above. The device's hardware alignment requirement for this
+type of data is "align" (which is expressed in bytes, and must be a
+power of two). If your device has no boundary crossing restrictions,
+pass 0 for boundary; passing 4096 says memory allocated from this pool
+must not cross 4KByte boundaries (but at that time it may be better to
+use dma_alloc_coherent() directly instead).
+
+Allocate memory from a DMA pool like this:
+
+ cpu_addr = dma_pool_alloc(pool, flags, &dma_handle);
+
+flags are GFP_KERNEL if blocking is permitted (not in_interrupt nor
+holding SMP locks), GFP_ATOMIC otherwise. Like dma_alloc_coherent(),
+this returns two values, cpu_addr and dma_handle.
+
+Free memory that was allocated from a dma_pool like this:
+
+ dma_pool_free(pool, cpu_addr, dma_handle);
+
+where pool is what you passed to dma_pool_alloc(), and cpu_addr and
+dma_handle are the values dma_pool_alloc() returned. This function
+may be called in interrupt context.
+
+Destroy a dma_pool by calling:
+
+ dma_pool_destroy(pool);
+
+Make sure you've called dma_pool_free() for all memory allocated
+from a pool before you destroy the pool. This function may not
+be called in interrupt context.
+
+ DMA Direction
+
+The interfaces described in subsequent portions of this document
+take a DMA direction argument, which is an integer and takes on
+one of the following values:
+
+ DMA_BIDIRECTIONAL
+ DMA_TO_DEVICE
+ DMA_FROM_DEVICE
+ DMA_NONE
+
+You should provide the exact DMA direction if you know it.
+
+DMA_TO_DEVICE means "from main memory to the device"
+DMA_FROM_DEVICE means "from the device to main memory"
+It is the direction in which the data moves during the DMA
+transfer.
+
+You are _strongly_ encouraged to specify this as precisely
+as you possibly can.
+
+If you absolutely cannot know the direction of the DMA transfer,
+specify DMA_BIDIRECTIONAL. It means that the DMA can go in
+either direction. The platform guarantees that you may legally
+specify this, and that it will work, but this may be at the
+cost of performance for example.
+
+The value DMA_NONE is to be used for debugging. One can
+hold this in a data structure before you come to know the
+precise direction, and this will help catch cases where your
+direction tracking logic has failed to set things up properly.
+
+Another advantage of specifying this value precisely (outside of
+potential platform-specific optimizations of such) is for debugging.
+Some platforms actually have a write permission boolean which DMA
+mappings can be marked with, much like page protections in the user
+program address space. Such platforms can and do report errors in the
+kernel logs when the DMA controller hardware detects violation of the
+permission setting.
+
+Only streaming mappings specify a direction, consistent mappings
+implicitly have a direction attribute setting of
+DMA_BIDIRECTIONAL.
+
+The SCSI subsystem tells you the direction to use in the
+'sc_data_direction' member of the SCSI command your driver is
+working on.
+
+For Networking drivers, it's a rather simple affair. For transmit
+packets, map/unmap them with the DMA_TO_DEVICE direction
+specifier. For receive packets, just the opposite, map/unmap them
+with the DMA_FROM_DEVICE direction specifier.
+
+ Using Streaming DMA mappings
+
+The streaming DMA mapping routines can be called from interrupt
+context. There are two versions of each map/unmap, one which will
+map/unmap a single memory region, and one which will map/unmap a
+scatterlist.
+
+To map a single region, you do:
+
+ struct device *dev = &my_dev->dev;
+ dma_addr_t dma_handle;
+ void *addr = buffer->ptr;
+ size_t size = buffer->len;
+
+ dma_handle = dma_map_single(dev, addr, size, direction);
+ if (dma_mapping_error(dev, dma_handle)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling;
+ }
+
+and to unmap it:
+
+ dma_unmap_single(dev, dma_handle, size, direction);
+
+You should call dma_mapping_error() as dma_map_single() could fail and return
+error. Not all DMA implementations support the dma_mapping_error() interface.
+However, it is a good practice to call dma_mapping_error() interface, which
+will invoke the generic mapping error check interface. Doing so will ensure
+that the mapping code will work correctly on all DMA implementations without
+any dependency on the specifics of the underlying implementation. Using the
+returned address without checking for errors could result in failures ranging
+from panics to silent data corruption. A couple of examples of incorrect ways
+to check for errors that make assumptions about the underlying DMA
+implementation are as follows and these are applicable to dma_map_page() as
+well.
+
+Incorrect example 1:
+ dma_addr_t dma_handle;
+
+ dma_handle = dma_map_single(dev, addr, size, direction);
+ if ((dma_handle & 0xffff != 0) || (dma_handle >= 0x1000000)) {
+ goto map_error;
+ }
+
+Incorrect example 2:
+ dma_addr_t dma_handle;
+
+ dma_handle = dma_map_single(dev, addr, size, direction);
+ if (dma_handle == DMA_ERROR_CODE) {
+ goto map_error;
+ }
+
+You should call dma_unmap_single() when the DMA activity is finished, e.g.,
+from the interrupt which told you that the DMA transfer is done.
+
+Using CPU pointers like this for single mappings has a disadvantage:
+you cannot reference HIGHMEM memory in this way. Thus, there is a
+map/unmap interface pair akin to dma_{map,unmap}_single(). These
+interfaces deal with page/offset pairs instead of CPU pointers.
+Specifically:
+
+ struct device *dev = &my_dev->dev;
+ dma_addr_t dma_handle;
+ struct page *page = buffer->page;
+ unsigned long offset = buffer->offset;
+ size_t size = buffer->len;
+
+ dma_handle = dma_map_page(dev, page, offset, size, direction);
+ if (dma_mapping_error(dev, dma_handle)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling;
+ }
+
+ ...
+
+ dma_unmap_page(dev, dma_handle, size, direction);
+
+Here, "offset" means byte offset within the given page.
+
+You should call dma_mapping_error() as dma_map_page() could fail and return
+error as outlined under the dma_map_single() discussion.
+
+You should call dma_unmap_page() when the DMA activity is finished, e.g.,
+from the interrupt which told you that the DMA transfer is done.
+
+With scatterlists, you map a region gathered from several regions by:
+
+ int i, count = dma_map_sg(dev, sglist, nents, direction);
+ struct scatterlist *sg;
+
+ for_each_sg(sglist, sg, count, i) {
+ hw_address[i] = sg_dma_address(sg);
+ hw_len[i] = sg_dma_len(sg);
+ }
+
+where nents is the number of entries in the sglist.
+
+The implementation is free to merge several consecutive sglist entries
+into one (e.g. if DMA mapping is done with PAGE_SIZE granularity, any
+consecutive sglist entries can be merged into one provided the first one
+ends and the second one starts on a page boundary - in fact this is a huge
+advantage for cards which either cannot do scatter-gather or have very
+limited number of scatter-gather entries) and returns the actual number
+of sg entries it mapped them to. On failure 0 is returned.
+
+Then you should loop count times (note: this can be less than nents times)
+and use sg_dma_address() and sg_dma_len() macros where you previously
+accessed sg->address and sg->length as shown above.
+
+To unmap a scatterlist, just call:
+
+ dma_unmap_sg(dev, sglist, nents, direction);
+
+Again, make sure DMA activity has already finished.
+
+PLEASE NOTE: The 'nents' argument to the dma_unmap_sg call must be
+ the _same_ one you passed into the dma_map_sg call,
+ it should _NOT_ be the 'count' value _returned_ from the
+ dma_map_sg call.
+
+Every dma_map_{single,sg}() call should have its dma_unmap_{single,sg}()
+counterpart, because the DMA address space is a shared resource and
+you could render the machine unusable by consuming all DMA addresses.
+
+If you need to use the same streaming DMA region multiple times and touch
+the data in between the DMA transfers, the buffer needs to be synced
+properly in order for the CPU and device to see the most up-to-date and
+correct copy of the DMA buffer.
+
+So, firstly, just map it with dma_map_{single,sg}(), and after each DMA
+transfer call either:
+
+ dma_sync_single_for_cpu(dev, dma_handle, size, direction);
+
+or:
+
+ dma_sync_sg_for_cpu(dev, sglist, nents, direction);
+
+as appropriate.
+
+Then, if you wish to let the device get at the DMA area again,
+finish accessing the data with the CPU, and then before actually
+giving the buffer to the hardware call either:
+
+ dma_sync_single_for_device(dev, dma_handle, size, direction);
+
+or:
+
+ dma_sync_sg_for_device(dev, sglist, nents, direction);
+
+as appropriate.
+
+After the last DMA transfer call one of the DMA unmap routines
+dma_unmap_{single,sg}(). If you don't touch the data from the first
+dma_map_*() call till dma_unmap_*(), then you don't have to call the
+dma_sync_*() routines at all.
+
+Here is pseudo code which shows a situation in which you would need
+to use the dma_sync_*() interfaces.
+
+ my_card_setup_receive_buffer(struct my_card *cp, char *buffer, int len)
+ {
+ dma_addr_t mapping;
+
+ mapping = dma_map_single(cp->dev, buffer, len, DMA_FROM_DEVICE);
+ if (dma_mapping_error(cp->dev, dma_handle)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling;
+ }
+
+ cp->rx_buf = buffer;
+ cp->rx_len = len;
+ cp->rx_dma = mapping;
+
+ give_rx_buf_to_card(cp);
+ }
+
+ ...
+
+ my_card_interrupt_handler(int irq, void *devid, struct pt_regs *regs)
+ {
+ struct my_card *cp = devid;
+
+ ...
+ if (read_card_status(cp) == RX_BUF_TRANSFERRED) {
+ struct my_card_header *hp;
+
+ /* Examine the header to see if we wish
+ * to accept the data. But synchronize
+ * the DMA transfer with the CPU first
+ * so that we see updated contents.
+ */
+ dma_sync_single_for_cpu(&cp->dev, cp->rx_dma,
+ cp->rx_len,
+ DMA_FROM_DEVICE);
+
+ /* Now it is safe to examine the buffer. */
+ hp = (struct my_card_header *) cp->rx_buf;
+ if (header_is_ok(hp)) {
+ dma_unmap_single(&cp->dev, cp->rx_dma, cp->rx_len,
+ DMA_FROM_DEVICE);
+ pass_to_upper_layers(cp->rx_buf);
+ make_and_setup_new_rx_buf(cp);
+ } else {
+ /* CPU should not write to
+ * DMA_FROM_DEVICE-mapped area,
+ * so dma_sync_single_for_device() is
+ * not needed here. It would be required
+ * for DMA_BIDIRECTIONAL mapping if
+ * the memory was modified.
+ */
+ give_rx_buf_to_card(cp);
+ }
+ }
+ }
+
+Drivers converted fully to this interface should not use virt_to_bus() any
+longer, nor should they use bus_to_virt(). Some drivers have to be changed a
+little bit, because there is no longer an equivalent to bus_to_virt() in the
+dynamic DMA mapping scheme - you have to always store the DMA addresses
+returned by the dma_alloc_coherent(), dma_pool_alloc(), and dma_map_single()
+calls (dma_map_sg() stores them in the scatterlist itself if the platform
+supports dynamic DMA mapping in hardware) in your driver structures and/or
+in the card registers.
+
+All drivers should be using these interfaces with no exceptions. It
+is planned to completely remove virt_to_bus() and bus_to_virt() as
+they are entirely deprecated. Some ports already do not provide these
+as it is impossible to correctly support them.
+
+ Handling Errors
+
+DMA address space is limited on some architectures and an allocation
+failure can be determined by:
+
+- checking if dma_alloc_coherent() returns NULL or dma_map_sg returns 0
+
+- checking the dma_addr_t returned from dma_map_single() and dma_map_page()
+ by using dma_mapping_error():
+
+ dma_addr_t dma_handle;
+
+ dma_handle = dma_map_single(dev, addr, size, direction);
+ if (dma_mapping_error(dev, dma_handle)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling;
+ }
+
+- unmap pages that are already mapped, when mapping error occurs in the middle
+ of a multiple page mapping attempt. These example are applicable to
+ dma_map_page() as well.
+
+Example 1:
+ dma_addr_t dma_handle1;
+ dma_addr_t dma_handle2;
+
+ dma_handle1 = dma_map_single(dev, addr, size, direction);
+ if (dma_mapping_error(dev, dma_handle1)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling1;
+ }
+ dma_handle2 = dma_map_single(dev, addr, size, direction);
+ if (dma_mapping_error(dev, dma_handle2)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling2;
+ }
+
+ ...
+
+ map_error_handling2:
+ dma_unmap_single(dma_handle1);
+ map_error_handling1:
+
+Example 2: (if buffers are allocated in a loop, unmap all mapped buffers when
+ mapping error is detected in the middle)
+
+ dma_addr_t dma_addr;
+ dma_addr_t array[DMA_BUFFERS];
+ int save_index = 0;
+
+ for (i = 0; i < DMA_BUFFERS; i++) {
+
+ ...
+
+ dma_addr = dma_map_single(dev, addr, size, direction);
+ if (dma_mapping_error(dev, dma_addr)) {
+ /*
+ * reduce current DMA mapping usage,
+ * delay and try again later or
+ * reset driver.
+ */
+ goto map_error_handling;
+ }
+ array[i].dma_addr = dma_addr;
+ save_index++;
+ }
+
+ ...
+
+ map_error_handling:
+
+ for (i = 0; i < save_index; i++) {
+
+ ...
+
+ dma_unmap_single(array[i].dma_addr);
+ }
+
+Networking drivers must call dev_kfree_skb() to free the socket buffer
+and return NETDEV_TX_OK if the DMA mapping fails on the transmit hook
+(ndo_start_xmit). This means that the socket buffer is just dropped in
+the failure case.
+
+SCSI drivers must return SCSI_MLQUEUE_HOST_BUSY if the DMA mapping
+fails in the queuecommand hook. This means that the SCSI subsystem
+passes the command to the driver again later.
+
+ Optimizing Unmap State Space Consumption
+
+On many platforms, dma_unmap_{single,page}() is simply a nop.
+Therefore, keeping track of the mapping address and length is a waste
+of space. Instead of filling your drivers up with ifdefs and the like
+to "work around" this (which would defeat the whole purpose of a
+portable API) the following facilities are provided.
+
+Actually, instead of describing the macros one by one, we'll
+transform some example code.
+
+1) Use DEFINE_DMA_UNMAP_{ADDR,LEN} in state saving structures.
+ Example, before:
+
+ struct ring_state {
+ struct sk_buff *skb;
+ dma_addr_t mapping;
+ __u32 len;
+ };
+
+ after:
+
+ struct ring_state {
+ struct sk_buff *skb;
+ DEFINE_DMA_UNMAP_ADDR(mapping);
+ DEFINE_DMA_UNMAP_LEN(len);
+ };
+
+2) Use dma_unmap_{addr,len}_set() to set these values.
+ Example, before:
+
+ ringp->mapping = FOO;
+ ringp->len = BAR;
+
+ after:
+
+ dma_unmap_addr_set(ringp, mapping, FOO);
+ dma_unmap_len_set(ringp, len, BAR);
+
+3) Use dma_unmap_{addr,len}() to access these values.
+ Example, before:
+
+ dma_unmap_single(dev, ringp->mapping, ringp->len,
+ DMA_FROM_DEVICE);
+
+ after:
+
+ dma_unmap_single(dev,
+ dma_unmap_addr(ringp, mapping),
+ dma_unmap_len(ringp, len),
+ DMA_FROM_DEVICE);
+
+It really should be self-explanatory. We treat the ADDR and LEN
+separately, because it is possible for an implementation to only
+need the address in order to perform the unmap operation.
+
+ Platform Issues
+
+If you are just writing drivers for Linux and do not maintain
+an architecture port for the kernel, you can safely skip down
+to "Closing".
+
+1) Struct scatterlist requirements.
+
+ Don't invent the architecture specific struct scatterlist; just use
+ <asm-generic/scatterlist.h>. You need to enable
+ CONFIG_NEED_SG_DMA_LENGTH if the architecture supports IOMMUs
+ (including software IOMMU).
+
+2) ARCH_DMA_MINALIGN
+
+ Architectures must ensure that kmalloc'ed buffer is
+ DMA-safe. Drivers and subsystems depend on it. If an architecture
+ isn't fully DMA-coherent (i.e. hardware doesn't ensure that data in
+ the CPU cache is identical to data in main memory),
+ ARCH_DMA_MINALIGN must be set so that the memory allocator
+ makes sure that kmalloc'ed buffer doesn't share a cache line with
+ the others. See arch/arm/include/asm/cache.h as an example.
+
+ Note that ARCH_DMA_MINALIGN is about DMA memory alignment
+ constraints. You don't need to worry about the architecture data
+ alignment constraints (e.g. the alignment constraints about 64-bit
+ objects).
+
+3) Supporting multiple types of IOMMUs
+
+ If your architecture needs to support multiple types of IOMMUs, you
+ can use include/linux/asm-generic/dma-mapping-common.h. It's a
+ library to support the DMA API with multiple types of IOMMUs. Lots
+ of architectures (x86, powerpc, sh, alpha, ia64, microblaze and
+ sparc) use it. Choose one to see how it can be used. If you need to
+ support multiple types of IOMMUs in a single system, the example of
+ x86 or powerpc helps.
+
+ Closing
+
+This document, and the API itself, would not be in its current
+form without the feedback and suggestions from numerous individuals.
+We would like to specifically mention, in no particular order, the
+following people:
+
+ Russell King <rmk@arm.linux.org.uk>
+ Leo Dagum <dagum@barrel.engr.sgi.com>
+ Ralf Baechle <ralf@oss.sgi.com>
+ Grant Grundler <grundler@cup.hp.com>
+ Jay Estabrook <Jay.Estabrook@compaq.com>
+ Thomas Sailer <sailer@ife.ee.ethz.ch>
+ Andrea Arcangeli <andrea@suse.de>
+ Jens Axboe <jens.axboe@oracle.com>
+ David Mosberger-Tang <davidm@hpl.hp.com>
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
new file mode 100644
index 000000000..7eba542ef
--- /dev/null
+++ b/Documentation/DMA-API.txt
@@ -0,0 +1,704 @@
+ Dynamic DMA mapping using the generic device
+ ============================================
+
+ James E.J. Bottomley <James.Bottomley@HansenPartnership.com>
+
+This document describes the DMA API. For a more gentle introduction
+of the API (and actual examples), see Documentation/DMA-API-HOWTO.txt.
+
+This API is split into two pieces. Part I describes the basic API.
+Part II describes extensions for supporting non-consistent memory
+machines. Unless you know that your driver absolutely has to support
+non-consistent platforms (this is usually only legacy platforms) you
+should only use the API described in part I.
+
+Part I - dma_ API
+-------------------------------------
+
+To get the dma_ API, you must #include <linux/dma-mapping.h>. This
+provides dma_addr_t and the interfaces described below.
+
+A dma_addr_t can hold any valid DMA address for the platform. It can be
+given to a device to use as a DMA source or target. A CPU cannot reference
+a dma_addr_t directly because there may be translation between its physical
+address space and the DMA address space.
+
+Part Ia - Using large DMA-coherent buffers
+------------------------------------------
+
+void *
+dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+
+Consistent memory is memory for which a write by either the device or
+the processor can immediately be read by the processor or device
+without having to worry about caching effects. (You may however need
+to make sure to flush the processor's write buffers before telling
+devices to read that memory.)
+
+This routine allocates a region of <size> bytes of consistent memory.
+
+It returns a pointer to the allocated region (in the processor's virtual
+address space) or NULL if the allocation failed.
+
+It also returns a <dma_handle> which may be cast to an unsigned integer the
+same width as the bus and given to the device as the DMA address base of
+the region.
+
+Note: consistent memory can be expensive on some platforms, and the
+minimum allocation length may be as big as a page, so you should
+consolidate your requests for consistent memory as much as possible.
+The simplest way to do that is to use the dma_pool calls (see below).
+
+The flag parameter (dma_alloc_coherent() only) allows the caller to
+specify the GFP_ flags (see kmalloc()) for the allocation (the
+implementation may choose to ignore flags that affect the location of
+the returned memory, like GFP_DMA).
+
+void *
+dma_zalloc_coherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+
+Wraps dma_alloc_coherent() and also zeroes the returned memory if the
+allocation attempt succeeded.
+
+void
+dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle)
+
+Free a region of consistent memory you previously allocated. dev,
+size and dma_handle must all be the same as those passed into
+dma_alloc_coherent(). cpu_addr must be the virtual address returned by
+the dma_alloc_coherent().
+
+Note that unlike their sibling allocation calls, these routines
+may only be called with IRQs enabled.
+
+
+Part Ib - Using small DMA-coherent buffers
+------------------------------------------
+
+To get this part of the dma_ API, you must #include <linux/dmapool.h>
+
+Many drivers need lots of small DMA-coherent memory regions for DMA
+descriptors or I/O buffers. Rather than allocating in units of a page
+or more using dma_alloc_coherent(), you can use DMA pools. These work
+much like a struct kmem_cache, except that they use the DMA-coherent allocator,
+not __get_free_pages(). Also, they understand common hardware constraints
+for alignment, like queue heads needing to be aligned on N-byte boundaries.
+
+
+ struct dma_pool *
+ dma_pool_create(const char *name, struct device *dev,
+ size_t size, size_t align, size_t alloc);
+
+dma_pool_create() initializes a pool of DMA-coherent buffers
+for use with a given device. It must be called in a context which
+can sleep.
+
+The "name" is for diagnostics (like a struct kmem_cache name); dev and size
+are like what you'd pass to dma_alloc_coherent(). The device's hardware
+alignment requirement for this type of data is "align" (which is expressed
+in bytes, and must be a power of two). If your device has no boundary
+crossing restrictions, pass 0 for alloc; passing 4096 says memory allocated
+from this pool must not cross 4KByte boundaries.
+
+
+ void *dma_pool_alloc(struct dma_pool *pool, gfp_t gfp_flags,
+ dma_addr_t *dma_handle);
+
+This allocates memory from the pool; the returned memory will meet the
+size and alignment requirements specified at creation time. Pass
+GFP_ATOMIC to prevent blocking, or if it's permitted (not
+in_interrupt, not holding SMP locks), pass GFP_KERNEL to allow
+blocking. Like dma_alloc_coherent(), this returns two values: an
+address usable by the CPU, and the DMA address usable by the pool's
+device.
+
+
+ void dma_pool_free(struct dma_pool *pool, void *vaddr,
+ dma_addr_t addr);
+
+This puts memory back into the pool. The pool is what was passed to
+dma_pool_alloc(); the CPU (vaddr) and DMA addresses are what
+were returned when that routine allocated the memory being freed.
+
+
+ void dma_pool_destroy(struct dma_pool *pool);
+
+dma_pool_destroy() frees the resources of the pool. It must be
+called in a context which can sleep. Make sure you've freed all allocated
+memory back to the pool before you destroy it.
+
+
+Part Ic - DMA addressing limitations
+------------------------------------
+
+int
+dma_supported(struct device *dev, u64 mask)
+
+Checks to see if the device can support DMA to the memory described by
+mask.
+
+Returns: 1 if it can and 0 if it can't.
+
+Notes: This routine merely tests to see if the mask is possible. It
+won't change the current mask settings. It is more intended as an
+internal API for use by the platform than an external API for use by
+driver writers.
+
+int
+dma_set_mask_and_coherent(struct device *dev, u64 mask)
+
+Checks to see if the mask is possible and updates the device
+streaming and coherent DMA mask parameters if it is.
+
+Returns: 0 if successful and a negative error if not.
+
+int
+dma_set_mask(struct device *dev, u64 mask)
+
+Checks to see if the mask is possible and updates the device
+parameters if it is.
+
+Returns: 0 if successful and a negative error if not.
+
+int
+dma_set_coherent_mask(struct device *dev, u64 mask)
+
+Checks to see if the mask is possible and updates the device
+parameters if it is.
+
+Returns: 0 if successful and a negative error if not.
+
+u64
+dma_get_required_mask(struct device *dev)
+
+This API returns the mask that the platform requires to
+operate efficiently. Usually this means the returned mask
+is the minimum required to cover all of memory. Examining the
+required mask gives drivers with variable descriptor sizes the
+opportunity to use smaller descriptors as necessary.
+
+Requesting the required mask does not alter the current mask. If you
+wish to take advantage of it, you should issue a dma_set_mask()
+call to set the mask to the value returned.
+
+
+Part Id - Streaming DMA mappings
+--------------------------------
+
+dma_addr_t
+dma_map_single(struct device *dev, void *cpu_addr, size_t size,
+ enum dma_data_direction direction)
+
+Maps a piece of processor virtual memory so it can be accessed by the
+device and returns the DMA address of the memory.
+
+The direction for both APIs may be converted freely by casting.
+However the dma_ API uses a strongly typed enumerator for its
+direction:
+
+DMA_NONE no direction (used for debugging)
+DMA_TO_DEVICE data is going from the memory to the device
+DMA_FROM_DEVICE data is coming from the device to the memory
+DMA_BIDIRECTIONAL direction isn't known
+
+Notes: Not all memory regions in a machine can be mapped by this API.
+Further, contiguous kernel virtual space may not be contiguous as
+physical memory. Since this API does not provide any scatter/gather
+capability, it will fail if the user tries to map a non-physically
+contiguous piece of memory. For this reason, memory to be mapped by
+this API should be obtained from sources which guarantee it to be
+physically contiguous (like kmalloc).
+
+Further, the DMA address of the memory must be within the
+dma_mask of the device (the dma_mask is a bit mask of the
+addressable region for the device, i.e., if the DMA address of
+the memory ANDed with the dma_mask is still equal to the DMA
+address, then the device can perform DMA to the memory). To
+ensure that the memory allocated by kmalloc is within the dma_mask,
+the driver may specify various platform-dependent flags to restrict
+the DMA address range of the allocation (e.g., on x86, GFP_DMA
+guarantees to be within the first 16MB of available DMA addresses,
+as required by ISA devices).
+
+Note also that the above constraints on physical contiguity and
+dma_mask may not apply if the platform has an IOMMU (a device which
+maps an I/O DMA address to a physical memory address). However, to be
+portable, device driver writers may *not* assume that such an IOMMU
+exists.
+
+Warnings: Memory coherency operates at a granularity called the cache
+line width. In order for memory mapped by this API to operate
+correctly, the mapped region must begin exactly on a cache line
+boundary and end exactly on one (to prevent two separately mapped
+regions from sharing a single cache line). Since the cache line size
+may not be known at compile time, the API will not enforce this
+requirement. Therefore, it is recommended that driver writers who
+don't take special care to determine the cache line size at run time
+only map virtual regions that begin and end on page boundaries (which
+are guaranteed also to be cache line boundaries).
+
+DMA_TO_DEVICE synchronisation must be done after the last modification
+of the memory region by the software and before it is handed off to
+the driver. Once this primitive is used, memory covered by this
+primitive should be treated as read-only by the device. If the device
+may write to it at any point, it should be DMA_BIDIRECTIONAL (see
+below).
+
+DMA_FROM_DEVICE synchronisation must be done before the driver
+accesses data that may be changed by the device. This memory should
+be treated as read-only by the driver. If the driver needs to write
+to it at any point, it should be DMA_BIDIRECTIONAL (see below).
+
+DMA_BIDIRECTIONAL requires special handling: it means that the driver
+isn't sure if the memory was modified before being handed off to the
+device and also isn't sure if the device will also modify it. Thus,
+you must always sync bidirectional memory twice: once before the
+memory is handed off to the device (to make sure all memory changes
+are flushed from the processor) and once before the data may be
+accessed after being used by the device (to make sure any processor
+cache lines are updated with data that the device may have changed).
+
+void
+dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction direction)
+
+Unmaps the region previously mapped. All the parameters passed in
+must be identical to those passed in (and returned) by the mapping
+API.
+
+dma_addr_t
+dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction direction)
+void
+dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
+ enum dma_data_direction direction)
+
+API for mapping and unmapping for pages. All the notes and warnings
+for the other mapping APIs apply here. Also, although the <offset>
+and <size> parameters are provided to do partial page mapping, it is
+recommended that you never use these unless you really know what the
+cache width is.
+
+int
+dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+
+In some circumstances dma_map_single() and dma_map_page() will fail to create
+a mapping. A driver can check for these errors by testing the returned
+DMA address with dma_mapping_error(). A non-zero return value means the mapping
+could not be created and the driver should take appropriate action (e.g.
+reduce current DMA mapping usage or delay and try again later).
+
+ int
+ dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction direction)
+
+Returns: the number of DMA address segments mapped (this may be shorter
+than <nents> passed in if some elements of the scatter/gather list are
+physically or virtually adjacent and an IOMMU maps them with a single
+entry).
+
+Please note that the sg cannot be mapped again if it has been mapped once.
+The mapping process is allowed to destroy information in the sg.
+
+As with the other mapping interfaces, dma_map_sg() can fail. When it
+does, 0 is returned and a driver must take appropriate action. It is
+critical that the driver do something, in the case of a block driver
+aborting the request or even oopsing is better than doing nothing and
+corrupting the filesystem.
+
+With scatterlists, you use the resulting mapping like this:
+
+ int i, count = dma_map_sg(dev, sglist, nents, direction);
+ struct scatterlist *sg;
+
+ for_each_sg(sglist, sg, count, i) {
+ hw_address[i] = sg_dma_address(sg);
+ hw_len[i] = sg_dma_len(sg);
+ }
+
+where nents is the number of entries in the sglist.
+
+The implementation is free to merge several consecutive sglist entries
+into one (e.g. with an IOMMU, or if several pages just happen to be
+physically contiguous) and returns the actual number of sg entries it
+mapped them to. On failure 0, is returned.
+
+Then you should loop count times (note: this can be less than nents times)
+and use sg_dma_address() and sg_dma_len() macros where you previously
+accessed sg->address and sg->length as shown above.
+
+ void
+ dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nhwentries, enum dma_data_direction direction)
+
+Unmap the previously mapped scatter/gather list. All the parameters
+must be the same as those and passed in to the scatter/gather mapping
+API.
+
+Note: <nents> must be the number you passed in, *not* the number of
+DMA address entries returned.
+
+void
+dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction direction)
+void
+dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size,
+ enum dma_data_direction direction)
+void
+dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
+ enum dma_data_direction direction)
+void
+dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems,
+ enum dma_data_direction direction)
+
+Synchronise a single contiguous or scatter/gather mapping for the CPU
+and device. With the sync_sg API, all the parameters must be the same
+as those passed into the single mapping API. With the sync_single API,
+you can use dma_handle and size parameters that aren't identical to
+those passed into the single mapping API to do a partial sync.
+
+Notes: You must do this:
+
+- Before reading values that have been written by DMA from the device
+ (use the DMA_FROM_DEVICE direction)
+- After writing values that will be written to the device using DMA
+ (use the DMA_TO_DEVICE) direction
+- before *and* after handing memory to the device if the memory is
+ DMA_BIDIRECTIONAL
+
+See also dma_map_single().
+
+dma_addr_t
+dma_map_single_attrs(struct device *dev, void *cpu_addr, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+
+void
+dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+
+int
+dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+
+void
+dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+
+The four functions above are just like the counterpart functions
+without the _attrs suffixes, except that they pass an optional
+struct dma_attrs*.
+
+struct dma_attrs encapsulates a set of "DMA attributes". For the
+definition of struct dma_attrs see linux/dma-attrs.h.
+
+The interpretation of DMA attributes is architecture-specific, and
+each attribute should be documented in Documentation/DMA-attributes.txt.
+
+If struct dma_attrs* is NULL, the semantics of each of these
+functions is identical to those of the corresponding function
+without the _attrs suffix. As a result dma_map_single_attrs()
+can generally replace dma_map_single(), etc.
+
+As an example of the use of the *_attrs functions, here's how
+you could pass an attribute DMA_ATTR_FOO when mapping memory
+for DMA:
+
+#include <linux/dma-attrs.h>
+/* DMA_ATTR_FOO should be defined in linux/dma-attrs.h and
+ * documented in Documentation/DMA-attributes.txt */
+...
+
+ DEFINE_DMA_ATTRS(attrs);
+ dma_set_attr(DMA_ATTR_FOO, &attrs);
+ ....
+ n = dma_map_sg_attrs(dev, sg, nents, DMA_TO_DEVICE, &attr);
+ ....
+
+Architectures that care about DMA_ATTR_FOO would check for its
+presence in their implementations of the mapping and unmapping
+routines, e.g.:
+
+void whizco_dma_map_sg_attrs(struct device *dev, dma_addr_t dma_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ ....
+ int foo = dma_get_attr(DMA_ATTR_FOO, attrs);
+ ....
+ if (foo)
+ /* twizzle the frobnozzle */
+ ....
+
+
+Part II - Advanced dma_ usage
+-----------------------------
+
+Warning: These pieces of the DMA API should not be used in the
+majority of cases, since they cater for unlikely corner cases that
+don't belong in usual drivers.
+
+If you don't understand how cache line coherency works between a
+processor and an I/O device, you should not be using this part of the
+API at all.
+
+void *
+dma_alloc_noncoherent(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flag)
+
+Identical to dma_alloc_coherent() except that the platform will
+choose to return either consistent or non-consistent memory as it sees
+fit. By using this API, you are guaranteeing to the platform that you
+have all the correct and necessary sync points for this memory in the
+driver should it choose to return non-consistent memory.
+
+Note: where the platform can return consistent memory, it will
+guarantee that the sync points become nops.
+
+Warning: Handling non-consistent memory is a real pain. You should
+only use this API if you positively know your driver will be
+required to work on one of the rare (usually non-PCI) architectures
+that simply cannot make consistent memory.
+
+void
+dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle)
+
+Free memory allocated by the nonconsistent API. All parameters must
+be identical to those passed in (and returned by
+dma_alloc_noncoherent()).
+
+int
+dma_get_cache_alignment(void)
+
+Returns the processor cache alignment. This is the absolute minimum
+alignment *and* width that you must observe when either mapping
+memory or doing partial flushes.
+
+Notes: This API may return a number *larger* than the actual cache
+line, but it will guarantee that one or more cache lines fit exactly
+into the width returned by this call. It will also always be a power
+of two for easy alignment.
+
+void
+dma_cache_sync(struct device *dev, void *vaddr, size_t size,
+ enum dma_data_direction direction)
+
+Do a partial sync of memory that was allocated by
+dma_alloc_noncoherent(), starting at virtual address vaddr and
+continuing on for size. Again, you *must* observe the cache line
+boundaries when doing this.
+
+int
+dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
+ dma_addr_t device_addr, size_t size, int
+ flags)
+
+Declare region of memory to be handed out by dma_alloc_coherent() when
+it's asked for coherent memory for this device.
+
+phys_addr is the CPU physical address to which the memory is currently
+assigned (this will be ioremapped so the CPU can access the region).
+
+device_addr is the DMA address the device needs to be programmed
+with to actually address this memory (this will be handed out as the
+dma_addr_t in dma_alloc_coherent()).
+
+size is the size of the area (must be multiples of PAGE_SIZE).
+
+flags can be ORed together and are:
+
+DMA_MEMORY_MAP - request that the memory returned from
+dma_alloc_coherent() be directly writable.
+
+DMA_MEMORY_IO - request that the memory returned from
+dma_alloc_coherent() be addressable using read()/write()/memcpy_toio() etc.
+
+One or both of these flags must be present.
+
+DMA_MEMORY_INCLUDES_CHILDREN - make the declared memory be allocated by
+dma_alloc_coherent of any child devices of this one (for memory residing
+on a bridge).
+
+DMA_MEMORY_EXCLUSIVE - only allocate memory from the declared regions.
+Do not allow dma_alloc_coherent() to fall back to system memory when
+it's out of memory in the declared region.
+
+The return value will be either DMA_MEMORY_MAP or DMA_MEMORY_IO and
+must correspond to a passed in flag (i.e. no returning DMA_MEMORY_IO
+if only DMA_MEMORY_MAP were passed in) for success or zero for
+failure.
+
+Note, for DMA_MEMORY_IO returns, all subsequent memory returned by
+dma_alloc_coherent() may no longer be accessed directly, but instead
+must be accessed using the correct bus functions. If your driver
+isn't prepared to handle this contingency, it should not specify
+DMA_MEMORY_IO in the input flags.
+
+As a simplification for the platforms, only *one* such region of
+memory may be declared per device.
+
+For reasons of efficiency, most platforms choose to track the declared
+region only at the granularity of a page. For smaller allocations,
+you should use the dma_pool() API.
+
+void
+dma_release_declared_memory(struct device *dev)
+
+Remove the memory region previously declared from the system. This
+API performs *no* in-use checking for this region and will return
+unconditionally having removed all the required structures. It is the
+driver's job to ensure that no parts of this memory region are
+currently in use.
+
+void *
+dma_mark_declared_memory_occupied(struct device *dev,
+ dma_addr_t device_addr, size_t size)
+
+This is used to occupy specific regions of the declared space
+(dma_alloc_coherent() will hand out the first free region it finds).
+
+device_addr is the *device* address of the region requested.
+
+size is the size (and should be a page-sized multiple).
+
+The return value will be either a pointer to the processor virtual
+address of the memory, or an error (via PTR_ERR()) if any part of the
+region is occupied.
+
+Part III - Debug drivers use of the DMA-API
+-------------------------------------------
+
+The DMA-API as described above has some constraints. DMA addresses must be
+released with the corresponding function with the same size for example. With
+the advent of hardware IOMMUs it becomes more and more important that drivers
+do not violate those constraints. In the worst case such a violation can
+result in data corruption up to destroyed filesystems.
+
+To debug drivers and find bugs in the usage of the DMA-API checking code can
+be compiled into the kernel which will tell the developer about those
+violations. If your architecture supports it you can select the "Enable
+debugging of DMA-API usage" option in your kernel configuration. Enabling this
+option has a performance impact. Do not enable it in production kernels.
+
+If you boot the resulting kernel will contain code which does some bookkeeping
+about what DMA memory was allocated for which device. If this code detects an
+error it prints a warning message with some details into your kernel log. An
+example warning message may look like this:
+
+------------[ cut here ]------------
+WARNING: at /data2/repos/linux-2.6-iommu/lib/dma-debug.c:448
+ check_unmap+0x203/0x490()
+Hardware name:
+forcedeth 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong
+ function [device address=0x00000000640444be] [size=66 bytes] [mapped as
+single] [unmapped as page]
+Modules linked in: nfsd exportfs bridge stp llc r8169
+Pid: 0, comm: swapper Tainted: G W 2.6.28-dmatest-09289-g8bb99c0 #1
+Call Trace:
+ <IRQ> [<ffffffff80240b22>] warn_slowpath+0xf2/0x130
+ [<ffffffff80647b70>] _spin_unlock+0x10/0x30
+ [<ffffffff80537e75>] usb_hcd_link_urb_to_ep+0x75/0xc0
+ [<ffffffff80647c22>] _spin_unlock_irqrestore+0x12/0x40
+ [<ffffffff8055347f>] ohci_urb_enqueue+0x19f/0x7c0
+ [<ffffffff80252f96>] queue_work+0x56/0x60
+ [<ffffffff80237e10>] enqueue_task_fair+0x20/0x50
+ [<ffffffff80539279>] usb_hcd_submit_urb+0x379/0xbc0
+ [<ffffffff803b78c3>] cpumask_next_and+0x23/0x40
+ [<ffffffff80235177>] find_busiest_group+0x207/0x8a0
+ [<ffffffff8064784f>] _spin_lock_irqsave+0x1f/0x50
+ [<ffffffff803c7ea3>] check_unmap+0x203/0x490
+ [<ffffffff803c8259>] debug_dma_unmap_page+0x49/0x50
+ [<ffffffff80485f26>] nv_tx_done_optimized+0xc6/0x2c0
+ [<ffffffff80486c13>] nv_nic_irq_optimized+0x73/0x2b0
+ [<ffffffff8026df84>] handle_IRQ_event+0x34/0x70
+ [<ffffffff8026ffe9>] handle_edge_irq+0xc9/0x150
+ [<ffffffff8020e3ab>] do_IRQ+0xcb/0x1c0
+ [<ffffffff8020c093>] ret_from_intr+0x0/0xa
+ <EOI> <4>---[ end trace f6435a98e2a38c0e ]---
+
+The driver developer can find the driver and the device including a stacktrace
+of the DMA-API call which caused this warning.
+
+Per default only the first error will result in a warning message. All other
+errors will only silently counted. This limitation exist to prevent the code
+from flooding your kernel log. To support debugging a device driver this can
+be disabled via debugfs. See the debugfs interface documentation below for
+details.
+
+The debugfs directory for the DMA-API debugging code is called dma-api/. In
+this directory the following files can currently be found:
+
+ dma-api/all_errors This file contains a numeric value. If this
+ value is not equal to zero the debugging code
+ will print a warning for every error it finds
+ into the kernel log. Be careful with this
+ option, as it can easily flood your logs.
+
+ dma-api/disabled This read-only file contains the character 'Y'
+ if the debugging code is disabled. This can
+ happen when it runs out of memory or if it was
+ disabled at boot time
+
+ dma-api/error_count This file is read-only and shows the total
+ numbers of errors found.
+
+ dma-api/num_errors The number in this file shows how many
+ warnings will be printed to the kernel log
+ before it stops. This number is initialized to
+ one at system boot and be set by writing into
+ this file
+
+ dma-api/min_free_entries
+ This read-only file can be read to get the
+ minimum number of free dma_debug_entries the
+ allocator has ever seen. If this value goes
+ down to zero the code will disable itself
+ because it is not longer reliable.
+
+ dma-api/num_free_entries
+ The current number of free dma_debug_entries
+ in the allocator.
+
+ dma-api/driver-filter
+ You can write a name of a driver into this file
+ to limit the debug output to requests from that
+ particular driver. Write an empty string to
+ that file to disable the filter and see
+ all errors again.
+
+If you have this code compiled into your kernel it will be enabled by default.
+If you want to boot without the bookkeeping anyway you can provide
+'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
+Notice that you can not enable it again at runtime. You have to reboot to do
+so.
+
+If you want to see debug messages only for a special device driver you can
+specify the dma_debug_driver=<drivername> parameter. This will enable the
+driver filter at boot time. The debug code will only print errors for that
+driver afterwards. This filter can be disabled or changed later using debugfs.
+
+When the code disables itself at runtime this is most likely because it ran
+out of dma_debug_entries. These entries are preallocated at boot. The number
+of preallocated entries is defined per architecture. If it is too low for you
+boot with 'dma_debug_entries=<your_desired_number>' to overwrite the
+architectural default.
+
+void debug_dmap_mapping_error(struct device *dev, dma_addr_t dma_addr);
+
+dma-debug interface debug_dma_mapping_error() to debug drivers that fail
+to check DMA mapping errors on addresses returned by dma_map_single() and
+dma_map_page() interfaces. This interface clears a flag set by
+debug_dma_map_page() to indicate that dma_mapping_error() has been called by
+the driver. When driver does unmap, debug_dma_unmap() checks the flag and if
+this flag is still set, prints warning message that includes call trace that
+leads up to the unmap. This interface can be called from dma_mapping_error()
+routines to enable DMA mapping error check debugging.
+
diff --git a/Documentation/DMA-ISA-LPC.txt b/Documentation/DMA-ISA-LPC.txt
new file mode 100644
index 000000000..b1a19835e
--- /dev/null
+++ b/Documentation/DMA-ISA-LPC.txt
@@ -0,0 +1,151 @@
+ DMA with ISA and LPC devices
+ ============================
+
+ Pierre Ossman <drzeus@drzeus.cx>
+
+This document describes how to do DMA transfers using the old ISA DMA
+controller. Even though ISA is more or less dead today the LPC bus
+uses the same DMA system so it will be around for quite some time.
+
+Part I - Headers and dependencies
+---------------------------------
+
+To do ISA style DMA you need to include two headers:
+
+#include <linux/dma-mapping.h>
+#include <asm/dma.h>
+
+The first is the generic DMA API used to convert virtual addresses to
+bus addresses (see Documentation/DMA-API.txt for details).
+
+The second contains the routines specific to ISA DMA transfers. Since
+this is not present on all platforms make sure you construct your
+Kconfig to be dependent on ISA_DMA_API (not ISA) so that nobody tries
+to build your driver on unsupported platforms.
+
+Part II - Buffer allocation
+---------------------------
+
+The ISA DMA controller has some very strict requirements on which
+memory it can access so extra care must be taken when allocating
+buffers.
+
+(You usually need a special buffer for DMA transfers instead of
+transferring directly to and from your normal data structures.)
+
+The DMA-able address space is the lowest 16 MB of _physical_ memory.
+Also the transfer block may not cross page boundaries (which are 64
+or 128 KiB depending on which channel you use).
+
+In order to allocate a piece of memory that satisfies all these
+requirements you pass the flag GFP_DMA to kmalloc.
+
+Unfortunately the memory available for ISA DMA is scarce so unless you
+allocate the memory during boot-up it's a good idea to also pass
+__GFP_REPEAT and __GFP_NOWARN to make the allocater try a bit harder.
+
+(This scarcity also means that you should allocate the buffer as
+early as possible and not release it until the driver is unloaded.)
+
+Part III - Address translation
+------------------------------
+
+To translate the virtual address to a bus address, use the normal DMA
+API. Do _not_ use isa_virt_to_phys() even though it does the same
+thing. The reason for this is that the function isa_virt_to_phys()
+will require a Kconfig dependency to ISA, not just ISA_DMA_API which
+is really all you need. Remember that even though the DMA controller
+has its origins in ISA it is used elsewhere.
+
+Note: x86_64 had a broken DMA API when it came to ISA but has since
+been fixed. If your arch has problems then fix the DMA API instead of
+reverting to the ISA functions.
+
+Part IV - Channels
+------------------
+
+A normal ISA DMA controller has 8 channels. The lower four are for
+8-bit transfers and the upper four are for 16-bit transfers.
+
+(Actually the DMA controller is really two separate controllers where
+channel 4 is used to give DMA access for the second controller (0-3).
+This means that of the four 16-bits channels only three are usable.)
+
+You allocate these in a similar fashion as all basic resources:
+
+extern int request_dma(unsigned int dmanr, const char * device_id);
+extern void free_dma(unsigned int dmanr);
+
+The ability to use 16-bit or 8-bit transfers is _not_ up to you as a
+driver author but depends on what the hardware supports. Check your
+specs or test different channels.
+
+Part V - Transfer data
+----------------------
+
+Now for the good stuff, the actual DMA transfer. :)
+
+Before you use any ISA DMA routines you need to claim the DMA lock
+using claim_dma_lock(). The reason is that some DMA operations are
+not atomic so only one driver may fiddle with the registers at a
+time.
+
+The first time you use the DMA controller you should call
+clear_dma_ff(). This clears an internal register in the DMA
+controller that is used for the non-atomic operations. As long as you
+(and everyone else) uses the locking functions then you only need to
+reset this once.
+
+Next, you tell the controller in which direction you intend to do the
+transfer using set_dma_mode(). Currently you have the options
+DMA_MODE_READ and DMA_MODE_WRITE.
+
+Set the address from where the transfer should start (this needs to
+be 16-bit aligned for 16-bit transfers) and how many bytes to
+transfer. Note that it's _bytes_. The DMA routines will do all the
+required translation to values that the DMA controller understands.
+
+The final step is enabling the DMA channel and releasing the DMA
+lock.
+
+Once the DMA transfer is finished (or timed out) you should disable
+the channel again. You should also check get_dma_residue() to make
+sure that all data has been transferred.
+
+Example:
+
+int flags, residue;
+
+flags = claim_dma_lock();
+
+clear_dma_ff();
+
+set_dma_mode(channel, DMA_MODE_WRITE);
+set_dma_addr(channel, phys_addr);
+set_dma_count(channel, num_bytes);
+
+dma_enable(channel);
+
+release_dma_lock(flags);
+
+while (!device_done());
+
+flags = claim_dma_lock();
+
+dma_disable(channel);
+
+residue = dma_get_residue(channel);
+if (residue != 0)
+ printk(KERN_ERR "driver: Incomplete DMA transfer!"
+ " %d bytes left!\n", residue);
+
+release_dma_lock(flags);
+
+Part VI - Suspend/resume
+------------------------
+
+It is the driver's responsibility to make sure that the machine isn't
+suspended while a DMA transfer is in progress. Also, all DMA settings
+are lost when the system suspends so if your driver relies on the DMA
+controller being in a certain state then you have to restore these
+registers upon resume.
diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt
new file mode 100644
index 000000000..18dc52c4f
--- /dev/null
+++ b/Documentation/DMA-attributes.txt
@@ -0,0 +1,102 @@
+ DMA attributes
+ ==============
+
+This document describes the semantics of the DMA attributes that are
+defined in linux/dma-attrs.h.
+
+DMA_ATTR_WRITE_BARRIER
+----------------------
+
+DMA_ATTR_WRITE_BARRIER is a (write) barrier attribute for DMA. DMA
+to a memory region with the DMA_ATTR_WRITE_BARRIER attribute forces
+all pending DMA writes to complete, and thus provides a mechanism to
+strictly order DMA from a device across all intervening busses and
+bridges. This barrier is not specific to a particular type of
+interconnect, it applies to the system as a whole, and so its
+implementation must account for the idiosyncrasies of the system all
+the way from the DMA device to memory.
+
+As an example of a situation where DMA_ATTR_WRITE_BARRIER would be
+useful, suppose that a device does a DMA write to indicate that data is
+ready and available in memory. The DMA of the "completion indication"
+could race with data DMA. Mapping the memory used for completion
+indications with DMA_ATTR_WRITE_BARRIER would prevent the race.
+
+DMA_ATTR_WEAK_ORDERING
+----------------------
+
+DMA_ATTR_WEAK_ORDERING specifies that reads and writes to the mapping
+may be weakly ordered, that is that reads and writes may pass each other.
+
+Since it is optional for platforms to implement DMA_ATTR_WEAK_ORDERING,
+those that do not will simply ignore the attribute and exhibit default
+behavior.
+
+DMA_ATTR_WRITE_COMBINE
+----------------------
+
+DMA_ATTR_WRITE_COMBINE specifies that writes to the mapping may be
+buffered to improve performance.
+
+Since it is optional for platforms to implement DMA_ATTR_WRITE_COMBINE,
+those that do not will simply ignore the attribute and exhibit default
+behavior.
+
+DMA_ATTR_NON_CONSISTENT
+-----------------------
+
+DMA_ATTR_NON_CONSISTENT lets the platform to choose to return either
+consistent or non-consistent memory as it sees fit. By using this API,
+you are guaranteeing to the platform that you have all the correct and
+necessary sync points for this memory in the driver.
+
+DMA_ATTR_NO_KERNEL_MAPPING
+--------------------------
+
+DMA_ATTR_NO_KERNEL_MAPPING lets the platform to avoid creating a kernel
+virtual mapping for the allocated buffer. On some architectures creating
+such mapping is non-trivial task and consumes very limited resources
+(like kernel virtual address space or dma consistent address space).
+Buffers allocated with this attribute can be only passed to user space
+by calling dma_mmap_attrs(). By using this API, you are guaranteeing
+that you won't dereference the pointer returned by dma_alloc_attr(). You
+can treat it as a cookie that must be passed to dma_mmap_attrs() and
+dma_free_attrs(). Make sure that both of these also get this attribute
+set on each call.
+
+Since it is optional for platforms to implement
+DMA_ATTR_NO_KERNEL_MAPPING, those that do not will simply ignore the
+attribute and exhibit default behavior.
+
+DMA_ATTR_SKIP_CPU_SYNC
+----------------------
+
+By default dma_map_{single,page,sg} functions family transfer a given
+buffer from CPU domain to device domain. Some advanced use cases might
+require sharing a buffer between more than one device. This requires
+having a mapping created separately for each device and is usually
+performed by calling dma_map_{single,page,sg} function more than once
+for the given buffer with device pointer to each device taking part in
+the buffer sharing. The first call transfers a buffer from 'CPU' domain
+to 'device' domain, what synchronizes CPU caches for the given region
+(usually it means that the cache has been flushed or invalidated
+depending on the dma direction). However, next calls to
+dma_map_{single,page,sg}() for other devices will perform exactly the
+same synchronization operation on the CPU cache. CPU cache synchronization
+might be a time consuming operation, especially if the buffers are
+large, so it is highly recommended to avoid it if possible.
+DMA_ATTR_SKIP_CPU_SYNC allows platform code to skip synchronization of
+the CPU cache for the given buffer assuming that it has been already
+transferred to 'device' domain. This attribute can be also used for
+dma_unmap_{single,page,sg} functions family to force buffer to stay in
+device domain after releasing a mapping for it. Use this attribute with
+care!
+
+DMA_ATTR_FORCE_CONTIGUOUS
+-------------------------
+
+By default DMA-mapping subsystem is allowed to assemble the buffer
+allocated by dma_alloc_attrs() function from individual pages if it can
+be mapped as contiguous chunk into device dma address space. By
+specifying this attribute the allocated buffer is forced to be contiguous
+also in physical memory.
diff --git a/Documentation/DocBook/.gitignore b/Documentation/DocBook/.gitignore
new file mode 100644
index 000000000..7ebd5465d
--- /dev/null
+++ b/Documentation/DocBook/.gitignore
@@ -0,0 +1,15 @@
+*.xml
+*.ps
+*.pdf
+*.html
+*.9.gz
+*.9
+*.aux
+*.dvi
+*.log
+*.out
+*.png
+*.gif
+*.svg
+media-indices.tmpl
+media-entities.tmpl
diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl
new file mode 100644
index 000000000..aac9357d4
--- /dev/null
+++ b/Documentation/DocBook/80211.tmpl
@@ -0,0 +1,582 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE set PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+<set>
+ <setinfo>
+ <title>The 802.11 subsystems &ndash; for kernel developers</title>
+ <subtitle>
+ Explaining wireless 802.11 networking in the Linux kernel
+ </subtitle>
+
+ <copyright>
+ <year>2007-2009</year>
+ <holder>Johannes Berg</holder>
+ </copyright>
+
+ <authorgroup>
+ <author>
+ <firstname>Johannes</firstname>
+ <surname>Berg</surname>
+ <affiliation>
+ <address><email>johannes@sipsolutions.net</email></address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+ <para>
+ This documentation is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this documentation; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+
+ <abstract>
+ <para>
+ These books attempt to give a description of the
+ various subsystems that play a role in 802.11 wireless
+ networking in Linux. Since these books are for kernel
+ developers they attempts to document the structures
+ and functions used in the kernel as well as giving a
+ higher-level overview.
+ </para>
+ <para>
+ The reader is expected to be familiar with the 802.11
+ standard as published by the IEEE in 802.11-2007 (or
+ possibly later versions). References to this standard
+ will be given as "802.11-2007 8.1.5".
+ </para>
+ </abstract>
+ </setinfo>
+ <book id="cfg80211-developers-guide">
+ <bookinfo>
+ <title>The cfg80211 subsystem</title>
+
+ <abstract>
+!Pinclude/net/cfg80211.h Introduction
+ </abstract>
+ </bookinfo>
+ <chapter>
+ <title>Device registration</title>
+!Pinclude/net/cfg80211.h Device registration
+!Finclude/net/cfg80211.h ieee80211_band
+!Finclude/net/cfg80211.h ieee80211_channel_flags
+!Finclude/net/cfg80211.h ieee80211_channel
+!Finclude/net/cfg80211.h ieee80211_rate_flags
+!Finclude/net/cfg80211.h ieee80211_rate
+!Finclude/net/cfg80211.h ieee80211_sta_ht_cap
+!Finclude/net/cfg80211.h ieee80211_supported_band
+!Finclude/net/cfg80211.h cfg80211_signal_type
+!Finclude/net/cfg80211.h wiphy_params_flags
+!Finclude/net/cfg80211.h wiphy_flags
+!Finclude/net/cfg80211.h wiphy
+!Finclude/net/cfg80211.h wireless_dev
+!Finclude/net/cfg80211.h wiphy_new
+!Finclude/net/cfg80211.h wiphy_register
+!Finclude/net/cfg80211.h wiphy_unregister
+!Finclude/net/cfg80211.h wiphy_free
+
+!Finclude/net/cfg80211.h wiphy_name
+!Finclude/net/cfg80211.h wiphy_dev
+!Finclude/net/cfg80211.h wiphy_priv
+!Finclude/net/cfg80211.h priv_to_wiphy
+!Finclude/net/cfg80211.h set_wiphy_dev
+!Finclude/net/cfg80211.h wdev_priv
+!Finclude/net/cfg80211.h ieee80211_iface_limit
+!Finclude/net/cfg80211.h ieee80211_iface_combination
+!Finclude/net/cfg80211.h cfg80211_check_combinations
+ </chapter>
+ <chapter>
+ <title>Actions and configuration</title>
+!Pinclude/net/cfg80211.h Actions and configuration
+!Finclude/net/cfg80211.h cfg80211_ops
+!Finclude/net/cfg80211.h vif_params
+!Finclude/net/cfg80211.h key_params
+!Finclude/net/cfg80211.h survey_info_flags
+!Finclude/net/cfg80211.h survey_info
+!Finclude/net/cfg80211.h cfg80211_beacon_data
+!Finclude/net/cfg80211.h cfg80211_ap_settings
+!Finclude/net/cfg80211.h station_parameters
+!Finclude/net/cfg80211.h rate_info_flags
+!Finclude/net/cfg80211.h rate_info
+!Finclude/net/cfg80211.h station_info
+!Finclude/net/cfg80211.h monitor_flags
+!Finclude/net/cfg80211.h mpath_info_flags
+!Finclude/net/cfg80211.h mpath_info
+!Finclude/net/cfg80211.h bss_parameters
+!Finclude/net/cfg80211.h ieee80211_txq_params
+!Finclude/net/cfg80211.h cfg80211_crypto_settings
+!Finclude/net/cfg80211.h cfg80211_auth_request
+!Finclude/net/cfg80211.h cfg80211_assoc_request
+!Finclude/net/cfg80211.h cfg80211_deauth_request
+!Finclude/net/cfg80211.h cfg80211_disassoc_request
+!Finclude/net/cfg80211.h cfg80211_ibss_params
+!Finclude/net/cfg80211.h cfg80211_connect_params
+!Finclude/net/cfg80211.h cfg80211_pmksa
+!Finclude/net/cfg80211.h cfg80211_rx_mlme_mgmt
+!Finclude/net/cfg80211.h cfg80211_auth_timeout
+!Finclude/net/cfg80211.h cfg80211_rx_assoc_resp
+!Finclude/net/cfg80211.h cfg80211_assoc_timeout
+!Finclude/net/cfg80211.h cfg80211_tx_mlme_mgmt
+!Finclude/net/cfg80211.h cfg80211_ibss_joined
+!Finclude/net/cfg80211.h cfg80211_connect_result
+!Finclude/net/cfg80211.h cfg80211_roamed
+!Finclude/net/cfg80211.h cfg80211_disconnected
+!Finclude/net/cfg80211.h cfg80211_ready_on_channel
+!Finclude/net/cfg80211.h cfg80211_remain_on_channel_expired
+!Finclude/net/cfg80211.h cfg80211_new_sta
+!Finclude/net/cfg80211.h cfg80211_rx_mgmt
+!Finclude/net/cfg80211.h cfg80211_mgmt_tx_status
+!Finclude/net/cfg80211.h cfg80211_cqm_rssi_notify
+!Finclude/net/cfg80211.h cfg80211_cqm_pktloss_notify
+!Finclude/net/cfg80211.h cfg80211_michael_mic_failure
+ </chapter>
+ <chapter>
+ <title>Scanning and BSS list handling</title>
+!Pinclude/net/cfg80211.h Scanning and BSS list handling
+!Finclude/net/cfg80211.h cfg80211_ssid
+!Finclude/net/cfg80211.h cfg80211_scan_request
+!Finclude/net/cfg80211.h cfg80211_scan_done
+!Finclude/net/cfg80211.h cfg80211_bss
+!Finclude/net/cfg80211.h cfg80211_inform_bss_width_frame
+!Finclude/net/cfg80211.h cfg80211_inform_bss_width
+!Finclude/net/cfg80211.h cfg80211_unlink_bss
+!Finclude/net/cfg80211.h cfg80211_find_ie
+!Finclude/net/cfg80211.h ieee80211_bss_get_ie
+ </chapter>
+ <chapter>
+ <title>Utility functions</title>
+!Pinclude/net/cfg80211.h Utility functions
+!Finclude/net/cfg80211.h ieee80211_channel_to_frequency
+!Finclude/net/cfg80211.h ieee80211_frequency_to_channel
+!Finclude/net/cfg80211.h ieee80211_get_channel
+!Finclude/net/cfg80211.h ieee80211_get_response_rate
+!Finclude/net/cfg80211.h ieee80211_hdrlen
+!Finclude/net/cfg80211.h ieee80211_get_hdrlen_from_skb
+!Finclude/net/cfg80211.h ieee80211_radiotap_iterator
+ </chapter>
+ <chapter>
+ <title>Data path helpers</title>
+!Pinclude/net/cfg80211.h Data path helpers
+!Finclude/net/cfg80211.h ieee80211_data_to_8023
+!Finclude/net/cfg80211.h ieee80211_data_from_8023
+!Finclude/net/cfg80211.h ieee80211_amsdu_to_8023s
+!Finclude/net/cfg80211.h cfg80211_classify8021d
+ </chapter>
+ <chapter>
+ <title>Regulatory enforcement infrastructure</title>
+!Pinclude/net/cfg80211.h Regulatory enforcement infrastructure
+!Finclude/net/cfg80211.h regulatory_hint
+!Finclude/net/cfg80211.h wiphy_apply_custom_regulatory
+!Finclude/net/cfg80211.h freq_reg_info
+ </chapter>
+ <chapter>
+ <title>RFkill integration</title>
+!Pinclude/net/cfg80211.h RFkill integration
+!Finclude/net/cfg80211.h wiphy_rfkill_set_hw_state
+!Finclude/net/cfg80211.h wiphy_rfkill_start_polling
+!Finclude/net/cfg80211.h wiphy_rfkill_stop_polling
+ </chapter>
+ <chapter>
+ <title>Test mode</title>
+!Pinclude/net/cfg80211.h Test mode
+!Finclude/net/cfg80211.h cfg80211_testmode_alloc_reply_skb
+!Finclude/net/cfg80211.h cfg80211_testmode_reply
+!Finclude/net/cfg80211.h cfg80211_testmode_alloc_event_skb
+!Finclude/net/cfg80211.h cfg80211_testmode_event
+ </chapter>
+ </book>
+ <book id="mac80211-developers-guide">
+ <bookinfo>
+ <title>The mac80211 subsystem</title>
+ <abstract>
+!Pinclude/net/mac80211.h Introduction
+!Pinclude/net/mac80211.h Warning
+ </abstract>
+ </bookinfo>
+
+ <toc></toc>
+
+ <!--
+ Generally, this document shall be ordered by increasing complexity.
+ It is important to note that readers should be able to read only
+ the first few sections to get a working driver and only advanced
+ usage should require reading the full document.
+ -->
+
+ <part>
+ <title>The basic mac80211 driver interface</title>
+ <partintro>
+ <para>
+ You should read and understand the information contained
+ within this part of the book while implementing a driver.
+ In some chapters, advanced usage is noted, that may be
+ skipped at first.
+ </para>
+ <para>
+ This part of the book only covers station and monitor mode
+ functionality, additional information required to implement
+ the other modes is covered in the second part of the book.
+ </para>
+ </partintro>
+
+ <chapter id="basics">
+ <title>Basic hardware handling</title>
+ <para>TBD</para>
+ <para>
+ This chapter shall contain information on getting a hw
+ struct allocated and registered with mac80211.
+ </para>
+ <para>
+ Since it is required to allocate rates/modes before registering
+ a hw struct, this chapter shall also contain information on setting
+ up the rate/mode structs.
+ </para>
+ <para>
+ Additionally, some discussion about the callbacks and
+ the general programming model should be in here, including
+ the definition of ieee80211_ops which will be referred to
+ a lot.
+ </para>
+ <para>
+ Finally, a discussion of hardware capabilities should be done
+ with references to other parts of the book.
+ </para>
+ <!-- intentionally multiple !F lines to get proper order -->
+!Finclude/net/mac80211.h ieee80211_hw
+!Finclude/net/mac80211.h ieee80211_hw_flags
+!Finclude/net/mac80211.h SET_IEEE80211_DEV
+!Finclude/net/mac80211.h SET_IEEE80211_PERM_ADDR
+!Finclude/net/mac80211.h ieee80211_ops
+!Finclude/net/mac80211.h ieee80211_alloc_hw
+!Finclude/net/mac80211.h ieee80211_register_hw
+!Finclude/net/mac80211.h ieee80211_unregister_hw
+!Finclude/net/mac80211.h ieee80211_free_hw
+ </chapter>
+
+ <chapter id="phy-handling">
+ <title>PHY configuration</title>
+ <para>TBD</para>
+ <para>
+ This chapter should describe PHY handling including
+ start/stop callbacks and the various structures used.
+ </para>
+!Finclude/net/mac80211.h ieee80211_conf
+!Finclude/net/mac80211.h ieee80211_conf_flags
+ </chapter>
+
+ <chapter id="iface-handling">
+ <title>Virtual interfaces</title>
+ <para>TBD</para>
+ <para>
+ This chapter should describe virtual interface basics
+ that are relevant to the driver (VLANs, MGMT etc are not.)
+ It should explain the use of the add_iface/remove_iface
+ callbacks as well as the interface configuration callbacks.
+ </para>
+ <para>Things related to AP mode should be discussed there.</para>
+ <para>
+ Things related to supporting multiple interfaces should be
+ in the appropriate chapter, a BIG FAT note should be here about
+ this though and the recommendation to allow only a single
+ interface in STA mode at first!
+ </para>
+!Finclude/net/mac80211.h ieee80211_vif
+ </chapter>
+
+ <chapter id="rx-tx">
+ <title>Receive and transmit processing</title>
+ <sect1>
+ <title>what should be here</title>
+ <para>TBD</para>
+ <para>
+ This should describe the receive and transmit
+ paths in mac80211/the drivers as well as
+ transmit status handling.
+ </para>
+ </sect1>
+ <sect1>
+ <title>Frame format</title>
+!Pinclude/net/mac80211.h Frame format
+ </sect1>
+ <sect1>
+ <title>Packet alignment</title>
+!Pnet/mac80211/rx.c Packet alignment
+ </sect1>
+ <sect1>
+ <title>Calling into mac80211 from interrupts</title>
+!Pinclude/net/mac80211.h Calling mac80211 from interrupts
+ </sect1>
+ <sect1>
+ <title>functions/definitions</title>
+!Finclude/net/mac80211.h ieee80211_rx_status
+!Finclude/net/mac80211.h mac80211_rx_flags
+!Finclude/net/mac80211.h mac80211_tx_info_flags
+!Finclude/net/mac80211.h mac80211_tx_control_flags
+!Finclude/net/mac80211.h mac80211_rate_control_flags
+!Finclude/net/mac80211.h ieee80211_tx_rate
+!Finclude/net/mac80211.h ieee80211_tx_info
+!Finclude/net/mac80211.h ieee80211_tx_info_clear_status
+!Finclude/net/mac80211.h ieee80211_rx
+!Finclude/net/mac80211.h ieee80211_rx_ni
+!Finclude/net/mac80211.h ieee80211_rx_irqsafe
+!Finclude/net/mac80211.h ieee80211_tx_status
+!Finclude/net/mac80211.h ieee80211_tx_status_ni
+!Finclude/net/mac80211.h ieee80211_tx_status_irqsafe
+!Finclude/net/mac80211.h ieee80211_rts_get
+!Finclude/net/mac80211.h ieee80211_rts_duration
+!Finclude/net/mac80211.h ieee80211_ctstoself_get
+!Finclude/net/mac80211.h ieee80211_ctstoself_duration
+!Finclude/net/mac80211.h ieee80211_generic_frame_duration
+!Finclude/net/mac80211.h ieee80211_wake_queue
+!Finclude/net/mac80211.h ieee80211_stop_queue
+!Finclude/net/mac80211.h ieee80211_wake_queues
+!Finclude/net/mac80211.h ieee80211_stop_queues
+!Finclude/net/mac80211.h ieee80211_queue_stopped
+ </sect1>
+ </chapter>
+
+ <chapter id="filters">
+ <title>Frame filtering</title>
+!Pinclude/net/mac80211.h Frame filtering
+!Finclude/net/mac80211.h ieee80211_filter_flags
+ </chapter>
+
+ <chapter id="workqueue">
+ <title>The mac80211 workqueue</title>
+!Pinclude/net/mac80211.h mac80211 workqueue
+!Finclude/net/mac80211.h ieee80211_queue_work
+!Finclude/net/mac80211.h ieee80211_queue_delayed_work
+ </chapter>
+ </part>
+
+ <part id="advanced">
+ <title>Advanced driver interface</title>
+ <partintro>
+ <para>
+ Information contained within this part of the book is
+ of interest only for advanced interaction of mac80211
+ with drivers to exploit more hardware capabilities and
+ improve performance.
+ </para>
+ </partintro>
+
+ <chapter id="led-support">
+ <title>LED support</title>
+ <para>
+ Mac80211 supports various ways of blinking LEDs. Wherever possible,
+ device LEDs should be exposed as LED class devices and hooked up to
+ the appropriate trigger, which will then be triggered appropriately
+ by mac80211.
+ </para>
+!Finclude/net/mac80211.h ieee80211_get_tx_led_name
+!Finclude/net/mac80211.h ieee80211_get_rx_led_name
+!Finclude/net/mac80211.h ieee80211_get_assoc_led_name
+!Finclude/net/mac80211.h ieee80211_get_radio_led_name
+!Finclude/net/mac80211.h ieee80211_tpt_blink
+!Finclude/net/mac80211.h ieee80211_tpt_led_trigger_flags
+!Finclude/net/mac80211.h ieee80211_create_tpt_led_trigger
+ </chapter>
+
+ <chapter id="hardware-crypto-offload">
+ <title>Hardware crypto acceleration</title>
+!Pinclude/net/mac80211.h Hardware crypto acceleration
+ <!-- intentionally multiple !F lines to get proper order -->
+!Finclude/net/mac80211.h set_key_cmd
+!Finclude/net/mac80211.h ieee80211_key_conf
+!Finclude/net/mac80211.h ieee80211_key_flags
+!Finclude/net/mac80211.h ieee80211_get_tkip_p1k
+!Finclude/net/mac80211.h ieee80211_get_tkip_p1k_iv
+!Finclude/net/mac80211.h ieee80211_get_tkip_p2k
+ </chapter>
+
+ <chapter id="powersave">
+ <title>Powersave support</title>
+!Pinclude/net/mac80211.h Powersave support
+ </chapter>
+
+ <chapter id="beacon-filter">
+ <title>Beacon filter support</title>
+!Pinclude/net/mac80211.h Beacon filter support
+!Finclude/net/mac80211.h ieee80211_beacon_loss
+ </chapter>
+
+ <chapter id="qos">
+ <title>Multiple queues and QoS support</title>
+ <para>TBD</para>
+!Finclude/net/mac80211.h ieee80211_tx_queue_params
+ </chapter>
+
+ <chapter id="AP">
+ <title>Access point mode support</title>
+ <para>TBD</para>
+ <para>Some parts of the if_conf should be discussed here instead</para>
+ <para>
+ Insert notes about VLAN interfaces with hw crypto here or
+ in the hw crypto chapter.
+ </para>
+ <section id="ps-client">
+ <title>support for powersaving clients</title>
+!Pinclude/net/mac80211.h AP support for powersaving clients
+!Finclude/net/mac80211.h ieee80211_get_buffered_bc
+!Finclude/net/mac80211.h ieee80211_beacon_get
+!Finclude/net/mac80211.h ieee80211_sta_eosp
+!Finclude/net/mac80211.h ieee80211_frame_release_type
+!Finclude/net/mac80211.h ieee80211_sta_ps_transition
+!Finclude/net/mac80211.h ieee80211_sta_ps_transition_ni
+!Finclude/net/mac80211.h ieee80211_sta_set_buffered
+!Finclude/net/mac80211.h ieee80211_sta_block_awake
+ </section>
+ </chapter>
+
+ <chapter id="multi-iface">
+ <title>Supporting multiple virtual interfaces</title>
+ <para>TBD</para>
+ <para>
+ Note: WDS with identical MAC address should almost always be OK
+ </para>
+ <para>
+ Insert notes about having multiple virtual interfaces with
+ different MAC addresses here, note which configurations are
+ supported by mac80211, add notes about supporting hw crypto
+ with it.
+ </para>
+!Finclude/net/mac80211.h ieee80211_iterate_active_interfaces
+!Finclude/net/mac80211.h ieee80211_iterate_active_interfaces_atomic
+ </chapter>
+
+ <chapter id="station-handling">
+ <title>Station handling</title>
+ <para>TODO</para>
+!Finclude/net/mac80211.h ieee80211_sta
+!Finclude/net/mac80211.h sta_notify_cmd
+!Finclude/net/mac80211.h ieee80211_find_sta
+!Finclude/net/mac80211.h ieee80211_find_sta_by_ifaddr
+ </chapter>
+
+ <chapter id="hardware-scan-offload">
+ <title>Hardware scan offload</title>
+ <para>TBD</para>
+!Finclude/net/mac80211.h ieee80211_scan_completed
+ </chapter>
+
+ <chapter id="aggregation">
+ <title>Aggregation</title>
+ <sect1>
+ <title>TX A-MPDU aggregation</title>
+!Pnet/mac80211/agg-tx.c TX A-MPDU aggregation
+!Cnet/mac80211/agg-tx.c
+ </sect1>
+ <sect1>
+ <title>RX A-MPDU aggregation</title>
+!Pnet/mac80211/agg-rx.c RX A-MPDU aggregation
+!Cnet/mac80211/agg-rx.c
+!Finclude/net/mac80211.h ieee80211_ampdu_mlme_action
+ </sect1>
+ </chapter>
+
+ <chapter id="smps">
+ <title>Spatial Multiplexing Powersave (SMPS)</title>
+!Pinclude/net/mac80211.h Spatial multiplexing power save
+!Finclude/net/mac80211.h ieee80211_request_smps
+!Finclude/net/mac80211.h ieee80211_smps_mode
+ </chapter>
+ </part>
+
+ <part id="rate-control">
+ <title>Rate control interface</title>
+ <partintro>
+ <para>TBD</para>
+ <para>
+ This part of the book describes the rate control algorithm
+ interface and how it relates to mac80211 and drivers.
+ </para>
+ </partintro>
+ <chapter id="ratecontrol-api">
+ <title>Rate Control API</title>
+ <para>TBD</para>
+!Finclude/net/mac80211.h ieee80211_start_tx_ba_session
+!Finclude/net/mac80211.h ieee80211_start_tx_ba_cb_irqsafe
+!Finclude/net/mac80211.h ieee80211_stop_tx_ba_session
+!Finclude/net/mac80211.h ieee80211_stop_tx_ba_cb_irqsafe
+!Finclude/net/mac80211.h ieee80211_rate_control_changed
+!Finclude/net/mac80211.h ieee80211_tx_rate_control
+!Finclude/net/mac80211.h rate_control_send_low
+ </chapter>
+ </part>
+
+ <part id="internal">
+ <title>Internals</title>
+ <partintro>
+ <para>TBD</para>
+ <para>
+ This part of the book describes mac80211 internals.
+ </para>
+ </partintro>
+
+ <chapter id="key-handling">
+ <title>Key handling</title>
+ <sect1>
+ <title>Key handling basics</title>
+!Pnet/mac80211/key.c Key handling basics
+ </sect1>
+ <sect1>
+ <title>MORE TBD</title>
+ <para>TBD</para>
+ </sect1>
+ </chapter>
+
+ <chapter id="rx-processing">
+ <title>Receive processing</title>
+ <para>TBD</para>
+ </chapter>
+
+ <chapter id="tx-processing">
+ <title>Transmit processing</title>
+ <para>TBD</para>
+ </chapter>
+
+ <chapter id="sta-info">
+ <title>Station info handling</title>
+ <sect1>
+ <title>Programming information</title>
+!Fnet/mac80211/sta_info.h sta_info
+!Fnet/mac80211/sta_info.h ieee80211_sta_info_flags
+ </sect1>
+ <sect1>
+ <title>STA information lifetime rules</title>
+!Pnet/mac80211/sta_info.c STA information lifetime rules
+ </sect1>
+ </chapter>
+
+ <chapter id="aggregation-internals">
+ <title>Aggregation</title>
+!Fnet/mac80211/sta_info.h sta_ampdu_mlme
+!Fnet/mac80211/sta_info.h tid_ampdu_tx
+!Fnet/mac80211/sta_info.h tid_ampdu_rx
+ </chapter>
+
+ <chapter id="synchronisation">
+ <title>Synchronisation</title>
+ <para>TBD</para>
+ <para>Locking, lots of RCU</para>
+ </chapter>
+ </part>
+ </book>
+</set>
diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile
new file mode 100644
index 000000000..b6a6a2e0d
--- /dev/null
+++ b/Documentation/DocBook/Makefile
@@ -0,0 +1,232 @@
+###
+# This makefile is used to generate the kernel documentation,
+# primarily based on in-line comments in various source files.
+# See Documentation/kernel-doc-nano-HOWTO.txt for instruction in how
+# to document the SRC - and how to read it.
+# To add a new book the only step required is to add the book to the
+# list of DOCBOOKS.
+
+DOCBOOKS := z8530book.xml device-drivers.xml \
+ kernel-hacking.xml kernel-locking.xml deviceiobook.xml \
+ writing_usb_driver.xml networking.xml \
+ kernel-api.xml filesystems.xml lsm.xml usb.xml kgdb.xml \
+ gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
+ genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
+ 80211.xml debugobjects.xml sh.xml regulator.xml \
+ alsa-driver-api.xml writing-an-alsa-driver.xml \
+ tracepoint.xml drm.xml media_api.xml w1.xml \
+ writing_musb_glue_layer.xml crypto-API.xml
+
+include Documentation/DocBook/media/Makefile
+
+###
+# The build process is as follows (targets):
+# (xmldocs) [by docproc]
+# file.tmpl --> file.xml +--> file.ps (psdocs) [by db2ps or xmlto]
+# +--> file.pdf (pdfdocs) [by db2pdf or xmlto]
+# +--> DIR=file (htmldocs) [by xmlto]
+# +--> man/ (mandocs) [by xmlto]
+
+
+# for PDF and PS output you can choose between xmlto and docbook-utils tools
+PDF_METHOD = $(prefer-db2x)
+PS_METHOD = $(prefer-db2x)
+
+
+###
+# The targets that may be used.
+PHONY += xmldocs sgmldocs psdocs pdfdocs htmldocs mandocs installmandocs cleandocs
+
+targets += $(DOCBOOKS)
+BOOKS := $(addprefix $(obj)/,$(DOCBOOKS))
+xmldocs: $(BOOKS)
+sgmldocs: xmldocs
+
+PS := $(patsubst %.xml, %.ps, $(BOOKS))
+psdocs: $(PS)
+
+PDF := $(patsubst %.xml, %.pdf, $(BOOKS))
+pdfdocs: $(PDF)
+
+HTML := $(sort $(patsubst %.xml, %.html, $(BOOKS)))
+htmldocs: $(HTML)
+ $(call build_main_index)
+ $(call build_images)
+ $(call install_media_images)
+
+MAN := $(patsubst %.xml, %.9, $(BOOKS))
+mandocs: $(MAN)
+ find $(obj)/man -name '*.9' | xargs gzip -f
+
+installmandocs: mandocs
+ mkdir -p /usr/local/man/man9/
+ install $(obj)/man/*.9.gz /usr/local/man/man9/
+
+###
+#External programs used
+KERNELDOC = $(srctree)/scripts/kernel-doc
+DOCPROC = $(objtree)/scripts/docproc
+
+XMLTOFLAGS = -m $(srctree)/$(src)/stylesheet.xsl
+XMLTOFLAGS += --skip-validation
+
+###
+# DOCPROC is used for two purposes:
+# 1) To generate a dependency list for a .tmpl file
+# 2) To preprocess a .tmpl file and call kernel-doc with
+# appropriate parameters.
+# The following rules are used to generate the .xml documentation
+# required to generate the final targets. (ps, pdf, html).
+quiet_cmd_docproc = DOCPROC $@
+ cmd_docproc = SRCTREE=$(srctree)/ $(DOCPROC) doc $< >$@
+define rule_docproc
+ set -e; \
+ $(if $($(quiet)cmd_$(1)),echo ' $($(quiet)cmd_$(1))';) \
+ $(cmd_$(1)); \
+ ( \
+ echo 'cmd_$@ := $(cmd_$(1))'; \
+ echo $@: `SRCTREE=$(srctree) $(DOCPROC) depend $<`; \
+ ) > $(dir $@).$(notdir $@).cmd
+endef
+
+%.xml: %.tmpl $(KERNELDOC) $(DOCPROC) FORCE
+ $(call if_changed_rule,docproc)
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+notfoundtemplate = echo "*** You have to install docbook-utils or xmlto ***"; \
+ exit 1
+db2xtemplate = db2TYPE -o $(dir $@) $<
+xmltotemplate = xmlto TYPE $(XMLTOFLAGS) -o $(dir $@) $<
+
+# determine which methods are available
+ifeq ($(shell which db2ps >/dev/null 2>&1 && echo found),found)
+ use-db2x = db2x
+ prefer-db2x = db2x
+else
+ use-db2x = notfound
+ prefer-db2x = $(use-xmlto)
+endif
+ifeq ($(shell which xmlto >/dev/null 2>&1 && echo found),found)
+ use-xmlto = xmlto
+ prefer-xmlto = xmlto
+else
+ use-xmlto = notfound
+ prefer-xmlto = $(use-db2x)
+endif
+
+# the commands, generated from the chosen template
+quiet_cmd_db2ps = PS $@
+ cmd_db2ps = $(subst TYPE,ps, $($(PS_METHOD)template))
+%.ps : %.xml
+ $(call cmd,db2ps)
+
+quiet_cmd_db2pdf = PDF $@
+ cmd_db2pdf = $(subst TYPE,pdf, $($(PDF_METHOD)template))
+%.pdf : %.xml
+ $(call cmd,db2pdf)
+
+
+index = index.html
+main_idx = $(obj)/$(index)
+build_main_index = rm -rf $(main_idx); \
+ echo '<h1>Linux Kernel HTML Documentation</h1>' >> $(main_idx) && \
+ echo '<h2>Kernel Version: $(KERNELVERSION)</h2>' >> $(main_idx) && \
+ cat $(HTML) >> $(main_idx)
+
+quiet_cmd_db2html = HTML $@
+ cmd_db2html = xmlto html $(XMLTOFLAGS) -o $(patsubst %.html,%,$@) $< && \
+ echo '<a HREF="$(patsubst %.html,%,$(notdir $@))/index.html"> \
+ $(patsubst %.html,%,$(notdir $@))</a><p>' > $@
+
+%.html: %.xml
+ @(which xmlto > /dev/null 2>&1) || \
+ (echo "*** You need to install xmlto ***"; \
+ exit 1)
+ @rm -rf $@ $(patsubst %.html,%,$@)
+ $(call cmd,db2html)
+ @if [ ! -z "$(PNG-$(basename $(notdir $@)))" ]; then \
+ cp $(PNG-$(basename $(notdir $@))) $(patsubst %.html,%,$@); fi
+
+quiet_cmd_db2man = MAN $@
+ cmd_db2man = if grep -q refentry $<; then xmlto man $(XMLTOFLAGS) -o $(obj)/man $< ; fi
+%.9 : %.xml
+ @(which xmlto > /dev/null 2>&1) || \
+ (echo "*** You need to install xmlto ***"; \
+ exit 1)
+ $(Q)mkdir -p $(obj)/man
+ $(call cmd,db2man)
+ @touch $@
+
+###
+# Rules to generate postscripts and PNG images from .fig format files
+quiet_cmd_fig2eps = FIG2EPS $@
+ cmd_fig2eps = fig2dev -Leps $< $@
+
+%.eps: %.fig
+ @(which fig2dev > /dev/null 2>&1) || \
+ (echo "*** You need to install transfig ***"; \
+ exit 1)
+ $(call cmd,fig2eps)
+
+quiet_cmd_fig2png = FIG2PNG $@
+ cmd_fig2png = fig2dev -Lpng $< $@
+
+%.png: %.fig
+ @(which fig2dev > /dev/null 2>&1) || \
+ (echo "*** You need to install transfig ***"; \
+ exit 1)
+ $(call cmd,fig2png)
+
+###
+# Rule to convert a .c file to inline XML documentation
+ gen_xml = :
+ quiet_gen_xml = echo ' GEN $@'
+silent_gen_xml = :
+%.xml: %.c
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>"; \
+ expand --tabs=8 < $< | \
+ sed -e "s/&/\\&amp;/g" \
+ -e "s/</\\&lt;/g" \
+ -e "s/>/\\&gt;/g"; \
+ echo "</programlisting>") > $@
+
+###
+# Help targets as used by the top-level makefile
+dochelp:
+ @echo ' Linux kernel internal documentation in different formats:'
+ @echo ' htmldocs - HTML'
+ @echo ' pdfdocs - PDF'
+ @echo ' psdocs - Postscript'
+ @echo ' xmldocs - XML DocBook'
+ @echo ' mandocs - man pages'
+ @echo ' installmandocs - install man pages generated by mandocs'
+ @echo ' cleandocs - clean all generated DocBook files'
+
+###
+# Temporary files left by various tools
+clean-files := $(DOCBOOKS) \
+ $(patsubst %.xml, %.dvi, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.aux, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.tex, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.log, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.out, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.ps, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.pdf, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.html, $(DOCBOOKS)) \
+ $(patsubst %.xml, %.9, $(DOCBOOKS)) \
+ $(index)
+
+clean-dirs := $(patsubst %.xml,%,$(DOCBOOKS)) man
+
+cleandocs: cleanmediadocs
+ $(Q)rm -f $(call objectify, $(clean-files))
+ $(Q)rm -rf $(call objectify, $(clean-dirs))
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable se we can use it in if_changed and friends.
+
+.PHONY: $(PHONY)
diff --git a/Documentation/DocBook/alsa-driver-api.tmpl b/Documentation/DocBook/alsa-driver-api.tmpl
new file mode 100644
index 000000000..71f924612
--- /dev/null
+++ b/Documentation/DocBook/alsa-driver-api.tmpl
@@ -0,0 +1,140 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<!-- ****************************************************** -->
+<!-- Header -->
+<!-- ****************************************************** -->
+<book id="ALSA-Driver-API">
+ <bookinfo>
+ <title>The ALSA Driver API</title>
+
+ <legalnotice>
+ <para>
+ This document is free; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ </para>
+
+ <para>
+ This document is distributed in the hope that it will be useful,
+ but <emphasis>WITHOUT ANY WARRANTY</emphasis>; without even the
+ implied warranty of <emphasis>MERCHANTABILITY or FITNESS FOR A
+ PARTICULAR PURPOSE</emphasis>. See the GNU General Public License
+ for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+ </legalnotice>
+
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter><title>Management of Cards and Devices</title>
+ <sect1><title>Card Management</title>
+!Esound/core/init.c
+ </sect1>
+ <sect1><title>Device Components</title>
+!Esound/core/device.c
+ </sect1>
+ <sect1><title>Module requests and Device File Entries</title>
+!Esound/core/sound.c
+ </sect1>
+ <sect1><title>Memory Management Helpers</title>
+!Esound/core/memory.c
+!Esound/core/memalloc.c
+ </sect1>
+ </chapter>
+ <chapter><title>PCM API</title>
+ <sect1><title>PCM Core</title>
+!Esound/core/pcm.c
+!Esound/core/pcm_lib.c
+!Esound/core/pcm_native.c
+!Iinclude/sound/pcm.h
+ </sect1>
+ <sect1><title>PCM Format Helpers</title>
+!Esound/core/pcm_misc.c
+ </sect1>
+ <sect1><title>PCM Memory Management</title>
+!Esound/core/pcm_memory.c
+ </sect1>
+ <sect1><title>PCM DMA Engine API</title>
+!Esound/core/pcm_dmaengine.c
+!Iinclude/sound/dmaengine_pcm.h
+ </sect1>
+ </chapter>
+ <chapter><title>Control/Mixer API</title>
+ <sect1><title>General Control Interface</title>
+!Esound/core/control.c
+ </sect1>
+ <sect1><title>AC97 Codec API</title>
+!Esound/pci/ac97/ac97_codec.c
+!Esound/pci/ac97/ac97_pcm.c
+ </sect1>
+ <sect1><title>Virtual Master Control API</title>
+!Esound/core/vmaster.c
+!Iinclude/sound/control.h
+ </sect1>
+ </chapter>
+ <chapter><title>MIDI API</title>
+ <sect1><title>Raw MIDI API</title>
+!Esound/core/rawmidi.c
+ </sect1>
+ <sect1><title>MPU401-UART API</title>
+!Esound/drivers/mpu401/mpu401_uart.c
+ </sect1>
+ </chapter>
+ <chapter><title>Proc Info API</title>
+ <sect1><title>Proc Info Interface</title>
+!Esound/core/info.c
+ </sect1>
+ </chapter>
+ <chapter><title>Compress Offload</title>
+ <sect1><title>Compress Offload API</title>
+!Esound/core/compress_offload.c
+!Iinclude/uapi/sound/compress_offload.h
+!Iinclude/uapi/sound/compress_params.h
+!Iinclude/sound/compress_driver.h
+ </sect1>
+ </chapter>
+ <chapter><title>ASoC</title>
+ <sect1><title>ASoC Core API</title>
+!Iinclude/sound/soc.h
+!Esound/soc/soc-core.c
+!Esound/soc/soc-cache.c
+!Esound/soc/soc-devres.c
+!Esound/soc/soc-io.c
+!Esound/soc/soc-pcm.c
+ </sect1>
+ <sect1><title>ASoC DAPM API</title>
+!Esound/soc/soc-dapm.c
+ </sect1>
+ <sect1><title>ASoC DMA Engine API</title>
+!Esound/soc/soc-generic-dmaengine-pcm.c
+ </sect1>
+ </chapter>
+ <chapter><title>Miscellaneous Functions</title>
+ <sect1><title>Hardware-Dependent Devices API</title>
+!Esound/core/hwdep.c
+ </sect1>
+ <sect1><title>Jack Abstraction Layer API</title>
+!Iinclude/sound/jack.h
+!Esound/core/jack.c
+!Esound/soc/soc-jack.c
+ </sect1>
+ <sect1><title>ISA DMA Helpers</title>
+!Esound/core/isadma.c
+ </sect1>
+ <sect1><title>Other Helper Macros</title>
+!Iinclude/sound/core.h
+ </sect1>
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/crypto-API.tmpl b/Documentation/DocBook/crypto-API.tmpl
new file mode 100644
index 000000000..efc8d90a9
--- /dev/null
+++ b/Documentation/DocBook/crypto-API.tmpl
@@ -0,0 +1,2113 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="KernelCryptoAPI">
+ <bookinfo>
+ <title>Linux Kernel Crypto API</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Stephan</firstname>
+ <surname>Mueller</surname>
+ <affiliation>
+ <address>
+ <email>smueller@chronox.de</email>
+ </address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Marek</firstname>
+ <surname>Vasut</surname>
+ <affiliation>
+ <address>
+ <email>marek@denx.de</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2014</year>
+ <holder>Stephan Mueller</holder>
+ </copyright>
+
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+
+ <chapter id="Intro">
+ <title>Kernel Crypto API Interface Specification</title>
+
+ <sect1><title>Introduction</title>
+
+ <para>
+ The kernel crypto API offers a rich set of cryptographic ciphers as
+ well as other data transformation mechanisms and methods to invoke
+ these. This document contains a description of the API and provides
+ example code.
+ </para>
+
+ <para>
+ To understand and properly use the kernel crypto API a brief
+ explanation of its structure is given. Based on the architecture,
+ the API can be separated into different components. Following the
+ architecture specification, hints to developers of ciphers are
+ provided. Pointers to the API function call documentation are
+ given at the end.
+ </para>
+
+ <para>
+ The kernel crypto API refers to all algorithms as "transformations".
+ Therefore, a cipher handle variable usually has the name "tfm".
+ Besides cryptographic operations, the kernel crypto API also knows
+ compression transformations and handles them the same way as ciphers.
+ </para>
+
+ <para>
+ The kernel crypto API serves the following entity types:
+
+ <itemizedlist>
+ <listitem>
+ <para>consumers requesting cryptographic services</para>
+ </listitem>
+ <listitem>
+ <para>data transformation implementations (typically ciphers)
+ that can be called by consumers using the kernel crypto
+ API</para>
+ </listitem>
+ </itemizedlist>
+ </para>
+
+ <para>
+ This specification is intended for consumers of the kernel crypto
+ API as well as for developers implementing ciphers. This API
+ specification, however, does not discuss all API calls available
+ to data transformation implementations (i.e. implementations of
+ ciphers and other transformations (such as CRC or even compression
+ algorithms) that can register with the kernel crypto API).
+ </para>
+
+ <para>
+ Note: The terms "transformation" and cipher algorithm are used
+ interchangably.
+ </para>
+ </sect1>
+
+ <sect1><title>Terminology</title>
+ <para>
+ The transformation implementation is an actual code or interface
+ to hardware which implements a certain transformation with precisely
+ defined behavior.
+ </para>
+
+ <para>
+ The transformation object (TFM) is an instance of a transformation
+ implementation. There can be multiple transformation objects
+ associated with a single transformation implementation. Each of
+ those transformation objects is held by a crypto API consumer or
+ another transformation. Transformation object is allocated when a
+ crypto API consumer requests a transformation implementation.
+ The consumer is then provided with a structure, which contains
+ a transformation object (TFM).
+ </para>
+
+ <para>
+ The structure that contains transformation objects may also be
+ referred to as a "cipher handle". Such a cipher handle is always
+ subject to the following phases that are reflected in the API calls
+ applicable to such a cipher handle:
+ </para>
+
+ <orderedlist>
+ <listitem>
+ <para>Initialization of a cipher handle.</para>
+ </listitem>
+ <listitem>
+ <para>Execution of all intended cipher operations applicable
+ for the handle where the cipher handle must be furnished to
+ every API call.</para>
+ </listitem>
+ <listitem>
+ <para>Destruction of a cipher handle.</para>
+ </listitem>
+ </orderedlist>
+
+ <para>
+ When using the initialization API calls, a cipher handle is
+ created and returned to the consumer. Therefore, please refer
+ to all initialization API calls that refer to the data
+ structure type a consumer is expected to receive and subsequently
+ to use. The initialization API calls have all the same naming
+ conventions of crypto_alloc_*.
+ </para>
+
+ <para>
+ The transformation context is private data associated with
+ the transformation object.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="Architecture"><title>Kernel Crypto API Architecture</title>
+ <sect1><title>Cipher algorithm types</title>
+ <para>
+ The kernel crypto API provides different API calls for the
+ following cipher types:
+
+ <itemizedlist>
+ <listitem><para>Symmetric ciphers</para></listitem>
+ <listitem><para>AEAD ciphers</para></listitem>
+ <listitem><para>Message digest, including keyed message digest</para></listitem>
+ <listitem><para>Random number generation</para></listitem>
+ <listitem><para>User space interface</para></listitem>
+ </itemizedlist>
+ </para>
+ </sect1>
+
+ <sect1><title>Ciphers And Templates</title>
+ <para>
+ The kernel crypto API provides implementations of single block
+ ciphers and message digests. In addition, the kernel crypto API
+ provides numerous "templates" that can be used in conjunction
+ with the single block ciphers and message digests. Templates
+ include all types of block chaining mode, the HMAC mechanism, etc.
+ </para>
+
+ <para>
+ Single block ciphers and message digests can either be directly
+ used by a caller or invoked together with a template to form
+ multi-block ciphers or keyed message digests.
+ </para>
+
+ <para>
+ A single block cipher may even be called with multiple templates.
+ However, templates cannot be used without a single cipher.
+ </para>
+
+ <para>
+ See /proc/crypto and search for "name". For example:
+
+ <itemizedlist>
+ <listitem><para>aes</para></listitem>
+ <listitem><para>ecb(aes)</para></listitem>
+ <listitem><para>cmac(aes)</para></listitem>
+ <listitem><para>ccm(aes)</para></listitem>
+ <listitem><para>rfc4106(gcm(aes))</para></listitem>
+ <listitem><para>sha1</para></listitem>
+ <listitem><para>hmac(sha1)</para></listitem>
+ <listitem><para>authenc(hmac(sha1),cbc(aes))</para></listitem>
+ </itemizedlist>
+ </para>
+
+ <para>
+ In these examples, "aes" and "sha1" are the ciphers and all
+ others are the templates.
+ </para>
+ </sect1>
+
+ <sect1><title>Synchronous And Asynchronous Operation</title>
+ <para>
+ The kernel crypto API provides synchronous and asynchronous
+ API operations.
+ </para>
+
+ <para>
+ When using the synchronous API operation, the caller invokes
+ a cipher operation which is performed synchronously by the
+ kernel crypto API. That means, the caller waits until the
+ cipher operation completes. Therefore, the kernel crypto API
+ calls work like regular function calls. For synchronous
+ operation, the set of API calls is small and conceptually
+ similar to any other crypto library.
+ </para>
+
+ <para>
+ Asynchronous operation is provided by the kernel crypto API
+ which implies that the invocation of a cipher operation will
+ complete almost instantly. That invocation triggers the
+ cipher operation but it does not signal its completion. Before
+ invoking a cipher operation, the caller must provide a callback
+ function the kernel crypto API can invoke to signal the
+ completion of the cipher operation. Furthermore, the caller
+ must ensure it can handle such asynchronous events by applying
+ appropriate locking around its data. The kernel crypto API
+ does not perform any special serialization operation to protect
+ the caller's data integrity.
+ </para>
+ </sect1>
+
+ <sect1><title>Crypto API Cipher References And Priority</title>
+ <para>
+ A cipher is referenced by the caller with a string. That string
+ has the following semantics:
+
+ <programlisting>
+ template(single block cipher)
+ </programlisting>
+
+ where "template" and "single block cipher" is the aforementioned
+ template and single block cipher, respectively. If applicable,
+ additional templates may enclose other templates, such as
+
+ <programlisting>
+ template1(template2(single block cipher)))
+ </programlisting>
+ </para>
+
+ <para>
+ The kernel crypto API may provide multiple implementations of a
+ template or a single block cipher. For example, AES on newer
+ Intel hardware has the following implementations: AES-NI,
+ assembler implementation, or straight C. Now, when using the
+ string "aes" with the kernel crypto API, which cipher
+ implementation is used? The answer to that question is the
+ priority number assigned to each cipher implementation by the
+ kernel crypto API. When a caller uses the string to refer to a
+ cipher during initialization of a cipher handle, the kernel
+ crypto API looks up all implementations providing an
+ implementation with that name and selects the implementation
+ with the highest priority.
+ </para>
+
+ <para>
+ Now, a caller may have the need to refer to a specific cipher
+ implementation and thus does not want to rely on the
+ priority-based selection. To accommodate this scenario, the
+ kernel crypto API allows the cipher implementation to register
+ a unique name in addition to common names. When using that
+ unique name, a caller is therefore always sure to refer to
+ the intended cipher implementation.
+ </para>
+
+ <para>
+ The list of available ciphers is given in /proc/crypto. However,
+ that list does not specify all possible permutations of
+ templates and ciphers. Each block listed in /proc/crypto may
+ contain the following information -- if one of the components
+ listed as follows are not applicable to a cipher, it is not
+ displayed:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>name: the generic name of the cipher that is subject
+ to the priority-based selection -- this name can be used by
+ the cipher allocation API calls (all names listed above are
+ examples for such generic names)</para>
+ </listitem>
+ <listitem>
+ <para>driver: the unique name of the cipher -- this name can
+ be used by the cipher allocation API calls</para>
+ </listitem>
+ <listitem>
+ <para>module: the kernel module providing the cipher
+ implementation (or "kernel" for statically linked ciphers)</para>
+ </listitem>
+ <listitem>
+ <para>priority: the priority value of the cipher implementation</para>
+ </listitem>
+ <listitem>
+ <para>refcnt: the reference count of the respective cipher
+ (i.e. the number of current consumers of this cipher)</para>
+ </listitem>
+ <listitem>
+ <para>selftest: specification whether the self test for the
+ cipher passed</para>
+ </listitem>
+ <listitem>
+ <para>type:
+ <itemizedlist>
+ <listitem>
+ <para>blkcipher for synchronous block ciphers</para>
+ </listitem>
+ <listitem>
+ <para>ablkcipher for asynchronous block ciphers</para>
+ </listitem>
+ <listitem>
+ <para>cipher for single block ciphers that may be used with
+ an additional template</para>
+ </listitem>
+ <listitem>
+ <para>shash for synchronous message digest</para>
+ </listitem>
+ <listitem>
+ <para>ahash for asynchronous message digest</para>
+ </listitem>
+ <listitem>
+ <para>aead for AEAD cipher type</para>
+ </listitem>
+ <listitem>
+ <para>compression for compression type transformations</para>
+ </listitem>
+ <listitem>
+ <para>rng for random number generator</para>
+ </listitem>
+ <listitem>
+ <para>givcipher for cipher with associated IV generator
+ (see the geniv entry below for the specification of the
+ IV generator type used by the cipher implementation)</para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem>
+ <para>blocksize: blocksize of cipher in bytes</para>
+ </listitem>
+ <listitem>
+ <para>keysize: key size in bytes</para>
+ </listitem>
+ <listitem>
+ <para>ivsize: IV size in bytes</para>
+ </listitem>
+ <listitem>
+ <para>seedsize: required size of seed data for random number
+ generator</para>
+ </listitem>
+ <listitem>
+ <para>digestsize: output size of the message digest</para>
+ </listitem>
+ <listitem>
+ <para>geniv: IV generation type:
+ <itemizedlist>
+ <listitem>
+ <para>eseqiv for encrypted sequence number based IV
+ generation</para>
+ </listitem>
+ <listitem>
+ <para>seqiv for sequence number based IV generation</para>
+ </listitem>
+ <listitem>
+ <para>chainiv for chain iv generation</para>
+ </listitem>
+ <listitem>
+ <para>&lt;builtin&gt; is a marker that the cipher implements
+ IV generation and handling as it is specific to the given
+ cipher</para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect1>
+
+ <sect1><title>Key Sizes</title>
+ <para>
+ When allocating a cipher handle, the caller only specifies the
+ cipher type. Symmetric ciphers, however, typically support
+ multiple key sizes (e.g. AES-128 vs. AES-192 vs. AES-256).
+ These key sizes are determined with the length of the provided
+ key. Thus, the kernel crypto API does not provide a separate
+ way to select the particular symmetric cipher key size.
+ </para>
+ </sect1>
+
+ <sect1><title>Cipher Allocation Type And Masks</title>
+ <para>
+ The different cipher handle allocation functions allow the
+ specification of a type and mask flag. Both parameters have
+ the following meaning (and are therefore not covered in the
+ subsequent sections).
+ </para>
+
+ <para>
+ The type flag specifies the type of the cipher algorithm.
+ The caller usually provides a 0 when the caller wants the
+ default handling. Otherwise, the caller may provide the
+ following selections which match the the aforementioned
+ cipher types:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_CIPHER Single block cipher</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_COMPRESS Compression</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_AEAD Authenticated Encryption with
+ Associated Data (MAC)</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_BLKCIPHER Synchronous multi-block cipher</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_ABLKCIPHER Asynchronous multi-block cipher</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_GIVCIPHER Asynchronous multi-block
+ cipher packed together with an IV generator (see geniv field
+ in the /proc/crypto listing for the known IV generators)</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_DIGEST Raw message digest</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_HASH Alias for CRYPTO_ALG_TYPE_DIGEST</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_SHASH Synchronous multi-block hash</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_AHASH Asynchronous multi-block hash</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_RNG Random Number Generation</para>
+ </listitem>
+ <listitem>
+ <para>CRYPTO_ALG_TYPE_PCOMPRESS Enhanced version of
+ CRYPTO_ALG_TYPE_COMPRESS allowing for segmented compression /
+ decompression instead of performing the operation on one
+ segment only. CRYPTO_ALG_TYPE_PCOMPRESS is intended to replace
+ CRYPTO_ALG_TYPE_COMPRESS once existing consumers are converted.</para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The mask flag restricts the type of cipher. The only allowed
+ flag is CRYPTO_ALG_ASYNC to restrict the cipher lookup function
+ to asynchronous ciphers. Usually, a caller provides a 0 for the
+ mask flag.
+ </para>
+
+ <para>
+ When the caller provides a mask and type specification, the
+ caller limits the search the kernel crypto API can perform for
+ a suitable cipher implementation for the given cipher name.
+ That means, even when a caller uses a cipher name that exists
+ during its initialization call, the kernel crypto API may not
+ select it due to the used type and mask field.
+ </para>
+ </sect1>
+
+ <sect1><title>Internal Structure of Kernel Crypto API</title>
+
+ <para>
+ The kernel crypto API has an internal structure where a cipher
+ implementation may use many layers and indirections. This section
+ shall help to clarify how the kernel crypto API uses
+ various components to implement the complete cipher.
+ </para>
+
+ <para>
+ The following subsections explain the internal structure based
+ on existing cipher implementations. The first section addresses
+ the most complex scenario where all other scenarios form a logical
+ subset.
+ </para>
+
+ <sect2><title>Generic AEAD Cipher Structure</title>
+
+ <para>
+ The following ASCII art decomposes the kernel crypto API layers
+ when using the AEAD cipher with the automated IV generation. The
+ shown example is used by the IPSEC layer.
+ </para>
+
+ <para>
+ For other use cases of AEAD ciphers, the ASCII art applies as
+ well, but the caller may not use the GIVCIPHER interface. In
+ this case, the caller must generate the IV.
+ </para>
+
+ <para>
+ The depicted example decomposes the AEAD cipher of GCM(AES) based
+ on the generic C implementations (gcm.c, aes-generic.c, ctr.c,
+ ghash-generic.c, seqiv.c). The generic implementation serves as an
+ example showing the complete logic of the kernel crypto API.
+ </para>
+
+ <para>
+ It is possible that some streamlined cipher implementations (like
+ AES-NI) provide implementations merging aspects which in the view
+ of the kernel crypto API cannot be decomposed into layers any more.
+ In case of the AES-NI implementation, the CTR mode, the GHASH
+ implementation and the AES cipher are all merged into one cipher
+ implementation registered with the kernel crypto API. In this case,
+ the concept described by the following ASCII art applies too. However,
+ the decomposition of GCM into the individual sub-components
+ by the kernel crypto API is not done any more.
+ </para>
+
+ <para>
+ Each block in the following ASCII art is an independent cipher
+ instance obtained from the kernel crypto API. Each block
+ is accessed by the caller or by other blocks using the API functions
+ defined by the kernel crypto API for the cipher implementation type.
+ </para>
+
+ <para>
+ The blocks below indicate the cipher type as well as the specific
+ logic implemented in the cipher.
+ </para>
+
+ <para>
+ The ASCII art picture also indicates the call structure, i.e. who
+ calls which component. The arrows point to the invoked block
+ where the caller uses the API applicable to the cipher type
+ specified for the block.
+ </para>
+
+ <programlisting>
+<![CDATA[
+kernel crypto API | IPSEC Layer
+ |
++-----------+ |
+| | (1)
+| givcipher | <----------------------------------- esp_output
+| (seqiv) | ---+
++-----------+ |
+ | (2)
++-----------+ |
+| | <--+ (2)
+| aead | <----------------------------------- esp_input
+| (gcm) | ------------+
++-----------+ |
+ | (3) | (5)
+ v v
++-----------+ +-----------+
+| | | |
+| ablkcipher| | ahash |
+| (ctr) | ---+ | (ghash) |
++-----------+ | +-----------+
+ |
++-----------+ | (4)
+| | <--+
+| cipher |
+| (aes) |
++-----------+
+]]>
+ </programlisting>
+
+ <para>
+ The following call sequence is applicable when the IPSEC layer
+ triggers an encryption operation with the esp_output function. During
+ configuration, the administrator set up the use of rfc4106(gcm(aes)) as
+ the cipher for ESP. The following call sequence is now depicted in the
+ ASCII art above:
+ </para>
+
+ <orderedlist>
+ <listitem>
+ <para>
+ esp_output() invokes crypto_aead_givencrypt() to trigger an encryption
+ operation of the GIVCIPHER implementation.
+ </para>
+
+ <para>
+ In case of GCM, the SEQIV implementation is registered as GIVCIPHER
+ in crypto_rfc4106_alloc().
+ </para>
+
+ <para>
+ The SEQIV performs its operation to generate an IV where the core
+ function is seqiv_geniv().
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Now, SEQIV uses the AEAD API function calls to invoke the associated
+ AEAD cipher. In our case, during the instantiation of SEQIV, the
+ cipher handle for GCM is provided to SEQIV. This means that SEQIV
+ invokes AEAD cipher operations with the GCM cipher handle.
+ </para>
+
+ <para>
+ During instantiation of the GCM handle, the CTR(AES) and GHASH
+ ciphers are instantiated. The cipher handles for CTR(AES) and GHASH
+ are retained for later use.
+ </para>
+
+ <para>
+ The GCM implementation is responsible to invoke the CTR mode AES and
+ the GHASH cipher in the right manner to implement the GCM
+ specification.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The GCM AEAD cipher type implementation now invokes the ABLKCIPHER API
+ with the instantiated CTR(AES) cipher handle.
+ </para>
+
+ <para>
+ During instantiation of the CTR(AES) cipher, the CIPHER type
+ implementation of AES is instantiated. The cipher handle for AES is
+ retained.
+ </para>
+
+ <para>
+ That means that the ABLKCIPHER implementation of CTR(AES) only
+ implements the CTR block chaining mode. After performing the block
+ chaining operation, the CIPHER implementation of AES is invoked.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The ABLKCIPHER of CTR(AES) now invokes the CIPHER API with the AES
+ cipher handle to encrypt one block.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The GCM AEAD implementation also invokes the GHASH cipher
+ implementation via the AHASH API.
+ </para>
+ </listitem>
+ </orderedlist>
+
+ <para>
+ When the IPSEC layer triggers the esp_input() function, the same call
+ sequence is followed with the only difference that the operation starts
+ with step (2).
+ </para>
+ </sect2>
+
+ <sect2><title>Generic Block Cipher Structure</title>
+ <para>
+ Generic block ciphers follow the same concept as depicted with the ASCII
+ art picture above.
+ </para>
+
+ <para>
+ For example, CBC(AES) is implemented with cbc.c, and aes-generic.c. The
+ ASCII art picture above applies as well with the difference that only
+ step (4) is used and the ABLKCIPHER block chaining mode is CBC.
+ </para>
+ </sect2>
+
+ <sect2><title>Generic Keyed Message Digest Structure</title>
+ <para>
+ Keyed message digest implementations again follow the same concept as
+ depicted in the ASCII art picture above.
+ </para>
+
+ <para>
+ For example, HMAC(SHA256) is implemented with hmac.c and
+ sha256_generic.c. The following ASCII art illustrates the
+ implementation:
+ </para>
+
+ <programlisting>
+<![CDATA[
+kernel crypto API | Caller
+ |
++-----------+ (1) |
+| | <------------------ some_function
+| ahash |
+| (hmac) | ---+
++-----------+ |
+ | (2)
++-----------+ |
+| | <--+
+| shash |
+| (sha256) |
++-----------+
+]]>
+ </programlisting>
+
+ <para>
+ The following call sequence is applicable when a caller triggers
+ an HMAC operation:
+ </para>
+
+ <orderedlist>
+ <listitem>
+ <para>
+ The AHASH API functions are invoked by the caller. The HMAC
+ implementation performs its operation as needed.
+ </para>
+
+ <para>
+ During initialization of the HMAC cipher, the SHASH cipher type of
+ SHA256 is instantiated. The cipher handle for the SHA256 instance is
+ retained.
+ </para>
+
+ <para>
+ At one time, the HMAC implementation requires a SHA256 operation
+ where the SHA256 cipher handle is used.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ The HMAC instance now invokes the SHASH API with the SHA256
+ cipher handle to calculate the message digest.
+ </para>
+ </listitem>
+ </orderedlist>
+ </sect2>
+ </sect1>
+ </chapter>
+
+ <chapter id="Development"><title>Developing Cipher Algorithms</title>
+ <sect1><title>Registering And Unregistering Transformation</title>
+ <para>
+ There are three distinct types of registration functions in
+ the Crypto API. One is used to register a generic cryptographic
+ transformation, while the other two are specific to HASH
+ transformations and COMPRESSion. We will discuss the latter
+ two in a separate chapter, here we will only look at the
+ generic ones.
+ </para>
+
+ <para>
+ Before discussing the register functions, the data structure
+ to be filled with each, struct crypto_alg, must be considered
+ -- see below for a description of this data structure.
+ </para>
+
+ <para>
+ The generic registration functions can be found in
+ include/linux/crypto.h and their definition can be seen below.
+ The former function registers a single transformation, while
+ the latter works on an array of transformation descriptions.
+ The latter is useful when registering transformations in bulk.
+ </para>
+
+ <programlisting>
+ int crypto_register_alg(struct crypto_alg *alg);
+ int crypto_register_algs(struct crypto_alg *algs, int count);
+ </programlisting>
+
+ <para>
+ The counterparts to those functions are listed below.
+ </para>
+
+ <programlisting>
+ int crypto_unregister_alg(struct crypto_alg *alg);
+ int crypto_unregister_algs(struct crypto_alg *algs, int count);
+ </programlisting>
+
+ <para>
+ Notice that both registration and unregistration functions
+ do return a value, so make sure to handle errors. A return
+ code of zero implies success. Any return code &lt; 0 implies
+ an error.
+ </para>
+
+ <para>
+ The bulk registration / unregistration functions require
+ that struct crypto_alg is an array of count size. These
+ functions simply loop over that array and register /
+ unregister each individual algorithm. If an error occurs,
+ the loop is terminated at the offending algorithm definition.
+ That means, the algorithms prior to the offending algorithm
+ are successfully registered. Note, the caller has no way of
+ knowing which cipher implementations have successfully
+ registered. If this is important to know, the caller should
+ loop through the different implementations using the single
+ instance *_alg functions for each individual implementation.
+ </para>
+ </sect1>
+
+ <sect1><title>Single-Block Symmetric Ciphers [CIPHER]</title>
+ <para>
+ Example of transformations: aes, arc4, ...
+ </para>
+
+ <para>
+ This section describes the simplest of all transformation
+ implementations, that being the CIPHER type used for symmetric
+ ciphers. The CIPHER type is used for transformations which
+ operate on exactly one block at a time and there are no
+ dependencies between blocks at all.
+ </para>
+
+ <sect2><title>Registration specifics</title>
+ <para>
+ The registration of [CIPHER] algorithm is specific in that
+ struct crypto_alg field .cra_type is empty. The .cra_u.cipher
+ has to be filled in with proper callbacks to implement this
+ transformation.
+ </para>
+
+ <para>
+ See struct cipher_alg below.
+ </para>
+ </sect2>
+
+ <sect2><title>Cipher Definition With struct cipher_alg</title>
+ <para>
+ Struct cipher_alg defines a single block cipher.
+ </para>
+
+ <para>
+ Here are schematics of how these functions are called when
+ operated from other part of the kernel. Note that the
+ .cia_setkey() call might happen before or after any of these
+ schematics happen, but must not happen during any of these
+ are in-flight.
+ </para>
+
+ <para>
+ <programlisting>
+ KEY ---. PLAINTEXT ---.
+ v v
+ .cia_setkey() -&gt; .cia_encrypt()
+ |
+ '-----&gt; CIPHERTEXT
+ </programlisting>
+ </para>
+
+ <para>
+ Please note that a pattern where .cia_setkey() is called
+ multiple times is also valid:
+ </para>
+
+ <para>
+ <programlisting>
+
+ KEY1 --. PLAINTEXT1 --. KEY2 --. PLAINTEXT2 --.
+ v v v v
+ .cia_setkey() -&gt; .cia_encrypt() -&gt; .cia_setkey() -&gt; .cia_encrypt()
+ | |
+ '---&gt; CIPHERTEXT1 '---&gt; CIPHERTEXT2
+ </programlisting>
+ </para>
+
+ </sect2>
+ </sect1>
+
+ <sect1><title>Multi-Block Ciphers [BLKCIPHER] [ABLKCIPHER]</title>
+ <para>
+ Example of transformations: cbc(aes), ecb(arc4), ...
+ </para>
+
+ <para>
+ This section describes the multi-block cipher transformation
+ implementations for both synchronous [BLKCIPHER] and
+ asynchronous [ABLKCIPHER] case. The multi-block ciphers are
+ used for transformations which operate on scatterlists of
+ data supplied to the transformation functions. They output
+ the result into a scatterlist of data as well.
+ </para>
+
+ <sect2><title>Registration Specifics</title>
+
+ <para>
+ The registration of [BLKCIPHER] or [ABLKCIPHER] algorithms
+ is one of the most standard procedures throughout the crypto API.
+ </para>
+
+ <para>
+ Note, if a cipher implementation requires a proper alignment
+ of data, the caller should use the functions of
+ crypto_blkcipher_alignmask() or crypto_ablkcipher_alignmask()
+ respectively to identify a memory alignment mask. The kernel
+ crypto API is able to process requests that are unaligned.
+ This implies, however, additional overhead as the kernel
+ crypto API needs to perform the realignment of the data which
+ may imply moving of data.
+ </para>
+ </sect2>
+
+ <sect2><title>Cipher Definition With struct blkcipher_alg and ablkcipher_alg</title>
+ <para>
+ Struct blkcipher_alg defines a synchronous block cipher whereas
+ struct ablkcipher_alg defines an asynchronous block cipher.
+ </para>
+
+ <para>
+ Please refer to the single block cipher description for schematics
+ of the block cipher usage. The usage patterns are exactly the same
+ for [ABLKCIPHER] and [BLKCIPHER] as they are for plain [CIPHER].
+ </para>
+ </sect2>
+
+ <sect2><title>Specifics Of Asynchronous Multi-Block Cipher</title>
+ <para>
+ There are a couple of specifics to the [ABLKCIPHER] interface.
+ </para>
+
+ <para>
+ First of all, some of the drivers will want to use the
+ Generic ScatterWalk in case the hardware needs to be fed
+ separate chunks of the scatterlist which contains the
+ plaintext and will contain the ciphertext. Please refer
+ to the ScatterWalk interface offered by the Linux kernel
+ scatter / gather list implementation.
+ </para>
+ </sect2>
+ </sect1>
+
+ <sect1><title>Hashing [HASH]</title>
+
+ <para>
+ Example of transformations: crc32, md5, sha1, sha256,...
+ </para>
+
+ <sect2><title>Registering And Unregistering The Transformation</title>
+
+ <para>
+ There are multiple ways to register a HASH transformation,
+ depending on whether the transformation is synchronous [SHASH]
+ or asynchronous [AHASH] and the amount of HASH transformations
+ we are registering. You can find the prototypes defined in
+ include/crypto/internal/hash.h:
+ </para>
+
+ <programlisting>
+ int crypto_register_ahash(struct ahash_alg *alg);
+
+ int crypto_register_shash(struct shash_alg *alg);
+ int crypto_register_shashes(struct shash_alg *algs, int count);
+ </programlisting>
+
+ <para>
+ The respective counterparts for unregistering the HASH
+ transformation are as follows:
+ </para>
+
+ <programlisting>
+ int crypto_unregister_ahash(struct ahash_alg *alg);
+
+ int crypto_unregister_shash(struct shash_alg *alg);
+ int crypto_unregister_shashes(struct shash_alg *algs, int count);
+ </programlisting>
+ </sect2>
+
+ <sect2><title>Cipher Definition With struct shash_alg and ahash_alg</title>
+ <para>
+ Here are schematics of how these functions are called when
+ operated from other part of the kernel. Note that the .setkey()
+ call might happen before or after any of these schematics happen,
+ but must not happen during any of these are in-flight. Please note
+ that calling .init() followed immediately by .finish() is also a
+ perfectly valid transformation.
+ </para>
+
+ <programlisting>
+ I) DATA -----------.
+ v
+ .init() -&gt; .update() -&gt; .final() ! .update() might not be called
+ ^ | | at all in this scenario.
+ '----' '---&gt; HASH
+
+ II) DATA -----------.-----------.
+ v v
+ .init() -&gt; .update() -&gt; .finup() ! .update() may not be called
+ ^ | | at all in this scenario.
+ '----' '---&gt; HASH
+
+ III) DATA -----------.
+ v
+ .digest() ! The entire process is handled
+ | by the .digest() call.
+ '---------------&gt; HASH
+ </programlisting>
+
+ <para>
+ Here is a schematic of how the .export()/.import() functions are
+ called when used from another part of the kernel.
+ </para>
+
+ <programlisting>
+ KEY--. DATA--.
+ v v ! .update() may not be called
+ .setkey() -&gt; .init() -&gt; .update() -&gt; .export() at all in this scenario.
+ ^ | |
+ '-----' '--&gt; PARTIAL_HASH
+
+ ----------- other transformations happen here -----------
+
+ PARTIAL_HASH--. DATA1--.
+ v v
+ .import -&gt; .update() -&gt; .final() ! .update() may not be called
+ ^ | | at all in this scenario.
+ '----' '--&gt; HASH1
+
+ PARTIAL_HASH--. DATA2-.
+ v v
+ .import -&gt; .finup()
+ |
+ '---------------&gt; HASH2
+ </programlisting>
+ </sect2>
+
+ <sect2><title>Specifics Of Asynchronous HASH Transformation</title>
+ <para>
+ Some of the drivers will want to use the Generic ScatterWalk
+ in case the implementation needs to be fed separate chunks of the
+ scatterlist which contains the input data. The buffer containing
+ the resulting hash will always be properly aligned to
+ .cra_alignmask so there is no need to worry about this.
+ </para>
+ </sect2>
+ </sect1>
+ </chapter>
+
+ <chapter id="User"><title>User Space Interface</title>
+ <sect1><title>Introduction</title>
+ <para>
+ The concepts of the kernel crypto API visible to kernel space is fully
+ applicable to the user space interface as well. Therefore, the kernel
+ crypto API high level discussion for the in-kernel use cases applies
+ here as well.
+ </para>
+
+ <para>
+ The major difference, however, is that user space can only act as a
+ consumer and never as a provider of a transformation or cipher algorithm.
+ </para>
+
+ <para>
+ The following covers the user space interface exported by the kernel
+ crypto API. A working example of this description is libkcapi that
+ can be obtained from [1]. That library can be used by user space
+ applications that require cryptographic services from the kernel.
+ </para>
+
+ <para>
+ Some details of the in-kernel kernel crypto API aspects do not
+ apply to user space, however. This includes the difference between
+ synchronous and asynchronous invocations. The user space API call
+ is fully synchronous.
+ </para>
+
+ <para>
+ [1] http://www.chronox.de/libkcapi.html
+ </para>
+
+ </sect1>
+
+ <sect1><title>User Space API General Remarks</title>
+ <para>
+ The kernel crypto API is accessible from user space. Currently,
+ the following ciphers are accessible:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>Message digest including keyed message digest (HMAC, CMAC)</para>
+ </listitem>
+
+ <listitem>
+ <para>Symmetric ciphers</para>
+ </listitem>
+
+ <listitem>
+ <para>AEAD ciphers</para>
+ </listitem>
+
+ <listitem>
+ <para>Random Number Generators</para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The interface is provided via socket type using the type AF_ALG.
+ In addition, the setsockopt option type is SOL_ALG. In case the
+ user space header files do not export these flags yet, use the
+ following macros:
+ </para>
+
+ <programlisting>
+#ifndef AF_ALG
+#define AF_ALG 38
+#endif
+#ifndef SOL_ALG
+#define SOL_ALG 279
+#endif
+ </programlisting>
+
+ <para>
+ A cipher is accessed with the same name as done for the in-kernel
+ API calls. This includes the generic vs. unique naming schema for
+ ciphers as well as the enforcement of priorities for generic names.
+ </para>
+
+ <para>
+ To interact with the kernel crypto API, a socket must be
+ created by the user space application. User space invokes the cipher
+ operation with the send()/write() system call family. The result of the
+ cipher operation is obtained with the read()/recv() system call family.
+ </para>
+
+ <para>
+ The following API calls assume that the socket descriptor
+ is already opened by the user space application and discusses only
+ the kernel crypto API specific invocations.
+ </para>
+
+ <para>
+ To initialize the socket interface, the following sequence has to
+ be performed by the consumer:
+ </para>
+
+ <orderedlist>
+ <listitem>
+ <para>
+ Create a socket of type AF_ALG with the struct sockaddr_alg
+ parameter specified below for the different cipher types.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Invoke bind with the socket descriptor
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Invoke accept with the socket descriptor. The accept system call
+ returns a new file descriptor that is to be used to interact with
+ the particular cipher instance. When invoking send/write or recv/read
+ system calls to send data to the kernel or obtain data from the
+ kernel, the file descriptor returned by accept must be used.
+ </para>
+ </listitem>
+ </orderedlist>
+ </sect1>
+
+ <sect1><title>In-place Cipher operation</title>
+ <para>
+ Just like the in-kernel operation of the kernel crypto API, the user
+ space interface allows the cipher operation in-place. That means that
+ the input buffer used for the send/write system call and the output
+ buffer used by the read/recv system call may be one and the same.
+ This is of particular interest for symmetric cipher operations where a
+ copying of the output data to its final destination can be avoided.
+ </para>
+
+ <para>
+ If a consumer on the other hand wants to maintain the plaintext and
+ the ciphertext in different memory locations, all a consumer needs
+ to do is to provide different memory pointers for the encryption and
+ decryption operation.
+ </para>
+ </sect1>
+
+ <sect1><title>Message Digest API</title>
+ <para>
+ The message digest type to be used for the cipher operation is
+ selected when invoking the bind syscall. bind requires the caller
+ to provide a filled struct sockaddr data structure. This data
+ structure must be filled as follows:
+ </para>
+
+ <programlisting>
+struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "hash", /* this selects the hash logic in the kernel */
+ .salg_name = "sha1" /* this is the cipher name */
+};
+ </programlisting>
+
+ <para>
+ The salg_type value "hash" applies to message digests and keyed
+ message digests. Though, a keyed message digest is referenced by
+ the appropriate salg_name. Please see below for the setsockopt
+ interface that explains how the key can be set for a keyed message
+ digest.
+ </para>
+
+ <para>
+ Using the send() system call, the application provides the data that
+ should be processed with the message digest. The send system call
+ allows the following flags to be specified:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ MSG_MORE: If this flag is set, the send system call acts like a
+ message digest update function where the final hash is not
+ yet calculated. If the flag is not set, the send system call
+ calculates the final message digest immediately.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ With the recv() system call, the application can read the message
+ digest from the kernel crypto API. If the buffer is too small for the
+ message digest, the flag MSG_TRUNC is set by the kernel.
+ </para>
+
+ <para>
+ In order to set a message digest key, the calling application must use
+ the setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC
+ operation is performed without the initial HMAC state change caused by
+ the key.
+ </para>
+ </sect1>
+
+ <sect1><title>Symmetric Cipher API</title>
+ <para>
+ The operation is very similar to the message digest discussion.
+ During initialization, the struct sockaddr data structure must be
+ filled as follows:
+ </para>
+
+ <programlisting>
+struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "skcipher", /* this selects the symmetric cipher */
+ .salg_name = "cbc(aes)" /* this is the cipher name */
+};
+ </programlisting>
+
+ <para>
+ Before data can be sent to the kernel using the write/send system
+ call family, the consumer must set the key. The key setting is
+ described with the setsockopt invocation below.
+ </para>
+
+ <para>
+ Using the sendmsg() system call, the application provides the data that should be processed for encryption or decryption. In addition, the IV is
+ specified with the data structure provided by the sendmsg() system call.
+ </para>
+
+ <para>
+ The sendmsg system call parameter of struct msghdr is embedded into the
+ struct cmsghdr data structure. See recv(2) and cmsg(3) for more
+ information on how the cmsghdr data structure is used together with the
+ send/recv system call family. That cmsghdr data structure holds the
+ following information specified with a separate header instances:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ specification of the cipher operation type with one of these flags:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>ALG_OP_ENCRYPT - encryption of data</para>
+ </listitem>
+ <listitem>
+ <para>ALG_OP_DECRYPT - decryption of data</para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+
+ <listitem>
+ <para>
+ specification of the IV information marked with the flag ALG_SET_IV
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The send system call family allows the following flag to be specified:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ MSG_MORE: If this flag is set, the send system call acts like a
+ cipher update function where more input data is expected
+ with a subsequent invocation of the send system call.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Note: The kernel reports -EINVAL for any unexpected data. The caller
+ must make sure that all data matches the constraints given in
+ /proc/crypto for the selected cipher.
+ </para>
+
+ <para>
+ With the recv() system call, the application can read the result of
+ the cipher operation from the kernel crypto API. The output buffer
+ must be at least as large as to hold all blocks of the encrypted or
+ decrypted data. If the output data size is smaller, only as many
+ blocks are returned that fit into that output buffer size.
+ </para>
+ </sect1>
+
+ <sect1><title>AEAD Cipher API</title>
+ <para>
+ The operation is very similar to the symmetric cipher discussion.
+ During initialization, the struct sockaddr data structure must be
+ filled as follows:
+ </para>
+
+ <programlisting>
+struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "aead", /* this selects the symmetric cipher */
+ .salg_name = "gcm(aes)" /* this is the cipher name */
+};
+ </programlisting>
+
+ <para>
+ Before data can be sent to the kernel using the write/send system
+ call family, the consumer must set the key. The key setting is
+ described with the setsockopt invocation below.
+ </para>
+
+ <para>
+ In addition, before data can be sent to the kernel using the
+ write/send system call family, the consumer must set the authentication
+ tag size. To set the authentication tag size, the caller must use the
+ setsockopt invocation described below.
+ </para>
+
+ <para>
+ Using the sendmsg() system call, the application provides the data that should be processed for encryption or decryption. In addition, the IV is
+ specified with the data structure provided by the sendmsg() system call.
+ </para>
+
+ <para>
+ The sendmsg system call parameter of struct msghdr is embedded into the
+ struct cmsghdr data structure. See recv(2) and cmsg(3) for more
+ information on how the cmsghdr data structure is used together with the
+ send/recv system call family. That cmsghdr data structure holds the
+ following information specified with a separate header instances:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ specification of the cipher operation type with one of these flags:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>ALG_OP_ENCRYPT - encryption of data</para>
+ </listitem>
+ <listitem>
+ <para>ALG_OP_DECRYPT - decryption of data</para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+
+ <listitem>
+ <para>
+ specification of the IV information marked with the flag ALG_SET_IV
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ specification of the associated authentication data (AAD) with the
+ flag ALG_SET_AEAD_ASSOCLEN. The AAD is sent to the kernel together
+ with the plaintext / ciphertext. See below for the memory structure.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The send system call family allows the following flag to be specified:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ MSG_MORE: If this flag is set, the send system call acts like a
+ cipher update function where more input data is expected
+ with a subsequent invocation of the send system call.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Note: The kernel reports -EINVAL for any unexpected data. The caller
+ must make sure that all data matches the constraints given in
+ /proc/crypto for the selected cipher.
+ </para>
+
+ <para>
+ With the recv() system call, the application can read the result of
+ the cipher operation from the kernel crypto API. The output buffer
+ must be at least as large as defined with the memory structure below.
+ If the output data size is smaller, the cipher operation is not performed.
+ </para>
+
+ <para>
+ The authenticated decryption operation may indicate an integrity error.
+ Such breach in integrity is marked with the -EBADMSG error code.
+ </para>
+
+ <sect2><title>AEAD Memory Structure</title>
+ <para>
+ The AEAD cipher operates with the following information that
+ is communicated between user and kernel space as one data stream:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>plaintext or ciphertext</para>
+ </listitem>
+
+ <listitem>
+ <para>associated authentication data (AAD)</para>
+ </listitem>
+
+ <listitem>
+ <para>authentication tag</para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The sizes of the AAD and the authentication tag are provided with
+ the sendmsg and setsockopt calls (see there). As the kernel knows
+ the size of the entire data stream, the kernel is now able to
+ calculate the right offsets of the data components in the data
+ stream.
+ </para>
+
+ <para>
+ The user space caller must arrange the aforementioned information
+ in the following order:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ AEAD encryption input: AAD || plaintext
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ AEAD decryption input: AAD || ciphertext || authentication tag
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ The output buffer the user space caller provides must be at least as
+ large to hold the following data:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ AEAD encryption output: ciphertext || authentication tag
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ AEAD decryption output: plaintext
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+ </sect1>
+
+ <sect1><title>Random Number Generator API</title>
+ <para>
+ Again, the operation is very similar to the other APIs.
+ During initialization, the struct sockaddr data structure must be
+ filled as follows:
+ </para>
+
+ <programlisting>
+struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "rng", /* this selects the symmetric cipher */
+ .salg_name = "drbg_nopr_sha256" /* this is the cipher name */
+};
+ </programlisting>
+
+ <para>
+ Depending on the RNG type, the RNG must be seeded. The seed is provided
+ using the setsockopt interface to set the key. For example, the
+ ansi_cprng requires a seed. The DRBGs do not require a seed, but
+ may be seeded.
+ </para>
+
+ <para>
+ Using the read()/recvmsg() system calls, random numbers can be obtained.
+ The kernel generates at most 128 bytes in one call. If user space
+ requires more data, multiple calls to read()/recvmsg() must be made.
+ </para>
+
+ <para>
+ WARNING: The user space caller may invoke the initially mentioned
+ accept system call multiple times. In this case, the returned file
+ descriptors have the same state.
+ </para>
+
+ </sect1>
+
+ <sect1><title>Zero-Copy Interface</title>
+ <para>
+ In addition to the send/write/read/recv system call familty, the AF_ALG
+ interface can be accessed with the zero-copy interface of splice/vmsplice.
+ As the name indicates, the kernel tries to avoid a copy operation into
+ kernel space.
+ </para>
+
+ <para>
+ The zero-copy operation requires data to be aligned at the page boundary.
+ Non-aligned data can be used as well, but may require more operations of
+ the kernel which would defeat the speed gains obtained from the zero-copy
+ interface.
+ </para>
+
+ <para>
+ The system-interent limit for the size of one zero-copy operation is
+ 16 pages. If more data is to be sent to AF_ALG, user space must slice
+ the input into segments with a maximum size of 16 pages.
+ </para>
+
+ <para>
+ Zero-copy can be used with the following code example (a complete working
+ example is provided with libkcapi):
+ </para>
+
+ <programlisting>
+int pipes[2];
+
+pipe(pipes);
+/* input data in iov */
+vmsplice(pipes[1], iov, iovlen, SPLICE_F_GIFT);
+/* opfd is the file descriptor returned from accept() system call */
+splice(pipes[0], NULL, opfd, NULL, ret, 0);
+read(opfd, out, outlen);
+ </programlisting>
+
+ </sect1>
+
+ <sect1><title>Setsockopt Interface</title>
+ <para>
+ In addition to the read/recv and send/write system call handling
+ to send and retrieve data subject to the cipher operation, a consumer
+ also needs to set the additional information for the cipher operation.
+ This additional information is set using the setsockopt system call
+ that must be invoked with the file descriptor of the open cipher
+ (i.e. the file descriptor returned by the accept system call).
+ </para>
+
+ <para>
+ Each setsockopt invocation must use the level SOL_ALG.
+ </para>
+
+ <para>
+ The setsockopt interface allows setting the following data using
+ the mentioned optname:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ ALG_SET_KEY -- Setting the key. Key setting is applicable to:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>the skcipher cipher type (symmetric ciphers)</para>
+ </listitem>
+ <listitem>
+ <para>the hash cipher type (keyed message digests)</para>
+ </listitem>
+ <listitem>
+ <para>the AEAD cipher type</para>
+ </listitem>
+ <listitem>
+ <para>the RNG cipher type to provide the seed</para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+
+ <listitem>
+ <para>
+ ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size
+ for AEAD ciphers. For a encryption operation, the authentication
+ tag of the given size will be generated. For a decryption operation,
+ the provided ciphertext is assumed to contain an authentication tag
+ of the given size (see section about AEAD memory layout below).
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ </sect1>
+
+ <sect1><title>User space API example</title>
+ <para>
+ Please see [1] for libkcapi which provides an easy-to-use wrapper
+ around the aforementioned Netlink kernel interface. [1] also contains
+ a test application that invokes all libkcapi API calls.
+ </para>
+
+ <para>
+ [1] http://www.chronox.de/libkcapi.html
+ </para>
+
+ </sect1>
+
+ </chapter>
+
+ <chapter id="API"><title>Programming Interface</title>
+ <sect1><title>Block Cipher Context Data Structures</title>
+!Pinclude/linux/crypto.h Block Cipher Context Data Structures
+!Finclude/linux/crypto.h aead_request
+ </sect1>
+ <sect1><title>Block Cipher Algorithm Definitions</title>
+!Pinclude/linux/crypto.h Block Cipher Algorithm Definitions
+!Finclude/linux/crypto.h crypto_alg
+!Finclude/linux/crypto.h ablkcipher_alg
+!Finclude/linux/crypto.h aead_alg
+!Finclude/linux/crypto.h blkcipher_alg
+!Finclude/linux/crypto.h cipher_alg
+!Finclude/linux/crypto.h rng_alg
+ </sect1>
+ <sect1><title>Asynchronous Block Cipher API</title>
+!Pinclude/linux/crypto.h Asynchronous Block Cipher API
+!Finclude/linux/crypto.h crypto_alloc_ablkcipher
+!Finclude/linux/crypto.h crypto_free_ablkcipher
+!Finclude/linux/crypto.h crypto_has_ablkcipher
+!Finclude/linux/crypto.h crypto_ablkcipher_ivsize
+!Finclude/linux/crypto.h crypto_ablkcipher_blocksize
+!Finclude/linux/crypto.h crypto_ablkcipher_setkey
+!Finclude/linux/crypto.h crypto_ablkcipher_reqtfm
+!Finclude/linux/crypto.h crypto_ablkcipher_encrypt
+!Finclude/linux/crypto.h crypto_ablkcipher_decrypt
+ </sect1>
+ <sect1><title>Asynchronous Cipher Request Handle</title>
+!Pinclude/linux/crypto.h Asynchronous Cipher Request Handle
+!Finclude/linux/crypto.h crypto_ablkcipher_reqsize
+!Finclude/linux/crypto.h ablkcipher_request_set_tfm
+!Finclude/linux/crypto.h ablkcipher_request_alloc
+!Finclude/linux/crypto.h ablkcipher_request_free
+!Finclude/linux/crypto.h ablkcipher_request_set_callback
+!Finclude/linux/crypto.h ablkcipher_request_set_crypt
+ </sect1>
+ <sect1><title>Authenticated Encryption With Associated Data (AEAD) Cipher API</title>
+!Pinclude/linux/crypto.h Authenticated Encryption With Associated Data (AEAD) Cipher API
+!Finclude/linux/crypto.h crypto_alloc_aead
+!Finclude/linux/crypto.h crypto_free_aead
+!Finclude/linux/crypto.h crypto_aead_ivsize
+!Finclude/linux/crypto.h crypto_aead_authsize
+!Finclude/linux/crypto.h crypto_aead_blocksize
+!Finclude/linux/crypto.h crypto_aead_setkey
+!Finclude/linux/crypto.h crypto_aead_setauthsize
+!Finclude/linux/crypto.h crypto_aead_encrypt
+!Finclude/linux/crypto.h crypto_aead_decrypt
+ </sect1>
+ <sect1><title>Asynchronous AEAD Request Handle</title>
+!Pinclude/linux/crypto.h Asynchronous AEAD Request Handle
+!Finclude/linux/crypto.h crypto_aead_reqsize
+!Finclude/linux/crypto.h aead_request_set_tfm
+!Finclude/linux/crypto.h aead_request_alloc
+!Finclude/linux/crypto.h aead_request_free
+!Finclude/linux/crypto.h aead_request_set_callback
+!Finclude/linux/crypto.h aead_request_set_crypt
+!Finclude/linux/crypto.h aead_request_set_assoc
+ </sect1>
+ <sect1><title>Synchronous Block Cipher API</title>
+!Pinclude/linux/crypto.h Synchronous Block Cipher API
+!Finclude/linux/crypto.h crypto_alloc_blkcipher
+!Finclude/linux/crypto.h crypto_free_blkcipher
+!Finclude/linux/crypto.h crypto_has_blkcipher
+!Finclude/linux/crypto.h crypto_blkcipher_name
+!Finclude/linux/crypto.h crypto_blkcipher_ivsize
+!Finclude/linux/crypto.h crypto_blkcipher_blocksize
+!Finclude/linux/crypto.h crypto_blkcipher_setkey
+!Finclude/linux/crypto.h crypto_blkcipher_encrypt
+!Finclude/linux/crypto.h crypto_blkcipher_encrypt_iv
+!Finclude/linux/crypto.h crypto_blkcipher_decrypt
+!Finclude/linux/crypto.h crypto_blkcipher_decrypt_iv
+!Finclude/linux/crypto.h crypto_blkcipher_set_iv
+!Finclude/linux/crypto.h crypto_blkcipher_get_iv
+ </sect1>
+ <sect1><title>Single Block Cipher API</title>
+!Pinclude/linux/crypto.h Single Block Cipher API
+!Finclude/linux/crypto.h crypto_alloc_cipher
+!Finclude/linux/crypto.h crypto_free_cipher
+!Finclude/linux/crypto.h crypto_has_cipher
+!Finclude/linux/crypto.h crypto_cipher_blocksize
+!Finclude/linux/crypto.h crypto_cipher_setkey
+!Finclude/linux/crypto.h crypto_cipher_encrypt_one
+!Finclude/linux/crypto.h crypto_cipher_decrypt_one
+ </sect1>
+ <sect1><title>Synchronous Message Digest API</title>
+!Pinclude/linux/crypto.h Synchronous Message Digest API
+!Finclude/linux/crypto.h crypto_alloc_hash
+!Finclude/linux/crypto.h crypto_free_hash
+!Finclude/linux/crypto.h crypto_has_hash
+!Finclude/linux/crypto.h crypto_hash_blocksize
+!Finclude/linux/crypto.h crypto_hash_digestsize
+!Finclude/linux/crypto.h crypto_hash_init
+!Finclude/linux/crypto.h crypto_hash_update
+!Finclude/linux/crypto.h crypto_hash_final
+!Finclude/linux/crypto.h crypto_hash_digest
+!Finclude/linux/crypto.h crypto_hash_setkey
+ </sect1>
+ <sect1><title>Message Digest Algorithm Definitions</title>
+!Pinclude/crypto/hash.h Message Digest Algorithm Definitions
+!Finclude/crypto/hash.h hash_alg_common
+!Finclude/crypto/hash.h ahash_alg
+!Finclude/crypto/hash.h shash_alg
+ </sect1>
+ <sect1><title>Asynchronous Message Digest API</title>
+!Pinclude/crypto/hash.h Asynchronous Message Digest API
+!Finclude/crypto/hash.h crypto_alloc_ahash
+!Finclude/crypto/hash.h crypto_free_ahash
+!Finclude/crypto/hash.h crypto_ahash_init
+!Finclude/crypto/hash.h crypto_ahash_digestsize
+!Finclude/crypto/hash.h crypto_ahash_reqtfm
+!Finclude/crypto/hash.h crypto_ahash_reqsize
+!Finclude/crypto/hash.h crypto_ahash_setkey
+!Finclude/crypto/hash.h crypto_ahash_finup
+!Finclude/crypto/hash.h crypto_ahash_final
+!Finclude/crypto/hash.h crypto_ahash_digest
+!Finclude/crypto/hash.h crypto_ahash_export
+!Finclude/crypto/hash.h crypto_ahash_import
+ </sect1>
+ <sect1><title>Asynchronous Hash Request Handle</title>
+!Pinclude/crypto/hash.h Asynchronous Hash Request Handle
+!Finclude/crypto/hash.h ahash_request_set_tfm
+!Finclude/crypto/hash.h ahash_request_alloc
+!Finclude/crypto/hash.h ahash_request_free
+!Finclude/crypto/hash.h ahash_request_set_callback
+!Finclude/crypto/hash.h ahash_request_set_crypt
+ </sect1>
+ <sect1><title>Synchronous Message Digest API</title>
+!Pinclude/crypto/hash.h Synchronous Message Digest API
+!Finclude/crypto/hash.h crypto_alloc_shash
+!Finclude/crypto/hash.h crypto_free_shash
+!Finclude/crypto/hash.h crypto_shash_blocksize
+!Finclude/crypto/hash.h crypto_shash_digestsize
+!Finclude/crypto/hash.h crypto_shash_descsize
+!Finclude/crypto/hash.h crypto_shash_setkey
+!Finclude/crypto/hash.h crypto_shash_digest
+!Finclude/crypto/hash.h crypto_shash_export
+!Finclude/crypto/hash.h crypto_shash_import
+!Finclude/crypto/hash.h crypto_shash_init
+!Finclude/crypto/hash.h crypto_shash_update
+!Finclude/crypto/hash.h crypto_shash_final
+!Finclude/crypto/hash.h crypto_shash_finup
+ </sect1>
+ <sect1><title>Crypto API Random Number API</title>
+!Pinclude/crypto/rng.h Random number generator API
+!Finclude/crypto/rng.h crypto_alloc_rng
+!Finclude/crypto/rng.h crypto_rng_alg
+!Finclude/crypto/rng.h crypto_free_rng
+!Finclude/crypto/rng.h crypto_rng_get_bytes
+!Finclude/crypto/rng.h crypto_rng_reset
+!Finclude/crypto/rng.h crypto_rng_seedsize
+!Cinclude/crypto/rng.h
+ </sect1>
+ </chapter>
+
+ <chapter id="Code"><title>Code Examples</title>
+ <sect1><title>Code Example For Asynchronous Block Cipher Operation</title>
+ <programlisting>
+
+struct tcrypt_result {
+ struct completion completion;
+ int err;
+};
+
+/* tie all data structures together */
+struct ablkcipher_def {
+ struct scatterlist sg;
+ struct crypto_ablkcipher *tfm;
+ struct ablkcipher_request *req;
+ struct tcrypt_result result;
+};
+
+/* Callback function */
+static void test_ablkcipher_cb(struct crypto_async_request *req, int error)
+{
+ struct tcrypt_result *result = req-&gt;data;
+
+ if (error == -EINPROGRESS)
+ return;
+ result-&gt;err = error;
+ complete(&amp;result-&gt;completion);
+ pr_info("Encryption finished successfully\n");
+}
+
+/* Perform cipher operation */
+static unsigned int test_ablkcipher_encdec(struct ablkcipher_def *ablk,
+ int enc)
+{
+ int rc = 0;
+
+ if (enc)
+ rc = crypto_ablkcipher_encrypt(ablk-&gt;req);
+ else
+ rc = crypto_ablkcipher_decrypt(ablk-&gt;req);
+
+ switch (rc) {
+ case 0:
+ break;
+ case -EINPROGRESS:
+ case -EBUSY:
+ rc = wait_for_completion_interruptible(
+ &amp;ablk-&gt;result.completion);
+ if (!rc &amp;&amp; !ablk-&gt;result.err) {
+ reinit_completion(&amp;ablk-&gt;result.completion);
+ break;
+ }
+ default:
+ pr_info("ablkcipher encrypt returned with %d result %d\n",
+ rc, ablk-&gt;result.err);
+ break;
+ }
+ init_completion(&amp;ablk-&gt;result.completion);
+
+ return rc;
+}
+
+/* Initialize and trigger cipher operation */
+static int test_ablkcipher(void)
+{
+ struct ablkcipher_def ablk;
+ struct crypto_ablkcipher *ablkcipher = NULL;
+ struct ablkcipher_request *req = NULL;
+ char *scratchpad = NULL;
+ char *ivdata = NULL;
+ unsigned char key[32];
+ int ret = -EFAULT;
+
+ ablkcipher = crypto_alloc_ablkcipher("cbc-aes-aesni", 0, 0);
+ if (IS_ERR(ablkcipher)) {
+ pr_info("could not allocate ablkcipher handle\n");
+ return PTR_ERR(ablkcipher);
+ }
+
+ req = ablkcipher_request_alloc(ablkcipher, GFP_KERNEL);
+ if (IS_ERR(req)) {
+ pr_info("could not allocate request queue\n");
+ ret = PTR_ERR(req);
+ goto out;
+ }
+
+ ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
+ test_ablkcipher_cb,
+ &amp;ablk.result);
+
+ /* AES 256 with random key */
+ get_random_bytes(&amp;key, 32);
+ if (crypto_ablkcipher_setkey(ablkcipher, key, 32)) {
+ pr_info("key could not be set\n");
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ /* IV will be random */
+ ivdata = kmalloc(16, GFP_KERNEL);
+ if (!ivdata) {
+ pr_info("could not allocate ivdata\n");
+ goto out;
+ }
+ get_random_bytes(ivdata, 16);
+
+ /* Input data will be random */
+ scratchpad = kmalloc(16, GFP_KERNEL);
+ if (!scratchpad) {
+ pr_info("could not allocate scratchpad\n");
+ goto out;
+ }
+ get_random_bytes(scratchpad, 16);
+
+ ablk.tfm = ablkcipher;
+ ablk.req = req;
+
+ /* We encrypt one block */
+ sg_init_one(&amp;ablk.sg, scratchpad, 16);
+ ablkcipher_request_set_crypt(req, &amp;ablk.sg, &amp;ablk.sg, 16, ivdata);
+ init_completion(&amp;ablk.result.completion);
+
+ /* encrypt data */
+ ret = test_ablkcipher_encdec(&amp;ablk, 1);
+ if (ret)
+ goto out;
+
+ pr_info("Encryption triggered successfully\n");
+
+out:
+ if (ablkcipher)
+ crypto_free_ablkcipher(ablkcipher);
+ if (req)
+ ablkcipher_request_free(req);
+ if (ivdata)
+ kfree(ivdata);
+ if (scratchpad)
+ kfree(scratchpad);
+ return ret;
+}
+ </programlisting>
+ </sect1>
+
+ <sect1><title>Code Example For Synchronous Block Cipher Operation</title>
+ <programlisting>
+
+static int test_blkcipher(void)
+{
+ struct crypto_blkcipher *blkcipher = NULL;
+ char *cipher = "cbc(aes)";
+ // AES 128
+ charkey =
+"\x12\x34\x56\x78\x90\xab\xcd\xef\x12\x34\x56\x78\x90\xab\xcd\xef";
+ chariv =
+"\x12\x34\x56\x78\x90\xab\xcd\xef\x12\x34\x56\x78\x90\xab\xcd\xef";
+ unsigned int ivsize = 0;
+ char *scratchpad = NULL; // holds plaintext and ciphertext
+ struct scatterlist sg;
+ struct blkcipher_desc desc;
+ int ret = -EFAULT;
+
+ blkcipher = crypto_alloc_blkcipher(cipher, 0, 0);
+ if (IS_ERR(blkcipher)) {
+ printk("could not allocate blkcipher handle for %s\n", cipher);
+ return -PTR_ERR(blkcipher);
+ }
+
+ if (crypto_blkcipher_setkey(blkcipher, key, strlen(key))) {
+ printk("key could not be set\n");
+ ret = -EAGAIN;
+ goto out;
+ }
+
+ ivsize = crypto_blkcipher_ivsize(blkcipher);
+ if (ivsize) {
+ if (ivsize != strlen(iv))
+ printk("IV length differs from expected length\n");
+ crypto_blkcipher_set_iv(blkcipher, iv, ivsize);
+ }
+
+ scratchpad = kmalloc(crypto_blkcipher_blocksize(blkcipher), GFP_KERNEL);
+ if (!scratchpad) {
+ printk("could not allocate scratchpad for %s\n", cipher);
+ goto out;
+ }
+ /* get some random data that we want to encrypt */
+ get_random_bytes(scratchpad, crypto_blkcipher_blocksize(blkcipher));
+
+ desc.flags = 0;
+ desc.tfm = blkcipher;
+ sg_init_one(&amp;sg, scratchpad, crypto_blkcipher_blocksize(blkcipher));
+
+ /* encrypt data in place */
+ crypto_blkcipher_encrypt(&amp;desc, &amp;sg, &amp;sg,
+ crypto_blkcipher_blocksize(blkcipher));
+
+ /* decrypt data in place
+ * crypto_blkcipher_decrypt(&amp;desc, &amp;sg, &amp;sg,
+ */ crypto_blkcipher_blocksize(blkcipher));
+
+
+ printk("Cipher operation completed\n");
+ return 0;
+
+out:
+ if (blkcipher)
+ crypto_free_blkcipher(blkcipher);
+ if (scratchpad)
+ kzfree(scratchpad);
+ return ret;
+}
+ </programlisting>
+ </sect1>
+
+ <sect1><title>Code Example For Use of Operational State Memory With SHASH</title>
+ <programlisting>
+
+struct sdesc {
+ struct shash_desc shash;
+ char ctx[];
+};
+
+static struct sdescinit_sdesc(struct crypto_shash *alg)
+{
+ struct sdescsdesc;
+ int size;
+
+ size = sizeof(struct shash_desc) + crypto_shash_descsize(alg);
+ sdesc = kmalloc(size, GFP_KERNEL);
+ if (!sdesc)
+ return ERR_PTR(-ENOMEM);
+ sdesc-&gt;shash.tfm = alg;
+ sdesc-&gt;shash.flags = 0x0;
+ return sdesc;
+}
+
+static int calc_hash(struct crypto_shashalg,
+ const unsigned chardata, unsigned int datalen,
+ unsigned chardigest) {
+ struct sdescsdesc;
+ int ret;
+
+ sdesc = init_sdesc(alg);
+ if (IS_ERR(sdesc)) {
+ pr_info("trusted_key: can't alloc %s\n", hash_alg);
+ return PTR_ERR(sdesc);
+ }
+
+ ret = crypto_shash_digest(&amp;sdesc-&gt;shash, data, datalen, digest);
+ kfree(sdesc);
+ return ret;
+}
+ </programlisting>
+ </sect1>
+
+ <sect1><title>Code Example For Random Number Generator Usage</title>
+ <programlisting>
+
+static int get_random_numbers(u8 *buf, unsigned int len)
+{
+ struct crypto_rngrng = NULL;
+ chardrbg = "drbg_nopr_sha256"; /* Hash DRBG with SHA-256, no PR */
+ int ret;
+
+ if (!buf || !len) {
+ pr_debug("No output buffer provided\n");
+ return -EINVAL;
+ }
+
+ rng = crypto_alloc_rng(drbg, 0, 0);
+ if (IS_ERR(rng)) {
+ pr_debug("could not allocate RNG handle for %s\n", drbg);
+ return -PTR_ERR(rng);
+ }
+
+ ret = crypto_rng_get_bytes(rng, buf, len);
+ if (ret &lt; 0)
+ pr_debug("generation of random numbers failed\n");
+ else if (ret == 0)
+ pr_debug("RNG returned no data");
+ else
+ pr_debug("RNG returned %d bytes of data\n", ret);
+
+out:
+ crypto_free_rng(rng);
+ return ret;
+}
+ </programlisting>
+ </sect1>
+ </chapter>
+ </book>
diff --git a/Documentation/DocBook/debugobjects.tmpl b/Documentation/DocBook/debugobjects.tmpl
new file mode 100644
index 000000000..24979f691
--- /dev/null
+++ b/Documentation/DocBook/debugobjects.tmpl
@@ -0,0 +1,441 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="debug-objects-guide">
+ <bookinfo>
+ <title>Debug objects life time</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Thomas</firstname>
+ <surname>Gleixner</surname>
+ <affiliation>
+ <address>
+ <email>tglx@linutronix.de</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2008</year>
+ <holder>Thomas Gleixner</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ debugobjects is a generic infrastructure to track the life time
+ of kernel objects and validate the operations on those.
+ </para>
+ <para>
+ debugobjects is useful to check for the following error patterns:
+ <itemizedlist>
+ <listitem><para>Activation of uninitialized objects</para></listitem>
+ <listitem><para>Initialization of active objects</para></listitem>
+ <listitem><para>Usage of freed/destroyed objects</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ debugobjects is not changing the data structure of the real
+ object so it can be compiled in with a minimal runtime impact
+ and enabled on demand with a kernel command line option.
+ </para>
+ </chapter>
+
+ <chapter id="howto">
+ <title>Howto use debugobjects</title>
+ <para>
+ A kernel subsystem needs to provide a data structure which
+ describes the object type and add calls into the debug code at
+ appropriate places. The data structure to describe the object
+ type needs at minimum the name of the object type. Optional
+ functions can and should be provided to fixup detected problems
+ so the kernel can continue to work and the debug information can
+ be retrieved from a live system instead of hard core debugging
+ with serial consoles and stack trace transcripts from the
+ monitor.
+ </para>
+ <para>
+ The debug calls provided by debugobjects are:
+ <itemizedlist>
+ <listitem><para>debug_object_init</para></listitem>
+ <listitem><para>debug_object_init_on_stack</para></listitem>
+ <listitem><para>debug_object_activate</para></listitem>
+ <listitem><para>debug_object_deactivate</para></listitem>
+ <listitem><para>debug_object_destroy</para></listitem>
+ <listitem><para>debug_object_free</para></listitem>
+ <listitem><para>debug_object_assert_init</para></listitem>
+ </itemizedlist>
+ Each of these functions takes the address of the real object and
+ a pointer to the object type specific debug description
+ structure.
+ </para>
+ <para>
+ Each detected error is reported in the statistics and a limited
+ number of errors are printk'ed including a full stack trace.
+ </para>
+ <para>
+ The statistics are available via /sys/kernel/debug/debug_objects/stats.
+ They provide information about the number of warnings and the
+ number of successful fixups along with information about the
+ usage of the internal tracking objects and the state of the
+ internal tracking objects pool.
+ </para>
+ </chapter>
+ <chapter id="debugfunctions">
+ <title>Debug functions</title>
+ <sect1 id="prototypes">
+ <title>Debug object function reference</title>
+!Elib/debugobjects.c
+ </sect1>
+ <sect1 id="debug_object_init">
+ <title>debug_object_init</title>
+ <para>
+ This function is called whenever the initialization function
+ of a real object is called.
+ </para>
+ <para>
+ When the real object is already tracked by debugobjects it is
+ checked, whether the object can be initialized. Initializing
+ is not allowed for active and destroyed objects. When
+ debugobjects detects an error, then it calls the fixup_init
+ function of the object type description structure if provided
+ by the caller. The fixup function can correct the problem
+ before the real initialization of the object happens. E.g. it
+ can deactivate an active object in order to prevent damage to
+ the subsystem.
+ </para>
+ <para>
+ When the real object is not yet tracked by debugobjects,
+ debugobjects allocates a tracker object for the real object
+ and sets the tracker object state to ODEBUG_STATE_INIT. It
+ verifies that the object is not on the callers stack. If it is
+ on the callers stack then a limited number of warnings
+ including a full stack trace is printk'ed. The calling code
+ must use debug_object_init_on_stack() and remove the object
+ before leaving the function which allocated it. See next
+ section.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_init_on_stack">
+ <title>debug_object_init_on_stack</title>
+ <para>
+ This function is called whenever the initialization function
+ of a real object which resides on the stack is called.
+ </para>
+ <para>
+ When the real object is already tracked by debugobjects it is
+ checked, whether the object can be initialized. Initializing
+ is not allowed for active and destroyed objects. When
+ debugobjects detects an error, then it calls the fixup_init
+ function of the object type description structure if provided
+ by the caller. The fixup function can correct the problem
+ before the real initialization of the object happens. E.g. it
+ can deactivate an active object in order to prevent damage to
+ the subsystem.
+ </para>
+ <para>
+ When the real object is not yet tracked by debugobjects
+ debugobjects allocates a tracker object for the real object
+ and sets the tracker object state to ODEBUG_STATE_INIT. It
+ verifies that the object is on the callers stack.
+ </para>
+ <para>
+ An object which is on the stack must be removed from the
+ tracker by calling debug_object_free() before the function
+ which allocates the object returns. Otherwise we keep track of
+ stale objects.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_activate">
+ <title>debug_object_activate</title>
+ <para>
+ This function is called whenever the activation function of a
+ real object is called.
+ </para>
+ <para>
+ When the real object is already tracked by debugobjects it is
+ checked, whether the object can be activated. Activating is
+ not allowed for active and destroyed objects. When
+ debugobjects detects an error, then it calls the
+ fixup_activate function of the object type description
+ structure if provided by the caller. The fixup function can
+ correct the problem before the real activation of the object
+ happens. E.g. it can deactivate an active object in order to
+ prevent damage to the subsystem.
+ </para>
+ <para>
+ When the real object is not yet tracked by debugobjects then
+ the fixup_activate function is called if available. This is
+ necessary to allow the legitimate activation of statically
+ allocated and initialized objects. The fixup function checks
+ whether the object is valid and calls the debug_objects_init()
+ function to initialize the tracking of this object.
+ </para>
+ <para>
+ When the activation is legitimate, then the state of the
+ associated tracker object is set to ODEBUG_STATE_ACTIVE.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_deactivate">
+ <title>debug_object_deactivate</title>
+ <para>
+ This function is called whenever the deactivation function of
+ a real object is called.
+ </para>
+ <para>
+ When the real object is tracked by debugobjects it is checked,
+ whether the object can be deactivated. Deactivating is not
+ allowed for untracked or destroyed objects.
+ </para>
+ <para>
+ When the deactivation is legitimate, then the state of the
+ associated tracker object is set to ODEBUG_STATE_INACTIVE.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_destroy">
+ <title>debug_object_destroy</title>
+ <para>
+ This function is called to mark an object destroyed. This is
+ useful to prevent the usage of invalid objects, which are
+ still available in memory: either statically allocated objects
+ or objects which are freed later.
+ </para>
+ <para>
+ When the real object is tracked by debugobjects it is checked,
+ whether the object can be destroyed. Destruction is not
+ allowed for active and destroyed objects. When debugobjects
+ detects an error, then it calls the fixup_destroy function of
+ the object type description structure if provided by the
+ caller. The fixup function can correct the problem before the
+ real destruction of the object happens. E.g. it can deactivate
+ an active object in order to prevent damage to the subsystem.
+ </para>
+ <para>
+ When the destruction is legitimate, then the state of the
+ associated tracker object is set to ODEBUG_STATE_DESTROYED.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_free">
+ <title>debug_object_free</title>
+ <para>
+ This function is called before an object is freed.
+ </para>
+ <para>
+ When the real object is tracked by debugobjects it is checked,
+ whether the object can be freed. Free is not allowed for
+ active objects. When debugobjects detects an error, then it
+ calls the fixup_free function of the object type description
+ structure if provided by the caller. The fixup function can
+ correct the problem before the real free of the object
+ happens. E.g. it can deactivate an active object in order to
+ prevent damage to the subsystem.
+ </para>
+ <para>
+ Note that debug_object_free removes the object from the
+ tracker. Later usage of the object is detected by the other
+ debug checks.
+ </para>
+ </sect1>
+
+ <sect1 id="debug_object_assert_init">
+ <title>debug_object_assert_init</title>
+ <para>
+ This function is called to assert that an object has been
+ initialized.
+ </para>
+ <para>
+ When the real object is not tracked by debugobjects, it calls
+ fixup_assert_init of the object type description structure
+ provided by the caller, with the hardcoded object state
+ ODEBUG_NOT_AVAILABLE. The fixup function can correct the problem
+ by calling debug_object_init and other specific initializing
+ functions.
+ </para>
+ <para>
+ When the real object is already tracked by debugobjects it is
+ ignored.
+ </para>
+ </sect1>
+ </chapter>
+ <chapter id="fixupfunctions">
+ <title>Fixup functions</title>
+ <sect1 id="debug_obj_descr">
+ <title>Debug object type description structure</title>
+!Iinclude/linux/debugobjects.h
+ </sect1>
+ <sect1 id="fixup_init">
+ <title>fixup_init</title>
+ <para>
+ This function is called from the debug code whenever a problem
+ in debug_object_init is detected. The function takes the
+ address of the object and the state which is currently
+ recorded in the tracker.
+ </para>
+ <para>
+ Called from debug_object_init when the object state is:
+ <itemizedlist>
+ <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The function returns 1 when the fixup was successful,
+ otherwise 0. The return value is used to update the
+ statistics.
+ </para>
+ <para>
+ Note, that the function needs to call the debug_object_init()
+ function again, after the damage has been repaired in order to
+ keep the state consistent.
+ </para>
+ </sect1>
+
+ <sect1 id="fixup_activate">
+ <title>fixup_activate</title>
+ <para>
+ This function is called from the debug code whenever a problem
+ in debug_object_activate is detected.
+ </para>
+ <para>
+ Called from debug_object_activate when the object state is:
+ <itemizedlist>
+ <listitem><para>ODEBUG_STATE_NOTAVAILABLE</para></listitem>
+ <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The function returns 1 when the fixup was successful,
+ otherwise 0. The return value is used to update the
+ statistics.
+ </para>
+ <para>
+ Note that the function needs to call the debug_object_activate()
+ function again after the damage has been repaired in order to
+ keep the state consistent.
+ </para>
+ <para>
+ The activation of statically initialized objects is a special
+ case. When debug_object_activate() has no tracked object for
+ this object address then fixup_activate() is called with
+ object state ODEBUG_STATE_NOTAVAILABLE. The fixup function
+ needs to check whether this is a legitimate case of a
+ statically initialized object or not. In case it is it calls
+ debug_object_init() and debug_object_activate() to make the
+ object known to the tracker and marked active. In this case
+ the function should return 0 because this is not a real fixup.
+ </para>
+ </sect1>
+
+ <sect1 id="fixup_destroy">
+ <title>fixup_destroy</title>
+ <para>
+ This function is called from the debug code whenever a problem
+ in debug_object_destroy is detected.
+ </para>
+ <para>
+ Called from debug_object_destroy when the object state is:
+ <itemizedlist>
+ <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The function returns 1 when the fixup was successful,
+ otherwise 0. The return value is used to update the
+ statistics.
+ </para>
+ </sect1>
+ <sect1 id="fixup_free">
+ <title>fixup_free</title>
+ <para>
+ This function is called from the debug code whenever a problem
+ in debug_object_free is detected. Further it can be called
+ from the debug checks in kfree/vfree, when an active object is
+ detected from the debug_check_no_obj_freed() sanity checks.
+ </para>
+ <para>
+ Called from debug_object_free() or debug_check_no_obj_freed()
+ when the object state is:
+ <itemizedlist>
+ <listitem><para>ODEBUG_STATE_ACTIVE</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The function returns 1 when the fixup was successful,
+ otherwise 0. The return value is used to update the
+ statistics.
+ </para>
+ </sect1>
+ <sect1 id="fixup_assert_init">
+ <title>fixup_assert_init</title>
+ <para>
+ This function is called from the debug code whenever a problem
+ in debug_object_assert_init is detected.
+ </para>
+ <para>
+ Called from debug_object_assert_init() with a hardcoded state
+ ODEBUG_STATE_NOTAVAILABLE when the object is not found in the
+ debug bucket.
+ </para>
+ <para>
+ The function returns 1 when the fixup was successful,
+ otherwise 0. The return value is used to update the
+ statistics.
+ </para>
+ <para>
+ Note, this function should make sure debug_object_init() is
+ called before returning.
+ </para>
+ <para>
+ The handling of statically initialized objects is a special
+ case. The fixup function should check if this is a legitimate
+ case of a statically initialized object or not. In this case only
+ debug_object_init() should be called to make the object known to
+ the tracker. Then the function should return 0 because this is not
+ a real fixup.
+ </para>
+ </sect1>
+ </chapter>
+ <chapter id="bugs">
+ <title>Known Bugs And Assumptions</title>
+ <para>
+ None (knock on wood).
+ </para>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/device-drivers.tmpl b/Documentation/DocBook/device-drivers.tmpl
new file mode 100644
index 000000000..faf09d4a0
--- /dev/null
+++ b/Documentation/DocBook/device-drivers.tmpl
@@ -0,0 +1,458 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LinuxDriversAPI">
+ <bookinfo>
+ <title>Linux Device Drivers</title>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="Basics">
+ <title>Driver Basics</title>
+ <sect1><title>Driver Entry and Exit points</title>
+!Iinclude/linux/init.h
+ </sect1>
+
+ <sect1><title>Atomic and pointer manipulation</title>
+!Iarch/x86/include/asm/atomic.h
+ </sect1>
+
+ <sect1><title>Delaying, scheduling, and timer routines</title>
+!Iinclude/linux/sched.h
+!Ekernel/sched/core.c
+!Ikernel/sched/cpupri.c
+!Ikernel/sched/fair.c
+!Iinclude/linux/completion.h
+!Ekernel/time/timer.c
+ </sect1>
+ <sect1><title>Wait queues and Wake events</title>
+!Iinclude/linux/wait.h
+!Ekernel/sched/wait.c
+ </sect1>
+ <sect1><title>High-resolution timers</title>
+!Iinclude/linux/ktime.h
+!Iinclude/linux/hrtimer.h
+!Ekernel/time/hrtimer.c
+ </sect1>
+ <sect1><title>Workqueues and Kevents</title>
+!Ekernel/workqueue.c
+ </sect1>
+ <sect1><title>Internal Functions</title>
+!Ikernel/exit.c
+!Ikernel/signal.c
+!Iinclude/linux/kthread.h
+!Ekernel/kthread.c
+ </sect1>
+
+ <sect1><title>Kernel objects manipulation</title>
+<!--
+X!Iinclude/linux/kobject.h
+-->
+!Elib/kobject.c
+ </sect1>
+
+ <sect1><title>Kernel utility functions</title>
+!Iinclude/linux/kernel.h
+!Ekernel/printk/printk.c
+!Ekernel/panic.c
+!Ekernel/sys.c
+!Ekernel/rcu/srcu.c
+!Ekernel/rcu/tree.c
+!Ekernel/rcu/tree_plugin.h
+!Ekernel/rcu/update.c
+ </sect1>
+
+ <sect1><title>Device Resource Management</title>
+!Edrivers/base/devres.c
+ </sect1>
+
+ </chapter>
+
+ <chapter id="devdrivers">
+ <title>Device drivers infrastructure</title>
+ <sect1><title>The Basic Device Driver-Model Structures </title>
+!Iinclude/linux/device.h
+ </sect1>
+ <sect1><title>Device Drivers Base</title>
+!Idrivers/base/init.c
+!Edrivers/base/driver.c
+!Edrivers/base/core.c
+!Edrivers/base/syscore.c
+!Edrivers/base/class.c
+!Idrivers/base/node.c
+!Edrivers/base/firmware_class.c
+!Edrivers/base/transport_class.c
+<!-- Cannot be included, because
+ attribute_container_add_class_device_adapter
+ and attribute_container_classdev_to_container
+ exceed allowed 44 characters maximum
+X!Edrivers/base/attribute_container.c
+-->
+!Edrivers/base/dd.c
+<!--
+X!Edrivers/base/interface.c
+-->
+!Iinclude/linux/platform_device.h
+!Edrivers/base/platform.c
+!Edrivers/base/bus.c
+ </sect1>
+ <sect1><title>Device Drivers DMA Management</title>
+!Edrivers/dma-buf/dma-buf.c
+!Edrivers/dma-buf/fence.c
+!Edrivers/dma-buf/seqno-fence.c
+!Iinclude/linux/fence.h
+!Iinclude/linux/seqno-fence.h
+!Edrivers/dma-buf/reservation.c
+!Iinclude/linux/reservation.h
+!Edrivers/base/dma-coherent.c
+!Edrivers/base/dma-mapping.c
+ </sect1>
+ <sect1><title>Device Drivers Power Management</title>
+!Edrivers/base/power/main.c
+ </sect1>
+ <sect1><title>Device Drivers ACPI Support</title>
+<!-- Internal functions only
+X!Edrivers/acpi/sleep/main.c
+X!Edrivers/acpi/sleep/wakeup.c
+X!Edrivers/acpi/motherboard.c
+X!Edrivers/acpi/bus.c
+-->
+!Edrivers/acpi/scan.c
+!Idrivers/acpi/scan.c
+<!-- No correct structured comments
+X!Edrivers/acpi/pci_bind.c
+-->
+ </sect1>
+ <sect1><title>Device drivers PnP support</title>
+!Idrivers/pnp/core.c
+<!-- No correct structured comments
+X!Edrivers/pnp/system.c
+ -->
+!Edrivers/pnp/card.c
+!Idrivers/pnp/driver.c
+!Edrivers/pnp/manager.c
+!Edrivers/pnp/support.c
+ </sect1>
+ <sect1><title>Userspace IO devices</title>
+!Edrivers/uio/uio.c
+!Iinclude/linux/uio_driver.h
+ </sect1>
+ </chapter>
+
+ <chapter id="parportdev">
+ <title>Parallel Port Devices</title>
+!Iinclude/linux/parport.h
+!Edrivers/parport/ieee1284.c
+!Edrivers/parport/share.c
+!Idrivers/parport/daisy.c
+ </chapter>
+
+ <chapter id="message_devices">
+ <title>Message-based devices</title>
+ <sect1><title>Fusion message devices</title>
+!Edrivers/message/fusion/mptbase.c
+!Idrivers/message/fusion/mptbase.c
+!Edrivers/message/fusion/mptscsih.c
+!Idrivers/message/fusion/mptscsih.c
+!Idrivers/message/fusion/mptctl.c
+!Idrivers/message/fusion/mptspi.c
+!Idrivers/message/fusion/mptfc.c
+!Idrivers/message/fusion/mptlan.c
+ </sect1>
+ </chapter>
+
+ <chapter id="snddev">
+ <title>Sound Devices</title>
+!Iinclude/sound/core.h
+!Esound/sound_core.c
+!Iinclude/sound/pcm.h
+!Esound/core/pcm.c
+!Esound/core/device.c
+!Esound/core/info.c
+!Esound/core/rawmidi.c
+!Esound/core/sound.c
+!Esound/core/memory.c
+!Esound/core/pcm_memory.c
+!Esound/core/init.c
+!Esound/core/isadma.c
+!Esound/core/control.c
+!Esound/core/pcm_lib.c
+!Esound/core/hwdep.c
+!Esound/core/pcm_native.c
+!Esound/core/memalloc.c
+<!-- FIXME: Removed for now since no structured comments in source
+X!Isound/sound_firmware.c
+-->
+ </chapter>
+
+ <chapter id="uart16x50">
+ <title>16x50 UART Driver</title>
+!Edrivers/tty/serial/serial_core.c
+!Edrivers/tty/serial/8250/8250_core.c
+ </chapter>
+
+ <chapter id="fbdev">
+ <title>Frame Buffer Library</title>
+
+ <para>
+ The frame buffer drivers depend heavily on four data structures.
+ These structures are declared in include/linux/fb.h. They are
+ fb_info, fb_var_screeninfo, fb_fix_screeninfo and fb_monospecs.
+ The last three can be made available to and from userland.
+ </para>
+
+ <para>
+ fb_info defines the current state of a particular video card.
+ Inside fb_info, there exists a fb_ops structure which is a
+ collection of needed functions to make fbdev and fbcon work.
+ fb_info is only visible to the kernel.
+ </para>
+
+ <para>
+ fb_var_screeninfo is used to describe the features of a video card
+ that are user defined. With fb_var_screeninfo, things such as
+ depth and the resolution may be defined.
+ </para>
+
+ <para>
+ The next structure is fb_fix_screeninfo. This defines the
+ properties of a card that are created when a mode is set and can't
+ be changed otherwise. A good example of this is the start of the
+ frame buffer memory. This "locks" the address of the frame buffer
+ memory, so that it cannot be changed or moved.
+ </para>
+
+ <para>
+ The last structure is fb_monospecs. In the old API, there was
+ little importance for fb_monospecs. This allowed for forbidden things
+ such as setting a mode of 800x600 on a fix frequency monitor. With
+ the new API, fb_monospecs prevents such things, and if used
+ correctly, can prevent a monitor from being cooked. fb_monospecs
+ will not be useful until kernels 2.5.x.
+ </para>
+
+ <sect1><title>Frame Buffer Memory</title>
+!Edrivers/video/fbdev/core/fbmem.c
+ </sect1>
+<!--
+ <sect1><title>Frame Buffer Console</title>
+X!Edrivers/video/console/fbcon.c
+ </sect1>
+-->
+ <sect1><title>Frame Buffer Colormap</title>
+!Edrivers/video/fbdev/core/fbcmap.c
+ </sect1>
+<!-- FIXME:
+ drivers/video/fbgen.c has no docs, which stuffs up the sgml. Comment
+ out until somebody adds docs. KAO
+ <sect1><title>Frame Buffer Generic Functions</title>
+X!Idrivers/video/fbgen.c
+ </sect1>
+KAO -->
+ <sect1><title>Frame Buffer Video Mode Database</title>
+!Idrivers/video/fbdev/core/modedb.c
+!Edrivers/video/fbdev/core/modedb.c
+ </sect1>
+ <sect1><title>Frame Buffer Macintosh Video Mode Database</title>
+!Edrivers/video/fbdev/macmodes.c
+ </sect1>
+ <sect1><title>Frame Buffer Fonts</title>
+ <para>
+ Refer to the file lib/fonts/fonts.c for more information.
+ </para>
+<!-- FIXME: Removed for now since no structured comments in source
+X!Ilib/fonts/fonts.c
+-->
+ </sect1>
+ </chapter>
+
+ <chapter id="input_subsystem">
+ <title>Input Subsystem</title>
+ <sect1><title>Input core</title>
+!Iinclude/linux/input.h
+!Edrivers/input/input.c
+!Edrivers/input/ff-core.c
+!Edrivers/input/ff-memless.c
+ </sect1>
+ <sect1><title>Multitouch Library</title>
+!Iinclude/linux/input/mt.h
+!Edrivers/input/input-mt.c
+ </sect1>
+ <sect1><title>Polled input devices</title>
+!Iinclude/linux/input-polldev.h
+!Edrivers/input/input-polldev.c
+ </sect1>
+ <sect1><title>Matrix keyboars/keypads</title>
+!Iinclude/linux/input/matrix_keypad.h
+ </sect1>
+ <sect1><title>Sparse keymap support</title>
+!Iinclude/linux/input/sparse-keymap.h
+!Edrivers/input/sparse-keymap.c
+ </sect1>
+ </chapter>
+
+ <chapter id="spi">
+ <title>Serial Peripheral Interface (SPI)</title>
+ <para>
+ SPI is the "Serial Peripheral Interface", widely used with
+ embedded systems because it is a simple and efficient
+ interface: basically a multiplexed shift register.
+ Its three signal wires hold a clock (SCK, often in the range
+ of 1-20 MHz), a "Master Out, Slave In" (MOSI) data line, and
+ a "Master In, Slave Out" (MISO) data line.
+ SPI is a full duplex protocol; for each bit shifted out the
+ MOSI line (one per clock) another is shifted in on the MISO line.
+ Those bits are assembled into words of various sizes on the
+ way to and from system memory.
+ An additional chipselect line is usually active-low (nCS);
+ four signals are normally used for each peripheral, plus
+ sometimes an interrupt.
+ </para>
+ <para>
+ The SPI bus facilities listed here provide a generalized
+ interface to declare SPI busses and devices, manage them
+ according to the standard Linux driver model, and perform
+ input/output operations.
+ At this time, only "master" side interfaces are supported,
+ where Linux talks to SPI peripherals and does not implement
+ such a peripheral itself.
+ (Interfaces to support implementing SPI slaves would
+ necessarily look different.)
+ </para>
+ <para>
+ The programming interface is structured around two kinds of driver,
+ and two kinds of device.
+ A "Controller Driver" abstracts the controller hardware, which may
+ be as simple as a set of GPIO pins or as complex as a pair of FIFOs
+ connected to dual DMA engines on the other side of the SPI shift
+ register (maximizing throughput). Such drivers bridge between
+ whatever bus they sit on (often the platform bus) and SPI, and
+ expose the SPI side of their device as a
+ <structname>struct spi_master</structname>.
+ SPI devices are children of that master, represented as a
+ <structname>struct spi_device</structname> and manufactured from
+ <structname>struct spi_board_info</structname> descriptors which
+ are usually provided by board-specific initialization code.
+ A <structname>struct spi_driver</structname> is called a
+ "Protocol Driver", and is bound to a spi_device using normal
+ driver model calls.
+ </para>
+ <para>
+ The I/O model is a set of queued messages. Protocol drivers
+ submit one or more <structname>struct spi_message</structname>
+ objects, which are processed and completed asynchronously.
+ (There are synchronous wrappers, however.) Messages are
+ built from one or more <structname>struct spi_transfer</structname>
+ objects, each of which wraps a full duplex SPI transfer.
+ A variety of protocol tweaking options are needed, because
+ different chips adopt very different policies for how they
+ use the bits transferred with SPI.
+ </para>
+!Iinclude/linux/spi/spi.h
+!Fdrivers/spi/spi.c spi_register_board_info
+!Edrivers/spi/spi.c
+ </chapter>
+
+ <chapter id="i2c">
+ <title>I<superscript>2</superscript>C and SMBus Subsystem</title>
+
+ <para>
+ I<superscript>2</superscript>C (or without fancy typography, "I2C")
+ is an acronym for the "Inter-IC" bus, a simple bus protocol which is
+ widely used where low data rate communications suffice.
+ Since it's also a licensed trademark, some vendors use another
+ name (such as "Two-Wire Interface", TWI) for the same bus.
+ I2C only needs two signals (SCL for clock, SDA for data), conserving
+ board real estate and minimizing signal quality issues.
+ Most I2C devices use seven bit addresses, and bus speeds of up
+ to 400 kHz; there's a high speed extension (3.4 MHz) that's not yet
+ found wide use.
+ I2C is a multi-master bus; open drain signaling is used to
+ arbitrate between masters, as well as to handshake and to
+ synchronize clocks from slower clients.
+ </para>
+
+ <para>
+ The Linux I2C programming interfaces support only the master
+ side of bus interactions, not the slave side.
+ The programming interface is structured around two kinds of driver,
+ and two kinds of device.
+ An I2C "Adapter Driver" abstracts the controller hardware; it binds
+ to a physical device (perhaps a PCI device or platform_device) and
+ exposes a <structname>struct i2c_adapter</structname> representing
+ each I2C bus segment it manages.
+ On each I2C bus segment will be I2C devices represented by a
+ <structname>struct i2c_client</structname>. Those devices will
+ be bound to a <structname>struct i2c_driver</structname>,
+ which should follow the standard Linux driver model.
+ (At this writing, a legacy model is more widely used.)
+ There are functions to perform various I2C protocol operations; at
+ this writing all such functions are usable only from task context.
+ </para>
+
+ <para>
+ The System Management Bus (SMBus) is a sibling protocol. Most SMBus
+ systems are also I2C conformant. The electrical constraints are
+ tighter for SMBus, and it standardizes particular protocol messages
+ and idioms. Controllers that support I2C can also support most
+ SMBus operations, but SMBus controllers don't support all the protocol
+ options that an I2C controller will.
+ There are functions to perform various SMBus protocol operations,
+ either using I2C primitives or by issuing SMBus commands to
+ i2c_adapter devices which don't support those I2C operations.
+ </para>
+
+!Iinclude/linux/i2c.h
+!Fdrivers/i2c/i2c-boardinfo.c i2c_register_board_info
+!Edrivers/i2c/i2c-core.c
+ </chapter>
+
+ <chapter id="hsi">
+ <title>High Speed Synchronous Serial Interface (HSI)</title>
+
+ <para>
+ High Speed Synchronous Serial Interface (HSI) is a
+ serial interface mainly used for connecting application
+ engines (APE) with cellular modem engines (CMT) in cellular
+ handsets.
+
+ HSI provides multiplexing for up to 16 logical channels,
+ low-latency and full duplex communication.
+ </para>
+
+!Iinclude/linux/hsi/hsi.h
+!Edrivers/hsi/hsi.c
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/deviceiobook.tmpl b/Documentation/DocBook/deviceiobook.tmpl
new file mode 100644
index 000000000..54199a0dc
--- /dev/null
+++ b/Documentation/DocBook/deviceiobook.tmpl
@@ -0,0 +1,323 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="DoingIO">
+ <bookinfo>
+ <title>Bus-Independent Device Accesses</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Matthew</firstname>
+ <surname>Wilcox</surname>
+ <affiliation>
+ <address>
+ <email>matthew@wil.cx</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <authorgroup>
+ <author>
+ <firstname>Alan</firstname>
+ <surname>Cox</surname>
+ <affiliation>
+ <address>
+ <email>alan@lxorguk.ukuu.org.uk</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2001</year>
+ <holder>Matthew Wilcox</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ Linux provides an API which abstracts performing IO across all busses
+ and devices, allowing device drivers to be written independently of
+ bus type.
+ </para>
+ </chapter>
+
+ <chapter id="bugs">
+ <title>Known Bugs And Assumptions</title>
+ <para>
+ None.
+ </para>
+ </chapter>
+
+ <chapter id="mmio">
+ <title>Memory Mapped IO</title>
+ <sect1 id="getting_access_to_the_device">
+ <title>Getting Access to the Device</title>
+ <para>
+ The most widely supported form of IO is memory mapped IO.
+ That is, a part of the CPU's address space is interpreted
+ not as accesses to memory, but as accesses to a device. Some
+ architectures define devices to be at a fixed address, but most
+ have some method of discovering devices. The PCI bus walk is a
+ good example of such a scheme. This document does not cover how
+ to receive such an address, but assumes you are starting with one.
+ Physical addresses are of type unsigned long.
+ </para>
+
+ <para>
+ This address should not be used directly. Instead, to get an
+ address suitable for passing to the accessor functions described
+ below, you should call <function>ioremap</function>.
+ An address suitable for accessing the device will be returned to you.
+ </para>
+
+ <para>
+ After you've finished using the device (say, in your module's
+ exit routine), call <function>iounmap</function> in order to return
+ the address space to the kernel. Most architectures allocate new
+ address space each time you call <function>ioremap</function>, and
+ they can run out unless you call <function>iounmap</function>.
+ </para>
+ </sect1>
+
+ <sect1 id="accessing_the_device">
+ <title>Accessing the device</title>
+ <para>
+ The part of the interface most used by drivers is reading and
+ writing memory-mapped registers on the device. Linux provides
+ interfaces to read and write 8-bit, 16-bit, 32-bit and 64-bit
+ quantities. Due to a historical accident, these are named byte,
+ word, long and quad accesses. Both read and write accesses are
+ supported; there is no prefetch support at this time.
+ </para>
+
+ <para>
+ The functions are named <function>readb</function>,
+ <function>readw</function>, <function>readl</function>,
+ <function>readq</function>, <function>readb_relaxed</function>,
+ <function>readw_relaxed</function>, <function>readl_relaxed</function>,
+ <function>readq_relaxed</function>, <function>writeb</function>,
+ <function>writew</function>, <function>writel</function> and
+ <function>writeq</function>.
+ </para>
+
+ <para>
+ Some devices (such as framebuffers) would like to use larger
+ transfers than 8 bytes at a time. For these devices, the
+ <function>memcpy_toio</function>, <function>memcpy_fromio</function>
+ and <function>memset_io</function> functions are provided.
+ Do not use memset or memcpy on IO addresses; they
+ are not guaranteed to copy data in order.
+ </para>
+
+ <para>
+ The read and write functions are defined to be ordered. That is the
+ compiler is not permitted to reorder the I/O sequence. When the
+ ordering can be compiler optimised, you can use <function>
+ __readb</function> and friends to indicate the relaxed ordering. Use
+ this with care.
+ </para>
+
+ <para>
+ While the basic functions are defined to be synchronous with respect
+ to each other and ordered with respect to each other the busses the
+ devices sit on may themselves have asynchronicity. In particular many
+ authors are burned by the fact that PCI bus writes are posted
+ asynchronously. A driver author must issue a read from the same
+ device to ensure that writes have occurred in the specific cases the
+ author cares. This kind of property cannot be hidden from driver
+ writers in the API. In some cases, the read used to flush the device
+ may be expected to fail (if the card is resetting, for example). In
+ that case, the read should be done from config space, which is
+ guaranteed to soft-fail if the card doesn't respond.
+ </para>
+
+ <para>
+ The following is an example of flushing a write to a device when
+ the driver would like to ensure the write's effects are visible prior
+ to continuing execution.
+ </para>
+
+<programlisting>
+static inline void
+qla1280_disable_intrs(struct scsi_qla_host *ha)
+{
+ struct device_reg *reg;
+
+ reg = ha->iobase;
+ /* disable risc and host interrupts */
+ WRT_REG_WORD(&amp;reg->ictrl, 0);
+ /*
+ * The following read will ensure that the above write
+ * has been received by the device before we return from this
+ * function.
+ */
+ RD_REG_WORD(&amp;reg->ictrl);
+ ha->flags.ints_enabled = 0;
+}
+</programlisting>
+
+ <para>
+ In addition to write posting, on some large multiprocessing systems
+ (e.g. SGI Challenge, Origin and Altix machines) posted writes won't
+ be strongly ordered coming from different CPUs. Thus it's important
+ to properly protect parts of your driver that do memory-mapped writes
+ with locks and use the <function>mmiowb</function> to make sure they
+ arrive in the order intended. Issuing a regular <function>readX
+ </function> will also ensure write ordering, but should only be used
+ when the driver has to be sure that the write has actually arrived
+ at the device (not that it's simply ordered with respect to other
+ writes), since a full <function>readX</function> is a relatively
+ expensive operation.
+ </para>
+
+ <para>
+ Generally, one should use <function>mmiowb</function> prior to
+ releasing a spinlock that protects regions using <function>writeb
+ </function> or similar functions that aren't surrounded by <function>
+ readb</function> calls, which will ensure ordering and flushing. The
+ following pseudocode illustrates what might occur if write ordering
+ isn't guaranteed via <function>mmiowb</function> or one of the
+ <function>readX</function> functions.
+ </para>
+
+<programlisting>
+CPU A: spin_lock_irqsave(&amp;dev_lock, flags)
+CPU A: ...
+CPU A: writel(newval, ring_ptr);
+CPU A: spin_unlock_irqrestore(&amp;dev_lock, flags)
+ ...
+CPU B: spin_lock_irqsave(&amp;dev_lock, flags)
+CPU B: writel(newval2, ring_ptr);
+CPU B: ...
+CPU B: spin_unlock_irqrestore(&amp;dev_lock, flags)
+</programlisting>
+
+ <para>
+ In the case above, newval2 could be written to ring_ptr before
+ newval. Fixing it is easy though:
+ </para>
+
+<programlisting>
+CPU A: spin_lock_irqsave(&amp;dev_lock, flags)
+CPU A: ...
+CPU A: writel(newval, ring_ptr);
+CPU A: mmiowb(); /* ensure no other writes beat us to the device */
+CPU A: spin_unlock_irqrestore(&amp;dev_lock, flags)
+ ...
+CPU B: spin_lock_irqsave(&amp;dev_lock, flags)
+CPU B: writel(newval2, ring_ptr);
+CPU B: ...
+CPU B: mmiowb();
+CPU B: spin_unlock_irqrestore(&amp;dev_lock, flags)
+</programlisting>
+
+ <para>
+ See tg3.c for a real world example of how to use <function>mmiowb
+ </function>
+ </para>
+
+ <para>
+ PCI ordering rules also guarantee that PIO read responses arrive
+ after any outstanding DMA writes from that bus, since for some devices
+ the result of a <function>readb</function> call may signal to the
+ driver that a DMA transaction is complete. In many cases, however,
+ the driver may want to indicate that the next
+ <function>readb</function> call has no relation to any previous DMA
+ writes performed by the device. The driver can use
+ <function>readb_relaxed</function> for these cases, although only
+ some platforms will honor the relaxed semantics. Using the relaxed
+ read functions will provide significant performance benefits on
+ platforms that support it. The qla2xxx driver provides examples
+ of how to use <function>readX_relaxed</function>. In many cases,
+ a majority of the driver's <function>readX</function> calls can
+ safely be converted to <function>readX_relaxed</function> calls, since
+ only a few will indicate or depend on DMA completion.
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="port_space_accesses">
+ <title>Port Space Accesses</title>
+ <sect1 id="port_space_explained">
+ <title>Port Space Explained</title>
+
+ <para>
+ Another form of IO commonly supported is Port Space. This is a
+ range of addresses separate to the normal memory address space.
+ Access to these addresses is generally not as fast as accesses
+ to the memory mapped addresses, and it also has a potentially
+ smaller address space.
+ </para>
+
+ <para>
+ Unlike memory mapped IO, no preparation is required
+ to access port space.
+ </para>
+
+ </sect1>
+ <sect1 id="accessing_port_space">
+ <title>Accessing Port Space</title>
+ <para>
+ Accesses to this space are provided through a set of functions
+ which allow 8-bit, 16-bit and 32-bit accesses; also
+ known as byte, word and long. These functions are
+ <function>inb</function>, <function>inw</function>,
+ <function>inl</function>, <function>outb</function>,
+ <function>outw</function> and <function>outl</function>.
+ </para>
+
+ <para>
+ Some variants are provided for these functions. Some devices
+ require that accesses to their ports are slowed down. This
+ functionality is provided by appending a <function>_p</function>
+ to the end of the function. There are also equivalents to memcpy.
+ The <function>ins</function> and <function>outs</function>
+ functions copy bytes, words or longs to the given port.
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="pubfunctions">
+ <title>Public Functions Provided</title>
+!Iarch/x86/include/asm/io.h
+!Elib/pci_iomap.c
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl
new file mode 100644
index 000000000..9765a4c08
--- /dev/null
+++ b/Documentation/DocBook/drm.tmpl
@@ -0,0 +1,4231 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="drmDevelopersGuide">
+ <bookinfo>
+ <title>Linux DRM Developer's Guide</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Jesse</firstname>
+ <surname>Barnes</surname>
+ <contrib>Initial version</contrib>
+ <affiliation>
+ <orgname>Intel Corporation</orgname>
+ <address>
+ <email>jesse.barnes@intel.com</email>
+ </address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Laurent</firstname>
+ <surname>Pinchart</surname>
+ <contrib>Driver internals</contrib>
+ <affiliation>
+ <orgname>Ideas on board SPRL</orgname>
+ <address>
+ <email>laurent.pinchart@ideasonboard.com</email>
+ </address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Daniel</firstname>
+ <surname>Vetter</surname>
+ <contrib>Contributions all over the place</contrib>
+ <affiliation>
+ <orgname>Intel Corporation</orgname>
+ <address>
+ <email>daniel.vetter@ffwll.ch</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2008-2009</year>
+ <year>2013-2014</year>
+ <holder>Intel Corporation</holder>
+ </copyright>
+ <copyright>
+ <year>2012</year>
+ <holder>Laurent Pinchart</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ The contents of this file may be used under the terms of the GNU
+ General Public License version 2 (the "GPL") as distributed in
+ the kernel source COPYING file.
+ </para>
+ </legalnotice>
+
+ <revhistory>
+ <!-- Put document revisions here, newest first. -->
+ <revision>
+ <revnumber>1.0</revnumber>
+ <date>2012-07-13</date>
+ <authorinitials>LP</authorinitials>
+ <revremark>Added extensive documentation about driver internals.
+ </revremark>
+ </revision>
+ </revhistory>
+ </bookinfo>
+
+<toc></toc>
+
+<part id="drmCore">
+ <title>DRM Core</title>
+ <partintro>
+ <para>
+ This first part of the DRM Developer's Guide documents core DRM code,
+ helper libraries for writing drivers and generic userspace interfaces
+ exposed by DRM drivers.
+ </para>
+ </partintro>
+
+ <chapter id="drmIntroduction">
+ <title>Introduction</title>
+ <para>
+ The Linux DRM layer contains code intended to support the needs
+ of complex graphics devices, usually containing programmable
+ pipelines well suited to 3D graphics acceleration. Graphics
+ drivers in the kernel may make use of DRM functions to make
+ tasks like memory management, interrupt handling and DMA easier,
+ and provide a uniform interface to applications.
+ </para>
+ <para>
+ A note on versions: this guide covers features found in the DRM
+ tree, including the TTM memory manager, output configuration and
+ mode setting, and the new vblank internals, in addition to all
+ the regular features found in current kernels.
+ </para>
+ <para>
+ [Insert diagram of typical DRM stack here]
+ </para>
+ </chapter>
+
+ <!-- Internals -->
+
+ <chapter id="drmInternals">
+ <title>DRM Internals</title>
+ <para>
+ This chapter documents DRM internals relevant to driver authors
+ and developers working to add support for the latest features to
+ existing drivers.
+ </para>
+ <para>
+ First, we go over some typical driver initialization
+ requirements, like setting up command buffers, creating an
+ initial output configuration, and initializing core services.
+ Subsequent sections cover core internals in more detail,
+ providing implementation notes and examples.
+ </para>
+ <para>
+ The DRM layer provides several services to graphics drivers,
+ many of them driven by the application interfaces it provides
+ through libdrm, the library that wraps most of the DRM ioctls.
+ These include vblank event handling, memory
+ management, output management, framebuffer management, command
+ submission &amp; fencing, suspend/resume support, and DMA
+ services.
+ </para>
+
+ <!-- Internals: driver init -->
+
+ <sect1>
+ <title>Driver Initialization</title>
+ <para>
+ At the core of every DRM driver is a <structname>drm_driver</structname>
+ structure. Drivers typically statically initialize a drm_driver structure,
+ and then pass it to one of the <function>drm_*_init()</function> functions
+ to register it with the DRM subsystem.
+ </para>
+ <para>
+ Newer drivers that no longer require a <structname>drm_bus</structname>
+ structure can alternatively use the low-level device initialization and
+ registration functions such as <function>drm_dev_alloc()</function> and
+ <function>drm_dev_register()</function> directly.
+ </para>
+ <para>
+ The <structname>drm_driver</structname> structure contains static
+ information that describes the driver and features it supports, and
+ pointers to methods that the DRM core will call to implement the DRM API.
+ We will first go through the <structname>drm_driver</structname> static
+ information fields, and will then describe individual operations in
+ details as they get used in later sections.
+ </para>
+ <sect2>
+ <title>Driver Information</title>
+ <sect3>
+ <title>Driver Features</title>
+ <para>
+ Drivers inform the DRM core about their requirements and supported
+ features by setting appropriate flags in the
+ <structfield>driver_features</structfield> field. Since those flags
+ influence the DRM core behaviour since registration time, most of them
+ must be set to registering the <structname>drm_driver</structname>
+ instance.
+ </para>
+ <synopsis>u32 driver_features;</synopsis>
+ <variablelist>
+ <title>Driver Feature Flags</title>
+ <varlistentry>
+ <term>DRIVER_USE_AGP</term>
+ <listitem><para>
+ Driver uses AGP interface, the DRM core will manage AGP resources.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_REQUIRE_AGP</term>
+ <listitem><para>
+ Driver needs AGP interface to function. AGP initialization failure
+ will become a fatal error.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_PCI_DMA</term>
+ <listitem><para>
+ Driver is capable of PCI DMA, mapping of PCI DMA buffers to
+ userspace will be enabled. Deprecated.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_SG</term>
+ <listitem><para>
+ Driver can perform scatter/gather DMA, allocation and mapping of
+ scatter/gather buffers will be enabled. Deprecated.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_HAVE_DMA</term>
+ <listitem><para>
+ Driver supports DMA, the userspace DMA API will be supported.
+ Deprecated.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_HAVE_IRQ</term><term>DRIVER_IRQ_SHARED</term>
+ <listitem><para>
+ DRIVER_HAVE_IRQ indicates whether the driver has an IRQ handler
+ managed by the DRM Core. The core will support simple IRQ handler
+ installation when the flag is set. The installation process is
+ described in <xref linkend="drm-irq-registration"/>.</para>
+ <para>DRIVER_IRQ_SHARED indicates whether the device &amp; handler
+ support shared IRQs (note that this is required of PCI drivers).
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_GEM</term>
+ <listitem><para>
+ Driver use the GEM memory manager.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_MODESET</term>
+ <listitem><para>
+ Driver supports mode setting interfaces (KMS).
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_PRIME</term>
+ <listitem><para>
+ Driver implements DRM PRIME buffer sharing.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_RENDER</term>
+ <listitem><para>
+ Driver supports dedicated render nodes.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRIVER_ATOMIC</term>
+ <listitem><para>
+ Driver supports atomic properties. In this case the driver
+ must implement appropriate obj->atomic_get_property() vfuncs
+ for any modeset objects with driver specific properties.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </sect3>
+ <sect3>
+ <title>Major, Minor and Patchlevel</title>
+ <synopsis>int major;
+int minor;
+int patchlevel;</synopsis>
+ <para>
+ The DRM core identifies driver versions by a major, minor and patch
+ level triplet. The information is printed to the kernel log at
+ initialization time and passed to userspace through the
+ DRM_IOCTL_VERSION ioctl.
+ </para>
+ <para>
+ The major and minor numbers are also used to verify the requested driver
+ API version passed to DRM_IOCTL_SET_VERSION. When the driver API changes
+ between minor versions, applications can call DRM_IOCTL_SET_VERSION to
+ select a specific version of the API. If the requested major isn't equal
+ to the driver major, or the requested minor is larger than the driver
+ minor, the DRM_IOCTL_SET_VERSION call will return an error. Otherwise
+ the driver's set_version() method will be called with the requested
+ version.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Name, Description and Date</title>
+ <synopsis>char *name;
+char *desc;
+char *date;</synopsis>
+ <para>
+ The driver name is printed to the kernel log at initialization time,
+ used for IRQ registration and passed to userspace through
+ DRM_IOCTL_VERSION.
+ </para>
+ <para>
+ The driver description is a purely informative string passed to
+ userspace through the DRM_IOCTL_VERSION ioctl and otherwise unused by
+ the kernel.
+ </para>
+ <para>
+ The driver date, formatted as YYYYMMDD, is meant to identify the date of
+ the latest modification to the driver. However, as most drivers fail to
+ update it, its value is mostly useless. The DRM core prints it to the
+ kernel log at initialization time and passes it to userspace through the
+ DRM_IOCTL_VERSION ioctl.
+ </para>
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>Device Registration</title>
+ <para>
+ A number of functions are provided to help with device registration.
+ The functions deal with PCI and platform devices, respectively.
+ </para>
+!Edrivers/gpu/drm/drm_pci.c
+!Edrivers/gpu/drm/drm_platform.c
+ <para>
+ New drivers that no longer rely on the services provided by the
+ <structname>drm_bus</structname> structure can call the low-level
+ device registration functions directly. The
+ <function>drm_dev_alloc()</function> function can be used to allocate
+ and initialize a new <structname>drm_device</structname> structure.
+ Drivers will typically want to perform some additional setup on this
+ structure, such as allocating driver-specific data and storing a
+ pointer to it in the DRM device's <structfield>dev_private</structfield>
+ field. Drivers should also set the device's unique name using the
+ <function>drm_dev_set_unique()</function> function. After it has been
+ set up a device can be registered with the DRM subsystem by calling
+ <function>drm_dev_register()</function>. This will cause the device to
+ be exposed to userspace and will call the driver's
+ <structfield>.load()</structfield> implementation. When a device is
+ removed, the DRM device can safely be unregistered and freed by calling
+ <function>drm_dev_unregister()</function> followed by a call to
+ <function>drm_dev_unref()</function>.
+ </para>
+!Edrivers/gpu/drm/drm_drv.c
+ </sect2>
+ <sect2>
+ <title>Driver Load</title>
+ <para>
+ The <methodname>load</methodname> method is the driver and device
+ initialization entry point. The method is responsible for allocating and
+ initializing driver private data, performing resource allocation and
+ mapping (e.g. acquiring
+ clocks, mapping registers or allocating command buffers), initializing
+ the memory manager (<xref linkend="drm-memory-management"/>), installing
+ the IRQ handler (<xref linkend="drm-irq-registration"/>), setting up
+ vertical blanking handling (<xref linkend="drm-vertical-blank"/>), mode
+ setting (<xref linkend="drm-mode-setting"/>) and initial output
+ configuration (<xref linkend="drm-kms-init"/>).
+ </para>
+ <note><para>
+ If compatibility is a concern (e.g. with drivers converted over from
+ User Mode Setting to Kernel Mode Setting), care must be taken to prevent
+ device initialization and control that is incompatible with currently
+ active userspace drivers. For instance, if user level mode setting
+ drivers are in use, it would be problematic to perform output discovery
+ &amp; configuration at load time. Likewise, if user-level drivers
+ unaware of memory management are in use, memory management and command
+ buffer setup may need to be omitted. These requirements are
+ driver-specific, and care needs to be taken to keep both old and new
+ applications and libraries working.
+ </para></note>
+ <synopsis>int (*load) (struct drm_device *, unsigned long flags);</synopsis>
+ <para>
+ The method takes two arguments, a pointer to the newly created
+ <structname>drm_device</structname> and flags. The flags are used to
+ pass the <structfield>driver_data</structfield> field of the device id
+ corresponding to the device passed to <function>drm_*_init()</function>.
+ Only PCI devices currently use this, USB and platform DRM drivers have
+ their <methodname>load</methodname> method called with flags to 0.
+ </para>
+ <sect3>
+ <title>Driver Private Data</title>
+ <para>
+ The driver private hangs off the main
+ <structname>drm_device</structname> structure and can be used for
+ tracking various device-specific bits of information, like register
+ offsets, command buffer status, register state for suspend/resume, etc.
+ At load time, a driver may simply allocate one and set
+ <structname>drm_device</structname>.<structfield>dev_priv</structfield>
+ appropriately; it should be freed and
+ <structname>drm_device</structname>.<structfield>dev_priv</structfield>
+ set to NULL when the driver is unloaded.
+ </para>
+ </sect3>
+ <sect3 id="drm-irq-registration">
+ <title>IRQ Registration</title>
+ <para>
+ The DRM core tries to facilitate IRQ handler registration and
+ unregistration by providing <function>drm_irq_install</function> and
+ <function>drm_irq_uninstall</function> functions. Those functions only
+ support a single interrupt per device, devices that use more than one
+ IRQs need to be handled manually.
+ </para>
+ <sect4>
+ <title>Managed IRQ Registration</title>
+ <para>
+ <function>drm_irq_install</function> starts by calling the
+ <methodname>irq_preinstall</methodname> driver operation. The operation
+ is optional and must make sure that the interrupt will not get fired by
+ clearing all pending interrupt flags or disabling the interrupt.
+ </para>
+ <para>
+ The passed-in IRQ will then be requested by a call to
+ <function>request_irq</function>. If the DRIVER_IRQ_SHARED driver
+ feature flag is set, a shared (IRQF_SHARED) IRQ handler will be
+ requested.
+ </para>
+ <para>
+ The IRQ handler function must be provided as the mandatory irq_handler
+ driver operation. It will get passed directly to
+ <function>request_irq</function> and thus has the same prototype as all
+ IRQ handlers. It will get called with a pointer to the DRM device as the
+ second argument.
+ </para>
+ <para>
+ Finally the function calls the optional
+ <methodname>irq_postinstall</methodname> driver operation. The operation
+ usually enables interrupts (excluding the vblank interrupt, which is
+ enabled separately), but drivers may choose to enable/disable interrupts
+ at a different time.
+ </para>
+ <para>
+ <function>drm_irq_uninstall</function> is similarly used to uninstall an
+ IRQ handler. It starts by waking up all processes waiting on a vblank
+ interrupt to make sure they don't hang, and then calls the optional
+ <methodname>irq_uninstall</methodname> driver operation. The operation
+ must disable all hardware interrupts. Finally the function frees the IRQ
+ by calling <function>free_irq</function>.
+ </para>
+ </sect4>
+ <sect4>
+ <title>Manual IRQ Registration</title>
+ <para>
+ Drivers that require multiple interrupt handlers can't use the managed
+ IRQ registration functions. In that case IRQs must be registered and
+ unregistered manually (usually with the <function>request_irq</function>
+ and <function>free_irq</function> functions, or their devm_* equivalent).
+ </para>
+ <para>
+ When manually registering IRQs, drivers must not set the DRIVER_HAVE_IRQ
+ driver feature flag, and must not provide the
+ <methodname>irq_handler</methodname> driver operation. They must set the
+ <structname>drm_device</structname> <structfield>irq_enabled</structfield>
+ field to 1 upon registration of the IRQs, and clear it to 0 after
+ unregistering the IRQs.
+ </para>
+ </sect4>
+ </sect3>
+ <sect3>
+ <title>Memory Manager Initialization</title>
+ <para>
+ Every DRM driver requires a memory manager which must be initialized at
+ load time. DRM currently contains two memory managers, the Translation
+ Table Manager (TTM) and the Graphics Execution Manager (GEM).
+ This document describes the use of the GEM memory manager only. See
+ <xref linkend="drm-memory-management"/> for details.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Miscellaneous Device Configuration</title>
+ <para>
+ Another task that may be necessary for PCI devices during configuration
+ is mapping the video BIOS. On many devices, the VBIOS describes device
+ configuration, LCD panel timings (if any), and contains flags indicating
+ device state. Mapping the BIOS can be done using the pci_map_rom() call,
+ a convenience function that takes care of mapping the actual ROM,
+ whether it has been shadowed into memory (typically at address 0xc0000)
+ or exists on the PCI device in the ROM BAR. Note that after the ROM has
+ been mapped and any necessary information has been extracted, it should
+ be unmapped; on many devices, the ROM address decoder is shared with
+ other BARs, so leaving it mapped could cause undesired behaviour like
+ hangs or memory corruption.
+ <!--!Fdrivers/pci/rom.c pci_map_rom-->
+ </para>
+ </sect3>
+ </sect2>
+ </sect1>
+
+ <!-- Internals: memory management -->
+
+ <sect1 id="drm-memory-management">
+ <title>Memory management</title>
+ <para>
+ Modern Linux systems require large amount of graphics memory to store
+ frame buffers, textures, vertices and other graphics-related data. Given
+ the very dynamic nature of many of that data, managing graphics memory
+ efficiently is thus crucial for the graphics stack and plays a central
+ role in the DRM infrastructure.
+ </para>
+ <para>
+ The DRM core includes two memory managers, namely Translation Table Maps
+ (TTM) and Graphics Execution Manager (GEM). TTM was the first DRM memory
+ manager to be developed and tried to be a one-size-fits-them all
+ solution. It provides a single userspace API to accommodate the need of
+ all hardware, supporting both Unified Memory Architecture (UMA) devices
+ and devices with dedicated video RAM (i.e. most discrete video cards).
+ This resulted in a large, complex piece of code that turned out to be
+ hard to use for driver development.
+ </para>
+ <para>
+ GEM started as an Intel-sponsored project in reaction to TTM's
+ complexity. Its design philosophy is completely different: instead of
+ providing a solution to every graphics memory-related problems, GEM
+ identified common code between drivers and created a support library to
+ share it. GEM has simpler initialization and execution requirements than
+ TTM, but has no video RAM management capabilities and is thus limited to
+ UMA devices.
+ </para>
+ <sect2>
+ <title>The Translation Table Manager (TTM)</title>
+ <para>
+ TTM design background and information belongs here.
+ </para>
+ <sect3>
+ <title>TTM initialization</title>
+ <warning><para>This section is outdated.</para></warning>
+ <para>
+ Drivers wishing to support TTM must fill out a drm_bo_driver
+ structure. The structure contains several fields with function
+ pointers for initializing the TTM, allocating and freeing memory,
+ waiting for command completion and fence synchronization, and memory
+ migration. See the radeon_ttm.c file for an example of usage.
+ </para>
+ <para>
+ The ttm_global_reference structure is made up of several fields:
+ </para>
+ <programlisting>
+ struct ttm_global_reference {
+ enum ttm_global_types global_type;
+ size_t size;
+ void *object;
+ int (*init) (struct ttm_global_reference *);
+ void (*release) (struct ttm_global_reference *);
+ };
+ </programlisting>
+ <para>
+ There should be one global reference structure for your memory
+ manager as a whole, and there will be others for each object
+ created by the memory manager at runtime. Your global TTM should
+ have a type of TTM_GLOBAL_TTM_MEM. The size field for the global
+ object should be sizeof(struct ttm_mem_global), and the init and
+ release hooks should point at your driver-specific init and
+ release routines, which probably eventually call
+ ttm_mem_global_init and ttm_mem_global_release, respectively.
+ </para>
+ <para>
+ Once your global TTM accounting structure is set up and initialized
+ by calling ttm_global_item_ref() on it,
+ you need to create a buffer object TTM to
+ provide a pool for buffer object allocation by clients and the
+ kernel itself. The type of this object should be TTM_GLOBAL_TTM_BO,
+ and its size should be sizeof(struct ttm_bo_global). Again,
+ driver-specific init and release functions may be provided,
+ likely eventually calling ttm_bo_global_init() and
+ ttm_bo_global_release(), respectively. Also, like the previous
+ object, ttm_global_item_ref() is used to create an initial reference
+ count for the TTM, which will call your initialization function.
+ </para>
+ </sect3>
+ </sect2>
+ <sect2 id="drm-gem">
+ <title>The Graphics Execution Manager (GEM)</title>
+ <para>
+ The GEM design approach has resulted in a memory manager that doesn't
+ provide full coverage of all (or even all common) use cases in its
+ userspace or kernel API. GEM exposes a set of standard memory-related
+ operations to userspace and a set of helper functions to drivers, and let
+ drivers implement hardware-specific operations with their own private API.
+ </para>
+ <para>
+ The GEM userspace API is described in the
+ <ulink url="http://lwn.net/Articles/283798/"><citetitle>GEM - the Graphics
+ Execution Manager</citetitle></ulink> article on LWN. While slightly
+ outdated, the document provides a good overview of the GEM API principles.
+ Buffer allocation and read and write operations, described as part of the
+ common GEM API, are currently implemented using driver-specific ioctls.
+ </para>
+ <para>
+ GEM is data-agnostic. It manages abstract buffer objects without knowing
+ what individual buffers contain. APIs that require knowledge of buffer
+ contents or purpose, such as buffer allocation or synchronization
+ primitives, are thus outside of the scope of GEM and must be implemented
+ using driver-specific ioctls.
+ </para>
+ <para>
+ On a fundamental level, GEM involves several operations:
+ <itemizedlist>
+ <listitem>Memory allocation and freeing</listitem>
+ <listitem>Command execution</listitem>
+ <listitem>Aperture management at command execution time</listitem>
+ </itemizedlist>
+ Buffer object allocation is relatively straightforward and largely
+ provided by Linux's shmem layer, which provides memory to back each
+ object.
+ </para>
+ <para>
+ Device-specific operations, such as command execution, pinning, buffer
+ read &amp; write, mapping, and domain ownership transfers are left to
+ driver-specific ioctls.
+ </para>
+ <sect3>
+ <title>GEM Initialization</title>
+ <para>
+ Drivers that use GEM must set the DRIVER_GEM bit in the struct
+ <structname>drm_driver</structname>
+ <structfield>driver_features</structfield> field. The DRM core will
+ then automatically initialize the GEM core before calling the
+ <methodname>load</methodname> operation. Behind the scene, this will
+ create a DRM Memory Manager object which provides an address space
+ pool for object allocation.
+ </para>
+ <para>
+ In a KMS configuration, drivers need to allocate and initialize a
+ command ring buffer following core GEM initialization if required by
+ the hardware. UMA devices usually have what is called a "stolen"
+ memory region, which provides space for the initial framebuffer and
+ large, contiguous memory regions required by the device. This space is
+ typically not managed by GEM, and must be initialized separately into
+ its own DRM MM object.
+ </para>
+ </sect3>
+ <sect3>
+ <title>GEM Objects Creation</title>
+ <para>
+ GEM splits creation of GEM objects and allocation of the memory that
+ backs them in two distinct operations.
+ </para>
+ <para>
+ GEM objects are represented by an instance of struct
+ <structname>drm_gem_object</structname>. Drivers usually need to extend
+ GEM objects with private information and thus create a driver-specific
+ GEM object structure type that embeds an instance of struct
+ <structname>drm_gem_object</structname>.
+ </para>
+ <para>
+ To create a GEM object, a driver allocates memory for an instance of its
+ specific GEM object type and initializes the embedded struct
+ <structname>drm_gem_object</structname> with a call to
+ <function>drm_gem_object_init</function>. The function takes a pointer to
+ the DRM device, a pointer to the GEM object and the buffer object size
+ in bytes.
+ </para>
+ <para>
+ GEM uses shmem to allocate anonymous pageable memory.
+ <function>drm_gem_object_init</function> will create an shmfs file of
+ the requested size and store it into the struct
+ <structname>drm_gem_object</structname> <structfield>filp</structfield>
+ field. The memory is used as either main storage for the object when the
+ graphics hardware uses system memory directly or as a backing store
+ otherwise.
+ </para>
+ <para>
+ Drivers are responsible for the actual physical pages allocation by
+ calling <function>shmem_read_mapping_page_gfp</function> for each page.
+ Note that they can decide to allocate pages when initializing the GEM
+ object, or to delay allocation until the memory is needed (for instance
+ when a page fault occurs as a result of a userspace memory access or
+ when the driver needs to start a DMA transfer involving the memory).
+ </para>
+ <para>
+ Anonymous pageable memory allocation is not always desired, for instance
+ when the hardware requires physically contiguous system memory as is
+ often the case in embedded devices. Drivers can create GEM objects with
+ no shmfs backing (called private GEM objects) by initializing them with
+ a call to <function>drm_gem_private_object_init</function> instead of
+ <function>drm_gem_object_init</function>. Storage for private GEM
+ objects must be managed by drivers.
+ </para>
+ <para>
+ Drivers that do not need to extend GEM objects with private information
+ can call the <function>drm_gem_object_alloc</function> function to
+ allocate and initialize a struct <structname>drm_gem_object</structname>
+ instance. The GEM core will call the optional driver
+ <methodname>gem_init_object</methodname> operation after initializing
+ the GEM object with <function>drm_gem_object_init</function>.
+ <synopsis>int (*gem_init_object) (struct drm_gem_object *obj);</synopsis>
+ </para>
+ <para>
+ No alloc-and-init function exists for private GEM objects.
+ </para>
+ </sect3>
+ <sect3>
+ <title>GEM Objects Lifetime</title>
+ <para>
+ All GEM objects are reference-counted by the GEM core. References can be
+ acquired and release by <function>calling drm_gem_object_reference</function>
+ and <function>drm_gem_object_unreference</function> respectively. The
+ caller must hold the <structname>drm_device</structname>
+ <structfield>struct_mutex</structfield> lock. As a convenience, GEM
+ provides the <function>drm_gem_object_reference_unlocked</function> and
+ <function>drm_gem_object_unreference_unlocked</function> functions that
+ can be called without holding the lock.
+ </para>
+ <para>
+ When the last reference to a GEM object is released the GEM core calls
+ the <structname>drm_driver</structname>
+ <methodname>gem_free_object</methodname> operation. That operation is
+ mandatory for GEM-enabled drivers and must free the GEM object and all
+ associated resources.
+ </para>
+ <para>
+ <synopsis>void (*gem_free_object) (struct drm_gem_object *obj);</synopsis>
+ Drivers are responsible for freeing all GEM object resources, including
+ the resources created by the GEM core. If an mmap offset has been
+ created for the object (in which case
+ <structname>drm_gem_object</structname>::<structfield>map_list</structfield>::<structfield>map</structfield>
+ is not NULL) it must be freed by a call to
+ <function>drm_gem_free_mmap_offset</function>. The shmfs backing store
+ must be released by calling <function>drm_gem_object_release</function>
+ (that function can safely be called if no shmfs backing store has been
+ created).
+ </para>
+ </sect3>
+ <sect3>
+ <title>GEM Objects Naming</title>
+ <para>
+ Communication between userspace and the kernel refers to GEM objects
+ using local handles, global names or, more recently, file descriptors.
+ All of those are 32-bit integer values; the usual Linux kernel limits
+ apply to the file descriptors.
+ </para>
+ <para>
+ GEM handles are local to a DRM file. Applications get a handle to a GEM
+ object through a driver-specific ioctl, and can use that handle to refer
+ to the GEM object in other standard or driver-specific ioctls. Closing a
+ DRM file handle frees all its GEM handles and dereferences the
+ associated GEM objects.
+ </para>
+ <para>
+ To create a handle for a GEM object drivers call
+ <function>drm_gem_handle_create</function>. The function takes a pointer
+ to the DRM file and the GEM object and returns a locally unique handle.
+ When the handle is no longer needed drivers delete it with a call to
+ <function>drm_gem_handle_delete</function>. Finally the GEM object
+ associated with a handle can be retrieved by a call to
+ <function>drm_gem_object_lookup</function>.
+ </para>
+ <para>
+ Handles don't take ownership of GEM objects, they only take a reference
+ to the object that will be dropped when the handle is destroyed. To
+ avoid leaking GEM objects, drivers must make sure they drop the
+ reference(s) they own (such as the initial reference taken at object
+ creation time) as appropriate, without any special consideration for the
+ handle. For example, in the particular case of combined GEM object and
+ handle creation in the implementation of the
+ <methodname>dumb_create</methodname> operation, drivers must drop the
+ initial reference to the GEM object before returning the handle.
+ </para>
+ <para>
+ GEM names are similar in purpose to handles but are not local to DRM
+ files. They can be passed between processes to reference a GEM object
+ globally. Names can't be used directly to refer to objects in the DRM
+ API, applications must convert handles to names and names to handles
+ using the DRM_IOCTL_GEM_FLINK and DRM_IOCTL_GEM_OPEN ioctls
+ respectively. The conversion is handled by the DRM core without any
+ driver-specific support.
+ </para>
+ <para>
+ GEM also supports buffer sharing with dma-buf file descriptors through
+ PRIME. GEM-based drivers must use the provided helpers functions to
+ implement the exporting and importing correctly. See <xref linkend="drm-prime-support" />.
+ Since sharing file descriptors is inherently more secure than the
+ easily guessable and global GEM names it is the preferred buffer
+ sharing mechanism. Sharing buffers through GEM names is only supported
+ for legacy userspace. Furthermore PRIME also allows cross-device
+ buffer sharing since it is based on dma-bufs.
+ </para>
+ </sect3>
+ <sect3 id="drm-gem-objects-mapping">
+ <title>GEM Objects Mapping</title>
+ <para>
+ Because mapping operations are fairly heavyweight GEM favours
+ read/write-like access to buffers, implemented through driver-specific
+ ioctls, over mapping buffers to userspace. However, when random access
+ to the buffer is needed (to perform software rendering for instance),
+ direct access to the object can be more efficient.
+ </para>
+ <para>
+ The mmap system call can't be used directly to map GEM objects, as they
+ don't have their own file handle. Two alternative methods currently
+ co-exist to map GEM objects to userspace. The first method uses a
+ driver-specific ioctl to perform the mapping operation, calling
+ <function>do_mmap</function> under the hood. This is often considered
+ dubious, seems to be discouraged for new GEM-enabled drivers, and will
+ thus not be described here.
+ </para>
+ <para>
+ The second method uses the mmap system call on the DRM file handle.
+ <synopsis>void *mmap(void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset);</synopsis>
+ DRM identifies the GEM object to be mapped by a fake offset passed
+ through the mmap offset argument. Prior to being mapped, a GEM object
+ must thus be associated with a fake offset. To do so, drivers must call
+ <function>drm_gem_create_mmap_offset</function> on the object. The
+ function allocates a fake offset range from a pool and stores the
+ offset divided by PAGE_SIZE in
+ <literal>obj-&gt;map_list.hash.key</literal>. Care must be taken not to
+ call <function>drm_gem_create_mmap_offset</function> if a fake offset
+ has already been allocated for the object. This can be tested by
+ <literal>obj-&gt;map_list.map</literal> being non-NULL.
+ </para>
+ <para>
+ Once allocated, the fake offset value
+ (<literal>obj-&gt;map_list.hash.key &lt;&lt; PAGE_SHIFT</literal>)
+ must be passed to the application in a driver-specific way and can then
+ be used as the mmap offset argument.
+ </para>
+ <para>
+ The GEM core provides a helper method <function>drm_gem_mmap</function>
+ to handle object mapping. The method can be set directly as the mmap
+ file operation handler. It will look up the GEM object based on the
+ offset value and set the VMA operations to the
+ <structname>drm_driver</structname> <structfield>gem_vm_ops</structfield>
+ field. Note that <function>drm_gem_mmap</function> doesn't map memory to
+ userspace, but relies on the driver-provided fault handler to map pages
+ individually.
+ </para>
+ <para>
+ To use <function>drm_gem_mmap</function>, drivers must fill the struct
+ <structname>drm_driver</structname> <structfield>gem_vm_ops</structfield>
+ field with a pointer to VM operations.
+ </para>
+ <para>
+ <synopsis>struct vm_operations_struct *gem_vm_ops
+
+ struct vm_operations_struct {
+ void (*open)(struct vm_area_struct * area);
+ void (*close)(struct vm_area_struct * area);
+ int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
+ };</synopsis>
+ </para>
+ <para>
+ The <methodname>open</methodname> and <methodname>close</methodname>
+ operations must update the GEM object reference count. Drivers can use
+ the <function>drm_gem_vm_open</function> and
+ <function>drm_gem_vm_close</function> helper functions directly as open
+ and close handlers.
+ </para>
+ <para>
+ The fault operation handler is responsible for mapping individual pages
+ to userspace when a page fault occurs. Depending on the memory
+ allocation scheme, drivers can allocate pages at fault time, or can
+ decide to allocate memory for the GEM object at the time the object is
+ created.
+ </para>
+ <para>
+ Drivers that want to map the GEM object upfront instead of handling page
+ faults can implement their own mmap file operation handler.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Memory Coherency</title>
+ <para>
+ When mapped to the device or used in a command buffer, backing pages
+ for an object are flushed to memory and marked write combined so as to
+ be coherent with the GPU. Likewise, if the CPU accesses an object
+ after the GPU has finished rendering to the object, then the object
+ must be made coherent with the CPU's view of memory, usually involving
+ GPU cache flushing of various kinds. This core CPU&lt;-&gt;GPU
+ coherency management is provided by a device-specific ioctl, which
+ evaluates an object's current domain and performs any necessary
+ flushing or synchronization to put the object into the desired
+ coherency domain (note that the object may be busy, i.e. an active
+ render target; in that case, setting the domain blocks the client and
+ waits for rendering to complete before performing any necessary
+ flushing operations).
+ </para>
+ </sect3>
+ <sect3>
+ <title>Command Execution</title>
+ <para>
+ Perhaps the most important GEM function for GPU devices is providing a
+ command execution interface to clients. Client programs construct
+ command buffers containing references to previously allocated memory
+ objects, and then submit them to GEM. At that point, GEM takes care to
+ bind all the objects into the GTT, execute the buffer, and provide
+ necessary synchronization between clients accessing the same buffers.
+ This often involves evicting some objects from the GTT and re-binding
+ others (a fairly expensive operation), and providing relocation
+ support which hides fixed GTT offsets from clients. Clients must take
+ care not to submit command buffers that reference more objects than
+ can fit in the GTT; otherwise, GEM will reject them and no rendering
+ will occur. Similarly, if several objects in the buffer require fence
+ registers to be allocated for correct rendering (e.g. 2D blits on
+ pre-965 chips), care must be taken not to require more fence registers
+ than are available to the client. Such resource management should be
+ abstracted from the client in libdrm.
+ </para>
+ </sect3>
+ <sect3>
+ <title>GEM Function Reference</title>
+!Edrivers/gpu/drm/drm_gem.c
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>VMA Offset Manager</title>
+!Pdrivers/gpu/drm/drm_vma_manager.c vma offset manager
+!Edrivers/gpu/drm/drm_vma_manager.c
+!Iinclude/drm/drm_vma_manager.h
+ </sect2>
+ <sect2 id="drm-prime-support">
+ <title>PRIME Buffer Sharing</title>
+ <para>
+ PRIME is the cross device buffer sharing framework in drm, originally
+ created for the OPTIMUS range of multi-gpu platforms. To userspace
+ PRIME buffers are dma-buf based file descriptors.
+ </para>
+ <sect3>
+ <title>Overview and Driver Interface</title>
+ <para>
+ Similar to GEM global names, PRIME file descriptors are
+ also used to share buffer objects across processes. They offer
+ additional security: as file descriptors must be explicitly sent over
+ UNIX domain sockets to be shared between applications, they can't be
+ guessed like the globally unique GEM names.
+ </para>
+ <para>
+ Drivers that support the PRIME
+ API must set the DRIVER_PRIME bit in the struct
+ <structname>drm_driver</structname>
+ <structfield>driver_features</structfield> field, and implement the
+ <methodname>prime_handle_to_fd</methodname> and
+ <methodname>prime_fd_to_handle</methodname> operations.
+ </para>
+ <para>
+ <synopsis>int (*prime_handle_to_fd)(struct drm_device *dev,
+ struct drm_file *file_priv, uint32_t handle,
+ uint32_t flags, int *prime_fd);
+int (*prime_fd_to_handle)(struct drm_device *dev,
+ struct drm_file *file_priv, int prime_fd,
+ uint32_t *handle);</synopsis>
+ Those two operations convert a handle to a PRIME file descriptor and
+ vice versa. Drivers must use the kernel dma-buf buffer sharing framework
+ to manage the PRIME file descriptors. Similar to the mode setting
+ API PRIME is agnostic to the underlying buffer object manager, as
+ long as handles are 32bit unsigned integers.
+ </para>
+ <para>
+ While non-GEM drivers must implement the operations themselves, GEM
+ drivers must use the <function>drm_gem_prime_handle_to_fd</function>
+ and <function>drm_gem_prime_fd_to_handle</function> helper functions.
+ Those helpers rely on the driver
+ <methodname>gem_prime_export</methodname> and
+ <methodname>gem_prime_import</methodname> operations to create a dma-buf
+ instance from a GEM object (dma-buf exporter role) and to create a GEM
+ object from a dma-buf instance (dma-buf importer role).
+ </para>
+ <para>
+ <synopsis>struct dma_buf * (*gem_prime_export)(struct drm_device *dev,
+ struct drm_gem_object *obj,
+ int flags);
+struct drm_gem_object * (*gem_prime_import)(struct drm_device *dev,
+ struct dma_buf *dma_buf);</synopsis>
+ These two operations are mandatory for GEM drivers that support
+ PRIME.
+ </para>
+ </sect3>
+ <sect3>
+ <title>PRIME Helper Functions</title>
+!Pdrivers/gpu/drm/drm_prime.c PRIME Helpers
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>PRIME Function References</title>
+!Edrivers/gpu/drm/drm_prime.c
+ </sect2>
+ <sect2>
+ <title>DRM MM Range Allocator</title>
+ <sect3>
+ <title>Overview</title>
+!Pdrivers/gpu/drm/drm_mm.c Overview
+ </sect3>
+ <sect3>
+ <title>LRU Scan/Eviction Support</title>
+!Pdrivers/gpu/drm/drm_mm.c lru scan roaster
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>DRM MM Range Allocator Function References</title>
+!Edrivers/gpu/drm/drm_mm.c
+!Iinclude/drm/drm_mm.h
+ </sect2>
+ <sect2>
+ <title>CMA Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_gem_cma_helper.c cma helpers
+!Edrivers/gpu/drm/drm_gem_cma_helper.c
+!Iinclude/drm/drm_gem_cma_helper.h
+ </sect2>
+ </sect1>
+
+ <!-- Internals: mode setting -->
+
+ <sect1 id="drm-mode-setting">
+ <title>Mode Setting</title>
+ <para>
+ Drivers must initialize the mode setting core by calling
+ <function>drm_mode_config_init</function> on the DRM device. The function
+ initializes the <structname>drm_device</structname>
+ <structfield>mode_config</structfield> field and never fails. Once done,
+ mode configuration must be setup by initializing the following fields.
+ </para>
+ <itemizedlist>
+ <listitem>
+ <synopsis>int min_width, min_height;
+int max_width, max_height;</synopsis>
+ <para>
+ Minimum and maximum width and height of the frame buffers in pixel
+ units.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>struct drm_mode_config_funcs *funcs;</synopsis>
+ <para>Mode setting functions.</para>
+ </listitem>
+ </itemizedlist>
+ <sect2>
+ <title>Display Modes Function Reference</title>
+!Iinclude/drm/drm_modes.h
+!Edrivers/gpu/drm/drm_modes.c
+ </sect2>
+ <sect2>
+ <title>Atomic Mode Setting Function Reference</title>
+!Edrivers/gpu/drm/drm_atomic.c
+ </sect2>
+ <sect2>
+ <title>Frame Buffer Creation</title>
+ <synopsis>struct drm_framebuffer *(*fb_create)(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_mode_fb_cmd2 *mode_cmd);</synopsis>
+ <para>
+ Frame buffers are abstract memory objects that provide a source of
+ pixels to scanout to a CRTC. Applications explicitly request the
+ creation of frame buffers through the DRM_IOCTL_MODE_ADDFB(2) ioctls and
+ receive an opaque handle that can be passed to the KMS CRTC control,
+ plane configuration and page flip functions.
+ </para>
+ <para>
+ Frame buffers rely on the underneath memory manager for low-level memory
+ operations. When creating a frame buffer applications pass a memory
+ handle (or a list of memory handles for multi-planar formats) through
+ the <parameter>drm_mode_fb_cmd2</parameter> argument. For drivers using
+ GEM as their userspace buffer management interface this would be a GEM
+ handle. Drivers are however free to use their own backing storage object
+ handles, e.g. vmwgfx directly exposes special TTM handles to userspace
+ and so expects TTM handles in the create ioctl and not GEM handles.
+ </para>
+ <para>
+ Drivers must first validate the requested frame buffer parameters passed
+ through the mode_cmd argument. In particular this is where invalid
+ sizes, pixel formats or pitches can be caught.
+ </para>
+ <para>
+ If the parameters are deemed valid, drivers then create, initialize and
+ return an instance of struct <structname>drm_framebuffer</structname>.
+ If desired the instance can be embedded in a larger driver-specific
+ structure. Drivers must fill its <structfield>width</structfield>,
+ <structfield>height</structfield>, <structfield>pitches</structfield>,
+ <structfield>offsets</structfield>, <structfield>depth</structfield>,
+ <structfield>bits_per_pixel</structfield> and
+ <structfield>pixel_format</structfield> fields from the values passed
+ through the <parameter>drm_mode_fb_cmd2</parameter> argument. They
+ should call the <function>drm_helper_mode_fill_fb_struct</function>
+ helper function to do so.
+ </para>
+
+ <para>
+ The initialization of the new framebuffer instance is finalized with a
+ call to <function>drm_framebuffer_init</function> which takes a pointer
+ to DRM frame buffer operations (struct
+ <structname>drm_framebuffer_funcs</structname>). Note that this function
+ publishes the framebuffer and so from this point on it can be accessed
+ concurrently from other threads. Hence it must be the last step in the
+ driver's framebuffer initialization sequence. Frame buffer operations
+ are
+ <itemizedlist>
+ <listitem>
+ <synopsis>int (*create_handle)(struct drm_framebuffer *fb,
+ struct drm_file *file_priv, unsigned int *handle);</synopsis>
+ <para>
+ Create a handle to the frame buffer underlying memory object. If
+ the frame buffer uses a multi-plane format, the handle will
+ reference the memory object associated with the first plane.
+ </para>
+ <para>
+ Drivers call <function>drm_gem_handle_create</function> to create
+ the handle.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*destroy)(struct drm_framebuffer *framebuffer);</synopsis>
+ <para>
+ Destroy the frame buffer object and frees all associated
+ resources. Drivers must call
+ <function>drm_framebuffer_cleanup</function> to free resources
+ allocated by the DRM core for the frame buffer object, and must
+ make sure to unreference all memory objects associated with the
+ frame buffer. Handles created by the
+ <methodname>create_handle</methodname> operation are released by
+ the DRM core.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*dirty)(struct drm_framebuffer *framebuffer,
+ struct drm_file *file_priv, unsigned flags, unsigned color,
+ struct drm_clip_rect *clips, unsigned num_clips);</synopsis>
+ <para>
+ This optional operation notifies the driver that a region of the
+ frame buffer has changed in response to a DRM_IOCTL_MODE_DIRTYFB
+ ioctl call.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The lifetime of a drm framebuffer is controlled with a reference count,
+ drivers can grab additional references with
+ <function>drm_framebuffer_reference</function>and drop them
+ again with <function>drm_framebuffer_unreference</function>. For
+ driver-private framebuffers for which the last reference is never
+ dropped (e.g. for the fbdev framebuffer when the struct
+ <structname>drm_framebuffer</structname> is embedded into the fbdev
+ helper struct) drivers can manually clean up a framebuffer at module
+ unload time with
+ <function>drm_framebuffer_unregister_private</function>.
+ </para>
+ </sect2>
+ <sect2>
+ <title>Dumb Buffer Objects</title>
+ <para>
+ The KMS API doesn't standardize backing storage object creation and
+ leaves it to driver-specific ioctls. Furthermore actually creating a
+ buffer object even for GEM-based drivers is done through a
+ driver-specific ioctl - GEM only has a common userspace interface for
+ sharing and destroying objects. While not an issue for full-fledged
+ graphics stacks that include device-specific userspace components (in
+ libdrm for instance), this limit makes DRM-based early boot graphics
+ unnecessarily complex.
+ </para>
+ <para>
+ Dumb objects partly alleviate the problem by providing a standard
+ API to create dumb buffers suitable for scanout, which can then be used
+ to create KMS frame buffers.
+ </para>
+ <para>
+ To support dumb objects drivers must implement the
+ <methodname>dumb_create</methodname>,
+ <methodname>dumb_destroy</methodname> and
+ <methodname>dumb_map_offset</methodname> operations.
+ </para>
+ <itemizedlist>
+ <listitem>
+ <synopsis>int (*dumb_create)(struct drm_file *file_priv, struct drm_device *dev,
+ struct drm_mode_create_dumb *args);</synopsis>
+ <para>
+ The <methodname>dumb_create</methodname> operation creates a driver
+ object (GEM or TTM handle) suitable for scanout based on the
+ width, height and depth from the struct
+ <structname>drm_mode_create_dumb</structname> argument. It fills the
+ argument's <structfield>handle</structfield>,
+ <structfield>pitch</structfield> and <structfield>size</structfield>
+ fields with a handle for the newly created object and its line
+ pitch and size in bytes.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*dumb_destroy)(struct drm_file *file_priv, struct drm_device *dev,
+ uint32_t handle);</synopsis>
+ <para>
+ The <methodname>dumb_destroy</methodname> operation destroys a dumb
+ object created by <methodname>dumb_create</methodname>.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*dumb_map_offset)(struct drm_file *file_priv, struct drm_device *dev,
+ uint32_t handle, uint64_t *offset);</synopsis>
+ <para>
+ The <methodname>dumb_map_offset</methodname> operation associates an
+ mmap fake offset with the object given by the handle and returns
+ it. Drivers must use the
+ <function>drm_gem_create_mmap_offset</function> function to
+ associate the fake offset as described in
+ <xref linkend="drm-gem-objects-mapping"/>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ <para>
+ Note that dumb objects may not be used for gpu acceleration, as has been
+ attempted on some ARM embedded platforms. Such drivers really must have
+ a hardware-specific ioctl to allocate suitable buffer objects.
+ </para>
+ </sect2>
+ <sect2>
+ <title>Output Polling</title>
+ <synopsis>void (*output_poll_changed)(struct drm_device *dev);</synopsis>
+ <para>
+ This operation notifies the driver that the status of one or more
+ connectors has changed. Drivers that use the fb helper can just call the
+ <function>drm_fb_helper_hotplug_event</function> function to handle this
+ operation.
+ </para>
+ </sect2>
+ <sect2>
+ <title>Locking</title>
+ <para>
+ Beside some lookup structures with their own locking (which is hidden
+ behind the interface functions) most of the modeset state is protected
+ by the <code>dev-&lt;mode_config.lock</code> mutex and additionally
+ per-crtc locks to allow cursor updates, pageflips and similar operations
+ to occur concurrently with background tasks like output detection.
+ Operations which cross domains like a full modeset always grab all
+ locks. Drivers there need to protect resources shared between crtcs with
+ additional locking. They also need to be careful to always grab the
+ relevant crtc locks if a modset functions touches crtc state, e.g. for
+ load detection (which does only grab the <code>mode_config.lock</code>
+ to allow concurrent screen updates on live crtcs).
+ </para>
+ </sect2>
+ </sect1>
+
+ <!-- Internals: kms initialization and cleanup -->
+
+ <sect1 id="drm-kms-init">
+ <title>KMS Initialization and Cleanup</title>
+ <para>
+ A KMS device is abstracted and exposed as a set of planes, CRTCs, encoders
+ and connectors. KMS drivers must thus create and initialize all those
+ objects at load time after initializing mode setting.
+ </para>
+ <sect2>
+ <title>CRTCs (struct <structname>drm_crtc</structname>)</title>
+ <para>
+ A CRTC is an abstraction representing a part of the chip that contains a
+ pointer to a scanout buffer. Therefore, the number of CRTCs available
+ determines how many independent scanout buffers can be active at any
+ given time. The CRTC structure contains several fields to support this:
+ a pointer to some video memory (abstracted as a frame buffer object), a
+ display mode, and an (x, y) offset into the video memory to support
+ panning or configurations where one piece of video memory spans multiple
+ CRTCs.
+ </para>
+ <sect3>
+ <title>CRTC Initialization</title>
+ <para>
+ A KMS device must create and register at least one struct
+ <structname>drm_crtc</structname> instance. The instance is allocated
+ and zeroed by the driver, possibly as part of a larger structure, and
+ registered with a call to <function>drm_crtc_init</function> with a
+ pointer to CRTC functions.
+ </para>
+ </sect3>
+ <sect3 id="drm-kms-crtcops">
+ <title>CRTC Operations</title>
+ <sect4>
+ <title>Set Configuration</title>
+ <synopsis>int (*set_config)(struct drm_mode_set *set);</synopsis>
+ <para>
+ Apply a new CRTC configuration to the device. The configuration
+ specifies a CRTC, a frame buffer to scan out from, a (x,y) position in
+ the frame buffer, a display mode and an array of connectors to drive
+ with the CRTC if possible.
+ </para>
+ <para>
+ If the frame buffer specified in the configuration is NULL, the driver
+ must detach all encoders connected to the CRTC and all connectors
+ attached to those encoders and disable them.
+ </para>
+ <para>
+ This operation is called with the mode config lock held.
+ </para>
+ <note><para>
+ Note that the drm core has no notion of restoring the mode setting
+ state after resume, since all resume handling is in the full
+ responsibility of the driver. The common mode setting helper library
+ though provides a helper which can be used for this:
+ <function>drm_helper_resume_force_mode</function>.
+ </para></note>
+ </sect4>
+ <sect4>
+ <title>Page Flipping</title>
+ <synopsis>int (*page_flip)(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event);</synopsis>
+ <para>
+ Schedule a page flip to the given frame buffer for the CRTC. This
+ operation is called with the mode config mutex held.
+ </para>
+ <para>
+ Page flipping is a synchronization mechanism that replaces the frame
+ buffer being scanned out by the CRTC with a new frame buffer during
+ vertical blanking, avoiding tearing. When an application requests a page
+ flip the DRM core verifies that the new frame buffer is large enough to
+ be scanned out by the CRTC in the currently configured mode and then
+ calls the CRTC <methodname>page_flip</methodname> operation with a
+ pointer to the new frame buffer.
+ </para>
+ <para>
+ The <methodname>page_flip</methodname> operation schedules a page flip.
+ Once any pending rendering targeting the new frame buffer has
+ completed, the CRTC will be reprogrammed to display that frame buffer
+ after the next vertical refresh. The operation must return immediately
+ without waiting for rendering or page flip to complete and must block
+ any new rendering to the frame buffer until the page flip completes.
+ </para>
+ <para>
+ If a page flip can be successfully scheduled the driver must set the
+ <code>drm_crtc-&gt;fb</code> field to the new framebuffer pointed to
+ by <code>fb</code>. This is important so that the reference counting
+ on framebuffers stays balanced.
+ </para>
+ <para>
+ If a page flip is already pending, the
+ <methodname>page_flip</methodname> operation must return
+ -<errorname>EBUSY</errorname>.
+ </para>
+ <para>
+ To synchronize page flip to vertical blanking the driver will likely
+ need to enable vertical blanking interrupts. It should call
+ <function>drm_vblank_get</function> for that purpose, and call
+ <function>drm_vblank_put</function> after the page flip completes.
+ </para>
+ <para>
+ If the application has requested to be notified when page flip completes
+ the <methodname>page_flip</methodname> operation will be called with a
+ non-NULL <parameter>event</parameter> argument pointing to a
+ <structname>drm_pending_vblank_event</structname> instance. Upon page
+ flip completion the driver must call <methodname>drm_send_vblank_event</methodname>
+ to fill in the event and send to wake up any waiting processes.
+ This can be performed with
+ <programlisting><![CDATA[
+ spin_lock_irqsave(&dev->event_lock, flags);
+ ...
+ drm_send_vblank_event(dev, pipe, event);
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+ ]]></programlisting>
+ </para>
+ <note><para>
+ FIXME: Could drivers that don't need to wait for rendering to complete
+ just add the event to <literal>dev-&gt;vblank_event_list</literal> and
+ let the DRM core handle everything, as for "normal" vertical blanking
+ events?
+ </para></note>
+ <para>
+ While waiting for the page flip to complete, the
+ <literal>event-&gt;base.link</literal> list head can be used freely by
+ the driver to store the pending event in a driver-specific list.
+ </para>
+ <para>
+ If the file handle is closed before the event is signaled, drivers must
+ take care to destroy the event in their
+ <methodname>preclose</methodname> operation (and, if needed, call
+ <function>drm_vblank_put</function>).
+ </para>
+ </sect4>
+ <sect4>
+ <title>Miscellaneous</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>void (*set_property)(struct drm_crtc *crtc,
+ struct drm_property *property, uint64_t value);</synopsis>
+ <para>
+ Set the value of the given CRTC property to
+ <parameter>value</parameter>. See <xref linkend="drm-kms-properties"/>
+ for more information about properties.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*gamma_set)(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
+ uint32_t start, uint32_t size);</synopsis>
+ <para>
+ Apply a gamma table to the device. The operation is optional.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*destroy)(struct drm_crtc *crtc);</synopsis>
+ <para>
+ Destroy the CRTC when not needed anymore. See
+ <xref linkend="drm-kms-init"/>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect4>
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>Planes (struct <structname>drm_plane</structname>)</title>
+ <para>
+ A plane represents an image source that can be blended with or overlayed
+ on top of a CRTC during the scanout process. Planes are associated with
+ a frame buffer to crop a portion of the image memory (source) and
+ optionally scale it to a destination size. The result is then blended
+ with or overlayed on top of a CRTC.
+ </para>
+ <para>
+ The DRM core recognizes three types of planes:
+ <itemizedlist>
+ <listitem>
+ DRM_PLANE_TYPE_PRIMARY represents a "main" plane for a CRTC. Primary
+ planes are the planes operated upon by CRTC modesetting and flipping
+ operations described in <xref linkend="drm-kms-crtcops"/>.
+ </listitem>
+ <listitem>
+ DRM_PLANE_TYPE_CURSOR represents a "cursor" plane for a CRTC. Cursor
+ planes are the planes operated upon by the DRM_IOCTL_MODE_CURSOR and
+ DRM_IOCTL_MODE_CURSOR2 ioctls.
+ </listitem>
+ <listitem>
+ DRM_PLANE_TYPE_OVERLAY represents all non-primary, non-cursor planes.
+ Some drivers refer to these types of planes as "sprites" internally.
+ </listitem>
+ </itemizedlist>
+ For compatibility with legacy userspace, only overlay planes are made
+ available to userspace by default. Userspace clients may set the
+ DRM_CLIENT_CAP_UNIVERSAL_PLANES client capability bit to indicate that
+ they wish to receive a universal plane list containing all plane types.
+ </para>
+ <sect3>
+ <title>Plane Initialization</title>
+ <para>
+ To create a plane, a KMS drivers allocates and
+ zeroes an instances of struct <structname>drm_plane</structname>
+ (possibly as part of a larger structure) and registers it with a call
+ to <function>drm_universal_plane_init</function>. The function takes a bitmask
+ of the CRTCs that can be associated with the plane, a pointer to the
+ plane functions, a list of format supported formats, and the type of
+ plane (primary, cursor, or overlay) being initialized.
+ </para>
+ <para>
+ Cursor and overlay planes are optional. All drivers should provide
+ one primary plane per CRTC (although this requirement may change in
+ the future); drivers that do not wish to provide special handling for
+ primary planes may make use of the helper functions described in
+ <xref linkend="drm-kms-planehelpers"/> to create and register a
+ primary plane with standard capabilities.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Plane Operations</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>int (*update_plane)(struct drm_plane *plane, struct drm_crtc *crtc,
+ struct drm_framebuffer *fb, int crtc_x, int crtc_y,
+ unsigned int crtc_w, unsigned int crtc_h,
+ uint32_t src_x, uint32_t src_y,
+ uint32_t src_w, uint32_t src_h);</synopsis>
+ <para>
+ Enable and configure the plane to use the given CRTC and frame buffer.
+ </para>
+ <para>
+ The source rectangle in frame buffer memory coordinates is given by
+ the <parameter>src_x</parameter>, <parameter>src_y</parameter>,
+ <parameter>src_w</parameter> and <parameter>src_h</parameter>
+ parameters (as 16.16 fixed point values). Devices that don't support
+ subpixel plane coordinates can ignore the fractional part.
+ </para>
+ <para>
+ The destination rectangle in CRTC coordinates is given by the
+ <parameter>crtc_x</parameter>, <parameter>crtc_y</parameter>,
+ <parameter>crtc_w</parameter> and <parameter>crtc_h</parameter>
+ parameters (as integer values). Devices scale the source rectangle to
+ the destination rectangle. If scaling is not supported, and the source
+ rectangle size doesn't match the destination rectangle size, the
+ driver must return a -<errorname>EINVAL</errorname> error.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*disable_plane)(struct drm_plane *plane);</synopsis>
+ <para>
+ Disable the plane. The DRM core calls this method in response to a
+ DRM_IOCTL_MODE_SETPLANE ioctl call with the frame buffer ID set to 0.
+ Disabled planes must not be processed by the CRTC.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*destroy)(struct drm_plane *plane);</synopsis>
+ <para>
+ Destroy the plane when not needed anymore. See
+ <xref linkend="drm-kms-init"/>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>Encoders (struct <structname>drm_encoder</structname>)</title>
+ <para>
+ An encoder takes pixel data from a CRTC and converts it to a format
+ suitable for any attached connectors. On some devices, it may be
+ possible to have a CRTC send data to more than one encoder. In that
+ case, both encoders would receive data from the same scanout buffer,
+ resulting in a "cloned" display configuration across the connectors
+ attached to each encoder.
+ </para>
+ <sect3>
+ <title>Encoder Initialization</title>
+ <para>
+ As for CRTCs, a KMS driver must create, initialize and register at
+ least one struct <structname>drm_encoder</structname> instance. The
+ instance is allocated and zeroed by the driver, possibly as part of a
+ larger structure.
+ </para>
+ <para>
+ Drivers must initialize the struct <structname>drm_encoder</structname>
+ <structfield>possible_crtcs</structfield> and
+ <structfield>possible_clones</structfield> fields before registering the
+ encoder. Both fields are bitmasks of respectively the CRTCs that the
+ encoder can be connected to, and sibling encoders candidate for cloning.
+ </para>
+ <para>
+ After being initialized, the encoder must be registered with a call to
+ <function>drm_encoder_init</function>. The function takes a pointer to
+ the encoder functions and an encoder type. Supported types are
+ <itemizedlist>
+ <listitem>
+ DRM_MODE_ENCODER_DAC for VGA and analog on DVI-I/DVI-A
+ </listitem>
+ <listitem>
+ DRM_MODE_ENCODER_TMDS for DVI, HDMI and (embedded) DisplayPort
+ </listitem>
+ <listitem>
+ DRM_MODE_ENCODER_LVDS for display panels
+ </listitem>
+ <listitem>
+ DRM_MODE_ENCODER_TVDAC for TV output (Composite, S-Video, Component,
+ SCART)
+ </listitem>
+ <listitem>
+ DRM_MODE_ENCODER_VIRTUAL for virtual machine displays
+ </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ Encoders must be attached to a CRTC to be used. DRM drivers leave
+ encoders unattached at initialization time. Applications (or the fbdev
+ compatibility layer when implemented) are responsible for attaching the
+ encoders they want to use to a CRTC.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Encoder Operations</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>void (*destroy)(struct drm_encoder *encoder);</synopsis>
+ <para>
+ Called to destroy the encoder when not needed anymore. See
+ <xref linkend="drm-kms-init"/>.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*set_property)(struct drm_plane *plane,
+ struct drm_property *property, uint64_t value);</synopsis>
+ <para>
+ Set the value of the given plane property to
+ <parameter>value</parameter>. See <xref linkend="drm-kms-properties"/>
+ for more information about properties.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>Connectors (struct <structname>drm_connector</structname>)</title>
+ <para>
+ A connector is the final destination for pixel data on a device, and
+ usually connects directly to an external display device like a monitor
+ or laptop panel. A connector can only be attached to one encoder at a
+ time. The connector is also the structure where information about the
+ attached display is kept, so it contains fields for display data, EDID
+ data, DPMS &amp; connection status, and information about modes
+ supported on the attached displays.
+ </para>
+ <sect3>
+ <title>Connector Initialization</title>
+ <para>
+ Finally a KMS driver must create, initialize, register and attach at
+ least one struct <structname>drm_connector</structname> instance. The
+ instance is created as other KMS objects and initialized by setting the
+ following fields.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><structfield>interlace_allowed</structfield></term>
+ <listitem><para>
+ Whether the connector can handle interlaced modes.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><structfield>doublescan_allowed</structfield></term>
+ <listitem><para>
+ Whether the connector can handle doublescan.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><structfield>display_info
+ </structfield></term>
+ <listitem><para>
+ Display information is filled from EDID information when a display
+ is detected. For non hot-pluggable displays such as flat panels in
+ embedded systems, the driver should initialize the
+ <structfield>display_info</structfield>.<structfield>width_mm</structfield>
+ and
+ <structfield>display_info</structfield>.<structfield>height_mm</structfield>
+ fields with the physical size of the display.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term id="drm-kms-connector-polled"><structfield>polled</structfield></term>
+ <listitem><para>
+ Connector polling mode, a combination of
+ <variablelist>
+ <varlistentry>
+ <term>DRM_CONNECTOR_POLL_HPD</term>
+ <listitem><para>
+ The connector generates hotplug events and doesn't need to be
+ periodically polled. The CONNECT and DISCONNECT flags must not
+ be set together with the HPD flag.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_CONNECTOR_POLL_CONNECT</term>
+ <listitem><para>
+ Periodically poll the connector for connection.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_CONNECTOR_POLL_DISCONNECT</term>
+ <listitem><para>
+ Periodically poll the connector for disconnection.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ Set to 0 for connectors that don't support connection status
+ discovery.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ <para>
+ The connector is then registered with a call to
+ <function>drm_connector_init</function> with a pointer to the connector
+ functions and a connector type, and exposed through sysfs with a call to
+ <function>drm_connector_register</function>.
+ </para>
+ <para>
+ Supported connector types are
+ <itemizedlist>
+ <listitem>DRM_MODE_CONNECTOR_VGA</listitem>
+ <listitem>DRM_MODE_CONNECTOR_DVII</listitem>
+ <listitem>DRM_MODE_CONNECTOR_DVID</listitem>
+ <listitem>DRM_MODE_CONNECTOR_DVIA</listitem>
+ <listitem>DRM_MODE_CONNECTOR_Composite</listitem>
+ <listitem>DRM_MODE_CONNECTOR_SVIDEO</listitem>
+ <listitem>DRM_MODE_CONNECTOR_LVDS</listitem>
+ <listitem>DRM_MODE_CONNECTOR_Component</listitem>
+ <listitem>DRM_MODE_CONNECTOR_9PinDIN</listitem>
+ <listitem>DRM_MODE_CONNECTOR_DisplayPort</listitem>
+ <listitem>DRM_MODE_CONNECTOR_HDMIA</listitem>
+ <listitem>DRM_MODE_CONNECTOR_HDMIB</listitem>
+ <listitem>DRM_MODE_CONNECTOR_TV</listitem>
+ <listitem>DRM_MODE_CONNECTOR_eDP</listitem>
+ <listitem>DRM_MODE_CONNECTOR_VIRTUAL</listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ Connectors must be attached to an encoder to be used. For devices that
+ map connectors to encoders 1:1, the connector should be attached at
+ initialization time with a call to
+ <function>drm_mode_connector_attach_encoder</function>. The driver must
+ also set the <structname>drm_connector</structname>
+ <structfield>encoder</structfield> field to point to the attached
+ encoder.
+ </para>
+ <para>
+ Finally, drivers must initialize the connectors state change detection
+ with a call to <function>drm_kms_helper_poll_init</function>. If at
+ least one connector is pollable but can't generate hotplug interrupts
+ (indicated by the DRM_CONNECTOR_POLL_CONNECT and
+ DRM_CONNECTOR_POLL_DISCONNECT connector flags), a delayed work will
+ automatically be queued to periodically poll for changes. Connectors
+ that can generate hotplug interrupts must be marked with the
+ DRM_CONNECTOR_POLL_HPD flag instead, and their interrupt handler must
+ call <function>drm_helper_hpd_irq_event</function>. The function will
+ queue a delayed work to check the state of all connectors, but no
+ periodic polling will be done.
+ </para>
+ </sect3>
+ <sect3>
+ <title>Connector Operations</title>
+ <note><para>
+ Unless otherwise state, all operations are mandatory.
+ </para></note>
+ <sect4>
+ <title>DPMS</title>
+ <synopsis>void (*dpms)(struct drm_connector *connector, int mode);</synopsis>
+ <para>
+ The DPMS operation sets the power state of a connector. The mode
+ argument is one of
+ <itemizedlist>
+ <listitem><para>DRM_MODE_DPMS_ON</para></listitem>
+ <listitem><para>DRM_MODE_DPMS_STANDBY</para></listitem>
+ <listitem><para>DRM_MODE_DPMS_SUSPEND</para></listitem>
+ <listitem><para>DRM_MODE_DPMS_OFF</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ In all but DPMS_ON mode the encoder to which the connector is attached
+ should put the display in low-power mode by driving its signals
+ appropriately. If more than one connector is attached to the encoder
+ care should be taken not to change the power state of other displays as
+ a side effect. Low-power mode should be propagated to the encoders and
+ CRTCs when all related connectors are put in low-power mode.
+ </para>
+ </sect4>
+ <sect4>
+ <title>Modes</title>
+ <synopsis>int (*fill_modes)(struct drm_connector *connector, uint32_t max_width,
+ uint32_t max_height);</synopsis>
+ <para>
+ Fill the mode list with all supported modes for the connector. If the
+ <parameter>max_width</parameter> and <parameter>max_height</parameter>
+ arguments are non-zero, the implementation must ignore all modes wider
+ than <parameter>max_width</parameter> or higher than
+ <parameter>max_height</parameter>.
+ </para>
+ <para>
+ The connector must also fill in this operation its
+ <structfield>display_info</structfield>
+ <structfield>width_mm</structfield> and
+ <structfield>height_mm</structfield> fields with the connected display
+ physical size in millimeters. The fields should be set to 0 if the value
+ isn't known or is not applicable (for instance for projector devices).
+ </para>
+ </sect4>
+ <sect4>
+ <title>Connection Status</title>
+ <para>
+ The connection status is updated through polling or hotplug events when
+ supported (see <xref linkend="drm-kms-connector-polled"/>). The status
+ value is reported to userspace through ioctls and must not be used
+ inside the driver, as it only gets initialized by a call to
+ <function>drm_mode_getconnector</function> from userspace.
+ </para>
+ <synopsis>enum drm_connector_status (*detect)(struct drm_connector *connector,
+ bool force);</synopsis>
+ <para>
+ Check to see if anything is attached to the connector. The
+ <parameter>force</parameter> parameter is set to false whilst polling or
+ to true when checking the connector due to user request.
+ <parameter>force</parameter> can be used by the driver to avoid
+ expensive, destructive operations during automated probing.
+ </para>
+ <para>
+ Return connector_status_connected if something is connected to the
+ connector, connector_status_disconnected if nothing is connected and
+ connector_status_unknown if the connection state isn't known.
+ </para>
+ <para>
+ Drivers should only return connector_status_connected if the connection
+ status has really been probed as connected. Connectors that can't detect
+ the connection status, or failed connection status probes, should return
+ connector_status_unknown.
+ </para>
+ </sect4>
+ <sect4>
+ <title>Miscellaneous</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>void (*set_property)(struct drm_connector *connector,
+ struct drm_property *property, uint64_t value);</synopsis>
+ <para>
+ Set the value of the given connector property to
+ <parameter>value</parameter>. See <xref linkend="drm-kms-properties"/>
+ for more information about properties.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*destroy)(struct drm_connector *connector);</synopsis>
+ <para>
+ Destroy the connector when not needed anymore. See
+ <xref linkend="drm-kms-init"/>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect4>
+ </sect3>
+ </sect2>
+ <sect2>
+ <title>Cleanup</title>
+ <para>
+ The DRM core manages its objects' lifetime. When an object is not needed
+ anymore the core calls its destroy function, which must clean up and
+ free every resource allocated for the object. Every
+ <function>drm_*_init</function> call must be matched with a
+ corresponding <function>drm_*_cleanup</function> call to cleanup CRTCs
+ (<function>drm_crtc_cleanup</function>), planes
+ (<function>drm_plane_cleanup</function>), encoders
+ (<function>drm_encoder_cleanup</function>) and connectors
+ (<function>drm_connector_cleanup</function>). Furthermore, connectors
+ that have been added to sysfs must be removed by a call to
+ <function>drm_connector_unregister</function> before calling
+ <function>drm_connector_cleanup</function>.
+ </para>
+ <para>
+ Connectors state change detection must be cleanup up with a call to
+ <function>drm_kms_helper_poll_fini</function>.
+ </para>
+ </sect2>
+ <sect2>
+ <title>Output discovery and initialization example</title>
+ <programlisting><![CDATA[
+void intel_crt_init(struct drm_device *dev)
+{
+ struct drm_connector *connector;
+ struct intel_output *intel_output;
+
+ intel_output = kzalloc(sizeof(struct intel_output), GFP_KERNEL);
+ if (!intel_output)
+ return;
+
+ connector = &intel_output->base;
+ drm_connector_init(dev, &intel_output->base,
+ &intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA);
+
+ drm_encoder_init(dev, &intel_output->enc, &intel_crt_enc_funcs,
+ DRM_MODE_ENCODER_DAC);
+
+ drm_mode_connector_attach_encoder(&intel_output->base,
+ &intel_output->enc);
+
+ /* Set up the DDC bus. */
+ intel_output->ddc_bus = intel_i2c_create(dev, GPIOA, "CRTDDC_A");
+ if (!intel_output->ddc_bus) {
+ dev_printk(KERN_ERR, &dev->pdev->dev, "DDC bus registration "
+ "failed.\n");
+ return;
+ }
+
+ intel_output->type = INTEL_OUTPUT_ANALOG;
+ connector->interlace_allowed = 0;
+ connector->doublescan_allowed = 0;
+
+ drm_encoder_helper_add(&intel_output->enc, &intel_crt_helper_funcs);
+ drm_connector_helper_add(connector, &intel_crt_connector_helper_funcs);
+
+ drm_connector_register(connector);
+}]]></programlisting>
+ <para>
+ In the example above (taken from the i915 driver), a CRTC, connector and
+ encoder combination is created. A device-specific i2c bus is also
+ created for fetching EDID data and performing monitor detection. Once
+ the process is complete, the new connector is registered with sysfs to
+ make its properties available to applications.
+ </para>
+ </sect2>
+ <sect2>
+ <title>KMS API Functions</title>
+!Edrivers/gpu/drm/drm_crtc.c
+ </sect2>
+ <sect2>
+ <title>KMS Data Structures</title>
+!Iinclude/drm/drm_crtc.h
+ </sect2>
+ <sect2>
+ <title>KMS Locking</title>
+!Pdrivers/gpu/drm/drm_modeset_lock.c kms locking
+!Iinclude/drm/drm_modeset_lock.h
+!Edrivers/gpu/drm/drm_modeset_lock.c
+ </sect2>
+ </sect1>
+
+ <!-- Internals: kms helper functions -->
+
+ <sect1>
+ <title>Mode Setting Helper Functions</title>
+ <para>
+ The plane, CRTC, encoder and connector functions provided by the drivers
+ implement the DRM API. They're called by the DRM core and ioctl handlers
+ to handle device state changes and configuration request. As implementing
+ those functions often requires logic not specific to drivers, mid-layer
+ helper functions are available to avoid duplicating boilerplate code.
+ </para>
+ <para>
+ The DRM core contains one mid-layer implementation. The mid-layer provides
+ implementations of several plane, CRTC, encoder and connector functions
+ (called from the top of the mid-layer) that pre-process requests and call
+ lower-level functions provided by the driver (at the bottom of the
+ mid-layer). For instance, the
+ <function>drm_crtc_helper_set_config</function> function can be used to
+ fill the struct <structname>drm_crtc_funcs</structname>
+ <structfield>set_config</structfield> field. When called, it will split
+ the <methodname>set_config</methodname> operation in smaller, simpler
+ operations and call the driver to handle them.
+ </para>
+ <para>
+ To use the mid-layer, drivers call <function>drm_crtc_helper_add</function>,
+ <function>drm_encoder_helper_add</function> and
+ <function>drm_connector_helper_add</function> functions to install their
+ mid-layer bottom operations handlers, and fill the
+ <structname>drm_crtc_funcs</structname>,
+ <structname>drm_encoder_funcs</structname> and
+ <structname>drm_connector_funcs</structname> structures with pointers to
+ the mid-layer top API functions. Installing the mid-layer bottom operation
+ handlers is best done right after registering the corresponding KMS object.
+ </para>
+ <para>
+ The mid-layer is not split between CRTC, encoder and connector operations.
+ To use it, a driver must provide bottom functions for all of the three KMS
+ entities.
+ </para>
+ <sect2>
+ <title>Helper Functions</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>int drm_crtc_helper_set_config(struct drm_mode_set *set);</synopsis>
+ <para>
+ The <function>drm_crtc_helper_set_config</function> helper function
+ is a CRTC <methodname>set_config</methodname> implementation. It
+ first tries to locate the best encoder for each connector by calling
+ the connector <methodname>best_encoder</methodname> helper
+ operation.
+ </para>
+ <para>
+ After locating the appropriate encoders, the helper function will
+ call the <methodname>mode_fixup</methodname> encoder and CRTC helper
+ operations to adjust the requested mode, or reject it completely in
+ which case an error will be returned to the application. If the new
+ configuration after mode adjustment is identical to the current
+ configuration the helper function will return without performing any
+ other operation.
+ </para>
+ <para>
+ If the adjusted mode is identical to the current mode but changes to
+ the frame buffer need to be applied, the
+ <function>drm_crtc_helper_set_config</function> function will call
+ the CRTC <methodname>mode_set_base</methodname> helper operation. If
+ the adjusted mode differs from the current mode, or if the
+ <methodname>mode_set_base</methodname> helper operation is not
+ provided, the helper function performs a full mode set sequence by
+ calling the <methodname>prepare</methodname>,
+ <methodname>mode_set</methodname> and
+ <methodname>commit</methodname> CRTC and encoder helper operations,
+ in that order.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void drm_helper_connector_dpms(struct drm_connector *connector, int mode);</synopsis>
+ <para>
+ The <function>drm_helper_connector_dpms</function> helper function
+ is a connector <methodname>dpms</methodname> implementation that
+ tracks power state of connectors. To use the function, drivers must
+ provide <methodname>dpms</methodname> helper operations for CRTCs
+ and encoders to apply the DPMS state to the device.
+ </para>
+ <para>
+ The mid-layer doesn't track the power state of CRTCs and encoders.
+ The <methodname>dpms</methodname> helper operations can thus be
+ called with a mode identical to the currently active mode.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
+ uint32_t maxX, uint32_t maxY);</synopsis>
+ <para>
+ The <function>drm_helper_probe_single_connector_modes</function> helper
+ function is a connector <methodname>fill_modes</methodname>
+ implementation that updates the connection status for the connector
+ and then retrieves a list of modes by calling the connector
+ <methodname>get_modes</methodname> helper operation.
+ </para>
+ <para>
+ If the helper operation returns no mode, and if the connector status
+ is connector_status_connected, standard VESA DMT modes up to
+ 1024x768 are automatically added to the modes list by a call to
+ <function>drm_add_modes_noedid</function>.
+ </para>
+ <para>
+ The function then filters out modes larger than
+ <parameter>max_width</parameter> and <parameter>max_height</parameter>
+ if specified. It finally calls the optional connector
+ <methodname>mode_valid</methodname> helper operation for each mode in
+ the probed list to check whether the mode is valid for the connector.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+ <sect2>
+ <title>CRTC Helper Operations</title>
+ <itemizedlist>
+ <listitem id="drm-helper-crtc-mode-fixup">
+ <synopsis>bool (*mode_fixup)(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);</synopsis>
+ <para>
+ Let CRTCs adjust the requested mode or reject it completely. This
+ operation returns true if the mode is accepted (possibly after being
+ adjusted) or false if it is rejected.
+ </para>
+ <para>
+ The <methodname>mode_fixup</methodname> operation should reject the
+ mode if it can't reasonably use it. The definition of "reasonable"
+ is currently fuzzy in this context. One possible behaviour would be
+ to set the adjusted mode to the panel timings when a fixed-mode
+ panel is used with hardware capable of scaling. Another behaviour
+ would be to accept any input mode and adjust it to the closest mode
+ supported by the hardware (FIXME: This needs to be clarified).
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*mode_set_base)(struct drm_crtc *crtc, int x, int y,
+ struct drm_framebuffer *old_fb)</synopsis>
+ <para>
+ Move the CRTC on the current frame buffer (stored in
+ <literal>crtc-&gt;fb</literal>) to position (x,y). Any of the frame
+ buffer, x position or y position may have been modified.
+ </para>
+ <para>
+ This helper operation is optional. If not provided, the
+ <function>drm_crtc_helper_set_config</function> function will fall
+ back to the <methodname>mode_set</methodname> helper operation.
+ </para>
+ <note><para>
+ FIXME: Why are x and y passed as arguments, as they can be accessed
+ through <literal>crtc-&gt;x</literal> and
+ <literal>crtc-&gt;y</literal>?
+ </para></note>
+ </listitem>
+ <listitem>
+ <synopsis>void (*prepare)(struct drm_crtc *crtc);</synopsis>
+ <para>
+ Prepare the CRTC for mode setting. This operation is called after
+ validating the requested mode. Drivers use it to perform
+ device-specific operations required before setting the new mode.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*mode_set)(struct drm_crtc *crtc, struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode, int x, int y,
+ struct drm_framebuffer *old_fb);</synopsis>
+ <para>
+ Set a new mode, position and frame buffer. Depending on the device
+ requirements, the mode can be stored internally by the driver and
+ applied in the <methodname>commit</methodname> operation, or
+ programmed to the hardware immediately.
+ </para>
+ <para>
+ The <methodname>mode_set</methodname> operation returns 0 on success
+ or a negative error code if an error occurs.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*commit)(struct drm_crtc *crtc);</synopsis>
+ <para>
+ Commit a mode. This operation is called after setting the new mode.
+ Upon return the device must use the new mode and be fully
+ operational.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+ <sect2>
+ <title>Encoder Helper Operations</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>bool (*mode_fixup)(struct drm_encoder *encoder,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);</synopsis>
+ <para>
+ Let encoders adjust the requested mode or reject it completely. This
+ operation returns true if the mode is accepted (possibly after being
+ adjusted) or false if it is rejected. See the
+ <link linkend="drm-helper-crtc-mode-fixup">mode_fixup CRTC helper
+ operation</link> for an explanation of the allowed adjustments.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*prepare)(struct drm_encoder *encoder);</synopsis>
+ <para>
+ Prepare the encoder for mode setting. This operation is called after
+ validating the requested mode. Drivers use it to perform
+ device-specific operations required before setting the new mode.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*mode_set)(struct drm_encoder *encoder,
+ struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);</synopsis>
+ <para>
+ Set a new mode. Depending on the device requirements, the mode can
+ be stored internally by the driver and applied in the
+ <methodname>commit</methodname> operation, or programmed to the
+ hardware immediately.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>void (*commit)(struct drm_encoder *encoder);</synopsis>
+ <para>
+ Commit a mode. This operation is called after setting the new mode.
+ Upon return the device must use the new mode and be fully
+ operational.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+ <sect2>
+ <title>Connector Helper Operations</title>
+ <itemizedlist>
+ <listitem>
+ <synopsis>struct drm_encoder *(*best_encoder)(struct drm_connector *connector);</synopsis>
+ <para>
+ Return a pointer to the best encoder for the connecter. Device that
+ map connectors to encoders 1:1 simply return the pointer to the
+ associated encoder. This operation is mandatory.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*get_modes)(struct drm_connector *connector);</synopsis>
+ <para>
+ Fill the connector's <structfield>probed_modes</structfield> list
+ by parsing EDID data with <function>drm_add_edid_modes</function>,
+ adding standard VESA DMT modes with <function>drm_add_modes_noedid</function>,
+ or calling <function>drm_mode_probed_add</function> directly for every
+ supported mode and return the number of modes it has detected. This
+ operation is mandatory.
+ </para>
+ <para>
+ Note that the caller function will automatically add standard VESA
+ DMT modes up to 1024x768 if the <methodname>get_modes</methodname>
+ helper operation returns no mode and if the connector status is
+ connector_status_connected. There is no need to call
+ <function>drm_add_edid_modes</function> manually in that case.
+ </para>
+ <para>
+ When adding modes manually the driver creates each mode with a call to
+ <function>drm_mode_create</function> and must fill the following fields.
+ <itemizedlist>
+ <listitem>
+ <synopsis>__u32 type;</synopsis>
+ <para>
+ Mode type bitmask, a combination of
+ <variablelist>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_BUILTIN</term>
+ <listitem><para>not used?</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_CLOCK_C</term>
+ <listitem><para>not used?</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_CRTC_C</term>
+ <listitem><para>not used?</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>
+ DRM_MODE_TYPE_PREFERRED - The preferred mode for the connector
+ </term>
+ <listitem>
+ <para>not used?</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_DEFAULT</term>
+ <listitem><para>not used?</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_USERDEF</term>
+ <listitem><para>not used?</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_TYPE_DRIVER</term>
+ <listitem>
+ <para>
+ The mode has been created by the driver (as opposed to
+ to user-created modes).
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ Drivers must set the DRM_MODE_TYPE_DRIVER bit for all modes they
+ create, and set the DRM_MODE_TYPE_PREFERRED bit for the preferred
+ mode.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>__u32 clock;</synopsis>
+ <para>Pixel clock frequency in kHz unit</para>
+ </listitem>
+ <listitem>
+ <synopsis>__u16 hdisplay, hsync_start, hsync_end, htotal;
+ __u16 vdisplay, vsync_start, vsync_end, vtotal;</synopsis>
+ <para>Horizontal and vertical timing information</para>
+ <screen><![CDATA[
+ Active Front Sync Back
+ Region Porch Porch
+ <-----------------------><----------------><-------------><-------------->
+
+ //////////////////////|
+ ////////////////////// |
+ ////////////////////// |.................. ................
+ _______________
+
+ <----- [hv]display ----->
+ <------------- [hv]sync_start ------------>
+ <--------------------- [hv]sync_end --------------------->
+ <-------------------------------- [hv]total ----------------------------->
+]]></screen>
+ </listitem>
+ <listitem>
+ <synopsis>__u16 hskew;
+ __u16 vscan;</synopsis>
+ <para>Unknown</para>
+ </listitem>
+ <listitem>
+ <synopsis>__u32 flags;</synopsis>
+ <para>
+ Mode flags, a combination of
+ <variablelist>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_PHSYNC</term>
+ <listitem><para>
+ Horizontal sync is active high
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_NHSYNC</term>
+ <listitem><para>
+ Horizontal sync is active low
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_PVSYNC</term>
+ <listitem><para>
+ Vertical sync is active high
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_NVSYNC</term>
+ <listitem><para>
+ Vertical sync is active low
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_INTERLACE</term>
+ <listitem><para>
+ Mode is interlaced
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_DBLSCAN</term>
+ <listitem><para>
+ Mode uses doublescan
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_CSYNC</term>
+ <listitem><para>
+ Mode uses composite sync
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_PCSYNC</term>
+ <listitem><para>
+ Composite sync is active high
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_NCSYNC</term>
+ <listitem><para>
+ Composite sync is active low
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_HSKEW</term>
+ <listitem><para>
+ hskew provided (not used?)
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_BCAST</term>
+ <listitem><para>
+ not used?
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_PIXMUX</term>
+ <listitem><para>
+ not used?
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_DBLCLK</term>
+ <listitem><para>
+ not used?
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_FLAG_CLKDIV2</term>
+ <listitem><para>
+ ?
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+ <para>
+ Note that modes marked with the INTERLACE or DBLSCAN flags will be
+ filtered out by
+ <function>drm_helper_probe_single_connector_modes</function> if
+ the connector's <structfield>interlace_allowed</structfield> or
+ <structfield>doublescan_allowed</structfield> field is set to 0.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>char name[DRM_DISPLAY_MODE_LEN];</synopsis>
+ <para>
+ Mode name. The driver must call
+ <function>drm_mode_set_name</function> to fill the mode name from
+ <structfield>hdisplay</structfield>,
+ <structfield>vdisplay</structfield> and interlace flag after
+ filling the corresponding fields.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ The <structfield>vrefresh</structfield> value is computed by
+ <function>drm_helper_probe_single_connector_modes</function>.
+ </para>
+ <para>
+ When parsing EDID data, <function>drm_add_edid_modes</function> fills the
+ connector <structfield>display_info</structfield>
+ <structfield>width_mm</structfield> and
+ <structfield>height_mm</structfield> fields. When creating modes
+ manually the <methodname>get_modes</methodname> helper operation must
+ set the <structfield>display_info</structfield>
+ <structfield>width_mm</structfield> and
+ <structfield>height_mm</structfield> fields if they haven't been set
+ already (for instance at initialization time when a fixed-size panel is
+ attached to the connector). The mode <structfield>width_mm</structfield>
+ and <structfield>height_mm</structfield> fields are only used internally
+ during EDID parsing and should not be set when creating modes manually.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>int (*mode_valid)(struct drm_connector *connector,
+ struct drm_display_mode *mode);</synopsis>
+ <para>
+ Verify whether a mode is valid for the connector. Return MODE_OK for
+ supported modes and one of the enum drm_mode_status values (MODE_*)
+ for unsupported modes. This operation is optional.
+ </para>
+ <para>
+ As the mode rejection reason is currently not used beside for
+ immediately removing the unsupported mode, an implementation can
+ return MODE_BAD regardless of the exact reason why the mode is not
+ valid.
+ </para>
+ <note><para>
+ Note that the <methodname>mode_valid</methodname> helper operation is
+ only called for modes detected by the device, and
+ <emphasis>not</emphasis> for modes set by the user through the CRTC
+ <methodname>set_config</methodname> operation.
+ </para></note>
+ </listitem>
+ </itemizedlist>
+ </sect2>
+ <sect2>
+ <title>Atomic Modeset Helper Functions Reference</title>
+ <sect3>
+ <title>Overview</title>
+!Pdrivers/gpu/drm/drm_atomic_helper.c overview
+ </sect3>
+ <sect3>
+ <title>Implementing Asynchronous Atomic Commit</title>
+!Pdrivers/gpu/drm/drm_atomic_helper.c implementing async commit
+ </sect3>
+ <sect3>
+ <title>Atomic State Reset and Initialization</title>
+!Pdrivers/gpu/drm/drm_atomic_helper.c atomic state reset and initialization
+ </sect3>
+!Iinclude/drm/drm_atomic_helper.h
+!Edrivers/gpu/drm/drm_atomic_helper.c
+ </sect2>
+ <sect2>
+ <title>Modeset Helper Functions Reference</title>
+!Iinclude/drm/drm_crtc_helper.h
+!Edrivers/gpu/drm/drm_crtc_helper.c
+!Pdrivers/gpu/drm/drm_crtc_helper.c overview
+ </sect2>
+ <sect2>
+ <title>Output Probing Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_probe_helper.c output probing helper overview
+!Edrivers/gpu/drm/drm_probe_helper.c
+ </sect2>
+ <sect2>
+ <title>fbdev Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_fb_helper.c fbdev helpers
+!Edrivers/gpu/drm/drm_fb_helper.c
+!Iinclude/drm/drm_fb_helper.h
+ </sect2>
+ <sect2>
+ <title>Display Port Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_dp_helper.c dp helpers
+!Iinclude/drm/drm_dp_helper.h
+!Edrivers/gpu/drm/drm_dp_helper.c
+ </sect2>
+ <sect2>
+ <title>Display Port MST Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_dp_mst_topology.c dp mst helper
+!Iinclude/drm/drm_dp_mst_helper.h
+!Edrivers/gpu/drm/drm_dp_mst_topology.c
+ </sect2>
+ <sect2>
+ <title>MIPI DSI Helper Functions Reference</title>
+!Pdrivers/gpu/drm/drm_mipi_dsi.c dsi helpers
+!Iinclude/drm/drm_mipi_dsi.h
+!Edrivers/gpu/drm/drm_mipi_dsi.c
+ </sect2>
+ <sect2>
+ <title>EDID Helper Functions Reference</title>
+!Edrivers/gpu/drm/drm_edid.c
+ </sect2>
+ <sect2>
+ <title>Rectangle Utilities Reference</title>
+!Pinclude/drm/drm_rect.h rect utils
+!Iinclude/drm/drm_rect.h
+!Edrivers/gpu/drm/drm_rect.c
+ </sect2>
+ <sect2>
+ <title>Flip-work Helper Reference</title>
+!Pinclude/drm/drm_flip_work.h flip utils
+!Iinclude/drm/drm_flip_work.h
+!Edrivers/gpu/drm/drm_flip_work.c
+ </sect2>
+ <sect2>
+ <title>HDMI Infoframes Helper Reference</title>
+ <para>
+ Strictly speaking this is not a DRM helper library but generally useable
+ by any driver interfacing with HDMI outputs like v4l or alsa drivers.
+ But it nicely fits into the overall topic of mode setting helper
+ libraries and hence is also included here.
+ </para>
+!Iinclude/linux/hdmi.h
+!Edrivers/video/hdmi.c
+ </sect2>
+ <sect2>
+ <title id="drm-kms-planehelpers">Plane Helper Reference</title>
+!Edrivers/gpu/drm/drm_plane_helper.c
+!Pdrivers/gpu/drm/drm_plane_helper.c overview
+ </sect2>
+ <sect2>
+ <title>Tile group</title>
+!Pdrivers/gpu/drm/drm_crtc.c Tile group
+ </sect2>
+ </sect1>
+
+ <!-- Internals: kms properties -->
+
+ <sect1 id="drm-kms-properties">
+ <title>KMS Properties</title>
+ <para>
+ Drivers may need to expose additional parameters to applications than
+ those described in the previous sections. KMS supports attaching
+ properties to CRTCs, connectors and planes and offers a userspace API to
+ list, get and set the property values.
+ </para>
+ <para>
+ Properties are identified by a name that uniquely defines the property
+ purpose, and store an associated value. For all property types except blob
+ properties the value is a 64-bit unsigned integer.
+ </para>
+ <para>
+ KMS differentiates between properties and property instances. Drivers
+ first create properties and then create and associate individual instances
+ of those properties to objects. A property can be instantiated multiple
+ times and associated with different objects. Values are stored in property
+ instances, and all other property information are stored in the property
+ and shared between all instances of the property.
+ </para>
+ <para>
+ Every property is created with a type that influences how the KMS core
+ handles the property. Supported property types are
+ <variablelist>
+ <varlistentry>
+ <term>DRM_MODE_PROP_RANGE</term>
+ <listitem><para>Range properties report their minimum and maximum
+ admissible values. The KMS core verifies that values set by
+ application fit in that range.</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_PROP_ENUM</term>
+ <listitem><para>Enumerated properties take a numerical value that
+ ranges from 0 to the number of enumerated values defined by the
+ property minus one, and associate a free-formed string name to each
+ value. Applications can retrieve the list of defined value-name pairs
+ and use the numerical value to get and set property instance values.
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_PROP_BITMASK</term>
+ <listitem><para>Bitmask properties are enumeration properties that
+ additionally restrict all enumerated values to the 0..63 range.
+ Bitmask property instance values combine one or more of the
+ enumerated bits defined by the property.</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_MODE_PROP_BLOB</term>
+ <listitem><para>Blob properties store a binary blob without any format
+ restriction. The binary blobs are created as KMS standalone objects,
+ and blob property instance values store the ID of their associated
+ blob object.</para>
+ <para>Blob properties are only used for the connector EDID property
+ and cannot be created by drivers.</para></listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+ <para>
+ To create a property drivers call one of the following functions depending
+ on the property type. All property creation functions take property flags
+ and name, as well as type-specific arguments.
+ <itemizedlist>
+ <listitem>
+ <synopsis>struct drm_property *drm_property_create_range(struct drm_device *dev, int flags,
+ const char *name,
+ uint64_t min, uint64_t max);</synopsis>
+ <para>Create a range property with the given minimum and maximum
+ values.</para>
+ </listitem>
+ <listitem>
+ <synopsis>struct drm_property *drm_property_create_enum(struct drm_device *dev, int flags,
+ const char *name,
+ const struct drm_prop_enum_list *props,
+ int num_values);</synopsis>
+ <para>Create an enumerated property. The <parameter>props</parameter>
+ argument points to an array of <parameter>num_values</parameter>
+ value-name pairs.</para>
+ </listitem>
+ <listitem>
+ <synopsis>struct drm_property *drm_property_create_bitmask(struct drm_device *dev,
+ int flags, const char *name,
+ const struct drm_prop_enum_list *props,
+ int num_values);</synopsis>
+ <para>Create a bitmask property. The <parameter>props</parameter>
+ argument points to an array of <parameter>num_values</parameter>
+ value-name pairs.</para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ Properties can additionally be created as immutable, in which case they
+ will be read-only for applications but can be modified by the driver. To
+ create an immutable property drivers must set the DRM_MODE_PROP_IMMUTABLE
+ flag at property creation time.
+ </para>
+ <para>
+ When no array of value-name pairs is readily available at property
+ creation time for enumerated or range properties, drivers can create
+ the property using the <function>drm_property_create</function> function
+ and manually add enumeration value-name pairs by calling the
+ <function>drm_property_add_enum</function> function. Care must be taken to
+ properly specify the property type through the <parameter>flags</parameter>
+ argument.
+ </para>
+ <para>
+ After creating properties drivers can attach property instances to CRTC,
+ connector and plane objects by calling the
+ <function>drm_object_attach_property</function>. The function takes a
+ pointer to the target object, a pointer to the previously created property
+ and an initial instance value.
+ </para>
+ <sect2>
+ <title>Existing KMS Properties</title>
+ <para>
+ The following table gives description of drm properties exposed by various
+ modules/drivers.
+ </para>
+ <table border="1" cellpadding="0" cellspacing="0">
+ <tbody>
+ <tr style="font-weight: bold;">
+ <td valign="top" >Owner Module/Drivers</td>
+ <td valign="top" >Group</td>
+ <td valign="top" >Property Name</td>
+ <td valign="top" >Type</td>
+ <td valign="top" >Property Values</td>
+ <td valign="top" >Object attached</td>
+ <td valign="top" >Description/Restrictions</td>
+ </tr>
+ <tr>
+ <td rowspan="36" valign="top" >DRM</td>
+ <td rowspan="5" valign="top" >Connector</td>
+ <td valign="top" >“EDID”</td>
+ <td valign="top" >BLOB | IMMUTABLE</td>
+ <td valign="top" >0</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >Contains id of edid blob ptr object.</td>
+ </tr>
+ <tr>
+ <td valign="top" >“DPMS”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ “On”, “Standby”, “Suspend”, “Off” }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >Contains DPMS operation mode value.</td>
+ </tr>
+ <tr>
+ <td valign="top" >“PATH”</td>
+ <td valign="top" >BLOB | IMMUTABLE</td>
+ <td valign="top" >0</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >Contains topology path to a connector.</td>
+ </tr>
+ <tr>
+ <td valign="top" >“TILE”</td>
+ <td valign="top" >BLOB | IMMUTABLE</td>
+ <td valign="top" >0</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >Contains tiling information for a connector.</td>
+ </tr>
+ <tr>
+ <td valign="top" >“CRTC_ID”</td>
+ <td valign="top" >OBJECT</td>
+ <td valign="top" >DRM_MODE_OBJECT_CRTC</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >CRTC that connector is attached to (atomic)</td>
+ </tr>
+ <tr>
+ <td rowspan="11" valign="top" >Plane</td>
+ <td valign="top" >“type”</td>
+ <td valign="top" >ENUM | IMMUTABLE</td>
+ <td valign="top" >{ "Overlay", "Primary", "Cursor" }</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >Plane type</td>
+ </tr>
+ <tr>
+ <td valign="top" >“SRC_X”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=UINT_MAX</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >Scanout source x coordinate in 16.16 fixed point (atomic)</td>
+ </tr>
+ <tr>
+ <td valign="top" >“SRC_Y”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=UINT_MAX</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >Scanout source y coordinate in 16.16 fixed point (atomic)</td>
+ </tr>
+ <tr>
+ <td valign="top" >“SRC_W”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=UINT_MAX</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >Scanout source width in 16.16 fixed point (atomic)</td>
+ </tr>
+ <tr>
+ <td valign="top" >“SRC_H”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=UINT_MAX</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >Scanout source height in 16.16 fixed point (atomic)</td>
+ </tr>
+ <tr>
+ <td valign="top" >“CRTC_X”</td>
+ <td valign="top" >SIGNED_RANGE</td>
+ <td valign="top" >Min=INT_MIN, Max=INT_MAX</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >Scanout CRTC (destination) x coordinate (atomic)</td>
+ </tr>
+ <tr>
+ <td valign="top" >“CRTC_Y”</td>
+ <td valign="top" >SIGNED_RANGE</td>
+ <td valign="top" >Min=INT_MIN, Max=INT_MAX</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >Scanout CRTC (destination) y coordinate (atomic)</td>
+ </tr>
+ <tr>
+ <td valign="top" >“CRTC_W”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=UINT_MAX</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >Scanout CRTC (destination) width (atomic)</td>
+ </tr>
+ <tr>
+ <td valign="top" >“CRTC_H”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=UINT_MAX</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >Scanout CRTC (destination) height (atomic)</td>
+ </tr>
+ <tr>
+ <td valign="top" >“FB_ID”</td>
+ <td valign="top" >OBJECT</td>
+ <td valign="top" >DRM_MODE_OBJECT_FB</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >Scanout framebuffer (atomic)</td>
+ </tr>
+ <tr>
+ <td valign="top" >“CRTC_ID”</td>
+ <td valign="top" >OBJECT</td>
+ <td valign="top" >DRM_MODE_OBJECT_CRTC</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >CRTC that plane is attached to (atomic)</td>
+ </tr>
+ <tr>
+ <td rowspan="2" valign="top" >DVI-I</td>
+ <td valign="top" >“subconnector”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ “Unknown”, “DVI-D”, “DVI-A” }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“select subconnector”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ “Automatic”, “DVI-D”, “DVI-A” }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="13" valign="top" >TV</td>
+ <td valign="top" >“subconnector”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "Unknown", "Composite", "SVIDEO", "Component", "SCART" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“select subconnector”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "Automatic", "Composite", "SVIDEO", "Component", "SCART" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“mode”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "NTSC_M", "NTSC_J", "NTSC_443", "PAL_B" } etc.</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“left margin”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“right margin”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“top margin”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“bottom margin”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“brightness”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“contrast”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“flicker reduction”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“overscan”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“saturation”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“hue”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="2" valign="top" >Virtual GPU</td>
+ <td valign="top" >“suggested X”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0xffffffff</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >property to suggest an X offset for a connector</td>
+ </tr>
+ <tr>
+ <td valign="top" >“suggested Y”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0xffffffff</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >property to suggest an Y offset for a connector</td>
+ </tr>
+ <tr>
+ <td rowspan="3" valign="top" >Optional</td>
+ <td valign="top" >“scaling mode”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "None", "Full", "Center", "Full aspect" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"aspect ratio"</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "None", "4:3", "16:9" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >DRM property to set aspect ratio from user space app.
+ This enum is made generic to allow addition of custom aspect
+ ratios.</td>
+ </tr>
+ <tr>
+ <td valign="top" >“dirty”</td>
+ <td valign="top" >ENUM | IMMUTABLE</td>
+ <td valign="top" >{ "Off", "On", "Annotate" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="21" valign="top" >i915</td>
+ <td rowspan="2" valign="top" >Generic</td>
+ <td valign="top" >"Broadcast RGB"</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "Automatic", "Full", "Limited 16:235" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“audio”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "force-dvi", "off", "auto", "on" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="1" valign="top" >Plane</td>
+ <td valign="top" >“rotation”</td>
+ <td valign="top" >BITMASK</td>
+ <td valign="top" >{ 0, "rotate-0" }, { 2, "rotate-180" }</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="17" valign="top" >SDVO-TV</td>
+ <td valign="top" >“mode”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "NTSC_M", "NTSC_J", "NTSC_443", "PAL_B" } etc.</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"left_margin"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"right_margin"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"top_margin"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"bottom_margin"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“hpos”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“vpos”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“contrast”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“saturation”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“hue”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“sharpness”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“flicker_filter”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“flicker_filter_adaptive”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“flicker_filter_2d”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“tv_chroma_filter”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“tv_luma_filter”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“dot_crawl”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=1</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >SDVO-TV/LVDS</td>
+ <td valign="top" >“brightness”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="2" valign="top" >CDV gma-500</td>
+ <td rowspan="2" valign="top" >Generic</td>
+ <td valign="top" >"Broadcast RGB"</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ “Full”, “Limited 16:235” }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"Broadcast RGB"</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ “off”, “auto”, “on” }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="19" valign="top" >Poulsbo</td>
+ <td rowspan="1" valign="top" >Generic</td>
+ <td valign="top" >“backlight”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=100</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="17" valign="top" >SDVO-TV</td>
+ <td valign="top" >“mode”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "NTSC_M", "NTSC_J", "NTSC_443", "PAL_B" } etc.</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"left_margin"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"right_margin"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"top_margin"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"bottom_margin"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“hpos”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“vpos”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“contrast”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“saturation”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“hue”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“sharpness”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“flicker_filter”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“flicker_filter_adaptive”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“flicker_filter_2d”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“tv_chroma_filter”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“tv_luma_filter”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“dot_crawl”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=1</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >SDVO-TV/LVDS</td>
+ <td valign="top" >“brightness”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max= SDVO dependent</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="11" valign="top" >armada</td>
+ <td rowspan="2" valign="top" >CRTC</td>
+ <td valign="top" >"CSC_YUV"</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "Auto" , "CCIR601", "CCIR709" }</td>
+ <td valign="top" >CRTC</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"CSC_RGB"</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "Auto", "Computer system", "Studio" }</td>
+ <td valign="top" >CRTC</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="9" valign="top" >Overlay</td>
+ <td valign="top" >"colorkey"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0xffffff</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"colorkey_min"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0xffffff</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"colorkey_max"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0xffffff</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"colorkey_val"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0xffffff</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"colorkey_alpha"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0xffffff</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"colorkey_mode"</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "disabled", "Y component", "U component"
+ , "V component", "RGB", “R component", "G component", "B component" }</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"brightness"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=256 + 255</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"contrast"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0x7fff</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"saturation"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0x7fff</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="2" valign="top" >exynos</td>
+ <td valign="top" >CRTC</td>
+ <td valign="top" >“mode”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "normal", "blank" }</td>
+ <td valign="top" >CRTC</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >Overlay</td>
+ <td valign="top" >“zpos”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=MAX_PLANE-1</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="2" valign="top" >i2c/ch7006_drv</td>
+ <td valign="top" >Generic</td>
+ <td valign="top" >“scale”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=2</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="1" valign="top" >TV</td>
+ <td valign="top" >“mode”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "PAL", "PAL-M","PAL-N"}, ”PAL-Nc"
+ , "PAL-60", "NTSC-M", "NTSC-J" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="15" valign="top" >nouveau</td>
+ <td rowspan="6" valign="top" >NV10 Overlay</td>
+ <td valign="top" >"colorkey"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0x01ffffff</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“contrast”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=8192-1</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“brightness”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=1024</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“hue”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=359</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“saturation”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=8192-1</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“iturbt_709”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=1</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="2" valign="top" >Nv04 Overlay</td>
+ <td valign="top" >“colorkey”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0x01ffffff</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“brightness”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=1024</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="7" valign="top" >Display</td>
+ <td valign="top" >“dithering mode”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "auto", "off", "on" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“dithering depth”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "auto", "off", "on", "static 2x2", "dynamic 2x2", "temporal" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“underscan”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "auto", "6 bpc", "8 bpc" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“underscan hborder”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=128</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“underscan vborder”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=128</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“vibrant hue”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=180</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“color vibrance”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=200</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="2" valign="top" >omap</td>
+ <td rowspan="2" valign="top" >Generic</td>
+ <td valign="top" >“rotation”</td>
+ <td valign="top" >BITMASK</td>
+ <td valign="top" >{ 0, "rotate-0" },
+ { 1, "rotate-90" },
+ { 2, "rotate-180" },
+ { 3, "rotate-270" },
+ { 4, "reflect-x" },
+ { 5, "reflect-y" }</td>
+ <td valign="top" >CRTC, Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >“zorder”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=3</td>
+ <td valign="top" >CRTC, Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >qxl</td>
+ <td valign="top" >Generic</td>
+ <td valign="top" >“hotplug_mode_update"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=1</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="9" valign="top" >radeon</td>
+ <td valign="top" >DVI-I</td>
+ <td valign="top" >“coherent”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=1</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >DAC enable load detect</td>
+ <td valign="top" >“load detection”</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=1</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >TV Standard</td>
+ <td valign="top" >"tv standard"</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "ntsc", "pal", "pal-m", "pal-60", "ntsc-j"
+ , "scart-pal", "pal-cn", "secam" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >legacy TMDS PLL detect</td>
+ <td valign="top" >"tmds_pll"</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "driver", "bios" }</td>
+ <td valign="top" >-</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="3" valign="top" >Underscan</td>
+ <td valign="top" >"underscan"</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "off", "on", "auto" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"underscan hborder"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=128</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"underscan vborder"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=128</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >Audio</td>
+ <td valign="top" >“audio”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "off", "on", "auto" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >FMT Dithering</td>
+ <td valign="top" >“dither”</td>
+ <td valign="top" >ENUM</td>
+ <td valign="top" >{ "off", "on" }</td>
+ <td valign="top" >Connector</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td rowspan="3" valign="top" >rcar-du</td>
+ <td rowspan="3" valign="top" >Generic</td>
+ <td valign="top" >"alpha"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=255</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"colorkey"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=0, Max=0x01ffffff</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ <tr>
+ <td valign="top" >"zpos"</td>
+ <td valign="top" >RANGE</td>
+ <td valign="top" >Min=1, Max=7</td>
+ <td valign="top" >Plane</td>
+ <td valign="top" >TBD</td>
+ </tr>
+ </tbody>
+ </table>
+ </sect2>
+ </sect1>
+
+ <!-- Internals: vertical blanking -->
+
+ <sect1 id="drm-vertical-blank">
+ <title>Vertical Blanking</title>
+ <para>
+ Vertical blanking plays a major role in graphics rendering. To achieve
+ tear-free display, users must synchronize page flips and/or rendering to
+ vertical blanking. The DRM API offers ioctls to perform page flips
+ synchronized to vertical blanking and wait for vertical blanking.
+ </para>
+ <para>
+ The DRM core handles most of the vertical blanking management logic, which
+ involves filtering out spurious interrupts, keeping race-free blanking
+ counters, coping with counter wrap-around and resets and keeping use
+ counts. It relies on the driver to generate vertical blanking interrupts
+ and optionally provide a hardware vertical blanking counter. Drivers must
+ implement the following operations.
+ </para>
+ <itemizedlist>
+ <listitem>
+ <synopsis>int (*enable_vblank) (struct drm_device *dev, int crtc);
+void (*disable_vblank) (struct drm_device *dev, int crtc);</synopsis>
+ <para>
+ Enable or disable vertical blanking interrupts for the given CRTC.
+ </para>
+ </listitem>
+ <listitem>
+ <synopsis>u32 (*get_vblank_counter) (struct drm_device *dev, int crtc);</synopsis>
+ <para>
+ Retrieve the value of the vertical blanking counter for the given
+ CRTC. If the hardware maintains a vertical blanking counter its value
+ should be returned. Otherwise drivers can use the
+ <function>drm_vblank_count</function> helper function to handle this
+ operation.
+ </para>
+ </listitem>
+ </itemizedlist>
+ <para>
+ Drivers must initialize the vertical blanking handling core with a call to
+ <function>drm_vblank_init</function> in their
+ <methodname>load</methodname> operation. The function will set the struct
+ <structname>drm_device</structname>
+ <structfield>vblank_disable_allowed</structfield> field to 0. This will
+ keep vertical blanking interrupts enabled permanently until the first mode
+ set operation, where <structfield>vblank_disable_allowed</structfield> is
+ set to 1. The reason behind this is not clear. Drivers can set the field
+ to 1 after <function>calling drm_vblank_init</function> to make vertical
+ blanking interrupts dynamically managed from the beginning.
+ </para>
+ <para>
+ Vertical blanking interrupts can be enabled by the DRM core or by drivers
+ themselves (for instance to handle page flipping operations). The DRM core
+ maintains a vertical blanking use count to ensure that the interrupts are
+ not disabled while a user still needs them. To increment the use count,
+ drivers call <function>drm_vblank_get</function>. Upon return vertical
+ blanking interrupts are guaranteed to be enabled.
+ </para>
+ <para>
+ To decrement the use count drivers call
+ <function>drm_vblank_put</function>. Only when the use count drops to zero
+ will the DRM core disable the vertical blanking interrupts after a delay
+ by scheduling a timer. The delay is accessible through the vblankoffdelay
+ module parameter or the <varname>drm_vblank_offdelay</varname> global
+ variable and expressed in milliseconds. Its default value is 5000 ms.
+ Zero means never disable, and a negative value means disable immediately.
+ Drivers may override the behaviour by setting the
+ <structname>drm_device</structname>
+ <structfield>vblank_disable_immediate</structfield> flag, which when set
+ causes vblank interrupts to be disabled immediately regardless of the
+ drm_vblank_offdelay value. The flag should only be set if there's a
+ properly working hardware vblank counter present.
+ </para>
+ <para>
+ When a vertical blanking interrupt occurs drivers only need to call the
+ <function>drm_handle_vblank</function> function to account for the
+ interrupt.
+ </para>
+ <para>
+ Resources allocated by <function>drm_vblank_init</function> must be freed
+ with a call to <function>drm_vblank_cleanup</function> in the driver
+ <methodname>unload</methodname> operation handler.
+ </para>
+ <sect2>
+ <title>Vertical Blanking and Interrupt Handling Functions Reference</title>
+!Edrivers/gpu/drm/drm_irq.c
+!Finclude/drm/drmP.h drm_crtc_vblank_waitqueue
+ </sect2>
+ </sect1>
+
+ <!-- Internals: open/close, file operations and ioctls -->
+
+ <sect1>
+ <title>Open/Close, File Operations and IOCTLs</title>
+ <sect2>
+ <title>Open and Close</title>
+ <synopsis>int (*firstopen) (struct drm_device *);
+void (*lastclose) (struct drm_device *);
+int (*open) (struct drm_device *, struct drm_file *);
+void (*preclose) (struct drm_device *, struct drm_file *);
+void (*postclose) (struct drm_device *, struct drm_file *);</synopsis>
+ <abstract>Open and close handlers. None of those methods are mandatory.
+ </abstract>
+ <para>
+ The <methodname>firstopen</methodname> method is called by the DRM core
+ for legacy UMS (User Mode Setting) drivers only when an application
+ opens a device that has no other opened file handle. UMS drivers can
+ implement it to acquire device resources. KMS drivers can't use the
+ method and must acquire resources in the <methodname>load</methodname>
+ method instead.
+ </para>
+ <para>
+ Similarly the <methodname>lastclose</methodname> method is called when
+ the last application holding a file handle opened on the device closes
+ it, for both UMS and KMS drivers. Additionally, the method is also
+ called at module unload time or, for hot-pluggable devices, when the
+ device is unplugged. The <methodname>firstopen</methodname> and
+ <methodname>lastclose</methodname> calls can thus be unbalanced.
+ </para>
+ <para>
+ The <methodname>open</methodname> method is called every time the device
+ is opened by an application. Drivers can allocate per-file private data
+ in this method and store them in the struct
+ <structname>drm_file</structname> <structfield>driver_priv</structfield>
+ field. Note that the <methodname>open</methodname> method is called
+ before <methodname>firstopen</methodname>.
+ </para>
+ <para>
+ The close operation is split into <methodname>preclose</methodname> and
+ <methodname>postclose</methodname> methods. Drivers must stop and
+ cleanup all per-file operations in the <methodname>preclose</methodname>
+ method. For instance pending vertical blanking and page flip events must
+ be cancelled. No per-file operation is allowed on the file handle after
+ returning from the <methodname>preclose</methodname> method.
+ </para>
+ <para>
+ Finally the <methodname>postclose</methodname> method is called as the
+ last step of the close operation, right before calling the
+ <methodname>lastclose</methodname> method if no other open file handle
+ exists for the device. Drivers that have allocated per-file private data
+ in the <methodname>open</methodname> method should free it here.
+ </para>
+ <para>
+ The <methodname>lastclose</methodname> method should restore CRTC and
+ plane properties to default value, so that a subsequent open of the
+ device will not inherit state from the previous user. It can also be
+ used to execute delayed power switching state changes, e.g. in
+ conjunction with the vga-switcheroo infrastructure. Beyond that KMS
+ drivers should not do any further cleanup. Only legacy UMS drivers might
+ need to clean up device state so that the vga console or an independent
+ fbdev driver could take over.
+ </para>
+ </sect2>
+ <sect2>
+ <title>File Operations</title>
+ <synopsis>const struct file_operations *fops</synopsis>
+ <abstract>File operations for the DRM device node.</abstract>
+ <para>
+ Drivers must define the file operations structure that forms the DRM
+ userspace API entry point, even though most of those operations are
+ implemented in the DRM core. The <methodname>open</methodname>,
+ <methodname>release</methodname> and <methodname>ioctl</methodname>
+ operations are handled by
+ <programlisting>
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ #ifdef CONFIG_COMPAT
+ .compat_ioctl = drm_compat_ioctl,
+ #endif
+ </programlisting>
+ </para>
+ <para>
+ Drivers that implement private ioctls that requires 32/64bit
+ compatibility support must provide their own
+ <methodname>compat_ioctl</methodname> handler that processes private
+ ioctls and calls <function>drm_compat_ioctl</function> for core ioctls.
+ </para>
+ <para>
+ The <methodname>read</methodname> and <methodname>poll</methodname>
+ operations provide support for reading DRM events and polling them. They
+ are implemented by
+ <programlisting>
+ .poll = drm_poll,
+ .read = drm_read,
+ .llseek = no_llseek,
+ </programlisting>
+ </para>
+ <para>
+ The memory mapping implementation varies depending on how the driver
+ manages memory. Pre-GEM drivers will use <function>drm_mmap</function>,
+ while GEM-aware drivers will use <function>drm_gem_mmap</function>. See
+ <xref linkend="drm-gem"/>.
+ <programlisting>
+ .mmap = drm_gem_mmap,
+ </programlisting>
+ </para>
+ <para>
+ No other file operation is supported by the DRM API.
+ </para>
+ </sect2>
+ <sect2>
+ <title>IOCTLs</title>
+ <synopsis>struct drm_ioctl_desc *ioctls;
+int num_ioctls;</synopsis>
+ <abstract>Driver-specific ioctls descriptors table.</abstract>
+ <para>
+ Driver-specific ioctls numbers start at DRM_COMMAND_BASE. The ioctls
+ descriptors table is indexed by the ioctl number offset from the base
+ value. Drivers can use the DRM_IOCTL_DEF_DRV() macro to initialize the
+ table entries.
+ </para>
+ <para>
+ <programlisting>DRM_IOCTL_DEF_DRV(ioctl, func, flags)</programlisting>
+ <para>
+ <parameter>ioctl</parameter> is the ioctl name. Drivers must define
+ the DRM_##ioctl and DRM_IOCTL_##ioctl macros to the ioctl number
+ offset from DRM_COMMAND_BASE and the ioctl number respectively. The
+ first macro is private to the device while the second must be exposed
+ to userspace in a public header.
+ </para>
+ <para>
+ <parameter>func</parameter> is a pointer to the ioctl handler function
+ compatible with the <type>drm_ioctl_t</type> type.
+ <programlisting>typedef int drm_ioctl_t(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);</programlisting>
+ </para>
+ <para>
+ <parameter>flags</parameter> is a bitmask combination of the following
+ values. It restricts how the ioctl is allowed to be called.
+ <itemizedlist>
+ <listitem><para>
+ DRM_AUTH - Only authenticated callers allowed
+ </para></listitem>
+ <listitem><para>
+ DRM_MASTER - The ioctl can only be called on the master file
+ handle
+ </para></listitem>
+ <listitem><para>
+ DRM_ROOT_ONLY - Only callers with the SYSADMIN capability allowed
+ </para></listitem>
+ <listitem><para>
+ DRM_CONTROL_ALLOW - The ioctl can only be called on a control
+ device
+ </para></listitem>
+ <listitem><para>
+ DRM_UNLOCKED - The ioctl handler will be called without locking
+ the DRM global mutex
+ </para></listitem>
+ </itemizedlist>
+ </para>
+ </para>
+ </sect2>
+ </sect1>
+ <sect1>
+ <title>Legacy Support Code</title>
+ <para>
+ The section very briefly covers some of the old legacy support code which
+ is only used by old DRM drivers which have done a so-called shadow-attach
+ to the underlying device instead of registering as a real driver. This
+ also includes some of the old generic buffer management and command
+ submission code. Do not use any of this in new and modern drivers.
+ </para>
+
+ <sect2>
+ <title>Legacy Suspend/Resume</title>
+ <para>
+ The DRM core provides some suspend/resume code, but drivers wanting full
+ suspend/resume support should provide save() and restore() functions.
+ These are called at suspend, hibernate, or resume time, and should perform
+ any state save or restore required by your device across suspend or
+ hibernate states.
+ </para>
+ <synopsis>int (*suspend) (struct drm_device *, pm_message_t state);
+ int (*resume) (struct drm_device *);</synopsis>
+ <para>
+ Those are legacy suspend and resume methods which
+ <emphasis>only</emphasis> work with the legacy shadow-attach driver
+ registration functions. New driver should use the power management
+ interface provided by their bus type (usually through
+ the struct <structname>device_driver</structname> dev_pm_ops) and set
+ these methods to NULL.
+ </para>
+ </sect2>
+
+ <sect2>
+ <title>Legacy DMA Services</title>
+ <para>
+ This should cover how DMA mapping etc. is supported by the core.
+ These functions are deprecated and should not be used.
+ </para>
+ </sect2>
+ </sect1>
+ </chapter>
+
+<!-- TODO
+
+- Add a glossary
+- Document the struct_mutex catch-all lock
+- Document connector properties
+
+- Why is the load method optional?
+- What are drivers supposed to set the initial display state to, and how?
+ Connector's DPMS states are not initialized and are thus equal to
+ DRM_MODE_DPMS_ON. The fbcon compatibility layer calls
+ drm_helper_disable_unused_functions(), which disables unused encoders and
+ CRTCs, but doesn't touch the connectors' DPMS state, and
+ drm_helper_connector_dpms() in reaction to fbdev blanking events. Do drivers
+ that don't implement (or just don't use) fbcon compatibility need to call
+ those functions themselves?
+- KMS drivers must call drm_vblank_pre_modeset() and drm_vblank_post_modeset()
+ around mode setting. Should this be done in the DRM core?
+- vblank_disable_allowed is set to 1 in the first drm_vblank_post_modeset()
+ call and never set back to 0. It seems to be safe to permanently set it to 1
+ in drm_vblank_init() for KMS driver, and it might be safe for UMS drivers as
+ well. This should be investigated.
+- crtc and connector .save and .restore operations are only used internally in
+ drivers, should they be removed from the core?
+- encoder mid-layer .save and .restore operations are only used internally in
+ drivers, should they be removed from the core?
+- encoder mid-layer .detect operation is only used internally in drivers,
+ should it be removed from the core?
+-->
+
+ <!-- External interfaces -->
+
+ <chapter id="drmExternals">
+ <title>Userland interfaces</title>
+ <para>
+ The DRM core exports several interfaces to applications,
+ generally intended to be used through corresponding libdrm
+ wrapper functions. In addition, drivers export device-specific
+ interfaces for use by userspace drivers &amp; device-aware
+ applications through ioctls and sysfs files.
+ </para>
+ <para>
+ External interfaces include: memory mapping, context management,
+ DMA operations, AGP management, vblank control, fence
+ management, memory management, and output management.
+ </para>
+ <para>
+ Cover generic ioctls and sysfs layout here. We only need high-level
+ info, since man pages should cover the rest.
+ </para>
+
+ <!-- External: render nodes -->
+
+ <sect1>
+ <title>Render nodes</title>
+ <para>
+ DRM core provides multiple character-devices for user-space to use.
+ Depending on which device is opened, user-space can perform a different
+ set of operations (mainly ioctls). The primary node is always created
+ and called card&lt;num&gt;. Additionally, a currently
+ unused control node, called controlD&lt;num&gt; is also
+ created. The primary node provides all legacy operations and
+ historically was the only interface used by userspace. With KMS, the
+ control node was introduced. However, the planned KMS control interface
+ has never been written and so the control node stays unused to date.
+ </para>
+ <para>
+ With the increased use of offscreen renderers and GPGPU applications,
+ clients no longer require running compositors or graphics servers to
+ make use of a GPU. But the DRM API required unprivileged clients to
+ authenticate to a DRM-Master prior to getting GPU access. To avoid this
+ step and to grant clients GPU access without authenticating, render
+ nodes were introduced. Render nodes solely serve render clients, that
+ is, no modesetting or privileged ioctls can be issued on render nodes.
+ Only non-global rendering commands are allowed. If a driver supports
+ render nodes, it must advertise it via the DRIVER_RENDER
+ DRM driver capability. If not supported, the primary node must be used
+ for render clients together with the legacy drmAuth authentication
+ procedure.
+ </para>
+ <para>
+ If a driver advertises render node support, DRM core will create a
+ separate render node called renderD&lt;num&gt;. There will
+ be one render node per device. No ioctls except PRIME-related ioctls
+ will be allowed on this node. Especially GEM_OPEN will be
+ explicitly prohibited. Render nodes are designed to avoid the
+ buffer-leaks, which occur if clients guess the flink names or mmap
+ offsets on the legacy interface. Additionally to this basic interface,
+ drivers must mark their driver-dependent render-only ioctls as
+ DRM_RENDER_ALLOW so render clients can use them. Driver
+ authors must be careful not to allow any privileged ioctls on render
+ nodes.
+ </para>
+ <para>
+ With render nodes, user-space can now control access to the render node
+ via basic file-system access-modes. A running graphics server which
+ authenticates clients on the privileged primary/legacy node is no longer
+ required. Instead, a client can open the render node and is immediately
+ granted GPU access. Communication between clients (or servers) is done
+ via PRIME. FLINK from render node to legacy node is not supported. New
+ clients must not use the insecure FLINK interface.
+ </para>
+ <para>
+ Besides dropping all modeset/global ioctls, render nodes also drop the
+ DRM-Master concept. There is no reason to associate render clients with
+ a DRM-Master as they are independent of any graphics server. Besides,
+ they must work without any running master, anyway.
+ Drivers must be able to run without a master object if they support
+ render nodes. If, on the other hand, a driver requires shared state
+ between clients which is visible to user-space and accessible beyond
+ open-file boundaries, they cannot support render nodes.
+ </para>
+ </sect1>
+
+ <!-- External: vblank handling -->
+
+ <sect1>
+ <title>VBlank event handling</title>
+ <para>
+ The DRM core exposes two vertical blank related ioctls:
+ <variablelist>
+ <varlistentry>
+ <term>DRM_IOCTL_WAIT_VBLANK</term>
+ <listitem>
+ <para>
+ This takes a struct drm_wait_vblank structure as its argument,
+ and it is used to block or request a signal when a specified
+ vblank event occurs.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>DRM_IOCTL_MODESET_CTL</term>
+ <listitem>
+ <para>
+ This was only used for user-mode-settind drivers around
+ modesetting changes to allow the kernel to update the vblank
+ interrupt after mode setting, since on many devices the vertical
+ blank counter is reset to 0 at some point during modeset. Modern
+ drivers should not call this any more since with kernel mode
+ setting it is a no-op.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </para>
+ </sect1>
+
+ </chapter>
+</part>
+<part id="drmDrivers">
+ <title>DRM Drivers</title>
+
+ <partintro>
+ <para>
+ This second part of the DRM Developer's Guide documents driver code,
+ implementation details and also all the driver-specific userspace
+ interfaces. Especially since all hardware-acceleration interfaces to
+ userspace are driver specific for efficiency and other reasons these
+ interfaces can be rather substantial. Hence every driver has its own
+ chapter.
+ </para>
+ </partintro>
+
+ <chapter id="drmI915">
+ <title>drm/i915 Intel GFX Driver</title>
+ <para>
+ The drm/i915 driver supports all (with the exception of some very early
+ models) integrated GFX chipsets with both Intel display and rendering
+ blocks. This excludes a set of SoC platforms with an SGX rendering unit,
+ those have basic support through the gma500 drm driver.
+ </para>
+ <sect1>
+ <title>Core Driver Infrastructure</title>
+ <para>
+ This section covers core driver infrastructure used by both the display
+ and the GEM parts of the driver.
+ </para>
+ <sect2>
+ <title>Runtime Power Management</title>
+!Pdrivers/gpu/drm/i915/intel_runtime_pm.c runtime pm
+!Idrivers/gpu/drm/i915/intel_runtime_pm.c
+!Idrivers/gpu/drm/i915/intel_uncore.c
+ </sect2>
+ <sect2>
+ <title>Interrupt Handling</title>
+!Pdrivers/gpu/drm/i915/i915_irq.c interrupt handling
+!Fdrivers/gpu/drm/i915/i915_irq.c intel_irq_init intel_irq_init_hw intel_hpd_init
+!Fdrivers/gpu/drm/i915/i915_irq.c intel_irq_fini
+!Fdrivers/gpu/drm/i915/i915_irq.c intel_runtime_pm_disable_interrupts
+!Fdrivers/gpu/drm/i915/i915_irq.c intel_runtime_pm_enable_interrupts
+ </sect2>
+ <sect2>
+ <title>Intel GVT-g Guest Support(vGPU)</title>
+!Pdrivers/gpu/drm/i915/i915_vgpu.c Intel GVT-g guest support
+!Idrivers/gpu/drm/i915/i915_vgpu.c
+ </sect2>
+ </sect1>
+ <sect1>
+ <title>Display Hardware Handling</title>
+ <para>
+ This section covers everything related to the display hardware including
+ the mode setting infrastructure, plane, sprite and cursor handling and
+ display, output probing and related topics.
+ </para>
+ <sect2>
+ <title>Mode Setting Infrastructure</title>
+ <para>
+ The i915 driver is thus far the only DRM driver which doesn't use the
+ common DRM helper code to implement mode setting sequences. Thus it
+ has its own tailor-made infrastructure for executing a display
+ configuration change.
+ </para>
+ </sect2>
+ <sect2>
+ <title>Frontbuffer Tracking</title>
+!Pdrivers/gpu/drm/i915/intel_frontbuffer.c frontbuffer tracking
+!Idrivers/gpu/drm/i915/intel_frontbuffer.c
+!Fdrivers/gpu/drm/i915/intel_drv.h intel_frontbuffer_flip
+!Fdrivers/gpu/drm/i915/i915_gem.c i915_gem_track_fb
+ </sect2>
+ <sect2>
+ <title>Display FIFO Underrun Reporting</title>
+!Pdrivers/gpu/drm/i915/intel_fifo_underrun.c fifo underrun handling
+!Idrivers/gpu/drm/i915/intel_fifo_underrun.c
+ </sect2>
+ <sect2>
+ <title>Plane Configuration</title>
+ <para>
+ This section covers plane configuration and composition with the
+ primary plane, sprites, cursors and overlays. This includes the
+ infrastructure to do atomic vsync'ed updates of all this state and
+ also tightly coupled topics like watermark setup and computation,
+ framebuffer compression and panel self refresh.
+ </para>
+ </sect2>
+ <sect2>
+ <title>Atomic Plane Helpers</title>
+!Pdrivers/gpu/drm/i915/intel_atomic_plane.c atomic plane helpers
+!Idrivers/gpu/drm/i915/intel_atomic_plane.c
+ </sect2>
+ <sect2>
+ <title>Output Probing</title>
+ <para>
+ This section covers output probing and related infrastructure like the
+ hotplug interrupt storm detection and mitigation code. Note that the
+ i915 driver still uses most of the common DRM helper code for output
+ probing, so those sections fully apply.
+ </para>
+ </sect2>
+ <sect2>
+ <title>High Definition Audio</title>
+!Pdrivers/gpu/drm/i915/intel_audio.c High Definition Audio over HDMI and Display Port
+!Idrivers/gpu/drm/i915/intel_audio.c
+ </sect2>
+ <sect2>
+ <title>Panel Self Refresh PSR (PSR/SRD)</title>
+!Pdrivers/gpu/drm/i915/intel_psr.c Panel Self Refresh (PSR/SRD)
+!Idrivers/gpu/drm/i915/intel_psr.c
+ </sect2>
+ <sect2>
+ <title>Frame Buffer Compression (FBC)</title>
+!Pdrivers/gpu/drm/i915/intel_fbc.c Frame Buffer Compression (FBC)
+!Idrivers/gpu/drm/i915/intel_fbc.c
+ </sect2>
+ <sect2>
+ <title>Display Refresh Rate Switching (DRRS)</title>
+!Pdrivers/gpu/drm/i915/intel_dp.c Display Refresh Rate Switching (DRRS)
+!Fdrivers/gpu/drm/i915/intel_dp.c intel_dp_set_drrs_state
+!Fdrivers/gpu/drm/i915/intel_dp.c intel_edp_drrs_enable
+!Fdrivers/gpu/drm/i915/intel_dp.c intel_edp_drrs_disable
+!Fdrivers/gpu/drm/i915/intel_dp.c intel_edp_drrs_invalidate
+!Fdrivers/gpu/drm/i915/intel_dp.c intel_edp_drrs_flush
+!Fdrivers/gpu/drm/i915/intel_dp.c intel_dp_drrs_init
+
+ </sect2>
+ <sect2>
+ <title>DPIO</title>
+!Pdrivers/gpu/drm/i915/i915_reg.h DPIO
+ <table id="dpiox2">
+ <title>Dual channel PHY (VLV/CHV)</title>
+ <tgroup cols="8">
+ <colspec colname="c0" />
+ <colspec colname="c1" />
+ <colspec colname="c2" />
+ <colspec colname="c3" />
+ <colspec colname="c4" />
+ <colspec colname="c5" />
+ <colspec colname="c6" />
+ <colspec colname="c7" />
+ <spanspec spanname="ch0" namest="c0" nameend="c3" />
+ <spanspec spanname="ch1" namest="c4" nameend="c7" />
+ <spanspec spanname="ch0pcs01" namest="c0" nameend="c1" />
+ <spanspec spanname="ch0pcs23" namest="c2" nameend="c3" />
+ <spanspec spanname="ch1pcs01" namest="c4" nameend="c5" />
+ <spanspec spanname="ch1pcs23" namest="c6" nameend="c7" />
+ <thead>
+ <row>
+ <entry spanname="ch0">CH0</entry>
+ <entry spanname="ch1">CH1</entry>
+ </row>
+ </thead>
+ <tbody valign="top" align="center">
+ <row>
+ <entry spanname="ch0">CMN/PLL/REF</entry>
+ <entry spanname="ch1">CMN/PLL/REF</entry>
+ </row>
+ <row>
+ <entry spanname="ch0pcs01">PCS01</entry>
+ <entry spanname="ch0pcs23">PCS23</entry>
+ <entry spanname="ch1pcs01">PCS01</entry>
+ <entry spanname="ch1pcs23">PCS23</entry>
+ </row>
+ <row>
+ <entry>TX0</entry>
+ <entry>TX1</entry>
+ <entry>TX2</entry>
+ <entry>TX3</entry>
+ <entry>TX0</entry>
+ <entry>TX1</entry>
+ <entry>TX2</entry>
+ <entry>TX3</entry>
+ </row>
+ <row>
+ <entry spanname="ch0">DDI0</entry>
+ <entry spanname="ch1">DDI1</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <table id="dpiox1">
+ <title>Single channel PHY (CHV)</title>
+ <tgroup cols="4">
+ <colspec colname="c0" />
+ <colspec colname="c1" />
+ <colspec colname="c2" />
+ <colspec colname="c3" />
+ <spanspec spanname="ch0" namest="c0" nameend="c3" />
+ <spanspec spanname="ch0pcs01" namest="c0" nameend="c1" />
+ <spanspec spanname="ch0pcs23" namest="c2" nameend="c3" />
+ <thead>
+ <row>
+ <entry spanname="ch0">CH0</entry>
+ </row>
+ </thead>
+ <tbody valign="top" align="center">
+ <row>
+ <entry spanname="ch0">CMN/PLL/REF</entry>
+ </row>
+ <row>
+ <entry spanname="ch0pcs01">PCS01</entry>
+ <entry spanname="ch0pcs23">PCS23</entry>
+ </row>
+ <row>
+ <entry>TX0</entry>
+ <entry>TX1</entry>
+ <entry>TX2</entry>
+ <entry>TX3</entry>
+ </row>
+ <row>
+ <entry spanname="ch0">DDI2</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </sect2>
+ </sect1>
+
+ <sect1>
+ <title>Memory Management and Command Submission</title>
+ <para>
+ This sections covers all things related to the GEM implementation in the
+ i915 driver.
+ </para>
+ <sect2>
+ <title>Batchbuffer Parsing</title>
+!Pdrivers/gpu/drm/i915/i915_cmd_parser.c batch buffer command parser
+!Idrivers/gpu/drm/i915/i915_cmd_parser.c
+ </sect2>
+ <sect2>
+ <title>Batchbuffer Pools</title>
+!Pdrivers/gpu/drm/i915/i915_gem_batch_pool.c batch pool
+!Idrivers/gpu/drm/i915/i915_gem_batch_pool.c
+ </sect2>
+ <sect2>
+ <title>Logical Rings, Logical Ring Contexts and Execlists</title>
+!Pdrivers/gpu/drm/i915/intel_lrc.c Logical Rings, Logical Ring Contexts and Execlists
+!Idrivers/gpu/drm/i915/intel_lrc.c
+ </sect2>
+ <sect2>
+ <title>Global GTT views</title>
+!Pdrivers/gpu/drm/i915/i915_gem_gtt.c Global GTT views
+!Idrivers/gpu/drm/i915/i915_gem_gtt.c
+ </sect2>
+ <sect2>
+ <title>Buffer Object Eviction</title>
+ <para>
+ This section documents the interface functions for evicting buffer
+ objects to make space available in the virtual gpu address spaces.
+ Note that this is mostly orthogonal to shrinking buffer objects
+ caches, which has the goal to make main memory (shared with the gpu
+ through the unified memory architecture) available.
+ </para>
+!Idrivers/gpu/drm/i915/i915_gem_evict.c
+ </sect2>
+ <sect2>
+ <title>Buffer Object Memory Shrinking</title>
+ <para>
+ This section documents the interface function for shrinking memory
+ usage of buffer object caches. Shrinking is used to make main memory
+ available. Note that this is mostly orthogonal to evicting buffer
+ objects, which has the goal to make space in gpu virtual address
+ spaces.
+ </para>
+!Idrivers/gpu/drm/i915/i915_gem_shrinker.c
+ </sect2>
+ </sect1>
+
+ <sect1>
+ <title> Tracing </title>
+ <para>
+ This sections covers all things related to the tracepoints implemented in
+ the i915 driver.
+ </para>
+ <sect2>
+ <title> i915_ppgtt_create and i915_ppgtt_release </title>
+!Pdrivers/gpu/drm/i915/i915_trace.h i915_ppgtt_create and i915_ppgtt_release tracepoints
+ </sect2>
+ <sect2>
+ <title> i915_context_create and i915_context_free </title>
+!Pdrivers/gpu/drm/i915/i915_trace.h i915_context_create and i915_context_free tracepoints
+ </sect2>
+ <sect2>
+ <title> switch_mm </title>
+!Pdrivers/gpu/drm/i915/i915_trace.h switch_mm tracepoint
+ </sect2>
+ </sect1>
+
+ </chapter>
+!Cdrivers/gpu/drm/i915/i915_irq.c
+</part>
+</book>
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
new file mode 100644
index 000000000..bcdfdb9a9
--- /dev/null
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -0,0 +1,425 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Linux-filesystems-API">
+ <bookinfo>
+ <title>Linux Filesystems API</title>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="vfs">
+ <title>The Linux VFS</title>
+ <sect1 id="the_filesystem_types"><title>The Filesystem types</title>
+!Iinclude/linux/fs.h
+ </sect1>
+ <sect1 id="the_directory_cache"><title>The Directory Cache</title>
+!Efs/dcache.c
+!Iinclude/linux/dcache.h
+ </sect1>
+ <sect1 id="inode_handling"><title>Inode Handling</title>
+!Efs/inode.c
+!Efs/bad_inode.c
+ </sect1>
+ <sect1 id="registration_and_superblocks"><title>Registration and Superblocks</title>
+!Efs/super.c
+ </sect1>
+ <sect1 id="file_locks"><title>File Locks</title>
+!Efs/locks.c
+!Ifs/locks.c
+ </sect1>
+ <sect1 id="other_functions"><title>Other Functions</title>
+!Efs/mpage.c
+!Efs/namei.c
+!Efs/buffer.c
+!Eblock/bio.c
+!Efs/seq_file.c
+!Efs/filesystems.c
+!Efs/fs-writeback.c
+!Efs/block_dev.c
+ </sect1>
+ </chapter>
+
+ <chapter id="proc">
+ <title>The proc filesystem</title>
+
+ <sect1 id="sysctl_interface"><title>sysctl interface</title>
+!Ekernel/sysctl.c
+ </sect1>
+
+ <sect1 id="proc_filesystem_interface"><title>proc filesystem interface</title>
+!Ifs/proc/base.c
+ </sect1>
+ </chapter>
+
+ <chapter id="fs_events">
+ <title>Events based on file descriptors</title>
+!Efs/eventfd.c
+ </chapter>
+
+ <chapter id="sysfs">
+ <title>The Filesystem for Exporting Kernel Objects</title>
+!Efs/sysfs/file.c
+!Efs/sysfs/symlink.c
+ </chapter>
+
+ <chapter id="debugfs">
+ <title>The debugfs filesystem</title>
+
+ <sect1 id="debugfs_interface"><title>debugfs interface</title>
+!Efs/debugfs/inode.c
+!Efs/debugfs/file.c
+ </sect1>
+ </chapter>
+
+ <chapter id="LinuxJDBAPI">
+ <chapterinfo>
+ <title>The Linux Journalling API</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Roger</firstname>
+ <surname>Gammans</surname>
+ <affiliation>
+ <address>
+ <email>rgammans@computer-surgery.co.uk</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <authorgroup>
+ <author>
+ <firstname>Stephen</firstname>
+ <surname>Tweedie</surname>
+ <affiliation>
+ <address>
+ <email>sct@redhat.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2002</year>
+ <holder>Roger Gammans</holder>
+ </copyright>
+ </chapterinfo>
+
+ <title>The Linux Journalling API</title>
+
+ <sect1 id="journaling_overview">
+ <title>Overview</title>
+ <sect2 id="journaling_details">
+ <title>Details</title>
+<para>
+The journalling layer is easy to use. You need to
+first of all create a journal_t data structure. There are
+two calls to do this dependent on how you decide to allocate the physical
+media on which the journal resides. The journal_init_inode() call
+is for journals stored in filesystem inodes, or the journal_init_dev()
+call can be use for journal stored on a raw device (in a continuous range
+of blocks). A journal_t is a typedef for a struct pointer, so when
+you are finally finished make sure you call journal_destroy() on it
+to free up any used kernel memory.
+</para>
+
+<para>
+Once you have got your journal_t object you need to 'mount' or load the journal
+file, unless of course you haven't initialised it yet - in which case you
+need to call journal_create().
+</para>
+
+<para>
+Most of the time however your journal file will already have been created, but
+before you load it you must call journal_wipe() to empty the journal file.
+Hang on, you say , what if the filesystem wasn't cleanly umount()'d . Well, it is the
+job of the client file system to detect this and skip the call to journal_wipe().
+</para>
+
+<para>
+In either case the next call should be to journal_load() which prepares the
+journal file for use. Note that journal_wipe(..,0) calls journal_skip_recovery()
+for you if it detects any outstanding transactions in the journal and similarly
+journal_load() will call journal_recover() if necessary.
+I would advise reading fs/ext3/super.c for examples on this stage.
+[RGG: Why is the journal_wipe() call necessary - doesn't this needlessly
+complicate the API. Or isn't a good idea for the journal layer to hide
+dirty mounts from the client fs]
+</para>
+
+<para>
+Now you can go ahead and start modifying the underlying
+filesystem. Almost.
+</para>
+
+<para>
+
+You still need to actually journal your filesystem changes, this
+is done by wrapping them into transactions. Additionally you
+also need to wrap the modification of each of the buffers
+with calls to the journal layer, so it knows what the modifications
+you are actually making are. To do this use journal_start() which
+returns a transaction handle.
+</para>
+
+<para>
+journal_start()
+and its counterpart journal_stop(), which indicates the end of a transaction
+are nestable calls, so you can reenter a transaction if necessary,
+but remember you must call journal_stop() the same number of times as
+journal_start() before the transaction is completed (or more accurately
+leaves the update phase). Ext3/VFS makes use of this feature to simplify
+quota support.
+</para>
+
+<para>
+Inside each transaction you need to wrap the modifications to the
+individual buffers (blocks). Before you start to modify a buffer you
+need to call journal_get_{create,write,undo}_access() as appropriate,
+this allows the journalling layer to copy the unmodified data if it
+needs to. After all the buffer may be part of a previously uncommitted
+transaction.
+At this point you are at last ready to modify a buffer, and once
+you are have done so you need to call journal_dirty_{meta,}data().
+Or if you've asked for access to a buffer you now know is now longer
+required to be pushed back on the device you can call journal_forget()
+in much the same way as you might have used bforget() in the past.
+</para>
+
+<para>
+A journal_flush() may be called at any time to commit and checkpoint
+all your transactions.
+</para>
+
+<para>
+Then at umount time , in your put_super() you can then call journal_destroy()
+to clean up your in-core journal object.
+</para>
+
+<para>
+Unfortunately there a couple of ways the journal layer can cause a deadlock.
+The first thing to note is that each task can only have
+a single outstanding transaction at any one time, remember nothing
+commits until the outermost journal_stop(). This means
+you must complete the transaction at the end of each file/inode/address
+etc. operation you perform, so that the journalling system isn't re-entered
+on another journal. Since transactions can't be nested/batched
+across differing journals, and another filesystem other than
+yours (say ext3) may be modified in a later syscall.
+</para>
+
+<para>
+The second case to bear in mind is that journal_start() can
+block if there isn't enough space in the journal for your transaction
+(based on the passed nblocks param) - when it blocks it merely(!) needs to
+wait for transactions to complete and be committed from other tasks,
+so essentially we are waiting for journal_stop(). So to avoid
+deadlocks you must treat journal_start/stop() as if they
+were semaphores and include them in your semaphore ordering rules to prevent
+deadlocks. Note that journal_extend() has similar blocking behaviour to
+journal_start() so you can deadlock here just as easily as on journal_start().
+</para>
+
+<para>
+Try to reserve the right number of blocks the first time. ;-). This will
+be the maximum number of blocks you are going to touch in this transaction.
+I advise having a look at at least ext3_jbd.h to see the basis on which
+ext3 uses to make these decisions.
+</para>
+
+<para>
+Another wriggle to watch out for is your on-disk block allocation strategy.
+why? Because, if you undo a delete, you need to ensure you haven't reused any
+of the freed blocks in a later transaction. One simple way of doing this
+is make sure any blocks you allocate only have checkpointed transactions
+listed against them. Ext3 does this in ext3_test_allocatable().
+</para>
+
+<para>
+Lock is also providing through journal_{un,}lock_updates(),
+ext3 uses this when it wants a window with a clean and stable fs for a moment.
+eg.
+</para>
+
+<programlisting>
+
+ journal_lock_updates() //stop new stuff happening..
+ journal_flush() // checkpoint everything.
+ ..do stuff on stable fs
+ journal_unlock_updates() // carry on with filesystem use.
+</programlisting>
+
+<para>
+The opportunities for abuse and DOS attacks with this should be obvious,
+if you allow unprivileged userspace to trigger codepaths containing these
+calls.
+</para>
+
+<para>
+A new feature of jbd since 2.5.25 is commit callbacks with the new
+journal_callback_set() function you can now ask the journalling layer
+to call you back when the transaction is finally committed to disk, so that
+you can do some of your own management. The key to this is the journal_callback
+struct, this maintains the internal callback information but you can
+extend it like this:-
+</para>
+<programlisting>
+ struct myfs_callback_s {
+ //Data structure element required by jbd..
+ struct journal_callback for_jbd;
+ // Stuff for myfs allocated together.
+ myfs_inode* i_commited;
+
+ }
+</programlisting>
+
+<para>
+this would be useful if you needed to know when data was committed to a
+particular inode.
+</para>
+
+ </sect2>
+
+ <sect2 id="jbd_summary">
+ <title>Summary</title>
+<para>
+Using the journal is a matter of wrapping the different context changes,
+being each mount, each modification (transaction) and each changed buffer
+to tell the journalling layer about them.
+</para>
+
+<para>
+Here is a some pseudo code to give you an idea of how it works, as
+an example.
+</para>
+
+<programlisting>
+ journal_t* my_jnrl = journal_create();
+ journal_init_{dev,inode}(jnrl,...)
+ if (clean) journal_wipe();
+ journal_load();
+
+ foreach(transaction) { /*transactions must be
+ completed before
+ a syscall returns to
+ userspace*/
+
+ handle_t * xct=journal_start(my_jnrl);
+ foreach(bh) {
+ journal_get_{create,write,undo}_access(xact,bh);
+ if ( myfs_modify(bh) ) { /* returns true
+ if makes changes */
+ journal_dirty_{meta,}data(xact,bh);
+ } else {
+ journal_forget(bh);
+ }
+ }
+ journal_stop(xct);
+ }
+ journal_destroy(my_jrnl);
+</programlisting>
+ </sect2>
+
+ </sect1>
+
+ <sect1 id="data_types">
+ <title>Data Types</title>
+ <para>
+ The journalling layer uses typedefs to 'hide' the concrete definitions
+ of the structures used. As a client of the JBD layer you can
+ just rely on the using the pointer as a magic cookie of some sort.
+
+ Obviously the hiding is not enforced as this is 'C'.
+ </para>
+ <sect2 id="structures"><title>Structures</title>
+!Iinclude/linux/jbd.h
+ </sect2>
+ </sect1>
+
+ <sect1 id="functions">
+ <title>Functions</title>
+ <para>
+ The functions here are split into two groups those that
+ affect a journal as a whole, and those which are used to
+ manage transactions
+ </para>
+ <sect2 id="journal_level"><title>Journal Level</title>
+!Efs/jbd/journal.c
+!Ifs/jbd/recovery.c
+ </sect2>
+ <sect2 id="transaction_level"><title>Transasction Level</title>
+!Efs/jbd/transaction.c
+ </sect2>
+ </sect1>
+ <sect1 id="see_also">
+ <title>See also</title>
+ <para>
+ <citation>
+ <ulink url="http://kernel.org/pub/linux/kernel/people/sct/ext3/journal-design.ps.gz">
+ Journaling the Linux ext2fs Filesystem, LinuxExpo 98, Stephen Tweedie
+ </ulink>
+ </citation>
+ </para>
+ <para>
+ <citation>
+ <ulink url="http://olstrans.sourceforge.net/release/OLS2000-ext3/OLS2000-ext3.html">
+ Ext3 Journalling FileSystem, OLS 2000, Dr. Stephen Tweedie
+ </ulink>
+ </citation>
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="splice">
+ <title>splice API</title>
+ <para>
+ splice is a method for moving blocks of data around inside the
+ kernel, without continually transferring them between the kernel
+ and user space.
+ </para>
+!Ffs/splice.c
+ </chapter>
+
+ <chapter id="pipes">
+ <title>pipes API</title>
+ <para>
+ Pipe interfaces are all for in-kernel (builtin image) use.
+ They are not exported for use by modules.
+ </para>
+!Iinclude/linux/pipe_fs_i.h
+!Ffs/pipe.c
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/gadget.tmpl b/Documentation/DocBook/gadget.tmpl
new file mode 100644
index 000000000..641629221
--- /dev/null
+++ b/Documentation/DocBook/gadget.tmpl
@@ -0,0 +1,793 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="USB-Gadget-API">
+ <bookinfo>
+ <title>USB Gadget API for Linux</title>
+ <date>20 August 2004</date>
+ <edition>20 August 2004</edition>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ <copyright>
+ <year>2003-2004</year>
+ <holder>David Brownell</holder>
+ </copyright>
+
+ <author>
+ <firstname>David</firstname>
+ <surname>Brownell</surname>
+ <affiliation>
+ <address><email>dbrownell@users.sourceforge.net</email></address>
+ </affiliation>
+ </author>
+ </bookinfo>
+
+<toc></toc>
+
+<chapter id="intro"><title>Introduction</title>
+
+<para>This document presents a Linux-USB "Gadget"
+kernel mode
+API, for use within peripherals and other USB devices
+that embed Linux.
+It provides an overview of the API structure,
+and shows how that fits into a system development project.
+This is the first such API released on Linux to address
+a number of important problems, including: </para>
+
+<itemizedlist>
+ <listitem><para>Supports USB 2.0, for high speed devices which
+ can stream data at several dozen megabytes per second.
+ </para></listitem>
+ <listitem><para>Handles devices with dozens of endpoints just as
+ well as ones with just two fixed-function ones. Gadget drivers
+ can be written so they're easy to port to new hardware.
+ </para></listitem>
+ <listitem><para>Flexible enough to expose more complex USB device
+ capabilities such as multiple configurations, multiple interfaces,
+ composite devices,
+ and alternate interface settings.
+ </para></listitem>
+ <listitem><para>USB "On-The-Go" (OTG) support, in conjunction
+ with updates to the Linux-USB host side.
+ </para></listitem>
+ <listitem><para>Sharing data structures and API models with the
+ Linux-USB host side API. This helps the OTG support, and
+ looks forward to more-symmetric frameworks (where the same
+ I/O model is used by both host and device side drivers).
+ </para></listitem>
+ <listitem><para>Minimalist, so it's easier to support new device
+ controller hardware. I/O processing doesn't imply large
+ demands for memory or CPU resources.
+ </para></listitem>
+</itemizedlist>
+
+
+<para>Most Linux developers will not be able to use this API, since they
+have USB "host" hardware in a PC, workstation, or server.
+Linux users with embedded systems are more likely to
+have USB peripheral hardware.
+To distinguish drivers running inside such hardware from the
+more familiar Linux "USB device drivers",
+which are host side proxies for the real USB devices,
+a different term is used:
+the drivers inside the peripherals are "USB gadget drivers".
+In USB protocol interactions, the device driver is the master
+(or "client driver")
+and the gadget driver is the slave (or "function driver").
+</para>
+
+<para>The gadget API resembles the host side Linux-USB API in that both
+use queues of request objects to package I/O buffers, and those requests
+may be submitted or canceled.
+They share common definitions for the standard USB
+<emphasis>Chapter 9</emphasis> messages, structures, and constants.
+Also, both APIs bind and unbind drivers to devices.
+The APIs differ in detail, since the host side's current
+URB framework exposes a number of implementation details
+and assumptions that are inappropriate for a gadget API.
+While the model for control transfers and configuration
+management is necessarily different (one side is a hardware-neutral master,
+the other is a hardware-aware slave), the endpoint I/0 API used here
+should also be usable for an overhead-reduced host side API.
+</para>
+
+</chapter>
+
+<chapter id="structure"><title>Structure of Gadget Drivers</title>
+
+<para>A system running inside a USB peripheral
+normally has at least three layers inside the kernel to handle
+USB protocol processing, and may have additional layers in
+user space code.
+The "gadget" API is used by the middle layer to interact
+with the lowest level (which directly handles hardware).
+</para>
+
+<para>In Linux, from the bottom up, these layers are:
+</para>
+
+<variablelist>
+
+ <varlistentry>
+ <term><emphasis>USB Controller Driver</emphasis></term>
+
+ <listitem>
+ <para>This is the lowest software level.
+ It is the only layer that talks to hardware,
+ through registers, fifos, dma, irqs, and the like.
+ The <filename>&lt;linux/usb/gadget.h&gt;</filename> API abstracts
+ the peripheral controller endpoint hardware.
+ That hardware is exposed through endpoint objects, which accept
+ streams of IN/OUT buffers, and through callbacks that interact
+ with gadget drivers.
+ Since normal USB devices only have one upstream
+ port, they only have one of these drivers.
+ The controller driver can support any number of different
+ gadget drivers, but only one of them can be used at a time.
+ </para>
+
+ <para>Examples of such controller hardware include
+ the PCI-based NetChip 2280 USB 2.0 high speed controller,
+ the SA-11x0 or PXA-25x UDC (found within many PDAs),
+ and a variety of other products.
+ </para>
+
+ </listitem></varlistentry>
+
+ <varlistentry>
+ <term><emphasis>Gadget Driver</emphasis></term>
+
+ <listitem>
+ <para>The lower boundary of this driver implements hardware-neutral
+ USB functions, using calls to the controller driver.
+ Because such hardware varies widely in capabilities and restrictions,
+ and is used in embedded environments where space is at a premium,
+ the gadget driver is often configured at compile time
+ to work with endpoints supported by one particular controller.
+ Gadget drivers may be portable to several different controllers,
+ using conditional compilation.
+ (Recent kernels substantially simplify the work involved in
+ supporting new hardware, by <emphasis>autoconfiguring</emphasis>
+ endpoints automatically for many bulk-oriented drivers.)
+ Gadget driver responsibilities include:
+ </para>
+ <itemizedlist>
+ <listitem><para>handling setup requests (ep0 protocol responses)
+ possibly including class-specific functionality
+ </para></listitem>
+ <listitem><para>returning configuration and string descriptors
+ </para></listitem>
+ <listitem><para>(re)setting configurations and interface
+ altsettings, including enabling and configuring endpoints
+ </para></listitem>
+ <listitem><para>handling life cycle events, such as managing
+ bindings to hardware,
+ USB suspend/resume, remote wakeup,
+ and disconnection from the USB host.
+ </para></listitem>
+ <listitem><para>managing IN and OUT transfers on all currently
+ enabled endpoints
+ </para></listitem>
+ </itemizedlist>
+
+ <para>
+ Such drivers may be modules of proprietary code, although
+ that approach is discouraged in the Linux community.
+ </para>
+ </listitem></varlistentry>
+
+ <varlistentry>
+ <term><emphasis>Upper Level</emphasis></term>
+
+ <listitem>
+ <para>Most gadget drivers have an upper boundary that connects
+ to some Linux driver or framework in Linux.
+ Through that boundary flows the data which the gadget driver
+ produces and/or consumes through protocol transfers over USB.
+ Examples include:
+ </para>
+ <itemizedlist>
+ <listitem><para>user mode code, using generic (gadgetfs)
+ or application specific files in
+ <filename>/dev</filename>
+ </para></listitem>
+ <listitem><para>networking subsystem (for network gadgets,
+ like the CDC Ethernet Model gadget driver)
+ </para></listitem>
+ <listitem><para>data capture drivers, perhaps video4Linux or
+ a scanner driver; or test and measurement hardware.
+ </para></listitem>
+ <listitem><para>input subsystem (for HID gadgets)
+ </para></listitem>
+ <listitem><para>sound subsystem (for audio gadgets)
+ </para></listitem>
+ <listitem><para>file system (for PTP gadgets)
+ </para></listitem>
+ <listitem><para>block i/o subsystem (for usb-storage gadgets)
+ </para></listitem>
+ <listitem><para>... and more </para></listitem>
+ </itemizedlist>
+ </listitem></varlistentry>
+
+ <varlistentry>
+ <term><emphasis>Additional Layers</emphasis></term>
+
+ <listitem>
+ <para>Other layers may exist.
+ These could include kernel layers, such as network protocol stacks,
+ as well as user mode applications building on standard POSIX
+ system call APIs such as
+ <emphasis>open()</emphasis>, <emphasis>close()</emphasis>,
+ <emphasis>read()</emphasis> and <emphasis>write()</emphasis>.
+ On newer systems, POSIX Async I/O calls may be an option.
+ Such user mode code will not necessarily be subject to
+ the GNU General Public License (GPL).
+ </para>
+ </listitem></varlistentry>
+
+
+</variablelist>
+
+<para>OTG-capable systems will also need to include a standard Linux-USB
+host side stack,
+with <emphasis>usbcore</emphasis>,
+one or more <emphasis>Host Controller Drivers</emphasis> (HCDs),
+<emphasis>USB Device Drivers</emphasis> to support
+the OTG "Targeted Peripheral List",
+and so forth.
+There will also be an <emphasis>OTG Controller Driver</emphasis>,
+which is visible to gadget and device driver developers only indirectly.
+That helps the host and device side USB controllers implement the
+two new OTG protocols (HNP and SRP).
+Roles switch (host to peripheral, or vice versa) using HNP
+during USB suspend processing, and SRP can be viewed as a
+more battery-friendly kind of device wakeup protocol.
+</para>
+
+<para>Over time, reusable utilities are evolving to help make some
+gadget driver tasks simpler.
+For example, building configuration descriptors from vectors of
+descriptors for the configurations interfaces and endpoints is
+now automated, and many drivers now use autoconfiguration to
+choose hardware endpoints and initialize their descriptors.
+
+A potential example of particular interest
+is code implementing standard USB-IF protocols for
+HID, networking, storage, or audio classes.
+Some developers are interested in KDB or KGDB hooks, to let
+target hardware be remotely debugged.
+Most such USB protocol code doesn't need to be hardware-specific,
+any more than network protocols like X11, HTTP, or NFS are.
+Such gadget-side interface drivers should eventually be combined,
+to implement composite devices.
+</para>
+
+</chapter>
+
+
+<chapter id="api"><title>Kernel Mode Gadget API</title>
+
+<para>Gadget drivers declare themselves through a
+<emphasis>struct usb_gadget_driver</emphasis>, which is responsible for
+most parts of enumeration for a <emphasis>struct usb_gadget</emphasis>.
+The response to a set_configuration usually involves
+enabling one or more of the <emphasis>struct usb_ep</emphasis> objects
+exposed by the gadget, and submitting one or more
+<emphasis>struct usb_request</emphasis> buffers to transfer data.
+Understand those four data types, and their operations, and
+you will understand how this API works.
+</para>
+
+<note><title>Incomplete Data Type Descriptions</title>
+
+<para>This documentation was prepared using the standard Linux
+kernel <filename>docproc</filename> tool, which turns text
+and in-code comments into SGML DocBook and then into usable
+formats such as HTML or PDF.
+Other than the "Chapter 9" data types, most of the significant
+data types and functions are described here.
+</para>
+
+<para>However, docproc does not understand all the C constructs
+that are used, so some relevant information is likely omitted from
+what you are reading.
+One example of such information is endpoint autoconfiguration.
+You'll have to read the header file, and use example source
+code (such as that for "Gadget Zero"), to fully understand the API.
+</para>
+
+<para>The part of the API implementing some basic
+driver capabilities is specific to the version of the
+Linux kernel that's in use.
+The 2.6 kernel includes a <emphasis>driver model</emphasis>
+framework that has no analogue on earlier kernels;
+so those parts of the gadget API are not fully portable.
+(They are implemented on 2.4 kernels, but in a different way.)
+The driver model state is another part of this API that is
+ignored by the kerneldoc tools.
+</para>
+</note>
+
+<para>The core API does not expose
+every possible hardware feature, only the most widely available ones.
+There are significant hardware features, such as device-to-device DMA
+(without temporary storage in a memory buffer)
+that would be added using hardware-specific APIs.
+</para>
+
+<para>This API allows drivers to use conditional compilation to handle
+endpoint capabilities of different hardware, but doesn't require that.
+Hardware tends to have arbitrary restrictions, relating to
+transfer types, addressing, packet sizes, buffering, and availability.
+As a rule, such differences only matter for "endpoint zero" logic
+that handles device configuration and management.
+The API supports limited run-time
+detection of capabilities, through naming conventions for endpoints.
+Many drivers will be able to at least partially autoconfigure
+themselves.
+In particular, driver init sections will often have endpoint
+autoconfiguration logic that scans the hardware's list of endpoints
+to find ones matching the driver requirements
+(relying on those conventions), to eliminate some of the most
+common reasons for conditional compilation.
+</para>
+
+<para>Like the Linux-USB host side API, this API exposes
+the "chunky" nature of USB messages: I/O requests are in terms
+of one or more "packets", and packet boundaries are visible to drivers.
+Compared to RS-232 serial protocols, USB resembles
+synchronous protocols like HDLC
+(N bytes per frame, multipoint addressing, host as the primary
+station and devices as secondary stations)
+more than asynchronous ones
+(tty style: 8 data bits per frame, no parity, one stop bit).
+So for example the controller drivers won't buffer
+two single byte writes into a single two-byte USB IN packet,
+although gadget drivers may do so when they implement
+protocols where packet boundaries (and "short packets")
+are not significant.
+</para>
+
+<sect1 id="lifecycle"><title>Driver Life Cycle</title>
+
+<para>Gadget drivers make endpoint I/O requests to hardware without
+needing to know many details of the hardware, but driver
+setup/configuration code needs to handle some differences.
+Use the API like this:
+</para>
+
+<orderedlist numeration='arabic'>
+
+<listitem><para>Register a driver for the particular device side
+usb controller hardware,
+such as the net2280 on PCI (USB 2.0),
+sa11x0 or pxa25x as found in Linux PDAs,
+and so on.
+At this point the device is logically in the USB ch9 initial state
+("attached"), drawing no power and not usable
+(since it does not yet support enumeration).
+Any host should not see the device, since it's not
+activated the data line pullup used by the host to
+detect a device, even if VBUS power is available.
+</para></listitem>
+
+<listitem><para>Register a gadget driver that implements some higher level
+device function. That will then bind() to a usb_gadget, which
+activates the data line pullup sometime after detecting VBUS.
+</para></listitem>
+
+<listitem><para>The hardware driver can now start enumerating.
+The steps it handles are to accept USB power and set_address requests.
+Other steps are handled by the gadget driver.
+If the gadget driver module is unloaded before the host starts to
+enumerate, steps before step 7 are skipped.
+</para></listitem>
+
+<listitem><para>The gadget driver's setup() call returns usb descriptors,
+based both on what the bus interface hardware provides and on the
+functionality being implemented.
+That can involve alternate settings or configurations,
+unless the hardware prevents such operation.
+For OTG devices, each configuration descriptor includes
+an OTG descriptor.
+</para></listitem>
+
+<listitem><para>The gadget driver handles the last step of enumeration,
+when the USB host issues a set_configuration call.
+It enables all endpoints used in that configuration,
+with all interfaces in their default settings.
+That involves using a list of the hardware's endpoints, enabling each
+endpoint according to its descriptor.
+It may also involve using <function>usb_gadget_vbus_draw</function>
+to let more power be drawn from VBUS, as allowed by that configuration.
+For OTG devices, setting a configuration may also involve reporting
+HNP capabilities through a user interface.
+</para></listitem>
+
+<listitem><para>Do real work and perform data transfers, possibly involving
+changes to interface settings or switching to new configurations, until the
+device is disconnect()ed from the host.
+Queue any number of transfer requests to each endpoint.
+It may be suspended and resumed several times before being disconnected.
+On disconnect, the drivers go back to step 3 (above).
+</para></listitem>
+
+<listitem><para>When the gadget driver module is being unloaded,
+the driver unbind() callback is issued. That lets the controller
+driver be unloaded.
+</para></listitem>
+
+</orderedlist>
+
+<para>Drivers will normally be arranged so that just loading the
+gadget driver module (or statically linking it into a Linux kernel)
+allows the peripheral device to be enumerated, but some drivers
+will defer enumeration until some higher level component (like
+a user mode daemon) enables it.
+Note that at this lowest level there are no policies about how
+ep0 configuration logic is implemented,
+except that it should obey USB specifications.
+Such issues are in the domain of gadget drivers,
+including knowing about implementation constraints
+imposed by some USB controllers
+or understanding that composite devices might happen to
+be built by integrating reusable components.
+</para>
+
+<para>Note that the lifecycle above can be slightly different
+for OTG devices.
+Other than providing an additional OTG descriptor in each
+configuration, only the HNP-related differences are particularly
+visible to driver code.
+They involve reporting requirements during the SET_CONFIGURATION
+request, and the option to invoke HNP during some suspend callbacks.
+Also, SRP changes the semantics of
+<function>usb_gadget_wakeup</function>
+slightly.
+</para>
+
+</sect1>
+
+<sect1 id="ch9"><title>USB 2.0 Chapter 9 Types and Constants</title>
+
+<para>Gadget drivers
+rely on common USB structures and constants
+defined in the
+<filename>&lt;linux/usb/ch9.h&gt;</filename>
+header file, which is standard in Linux 2.6 kernels.
+These are the same types and constants used by host
+side drivers (and usbcore).
+</para>
+
+!Iinclude/linux/usb/ch9.h
+</sect1>
+
+<sect1 id="core"><title>Core Objects and Methods</title>
+
+<para>These are declared in
+<filename>&lt;linux/usb/gadget.h&gt;</filename>,
+and are used by gadget drivers to interact with
+USB peripheral controller drivers.
+</para>
+
+ <!-- yeech, this is ugly in nsgmls PDF output.
+
+ the PDF bookmark and refentry output nesting is wrong,
+ and the member/argument documentation indents ugly.
+
+ plus something (docproc?) adds whitespace before the
+ descriptive paragraph text, so it can't line up right
+ unless the explanations are trivial.
+ -->
+
+!Iinclude/linux/usb/gadget.h
+</sect1>
+
+<sect1 id="utils"><title>Optional Utilities</title>
+
+<para>The core API is sufficient for writing a USB Gadget Driver,
+but some optional utilities are provided to simplify common tasks.
+These utilities include endpoint autoconfiguration.
+</para>
+
+!Edrivers/usb/gadget/usbstring.c
+!Edrivers/usb/gadget/config.c
+<!-- !Edrivers/usb/gadget/epautoconf.c -->
+</sect1>
+
+<sect1 id="composite"><title>Composite Device Framework</title>
+
+<para>The core API is sufficient for writing drivers for composite
+USB devices (with more than one function in a given configuration),
+and also multi-configuration devices (also more than one function,
+but not necessarily sharing a given configuration).
+There is however an optional framework which makes it easier to
+reuse and combine functions.
+</para>
+
+<para>Devices using this framework provide a <emphasis>struct
+usb_composite_driver</emphasis>, which in turn provides one or
+more <emphasis>struct usb_configuration</emphasis> instances.
+Each such configuration includes at least one
+<emphasis>struct usb_function</emphasis>, which packages a user
+visible role such as "network link" or "mass storage device".
+Management functions may also exist, such as "Device Firmware
+Upgrade".
+</para>
+
+!Iinclude/linux/usb/composite.h
+!Edrivers/usb/gadget/composite.c
+
+</sect1>
+
+<sect1 id="functions"><title>Composite Device Functions</title>
+
+<para>At this writing, a few of the current gadget drivers have
+been converted to this framework.
+Near-term plans include converting all of them, except for "gadgetfs".
+</para>
+
+!Edrivers/usb/gadget/function/f_acm.c
+!Edrivers/usb/gadget/function/f_ecm.c
+!Edrivers/usb/gadget/function/f_subset.c
+!Edrivers/usb/gadget/function/f_obex.c
+!Edrivers/usb/gadget/function/f_serial.c
+
+</sect1>
+
+
+</chapter>
+
+<chapter id="controllers"><title>Peripheral Controller Drivers</title>
+
+<para>The first hardware supporting this API was the NetChip 2280
+controller, which supports USB 2.0 high speed and is based on PCI.
+This is the <filename>net2280</filename> driver module.
+The driver supports Linux kernel versions 2.4 and 2.6;
+contact NetChip Technologies for development boards and product
+information.
+</para>
+
+<para>Other hardware working in the "gadget" framework includes:
+Intel's PXA 25x and IXP42x series processors
+(<filename>pxa2xx_udc</filename>),
+Toshiba TC86c001 "Goku-S" (<filename>goku_udc</filename>),
+Renesas SH7705/7727 (<filename>sh_udc</filename>),
+MediaQ 11xx (<filename>mq11xx_udc</filename>),
+Hynix HMS30C7202 (<filename>h7202_udc</filename>),
+National 9303/4 (<filename>n9604_udc</filename>),
+Texas Instruments OMAP (<filename>omap_udc</filename>),
+Sharp LH7A40x (<filename>lh7a40x_udc</filename>),
+and more.
+Most of those are full speed controllers.
+</para>
+
+<para>At this writing, there are people at work on drivers in
+this framework for several other USB device controllers,
+with plans to make many of them be widely available.
+</para>
+
+<!-- !Edrivers/usb/gadget/net2280.c -->
+
+<para>A partial USB simulator,
+the <filename>dummy_hcd</filename> driver, is available.
+It can act like a net2280, a pxa25x, or an sa11x0 in terms
+of available endpoints and device speeds; and it simulates
+control, bulk, and to some extent interrupt transfers.
+That lets you develop some parts of a gadget driver on a normal PC,
+without any special hardware, and perhaps with the assistance
+of tools such as GDB running with User Mode Linux.
+At least one person has expressed interest in adapting that
+approach, hooking it up to a simulator for a microcontroller.
+Such simulators can help debug subsystems where the runtime hardware
+is unfriendly to software development, or is not yet available.
+</para>
+
+<para>Support for other controllers is expected to be developed
+and contributed
+over time, as this driver framework evolves.
+</para>
+
+</chapter>
+
+<chapter id="gadget"><title>Gadget Drivers</title>
+
+<para>In addition to <emphasis>Gadget Zero</emphasis>
+(used primarily for testing and development with drivers
+for usb controller hardware), other gadget drivers exist.
+</para>
+
+<para>There's an <emphasis>ethernet</emphasis> gadget
+driver, which implements one of the most useful
+<emphasis>Communications Device Class</emphasis> (CDC) models.
+One of the standards for cable modem interoperability even
+specifies the use of this ethernet model as one of two
+mandatory options.
+Gadgets using this code look to a USB host as if they're
+an Ethernet adapter.
+It provides access to a network where the gadget's CPU is one host,
+which could easily be bridging, routing, or firewalling
+access to other networks.
+Since some hardware can't fully implement the CDC Ethernet
+requirements, this driver also implements a "good parts only"
+subset of CDC Ethernet.
+(That subset doesn't advertise itself as CDC Ethernet,
+to avoid creating problems.)
+</para>
+
+<para>Support for Microsoft's <emphasis>RNDIS</emphasis>
+protocol has been contributed by Pengutronix and Auerswald GmbH.
+This is like CDC Ethernet, but it runs on more slightly USB hardware
+(but less than the CDC subset).
+However, its main claim to fame is being able to connect directly to
+recent versions of Windows, using drivers that Microsoft bundles
+and supports, making it much simpler to network with Windows.
+</para>
+
+<para>There is also support for user mode gadget drivers,
+using <emphasis>gadgetfs</emphasis>.
+This provides a <emphasis>User Mode API</emphasis> that presents
+each endpoint as a single file descriptor. I/O is done using
+normal <emphasis>read()</emphasis> and <emphasis>read()</emphasis> calls.
+Familiar tools like GDB and pthreads can be used to
+develop and debug user mode drivers, so that once a robust
+controller driver is available many applications for it
+won't require new kernel mode software.
+Linux 2.6 <emphasis>Async I/O (AIO)</emphasis>
+support is available, so that user mode software
+can stream data with only slightly more overhead
+than a kernel driver.
+</para>
+
+<para>There's a USB Mass Storage class driver, which provides
+a different solution for interoperability with systems such
+as MS-Windows and MacOS.
+That <emphasis>Mass Storage</emphasis> driver uses a
+file or block device as backing store for a drive,
+like the <filename>loop</filename> driver.
+The USB host uses the BBB, CB, or CBI versions of the mass
+storage class specification, using transparent SCSI commands
+to access the data from the backing store.
+</para>
+
+<para>There's a "serial line" driver, useful for TTY style
+operation over USB.
+The latest version of that driver supports CDC ACM style
+operation, like a USB modem, and so on most hardware it can
+interoperate easily with MS-Windows.
+One interesting use of that driver is in boot firmware (like a BIOS),
+which can sometimes use that model with very small systems without
+real serial lines.
+</para>
+
+<para>Support for other kinds of gadget is expected to
+be developed and contributed
+over time, as this driver framework evolves.
+</para>
+
+</chapter>
+
+<chapter id="otg"><title>USB On-The-GO (OTG)</title>
+
+<para>USB OTG support on Linux 2.6 was initially developed
+by Texas Instruments for
+<ulink url="http://www.omap.com">OMAP</ulink> 16xx and 17xx
+series processors.
+Other OTG systems should work in similar ways, but the
+hardware level details could be very different.
+</para>
+
+<para>Systems need specialized hardware support to implement OTG,
+notably including a special <emphasis>Mini-AB</emphasis> jack
+and associated transceiver to support <emphasis>Dual-Role</emphasis>
+operation:
+they can act either as a host, using the standard
+Linux-USB host side driver stack,
+or as a peripheral, using this "gadget" framework.
+To do that, the system software relies on small additions
+to those programming interfaces,
+and on a new internal component (here called an "OTG Controller")
+affecting which driver stack connects to the OTG port.
+In each role, the system can re-use the existing pool of
+hardware-neutral drivers, layered on top of the controller
+driver interfaces (<emphasis>usb_bus</emphasis> or
+<emphasis>usb_gadget</emphasis>).
+Such drivers need at most minor changes, and most of the calls
+added to support OTG can also benefit non-OTG products.
+</para>
+
+<itemizedlist>
+ <listitem><para>Gadget drivers test the <emphasis>is_otg</emphasis>
+ flag, and use it to determine whether or not to include
+ an OTG descriptor in each of their configurations.
+ </para></listitem>
+ <listitem><para>Gadget drivers may need changes to support the
+ two new OTG protocols, exposed in new gadget attributes
+ such as <emphasis>b_hnp_enable</emphasis> flag.
+ HNP support should be reported through a user interface
+ (two LEDs could suffice), and is triggered in some cases
+ when the host suspends the peripheral.
+ SRP support can be user-initiated just like remote wakeup,
+ probably by pressing the same button.
+ </para></listitem>
+ <listitem><para>On the host side, USB device drivers need
+ to be taught to trigger HNP at appropriate moments, using
+ <function>usb_suspend_device()</function>.
+ That also conserves battery power, which is useful even
+ for non-OTG configurations.
+ </para></listitem>
+ <listitem><para>Also on the host side, a driver must support the
+ OTG "Targeted Peripheral List". That's just a whitelist,
+ used to reject peripherals not supported with a given
+ Linux OTG host.
+ <emphasis>This whitelist is product-specific;
+ each product must modify <filename>otg_whitelist.h</filename>
+ to match its interoperability specification.
+ </emphasis>
+ </para>
+ <para>Non-OTG Linux hosts, like PCs and workstations,
+ normally have some solution for adding drivers, so that
+ peripherals that aren't recognized can eventually be supported.
+ That approach is unreasonable for consumer products that may
+ never have their firmware upgraded, and where it's usually
+ unrealistic to expect traditional PC/workstation/server kinds
+ of support model to work.
+ For example, it's often impractical to change device firmware
+ once the product has been distributed, so driver bugs can't
+ normally be fixed if they're found after shipment.
+ </para></listitem>
+</itemizedlist>
+
+<para>
+Additional changes are needed below those hardware-neutral
+<emphasis>usb_bus</emphasis> and <emphasis>usb_gadget</emphasis>
+driver interfaces; those aren't discussed here in any detail.
+Those affect the hardware-specific code for each USB Host or Peripheral
+controller, and how the HCD initializes (since OTG can be active only
+on a single port).
+They also involve what may be called an <emphasis>OTG Controller
+Driver</emphasis>, managing the OTG transceiver and the OTG state
+machine logic as well as much of the root hub behavior for the
+OTG port.
+The OTG controller driver needs to activate and deactivate USB
+controllers depending on the relevant device role.
+Some related changes were needed inside usbcore, so that it
+can identify OTG-capable devices and respond appropriately
+to HNP or SRP protocols.
+</para>
+
+</chapter>
+
+</book>
+<!--
+ vim:syntax=sgml:sw=4
+-->
diff --git a/Documentation/DocBook/genericirq.tmpl b/Documentation/DocBook/genericirq.tmpl
new file mode 100644
index 000000000..59fb5c077
--- /dev/null
+++ b/Documentation/DocBook/genericirq.tmpl
@@ -0,0 +1,520 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Generic-IRQ-Guide">
+ <bookinfo>
+ <title>Linux generic IRQ handling</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Thomas</firstname>
+ <surname>Gleixner</surname>
+ <affiliation>
+ <address>
+ <email>tglx@linutronix.de</email>
+ </address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Ingo</firstname>
+ <surname>Molnar</surname>
+ <affiliation>
+ <address>
+ <email>mingo@elte.hu</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2005-2010</year>
+ <holder>Thomas Gleixner</holder>
+ </copyright>
+ <copyright>
+ <year>2005-2006</year>
+ <holder>Ingo Molnar</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ The generic interrupt handling layer is designed to provide a
+ complete abstraction of interrupt handling for device drivers.
+ It is able to handle all the different types of interrupt controller
+ hardware. Device drivers use generic API functions to request, enable,
+ disable and free interrupts. The drivers do not have to know anything
+ about interrupt hardware details, so they can be used on different
+ platforms without code changes.
+ </para>
+ <para>
+ This documentation is provided to developers who want to implement
+ an interrupt subsystem based for their architecture, with the help
+ of the generic IRQ handling layer.
+ </para>
+ </chapter>
+
+ <chapter id="rationale">
+ <title>Rationale</title>
+ <para>
+ The original implementation of interrupt handling in Linux uses
+ the __do_IRQ() super-handler, which is able to deal with every
+ type of interrupt logic.
+ </para>
+ <para>
+ Originally, Russell King identified different types of handlers to
+ build a quite universal set for the ARM interrupt handler
+ implementation in Linux 2.5/2.6. He distinguished between:
+ <itemizedlist>
+ <listitem><para>Level type</para></listitem>
+ <listitem><para>Edge type</para></listitem>
+ <listitem><para>Simple type</para></listitem>
+ </itemizedlist>
+ During the implementation we identified another type:
+ <itemizedlist>
+ <listitem><para>Fast EOI type</para></listitem>
+ </itemizedlist>
+ In the SMP world of the __do_IRQ() super-handler another type
+ was identified:
+ <itemizedlist>
+ <listitem><para>Per CPU type</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ This split implementation of high-level IRQ handlers allows us to
+ optimize the flow of the interrupt handling for each specific
+ interrupt type. This reduces complexity in that particular code path
+ and allows the optimized handling of a given type.
+ </para>
+ <para>
+ The original general IRQ implementation used hw_interrupt_type
+ structures and their ->ack(), ->end() [etc.] callbacks to
+ differentiate the flow control in the super-handler. This leads to
+ a mix of flow logic and low-level hardware logic, and it also leads
+ to unnecessary code duplication: for example in i386, there is an
+ ioapic_level_irq and an ioapic_edge_irq IRQ-type which share many
+ of the low-level details but have different flow handling.
+ </para>
+ <para>
+ A more natural abstraction is the clean separation of the
+ 'irq flow' and the 'chip details'.
+ </para>
+ <para>
+ Analysing a couple of architecture's IRQ subsystem implementations
+ reveals that most of them can use a generic set of 'irq flow'
+ methods and only need to add the chip-level specific code.
+ The separation is also valuable for (sub)architectures
+ which need specific quirks in the IRQ flow itself but not in the
+ chip details - and thus provides a more transparent IRQ subsystem
+ design.
+ </para>
+ <para>
+ Each interrupt descriptor is assigned its own high-level flow
+ handler, which is normally one of the generic
+ implementations. (This high-level flow handler implementation also
+ makes it simple to provide demultiplexing handlers which can be
+ found in embedded platforms on various architectures.)
+ </para>
+ <para>
+ The separation makes the generic interrupt handling layer more
+ flexible and extensible. For example, an (sub)architecture can
+ use a generic IRQ-flow implementation for 'level type' interrupts
+ and add a (sub)architecture specific 'edge type' implementation.
+ </para>
+ <para>
+ To make the transition to the new model easier and prevent the
+ breakage of existing implementations, the __do_IRQ() super-handler
+ is still available. This leads to a kind of duality for the time
+ being. Over time the new model should be used in more and more
+ architectures, as it enables smaller and cleaner IRQ subsystems.
+ It's deprecated for three years now and about to be removed.
+ </para>
+ </chapter>
+ <chapter id="bugs">
+ <title>Known Bugs And Assumptions</title>
+ <para>
+ None (knock on wood).
+ </para>
+ </chapter>
+
+ <chapter id="Abstraction">
+ <title>Abstraction layers</title>
+ <para>
+ There are three main levels of abstraction in the interrupt code:
+ <orderedlist>
+ <listitem><para>High-level driver API</para></listitem>
+ <listitem><para>High-level IRQ flow handlers</para></listitem>
+ <listitem><para>Chip-level hardware encapsulation</para></listitem>
+ </orderedlist>
+ </para>
+ <sect1 id="Interrupt_control_flow">
+ <title>Interrupt control flow</title>
+ <para>
+ Each interrupt is described by an interrupt descriptor structure
+ irq_desc. The interrupt is referenced by an 'unsigned int' numeric
+ value which selects the corresponding interrupt description structure
+ in the descriptor structures array.
+ The descriptor structure contains status information and pointers
+ to the interrupt flow method and the interrupt chip structure
+ which are assigned to this interrupt.
+ </para>
+ <para>
+ Whenever an interrupt triggers, the low-level architecture code calls
+ into the generic interrupt code by calling desc->handle_irq().
+ This high-level IRQ handling function only uses desc->irq_data.chip
+ primitives referenced by the assigned chip descriptor structure.
+ </para>
+ </sect1>
+ <sect1 id="Highlevel_Driver_API">
+ <title>High-level Driver API</title>
+ <para>
+ The high-level Driver API consists of following functions:
+ <itemizedlist>
+ <listitem><para>request_irq()</para></listitem>
+ <listitem><para>free_irq()</para></listitem>
+ <listitem><para>disable_irq()</para></listitem>
+ <listitem><para>enable_irq()</para></listitem>
+ <listitem><para>disable_irq_nosync() (SMP only)</para></listitem>
+ <listitem><para>synchronize_irq() (SMP only)</para></listitem>
+ <listitem><para>irq_set_irq_type()</para></listitem>
+ <listitem><para>irq_set_irq_wake()</para></listitem>
+ <listitem><para>irq_set_handler_data()</para></listitem>
+ <listitem><para>irq_set_chip()</para></listitem>
+ <listitem><para>irq_set_chip_data()</para></listitem>
+ </itemizedlist>
+ See the autogenerated function documentation for details.
+ </para>
+ </sect1>
+ <sect1 id="Highlevel_IRQ_flow_handlers">
+ <title>High-level IRQ flow handlers</title>
+ <para>
+ The generic layer provides a set of pre-defined irq-flow methods:
+ <itemizedlist>
+ <listitem><para>handle_level_irq</para></listitem>
+ <listitem><para>handle_edge_irq</para></listitem>
+ <listitem><para>handle_fasteoi_irq</para></listitem>
+ <listitem><para>handle_simple_irq</para></listitem>
+ <listitem><para>handle_percpu_irq</para></listitem>
+ <listitem><para>handle_edge_eoi_irq</para></listitem>
+ <listitem><para>handle_bad_irq</para></listitem>
+ </itemizedlist>
+ The interrupt flow handlers (either pre-defined or architecture
+ specific) are assigned to specific interrupts by the architecture
+ either during bootup or during device initialization.
+ </para>
+ <sect2 id="Default_flow_implementations">
+ <title>Default flow implementations</title>
+ <sect3 id="Helper_functions">
+ <title>Helper functions</title>
+ <para>
+ The helper functions call the chip primitives and
+ are used by the default flow implementations.
+ The following helper functions are implemented (simplified excerpt):
+ <programlisting>
+default_enable(struct irq_data *data)
+{
+ desc->irq_data.chip->irq_unmask(data);
+}
+
+default_disable(struct irq_data *data)
+{
+ if (!delay_disable(data))
+ desc->irq_data.chip->irq_mask(data);
+}
+
+default_ack(struct irq_data *data)
+{
+ chip->irq_ack(data);
+}
+
+default_mask_ack(struct irq_data *data)
+{
+ if (chip->irq_mask_ack) {
+ chip->irq_mask_ack(data);
+ } else {
+ chip->irq_mask(data);
+ chip->irq_ack(data);
+ }
+}
+
+noop(struct irq_data *data))
+{
+}
+
+ </programlisting>
+ </para>
+ </sect3>
+ </sect2>
+ <sect2 id="Default_flow_handler_implementations">
+ <title>Default flow handler implementations</title>
+ <sect3 id="Default_Level_IRQ_flow_handler">
+ <title>Default Level IRQ flow handler</title>
+ <para>
+ handle_level_irq provides a generic implementation
+ for level-triggered interrupts.
+ </para>
+ <para>
+ The following control flow is implemented (simplified excerpt):
+ <programlisting>
+desc->irq_data.chip->irq_mask_ack();
+handle_irq_event(desc->action);
+desc->irq_data.chip->irq_unmask();
+ </programlisting>
+ </para>
+ </sect3>
+ <sect3 id="Default_FASTEOI_IRQ_flow_handler">
+ <title>Default Fast EOI IRQ flow handler</title>
+ <para>
+ handle_fasteoi_irq provides a generic implementation
+ for interrupts, which only need an EOI at the end of
+ the handler.
+ </para>
+ <para>
+ The following control flow is implemented (simplified excerpt):
+ <programlisting>
+handle_irq_event(desc->action);
+desc->irq_data.chip->irq_eoi();
+ </programlisting>
+ </para>
+ </sect3>
+ <sect3 id="Default_Edge_IRQ_flow_handler">
+ <title>Default Edge IRQ flow handler</title>
+ <para>
+ handle_edge_irq provides a generic implementation
+ for edge-triggered interrupts.
+ </para>
+ <para>
+ The following control flow is implemented (simplified excerpt):
+ <programlisting>
+if (desc->status &amp; running) {
+ desc->irq_data.chip->irq_mask_ack();
+ desc->status |= pending | masked;
+ return;
+}
+desc->irq_data.chip->irq_ack();
+desc->status |= running;
+do {
+ if (desc->status &amp; masked)
+ desc->irq_data.chip->irq_unmask();
+ desc->status &amp;= ~pending;
+ handle_irq_event(desc->action);
+} while (status &amp; pending);
+desc->status &amp;= ~running;
+ </programlisting>
+ </para>
+ </sect3>
+ <sect3 id="Default_simple_IRQ_flow_handler">
+ <title>Default simple IRQ flow handler</title>
+ <para>
+ handle_simple_irq provides a generic implementation
+ for simple interrupts.
+ </para>
+ <para>
+ Note: The simple flow handler does not call any
+ handler/chip primitives.
+ </para>
+ <para>
+ The following control flow is implemented (simplified excerpt):
+ <programlisting>
+handle_irq_event(desc->action);
+ </programlisting>
+ </para>
+ </sect3>
+ <sect3 id="Default_per_CPU_flow_handler">
+ <title>Default per CPU flow handler</title>
+ <para>
+ handle_percpu_irq provides a generic implementation
+ for per CPU interrupts.
+ </para>
+ <para>
+ Per CPU interrupts are only available on SMP and
+ the handler provides a simplified version without
+ locking.
+ </para>
+ <para>
+ The following control flow is implemented (simplified excerpt):
+ <programlisting>
+if (desc->irq_data.chip->irq_ack)
+ desc->irq_data.chip->irq_ack();
+handle_irq_event(desc->action);
+if (desc->irq_data.chip->irq_eoi)
+ desc->irq_data.chip->irq_eoi();
+ </programlisting>
+ </para>
+ </sect3>
+ <sect3 id="EOI_Edge_IRQ_flow_handler">
+ <title>EOI Edge IRQ flow handler</title>
+ <para>
+ handle_edge_eoi_irq provides an abnomination of the edge
+ handler which is solely used to tame a badly wreckaged
+ irq controller on powerpc/cell.
+ </para>
+ </sect3>
+ <sect3 id="BAD_IRQ_flow_handler">
+ <title>Bad IRQ flow handler</title>
+ <para>
+ handle_bad_irq is used for spurious interrupts which
+ have no real handler assigned..
+ </para>
+ </sect3>
+ </sect2>
+ <sect2 id="Quirks_and_optimizations">
+ <title>Quirks and optimizations</title>
+ <para>
+ The generic functions are intended for 'clean' architectures and chips,
+ which have no platform-specific IRQ handling quirks. If an architecture
+ needs to implement quirks on the 'flow' level then it can do so by
+ overriding the high-level irq-flow handler.
+ </para>
+ </sect2>
+ <sect2 id="Delayed_interrupt_disable">
+ <title>Delayed interrupt disable</title>
+ <para>
+ This per interrupt selectable feature, which was introduced by Russell
+ King in the ARM interrupt implementation, does not mask an interrupt
+ at the hardware level when disable_irq() is called. The interrupt is
+ kept enabled and is masked in the flow handler when an interrupt event
+ happens. This prevents losing edge interrupts on hardware which does
+ not store an edge interrupt event while the interrupt is disabled at
+ the hardware level. When an interrupt arrives while the IRQ_DISABLED
+ flag is set, then the interrupt is masked at the hardware level and
+ the IRQ_PENDING bit is set. When the interrupt is re-enabled by
+ enable_irq() the pending bit is checked and if it is set, the
+ interrupt is resent either via hardware or by a software resend
+ mechanism. (It's necessary to enable CONFIG_HARDIRQS_SW_RESEND when
+ you want to use the delayed interrupt disable feature and your
+ hardware is not capable of retriggering an interrupt.)
+ The delayed interrupt disable is not configurable.
+ </para>
+ </sect2>
+ </sect1>
+ <sect1 id="Chiplevel_hardware_encapsulation">
+ <title>Chip-level hardware encapsulation</title>
+ <para>
+ The chip-level hardware descriptor structure irq_chip
+ contains all the direct chip relevant functions, which
+ can be utilized by the irq flow implementations.
+ <itemizedlist>
+ <listitem><para>irq_ack()</para></listitem>
+ <listitem><para>irq_mask_ack() - Optional, recommended for performance</para></listitem>
+ <listitem><para>irq_mask()</para></listitem>
+ <listitem><para>irq_unmask()</para></listitem>
+ <listitem><para>irq_eoi() - Optional, required for EOI flow handlers</para></listitem>
+ <listitem><para>irq_retrigger() - Optional</para></listitem>
+ <listitem><para>irq_set_type() - Optional</para></listitem>
+ <listitem><para>irq_set_wake() - Optional</para></listitem>
+ </itemizedlist>
+ These primitives are strictly intended to mean what they say: ack means
+ ACK, masking means masking of an IRQ line, etc. It is up to the flow
+ handler(s) to use these basic units of low-level functionality.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="doirq">
+ <title>__do_IRQ entry point</title>
+ <para>
+ The original implementation __do_IRQ() was an alternative entry
+ point for all types of interrupts. It no longer exists.
+ </para>
+ <para>
+ This handler turned out to be not suitable for all
+ interrupt hardware and was therefore reimplemented with split
+ functionality for edge/level/simple/percpu interrupts. This is not
+ only a functional optimization. It also shortens code paths for
+ interrupts.
+ </para>
+ </chapter>
+
+ <chapter id="locking">
+ <title>Locking on SMP</title>
+ <para>
+ The locking of chip registers is up to the architecture that
+ defines the chip primitives. The per-irq structure is
+ protected via desc->lock, by the generic layer.
+ </para>
+ </chapter>
+
+ <chapter id="genericchip">
+ <title>Generic interrupt chip</title>
+ <para>
+ To avoid copies of identical implementations of IRQ chips the
+ core provides a configurable generic interrupt chip
+ implementation. Developers should check carefully whether the
+ generic chip fits their needs before implementing the same
+ functionality slightly differently themselves.
+ </para>
+!Ekernel/irq/generic-chip.c
+ </chapter>
+
+ <chapter id="structs">
+ <title>Structures</title>
+ <para>
+ This chapter contains the autogenerated documentation of the structures which are
+ used in the generic IRQ layer.
+ </para>
+!Iinclude/linux/irq.h
+!Iinclude/linux/interrupt.h
+ </chapter>
+
+ <chapter id="pubfunctions">
+ <title>Public Functions Provided</title>
+ <para>
+ This chapter contains the autogenerated documentation of the kernel API functions
+ which are exported.
+ </para>
+!Ekernel/irq/manage.c
+!Ekernel/irq/chip.c
+ </chapter>
+
+ <chapter id="intfunctions">
+ <title>Internal Functions Provided</title>
+ <para>
+ This chapter contains the autogenerated documentation of the internal functions.
+ </para>
+!Ikernel/irq/irqdesc.c
+!Ikernel/irq/handle.c
+!Ikernel/irq/chip.c
+ </chapter>
+
+ <chapter id="credits">
+ <title>Credits</title>
+ <para>
+ The following people have contributed to this document:
+ <orderedlist>
+ <listitem><para>Thomas Gleixner<email>tglx@linutronix.de</email></para></listitem>
+ <listitem><para>Ingo Molnar<email>mingo@elte.hu</email></para></listitem>
+ </orderedlist>
+ </para>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
new file mode 100644
index 000000000..ecfd0ea40
--- /dev/null
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -0,0 +1,331 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LinuxKernelAPI">
+ <bookinfo>
+ <title>The Linux Kernel API</title>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="adt">
+ <title>Data Types</title>
+ <sect1><title>Doubly Linked Lists</title>
+!Iinclude/linux/list.h
+ </sect1>
+ </chapter>
+
+ <chapter id="libc">
+ <title>Basic C Library Functions</title>
+
+ <para>
+ When writing drivers, you cannot in general use routines which are
+ from the C Library. Some of the functions have been found generally
+ useful and they are listed below. The behaviour of these functions
+ may vary slightly from those defined by ANSI, and these deviations
+ are noted in the text.
+ </para>
+
+ <sect1><title>String Conversions</title>
+!Elib/vsprintf.c
+!Finclude/linux/kernel.h kstrtol
+!Finclude/linux/kernel.h kstrtoul
+!Elib/kstrtox.c
+ </sect1>
+ <sect1><title>String Manipulation</title>
+<!-- All functions are exported at now
+X!Ilib/string.c
+ -->
+!Elib/string.c
+ </sect1>
+ <sect1><title>Bit Operations</title>
+!Iarch/x86/include/asm/bitops.h
+ </sect1>
+ </chapter>
+
+ <chapter id="kernel-lib">
+ <title>Basic Kernel Library Functions</title>
+
+ <para>
+ The Linux kernel provides more basic utility functions.
+ </para>
+
+ <sect1><title>Bitmap Operations</title>
+!Elib/bitmap.c
+!Ilib/bitmap.c
+ </sect1>
+
+ <sect1><title>Command-line Parsing</title>
+!Elib/cmdline.c
+ </sect1>
+
+ <sect1 id="crc"><title>CRC Functions</title>
+!Elib/crc7.c
+!Elib/crc16.c
+!Elib/crc-itu-t.c
+!Elib/crc32.c
+!Elib/crc-ccitt.c
+ </sect1>
+
+ <sect1 id="idr"><title>idr/ida Functions</title>
+!Pinclude/linux/idr.h idr sync
+!Plib/idr.c IDA description
+!Elib/idr.c
+ </sect1>
+ </chapter>
+
+ <chapter id="mm">
+ <title>Memory Management in Linux</title>
+ <sect1><title>The Slab Cache</title>
+!Iinclude/linux/slab.h
+!Emm/slab.c
+!Emm/util.c
+ </sect1>
+ <sect1><title>User Space Memory Access</title>
+!Iarch/x86/include/asm/uaccess_32.h
+!Earch/x86/lib/usercopy_32.c
+ </sect1>
+ <sect1><title>More Memory Management Functions</title>
+!Emm/readahead.c
+!Emm/filemap.c
+!Emm/memory.c
+!Emm/vmalloc.c
+!Imm/page_alloc.c
+!Emm/mempool.c
+!Emm/dmapool.c
+!Emm/page-writeback.c
+!Emm/truncate.c
+ </sect1>
+ </chapter>
+
+
+ <chapter id="ipc">
+ <title>Kernel IPC facilities</title>
+
+ <sect1><title>IPC utilities</title>
+!Iipc/util.c
+ </sect1>
+ </chapter>
+
+ <chapter id="kfifo">
+ <title>FIFO Buffer</title>
+ <sect1><title>kfifo interface</title>
+!Iinclude/linux/kfifo.h
+ </sect1>
+ </chapter>
+
+ <chapter id="relayfs">
+ <title>relay interface support</title>
+
+ <para>
+ Relay interface support
+ is designed to provide an efficient mechanism for tools and
+ facilities to relay large amounts of data from kernel space to
+ user space.
+ </para>
+
+ <sect1><title>relay interface</title>
+!Ekernel/relay.c
+!Ikernel/relay.c
+ </sect1>
+ </chapter>
+
+ <chapter id="modload">
+ <title>Module Support</title>
+ <sect1><title>Module Loading</title>
+!Ekernel/kmod.c
+ </sect1>
+ <sect1><title>Inter Module support</title>
+ <para>
+ Refer to the file kernel/module.c for more information.
+ </para>
+<!-- FIXME: Removed for now since no structured comments in source
+X!Ekernel/module.c
+-->
+ </sect1>
+ </chapter>
+
+ <chapter id="hardware">
+ <title>Hardware Interfaces</title>
+ <sect1><title>Interrupt Handling</title>
+!Ekernel/irq/manage.c
+ </sect1>
+
+ <sect1><title>DMA Channels</title>
+!Ekernel/dma.c
+ </sect1>
+
+ <sect1><title>Resources Management</title>
+!Ikernel/resource.c
+!Ekernel/resource.c
+ </sect1>
+
+ <sect1><title>MTRR Handling</title>
+!Earch/x86/kernel/cpu/mtrr/main.c
+ </sect1>
+
+ <sect1><title>PCI Support Library</title>
+!Edrivers/pci/pci.c
+!Edrivers/pci/pci-driver.c
+!Edrivers/pci/remove.c
+!Edrivers/pci/search.c
+!Edrivers/pci/msi.c
+!Edrivers/pci/bus.c
+!Edrivers/pci/access.c
+!Edrivers/pci/irq.c
+!Edrivers/pci/htirq.c
+<!-- FIXME: Removed for now since no structured comments in source
+X!Edrivers/pci/hotplug.c
+-->
+!Edrivers/pci/probe.c
+!Edrivers/pci/slot.c
+!Edrivers/pci/rom.c
+!Edrivers/pci/iov.c
+!Idrivers/pci/pci-sysfs.c
+ </sect1>
+ <sect1><title>PCI Hotplug Support Library</title>
+!Edrivers/pci/hotplug/pci_hotplug_core.c
+ </sect1>
+ </chapter>
+
+ <chapter id="firmware">
+ <title>Firmware Interfaces</title>
+ <sect1><title>DMI Interfaces</title>
+!Edrivers/firmware/dmi_scan.c
+ </sect1>
+ <sect1><title>EDD Interfaces</title>
+!Idrivers/firmware/edd.c
+ </sect1>
+ </chapter>
+
+ <chapter id="security">
+ <title>Security Framework</title>
+!Isecurity/security.c
+!Esecurity/inode.c
+ </chapter>
+
+ <chapter id="audit">
+ <title>Audit Interfaces</title>
+!Ekernel/audit.c
+!Ikernel/auditsc.c
+!Ikernel/auditfilter.c
+ </chapter>
+
+ <chapter id="accounting">
+ <title>Accounting Framework</title>
+!Ikernel/acct.c
+ </chapter>
+
+ <chapter id="blkdev">
+ <title>Block Devices</title>
+!Eblock/blk-core.c
+!Iblock/blk-core.c
+!Eblock/blk-map.c
+!Iblock/blk-sysfs.c
+!Eblock/blk-settings.c
+!Eblock/blk-exec.c
+!Eblock/blk-flush.c
+!Eblock/blk-lib.c
+!Eblock/blk-tag.c
+!Iblock/blk-tag.c
+!Eblock/blk-integrity.c
+!Ikernel/trace/blktrace.c
+!Iblock/genhd.c
+!Eblock/genhd.c
+ </chapter>
+
+ <chapter id="chrdev">
+ <title>Char devices</title>
+!Efs/char_dev.c
+ </chapter>
+
+ <chapter id="miscdev">
+ <title>Miscellaneous Devices</title>
+!Edrivers/char/misc.c
+ </chapter>
+
+ <chapter id="clk">
+ <title>Clock Framework</title>
+
+ <para>
+ The clock framework defines programming interfaces to support
+ software management of the system clock tree.
+ This framework is widely used with System-On-Chip (SOC) platforms
+ to support power management and various devices which may need
+ custom clock rates.
+ Note that these "clocks" don't relate to timekeeping or real
+ time clocks (RTCs), each of which have separate frameworks.
+ These <structname>struct clk</structname> instances may be used
+ to manage for example a 96 MHz signal that is used to shift bits
+ into and out of peripherals or busses, or otherwise trigger
+ synchronous state machine transitions in system hardware.
+ </para>
+
+ <para>
+ Power management is supported by explicit software clock gating:
+ unused clocks are disabled, so the system doesn't waste power
+ changing the state of transistors that aren't in active use.
+ On some systems this may be backed by hardware clock gating,
+ where clocks are gated without being disabled in software.
+ Sections of chips that are powered but not clocked may be able
+ to retain their last state.
+ This low power state is often called a <emphasis>retention
+ mode</emphasis>.
+ This mode still incurs leakage currents, especially with finer
+ circuit geometries, but for CMOS circuits power is mostly used
+ by clocked state changes.
+ </para>
+
+ <para>
+ Power-aware drivers only enable their clocks when the device
+ they manage is in active use. Also, system sleep states often
+ differ according to which clock domains are active: while a
+ "standby" state may allow wakeup from several active domains, a
+ "mem" (suspend-to-RAM) state may require a more wholesale shutdown
+ of clocks derived from higher speed PLLs and oscillators, limiting
+ the number of possible wakeup event sources. A driver's suspend
+ method may need to be aware of system-specific clock constraints
+ on the target sleep state.
+ </para>
+
+ <para>
+ Some platforms support programmable clock generators. These
+ can be used by external chips of various kinds, such as other
+ CPUs, multimedia codecs, and devices with strict requirements
+ for interface clocking.
+ </para>
+
+!Iinclude/linux/clk.h
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
new file mode 100644
index 000000000..e84f09467
--- /dev/null
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -0,0 +1,1310 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="lk-hacking-guide">
+ <bookinfo>
+ <title>Unreliable Guide To Hacking The Linux Kernel</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Rusty</firstname>
+ <surname>Russell</surname>
+ <affiliation>
+ <address>
+ <email>rusty@rustcorp.com.au</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2005</year>
+ <holder>Rusty Russell</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+
+ <releaseinfo>
+ This is the first release of this document as part of the kernel tarball.
+ </releaseinfo>
+
+ </bookinfo>
+
+ <toc></toc>
+
+ <chapter id="introduction">
+ <title>Introduction</title>
+ <para>
+ Welcome, gentle reader, to Rusty's Remarkably Unreliable Guide to Linux
+ Kernel Hacking. This document describes the common routines and
+ general requirements for kernel code: its goal is to serve as a
+ primer for Linux kernel development for experienced C
+ programmers. I avoid implementation details: that's what the
+ code is for, and I ignore whole tracts of useful routines.
+ </para>
+ <para>
+ Before you read this, please understand that I never wanted to
+ write this document, being grossly under-qualified, but I always
+ wanted to read it, and this was the only way. I hope it will
+ grow into a compendium of best practice, common starting points
+ and random information.
+ </para>
+ </chapter>
+
+ <chapter id="basic-players">
+ <title>The Players</title>
+
+ <para>
+ At any time each of the CPUs in a system can be:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ not associated with any process, serving a hardware interrupt;
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ not associated with any process, serving a softirq or tasklet;
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ running in kernel space, associated with a process (user context);
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ running a process in user space.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ There is an ordering between these. The bottom two can preempt
+ each other, but above that is a strict hierarchy: each can only be
+ preempted by the ones above it. For example, while a softirq is
+ running on a CPU, no other softirq will preempt it, but a hardware
+ interrupt can. However, any other CPUs in the system execute
+ independently.
+ </para>
+
+ <para>
+ We'll see a number of ways that the user context can block
+ interrupts, to become truly non-preemptable.
+ </para>
+
+ <sect1 id="basics-usercontext">
+ <title>User Context</title>
+
+ <para>
+ User context is when you are coming in from a system call or other
+ trap: like userspace, you can be preempted by more important tasks
+ and by interrupts. You can sleep, by calling
+ <function>schedule()</function>.
+ </para>
+
+ <note>
+ <para>
+ You are always in user context on module load and unload,
+ and on operations on the block device layer.
+ </para>
+ </note>
+
+ <para>
+ In user context, the <varname>current</varname> pointer (indicating
+ the task we are currently executing) is valid, and
+ <function>in_interrupt()</function>
+ (<filename>include/linux/interrupt.h</filename>) is <returnvalue>false
+ </returnvalue>.
+ </para>
+
+ <caution>
+ <para>
+ Beware that if you have preemption or softirqs disabled
+ (see below), <function>in_interrupt()</function> will return a
+ false positive.
+ </para>
+ </caution>
+ </sect1>
+
+ <sect1 id="basics-hardirqs">
+ <title>Hardware Interrupts (Hard IRQs)</title>
+
+ <para>
+ Timer ticks, <hardware>network cards</hardware> and
+ <hardware>keyboard</hardware> are examples of real
+ hardware which produce interrupts at any time. The kernel runs
+ interrupt handlers, which services the hardware. The kernel
+ guarantees that this handler is never re-entered: if the same
+ interrupt arrives, it is queued (or dropped). Because it
+ disables interrupts, this handler has to be fast: frequently it
+ simply acknowledges the interrupt, marks a 'software interrupt'
+ for execution and exits.
+ </para>
+
+ <para>
+ You can tell you are in a hardware interrupt, because
+ <function>in_irq()</function> returns <returnvalue>true</returnvalue>.
+ </para>
+ <caution>
+ <para>
+ Beware that this will return a false positive if interrupts are disabled
+ (see below).
+ </para>
+ </caution>
+ </sect1>
+
+ <sect1 id="basics-softirqs">
+ <title>Software Interrupt Context: Softirqs and Tasklets</title>
+
+ <para>
+ Whenever a system call is about to return to userspace, or a
+ hardware interrupt handler exits, any 'software interrupts'
+ which are marked pending (usually by hardware interrupts) are
+ run (<filename>kernel/softirq.c</filename>).
+ </para>
+
+ <para>
+ Much of the real interrupt handling work is done here. Early in
+ the transition to <acronym>SMP</acronym>, there were only 'bottom
+ halves' (BHs), which didn't take advantage of multiple CPUs. Shortly
+ after we switched from wind-up computers made of match-sticks and snot,
+ we abandoned this limitation and switched to 'softirqs'.
+ </para>
+
+ <para>
+ <filename class="headerfile">include/linux/interrupt.h</filename> lists the
+ different softirqs. A very important softirq is the
+ timer softirq (<filename
+ class="headerfile">include/linux/timer.h</filename>): you can
+ register to have it call functions for you in a given length of
+ time.
+ </para>
+
+ <para>
+ Softirqs are often a pain to deal with, since the same softirq
+ will run simultaneously on more than one CPU. For this reason,
+ tasklets (<filename
+ class="headerfile">include/linux/interrupt.h</filename>) are more
+ often used: they are dynamically-registrable (meaning you can have
+ as many as you want), and they also guarantee that any tasklet
+ will only run on one CPU at any time, although different tasklets
+ can run simultaneously.
+ </para>
+ <caution>
+ <para>
+ The name 'tasklet' is misleading: they have nothing to do with 'tasks',
+ and probably more to do with some bad vodka Alexey Kuznetsov had at the
+ time.
+ </para>
+ </caution>
+
+ <para>
+ You can tell you are in a softirq (or tasklet)
+ using the <function>in_softirq()</function> macro
+ (<filename class="headerfile">include/linux/interrupt.h</filename>).
+ </para>
+ <caution>
+ <para>
+ Beware that this will return a false positive if a bh lock (see below)
+ is held.
+ </para>
+ </caution>
+ </sect1>
+ </chapter>
+
+ <chapter id="basic-rules">
+ <title>Some Basic Rules</title>
+
+ <variablelist>
+ <varlistentry>
+ <term>No memory protection</term>
+ <listitem>
+ <para>
+ If you corrupt memory, whether in user context or
+ interrupt context, the whole machine will crash. Are you
+ sure you can't do what you want in userspace?
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>No floating point or <acronym>MMX</acronym></term>
+ <listitem>
+ <para>
+ The <acronym>FPU</acronym> context is not saved; even in user
+ context the <acronym>FPU</acronym> state probably won't
+ correspond with the current process: you would mess with some
+ user process' <acronym>FPU</acronym> state. If you really want
+ to do this, you would have to explicitly save/restore the full
+ <acronym>FPU</acronym> state (and avoid context switches). It
+ is generally a bad idea; use fixed point arithmetic first.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>A rigid stack limit</term>
+ <listitem>
+ <para>
+ Depending on configuration options the kernel stack is about 3K to 6K for most 32-bit architectures: it's
+ about 14K on most 64-bit archs, and often shared with interrupts
+ so you can't use it all. Avoid deep recursion and huge local
+ arrays on the stack (allocate them dynamically instead).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>The Linux kernel is portable</term>
+ <listitem>
+ <para>
+ Let's keep it that way. Your code should be 64-bit clean,
+ and endian-independent. You should also minimize CPU
+ specific stuff, e.g. inline assembly should be cleanly
+ encapsulated and minimized to ease porting. Generally it
+ should be restricted to the architecture-dependent part of
+ the kernel tree.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </chapter>
+
+ <chapter id="ioctls">
+ <title>ioctls: Not writing a new system call</title>
+
+ <para>
+ A system call generally looks like this
+ </para>
+
+ <programlisting>
+asmlinkage long sys_mycall(int arg)
+{
+ return 0;
+}
+ </programlisting>
+
+ <para>
+ First, in most cases you don't want to create a new system call.
+ You create a character device and implement an appropriate ioctl
+ for it. This is much more flexible than system calls, doesn't have
+ to be entered in every architecture's
+ <filename class="headerfile">include/asm/unistd.h</filename> and
+ <filename>arch/kernel/entry.S</filename> file, and is much more
+ likely to be accepted by Linus.
+ </para>
+
+ <para>
+ If all your routine does is read or write some parameter, consider
+ implementing a <function>sysfs</function> interface instead.
+ </para>
+
+ <para>
+ Inside the ioctl you're in user context to a process. When a
+ error occurs you return a negated errno (see
+ <filename class="headerfile">include/linux/errno.h</filename>),
+ otherwise you return <returnvalue>0</returnvalue>.
+ </para>
+
+ <para>
+ After you slept you should check if a signal occurred: the
+ Unix/Linux way of handling signals is to temporarily exit the
+ system call with the <constant>-ERESTARTSYS</constant> error. The
+ system call entry code will switch back to user context, process
+ the signal handler and then your system call will be restarted
+ (unless the user disabled that). So you should be prepared to
+ process the restart, e.g. if you're in the middle of manipulating
+ some data structure.
+ </para>
+
+ <programlisting>
+if (signal_pending(current))
+ return -ERESTARTSYS;
+ </programlisting>
+
+ <para>
+ If you're doing longer computations: first think userspace. If you
+ <emphasis>really</emphasis> want to do it in kernel you should
+ regularly check if you need to give up the CPU (remember there is
+ cooperative multitasking per CPU). Idiom:
+ </para>
+
+ <programlisting>
+cond_resched(); /* Will sleep */
+ </programlisting>
+
+ <para>
+ A short note on interface design: the UNIX system call motto is
+ "Provide mechanism not policy".
+ </para>
+ </chapter>
+
+ <chapter id="deadlock-recipes">
+ <title>Recipes for Deadlock</title>
+
+ <para>
+ You cannot call any routines which may sleep, unless:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ You are in user context.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ You do not own any spinlocks.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ You have interrupts enabled (actually, Andi Kleen says
+ that the scheduling code will enable them for you, but
+ that's probably not what you wanted).
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Note that some functions may sleep implicitly: common ones are
+ the user space access functions (*_user) and memory allocation
+ functions without <symbol>GFP_ATOMIC</symbol>.
+ </para>
+
+ <para>
+ You should always compile your kernel
+ <symbol>CONFIG_DEBUG_ATOMIC_SLEEP</symbol> on, and it will warn
+ you if you break these rules. If you <emphasis>do</emphasis> break
+ the rules, you will eventually lock up your box.
+ </para>
+
+ <para>
+ Really.
+ </para>
+ </chapter>
+
+ <chapter id="common-routines">
+ <title>Common Routines</title>
+
+ <sect1 id="routines-printk">
+ <title>
+ <function>printk()</function>
+ <filename class="headerfile">include/linux/kernel.h</filename>
+ </title>
+
+ <para>
+ <function>printk()</function> feeds kernel messages to the
+ console, dmesg, and the syslog daemon. It is useful for debugging
+ and reporting errors, and can be used inside interrupt context,
+ but use with caution: a machine which has its console flooded with
+ printk messages is unusable. It uses a format string mostly
+ compatible with ANSI C printf, and C string concatenation to give
+ it a first "priority" argument:
+ </para>
+
+ <programlisting>
+printk(KERN_INFO "i = %u\n", i);
+ </programlisting>
+
+ <para>
+ See <filename class="headerfile">include/linux/kernel.h</filename>;
+ for other KERN_ values; these are interpreted by syslog as the
+ level. Special case: for printing an IP address use
+ </para>
+
+ <programlisting>
+__be32 ipaddress;
+printk(KERN_INFO "my ip: %pI4\n", &amp;ipaddress);
+ </programlisting>
+
+ <para>
+ <function>printk()</function> internally uses a 1K buffer and does
+ not catch overruns. Make sure that will be enough.
+ </para>
+
+ <note>
+ <para>
+ You will know when you are a real kernel hacker
+ when you start typoing printf as printk in your user programs :)
+ </para>
+ </note>
+
+ <!--- From the Lions book reader department -->
+
+ <note>
+ <para>
+ Another sidenote: the original Unix Version 6 sources had a
+ comment on top of its printf function: "Printf should not be
+ used for chit-chat". You should follow that advice.
+ </para>
+ </note>
+ </sect1>
+
+ <sect1 id="routines-copy">
+ <title>
+ <function>copy_[to/from]_user()</function>
+ /
+ <function>get_user()</function>
+ /
+ <function>put_user()</function>
+ <filename class="headerfile">include/asm/uaccess.h</filename>
+ </title>
+
+ <para>
+ <emphasis>[SLEEPS]</emphasis>
+ </para>
+
+ <para>
+ <function>put_user()</function> and <function>get_user()</function>
+ are used to get and put single values (such as an int, char, or
+ long) from and to userspace. A pointer into userspace should
+ never be simply dereferenced: data should be copied using these
+ routines. Both return <constant>-EFAULT</constant> or 0.
+ </para>
+ <para>
+ <function>copy_to_user()</function> and
+ <function>copy_from_user()</function> are more general: they copy
+ an arbitrary amount of data to and from userspace.
+ <caution>
+ <para>
+ Unlike <function>put_user()</function> and
+ <function>get_user()</function>, they return the amount of
+ uncopied data (ie. <returnvalue>0</returnvalue> still means
+ success).
+ </para>
+ </caution>
+ [Yes, this moronic interface makes me cringe. The flamewar comes up every year or so. --RR.]
+ </para>
+ <para>
+ The functions may sleep implicitly. This should never be called
+ outside user context (it makes no sense), with interrupts
+ disabled, or a spinlock held.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-kmalloc">
+ <title><function>kmalloc()</function>/<function>kfree()</function>
+ <filename class="headerfile">include/linux/slab.h</filename></title>
+
+ <para>
+ <emphasis>[MAY SLEEP: SEE BELOW]</emphasis>
+ </para>
+
+ <para>
+ These routines are used to dynamically request pointer-aligned
+ chunks of memory, like malloc and free do in userspace, but
+ <function>kmalloc()</function> takes an extra flag word.
+ Important values:
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term>
+ <constant>
+ GFP_KERNEL
+ </constant>
+ </term>
+ <listitem>
+ <para>
+ May sleep and swap to free memory. Only allowed in user
+ context, but is the most reliable way to allocate memory.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ <constant>
+ GFP_ATOMIC
+ </constant>
+ </term>
+ <listitem>
+ <para>
+ Don't sleep. Less reliable than <constant>GFP_KERNEL</constant>,
+ but may be called from interrupt context. You should
+ <emphasis>really</emphasis> have a good out-of-memory
+ error-handling strategy.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>
+ <constant>
+ GFP_DMA
+ </constant>
+ </term>
+ <listitem>
+ <para>
+ Allocate ISA DMA lower than 16MB. If you don't know what that
+ is you don't need it. Very unreliable.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ If you see a <errorname>sleeping function called from invalid
+ context</errorname> warning message, then maybe you called a
+ sleeping allocation function from interrupt context without
+ <constant>GFP_ATOMIC</constant>. You should really fix that.
+ Run, don't walk.
+ </para>
+
+ <para>
+ If you are allocating at least <constant>PAGE_SIZE</constant>
+ (<filename class="headerfile">include/asm/page.h</filename>) bytes,
+ consider using <function>__get_free_pages()</function>
+
+ (<filename class="headerfile">include/linux/mm.h</filename>). It
+ takes an order argument (0 for page sized, 1 for double page, 2
+ for four pages etc.) and the same memory priority flag word as
+ above.
+ </para>
+
+ <para>
+ If you are allocating more than a page worth of bytes you can use
+ <function>vmalloc()</function>. It'll allocate virtual memory in
+ the kernel map. This block is not contiguous in physical memory,
+ but the <acronym>MMU</acronym> makes it look like it is for you
+ (so it'll only look contiguous to the CPUs, not to external device
+ drivers). If you really need large physically contiguous memory
+ for some weird device, you have a problem: it is poorly supported
+ in Linux because after some time memory fragmentation in a running
+ kernel makes it hard. The best way is to allocate the block early
+ in the boot process via the <function>alloc_bootmem()</function>
+ routine.
+ </para>
+
+ <para>
+ Before inventing your own cache of often-used objects consider
+ using a slab cache in
+ <filename class="headerfile">include/linux/slab.h</filename>
+ </para>
+ </sect1>
+
+ <sect1 id="routines-current">
+ <title><function>current</function>
+ <filename class="headerfile">include/asm/current.h</filename></title>
+
+ <para>
+ This global variable (really a macro) contains a pointer to
+ the current task structure, so is only valid in user context.
+ For example, when a process makes a system call, this will
+ point to the task structure of the calling process. It is
+ <emphasis>not NULL</emphasis> in interrupt context.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-udelay">
+ <title><function>mdelay()</function>/<function>udelay()</function>
+ <filename class="headerfile">include/asm/delay.h</filename>
+ <filename class="headerfile">include/linux/delay.h</filename>
+ </title>
+
+ <para>
+ The <function>udelay()</function> and <function>ndelay()</function> functions can be used for small pauses.
+ Do not use large values with them as you risk
+ overflow - the helper function <function>mdelay()</function> is useful
+ here, or consider <function>msleep()</function>.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-endian">
+ <title><function>cpu_to_be32()</function>/<function>be32_to_cpu()</function>/<function>cpu_to_le32()</function>/<function>le32_to_cpu()</function>
+ <filename class="headerfile">include/asm/byteorder.h</filename>
+ </title>
+
+ <para>
+ The <function>cpu_to_be32()</function> family (where the "32" can
+ be replaced by 64 or 16, and the "be" can be replaced by "le") are
+ the general way to do endian conversions in the kernel: they
+ return the converted value. All variations supply the reverse as
+ well: <function>be32_to_cpu()</function>, etc.
+ </para>
+
+ <para>
+ There are two major variations of these functions: the pointer
+ variation, such as <function>cpu_to_be32p()</function>, which take
+ a pointer to the given type, and return the converted value. The
+ other variation is the "in-situ" family, such as
+ <function>cpu_to_be32s()</function>, which convert value referred
+ to by the pointer, and return void.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-local-irqs">
+ <title><function>local_irq_save()</function>/<function>local_irq_restore()</function>
+ <filename class="headerfile">include/linux/irqflags.h</filename>
+ </title>
+
+ <para>
+ These routines disable hard interrupts on the local CPU, and
+ restore them. They are reentrant; saving the previous state in
+ their one <varname>unsigned long flags</varname> argument. If you
+ know that interrupts are enabled, you can simply use
+ <function>local_irq_disable()</function> and
+ <function>local_irq_enable()</function>.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-softirqs">
+ <title><function>local_bh_disable()</function>/<function>local_bh_enable()</function>
+ <filename class="headerfile">include/linux/interrupt.h</filename></title>
+
+ <para>
+ These routines disable soft interrupts on the local CPU, and
+ restore them. They are reentrant; if soft interrupts were
+ disabled before, they will still be disabled after this pair
+ of functions has been called. They prevent softirqs and tasklets
+ from running on the current CPU.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-processorids">
+ <title><function>smp_processor_id</function>()
+ <filename class="headerfile">include/asm/smp.h</filename></title>
+
+ <para>
+ <function>get_cpu()</function> disables preemption (so you won't
+ suddenly get moved to another CPU) and returns the current
+ processor number, between 0 and <symbol>NR_CPUS</symbol>. Note
+ that the CPU numbers are not necessarily continuous. You return
+ it again with <function>put_cpu()</function> when you are done.
+ </para>
+ <para>
+ If you know you cannot be preempted by another task (ie. you are
+ in interrupt context, or have preemption disabled) you can use
+ smp_processor_id().
+ </para>
+ </sect1>
+
+ <sect1 id="routines-init">
+ <title><type>__init</type>/<type>__exit</type>/<type>__initdata</type>
+ <filename class="headerfile">include/linux/init.h</filename></title>
+
+ <para>
+ After boot, the kernel frees up a special section; functions
+ marked with <type>__init</type> and data structures marked with
+ <type>__initdata</type> are dropped after boot is complete: similarly
+ modules discard this memory after initialization. <type>__exit</type>
+ is used to declare a function which is only required on exit: the
+ function will be dropped if this file is not compiled as a module.
+ See the header file for use. Note that it makes no sense for a function
+ marked with <type>__init</type> to be exported to modules with
+ <function>EXPORT_SYMBOL()</function> - this will break.
+ </para>
+
+ </sect1>
+
+ <sect1 id="routines-init-again">
+ <title><function>__initcall()</function>/<function>module_init()</function>
+ <filename class="headerfile">include/linux/init.h</filename></title>
+ <para>
+ Many parts of the kernel are well served as a module
+ (dynamically-loadable parts of the kernel). Using the
+ <function>module_init()</function> and
+ <function>module_exit()</function> macros it is easy to write code
+ without #ifdefs which can operate both as a module or built into
+ the kernel.
+ </para>
+
+ <para>
+ The <function>module_init()</function> macro defines which
+ function is to be called at module insertion time (if the file is
+ compiled as a module), or at boot time: if the file is not
+ compiled as a module the <function>module_init()</function> macro
+ becomes equivalent to <function>__initcall()</function>, which
+ through linker magic ensures that the function is called on boot.
+ </para>
+
+ <para>
+ The function can return a negative error number to cause
+ module loading to fail (unfortunately, this has no effect if
+ the module is compiled into the kernel). This function is
+ called in user context with interrupts enabled, so it can sleep.
+ </para>
+ </sect1>
+
+ <sect1 id="routines-moduleexit">
+ <title> <function>module_exit()</function>
+ <filename class="headerfile">include/linux/init.h</filename> </title>
+
+ <para>
+ This macro defines the function to be called at module removal
+ time (or never, in the case of the file compiled into the
+ kernel). It will only be called if the module usage count has
+ reached zero. This function can also sleep, but cannot fail:
+ everything must be cleaned up by the time it returns.
+ </para>
+
+ <para>
+ Note that this macro is optional: if it is not present, your
+ module will not be removable (except for 'rmmod -f').
+ </para>
+ </sect1>
+
+ <sect1 id="routines-module-use-counters">
+ <title> <function>try_module_get()</function>/<function>module_put()</function>
+ <filename class="headerfile">include/linux/module.h</filename></title>
+
+ <para>
+ These manipulate the module usage count, to protect against
+ removal (a module also can't be removed if another module uses one
+ of its exported symbols: see below). Before calling into module
+ code, you should call <function>try_module_get()</function> on
+ that module: if it fails, then the module is being removed and you
+ should act as if it wasn't there. Otherwise, you can safely enter
+ the module, and call <function>module_put()</function> when you're
+ finished.
+ </para>
+
+ <para>
+ Most registerable structures have an
+ <structfield>owner</structfield> field, such as in the
+ <structname>file_operations</structname> structure. Set this field
+ to the macro <symbol>THIS_MODULE</symbol>.
+ </para>
+ </sect1>
+
+ <!-- add info on new-style module refcounting here -->
+ </chapter>
+
+ <chapter id="queues">
+ <title>Wait Queues
+ <filename class="headerfile">include/linux/wait.h</filename>
+ </title>
+ <para>
+ <emphasis>[SLEEPS]</emphasis>
+ </para>
+
+ <para>
+ A wait queue is used to wait for someone to wake you up when a
+ certain condition is true. They must be used carefully to ensure
+ there is no race condition. You declare a
+ <type>wait_queue_head_t</type>, and then processes which want to
+ wait for that condition declare a <type>wait_queue_t</type>
+ referring to themselves, and place that in the queue.
+ </para>
+
+ <sect1 id="queue-declaring">
+ <title>Declaring</title>
+
+ <para>
+ You declare a <type>wait_queue_head_t</type> using the
+ <function>DECLARE_WAIT_QUEUE_HEAD()</function> macro, or using the
+ <function>init_waitqueue_head()</function> routine in your
+ initialization code.
+ </para>
+ </sect1>
+
+ <sect1 id="queue-waitqueue">
+ <title>Queuing</title>
+
+ <para>
+ Placing yourself in the waitqueue is fairly complex, because you
+ must put yourself in the queue before checking the condition.
+ There is a macro to do this:
+ <function>wait_event_interruptible()</function>
+
+ <filename class="headerfile">include/linux/wait.h</filename> The
+ first argument is the wait queue head, and the second is an
+ expression which is evaluated; the macro returns
+ <returnvalue>0</returnvalue> when this expression is true, or
+ <returnvalue>-ERESTARTSYS</returnvalue> if a signal is received.
+ The <function>wait_event()</function> version ignores signals.
+ </para>
+
+ </sect1>
+
+ <sect1 id="queue-waking">
+ <title>Waking Up Queued Tasks</title>
+
+ <para>
+ Call <function>wake_up()</function>
+
+ <filename class="headerfile">include/linux/wait.h</filename>;,
+ which will wake up every process in the queue. The exception is
+ if one has <constant>TASK_EXCLUSIVE</constant> set, in which case
+ the remainder of the queue will not be woken. There are other variants
+ of this basic function available in the same header.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="atomic-ops">
+ <title>Atomic Operations</title>
+
+ <para>
+ Certain operations are guaranteed atomic on all platforms. The
+ first class of operations work on <type>atomic_t</type>
+
+ <filename class="headerfile">include/asm/atomic.h</filename>; this
+ contains a signed integer (at least 32 bits long), and you must use
+ these functions to manipulate or read atomic_t variables.
+ <function>atomic_read()</function> and
+ <function>atomic_set()</function> get and set the counter,
+ <function>atomic_add()</function>,
+ <function>atomic_sub()</function>,
+ <function>atomic_inc()</function>,
+ <function>atomic_dec()</function>, and
+ <function>atomic_dec_and_test()</function> (returns
+ <returnvalue>true</returnvalue> if it was decremented to zero).
+ </para>
+
+ <para>
+ Yes. It returns <returnvalue>true</returnvalue> (i.e. != 0) if the
+ atomic variable is zero.
+ </para>
+
+ <para>
+ Note that these functions are slower than normal arithmetic, and
+ so should not be used unnecessarily.
+ </para>
+
+ <para>
+ The second class of atomic operations is atomic bit operations on an
+ <type>unsigned long</type>, defined in
+
+ <filename class="headerfile">include/linux/bitops.h</filename>. These
+ operations generally take a pointer to the bit pattern, and a bit
+ number: 0 is the least significant bit.
+ <function>set_bit()</function>, <function>clear_bit()</function>
+ and <function>change_bit()</function> set, clear, and flip the
+ given bit. <function>test_and_set_bit()</function>,
+ <function>test_and_clear_bit()</function> and
+ <function>test_and_change_bit()</function> do the same thing,
+ except return true if the bit was previously set; these are
+ particularly useful for atomically setting flags.
+ </para>
+
+ <para>
+ It is possible to call these operations with bit indices greater
+ than BITS_PER_LONG. The resulting behavior is strange on big-endian
+ platforms though so it is a good idea not to do this.
+ </para>
+ </chapter>
+
+ <chapter id="symbols">
+ <title>Symbols</title>
+
+ <para>
+ Within the kernel proper, the normal linking rules apply
+ (ie. unless a symbol is declared to be file scope with the
+ <type>static</type> keyword, it can be used anywhere in the
+ kernel). However, for modules, a special exported symbol table is
+ kept which limits the entry points to the kernel proper. Modules
+ can also export symbols.
+ </para>
+
+ <sect1 id="sym-exportsymbols">
+ <title><function>EXPORT_SYMBOL()</function>
+ <filename class="headerfile">include/linux/export.h</filename></title>
+
+ <para>
+ This is the classic method of exporting a symbol: dynamically
+ loaded modules will be able to use the symbol as normal.
+ </para>
+ </sect1>
+
+ <sect1 id="sym-exportsymbols-gpl">
+ <title><function>EXPORT_SYMBOL_GPL()</function>
+ <filename class="headerfile">include/linux/export.h</filename></title>
+
+ <para>
+ Similar to <function>EXPORT_SYMBOL()</function> except that the
+ symbols exported by <function>EXPORT_SYMBOL_GPL()</function> can
+ only be seen by modules with a
+ <function>MODULE_LICENSE()</function> that specifies a GPL
+ compatible license. It implies that the function is considered
+ an internal implementation issue, and not really an interface.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="conventions">
+ <title>Routines and Conventions</title>
+
+ <sect1 id="conventions-doublelinkedlist">
+ <title>Double-linked lists
+ <filename class="headerfile">include/linux/list.h</filename></title>
+
+ <para>
+ There used to be three sets of linked-list routines in the kernel
+ headers, but this one is the winner. If you don't have some
+ particular pressing need for a single list, it's a good choice.
+ </para>
+
+ <para>
+ In particular, <function>list_for_each_entry</function> is useful.
+ </para>
+ </sect1>
+
+ <sect1 id="convention-returns">
+ <title>Return Conventions</title>
+
+ <para>
+ For code called in user context, it's very common to defy C
+ convention, and return <returnvalue>0</returnvalue> for success,
+ and a negative error number
+ (eg. <returnvalue>-EFAULT</returnvalue>) for failure. This can be
+ unintuitive at first, but it's fairly widespread in the kernel.
+ </para>
+
+ <para>
+ Using <function>ERR_PTR()</function>
+
+ <filename class="headerfile">include/linux/err.h</filename>; to
+ encode a negative error number into a pointer, and
+ <function>IS_ERR()</function> and <function>PTR_ERR()</function>
+ to get it back out again: avoids a separate pointer parameter for
+ the error number. Icky, but in a good way.
+ </para>
+ </sect1>
+
+ <sect1 id="conventions-borkedcompile">
+ <title>Breaking Compilation</title>
+
+ <para>
+ Linus and the other developers sometimes change function or
+ structure names in development kernels; this is not done just to
+ keep everyone on their toes: it reflects a fundamental change
+ (eg. can no longer be called with interrupts on, or does extra
+ checks, or doesn't do checks which were caught before). Usually
+ this is accompanied by a fairly complete note to the linux-kernel
+ mailing list; search the archive. Simply doing a global replace
+ on the file usually makes things <emphasis>worse</emphasis>.
+ </para>
+ </sect1>
+
+ <sect1 id="conventions-initialising">
+ <title>Initializing structure members</title>
+
+ <para>
+ The preferred method of initializing structures is to use
+ designated initialisers, as defined by ISO C99, eg:
+ </para>
+ <programlisting>
+static struct block_device_operations opt_fops = {
+ .open = opt_open,
+ .release = opt_release,
+ .ioctl = opt_ioctl,
+ .check_media_change = opt_media_change,
+};
+ </programlisting>
+ <para>
+ This makes it easy to grep for, and makes it clear which
+ structure fields are set. You should do this because it looks
+ cool.
+ </para>
+ </sect1>
+
+ <sect1 id="conventions-gnu-extns">
+ <title>GNU Extensions</title>
+
+ <para>
+ GNU Extensions are explicitly allowed in the Linux kernel.
+ Note that some of the more complex ones are not very well
+ supported, due to lack of general use, but the following are
+ considered standard (see the GCC info page section "C
+ Extensions" for more details - Yes, really the info page, the
+ man page is only a short summary of the stuff in info).
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ Inline functions
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Statement expressions (ie. the ({ and }) constructs).
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Declaring attributes of a function / variable / type
+ (__attribute__)
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ typeof
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Zero length arrays
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Macro varargs
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Arithmetic on void pointers
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Non-Constant initializers
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Assembler Instructions (not outside arch/ and include/asm/)
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Function names as strings (__func__).
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ __builtin_constant_p()
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Be wary when using long long in the kernel, the code gcc generates for
+ it is horrible and worse: division and multiplication does not work
+ on i386 because the GCC runtime functions for it are missing from
+ the kernel environment.
+ </para>
+
+ <!-- FIXME: add a note about ANSI aliasing cleanness -->
+ </sect1>
+
+ <sect1 id="conventions-cplusplus">
+ <title>C++</title>
+
+ <para>
+ Using C++ in the kernel is usually a bad idea, because the
+ kernel does not provide the necessary runtime environment
+ and the include files are not tested for it. It is still
+ possible, but not recommended. If you really want to do
+ this, forget about exceptions at least.
+ </para>
+ </sect1>
+
+ <sect1 id="conventions-ifdef">
+ <title>&num;if</title>
+
+ <para>
+ It is generally considered cleaner to use macros in header files
+ (or at the top of .c files) to abstract away functions rather than
+ using `#if' pre-processor statements throughout the source code.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="submitting">
+ <title>Putting Your Stuff in the Kernel</title>
+
+ <para>
+ In order to get your stuff into shape for official inclusion, or
+ even to make a neat patch, there's administrative work to be
+ done:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ Figure out whose pond you've been pissing in. Look at the top of
+ the source files, inside the <filename>MAINTAINERS</filename>
+ file, and last of all in the <filename>CREDITS</filename> file.
+ You should coordinate with this person to make sure you're not
+ duplicating effort, or trying something that's already been
+ rejected.
+ </para>
+
+ <para>
+ Make sure you put your name and EMail address at the top of
+ any files you create or mangle significantly. This is the
+ first place people will look when they find a bug, or when
+ <emphasis>they</emphasis> want to make a change.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Usually you want a configuration option for your kernel hack.
+ Edit <filename>Kconfig</filename> in the appropriate directory.
+ The Config language is simple to use by cut and paste, and there's
+ complete documentation in
+ <filename>Documentation/kbuild/kconfig-language.txt</filename>.
+ </para>
+
+ <para>
+ In your description of the option, make sure you address both the
+ expert user and the user who knows nothing about your feature. Mention
+ incompatibilities and issues here. <emphasis> Definitely
+ </emphasis> end your description with <quote> if in doubt, say N
+ </quote> (or, occasionally, `Y'); this is for people who have no
+ idea what you are talking about.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Edit the <filename>Makefile</filename>: the CONFIG variables are
+ exported here so you can usually just add a "obj-$(CONFIG_xxx) +=
+ xxx.o" line. The syntax is documented in
+ <filename>Documentation/kbuild/makefiles.txt</filename>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Put yourself in <filename>CREDITS</filename> if you've done
+ something noteworthy, usually beyond a single file (your name
+ should be at the top of the source files anyway).
+ <filename>MAINTAINERS</filename> means you want to be consulted
+ when changes are made to a subsystem, and hear about bugs; it
+ implies a more-than-passing commitment to some part of the code.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Finally, don't forget to read <filename>Documentation/SubmittingPatches</filename>
+ and possibly <filename>Documentation/SubmittingDrivers</filename>.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </chapter>
+
+ <chapter id="cantrips">
+ <title>Kernel Cantrips</title>
+
+ <para>
+ Some favorites from browsing the source. Feel free to add to this
+ list.
+ </para>
+
+ <para>
+ <filename>arch/x86/include/asm/delay.h:</filename>
+ </para>
+ <programlisting>
+#define ndelay(n) (__builtin_constant_p(n) ? \
+ ((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
+ __ndelay(n))
+ </programlisting>
+
+ <para>
+ <filename>include/linux/fs.h</filename>:
+ </para>
+ <programlisting>
+/*
+ * Kernel pointers have redundant information, so we can use a
+ * scheme where we can return either an error code or a dentry
+ * pointer with the same return value.
+ *
+ * This should be a per-architecture thing, to allow different
+ * error and pointer decisions.
+ */
+ #define ERR_PTR(err) ((void *)((long)(err)))
+ #define PTR_ERR(ptr) ((long)(ptr))
+ #define IS_ERR(ptr) ((unsigned long)(ptr) > (unsigned long)(-1000))
+</programlisting>
+
+ <para>
+ <filename>arch/x86/include/asm/uaccess_32.h:</filename>
+ </para>
+
+ <programlisting>
+#define copy_to_user(to,from,n) \
+ (__builtin_constant_p(n) ? \
+ __constant_copy_to_user((to),(from),(n)) : \
+ __generic_copy_to_user((to),(from),(n)))
+ </programlisting>
+
+ <para>
+ <filename>arch/sparc/kernel/head.S:</filename>
+ </para>
+
+ <programlisting>
+/*
+ * Sun people can't spell worth damn. "compatability" indeed.
+ * At least we *know* we can't spell, and use a spell-checker.
+ */
+
+/* Uh, actually Linus it is I who cannot spell. Too much murky
+ * Sparc assembly will do this to ya.
+ */
+C_LABEL(cputypvar):
+ .asciz "compatibility"
+
+/* Tested on SS-5, SS-10. Probably someone at Sun applied a spell-checker. */
+ .align 4
+C_LABEL(cputypvar_sun4m):
+ .asciz "compatible"
+ </programlisting>
+
+ <para>
+ <filename>arch/sparc/lib/checksum.S:</filename>
+ </para>
+
+ <programlisting>
+ /* Sun, you just can't beat me, you just can't. Stop trying,
+ * give up. I'm serious, I am going to kick the living shit
+ * out of you, game over, lights out.
+ */
+ </programlisting>
+ </chapter>
+
+ <chapter id="credits">
+ <title>Thanks</title>
+
+ <para>
+ Thanks to Andi Kleen for the idea, answering my questions, fixing
+ my mistakes, filling content, etc. Philipp Rumpf for more spelling
+ and clarity fixes, and some excellent non-obvious points. Werner
+ Almesberger for giving me a great summary of
+ <function>disable_irq()</function>, and Jes Sorensen and Andrea
+ Arcangeli added caveats. Michael Elizabeth Chastain for checking
+ and adding to the Configure section. <!-- Rusty insisted on this
+ bit; I didn't do it! --> Telsa Gwynne for teaching me DocBook.
+ </para>
+ </chapter>
+</book>
+
diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
new file mode 100644
index 000000000..7c9cc4846
--- /dev/null
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -0,0 +1,2151 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LKLockingGuide">
+ <bookinfo>
+ <title>Unreliable Guide To Locking</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Rusty</firstname>
+ <surname>Russell</surname>
+ <affiliation>
+ <address>
+ <email>rusty@rustcorp.com.au</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2003</year>
+ <holder>Rusty Russell</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ Welcome, to Rusty's Remarkably Unreliable Guide to Kernel
+ Locking issues. This document describes the locking systems in
+ the Linux Kernel in 2.6.
+ </para>
+ <para>
+ With the wide availability of HyperThreading, and <firstterm
+ linkend="gloss-preemption">preemption </firstterm> in the Linux
+ Kernel, everyone hacking on the kernel needs to know the
+ fundamentals of concurrency and locking for
+ <firstterm linkend="gloss-smp"><acronym>SMP</acronym></firstterm>.
+ </para>
+ </chapter>
+
+ <chapter id="races">
+ <title>The Problem With Concurrency</title>
+ <para>
+ (Skip this if you know what a Race Condition is).
+ </para>
+ <para>
+ In a normal program, you can increment a counter like so:
+ </para>
+ <programlisting>
+ very_important_count++;
+ </programlisting>
+
+ <para>
+ This is what they would expect to happen:
+ </para>
+
+ <table>
+ <title>Expected Results</title>
+
+ <tgroup cols="2" align="left">
+
+ <thead>
+ <row>
+ <entry>Instance 1</entry>
+ <entry>Instance 2</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry>read very_important_count (5)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>add 1 (6)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>write very_important_count (6)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>read very_important_count (6)</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>add 1 (7)</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>write very_important_count (7)</entry>
+ </row>
+ </tbody>
+
+ </tgroup>
+ </table>
+
+ <para>
+ This is what might happen:
+ </para>
+
+ <table>
+ <title>Possible Results</title>
+
+ <tgroup cols="2" align="left">
+ <thead>
+ <row>
+ <entry>Instance 1</entry>
+ <entry>Instance 2</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry>read very_important_count (5)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>read very_important_count (5)</entry>
+ </row>
+ <row>
+ <entry>add 1 (6)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>add 1 (6)</entry>
+ </row>
+ <row>
+ <entry>write very_important_count (6)</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>write very_important_count (6)</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <sect1 id="race-condition">
+ <title>Race Conditions and Critical Regions</title>
+ <para>
+ This overlap, where the result depends on the
+ relative timing of multiple tasks, is called a <firstterm>race condition</firstterm>.
+ The piece of code containing the concurrency issue is called a
+ <firstterm>critical region</firstterm>. And especially since Linux starting running
+ on SMP machines, they became one of the major issues in kernel
+ design and implementation.
+ </para>
+ <para>
+ Preemption can have the same effect, even if there is only one
+ CPU: by preempting one task during the critical region, we have
+ exactly the same race condition. In this case the thread which
+ preempts might run the critical region itself.
+ </para>
+ <para>
+ The solution is to recognize when these simultaneous accesses
+ occur, and use locks to make sure that only one instance can
+ enter the critical region at any time. There are many
+ friendly primitives in the Linux kernel to help you do this.
+ And then there are the unfriendly primitives, but I'll pretend
+ they don't exist.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="locks">
+ <title>Locking in the Linux Kernel</title>
+
+ <para>
+ If I could give you one piece of advice: never sleep with anyone
+ crazier than yourself. But if I had to give you advice on
+ locking: <emphasis>keep it simple</emphasis>.
+ </para>
+
+ <para>
+ Be reluctant to introduce new locks.
+ </para>
+
+ <para>
+ Strangely enough, this last one is the exact reverse of my advice when
+ you <emphasis>have</emphasis> slept with someone crazier than yourself.
+ And you should think about getting a big dog.
+ </para>
+
+ <sect1 id="lock-intro">
+ <title>Two Main Types of Kernel Locks: Spinlocks and Mutexes</title>
+
+ <para>
+ There are two main types of kernel locks. The fundamental type
+ is the spinlock
+ (<filename class="headerfile">include/asm/spinlock.h</filename>),
+ which is a very simple single-holder lock: if you can't get the
+ spinlock, you keep trying (spinning) until you can. Spinlocks are
+ very small and fast, and can be used anywhere.
+ </para>
+ <para>
+ The second type is a mutex
+ (<filename class="headerfile">include/linux/mutex.h</filename>): it
+ is like a spinlock, but you may block holding a mutex.
+ If you can't lock a mutex, your task will suspend itself, and be woken
+ up when the mutex is released. This means the CPU can do something
+ else while you are waiting. There are many cases when you simply
+ can't sleep (see <xref linkend="sleeping-things"/>), and so have to
+ use a spinlock instead.
+ </para>
+ <para>
+ Neither type of lock is recursive: see
+ <xref linkend="deadlock"/>.
+ </para>
+ </sect1>
+
+ <sect1 id="uniprocessor">
+ <title>Locks and Uniprocessor Kernels</title>
+
+ <para>
+ For kernels compiled without <symbol>CONFIG_SMP</symbol>, and
+ without <symbol>CONFIG_PREEMPT</symbol> spinlocks do not exist at
+ all. This is an excellent design decision: when no-one else can
+ run at the same time, there is no reason to have a lock.
+ </para>
+
+ <para>
+ If the kernel is compiled without <symbol>CONFIG_SMP</symbol>,
+ but <symbol>CONFIG_PREEMPT</symbol> is set, then spinlocks
+ simply disable preemption, which is sufficient to prevent any
+ races. For most purposes, we can think of preemption as
+ equivalent to SMP, and not worry about it separately.
+ </para>
+
+ <para>
+ You should always test your locking code with <symbol>CONFIG_SMP</symbol>
+ and <symbol>CONFIG_PREEMPT</symbol> enabled, even if you don't have an SMP test box, because it
+ will still catch some kinds of locking bugs.
+ </para>
+
+ <para>
+ Mutexes still exist, because they are required for
+ synchronization between <firstterm linkend="gloss-usercontext">user
+ contexts</firstterm>, as we will see below.
+ </para>
+ </sect1>
+
+ <sect1 id="usercontextlocking">
+ <title>Locking Only In User Context</title>
+
+ <para>
+ If you have a data structure which is only ever accessed from
+ user context, then you can use a simple mutex
+ (<filename>include/linux/mutex.h</filename>) to protect it. This
+ is the most trivial case: you initialize the mutex. Then you can
+ call <function>mutex_lock_interruptible()</function> to grab the mutex,
+ and <function>mutex_unlock()</function> to release it. There is also a
+ <function>mutex_lock()</function>, which should be avoided, because it
+ will not return if a signal is received.
+ </para>
+
+ <para>
+ Example: <filename>net/netfilter/nf_sockopt.c</filename> allows
+ registration of new <function>setsockopt()</function> and
+ <function>getsockopt()</function> calls, with
+ <function>nf_register_sockopt()</function>. Registration and
+ de-registration are only done on module load and unload (and boot
+ time, where there is no concurrency), and the list of registrations
+ is only consulted for an unknown <function>setsockopt()</function>
+ or <function>getsockopt()</function> system call. The
+ <varname>nf_sockopt_mutex</varname> is perfect to protect this,
+ especially since the setsockopt and getsockopt calls may well
+ sleep.
+ </para>
+ </sect1>
+
+ <sect1 id="lock-user-bh">
+ <title>Locking Between User Context and Softirqs</title>
+
+ <para>
+ If a <firstterm linkend="gloss-softirq">softirq</firstterm> shares
+ data with user context, you have two problems. Firstly, the current
+ user context can be interrupted by a softirq, and secondly, the
+ critical region could be entered from another CPU. This is where
+ <function>spin_lock_bh()</function>
+ (<filename class="headerfile">include/linux/spinlock.h</filename>) is
+ used. It disables softirqs on that CPU, then grabs the lock.
+ <function>spin_unlock_bh()</function> does the reverse. (The
+ '_bh' suffix is a historical reference to "Bottom Halves", the
+ old name for software interrupts. It should really be
+ called spin_lock_softirq()' in a perfect world).
+ </para>
+
+ <para>
+ Note that you can also use <function>spin_lock_irq()</function>
+ or <function>spin_lock_irqsave()</function> here, which stop
+ hardware interrupts as well: see <xref linkend="hardirq-context"/>.
+ </para>
+
+ <para>
+ This works perfectly for <firstterm linkend="gloss-up"><acronym>UP
+ </acronym></firstterm> as well: the spin lock vanishes, and this macro
+ simply becomes <function>local_bh_disable()</function>
+ (<filename class="headerfile">include/linux/interrupt.h</filename>), which
+ protects you from the softirq being run.
+ </para>
+ </sect1>
+
+ <sect1 id="lock-user-tasklet">
+ <title>Locking Between User Context and Tasklets</title>
+
+ <para>
+ This is exactly the same as above, because <firstterm
+ linkend="gloss-tasklet">tasklets</firstterm> are actually run
+ from a softirq.
+ </para>
+ </sect1>
+
+ <sect1 id="lock-user-timers">
+ <title>Locking Between User Context and Timers</title>
+
+ <para>
+ This, too, is exactly the same as above, because <firstterm
+ linkend="gloss-timers">timers</firstterm> are actually run from
+ a softirq. From a locking point of view, tasklets and timers
+ are identical.
+ </para>
+ </sect1>
+
+ <sect1 id="lock-tasklets">
+ <title>Locking Between Tasklets/Timers</title>
+
+ <para>
+ Sometimes a tasklet or timer might want to share data with
+ another tasklet or timer.
+ </para>
+
+ <sect2 id="lock-tasklets-same">
+ <title>The Same Tasklet/Timer</title>
+ <para>
+ Since a tasklet is never run on two CPUs at once, you don't
+ need to worry about your tasklet being reentrant (running
+ twice at once), even on SMP.
+ </para>
+ </sect2>
+
+ <sect2 id="lock-tasklets-different">
+ <title>Different Tasklets/Timers</title>
+ <para>
+ If another tasklet/timer wants
+ to share data with your tasklet or timer , you will both need to use
+ <function>spin_lock()</function> and
+ <function>spin_unlock()</function> calls.
+ <function>spin_lock_bh()</function> is
+ unnecessary here, as you are already in a tasklet, and
+ none will be run on the same CPU.
+ </para>
+ </sect2>
+ </sect1>
+
+ <sect1 id="lock-softirqs">
+ <title>Locking Between Softirqs</title>
+
+ <para>
+ Often a softirq might
+ want to share data with itself or a tasklet/timer.
+ </para>
+
+ <sect2 id="lock-softirqs-same">
+ <title>The Same Softirq</title>
+
+ <para>
+ The same softirq can run on the other CPUs: you can use a
+ per-CPU array (see <xref linkend="per-cpu"/>) for better
+ performance. If you're going so far as to use a softirq,
+ you probably care about scalable performance enough
+ to justify the extra complexity.
+ </para>
+
+ <para>
+ You'll need to use <function>spin_lock()</function> and
+ <function>spin_unlock()</function> for shared data.
+ </para>
+ </sect2>
+
+ <sect2 id="lock-softirqs-different">
+ <title>Different Softirqs</title>
+
+ <para>
+ You'll need to use <function>spin_lock()</function> and
+ <function>spin_unlock()</function> for shared data, whether it
+ be a timer, tasklet, different softirq or the same or another
+ softirq: any of them could be running on a different CPU.
+ </para>
+ </sect2>
+ </sect1>
+ </chapter>
+
+ <chapter id="hardirq-context">
+ <title>Hard IRQ Context</title>
+
+ <para>
+ Hardware interrupts usually communicate with a
+ tasklet or softirq. Frequently this involves putting work in a
+ queue, which the softirq will take out.
+ </para>
+
+ <sect1 id="hardirq-softirq">
+ <title>Locking Between Hard IRQ and Softirqs/Tasklets</title>
+
+ <para>
+ If a hardware irq handler shares data with a softirq, you have
+ two concerns. Firstly, the softirq processing can be
+ interrupted by a hardware interrupt, and secondly, the
+ critical region could be entered by a hardware interrupt on
+ another CPU. This is where <function>spin_lock_irq()</function> is
+ used. It is defined to disable interrupts on that cpu, then grab
+ the lock. <function>spin_unlock_irq()</function> does the reverse.
+ </para>
+
+ <para>
+ The irq handler does not to use
+ <function>spin_lock_irq()</function>, because the softirq cannot
+ run while the irq handler is running: it can use
+ <function>spin_lock()</function>, which is slightly faster. The
+ only exception would be if a different hardware irq handler uses
+ the same lock: <function>spin_lock_irq()</function> will stop
+ that from interrupting us.
+ </para>
+
+ <para>
+ This works perfectly for UP as well: the spin lock vanishes,
+ and this macro simply becomes <function>local_irq_disable()</function>
+ (<filename class="headerfile">include/asm/smp.h</filename>), which
+ protects you from the softirq/tasklet/BH being run.
+ </para>
+
+ <para>
+ <function>spin_lock_irqsave()</function>
+ (<filename>include/linux/spinlock.h</filename>) is a variant
+ which saves whether interrupts were on or off in a flags word,
+ which is passed to <function>spin_unlock_irqrestore()</function>. This
+ means that the same code can be used inside an hard irq handler (where
+ interrupts are already off) and in softirqs (where the irq
+ disabling is required).
+ </para>
+
+ <para>
+ Note that softirqs (and hence tasklets and timers) are run on
+ return from hardware interrupts, so
+ <function>spin_lock_irq()</function> also stops these. In that
+ sense, <function>spin_lock_irqsave()</function> is the most
+ general and powerful locking function.
+ </para>
+
+ </sect1>
+ <sect1 id="hardirq-hardirq">
+ <title>Locking Between Two Hard IRQ Handlers</title>
+ <para>
+ It is rare to have to share data between two IRQ handlers, but
+ if you do, <function>spin_lock_irqsave()</function> should be
+ used: it is architecture-specific whether all interrupts are
+ disabled inside irq handlers themselves.
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="cheatsheet">
+ <title>Cheat Sheet For Locking</title>
+ <para>
+ Pete Zaitcev gives the following summary:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ If you are in a process context (any syscall) and want to
+ lock other process out, use a mutex. You can take a mutex
+ and sleep (<function>copy_from_user*(</function> or
+ <function>kmalloc(x,GFP_KERNEL)</function>).
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Otherwise (== data can be touched in an interrupt), use
+ <function>spin_lock_irqsave()</function> and
+ <function>spin_unlock_irqrestore()</function>.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Avoid holding spinlock for more than 5 lines of code and
+ across any function call (except accessors like
+ <function>readb</function>).
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <sect1 id="minimum-lock-reqirements">
+ <title>Table of Minimum Requirements</title>
+
+ <para> The following table lists the <emphasis>minimum</emphasis>
+ locking requirements between various contexts. In some cases,
+ the same context can only be running on one CPU at a time, so
+ no locking is required for that context (eg. a particular
+ thread can only run on one CPU at a time, but if it needs
+ shares data with another thread, locking is required).
+ </para>
+ <para>
+ Remember the advice above: you can always use
+ <function>spin_lock_irqsave()</function>, which is a superset
+ of all other spinlock primitives.
+ </para>
+
+ <table>
+<title>Table of Locking Requirements</title>
+<tgroup cols="11">
+<tbody>
+
+<row>
+<entry></entry>
+<entry>IRQ Handler A</entry>
+<entry>IRQ Handler B</entry>
+<entry>Softirq A</entry>
+<entry>Softirq B</entry>
+<entry>Tasklet A</entry>
+<entry>Tasklet B</entry>
+<entry>Timer A</entry>
+<entry>Timer B</entry>
+<entry>User Context A</entry>
+<entry>User Context B</entry>
+</row>
+
+<row>
+<entry>IRQ Handler A</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>IRQ Handler B</entry>
+<entry>SLIS</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Softirq A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+</row>
+
+<row>
+<entry>Softirq B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+</row>
+
+<row>
+<entry>Tasklet A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Tasklet B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Timer A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>Timer B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>SL</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>User Context A</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>None</entry>
+</row>
+
+<row>
+<entry>User Context B</entry>
+<entry>SLI</entry>
+<entry>SLI</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>SLBH</entry>
+<entry>MLI</entry>
+<entry>None</entry>
+</row>
+
+</tbody>
+</tgroup>
+</table>
+
+ <table>
+<title>Legend for Locking Requirements Table</title>
+<tgroup cols="2">
+<tbody>
+
+<row>
+<entry>SLIS</entry>
+<entry>spin_lock_irqsave</entry>
+</row>
+<row>
+<entry>SLI</entry>
+<entry>spin_lock_irq</entry>
+</row>
+<row>
+<entry>SL</entry>
+<entry>spin_lock</entry>
+</row>
+<row>
+<entry>SLBH</entry>
+<entry>spin_lock_bh</entry>
+</row>
+<row>
+<entry>MLI</entry>
+<entry>mutex_lock_interruptible</entry>
+</row>
+
+</tbody>
+</tgroup>
+</table>
+
+</sect1>
+</chapter>
+
+<chapter id="trylock-functions">
+ <title>The trylock Functions</title>
+ <para>
+ There are functions that try to acquire a lock only once and immediately
+ return a value telling about success or failure to acquire the lock.
+ They can be used if you need no access to the data protected with the lock
+ when some other thread is holding the lock. You should acquire the lock
+ later if you then need access to the data protected with the lock.
+ </para>
+
+ <para>
+ <function>spin_trylock()</function> does not spin but returns non-zero if
+ it acquires the spinlock on the first try or 0 if not. This function can
+ be used in all contexts like <function>spin_lock</function>: you must have
+ disabled the contexts that might interrupt you and acquire the spin lock.
+ </para>
+
+ <para>
+ <function>mutex_trylock()</function> does not suspend your task
+ but returns non-zero if it could lock the mutex on the first try
+ or 0 if not. This function cannot be safely used in hardware or software
+ interrupt contexts despite not sleeping.
+ </para>
+</chapter>
+
+ <chapter id="Examples">
+ <title>Common Examples</title>
+ <para>
+Let's step through a simple example: a cache of number to name
+mappings. The cache keeps a count of how often each of the objects is
+used, and when it gets full, throws out the least used one.
+
+ </para>
+
+ <sect1 id="examples-usercontext">
+ <title>All In User Context</title>
+ <para>
+For our first example, we assume that all operations are in user
+context (ie. from system calls), so we can sleep. This means we can
+use a mutex to protect the cache and all the objects within
+it. Here's the code:
+ </para>
+
+ <programlisting>
+#include &lt;linux/list.h&gt;
+#include &lt;linux/slab.h&gt;
+#include &lt;linux/string.h&gt;
+#include &lt;linux/mutex.h&gt;
+#include &lt;asm/errno.h&gt;
+
+struct object
+{
+ struct list_head list;
+ int id;
+ char name[32];
+ int popularity;
+};
+
+/* Protects the cache, cache_num, and the objects within it */
+static DEFINE_MUTEX(cache_lock);
+static LIST_HEAD(cache);
+static unsigned int cache_num = 0;
+#define MAX_CACHE_SIZE 10
+
+/* Must be holding cache_lock */
+static struct object *__cache_find(int id)
+{
+ struct object *i;
+
+ list_for_each_entry(i, &amp;cache, list)
+ if (i-&gt;id == id) {
+ i-&gt;popularity++;
+ return i;
+ }
+ return NULL;
+}
+
+/* Must be holding cache_lock */
+static void __cache_delete(struct object *obj)
+{
+ BUG_ON(!obj);
+ list_del(&amp;obj-&gt;list);
+ kfree(obj);
+ cache_num--;
+}
+
+/* Must be holding cache_lock */
+static void __cache_add(struct object *obj)
+{
+ list_add(&amp;obj-&gt;list, &amp;cache);
+ if (++cache_num > MAX_CACHE_SIZE) {
+ struct object *i, *outcast = NULL;
+ list_for_each_entry(i, &amp;cache, list) {
+ if (!outcast || i-&gt;popularity &lt; outcast-&gt;popularity)
+ outcast = i;
+ }
+ __cache_delete(outcast);
+ }
+}
+
+int cache_add(int id, const char *name)
+{
+ struct object *obj;
+
+ if ((obj = kmalloc(sizeof(*obj), GFP_KERNEL)) == NULL)
+ return -ENOMEM;
+
+ strlcpy(obj-&gt;name, name, sizeof(obj-&gt;name));
+ obj-&gt;id = id;
+ obj-&gt;popularity = 0;
+
+ mutex_lock(&amp;cache_lock);
+ __cache_add(obj);
+ mutex_unlock(&amp;cache_lock);
+ return 0;
+}
+
+void cache_delete(int id)
+{
+ mutex_lock(&amp;cache_lock);
+ __cache_delete(__cache_find(id));
+ mutex_unlock(&amp;cache_lock);
+}
+
+int cache_find(int id, char *name)
+{
+ struct object *obj;
+ int ret = -ENOENT;
+
+ mutex_lock(&amp;cache_lock);
+ obj = __cache_find(id);
+ if (obj) {
+ ret = 0;
+ strcpy(name, obj-&gt;name);
+ }
+ mutex_unlock(&amp;cache_lock);
+ return ret;
+}
+</programlisting>
+
+ <para>
+Note that we always make sure we have the cache_lock when we add,
+delete, or look up the cache: both the cache infrastructure itself and
+the contents of the objects are protected by the lock. In this case
+it's easy, since we copy the data for the user, and never let them
+access the objects directly.
+ </para>
+ <para>
+There is a slight (and common) optimization here: in
+<function>cache_add</function> we set up the fields of the object
+before grabbing the lock. This is safe, as no-one else can access it
+until we put it in cache.
+ </para>
+ </sect1>
+
+ <sect1 id="examples-interrupt">
+ <title>Accessing From Interrupt Context</title>
+ <para>
+Now consider the case where <function>cache_find</function> can be
+called from interrupt context: either a hardware interrupt or a
+softirq. An example would be a timer which deletes object from the
+cache.
+ </para>
+ <para>
+The change is shown below, in standard patch format: the
+<symbol>-</symbol> are lines which are taken away, and the
+<symbol>+</symbol> are lines which are added.
+ </para>
+<programlisting>
+--- cache.c.usercontext 2003-12-09 13:58:54.000000000 +1100
++++ cache.c.interrupt 2003-12-09 14:07:49.000000000 +1100
+@@ -12,7 +12,7 @@
+ int popularity;
+ };
+
+-static DEFINE_MUTEX(cache_lock);
++static DEFINE_SPINLOCK(cache_lock);
+ static LIST_HEAD(cache);
+ static unsigned int cache_num = 0;
+ #define MAX_CACHE_SIZE 10
+@@ -55,6 +55,7 @@
+ int cache_add(int id, const char *name)
+ {
+ struct object *obj;
++ unsigned long flags;
+
+ if ((obj = kmalloc(sizeof(*obj), GFP_KERNEL)) == NULL)
+ return -ENOMEM;
+@@ -63,30 +64,33 @@
+ obj-&gt;id = id;
+ obj-&gt;popularity = 0;
+
+- mutex_lock(&amp;cache_lock);
++ spin_lock_irqsave(&amp;cache_lock, flags);
+ __cache_add(obj);
+- mutex_unlock(&amp;cache_lock);
++ spin_unlock_irqrestore(&amp;cache_lock, flags);
+ return 0;
+ }
+
+ void cache_delete(int id)
+ {
+- mutex_lock(&amp;cache_lock);
++ unsigned long flags;
++
++ spin_lock_irqsave(&amp;cache_lock, flags);
+ __cache_delete(__cache_find(id));
+- mutex_unlock(&amp;cache_lock);
++ spin_unlock_irqrestore(&amp;cache_lock, flags);
+ }
+
+ int cache_find(int id, char *name)
+ {
+ struct object *obj;
+ int ret = -ENOENT;
++ unsigned long flags;
+
+- mutex_lock(&amp;cache_lock);
++ spin_lock_irqsave(&amp;cache_lock, flags);
+ obj = __cache_find(id);
+ if (obj) {
+ ret = 0;
+ strcpy(name, obj-&gt;name);
+ }
+- mutex_unlock(&amp;cache_lock);
++ spin_unlock_irqrestore(&amp;cache_lock, flags);
+ return ret;
+ }
+</programlisting>
+
+ <para>
+Note that the <function>spin_lock_irqsave</function> will turn off
+interrupts if they are on, otherwise does nothing (if we are already
+in an interrupt handler), hence these functions are safe to call from
+any context.
+ </para>
+ <para>
+Unfortunately, <function>cache_add</function> calls
+<function>kmalloc</function> with the <symbol>GFP_KERNEL</symbol>
+flag, which is only legal in user context. I have assumed that
+<function>cache_add</function> is still only called in user context,
+otherwise this should become a parameter to
+<function>cache_add</function>.
+ </para>
+ </sect1>
+ <sect1 id="examples-refcnt">
+ <title>Exposing Objects Outside This File</title>
+ <para>
+If our objects contained more information, it might not be sufficient
+to copy the information in and out: other parts of the code might want
+to keep pointers to these objects, for example, rather than looking up
+the id every time. This produces two problems.
+ </para>
+ <para>
+The first problem is that we use the <symbol>cache_lock</symbol> to
+protect objects: we'd need to make this non-static so the rest of the
+code can use it. This makes locking trickier, as it is no longer all
+in one place.
+ </para>
+ <para>
+The second problem is the lifetime problem: if another structure keeps
+a pointer to an object, it presumably expects that pointer to remain
+valid. Unfortunately, this is only guaranteed while you hold the
+lock, otherwise someone might call <function>cache_delete</function>
+and even worse, add another object, re-using the same address.
+ </para>
+ <para>
+As there is only one lock, you can't hold it forever: no-one else would
+get any work done.
+ </para>
+ <para>
+The solution to this problem is to use a reference count: everyone who
+has a pointer to the object increases it when they first get the
+object, and drops the reference count when they're finished with it.
+Whoever drops it to zero knows it is unused, and can actually delete it.
+ </para>
+ <para>
+Here is the code:
+ </para>
+
+<programlisting>
+--- cache.c.interrupt 2003-12-09 14:25:43.000000000 +1100
++++ cache.c.refcnt 2003-12-09 14:33:05.000000000 +1100
+@@ -7,6 +7,7 @@
+ struct object
+ {
+ struct list_head list;
++ unsigned int refcnt;
+ int id;
+ char name[32];
+ int popularity;
+@@ -17,6 +18,35 @@
+ static unsigned int cache_num = 0;
+ #define MAX_CACHE_SIZE 10
+
++static void __object_put(struct object *obj)
++{
++ if (--obj-&gt;refcnt == 0)
++ kfree(obj);
++}
++
++static void __object_get(struct object *obj)
++{
++ obj-&gt;refcnt++;
++}
++
++void object_put(struct object *obj)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&amp;cache_lock, flags);
++ __object_put(obj);
++ spin_unlock_irqrestore(&amp;cache_lock, flags);
++}
++
++void object_get(struct object *obj)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&amp;cache_lock, flags);
++ __object_get(obj);
++ spin_unlock_irqrestore(&amp;cache_lock, flags);
++}
++
+ /* Must be holding cache_lock */
+ static struct object *__cache_find(int id)
+ {
+@@ -35,6 +65,7 @@
+ {
+ BUG_ON(!obj);
+ list_del(&amp;obj-&gt;list);
++ __object_put(obj);
+ cache_num--;
+ }
+
+@@ -63,6 +94,7 @@
+ strlcpy(obj-&gt;name, name, sizeof(obj-&gt;name));
+ obj-&gt;id = id;
+ obj-&gt;popularity = 0;
++ obj-&gt;refcnt = 1; /* The cache holds a reference */
+
+ spin_lock_irqsave(&amp;cache_lock, flags);
+ __cache_add(obj);
+@@ -79,18 +111,15 @@
+ spin_unlock_irqrestore(&amp;cache_lock, flags);
+ }
+
+-int cache_find(int id, char *name)
++struct object *cache_find(int id)
+ {
+ struct object *obj;
+- int ret = -ENOENT;
+ unsigned long flags;
+
+ spin_lock_irqsave(&amp;cache_lock, flags);
+ obj = __cache_find(id);
+- if (obj) {
+- ret = 0;
+- strcpy(name, obj-&gt;name);
+- }
++ if (obj)
++ __object_get(obj);
+ spin_unlock_irqrestore(&amp;cache_lock, flags);
+- return ret;
++ return obj;
+ }
+</programlisting>
+
+<para>
+We encapsulate the reference counting in the standard 'get' and 'put'
+functions. Now we can return the object itself from
+<function>cache_find</function> which has the advantage that the user
+can now sleep holding the object (eg. to
+<function>copy_to_user</function> to name to userspace).
+</para>
+<para>
+The other point to note is that I said a reference should be held for
+every pointer to the object: thus the reference count is 1 when first
+inserted into the cache. In some versions the framework does not hold
+a reference count, but they are more complicated.
+</para>
+
+ <sect2 id="examples-refcnt-atomic">
+ <title>Using Atomic Operations For The Reference Count</title>
+<para>
+In practice, <type>atomic_t</type> would usually be used for
+<structfield>refcnt</structfield>. There are a number of atomic
+operations defined in
+
+<filename class="headerfile">include/asm/atomic.h</filename>: these are
+guaranteed to be seen atomically from all CPUs in the system, so no
+lock is required. In this case, it is simpler than using spinlocks,
+although for anything non-trivial using spinlocks is clearer. The
+<function>atomic_inc</function> and
+<function>atomic_dec_and_test</function> are used instead of the
+standard increment and decrement operators, and the lock is no longer
+used to protect the reference count itself.
+</para>
+
+<programlisting>
+--- cache.c.refcnt 2003-12-09 15:00:35.000000000 +1100
++++ cache.c.refcnt-atomic 2003-12-11 15:49:42.000000000 +1100
+@@ -7,7 +7,7 @@
+ struct object
+ {
+ struct list_head list;
+- unsigned int refcnt;
++ atomic_t refcnt;
+ int id;
+ char name[32];
+ int popularity;
+@@ -18,33 +18,15 @@
+ static unsigned int cache_num = 0;
+ #define MAX_CACHE_SIZE 10
+
+-static void __object_put(struct object *obj)
+-{
+- if (--obj-&gt;refcnt == 0)
+- kfree(obj);
+-}
+-
+-static void __object_get(struct object *obj)
+-{
+- obj-&gt;refcnt++;
+-}
+-
+ void object_put(struct object *obj)
+ {
+- unsigned long flags;
+-
+- spin_lock_irqsave(&amp;cache_lock, flags);
+- __object_put(obj);
+- spin_unlock_irqrestore(&amp;cache_lock, flags);
++ if (atomic_dec_and_test(&amp;obj-&gt;refcnt))
++ kfree(obj);
+ }
+
+ void object_get(struct object *obj)
+ {
+- unsigned long flags;
+-
+- spin_lock_irqsave(&amp;cache_lock, flags);
+- __object_get(obj);
+- spin_unlock_irqrestore(&amp;cache_lock, flags);
++ atomic_inc(&amp;obj-&gt;refcnt);
+ }
+
+ /* Must be holding cache_lock */
+@@ -65,7 +47,7 @@
+ {
+ BUG_ON(!obj);
+ list_del(&amp;obj-&gt;list);
+- __object_put(obj);
++ object_put(obj);
+ cache_num--;
+ }
+
+@@ -94,7 +76,7 @@
+ strlcpy(obj-&gt;name, name, sizeof(obj-&gt;name));
+ obj-&gt;id = id;
+ obj-&gt;popularity = 0;
+- obj-&gt;refcnt = 1; /* The cache holds a reference */
++ atomic_set(&amp;obj-&gt;refcnt, 1); /* The cache holds a reference */
+
+ spin_lock_irqsave(&amp;cache_lock, flags);
+ __cache_add(obj);
+@@ -119,7 +101,7 @@
+ spin_lock_irqsave(&amp;cache_lock, flags);
+ obj = __cache_find(id);
+ if (obj)
+- __object_get(obj);
++ object_get(obj);
+ spin_unlock_irqrestore(&amp;cache_lock, flags);
+ return obj;
+ }
+</programlisting>
+</sect2>
+</sect1>
+
+ <sect1 id="examples-lock-per-obj">
+ <title>Protecting The Objects Themselves</title>
+ <para>
+In these examples, we assumed that the objects (except the reference
+counts) never changed once they are created. If we wanted to allow
+the name to change, there are three possibilities:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+You can make <symbol>cache_lock</symbol> non-static, and tell people
+to grab that lock before changing the name in any object.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+You can provide a <function>cache_obj_rename</function> which grabs
+this lock and changes the name for the caller, and tell everyone to
+use that function.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+You can make the <symbol>cache_lock</symbol> protect only the cache
+itself, and use another lock to protect the name.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+Theoretically, you can make the locks as fine-grained as one lock for
+every field, for every object. In practice, the most common variants
+are:
+</para>
+ <itemizedlist>
+ <listitem>
+ <para>
+One lock which protects the infrastructure (the <symbol>cache</symbol>
+list in this example) and all the objects. This is what we have done
+so far.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+One lock which protects the infrastructure (including the list
+pointers inside the objects), and one lock inside the object which
+protects the rest of that object.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+Multiple locks to protect the infrastructure (eg. one lock per hash
+chain), possibly with a separate per-object lock.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+<para>
+Here is the "lock-per-object" implementation:
+</para>
+<programlisting>
+--- cache.c.refcnt-atomic 2003-12-11 15:50:54.000000000 +1100
++++ cache.c.perobjectlock 2003-12-11 17:15:03.000000000 +1100
+@@ -6,11 +6,17 @@
+
+ struct object
+ {
++ /* These two protected by cache_lock. */
+ struct list_head list;
++ int popularity;
++
+ atomic_t refcnt;
++
++ /* Doesn't change once created. */
+ int id;
++
++ spinlock_t lock; /* Protects the name */
+ char name[32];
+- int popularity;
+ };
+
+ static DEFINE_SPINLOCK(cache_lock);
+@@ -77,6 +84,7 @@
+ obj-&gt;id = id;
+ obj-&gt;popularity = 0;
+ atomic_set(&amp;obj-&gt;refcnt, 1); /* The cache holds a reference */
++ spin_lock_init(&amp;obj-&gt;lock);
+
+ spin_lock_irqsave(&amp;cache_lock, flags);
+ __cache_add(obj);
+</programlisting>
+
+<para>
+Note that I decide that the <structfield>popularity</structfield>
+count should be protected by the <symbol>cache_lock</symbol> rather
+than the per-object lock: this is because it (like the
+<structname>struct list_head</structname> inside the object) is
+logically part of the infrastructure. This way, I don't need to grab
+the lock of every object in <function>__cache_add</function> when
+seeking the least popular.
+</para>
+
+<para>
+I also decided that the <structfield>id</structfield> member is
+unchangeable, so I don't need to grab each object lock in
+<function>__cache_find()</function> to examine the
+<structfield>id</structfield>: the object lock is only used by a
+caller who wants to read or write the <structfield>name</structfield>
+field.
+</para>
+
+<para>
+Note also that I added a comment describing what data was protected by
+which locks. This is extremely important, as it describes the runtime
+behavior of the code, and can be hard to gain from just reading. And
+as Alan Cox says, <quote>Lock data, not code</quote>.
+</para>
+</sect1>
+</chapter>
+
+ <chapter id="common-problems">
+ <title>Common Problems</title>
+ <sect1 id="deadlock">
+ <title>Deadlock: Simple and Advanced</title>
+
+ <para>
+ There is a coding bug where a piece of code tries to grab a
+ spinlock twice: it will spin forever, waiting for the lock to
+ be released (spinlocks, rwlocks and mutexes are not
+ recursive in Linux). This is trivial to diagnose: not a
+ stay-up-five-nights-talk-to-fluffy-code-bunnies kind of
+ problem.
+ </para>
+
+ <para>
+ For a slightly more complex case, imagine you have a region
+ shared by a softirq and user context. If you use a
+ <function>spin_lock()</function> call to protect it, it is
+ possible that the user context will be interrupted by the softirq
+ while it holds the lock, and the softirq will then spin
+ forever trying to get the same lock.
+ </para>
+
+ <para>
+ Both of these are called deadlock, and as shown above, it can
+ occur even with a single CPU (although not on UP compiles,
+ since spinlocks vanish on kernel compiles with
+ <symbol>CONFIG_SMP</symbol>=n. You'll still get data corruption
+ in the second example).
+ </para>
+
+ <para>
+ This complete lockup is easy to diagnose: on SMP boxes the
+ watchdog timer or compiling with <symbol>DEBUG_SPINLOCK</symbol> set
+ (<filename>include/linux/spinlock.h</filename>) will show this up
+ immediately when it happens.
+ </para>
+
+ <para>
+ A more complex problem is the so-called 'deadly embrace',
+ involving two or more locks. Say you have a hash table: each
+ entry in the table is a spinlock, and a chain of hashed
+ objects. Inside a softirq handler, you sometimes want to
+ alter an object from one place in the hash to another: you
+ grab the spinlock of the old hash chain and the spinlock of
+ the new hash chain, and delete the object from the old one,
+ and insert it in the new one.
+ </para>
+
+ <para>
+ There are two problems here. First, if your code ever
+ tries to move the object to the same chain, it will deadlock
+ with itself as it tries to lock it twice. Secondly, if the
+ same softirq on another CPU is trying to move another object
+ in the reverse direction, the following could happen:
+ </para>
+
+ <table>
+ <title>Consequences</title>
+
+ <tgroup cols="2" align="left">
+
+ <thead>
+ <row>
+ <entry>CPU 1</entry>
+ <entry>CPU 2</entry>
+ </row>
+ </thead>
+
+ <tbody>
+ <row>
+ <entry>Grab lock A -&gt; OK</entry>
+ <entry>Grab lock B -&gt; OK</entry>
+ </row>
+ <row>
+ <entry>Grab lock B -&gt; spin</entry>
+ <entry>Grab lock A -&gt; spin</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>
+ The two CPUs will spin forever, waiting for the other to give up
+ their lock. It will look, smell, and feel like a crash.
+ </para>
+ </sect1>
+
+ <sect1 id="techs-deadlock-prevent">
+ <title>Preventing Deadlock</title>
+
+ <para>
+ Textbooks will tell you that if you always lock in the same
+ order, you will never get this kind of deadlock. Practice
+ will tell you that this approach doesn't scale: when I
+ create a new lock, I don't understand enough of the kernel
+ to figure out where in the 5000 lock hierarchy it will fit.
+ </para>
+
+ <para>
+ The best locks are encapsulated: they never get exposed in
+ headers, and are never held around calls to non-trivial
+ functions outside the same file. You can read through this
+ code and see that it will never deadlock, because it never
+ tries to grab another lock while it has that one. People
+ using your code don't even need to know you are using a
+ lock.
+ </para>
+
+ <para>
+ A classic problem here is when you provide callbacks or
+ hooks: if you call these with the lock held, you risk simple
+ deadlock, or a deadly embrace (who knows what the callback
+ will do?). Remember, the other programmers are out to get
+ you, so don't do this.
+ </para>
+
+ <sect2 id="techs-deadlock-overprevent">
+ <title>Overzealous Prevention Of Deadlocks</title>
+
+ <para>
+ Deadlocks are problematic, but not as bad as data
+ corruption. Code which grabs a read lock, searches a list,
+ fails to find what it wants, drops the read lock, grabs a
+ write lock and inserts the object has a race condition.
+ </para>
+
+ <para>
+ If you don't see why, please stay the fuck away from my code.
+ </para>
+ </sect2>
+ </sect1>
+
+ <sect1 id="racing-timers">
+ <title>Racing Timers: A Kernel Pastime</title>
+
+ <para>
+ Timers can produce their own special problems with races.
+ Consider a collection of objects (list, hash, etc) where each
+ object has a timer which is due to destroy it.
+ </para>
+
+ <para>
+ If you want to destroy the entire collection (say on module
+ removal), you might do the following:
+ </para>
+
+ <programlisting>
+ /* THIS CODE BAD BAD BAD BAD: IF IT WAS ANY WORSE IT WOULD USE
+ HUNGARIAN NOTATION */
+ spin_lock_bh(&amp;list_lock);
+
+ while (list) {
+ struct foo *next = list-&gt;next;
+ del_timer(&amp;list-&gt;timer);
+ kfree(list);
+ list = next;
+ }
+
+ spin_unlock_bh(&amp;list_lock);
+ </programlisting>
+
+ <para>
+ Sooner or later, this will crash on SMP, because a timer can
+ have just gone off before the <function>spin_lock_bh()</function>,
+ and it will only get the lock after we
+ <function>spin_unlock_bh()</function>, and then try to free
+ the element (which has already been freed!).
+ </para>
+
+ <para>
+ This can be avoided by checking the result of
+ <function>del_timer()</function>: if it returns
+ <returnvalue>1</returnvalue>, the timer has been deleted.
+ If <returnvalue>0</returnvalue>, it means (in this
+ case) that it is currently running, so we can do:
+ </para>
+
+ <programlisting>
+ retry:
+ spin_lock_bh(&amp;list_lock);
+
+ while (list) {
+ struct foo *next = list-&gt;next;
+ if (!del_timer(&amp;list-&gt;timer)) {
+ /* Give timer a chance to delete this */
+ spin_unlock_bh(&amp;list_lock);
+ goto retry;
+ }
+ kfree(list);
+ list = next;
+ }
+
+ spin_unlock_bh(&amp;list_lock);
+ </programlisting>
+
+ <para>
+ Another common problem is deleting timers which restart
+ themselves (by calling <function>add_timer()</function> at the end
+ of their timer function). Because this is a fairly common case
+ which is prone to races, you should use <function>del_timer_sync()</function>
+ (<filename class="headerfile">include/linux/timer.h</filename>)
+ to handle this case. It returns the number of times the timer
+ had to be deleted before we finally stopped it from adding itself back
+ in.
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="Efficiency">
+ <title>Locking Speed</title>
+
+ <para>
+There are three main things to worry about when considering speed of
+some code which does locking. First is concurrency: how many things
+are going to be waiting while someone else is holding a lock. Second
+is the time taken to actually acquire and release an uncontended lock.
+Third is using fewer, or smarter locks. I'm assuming that the lock is
+used fairly often: otherwise, you wouldn't be concerned about
+efficiency.
+</para>
+ <para>
+Concurrency depends on how long the lock is usually held: you should
+hold the lock for as long as needed, but no longer. In the cache
+example, we always create the object without the lock held, and then
+grab the lock only when we are ready to insert it in the list.
+</para>
+ <para>
+Acquisition times depend on how much damage the lock operations do to
+the pipeline (pipeline stalls) and how likely it is that this CPU was
+the last one to grab the lock (ie. is the lock cache-hot for this
+CPU): on a machine with more CPUs, this likelihood drops fast.
+Consider a 700MHz Intel Pentium III: an instruction takes about 0.7ns,
+an atomic increment takes about 58ns, a lock which is cache-hot on
+this CPU takes 160ns, and a cacheline transfer from another CPU takes
+an additional 170 to 360ns. (These figures from Paul McKenney's
+<ulink url="http://www.linuxjournal.com/article.php?sid=6993"> Linux
+Journal RCU article</ulink>).
+</para>
+ <para>
+These two aims conflict: holding a lock for a short time might be done
+by splitting locks into parts (such as in our final per-object-lock
+example), but this increases the number of lock acquisitions, and the
+results are often slower than having a single lock. This is another
+reason to advocate locking simplicity.
+</para>
+ <para>
+The third concern is addressed below: there are some methods to reduce
+the amount of locking which needs to be done.
+</para>
+
+ <sect1 id="efficiency-rwlocks">
+ <title>Read/Write Lock Variants</title>
+
+ <para>
+ Both spinlocks and mutexes have read/write variants:
+ <type>rwlock_t</type> and <structname>struct rw_semaphore</structname>.
+ These divide users into two classes: the readers and the writers. If
+ you are only reading the data, you can get a read lock, but to write to
+ the data you need the write lock. Many people can hold a read lock,
+ but a writer must be sole holder.
+ </para>
+
+ <para>
+ If your code divides neatly along reader/writer lines (as our
+ cache code does), and the lock is held by readers for
+ significant lengths of time, using these locks can help. They
+ are slightly slower than the normal locks though, so in practice
+ <type>rwlock_t</type> is not usually worthwhile.
+ </para>
+ </sect1>
+
+ <sect1 id="efficiency-read-copy-update">
+ <title>Avoiding Locks: Read Copy Update</title>
+
+ <para>
+ There is a special method of read/write locking called Read Copy
+ Update. Using RCU, the readers can avoid taking a lock
+ altogether: as we expect our cache to be read more often than
+ updated (otherwise the cache is a waste of time), it is a
+ candidate for this optimization.
+ </para>
+
+ <para>
+ How do we get rid of read locks? Getting rid of read locks
+ means that writers may be changing the list underneath the
+ readers. That is actually quite simple: we can read a linked
+ list while an element is being added if the writer adds the
+ element very carefully. For example, adding
+ <symbol>new</symbol> to a single linked list called
+ <symbol>list</symbol>:
+ </para>
+
+ <programlisting>
+ new-&gt;next = list-&gt;next;
+ wmb();
+ list-&gt;next = new;
+ </programlisting>
+
+ <para>
+ The <function>wmb()</function> is a write memory barrier. It
+ ensures that the first operation (setting the new element's
+ <symbol>next</symbol> pointer) is complete and will be seen by
+ all CPUs, before the second operation is (putting the new
+ element into the list). This is important, since modern
+ compilers and modern CPUs can both reorder instructions unless
+ told otherwise: we want a reader to either not see the new
+ element at all, or see the new element with the
+ <symbol>next</symbol> pointer correctly pointing at the rest of
+ the list.
+ </para>
+ <para>
+ Fortunately, there is a function to do this for standard
+ <structname>struct list_head</structname> lists:
+ <function>list_add_rcu()</function>
+ (<filename>include/linux/list.h</filename>).
+ </para>
+ <para>
+ Removing an element from the list is even simpler: we replace
+ the pointer to the old element with a pointer to its successor,
+ and readers will either see it, or skip over it.
+ </para>
+ <programlisting>
+ list-&gt;next = old-&gt;next;
+ </programlisting>
+ <para>
+ There is <function>list_del_rcu()</function>
+ (<filename>include/linux/list.h</filename>) which does this (the
+ normal version poisons the old object, which we don't want).
+ </para>
+ <para>
+ The reader must also be careful: some CPUs can look through the
+ <symbol>next</symbol> pointer to start reading the contents of
+ the next element early, but don't realize that the pre-fetched
+ contents is wrong when the <symbol>next</symbol> pointer changes
+ underneath them. Once again, there is a
+ <function>list_for_each_entry_rcu()</function>
+ (<filename>include/linux/list.h</filename>) to help you. Of
+ course, writers can just use
+ <function>list_for_each_entry()</function>, since there cannot
+ be two simultaneous writers.
+ </para>
+ <para>
+ Our final dilemma is this: when can we actually destroy the
+ removed element? Remember, a reader might be stepping through
+ this element in the list right now: if we free this element and
+ the <symbol>next</symbol> pointer changes, the reader will jump
+ off into garbage and crash. We need to wait until we know that
+ all the readers who were traversing the list when we deleted the
+ element are finished. We use <function>call_rcu()</function> to
+ register a callback which will actually destroy the object once
+ all pre-existing readers are finished. Alternatively,
+ <function>synchronize_rcu()</function> may be used to block until
+ all pre-existing are finished.
+ </para>
+ <para>
+ But how does Read Copy Update know when the readers are
+ finished? The method is this: firstly, the readers always
+ traverse the list inside
+ <function>rcu_read_lock()</function>/<function>rcu_read_unlock()</function>
+ pairs: these simply disable preemption so the reader won't go to
+ sleep while reading the list.
+ </para>
+ <para>
+ RCU then waits until every other CPU has slept at least once:
+ since readers cannot sleep, we know that any readers which were
+ traversing the list during the deletion are finished, and the
+ callback is triggered. The real Read Copy Update code is a
+ little more optimized than this, but this is the fundamental
+ idea.
+ </para>
+
+<programlisting>
+--- cache.c.perobjectlock 2003-12-11 17:15:03.000000000 +1100
++++ cache.c.rcupdate 2003-12-11 17:55:14.000000000 +1100
+@@ -1,15 +1,18 @@
+ #include &lt;linux/list.h&gt;
+ #include &lt;linux/slab.h&gt;
+ #include &lt;linux/string.h&gt;
++#include &lt;linux/rcupdate.h&gt;
+ #include &lt;linux/mutex.h&gt;
+ #include &lt;asm/errno.h&gt;
+
+ struct object
+ {
+- /* These two protected by cache_lock. */
++ /* This is protected by RCU */
+ struct list_head list;
+ int popularity;
+
++ struct rcu_head rcu;
++
+ atomic_t refcnt;
+
+ /* Doesn't change once created. */
+@@ -40,7 +43,7 @@
+ {
+ struct object *i;
+
+- list_for_each_entry(i, &amp;cache, list) {
++ list_for_each_entry_rcu(i, &amp;cache, list) {
+ if (i-&gt;id == id) {
+ i-&gt;popularity++;
+ return i;
+@@ -49,19 +52,25 @@
+ return NULL;
+ }
+
++/* Final discard done once we know no readers are looking. */
++static void cache_delete_rcu(void *arg)
++{
++ object_put(arg);
++}
++
+ /* Must be holding cache_lock */
+ static void __cache_delete(struct object *obj)
+ {
+ BUG_ON(!obj);
+- list_del(&amp;obj-&gt;list);
+- object_put(obj);
++ list_del_rcu(&amp;obj-&gt;list);
+ cache_num--;
++ call_rcu(&amp;obj-&gt;rcu, cache_delete_rcu);
+ }
+
+ /* Must be holding cache_lock */
+ static void __cache_add(struct object *obj)
+ {
+- list_add(&amp;obj-&gt;list, &amp;cache);
++ list_add_rcu(&amp;obj-&gt;list, &amp;cache);
+ if (++cache_num > MAX_CACHE_SIZE) {
+ struct object *i, *outcast = NULL;
+ list_for_each_entry(i, &amp;cache, list) {
+@@ -104,12 +114,11 @@
+ struct object *cache_find(int id)
+ {
+ struct object *obj;
+- unsigned long flags;
+
+- spin_lock_irqsave(&amp;cache_lock, flags);
++ rcu_read_lock();
+ obj = __cache_find(id);
+ if (obj)
+ object_get(obj);
+- spin_unlock_irqrestore(&amp;cache_lock, flags);
++ rcu_read_unlock();
+ return obj;
+ }
+</programlisting>
+
+<para>
+Note that the reader will alter the
+<structfield>popularity</structfield> member in
+<function>__cache_find()</function>, and now it doesn't hold a lock.
+One solution would be to make it an <type>atomic_t</type>, but for
+this usage, we don't really care about races: an approximate result is
+good enough, so I didn't change it.
+</para>
+
+<para>
+The result is that <function>cache_find()</function> requires no
+synchronization with any other functions, so is almost as fast on SMP
+as it would be on UP.
+</para>
+
+<para>
+There is a further optimization possible here: remember our original
+cache code, where there were no reference counts and the caller simply
+held the lock whenever using the object? This is still possible: if
+you hold the lock, no one can delete the object, so you don't need to
+get and put the reference count.
+</para>
+
+<para>
+Now, because the 'read lock' in RCU is simply disabling preemption, a
+caller which always has preemption disabled between calling
+<function>cache_find()</function> and
+<function>object_put()</function> does not need to actually get and
+put the reference count: we could expose
+<function>__cache_find()</function> by making it non-static, and
+such callers could simply call that.
+</para>
+<para>
+The benefit here is that the reference count is not written to: the
+object is not altered in any way, which is much faster on SMP
+machines due to caching.
+</para>
+ </sect1>
+
+ <sect1 id="per-cpu">
+ <title>Per-CPU Data</title>
+
+ <para>
+ Another technique for avoiding locking which is used fairly
+ widely is to duplicate information for each CPU. For example,
+ if you wanted to keep a count of a common condition, you could
+ use a spin lock and a single counter. Nice and simple.
+ </para>
+
+ <para>
+ If that was too slow (it's usually not, but if you've got a
+ really big machine to test on and can show that it is), you
+ could instead use a counter for each CPU, then none of them need
+ an exclusive lock. See <function>DEFINE_PER_CPU()</function>,
+ <function>get_cpu_var()</function> and
+ <function>put_cpu_var()</function>
+ (<filename class="headerfile">include/linux/percpu.h</filename>).
+ </para>
+
+ <para>
+ Of particular use for simple per-cpu counters is the
+ <type>local_t</type> type, and the
+ <function>cpu_local_inc()</function> and related functions,
+ which are more efficient than simple code on some architectures
+ (<filename class="headerfile">include/asm/local.h</filename>).
+ </para>
+
+ <para>
+ Note that there is no simple, reliable way of getting an exact
+ value of such a counter, without introducing more locks. This
+ is not a problem for some uses.
+ </para>
+ </sect1>
+
+ <sect1 id="mostly-hardirq">
+ <title>Data Which Mostly Used By An IRQ Handler</title>
+
+ <para>
+ If data is always accessed from within the same IRQ handler, you
+ don't need a lock at all: the kernel already guarantees that the
+ irq handler will not run simultaneously on multiple CPUs.
+ </para>
+ <para>
+ Manfred Spraul points out that you can still do this, even if
+ the data is very occasionally accessed in user context or
+ softirqs/tasklets. The irq handler doesn't use a lock, and
+ all other accesses are done as so:
+ </para>
+
+<programlisting>
+ spin_lock(&amp;lock);
+ disable_irq(irq);
+ ...
+ enable_irq(irq);
+ spin_unlock(&amp;lock);
+</programlisting>
+ <para>
+ The <function>disable_irq()</function> prevents the irq handler
+ from running (and waits for it to finish if it's currently
+ running on other CPUs). The spinlock prevents any other
+ accesses happening at the same time. Naturally, this is slower
+ than just a <function>spin_lock_irq()</function> call, so it
+ only makes sense if this type of access happens extremely
+ rarely.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="sleeping-things">
+ <title>What Functions Are Safe To Call From Interrupts?</title>
+
+ <para>
+ Many functions in the kernel sleep (ie. call schedule())
+ directly or indirectly: you can never call them while holding a
+ spinlock, or with preemption disabled. This also means you need
+ to be in user context: calling them from an interrupt is illegal.
+ </para>
+
+ <sect1 id="sleeping">
+ <title>Some Functions Which Sleep</title>
+
+ <para>
+ The most common ones are listed below, but you usually have to
+ read the code to find out if other calls are safe. If everyone
+ else who calls it can sleep, you probably need to be able to
+ sleep, too. In particular, registration and deregistration
+ functions usually expect to be called from user context, and can
+ sleep.
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ Accesses to
+ <firstterm linkend="gloss-userspace">userspace</firstterm>:
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ <function>copy_from_user()</function>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>copy_to_user()</function>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>get_user()</function>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>put_user()</function>
+ </para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+
+ <listitem>
+ <para>
+ <function>kmalloc(GFP_KERNEL)</function>
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <function>mutex_lock_interruptible()</function> and
+ <function>mutex_lock()</function>
+ </para>
+ <para>
+ There is a <function>mutex_trylock()</function> which does not
+ sleep. Still, it must not be used inside interrupt context since
+ its implementation is not safe for that.
+ <function>mutex_unlock()</function> will also never sleep.
+ It cannot be used in interrupt context either since a mutex
+ must be released by the same task that acquired it.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect1>
+
+ <sect1 id="dont-sleep">
+ <title>Some Functions Which Don't Sleep</title>
+
+ <para>
+ Some functions are safe to call from any context, or holding
+ almost any lock.
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ <function>printk()</function>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>kfree()</function>
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ <function>add_timer()</function> and <function>del_timer()</function>
+ </para>
+ </listitem>
+ </itemizedlist>
+ </sect1>
+ </chapter>
+
+ <chapter id="apiref-mutex">
+ <title>Mutex API reference</title>
+!Iinclude/linux/mutex.h
+!Ekernel/locking/mutex.c
+ </chapter>
+
+ <chapter id="apiref-futex">
+ <title>Futex API reference</title>
+!Ikernel/futex.c
+ </chapter>
+
+ <chapter id="references">
+ <title>Further reading</title>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ <filename>Documentation/locking/spinlocks.txt</filename>:
+ Linus Torvalds' spinlocking tutorial in the kernel sources.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Unix Systems for Modern Architectures: Symmetric
+ Multiprocessing and Caching for Kernel Programmers:
+ </para>
+
+ <para>
+ Curt Schimmel's very good introduction to kernel level
+ locking (not written for Linux, but nearly everything
+ applies). The book is expensive, but really worth every
+ penny to understand SMP locking. [ISBN: 0201633388]
+ </para>
+ </listitem>
+ </itemizedlist>
+ </chapter>
+
+ <chapter id="thanks">
+ <title>Thanks</title>
+
+ <para>
+ Thanks to Telsa Gwynne for DocBooking, neatening and adding
+ style.
+ </para>
+
+ <para>
+ Thanks to Martin Pool, Philipp Rumpf, Stephen Rothwell, Paul
+ Mackerras, Ruedi Aschwanden, Alan Cox, Manfred Spraul, Tim
+ Waugh, Pete Zaitcev, James Morris, Robert Love, Paul McKenney,
+ John Ashby for proofreading, correcting, flaming, commenting.
+ </para>
+
+ <para>
+ Thanks to the cabal for having no influence on this document.
+ </para>
+ </chapter>
+
+ <glossary id="glossary">
+ <title>Glossary</title>
+
+ <glossentry id="gloss-preemption">
+ <glossterm>preemption</glossterm>
+ <glossdef>
+ <para>
+ Prior to 2.5, or when <symbol>CONFIG_PREEMPT</symbol> is
+ unset, processes in user context inside the kernel would not
+ preempt each other (ie. you had that CPU until you gave it up,
+ except for interrupts). With the addition of
+ <symbol>CONFIG_PREEMPT</symbol> in 2.5.4, this changed: when
+ in user context, higher priority tasks can "cut in": spinlocks
+ were changed to disable preemption, even on UP.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-bh">
+ <glossterm>bh</glossterm>
+ <glossdef>
+ <para>
+ Bottom Half: for historical reasons, functions with
+ '_bh' in them often now refer to any software interrupt, e.g.
+ <function>spin_lock_bh()</function> blocks any software interrupt
+ on the current CPU. Bottom halves are deprecated, and will
+ eventually be replaced by tasklets. Only one bottom half will be
+ running at any time.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-hwinterrupt">
+ <glossterm>Hardware Interrupt / Hardware IRQ</glossterm>
+ <glossdef>
+ <para>
+ Hardware interrupt request. <function>in_irq()</function> returns
+ <returnvalue>true</returnvalue> in a hardware interrupt handler.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-interruptcontext">
+ <glossterm>Interrupt Context</glossterm>
+ <glossdef>
+ <para>
+ Not user context: processing a hardware irq or software irq.
+ Indicated by the <function>in_interrupt()</function> macro
+ returning <returnvalue>true</returnvalue>.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-smp">
+ <glossterm><acronym>SMP</acronym></glossterm>
+ <glossdef>
+ <para>
+ Symmetric Multi-Processor: kernels compiled for multiple-CPU
+ machines. (CONFIG_SMP=y).
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-softirq">
+ <glossterm>Software Interrupt / softirq</glossterm>
+ <glossdef>
+ <para>
+ Software interrupt handler. <function>in_irq()</function> returns
+ <returnvalue>false</returnvalue>; <function>in_softirq()</function>
+ returns <returnvalue>true</returnvalue>. Tasklets and softirqs
+ both fall into the category of 'software interrupts'.
+ </para>
+ <para>
+ Strictly speaking a softirq is one of up to 32 enumerated software
+ interrupts which can run on multiple CPUs at once.
+ Sometimes used to refer to tasklets as
+ well (ie. all software interrupts).
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-tasklet">
+ <glossterm>tasklet</glossterm>
+ <glossdef>
+ <para>
+ A dynamically-registrable software interrupt,
+ which is guaranteed to only run on one CPU at a time.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-timers">
+ <glossterm>timer</glossterm>
+ <glossdef>
+ <para>
+ A dynamically-registrable software interrupt, which is run at
+ (or close to) a given time. When running, it is just like a
+ tasklet (in fact, they are called from the TIMER_SOFTIRQ).
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-up">
+ <glossterm><acronym>UP</acronym></glossterm>
+ <glossdef>
+ <para>
+ Uni-Processor: Non-SMP. (CONFIG_SMP=n).
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-usercontext">
+ <glossterm>User Context</glossterm>
+ <glossdef>
+ <para>
+ The kernel executing on behalf of a particular process (ie. a
+ system call or trap) or kernel thread. You can tell which
+ process with the <symbol>current</symbol> macro.) Not to
+ be confused with userspace. Can be interrupted by software or
+ hardware interrupts.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry id="gloss-userspace">
+ <glossterm>Userspace</glossterm>
+ <glossdef>
+ <para>
+ A process executing its own code outside the kernel.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ </glossary>
+</book>
+
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
new file mode 100644
index 000000000..f3abca7ec
--- /dev/null
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -0,0 +1,918 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="kgdbOnLinux">
+ <bookinfo>
+ <title>Using kgdb, kdb and the kernel debugger internals</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Jason</firstname>
+ <surname>Wessel</surname>
+ <affiliation>
+ <address>
+ <email>jason.wessel@windriver.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+ <copyright>
+ <year>2008,2010</year>
+ <holder>Wind River Systems, Inc.</holder>
+ </copyright>
+ <copyright>
+ <year>2004-2005</year>
+ <holder>MontaVista Software, Inc.</holder>
+ </copyright>
+ <copyright>
+ <year>2004</year>
+ <holder>Amit S. Kale</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This file is licensed under the terms of the GNU General Public License
+ version 2. This program is licensed "as is" without any warranty of any
+ kind, whether express or implied.
+ </para>
+
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+ <chapter id="Introduction">
+ <title>Introduction</title>
+ <para>
+ The kernel has two different debugger front ends (kdb and kgdb)
+ which interface to the debug core. It is possible to use either
+ of the debugger front ends and dynamically transition between them
+ if you configure the kernel properly at compile and runtime.
+ </para>
+ <para>
+ Kdb is simplistic shell-style interface which you can use on a
+ system console with a keyboard or serial console. You can use it
+ to inspect memory, registers, process lists, dmesg, and even set
+ breakpoints to stop in a certain location. Kdb is not a source
+ level debugger, although you can set breakpoints and execute some
+ basic kernel run control. Kdb is mainly aimed at doing some
+ analysis to aid in development or diagnosing kernel problems. You
+ can access some symbols by name in kernel built-ins or in kernel
+ modules if the code was built
+ with <symbol>CONFIG_KALLSYMS</symbol>.
+ </para>
+ <para>
+ Kgdb is intended to be used as a source level debugger for the
+ Linux kernel. It is used along with gdb to debug a Linux kernel.
+ The expectation is that gdb can be used to "break in" to the
+ kernel to inspect memory, variables and look through call stack
+ information similar to the way an application developer would use
+ gdb to debug an application. It is possible to place breakpoints
+ in kernel code and perform some limited execution stepping.
+ </para>
+ <para>
+ Two machines are required for using kgdb. One of these machines is
+ a development machine and the other is the target machine. The
+ kernel to be debugged runs on the target machine. The development
+ machine runs an instance of gdb against the vmlinux file which
+ contains the symbols (not a boot image such as bzImage, zImage,
+ uImage...). In gdb the developer specifies the connection
+ parameters and connects to kgdb. The type of connection a
+ developer makes with gdb depends on the availability of kgdb I/O
+ modules compiled as built-ins or loadable kernel modules in the test
+ machine's kernel.
+ </para>
+ </chapter>
+ <chapter id="CompilingAKernel">
+ <title>Compiling a kernel</title>
+ <para>
+ <itemizedlist>
+ <listitem><para>In order to enable compilation of kdb, you must first enable kgdb.</para></listitem>
+ <listitem><para>The kgdb test compile options are described in the kgdb test suite chapter.</para></listitem>
+ </itemizedlist>
+ </para>
+ <sect1 id="CompileKGDB">
+ <title>Kernel config options for kgdb</title>
+ <para>
+ To enable <symbol>CONFIG_KGDB</symbol> you should look under
+ "Kernel hacking" / "Kernel debugging" and select "KGDB: kernel debugger".
+ </para>
+ <para>
+ While it is not a hard requirement that you have symbols in your
+ vmlinux file, gdb tends not to be very useful without the symbolic
+ data, so you will want to turn
+ on <symbol>CONFIG_DEBUG_INFO</symbol> which is called "Compile the
+ kernel with debug info" in the config menu.
+ </para>
+ <para>
+ It is advised, but not required, that you turn on the
+ <symbol>CONFIG_FRAME_POINTER</symbol> kernel option which is called "Compile the
+ kernel with frame pointers" in the config menu. This option
+ inserts code to into the compiled executable which saves the frame
+ information in registers or on the stack at different points which
+ allows a debugger such as gdb to more accurately construct
+ stack back traces while debugging the kernel.
+ </para>
+ <para>
+ If the architecture that you are using supports the kernel option
+ CONFIG_DEBUG_RODATA, you should consider turning it off. This
+ option will prevent the use of software breakpoints because it
+ marks certain regions of the kernel's memory space as read-only.
+ If kgdb supports it for the architecture you are using, you can
+ use hardware breakpoints if you desire to run with the
+ CONFIG_DEBUG_RODATA option turned on, else you need to turn off
+ this option.
+ </para>
+ <para>
+ Next you should choose one of more I/O drivers to interconnect
+ debugging host and debugged target. Early boot debugging requires
+ a KGDB I/O driver that supports early debugging and the driver
+ must be built into the kernel directly. Kgdb I/O driver
+ configuration takes place via kernel or module parameters which
+ you can learn more about in the in the section that describes the
+ parameter "kgdboc".
+ </para>
+ <para>Here is an example set of .config symbols to enable or
+ disable for kgdb:
+ <itemizedlist>
+ <listitem><para># CONFIG_DEBUG_RODATA is not set</para></listitem>
+ <listitem><para>CONFIG_FRAME_POINTER=y</para></listitem>
+ <listitem><para>CONFIG_KGDB=y</para></listitem>
+ <listitem><para>CONFIG_KGDB_SERIAL_CONSOLE=y</para></listitem>
+ </itemizedlist>
+ </para>
+ </sect1>
+ <sect1 id="CompileKDB">
+ <title>Kernel config options for kdb</title>
+ <para>Kdb is quite a bit more complex than the simple gdbstub
+ sitting on top of the kernel's debug core. Kdb must implement a
+ shell, and also adds some helper functions in other parts of the
+ kernel, responsible for printing out interesting data such as what
+ you would see if you ran "lsmod", or "ps". In order to build kdb
+ into the kernel you follow the same steps as you would for kgdb.
+ </para>
+ <para>The main config option for kdb
+ is <symbol>CONFIG_KGDB_KDB</symbol> which is called "KGDB_KDB:
+ include kdb frontend for kgdb" in the config menu. In theory you
+ would have already also selected an I/O driver such as the
+ CONFIG_KGDB_SERIAL_CONSOLE interface if you plan on using kdb on a
+ serial port, when you were configuring kgdb.
+ </para>
+ <para>If you want to use a PS/2-style keyboard with kdb, you would
+ select CONFIG_KDB_KEYBOARD which is called "KGDB_KDB: keyboard as
+ input device" in the config menu. The CONFIG_KDB_KEYBOARD option
+ is not used for anything in the gdb interface to kgdb. The
+ CONFIG_KDB_KEYBOARD option only works with kdb.
+ </para>
+ <para>Here is an example set of .config symbols to enable/disable kdb:
+ <itemizedlist>
+ <listitem><para># CONFIG_DEBUG_RODATA is not set</para></listitem>
+ <listitem><para>CONFIG_FRAME_POINTER=y</para></listitem>
+ <listitem><para>CONFIG_KGDB=y</para></listitem>
+ <listitem><para>CONFIG_KGDB_SERIAL_CONSOLE=y</para></listitem>
+ <listitem><para>CONFIG_KGDB_KDB=y</para></listitem>
+ <listitem><para>CONFIG_KDB_KEYBOARD=y</para></listitem>
+ </itemizedlist>
+ </para>
+ </sect1>
+ </chapter>
+ <chapter id="kgdbKernelArgs">
+ <title>Kernel Debugger Boot Arguments</title>
+ <para>This section describes the various runtime kernel
+ parameters that affect the configuration of the kernel debugger.
+ The following chapter covers using kdb and kgdb as well as
+ providing some examples of the configuration parameters.</para>
+ <sect1 id="kgdboc">
+ <title>Kernel parameter: kgdboc</title>
+ <para>The kgdboc driver was originally an abbreviation meant to
+ stand for "kgdb over console". Today it is the primary mechanism
+ to configure how to communicate from gdb to kgdb as well as the
+ devices you want to use to interact with the kdb shell.
+ </para>
+ <para>For kgdb/gdb, kgdboc is designed to work with a single serial
+ port. It is intended to cover the circumstance where you want to
+ use a serial console as your primary console as well as using it to
+ perform kernel debugging. It is also possible to use kgdb on a
+ serial port which is not designated as a system console. Kgdboc
+ may be configured as a kernel built-in or a kernel loadable module.
+ You can only make use of <constant>kgdbwait</constant> and early
+ debugging if you build kgdboc into the kernel as a built-in.
+ </para>
+ <para>Optionally you can elect to activate kms (Kernel Mode
+ Setting) integration. When you use kms with kgdboc and you have a
+ video driver that has atomic mode setting hooks, it is possible to
+ enter the debugger on the graphics console. When the kernel
+ execution is resumed, the previous graphics mode will be restored.
+ This integration can serve as a useful tool to aid in diagnosing
+ crashes or doing analysis of memory with kdb while allowing the
+ full graphics console applications to run.
+ </para>
+ <sect2 id="kgdbocArgs">
+ <title>kgdboc arguments</title>
+ <para>Usage: <constant>kgdboc=[kms][[,]kbd][[,]serial_device][,baud]</constant></para>
+ <para>The order listed above must be observed if you use any of the
+ optional configurations together.
+ </para>
+ <para>Abbreviations:
+ <itemizedlist>
+ <listitem><para>kms = Kernel Mode Setting</para></listitem>
+ <listitem><para>kbd = Keyboard</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>You can configure kgdboc to use the keyboard, and/or a serial
+ device depending on if you are using kdb and/or kgdb, in one of the
+ following scenarios. The order listed above must be observed if
+ you use any of the optional configurations together. Using kms +
+ only gdb is generally not a useful combination.</para>
+ <sect3 id="kgdbocArgs1">
+ <title>Using loadable module or built-in</title>
+ <para>
+ <orderedlist>
+ <listitem><para>As a kernel built-in:</para>
+ <para>Use the kernel boot argument: <constant>kgdboc=&lt;tty-device&gt;,[baud]</constant></para></listitem>
+ <listitem>
+ <para>As a kernel loadable module:</para>
+ <para>Use the command: <constant>modprobe kgdboc kgdboc=&lt;tty-device&gt;,[baud]</constant></para>
+ <para>Here are two examples of how you might format the kgdboc
+ string. The first is for an x86 target using the first serial port.
+ The second example is for the ARM Versatile AB using the second
+ serial port.
+ <orderedlist>
+ <listitem><para><constant>kgdboc=ttyS0,115200</constant></para></listitem>
+ <listitem><para><constant>kgdboc=ttyAMA1,115200</constant></para></listitem>
+ </orderedlist>
+ </para>
+ </listitem>
+ </orderedlist></para>
+ </sect3>
+ <sect3 id="kgdbocArgs2">
+ <title>Configure kgdboc at runtime with sysfs</title>
+ <para>At run time you can enable or disable kgdboc by echoing a
+ parameters into the sysfs. Here are two examples:</para>
+ <orderedlist>
+ <listitem><para>Enable kgdboc on ttyS0</para>
+ <para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+ <listitem><para>Disable kgdboc</para>
+ <para><constant>echo "" &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+ </orderedlist>
+ <para>NOTE: You do not need to specify the baud if you are
+ configuring the console on tty which is already configured or
+ open.</para>
+ </sect3>
+ <sect3 id="kgdbocArgs3">
+ <title>More examples</title>
+ <para>You can configure kgdboc to use the keyboard, and/or a serial device
+ depending on if you are using kdb and/or kgdb, in one of the
+ following scenarios.
+ <orderedlist>
+ <listitem><para>kdb and kgdb over only a serial port</para>
+ <para><constant>kgdboc=&lt;serial_device&gt;[,baud]</constant></para>
+ <para>Example: <constant>kgdboc=ttyS0,115200</constant></para>
+ </listitem>
+ <listitem><para>kdb and kgdb with keyboard and a serial port</para>
+ <para><constant>kgdboc=kbd,&lt;serial_device&gt;[,baud]</constant></para>
+ <para>Example: <constant>kgdboc=kbd,ttyS0,115200</constant></para>
+ </listitem>
+ <listitem><para>kdb with a keyboard</para>
+ <para><constant>kgdboc=kbd</constant></para>
+ </listitem>
+ <listitem><para>kdb with kernel mode setting</para>
+ <para><constant>kgdboc=kms,kbd</constant></para>
+ </listitem>
+ <listitem><para>kdb with kernel mode setting and kgdb over a serial port</para>
+ <para><constant>kgdboc=kms,kbd,ttyS0,115200</constant></para>
+ </listitem>
+ </orderedlist>
+ </para>
+ <para>NOTE: Kgdboc does not support interrupting the target via the
+ gdb remote protocol. You must manually send a sysrq-g unless you
+ have a proxy that splits console output to a terminal program.
+ A console proxy has a separate TCP port for the debugger and a separate
+ TCP port for the "human" console. The proxy can take care of sending
+ the sysrq-g for you.
+ </para>
+ <para>When using kgdboc with no debugger proxy, you can end up
+ connecting the debugger at one of two entry points. If an
+ exception occurs after you have loaded kgdboc, a message should
+ print on the console stating it is waiting for the debugger. In
+ this case you disconnect your terminal program and then connect the
+ debugger in its place. If you want to interrupt the target system
+ and forcibly enter a debug session you have to issue a Sysrq
+ sequence and then type the letter <constant>g</constant>. Then
+ you disconnect the terminal session and connect gdb. Your options
+ if you don't like this are to hack gdb to send the sysrq-g for you
+ as well as on the initial connect, or to use a debugger proxy that
+ allows an unmodified gdb to do the debugging.
+ </para>
+ </sect3>
+ </sect2>
+ </sect1>
+ <sect1 id="kgdbwait">
+ <title>Kernel parameter: kgdbwait</title>
+ <para>
+ The Kernel command line option <constant>kgdbwait</constant> makes
+ kgdb wait for a debugger connection during booting of a kernel. You
+ can only use this option if you compiled a kgdb I/O driver into the
+ kernel and you specified the I/O driver configuration as a kernel
+ command line option. The kgdbwait parameter should always follow the
+ configuration parameter for the kgdb I/O driver in the kernel
+ command line else the I/O driver will not be configured prior to
+ asking the kernel to use it to wait.
+ </para>
+ <para>
+ The kernel will stop and wait as early as the I/O driver and
+ architecture allows when you use this option. If you build the
+ kgdb I/O driver as a loadable kernel module kgdbwait will not do
+ anything.
+ </para>
+ </sect1>
+ <sect1 id="kgdbcon">
+ <title>Kernel parameter: kgdbcon</title>
+ <para> The kgdbcon feature allows you to see printk() messages
+ inside gdb while gdb is connected to the kernel. Kdb does not make
+ use of the kgdbcon feature.
+ </para>
+ <para>Kgdb supports using the gdb serial protocol to send console
+ messages to the debugger when the debugger is connected and running.
+ There are two ways to activate this feature.
+ <orderedlist>
+ <listitem><para>Activate with the kernel command line option:</para>
+ <para><constant>kgdbcon</constant></para>
+ </listitem>
+ <listitem><para>Use sysfs before configuring an I/O driver</para>
+ <para>
+ <constant>echo 1 &gt; /sys/module/kgdb/parameters/kgdb_use_con</constant>
+ </para>
+ <para>
+ NOTE: If you do this after you configure the kgdb I/O driver, the
+ setting will not take effect until the next point the I/O is
+ reconfigured.
+ </para>
+ </listitem>
+ </orderedlist>
+ </para>
+ <para>IMPORTANT NOTE: You cannot use kgdboc + kgdbcon on a tty that is an
+ active system console. An example of incorrect usage is <constant>console=ttyS0,115200 kgdboc=ttyS0 kgdbcon</constant>
+ </para>
+ <para>It is possible to use this option with kgdboc on a tty that is not a system console.
+ </para>
+ </sect1>
+ <sect1 id="kgdbreboot">
+ <title>Run time parameter: kgdbreboot</title>
+ <para> The kgdbreboot feature allows you to change how the debugger
+ deals with the reboot notification. You have 3 choices for the
+ behavior. The default behavior is always set to 0.</para>
+ <orderedlist>
+ <listitem><para>echo -1 > /sys/module/debug_core/parameters/kgdbreboot</para>
+ <para>Ignore the reboot notification entirely.</para>
+ </listitem>
+ <listitem><para>echo 0 > /sys/module/debug_core/parameters/kgdbreboot</para>
+ <para>Send the detach message to any attached debugger client.</para>
+ </listitem>
+ <listitem><para>echo 1 > /sys/module/debug_core/parameters/kgdbreboot</para>
+ <para>Enter the debugger on reboot notify.</para>
+ </listitem>
+ </orderedlist>
+ </sect1>
+ </chapter>
+ <chapter id="usingKDB">
+ <title>Using kdb</title>
+ <para>
+ </para>
+ <sect1 id="quickKDBserial">
+ <title>Quick start for kdb on a serial port</title>
+ <para>This is a quick example of how to use kdb.</para>
+ <para><orderedlist>
+ <listitem><para>Configure kgdboc at boot using kernel parameters:
+ <itemizedlist>
+ <listitem><para><constant>console=ttyS0,115200 kgdboc=ttyS0,115200</constant></para></listitem>
+ </itemizedlist></para>
+ <para>OR</para>
+ <para>Configure kgdboc after the kernel has booted; assuming you are using a serial port console:
+ <itemizedlist>
+ <listitem><para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem><para>Enter the kernel debugger manually or by waiting for an oops or fault. There are several ways you can enter the kernel debugger manually; all involve using the sysrq-g, which means you must have enabled CONFIG_MAGIC_SYSRQ=y in your kernel config.</para>
+ <itemizedlist>
+ <listitem><para>When logged in as root or with a super user session you can run:</para>
+ <para><constant>echo g &gt; /proc/sysrq-trigger</constant></para></listitem>
+ <listitem><para>Example using minicom 2.2</para>
+ <para>Press: <constant>Control-a</constant></para>
+ <para>Press: <constant>f</constant></para>
+ <para>Press: <constant>g</constant></para>
+ </listitem>
+ <listitem><para>When you have telneted to a terminal server that supports sending a remote break</para>
+ <para>Press: <constant>Control-]</constant></para>
+ <para>Type in:<constant>send break</constant></para>
+ <para>Press: <constant>Enter</constant></para>
+ <para>Press: <constant>g</constant></para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+ <listitem><para>From the kdb prompt you can run the "help" command to see a complete list of the commands that are available.</para>
+ <para>Some useful commands in kdb include:
+ <itemizedlist>
+ <listitem><para>lsmod -- Shows where kernel modules are loaded</para></listitem>
+ <listitem><para>ps -- Displays only the active processes</para></listitem>
+ <listitem><para>ps A -- Shows all the processes</para></listitem>
+ <listitem><para>summary -- Shows kernel version info and memory usage</para></listitem>
+ <listitem><para>bt -- Get a backtrace of the current process using dump_stack()</para></listitem>
+ <listitem><para>dmesg -- View the kernel syslog buffer</para></listitem>
+ <listitem><para>go -- Continue the system</para></listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem>
+ <para>When you are done using kdb you need to consider rebooting the
+ system or using the "go" command to resuming normal kernel
+ execution. If you have paused the kernel for a lengthy period of
+ time, applications that rely on timely networking or anything to do
+ with real wall clock time could be adversely affected, so you
+ should take this into consideration when using the kernel
+ debugger.</para>
+ </listitem>
+ </orderedlist></para>
+ </sect1>
+ <sect1 id="quickKDBkeyboard">
+ <title>Quick start for kdb using a keyboard connected console</title>
+ <para>This is a quick example of how to use kdb with a keyboard.</para>
+ <para><orderedlist>
+ <listitem><para>Configure kgdboc at boot using kernel parameters:
+ <itemizedlist>
+ <listitem><para><constant>kgdboc=kbd</constant></para></listitem>
+ </itemizedlist></para>
+ <para>OR</para>
+ <para>Configure kgdboc after the kernel has booted:
+ <itemizedlist>
+ <listitem><para><constant>echo kbd &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem><para>Enter the kernel debugger manually or by waiting for an oops or fault. There are several ways you can enter the kernel debugger manually; all involve using the sysrq-g, which means you must have enabled CONFIG_MAGIC_SYSRQ=y in your kernel config.</para>
+ <itemizedlist>
+ <listitem><para>When logged in as root or with a super user session you can run:</para>
+ <para><constant>echo g &gt; /proc/sysrq-trigger</constant></para></listitem>
+ <listitem><para>Example using a laptop keyboard</para>
+ <para>Press and hold down: <constant>Alt</constant></para>
+ <para>Press and hold down: <constant>Fn</constant></para>
+ <para>Press and release the key with the label: <constant>SysRq</constant></para>
+ <para>Release: <constant>Fn</constant></para>
+ <para>Press and release: <constant>g</constant></para>
+ <para>Release: <constant>Alt</constant></para>
+ </listitem>
+ <listitem><para>Example using a PS/2 101-key keyboard</para>
+ <para>Press and hold down: <constant>Alt</constant></para>
+ <para>Press and release the key with the label: <constant>SysRq</constant></para>
+ <para>Press and release: <constant>g</constant></para>
+ <para>Release: <constant>Alt</constant></para>
+ </listitem>
+ </itemizedlist>
+ </listitem>
+ <listitem>
+ <para>Now type in a kdb command such as "help", "dmesg", "bt" or "go" to continue kernel execution.</para>
+ </listitem>
+ </orderedlist></para>
+ </sect1>
+ </chapter>
+ <chapter id="EnableKGDB">
+ <title>Using kgdb / gdb</title>
+ <para>In order to use kgdb you must activate it by passing
+ configuration information to one of the kgdb I/O drivers. If you
+ do not pass any configuration information kgdb will not do anything
+ at all. Kgdb will only actively hook up to the kernel trap hooks
+ if a kgdb I/O driver is loaded and configured. If you unconfigure
+ a kgdb I/O driver, kgdb will unregister all the kernel hook points.
+ </para>
+ <para> All kgdb I/O drivers can be reconfigured at run time, if
+ <symbol>CONFIG_SYSFS</symbol> and <symbol>CONFIG_MODULES</symbol>
+ are enabled, by echo'ing a new config string to
+ <constant>/sys/module/&lt;driver&gt;/parameter/&lt;option&gt;</constant>.
+ The driver can be unconfigured by passing an empty string. You cannot
+ change the configuration while the debugger is attached. Make sure
+ to detach the debugger with the <constant>detach</constant> command
+ prior to trying to unconfigure a kgdb I/O driver.
+ </para>
+ <sect1 id="ConnectingGDB">
+ <title>Connecting with gdb to a serial port</title>
+ <orderedlist>
+ <listitem><para>Configure kgdboc</para>
+ <para>Configure kgdboc at boot using kernel parameters:
+ <itemizedlist>
+ <listitem><para><constant>kgdboc=ttyS0,115200</constant></para></listitem>
+ </itemizedlist></para>
+ <para>OR</para>
+ <para>Configure kgdboc after the kernel has booted:
+ <itemizedlist>
+ <listitem><para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+ </itemizedlist></para>
+ </listitem>
+ <listitem>
+ <para>Stop kernel execution (break into the debugger)</para>
+ <para>In order to connect to gdb via kgdboc, the kernel must
+ first be stopped. There are several ways to stop the kernel which
+ include using kgdbwait as a boot argument, via a sysrq-g, or running
+ the kernel until it takes an exception where it waits for the
+ debugger to attach.
+ <itemizedlist>
+ <listitem><para>When logged in as root or with a super user session you can run:</para>
+ <para><constant>echo g &gt; /proc/sysrq-trigger</constant></para></listitem>
+ <listitem><para>Example using minicom 2.2</para>
+ <para>Press: <constant>Control-a</constant></para>
+ <para>Press: <constant>f</constant></para>
+ <para>Press: <constant>g</constant></para>
+ </listitem>
+ <listitem><para>When you have telneted to a terminal server that supports sending a remote break</para>
+ <para>Press: <constant>Control-]</constant></para>
+ <para>Type in:<constant>send break</constant></para>
+ <para>Press: <constant>Enter</constant></para>
+ <para>Press: <constant>g</constant></para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem>
+ <para>Connect from gdb</para>
+ <para>
+ Example (using a directly connected port):
+ </para>
+ <programlisting>
+ % gdb ./vmlinux
+ (gdb) set remotebaud 115200
+ (gdb) target remote /dev/ttyS0
+ </programlisting>
+ <para>
+ Example (kgdb to a terminal server on TCP port 2012):
+ </para>
+ <programlisting>
+ % gdb ./vmlinux
+ (gdb) target remote 192.168.2.2:2012
+ </programlisting>
+ <para>
+ Once connected, you can debug a kernel the way you would debug an
+ application program.
+ </para>
+ <para>
+ If you are having problems connecting or something is going
+ seriously wrong while debugging, it will most often be the case
+ that you want to enable gdb to be verbose about its target
+ communications. You do this prior to issuing the <constant>target
+ remote</constant> command by typing in: <constant>set debug remote 1</constant>
+ </para>
+ </listitem>
+ </orderedlist>
+ <para>Remember if you continue in gdb, and need to "break in" again,
+ you need to issue an other sysrq-g. It is easy to create a simple
+ entry point by putting a breakpoint at <constant>sys_sync</constant>
+ and then you can run "sync" from a shell or script to break into the
+ debugger.</para>
+ </sect1>
+ </chapter>
+ <chapter id="switchKdbKgdb">
+ <title>kgdb and kdb interoperability</title>
+ <para>It is possible to transition between kdb and kgdb dynamically.
+ The debug core will remember which you used the last time and
+ automatically start in the same mode.</para>
+ <sect1>
+ <title>Switching between kdb and kgdb</title>
+ <sect2>
+ <title>Switching from kgdb to kdb</title>
+ <para>
+ There are two ways to switch from kgdb to kdb: you can use gdb to
+ issue a maintenance packet, or you can blindly type the command $3#33.
+ Whenever the kernel debugger stops in kgdb mode it will print the
+ message <constant>KGDB or $3#33 for KDB</constant>. It is important
+ to note that you have to type the sequence correctly in one pass.
+ You cannot type a backspace or delete because kgdb will interpret
+ that as part of the debug stream.
+ <orderedlist>
+ <listitem><para>Change from kgdb to kdb by blindly typing:</para>
+ <para><constant>$3#33</constant></para></listitem>
+ <listitem><para>Change from kgdb to kdb with gdb</para>
+ <para><constant>maintenance packet 3</constant></para>
+ <para>NOTE: Now you must kill gdb. Typically you press control-z and
+ issue the command: kill -9 %</para></listitem>
+ </orderedlist>
+ </para>
+ </sect2>
+ <sect2>
+ <title>Change from kdb to kgdb</title>
+ <para>There are two ways you can change from kdb to kgdb. You can
+ manually enter kgdb mode by issuing the kgdb command from the kdb
+ shell prompt, or you can connect gdb while the kdb shell prompt is
+ active. The kdb shell looks for the typical first commands that gdb
+ would issue with the gdb remote protocol and if it sees one of those
+ commands it automatically changes into kgdb mode.</para>
+ <orderedlist>
+ <listitem><para>From kdb issue the command:</para>
+ <para><constant>kgdb</constant></para>
+ <para>Now disconnect your terminal program and connect gdb in its place</para></listitem>
+ <listitem><para>At the kdb prompt, disconnect the terminal program and connect gdb in its place.</para></listitem>
+ </orderedlist>
+ </sect2>
+ </sect1>
+ <sect1>
+ <title>Running kdb commands from gdb</title>
+ <para>It is possible to run a limited set of kdb commands from gdb,
+ using the gdb monitor command. You don't want to execute any of the
+ run control or breakpoint operations, because it can disrupt the
+ state of the kernel debugger. You should be using gdb for
+ breakpoints and run control operations if you have gdb connected.
+ The more useful commands to run are things like lsmod, dmesg, ps or
+ possibly some of the memory information commands. To see all the kdb
+ commands you can run <constant>monitor help</constant>.</para>
+ <para>Example:
+ <informalexample><programlisting>
+(gdb) monitor ps
+1 idle process (state I) and
+27 sleeping system daemon (state M) processes suppressed,
+use 'ps A' to see all.
+Task Addr Pid Parent [*] cpu State Thread Command
+
+0xc78291d0 1 0 0 0 S 0xc7829404 init
+0xc7954150 942 1 0 0 S 0xc7954384 dropbear
+0xc78789c0 944 1 0 0 S 0xc7878bf4 sh
+(gdb)
+ </programlisting></informalexample>
+ </para>
+ </sect1>
+ </chapter>
+ <chapter id="KGDBTestSuite">
+ <title>kgdb Test Suite</title>
+ <para>
+ When kgdb is enabled in the kernel config you can also elect to
+ enable the config parameter KGDB_TESTS. Turning this on will
+ enable a special kgdb I/O module which is designed to test the
+ kgdb internal functions.
+ </para>
+ <para>
+ The kgdb tests are mainly intended for developers to test the kgdb
+ internals as well as a tool for developing a new kgdb architecture
+ specific implementation. These tests are not really for end users
+ of the Linux kernel. The primary source of documentation would be
+ to look in the drivers/misc/kgdbts.c file.
+ </para>
+ <para>
+ The kgdb test suite can also be configured at compile time to run
+ the core set of tests by setting the kernel config parameter
+ KGDB_TESTS_ON_BOOT. This particular option is aimed at automated
+ regression testing and does not require modifying the kernel boot
+ config arguments. If this is turned on, the kgdb test suite can
+ be disabled by specifying "kgdbts=" as a kernel boot argument.
+ </para>
+ </chapter>
+ <chapter id="CommonBackEndReq">
+ <title>Kernel Debugger Internals</title>
+ <sect1 id="kgdbArchitecture">
+ <title>Architecture Specifics</title>
+ <para>
+ The kernel debugger is organized into a number of components:
+ <orderedlist>
+ <listitem><para>The debug core</para>
+ <para>
+ The debug core is found in kernel/debugger/debug_core.c. It contains:
+ <itemizedlist>
+ <listitem><para>A generic OS exception handler which includes
+ sync'ing the processors into a stopped state on an multi-CPU
+ system.</para></listitem>
+ <listitem><para>The API to talk to the kgdb I/O drivers</para></listitem>
+ <listitem><para>The API to make calls to the arch-specific kgdb implementation</para></listitem>
+ <listitem><para>The logic to perform safe memory reads and writes to memory while using the debugger</para></listitem>
+ <listitem><para>A full implementation for software breakpoints unless overridden by the arch</para></listitem>
+ <listitem><para>The API to invoke either the kdb or kgdb frontend to the debug core.</para></listitem>
+ <listitem><para>The structures and callback API for atomic kernel mode setting.</para>
+ <para>NOTE: kgdboc is where the kms callbacks are invoked.</para></listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem><para>kgdb arch-specific implementation</para>
+ <para>
+ This implementation is generally found in arch/*/kernel/kgdb.c.
+ As an example, arch/x86/kernel/kgdb.c contains the specifics to
+ implement HW breakpoint as well as the initialization to
+ dynamically register and unregister for the trap handlers on
+ this architecture. The arch-specific portion implements:
+ <itemizedlist>
+ <listitem><para>contains an arch-specific trap catcher which
+ invokes kgdb_handle_exception() to start kgdb about doing its
+ work</para></listitem>
+ <listitem><para>translation to and from gdb specific packet format to pt_regs</para></listitem>
+ <listitem><para>Registration and unregistration of architecture specific trap hooks</para></listitem>
+ <listitem><para>Any special exception handling and cleanup</para></listitem>
+ <listitem><para>NMI exception handling and cleanup</para></listitem>
+ <listitem><para>(optional) HW breakpoints</para></listitem>
+ </itemizedlist>
+ </para>
+ </listitem>
+ <listitem><para>gdbstub frontend (aka kgdb)</para>
+ <para>The gdbstub is located in kernel/debug/gdbstub.c. It contains:</para>
+ <itemizedlist>
+ <listitem><para>All the logic to implement the gdb serial protocol</para></listitem>
+ </itemizedlist>
+ </listitem>
+ <listitem><para>kdb frontend</para>
+ <para>The kdb debugger shell is broken down into a number of
+ components. The kdb core is located in kernel/debug/kdb. There
+ are a number of helper functions in some of the other kernel
+ components to make it possible for kdb to examine and report
+ information about the kernel without taking locks that could
+ cause a kernel deadlock. The kdb core contains implements the following functionality.</para>
+ <itemizedlist>
+ <listitem><para>A simple shell</para></listitem>
+ <listitem><para>The kdb core command set</para></listitem>
+ <listitem><para>A registration API to register additional kdb shell commands.</para>
+ <itemizedlist>
+ <listitem><para>A good example of a self-contained kdb module
+ is the "ftdump" command for dumping the ftrace buffer. See:
+ kernel/trace/trace_kdb.c</para></listitem>
+ <listitem><para>For an example of how to dynamically register
+ a new kdb command you can build the kdb_hello.ko kernel module
+ from samples/kdb/kdb_hello.c. To build this example you can
+ set CONFIG_SAMPLES=y and CONFIG_SAMPLE_KDB=m in your kernel
+ config. Later run "modprobe kdb_hello" and the next time you
+ enter the kdb shell, you can run the "hello"
+ command.</para></listitem>
+ </itemizedlist></listitem>
+ <listitem><para>The implementation for kdb_printf() which
+ emits messages directly to I/O drivers, bypassing the kernel
+ log.</para></listitem>
+ <listitem><para>SW / HW breakpoint management for the kdb shell</para></listitem>
+ </itemizedlist>
+ </listitem>
+ <listitem><para>kgdb I/O driver</para>
+ <para>
+ Each kgdb I/O driver has to provide an implementation for the following:
+ <itemizedlist>
+ <listitem><para>configuration via built-in or module</para></listitem>
+ <listitem><para>dynamic configuration and kgdb hook registration calls</para></listitem>
+ <listitem><para>read and write character interface</para></listitem>
+ <listitem><para>A cleanup handler for unconfiguring from the kgdb core</para></listitem>
+ <listitem><para>(optional) Early debug methodology</para></listitem>
+ </itemizedlist>
+ Any given kgdb I/O driver has to operate very closely with the
+ hardware and must do it in such a way that does not enable
+ interrupts or change other parts of the system context without
+ completely restoring them. The kgdb core will repeatedly "poll"
+ a kgdb I/O driver for characters when it needs input. The I/O
+ driver is expected to return immediately if there is no data
+ available. Doing so allows for the future possibility to touch
+ watchdog hardware in such a way as to have a target system not
+ reset when these are enabled.
+ </para>
+ </listitem>
+ </orderedlist>
+ </para>
+ <para>
+ If you are intent on adding kgdb architecture specific support
+ for a new architecture, the architecture should define
+ <constant>HAVE_ARCH_KGDB</constant> in the architecture specific
+ Kconfig file. This will enable kgdb for the architecture, and
+ at that point you must create an architecture specific kgdb
+ implementation.
+ </para>
+ <para>
+ There are a few flags which must be set on every architecture in
+ their &lt;asm/kgdb.h&gt; file. These are:
+ <itemizedlist>
+ <listitem>
+ <para>
+ NUMREGBYTES: The size in bytes of all of the registers, so
+ that we can ensure they will all fit into a packet.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ BUFMAX: The size in bytes of the buffer GDB will read into.
+ This must be larger than NUMREGBYTES.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ CACHE_FLUSH_IS_SAFE: Set to 1 if it is always safe to call
+ flush_cache_range or flush_icache_range. On some architectures,
+ these functions may not be safe to call on SMP since we keep other
+ CPUs in a holding pattern.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ There are also the following functions for the common backend,
+ found in kernel/kgdb.c, that must be supplied by the
+ architecture-specific backend unless marked as (optional), in
+ which case a default function maybe used if the architecture
+ does not need to provide a specific implementation.
+ </para>
+!Iinclude/linux/kgdb.h
+ </sect1>
+ <sect1 id="kgdbocDesign">
+ <title>kgdboc internals</title>
+ <sect2>
+ <title>kgdboc and uarts</title>
+ <para>
+ The kgdboc driver is actually a very thin driver that relies on the
+ underlying low level to the hardware driver having "polling hooks"
+ to which the tty driver is attached. In the initial
+ implementation of kgdboc the serial_core was changed to expose a
+ low level UART hook for doing polled mode reading and writing of a
+ single character while in an atomic context. When kgdb makes an I/O
+ request to the debugger, kgdboc invokes a callback in the serial
+ core which in turn uses the callback in the UART driver.</para>
+ <para>
+ When using kgdboc with a UART, the UART driver must implement two callbacks in the <constant>struct uart_ops</constant>. Example from drivers/8250.c:<programlisting>
+#ifdef CONFIG_CONSOLE_POLL
+ .poll_get_char = serial8250_get_poll_char,
+ .poll_put_char = serial8250_put_poll_char,
+#endif
+ </programlisting>
+ Any implementation specifics around creating a polling driver use the
+ <constant>#ifdef CONFIG_CONSOLE_POLL</constant>, as shown above.
+ Keep in mind that polling hooks have to be implemented in such a way
+ that they can be called from an atomic context and have to restore
+ the state of the UART chip on return such that the system can return
+ to normal when the debugger detaches. You need to be very careful
+ with any kind of lock you consider, because failing here is most likely
+ going to mean pressing the reset button.
+ </para>
+ </sect2>
+ <sect2 id="kgdbocKbd">
+ <title>kgdboc and keyboards</title>
+ <para>The kgdboc driver contains logic to configure communications
+ with an attached keyboard. The keyboard infrastructure is only
+ compiled into the kernel when CONFIG_KDB_KEYBOARD=y is set in the
+ kernel configuration.</para>
+ <para>The core polled keyboard driver driver for PS/2 type keyboards
+ is in drivers/char/kdb_keyboard.c. This driver is hooked into the
+ debug core when kgdboc populates the callback in the array
+ called <constant>kdb_poll_funcs[]</constant>. The
+ kdb_get_kbd_char() is the top-level function which polls hardware
+ for single character input.
+ </para>
+ </sect2>
+ <sect2 id="kgdbocKms">
+ <title>kgdboc and kms</title>
+ <para>The kgdboc driver contains logic to request the graphics
+ display to switch to a text context when you are using
+ "kgdboc=kms,kbd", provided that you have a video driver which has a
+ frame buffer console and atomic kernel mode setting support.</para>
+ <para>
+ Every time the kernel
+ debugger is entered it calls kgdboc_pre_exp_handler() which in turn
+ calls con_debug_enter() in the virtual console layer. On resuming kernel
+ execution, the kernel debugger calls kgdboc_post_exp_handler() which
+ in turn calls con_debug_leave().</para>
+ <para>Any video driver that wants to be compatible with the kernel
+ debugger and the atomic kms callbacks must implement the
+ mode_set_base_atomic, fb_debug_enter and fb_debug_leave operations.
+ For the fb_debug_enter and fb_debug_leave the option exists to use
+ the generic drm fb helper functions or implement something custom for
+ the hardware. The following example shows the initialization of the
+ .mode_set_base_atomic operation in
+ drivers/gpu/drm/i915/intel_display.c:
+ <informalexample>
+ <programlisting>
+static const struct drm_crtc_helper_funcs intel_helper_funcs = {
+[...]
+ .mode_set_base_atomic = intel_pipe_set_base_atomic,
+[...]
+};
+ </programlisting>
+ </informalexample>
+ </para>
+ <para>Here is an example of how the i915 driver initializes the fb_debug_enter and fb_debug_leave functions to use the generic drm helpers in
+ drivers/gpu/drm/i915/intel_fb.c:
+ <informalexample>
+ <programlisting>
+static struct fb_ops intelfb_ops = {
+[...]
+ .fb_debug_enter = drm_fb_helper_debug_enter,
+ .fb_debug_leave = drm_fb_helper_debug_leave,
+[...]
+};
+ </programlisting>
+ </informalexample>
+ </para>
+ </sect2>
+ </sect1>
+ </chapter>
+ <chapter id="credits">
+ <title>Credits</title>
+ <para>
+ The following people have contributed to this document:
+ <orderedlist>
+ <listitem><para>Amit Kale<email>amitkale@linsyssoft.com</email></para></listitem>
+ <listitem><para>Tom Rini<email>trini@kernel.crashing.org</email></para></listitem>
+ </orderedlist>
+ In March 2008 this document was completely rewritten by:
+ <itemizedlist>
+ <listitem><para>Jason Wessel<email>jason.wessel@windriver.com</email></para></listitem>
+ </itemizedlist>
+ In Jan 2010 this document was updated to include kdb.
+ <itemizedlist>
+ <listitem><para>Jason Wessel<email>jason.wessel@windriver.com</email></para></listitem>
+ </itemizedlist>
+ </para>
+ </chapter>
+</book>
+
diff --git a/Documentation/DocBook/libata.tmpl b/Documentation/DocBook/libata.tmpl
new file mode 100644
index 000000000..d7fcdc5a4
--- /dev/null
+++ b/Documentation/DocBook/libata.tmpl
@@ -0,0 +1,1625 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="libataDevGuide">
+ <bookinfo>
+ <title>libATA Developer's Guide</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Jeff</firstname>
+ <surname>Garzik</surname>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2003-2006</year>
+ <holder>Jeff Garzik</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ The contents of this file are subject to the Open
+ Software License version 1.1 that can be found at
+ <ulink url="http://fedoraproject.org/wiki/Licensing:OSL1.1">http://fedoraproject.org/wiki/Licensing:OSL1.1</ulink>
+ and is included herein by reference.
+ </para>
+
+ <para>
+ Alternatively, the contents of this file may be used under the terms
+ of the GNU General Public License version 2 (the "GPL") as distributed
+ in the kernel source COPYING file, in which case the provisions of
+ the GPL are applicable instead of the above. If you wish to allow
+ the use of your version of this file only under the terms of the
+ GPL and not to allow others to use your version of this file under
+ the OSL, indicate your decision by deleting the provisions above and
+ replace them with the notice and other provisions required by the GPL.
+ If you do not delete the provisions above, a recipient may use your
+ version of this file under either the OSL or the GPL.
+ </para>
+
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="libataIntroduction">
+ <title>Introduction</title>
+ <para>
+ libATA is a library used inside the Linux kernel to support ATA host
+ controllers and devices. libATA provides an ATA driver API, class
+ transports for ATA and ATAPI devices, and SCSI&lt;-&gt;ATA translation
+ for ATA devices according to the T10 SAT specification.
+ </para>
+ <para>
+ This Guide documents the libATA driver API, library functions, library
+ internals, and a couple sample ATA low-level drivers.
+ </para>
+ </chapter>
+
+ <chapter id="libataDriverApi">
+ <title>libata Driver API</title>
+ <para>
+ struct ata_port_operations is defined for every low-level libata
+ hardware driver, and it controls how the low-level driver
+ interfaces with the ATA and SCSI layers.
+ </para>
+ <para>
+ FIS-based drivers will hook into the system with ->qc_prep() and
+ ->qc_issue() high-level hooks. Hardware which behaves in a manner
+ similar to PCI IDE hardware may utilize several generic helpers,
+ defining at a bare minimum the bus I/O addresses of the ATA shadow
+ register blocks.
+ </para>
+ <sect1>
+ <title>struct ata_port_operations</title>
+
+ <sect2><title>Disable ATA port</title>
+ <programlisting>
+void (*port_disable) (struct ata_port *);
+ </programlisting>
+
+ <para>
+ Called from ata_bus_probe() error path, as well as when
+ unregistering from the SCSI module (rmmod, hot unplug).
+ This function should do whatever needs to be done to take the
+ port out of use. In most cases, ata_port_disable() can be used
+ as this hook.
+ </para>
+ <para>
+ Called from ata_bus_probe() on a failed probe.
+ Called from ata_scsi_release().
+ </para>
+
+ </sect2>
+
+ <sect2><title>Post-IDENTIFY device configuration</title>
+ <programlisting>
+void (*dev_config) (struct ata_port *, struct ata_device *);
+ </programlisting>
+
+ <para>
+ Called after IDENTIFY [PACKET] DEVICE is issued to each device
+ found. Typically used to apply device-specific fixups prior to
+ issue of SET FEATURES - XFER MODE, and prior to operation.
+ </para>
+ <para>
+ This entry may be specified as NULL in ata_port_operations.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Set PIO/DMA mode</title>
+ <programlisting>
+void (*set_piomode) (struct ata_port *, struct ata_device *);
+void (*set_dmamode) (struct ata_port *, struct ata_device *);
+void (*post_set_mode) (struct ata_port *);
+unsigned int (*mode_filter) (struct ata_port *, struct ata_device *, unsigned int);
+ </programlisting>
+
+ <para>
+ Hooks called prior to the issue of SET FEATURES - XFER MODE
+ command. The optional ->mode_filter() hook is called when libata
+ has built a mask of the possible modes. This is passed to the
+ ->mode_filter() function which should return a mask of valid modes
+ after filtering those unsuitable due to hardware limits. It is not
+ valid to use this interface to add modes.
+ </para>
+ <para>
+ dev->pio_mode and dev->dma_mode are guaranteed to be valid when
+ ->set_piomode() and when ->set_dmamode() is called. The timings for
+ any other drive sharing the cable will also be valid at this point.
+ That is the library records the decisions for the modes of each
+ drive on a channel before it attempts to set any of them.
+ </para>
+ <para>
+ ->post_set_mode() is
+ called unconditionally, after the SET FEATURES - XFER MODE
+ command completes successfully.
+ </para>
+
+ <para>
+ ->set_piomode() is always called (if present), but
+ ->set_dma_mode() is only called if DMA is possible.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Taskfile read/write</title>
+ <programlisting>
+void (*sff_tf_load) (struct ata_port *ap, struct ata_taskfile *tf);
+void (*sff_tf_read) (struct ata_port *ap, struct ata_taskfile *tf);
+ </programlisting>
+
+ <para>
+ ->tf_load() is called to load the given taskfile into hardware
+ registers / DMA buffers. ->tf_read() is called to read the
+ hardware registers / DMA buffers, to obtain the current set of
+ taskfile register values.
+ Most drivers for taskfile-based hardware (PIO or MMIO) use
+ ata_sff_tf_load() and ata_sff_tf_read() for these hooks.
+ </para>
+
+ </sect2>
+
+ <sect2><title>PIO data read/write</title>
+ <programlisting>
+void (*sff_data_xfer) (struct ata_device *, unsigned char *, unsigned int, int);
+ </programlisting>
+
+ <para>
+All bmdma-style drivers must implement this hook. This is the low-level
+operation that actually copies the data bytes during a PIO data
+transfer.
+Typically the driver will choose one of ata_sff_data_xfer_noirq(),
+ata_sff_data_xfer(), or ata_sff_data_xfer32().
+ </para>
+
+ </sect2>
+
+ <sect2><title>ATA command execute</title>
+ <programlisting>
+void (*sff_exec_command)(struct ata_port *ap, struct ata_taskfile *tf);
+ </programlisting>
+
+ <para>
+ causes an ATA command, previously loaded with
+ ->tf_load(), to be initiated in hardware.
+ Most drivers for taskfile-based hardware use ata_sff_exec_command()
+ for this hook.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Per-cmd ATAPI DMA capabilities filter</title>
+ <programlisting>
+int (*check_atapi_dma) (struct ata_queued_cmd *qc);
+ </programlisting>
+
+ <para>
+Allow low-level driver to filter ATA PACKET commands, returning a status
+indicating whether or not it is OK to use DMA for the supplied PACKET
+command.
+ </para>
+ <para>
+ This hook may be specified as NULL, in which case libata will
+ assume that atapi dma can be supported.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Read specific ATA shadow registers</title>
+ <programlisting>
+u8 (*sff_check_status)(struct ata_port *ap);
+u8 (*sff_check_altstatus)(struct ata_port *ap);
+ </programlisting>
+
+ <para>
+ Reads the Status/AltStatus ATA shadow register from
+ hardware. On some hardware, reading the Status register has
+ the side effect of clearing the interrupt condition.
+ Most drivers for taskfile-based hardware use
+ ata_sff_check_status() for this hook.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Write specific ATA shadow register</title>
+ <programlisting>
+void (*sff_set_devctl)(struct ata_port *ap, u8 ctl);
+ </programlisting>
+
+ <para>
+ Write the device control ATA shadow register to the hardware.
+ Most drivers don't need to define this.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Select ATA device on bus</title>
+ <programlisting>
+void (*sff_dev_select)(struct ata_port *ap, unsigned int device);
+ </programlisting>
+
+ <para>
+ Issues the low-level hardware command(s) that causes one of N
+ hardware devices to be considered 'selected' (active and
+ available for use) on the ATA bus. This generally has no
+ meaning on FIS-based devices.
+ </para>
+ <para>
+ Most drivers for taskfile-based hardware use
+ ata_sff_dev_select() for this hook.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Private tuning method</title>
+ <programlisting>
+void (*set_mode) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+ By default libata performs drive and controller tuning in
+ accordance with the ATA timing rules and also applies blacklists
+ and cable limits. Some controllers need special handling and have
+ custom tuning rules, typically raid controllers that use ATA
+ commands but do not actually do drive timing.
+ </para>
+
+ <warning>
+ <para>
+ This hook should not be used to replace the standard controller
+ tuning logic when a controller has quirks. Replacing the default
+ tuning logic in that case would bypass handling for drive and
+ bridge quirks that may be important to data reliability. If a
+ controller needs to filter the mode selection it should use the
+ mode_filter hook instead.
+ </para>
+ </warning>
+
+ </sect2>
+
+ <sect2><title>Control PCI IDE BMDMA engine</title>
+ <programlisting>
+void (*bmdma_setup) (struct ata_queued_cmd *qc);
+void (*bmdma_start) (struct ata_queued_cmd *qc);
+void (*bmdma_stop) (struct ata_port *ap);
+u8 (*bmdma_status) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+When setting up an IDE BMDMA transaction, these hooks arm
+(->bmdma_setup), fire (->bmdma_start), and halt (->bmdma_stop)
+the hardware's DMA engine. ->bmdma_status is used to read the standard
+PCI IDE DMA Status register.
+ </para>
+
+ <para>
+These hooks are typically either no-ops, or simply not implemented, in
+FIS-based drivers.
+ </para>
+ <para>
+Most legacy IDE drivers use ata_bmdma_setup() for the bmdma_setup()
+hook. ata_bmdma_setup() will write the pointer to the PRD table to
+the IDE PRD Table Address register, enable DMA in the DMA Command
+register, and call exec_command() to begin the transfer.
+ </para>
+ <para>
+Most legacy IDE drivers use ata_bmdma_start() for the bmdma_start()
+hook. ata_bmdma_start() will write the ATA_DMA_START flag to the DMA
+Command register.
+ </para>
+ <para>
+Many legacy IDE drivers use ata_bmdma_stop() for the bmdma_stop()
+hook. ata_bmdma_stop() clears the ATA_DMA_START flag in the DMA
+command register.
+ </para>
+ <para>
+Many legacy IDE drivers use ata_bmdma_status() as the bmdma_status() hook.
+ </para>
+
+ </sect2>
+
+ <sect2><title>High-level taskfile hooks</title>
+ <programlisting>
+void (*qc_prep) (struct ata_queued_cmd *qc);
+int (*qc_issue) (struct ata_queued_cmd *qc);
+ </programlisting>
+
+ <para>
+ Higher-level hooks, these two hooks can potentially supercede
+ several of the above taskfile/DMA engine hooks. ->qc_prep is
+ called after the buffers have been DMA-mapped, and is typically
+ used to populate the hardware's DMA scatter-gather table.
+ Most drivers use the standard ata_qc_prep() helper function, but
+ more advanced drivers roll their own.
+ </para>
+ <para>
+ ->qc_issue is used to make a command active, once the hardware
+ and S/G tables have been prepared. IDE BMDMA drivers use the
+ helper function ata_qc_issue_prot() for taskfile protocol-based
+ dispatch. More advanced drivers implement their own ->qc_issue.
+ </para>
+ <para>
+ ata_qc_issue_prot() calls ->tf_load(), ->bmdma_setup(), and
+ ->bmdma_start() as necessary to initiate a transfer.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Exception and probe handling (EH)</title>
+ <programlisting>
+void (*eng_timeout) (struct ata_port *ap);
+void (*phy_reset) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+Deprecated. Use ->error_handler() instead.
+ </para>
+
+ <programlisting>
+void (*freeze) (struct ata_port *ap);
+void (*thaw) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+ata_port_freeze() is called when HSM violations or some other
+condition disrupts normal operation of the port. A frozen port
+is not allowed to perform any operation until the port is
+thawed, which usually follows a successful reset.
+ </para>
+
+ <para>
+The optional ->freeze() callback can be used for freezing the port
+hardware-wise (e.g. mask interrupt and stop DMA engine). If a
+port cannot be frozen hardware-wise, the interrupt handler
+must ack and clear interrupts unconditionally while the port
+is frozen.
+ </para>
+ <para>
+The optional ->thaw() callback is called to perform the opposite of ->freeze():
+prepare the port for normal operation once again. Unmask interrupts,
+start DMA engine, etc.
+ </para>
+
+ <programlisting>
+void (*error_handler) (struct ata_port *ap);
+ </programlisting>
+
+ <para>
+->error_handler() is a driver's hook into probe, hotplug, and recovery
+and other exceptional conditions. The primary responsibility of an
+implementation is to call ata_do_eh() or ata_bmdma_drive_eh() with a set
+of EH hooks as arguments:
+ </para>
+
+ <para>
+'prereset' hook (may be NULL) is called during an EH reset, before any other actions
+are taken.
+ </para>
+
+ <para>
+'postreset' hook (may be NULL) is called after the EH reset is performed. Based on
+existing conditions, severity of the problem, and hardware capabilities,
+ </para>
+
+ <para>
+Either 'softreset' (may be NULL) or 'hardreset' (may be NULL) will be
+called to perform the low-level EH reset.
+ </para>
+
+ <programlisting>
+void (*post_internal_cmd) (struct ata_queued_cmd *qc);
+ </programlisting>
+
+ <para>
+Perform any hardware-specific actions necessary to finish processing
+after executing a probe-time or EH-time command via ata_exec_internal().
+ </para>
+
+ </sect2>
+
+ <sect2><title>Hardware interrupt handling</title>
+ <programlisting>
+irqreturn_t (*irq_handler)(int, void *, struct pt_regs *);
+void (*irq_clear) (struct ata_port *);
+ </programlisting>
+
+ <para>
+ ->irq_handler is the interrupt handling routine registered with
+ the system, by libata. ->irq_clear is called during probe just
+ before the interrupt handler is registered, to be sure hardware
+ is quiet.
+ </para>
+ <para>
+ The second argument, dev_instance, should be cast to a pointer
+ to struct ata_host_set.
+ </para>
+ <para>
+ Most legacy IDE drivers use ata_sff_interrupt() for the
+ irq_handler hook, which scans all ports in the host_set,
+ determines which queued command was active (if any), and calls
+ ata_sff_host_intr(ap,qc).
+ </para>
+ <para>
+ Most legacy IDE drivers use ata_sff_irq_clear() for the
+ irq_clear() hook, which simply clears the interrupt and error
+ flags in the DMA status register.
+ </para>
+
+ </sect2>
+
+ <sect2><title>SATA phy read/write</title>
+ <programlisting>
+int (*scr_read) (struct ata_port *ap, unsigned int sc_reg,
+ u32 *val);
+int (*scr_write) (struct ata_port *ap, unsigned int sc_reg,
+ u32 val);
+ </programlisting>
+
+ <para>
+ Read and write standard SATA phy registers. Currently only used
+ if ->phy_reset hook called the sata_phy_reset() helper function.
+ sc_reg is one of SCR_STATUS, SCR_CONTROL, SCR_ERROR, or SCR_ACTIVE.
+ </para>
+
+ </sect2>
+
+ <sect2><title>Init and shutdown</title>
+ <programlisting>
+int (*port_start) (struct ata_port *ap);
+void (*port_stop) (struct ata_port *ap);
+void (*host_stop) (struct ata_host_set *host_set);
+ </programlisting>
+
+ <para>
+ ->port_start() is called just after the data structures for each
+ port are initialized. Typically this is used to alloc per-port
+ DMA buffers / tables / rings, enable DMA engines, and similar
+ tasks. Some drivers also use this entry point as a chance to
+ allocate driver-private memory for ap->private_data.
+ </para>
+ <para>
+ Many drivers use ata_port_start() as this hook or call
+ it from their own port_start() hooks. ata_port_start()
+ allocates space for a legacy IDE PRD table and returns.
+ </para>
+ <para>
+ ->port_stop() is called after ->host_stop(). Its sole function
+ is to release DMA/memory resources, now that they are no longer
+ actively being used. Many drivers also free driver-private
+ data from port at this time.
+ </para>
+ <para>
+ ->host_stop() is called after all ->port_stop() calls
+have completed. The hook must finalize hardware shutdown, release DMA
+and other resources, etc.
+ This hook may be specified as NULL, in which case it is not called.
+ </para>
+
+ </sect2>
+
+ </sect1>
+ </chapter>
+
+ <chapter id="libataEH">
+ <title>Error handling</title>
+
+ <para>
+ This chapter describes how errors are handled under libata.
+ Readers are advised to read SCSI EH
+ (Documentation/scsi/scsi_eh.txt) and ATA exceptions doc first.
+ </para>
+
+ <sect1><title>Origins of commands</title>
+ <para>
+ In libata, a command is represented with struct ata_queued_cmd
+ or qc. qc's are preallocated during port initialization and
+ repetitively used for command executions. Currently only one
+ qc is allocated per port but yet-to-be-merged NCQ branch
+ allocates one for each tag and maps each qc to NCQ tag 1-to-1.
+ </para>
+ <para>
+ libata commands can originate from two sources - libata itself
+ and SCSI midlayer. libata internal commands are used for
+ initialization and error handling. All normal blk requests
+ and commands for SCSI emulation are passed as SCSI commands
+ through queuecommand callback of SCSI host template.
+ </para>
+ </sect1>
+
+ <sect1><title>How commands are issued</title>
+
+ <variablelist>
+
+ <varlistentry><term>Internal commands</term>
+ <listitem>
+ <para>
+ First, qc is allocated and initialized using
+ ata_qc_new_init(). Although ata_qc_new_init() doesn't
+ implement any wait or retry mechanism when qc is not
+ available, internal commands are currently issued only during
+ initialization and error recovery, so no other command is
+ active and allocation is guaranteed to succeed.
+ </para>
+ <para>
+ Once allocated qc's taskfile is initialized for the command to
+ be executed. qc currently has two mechanisms to notify
+ completion. One is via qc->complete_fn() callback and the
+ other is completion qc->waiting. qc->complete_fn() callback
+ is the asynchronous path used by normal SCSI translated
+ commands and qc->waiting is the synchronous (issuer sleeps in
+ process context) path used by internal commands.
+ </para>
+ <para>
+ Once initialization is complete, host_set lock is acquired
+ and the qc is issued.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>SCSI commands</term>
+ <listitem>
+ <para>
+ All libata drivers use ata_scsi_queuecmd() as
+ hostt->queuecommand callback. scmds can either be simulated
+ or translated. No qc is involved in processing a simulated
+ scmd. The result is computed right away and the scmd is
+ completed.
+ </para>
+ <para>
+ For a translated scmd, ata_qc_new_init() is invoked to
+ allocate a qc and the scmd is translated into the qc. SCSI
+ midlayer's completion notification function pointer is stored
+ into qc->scsidone.
+ </para>
+ <para>
+ qc->complete_fn() callback is used for completion
+ notification. ATA commands use ata_scsi_qc_complete() while
+ ATAPI commands use atapi_qc_complete(). Both functions end up
+ calling qc->scsidone to notify upper layer when the qc is
+ finished. After translation is completed, the qc is issued
+ with ata_qc_issue().
+ </para>
+ <para>
+ Note that SCSI midlayer invokes hostt->queuecommand while
+ holding host_set lock, so all above occur while holding
+ host_set lock.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+ </sect1>
+
+ <sect1><title>How commands are processed</title>
+ <para>
+ Depending on which protocol and which controller are used,
+ commands are processed differently. For the purpose of
+ discussion, a controller which uses taskfile interface and all
+ standard callbacks is assumed.
+ </para>
+ <para>
+ Currently 6 ATA command protocols are used. They can be
+ sorted into the following four categories according to how
+ they are processed.
+ </para>
+
+ <variablelist>
+ <varlistentry><term>ATA NO DATA or DMA</term>
+ <listitem>
+ <para>
+ ATA_PROT_NODATA and ATA_PROT_DMA fall into this category.
+ These types of commands don't require any software
+ intervention once issued. Device will raise interrupt on
+ completion.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>ATA PIO</term>
+ <listitem>
+ <para>
+ ATA_PROT_PIO is in this category. libata currently
+ implements PIO with polling. ATA_NIEN bit is set to turn
+ off interrupt and pio_task on ata_wq performs polling and
+ IO.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>ATAPI NODATA or DMA</term>
+ <listitem>
+ <para>
+ ATA_PROT_ATAPI_NODATA and ATA_PROT_ATAPI_DMA are in this
+ category. packet_task is used to poll BSY bit after
+ issuing PACKET command. Once BSY is turned off by the
+ device, packet_task transfers CDB and hands off processing
+ to interrupt handler.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>ATAPI PIO</term>
+ <listitem>
+ <para>
+ ATA_PROT_ATAPI is in this category. ATA_NIEN bit is set
+ and, as in ATAPI NODATA or DMA, packet_task submits cdb.
+ However, after submitting cdb, further processing (data
+ transfer) is handed off to pio_task.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </sect1>
+
+ <sect1><title>How commands are completed</title>
+ <para>
+ Once issued, all qc's are either completed with
+ ata_qc_complete() or time out. For commands which are handled
+ by interrupts, ata_host_intr() invokes ata_qc_complete(), and,
+ for PIO tasks, pio_task invokes ata_qc_complete(). In error
+ cases, packet_task may also complete commands.
+ </para>
+ <para>
+ ata_qc_complete() does the following.
+ </para>
+
+ <orderedlist>
+
+ <listitem>
+ <para>
+ DMA memory is unmapped.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ ATA_QCFLAG_ACTIVE is cleared from qc->flags.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ qc->complete_fn() callback is invoked. If the return value of
+ the callback is not zero. Completion is short circuited and
+ ata_qc_complete() returns.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ __ata_qc_complete() is called, which does
+ <orderedlist>
+
+ <listitem>
+ <para>
+ qc->flags is cleared to zero.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ ap->active_tag and qc->tag are poisoned.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ qc->waiting is cleared &amp; completed (in that order).
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ qc is deallocated by clearing appropriate bit in ap->qactive.
+ </para>
+ </listitem>
+
+ </orderedlist>
+ </para>
+ </listitem>
+
+ </orderedlist>
+
+ <para>
+ So, it basically notifies upper layer and deallocates qc. One
+ exception is short-circuit path in #3 which is used by
+ atapi_qc_complete().
+ </para>
+ <para>
+ For all non-ATAPI commands, whether it fails or not, almost
+ the same code path is taken and very little error handling
+ takes place. A qc is completed with success status if it
+ succeeded, with failed status otherwise.
+ </para>
+ <para>
+ However, failed ATAPI commands require more handling as
+ REQUEST SENSE is needed to acquire sense data. If an ATAPI
+ command fails, ata_qc_complete() is invoked with error status,
+ which in turn invokes atapi_qc_complete() via
+ qc->complete_fn() callback.
+ </para>
+ <para>
+ This makes atapi_qc_complete() set scmd->result to
+ SAM_STAT_CHECK_CONDITION, complete the scmd and return 1. As
+ the sense data is empty but scmd->result is CHECK CONDITION,
+ SCSI midlayer will invoke EH for the scmd, and returning 1
+ makes ata_qc_complete() to return without deallocating the qc.
+ This leads us to ata_scsi_error() with partially completed qc.
+ </para>
+
+ </sect1>
+
+ <sect1><title>ata_scsi_error()</title>
+ <para>
+ ata_scsi_error() is the current transportt->eh_strategy_handler()
+ for libata. As discussed above, this will be entered in two
+ cases - timeout and ATAPI error completion. This function
+ calls low level libata driver's eng_timeout() callback, the
+ standard callback for which is ata_eng_timeout(). It checks
+ if a qc is active and calls ata_qc_timeout() on the qc if so.
+ Actual error handling occurs in ata_qc_timeout().
+ </para>
+ <para>
+ If EH is invoked for timeout, ata_qc_timeout() stops BMDMA and
+ completes the qc. Note that as we're currently in EH, we
+ cannot call scsi_done. As described in SCSI EH doc, a
+ recovered scmd should be either retried with
+ scsi_queue_insert() or finished with scsi_finish_command().
+ Here, we override qc->scsidone with scsi_finish_command() and
+ calls ata_qc_complete().
+ </para>
+ <para>
+ If EH is invoked due to a failed ATAPI qc, the qc here is
+ completed but not deallocated. The purpose of this
+ half-completion is to use the qc as place holder to make EH
+ code reach this place. This is a bit hackish, but it works.
+ </para>
+ <para>
+ Once control reaches here, the qc is deallocated by invoking
+ __ata_qc_complete() explicitly. Then, internal qc for REQUEST
+ SENSE is issued. Once sense data is acquired, scmd is
+ finished by directly invoking scsi_finish_command() on the
+ scmd. Note that as we already have completed and deallocated
+ the qc which was associated with the scmd, we don't need
+ to/cannot call ata_qc_complete() again.
+ </para>
+
+ </sect1>
+
+ <sect1><title>Problems with the current EH</title>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ Error representation is too crude. Currently any and all
+ error conditions are represented with ATA STATUS and ERROR
+ registers. Errors which aren't ATA device errors are treated
+ as ATA device errors by setting ATA_ERR bit. Better error
+ descriptor which can properly represent ATA and other
+ errors/exceptions is needed.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ When handling timeouts, no action is taken to make device
+ forget about the timed out command and ready for new commands.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ EH handling via ata_scsi_error() is not properly protected
+ from usual command processing. On EH entrance, the device is
+ not in quiescent state. Timed out commands may succeed or
+ fail any time. pio_task and atapi_task may still be running.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Too weak error recovery. Devices / controllers causing HSM
+ mismatch errors and other errors quite often require reset to
+ return to known state. Also, advanced error handling is
+ necessary to support features like NCQ and hotplug.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ ATA errors are directly handled in the interrupt handler and
+ PIO errors in pio_task. This is problematic for advanced
+ error handling for the following reasons.
+ </para>
+ <para>
+ First, advanced error handling often requires context and
+ internal qc execution.
+ </para>
+ <para>
+ Second, even a simple failure (say, CRC error) needs
+ information gathering and could trigger complex error handling
+ (say, resetting &amp; reconfiguring). Having multiple code
+ paths to gather information, enter EH and trigger actions
+ makes life painful.
+ </para>
+ <para>
+ Third, scattered EH code makes implementing low level drivers
+ difficult. Low level drivers override libata callbacks. If
+ EH is scattered over several places, each affected callbacks
+ should perform its part of error handling. This can be error
+ prone and painful.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+ </sect1>
+ </chapter>
+
+ <chapter id="libataExt">
+ <title>libata Library</title>
+!Edrivers/ata/libata-core.c
+ </chapter>
+
+ <chapter id="libataInt">
+ <title>libata Core Internals</title>
+!Idrivers/ata/libata-core.c
+ </chapter>
+
+ <chapter id="libataScsiInt">
+ <title>libata SCSI translation/emulation</title>
+!Edrivers/ata/libata-scsi.c
+!Idrivers/ata/libata-scsi.c
+ </chapter>
+
+ <chapter id="ataExceptions">
+ <title>ATA errors and exceptions</title>
+
+ <para>
+ This chapter tries to identify what error/exception conditions exist
+ for ATA/ATAPI devices and describe how they should be handled in
+ implementation-neutral way.
+ </para>
+
+ <para>
+ The term 'error' is used to describe conditions where either an
+ explicit error condition is reported from device or a command has
+ timed out.
+ </para>
+
+ <para>
+ The term 'exception' is either used to describe exceptional
+ conditions which are not errors (say, power or hotplug events), or
+ to describe both errors and non-error exceptional conditions. Where
+ explicit distinction between error and exception is necessary, the
+ term 'non-error exception' is used.
+ </para>
+
+ <sect1 id="excat">
+ <title>Exception categories</title>
+ <para>
+ Exceptions are described primarily with respect to legacy
+ taskfile + bus master IDE interface. If a controller provides
+ other better mechanism for error reporting, mapping those into
+ categories described below shouldn't be difficult.
+ </para>
+
+ <para>
+ In the following sections, two recovery actions - reset and
+ reconfiguring transport - are mentioned. These are described
+ further in <xref linkend="exrec"/>.
+ </para>
+
+ <sect2 id="excatHSMviolation">
+ <title>HSM violation</title>
+ <para>
+ This error is indicated when STATUS value doesn't match HSM
+ requirement during issuing or execution any ATA/ATAPI command.
+ </para>
+
+ <itemizedlist>
+ <title>Examples</title>
+
+ <listitem>
+ <para>
+ ATA_STATUS doesn't contain !BSY &amp;&amp; DRDY &amp;&amp; !DRQ while trying
+ to issue a command.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ !BSY &amp;&amp; !DRQ during PIO data transfer.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ DRQ on command completion.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ !BSY &amp;&amp; ERR after CDB transfer starts but before the
+ last byte of CDB is transferred. ATA/ATAPI standard states
+ that &quot;The device shall not terminate the PACKET command
+ with an error before the last byte of the command packet has
+ been written&quot; in the error outputs description of PACKET
+ command and the state diagram doesn't include such
+ transitions.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ In these cases, HSM is violated and not much information
+ regarding the error can be acquired from STATUS or ERROR
+ register. IOW, this error can be anything - driver bug,
+ faulty device, controller and/or cable.
+ </para>
+
+ <para>
+ As HSM is violated, reset is necessary to restore known state.
+ Reconfiguring transport for lower speed might be helpful too
+ as transmission errors sometimes cause this kind of errors.
+ </para>
+ </sect2>
+
+ <sect2 id="excatDevErr">
+ <title>ATA/ATAPI device error (non-NCQ / non-CHECK CONDITION)</title>
+
+ <para>
+ These are errors detected and reported by ATA/ATAPI devices
+ indicating device problems. For this type of errors, STATUS
+ and ERROR register values are valid and describe error
+ condition. Note that some of ATA bus errors are detected by
+ ATA/ATAPI devices and reported using the same mechanism as
+ device errors. Those cases are described later in this
+ section.
+ </para>
+
+ <para>
+ For ATA commands, this type of errors are indicated by !BSY
+ &amp;&amp; ERR during command execution and on completion.
+ </para>
+
+ <para>For ATAPI commands,</para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ !BSY &amp;&amp; ERR &amp;&amp; ABRT right after issuing PACKET
+ indicates that PACKET command is not supported and falls in
+ this category.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ !BSY &amp;&amp; ERR(==CHK) &amp;&amp; !ABRT after the last
+ byte of CDB is transferred indicates CHECK CONDITION and
+ doesn't fall in this category.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ !BSY &amp;&amp; ERR(==CHK) &amp;&amp; ABRT after the last byte
+ of CDB is transferred *probably* indicates CHECK CONDITION and
+ doesn't fall in this category.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ Of errors detected as above, the followings are not ATA/ATAPI
+ device errors but ATA bus errors and should be handled
+ according to <xref linkend="excatATAbusErr"/>.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>CRC error during data transfer</term>
+ <listitem>
+ <para>
+ This is indicated by ICRC bit in the ERROR register and
+ means that corruption occurred during data transfer. Up to
+ ATA/ATAPI-7, the standard specifies that this bit is only
+ applicable to UDMA transfers but ATA/ATAPI-8 draft revision
+ 1f says that the bit may be applicable to multiword DMA and
+ PIO.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>ABRT error during data transfer or on completion</term>
+ <listitem>
+ <para>
+ Up to ATA/ATAPI-7, the standard specifies that ABRT could be
+ set on ICRC errors and on cases where a device is not able
+ to complete a command. Combined with the fact that MWDMA
+ and PIO transfer errors aren't allowed to use ICRC bit up to
+ ATA/ATAPI-7, it seems to imply that ABRT bit alone could
+ indicate transfer errors.
+ </para>
+ <para>
+ However, ATA/ATAPI-8 draft revision 1f removes the part
+ that ICRC errors can turn on ABRT. So, this is kind of
+ gray area. Some heuristics are needed here.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ <para>
+ ATA/ATAPI device errors can be further categorized as follows.
+ </para>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>Media errors</term>
+ <listitem>
+ <para>
+ This is indicated by UNC bit in the ERROR register. ATA
+ devices reports UNC error only after certain number of
+ retries cannot recover the data, so there's nothing much
+ else to do other than notifying upper layer.
+ </para>
+ <para>
+ READ and WRITE commands report CHS or LBA of the first
+ failed sector but ATA/ATAPI standard specifies that the
+ amount of transferred data on error completion is
+ indeterminate, so we cannot assume that sectors preceding
+ the failed sector have been transferred and thus cannot
+ complete those sectors successfully as SCSI does.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>Media changed / media change requested error</term>
+ <listitem>
+ <para>
+ &lt;&lt;TODO: fill here&gt;&gt;
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>Address error</term>
+ <listitem>
+ <para>
+ This is indicated by IDNF bit in the ERROR register.
+ Report to upper layer.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>Other errors</term>
+ <listitem>
+ <para>
+ This can be invalid command or parameter indicated by ABRT
+ ERROR bit or some other error condition. Note that ABRT
+ bit can indicate a lot of things including ICRC and Address
+ errors. Heuristics needed.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ <para>
+ Depending on commands, not all STATUS/ERROR bits are
+ applicable. These non-applicable bits are marked with
+ &quot;na&quot; in the output descriptions but up to ATA/ATAPI-7
+ no definition of &quot;na&quot; can be found. However,
+ ATA/ATAPI-8 draft revision 1f describes &quot;N/A&quot; as
+ follows.
+ </para>
+
+ <blockquote>
+ <variablelist>
+ <varlistentry><term>3.2.3.3a N/A</term>
+ <listitem>
+ <para>
+ A keyword the indicates a field has no defined value in
+ this standard and should not be checked by the host or
+ device. N/A fields should be cleared to zero.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </blockquote>
+
+ <para>
+ So, it seems reasonable to assume that &quot;na&quot; bits are
+ cleared to zero by devices and thus need no explicit masking.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatATAPIcc">
+ <title>ATAPI device CHECK CONDITION</title>
+
+ <para>
+ ATAPI device CHECK CONDITION error is indicated by set CHK bit
+ (ERR bit) in the STATUS register after the last byte of CDB is
+ transferred for a PACKET command. For this kind of errors,
+ sense data should be acquired to gather information regarding
+ the errors. REQUEST SENSE packet command should be used to
+ acquire sense data.
+ </para>
+
+ <para>
+ Once sense data is acquired, this type of errors can be
+ handled similarly to other SCSI errors. Note that sense data
+ may indicate ATA bus error (e.g. Sense Key 04h HARDWARE ERROR
+ &amp;&amp; ASC/ASCQ 47h/00h SCSI PARITY ERROR). In such
+ cases, the error should be considered as an ATA bus error and
+ handled according to <xref linkend="excatATAbusErr"/>.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatNCQerr">
+ <title>ATA device error (NCQ)</title>
+
+ <para>
+ NCQ command error is indicated by cleared BSY and set ERR bit
+ during NCQ command phase (one or more NCQ commands
+ outstanding). Although STATUS and ERROR registers will
+ contain valid values describing the error, READ LOG EXT is
+ required to clear the error condition, determine which command
+ has failed and acquire more information.
+ </para>
+
+ <para>
+ READ LOG EXT Log Page 10h reports which tag has failed and
+ taskfile register values describing the error. With this
+ information the failed command can be handled as a normal ATA
+ command error as in <xref linkend="excatDevErr"/> and all
+ other in-flight commands must be retried. Note that this
+ retry should not be counted - it's likely that commands
+ retried this way would have completed normally if it were not
+ for the failed command.
+ </para>
+
+ <para>
+ Note that ATA bus errors can be reported as ATA device NCQ
+ errors. This should be handled as described in <xref
+ linkend="excatATAbusErr"/>.
+ </para>
+
+ <para>
+ If READ LOG EXT Log Page 10h fails or reports NQ, we're
+ thoroughly screwed. This condition should be treated
+ according to <xref linkend="excatHSMviolation"/>.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatATAbusErr">
+ <title>ATA bus error</title>
+
+ <para>
+ ATA bus error means that data corruption occurred during
+ transmission over ATA bus (SATA or PATA). This type of errors
+ can be indicated by
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ ICRC or ABRT error as described in <xref linkend="excatDevErr"/>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Controller-specific error completion with error information
+ indicating transmission error.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ On some controllers, command timeout. In this case, there may
+ be a mechanism to determine that the timeout is due to
+ transmission error.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Unknown/random errors, timeouts and all sorts of weirdities.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ As described above, transmission errors can cause wide variety
+ of symptoms ranging from device ICRC error to random device
+ lockup, and, for many cases, there is no way to tell if an
+ error condition is due to transmission error or not;
+ therefore, it's necessary to employ some kind of heuristic
+ when dealing with errors and timeouts. For example,
+ encountering repetitive ABRT errors for known supported
+ command is likely to indicate ATA bus error.
+ </para>
+
+ <para>
+ Once it's determined that ATA bus errors have possibly
+ occurred, lowering ATA bus transmission speed is one of
+ actions which may alleviate the problem. See <xref
+ linkend="exrecReconf"/> for more information.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatPCIbusErr">
+ <title>PCI bus error</title>
+
+ <para>
+ Data corruption or other failures during transmission over PCI
+ (or other system bus). For standard BMDMA, this is indicated
+ by Error bit in the BMDMA Status register. This type of
+ errors must be logged as it indicates something is very wrong
+ with the system. Resetting host controller is recommended.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatLateCompletion">
+ <title>Late completion</title>
+
+ <para>
+ This occurs when timeout occurs and the timeout handler finds
+ out that the timed out command has completed successfully or
+ with error. This is usually caused by lost interrupts. This
+ type of errors must be logged. Resetting host controller is
+ recommended.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatUnknown">
+ <title>Unknown error (timeout)</title>
+
+ <para>
+ This is when timeout occurs and the command is still
+ processing or the host and device are in unknown state. When
+ this occurs, HSM could be in any valid or invalid state. To
+ bring the device to known state and make it forget about the
+ timed out command, resetting is necessary. The timed out
+ command may be retried.
+ </para>
+
+ <para>
+ Timeouts can also be caused by transmission errors. Refer to
+ <xref linkend="excatATAbusErr"/> for more details.
+ </para>
+
+ </sect2>
+
+ <sect2 id="excatHoplugPM">
+ <title>Hotplug and power management exceptions</title>
+
+ <para>
+ &lt;&lt;TODO: fill here&gt;&gt;
+ </para>
+
+ </sect2>
+
+ </sect1>
+
+ <sect1 id="exrec">
+ <title>EH recovery actions</title>
+
+ <para>
+ This section discusses several important recovery actions.
+ </para>
+
+ <sect2 id="exrecClr">
+ <title>Clearing error condition</title>
+
+ <para>
+ Many controllers require its error registers to be cleared by
+ error handler. Different controllers may have different
+ requirements.
+ </para>
+
+ <para>
+ For SATA, it's strongly recommended to clear at least SError
+ register during error handling.
+ </para>
+ </sect2>
+
+ <sect2 id="exrecRst">
+ <title>Reset</title>
+
+ <para>
+ During EH, resetting is necessary in the following cases.
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ HSM is in unknown or invalid state
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ HBA is in unknown or invalid state
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ EH needs to make HBA/device forget about in-flight commands
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ HBA/device behaves weirdly
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ Resetting during EH might be a good idea regardless of error
+ condition to improve EH robustness. Whether to reset both or
+ either one of HBA and device depends on situation but the
+ following scheme is recommended.
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ When it's known that HBA is in ready state but ATA/ATAPI
+ device is in unknown state, reset only device.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ If HBA is in unknown state, reset both HBA and device.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ HBA resetting is implementation specific. For a controller
+ complying to taskfile/BMDMA PCI IDE, stopping active DMA
+ transaction may be sufficient iff BMDMA state is the only HBA
+ context. But even mostly taskfile/BMDMA PCI IDE complying
+ controllers may have implementation specific requirements and
+ mechanism to reset themselves. This must be addressed by
+ specific drivers.
+ </para>
+
+ <para>
+ OTOH, ATA/ATAPI standard describes in detail ways to reset
+ ATA/ATAPI devices.
+ </para>
+
+ <variablelist>
+
+ <varlistentry><term>PATA hardware reset</term>
+ <listitem>
+ <para>
+ This is hardware initiated device reset signalled with
+ asserted PATA RESET- signal. There is no standard way to
+ initiate hardware reset from software although some
+ hardware provides registers that allow driver to directly
+ tweak the RESET- signal.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>Software reset</term>
+ <listitem>
+ <para>
+ This is achieved by turning CONTROL SRST bit on for at
+ least 5us. Both PATA and SATA support it but, in case of
+ SATA, this may require controller-specific support as the
+ second Register FIS to clear SRST should be transmitted
+ while BSY bit is still set. Note that on PATA, this resets
+ both master and slave devices on a channel.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>EXECUTE DEVICE DIAGNOSTIC command</term>
+ <listitem>
+ <para>
+ Although ATA/ATAPI standard doesn't describe exactly, EDD
+ implies some level of resetting, possibly similar level
+ with software reset. Host-side EDD protocol can be handled
+ with normal command processing and most SATA controllers
+ should be able to handle EDD's just like other commands.
+ As in software reset, EDD affects both devices on a PATA
+ bus.
+ </para>
+ <para>
+ Although EDD does reset devices, this doesn't suit error
+ handling as EDD cannot be issued while BSY is set and it's
+ unclear how it will act when device is in unknown/weird
+ state.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>ATAPI DEVICE RESET command</term>
+ <listitem>
+ <para>
+ This is very similar to software reset except that reset
+ can be restricted to the selected device without affecting
+ the other device sharing the cable.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry><term>SATA phy reset</term>
+ <listitem>
+ <para>
+ This is the preferred way of resetting a SATA device. In
+ effect, it's identical to PATA hardware reset. Note that
+ this can be done with the standard SCR Control register.
+ As such, it's usually easier to implement than software
+ reset.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ <para>
+ One more thing to consider when resetting devices is that
+ resetting clears certain configuration parameters and they
+ need to be set to their previous or newly adjusted values
+ after reset.
+ </para>
+
+ <para>
+ Parameters affected are.
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ CHS set up with INITIALIZE DEVICE PARAMETERS (seldom used)
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Parameters set with SET FEATURES including transfer mode setting
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Block count set with SET MULTIPLE MODE
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Other parameters (SET MAX, MEDIA LOCK...)
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ ATA/ATAPI standard specifies that some parameters must be
+ maintained across hardware or software reset, but doesn't
+ strictly specify all of them. Always reconfiguring needed
+ parameters after reset is required for robustness. Note that
+ this also applies when resuming from deep sleep (power-off).
+ </para>
+
+ <para>
+ Also, ATA/ATAPI standard requires that IDENTIFY DEVICE /
+ IDENTIFY PACKET DEVICE is issued after any configuration
+ parameter is updated or a hardware reset and the result used
+ for further operation. OS driver is required to implement
+ revalidation mechanism to support this.
+ </para>
+
+ </sect2>
+
+ <sect2 id="exrecReconf">
+ <title>Reconfigure transport</title>
+
+ <para>
+ For both PATA and SATA, a lot of corners are cut for cheap
+ connectors, cables or controllers and it's quite common to see
+ high transmission error rate. This can be mitigated by
+ lowering transmission speed.
+ </para>
+
+ <para>
+ The following is a possible scheme Jeff Garzik suggested.
+ </para>
+
+ <blockquote>
+ <para>
+ If more than $N (3?) transmission errors happen in 15 minutes,
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ if SATA, decrease SATA PHY speed. if speed cannot be decreased,
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ decrease UDMA xfer speed. if at UDMA0, switch to PIO4,
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ decrease PIO xfer speed. if at PIO3, complain, but continue
+ </para>
+ </listitem>
+ </itemizedlist>
+ </blockquote>
+
+ </sect2>
+
+ </sect1>
+
+ </chapter>
+
+ <chapter id="PiixInt">
+ <title>ata_piix Internals</title>
+!Idrivers/ata/ata_piix.c
+ </chapter>
+
+ <chapter id="SILInt">
+ <title>sata_sil Internals</title>
+!Idrivers/ata/sata_sil.c
+ </chapter>
+
+ <chapter id="libataThanks">
+ <title>Thanks</title>
+ <para>
+ The bulk of the ATA knowledge comes thanks to long conversations with
+ Andre Hedrick (www.linux-ide.org), and long hours pondering the ATA
+ and SCSI specifications.
+ </para>
+ <para>
+ Thanks to Alan Cox for pointing out similarities
+ between SATA and SCSI, and in general for motivation to hack on
+ libata.
+ </para>
+ <para>
+ libata's device detection
+ method, ata_pio_devchk, and in general all the early probing was
+ based on extensive study of Hale Landis's probe/reset code in his
+ ATADRVR driver (www.ata-atapi.com).
+ </para>
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/librs.tmpl b/Documentation/DocBook/librs.tmpl
new file mode 100644
index 000000000..94f21361e
--- /dev/null
+++ b/Documentation/DocBook/librs.tmpl
@@ -0,0 +1,289 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Reed-Solomon-Library-Guide">
+ <bookinfo>
+ <title>Reed-Solomon Library Programming Interface</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Thomas</firstname>
+ <surname>Gleixner</surname>
+ <affiliation>
+ <address>
+ <email>tglx@linutronix.de</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2004</year>
+ <holder>Thomas Gleixner</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ The generic Reed-Solomon Library provides encoding, decoding
+ and error correction functions.
+ </para>
+ <para>
+ Reed-Solomon codes are used in communication and storage
+ applications to ensure data integrity.
+ </para>
+ <para>
+ This documentation is provided for developers who want to utilize
+ the functions provided by the library.
+ </para>
+ </chapter>
+
+ <chapter id="bugs">
+ <title>Known Bugs And Assumptions</title>
+ <para>
+ None.
+ </para>
+ </chapter>
+
+ <chapter id="usage">
+ <title>Usage</title>
+ <para>
+ This chapter provides examples of how to use the library.
+ </para>
+ <sect1>
+ <title>Initializing</title>
+ <para>
+ The init function init_rs returns a pointer to an
+ rs decoder structure, which holds the necessary
+ information for encoding, decoding and error correction
+ with the given polynomial. It either uses an existing
+ matching decoder or creates a new one. On creation all
+ the lookup tables for fast en/decoding are created.
+ The function may take a while, so make sure not to
+ call it in critical code paths.
+ </para>
+ <programlisting>
+/* the Reed Solomon control structure */
+static struct rs_control *rs_decoder;
+
+/* Symbolsize is 10 (bits)
+ * Primitive polynomial is x^10+x^3+1
+ * first consecutive root is 0
+ * primitive element to generate roots = 1
+ * generator polynomial degree (number of roots) = 6
+ */
+rs_decoder = init_rs (10, 0x409, 0, 1, 6);
+ </programlisting>
+ </sect1>
+ <sect1>
+ <title>Encoding</title>
+ <para>
+ The encoder calculates the Reed-Solomon code over
+ the given data length and stores the result in
+ the parity buffer. Note that the parity buffer must
+ be initialized before calling the encoder.
+ </para>
+ <para>
+ The expanded data can be inverted on the fly by
+ providing a non-zero inversion mask. The expanded data is
+ XOR'ed with the mask. This is used e.g. for FLASH
+ ECC, where the all 0xFF is inverted to an all 0x00.
+ The Reed-Solomon code for all 0x00 is all 0x00. The
+ code is inverted before storing to FLASH so it is 0xFF
+ too. This prevents that reading from an erased FLASH
+ results in ECC errors.
+ </para>
+ <para>
+ The databytes are expanded to the given symbol size
+ on the fly. There is no support for encoding continuous
+ bitstreams with a symbol size != 8 at the moment. If
+ it is necessary it should be not a big deal to implement
+ such functionality.
+ </para>
+ <programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6];
+/* Initialize the parity buffer */
+memset(par, 0, sizeof(par));
+/* Encode 512 byte in data8. Store parity in buffer par */
+encode_rs8 (rs_decoder, data8, 512, par, 0);
+ </programlisting>
+ </sect1>
+ <sect1>
+ <title>Decoding</title>
+ <para>
+ The decoder calculates the syndrome over
+ the given data length and the received parity symbols
+ and corrects errors in the data.
+ </para>
+ <para>
+ If a syndrome is available from a hardware decoder
+ then the syndrome calculation is skipped.
+ </para>
+ <para>
+ The correction of the data buffer can be suppressed
+ by providing a correction pattern buffer and an error
+ location buffer to the decoder. The decoder stores the
+ calculated error location and the correction bitmask
+ in the given buffers. This is useful for hardware
+ decoders which use a weird bit ordering scheme.
+ </para>
+ <para>
+ The databytes are expanded to the given symbol size
+ on the fly. There is no support for decoding continuous
+ bitstreams with a symbolsize != 8 at the moment. If
+ it is necessary it should be not a big deal to implement
+ such functionality.
+ </para>
+
+ <sect2>
+ <title>
+ Decoding with syndrome calculation, direct data correction
+ </title>
+ <programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6];
+uint8_t data[512];
+int numerr;
+/* Receive data */
+.....
+/* Receive parity */
+.....
+/* Decode 512 byte in data8.*/
+numerr = decode_rs8 (rs_decoder, data8, par, 512, NULL, 0, NULL, 0, NULL);
+ </programlisting>
+ </sect2>
+
+ <sect2>
+ <title>
+ Decoding with syndrome given by hardware decoder, direct data correction
+ </title>
+ <programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6], syn[6];
+uint8_t data[512];
+int numerr;
+/* Receive data */
+.....
+/* Receive parity */
+.....
+/* Get syndrome from hardware decoder */
+.....
+/* Decode 512 byte in data8.*/
+numerr = decode_rs8 (rs_decoder, data8, par, 512, syn, 0, NULL, 0, NULL);
+ </programlisting>
+ </sect2>
+
+ <sect2>
+ <title>
+ Decoding with syndrome given by hardware decoder, no direct data correction.
+ </title>
+ <para>
+ Note: It's not necessary to give data and received parity to the decoder.
+ </para>
+ <programlisting>
+/* Parity buffer. Size = number of roots */
+uint16_t par[6], syn[6], corr[8];
+uint8_t data[512];
+int numerr, errpos[8];
+/* Receive data */
+.....
+/* Receive parity */
+.....
+/* Get syndrome from hardware decoder */
+.....
+/* Decode 512 byte in data8.*/
+numerr = decode_rs8 (rs_decoder, NULL, NULL, 512, syn, 0, errpos, 0, corr);
+for (i = 0; i &lt; numerr; i++) {
+ do_error_correction_in_your_buffer(errpos[i], corr[i]);
+}
+ </programlisting>
+ </sect2>
+ </sect1>
+ <sect1>
+ <title>Cleanup</title>
+ <para>
+ The function free_rs frees the allocated resources,
+ if the caller is the last user of the decoder.
+ </para>
+ <programlisting>
+/* Release resources */
+free_rs(rs_decoder);
+ </programlisting>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="structs">
+ <title>Structures</title>
+ <para>
+ This chapter contains the autogenerated documentation of the structures which are
+ used in the Reed-Solomon Library and are relevant for a developer.
+ </para>
+!Iinclude/linux/rslib.h
+ </chapter>
+
+ <chapter id="pubfunctions">
+ <title>Public Functions Provided</title>
+ <para>
+ This chapter contains the autogenerated documentation of the Reed-Solomon functions
+ which are exported.
+ </para>
+!Elib/reed_solomon/reed_solomon.c
+ </chapter>
+
+ <chapter id="credits">
+ <title>Credits</title>
+ <para>
+ The library code for encoding and decoding was written by Phil Karn.
+ </para>
+ <programlisting>
+ Copyright 2002, Phil Karn, KA9Q
+ May be used under the terms of the GNU General Public License (GPL)
+ </programlisting>
+ <para>
+ The wrapper functions and interfaces are written by Thomas Gleixner.
+ </para>
+ <para>
+ Many users have provided bugfixes, improvements and helping hands for testing.
+ Thanks a lot.
+ </para>
+ <para>
+ The following people have contributed to this document:
+ </para>
+ <para>
+ Thomas Gleixner<email>tglx@linutronix.de</email>
+ </para>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/lsm.tmpl b/Documentation/DocBook/lsm.tmpl
new file mode 100644
index 000000000..fe7664ce9
--- /dev/null
+++ b/Documentation/DocBook/lsm.tmpl
@@ -0,0 +1,265 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE article PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<article class="whitepaper" id="LinuxSecurityModule" lang="en">
+ <articleinfo>
+ <title>Linux Security Modules: General Security Hooks for Linux</title>
+ <authorgroup>
+ <author>
+ <firstname>Stephen</firstname>
+ <surname>Smalley</surname>
+ <affiliation>
+ <orgname>NAI Labs</orgname>
+ <address><email>ssmalley@nai.com</email></address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Timothy</firstname>
+ <surname>Fraser</surname>
+ <affiliation>
+ <orgname>NAI Labs</orgname>
+ <address><email>tfraser@nai.com</email></address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Chris</firstname>
+ <surname>Vance</surname>
+ <affiliation>
+ <orgname>NAI Labs</orgname>
+ <address><email>cvance@nai.com</email></address>
+ </affiliation>
+ </author>
+ </authorgroup>
+ </articleinfo>
+
+<sect1 id="Introduction"><title>Introduction</title>
+
+<para>
+In March 2001, the National Security Agency (NSA) gave a presentation
+about Security-Enhanced Linux (SELinux) at the 2.5 Linux Kernel
+Summit. SELinux is an implementation of flexible and fine-grained
+nondiscretionary access controls in the Linux kernel, originally
+implemented as its own particular kernel patch. Several other
+security projects (e.g. RSBAC, Medusa) have also developed flexible
+access control architectures for the Linux kernel, and various
+projects have developed particular access control models for Linux
+(e.g. LIDS, DTE, SubDomain). Each project has developed and
+maintained its own kernel patch to support its security needs.
+</para>
+
+<para>
+In response to the NSA presentation, Linus Torvalds made a set of
+remarks that described a security framework he would be willing to
+consider for inclusion in the mainstream Linux kernel. He described a
+general framework that would provide a set of security hooks to
+control operations on kernel objects and a set of opaque security
+fields in kernel data structures for maintaining security attributes.
+This framework could then be used by loadable kernel modules to
+implement any desired model of security. Linus also suggested the
+possibility of migrating the Linux capabilities code into such a
+module.
+</para>
+
+<para>
+The Linux Security Modules (LSM) project was started by WireX to
+develop such a framework. LSM is a joint development effort by
+several security projects, including Immunix, SELinux, SGI and Janus,
+and several individuals, including Greg Kroah-Hartman and James
+Morris, to develop a Linux kernel patch that implements this
+framework. The patch is currently tracking the 2.4 series and is
+targeted for integration into the 2.5 development series. This
+technical report provides an overview of the framework and the example
+capabilities security module provided by the LSM kernel patch.
+</para>
+
+</sect1>
+
+<sect1 id="framework"><title>LSM Framework</title>
+
+<para>
+The LSM kernel patch provides a general kernel framework to support
+security modules. In particular, the LSM framework is primarily
+focused on supporting access control modules, although future
+development is likely to address other security needs such as
+auditing. By itself, the framework does not provide any additional
+security; it merely provides the infrastructure to support security
+modules. The LSM kernel patch also moves most of the capabilities
+logic into an optional security module, with the system defaulting
+to the traditional superuser logic. This capabilities module
+is discussed further in <xref linkend="cap"/>.
+</para>
+
+<para>
+The LSM kernel patch adds security fields to kernel data structures
+and inserts calls to hook functions at critical points in the kernel
+code to manage the security fields and to perform access control. It
+also adds functions for registering and unregistering security
+modules, and adds a general <function>security</function> system call
+to support new system calls for security-aware applications.
+</para>
+
+<para>
+The LSM security fields are simply <type>void*</type> pointers. For
+process and program execution security information, security fields
+were added to <structname>struct task_struct</structname> and
+<structname>struct linux_binprm</structname>. For filesystem security
+information, a security field was added to
+<structname>struct super_block</structname>. For pipe, file, and socket
+security information, security fields were added to
+<structname>struct inode</structname> and
+<structname>struct file</structname>. For packet and network device security
+information, security fields were added to
+<structname>struct sk_buff</structname> and
+<structname>struct net_device</structname>. For System V IPC security
+information, security fields were added to
+<structname>struct kern_ipc_perm</structname> and
+<structname>struct msg_msg</structname>; additionally, the definitions
+for <structname>struct msg_msg</structname>, <structname>struct
+msg_queue</structname>, and <structname>struct
+shmid_kernel</structname> were moved to header files
+(<filename>include/linux/msg.h</filename> and
+<filename>include/linux/shm.h</filename> as appropriate) to allow
+the security modules to use these definitions.
+</para>
+
+<para>
+Each LSM hook is a function pointer in a global table,
+security_ops. This table is a
+<structname>security_operations</structname> structure as defined by
+<filename>include/linux/security.h</filename>. Detailed documentation
+for each hook is included in this header file. At present, this
+structure consists of a collection of substructures that group related
+hooks based on the kernel object (e.g. task, inode, file, sk_buff,
+etc) as well as some top-level hook function pointers for system
+operations. This structure is likely to be flattened in the future
+for performance. The placement of the hook calls in the kernel code
+is described by the "called:" lines in the per-hook documentation in
+the header file. The hook calls can also be easily found in the
+kernel code by looking for the string "security_ops->".
+
+</para>
+
+<para>
+Linus mentioned per-process security hooks in his original remarks as a
+possible alternative to global security hooks. However, if LSM were
+to start from the perspective of per-process hooks, then the base
+framework would have to deal with how to handle operations that
+involve multiple processes (e.g. kill), since each process might have
+its own hook for controlling the operation. This would require a
+general mechanism for composing hooks in the base framework.
+Additionally, LSM would still need global hooks for operations that
+have no process context (e.g. network input operations).
+Consequently, LSM provides global security hooks, but a security
+module is free to implement per-process hooks (where that makes sense)
+by storing a security_ops table in each process' security field and
+then invoking these per-process hooks from the global hooks.
+The problem of composition is thus deferred to the module.
+</para>
+
+<para>
+The global security_ops table is initialized to a set of hook
+functions provided by a dummy security module that provides
+traditional superuser logic. A <function>register_security</function>
+function (in <filename>security/security.c</filename>) is provided to
+allow a security module to set security_ops to refer to its own hook
+functions, and an <function>unregister_security</function> function is
+provided to revert security_ops to the dummy module hooks. This
+mechanism is used to set the primary security module, which is
+responsible for making the final decision for each hook.
+</para>
+
+<para>
+LSM also provides a simple mechanism for stacking additional security
+modules with the primary security module. It defines
+<function>register_security</function> and
+<function>unregister_security</function> hooks in the
+<structname>security_operations</structname> structure and provides
+<function>mod_reg_security</function> and
+<function>mod_unreg_security</function> functions that invoke these
+hooks after performing some sanity checking. A security module can
+call these functions in order to stack with other modules. However,
+the actual details of how this stacking is handled are deferred to the
+module, which can implement these hooks in any way it wishes
+(including always returning an error if it does not wish to support
+stacking). In this manner, LSM again defers the problem of
+composition to the module.
+</para>
+
+<para>
+Although the LSM hooks are organized into substructures based on
+kernel object, all of the hooks can be viewed as falling into two
+major categories: hooks that are used to manage the security fields
+and hooks that are used to perform access control. Examples of the
+first category of hooks include the
+<function>alloc_security</function> and
+<function>free_security</function> hooks defined for each kernel data
+structure that has a security field. These hooks are used to allocate
+and free security structures for kernel objects. The first category
+of hooks also includes hooks that set information in the security
+field after allocation, such as the <function>post_lookup</function>
+hook in <structname>struct inode_security_ops</structname>. This hook
+is used to set security information for inodes after successful lookup
+operations. An example of the second category of hooks is the
+<function>permission</function> hook in
+<structname>struct inode_security_ops</structname>. This hook checks
+permission when accessing an inode.
+</para>
+
+</sect1>
+
+<sect1 id="cap"><title>LSM Capabilities Module</title>
+
+<para>
+The LSM kernel patch moves most of the existing POSIX.1e capabilities
+logic into an optional security module stored in the file
+<filename>security/capability.c</filename>. This change allows
+users who do not want to use capabilities to omit this code entirely
+from their kernel, instead using the dummy module for traditional
+superuser logic or any other module that they desire. This change
+also allows the developers of the capabilities logic to maintain and
+enhance their code more freely, without needing to integrate patches
+back into the base kernel.
+</para>
+
+<para>
+In addition to moving the capabilities logic, the LSM kernel patch
+could move the capability-related fields from the kernel data
+structures into the new security fields managed by the security
+modules. However, at present, the LSM kernel patch leaves the
+capability fields in the kernel data structures. In his original
+remarks, Linus suggested that this might be preferable so that other
+security modules can be easily stacked with the capabilities module
+without needing to chain multiple security structures on the security field.
+It also avoids imposing extra overhead on the capabilities module
+to manage the security fields. However, the LSM framework could
+certainly support such a move if it is determined to be desirable,
+with only a few additional changes described below.
+</para>
+
+<para>
+At present, the capabilities logic for computing process capabilities
+on <function>execve</function> and <function>set*uid</function>,
+checking capabilities for a particular process, saving and checking
+capabilities for netlink messages, and handling the
+<function>capget</function> and <function>capset</function> system
+calls have been moved into the capabilities module. There are still a
+few locations in the base kernel where capability-related fields are
+directly examined or modified, but the current version of the LSM
+patch does allow a security module to completely replace the
+assignment and testing of capabilities. These few locations would
+need to be changed if the capability-related fields were moved into
+the security field. The following is a list of known locations that
+still perform such direct examination or modification of
+capability-related fields:
+<itemizedlist>
+<listitem><para><filename>fs/open.c</filename>:<function>sys_access</function></para></listitem>
+<listitem><para><filename>fs/lockd/host.c</filename>:<function>nlm_bind_host</function></para></listitem>
+<listitem><para><filename>fs/nfsd/auth.c</filename>:<function>nfsd_setuser</function></para></listitem>
+<listitem><para><filename>fs/proc/array.c</filename>:<function>task_cap</function></para></listitem>
+</itemizedlist>
+</para>
+
+</sect1>
+
+</article>
diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile
new file mode 100644
index 000000000..8bf7c6191
--- /dev/null
+++ b/Documentation/DocBook/media/Makefile
@@ -0,0 +1,388 @@
+###
+# Media build rules - Auto-generates media contents/indexes and *.h xml's
+#
+
+SHELL=/bin/bash
+
+MEDIA_OBJ_DIR=$(objtree)/Documentation/DocBook/
+MEDIA_SRC_DIR=$(srctree)/Documentation/DocBook/media
+
+MEDIA_TEMP = media-entities.tmpl \
+ media-indices.tmpl \
+ videodev2.h.xml \
+ v4l2.xml \
+ audio.h.xml \
+ ca.h.xml \
+ dmx.h.xml \
+ frontend.h.xml \
+ net.h.xml \
+ video.h.xml \
+
+IMGFILES := $(patsubst %.b64,%, $(notdir $(shell ls $(MEDIA_SRC_DIR)/*.b64)))
+OBJIMGFILES := $(addprefix $(MEDIA_OBJ_DIR)/, $(IMGFILES))
+GENFILES := $(addprefix $(MEDIA_OBJ_DIR)/, $(MEDIA_TEMP))
+
+PHONY += cleanmediadocs
+
+cleanmediadocs:
+ -@rm -f `find $(MEDIA_OBJ_DIR) -type l` $(GENFILES) $(OBJIMGFILES) 2>/dev/null
+
+$(obj)/media_api.xml: $(GENFILES) FORCE
+
+#$(MEDIA_OBJ_DIR)/media_api.html: $(MEDIA_OBJ_DIR)/media_api.xml
+#$(MEDIA_OBJ_DIR)/media_api.pdf: $(MEDIA_OBJ_DIR)/media_api.xml
+#$(MEDIA_OBJ_DIR)/media_api.ps: $(MEDIA_OBJ_DIR)/media_api.xml
+
+V4L_SGMLS = \
+ $(shell ls $(MEDIA_SRC_DIR)/v4l/*.xml|perl -ne 'print "$$1 " if (m,.*/(.*)\n,)') \
+ capture.c.xml \
+ keytable.c.xml \
+ v4l2grab.c.xml
+
+DVB_SGMLS = \
+ $(shell ls $(MEDIA_SRC_DIR)/dvb/*.xml|perl -ne 'print "$$1 " if (m,.*/(.*)\n,)')
+
+MEDIA_SGMLS = $(addprefix ./,$(V4L_SGMLS)) $(addprefix ./,$(DVB_SGMLS)) $(addprefix ./,$(MEDIA_TEMP))
+
+FUNCS = \
+ close \
+ ioctl \
+ mmap \
+ munmap \
+ open \
+ poll \
+ read \
+ select \
+ write \
+
+IOCTLS = \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/videodev2.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/audio.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/ca.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/dmx.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/frontend.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([A-Z][^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/net.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/dvb/video.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/media.h) \
+ $(shell perl -ne 'print "$$1 " if /\#define\s+([^\s]+)\s+_IO/' $(srctree)/include/uapi/linux/v4l2-subdev.h) \
+ VIDIOC_SUBDEV_G_FRAME_INTERVAL \
+ VIDIOC_SUBDEV_S_FRAME_INTERVAL \
+ VIDIOC_SUBDEV_ENUM_MBUS_CODE \
+ VIDIOC_SUBDEV_ENUM_FRAME_SIZE \
+ VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL \
+ VIDIOC_SUBDEV_G_SELECTION \
+ VIDIOC_SUBDEV_S_SELECTION \
+
+TYPES = \
+ $(shell perl -ne 'print "$$1 " if /^typedef\s+[^\s]+\s+([^\s]+)\;/' $(srctree)/include/uapi/linux/videodev2.h) \
+ $(shell perl -ne 'print "$$1 " if /^}\s+([a-z0-9_]+_t)/' $(srctree)/include/uapi/linux/dvb/frontend.h)
+
+ENUMS = \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/videodev2.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/audio.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/ca.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/dmx.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/frontend.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/net.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/dvb/video.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/media.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/v4l2-mediabus.h) \
+ $(shell perl -ne 'print "$$1 " if /^enum\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/v4l2-subdev.h)
+
+STRUCTS = \
+ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/videodev2.h) \
+ $(shell perl -ne 'print "$$1 " if (/^struct\s+([^\s\{]+)\s*/)' $(srctree)/include/uapi/linux/dvb/audio.h) \
+ $(shell perl -ne 'print "$$1 " if (/^struct\s+([^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/ca.h) \
+ $(shell perl -ne 'print "$$1 " if (/^struct\s+([^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/dmx.h) \
+ $(shell perl -ne 'print "$$1 " if (!/dtv\_cmds\_h/ && /^struct\s+([^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/frontend.h) \
+ $(shell perl -ne 'print "$$1 " if (/^struct\s+([A-Z][^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/net.h) \
+ $(shell perl -ne 'print "$$1 " if (/^struct\s+([^\s]+)\s+/)' $(srctree)/include/uapi/linux/dvb/video.h) \
+ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/media.h) \
+ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/v4l2-subdev.h) \
+ $(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/v4l2-mediabus.h)
+
+ERRORS = \
+ E2BIG \
+ EACCES \
+ EAGAIN \
+ EBADF \
+ EBADFD \
+ EBADR \
+ EBADRQC \
+ EBUSY \
+ ECHILD \
+ ECONNRESET \
+ EDEADLK \
+ EDOM \
+ EEXIST \
+ EFAULT \
+ EFBIG \
+ EILSEQ \
+ EINIT \
+ EINPROGRESS \
+ EINTR \
+ EINVAL \
+ EIO \
+ EMFILE \
+ ENFILE \
+ ENOBUFS \
+ ENODATA \
+ ENODEV \
+ ENOENT \
+ ENOIOCTLCMD \
+ ENOMEM \
+ ENOSPC \
+ ENOSR \
+ ENOSYS \
+ ENOTSUP \
+ ENOTSUPP \
+ ENOTTY \
+ ENXIO \
+ EOPNOTSUPP \
+ EOVERFLOW \
+ EPERM \
+ EPIPE \
+ EPROTO \
+ ERANGE \
+ EREMOTE \
+ EREMOTEIO \
+ ERESTART \
+ ERESTARTSYS \
+ ESHUTDOWN \
+ ESPIPE \
+ ETIME \
+ ETIMEDOUT \
+ EUSERS \
+ EWOULDBLOCK \
+ EXDEV \
+
+ESCAPE = \
+ -e "s/&/\\&amp;/g" \
+ -e "s/</\\&lt;/g" \
+ -e "s/>/\\&gt;/g"
+
+FILENAME = \
+ -e s,"^[^\/]*/",, \
+ -e s/"\\.xml"// \
+ -e s/"\\.tmpl"// \
+ -e s/\\\./-/g \
+ -e s/"^func-"// \
+ -e s/"^pixfmt-"// \
+ -e s/"^vidioc-"//
+
+# Generate references to these structs in videodev2.h.xml.
+DOCUMENTED = \
+ -e "s/\(enum *\)v4l2_mpeg_cx2341x_video_\([a-z]*_spatial_filter_type\)/\1<link linkend=\"\2\">v4l2_mpeg_cx2341x_video_\2<\/link>/g" \
+ -e "s/\(\(enum\|struct\) *\)\(v4l2_[a-zA-Z0-9_]*\)/\1<link linkend=\"\3\">\3<\/link>/g" \
+ -e "s/\(V4L2_PIX_FMT_[A-Z0-9_]\+\)\(\s\+v4l2_fourcc\)/<link linkend=\"\1\">\1<\/link>\2/g" \
+ -e ":a;s/\(linkend=\".*\)_\(.*\">\)/\1-\2/;ta" \
+ -e "s/v4l2\-mpeg\-vbi\-ITV0/v4l2-mpeg-vbi-itv0-1/g"
+
+DVB_DOCUMENTED = \
+ -e "s/\(linkend\=\"\)FE_SET_PROPERTY/\1FE_GET_PROPERTY/g" \
+ -e "s,\(struct\s\+\)\([a-z0-9_]\+\)\(\s\+{\),\1\<link linkend=\"\2\">\2\<\/link\>\3,g" \
+ -e "s,\(}\s\+\)\([a-z0-9_]\+_t\+\),\1\<link linkend=\"\2\">\2\<\/link\>,g" \
+ -e "s,\(define\s\+\)\(DTV_[A-Z0-9_]\+\)\(\s\+[0-9]\+\),\1\<link linkend=\"\2\">\2\<\/link\>\3,g" \
+ -e "s,<link\s\+linkend=\".*\">\(DTV_IOCTL_MAX_MSGS\|dtv_cmds_h\|__.*_old\)<\/link>,\1,g" \
+ -e ":a;s/\(linkend=\".*\)_\(.*\">\)/\1-\2/;ta" \
+ -e "s,\(audio-mixer\|audio-karaoke\|audio-status\|ca-slot-info\|ca-descr-info\|ca-caps\|ca-msg\|ca-descr\|ca-pid\|dmx-filter\|dmx-caps\|video-system\|video-highlight\|video-spu\|video-spu-palette\|video-navi-pack\)-t,\1,g" \
+ -e "s,DTV-ISDBT-LAYER[A-C],DTV-ISDBT-LAYER,g" \
+ -e "s,\(define\s\+\)\([A-Z0-9_]\+\)\(\s\+_IO\),\1\<link linkend=\"\2\">\2\<\/link\>\3,g" \
+ -e "s,<link\s\+linkend=\".*\">\(__.*_OLD\)<\/link>,\1,g" \
+
+#
+# Media targets and dependencies
+#
+
+install_media_images = \
+ $(Q)-cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api
+
+$(MEDIA_OBJ_DIR)/%: $(MEDIA_SRC_DIR)/%.b64
+ $(Q)base64 -d $< >$@
+
+$(MEDIA_OBJ_DIR)/v4l2.xml: $(OBJIMGFILES)
+ @$($(quiet)gen_xml)
+ @(ln -sf `cd $(MEDIA_SRC_DIR) && /bin/pwd`/v4l/*xml $(MEDIA_OBJ_DIR)/)
+ @(ln -sf `cd $(MEDIA_SRC_DIR) && /bin/pwd`/dvb/*xml $(MEDIA_OBJ_DIR)/)
+
+$(MEDIA_OBJ_DIR)/videodev2.h.xml: $(srctree)/include/uapi/linux/videodev2.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/audio.h.xml: $(srctree)/include/uapi/linux/dvb/audio.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/ca.h.xml: $(srctree)/include/uapi/linux/dvb/ca.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/dmx.h.xml: $(srctree)/include/uapi/linux/dvb/dmx.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/frontend.h.xml: $(srctree)/include/uapi/linux/dvb/frontend.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/net.h.xml: $(srctree)/include/uapi/linux/dvb/net.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/video.h.xml: $(srctree)/include/uapi/linux/dvb/video.h $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<programlisting>") > $@
+ @( \
+ expand --tabs=8 < $< | \
+ sed $(ESCAPE) $(DVB_DOCUMENTED) | \
+ sed 's/i\.e\./&ie;/') >> $@
+ @( \
+ echo "</programlisting>") >> $@
+
+$(MEDIA_OBJ_DIR)/media-entities.tmpl: $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<!-- Generated file! Do not edit. -->") >$@
+ @( \
+ echo -e "\n<!-- Functions -->") >>$@
+ @( \
+ for ident in $(FUNCS) ; do \
+ entity=`echo $$ident | tr _ -` ; \
+ echo "<!ENTITY func-$$entity \"<link" \
+ "linkend='func-$$entity'><function>$$ident()</function></link>\">" \
+ >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Ioctls -->") >>$@
+ @( \
+ for ident in $(IOCTLS) ; do \
+ entity=`echo $$ident | tr _ -` ; \
+ id=`grep "<refname>$$ident" $(MEDIA_OBJ_DIR)/vidioc-*.xml $(MEDIA_OBJ_DIR)/media-ioc-*.xml | sed -r s,"^.*/(.*).xml.*","\1",` ; \
+ echo "<!ENTITY $$entity \"<link" \
+ "linkend='$$id'><constant>$$ident</constant></link>\">" \
+ >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Types -->") >>$@
+ @( \
+ for ident in $(TYPES) ; do \
+ entity=`echo $$ident | tr _ -` ; \
+ echo "<!ENTITY $$entity \"<link" \
+ "linkend='$$entity'>$$ident</link>\">" >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Enums -->") >>$@
+ @( \
+ for ident in $(ENUMS) ; do \
+ entity=`echo $$ident | sed -e "s/v4l2_mpeg_cx2341x_video_\([a-z]*_spatial_filter_type\)/\1/" | tr _ -` ; \
+ echo "<!ENTITY $$entity \"enum&nbsp;<link" \
+ "linkend='$$entity'>$$ident</link>\">" >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Structures -->") >>$@
+ @( \
+ for ident in $(STRUCTS) ; do \
+ entity=`echo $$ident | tr _ - | sed s/v4l2-mpeg-vbi-ITV0/v4l2-mpeg-vbi-itv0-1/g` ; \
+ echo "<!ENTITY $$entity \"struct&nbsp;<link" \
+ "linkend='$$entity'>$$ident</link>\">" >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Error Codes -->") >>$@
+ @( \
+ for ident in $(ERRORS) ; do \
+ echo "<!ENTITY $$ident \"<errorcode>$$ident</errorcode>" \
+ "error code\">" >>$@ ; \
+ done)
+ @( \
+ echo -e "\n<!-- Subsections -->") >>$@
+ @( \
+ for file in $(MEDIA_SGMLS) ; do \
+ entity=`echo "$$file" | sed $(FILENAME) -e s/"^([^-]*)"/sub\1/` ; \
+ if ! echo "$$file" | \
+ grep -q -E -e '^(func|vidioc|pixfmt)-' ; then \
+ echo "<!ENTITY sub-$$entity SYSTEM \"$$file\">" >>$@ ; \
+ fi ; \
+ done)
+ @( \
+ echo -e "\n<!-- Function Reference -->") >>$@
+ @( \
+ for file in $(MEDIA_SGMLS) ; do \
+ if echo "$$file" | \
+ grep -q -E -e '(func|vidioc|pixfmt)-' ; then \
+ entity=`echo "$$file" |sed $(FILENAME)` ; \
+ echo "<!ENTITY $$entity SYSTEM \"$$file\">" >>$@ ; \
+ fi ; \
+ done)
+
+# Jade can auto-generate a list-of-tables, which includes all structs,
+# but we only want data types, all types, and sorted please.
+$(MEDIA_OBJ_DIR)/media-indices.tmpl: $(MEDIA_OBJ_DIR)/v4l2.xml
+ @$($(quiet)gen_xml)
+ @( \
+ echo "<!-- Generated file! Do not edit. -->") >$@
+ @( \
+ echo -e "\n<index><title>List of Types</title>") >>$@
+ @( \
+ for ident in $(TYPES) ; do \
+ id=`echo $$ident | tr _ -` ; \
+ echo "<indexentry><primaryie><link" \
+ "linkend='$$id'>$$ident</link></primaryie></indexentry>" >>$@ ; \
+ done)
+ @( \
+ for ident in $(ENUMS) ; do \
+ id=`echo $$ident | sed -e "s/v4l2_mpeg_cx2341x_video_\([a-z]*_spatial_filter_type\)/\1/" | tr _ -`; \
+ echo "<indexentry><primaryie>enum&nbsp;<link" \
+ "linkend='$$id'>$$ident</link></primaryie></indexentry>" >>$@ ; \
+ done)
+ @( \
+ for ident in $(STRUCTS) ; do \
+ id=`echo $$ident | tr _ - | sed s/v4l2-mpeg-vbi-ITV0/v4l2-mpeg-vbi-itv0-1/g` ; \
+ echo "<indexentry><primaryie>struct&nbsp;<link" \
+ "linkend='$$id'>$$ident</link></primaryie></indexentry>" >>$@ ; \
+ done)
+ @( \
+ echo "</index>") >>$@
+
diff --git a/Documentation/DocBook/media/bayer.png.b64 b/Documentation/DocBook/media/bayer.png.b64
new file mode 100644
index 000000000..ccdf2bcda
--- /dev/null
+++ b/Documentation/DocBook/media/bayer.png.b64
@@ -0,0 +1,171 @@
+iVBORw0KGgoAAAANSUhEUgAAAlgAAACqCAMAAABGfcHVAAAAAXNSR0IArs4c6QAAAwBQTFRFAAIA
+CAICAAQVEQEBAgsAJgECAAogAwsTAQopHQYBNAEAAAxNARQAERIQAhoDABwAABZEHRQKGRYKQw0F
+ACMBACUAERwpHR4cVRAFBR5rZhADACR2JiIhBDAGAiWGgQ4AcxQABDYACSeQMSYlJykmESxYlQ4A
+PSYZIS05OSsJHS5JOC8kAEMDUC8SADXLNDUzADbEAEsAADX/2RABCFIAAD/qxB0AAD//BFgAK0Vp
+WT4r3hwA3RsTRERAAEf/5CIA2iYCCUv+WUgz7iIAOk5g3CgVSU5SiD8uB2sABm8AE1X/U1RQOFyL
+4jkfIlz/RV98M1j+G2H/fVk23jtD4T0pXl9ieFtGcV894UIiYWJfAIwA50gOV2p+4kssO2j+dGZx
+bG1qVmj/OHH/aHJzfnBX5lQ7B50AZnahdXd0AKUG5V1ARnz/6mErCqgAAKsAent46GBIW4GhAK0A
+AK8B42FtALIOin9/ALUAiIOBALkAVIf/6WxWg4eBi4SKJrEAmoVtdY2geoP/rYVXhoyOqYVuJbUh
+IrgWX5D/jo6J7nszP7gAsI9S63xnN70zZqO/fZzCOb4+cZr+64dy8otYnJ6b7ImDRcM56IqcWMEo
+oJb/N8ZoTMRL7Y9/QchcsaOTo6eohaj/7ZqKXspXj6v9xal+oK+7d7vTUM+Afco5r7CumLTVStKV
+bs9ukbb/9qx/9q9l8queoLv/e9R66beG7rDImNRhi9aDwsPAs8bWzcK2cd67jtqP5MWUodyB8b+1
+tMr/z8L/j9+kbOXWnN2ZstD7yc7Rzs7Ly9xb183UwdD/+si/qeOmvuKIx9fj4tPCtuWiqOrL+tS2
+y9v++NPK2dvZt+m0ueq80+Wo3OeSwuy/yezG+d7f/eS/z/DS3uf/6Ono4PC71O39xPb02vPZ/+nR
++Ori6e399+vt+PGz+ur65fL55/Xb4vbh7ffX/PPY8vP9+vLy6Pf36fjr/PfM8vjr//f+/vn48P36
+9vv+/vzf+fv4/fvu//z7+v7//P/7/v/8//QpxAAAAAFiS0dEAIgFHUgAAAAJcEhZcwAAFY8AABWW
+AQ2TT8cAAAAHdElNRQfaCRQXGSltwbPRAAAgAElEQVR42u2dDXwU1bXAZwEJtEaNH1nbh68fpoWK
+iE1ao2Bgo9RqIrEg+BIFmqLYLOlMcHHlU6DiQmrJM2jKo0QIBHgUjD5ETcQIlKq0gKDmA+UjiRAT
+BCOBkGzC5re/++6987Ezszszdzc7s9jfPa2wO+zMPefc/5575t67Z5hB/0Ek/W668xckcmVmQZ5S
+CvLmgshl4QCiZu+8ntCOgWlzVfrl5ZZFrl6T/VYSv9x5K3Pj9wnkh9fFFxQE6VcVqXY+8PjgH5K0
++/0bBxDaYcsN0i+vLlTbzH9kjEknkEF3zptjLPPmXL2VwGC/nxysm+YRyc+/S2bHNYUgmtJkf5RI
+vScH3HEvifz05mhqB8G68d6xJO3ecSWhHXYfYdvM99LHGEv6mEF3zmFJ5Gr49e9qVUh7O/wP/w/9
+gf4EXnKwbpjNGQs779bvktlxzULg7TCQzvDAItBvzqMD7hjrMJaxPx0Cv3OdBvqFBRZJs46xCCwi
+O+xNwNfSclom6F2L4j1A/UsG1hgI1jyWUzLEKf/gX0CwevIzsvSlJoyh8IY5LmPhEFhEhsCI9b7L
+oy/uI2GBRaDfPATWaGO596dDADhioJ+7PKyI5SBoF4NFZAcEa6ZjvL7MOg9MAWtPxv4aHdlfM315
+TMHy7Gg4pifN5cUxBMsPisub9dRrqHc1xBCsC7vHH6jVlQOO3eGBhccc9B+rGIWkP/ALBNYEA3uX
+xxasooMGbVaWxhSs0kr9Njs8zbEE60C2UbOTTAOrR6/ZHjB/ZWzBet+gzR0xBmuHfpttsQbLIEP2
+ZpsGVrsBWMspWBQsGrEoWFEAK1UUDbBkQEkJu+Ko+WDxDRmApWmH+WCF0u/bCFYIMyIHK30CL1kZ
+Y1J17wo51snhW1/4d9BdoZlgcZx7mcezzM1yemBp22E2WBzL66fsExVYjmxBxsNed1gHVra8XX2w
+WBc2A/4dDbCSp4v/2PrGb1L1hkKnZ8sRNFH39cel6K1lQyFbvLcZXf3YrmWsNlg6dpgMFltc3dAN
+j3+zazWrCVbKBun8ltcfS3FYBpb0D721L+uCxXoqxO5VfEMiBmsa6BL/+UxWqhZYMFytPSVd5yMU
+qKxJ3jlub7f4D5f+xmqDpW2HuWCxr0r69b7N6oAV6JsTj6VYBpaciP9L0QaLVXQv13ewUqeBdjyS
+ZM0/Cf6uBRbkak03uLSraBHnWfsJAJ/LEi2TIxZs7bPyZS6XZwu0XEaWCiwdO0wFi3sXgC/K4QDi
+qfhEoV8QWNtT8FLK+L90gddHWwjWjNGw1dG/mgW7/jFNsFjYvd/sKnK73Kh7P4oSWHw3JOcDkJGq
+BVbxBfD5IidKqpzOV/3gb05rwGJfRXEAfYM41nMKfMXpgaVhh5lgsVsAeJvj9YOMXVrE6YAlvHwa
+XJSFLJPBOg8m8W2lpLwFQ5YjNFgc6t45OFCx0OVgNRu1iIVEByznu+ArIUixnPMfKGRZARaCSRpf
+ENx/4wwiVgg7TASLc52CA4f4BiobCFmaYDlSusBUC8GaGgC6VgssFnavS3QtC7uXiyJYMP09o5m8
+O2GfOsW8il1TudoisF4FX8hGvy3lc1yGYAXZYSZYa+RBitvy9hyXIVij744RWP+jDRb8ygaCFLdm
+x7KoJO/tyWj2Jz3/JPhjssY8lnNL91cvsNL8KOtk1fNY5iTv3D/AP2UJMaubvGvZYSJY8Jv+T04+
+8eAyBCsFdvBXVg6F2UK7k85oDoUs7N5FsiwjSsk7v5cKkqHsD3nEcm4BnznxHINTENaaCVJpcBGn
+zXQilpYdZoL1iThSB+kXBNbu8VOhzFhwAICXrUzeF2RPnTpp6qy/nAG9YzWSd5gpfqZhRl/AkpjY
+P0HrrtBZDQ468ZuKHVgqXdYk793Ag4zkllXyDZfq5FhadpgJVjMoxZ3g3sHrV84ZzmMB8LpjdCym
+G3r/oDXdwFaD97EZHG9FxQ53VHKsadOh5K8/q51jYbDwC/FSiywFixX7/Sirk2Np2GEmWA2gHOvn
+Efe3aCfvXiTA27J9lpVLOl7cLvyH2g2PaU6QSmCx4mXcXDTASkaSmpxxEvw1VXsofBLPt79/9AgU
+2DJr5VDIFh2rh9IM6vXA0rDDgqGQW4b1awAN+neFvzoDvpTPjlqVvD8Nw+ToFG2wxKGQO3gUmnEk
+GmAlS/M/Y5KXg5pkLbD45F3IsdhgsExO3vHS5JMV2mDp2GFJ8o71KzYCK+VhSJYjxXKw4A0DeF0P
+LDF5xxOVXLQiltg384PAktaanxSmG+D9AkrtEFhWLEKzr4Jv+FsUNOizO/QjloYd5k439C6SVIID
+doPRPNbTXeA96yPW6JS3AFCkWMrpBg/qXmmYcEcbLO2IxTrfBRdfcAqYOZ1WDYVozvGf0s2vkxAs
+6yIWGqs/l9ZsnWtBsxFYKHa8bOEitDiPBQfhc49prhWyqHuliWhX1HIsvI1JL8eCMJ0CF1ezeBxk
+iz+xLMdCSyYfzRZugbd0gCO6OVZoO0xd0lnTDT57QdiktqYDtBnOvMPYcc7CRWhpghSmWW9qgoVW
+EC6u5uMGh7s3KmBNQzJ9+UnQpTnzzjmLTwHwRUVxcemWBnjnusuqRWi0ctX5cXlR8dq9HQB8s1pv
+SUfDDlMjFkxPQO/H5auKy/e2of0XhmuFKQ93gTctHwpHO1ColA+GqkVovntXFQndG5WZd0m6fqe9
+bYZzej6RPvjZ6qAJUtP2vLNrpP0c53bNYXVm3rXsMHnbzFrJL727XtCbIA0srYA/pVg33SAu6dx9
+BpyQ3Teot80oujc6E6TtWBpr1mfobPRDUrzlSEfH10d3FcEbBws3+rnX7m3o6Pjm43K9jX46dpi8
+0Y9zFe891tHZ/HHFMo5zEawV/uo4+HKsVWCdli1C+2F2p7nRj+OK+O7dUeRio7vnPdVoazIr3/Ru
+4dZkce2bI9vznmr51mRh2wd72e95T9HdmhzKDEt+paP4MQX9+Rf9lU60wKI//6JgUbAoWJczWMRF
+QehQSMEyJWJNM7B3eYwj1re8KEhnjMGaZNSsaUVB0tcrZaPqbVaMyxiVVlcqRP22KLZljEqLlApV
+q97uiG0ZowOOVzboyitmlTECK6fly2V6fr7qfXtMwTpaVKyUUtX74uYYggVAs1o9lX5F1SCGYDWB
+l2bMVMos5dsZL4HwwTIQFwYrmmICWNEUM8CKnpgDFpmEAVZQM263+shsl1ZxWz/6H/oD/ukPC6x5
+s42L6s4mrEFqClgkRX8hWPeONRYzwBpN0i4Ci8iOkGB5Q7xjbP2CZGDwoX62K29Qy/U33RB8bEDS
+SLUkpfUlYjE3EMmVIewIJTZ7sH4FfQHrqhuuV8tNQUduuJrpTyQ228hg/UoiByuXsN3+A64OtiPE
+kauYEP0bslw4c9MD9xPIA9d/5wc/JJH+uWUlaunL6Di3P1GzPxhMaMfV920N0q8qcvVO27/34/80
+lh9/b8D9D5DIz+3B7ivZFzlYv73+AaKG7x9AaEd8YbB+IUdH5hdkddR/9H2iOuX3XrE1ujnW3O+Q
+tXsdqR3PRnko/GUGQXX5jNsYjki9B5JIWvWSg3UrmVtY5jYSO9J/SV7n/efzOJKsDYI1mkSugOGp
+7ai+HAsLrLEE2afj3uvI7JhzEwTrgJGEA9ZtRPXlbx/wJMlNCA/WfgNpB/4wwCJyy5PM7UQ56u0w
+x2o7YtC/bSaB1eZx6xcqd9XHFKyXpLpnGuLYQBwTog+WF7wmlo3TkIzp7SB2YJ027F63p80csOoX
+dXR3aksHKC2PKVjZG8BpPQEvzYgpWPkrhd1koaWnJqMmhmCdqXd3dOpJd4e73hywjngM7C2viClY
+M7YbtPnKrFiDpSutWY0xBcuoe4HHNLC6KVgmgtUYa7AM8ncfBYuCRcGiYH3rwRJ+UKYLVookVoLl
+0Gw3FFgh7TAZrNRkQVKNwVKXCLIIrNBuUYKlZUZfwOJYd3FpeemqZawOWI4VCwSZ6bAyYk0V2501
+VVnzIBgsDTvMBSt1+vL5WPKVtZNCgMW6iqB6pcs41lKwtNyiACt1gmjGNHWZ/IjBYj17+T0jX+9a
+xGqCNT5wlZbXrQMrJUBEb+0f5D9NDwJLyw5zwUreLx4/80Z6qg5YrGvLMeykznplPXiTwZLc8o3K
+LQqwkqX9cl5VdbGIwWLXXIDGNjc0dwBwSfFLRWXEAoB/NN3xLgBetw6sDeC00C5UT/5LXjVYmnaY
+DNYe0IoeydgIe75GBywO/SC0t62hARXpV5S7NhcsdouWW9RgdfFm+EGXskx+hGCxW/yoTjnHch6o
+wsUXdMDi053Rk94CQFFewmSwtgsp1oIz4M2xmmBp22E6WCtxapK+shv8MVUTLM8p0LurCFVRXauq
+B28qWKj2hcwti3TAqklORRlWvrpMfoRgeU6Cz4VfvqLyDB+x2mA5UCV62OV3v6V8xoHpYOHC+6ic
+9CUZ0CqwtO0wHaz1yWPSUfb7GngjWQss9l0UL4QSVKgevEVgofrtvFtw9Y1drA5YqenIjuT5UqGx
+voCFCnzPFltzvgo+l1XADwZL6Oy/SHUIrAFLfH0azNACS8cOK8DCr1aCPVpgscXdgSjFek71yoqH
+mAkWrt+u4ZbgiKWuYNcXsIrlNe9dntJlLpcxWG8pC+JYBdbDivroSrB07LAALFw9acwH2kMh7ODP
+ZflN6arZ1kQsPbeEAAvbsTIaQyG79pQ8HXEpCnyHzrFSUHGJP8Ugx4Ij8InHNHIsPTtMB2vjmIyM
+jKzpe5QdohwK/6GsB29R8q7rliCwxmRBM6at7z7zm2iABb7RLPCtBmsFlld2A/CplXeFtQtwuxvO
+AHmxFDVY2naYDpbU+2O0wTolPPmBcwbVgzcVLB23aEw3gK7fJfd9uoEtB8f4Osw7ULnc+vpjHlYL
+rIDjP1UW/jUZrIC8PFoTLB07LAML7E/XBMsnlBUv4tU7uoO1BKwK0S2VQrsezhAs0Pi71KiB5XaK
+v6srZnXnsbygd/tMVWFnk8FqOYAnsb58KVt75l3PDvNzrFS0E3nCym7FWKgEqxsUadSrNxUsyS1t
+wW4JcVcIBT2VrysKEWut/yIfossr0SMJOsEqVjfHelo9O2pRjvUW+FJZ9Fc9FGrbYdFdYWry/G4g
+G0XUQyFOojkPUq/iiKxIr7lDodotRazBXWFqctZJ8NfkKCTvwnQsXw65Qw8sNI/FFwxPsRYs9BzH
+46D3MZ2IpWOHVdMNY1JrwHwNsHTq1ZsJFgfd8oLCLYZgwZfrFfNxkc5jfSKfS2QNwBIKhv/J4oiF
+XkxCFTS1F6F17LAMrGRtsFhUDz6g7A6LwFK5hbMQLG4NWl/gxJKMHXo5Ft+vdx9XFQy3BCx+ENZe
+hNaxwyqwUtNPakcszyk0A87x6jmrZWXFzQULAh1wC8z0VhmClZr6RjTAQlN34O1l+HET7jUNQIa0
+BlgpDwNFOWmrJkhhqOzVWYTWtsOatcLk5DGvgTOy/Q2qJZ21F8AXq92ouoq7aK8ffMxatFao7ZZg
+sPj9WMv9QHFbGCFYnAs23ftxZcWOgx3oOezGM+9BT8+waOYdDoafai9Ca9thOlh7lq+Esr4GKJJe
+1SI03nzxBVSvGpW9/uwFa5Z0VG659LbOPFYjNmPlHgD+nhyV/VicVEi996NlrM5+LLG3YQ9flG+6
+Mxms3YFnGsufIBm0H0vLDqv2YwGwUXc/VvFe8XNflLo4y/ZjabpFcx5rf3qUdpCyruLqg0cOVpe7
+We2Nfo7aA9Ja4YLa2plWgbXi+EvSIvT22t1jdXaQathhMljra/BPlfe8sVK5jSloBynLeir2HqlH
+5eBZ6/ZjSW6pVLtFCVa+YMaejdNTo73nnTXY8x76tfl73h2ybfcke97Z2Ox5Tybd887FZs87S7bn
+PWjTu9m/0nE4ZC8dlu15d2i1e9n8SkeonfFt/5VOuoYd9Odf9OdffQKL/q6QgkXBomBRsPoKlo+C
+9e8MllGzZoFV7+4EPm3pBqWxBSt7A/DqyWVQxqirpwv+H/6BRfybF9AY4zJGHt3u9YFOs8BqVlfi
+KlIXXjsYU7BWOCYpC61NUr6f5NhArJ4ZYK1Pn6astKZ6mzWtNYZgnf7aYyjNPFizSeowQ7DGkgiq
+QdpWf0QhR5Vv64+CcMAiaheBRWZHqFKRu1UCog7WbQOcROWucanID5RSs3+PUlrDKhVJ5BYnQ2iH
+vQl8repetdTj/ZXMreyTBML+6EbHHSRyRYmv6fQZlYDI5ZnvELU7+joyO5w3PXO+6YJKuiNXr8l+
++5hfGkv67cyjThI3329vamrqVYu61TCK2/6IzC2PwohFYAeMWB8Gd29IdZgBVwbJVVcFHxtgG0wk
+tiH2IBnZB7BKCNu9NpQdwYeuZOKD1IvP7QNYSf0GBsmg4EP9mBC6XB3iWLB69viIn3ngA8+GajeU
+MKR2BOtnD13nPbNuH4HUjcwl+ty+pMLgz9X1BayRZPpl9sGOPujXNKSs7kNjqSuzV5HoV1eYFOJo
+U+Rg5RK6pcreBztCTgIwhF/XtKVkn0siqfPeRe6bQsLHWuROJrRjIYimNNnJqKyznyf63NakaGrn
+Azk5ZJ/sIraDpM67VwCrcf1GXVnfDjtkLgANldX6gsAqA2C//vXWv0acJPBgvW/QbmUbADkQrI0b
+CewoAJ1GZlSHAxYcB+r1L1gJ7773oWfQbDd4HNsBASwD7SobwgLLf3yDgWzn7TDqXtGOAxsM7fBi
+sHqmTcifriP5WfkYrA6P+nlsKnFVYLBqMqZN15X0jWFFrGqXfrPFniIfAmtlhq4Zgh3PglJ3qbEd
+YYBVb6Sfqx53yAbHzBm64qiFYPlAhUtfvyJPWzhgeWdkz9JtdqZjA7TjX4bdy9txoXb8jBmGdmCw
+2rMMJtzemIDBanYbGFRZjMHak2VgbziP7oVgVRg98PSYuwOBZTRjzdvxLPAYPmG1OCywqosMPlRU
+jcFascDgc9m7MVhGj+7tcDeEA9bp8bUGH1uwAoH1tbuDxI4LB7KBsR08WBP2AP6Rb/5QAjtkGg+W
+0SNj0bOUk/hnQoe8EN9GTwRg6Q/sxzydAlh+YzuejfIzoavA+0ZgreLBemmBfgrgJQQrnGdCQ7DO
+Zx8wSIh4sNoMA+EqASyyZ0IjsPYbfNPDBSuKT7EnilgSWAR2ULAoWBQsChYFi4JFwaJgfVvBajcA
+azkFi4JFIxYFi4JFwaJgUbAoWBQsChZN3ilYNGJRsChYFCwKVphg5RCCVUAG1pCS6A6Fc0eSgNUB
+cu4jBKsgumDFE4IVTwZWmT3KYGWC00RgxROCFU8MViEZWLklZGDlVEU3YpXlkkWswgIysKAdUQUr
+s44IrLpMMrCqMkFUwVo4lzBiZf7raxKw6jK7ScECRGChaxGBBSWqYEEhAgsKCVjQDm80wUJCAhYS
+ErCQRA8sLERg4e5tI7Jjd1TBQjuiiMDygegOhfCCZGD1kEUsnzeqEQvZSwSWjxAsX5TB8hGC5SME
+yxdtsC77iOUnAwsKjViXU8QKC6xoRiwKFgWLRiwKFgWLgkXBomBRsChYFCwKFgWL3hVSsChYNGJR
+sChYFCwKFgWLgkXBomBhsGoIwTIoR1IpgmWg4PIwk/dygw80IMUgWOsJwTKsNhNlsIolsPRlkgBW
+pf7HOsIFy6jazIoVRN0r2LHbsNrMJBGsjNcaa3SkcT1fl6jBVd/coCNtFXz5nz0ZNcrrqa7emB8m
+WMVtDbrtHnR1oC9e/nxdMyQ7PJUG16soDku/ao+uWxqaPTwpK2Ycr9WV8TxYxeW6+jUfcTWEo97p
+8dv12z0+cwXfvUeI7Ng9vraWwA4IFliZkaWQaRMUbydk8KHAV+7WL+8t1G9vn66+nvJtFnEBfGGk
+W2RQVhwXSvNDoLN0RbSj0uUhsoNUDOvaCxGmdrxKpirfOma04M/VG+jnLveFpd8Kh7Kd7Gy1IgeI
+ulewo2WG6nrjs0PZwaCa4Y2tja2tjY3wL/g3fo3+j9/gF9LorpQO1Xt+jPaDdnxuo3AJ8bKyNkjr
+lIuxv81AhM81tirsaNWyo43wepHqp37fKeQyLcdb9OT4eSEHazO4XpjqAf1moVZ8uz4jt3TyZpw3
+uh62gyFSzQ8uf/H/m9jxbyIMdQEVChYVChYVChYVKhQsKhQsKhQsKlQoWFQoWFQoWCD0g0V8fvUL
+2SdDbKDwmqqu1xtQQd1SqCNBp/WYrKDkpR5/kEt9BKf5zFscUDTfE/zSq+llXwTdq4hYWwvmIlla
+8o786M6SwmeXbj6ruOjhrYVzl5YdEo41FSycK5z5odnfhJadJagZ6XG7hULLSBm0ZFNXoDgiSi86
+benmdtO/qYGGsGuqeJfOXbi0rJVfUtonOHnp5h6VlxeqvRxlrsokXTa3KjpzHWq6Sd408vKzopel
+M5eWHIpsKExjBIkfd1LEc93wBHxo2JRuiebD9wyxoWOJ4w7hz9QxktinmNp3LYtvxi3HD1si+EFs
+2JY4hf9yyHXhu9ILehcPF0/zmxey4IV7BXfFDVvSjRvKlanzIj5SKB0Y9g7Q8rIpYKUxoZremZYg
+eOuk6JqAlzerzhTACBesTGZkDpTJsKlbeANbHoH43Dc5J3M4w9wiXvP5BCYOHUuzMQO3ocel1jHx
+OVgyYf89ZOJ37vBQ6JP7YDsJjO0hXsN4JlNs+SkcIhi7qAsDj3Sh074cJZ3G3GVaz6H9C7Ahu+Cu
+u86iYwVMkqBOAtNvG3JNCX9kMjww6JD8tEzey2dN857QvTk5sH8HviN00xM2Ji5tMvZfIj7mlbyc
+hrzs589ME8Eg9Z8KrBL+xbqh/V7EcX0iE7fkEPrWt6yTrvmcjZnyYQ+Ol6OYQTU4YsVLEaV/3Aem
+9dyXI5jEzdjxdYttzO9xOLIzTfwQ9Hx/rEsVIz7bvOURW2KNeNqSJv60BHiaecPgKNjQId4PCcyD
+PFjis5EPj7Jh1kqYTOHACPgRv+RlIHj5Qb95YJWIugy1Pci/eo6JEzpz3XAhdEB3DdvcJHr5KcWZ
+6wbzYIQNlvjA9CeY3yKbN6GQJMh7gxkcGE8k2J4Sj50bxUzhwRLzu97/Mq/n4LWlqAnxjsOv7cw+
+4V+HM1sxWElisnnpZ7YXkRXPMbdIucEm4bToC24oUcpON/W3HVKABb66FkeoEiYNKojo2cQMA7yX
+A6dBL79jHliFUjPMNThlec8WJ4FyYiizJNjLiScVSCIwIgfLDxZiPHqHMjI+JyIdusAjzDi/6Enw
+pu2hs3Kw/HJPRrvjDg+OCzj93IjEDySwvH6o2HDbZgVYXYI9h69gtgXgHJG4zaxtWS3XMbJrTxy4
+TeUO/jvAg4XzdeYWrNJ1zIuB9GFi3IsWgNXL2M6jUW8UzFykf3+OeQpqj73slcYI6OUAWH6QRxo3
+QkescyP6ob54b8A1rYF/PXcIJiz+iz/jUwW+oUsfnvdaFrH+zPwk0AG9hw95gWwohN91/EWTR6x7
+bEuE0wLSiawwR95kftKtcpccrBPX9jskHwrBRD4rhNHjrPo08yPWoPNYpUEBiADsTOyuu4CkQm8n
+9LIsYn05ot8HEYGVu68KyrpRcLSDt3+/tj0l3HBCEW6Ot18x7JR0DM+6+GU5Vi/MsfaY1XG/Zv4X
+KNThc6wSrPPihMRtQJFj9T5iG/gBPu2/g08zI3VXNCT0TagcKw0rvPUe20N4UgJ62a/2sgU5Fmpz
+Q/9xFwKdqeVldGYeD8bQuCWE92YqsEQZ181rsjTohHXMfUGXhmAVYMk19a5wKBNiGiU+XtSZn26o
+YobMRark5eK7Qi867R1ggfjguLI56GgBM5JXJwfeSgt3haLGiYcwj6G8bBJYabwu8IZ0IJ4oeoZ5
+POj7EexlnwqMSMCyJyEZYkP5G5q+4BH3jeQlTYjkXlxCPAkfS6rDYEkTHQ+1muYaO1OHo03VEL7l
+PB6sJEFnfGsMwZJ0ieMjwRCmisdfUDgH+MzpuJH47gGAJqGhTHisIDB3lMinEBAsXuEEG8zZ/TIv
+JwW8bBJY4pyfeLeQJ8bTNEFlH/DFq7xcgM+UgxF5jtWybiiDponE75JPpAb75T4erCTh4D4MVi6U
+oTbhZtskuVn4LolzoJkibcJEBx6Jqhg7VCUnIT5xyVnxtHeEWMZLmllgpQkRS5wvTsJgjczLzc0c
+HD9MnPUXcyzv4XuYRMXXV+ZlsyIWdE1mf2bcZiHuSBFLRA7mqwlBXg4Moi3rbmYe8kcOFkzuEq75
+AA23S7AGvq1QqgqZIfD17sHj+ByrCh3cahfAQkc+HYxaNW+x60Zh9G/C6uSIYO0Tb/ZtP+mRcqxN
+trglqtSMPy3XLLACORbvLogUBiuXny5KFO9MA3eFJ0b0ezHotELIo6k5Vu9E5hYxY39ezLFwZ5bF
+I7DUXs5RpP0QjJo+gNUDhjNl6DZHfldYh8E68bNB2xTD077AXeGmBHT3ahpZf2YelC2eFirAQvdT
+TL/zgbvC51CGhXVR3hVuNQss3JDM9io5WLA3+21TgyVMCilvJveZCVYh8si5UWjePYBJICXGEes5
+6OUumZdz5Gm/X5gtjBSsLhif4fmXZPNYfvAhAgvNYz0kW65UgAWet9leNAsrPzjcP64m8G6pGqxe
+xtYjm26YaBO6Ep4mZu9ePygzD6xzV0jzWLChrXKwwLkRaNxTgCVOcp2TzbMhL5sLFpozGMrccoEP
+sqOYpwJN92CwdvZPVHhZAVaXlEhGOBQKcUk+lQzvB6/FlPfv91RgkeVaOVj+XvhlOGkWWTiIB67+
+hHoofNP23XYZWOew+/zq0xabB5Ziih+6Sw4WzhOE5F1U4NJE2+9BsJeHmD6Ptckm8iRfWgGH8awp
+dNddSi+LYHl5MPZHApYAZssjDB58YSOJwlrhzidsaAUCyNYKfXWLb7bZ6gJgecGJwba7zNs+EFjF
+atl5j42ZrJggPTwUeyswQWjMGXEAAAJqSURBVPpef366gV8rPCuddp9pYPGLkry7HrEx4+RgoTUo
+YbohU5zzs/FBLMjLfpPBgtFcXISeKK4Vnt+3OMEWh159qvTy44oJ0ntIJxxUYKUVoNu7nOEMjs5e
+YXcDOmJjmHHC1/F5G9rdkItWv6EKfsXM+3MMs80srsR1d3hnAxWMm9LKg5WTh3TOTGDUM++PMGgM
+9PrA4VH8adiKKa3m6Ye2UUjuwhMvAbBganNLK45YSdjJuXg/hh97+Z4QXjYTLDgY3iXQ/QQT6Mxx
+7wS8PFn08ln+fhI7OedmJo5wUjD0fixmmLRss244nhey2ccJW3jwfqwEfr/OyCU9wv21CNalEcwg
+8wZDaacQY59yiE/NmcBWITznt5Wxi2DBACJsQhH3Y/GnmSi968SGkLu8aD9WjrSM0h9veAjsx7Lz
+82z8Nq74wGlmgZXGzBW/AZsYKbkS92PF4xiBs4qWxUPkXvaFBCMcsEpy87Aod1ruLJlbwG/HlO0w
+hMcKln4oZDdNuXmBT+dONm8XKcqYdhbCljdLq2sFvMoFS/mOBHU5c6UAsi53ssiR+jTzBDaUt7Ss
+SbwJzSmTdH8+93GYX1TlCE4uUygDvZyn9nKUwSrJqZLePZO7tNsrtHUYdTDuTG9IL/tkYBAvORnu
+eff6Zb0qSo/OcADM3Pfu1VHWq3fAr2djlNlXudQXdCTYjV4L6uCodfEG97RwSL7nXa2zPwKwqFCJ
+mlCwqFCwqFCwqFCwqFChYFGhYFGhYFGhQsGiQsGiQsGiQoWCRYWCRYWCRYUKBYsKBYsKBYsKFQoW
+FQoWFQoWFSoULCqXq/w/gbudjI6bMwYAAAAASUVORK5CYII=
diff --git a/Documentation/DocBook/media/constraints.png.b64 b/Documentation/DocBook/media/constraints.png.b64
new file mode 100644
index 000000000..125b4a949
--- /dev/null
+++ b/Documentation/DocBook/media/constraints.png.b64
@@ -0,0 +1,59 @@
+iVBORw0KGgoAAAANSUhEUgAAAlQAAAFYCAYAAACVsmLPAAAAAXNSR0IArs4c6QAAAAZiS0dEAP8A
+/wD/oL2nkwAAAAlwSFlzAAAOxAAADsQBlSsOGwAAAAd0SU1FB9sLCBIAKVtZsMAAAAxxSURBVHja
+7d3ZbqvIAkDRLsv//8v0QytXvpYZap7Wko56OAnE2AXbBSbhOI7jHwAAkr1sAgAAQQUAIKgAAAQV
+AICgAgBAUAEACCoAAEEFACCoAAAQVAAAzb2jvyMEWw0AmFvh37xnhgoAQFABAPT1zvruwtNlAADV
+VLxsyQwVAICgAgAQVAAAggoAQFABACCoYEohuFkugKACsmLq178DIKiAyJgSVQCCCigQU6IKQFAB
+BWJKVAEIKqBgKIkqAEEFFAgkUQUgqIACYSSqAAQViKkwxjIAEFSwbUyJKgBBBWJq8GUCIKhgm5gS
+VQCCCsSUqAIQVMBYoSOqAAQVLOk41lwXAIIKhoqqJyFUYhkACCpYMqpiQqjEMgAQVLBUVKWEUIll
+ACCoYImoygmhEssAQFDBElHVexkACCoAAEEFACCoAAAQVAAAggoAQFABAAgqAAAEFQCAoAIAEFQA
+AIIKAABBBQAgqAAABBUAgKACAOA/b5sAGjsO2wBgMWaoAAAEFQCAoAIAEFQAADtzUXohIQQbAYDi
+Dh9kmYIZKgAAQQUAIKgAAAQVAICgAgAgmU/5VeSTGQDE8InxeZmhAgAQVAAAggoAQFABAAgqAAAE
+FQCAoAIAEFQAAHtyY0/o4O7efe4JCzAXM1QAAIIKAEBQAQAIKgAAQQUAgKACABBUAACCCgBAUAEA
+IKgAAAQVAICgAgAQVAAACCoAAEEFACCoAAAEFVBICGMsAwBBBVPHVE4QlVgGAIIKpo6ps/9utQwA
+BBUsEVMpQVRiGQAIKlgqpmKCqMQyABBUsGRMzbouAAQVNHMca64LAEEFy0WVmAIQVCCqxBSAoAL6
+hI+YAhBUIKrEFICgAvqEkJgCEFQgqo4+3wuAoILto0pMAQgqICOQxBSAoAIyQklMAQgqICOYxBSA
+oAIyokpMAQgqICOqxBTAvN42AYwTVQDMyQwVAICgAgAQVAAAggoAQFABAJDMp/y4FIJtwJx8ehJo
+yQwVAICgAgDoyyk/HnMKhdE5RQ30YoYKAEBQAQAIKgAAQQUAIKgAABBUAACCCgBAUAEACCoAAAQV
+AICgAgAQVAAAggoAAEEFACCoAAAEFQCAoAIAQFABAAgqAABBBQAgqAAAEFQAAIIKAEBQAQAIKgAA
+BBUAgKACABBUAACCCgAAQQUAIKgAAAQVAICgAgBAUAEACCoAAEEFACCoAAAQVAAAggoAQFABAAgq
+AACGCKoQPAs2JQAIquwCUAI2JQAIqowCOPtvbEoAEFQRBaAEbEoAEFQFCkAJ2JQAIKgKFIASsClh
+szEKrDGoXkNuiOPwwim4iezYoc9+39iDfQbVq+mGEFOiCjZ7E23swR6D6tV8Q4gpUQWb7PeNPdhn
+UL26bAgxJapgk/2+sQd7DKr3EDE1y96mUPT1fqgh6Ffosbsz9mDdQfXquiEY/rUKlBtLYgoqDJZB
+Dmjlg8qRWlSBMSSmYLOoKhtUjtCiCowdMQUbRtXLswUgpkBU5XkXf9CmPJZ9nQJrft6Gife9XmC/
+t0mHg9tr3FcJYgrmjilgn8Fa55SfI7WYAvtnYKNBW+8+VLGn/zY6wtd4qDY1iCngx+BtdNCre1G6
+W3gPt7MXUwAwW1CJKjEFCzB2wODtH1SiSkyB/TKw+KB9DfnARJWYAvtnYKLB+m7+AJ+UgL2WTQmT
+jz1jEJVf0ASD7jXck2/vY1PCQscwE+6wfkz1CaqrB6wAbEoQVcBkMdUvqH49cAVgU4KoAiaMqb5B
+9bkBFIBNCaIKmDSm+geVArApYaOxZ4zCuoPq5VkDqL//F1Ow9qASVACV9/9iCtYfVIIKoOL+X0zB
+HoNKUAFU2v+LKdhnUAkqgAZvqoG1B5WgAgAQVAAAggoAQFABAAgqAAAEFQCAoAIAEFQAAIIKAABB
+BQAgqAAABBUAgKACAEBQAQAIKgAAQQUAIKgAABBUAACCCgBAUAEACCoAAAQVAICgAgAY3NsmIEYI
+//3zONK/7u/v/nx+zdPl/1rO0++LWd6vZZ59Xe7jSfnZSq3z6jnJ2ValX09PHj9AD2aoiPJ34Lo6
+wJWKiJQD7N2BN/WAzbNtZTsCuzJDRZeD8XHkH3zPZo5CSJudeTKbdrX+lkE7QkzFbq8VHj/AGTNU
+dDkY1ziw1jjY7nAA/wzKqxnIu5gSPICggoTIuDroXh1YRz3ohuCUlcgESOOUH81iZdR1fJ9+zL1Q
+use1Y6nrvLsearR46rHNAQQVw6l14HtyOurJz5USVqs9LynXt8V+ShBAUMHHQfdzFuMsQGqHSW5M
+PQmrVtdsjRCkOwY5gKBiGne3Okg5WJaMqbuw2uX5+P6aX4H8/f922F4AgorlgyD3hp47z3ycPfZf
+p/FSb00BIKjg4kD8/cm4mFNjKfd/OpsJyb2GJ+V+UzEXSK9wAfuvqGr9s7ooHRiV2yYgDCe8xUOp
+gHny2GNjVdwAOzJDRbUYSfnep8srfdCOWV6tr225ztzt3PpxiTRgdGaoAAAEFQBAX075sbS7C6dH
+OJU0w8/ocQEIKjY2w0F71bAQTMBOnPIDABBUAAB9OeXHY36tCAD8ZoYKAEBQAQD05ZQfl3xSCwDu
+maECABBUAACCCgBAUAEACCqgiRDczwtAUAFZMfXr3wEQVEBkTIkqAEEFFIgpUQUgqIACMSWqAAQV
+UDCURBWAoAIKBJKoAhBUQIEwElUAggrEVBhjGQAIKtg2pkQVgKACMTX4MgEQVLBNTIkqAEEFYkpU
+AQgqYKzQEVUAggqWdBxrrgsAQQVDRdWTECqxDAAEFSwZVTEhVGIZAAgqWCqqUkKoxDIAEFSwRFTl
+hFCJZQAgqGCJqOq9DAAEFQCAoAIAEFQAAAgqAABBBQAwibdNAECqcPKLJo8fH1cNN7+U8up7jpOP
+v6as//PvPr+/xPpTlsEazFABUDSmnsRTie/pvX74ZIYKgKz4+J55+fu7EMLPWZmU2auY9YsjejBD
+BUDRmDk7pdZq/Vf/P2bZT7/2OI7/rU/ICSoAiHIVLS2uFyq5Dtc3kcspPwCairmQvHUghhBOT1U+
+eQx/fyfQBBUALBNrtcPmc/l/QYagAoDqYi9ib/2zPZ2l+hVw7Ms1VAAkKXXbgpIXkH9eIF7r8T15
+bEJLUAHA4wD6FQ5PPoVXc/0ll3/3db/+sCen/ABIio7PU3U5YfIdY0++78n6RzPqxfiUYYYKqh94
+rv/AzFGV8nelouLue3JC5e5XzTx57E777SUcsa+4zxeIo8HlOw/vOgBwLBlqA1drGDNUAACCCgBA
+UAEATM2n/CpyQSIA7MEMFQCAoAIAEFQAAIIKAGBnLkovxI3XAGBfZqgAAAQVAEBfTvlBbXf3I3O6
+GGB6ZqgAAAQVAICgAgAQVAAAggoAAEEFACCoAAAEFQCAoAIAQFABAAgqAABBBQAgqAAAEFQAAIIK
+AEBQAQAIKiBFCGMsAwBBBVPHVE4QlVgGAM29bQIoGFOf/30c7ZcBrV/zd6/Rq6/7fs1/fs3T5Z+9
+AckZO2dvaL6XeffGJ/XxpPxspdZ59ZzkbKve278BM1RQOqaeDvbSy4CW/g5WV6/RUhHRcuwYc2W2
+VY3tP/hzY4YKar5bfLIDeLIMM1WsOnaOI/9AeTZzETt2YmbTrtbfMmhH2PfFbq/Syxxk/2iGCmrF
+1Kzrgplez78OpjUOsDu8qfkMyqsZyLvwSdleNZYpqGASLQe3GSpGHgNXB92r1+6or+sQvInptV+a
+eF/nlB/kDv7aO14xxUpahErqOr7Hc+yF9y3Hbul13l27NPJ+aJBTgYIKRo4qMcXK46b2wTVlHb9m
+3VpcXD/i85Kyb4v9lGCvZQoq2CiqxBQzvfY/ZzHOAqR2mOTG1JOwanXN1ghBunucR3INFYw4qMUU
+K/sLsO9rlXKuXSoZU99jcfXxmPpp5LP7f5W+B9Ukz4GggtGiSkxBn5ja/UL0v3D5/nO1jyq1zWos
+szGn/KDGTinnoliY9TV/FzZnr++U+z+dfcIw93qblPtNxVwUvcIF7N/7uZJRlbLMQS5KN0MFtQ4w
+YgrWGberjs+Y21vExmqN/eDAz0M4jsifrtZ5alh5ZyWmAMbaJxfe75qhgl7veMUUwDIEFfSMKjEF
+sAQXpUOrqJrk5nSwpLvT7yOMxxl+Ro9LUMFQUSWmoP348zN6XIIK7FgAWDWo/DZuAAAXpQMACCoA
+gM7iT/m5BgQA4P+YoQIAEFQAAIIKAEBQAQAIKgAABBUAgKACABBUAAB7+hfHbDX87cMFJQAAAABJ
+RU5ErkJggg==
diff --git a/Documentation/DocBook/media/crop.gif.b64 b/Documentation/DocBook/media/crop.gif.b64
new file mode 100644
index 000000000..11d936ae7
--- /dev/null
+++ b/Documentation/DocBook/media/crop.gif.b64
@@ -0,0 +1,105 @@
+R0lGODlhuQJGAeMAAAAAAH9/fwCvAP8AANEA0dEAAK8Ar////wCOAAAA0QAA////////////////
+/////ywAAAAAuQJGAQAE/vDISau9OOvNu/9gKI5kaZ5oqq5s675wLM90bd94ru987//AoHBILBqP
+yKRyyWw6n9CodEqtWq/YrHbL7Xq/4LB4TC6bz+i0es1uu9/wuHxOr9vv+Lx+z+/7/4CBgoOEhYaH
+iImKi4yNjo+QkZKTlJWWl5iZmpucnZ6foKGio6SlpqeoqaqrrK2ur7CxsrO0tba3uLm6gQC9vr/A
+wcLDxMXGx8jJysvMzc7P0NHS09TV1tfYxbth2d3e3+DRAePk5ebn6Onl4ezt7u3q8fLqANtg7/j5
++s/z/f4B+wIKHAjsn8F09ex5IciwobuDEM1Bi0ixosWLGDNqrJhQIZdk/htDihxJsiTJiSZTqlzJ
+MmNHj1q+tRznsKbNmzhzDoz3EiYWmTN7+vQJgOfQmN5mAjzKtCg9pj+TBoU61ClCqlaAthSKVZdV
+dFy7NtHKMqxYW1/PmT2bhOzKtWxlpZUYF4pblXDrvpq7Tq+Tu+UGCB5MuLDhw4gTK17MuLHjx5Aj
+S55MubLly5gza95MmVxev0EAkxsg8jNoVXNJ0zy9RPQ41RtNsz6V2vPstlLTwdYo+zap2qt9G3Ed
+YLdL4bGAL0VOhLhxjL2Zf1IeXboM56Wtt6KuPXRudM8vVu+eiTt5H9hDjj9vyfyIXrTW80gfO4OC
++/jz69/Pv7///wAG/ijggAQWaOCBCCao4IIMNujggRe4J4IwBxBg4YUYZqjhhhx26OGHIIYo4ogk
+loihMBbi1k084VlklgLsWQKjBRJqgIwEBJRyY4UqZsNidhjMGOMkQlLgnjERwkdBjuVpk2QFTB5B
+H2/2DUlJkRNYhWQKUTKyJQpdFjHlcUFaSaQxo9nGQph/fCkDm0OMCV2VZh7iZpbnwCYfBnDKcecO
+fXq3ojotckRnnXr8SQGWEtQIphuKEhEoEHKKdygHCUiQ6QEJdDrEphWA2oGo3UXaAaMHOHrCpFmY
+2gSr6H2XJ5AXoHqBp5xyuimpPfCa6we+6uWqCaiqagKsTAxrBbLz/slqTqEUvWgBqLviSqqvnXpq
+rbbZTpDtt9ziSsG3unKraabkltutWMq+UOyswa3A7A/tfjGvDpW6eKm3v+a667i38vvvuQLzW7Cm
+AJ878L/W9ouuR/Xi8O6zasorRMRo3JtDvoaWOe2v4IIc7LUIE4zwtd1Sm7C6KZ8MLsmzYBzExIFV
+rILGJsgcB843cBztvgqHWnKwup5s8rroVivwwEc3DHLR/jKcis5K0JxmvDezQLUePNvgc0TSBix0
+1OuG6nS56nob7ssqp132wuIi7cnWU1j9ms1chkD3IF3X8DVEYe9AtNi37M2F3cXh/WgFhjPSNw1/
+HxS4CS97MPjH/ts5uQfieqbQuCWPzxC5QZPncPnYoXz+BueKY+Bm6J3AHsPo/5TOmup5sB5vxLJv
+0vsLtPtjO1W4D0Kz6r9nknwLwfczvFeam6IAmndjnfcsy2vtbM3qAT2KkhkULwj4SRITIbzLWYx9
+j9j82L3HvyljivzeG1tC9qCzf4379cEPigACCAYAB0jAAhrwgAhMoAIXyMAGOvCBEIygAVMVDBLo
+Ln1ZWx8SmjeP521CAEYiXypAGML1XHBPF8BfJVToue1drX+1GgUJZTHDFJywBSycRA5PwEF5eFAT
+NYRFEE9wwzXRYoc5c2H1YGgBW32QFkMk1vkoZr3FyQKJJeih/lH894kotsKLFpwi9zB4vSvqzxr8
+oxIXPQHGVbRRBEVUnxk3qMTEvS+GonjjBBCwxwMg4I+d0CMI4pjBOUqpjtACm/c4IUhASuCPfPQj
+I1lAyDLGAosk0OJT1hhIC0RSkpDsoyg9GUpAhtKPp6QAJD9pB0F+oJJWvOQZq5FGMuExFFHkYyR1
+OUpWqrKPvHykJIXZyzy40gOwXNURZ0mNWs6Jk5P0JChXKUxHXsCXwQTlKIe5h2OeSowvRKEFMOkI
+ck4IkbRqogyvaU1uZpOd1URlNXepSnriwZscSOaxlknHQekmnRVwIhAxgM09rtKXBrXnKalJzFTe
+AZ8b0Of9/vh5SH+CB6CLWicPEAoIiGpAoiQwp+OYOQ1nWgqaT0TBQTl6TUN4tH7oEyeUKDocdN5R
+nXnsAUv98FJO2i+kNBWTTZkYUI3SkJLgXKJMlxTU5gxVjbf8HxSRSqOY4rCpcXqqLXGKy6muAKQj
+EOkixPoBTV4FpQOdRU+jiicqkjGWsCCrB8wKlkWm9KhfTaod36pMDVbUR4TC6AQEmom1spGqjLOq
+Ef1aU4uiD6pclapaEWskxcpRlv0E7D9vWtScTjavVXXrUicgV0SUlgN0VYtd04pXFYBVBKc1RGxt
+pNVnsvWwn3WtXju3WEM2VrMX5WxGPdtaG+62dftkrFAd/utWyHa2q7k1bmjHOFocYfVitT3pbTsZ
+XRS8NgSzJUR4XZddfaG1sF7V7XTDeVXlOpW5Y3TucKFbXO8et4p99e1ygfvYrT5XsvUl4n35mlz9
+vpe/zfXvfAEcC8P+t63Uba+BswrfF8p3sEZtMGUhzN7eYvav7QuscDFMXA2DNrGilfCHfxvizRJ1
+wV1Mr3RRHGEPx5Wk0jCpebcbzQBLcb1KVfGNM9vi4L6YxPQ1sXpp3OHLDhnE+xPxkSVAWEw4uMcz
+rmyKbfyK8ZYPwfFVMJIZLMQNN8qyhVzxfovcX9tGNsbdFTCQ91pdHrmXwmC2sJipnOEyn1jLNXZy
+l3Es/g4pX5jPJfbzkgHd5DQ/mcVRdvGhD1DlS1z5rlnmcJC57Aov06i8HeMxphWd6TNvWdCdJjRK
+JL1nSvf5FZdGNJM3jepWeJpxoP7Zea0sY/vOmbe1ZsWtS5jnJU660paINXr/rGk6C3nQRI60kY/9
+alco+7sgGLYftN2oXCty15butZxn7WxO21rV/DB0q5FdCWXzmtmmDrSjoQ1lNKrbzQ/GrY9LgO0P
+cJsP/04tXcCdbHH/mNzAnneqo21vVuMbxvpWcqlThWZLPnrN0m6zdt8ccVL7GuHIneidsVthY6+7
+2l80M8VPrfBzM5yW9954vrm77zD+OuRAHbmgir1X/monGtYq/2lYr7tzNif44WOGc81H0G8P/HsP
+Afc24Fa77KXDccB1fjrX0O0MHYea4zSX+McZTeuWC5vrzfC6rkXNWrGPm+zlDvYqol7ynp/859YO
+esXhSm9IN3zad0+yx9/e7IRbvO8Y/7vGdwx2LA/+4HA3PN8XXm+YO1zmEA/74/mN9WdT3u+WBzzS
+ZU1moMN75fI+/OcTH/rFf33mjjf9oguP86HrnFJSlxzV3231EDS9A1rPA915nsipE7zdBuf8zfFb
+YDUf2OhhHr2r8Z7y0wsdtkTHfd2Lr/vjU8Ld4bb+3vPrfDxDX8/SZ/f3k29zkDNf5BMmOfEFS3ql
+/rsd8rR/f87jX/SMHx3zSddxsjdx1wde2UcvuUc6uxd+vTdInWduZ/dyzRRzjAd7ozaAY5d/BAZ/
+5Sd/52dy6YdyrHBtDyh3qjB8H2h3IUh9I6h3LKd6Lld5E3h5FZh5sZd34veCkxeDoDeDogeA9SeA
+ODh78dZoMBiBMlhSFPh6NniBQ0iA49d8F/d8/hd9QDh9goeBhFeEZXeEc4d2zKB238Z2VXd/yud+
+G7h/Hdh/ivd/NRiAmqeF+MeFcWd2XyiBSkiDTAiHN1h9RIh6RriDSNiDefiDbxiEcfiEGUiHkkd+
+U2h+VYh+V6h+kyBBlniJmJiJmriJluiCqSeI/neYhDm2hGvXeJzgCzEjQkxXgnZ4gmC4DGJofGS4
+CcAAC7XYfpFXe9h3ewi4ffSHhfGjiqvwC2eYi/pne/ynffM3YoiYOqhoi894dcuXhsi4hsqYgtyn
+gN5XHt1mi93oe6zoha6Ih6Ooh6VogTpSZ+3RG7/HAcGHBygYiSA4idCYisgUjqA4jqJYaOY4hqY4
+NepYCcI4cwWYbQcYK77IjMBYj3KxFu24Ae94B/HYhlZ4iAuZHAFJCQP5kBoQkXYwka3nhnvYjAyJ
+kWBXkP52kD0gcH2xjQBpj3CIkk6nks2SkFN2kWiRkZOgJByZAR5ZByDpg653jk1YCwM5jADQ/pN8
+QpM7wJIhR4l7oZOSoIoyCXxMiS8JWDsLaJRS+QgwQj5V6Y5XuTFZKTxbmZOvICRKEpYQOZY44JTv
+B5Xx0ZWKgCW+EIUc+IgeKI8qSI9YcZSiUCxp0YhSiHhUSJGSaJFyWTh0WQjv0guB6IiGCYmIOY+K
+GReA2QnHUxSEmZeTuZeV2ZeXWReZqQl2A5nHuIvJ2IvLeJOLuQ2leQmcA5lZ55Y9U5bOc5ZHEZtX
+cl+8mZK8iJCt6XO305h6cEK/KZbBuZK42UG6STzGeQdFlJw+aZte05w+9Jx/GZ10QEjUOU7W6TfY
+uUWzKCzcGQew9J2kFZ6QM56bVJ5+oZ6E/qBP6vmTdBCUhTiU/oiOtyGfgQBS1Gmfc4Cf5WiII4mT
+0uGffvBavymgckCg/GigRMmH1qGgm4OP5GWVy1mTwxl4Q2KheNB0memgO/OKyhCL3QefzAGiddCO
+R0micAChqyah+1mU58GicsCRwgijbyCj6daPsviPMYKjq4OhZdUTPOoGPtp1pFijFGomRMoGSvmN
+draawomNv/iaCXqeh2Ok51Sl1siaWKqQWlqhXJoFU4pr7Ck67nlWKgqlZ2oFaQolUZo/5Bih+hmk
+/IkoIfQHc8pUFKSXbBiSFXmgZcoedQoGf7qeWRKngrCkadekemqjfPokx+mlakilqQCp/mEoqSkq
+pJWaoSGKqdXYp5tqoiDhqdr4pqFqqi1KqqppqabAqbCoqlrpkq3aqK86jbW5AYlqWqiKDCi6qqCa
+q7Q1B4tqXR3wq4VAqydqq2aJq8bqqm6QrGCKWo4KlMF6DMN6q6w6rbIqpbBqgHqTrQ+6rdQDpJ+6
+p+C6rObqA9baNcy6behaDN0ard/arqKaBvGaRJzgrKmqrsTKrvo6V++aA/3KQwebBgArrNCam9Ja
+sPtKBgmLAvMqkfVKDPcKsfkqscdKseNqkCtwsfeZscOwsc4ZsR4bPgsbAxU7si0bBg3LrQ+bsh27
+sr4asy3wstojCTObrjQ6qU+Ks/7q/gU8yzw6uwU/a681m50qS7Q52wVH6wIkuwZLq7FNS57FCrVZ
+lLTFqIG92p4jdaczmqfrSqlcq7BoGrLAeZ2KcLUnm7XvubVpW7RVMLVsCqxk+6NBe7ZDW7cqULUu
+y7Yz2ZRe+wRwKwwo67Q3C7jlWjeEq6HNIl4mq7hy66Z067gWe7gwpYOSiZWPWrnBsLhaS7CaG7ic
+e5J4manMCQiJO7qXW1dPe7pfygR4O7l98LoFEbuqNbu0W7tJcLutC3Wiu7sC662Z+7swG7yRq5w1
+tXV7y6THi6/Jq7zLawTCi3vwWLy/QLpza7rWS7U6m73e8ZHce5e8O3CNG76bOwTk/otdJRu9kTq9
+HFu97Iu0M9O8bfkEgvsq54ua9Guz9nu/+Auv+tuRUtC/SqC73Zu+Lbm+BIy6PfC+h6Sk/8sXiWmo
+EQy/OkDBdMQGDIy+Acy4A7zBwHOeHowbahDCANy3A4u2JnybN5DCSqDAQcDCGGyZGhzDTlUDNNwa
+qQuB18iX2Yi84MvDbisDP5wsQSyOV0rEWYrEFQwDSzwWTVyYlEmoGTyhJCnFCOguB1yd3HDFpXqY
+WqzDXIygXly+nhiZWNwFNmwDOOy9mHvEa0yWOfiJn/sFcTwDc+zAT3nHQAyFnvvG90DGzkuIBWq2
+L/y3gqy9cwiIXZiPfIzIYryP/mUrkml8qI8snl/LiLpIrmrQx0IsplBMpp38F+NRxUhBBX88wqUL
+w6nMxq8Uxkv5BqS8x4MqlJrspF08y897j7zqeWuQy6ybxbxcqJsMzFEQm6xcyU7wyi5sxLLMzFkV
+UbacQnZgzG2ryHjay0L7y9b8wT61umXsJ6krzYxMzY48zlNMkOYcq9t8uOoMzn4rzu6swpnHlgi8
+B9x8y5jMt+tMvXaczz2MiPx8yf4cs/WszL6sxgatyjiZ0ADdB//MqAEtvdNM0NUc0eSMhmHbJu/a
+0Fv80Jzs0T8wPT1B0do8CNxM0mhs0ih9BTMCPiwNnoWQyzAtmjs801RQJPBx/tMzZSdcutNFzNHt
+7NNHwCgtPMm6zAvcadRRrNSQ+2lf0ZnHnNPGKdWoTNVSMDGoidXnTNQ0wNWu6dVfDU69INbyvAgX
+a9bEidZOgDioGdKOwKxw7aFybbvHFY2tmAiJmtcruNdNgJyl7NbVJdh+Sdh8Pcw4yiwGMAGRbQCU
+jQGRnQKXvQWPfcF0LLsQzNg+XIIgiiyVLQGUfdmZTQGpbQKr3cpPbcqhedT1W9CgjbDhqKBsktmT
+XdoHkNqtXdqnLdm7fdqVTdy7PcaGPMSxPdW1zbzD/GnHPNmm3duSXd0XIN3TTd3ajdoVwN1iIJ+K
+PZrNjQQS9Z1wIt3GPd28/m0B2L3d2e3dxJ3dY2DenA3IcTnezg3SUdvNwu3b1d3aqt3b8P3e6m3d
+AH7IIpvR87vRs93R+D3D48qbfbLaup3e7G3avD3g1G3c7W0GEl7fsPy9Dv7gof3c7prIolCa4d3T
+JO6+kQuYssPhgL0WK77MLU4ED7mWQ40KOg6oCt6pIV7HI37jg2vi50Q+SVoGxIjR3pzJDh3OEE3k
+tm3kR94RSa7k0VjjMi3l+Uvl5fqMV04GFaTlUH7SXL6FklyH/hrmZ+ALZH7PUX7mJa7fa2Iidn7n
+eJ7ner7nJgLiDC7AtC3neezG9wuXG2jmgr6KXh7Bhv5DiT4D1qqvja6d/o/+h0K9spPuu5UujXTO
+w5n+2ZsOjotOwJ9ewqGOi2ArxaUe6Keu6J0ew6s+5K3u6sZIjdYb60k962h+6R6L6/is64uY5myt
+vL4e58Ae7LwuscWO6Me+XclesMve7EqczUQb7dJOxdSOs9Z+7S4Q6e267dzexk5N6m3q2aYe7uVc
+yIVe7r0L6ugek/FM7OyuvudurAUgAfd+AAWw7z+Q7yfg79806utuk3F9uvyu7/qe7wCvAwtPAg3/
+UdmO6fP+wPVuJf5+7/uu8BXw8BmP8QrP7x0/AR0/8gl/8CKf8fhu8hpf8h4P8iHfuXpM7gAw8wBQ
+8zZ/8zif8zrf2e1e/vEWj/AIv/L4fgEXD/QXX/RFL/JAv/RLr/JDb/Qpr/QmD/ECz746f/VYj/U8
+T++sjigYz/Jfn/AYsPBC7/Rkj/JJ//Ri//Qr//FKz/JU/+omnPV0X/dbT/FdXyco//ZCbwEHH/Z/
+//drb/Z9H/htz/Ypr/Fp7+zx/rt1//hXf/eB7LhkP/Qk7/eCn/hwr/kjf/lBv/d7v/mKj/ahn+4x
+P/CQn/o5zNM2jtIPnwGvvwPeDq6qX/uSf99I3PkeEPtE7+JVH761r/q3f+g+zft+7/tyv8HBn/rD
+7+jvLurJz+jL//jNT+nPb/qEbvXTb/f2fegP8v3gH/7iP/7kX/7m/n/+6D/707r93K8bnPH+8B//
+8j//9F//9n//+E//oez47J/1SmHJEHDkpNVenPXm3X8wFEeyNM8RCFa2BVA4lme6tm8g13e+9/lW
+UDgkFgOvW1K5ZDadT6hSVURGrVdsdvnjdntGcHhY1ZbNZ3Ra3ZkSyWt4XF7z1rtivNi+5/f9f8BA
+wUHCQsNDxETFHaO3uUfISDa7vErLS8xMzU3OTr1Az1DRUdJS0yBHSdXVyL3TV9hY2dmjRdtb3NxB
+2iNW3985XeFh4mLjY+Rk5WUeYOdn6Gjpaepq62vsbO1t7m7vb/Bw8XHycvNz9HT1dfZ293f4ePl5
++nr7e/x8/X3+G37/f4ABBQ4kWNDgQYQJFS5k2NDhQ4gRJdKLAAA7
diff --git a/Documentation/DocBook/media/dvb/.gitignore b/Documentation/DocBook/media/dvb/.gitignore
new file mode 100644
index 000000000..d7ec32eaf
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/.gitignore
@@ -0,0 +1 @@
+!*.xml
diff --git a/Documentation/DocBook/media/dvb/audio.xml b/Documentation/DocBook/media/dvb/audio.xml
new file mode 100644
index 000000000..a7ea56c71
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/audio.xml
@@ -0,0 +1,1314 @@
+<title>DVB Audio Device</title>
+<para>The DVB audio device controls the MPEG2 audio decoder of the DVB hardware. It
+can be accessed through <emphasis role="tt">/dev/dvb/adapter0/audio0</emphasis>. Data types and and
+ioctl definitions can be accessed by including <emphasis role="tt">linux/dvb/audio.h</emphasis> in your
+application.
+</para>
+<para>Please note that some DVB cards don&#8217;t have their own MPEG decoder, which results in
+the omission of the audio and video device.
+</para>
+<para>
+These ioctls were also used by V4L2 to control MPEG decoders implemented in V4L2. The use
+of these ioctls for that purpose has been made obsolete and proper V4L2 ioctls or controls
+have been created to replace that functionality.</para>
+
+<section id="audio_data_types">
+<title>Audio Data Types</title>
+<para>This section describes the structures, data types and defines used when talking to the
+audio device.
+</para>
+
+<section id="audio-stream-source-t">
+<title>audio_stream_source_t</title>
+<para>The audio stream source is set through the AUDIO_SELECT_SOURCE call and can take
+the following values, depending on whether we are replaying from an internal (demux) or
+external (user write) source.
+</para>
+<programlisting>
+typedef enum {
+ AUDIO_SOURCE_DEMUX,
+ AUDIO_SOURCE_MEMORY
+} audio_stream_source_t;
+</programlisting>
+<para>AUDIO_SOURCE_DEMUX selects the demultiplexer (fed either by the frontend or the
+DVR device) as the source of the video stream. If AUDIO_SOURCE_MEMORY
+is selected the stream comes from the application through the <emphasis role="tt">write()</emphasis> system
+call.
+</para>
+
+</section>
+<section id="audio-play-state-t">
+<title>audio_play_state_t</title>
+<para>The following values can be returned by the AUDIO_GET_STATUS call representing the
+state of audio playback.
+</para>
+<programlisting>
+typedef enum {
+ AUDIO_STOPPED,
+ AUDIO_PLAYING,
+ AUDIO_PAUSED
+} audio_play_state_t;
+</programlisting>
+
+</section>
+<section id="audio-channel-select-t">
+<title>audio_channel_select_t</title>
+<para>The audio channel selected via AUDIO_CHANNEL_SELECT is determined by the
+following values.
+</para>
+<programlisting>
+typedef enum {
+ AUDIO_STEREO,
+ AUDIO_MONO_LEFT,
+ AUDIO_MONO_RIGHT,
+ AUDIO_MONO,
+ AUDIO_STEREO_SWAPPED
+} audio_channel_select_t;
+</programlisting>
+
+</section>
+<section id="audio-status">
+<title>struct audio_status</title>
+<para>The AUDIO_GET_STATUS call returns the following structure informing about various
+states of the playback operation.
+</para>
+<programlisting>
+typedef struct audio_status {
+ boolean AV_sync_state;
+ boolean mute_state;
+ audio_play_state_t play_state;
+ audio_stream_source_t stream_source;
+ audio_channel_select_t channel_select;
+ boolean bypass_mode;
+ audio_mixer_t mixer_state;
+} audio_status_t;
+</programlisting>
+
+</section>
+<section id="audio-mixer">
+<title>struct audio_mixer</title>
+<para>The following structure is used by the AUDIO_SET_MIXER call to set the audio
+volume.
+</para>
+<programlisting>
+typedef struct audio_mixer {
+ unsigned int volume_left;
+ unsigned int volume_right;
+} audio_mixer_t;
+</programlisting>
+
+</section>
+<section id="audio_encodings">
+<title>audio encodings</title>
+<para>A call to AUDIO_GET_CAPABILITIES returns an unsigned integer with the following
+bits set according to the hardwares capabilities.
+</para>
+<programlisting>
+ #define AUDIO_CAP_DTS 1
+ #define AUDIO_CAP_LPCM 2
+ #define AUDIO_CAP_MP1 4
+ #define AUDIO_CAP_MP2 8
+ #define AUDIO_CAP_MP3 16
+ #define AUDIO_CAP_AAC 32
+ #define AUDIO_CAP_OGG 64
+ #define AUDIO_CAP_SDDS 128
+ #define AUDIO_CAP_AC3 256
+</programlisting>
+
+</section>
+<section id="audio-karaoke">
+<title>struct audio_karaoke</title>
+<para>The ioctl AUDIO_SET_KARAOKE uses the following format:
+</para>
+<programlisting>
+typedef
+struct audio_karaoke {
+ int vocal1;
+ int vocal2;
+ int melody;
+} audio_karaoke_t;
+</programlisting>
+<para>If Vocal1 or Vocal2 are non-zero, they get mixed into left and right t at 70% each. If both,
+Vocal1 and Vocal2 are non-zero, Vocal1 gets mixed into the left channel and Vocal2 into the
+right channel at 100% each. Ff Melody is non-zero, the melody channel gets mixed into left
+and right.
+</para>
+
+</section>
+<section id="audio-attributes-t">
+<title>audio attributes</title>
+<para>The following attributes can be set by a call to AUDIO_SET_ATTRIBUTES:
+</para>
+<programlisting>
+ typedef uint16_t audio_attributes_t;
+ /&#x22C6; bits: descr. &#x22C6;/
+ /&#x22C6; 15-13 audio coding mode (0=ac3, 2=mpeg1, 3=mpeg2ext, 4=LPCM, 6=DTS, &#x22C6;/
+ /&#x22C6; 12 multichannel extension &#x22C6;/
+ /&#x22C6; 11-10 audio type (0=not spec, 1=language included) &#x22C6;/
+ /&#x22C6; 9- 8 audio application mode (0=not spec, 1=karaoke, 2=surround) &#x22C6;/
+ /&#x22C6; 7- 6 Quantization / DRC (mpeg audio: 1=DRC exists)(lpcm: 0=16bit, &#x22C6;/
+ /&#x22C6; 5- 4 Sample frequency fs (0=48kHz, 1=96kHz) &#x22C6;/
+ /&#x22C6; 2- 0 number of audio channels (n+1 channels) &#x22C6;/
+</programlisting>
+ </section></section>
+<section id="audio_function_calls">
+<title>Audio Function Calls</title>
+
+
+<section id="audio_fopen">
+<title>open()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call opens a named audio device (e.g. /dev/dvb/adapter0/audio0)
+ for subsequent use. When an open() call has succeeded, the device will be ready
+ for use. The significance of blocking or non-blocking mode is described in the
+ documentation for functions where there is a difference. It does not affect the
+ semantics of the open() call itself. A device opened in blocking mode can later
+ be put into non-blocking mode (and vice versa) using the F_SETFL command
+ of the fcntl system call. This is a standard system call, documented in the Linux
+ manual page for fcntl. Only one user can open the Audio Device in O_RDWR
+ mode. All other attempts to open the device in this mode will fail, and an error
+ code will be returned. If the Audio Device is opened in O_RDONLY mode, the
+ only ioctl call that can be used is AUDIO_GET_STATUS. All other call will
+ return with an error code.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int open(const char &#x22C6;deviceName, int flags);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>const char
+ *deviceName</para>
+</entry><entry
+ align="char">
+<para>Name of specific audio device.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int flags</para>
+</entry><entry
+ align="char">
+<para>A bit-wise OR of the following flags:</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDONLY read-only access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDWR read/write access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_NONBLOCK open in non-blocking mode</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>(blocking mode is the default)</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>ENODEV</para>
+</entry><entry
+ align="char">
+<para>Device driver not loaded/available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBUSY</para>
+</entry><entry
+ align="char">
+<para>Device or resource busy.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Invalid argument.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
+<section id="audio_fclose">
+<title>close()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call closes a previously opened audio device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int close(int fd);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
+<section id="audio_fwrite">
+<title>write()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call can only be used if AUDIO_SOURCE_MEMORY is selected
+ in the ioctl call AUDIO_SELECT_SOURCE. The data provided shall be in
+ PES format. If O_NONBLOCK is not specified the function will block until
+ buffer space is available. The amount of data to be transferred is implied by
+ count.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>size_t write(int fd, const void &#x22C6;buf, size_t count);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>void *buf</para>
+</entry><entry
+ align="char">
+<para>Pointer to the buffer containing the PES data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t count</para>
+</entry><entry
+ align="char">
+<para>Size of buf.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EPERM</para>
+</entry><entry
+ align="char">
+<para>Mode AUDIO_SOURCE_MEMORY not selected.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ENOMEM</para>
+</entry><entry
+ align="char">
+<para>Attempted to write more data than the internal buffer can
+ hold.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="AUDIO_STOP"
+role="subsection"><title>AUDIO_STOP</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Audio Device to stop playing the current stream.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_STOP);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_STOP for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_PLAY"
+role="subsection"><title>AUDIO_PLAY</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Audio Device to start playing an audio stream from the
+ selected source.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_PLAY);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_PLAY for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_PAUSE"
+role="subsection"><title>AUDIO_PAUSE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call suspends the audio stream being played. Decoding and playing
+ are paused. It is then possible to restart again decoding and playing process of
+ the audio stream using AUDIO_CONTINUE command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>If AUDIO_SOURCE_MEMORY is selected in the ioctl call
+ AUDIO_SELECT_SOURCE, the DVB-subsystem will not decode (consume)
+ any more data until the ioctl call AUDIO_CONTINUE or AUDIO_PLAY is
+ performed.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_PAUSE);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_PAUSE for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_CONTINUE"
+role="subsection"><title>AUDIO_CONTINUE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl restarts the decoding and playing process previously paused
+with AUDIO_PAUSE command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>It only works if the stream were previously stopped with AUDIO_PAUSE</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_CONTINUE);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_CONTINUE for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_SELECT_SOURCE"
+role="subsection"><title>AUDIO_SELECT_SOURCE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call informs the audio device which source shall be used
+ for the input data. The possible sources are demux or memory. If
+ AUDIO_SOURCE_MEMORY is selected, the data is fed to the Audio Device
+ through the write command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_SELECT_SOURCE,
+ audio_stream_source_t source);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_SELECT_SOURCE for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>audio_stream_source_t
+ source</para>
+</entry><entry
+ align="char">
+<para>Indicates the source that shall be used for the Audio
+ stream.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_SET_MUTE"
+role="subsection"><title>AUDIO_SET_MUTE</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is for DVB devices only. To control a V4L2 decoder use the V4L2
+&VIDIOC-DECODER-CMD; with the <constant>V4L2_DEC_CMD_START_MUTE_AUDIO</constant> flag instead.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the audio device to mute the stream that is currently being
+ played.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_SET_MUTE,
+ boolean state);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_SET_MUTE for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>boolean state</para>
+</entry><entry
+ align="char">
+<para>Indicates if audio device shall mute or not.</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>TRUE Audio Mute</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>FALSE Audio Un-mute</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_SET_AV_SYNC"
+role="subsection"><title>AUDIO_SET_AV_SYNC</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Audio Device to turn ON or OFF A/V synchronization.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_SET_AV_SYNC,
+ boolean state);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_AV_SYNC for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>boolean state</para>
+</entry><entry
+ align="char">
+<para>Tells the DVB subsystem if A/V synchronization shall be
+ ON or OFF.</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>TRUE AV-sync ON</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>FALSE AV-sync OFF</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_SET_BYPASS_MODE"
+role="subsection"><title>AUDIO_SET_BYPASS_MODE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Audio Device to bypass the Audio decoder and forward
+ the stream without decoding. This mode shall be used if streams that can&#8217;t be
+ handled by the DVB system shall be decoded. Dolby DigitalTM streams are
+ automatically forwarded by the DVB subsystem if the hardware can handle it.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ AUDIO_SET_BYPASS_MODE, boolean mode);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_SET_BYPASS_MODE for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>boolean mode</para>
+</entry><entry
+ align="char">
+<para>Enables or disables the decoding of the current Audio
+ stream in the DVB subsystem.</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>TRUE Bypass is disabled</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>FALSE Bypass is enabled</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_CHANNEL_SELECT"
+role="subsection"><title>AUDIO_CHANNEL_SELECT</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is for DVB devices only. To control a V4L2 decoder use the V4L2
+<constant>V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK</constant> control instead.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Audio Device to select the requested channel if possible.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ AUDIO_CHANNEL_SELECT, audio_channel_select_t);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_CHANNEL_SELECT for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>audio_channel_select_t
+ ch</para>
+</entry><entry
+ align="char">
+<para>Select the output format of the audio (mono left/right,
+ stereo).</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_BILINGUAL_CHANNEL_SELECT"
+role="subsection"><title>AUDIO_BILINGUAL_CHANNEL_SELECT</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is obsolete. Do not use in new drivers. It has been replaced by
+the V4L2 <constant>V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK</constant> control
+for MPEG decoders controlled through V4L2.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Audio Device to select the requested channel for bilingual streams if possible.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ AUDIO_BILINGUAL_CHANNEL_SELECT, audio_channel_select_t);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_BILINGUAL_CHANNEL_SELECT for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>audio_channel_select_t
+ch</para>
+</entry><entry
+ align="char">
+<para>Select the output format of the audio (mono left/right,
+ stereo).</para>
+</entry>
+ </row>
+</tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_GET_PTS"
+role="subsection"><title>AUDIO_GET_PTS</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is obsolete. Do not use in new drivers. If you need this functionality,
+then please contact the linux-media mailing list (&v4l-ml;).</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Audio Device to return the current PTS timestamp.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ AUDIO_GET_PTS, __u64 *pts);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_GET_PTS for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u64 *pts
+</para>
+</entry><entry
+ align="char">
+<para>Returns the 33-bit timestamp as defined in ITU T-REC-H.222.0 / ISO/IEC 13818-1.
+</para>
+<para>
+The PTS should belong to the currently played
+frame if possible, but may also be a value close to it
+like the PTS of the last decoded frame or the last PTS
+extracted by the PES parser.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_GET_STATUS"
+role="subsection"><title>AUDIO_GET_STATUS</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Audio Device to return the current state of the Audio
+ Device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_GET_STATUS,
+ struct audio_status &#x22C6;status);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_GET_STATUS for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct audio_status
+ *status</para>
+</entry><entry
+ align="char">
+<para>Returns the current state of Audio Device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_GET_CAPABILITIES"
+role="subsection"><title>AUDIO_GET_CAPABILITIES</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Audio Device to tell us about the decoding capabilities
+ of the audio hardware.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ AUDIO_GET_CAPABILITIES, unsigned int &#x22C6;cap);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_GET_CAPABILITIES for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>unsigned int *cap</para>
+</entry><entry
+ align="char">
+<para>Returns a bit array of supported sound formats.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_CLEAR_BUFFER"
+role="subsection"><title>AUDIO_CLEAR_BUFFER</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Audio Device to clear all software and hardware buffers
+ of the audio decoder device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_CLEAR_BUFFER);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_CLEAR_BUFFER for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_SET_ID"
+role="subsection"><title>AUDIO_SET_ID</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl selects which sub-stream is to be decoded if a program or system
+ stream is sent to the video device. If no audio stream type is set the id has to be
+ in [0xC0,0xDF] for MPEG sound, in [0x80,0x87] for AC3 and in [0xA0,0xA7]
+ for LPCM. More specifications may follow for other stream types. If the stream
+ type is set the id just specifies the substream id of the audio stream and only
+ the first 5 bits are recognized.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_SET_ID, int
+ id);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_SET_ID for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int id</para>
+</entry><entry
+ align="char">
+<para>audio sub-stream id</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_SET_MIXER"
+role="subsection"><title>AUDIO_SET_MIXER</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl lets you adjust the mixer settings of the audio decoder.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = AUDIO_SET_MIXER,
+ audio_mixer_t &#x22C6;mix);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_SET_ID for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>audio_mixer_t *mix</para>
+</entry><entry
+ align="char">
+<para>mixer settings.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="AUDIO_SET_STREAMTYPE"
+role="subsection"><title>AUDIO_SET_STREAMTYPE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl tells the driver which kind of audio stream to expect. This is useful
+ if the stream offers several audio sub-streams like LPCM and AC3.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = AUDIO_SET_STREAMTYPE,
+ int type);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_SET_STREAMTYPE for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int type</para>
+</entry><entry
+ align="char">
+<para>stream type</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>type is not a valid or supported stream type.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="AUDIO_SET_EXT_ID"
+role="subsection"><title>AUDIO_SET_EXT_ID</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl can be used to set the extension id for MPEG streams in DVD
+ playback. Only the first 3 bits are recognized.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = AUDIO_SET_EXT_ID, int
+ id);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_SET_EXT_ID for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int id</para>
+</entry><entry
+ align="char">
+<para>audio sub_stream_id</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>id is not a valid id.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="AUDIO_SET_ATTRIBUTES"
+role="subsection"><title>AUDIO_SET_ATTRIBUTES</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is intended for DVD playback and allows you to set certain
+ information about the audio stream.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = AUDIO_SET_ATTRIBUTES,
+ audio_attributes_t attr );</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_SET_ATTRIBUTES for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>audio_attributes_t
+ attr</para>
+</entry><entry
+ align="char">
+<para>audio attributes according to section ??</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>attr is not a valid or supported attribute setting.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="AUDIO_SET_KARAOKE"
+role="subsection"><title>AUDIO_SET_KARAOKE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl allows one to set the mixer settings for a karaoke DVD.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = AUDIO_SET_KARAOKE,
+ audio_karaoke_t &#x22C6;karaoke);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals AUDIO_SET_KARAOKE for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>audio_karaoke_t
+ *karaoke</para>
+</entry><entry
+ align="char">
+<para>karaoke settings according to section ??.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>karaoke is not a valid or supported karaoke setting.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+ </section>
+</section>
diff --git a/Documentation/DocBook/media/dvb/ca.xml b/Documentation/DocBook/media/dvb/ca.xml
new file mode 100644
index 000000000..85eaf4fe2
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/ca.xml
@@ -0,0 +1,582 @@
+<title>DVB CA Device</title>
+<para>The DVB CA device controls the conditional access hardware. It can be accessed through
+<emphasis role="tt">/dev/dvb/adapter0/ca0</emphasis>. Data types and and ioctl definitions can be accessed by
+including <emphasis role="tt">linux/dvb/ca.h</emphasis> in your application.
+</para>
+
+<section id="ca_data_types">
+<title>CA Data Types</title>
+
+
+<section id="ca-slot-info">
+<title>ca_slot_info_t</title>
+ <programlisting>
+typedef struct ca_slot_info {
+ int num; /&#x22C6; slot number &#x22C6;/
+
+ int type; /&#x22C6; CA interface this slot supports &#x22C6;/
+#define CA_CI 1 /&#x22C6; CI high level interface &#x22C6;/
+#define CA_CI_LINK 2 /&#x22C6; CI link layer level interface &#x22C6;/
+#define CA_CI_PHYS 4 /&#x22C6; CI physical layer level interface &#x22C6;/
+#define CA_DESCR 8 /&#x22C6; built-in descrambler &#x22C6;/
+#define CA_SC 128 /&#x22C6; simple smart card interface &#x22C6;/
+
+ unsigned int flags;
+#define CA_CI_MODULE_PRESENT 1 /&#x22C6; module (or card) inserted &#x22C6;/
+#define CA_CI_MODULE_READY 2
+} ca_slot_info_t;
+</programlisting>
+
+</section>
+<section id="ca-descr-info">
+<title>ca_descr_info_t</title>
+<programlisting>
+typedef struct ca_descr_info {
+ unsigned int num; /&#x22C6; number of available descramblers (keys) &#x22C6;/
+ unsigned int type; /&#x22C6; type of supported scrambling system &#x22C6;/
+#define CA_ECD 1
+#define CA_NDS 2
+#define CA_DSS 4
+} ca_descr_info_t;
+</programlisting>
+
+</section>
+<section id="ca-caps">
+<title>ca_caps_t</title>
+<programlisting>
+typedef struct ca_caps {
+ unsigned int slot_num; /&#x22C6; total number of CA card and module slots &#x22C6;/
+ unsigned int slot_type; /&#x22C6; OR of all supported types &#x22C6;/
+ unsigned int descr_num; /&#x22C6; total number of descrambler slots (keys) &#x22C6;/
+ unsigned int descr_type;/&#x22C6; OR of all supported types &#x22C6;/
+ } ca_cap_t;
+</programlisting>
+
+</section>
+<section id="ca-msg">
+<title>ca_msg_t</title>
+<programlisting>
+/&#x22C6; a message to/from a CI-CAM &#x22C6;/
+typedef struct ca_msg {
+ unsigned int index;
+ unsigned int type;
+ unsigned int length;
+ unsigned char msg[256];
+} ca_msg_t;
+</programlisting>
+
+</section>
+<section id="ca-descr">
+<title>ca_descr_t</title>
+<programlisting>
+typedef struct ca_descr {
+ unsigned int index;
+ unsigned int parity;
+ unsigned char cw[8];
+} ca_descr_t;
+</programlisting>
+</section>
+
+<section id="ca-pid">
+<title>ca-pid</title>
+<programlisting>
+typedef struct ca_pid {
+ unsigned int pid;
+ int index; /&#x22C6; -1 == disable&#x22C6;/
+} ca_pid_t;
+</programlisting>
+</section></section>
+
+<section id="ca_function_calls">
+<title>CA Function Calls</title>
+
+
+<section id="ca_fopen">
+<title>open()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call opens a named ca device (e.g. /dev/ost/ca) for subsequent use.</para>
+<para>When an open() call has succeeded, the device will be ready for use.
+ The significance of blocking or non-blocking mode is described in the
+ documentation for functions where there is a difference. It does not affect the
+ semantics of the open() call itself. A device opened in blocking mode can later
+ be put into non-blocking mode (and vice versa) using the F_SETFL command
+ of the fcntl system call. This is a standard system call, documented in the Linux
+ manual page for fcntl. Only one user can open the CA Device in O_RDWR
+ mode. All other attempts to open the device in this mode will fail, and an error
+ code will be returned.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int open(const char &#x22C6;deviceName, int flags);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>const char
+ *deviceName</para>
+</entry><entry
+ align="char">
+<para>Name of specific video device.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int flags</para>
+</entry><entry
+ align="char">
+<para>A bit-wise OR of the following flags:</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDONLY read-only access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDWR read/write access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_NONBLOCK open in non-blocking mode</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>(blocking mode is the default)</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>ENODEV</para>
+</entry><entry
+ align="char">
+<para>Device driver not loaded/available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EINTERNAL</para>
+</entry><entry
+ align="char">
+<para>Internal error.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBUSY</para>
+</entry><entry
+ align="char">
+<para>Device or resource busy.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Invalid argument.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
+<section id="ca_fclose">
+<title>close()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call closes a previously opened audio device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int close(int fd);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+ </section>
+
+<section id="CA_RESET"
+role="subsection"><title>CA_RESET</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = CA_RESET);
+</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals CA_RESET for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="CA_GET_CAP"
+role="subsection"><title>CA_GET_CAP</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = CA_GET_CAP,
+ ca_caps_t *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals CA_GET_CAP for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ca_caps_t *
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="CA_GET_SLOT_INFO"
+role="subsection"><title>CA_GET_SLOT_INFO</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = CA_GET_SLOT_INFO,
+ ca_slot_info_t *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals CA_GET_SLOT_INFO for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ca_slot_info_t *
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="CA_GET_DESCR_INFO"
+role="subsection"><title>CA_GET_DESCR_INFO</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = CA_GET_DESCR_INFO,
+ ca_descr_info_t *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals CA_GET_DESCR_INFO for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ca_descr_info_t *
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="CA_GET_MSG"
+role="subsection"><title>CA_GET_MSG</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = CA_GET_MSG,
+ ca_msg_t *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals CA_GET_MSG for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ca_msg_t *
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="CA_SEND_MSG"
+role="subsection"><title>CA_SEND_MSG</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = CA_SEND_MSG,
+ ca_msg_t *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals CA_SEND_MSG for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ca_msg_t *
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="CA_SET_DESCR"
+role="subsection"><title>CA_SET_DESCR</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = CA_SET_DESCR,
+ ca_descr_t *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals CA_SET_DESCR for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ca_descr_t *
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="CA_SET_PID"
+role="subsection"><title>CA_SET_PID</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = CA_SET_PID,
+ ca_pid_t *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals CA_SET_PID for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ca_pid_t *
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+</section>
diff --git a/Documentation/DocBook/media/dvb/demux.xml b/Documentation/DocBook/media/dvb/demux.xml
new file mode 100644
index 000000000..c8683d66f
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/demux.xml
@@ -0,0 +1,1145 @@
+<title>DVB Demux Device</title>
+
+<para>The DVB demux device controls the filters of the DVB hardware/software. It can be
+accessed through <emphasis role="tt">/dev/adapter0/demux0</emphasis>. Data types and and ioctl definitions can be
+accessed by including <emphasis role="tt">linux/dvb/dmx.h</emphasis> in your application.
+</para>
+<section id="dmx_types">
+<title>Demux Data Types</title>
+
+<section id="dmx-output-t">
+<title>dmx_output_t</title>
+<programlisting>
+typedef enum
+{
+ DMX_OUT_DECODER, /&#x22C6; Streaming directly to decoder. &#x22C6;/
+ DMX_OUT_TAP, /&#x22C6; Output going to a memory buffer &#x22C6;/
+ /&#x22C6; (to be retrieved via the read command).&#x22C6;/
+ DMX_OUT_TS_TAP, /&#x22C6; Output multiplexed into a new TS &#x22C6;/
+ /&#x22C6; (to be retrieved by reading from the &#x22C6;/
+ /&#x22C6; logical DVR device). &#x22C6;/
+ DMX_OUT_TSDEMUX_TAP /&#x22C6; Like TS_TAP but retrieved from the DMX device &#x22C6;/
+} dmx_output_t;
+</programlisting>
+<para><emphasis role="tt">DMX_OUT_TAP</emphasis> delivers the stream output to the demux device on which the ioctl is
+called.
+</para>
+<para><emphasis role="tt">DMX_OUT_TS_TAP</emphasis> routes output to the logical DVR device <emphasis role="tt">/dev/dvb/adapter0/dvr0</emphasis>,
+which delivers a TS multiplexed from all filters for which <emphasis role="tt">DMX_OUT_TS_TAP</emphasis> was
+specified.
+</para>
+</section>
+
+<section id="dmx-input-t">
+<title>dmx_input_t</title>
+<programlisting>
+typedef enum
+{
+ DMX_IN_FRONTEND, /&#x22C6; Input from a front-end device. &#x22C6;/
+ DMX_IN_DVR /&#x22C6; Input from the logical DVR device. &#x22C6;/
+} dmx_input_t;
+</programlisting>
+</section>
+
+<section id="dmx-pes-type-t">
+<title>dmx_pes_type_t</title>
+<programlisting>
+typedef enum
+{
+ DMX_PES_AUDIO0,
+ DMX_PES_VIDEO0,
+ DMX_PES_TELETEXT0,
+ DMX_PES_SUBTITLE0,
+ DMX_PES_PCR0,
+
+ DMX_PES_AUDIO1,
+ DMX_PES_VIDEO1,
+ DMX_PES_TELETEXT1,
+ DMX_PES_SUBTITLE1,
+ DMX_PES_PCR1,
+
+ DMX_PES_AUDIO2,
+ DMX_PES_VIDEO2,
+ DMX_PES_TELETEXT2,
+ DMX_PES_SUBTITLE2,
+ DMX_PES_PCR2,
+
+ DMX_PES_AUDIO3,
+ DMX_PES_VIDEO3,
+ DMX_PES_TELETEXT3,
+ DMX_PES_SUBTITLE3,
+ DMX_PES_PCR3,
+
+ DMX_PES_OTHER
+} dmx_pes_type_t;
+</programlisting>
+</section>
+
+<section id="dmx-filter">
+<title>struct dmx_filter</title>
+ <programlisting>
+ typedef struct dmx_filter
+{
+ __u8 filter[DMX_FILTER_SIZE];
+ __u8 mask[DMX_FILTER_SIZE];
+ __u8 mode[DMX_FILTER_SIZE];
+} dmx_filter_t;
+</programlisting>
+</section>
+
+<section id="dmx-sct-filter-params">
+<title>struct dmx_sct_filter_params</title>
+<programlisting>
+struct dmx_sct_filter_params
+{
+ __u16 pid;
+ dmx_filter_t filter;
+ __u32 timeout;
+ __u32 flags;
+#define DMX_CHECK_CRC 1
+#define DMX_ONESHOT 2
+#define DMX_IMMEDIATE_START 4
+#define DMX_KERNEL_CLIENT 0x8000
+};
+</programlisting>
+</section>
+
+<section id="dmx-pes-filter-params">
+<title>struct dmx_pes_filter_params</title>
+<programlisting>
+struct dmx_pes_filter_params
+{
+ __u16 pid;
+ dmx_input_t input;
+ dmx_output_t output;
+ dmx_pes_type_t pes_type;
+ __u32 flags;
+};
+</programlisting>
+</section>
+
+<section id="dmx-event">
+<title>struct dmx_event</title>
+ <programlisting>
+ struct dmx_event
+ {
+ dmx_event_t event;
+ time_t timeStamp;
+ union
+ {
+ dmx_scrambling_status_t scrambling;
+ } u;
+ };
+</programlisting>
+</section>
+
+<section id="dmx-stc">
+<title>struct dmx_stc</title>
+<programlisting>
+struct dmx_stc {
+ unsigned int num; /&#x22C6; input : which STC? 0..N &#x22C6;/
+ unsigned int base; /&#x22C6; output: divisor for stc to get 90 kHz clock &#x22C6;/
+ __u64 stc; /&#x22C6; output: stc in 'base'&#x22C6;90 kHz units &#x22C6;/
+};
+</programlisting>
+</section>
+
+<section id="dmx-caps">
+<title>struct dmx_caps</title>
+<programlisting>
+ typedef struct dmx_caps {
+ __u32 caps;
+ int num_decoders;
+} dmx_caps_t;
+</programlisting>
+</section>
+
+<section id="dmx-source-t">
+<title>enum dmx_source_t</title>
+<programlisting>
+typedef enum {
+ DMX_SOURCE_FRONT0 = 0,
+ DMX_SOURCE_FRONT1,
+ DMX_SOURCE_FRONT2,
+ DMX_SOURCE_FRONT3,
+ DMX_SOURCE_DVR0 = 16,
+ DMX_SOURCE_DVR1,
+ DMX_SOURCE_DVR2,
+ DMX_SOURCE_DVR3
+} dmx_source_t;
+</programlisting>
+</section>
+
+</section>
+<section id="dmx_fcalls">
+<title>Demux Function Calls</title>
+
+<section id="dmx_fopen">
+<title>open()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call, used with a device name of /dev/dvb/adapter0/demux0,
+ allocates a new filter and returns a handle which can be used for subsequent
+ control of that filter. This call has to be made for each filter to be used, i.e. every
+ returned file descriptor is a reference to a single filter. /dev/dvb/adapter0/dvr0
+ is a logical device to be used for retrieving Transport Streams for digital
+ video recording. When reading from this device a transport stream containing
+ the packets from all PES filters set in the corresponding demux device
+ (/dev/dvb/adapter0/demux0) having the output set to DMX_OUT_TS_TAP. A
+ recorded Transport Stream is replayed by writing to this device. </para>
+<para>The significance of blocking or non-blocking mode is described in the
+ documentation for functions where there is a difference. It does not affect the
+ semantics of the open() call itself. A device opened in blocking mode can later
+ be put into non-blocking mode (and vice versa) using the F_SETFL command
+ of the fcntl system call.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int open(const char &#x22C6;deviceName, int flags);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>const char
+ *deviceName</para>
+</entry><entry
+ align="char">
+<para>Name of demux device.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int flags</para>
+</entry><entry
+ align="char">
+<para>A bit-wise OR of the following flags:</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDWR read/write access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_NONBLOCK open in non-blocking mode</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>(blocking mode is the default)</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>ENODEV</para>
+</entry><entry
+ align="char">
+<para>Device driver not loaded/available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Invalid argument.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EMFILE</para>
+</entry><entry
+ align="char">
+<para>&#8220;Too many open files&#8221;, i.e. no more filters available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ENOMEM</para>
+</entry><entry
+ align="char">
+<para>The driver failed to allocate enough memory.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="dmx_fclose">
+<title>close()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call deactivates and deallocates a filter that was previously
+ allocated via the open() call.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int close(int fd);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="dmx_fread">
+<title>read()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call returns filtered data, which might be section or PES data. The
+ filtered data is transferred from the driver&#8217;s internal circular buffer to buf. The
+ maximum amount of data to be transferred is implied by count.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>When returning section data the driver always tries to return a complete single
+ section (even though buf would provide buffer space for more data). If the size
+ of the buffer is smaller than the section as much as possible will be returned,
+ and the remaining data will be provided in subsequent calls.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>The size of the internal buffer is 2 * 4096 bytes (the size of two maximum
+ sized sections) by default. The size of this buffer may be changed by using the
+ DMX_SET_BUFFER_SIZE function. If the buffer is not large enough, or if
+ the read operations are not performed fast enough, this may result in a buffer
+ overflow error. In this case EOVERFLOW will be returned, and the circular
+ buffer will be emptied. This call is blocking if there is no data to return, i.e. the
+ process will be put to sleep waiting for data, unless the O_NONBLOCK flag
+ is specified.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>Note that in order to be able to read, the filtering process has to be started
+ by defining either a section or a PES filter by means of the ioctl functions,
+ and then starting the filtering process via the DMX_START ioctl function
+ or by setting the DMX_IMMEDIATE_START flag. If the reading is done
+ from a logical DVR demux device, the data will constitute a Transport Stream
+ including the packets from all PES filters in the corresponding demux device
+ /dev/dvb/adapter0/demux0 having the output set to DMX_OUT_TS_TAP.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>size_t read(int fd, void &#x22C6;buf, size_t count);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>void *buf</para>
+</entry><entry
+ align="char">
+<para>Pointer to the buffer to be used for returned filtered data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t count</para>
+</entry><entry
+ align="char">
+<para>Size of buf.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EWOULDBLOCK</para>
+</entry><entry
+ align="char">
+<para>No data to return and O_NONBLOCK was specified.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ECRC</para>
+</entry><entry
+ align="char">
+<para>Last section had a CRC error - no data returned. The
+ buffer is flushed.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EOVERFLOW</para>
+</entry><entry
+ align="char">
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>The filtered data was not read from the buffer in due
+ time, resulting in non-read data being lost. The buffer is
+ flushed.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ETIMEDOUT</para>
+</entry><entry
+ align="char">
+<para>The section was not loaded within the stated timeout
+ period. See ioctl DMX_SET_FILTER for how to set a
+ timeout.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EFAULT</para>
+</entry><entry
+ align="char">
+<para>The driver failed to write to the callers buffer due to an
+ invalid *buf pointer.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="dmx_fwrite">
+<title>write()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call is only provided by the logical device /dev/dvb/adapter0/dvr0,
+ associated with the physical demux device that provides the actual DVR
+ functionality. It is used for replay of a digitally recorded Transport Stream.
+ Matching filters have to be defined in the corresponding physical demux
+ device, /dev/dvb/adapter0/demux0. The amount of data to be transferred is
+ implied by count.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>ssize_t write(int fd, const void &#x22C6;buf, size_t
+ count);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>void *buf</para>
+</entry><entry
+ align="char">
+<para>Pointer to the buffer containing the Transport Stream.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t count</para>
+</entry><entry
+ align="char">
+<para>Size of buf.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EWOULDBLOCK</para>
+</entry><entry
+ align="char">
+<para>No data was written. This
+ might happen if O_NONBLOCK was specified and there
+ is no more buffer space available (if O_NONBLOCK is
+ not specified the function will block until buffer space is
+ available).</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBUSY</para>
+</entry><entry
+ align="char">
+<para>This error code indicates that there are conflicting
+ requests. The corresponding demux device is setup to
+ receive data from the front- end. Make sure that these
+ filters are stopped and that the filters with input set to
+ DMX_IN_DVR are started.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="DMX_START">
+<title>DMX_START</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call is used to start the actual filtering operation defined via the ioctl
+ calls DMX_SET_FILTER or DMX_SET_PES_FILTER.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl( int fd, int request = DMX_START);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_START for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Invalid argument, i.e. no filtering parameters provided via
+ the DMX_SET_FILTER or DMX_SET_PES_FILTER
+ functions.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBUSY</para>
+</entry><entry
+ align="char">
+<para>This error code indicates that there are conflicting
+ requests. There are active filters filtering data from
+ another input source. Make sure that these filters are
+ stopped before starting this filter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="DMX_STOP">
+<title>DMX_STOP</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call is used to stop the actual filtering operation defined via the
+ ioctl calls DMX_SET_FILTER or DMX_SET_PES_FILTER and started via
+ the DMX_START command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl( int fd, int request = DMX_STOP);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_STOP for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="DMX_SET_FILTER">
+<title>DMX_SET_FILTER</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call sets up a filter according to the filter and mask parameters
+ provided. A timeout may be defined stating number of seconds to wait for a
+ section to be loaded. A value of 0 means that no timeout should be applied.
+ Finally there is a flag field where it is possible to state whether a section should
+ be CRC-checked, whether the filter should be a &#8221;one-shot&#8221; filter, i.e. if the
+ filtering operation should be stopped after the first section is received, and
+ whether the filtering operation should be started immediately (without waiting
+ for a DMX_START ioctl call). If a filter was previously set-up, this filter will
+ be canceled, and the receive buffer will be flushed.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl( int fd, int request = DMX_SET_FILTER,
+ struct dmx_sct_filter_params &#x22C6;params);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_SET_FILTER for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct
+ dmx_sct_filter_params
+ *params</para>
+</entry><entry
+ align="char">
+<para>Pointer to structure containing filter parameters.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="DMX_SET_PES_FILTER">
+<title>DMX_SET_PES_FILTER</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call sets up a PES filter according to the parameters provided. By a
+ PES filter is meant a filter that is based just on the packet identifier (PID), i.e.
+ no PES header or payload filtering capability is supported.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>The transport stream destination for the filtered output may be set. Also the
+ PES type may be stated in order to be able to e.g. direct a video stream directly
+ to the video decoder. Finally there is a flag field where it is possible to state
+ whether the filtering operation should be started immediately (without waiting
+ for a DMX_START ioctl call). If a filter was previously set-up, this filter will
+ be cancelled, and the receive buffer will be flushed.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl( int fd, int request = DMX_SET_PES_FILTER,
+ struct dmx_pes_filter_params &#x22C6;params);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_SET_PES_FILTER for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct
+ dmx_pes_filter_params
+ *params</para>
+</entry><entry
+ align="char">
+<para>Pointer to structure containing filter parameters.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EBUSY</para>
+</entry><entry
+ align="char">
+<para>This error code indicates that there are conflicting
+ requests. There are active filters filtering data from
+ another input source. Make sure that these filters are
+ stopped before starting this filter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="DMX_SET_BUFFER_SIZE">
+<title>DMX_SET_BUFFER_SIZE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call is used to set the size of the circular buffer used for filtered data.
+ The default size is two maximum sized sections, i.e. if this function is not called
+ a buffer size of 2 * 4096 bytes will be used.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl( int fd, int request =
+ DMX_SET_BUFFER_SIZE, unsigned long size);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_SET_BUFFER_SIZE for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>unsigned long size</para>
+</entry><entry
+ align="char">
+<para>Size of circular buffer.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="DMX_GET_EVENT">
+<title>DMX_GET_EVENT</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns an event if available. If an event is not available,
+ the behavior depends on whether the device is in blocking or non-blocking
+ mode. In the latter case, the call fails immediately with errno set to
+ EWOULDBLOCK. In the former case, the call blocks until an event becomes
+ available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>The standard Linux poll() and/or select() system calls can be used with the
+ device file descriptor to watch for new events. For select(), the file descriptor
+ should be included in the exceptfds argument, and for poll(), POLLPRI should
+ be specified as the wake-up condition. Only the latest event for each filter is
+ saved.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl( int fd, int request = DMX_GET_EVENT,
+ struct dmx_event &#x22C6;ev);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_GET_EVENT for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct dmx_event *ev</para>
+</entry><entry
+ align="char">
+<para>Pointer to the location where the event is to be stored.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EWOULDBLOCK</para>
+</entry><entry
+ align="char">
+<para>There is no event pending, and the device is in
+ non-blocking mode.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="DMX_GET_STC">
+<title>DMX_GET_STC</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns the current value of the system time counter (which is driven
+ by a PES filter of type DMX_PES_PCR). Some hardware supports more than one
+ STC, so you must specify which one by setting the num field of stc before the ioctl
+ (range 0...n). The result is returned in form of a ratio with a 64 bit numerator
+ and a 32 bit denominator, so the real 90kHz STC value is stc-&#x003E;stc /
+ stc-&#x003E;base
+ .</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl( int fd, int request = DMX_GET_STC, struct
+ dmx_stc &#x22C6;stc);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_GET_STC for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct dmx_stc *stc</para>
+</entry><entry
+ align="char">
+<para>Pointer to the location where the stc is to be stored.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Invalid stc number.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+ </section>
+
+<section id="DMX_GET_PES_PIDS"
+role="subsection"><title>DMX_GET_PES_PIDS</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = DMX_GET_PES_PIDS,
+ __u16[5]);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_GET_PES_PIDS for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u16[5]
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="DMX_GET_CAPS"
+role="subsection"><title>DMX_GET_CAPS</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = DMX_GET_CAPS,
+ dmx_caps_t *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_GET_CAPS for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_caps_t *
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="DMX_SET_SOURCE"
+role="subsection"><title>DMX_SET_SOURCE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = DMX_SET_SOURCE,
+ dmx_source_t *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_SET_SOURCE for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_source_t *
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="DMX_ADD_PID"
+role="subsection"><title>DMX_ADD_PID</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call allows to add multiple PIDs to a transport stream filter
+previously set up with DMX_SET_PES_FILTER and output equal to DMX_OUT_TSDEMUX_TAP.
+</para></entry></row><row><entry align="char"><para>
+It is used by readers of /dev/dvb/adapterX/demuxY.
+</para></entry></row><row><entry align="char"><para>
+It may be called at any time, i.e. before or after the first filter on the
+shared file descriptor was started. It makes it possible to record multiple
+services without the need to de-multiplex or re-multiplex TS packets.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = DMX_ADD_PID,
+ __u16 *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_ADD_PID for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u16 *
+</para>
+</entry><entry
+ align="char">
+<para>PID number to be filtered.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="DMX_REMOVE_PID"
+role="subsection"><title>DMX_REMOVE_PID</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call allows to remove a PID when multiple PIDs are set on a
+transport stream filter, e. g. a filter previously set up with output equal to
+DMX_OUT_TSDEMUX_TAP, created via either DMX_SET_PES_FILTER or DMX_ADD_PID.
+</para></entry></row><row><entry align="char"><para>
+It is used by readers of /dev/dvb/adapterX/demuxY.
+</para></entry></row><row><entry align="char"><para>
+It may be called at any time, i.e. before or after the first filter on the
+shared file descriptor was started. It makes it possible to record multiple
+services without the need to de-multiplex or re-multiplex TS packets.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = DMX_REMOVE_PID,
+ __u16 *);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals DMX_REMOVE_PID for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u16 *
+</para>
+</entry><entry
+ align="char">
+<para>PID of the PES filter to be removed.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+
+</section>
diff --git a/Documentation/DocBook/media/dvb/dvbapi.xml b/Documentation/DocBook/media/dvb/dvbapi.xml
new file mode 100644
index 000000000..4c15396c6
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/dvbapi.xml
@@ -0,0 +1,141 @@
+<partinfo>
+<authorgroup>
+<author>
+<firstname>Ralph</firstname>
+<surname>Metzler</surname>
+<othername role="mi">J. K.</othername>
+<affiliation><address><email>rjkm@metzlerbros.de</email></address></affiliation>
+</author>
+<author>
+<firstname>Marcus</firstname>
+<surname>Metzler</surname>
+<othername role="mi">O. C.</othername>
+<affiliation><address><email>rjkm@metzlerbros.de</email></address></affiliation>
+</author>
+</authorgroup>
+<authorgroup>
+<author>
+<firstname>Mauro</firstname>
+<othername role="mi">Carvalho</othername>
+<surname>Chehab</surname>
+<affiliation><address><email>m.chehab@samsung.com</email></address></affiliation>
+<contrib>Ported document to Docbook XML.</contrib>
+</author>
+</authorgroup>
+<copyright>
+ <year>2002</year>
+ <year>2003</year>
+ <holder>Convergence GmbH</holder>
+</copyright>
+<copyright>
+ <year>2009-2014</year>
+ <holder>Mauro Carvalho Chehab</holder>
+</copyright>
+
+<revhistory>
+<!-- Put document revisions here, newest first. -->
+<revision>
+ <revnumber>2.0.4</revnumber>
+ <date>2011-05-06</date>
+ <authorinitials>mcc</authorinitials>
+ <revremark>
+ Add more information about DVB APIv5, better describing the frontend GET/SET props ioctl's.
+ </revremark>
+</revision>
+<revision>
+ <revnumber>2.0.3</revnumber>
+ <date>2010-07-03</date>
+ <authorinitials>mcc</authorinitials>
+ <revremark>
+ Add some frontend capabilities flags, present on kernel, but missing at the specs.
+ </revremark>
+</revision>
+<revision>
+ <revnumber>2.0.2</revnumber>
+ <date>2009-10-25</date>
+ <authorinitials>mcc</authorinitials>
+ <revremark>
+ documents FE_SET_FRONTEND_TUNE_MODE and FE_DISHETWORK_SEND_LEGACY_CMD ioctls.
+ </revremark>
+</revision>
+<revision>
+<revnumber>2.0.1</revnumber>
+<date>2009-09-16</date>
+<authorinitials>mcc</authorinitials>
+<revremark>
+Added ISDB-T test originally written by Patrick Boettcher
+</revremark>
+</revision>
+<revision>
+<revnumber>2.0.0</revnumber>
+<date>2009-09-06</date>
+<authorinitials>mcc</authorinitials>
+<revremark>Conversion from LaTex to DocBook XML. The
+ contents is the same as the original LaTex version.</revremark>
+</revision>
+<revision>
+<revnumber>1.0.0</revnumber>
+<date>2003-07-24</date>
+<authorinitials>rjkm</authorinitials>
+<revremark>Initial revision on LaTEX.</revremark>
+</revision>
+</revhistory>
+</partinfo>
+
+
+<title>LINUX DVB API</title>
+<subtitle>Version 5.10</subtitle>
+<!-- ADD THE CHAPTERS HERE -->
+ <chapter id="dvb_introdution">
+ &sub-intro;
+ </chapter>
+ <chapter id="dvb_frontend">
+ &sub-frontend;
+ </chapter>
+ <chapter id="dvb_demux">
+ &sub-demux;
+ </chapter>
+ <chapter id="dvb_video">
+ &sub-video;
+ </chapter>
+ <chapter id="dvb_audio">
+ &sub-audio;
+ </chapter>
+ <chapter id="dvb_ca">
+ &sub-ca;
+ </chapter>
+ <chapter id="dvb_net">
+ &sub-net;
+ </chapter>
+ <chapter id="dvb_kdapi">
+ &sub-kdapi;
+ </chapter>
+ <chapter id="dvb_examples">
+ &sub-examples;
+ </chapter>
+<!-- END OF CHAPTERS -->
+ <appendix id="audio_h">
+ <title>DVB Audio Header File</title>
+ &sub-audio-h;
+ </appendix>
+ <appendix id="ca_h">
+ <title>DVB Conditional Access Header File</title>
+ &sub-ca-h;
+ </appendix>
+ <appendix id="dmx_h">
+ <title>DVB Demux Header File</title>
+ &sub-dmx-h;
+ </appendix>
+ <appendix id="frontend_h">
+ <title>DVB Frontend Header File</title>
+ &sub-frontend-h;
+ </appendix>
+ <appendix id="net_h">
+ <title>DVB Network Header File</title>
+ &sub-net-h;
+ </appendix>
+ <appendix id="video_h">
+ <title>DVB Video Header File</title>
+ &sub-video-h;
+ </appendix>
+
diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
new file mode 100644
index 000000000..3018564dd
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -0,0 +1,1317 @@
+<section id="FE_GET_SET_PROPERTY">
+<title><constant>FE_GET_PROPERTY/FE_SET_PROPERTY</constant></title>
+<para>This section describes the DVB version 5 extension of the DVB-API, also
+called "S2API", as this API were added to provide support for DVB-S2. It was
+designed to be able to replace the old frontend API. Yet, the DISEQC and
+the capability ioctls weren't implemented yet via the new way.</para>
+<para>The typical usage for the <constant>FE_GET_PROPERTY/FE_SET_PROPERTY</constant>
+API is to replace the ioctl's were the <link linkend="dvb-frontend-parameters">
+struct <constant>dvb_frontend_parameters</constant></link> were used.</para>
+<section id="dtv-stats">
+<title>DTV stats type</title>
+<programlisting>
+struct dtv_stats {
+ __u8 scale; /* enum fecap_scale_params type */
+ union {
+ __u64 uvalue; /* for counters and relative scales */
+ __s64 svalue; /* for 1/1000 dB measures */
+ };
+} __packed;
+</programlisting>
+</section>
+<section id="dtv-fe-stats">
+<title>DTV stats type</title>
+<programlisting>
+#define MAX_DTV_STATS 4
+
+struct dtv_fe_stats {
+ __u8 len;
+ struct dtv_stats stat[MAX_DTV_STATS];
+} __packed;
+</programlisting>
+</section>
+
+<section id="dtv-property">
+<title>DTV property type</title>
+<programlisting>
+/* Reserved fields should be set to 0 */
+
+struct dtv_property {
+ __u32 cmd;
+ __u32 reserved[3];
+ union {
+ __u32 data;
+ struct dtv_fe_stats st;
+ struct {
+ __u8 data[32];
+ __u32 len;
+ __u32 reserved1[3];
+ void *reserved2;
+ } buffer;
+ } u;
+ int result;
+} __attribute__ ((packed));
+
+/* num of properties cannot exceed DTV_IOCTL_MAX_MSGS per ioctl */
+#define DTV_IOCTL_MAX_MSGS 64
+</programlisting>
+</section>
+<section id="dtv-properties">
+<title>DTV properties type</title>
+<programlisting>
+struct dtv_properties {
+ __u32 num;
+ struct dtv_property *props;
+};
+</programlisting>
+</section>
+
+<section id="FE_GET_PROPERTY">
+<title>FE_GET_PROPERTY</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns one or more frontend properties. This call only
+ requires read-only access to the device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = <link linkend="FE_GET_PROPERTY">FE_GET_PROPERTY</link>,
+ dtv_properties &#x22C6;props);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int num</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_GET_PROPERTY">FE_GET_PROPERTY</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct dtv_property *props</para>
+</entry><entry
+ align="char">
+<para>Points to the location where the front-end property commands are stored.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row>
+ <entry align="char"><para>EOPNOTSUPP</para></entry>
+ <entry align="char"><para>Property type not supported.</para></entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="FE_SET_PROPERTY">
+<title>FE_SET_PROPERTY</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call sets one or more frontend properties. This call
+ requires read/write access to the device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = <link linkend="FE_SET_PROPERTY">FE_SET_PROPERTY</link>,
+ dtv_properties &#x22C6;props);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int num</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_SET_PROPERTY">FE_SET_PROPERTY</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct dtv_property *props</para>
+</entry><entry
+ align="char">
+<para>Points to the location where the front-end property commands are stored.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row>
+ <entry align="char"><para>EOPNOTSUPP</para></entry>
+ <entry align="char"><para>Property type not supported.</para></entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section>
+ <title>Property types</title>
+<para>
+On <link linkend="FE_GET_PROPERTY">FE_GET_PROPERTY</link>/<link linkend="FE_SET_PROPERTY">FE_SET_PROPERTY</link>,
+the actual action is determined by the dtv_property cmd/data pairs. With one single ioctl, is possible to
+get/set up to 64 properties. The actual meaning of each property is described on the next sections.
+</para>
+
+<para>The available frontend property types are shown on the next section.</para>
+</section>
+
+<section id="fe_property_parameters">
+ <title>Digital TV property parameters</title>
+ <section id="DTV-UNDEFINED">
+ <title><constant>DTV_UNDEFINED</constant></title>
+ <para>Used internally. A GET/SET operation for it won't change or return anything.</para>
+ </section>
+ <section id="DTV-TUNE">
+ <title><constant>DTV_TUNE</constant></title>
+ <para>Interpret the cache of data, build either a traditional frontend tunerequest so we can pass validation in the <constant>FE_SET_FRONTEND</constant> ioctl.</para>
+ </section>
+ <section id="DTV-CLEAR">
+ <title><constant>DTV_CLEAR</constant></title>
+ <para>Reset a cache of data specific to the frontend here. This does not effect hardware.</para>
+ </section>
+ <section id="DTV-FREQUENCY">
+ <title><constant>DTV_FREQUENCY</constant></title>
+
+ <para>Central frequency of the channel.</para>
+
+ <para>Notes:</para>
+ <para>1)For satellital delivery systems, it is measured in kHz.
+ For the other ones, it is measured in Hz.</para>
+ <para>2)For ISDB-T, the channels are usually transmitted with an offset of 143kHz.
+ E.g. a valid frequency could be 474143 kHz. The stepping is bound to the bandwidth of
+ the channel which is 6MHz.</para>
+
+ <para>3)As in ISDB-Tsb the channel consists of only one or three segments the
+ frequency step is 429kHz, 3*429 respectively. As for ISDB-T the
+ central frequency of the channel is expected.</para>
+ </section>
+ <section id="DTV-MODULATION">
+ <title><constant>DTV_MODULATION</constant></title>
+<para>Specifies the frontend modulation type for cable and satellite types. The modulation can be one of the types bellow:</para>
+<programlisting>
+ typedef enum fe_modulation {
+ QPSK,
+ QAM_16,
+ QAM_32,
+ QAM_64,
+ QAM_128,
+ QAM_256,
+ QAM_AUTO,
+ VSB_8,
+ VSB_16,
+ PSK_8,
+ APSK_16,
+ APSK_32,
+ DQPSK,
+ QAM_4_NR,
+ } fe_modulation_t;
+</programlisting>
+ </section>
+ <section id="DTV-BANDWIDTH-HZ">
+ <title><constant>DTV_BANDWIDTH_HZ</constant></title>
+
+ <para>Bandwidth for the channel, in HZ.</para>
+
+ <para>Possible values:
+ <constant>1712000</constant>,
+ <constant>5000000</constant>,
+ <constant>6000000</constant>,
+ <constant>7000000</constant>,
+ <constant>8000000</constant>,
+ <constant>10000000</constant>.
+ </para>
+
+ <para>Notes:</para>
+
+ <para>1) For ISDB-T it should be always 6000000Hz (6MHz)</para>
+ <para>2) For ISDB-Tsb it can vary depending on the number of connected segments</para>
+ <para>3) Bandwidth doesn't apply for DVB-C transmissions, as the bandwidth
+ for DVB-C depends on the symbol rate</para>
+ <para>4) Bandwidth in ISDB-T is fixed (6MHz) or can be easily derived from
+ other parameters (DTV_ISDBT_SB_SEGMENT_IDX,
+ DTV_ISDBT_SB_SEGMENT_COUNT).</para>
+ <para>5) DVB-T supports 6, 7 and 8MHz.</para>
+ <para>6) In addition, DVB-T2 supports 1.172, 5 and 10MHz.</para>
+ </section>
+ <section id="DTV-INVERSION">
+ <title><constant>DTV_INVERSION</constant></title>
+ <para>The Inversion field can take one of these values:
+ </para>
+ <programlisting>
+ typedef enum fe_spectral_inversion {
+ INVERSION_OFF,
+ INVERSION_ON,
+ INVERSION_AUTO
+ } fe_spectral_inversion_t;
+ </programlisting>
+ <para>It indicates if spectral inversion should be presumed or not. In the automatic setting
+ (<constant>INVERSION_AUTO</constant>) the hardware will try to figure out the correct setting by
+ itself.
+ </para>
+ </section>
+ <section id="DTV-DISEQC-MASTER">
+ <title><constant>DTV_DISEQC_MASTER</constant></title>
+ <para>Currently not implemented.</para>
+ </section>
+ <section id="DTV-SYMBOL-RATE">
+ <title><constant>DTV_SYMBOL_RATE</constant></title>
+ <para>Digital TV symbol rate, in bauds (symbols/second). Used on cable standards.</para>
+ </section>
+ <section id="DTV-INNER-FEC">
+ <title><constant>DTV_INNER_FEC</constant></title>
+ <para>Used cable/satellite transmissions. The acceptable values are:
+ </para>
+ <programlisting>
+typedef enum fe_code_rate {
+ FEC_NONE = 0,
+ FEC_1_2,
+ FEC_2_3,
+ FEC_3_4,
+ FEC_4_5,
+ FEC_5_6,
+ FEC_6_7,
+ FEC_7_8,
+ FEC_8_9,
+ FEC_AUTO,
+ FEC_3_5,
+ FEC_9_10,
+ FEC_2_5,
+} fe_code_rate_t;
+ </programlisting>
+ <para>which correspond to error correction rates of 1/2, 2/3, etc.,
+ no error correction or auto detection.</para>
+ </section>
+ <section id="DTV-VOLTAGE">
+ <title><constant>DTV_VOLTAGE</constant></title>
+ <para>The voltage is usually used with non-DiSEqC capable LNBs to switch
+ the polarzation (horizontal/vertical). When using DiSEqC epuipment this
+ voltage has to be switched consistently to the DiSEqC commands as
+ described in the DiSEqC spec.</para>
+ <programlisting>
+ typedef enum fe_sec_voltage {
+ SEC_VOLTAGE_13,
+ SEC_VOLTAGE_18
+ } fe_sec_voltage_t;
+ </programlisting>
+ </section>
+ <section id="DTV-TONE">
+ <title><constant>DTV_TONE</constant></title>
+ <para>Currently not used.</para>
+ </section>
+ <section id="DTV-PILOT">
+ <title><constant>DTV_PILOT</constant></title>
+ <para>Sets DVB-S2 pilot</para>
+ <section id="fe-pilot-t">
+ <title>fe_pilot type</title>
+ <programlisting>
+typedef enum fe_pilot {
+ PILOT_ON,
+ PILOT_OFF,
+ PILOT_AUTO,
+} fe_pilot_t;
+ </programlisting>
+ </section>
+ </section>
+ <section id="DTV-ROLLOFF">
+ <title><constant>DTV_ROLLOFF</constant></title>
+ <para>Sets DVB-S2 rolloff</para>
+
+ <section id="fe-rolloff-t">
+ <title>fe_rolloff type</title>
+ <programlisting>
+typedef enum fe_rolloff {
+ ROLLOFF_35, /* Implied value in DVB-S, default for DVB-S2 */
+ ROLLOFF_20,
+ ROLLOFF_25,
+ ROLLOFF_AUTO,
+} fe_rolloff_t;
+ </programlisting>
+ </section>
+ </section>
+ <section id="DTV-DISEQC-SLAVE-REPLY">
+ <title><constant>DTV_DISEQC_SLAVE_REPLY</constant></title>
+ <para>Currently not implemented.</para>
+ </section>
+ <section id="DTV-FE-CAPABILITY-COUNT">
+ <title><constant>DTV_FE_CAPABILITY_COUNT</constant></title>
+ <para>Currently not implemented.</para>
+ </section>
+ <section id="DTV-FE-CAPABILITY">
+ <title><constant>DTV_FE_CAPABILITY</constant></title>
+ <para>Currently not implemented.</para>
+ </section>
+ <section id="DTV-DELIVERY-SYSTEM">
+ <title><constant>DTV_DELIVERY_SYSTEM</constant></title>
+ <para>Specifies the type of Delivery system</para>
+ <section id="fe-delivery-system-t">
+ <title>fe_delivery_system type</title>
+ <para>Possible values: </para>
+<programlisting>
+
+typedef enum fe_delivery_system {
+ SYS_UNDEFINED,
+ SYS_DVBC_ANNEX_A,
+ SYS_DVBC_ANNEX_B,
+ SYS_DVBT,
+ SYS_DSS,
+ SYS_DVBS,
+ SYS_DVBS2,
+ SYS_DVBH,
+ SYS_ISDBT,
+ SYS_ISDBS,
+ SYS_ISDBC,
+ SYS_ATSC,
+ SYS_ATSCMH,
+ SYS_DTMB,
+ SYS_CMMB,
+ SYS_DAB,
+ SYS_DVBT2,
+ SYS_TURBO,
+ SYS_DVBC_ANNEX_C,
+} fe_delivery_system_t;
+</programlisting>
+ </section>
+ </section>
+ <section id="DTV-ISDBT-PARTIAL-RECEPTION">
+ <title><constant>DTV_ISDBT_PARTIAL_RECEPTION</constant></title>
+
+ <para>If <constant>DTV_ISDBT_SOUND_BROADCASTING</constant> is '0' this bit-field represents whether
+ the channel is in partial reception mode or not.</para>
+
+ <para>If '1' <constant>DTV_ISDBT_LAYERA_*</constant> values are assigned to the center segment and
+ <constant>DTV_ISDBT_LAYERA_SEGMENT_COUNT</constant> has to be '1'.</para>
+
+ <para>If in addition <constant>DTV_ISDBT_SOUND_BROADCASTING</constant> is '1'
+ <constant>DTV_ISDBT_PARTIAL_RECEPTION</constant> represents whether this ISDB-Tsb channel
+ is consisting of one segment and layer or three segments and two layers.</para>
+
+ <para>Possible values: 0, 1, -1 (AUTO)</para>
+ </section>
+ <section id="DTV-ISDBT-SOUND-BROADCASTING">
+ <title><constant>DTV_ISDBT_SOUND_BROADCASTING</constant></title>
+
+ <para>This field represents whether the other DTV_ISDBT_*-parameters are
+ referring to an ISDB-T and an ISDB-Tsb channel. (See also
+ <constant>DTV_ISDBT_PARTIAL_RECEPTION</constant>).</para>
+
+ <para>Possible values: 0, 1, -1 (AUTO)</para>
+ </section>
+ <section id="DTV-ISDBT-SB-SUBCHANNEL-ID">
+ <title><constant>DTV_ISDBT_SB_SUBCHANNEL_ID</constant></title>
+
+ <para>This field only applies if <constant>DTV_ISDBT_SOUND_BROADCASTING</constant> is '1'.</para>
+
+ <para>(Note of the author: This might not be the correct description of the
+ <constant>SUBCHANNEL-ID</constant> in all details, but it is my understanding of the technical
+ background needed to program a device)</para>
+
+ <para>An ISDB-Tsb channel (1 or 3 segments) can be broadcasted alone or in a
+ set of connected ISDB-Tsb channels. In this set of channels every
+ channel can be received independently. The number of connected
+ ISDB-Tsb segment can vary, e.g. depending on the frequency spectrum
+ bandwidth available.</para>
+
+ <para>Example: Assume 8 ISDB-Tsb connected segments are broadcasted. The
+ broadcaster has several possibilities to put those channels in the
+ air: Assuming a normal 13-segment ISDB-T spectrum he can align the 8
+ segments from position 1-8 to 5-13 or anything in between.</para>
+
+ <para>The underlying layer of segments are subchannels: each segment is
+ consisting of several subchannels with a predefined IDs. A sub-channel
+ is used to help the demodulator to synchronize on the channel.</para>
+
+ <para>An ISDB-T channel is always centered over all sub-channels. As for
+ the example above, in ISDB-Tsb it is no longer as simple as that.</para>
+
+ <para><constant>The DTV_ISDBT_SB_SUBCHANNEL_ID</constant> parameter is used to give the
+ sub-channel ID of the segment to be demodulated.</para>
+
+ <para>Possible values: 0 .. 41, -1 (AUTO)</para>
+ </section>
+ <section id="DTV-ISDBT-SB-SEGMENT-IDX">
+ <title><constant>DTV_ISDBT_SB_SEGMENT_IDX</constant></title>
+ <para>This field only applies if <constant>DTV_ISDBT_SOUND_BROADCASTING</constant> is '1'.</para>
+ <para><constant>DTV_ISDBT_SB_SEGMENT_IDX</constant> gives the index of the segment to be
+ demodulated for an ISDB-Tsb channel where several of them are
+ transmitted in the connected manner.</para>
+ <para>Possible values: 0 .. <constant>DTV_ISDBT_SB_SEGMENT_COUNT</constant> - 1</para>
+ <para>Note: This value cannot be determined by an automatic channel search.</para>
+ </section>
+ <section id="DTV-ISDBT-SB-SEGMENT-COUNT">
+ <title><constant>DTV_ISDBT_SB_SEGMENT_COUNT</constant></title>
+ <para>This field only applies if <constant>DTV_ISDBT_SOUND_BROADCASTING</constant> is '1'.</para>
+ <para><constant>DTV_ISDBT_SB_SEGMENT_COUNT</constant> gives the total count of connected ISDB-Tsb
+ channels.</para>
+ <para>Possible values: 1 .. 13</para>
+ <para>Note: This value cannot be determined by an automatic channel search.</para>
+ </section>
+ <section id="isdb-hierq-layers">
+ <title><constant>DTV-ISDBT-LAYER*</constant> parameters</title>
+ <para>ISDB-T channels can be coded hierarchically. As opposed to DVB-T in
+ ISDB-T hierarchical layers can be decoded simultaneously. For that
+ reason a ISDB-T demodulator has 3 Viterbi and 3 Reed-Solomon decoders.</para>
+ <para>ISDB-T has 3 hierarchical layers which each can use a part of the
+ available segments. The total number of segments over all layers has
+ to 13 in ISDB-T.</para>
+ <para>There are 3 parameter sets, for Layers A, B and C.</para>
+ <section id="DTV-ISDBT-LAYER-ENABLED">
+ <title><constant>DTV_ISDBT_LAYER_ENABLED</constant></title>
+ <para>Hierarchical reception in ISDB-T is achieved by enabling or disabling
+ layers in the decoding process. Setting all bits of
+ <constant>DTV_ISDBT_LAYER_ENABLED</constant> to '1' forces all layers (if applicable) to be
+ demodulated. This is the default.</para>
+ <para>If the channel is in the partial reception mode
+ (<constant>DTV_ISDBT_PARTIAL_RECEPTION</constant> = 1) the central segment can be decoded
+ independently of the other 12 segments. In that mode layer A has to
+ have a <constant>SEGMENT_COUNT</constant> of 1.</para>
+ <para>In ISDB-Tsb only layer A is used, it can be 1 or 3 in ISDB-Tsb
+ according to <constant>DTV_ISDBT_PARTIAL_RECEPTION</constant>. <constant>SEGMENT_COUNT</constant> must be filled
+ accordingly.</para>
+ <para>Possible values: 0x1, 0x2, 0x4 (|-able)</para>
+ <para><constant>DTV_ISDBT_LAYER_ENABLED[0:0]</constant> - layer A</para>
+ <para><constant>DTV_ISDBT_LAYER_ENABLED[1:1]</constant> - layer B</para>
+ <para><constant>DTV_ISDBT_LAYER_ENABLED[2:2]</constant> - layer C</para>
+ <para><constant>DTV_ISDBT_LAYER_ENABLED[31:3]</constant> unused</para>
+ </section>
+ <section id="DTV-ISDBT-LAYER-FEC">
+ <title><constant>DTV_ISDBT_LAYER*_FEC</constant></title>
+ <para>Possible values: <constant>FEC_AUTO</constant>, <constant>FEC_1_2</constant>, <constant>FEC_2_3</constant>, <constant>FEC_3_4</constant>, <constant>FEC_5_6</constant>, <constant>FEC_7_8</constant></para>
+ </section>
+ <section id="DTV-ISDBT-LAYER-MODULATION">
+ <title><constant>DTV_ISDBT_LAYER*_MODULATION</constant></title>
+ <para>Possible values: <constant>QAM_AUTO</constant>, QP<constant>SK, QAM_16</constant>, <constant>QAM_64</constant>, <constant>DQPSK</constant></para>
+ <para>Note: If layer C is <constant>DQPSK</constant> layer B has to be <constant>DQPSK</constant>. If layer B is <constant>DQPSK</constant>
+ and <constant>DTV_ISDBT_PARTIAL_RECEPTION</constant>=0 layer has to be <constant>DQPSK</constant>.</para>
+ </section>
+ <section id="DTV-ISDBT-LAYER-SEGMENT-COUNT">
+ <title><constant>DTV_ISDBT_LAYER*_SEGMENT_COUNT</constant></title>
+ <para>Possible values: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1 (AUTO)</para>
+ <para>Note: Truth table for <constant>DTV_ISDBT_SOUND_BROADCASTING</constant> and
+ <constant>DTV_ISDBT_PARTIAL_RECEPTION</constant> and <constant>LAYER</constant>*_SEGMENT_COUNT</para>
+ <informaltable id="isdbt-layer_seg-cnt-table">
+ <tgroup cols="6">
+ <tbody>
+ <row>
+ <entry>PR</entry>
+ <entry>SB</entry>
+ <entry>Layer A width</entry>
+ <entry>Layer B width</entry>
+ <entry>Layer C width</entry>
+ <entry>total width</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>1 .. 13</entry>
+ <entry>1 .. 13</entry>
+ <entry>1 .. 13</entry>
+ <entry>13</entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>1</entry>
+ <entry>1 .. 13</entry>
+ <entry>1 .. 13</entry>
+ <entry>13</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>1</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>1</entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>1</entry>
+ <entry>1</entry>
+ <entry>2</entry>
+ <entry>0</entry>
+ <entry>13</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </section>
+ <section id="DTV-ISDBT-LAYER-TIME-INTERLEAVING">
+ <title><constant>DTV_ISDBT_LAYER*_TIME_INTERLEAVING</constant></title>
+ <para>Valid values: 0, 1, 2, 4, -1 (AUTO)</para>
+ <para>when DTV_ISDBT_SOUND_BROADCASTING is active, value 8 is also valid.</para>
+ <para>Note: The real time interleaving length depends on the mode (fft-size). The values
+ here are referring to what can be found in the TMCC-structure, as shown in the table below.</para>
+ <informaltable id="isdbt-layer-interleaving-table">
+ <tgroup cols="4" align="center">
+ <tbody>
+ <row>
+ <entry>DTV_ISDBT_LAYER*_TIME_INTERLEAVING</entry>
+ <entry>Mode 1 (2K FFT)</entry>
+ <entry>Mode 2 (4K FFT)</entry>
+ <entry>Mode 3 (8K FFT)</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>4</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>8</entry>
+ <entry>4</entry>
+ <entry>2</entry>
+ </row>
+ <row>
+ <entry>4</entry>
+ <entry>16</entry>
+ <entry>8</entry>
+ <entry>4</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </section>
+ <section id="DTV-ATSCMH-FIC-VER">
+ <title><constant>DTV_ATSCMH_FIC_VER</constant></title>
+ <para>Version number of the FIC (Fast Information Channel) signaling data.</para>
+ <para>FIC is used for relaying information to allow rapid service acquisition by the receiver.</para>
+ <para>Possible values: 0, 1, 2, 3, ..., 30, 31</para>
+ </section>
+ <section id="DTV-ATSCMH-PARADE-ID">
+ <title><constant>DTV_ATSCMH_PARADE_ID</constant></title>
+ <para>Parade identification number</para>
+ <para>A parade is a collection of up to eight MH groups, conveying one or two ensembles.</para>
+ <para>Possible values: 0, 1, 2, 3, ..., 126, 127</para>
+ </section>
+ <section id="DTV-ATSCMH-NOG">
+ <title><constant>DTV_ATSCMH_NOG</constant></title>
+ <para>Number of MH groups per MH subframe for a designated parade.</para>
+ <para>Possible values: 1, 2, 3, 4, 5, 6, 7, 8</para>
+ </section>
+ <section id="DTV-ATSCMH-TNOG">
+ <title><constant>DTV_ATSCMH_TNOG</constant></title>
+ <para>Total number of MH groups including all MH groups belonging to all MH parades in one MH subframe.</para>
+ <para>Possible values: 0, 1, 2, 3, ..., 30, 31</para>
+ </section>
+ <section id="DTV-ATSCMH-SGN">
+ <title><constant>DTV_ATSCMH_SGN</constant></title>
+ <para>Start group number.</para>
+ <para>Possible values: 0, 1, 2, 3, ..., 14, 15</para>
+ </section>
+ <section id="DTV-ATSCMH-PRC">
+ <title><constant>DTV_ATSCMH_PRC</constant></title>
+ <para>Parade repetition cycle.</para>
+ <para>Possible values: 1, 2, 3, 4, 5, 6, 7, 8</para>
+ </section>
+ <section id="DTV-ATSCMH-RS-FRAME-MODE">
+ <title><constant>DTV_ATSCMH_RS_FRAME_MODE</constant></title>
+ <para>RS frame mode.</para>
+ <para>Possible values are:</para>
+ <para id="atscmh-rs-frame-mode">
+<programlisting>
+typedef enum atscmh_rs_frame_mode {
+ ATSCMH_RSFRAME_PRI_ONLY = 0,
+ ATSCMH_RSFRAME_PRI_SEC = 1,
+} atscmh_rs_frame_mode_t;
+</programlisting>
+ </para>
+ </section>
+ <section id="DTV-ATSCMH-RS-FRAME-ENSEMBLE">
+ <title><constant>DTV_ATSCMH_RS_FRAME_ENSEMBLE</constant></title>
+ <para>RS frame ensemble.</para>
+ <para>Possible values are:</para>
+ <para id="atscmh-rs-frame-ensemble">
+<programlisting>
+typedef enum atscmh_rs_frame_ensemble {
+ ATSCMH_RSFRAME_ENS_PRI = 0,
+ ATSCMH_RSFRAME_ENS_SEC = 1,
+} atscmh_rs_frame_ensemble_t;
+</programlisting>
+ </para>
+ </section>
+ <section id="DTV-ATSCMH-RS-CODE-MODE-PRI">
+ <title><constant>DTV_ATSCMH_RS_CODE_MODE_PRI</constant></title>
+ <para>RS code mode (primary).</para>
+ <para>Possible values are:</para>
+ <para id="atscmh-rs-code-mode">
+<programlisting>
+typedef enum atscmh_rs_code_mode {
+ ATSCMH_RSCODE_211_187 = 0,
+ ATSCMH_RSCODE_223_187 = 1,
+ ATSCMH_RSCODE_235_187 = 2,
+} atscmh_rs_code_mode_t;
+</programlisting>
+ </para>
+ </section>
+ <section id="DTV-ATSCMH-RS-CODE-MODE-SEC">
+ <title><constant>DTV_ATSCMH_RS_CODE_MODE_SEC</constant></title>
+ <para>RS code mode (secondary).</para>
+ <para>Possible values are:</para>
+<programlisting>
+typedef enum atscmh_rs_code_mode {
+ ATSCMH_RSCODE_211_187 = 0,
+ ATSCMH_RSCODE_223_187 = 1,
+ ATSCMH_RSCODE_235_187 = 2,
+} atscmh_rs_code_mode_t;
+</programlisting>
+ </section>
+ <section id="DTV-ATSCMH-SCCC-BLOCK-MODE">
+ <title><constant>DTV_ATSCMH_SCCC_BLOCK_MODE</constant></title>
+ <para>Series Concatenated Convolutional Code Block Mode.</para>
+ <para>Possible values are:</para>
+ <para id="atscmh-sccc-block-mode">
+<programlisting>
+typedef enum atscmh_sccc_block_mode {
+ ATSCMH_SCCC_BLK_SEP = 0,
+ ATSCMH_SCCC_BLK_COMB = 1,
+} atscmh_sccc_block_mode_t;
+</programlisting>
+ </para>
+ </section>
+ <section id="DTV-ATSCMH-SCCC-CODE-MODE-A">
+ <title><constant>DTV_ATSCMH_SCCC_CODE_MODE_A</constant></title>
+ <para>Series Concatenated Convolutional Code Rate.</para>
+ <para>Possible values are:</para>
+ <para id="atscmh-sccc-code-mode">
+<programlisting>
+typedef enum atscmh_sccc_code_mode {
+ ATSCMH_SCCC_CODE_HLF = 0,
+ ATSCMH_SCCC_CODE_QTR = 1,
+} atscmh_sccc_code_mode_t;
+</programlisting>
+ </para>
+ </section>
+ <section id="DTV-ATSCMH-SCCC-CODE-MODE-B">
+ <title><constant>DTV_ATSCMH_SCCC_CODE_MODE_B</constant></title>
+ <para>Series Concatenated Convolutional Code Rate.</para>
+ <para>Possible values are:</para>
+<programlisting>
+typedef enum atscmh_sccc_code_mode {
+ ATSCMH_SCCC_CODE_HLF = 0,
+ ATSCMH_SCCC_CODE_QTR = 1,
+} atscmh_sccc_code_mode_t;
+</programlisting>
+ </section>
+ <section id="DTV-ATSCMH-SCCC-CODE-MODE-C">
+ <title><constant>DTV_ATSCMH_SCCC_CODE_MODE_C</constant></title>
+ <para>Series Concatenated Convolutional Code Rate.</para>
+ <para>Possible values are:</para>
+<programlisting>
+typedef enum atscmh_sccc_code_mode {
+ ATSCMH_SCCC_CODE_HLF = 0,
+ ATSCMH_SCCC_CODE_QTR = 1,
+} atscmh_sccc_code_mode_t;
+</programlisting>
+ </section>
+ <section id="DTV-ATSCMH-SCCC-CODE-MODE-D">
+ <title><constant>DTV_ATSCMH_SCCC_CODE_MODE_D</constant></title>
+ <para>Series Concatenated Convolutional Code Rate.</para>
+ <para>Possible values are:</para>
+<programlisting>
+typedef enum atscmh_sccc_code_mode {
+ ATSCMH_SCCC_CODE_HLF = 0,
+ ATSCMH_SCCC_CODE_QTR = 1,
+} atscmh_sccc_code_mode_t;
+</programlisting>
+ </section>
+ </section>
+ <section id="DTV-API-VERSION">
+ <title><constant>DTV_API_VERSION</constant></title>
+ <para>Returns the major/minor version of the DVB API</para>
+ </section>
+ <section id="DTV-CODE-RATE-HP">
+ <title><constant>DTV_CODE_RATE_HP</constant></title>
+ <para>Used on terrestrial transmissions. The acceptable values are:
+ </para>
+ <programlisting>
+typedef enum fe_code_rate {
+ FEC_NONE = 0,
+ FEC_1_2,
+ FEC_2_3,
+ FEC_3_4,
+ FEC_4_5,
+ FEC_5_6,
+ FEC_6_7,
+ FEC_7_8,
+ FEC_8_9,
+ FEC_AUTO,
+ FEC_3_5,
+ FEC_9_10,
+} fe_code_rate_t;
+ </programlisting>
+ </section>
+ <section id="DTV-CODE-RATE-LP">
+ <title><constant>DTV_CODE_RATE_LP</constant></title>
+ <para>Used on terrestrial transmissions. The acceptable values are:
+ </para>
+ <programlisting>
+typedef enum fe_code_rate {
+ FEC_NONE = 0,
+ FEC_1_2,
+ FEC_2_3,
+ FEC_3_4,
+ FEC_4_5,
+ FEC_5_6,
+ FEC_6_7,
+ FEC_7_8,
+ FEC_8_9,
+ FEC_AUTO,
+ FEC_3_5,
+ FEC_9_10,
+} fe_code_rate_t;
+ </programlisting>
+ </section>
+ <section id="DTV-GUARD-INTERVAL">
+ <title><constant>DTV_GUARD_INTERVAL</constant></title>
+
+ <para>Possible values are:</para>
+<programlisting>
+typedef enum fe_guard_interval {
+ GUARD_INTERVAL_1_32,
+ GUARD_INTERVAL_1_16,
+ GUARD_INTERVAL_1_8,
+ GUARD_INTERVAL_1_4,
+ GUARD_INTERVAL_AUTO,
+ GUARD_INTERVAL_1_128,
+ GUARD_INTERVAL_19_128,
+ GUARD_INTERVAL_19_256,
+ GUARD_INTERVAL_PN420,
+ GUARD_INTERVAL_PN595,
+ GUARD_INTERVAL_PN945,
+} fe_guard_interval_t;
+</programlisting>
+
+ <para>Notes:</para>
+ <para>1) If <constant>DTV_GUARD_INTERVAL</constant> is set the <constant>GUARD_INTERVAL_AUTO</constant> the hardware will
+ try to find the correct guard interval (if capable) and will use TMCC to fill
+ in the missing parameters.</para>
+ <para>2) Intervals 1/128, 19/128 and 19/256 are used only for DVB-T2 at present</para>
+ <para>3) DTMB specifies PN420, PN595 and PN945.</para>
+ </section>
+ <section id="DTV-TRANSMISSION-MODE">
+ <title><constant>DTV_TRANSMISSION_MODE</constant></title>
+
+ <para>Specifies the number of carriers used by the standard</para>
+
+ <para>Possible values are:</para>
+<programlisting>
+typedef enum fe_transmit_mode {
+ TRANSMISSION_MODE_2K,
+ TRANSMISSION_MODE_8K,
+ TRANSMISSION_MODE_AUTO,
+ TRANSMISSION_MODE_4K,
+ TRANSMISSION_MODE_1K,
+ TRANSMISSION_MODE_16K,
+ TRANSMISSION_MODE_32K,
+ TRANSMISSION_MODE_C1,
+ TRANSMISSION_MODE_C3780,
+} fe_transmit_mode_t;
+</programlisting>
+ <para>Notes:</para>
+ <para>1) ISDB-T supports three carrier/symbol-size: 8K, 4K, 2K. It is called
+ 'mode' in the standard: Mode 1 is 2K, mode 2 is 4K, mode 3 is 8K</para>
+
+ <para>2) If <constant>DTV_TRANSMISSION_MODE</constant> is set the <constant>TRANSMISSION_MODE_AUTO</constant> the
+ hardware will try to find the correct FFT-size (if capable) and will
+ use TMCC to fill in the missing parameters.</para>
+ <para>3) DVB-T specifies 2K and 8K as valid sizes.</para>
+ <para>4) DVB-T2 specifies 1K, 2K, 4K, 8K, 16K and 32K.</para>
+ <para>5) DTMB specifies C1 and C3780.</para>
+ </section>
+ <section id="DTV-HIERARCHY">
+ <title><constant>DTV_HIERARCHY</constant></title>
+ <para>Frontend hierarchy</para>
+ <programlisting>
+typedef enum fe_hierarchy {
+ HIERARCHY_NONE,
+ HIERARCHY_1,
+ HIERARCHY_2,
+ HIERARCHY_4,
+ HIERARCHY_AUTO
+ } fe_hierarchy_t;
+ </programlisting>
+ </section>
+ <section id="DTV-STREAM-ID">
+ <title><constant>DTV_STREAM_ID</constant></title>
+ <para>DVB-S2, DVB-T2 and ISDB-S support the transmission of several
+ streams on a single transport stream.
+ This property enables the DVB driver to handle substream filtering,
+ when supported by the hardware.
+ By default, substream filtering is disabled.
+ </para><para>
+ For DVB-S2 and DVB-T2, the valid substream id range is from 0 to 255.
+ </para><para>
+ For ISDB, the valid substream id range is from 1 to 65535.
+ </para><para>
+ To disable it, you should use the special macro NO_STREAM_ID_FILTER.
+ </para><para>
+ Note: any value outside the id range also disables filtering.
+ </para>
+ </section>
+ <section id="DTV-DVBT2-PLP-ID-LEGACY">
+ <title><constant>DTV_DVBT2_PLP_ID_LEGACY</constant></title>
+ <para>Obsolete, replaced with DTV_STREAM_ID.</para>
+ </section>
+ <section id="DTV-ENUM-DELSYS">
+ <title><constant>DTV_ENUM_DELSYS</constant></title>
+ <para>A Multi standard frontend needs to advertise the delivery systems provided.
+ Applications need to enumerate the provided delivery systems, before using
+ any other operation with the frontend. Prior to it's introduction,
+ FE_GET_INFO was used to determine a frontend type. A frontend which
+ provides more than a single delivery system, FE_GET_INFO doesn't help much.
+ Applications which intends to use a multistandard frontend must enumerate
+ the delivery systems associated with it, rather than trying to use
+ FE_GET_INFO. In the case of a legacy frontend, the result is just the same
+ as with FE_GET_INFO, but in a more structured format </para>
+ </section>
+ <section id="DTV-INTERLEAVING">
+ <title><constant>DTV_INTERLEAVING</constant></title>
+ <para id="fe-interleaving">Interleaving mode</para>
+ <programlisting>
+enum fe_interleaving {
+ INTERLEAVING_NONE,
+ INTERLEAVING_AUTO,
+ INTERLEAVING_240,
+ INTERLEAVING_720,
+};
+ </programlisting>
+ </section>
+ <section id="DTV-LNA">
+ <title><constant>DTV_LNA</constant></title>
+ <para>Low-noise amplifier.</para>
+ <para>Hardware might offer controllable LNA which can be set manually
+ using that parameter. Usually LNA could be found only from
+ terrestrial devices if at all.</para>
+ <para>Possible values: 0, 1, LNA_AUTO</para>
+ <para>0, LNA off</para>
+ <para>1, LNA on</para>
+ <para>use the special macro LNA_AUTO to set LNA auto</para>
+ </section>
+</section>
+
+ <section id="frontend-stat-properties">
+ <title>Frontend statistics indicators</title>
+ <para>The values are returned via <constant>dtv_property.stat</constant>.
+ If the property is supported, <constant>dtv_property.stat.len</constant> is bigger than zero.</para>
+ <para>For most delivery systems, <constant>dtv_property.stat.len</constant>
+ will be 1 if the stats is supported, and the properties will
+ return a single value for each parameter.</para>
+ <para>It should be noticed, however, that new OFDM delivery systems
+ like ISDB can use different modulation types for each group of
+ carriers. On such standards, up to 3 groups of statistics can be
+ provided, and <constant>dtv_property.stat.len</constant> is updated
+ to reflect the "global" metrics, plus one metric per each carrier
+ group (called "layer" on ISDB).</para>
+ <para>So, in order to be consistent with other delivery systems, the first
+ value at <link linkend="dtv-stats"><constant>dtv_property.stat.dtv_stats</constant></link>
+ array refers to the global metric. The other elements of the array
+ represent each layer, starting from layer A(index 1),
+ layer B (index 2) and so on.</para>
+ <para>The number of filled elements are stored at <constant>dtv_property.stat.len</constant>.</para>
+ <para>Each element of the <constant>dtv_property.stat.dtv_stats</constant> array consists on two elements:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><constant>svalue</constant> or <constant>uvalue</constant>, where
+ <constant>svalue</constant> is for signed values of the measure (dB measures)
+ and <constant>uvalue</constant> is for unsigned values (counters, relative scale)</para></listitem>
+ <listitem><para><constant>scale</constant> - Scale for the value. It can be:</para>
+ <itemizedlist mark='bullet' id="fecap-scale-params">
+ <listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - The parameter is supported by the frontend, but it was not possible to collect it (could be a transitory or permanent condition)</para></listitem>
+ <listitem><para><constant>FE_SCALE_DECIBEL</constant> - parameter is a signed value, measured in 1/1000 dB</para></listitem>
+ <listitem><para><constant>FE_SCALE_RELATIVE</constant> - parameter is a unsigned value, where 0 means 0% and 65535 means 100%.</para></listitem>
+ <listitem><para><constant>FE_SCALE_COUNTER</constant> - parameter is a unsigned value that counts the occurrence of an event, like bit error, block error, or lapsed time.</para></listitem>
+ </itemizedlist>
+ </listitem>
+ </itemizedlist>
+ <section id="DTV-STAT-SIGNAL-STRENGTH">
+ <title><constant>DTV_STAT_SIGNAL_STRENGTH</constant></title>
+ <para>Indicates the signal strength level at the analog part of the tuner or of the demod.</para>
+ <para>Possible scales for this metric are:</para>
+ <itemizedlist mark='bullet'>
+ <listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - it failed to measure it, or the measurement was not complete yet.</para></listitem>
+ <listitem><para><constant>FE_SCALE_DECIBEL</constant> - signal strength is in 0.0001 dBm units, power measured in miliwatts. This value is generally negative.</para></listitem>
+ <listitem><para><constant>FE_SCALE_RELATIVE</constant> - The frontend provides a 0% to 100% measurement for power (actually, 0 to 65535).</para></listitem>
+ </itemizedlist>
+ </section>
+ <section id="DTV-STAT-CNR">
+ <title><constant>DTV_STAT_CNR</constant></title>
+ <para>Indicates the Signal to Noise ratio for the main carrier.</para>
+ <para>Possible scales for this metric are:</para>
+ <itemizedlist mark='bullet'>
+ <listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - it failed to measure it, or the measurement was not complete yet.</para></listitem>
+ <listitem><para><constant>FE_SCALE_DECIBEL</constant> - Signal/Noise ratio is in 0.0001 dB units.</para></listitem>
+ <listitem><para><constant>FE_SCALE_RELATIVE</constant> - The frontend provides a 0% to 100% measurement for Signal/Noise (actually, 0 to 65535).</para></listitem>
+ </itemizedlist>
+ </section>
+ <section id="DTV-STAT-PRE-ERROR-BIT-COUNT">
+ <title><constant>DTV_STAT_PRE_ERROR_BIT_COUNT</constant></title>
+ <para>Measures the number of bit errors before the forward error correction (FEC) on the inner coding block (before Viterbi, LDPC or other inner code).</para>
+ <para>This measure is taken during the same interval as <constant>DTV_STAT_PRE_TOTAL_BIT_COUNT</constant>.</para>
+ <para>In order to get the BER (Bit Error Rate) measurement, it should be divided by
+ <link linkend="DTV-STAT-PRE-TOTAL-BIT-COUNT"><constant>DTV_STAT_PRE_TOTAL_BIT_COUNT</constant></link>.</para>
+ <para>This measurement is monotonically increased, as the frontend gets more bit count measurements.
+ The frontend may reset it when a channel/transponder is tuned.</para>
+ <para>Possible scales for this metric are:</para>
+ <itemizedlist mark='bullet'>
+ <listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - it failed to measure it, or the measurement was not complete yet.</para></listitem>
+ <listitem><para><constant>FE_SCALE_COUNTER</constant> - Number of error bits counted before the inner coding.</para></listitem>
+ </itemizedlist>
+ </section>
+ <section id="DTV-STAT-PRE-TOTAL-BIT-COUNT">
+ <title><constant>DTV_STAT_PRE_TOTAL_BIT_COUNT</constant></title>
+ <para>Measures the amount of bits received before the inner code block, during the same period as
+ <link linkend="DTV-STAT-PRE-ERROR-BIT-COUNT"><constant>DTV_STAT_PRE_ERROR_BIT_COUNT</constant></link> measurement was taken.</para>
+ <para>It should be noticed that this measurement can be smaller than the total amount of bits on the transport stream,
+ as the frontend may need to manually restart the measurement, losing some data between each measurement interval.</para>
+ <para>This measurement is monotonically increased, as the frontend gets more bit count measurements.
+ The frontend may reset it when a channel/transponder is tuned.</para>
+ <para>Possible scales for this metric are:</para>
+ <itemizedlist mark='bullet'>
+ <listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - it failed to measure it, or the measurement was not complete yet.</para></listitem>
+ <listitem><para><constant>FE_SCALE_COUNTER</constant> - Number of bits counted while measuring
+ <link linkend="DTV-STAT-PRE-ERROR-BIT-COUNT"><constant>DTV_STAT_PRE_ERROR_BIT_COUNT</constant></link>.</para></listitem>
+ </itemizedlist>
+ </section>
+ <section id="DTV-STAT-POST-ERROR-BIT-COUNT">
+ <title><constant>DTV_STAT_POST_ERROR_BIT_COUNT</constant></title>
+ <para>Measures the number of bit errors after the forward error correction (FEC) done by inner code block (after Viterbi, LDPC or other inner code).</para>
+ <para>This measure is taken during the same interval as <constant>DTV_STAT_POST_TOTAL_BIT_COUNT</constant>.</para>
+ <para>In order to get the BER (Bit Error Rate) measurement, it should be divided by
+ <link linkend="DTV-STAT-POST-TOTAL-BIT-COUNT"><constant>DTV_STAT_POST_TOTAL_BIT_COUNT</constant></link>.</para>
+ <para>This measurement is monotonically increased, as the frontend gets more bit count measurements.
+ The frontend may reset it when a channel/transponder is tuned.</para>
+ <para>Possible scales for this metric are:</para>
+ <itemizedlist mark='bullet'>
+ <listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - it failed to measure it, or the measurement was not complete yet.</para></listitem>
+ <listitem><para><constant>FE_SCALE_COUNTER</constant> - Number of error bits counted after the inner coding.</para></listitem>
+ </itemizedlist>
+ </section>
+ <section id="DTV-STAT-POST-TOTAL-BIT-COUNT">
+ <title><constant>DTV_STAT_POST_TOTAL_BIT_COUNT</constant></title>
+ <para>Measures the amount of bits received after the inner coding, during the same period as
+ <link linkend="DTV-STAT-POST-ERROR-BIT-COUNT"><constant>DTV_STAT_POST_ERROR_BIT_COUNT</constant></link> measurement was taken.</para>
+ <para>It should be noticed that this measurement can be smaller than the total amount of bits on the transport stream,
+ as the frontend may need to manually restart the measurement, losing some data between each measurement interval.</para>
+ <para>This measurement is monotonically increased, as the frontend gets more bit count measurements.
+ The frontend may reset it when a channel/transponder is tuned.</para>
+ <para>Possible scales for this metric are:</para>
+ <itemizedlist mark='bullet'>
+ <listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - it failed to measure it, or the measurement was not complete yet.</para></listitem>
+ <listitem><para><constant>FE_SCALE_COUNTER</constant> - Number of bits counted while measuring
+ <link linkend="DTV-STAT-POST-ERROR-BIT-COUNT"><constant>DTV_STAT_POST_ERROR_BIT_COUNT</constant></link>.</para></listitem>
+ </itemizedlist>
+ </section>
+ <section id="DTV-STAT-ERROR-BLOCK-COUNT">
+ <title><constant>DTV_STAT_ERROR_BLOCK_COUNT</constant></title>
+ <para>Measures the number of block errors after the outer forward error correction coding (after Reed-Solomon or other outer code).</para>
+ <para>This measurement is monotonically increased, as the frontend gets more bit count measurements.
+ The frontend may reset it when a channel/transponder is tuned.</para>
+ <para>Possible scales for this metric are:</para>
+ <itemizedlist mark='bullet'>
+ <listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - it failed to measure it, or the measurement was not complete yet.</para></listitem>
+ <listitem><para><constant>FE_SCALE_COUNTER</constant> - Number of error blocks counted after the outer coding.</para></listitem>
+ </itemizedlist>
+ </section>
+ <section id="DTV-STAT-TOTAL-BLOCK-COUNT">
+ <title><constant>DTV-STAT_TOTAL_BLOCK_COUNT</constant></title>
+ <para>Measures the total number of blocks received during the same period as
+ <link linkend="DTV-STAT-ERROR-BLOCK-COUNT"><constant>DTV_STAT_ERROR_BLOCK_COUNT</constant></link> measurement was taken.</para>
+ <para>It can be used to calculate the PER indicator, by dividing
+ <link linkend="DTV-STAT-ERROR-BLOCK-COUNT"><constant>DTV_STAT_ERROR_BLOCK_COUNT</constant></link>
+ by <link linkend="DTV-STAT-TOTAL-BLOCK-COUNT"><constant>DTV-STAT-TOTAL-BLOCK-COUNT</constant></link>.</para>
+ <para>Possible scales for this metric are:</para>
+ <itemizedlist mark='bullet'>
+ <listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - it failed to measure it, or the measurement was not complete yet.</para></listitem>
+ <listitem><para><constant>FE_SCALE_COUNTER</constant> - Number of blocks counted while measuring
+ <link linkend="DTV-STAT-ERROR-BLOCK-COUNT"><constant>DTV_STAT_ERROR_BLOCK_COUNT</constant></link>.</para></listitem>
+ </itemizedlist>
+ </section>
+ </section>
+
+ <section id="frontend-property-terrestrial-systems">
+ <title>Properties used on terrestrial delivery systems</title>
+ <section id="dvbt-params">
+ <title>DVB-T delivery system</title>
+ <para>The following parameters are valid for DVB-T:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-API-VERSION"><constant>DTV_API_VERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DELIVERY-SYSTEM"><constant>DTV_DELIVERY_SYSTEM</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TUNE"><constant>DTV_TUNE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CLEAR"><constant>DTV_CLEAR</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-FREQUENCY"><constant>DTV_FREQUENCY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-MODULATION"><constant>DTV_MODULATION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-BANDWIDTH-HZ"><constant>DTV_BANDWIDTH_HZ</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INVERSION"><constant>DTV_INVERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CODE-RATE-HP"><constant>DTV_CODE_RATE_HP</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CODE-RATE-LP"><constant>DTV_CODE_RATE_LP</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-GUARD-INTERVAL"><constant>DTV_GUARD_INTERVAL</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TRANSMISSION-MODE"><constant>DTV_TRANSMISSION_MODE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-HIERARCHY"><constant>DTV_HIERARCHY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-LNA"><constant>DTV_LNA</constant></link></para></listitem>
+ </itemizedlist>
+ <para>In addition, the <link linkend="frontend-stat-properties">DTV QoS statistics</link> are also valid.</para>
+ </section>
+ <section id="dvbt2-params">
+ <title>DVB-T2 delivery system</title>
+ <para>DVB-T2 support is currently in the early stages
+ of development, so expect that this section maygrow and become
+ more detailed with time.</para>
+ <para>The following parameters are valid for DVB-T2:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-API-VERSION"><constant>DTV_API_VERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DELIVERY-SYSTEM"><constant>DTV_DELIVERY_SYSTEM</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TUNE"><constant>DTV_TUNE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CLEAR"><constant>DTV_CLEAR</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-FREQUENCY"><constant>DTV_FREQUENCY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-MODULATION"><constant>DTV_MODULATION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-BANDWIDTH-HZ"><constant>DTV_BANDWIDTH_HZ</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INVERSION"><constant>DTV_INVERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CODE-RATE-HP"><constant>DTV_CODE_RATE_HP</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CODE-RATE-LP"><constant>DTV_CODE_RATE_LP</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-GUARD-INTERVAL"><constant>DTV_GUARD_INTERVAL</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TRANSMISSION-MODE"><constant>DTV_TRANSMISSION_MODE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-HIERARCHY"><constant>DTV_HIERARCHY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-STREAM-ID"><constant>DTV_STREAM_ID</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-LNA"><constant>DTV_LNA</constant></link></para></listitem>
+ </itemizedlist>
+ <para>In addition, the <link linkend="frontend-stat-properties">DTV QoS statistics</link> are also valid.</para>
+ </section>
+ <section id="isdbt">
+ <title>ISDB-T delivery system</title>
+ <para>This ISDB-T/ISDB-Tsb API extension should reflect all information
+ needed to tune any ISDB-T/ISDB-Tsb hardware. Of course it is possible
+ that some very sophisticated devices won't need certain parameters to
+ tune.</para>
+ <para>The information given here should help application writers to know how
+ to handle ISDB-T and ISDB-Tsb hardware using the Linux DVB-API.</para>
+ <para>The details given here about ISDB-T and ISDB-Tsb are just enough to
+ basically show the dependencies between the needed parameter values,
+ but surely some information is left out. For more detailed information
+ see the following documents:</para>
+ <para>ARIB STD-B31 - "Transmission System for Digital Terrestrial
+ Television Broadcasting" and</para>
+ <para>ARIB TR-B14 - "Operational Guidelines for Digital Terrestrial
+ Television Broadcasting".</para>
+ <para>In order to understand the ISDB specific parameters,
+ one has to have some knowledge the channel structure in
+ ISDB-T and ISDB-Tsb. I.e. it has to be known to
+ the reader that an ISDB-T channel consists of 13 segments,
+ that it can have up to 3 layer sharing those segments,
+ and things like that.</para>
+ <para>The following parameters are valid for ISDB-T:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-API-VERSION"><constant>DTV_API_VERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DELIVERY-SYSTEM"><constant>DTV_DELIVERY_SYSTEM</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TUNE"><constant>DTV_TUNE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CLEAR"><constant>DTV_CLEAR</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-FREQUENCY"><constant>DTV_FREQUENCY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-BANDWIDTH-HZ"><constant>DTV_BANDWIDTH_HZ</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INVERSION"><constant>DTV_INVERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-GUARD-INTERVAL"><constant>DTV_GUARD_INTERVAL</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TRANSMISSION-MODE"><constant>DTV_TRANSMISSION_MODE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-ENABLED"><constant>DTV_ISDBT_LAYER_ENABLED</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-PARTIAL-RECEPTION"><constant>DTV_ISDBT_PARTIAL_RECEPTION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-SOUND-BROADCASTING"><constant>DTV_ISDBT_SOUND_BROADCASTING</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-SB-SUBCHANNEL-ID"><constant>DTV_ISDBT_SB_SUBCHANNEL_ID</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-SB-SEGMENT-IDX"><constant>DTV_ISDBT_SB_SEGMENT_IDX</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-SB-SEGMENT-COUNT"><constant>DTV_ISDBT_SB_SEGMENT_COUNT</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-FEC"><constant>DTV_ISDBT_LAYERA_FEC</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-MODULATION"><constant>DTV_ISDBT_LAYERA_MODULATION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-SEGMENT-COUNT"><constant>DTV_ISDBT_LAYERA_SEGMENT_COUNT</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-TIME-INTERLEAVING"><constant>DTV_ISDBT_LAYERA_TIME_INTERLEAVING</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-FEC"><constant>DTV_ISDBT_LAYERB_FEC</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-MODULATION"><constant>DTV_ISDBT_LAYERB_MODULATION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-SEGMENT-COUNT"><constant>DTV_ISDBT_LAYERB_SEGMENT_COUNT</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-TIME-INTERLEAVING"><constant>DTV_ISDBT_LAYERB_TIME_INTERLEAVING</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-FEC"><constant>DTV_ISDBT_LAYERC_FEC</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-MODULATION"><constant>DTV_ISDBT_LAYERC_MODULATION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-SEGMENT-COUNT"><constant>DTV_ISDBT_LAYERC_SEGMENT_COUNT</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ISDBT-LAYER-TIME-INTERLEAVING"><constant>DTV_ISDBT_LAYERC_TIME_INTERLEAVING</constant></link></para></listitem>
+ </itemizedlist>
+ <para>In addition, the <link linkend="frontend-stat-properties">DTV QoS statistics</link> are also valid.</para>
+ </section>
+ <section id="atsc-params">
+ <title>ATSC delivery system</title>
+ <para>The following parameters are valid for ATSC:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-API-VERSION"><constant>DTV_API_VERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DELIVERY-SYSTEM"><constant>DTV_DELIVERY_SYSTEM</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TUNE"><constant>DTV_TUNE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CLEAR"><constant>DTV_CLEAR</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-FREQUENCY"><constant>DTV_FREQUENCY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-MODULATION"><constant>DTV_MODULATION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-BANDWIDTH-HZ"><constant>DTV_BANDWIDTH_HZ</constant></link></para></listitem>
+ </itemizedlist>
+ <para>In addition, the <link linkend="frontend-stat-properties">DTV QoS statistics</link> are also valid.</para>
+ </section>
+ <section id="atscmh-params">
+ <title>ATSC-MH delivery system</title>
+ <para>The following parameters are valid for ATSC-MH:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-API-VERSION"><constant>DTV_API_VERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DELIVERY-SYSTEM"><constant>DTV_DELIVERY_SYSTEM</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TUNE"><constant>DTV_TUNE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CLEAR"><constant>DTV_CLEAR</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-FREQUENCY"><constant>DTV_FREQUENCY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-BANDWIDTH-HZ"><constant>DTV_BANDWIDTH_HZ</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-FIC-VER"><constant>DTV_ATSCMH_FIC_VER</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-PARADE-ID"><constant>DTV_ATSCMH_PARADE_ID</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-NOG"><constant>DTV_ATSCMH_NOG</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-TNOG"><constant>DTV_ATSCMH_TNOG</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-SGN"><constant>DTV_ATSCMH_SGN</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-PRC"><constant>DTV_ATSCMH_PRC</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-RS-FRAME-MODE"><constant>DTV_ATSCMH_RS_FRAME_MODE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-RS-FRAME-ENSEMBLE"><constant>DTV_ATSCMH_RS_FRAME_ENSEMBLE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-RS-CODE-MODE-PRI"><constant>DTV_ATSCMH_RS_CODE_MODE_PRI</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-RS-CODE-MODE-SEC"><constant>DTV_ATSCMH_RS_CODE_MODE_SEC</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-SCCC-BLOCK-MODE"><constant>DTV_ATSCMH_SCCC_BLOCK_MODE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-SCCC-CODE-MODE-A"><constant>DTV_ATSCMH_SCCC_CODE_MODE_A</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-SCCC-CODE-MODE-B"><constant>DTV_ATSCMH_SCCC_CODE_MODE_B</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-SCCC-CODE-MODE-C"><constant>DTV_ATSCMH_SCCC_CODE_MODE_C</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ATSCMH-SCCC-CODE-MODE-D"><constant>DTV_ATSCMH_SCCC_CODE_MODE_D</constant></link></para></listitem>
+ </itemizedlist>
+ <para>In addition, the <link linkend="frontend-stat-properties">DTV QoS statistics</link> are also valid.</para>
+ </section>
+ <section id="dtmb-params">
+ <title>DTMB delivery system</title>
+ <para>The following parameters are valid for DTMB:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-API-VERSION"><constant>DTV_API_VERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DELIVERY-SYSTEM"><constant>DTV_DELIVERY_SYSTEM</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TUNE"><constant>DTV_TUNE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CLEAR"><constant>DTV_CLEAR</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-FREQUENCY"><constant>DTV_FREQUENCY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-MODULATION"><constant>DTV_MODULATION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-BANDWIDTH-HZ"><constant>DTV_BANDWIDTH_HZ</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INVERSION"><constant>DTV_INVERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INNER-FEC"><constant>DTV_INNER_FEC</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-GUARD-INTERVAL"><constant>DTV_GUARD_INTERVAL</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TRANSMISSION-MODE"><constant>DTV_TRANSMISSION_MODE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INTERLEAVING"><constant>DTV_INTERLEAVING</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-LNA"><constant>DTV_LNA</constant></link></para></listitem>
+ </itemizedlist>
+ <para>In addition, the <link linkend="frontend-stat-properties">DTV QoS statistics</link> are also valid.</para>
+ </section>
+ </section>
+ <section id="frontend-property-cable-systems">
+ <title>Properties used on cable delivery systems</title>
+ <section id="dvbc-params">
+ <title>DVB-C delivery system</title>
+ <para>The DVB-C Annex-A is the widely used cable standard. Transmission uses QAM modulation.</para>
+ <para>The DVB-C Annex-C is optimized for 6MHz, and is used in Japan. It supports a subset of the Annex A modulation types, and a roll-off of 0.13, instead of 0.15</para>
+ <para>The following parameters are valid for DVB-C Annex A/C:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-API-VERSION"><constant>DTV_API_VERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DELIVERY-SYSTEM"><constant>DTV_DELIVERY_SYSTEM</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TUNE"><constant>DTV_TUNE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CLEAR"><constant>DTV_CLEAR</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-FREQUENCY"><constant>DTV_FREQUENCY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-MODULATION"><constant>DTV_MODULATION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INVERSION"><constant>DTV_INVERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-SYMBOL-RATE"><constant>DTV_SYMBOL_RATE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INNER-FEC"><constant>DTV_INNER_FEC</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-LNA"><constant>DTV_LNA</constant></link></para></listitem>
+ </itemizedlist>
+ <para>In addition, the <link linkend="frontend-stat-properties">DTV QoS statistics</link> are also valid.</para>
+ </section>
+ <section id="dvbc-annex-b-params">
+ <title>DVB-C Annex B delivery system</title>
+ <para>The DVB-C Annex-B is only used on a few Countries like the United States.</para>
+ <para>The following parameters are valid for DVB-C Annex B:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-API-VERSION"><constant>DTV_API_VERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DELIVERY-SYSTEM"><constant>DTV_DELIVERY_SYSTEM</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TUNE"><constant>DTV_TUNE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CLEAR"><constant>DTV_CLEAR</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-FREQUENCY"><constant>DTV_FREQUENCY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-MODULATION"><constant>DTV_MODULATION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INVERSION"><constant>DTV_INVERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-LNA"><constant>DTV_LNA</constant></link></para></listitem>
+ </itemizedlist>
+ <para>In addition, the <link linkend="frontend-stat-properties">DTV QoS statistics</link> are also valid.</para>
+ </section>
+ </section>
+ <section id="frontend-property-satellital-systems">
+ <title>Properties used on satellital delivery systems</title>
+ <section id="dvbs-params">
+ <title>DVB-S delivery system</title>
+ <para>The following parameters are valid for DVB-S:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-API-VERSION"><constant>DTV_API_VERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DELIVERY-SYSTEM"><constant>DTV_DELIVERY_SYSTEM</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TUNE"><constant>DTV_TUNE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CLEAR"><constant>DTV_CLEAR</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-FREQUENCY"><constant>DTV_FREQUENCY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INVERSION"><constant>DTV_INVERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-SYMBOL-RATE"><constant>DTV_SYMBOL_RATE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INNER-FEC"><constant>DTV_INNER_FEC</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-VOLTAGE"><constant>DTV_VOLTAGE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TONE"><constant>DTV_TONE</constant></link></para></listitem>
+ </itemizedlist>
+ <para>In addition, the <link linkend="frontend-stat-properties">DTV QoS statistics</link> are also valid.</para>
+ <para>Future implementations might add those two missing parameters:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-DISEQC-MASTER"><constant>DTV_DISEQC_MASTER</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DISEQC-SLAVE-REPLY"><constant>DTV_DISEQC_SLAVE_REPLY</constant></link></para></listitem>
+ </itemizedlist>
+ </section>
+ <section id="dvbs2-params">
+ <title>DVB-S2 delivery system</title>
+ <para>In addition to all parameters valid for DVB-S, DVB-S2 supports the following parameters:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-MODULATION"><constant>DTV_MODULATION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-PILOT"><constant>DTV_PILOT</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-ROLLOFF"><constant>DTV_ROLLOFF</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-STREAM-ID"><constant>DTV_STREAM_ID</constant></link></para></listitem>
+ </itemizedlist>
+ <para>In addition, the <link linkend="frontend-stat-properties">DTV QoS statistics</link> are also valid.</para>
+ </section>
+ <section id="turbo-params">
+ <title>Turbo code delivery system</title>
+ <para>In addition to all parameters valid for DVB-S, turbo code supports the following parameters:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-MODULATION"><constant>DTV_MODULATION</constant></link></para></listitem>
+ </itemizedlist>
+ </section>
+ <section id="isdbs-params">
+ <title>ISDB-S delivery system</title>
+ <para>The following parameters are valid for ISDB-S:</para>
+ <itemizedlist mark='opencircle'>
+ <listitem><para><link linkend="DTV-API-VERSION"><constant>DTV_API_VERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-DELIVERY-SYSTEM"><constant>DTV_DELIVERY_SYSTEM</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-TUNE"><constant>DTV_TUNE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-CLEAR"><constant>DTV_CLEAR</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-FREQUENCY"><constant>DTV_FREQUENCY</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INVERSION"><constant>DTV_INVERSION</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-SYMBOL-RATE"><constant>DTV_SYMBOL_RATE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-INNER-FEC"><constant>DTV_INNER_FEC</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-VOLTAGE"><constant>DTV_VOLTAGE</constant></link></para></listitem>
+ <listitem><para><link linkend="DTV-STREAM-ID"><constant>DTV_STREAM_ID</constant></link></para></listitem>
+ </itemizedlist>
+ </section>
+ </section>
+</section>
diff --git a/Documentation/DocBook/media/dvb/examples.xml b/Documentation/DocBook/media/dvb/examples.xml
new file mode 100644
index 000000000..f037e568e
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/examples.xml
@@ -0,0 +1,365 @@
+<title>Examples</title>
+<para>In this section we would like to present some examples for using the DVB API.
+</para>
+<para>Maintainer note: This section is out of date. Please refer to the sample programs packaged
+with the driver distribution from <ulink url="http://linuxtv.org/hg/dvb-apps" />.
+</para>
+
+<section id="tuning">
+<title>Tuning</title>
+<para>We will start with a generic tuning subroutine that uses the frontend and SEC, as well as
+the demux devices. The example is given for QPSK tuners, but can easily be adjusted for
+QAM.
+</para>
+<programlisting>
+ #include &#x003C;sys/ioctl.h&#x003E;
+ #include &#x003C;stdio.h&#x003E;
+ #include &#x003C;stdint.h&#x003E;
+ #include &#x003C;sys/types.h&#x003E;
+ #include &#x003C;sys/stat.h&#x003E;
+ #include &#x003C;fcntl.h&#x003E;
+ #include &#x003C;time.h&#x003E;
+ #include &#x003C;unistd.h&#x003E;
+
+ #include &#x003C;linux/dvb/dmx.h&#x003E;
+ #include &#x003C;linux/dvb/frontend.h&#x003E;
+ #include &#x003C;linux/dvb/sec.h&#x003E;
+ #include &#x003C;sys/poll.h&#x003E;
+
+ #define DMX "/dev/dvb/adapter0/demux1"
+ #define FRONT "/dev/dvb/adapter0/frontend1"
+ #define SEC "/dev/dvb/adapter0/sec1"
+
+ /&#x22C6; routine for checking if we have a signal and other status information&#x22C6;/
+ int FEReadStatus(int fd, fe_status_t &#x22C6;stat)
+ {
+ int ans;
+
+ if ( (ans = ioctl(fd,FE_READ_STATUS,stat) &#x003C; 0)){
+ perror("FE READ STATUS: ");
+ return -1;
+ }
+
+ if (&#x22C6;stat &amp; FE_HAS_POWER)
+ printf("FE HAS POWER\n");
+
+ if (&#x22C6;stat &amp; FE_HAS_SIGNAL)
+ printf("FE HAS SIGNAL\n");
+
+ if (&#x22C6;stat &amp; FE_SPECTRUM_INV)
+ printf("SPEKTRUM INV\n");
+
+ return 0;
+ }
+
+
+ /&#x22C6; tune qpsk &#x22C6;/
+ /&#x22C6; freq: frequency of transponder &#x22C6;/
+ /&#x22C6; vpid, apid, tpid: PIDs of video, audio and teletext TS packets &#x22C6;/
+ /&#x22C6; diseqc: DiSEqC address of the used LNB &#x22C6;/
+ /&#x22C6; pol: Polarisation &#x22C6;/
+ /&#x22C6; srate: Symbol Rate &#x22C6;/
+ /&#x22C6; fec. FEC &#x22C6;/
+ /&#x22C6; lnb_lof1: local frequency of lower LNB band &#x22C6;/
+ /&#x22C6; lnb_lof2: local frequency of upper LNB band &#x22C6;/
+ /&#x22C6; lnb_slof: switch frequency of LNB &#x22C6;/
+
+ int set_qpsk_channel(int freq, int vpid, int apid, int tpid,
+ int diseqc, int pol, int srate, int fec, int lnb_lof1,
+ int lnb_lof2, int lnb_slof)
+ {
+ struct secCommand scmd;
+ struct secCmdSequence scmds;
+ struct dmx_pes_filter_params pesFilterParams;
+ FrontendParameters frp;
+ struct pollfd pfd[1];
+ FrontendEvent event;
+ int demux1, demux2, demux3, front;
+
+ frequency = (uint32_t) freq;
+ symbolrate = (uint32_t) srate;
+
+ if((front = open(FRONT,O_RDWR)) &#x003C; 0){
+ perror("FRONTEND DEVICE: ");
+ return -1;
+ }
+
+ if((sec = open(SEC,O_RDWR)) &#x003C; 0){
+ perror("SEC DEVICE: ");
+ return -1;
+ }
+
+ if (demux1 &#x003C; 0){
+ if ((demux1=open(DMX, O_RDWR|O_NONBLOCK))
+ &#x003C; 0){
+ perror("DEMUX DEVICE: ");
+ return -1;
+ }
+ }
+
+ if (demux2 &#x003C; 0){
+ if ((demux2=open(DMX, O_RDWR|O_NONBLOCK))
+ &#x003C; 0){
+ perror("DEMUX DEVICE: ");
+ return -1;
+ }
+ }
+
+ if (demux3 &#x003C; 0){
+ if ((demux3=open(DMX, O_RDWR|O_NONBLOCK))
+ &#x003C; 0){
+ perror("DEMUX DEVICE: ");
+ return -1;
+ }
+ }
+
+ if (freq &#x003C; lnb_slof) {
+ frp.Frequency = (freq - lnb_lof1);
+ scmds.continuousTone = SEC_TONE_OFF;
+ } else {
+ frp.Frequency = (freq - lnb_lof2);
+ scmds.continuousTone = SEC_TONE_ON;
+ }
+ frp.Inversion = INVERSION_AUTO;
+ if (pol) scmds.voltage = SEC_VOLTAGE_18;
+ else scmds.voltage = SEC_VOLTAGE_13;
+
+ scmd.type=0;
+ scmd.u.diseqc.addr=0x10;
+ scmd.u.diseqc.cmd=0x38;
+ scmd.u.diseqc.numParams=1;
+ scmd.u.diseqc.params[0] = 0xF0 | ((diseqc &#x22C6; 4) &amp; 0x0F) |
+ (scmds.continuousTone == SEC_TONE_ON ? 1 : 0) |
+ (scmds.voltage==SEC_VOLTAGE_18 ? 2 : 0);
+
+ scmds.miniCommand=SEC_MINI_NONE;
+ scmds.numCommands=1;
+ scmds.commands=&amp;scmd;
+ if (ioctl(sec, SEC_SEND_SEQUENCE, &amp;scmds) &#x003C; 0){
+ perror("SEC SEND: ");
+ return -1;
+ }
+
+ if (ioctl(sec, SEC_SEND_SEQUENCE, &amp;scmds) &#x003C; 0){
+ perror("SEC SEND: ");
+ return -1;
+ }
+
+ frp.u.qpsk.SymbolRate = srate;
+ frp.u.qpsk.FEC_inner = fec;
+
+ if (ioctl(front, FE_SET_FRONTEND, &amp;frp) &#x003C; 0){
+ perror("QPSK TUNE: ");
+ return -1;
+ }
+
+ pfd[0].fd = front;
+ pfd[0].events = POLLIN;
+
+ if (poll(pfd,1,3000)){
+ if (pfd[0].revents &amp; POLLIN){
+ printf("Getting QPSK event\n");
+ if ( ioctl(front, FE_GET_EVENT, &amp;event)
+
+ == -EOVERFLOW){
+ perror("qpsk get event");
+ return -1;
+ }
+ printf("Received ");
+ switch(event.type){
+ case FE_UNEXPECTED_EV:
+ printf("unexpected event\n");
+ return -1;
+ case FE_FAILURE_EV:
+ printf("failure event\n");
+ return -1;
+
+ case FE_COMPLETION_EV:
+ printf("completion event\n");
+ }
+ }
+ }
+
+
+ pesFilterParams.pid = vpid;
+ pesFilterParams.input = DMX_IN_FRONTEND;
+ pesFilterParams.output = DMX_OUT_DECODER;
+ pesFilterParams.pes_type = DMX_PES_VIDEO;
+ pesFilterParams.flags = DMX_IMMEDIATE_START;
+ if (ioctl(demux1, DMX_SET_PES_FILTER, &amp;pesFilterParams) &#x003C; 0){
+ perror("set_vpid");
+ return -1;
+ }
+
+ pesFilterParams.pid = apid;
+ pesFilterParams.input = DMX_IN_FRONTEND;
+ pesFilterParams.output = DMX_OUT_DECODER;
+ pesFilterParams.pes_type = DMX_PES_AUDIO;
+ pesFilterParams.flags = DMX_IMMEDIATE_START;
+ if (ioctl(demux2, DMX_SET_PES_FILTER, &amp;pesFilterParams) &#x003C; 0){
+ perror("set_apid");
+ return -1;
+ }
+
+ pesFilterParams.pid = tpid;
+ pesFilterParams.input = DMX_IN_FRONTEND;
+ pesFilterParams.output = DMX_OUT_DECODER;
+ pesFilterParams.pes_type = DMX_PES_TELETEXT;
+ pesFilterParams.flags = DMX_IMMEDIATE_START;
+ if (ioctl(demux3, DMX_SET_PES_FILTER, &amp;pesFilterParams) &#x003C; 0){
+ perror("set_tpid");
+ return -1;
+ }
+
+ return has_signal(fds);
+ }
+
+</programlisting>
+<para>The program assumes that you are using a universal LNB and a standard DiSEqC
+switch with up to 4 addresses. Of course, you could build in some more checking if
+tuning was successful and maybe try to repeat the tuning process. Depending on the
+external hardware, i.e. LNB and DiSEqC switch, and weather conditions this may be
+necessary.
+</para>
+</section>
+
+<section id="the_dvr_device">
+<title>The DVR device</title>
+<para>The following program code shows how to use the DVR device for recording.
+</para>
+<programlisting>
+ #include &#x003C;sys/ioctl.h&#x003E;
+ #include &#x003C;stdio.h&#x003E;
+ #include &#x003C;stdint.h&#x003E;
+ #include &#x003C;sys/types.h&#x003E;
+ #include &#x003C;sys/stat.h&#x003E;
+ #include &#x003C;fcntl.h&#x003E;
+ #include &#x003C;time.h&#x003E;
+ #include &#x003C;unistd.h&#x003E;
+
+ #include &#x003C;linux/dvb/dmx.h&#x003E;
+ #include &#x003C;linux/dvb/video.h&#x003E;
+ #include &#x003C;sys/poll.h&#x003E;
+ #define DVR "/dev/dvb/adapter0/dvr1"
+ #define AUDIO "/dev/dvb/adapter0/audio1"
+ #define VIDEO "/dev/dvb/adapter0/video1"
+
+ #define BUFFY (188&#x22C6;20)
+ #define MAX_LENGTH (1024&#x22C6;1024&#x22C6;5) /&#x22C6; record 5MB &#x22C6;/
+
+
+ /&#x22C6; switch the demuxes to recording, assuming the transponder is tuned &#x22C6;/
+
+ /&#x22C6; demux1, demux2: file descriptor of video and audio filters &#x22C6;/
+ /&#x22C6; vpid, apid: PIDs of video and audio channels &#x22C6;/
+
+ int switch_to_record(int demux1, int demux2, uint16_t vpid, uint16_t apid)
+ {
+ struct dmx_pes_filter_params pesFilterParams;
+
+ if (demux1 &#x003C; 0){
+ if ((demux1=open(DMX, O_RDWR|O_NONBLOCK))
+ &#x003C; 0){
+ perror("DEMUX DEVICE: ");
+ return -1;
+ }
+ }
+
+ if (demux2 &#x003C; 0){
+ if ((demux2=open(DMX, O_RDWR|O_NONBLOCK))
+ &#x003C; 0){
+ perror("DEMUX DEVICE: ");
+ return -1;
+ }
+ }
+
+ pesFilterParams.pid = vpid;
+ pesFilterParams.input = DMX_IN_FRONTEND;
+ pesFilterParams.output = DMX_OUT_TS_TAP;
+ pesFilterParams.pes_type = DMX_PES_VIDEO;
+ pesFilterParams.flags = DMX_IMMEDIATE_START;
+ if (ioctl(demux1, DMX_SET_PES_FILTER, &amp;pesFilterParams) &#x003C; 0){
+ perror("DEMUX DEVICE");
+ return -1;
+ }
+ pesFilterParams.pid = apid;
+ pesFilterParams.input = DMX_IN_FRONTEND;
+ pesFilterParams.output = DMX_OUT_TS_TAP;
+ pesFilterParams.pes_type = DMX_PES_AUDIO;
+ pesFilterParams.flags = DMX_IMMEDIATE_START;
+ if (ioctl(demux2, DMX_SET_PES_FILTER, &amp;pesFilterParams) &#x003C; 0){
+ perror("DEMUX DEVICE");
+ return -1;
+ }
+ return 0;
+ }
+
+ /&#x22C6; start recording MAX_LENGTH , assuming the transponder is tuned &#x22C6;/
+
+ /&#x22C6; demux1, demux2: file descriptor of video and audio filters &#x22C6;/
+ /&#x22C6; vpid, apid: PIDs of video and audio channels &#x22C6;/
+ int record_dvr(int demux1, int demux2, uint16_t vpid, uint16_t apid)
+ {
+ int i;
+ int len;
+ int written;
+ uint8_t buf[BUFFY];
+ uint64_t length;
+ struct pollfd pfd[1];
+ int dvr, dvr_out;
+
+ /&#x22C6; open dvr device &#x22C6;/
+ if ((dvr = open(DVR, O_RDONLY|O_NONBLOCK)) &#x003C; 0){
+ perror("DVR DEVICE");
+ return -1;
+ }
+
+ /&#x22C6; switch video and audio demuxes to dvr &#x22C6;/
+ printf ("Switching dvr on\n");
+ i = switch_to_record(demux1, demux2, vpid, apid);
+ printf("finished: ");
+
+ printf("Recording %2.0f MB of test file in TS format\n",
+ MAX_LENGTH/(1024.0&#x22C6;1024.0));
+ length = 0;
+
+ /&#x22C6; open output file &#x22C6;/
+ if ((dvr_out = open(DVR_FILE,O_WRONLY|O_CREAT
+ |O_TRUNC, S_IRUSR|S_IWUSR
+ |S_IRGRP|S_IWGRP|S_IROTH|
+ S_IWOTH)) &#x003C; 0){
+ perror("Can't open file for dvr test");
+ return -1;
+ }
+
+ pfd[0].fd = dvr;
+ pfd[0].events = POLLIN;
+
+ /&#x22C6; poll for dvr data and write to file &#x22C6;/
+ while (length &#x003C; MAX_LENGTH ) {
+ if (poll(pfd,1,1)){
+ if (pfd[0].revents &amp; POLLIN){
+ len = read(dvr, buf, BUFFY);
+ if (len &#x003C; 0){
+ perror("recording");
+ return -1;
+ }
+ if (len &#x003E; 0){
+ written = 0;
+ while (written &#x003C; len)
+ written +=
+ write (dvr_out,
+ buf, len);
+ length += len;
+ printf("written %2.0f MB\r",
+ length/1024./1024.);
+ }
+ }
+ }
+ }
+ return 0;
+ }
+
+</programlisting>
+
+</section>
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
new file mode 100644
index 000000000..8a6a6ff27
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -0,0 +1,1548 @@
+<title>DVB Frontend API</title>
+
+<para>The DVB frontend device controls the tuner and DVB demodulator
+hardware. It can be accessed through <emphasis
+role="tt">/dev/dvb/adapter0/frontend0</emphasis>. Data types and and
+ioctl definitions can be accessed by including <emphasis
+role="tt">linux/dvb/frontend.h</emphasis> in your application.</para>
+
+<para>DVB frontends come in three varieties: DVB-S (satellite), DVB-C
+(cable) and DVB-T (terrestrial). Transmission via the internet (DVB-IP)
+is not yet handled by this API but a future extension is possible. For
+DVB-S the frontend device also supports satellite equipment control
+(SEC) via DiSEqC and V-SEC protocols. The DiSEqC (digital SEC)
+specification is available from
+<ulink url="http://www.eutelsat.com/satellites/4_5_5.html">Eutelsat</ulink>.</para>
+
+<para>Note that the DVB API may also be used for MPEG decoder-only PCI
+cards, in which case there exists no frontend device.</para>
+
+<section id="frontend_types">
+<title>Frontend Data Types</title>
+
+<section id="fe-type-t">
+<title>Frontend type</title>
+
+<para>For historical reasons, frontend types are named by the type of modulation used in
+transmission. The fontend types are given by fe_type_t type, defined as:</para>
+
+<table pgwide="1" frame="none" id="fe-type">
+<title>Frontend types</title>
+<tgroup cols="3">
+ &cs-def;
+ <thead>
+ <row>
+ <entry>fe_type</entry>
+ <entry>Description</entry>
+ <entry><link linkend="DTV-DELIVERY-SYSTEM">DTV_DELIVERY_SYSTEM</link> equivalent type</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry id="FE_QPSK"><constant>FE_QPSK</constant></entry>
+ <entry>For DVB-S standard</entry>
+ <entry><constant>SYS_DVBS</constant></entry>
+ </row>
+ <row>
+ <entry id="FE_QAM"><constant>FE_QAM</constant></entry>
+ <entry>For DVB-C annex A standard</entry>
+ <entry><constant>SYS_DVBC_ANNEX_A</constant></entry>
+ </row>
+ <row>
+ <entry id="FE_OFDM"><constant>FE_OFDM</constant></entry>
+ <entry>For DVB-T standard</entry>
+ <entry><constant>SYS_DVBT</constant></entry>
+ </row>
+ <row>
+ <entry id="FE_ATSC"><constant>FE_ATSC</constant></entry>
+ <entry>For ATSC standard (terrestrial) or for DVB-C Annex B (cable) used in US.</entry>
+ <entry><constant>SYS_ATSC</constant> (terrestrial) or <constant>SYS_DVBC_ANNEX_B</constant> (cable)</entry>
+ </row>
+</tbody></tgroup></table>
+
+<para>Newer formats like DVB-S2, ISDB-T, ISDB-S and DVB-T2 are not described at the above, as they're
+supported via the new <link linkend="FE_GET_SET_PROPERTY">FE_GET_PROPERTY/FE_GET_SET_PROPERTY</link> ioctl's, using the <link linkend="DTV-DELIVERY-SYSTEM">DTV_DELIVERY_SYSTEM</link> parameter.
+</para>
+
+<para>The usage of this field is deprecated, as it doesn't report all supported standards, and
+will provide an incomplete information for frontends that support multiple delivery systems.
+Please use <link linkend="DTV-ENUM-DELSYS">DTV_ENUM_DELSYS</link> instead.</para>
+</section>
+
+<section id="fe-caps-t">
+<title>frontend capabilities</title>
+
+<para>Capabilities describe what a frontend can do. Some capabilities can only be supported for
+a specific frontend type.</para>
+<programlisting>
+ typedef enum fe_caps {
+ FE_IS_STUPID = 0,
+ FE_CAN_INVERSION_AUTO = 0x1,
+ FE_CAN_FEC_1_2 = 0x2,
+ FE_CAN_FEC_2_3 = 0x4,
+ FE_CAN_FEC_3_4 = 0x8,
+ FE_CAN_FEC_4_5 = 0x10,
+ FE_CAN_FEC_5_6 = 0x20,
+ FE_CAN_FEC_6_7 = 0x40,
+ FE_CAN_FEC_7_8 = 0x80,
+ FE_CAN_FEC_8_9 = 0x100,
+ FE_CAN_FEC_AUTO = 0x200,
+ FE_CAN_QPSK = 0x400,
+ FE_CAN_QAM_16 = 0x800,
+ FE_CAN_QAM_32 = 0x1000,
+ FE_CAN_QAM_64 = 0x2000,
+ FE_CAN_QAM_128 = 0x4000,
+ FE_CAN_QAM_256 = 0x8000,
+ FE_CAN_QAM_AUTO = 0x10000,
+ FE_CAN_TRANSMISSION_MODE_AUTO = 0x20000,
+ FE_CAN_BANDWIDTH_AUTO = 0x40000,
+ FE_CAN_GUARD_INTERVAL_AUTO = 0x80000,
+ FE_CAN_HIERARCHY_AUTO = 0x100000,
+ FE_CAN_8VSB = 0x200000,
+ FE_CAN_16VSB = 0x400000,
+ FE_HAS_EXTENDED_CAPS = 0x800000,
+ FE_CAN_MULTISTREAM = 0x4000000,
+ FE_CAN_TURBO_FEC = 0x8000000,
+ FE_CAN_2G_MODULATION = 0x10000000,
+ FE_NEEDS_BENDING = 0x20000000,
+ FE_CAN_RECOVER = 0x40000000,
+ FE_CAN_MUTE_TS = 0x80000000
+ } fe_caps_t;
+</programlisting>
+</section>
+
+<section id="dvb-frontend-info">
+<title>frontend information</title>
+
+<para>Information about the frontend ca be queried with
+ <link linkend="FE_GET_INFO">FE_GET_INFO</link>.</para>
+
+<programlisting>
+ struct dvb_frontend_info {
+ char name[128];
+ fe_type_t type;
+ uint32_t frequency_min;
+ uint32_t frequency_max;
+ uint32_t frequency_stepsize;
+ uint32_t frequency_tolerance;
+ uint32_t symbol_rate_min;
+ uint32_t symbol_rate_max;
+ uint32_t symbol_rate_tolerance; /&#x22C6; ppm &#x22C6;/
+ uint32_t notifier_delay; /&#x22C6; ms &#x22C6;/
+ fe_caps_t caps;
+ };
+</programlisting>
+</section>
+
+<section id="dvb-diseqc-master-cmd">
+<title>diseqc master command</title>
+
+<para>A message sent from the frontend to DiSEqC capable equipment.</para>
+<programlisting>
+ struct dvb_diseqc_master_cmd {
+ uint8_t msg [6]; /&#x22C6; { framing, address, command, data[3] } &#x22C6;/
+ uint8_t msg_len; /&#x22C6; valid values are 3...6 &#x22C6;/
+ };
+</programlisting>
+</section>
+<section role="subsection" id="dvb-diseqc-slave-reply">
+<title>diseqc slave reply</title>
+
+<para>A reply to the frontend from DiSEqC 2.0 capable equipment.</para>
+<programlisting>
+ struct dvb_diseqc_slave_reply {
+ uint8_t msg [4]; /&#x22C6; { framing, data [3] } &#x22C6;/
+ uint8_t msg_len; /&#x22C6; valid values are 0...4, 0 means no msg &#x22C6;/
+ int timeout; /&#x22C6; return from ioctl after timeout ms with &#x22C6;/
+ }; /&#x22C6; errorcode when no message was received &#x22C6;/
+</programlisting>
+</section>
+
+<section id="fe-sec-voltage-t">
+<title>diseqc slave reply</title>
+<para>The voltage is usually used with non-DiSEqC capable LNBs to switch the polarzation
+(horizontal/vertical). When using DiSEqC epuipment this voltage has to be switched
+consistently to the DiSEqC commands as described in the DiSEqC spec.</para>
+<programlisting>
+ typedef enum fe_sec_voltage {
+ SEC_VOLTAGE_13,
+ SEC_VOLTAGE_18
+ } fe_sec_voltage_t;
+</programlisting>
+</section>
+
+<section id="fe-sec-tone-mode-t">
+<title>SEC continuous tone</title>
+
+<para>The continuous 22KHz tone is usually used with non-DiSEqC capable LNBs to switch the
+high/low band of a dual-band LNB. When using DiSEqC epuipment this voltage has to
+be switched consistently to the DiSEqC commands as described in the DiSEqC
+spec.</para>
+<programlisting>
+ typedef enum fe_sec_tone_mode {
+ SEC_TONE_ON,
+ SEC_TONE_OFF
+ } fe_sec_tone_mode_t;
+</programlisting>
+</section>
+
+<section id="fe-sec-mini-cmd-t">
+<title>SEC tone burst</title>
+
+<para>The 22KHz tone burst is usually used with non-DiSEqC capable switches to select
+between two connected LNBs/satellites. When using DiSEqC epuipment this voltage has to
+be switched consistently to the DiSEqC commands as described in the DiSEqC
+spec.</para>
+<programlisting>
+ typedef enum fe_sec_mini_cmd {
+ SEC_MINI_A,
+ SEC_MINI_B
+ } fe_sec_mini_cmd_t;
+</programlisting>
+
+<para></para>
+</section>
+
+<section id="fe-status-t">
+<title>frontend status</title>
+<para>Several functions of the frontend device use the fe_status data type defined
+by</para>
+<programlisting>
+typedef enum fe_status {
+ FE_HAS_SIGNAL = 0x01,
+ FE_HAS_CARRIER = 0x02,
+ FE_HAS_VITERBI = 0x04,
+ FE_HAS_SYNC = 0x08,
+ FE_HAS_LOCK = 0x10,
+ FE_TIMEDOUT = 0x20,
+ FE_REINIT = 0x40,
+} fe_status_t;
+</programlisting>
+<para>to indicate the current state and/or state changes of the frontend hardware:
+</para>
+
+<informaltable><tgroup cols="2"><tbody>
+<row>
+<entry align="char">FE_HAS_SIGNAL</entry>
+<entry align="char">The frontend has found something above the noise level</entry>
+</row><row>
+<entry align="char">FE_HAS_CARRIER</entry>
+<entry align="char">The frontend has found a DVB signal</entry>
+</row><row>
+<entry align="char">FE_HAS_VITERBI</entry>
+<entry align="char">The frontend FEC inner coding (Viterbi, LDPC or other inner code) is stable</entry>
+</row><row>
+<entry align="char">FE_HAS_SYNC</entry>
+<entry align="char">Synchronization bytes was found</entry>
+</row><row>
+<entry align="char">FE_HAS_LOCK</entry>
+<entry align="char">The DVB were locked and everything is working</entry>
+</row><row>
+<entry align="char">FE_TIMEDOUT</entry>
+<entry align="char">no lock within the last about 2 seconds</entry>
+</row><row>
+<entry align="char">FE_REINIT</entry>
+<entry align="char">The frontend was reinitialized, application is
+recommended to reset DiSEqC, tone and parameters</entry>
+</row>
+</tbody></tgroup></informaltable>
+
+</section>
+
+<section id="dvb-frontend-parameters">
+<title>frontend parameters</title>
+<para>The kind of parameters passed to the frontend device for tuning depend on
+the kind of hardware you are using.</para>
+<para>The struct <constant>dvb_frontend_parameters</constant> uses an
+union with specific per-system parameters. However, as newer delivery systems
+required more data, the structure size weren't enough to fit, and just
+extending its size would break the existing applications. So, those parameters
+were replaced by the usage of <link linkend="FE_GET_SET_PROPERTY">
+<constant>FE_GET_PROPERTY/FE_SET_PROPERTY</constant></link> ioctl's. The
+new API is flexible enough to add new parameters to existing delivery systems,
+and to add newer delivery systems.</para>
+<para>So, newer applications should use <link linkend="FE_GET_SET_PROPERTY">
+<constant>FE_GET_PROPERTY/FE_SET_PROPERTY</constant></link> instead, in
+order to be able to support the newer System Delivery like DVB-S2, DVB-T2,
+DVB-C2, ISDB, etc.</para>
+<para>All kinds of parameters are combined as an union in the FrontendParameters structure:
+<programlisting>
+struct dvb_frontend_parameters {
+ uint32_t frequency; /&#x22C6; (absolute) frequency in Hz for QAM/OFDM &#x22C6;/
+ /&#x22C6; intermediate frequency in kHz for QPSK &#x22C6;/
+ fe_spectral_inversion_t inversion;
+ union {
+ struct dvb_qpsk_parameters qpsk;
+ struct dvb_qam_parameters qam;
+ struct dvb_ofdm_parameters ofdm;
+ struct dvb_vsb_parameters vsb;
+ } u;
+};
+</programlisting></para>
+<para>In the case of QPSK frontends the <constant>frequency</constant> field specifies the intermediate
+frequency, i.e. the offset which is effectively added to the local oscillator frequency (LOF) of
+the LNB. The intermediate frequency has to be specified in units of kHz. For QAM and
+OFDM frontends the <constant>frequency</constant> specifies the absolute frequency and is given in Hz.
+</para>
+
+<section id="dvb-qpsk-parameters">
+<title>QPSK parameters</title>
+<para>For satellite QPSK frontends you have to use the <constant>dvb_qpsk_parameters</constant> structure:</para>
+<programlisting>
+ struct dvb_qpsk_parameters {
+ uint32_t symbol_rate; /&#x22C6; symbol rate in Symbols per second &#x22C6;/
+ fe_code_rate_t fec_inner; /&#x22C6; forward error correction (see above) &#x22C6;/
+ };
+</programlisting>
+</section>
+<section id="dvb-qam-parameters">
+<title>QAM parameters</title>
+<para>for cable QAM frontend you use the <constant>dvb_qam_parameters</constant> structure:</para>
+<programlisting>
+ struct dvb_qam_parameters {
+ uint32_t symbol_rate; /&#x22C6; symbol rate in Symbols per second &#x22C6;/
+ fe_code_rate_t fec_inner; /&#x22C6; forward error correction (see above) &#x22C6;/
+ fe_modulation_t modulation; /&#x22C6; modulation type (see above) &#x22C6;/
+ };
+</programlisting>
+</section>
+<section id="dvb-vsb-parameters">
+<title>VSB parameters</title>
+<para>ATSC frontends are supported by the <constant>dvb_vsb_parameters</constant> structure:</para>
+<programlisting>
+struct dvb_vsb_parameters {
+ fe_modulation_t modulation; /&#x22C6; modulation type (see above) &#x22C6;/
+};
+</programlisting>
+</section>
+<section id="dvb-ofdm-parameters">
+<title>OFDM parameters</title>
+<para>DVB-T frontends are supported by the <constant>dvb_ofdm_parameters</constant> structure:</para>
+<programlisting>
+ struct dvb_ofdm_parameters {
+ fe_bandwidth_t bandwidth;
+ fe_code_rate_t code_rate_HP; /&#x22C6; high priority stream code rate &#x22C6;/
+ fe_code_rate_t code_rate_LP; /&#x22C6; low priority stream code rate &#x22C6;/
+ fe_modulation_t constellation; /&#x22C6; modulation type (see above) &#x22C6;/
+ fe_transmit_mode_t transmission_mode;
+ fe_guard_interval_t guard_interval;
+ fe_hierarchy_t hierarchy_information;
+ };
+</programlisting>
+</section>
+<section id="fe-spectral-inversion-t">
+<title>frontend spectral inversion</title>
+<para>The Inversion field can take one of these values:
+</para>
+<programlisting>
+typedef enum fe_spectral_inversion {
+ INVERSION_OFF,
+ INVERSION_ON,
+ INVERSION_AUTO
+} fe_spectral_inversion_t;
+</programlisting>
+<para>It indicates if spectral inversion should be presumed or not. In the automatic setting
+(<constant>INVERSION_AUTO</constant>) the hardware will try to figure out the correct setting by
+itself.
+</para>
+</section>
+<section id="fe-code-rate-t">
+<title>frontend code rate</title>
+<para>The possible values for the <constant>fec_inner</constant> field used on
+<link linkend="dvb-qpsk-parameters"><constant>struct dvb_qpsk_parameters</constant></link> and
+<link linkend="dvb-qam-parameters"><constant>struct dvb_qam_parameters</constant></link> are:
+</para>
+<programlisting>
+typedef enum fe_code_rate {
+ FEC_NONE = 0,
+ FEC_1_2,
+ FEC_2_3,
+ FEC_3_4,
+ FEC_4_5,
+ FEC_5_6,
+ FEC_6_7,
+ FEC_7_8,
+ FEC_8_9,
+ FEC_AUTO,
+ FEC_3_5,
+ FEC_9_10,
+} fe_code_rate_t;
+</programlisting>
+<para>which correspond to error correction rates of 1/2, 2/3, etc., no error correction or auto
+detection.
+</para>
+</section>
+<section id="fe-modulation-t">
+<title>frontend modulation type for QAM, OFDM and VSB</title>
+<para>For cable and terrestrial frontends, e. g. for
+<link linkend="dvb-qam-parameters"><constant>struct dvb_qpsk_parameters</constant></link>,
+<link linkend="dvb-ofdm-parameters"><constant>struct dvb_qam_parameters</constant></link> and
+<link linkend="dvb-vsb-parameters"><constant>struct dvb_qam_parameters</constant></link>,
+it needs to specify the quadrature modulation mode which can be one of the following:
+</para>
+<programlisting>
+ typedef enum fe_modulation {
+ QPSK,
+ QAM_16,
+ QAM_32,
+ QAM_64,
+ QAM_128,
+ QAM_256,
+ QAM_AUTO,
+ VSB_8,
+ VSB_16,
+ PSK_8,
+ APSK_16,
+ APSK_32,
+ DQPSK,
+ } fe_modulation_t;
+</programlisting>
+</section>
+<section>
+<title>More OFDM parameters</title>
+<section id="fe-transmit-mode-t">
+<title>Number of carriers per channel</title>
+<programlisting>
+typedef enum fe_transmit_mode {
+ TRANSMISSION_MODE_2K,
+ TRANSMISSION_MODE_8K,
+ TRANSMISSION_MODE_AUTO,
+ TRANSMISSION_MODE_4K,
+ TRANSMISSION_MODE_1K,
+ TRANSMISSION_MODE_16K,
+ TRANSMISSION_MODE_32K,
+ } fe_transmit_mode_t;
+</programlisting>
+</section>
+<section id="fe-bandwidth-t">
+<title>frontend bandwidth</title>
+<programlisting>
+typedef enum fe_bandwidth {
+ BANDWIDTH_8_MHZ,
+ BANDWIDTH_7_MHZ,
+ BANDWIDTH_6_MHZ,
+ BANDWIDTH_AUTO,
+ BANDWIDTH_5_MHZ,
+ BANDWIDTH_10_MHZ,
+ BANDWIDTH_1_712_MHZ,
+} fe_bandwidth_t;
+</programlisting>
+</section>
+<section id="fe-guard-interval-t">
+<title>frontend guard inverval</title>
+<programlisting>
+typedef enum fe_guard_interval {
+ GUARD_INTERVAL_1_32,
+ GUARD_INTERVAL_1_16,
+ GUARD_INTERVAL_1_8,
+ GUARD_INTERVAL_1_4,
+ GUARD_INTERVAL_AUTO,
+ GUARD_INTERVAL_1_128,
+ GUARD_INTERVAL_19_128,
+ GUARD_INTERVAL_19_256,
+} fe_guard_interval_t;
+</programlisting>
+</section>
+<section id="fe-hierarchy-t">
+<title>frontend hierarchy</title>
+<programlisting>
+typedef enum fe_hierarchy {
+ HIERARCHY_NONE,
+ HIERARCHY_1,
+ HIERARCHY_2,
+ HIERARCHY_4,
+ HIERARCHY_AUTO
+ } fe_hierarchy_t;
+</programlisting>
+</section>
+</section>
+
+</section>
+
+<section id="dvb-frontend-event">
+<title>frontend events</title>
+ <programlisting>
+ struct dvb_frontend_event {
+ fe_status_t status;
+ struct dvb_frontend_parameters parameters;
+ };
+</programlisting>
+ </section>
+</section>
+
+
+<section id="frontend_fcalls">
+<title>Frontend Function Calls</title>
+
+<section id="frontend_f_open">
+<title>open()</title>
+<para>DESCRIPTION</para>
+<informaltable><tgroup cols="1"><tbody><row>
+<entry align="char">
+<para>This system call opens a named frontend device (/dev/dvb/adapter0/frontend0)
+ for subsequent use. Usually the first thing to do after a successful open is to
+ find out the frontend type with <link linkend="FE_GET_INFO">FE_GET_INFO</link>.</para>
+<para>The device can be opened in read-only mode, which only allows monitoring of
+ device status and statistics, or read/write mode, which allows any kind of use
+ (e.g. performing tuning operations.)
+</para>
+<para>In a system with multiple front-ends, it is usually the case that multiple devices
+ cannot be open in read/write mode simultaneously. As long as a front-end
+ device is opened in read/write mode, other open() calls in read/write mode will
+ either fail or block, depending on whether non-blocking or blocking mode was
+ specified. A front-end device opened in blocking mode can later be put into
+ non-blocking mode (and vice versa) using the F_SETFL command of the fcntl
+ system call. This is a standard system call, documented in the Linux manual
+ page for fcntl. When an open() call has succeeded, the device will be ready
+ for use in the specified mode. This implies that the corresponding hardware is
+ powered up, and that other front-ends may have been powered down to make
+ that possible.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int open(const char &#x22C6;deviceName, int flags);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>const char
+ *deviceName</para>
+</entry><entry
+ align="char">
+<para>Name of specific video device.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int flags</para>
+</entry><entry
+ align="char">
+<para>A bit-wise OR of the following flags:</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDONLY read-only access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDWR read/write access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_NONBLOCK open in non-blocking mode</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>(blocking mode is the default)</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>ENODEV</para>
+</entry><entry
+ align="char">
+<para>Device driver not loaded/available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EINTERNAL</para>
+</entry><entry
+ align="char">
+<para>Internal error.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBUSY</para>
+</entry><entry
+ align="char">
+<para>Device or resource busy.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Invalid argument.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="frontend_f_close">
+<title>close()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call closes a previously opened front-end device. After closing
+ a front-end device, its corresponding hardware might be powered down
+ automatically.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int close(int fd);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="FE_READ_STATUS">
+<title>FE_READ_STATUS</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns status information about the front-end. This call only
+ requires read-only access to the device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = <link linkend="FE_READ_STATUS">FE_READ_STATUS</link>,
+ fe_status_t &#x22C6;status);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_READ_STATUS">FE_READ_STATUS</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct fe_status_t
+ *status</para>
+</entry><entry
+ align="char">
+<para>Points to the location where the front-end status word is
+ to be stored.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EFAULT</para>
+</entry><entry
+ align="char">
+<para>status points to invalid address.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+</section>
+
+<section id="FE_READ_BER">
+<title>FE_READ_BER</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns the bit error rate for the signal currently
+ received/demodulated by the front-end. For this command, read-only access to
+ the device is sufficient.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = <link linkend="FE_READ_BER">FE_READ_BER</link>,
+ uint32_t &#x22C6;ber);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_READ_BER">FE_READ_BER</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>uint32_t *ber</para>
+</entry><entry
+ align="char">
+<para>The bit error rate is stored into *ber.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+<section id="FE_READ_SNR">
+<title>FE_READ_SNR</title>
+
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns the signal-to-noise ratio for the signal currently received
+ by the front-end. For this command, read-only access to the device is sufficient.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = <link linkend="FE_READ_SNR">FE_READ_SNR</link>, uint16_t
+ &#x22C6;snr);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_READ_SNR">FE_READ_SNR</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>uint16_t *snr</para>
+</entry><entry
+ align="char">
+<para>The signal-to-noise ratio is stored into *snr.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+<section id="FE_READ_SIGNAL_STRENGTH">
+<title>FE_READ_SIGNAL_STRENGTH</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns the signal strength value for the signal currently received
+ by the front-end. For this command, read-only access to the device is sufficient.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl( int fd, int request =
+ <link linkend="FE_READ_SIGNAL_STRENGTH">FE_READ_SIGNAL_STRENGTH</link>, uint16_t &#x22C6;strength);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_READ_SIGNAL_STRENGTH">FE_READ_SIGNAL_STRENGTH</link> for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>uint16_t *strength</para>
+</entry><entry
+ align="char">
+<para>The signal strength value is stored into *strength.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+<section id="FE_READ_UNCORRECTED_BLOCKS">
+<title>FE_READ_UNCORRECTED_BLOCKS</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns the number of uncorrected blocks detected by the device
+ driver during its lifetime. For meaningful measurements, the increment in block
+ count during a specific time interval should be calculated. For this command,
+ read-only access to the device is sufficient.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>Note that the counter will wrap to zero after its maximum count has been
+ reached.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl( int fd, int request =
+ <link linkend="FE_READ_UNCORRECTED_BLOCKS">FE_READ_UNCORRECTED_BLOCKS</link>, uint32_t &#x22C6;ublocks);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_READ_UNCORRECTED_BLOCKS">FE_READ_UNCORRECTED_BLOCKS</link> for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>uint32_t *ublocks</para>
+</entry><entry
+ align="char">
+<para>The total number of uncorrected blocks seen by the driver
+ so far.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+<section id="FE_SET_FRONTEND">
+<title>FE_SET_FRONTEND</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call starts a tuning operation using specified parameters. The result
+ of this call will be successful if the parameters were valid and the tuning could
+ be initiated. The result of the tuning operation in itself, however, will arrive
+ asynchronously as an event (see documentation for <link linkend="FE_GET_EVENT">FE_GET_EVENT</link> and
+ FrontendEvent.) If a new <link linkend="FE_SET_FRONTEND">FE_SET_FRONTEND</link> operation is initiated before
+ the previous one was completed, the previous operation will be aborted in favor
+ of the new one. This command requires read/write access to the device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = <link linkend="FE_SET_FRONTEND">FE_SET_FRONTEND</link>,
+ struct dvb_frontend_parameters &#x22C6;p);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_SET_FRONTEND">FE_SET_FRONTEND</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct
+ dvb_frontend_parameters
+ *p</para>
+</entry><entry
+ align="char">
+<para>Points to parameters for tuning operation.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Maximum supported symbol rate reached.</para>
+</entry>
+</row></tbody></tgroup></informaltable>
+</section>
+
+<section id="FE_GET_FRONTEND">
+<title>FE_GET_FRONTEND</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call queries the currently effective frontend parameters. For this
+ command, read-only access to the device is sufficient.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = <link linkend="FE_GET_FRONTEND">FE_GET_FRONTEND</link>,
+ struct dvb_frontend_parameters &#x22C6;p);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_SET_FRONTEND">FE_SET_FRONTEND</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct
+ dvb_frontend_parameters
+ *p</para>
+</entry><entry
+ align="char">
+<para>Points to parameters for tuning operation.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Maximum supported symbol rate reached.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
+
+<section id="FE_GET_EVENT">
+<title>FE_GET_EVENT</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns a frontend event if available. If an event is not
+ available, the behavior depends on whether the device is in blocking or
+ non-blocking mode. In the latter case, the call fails immediately with errno
+ set to EWOULDBLOCK. In the former case, the call blocks until an event
+ becomes available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>The standard Linux poll() and/or select() system calls can be used with the
+ device file descriptor to watch for new events. For select(), the file descriptor
+ should be included in the exceptfds argument, and for poll(), POLLPRI should
+ be specified as the wake-up condition. Since the event queue allocated is
+ rather small (room for 8 events), the queue must be serviced regularly to avoid
+ overflow. If an overflow happens, the oldest event is discarded from the queue,
+ and an error (EOVERFLOW) occurs the next time the queue is read. After
+ reporting the error condition in this fashion, subsequent
+ <link linkend="FE_GET_EVENT">FE_GET_EVENT</link>
+ calls will return events from the queue as usual.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>For the sake of implementation simplicity, this command requires read/write
+ access to the device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = QPSK_GET_EVENT,
+ struct dvb_frontend_event &#x22C6;ev);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_GET_EVENT">FE_GET_EVENT</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct
+ dvb_frontend_event
+ *ev</para>
+</entry><entry
+ align="char">
+<para>Points to the location where the event,</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>if any, is to be stored.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EWOULDBLOCK</para>
+</entry><entry
+ align="char">
+<para>There is no event pending, and the device is in
+ non-blocking mode.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EOVERFLOW</para>
+</entry><entry
+ align="char">
+<para>Overflow in event queue - one or more events were lost.</para>
+</entry>
+</row></tbody></tgroup></informaltable>
+</section>
+
+<section id="FE_GET_INFO">
+<title>FE_GET_INFO</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns information about the front-end. This call only requires
+ read-only access to the device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(int fd, int request = <link linkend="FE_GET_INFO">FE_GET_INFO</link>, struct
+ dvb_frontend_info &#x22C6;info);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_GET_INFO">FE_GET_INFO</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct
+ dvb_frontend_info
+ *info</para>
+</entry><entry
+ align="char">
+<para>Points to the location where the front-end information is
+ to be stored.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="FE_DISEQC_RESET_OVERLOAD">
+<title>FE_DISEQC_RESET_OVERLOAD</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>If the bus has been automatically powered off due to power overload, this ioctl
+ call restores the power to the bus. The call requires read/write access to the
+ device. This call has no effect if the device is manually powered off. Not all
+ DVB adapters support this ioctl.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ <link linkend="FE_DISEQC_RESET_OVERLOAD">FE_DISEQC_RESET_OVERLOAD</link>);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_DISEQC_RESET_OVERLOAD">FE_DISEQC_RESET_OVERLOAD</link> for this
+ command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+<section id="FE_DISEQC_SEND_MASTER_CMD">
+<title>FE_DISEQC_SEND_MASTER_CMD</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call is used to send a a DiSEqC command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ <link linkend="FE_DISEQC_SEND_MASTER_CMD">FE_DISEQC_SEND_MASTER_CMD</link>, struct
+ dvb_diseqc_master_cmd &#x22C6;cmd);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_DISEQC_SEND_MASTER_CMD">FE_DISEQC_SEND_MASTER_CMD</link> for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct
+ dvb_diseqc_master_cmd
+ *cmd</para>
+</entry><entry
+ align="char">
+<para>Pointer to the command to be transmitted.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+<section id="FE_DISEQC_RECV_SLAVE_REPLY">
+<title>FE_DISEQC_RECV_SLAVE_REPLY</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call is used to receive reply to a DiSEqC 2.0 command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ <link linkend="FE_DISEQC_RECV_SLAVE_REPLY">FE_DISEQC_RECV_SLAVE_REPLY</link>, struct
+ dvb_diseqc_slave_reply &#x22C6;reply);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_DISEQC_RECV_SLAVE_REPLY">FE_DISEQC_RECV_SLAVE_REPLY</link> for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct
+ dvb_diseqc_slave_reply
+ *reply</para>
+</entry><entry
+ align="char">
+<para>Pointer to the command to be received.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="FE_DISEQC_SEND_BURST">
+<title>FE_DISEQC_SEND_BURST</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call is used to send a 22KHz tone burst.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ <link linkend="FE_DISEQC_SEND_BURST">FE_DISEQC_SEND_BURST</link>, fe_sec_mini_cmd_t burst);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_DISEQC_SEND_BURST">FE_DISEQC_SEND_BURST</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>fe_sec_mini_cmd_t
+ burst</para>
+</entry><entry
+ align="char">
+<para>burst A or B.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+<section id="FE_SET_TONE">
+<title>FE_SET_TONE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This call is used to set the generation of the continuous 22kHz tone. This call
+ requires read/write permissions.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = <link linkend="FE_SET_TONE">FE_SET_TONE</link>,
+ fe_sec_tone_mode_t tone);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_SET_TONE">FE_SET_TONE</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>fe_sec_tone_mode_t
+ tone</para>
+</entry><entry
+ align="char">
+<para>The requested tone generation mode (on/off).</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="FE_SET_VOLTAGE">
+<title>FE_SET_VOLTAGE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This call is used to set the bus voltage. This call requires read/write
+ permissions.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = <link linkend="FE_SET_VOLTAGE">FE_SET_VOLTAGE</link>,
+ fe_sec_voltage_t voltage);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_SET_VOLTAGE">FE_SET_VOLTAGE</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>fe_sec_voltage_t
+ voltage</para>
+</entry><entry
+ align="char">
+<para>The requested bus voltage.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+<section id="FE_ENABLE_HIGH_LNB_VOLTAGE">
+<title>FE_ENABLE_HIGH_LNB_VOLTAGE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>If high != 0 enables slightly higher voltages instead of 13/18V (to compensate
+ for long cables). This call requires read/write permissions. Not all DVB
+ adapters support this ioctl.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ <link linkend="FE_ENABLE_HIGH_LNB_VOLTAGE">FE_ENABLE_HIGH_LNB_VOLTAGE</link>, int high);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals <link linkend="FE_SET_VOLTAGE">FE_SET_VOLTAGE</link> for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int high</para>
+</entry><entry
+ align="char">
+<para>The requested bus voltage.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+<section id="FE_SET_FRONTEND_TUNE_MODE">
+<title>FE_SET_FRONTEND_TUNE_MODE</title>
+<para>DESCRIPTION</para>
+<informaltable><tgroup cols="1"><tbody><row>
+<entry align="char">
+<para>Allow setting tuner mode flags to the frontend.</para>
+</entry>
+</row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS</para>
+<informaltable><tgroup cols="1"><tbody><row>
+<entry align="char">
+<para>int ioctl(int fd, int request =
+<link linkend="FE_SET_FRONTEND_TUNE_MODE">FE_SET_FRONTEND_TUNE_MODE</link>, unsigned int flags);</para>
+</entry>
+</row></tbody></tgroup></informaltable>
+
+<para>PARAMETERS</para>
+<informaltable><tgroup cols="2"><tbody><row>
+<entry align="char">
+ <para>unsigned int flags</para>
+</entry>
+<entry align="char">
+<para>
+FE_TUNE_MODE_ONESHOT When set, this flag will disable any zigzagging or other "normal" tuning behaviour. Additionally, there will be no automatic monitoring of the lock status, and hence no frontend events will be generated. If a frontend device is closed, this flag will be automatically turned off when the device is reopened read-write.
+</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+<section id="FE_DISHNETWORK_SEND_LEGACY_CMD">
+ <title>FE_DISHNETWORK_SEND_LEGACY_CMD</title>
+<para>DESCRIPTION</para>
+<informaltable><tgroup cols="1"><tbody><row>
+<entry align="char">
+<para>WARNING: This is a very obscure legacy command, used only at stv0299 driver. Should not be used on newer drivers.</para>
+<para>It provides a non-standard method for selecting Diseqc voltage on the frontend, for Dish Network legacy switches.</para>
+<para>As support for this ioctl were added in 2004, this means that such dishes were already legacy in 2004.</para>
+</entry>
+</row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS</para>
+<informaltable><tgroup cols="1"><tbody><row>
+<entry align="char">
+<para>int ioctl(int fd, int request =
+ <link linkend="FE_DISHNETWORK_SEND_LEGACY_CMD">FE_DISHNETWORK_SEND_LEGACY_CMD</link>, unsigned long cmd);</para>
+</entry>
+</row></tbody></tgroup></informaltable>
+
+<para>PARAMETERS</para>
+<informaltable><tgroup cols="2"><tbody><row>
+<entry align="char">
+ <para>unsigned long cmd</para>
+</entry>
+<entry align="char">
+<para>
+sends the specified raw cmd to the dish via DISEqC.
+</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+&return-value-dvb;
+</section>
+
+</section>
+
+&sub-dvbproperty;
diff --git a/Documentation/DocBook/media/dvb/intro.xml b/Documentation/DocBook/media/dvb/intro.xml
new file mode 100644
index 000000000..2048b53d1
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/intro.xml
@@ -0,0 +1,212 @@
+<title>Introduction</title>
+
+<section id="requisites">
+<title>What you need to know</title>
+
+<para>The reader of this document is required to have some knowledge in
+the area of digital video broadcasting (DVB) and should be familiar with
+part I of the MPEG2 specification ISO/IEC 13818 (aka ITU-T H.222), i.e
+you should know what a program/transport stream (PS/TS) is and what is
+meant by a packetized elementary stream (PES) or an I-frame.</para>
+
+<para>Various DVB standards documents are available from
+<ulink url="http://www.dvb.org" /> and/or
+<ulink url="http://www.etsi.org" />.</para>
+
+<para>It is also necessary to know how to access unix/linux devices and
+how to use ioctl calls. This also includes the knowledge of C or C++.
+</para>
+</section>
+
+<section id="history">
+<title>History</title>
+
+<para>The first API for DVB cards we used at Convergence in late 1999
+was an extension of the Video4Linux API which was primarily developed
+for frame grabber cards. As such it was not really well suited to be
+used for DVB cards and their new features like recording MPEG streams
+and filtering several section and PES data streams at the same time.
+</para>
+
+<para>In early 2000, we were approached by Nokia with a proposal for a
+new standard Linux DVB API. As a commitment to the development of
+terminals based on open standards, Nokia and Convergence made it
+available to all Linux developers and published it on
+<ulink url="http://www.linuxtv.org/" /> in September 2000.
+Convergence is the maintainer of the Linux DVB API. Together with the
+LinuxTV community (i.e. you, the reader of this document), the Linux DVB
+API will be constantly reviewed and improved. With the Linux driver for
+the Siemens/Hauppauge DVB PCI card Convergence provides a first
+implementation of the Linux DVB API.</para>
+</section>
+
+<section id="overview">
+<title>Overview</title>
+
+<figure id="stb_components">
+<title>Components of a DVB card/STB</title>
+<mediaobject>
+<imageobject>
+<imagedata fileref="dvbstb.pdf" format="PS" />
+</imageobject>
+<imageobject>
+<imagedata fileref="dvbstb.png" format="PNG" />
+</imageobject>
+</mediaobject>
+</figure>
+
+<para>A DVB PCI card or DVB set-top-box (STB) usually consists of the
+following main hardware components: </para>
+
+<itemizedlist>
+ <listitem>
+
+<para>Frontend consisting of tuner and DVB demodulator</para>
+
+<para>Here the raw signal reaches the DVB hardware from a satellite dish
+or antenna or directly from cable. The frontend down-converts and
+demodulates this signal into an MPEG transport stream (TS). In case of a
+satellite frontend, this includes a facility for satellite equipment
+control (SEC), which allows control of LNB polarization, multi feed
+switches or dish rotors.</para>
+
+</listitem>
+ <listitem>
+
+<para>Conditional Access (CA) hardware like CI adapters and smartcard slots
+</para>
+
+<para>The complete TS is passed through the CA hardware. Programs to
+which the user has access (controlled by the smart card) are decoded in
+real time and re-inserted into the TS.</para>
+
+</listitem>
+ <listitem>
+ <para>Demultiplexer which filters the incoming DVB stream</para>
+
+<para>The demultiplexer splits the TS into its components like audio and
+video streams. Besides usually several of such audio and video streams
+it also contains data streams with information about the programs
+offered in this or other streams of the same provider.</para>
+
+</listitem>
+<listitem>
+
+<para>MPEG2 audio and video decoder</para>
+
+<para>The main targets of the demultiplexer are the MPEG2 audio and
+video decoders. After decoding they pass on the uncompressed audio and
+video to the computer screen or (through a PAL/NTSC encoder) to a TV
+set.</para>
+
+
+</listitem>
+</itemizedlist>
+
+<para><xref linkend="stb_components" /> shows a crude schematic of the control and data flow
+between those components.</para>
+
+<para>On a DVB PCI card not all of these have to be present since some
+functionality can be provided by the main CPU of the PC (e.g. MPEG
+picture and sound decoding) or is not needed (e.g. for data-only uses
+like &#8220;internet over satellite&#8221;). Also not every card or STB
+provides conditional access hardware.</para>
+
+</section>
+
+<section id="dvb_devices">
+<title>Linux DVB Devices</title>
+
+<para>The Linux DVB API lets you control these hardware components
+through currently six Unix-style character devices for video, audio,
+frontend, demux, CA and IP-over-DVB networking. The video and audio
+devices control the MPEG2 decoder hardware, the frontend device the
+tuner and the DVB demodulator. The demux device gives you control over
+the PES and section filters of the hardware. If the hardware does not
+support filtering these filters can be implemented in software. Finally,
+the CA device controls all the conditional access capabilities of the
+hardware. It can depend on the individual security requirements of the
+platform, if and how many of the CA functions are made available to the
+application through this device.</para>
+
+<para>All devices can be found in the <emphasis role="tt">/dev</emphasis>
+tree under <emphasis role="tt">/dev/dvb</emphasis>. The individual devices
+are called:</para>
+
+<itemizedlist>
+<listitem>
+
+<para><emphasis role="tt">/dev/dvb/adapterN/audioM</emphasis>,</para>
+</listitem>
+<listitem>
+<para><emphasis role="tt">/dev/dvb/adapterN/videoM</emphasis>,</para>
+</listitem>
+<listitem>
+<para><emphasis role="tt">/dev/dvb/adapterN/frontendM</emphasis>,</para>
+</listitem>
+ <listitem>
+
+<para><emphasis role="tt">/dev/dvb/adapterN/netM</emphasis>,</para>
+</listitem>
+ <listitem>
+
+<para><emphasis role="tt">/dev/dvb/adapterN/demuxM</emphasis>,</para>
+</listitem>
+ <listitem>
+
+<para><emphasis role="tt">/dev/dvb/adapterN/dvrM</emphasis>,</para>
+</listitem>
+ <listitem>
+
+<para><emphasis role="tt">/dev/dvb/adapterN/caM</emphasis>,</para></listitem></itemizedlist>
+
+<para>where N enumerates the DVB PCI cards in a system starting
+from&#x00A0;0, and M enumerates the devices of each type within each
+adapter, starting from&#x00A0;0, too. We will omit the &#8220;<emphasis
+role="tt">/dev/dvb/adapterN/</emphasis>&#8221; in the further dicussion
+of these devices. The naming scheme for the devices is the same wheter
+devfs is used or not.</para>
+
+<para>More details about the data structures and function calls of all
+the devices are described in the following chapters.</para>
+
+</section>
+
+<section id="include_files">
+<title>API include files</title>
+
+<para>For each of the DVB devices a corresponding include file exists.
+The DVB API include files should be included in application sources with
+a partial path like:</para>
+
+<programlisting>
+ #include &#x003C;linux/dvb/audio.h&#x003E;
+</programlisting>
+<programlisting>
+ #include &#x003C;linux/dvb/ca.h&#x003E;
+</programlisting>
+<programlisting>
+ #include &#x003C;linux/dvb/dmx.h&#x003E;
+</programlisting>
+<programlisting>
+ #include &#x003C;linux/dvb/frontend.h&#x003E;
+</programlisting>
+<programlisting>
+ #include &#x003C;linux/dvb/net.h&#x003E;
+</programlisting>
+<programlisting>
+ #include &#x003C;linux/dvb/osd.h&#x003E;
+</programlisting>
+<programlisting>
+ #include &#x003C;linux/dvb/video.h&#x003E;
+</programlisting>
+
+<para>To enable applications to support different API version, an
+additional include file <emphasis
+role="tt">linux/dvb/version.h</emphasis> exists, which defines the
+constant <emphasis role="tt">DVB_API_VERSION</emphasis>. This document
+describes <emphasis role="tt">DVB_API_VERSION 5.8</emphasis>.
+</para>
+
+</section>
+
diff --git a/Documentation/DocBook/media/dvb/kdapi.xml b/Documentation/DocBook/media/dvb/kdapi.xml
new file mode 100644
index 000000000..6c11ec52c
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/kdapi.xml
@@ -0,0 +1,2309 @@
+<title>Kernel Demux API</title>
+<para>The kernel demux API defines a driver-internal interface for registering low-level,
+hardware specific driver to a hardware independent demux layer. It is only of interest for
+DVB device driver writers. The header file for this API is named <emphasis role="tt">demux.h</emphasis> and located in
+<emphasis role="tt">drivers/media/dvb-core</emphasis>.
+</para>
+<para>Maintainer note: This section must be reviewed. It is probably out of date.
+</para>
+
+<section id="kernel_demux_data_types">
+<title>Kernel Demux Data Types</title>
+
+
+<section id="dmx_success_t">
+<title>dmx_success_t</title>
+ <programlisting>
+ typedef enum {
+ DMX_OK = 0, /&#x22C6; Received Ok &#x22C6;/
+ DMX_LENGTH_ERROR, /&#x22C6; Incorrect length &#x22C6;/
+ DMX_OVERRUN_ERROR, /&#x22C6; Receiver ring buffer overrun &#x22C6;/
+ DMX_CRC_ERROR, /&#x22C6; Incorrect CRC &#x22C6;/
+ DMX_FRAME_ERROR, /&#x22C6; Frame alignment error &#x22C6;/
+ DMX_FIFO_ERROR, /&#x22C6; Receiver FIFO overrun &#x22C6;/
+ DMX_MISSED_ERROR /&#x22C6; Receiver missed packet &#x22C6;/
+ } dmx_success_t;
+</programlisting>
+
+</section>
+<section id="ts_filter_types">
+<title>TS filter types</title>
+ <programlisting>
+ /&#x22C6;--------------------------------------------------------------------------&#x22C6;/
+ /&#x22C6; TS packet reception &#x22C6;/
+ /&#x22C6;--------------------------------------------------------------------------&#x22C6;/
+
+ /&#x22C6; TS filter type for set_type() &#x22C6;/
+
+ #define TS_PACKET 1 /&#x22C6; send TS packets (188 bytes) to callback (default) &#x22C6;/
+ #define TS_PAYLOAD_ONLY 2 /&#x22C6; in case TS_PACKET is set, only send the TS
+ payload (&#x003C;=184 bytes per packet) to callback &#x22C6;/
+ #define TS_DECODER 4 /&#x22C6; send stream to built-in decoder (if present) &#x22C6;/
+</programlisting>
+
+</section>
+<section id="dmx_ts_pes_t">
+<title>dmx_ts_pes_t</title>
+<para>The structure
+</para>
+<programlisting>
+ typedef enum
+ {
+ DMX_TS_PES_AUDIO, /&#x22C6; also send packets to audio decoder (if it exists) &#x22C6;/
+ DMX_TS_PES_VIDEO, /&#x22C6; ... &#x22C6;/
+ DMX_TS_PES_TELETEXT,
+ DMX_TS_PES_SUBTITLE,
+ DMX_TS_PES_PCR,
+ DMX_TS_PES_OTHER,
+ } dmx_ts_pes_t;
+</programlisting>
+<para>describes the PES type for filters which write to a built-in decoder. The correspond (and
+should be kept identical) to the types in the demux device.
+</para>
+<programlisting>
+ struct dmx_ts_feed_s {
+ int is_filtering; /&#x22C6; Set to non-zero when filtering in progress &#x22C6;/
+ struct dmx_demux_s&#x22C6; parent; /&#x22C6; Back-pointer &#x22C6;/
+ void&#x22C6; priv; /&#x22C6; Pointer to private data of the API client &#x22C6;/
+ int (&#x22C6;set) (struct dmx_ts_feed_s&#x22C6; feed,
+ __u16 pid,
+ size_t callback_length,
+ size_t circular_buffer_size,
+ int descramble,
+ struct timespec timeout);
+ int (&#x22C6;start_filtering) (struct dmx_ts_feed_s&#x22C6; feed);
+ int (&#x22C6;stop_filtering) (struct dmx_ts_feed_s&#x22C6; feed);
+ int (&#x22C6;set_type) (struct dmx_ts_feed_s&#x22C6; feed,
+ int type,
+ dmx_ts_pes_t pes_type);
+ };
+
+ typedef struct dmx_ts_feed_s dmx_ts_feed_t;
+</programlisting>
+ <programlisting>
+ /&#x22C6;--------------------------------------------------------------------------&#x22C6;/
+ /&#x22C6; PES packet reception (not supported yet) &#x22C6;/
+ /&#x22C6;--------------------------------------------------------------------------&#x22C6;/
+
+ typedef struct dmx_pes_filter_s {
+ struct dmx_pes_s&#x22C6; parent; /&#x22C6; Back-pointer &#x22C6;/
+ void&#x22C6; priv; /&#x22C6; Pointer to private data of the API client &#x22C6;/
+ } dmx_pes_filter_t;
+</programlisting>
+ <programlisting>
+ typedef struct dmx_pes_feed_s {
+ int is_filtering; /&#x22C6; Set to non-zero when filtering in progress &#x22C6;/
+ struct dmx_demux_s&#x22C6; parent; /&#x22C6; Back-pointer &#x22C6;/
+ void&#x22C6; priv; /&#x22C6; Pointer to private data of the API client &#x22C6;/
+ int (&#x22C6;set) (struct dmx_pes_feed_s&#x22C6; feed,
+ __u16 pid,
+ size_t circular_buffer_size,
+ int descramble,
+ struct timespec timeout);
+ int (&#x22C6;start_filtering) (struct dmx_pes_feed_s&#x22C6; feed);
+ int (&#x22C6;stop_filtering) (struct dmx_pes_feed_s&#x22C6; feed);
+ int (&#x22C6;allocate_filter) (struct dmx_pes_feed_s&#x22C6; feed,
+ dmx_pes_filter_t&#x22C6;&#x22C6; filter);
+ int (&#x22C6;release_filter) (struct dmx_pes_feed_s&#x22C6; feed,
+ dmx_pes_filter_t&#x22C6; filter);
+ } dmx_pes_feed_t;
+</programlisting>
+ <programlisting>
+ typedef struct {
+ __u8 filter_value [DMX_MAX_FILTER_SIZE];
+ __u8 filter_mask [DMX_MAX_FILTER_SIZE];
+ struct dmx_section_feed_s&#x22C6; parent; /&#x22C6; Back-pointer &#x22C6;/
+ void&#x22C6; priv; /&#x22C6; Pointer to private data of the API client &#x22C6;/
+ } dmx_section_filter_t;
+</programlisting>
+ <programlisting>
+ struct dmx_section_feed_s {
+ int is_filtering; /&#x22C6; Set to non-zero when filtering in progress &#x22C6;/
+ struct dmx_demux_s&#x22C6; parent; /&#x22C6; Back-pointer &#x22C6;/
+ void&#x22C6; priv; /&#x22C6; Pointer to private data of the API client &#x22C6;/
+ int (&#x22C6;set) (struct dmx_section_feed_s&#x22C6; feed,
+ __u16 pid,
+ size_t circular_buffer_size,
+ int descramble,
+ int check_crc);
+ int (&#x22C6;allocate_filter) (struct dmx_section_feed_s&#x22C6; feed,
+ dmx_section_filter_t&#x22C6;&#x22C6; filter);
+ int (&#x22C6;release_filter) (struct dmx_section_feed_s&#x22C6; feed,
+ dmx_section_filter_t&#x22C6; filter);
+ int (&#x22C6;start_filtering) (struct dmx_section_feed_s&#x22C6; feed);
+ int (&#x22C6;stop_filtering) (struct dmx_section_feed_s&#x22C6; feed);
+ };
+ typedef struct dmx_section_feed_s dmx_section_feed_t;
+
+ /&#x22C6;--------------------------------------------------------------------------&#x22C6;/
+ /&#x22C6; Callback functions &#x22C6;/
+ /&#x22C6;--------------------------------------------------------------------------&#x22C6;/
+
+ typedef int (&#x22C6;dmx_ts_cb) ( __u8 &#x22C6; buffer1,
+ size_t buffer1_length,
+ __u8 &#x22C6; buffer2,
+ size_t buffer2_length,
+ dmx_ts_feed_t&#x22C6; source,
+ dmx_success_t success);
+
+ typedef int (&#x22C6;dmx_section_cb) ( __u8 &#x22C6; buffer1,
+ size_t buffer1_len,
+ __u8 &#x22C6; buffer2,
+ size_t buffer2_len,
+ dmx_section_filter_t &#x22C6; source,
+ dmx_success_t success);
+
+ typedef int (&#x22C6;dmx_pes_cb) ( __u8 &#x22C6; buffer1,
+ size_t buffer1_len,
+ __u8 &#x22C6; buffer2,
+ size_t buffer2_len,
+ dmx_pes_filter_t&#x22C6; source,
+ dmx_success_t success);
+
+ /&#x22C6;--------------------------------------------------------------------------&#x22C6;/
+ /&#x22C6; DVB Front-End &#x22C6;/
+ /&#x22C6;--------------------------------------------------------------------------&#x22C6;/
+
+ typedef enum {
+ DMX_OTHER_FE = 0,
+ DMX_SATELLITE_FE,
+ DMX_CABLE_FE,
+ DMX_TERRESTRIAL_FE,
+ DMX_LVDS_FE,
+ DMX_ASI_FE, /&#x22C6; DVB-ASI interface &#x22C6;/
+ DMX_MEMORY_FE
+ } dmx_frontend_source_t;
+
+ typedef struct {
+ /&#x22C6; The following char&#x22C6; fields point to NULL terminated strings &#x22C6;/
+ char&#x22C6; id; /&#x22C6; Unique front-end identifier &#x22C6;/
+ char&#x22C6; vendor; /&#x22C6; Name of the front-end vendor &#x22C6;/
+ char&#x22C6; model; /&#x22C6; Name of the front-end model &#x22C6;/
+ struct list_head connectivity_list; /&#x22C6; List of front-ends that can
+ be connected to a particular
+ demux &#x22C6;/
+ void&#x22C6; priv; /&#x22C6; Pointer to private data of the API client &#x22C6;/
+ dmx_frontend_source_t source;
+ } dmx_frontend_t;
+
+ /&#x22C6;--------------------------------------------------------------------------&#x22C6;/
+ /&#x22C6; MPEG-2 TS Demux &#x22C6;/
+ /&#x22C6;--------------------------------------------------------------------------&#x22C6;/
+
+ /&#x22C6;
+ &#x22C6; Flags OR'ed in the capabilites field of struct dmx_demux_s.
+ &#x22C6;/
+
+ #define DMX_TS_FILTERING 1
+ #define DMX_PES_FILTERING 2
+ #define DMX_SECTION_FILTERING 4
+ #define DMX_MEMORY_BASED_FILTERING 8 /&#x22C6; write() available &#x22C6;/
+ #define DMX_CRC_CHECKING 16
+ #define DMX_TS_DESCRAMBLING 32
+ #define DMX_SECTION_PAYLOAD_DESCRAMBLING 64
+ #define DMX_MAC_ADDRESS_DESCRAMBLING 128
+</programlisting>
+
+</section>
+<section id="demux_demux_t">
+<title>demux_demux_t</title>
+ <programlisting>
+ /&#x22C6;
+ &#x22C6; DMX_FE_ENTRY(): Casts elements in the list of registered
+ &#x22C6; front-ends from the generic type struct list_head
+ &#x22C6; to the type &#x22C6; dmx_frontend_t
+ &#x22C6;.
+ &#x22C6;/
+
+ #define DMX_FE_ENTRY(list) list_entry(list, dmx_frontend_t, connectivity_list)
+
+ struct dmx_demux_s {
+ /&#x22C6; The following char&#x22C6; fields point to NULL terminated strings &#x22C6;/
+ char&#x22C6; id; /&#x22C6; Unique demux identifier &#x22C6;/
+ char&#x22C6; vendor; /&#x22C6; Name of the demux vendor &#x22C6;/
+ char&#x22C6; model; /&#x22C6; Name of the demux model &#x22C6;/
+ __u32 capabilities; /&#x22C6; Bitfield of capability flags &#x22C6;/
+ dmx_frontend_t&#x22C6; frontend; /&#x22C6; Front-end connected to the demux &#x22C6;/
+ struct list_head reg_list; /&#x22C6; List of registered demuxes &#x22C6;/
+ void&#x22C6; priv; /&#x22C6; Pointer to private data of the API client &#x22C6;/
+ int users; /&#x22C6; Number of users &#x22C6;/
+ int (&#x22C6;open) (struct dmx_demux_s&#x22C6; demux);
+ int (&#x22C6;close) (struct dmx_demux_s&#x22C6; demux);
+ int (&#x22C6;write) (struct dmx_demux_s&#x22C6; demux, const char&#x22C6; buf, size_t count);
+ int (&#x22C6;allocate_ts_feed) (struct dmx_demux_s&#x22C6; demux,
+ dmx_ts_feed_t&#x22C6;&#x22C6; feed,
+ dmx_ts_cb callback);
+ int (&#x22C6;release_ts_feed) (struct dmx_demux_s&#x22C6; demux,
+ dmx_ts_feed_t&#x22C6; feed);
+ int (&#x22C6;allocate_pes_feed) (struct dmx_demux_s&#x22C6; demux,
+ dmx_pes_feed_t&#x22C6;&#x22C6; feed,
+ dmx_pes_cb callback);
+ int (&#x22C6;release_pes_feed) (struct dmx_demux_s&#x22C6; demux,
+ dmx_pes_feed_t&#x22C6; feed);
+ int (&#x22C6;allocate_section_feed) (struct dmx_demux_s&#x22C6; demux,
+ dmx_section_feed_t&#x22C6;&#x22C6; feed,
+ dmx_section_cb callback);
+ int (&#x22C6;release_section_feed) (struct dmx_demux_s&#x22C6; demux,
+ dmx_section_feed_t&#x22C6; feed);
+ int (&#x22C6;descramble_mac_address) (struct dmx_demux_s&#x22C6; demux,
+ __u8&#x22C6; buffer1,
+ size_t buffer1_length,
+ __u8&#x22C6; buffer2,
+ size_t buffer2_length,
+ __u16 pid);
+ int (&#x22C6;descramble_section_payload) (struct dmx_demux_s&#x22C6; demux,
+ __u8&#x22C6; buffer1,
+ size_t buffer1_length,
+ __u8&#x22C6; buffer2, size_t buffer2_length,
+ __u16 pid);
+ int (&#x22C6;add_frontend) (struct dmx_demux_s&#x22C6; demux,
+ dmx_frontend_t&#x22C6; frontend);
+ int (&#x22C6;remove_frontend) (struct dmx_demux_s&#x22C6; demux,
+ dmx_frontend_t&#x22C6; frontend);
+ struct list_head&#x22C6; (&#x22C6;get_frontends) (struct dmx_demux_s&#x22C6; demux);
+ int (&#x22C6;connect_frontend) (struct dmx_demux_s&#x22C6; demux,
+ dmx_frontend_t&#x22C6; frontend);
+ int (&#x22C6;disconnect_frontend) (struct dmx_demux_s&#x22C6; demux);
+
+
+ /&#x22C6; added because js cannot keep track of these himself &#x22C6;/
+ int (&#x22C6;get_pes_pids) (struct dmx_demux_s&#x22C6; demux, __u16 &#x22C6;pids);
+ };
+ typedef struct dmx_demux_s dmx_demux_t;
+</programlisting>
+
+</section>
+<section id="demux_directory">
+<title>Demux directory</title>
+ <programlisting>
+ /&#x22C6;
+ &#x22C6; DMX_DIR_ENTRY(): Casts elements in the list of registered
+ &#x22C6; demuxes from the generic type struct list_head&#x22C6; to the type dmx_demux_t
+ &#x22C6;.
+ &#x22C6;/
+
+ #define DMX_DIR_ENTRY(list) list_entry(list, dmx_demux_t, reg_list)
+
+ int dmx_register_demux (dmx_demux_t&#x22C6; demux);
+ int dmx_unregister_demux (dmx_demux_t&#x22C6; demux);
+ struct list_head&#x22C6; dmx_get_demuxes (void);
+</programlisting>
+ </section></section>
+<section id="demux_directory_api">
+<title>Demux Directory API</title>
+<para>The demux directory is a Linux kernel-wide facility for registering and accessing the
+MPEG-2 TS demuxes in the system. Run-time registering and unregistering of demux drivers
+is possible using this API.
+</para>
+<para>All demux drivers in the directory implement the abstract interface dmx_demux_t.
+</para>
+
+<section
+role="subsection"><title>dmx_register_demux()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function makes a demux driver interface available to the Linux kernel. It is
+ usually called by the init_module() function of the kernel module that contains
+ the demux driver. The caller of this function is responsible for allocating
+ dynamic or static memory for the demux structure and for initializing its fields
+ before calling this function. The memory allocated for the demux structure
+ must not be freed before calling dmx_unregister_demux(),</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int dmx_register_demux ( dmx_demux_t &#x22C6;demux )</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_demux_t*
+ demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux structure.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EEXIST</para>
+</entry><entry
+ align="char">
+<para>A demux with the same value of the id field already stored
+ in the directory.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOSPC</para>
+</entry><entry
+ align="char">
+<para>No space left in the directory.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>dmx_unregister_demux()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function is called to indicate that the given demux interface is no
+ longer available. The caller of this function is responsible for freeing the
+ memory of the demux structure, if it was dynamically allocated before calling
+ dmx_register_demux(). The cleanup_module() function of the kernel module
+ that contains the demux driver should call this function. Note that this function
+ fails if the demux is currently in use, i.e., release_demux() has not been called
+ for the interface.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int dmx_unregister_demux ( dmx_demux_t &#x22C6;demux )</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_demux_t*
+ demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux structure which is to be
+ unregistered.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ENODEV</para>
+</entry><entry
+ align="char">
+<para>The specified demux is not registered in the demux
+ directory.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBUSY</para>
+</entry><entry
+ align="char">
+<para>The specified demux is currently in use.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>dmx_get_demuxes()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Provides the caller with the list of registered demux interfaces, using the
+ standard list structure defined in the include file linux/list.h. The include file
+ demux.h defines the macro DMX_DIR_ENTRY() for converting an element of
+ the generic type struct list_head* to the type dmx_demux_t*. The caller must
+ not free the memory of any of the elements obtained via this function call.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>struct list_head &#x22C6;dmx_get_demuxes ()</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>none</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>struct list_head *</para>
+</entry><entry
+ align="char">
+<para>A list of demux interfaces, or NULL in the case of an
+ empty list.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+ </section></section>
+<section id="demux_api">
+<title>Demux API</title>
+<para>The demux API should be implemented for each demux in the system. It is used to select
+the TS source of a demux and to manage the demux resources. When the demux
+client allocates a resource via the demux API, it receives a pointer to the API of that
+resource.
+</para>
+<para>Each demux receives its TS input from a DVB front-end or from memory, as set via the
+demux API. In a system with more than one front-end, the API can be used to select one of
+the DVB front-ends as a TS source for a demux, unless this is fixed in the HW platform. The
+demux API only controls front-ends regarding their connections with demuxes; the APIs
+used to set the other front-end parameters, such as tuning, are not defined in this
+document.
+</para>
+<para>The functions that implement the abstract interface demux should be defined static or
+module private and registered to the Demux Directory for external access. It is not necessary
+to implement every function in the demux_t struct, however (for example, a demux interface
+might support Section filtering, but not TS or PES filtering). The API client is expected to
+check the value of any function pointer before calling the function: the value of NULL means
+&#8220;function not available&#8221;.
+</para>
+<para>Whenever the functions of the demux API modify shared data, the possibilities of lost
+update and race condition problems should be addressed, e.g. by protecting parts of code with
+mutexes. This is especially important on multi-processor hosts.
+</para>
+<para>Note that functions called from a bottom half context must not sleep, at least in the 2.2.x
+kernels. Even a simple memory allocation can result in a kernel thread being put to sleep if
+swapping is needed. For example, the Linux kernel calls the functions of a network device
+interface from a bottom half context. Thus, if a demux API function is called from network
+device code, the function must not sleep.
+</para>
+
+
+<section id="kdapi_fopen">
+<title>open()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function reserves the demux for use by the caller and, if necessary,
+ initializes the demux. When the demux is no longer needed, the function close()
+ should be called. It should be possible for multiple clients to access the demux
+ at the same time. Thus, the function implementation should increment the
+ demux usage count when open() is called and decrement it when close() is
+ called.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int open ( demux_t&#x22C6; demux );</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>demux_t* demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EUSERS</para>
+</entry><entry
+ align="char">
+<para>Maximum usage count reached.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
+<section id="kdapi_fclose">
+<title>close()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function reserves the demux for use by the caller and, if necessary,
+ initializes the demux. When the demux is no longer needed, the function close()
+ should be called. It should be possible for multiple clients to access the demux
+ at the same time. Thus, the function implementation should increment the
+ demux usage count when open() is called and decrement it when close() is
+ called.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int close(demux_t&#x22C6; demux);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>demux_t* demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENODEV</para>
+</entry><entry
+ align="char">
+<para>The demux was not in use.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
+<section id="kdapi_fwrite">
+<title>write()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function provides the demux driver with a memory buffer containing TS
+ packets. Instead of receiving TS packets from the DVB front-end, the demux
+ driver software will read packets from memory. Any clients of this demux
+ with active TS, PES or Section filters will receive filtered data via the Demux
+ callback API (see 0). The function returns when all the data in the buffer has
+ been consumed by the demux. Demux hardware typically cannot read TS from
+ memory. If this is the case, memory-based filtering has to be implemented
+ entirely in software.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int write(demux_t&#x22C6; demux, const char&#x22C6; buf, size_t
+ count);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>demux_t* demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>const char* buf</para>
+</entry><entry
+ align="char">
+<para>Pointer to the TS data in kernel-space memory.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t length</para>
+</entry><entry
+ align="char">
+<para>Length of the TS data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOSYS</para>
+</entry><entry
+ align="char">
+<para>The command is not implemented.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>allocate_ts_feed()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Allocates a new TS feed, which is used to filter the TS packets carrying a
+ certain PID. The TS feed normally corresponds to a hardware PID filter on the
+ demux chip.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int allocate_ts_feed(dmx_demux_t&#x22C6; demux,
+ dmx_ts_feed_t&#x22C6;&#x22C6; feed, dmx_ts_cb callback);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>demux_t* demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_ts_feed_t**
+ feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the TS feed API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_ts_cb callback</para>
+</entry><entry
+ align="char">
+<para>Pointer to the callback function for passing received TS
+ packet</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EBUSY</para>
+</entry><entry
+ align="char">
+<para>No more TS feeds available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOSYS</para>
+</entry><entry
+ align="char">
+<para>The command is not implemented.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>release_ts_feed()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Releases the resources allocated with allocate_ts_feed(). Any filtering in
+ progress on the TS feed should be stopped before calling this function.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int release_ts_feed(dmx_demux_t&#x22C6; demux,
+ dmx_ts_feed_t&#x22C6; feed);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>demux_t* demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_ts_feed_t* feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the TS feed API and instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>allocate_section_feed()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Allocates a new section feed, i.e. a demux resource for filtering and receiving
+ sections. On platforms with hardware support for section filtering, a section
+ feed is directly mapped to the demux HW. On other platforms, TS packets are
+ first PID filtered in hardware and a hardware section filter then emulated in
+ software. The caller obtains an API pointer of type dmx_section_feed_t as an
+ out parameter. Using this API the caller can set filtering parameters and start
+ receiving sections.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int allocate_section_feed(dmx_demux_t&#x22C6; demux,
+ dmx_section_feed_t &#x22C6;&#x22C6;feed, dmx_section_cb callback);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>demux_t *demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_section_feed_t
+ **feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the section feed API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_section_cb
+ callback</para>
+</entry><entry
+ align="char">
+<para>Pointer to the callback function for passing received
+ sections.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EBUSY</para>
+</entry><entry
+ align="char">
+<para>No more section feeds available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOSYS</para>
+</entry><entry
+ align="char">
+<para>The command is not implemented.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>release_section_feed()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Releases the resources allocated with allocate_section_feed(), including
+ allocated filters. Any filtering in progress on the section feed should be stopped
+ before calling this function.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int release_section_feed(dmx_demux_t&#x22C6; demux,
+ dmx_section_feed_t &#x22C6;feed);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>demux_t *demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_section_feed_t
+ *feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the section feed API and instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>descramble_mac_address()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function runs a descrambling algorithm on the destination MAC
+ address field of a DVB Datagram Section, replacing the original address
+ with its un-encrypted version. Otherwise, the description on the function
+ descramble_section_payload() applies also to this function.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int descramble_mac_address(dmx_demux_t&#x22C6; demux, __u8
+ &#x22C6;buffer1, size_t buffer1_length, __u8 &#x22C6;buffer2,
+ size_t buffer2_length, __u16 pid);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_demux_t
+ *demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u8 *buffer1</para>
+</entry><entry
+ align="char">
+<para>Pointer to the first byte of the section.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t buffer1_length</para>
+</entry><entry
+ align="char">
+<para>Length of the section data, including headers and CRC,
+ in buffer1.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u8* buffer2</para>
+</entry><entry
+ align="char">
+<para>Pointer to the tail of the section data, or NULL. The
+ pointer has a non-NULL value if the section wraps past
+ the end of a circular buffer.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t buffer2_length</para>
+</entry><entry
+ align="char">
+<para>Length of the section data, including headers and CRC,
+ in buffer2.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u16 pid</para>
+</entry><entry
+ align="char">
+<para>The PID on which the section was received. Useful
+ for obtaining the descrambling key, e.g. from a DVB
+ Common Access facility.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOSYS</para>
+</entry><entry
+ align="char">
+<para>No descrambling facility available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>descramble_section_payload()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function runs a descrambling algorithm on the payload of a DVB
+ Datagram Section, replacing the original payload with its un-encrypted
+ version. The function will be called from the demux API implementation;
+ the API client need not call this function directly. Section-level scrambling
+ algorithms are currently standardized only for DVB-RCC (return channel
+ over 2-directional cable TV network) systems. For all other DVB networks,
+ encryption schemes are likely to be proprietary to each data broadcaster. Thus,
+ it is expected that this function pointer will have the value of NULL (i.e.,
+ function not available) in most demux API implementations. Nevertheless, it
+ should be possible to use the function pointer as a hook for dynamically adding
+ a &#8220;plug-in&#8221; descrambling facility to a demux driver.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>While this function is not needed with hardware-based section descrambling,
+ the descramble_section_payload function pointer can be used to override the
+ default hardware-based descrambling algorithm: if the function pointer has a
+ non-NULL value, the corresponding function should be used instead of any
+ descrambling hardware.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int descramble_section_payload(dmx_demux_t&#x22C6; demux,
+ __u8 &#x22C6;buffer1, size_t buffer1_length, __u8 &#x22C6;buffer2,
+ size_t buffer2_length, __u16 pid);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_demux_t
+ *demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u8 *buffer1</para>
+</entry><entry
+ align="char">
+<para>Pointer to the first byte of the section.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t buffer1_length</para>
+</entry><entry
+ align="char">
+<para>Length of the section data, including headers and CRC,
+ in buffer1.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u8 *buffer2</para>
+</entry><entry
+ align="char">
+<para>Pointer to the tail of the section data, or NULL. The
+ pointer has a non-NULL value if the section wraps past
+ the end of a circular buffer.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t buffer2_length</para>
+</entry><entry
+ align="char">
+<para>Length of the section data, including headers and CRC,
+ in buffer2.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u16 pid</para>
+</entry><entry
+ align="char">
+<para>The PID on which the section was received. Useful
+ for obtaining the descrambling key, e.g. from a DVB
+ Common Access facility.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOSYS</para>
+</entry><entry
+ align="char">
+<para>No descrambling facility available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>add_frontend()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Registers a connectivity between a demux and a front-end, i.e., indicates that
+ the demux can be connected via a call to connect_frontend() to use the given
+ front-end as a TS source. The client of this function has to allocate dynamic or
+ static memory for the frontend structure and initialize its fields before calling
+ this function. This function is normally called during the driver initialization.
+ The caller must not free the memory of the frontend struct before successfully
+ calling remove_frontend().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int add_frontend(dmx_demux_t &#x22C6;demux, dmx_frontend_t
+ &#x22C6;frontend);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_demux_t*
+ demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_frontend_t*
+ frontend</para>
+</entry><entry
+ align="char">
+<para>Pointer to the front-end instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EEXIST</para>
+</entry><entry
+ align="char">
+<para>A front-end with the same value of the id field already
+ registered.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINUSE</para>
+</entry><entry
+ align="char">
+<para>The demux is in use.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOMEM</para>
+</entry><entry
+ align="char">
+<para>No more front-ends can be added.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>remove_frontend()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Indicates that the given front-end, registered by a call to add_frontend(), can
+ no longer be connected as a TS source by this demux. The function should be
+ called when a front-end driver or a demux driver is removed from the system.
+ If the front-end is in use, the function fails with the return value of -EBUSY.
+ After successfully calling this function, the caller can free the memory of
+ the frontend struct if it was dynamically allocated before the add_frontend()
+ operation.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int remove_frontend(dmx_demux_t&#x22C6; demux,
+ dmx_frontend_t&#x22C6; frontend);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_demux_t*
+ demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_frontend_t*
+ frontend</para>
+</entry><entry
+ align="char">
+<para>Pointer to the front-end instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EBUSY</para>
+</entry><entry
+ align="char">
+<para>The front-end is in use, i.e. a call to connect_frontend()
+ has not been followed by a call to disconnect_frontend().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>get_frontends()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Provides the APIs of the front-ends that have been registered for this demux.
+ Any of the front-ends obtained with this call can be used as a parameter for
+ connect_frontend().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>The include file demux.h contains the macro DMX_FE_ENTRY() for
+ converting an element of the generic type struct list_head* to the type
+ dmx_frontend_t*. The caller must not free the memory of any of the elements
+ obtained via this function call.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>struct list_head&#x22C6; get_frontends(dmx_demux_t&#x22C6; demux);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_demux_t*
+ demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_demux_t*</para>
+</entry><entry
+ align="char">
+<para>A list of front-end interfaces, or NULL in the case of an
+ empty list.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>connect_frontend()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Connects the TS output of the front-end to the input of the demux. A demux
+ can only be connected to a front-end registered to the demux with the function
+ add_frontend().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>It may or may not be possible to connect multiple demuxes to the same
+ front-end, depending on the capabilities of the HW platform. When not used,
+ the front-end should be released by calling disconnect_frontend().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int connect_frontend(dmx_demux_t&#x22C6; demux,
+ dmx_frontend_t&#x22C6; frontend);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_demux_t*
+ demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_frontend_t*
+ frontend</para>
+</entry><entry
+ align="char">
+<para>Pointer to the front-end instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EBUSY</para>
+</entry><entry
+ align="char">
+<para>The front-end is in use.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>disconnect_frontend()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Disconnects the demux and a front-end previously connected by a
+ connect_frontend() call.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int disconnect_frontend(dmx_demux_t&#x22C6; demux);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_demux_t*
+ demux</para>
+</entry><entry
+ align="char">
+<para>Pointer to the demux API and instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+ </section></section>
+<section id="demux_callback_api">
+<title>Demux Callback API</title>
+<para>This kernel-space API comprises the callback functions that deliver filtered data to the
+demux client. Unlike the other APIs, these API functions are provided by the client and called
+from the demux code.
+</para>
+<para>The function pointers of this abstract interface are not packed into a structure as in the
+other demux APIs, because the callback functions are registered and used independent
+of each other. As an example, it is possible for the API client to provide several
+callback functions for receiving TS packets and no callbacks for PES packets or
+sections.
+</para>
+<para>The functions that implement the callback API need not be re-entrant: when a demux
+driver calls one of these functions, the driver is not allowed to call the function again before
+the original call returns. If a callback is triggered by a hardware interrupt, it is recommended
+to use the Linux &#8220;bottom half&#8221; mechanism or start a tasklet instead of making the callback
+function call directly from a hardware interrupt.
+</para>
+
+<section
+role="subsection"><title>dmx_ts_cb()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function, provided by the client of the demux API, is called from the
+ demux code. The function is only called when filtering on this TS feed has
+ been enabled using the start_filtering() function.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>Any TS packets that match the filter settings are copied to a circular buffer. The
+ filtered TS packets are delivered to the client using this callback function. The
+ size of the circular buffer is controlled by the circular_buffer_size parameter
+ of the set() function in the TS Feed API. It is expected that the buffer1 and
+ buffer2 callback parameters point to addresses within the circular buffer, but
+ other implementations are also possible. Note that the called party should not
+ try to free the memory the buffer1 and buffer2 parameters point to.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>When this function is called, the buffer1 parameter typically points to the
+ start of the first undelivered TS packet within a circular buffer. The buffer2
+ buffer parameter is normally NULL, except when the received TS packets have
+ crossed the last address of the circular buffer and &#8221;wrapped&#8221; to the beginning
+ of the buffer. In the latter case the buffer1 parameter would contain an address
+ within the circular buffer, while the buffer2 parameter would contain the first
+ address of the circular buffer.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>The number of bytes delivered with this function (i.e. buffer1_length +
+ buffer2_length) is usually equal to the value of callback_length parameter
+ given in the set() function, with one exception: if a timeout occurs before
+ receiving callback_length bytes of TS data, any undelivered packets are
+ immediately delivered to the client by calling this function. The timeout
+ duration is controlled by the set() function in the TS Feed API.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>If a TS packet is received with errors that could not be fixed by the TS-level
+ forward error correction (FEC), the Transport_error_indicator flag of the TS
+ packet header should be set. The TS packet should not be discarded, as
+ the error can possibly be corrected by a higher layer protocol. If the called
+ party is slow in processing the callback, it is possible that the circular buffer
+ eventually fills up. If this happens, the demux driver should discard any TS
+ packets received while the buffer is full. The error should be indicated to the
+ client on the next callback by setting the success parameter to the value of
+ DMX_OVERRUN_ERROR.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>The type of data returned to the callback can be selected by the new
+ function int (*set_type) (struct dmx_ts_feed_s* feed, int type, dmx_ts_pes_t
+ pes_type) which is part of the dmx_ts_feed_s struct (also cf. to the
+ include file ost/demux.h) The type parameter decides if the raw TS packet
+ (TS_PACKET) or just the payload (TS_PACKET&#8212;TS_PAYLOAD_ONLY)
+ should be returned. If additionally the TS_DECODER bit is set the stream
+ will also be sent to the hardware MPEG decoder. In this case, the second
+ flag decides as what kind of data the stream should be interpreted. The
+ possible choices are one of DMX_TS_PES_AUDIO, DMX_TS_PES_VIDEO,
+ DMX_TS_PES_TELETEXT, DMX_TS_PES_SUBTITLE,
+ DMX_TS_PES_PCR, or DMX_TS_PES_OTHER.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int dmx_ts_cb(__u8&#x22C6; buffer1, size_t buffer1_length,
+ __u8&#x22C6; buffer2, size_t buffer2_length, dmx_ts_feed_t&#x22C6;
+ source, dmx_success_t success);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>__u8* buffer1</para>
+</entry><entry
+ align="char">
+<para>Pointer to the start of the filtered TS packets.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t buffer1_length</para>
+</entry><entry
+ align="char">
+<para>Length of the TS data in buffer1.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u8* buffer2</para>
+</entry><entry
+ align="char">
+<para>Pointer to the tail of the filtered TS packets, or NULL.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t buffer2_length</para>
+</entry><entry
+ align="char">
+<para>Length of the TS data in buffer2.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_ts_feed_t*
+ source</para>
+</entry><entry
+ align="char">
+<para>Indicates which TS feed is the source of the callback.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_success_t
+ success</para>
+</entry><entry
+ align="char">
+<para>Indicates if there was an error in TS reception.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>Continue filtering.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-1</para>
+</entry><entry
+ align="char">
+<para>Stop filtering - has the same effect as a call to
+ stop_filtering() on the TS Feed API.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>dmx_section_cb()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function, provided by the client of the demux API, is called from the
+ demux code. The function is only called when filtering of sections has been
+ enabled using the function start_filtering() of the section feed API. When the
+ demux driver has received a complete section that matches at least one section
+ filter, the client is notified via this callback function. Normally this function is
+ called for each received section; however, it is also possible to deliver multiple
+ sections with one callback, for example when the system load is high. If an
+ error occurs while receiving a section, this function should be called with
+ the corresponding error type set in the success field, whether or not there is
+ data to deliver. The Section Feed implementation should maintain a circular
+ buffer for received sections. However, this is not necessary if the Section Feed
+ API is implemented as a client of the TS Feed API, because the TS Feed
+ implementation then buffers the received data. The size of the circular buffer
+ can be configured using the set() function in the Section Feed API. If there
+ is no room in the circular buffer when a new section is received, the section
+ must be discarded. If this happens, the value of the success parameter should
+ be DMX_OVERRUN_ERROR on the next callback.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int dmx_section_cb(__u8&#x22C6; buffer1, size_t
+ buffer1_length, __u8&#x22C6; buffer2, size_t
+ buffer2_length, dmx_section_filter_t&#x22C6; source,
+ dmx_success_t success);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>__u8* buffer1</para>
+</entry><entry
+ align="char">
+<para>Pointer to the start of the filtered section, e.g. within the
+ circular buffer of the demux driver.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t buffer1_length</para>
+</entry><entry
+ align="char">
+<para>Length of the filtered section data in buffer1, including
+ headers and CRC.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u8* buffer2</para>
+</entry><entry
+ align="char">
+<para>Pointer to the tail of the filtered section data, or NULL.
+ Useful to handle the wrapping of a circular buffer.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t buffer2_length</para>
+</entry><entry
+ align="char">
+<para>Length of the filtered section data in buffer2, including
+ headers and CRC.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_section_filter_t*
+ filter</para>
+</entry><entry
+ align="char">
+<para>Indicates the filter that triggered the callback.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_success_t
+ success</para>
+</entry><entry
+ align="char">
+<para>Indicates if there was an error in section reception.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>Continue filtering.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-1</para>
+</entry><entry
+ align="char">
+<para>Stop filtering - has the same effect as a call to
+ stop_filtering() on the Section Feed API.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+ </section></section>
+<section id="ts_feed_api">
+<title>TS Feed API</title>
+<para>A TS feed is typically mapped to a hardware PID filter on the demux chip.
+Using this API, the client can set the filtering properties to start/stop filtering TS
+packets on a particular TS feed. The API is defined as an abstract interface of the type
+dmx_ts_feed_t.
+</para>
+<para>The functions that implement the interface should be defined static or module private. The
+client can get the handle of a TS feed API by calling the function allocate_ts_feed() in the
+demux API.
+</para>
+
+<section
+role="subsection"><title>set()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function sets the parameters of a TS feed. Any filtering in progress on the
+ TS feed must be stopped before calling this function.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int set ( dmx_ts_feed_t&#x22C6; feed, __u16 pid, size_t
+ callback_length, size_t circular_buffer_size, int
+ descramble, struct timespec timeout);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_ts_feed_t* feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the TS feed API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u16 pid</para>
+</entry><entry
+ align="char">
+<para>PID value to filter. Only the TS packets carrying the
+ specified PID will be passed to the API client.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t
+ callback_length</para>
+</entry><entry
+ align="char">
+<para>Number of bytes to deliver with each call to the
+ dmx_ts_cb() callback function. The value of this
+ parameter should be a multiple of 188.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t
+ circular_buffer_size</para>
+</entry><entry
+ align="char">
+<para>Size of the circular buffer for the filtered TS packets.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int descramble</para>
+</entry><entry
+ align="char">
+<para>If non-zero, descramble the filtered TS packets.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct timespec
+ timeout</para>
+</entry><entry
+ align="char">
+<para>Maximum time to wait before delivering received TS
+ packets to the client.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOMEM</para>
+</entry><entry
+ align="char">
+<para>Not enough memory for the requested buffer size.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOSYS</para>
+</entry><entry
+ align="char">
+<para>No descrambling facility available for TS.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>start_filtering()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Starts filtering TS packets on this TS feed, according to its settings. The PID
+ value to filter can be set by the API client. All matching TS packets are
+ delivered asynchronously to the client, using the callback function registered
+ with allocate_ts_feed().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int start_filtering(dmx_ts_feed_t&#x22C6; feed);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_ts_feed_t* feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the TS feed API and instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>stop_filtering()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Stops filtering TS packets on this TS feed.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int stop_filtering(dmx_ts_feed_t&#x22C6; feed);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_ts_feed_t* feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the TS feed API and instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+ </section></section>
+<section id="section_feed_api">
+<title>Section Feed API</title>
+<para>A section feed is a resource consisting of a PID filter and a set of section filters. Using this
+API, the client can set the properties of a section feed and to start/stop filtering. The API is
+defined as an abstract interface of the type dmx_section_feed_t. The functions that implement
+the interface should be defined static or module private. The client can get the handle of
+a section feed API by calling the function allocate_section_feed() in the demux
+API.
+</para>
+<para>On demux platforms that provide section filtering in hardware, the Section Feed API
+implementation provides a software wrapper for the demux hardware. Other platforms may
+support only PID filtering in hardware, requiring that TS packets are converted to sections in
+software. In the latter case the Section Feed API implementation can be a client of the TS
+Feed API.
+</para>
+
+</section>
+<section id="kdapi_set">
+<title>set()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function sets the parameters of a section feed. Any filtering in progress on
+ the section feed must be stopped before calling this function. If descrambling
+ is enabled, the payload_scrambling_control and address_scrambling_control
+ fields of received DVB datagram sections should be observed. If either one is
+ non-zero, the section should be descrambled either in hardware or using the
+ functions descramble_mac_address() and descramble_section_payload() of the
+ demux API. Note that according to the MPEG-2 Systems specification, only
+ the payloads of private sections can be scrambled while the rest of the section
+ data must be sent in the clear.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int set(dmx_section_feed_t&#x22C6; feed, __u16 pid, size_t
+ circular_buffer_size, int descramble, int
+ check_crc);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_section_feed_t*
+ feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the section feed API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u16 pid</para>
+</entry><entry
+ align="char">
+<para>PID value to filter; only the TS packets carrying the
+ specified PID will be accepted.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t
+ circular_buffer_size</para>
+</entry><entry
+ align="char">
+<para>Size of the circular buffer for filtered sections.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int descramble</para>
+</entry><entry
+ align="char">
+<para>If non-zero, descramble any sections that are scrambled.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int check_crc</para>
+</entry><entry
+ align="char">
+<para>If non-zero, check the CRC values of filtered sections.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOMEM</para>
+</entry><entry
+ align="char">
+<para>Not enough memory for the requested buffer size.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOSYS</para>
+</entry><entry
+ align="char">
+<para>No descrambling facility available for sections.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameters.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>allocate_filter()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function is used to allocate a section filter on the demux. It should only be
+ called when no filtering is in progress on this section feed. If a filter cannot be
+ allocated, the function fails with -ENOSPC. See in section ?? for the format of
+ the section filter.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>The bitfields filter_mask and filter_value should only be modified when no
+ filtering is in progress on this section feed. filter_mask controls which bits of
+ filter_value are compared with the section headers/payload. On a binary value
+ of 1 in filter_mask, the corresponding bits are compared. The filter only accepts
+ sections that are equal to filter_value in all the tested bit positions. Any changes
+ to the values of filter_mask and filter_value are guaranteed to take effect only
+ when the start_filtering() function is called next time. The parent pointer in
+ the struct is initialized by the API implementation to the value of the feed
+ parameter. The priv pointer is not used by the API implementation, and can
+ thus be freely utilized by the caller of this function. Any data pointed to by the
+ priv pointer is available to the recipient of the dmx_section_cb() function call.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>While the maximum section filter length (DMX_MAX_FILTER_SIZE) is
+ currently set at 16 bytes, hardware filters of that size are not available on all
+ platforms. Therefore, section filtering will often take place first in hardware,
+ followed by filtering in software for the header bytes that were not covered
+ by a hardware filter. The filter_mask field can be checked to determine how
+ many bytes of the section filter are actually used, and if the hardware filter will
+ suffice. Additionally, software-only section filters can optionally be allocated
+ to clients when all hardware section filters are in use. Note that on most demux
+ hardware it is not possible to filter on the section_length field of the section
+ header &#8211; thus this field is ignored, even though it is included in filter_value and
+ filter_mask fields.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int allocate_filter(dmx_section_feed_t&#x22C6; feed,
+ dmx_section_filter_t&#x22C6;&#x22C6; filter);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_section_feed_t*
+ feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the section feed API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_section_filter_t**
+ filter</para>
+</entry><entry
+ align="char">
+<para>Pointer to the allocated filter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENOSPC</para>
+</entry><entry
+ align="char">
+<para>No filters of given type and length available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameters.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>release_filter()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This function releases all the resources of a previously allocated section filter.
+ The function should not be called while filtering is in progress on this section
+ feed. After calling this function, the caller should not try to dereference the
+ filter pointer.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int release_filter ( dmx_section_feed_t&#x22C6; feed,
+ dmx_section_filter_t&#x22C6; filter);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_section_feed_t*
+ feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the section feed API and instance data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>dmx_section_filter_t*
+ filter</para>
+</entry><entry
+ align="char">
+<para>I/O Pointer to the instance data of a section filter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-ENODEV</para>
+</entry><entry
+ align="char">
+<para>No such filter allocated.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>start_filtering()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Starts filtering sections on this section feed, according to its settings. Sections
+ are first filtered based on their PID and then matched with the section
+ filters allocated for this feed. If the section matches the PID filter and
+ at least one section filter, it is delivered to the API client. The section
+ is delivered asynchronously using the callback function registered with
+ allocate_section_feed().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int start_filtering ( dmx_section_feed_t&#x22C6; feed );</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_section_feed_t*
+ feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the section feed API and instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section
+role="subsection"><title>stop_filtering()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>Stops filtering sections on this section feed. Note that any changes to the
+ filtering parameters (filter_value, filter_mask, etc.) should only be made when
+ filtering is stopped.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int stop_filtering ( dmx_section_feed_t&#x22C6; feed );</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>dmx_section_feed_t*
+ feed</para>
+</entry><entry
+ align="char">
+<para>Pointer to the section feed API and instance data.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURNS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>0</para>
+</entry><entry
+ align="char">
+<para>The function was completed without errors.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>-EINVAL</para>
+</entry><entry
+ align="char">
+<para>Bad parameter.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
diff --git a/Documentation/DocBook/media/dvb/net.xml b/Documentation/DocBook/media/dvb/net.xml
new file mode 100644
index 000000000..a193e8694
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/net.xml
@@ -0,0 +1,156 @@
+<title>DVB Network API</title>
+<para>The DVB net device enables feeding of MPE (multi protocol encapsulation) packets
+received via DVB into the Linux network protocol stack, e.g. for internet via satellite
+applications. It can be accessed through <emphasis role="tt">/dev/dvb/adapter0/net0</emphasis>. Data types and
+and ioctl definitions can be accessed by including <emphasis role="tt">linux/dvb/net.h</emphasis> in your
+application.
+</para>
+<section id="dvb_net_types">
+<title>DVB Net Data Types</title>
+
+<section id="dvb-net-if">
+<title>struct dvb_net_if</title>
+<programlisting>
+struct dvb_net_if {
+ __u16 pid;
+ __u16 if_num;
+ __u8 feedtype;
+#define DVB_NET_FEEDTYPE_MPE 0 /&#x22C6; multi protocol encapsulation &#x22C6;/
+#define DVB_NET_FEEDTYPE_ULE 1 /&#x22C6; ultra lightweight encapsulation &#x22C6;/
+};
+</programlisting>
+</section>
+
+</section>
+<section id="net_fcalls">
+<title>DVB net Function Calls</title>
+<para>To be written&#x2026;
+</para>
+
+<section id="NET_ADD_IF"
+role="subsection"><title>NET_ADD_IF</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = NET_ADD_IF,
+ struct dvb_net_if *if);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals NET_ADD_IF for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct dvb_net_if *if
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="NET_REMOVE_IF"
+role="subsection"><title>NET_REMOVE_IF</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = NET_REMOVE_IF);
+</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals NET_REMOVE_IF for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+
+<section id="NET_GET_IF"
+role="subsection"><title>NET_GET_IF</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is undocumented. Documentation is welcome.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = NET_GET_IF,
+ struct dvb_net_if *if);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals NET_GET_IF for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct dvb_net_if *if
+</para>
+</entry><entry
+ align="char">
+<para>Undocumented.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+</section>
+</section>
diff --git a/Documentation/DocBook/media/dvb/video.xml b/Documentation/DocBook/media/dvb/video.xml
new file mode 100644
index 000000000..3ea1ca7e7
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/video.xml
@@ -0,0 +1,1968 @@
+<title>DVB Video Device</title>
+<para>The DVB video device controls the MPEG2 video decoder of the DVB hardware. It
+can be accessed through <emphasis role="tt">/dev/dvb/adapter0/video0</emphasis>. Data types and and
+ioctl definitions can be accessed by including <emphasis role="tt">linux/dvb/video.h</emphasis> in your
+application.
+</para>
+<para>Note that the DVB video device only controls decoding of the MPEG video stream, not
+its presentation on the TV or computer screen. On PCs this is typically handled by an
+associated video4linux device, e.g. <emphasis role="tt">/dev/video</emphasis>, which allows scaling and defining output
+windows.
+</para>
+<para>Some DVB cards don&#8217;t have their own MPEG decoder, which results in the omission of
+the audio and video device as well as the video4linux device.
+</para>
+<para>The ioctls that deal with SPUs (sub picture units) and navigation packets are only
+supported on some MPEG decoders made for DVD playback.
+</para>
+<para>
+These ioctls were also used by V4L2 to control MPEG decoders implemented in V4L2. The use
+of these ioctls for that purpose has been made obsolete and proper V4L2 ioctls or controls
+have been created to replace that functionality.</para>
+<section id="video_types">
+<title>Video Data Types</title>
+
+<section id="video-format-t">
+<title>video_format_t</title>
+<para>The <emphasis role="tt">video_format_t</emphasis> data type defined by
+</para>
+<programlisting>
+typedef enum {
+ VIDEO_FORMAT_4_3, /&#x22C6; Select 4:3 format &#x22C6;/
+ VIDEO_FORMAT_16_9, /&#x22C6; Select 16:9 format. &#x22C6;/
+ VIDEO_FORMAT_221_1 /&#x22C6; 2.21:1 &#x22C6;/
+} video_format_t;
+</programlisting>
+<para>is used in the VIDEO_SET_FORMAT function (??) to tell the driver which aspect ratio
+the output hardware (e.g. TV) has. It is also used in the data structures video_status
+(??) returned by VIDEO_GET_STATUS (??) and video_event (??) returned by
+VIDEO_GET_EVENT (??) which report about the display format of the current video
+stream.
+</para>
+</section>
+
+<section id="video-displayformat-t">
+<title>video_displayformat_t</title>
+<para>In case the display format of the video stream and of the display hardware differ the
+application has to specify how to handle the cropping of the picture. This can be done using
+the VIDEO_SET_DISPLAY_FORMAT call (??) which accepts
+</para>
+<programlisting>
+typedef enum {
+ VIDEO_PAN_SCAN, /&#x22C6; use pan and scan format &#x22C6;/
+ VIDEO_LETTER_BOX, /&#x22C6; use letterbox format &#x22C6;/
+ VIDEO_CENTER_CUT_OUT /&#x22C6; use center cut out format &#x22C6;/
+} video_displayformat_t;
+</programlisting>
+<para>as argument.
+</para>
+</section>
+
+<section id="video-stream-source-t">
+<title>video_stream_source_t</title>
+<para>The video stream source is set through the VIDEO_SELECT_SOURCE call and can take
+the following values, depending on whether we are replaying from an internal (demuxer) or
+external (user write) source.
+</para>
+<programlisting>
+typedef enum {
+ VIDEO_SOURCE_DEMUX, /&#x22C6; Select the demux as the main source &#x22C6;/
+ VIDEO_SOURCE_MEMORY /&#x22C6; If this source is selected, the stream
+ comes from the user through the write
+ system call &#x22C6;/
+} video_stream_source_t;
+</programlisting>
+<para>VIDEO_SOURCE_DEMUX selects the demultiplexer (fed either by the frontend or the
+DVR device) as the source of the video stream. If VIDEO_SOURCE_MEMORY
+is selected the stream comes from the application through the <emphasis role="tt">write()</emphasis> system
+call.
+</para>
+</section>
+
+<section id="video-play-state-t">
+<title>video_play_state_t</title>
+<para>The following values can be returned by the VIDEO_GET_STATUS call representing the
+state of video playback.
+</para>
+<programlisting>
+typedef enum {
+ VIDEO_STOPPED, /&#x22C6; Video is stopped &#x22C6;/
+ VIDEO_PLAYING, /&#x22C6; Video is currently playing &#x22C6;/
+ VIDEO_FREEZED /&#x22C6; Video is freezed &#x22C6;/
+} video_play_state_t;
+</programlisting>
+</section>
+
+<section id="video-command">
+<title>struct video_command</title>
+<para>The structure must be zeroed before use by the application
+This ensures it can be extended safely in the future.</para>
+<programlisting>
+struct video_command {
+ __u32 cmd;
+ __u32 flags;
+ union {
+ struct {
+ __u64 pts;
+ } stop;
+
+ struct {
+ /&#x22C6; 0 or 1000 specifies normal speed,
+ 1 specifies forward single stepping,
+ -1 specifies backward single stepping,
+ &gt;>1: playback at speed/1000 of the normal speed,
+ &lt;-1: reverse playback at (-speed/1000) of the normal speed. &#x22C6;/
+ __s32 speed;
+ __u32 format;
+ } play;
+
+ struct {
+ __u32 data[16];
+ } raw;
+ };
+};
+</programlisting>
+</section>
+
+<section id="video-size-t">
+<title>video_size_t</title>
+<programlisting>
+typedef struct {
+ int w;
+ int h;
+ video_format_t aspect_ratio;
+} video_size_t;
+</programlisting>
+</section>
+
+
+<section id="video-event">
+<title>struct video_event</title>
+<para>The following is the structure of a video event as it is returned by the VIDEO_GET_EVENT
+call.
+</para>
+<programlisting>
+struct video_event {
+ __s32 type;
+#define VIDEO_EVENT_SIZE_CHANGED 1
+#define VIDEO_EVENT_FRAME_RATE_CHANGED 2
+#define VIDEO_EVENT_DECODER_STOPPED 3
+#define VIDEO_EVENT_VSYNC 4
+ __kernel_time_t timestamp;
+ union {
+ video_size_t size;
+ unsigned int frame_rate; /&#x22C6; in frames per 1000sec &#x22C6;/
+ unsigned char vsync_field; /&#x22C6; unknown/odd/even/progressive &#x22C6;/
+ } u;
+};
+</programlisting>
+</section>
+
+<section id="video-status">
+<title>struct video_status</title>
+<para>The VIDEO_GET_STATUS call returns the following structure informing about various
+states of the playback operation.
+</para>
+<programlisting>
+struct video_status {
+ int video_blank; /&#x22C6; blank video on freeze? &#x22C6;/
+ video_play_state_t play_state; /&#x22C6; current state of playback &#x22C6;/
+ video_stream_source_t stream_source; /&#x22C6; current source (demux/memory) &#x22C6;/
+ video_format_t video_format; /&#x22C6; current aspect ratio of stream &#x22C6;/
+ video_displayformat_t display_format;/&#x22C6; selected cropping mode &#x22C6;/
+};
+</programlisting>
+<para>If video_blank is set video will be blanked out if the channel is changed or if playback is
+stopped. Otherwise, the last picture will be displayed. play_state indicates if the video is
+currently frozen, stopped, or being played back. The stream_source corresponds to the seleted
+source for the video stream. It can come either from the demultiplexer or from memory.
+The video_format indicates the aspect ratio (one of 4:3 or 16:9) of the currently
+played video stream. Finally, display_format corresponds to the selected cropping
+mode in case the source video format is not the same as the format of the output
+device.
+</para>
+</section>
+
+<section id="video-still-picture">
+<title>struct video_still_picture</title>
+<para>An I-frame displayed via the VIDEO_STILLPICTURE call is passed on within the
+following structure.
+</para>
+<programlisting>
+/&#x22C6; pointer to and size of a single iframe in memory &#x22C6;/
+struct video_still_picture {
+ char &#x22C6;iFrame; /&#x22C6; pointer to a single iframe in memory &#x22C6;/
+ int32_t size;
+};
+</programlisting>
+</section>
+
+<section id="video_caps">
+<title>video capabilities</title>
+<para>A call to VIDEO_GET_CAPABILITIES returns an unsigned integer with the following
+bits set according to the hardwares capabilities.
+</para>
+<programlisting>
+ /&#x22C6; bit definitions for capabilities: &#x22C6;/
+ /&#x22C6; can the hardware decode MPEG1 and/or MPEG2? &#x22C6;/
+ #define VIDEO_CAP_MPEG1 1
+ #define VIDEO_CAP_MPEG2 2
+ /&#x22C6; can you send a system and/or program stream to video device?
+ (you still have to open the video and the audio device but only
+ send the stream to the video device) &#x22C6;/
+ #define VIDEO_CAP_SYS 4
+ #define VIDEO_CAP_PROG 8
+ /&#x22C6; can the driver also handle SPU, NAVI and CSS encoded data?
+ (CSS API is not present yet) &#x22C6;/
+ #define VIDEO_CAP_SPU 16
+ #define VIDEO_CAP_NAVI 32
+ #define VIDEO_CAP_CSS 64
+</programlisting>
+</section>
+
+<section id="video-system">
+<title>video_system_t</title>
+<para>A call to VIDEO_SET_SYSTEM sets the desired video system for TV output. The
+following system types can be set:
+</para>
+<programlisting>
+typedef enum {
+ VIDEO_SYSTEM_PAL,
+ VIDEO_SYSTEM_NTSC,
+ VIDEO_SYSTEM_PALN,
+ VIDEO_SYSTEM_PALNc,
+ VIDEO_SYSTEM_PALM,
+ VIDEO_SYSTEM_NTSC60,
+ VIDEO_SYSTEM_PAL60,
+ VIDEO_SYSTEM_PALM60
+} video_system_t;
+</programlisting>
+</section>
+
+<section id="video-highlight">
+<title>struct video_highlight</title>
+<para>Calling the ioctl VIDEO_SET_HIGHLIGHTS posts the SPU highlight information. The
+call expects the following format for that information:
+</para>
+<programlisting>
+ typedef
+ struct video_highlight {
+ boolean active; /&#x22C6; 1=show highlight, 0=hide highlight &#x22C6;/
+ uint8_t contrast1; /&#x22C6; 7- 4 Pattern pixel contrast &#x22C6;/
+ /&#x22C6; 3- 0 Background pixel contrast &#x22C6;/
+ uint8_t contrast2; /&#x22C6; 7- 4 Emphasis pixel-2 contrast &#x22C6;/
+ /&#x22C6; 3- 0 Emphasis pixel-1 contrast &#x22C6;/
+ uint8_t color1; /&#x22C6; 7- 4 Pattern pixel color &#x22C6;/
+ /&#x22C6; 3- 0 Background pixel color &#x22C6;/
+ uint8_t color2; /&#x22C6; 7- 4 Emphasis pixel-2 color &#x22C6;/
+ /&#x22C6; 3- 0 Emphasis pixel-1 color &#x22C6;/
+ uint32_t ypos; /&#x22C6; 23-22 auto action mode &#x22C6;/
+ /&#x22C6; 21-12 start y &#x22C6;/
+ /&#x22C6; 9- 0 end y &#x22C6;/
+ uint32_t xpos; /&#x22C6; 23-22 button color number &#x22C6;/
+ /&#x22C6; 21-12 start x &#x22C6;/
+ /&#x22C6; 9- 0 end x &#x22C6;/
+ } video_highlight_t;
+</programlisting>
+
+</section>
+<section id="video-spu">
+<title>struct video_spu</title>
+<para>Calling VIDEO_SET_SPU deactivates or activates SPU decoding, according to the
+following format:
+</para>
+<programlisting>
+ typedef
+ struct video_spu {
+ boolean active;
+ int stream_id;
+ } video_spu_t;
+</programlisting>
+
+</section>
+<section id="video-spu-palette">
+<title>struct video_spu_palette</title>
+<para>The following structure is used to set the SPU palette by calling VIDEO_SPU_PALETTE:
+</para>
+<programlisting>
+ typedef
+ struct video_spu_palette {
+ int length;
+ uint8_t &#x22C6;palette;
+ } video_spu_palette_t;
+</programlisting>
+
+</section>
+<section id="video-navi-pack">
+<title>struct video_navi_pack</title>
+<para>In order to get the navigational data the following structure has to be passed to the ioctl
+VIDEO_GET_NAVI:
+</para>
+<programlisting>
+ typedef
+ struct video_navi_pack {
+ int length; /&#x22C6; 0 ... 1024 &#x22C6;/
+ uint8_t data[1024];
+ } video_navi_pack_t;
+</programlisting>
+</section>
+
+
+<section id="video-attributes-t">
+<title>video_attributes_t</title>
+<para>The following attributes can be set by a call to VIDEO_SET_ATTRIBUTES:
+</para>
+<programlisting>
+ typedef uint16_t video_attributes_t;
+ /&#x22C6; bits: descr. &#x22C6;/
+ /&#x22C6; 15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) &#x22C6;/
+ /&#x22C6; 13-12 TV system (0=525/60, 1=625/50) &#x22C6;/
+ /&#x22C6; 11-10 Aspect ratio (0=4:3, 3=16:9) &#x22C6;/
+ /&#x22C6; 9- 8 permitted display mode on 4:3 monitor (0=both, 1=only pan-sca &#x22C6;/
+ /&#x22C6; 7 line 21-1 data present in GOP (1=yes, 0=no) &#x22C6;/
+ /&#x22C6; 6 line 21-2 data present in GOP (1=yes, 0=no) &#x22C6;/
+ /&#x22C6; 5- 3 source resolution (0=720x480/576, 1=704x480/576, 2=352x480/57 &#x22C6;/
+ /&#x22C6; 2 source letterboxed (1=yes, 0=no) &#x22C6;/
+ /&#x22C6; 0 film/camera mode (0=camera, 1=film (625/50 only)) &#x22C6;/
+</programlisting>
+</section></section>
+
+
+<section id="video_function_calls">
+<title>Video Function Calls</title>
+
+
+<section id="video_fopen">
+<title>open()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call opens a named video device (e.g. /dev/dvb/adapter0/video0)
+ for subsequent use.</para>
+<para>When an open() call has succeeded, the device will be ready for use.
+ The significance of blocking or non-blocking mode is described in the
+ documentation for functions where there is a difference. It does not affect the
+ semantics of the open() call itself. A device opened in blocking mode can later
+ be put into non-blocking mode (and vice versa) using the F_SETFL command
+ of the fcntl system call. This is a standard system call, documented in the Linux
+ manual page for fcntl. Only one user can open the Video Device in O_RDWR
+ mode. All other attempts to open the device in this mode will fail, and an
+ error-code will be returned. If the Video Device is opened in O_RDONLY
+ mode, the only ioctl call that can be used is VIDEO_GET_STATUS. All other
+ call will return an error code.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int open(const char &#x22C6;deviceName, int flags);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>const char
+ *deviceName</para>
+</entry><entry
+ align="char">
+<para>Name of specific video device.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int flags</para>
+</entry><entry
+ align="char">
+<para>A bit-wise OR of the following flags:</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDONLY read-only access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_RDWR read/write access</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>O_NONBLOCK open in non-blocking mode</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>(blocking mode is the default)</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>ENODEV</para>
+</entry><entry
+ align="char">
+<para>Device driver not loaded/available.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EINTERNAL</para>
+</entry><entry
+ align="char">
+<para>Internal error.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBUSY</para>
+</entry><entry
+ align="char">
+<para>Device or resource busy.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Invalid argument.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
+<section id="video_fclose">
+<title>close()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call closes a previously opened video device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int close(int fd);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section>
+<section id="video_fwrite">
+<title>write()</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This system call can only be used if VIDEO_SOURCE_MEMORY is selected
+ in the ioctl call VIDEO_SELECT_SOURCE. The data provided shall be in
+ PES format, unless the capability allows other formats. If O_NONBLOCK is
+ not specified the function will block until buffer space is available. The amount
+ of data to be transferred is implied by count.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>size_t write(int fd, const void &#x22C6;buf, size_t count);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>void *buf</para>
+</entry><entry
+ align="char">
+<para>Pointer to the buffer containing the PES data.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>size_t count</para>
+</entry><entry
+ align="char">
+<para>Size of buf.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>RETURN VALUE</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EPERM</para>
+</entry><entry
+ align="char">
+<para>Mode VIDEO_SOURCE_MEMORY not selected.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>ENOMEM</para>
+</entry><entry
+ align="char">
+<para>Attempted to write more data than the internal buffer can
+ hold.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EBADF</para>
+</entry><entry
+ align="char">
+<para>fd is not a valid open file descriptor.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="VIDEO_STOP"
+role="subsection"><title>VIDEO_STOP</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is for DVB devices only. To control a V4L2 decoder use the V4L2
+&VIDIOC-DECODER-CMD; instead.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Video Device to stop playing the current stream.
+ Depending on the input parameter, the screen can be blanked out or displaying
+ the last decoded frame.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_STOP, boolean
+ mode);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_STOP for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>Boolean mode</para>
+</entry><entry
+ align="char">
+<para>Indicates how the screen shall be handled.</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>TRUE: Blank screen when stop.</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>FALSE: Show last decoded frame.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_PLAY"
+role="subsection"><title>VIDEO_PLAY</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is for DVB devices only. To control a V4L2 decoder use the V4L2
+&VIDIOC-DECODER-CMD; instead.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Video Device to start playing a video stream from the
+ selected source.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_PLAY);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_PLAY for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_FREEZE"
+role="subsection"><title>VIDEO_FREEZE</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is for DVB devices only. To control a V4L2 decoder use the V4L2
+&VIDIOC-DECODER-CMD; instead.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call suspends the live video stream being played. Decoding
+ and playing are frozen. It is then possible to restart the decoding
+ and playing process of the video stream using the VIDEO_CONTINUE
+ command. If VIDEO_SOURCE_MEMORY is selected in the ioctl call
+ VIDEO_SELECT_SOURCE, the DVB subsystem will not decode any more
+ data until the ioctl call VIDEO_CONTINUE or VIDEO_PLAY is performed.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_FREEZE);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_FREEZE for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_CONTINUE"
+role="subsection"><title>VIDEO_CONTINUE</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is for DVB devices only. To control a V4L2 decoder use the V4L2
+&VIDIOC-DECODER-CMD; instead.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call restarts decoding and playing processes of the video stream
+ which was played before a call to VIDEO_FREEZE was made.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_CONTINUE);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_CONTINUE for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_SELECT_SOURCE"
+role="subsection"><title>VIDEO_SELECT_SOURCE</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is for DVB devices only. This ioctl was also supported by the
+V4L2 ivtv driver, but that has been replaced by the ivtv-specific
+<constant>IVTV_IOC_PASSTHROUGH_MODE</constant> ioctl.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call informs the video device which source shall be used for the input
+ data. The possible sources are demux or memory. If memory is selected, the
+ data is fed to the video device through the write command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_SELECT_SOURCE,
+ video_stream_source_t source);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SELECT_SOURCE for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>video_stream_source_t
+ source</para>
+</entry><entry
+ align="char">
+<para>Indicates which source shall be used for the Video stream.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_SET_BLANK"
+role="subsection"><title>VIDEO_SET_BLANK</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Video Device to blank out the picture.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_SET_BLANK, boolean
+ mode);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SET_BLANK for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>boolean mode</para>
+</entry><entry
+ align="char">
+<para>TRUE: Blank screen when stop.</para>
+</entry>
+ </row><row><entry
+ align="char">
+</entry><entry
+ align="char">
+<para>FALSE: Show last decoded frame.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_GET_STATUS"
+role="subsection"><title>VIDEO_GET_STATUS</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Video Device to return the current status of the device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(fd, int request = VIDEO_GET_STATUS, struct
+ video_status &#x22C6;status);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_GET_STATUS for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct video_status
+ *status</para>
+</entry><entry
+ align="char">
+<para>Returns the current status of the Video Device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_GET_FRAME_COUNT"
+role="subsection"><title>VIDEO_GET_FRAME_COUNT</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is obsolete. Do not use in new drivers. For V4L2 decoders this
+ioctl has been replaced by the <constant>V4L2_CID_MPEG_VIDEO_DEC_FRAME</constant> control.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Video Device to return the number of displayed frames
+since the decoder was started.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ VIDEO_GET_FRAME_COUNT, __u64 *pts);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_GET_FRAME_COUNT for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u64 *pts
+</para>
+</entry><entry
+ align="char">
+<para>Returns the number of frames displayed since the decoder was started.
+</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_GET_PTS"
+role="subsection"><title>VIDEO_GET_PTS</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is obsolete. Do not use in new drivers. For V4L2 decoders this
+ioctl has been replaced by the <constant>V4L2_CID_MPEG_VIDEO_DEC_PTS</constant> control.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Video Device to return the current PTS timestamp.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ VIDEO_GET_PTS, __u64 *pts);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_GET_PTS for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>__u64 *pts
+</para>
+</entry><entry
+ align="char">
+<para>Returns the 33-bit timestamp as defined in ITU T-REC-H.222.0 / ISO/IEC 13818-1.
+</para>
+<para>
+The PTS should belong to the currently played
+frame if possible, but may also be a value close to it
+like the PTS of the last decoded frame or the last PTS
+extracted by the PES parser.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_GET_FRAME_RATE"
+role="subsection"><title>VIDEO_GET_FRAME_RATE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Video Device to return the current framerate.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ VIDEO_GET_FRAME_RATE, unsigned int *rate);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_GET_FRAME_RATE for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>unsigned int *rate
+</para>
+</entry><entry
+ align="char">
+<para>Returns the framerate in number of frames per 1000 seconds.
+</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_GET_EVENT"
+role="subsection"><title>VIDEO_GET_EVENT</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is for DVB devices only. To get events from a V4L2 decoder use the V4L2
+&VIDIOC-DQEVENT; ioctl instead.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call returns an event of type video_event if available. If an event is
+ not available, the behavior depends on whether the device is in blocking or
+ non-blocking mode. In the latter case, the call fails immediately with errno
+ set to EWOULDBLOCK. In the former case, the call blocks until an event
+ becomes available. The standard Linux poll() and/or select() system calls can
+ be used with the device file descriptor to watch for new events. For select(),
+ the file descriptor should be included in the exceptfds argument, and for
+ poll(), POLLPRI should be specified as the wake-up condition. Read-only
+ permissions are sufficient for this ioctl call.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(fd, int request = VIDEO_GET_EVENT, struct
+ video_event &#x22C6;ev);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_GET_EVENT for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct video_event
+ *ev</para>
+</entry><entry
+ align="char">
+<para>Points to the location where the event, if any, is to be
+ stored.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EWOULDBLOCK</para>
+</entry><entry
+ align="char">
+<para>There is no event pending, and the device is in
+ non-blocking mode.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>EOVERFLOW</para>
+</entry><entry
+ align="char">
+<para>Overflow in event queue - one or more events were lost.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="VIDEO_COMMAND"
+role="subsection"><title>VIDEO_COMMAND</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is obsolete. Do not use in new drivers. For V4L2 decoders this
+ioctl has been replaced by the &VIDIOC-DECODER-CMD; ioctl.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl commands the decoder. The <constant>video_command</constant> struct
+is a subset of the <constant>v4l2_decoder_cmd</constant> struct, so refer to the
+&VIDIOC-DECODER-CMD; documentation for more information.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ VIDEO_COMMAND, struct video_command *cmd);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_COMMAND for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct video_command *cmd
+</para>
+</entry><entry
+ align="char">
+<para>Commands the decoder.
+</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_TRY_COMMAND"
+role="subsection"><title>VIDEO_TRY_COMMAND</title>
+<para>DESCRIPTION
+</para>
+<para>This ioctl is obsolete. Do not use in new drivers. For V4L2 decoders this
+ioctl has been replaced by the &VIDIOC-TRY-DECODER-CMD; ioctl.</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl tries a decoder command. The <constant>video_command</constant> struct
+is a subset of the <constant>v4l2_decoder_cmd</constant> struct, so refer to the
+&VIDIOC-TRY-DECODER-CMD; documentation for more information.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ VIDEO_TRY_COMMAND, struct video_command *cmd);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_TRY_COMMAND for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct video_command *cmd
+</para>
+</entry><entry
+ align="char">
+<para>Try a decoder command.
+</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_GET_SIZE"
+role="subsection"><title>VIDEO_GET_SIZE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl returns the size and aspect ratio.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request =
+ VIDEO_GET_SIZE, video_size_t *size);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_GET_SIZE for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>video_size_t *size
+</para>
+</entry><entry
+ align="char">
+<para>Returns the size and aspect ratio.
+</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_SET_DISPLAY_FORMAT"
+role="subsection"><title>VIDEO_SET_DISPLAY_FORMAT</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Video Device to select the video format to be applied
+ by the MPEG chip on the video.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(fd, int request =
+ VIDEO_SET_DISPLAY_FORMAT, video_display_format_t
+ format);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SET_DISPLAY_FORMAT for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>video_display_format_t
+ format</para>
+</entry><entry
+ align="char">
+<para>Selects the video format to be used.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_STILLPICTURE"
+role="subsection"><title>VIDEO_STILLPICTURE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Video Device to display a still picture (I-frame). The
+ input data shall contain an I-frame. If the pointer is NULL, then the current
+ displayed still picture is blanked.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_STILLPICTURE,
+ struct video_still_picture &#x22C6;sp);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_STILLPICTURE for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>struct
+ video_still_picture
+ *sp</para>
+</entry><entry
+ align="char">
+<para>Pointer to a location where an I-frame and size is stored.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_FAST_FORWARD"
+role="subsection"><title>VIDEO_FAST_FORWARD</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the Video Device to skip decoding of N number of I-frames.
+ This call can only be used if VIDEO_SOURCE_MEMORY is selected.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_FAST_FORWARD, int
+ nFrames);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_FAST_FORWARD for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int nFrames</para>
+</entry><entry
+ align="char">
+<para>The number of frames to skip.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EPERM</para>
+</entry><entry
+ align="char">
+<para>Mode VIDEO_SOURCE_MEMORY not selected.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="VIDEO_SLOWMOTION"
+role="subsection"><title>VIDEO_SLOWMOTION</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the video device to repeat decoding frames N number of
+ times. This call can only be used if VIDEO_SOURCE_MEMORY is selected.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_SLOWMOTION, int
+ nFrames);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SLOWMOTION for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int nFrames</para>
+</entry><entry
+ align="char">
+<para>The number of times to repeat each frame.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EPERM</para>
+</entry><entry
+ align="char">
+<para>Mode VIDEO_SOURCE_MEMORY not selected.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="VIDEO_GET_CAPABILITIES"
+role="subsection"><title>VIDEO_GET_CAPABILITIES</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call asks the video device about its decoding capabilities. On success
+ it returns and integer which has bits set according to the defines in section ??.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_GET_CAPABILITIES,
+ unsigned int &#x22C6;cap);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_GET_CAPABILITIES for this
+ command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>unsigned int *cap</para>
+</entry><entry
+ align="char">
+<para>Pointer to a location where to store the capability
+ information.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_SET_ID"
+role="subsection"><title>VIDEO_SET_ID</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl selects which sub-stream is to be decoded if a program or system
+ stream is sent to the video device.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(int fd, int request = VIDEO_SET_ID, int
+ id);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SET_ID for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int id</para>
+</entry><entry
+ align="char">
+<para>video sub-stream id</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>Invalid sub-stream id.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="VIDEO_CLEAR_BUFFER"
+role="subsection"><title>VIDEO_CLEAR_BUFFER</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl call clears all video buffers in the driver and in the decoder hardware.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_CLEAR_BUFFER);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_CLEAR_BUFFER for this command.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_SET_STREAMTYPE"
+role="subsection"><title>VIDEO_SET_STREAMTYPE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl tells the driver which kind of stream to expect being written to it. If
+ this call is not used the default of video PES is used. Some drivers might not
+ support this call and always expect PES.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>int ioctl(fd, int request = VIDEO_SET_STREAMTYPE,
+ int type);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SET_STREAMTYPE for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int type</para>
+</entry><entry
+ align="char">
+<para>stream type</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_SET_FORMAT"
+role="subsection"><title>VIDEO_SET_FORMAT</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl sets the screen format (aspect ratio) of the connected output device
+ (TV) so that the output of the decoder can be adjusted accordingly.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(fd, int request = VIDEO_SET_FORMAT,
+ video_format_t format);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SET_FORMAT for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>video_format_t
+ format</para>
+</entry><entry
+ align="char">
+<para>video format of TV as defined in section ??.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>format is not a valid video format.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="VIDEO_SET_SYSTEM"
+role="subsection"><title>VIDEO_SET_SYSTEM</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl sets the television output format. The format (see section ??) may
+ vary from the color format of the displayed MPEG stream. If the hardware is
+ not able to display the requested format the call will return an error.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(fd, int request = VIDEO_SET_SYSTEM ,
+ video_system_t system);</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SET_FORMAT for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>video_system_t
+ system</para>
+</entry><entry
+ align="char">
+<para>video system of TV output.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>system is not a valid or supported video system.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="VIDEO_SET_HIGHLIGHT"
+role="subsection"><title>VIDEO_SET_HIGHLIGHT</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl sets the SPU highlight information for the menu access of a DVD.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(fd, int request = VIDEO_SET_HIGHLIGHT
+ ,video_highlight_t &#x22C6;vhilite)</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SET_HIGHLIGHT for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>video_highlight_t
+ *vhilite</para>
+</entry><entry
+ align="char">
+<para>SPU Highlight information according to section ??.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+
+</section><section id="VIDEO_SET_SPU"
+role="subsection"><title>VIDEO_SET_SPU</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl activates or deactivates SPU decoding in a DVD input stream. It can
+ only be used, if the driver is able to handle a DVD stream.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(fd, int request = VIDEO_SET_SPU ,
+ video_spu_t &#x22C6;spu)</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SET_SPU for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>video_spu_t *spu</para>
+</entry><entry
+ align="char">
+<para>SPU decoding (de)activation and subid setting according
+ to section ??.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>input is not a valid spu setting or driver cannot handle
+ SPU.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="VIDEO_SET_SPU_PALETTE"
+role="subsection"><title>VIDEO_SET_SPU_PALETTE</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl sets the SPU color palette.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(fd, int request = VIDEO_SET_SPU_PALETTE
+ ,video_spu_palette_t &#x22C6;palette )</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SET_SPU_PALETTE for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>video_spu_palette_t
+ *palette</para>
+</entry><entry
+ align="char">
+<para>SPU palette according to section ??.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>input is not a valid palette or driver doesn&#8217;t handle SPU.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="VIDEO_GET_NAVI"
+role="subsection"><title>VIDEO_GET_NAVI</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl returns navigational information from the DVD stream. This is
+ especially needed if an encoded stream has to be decoded by the hardware.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(fd, int request = VIDEO_GET_NAVI ,
+ video_navi_pack_t &#x22C6;navipack)</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_GET_NAVI for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>video_navi_pack_t
+ *navipack</para>
+</entry><entry
+ align="char">
+<para>PCI or DSI pack (private stream 2) according to section
+ ??.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EFAULT</para>
+</entry><entry
+ align="char">
+<para>driver is not able to return navigational information</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+
+</section><section id="VIDEO_SET_ATTRIBUTES"
+role="subsection"><title>VIDEO_SET_ATTRIBUTES</title>
+<para>DESCRIPTION
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para>This ioctl is intended for DVD playback and allows you to set certain
+ information about the stream. Some hardware may not need this information,
+ but the call also tells the hardware to prepare for DVD playback.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>SYNOPSIS
+</para>
+<informaltable><tgroup cols="1"><tbody><row><entry
+ align="char">
+<para> int ioctl(fd, int request = VIDEO_SET_ATTRIBUTE
+ ,video_attributes_t vattr)</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+<para>PARAMETERS
+</para>
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>int fd</para>
+</entry><entry
+ align="char">
+<para>File descriptor returned by a previous call to open().</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>int request</para>
+</entry><entry
+ align="char">
+<para>Equals VIDEO_SET_ATTRIBUTE for this command.</para>
+</entry>
+ </row><row><entry
+ align="char">
+<para>video_attributes_t
+ vattr</para>
+</entry><entry
+ align="char">
+<para>video attributes according to section ??.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+&return-value-dvb;
+<informaltable><tgroup cols="2"><tbody><row><entry
+ align="char">
+<para>EINVAL</para>
+</entry><entry
+ align="char">
+<para>input is not a valid attribute setting.</para>
+</entry>
+ </row></tbody></tgroup></informaltable>
+ </section></section>
diff --git a/Documentation/DocBook/media/dvbstb.png.b64 b/Documentation/DocBook/media/dvbstb.png.b64
new file mode 100644
index 000000000..e8b52fde3
--- /dev/null
+++ b/Documentation/DocBook/media/dvbstb.png.b64
@@ -0,0 +1,398 @@
+iVBORw0KGgoAAAANSUhEUgAAAzMAAAGaCAYAAAA7Jx25AAAABmJLR0QAAAAAAAD5Q7t/AAAACXBI
+WXMAAA3XAAANiQFmEOuiAAAgAElEQVR42uzdd1RU18I28GdgKFZUBE0saFA0KoqFFkEhKhbAQmxJ
+bIkNNEpMEUwsMZarJMZrw4KxRExQczUqil0jRBA1GAjGQqLYC4TemdnfH76cj3HodYDntxaLmTll
+zuw57Zmz9z4yIYQAkYZzcnJCSkoKGjZsyMIgIiIiquPS09PRoEEDyBhmqCaQyWRo06YN3nvvPRYG
+ERERUR137Ngx/Pnnn5CzKKgmMDAwwKpVqxhmiIiIiAj29vZ4//33ocWiICIiIiKimohhhoiIiIiI
+GGaIiIiIiIgYZoiIiIiIiBhmiIiIiIiIYYaIiIiIiIhhhoiIiIiIiGGGiIiIiIgYZoiIiIiIiBhm
+iIiIiIiIGGaIiIiIiIgYZoiIiIiIiGGGiIiIiIiIYYaIiIiIiIhhhoiIiIiIGGaIiIiIiIgYZoiI
+iIiIiBhmiIiIiIiIYYaIiIiIiIhhhoiIiIiIqFLIWQRElSMsLAy2trZo1KgR5HJualTxEhIS8P33
+3+PDDz+sM5+5bdu2ePDgAZo2bcoVgCplm3J0dMS5c+fqzGf++uuvsWTJEm5TVClSU1ORk5ODBw8e
+oHXr1gwzRDVJbm4uAGDRokUwMDBggVCFmzlzJrKysurUZ3727BksLCzg4eHBFYAq3IIFC5CQkFCn
+PnNGRgYAYNWqVVwBqMJFRUVh48aNUCqVlfYeDDNElWzGjBkMM1QpNm7cWOc+c8uWLTFjxgzMmDGD
+KwBVuLt37yIkJKTOfW5nZ2duU1SpYaYysc0MERERERHVSAwzRERERETEMENERERERMQwQ0RERERE
+xDBDREREREQMM0RERERERAwzREREREREDDNERERERMQwQ0RERERExDBDRERERETEMENERERERMQw
+Q0REREREDDNEREREREQMM0RERERERAwzRERERETEMENERERERMQwQ0RERERExDBDREREREQMM0RE
+RERERAwzREREREREDDNEREREREQMM0RERERExDBDRERERETEMENERERERMQwQ0REREREDDNERERE
+REQMM0RERERERAwzRERERETEMENERERERMQwQ0REREREVGnkLAKimunBgwdISkoq8/SGhoZ47bXX
+WJCV6NmzZwgMDMS5c+ewd+9eFgiVSVZWFkJCQnD16lU8evQICoUChoaG6NChA2xsbNCxY0fIZDI8
+efIEp06dwuTJk0s876CgIJiYmKBLly4saKq2Y5Wuri6aNm0KQ0NDaGnxd3ZimCGqE/78808EBgbi
+p59+QkJCgsowLS0tyGQy6blSqYQQQmWcjz/+GGvXrmVBVoKtW7di+/btuHbtGoQQMDQ0ZKFQqf37
+77/w8fHBtm3bkJCQgCZNmsDS0hLGxsZ48OABtm/fjidPnsDU1BR2dnYICwtDz549SxxmlEol5s6d
+CxsbG+zZs4cFTpV2rDpx4gQOHDiAJ0+eqAzT09ODUqlETk4OAEBfXx/dunWDvb093Nzc0LdvX5Vj
+GVFBGH+JaqihQ4di06ZNOHr0qMrrly5dgkKhQG5urvSnVCqRlZWF27dvY8mSJQCA7OxsFmIlmTFj
+Bs6ePctfu6nMTp48iTfffBOrV6+Gnp4e9uzZg+fPn+PUqVPw9/fHkSNH8PDhQxw9ehRCCOzevRu3
+bt1CWlpaqd4jJiYG+/btw+PHj1noVGnHqnXr1uHcuXMqr+/fvx8ZGRnIzs5GSkoKIiIi8M0330BH
+Rwdr166Fvb09evXqhdOnT7MQiWGGqDazsrJSeV5Y1TFdXV107NgRX331FSZPniz9ElbTnDp1SuOX
+USaToXHjxujevTtXUCq1H3/8EcOGDcPz58/RtWtXREREYMKECdDR0VE9gGtpwcXFBdeuXYONjQ0A
+ID09vcTvs2HDBgBATk4OfH19WfBUqTp16gS5/P9XCDI3N5euujRs2BAWFhb46KOP8Ntvv+HIkSNo
+3rw5rl+/DicnJ3z66adQKpUsRGKYIaqNdHR0Sl3HeNy4cTXyysyBAwdq1EkX635TaV29ehVTpkyB
+UqlEw4YNcfToUbRs2bLIaZo0aYIjR47AyMioxFdm7ty5g6CgIGhrawMAtmzZgoyMDH4BVGlkMhl0
+dXVLNJ6rqyvCwsLQqlUrAMB3332Hjz/+mIVIDDNEtfkgURqOjo5YunRpjfqMd+7cwfTp0/llU62l
+VCoxY8YM6arp/Pnz0b59+xJNa2RkBC8vrxJfmfH19YWVlRUmTJgAAIiPj2cnFaRRxypTU1McOnRI
+CtwbNmzA4cOHWYjEMENUl+Xm5iIhIQH6+vowMTEpcJz8HQUIIdQ6DijoBKy0CppnUfN59uwZnJ2d
+S9V7mxCiVMtW2mWqiPckyu/EiROIiIgAAGhra8Pd3b1U00+aNAlZWVnFjpeamoqdO3di9uzZmD17
+tvT6f//732K3d6KqZGlpiRkzZkjPvby8it3HlmY/XNh+v6jtoCTHRU1RlmNSac8BGGaIqEpduXIF
+CxYsUHs9MTERfn5+sLa2xrVr15CSkoJJkyZBX18fbdq0QWRkpMrOLTAwEMOHD4epqSnat2+Pxo0b
+o3///vDz8yu0LU5ubi7Onz8PDw8PmJubS+87d+5cGBoaQi6Xw8LCQq2x5+XLl2Fra4s7d+4AAEJD
+Q+Hi4gIXFxfMnz9fZdzs7Gz4+vrC2toa+vr60NHRQdeuXeHj41PgSV5Zl+lVx44dw8CBA/Haa6+h
+Q4cO6NmzJw4cOFBn17OgoCC1XouoeD/++KP02NbWFkZGRqWa3sjICDt37ix2PH9/f8jlcowdOxaW
+lpawtrYGAERHR+Ps2bP8IjRQaGgooqKi6mTYnDNnjvT41q1buHTpkto4pdn3CyFw7do1eHt7o127
+dkhMTIQQAv7+/rCwsIBcLkfTpk3x8ccfS9Wxc3NzsXnzZvTu3Ru6urqoX78+3n33XbWeRPfv34/x
+48dLx6jFixdLw5KSkjB37lwMHz5cGp6/hkRsbCzmz58vDcv7++KLL5Cbm4vDhw9j7Nix0utz587F
+s2fPylUWZTkH0NTURqTxDAwMxN69e2vUMgcHBwsAIjExsdLfS1tbWwAQAMTdu3cLHW/hwoVi5syZ
+0vMrV66IESNGCF1dXWn63377TfTv31/o6+tLry1YsEAIIUR6eroYPXq00NPTE7t37xY5OTlCCCFu
+374t+vbtKwCIHj16iNjYWJX3PXXqlHBycpLm16JFCxEdHS06duwoHB0dhYuLi6hfv74AIHR0dMQf
+f/whTfvXX3+J06dPC2NjYwFA2NraitOnT4vTp0+L8PBwabynT5+KPn36iOnTp4vIyEjx6NEjcejQ
+IdGiRQsBQPTt21ekpaVVyDLlUSgUYvbs2UIul4stW7aI7OxsIYQQ0dHRwsLCQjRq1EgAEIaGhpXy
+vZubmwtfX1+NW/fzyrRdu3Zi5syZIiAgQDx58qRC5t22bVuN/MwVoVWrVlLZeXp6Vsp7KJVK0bVr
+V+Hl5SW95u/vL72vs7NznT7WeHt7Czs7O41brmnTpgkAwsDAQIwYMUKsX79eREZGCqVSWSGfuaq+
+9wYNGkjr2l9//VXi6dq3by9Nt3jxYpVhpdn3X7p0SYwePVrI5XKV5Rg8eLCwsrIS7u7u4u2335aG
+ff755+LJkyfirbfeEo6OjmLWrFli1KhRQktLSwAQrq6uast67949af6DBw9WGx4dHS0ds18td6VS
+KZYtWya9f+/evVWGr1y5Uujq6oqAgIACv/vSHgdLew5QFpGRkQKA2nlBRQgMDBQGBgaCYYYYZmpZ
+mDl48KAIDQ2V/i5duiTOnj0rvv76a6Grq6sSZtLS0kR2drZ0oAQgnJycxKFDh0RqaqqYOHGiaNKk
+iTh9+rRQKpVi7NixAkCBJ5MpKSmic+fOAoDo1KmTSElJURtn6NChAoDQ19cXlpaWIiIiQhr2xx9/
+SJ9jypQpatOamJgIAGLEiBFqw7Kzs0WfPn3EqFGj1Hbw+/fvlz6bt7d3hS7TokWLBACxZs0atWGP
+Hz+WDtx1Lcw0a9ZMKnMdHR3pwF4R4aa2hpnk5GSpzApbpyrC2bNnhUwmU/nRIzMzU/qxAIC4efMm
+w4yG8fDwkE6gtbS0hJ6eXoWFm5oQZkaOHClNN3r06HLv+xcsWCANs7GxUflhTKlUSu/XoEEDYWlp
+KS5cuKAy/erVq6XpY2Ji1JbX1NS00DCT/3hWULkrlUoxZMgQ6bvOK6f09HTRsWNH8d133xU4z7KU
+RWnOARhmiBhmqizMFPeXP8zk+eGHH6ThX331VYHvcezYMQFANG3aVGRlZRU4zpEjR6T5fPHFF2rD
+P/roI2n4s2fP1Ib369dPCkOlCTNbt24VAMS5c+fUhmVmZkq/MDVt2lS6mlTeZbpx44bQ1tYWhoaG
+hZbH8OHD62SYMTIyKnT9K2+4qa1h5p9//lEpp61bt1bK+4waNarAX5PzgjkAMWvWLIYZDQwz+a8m
+5P8rb7ipCWFmxowZ0nSOjo7l3vdv375dml9YWJjatPv27ZOGb9q0SW34zZs3peG7du1SG96pU6ci
+w0xe2Cms3O/fvy9d2XdychJKpVJMmzZNODg4CIVCUeA05TkOluQcQJPDjJw1UYlql5s3b6o07hdC
+IC0tDZcuXcKHH35Y4DT5718xZMiQAsfJ6xLZysqq0O41hw0bBmNjYzx//hxbt27F0qVLVe4rkNcr
+DQAYGxurTZ/XDWd8fHypPrOfnx8AIDw8HNHR0WrDmzVrhsePHyMhIQE3btxQuf9LWZdp3bp1UCgU
+GDhwYKHl0ahRI66Qr8jfpurevXvYsWMHvv/+e+Tm5qJdu3YYPHgwHB0d0b9//2K7JK7NFApFhc8z
+NjYWhw8fxvHjx9WGzZw5EytXroRCocCuXbuwfPlyNG3alCtsDZB3U+S8dhlHjx7FiRMnkJWVBQMD
+Azg4OGDAgAFwcHBAt27dSt37pSbIv8z5j1dl3ffn3+83aNCg0P1+3jxe1aJFC+nxw4cPK/zztmnT
+Bt988w3c3d1x6tQpTJo0CYcPH0ZUVFShXf6X5zhYknMATcYwQ1TL6OnpQV9fX+W1evXqYfjw4Viw
+YIHUkL4w+Xfy+Q+WFy5cAAA0b968yGn79++PAwcOID4+HtHR0ejRo0eJlz1vJy1K0cg1OTkZ165d
+g7a2dqGNzseMGaP2HuVZJiGE1EVo586dq/Uk5v79+7h27ZpGrYO5ubllDjfff/89tm/fDoVCIYWb
+3r17w9XVtVaHm1eDQ1xcXIW/x5YtW/DGG29g0KBBBZ68ubm54cCBA0hPT8f27dvx+eef18l9aFpa
+msZtUy9evChzuDly5AiCgoKQnZ2Nxo0bw9HREf369YO9vT369OlTI76Tf//9V3r8+uuvV/q+v6Dj
+oMrJc74f6Srr/kzTp0/Hvn37cP78efj7+2Pjxo2F9kJakWVR3GdnmCGqQ4QG9jrz9ttv4+7du6We
+Lj4+XroZX3Enqp06dZIeP3z4sFRhpizu3r0rdT+5Zs2aKtkRv3jxAk+fPgVQvVdfsrOzsWrVKqxa
+tarWbDf516979+5h69atUkifOnVqpVyx0ARNmjSRrmoCQExMTIXOPyMjA35+flAoFCq/yL66nefZ
+sGED5s2bp3LSVlfcuHGjxpzkl/RYlNcrV3JyMg4fPiz9GOPo6CiFA01269Yt6XHv3r2rbd9flbS0
+tODn5wdzc3NkZGTg1KlTmDVrVoFX1mp7WTDMEFWDFy9eYPny5Rq3XD179sTGjRtLPV3+E8i8k/jC
+GBoaSo+rYoeaF7KEELh//36JbzJYHvl/Nc/MzKy271NPTw/ffvttodUHq4uZmVmZryzo6ekhKysL
+enp6sLS0hJOTE/r37w8bGxvo6uoiMDCw1u437OzscPDgQQAvu+KtSAEBAUhNTYWPj0+Rv8quWLEC
+T58+xYMHD3Do0CGVX3Prip49exZYFa86ffbZZ/jhhx9KddUzj46ODpRKJZRKJTp06IChQ4fCwcEB
+ffv2hbGxMRYsWIDExESNPp5GRUVJz11cXKpt31/V0tLSpOPvkSNHsG/fPowfP14jjoMMM0S12Llz
+5zBgwABMnTq11nym5s2bQ0dHBzk5OYiOjoYQotB61/lv0PXGG29U+rLVr19fehwSElIlO3E9PT3p
+8d9//11t34tMJkP9+vU1rm1Daerk6+vrIzMzUyW8ODo6Ftk2q7YaO3asFGbu3LmDqKgo6f5H5SGE
+wIYNGzBmzBjMnTu3yHHj4+Px1VdfAXh5E826GGby7jOiSfLvc4qjq6sLhUIBpVKJjh07YsiQIXBw
+cIC9vX2R1YQ11c6dO6WaDi4uLmjXrl217furUlZWFiZMmIClS5di48aNePToEebMmYMBAwao3YOq
+tpdFcXjTTKIKolQqsXz5cgwYMAAAMHz48Fp1cM+7sV5cXBxu3LhR6Lh59XVbtWqFjh07VvqymZqa
+SifPfn5+RVbvS01NxcyZM8v9nq1bt5YaTF64cIF3TS+FvPZcenp6sLOzwxdffIHg4GAkJycjODgY
+ixYtgp2dXZ0LMgDg5uamchLy3XffVch8L168iIiICEyfPr3YcWfOnCmt25cuXUJ4eDhXWg2nq6sL
+bW1tyGQymJmZwd3dHQcOHMCLFy9w69YtrFu3DqNGjaqRQebx48dSNVpdXV2sXr26Wvf9FaUkx4yF
+CxeicePGmD9/PjZv3iwdfz09PTXiOMgwQ1TLvHjxAoMGDcKSJUsAvLxjcUE9oFTWTjH/1ZDKOrH+
+4IMPpMcBAQGFjpd38uPu7l7qXnOKWva8eeXV/c7TqFEjKWgFBwdjz549BU6fm5uLKVOmwMnJqdzL
+pKenh/79+wN4WVc5KCioyGnrWtjJq/Lwanixt7dneCmCjo4ONm3aJD3ftWsXTp8+XeLpExMT4erq
+qnZX8NWrV8PMzAz29vbFzqNly5YYPXq09Hzt2rXcwWsApVIpVTcqaXjJX+W3Jp3E50lISMDIkSOR
+kJAAANi0aRO6dOlSZfv+8sirYp2enl5gGaSkpBQ5/a+//oqNGzfCz88PWlpacHV1xbvvvgsA+Omn
+n3D06NEqPQ4yzBDVcufPn0fXrl1x8eJFKJVKaGlp4bPPPquy909PT1c5QJSlZ5X8YaiwOtmTJk2C
+paUlAGDz5s0F1rG+ffs2goODYWZmhnnz5qkNL67xdl7PVgUd8PKqfdy+fVsanp2djcePH6v8UjVt
+2jSsX79e6s0HeFllx8XFBdnZ2XBzc6uQZcr/+Tw8PNS650xMTERISAiAl41uU1NT68w2kXcAfzW8
+XLx4keGlGEOHDsWaNWuk525ubiVqJxQaGgpLS0v069dPpdvYK1euICgoCGPGjCnxjwvvvfee9PjA
+gQPVWpWS/v8JsBCixoWXV/e1+dsYFhZshBA4deoU+vTpgytXrkBXVxfbtm3DtGnT1MYt674//zGv
+LMfE4n5AzLvCevnyZdy+fVulDFasWCHtIwu6DUFycjImT56MBQsW4M0335ReX7dunfQdu7u7qx2D
+y3McLMk5AMMMUS2kVCrx9ddfY+DAgYiPj0dubi7kcjnGjx+Ptm3bVtlynDx5UuV5YVcJipK/u+bf
+f/+9wHHkcjkOHTqETp06IT4+HhMmTFD5BT4hIQETJ05EmzZtEBgYWGDf/flPivJ6bcp/QPjzzz8B
+vOxONDk5WWW4ra2tNI/58+fj4MGDeOedd/Dvv/9i3LhxGDdunBQ+PD090bx5c/To0QOmpqYwMzND
+UlIS/P391U7oyrpMw4YNg4eHBwDg/v376NWrF1asWIHAwEBs27YNjo6OMDAwkA4O3bt3r3WX9gtz
+7do1ZGVlMbyU0SeffIJ9+/ahWbNmSE1NhaurK9zc3HDy5EmVX3pTU1MRFBSEd955By4uLli2bJlK
+d8qZmZmYNWuWtP2WVF6bhLyTr3nz5hV78keVa968eYiLi6tR4eVV58+fV1mP9u3bh5iYGPz999/4
+/fffcfjwYSxatAg9evTA4MGDce/ePbi5ueH69euFVpEs674/f2+BBQWK2NhY6XFB1aofPHggPX78
++LHa8LyaDNnZ2bCzs4OXlxe8vb1hbm4OIQQcHBwAAGFhYXj//fdx7Ngx6bxi2rRpUCqV8PLyUpmn
+kZGRVPvj8ePHmDJlikrwKM9xsCTnAJqe9ok0noGBgdi7d6/GLM+TJ0+Eg4OD2h2ZZTKZiIyMFEII
+ERwcLACIxMTESlkGPz8/MWjQoALvCm1nZyc+/fTTYudx5swZ4erqKrS0tKRp9fT0xKRJk8T27dsL
+nCYpKUnMmzdPNGrUSLz++utixowZ4sMPPxQmJibC3d1dxMXFqU0TGhoqxo8fr7KMXbp0EV9++aUQ
+QoiTJ08KJycnleE9e/YU27Ztk+YRGxsrWrduLQ1//fXXxfnz56XhOTk5YsmSJdJdk/P+DAwMxMKF
+C0VGRkaFL5NCoRDffPONaNq0qcp4JiYm4ty5c+L9998XhoaGwsPDQwQHBxd65+ayMjc3F76+vnVq
+X9C2bds685nj4uLEkiVLRNu2bVX2MYaGhqJZs2YCgGjTpo1YtGiR2na3f/9+0blzZ2k6uVwuRo0a
+JY4fP17o+/31119i8uTJol27dmr7lN69e4tffvml1pe5t7e3sLOzq1PblLe3d6F3oq+oY5WTk5PQ
+1dVVW6/y/zVu3Fh06dJFjB07VmzatEk8fPiwRPMvzb7/119/FbNmzRJ6enoq+2tvb28RGxsr/v77
+bzFv3jzRvHlzabiurq7w8PAQFy5cEBkZGeLzzz8X7du3V9m23N3dRWBgoPQ+SqVSLF26VOX43KxZ
+M+n44ezsLNq3by+8vb1FaGioyM3NFcHBweLtt98WAISRkZHw8fFR+Zznzp0TDg4OKp/R1tZWZZsu
+7XGwLOcApRUZGSkAiNjY2ApftwIDA4WBgYGQCbZcpRqgSZMm8PX1Van+UJ2/Lo0ZMwYpKSkq7Tfk
+cjkcHR1x6tQpAC97FLG3t0diYqL0C31tkpWVhT/++ANxcXFo2rQpevToodKjSmVIS0tDWFiY1PNV
+QT38ZGRk4Pr160hISICRkRG6d+9eqp6AyloW169fR1xcHIyNjdGzZ0/I5XLExMTAxMRE5e7KFal7
+9+7w8PCQrhDVBSYmJvD29q5Tn1kIgdu3b+PPP/9EXFwclEoljI2N0bVrV3Tq1KlG3tFdUy1YsAAh
+ISEIDg6uU585Kiqqxnd7Xh37/uK8ePEC169fR7169dCnTx+pDeE///yDdu3alfomzjWxLKKiotC9
+e3fExsZWeK2VY8eO4f3332fXzEQlpVAosGzZMixbtky6HPzq8C+++KLOlIeenh6srKyq9D0bNGgg
+9RZXmHr16klV0qqyLPIaX+bXoUMHbjhUbjKZDJ06dVK5IS0RVf++vzhGRkYYNGiQ2uuVfdsCTSyL
+ysQwQ1QCT58+xZgxYxAWFlZg3XEtLS1YWFhI9WCJiIiIqPKxAwCiYpw5cwbdunVDeHh4kb18LFy4
+kIVFRERExDBDVP0UCgUWL14MJycnJCQkqN3fJI9MJoOJiQlGjBjBQiMiIiKqQqxmRlSAR48eYfz4
+8QgLC5P69y+MtrY2vvjii0pryEdEREREDDNEJXLq1CkMHjwYurq6Jbp5lIGBASZNmsSCIyIiIqpi
+/CmZKB8hBPbv3w8AhVYry09XVxdeXl68ISARERERwwxR9ZLJZNi+fTvWr18PLS2tYquO6ejoYMaM
+GSw4IiIiIoYZIs0wZ84cnDlzBo0aNSr0hoe6urr46KOPauUNMYmIiIgYZohqMEdHR4SGhkJbW7vA
+O2wrlUp4enqyoIiIiIgYZog0z6pVq9CsWTO4uLhAW1tbel1XVxeTJ0/Ga6+9xkIiIiIiYpgh0izr
+1q1DQEAA/ve//+Hw4cNYvnw5tLS0IJPJkJ2dDS8vLxYSEREREcMMkWa5cOECPvvsM/j6+sLGxgYy
+mQze3t4IDAyEEAI2Njbo2LEjC4qIiIioGvE+M0SvuH//PsaOHYtp06Zh6tSpKsOGDh2KW7duISsr
+iwVFRERExDBDpDnS09Ph5uYGMzMzrFu3rsBxzMzMWFBEREREDDNEmsXDwwNPnz7F1atXeSNMIiIi
+IoYZopohr8H/r7/+ipYtW7JAiIiIiBhmiDRfXoP/LVu2wMbGhgVCREREVAOwNzOq84pq8E9ERERE
+DDNEGqkkDf6JiIiISDOxmhnVaWzwT0RERFQLwsz333+P77//Hg0aNGCpUIVTKBTIycnB//73Pxgb
+G2vEMrHBPxEREVEtCTMxMTEIDQ2Fl5cXS4UqXFRUFM6fP4/MzEyNWB42+CciIiKqRWEGAJydnbFq
+1SqWClVKmDl+/LhGLAsb/BMRERHVDuwAgOoUNvgnIiIiqj3YAQDVKWzwT0RERMQwQ1TjsME/ERER
+EcMMUY3DBv9EREREtQ/bzFCtxwb/RERERAwzRDUOG/wTERER1V6sZka1Ghv8ExERETHMENU4bPBP
+RERExDBDVOOwwT8RERFR7cc2M1TrsME/EREREcMMUY3DBv9EREREdQermVGtwgb/RERERAwzRDWO
+pjb4X716NQwMDPgFUYWLioqqc5/54cOHWL16NZKTk7kCUKXsr83Nzevc5z527BhWr17NFaAYycnJ
+uH//Ptq1a4eGDRuyQDTkOMUwQ7WCJjb4b9iwIUxMTHDixAloabFGJ1W8Nm3awMjIqE59ZkdHR9y/
+fx8HDhzgCkAVrl27dujZs2ed+sytWrVCmzZtuE29QqlUIi0tTeUvJycHMpkM+vr66NKlCwupBLKz
+s2FqalqptWUYZqjG09QG/xYWFrh37x6/IKIKdObMGRYCUQX66KOP8NFHH9XpMlAoFIiOjkZ4eDgu
+X76My5cv48aNGxBCoHPnznBycoKVlRVsbGwQFRWFhQsX4urVq1x5NATDDNVobPBPREREpfHo0SMp
+tFy+fBnXrhOJlUQAACAASURBVF1DamoqWrZsCWtra4wfPx42Njbo06cPGjdurDLtjRs3WIAMM0QV
+hw3+iYiIqDCpqam4evUqwsPDERYWhvDwcDx69Aj169dH7969YWVlhdmzZ8Pa2hpt27ZlgTHMEFUd
+TW3wT0RERFWvuOpi1tbWWLhwIWxsbNCtWzfI5TwNZpghqiaa2OCfiIiIqk55qosRwwxRtdHUBv9E
+RERUOVhdjBhmqFZgg38iIqLajdXFiGGGai02+CciIqpdWF2MGGaoTjhx4gT27dvHBv9EREQ1FKuL
+EcMM1Um5ubn48ccfsXXrVjb4JyIiqgFYXYwYZojwssF/eno63n77bTb4JyIi0lCsLkYMM0SvyGvw
+r6WlhUmTJrFAiIiINACrixHDDFEJeHh44NmzZ2jQoAEvPxMREVUDVhcjhhmiMli3bh0CAgLw66+/
+YsiQISwQIiKiKlCS6mLW1tawtLRkdTFimCEqyIULF/DZZ59hy5YtbPBPRERUSVhdjBhmiCrY/fv3
+MXbsWEybNo0N/omIiCoIq4sRw0wVCAoKgomJCbp06cJvpw7Ka/BvZmaGdevWsUCIiIjKiNXFiGGm
+iimVSsydOxc2NjbYs2cPv506KK/B/5UrV6Crq8sCISIiKgFWFyOGGQ1w8uRJxMTEIDY2FqtXr8br
+r7/Ob6gOyd/gv2XLliwQIiKiAigUCty4cUPlqgurixHDjAbYsGEDACAnJwe+vr5Yvnx5lb7/qVOn
+4OTkxLWiGrDBPxERUcFYXYyoBoSZO3fuICgoCNra2lAoFNiyZQu+/PJL1KtXr0re/8CBA9i7dy/D
+TDVgg38iIqKXWF2MqIaGGV9fX1hZWeHNN9/E7t27ER8fj71792LatGlVEqSmT58OBwcHrhFVjA3+
+iYiormJ1Mc329OlThISEYPTo0UWOFxMTg3/++Yc/iNflMJOamoqdO3di/fr1UpgBgP/+97+YOnUq
+ZDJZieclhFAbX6lUQktLq8Dxnz17BmdnZyQlJZVqmYUQEEIUOt/yLFNFTfvqfACUqiyrAhv8ExFR
+XcHqYjXLzZs3MW7cOMTFxaFp06aFjrdnzx4cPXqUYaaaaGnCQvj7+0Mul2Ps2LGwtLSEtbU1ACA6
+Ohpnz54tdvrc3FycP38eHh4eMDc3BwAkJiZi7ty5MDQ0hFwuh4WFBU6fPq0y3eXLl2Fra4s7d+4A
+AEJDQ+Hi4gIXFxfMnz9f7X2ys7Ph6+sLa2tr6OvrQ0dHB127doWPjw+ysrIqZJnKO21+V69excSJ
+E2Fvb4/Bgwejbdu26N27N3bs2CGFm+qU1+D/wIEDbPBPRES1SmpqKi5cuAAfHx+4ubmhdevWaN26
+NSZOnIjQ0FD06dMHO3bsQGxsLJ48eYJffvkFX3zxBQYMGMAgoyFsbGygq6uLkJCQIse7cOECHB0d
+WWDVRfwfb29v4ezsLKqaUqkUXbt2FV5eXtJr/v7+AoAAUOwynTp1Sjg5OUnjt2jRQkRHR4uOHTsK
+R0dH4eLiIurXry8ACB0dHfHHH39I0/7111/i9OnTwtjYWAAQtra24vTp0+L06dMiPDxc5X2ePn0q
++vTpI6ZPny4iIyPFo0ePxKFDh0SLFi0EANG3b1+RlpZW7mUqz7T5bdq0SchkMuHp6SkUCoUQQoi0
+tDRhZ2cnAIgVK1ZU6fccGRkpAIjY2FghhBDnz58XcrlcbN++vUTTGxgYiL179woiIiJNk5ubKyIj
+I4Wfn5+YNm2aMDc3F9ra2kJLS0t06dJFfPDBB2Lz5s0iIiJC5OTksMBqkH79+olPPvlEer53717R
+tm1b6Xl6errQ09MTR44cYWFVscDAQGFgYCCqPcycPXtWyGQycffuXem1zMxMKWAAEDdv3ix2PkOH
+DhUAhL6+vrC0tBQRERHSsD/++ENoa2sLAGLKlClq05qYmAgAYsSIEQXOOzs7W/Tp00eMGjVKKJVK
+lWH79++XltPb27vClqk80z548EAafurUKZVhAQEBAoBo1KiRyMrKqpYwExsbK4yMjIS7u3uJp2eY
+ISIiTfHw4UPxv//9T8yfP1/0799fNGzYUAAQLVu2FCNGjBArVqwQZ86cEUlJSSysGm7x4sWiZ8+e
+hYaZ8+fPC21tbZGQkMDCqqYwU+3VzDZu3AgXFxe0a9dOek1PTw8zZ86Unq9fv77Y+ZiamgIAMjMz
+ERgYCAsLC2lY9+7d0bdvX6kqWWnt3LkTV69exZw5c9TanAwfPhz6+voAgK1btyI3N7dClqk8096+
+fRsKhQIAEBcXpzLM2NgYAJCSkoK7d+9W+fedkZHBBv9ERMTqYlQjODo64o8//kBCQkKBw8+fP4/u
+3bujSZMmLKxqUq0dAMTGxuLw4cM4fvy42rCZM2di5cqVUCgU2LVrF5YvX15k4yttbW21E/b8WrVq
+BQCIj48v9XL6+fkBAMLDwxEdHa02vFmzZnj8+DESEhJw48YNdO/evdzLVJ5p+/Xrh08//RRZWVkY
+OXKkyrD8Yay0nR5UhC+//JIN/omISCOxdzF6lY2NDXR0dBASEgJXV1e14WwvU8fDzJYtW/DGG29g
+0KBBBZ6su7m54cCBA0hPT8f27dvx+eefl/m98nr/EqVs+J6cnIxr165BW1sbT548KXCcMWPGqL1P
+ZS5TcdPK5XJ8++23Kq+lp6cjICAAO3bskF5TKpVV/p0fOXIEFy9eZIN/IiKqduxdjIqjr68Pa2tr
+XLhwQS3MZGRk4PLly/jss89YUHUxzGRkZMDPzw8KhUK6kvGq/FcdNmzYgHnz5lX5ryB3796FEAJK
+pRJr1qxRuWJSE9y7dw/r16/HrVu3MHXqVCxZsqRauw5csWIFbGxsuOUREVGV4s0oqawcHBxw9OhR
+tdcvX76M3Nxc2Nvbs5DqYpgJCAhAamoqfHx8iryasWLFCjx9+hQPHjzAoUOHVK6CVIW0tDQAL6+A
+3L9/H+3bt68RX2xaWhoWLFgAf39/+Pr6Ys2aNZDJZLhw4UK1Ltft27dx//59HiiIiKjSsLoYVSRH
+R0csX74ciYmJKq+zvUwdDjNCCGzYsAFjxozB3Llzixw3Pj4eX331FYCXN9Gs6jBTv3596XFISEiN
+CDNJSUlwdHREREQEgoKCMGTIEI1Ztl69esHExASTJk2Cn58f280QEVG5sboYVaa8djPBwcEqr7O9
+jGaolt7MLl68iIiICEyfPr3YcWfOnAkdHR0AwKVLlxAeHl6ly2pqaio1mvfz8yuyfUtqaqpKL2zV
+ZeXKlYiIiICJiYlGBRkAcHZ2BgD88MMPePPNN3HixAluhUREVGLsXYyqWv52M3ny2ss4ODiwgOpi
+mFm9ejXMzMxKVMewZcuWGD16tPR87dq1ZXrPokJIXljJzs5WG9aoUSNYW1sDAIKDg7Fnz54C55Gb
+m4spU6aUqj1KWRr+l2TavN7h9PT01Ibl5ORU+0oXFRUF4GV7JGdnZ4wcORL//PMPt0YiIlKhUCgQ
+FRWF7du3Y/r06VKVngEDBmD37t1o0qQJFi5ciIiICCQlJeHixYv49ttvMWbMGFZnpgrl4OCA8+fP
+S8/DwsLYXqauhpkrV64gKCgIY8aMUbtnS2Hee+896fGBAwfw999/F7jDK0reSXxBISCvy+fbt29L
+w7Ozs/H48WMAgKenpzTutGnTsH79emRlZUmv3blzBy4uLsjOzoabm1uFLFN5ps3rpezOnTv4888/
+pdczMzOxe/dulV8VyhuqyqJbt2745JNPYGBgAF1dXcTExKBbt2746quvpGUiIiLNdP36ddy5c6dS
+5v3o0SMcPHgQXl5ecHBwQJMmTdC9e3csWrQIL168wPjx43Hy5EkkJCQgOjoaO3bsgLu7OywsLNju
+hSpV3v1m0tPTAbysYtajRw+2l6lrYSYzMxOzZs0CgFLtdPLfUFOhUGDevHlq3QrnDzjPnz9XGSaE
+kE7qk5KSkJycrDLc1tZWmsf8+fNx8OBBvPPOO/j3338BAOPGjcO4ceOkEOHp6YnmzZujR48eMDU1
+hZmZGZKSkuDv768S0MqzTOWZNu/qkBACgwYNwsKFCzF79mz06NEDXbt2lcZbtWoVlixZgh9++KHK
+V7wlS5ZAX18fPXr0wN27dzFr1ixs3rwZXbp0waFDh7hlEhFpmN9//x0jR45Er169EBAQUO75sboY
+1SR57WZu3rwphRlWMdMQ4v94e3sLZ2dnUVn2798vOnfuLAAIAEIul4tRo0aJ48ePFzrNX3/9JSZP
+nizatWsnTZf317t3b/HLL7+I0NBQMX78eJVhXbp0EV9++aUQQoiTJ08KJycnleE9e/YU27Ztk94n
+NjZWtG7dWhr++uuvi/Pnz6ssS05OjliyZIlo1KiRyrwMDAzEwoULRUZGhjRueZapIj5PUlKScHBw
+UBnH2dlZxMTECIVCIczNzaXXR40aJdLT00Vli4yMFABEbGys9NquXbuEvr6+mDx5sqhfv744dOiQ
+mDt3rpDL5WLIkCHi1q1b0rgGBgZi7969goiIqtaVK1eEq6urkMlkwsnJSYSEhJR6Hrm5uSIyMlL4
++fmJadOmCXNzc6GtrS20tLREly5dxAcffCA2b94sIiIiRE5ODgudNFK/fv3EsGHDRJs2bYSenp44
+cuQIC6UaBQYGCgMDAyET/1fHaMGCBYiKikJgYGCdDHVpaWkICwuDnp4eLC0tC2xvArysmnX9+nUk
+JCTAyMgI3bt3L3Tcag6piIyMxJMnT/Dmm2/CxMREGpaSkoLQ0FA0b94cPXv2LHF1v/KIiopC9+7d
+ERsbK9VjFkLA3t4eTZo0QYcOHeDn54fDhw+jRYsWmDNnDkJDQ/HJJ5/gyy+/ROvWreHr66tS5ZCI
+iCpPeHg4vv76axw7dgxDhgzB4sWLpZoMxSmudzErKyv2LkY1zpIlS+Dv74/U1FTEx8cjLi6O1cyq
+0bFjx/D++++DFUz/T4MGDTBgwIBix6tXr16Jd+bVSSaToUePHujRo4fasEaNGlXrjTPzL+PGjRvR
+p08f/PLLLwCAESNG4PDhwzh//jwCAgLw6aefwt/fXyM6LiAiqgvCwsLw9ddfIygoCMOGDUNYWJjU
+EU5BeDNKqivy7jfToEEDtpfRIAwzVK0sLCwwY8YMzJs3D5GRkSqB5t1334Wrqyu+/vprfPPNN1i+
+fLlaux8iIqoYv/32G77++mucPn0azs7OCA8Ph6Wlpco4vBkl1WXW1taQy+VISUlhexmGGaL/b9my
+Zfj555/x7bffSl1v5wWagQMHwsfHB1u2bIFcLoeFhQXmzJmDxYsX8xcRIqIKEBwcjKVLl+LcuXNw
+dXXFlStX0Lt3bwBFVxezsrLizSipTqlXrx7Mzc1x7do1hhmGGaL/z9DQECtXroSnpycmT55cYKDR
+0tKCt7c39PX1MW/ePAQEBGD9+vUq9yAiIqKS+/XXX7F06VJcuHABI0eOREhICLKzs3H27FmsWLGC
+1cWICtC8eXMA4P1lGGaIVH344YfYunUrPv30Uxw4cEAt0ORxc3PD0KFDsXLlSrXuuYmIqHjnz5/H
+0qVLERwcjF69emHUqFG4c+cO+vXrx+piRMXw9vbGixcvWDuEYYZIlZaWFjZs2IC+ffvi1KlTcHJy
+Ugk0+Xtcq1evHpYtW8ZCIyIqhb1792LChAkAAG1tbSiVSsTGxsLIyAgjR47E2rVrWV2MqBjW1tYY
+PHgwC4JhhkidjY0NPvzwQ3h6eiIyMhI6OjpSoFm/fr10o9Ca4s8//8TEiRPRrFkzaGlp8QumCvf8
++XMsX74crq6udeYzv/POO3j48CFPuEshOTkZMTEx0o2ggZcN+QHgxYsXCAoKQlBQEJYtWwaZTCZd
+hcn7r6Ojo/JcLpdDJpNBW1sbLVq0gKGhYa0pq3///RdvvfUWNmzYUGfWj++//x7r16+HsbExN5YS
+ysrKwqBBg1gQJZCdnY309HQcO3as0tYxhhnSKCtXroSZmRnWrVuHzz77DDKZDGvXrsWWLVuwZs0a
+vP322xg4cGCN+CyJiYm4fv063N3dYWBgwC+XKtzq1avx8OHDOvWZDx48iMaNG8PDw4MrQCk4OjpK
+ISYnJ0f6r1QqkZ2drfZfCIGsrCzpf94JHABkZmZK/9u3b4+OHTvWqm0qOzu7Tq0bMTExiIyMhJeX
+FzcUqnBRUVG4ePGitN9gmKFaz8jICEuXLsWXX36Jd999F61atYJMJoO+vj5sbW1VOgWoKVatWsUw
+Q5Xi+PHjde4zt23bFt7e3gwzVClkMhlCQkLq3Od2dnbGqlWruAJQpYSZyj5Wse4LaZzZs2ejQ4cO
+mD9/vsrrEyZMwPTp0zFixAicOXOGBUVERERUxzHMkMbR1tbG+vXrERAQgODgYOn1vCpnDDRERERE
+BLCaGWkoe3t7zJw5E8+ePVN5PS/QAKiRVc6IiIiIiGGG6gBfX98CX2egISIiIiKGGaqxGGiIiIiI
+iGGGGGiIiIiIiGGGiIGGiIiIiBhmiBhoiIiIiIhhhhhoiIiIiIhhhoiBhoiIiIgYZogYaIiIiIiI
+YYaIgYaIiIiIYYaIgYaIiIiIGGaIGGiIiIiIiGGGiIGGiIiIiBhmiIGGiIiIiBhmiBhoiIiIiIhh
+hoiBhoiIiIgYZogYaIiIiIgYZogYaIiIiIiIYYaIgYaIiIiIGGaIGGiIiIiIGGaIGGiIiIiIiGGG
+iIGGiIiIiBhmiBhoiIiIiIhhhoiBhoiIiIhhhoiBhoiIiIiqNcxcuHABZ86cYalQhYuKimKgISIi
+IqLKCzNpaWkYNGgQS4XqPAYaIiIiohoUZv7zn//gP//5D0uEiIGGiIiIqGaFGSJioCmMEALh4eE4
+ceIEnj17BmNjY1haWuLtt99GvXr1kJiYiJ9//hnTpk2Tpnnw4AGSkpLK/J6GhoZ47bXXih0vKysL
+ISEhuHr1Kh49egSFQgFDQ0N06NABNjY26NixI2QyGZ48eYJTp05h8uTJXLGpWgUFBcHExARdunTR
+mGV69uwZAgMDce7cOezdu1dl2L179+Dh4QEDAwNs3boVBgYG/BKpytbLFy9eFDq8adOmaNWqVYHH
+rOjo6AKnad++PRo0aFBh63ZR2w4xzBAx0GiAFy9eYNKkSThx4gSaN28OW1tb3L9/H76+vsjKyoKL
+iwsePnwIuVyuEmb+/PNPBAYG4qeffkJCQoLKPLW0tCCTyaTnSqUSQgiVcT7++GOp3Avy77//wsfH
+B9u2bUNCQgKaNGkCS0tLGBsb48GDB9i+fTuePHkCU1NT2NnZISwsDD179mSYoWqlVCoxd+5c2NjY
+YM+ePdW+PFu3bsX27dtx7do1CCFgaGioNs6aNWtw4sQJAICtrS08PT35RVKV+Oeff+Dn54ddu3ap
+HCM6deqE8ePHo1+/foWGmaCgIPz22284fPgwAMDAwACzZs3CRx99JIWZ8qzbJdl2qIoIohrAwMBA
+7N27t1qXQalUCk9PT1G/fn1x+vTpYscPDg4WAERiYmKNLfeMjAxhYWEhAIjJkyeLjIwMaVh2drbY
+vHmzqFevngAgOnfuXOA8QkJCBADpLyQkpMDxsrKyxO3bt8WSJUsEADFr1qxCl+vEiRPC2NhYABAt
+W7YUe/bsEdnZ2SrjKBQKcfToUfHGG29I7+3q6lqrtgtzc3Ph6+tbp/YFbdu2rdGf+fjx4wKA0NHR
+EY8ePar25VEqlSIpKUl07dpVABCGhoZq42zbtk0AEDKZTJw9e7ZWr1/e3t7Czs6uTm1T3t7ewtnZ
+WaOX0cfHR+U48tNPP5V42i5duggA4sSJExW6bpdk2yEhIiMjBQARGxtb4fMODAwUBgYGQotxjqh0
+V2imT5+OESNG1Ime/7Zt24br16+jSZMm2Lx5M/T19aVhOjo6cHd3x5kzZ1CvXj08evSowHlYWVmp
+PC/oVzQA0NXVRceOHfHVV19h8uTJyMnJKXC8H3/8EcOGDcPz58/RtWtXREREYMKECdDR0VG7+uPi
+4oJr167BxsYGAJCens4VmarVhg0bAAA5OTnw9fXViP1a48aN0b1790LHmT59Oi5fvoyoqCi8/fbb
+/BKpys2bNw9vvvmm9DwkJKSkP9gjLi4OdnZ2GDx4cIWu2yXZdqhqMMwQMdAU6siRIwAAU1NT1KtX
+r8Bx3nrrLXzzzTdISUlBSkqK2nAdHR1oaZVuVzNu3DhkZ2ervX716lVMmTIFSqUSDRs2xNGjR9Gy
+Zcsi59WkSRMcOXIERkZGSEtL40pM1ebOnTsICgqCtrY2AGDLli3IyMjQjJOBYrZRKysrdO3alV8i
+VQu5XI7FixdLz/39/Uu0Pw8PD8fz58/x0UcfVdq6XdrjGzHMEDHQVKHHjx9LJ2FFXdWYPn06WrRo
+IY1fUJmVhqOjI5YuXarymlKpxIwZM6QrNvPnz0f79u1LND8jIyN4eXnxygxVK19fX1hZWWHChAkA
+gPj4eDYYJiqh0aNHw8TEBACQlJSEHTt2FDvN999/j+bNm2PkyJEsQIYZIqqLgaZJkyYAgOTkZCxY
+sKDQ8XR1dTFp0iT8+++/5X7PFy9eQF9fXzpo5Tlx4gQiIiIAANra2nB3dy/VfCdNmoSsrCyuvFQt
+UlNTsXPnTsyePRuzZ8+WXv/vf/+r1vlFVRBCQKlUlmm6kirL/IkKI5fL8fHHH0vP165dC4VCUej4
+KSkp+PHHHzF58mTo6elV2Lpd1m2nPNNyW2KYIWKgKaP8N9Fdv349Pv744wKrfwGAj48PbG1ty/V+
+SqWy0J7ifvzxR+mxra0tjIyMSjVvIyMj7Ny5kysuVQt/f3/I5XKMHTsWlpaWsLa2BgBER0fj7Nmz
+RU67f/9+jB8/Hi4uLnBxcVGpbpOUlIS5c+di+PDh0vBXr2rmd+zYMQwcOBCvvfYaOnTogJ49e+LA
+gQNFvn9KSgp++OEHDB48GOvWrSvyRC0wMBDDhw+Hqakp2rdvj8aNG6N///7w8/MrtB0cUUlNnTpV
+6j757t27Uk9lhR0z0tLSMH369HKv22XddgAgOzsbvr6+sLa2hr6+PnR0dNC1a1f4+PgU+gMbt6XS
+p0Qi9mZWCb2c1YbezOLi4sRrr72m0ouMhYWFCA8PL9V8tLW1penv3r1b6HghISHCxMSkwGGtWrWS
+5uHp6cmNgr2Z1RhKpVJ07dpVeHl5Sa/5+/tL63NJepK6d++ekMvlAoAYPHiw2vDo6GhpOytofgqF
+QsyePVvI5XKxZcsWqfe/6OhoYWFhIRo1aqTWI1N0dLQYO3as0NfXl5b1m2++KXD50tPTxejRo4We
+np7YvXu3yMnJEUIIcfv2bdG3b18BQPTo0aNSejRib2a1vzez/D7//HNpfezbt2+h21zPnj1Fv379
+ChxemnW7LNtOnqdPn4o+ffqI6dOni8jISPHo0SNx6NAh0aJFC2n509LSauW2VJW9mTHMEMNMJQWa
+2hBmhBDi999/F82bN1cJNADExIkTxcOHD0sdZg4ePChCQ0NV/n799VexZcsW0a5duwLDTHJyssp7
+r1mzhhsFw0yNcfbsWSGTyVSCfGZmptS9OABx8+bNYudjampaaJgRQggTE5NCw8yiRYsK3XYeP34s
+GjRooHZClpqaKjIzM8Xu3buLPOFTKpVi7NixAkCB301KSoro3LmzACA6deokUlJSGGYYZsrswYMH
+UrAHIMLCwtTGuXLligAg/P39C5xHSdftsm47Qry8fUGfPn3EqFGjhFKpVBm2f/9+6X29vb1r5bZU
+lWGG1cyIWOWsSD179sS1a9fUuq3cs2cPOnfujG3btpWqHr2bmxtsbW1V/vr37w93d3fcu3evwGni
+4uJUnjds2JArHdUYGzduhIuLC9q1aye9pqenh5kzZ6pU4yyOXC4v0/C//voLK1euhKGhYYG9Or32
+2msYMGCA2usNGjSAnp4e7O3ti3zfoKAg7N+/H02bNsXUqVPVhjds2BA+Pj4AgFu3buE///kPVwoq
+s9atW2PcuHHS8++++05tnK1bt6Jp06Z45513CpxHSdftsm47ALBz505cvXoVc+bMUesEZ/jw4dKt
+DrZu3Yrc3FxuS+XAMENUSYHm6tWrteaztW3bFmfOnMG+ffvwxhtvSK+npqZi5syZRd4X5lU3b95E
+RkaG9Jeeno6kpCRcvXoVffv2LdE8imr0SaRJYmNjcfjwYZVG/3lmzpwpddO8a9cuJCQkVMoyrFu3
+DgqFAgMHDoSurm6B4zRq1KjQ6V+9h9Or8u6XY2VlVej8hw0bBmNjY7WTN6Ky+OSTT6THP//8M2Jj
+Y6XnycnJ+OmnnzBx4kSVe6OVZd0uz7bj5+cH4GX30Bs3blT58/PzQ7NmzQAACQkJuHHjBrclhhki
+zQs0RfX+VVM/29ixY3Hjxg0sXbpU5VfgPXv2YPLkySW6QqOnpwd9fX3pr169emjcuDF69+6NzZs3
+FzhN06ZNVZ6/eqWGSFNt2bIFb7zxhkpnGnlatWoFNzc3AC9v6Lp9+/YKf38hhNRIunPnzhU+f6VS
+iQsXLgAAmjdvXuh42tra6N+/P4CXXVJHR0dz5aAy69WrFxwcHKR1MP+VzZI0/K/sbSc5ORnXrl2D
+trY2njx5gpiYGLW/MWPGwNPTE56entDS0uK2VA5ybhJElRNoHj9+XKKeTmoaPT09LF68GEOGDMGI
+ESPw9OlTAMBPP/2EUaNGYcyYMWWed7du3dCqVSu115s0aQJjY2M8f/4cABATE8MVjTReRkYG/Pz8
+oFAoCr1LeHx8vPR4w4YNmDdvXrHVyUrjxYsX0jZa1NWXsoqPj5duXljcL8SdOnWSHj98+BA9evTg
+SkJl9umnn0on/35+fliyZAkaNWqErVu3wtbWFt26dau2befu3btSN8xr1qyRrsAW937clhhmiDQq
+0MyZM6dGh5nU1FQ0aNCg0BteWllZITg4GLa2ttKVEl9f33KFGZlMht9++63AYXZ2djh48CAAIDQ0
+lCsZDE9QJwAAGAdJREFUabyAgACkpqbCx8enyLuEr1ixAk+fPsWDBw9w6NChcm1Dr8p/FTMzM7PC
+P2P+Kp95J36FMTQ0lB6X5OSOqCjDhg1Dp06dcOvWLaSkpGD79u2wt7fH9evXK6Qb/vJsO3mhRAiB
++/fvl+gGz9yWGGaINDLQ1GTu7u6YNWsW3nrrrULH6dChA3x8fPDhhx8CAP74448KXYaHDx/CyMgI
+enp6GDt2rBRm7ty5g6ioKJibm3NFI40khMCGDRswZswYzJ07t8hx4+Pj8dVXXwF4eRPNigwz+W8W
++Pfff1f452zevDl0dHSQk5OD6OhoCCEK3fflv/Ff/rZ3RGWhpaWFTz75ROpIY926dYiMjETjxo0r
+ZBsqz7ZTv3596XFISEiJwgy3pXKsC9wciKiwoLJ8+fJix8t/o8yS3GW5pJKSkmBpaSldbndzc1M5
+IBTUgw2Rprh48SIiIiJKVG9/5syZUkPkS5cuITw8vMwB6lWtW7eW5n3hwoVS9TxYEnK5XLoBaFxc
+nNSQuSBPnjwB8LKtUMeOHbmSULlNnDhRal9y//597N69GxMmTECDBg3KPe/ybDumpqZSEPHz8yty
+2ryOdLgtMcwQUSWEmaCgoCLvsAy8bBeQx8bGpsQnWcVZtGgROnfuLB2UdHR0sGnTJmn4rl27cPr0
+6RLPLzExEa6urnj27Bm/XKp0q1evhpmZWbFdvwJAy5YtMXr0aOn52rVrCxwvrzpJenp6gdtYSkqK
+2ut6enpSY+G7d+8iKCioyG20oG21uO33gw8+kB4HBAQUOl5eSHN3d6/xV65JM9SrVw+zZs1Sea00
+Df+LWrfLs+00atRICibBwcHYs2dPgdPm5uZiypQpcHJy4rbEMENElRFm8nauRf1ClL9dUP7uMvNk
+ZmaqXBIv7sQor3rOhg0b1HqAGjp0KNasWSM9d3NzQ2BgYLGfJTQ0FJaWlujXrx9atGjBL5cq1ZUr
+VxAUFIQxY8aU+ETjvffeU9mmCqrWkndl8vLly7h9+7b0ukKhwIoVK6SQk79TAQCYN2+e9NjDwwMP
+Hz5UC/ohISEAXvbClJqaqjI8f7frBTVMnjRpEiwtLQEAmzdvRmJioto4t2/fRnBwMMzMzFSWh6i8
+Zs2aJdUK6NOnDywsLEo8bXHrdnm2HU9PT+nxtGnTsH79emRlZUmv3blzBy4uLsjOzpZ6NeS2xDBD
+RJUQZhISEmBnZ4effvpJpYGiUqnEjh07pBt4LV++vMBfoV8NGzt27EBYWBhiYmJw79493L17Fzdv
+3sTFixexadMm2NvbS20MBg4cqDa/Tz75BPv27UOzZs2QmpoKV1dXuLm54eTJkyq/WKempiIoKAjv
+vPMOXFxcsGzZMnz++ef8YqlSZWZmSr8Ul6ZXsvw31FQoFJg3b57KjwB5PywAQHZ2Nuzs7ODl5QVv
+b2+Ym5tDCCF1VRsWFob3338fx44dA/CyobSHhweAl1VxevXqhRUrViAwMBDbtm2Do6MjDAwMpBO6
+7t27q9zQM/+PGbdu3VJbdrlcjkOHDqFTp06Ij4/HhAkTpAbQefuQiRMnok2bNggMDKyQKkBEeVq0
+aIGJEycCAGbMmFGqaYtbt8uz7YwbN066uWdOTg48PT3RvHlz9OjRA6ampjAzM0NSUhL8/f2lHz24
+LZWRIKoBDAwMxN69e2vUMgcHBwsAIjExsUaWuVKpFAYGBmLJkiXCy8tLmJmZiRYtWoghQ4YIV1dX
+0bZtWwFAmJqaip9//lltej8/PzFgwAChra0tAJT6z8DAQOTm5ha6fHFxcWLJkiXScgAQMplMGBoa
+imbNmgkAok2bNmLRokUiLi6uVm4X5ubmwtfXt07tC9q2bauxn3n//v2ic+fO0vool8vFqFGjxPHj
+xwud5q+//hKTJ08W7dq1U9sGevfuLX755ReVbXLp0qVCLpdL4zRr1kxs27ZNCCGEs7OzaN++vfD2
+9hahoaEq249CoRDffPONaNq0qcp7mJiYiHPnzon33/9/7d1/dNV1/cDxF9vdxgZzTn4VET/q2DIC
+A9EgEaxOBxI9kOmRUA6dOqEQET+UMDrQOXKC6mQHS4qAlHM8mgdPHTMMzBN4GHFU1CUJyVkonFJ+
+jDMYsrGN7dMfftnXxa8pG9y7PR5/7W73fnbv+3Mv7LnPfX12e9KtW7dk2rRpyebNm5OGhoZk06ZN
+yYwZM5Lu3bs3e41NnDgxeeyxx055LEeOHElmz56dFBYWJr17906mTp2afOMb30j69euX3HXXXRnx
+Opw/f34ycuTIDvWamj9/fjJu3LiMfgw7duxICgsLk6qqqhZd//08tz/Ia+ek+vr6ZNGiRUlhYeEp
+/7/94Ac/SGpqak57/9rDa+mkV199NYmIZM+ePa2+7T/96U9JUVFR0ilp7WlAaAOXXnppLF++vNlb
+MdJdaWlpXHfddXH48OGm39xkmqeffjpuuOGGk7/4iF27dkVZWVkcOnQo8vPzY9CgQTF06NCznnb2
+AvxCJnbt2hX/+Mc/oqKiIhobG6Nnz54xcODAKCkpadfvJx48eHBMmzat6TeHHUG/fv1i/vz5Heox
+/6+DBw9GWVlZ5Ofnx7Bhw5r+yvnu3bujf//+Z3091tbWRllZWVRUVETPnj1jyJAhkUqlory8PPr1
+63fOv4jeErW1tfH3v/89Kioqori4OK688spmZ3dKZ/fee2+UlpbG5s2bO8zz6d57743t27e36C27
+6ezll1+OoUOHttn2z+e1U1NTE2VlZVFZWRk9evSIwYMHt+iEOZn8Wjpp+/btMXjw4NizZ0/07du3
+Vbe9bt26uP32252aGTizkyET8e6ppktKSpr9sa50kK73C9pKjx49Tpkni2jZKVrz8vKaBpPf6+Tb
+SltDXl5eXHPNNXYUF1Rbhsz5vnby8/ObnfnTa6l1mZkBAADEDAAAgJgBAAAQMwAAgJgBAAAQMwAA
+AGIGAAAQMwAAAGIGAABAzAAAAIgZAABAzAAAAIgZAAAAMQMAAIgZAAAAMQMAACBmAAAAMQMAACBm
+AAAAxAwAAICYAQAAxAwAAICYAQAAEDMAAICYAQAAEDMAAABiBgAAEDMAAABiBgAAQMwAAACIGQAA
+QMwAAACIGQAAADEDAACIGQAAgDSTsgTQttauXRtFRUUWgla3ffv2DveYKyoqYu3atdG9e/cO85gb
+Ghri+PHj0aVLF096/163iXXr1sXatWs9AVqguro68vPzo1OnThYjTf6fEjPQVi+u1Lsvr9mzZ0dO
+To4FoU3k5eV1qMdbVFQUGzdujLKysg7zmOvq6qK6ujry8vKic+fOfohqQ5WVlTF69OgO9Zjz8/Mj
+IuLOO+/0BDiH2traqKmpiYKCgsjNzbUgLXDkyJGIiMjKars3g4kZaCPDhw+PJEksBLSit956q8M9
+5rq6uli1alUsXbo0qqqqYubMmTFr1qwoLi72hOC8LVy4MBYuXGghzmL9+vUxd+7c2Lt3b9x3330x
+Z86cpgjk4jMzAwBpLDc3N6ZPnx7l5eXxox/9KB566KEYMGBALFq0KCorKy0QtJHy8vIYP358jBs3
+LoYPHx67du2KBQsWCBkxAwCIGkhPhw8fjrvvvjsGDhwYVVVVsW3btli9enV8+MMftjhiBgAQNZB+
+Tpw4EcuXL4/LL7881q5dG2vWrImNGzfGkCFDLI6YAQBEDaSn9evXx5VXXhnf+973YtasWfHPf/4z
+Jk6caGHEDAAgaiA9mYsRMwCAqIGMYi5GzAAAogYyirkYMQMAiBrIOOZixAwAIGogo5iLETMAQDuN
+moULF4oa2iVzMWIGAGjnUfPwww+LGtoVczFiBgAQNZBxzMWIGQBA1IgaMkp5eXlMmDDBXIyYAQBE
+jaghM7x3LubIkSPmYsQMACBqRA3pzVwMYgYAEDVkHHMxiBkAQNSQUczFIGYAAFFDRjEXg5gBAFo9
+apYsWSJqaDPmYhAzAECbRc20adNEDW3CXAwt1SlJksQykPZP1E6dYvDgwTFp0iSLAZCGGhoa4sUX
+X4yNGzdGbW1tXHvttTFy5EjzDLwvhw4dinXr1sXOnTvjqquuijFjxkRhYaGF4RTr1q2LzZs3ixky
+wzXXXBNVVVXRtWtXiwGQxpIkiYqKiti3b180NDREz549o1evXpGdnW1xOGsMv/3223HgwIHo2rVr
+9OnTJwoKCiwMZ1RdXR1du3YVMwBA66urq4vVq1fHkiVLoqqqKmbOnBmzZ8+O4uJii0OTEydOxG9+
+85tYtGhRFBQUxI9//GNvJ+N9ETMAgKjhglu/fn3MnTs39u7dG/Pnz485c+Z4WyJiBgAQNaSv8vLy
+uPvuu+Opp56Kr3/967F48WKnWeYDczYzAKDNOfsZ/l4MbcGRGQDggnOkpuMwF4OYAQBEDRnHXAxi
+BgAQNWQUczFcKGZmAICLzkxN+2AuhgvNkRkAIO04UpNZzMUgZgAARE3GMReDmAEAEDUZxVwM6cDM
+DACQ9szUpA9zMaQTR2YAgIzjSM2FZy4GMQMAIGoyjrkYxAwAgKjJKOZiSHdmZgCAjGempnWZiyFT
+ODIDALQ7jtR8MP87F7N06dL42te+ZmEQMwAAoiZ9mYtBzAAAiJqMYi6GTGZmBgBo98zUnMpcDO2B
+IzMAQIfTHo/UvPnmm/Hmm2/G9ddff9brmYtBzAAAdPCoOXbsWDz77LMxfvz4i/44du3aFSUlJU2P
+KScn57TXMxdDe+NtZgBAh3U+bz/75S9/GRMmTIilS5de1Mewc+fOuPbaayM7OzuysrLigQceOOU6
+5eXlMWHChBg3blwMHz48du3aFQsWLBAyZDxHZgAA/k9Lj9QcO3YsPvrRj0ZlZWVkZ2fHlClTYsWK
+FZFKpS7o/S0rK4svfOELcfTo0Thx4kRERHTp0iXeeOON6NGjRxw+fDgWL14cv/jFL+Jzn/tc3H//
+/TFkyBA7GjEDANBRo+anP/1pLFiwIOrr6yMiIicnJ0aOHBl/+MMfoqio6ILcx5deeik+//nPR3V1
+dTQ0NDR9Pjc3NyZNmhRXX321uRjEDACAqPn/qLnzzjtj0KBBp7wNLTc3NwYMGBDPPPNM9O3bt03v
+15YtW2LMmDFx/PjxZiHzXgUFBfH973/fXAxiBgBA1LwbNQcPHoz6+vrTRkROTk5ccsklsWHDhrjq
+qqva5L5s2rQpvvzlL0ddXV00Njae9jpZWVkxZMiQ2LZtm52HmAEAIKKysjL69OkT1dXVZ7xOdnZ2
+pFKpePzxx1v9TGcbNmyI8ePHR319/RlD5r1B87vf/S5uvfVWO452y9nMAABaaNWqVU1zMmfS0NAQ
+dXV1cfPNN8eyZcta7Xs/9dRTceONN571iMx7JUkS3/3ud+P48eN2HGIGAKAjO3bsWCxZsuScMXMy
+JBobG2POnDnx7W9/+4xzLS31xBNPxM033xwnTpyIlr6pJkmSePvtty/6qaNBzAAAXGTLli2Lo0eP
+vq/bNDY2xsqVK+Omm26Kd9555wN930ceeSRuu+229xVEeXl50alTp4iIeO2118JUAe2VmRkAgHOo
+qamJgoKCiHj3rGV1dXXv6/a5ubnxiU98IjZs2BC9e/du8e1Wr14dU6dObdF8TCqVirq6uhgwYEBM
+mDAhxo4dG6NGjYrOnTvbgYgZAICObN++fbF169bYunVrPPfcc1FWVhZ1dXWRl5fXooH8nJycKC4u
+jmeffTYGDRp0zu/34IMPxne+850zHlXJy8uLurq6yM3NjS9+8Ytx0003xdixY6N///52FmIGAIAz
+q6+vj5dffjmef/75+Nvf/habNm2K/fv3RyqViuzs7KitrT3lNllZWZGXlxe///3vY+zYsWfc9s9+
+9rOYN29es0By9AXEDABAm/nPf/4TL7zwQmzZsiU2bdoUr776atTX10fnzp2jtra22VGWX/3qV3HX
+XXedso377rsvFi5cGBHvnua5sbHR0RcQMwAAF1ZdXV288sorsXXr1ti8eXOUlpbGgQMHmr7+pS99
+KdavXx9ZWe+ek2natGnx61//OiIi+vfvH1/5ylccfQExAwBcKPv27Ys//vGPFuIMDh8+HP/617/i
+6aefjoaGhujbt2/ccccdsX79+vj3v/8dH/rQh2L06NHRrVs3i3UWI0eOjE996lMWQsyIGQCg9ZSW
+lsZ1110XvXr1ii5duliQczj5N2mysrKaTqfM2e3evTuWL18e06ZNsxgdXMoSAABt4fXXX4+ioiIL
+QasbPHiwRSAi/NFMAABAzAAAAIgZAAAAMQMAAIgZAAAAMQMAACBmAAAAMQMAACBmAAAAxAwAAICY
+AQAAxAwAAICYAQAAEDMAAICYAQAAEDMAAABiBgAAEDMAAABiBgAAQMwAAACIGQAAQMwAAACIGQAA
+ADEDAACIGQAAADEDAAAgZgAAADEDAAAgZgAAAMQMAACAmAEAANqBlCUAADh/VVVVsXfv3vP7wSyV
+ik9+8pOxf//+OHjw4BmvV1xcHB/5yEdO+XySJPHaa6+d9jYDBgyILl262FGIGQAAmvvLX/4St9xy
+y3lto2fPnrF///7YvXt3rFy5Mh5++OFIkqTp6yUlJTFx4sQYNWrUGWPmz3/+c2zZsiWefPLJiIgo
+KiqK6dOnx4wZM8QMYgYAgFPV1NRERETv3r1jwYIF8dnPfjYuu+yySKVSUVpaGpMmTYqIiI997GPx
+3HPPRZIkcfz48XjrrbfiySefjGXLljVtY8SIETFixIi44oorYt68eU3f44c//GFMnDjxjPchKysr
+7rnnnrjnnnti4MCBsWPHjnj88cdjzJgxdhBiBgCA06uuro6cnJz461//GiUlJc2+1qNHj6aPc3Jy
+ok+fPk2XL7/88hg9enRceumlsXjx4ma3mz17djz00EOxc+fOiIgoLS09a8yclCRJVFRUxMiRI4UM
+7ZoTAAAAtIKampoYP378KSHTUjNmzIjGxsZoaGho+lwqlYqFCxc2XX7kkUfi2LFj59zWCy+8EAcO
+HIgZM2bYMYgZAADOHTPjxo37wLe/7LLLYtiwYXH8+PFmn7/llluiX79+ERFx5MiR+O1vf3vOba1e
+vTq6d+8eEyZMsGMQMwAAnN28efNiypQp57WNLVu2REFBQbPPpVKpmDVrVtPln//8582O3vyvo0eP
+xqOPPhpTpkyJvLw8OwYxAwDAOX6oysqKTp06ndc2srOzT7uNb37zm1FUVBQREW+88UbTmcpO59FH
+H41jx47Ft771LTsFMQMAwMVVWFgYU6dObbp8//33n/Z6SZLEihUrYtSoUR94dgfEDAAArWrmzJmR
+Sr17ItotW7bE888/f8p1XnrppXjllVeahQ+IGQAALqo+ffrEbbfd1nT5dEdnVqxYEcXFxfHVr37V
+giFmAABIH3PmzGn6+Iknnog9e/Y0Xa6qqorHHnssJk+eHJ07d7ZYiBkAANLH0KFD4/rrr4+IiMbG
+xnjggQeavmbwHzEDAEBamzt3btPHK1eujKqqqqbB/xEjRsSnP/1pi4SYAQAg/dxwww1NZyo7evRo
+rFq1KrZt2xZlZWUG/xEzAACk8Q9vWVnNZmeWLVsWDz74YFxyySVx6623WiDEDAAA6Wvy5MnRvXv3
+iIjYu3dvrFmzJu64447o0qWLxUHMAADQehobG1t1e/n5+TF9+vRmnzP4j5gBAKDVvfPOO00f19TU
+tMo2p0+fHnl5eRERMWzYsPjMZz5joREzAAC0rmeeeabp471798aOHTvOe5u9evWKyZMnR0QY/EfM
+AADQeg4dOhRTpkyJIUOGxIoVK5p97eqrr44bb7wxfvKTn5zX95gzZ04UFhbGxIkTLTgdUsoSAAC0
+vm7dusWaNWva9HtcccUVsWnTpigsLLTgdEiOzAAAZLChQ4daBMQMAACAmAEAABAzAAAAYgYAABAz
+AAAAYgYAAEDMAAAAYgYAAEDMAAAAiBkAAEDMAAAAiBkAAAAxAwAAIGYAAAAxAwAAIGYAAADEDAAA
+IGYAAADEDAAAgJgBAADEjCUAAADEDAAAgJgBAAAQMwAAgJgBAAAQMwAAAGIGAAAQMwAAAGIGAABA
+zAAAAIgZAABAzAAAAIgZAAAAMQMAAIgZAACA9JCyBABAW+jVq1cUFBRYCFpdZWWlRSAiIjolSZJY
+BgCgtVRUVMTGjRstBG1q6NCh8fGPf9xCiBkxAwAAZB4zMwAAQEZKvb79xaYLJYOutiIAAEBGcGQG
+AADISP8FpxZnWS0U37cAAAAASUVORK5CYII=
diff --git a/Documentation/DocBook/media/fieldseq_bt.gif.b64 b/Documentation/DocBook/media/fieldseq_bt.gif.b64
new file mode 100644
index 000000000..b5b557b88
--- /dev/null
+++ b/Documentation/DocBook/media/fieldseq_bt.gif.b64
@@ -0,0 +1,447 @@
+R0lGODlhcwKfAucAAAAAAElJDK+vr0gSElYMDC8kDV5bEBcHOwYGSEQODmEaGgoKOBkTVC0tVyAg
+aDcJC6Ojoys8DAAYGqSkxV9fFFtdEJmZmUA4EF0wMAAAcAoTHTZHJ0gYGAcMTwcSO29ISFUHB2AV
+FXd3YAcHMRUVQiIAGg4HT3t7eywOJ3d3dwcHSEEgABMuDnd3OGpkSQAAYlZGBzEEBGJlDCstCxwc
+WQcHSzkRGWBtYC0AACA3ABAKNhAQTTMwDA0VQD4AAEYVFVVVVSQMJQULOB8fQScnYBgYRD5VPmZm
+DEZRB2ZiDAoKSgAAVAwQOH5+lBwcS+7u7hoaST4+X3d3WACPADMzMyBRIDgAAGBgc0JCEHEAAEwN
+DRkwDAoKOR8kPZR7eyA1IABpABgNQBA9EABVAAsLRww/DAwMPgBNAENDCgc9B8zMzAUFQQBDAD4M
+DAwOKgAAcQA5AEtLFYqKAA0NTC8HBxEREQgfCAArAAApACIqMkkGBhoqKnwAAAsGQ6qqqkoKCg4O
+MlkcHAoZJCcrW6SkpFQAAAAAOBAOSwAVGh0ROgMPHWZmB00QEGUAAFQaGjEyC2w4OLe3n4qKioiI
+iBAVMC4uXhkZUGIAAHJYWHd3AAAAPhAQUQUGL0BAIGggIBgAGkIVFV9fEAwcJR8KJA8MU9EAAAcH
+VRoaYWhoaDcAALu7AGZmZnAAAGRkZGQVFVhqWD4KCgwOUzMzDAAAmgklBzEHBzExClhYWBMTPAYJ
+Qy8fCFpaB///////ACISRExUDUQrDAwMVhISSEYYGHd3IDhcOERERElJAAkPNTsHF1hYckgGBj05
+CFYAADg4OCAVO0hCDDAwMLu7ilpaDR8qCDg+EBxGHN3d3REGNjo9CDQ8DBwYRGZmHFMAABQ+FBE+
+ESIiIhs+BxU0FWVeBw04DYqKsxAsEB8hQAwuDAc2BwwqDAoqCgcIL1dMDQAA0Q0iDQwiDAckBxAQ
+EDwAAAAAU0JCDAkJPru7u5oAADg4bAAAAAAAAAAAAAAAAAAAAAAAACH5BAEAALwALAAAAABzAp8C
+AAj+AHkJHEiwoMGDCBMqXMiwocOHECNKnEixosWLGDNq3Mixo8ePIEOKHEmypMmTKFOqXMmypcuX
+MGPKnEmzps2bOHPq3Mkz5z0AQIMCSNHyZ0WjE5GqNAZAqcGmT+8VZMoL6k6rEp0KfEJl489VBcEB
+GGjBwk8LBJsyFQqU15MUdQDUWfVk4JNVccER5ZXCT8+/gAN/xFp0LEWtDxEfRKo44s8n1/YeJCyQ
+8GO+1xhSbvwxRWaklBEiXoVW4886BNW0FQjkyem6laUKdLqKSuZrQIxtpbLqc51JbsHBFkw8pYUT
+w4uHDK2SM0PnCqHPNiz9uWGFoS1fb7h5u0nQshf+ar2G2isAKn4FpqByHQivn8YkY3WK9RoANXx1
+kwUncBVw5QCSdAsA8jiDXIAeEQYXAMbgp0YKKQAFYVxEPbjgXm/FBURmD1pQxz1qsDfUdAVlCMCG
+vFg4lhpMgbOKYX6IBY5fHX642FBx0cULbnKlUFdQkgS1IxA91mWMBWJRYQF7dZQ2HVBIsdhjbG4R
+WUeE4f3UFlQN6hUiUK1puV1Q93Sp24LglNYlAGmmKGJrvBxJ4YWxiUkmLzGymZ6ULnqXAlgC5Tmj
+QRE2qd+NkwDKCziTwAjcT6rhV1WW28013D11UfHfVrItieCnHw0YVIEHgnoRVutllgJ/X+F54hP+
+fjT1FWR68WXbXV3dU4cxmBoDnGpSaZUqru/NJRUVrV3DXrHBAnCNrrwmN9Cs17jIC2+QUUEUY4Zh
+qyxR52X2IlFwFWSUUU/8tmObXBrzxBNEhveeYVCd5wd5RD1Ra3eVzajGPeCoSq8x4qJ2ZXDghvlq
+rFJBBR6z82aGbLbe+TqbjT9lNtCq4npHUMblqQEOUr3Nmx+VJJIVl7aSToqQan/ZydbMNNds8804
+56zzzjzXbGpFWA0q0IdKWSWrswIhyUuTAtn3L9IE2ddsQUzveF/GKUJtwVirSAZECliLpnUdqmns
+ockml500agCkx625YxnlqUCT6NaU2lbL+zD+AKXNpTHK09Lr5MaCk+h3WrIZ3fDULnc90Nd4b12Q
+VY6zJtmiTnoc+LV+QYiUfuiyS6lB96wHAKDMAa5TZBC27vrrsMcu++y012777bjDHg3N4Dgj7c8P
+YTXzPUUnTvx1Rgl/PFlUgBMv2gMp/zaJawUFtuabT6fUudTFjfxYVk2/uVERCmX38tHrTe/iTa8C
+Th1A4MevyykCsStV9Bt1jfvwy288lQ5bX5zYcr3spU8g1ZMQ4gbCH7HxIlGLetp7/sOYOjxhPthz
+FX40VRDPqA54IISIqAhkoN+F0CFBkxf0FBe2s1XNaVG6D5W08sKrGcY+ZuPa5aB3I7OBI3L+qHkb
+EPPXPbiZzAKHSh8Om6YdAKowQ/TLnlUOhrbwbQeKinNiFaVSuYEskReSI4iNeNFFg1StKgnRH9lY
+NZYUSEopX8NgWpIDlVURJFawSd0J92iQW5DKhHzkTnhS9UALFq9Op6MVuW5VG7RF6oFt46GtrkEs
+pRgjWcvSVbCIhrwdUqtW3tLWvOqClFCCSzbiIxG61PUtlxnDXfBqosusAgQ42TGSHwPAj2TDSqbs
+kkS1rMst/zdLxjkMYza6JMWmshdNQqx0fAkYGanjyW6J0iiHUkq65DiQV2aGWMFB0T1EmbVAmjMh
+lDwnRRS0MvwcclI/WZCOTAQnpSwoPgD+mIRW6EnK6zyBKYZKmozQYk/vqIiKPEKoWNRAHtQkdC9W
+TNn4DPMlKkIllldyYy61mKK1UAE/AI3aQq2CJDbBhW2oXJFH/YeyekmlodeSYUnb5BaAIrEgk3CP
+QNlUmgi55UVNMoxPNwpJd95HMk5p1Ojsghe5lGwrTa1V0nSqzqpadSXVmckOr7rHdAEyNbDRVU+A
+MDiumvWsHclqTO6xKbSeMAVlPcgqWgMvReWkWm7Nq14tota9+vUjb2FILF/FE7P89bCITaxiF8vY
+xjr2sZCNrGQnS9nKWvaymM2sZjfL2c569rOgDa1oR1uSfxHvtKhNrWpXy9rWuva1sI3+7WkBaVrZ
+2va2uM0tbNOo29769re7FQ1wh0vc3/K2uMhNbnAN4hrlOve5p21ZQvIxi+pa97rYza52t8vd7nr3
+u+CtbgZUOBBP4OO86E2vetfL3va6973wja98z1uIhHBDFfjNr373y9/++ve/AA6wgAeMX24kxBpg
+SLCCF8zgBjv4wRCOsIQnTOEEWyMhSwivhjfM4Q6DlwaiYcV8R0ziEptYvp5gSD7cweIWu/jFMI6x
+jGdM4xrb+MYsngV5BeKJUvj4x0AOspCHTOQiG/nISE6yjy+REGL04slQjrKUp0zlKlv5yljOspaf
+TIyEVGEKYA6zmMdM5jKb+cxoTrP+mtcM5iok5AU4jrOc50znGztANPhQsp73zOc+JznFC1lxnQdN
+6ELTWMcI6bGfF83oRhuZyQhx8pYnTelKWzrLXUbIl9nM6U57+tNqdjNC4GzoUpt60HdeTJ4dzepW
+LxrQChH0qWdN60PvmBeKdrWud/3oJl/618AONpYzfZBNg/rYyE72mUV9EFLX+tnQdkeqSbdqXlv7
+2qWAdULOYYZue/vb4A63uMdN7nKb+9zo7jYXIIAQDhDg3fCOt7znTe962/ve+M63vt9di4TI4ggA
+D7jAB07wghv84AhPuMIXDnBZJOQd6Ii4xCdO8Ypb/OIYz7jGN87xiL8jIexIt8j+R07ykqPbDQiB
+wB/2zfKWu/zl+uaAiqNNc1oj+iC5xrbOWw3pg0ha2EAP+qWJbRBjK/voSP80sw3i7Jo7ndDTNle1
+d051RmsbIbJ+utbnfHOD5LzqYN9zzw3yc6Gb/exWJnpBjJ70trvdzEsvSNO3TncbR/1jUw+73pF8
+9YNkve6Al3HXC/L1vRt+yGMvSNnRznjGq50gbH+75N8ed4LMPfCYb/Hdp5X3w3v+x303yN8zn/nB
+E6Twn/d84gmy+Ma7PuiPH0jkJ0/7o1d+IJcnfeA3P5vOp/7woS/IEOZA/OIb//jIT77yl8/85jv/
++cUnBEJ+oIXqW//62M++9rf+z/3ue//74K8+LBKChWmY//zoT7/618/+9rv//fCPv/mxkBBzkOP+
++M+//vfP//77//8AGIACeH/mkBD2AH0ImIAKuIDPxwQIQQjhF4ESOIEUGH4/MHO6p3umV16/14Gr
+NxCt93oi+GuxJxCzV3soCGq3JxC5l4F0x3vv4XsdqHfBRxCj54J0t4E8NoOp94ECEYIjGISTVoK8
+cIIpeIRstoK80II4+HQweA8yyINVV4MDcYNN+HQ6iGtSqHq+JoReWGlEaIRIOIbL9mZXuHt4toXA
+h4FnmIO3hnpquHM+yAtA+IV2OGVhSIZ6mIRm2IYvmIZxSIMMAQUvUIiGeIj+iJiIiriIjNiIjviI
+kFiIS8BuB5EA83CJmJiJmriJnNiJnviJoBiKoniJOJAQ2ZAJqJiKqriKrNiKrviKsBiLsjiLqJgN
+CREPbJCLuriLvNiLvviLwBiMwjiMxJiL8ZAQhhCJyriMzNiMkDgCKZcKoziN1FiN1iiKCcCGfoiF
+bxiIejeHdXiH4tgLebiH5liGo7aNW/eEUeiNvEaFAmGF6lhrWQiH7shr4DiO+oiHXnaO/khmSsiE
+8zhr7HiPU6iNAwlt9WiQVJeP+/iQ5NiP/ziRUxCQCVlzBcmQOgePvEAEHvaRIBmS3pUBAoAQrsAH
+KJmSKrmSLNmSLvmSMBn+kzI5kyjZDAkRCgSWkzq5kzw5YKGQEGJQYUI5lERZlBQmBhgmkkq5lCC5
+CQghAI1Ak1I5lVRZlTPpCgzRAG+wlVzZlV75lWAZlmI5lmRZlma5lfRQkgehACfWlm75lvBVXwgR
+B3JQl3Z5l3iZl3q5l3zZl375l4BZl3GQEOJwBoZ5mIiZmIq5mIzZmI75mJAZmYYpDgmhCWd5mZiZ
+mZppliTwlCIGl6AZmm2pAAh5kTbXjRqJbQ4JkfpYjhTpjxZpmtCWkalpbRwpj7JZaAtZm9a2mqwp
+jq75muYYm7lJa7TJm7p2m8VJj6iJnLrmm79ph8EpnHpInMtpasfpnKz+xpFOUAPe+Z3gGZ7iOZ7k
+WZ7meZ7omZ7eqQKUaBADAALwGZ/yOZ/0WZ/2eZ/4mZ/6uZ/wGQMJgQa7EKACOqAEWqAGeqAImqAK
+uqAMGqBokBDrkA4SOqEUWqEWeqEYmqEauqEc2qESug4JsQbqOaIkWqImmp4LkHJ6wJ8s2qIu+qL7
+OQCleZ2EtpvayXNdGJ13OJ3UOYbWSaOFlp03anUzCqR0ZqND2mjQqaNCyKM9eoQ/aqR1JqRJ2mfK
+KaW62ZxVumhLyqQj6KRPioJRiqVyRqVbqmdXSqZ1hqRnymdd6qWvB6ZhSntjqqZ2BohtaqUM0Z0n
+2qd++qfmyZ4I8Z7+MFqohnqo+OmfCAGgDdqojvqokLqgD4oQEeqhlnqpmJqpHAqiCCGigPqpoNqn
+KXoQELCiiHqqqFqoMhpodrqmWpqnevamcNp4cjqnklenrUpjZgqrRpamuYpjbMqrSCars4p2tWqr
+boervxpjuyqsQ+ary1pjweqsRUasxWp2x4qsSaes0epizUqtQMaRWrmZ5Fqu5jqWaYkQbCma7Nqu
+cZkQdBmY8jqv9FqvfzmYCFGYkrmv/Nqv/gqZlIkQlnmuBFuw5NqZByEAn+muDNuw+ECaC+GRTDmx
+FDuSamkQJ2mVGruxHAuTNokQONmTIjuyJAtgP4kQQWmUKruyLBv+YUiJEBlWsTI7s9XllAkblR2b
+szqrsVjJqt1qY9MKrkJmrdcKexKprWLahz+rq3gqtEUGrUsLY0HrtD9GtEUrbNmKtMrGrUv7rU4L
+tVHrYlNLtaVgtVcLbFmrtcjGtT/rtULLkVHgAHI7t3Rbt3Z7t3ibt3q7t3zbt3JLA7eGAZ4wuIRb
+uIZ7uIibuIq7uIzbuI47uKCQEJ1ADJRbuZZ7uZibuZq7uZzbuZ77uZTbCQnxBVVQuqZ7uqibuqq7
+uqzbuq77urBbul+QELjgt7Z7u7ibu317DqIRCI/7u8AbvMLruBhAWsZ7vMibvMq7vMzbvM77vNAb
+vdI7vdRbvdb+e73Ym73au73c273e+73gG77iO77kW77me77om77qy1VnBEblsRXNlEGawRa6ARr0
+K0nzEhRCcxDlwxZScRdxYSnRIxRwsr4GfMAPkQJUxQtAYFdDhRhqQCSEhRCh8TBGdMHumx5ptB3k
+gSK4UQcaYxXKssAIXMImPBCqARsXpMF+EBcSxUB0wRVJNDnkZcFEdcPRJB7bcUkFUUuAEysnHMRB
+TFMt7EVpUkRTEVYZVMEChMEGZDV/cyN2IUOpoUtRBMRCnMUHrMD9IRm+kkqE0kCTUcNNjMMvHEVS
+fMYcNcJa3Mbqm8JLIylcDMZ2ARe3VhVskTIzsy0eoxV6BD3+ilEv5vNVblzI3qskMXIx/aTGhUQw
+2EHGH4S/TvFFDrQVVIzCVvzHhrzJ3ptTldO/ZIIYq3LHB3TBTEw622FH0bHDJOzDaMzJsAy+9vFD
+qHzGFyRdCXHKryzJ1+EhGlzJTQM/t2E/IUzKsXzM19s8aSwzvDIzuQzJeJzHzJy/QLG/G1wiTXU4
+kYzM3NzN3vzN4BzO4jzOZpUCr3TOr4TLH4FE6PxKcUXO8BzP8jzP9FzP9nzP+JzP+rzP/NzP/vzP
+AP1YuTPQBF3QBn3QUexXrHPQDN3QDk3QCb1XpfPQFF3RFg0hjtUzGr3RHN3RPGPMZiUzHj3SJF3S
+NwPSXAX+yia90ixN0hm9VXr1Eyh9VTKdWDWNWEOF0/K7VyOCWDd9WD/9V0HtVzl9WEWtWD0N1Joc
+0kvNVUO9V0dN1DutV0ntBZhw1Vid1Vq91Vzd1V791WAd1mKN1YEz01b10zfwCmq91mzd1m791nAd
+13I913Rd12p9A2WdWDl9DWPd137914A91l5AOC/NgWRLZD331DGNFWKotlub1zrNeYdNZLAW1YiV
+1PZItond1DTN2I5Np5Bt1MjTjlRb2VOdV5g92YgX2kLt2Z99q6wt1ZKt2kFm2oW9g7QNZJtt1lX1
+04392ioY21A92rkdroTdWEnNCKm63Mytn8sg3IsdHur+oKnUXd3WvaHqAN15ldOE0Nze/d3yyQjH
+zVipXdxLpt1u5dvATXnojVY5DYXm7WO2jdySkdlUu9s27drrva3tfVbvTdpfO96LVd7mjd8+rd/7
+bXv9Xc7EHd/zTd71Hd9lu+BOjeAJnmzMptjb3eDm/eADLhmECt4inqqKOi+8rU4/XanXveIsrqmc
+auJ6fR2lOuI0fqqryhen7VYEXtwGrtTh8dsXvmYZztlW9d8S7uFIHeHx3eOt/eNBruDaE+Oz3eEC
+nuSGXeAU3tlO/uQYnuVFzuHFjeSJldTr6rBmLppyCeMHHh76+q9u/uZw3pgBq+aiPRAKe+Z4DpoQ
+i+P+t80LjpAFgB7ogj7ohF7ohn7oiJ7oir7ogO4DXt7bWOENLTvplL6y3vDo6pTTAtANjN7pnv7p
+oL7ojlDlY67kWB7lay57XN7lqF7nvXfkpH7Zps7jmH5O6r3qxzbkJ35ORu7gsX5YO57bTO5Xt47r
+Slfr5tTrVK4eOY5WST0MbRDt0j7t1F7t1n7t2J7t2r7t3B7tdIDsgfTT8FAG5F7u5n7u6J7u6r7u
+7N7u7v7u5A4P4M5HOQ0BD9Dt+J7v+r7v3D4Mv/5XwU7bwy7RFm7sfNjqf6XsYf7vfhXwqj3w0a3q
+Bu9pui7lr+7rzN7n9u20EJ9XxT7xB0/nCQ/muS3+5rJ+5bSO8MRe8CCPZhUf2Re/7HxO3wNBfRV4
+8zif8903fipP8OFhfwMY9EI/9EQfgAXY83rF3Tq/9Eyf8xeY8TSP26cu8iu/5S0v5PO+Rwpf8gzP
+07Mu7Fl/Qh9/9S4f9iG09bRt8sD+9QJv9iA09mQPd24PPGiv2moP8Gz/8HP/M3Af9wC596ZS95N9
+9w0vGe4Gc4if+Ip/b/2G9B6PFRDXcZI/+ZRf+Rv3cY7vVvW+covf+Z6P+DIH9RCO8mCf+WjV934v
+Zi/v6jEI66L/4aTf9qZ/Vqif+m0G+KAi+IdN+F4f+3o/+0xt9bb/98Cf0iSf9l1P1ZIRAnne/G3+
+meYant5Y8Q1jUP3Wf/3Yn/3av/3c3/3e//3gX/3fgPufoukL6/zoL18hkPyoLRl/HurwH//yj+iO
+XvxaPhCSXun6v/8Sdun2/+UAwUuggG5ZDB5EmFDhQoYNHT6EGNGgI4G8UgComFHjRo4dPX4EGVLk
+yIwAUlT0VErlSpYtXb6EGVPmTJo1VV6qeA/APZI9ff4E2lMnT4FVphxFmlTpUqZNnT6FGlXq0So5
+dwbFmlUryYs58dkEG1bs2JqeKnbdmlbt2oomUZKFG1fuTJwCh7LFm3fk3aJT/f4FHFhqVbtX9R5G
+nBEtr3tf5z6GDNeswMWJLSd2KzBlZM6d6Vr+JXpZtFq+vIwKRp1a9VPCjA2Php11cWPPtW2vnGwR
+Y2zeWzPzwuBJ+HDixY0fR55c+XLmzYWDAt1butDXX6pcx55d+3bu3b1/Bx9e/PUv0aefBzk7kHP2
+7d2/b47h7G709UH+tp/fdWj99kv3r+8/AM+rbEAC6TMwP/wS7E1ABmNz8MHRIpTwsgIrHO1CDGNb
+cEPLKPRQLxBDxGtEEtfS8MS8UlTxMABWuSdGGWeksUYbb8QxRx135DHHSV5rUUQAJumxSCOPRDLJ
+GH/kL0i8LlIySimnNHIVBJ1EDAAtt+SySy+/BDNMMccks0wzm8SSNDPXZLNNN9/0Es00t7r+CE47
+78TTzTkTo7JPP/+U8Yk92XoCUEMPRVLQQdW6BlFHH8VxUUknpbRSSy/FNFNNN+W0U08/BTVUUUcl
+tVRTT0U1VVVXZbVVV1+FNVZZZ6W1VltvxTVXXXfltdfD6rAgIwvqyOiJk1zDyktjkOVFWWYz0mlL
+cPz4qEsqQrtmlToAACcFQaPt8pYvj7WACi2pCFYgbjWyUk5f34V31xSAyAiIVRTbTaeN1ABCSyAU
+5QjIwtRFU1+NDOZlWGo7MuwJbQW9po5VruFFDSqo0EjgZvlbhYqF/aDi3mYBUCOjbd2NN2WVY1UD
+AEWfAGBhXvzYdmCNwFnliSeoWJbhgnf+0xhhqxQDx6PXYObJGHoreqLbktB8zQJwAOblCWCbpeLY
+mc1FeWWvv0YVnHRprugasfO9khdjFBU6458J3qjtZ3m5BgCKAw7Nap1J1mgSjNuCOjQqJtkoBYyH
+LFqgVZgEu3HHUZ1Xca2NIRLtj1JIHO+4gX774Cs1hrvqVaiVW2iNXwsao53AKZmXOuru+nHZZ6+0
+ZUHraD1yg+UWyNg6UPbSZi9PKv1zd50t/umNUH97pxROAnlj2qen/lJ0/Uj8njq+tXyjSeow5m6f
+Nw8d2rSFrlt8tyv6UQ2YW2f/7/IBr2hw7w+/R42iF5e+ev//T9Mk7HWsOnEpRmnDXOz++me++dmM
+gWfJ3PL4cxWlFctp9FufQIZFNasF6yqse90CAThCEmKobqwj39yqxreQpK6BKnyWwowWmu8JJGIT
+q9jFMqi8inTsYyGDm+H+BroSFtGI9qFCBIfmGi4ZA1xbmmEKv8TELS3ridOqFpeoAL9sbatbVBNh
+A8t1LsLBrWVlJOIR1bhGNrbRjW+EYxzlOMdQpcAYd8SjMeCHGAvkEY/pomMgBTlIQhbSkIdEZCIV
+uUhGNtKRj4RkJCU5SUpW0pKXxGR98rRJTnZyS1pD1RM9OUpSnmlVoixlKlXJJVgBwBjPg2UsZTlL
+WtbSlrfEZS51mcsOlUonq9hlMIX+OUxiFvN57Trli4y5TGY2M5jGSJuqelmqaY7KRKC65qey6SkW
+naqao/pmqLbZqXFyqpyb6qapwhmqdWozjaI6p6bimal0UlNryshHPvW5T37205//BGhABTpQguoT
+fu30VGlc0AKGNtShD4VoRCU6UYpW1KIXZagLzJOq0lSioB8FaUhFStAozKeVWsuHO1S6Upa21KUv
+hWlMZTpTmtZUpbMIDULJ+Rpi9MKnPwVqUIU6VKIW1ahHRWpSfUqMjYbyNS+waVSlOlWq1tQBJn3V
+b1JaVa521asyxWlbQHmq0vRUqWdFa1rVilSmFkaB7gwNVL86V7py9aqUiWaqtFr+V772laZhVddY
+TVXWtRbWsIc9alv3k8y4+tWxj13pXXVz0opsFbKXrStgmyVYX/IUsZ8FrWEVO09MlUaumEWtVyVb
+T1L9BgovgG1sZTtb2tbWtrfFbW51u1vYLgECYmVsRWxxDOIW17jHRW5ylbtc5jbXuc8lri2aStbX
+GIK318VudrW72xFg1VV7TW14q6pZnZrTs6FFb3oTO93BPlW875XqavOKKvDC175gzSlnSUVY9fbX
+v0tlb2cbe18Cv1S+lBWIZQu8YHeQV7/WPO9/JRza0b5TnO5lMIMPnFWtEWEWHwZxiEU8YhKX2MQn
+RnGKVfzhDAgAuKoqTShUMWP+GtfYxjfGcY51vGMe99jHMw5FgPf7miWs2MhHRnKSVbwJ77bqNw14
+Q5SlPGUqV9nKV8ZylrW8ZS5HmR4uDmxwBRIHOZTZzGdGc5rVvGY2t9nNb4ZzmeMgZAiHRhNdxnOe
+9bxnLpOgyayqb4YJ7GAx88KsE0b0YSv81oRiWNAE3vB3UfroAhMaxhFOdKbTuuhCn5bS8I20kyf9
+aftamqOY1nSq2UpneDqa1OINNaC15oQa1NrWt8Z1rnW9a1732te/BnatVfDbMF86NGjYRbKVvWxm
+N9vZz4Z2tKU9bWonGw2svnBo1hBsbnfb298G9gL+vKpAvzq1pnZqaA6tanb+F5XTxq6Ip82N2liT
+e9TzPnd+C73udvcbqO8+9YDxTe9xS/PeA78suqmrbn83/N/YxqarEf7Yehu8shPHrMLby3CHOxzg
+6Y43xi9bcb3OGtwnR3nKfT3sFwe8IsiudsxlPnOaT/vabi30tlW+c56fXNx4RTAvFCzyvmpcwBXh
+d8fZ/fGFh5zojiU5fQ/+9MzqG94CSbrSU830jTud6nyNujen/vWvGn3IHNd6u7l+dIHIm+yqLXjJ
+KwJlPtfd7nfP8pdbDvIxx9nvfwd84OE8Z5xfnRd3xnviFV93PwOdwxXxsJIlP3nKn7jFe2+6QGT8
+Y8533vOf73GQC+9ygRT+ufKnR73kmex4SV/87XQ1e52RnvZ+r/3sXn893Fkvatfn3quxbzXaab91
+iMMV976vatjVOXbkSxX42Z798FVte9m3vfldVb49KzKEYHTf+98Hf/jFP37yl9/850d/95VA7M0W
+WhZHgH/85T9/+tff/vfHf/71v3/4y6L4jQ6NEUi/ASTAAjRA9HODuJO63ru+qXq+iBM+6Us06gu+
+42tAm8q+1mK+C5ypBzQ+rJNA4hs9vuMFt+PAmcpAcNrAE4QpDwTA6AvBCfy/nRI4FqSpFBSVcrPB
+mHJBGoTBGJwwCoQ+69vBG1RAsdu+OVDCJWTCJnTCJ4TCKJTCKaTCKlz+QkLAvK4TCCyYhi70wi8E
+wzAUwzEkwzI0wzNEwy7Eghk0r9CwByuEwziUwzmsQiY4wuVjwCJsQasjPUMDQhkcwcwrQT1EwTvU
+vgQjRPzKQrbzwz8MQjbcFNNKxJjCQXZawUTswTb8QUfsLyGEQAucRJWqRFDRwVBsMD4kwazjRAqD
+RHmSOFN0h1H8lN9oAmWwxVvExVzUxV3kxV70xV8ExmC8RfEpr0h8DRGQgmRUxmVkxmZ0xmeExmiU
+xmmkxmQUgVbMlNIQxm3kxm70RmG8AkPUQFkrNBI0x1NhLRUkR8M7Ry1MlXTMwQdTR3YUxHYsFXhk
+J3Dwo33kx370x3/+BMiAFMiB5MdidEWeIciEVMiFZEiC5JpkQsiGlMiJpEiBBIf5OpWK1MiN5Mg/
+WhU16MiQFMmF3KNTAcmRRMmU7MdMYsmWdMmXhMmYlMmZpMmatMmbxMmc1Mmd5Mme9MmfBMqgFMqh
+JMrzuJqKGJZiORbeGQnkAZousaLz4RIsAomWWZr5QSWTQCXisZaFcaUHEoh+OShncZaiNEv0iJyK
+sBd8caCK4Bd/ASMeeiAX8hykrAOZ6Yg6ARgieg3eEZoU2J6RKaNngZkOARILO8vEFA3b6Z2YqQia
+6Z6MwBmd4ZkoqsswgqG/VKKNAJajxMzy8UvjaRZjCMxnmQRwOE3+CZJLxWTN3hAbgSAbGzqbthSI
+taHN1RSezgFLurEbj/ADl1mFq+TL0AjNjKGYnaCCpUGYrKmbkjxMRmvN6ESRpQGmiqCc3cFIzLHM
+udTNJcIgjgjOmXGZFwLNaPpLNMofxzSY5uSFrNmhz5TO+MwLxsQdyqAX7OwI3wGeLhGeLuHKFPpO
+zlyYOkDP8SmMLuEJUTrKqwDMQtkNvxGIGlrN4ZHPCsWL68mewMRP7wEf9VHNy6TL3UyfjvhNLSLP
+BSrOHkocwwgZgzEXLsHL57TQGc0LAapO3UBQuUmgFuqcEPVOi9jMHhIZ8TzO4yFO80QQgzGMlumK
+ERUIIApQxKT+0SkdiRMqSWZpm/cRCbr0UQeSIY6wGrwkUPjsSyTlzvnACMPJCAmdHyml0jf9iCTi
+COxsIlTaziWaIlGKSmnBS41IyozomJFhpe9EpajMiLqhFiAxF15AzUN1zDYNHjiV1Eml1Eq11Ev1
+HzvaxyvNiz7aR0DC1FAV1VEl1VI11VNF1VRV1VVl1VZ1VQNxpliV1Vl9Hme4BVrF1VzdJVRwBlTQ
+1V8FVlniVV8N1mLV1WE11mSl1VtwBmV11llVl1WS1mml1mq11mvF1mzV1m3l1m711m8Nk2KTJmdY
+FUkAAEko13NNV3RVFWfASFFBC4PUFAAgV1UxV3ZNlXtd11X+cVdWiVd5zMF6zVd1tVeCHVh8RZV+
+XZV/dUtIcdg+2UuBVYOHpdgoqQh9tYuK1dgjuViD3diP7ZGOxVeQJdkcqQiFrZqSVVkagR+GTbDU
+g9mYTbEMyCmB9QR8wNmc1dmd5dme9dmfBdqgFdqhxdlCENmK4AbQU9qlZVoe44ajFQhrAIOppdqq
+tdqrxdqs1dqt5dqu9dqptQao5QXTk9myNdtZoIGTpY97YAWiddu3hdu4HdrccFmhg8WWIi+bvY29
+7Yy64AWMbcRV7ESxPY3VMNzDFYzWAFwTDEXJQlna4NvInQu6BZpLJMS8fQvJ1Vyy8FvAVUXBRSzF
+AtzCRdz+0jVdp1Bcg2XcSXTctXWMzYVdm6BccR06WMRczYjd3KWJzjXYzwVd0SLc0xXe4UWK1MXX
+1U3E1vUK3WVemJjd9hOIczCD6aXe6rXe68Xe7NXe7eXe7vXe6eUC9qPXiuAAAjDf80Xf9FXf9WXf
+9nXf94Xf+DXfWhDb9+O/+8Xf/NVf/fM/gQDcd0CHABbgASbgAjbgA0bgBFbgBWbgAH4HsWWH75Xg
+CabgCvbeBBQIlIWAP5DfDvbgDwbh+OUArCrFULxdXtiM5lXhmxBb3/3dtRJdgyVd4qVhwzVeUGxc
+tV3eFV7h5y3hSTzhFOZh5uVdfHXhF9604K3hJV6NGyb+wrsVRR22i9cdYt31YcvVwyCu4uYt4k1E
+YkVTYiYW48Bw4kGE4ijOYNfdYua94jy03ZrN3DWO3S4GwS8GrRjG1xke4z2GijJGXkJU3imW49xt
+Y4EYAjpE5ERWZCrEQnUR2B/QgkiW5Emm5Eq25EvG5EzW5E3m5EiGBbHlwjQU5VEm5VI+wzX0X4M1
+B3Jg5VZ25VeG5ViW5Vmm5Vq25VtmZXMQ2zdc5F72ZUS2wzSuCELo5GI25mNG5k7+ARLG4iLU4kGG
+XToOXDsG41TOYz7G5qjw4zNGY154XCqG5sgtZLvl5mcOZ8mV5iOm5lWz5orQ42yGZ6oQ2z/Ww0Bm
+DHD+PufbGOfaNUVzzue9Ted1Dt0wjueCnoJt5mZ7htx/3tt95uZTbAu9ZWiAbmGBrua/lWGD1miE
+PmOFxueJ5oxx5r4DJOmSNunyW7+IrohhaIOWdumXhumYlumZpumatumbxumWpoP63d+e9umfzr/+
+xWh8hYcyMOqjRuqkVuqlZuqmduqnhuqoNmp4EFsBPOmrxmqSxmBvpg8IeICcBuuwFuuxxulhYGY3
+7mc4xl2Qto2AtujCwmN31miD5mgo9mi2tg2HLme1RmG8rg23fmu1iuu+mOt4ruu7vWu/7gy9PmN/
+VuzHAOzARqvBNo3CNux5fujEfmzIGOfIO9vPTr3+y3PkinAFPjDt00bt1Fbt1Wbt1nbt14bt2Dbt
+ZhDbzWva28Zt0BO9oa4IMfja3wbu4BZurxUDsSVb0EbuyVs9rq4IAWgE2Ybu6Jbu6Y5tVzhrgaC7
+xdPu7c47MGsWgVUAuRXv8SbvoDXadu47wVPv9WbvNiM83hYIcTiD+abv+rbv+8bv/Nbv/ebv/vbv
++RYHsUU87ibwAn+DxmPugWjb8mbwBhdvBbhucm5svhbizZaLyJZspaLsd7ZsJj5sWNRsC5cLxoZi
+xxZxzq3oDJ9sgu7wMf5wUwzxEycLEr9bE5fxsMBwFV8v9K7sFufjF89hYRbkG5eMCKe1nkPyJF/+
+OfEV2AEAgSeH8iiX8imn8iq38ivH8izX8iePAbGFuZoD8zAXc2i7OfjmhXVIhzRX8zVn8zZ38zeH
+8ziX8zmn8zRfB7HVOSXX8z2vgZ9LcF6AAD3Y8kEn9EI3dC0fgAjnZxOmcCKHixzXcXdjcR9fYiBn
+XSm+Z0efcUV/aBvX9M/gcXWO9F7YcErfY0tPXkxf6E+XXU7fa5Vea1a3CUgfdaEqdVMXY1QHZFX/
+aFl3XlefcFjva1+vCVqv9YfjcQ7H9dPV9XrmdWIviwgvAj6ndiVv5O+uCEY49G3n9m7H8mUQ23oY
+83En9zCvB7FVhzpX93Vn93anc3XA82qX953+83OUJQRvx/d873ZGAPYSb3RoB3UzF/VIv/Vlr+Fm
+L8IYB3iXoPE3FvYKX/iWMPZjB7BkN/hKx+yEfvaI/3XH+2FM/HeOd4mJp/iCv/jhRfgdVHiRL4Vx
+zm4Dh3nF07vRFojwdvCbx3nzFlsya++e93n1fm/Ale//JvqiN/qj7+8A5/EBj/mmtzsER1kBWPCc
+p/qqxwcI9/gOS+6tlzzRxnaBKG3qFvuxJ3vXpm0et+3cVvu117HdBlzfHu64l/u539ri5vHj5vq8
+T7HljvrnLvu/B3yxt+6sR2tGf3iWH/kUp3iiMvmTZ/aM7+iNR3zc6PcaD/nJLwWSP/bGd/z+0k15
+G1x5kW/4tD58zGfhUF98Sbf4zhfez2fB0Of4ca6E7aL92rf93FoCvg6ES+D93vf93wf+4Bf+4Sf+
+4jf+4+d9KwDlTGD+5nf+54f+6Jf+6af+6rf+62d+VDZzFmCD7vf+7wf/8Bf/8Sf/8jf/80f/7mcB
+sbWu23f/96d9XFD1QkD++rf/+8f/4w+ECNcrgUUVwAUIXgIHEixo8CDChAoXMmx4UBIASQ4nUqxo
+8aJAiBIxcuzo0aEzAB9HkiyZQiQvAClKsmxpEYAzlzJnItRI8+ZNmzh3ttTJ8+fHkECHejwpUCXR
+pBdhKm3q0KfTqA8jSq1qEKrVqkKzcjX+mnLVvbBix5Ita/Ys2rRq17JVCyBa27hy59KtG7YVgFZ2
+9/LtGxevXr+CBwsGTPgw4rnRACRu7BjtKpQAJlOubPky5syaN3Pu7Pkz6NCiR5Mubfo06tSqV7Nu
+7fo17NiyZ78W+Pg27rCSIOTufViAJAG+h/sFLpw48rrGkzOXC0FS8+hxuVKvbv069uzat3Pv7v07
++PDix5Mvb/48+vTq17Nv7/49/Pjy59Ovb/8+/vz69/Pv7/8/gAEKOCCBBRp4IIIJKrgggw06+CCE
+EUo4IYUVWnghhhlq2N0TFvBizEIgJmSMMSlQoUaIaohI0IofbvgijDGilwKIIF5DhTH+JxbUokE1
+NsTjQC0CKSORRRqp1CQe+rHSkj5a4Acv1/BYIonXuPjhNeCkoOU1JuqYQgoe8rJll7xYQAUVHoLo
+B47gCDTkkXHKOadH1wDByyrXPOFji0vueA8v96xyJZ+8qHHnjR+iGKSLVDzBCzh78uLoE3XQeSmm
+mVrkIxBW+mgoECvtyKiIhVpQolFTYqnllvd8Cqemscp66SSrqEliHacGusqjPQIq6KSGFqrGSh1e
+SeqVkzT6RKWzOvssndcA0OubLq6CY4kFgUnio9eC6aKIXkJZarUgnknFSmvieOex0Lr7Lrzxyjsv
+vQx1CSaYVta7L7/9+vsvwAELPDD+wQUbfDDCCSu8MMMNO/wwxBFLPHF1VOhrgbK8GNuuQiSSqOaH
+HqfLIonoHlRHyYt6uZKKxqBM5ctK4kjFuIMKtAqsCIUpkJ0DiZkzySSOzMvLxqhYUNHUBukyiSiu
+/GbJYlL0hJs3L2os0AQVjWKNIrfrsckGvazjmGiudCrT28ZcdtjG2Ixn1tqK2bNAPzvkcbZde/x1
+yaIS1PLLvLCZI5Qh57jo1FXjuWjhcQu0tYhFgzxQ0ifnKOrgNJuJK5VMG822qG4PhHNFFtedsZnV
+/ugxyHh/CvXlBqGNMpMzFz424hdNknGnAtGoepQz5w58uaMyerPUx0u6s5l+Czn+0D1APPoEEK4q
+Do7jAx0qUJK2odSil8kTTyiLxgN70IrMWzB0sBYBsSgVA/3e4o2H9ziqqsfjKf6Vy4u5vv6Opb50
+XS97AtkeL7o3JnINBAho4p+q8gc8WyFERNGbXvWuRLeKvE8g8TPU9whyD+Hd73gSvNL5jBe/C2os
+g8RL3/8IOBDsVWR3AundAoFXP7KVr3wnXBEF0Wcb6bUQUCLa4EUSRTWBNAl4T4oSkCJoPB2yq4dP
+YN+xnnczfUUJZ6uAkh9IV5FITcpK1GPgzTxYQhP2UH9wWtHzVvTBigAwjEycH0GeKKU1Fq+Nx0Ki
+6q74tQDC0YdfFJwYKUJG0xH+C43EEojiAgiuKf6xisbL00CuMagjWnIidbSZGMGHIkAOkmNa5Jjq
+QIRJnr3tlHE8ZBgNGDyNVa2RToTSHik5ST+uiJSqW2UXNdhJi1jMT5JC40D8tKO9gYt1pRwf0XBl
+xDa6cplqGBRYZDkmCzzhgyk4GioltcymXaloppSkG30YtYssMQVQOiYqBec3ygmtXM48JSpfVodp
+prOf5bomnlxlkTB184biBBIp8cY1w+WNlxUcn97AMTyHtBNKk/ADMpM5z8fVM3IeW6g/tZYjcOQy
+pIUkFUCzScxrKFN6GZVnj5ipt4YS8qHnxJ1HdrcKFPUOmYfaKETPqb+ECqT+oNXUHzA1CaL4xU+b
+h8JYoIbWoiumQGlBfSbw3lhTbRrkfR/sKcd+KkQfUrKXwxSRUampVmQxNZ4LeaqyUsDPqa6CV3zc
+ZU15NsxfclGTbqWIV1tIPhGuAqil/CE6r3SopLZyrcVrqzZ16rS5/i1UY0WWQ/VqU8b+1SJ2+qDH
+cnUsQVlVkq974eggyKgB5jVQRKSeQFNQ2M4mhAq9m91pO3jZVPoRhW+bImu5WhAL1BV2uWoRaW2K
+2dYGUa2sRa3vYugi2WJxIra1Eo1cxr49cVGXp31mc8332gwKN4/FBZzRWgRA5Q42rym0YvXGCyjo
+brNuI6MubQ/yWd/hin3+yd0tXukb3h6yELb5BexGPUqia9XTuwxNF+vAZljJ+c5smWWi7VwkLU/p
+bo6qK5oAzOngaDYNbyup3G6fdmCG1KFwpBIZg7PlYBOTOL19s9zHKhw2+oKuWhtecUEm4eFyeuwE
+2CPRXWc60+zGTrmZa9xIWtxDEx9Zxpml8NZQjLSPCi7Dre2xhgHAYfclmKMLxhZQI4g3hqKtyZd9
+MjQpJuc5y+he+OouR/AFpvmqZ3344t9F/AwmQGfFzvn6iKFTgOfz6FmuIxE0865zj0aPJNGLNk+j
++dyRTNO5057+NKhDLepRk7rUpj41qlOt6lWzutWufjWsY91q0frMUgP+EWSgULIUy4DoHpLhda4N
+4mvKgMPFB/F1rpPHmGFbZiWX6bVlNJcSTWvN1rdeRR0AUIeqHuUy3aYMDvkIhN9KK3d+YwxGmE2Z
+YKfE29MuiLqLrRBkpwS5IsEMiDDz7cnIm91dBcDwXDUZ3ap7Mr+qTL8REm1AneQy1JZ1VFJgyXET
+xCv0ruzASzsQdMNbMg+/OPR0bQEpIwTZvq4DtTh+FE2rPOQVR/m7DeKHybi4m71Tg21XfhCOPwHb
+pX1kMq2dwEh6JebXzjYPha1rlxudIC3398iN3fFuow7kLX+6zn0Hc5DfejLzVAMAlHXFrS+d6WYi
++c7n23OYew/i1gH+e6+eAAAX+yHbbS8IOHjVTVhhnd59L7u/xxTJqfvayldn+cfLjm6s82LcFPdd
+JMFuJca33MoDmQS10b7je9g966Mznc9LDni/P7zpgd9SQvzusl5ZHfFpd/rBDzIJcMyeRfNMU+DN
+LviEqFzufOa6260CDjHVPZPDRwnwjcF6wG888Z7X/d15JmbR5xrsizo87JVekOljXe5+GPlAqIA6
+2zyK8pq2wOBRWVyNAbyotNf10x39fBGO3uOvpz9Bfnxsj+c88Ng3yNWhCPBNiqK1H/vljhpYyQCC
+nP4BIJ9VCrUMYPBFHLvM1ptgHvIxn/ykX/Npn+nlXu4xXrAh2+/+NN3/4Z/8ZAzWgZ/3zV8HOiAK
+ZtJvMZGtnYn8yBX8lZ73XBoIkt79QZ8Ikh7YQUnrZR8M+o7NAF+5EWD0TZ0HvuAR9pyxSeAENgXc
+EY3K3Am9SeDY6WC7VcbdXcZKAB/XBaFIIBvVeMgJfiFljKFl0BrWWVbjsYsIPtvzcd1FnQyUAEHG
+jJC/ieDIjV/HVUbsseFkiMjTlaEO+mAKoFwRbhywGSJSgKCQcY+tSWDBZeD26aAdQl8VSkWa+EHV
+cN6jbCHzTQLK8KAL+qATxmCUTN/+sRspruHfEcQqVM3TyZ1lPIr4DRegmF8eDd5ejQmvTF+lFOIH
+3gM4gIMX9qD+/R2h7jWgE3IcFYDF0q0hFFrA9AEfFVgGLhmgbXjIAi6dNBphAoEjFX5iUkzCuIlK
+wxEi8LUKQ9RiMtZfxXFg211cqNCi8zHd06Ff13kI6nUdlABjkMwT9SCEGuTKB2Wis1Eb5wliLE5j
+6SXi0g3kRHIc2BWdCVak63FdA1bjm8yTZY3jPfKe6zWhOkaFtEiUB3Kd3E3UD/pjP8Zg1KXeGepa
+pfDjE7Lb01keENyJzVnJjXyQQa6d0lgAoUEK7T3h0/UiQ5DhM25iK94k9Rldw5njB3Zk7pkIQaCi
+90zCo6CiAjLfxVnlD4KlJ65kU1ABB5piZRhDJqKkB+pbwUH+G7FJHeEt3STQorcx4NxJIgSAo5lM
+i8Zgm7bZlSQuXmWEm0ElxEks2sV5m8NdJQpixi9aRq7l5bxN5aRcI8tVZstpyWa2YVNmUmAGSjcC
+AMGJYWkCQMLtXGUkXTqypW3eJm7mpm7uZp65DjnhRJvhzVJSSHA6E060jOsYFnsUZ47xpnM+J3RG
+p3ROJ3VWp3VeJ3Zmp3Zu54D4pnd+J3iGp3iOJ3mWp3nKpH8gp3muJ3u2p3uyJ3r2B3O+J33Wp32S
+p4LA5n3uJ3/yZzc24374Grb0J4EWqHn+Z4GchIEuKIOCJzhooIBM4oD4GoDqB4UWyIUSCEcKyIYW
+iIQKSIb+TqgI9keIciiE9keHEsiHBkiJsuiI8keLAkiK/seMRqjfeAEm5KiO7iiP9qiP/iiQBqmQ
+DimR6mjIVWh+hOgNvAKTNqmTPimURqmUTimVVqmVXimT3sCRJqiuXUORfimYhqmYFqkXyM+JAoiE
+ekIprCmbtqmbvimcxqmczimd1qmdruklbCmBhGgVTIGf/imgBqqgDiqhFqqhHiqiJqqfVoGeDkjR
+3QM+3KmkTiqlVqqdeoKZ5qffqKmldqqnfuqc5qn3ICl+8KminiqqpqqqJiqjjiqXQk+kgqqszmqn
+YqrvnOl/pCmt7iqv1qmo5hqp3oepriqxFquxGmqrAuv+q9pGrPaqsz5rKdjqmOCqf+gqtF7rrv5q
+jP7HsB6rt37rqibrtvrHozYrtp6rp0prjQaIhNpAMrwrvMarvM4rvdarvd4rvuarvr7rAzQqiKrc
+FoSDwA4swRaswR4swiaswi4swzaswG6BvwZI0RHCvlasxV4sxu6rDWRqgkioA7gDyIasyI4syZas
+yZ4syqasyq4syL5AxAJIiBJDL8wszdaszd4szuaszu4sz/asz84sMbwsjeraPcwCyx4t0iat0q6s
+A3Asgnjs0kat1E5tyrqsq+6pysnsz24t13at1/Zs0F6toxKt0VKt2Z5t1DbtrWrqQHws2r4t3Fat
+0Pr+R8x+rd3eLd7ybNgqq4aSbdz+LeCGrNpOK9sKhNsGLuKirdXyrYjymdbmLeRG7tfu7biiqN8m
+LuZS7eCuK5r6DQqUAOiGruiOLumWrumeLuqmruquLuh+wtySqMrRAhvMLu3Wru3eLu7mru7uLu/2
+ru/OLi28Ln8UHQSwrvEeL/ImL+uigNMeiLWiK/RWqra+aICqXJ+CK/Zm76GKK/XqR7lGL/hOqrpS
+a388b/ier5xOb7DaR7dqr/u+L/eub318L/rWL5yOb+HyAqfaL//iqfBWL59d7/sOcPbG77IGirn2
+b/3ib8d6rvI+MARHcOq6rtj+K59RQw5ksAZvMAf+d7AHfzAIh7AIjzAJZzA1/K/36lrxSjALtzAE
+M+/aNnDbZi4NR+3iVi6MZq3k7jAPgy0K58ejlm0NDzHLbi758gfUErESy20Fu6jj9jAURzHNUm73
+AvHlLjEWk6wR5+/hZrEXu8MNV3Gp6rAUl/EOU7H80kcQfzEbb7EMGy4bf3EYp/F81K0Z33HeovEB
+F20ce7EbP63fhIEJDDIhF7IhHzIiJ7IiLzIjN7IjD3If/PAY81nAOqwlXzImZzLDQmwTy6iuEcIj
+h7IojzIpP3IYNK+BmK8Co6/6Yqj1EjAsg6sB9y2srjL/MjAgD8T+2vL5tjLWBnAsB7OxzvLY1jL+
+L6MvLjvvph4zK0uysL6yMEdzqhKziRozM4NvMqfyMl8z+Ppy4w6EAEuzOG+vM9sH/XIz9Gazh/rN
+NpCCO78zPMezPM8zPdezPd8zPuezO7NDOddHiD6CDAS0QA80QRe0QR80Qie0Qi80Qwf0I/SzGqsw
+GegzRVe0RV90Pm8DKq/zDPcxFs+xKz8xHo/05EL0fKyxRy/xHytzR6c0EYP0Lw/E45I0TfusHtOy
+bQixS9fwSmtzS+80DcP0NwvETNe0UefsTRdzTgP1EPc0RwvEKcyCVE81VVe1VV81Vme1Vm81V3e1
+VC+BSctHiFKAKpS1WZ81Wqe1Wq81W7e1W7/+NVyXNQWEdXwUnQBkgFfntV7vNV939SlstIr6jSNk
+AWEXtmEfNmIntmIvNmM3tmM/NmH7AF3DR4iKwxlcNmZntmZvNmd3tmd/NmiHtmhftjhM9nvYdTdA
+tmqvNmu39mM7AmAPiCqj87V6swWD8zjnNrKatnucM21jqzoHti7/NrrathPjtm4nd6BSs8QSbQIT
+t7MGt2xvM3RDq3HDLDQrt3Yztydbc3VHd2zb6ECgQBCUt3mfN3qnt3qvN3u3t3u/N3yXNwUz7m0L
+xAxEAH7nt37vN3/3t3//N4AHuIAPOH7PAG+3B/GOQnwvOIM3uIPDNwwT7hvzQhczdeIKdX3+80JR
+HzWHT/GBswdKWzjmOrVww7GIYy6GHzdRdziL12xSV/NSn3jikvh0/7SM/22KY7dItziLv3hzQ49O
+33jc0rh4m7iQ4/iHr4cd83iH+3h3x/iRD3l4s6vfPEMYXDmWZ7mWbzmXd7mXfzmYh7mYX7kOJLl6
+hGg1/IKarzmbt7mbvzmcx7mczzmd17maV4OZp8fEjjmf97mf//mYP8OUd+5wf/ezXje3Zrd2Jzd3
+D613G/quSneR6y+kOyui062iL3puNzq5Onel96qkU3mhfzqtXjrsArOmM3qez4inkzqthjqhC8Qu
+u/qnmnoOo3qqb/qqM1qr0zqownqusjP+Rg87sRf7PfNzJyc6nwF0Qze7sz87tC/0Qyd7pw8EBEy0
+sWe7tg+7RsdwLht5lL9tjiu7TDN5j+86pl1xuKMtkYs6uK+72Y47pu+4uRu1kzs6lMO72bZ7rFO4
+visuupfHktd7Td97tef7v0stvwf7QER1Xz88xEe8VoM1tZ/6QFRAXGe8xm88x8N1BQQ8edg1Xks8
+yZf8w/+1t7O0QAy2a7e8y788Y0t2xd/6QFj2aN88zue8zod2ac/8fqA2zAe90Ls8bKe8T8u6r8uq
+rQMwcuf6OHO65T560lcqsFcrdU+9pS69hWa60wsz1A9vr2M91Q86wyO92Gc9yI9H+3b+vdenvXj4
+9tlLatWXr9/4wgHcPd7nvd7vPd/3vd//PeAHvuDfPTa4fXiEaDYggeIvPuM3vuM/PuRHvuRPPuVX
+vuJng+GDB/EOPud3vud//uD7AtlbvY0n/NLKu8WvOMHbe+Z/R4ibftqOPt2XPuwjLerTvOqvPk0b
+fNQjfO0j7cKT/rv//tHePtPnvu6PNO+DPZAT/9IG/+wPv/OrrPFvPb0n/x0v/8+r+/Qzrewjsd8k
+AuiPP/mXP+AXvs9b/0Bog+W3v/u/P/xXvja0vndMrPnfP/6XfyJ8/37Mdtz7KkDwEngPwD2BBxEm
+VLiQYUOHDyFGfEjQoMAqUzBm1Lj+kWNHjx9BhhQ5EmOVgxQlplS5kmVLgSkAnMRXimZNmzdx5tS5
+k2dPnz9pejoI02VRo0ddAkhx0BNQp0+hRv156WRBpFexYkVpkWRXr1/BjjQ50GpWs2dVEh04U2pb
+t295Cn0ZE21duxCVMoW7l69bqmQr3hVsdyuvi2ERJ1YMciyvwoMhZ1XrmG1fy5fjDqUbmfPZvAId
+ZRE9mnRp06dRp1a9mnVr0T6qBu48u2VhcWdw59a9m3dv37+BBxc+HLe42LSRs5wsoJtr58+hR2/t
+SHNy6y0/8zo1i3t379/Bhxc/nnx58+e5Lzl+nX3DwhVUxZc/n359+/fx59e/n3/+/Arr2wsQoeUy
+QM/AAxFM8LxTqhPQQYWyc8CdCSms0MILMcxQww057NDDCV8A8MH2CiOmlxNRTFHFFVls0cUXYYxR
+xhOJEXHE6ya7Z5YPeezRxx89dKDBGx+MEMgjkUySwxABI5LEsngxccYpqazSyhhrbNJJHDfTUckv
+wURSyLm2dNDIMNFMc0kby+ysxCvhjFNOGLN0DMo2Z8txRzX57HPCMXmZDE/rsjvkhUMRTVTRRRlt
+1NFHIY1U0kMNYXPQwQrLJpNNOe3U009BDVXUUUkt1dRNs7H00rsmg2CJSWGNVdZZJT1kyFVpy64p
+zHjtlaa/7JQNV8EKO2yxY5H+Dauxx4ZltcvKfI2WL7kC3axZznSVVtu9gGX2WrSKTVbccUVa9s5v
+0coR2m3ZfYpaQdEVLNt26XWq23PjzSpccvnttyRV8z1K3XoJ9uldawOua96CGcbpXmETRmpffylO
+1lyIIzZq4IY5rungjO3K7pkwSC7Z5JNRTlnllVlu2eWXSdYBYJBXKqyaX3DOWeedee7Z55+BDlro
+oXGuZmaaU5qMEJiZbtrpp2F+5lakrzrTz6vDZDJYqo96c86vwYazTm+5TqvLPbFOO0lA4S27KKvV
+jttHrcl2OyKvw85b7xfHxtfuiPSUW/Ae2Ub4b5bgHlzxDOn2+3CH8N5b8sn++8b48YYCX1xzDAu/
+PKmlBJJw89EpbNxyzxWKfPLVw64cdcDPJl32zl9XKbsmlMld9915793334EPXvjhidf96NoLE0GK
+5Zlv3vnnoY9e+umpr9765UU4/vXJrine++/BD7/4Jqau/aHszF+o7vS3Zl99x9lv230y53cI/frb
+x19L/fPXX/75/6e/+9VvfekroPkOWLsAxs9w/OMFAMBhDAlOkIIVtOAFMZhBDW6Qgx3UIBXgZ0AA
+UMGDJTThCVGYQgmC8HTpg4kKYRhDGZoQHA3k3wxxmEMdTlANDuSFGnYYRCGisIcOtMAQkZjEDPqQ
+iU104hOhGEUpTpGKVbT+4hWxmEUtbpGLXfTiF8EYRjGOkYxlNOMZ0ZhGNa6RjW104xvhGEc5zpGO
+dbTjHfGYRz3ukY999OMfARlIQQ6SkIU05CERyQsgrAIh1wBAEV9ykBBCDgCVtKSdBGJJTWZSWASx
+JDj8QMlMGgMhBHmgJitJSlRWMpOaBCVZGAKERybkHsaoJBCK6ElNGkSXEAwlQ2Cyynv08pUKUQMj
+z9fChFhlkispyBOM8YRETvMufqgDQiYBjgFt5lxPWEUdRgjJ1BnOlA/E2LnKyQsL1OGX72vlJKqC
+kDv5DUopqIM004mQJ1QSdAJRAwDg+QR74rOB6VxnOxmSz3TCRJoJMYb+OIF5jZRYJQUSzYpVLIBM
+am7ULOw8CBX6eQ9wSlJYq6CCRL/Z0HGu1JwLQae1UqDNhNIFAMa4JyxJKs9zysYq+TxINrOJEGP0
+kxdUsAAmWfoSmbrHWvk8lxqWelFlJsUgT6iDRTma1aOsApn7LOITwDEJbgorBYFxnE+RSs9OWsuR
+WKUlTe9BBSDglJM6dSlPe4jWolZ0lrzwKkLUIFG95rOtooynJN0aKNAZwwI1NCoI63BUq6gBsqCj
+LD/rapUnyDKypFRDCuz5GW9WchXSHG0dYFKRVcBTq611iTUFYgEqDKWsY2VqYg9bSrje9a3LtFw5
+C/LPUDqVpzsdCjL+0erIHoKUrr1Nal2ZqlvaKsQYrB0hSvNiz5ailhfXqINBqvtDZhrEKsYAwhOe
+AELH1IGRfpilMUq7WUbG9BqjrcgkSOla/a7Eo0BgbVyR2tKFrJO179tlWlGZ35f6dqZ1HShx5Zng
+VmoSdGidxGx5MYlrBvitqGyugBtcFVQe1a4P/GU5TWkV9uK2u+M1Z2ExSZCGFsSRDf0nLyIrEEdW
+RK/79fFCUlBaAEjUqry0LS3BAY4WohW4xpXuQWDM0rJQYRUQzimEAmOBIXNYICDUZCiVW8qjDpat
+W45ubr254SuXBcUxsco1VgGOOuBSnVQAhyzJm+d0OjLABeklK6H+xGYb/pjQ/owshv+cF3TWocBn
+frJanavUM5fln4Kap5Pr6tMoU1kgQ0UIEJZC5gFF1Z255XJZBA3LOwm0Dv8soouDa2ZTppPGhgMH
+iXc8EDUXmtcISXKj6XouKgA7xI/GNF0POunABLPE0GVwTn2aAgwLRMNkmYQ0NSzYgm4m2Yalqzfn
+KlTr8pguKTYIdwMFDldnGAChdLF50ateWoM3vqsgJX3t+5L89prf1cItrT0szGIfdpXM9DAxESpl
+2aj3yhM+8J3AEWpUxrTRjjyxl+lsp4lrvJLF9DbHb6nSlyAz1Zi0ih8w3sPQDhWgLoYmBFcRk3n7
+VZYAOO9clJL+giL6t98997lDrkHqutyjoSJFilVF/nOl/7zKg4k4zTVaFNAuneorOaIFSSwYIFqQ
+qFy7egWz3pJrdB0tQCRt0lsS9aqvne1td/vb4R53uc+d7nW3+93xnne9F1KJffd72PG3db8PXogQ
+rZ/gCZ94HEIRgop3PAwHOD+CkPDxlS8hC304ectvnoM1ZDzZ6xd59yXwdaRHnek9t8DQg35+omcf
+6i8H+8fJ/nCqbz3r3ed6EU619M3sPe9RZ/vc91MSrzD+8ZGffOUvn/nNd/7zoR/942NV9wiEEiaw
+n33tb5/73ff+98EffvGPX/vaOz2UpJ9+9a+f/dK/QfkcmJ3+YlSM/scCg1lxv3u9dKxj1KL93wqD
+DepvABGjMYSPfeSPABXQK+5PkvLP+gJjV/ivYfzP984vMARwATUwJAxw0PAnATcwBDuiATPpAZEH
+SiRwAgumAoHPcwJQBGFQIzrw8w5i/mIwBknwgUzw9/ZPBVfQ/FwQSjLwBkVwBp8oO7qgB5RwCZmw
+CZ3wCaEwCqVwCqmwCpeQEBww86DkAtqhC73wC8EwDMVwDMmwDM3wDNGwCy8ACGMPSpDBCuEwDuVw
+DquQEuCPf7IjH2SHdGYB/7QwMKSEdQTxa1zHgQrjBfZwdGjHifIwETenD7PQEKEkEAexEq+kEPnn
+EB1Rcxb+sYkacRMVBxJL8A8PghIt8RRnBBP1RxNBcXA6kYk+sRXlRhR1kBQFwhRRMRddRBXxhxVl
+MW5e0YeyIw9EoRiN8RiRMRmVcRmZsRmd8RmhsRgFAQIiMROhpACAIRu1cRu5sRu98RvBMRzFcRzJ
+MRsLgA1nD0o0IBrZsR3d8R2hUQPuUID6yQaJMARzsPpOMAJ9kGFY0BZ5YQjvUQONkBHrcSDx0Q8l
+kR/7kWD+cSEPQiARkgAL0hMPciIXMB938AJ7sCHb5SGtEQMxcgErEhb7aQOIIyVVciVZcjjgQACq
+cRWhBBSkoyZt8iZXAxTQ8XAK4w5a8ieBMiiDwzjoxyD+D6ISFCQplXIpyyMDYHIUIVIgKKA/qLIq
+rfIq+YMCdhIAoWQJmPIrwVIpGWAeP7Cf9PAX44YW9ZEHb1EX3ZJvttJufBEtsSYY488s6VJt1HIj
+gxAQ3/IvV4QXCQhKEDEv65IsV+8gztIw/WQvARIXAdMtBVPyCJMxr8Yu8bCfuqADOLMzPfMzQTM0
+RXM0SbM0TfM0OTMXqBEqQ/IgeGAcYDM2ZXM2abM2bfM2cTM3dXM3YZMH4tJtCoMTUHM4ibM4jfM0
+3QAxb68GR1IBNRIgU9AjtwUkZVIkm3MAS1IYL/I66e85o5IXolM6pYU6e1EIubP+svMumfM8K8Y7
+W1P+IMJTPH2FPAfTOtnTX9IzM9fzPvvFPauzI+UzWuiTMu2TP8klP+nxIIbgOBm0QR3UNLGQNf9T
+IKCBNy30QjE0Q3cTGn6zbAojFx40REW0QZlAOYdPMS2zMRXyPaMkMl10MkevMlOUTzAzQQViMWcU
+TRzzOyHTRVERRl9PRnMUTWq0LFF0SHV0RSe0RX30L4FU/wSiMJEUTIo0MW90SsNkR1m0R5u0Ep8U
+Ag9CSrF0bUwUAftpDyQgTdV0Tdm0Td30TeE0TuV0Tuk0TRFhNWvxO3VhBfi0T/30TwE1UAV1UAm1
+UA31UPlUFzqUawpDEer0USE1UiWVTuWxKC1yPw3+dFz8szwZMkDHc1Gp5gUz9UDLNH1AcFTFZVPr
+E0A9lVcGNEYLFFWPBUGNVCDsUVbtT0k5lVVb9TJeNUhjFVcTg1atlBfQdFKRNVmVNU7vNCZ3VSD2
+FFGldVqptVoNVVH3Z0kddVm5tVuRtVKrhQavdEyVREuXlEu7VBC/dB/DlFyVpEqXc1zd9UjM9VmZ
+NF1zcV3ZkhfEdF59BF5PVF791UfqdVXbEl/zFVSRZi4HlkcA1kyPtGF7pGAJtBQRNmGz1V77VWI7
+5GFNtZ8WdERFdmQh1FkNlhcqVENVdmVZFjc5NGNPFkRJdmZpljNL1FJNElOFNTFUtWLhs1cFVGH+
+aUZUd1YxiDVeeeFWizYsehZWeRVop0VoQYZol1ZZStV8TrVqwaJpgfVpoRYufhVKA1JrC/Bqaydr
+ybYruFZs4/NrwVZqM4Zq05YkjjZgeeEcYiFv9XZv+bZv/fZvATdwBXdwCTdvyQBP15IjBYIHkqBx
+HfdxITdyJXdyKbdyLfdyMbdxfRNmfZYX2KFwQTd0RXd0CTc5cVY7I5ZjPYRinfZgL9YS9VVx+VV1
+HdZsXycWaZdDWLdrXfd1BzF2+7Jdc7djbRd1cHd4M2R3xRZdfTdvgLcNA2NjkddCPBZr+wkpwzJ7
+tbcpnzJPWXQqsTJ8xXd880MrObd1ecErt3f+fdmXO8bydNVTIFBSKOm3fn/yJU22c2kSJ/m3f2tS
+J8+Xd3nBJ+23gA14KIvXc9B2bkVibcH0Z93WV+E2YuSWgTkwgS9ngS34IxyYXSE4gvsibB94bDdY
+LDD4cTS4hDmig/e1bUFYKkTYg0lYhRnjhA9nGOExh3V4h51xGvMXfbGxHIV4iIm4iMfxHANYbNeR
+h5m4iXMYXA/wY1N3ei9EeUeYeZsXbJ43HaOXijWkes8WL70YQ6xYhrE4i+dki3lSSMe4QsD4dsW4
+jSukjPf1jNE4TtSYK7tYjt3Yhv/meOWYjmXXju/4Eic4YRhWjt/YeDWTDh35kSGZCiPUe5f+lAvT
+8JIxOZM1+QzXMIlH+A0jOZRF2ZHtEH7101ZpOCRYWHZd+IWhIob3VSJTeSPqFmJReZY9YpWD94Nd
++S1gWXZlGZf/xZRtNGmFeQR19WRbuZeB4pd3eYaPOSNqWYpvOZozQpeh12uZuZkPOWAq+Jin2XoP
+Am9Jt5zN+ZwD93B/WIAZN3Pd+Z3hOZ4vd3P7x14/F53xOZ/L2XTD9QjjmI/dQZCfmZALuUryWC7Z
+mI8XWYH/mY8FOpt7t6Cdt5vzJZHbeKEzuKEDOZk7l6AlOhUpOl4seowxGoX7aRDaN6W11ynXWWwN
+gHxhOqbF1wBCGl0KQ31VOqeXcgf82G7+smN+DziohfoM8FdC7XV//TeplVo1ALieT5aAhzqq7Zco
++9koq9mapwCbuVibt9lgavpbvlmYwzmMdTaatXqNO7Wro8KZIRqarXms4bisj/ms9Zir1Xon2Hqr
+BSKYxbqn3QYJPSCwBXuwCbuwDfuwETuxFXuxGVuwEZcv25oZYGCyKbuyLfuyMTuzNXuzObuzPXuy
+meGrr6UwhKCxTfu0UTu1GRsQ/LpsALmNH1qv7/WjW0e0m2Wkvbikb1ijYZuj0dejaZtObHtYcJuK
+dfuPeXuMYxutLTa4a9uTZVh6Sbq1uea1ldu3BRi4nbtFDho4E1qRqZtqskMZ8qG8zfv+vNE7vdV7
+vdm7vd37veHbvCEpcZ/ZBVrgvvE7v/V7v/m7v/37vwE8wAX8vl1guHGlMCohvhV8wRm8weE7CsIb
+aeiboQFySS0cf6JYnFH3Oy/8ZP3HA4v1lFm0wzv3wxkvgjgvxTFowtOR8lT8xVfIAoPQxWFcxT3v
+iWo8xyUI8A5Px3Pc8OYH8Xw8xfeuyI38yJE8yZV8yZm8yZ38yaE8yqV8yqm8yq38yrE8y7V8y7m8
+y738y8E8zMV8zMm8zM38zNEcInLsINZJnyoMxO1HwoBLzsnJlRIuIVgNANhrxlYJyr4JglIA7UTM
+koDgGhKNxdP8y1Mg3ARikbbpw37+qOZujiHwpcmeC9k8iiFMSqKuAQgUDGO8axUkirKmLamugYSa
+LdFVHbAAoKH2qZ2sqdxsLb5QndIhxtIjzdRiqiGgJMz8xrz0KawGzr1SfdWN/dYEArZ07NZknbqK
+btAqfbdyvbmiLCH2/CSkCV/+CptK/cmSfd9AzNiNfdEFYhX6qbrazCF23dYV7tJPzXLWaYR0ruFM
+DdLpauxETsbFHcz/SZpa7SXmKt0XgtVOp88RjMK47N0hR9oAgOTo3N3JYsSKfd8T3aj8QKZEiqDs
+XcOMgcXoHaei/bmqvSHcq4e0va9+Sui+7ZsmnuLRfBIWCXSCaZfQKqamKuTDHdL+82ndXSrf82wh
+gL3T7uHpSq259N3lu9yR1I2lfGrbk6ndpz3Aug3oO96vTMrZGmnF2O2mih7cWh7pzdzOSu3PjCHR
+eP3WaarPyR7kPG7g/1zrTwmVKgLOwKmmkN3ACN3nwX7v9UcNiI3vAT/wBf+OUuCCgPwsvo6CeLwo
+MGjwHf/xIT/yJX/yKb/yLf/yMT/zNZ8zQKvzPf/zQT/0RX/0Sb/0Tf/0Ub+ifGjsUr/1Xf/1YR/2
+Pf71Yr/2bf/2YZ/xCm73eb/3ff/3gT/4hV/utXD4jf/4kT/5g78FL2fmlf/5oT/6f19cR9xeSdx9
+Mrz54dxur1+Au9+Ftn97wp/+mqvfw60fw8c/9dKfrAViAsrh/eE//uV//um//u3//vE///Uf/g18
+VQojEgBCmsCBBAsaPIgwocKFDBsKjMQr4j0A9yJavIgxo8aNHDt6/AjSYwoAFp+UO4kypcqVLFu6
+fAkzpkyUEyyODIkzp86dPDUCSGHRwayhRIsaPYo0qdKlTJs6HbrE4sSKPatavRpyqkVuqrp6/Qo2
+rNixZMuaPYu2KzepFLG6ffv2psQMT+vavYvXqQObJOH6/Vv1Z1B3hAsbPow4seLFjBs7fkz4BVuq
+gCtbzqg1IrFenDt7/gw6tOjRpEubPs2Z2OTLrFnL5XVvFuTZtGvbfrw34uv+1rwtC47o4Lbw4cQb
+S5bYtrdyt5l5bUYNPbr06aZVI6e8PDvP17GLe/8+PDev3drL8/zNKzj49eyNrzYPH2Tz59Tr278/
+2jrs5PH7b+QuW3sCDkiYeOT5h2BG6O1gSoMOPghhhBJOSGGFFl6IYYMIvJdgh83Vs0uIIo5IYokm
+nohiiiquyGKI9XDYIYKvEZJhjTbeiGOGO/AVY48WoacegUKCd9x+2PkI33z4Lcmkffo1hyR8AA5J
+pXcG9hVlgkBWyaVwRUKZpXZKNklmmaU9yV+Y2k3ZZZuzXakmglu6Sad718VZ3phm7slnL2geiSdv
+bNZJKGJwBgrfnIUuGhn+jIi2pmefkjL556PKDcpooYdamh16wuQIaqiiWsiJo5xW1pwsi6zKaquu
+vgprrLLOSmuttq4qi6mn/vUaBAiMCmywoQrD467KKZopoV+maexfkU4KbX2VNlsZpsnSuSm1lyF7
+rZvLAqotVs9GSy5004YLl7Xddpktun9xuy6X37oL17jl3kvaufRepW68VLa7r1vonZJXwQYfzFRU
+dwZ8VXNxyAFxxBJPTHHFFl+MccYabwxxHLoynNNrAtCFcMkmF3xKsSALDFRElrwBc8wyz0xzzTbf
+jHPOOu8MsyYfrywffxSkRXTRRh+NFgU/Ay0SlgLQw3PUUk9N9c6WqMz+dGAtp+evt0tnvZG9+I79
+mb5gg9Rv1wQCfDZO8KpN4LxtZ8UffWTfXfbXc6vcHdxVsr23R2/73Z7cgXckNt5jm314RmkTvh7g
+jfu0dRFmXI555ppvznnnnn8OeuiiX86O3oE390gAqq/Oeuuuvw577LLPTnvtqj9i+t69cjF6777/
+DrzoRWA9eUeDQ05k7nMnrvi9jBevG5Z9Iz+g5ND/uHWQ1BeufNvMN0/u89A/vj1x1l/Py/HlD2c4
++gs7B37z4hdP/vq3nX+9+vbb1r7738c/qflNrn77ow3+oIceEsxhgQxsoAMfCMEISnCCFKygBRdo
+j+6drTlYmIYHPwj+whCKcIQkLKEJT4jCFHoQCxoE24wuCMMYynCGFyQB8dx3Ef0VcDb9Q9//ANgn
+ATaOgDvEzQ1xGBEdFtExPbzeD4G4JyEejohLZMwBi6fEKi6midB7IhTLJMXAUVGLibni5LJIRsRw
+sXhe/GKTwqg76QUojbUxY+PQA4Vg6HGPfOyjH/8IyEAKcpCELKQeR9DCrKXqCIxspCMfCclISnKS
+lKykJS/JyFy9D4e9UoIhPwnKUIqykFA4IhLRSMfCrHFybXTjkuA4tzGmskCmxCEqZ7nKxrXSlfeB
+ZdtkOUs7Hu6Wqczl4XbJS2klkmnATKUwAzewk0lzmk1RmJGQ+L7+h3Fsm9zspjc15rFNuk9kJKOm
+Oc85i5RFD5sK2trLqgbPeMoTZz4Tpw+FhrR86nOfZlGaPa8nMqjNc6AEhefV1snOHGZvlrQx5unq
+lszwLRNozaTjM/dGTDo6dG/IjKh0fHm2iqbxonPLaBo3ujyIejSAE12ZSMlI0rahJxEmqKlNb4rT
+nOp0pzztqU9/CtSaluqfXeQPC8KB1KQqdalMbapTnwrVqEp1qkhlQUtB1qs+BHWrXO2qV4GaiFq6
+Dz2eKIVZz4rWtKp1rWxtq1vfCte4mvUSV2VYc6owhbzqda987atf/wrYwAp2sITNaxXqGjDu4EOu
+jG2sYx8bV0/+iBV9ZIWsZS+L2bfSlahs5A9eCwva0Ip2tIM9LGcHKL3FZna1rLWsZBGa0CRurayt
+ra1tNYvYfd2VtLztrW8Fa9prYlOxty2ucc/62vFgKbaVPa5za7tZ4SJxt7+trnV5G1wwoY+4z+1u
+ZpN7IGyihxEgKK95z4ve9Kp3vextr3vfC9/yLiO39GqOOtKB3/zqd7/87a9//wvgAAt4wPhVB33d
+NaP4KnjBDG5wfBkx2fzN1rsUtmx0tetEz153wxwu7YHRxd0Ki1iu4F1uQps74hS79cLMuidlPtvh
+GMt4CtltMUBTq+Ics7XEscWeRWir4yDP9cPhou6Mj3zdGoP+a3w4FrKQedzj9E3YyUFm8ZI7+2Ik
+a7m6SmZniKmsYij3GD0JmIeZz4zmNKt5zWxus5vfDOc4mxkHRNZWc+yAhzzrec987rOf/wzoQAt6
+0ITOsx3qTK1epULOjG60ox8d5wREGIFTBrOKrcxOI29506Lt8nCbbOkUi5m5lQ61iDGNTU1zetUe
+Pu0QQW3qCo/6xKWOtXdRPV0Ns3rXwEV0s75s6+7Omp1khrSxj43sN9PZ1cfkTzzYAO1oS3va1K62
+ta+N7Wxre9vQjoevjaXoZIt73MeWNGxJ/eNgVxjXOFQ1r9+tV08jEdjqPu6wxVvrehuX3f7TNbz/
+bdhv74r+3vq+7b1Pme+C25bfLrYIjAEOb3lzEtYKN/iksZjwirOW4RnOMsT/LfFxUlzjrT24LbdG
+XgerfOUsb+98mf1QyqAjDTSvuc1vjvOc63znPO+5z39Oc3QI/FQJbrnRj75yCJ+b1ukm+W05XlSP
+f/zdId/uyJ3+3YufMeNYt/DQOeXuqXO66jeWimq7zlqTj5XraHcs1LHscLFT/euWInjbH6t2yrL9
+7nJ9Oyv9LfdVk53JZud71pdO7K1xgACMb7zjHw/5yEt+8pSvvOUvz/ha0P1RzXkHOj4P+tCLfvSk
+L73pT4/61Kv+8+/YPKJ69QfMy372tK/95Tmg9Tvu3fD+uIU5RwEf+E0Pnn5X5z2Jcz/M3RufrX7X
+JfCDr+Xho7bwy8c78qGp/OqntfnNljr0tyz9V1Nf+4zNu4QtogB8qH/97G+/+98P//jLf/70r7/6
+C+H6QDXHG2Dov///D4ABKIADSIAFaIAHiID95w35hyciwwr2B4ERKIETWH8KcH0YtTWOkAUbyIEd
+6IEfCIIhKIIjSIIlaIIb6AMMGCfNIQ5n4IIvCIMxKIMzSIM1aIM3iIM56ILioIJqIjLdcIJBKIRD
+SIQm6AgXWFLZR35D5nsp5X3fh2ThN0XFt4RrZX6U1nRVCFfcF3NxB4Xg14NhYndamFZXiHFZSIZt
+xYX+v/eEXyhjUihGVJiGZmWGW2cRw9AGeaiHe8iHfeiHfwiIgSiIg0iIeUgHYZglzQEPZcCIjeiI
+jwiJkSiJk0iJlWiJl8iI8ICIUdIrD1CInwiKoSiKhDgMSChTSriEa+iEXuiGRwaHcTR+c2iFpng2
+KCaLaqWK3vN8rchhrxhLcjiHdah7aHiLaJWLG7SLvJhkm4gkYyiLwph8xFiMTChd7ZaMyshlzOgj
+zhiMtAg26PEBkCCO40iO5WiO54iO6aiO68iO7SiOGKCNPdIcRlAM9WiP94iP+aiP+8iP/eiP/wiQ
+9WgE8Rgjr6EG7oiQCamQC+mOH+CNWYMe1nhlfzeTkc5XkVOYUOGFcJlmY1HHkRcZhxlpYon3kSWZ
+ah1JfCIZZeljDCngki8JkzEpkzNJkzVpkzeJkzlpk6uAkn+3CjoJlEEplENJlC7JkyAZR0WplEvJ
+lEFpDCMpXgAglVNJlVVplVeJlVmplVvJlV3ZlUiZUl4plmNJlmVpllUJlr90lmvJlm1plisJl3Ep
+l3O5NwEBADs=
diff --git a/Documentation/DocBook/media/fieldseq_tb.gif.b64 b/Documentation/DocBook/media/fieldseq_tb.gif.b64
new file mode 100644
index 000000000..7b4c1766b
--- /dev/null
+++ b/Documentation/DocBook/media/fieldseq_tb.gif.b64
@@ -0,0 +1,445 @@
+R0lGODlhdQKaAucAAAAAAElJDK+vr1YMDBUVZC8kDQAAVkYQEBcHOwYGSCEJHSAgaKOjoys8DDMz
+CgAYGp+fn19fFJmZmQoKO10wMA0VIAAAcDsICCsMDAcMT1MMD2ZmAAcSO29ISFUHByIAGoiIAA4H
+T0pKDJaFhXd3d0EgABoaVGYyAC4AKXd3ODs7BwAAN1MAKQAAYlZGB2JlDBwcWWBtYCA3ABAQTQAA
+ZQ0VQD4AAFVVVUhjSCQMJQAAfBMHMkQgIEtLSzAyDD5VPmZmDEZRB2FhEWZiDFo2ETkdCwAAVEUt
+Gu7u7js7Ozc3N3d3WACPADU1NTMzMyBRIDgAAEJCEHEAAEwNDZeXAABpAEQFBSMjIxgNQDooCBA9
+EEhIbwBVAAw/DAwMPgBNAENDCgc9B8zMzABDAD4MDAwOKjwKCkQWKUscHAAAcUtLFRMTEwohCoqK
+AA0NTBEREQgfCBUqIgApADIAAA4ULzg+DEEfH3wAAAcHSaqqqlkcHDgMDKSkpFQAABUVRjEwCGZm
+B00QEDAwXSUMJGUAAJaWlhQUUnx8jVQaGgcGLggSGy8GBmw4OGNAL4qKioiIiGIAAEsHB3JYWHd3
+AAAAPlctLYQyAGggIBgAGkIVFQwcJRgYSA8MU9EAAAcHVQAALRoaYbu7AEY1H2ZmZlxdEHAAAD82
+DlhqWExGHgwOUzMzDAAAmgA5KTEHB2ZmPlpaB///////ACISRExUDTJPJUQrDAwMVhISSEhISHd3
+IC4xCjhcOA4ORERERBkVXElJAG5gYFhYcnt1ZkgGBlYAAAUFMTg4ODo3BTJrAFESEmZmMF5jBwoG
+Q1paDUkKChxGHN3d3RwYRGZmHCgoKFMAACYmJi4YLhQ+FCIiIhU0FT0AKR4eHmVeBw04DRAsEAwu
+DAc2BwoqCgAAPFdMDQAA0WAqKgwiDEgZGRkQRAckBxsTPDEwDBAQEDwAAEJGDAAAU0FBQEJCDLu7
+u2IYGJoAABgYRjg4bAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACH5BAEAALAALAAAAAB1ApoC
+AAj+AGEJHEiwoMGDCBMqXMiwocOHECNKnEixosWLGDNq3Mixo8ePIEOKHEmypMmTKFOqXMmypcuX
+MGOelAegpk0AJFrSrLhTpYQ3AHoeDFpQqMCfQQHIXEh0olGBYkZtpGkTW56B0EYBfTMKCUEJEqja
+7DpQDIAbBJsOJHF1qdu3cOOqVKtTKcWnEOnmlQALWk6Eep8C4Ou3YWC7JUlAg9VUL0K8vcRMRUwC
+gFdoXBdD6+WE4A0kQqE5kSqwsuWBepFg8yq3tevXsPPKg4n3YW2HjnHPZrp7oODehoHDui2ysfDH
+iKFi42iU6A20A5G84SsQrdE8iKdPR3181KPY4MP+NySBC4L4lHRJAI0MSwwJ0++B5nSvHqdAJPVv
+LHb/U54YJzX99RR+QOnX3ntKidELANiMYlce2DB4FX9vHMdYTfIQeNZ+8dlHkFg9QSihQEQpyKCD
+H9q0E4X+AfhGTir6ZhMskWGTU33Y8EWffFC5OB+CONX3V1BixVgfe7DgWFlB7621nmRMAjBdLwk1
+Bw0SAEBZ1mKw5PHddQ9aNgp0jB0nQWfnpanmDTVNU56aI6lFghOLkbAcTVJh9xl28uCJBDQ2Jkkn
+EqN0Js8bvWTYy3dmzfbUnFcWOhxXsznxmWhKHeooANAcmihrBhEFKaGGAtAVoH9xN1ymFa66GFH+
+lv4JYEFE7eRphi/21ephJDqRR6fY1MlqnlnCkitmfZra5VlI8Fnms89CmuRy6jkKVEGLlkbntEwG
+CwuKyK2VE3HfLhaapQKNuSxrjpkF50Sg9SnvvPTWa++9+Oar77789lsvPDcBsE0Tqb67kVpWDVSh
+UMbRxKUEy23XV5YOFwRNrckZS93FYlRs1sNKjZLqDSRUnBBREnNsMsS0bvrtyDD6x+lRGWPsqkCZ
+pSVcT4MZtCRN7M6Ws0AM70bU0UbPljDO8uQ4kAQ177a0scA5TWKVAQ8mVJECUbnqTZ/h3NYb30Hb
+ssEQFZn12my37fbbcMct99xxo62RWlknq2r+mVsrhbfeRzmBDZsu68xdT33TGHDJGYc629+JG77T
+gjfFHLmqtiKWFTZv3CAZr9CKcQOiC66q6uadd4yYcc+ynrdav5EYMHAIYx2dVljunGnZtWF3E5q5
+5Wb3QfH6a/zxyCev/LwAB3xFEoUMnxHCuu9N5Myw/KSxQCrbZRaUNhOUMsV2XQzyy0U1fvb4qj/8
+xtk7iey4+TTDn2njBJpNtF1N5Wp60QQhEABb97ikwQIbwrFa9qImEAQmRGKMsZ2MaESmdCmFBFCq
+zShIs6xXWUh40gthStgEAHZYSksitIictvWIN4BmdUnzE6oEFalSQaVYjwDAVR41KEkJpRf+l5qV
+piYFuOEUjESzGZUPTfWnQEmOVdYqIAVlxcDMKewvdtKf6aBFtr4s6IWq8p+dBog0xrRFVCx0YbWG
+EyFslU1aLfRKyaa1GHDNMUW3QwtmDOSXa3ltiwF8Q1tw9p12qS+FiCxJoTyXyOkBpz5OkAwZk1Uf
+smgobEI5UmUeMaD8eEUoSFjQlI4SIadlsnEAEsMlP4kToJDliaRkEHWIEkopSeCPSPwfYvIAoLNI
+JpUEwcz7mgIxBqmnQjDsUi89N8lniRIWwIQklKSJyyRxUJokygmhbMm/I6otbH3RijFp5J4JFkR7
+BJFU3rJXwUa6UySgemddxHPEhZBLnij+AVRDVEm097XmBtTBp0AHOrx7ukUeZXOIQQk6klFYqCBj
+QgISbsBBuOiToRjNaHgWCieOarQjhVnIRGsCzriE5aMoTalKV8rSlrr0pTCNqUxnStOa2vSmOM2p
+TnfK05769KdADapQh0rUohr1qEhNqlKXipARSOKpUI2qVKdK1apa9apYzapWocolgxwiGGANq1jH
+StaymvWsaE2rWtca1oTE4BRwjatc50rXutr1rnjNq173CtcYJEQYSwisYAdL2MIa9rCITaxiF8vY
+wAojIVuNrGQnS9mtAgMh0GCrZjfL2c6y9RAOYUQnRkva0pr2tKhNrWpXy9rWuna09Hj+6ALIQdva
+2va2uM2tbnfL29769re0bUFCnsCE4hr3uMhNrnKXy9zmOve50C3uExKSi1hY97rYza52t8vd7nr3
+u+ANr3VzkRBIvPa86E2vel17icesArjwja985/vbBYR2vfjNr35ZG1uEzJa+AA6wgHkrXIQQN7oI
+TrCCF/zc6SKkuuKNsIQnTGHwkhch5t2vhjeM3/YeRB7vHbCIRwxg+zZEtBxOsYr5K1sSu/jFvi3w
+QQ7M4Brb+MbMdfBBIFzhHvv4x9298EEyvOIiG7kTHjYIiGHM5CbX1sQMQfGRp8zh/h7kv07OMoll
+bBAa4/jLYF6wjg3CYyCb+cwUFrL+QYhM5TbnN8lFCbGW5yxgKC/EDlPIs573zOc++/nPgA60oAdN
+aD3zASF+cIOiF83oRjv60ZCOtKQnTelKK9oLCfFGNzbN6U57+tOgDrWoR03qUpt6095ISBSawepW
+u/rVsI61rGdN61rb+tasjkJCzFDoXvv618Am9B4QwgdLG/vYyE62pf1wXzc7e71WNgiW6Uxt+XK5
+IF4Os7a3vdwxF6TMaA63uLmr5oKw+dnobi2cPyTnaru7vs1Ot7xXG+2CTPvd+NbttQmSbW77m9ve
+Jgi4x03wcZebIOeet8JJu27ftDvfELetnRUi5YVbvBP1Jsi9Ix7xfQ+k3/8O+Zf+Az6QgRf85GY+
++EASfnF5N5xoD+c4xCeekIq3fN4ZH8jGZY5vjwsE5CIPOoNJLhCTo/zoFVa5QFh+82e/fDgx5/m7
+aY4QHgzg6ljPuta3zvWue/3rYA+72K/uDAYgpB5eSLva1872trv97XCPu9znTve0JyIh5uiC3vfO
+9777/e+AD7zgB0/4wuvdHAlRBRAWz/jGO/7xkI+85CdP+cpbfvGqSEgrxs75znv+82LHAEIYMIG6
+m/70qE893esR76bLO+cC2bnUq+1zWABd6LiPLtFhYXSk+168SocF013f5qcvefYzbz3xnw17WMge
++XOu/e1zT/0cU/f32E9zeZf+73L3Qj/fVD+Izbnf5uY///tOln7116/762f//eEN/vDJX2TjRx39
+WQ6/QcpBj/77//8AGIACOIAEWIAGeIAI2H+lIAAIYQJp8IAQGIESOIEUWIEWeIEYmIEa+ICUkBDZ
+8AUgGIIiOIIkWIImeIIomIIquIIgmA0JoQZtEIMyOIM0WIM2eIM4mIM6uIM8GINqkBB9kIBCOIRE
+WIQImAwIIQDvsIFM2IRO+IQaaAIOUQlSUIVWeIVYmIVauIVc2IVe+IVgWIXUwIAHAQqrcIZomIZq
+uIZs2IZu+IZwGIdyeIZGkBDXUAV4mId6uId82Id++IeAGIiCOIh4eA0JEQH+oZCIiriIjNiIjviI
+kBiJkjiJlJiIEZAQNhCGmriJnNiJYJgJSWgBcziKpFiKpiiHoKB89Ddl5od/7qZ+7BeL1vdg8FeL
+3iV/q+hs9ueK1aZ/BTF+uVhkrciLdAaLsniMx7V7vWeL8IeLwUhlu0iMc+aLBAGMz5hiwyiNWWaM
+yIiMysiM4IhdzniNRhaN2uhk1DgQaBAJ7NiO7viO8BiP8jiP9FiP9niP7DgMZncQt+AJ/viPABmQ
+AjmQBFmQBnmQCJmQ/lgMCREO4PCQEBmREjmRFFmRFnmRGJmRGvmQ4ZAQYPAKIBmSIjmSJFmSJnmS
+KJmSKrmSIAkGCWEF+Bj+kzI5kzR5j8N2EAyQAAq5kzzZkz6ZkLegiuRYZS12juk3XN2YlN8YjuE4
+jkOpYuZolDCWjgJhjU+pX9kolS/GjUkZi0vJlMzolFe5YVGplSRGlbBglWMJbUVpli7GlV25fl8J
+lrUolmupX2XplgOGlmp5l+iVlXo5YHAZl9Q3l3T5fnbpl+uVl4FZYg6RCTUZmZI5mfZ4aAcxAz+Z
+mZq5mQeZAAnxDWEQmqI5mqRZmqZ5mqiZmqq5mqwZmt+QECIACLI5m7RZm7Z5m7iZm7q5m7zZm7Ip
+AglxAZQ5nMQpmTdpEHzAmcq5nJs5A0KpmOkFmI0JYINJmLhnmIeJfYn+CZ3oxZjTKV98yZ1Y2Zbf
+SZ1IaZ3sh53Z6XvbKZ7s5X3lGWDh6Z5s6V/xGWDViZ4ip57reXTtSZ+r5Z336VvzCaB/SZ4DCl/5
+qZ//xp/9eXL/aaCoJaAJultoeQ6QkKEauqEc2qEe+qEgGqIiOqIkmqF9sI8GwQvisKIs2qIu+qIw
+GqMyOqM0WqM2uqKfkBDpMAY82qM++qNAGqRCOqREWqRGeqQ8mg4JoQKT0KRO+qRQGqVSOqVUWqVW
+eqVY2qQqkBBQUKJe+qVgGqYk2gqjtwI3eqZomqZqaqO88JwSmlrSWaG9taAMCnDu96C/F6FvWloU
+Kqe4VaB7ymL26af+wEWndaptDoqnBrd9gapu8EmoBOqmjUpacQqpuGWohwpmiaqo4aanjdqnlkoO
+FyqmpFqqpiqiJ4oQKrqmrNqqrjqjOYoQcCAHtFqrtnqruJqrurqrvNqrvvqrtAoHCfEHv1Csxnqs
+yJqsyrqszNqszvqs0Fqsf8Clp1qt1lqqZIqTZvqq3NqtrNqmJzapgnploTqn55mpQbepnHpmnhqo
+oGqpgCqup1Wp5Rpc54quIaeu6wpk7bqn7wqp8SqvpUWv9Yqp+Gpj+rqvPtavb/qvhBqwAgtbCFqv
+tmWwBzt0d6qwBMewEuqwfoqWHXAJIjuyJFuyJnuyKJuyKruyLNv+siOLQgRhDwswszRbszZ7szib
+szq7szzbsz47szCQELTwBERbtEZ7tEibtEq7tEzbtE77tERLCwmhDLlQtVZ7tVibtVq7tVzbtV77
+tWBbtcqQEHrgsmZ7tmibti1LAQghBj/7tnAbt3L7s/bAVHZ7t3ibt3q7t3zbt377t4AbuII7uIRb
+uIZ7uIibuIq7uIzbuI77uJAbuZI7uZRbuZb7UhCUPf50HwJySIYRMFTCM6ALSDJSFYPkM2szG4Sy
+Fa90ITZRUpcbu7KLESRQQRRFEEtCulBBQrB7NumTS78bvJp7uh+WHHukGaPDJU0hGu00u877vPt0
+GrCAJYOUB9f+orsH1BVI4ATVZDjB6xh4YRRZpBBGAUQF8RzAuyzQu77sqxBWY71YkSN2ARmsQRzg
+yz/HEb7Giz3hch/FQhBm4RVqgR3tW8AGnCTQIT9dw0nz67ncAhj5i79K1jixI0HYS0Dcgy4HvMHP
+G8DGMk3W0cAIIUAnEzD7IzuVg70V3L8XfDQ30QvxxMEybLlOIAEQ0k+sdMIG0UK90FVDEcHpa054
+hBX8W7zR8b9lcRogNMNMLLmPQFFYlDd4MUYMcb9BrMNDXBrLQb7JYb6eAR1L3MRi3LgXgw0wu0UD
+gsQKYcVapMKI8RPEO8HB1DnI+wbK+1BjnMeMKzhG/DU2kSj+WXMyQOy6N+HHNRG6NzE1Fnwf4jQ0
+bazHkBzJkjzJlFzJlnzJNEUCvbDJnIwkInFLnbzJAYXJpFzKpnzKqJzKqrzKrNzKrvzKsBzLsjzL
+tFzLtnzLGhHKurzLvNzLvvzLwBzMwuzLo8xSCjLMyJzMyrzMy3zGKnXMzBzN0jzNyXxTDELN2JzN
+2uwhLkUT3KvN4BzOyAwgePxR3izO6JzOvNxGNsXNLuXOLEUT5axR8gxT9fxSuVtT8MxS+6xS99zN
+YZxR/9xS+UxT/axSB41SAx3PAY1RC71SBT1T3CwMjFDRFn3RGJ3RGr3RHN3RHv3RIG3RXZXQ5qwW
+OLALKJ3+0iq90izd0i790jAd0zI90yiNAzIyzwKtFiG90zzd0z4d0h2wFg7cUtzcl5MabSRNz2ox
+fRd7Yzr20P6sFvMnrkkW0TJV1BGLWkhdT1HdG0zd1DX21A3NUP881ZNa1UPNz6li1I261fa81GDt
+b2KN0w4t1Vl9WmhtzWt916bl1i/1z18d1wo212/dG2b9qUKt1+pYnIzd2Paoj6jB1Sn1z2zQmpZ9
+2Zid2azJBjdd2AMBk44d2qLNjsdp1TGF1XxNqb2R1Dnt1YK9bYT913ad2qOV1+2817SNcast2QoN
+168dZrEN0Iad20iW2Lc9EGwdqH4t3B/328Dd2bI93Ln+bdv6nCpU6InYnd3a3YVjGNmeLRAfyILi
+Pd7kXd4q6IL7Q9dkrRaZuN3u/d7YDYqlkdYrxc38Z4T4nd/6XYAL6N3RPRBaQIgCPuAEXuCDqAXQ
+zdwCEYT73eAOjt9ION+KXZXEvdwtBdjOrakJfuGzTdvUbdC4TdsWztCuneE4FtwcLt0ebtzVjdwV
+vtvfbXsmfuIbTuIrR9wfLtGpgqHX2uM+jqooytp13RuzCqxGfuRInuS+KqzpHeNd+uNQHuUZmq1J
+Qt8IHeKpPeIrheEzjrA1vuUdnto5ftVYztda3tXN3eVh/eVovnQ4zuIg7uK5feaT7dtqLmZsXucq
+Lub+cK7jci7iMP7fP3fnGNvkgi58by7hxy0QeBZsjv7okC5olskYvF3SvaFpp5bpmr7pnF5qqWbo
+Cg4LvBbppF7qjl7aVp5SqA3o/h3qgU3oyIXiNu7m093nZP7nWR7org7rg53nvb3nfD3mp13md03n
+v57mvN5+oJ7iN17rit7iFD7nus7sg57syr4q6k1QZZ3oVT7hsMADZBDu4j7u5F7u5n7u6J7u6r7u
+7B7uZhDkla7UvZF3hlfv9n7v+E54iLfssw4Li9DuAB/wAj/w7C56zx7n0c7q2RTjr27tsg7mwH7X
+wg5Tq57rrU7tMm7t0PXwbY7ozt7ti56WL37x/d7+8MnO8Xre7Ct+8H4uENcN3zAf89xNhpQe4+Ft
+3jif8zp/guiN7THe3jIf9EIvBfIN8tAOC/f94Eq/9PxN80K+3r1xhwY+9VRf9YBoiPwO8QPB4Ezf
+9V5PDxFu9Agv8tJO8lpf7RrvXCh/7LS+8mLf8mSv8DV/6CbP62tv6SrP5yx/6wlv8QtP92nfYL6O
+922v92/P998Oeoq/+IwPdmVn9h1vDt8w+ZRf+ZZ/+Zif+Zq/+Zzf+Z4/+fvu84e+eY1f+qav+AZ/
++MOO62Y+7SUf+Go/+PKe98Fu66vf960P+SmP9rCvXHc/+4Vf+3t/+3Hv93O/673fbbLf2rQv8bb+
+T/HEntXGTvgZn/zJ9fvMH/zOP/zQPxCS8NPgH/7i/9Fa8vTarhY/QNPqv/7s3/4z/QPLP+QDQQHj
+X//2L/7P/1LmT1D7L1BQvfsAAUvgQIIFDR5EmFDhQoYNEcoDIM/hRIoVLV4USAIARo4dPXYEQOLj
+SJIlQ5ZEmbIiRIkqXb48yBLmzJkyad5MqRHnTpgnef4EKRLo0JURiR51aBPp0odGmT4tqBPqVIQA
+epHAmlXrVq5dvX4FG1bs2LA+qT6FOIrsWrZt3b7FOsrpWaZp4d7Fm3dtr410/QIAHFjwYMKFDR9G
+nFjxYsZC/SKFyFjyZMqVLQ9u+fho5MudPX/+pqxZ9GjSpU2fRp1a9WrWrV2/hh1b9mzatW3fxp1b
+927evX3/Bh5c+HDixY0fR55c+XLmzZ0/hx5d+nTq1a1fx55d+3buFN9IICjhDUEkQiFyJNwL1nlY
+6df3JcgZALY8Cgc7yQxt1Jv5JJC8J+yGwoSSwAnAnABPoPkKkiuz7h6EMEKKSLiBoBtGIUgq9ggS
+Q0AAbvivKgcFYm+ugTY8ET7x6hNRICT2+w+aN0aBBhYxnHCiIBMVdHAUJ1jMwwkM2wNADIL4G1FC
+JZdUUgwAQkQCABZhyYM/EuEbCJtRkEDCCfVaLKjEJFG8MkNsEporSol6qXAgJLBxrL0k55L+AJsQ
+XfyuPSccC3JHJv8EVDtsEqxyIGgG7YtMWHoJUdGB/BTTIEVRhAaAGg9K8w0uiyzokRwfnTMzJx4x
+iIQcAXjkTIFGecTPQF+FFToKV3Wsl0fYc3QgElTFdMy+IMWyTIJcldPFUeqbFEs/5wJWTmyMhOWN
+SpOMtVprj3PyvzegnRXXYN0k4Q1qFRxMWMJESlbHcd1LF1SDmPVVTqyoPHXca+/FtzcE81BVHk3f
+E7bTN3q5FMz4fo03TCwrLVjdgVoVI0poH/6Ux3dFJbXTesU4k9Vi8wU55N0euVAojTBTdFd73VXY
+4pZfzojXi4dds00X4aR5ZoHEuxMWJPL+NOpZaT8WuWijY6v0WUkTDVbihpolOsWlB1oRzcweGQ8W
+GWm0EUeHv171R4GCHNIoUz8l9mi11x7NCZmlBjCwXuQDzOqpiSyXbvXko88+wZyYWD/+4Ow5apep
+NhCAUd11MuO02YY8csknp7xyyy/HPPNYSeilc897mRgmCT73PEHNT0c9ddVXZ71111+HPXbZZ6e9
+dttvxz133XeHXB7ffwc+eOGHJ754449HPnnlC08OCeWfhz566acXnnnkoKE+e+23n7460L4HP/zA
+ViaObvHPR38x8oc7OX333y/M+1G4p7/++ltdXziIbrW/f/+hx59zNPI/AhaweHLxXpz+lKOU5TBw
+gY8jjlSaI0HpmKWBEByOA5OjQeRQcDkehI4FH5i/4HDwOCY0DgiTo0LnmEUQC4BhDGU4QxrW0IY3
+xGEOdbhDGNYgRSQEjlJQkQsiFtGIR0RiEpW4RCY20YlPJCIqfihA+MgDBjzEYha1uMUdCkJX33qO
+WRZADjKW0YxnRGMa1bhGNrbRjW8kYwum2Byl5CIWd8RjHvW4Rz720Y9/BGQgBXnHXMyRORKUxyrg
+uEhGNtKRb1zAFxM4kDE+0pKXxCQb5XglIP6mjoMEZShFOUpBFpKTVDyRIjO5SlZaMpIZAWMLHVPJ
+VtbSlpo05AUzY0dS9tKXv/yjKd/+00nfIFKVt0RmMsnxSliwsDlmMUEapDlNalbTmtfEZja1uU1u
+dlOalMjlCAeihjaU05znRGc61blOdrbTne+EZznVEM4VwkcA7/BmPvW5T3520wSSpI5ZQLEKghbU
+oAdFaEIVulCGNtShDyWoEeiJHKVEIBQXxWhGNbpRjnbUox8FaUhFetEITPQ4EhSABSC6Upa21KUP
+BQVApyNGZdbUlpscpnM+CUye9rSXwkRhcYxpU6KukpnOZA5Ni7rUR+I0qOWbCy99OlWqBtOkKazi
+MZm6VUjKtIKz5GpY3ehUDOovqlVFa1rxCNSyBmeoYoVrGo8ay2c65haewGte9br+V7721a9/BWxg
+BTtYvBbjqsVRChhesVjGNtaxj4VsZCU7WcpW1rKLBcNhIwgfBiSAsJ8FbWhFO9hbeDU6So1rauOo
+2QyeVa2vnSpbidmbt6o2tXOdpEBoaVu4knW2vNkpbIX7S9mikkRa5W1YcRtQsCa3t6w16y6HO92f
+QtetWXUuXJc70+Zml6u+1alrqTveQRZ3gtj1Lle3+9WBzGC074VvfAObAOsGcS4iAER+9btf/vbX
+v/8FcIAFPGAC51cE9f2NBPkgXwY3OL4zMG0Iu5vepYKXjuIlb4b9aN5DopfCS13vaSf8YZtamDnB
+1XCK9cjhD3qYxDYNsYQp+eL+oppYlwORqop1HAsWK6e2NFZmjMM4YiDf0sbiFEiOd5ziHtczlUWu
+qZBlORBeiMPKV8ZylrW8ZS532ctfBnOYrfwJBPtGKSqYRJrVvGY2t9nNb4ZznOU8ZzqnWQVlpi1n
+VyBmPvfZz38OMy8iPOQZQzmZR94ghpes4SZ30MWGrqWU61poSN8Uz8BV9KLJ2+iTPrrSRh30lHX7
+aUuf8sLS1TSTL72bH5M6k5JOqmOqDGha19rWXiazqU88lz/8wte/BnawhT1sYhfb2MdGdrJ9/YdV
+60aCDNjzraU9bVoLGpa5hcVuXY1JRFM006meLqex+uRtg/razKV0uS/Z7RP+fhvcwhW3UD2t7kbC
+ejmopXdTm50bFL873PvGTavzzUh7KwffA2cku43Tb3/DG+C3ETjCu3pu7rbXwRfHuGDpq+sbCyQe
+lwV5yEU+csvG4+G2UXDGVb5yvUKY4uwdtcQbqXDEurvhaY33Zskt80UWPDkH53kbaQ5VVN98uDln
+37yDrkafIwfoS1fj0FtbdKPDFunCiTjU0dj04zxd62eUenRxXPWjn7w2Wf96GbluHLNswhZvh3vc
+5T53utfd7nfHe971/nbDchzJsFAFEAQ/eMIX3vCHR3ziFb94xjde8KowO22ejYe9V97yl8e83jcR
+6knHPO1Rj/xsGE72ql7+/bo7//zWOR/rdKfejGEvoc1J31PTAwftn197cbyeetjbl+qzL33oZXP7
+tOeeOGJ8afKVv/yGStTvic6MNEY6fepX3/oilYbwY4NIlTLf+99PvvGHYxYxGND89tM+bJRyfvZz
+L/2vkaDz2j9/6U0MqfdWIPTD+9vdPDXpz7k/g8s/b+M/3fA/sTsvAKQr1ju1/XPABDSumYKfCZzA
+AuQ3CsRA97HAgMvADhSf6tCLEBTBEcyKhlEOaCDBFFRBtzDBDVrBF4TBr+CdGaTBGrTBG8TBHNTB
+HeTBHvTBHwTCIBTCISTCIjTCI0TCJFTCJeSOPKGarHER81jAhWCXXxn+jL35Fr6ZkoRwEpuJGvMJ
+CfNBl/tgEas4GAvhFHK5QryRGyZ0Q9OYlYG4kAxhGoPoEMAAEbt5Gag5w515gy08iJO5E2KZC0dB
+kXD5D8DIGIBxEcDIPxNpqzeUxKfIlkackiqpw4LQEi7xEj3sQz6Em5hZiO9wQsNhGUaEGYuxin9B
+xVRJFZ05nEmURc0YlLGBwkORgEwkCEYJGFiEG1DsRYZJiDx4klGwGULMDEMMFkuREydokw3Zk0oJ
+naiJxFm0xp+IQ7UYCFvxFoXYFU/8xYTpw1M0CGOkkieJxZzpxVBsJseRBydBlr6QRljYE7AxxWvE
+R6SoxG3JiArpxoP+KA9xQZNyYcRzQUV2vEeB+EOFdEeDMR+JoBsnNBtNYQ9PEQissUeDzMeNHIp9
+6Zd/+UeBIRj7SBhgPEhhPAhi/Jt0PEVlJIhRUBWnEBL2SJzA2EJI3ECO1EmOIBltbCbC8J1vURmG
+gBqTPMS3eckhOccaQUaE7MVIsZGQ2AiUpEelpMac3MmspIikmUZGJBOnIcqSFMdQrBqAXMiBeANS
+acp1fEqE+aKNMBWCwEhyrEattEuOcJuYYBrBmBvCAEdz8Uu9iZv5AMTwgMKwYcO6ackqJIhKqQ8T
+MRBYwIZF1BopoUu/vMvM1MzN5MzO9Ewe5BzSAZ2bGB3RNJ3PRM3+1FTN1WTN1nTN14TN2JTN2aTN
+2jwd0cTN3NTN3eTN3vTN3/zNNZgG4CTO4jTO4yROJVgDJUDO5nTO5wRO5WRO6KTO6qRO6bTO7NTO
+4pyGNdjO7wRP3hwWbAjP8jRPAGAH81TP7awGAKiG9YTP6mzP94zP+mzO+bTP/DROdrAK/fTP38QG
+ZRlApzMG53AEAHAEA0VQBU3Q5jCGKdS5RxnQrivQ5jjQBmWOC2VQ53hQBRyWCWW7Cs3QBbVQEh1R
+DF2ODo3A9nCMEZCEF4XRGJXRGaXRGrXRG8XRHNVRGC0YABDRQwiGIBXSISXSIjXSI0XSJFXSJWVS
+IR0IDRWIGDj+hSml0iq10ivF0izV0i3l0i710imNgSc1UWFYgjI10zNF0zRV0zVl0zZ10zeF0zIV
+BjHF0B210zvF0zzdUWAYCBXVmiYF1EAV1EFt0kPwKrNghE5Q1EVl1EZ11EeF1EiV1Eml1EpVVHrI
+DB9tPddbLYGAUlh4AiYQ1VEl1VI11VNF1VRV1VVl1VYV1Seg07EDPpyLVYGABEvF1VzV1V2t1Evo
+U6VLvfVCVF4l1mI11knF1EcRUW3jVHLAqU8NVVeV1mml1mplVVj1VBNVsln1KWH61Fs91nAVV2L1
+VYHw00Rq1jMSVsdI1HF113dF1kxd1nR9vVoFVWvF13zV11X+xVZY+NRt5Vae8lYTBVd4NdiD7YRy
+hYVzRa5mXdeBaFeEldhxTVYFmVd67VR/NdFo3deO9Vhr7dd/Ddjgy1YMLdiJRdliVViGxVgyeliB
+sIMpkNmZpdmatdmbxdmc1dmd5dmenVk+UNaB8AM3INqiNdqjRdqkVdqlZdqmddqnJVovsFdv6Iaq
+tdqrxdqs1dqt5dqu9dqvBduq9QZ7jYJmMNuzRdu0Vdu1Zdu2ddu3hdu4NdsosFcz8Nm7xdu81due
+3YNfHQg+gNrAFdzBJVyo9YNDZdeUVVxirdj2uFiMfdaN/djJpdxrtVeAHVlfGliTXdzOzdWVBVbc
+Q1yI9dz+0qXUxtVUz6PXyMVQjq3c14XdkNXWzI0tez1Z08VdRgVd1EvXl4WFiM3d4O0E1H3c1bVX
+14Xd5P1Y2cVQzKXdUdrcgbhd4TXd3T2ull2m0RUI4KVe0yXeTeVU1h0I5FXe8s1X5pXV5wWm6LXV
+7g1e612PhuVU3+WBAbDf+8Xf/NXf/eXf/vXf/wXgALZfZ2CAoBWIevCCBFbgBWbgBnbgB4bgCJbg
+CabgBE4EezWHLtDgDebgDvbgDwbhEBbhESbhEtZgc7DXwHO8FWbhFnZhxoO8kh2IVhDgGrbhG8bh
+AMYAvxUIBpiACgbiIBbiIabgetDe33Xf3P1e1U1X8RX+CPI13yieVvRNMvUVWNtNYtyFX3RtWd/l
+3ixW3CXONux14nuV4jOmViqGBee14vLCYjD23C2WX9fzYjj2XDFm1vA9XjTmY1dVYzZu40BiX1iY
+XjuWWDnGXt8tB3pg5EZ25EeG5EiW5Emm5Eq25Etm5FIQAAOGhWjqp08G5VDWJnCSYYHIhi9A5VRW
+5VVm5VZ25VeG5ViW5VlG5WywV3KKp1zW5V3m5Xeap1KGhT7A5GEm5mI25ktOBh6GhXsS5WZ2ZlD+
+p5czi0qQgmq25mvG5mzW5m3m5m725m8G52qmhk222IEYKPBD53RuPnu9hipw53eG53iW53mm53q2
+53v+xud8dudrsFeLur5/BuiA/qiSAmYbCOeDRuiEVmhwzgRlTil1huiILqiYkubENeQwllfwdb0y
+huI+9uhR/eNA1tw3vmiUReQuPuIvLmmDxWMy3uOPhulSDWmRJqVBLuSVfteTxtg6xmmJbemW5eiY
+FupXvVyarmmS7mmD1Wl6pV8ycOqnhuqoluqppuqqtuqrxuqsdmozKOByFgi3y7ywFuuxvru+01gM
+NYdvUOu1Zuu2duu3huu4luu5puu6VmsUBuZ1CIC95uu+9uu/BuzAFuzBJuzCNuy9Xgd7XQStZuzG
+duzHzuodNlfOojyytuzLDuvNq2jSTeqD/WnIfen+oYbpmTbqULLpzj7Ype7dlEZtls5oJm7WoBbt
+0S7q0jZtpG5tcVVth2Xt3HbXzzZeYO7o2Y5i0rbtUsJt3zbW3Z7f3lbucAXuJg5t4uZj4z5uQDrt
+5w5X5qbjI86ESADv8Bbv8Sbv8jbv80bv9Fbv9Q5voPVqWHAvlpPvi9u4sx6IbwiD/Nbv/ebv/vbv
+/wbwABfwASfw/P4Ge8WvAlPwBWfwBh+wAwPmC2DvCafwCrfw9e7byf7b+ebwBnO5n/xQztZuY43u
+2J5u6j5j677uDUvuEddV7g5W53ZxXi1xPRZuFO9jFV9xPsruGedVGBfdzd5eH2fc1x5joD5xHDf+
+Xx3f8RVrcSKnVCAvPhmHckut8Y1OciVXXiZv8rV68iqPVCn/Ot89B0gw8zNH8zRX8zVn8zZ38zeH
+8zg38z7oaselMmrD8zwHs1yzb4FIhzEA9EAX9EEn9EI39ENH9ERX9EUH9HSwVzSrs0iX9Emn9Dm7
+M2CGAjnX9E3n9E6P81ZQZmjT81En9SuzNhCXUBEH80q9ct7Lci2P3dru8j7q8VWPcmXm4p2mcluP
+1Fb/PNmG9SWX9Vnfo1rn9TDH9TmOcSFH4mOPV07OYyy/8WAXdmAG5CY3dmd3VDHXOkU+5m8H93Cv
+ZE3mZALQgXNH93RX93Vn93Z393eH93iX93P+NwB71QIuwPd81/d95/d+9/d/B/iAF/iBx3ctsFch
+oIKEV/iFZ/iGd/iHh/iIl/iJp/iEFwJ7FWZx1/iN//Zk1nCBEAAamPeRJ/mSN3l5J4AjpuaFZvmW
+d/luHmdOPmeJpnnwc74+h4V21ued5/me93l85mdg9meBJvqivz6CxnmDfvmlZ3qWb+iPX+buq/mp
+Xz6KRnUFsWhth1RfTztgp/bk5fJZz3atX1RuhzqeJntH5fqv8/qvf92w7/KxT3uzXzq0T3tGXXut
+a3u3p1y4x/Yv13q6Dzrf7YBLMPzDR/zEV/zFZ/zGd/zHh/zIP/yJSV1YsAcuwvzM1/wcggH+e6WF
+JwD90Bf90Sf90jf900f91Ff91Qd9WrBXZYCi2Jf92af9J1IGe9UDydf93ef93o98ClBmMdj84Sd+
+zbeHI04qEV2OT11+E21+FFUOP+2wEG8h5VcO5r9+589+6E8O6W8x6n8m608O7B9/7S9/7kcO7/cx
+ZbmKGHT/F+yP95f/EewBAOiB+cd/vaj/+8///n+L/QcIEgIHEixo8CDChAoXMmyoEBsAhxInUqxo
+cWAvALA2wgLg8SPIkCJHkixp8iTKlCpXsmzp8iXMmDJn0qxp8ybOnDp38uzpUyfHoEKHEi1q9CjS
+pEqXMm3q9CnUqFKnUq1q9SrWrFq3cu3+6vUr2LBix5Ita/Ys2rRq17Jt6/Yt3Lhy59Kta/cu3rx6
+9/Lt6/cv4MCCBxMubPgw4sSKFzNu7Pgx5MiSJ1OubPky5syaN3Pu7Pkz6NCiR5Mubfo06tSqV7Nu
+7fo17NiyyyKRAKsXUtxGe/Ui4URMbjG6gw6/Pfs48uSbSeDGDc1Jr99Ciw9trpQ6x+LYlXPv7t3v
+I9t5SMAab11CHljQqPfmDc34bWjYSMyH5lu6QNuw6NuHJcGJE7bhlgd02Gy03XcJKrigWtDcAMso
+0CBhXXHjTScPLPKMAh+FsIjx4HO3AZedcU4gAQs2E8JiIhJvMPgijDGWZd0N71nn4Q3+5E1Hom4d
+StAbCRqxF9989MlzI4IyKrkkk009MoqAvL3xY4ajnFgdhhqu6GGHYpBXG3w8wvdIiUi02CSaaapZ
+FDQAXHmgcaNA15tQAvF2opwCGafbfen1CCdu/zlB3oDQPRjmmokquiijjTr6aFn2EfQepJVaeimm
+mWq6KaedevopqKGKOiqppZp6KqqpqspWHhtu5ARH+iU51Bu83Vgrb8AVh2svb2bXC67A3aejrQFC
+5QSlEpAJC5izTmergLfZSihxvA1KFK7S7QcgecIByxtzuIoHnRN+ugqhs0ORoJ+DscJ53bTw8Spc
+tdHpGJS3uG407IHWjtgUEgZuNMr+iOkhitS8Pca7q62+clTrtfty65+UQH7bi2383nbuKOnWye6h
+/r2blK0Y71kyosXey1G+vJVHrsHF/stUqxzBulHMTPEqL7SI8urwRhDrSGB06f34LXMXCzvxxhx1
+TJXAYrgqj0aI3qffjiQerN1GWhI13LobSUDtq1A9smyN+9640XPRzTwyoFmPDGV1G6kYtn/3ct31
+DSciccORAqPoMb6Hhtd11cXdACDWWmu999yNE8cR3mPD1+5TN4x4sxiJByUPuW+nDPfoVX5d9n76
+WQ432KoTKjg2hLNsuH7Mwb24sXLHPbnjdJ+eYd/MAn55yE1F7WrncIPudt2P667+m9dZwypP8H9j
+yDrlrt8Gu+xFjZJe2H//6TTqz5MOOYLDcT3czU6FGDDOtheHnnrYDTmk45jzjgTZI0MeIUeg0bHv
+ledpTknRit4jvpF5aSOCc9y7IBcm/cltfWwrHlMsVzN0wY0EwKEg9rbGu4NFMISlG10vCNiq7jnw
+RMjykO3C1EAUNU9MI5wgBrUGQLZtSDcgXAoB8WbA4nhQPTncG/789zvc7FA950Lf5FKYnhVSpWYm
+2g+9SKgiWvEmWrDY2QnTB8HmWEtyTEGWhVS0tqBY6FkuO1kXT0hCXL0BQ1B0HHt6ITUIHekp60LC
+zTw4vqD8sGS6Cpqt5ChG7Nn+ChuiUwr8SJCeR+RhkGxcGSLppLA43lEoEMOGjUbYyT3tcRR9dMof
+b9Y3S16wOomEoyZvuMj78caRULHiiSjJyvhx0WS6mVcYsRUdUCZxlM0p5SmnAsoHyYNsxeEfCYBW
+uiSGaZEorMrZCAaLtK0RR5g8nwjzd0S7wcqYTqOUE3EDq3I+5UPKypAzg4KEUViphiUcYwBzqEQb
+TkVzsPobh4SioW9O03yke9eHgonHKG6JnU5xJ5lIYEe4zbOeutuTQY1YFN00UYAkfMoyhRfQz42C
+oEjM6HZ086GOPlGUDF3nR51CT9scrVaI0txueEdN6J1LbpXrn1QcdDNbTan+OBqSpgh36jQzqi91
+YtPbCKnnN8DhhgQljSlSnJC2mq5tQui8qCX/Z8aR/RSrGaQnl76Vxafm1Hmy7FpPtQbQspqwrFW9
+KgtflbakvaF/Xt3ojpQ6sLHCR6rCmyhVZprWWq01b229Jz5N1zzrVW94ddXeXYEalc75apO9eAQY
+DQrMnf0MsJRjWl6JkqPATktO4LInLAkFLZUJM47bipgJX1Y047QplE95RPvexSv6vPKGPMtVyZJm
+r8fellhWeYPBIjit2L3WoMn9oq3EUNpe6lFiuJWjxpwDAN86BbhZ45UEqGs/1iYyubQ1rW7LddB2
+ukmn0FLvN/eGXuy6bLv+nsxufKMLXtSqZ7xmXRWCE5ymsQ2EsE5pJkHsIqmBfDUqExZIhQnD4PxY
+ZcN4o8uFSZDhp4R4xIIpyPWogmK7QHggVmmxnhQs4xnTuMY2vjGOc6zjHfO4xz7+MZCDLOQhE7nI
+Rj4yp6YUFAm4iCP8Q5xURIIbqm1Eyhmq2udAgg0BD4XKVGscAORBNZGQx8pj/oh8O5JiTzbZyaN4
+AwD6eqWRVBkkafvdRm7Q0zbN7F5hjsqZP3LlOoekymu+8ke2fBQqd8SoGhkJbujcES0bjNFDuQEA
+3nYkj+AU0SDJEqWNEiSRiDnUSMYLCYqn56AECcpC+RCnkarmLlftzwL+xTJHLM1kLmf5ym94k60N
+HZRg5xrXJPj1rIeSB49EF5BpE4NWhT0UW88T2a9eWR7aDItHPLDV0nYznLVFFEsX+9vDPrSuoWsU
+RntkWYPmCLGJDe8UH/tE5HayR1bWOTLxD9n3drV/1L1oXFs6SLI+tVs4azcARDfbVfs3NqwEyCTJ
++90V/ze56bNujVCNTuaO96HfPe9kC0XPq97XAzv3noqT/DbffMShBb6ie8kDziMPipze82akYrzW
+IW+5yPfzwFtXGVhXIjfIiZL0f28bG9wmzsqMxfSMD53Wvb45wuuCDf1kO4BbfziuD3T0sJ/b6kAP
+OsALbGIvZzrTH1/+88XDbuCKI4HhTLaZu7t2IpYTO72whZCr6j6igD0Cy/KWKNaJrni+oxvXvS0K
+u0F3KKTD/efxBg7TB8Xnhc9MDO+ZuuMNvPGrV9nEWXdLqgemN5iD/SgaL0rczZ322bN80FSOYdIT
+n3YSLKvid6+7wWrPeNLzUNlN/g/lJGr4nyNu7WSPvNIbL5TaR75z6aF82adN76mRffMRYzraac/8
+tBecoKd/i8LfMKLUMxr8/R6/SFxN5vDfm/oc10jAbJN7Qn+EPIGOs35U3Gpt06EIX0joBrHdGyVh
+S3rcwLKAjsjVHpPl3a19msUdoOzVH/NBX71hH7xh4KSBhI78m3n+bVuTgd//tV72QR7BiYSDnR9b
+BEgeCEzN2ZsKCgVo9YLpnR30KR7xPd643V/X/Nr+8SCujYLAyFvdhYQLUaAEYMjwxcrQ6dN+WImB
+tQio6V6GYAM2jB/9+Vz0+aDasaC0OYEp4VoRYp0EGNi/OUFIpMfm5ZptgB4hiR4ZlpvdvBkM0sUj
+6JmOjNqn/ZuRJEXsGaHZodzoWVqOFGEhyt+h+d3CZcwDAd/ZtZzH2Q0VahfyeVr/AV3NUWAQml0U
+4iEi3qGtdY63vV0Yjty9AeGKuMolbpP/kV3Qvd7okWL47WFbtIktKd69CZ5SFGIjkuKuJSKWtQgj
+hhy59WB2rMz+DTyIs73Hc9xMFFabr0iAg3EhKFqavDkBKIbi4lme9ImNzIkivXnEClbi0oWdbwQF
+aCHOI+TSG3weLaYbr4mhpc0TFeriWjhB1b3b//VCChqFMMZfQHKiohmjOzKiQYZeeogEA7hdrNRX
+tcWZRY0EFNpZhQVPUQRJhnFjoZHaHV4dRoagoAVaQprimrlhOpYksc0HJ0aE09VhpbkhAHRaCs5i
+ot0jPoYER/IjUAalUA4lURYlYCiXIYXF0ZSMyXTHUpbMC16FtzCl+SHGU/aMUWalVm4lV3alV34l
+WIalWI4lWZalWZ4lWqalcvwEW/qEF2JGCralXNLEW14GIM7+JV7ORGoAAJBchF/+JWA2xCjUnmZQ
+TUkFJmImJmIOZl1aRpAoJmRGZkVkxF5WZWZQTWNWBmaKxmaGRiqCxmeWRkRwJmFeZmnC5WnaJS1y
+RmiSxmiGRmeCRmx+xmx6Rmt2xm2KxmvCQgcwgm/+JnAGp3AOJ3EWp3EeJ3Imp29SQLFlJmXE5g/s
+gnROJ3VWp3VeJ3Zmp3ZuJ3d2p3T+QHOKRiqKgXKWp3meJ3oqZwdQzmqGxm5eQifEp3zOJ33Wp33e
+J37mp37uJ3/GJySEJ2wG2xMwAYEWqIEeKIImqIIuKIM2qIM+KIE+AYCCJpbJAz30J4ZmqIZuKH9e
+AntWJkf+wCeHjiiJlmh+/ifiOOdkxOaAQqiLviiMxqiDSmiKimeFXqiJ5qiOjqiH7kt7gsZ77qiQ
+Dul+ouiVqahksKiMLimTNmmD0uiR2miu4SiRVqmVdkKP7sePfkaQXqmXCqmR1mZnKKmTlqmZLimU
+iilr3uiXtmmOZmluuue9WMMg1Kmd3ime5qme7imf9qmf/img1qkCTChtBpsPvACiJqqiLiqjNqqj
+PiqkRqqkTiqi+gCh2iaWMYACBCqndqqnfiqgWsOHosZuLgA5nCqqpqqqriqrtqqrviqsxqqsnmoL
+XOqYBlsuxIKu7iqv9qqv/iqwBquwDiuxFquu5oKtrmn+rq3CrDarsz4rtMrqAozqaZRqtF4rtmYr
+rNZqjQZoiuWqsYaruI4ruRIrsnYrhS6rtq4ru2LrtPooiG6EqbYrvdbrtibrZsQmuJYrv/arvwbr
+uUapZ1Yos9qrwR7sqb6rlsYrLPjCKjwsxEasxE4sxVasxV4sxmasxj6sEeBrYQYbKYSCyI4syZas
+yZ4syqasyq4sy7asyJKCx2ZGKgqABWyszd4szuasxvoCtZrGbiKCFASt0A4t0Rat0R4t0iat0i4t
+0watDcQsaqZYNnwB1Vat1V4t1mat1m4t13at134t1WYD1KomRwgANTQt2qat2q4t0yJCz4rmvYio
+m87+7YaGaWpaBpmeqd7uLYOm6d1WRipaKN0ObobC6ZZ6RpcSruKe6NjirYDyLeRGboH6LZJGRuBS
+6eJmLn0aLsPKreZ+bifYbeVCRt5KrumaKeVKaddgLuhmLueS6r3IAgLMLu3Wru3eLu7mru7uLu/2
+ru/O7gQ0rmYGWx0EgfEeL/Imr/IuL/M2r/M+L/RGr/HWgfBSRioywO9mr/ZuL/f+riy8rWvey7wi
+LPm2K7cKrGzi6r+uL/v2a8Cq6XIQbPnO77oqbJwCqfjSr/5e6/nC78d+a/sGsACba/VORuAW7P4m
+8Kza7+F2hrUqMATfK7oWKgAPsAVf8LEWsGQccAT+d3CrMjDDjq8HjzA59O/fPqf6YrAKB/D7nrAB
+yy8JkzAIwy5HyG733jAO5/DuBu8Ee0ZsFq/0BrEQDzERQy/19jBuZqoOLzET5/D3wisNb4Tntu7i
+ii5ppliLnq4WN2nqDuyUUvHnvm61xi0Ya64VeytHZPEWrzGMdnG6rm4Zuy74jkbixvHgnnH6YjEb
+7/GLuvFnXK4dK64Y++y9AC3bHjIiJ3LSPi0Sc0ZsTi3YRrIkTzIle63YNnL8lu3ZKjInd/Ihuy0U
+jzFHOKzOlrIpn/LFdiwm/y9HhKzLvjIsx7IssyzMrrLMYhnNorIu77Ip82woEzJHiHAMR7AJj+7+
+Y+jrCifz+rawMTsGBw+zB8+wKMsrNHtwMV8xR+yrMm/zuDKz6mYIAlezAkszMFOzOEPwNaPxRmgz
+N7czAdsyZjzzOScwOcMtR0QDJ+SzPu8zP/ezP/8zQAe0QA80QedzImhwZMTmoVIqQze0Qz+0pFoq
+PJPtRjAALxQ0Rme0Rm80QUfDHOsmGQfyHSM06T4uH5/0k5L0YwCySNPtINuzFLc03eIxBacxSt+0
+gvoxpn6xTLvpS4dviPa0m9K0D5s0Th91hKq0M7OpUH/pT9NxSDf1lRL1reoxUiO1TicxT0u1lT41
+SHOEOmCBWI81WZe1WZ81Wqe1Wq81W7e1WO/+gFI3xg/PAl3XtV3fNV7ntV7vNV/3tV//NV0fMfr+
+MZbxgVsfNmIntmK7tTp8tJwG8zwrcDrncTa7s2W/82DvdNeEc2TPbz0DtTl3Nv1Odk2v82WfNrB6
+sxdvtmjr72dDNWS39vySdlFXMGrfdgZPtGPCsGyT72t/dWj39sHSdlVXNm7jtmq/MTgLt287Nv5y
+hDXkgHRPN3VXt3VfN3Znt3ZvN3d3t3RjQlwzRmzqQgOUt3mfN3qnt3qvN3u3t3u/N3yXty6E92Jc
+r3ffN37nt357t6j+MkzDwhRzNZjSt2KU7lWjdFYrKxwLeJV69WPHNIMTKVU7slEfOIITeGL+sHSE
+C6mDPzeEb/iOTni+VriF83GCZ/KCg7iOdjiXFrInvziMLy0jZ3ZxbwQkVzKO57iOb+0l07iCw4LZ
+xriQD7nQgvLCRjEsgAIvLzmTZ6wq+/iIp1gEzDKVV7mVr2wEYDhizGzNNrmXf/nDgoJzt3hsM7fB
+EjeF2/Zxn3ZyE7a6mrnB/vaDw4Iwwzm7onmUG/eas7mWH4Y823m7yrmH0zmg1yues7Jp7zmf6zbg
+8nah1++YI+694DNHV7qlX3pAHzSjo3CKLTREfzqoh/qjSjSUa8b1XjSmp7qqV7pH+zdoA7iKh3if
+G4aBlzgbn7ipM3WsmyiLS3pQ77qJijj+osOCGtv6rc96YWg4sPNopDtwVC87hwq7aVq1se8xrt/y
+VkP7hva6s/+6ttctshNGrVf76V57POv6txdus3PGbqJDCLw7vMe7vM87vde7vd87vue7vr87M4T7
+YMSmOyyDwA88wRe8wR88wie8wi88wze8wLuDv59YYe87xVe8xV/8vqPDum/GAz/6uh76tOu5ortz
+m2v2cns8pLs6bAc3yvNvxAcGMo+8ZZe8VrN2y2eroJM5y9/8s4J81Iq8zG8zzf+4PHA2zztrzvv6
+zh/9rPr8ZcR80HPz0KP4yTP9syZ9t2/EIHwA13e913892Ie92I892Ze92Z8914P3pq/+aLDJgNu/
+PdzHvdzPPd3Xvd3fPd7nPdy//FFmKtr/PeAHvuCj/SBsvGbUcbr3p7T//EYUO7mXO9//hbInfoca
+fmYgPuXr5+I/PYk/vumaO0VnCOtmfn5yO7s/O+kz7tonaed7fuSC/m5ne+rjp+lz/L2cAQvkvu7v
+Pu/3vu//PvAHv/APP/HnPgpEvl8APDIsP/M3v/M/P/RHv/RPP/VXv/UvP8SvvuXisjYUv/d/P/iH
+P/GfgeVjxm4qOZin/5I/uf+G/EZM+ZXHv/xTeZZrP2Rwufrnvy6LucoDN6FbPUCQEziQYEGBLWAl
+lAdAXkKHDyFGlDiRYkWLFzFaXNj+MGGuWB9BhhQ5kmRJkydRplT5MZfDjRlhxpQ5k2ZCEgBcrjK4
+k2dPnz+BBhU6cIHDmzWRJlWaFAAJhwuGRpU6lSpPhAoZLtW6devLjivBhhU7VmVLrBy5plUL86hC
+nVXhxpVrsKhNnGvx5s3Y9Olcv3+nXoXlVW/hvIQ9klW8mPFJs4OzGpastu3gt4AxZ95ZF1blyZ/X
+8k2ILkRp06dRp1a9mnVr169hl2bmMjJo2zUJu1u2m3dv37+BBxc+nHhx47vd0UZ7mznbu7D4xJY+
+nXr12OiMPm++faZoWJc6hRc/nnx58+fRp1e/nn14SMq5x69I+AkT+/fx59e/n3/+f///AQzQvifg
+k89AiCqTh572GGzQwQfZuyS7AymkyDvwIMxQww3Te++sCimkT8ARSSzRxAAJ/BBEAxNckMMXYcxQ
+QrtWrBGWC2PMUcf1PIRsORuZE/HEIYks0r8UfQSSuxZ3bNJJ8WbsTDsl5cPxyStz7JEwKm0T0sgv
+wTwRyS25/IxJLNHkMErPytzOO3Wsi1POOV1LpMA2PyNMl+P47NPPP43T5U48JauMAWboTFRROdWZ
+kFA3nUoIKs0opVQwMh89rLbEGuvUU7IewzTTtRK8rNJT/eKMzVEn825SVGGN69LaWMULsU9xzTWl
+UGmtlbLn5DE11mGjUnVKXwv+c5XYZaOa9Udku9pU12mpBYnXZ6FdqlRmuQXK2Gwlc3WVcckt19xz
+0U1X3XXZbdfdcY0YFNylCJMmlHvxzVffffnt199/AQ5Y4HulkXfepBK04N2FGW7YYXe/PTgv78SQ
+x+KLMc5Y44057tjjj0EO+WKDJaaJMJFRTlnllVEmuWSZKkOC5ZlprhlkMRx9OS3vdDa5155jEhVo
+jIQe2qJVjcYI6aRl4pnpi4p+OqKopX6I6qppxPqipbWuyOmuIboaa7GrJltqrsGWMm2YAGjb7bfh
+jlvuuemu2+678c4b27UXytvvvwEPXHC49077psERT1zxwNfOiITHIY9c8sn+Ka/c8ssxz1zzzaFp
+PCJoNg9d9NFJL33yzj232vTVWW+99NRhj1322Wmv3fbbcc9d9915793334EPXvjhiS/e+OORT175
+5Zlv3vnnoY9e+umpr97667HvXQwAUE9olBseygP1hZCK26ms+n7bKfIhgvuG7iVCHwAJHmLocMLT
+d7uh/AHoBecbsbU98D0ECaN4AwCwEakbwa0XCXEf/ML2QP61TYFGgeDUjkUR8rEvWrB4X/ZAmDQn
+POIhb6BfQrbHEQ46JA9OaNsokGChZ8lPIit0IEcK+IYYTkR+OnRIbVZoQ6w4BAk3aCAAJXK4HcIC
+CU4YRefk8QYSIjEikcn+4RIj6BBoOOGIN5zIFi8COpiQT4xc2WAXQ5jGkj3CCSwEwBJ7ERkbQkOK
+sNgiGts3w/1lUIi06kUF8wiZP/4QLUHko3bY9zNYmNCERkGjBLDhxSouZ5A11E4etKPIzgCSXhns
+ZEKw8T81jhJc0OCeTQbYmUfIcUpi6KIQCTlJyFhSlg6BpAwhs73/ARGRh7RaJKkYvjd+zyEjJCBH
+FEmrW9KShWjUJDYaIobHUZAEB1zfXaqJwP9lExv02+BdHoENBOahgf4TZzcTkodz5gGFccTGKO4y
+Ck6Skp6jMiYssMFOWIghkqysCAmAGT89zhKDtRwiD/fYmSPy0mq+TEj+AZH5LGJicoeaTOZyYElQ
+O5IAi4pEwikXMgpYYPIGSMCkxXCSQlg8ooHywEbnjvLNlb50pW27US86Z8DBvGGPnXNCSbcITjzW
+k6iEYuM+31DMhvhTIjkUZRXfttA9Ek6jsaQNLsmHBHQylDZww8rbQinJEuqzjsFsH9wiRauMTvCE
+Ym3oQanIEPJt7xFYnJBM0ZkQF97ohOSTp0NuQILtoU4Cd8loURELpI+CTqSdGSBTEfSGJ3ptoLDs
+43KWKVCCRvGjhewlM5l4g8b2CpNvayMs7mlBs1o1IZnN4kN1ytq3ajQycr0LOQHgBHaK4QZv6EUc
+Z0m+2sCTiuSLo/r+OMi+wyaWuSC6QTj1edy3VTUhN3DCUxEqS8sey4/z9CIHA8tVuM7WJlKFyCga
+O9JTAlSYMbzoQypZ0KvKdr4crC1KIYJJMbwhUjIlX15hAVx//vUhpiSsYT3ZXAUfCJJJraUN8+DD
+i/yMhvKtHw4NaNcLaxQJB/QseUFMAvMS8Q36TEgdtfpBeTghrdiyYoYnwsECpnK1P8SZfSNKvnBC
+kXt1hEYcZYYT8gG0c6vEiT+juD+eBhioe13pUBccZfl8NL0bhoz67uc2XAZygi1F6wLd9kGsameV
+H+6q+qoaxc7FTQIOdsgoTptDBCpQbg58m5gteeeO7k23tM0xTgr+2DZ0QnLOb+CpkLEpThYfWYV3
+IXRekRBHEx5xFFOU8qUxbbRHeJcrYuhem5fyhgtmmtSlRlYTDUNkO7JYKRKosqlhjaffznrWkxEx
+rX+L3ZfhutZJecSouYKEbPJXwzArdqyRnWxlL5vZzXb2s6EdbWlPm9rVtva1sZ1tbfOa29329rfB
+HW5xj5vcbYWdK8mdbnWvm93s1nXj0N1uec+b3urWHQLrnW997/trjVsIF/cdcIGn24WFA9u/B55w
+hXdbnPfmdNr6zTdNps1sT6s409AGu4ivbeMHn7jHDd61iyct46nrONhOrrWRG23lQ2s50EruOe+M
+QBI1t/nNcZ7+c53vnOc99/nPgW7z7qV8bLWJwSmQnnSlL53pTXf606EedalPHekxcBnFaxN0rW+d
+610POjBydjvvMCJNZc8QPdBC9LLVpj5hcvvb/zOmj4u8NpAw+90btKYEx27sePf7etD+w4fTHS1t
+h/vhEc8EuYdc5XX/++PPo3eHO4TskLe8eALvwME3vvCJ9/zbFz87wtj98peXfO76XnrLZ/5Gmy96
+5z8feyOFXnajV73lT48774jCBb33/e+BH3zhD5/4xTf+8ZHvez4IXvS1oQMHoB996U+f+tW3/vWx
+n33tbx/6dLg6yB0ChuSPn/zlNz/yTRF223knBdVyf65CkXb+168dLa/q1v0LwpmX94wwk3j//zvl
+MWKucdgPAA1QMeKP+WqvNuwP/xxQ/+aO8xzC/w6wAsFCAPdO4xSo/SywA1EiATWv+erPAUlQICCQ
+8V5vAj1wBUsCAycvITiQBWXwI0Cw9USwL0rwAb+P8FRwBmfQBVFPgUShFoiwCI3wCJEwCZVwCZmw
+CZ3wCYuQARQwdgijAjThCrEwC7VwC7mwC73wC8EwDMXwCitgByUwIbIACtVwDdmwDZ+wANSvdlLv
+9v6O9dROarxE9vSwRGiPCh2PDv8u98ROgSoPEPHODucPD9luDxmRRPoQdmzPEPFOENePECXxEOXv
+BhPC8Br+sRP74xFTJxIvsewoUQ4VqBKkIBVVcRVZsRVd8RVhMRZlcRZpMRWpQQCmEBJrIxu+oBd9
+8ReBMRiFcRiJsRiN8RiRsRezwQxTMCFsoBahMRqlcRppMRPikHa8QxkGZhu5sRu9UWBAABdDcAHR
+whAe5hzRMR3ZxRCYkf4cYgO+MR7lcR4BJgKucXYK0AdlsAbv0OIYMAd1UEX8EC0oUB9XEAh1bwMN
+kgX5MRH9cQQB8v5OUBNhoSAX0gIRchAdIgYvsgIbkiIbMCKJZSLJsQc7sgIzshIdAhWGoCVd8iVh
+MiZlciZpsiZt8iZxsiVfQQrHcSAdogxSISiFciiJsij+jfIokTIplXIpmTIoy6AdFREtjiEnqbIq
+rfIqcXId7lF25nAU0QQRKZITPXEs8QMUPUcUvRJLShEbLTEtvzITS3ITyXIuyxIqH9IhSM8tr2Qt
+8bEt9fJJwDIuYUEs6dITzdLf/vAvnYQvudIvFXNHAtMn5bIw5/IwJQ4t8vIxdYQx+U4Ix+EzQTM0
+RXM0SbM0TfM0UTM1VRM0l68ndREt6KAGZHM2abM2bfM2cTM3dXM3ebM3ZdP7BPI1w281ibM4jfM4
+VTP9siYhN/IkLfAjBTMkRTJWSFIyK9I5UXIrO7M5sdMAodM6pXM6UaU6hTMhLLI73S8lTZE70fP9
+vrP+PGEhPMWzUsgzFGvjPNtzWtSTLdkzP6nlPe0TIudzWOrzLO/TP9NTOzXQIY6gBBz0QSE0QiV0
+Qim0Qi30QjE0Qx+UJ21QMBXhAUA0REV0REm0RE30RFE0RVV0RUFUEeySaQijCDR0Rmm0Rm00Q+Fw
+OTUyIQpRM3MkMuGTMCmTES0T6zDTR3eEMxeUR5FUR4A0QB1CSIdUD4sU/BIiM5tUTRTU5BwzSzfk
+SQ0U9qa0E6uUB6/US2FESbnUIRrBEtz0TeE0TuV0Tum0Tu30TvE0T930BMSxQ60zDlghUAV1UAm1
+UA31UBE1URV1URk1UOPgRZOGME5ATym1Ui31UvP+lAi2VOYUSBvp8VNBNR7DMRehNCHMUR1RNVXP
+kR2Ds1RhAR5DNVZltR43lQAVEkH/Ey7Bc0BHElJZ7kBxVT9rleNuNVjhT1fhUz55FTMKFDEJ0lh1
+ZT/7sj+htVMANExxcFnH01ddDlir1VOktTFXkhvItVzN9VzRNV3VdV3ZtV3d9V3JdRw4tB9htDbK
+IAPwNV/1dV/5tV/99V8BNmAFdmDx9SlbFVsT4hjgdWEZtmEd9l3/YFghrkvRFELA1FmjdEwNk1uB
+Bi0rFkLUlFMp72O/FFldVUo11vPK9AxhAUtJNu8kFuUo9mXb42IvM2NTlkg5lv8Sk2ZhVkdVkkn+
+fdZBbNZIcTZnqXRndcZjh3Y9QtZWx/VhpXZqqZZd5ZVUERYW7pVgubZrvfZrBdZgk0QwFbZqzfZs
+pTZigXY9YfBbP+VaMVZStBVWmvVmzdNtwTVmuyYf8ZYx4NZu43Nut/Vg4/Y6+5YxwnU72/ZwF+Nv
+jVZuBZc+lfZl+o9xFyNxlxQWPHVWObdzQ2FUXdNVT1VVSbd014VVx9Y6YdVzWTdU7XFt+TMh2hRT
+abd2bddO+RRrCxdQG7V3ffd3gXdRH5VwAXdSb/d4kZd2NRV2p1Vom7ZmTTZrURZp4W5lm7Fln5dB
+npZYRzZ72aNorXQwqTdpifdxsdd7nVZvtab+K9H3PMDXTMV3fGPPet3xTNs3PbZ3Yh1iCN2wf/33
+f5twXh2yXtHCCsfwgBE4gRU4DMuwfMM3DQE4giW4f3NUbYKQWi0XLBw3fJU1cuOibs0XPzO4LNQX
+a/h2hDU4egu3gz24KkA4fEUYhR2jhKvmhGU4JTYYflm4hafiheE3hm+YJDB3TRc3iHFYhQF3h3m4
+WCa3ZCrXiHeFhqVm95Cziq34ilOzNf0UPmPTN734i8E4jHkTOFMXPsEAi9E4ja1YOS2YOZ33fs3j
+fVl2euUXTOg3KvESjtEjf2W2e/W4POT4eum4jmeviSWGaf+4E/h4b2f2jwO5fuOXkBHvju/+0n4T
+eTwWeX0bWY8fGY8nU5IPj5IJOI8vGZOl+Gm8gyWxcpVZuZVrcid1F3CBsilpuZZt+ZaXUmz3b2lr
+Yypd+ZeBeZW1knnFtYih2CRymGWVeImFwodZFoiPmSVOmWlsOJpFIpmvd5mZ2VsM+WCe2JqFeJqT
+pprBmQaR2Hy1eZt9wpmvF5qjeYhFNiE2t3XpWR5Bd4tF13T1eZ/JBXV3mXJrY3XreaC78XXbeEdh
+ARWpcaEZuqFj8RZj2Xx5MRkpuqIt+qKPcRkdGH6f0aE9+qMX2hqJWXFhoUdLuZMrOZJBGfS6eV4Q
++Y8z2YQ3GY5RepQ/eaXdTpQjtWdLOab+a3im77emd1pMcTpMdPpXj7SUoUScjWb3JvipofoJBZgi
+DXiBrfqqsdoLG7iMXRWCo/qrwboWKngAudeYyxkksBmS01mdeYKdIdmdjxmeodaszzoW0tqTA5et
+5cKt8RquoViuyxoWOLKu7fqcOViv97qlweWbCTsWAFt/6fqs7zql1xqxCYKvU9qvjfix+zghliAF
+QDu0RXu0Sbu0Tfu0UTu1VXu1Q1uU6HWoHcIeZHu2abu2bfu2cTu3dXu3ebu3aVuxs4UwlIG1ibu4
+jfu4V9sVmHpoXnucBxi2BRM+YYesIbuYrVO6XXW6M5CIrRu7s9a7G4e6Ue6PXKe8zdv+vJu7W+Xp
+vNm7vUUHnlBw7dbbvem7vicHuFBvcfR7v/mbgpqvvwE8wAEnvhVRwA38wOdG2xR8wRm8wR38wSE8
+wiV8wim8wi38wjE8wzV8wzm8wz38w0E8xEV8xEm8xE38xFE8xVV8xVm8xV38xWE8sRrJltyMifpr
+uy2EgQgqbrwMg9wmnypC2A5IsioqbrTIgObs2MAsqvgHz2L8yZmDBGhMtB6iLYSIt9qmpLZstn5G
+iDiozUwsIuCsc6DBiNxKiySrc8TACU6LsuZr1aAMyuW8MLZnhz7KxCIMwSLinZCgieK8xhLpWbxc
+O9gruxLClHBGkXqBxrTK0gy9qjD+ac4l/TPyKsK0qJv0HCJ6YYcOq8vvwtMtzI5OSSKI3CXc61k+
+CruOys3Hi5wm/dULQ8q9R4F64RGUa+8KXbNeC9Rfi75oPLcEy9cH3SLEC3SUHNaRHSnqfJG2CXxu
+fSKEvHCMfMe/bNitLMZIwIVGS8etfcvZKtnBnSucQALUSSF86Nkj4hF8C9jOfMcF/VhWyMAwQr9q
+LNUhIpyI3cyYKLbCvd+R4hFEK1KyrG3wC0GgacLevd27vTMCqor2rMaYzCF6waUGT7yoy98xPiNM
+Kaxey4bsHeHli9fJC8wp4rc6p4BOS5HoaLJWScK8/TlmLONlPiacoOHri4EmaMv+56vO+MfLfjzM
+I0LO0tzOqMqOkLx/AOzRm/zYZ77pdUcMHN3ppX7qqR7Cbw3X3k0tJIDbzE0puq3qwT7sxX7syb7s
+zf7s0T7t1X7t2f7B7fvt4R5y2B1sQCfu7Z6+517k7n7v2fveEPzv/57A/RHwCV/ABR/jCj/x+/sF
+rzu7Hd9zxBvxGR+8AZfyDQfHsSbyNZkiLT98tZt2NF+mHaJibKb0TZ9mgBtaTub0Wb/1Qyb1kSVm
+XH/2aR9jRCn0f9ohLoEeeL/3ff/3gT/4hX/4ib/4jf/4eb8PYN9XCOMZquD5oT/6pX/6qb/6rf/6
+sT/7tf/5n2H5ayVBSgH5xX/+/Mm//I/fp6dYgTBEqTtBSyJQkIuapTdaa86E/dEfldWf/d3D+1kl
+D+O/kAECFix5AOQJPIgwocKFDBs6fAgx4kMSAA7Ko9cpo8aNHDt6/AgypMiRJDNeOkhRosqVLFu6
+bAiAxMFLJWvavImTJCSLBV/6/AlUJUGDAp8wOYo0qdKlTJs6fQo1qtSjT3gSDYo1a9CUAi/m/Ao2
+rMiTArlqPYv2Z8yDBzy4fQs3rty5dOvavYs3r9tFVtP6/ctw6MFw4AobPow4seLFjBs7fgy5cLi+
+gCsDNstgmN7NnDt7znsAZUXLpEmvFUhTrOrVX3d27Vk6NlbBRafavo07t9T+qq+vyv7t0qxX1sSL
+jxUNPDnW07BSG38OvZPrgbCVW4dIG5ZR3dy7e3/Km7rv6+QVCscYPT1xsrDMln/vkLlz9fTBTs8O
+H3727d/7+9cdHn75lXdefQZ+xZ57Ay4Ii3wHPmjTfdUxeN1+/12IYVQBTkihcgVCCOJxZY3W4YDM
+BfJZiiqueNcwlJWoXHbfhEFjjTbeiGOOOu7IY48+/kjjNy/CCJxZfLCIZJIrBoIckfA5GGKUHkk4
+npOlWZhhllpSNaSVpX0oZZgmNenldVCKGSaVZcqG5ZZu/rdhlWv+BSaaUSZI4pzAnWlniGrqaVmb
+bw7aXZyAWlZnnxDieej+njIJNA89kk5KaaWWXopppppuymmnkvbRZaNnZZfNF6aeimqqqq7Kaquu
+vgprrKZmE6qoWZklQCme7sprr752Og+ZtlrG3Dx3HItsssouy2yzzj4LbbTSHktMrcMClZ0WVWzL
+bbfefgtuuOKOS2655m6rhbXX+oQrIdO+C2+88kob7IjrEvtoc4pG+ee9s1XHH6EC42aov1gluq+B
+jBrsF58JG9gvwz4JOnDFGqorsUQIP6zewhlr5TDH6kX8MUsUW4wyUwWX3NLGIkPnMctAMXdAOzbf
+jHPOOu/Mc88+/wx00DbPgbHMgVUHhxxKL810004/DXXUUk9NddVKw1H+tNHmkcgAFEJ/DXbYYgcd
+mr1aq5XvfC+nR/LZDp2cctxc9ua2xiQOt3bHwta9Ush5G9c23wrBLXfKKwvOkMt/sxYz4hH5vThr
+gTtOd22FX87E4ZQjpHjkYjW+OUxpew7d5JQTjvnAmofe3t3okV4c6KwrdKKStt+Ol4uVz747YZH9
+Dnzwwj822e68G4l78srDxaTZvDMEOez2Zc036qkTunronUuPk+zPCxQ99ziZ7rj117+Z/ebbi1+T
+99+Hz35N5CNu/vlbpk/5+vGP5P7z8O8/kvkJrn72yxL+HKc/AIKkf7xjzjkGAMEISnCCFKygBS+I
+wQxqcIMQbAX16pb+HXN0YYQkLKEJT4jCFKpwhSxsoQtHaI4Pug0zzuCgDW+Iwxxu8Bx7+95B/qdA
+kAiwegArIMoOiLgEBrEjDJwdEJfYkSGCsIhGrBgSBadEKGqkiax7ohY1IkW3EbCKcJLh2bL4RS6G
+rli/aqMb37gpUBlvdqSSlR3viMc8xopWc2QdrnQFx0AKso31ap0PF8IcREhhkYxspCMfCclISnKS
+lKykJRdpAzNqLTvXOJcnPwnKUJrrGpo0Gq6occlUqnKVrLQkInp4SC9+UTqllNkYydifK/INjVpU
+4+Zk+cUwnu2WuPSOLuvGSyj6knLA1KIwN0nFYqKvlixL5hKX6Tj+B2pgm9zspje/Cc5winOc5Cyn
+ObfpwT6GLju6aIA73wnPeMpznvSspz3vic98ulMX1CwZDc8J0IAKdKDm5KHzDvnDfC2AHAxtqEMf
+CtGISnSiFK2oRS/K0Bb082PZyUUsPgrSkIp0pCQtqUlPitKUqvSjudhoxoSzCozKdKY0relFFwBL
+HzJnoTbtqU9/SlGNqnNzHV2pUY+K1KSqtKVDzd/dYgrUqEq1pzg9KEIbpNCpanWrFhWqeHxYVKWK
+daxkPSlTv/o9mHJ1rWxtaFUNedWEHoSnba3rVL0qINaFtax87atSz5pX7T3VroSN6lsVhFDm+MEN
+jG2sYx8L2cj+SnaylK2sZS/LWC+4VGLZiUIzPgva0Ip2tKQtrWlPi9rUqvazUdgsw4yE2djKdra0
+xawfcvq+rBZ2tzbFK4f0Wh2P+nW4xEUpYH8rWItAlbfMxehh83TVnTZ3ul11rcH2Wtzsave4clLf
+YKkL3og+N65yFQhdw4tecvi2u6cLrnbfm13u+lCt6U3veMmL1bnWN73rBat74Qvgvso3rd/dL3jv
+S17m8EIcDG6wgx8M4QhLeMIUrrCFL8zgT1jXX9lRwSQ+DOIQi3jEJC6xiU+M4hSr+MMq2PC9MLMC
+DMt4xjSu8YV5gVv/6dbA1O3v97Ab4CAndcDPoy+Pp4vguEr+98jT9fHzgCzkKC/VxesyMpN5m+To
+7vjKu3Uy76As5TCXlMjHKzCXC5vlxOaLADpos5vfDOc4y3nOdK6zne+M5zYbgMrXyo4QqADoQAt6
+0IQutKEPjehEK3rRgBYCn4eFKxrkedKUrrSl8UyAHDcwX6BYhac/DepQi3rUpC61qU+N6lR72giP
+tlV2IhCKWMt61rSuta1vjetc63rXvI51BFotKlxZQNXELraxj51qUGjaiVs+s129TMf/innaYwZ2
+o6zs7LqmOZbNzjZboQ3cqwiX2uQOKZlnh21vr3XbOu22urcK7nVKu9zlPrcfzfxurrI7twe5hSf+
+DfCAC3z+4AQvuMEPjvCEK/zfxbD2obIDhldIfOIUr7jFL47xjGt84xzvuMTB4HBAYSYBCy+5yU+O
+coXfYtlddHe+pRpvos6b3tS2d3K7styXb3XfOtavzrka8/aKm+b0trl3lftzfbN8jS5Pek+DXr6Z
+Ez3MRncq0p0+VZ5v2udYjyrU6Sf1qUe56gjEd9epuvRf5svfKW+7299+8IY3NepXcQAg7o73vOt9
+73zvu9//DvjAC/7uDgi5nkYO98Qr3u0rt6qauX52n359gGEXe5DJnkSzR56mWmc25Ddf08kTceiW
+FzPmsah50Ds37cxsuuorKvopkr70Uj79LlP/+op2vuX+B+k0sn8P/OCbmtVzB/tV1NCG5Ct/+cxv
+vvOfD/3oS3/61E++Ggw/J2ELf/vcB76yHc/tg7D50uQvv/nrvOfiU/4qQui1+98P//jz2tHqv/1B
+BCDp8+t//+TPNPjb/Xm5d1GxJ0aVR3vvZXvIhHsCKFG7x3QByIBBhX1rAmYHiIATWCbpFoET5YBq
+B4EbGFEEOEwGaIHFlYAztIAg6Fasl035Ug9eAIMxKIMzSIM1aIM3iIM5qIM7CIOJgIFekh3rEABD
+SIRFaIRHiIRJqIRLyIRN6IRDuA4/aCWYMQE8aIVXiIVZuIP1wIKIs2QqKIH1J3sHMW4lCGAneEYp
+CIb+Hdh6HwiGDSWC0DR7ZghfaKg1GviGK/h//GZeeThRcWg0FUiHw2WHpqSGKsiGLeiGeQiItkSC
+g8hXhSgzeOiHieiF+bIF9qCJm8iJneiJnwiKoSiKo0iKpaiJgiCFTpIdrpACreiKrwiLsSiLs0iL
+tWiLt4iLregKqUgkZiEGpgiMwSiMw2iKW9CFgsMcX4ZcMsdedDdf0IVu0Bh+P7aMQudfzZh5h4RY
+0/hk1eiM1IiNqKeN0qhTvUAC54iO6aiO68iO7eiO7wiP8SiP7zgK3gh2ozCP+aiP+8iP/XiO9RiO
+t+ePA0mQBamPvUCO7wMAC8mQDemQDwmRESmRE0kokRVpkRYZkFN0kRvJkR3pkR/pkBmJgiBJkiVp
+kh6JXympkivJkm4TEAA7
diff --git a/Documentation/DocBook/media/nv12mt.gif.b64 b/Documentation/DocBook/media/nv12mt.gif.b64
new file mode 100644
index 000000000..083a7c85d
--- /dev/null
+++ b/Documentation/DocBook/media/nv12mt.gif.b64
@@ -0,0 +1,37 @@
+R0lGODlhFgFnAMZnAAAAAAYCAgAASAwFBQAAdEgAACQODkgASCoQEEgAdHQAADATEjUVFHQASDsX
+F3QAdE0eHVMhIABISABInEhIAIM0Mok2NI84Nk9PT5o9O5xIAHRInFlZWaxEQbhJRgB0v75LSLhQ
+TbRTUcBRTrBXVatcWsJWVKdfXW9vb6VhX0h0v8RcWZhpaJJubpBwb8ZiX8ZiYI5zc4t1dYd4eMhn
+Zb90AIN8fH9/f8pracpta8ttasxzcM51dM52dM53dc94dkic39F+fNOEgpmZmdWJh9ePjdiTkt+c
+SNuamd2gnt6lo3S//5y/nOCqqeKwr+S1tOa7uv+/dOjBwOrGxuzKye3KyuzMy5zf/7/fnO/S0fHX
+1//fnPPd3fTe3vXj4vfo6Pnu7r////v09N////35+f//v///3///////////////////////////
+/////////////////////////////////////////////////////////////////////////yH+
+FE5WMTJNVCBtZW1vcnkgbGF5b3V0ACwAAAAAFgFnAAAH/oBngoOEhYaHiImKi4yNjo+QkZKTlJWW
+l5iZmpucnZ6foKGio6SlpqeoqaqrrK2ur7CxsrO0tba3uLm6u7y9vr/AwcLDxMXGmkM3ysvMzc7P
+0NHS0CjT1tfY19XZ3N3Y297h4sxDlQDj6OLn6ezZ6+3w0u/x9M0A5r/3vvq9/Lz+kQDqEpiLIC6D
+txAyUliLIS2HsyDKkniIIiyLrzC60tiKoyCPq0CqEpmKJCqQJk+lNLWyVEtSKPPJ3Ddz0stRN0Xl
+DLUTVEyaQPvVlNTzU1FPRzsl5fRTaNB/QwNGHTi1IL6nu5Zu0qqpKVSsVME+4pqJLCazl9Ba8oqp
+jAIA/gTCIOXkVsAVo52OAABgV6kmMxr2Cgbil9LLGh/OIJ6r6QgBLAfuMm48YYzPT1siF7a5qUyD
+u1HibtaUWfLoS55NT+a0+DSklqXPxK5EJkuhm7NdV8pMYW9i3YJsT8q99Wqm2MQn/cihZRBuzasv
+RenrdgnwM8uFQzpSOfrrTcihW5KyogiYM89VF9cUWq7i3+sRTVlB5Lyj6ngNd/58pn0mMUmY4ER6
++R2nGWCEMbUIGQE2QUYj3Fnm3VibIPgeJ178kEFz4Imn4F8aEJbcWY18EcQLViziVoITOvLSFgXA
+5R4iVvxg44045oijByAU0QMIQAYppJAwPHhIFILx/qUeieDFCIB1igjBg45U4nhBlVSaAEIQYiTi
+2IzXLbTKF1mUaeaZaJr5RAc5aKeIFStw0RErVehARZp4mrlAnmlCscILU8yp3y5iILECBI7AKaeg
+q3DxQ5cuMgLgCg5uZBwuTpiAhBgQKZqRK45CKqYiftZ30aW1WEFDEF58xIinn4L6aCMIabHDDhye
+OqgtYgSRonOLwBqrrKImglAQUjyEalg0xjlRLKEuotayFIoliLC6whKtsVVFai222Wo7KyLT7kpU
+VeCGK26xhJTLmblZGZKuuutW1C2tUc1Lb7233TuqU9c6m9At2wJrLb68JEEGP/rG4u4iBaPnr7S8
+/ohhgsRnNOxwLgU/LJVh9YR8Qwsl3HCOCyHIIDI986yMjgwkzKBMyy63g1LN8JzAgskoq4wzOzT/
+3E0MIIiQggM2CG0ztbW88MUZDAgc7y5aFlHBCDsk4ebA8NryxQsZd7DoV7oIQcQKBpyhRRM/jOCE
+VV3X8kQRcKYtCBnsNsQLFHSLPQjecL+bi9lxApCFEjuM0ASzuXwNtdSMf5yLlkQEAcIOSmzN9S5O
+A6Dxs3HPogUIOSBhRQATy1LEE/d8vq+3t5BhhH0YA6zLFELo47qlocNChhC/unowLRYzDLnevbvy
+e/DC265LEqgPsntITCsPvCEef4fV9CVVz8ry/vYOjzwh3KvkvSrgh+985AGPDbrgvl9PbuoRNev+
+6wjHzzz29L8v7/G8g18r0sct8SkrEeXTyfkigaS9sIgRBCwge/bSlzBNAjAV/BACAXgIGAmmOxo8
+lybK8AC5TGdJh4igBAuhOUaUYQOWqQEIMVGb2jVGAh6iRA0N9iYOFmJE8RFhJ/CzIPn9qxDZoYR/
+NLEcBNClAUzIISWWw6FO+XAQQOzKAocjxUKoUBEImY+pIAEY+GhCCh0wT2OAkEVJkMc8EtGYB/cC
+pRBKjjVmTKER85fCBhlpEQ2c4SYGEKC3XWILCQhDGyUBIBNE4BG7O0IGtZg8SkRIEV+k2CJM/oSi
+R0gShWs5Q4aYY4kaiKiLlvDCBUjZCNcRMYh3zMSXFuEDHGAJS1e65Y20xKVGvPJNurTSjUbQox8N
+6ZhAosEf3ZIkADxQXsHU0QWI6SNkIrNIKlJAHSkpwEvIUEKK6AKfxrmncZbJT4BSxBZUIIgTlsic
+ZyrnE1bQJkws8gxkgieaIEDPFkaiBlCapR21dxwn7UWQTErEpCqlCMBQEJRrKdQKnkAaVFKiUB2g
+aEEPCk5YEpRshygV7VwRAU3lbRaZQsIA+qc+kBLCVriKhaos0CpcqIpVNpxaNwNXCGTJoldWyJ5C
+fcVDl35Up0blKVJjyT6lNvUgW/TfUp+6jzkhJhWqBqxfJQ+4Pqd6FXZXrepUv8rHsWK1q2e1qlnF
+SlVbsKWt4wurW6O6saxKFa5gZCn+5mrXiijNZn8FWmDTEbTBuqMSHDDsODCgWHEwtrHeeCxkucGB
+Y1j2spjNrGY3y9nOevazoA2taEdL2tKa9rSoTa1qV8va1rr2tbCNrWxnS9va2va2uM3tLQIBADs=
diff --git a/Documentation/DocBook/media/nv12mt_example.gif.b64 b/Documentation/DocBook/media/nv12mt_example.gif.b64
new file mode 100644
index 000000000..a512078c7
--- /dev/null
+++ b/Documentation/DocBook/media/nv12mt_example.gif.b64
@@ -0,0 +1,121 @@
+R0lGODlhoAHkAOe1AAAAAAAASAAAdEgAAEgASEgAdBgYGHQAABoaGnQASHQAdC0eHigoKEIlJEYm
+JS4uLlssKzY2NgBISFIyMQBInEBAQEhBQUhIAFBBQVhCQkhISF5DQ2NDQmdDQ3NEQ05OToNGRHhJ
+SJxIAItHRY9HRXRInJdIRlpaWppLSaJJRqpJR7BIRa5KR2NfX7JKR2BgYGxdXWleXmZfX31ZWXJc
+XHpaWQB0v29dXLZKSHhbWolXVpVUU5JVU55SUJhUUqdQTrpLSK9OTKRRT6FSULRNS7hMSrVNSr5L
+SL9MSr1OS7xQTsBRTrtTUL5WU7lYVsJWVEh0v7deW4pqab5dW8RcWbdgXrZjYcZgXnd3d8ZiX8Zi
+YL9lY7RoZshnZb90ALJubLFwb8prabBzccpta79wbsttaqx6ecxzcKt8e4aGhr93dc10cs51dM52
+dImJib97ec53dc94dqiEhKeHhkic39F+fKeKiaWPj9OEgqSSkb6PjtWJh5qamqGamqCcnNePjZ+f
+n9iTkr6amtiUktmVk9+cSL6enduamb+jo92gnr+pqd6lo7Ozs7+wsHS//7W1tZy/nOCqqbm5ub+4
+uOKwr+S1tL+/v9+/dOa7uv+/dOjBwOrGxuzKye3KyuzMy5zf/7/fnO/S0d/fnPHX1+Dg4P/fnPPd
+3fTe3vXj4vfo6L//v+/v7/nu7r///9//v/v09N//39////35+f//v///3///////////////////
+////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////
+/////////////////////////////////////////////////////yH+EUNyZWF0ZWQgd2l0aCBH
+SU1QACwAAAAAoAHkAAAI/gBrCRxIsKDBgwgTKlzIsKHDhxAjSpxIsaLFixgzatzIsaPHjyBDihxJ
+sqTJkyhTqlzJsqXLlzBjypxJs6bNmzhz6tzJs6fPn0CDCh1KtKjRo0iTKl3KtKnTp1CjSp1KtapV
+h6QkWdrKtavXr2DDih0rlpFWsmjTqkVrdq3bt27bwp1L16vcunjn3s3LNy2jVRU/MKhAuLDhw4gT
+K17MeDGCwY0jS54c+THly5gvW87MufPhzZ5DcwYturRkBGkqvgAEdPVP169Z+4Q9W3ZP2hJx89St
+k3dv27uBB4+tWvhO3ziRJzeeU/lN5zWhK5ROk7pM69eZL2+t/Xl3hthj/oZ/OZ789+jn0ROnWN5l
+e5bv4aefGX9l/ZTx7+Ofn537+tr/5cZffwAGeNyA4iFoXnH+FejgcAb+xmCEzSm44IMQ3mbhQPqh
+1KFJH4K4oXwNajghhhJSuJ2K3p1oIoopvgjjihOFWJKNI+GY44j28dgji+D5qJKOIRFZpJAeIpkk
+kAvZOMsBAAjQykdOHhDAJ1QqSVAhAABwpUcf0iJCl2TS0ZGNmZDpSJYcpekllmfyaKMXNtRCZ0av
+jEJQiIUIAAoBcMbJUSEUxHJkQa+wglEpgGKUJ4da1vLkmn1OiZEsoUC6USkDrJmJlIKyF+ksCWD5
+qaUWydJFHagIhCOj/oFyFCKpsYIp3CZPRCKLRXdeumqrs5Zay6kaxTGGniESS2uoNUYKay3PZhQJ
+FYnI8mqjtm5KwAVd1pltQangkYUnFEUrLbUtRGqnAKocsOZGmlDxRwyRZvLlpMwKqC2W5mLEyh5U
+zKAutNjme5G9WOJr8ECerJqKRIQautG/RsjRkRcAFNrRK4kUYQZHGHt5wbsb5edso/1mFEoQVrS6
+L5saEWunt7J+N221DyncUQ5OsJrRk2Z6AWpHN1RxbEc6a2TyRsvKjJAncUQt9dRUT/3DEUgcofXW
+XHc9CEMpK30ylmKaaRAebFSt9to/VLG21F0cQYUmDkXMENRvs13F/hFLXNH131wvgpCy7t6dt9pt
+L3HEH20ADrgWuyrktNguXlT2zAulEsrmnHfueec6KNEFuQt5QoUpYBdc80aXh11LJ2Vw8vnss+dg
+B+2cH3IEHg8zBHRDmuNOew5mPDE6RggTXOtBwQvveQ5oUDFGpnefvtCTNJcsp7qcRomqRq8Q4nFD
+pqMuOZlvLmxR9wCQbJApcbzSbEKoGEt9Q5V+9IoPSlCyEZddch/4dsCESjikfAl5UrdgNj+eUOIJ
+h5BBehB4oZTAT34R8c0rDvEEA8bkgTvog09AKELyWa8lS9NJw3wGHQpW0ILxy6BxHngIDL5khagw
+EkdwSB0XkkhU/jpBxRhIVwvn+NA9A4PIBSGiG0z4LCZCJJ0OwVcHKU7whEis3IEScsQstmSJDpmi
++ipUOix6sYEz6uIZWQLGIDFJPVw04xplWCKCqHGOK2njdJJ4KBbd8UdAjNAfUchHiegRIWJcnYoG
+OaTt1bEWjPwhTA5pkERq75ECieR+tBgjSMqRPoWcCCX3FMpvoUiTmwykg1AJSJmM0lWlHCONMvnJ
+BHHyJtWCDStb6coYHsSSlPsJD/xgx1raUpU6ecUTisiaXfKylzYk5Rtn8ooi2MaZS0JmTjaBB2Zi
+s5E3McUaeifNGd1kE0OQzTdPEp8TYKEv8ByLGvRgCQ1IoQmK/oinPr+igXfuE56IOMIU3iCISWyl
+n/9M6EH9qdB9qiEE7zQEPhsKTw3woSIagIxpNqoYFmCgAg5YQQY4StLEGECjJTUNDo5AAhMAIQUg
+aABKU1rSk9L0pixYAAM44IKR3pSjBkiNNm+SiixA0gh3mCZL8AAw1I0iEj9Igv9ktJOivmAOxoQj
+GnNSiT+YjgaykUU0W4QTTHj1hC/ww1jJ2JOu1oAJ5quFWNm6VZww9XQvsMMizrCESGwRJ0X1JOqA
+mRHCXoSpSK1FKPbaV7rqaydPOMIe6nCEKCzifp20SRYeVj7DXsSzFYlsD6xwhDNc9q91tckojjCG
+Q3giXUpl/skfPPjVWCpyJ6sdww7mELkMPRYnsgiEomAZ25Vwk2GJNedMgqso0N6yIc51iCzwQERm
+FlclyiTIW+OKWuBSl7jKbZJtKTLd6lo3vDDBmauwyl3H2qS8mkJvQqK7EPgWhL6/1dA6s3kT+4KX
+qnSsiX/LCeAt7ldE3jUvfgOc2pcMmMC+hdCBSbJghDz4vAV+SIUJcuH4ZpiuExbJhgvS4RFD15Ey
+6bCHI9zdEIPExAJRMYzF+9yUqHjF3Y2wi00ZkxvPeI81PsmNcZzZHAvWJiYe8o/ni+KYlekhQyay
+QBIlETd9CXzIGpiYrkyRR1WSOc5kX8YkdkmMPInLChxa/n2/C2SEYPZ6VgrUmZcX5Db/TAFTSh5D
+oizlWuBKVzkrgaG8oLGMqKoONxhYISSgOokc2mV93qXrgmmRPv0pUHfqlUL4rBz7McTS2AI1nYeq
+YT4mzcJsPnFCwjWuKqtZWkYQQ2/NnABINHoiN+utclg56cJqKVpNe7VB+Izhg2yCCnsYrkL61ev8
+MpgjzZZrqlWtkBWSkyFiyp5GYsAFKmxCI4WgQ7Qd8i9v//cgqBSzAH2tkWg9q9fELvZBZJGIXM3a
+IMy+dZ2Z/BFNo9q8bmRIrhfipkLXLBS/ukgpCtCKcT8E4axqYVYRUgguU1rh2Hq3vqUNcH4zJBV1
+aDVC/vI96gaHMYl2UwgcwnA4xLmt5XGI29zwZ/GnwVxqbYsa1hwHOMEZxAtmmjTebj41vhGB53/z
+uUNOjZEPuRtlG1850eOQ85tHtg5rVR5BHH7yfUMkfws5hfOGd7uxh0J3vGsI0w3SPLPbLhQPPB5F
+FIg+s4HL7J+Lew1MXbjbYhxOwf4eQcSO98293eyYoEIWvn0QkpfZ5BQhNJl5jJD6TW8hpYCCQPRc
+sj4Q4glTzQjXGRI+0MubV+8C++P/LpDL+fuz6eEYFQA9ctWNPuCQl4iYxyzLWmywgw0RU5dqnpHQ
+1RDaG48IDTEYIvYZfPUUcdPw+TUA70HfIIlPdkKk/v8m7hM/9zRm0fJh0jAh5AEnOISwkYs8kFGc
+4Qx6Oib4tVoQJ0LaJVFcchS/fN0XFqQOdANKXkcgHzZL6+de7Nd1/UdImISACUhW81cd43VxLOaA
+FgiBznaAGFiBBqiBSNZkBfiBDdiBDyiCpHaBJihfBBiC9PdsHLiBHtiCMSiBAyh/LwiDKJiCNxh+
+KmiDO0iDCyhJGViCMkiEQNiDMJFCSOh/MyiAQfhMTDSBFGiER/iDVdiECOFOFPVPCLWF8dSFXlhR
+DBWGeQGGZFiGY3iGdGFRGDVTP7VRNvWGJBWHcghUbliHnkGHeFgaeriHeShUEeiELFiEJJiDVxiF
+/iOIg4YoiIPIiC6IhT4IiUkohey2hOoniUxIhStohZuIiE8ITp+YSpbIgCdYiIpoijrIiZFYaomY
+ipiIR5q4igo4ikKoipPYioRIbbQIhbF4i6HIX0O4iJ3Yi5kojLKoi404jMboi7sIioGojKcYjbko
+jYfIir/ITpTYdNloEUvmccn4S9vIjeFYitToiK9Iis94jK5IjLC4jMWIjLb4juUIjfOojjxoEXM2
+ENJnd9r4M3Gmj9M3hRORj5unJtc3kP9YkOlTiQfzZJICJcImjhmxjwJBkAwZjF/nJ9gyC3g2LN+X
+jhSnkYAnaHbyfDUYkpdWkYWjerCHEaImEJzi/ikRCZIHwZF59iWZpm0SeRE26ZGf8JICOYusVxBr
+R5O1tzyTc5IJAWzCkpRKeZSbByrL0o9M4y6BF5Rz13e3d4+PWC63tpXeOJQDkW1YGRHmIjTtsm5P
+2XjYkjxF2ZUVASsaV3Jw6ZVwApZhWZdmeWuvBxFedm7rUzAFN3k7uSiqgzEmWZhTpmwQcZbDNzJd
+Fn+nxyt1MpcVgSl9Fnk0g5eIBIKBWSspNxGPBphxeWsVR5fWaJiAdwBBM5N19Wf3ljrL85b1lXB8
+ojGWWRHGkmX/Y3CcCY5riW+qw5IUkWtOd2u0mZpDSThqmVqs1nFLeWtOCRE3A1sZAXZXaRHx/jIv
+WsKSv8l/5NiYBSN5GlFuAtNuGad5PnmRn7l5X/Kdl5hJDuMQKYM9/rIHFZMR5Nl6IhA0OjkRHDM+
+GLGfAwGf8SmU0Yc+V7J7iSkRK9MyDUkmVyJ8C0mVFuF9WAJA7XOQAzFw26egn0B3/0kRPPNE5VJ9
+ZFIo3eOaE1E0RxOXKNolFHAJIMqenqgSQzd1V5M1SMc1X4OOJoE2U0c1Vfc2MheAG5GjN9c2fOM3
+Pbo1SocRStpyibM4jfOkRwA5vIigJ9F2hRc6cldtE0dh2Qg7slN4nXN4uIN215YRXup2xROmKPGm
+zgM90vNmTzOmfWSUKxE+AlpG7eWMJ/FK/lxJEJaHp33KP6G3E69AQB5UPYGKjcGJEjQkQZDKjChB
+qHk5ZRz0qC9BQiP0BCF0QHoqYp5pE+kncZEqqJnqS8opEONHfr/SjXY0q/OxY8FJq7Wwf6RZTKvK
+qq2adXlpfzLBq7rqe1XUqwWBq7k6jqXIrC35Ra6Ke/Eoj0eGqXq5jtdKjyWhqQdaj+oIrc3agOIa
+rS7hrcoKrtjqq+b4qkZWruZ6rtPamdcoqehWqvbKp7UIr/Eqr8KamdpKgvwannZGVQOrmC+hqcf6
+rTB4sPq6sATmsJPqEYQKsem6gRKLkdSqE7mkTvhai5M0rwA7jTYxTOyqrgxLTcvkGhk7/rEfMUoW
+O5k4UU3X9LHA+hBaqIZ0IQhkUE9YIFH5pLNwYYZCKxaKsAWNwE9pWLRrQbRMqxaCgAL+BLRPmxZs
+SBEZ5YehQQIeUAEGMAE9pbV5eIdiqxgbcAQqMAIdYAGE0Ydlexlu+7aTQQIQMBg85VNyKxlB5bIl
+sVm1sF3NOBKR9Qd4sARnkAg54KwaWxNZAAOA0LLjWlVG5QnJ9Y0qcVdOBVVSFbhdmgVXZbNAmq0x
+0VW1FWP/yq0uYVYUlFanW4034VZwNRBzhYqFahOYm1eM5Vfs2LlHFrMWi1hJtVh8pbsoO7IxEVmT
+VVmnVa0o4bedpbiiKxOiRVqmhaiu/guPqsVarmWdlqsSs0VLYFWvI5FbuxWbtFuw7yVcxtuuM3Fc
+mVS5zGtj6uu7p7pc00a/ybRMAwG43asS9oW/BMsSDwbAOaFeRcRe/YsS/kXAiyvA03axJIsekBvA
+DlxdDBy9NvbAEHy9LTLBDVzB9wW9N/rBCqzBG8y+K+LBI2y/AHfBK+xgJnzCqHuEKsylMFFiIuyu
+OgzCm+qOL1TDG3vDMezCO+y/Mby+M+yIQIy+MAydMlu8MjwSnJbDNoyAS0yvKXbETxywQYyQFrdl
+qElsyEFlS5eQYtagD/EoHWKRtQBAH5kQfxmfrJSPFOqQ/ZqgXWI2FLlnWvzE1nsQ/ntsZahZxF0c
+EUApEIuWfBxXxbD5aSIJk4oMEYeWaC75yIiMxnuWcAeqSYdcoJFcxRDRkwgjym8cY338xJ52PR05
+yiRJoAj7wnY5EKRiayUXb0/8nA3xdINMnbFmvrq3kcJiER76xOnGlyNKwmXsPrQpxt9xbNqndn0X
+lYJHwcApegUTbpNmy1sMSfO5bBkXo80pEdxmbkPJKNwCAMfMEOXGeEZUqiljoFGcy7aXfPEGHfRm
+b7I5liKQzrAMynt5lwznOtq8zQIxzMK5PKfpaxB3f78MJ24ZzROx0Kqaz1uCyRjsEK/Xl7J7yusL
+ciKXEL0ymGVZuxMRLUCndQYh/nVDWqSHc6RQSZQQbUc6+nI7h6VHEKWQ7NBDo2lTenNGZ9M3jXmq
+k5z9PBGh2caYrNJL+nIwd3XCetRtXMqETNIN/ZDoAwD8KBCEh6ahoKbOw6YvTRBFSae483Zx58QU
+/SyXIxBk7Tx692vDadEXvRAsSZwFsdV459XCk3iLF5LTTNTYO9UP8c6fLNiH6s1w4gWpx6JM5Hmm
+15610DqFjRCl5z/HKWes6XcV4cqubKMGIXu093O+qZ6c98qCzRAY6sm77M+/56kGgaHOR5gVYXyt
+S3A1yj7hzBCxqh+pbdembZYxmjGiENxy7c+1kH2MWRAMCgtjUqF3fNooHKsu/lF+53cT6YfE9piL
+7gd/SVzNM0KsMJF/VFwQvJqyHBywAHjeTLy7IMveW3q+KFzI8N3d2R3fXBzBPezDoXuO7a3f/R3Y
+/v3eUCzg963e+T3f9W3fBq7gDI7FnJuvCfzfA36zxo3f9L2uAU7gC+7dEa7hG47hGS6KyJzgJF7i
+1mrhJm7eH37iK96OCA7iAP7iLN7gKe7iBX7gEy7i8evhNA7j8p3jwLjjFC7jNr4QOVu1buG0SD4W
+Sr7kYtHkTq60Ua4WUD7lXHG1E5G1eZsZcbvljNHlXr4YYB7miDHmZH4YZn7mbQuII+7jKF7jEn7j
+F67iPT7jc77f7j3kPy7n/nAe5y1e5H8ez24e6H5e53Y+6DjO54hu6Hge4kFe1EAO4UKu4/zN4xw+
+6Y+e55Su6Zle4YSu54p+6Ive5zFL0Ize6JEu6e5d6kTs6KrO6a/u6adu6X0O6m8u6nR+54U+67b+
+6Zsu67pO66M+7MQO6Il+68bO679O5Ki+58je7KGe7MHe6w5+E3F8rF62ZHGM3dglmab+Etk+3s5+
+7A2xx5EtAlK9aZrcIRR5xrINZbY6kXbsxqstu5rM7R+ax3Wcx6K57koSyAaZKvGOPA4pyCO93hFB
+ynCSyPWOEMaJJAoPzwVNBSbLk6v8JVAdEcOsHwq/dZN9EA9vZhcfoivJ/tgOMS0Vj48jH5PDYvLA
+Dp4agS+z/PEJYZ7qMikSLxCs0ANMwHj+6AhTaRHrLOhw5j4aDRE2zxGTkp0XsfM9X5WOoCzB/Nwv
+n8/YTPMK8aAM/ZnqpjR2cO9cvy0LZBESzT0FA89a/zJoGdPc+PUm+pkPndvQfekXcScL13D61tNU
+WtNYitM/p20JbRB6/zY516RA7fcCcSdxzzBDKjU/bdOI//cVLduDnzdX0zeHrxCahpjvnkkr7TaG
+D/kI0SshEwCQaaFtrhB2c9Ku09bCA6Zo/RBQvXauXzt2cNYXYTcyo2m1Tztv3ZtkRvtcvTmwf50a
+8ztCM81szdW2g/uV/maSgD3u1V5pUgkldeco4vMx1xmR0b8QMvAFj039lqLW/VkRlf3tjvw9GQ+g
+2Q9uQ8OcGvH94V8Rdj2dqY/vo5+YBlqpSkKgio3IACGgVS2CBQ0eRFiQUhEwrxI+hOiFQqyCtETQ
+qVWKwCeIHWtRenLIYa0XgDyeJCiRYsFZBzCi9AjykAyTMDuqLJgpAEeNHG1CXNjw50OcB1vaGAqx
+ZNKCS5kaLDUAwFQAEwn2fErQU5c6qEjWzJpRKlUKosZaDbu1zg2wWaNSrRrrLQBHTNV6bdr26dyp
+EwsJDFvr7tfAYuH6pVo3sFq2hflWdXVgKtLCBJ0yvVxZs0FUYzzl/t0cmrNnwqIrd/58MLPpwKhB
+sw7tejVszbNt2qZdGHfurLt5P/X9e2hw4T+JF0d5/KFy5B6ZN1+uF3rS59NVS7d+G3v2k9UNeude
+Orz28cO3l08Inrt66+zZtz+P/rp8mO+h278fn75l/R3xN/+vuAAF7I++AYU7kDf3CjSQQfQSVNDB
+8iDMjULYFtyvOwnHs/DCDcPrkLUQRcMwQ/8+XA/FFE1USsXsRgytRBbTc3E6GGOsMb8Zo9sRIRl7
+fA1I/oT8LkcAjTySyCGBQxK5G2trkkAlxSPyycp+FNJK3aJEkMsIp6QSSCzF9LLCMs2cUsst03RR
+zcDc7O1M2uBksRJMOjFz8QQsLOGzTz//BDRQQQcldFAN9iw0UUUXTfRQRh+F9FFHI6W00j8ntTRT
+SjHVtFNFNeAjKzciqKBUU09FNVVVV2W1VVYfINVVWWelVVZYa8U1V1xv1bVXX1Hl9Vdhew12WGNn
+feARMJdltllnn4U2WmmnpbZaa6/FNlttt+W2W2+/BTdcccclt1xzz0U3XXXXZbddd9+FN15556W3
+XnvvxTdfffflt19//+03IAA7
diff --git a/Documentation/DocBook/media/pipeline.png.b64 b/Documentation/DocBook/media/pipeline.png.b64
new file mode 100644
index 000000000..97d9ac007
--- /dev/null
+++ b/Documentation/DocBook/media/pipeline.png.b64
@@ -0,0 +1,213 @@
+iVBORw0KGgoAAAANSUhEUgAAAlgAAAEcCAMAAAAsmToJAAAAAXNSR0IArs4c6QAAAwBQTFRFAAEA
+EAEBAwUBCgMBCAUKAgkMHQIDCwYXFQYDBgkVDwgFCAsHChASJwkDFA4NEg4TDhANHgwHDg8aExAG
+DBEjBBUZERMQCBNRDxU0ExcZFRgVFRcgCRkzHBcUDRdCNRELIxYTAx4mLBUMEBwcEhsiDxwhExsu
+Dh8XJRkQByAtDCMUHx4bACk0DSsiGScsFCRcJSUiGSZCDiwqGCg1LyQbIycpVhsPDy0wNyQVECxA
+PSIfCS84ADJCEStWRiIULSwpADdHCDkgEy1/BjorEjhADDhXCzZvITNPUysQDzs8MjIvCj04BT1O
+PDEoQjEhMDU3LzY9KTdFTTMYNzk2GD6PAEpfEUVjBExFCEpPB1ArK0FjKEVZYTscdDYXG0d5SkA5
+FEiJQUNAOURPAFhCA1RpP0ZJVEMvYUAyBFtRWkYnDlR+QEleAF1jkTojHlV1AGB4PlN5YU87cUws
+SVRXAGaBWVJKUlRRRVZlAG1PWFNSBGt5AmqVAG+NAHNpK17BWl9ifFs9KWmnSGZ0YGFfp1IuCnWj
+AHuKbWBXoVNAdGBPDniWAHujTWmLk14rAH+ch19XX2aRX2t8AIeLiWgvAIeeAIaqa21rOnehdW1m
+AImspWJYj2tLW3C0AIyvWXaNAo6xb3Z5dXZzent4WIKiQojAen+CkX1pcoSWcYDOoH9fp4E+Y4ur
+hoWCXIvGb4mut3xpqYNYnYdUO5rDyXxdjY6LYJqk0IRGb5azXZnMjJGUgJWqpJB8l5aUyJN4kKK1
+rZ6IYqzge6Xho6GdnaWpgqy6yZ9zvaR0hqzMk6rN5Jxxw6mF26lbq7C2pLTGwrCbs7Owvb+8zcGc
+8rp42L2xsMbY3L+jtMfsz8W2nM/yx8jH0MfA7MWU78h/3crIyNTo4NLD6dK1z9bc1dbT3NXOv9vr
+/OKSwur8++Gw4uTh0ef78uPU+OPL3Ojz++bGyfH36evo/+3b6vT88vPw/feu1/v+//bb//nK/PnW
+5f/+//ro+fv49/7///37//71//3//v/8sZeTnQAAAAFiS0dEAIgFHUgAAAAJcEhZcwAAFY4AABWO
+AUTUBDsAAAAHdElNRQfaCRQPAiJBEFMLAAAgAElEQVR42u2dD3wU5bX300nWws50shA4lyTGikRE
+Qy0aUChKvJZqTQGBFNurVo0FWq2VqhAq0hc0t1rAxtLLpu61cUl6gWtNe3vb7Z9IKm+Xqn2tkFih
+GmwFA9Xwpw1s/UOyCc97zvPM7L/sbjabXTLZPL/PhzA7O7M7s893zjnPmec5k8WkpNKgLPkTSEmw
+pCRYVlSjp5n/3+3xGGsONHt2vx1t04PNTbtP8aUujxAuHvN6XjHe721pbmqLsp/H021+crP5wce8
+Ta8Evi+wKMHKHKkwjf/vA+D/77SrAKDeeTRiO3/vcnzDoTzXgy/+CEKMbVdw5dh9tMGubAfuuMjf
+E7HnfgAf/X9yJn6Aupqv206fNfYdXHr/Aloc944EK7PBWg9QurVxlQ3s70Vs+FmAdQ0LAP6By1cA
+KCT2AsBqXKme7mEfajBr25MaTIjkSjPA0mHsVnz/G8jgzyBna4MGH/UzdgHYt9ZrMNovwcpksN4E
+eJpeHbLB5PDtelT4L/wP4Fr+95NirQa34d85cB1jX3eAsGVhiPRuR644WPjOafxgHXw9uD8axL/k
+3+gj3P4vLgJ0SrAyGaxzoFCsfpObJh8aJVMtzUSMA3JZTw/Ab8RKYcD+Rvu2ehsJJIBu9llFwdW3
+0l8N1B0CrDlgpz1K4OfIkRqgT4cOjNkkWBkLFiM4ejT4nXjVrcH/wb9G4BUUwOcZ+xCgtkh74Ci9
+9Iu1Jig/BjhDhmzCGUTzccaKNx9lAqwSUAVY17Kvw7U7S2wLn6fXRXCzn90P2dIVZhxYIlxSiCE0
+W+8F1l+GYDU0hG99P9/gVxiEKxiGvYPMoCMjT2a8fxyRQkQw9HpJg3EBGAmsr4PDz8Eaxz4F+UBf
++DjtMZ0WJ/9ZBu8ZDlZwfX7fjTG0x9ibPTF93D52UEdELgD9LXYw19zteA7onMwLAHuI74SB9QGG
+b372DKAvLQF4xN/7FPlR/17gjD3H/BKszHWFYRbr6siEA7sL4Jbga4zG2Yd2sCuqaoC1H63YS3wJ
+nSW3RyFgsQ0coQLsNn4KdD/3gtcauG0HOCItVgYH78EYq4fHWOFaDLA6tMNHe3St0m0L3+Ixln+/
+BqP3ib0xGue2LRQstmu+vXjz13mMZYRbubhoFw7yXyRYmdwrHMN7hR/fgVGSdrovVzvEUpuXYPhn
+wG9+wJcOaTDZ3GcOpU/fiACLmb3CP5o0ncemCcamQY4EK+PzWD3zYayNYvcwYUj0UiCldTf+9yXQ
+2RNlKl+6Aw2YLRCtU/D+uBZgxQDr9iIkrauIeo067OOu8Cfs38Fxgi9+XoKVyWCx+wBKG5t1tDfU
+9ME81hlybtki3U59w9XNuOVL/r+IJbu/58yXwOgGdPd0aXAuwXU384eAdT7YtzXqPPO+HuybPRdA
+9hnW5YDR2zz41ntnJFiZDBbbbnfQvUI7OBaF5bF+ZdwehALCj+4n2n9qLDkK/8zxMdSNjlA/3UP4
+vRFqsf4+xhG4V8h3G0sB+/4ptJjzB5luyDDV1otUVXd9vbGmpXFr09td96mXha5kT9Yb4psfbKxv
+EgMdDjZuE2MTzPfru/9eX/8HsabWeMcY3eBtaHrL+LiDjVt3B75v224/k2CNFL3+D/kbSLCkJFhS
+EiwpKQmWlARLSoIlJSXBkpJgSUmwpKQkWFISLCkJlpSUBEtKgiUlwZKSkmBJSbCkJFhSUhIsKQmW
+leRP28f50/H5EizLSlHEDC9jCuGxJ6crtoU7TkVjxJxkuGu+XVvNp9t0bS9SFu4Ra1+/PTt/c+R+
+uFLbfMJYtNvMbRk7nq34JFiZrPB5hYdsoKqqA9Sjfbm61ZhkOAdwG1D3IVd2WhJFGp4CB+6Xw8Iq
+kD6j0QbZR3D3F/he8CPjHT1s3r0EK8PB6rHBLARmby7kRm538i4QYP2Jag4dmwkOKkU69gjbqcEb
+rOddgD1sL8DDofu8D/C4/9gCyPHTrNVZp3oXixpZjCCVYI0gsHy8Tihjf1Rt+Le7pSWw2U4bjBZg
+faUEkRJ1QETZte8ShefDT3CxtukV1tXSQm6yBXf+d7Nc0T9o2/d4SUjCyf8iTJZgjTCwng2+FVKG
+janqMn9o3UgDLOKmy1g83NrKQ6zzwc493+/YFFFKpoAqIuk03/5dmoCP1g/00xKsjAerpIWLYzMK
+oDTw9ACfqgY2w7A8rCDpHEETzZb+kPgCeIpqAz7HOD0PI2yXMfYpUcS2gGosLwf7bq8G1zNuwn7D
+JFgZD1ZAFBWdTxVBcjZHezJFKFhPAfyQwLqDu0LOGCysmi8qHS0G+Dg6wR4qJHKCVy4ajWtn0lec
+SzvfDzczCVbmg1VQxWVgs2E6oaY/HgcsP3FVKvB6pGUDCLCu53aMB+eE0E9E3y9ntxet4Gh6a/QD
+q3Qi62UYyyRYIyzdwPt/O2/Xg+X4+oJF1bNhEV/k1YgWCVdIXvGfIupCS6Xz9/8+xQGQOwddoXh8
+ABqv/8JNf9/a2goQ0jWQYGU2WH7W0cbJ6dL6lLYNsVi3AnzNWHfwyXV7WDCOZyI4p7ptRvW/XdVb
+T8/B4P1TZtnRccFCWqoEa6RYrCvUfHPt5JhgfTeQ5uxobz1DVd7HElH0rIq/cb5wAx3yCLTDbf4z
+It1gxPElMAE7mCQqFS/BGilgIRiXdeDyTkoWsO7W1ihgvYkheCuJPz7nBDumUTbrV5D3NvOdT2VI
+j2O8hRudxz/8YT97EXL9fnSFz/nZq4FnpbA+z7yQYGVyjLXcAXb+MIFFEXmsIFi5gU6kv0tFs6OK
+eGsBf1iF9g9yhLnMz0vh+u/npd3VN/j7/EEBpWcCYMngPaOlqCWhSatd84tUteDGHZF5LAKLv+xR
+TeGLQ/N1dcZm8faGEjV/4VF0hCoHaaaqnOIrtYXivuP2MlBnBOvEq6oESyq+QobC9ER7vydyq5Eq
+CZaUBEtKgiUlwZJKnyqmlXRLsKRSrhJlBI5KlmClX+USLKl0qFKCJZUO1UqwpNKhZkVtl2BJpVzt
+qtIswZJKvRSlUoIllQ6wFAmWVOqlKkq3BEsq5apWyzokWFIpV4eiVEiwpNISZHVLsKRSrlpFaZBg
+SaVeoCidEiyplKthZGYcJFhpV4FSPgIHZUmw0q8KtFkdEiypVKsbwVIafBIsqVSrhNCq7ZBgSaU8
+gie2ytq6JVhSqVWVSlZLbeiUYEmlVp4ChexWebsESyq16qzgZmtahwRLKg1mSylvlGBJpVotQLGW
+T4IllXKrpYyAjKkEaygEmU+WBGtINE3J9AmHEqwhIwskWFKpV4mi1EuwpFIvJbMHAEqwhkrNme0M
++4Dl6+w4++rs7EnT+Q3R6SRysxm7hu3D5HQGC1Z3x7Kc8RNnX2XIds1VadXU8ebS7Ik2W+pHlfja
+Z+RMnBg4CVt6z+YqW+CHmzrRNr6x39PpUJQBPQjF10anY37HNek+nYkXm42Dp6N5O5MHy9eQs8Lt
+CqpufOirNGjtNaGv3POyWlOJVWeVbW3YCUysS+/pTHWGnc5VOYf7OcKCgSSzOm8cXxN6Au7x6T0b
+15IVoSy4LtYOJwlWs80Z/slnGSxaYUuZ1equ7XP0Zxcs1NJ+TqdNUaoSPZ2qyKM/u2DxCz/flwRY
+3fkrIz/57IPlqrumKjVcdUReJUMBlqtuam3co0y4sENn39M562DRV3oHDFZ7Tl+IhgAsl6smPxWD
+LFuiQXT2wXK5NtrinU5VGbQlcjreiX0/eQjAQhtcNUCwWi+OwtCQgOVy2gbPlWdetK8bCrBc7nh3
+broVpTyB09m6xGURsFxrywcE1oGov/nQgOVyOgadIVrisgxYSFb3IH1h41KXZcBybawcAFi+0VEJ
+GiKwXM6iwXHVNs9lIbCQrNiHWpFA9r09+q80RGC5VtYmDlZBdICGCizX2nWDSjPE+sWHCCyXszrm
+sbYqiqe/89FclgLLNa8lUbDqV7qsBZZr9mACeJvbYmC55nbEuWFY0s/pFNVYDCy3LUGwfBe7rAaW
+syR5rg6sdFkNLFdsZ+io6KesQ3uM5h06sFw1tYmB9aDTcmC5ViQ9Rao7x2U9sNYeiHW40/ob76e7
+LAeW62JfQmDFMlhDCZZzWrJgHV5pQbBim6zGfu7qdC+1IFg1iUwyyvKstCBYrquSBSvfZUWwlsS6
+xtv7id5vrLEgWHUTEwGr3G1FsNY2JTmgYbwlwaqrjToEprMdu4UFFQWxx89oLguC5Zp3IAGwprqs
+CFZNeXJgdaywJFiurOiDkwOKZdGWWBKstZ4EwFpiSbBcSSZJm93WBCvaLcP2ELBi5bnWWhIs94PD
+F6z85MCqqrMmWBdGM0mOAFexplU0bLQmWAn4k6yl1gRrYnJgLbMoWHN90buEhmL1DGstCtalIw6s
+iuEEFus3xFpnUbAulGBZGqwyg6uY4zkkWBKsZMDqNMDySLAkWKkEixcHiTdxVYIlwUoKrGYBVrcE
+S4KVUrBE+B77xqgES4KVHFj00ArFK8GSYKUYLF/cEEuCJcFKEiwevndLsCRYqQbLqyhx5r1IsCRY
+SYKF4XurBEuClXqwKuONTZZgSbCSBYvFe/yqBEuClTRYtRIsCVY6wGISLAmWBEuCJcGSYEmwMhGs
+JRIslFM3phzrOsQ54keHCViz9VFiI11fGe901kiLlW6wwOBJh/zYW10EwwUs0A2wIA5Yn4AVEixL
+gAUZBRa+KcE662A5UeZ7xrITN3I6hydYEafD/yewnBKsswqW8xOA0jfRms/RItxM9oo0HMF6lB95
+qVO4c+PM5vKVKyRYaQeLWyYOVo0uIIK7Xa5vGosPDTew6GwEWF8Vp6MjTuKK4WRJsM4SWKYQrEkA
+ZKwuIYwESxcu2TS8YqyAVrrcAJNx3RaACUZc9f1S8oEyxjrbYNUA3GEE658nyAqXOodd8B4C1hf0
+vG/Tyq/i/25yiStl8D40MVbIi0K60Emzhm+vcK7xoo5e3CA84b0SrKEBy2m8GId/vxgIroY3WFt4
+wPXo5fxs7pVgnX2wTFdYQ66Qa+MlwxgsdIWbTFco3l5TALl11gJr49xJ+Zfem+nphgIRvF9EXUWA
+q3HxWwZYzmEIVmjwjl1BKrl1OYyzFliXGB3Xh1IOVs0NULppoGD54oK1ZlL+TcmBZYby+sO8a8g1
+ziUyWYmCNQlqBgpWd1ywHr08/xpnUnksI91AOQZ3gVii1RTI5z2UIFg3wNI0gvUFgM9sqrkHD6wu
+xWBR5kindhwQWNOUKl9MsP7VyB0klXkX6Z6QBOksc22CYH0ZBg4WKNXdMcH6ajBlO/AEKcdpFh3f
+lot0I8Ry8UB+ZWJg3aOnE6y6POCPqXlUh9tSDNZFUOz8NBS6BwoWKoytIFiI6t14oNclBlYqFAoW
+IZgEWKgwtoJgoXn5TE0BfDIxsFKhULC+gDSmEyxdXLimq9ENr1in67xTXhxYC6V1Bueom/iuOl5y
+t8UC69FR8DDyqn87CbDC2AqC9W90OP8Gk4YELPScyYIVxlYQrG9SBP5lXXcOAVho9wrTChZ6F714
+ZRhXSNbdBJzQVdyaBVzHp43VpS6xSaQDDYK1ln6xLaH2ZUBgBdkKgnURXEgxd27dkIB1t2sQYAXZ
+CoJ1Azm37+uwaUjA+ow7ra6Qmp74mOcMmq8vQK4bF9H5110CY92uFaDjXl+dh32PFWi9hdm6jbbW
+H6qJ6QrX8r1CDeLEhgQU0hIGW0GwdAKrJhTmszuCNBys2fUJnI4adjrEVhCsKwks/BU3DokrdKUZ
+LIxkddMerRFm+fuj4A4836uIIKIDadJu4ZcVgja5Tti5c4m90jgxlgnW7CAFSjKq6gMWWAOsS5M6
+neogWJcSWO7MBQvbiiei5xFBhm5Caua6XIaxKuC92eJNLmMtIiiM2uwEwJo1OLCmtVvVYiUFVmXn
+yLFYwiNehE5vBeUHuOaGgeW6h/dt9W8PEKw+MVZ3vwqNsYgq68RYfcCa60vgdCCcKtbHFQ5RjJV2
+sJaCkWnCBfeaYA8lHCye7dRhXogrnFwXoCxGr1AfVK/QoCpar7DYZRGwBhS8V3b27RXqQ9Yr7B+s
+jsGB9X262y9uDlxWh/ZlFrbUo6VLnSFg1WFHG/faMgrj9jV83GUgeI8Hlisv+TxWkKrwPBbEymPp
+ISOv+tOWgn43ASMZmTKwKjti5rH08/rJY9WJzO7Fse8uzU00uz0QsMpaB515N8YifttMUgHkhoIV
+SEKQC7oE9JB0Q1ywRJbiNtfAwZrW3h098/7pWJl33XTi/YPljH/bmSs/pWBVdsTIvFOO0ribHBus
+uov4uYWl6Psm5VMPVr3SOLgYy8CmeFPwRbEz3BWKtYWbAglS/SZX/2Ald69wWjhVCd4r1EOzpmkD
+S9cHDJajsiPuvUK4qr97hV8QRIVb6tSApccDq7Vv+biBBu/OkPkewQkf5n9ha8OWnVHmtwx6dEN3
+UqMbQsCqyZ90zyTRFf3ipPzZnGy0clvmTip9iC+izSO79uiVBcaYjnxw3TPpwltMdC9cEgusCAd0
+dkY3mPcIL9dzuSOnkxK3mddcWZA/9aEgWFvmFly4xEDm0UmznP3dhA7/7gv7lo9THN0jfTxWKFim
+uzScfDF/W7y42ozGzFvSfMyMGZ/R3gVmZGAZsG7g8z8CSUdxeDRmRozug3NNsIzzXcvBmgR5mwYJ
+VjkFh+0SrFCw9E/y4VaFaylDV8xpKtxIMR+a2Ro+k/DLGDg715iDSOGmGgzfruW4Xc9XWwesOqLl
+QmO83Fd1KN1Y8zli7Vu6fq9z4yQaJcPBwrdmbdp4ER9IQxHZeXWDBKuWdzuaRzpYgU4hgkPjBWqM
+7u73aaSMDvS83G9yjJwGTNfxZbrhLpb58CyA8wSb1gHLmEEIGh6p8xJ+fM4r521y1XzxXpcxNpnA
+cn6Cj17EUP9eAuu8/mOs/sBqNe58jHSwzE5hjRhz5TSGXrnoRxYjsraEgeUU/bxP0p9NZorB2MlS
+YGEwdZHRwXZHDrZaczkYYLmN+YRz4bw6BOvewYNlPiGvRLpC0xWGdf4IKmMudBhYhgpNyAywnBYE
+ixz4F3XI+3ZY3Qa6HUJpCAOsQCpvVF3MAcoDA8t8Ql7gaQgSrOgWKxwspyFzPoVVLdaauWvNVPZa
+d8g0588BxlhiXrRpscwzShFYtYG7Bi0SrABYGGNNDomx+oJ1XUjeMwSsqy0GlrsAxvKFbyFYGGNN
+EIHUBMMrfisQY10iDt1IfaYCrNbgXc5aCZYJFvX7Jm+iXmGhKxKsux+l0fPXO7d8EfKvDgPrX83O
+oqXSDYUr+QCUUS7R9auhwArByuWnhwwFe4Wue4KT7gcNFgu5f14uwTLBMge7FrrCweKBvlGhxZyo
+E7RSYvVtVoqxPgGBCTmBPNb15tLkAozTRR7LeCt3U8rAKguC1T5SwQreJKwxK0TiVT5J55lpfJvW
+GbUjt1yuz+aZd9BmG+8Gt3GtmaSXbnLpVgret+B55M9+SBzKlhsgV9xOoCNd6/qEPoGqSNaJt/RL
+HxKZ95SAVRvuCeVM6NRpZBe3NYMsn5xiL8FKJVgs1A9KsCRYKQMLg6xpXkWpl2BJsFIKVi09H09R
+CiRYEqyUgtVBw2YqFaVj2IJVKcFyWfXJFM2K4hmuYHWoEizLguUz0qPDEawyRYJlWbBoGkL38ASr
+Q1HaJFiWBavBvAs97MAqD/ZoJVjWA6tNUaqHJVgdNONLgmXdx8opijoswaJR+yDBsi5YZsJhmIHV
+wW8bSLCsC5bHmFMxzMCqCBv+KsGyHljtxpSK4QWWMFhh04wkWNYCywyyhhdYFUbZDAmWdcEygqxh
+BVZnn0lGEizLgYVBlne4gWUYLEWVYFkXrHaRyRpOYHUGRlVLsKwLFgZZZRYGa3x0722oNdZJLbMo
+WLMzC6wZ/VQtI7CWWBOs/HgGyxyw31cPuq0J1vjkwGq0KFjl8Q66mo9PzppnTbCmxzNYsW/qtFnU
+YuUkN26xc601waqPd9Beng/Kml1nRbBqlvWtsuapClTYj3lTp3OpNcGakRxY7BpLgrXCy/qN3rNi
+O4+hBGtljKExtXjQVY7Y0Xu3zZpgtSQJlm5JsK7qZPGj9woEy7vUimCNj8GN6BF2x45YKtxWBGui
+L0mwqjZaECx3fvyDBsoHZbHxddYDa2NF9ENuDYx8jTkOcK4FwaorYMlqqgXBWtEc/5gr6erPYo0r
+rAfW1JhGVunv2tfc1gNrbmfSYE2rsRxYbls/x1xL3cIsxmLxM3RgbVwW02CV9NuRutJyYLnLWfKa
+aDmwlnr7OeRmuqmDYLXMsxpYWuwIq/9rv95pNbBs3YMAq3ajxcByl/V3yHj9NxBYrHqltcC6qj2G
+7y5XKhJoCsVtLbDmtrPBSK+xFFju/i8Tnm/I4jOqaqwE1opt0Y+3Je6g5JC5bTluK4G1opENTjZL
+gTW1td8D7qZ8AweL5TutA9bKZTGvg3gTv0J7hqPd1gFrRfUguWL+8RYC65rdifgMDIUFWKxko1XA
+WrouNleJtlFnVG84JGDNa2CDls9mFbDcF7+SyAFTIivLzMQtsQRYdeO9MQNCpXIATbHWEmC5x7ey
+FMinbbQEWHVKYuFiuaIEwGItORYAa60tRpqqakBcoZr6HvzZB2tFTgdLjWonuoccLPe8GxPs3lYq
+SndW8LJYN945pGDVrc2KYa7aoCDOUJkY7vDGqe4hBcu9IquNpUydkyJGC5xtsNxLshLu3aIZ6MwK
+Pfgm29SN7qDG/9ydVm28JuTFivHj26JfEB0l2B1UBu5TOmpzZteEfMPE9J6Ne6or5MXS8eMPs5Sq
+Y5ntGudZPJ0lK4LLdUtslw4gaVKtKB1ZEUd/eOvCS01pl6ZZ+ebC3GVN7TFSn94CGidTltxd3I7D
+62688mydTsjnL/O2d7LUq+PAA5eetdaZdKG5dOMDLe0DyvHSPZ0sZmV5y8IrpkoNC9VbGqz2anSB
+NLSvoE021fBSg6K0WROs1uoCY6xoWUOHbCgJVirUVlUgLBWG7M2SquEoC8ZYLWXGuPYSpT4t8a/U
+WVCUXuHQqkpV+FOrobHdJ5tnWIPVaSGwqvjseaVaQjXchS3pswxYbdwH1sugKgNUEXqvcIjlAap7
+JW1VRqjEOmB5MFwvl1hliGiqujXAokExHtkgmQNWpTXA6lQKlGbZHpmijsCYdwsEe9WyPTJGLYrS
+aAmwOhKcJSE1PNRIjz2xAlhV5YpXNkfmqIrK21oALF/ch5hIDTuBqN1ghS6hjLAySD5RxmjoD6Q2
+8LhXqUyQl2J3K4BVEO/hOFLDTtV8YvHQg9WtqOWyNTJIojje0IPV2W8xNanhpLYy/pSmoQfLfFyU
+VMZ4whargFUrmyPTPKEFwGqjMl1SGdQnrLYGWL7WVpltyByVG7NAs+RPIZVa/yNuo0iwpFKpShG6
+JwCWP4FPC27TM9AD6Ym+m182UUp0tn9HNFhGTftIsDyqkQZXVPEknu3zdVWbsbnvIR+7vUjVFu4j
+Jj7MUkm054YiNX/ZUbFnma6WrO6740Gb+IpD+MkzdoiVG0rU/DtPibWAn3B6xCNRFmwIUbniNfy9
+HTOWnYqGz1Pmxo9NdzhmPE1LO8t0rWQ1geXfH/JDB/puqiGyLxum68Ze2BB2beGJnqSP2jRYfcEC
+83yA+mpd04FmOQAUHo3Y8KAOqmID/Q90zhqQVNY7Exy4tUYb36/hnhpcH/ndvVOAPyH1VTtk4/tf
+o1XT+WcVcupwUaPFkR4Emw3hAA7WBgf+MPjL5L7Ul6tDYGy8gFoL4BvM/4wDf13gv/5+vuj4Qdg+
+KhhCDhabezF2K7afTbRfMmpWppkDVfoBaw7kPO9jPXtzYHLEht+FcSeYbw5chsu7wNbWimIvQu4R
+5ltM53PcBt/zs72a43fh++2fAhysro/Aom6210bv/xhmdLL9NvWHjH0JinFRgx9KsMLAegbgEew8
+Hz4f7H3syU4wwPp/GjzP2FOQd5RdADf72c+0vLcYmwn3+tkL2qi3Qvdp5VoOE/zsTQ2ew720vD/j
+Vnl7mG8B3JZkBz+kMFB8sF7TdHGBYFP/lEKzqsDQ9NunP0BZC8jhkF0mVl4BtPLvNvt77ECFfhoN
+cRasY69WVf2ZsWMPVG1lbL4KYzlYf9X0dxgxtYx15ehv0Lm+3clYNizjX75MghUG1vlGY6O9vxr/
+q68KFs6cDo5csfH2+dl+P+tVoY3RP8b//tNGdCFev2QfPFBFrbihqlLEXm/qo470sGa+F277CrsL
+rsPVv9Wy30s21RAYABUfrP+A85iJzE18ZUSS/H9hHP49Bx7Y+WATIyyqxb7/bYTk7+fA91jvR8je
+fRZGY3ygaM+3cbBahUNshQlo3EazXdVbT9DrL0EpWjObtFjhYP0JHCeE2/sOjMKlSgiOCFFK9wVf
+4m/+Tw3eRouFru9Nsli/EJgsh2sZ+wrkveV/hlsoxmF7OriXDfagg/olo0U0XkmoUVEcLDZYjuoq
+VLWDwPo4PBIgaAKdQ8SQvJOj4BbGPrBxb134FvsYt1hdWfC4scHPwP4Gt3dPvyhsXpvfQOqvoNP5
+vgpj0XgVLnAAOOgsj+fAwqrpUCxjLHBU8YYAAus7kG108Dg2vC6xqbfDOUN6sk+xF2z6sgd53PSE
+2HUVNWDXGDj3Q53bPGGZToXu9Q4bA1TGvZeM18DVGlYhry9YATWQBdpqrBc+L0IY2tuPoMO0QfHu
+XWSW/gPGYn/uZQ0M/l6wwSLGY8J8HS41D4CDhTHYN/hFo7EDALk7WpaDg8h7ir46SoQ68sAKCFur
+IkiOg8CKzB6FgLUe4EcijgcYi+A8KN6rogb0v4DRP2QbUdocvqHQUxpZL7sgKimwOsLnhvYFS21t
+QbUqkWCN7vNRJ6eA/izFbIep8uwhjNNP5kBh1e06YGBF9vUZJI5fEyfRpuW+EwYWGjO0TWXAwdJ/
+YnrB+0Hb0XKfpv9UgmU0BJbt7hUAAAvPSURBVAfr9gTB6mH3YVfQTxfs2N0tMyHnRBhYgrdfi23f
+tI06Yu613gGz/ATW7mTBosA9dJRK/BjrioAr/HEg2gro2BSw/zokjZAFm9mhMQ5wzLoCAys8WrwK
+ik+ZfUhhukLAIuwge7M2Dl2h6udeMZcds+m/YRTbTZZghcZYQVf4N97TiwFWD4ZScIuf+of2oxyj
+O4KukDfgnzQ0WGKn++BfTK7uEnsZrrCLwq2BclUWMayuv+D9XAqH9jGMnh6J5KoIcvaF5dyA0qgH
+WtrYKP13wrpeb/wax5AhnvEKBYsda209cQBmoVfkYB1AsA6I9w6AJsEKBetvGFmh/3qeneERVEyL
+tRx0Ho+vMs2UzQze74LPi3hdJKy4J/ylsTty9XjIqn/mDDh471SVaeGPkowP1n4bueHP2m/ZzoPw
+sPhqCow10J9TcgsPmvTfdXqf9NNSDv7FaP0b5hUxEwp1yPf7Q8HyNnq4Kfsa682B33CjeC6F9PQ9
+f8SQXoIVmm6YQhmdX6jFe3X4AYsJ1nrIEx2+JwR+y9FMYdeKTNwF8HsRwY4FnduDDwJ9P1z5U9OI
+kVn7Hy17gLeC2qgGY/cAwMIj1Vef6L0PIz5+U6eq2hsSIy6sRlVRj1HfwY4tQPd1yIbo49LX0Bzl
+QC69X+2hcEr/DfYMbw6zWN+F/CNsly33HT++n7+H4U/2DdaVQ51LXFwowQoD600NFu5hL4MOY6nR
+G6qrooCFG43lv3n7u7nwtVNsu07x1AIoPYHNRR7wXR1uRotAeR/yliLqfTPX2KsNwy7HDnZwSqBT
+n3DCXVGmRVSC7wcsdp8GqupA+5m/2R+ax3rfZnRZcPNetK+qCqOPUOxNS6V+sj+GqsgRLqLrwnCG
+Aiw/kqerDpVHaYtpEQr93EbS92knJFjht3RetuMPw2/E3HkkIr8QeLnA/M3bKIClrSkWOa7TIv+h
+MZo/RVc/XeL/CdmnTUcYuLuznpobxp0a0KFWRnvQUVbfJJcBliJuQr/+WJGiPdAyBfLeCM1jeRVT
+9GpXmZK/mTqxPTunKzOa6Jo6x3y/qmuMMprWzFFs7xn5DhFi3We33WkEorumK8U7uP099mQRLp4a
+6VyxacGGEMmhY9vLsm0Ln7/LQb6wPHzKXIV4+RHzN8de+sFVevaM53lO9dgG3XYnub/1ikJ5nKcU
+Bd3ig+ZHBPYib7R3fnb+5u6B3IRudSjRxgAnPGxm552Jf5c/ua3kWJlE9PpMS111vHJslEc8yIF+
+UoNQgwLoBqOVYpRgSSWtRipIDC1R35NgSSWn7lpR5zpG5VgJllQyaq0UPbOYTw+RYEkNWG314sE0
+1XHm7UmwpAYkX3MlUkUpBk/c8ukSLKmE1d5YaT7ur7q/h0hKsKT6V2erp7qiIJATr27t/0m+Eiyp
+aGrxNjd7GmurqyrLSxziOX9Clc2JPUdZgiUVTaBECFeU17Yk/nBuCZZUNBWEMFVQUe1paRvgY0kl
+WFJRXSEfFt3W3pHsY24lWFJpkQRLSoIlJcGSkmBJSUmwpCRYUhIsKak0g9Vab0zwqq3nQ057vQ1b
+m96OuuvL9XySoZ+1NG7b7RcTO7z1TebErdcbt/WtAHAwuPJg49am4LQA39Z62RgRqq8X2UmP0SYH
+Guu3RS+q8O5WsQW2RQNuwtuCWiV2U9TWG2o3WlDs5e/11jeeSAdYEfMKt9tpTqFaGuW7juXwSYb+
+Y9NVAIe2B49rfx5ubeNzvHuX22i/8FqiYmXhW+Yi2AKVMReb8+6lAnKAGEgnSs3sn0KzCtXRf4iy
+5QLRav69OpWDKj7Rw96fSa0yms95vs/GV4btEVIq0r+3iO91BFvwNWpB5fG0g/WyBouamh+zw8V9
+djw5EzhYvRdA/jbPdBj9Hk1MLW3aYHM820OgFDY9aaPqWSFaD7lbPTMh30+Tox2rmxeICfWMV0KU
+YMUF630dxm1tbiwC+9tnIjdcr4lWw20ubmoEGOdH1OybPUU0PxV/dftWXDw3bHaduAuoQd4+KpqF
+exXBuNO0iC2oOX6fbrCuAF71+GUtL6KSKNuZBwKs12xU1eGkDb7HvstrxPyWivy9puHJsf+dsSh0
+n+M2eBZJ/Bh87wz7GE2O7rIZ5R0O2SRY/YD1HbATGb1TqDJfmF6badYg/Y5qP2HU8RtFk+vf1ahg
+5Bh4iTO3L5qlQ+P0Pzaaa/+uLW8PewJoavEGXmQvrWBlGzVmmprbOOWBQpFesD+icrA6W71i82qE
+ZR1dNlRR9Me8tB93f3OU0afRrimj/X8FlVzji8TUOXyDLF40mfk+kvusBCs+WBVGISNvM9XkDJ0J
+rUDpjaLV2luahZtrYzq0MlGN9C/aqKN+ciE/YF15Ctqt9/lf0lNU25YdNveiGqQPMdbzF9uod9IM
+1q2Qu/lEyBkEAuzdq98KK0iKJucniOE6sdkP0dT98LEi28I96PePU6mAn4H+UmjZUVxRfITt0kQN
+m8Xw+AEJVhSwOoNg/QrgzmAEHlq7YeHz4aUcXgC0WAvgslOiEsh/CiKpGhYV9HuWikKKNv1wlB60
+Yi9Q1eSP8zJGH+bk/SENYBlDvHipyIPZ6KJmrDNOyNPcGuamg2D1ziS/fgUPqdArbkbzS1Xewf4c
+vy7053gditc0HlL9GHLxstgAdkXTnhMBVilrk2BFActsCsJmOjaFtszoSHubPaFbhoJ1vIjs0MkF
+oCowDsF5DLJp9SpeknEBZG/XzKJF60OK6Z2cQnvpyVf0GxhY7OAC6rw5tOei7BoEqxej8F9TKObY
+wU4uB/SK2aA/z3z3UUjPus7BjyhGJ9h7Dsw4wfbmIEPYl8TvUeFOvx+tXf5pCVa/YPVu4D05ZXWU
+EhchYCFXo45QTw93duAl7g+UiqSm/ruOH2E4wt4xwcJ9yNWofURUiwBrd7rTDeiBPbdnO0B9Lg5Y
+vpmgPm3UKcSzKcLTGWUaLyqrvV8Dx0uUJTmETClU8IhgW3SKvf4R3OzkmFw8pXYJVnxXSL+z97E8
+ozRRTLAO6ZCNsfoHOmz2U52xl9BiKUGLRUXW7EdEzvG3WraZDTpWJKp+ptVihYPFj2Hv+dH6CSZY
+vinC5VFfcboyY88V8N/oFY2C7xR1vaoB/Jp/0LFVDu2BV7QJ9PiAf/A4fhzaKsgm4wVKhWQpTh5L
+6PBiyDsRGyzkitfvNAKr5RhYhcRYIq+jG2DdBXeYNOZBDjdeIsb6IMk674mDdWDVFNE92AW2mGCh
+FdVCe7JdOXazV9ibRVVuu8ag+R0d7Ge8CnMpjqezfR00AstQpWQpNliPlf1cOLB4VZOP6/QgGhao
+wb0Kctifgr1CwxUaddXGmHYJvee4I0aamnqFf7LZ09wrxL7encYXxrZYiyHXeKbPX8vz3qFIazLF
+6WOPUoVu9Q1ebnKHqPf+lRL6Owc7kGixqMb4rTCZdXhI9eBo9kqWYoM1nUprM+pOx7FYRfBR4Sd/
+oelHeMD/eQLo14E8lgjeeWb9bwG7NF2Un2QskMc6L915rPsBCmsbn5wOKsVIEU8F52Cd2asZ4X41
+FRDN3/aY3f4Sz7xrW58cBbPQQFEW9Ble1f5F0Fc3zOePnVhMi7drjh8ZaWQZvPcD1ssOyFm3rWG5
+g9d5jKjoJ8A6Qw9fEBX9eqfg1g0XYBx/hjLv6xpE5v1n1CP8iqhua1RTZmeeMffyknOZsW2VI/2Z
+994N2fxeofZ8RB4raLG+ZDoyDKr2Z9P9Px5v9S7HqEmddYr6HsXcNo0+KlbaZ5GB66EbWJD9tPlh
+Eqz+YqxdOvUKHaJXGL0G6cxgDdJj/F5hDm+39XRfgyruoyO8mSfvP3oq8CgU8iDB24YH6bah8jRL
+PVhtnkax0OjhSavelmZPc1vYKlONnhb+1xC96PV6dpsjFl5vbqL9Xvd46Kazr8lDHY4DYiXpIG4b
+NOroECVJkRe5R/QKvUabHPB6ml45FbbKkPHSbApPB/+pPa8YHu5gcxMfvOD1eGj31zweDNNagi1t
+iM9GbfE0p2V0g5QUk2BJSbCkJFhSUoPW/wfr5tj8wgE+HwAAAABJRU5ErkJggg==
diff --git a/Documentation/DocBook/media/selection.png.b64 b/Documentation/DocBook/media/selection.png.b64
new file mode 100644
index 000000000..416186558
--- /dev/null
+++ b/Documentation/DocBook/media/selection.png.b64
@@ -0,0 +1,206 @@
+iVBORw0KGgoAAAANSUhEUgAABIsAAAHpCAYAAAACi7yYAAAAAXNSR0IArs4c6QAAAAZiS0dEAP8A
+/wD/oL2nkwAAAAlwSFlzAAAOxAAADsQBlSsOGwAAAAd0SU1FB9sLCBAiCLMGMtAAACAASURBVHja
+7d3rkds4FgZQaMohTBY7ObRCV+fgyWJy4P6wJavVIgmSAIjHOVWu3bElPkBSAj5dgpdpmqYAAAAA
+ACGEvzQBAAAAAHfCIgAAAAAehEUAAAAAPAiLAAAAAHgQFgEAAADwICwCAAAA4EFYBAAAAMDDD00A
+21wul9XXTNN0aHnP749Z39o2rK0jRzssLX/pvVve9+61S69Jdey2bn/sMTx6TAAA/cIW+oVb+2tb
+3p+izwioLIJsHYe9X+a979vae89ut6Pb1+txBwD0C3vZN0ERrFNZBAct/ZJxuVx2Vdg8v+/oLyEx
+69j7xbq2/1u2e0u75Th2Mevf8ytVzDkDAOgXjtYv3LquVP0nQRHEUVkEBTsJve/r0hfu2hdz7e0W
+27HQ4QAA9Avr7BcJiiCesAhO+GKK/YIt8SV+RscoNmippUPl1jIAQL/w3PUc7Y8JimAbYRGc9KVY
+Yu6b3OsYNUTRuQAA9AvL9AtT9LsERbCdOYsAX74ZOiVbO1M6LQCAfmH7/TzohcoiqOhLK+eXV4p1
+xP4y1krF0X1bn7dXBwIA0C+ss19oagAoR1gEJ4j9osv5iPq965imKUk59eidwNc/AIB+oX7h/HpK
+tzeMzm1oQJIv7Ra/eO/7sOWxtgAAtN0v1N+DdcIiyPQFlPP1JbZpTyehl19q1joQOhgAgH7hOf3C
+Pct9tz36c7DMbWhQwPMXUYkOQ6517P3Sj/216axJEdfWoyMBAOgXpukX5uqv7Xm/W9JgnsoiSGxr
+4FHiiyvlOu7v21pu/PqLzuuvOTHtlmIZW/bz+f1r6177ewBAv1C/8FwqjCCesAgSdwK2dAh63e+5
+fX8XuBxtt1SdkZhy6djt37vNOioAoF84Sr8wV39tzzIERvCd29Agg7knQ8T+unTk15mc64j5El17
+KsbRW75inrqR6glj79rELWsAgH5hmn7hmcckpt8HI7tMRjYAAAAA/KayCAAAAIAHYREAAAAAD8Ii
+AAAAAB6ERQAAAAA8CIsAAAAAeBAWAQAAAPAgLAIAAADgQVgEAAAAwIOwCAAAAIAHYREAAAAADz80
+AQAAqVwuF40AABWbpmn1NbvDIh0BAKDGzg3n0T8EgD7sCot0BAAAmDNNUwj6iwBQlS3fzIduQ7vd
+blobAMjuer1qhKZ6o4IiAGiZOYsAAMji0w+LAHC6jx0/unkaGgAAAAAPwiIAAAAAHoRFAAAAADwI
+iwAAAAB4EBYBAAAA8OBpaAAAFDf3ZJa5J6htef3za5eeyDb3urWnxsQuM/V7jmxX7Dr3HIMUbfj6
++qXjurZ977Zja1vuaVOAnqgsAgCgqKWB+rt/2/r6s7Z/z3aesf0x+1fjdgFQjsoiALpyfRng3J5+
+Fb7/2+3NL8Xv/m1pWa/veX7t/XXXN4OtuWXs+fe59c/t45H2erd/Mdu/9XX0b63q5zWkWHr9/d8+
+rtfFapOY9byz9L7X5e7ZzqVKmT2VP3ts2cc966+1MmfuGKkkAvhFZREA3XgON94FNnMhzlJQNLes
+1/ffX/f62ue/fw1d3r3m9d/nlhu7/rX22rv8LW20d/voT8ztYbEBzNJrS4YMubbzzNCidLs+BzX3
+datsAjiXsAiALrwLfPYGE1uXtaVK5l2YNLes2OXurdI5svwtbaSKiFdbg5Cl18f821y1UupAZu92
+1njblwobgLG5DQ0AZqSofjkSnOSuvsmxf2fsB5SUMtT5vN2+LC82xNoziXaJNthyO11MBdHS7YUA
+5CUsAmAo91u97rdGLc1jdKQi5t08QiH8uSVrTcwcSkekWv7avuTeD1hzD2TuwcOWqqIS8wa9C01G
+nD/neV9fQzQAyhMWAUAma5NVA23KEeLMhUZHJ5g+e/9jXyscAqiLOYsA6MK7+XLW5gWK/fdnsYHP
+2uvWJtveu969ti5/bxsJzNgTDOx5JP2z1yAmNsC4T7j8+ifXdj6vs7VjlGsdQiSAc6gsAqAbz7eY
+Pf9dqmVtWd7cbWivE0LPbe/rv80tL1Vb7Vl+TBvl3g/a8nx70dIj7e9/v/b6mKer1bBfc9tZ65w8
+pdt1bh1zQdFaGwNw3GWapmnzmy6XQx1wAIAt7gHTjm4LJTuWv/uI084QYC482Pv6LfMSvXtc/Nag
+pNR+xb7+yLYeXX9MG669ZunYpN7mEeeJAsZx/4y7/P7vmP6U29AAAChq6yPm9z6S3n7t34/c648J
+Z97N49TKuQDQOpVFAED1VBY10rGMrCwCAMpRWQQAAADAIcIiAAAAAB48DQ0AADqSciJsAMYkLAIA
+gI4IgwA4SlgEAADAZh9/X9/+/ed/t8Ovf37t3PKWXje3rq3LTP2eI9sVs961969t59r2LbX16zJi
+t+Xzv1vyduE4YVHpD9SZsuDnX4COlA7HLD/Ferase2lZW7Zh6/a+vn6pDda27912rK0vVbsCAEB1
+45qFwf3H39dNIcm715fY/rWQKsV7Wj5me93Dn6VlxgZKnEdYVPLiXAgTPq7X6BBh7rWpln/kPWv7
+LigBAIDGxzUrVT+vocTS6+//thYs7A1plt73utw927kUeixt3xnhWEybzO13qe0VHtVDWFTq4nwK
+cmKDni2B0NLy7/82F/4srWdPYLRneVvWUWvgNNfuAjIAALoZ10TcHhYbwNz/LiYwStpvf3PbU47t
+zL0v727/WqvqijlmEEIIf2mCAh+oK0HR0UBhbflbbuVKsT1ry4vdhhRt/nm7PdZdYr0AADCCreHC
+0utj/m0u3EkdcuzdzntQ09MxS7Gud23iFrQ2qCwqeXFmrjBZWv7n7XZ6WFLDNgAAAGNLGeq8Vilt
+ndz53fKO7sMZc0DlPjaCpfKERTVfKBsmqy617hr2de21qeduAgAAzvM6YfKWypQS8wa9q6IpVT3z
+vPyYp4pBLGERu55i1sSXytO2q2oCAAAe44MMIc5caDQ3B1KSsVzF4dC7p6KthWgqiOohLKr5A2zj
+RNW511/LurY8NQ4AAEhv661OMY9RXxwDPAUP9/+OGjtsDB+ObufzOnMFOTHLnZvoWhhDLBNcl/xA
+PRherIUka7dfLS333Z/a9j/VOoRIAACwc0wy86SzL/3tmadvLU12/Pra2vZryz6V3OZ3f44eMwhB
+ZVGZi/jpFqi5qqAj1UJry495Gltupbdhbh1zQdFauwEAAL/72i+PkU/x+hoeRb93O/fMi1R6Iuet
+xyz1emNDQRNc10NYVOoieQl0jnoNN2KWXyoo2jMH0lnbfKTdzm5nAAA4bXyzMJnyXHVLC0FA7fsV
+cxveu7mCWjoG1EFYVPKDZ2GS5diAYW0ZtQYYJZ/gtrSuexs9h201txsAAFQ7vtkYMGx5/dHXHgk/
+atmvI+9PNYF0ioqvGqrG2O4yTdO0+U2XSwghhJuBNABQwPV3qL+j20LJjuXvPuL9KPnRBWCbtVvE
+hCrsOq9+96Muv/87pj+lsggAAKDFAeBLsCBIaJ9jSC2ERQAAAB0QHgGpCIuI++JZmZRbmTkAAFTW
+h98QHn1cPzQYFPR5+6x6+4RFRJ7IN40AAAA19dGfwp+Yx6HHPr4cQFgEAADQuNfwZy08inkEOzAu
+YREAAECjYiqKdvl50bg04Ujg+Xr7Ze5bw1q63VNYlPzgXzUCAP13zNyeDJB/bJErCAKKB0WtERYB
+AACcNWA9IRBy6xnDX3eColXCoowUbgLQk0kTAMQPRguFQItPOHuzDXuCoss/jieV9Ul+Hrg2TwqK
+WnvioLAIAABgy6CvgiBoz/apKGL4a1dQFE1YBAAA8DywK3hrWOoAJ1U1EXR3XQuKNhEWAQAAYwwW
+Gw6B9u6foAgERXsIiwAAgLYHgoUnia4tgBESwcL1UUlQ9Hn7bCo8EhYBAAB1DvJOenR860GLoAh+
+f4ZUFBS1RlgEAACUH8R5ZLx9hJyfMYKiQ4RFAABAuoGSEMj+w9mfQ4Kiw4RFAADA+iBICAS08Fkl
+KEpCWAQAACMPrMwLBPTyeSYoSkZYBAAAPQ6ahEDASJ95gqKkhEUAANDaoMgtYQB/PhMFRckJiwAA
+oJYBjxAIYNvnpqAoC2ERAADkHlQIgQDyf+4JipIRFgEAwN4Bg3mBAKogKEpLWAQAAK+DASEQQDME
+RekJiwAAGIpbwgD6ISjKQ1gEAEAXhEAAZPl+GSwoCkFYBABA7Z10IRAAZ30HDRgUhSAsAgDgrA64
+eYEAqPl7atCgKARhEQAAR/17CSGEMP186WSHa9HNEAIB70zTNMy+Xi4XBzyRkYOiEIRFAAAs+ff8
+gYcQCICSRg+KQhAWAQCMSQgE0J25KioVR/EERb8IiwAAenJGCPS/6ctgZHp0sG+OB0AFXkMk4dF7
+gqI/hEUAAC04qxLof5O2B6B7gqKvhEUAAGcSAgFQ2HOlkSojQdE7wiIAgFxOvCUMAFgnKHpPWAQA
+sJUQCIBOjFxlJCiaJywCALgTAgHAEARFy4RFAED/zAsEAKvuVUa9VxgJitYJi6DmD+uf7//+8s/6
+a969ds/yU6xn636uLWttu9e2dakdX5cRuy2Xf/K2ETBDCAQAbHBWUPS63toJi6BSS8HD9DM+eJh7
+barlH3nPme2y5h7+LC0zNlACdnaq/r5+v/Zzh0NCIADotsJIUBRPWAQ1fjg/BSKxQc+WQGhp+fd/
+mwtJltaTOzCKbZe5fSoV6giPYKXD9BQCFSMEAoCx+x+Cok2ERVCZtUBk6e9TLP/5dqrY8CfmFqy1
+7Xm+/evdenO3C5CgMyQEAoC+xibT1EV1kaBoO2ERVCp38LG0/CPhT+vt8q4dlsIrARVDdBTffB58
+hGv29X7+d3v8/+v1+ui0AgDEqiUo+rx9NhUeCYug48FcCOfPI7T3faXmQOrtWECJa/eo5xAIAKi8
+v9Dw/EU1BUWtERYByQaXe8OQ5/fVXNUEvVyruQiBAIBaCIqOERZBJ7ZOVJ17/bUParfs1+utaGu3
+oKkgIqczrpfHuf+l43NzMABgpD5IQ/MXCYqOExZBxQPCI6HDWoVOzCPhlwaNJQa8c3MFCWPo9Zov
+zbUEAPRGUJSGsAgqE/M0siOBydryY546VmKw+jpwzt0ukMtZlXOuBQAgeb+m8uoiQVE6wiKo0Gsw
+kmKwOjcvUEuTMadul63rjQ3STHA9SGdJCAQAUA1BUVrCIqjU0m1ksYPFtWWcFWrEPHZ+7rH1Z243
+43BLGADATD+pwuoiQVF6wiKoWMzgce01a4HMGQPZLWFXim3J3Y4G+w11boRAAABdERTlISwCoHlC
+IACAgn2v6dczUmurMBIUpSMsAqDejoh5gQAAiCAoSktYBBQf4BuIIwQCACAVQVF6wiLAgJyk3BIG
+AEApgqI8hEUARBECAQDwpX9Y4ZPRchgtKApBWATgS14IBAAAb40YFIUgLALolnmBAADI3ufsuLpo
+1KAoBGERQHtfyEIgAADIauSgKARhEUBV3BIGAEBzfdjOqotGD4pCEBYBlPkCFQIBAED1BEW/CIsA
+DhACAQCMpbYKmmmaqtmO1quLBEV/CIsA3n3ZmRcIAACGISj6SlgEDEUIBABAT16reWqpNGqJoOg7
+YRHQDbeEAQAAWwiK3hMWAdUTAgEAQGQ/9qnSqHSVUWvzFgmK5gmLgNMIgQAAgDMIipYJi4DkzAsE
+AADnu1f5mMfoK0HROmEREE0IBAAAtOysoOh1vbUTFgEhBLeEAQBAr0pWGNU8b5GgKJ6wCDonBAIA
+AEYnKNpGWASNEgIBAACb+vODzmEkKNpOWASVMS8QAABAGrUERZ+3z6bCI2ERFCIEAgAAanC5XLJW
+F9Uyb1FNQVFrhEWQ+oOxUCgkBAIAAHaPJzIHRmcTFB0jLILaPrSFQAAAALsJio4TFkEhQiAAAKCq
+MUqH1UWCojSERZD6A1coBAAAUJygKJ2/nE4AAABASqUrlgRFaaksghQfhD+1Af1QHQcAQEsERemp
+LAIAAIBB1fCI+yMERXkIiwAAAIDmCYrScRsaJOYWHlrkVkoAgIHHMB08FU1QlJbKIgAAAKBZgqL0
+hEUAAABAkwRFeQiLAAAAAGaMFhSFICwCAAAAeGvEoCgEYREAAADAN6MGRSEIiwAAAGB4l8sl+TJb
+fsLayEFRCCH8cEkAQJkOTo5OGAAAaY0eFIUgLAJgcCV/8VpalyAJAOB8gqJfhEUADKPmUuh32yZA
+AgAoR1D0h7CIrgduBlp9DqqdM4xyHj9vv3MTACAfQdFXwiKAmcH5K4P19o9hT/vlfAQASENQ9J2w
+iO4HjQZUGKyPeXxG2V/nIQCQyuVyGa5PJSh6T1iEgR0kOIcN2H2OOA8BANoiKJonLAIwYG+6vfne
+Ls5BAIBlgqJlf2kCeh/oGVRyxvntvNO22gkAoE6ConUqiwAyDthDUOWRsi1xDgIAHHFWUPS63tqp
+LAIoMGAXdhxrP5yDAABHCYriCYsYYuBnkIQBu/ZCmwIA4xIUbSMsAjhhwI42Ort9tTEAMApB0XbC
+IoYZABoY4Vpoo120jfMQACCVWoKi1ibRFhYBGKhrD+0OANAdQdF+wiKAkwfqBusCCwAA0hIUHSMs
+YqjBoAEp1Pe54LoEACAlQdFxP5xGAOebpilcLpfh9rkVKY6NUAwAID9BURrCIoBKjBQY1Rqc5Gz/
+uWULkQAA0hAUpSMsYriB4YgVHLR1rfR+ftb0eVBDW79ug/AIAGA7QVFawiJgqIH5O7UNznsOjGpo
+69rb9nn7BEcAAOsERekJixhuIN77YJxjg3OD9D4/C1q93gVHAADLBEV5CIsAKhyk9xZonhV09NSG
+giMAgGWConSERQCRg3QD9PaOmXMSAGAMgqJkHc0Qpin85ZQip5oHMgZZ7BmglwwhejlHS+/HSLeY
+lj4nAQBqJChK2nkPIQRhEW0NisAAvbXvmslxse8AgDFcNoKiPIRFGMhCxV9+LZ+jpYMitAMAQA6j
+BUUhCItoZKB4HwAZCGFwPt71v9b+joE2AQDa6sO1ZMSgKARhEUCSwTnaXfsAAPRl1KAoBGERmbSU
+SEvPcY62t72CkPh20lYAANuNHBSFICyikcGOQSKtnaejEhQ5PwEAWjd6UBSCsAjAgFwbD9N22g8A
+YJmg6BdhEcnlmNi6pW0G134egg7tCACQk6DoD2ERBjuAa157AgAMTVD0lbCIpFqu0FFdRM2D8NrP
+z5zbJ9jQrgBAe/25lvoagqLvhEU0O5Ax0IE+OxbU8zkLANA7QdF7wiIAqiXM0MYAALkIiuYJi0im
+xYmtc+4DBt+ue+0IAECdBEXLhEUYlAMAAAxstB/NBUXrhEUAVNepEAQDAJDDWUHR63prJyyiukHj
+1kFi6kGlW9HgXIIiAAD9uRwERfGERQAAAEDXBEXbCIs4rMdKHNVFcM41oqoIAMDYJzVB0XbCIqqy
+d6BogAkAAMCrWoKi1ibRFhYBsImqIgAA/boW+nSCov2ERVTz4VLbQNGtaAAAAG0SFB0jLKIbqhLA
+9QsAQJyefxwXFB0nLIJBP0BpSy1himsCAICaCYrSEBZRxaAx1UBYdQK9XRsAANBKf/Xs8ZigKB1h
+EQCnEvICAHCUoCgtYRG79Dyxdc59Bdc9AABn9ud67NMJitITFtEdVQoAAABjEBTl8cOpBZBOjl9q
+eg5AhbsAAG32UWvs1wmK0lFZxKkfNLk+UFIv1+03AAAA9RIUpaWyCCCRnkNFgSkAgD7cnLOrigRF
+6akswoDRvlMxt2kBAMA8QVEeKovodhB8uVwEPBTjXKvvMwAAQL9Uny6F0YKiEFQWAVT7hSxMAQCA
+c40YFIWgsoiTBsSlBsGpq4umaTKAJ9t1AQAALfVHex8bjRoUhaCyCKDKL+aavngFYgAAjGbkoCgE
+lUUAmwlPjlOhBwDoC+rP1Wr0oCgElUWc8IFY+kMl9fp8OYx9HZQ4/oIUAAA4h6DoF5VFACtKBoSC
+IgAAatdrn1VQ9IewiKID5V4+VEx07bz3pQsAAP0QFH0lLGIIqZ+KRl9qODcERQAAtDK26o2g6Dth
+EVCMwG6cL1wAAGiBoOg9E1xTbHB/9oDYRNfUSFAEAEAr/dbe+q6ConnCIoATv3BrJxQFAKBHgqJl
+bkMDKGz0aiLVVAAA+m5nEhStU1nErB6fguZWNM4+/wQlAABwnrOCotf11k5lEUBmAiIAAPRjzyco
+iqeyiLd6rCrKtT2qi5g7z1QSAQBAHQRF26gsAjhIIAQAgL5tvQRF26ksAjhomqYvfwAAgDrUEhS1
+Nom2yiLeDnxTqTWVvlwuBvUUuYZUHQEAUKve+6qCov2ERQAZCY4AAGihr9pbf1VQdIzb0Fj8sDjC
+wBi+X18q2gAAIC9B0XHCIoYlzOIsQiMAAGrup7bcVxUUpSEsAjjxyxgAAEhDUJSOsIgsA9dWqnZU
+F1HDdSc0AgBAP/UYQVFawiKASr6MAQCA7QRF6QmLACohMAIAoMY+as39VEFRHj+c+qQepLZ2a9fl
+ckm6/9M0ub2t4XPj7C9C5w8AAOwjKEpHWATw5F1QUzpAEhgBAFCbe5+41n6qoCgtt6ExdFVRru12
+O1FfLpfL40+L1yUAAPRMUJSesAhgg5LBkcAIAIDa1NZHFRTlISwC2KlEaCQwAgCAc40WFIUgLBqe
+W9Dybb9B/jgERgAAjDaOHKWPOmJQFIKwCCCJ0nMaAQAAeY0aFIUgLCLhQBnIdy2oLgIAoDY991FH
+DopCEBa5sMk60NfGzqPWz6cc++K6AACgZqMHRSEIiwCyUG0HAMAIevshUFD0i7DIBW1QnHl/VFHg
+fAIAgPoJiv744XQAyONyuQh3AIDmTdOkavqlj1fzsXKO7CMo+kplEUBjnQkBFAAApCMo+k5YNCC3
+oJXfL4N7AACgxDjm+U+r48ySBEXvCYsACnxp+zIGAIC6CIrmCYsGo6rovP0zuAfXAwD47qb0mKZk
+lVFL54mgaJkJrvGFAax2MlzvAAD0QlC0TmURQAGeIAIAwNn90RJVRrX/yHhWUPS63toJiwaiMsAx
+wPkEAACjEhTFExYBcAphFwDAOXJXGNXYzxMUbSMsAgAAALolKNpOWDQIv+A7Fpyv5XmLzLkEAOjH
+6p+2eL7UEhS1Nom2sAgAAADojqBoP2HRAPwC4JjgXLL9AAC8U+IJaWcQFB0jLAIAAAC6ISg6TlgE
+QBTzFgEA6OttcUYVuaAoDWFR59zi4diAawEAgBEIitIRFgEAABDFjzx9a7m6SFCUlrAIgFM7EAAA
+cISgKD1hUcek/o4RuBYAANiitR8HBUV5CIsAAACA5gmK0hEWdcqv9I4V5JLr1ybXAgDov+Kc2UtQ
+lJawCAAAAGiWoCi9H04rYpjU9iu/puAz4ZLlOpimyecNAECnfb0cBEV5qCzqkCDDMcNxBgAA0hgt
+KApBWEQEv/IDJQnVAACMA2sxYlAUgrDIIItqPjgdO1wHrgcAMO6AeowaFIUgLAJoml98AAAgvZGD
+ohCERRiIahuK6PXXN9VFAAD01rcbPSgKQVjk4sMxBNeENgYAIIQgKLoTFjFL5Qzgs6JvgiIAfI/A
+H4KiP4RFYJCMjpT2064AAEMTFH0lLNLpx7GkUTWFlbm3xXWhPQEAchEUfScsovpBKBiU+9wYrS21
+IwBAGYKi94RFOv5UOEB2TF2baNMcbaf9AICzxzo1ERTNExYB+OJuarsEHtoMAOAoQdEyYRHNDELB
+4NxniPbVVgD4nsH5cpSgaJ2wyMWGY4tjp507bR9tBADw1VlB0et6aycsAkg8QM+theq/UtsoENEm
+AACxBEXxhEU0NwgFA3SfJ+/aH+0AADBHULSNsMigAMeYho5Ta4Fu6cBo1GtGWAkAME9QtJ2wiGYH
+oWCA7rNl7rg4BwEACKGeoKi1SbSFRQ0PEHCsOW9wfsZxEehuP072DwD0Vxm3Dyoo2u+HUx+g/g5Q
+60HR5XI5pR3v6+whaNMRBwCIJyg6RlhENwMpMCCv/3PmrPZ9Xm9rn3fOSQCAbQRFxwmLDGZpYEA8
+TZPKiMHPKddHnvOwxrZ1nQAA7CcoSkNYBFCxHqv+agiM7l6344z2Fg4B0INeftyk7XNFUJSOsAgf
+6uDaPGXfagxJ5rYpxbEQCgEA5CMoSktY1BiDjXEHwn6tGe8ccp347AUAYJ2gKL2/nFYGpIDr8sx9
+9TkEAMBegqI8hEUN8cu2Ab9zwHljv9H2AADvCYrSERYBGLTbf20OANA0QVFa5iwySABci1W1hQo6
+5xwAwBaCovRUFjXC4MmAzLngHBmpTbSLcw4AIIagKA+VRQAG7FW3kYDUOQcAcKbRgqIQVBY1IcdA
+yaDBOcF5A3bXn88r5xwAQBtGDIpCUFkERQZqwh0M1tO0n2vJOQcAUMqoQVEIwiIAA/YG21No5JwD
+AMhp5KAoBLehVc8taAZvJc8N0h1vt/6UaWO0CQB9j13gDKMHRSGoLAJINlDn3HYfsYPqvAMASEtQ
+9IuwyMACcB11dVxGCI2cgwAA6QmK/hAWVUwZZ3+Du9THdJomg0aDcRaOXS+fo85HAIC8BEVfCYsM
+DnBMnX8Mc821FB65BgFokR8zaZGg6DthEaT+gvypDaBW7zqvNQRIOtUAAOcQFL0nLAJgaEtBTcog
+SSAEAFAXQdE8YREAzBDwAAD0SVC0TFgEKQaU//z637lb0O7/DgAAwLkEReuERVBAzDxGAiUAAIC8
+zgqKXtdbO2ERVGItUBImAQDQRL/WE9G6O569EBTFExZBQnOBToonpKlOAgAA2EdQtI2wCAqICXEE
+SgAAAOkJirYTFkEl1kKcFGFS7HIESgAAHOpzuhWNStQSFH3ePpsKj4RF0IhS1UkxyxEmAQAAtasp
+KGqNsAg64nY3AACg6jFLoYozQdExwiIY7cPZ7W4AAEDHBEXHCYuAL2q63S12ewAAgPSmaWpumwVF
+aQiLgM3MnwQAANRGUJSOsAjIwvxJAABj80Q0ShIUpSUsAk5j/iQA8i3Z/QAADThJREFUAOAoQVF6
+wiKgWm53AwAAlgiK8hAWAU1zuxsAABCCoCglYRHQPYESAAD0TVCUlrAIIJg/CQAAWiUoSk9YBBDB
+/EkAADv6NZ6IxnM/NsO5ICjKQ1gEkOrLz+1uAADQndGCohCERQBFCZQAAGjBNE0aIYwZFIUgLAKo
+jvmTAADgfKMGRSEIiwCaY/4kAKAl5i1q85iNbuSgKARhEUCX3O4GAAD7jB4UhSAsAhiW290AACjW
+92ykukxQ9IuwCID3X+gV3e4Wuz0AALCXoOgPYREAu5k/CQCgL6POVyQo+kpYBEBW5k8CAKBmgqLv
+hEUAnM78SQDQN09Ea+c4jUZQ9J6wCIDqmT8JAIDUBEXzhEUAdMH8SQAAB/o3g1UVCYqWCYsAGIb5
+kwAAEBStExYBwBPzJwEAI1FR9HnKemsnLAKADdzuBgDQJkFRPGERACTmdjcAePO95YloVR6TIn2j
+Co67oGgbYREAnECgBABQhqBoO2ERAFTK/EkAQA4jzVNUS1D0eftsKjwSFgFAo86cP+kjXL92gP67
+OSAAQFVqCopaIywCgI6VCpQ+/r6uvkagBIB5i85t+1P6Iicdb0HRMcIiABhcqdvdBEoAQAmCouOE
+RQDAonuYNH3p/Ny+do4igqCoTtbMch6B1b+XEP43OSgAEOHsuYnOqCoSFKUhLAIADoupCEoVKIV/
+VzqewiQAGJKgKB1hEQBQRLFA6d+IXzEFSgB0aKSnnH3rQwiKkhIWAQDVmAuUrtfrr05wovmTBEoA
+0A9BUXrCIgCgHTEBzr+J5kcQKAGEEH7NO5OyYqX1J6KNXL2z9bwpQVCUh7AIAOhLTYGSMAkAihEU
+pSMsAgDGUypQUp0EwIDOqBwTFKUlLAIAeGctxHG7GwBUQVCUnrAIAGAPt7sBwDelq4oERXkIiwAA
+cnG7G9BRAGCSa2LOkx6NFhSFICwCADiXQAkAqjViUBSCsAgAoH7mTwKgcj1WFY0aFIUgLAIAaF8l
+8ydNP0O4/ONwANC+kYOiEIRFAABjKBQoTT+fOtrhGvWez/9ujg80wLxFLJ0bPRk9KApBWAQAwF2p
+291eO+V/X1dfI1ACoARB0S/CIgAA4qyESZfL5UtlUdLOu0AJoEo9VRUJiv4QFgEAkG7Q8E8I06OT
+fYvrnEcEQSmWI0wCYPY7RFD0hbAIAIBTxYQ4KQIl1UkA6ago6puwCACA6q2FOKWqk2K2BYB2CIre
+ExYBANC8UtVJscsRKNErT0Tjfh70QFA0T1gEAMAQagqUhEkA5xIULRMWAQDAfbBg/iSAWSqKxiEs
+AgCADcyfBNCus4Ki1/XWTlgEAAAJud0NtjFvUf1UFKVdbwuERQAAUJjb3QDKEhRtIywCAIAKCZSo
+VeonolH3se6BoGg7YREAADTK/EkAK59flQRFn7fPpsIjYREAAHTK/EnAXj1UFdUUFLVGWAQAAANz
+uxvQI0HRMcIiAABgkUCJV6nnLfJEtHqOaw8ERccJiwAAgMPMnwTUQFCUhrAIAADIzvxJUKeeKroE
+RekIiwAAgCq43S3xAPZpPwVk9E5QlJawCAAAaEYNt7u1GLx8/H0VGNHtvFCCovSERQAAQDdKVCe1
+WpkkMKJHgqI8hEUAAMBQSlQn1TBv0ud/t2/bkTIw8kS0Oo3choKidIRFAAAAzwO/CsKkmO2I3Zec
+gRFUc90KipISFgEAAGwZlJ44b9KekCdnYNRCFYtqpQGuSUFRcsIiAACAlAPXjPMm7b29TYUR3V5v
+gqIshEUAAAClB7iZAqWt74kJjKafjhdjGy0oCkFYBAAAUKV3IU6qW9y+L3PS4PDu+hgwKApBWAQA
+ANCMUvMlAeMGRSEIiwAAALqR6va2PXMZnTWwtl7r7Wm9tRAWAQAADCBn1ZEgwXqtty/Coozc9QsA
+AJwt5glqHwb01mu9p663NsIiAACAzsQERAb01mu9day3RsIiAACATpQKiUYc0Fuv9Y5EWJTY5+2m
+EQAAgHrGKAkDolEH9NZrvaMRFgEAAHQoR0g04oDeeq13RMIiAACATuQKiEYd0Fuv9Y7qL00AAACA
+Ab31Wi93wiIAAAAM6K3XenkQFgEAAGBAb73WW3C9tRMWAQAAYEBvvdZbaL0tEBYBAABgQG+91ltg
+va0QFgEAAGBAb73Wm3m9LREWAQAAMEuQYL3W2856UxEWAQAA8JYBvfVabzvrTekyTdO0+U2XSwgh
+hNvt5tMTAMjuer2GEELY0W2hZMfydx9xenSO9RWhFS3fLgMtKhkgffzuR11+/3dMf0plEQAAAAAP
+wiIAAAAAHn5oAgAAgLG1OKcKkI/KIgAAAAAehEUAAAAAPAiLAAAAAHgQFgEAAADwYIJrAAAAivq4
+frz9+7mJtre8/vm1SxN3z71ubl1bl5n6PUe2K3adW4/DWvsfPb5737PlmJrc/T2VRQAAABSzNHB/
+929bX3/W9u/ZzjO2/+gxOrrcrcve856alt8qlUUAAAAUsVb18zpoX3r9/d8+rh+L1Sdbq19itu91
+uXu28/73qapz9tiyjyWWneo9Z+xvb1QWAQAAkF3M7WGxAczSa3Pac9vbnu08M7RYu+3r8/b5eM3W
+dj/aFjmO8xnnUQuERQAAABSzNQhZen3Mv81VK6UOZPZu52i3Qe1p99zhmYqi79yGBgAAABFShjqf
+t88vy4sNsfZMon10H9fmYzozbMndHqMSFgEAANCleyBzDzS2VBWVmDfoXfVTrsqnFPv4/HevYRd9
+ERYBAABApBwhzlxodHRC59T7WGM4pIIoD2ERAAAAxWy9bWntaWdrnquL7v8dY2sIcXQ7n9d55oTd
+e7Z9yzHds2+520OF1HcmuAYAACC7mKdOzT1ZbG0enVqeHrZlO1sLKO5PQXv9s8WeY5b7ONdyHtVG
+ZREAAABFPM9zs6UqaOn1MQP8Ek/T2rOde+ZFamVC55T7lqo9SsxD1QuVRQAAABSz9RHzex9Jb7+O
+i7l1b8utc3uqkfa8p6blt+oyTdO0+U2XSwghhNvtpgUBgOyu12sIIYQd3RZKdix/9xGnRwdcXxEA
+zvbxux91+f3fMf0plUUAAAAAPJizCACA09yrxl7NVbBvef3za5cq4udeN7eurctM/Z4j2xW7ztT7
+eH/t2nGda//YZS7tz1q77DlmAL1SWQQAwCmWBvbv/m3r68/a/j3becb2x+5jDccixTLn9qXm9oc9
+Pq4fi38gRrHKopikvvQvG3vWs+fLxS8yfpEBAOb7DDH9taXX3//ter0u9pP29AvXtu91uXu2c6mP
+d6RftsWWdR89FiXsOWZ7zw+ojcmaSaFIZVGqXx5S/nqzd3v37r9fZAAA1sOGd3+/9votPz6msue2
+tz3bWWvgcsaxOLq81tof4EzZK4u2/mq05XVry1/7ZWPLLw4pvlBTbXcNHQS/yAAAOfoae19/u90W
+K5zvP3jN9V9S9lf2budaFXlpe6uacrRnquW11P4AZ8paWbT1V6PUy6/h1wO/yPjCBQD6kzNcWqrk
+fve61z9792duOTX05e7bkONHyL3tD9CzIreh5f6CWftlo9aORMntzn1Puy9XAKBmr2HDliqSEkHK
+7XYTWpx8fmh/gD9+1LhRZ06SfOQLodQEhEe+BN+VYKdc9mtbqCoCAHqVo5/zroJmy5QKqfclV9+x
+tr7snvYH6NmPkXe+9nCn1Q6T0AgAiO2LbekjrD3tLKav8lwtErvuPU/KPbKdc/2qVo5diW0+crtd
+D+0PkNtfNW7UvQz0tRz0zKdb7Nnu5+2v5YumxPbMlfECALz2tbY+DGTtCbO1PBxky3a21E86eiy2
+PiE4VT+9l/YHKKVIZdHR0s21JyDs/WWjhvmM/CIDAIzouX+3pSpo6fUxfbsSc2nu2c49fdaUUzds
+DWy27mOq45dif1K1P0DPslYWbf3VKPXya3uKQ6rt9osMANCDrQ/7qPmhJr3u17uK8b3bnGo/j94F
+0Op5BVDSZZqmafObLpdNH55rQcJrBcrWx83HLv/19ak+/Pc+Qn7rdqfc19flbA1+UuwLAGz9rt3R
+baFkx/J3H/F+lD59/wPA6T5+96Muv/87pj9VZM6iFGn93mXU8uQGv8gAAAAALShSWQQAcITKokY6
+liqLAKA6eyqLfmg2AADoj2kCANhLWAQAAB0SBgGwl7BohV9kAAAAgJEIi1YIgwAAAICRCIsAAMji
+Y6VCGwCo01+aAAAAAIA7lUUAACR10QQA0PZ3+TRN0+Y3XXQBAIDydnRbKNmx1EcEgC76UyqLAAAo
+1vkEAOq3KyzSEQAAAADokwmuAQAAAHgQFgEAAADwICwCAAAA4EFYBAAAAMCDsAgAAACAB2ERAAAA
+AA/CIgAAAAAehEUAAAAAPAiLAAAAAHgQFgEAAADwICwCAAAA4EFYBAAAAMCDsAgAAACAB2ERAAAA
+AA/CIgAAAAAehEUAAAAAPAiLAAAAAHgQFgEAAADwICwCAAAA4EFYBAAAAMCDsAgAAACAB2ERAAAA
+AA/CIgAAAAAe/g/10lQlA3JSSwAAAABJRU5ErkJggg==
diff --git a/Documentation/DocBook/media/v4l/.gitignore b/Documentation/DocBook/media/v4l/.gitignore
new file mode 100644
index 000000000..d7ec32eaf
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/.gitignore
@@ -0,0 +1 @@
+!*.xml
diff --git a/Documentation/DocBook/media/v4l/biblio.xml b/Documentation/DocBook/media/v4l/biblio.xml
new file mode 100644
index 000000000..fdee6b3f3
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/biblio.xml
@@ -0,0 +1,353 @@
+ <bibliography>
+ <title>References</title>
+
+ <biblioentry id="cea608">
+ <abbrev>CEA&nbsp;608-E</abbrev>
+ <authorgroup>
+ <corpauthor>Consumer Electronics Association (<ulink
+url="http://www.ce.org">http://www.ce.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>CEA-608-E R-2014 "Line 21 Data Services"</title>
+ </biblioentry>
+
+ <biblioentry id="en300294">
+ <abbrev>EN&nbsp;300&nbsp;294</abbrev>
+ <authorgroup>
+ <corpauthor>European Telecommunication Standards Institute
+(<ulink url="http://www.etsi.org">http://www.etsi.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>EN 300 294 "625-line television Wide Screen Signalling
+(WSS)"</title>
+ </biblioentry>
+
+ <biblioentry id="ets300231">
+ <abbrev>ETS&nbsp;300&nbsp;231</abbrev>
+ <authorgroup>
+ <corpauthor>European Telecommunication Standards Institute
+(<ulink
+url="http://www.etsi.org">http://www.etsi.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ETS 300 231 "Specification of the domestic video
+Programme Delivery Control system (PDC)"</title>
+ </biblioentry>
+
+ <biblioentry id="ets300706">
+ <abbrev>ETS&nbsp;300&nbsp;706</abbrev>
+ <authorgroup>
+ <corpauthor>European Telecommunication Standards Institute
+(<ulink url="http://www.etsi.org">http://www.etsi.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ETS 300 706 "Enhanced Teletext specification"</title>
+ </biblioentry>
+
+ <biblioentry id="mpeg2part1">
+ <abbrev>ISO&nbsp;13818-1</abbrev>
+ <authorgroup>
+ <corpauthor>International Telecommunication Union (<ulink
+url="http://www.itu.ch">http://www.itu.ch</ulink>), International
+Organisation for Standardisation (<ulink
+url="http://www.iso.ch">http://www.iso.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ITU-T Rec. H.222.0 | ISO/IEC 13818-1 "Information
+technology &mdash; Generic coding of moving pictures and associated
+audio information: Systems"</title>
+ </biblioentry>
+
+ <biblioentry id="mpeg2part2">
+ <abbrev>ISO&nbsp;13818-2</abbrev>
+ <authorgroup>
+ <corpauthor>International Telecommunication Union (<ulink
+url="http://www.itu.ch">http://www.itu.ch</ulink>), International
+Organisation for Standardisation (<ulink
+url="http://www.iso.ch">http://www.iso.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ITU-T Rec. H.262 | ISO/IEC 13818-2 "Information
+technology &mdash; Generic coding of moving pictures and associated
+audio information: Video"</title>
+ </biblioentry>
+
+ <biblioentry id="itu470">
+ <abbrev>ITU&nbsp;BT.470</abbrev>
+ <authorgroup>
+ <corpauthor>International Telecommunication Union (<ulink
+url="http://www.itu.ch">http://www.itu.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ITU-R Recommendation BT.470-6 "Conventional Television
+Systems"</title>
+ </biblioentry>
+
+ <biblioentry id="itu601">
+ <abbrev>ITU&nbsp;BT.601</abbrev>
+ <authorgroup>
+ <corpauthor>International Telecommunication Union (<ulink
+url="http://www.itu.ch">http://www.itu.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ITU-R Recommendation BT.601-5 "Studio Encoding Parameters
+of Digital Television for Standard 4:3 and Wide-Screen 16:9 Aspect
+Ratios"</title>
+ </biblioentry>
+
+ <biblioentry id="itu653">
+ <abbrev>ITU&nbsp;BT.653</abbrev>
+ <authorgroup>
+ <corpauthor>International Telecommunication Union (<ulink
+url="http://www.itu.ch">http://www.itu.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ITU-R Recommendation BT.653-3 "Teletext systems"</title>
+ </biblioentry>
+
+ <biblioentry id="itu709">
+ <abbrev>ITU&nbsp;BT.709</abbrev>
+ <authorgroup>
+ <corpauthor>International Telecommunication Union (<ulink
+url="http://www.itu.ch">http://www.itu.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ITU-R Recommendation BT.709-5 "Parameter values for the
+HDTV standards for production and international programme
+exchange"</title>
+ </biblioentry>
+
+ <biblioentry id="itu1119">
+ <abbrev>ITU&nbsp;BT.1119</abbrev>
+ <authorgroup>
+ <corpauthor>International Telecommunication Union (<ulink
+url="http://www.itu.ch">http://www.itu.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ITU-R Recommendation BT.1119 "625-line
+television Wide Screen Signalling (WSS)"</title>
+ </biblioentry>
+
+ <biblioentry id="jfif">
+ <abbrev>JFIF</abbrev>
+ <authorgroup>
+ <corpauthor>Independent JPEG Group (<ulink
+url="http://www.ijg.org">http://www.ijg.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>JPEG File Interchange Format</title>
+ <subtitle>Version 1.02</subtitle>
+ </biblioentry>
+
+ <biblioentry id="itu-t81">
+ <abbrev>ITU-T.81</abbrev>
+ <authorgroup>
+ <corpauthor>International Telecommunication Union
+(<ulink url="http://www.itu.int">http://www.itu.int</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ITU-T Recommendation T.81
+"Information Technology &mdash; Digital Compression and Coding of Continous-Tone
+Still Images &mdash; Requirements and Guidelines"</title>
+ </biblioentry>
+
+ <biblioentry id="w3c-jpeg-jfif">
+ <abbrev>W3C JPEG JFIF</abbrev>
+ <authorgroup>
+ <corpauthor>The World Wide Web Consortium (<ulink
+url="http://www.w3.org/Graphics/JPEG">http://www.w3.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>JPEG JFIF</title>
+ </biblioentry>
+
+ <biblioentry id="smpte12m">
+ <abbrev>SMPTE&nbsp;12M</abbrev>
+ <authorgroup>
+ <corpauthor>Society of Motion Picture and Television Engineers
+(<ulink url="http://www.smpte.org">http://www.smpte.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>SMPTE 12M-1999 "Television, Audio and Film - Time and
+Control Code"</title>
+ </biblioentry>
+
+ <biblioentry id="smpte170m">
+ <abbrev>SMPTE&nbsp;170M</abbrev>
+ <authorgroup>
+ <corpauthor>Society of Motion Picture and Television Engineers
+(<ulink url="http://www.smpte.org">http://www.smpte.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>SMPTE 170M-1999 "Television - Composite Analog Video
+Signal - NTSC for Studio Applications"</title>
+ </biblioentry>
+
+ <biblioentry id="smpte240m">
+ <abbrev>SMPTE&nbsp;240M</abbrev>
+ <authorgroup>
+ <corpauthor>Society of Motion Picture and Television Engineers
+(<ulink url="http://www.smpte.org">http://www.smpte.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>SMPTE 240M-1999 "Television - Signal Parameters -
+1125-Line High-Definition Production"</title>
+ </biblioentry>
+
+ <biblioentry id="srgb">
+ <abbrev>sRGB</abbrev>
+ <authorgroup>
+ <corpauthor>International Electrotechnical Commission
+(<ulink url="http://www.iec.ch">http://www.iec.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>IEC 61966-2-1 ed1.0 "Multimedia systems and equipment - Colour measurement
+and management - Part 2-1: Colour management - Default RGB colour space - sRGB"</title>
+ </biblioentry>
+
+ <biblioentry id="sycc">
+ <abbrev>sYCC</abbrev>
+ <authorgroup>
+ <corpauthor>International Electrotechnical Commission
+(<ulink url="http://www.iec.ch">http://www.iec.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>IEC 61966-2-1-am1 ed1.0 "Amendment 1 - Multimedia systems and equipment - Colour measurement
+and management - Part 2-1: Colour management - Default RGB colour space - sRGB"</title>
+ </biblioentry>
+
+ <biblioentry id="xvycc">
+ <abbrev>xvYCC</abbrev>
+ <authorgroup>
+ <corpauthor>International Electrotechnical Commission
+(<ulink url="http://www.iec.ch">http://www.iec.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>IEC 61966-2-4 ed1.0 "Multimedia systems and equipment - Colour measurement
+and management - Part 2-4: Colour management - Extended-gamut YCC colour space for video
+applications - xvYCC"</title>
+ </biblioentry>
+
+ <biblioentry id="adobergb">
+ <abbrev>AdobeRGB</abbrev>
+ <authorgroup>
+ <corpauthor>Adobe Systems Incorporated (<ulink url="http://www.adobe.com">http://www.adobe.com</ulink>)</corpauthor>
+ </authorgroup>
+ <title>Adobe&copy; RGB (1998) Color Image Encoding Version 2005-05</title>
+ </biblioentry>
+
+ <biblioentry id="oprgb">
+ <abbrev>opRGB</abbrev>
+ <authorgroup>
+ <corpauthor>International Electrotechnical Commission
+(<ulink url="http://www.iec.ch">http://www.iec.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>IEC 61966-2-5 "Multimedia systems and equipment - Colour measurement
+and management - Part 2-5: Colour management - Optional RGB colour space - opRGB"</title>
+ </biblioentry>
+
+ <biblioentry id="itu2020">
+ <abbrev>ITU&nbsp;BT.2020</abbrev>
+ <authorgroup>
+ <corpauthor>International Telecommunication Union (<ulink
+url="http://www.itu.ch">http://www.itu.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>ITU-R Recommendation BT.2020 (08/2012) "Parameter values for ultra-high
+definition television systems for production and international programme exchange"
+</title>
+ </biblioentry>
+
+ <biblioentry id="tech3213">
+ <abbrev>EBU&nbsp;Tech&nbsp;3213</abbrev>
+ <authorgroup>
+ <corpauthor>European Broadcast Union (<ulink
+url="http://www.ebu.ch">http://www.ebu.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>E.B.U. Standard for Chromaticity Tolerances for Studio Monitors"</title>
+ </biblioentry>
+
+ <biblioentry id="iec62106">
+ <abbrev>IEC&nbsp;62106</abbrev>
+ <authorgroup>
+ <corpauthor>International Electrotechnical Commission
+(<ulink url="http://www.iec.ch">http://www.iec.ch</ulink>)</corpauthor>
+ </authorgroup>
+ <title>Specification of the radio data system (RDS) for VHF/FM sound broadcasting
+in the frequency range from 87,5 to 108,0 MHz</title>
+ </biblioentry>
+
+ <biblioentry id="nrsc4">
+ <abbrev>NRSC-4-B</abbrev>
+ <authorgroup>
+ <corpauthor>National Radio Systems Committee
+(<ulink url="http://www.nrscstandards.org">http://www.nrscstandards.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>NRSC-4-B: United States RBDS Standard</title>
+ </biblioentry>
+
+ <biblioentry id="iso12232">
+ <abbrev>ISO&nbsp;12232:2006</abbrev>
+ <authorgroup>
+ <corpauthor>International Organization for Standardization
+(<ulink url="http://www.iso.org">http://www.iso.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>Photography &mdash; Digital still cameras &mdash; Determination
+ of exposure index, ISO speed ratings, standard output sensitivity, and
+ recommended exposure index</title>
+ </biblioentry>
+
+ <biblioentry id="cea861">
+ <abbrev>CEA-861-E</abbrev>
+ <authorgroup>
+ <corpauthor>Consumer Electronics Association
+(<ulink url="http://www.ce.org">http://www.ce.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>A DTV Profile for Uncompressed High Speed Digital Interfaces</title>
+ </biblioentry>
+
+ <biblioentry id="vesadmt">
+ <abbrev>VESA&nbsp;DMT</abbrev>
+ <authorgroup>
+ <corpauthor>Video Electronics Standards Association
+(<ulink url="http://www.vesa.org">http://www.vesa.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>VESA and Industry Standards and Guidelines for Computer Display Monitor Timing (DMT)</title>
+ </biblioentry>
+
+ <biblioentry id="vesaedid">
+ <abbrev>EDID</abbrev>
+ <authorgroup>
+ <corpauthor>Video Electronics Standards Association
+(<ulink url="http://www.vesa.org">http://www.vesa.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>VESA Enhanced Extended Display Identification Data Standard</title>
+ <subtitle>Release A, Revision 2</subtitle>
+ </biblioentry>
+
+ <biblioentry id="hdcp">
+ <abbrev>HDCP</abbrev>
+ <authorgroup>
+ <corpauthor>Digital Content Protection LLC
+(<ulink url="http://www.digital-cp.com">http://www.digital-cp.com</ulink>)</corpauthor>
+ </authorgroup>
+ <title>High-bandwidth Digital Content Protection System</title>
+ <subtitle>Revision 1.3</subtitle>
+ </biblioentry>
+
+ <biblioentry id="hdmi">
+ <abbrev>HDMI</abbrev>
+ <authorgroup>
+ <corpauthor>HDMI Licensing LLC
+(<ulink url="http://www.hdmi.org">http://www.hdmi.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>High-Definition Multimedia Interface</title>
+ <subtitle>Specification Version 1.4a</subtitle>
+ </biblioentry>
+
+ <biblioentry id="dp">
+ <abbrev>DP</abbrev>
+ <authorgroup>
+ <corpauthor>Video Electronics Standards Association
+(<ulink url="http://www.vesa.org">http://www.vesa.org</ulink>)</corpauthor>
+ </authorgroup>
+ <title>VESA DisplayPort Standard</title>
+ <subtitle>Version 1, Revision 2</subtitle>
+ </biblioentry>
+
+ <biblioentry id="poynton">
+ <abbrev>poynton</abbrev>
+ <authorgroup>
+ <corpauthor>Charles Poynton</corpauthor>
+ </authorgroup>
+ <title>Digital Video and HDTV, Algorithms and Interfaces</title>
+ </biblioentry>
+
+ <biblioentry id="colimg">
+ <abbrev>colimg</abbrev>
+ <authorgroup>
+ <corpauthor>Erik Reinhard et al.</corpauthor>
+ </authorgroup>
+ <title>Color Imaging: Fundamentals and Applications</title>
+ </biblioentry>
+
+ </bibliography>
diff --git a/Documentation/DocBook/media/v4l/capture.c.xml b/Documentation/DocBook/media/v4l/capture.c.xml
new file mode 100644
index 000000000..1c5c49a2d
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/capture.c.xml
@@ -0,0 +1,659 @@
+<programlisting>
+/*
+ * V4L2 video capture example
+ *
+ * This program can be used and distributed without restrictions.
+ *
+ * This program is provided with the V4L2 API
+ * see http://linuxtv.org/docs.php for more information
+ */
+
+#include &lt;stdio.h&gt;
+#include &lt;stdlib.h&gt;
+#include &lt;string.h&gt;
+#include &lt;assert.h&gt;
+
+#include &lt;getopt.h&gt; /* getopt_long() */
+
+#include &lt;fcntl.h&gt; /* low-level i/o */
+#include &lt;unistd.h&gt;
+#include &lt;errno.h&gt;
+#include &lt;sys/stat.h&gt;
+#include &lt;sys/types.h&gt;
+#include &lt;sys/time.h&gt;
+#include &lt;sys/mman.h&gt;
+#include &lt;sys/ioctl.h&gt;
+
+#include &lt;linux/videodev2.h&gt;
+
+#define CLEAR(x) memset(&amp;(x), 0, sizeof(x))
+
+enum io_method {
+ IO_METHOD_READ,
+ IO_METHOD_MMAP,
+ IO_METHOD_USERPTR,
+};
+
+struct buffer {
+ void *start;
+ size_t length;
+};
+
+static char *dev_name;
+static enum io_method io = IO_METHOD_MMAP;
+static int fd = -1;
+struct buffer *buffers;
+static unsigned int n_buffers;
+static int out_buf;
+static int force_format;
+static int frame_count = 70;
+
+static void errno_exit(const char *s)
+{
+ fprintf(stderr, "%s error %d, %s\n", s, errno, strerror(errno));
+ exit(EXIT_FAILURE);
+}
+
+static int xioctl(int fh, int request, void *arg)
+{
+ int r;
+
+ do {
+ r = ioctl(fh, request, arg);
+ } while (-1 == r &amp;&amp; EINTR == errno);
+
+ return r;
+}
+
+static void process_image(const void *p, int size)
+{
+ if (out_buf)
+ fwrite(p, size, 1, stdout);
+
+ fflush(stderr);
+ fprintf(stderr, ".");
+ fflush(stdout);
+}
+
+static int read_frame(void)
+{
+ struct <link linkend="v4l2-buffer">v4l2_buffer</link> buf;
+ unsigned int i;
+
+ switch (io) {
+ case IO_METHOD_READ:
+ if (-1 == read(fd, buffers[0].start, buffers[0].length)) {
+ switch (errno) {
+ case EAGAIN:
+ return 0;
+
+ case EIO:
+ /* Could ignore EIO, see spec. */
+
+ /* fall through */
+
+ default:
+ errno_exit("read");
+ }
+ }
+
+ process_image(buffers[0].start, buffers[0].length);
+ break;
+
+ case IO_METHOD_MMAP:
+ CLEAR(buf);
+
+ buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ buf.memory = V4L2_MEMORY_MMAP;
+
+ if (-1 == xioctl(fd, VIDIOC_DQBUF, &amp;buf)) {
+ switch (errno) {
+ case EAGAIN:
+ return 0;
+
+ case EIO:
+ /* Could ignore EIO, see spec. */
+
+ /* fall through */
+
+ default:
+ errno_exit("VIDIOC_DQBUF");
+ }
+ }
+
+ assert(buf.index &lt; n_buffers);
+
+ process_image(buffers[buf.index].start, buf.bytesused);
+
+ if (-1 == xioctl(fd, VIDIOC_QBUF, &amp;buf))
+ errno_exit("VIDIOC_QBUF");
+ break;
+
+ case IO_METHOD_USERPTR:
+ CLEAR(buf);
+
+ buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ buf.memory = V4L2_MEMORY_USERPTR;
+
+ if (-1 == xioctl(fd, VIDIOC_DQBUF, &amp;buf)) {
+ switch (errno) {
+ case EAGAIN:
+ return 0;
+
+ case EIO:
+ /* Could ignore EIO, see spec. */
+
+ /* fall through */
+
+ default:
+ errno_exit("VIDIOC_DQBUF");
+ }
+ }
+
+ for (i = 0; i &lt; n_buffers; ++i)
+ if (buf.m.userptr == (unsigned long)buffers[i].start
+ &amp;&amp; buf.length == buffers[i].length)
+ break;
+
+ assert(i &lt; n_buffers);
+
+ process_image((void *)buf.m.userptr, buf.bytesused);
+
+ if (-1 == xioctl(fd, VIDIOC_QBUF, &amp;buf))
+ errno_exit("VIDIOC_QBUF");
+ break;
+ }
+
+ return 1;
+}
+
+static void mainloop(void)
+{
+ unsigned int count;
+
+ count = frame_count;
+
+ while (count-- &gt; 0) {
+ for (;;) {
+ fd_set fds;
+ struct timeval tv;
+ int r;
+
+ FD_ZERO(&amp;fds);
+ FD_SET(fd, &amp;fds);
+
+ /* Timeout. */
+ tv.tv_sec = 2;
+ tv.tv_usec = 0;
+
+ r = select(fd + 1, &amp;fds, NULL, NULL, &amp;tv);
+
+ if (-1 == r) {
+ if (EINTR == errno)
+ continue;
+ errno_exit("select");
+ }
+
+ if (0 == r) {
+ fprintf(stderr, "select timeout\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (read_frame())
+ break;
+ /* EAGAIN - continue select loop. */
+ }
+ }
+}
+
+static void stop_capturing(void)
+{
+ enum <link linkend="v4l2-buf-type">v4l2_buf_type</link> type;
+
+ switch (io) {
+ case IO_METHOD_READ:
+ /* Nothing to do. */
+ break;
+
+ case IO_METHOD_MMAP:
+ case IO_METHOD_USERPTR:
+ type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ if (-1 == xioctl(fd, VIDIOC_STREAMOFF, &amp;type))
+ errno_exit("VIDIOC_STREAMOFF");
+ break;
+ }
+}
+
+static void start_capturing(void)
+{
+ unsigned int i;
+ enum <link linkend="v4l2-buf-type">v4l2_buf_type</link> type;
+
+ switch (io) {
+ case IO_METHOD_READ:
+ /* Nothing to do. */
+ break;
+
+ case IO_METHOD_MMAP:
+ for (i = 0; i &lt; n_buffers; ++i) {
+ struct <link linkend="v4l2-buffer">v4l2_buffer</link> buf;
+
+ CLEAR(buf);
+ buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ buf.memory = V4L2_MEMORY_MMAP;
+ buf.index = i;
+
+ if (-1 == xioctl(fd, VIDIOC_QBUF, &amp;buf))
+ errno_exit("VIDIOC_QBUF");
+ }
+ type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ if (-1 == xioctl(fd, VIDIOC_STREAMON, &amp;type))
+ errno_exit("VIDIOC_STREAMON");
+ break;
+
+ case IO_METHOD_USERPTR:
+ for (i = 0; i &lt; n_buffers; ++i) {
+ struct <link linkend="v4l2-buffer">v4l2_buffer</link> buf;
+
+ CLEAR(buf);
+ buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ buf.memory = V4L2_MEMORY_USERPTR;
+ buf.index = i;
+ buf.m.userptr = (unsigned long)buffers[i].start;
+ buf.length = buffers[i].length;
+
+ if (-1 == xioctl(fd, VIDIOC_QBUF, &amp;buf))
+ errno_exit("VIDIOC_QBUF");
+ }
+ type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ if (-1 == xioctl(fd, VIDIOC_STREAMON, &amp;type))
+ errno_exit("VIDIOC_STREAMON");
+ break;
+ }
+}
+
+static void uninit_device(void)
+{
+ unsigned int i;
+
+ switch (io) {
+ case IO_METHOD_READ:
+ free(buffers[0].start);
+ break;
+
+ case IO_METHOD_MMAP:
+ for (i = 0; i &lt; n_buffers; ++i)
+ if (-1 == munmap(buffers[i].start, buffers[i].length))
+ errno_exit("munmap");
+ break;
+
+ case IO_METHOD_USERPTR:
+ for (i = 0; i &lt; n_buffers; ++i)
+ free(buffers[i].start);
+ break;
+ }
+
+ free(buffers);
+}
+
+static void init_read(unsigned int buffer_size)
+{
+ buffers = calloc(1, sizeof(*buffers));
+
+ if (!buffers) {
+ fprintf(stderr, "Out of memory\n");
+ exit(EXIT_FAILURE);
+ }
+
+ buffers[0].length = buffer_size;
+ buffers[0].start = malloc(buffer_size);
+
+ if (!buffers[0].start) {
+ fprintf(stderr, "Out of memory\n");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void init_mmap(void)
+{
+ struct <link linkend="v4l2-requestbuffers">v4l2_requestbuffers</link> req;
+
+ CLEAR(req);
+
+ req.count = 4;
+ req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ req.memory = V4L2_MEMORY_MMAP;
+
+ if (-1 == xioctl(fd, VIDIOC_REQBUFS, &amp;req)) {
+ if (EINVAL == errno) {
+ fprintf(stderr, "%s does not support "
+ "memory mapping\n", dev_name);
+ exit(EXIT_FAILURE);
+ } else {
+ errno_exit("VIDIOC_REQBUFS");
+ }
+ }
+
+ if (req.count &lt; 2) {
+ fprintf(stderr, "Insufficient buffer memory on %s\n",
+ dev_name);
+ exit(EXIT_FAILURE);
+ }
+
+ buffers = calloc(req.count, sizeof(*buffers));
+
+ if (!buffers) {
+ fprintf(stderr, "Out of memory\n");
+ exit(EXIT_FAILURE);
+ }
+
+ for (n_buffers = 0; n_buffers &lt; req.count; ++n_buffers) {
+ struct <link linkend="v4l2-buffer">v4l2_buffer</link> buf;
+
+ CLEAR(buf);
+
+ buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ buf.memory = V4L2_MEMORY_MMAP;
+ buf.index = n_buffers;
+
+ if (-1 == xioctl(fd, VIDIOC_QUERYBUF, &amp;buf))
+ errno_exit("VIDIOC_QUERYBUF");
+
+ buffers[n_buffers].length = buf.length;
+ buffers[n_buffers].start =
+ mmap(NULL /* start anywhere */,
+ buf.length,
+ PROT_READ | PROT_WRITE /* required */,
+ MAP_SHARED /* recommended */,
+ fd, buf.m.offset);
+
+ if (MAP_FAILED == buffers[n_buffers].start)
+ errno_exit("mmap");
+ }
+}
+
+static void init_userp(unsigned int buffer_size)
+{
+ struct <link linkend="v4l2-requestbuffers">v4l2_requestbuffers</link> req;
+
+ CLEAR(req);
+
+ req.count = 4;
+ req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ req.memory = V4L2_MEMORY_USERPTR;
+
+ if (-1 == xioctl(fd, VIDIOC_REQBUFS, &amp;req)) {
+ if (EINVAL == errno) {
+ fprintf(stderr, "%s does not support "
+ "user pointer i/o\n", dev_name);
+ exit(EXIT_FAILURE);
+ } else {
+ errno_exit("VIDIOC_REQBUFS");
+ }
+ }
+
+ buffers = calloc(4, sizeof(*buffers));
+
+ if (!buffers) {
+ fprintf(stderr, "Out of memory\n");
+ exit(EXIT_FAILURE);
+ }
+
+ for (n_buffers = 0; n_buffers &lt; 4; ++n_buffers) {
+ buffers[n_buffers].length = buffer_size;
+ buffers[n_buffers].start = malloc(buffer_size);
+
+ if (!buffers[n_buffers].start) {
+ fprintf(stderr, "Out of memory\n");
+ exit(EXIT_FAILURE);
+ }
+ }
+}
+
+static void init_device(void)
+{
+ struct <link linkend="v4l2-capability">v4l2_capability</link> cap;
+ struct <link linkend="v4l2-cropcap">v4l2_cropcap</link> cropcap;
+ struct <link linkend="v4l2-crop">v4l2_crop</link> crop;
+ struct <link linkend="v4l2-format">v4l2_format</link> fmt;
+ unsigned int min;
+
+ if (-1 == xioctl(fd, VIDIOC_QUERYCAP, &amp;cap)) {
+ if (EINVAL == errno) {
+ fprintf(stderr, "%s is no V4L2 device\n",
+ dev_name);
+ exit(EXIT_FAILURE);
+ } else {
+ errno_exit("VIDIOC_QUERYCAP");
+ }
+ }
+
+ if (!(cap.capabilities &amp; V4L2_CAP_VIDEO_CAPTURE)) {
+ fprintf(stderr, "%s is no video capture device\n",
+ dev_name);
+ exit(EXIT_FAILURE);
+ }
+
+ switch (io) {
+ case IO_METHOD_READ:
+ if (!(cap.capabilities &amp; V4L2_CAP_READWRITE)) {
+ fprintf(stderr, "%s does not support read i/o\n",
+ dev_name);
+ exit(EXIT_FAILURE);
+ }
+ break;
+
+ case IO_METHOD_MMAP:
+ case IO_METHOD_USERPTR:
+ if (!(cap.capabilities &amp; V4L2_CAP_STREAMING)) {
+ fprintf(stderr, "%s does not support streaming i/o\n",
+ dev_name);
+ exit(EXIT_FAILURE);
+ }
+ break;
+ }
+
+
+ /* Select video input, video standard and tune here. */
+
+
+ CLEAR(cropcap);
+
+ cropcap.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+ if (0 == xioctl(fd, VIDIOC_CROPCAP, &amp;cropcap)) {
+ crop.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ crop.c = cropcap.defrect; /* reset to default */
+
+ if (-1 == xioctl(fd, VIDIOC_S_CROP, &amp;crop)) {
+ switch (errno) {
+ case EINVAL:
+ /* Cropping not supported. */
+ break;
+ default:
+ /* Errors ignored. */
+ break;
+ }
+ }
+ } else {
+ /* Errors ignored. */
+ }
+
+
+ CLEAR(fmt);
+
+ fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ if (force_format) {
+ fmt.fmt.pix.width = 640;
+ fmt.fmt.pix.height = 480;
+ fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
+ fmt.fmt.pix.field = V4L2_FIELD_INTERLACED;
+
+ if (-1 == xioctl(fd, VIDIOC_S_FMT, &amp;fmt))
+ errno_exit("VIDIOC_S_FMT");
+
+ /* Note VIDIOC_S_FMT may change width and height. */
+ } else {
+ /* Preserve original settings as set by v4l2-ctl for example */
+ if (-1 == xioctl(fd, VIDIOC_G_FMT, &amp;fmt))
+ errno_exit("VIDIOC_G_FMT");
+ }
+
+ /* Buggy driver paranoia. */
+ min = fmt.fmt.pix.width * 2;
+ if (fmt.fmt.pix.bytesperline &lt; min)
+ fmt.fmt.pix.bytesperline = min;
+ min = fmt.fmt.pix.bytesperline * fmt.fmt.pix.height;
+ if (fmt.fmt.pix.sizeimage &lt; min)
+ fmt.fmt.pix.sizeimage = min;
+
+ switch (io) {
+ case IO_METHOD_READ:
+ init_read(fmt.fmt.pix.sizeimage);
+ break;
+
+ case IO_METHOD_MMAP:
+ init_mmap();
+ break;
+
+ case IO_METHOD_USERPTR:
+ init_userp(fmt.fmt.pix.sizeimage);
+ break;
+ }
+}
+
+static void close_device(void)
+{
+ if (-1 == close(fd))
+ errno_exit("close");
+
+ fd = -1;
+}
+
+static void open_device(void)
+{
+ struct stat st;
+
+ if (-1 == stat(dev_name, &amp;st)) {
+ fprintf(stderr, "Cannot identify '%s': %d, %s\n",
+ dev_name, errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ if (!S_ISCHR(st.st_mode)) {
+ fprintf(stderr, "%s is no device\n", dev_name);
+ exit(EXIT_FAILURE);
+ }
+
+ fd = open(dev_name, O_RDWR /* required */ | O_NONBLOCK, 0);
+
+ if (-1 == fd) {
+ fprintf(stderr, "Cannot open '%s': %d, %s\n",
+ dev_name, errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void usage(FILE *fp, int argc, char **argv)
+{
+ fprintf(fp,
+ "Usage: %s [options]\n\n"
+ "Version 1.3\n"
+ "Options:\n"
+ "-d | --device name Video device name [%s]\n"
+ "-h | --help Print this message\n"
+ "-m | --mmap Use memory mapped buffers [default]\n"
+ "-r | --read Use read() calls\n"
+ "-u | --userp Use application allocated buffers\n"
+ "-o | --output Outputs stream to stdout\n"
+ "-f | --format Force format to 640x480 YUYV\n"
+ "-c | --count Number of frames to grab [%i]\n"
+ "",
+ argv[0], dev_name, frame_count);
+}
+
+static const char short_options[] = "d:hmruofc:";
+
+static const struct option
+long_options[] = {
+ { "device", required_argument, NULL, 'd' },
+ { "help", no_argument, NULL, 'h' },
+ { "mmap", no_argument, NULL, 'm' },
+ { "read", no_argument, NULL, 'r' },
+ { "userp", no_argument, NULL, 'u' },
+ { "output", no_argument, NULL, 'o' },
+ { "format", no_argument, NULL, 'f' },
+ { "count", required_argument, NULL, 'c' },
+ { 0, 0, 0, 0 }
+};
+
+int main(int argc, char **argv)
+{
+ dev_name = "/dev/video0";
+
+ for (;;) {
+ int idx;
+ int c;
+
+ c = getopt_long(argc, argv,
+ short_options, long_options, &amp;idx);
+
+ if (-1 == c)
+ break;
+
+ switch (c) {
+ case 0: /* getopt_long() flag */
+ break;
+
+ case 'd':
+ dev_name = optarg;
+ break;
+
+ case 'h':
+ usage(stdout, argc, argv);
+ exit(EXIT_SUCCESS);
+
+ case 'm':
+ io = IO_METHOD_MMAP;
+ break;
+
+ case 'r':
+ io = IO_METHOD_READ;
+ break;
+
+ case 'u':
+ io = IO_METHOD_USERPTR;
+ break;
+
+ case 'o':
+ out_buf++;
+ break;
+
+ case 'f':
+ force_format++;
+ break;
+
+ case 'c':
+ errno = 0;
+ frame_count = strtol(optarg, NULL, 0);
+ if (errno)
+ errno_exit(optarg);
+ break;
+
+ default:
+ usage(stderr, argc, argv);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ open_device();
+ init_device();
+ start_capturing();
+ mainloop();
+ stop_capturing();
+ uninit_device();
+ close_device();
+ fprintf(stderr, "\n");
+ return 0;
+}
+</programlisting>
diff --git a/Documentation/DocBook/media/v4l/common.xml b/Documentation/DocBook/media/v4l/common.xml
new file mode 100644
index 000000000..8b5e01422
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/common.xml
@@ -0,0 +1,1102 @@
+ <title>Common API Elements</title>
+
+ <para>Programming a V4L2 device consists of these
+steps:</para>
+
+ <itemizedlist>
+ <listitem>
+ <para>Opening the device</para>
+ </listitem>
+ <listitem>
+ <para>Changing device properties, selecting a video and audio
+input, video standard, picture brightness a.&nbsp;o.</para>
+ </listitem>
+ <listitem>
+ <para>Negotiating a data format</para>
+ </listitem>
+ <listitem>
+ <para>Negotiating an input/output method</para>
+ </listitem>
+ <listitem>
+ <para>The actual input/output loop</para>
+ </listitem>
+ <listitem>
+ <para>Closing the device</para>
+ </listitem>
+ </itemizedlist>
+
+ <para>In practice most steps are optional and can be executed out of
+order. It depends on the V4L2 device type, you can read about the
+details in <xref linkend="devices" />. In this chapter we will discuss
+the basic concepts applicable to all devices.</para>
+
+ <section id="open">
+ <title>Opening and Closing Devices</title>
+
+ <section>
+ <title>Device Naming</title>
+
+ <para>V4L2 drivers are implemented as kernel modules, loaded
+manually by the system administrator or automatically when a device is
+first discovered. The driver modules plug into the "videodev" kernel
+module. It provides helper functions and a common application
+interface specified in this document.</para>
+
+ <para>Each driver thus loaded registers one or more device nodes
+with major number 81 and a minor number between 0 and 255. Minor numbers
+are allocated dynamically unless the kernel is compiled with the kernel
+option CONFIG_VIDEO_FIXED_MINOR_RANGES. In that case minor numbers are
+allocated in ranges depending on the device node type (video, radio, etc.).</para>
+
+ <para>Many drivers support "video_nr", "radio_nr" or "vbi_nr"
+module options to select specific video/radio/vbi node numbers. This allows
+the user to request that the device node is named e.g. /dev/video5 instead
+of leaving it to chance. When the driver supports multiple devices of the same
+type more than one device node number can be assigned, separated by commas:
+ <informalexample>
+ <screen>
+&gt; modprobe mydriver video_nr=0,1 radio_nr=0,1</screen>
+ </informalexample></para>
+
+ <para>In <filename>/etc/modules.conf</filename> this may be
+written as: <informalexample>
+ <screen>
+options mydriver video_nr=0,1 radio_nr=0,1
+ </screen>
+ </informalexample> When no device node number is given as module
+option the driver supplies a default.</para>
+
+ <para>Normally udev will create the device nodes in /dev automatically
+for you. If udev is not installed, then you need to enable the
+CONFIG_VIDEO_FIXED_MINOR_RANGES kernel option in order to be able to correctly
+relate a minor number to a device node number. I.e., you need to be certain
+that minor number 5 maps to device node name video5. With this kernel option
+different device types have different minor number ranges. These ranges are
+listed in <xref linkend="devices" />.
+</para>
+
+ <para>The creation of character special files (with
+<application>mknod</application>) is a privileged operation and
+devices cannot be opened by major and minor number. That means
+applications cannot <emphasis>reliable</emphasis> scan for loaded or
+installed drivers. The user must enter a device name, or the
+application can try the conventional device names.</para>
+ </section>
+
+ <section id="related">
+ <title>Related Devices</title>
+
+ <para>Devices can support several functions. For example
+video capturing, VBI capturing and radio support.</para>
+
+ <para>The V4L2 API creates different nodes for each of these functions.</para>
+
+ <para>The V4L2 API was designed with the idea that one device node could support
+all functions. However, in practice this never worked: this 'feature'
+was never used by applications and many drivers did not support it and if
+they did it was certainly never tested. In addition, switching a device
+node between different functions only works when using the streaming I/O
+API, not with the read()/write() API.</para>
+
+ <para>Today each device node supports just one function.</para>
+
+ <para>Besides video input or output the hardware may also
+support audio sampling or playback. If so, these functions are
+implemented as ALSA PCM devices with optional ALSA audio mixer
+devices.</para>
+
+ <para>One problem with all these devices is that the V4L2 API
+makes no provisions to find these related devices. Some really
+complex devices use the Media Controller (see <xref linkend="media_controller" />)
+which can be used for this purpose. But most drivers do not use it,
+and while some code exists that uses sysfs to discover related devices
+(see libmedia_dev in the <ulink url="http://git.linuxtv.org/cgit.cgi/v4l-utils.git/">v4l-utils</ulink>
+git repository), there is no library yet that can provide a single API towards
+both Media Controller-based devices and devices that do not use the Media Controller.
+If you want to work on this please write to the linux-media mailing list: &v4l-ml;.</para>
+ </section>
+
+ <section>
+ <title>Multiple Opens</title>
+
+ <para>V4L2 devices can be opened more than once.<footnote><para>
+There are still some old and obscure drivers that have not been updated to
+allow for multiple opens. This implies that for such drivers &func-open; can
+return an &EBUSY; when the device is already in use.</para></footnote>
+When this is supported by the driver, users can for example start a
+"panel" application to change controls like brightness or audio
+volume, while another application captures video and audio. In other words, panel
+applications are comparable to an ALSA audio mixer application.
+Just opening a V4L2 device should not change the state of the device.<footnote>
+<para>Unfortunately, opening a radio device often switches the state of the
+device to radio mode in many drivers. This behavior should be fixed eventually
+as it violates the V4L2 specification.</para></footnote></para>
+
+ <para>Once an application has allocated the memory buffers needed for
+streaming data (by calling the &VIDIOC-REQBUFS; or &VIDIOC-CREATE-BUFS; ioctls,
+or implicitly by calling the &func-read; or &func-write; functions) that
+application (filehandle) becomes the owner of the device. It is no longer
+allowed to make changes that would affect the buffer sizes (e.g. by calling
+the &VIDIOC-S-FMT; ioctl) and other applications are no longer allowed to allocate
+buffers or start or stop streaming. The &EBUSY; will be returned instead.</para>
+
+ <para>Merely opening a V4L2 device does not grant exclusive
+access.<footnote>
+ <para>Drivers could recognize the
+<constant>O_EXCL</constant> open flag. Presently this is not required,
+so applications cannot know if it really works.</para>
+ </footnote> Initiating data exchange however assigns the right
+to read or write the requested type of data, and to change related
+properties, to this file descriptor. Applications can request
+additional access privileges using the priority mechanism described in
+<xref linkend="app-pri" />.</para>
+ </section>
+
+ <section>
+ <title>Shared Data Streams</title>
+
+ <para>V4L2 drivers should not support multiple applications
+reading or writing the same data stream on a device by copying
+buffers, time multiplexing or similar means. This is better handled by
+a proxy application in user space.</para>
+ </section>
+
+ <section>
+ <title>Functions</title>
+
+ <para>To open and close V4L2 devices applications use the
+&func-open; and &func-close; function, respectively. Devices are
+programmed using the &func-ioctl; function as explained in the
+following sections.</para>
+ </section>
+ </section>
+
+ <section id="querycap">
+ <title>Querying Capabilities</title>
+
+ <para>Because V4L2 covers a wide variety of devices not all
+aspects of the API are equally applicable to all types of devices.
+Furthermore devices of the same type have different capabilities and
+this specification permits the omission of a few complicated and less
+important parts of the API.</para>
+
+ <para>The &VIDIOC-QUERYCAP; ioctl is available to check if the kernel
+device is compatible with this specification, and to query the <link
+linkend="devices">functions</link> and <link linkend="io">I/O
+methods</link> supported by the device.</para>
+
+ <para>Starting with kernel version 3.1, VIDIOC-QUERYCAP will return the
+V4L2 API version used by the driver, with generally matches the Kernel version.
+There's no need of using &VIDIOC-QUERYCAP; to check if a specific ioctl is
+supported, the V4L2 core now returns ENOTTY if a driver doesn't provide
+support for an ioctl.</para>
+
+ <para>Other features can be queried
+by calling the respective ioctl, for example &VIDIOC-ENUMINPUT;
+to learn about the number, types and names of video connectors on the
+device. Although abstraction is a major objective of this API, the
+&VIDIOC-QUERYCAP; ioctl also allows driver specific applications to reliably identify
+the driver.</para>
+
+ <para>All V4L2 drivers must support
+<constant>VIDIOC_QUERYCAP</constant>. Applications should always call
+this ioctl after opening the device.</para>
+ </section>
+
+ <section id="app-pri">
+ <title>Application Priority</title>
+
+ <para>When multiple applications share a device it may be
+desirable to assign them different priorities. Contrary to the
+traditional "rm -rf /" school of thought a video recording application
+could for example block other applications from changing video
+controls or switching the current TV channel. Another objective is to
+permit low priority applications working in background, which can be
+preempted by user controlled applications and automatically regain
+control of the device at a later time.</para>
+
+ <para>Since these features cannot be implemented entirely in user
+space V4L2 defines the &VIDIOC-G-PRIORITY; and &VIDIOC-S-PRIORITY;
+ioctls to request and query the access priority associate with a file
+descriptor. Opening a device assigns a medium priority, compatible
+with earlier versions of V4L2 and drivers not supporting these ioctls.
+Applications requiring a different priority will usually call
+<constant>VIDIOC_S_PRIORITY</constant> after verifying the device with
+the &VIDIOC-QUERYCAP; ioctl.</para>
+
+ <para>Ioctls changing driver properties, such as &VIDIOC-S-INPUT;,
+return an &EBUSY; after another application obtained higher priority.</para>
+ </section>
+
+ <section id="video">
+ <title>Video Inputs and Outputs</title>
+
+ <para>Video inputs and outputs are physical connectors of a
+device. These can be for example RF connectors (antenna/cable), CVBS
+a.k.a. Composite Video, S-Video or RGB connectors. Video and VBI
+capture devices have inputs. Video and VBI output devices have outputs,
+at least one each. Radio devices have no video inputs or outputs.</para>
+
+ <para>To learn about the number and attributes of the
+available inputs and outputs applications can enumerate them with the
+&VIDIOC-ENUMINPUT; and &VIDIOC-ENUMOUTPUT; ioctl, respectively. The
+&v4l2-input; returned by the <constant>VIDIOC_ENUMINPUT</constant>
+ioctl also contains signal status information applicable when the
+current video input is queried.</para>
+
+ <para>The &VIDIOC-G-INPUT; and &VIDIOC-G-OUTPUT; ioctls return the
+index of the current video input or output. To select a different
+input or output applications call the &VIDIOC-S-INPUT; and
+&VIDIOC-S-OUTPUT; ioctls. Drivers must implement all the input ioctls
+when the device has one or more inputs, all the output ioctls when the
+device has one or more outputs.</para>
+
+ <example>
+ <title>Information about the current video input</title>
+
+ <programlisting>
+&v4l2-input; input;
+int index;
+
+if (-1 == ioctl(fd, &VIDIOC-G-INPUT;, &amp;index)) {
+ perror("VIDIOC_G_INPUT");
+ exit(EXIT_FAILURE);
+}
+
+memset(&amp;input, 0, sizeof(input));
+input.index = index;
+
+if (-1 == ioctl(fd, &VIDIOC-ENUMINPUT;, &amp;input)) {
+ perror("VIDIOC_ENUMINPUT");
+ exit(EXIT_FAILURE);
+}
+
+printf("Current input: %s\n", input.name);
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Switching to the first video input</title>
+
+ <programlisting>
+int index;
+
+index = 0;
+
+if (-1 == ioctl(fd, &VIDIOC-S-INPUT;, &amp;index)) {
+ perror("VIDIOC_S_INPUT");
+ exit(EXIT_FAILURE);
+}
+ </programlisting>
+ </example>
+ </section>
+
+ <section id="audio">
+ <title>Audio Inputs and Outputs</title>
+
+ <para>Audio inputs and outputs are physical connectors of a
+device. Video capture devices have inputs, output devices have
+outputs, zero or more each. Radio devices have no audio inputs or
+outputs. They have exactly one tuner which in fact
+<emphasis>is</emphasis> an audio source, but this API associates
+tuners with video inputs or outputs only, and radio devices have
+none of these.<footnote>
+ <para>Actually &v4l2-audio; ought to have a
+<structfield>tuner</structfield> field like &v4l2-input;, not only
+making the API more consistent but also permitting radio devices with
+multiple tuners.</para>
+ </footnote> A connector on a TV card to loop back the received
+audio signal to a sound card is not considered an audio output.</para>
+
+ <para>Audio and video inputs and outputs are associated. Selecting
+a video source also selects an audio source. This is most evident when
+the video and audio source is a tuner. Further audio connectors can
+combine with more than one video input or output. Assumed two
+composite video inputs and two audio inputs exist, there may be up to
+four valid combinations. The relation of video and audio connectors
+is defined in the <structfield>audioset</structfield> field of the
+respective &v4l2-input; or &v4l2-output;, where each bit represents
+the index number, starting at zero, of one audio input or output.</para>
+
+ <para>To learn about the number and attributes of the
+available inputs and outputs applications can enumerate them with the
+&VIDIOC-ENUMAUDIO; and &VIDIOC-ENUMAUDOUT; ioctl, respectively. The
+&v4l2-audio; returned by the <constant>VIDIOC_ENUMAUDIO</constant> ioctl
+also contains signal status information applicable when the current
+audio input is queried.</para>
+
+ <para>The &VIDIOC-G-AUDIO; and &VIDIOC-G-AUDOUT; ioctls report
+the current audio input and output, respectively. Note that, unlike
+&VIDIOC-G-INPUT; and &VIDIOC-G-OUTPUT; these ioctls return a structure
+as <constant>VIDIOC_ENUMAUDIO</constant> and
+<constant>VIDIOC_ENUMAUDOUT</constant> do, not just an index.</para>
+
+ <para>To select an audio input and change its properties
+applications call the &VIDIOC-S-AUDIO; ioctl. To select an audio
+output (which presently has no changeable properties) applications
+call the &VIDIOC-S-AUDOUT; ioctl.</para>
+
+ <para>Drivers must implement all audio input ioctls when the device
+has multiple selectable audio inputs, all audio output ioctls when the
+device has multiple selectable audio outputs. When the device has any
+audio inputs or outputs the driver must set the <constant>V4L2_CAP_AUDIO</constant>
+flag in the &v4l2-capability; returned by the &VIDIOC-QUERYCAP; ioctl.</para>
+
+ <example>
+ <title>Information about the current audio input</title>
+
+ <programlisting>
+&v4l2-audio; audio;
+
+memset(&amp;audio, 0, sizeof(audio));
+
+if (-1 == ioctl(fd, &VIDIOC-G-AUDIO;, &amp;audio)) {
+ perror("VIDIOC_G_AUDIO");
+ exit(EXIT_FAILURE);
+}
+
+printf("Current input: %s\n", audio.name);
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Switching to the first audio input</title>
+
+ <programlisting>
+&v4l2-audio; audio;
+
+memset(&amp;audio, 0, sizeof(audio)); /* clear audio.mode, audio.reserved */
+
+audio.index = 0;
+
+if (-1 == ioctl(fd, &VIDIOC-S-AUDIO;, &amp;audio)) {
+ perror("VIDIOC_S_AUDIO");
+ exit(EXIT_FAILURE);
+}
+ </programlisting>
+ </example>
+ </section>
+
+ <section id="tuner">
+ <title>Tuners and Modulators</title>
+
+ <section>
+ <title>Tuners</title>
+
+ <para>Video input devices can have one or more tuners
+demodulating a RF signal. Each tuner is associated with one or more
+video inputs, depending on the number of RF connectors on the tuner.
+The <structfield>type</structfield> field of the respective
+&v4l2-input; returned by the &VIDIOC-ENUMINPUT; ioctl is set to
+<constant>V4L2_INPUT_TYPE_TUNER</constant> and its
+<structfield>tuner</structfield> field contains the index number of
+the tuner.</para>
+
+ <para>Radio input devices have exactly one tuner with index zero, no
+video inputs.</para>
+
+ <para>To query and change tuner properties applications use the
+&VIDIOC-G-TUNER; and &VIDIOC-S-TUNER; ioctls, respectively. The
+&v4l2-tuner; returned by <constant>VIDIOC_G_TUNER</constant> also
+contains signal status information applicable when the tuner of the
+current video or radio input is queried. Note that
+<constant>VIDIOC_S_TUNER</constant> does not switch the current tuner,
+when there is more than one at all. The tuner is solely determined by
+the current video input. Drivers must support both ioctls and set the
+<constant>V4L2_CAP_TUNER</constant> flag in the &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl when the device has one or
+more tuners.</para>
+ </section>
+
+ <section>
+ <title>Modulators</title>
+
+ <para>Video output devices can have one or more modulators, uh,
+modulating a video signal for radiation or connection to the antenna
+input of a TV set or video recorder. Each modulator is associated with
+one or more video outputs, depending on the number of RF connectors on
+the modulator. The <structfield>type</structfield> field of the
+respective &v4l2-output; returned by the &VIDIOC-ENUMOUTPUT; ioctl is
+set to <constant>V4L2_OUTPUT_TYPE_MODULATOR</constant> and its
+<structfield>modulator</structfield> field contains the index number
+of the modulator.</para>
+
+ <para>Radio output devices have exactly one modulator with index
+zero, no video outputs.</para>
+
+ <para>A video or radio device cannot support both a tuner and a
+modulator. Two separate device nodes will have to be used for such
+hardware, one that supports the tuner functionality and one that supports
+the modulator functionality. The reason is a limitation with the
+&VIDIOC-S-FREQUENCY; ioctl where you cannot specify whether the frequency
+is for a tuner or a modulator.</para>
+
+ <para>To query and change modulator properties applications use
+the &VIDIOC-G-MODULATOR; and &VIDIOC-S-MODULATOR; ioctl. Note that
+<constant>VIDIOC_S_MODULATOR</constant> does not switch the current
+modulator, when there is more than one at all. The modulator is solely
+determined by the current video output. Drivers must support both
+ioctls and set the <constant>V4L2_CAP_MODULATOR</constant> flag in
+the &v4l2-capability; returned by the &VIDIOC-QUERYCAP; ioctl when the
+device has one or more modulators.</para>
+ </section>
+
+ <section>
+ <title>Radio Frequency</title>
+
+ <para>To get and set the tuner or modulator radio frequency
+applications use the &VIDIOC-G-FREQUENCY; and &VIDIOC-S-FREQUENCY;
+ioctl which both take a pointer to a &v4l2-frequency;. These ioctls
+are used for TV and radio devices alike. Drivers must support both
+ioctls when the tuner or modulator ioctls are supported, or
+when the device is a radio device.</para>
+ </section>
+ </section>
+
+ <section id="standard">
+ <title>Video Standards</title>
+
+ <para>Video devices typically support one or more different video
+standards or variations of standards. Each video input and output may
+support another set of standards. This set is reported by the
+<structfield>std</structfield> field of &v4l2-input; and
+&v4l2-output; returned by the &VIDIOC-ENUMINPUT; and
+&VIDIOC-ENUMOUTPUT; ioctls, respectively.</para>
+
+ <para>V4L2 defines one bit for each analog video standard
+currently in use worldwide, and sets aside bits for driver defined
+standards, &eg; hybrid standards to watch NTSC video tapes on PAL TVs
+and vice versa. Applications can use the predefined bits to select a
+particular standard, although presenting the user a menu of supported
+standards is preferred. To enumerate and query the attributes of the
+supported standards applications use the &VIDIOC-ENUMSTD; ioctl.</para>
+
+ <para>Many of the defined standards are actually just variations
+of a few major standards. The hardware may in fact not distinguish
+between them, or do so internal and switch automatically. Therefore
+enumerated standards also contain sets of one or more standard
+bits.</para>
+
+ <para>Assume a hypothetic tuner capable of demodulating B/PAL,
+G/PAL and I/PAL signals. The first enumerated standard is a set of B
+and G/PAL, switched automatically depending on the selected radio
+frequency in UHF or VHF band. Enumeration gives a "PAL-B/G" or "PAL-I"
+choice. Similar a Composite input may collapse standards, enumerating
+"PAL-B/G/H/I", "NTSC-M" and "SECAM-D/K".<footnote>
+ <para>Some users are already confused by technical terms PAL,
+NTSC and SECAM. There is no point asking them to distinguish between
+B, G, D, or K when the software or hardware can do that
+automatically.</para>
+ </footnote></para>
+
+ <para>To query and select the standard used by the current video
+input or output applications call the &VIDIOC-G-STD; and
+&VIDIOC-S-STD; ioctl, respectively. The <emphasis>received</emphasis>
+standard can be sensed with the &VIDIOC-QUERYSTD; ioctl. Note that the
+parameter of all these ioctls is a pointer to a &v4l2-std-id; type
+(a standard set), <emphasis>not</emphasis> an index into the standard
+enumeration. Drivers must implement all video standard ioctls
+when the device has one or more video inputs or outputs.</para>
+
+ <para>Special rules apply to devices such as USB cameras where the notion of video
+standards makes little sense. More generally for any capture or output device
+which is: <itemizedlist>
+ <listitem>
+ <para>incapable of capturing fields or frames at the nominal
+rate of the video standard, or</para>
+ </listitem>
+ <listitem>
+ <para>that does not support the video standard formats at all.</para>
+ </listitem>
+ </itemizedlist> Here the driver shall set the
+<structfield>std</structfield> field of &v4l2-input; and &v4l2-output;
+to zero and the <constant>VIDIOC_G_STD</constant>,
+<constant>VIDIOC_S_STD</constant>,
+<constant>VIDIOC_QUERYSTD</constant> and
+<constant>VIDIOC_ENUMSTD</constant> ioctls shall return the
+&ENOTTY; or the &EINVAL;.</para>
+ <para>Applications can make use of the <xref linkend="input-capabilities" /> and
+<xref linkend="output-capabilities"/> flags to determine whether the video standard ioctls
+can be used with the given input or output.</para>
+
+ <example>
+ <title>Information about the current video standard</title>
+
+ <programlisting>
+&v4l2-std-id; std_id;
+&v4l2-standard; standard;
+
+if (-1 == ioctl(fd, &VIDIOC-G-STD;, &amp;std_id)) {
+ /* Note when VIDIOC_ENUMSTD always returns ENOTTY this
+ is no video device or it falls under the USB exception,
+ and VIDIOC_G_STD returning ENOTTY is no error. */
+
+ perror("VIDIOC_G_STD");
+ exit(EXIT_FAILURE);
+}
+
+memset(&amp;standard, 0, sizeof(standard));
+standard.index = 0;
+
+while (0 == ioctl(fd, &VIDIOC-ENUMSTD;, &amp;standard)) {
+ if (standard.id &amp; std_id) {
+ printf("Current video standard: %s\n", standard.name);
+ exit(EXIT_SUCCESS);
+ }
+
+ standard.index++;
+}
+
+/* EINVAL indicates the end of the enumeration, which cannot be
+ empty unless this device falls under the USB exception. */
+
+if (errno == EINVAL || standard.index == 0) {
+ perror("VIDIOC_ENUMSTD");
+ exit(EXIT_FAILURE);
+}
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Listing the video standards supported by the current
+input</title>
+
+ <programlisting>
+&v4l2-input; input;
+&v4l2-standard; standard;
+
+memset(&amp;input, 0, sizeof(input));
+
+if (-1 == ioctl(fd, &VIDIOC-G-INPUT;, &amp;input.index)) {
+ perror("VIDIOC_G_INPUT");
+ exit(EXIT_FAILURE);
+}
+
+if (-1 == ioctl(fd, &VIDIOC-ENUMINPUT;, &amp;input)) {
+ perror("VIDIOC_ENUM_INPUT");
+ exit(EXIT_FAILURE);
+}
+
+printf("Current input %s supports:\n", input.name);
+
+memset(&amp;standard, 0, sizeof(standard));
+standard.index = 0;
+
+while (0 == ioctl(fd, &VIDIOC-ENUMSTD;, &amp;standard)) {
+ if (standard.id &amp; input.std)
+ printf("%s\n", standard.name);
+
+ standard.index++;
+}
+
+/* EINVAL indicates the end of the enumeration, which cannot be
+ empty unless this device falls under the USB exception. */
+
+if (errno != EINVAL || standard.index == 0) {
+ perror("VIDIOC_ENUMSTD");
+ exit(EXIT_FAILURE);
+}
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Selecting a new video standard</title>
+
+ <programlisting>
+&v4l2-input; input;
+&v4l2-std-id; std_id;
+
+memset(&amp;input, 0, sizeof(input));
+
+if (-1 == ioctl(fd, &VIDIOC-G-INPUT;, &amp;input.index)) {
+ perror("VIDIOC_G_INPUT");
+ exit(EXIT_FAILURE);
+}
+
+if (-1 == ioctl(fd, &VIDIOC-ENUMINPUT;, &amp;input)) {
+ perror("VIDIOC_ENUM_INPUT");
+ exit(EXIT_FAILURE);
+}
+
+if (0 == (input.std &amp; V4L2_STD_PAL_BG)) {
+ fprintf(stderr, "Oops. B/G PAL is not supported.\n");
+ exit(EXIT_FAILURE);
+}
+
+/* Note this is also supposed to work when only B
+ <emphasis>or</emphasis> G/PAL is supported. */
+
+std_id = V4L2_STD_PAL_BG;
+
+if (-1 == ioctl(fd, &VIDIOC-S-STD;, &amp;std_id)) {
+ perror("VIDIOC_S_STD");
+ exit(EXIT_FAILURE);
+}
+ </programlisting>
+ </example>
+ </section>
+ <section id="dv-timings">
+ <title>Digital Video (DV) Timings</title>
+ <para>
+ The video standards discussed so far have been dealing with Analog TV and the
+corresponding video timings. Today there are many more different hardware interfaces
+such as High Definition TV interfaces (HDMI), VGA, DVI connectors etc., that carry
+video signals and there is a need to extend the API to select the video timings
+for these interfaces. Since it is not possible to extend the &v4l2-std-id; due to
+the limited bits available, a new set of ioctls was added to set/get video timings at
+the input and output.</para>
+
+ <para>These ioctls deal with the detailed digital video timings that define
+each video format. This includes parameters such as the active video width and height,
+signal polarities, frontporches, backporches, sync widths etc. The <filename>linux/v4l2-dv-timings.h</filename>
+header can be used to get the timings of the formats in the <xref linkend="cea861" /> and
+<xref linkend="vesadmt" /> standards.
+ </para>
+
+ <para>To enumerate and query the attributes of the DV timings supported by a device
+ applications use the &VIDIOC-ENUM-DV-TIMINGS; and &VIDIOC-DV-TIMINGS-CAP; ioctls.
+ To set DV timings for the device applications use the
+&VIDIOC-S-DV-TIMINGS; ioctl and to get current DV timings they use the
+&VIDIOC-G-DV-TIMINGS; ioctl. To detect the DV timings as seen by the video receiver applications
+use the &VIDIOC-QUERY-DV-TIMINGS; ioctl.</para>
+ <para>Applications can make use of the <xref linkend="input-capabilities" /> and
+<xref linkend="output-capabilities"/> flags to determine whether the digital video ioctls
+can be used with the given input or output.</para>
+ </section>
+
+ &sub-controls;
+
+ <section id="format">
+ <title>Data Formats</title>
+
+ <section>
+ <title>Data Format Negotiation</title>
+
+ <para>Different devices exchange different kinds of data with
+applications, for example video images, raw or sliced VBI data, RDS
+datagrams. Even within one kind many different formats are possible,
+in particular an abundance of image formats. Although drivers must
+provide a default and the selection persists across closing and
+reopening a device, applications should always negotiate a data format
+before engaging in data exchange. Negotiation means the application
+asks for a particular format and the driver selects and reports the
+best the hardware can do to satisfy the request. Of course
+applications can also just query the current selection.</para>
+
+ <para>A single mechanism exists to negotiate all data formats
+using the aggregate &v4l2-format; and the &VIDIOC-G-FMT; and
+&VIDIOC-S-FMT; ioctls. Additionally the &VIDIOC-TRY-FMT; ioctl can be
+used to examine what the hardware <emphasis>could</emphasis> do,
+without actually selecting a new data format. The data formats
+supported by the V4L2 API are covered in the respective device section
+in <xref linkend="devices" />. For a closer look at image formats see
+<xref linkend="pixfmt" />.</para>
+
+ <para>The <constant>VIDIOC_S_FMT</constant> ioctl is a major
+turning-point in the initialization sequence. Prior to this point
+multiple panel applications can access the same device concurrently to
+select the current input, change controls or modify other properties.
+The first <constant>VIDIOC_S_FMT</constant> assigns a logical stream
+(video data, VBI data etc.) exclusively to one file descriptor.</para>
+
+ <para>Exclusive means no other application, more precisely no
+other file descriptor, can grab this stream or change device
+properties inconsistent with the negotiated parameters. A video
+standard change for example, when the new standard uses a different
+number of scan lines, can invalidate the selected image format.
+Therefore only the file descriptor owning the stream can make
+invalidating changes. Accordingly multiple file descriptors which
+grabbed different logical streams prevent each other from interfering
+with their settings. When for example video overlay is about to start
+or already in progress, simultaneous video capturing may be restricted
+to the same cropping and image size.</para>
+
+ <para>When applications omit the
+<constant>VIDIOC_S_FMT</constant> ioctl its locking side effects are
+implied by the next step, the selection of an I/O method with the
+&VIDIOC-REQBUFS; ioctl or implicit with the first &func-read; or
+&func-write; call.</para>
+
+ <para>Generally only one logical stream can be assigned to a
+file descriptor, the exception being drivers permitting simultaneous
+video capturing and overlay using the same file descriptor for
+compatibility with V4L and earlier versions of V4L2. Switching the
+logical stream or returning into "panel mode" is possible by closing
+and reopening the device. Drivers <emphasis>may</emphasis> support a
+switch using <constant>VIDIOC_S_FMT</constant>.</para>
+
+ <para>All drivers exchanging data with
+applications must support the <constant>VIDIOC_G_FMT</constant> and
+<constant>VIDIOC_S_FMT</constant> ioctl. Implementation of the
+<constant>VIDIOC_TRY_FMT</constant> is highly recommended but
+optional.</para>
+ </section>
+
+ <section>
+ <title>Image Format Enumeration</title>
+
+ <para>Apart of the generic format negotiation functions
+a special ioctl to enumerate all image formats supported by video
+capture, overlay or output devices is available.<footnote>
+ <para>Enumerating formats an application has no a-priori
+knowledge of (otherwise it could explicitly ask for them and need not
+enumerate) seems useless, but there are applications serving as proxy
+between drivers and the actual video applications for which this is
+useful.</para>
+ </footnote></para>
+
+ <para>The &VIDIOC-ENUM-FMT; ioctl must be supported
+by all drivers exchanging image data with applications.</para>
+
+ <important>
+ <para>Drivers are not supposed to convert image formats in
+kernel space. They must enumerate only formats directly supported by
+the hardware. If necessary driver writers should publish an example
+conversion routine or library for integration into applications.</para>
+ </important>
+ </section>
+ </section>
+
+ &sub-planar-apis;
+
+ <section id="crop">
+ <title>Image Cropping, Insertion and Scaling</title>
+
+ <para>Some video capture devices can sample a subsection of the
+picture and shrink or enlarge it to an image of arbitrary size. We
+call these abilities cropping and scaling. Some video output devices
+can scale an image up or down and insert it at an arbitrary scan line
+and horizontal offset into a video signal.</para>
+
+ <para>Applications can use the following API to select an area in
+the video signal, query the default area and the hardware limits.
+<emphasis>Despite their name, the &VIDIOC-CROPCAP;, &VIDIOC-G-CROP;
+and &VIDIOC-S-CROP; ioctls apply to input as well as output
+devices.</emphasis></para>
+
+ <para>Scaling requires a source and a target. On a video capture
+or overlay device the source is the video signal, and the cropping
+ioctls determine the area actually sampled. The target are images
+read by the application or overlaid onto the graphics screen. Their
+size (and position for an overlay) is negotiated with the
+&VIDIOC-G-FMT; and &VIDIOC-S-FMT; ioctls.</para>
+
+ <para>On a video output device the source are the images passed in
+by the application, and their size is again negotiated with the
+<constant>VIDIOC_G/S_FMT</constant> ioctls, or may be encoded in a
+compressed video stream. The target is the video signal, and the
+cropping ioctls determine the area where the images are
+inserted.</para>
+
+ <para>Source and target rectangles are defined even if the device
+does not support scaling or the <constant>VIDIOC_G/S_CROP</constant>
+ioctls. Their size (and position where applicable) will be fixed in
+this case. <emphasis>All capture and output device must support the
+<constant>VIDIOC_CROPCAP</constant> ioctl such that applications can
+determine if scaling takes place.</emphasis></para>
+
+ <section>
+ <title>Cropping Structures</title>
+
+ <figure id="crop-scale">
+ <title>Image Cropping, Insertion and Scaling</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="crop.pdf" format="PS" />
+ </imageobject>
+ <imageobject>
+ <imagedata fileref="crop.gif" format="GIF" />
+ </imageobject>
+ <textobject>
+ <phrase>The cropping, insertion and scaling process</phrase>
+ </textobject>
+ </mediaobject>
+ </figure>
+
+ <para>For capture devices the coordinates of the top left
+corner, width and height of the area which can be sampled is given by
+the <structfield>bounds</structfield> substructure of the
+&v4l2-cropcap; returned by the <constant>VIDIOC_CROPCAP</constant>
+ioctl. To support a wide range of hardware this specification does not
+define an origin or units. However by convention drivers should
+horizontally count unscaled samples relative to 0H (the leading edge
+of the horizontal sync pulse, see <xref linkend="vbi-hsync" />).
+Vertically ITU-R line
+numbers of the first field (<xref linkend="vbi-525" />, <xref
+linkend="vbi-625" />), multiplied by two if the driver can capture both
+fields.</para>
+
+ <para>The top left corner, width and height of the source
+rectangle, that is the area actually sampled, is given by &v4l2-crop;
+using the same coordinate system as &v4l2-cropcap;. Applications can
+use the <constant>VIDIOC_G_CROP</constant> and
+<constant>VIDIOC_S_CROP</constant> ioctls to get and set this
+rectangle. It must lie completely within the capture boundaries and
+the driver may further adjust the requested size and/or position
+according to hardware limitations.</para>
+
+ <para>Each capture device has a default source rectangle, given
+by the <structfield>defrect</structfield> substructure of
+&v4l2-cropcap;. The center of this rectangle shall align with the
+center of the active picture area of the video signal, and cover what
+the driver writer considers the complete picture. Drivers shall reset
+the source rectangle to the default when the driver is first loaded,
+but not later.</para>
+
+ <para>For output devices these structures and ioctls are used
+accordingly, defining the <emphasis>target</emphasis> rectangle where
+the images will be inserted into the video signal.</para>
+
+ </section>
+
+ <section>
+ <title>Scaling Adjustments</title>
+
+ <para>Video hardware can have various cropping, insertion and
+scaling limitations. It may only scale up or down, support only
+discrete scaling factors, or have different scaling abilities in
+horizontal and vertical direction. Also it may not support scaling at
+all. At the same time the &v4l2-crop; rectangle may have to be
+aligned, and both the source and target rectangles may have arbitrary
+upper and lower size limits. In particular the maximum
+<structfield>width</structfield> and <structfield>height</structfield>
+in &v4l2-crop; may be smaller than the
+&v4l2-cropcap;.<structfield>bounds</structfield> area. Therefore, as
+usual, drivers are expected to adjust the requested parameters and
+return the actual values selected.</para>
+
+ <para>Applications can change the source or the target rectangle
+first, as they may prefer a particular image size or a certain area in
+the video signal. If the driver has to adjust both to satisfy hardware
+limitations, the last requested rectangle shall take priority, and the
+driver should preferably adjust the opposite one. The &VIDIOC-TRY-FMT;
+ioctl however shall not change the driver state and therefore only
+adjust the requested rectangle.</para>
+
+ <para>Suppose scaling on a video capture device is restricted to
+a factor 1:1 or 2:1 in either direction and the target image size must
+be a multiple of 16&nbsp;&times;&nbsp;16 pixels. The source cropping
+rectangle is set to defaults, which are also the upper limit in this
+example, of 640&nbsp;&times;&nbsp;400 pixels at offset 0,&nbsp;0. An
+application requests an image size of 300&nbsp;&times;&nbsp;225
+pixels, assuming video will be scaled down from the "full picture"
+accordingly. The driver sets the image size to the closest possible
+values 304&nbsp;&times;&nbsp;224, then chooses the cropping rectangle
+closest to the requested size, that is 608&nbsp;&times;&nbsp;224
+(224&nbsp;&times;&nbsp;2:1 would exceed the limit 400). The offset
+0,&nbsp;0 is still valid, thus unmodified. Given the default cropping
+rectangle reported by <constant>VIDIOC_CROPCAP</constant> the
+application can easily propose another offset to center the cropping
+rectangle.</para>
+
+ <para>Now the application may insist on covering an area using a
+picture aspect ratio closer to the original request, so it asks for a
+cropping rectangle of 608&nbsp;&times;&nbsp;456 pixels. The present
+scaling factors limit cropping to 640&nbsp;&times;&nbsp;384, so the
+driver returns the cropping size 608&nbsp;&times;&nbsp;384 and adjusts
+the image size to closest possible 304&nbsp;&times;&nbsp;192.</para>
+
+ </section>
+
+ <section>
+ <title>Examples</title>
+
+ <para>Source and target rectangles shall remain unchanged across
+closing and reopening a device, such that piping data into or out of a
+device will work without special preparations. More advanced
+applications should ensure the parameters are suitable before starting
+I/O.</para>
+
+ <example>
+ <title>Resetting the cropping parameters</title>
+
+ <para>(A video capture device is assumed; change
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant> for other
+devices.)</para>
+
+ <programlisting>
+&v4l2-cropcap; cropcap;
+&v4l2-crop; crop;
+
+memset (&amp;cropcap, 0, sizeof (cropcap));
+cropcap.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+if (-1 == ioctl (fd, &VIDIOC-CROPCAP;, &amp;cropcap)) {
+ perror ("VIDIOC_CROPCAP");
+ exit (EXIT_FAILURE);
+}
+
+memset (&amp;crop, 0, sizeof (crop));
+crop.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+crop.c = cropcap.defrect;
+
+/* Ignore if cropping is not supported (EINVAL). */
+
+if (-1 == ioctl (fd, &VIDIOC-S-CROP;, &amp;crop)
+ &amp;&amp; errno != EINVAL) {
+ perror ("VIDIOC_S_CROP");
+ exit (EXIT_FAILURE);
+}
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Simple downscaling</title>
+
+ <para>(A video capture device is assumed.)</para>
+
+ <programlisting>
+&v4l2-cropcap; cropcap;
+&v4l2-format; format;
+
+reset_cropping_parameters ();
+
+/* Scale down to 1/4 size of full picture. */
+
+memset (&amp;format, 0, sizeof (format)); /* defaults */
+
+format.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+format.fmt.pix.width = cropcap.defrect.width &gt;&gt; 1;
+format.fmt.pix.height = cropcap.defrect.height &gt;&gt; 1;
+format.fmt.pix.pixelformat = V4L2_PIX_FMT_YUYV;
+
+if (-1 == ioctl (fd, &VIDIOC-S-FMT;, &amp;format)) {
+ perror ("VIDIOC_S_FORMAT");
+ exit (EXIT_FAILURE);
+}
+
+/* We could check the actual image size now, the actual scaling factor
+ or if the driver can scale at all. */
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Selecting an output area</title>
+
+ <programlisting>
+&v4l2-cropcap; cropcap;
+&v4l2-crop; crop;
+
+memset (&amp;cropcap, 0, sizeof (cropcap));
+cropcap.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
+
+if (-1 == ioctl (fd, VIDIOC_CROPCAP;, &amp;cropcap)) {
+ perror ("VIDIOC_CROPCAP");
+ exit (EXIT_FAILURE);
+}
+
+memset (&amp;crop, 0, sizeof (crop));
+
+crop.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
+crop.c = cropcap.defrect;
+
+/* Scale the width and height to 50 % of their original size
+ and center the output. */
+
+crop.c.width /= 2;
+crop.c.height /= 2;
+crop.c.left += crop.c.width / 2;
+crop.c.top += crop.c.height / 2;
+
+/* Ignore if cropping is not supported (EINVAL). */
+
+if (-1 == ioctl (fd, VIDIOC_S_CROP, &amp;crop)
+ &amp;&amp; errno != EINVAL) {
+ perror ("VIDIOC_S_CROP");
+ exit (EXIT_FAILURE);
+}
+</programlisting>
+ </example>
+
+ <example>
+ <title>Current scaling factor and pixel aspect</title>
+
+ <para>(A video capture device is assumed.)</para>
+
+ <programlisting>
+&v4l2-cropcap; cropcap;
+&v4l2-crop; crop;
+&v4l2-format; format;
+double hscale, vscale;
+double aspect;
+int dwidth, dheight;
+
+memset (&amp;cropcap, 0, sizeof (cropcap));
+cropcap.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+if (-1 == ioctl (fd, &VIDIOC-CROPCAP;, &amp;cropcap)) {
+ perror ("VIDIOC_CROPCAP");
+ exit (EXIT_FAILURE);
+}
+
+memset (&amp;crop, 0, sizeof (crop));
+crop.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+if (-1 == ioctl (fd, &VIDIOC-G-CROP;, &amp;crop)) {
+ if (errno != EINVAL) {
+ perror ("VIDIOC_G_CROP");
+ exit (EXIT_FAILURE);
+ }
+
+ /* Cropping not supported. */
+ crop.c = cropcap.defrect;
+}
+
+memset (&amp;format, 0, sizeof (format));
+format.fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+if (-1 == ioctl (fd, &VIDIOC-G-FMT;, &amp;format)) {
+ perror ("VIDIOC_G_FMT");
+ exit (EXIT_FAILURE);
+}
+
+/* The scaling applied by the driver. */
+
+hscale = format.fmt.pix.width / (double) crop.c.width;
+vscale = format.fmt.pix.height / (double) crop.c.height;
+
+aspect = cropcap.pixelaspect.numerator /
+ (double) cropcap.pixelaspect.denominator;
+aspect = aspect * hscale / vscale;
+
+/* Devices following ITU-R BT.601 do not capture
+ square pixels. For playback on a computer monitor
+ we should scale the images to this size. */
+
+dwidth = format.fmt.pix.width / aspect;
+dheight = format.fmt.pix.height;
+ </programlisting>
+ </example>
+ </section>
+ </section>
+
+ &sub-selection-api;
+
+ <section id="streaming-par">
+ <title>Streaming Parameters</title>
+
+ <para>Streaming parameters are intended to optimize the video
+capture process as well as I/O. Presently applications can request a
+high quality capture mode with the &VIDIOC-S-PARM; ioctl.</para>
+
+ <para>The current video standard determines a nominal number of
+frames per second. If less than this number of frames is to be
+captured or output, applications can request frame skipping or
+duplicating on the driver side. This is especially useful when using
+the &func-read; or &func-write;, which are not augmented by timestamps
+or sequence counters, and to avoid unnecessary data copying.</para>
+
+ <para>Finally these ioctls can be used to determine the number of
+buffers used internally by a driver in read/write mode. For
+implications see the section discussing the &func-read;
+function.</para>
+
+ <para>To get and set the streaming parameters applications call
+the &VIDIOC-G-PARM; and &VIDIOC-S-PARM; ioctl, respectively. They take
+a pointer to a &v4l2-streamparm;, which contains a union holding
+separate parameters for input and output devices.</para>
+
+ <para>These ioctls are optional, drivers need not implement
+them. If so, they return the &EINVAL;.</para>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml
new file mode 100644
index 000000000..a0aef85d3
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/compat.xml
@@ -0,0 +1,2741 @@
+ <title>Changes</title>
+
+ <para>The following chapters document the evolution of the V4L2 API,
+errata or extensions. They are also intended to help application and
+driver writers to port or update their code.</para>
+
+ <section id="diff-v4l">
+ <title>Differences between V4L and V4L2</title>
+
+ <para>The Video For Linux API was first introduced in Linux 2.1 to
+unify and replace various TV and radio device related interfaces,
+developed independently by driver writers in prior years. Starting
+with Linux 2.5 the much improved V4L2 API replaces the V4L API.
+The support for the old V4L calls were removed from Kernel, but the
+library <xref linkend="libv4l" /> supports the conversion of a V4L
+API system call into a V4L2 one.</para>
+
+ <section>
+ <title>Opening and Closing Devices</title>
+
+ <para>For compatibility reasons the character device file names
+recommended for V4L2 video capture, overlay, radio and raw
+vbi capture devices did not change from those used by V4L. They are
+listed in <xref linkend="devices" /> and below in <xref
+ linkend="v4l-dev" />.</para>
+
+ <para>The teletext devices (minor range 192-223) have been removed in
+V4L2 and no longer exist. There is no hardware available anymore for handling
+pure teletext. Instead raw or sliced VBI is used.</para>
+
+ <para>The V4L <filename>videodev</filename> module automatically
+assigns minor numbers to drivers in load order, depending on the
+registered device type. We recommend that V4L2 drivers by default
+register devices with the same numbers, but the system administrator
+can assign arbitrary minor numbers using driver module options. The
+major device number remains 81.</para>
+
+ <table id="v4l-dev">
+ <title>V4L Device Types, Names and Numbers</title>
+ <tgroup cols="3">
+ <thead>
+ <row>
+ <entry>Device Type</entry>
+ <entry>File Name</entry>
+ <entry>Minor Numbers</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Video capture and overlay</entry>
+ <entry><para><filename>/dev/video</filename> and
+<filename>/dev/bttv0</filename><footnote> <para>According to
+Documentation/devices.txt these should be symbolic links to
+<filename>/dev/video0</filename>. Note the original bttv interface is
+not compatible with V4L or V4L2.</para> </footnote>,
+<filename>/dev/video0</filename> to
+<filename>/dev/video63</filename></para></entry>
+ <entry>0-63</entry>
+ </row>
+ <row>
+ <entry>Radio receiver</entry>
+ <entry><para><filename>/dev/radio</filename><footnote>
+ <para>According to
+<filename>Documentation/devices.txt</filename> a symbolic link to
+<filename>/dev/radio0</filename>.</para>
+ </footnote>, <filename>/dev/radio0</filename> to
+<filename>/dev/radio63</filename></para></entry>
+ <entry>64-127</entry>
+ </row>
+ <row>
+ <entry>Raw VBI capture</entry>
+ <entry><para><filename>/dev/vbi</filename>,
+<filename>/dev/vbi0</filename> to
+<filename>/dev/vbi31</filename></para></entry>
+ <entry>224-255</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>V4L prohibits (or used to prohibit) multiple opens of a
+device file. V4L2 drivers <emphasis>may</emphasis> support multiple
+opens, see <xref linkend="open" /> for details and consequences.</para>
+
+ <para>V4L drivers respond to V4L2 ioctls with an &EINVAL;.</para>
+ </section>
+
+ <section>
+ <title>Querying Capabilities</title>
+
+ <para>The V4L <constant>VIDIOCGCAP</constant> ioctl is
+equivalent to V4L2's &VIDIOC-QUERYCAP;.</para>
+
+ <para>The <structfield>name</structfield> field in struct
+<structname>video_capability</structname> became
+<structfield>card</structfield> in &v4l2-capability;,
+<structfield>type</structfield> was replaced by
+<structfield>capabilities</structfield>. Note V4L2 does not
+distinguish between device types like this, better think of basic
+video input, video output and radio devices supporting a set of
+related functions like video capturing, video overlay and VBI
+capturing. See <xref linkend="open" /> for an
+introduction.<informaltable>
+ <tgroup cols="3">
+ <thead>
+ <row>
+ <entry>struct
+<structname>video_capability</structname>
+<structfield>type</structfield></entry>
+ <entry>&v4l2-capability;
+<structfield>capabilities</structfield> flags</entry>
+ <entry>Purpose</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>VID_TYPE_CAPTURE</constant></entry>
+ <entry><constant>V4L2_CAP_VIDEO_CAPTURE</constant></entry>
+ <entry>The <link linkend="capture">video
+capture</link> interface is supported.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_TUNER</constant></entry>
+ <entry><constant>V4L2_CAP_TUNER</constant></entry>
+ <entry>The device has a <link linkend="tuner">tuner or
+modulator</link>.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_TELETEXT</constant></entry>
+ <entry><constant>V4L2_CAP_VBI_CAPTURE</constant></entry>
+ <entry>The <link linkend="raw-vbi">raw VBI
+capture</link> interface is supported.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_OVERLAY</constant></entry>
+ <entry><constant>V4L2_CAP_VIDEO_OVERLAY</constant></entry>
+ <entry>The <link linkend="overlay">video
+overlay</link> interface is supported.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_CHROMAKEY</constant></entry>
+ <entry><constant>V4L2_FBUF_CAP_CHROMAKEY</constant> in
+field <structfield>capability</structfield> of
+&v4l2-framebuffer;</entry>
+ <entry>Whether chromakey overlay is supported. For
+more information on overlay see
+<xref linkend="overlay" />.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_CLIPPING</constant></entry>
+ <entry><constant>V4L2_FBUF_CAP_LIST_CLIPPING</constant>
+and <constant>V4L2_FBUF_CAP_BITMAP_CLIPPING</constant> in field
+<structfield>capability</structfield> of &v4l2-framebuffer;</entry>
+ <entry>Whether clipping the overlaid image is
+supported, see <xref linkend="overlay" />.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_FRAMERAM</constant></entry>
+ <entry><constant>V4L2_FBUF_CAP_EXTERNOVERLAY</constant>
+<emphasis>not set</emphasis> in field
+<structfield>capability</structfield> of &v4l2-framebuffer;</entry>
+ <entry>Whether overlay overwrites frame buffer memory,
+see <xref linkend="overlay" />.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_SCALES</constant></entry>
+ <entry><constant>-</constant></entry>
+ <entry>This flag indicates if the hardware can scale
+images. The V4L2 API implies the scale factor by setting the cropping
+dimensions and image size with the &VIDIOC-S-CROP; and &VIDIOC-S-FMT;
+ioctl, respectively. The driver returns the closest sizes possible.
+For more information on cropping and scaling see <xref
+ linkend="crop" />.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_MONOCHROME</constant></entry>
+ <entry><constant>-</constant></entry>
+ <entry>Applications can enumerate the supported image
+formats with the &VIDIOC-ENUM-FMT; ioctl to determine if the device
+supports grey scale capturing only. For more information on image
+formats see <xref linkend="pixfmt" />.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_SUBCAPTURE</constant></entry>
+ <entry><constant>-</constant></entry>
+ <entry>Applications can call the &VIDIOC-G-CROP; ioctl
+to determine if the device supports capturing a subsection of the full
+picture ("cropping" in V4L2). If not, the ioctl returns the &EINVAL;.
+For more information on cropping and scaling see <xref
+ linkend="crop" />.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_MPEG_DECODER</constant></entry>
+ <entry><constant>-</constant></entry>
+ <entry>Applications can enumerate the supported image
+formats with the &VIDIOC-ENUM-FMT; ioctl to determine if the device
+supports MPEG streams.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_MPEG_ENCODER</constant></entry>
+ <entry><constant>-</constant></entry>
+ <entry>See above.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_MJPEG_DECODER</constant></entry>
+ <entry><constant>-</constant></entry>
+ <entry>See above.</entry>
+ </row>
+ <row>
+ <entry><constant>VID_TYPE_MJPEG_ENCODER</constant></entry>
+ <entry><constant>-</constant></entry>
+ <entry>See above.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable></para>
+
+ <para>The <structfield>audios</structfield> field was replaced
+by <structfield>capabilities</structfield> flag
+<constant>V4L2_CAP_AUDIO</constant>, indicating
+<emphasis>if</emphasis> the device has any audio inputs or outputs. To
+determine their number applications can enumerate audio inputs with
+the &VIDIOC-G-AUDIO; ioctl. The audio ioctls are described in <xref
+ linkend="audio" />.</para>
+
+ <para>The <structfield>maxwidth</structfield>,
+<structfield>maxheight</structfield>,
+<structfield>minwidth</structfield> and
+<structfield>minheight</structfield> fields were removed. Calling the
+&VIDIOC-S-FMT; or &VIDIOC-TRY-FMT; ioctl with the desired dimensions
+returns the closest size possible, taking into account the current
+video standard, cropping and scaling limitations.</para>
+ </section>
+
+ <section>
+ <title>Video Sources</title>
+
+ <para>V4L provides the <constant>VIDIOCGCHAN</constant> and
+<constant>VIDIOCSCHAN</constant> ioctl using struct
+<structname>video_channel</structname> to enumerate
+the video inputs of a V4L device. The equivalent V4L2 ioctls
+are &VIDIOC-ENUMINPUT;, &VIDIOC-G-INPUT; and &VIDIOC-S-INPUT;
+using &v4l2-input; as discussed in <xref linkend="video" />.</para>
+
+ <para>The <structfield>channel</structfield> field counting
+inputs was renamed to <structfield>index</structfield>, the video
+input types were renamed as follows: <informaltable>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>struct <structname>video_channel</structname>
+<structfield>type</structfield></entry>
+ <entry>&v4l2-input;
+<structfield>type</structfield></entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>VIDEO_TYPE_TV</constant></entry>
+ <entry><constant>V4L2_INPUT_TYPE_TUNER</constant></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_TYPE_CAMERA</constant></entry>
+ <entry><constant>V4L2_INPUT_TYPE_CAMERA</constant></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable></para>
+
+ <para>Unlike the <structfield>tuners</structfield> field
+expressing the number of tuners of this input, V4L2 assumes each video
+input is connected to at most one tuner. However a tuner can have more
+than one input, &ie; RF connectors, and a device can have multiple
+tuners. The index number of the tuner associated with the input, if
+any, is stored in field <structfield>tuner</structfield> of
+&v4l2-input;. Enumeration of tuners is discussed in <xref
+ linkend="tuner" />.</para>
+
+ <para>The redundant <constant>VIDEO_VC_TUNER</constant> flag was
+dropped. Video inputs associated with a tuner are of type
+<constant>V4L2_INPUT_TYPE_TUNER</constant>. The
+<constant>VIDEO_VC_AUDIO</constant> flag was replaced by the
+<structfield>audioset</structfield> field. V4L2 considers devices with
+up to 32 audio inputs. Each set bit in the
+<structfield>audioset</structfield> field represents one audio input
+this video input combines with. For information about audio inputs and
+how to switch between them see <xref linkend="audio" />.</para>
+
+ <para>The <structfield>norm</structfield> field describing the
+supported video standards was replaced by
+<structfield>std</structfield>. The V4L specification mentions a flag
+<constant>VIDEO_VC_NORM</constant> indicating whether the standard can
+be changed. This flag was a later addition together with the
+<structfield>norm</structfield> field and has been removed in the
+meantime. V4L2 has a similar, albeit more comprehensive approach
+to video standards, see <xref linkend="standard" /> for more
+information.</para>
+ </section>
+
+ <section>
+ <title>Tuning</title>
+
+ <para>The V4L <constant>VIDIOCGTUNER</constant> and
+<constant>VIDIOCSTUNER</constant> ioctl and struct
+<structname>video_tuner</structname> can be used to enumerate the
+tuners of a V4L TV or radio device. The equivalent V4L2 ioctls are
+&VIDIOC-G-TUNER; and &VIDIOC-S-TUNER; using &v4l2-tuner;. Tuners are
+covered in <xref linkend="tuner" />.</para>
+
+ <para>The <structfield>tuner</structfield> field counting tuners
+was renamed to <structfield>index</structfield>. The fields
+<structfield>name</structfield>, <structfield>rangelow</structfield>
+and <structfield>rangehigh</structfield> remained unchanged.</para>
+
+ <para>The <constant>VIDEO_TUNER_PAL</constant>,
+<constant>VIDEO_TUNER_NTSC</constant> and
+<constant>VIDEO_TUNER_SECAM</constant> flags indicating the supported
+video standards were dropped. This information is now contained in the
+associated &v4l2-input;. No replacement exists for the
+<constant>VIDEO_TUNER_NORM</constant> flag indicating whether the
+video standard can be switched. The <structfield>mode</structfield>
+field to select a different video standard was replaced by a whole new
+set of ioctls and structures described in <xref linkend="standard" />.
+Due to its ubiquity it should be mentioned the BTTV driver supports
+several standards in addition to the regular
+<constant>VIDEO_MODE_PAL</constant> (0),
+<constant>VIDEO_MODE_NTSC</constant>,
+<constant>VIDEO_MODE_SECAM</constant> and
+<constant>VIDEO_MODE_AUTO</constant> (3). Namely N/PAL Argentina,
+M/PAL, N/PAL, and NTSC Japan with numbers 3-6 (sic).</para>
+
+ <para>The <constant>VIDEO_TUNER_STEREO_ON</constant> flag
+indicating stereo reception became
+<constant>V4L2_TUNER_SUB_STEREO</constant> in field
+<structfield>rxsubchans</structfield>. This field also permits the
+detection of monaural and bilingual audio, see the definition of
+&v4l2-tuner; for details. Presently no replacement exists for the
+<constant>VIDEO_TUNER_RDS_ON</constant> and
+<constant>VIDEO_TUNER_MBS_ON</constant> flags.</para>
+
+ <para> The <constant>VIDEO_TUNER_LOW</constant> flag was renamed
+to <constant>V4L2_TUNER_CAP_LOW</constant> in the &v4l2-tuner;
+<structfield>capability</structfield> field.</para>
+
+ <para>The <constant>VIDIOCGFREQ</constant> and
+<constant>VIDIOCSFREQ</constant> ioctl to change the tuner frequency
+where renamed to &VIDIOC-G-FREQUENCY; and &VIDIOC-S-FREQUENCY;. They
+take a pointer to a &v4l2-frequency; instead of an unsigned long
+integer.</para>
+ </section>
+
+ <section id="v4l-image-properties">
+ <title>Image Properties</title>
+
+ <para>V4L2 has no equivalent of the
+<constant>VIDIOCGPICT</constant> and <constant>VIDIOCSPICT</constant>
+ioctl and struct <structname>video_picture</structname>. The following
+fields where replaced by V4L2 controls accessible with the
+&VIDIOC-QUERYCTRL;, &VIDIOC-G-CTRL; and &VIDIOC-S-CTRL; ioctls:<informaltable>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>struct <structname>video_picture</structname></entry>
+ <entry>V4L2 Control ID</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><structfield>brightness</structfield></entry>
+ <entry><constant>V4L2_CID_BRIGHTNESS</constant></entry>
+ </row>
+ <row>
+ <entry><structfield>hue</structfield></entry>
+ <entry><constant>V4L2_CID_HUE</constant></entry>
+ </row>
+ <row>
+ <entry><structfield>colour</structfield></entry>
+ <entry><constant>V4L2_CID_SATURATION</constant></entry>
+ </row>
+ <row>
+ <entry><structfield>contrast</structfield></entry>
+ <entry><constant>V4L2_CID_CONTRAST</constant></entry>
+ </row>
+ <row>
+ <entry><structfield>whiteness</structfield></entry>
+ <entry><constant>V4L2_CID_WHITENESS</constant></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable></para>
+
+ <para>The V4L picture controls are assumed to range from 0 to
+65535 with no particular reset value. The V4L2 API permits arbitrary
+limits and defaults which can be queried with the &VIDIOC-QUERYCTRL;
+ioctl. For general information about controls see <xref
+linkend="control" />.</para>
+
+ <para>The <structfield>depth</structfield> (average number of
+bits per pixel) of a video image is implied by the selected image
+format. V4L2 does not explicitly provide such information assuming
+applications recognizing the format are aware of the image depth and
+others need not know. The <structfield>palette</structfield> field
+moved into the &v4l2-pix-format;:<informaltable>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>struct <structname>video_picture</structname>
+<structfield>palette</structfield></entry>
+ <entry>&v4l2-pix-format;
+<structfield>pixfmt</structfield></entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>VIDEO_PALETTE_GREY</constant></entry>
+ <entry><para><link
+linkend="V4L2-PIX-FMT-GREY"><constant>V4L2_PIX_FMT_GREY</constant></link></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_HI240</constant></entry>
+ <entry><para><link
+linkend="pixfmt-reserved"><constant>V4L2_PIX_FMT_HI240</constant></link><footnote>
+ <para>This is a custom format used by the BTTV
+driver, not one of the V4L2 standard formats.</para>
+ </footnote></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_RGB565</constant></entry>
+ <entry><para><link
+linkend="pixfmt-rgb"><constant>V4L2_PIX_FMT_RGB565</constant></link></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_RGB555</constant></entry>
+ <entry><para><link
+linkend="pixfmt-rgb"><constant>V4L2_PIX_FMT_RGB555</constant></link></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_RGB24</constant></entry>
+ <entry><para><link
+linkend="pixfmt-rgb"><constant>V4L2_PIX_FMT_BGR24</constant></link></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_RGB32</constant></entry>
+ <entry><para><link
+linkend="pixfmt-rgb"><constant>V4L2_PIX_FMT_BGR32</constant></link><footnote>
+ <para>Presumably all V4L RGB formats are
+little-endian, although some drivers might interpret them according to machine endianness. V4L2 defines little-endian, big-endian and red/blue
+swapped variants. For details see <xref linkend="pixfmt-rgb" />.</para>
+ </footnote></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_YUV422</constant></entry>
+ <entry><para><link
+linkend="V4L2-PIX-FMT-YUYV"><constant>V4L2_PIX_FMT_YUYV</constant></link></para></entry>
+ </row>
+ <row>
+ <entry><para><constant>VIDEO_PALETTE_YUYV</constant><footnote>
+ <para><constant>VIDEO_PALETTE_YUV422</constant>
+and <constant>VIDEO_PALETTE_YUYV</constant> are the same formats. Some
+V4L drivers respond to one, some to the other.</para>
+ </footnote></para></entry>
+ <entry><para><link
+linkend="V4L2-PIX-FMT-YUYV"><constant>V4L2_PIX_FMT_YUYV</constant></link></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_UYVY</constant></entry>
+ <entry><para><link
+linkend="V4L2-PIX-FMT-UYVY"><constant>V4L2_PIX_FMT_UYVY</constant></link></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_YUV420</constant></entry>
+ <entry>None</entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_YUV411</constant></entry>
+ <entry><para><link
+linkend="V4L2-PIX-FMT-Y41P"><constant>V4L2_PIX_FMT_Y41P</constant></link><footnote>
+ <para>Not to be confused with
+<constant>V4L2_PIX_FMT_YUV411P</constant>, which is a planar
+format.</para> </footnote></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_RAW</constant></entry>
+ <entry><para>None<footnote> <para>V4L explains this
+as: "RAW capture (BT848)"</para> </footnote></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_YUV422P</constant></entry>
+ <entry><para><link
+linkend="V4L2-PIX-FMT-YUV422P"><constant>V4L2_PIX_FMT_YUV422P</constant></link></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_YUV411P</constant></entry>
+ <entry><para><link
+linkend="V4L2-PIX-FMT-YUV411P"><constant>V4L2_PIX_FMT_YUV411P</constant></link><footnote>
+ <para>Not to be confused with
+<constant>V4L2_PIX_FMT_Y41P</constant>, which is a packed
+format.</para> </footnote></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_YUV420P</constant></entry>
+ <entry><para><link
+linkend="V4L2-PIX-FMT-YVU420"><constant>V4L2_PIX_FMT_YVU420</constant></link></para></entry>
+ </row>
+ <row>
+ <entry><constant>VIDEO_PALETTE_YUV410P</constant></entry>
+ <entry><para><link
+linkend="V4L2-PIX-FMT-YVU410"><constant>V4L2_PIX_FMT_YVU410</constant></link></para></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable></para>
+
+ <para>V4L2 image formats are defined in <xref
+linkend="pixfmt" />. The image format can be selected with the
+&VIDIOC-S-FMT; ioctl.</para>
+ </section>
+
+ <section>
+ <title>Audio</title>
+
+ <para>The <constant>VIDIOCGAUDIO</constant> and
+<constant>VIDIOCSAUDIO</constant> ioctl and struct
+<structname>video_audio</structname> are used to enumerate the
+audio inputs of a V4L device. The equivalent V4L2 ioctls are
+&VIDIOC-G-AUDIO; and &VIDIOC-S-AUDIO; using &v4l2-audio; as
+discussed in <xref linkend="audio" />.</para>
+
+ <para>The <structfield>audio</structfield> "channel number"
+field counting audio inputs was renamed to
+<structfield>index</structfield>.</para>
+
+ <para>On <constant>VIDIOCSAUDIO</constant> the
+<structfield>mode</structfield> field selects <emphasis>one</emphasis>
+of the <constant>VIDEO_SOUND_MONO</constant>,
+<constant>VIDEO_SOUND_STEREO</constant>,
+<constant>VIDEO_SOUND_LANG1</constant> or
+<constant>VIDEO_SOUND_LANG2</constant> audio demodulation modes. When
+the current audio standard is BTSC
+<constant>VIDEO_SOUND_LANG2</constant> refers to SAP and
+<constant>VIDEO_SOUND_LANG1</constant> is meaningless. Also
+undocumented in the V4L specification, there is no way to query the
+selected mode. On <constant>VIDIOCGAUDIO</constant> the driver returns
+the <emphasis>actually received</emphasis> audio programmes in this
+field. In the V4L2 API this information is stored in the &v4l2-tuner;
+<structfield>rxsubchans</structfield> and
+<structfield>audmode</structfield> fields, respectively. See <xref
+linkend="tuner" /> for more information on tuners. Related to audio
+modes &v4l2-audio; also reports if this is a mono or stereo
+input, regardless if the source is a tuner.</para>
+
+ <para>The following fields where replaced by V4L2 controls
+accessible with the &VIDIOC-QUERYCTRL;, &VIDIOC-G-CTRL; and
+&VIDIOC-S-CTRL; ioctls:<informaltable>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>struct
+<structname>video_audio</structname></entry>
+ <entry>V4L2 Control ID</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><structfield>volume</structfield></entry>
+ <entry><constant>V4L2_CID_AUDIO_VOLUME</constant></entry>
+ </row>
+ <row>
+ <entry><structfield>bass</structfield></entry>
+ <entry><constant>V4L2_CID_AUDIO_BASS</constant></entry>
+ </row>
+ <row>
+ <entry><structfield>treble</structfield></entry>
+ <entry><constant>V4L2_CID_AUDIO_TREBLE</constant></entry>
+ </row>
+ <row>
+ <entry><structfield>balance</structfield></entry>
+ <entry><constant>V4L2_CID_AUDIO_BALANCE</constant></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable></para>
+
+ <para>To determine which of these controls are supported by a
+driver V4L provides the <structfield>flags</structfield>
+<constant>VIDEO_AUDIO_VOLUME</constant>,
+<constant>VIDEO_AUDIO_BASS</constant>,
+<constant>VIDEO_AUDIO_TREBLE</constant> and
+<constant>VIDEO_AUDIO_BALANCE</constant>. In the V4L2 API the
+&VIDIOC-QUERYCTRL; ioctl reports if the respective control is
+supported. Accordingly the <constant>VIDEO_AUDIO_MUTABLE</constant>
+and <constant>VIDEO_AUDIO_MUTE</constant> flags where replaced by the
+boolean <constant>V4L2_CID_AUDIO_MUTE</constant> control.</para>
+
+ <para>All V4L2 controls have a <structfield>step</structfield>
+attribute replacing the struct <structname>video_audio</structname>
+<structfield>step</structfield> field. The V4L audio controls are
+assumed to range from 0 to 65535 with no particular reset value. The
+V4L2 API permits arbitrary limits and defaults which can be queried
+with the &VIDIOC-QUERYCTRL; ioctl. For general information about
+controls see <xref linkend="control" />.</para>
+ </section>
+
+ <section>
+ <title>Frame Buffer Overlay</title>
+
+ <para>The V4L2 ioctls equivalent to
+<constant>VIDIOCGFBUF</constant> and <constant>VIDIOCSFBUF</constant>
+are &VIDIOC-G-FBUF; and &VIDIOC-S-FBUF;. The
+<structfield>base</structfield> field of struct
+<structname>video_buffer</structname> remained unchanged, except V4L2
+defines a flag to indicate non-destructive overlays instead of a
+<constant>NULL</constant> pointer. All other fields moved into the
+&v4l2-pix-format; <structfield>fmt</structfield> substructure of
+&v4l2-framebuffer;. The <structfield>depth</structfield> field was
+replaced by <structfield>pixelformat</structfield>. See <xref
+ linkend="pixfmt-rgb" /> for a list of RGB formats and their
+respective color depths.</para>
+
+ <para>Instead of the special ioctls
+<constant>VIDIOCGWIN</constant> and <constant>VIDIOCSWIN</constant>
+V4L2 uses the general-purpose data format negotiation ioctls
+&VIDIOC-G-FMT; and &VIDIOC-S-FMT;. They take a pointer to a
+&v4l2-format; as argument. Here the <structfield>win</structfield>
+member of the <structfield>fmt</structfield> union is used, a
+&v4l2-window;.</para>
+
+ <para>The <structfield>x</structfield>,
+<structfield>y</structfield>, <structfield>width</structfield> and
+<structfield>height</structfield> fields of struct
+<structname>video_window</structname> moved into &v4l2-rect;
+substructure <structfield>w</structfield> of struct
+<structname>v4l2_window</structname>. The
+<structfield>chromakey</structfield>,
+<structfield>clips</structfield>, and
+<structfield>clipcount</structfield> fields remained unchanged. Struct
+<structname>video_clip</structname> was renamed to &v4l2-clip;, also
+containing a struct <structname>v4l2_rect</structname>, but the
+semantics are still the same.</para>
+
+ <para>The <constant>VIDEO_WINDOW_INTERLACE</constant> flag was
+dropped. Instead applications must set the
+<structfield>field</structfield> field to
+<constant>V4L2_FIELD_ANY</constant> or
+<constant>V4L2_FIELD_INTERLACED</constant>. The
+<constant>VIDEO_WINDOW_CHROMAKEY</constant> flag moved into
+&v4l2-framebuffer;, under the new name
+<constant>V4L2_FBUF_FLAG_CHROMAKEY</constant>.</para>
+
+ <para>In V4L, storing a bitmap pointer in
+<structfield>clips</structfield> and setting
+<structfield>clipcount</structfield> to
+<constant>VIDEO_CLIP_BITMAP</constant> (-1) requests bitmap
+clipping, using a fixed size bitmap of 1024 &times; 625 bits. Struct
+<structname>v4l2_window</structname> has a separate
+<structfield>bitmap</structfield> pointer field for this purpose and
+the bitmap size is determined by <structfield>w.width</structfield> and
+<structfield>w.height</structfield>.</para>
+
+ <para>The <constant>VIDIOCCAPTURE</constant> ioctl to enable or
+disable overlay was renamed to &VIDIOC-OVERLAY;.</para>
+ </section>
+
+ <section>
+ <title>Cropping</title>
+
+ <para>To capture only a subsection of the full picture V4L
+defines the <constant>VIDIOCGCAPTURE</constant> and
+<constant>VIDIOCSCAPTURE</constant> ioctls using struct
+<structname>video_capture</structname>. The equivalent V4L2 ioctls are
+&VIDIOC-G-CROP; and &VIDIOC-S-CROP; using &v4l2-crop;, and the related
+&VIDIOC-CROPCAP; ioctl. This is a rather complex matter, see
+<xref linkend="crop" /> for details.</para>
+
+ <para>The <structfield>x</structfield>,
+<structfield>y</structfield>, <structfield>width</structfield> and
+<structfield>height</structfield> fields moved into &v4l2-rect;
+substructure <structfield>c</structfield> of struct
+<structname>v4l2_crop</structname>. The
+<structfield>decimation</structfield> field was dropped. In the V4L2
+API the scaling factor is implied by the size of the cropping
+rectangle and the size of the captured or overlaid image.</para>
+
+ <para>The <constant>VIDEO_CAPTURE_ODD</constant>
+and <constant>VIDEO_CAPTURE_EVEN</constant> flags to capture only the
+odd or even field, respectively, were replaced by
+<constant>V4L2_FIELD_TOP</constant> and
+<constant>V4L2_FIELD_BOTTOM</constant> in the field named
+<structfield>field</structfield> of &v4l2-pix-format; and
+&v4l2-window;. These structures are used to select a capture or
+overlay format with the &VIDIOC-S-FMT; ioctl.</para>
+ </section>
+
+ <section>
+ <title>Reading Images, Memory Mapping</title>
+
+ <section>
+ <title>Capturing using the read method</title>
+
+ <para>There is no essential difference between reading images
+from a V4L or V4L2 device using the &func-read; function, however V4L2
+drivers are not required to support this I/O method. Applications can
+determine if the function is available with the &VIDIOC-QUERYCAP;
+ioctl. All V4L2 devices exchanging data with applications must support
+the &func-select; and &func-poll; functions.</para>
+
+ <para>To select an image format and size, V4L provides the
+<constant>VIDIOCSPICT</constant> and <constant>VIDIOCSWIN</constant>
+ioctls. V4L2 uses the general-purpose data format negotiation ioctls
+&VIDIOC-G-FMT; and &VIDIOC-S-FMT;. They take a pointer to a
+&v4l2-format; as argument, here the &v4l2-pix-format; named
+<structfield>pix</structfield> of its <structfield>fmt</structfield>
+union is used.</para>
+
+ <para>For more information about the V4L2 read interface see
+<xref linkend="rw" />.</para>
+ </section>
+ <section>
+ <title>Capturing using memory mapping</title>
+
+ <para>Applications can read from V4L devices by mapping
+buffers in device memory, or more often just buffers allocated in
+DMA-able system memory, into their address space. This avoids the data
+copying overhead of the read method. V4L2 supports memory mapping as
+well, with a few differences.</para>
+
+ <informaltable>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>V4L</entry>
+ <entry>V4L2</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>The image format must be selected before
+buffers are allocated, with the &VIDIOC-S-FMT; ioctl. When no format
+is selected the driver may use the last, possibly by another
+application requested format.</entry>
+ </row>
+ <row>
+ <entry><para>Applications cannot change the number of
+buffers. The it is built into the driver, unless it has a module
+option to change the number when the driver module is
+loaded.</para></entry>
+ <entry><para>The &VIDIOC-REQBUFS; ioctl allocates the
+desired number of buffers, this is a required step in the initialization
+sequence.</para></entry>
+ </row>
+ <row>
+ <entry><para>Drivers map all buffers as one contiguous
+range of memory. The <constant>VIDIOCGMBUF</constant> ioctl is
+available to query the number of buffers, the offset of each buffer
+from the start of the virtual file, and the overall amount of memory
+used, which can be used as arguments for the &func-mmap;
+function.</para></entry>
+ <entry><para>Buffers are individually mapped. The
+offset and size of each buffer can be determined with the
+&VIDIOC-QUERYBUF; ioctl.</para></entry>
+ </row>
+ <row>
+ <entry><para>The <constant>VIDIOCMCAPTURE</constant>
+ioctl prepares a buffer for capturing. It also determines the image
+format for this buffer. The ioctl returns immediately, eventually with
+an &EAGAIN; if no video signal had been detected. When the driver
+supports more than one buffer applications can call the ioctl multiple
+times and thus have multiple outstanding capture
+requests.</para><para>The <constant>VIDIOCSYNC</constant> ioctl
+suspends execution until a particular buffer has been
+filled.</para></entry>
+ <entry><para>Drivers maintain an incoming and outgoing
+queue. &VIDIOC-QBUF; enqueues any empty buffer into the incoming
+queue. Filled buffers are dequeued from the outgoing queue with the
+&VIDIOC-DQBUF; ioctl. To wait until filled buffers become available this
+function, &func-select; or &func-poll; can be used. The
+&VIDIOC-STREAMON; ioctl must be called once after enqueuing one or
+more buffers to start capturing. Its counterpart
+&VIDIOC-STREAMOFF; stops capturing and dequeues all buffers from both
+queues. Applications can query the signal status, if known, with the
+&VIDIOC-ENUMINPUT; ioctl.</para></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+
+ <para>For a more in-depth discussion of memory mapping and
+examples, see <xref linkend="mmap" />.</para>
+ </section>
+ </section>
+
+ <section>
+ <title>Reading Raw VBI Data</title>
+
+ <para>Originally the V4L API did not specify a raw VBI capture
+interface, only the device file <filename>/dev/vbi</filename> was
+reserved for this purpose. The only driver supporting this interface
+was the BTTV driver, de-facto defining the V4L VBI interface. Reading
+from the device yields a raw VBI image with the following
+parameters:<informaltable>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>&v4l2-vbi-format;</entry>
+ <entry>V4L, BTTV driver</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>sampling_rate</entry>
+ <entry>28636363&nbsp;Hz NTSC (or any other 525-line
+standard); 35468950&nbsp;Hz PAL and SECAM (625-line standards)</entry>
+ </row>
+ <row>
+ <entry>offset</entry>
+ <entry>?</entry>
+ </row>
+ <row>
+ <entry>samples_per_line</entry>
+ <entry>2048</entry>
+ </row>
+ <row>
+ <entry>sample_format</entry>
+ <entry>V4L2_PIX_FMT_GREY. The last four bytes (a
+machine endianness integer) contain a frame counter.</entry>
+ </row>
+ <row>
+ <entry>start[]</entry>
+ <entry>10, 273 NTSC; 22, 335 PAL and SECAM</entry>
+ </row>
+ <row>
+ <entry>count[]</entry>
+ <entry><para>16, 16<footnote><para>Old driver
+versions used different values, eventually the custom
+<constant>BTTV_VBISIZE</constant> ioctl was added to query the
+correct values.</para></footnote></para></entry>
+ </row>
+ <row>
+ <entry>flags</entry>
+ <entry>0</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable></para>
+
+ <para>Undocumented in the V4L specification, in Linux 2.3 the
+<constant>VIDIOCGVBIFMT</constant> and
+<constant>VIDIOCSVBIFMT</constant> ioctls using struct
+<structname>vbi_format</structname> were added to determine the VBI
+image parameters. These ioctls are only partially compatible with the
+V4L2 VBI interface specified in <xref linkend="raw-vbi" />.</para>
+
+ <para>An <structfield>offset</structfield> field does not
+exist, <structfield>sample_format</structfield> is supposed to be
+<constant>VIDEO_PALETTE_RAW</constant>, equivalent to
+<constant>V4L2_PIX_FMT_GREY</constant>. The remaining fields are
+probably equivalent to &v4l2-vbi-format;.</para>
+
+ <para>Apparently only the Zoran (ZR 36120) driver implements
+these ioctls. The semantics differ from those specified for V4L2 in two
+ways. The parameters are reset on &func-open; and
+<constant>VIDIOCSVBIFMT</constant> always returns an &EINVAL; if the
+parameters are invalid.</para>
+ </section>
+
+ <section>
+ <title>Miscellaneous</title>
+
+ <para>V4L2 has no equivalent of the
+<constant>VIDIOCGUNIT</constant> ioctl. Applications can find the VBI
+device associated with a video capture device (or vice versa) by
+reopening the device and requesting VBI data. For details see
+<xref linkend="open" />.</para>
+
+ <para>No replacement exists for <constant>VIDIOCKEY</constant>,
+and the V4L functions for microcode programming. A new interface for
+MPEG compression and playback devices is documented in <xref
+ linkend="extended-controls" />.</para>
+ </section>
+
+ </section>
+
+ <section id="hist-v4l2">
+ <title>Changes of the V4L2 API</title>
+
+ <para>Soon after the V4L API was added to the kernel it was
+criticised as too inflexible. In August 1998 Bill Dirks proposed a
+number of improvements and began to work on documentation, example
+drivers and applications. With the help of other volunteers this
+eventually became the V4L2 API, not just an extension but a
+replacement for the V4L API. However it took another four years and
+two stable kernel releases until the new API was finally accepted for
+inclusion into the kernel in its present form.</para>
+
+ <section>
+ <title>Early Versions</title>
+ <para>1998-08-20: First version.</para>
+
+ <para>1998-08-27: The &func-select; function was introduced.</para>
+
+ <para>1998-09-10: New video standard interface.</para>
+
+ <para>1998-09-18: The <constant>VIDIOC_NONCAP</constant> ioctl
+was replaced by the otherwise meaningless <constant>O_TRUNC</constant>
+&func-open; flag, and the aliases <constant>O_NONCAP</constant> and
+<constant>O_NOIO</constant> were defined. Applications can set this
+flag if they intend to access controls only, as opposed to capture
+applications which need exclusive access. The
+<constant>VIDEO_STD_XXX</constant> identifiers are now ordinals
+instead of flags, and the <function>video_std_construct()</function>
+helper function takes id and transmission arguments.</para>
+
+ <para>1998-09-28: Revamped video standard. Made video controls
+individually enumerable.</para>
+
+ <para>1998-10-02: The <structfield>id</structfield> field was
+removed from struct <structname>video_standard</structname> and the
+color subcarrier fields were renamed. The &VIDIOC-QUERYSTD; ioctl was
+renamed to &VIDIOC-ENUMSTD;, &VIDIOC-G-INPUT; to &VIDIOC-ENUMINPUT;. A
+first draft of the Codec API was released.</para>
+
+ <para>1998-11-08: Many minor changes. Most symbols have been
+renamed. Some material changes to &v4l2-capability;.</para>
+
+ <para>1998-11-12: The read/write directon of some ioctls was misdefined.</para>
+
+ <para>1998-11-14: <constant>V4L2_PIX_FMT_RGB24</constant>
+changed to <constant>V4L2_PIX_FMT_BGR24</constant>, and
+<constant>V4L2_PIX_FMT_RGB32</constant> changed to
+<constant>V4L2_PIX_FMT_BGR32</constant>. Audio controls are now
+accessible with the &VIDIOC-G-CTRL; and &VIDIOC-S-CTRL; ioctls under
+names starting with <constant>V4L2_CID_AUDIO</constant>. The
+<constant>V4L2_MAJOR</constant> define was removed from
+<filename>videodev.h</filename> since it was only used once in the
+<filename>videodev</filename> kernel module. The
+<constant>YUV422</constant> and <constant>YUV411</constant> planar
+image formats were added.</para>
+
+ <para>1998-11-28: A few ioctl symbols changed. Interfaces for codecs and
+video output devices were added.</para>
+
+ <para>1999-01-14: A raw VBI capture interface was added.</para>
+
+ <para>1999-01-19: The <constant>VIDIOC_NEXTBUF</constant> ioctl
+ was removed.</para>
+ </section>
+
+ <section>
+ <title>V4L2 Version 0.16 1999-01-31</title>
+ <para>1999-01-27: There is now one QBUF ioctl, VIDIOC_QWBUF and VIDIOC_QRBUF
+are gone. VIDIOC_QBUF takes a v4l2_buffer as a parameter. Added
+digital zoom (cropping) controls.</para>
+ </section>
+
+ <!-- Where's 0.17? mhs couldn't find that videodev.h, perhaps Bill
+ forgot to bump the version number or never released it. -->
+
+ <section>
+ <title>V4L2 Version 0.18 1999-03-16</title>
+ <para>Added a v4l to V4L2 ioctl compatibility layer to
+videodev.c. Driver writers, this changes how you implement your ioctl
+handler. See the Driver Writer's Guide. Added some more control id
+codes.</para>
+ </section>
+
+ <section>
+ <title>V4L2 Version 0.19 1999-06-05</title>
+ <para>1999-03-18: Fill in the category and catname fields of
+v4l2_queryctrl objects before passing them to the driver. Required a
+minor change to the VIDIOC_QUERYCTRL handlers in the sample
+drivers.</para>
+ <para>1999-03-31: Better compatibility for v4l memory capture
+ioctls. Requires changes to drivers to fully support new compatibility
+features, see Driver Writer's Guide and v4l2cap.c. Added new control
+IDs: V4L2_CID_HFLIP, _VFLIP. Changed V4L2_PIX_FMT_YUV422P to _YUV422P,
+and _YUV411P to _YUV411P.</para>
+ <para>1999-04-04: Added a few more control IDs.</para>
+ <para>1999-04-07: Added the button control type.</para>
+ <para>1999-05-02: Fixed a typo in videodev.h, and added the
+V4L2_CTRL_FLAG_GRAYED (later V4L2_CTRL_FLAG_GRABBED) flag.</para>
+ <para>1999-05-20: Definition of VIDIOC_G_CTRL was wrong causing
+a malfunction of this ioctl.</para>
+ <para>1999-06-05: Changed the value of
+V4L2_CID_WHITENESS.</para>
+ </section>
+
+ <section>
+ <title>V4L2 Version 0.20 (1999-09-10)</title>
+
+ <para>Version 0.20 introduced a number of changes which were
+<emphasis>not backward compatible</emphasis> with 0.19 and earlier
+versions. Purpose of these changes was to simplify the API, while
+making it more extensible and following common Linux driver API
+conventions.</para>
+
+ <orderedlist>
+ <listitem>
+ <para>Some typos in <constant>V4L2_FMT_FLAG</constant>
+symbols were fixed. &v4l2-clip; was changed for compatibility with
+v4l. (1999-08-30)</para>
+ </listitem>
+
+ <listitem>
+ <para><constant>V4L2_TUNER_SUB_LANG1</constant> was added.
+(1999-09-05)</para>
+ </listitem>
+
+ <listitem>
+ <para>All ioctl() commands that used an integer argument now
+take a pointer to an integer. Where it makes sense, ioctls will return
+the actual new value in the integer pointed to by the argument, a
+common convention in the V4L2 API. The affected ioctls are:
+VIDIOC_PREVIEW, VIDIOC_STREAMON, VIDIOC_STREAMOFF, VIDIOC_S_FREQ,
+VIDIOC_S_INPUT, VIDIOC_S_OUTPUT, VIDIOC_S_EFFECT. For example
+<programlisting>
+err = ioctl (fd, VIDIOC_XXX, V4L2_XXX);
+</programlisting> becomes <programlisting>
+int a = V4L2_XXX; err = ioctl(fd, VIDIOC_XXX, &amp;a);
+</programlisting>
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>All the different get- and set-format commands were
+swept into one &VIDIOC-G-FMT; and &VIDIOC-S-FMT; ioctl taking a union
+and a type field selecting the union member as parameter. Purpose is to
+simplify the API by eliminating several ioctls and to allow new and
+driver private data streams without adding new ioctls.</para>
+
+ <para>This change obsoletes the following ioctls:
+<constant>VIDIOC_S_INFMT</constant>,
+<constant>VIDIOC_G_INFMT</constant>,
+<constant>VIDIOC_S_OUTFMT</constant>,
+<constant>VIDIOC_G_OUTFMT</constant>,
+<constant>VIDIOC_S_VBIFMT</constant> and
+<constant>VIDIOC_G_VBIFMT</constant>. The image format structure
+<structname>v4l2_format</structname> was renamed to &v4l2-pix-format;,
+while &v4l2-format; is now the envelopping structure for all format
+negotiations.</para>
+ </listitem>
+
+ <listitem>
+ <para>Similar to the changes above, the
+<constant>VIDIOC_G_PARM</constant> and
+<constant>VIDIOC_S_PARM</constant> ioctls were merged with
+<constant>VIDIOC_G_OUTPARM</constant> and
+<constant>VIDIOC_S_OUTPARM</constant>. A
+<structfield>type</structfield> field in the new &v4l2-streamparm;
+selects the respective union member.</para>
+
+ <para>This change obsoletes the
+<constant>VIDIOC_G_OUTPARM</constant> and
+<constant>VIDIOC_S_OUTPARM</constant> ioctls.</para>
+ </listitem>
+
+ <listitem>
+ <para>Control enumeration was simplified, and two new
+control flags were introduced and one dropped. The
+<structfield>catname</structfield> field was replaced by a
+<structfield>group</structfield> field.</para>
+
+ <para>Drivers can now flag unsupported and temporarily
+unavailable controls with <constant>V4L2_CTRL_FLAG_DISABLED</constant>
+and <constant>V4L2_CTRL_FLAG_GRABBED</constant> respectively. The
+<structfield>group</structfield> name indicates a possibly narrower
+classification than the <structfield>category</structfield>. In other
+words, there may be multiple groups within a category. Controls within
+a group would typically be drawn within a group box. Controls in
+different categories might have a greater separation, or may even
+appear in separate windows.</para>
+ </listitem>
+
+ <listitem>
+ <para>The &v4l2-buffer; <structfield>timestamp</structfield>
+was changed to a 64 bit integer, containing the sampling or output
+time of the frame in nanoseconds. Additionally timestamps will be in
+absolute system time, not starting from zero at the beginning of a
+stream. The data type name for timestamps is stamp_t, defined as a
+signed 64-bit integer. Output devices should not send a buffer out
+until the time in the timestamp field has arrived. I would like to
+follow SGI's lead, and adopt a multimedia timestamping system like
+their UST (Unadjusted System Time). See
+http://web.archive.org/web/*/http://reality.sgi.com
+/cpirazzi_engr/lg/time/intro.html.
+UST uses timestamps that are 64-bit signed integers
+(not struct timeval's) and given in nanosecond units. The UST clock
+starts at zero when the system is booted and runs continuously and
+uniformly. It takes a little over 292 years for UST to overflow. There
+is no way to set the UST clock. The regular Linux time-of-day clock
+can be changed periodically, which would cause errors if it were being
+used for timestamping a multimedia stream. A real UST style clock will
+require some support in the kernel that is not there yet. But in
+anticipation, I will change the timestamp field to a 64-bit integer,
+and I will change the v4l2_masterclock_gettime() function (used only
+by drivers) to return a 64-bit integer.</para>
+ </listitem>
+
+ <listitem>
+ <para>A <structfield>sequence</structfield> field was added
+to &v4l2-buffer;. The <structfield>sequence</structfield> field counts
+captured frames, it is ignored by output devices. When a capture
+driver drops a frame, the sequence number of that frame is
+skipped.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 Version 0.20 incremental changes</title>
+ <!-- Version number didn't change anymore, reason unknown. -->
+
+ <para>1999-12-23: In &v4l2-vbi-format; the
+<structfield>reserved1</structfield> field became
+<structfield>offset</structfield>. Previously drivers were required to
+clear the <structfield>reserved1</structfield> field.</para>
+
+ <para>2000-01-13: The
+ <constant>V4L2_FMT_FLAG_NOT_INTERLACED</constant> flag was added.</para>
+
+ <para>2000-07-31: The <filename>linux/poll.h</filename> header
+is now included by <filename>videodev.h</filename> for compatibility
+with the original <filename>videodev.h</filename> file.</para>
+
+ <para>2000-11-20: <constant>V4L2_TYPE_VBI_OUTPUT</constant> and
+<constant>V4L2_PIX_FMT_Y41P</constant> were added.</para>
+
+ <para>2000-11-25: <constant>V4L2_TYPE_VBI_INPUT</constant> was
+added.</para>
+
+ <para>2000-12-04: A couple typos in symbol names were fixed.</para>
+
+ <para>2001-01-18: To avoid namespace conflicts the
+<constant>fourcc</constant> macro defined in the
+<filename>videodev.h</filename> header file was renamed to
+<constant>v4l2_fourcc</constant>.</para>
+
+ <para>2001-01-25: A possible driver-level compatibility problem
+between the <filename>videodev.h</filename> file in Linux 2.4.0 and
+the <filename>videodev.h</filename> file included in the
+<filename>videodevX</filename> patch was fixed. Users of an earlier
+version of <filename>videodevX</filename> on Linux 2.4.0 should
+recompile their V4L and V4L2 drivers.</para>
+
+ <para>2001-01-26: A possible kernel-level incompatibility
+between the <filename>videodev.h</filename> file in the
+<filename>videodevX</filename> patch and the
+<filename>videodev.h</filename> file in Linux 2.2.x with devfs patches
+applied was fixed.</para>
+
+ <para>2001-03-02: Certain V4L ioctls which pass data in both
+direction although they are defined with read-only parameter, did not
+work correctly through the backward compatibility layer.
+[Solution?]</para>
+
+ <para>2001-04-13: Big endian 16-bit RGB formats were added.</para>
+
+ <para>2001-09-17: New YUV formats and the &VIDIOC-G-FREQUENCY; and
+&VIDIOC-S-FREQUENCY; ioctls were added. (The old
+<constant>VIDIOC_G_FREQ</constant> and
+<constant>VIDIOC_S_FREQ</constant> ioctls did not take multiple tuners
+into account.)</para>
+
+ <para>2000-09-18: <constant>V4L2_BUF_TYPE_VBI</constant> was
+added. This may <emphasis>break compatibility</emphasis> as the
+&VIDIOC-G-FMT; and &VIDIOC-S-FMT; ioctls may fail now if the struct
+<structname>v4l2_fmt</structname> <structfield>type</structfield>
+field does not contain <constant>V4L2_BUF_TYPE_VBI</constant>. In the
+documentation of the &v4l2-vbi-format;
+<structfield>offset</structfield> field the ambiguous phrase "rising
+edge" was changed to "leading edge".</para>
+ </section>
+
+ <section>
+ <title>V4L2 Version 0.20 2000-11-23</title>
+
+ <para>A number of changes were made to the raw VBI
+interface.</para>
+
+ <orderedlist>
+ <listitem>
+ <para>Figures clarifying the line numbering scheme were
+added to the V4L2 API specification. The
+<structfield>start</structfield>[0] and
+<structfield>start</structfield>[1] fields no longer count line
+numbers beginning at zero. Rationale: a) The previous definition was
+unclear. b) The <structfield>start</structfield>[] values are ordinal
+numbers. c) There is no point in inventing a new line numbering
+scheme. We now use line number as defined by ITU-R, period.
+Compatibility: Add one to the start values. Applications depending on
+the previous semantics may not function correctly.</para>
+ </listitem>
+
+ <listitem>
+ <para>The restriction "count[0] &gt; 0 and count[1] &gt; 0"
+has been relaxed to "(count[0] + count[1]) &gt; 0". Rationale:
+Drivers may allocate resources at scan line granularity and some data
+services are transmitted only on the first field. The comment that
+both <structfield>count</structfield> values will usually be equal is
+misleading and pointless and has been removed. This change
+<emphasis>breaks compatibility</emphasis> with earlier versions:
+Drivers may return EINVAL, applications may not function
+correctly.</para>
+ </listitem>
+
+ <listitem>
+ <para>Drivers are again permitted to return negative
+(unknown) start values as proposed earlier. Why this feature was
+dropped is unclear. This change may <emphasis>break
+compatibility</emphasis> with applications depending on the start
+values being positive. The use of <constant>EBUSY</constant> and
+<constant>EINVAL</constant> error codes with the &VIDIOC-S-FMT; ioctl
+was clarified. The &EBUSY; was finally documented, and the
+<structfield>reserved2</structfield> field which was previously
+mentioned only in the <filename>videodev.h</filename> header
+file.</para>
+ </listitem>
+
+ <listitem>
+ <para>New buffer types
+<constant>V4L2_TYPE_VBI_INPUT</constant> and
+<constant>V4L2_TYPE_VBI_OUTPUT</constant> were added. The former is an
+alias for the old <constant>V4L2_TYPE_VBI</constant>, the latter was
+missing in the <filename>videodev.h</filename> file.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 Version 0.20 2002-07-25</title>
+ <para>Added sliced VBI interface proposal.</para>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.5.46, 2002-10</title>
+
+ <para>Around October-November 2002, prior to an announced
+feature freeze of Linux 2.5, the API was revised, drawing from
+experience with V4L2 0.20. This unnamed version was finally merged
+into Linux 2.5.46.</para>
+
+ <orderedlist>
+ <listitem>
+ <para>As specified in <xref linkend="related" />, drivers
+must make related device functions available under all minor device
+numbers.</para>
+ </listitem>
+
+ <listitem>
+ <para>The &func-open; function requires access mode
+<constant>O_RDWR</constant> regardless of the device type. All V4L2
+drivers exchanging data with applications must support the
+<constant>O_NONBLOCK</constant> flag. The <constant>O_NOIO</constant>
+flag, a V4L2 symbol which aliased the meaningless
+<constant>O_TRUNC</constant> to indicate accesses without data
+exchange (panel applications) was dropped. Drivers must stay in "panel
+mode" until the application attempts to initiate a data exchange, see
+<xref linkend="open" />.</para>
+ </listitem>
+
+ <listitem>
+ <para>The &v4l2-capability; changed dramatically. Note that
+also the size of the structure changed, which is encoded in the ioctl
+request code, thus older V4L2 devices will respond with an &EINVAL; to
+the new &VIDIOC-QUERYCAP; ioctl.</para>
+
+ <para>There are new fields to identify the driver, a new RDS
+device function <constant>V4L2_CAP_RDS_CAPTURE</constant>, the
+<constant>V4L2_CAP_AUDIO</constant> flag indicates if the device has
+any audio connectors, another I/O capability
+<constant>V4L2_CAP_ASYNCIO</constant> can be flagged. In response to
+these changes the <structfield>type</structfield> field became a bit
+set and was merged into the <structfield>flags</structfield> field.
+<constant>V4L2_FLAG_TUNER</constant> was renamed to
+<constant>V4L2_CAP_TUNER</constant>,
+<constant>V4L2_CAP_VIDEO_OVERLAY</constant> replaced
+<constant>V4L2_FLAG_PREVIEW</constant> and
+<constant>V4L2_CAP_VBI_CAPTURE</constant> and
+<constant>V4L2_CAP_VBI_OUTPUT</constant> replaced
+<constant>V4L2_FLAG_DATA_SERVICE</constant>.
+<constant>V4L2_FLAG_READ</constant> and
+<constant>V4L2_FLAG_WRITE</constant> were merged into
+<constant>V4L2_CAP_READWRITE</constant>.</para>
+
+ <para>The redundant fields
+<structfield>inputs</structfield>, <structfield>outputs</structfield>
+and <structfield>audios</structfield> were removed. These properties
+can be determined as described in <xref linkend="video" /> and <xref
+linkend="audio" />.</para>
+
+ <para>The somewhat volatile and therefore barely useful
+fields <structfield>maxwidth</structfield>,
+<structfield>maxheight</structfield>,
+<structfield>minwidth</structfield>,
+<structfield>minheight</structfield>,
+<structfield>maxframerate</structfield> were removed. This information
+is available as described in <xref linkend="format" /> and
+<xref linkend="standard" />.</para>
+
+ <para><constant>V4L2_FLAG_SELECT</constant> was removed. We
+believe the select() function is important enough to require support
+of it in all V4L2 drivers exchanging data with applications. The
+redundant <constant>V4L2_FLAG_MONOCHROME</constant> flag was removed,
+this information is available as described in <xref
+ linkend="format" />.</para>
+ </listitem>
+
+ <listitem>
+ <para>In &v4l2-input; the
+<structfield>assoc_audio</structfield> field and the
+<structfield>capability</structfield> field and its only flag
+<constant>V4L2_INPUT_CAP_AUDIO</constant> was replaced by the new
+<structfield>audioset</structfield> field. Instead of linking one
+video input to one audio input this field reports all audio inputs
+this video input combines with.</para>
+
+ <para>New fields are <structfield>tuner</structfield>
+(reversing the former link from tuners to video inputs),
+<structfield>std</structfield> and
+<structfield>status</structfield>.</para>
+
+ <para>Accordingly &v4l2-output; lost its
+<structfield>capability</structfield> and
+<structfield>assoc_audio</structfield> fields.
+<structfield>audioset</structfield>,
+<structfield>modulator</structfield> and
+<structfield>std</structfield> where added instead.</para>
+ </listitem>
+
+ <listitem>
+ <para>The &v4l2-audio; field
+<structfield>audio</structfield> was renamed to
+<structfield>index</structfield>, for consistency with other
+structures. A new capability flag
+<constant>V4L2_AUDCAP_STEREO</constant> was added to indicated if the
+audio input in question supports stereo sound.
+<constant>V4L2_AUDCAP_EFFECTS</constant> and the corresponding
+<constant>V4L2_AUDMODE</constant> flags where removed. This can be
+easily implemented using controls. (However the same applies to AVL
+which is still there.)</para>
+
+ <para>Again for consistency the &v4l2-audioout; field
+<structfield>audio</structfield> was renamed to
+<structfield>index</structfield>.</para>
+ </listitem>
+
+ <listitem>
+ <para>The &v4l2-tuner;
+<structfield>input</structfield> field was replaced by an
+<structfield>index</structfield> field, permitting devices with
+multiple tuners. The link between video inputs and tuners is now
+reversed, inputs point to their tuner. The
+<structfield>std</structfield> substructure became a
+simple set (more about this below) and moved into &v4l2-input;. A
+<structfield>type</structfield> field was added.</para>
+
+ <para>Accordingly in &v4l2-modulator; the
+<structfield>output</structfield> was replaced by an
+<structfield>index</structfield> field.</para>
+
+ <para>In &v4l2-frequency; the
+<structfield>port</structfield> field was replaced by a
+<structfield>tuner</structfield> field containing the respective tuner
+or modulator index number. A tuner <structfield>type</structfield>
+field was added and the <structfield>reserved</structfield> field
+became larger for future extensions (satellite tuners in
+particular).</para>
+ </listitem>
+
+ <listitem>
+ <para>The idea of completely transparent video standards was
+dropped. Experience showed that applications must be able to work with
+video standards beyond presenting the user a menu. Instead of
+enumerating supported standards with an ioctl applications can now
+refer to standards by &v4l2-std-id; and symbols defined in the
+<filename>videodev2.h</filename> header file. For details see <xref
+ linkend="standard" />. The &VIDIOC-G-STD; and
+&VIDIOC-S-STD; now take a pointer to this type as argument.
+&VIDIOC-QUERYSTD; was added to autodetect the received standard, if
+the hardware has this capability. In &v4l2-standard; an
+<structfield>index</structfield> field was added for &VIDIOC-ENUMSTD;.
+A &v4l2-std-id; field named <structfield>id</structfield> was added as
+machine readable identifier, also replacing the
+<structfield>transmission</structfield> field. The misleading
+<structfield>framerate</structfield> field was renamed
+to <structfield>frameperiod</structfield>. The now obsolete
+<structfield>colorstandard</structfield> information, originally
+needed to distguish between variations of standards, were
+removed.</para>
+
+ <para>Struct <structname>v4l2_enumstd</structname> ceased to
+be. &VIDIOC-ENUMSTD; now takes a pointer to a &v4l2-standard;
+directly. The information which standards are supported by a
+particular video input or output moved into &v4l2-input; and
+&v4l2-output; fields named <structfield>std</structfield>,
+respectively.</para>
+ </listitem>
+
+ <listitem>
+ <para>The &v4l2-queryctrl; fields
+<structfield>category</structfield> and
+<structfield>group</structfield> did not catch on and/or were not
+implemented as expected and therefore removed.</para>
+ </listitem>
+
+ <listitem>
+ <para>The &VIDIOC-TRY-FMT; ioctl was added to negotiate data
+formats as with &VIDIOC-S-FMT;, but without the overhead of
+programming the hardware and regardless of I/O in progress.</para>
+
+ <para>In &v4l2-format; the <structfield>fmt</structfield>
+union was extended to contain &v4l2-window;. All image format
+negotiations are now possible with <constant>VIDIOC_G_FMT</constant>,
+<constant>VIDIOC_S_FMT</constant> and
+<constant>VIDIOC_TRY_FMT</constant>; ioctl. The
+<constant>VIDIOC_G_WIN</constant> and
+<constant>VIDIOC_S_WIN</constant> ioctls to prepare for a video
+overlay were removed. The <structfield>type</structfield> field
+changed to type &v4l2-buf-type; and the buffer type names changed as
+follows.<informaltable>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>Old defines</entry>
+ <entry>&v4l2-buf-type;</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_CAPTURE</constant></entry>
+ <entry><constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_CODECIN</constant></entry>
+ <entry>Omitted for now</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_CODECOUT</constant></entry>
+ <entry>Omitted for now</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_EFFECTSIN</constant></entry>
+ <entry>Omitted for now</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_EFFECTSIN2</constant></entry>
+ <entry>Omitted for now</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_EFFECTSOUT</constant></entry>
+ <entry>Omitted for now</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_VIDEOOUT</constant></entry>
+ <entry><constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant></entry>
+ </row>
+ <row>
+ <entry><constant>-</constant></entry>
+ <entry><constant>V4L2_BUF_TYPE_VIDEO_OVERLAY</constant></entry>
+ </row>
+ <row>
+ <entry><constant>-</constant></entry>
+ <entry><constant>V4L2_BUF_TYPE_VBI_CAPTURE</constant></entry>
+ </row>
+ <row>
+ <entry><constant>-</constant></entry>
+ <entry><constant>V4L2_BUF_TYPE_VBI_OUTPUT</constant></entry>
+ </row>
+ <row>
+ <entry><constant>-</constant></entry>
+ <entry><constant>V4L2_BUF_TYPE_SLICED_VBI_CAPTURE</constant></entry>
+ </row>
+ <row>
+ <entry><constant>-</constant></entry>
+ <entry><constant>V4L2_BUF_TYPE_SLICED_VBI_OUTPUT</constant></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_PRIVATE_BASE</constant></entry>
+ <entry><constant>V4L2_BUF_TYPE_PRIVATE</constant> (but this is deprecated)</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable></para>
+ </listitem>
+
+ <listitem>
+ <para>In &v4l2-fmtdesc; a &v4l2-buf-type; field named
+<structfield>type</structfield> was added as in &v4l2-format;. The
+<constant>VIDIOC_ENUM_FBUFFMT</constant> ioctl is no longer needed and
+was removed. These calls can be replaced by &VIDIOC-ENUM-FMT; with
+type <constant>V4L2_BUF_TYPE_VIDEO_OVERLAY</constant>.</para>
+ </listitem>
+
+ <listitem>
+ <para>In &v4l2-pix-format; the
+<structfield>depth</structfield> field was removed, assuming
+applications which recognize the format by its four-character-code
+already know the color depth, and others do not care about it. The
+same rationale lead to the removal of the
+<constant>V4L2_FMT_FLAG_COMPRESSED</constant> flag. The
+<constant>V4L2_FMT_FLAG_SWCONVECOMPRESSED</constant> flag was removed
+because drivers are not supposed to convert images in kernel space. A
+user library of conversion functions should be provided instead. The
+<constant>V4L2_FMT_FLAG_BYTESPERLINE</constant> flag was redundant.
+Applications can set the <structfield>bytesperline</structfield> field
+to zero to get a reasonable default. Since the remaining flags were
+replaced as well, the <structfield>flags</structfield> field itself
+was removed.</para>
+ <para>The interlace flags were replaced by a &v4l2-field;
+value in a newly added <structfield>field</structfield>
+field.<informaltable>
+ <tgroup cols="2">
+ <thead>
+ <row>
+ <entry>Old flag</entry>
+ <entry>&v4l2-field;</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_FMT_FLAG_NOT_INTERLACED</constant></entry>
+ <entry>?</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FMT_FLAG_INTERLACED</constant>
+= <constant>V4L2_FMT_FLAG_COMBINED</constant></entry>
+ <entry><constant>V4L2_FIELD_INTERLACED</constant></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FMT_FLAG_TOPFIELD</constant>
+= <constant>V4L2_FMT_FLAG_ODDFIELD</constant></entry>
+ <entry><constant>V4L2_FIELD_TOP</constant></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FMT_FLAG_BOTFIELD</constant>
+= <constant>V4L2_FMT_FLAG_EVENFIELD</constant></entry>
+ <entry><constant>V4L2_FIELD_BOTTOM</constant></entry>
+ </row>
+ <row>
+ <entry><constant>-</constant></entry>
+ <entry><constant>V4L2_FIELD_SEQ_TB</constant></entry>
+ </row>
+ <row>
+ <entry><constant>-</constant></entry>
+ <entry><constant>V4L2_FIELD_SEQ_BT</constant></entry>
+ </row>
+ <row>
+ <entry><constant>-</constant></entry>
+ <entry><constant>V4L2_FIELD_ALTERNATE</constant></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable></para>
+
+ <para>The color space flags were replaced by a
+&v4l2-colorspace; value in a newly added
+<structfield>colorspace</structfield> field, where one of
+<constant>V4L2_COLORSPACE_SMPTE170M</constant>,
+<constant>V4L2_COLORSPACE_BT878</constant>,
+<constant>V4L2_COLORSPACE_470_SYSTEM_M</constant> or
+<constant>V4L2_COLORSPACE_470_SYSTEM_BG</constant> replaces
+<constant>V4L2_FMT_CS_601YUV</constant>.</para>
+ </listitem>
+
+ <listitem>
+ <para>In &v4l2-requestbuffers; the
+<structfield>type</structfield> field was properly defined as
+&v4l2-buf-type;. Buffer types changed as mentioned above. A new
+<structfield>memory</structfield> field of type &v4l2-memory; was
+added to distinguish between I/O methods using buffers allocated
+by the driver or the application. See <xref linkend="io" /> for
+details.</para>
+ </listitem>
+
+ <listitem>
+ <para>In &v4l2-buffer; the <structfield>type</structfield>
+field was properly defined as &v4l2-buf-type;. Buffer types changed as
+mentioned above. A <structfield>field</structfield> field of type
+&v4l2-field; was added to indicate if a buffer contains a top or
+bottom field. The old field flags were removed. Since no unadjusted
+system time clock was added to the kernel as planned, the
+<structfield>timestamp</structfield> field changed back from type
+stamp_t, an unsigned 64 bit integer expressing the sample time in
+nanoseconds, to struct <structname>timeval</structname>. With the
+addition of a second memory mapping method the
+<structfield>offset</structfield> field moved into union
+<structfield>m</structfield>, and a new
+<structfield>memory</structfield> field of type &v4l2-memory; was
+added to distinguish between I/O methods. See <xref linkend="io" />
+for details.</para>
+
+ <para>The <constant>V4L2_BUF_REQ_CONTIG</constant>
+flag was used by the V4L compatibility layer, after changes to this
+code it was no longer needed. The
+<constant>V4L2_BUF_ATTR_DEVICEMEM</constant> flag would indicate if
+the buffer was indeed allocated in device memory rather than DMA-able
+system memory. It was barely useful and so was removed.</para>
+ </listitem>
+
+ <listitem>
+ <para>In &v4l2-framebuffer; the
+<structfield>base[3]</structfield> array anticipating double- and
+triple-buffering in off-screen video memory, however without defining
+a synchronization mechanism, was replaced by a single pointer. The
+<constant>V4L2_FBUF_CAP_SCALEUP</constant> and
+<constant>V4L2_FBUF_CAP_SCALEDOWN</constant> flags were removed.
+Applications can determine this capability more accurately using the
+new cropping and scaling interface. The
+<constant>V4L2_FBUF_CAP_CLIPPING</constant> flag was replaced by
+<constant>V4L2_FBUF_CAP_LIST_CLIPPING</constant> and
+<constant>V4L2_FBUF_CAP_BITMAP_CLIPPING</constant>.</para>
+ </listitem>
+
+ <listitem>
+ <para>In &v4l2-clip; the <structfield>x</structfield>,
+<structfield>y</structfield>, <structfield>width</structfield> and
+<structfield>height</structfield> field moved into a
+<structfield>c</structfield> substructure of type &v4l2-rect;. The
+<structfield>x</structfield> and <structfield>y</structfield> fields
+were renamed to <structfield>left</structfield> and
+<structfield>top</structfield>, &ie; offsets to a context dependent
+origin.</para>
+ </listitem>
+
+ <listitem>
+ <para>In &v4l2-window; the <structfield>x</structfield>,
+<structfield>y</structfield>, <structfield>width</structfield> and
+<structfield>height</structfield> field moved into a
+<structfield>w</structfield> substructure as above. A
+<structfield>field</structfield> field of type %v4l2-field; was added
+to distinguish between field and frame (interlaced) overlay.</para>
+ </listitem>
+
+ <listitem>
+ <para>The digital zoom interface, including struct
+<structname>v4l2_zoomcap</structname>, struct
+<structname>v4l2_zoom</structname>,
+<constant>V4L2_ZOOM_NONCAP</constant> and
+<constant>V4L2_ZOOM_WHILESTREAMING</constant> was replaced by a new
+cropping and scaling interface. The previously unused struct
+<structname>v4l2_cropcap</structname> and
+<structname>v4l2_crop</structname> where redefined for this purpose.
+See <xref linkend="crop" /> for details.</para>
+ </listitem>
+
+ <listitem>
+ <para>In &v4l2-vbi-format; the
+<structfield>SAMPLE_FORMAT</structfield> field now contains a
+four-character-code as used to identify video image formats and
+<constant>V4L2_PIX_FMT_GREY</constant> replaces the
+<constant>V4L2_VBI_SF_UBYTE</constant> define. The
+<structfield>reserved</structfield> field was extended.</para>
+ </listitem>
+
+ <listitem>
+ <para>In &v4l2-captureparm; the type of the
+<structfield>timeperframe</structfield> field changed from unsigned
+long to &v4l2-fract;. This allows the accurate expression of multiples
+of the NTSC-M frame rate 30000 / 1001. A new field
+<structfield>readbuffers</structfield> was added to control the driver
+behaviour in read I/O mode.</para>
+
+ <para>Similar changes were made to &v4l2-outputparm;.</para>
+ </listitem>
+
+ <listitem>
+ <para>The struct <structname>v4l2_performance</structname>
+and <constant>VIDIOC_G_PERF</constant> ioctl were dropped. Except when
+using the <link linkend="rw">read/write I/O method</link>, which is
+limited anyway, this information is already available to
+applications.</para>
+ </listitem>
+
+ <listitem>
+ <para>The example transformation from RGB to YCbCr color
+space in the old V4L2 documentation was inaccurate, this has been
+corrected in <xref linkend="pixfmt" />.<!-- 0.5670G should be
+0.587, and 127/112 != 255/224 --></para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 2003-06-19</title>
+
+ <orderedlist>
+ <listitem>
+ <para>A new capability flag
+<constant>V4L2_CAP_RADIO</constant> was added for radio devices. Prior
+to this change radio devices would identify solely by having exactly one
+tuner whose type field reads <constant>V4L2_TUNER_RADIO</constant>.</para>
+ </listitem>
+
+ <listitem>
+ <para>An optional driver access priority mechanism was
+added, see <xref linkend="app-pri" /> for details.</para>
+ </listitem>
+
+ <listitem>
+ <para>The audio input and output interface was found to be
+incomplete.</para>
+ <para>Previously the &VIDIOC-G-AUDIO;
+ioctl would enumerate the available audio inputs. An ioctl to
+determine the current audio input, if more than one combines with the
+current video input, did not exist. So
+<constant>VIDIOC_G_AUDIO</constant> was renamed to
+<constant>VIDIOC_G_AUDIO_OLD</constant>, this ioctl was removed on
+Kernel 2.6.39. The &VIDIOC-ENUMAUDIO; ioctl was added to enumerate
+audio inputs, while &VIDIOC-G-AUDIO; now reports the current audio
+input.</para>
+ <para>The same changes were made to &VIDIOC-G-AUDOUT; and
+&VIDIOC-ENUMAUDOUT;.</para>
+ <para>Until further the "videodev" module will automatically
+translate between the old and new ioctls, but drivers and applications
+must be updated to successfully compile again.</para>
+ </listitem>
+
+ <listitem>
+ <para>The &VIDIOC-OVERLAY; ioctl was incorrectly defined with
+write-read parameter. It was changed to write-only, while the write-read
+version was renamed to <constant>VIDIOC_OVERLAY_OLD</constant>. The old
+ioctl was removed on Kernel 2.6.39. Until further the "videodev"
+kernel module will automatically translate to the new version, so drivers
+must be recompiled, but not applications.</para>
+ </listitem>
+
+ <listitem>
+ <para><xref linkend="overlay" /> incorrectly stated that
+clipping rectangles define regions where the video can be seen.
+Correct is that clipping rectangles define regions where
+<emphasis>no</emphasis> video shall be displayed and so the graphics
+surface can be seen.</para>
+ </listitem>
+
+ <listitem>
+ <para>The &VIDIOC-S-PARM; and &VIDIOC-S-CTRL; ioctls were
+defined with write-only parameter, inconsistent with other ioctls
+modifying their argument. They were changed to write-read, while a
+<constant>_OLD</constant> suffix was added to the write-only versions.
+The old ioctls were removed on Kernel 2.6.39. Drivers and
+applications assuming a constant parameter need an update.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 2003-11-05</title>
+ <orderedlist>
+ <listitem>
+ <para>In <xref linkend="pixfmt-rgb" /> the following pixel
+formats were incorrectly transferred from Bill Dirks' V4L2
+specification. Descriptions below refer to bytes in memory, in
+ascending address order.<informaltable>
+ <tgroup cols="3">
+ <thead>
+ <row>
+ <entry>Symbol</entry>
+ <entry>In this document prior to revision
+0.5</entry>
+ <entry>Corrected</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_PIX_FMT_RGB24</constant></entry>
+ <entry>B, G, R</entry>
+ <entry>R, G, B</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_PIX_FMT_BGR24</constant></entry>
+ <entry>R, G, B</entry>
+ <entry>B, G, R</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_PIX_FMT_RGB32</constant></entry>
+ <entry>B, G, R, X</entry>
+ <entry>R, G, B, X</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_PIX_FMT_BGR32</constant></entry>
+ <entry>R, G, B, X</entry>
+ <entry>B, G, R, X</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable> The
+<constant>V4L2_PIX_FMT_BGR24</constant> example was always
+correct.</para>
+ <para>In <xref linkend="v4l-image-properties" /> the mapping
+of the V4L <constant>VIDEO_PALETTE_RGB24</constant> and
+<constant>VIDEO_PALETTE_RGB32</constant> formats to V4L2 pixel formats
+was accordingly corrected.</para>
+ </listitem>
+
+ <listitem>
+ <para>Unrelated to the fixes above, drivers may still
+interpret some V4L2 RGB pixel formats differently. These issues have
+yet to be addressed, for details see <xref
+ linkend="pixfmt-rgb" />.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.6, 2004-05-09</title>
+ <orderedlist>
+ <listitem>
+ <para>The &VIDIOC-CROPCAP; ioctl was incorrectly defined
+with read-only parameter. It is now defined as write-read ioctl, while
+the read-only version was renamed to
+<constant>VIDIOC_CROPCAP_OLD</constant>. The old ioctl was removed
+on Kernel 2.6.39.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.8</title>
+ <orderedlist>
+ <listitem>
+ <para>A new field <structfield>input</structfield> (former
+<structfield>reserved[0]</structfield>) was added to the &v4l2-buffer;
+structure. Purpose of this field is to alternate between video inputs
+(&eg; cameras) in step with the video capturing process. This function
+must be enabled with the new <constant>V4L2_BUF_FLAG_INPUT</constant>
+flag. The <structfield>flags</structfield> field is no longer
+read-only.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 spec erratum 2004-08-01</title>
+
+ <orderedlist>
+ <listitem>
+ <para>The return value of the
+<xref linkend="func-open" /> function was incorrectly documented.</para>
+ </listitem>
+
+ <listitem>
+ <para>Audio output ioctls end in -AUDOUT, not -AUDIOOUT.</para>
+ </listitem>
+
+ <listitem>
+ <para>In the Current Audio Input example the
+<constant>VIDIOC_G_AUDIO</constant> ioctl took the wrong
+argument.</para>
+ </listitem>
+
+ <listitem>
+ <para>The documentation of the &VIDIOC-QBUF; and
+&VIDIOC-DQBUF; ioctls did not mention the &v4l2-buffer;
+<structfield>memory</structfield> field. It was also missing from
+examples. Also on the <constant>VIDIOC_DQBUF</constant> page the &EIO;
+was not documented.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.14</title>
+ <orderedlist>
+ <listitem>
+ <para>A new sliced VBI interface was added. It is documented
+in <xref linkend="sliced" /> and replaces the interface first
+proposed in V4L2 specification 0.8.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.15</title>
+ <orderedlist>
+ <listitem>
+ <para>The &VIDIOC-LOG-STATUS; ioctl was added.</para>
+ </listitem>
+
+ <listitem>
+ <para>New video standards
+<constant>V4L2_STD_NTSC_443</constant>,
+<constant>V4L2_STD_SECAM_LC</constant>,
+<constant>V4L2_STD_SECAM_DK</constant> (a set of SECAM D, K and K1),
+and <constant>V4L2_STD_ATSC</constant> (a set of
+<constant>V4L2_STD_ATSC_8_VSB</constant> and
+<constant>V4L2_STD_ATSC_16_VSB</constant>) were defined. Note the
+<constant>V4L2_STD_525_60</constant> set now includes
+<constant>V4L2_STD_NTSC_443</constant>. See also <xref
+ linkend="v4l2-std-id" />.</para>
+ </listitem>
+
+ <listitem>
+ <para>The <constant>VIDIOC_G_COMP</constant> and
+<constant>VIDIOC_S_COMP</constant> ioctl were renamed to
+<constant>VIDIOC_G_MPEGCOMP</constant> and
+<constant>VIDIOC_S_MPEGCOMP</constant> respectively. Their argument
+was replaced by a struct
+<structname>v4l2_mpeg_compression</structname> pointer. (The
+<constant>VIDIOC_G_MPEGCOMP</constant> and
+<constant>VIDIOC_S_MPEGCOMP</constant> ioctls where removed in Linux
+2.6.25.)</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 spec erratum 2005-11-27</title>
+ <para>The capture example in <xref linkend="capture-example" />
+called the &VIDIOC-S-CROP; ioctl without checking if cropping is
+supported. In the video standard selection example in
+<xref linkend="standard" /> the &VIDIOC-S-STD; call used the wrong
+argument type.</para>
+ </section>
+
+ <section>
+ <title>V4L2 spec erratum 2006-01-10</title>
+ <orderedlist>
+ <listitem>
+ <para>The <constant>V4L2_IN_ST_COLOR_KILL</constant> flag in
+&v4l2-input; not only indicates if the color killer is enabled, but
+also if it is active. (The color killer disables color decoding when
+it detects no color in the video signal to improve the image
+quality.)</para>
+ </listitem>
+
+ <listitem>
+ <para>&VIDIOC-S-PARM; is a write-read ioctl, not write-only as
+stated on its reference page. The ioctl changed in 2003 as noted above.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 spec erratum 2006-02-03</title>
+ <orderedlist>
+ <listitem>
+ <para>In &v4l2-captureparm; and &v4l2-outputparm; the
+<structfield>timeperframe</structfield> field gives the time in
+seconds, not microseconds.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 spec erratum 2006-02-04</title>
+ <orderedlist>
+ <listitem>
+ <para>The <structfield>clips</structfield> field in
+&v4l2-window; must point to an array of &v4l2-clip;, not a linked
+list, because drivers ignore the struct
+<structname>v4l2_clip</structname>.<structfield>next</structfield>
+pointer.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.17</title>
+ <orderedlist>
+ <listitem>
+ <para>New video standard macros were added:
+<constant>V4L2_STD_NTSC_M_KR</constant> (NTSC M South Korea), and the
+sets <constant>V4L2_STD_MN</constant>,
+<constant>V4L2_STD_B</constant>, <constant>V4L2_STD_GH</constant> and
+<constant>V4L2_STD_DK</constant>. The
+<constant>V4L2_STD_NTSC</constant> and
+<constant>V4L2_STD_SECAM</constant> sets now include
+<constant>V4L2_STD_NTSC_M_KR</constant> and
+<constant>V4L2_STD_SECAM_LC</constant> respectively.</para>
+ </listitem>
+
+ <listitem>
+ <para>A new <constant>V4L2_TUNER_MODE_LANG1_LANG2</constant>
+was defined to record both languages of a bilingual program. The
+use of <constant>V4L2_TUNER_MODE_STEREO</constant> for this purpose
+is deprecated now. See the &VIDIOC-G-TUNER; section for
+details.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 spec erratum 2006-09-23 (Draft 0.15)</title>
+ <orderedlist>
+ <listitem>
+ <para>In various places
+<constant>V4L2_BUF_TYPE_SLICED_VBI_CAPTURE</constant> and
+<constant>V4L2_BUF_TYPE_SLICED_VBI_OUTPUT</constant> of the sliced VBI
+interface were not mentioned along with other buffer types.</para>
+ </listitem>
+
+ <listitem>
+ <para>In <xref linkend="vidioc-g-audio" /> it was clarified
+that the &v4l2-audio; <structfield>mode</structfield> field is a flags
+field.</para>
+ </listitem>
+
+ <listitem>
+ <para><xref linkend="vidioc-querycap" /> did not mention the
+sliced VBI and radio capability flags.</para>
+ </listitem>
+
+ <listitem>
+ <para>In <xref linkend="vidioc-g-frequency" /> it was
+clarified that applications must initialize the tuner
+<structfield>type</structfield> field of &v4l2-frequency; before
+calling &VIDIOC-S-FREQUENCY;.</para>
+ </listitem>
+
+ <listitem>
+ <para>The <structfield>reserved</structfield> array
+in &v4l2-requestbuffers; has 2 elements, not 32.</para>
+ </listitem>
+
+ <listitem>
+ <para>In <xref linkend="output" /> and <xref
+ linkend="raw-vbi" /> the device file names
+<filename>/dev/vout</filename> which never caught on were replaced
+by <filename>/dev/video</filename>.</para>
+ </listitem>
+
+ <listitem>
+ <para>With Linux 2.6.15 the possible range for VBI device minor
+numbers was extended from 224-239 to 224-255. Accordingly device file names
+<filename>/dev/vbi0</filename> to <filename>/dev/vbi31</filename> are
+possible now.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.18</title>
+ <orderedlist>
+ <listitem>
+ <para>New ioctls &VIDIOC-G-EXT-CTRLS;, &VIDIOC-S-EXT-CTRLS;
+and &VIDIOC-TRY-EXT-CTRLS; were added, a flag to skip unsupported
+controls with &VIDIOC-QUERYCTRL;, new control types
+<constant>V4L2_CTRL_TYPE_INTEGER64</constant> and
+<constant>V4L2_CTRL_TYPE_CTRL_CLASS</constant> (<xref
+ linkend="v4l2-ctrl-type" />), and new control flags
+<constant>V4L2_CTRL_FLAG_READ_ONLY</constant>,
+<constant>V4L2_CTRL_FLAG_UPDATE</constant>,
+<constant>V4L2_CTRL_FLAG_INACTIVE</constant> and
+<constant>V4L2_CTRL_FLAG_SLIDER</constant> (<xref
+ linkend="control-flags" />). See <xref
+ linkend="extended-controls" /> for details.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.19</title>
+ <orderedlist>
+ <listitem>
+ <para>In &v4l2-sliced-vbi-cap; a buffer type field was added
+replacing a reserved field. Note on architectures where the size of
+enum types differs from int types the size of the structure changed.
+The &VIDIOC-G-SLICED-VBI-CAP; ioctl was redefined from being read-only
+to write-read. Applications must initialize the type field and clear
+the reserved fields now. These changes may <emphasis>break the
+compatibility</emphasis> with older drivers and applications.</para>
+ </listitem>
+
+ <listitem>
+ <para>The ioctls &VIDIOC-ENUM-FRAMESIZES; and
+&VIDIOC-ENUM-FRAMEINTERVALS; were added.</para>
+ </listitem>
+
+ <listitem>
+ <para>A new pixel format <constant>V4L2_PIX_FMT_RGB444</constant> (<xref
+linkend="rgb-formats" />) was added.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 spec erratum 2006-10-12 (Draft 0.17)</title>
+ <orderedlist>
+ <listitem>
+ <para><constant>V4L2_PIX_FMT_HM12</constant> (<xref
+linkend="reserved-formats" />) is a YUV 4:2:0, not 4:2:2 format.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.21</title>
+ <orderedlist>
+ <listitem>
+ <para>The <filename>videodev2.h</filename> header file is
+now dual licensed under GNU General Public License version two or
+later, and under a 3-clause BSD-style license.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.22</title>
+ <orderedlist>
+ <listitem>
+ <para>Two new field orders
+ <constant>V4L2_FIELD_INTERLACED_TB</constant> and
+ <constant>V4L2_FIELD_INTERLACED_BT</constant> were
+ added. See <xref linkend="v4l2-field" /> for details.</para>
+ </listitem>
+
+ <listitem>
+ <para>Three new clipping/blending methods with a global or
+straight or inverted local alpha value were added to the video overlay
+interface. See the description of the &VIDIOC-G-FBUF; and
+&VIDIOC-S-FBUF; ioctls for details.</para>
+ <para>A new <structfield>global_alpha</structfield> field
+was added to <link
+linkend="v4l2-window"><structname>v4l2_window</structname></link>,
+extending the structure. This may <emphasis>break
+compatibility</emphasis> with applications using a struct
+<structname>v4l2_window</structname> directly. However the <link
+linkend="vidioc-g-fmt">VIDIOC_G/S/TRY_FMT</link> ioctls, which take a
+pointer to a <link linkend="v4l2-format">v4l2_format</link> parent
+structure with padding bytes at the end, are not affected.</para>
+ </listitem>
+
+ <listitem>
+ <para>The format of the <structfield>chromakey</structfield>
+field in &v4l2-window; changed from "host order RGB32" to a pixel
+value in the same format as the framebuffer. This may <emphasis>break
+compatibility</emphasis> with existing applications. Drivers
+supporting the "host order RGB32" format are not known.</para>
+ </listitem>
+
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.24</title>
+ <orderedlist>
+ <listitem>
+ <para>The pixel formats
+<constant>V4L2_PIX_FMT_PAL8</constant>,
+<constant>V4L2_PIX_FMT_YUV444</constant>,
+<constant>V4L2_PIX_FMT_YUV555</constant>,
+<constant>V4L2_PIX_FMT_YUV565</constant> and
+<constant>V4L2_PIX_FMT_YUV32</constant> were added.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.25</title>
+ <orderedlist>
+ <listitem>
+ <para>The pixel formats <link linkend="V4L2-PIX-FMT-Y16">
+<constant>V4L2_PIX_FMT_Y16</constant></link> and <link
+linkend="V4L2-PIX-FMT-SBGGR16">
+<constant>V4L2_PIX_FMT_SBGGR16</constant></link> were added.</para>
+ </listitem>
+ <listitem>
+ <para>New <link linkend="control">controls</link>
+<constant>V4L2_CID_POWER_LINE_FREQUENCY</constant>,
+<constant>V4L2_CID_HUE_AUTO</constant>,
+<constant>V4L2_CID_WHITE_BALANCE_TEMPERATURE</constant>,
+<constant>V4L2_CID_SHARPNESS</constant> and
+<constant>V4L2_CID_BACKLIGHT_COMPENSATION</constant> were added. The
+controls <constant>V4L2_CID_BLACK_LEVEL</constant>,
+<constant>V4L2_CID_WHITENESS</constant>,
+<constant>V4L2_CID_HCENTER</constant> and
+<constant>V4L2_CID_VCENTER</constant> were deprecated.
+</para>
+ </listitem>
+ <listitem>
+ <para>A <link linkend="camera-controls">Camera controls
+class</link> was added, with the new controls
+<constant>V4L2_CID_EXPOSURE_AUTO</constant>,
+<constant>V4L2_CID_EXPOSURE_ABSOLUTE</constant>,
+<constant>V4L2_CID_EXPOSURE_AUTO_PRIORITY</constant>,
+<constant>V4L2_CID_PAN_RELATIVE</constant>,
+<constant>V4L2_CID_TILT_RELATIVE</constant>,
+<constant>V4L2_CID_PAN_RESET</constant>,
+<constant>V4L2_CID_TILT_RESET</constant>,
+<constant>V4L2_CID_PAN_ABSOLUTE</constant>,
+<constant>V4L2_CID_TILT_ABSOLUTE</constant>,
+<constant>V4L2_CID_FOCUS_ABSOLUTE</constant>,
+<constant>V4L2_CID_FOCUS_RELATIVE</constant> and
+<constant>V4L2_CID_FOCUS_AUTO</constant>.</para>
+ </listitem>
+ <listitem>
+ <para>The <constant>VIDIOC_G_MPEGCOMP</constant> and
+<constant>VIDIOC_S_MPEGCOMP</constant> ioctls, which were superseded
+by the <link linkend="extended-controls">extended controls</link>
+interface in Linux 2.6.18, where finally removed from the
+<filename>videodev2.h</filename> header file.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.26</title>
+ <orderedlist>
+ <listitem>
+ <para>The pixel formats
+<constant>V4L2_PIX_FMT_Y16</constant> and
+<constant>V4L2_PIX_FMT_SBGGR16</constant> were added.</para>
+ </listitem>
+ <listitem>
+ <para>Added user controls
+<constant>V4L2_CID_CHROMA_AGC</constant> and
+<constant>V4L2_CID_COLOR_KILLER</constant>.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.27</title>
+ <orderedlist>
+ <listitem>
+ <para>The &VIDIOC-S-HW-FREQ-SEEK; ioctl and the
+<constant>V4L2_CAP_HW_FREQ_SEEK</constant> capability were added.</para>
+ </listitem>
+ <listitem>
+ <para>The pixel formats
+<constant>V4L2_PIX_FMT_YVYU</constant>,
+<constant>V4L2_PIX_FMT_PCA501</constant>,
+<constant>V4L2_PIX_FMT_PCA505</constant>,
+<constant>V4L2_PIX_FMT_PCA508</constant>,
+<constant>V4L2_PIX_FMT_PCA561</constant>,
+<constant>V4L2_PIX_FMT_SGBRG8</constant>,
+<constant>V4L2_PIX_FMT_PAC207</constant> and
+<constant>V4L2_PIX_FMT_PJPG</constant> were added.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.28</title>
+ <orderedlist>
+ <listitem>
+ <para>Added <constant>V4L2_MPEG_AUDIO_ENCODING_AAC</constant> and
+<constant>V4L2_MPEG_AUDIO_ENCODING_AC3</constant> MPEG audio encodings.</para>
+ </listitem>
+ <listitem>
+ <para>Added <constant>V4L2_MPEG_VIDEO_ENCODING_MPEG_4_AVC</constant> MPEG
+video encoding.</para>
+ </listitem>
+ <listitem>
+ <para>The pixel formats
+<constant>V4L2_PIX_FMT_SGRBG10</constant> and
+<constant>V4L2_PIX_FMT_SGRBG10DPCM8</constant> were added.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 2.6.29</title>
+ <orderedlist>
+ <listitem>
+ <para>The <constant>VIDIOC_G_CHIP_IDENT</constant> ioctl was renamed
+to <constant>VIDIOC_G_CHIP_IDENT_OLD</constant> and <constant>VIDIOC_DBG_G_CHIP_IDENT</constant>
+was introduced in its place. The old struct <structname>v4l2_chip_ident</structname>
+was renamed to <structname id="v4l2-chip-ident-old">v4l2_chip_ident_old</structname>.</para>
+ </listitem>
+ <listitem>
+ <para>The pixel formats
+<constant>V4L2_PIX_FMT_VYUY</constant>,
+<constant>V4L2_PIX_FMT_NV16</constant> and
+<constant>V4L2_PIX_FMT_NV61</constant> were added.</para>
+ </listitem>
+ <listitem>
+ <para>Added camera controls
+<constant>V4L2_CID_ZOOM_ABSOLUTE</constant>,
+<constant>V4L2_CID_ZOOM_RELATIVE</constant>,
+<constant>V4L2_CID_ZOOM_CONTINUOUS</constant> and
+<constant>V4L2_CID_PRIVACY</constant>.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+ <section>
+ <title>V4L2 in Linux 2.6.30</title>
+ <orderedlist>
+ <listitem>
+ <para>New control flag <constant>V4L2_CTRL_FLAG_WRITE_ONLY</constant> was added.</para>
+ </listitem>
+ <listitem>
+ <para>New control <constant>V4L2_CID_COLORFX</constant> was added.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+ <section>
+ <title>V4L2 in Linux 2.6.32</title>
+ <orderedlist>
+ <listitem>
+ <para>In order to be easier to compare a V4L2 API and a kernel
+version, now V4L2 API is numbered using the Linux Kernel version numeration.</para>
+ </listitem>
+ <listitem>
+ <para>Finalized the RDS capture API. See <xref linkend="rds" /> for
+more information.</para>
+ </listitem>
+ <listitem>
+ <para>Added new capabilities for modulators and RDS encoders.</para>
+ </listitem>
+ <listitem>
+ <para>Add description for libv4l API.</para>
+ </listitem>
+ <listitem>
+ <para>Added support for string controls via new type <constant>V4L2_CTRL_TYPE_STRING</constant>.</para>
+ </listitem>
+ <listitem>
+ <para>Added <constant>V4L2_CID_BAND_STOP_FILTER</constant> documentation.</para>
+ </listitem>
+ <listitem>
+ <para>Added FM Modulator (FM TX) Extended Control Class: <constant>V4L2_CTRL_CLASS_FM_TX</constant> and their Control IDs.</para>
+ </listitem>
+<listitem>
+ <para>Added FM Receiver (FM RX) Extended Control Class: <constant>V4L2_CTRL_CLASS_FM_RX</constant> and their Control IDs.</para>
+ </listitem>
+ <listitem>
+ <para>Added Remote Controller chapter, describing the default Remote Controller mapping for media devices.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+ <section>
+ <title>V4L2 in Linux 2.6.33</title>
+ <orderedlist>
+ <listitem>
+ <para>Added support for Digital Video timings in order to support HDTV receivers and transmitters.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+ <section>
+ <title>V4L2 in Linux 2.6.34</title>
+ <orderedlist>
+ <listitem>
+ <para>Added
+<constant>V4L2_CID_IRIS_ABSOLUTE</constant> and
+<constant>V4L2_CID_IRIS_RELATIVE</constant> controls to the
+ <link linkend="camera-controls">Camera controls class</link>.
+ </para>
+ </listitem>
+ </orderedlist>
+ </section>
+ <section>
+ <title>V4L2 in Linux 2.6.37</title>
+ <orderedlist>
+ <listitem>
+ <para>Remove the vtx (videotext/teletext) API. This API was no longer
+used and no hardware exists to verify the API. Nor were any userspace applications found
+that used it. It was originally scheduled for removal in 2.6.35.
+ </para>
+ </listitem>
+ </orderedlist>
+ </section>
+ <section>
+ <title>V4L2 in Linux 2.6.39</title>
+ <orderedlist>
+ <listitem>
+ <para>The old VIDIOC_*_OLD symbols and V4L1 support were removed.</para>
+ </listitem>
+ <listitem>
+ <para>Multi-planar API added. Does not affect the compatibility of
+ current drivers and applications. See
+ <link linkend="planar-apis">multi-planar API</link>
+ for details.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+ <section>
+ <title>V4L2 in Linux 3.1</title>
+ <orderedlist>
+ <listitem>
+ <para>VIDIOC_QUERYCAP now returns a per-subsystem version instead of a per-driver one.</para>
+ <para>Standardize an error code for invalid ioctl.</para>
+ <para>Added V4L2_CTRL_TYPE_BITMASK.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+ <section>
+ <title>V4L2 in Linux 3.2</title>
+ <orderedlist>
+ <listitem>
+ <para>V4L2_CTRL_FLAG_VOLATILE was added to signal volatile controls to userspace.</para>
+ </listitem>
+ <listitem>
+ <para>Add selection API for extended control over cropping
+ and composing. Does not affect the compatibility of current
+ drivers and applications. See <link
+ linkend="selection-api"> selection API </link> for
+ details.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.3</title>
+ <orderedlist>
+ <listitem>
+ <para>Added <constant>V4L2_CID_ALPHA_COMPONENT</constant> control
+ to the <link linkend="control">User controls class</link>.
+ </para>
+ </listitem>
+ <listitem>
+ <para>Added the device_caps field to struct v4l2_capabilities and added the new
+ V4L2_CAP_DEVICE_CAPS capability.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.4</title>
+ <orderedlist>
+ <listitem>
+ <para>Added <link linkend="jpeg-controls">JPEG compression control
+ class</link>.</para>
+ </listitem>
+ <listitem>
+ <para>Extended the DV Timings API:
+ &VIDIOC-ENUM-DV-TIMINGS;, &VIDIOC-QUERY-DV-TIMINGS; and
+ &VIDIOC-DV-TIMINGS-CAP;.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.5</title>
+ <orderedlist>
+ <listitem>
+ <para>Added integer menus, the new type will be
+ V4L2_CTRL_TYPE_INTEGER_MENU.</para>
+ </listitem>
+ <listitem>
+ <para>Added selection API for V4L2 subdev interface:
+ &VIDIOC-SUBDEV-G-SELECTION; and
+ &VIDIOC-SUBDEV-S-SELECTION;.</para>
+ </listitem>
+ <listitem>
+ <para> Added <constant>V4L2_COLORFX_ANTIQUE</constant>,
+ <constant>V4L2_COLORFX_ART_FREEZE</constant>,
+ <constant>V4L2_COLORFX_AQUA</constant>,
+ <constant>V4L2_COLORFX_SILHOUETTE</constant>,
+ <constant>V4L2_COLORFX_SOLARIZATION</constant>,
+ <constant>V4L2_COLORFX_VIVID</constant> and
+ <constant>V4L2_COLORFX_ARBITRARY_CBCR</constant> menu items
+ to the <constant>V4L2_CID_COLORFX</constant> control.</para>
+ </listitem>
+ <listitem>
+ <para> Added <constant>V4L2_CID_COLORFX_CBCR</constant> control.</para>
+ </listitem>
+ <listitem>
+ <para> Added camera controls <constant>V4L2_CID_AUTO_EXPOSURE_BIAS</constant>,
+ <constant>V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE</constant>,
+ <constant>V4L2_CID_IMAGE_STABILIZATION</constant>,
+ <constant>V4L2_CID_ISO_SENSITIVITY</constant>,
+ <constant>V4L2_CID_ISO_SENSITIVITY_AUTO</constant>,
+ <constant>V4L2_CID_EXPOSURE_METERING</constant>,
+ <constant>V4L2_CID_SCENE_MODE</constant>,
+ <constant>V4L2_CID_3A_LOCK</constant>,
+ <constant>V4L2_CID_AUTO_FOCUS_START</constant>,
+ <constant>V4L2_CID_AUTO_FOCUS_STOP</constant>,
+ <constant>V4L2_CID_AUTO_FOCUS_STATUS</constant> and
+ <constant>V4L2_CID_AUTO_FOCUS_RANGE</constant>.
+ </para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.6</title>
+ <orderedlist>
+ <listitem>
+ <para>Replaced <structfield>input</structfield> in
+ <structname>v4l2_buffer</structname> by
+ <structfield>reserved2</structfield> and removed
+ <constant>V4L2_BUF_FLAG_INPUT</constant>.</para>
+ </listitem>
+ <listitem>
+ <para>Added V4L2_CAP_VIDEO_M2M and V4L2_CAP_VIDEO_M2M_MPLANE capabilities.</para>
+ </listitem>
+ <listitem>
+ <para>Added support for frequency band enumerations: &VIDIOC-ENUM-FREQ-BANDS;.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.9</title>
+ <orderedlist>
+ <listitem>
+ <para>Added timestamp types to
+ <structfield>flags</structfield> field in
+ <structname>v4l2_buffer</structname>. See <xref
+ linkend="buffer-flags" />.</para>
+ </listitem>
+ <listitem>
+ <para>Added <constant>V4L2_EVENT_CTRL_CH_RANGE</constant> control event
+ changes flag. See <xref linkend="ctrl-changes-flags"/>.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.10</title>
+ <orderedlist>
+ <listitem>
+ <para>Removed obsolete and unused DV_PRESET ioctls
+ VIDIOC_G_DV_PRESET, VIDIOC_S_DV_PRESET, VIDIOC_QUERY_DV_PRESET and
+ VIDIOC_ENUM_DV_PRESET. Remove the related v4l2_input/output capability
+ flags V4L2_IN_CAP_PRESETS and V4L2_OUT_CAP_PRESETS.
+ </para>
+ </listitem>
+ <listitem>
+ <para>Added new debugging ioctl &VIDIOC-DBG-G-CHIP-INFO;.
+ </para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.11</title>
+ <orderedlist>
+ <listitem>
+ <para>Remove obsolete <constant>VIDIOC_DBG_G_CHIP_IDENT</constant> ioctl.
+ </para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.14</title>
+ <orderedlist>
+ <listitem>
+ <para> In struct <structname>v4l2_rect</structname>, the type
+of <structfield>width</structfield> and <structfield>height</structfield>
+fields changed from _s32 to _u32.
+ </para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.15</title>
+ <orderedlist>
+ <listitem>
+ <para>Added Software Defined Radio (SDR) Interface.
+ </para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.16</title>
+ <orderedlist>
+ <listitem>
+ <para>Added event V4L2_EVENT_SOURCE_CHANGE.
+ </para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.17</title>
+ <orderedlist>
+ <listitem>
+ <para>Extended &v4l2-pix-format;. Added format flags.
+ </para>
+ </listitem>
+ <listitem>
+ <para>Added compound control types and &VIDIOC-QUERY-EXT-CTRL;.
+ </para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.18</title>
+ <orderedlist>
+ <listitem>
+ <para>Added <constant>V4L2_CID_PAN_SPEED</constant> and
+ <constant>V4L2_CID_TILT_SPEED</constant> camera controls.</para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section>
+ <title>V4L2 in Linux 3.19</title>
+ <orderedlist>
+ <listitem>
+ <para>Rewrote Colorspace chapter, added new &v4l2-ycbcr-encoding;
+and &v4l2-quantization; fields to &v4l2-pix-format;, &v4l2-pix-format-mplane;
+and &v4l2-mbus-framefmt;.
+ </para>
+ </listitem>
+ </orderedlist>
+ </section>
+
+ <section id="other">
+ <title>Relation of V4L2 to other Linux multimedia APIs</title>
+
+ <section id="xvideo">
+ <title>X Video Extension</title>
+
+ <para>The X Video Extension (abbreviated XVideo or just Xv) is
+an extension of the X Window system, implemented for example by the
+XFree86 project. Its scope is similar to V4L2, an API to video capture
+and output devices for X clients. Xv allows applications to display
+live video in a window, send window contents to a TV output, and
+capture or output still images in XPixmaps<footnote>
+ <para>This is not implemented in XFree86.</para>
+ </footnote>. With their implementation XFree86 makes the
+extension available across many operating systems and
+architectures.</para>
+
+ <para>Because the driver is embedded into the X server Xv has a
+number of advantages over the V4L2 <link linkend="overlay">video
+overlay interface</link>. The driver can easily determine the overlay
+target, &ie; visible graphics memory or off-screen buffers for a
+destructive overlay. It can program the RAMDAC for a non-destructive
+overlay, scaling or color-keying, or the clipping functions of the
+video capture hardware, always in sync with drawing operations or
+windows moving or changing their stacking order.</para>
+
+ <para>To combine the advantages of Xv and V4L a special Xv
+driver exists in XFree86 and XOrg, just programming any overlay capable
+Video4Linux device it finds. To enable it
+<filename>/etc/X11/XF86Config</filename> must contain these lines:</para>
+ <para><screen>
+Section "Module"
+ Load "v4l"
+EndSection</screen></para>
+
+ <para>As of XFree86 4.2 this driver still supports only V4L
+ioctls, however it should work just fine with all V4L2 devices through
+the V4L2 backward-compatibility layer. Since V4L2 permits multiple
+opens it is possible (if supported by the V4L2 driver) to capture
+video while an X client requested video overlay. Restrictions of
+simultaneous capturing and overlay are discussed in <xref
+ linkend="overlay" /> apply.</para>
+
+ <para>Only marginally related to V4L2, XFree86 extended Xv to
+support hardware YUV to RGB conversion and scaling for faster video
+playback, and added an interface to MPEG-2 decoding hardware. This API
+is useful to display images captured with V4L2 devices.</para>
+ </section>
+
+ <section>
+ <title>Digital Video</title>
+
+ <para>V4L2 does not support digital terrestrial, cable or
+satellite broadcast. A separate project aiming at digital receivers
+exists. You can find its homepage at <ulink
+url="http://linuxtv.org">http://linuxtv.org</ulink>. The Linux DVB API
+has no connection to the V4L2 API except that drivers for hybrid
+hardware may support both.</para>
+ </section>
+
+ <section>
+ <title>Audio Interfaces</title>
+
+ <para>[to do - OSS/ALSA]</para>
+ </section>
+ </section>
+
+ <section id="experimental">
+ <title>Experimental API Elements</title>
+
+ <para>The following V4L2 API elements are currently experimental
+and may change in the future.</para>
+
+ <itemizedlist>
+ <listitem>
+ <para>Video Output Overlay (OSD) Interface, <xref
+ linkend="osd" />.</para>
+ </listitem>
+ <listitem>
+ <para>&VIDIOC-DBG-G-REGISTER; and &VIDIOC-DBG-S-REGISTER;
+ioctls.</para>
+ </listitem>
+ <listitem>
+ <para>&VIDIOC-DBG-G-CHIP-INFO; ioctl.</para>
+ </listitem>
+ <listitem>
+ <para>&VIDIOC-ENUM-DV-TIMINGS;, &VIDIOC-QUERY-DV-TIMINGS; and
+ &VIDIOC-DV-TIMINGS-CAP; ioctls.</para>
+ </listitem>
+ <listitem>
+ <para>Flash API. <xref linkend="flash-controls" /></para>
+ </listitem>
+ <listitem>
+ <para>&VIDIOC-CREATE-BUFS; and &VIDIOC-PREPARE-BUF; ioctls.</para>
+ </listitem>
+ <listitem>
+ <para>Selection API. <xref linkend="selection-api" /></para>
+ </listitem>
+ <listitem>
+ <para>Sub-device selection API: &VIDIOC-SUBDEV-G-SELECTION;
+ and &VIDIOC-SUBDEV-S-SELECTION; ioctls.</para>
+ </listitem>
+ <listitem>
+ <para>Support for frequency band enumeration: &VIDIOC-ENUM-FREQ-BANDS; ioctl.</para>
+ </listitem>
+ <listitem>
+ <para>Vendor and device specific media bus pixel formats.
+ <xref linkend="v4l2-mbus-vendor-spec-fmts" />.</para>
+ </listitem>
+ <listitem>
+ <para>Importing DMABUF file descriptors as a new IO method described
+ in <xref linkend="dmabuf" />.</para>
+ </listitem>
+ <listitem>
+ <para>Exporting DMABUF files using &VIDIOC-EXPBUF; ioctl.</para>
+ </listitem>
+ <listitem>
+ <para>Software Defined Radio (SDR) Interface, <xref linkend="sdr" />.</para>
+ </listitem>
+ </itemizedlist>
+ </section>
+
+ <section id="obsolete">
+ <title>Obsolete API Elements</title>
+
+ <para>The following V4L2 API elements were superseded by new
+interfaces and should not be implemented in new drivers.</para>
+
+ <itemizedlist>
+ <listitem>
+ <para><constant>VIDIOC_G_MPEGCOMP</constant> and
+<constant>VIDIOC_S_MPEGCOMP</constant> ioctls. Use Extended Controls,
+<xref linkend="extended-controls" />.</para>
+ </listitem>
+ <listitem>
+ <para>VIDIOC_G_DV_PRESET, VIDIOC_S_DV_PRESET, VIDIOC_ENUM_DV_PRESETS and
+ VIDIOC_QUERY_DV_PRESET ioctls. Use the DV Timings API (<xref linkend="dv-timings" />).</para>
+ </listitem>
+ <listitem>
+ <para><constant>VIDIOC_SUBDEV_G_CROP</constant> and
+ <constant>VIDIOC_SUBDEV_S_CROP</constant> ioctls. Use
+ <constant>VIDIOC_SUBDEV_G_SELECTION</constant> and
+ <constant>VIDIOC_SUBDEV_S_SELECTION</constant>, <xref
+ linkend="vidioc-subdev-g-selection" />.</para>
+ </listitem>
+ </itemizedlist>
+ </section>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/controls.xml b/Documentation/DocBook/media/v4l/controls.xml
new file mode 100644
index 000000000..4e9462f1a
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/controls.xml
@@ -0,0 +1,5464 @@
+ <section id="control">
+ <title>User Controls</title>
+
+ <para>Devices typically have a number of user-settable controls
+such as brightness, saturation and so on, which would be presented to
+the user on a graphical user interface. But, different devices
+will have different controls available, and furthermore, the range of
+possible values, and the default value will vary from device to
+device. The control ioctls provide the information and a mechanism to
+create a nice user interface for these controls that will work
+correctly with any device.</para>
+
+ <para>All controls are accessed using an ID value. V4L2 defines
+several IDs for specific purposes. Drivers can also implement their
+own custom controls using <constant>V4L2_CID_PRIVATE_BASE</constant>
+<footnote><para>The use of <constant>V4L2_CID_PRIVATE_BASE</constant>
+is problematic because different drivers may use the same
+<constant>V4L2_CID_PRIVATE_BASE</constant> ID for different controls.
+This makes it hard to programatically set such controls since the meaning
+of the control with that ID is driver dependent. In order to resolve this
+drivers use unique IDs and the <constant>V4L2_CID_PRIVATE_BASE</constant>
+IDs are mapped to those unique IDs by the kernel. Consider these
+<constant>V4L2_CID_PRIVATE_BASE</constant> IDs as aliases to the real
+IDs.</para>
+<para>Many applications today still use the <constant>V4L2_CID_PRIVATE_BASE</constant>
+IDs instead of using &VIDIOC-QUERYCTRL; with the <constant>V4L2_CTRL_FLAG_NEXT_CTRL</constant>
+flag to enumerate all IDs, so support for <constant>V4L2_CID_PRIVATE_BASE</constant>
+is still around.</para></footnote>
+and higher values. The pre-defined control IDs have the prefix
+<constant>V4L2_CID_</constant>, and are listed in <xref
+linkend="control-id" />. The ID is used when querying the attributes of
+a control, and when getting or setting the current value.</para>
+
+ <para>Generally applications should present controls to the user
+without assumptions about their purpose. Each control comes with a
+name string the user is supposed to understand. When the purpose is
+non-intuitive the driver writer should provide a user manual, a user
+interface plug-in or a driver specific panel application. Predefined
+IDs were introduced to change a few controls programmatically, for
+example to mute a device during a channel switch.</para>
+
+ <para>Drivers may enumerate different controls after switching
+the current video input or output, tuner or modulator, or audio input
+or output. Different in the sense of other bounds, another default and
+current value, step size or other menu items. A control with a certain
+<emphasis>custom</emphasis> ID can also change name and
+type.</para>
+
+ <para>If a control is not applicable to the current configuration
+of the device (for example, it doesn't apply to the current video input)
+drivers set the <constant>V4L2_CTRL_FLAG_INACTIVE</constant> flag.</para>
+
+ <para>Control values are stored globally, they do not
+change when switching except to stay within the reported bounds. They
+also do not change &eg; when the device is opened or closed, when the
+tuner radio frequency is changed or generally never without
+application request.</para>
+
+ <para>V4L2 specifies an event mechanism to notify applications
+when controls change value (see &VIDIOC-SUBSCRIBE-EVENT;, event
+<constant>V4L2_EVENT_CTRL</constant>), panel applications might want to make
+use of that in order to always reflect the correct control value.</para>
+
+ <para>
+ All controls use machine endianness.
+ </para>
+
+ <table pgwide="1" frame="none" id="control-id">
+ <title>Control IDs</title>
+ <tgroup cols="3">
+ &cs-def;
+ <thead>
+ <row>
+ <entry>ID</entry>
+ <entry>Type</entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CID_BASE</constant></entry>
+ <entry></entry>
+ <entry>First predefined ID, equal to
+<constant>V4L2_CID_BRIGHTNESS</constant>.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_USER_BASE</constant></entry>
+ <entry></entry>
+ <entry>Synonym of <constant>V4L2_CID_BASE</constant>.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_BRIGHTNESS</constant></entry>
+ <entry>integer</entry>
+ <entry>Picture brightness, or more precisely, the black
+level.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_CONTRAST</constant></entry>
+ <entry>integer</entry>
+ <entry>Picture contrast or luma gain.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_SATURATION</constant></entry>
+ <entry>integer</entry>
+ <entry>Picture color saturation or chroma gain.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_HUE</constant></entry>
+ <entry>integer</entry>
+ <entry>Hue or color balance.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_AUDIO_VOLUME</constant></entry>
+ <entry>integer</entry>
+ <entry>Overall audio volume. Note some drivers also
+provide an OSS or ALSA mixer interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_AUDIO_BALANCE</constant></entry>
+ <entry>integer</entry>
+ <entry>Audio stereo balance. Minimum corresponds to all
+the way left, maximum to right.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_AUDIO_BASS</constant></entry>
+ <entry>integer</entry>
+ <entry>Audio bass adjustment.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_AUDIO_TREBLE</constant></entry>
+ <entry>integer</entry>
+ <entry>Audio treble adjustment.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_AUDIO_MUTE</constant></entry>
+ <entry>boolean</entry>
+ <entry>Mute audio, &ie; set the volume to zero, however
+without affecting <constant>V4L2_CID_AUDIO_VOLUME</constant>. Like
+ALSA drivers, V4L2 drivers must mute at load time to avoid excessive
+noise. Actually the entire device should be reset to a low power
+consumption state.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_AUDIO_LOUDNESS</constant></entry>
+ <entry>boolean</entry>
+ <entry>Loudness mode (bass boost).</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_BLACK_LEVEL</constant></entry>
+ <entry>integer</entry>
+ <entry>Another name for brightness (not a synonym of
+<constant>V4L2_CID_BRIGHTNESS</constant>). This control is deprecated
+and should not be used in new drivers and applications.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_AUTO_WHITE_BALANCE</constant></entry>
+ <entry>boolean</entry>
+ <entry>Automatic white balance (cameras).</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_DO_WHITE_BALANCE</constant></entry>
+ <entry>button</entry>
+ <entry>This is an action control. When set (the value is
+ignored), the device will do a white balance and then hold the current
+setting. Contrast this with the boolean
+<constant>V4L2_CID_AUTO_WHITE_BALANCE</constant>, which, when
+activated, keeps adjusting the white balance.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_RED_BALANCE</constant></entry>
+ <entry>integer</entry>
+ <entry>Red chroma balance.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_BLUE_BALANCE</constant></entry>
+ <entry>integer</entry>
+ <entry>Blue chroma balance.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_GAMMA</constant></entry>
+ <entry>integer</entry>
+ <entry>Gamma adjust.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_WHITENESS</constant></entry>
+ <entry>integer</entry>
+ <entry>Whiteness for grey-scale devices. This is a synonym
+for <constant>V4L2_CID_GAMMA</constant>. This control is deprecated
+and should not be used in new drivers and applications.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_EXPOSURE</constant></entry>
+ <entry>integer</entry>
+ <entry>Exposure (cameras). [Unit?]</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_AUTOGAIN</constant></entry>
+ <entry>boolean</entry>
+ <entry>Automatic gain/exposure control.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_GAIN</constant></entry>
+ <entry>integer</entry>
+ <entry>Gain control.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_HFLIP</constant></entry>
+ <entry>boolean</entry>
+ <entry>Mirror the picture horizontally.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_VFLIP</constant></entry>
+ <entry>boolean</entry>
+ <entry>Mirror the picture vertically.</entry>
+ </row>
+ <row id="v4l2-power-line-frequency">
+ <entry><constant>V4L2_CID_POWER_LINE_FREQUENCY</constant></entry>
+ <entry>enum</entry>
+ <entry>Enables a power line frequency filter to avoid
+flicker. Possible values for <constant>enum v4l2_power_line_frequency</constant> are:
+<constant>V4L2_CID_POWER_LINE_FREQUENCY_DISABLED</constant> (0),
+<constant>V4L2_CID_POWER_LINE_FREQUENCY_50HZ</constant> (1),
+<constant>V4L2_CID_POWER_LINE_FREQUENCY_60HZ</constant> (2) and
+<constant>V4L2_CID_POWER_LINE_FREQUENCY_AUTO</constant> (3).</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_HUE_AUTO</constant></entry>
+ <entry>boolean</entry>
+ <entry>Enables automatic hue control by the device. The
+effect of setting <constant>V4L2_CID_HUE</constant> while automatic
+hue control is enabled is undefined, drivers should ignore such
+request.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_WHITE_BALANCE_TEMPERATURE</constant></entry>
+ <entry>integer</entry>
+ <entry>This control specifies the white balance settings
+as a color temperature in Kelvin. A driver should have a minimum of
+2800 (incandescent) to 6500 (daylight). For more information about
+color temperature see <ulink
+url="http://en.wikipedia.org/wiki/Color_temperature">Wikipedia</ulink>.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_SHARPNESS</constant></entry>
+ <entry>integer</entry>
+ <entry>Adjusts the sharpness filters in a camera. The
+minimum value disables the filters, higher values give a sharper
+picture.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_BACKLIGHT_COMPENSATION</constant></entry>
+ <entry>integer</entry>
+ <entry>Adjusts the backlight compensation in a camera. The
+minimum value disables backlight compensation.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_CHROMA_AGC</constant></entry>
+ <entry>boolean</entry>
+ <entry>Chroma automatic gain control.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_CHROMA_GAIN</constant></entry>
+ <entry>integer</entry>
+ <entry>Adjusts the Chroma gain control (for use when chroma AGC
+ is disabled).</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_COLOR_KILLER</constant></entry>
+ <entry>boolean</entry>
+ <entry>Enable the color killer (&ie; force a black &amp; white image in case of a weak video signal).</entry>
+ </row>
+ <row id="v4l2-colorfx">
+ <entry><constant>V4L2_CID_COLORFX</constant></entry>
+ <entry>enum</entry>
+ <entry>Selects a color effect. The following values are defined:
+ </entry>
+ </row><row>
+ <entry></entry>
+ <entry></entry>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_COLORFX_NONE</constant>&nbsp;</entry>
+ <entry>Color effect is disabled.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_ANTIQUE</constant>&nbsp;</entry>
+ <entry>An aging (old photo) effect.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_ART_FREEZE</constant>&nbsp;</entry>
+ <entry>Frost color effect.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_AQUA</constant>&nbsp;</entry>
+ <entry>Water color, cool tone.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_BW</constant>&nbsp;</entry>
+ <entry>Black and white.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_EMBOSS</constant>&nbsp;</entry>
+ <entry>Emboss, the highlights and shadows replace light/dark boundaries
+ and low contrast areas are set to a gray background.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_GRASS_GREEN</constant>&nbsp;</entry>
+ <entry>Grass green.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_NEGATIVE</constant>&nbsp;</entry>
+ <entry>Negative.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_SEPIA</constant>&nbsp;</entry>
+ <entry>Sepia tone.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_SKETCH</constant>&nbsp;</entry>
+ <entry>Sketch.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_SKIN_WHITEN</constant>&nbsp;</entry>
+ <entry>Skin whiten.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_SKY_BLUE</constant>&nbsp;</entry>
+ <entry>Sky blue.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_SOLARIZATION</constant>&nbsp;</entry>
+ <entry>Solarization, the image is partially reversed in tone,
+ only color values above or below a certain threshold are inverted.
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_SILHOUETTE</constant>&nbsp;</entry>
+ <entry>Silhouette (outline).</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_VIVID</constant>&nbsp;</entry>
+ <entry>Vivid colors.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORFX_SET_CBCR</constant>&nbsp;</entry>
+ <entry>The Cb and Cr chroma components are replaced by fixed
+ coefficients determined by <constant>V4L2_CID_COLORFX_CBCR</constant>
+ control.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_COLORFX_CBCR</constant></entry>
+ <entry>integer</entry>
+ <entry>Determines the Cb and Cr coefficients for <constant>V4L2_COLORFX_SET_CBCR</constant>
+ color effect. Bits [7:0] of the supplied 32 bit value are interpreted as
+ Cr component, bits [15:8] as Cb component and bits [31:16] must be zero.
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_AUTOBRIGHTNESS</constant></entry>
+ <entry>boolean</entry>
+ <entry>Enable Automatic Brightness.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_ROTATE</constant></entry>
+ <entry>integer</entry>
+ <entry>Rotates the image by specified angle. Common angles are 90,
+ 270 and 180. Rotating the image to 90 and 270 will reverse the height
+ and width of the display window. It is necessary to set the new height and
+ width of the picture using the &VIDIOC-S-FMT; ioctl according to
+ the rotation angle selected.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_BG_COLOR</constant></entry>
+ <entry>integer</entry>
+ <entry>Sets the background color on the current output device.
+ Background color needs to be specified in the RGB24 format. The
+ supplied 32 bit value is interpreted as bits 0-7 Red color information,
+ bits 8-15 Green color information, bits 16-23 Blue color
+ information and bits 24-31 must be zero.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_ILLUMINATORS_1</constant>
+ <constant>V4L2_CID_ILLUMINATORS_2</constant></entry>
+ <entry>boolean</entry>
+ <entry>Switch on or off the illuminator 1 or 2 of the device
+ (usually a microscope).</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_MIN_BUFFERS_FOR_CAPTURE</constant></entry>
+ <entry>integer</entry>
+ <entry>This is a read-only control that can be read by the application
+and used as a hint to determine the number of CAPTURE buffers to pass to REQBUFS.
+The value is the minimum number of CAPTURE buffers that is necessary for hardware
+to work.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_MIN_BUFFERS_FOR_OUTPUT</constant></entry>
+ <entry>integer</entry>
+ <entry>This is a read-only control that can be read by the application
+and used as a hint to determine the number of OUTPUT buffers to pass to REQBUFS.
+The value is the minimum number of OUTPUT buffers that is necessary for hardware
+to work.</entry>
+ </row>
+ <row id="v4l2-alpha-component">
+ <entry><constant>V4L2_CID_ALPHA_COMPONENT</constant></entry>
+ <entry>integer</entry>
+ <entry>Sets the alpha color component. When a capture device (or
+ capture queue of a mem-to-mem device) produces a frame format that
+ includes an alpha component
+ (e.g. <link linkend="rgb-formats">packed RGB image formats</link>)
+ and the alpha value is not defined by the device or the mem-to-mem
+ input data this control lets you select the alpha component value of
+ all pixels. When an output device (or output queue of a mem-to-mem
+ device) consumes a frame format that doesn't include an alpha
+ component and the device supports alpha channel processing this
+ control lets you set the alpha component value of all pixels for
+ further processing in the device.
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_LASTP1</constant></entry>
+ <entry></entry>
+ <entry>End of the predefined control IDs (currently
+ <constant>V4L2_CID_ALPHA_COMPONENT</constant> + 1).</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_PRIVATE_BASE</constant></entry>
+ <entry></entry>
+ <entry>ID of the first custom (driver specific) control.
+Applications depending on particular custom controls should check the
+driver name and version, see <xref linkend="querycap" />.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>Applications can enumerate the available controls with the
+&VIDIOC-QUERYCTRL; and &VIDIOC-QUERYMENU; ioctls, get and set a
+control value with the &VIDIOC-G-CTRL; and &VIDIOC-S-CTRL; ioctls.
+Drivers must implement <constant>VIDIOC_QUERYCTRL</constant>,
+<constant>VIDIOC_G_CTRL</constant> and
+<constant>VIDIOC_S_CTRL</constant> when the device has one or more
+controls, <constant>VIDIOC_QUERYMENU</constant> when it has one or
+more menu type controls.</para>
+
+ <example id="enum_all_controls">
+ <title>Enumerating all user controls</title>
+
+ <programlisting>
+&v4l2-queryctrl; queryctrl;
+&v4l2-querymenu; querymenu;
+
+static void enumerate_menu(void)
+{
+ printf(" Menu items:\n");
+
+ memset(&amp;querymenu, 0, sizeof(querymenu));
+ querymenu.id = queryctrl.id;
+
+ for (querymenu.index = queryctrl.minimum;
+ querymenu.index &lt;= queryctrl.maximum;
+ querymenu.index++) {
+ if (0 == ioctl(fd, &VIDIOC-QUERYMENU;, &amp;querymenu)) {
+ printf(" %s\n", querymenu.name);
+ }
+ }
+}
+
+memset(&amp;queryctrl, 0, sizeof(queryctrl));
+
+for (queryctrl.id = V4L2_CID_BASE;
+ queryctrl.id &lt; V4L2_CID_LASTP1;
+ queryctrl.id++) {
+ if (0 == ioctl(fd, &VIDIOC-QUERYCTRL;, &amp;queryctrl)) {
+ if (queryctrl.flags &amp; V4L2_CTRL_FLAG_DISABLED)
+ continue;
+
+ printf("Control %s\n", queryctrl.name);
+
+ if (queryctrl.type == V4L2_CTRL_TYPE_MENU)
+ enumerate_menu();
+ } else {
+ if (errno == EINVAL)
+ continue;
+
+ perror("VIDIOC_QUERYCTRL");
+ exit(EXIT_FAILURE);
+ }
+}
+
+for (queryctrl.id = V4L2_CID_PRIVATE_BASE;;
+ queryctrl.id++) {
+ if (0 == ioctl(fd, &VIDIOC-QUERYCTRL;, &amp;queryctrl)) {
+ if (queryctrl.flags &amp; V4L2_CTRL_FLAG_DISABLED)
+ continue;
+
+ printf("Control %s\n", queryctrl.name);
+
+ if (queryctrl.type == V4L2_CTRL_TYPE_MENU)
+ enumerate_menu();
+ } else {
+ if (errno == EINVAL)
+ break;
+
+ perror("VIDIOC_QUERYCTRL");
+ exit(EXIT_FAILURE);
+ }
+}
+</programlisting>
+ </example>
+
+ <example>
+ <title>Enumerating all user controls (alternative)</title>
+ <programlisting>
+memset(&amp;queryctrl, 0, sizeof(queryctrl));
+
+queryctrl.id = V4L2_CTRL_CLASS_USER | V4L2_CTRL_FLAG_NEXT_CTRL;
+while (0 == ioctl(fd, &VIDIOC-QUERYCTRL;, &amp;queryctrl)) {
+ if (V4L2_CTRL_ID2CLASS(queryctrl.id) != V4L2_CTRL_CLASS_USER)
+ break;
+ if (queryctrl.flags &amp; V4L2_CTRL_FLAG_DISABLED)
+ continue;
+
+ printf("Control %s\n", queryctrl.name);
+
+ if (queryctrl.type == V4L2_CTRL_TYPE_MENU)
+ enumerate_menu();
+
+ queryctrl.id |= V4L2_CTRL_FLAG_NEXT_CTRL;
+}
+if (errno != EINVAL) {
+ perror("VIDIOC_QUERYCTRL");
+ exit(EXIT_FAILURE);
+}
+</programlisting>
+ </example>
+
+ <example>
+ <title>Changing controls</title>
+
+ <programlisting>
+&v4l2-queryctrl; queryctrl;
+&v4l2-control; control;
+
+memset(&amp;queryctrl, 0, sizeof(queryctrl));
+queryctrl.id = V4L2_CID_BRIGHTNESS;
+
+if (-1 == ioctl(fd, &VIDIOC-QUERYCTRL;, &amp;queryctrl)) {
+ if (errno != EINVAL) {
+ perror("VIDIOC_QUERYCTRL");
+ exit(EXIT_FAILURE);
+ } else {
+ printf("V4L2_CID_BRIGHTNESS is not supported\n");
+ }
+} else if (queryctrl.flags &amp; V4L2_CTRL_FLAG_DISABLED) {
+ printf("V4L2_CID_BRIGHTNESS is not supported\n");
+} else {
+ memset(&amp;control, 0, sizeof (control));
+ control.id = V4L2_CID_BRIGHTNESS;
+ control.value = queryctrl.default_value;
+
+ if (-1 == ioctl(fd, &VIDIOC-S-CTRL;, &amp;control)) {
+ perror("VIDIOC_S_CTRL");
+ exit(EXIT_FAILURE);
+ }
+}
+
+memset(&amp;control, 0, sizeof(control));
+control.id = V4L2_CID_CONTRAST;
+
+if (0 == ioctl(fd, &VIDIOC-G-CTRL;, &amp;control)) {
+ control.value += 1;
+
+ /* The driver may clamp the value or return ERANGE, ignored here */
+
+ if (-1 == ioctl(fd, &VIDIOC-S-CTRL;, &amp;control)
+ &amp;&amp; errno != ERANGE) {
+ perror("VIDIOC_S_CTRL");
+ exit(EXIT_FAILURE);
+ }
+/* Ignore if V4L2_CID_CONTRAST is unsupported */
+} else if (errno != EINVAL) {
+ perror("VIDIOC_G_CTRL");
+ exit(EXIT_FAILURE);
+}
+
+control.id = V4L2_CID_AUDIO_MUTE;
+control.value = 1; /* silence */
+
+/* Errors ignored */
+ioctl(fd, VIDIOC_S_CTRL, &amp;control);
+</programlisting>
+ </example>
+ </section>
+
+ <section id="extended-controls">
+ <title>Extended Controls</title>
+
+ <section>
+ <title>Introduction</title>
+
+ <para>The control mechanism as originally designed was meant
+to be used for user settings (brightness, saturation, etc). However,
+it turned out to be a very useful model for implementing more
+complicated driver APIs where each driver implements only a subset of
+a larger API.</para>
+
+ <para>The MPEG encoding API was the driving force behind
+designing and implementing this extended control mechanism: the MPEG
+standard is quite large and the currently supported hardware MPEG
+encoders each only implement a subset of this standard. Further more,
+many parameters relating to how the video is encoded into an MPEG
+stream are specific to the MPEG encoding chip since the MPEG standard
+only defines the format of the resulting MPEG stream, not how the
+video is actually encoded into that format.</para>
+
+ <para>Unfortunately, the original control API lacked some
+features needed for these new uses and so it was extended into the
+(not terribly originally named) extended control API.</para>
+
+ <para>Even though the MPEG encoding API was the first effort
+to use the Extended Control API, nowadays there are also other classes
+of Extended Controls, such as Camera Controls and FM Transmitter Controls.
+The Extended Controls API as well as all Extended Controls classes are
+described in the following text.</para>
+ </section>
+
+ <section>
+ <title>The Extended Control API</title>
+
+ <para>Three new ioctls are available: &VIDIOC-G-EXT-CTRLS;,
+&VIDIOC-S-EXT-CTRLS; and &VIDIOC-TRY-EXT-CTRLS;. These ioctls act on
+arrays of controls (as opposed to the &VIDIOC-G-CTRL; and
+&VIDIOC-S-CTRL; ioctls that act on a single control). This is needed
+since it is often required to atomically change several controls at
+once.</para>
+
+ <para>Each of the new ioctls expects a pointer to a
+&v4l2-ext-controls;. This structure contains a pointer to the control
+array, a count of the number of controls in that array and a control
+class. Control classes are used to group similar controls into a
+single class. For example, control class
+<constant>V4L2_CTRL_CLASS_USER</constant> contains all user controls
+(&ie; all controls that can also be set using the old
+<constant>VIDIOC_S_CTRL</constant> ioctl). Control class
+<constant>V4L2_CTRL_CLASS_MPEG</constant> contains all controls
+relating to MPEG encoding, etc.</para>
+
+ <para>All controls in the control array must belong to the
+specified control class. An error is returned if this is not the
+case.</para>
+
+ <para>It is also possible to use an empty control array (count
+== 0) to check whether the specified control class is
+supported.</para>
+
+ <para>The control array is a &v4l2-ext-control; array. The
+<structname>v4l2_ext_control</structname> structure is very similar to
+&v4l2-control;, except for the fact that it also allows for 64-bit
+values and pointers to be passed.</para>
+
+ <para>Since the &v4l2-ext-control; supports pointers it is now
+also possible to have controls with compound types such as N-dimensional arrays
+and/or structures. You need to specify the <constant>V4L2_CTRL_FLAG_NEXT_COMPOUND</constant>
+when enumerating controls to actually be able to see such compound controls.
+In other words, these controls with compound types should only be used
+programmatically.</para>
+
+ <para>Since such compound controls need to expose more information
+about themselves than is possible with &VIDIOC-QUERYCTRL; the
+&VIDIOC-QUERY-EXT-CTRL; ioctl was added. In particular, this ioctl gives
+the dimensions of the N-dimensional array if this control consists of more than
+one element.</para>
+
+ <para>It is important to realize that due to the flexibility of
+controls it is necessary to check whether the control you want to set
+actually is supported in the driver and what the valid range of values
+is. So use the &VIDIOC-QUERYCTRL; (or &VIDIOC-QUERY-EXT-CTRL;) and
+&VIDIOC-QUERYMENU; ioctls to check this. Also note that it is possible
+that some of the menu indices in a control of type
+<constant>V4L2_CTRL_TYPE_MENU</constant> may not be supported
+(<constant>VIDIOC_QUERYMENU</constant> will return an error). A good
+example is the list of supported MPEG audio bitrates. Some drivers only
+support one or two bitrates, others support a wider range.</para>
+
+ <para>
+ All controls use machine endianness.
+ </para>
+ </section>
+
+ <section>
+ <title>Enumerating Extended Controls</title>
+
+ <para>The recommended way to enumerate over the extended
+controls is by using &VIDIOC-QUERYCTRL; in combination with the
+<constant>V4L2_CTRL_FLAG_NEXT_CTRL</constant> flag:</para>
+
+ <informalexample>
+ <programlisting>
+&v4l2-queryctrl; qctrl;
+
+qctrl.id = V4L2_CTRL_FLAG_NEXT_CTRL;
+while (0 == ioctl (fd, &VIDIOC-QUERYCTRL;, &amp;qctrl)) {
+ /* ... */
+ qctrl.id |= V4L2_CTRL_FLAG_NEXT_CTRL;
+}
+</programlisting>
+ </informalexample>
+
+ <para>The initial control ID is set to 0 ORed with the
+<constant>V4L2_CTRL_FLAG_NEXT_CTRL</constant> flag. The
+<constant>VIDIOC_QUERYCTRL</constant> ioctl will return the first
+control with a higher ID than the specified one. When no such controls
+are found an error is returned.</para>
+
+ <para>If you want to get all controls within a specific control
+class, then you can set the initial
+<structfield>qctrl.id</structfield> value to the control class and add
+an extra check to break out of the loop when a control of another
+control class is found:</para>
+
+ <informalexample>
+ <programlisting>
+qctrl.id = V4L2_CTRL_CLASS_MPEG | V4L2_CTRL_FLAG_NEXT_CTRL;
+while (0 == ioctl(fd, &VIDIOC-QUERYCTRL;, &amp;qctrl)) {
+ if (V4L2_CTRL_ID2CLASS(qctrl.id) != V4L2_CTRL_CLASS_MPEG)
+ break;
+ /* ... */
+ qctrl.id |= V4L2_CTRL_FLAG_NEXT_CTRL;
+}
+</programlisting>
+ </informalexample>
+
+ <para>The 32-bit <structfield>qctrl.id</structfield> value is
+subdivided into three bit ranges: the top 4 bits are reserved for
+flags (&eg; <constant>V4L2_CTRL_FLAG_NEXT_CTRL</constant>) and are not
+actually part of the ID. The remaining 28 bits form the control ID, of
+which the most significant 12 bits define the control class and the
+least significant 16 bits identify the control within the control
+class. It is guaranteed that these last 16 bits are always non-zero
+for controls. The range of 0x1000 and up are reserved for
+driver-specific controls. The macro
+<constant>V4L2_CTRL_ID2CLASS(id)</constant> returns the control class
+ID based on a control ID.</para>
+
+ <para>If the driver does not support extended controls, then
+<constant>VIDIOC_QUERYCTRL</constant> will fail when used in
+combination with <constant>V4L2_CTRL_FLAG_NEXT_CTRL</constant>. In
+that case the old method of enumerating control should be used (see
+<xref linkend="enum_all_controls" />). But if it is supported, then it is guaranteed to enumerate over
+all controls, including driver-private controls.</para>
+ </section>
+
+ <section>
+ <title>Creating Control Panels</title>
+
+ <para>It is possible to create control panels for a graphical
+user interface where the user can select the various controls.
+Basically you will have to iterate over all controls using the method
+described above. Each control class starts with a control of type
+<constant>V4L2_CTRL_TYPE_CTRL_CLASS</constant>.
+<constant>VIDIOC_QUERYCTRL</constant> will return the name of this
+control class which can be used as the title of a tab page within a
+control panel.</para>
+
+ <para>The flags field of &v4l2-queryctrl; also contains hints on
+the behavior of the control. See the &VIDIOC-QUERYCTRL; documentation
+for more details.</para>
+ </section>
+
+ <section id="mpeg-controls">
+ <title>Codec Control Reference</title>
+
+ <para>Below all controls within the Codec control class are
+described. First the generic controls, then controls specific for
+certain hardware.</para>
+
+ <para>Note: These controls are applicable to all codecs and
+not just MPEG. The defines are prefixed with V4L2_CID_MPEG/V4L2_MPEG
+as the controls were originally made for MPEG codecs and later
+extended to cover all encoding formats.</para>
+
+ <section>
+ <title>Generic Codec Controls</title>
+
+ <table pgwide="1" frame="none" id="mpeg-control-id">
+ <title>Codec Control IDs</title>
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CLASS</constant>&nbsp;</entry>
+ <entry>class</entry>
+ </row><row><entry spanname="descr">The Codec class
+descriptor. Calling &VIDIOC-QUERYCTRL; for this control will return a
+description of this control class. This description can be used as the
+caption of a Tab page in a GUI, for example.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-stream-type">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_STREAM_TYPE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_stream_type</entry>
+ </row><row><entry spanname="descr">The MPEG-1, -2 or -4
+output stream type. One cannot assume anything here. Each hardware
+MPEG encoder tends to support different subsets of the available MPEG
+stream types. This control is specific to multiplexed MPEG streams.
+The currently defined stream types are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_STREAM_TYPE_MPEG2_PS</constant>&nbsp;</entry>
+ <entry>MPEG-2 program stream</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_STREAM_TYPE_MPEG2_TS</constant>&nbsp;</entry>
+ <entry>MPEG-2 transport stream</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_STREAM_TYPE_MPEG1_SS</constant>&nbsp;</entry>
+ <entry>MPEG-1 system stream</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_STREAM_TYPE_MPEG2_DVD</constant>&nbsp;</entry>
+ <entry>MPEG-2 DVD-compatible stream</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_STREAM_TYPE_MPEG1_VCD</constant>&nbsp;</entry>
+ <entry>MPEG-1 VCD-compatible stream</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_STREAM_TYPE_MPEG2_SVCD</constant>&nbsp;</entry>
+ <entry>MPEG-2 SVCD-compatible stream</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_STREAM_PID_PMT</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Program Map Table
+Packet ID for the MPEG transport stream (default 16)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_STREAM_PID_AUDIO</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Audio Packet ID for
+the MPEG transport stream (default 256)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_STREAM_PID_VIDEO</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Video Packet ID for
+the MPEG transport stream (default 260)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_STREAM_PID_PCR</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Packet ID for the
+MPEG transport stream carrying PCR fields (default 259)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_STREAM_PES_ID_AUDIO</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Audio ID for MPEG
+PES</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_STREAM_PES_ID_VIDEO</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Video ID for MPEG
+PES</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-stream-vbi-fmt">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_STREAM_VBI_FMT</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_stream_vbi_fmt</entry>
+ </row><row><entry spanname="descr">Some cards can embed
+VBI data (&eg; Closed Caption, Teletext) into the MPEG stream. This
+control selects whether VBI data should be embedded, and if so, what
+embedding method should be used. The list of possible VBI formats
+depends on the driver. The currently defined VBI format types
+are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_STREAM_VBI_FMT_NONE</constant>&nbsp;</entry>
+ <entry>No VBI in the MPEG stream</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_STREAM_VBI_FMT_IVTV</constant>&nbsp;</entry>
+ <entry>VBI in private packets, IVTV format (documented
+in the kernel sources in the file <filename>Documentation/video4linux/cx2341x/README.vbi</filename>)</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-sampling-freq">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_SAMPLING_FREQ</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_sampling_freq</entry>
+ </row><row><entry spanname="descr">MPEG Audio sampling
+frequency. Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_SAMPLING_FREQ_44100</constant>&nbsp;</entry>
+ <entry>44.1 kHz</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_SAMPLING_FREQ_48000</constant>&nbsp;</entry>
+ <entry>48 kHz</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_SAMPLING_FREQ_32000</constant>&nbsp;</entry>
+ <entry>32 kHz</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-encoding">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_ENCODING</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_encoding</entry>
+ </row><row><entry spanname="descr">MPEG Audio encoding.
+This control is specific to multiplexed MPEG streams.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_ENCODING_LAYER_1</constant>&nbsp;</entry>
+ <entry>MPEG-1/2 Layer I encoding</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_ENCODING_LAYER_2</constant>&nbsp;</entry>
+ <entry>MPEG-1/2 Layer II encoding</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_ENCODING_LAYER_3</constant>&nbsp;</entry>
+ <entry>MPEG-1/2 Layer III encoding</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_ENCODING_AAC</constant>&nbsp;</entry>
+ <entry>MPEG-2/4 AAC (Advanced Audio Coding)</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_ENCODING_AC3</constant>&nbsp;</entry>
+ <entry>AC-3 aka ATSC A/52 encoding</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-l1-bitrate">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_L1_BITRATE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_l1_bitrate</entry>
+ </row><row><entry spanname="descr">MPEG-1/2 Layer I bitrate.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_32K</constant>&nbsp;</entry>
+ <entry>32 kbit/s</entry></row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_64K</constant>&nbsp;</entry>
+ <entry>64 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_96K</constant>&nbsp;</entry>
+ <entry>96 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_128K</constant>&nbsp;</entry>
+ <entry>128 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_160K</constant>&nbsp;</entry>
+ <entry>160 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_192K</constant>&nbsp;</entry>
+ <entry>192 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_224K</constant>&nbsp;</entry>
+ <entry>224 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_256K</constant>&nbsp;</entry>
+ <entry>256 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_288K</constant>&nbsp;</entry>
+ <entry>288 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_320K</constant>&nbsp;</entry>
+ <entry>320 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_352K</constant>&nbsp;</entry>
+ <entry>352 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_384K</constant>&nbsp;</entry>
+ <entry>384 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_416K</constant>&nbsp;</entry>
+ <entry>416 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L1_BITRATE_448K</constant>&nbsp;</entry>
+ <entry>448 kbit/s</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-l2-bitrate">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_L2_BITRATE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_l2_bitrate</entry>
+ </row><row><entry spanname="descr">MPEG-1/2 Layer II bitrate.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_32K</constant>&nbsp;</entry>
+ <entry>32 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_48K</constant>&nbsp;</entry>
+ <entry>48 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_56K</constant>&nbsp;</entry>
+ <entry>56 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_64K</constant>&nbsp;</entry>
+ <entry>64 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_80K</constant>&nbsp;</entry>
+ <entry>80 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_96K</constant>&nbsp;</entry>
+ <entry>96 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_112K</constant>&nbsp;</entry>
+ <entry>112 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_128K</constant>&nbsp;</entry>
+ <entry>128 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_160K</constant>&nbsp;</entry>
+ <entry>160 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_192K</constant>&nbsp;</entry>
+ <entry>192 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_224K</constant>&nbsp;</entry>
+ <entry>224 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_256K</constant>&nbsp;</entry>
+ <entry>256 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_320K</constant>&nbsp;</entry>
+ <entry>320 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L2_BITRATE_384K</constant>&nbsp;</entry>
+ <entry>384 kbit/s</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-l3-bitrate">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_L3_BITRATE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_l3_bitrate</entry>
+ </row><row><entry spanname="descr">MPEG-1/2 Layer III bitrate.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_32K</constant>&nbsp;</entry>
+ <entry>32 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_40K</constant>&nbsp;</entry>
+ <entry>40 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_48K</constant>&nbsp;</entry>
+ <entry>48 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_56K</constant>&nbsp;</entry>
+ <entry>56 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_64K</constant>&nbsp;</entry>
+ <entry>64 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_80K</constant>&nbsp;</entry>
+ <entry>80 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_96K</constant>&nbsp;</entry>
+ <entry>96 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_112K</constant>&nbsp;</entry>
+ <entry>112 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_128K</constant>&nbsp;</entry>
+ <entry>128 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_160K</constant>&nbsp;</entry>
+ <entry>160 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_192K</constant>&nbsp;</entry>
+ <entry>192 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_224K</constant>&nbsp;</entry>
+ <entry>224 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_256K</constant>&nbsp;</entry>
+ <entry>256 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_L3_BITRATE_320K</constant>&nbsp;</entry>
+ <entry>320 kbit/s</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_AAC_BITRATE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">AAC bitrate in bits per second.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-ac3-bitrate">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_AC3_BITRATE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_ac3_bitrate</entry>
+ </row><row><entry spanname="descr">AC-3 bitrate.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_32K</constant>&nbsp;</entry>
+ <entry>32 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_40K</constant>&nbsp;</entry>
+ <entry>40 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_48K</constant>&nbsp;</entry>
+ <entry>48 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_56K</constant>&nbsp;</entry>
+ <entry>56 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_64K</constant>&nbsp;</entry>
+ <entry>64 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_80K</constant>&nbsp;</entry>
+ <entry>80 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_96K</constant>&nbsp;</entry>
+ <entry>96 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_112K</constant>&nbsp;</entry>
+ <entry>112 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_128K</constant>&nbsp;</entry>
+ <entry>128 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_160K</constant>&nbsp;</entry>
+ <entry>160 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_192K</constant>&nbsp;</entry>
+ <entry>192 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_224K</constant>&nbsp;</entry>
+ <entry>224 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_256K</constant>&nbsp;</entry>
+ <entry>256 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_320K</constant>&nbsp;</entry>
+ <entry>320 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_384K</constant>&nbsp;</entry>
+ <entry>384 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_448K</constant>&nbsp;</entry>
+ <entry>448 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_512K</constant>&nbsp;</entry>
+ <entry>512 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_576K</constant>&nbsp;</entry>
+ <entry>576 kbit/s</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_AC3_BITRATE_640K</constant>&nbsp;</entry>
+ <entry>640 kbit/s</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-mode">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_MODE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_mode</entry>
+ </row><row><entry spanname="descr">MPEG Audio mode.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_MODE_STEREO</constant>&nbsp;</entry>
+ <entry>Stereo</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_MODE_JOINT_STEREO</constant>&nbsp;</entry>
+ <entry>Joint Stereo</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_MODE_DUAL</constant>&nbsp;</entry>
+ <entry>Bilingual</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_MODE_MONO</constant>&nbsp;</entry>
+ <entry>Mono</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-mode-extension">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_MODE_EXTENSION</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_mode_extension</entry>
+ </row><row><entry spanname="descr">Joint Stereo
+audio mode extension. In Layer I and II they indicate which subbands
+are in intensity stereo. All other subbands are coded in stereo. Layer
+III is not (yet) supported. Possible values
+are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_4</constant>&nbsp;</entry>
+ <entry>Subbands 4-31 in intensity stereo</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_8</constant>&nbsp;</entry>
+ <entry>Subbands 8-31 in intensity stereo</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_12</constant>&nbsp;</entry>
+ <entry>Subbands 12-31 in intensity stereo</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_MODE_EXTENSION_BOUND_16</constant>&nbsp;</entry>
+ <entry>Subbands 16-31 in intensity stereo</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-emphasis">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_EMPHASIS</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_emphasis</entry>
+ </row><row><entry spanname="descr">Audio Emphasis.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_EMPHASIS_NONE</constant>&nbsp;</entry>
+ <entry>None</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_EMPHASIS_50_DIV_15_uS</constant>&nbsp;</entry>
+ <entry>50/15 microsecond emphasis</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_EMPHASIS_CCITT_J17</constant>&nbsp;</entry>
+ <entry>CCITT J.17</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-crc">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_CRC</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_crc</entry>
+ </row><row><entry spanname="descr">CRC method. Possible
+values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_CRC_NONE</constant>&nbsp;</entry>
+ <entry>None</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_CRC_CRC16</constant>&nbsp;</entry>
+ <entry>16 bit parity check</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_MUTE</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Mutes the audio when
+capturing. This is not done by muting audio hardware, which can still
+produce a slight hiss, but in the encoder itself, guaranteeing a fixed
+and reproducible audio bitstream. 0 = unmuted, 1 = muted.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-dec-playback">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_DEC_PLAYBACK</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_dec_playback</entry>
+ </row><row><entry spanname="descr">Determines how monolingual audio should be played back.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_DEC_PLAYBACK_AUTO</constant>&nbsp;</entry>
+ <entry>Automatically determines the best playback mode.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_DEC_PLAYBACK_STEREO</constant>&nbsp;</entry>
+ <entry>Stereo playback.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_DEC_PLAYBACK_LEFT</constant>&nbsp;</entry>
+ <entry>Left channel playback.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_DEC_PLAYBACK_RIGHT</constant>&nbsp;</entry>
+ <entry>Right channel playback.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_DEC_PLAYBACK_MONO</constant>&nbsp;</entry>
+ <entry>Mono playback.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_AUDIO_DEC_PLAYBACK_SWAPPED_STEREO</constant>&nbsp;</entry>
+ <entry>Stereo playback with swapped left and right channels.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-audio-dec-multilingual-playback">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_AUDIO_DEC_MULTILINGUAL_PLAYBACK</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_audio_dec_playback</entry>
+ </row><row><entry spanname="descr">Determines how multilingual audio should be played back.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-encoding">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_ENCODING</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_encoding</entry>
+ </row><row><entry spanname="descr">MPEG Video encoding
+method. This control is specific to multiplexed MPEG streams.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_ENCODING_MPEG_1</constant>&nbsp;</entry>
+ <entry>MPEG-1 Video encoding</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_ENCODING_MPEG_2</constant>&nbsp;</entry>
+ <entry>MPEG-2 Video encoding</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_ENCODING_MPEG_4_AVC</constant>&nbsp;</entry>
+ <entry>MPEG-4 AVC (H.264) Video encoding</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-aspect">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_ASPECT</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_aspect</entry>
+ </row><row><entry spanname="descr">Video aspect.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_ASPECT_1x1</constant>&nbsp;</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_ASPECT_4x3</constant>&nbsp;</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_ASPECT_16x9</constant>&nbsp;</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_ASPECT_221x100</constant>&nbsp;</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_B_FRAMES</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Number of B-Frames
+(default 2)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_GOP_SIZE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">GOP size (default
+12)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_GOP_CLOSURE</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">GOP closure (default
+1)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_PULLDOWN</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Enable 3:2 pulldown
+(default 0)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-bitrate-mode">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_BITRATE_MODE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_bitrate_mode</entry>
+ </row><row><entry spanname="descr">Video bitrate mode.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_BITRATE_MODE_VBR</constant>&nbsp;</entry>
+ <entry>Variable bitrate</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_BITRATE_MODE_CBR</constant>&nbsp;</entry>
+ <entry>Constant bitrate</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_BITRATE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Video bitrate in bits
+per second.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_BITRATE_PEAK</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Peak video bitrate in
+bits per second. Must be larger or equal to the average video bitrate.
+It is ignored if the video bitrate mode is set to constant
+bitrate.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_TEMPORAL_DECIMATION</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">For every captured
+frame, skip this many subsequent frames (default 0).</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MUTE</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">"Mutes" the video to a
+fixed color when capturing. This is useful for testing, to produce a
+fixed video bitstream. 0 = unmuted, 1 = muted.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MUTE_YUV</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Sets the "mute" color
+of the video. The supplied 32-bit integer is interpreted as follows (bit
+0 = least significant bit):</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry>Bit 0:7</entry>
+ <entry>V chrominance information</entry>
+ </row>
+ <row>
+ <entry>Bit 8:15</entry>
+ <entry>U chrominance information</entry>
+ </row>
+ <row>
+ <entry>Bit 16:23</entry>
+ <entry>Y luminance information</entry>
+ </row>
+ <row>
+ <entry>Bit 24:31</entry>
+ <entry>Must be zero.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-dec-pts">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_DEC_PTS</constant>&nbsp;</entry>
+ <entry>integer64</entry>
+ </row><row><entry spanname="descr">This read-only control returns the
+33-bit video Presentation Time Stamp as defined in ITU T-REC-H.222.0 and ISO/IEC 13818-1 of
+the currently displayed frame. This is the same PTS as is used in &VIDIOC-DECODER-CMD;.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-dec-frame">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_DEC_FRAME</constant>&nbsp;</entry>
+ <entry>integer64</entry>
+ </row><row><entry spanname="descr">This read-only control returns the
+frame counter of the frame that is currently displayed (decoded). This value is reset to 0 whenever
+the decoder is started.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_DECODER_SLICE_INTERFACE</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">If enabled the decoder expects to receive a single slice per buffer, otherwise
+the decoder expects a single frame in per buffer. Applicable to the decoder, all codecs.
+</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_ENABLE</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Enable writing sample aspect ratio in the Video Usability Information.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-h264-vui-sar-idc">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_VUI_SAR_IDC</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_h264_vui_sar_idc</entry>
+ </row>
+ <row><entry spanname="descr">VUI sample aspect ratio indicator for H.264 encoding. The value
+is defined in the table E-1 in the standard. Applicable to the H264 encoder.</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_UNSPECIFIED</constant>&nbsp;</entry>
+ <entry>Unspecified</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_1x1</constant>&nbsp;</entry>
+ <entry>1x1</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_12x11</constant>&nbsp;</entry>
+ <entry>12x11</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_10x11</constant>&nbsp;</entry>
+ <entry>10x11</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_16x11</constant>&nbsp;</entry>
+ <entry>16x11</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_40x33</constant>&nbsp;</entry>
+ <entry>40x33</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_24x11</constant>&nbsp;</entry>
+ <entry>24x11</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_20x11</constant>&nbsp;</entry>
+ <entry>20x11</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_32x11</constant>&nbsp;</entry>
+ <entry>32x11</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_80x33</constant>&nbsp;</entry>
+ <entry>80x33</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_18x11</constant>&nbsp;</entry>
+ <entry>18x11</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_15x11</constant>&nbsp;</entry>
+ <entry>15x11</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_64x33</constant>&nbsp;</entry>
+ <entry>64x33</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_160x99</constant>&nbsp;</entry>
+ <entry>160x99</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_4x3</constant>&nbsp;</entry>
+ <entry>4x3</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_3x2</constant>&nbsp;</entry>
+ <entry>3x2</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_2x1</constant>&nbsp;</entry>
+ <entry>2x1</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_VUI_SAR_IDC_EXTENDED</constant>&nbsp;</entry>
+ <entry>Extended SAR</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_WIDTH</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Extended sample aspect ratio width for H.264 VUI encoding.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_VUI_EXT_SAR_HEIGHT</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Extended sample aspect ratio height for H.264 VUI encoding.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-h264-level">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_LEVEL</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_h264_level</entry>
+ </row>
+ <row><entry spanname="descr">The level information for the H264 video elementary stream.
+Applicable to the H264 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_1_0</constant>&nbsp;</entry>
+ <entry>Level 1.0</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_1B</constant>&nbsp;</entry>
+ <entry>Level 1B</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_1_1</constant>&nbsp;</entry>
+ <entry>Level 1.1</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_1_2</constant>&nbsp;</entry>
+ <entry>Level 1.2</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_1_3</constant>&nbsp;</entry>
+ <entry>Level 1.3</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_2_0</constant>&nbsp;</entry>
+ <entry>Level 2.0</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_2_1</constant>&nbsp;</entry>
+ <entry>Level 2.1</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_2_2</constant>&nbsp;</entry>
+ <entry>Level 2.2</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_3_0</constant>&nbsp;</entry>
+ <entry>Level 3.0</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_3_1</constant>&nbsp;</entry>
+ <entry>Level 3.1</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_3_2</constant>&nbsp;</entry>
+ <entry>Level 3.2</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_4_0</constant>&nbsp;</entry>
+ <entry>Level 4.0</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_4_1</constant>&nbsp;</entry>
+ <entry>Level 4.1</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_4_2</constant>&nbsp;</entry>
+ <entry>Level 4.2</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_5_0</constant>&nbsp;</entry>
+ <entry>Level 5.0</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LEVEL_5_1</constant>&nbsp;</entry>
+ <entry>Level 5.1</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-mpeg4-level">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_LEVEL</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_mpeg4_level</entry>
+ </row>
+ <row><entry spanname="descr">The level information for the MPEG4 elementary stream.
+Applicable to the MPEG4 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_LEVEL_0</constant>&nbsp;</entry>
+ <entry>Level 0</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_LEVEL_0B</constant>&nbsp;</entry>
+ <entry>Level 0b</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_LEVEL_1</constant>&nbsp;</entry>
+ <entry>Level 1</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_LEVEL_2</constant>&nbsp;</entry>
+ <entry>Level 2</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_LEVEL_3</constant>&nbsp;</entry>
+ <entry>Level 3</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_LEVEL_3B</constant>&nbsp;</entry>
+ <entry>Level 3b</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_LEVEL_4</constant>&nbsp;</entry>
+ <entry>Level 4</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_LEVEL_5</constant>&nbsp;</entry>
+ <entry>Level 5</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-h264-profile">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_PROFILE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_h264_profile</entry>
+ </row>
+ <row><entry spanname="descr">The profile information for H264.
+Applicable to the H264 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_BASELINE</constant>&nbsp;</entry>
+ <entry>Baseline profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_CONSTRAINED_BASELINE</constant>&nbsp;</entry>
+ <entry>Constrained Baseline profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_MAIN</constant>&nbsp;</entry>
+ <entry>Main profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED</constant>&nbsp;</entry>
+ <entry>Extended profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_HIGH</constant>&nbsp;</entry>
+ <entry>High profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10</constant>&nbsp;</entry>
+ <entry>High 10 profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422</constant>&nbsp;</entry>
+ <entry>High 422 profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_PREDICTIVE</constant>&nbsp;</entry>
+ <entry>High 444 Predictive profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_10_INTRA</constant>&nbsp;</entry>
+ <entry>High 10 Intra profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_422_INTRA</constant>&nbsp;</entry>
+ <entry>High 422 Intra profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_HIGH_444_INTRA</constant>&nbsp;</entry>
+ <entry>High 444 Intra profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_CAVLC_444_INTRA</constant>&nbsp;</entry>
+ <entry>CAVLC 444 Intra profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_BASELINE</constant>&nbsp;</entry>
+ <entry>Scalable Baseline profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH</constant>&nbsp;</entry>
+ <entry>Scalable High profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_SCALABLE_HIGH_INTRA</constant>&nbsp;</entry>
+ <entry>Scalable High Intra profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_STEREO_HIGH</constant>&nbsp;</entry>
+ <entry>Stereo High profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_PROFILE_MULTIVIEW_HIGH</constant>&nbsp;</entry>
+ <entry>Multiview High profile</entry>
+ </row>
+
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-mpeg4-profile">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_PROFILE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_mpeg4_profile</entry>
+ </row>
+ <row><entry spanname="descr">The profile information for MPEG4.
+Applicable to the MPEG4 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_PROFILE_SIMPLE</constant>&nbsp;</entry>
+ <entry>Simple profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_PROFILE_ADVANCED_SIMPLE</constant>&nbsp;</entry>
+ <entry>Advanced Simple profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_PROFILE_CORE</constant>&nbsp;</entry>
+ <entry>Core profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_PROFILE_SIMPLE_SCALABLE</constant>&nbsp;</entry>
+ <entry>Simple Scalable profile</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_PROFILE_ADVANCED_CODING_EFFICIENCY</constant>&nbsp;</entry>
+ <entry></entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MAX_REF_PIC</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">The maximum number of reference pictures used for encoding.
+Applicable to the encoder.
+</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-multi-slice-mode">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_multi_slice_mode</entry>
+ </row>
+ <row><entry spanname="descr">Determines how the encoder should handle division of frame into slices.
+Applicable to the encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_SINGLE</constant>&nbsp;</entry>
+ <entry>Single slice per frame.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_MB</constant>&nbsp;</entry>
+ <entry>Multiple slices with set maximum number of macroblocks per slice.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_BYTES</constant>&nbsp;</entry>
+ <entry>Multiple slice with set maximum size in bytes per slice.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_MB</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">The maximum number of macroblocks in a slice. Used when
+<constant>V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE</constant> is set to <constant>V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_MB</constant>.
+Applicable to the encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MAX_BYTES</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">The maximum size of a slice in bytes. Used when
+<constant>V4L2_CID_MPEG_VIDEO_MULTI_SLICE_MODE</constant> is set to <constant>V4L2_MPEG_VIDEO_MULTI_SLICE_MODE_MAX_BYTES</constant>.
+Applicable to the encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-h264-loop-filter-mode">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_MODE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_h264_loop_filter_mode</entry>
+ </row>
+ <row><entry spanname="descr">Loop filter mode for H264 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_ENABLED</constant>&nbsp;</entry>
+ <entry>Loop filter is enabled.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED</constant>&nbsp;</entry>
+ <entry>Loop filter is disabled.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_LOOP_FILTER_MODE_DISABLED_AT_SLICE_BOUNDARY</constant>&nbsp;</entry>
+ <entry>Loop filter is disabled at the slice boundary.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_ALPHA</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Loop filter alpha coefficient, defined in the H264 standard.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_LOOP_FILTER_BETA</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Loop filter beta coefficient, defined in the H264 standard.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-h264-entropy-mode">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_ENTROPY_MODE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_h264_entropy_mode</entry>
+ </row>
+ <row><entry spanname="descr">Entropy coding mode for H264 - CABAC/CAVALC.
+Applicable to the H264 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CAVLC</constant>&nbsp;</entry>
+ <entry>Use CAVLC entropy coding.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_ENTROPY_MODE_CABAC</constant>&nbsp;</entry>
+ <entry>Use CABAC entropy coding.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_8X8_TRANSFORM</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Enable 8X8 transform for H264. Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_CYCLIC_INTRA_REFRESH_MB</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Cyclic intra macroblock refresh. This is the number of continuous macroblocks
+refreshed every frame. Each frame a successive set of macroblocks is refreshed until the cycle completes and starts from the
+top of the frame. Applicable to H264, H263 and MPEG4 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_FRAME_RC_ENABLE</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Frame level rate control enable.
+If this control is disabled then the quantization parameter for each frame type is constant and set with appropriate controls
+(e.g. <constant>V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP</constant>).
+If frame rate control is enabled then quantization parameter is adjusted to meet the chosen bitrate. Minimum and maximum value
+for the quantization parameter can be set with appropriate controls (e.g. <constant>V4L2_CID_MPEG_VIDEO_H263_MIN_QP</constant>).
+Applicable to encoders.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Macroblock level rate control enable.
+Applicable to the MPEG4 and H264 encoders.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_QPEL</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Quarter pixel motion estimation for MPEG4. Applicable to the MPEG4 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H263_I_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for an I frame for H263. Valid range: from 1 to 31.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H263_MIN_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Minimum quantization parameter for H263. Valid range: from 1 to 31.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H263_MAX_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Maximum quantization parameter for H263. Valid range: from 1 to 31.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H263_P_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for an P frame for H263. Valid range: from 1 to 31.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H263_B_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for an B frame for H263. Valid range: from 1 to 31.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_I_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for an I frame for H264. Valid range: from 0 to 51.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_MIN_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Minimum quantization parameter for H264. Valid range: from 0 to 51.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_MAX_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Maximum quantization parameter for H264. Valid range: from 0 to 51.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_P_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for an P frame for H264. Valid range: from 0 to 51.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_B_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for an B frame for H264. Valid range: from 0 to 51.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_I_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for an I frame for MPEG4. Valid range: from 1 to 31.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_MIN_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Minimum quantization parameter for MPEG4. Valid range: from 1 to 31.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_MAX_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Maximum quantization parameter for MPEG4. Valid range: from 1 to 31.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_P_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for an P frame for MPEG4. Valid range: from 1 to 31.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_B_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for an B frame for MPEG4. Valid range: from 1 to 31.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VBV_SIZE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">The Video Buffer Verifier size in kilobytes, it is used as a limitation of frame skip.
+The VBV is defined in the standard as a mean to verify that the produced stream will be successfully decoded.
+The standard describes it as "Part of a hypothetical decoder that is conceptually connected to the
+output of the encoder. Its purpose is to provide a constraint on the variability of the data rate that an
+encoder or editing process may produce.".
+Applicable to the MPEG1, MPEG2, MPEG4 encoders.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-vbv-delay">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VBV_DELAY</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Sets the initial delay in milliseconds for
+VBV buffer control.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-hor-search-range">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MV_H_SEARCH_RANGE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Horizontal search range defines maximum horizontal search area in pixels
+to search and match for the present Macroblock (MB) in the reference picture. This V4L2 control macro is used to set
+horizontal search range for motion estimation module in video encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-vert-search-range">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MV_V_SEARCH_RANGE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Vertical search range defines maximum vertical search area in pixels
+to search and match for the present Macroblock (MB) in the reference picture. This V4L2 control macro is used to set
+vertical search range for motion estimation module in video encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_CPB_SIZE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">The Coded Picture Buffer size in kilobytes, it is used as a limitation of frame skip.
+The CPB is defined in the H264 standard as a mean to verify that the produced stream will be successfully decoded.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_I_PERIOD</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Period between I-frames in the open GOP for H264. In case of an open GOP
+this is the period between two I-frames. The period between IDR (Instantaneous Decoding Refresh) frames is taken from the GOP_SIZE control.
+An IDR frame, which stands for Instantaneous Decoding Refresh is an I-frame after which no prior frames are
+referenced. This means that a stream can be restarted from an IDR frame without the need to store or decode any
+previous frames. Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-header-mode">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_HEADER_MODE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_header_mode</entry>
+ </row>
+ <row><entry spanname="descr">Determines whether the header is returned as the first buffer or is
+it returned together with the first frame. Applicable to encoders.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_HEADER_MODE_SEPARATE</constant>&nbsp;</entry>
+ <entry>The stream header is returned separately in the first buffer.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_HEADER_MODE_JOINED_WITH_1ST_FRAME</constant>&nbsp;</entry>
+ <entry>The stream header is returned together with the first encoded frame.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_REPEAT_SEQ_HEADER</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Repeat the video sequence headers. Repeating these
+headers makes random access to the video stream easier. Applicable to the MPEG1, 2 and 4 encoder.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_DECODER_MPEG4_DEBLOCK_FILTER</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Enabled the deblocking post processing filter for MPEG4 decoder.
+Applicable to the MPEG4 decoder.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_VOP_TIME_RES</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">vop_time_increment_resolution value for MPEG4. Applicable to the MPEG4 encoder.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_MPEG4_VOP_TIME_INC</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">vop_time_increment value for MPEG4. Applicable to the MPEG4 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_SEI_FRAME_PACKING</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Enable generation of frame packing supplemental enhancement information in the encoded bitstream.
+The frame packing SEI message contains the arrangement of L and R planes for 3D viewing. Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_SEI_FP_CURRENT_FRAME_0</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Sets current frame as frame0 in frame packing SEI.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-h264-sei-fp-arrangement-type">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_h264_sei_fp_arrangement_type</entry>
+ </row>
+ <row><entry spanname="descr">Frame packing arrangement type for H264 SEI.
+Applicable to the H264 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE_CHEKERBOARD</constant>&nbsp;</entry>
+ <entry>Pixels are alternatively from L and R.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE_COLUMN</constant>&nbsp;</entry>
+ <entry>L and R are interlaced by column.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE_ROW</constant>&nbsp;</entry>
+ <entry>L and R are interlaced by row.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE_SIDE_BY_SIDE</constant>&nbsp;</entry>
+ <entry>L is on the left, R on the right.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE_TOP_BOTTOM</constant>&nbsp;</entry>
+ <entry>L is on top, R on bottom.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_SEI_FP_ARRANGEMENT_TYPE_TEMPORAL</constant>&nbsp;</entry>
+ <entry>One view per frame.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_FMO</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Enables flexible macroblock ordering in the encoded bitstream. It is a technique
+used for restructuring the ordering of macroblocks in pictures. Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-h264-fmo-map-type">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_FMO_MAP_TYPE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_h264_fmo_map_type</entry>
+ </row>
+ <row><entry spanname="descr">When using FMO, the map type divides the image in different scan patterns of macroblocks.
+Applicable to the H264 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_INTERLEAVED_SLICES</constant>&nbsp;</entry>
+ <entry>Slices are interleaved one after other with macroblocks in run length order.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_SCATTERED_SLICES</constant>&nbsp;</entry>
+ <entry>Scatters the macroblocks based on a mathematical function known to both encoder and decoder.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_FOREGROUND_WITH_LEFT_OVER</constant>&nbsp;</entry>
+ <entry>Macroblocks arranged in rectangular areas or regions of interest.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_BOX_OUT</constant>&nbsp;</entry>
+ <entry>Slice groups grow in a cyclic way from centre to outwards.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_RASTER_SCAN</constant>&nbsp;</entry>
+ <entry>Slice groups grow in raster scan pattern from left to right.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_WIPE_SCAN</constant>&nbsp;</entry>
+ <entry>Slice groups grow in wipe scan pattern from top to bottom.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_FMO_MAP_TYPE_EXPLICIT</constant>&nbsp;</entry>
+ <entry>User defined map type.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_FMO_SLICE_GROUP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Number of slice groups in FMO.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-h264-fmo-change-direction">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_DIRECTION</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_h264_fmo_change_dir</entry>
+ </row>
+ <row><entry spanname="descr">Specifies a direction of the slice group change for raster and wipe maps.
+Applicable to the H264 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_FMO_CHANGE_DIR_RIGHT</constant>&nbsp;</entry>
+ <entry>Raster scan or wipe right.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_FMO_CHANGE_DIR_LEFT</constant>&nbsp;</entry>
+ <entry>Reverse raster scan or wipe left.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_FMO_CHANGE_RATE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Specifies the size of the first slice group for raster and wipe map.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_FMO_RUN_LENGTH</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Specifies the number of consecutive macroblocks for the interleaved map.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_ASO</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Enables arbitrary slice ordering in encoded bitstream.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_ASO_SLICE_ORDER</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Specifies the slice order in ASO. Applicable to the H264 encoder.
+The supplied 32-bit integer is interpreted as follows (bit
+0 = least significant bit):</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry>Bit 0:15</entry>
+ <entry>Slice ID</entry>
+ </row>
+ <row>
+ <entry>Bit 16:32</entry>
+ <entry>Slice position or order</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Enables H264 hierarchical coding.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-video-h264-hierarchical-coding-type">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_TYPE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_video_h264_hierarchical_coding_type</entry>
+ </row>
+ <row><entry spanname="descr">Specifies the hierarchical coding type.
+Applicable to the H264 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_HIERARCHICAL_CODING_B</constant>&nbsp;</entry>
+ <entry>Hierarchical B coding.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VIDEO_H264_HIERARCHICAL_CODING_P</constant>&nbsp;</entry>
+ <entry>Hierarchical P coding.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Specifies the number of hierarchical coding layers.
+Applicable to the H264 encoder.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_H264_HIERARCHICAL_CODING_LAYER_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Specifies a user defined QP for each layer. Applicable to the H264 encoder.
+The supplied 32-bit integer is interpreted as follows (bit
+0 = least significant bit):</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry>Bit 0:15</entry>
+ <entry>QP value</entry>
+ </row>
+ <row>
+ <entry>Bit 16:32</entry>
+ <entry>Layer number</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section>
+ <title>MFC 5.1 MPEG Controls</title>
+
+ <para>The following MPEG class controls deal with MPEG
+decoding and encoding settings that are specific to the Multi Format Codec 5.1 device present
+in the S5P family of SoCs by Samsung.
+</para>
+
+ <table pgwide="1" frame="none" id="mfc51-control-id">
+ <title>MFC 5.1 Control IDs</title>
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY_ENABLE</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">If the display delay is enabled then the decoder is forced to return a
+CAPTURE buffer (decoded frame) after processing a certain number of OUTPUT buffers. The delay can be set through
+<constant>V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY</constant>. This feature can be used for example
+for generating thumbnails of videos. Applicable to the H264 decoder.
+ </entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_DECODER_H264_DISPLAY_DELAY</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Display delay value for H264 decoder.
+The decoder is forced to return a decoded frame after the set 'display delay' number of frames. If this number is
+low it may result in frames returned out of dispaly order, in addition the hardware may still be using the returned buffer
+as a reference picture for subsequent frames.
+</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_H264_NUM_REF_PIC_FOR_P</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">The number of reference pictures used for encoding a P picture.
+Applicable to the H264 encoder.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_PADDING</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Padding enable in the encoder - use a color instead of repeating border pixels.
+Applicable to encoders.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_PADDING_YUV</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Padding color in the encoder. Applicable to encoders. The supplied 32-bit integer is interpreted as follows (bit
+0 = least significant bit):</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry>Bit 0:7</entry>
+ <entry>V chrominance information</entry>
+ </row>
+ <row>
+ <entry>Bit 8:15</entry>
+ <entry>U chrominance information</entry>
+ </row>
+ <row>
+ <entry>Bit 16:23</entry>
+ <entry>Y luminance information</entry>
+ </row>
+ <row>
+ <entry>Bit 24:31</entry>
+ <entry>Must be zero.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_RC_REACTION_COEFF</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Reaction coefficient for MFC rate control. Applicable to encoders.
+<para>Note 1: Valid only when the frame level RC is enabled.</para>
+<para>Note 2: For tight CBR, this field must be small (ex. 2 ~ 10).
+For VBR, this field must be large (ex. 100 ~ 1000).</para>
+<para>Note 3: It is not recommended to use the greater number than FRAME_RATE * (10^9 / BIT_RATE).</para>
+</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_DARK</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Adaptive rate control for dark region.
+Valid only when H.264 and macroblock level RC is enabled (<constant>V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE</constant>).
+Applicable to the H264 encoder.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_SMOOTH</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Adaptive rate control for smooth region.
+Valid only when H.264 and macroblock level RC is enabled (<constant>V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE</constant>).
+Applicable to the H264 encoder.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_STATIC</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Adaptive rate control for static region.
+Valid only when H.264 and macroblock level RC is enabled (<constant>V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE</constant>).
+Applicable to the H264 encoder.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_H264_ADAPTIVE_RC_ACTIVITY</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Adaptive rate control for activity region.
+Valid only when H.264 and macroblock level RC is enabled (<constant>V4L2_CID_MPEG_VIDEO_MB_RC_ENABLE</constant>).
+Applicable to the H264 encoder.</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-mfc51-video-frame-skip-mode">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_FRAME_SKIP_MODE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_mfc51_video_frame_skip_mode</entry>
+ </row>
+ <row><entry spanname="descr">
+Indicates in what conditions the encoder should skip frames. If encoding a frame would cause the encoded stream to be larger then
+a chosen data limit then the frame will be skipped.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_MFC51_FRAME_SKIP_MODE_DISABLED</constant>&nbsp;</entry>
+ <entry>Frame skip mode is disabled.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_MFC51_FRAME_SKIP_MODE_LEVEL_LIMIT</constant>&nbsp;</entry>
+ <entry>Frame skip mode enabled and buffer limit is set by the chosen level and is defined by the standard.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_MFC51_FRAME_SKIP_MODE_BUF_LIMIT</constant>&nbsp;</entry>
+ <entry>Frame skip mode enabled and buffer limit is set by the VBV (MPEG1/2/4) or CPB (H264) buffer size control.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_RC_FIXED_TARGET_BIT</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Enable rate-control with fixed target bit.
+If this setting is enabled, then the rate control logic of the encoder will calculate the average bitrate
+for a GOP and keep it below or equal the set bitrate target. Otherwise the rate control logic calculates the
+overall average bitrate for the stream and keeps it below or equal to the set bitrate. In the first case
+the average bitrate for the whole stream will be smaller then the set bitrate. This is caused because the
+average is calculated for smaller number of frames, on the other hand enabling this setting will ensure that
+the stream will meet tight bandwidth contraints. Applicable to encoders.
+</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-mfc51-video-force-frame-type">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_MFC51_VIDEO_FORCE_FRAME_TYPE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_mfc51_video_force_frame_type</entry>
+ </row>
+ <row><entry spanname="descr">Force a frame type for the next queued buffer. Applicable to encoders.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_MFC51_FORCE_FRAME_TYPE_DISABLED</constant>&nbsp;</entry>
+ <entry>Forcing a specific frame type disabled.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_MFC51_FORCE_FRAME_TYPE_I_FRAME</constant>&nbsp;</entry>
+ <entry>Force an I-frame.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_MFC51_FORCE_FRAME_TYPE_NOT_CODED</constant>&nbsp;</entry>
+ <entry>Force a non-coded frame.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section>
+ <title>CX2341x MPEG Controls</title>
+
+ <para>The following MPEG class controls deal with MPEG
+encoding settings that are specific to the Conexant CX23415 and
+CX23416 MPEG encoding chips.</para>
+
+ <table pgwide="1" frame="none" id="cx2341x-control-id">
+ <title>CX2341x Control IDs</title>
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-cx2341x-video-spatial-filter-mode">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_cx2341x_video_spatial_filter_mode</entry>
+ </row><row><entry spanname="descr">Sets the Spatial
+Filter mode (default <constant>MANUAL</constant>). Possible values
+are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE_MANUAL</constant>&nbsp;</entry>
+ <entry>Choose the filter manually</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE_AUTO</constant>&nbsp;</entry>
+ <entry>Choose the filter automatically</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER</constant>&nbsp;</entry>
+ <entry>integer&nbsp;(0-15)</entry>
+ </row><row><entry spanname="descr">The setting for the
+Spatial Filter. 0 = off, 15 = maximum. (Default is 0.)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="luma-spatial-filter-type">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_cx2341x_video_luma_spatial_filter_type</entry>
+ </row><row><entry spanname="descr">Select the algorithm
+to use for the Luma Spatial Filter (default
+<constant>1D_HOR</constant>). Possible values:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_OFF</constant>&nbsp;</entry>
+ <entry>No filter</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_1D_HOR</constant>&nbsp;</entry>
+ <entry>One-dimensional horizontal</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_1D_VERT</constant>&nbsp;</entry>
+ <entry>One-dimensional vertical</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_2D_HV_SEPARABLE</constant>&nbsp;</entry>
+ <entry>Two-dimensional separable</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_LUMA_SPATIAL_FILTER_TYPE_2D_SYM_NON_SEPARABLE</constant>&nbsp;</entry>
+ <entry>Two-dimensional symmetrical
+non-separable</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="chroma-spatial-filter-type">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_cx2341x_video_chroma_spatial_filter_type</entry>
+ </row><row><entry spanname="descr">Select the algorithm
+for the Chroma Spatial Filter (default <constant>1D_HOR</constant>).
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE_OFF</constant>&nbsp;</entry>
+ <entry>No filter</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_CHROMA_SPATIAL_FILTER_TYPE_1D_HOR</constant>&nbsp;</entry>
+ <entry>One-dimensional horizontal</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-cx2341x-video-temporal-filter-mode">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_cx2341x_video_temporal_filter_mode</entry>
+ </row><row><entry spanname="descr">Sets the Temporal
+Filter mode (default <constant>MANUAL</constant>). Possible values
+are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE_MANUAL</constant>&nbsp;</entry>
+ <entry>Choose the filter manually</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER_MODE_AUTO</constant>&nbsp;</entry>
+ <entry>Choose the filter automatically</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_TEMPORAL_FILTER</constant>&nbsp;</entry>
+ <entry>integer&nbsp;(0-31)</entry>
+ </row><row><entry spanname="descr">The setting for the
+Temporal Filter. 0 = off, 31 = maximum. (Default is 8 for full-scale
+capturing and 0 for scaled capturing.)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row id="v4l2-mpeg-cx2341x-video-median-filter-type">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_mpeg_cx2341x_video_median_filter_type</entry>
+ </row><row><entry spanname="descr">Median Filter Type
+(default <constant>OFF</constant>). Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_OFF</constant>&nbsp;</entry>
+ <entry>No filter</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_HOR</constant>&nbsp;</entry>
+ <entry>Horizontal filter</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_VERT</constant>&nbsp;</entry>
+ <entry>Vertical filter</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_HOR_VERT</constant>&nbsp;</entry>
+ <entry>Horizontal and vertical filter</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_CX2341X_VIDEO_MEDIAN_FILTER_TYPE_DIAG</constant>&nbsp;</entry>
+ <entry>Diagonal filter</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_BOTTOM</constant>&nbsp;</entry>
+ <entry>integer&nbsp;(0-255)</entry>
+ </row><row><entry spanname="descr">Threshold above which
+the luminance median filter is enabled (default 0)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_LUMA_MEDIAN_FILTER_TOP</constant>&nbsp;</entry>
+ <entry>integer&nbsp;(0-255)</entry>
+ </row><row><entry spanname="descr">Threshold below which
+the luminance median filter is enabled (default 255)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_BOTTOM</constant>&nbsp;</entry>
+ <entry>integer&nbsp;(0-255)</entry>
+ </row><row><entry spanname="descr">Threshold above which
+the chroma median filter is enabled (default 0)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_VIDEO_CHROMA_MEDIAN_FILTER_TOP</constant>&nbsp;</entry>
+ <entry>integer&nbsp;(0-255)</entry>
+ </row><row><entry spanname="descr">Threshold below which
+the chroma median filter is enabled (default 255)</entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_CX2341X_STREAM_INSERT_NAV_PACKETS</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">The CX2341X MPEG encoder
+can insert one empty MPEG-2 PES packet into the stream between every
+four video frames. The packet size is 2048 bytes, including the
+packet_start_code_prefix and stream_id fields. The stream_id is 0xBF
+(private stream 2). The payload consists of 0x00 bytes, to be filled
+in by the application. 0 = do not insert, 1 = insert packets.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section>
+ <title>VPX Control Reference</title>
+
+ <para>The VPX controls include controls for encoding parameters
+ of VPx video codec.</para>
+
+ <table pgwide="1" frame="none" id="vpx-control-id">
+ <title>VPX Control IDs</title>
+
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-vpx-num-partitions">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_NUM_PARTITIONS</constant></entry>
+ <entry>enum v4l2_vp8_num_partitions</entry>
+ </row>
+ <row><entry spanname="descr">The number of token partitions to use in VP8 encoder.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CID_MPEG_VIDEO_VPX_1_PARTITION</constant></entry>
+ <entry>1 coefficient partition</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_MPEG_VIDEO_VPX_2_PARTITIONS</constant></entry>
+ <entry>2 coefficient partitions</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_MPEG_VIDEO_VPX_4_PARTITIONS</constant></entry>
+ <entry>4 coefficient partitions</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_MPEG_VIDEO_VPX_8_PARTITIONS</constant></entry>
+ <entry>8 coefficient partitions</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_IMD_DISABLE_4X4</constant></entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Setting this prevents intra 4x4 mode in the intra mode decision.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-vpx-num-ref-frames">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_NUM_REF_FRAMES</constant></entry>
+ <entry>enum v4l2_vp8_num_ref_frames</entry>
+ </row>
+ <row><entry spanname="descr">The number of reference pictures for encoding P frames.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CID_MPEG_VIDEO_VPX_1_REF_FRAME</constant></entry>
+ <entry>Last encoded frame will be searched</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_MPEG_VIDEO_VPX_2_REF_FRAME</constant></entry>
+ <entry>Two frames will be searched among the last encoded frame, the golden frame
+and the alternate reference (altref) frame. The encoder implementation will decide which two are chosen.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_MPEG_VIDEO_VPX_3_REF_FRAME</constant></entry>
+ <entry>The last encoded frame, the golden frame and the altref frame will be searched.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_FILTER_LEVEL</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Indicates the loop filter level. The adjustment of the loop
+filter level is done via a delta value against a baseline loop filter value.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_FILTER_SHARPNESS</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">This parameter affects the loop filter. Anything above
+zero weakens the deblocking effect on the loop filter.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_REF_PERIOD</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Sets the refresh period for the golden frame. The period is defined
+in number of frames. For a value of 'n', every nth frame starting from the first key frame will be taken as a golden frame.
+For eg. for encoding sequence of 0, 1, 2, 3, 4, 5, 6, 7 where the golden frame refresh period is set as 4, the frames
+0, 4, 8 etc will be taken as the golden frames as frame 0 is always a key frame.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row id="v4l2-vpx-golden-frame-sel">
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_SEL</constant></entry>
+ <entry>enum v4l2_vp8_golden_frame_sel</entry>
+ </row>
+ <row><entry spanname="descr">Selects the golden frame for encoding.
+Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_USE_PREV</constant></entry>
+ <entry>Use the (n-2)th frame as a golden frame, current frame index being 'n'.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_USE_REF_PERIOD</constant></entry>
+ <entry>Use the previous specific frame indicated by
+V4L2_CID_MPEG_VIDEO_VPX_GOLDEN_FRAME_REF_PERIOD as a golden frame.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_MIN_QP</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Minimum quantization parameter for VP8.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_MAX_QP</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Maximum quantization parameter for VP8.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_I_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for an I frame for VP8.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_P_FRAME_QP</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Quantization parameter for a P frame for VP8.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_MPEG_VIDEO_VPX_PROFILE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Select the desired profile for VPx encoder.
+Acceptable values are 0, 1, 2 and 3 corresponding to encoder profiles 0, 1, 2 and 3.</entry>
+ </row>
+
+ <row><entry></entry></row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </section>
+ </section>
+
+ <section id="camera-controls">
+ <title>Camera Control Reference</title>
+
+ <para>The Camera class includes controls for mechanical (or
+equivalent digital) features of a device such as controllable lenses
+or sensors.</para>
+
+ <table pgwide="1" frame="none" id="camera-control-id">
+ <title>Camera Control IDs</title>
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_CAMERA_CLASS</constant>&nbsp;</entry>
+ <entry>class</entry>
+ </row><row><entry spanname="descr">The Camera class
+descriptor. Calling &VIDIOC-QUERYCTRL; for this control will return a
+description of this control class.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row id="v4l2-exposure-auto-type">
+ <entry spanname="id"><constant>V4L2_CID_EXPOSURE_AUTO</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_exposure_auto_type</entry>
+ </row><row><entry spanname="descr">Enables automatic
+adjustments of the exposure time and/or iris aperture. The effect of
+manual changes of the exposure time or iris aperture while these
+features are enabled is undefined, drivers should ignore such
+requests. Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_EXPOSURE_AUTO</constant>&nbsp;</entry>
+ <entry>Automatic exposure time, automatic iris
+aperture.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EXPOSURE_MANUAL</constant>&nbsp;</entry>
+ <entry>Manual exposure time, manual iris.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EXPOSURE_SHUTTER_PRIORITY</constant>&nbsp;</entry>
+ <entry>Manual exposure time, auto iris.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EXPOSURE_APERTURE_PRIORITY</constant>&nbsp;</entry>
+ <entry>Auto exposure time, manual iris.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_EXPOSURE_ABSOLUTE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Determines the exposure
+time of the camera sensor. The exposure time is limited by the frame
+interval. Drivers should interpret the values as 100 &micro;s units,
+where the value 1 stands for 1/10000th of a second, 10000 for 1 second
+and 100000 for 10 seconds.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_EXPOSURE_AUTO_PRIORITY</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">When
+<constant>V4L2_CID_EXPOSURE_AUTO</constant> is set to
+<constant>AUTO</constant> or <constant>APERTURE_PRIORITY</constant>,
+this control determines if the device may dynamically vary the frame
+rate. By default this feature is disabled (0) and the frame rate must
+remain constant.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_EXPOSURE_BIAS</constant>&nbsp;</entry>
+ <entry>integer menu</entry>
+ </row><row><entry spanname="descr"> Determines the automatic
+exposure compensation, it is effective only when <constant>V4L2_CID_EXPOSURE_AUTO</constant>
+control is set to <constant>AUTO</constant>, <constant>SHUTTER_PRIORITY </constant>
+or <constant>APERTURE_PRIORITY</constant>.
+It is expressed in terms of EV, drivers should interpret the values as 0.001 EV
+units, where the value 1000 stands for +1 EV.
+<para>Increasing the exposure compensation value is equivalent to decreasing
+the exposure value (EV) and will increase the amount of light at the image
+sensor. The camera performs the exposure compensation by adjusting absolute
+exposure time and/or aperture.</para></entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row id="v4l2-exposure-metering">
+ <entry spanname="id"><constant>V4L2_CID_EXPOSURE_METERING</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_exposure_metering</entry>
+ </row><row><entry spanname="descr">Determines how the camera measures
+the amount of light available for the frame exposure. Possible values are:</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_EXPOSURE_METERING_AVERAGE</constant>&nbsp;</entry>
+ <entry>Use the light information coming from the entire frame
+and average giving no weighting to any particular portion of the metered area.
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EXPOSURE_METERING_CENTER_WEIGHTED</constant>&nbsp;</entry>
+ <entry>Average the light information coming from the entire frame
+giving priority to the center of the metered area.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EXPOSURE_METERING_SPOT</constant>&nbsp;</entry>
+ <entry>Measure only very small area at the center of the frame.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EXPOSURE_METERING_MATRIX</constant>&nbsp;</entry>
+ <entry>A multi-zone metering. The light intensity is measured
+in several points of the frame and the the results are combined. The
+algorithm of the zones selection and their significance in calculating the
+final value is device dependent.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_PAN_RELATIVE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">This control turns the
+camera horizontally by the specified amount. The unit is undefined. A
+positive value moves the camera to the right (clockwise when viewed
+from above), a negative value to the left. A value of zero does not
+cause motion. This is a write-only control.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TILT_RELATIVE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">This control turns the
+camera vertically by the specified amount. The unit is undefined. A
+positive value moves the camera up, a negative value down. A value of
+zero does not cause motion. This is a write-only control.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_PAN_RESET</constant>&nbsp;</entry>
+ <entry>button</entry>
+ </row><row><entry spanname="descr">When this control is set,
+the camera moves horizontally to the default position.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TILT_RESET</constant>&nbsp;</entry>
+ <entry>button</entry>
+ </row><row><entry spanname="descr">When this control is set,
+the camera moves vertically to the default position.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_PAN_ABSOLUTE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">This control
+turns the camera horizontally to the specified position. Positive
+values move the camera to the right (clockwise when viewed from above),
+negative values to the left. Drivers should interpret the values as arc
+seconds, with valid values between -180 * 3600 and +180 * 3600
+inclusive.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TILT_ABSOLUTE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">This control
+turns the camera vertically to the specified position. Positive values
+move the camera up, negative values down. Drivers should interpret the
+values as arc seconds, with valid values between -180 * 3600 and +180
+* 3600 inclusive.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FOCUS_ABSOLUTE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">This control sets the
+focal point of the camera to the specified position. The unit is
+undefined. Positive values set the focus closer to the camera,
+negative values towards infinity.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FOCUS_RELATIVE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">This control moves the
+focal point of the camera by the specified amount. The unit is
+undefined. Positive values move the focus closer to the camera,
+negative values towards infinity. This is a write-only control.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FOCUS_AUTO</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Enables continuous automatic
+focus adjustments. The effect of manual focus adjustments while this feature
+is enabled is undefined, drivers should ignore such requests.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_AUTO_FOCUS_START</constant>&nbsp;</entry>
+ <entry>button</entry>
+ </row><row><entry spanname="descr">Starts single auto focus process.
+The effect of setting this control when <constant>V4L2_CID_FOCUS_AUTO</constant>
+is set to <constant>TRUE</constant> (1) is undefined, drivers should ignore
+such requests.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_AUTO_FOCUS_STOP</constant>&nbsp;</entry>
+ <entry>button</entry>
+ </row><row><entry spanname="descr">Aborts automatic focusing
+started with <constant>V4L2_CID_AUTO_FOCUS_START</constant> control. It is
+effective only when the continuous autofocus is disabled, that is when
+<constant>V4L2_CID_FOCUS_AUTO</constant> control is set to <constant>FALSE
+</constant> (0).</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row id="v4l2-auto-focus-status">
+ <entry spanname="id">
+ <constant>V4L2_CID_AUTO_FOCUS_STATUS</constant>&nbsp;</entry>
+ <entry>bitmask</entry>
+ </row>
+ <row><entry spanname="descr">The automatic focus status. This is a read-only
+ control.</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_AUTO_FOCUS_STATUS_IDLE</constant>&nbsp;</entry>
+ <entry>Automatic focus is not active.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_AUTO_FOCUS_STATUS_BUSY</constant>&nbsp;</entry>
+ <entry>Automatic focusing is in progress.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_AUTO_FOCUS_STATUS_REACHED</constant>&nbsp;</entry>
+ <entry>Focus has been reached.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_AUTO_FOCUS_STATUS_FAILED</constant>&nbsp;</entry>
+ <entry>Automatic focus has failed, the driver will not
+ transition from this state until another action is
+ performed by an application.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry spanname="descr">
+Setting <constant>V4L2_LOCK_FOCUS</constant> lock bit of the <constant>V4L2_CID_3A_LOCK
+</constant> control may stop updates of the <constant>V4L2_CID_AUTO_FOCUS_STATUS</constant>
+control value.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row id="v4l2-auto-focus-range">
+ <entry spanname="id">
+ <constant>V4L2_CID_AUTO_FOCUS_RANGE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_auto_focus_range</entry>
+ </row>
+ <row><entry spanname="descr">Determines auto focus distance range
+for which lens may be adjusted. </entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_AUTO_FOCUS_RANGE_AUTO</constant>&nbsp;</entry>
+ <entry>The camera automatically selects the focus range.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_AUTO_FOCUS_RANGE_NORMAL</constant>&nbsp;</entry>
+ <entry>Normal distance range, limited for best automatic focus
+performance.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_AUTO_FOCUS_RANGE_MACRO</constant>&nbsp;</entry>
+ <entry>Macro (close-up) auto focus. The camera will
+use its minimum possible distance for auto focus.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_AUTO_FOCUS_RANGE_INFINITY</constant>&nbsp;</entry>
+ <entry>The lens is set to focus on an object at infinite distance.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_ZOOM_ABSOLUTE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Specify the objective lens
+focal length as an absolute value. The zoom unit is driver-specific and its
+value should be a positive integer.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_ZOOM_RELATIVE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Specify the objective lens
+focal length relatively to the current value. Positive values move the zoom
+lens group towards the telephoto direction, negative values towards the
+wide-angle direction. The zoom unit is driver-specific. This is a write-only control.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_ZOOM_CONTINUOUS</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Move the objective lens group
+at the specified speed until it reaches physical device limits or until an
+explicit request to stop the movement. A positive value moves the zoom lens
+group towards the telephoto direction. A value of zero stops the zoom lens
+group movement. A negative value moves the zoom lens group towards the
+wide-angle direction. The zoom speed unit is driver-specific.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_IRIS_ABSOLUTE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">This control sets the
+camera's aperture to the specified value. The unit is undefined.
+Larger values open the iris wider, smaller values close it.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_IRIS_RELATIVE</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">This control modifies the
+camera's aperture by the specified amount. The unit is undefined.
+Positive values open the iris one step further, negative values close
+it one step further. This is a write-only control.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_PRIVACY</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Prevent video from being acquired
+by the camera. When this control is set to <constant>TRUE</constant> (1), no
+image can be captured by the camera. Common means to enforce privacy are
+mechanical obturation of the sensor and firmware image processing, but the
+device is not restricted to these methods. Devices that implement the privacy
+control must support read access and may support write access.</entry>
+ </row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_BAND_STOP_FILTER</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">Switch the band-stop filter of a
+camera sensor on or off, or specify its strength. Such band-stop filters can
+be used, for example, to filter out the fluorescent light component.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row id="v4l2-auto-n-preset-white-balance">
+ <entry spanname="id"><constant>V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_auto_n_preset_white_balance</entry>
+ </row><row><entry spanname="descr">Sets white balance to automatic,
+manual or a preset. The presets determine color temperature of the light as
+a hint to the camera for white balance adjustments resulting in most accurate
+color representation. The following white balance presets are listed in order
+of increasing color temperature.</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_WHITE_BALANCE_MANUAL</constant>&nbsp;</entry>
+ <entry>Manual white balance.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_WHITE_BALANCE_AUTO</constant>&nbsp;</entry>
+ <entry>Automatic white balance adjustments.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_WHITE_BALANCE_INCANDESCENT</constant>&nbsp;</entry>
+ <entry>White balance setting for incandescent (tungsten) lighting.
+It generally cools down the colors and corresponds approximately to 2500...3500 K
+color temperature range.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_WHITE_BALANCE_FLUORESCENT</constant>&nbsp;</entry>
+ <entry>White balance preset for fluorescent lighting.
+It corresponds approximately to 4000...5000 K color temperature.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_WHITE_BALANCE_FLUORESCENT_H</constant>&nbsp;</entry>
+ <entry>With this setting the camera will compensate for
+fluorescent H lighting.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_WHITE_BALANCE_HORIZON</constant>&nbsp;</entry>
+ <entry>White balance setting for horizon daylight.
+It corresponds approximately to 5000 K color temperature.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_WHITE_BALANCE_DAYLIGHT</constant>&nbsp;</entry>
+ <entry>White balance preset for daylight (with clear sky).
+It corresponds approximately to 5000...6500 K color temperature.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_WHITE_BALANCE_FLASH</constant>&nbsp;</entry>
+ <entry>With this setting the camera will compensate for the flash
+light. It slightly warms up the colors and corresponds roughly to 5000...5500 K
+color temperature.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_WHITE_BALANCE_CLOUDY</constant>&nbsp;</entry>
+ <entry>White balance preset for moderately overcast sky.
+This option corresponds approximately to 6500...8000 K color temperature
+range.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_WHITE_BALANCE_SHADE</constant>&nbsp;</entry>
+ <entry>White balance preset for shade or heavily overcast
+sky. It corresponds approximately to 9000...10000 K color temperature.
+</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+
+ <row id="v4l2-wide-dynamic-range">
+ <entry spanname="id"><constant>V4L2_CID_WIDE_DYNAMIC_RANGE</constant></entry>
+ <entry>boolean</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Enables or disables the camera's wide dynamic
+range feature. This feature allows to obtain clear images in situations where
+intensity of the illumination varies significantly throughout the scene, i.e.
+there are simultaneously very dark and very bright areas. It is most commonly
+realized in cameras by combining two subsequent frames with different exposure
+times. <footnote id="ctypeconv"><para> This control may be changed to a menu
+control in the future, if more options are required.</para></footnote></entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row id="v4l2-image-stabilization">
+ <entry spanname="id"><constant>V4L2_CID_IMAGE_STABILIZATION</constant></entry>
+ <entry>boolean</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Enables or disables image stabilization.
+ <footnoteref linkend="ctypeconv"/></entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_ISO_SENSITIVITY</constant>&nbsp;</entry>
+ <entry>integer menu</entry>
+ </row><row><entry spanname="descr">Determines ISO equivalent of an
+image sensor indicating the sensor's sensitivity to light. The numbers are
+expressed in arithmetic scale, as per <xref linkend="iso12232" /> standard,
+where doubling the sensor sensitivity is represented by doubling the numerical
+ISO value. Applications should interpret the values as standard ISO values
+multiplied by 1000, e.g. control value 800 stands for ISO 0.8. Drivers will
+usually support only a subset of standard ISO values. The effect of setting
+this control while the <constant>V4L2_CID_ISO_SENSITIVITY_AUTO</constant>
+control is set to a value other than <constant>V4L2_CID_ISO_SENSITIVITY_MANUAL
+</constant> is undefined, drivers should ignore such requests.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row id="v4l2-iso-sensitivity-auto-type">
+ <entry spanname="id"><constant>V4L2_CID_ISO_SENSITIVITY_AUTO</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_iso_sensitivity_type</entry>
+ </row><row><entry spanname="descr">Enables or disables automatic ISO
+sensitivity adjustments.</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CID_ISO_SENSITIVITY_MANUAL</constant>&nbsp;</entry>
+ <entry>Manual ISO sensitivity.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CID_ISO_SENSITIVITY_AUTO</constant>&nbsp;</entry>
+ <entry>Automatic ISO sensitivity adjustments.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+
+ <row id="v4l2-scene-mode">
+ <entry spanname="id"><constant>V4L2_CID_SCENE_MODE</constant>&nbsp;</entry>
+ <entry>enum&nbsp;v4l2_scene_mode</entry>
+ </row><row><entry spanname="descr">This control allows to select
+scene programs as the camera automatic modes optimized for common shooting
+scenes. Within these modes the camera determines best exposure, aperture,
+focusing, light metering, white balance and equivalent sensitivity. The
+controls of those parameters are influenced by the scene mode control.
+An exact behavior in each mode is subject to the camera specification.
+
+<para>When the scene mode feature is not used, this control should be set to
+<constant>V4L2_SCENE_MODE_NONE</constant> to make sure the other possibly
+related controls are accessible. The following scene programs are defined:
+</para>
+</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_NONE</constant>&nbsp;</entry>
+ <entry>The scene mode feature is disabled.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_BACKLIGHT</constant>&nbsp;</entry>
+ <entry>Backlight. Compensates for dark shadows when light is
+ coming from behind a subject, also by automatically turning
+ on the flash.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_BEACH_SNOW</constant>&nbsp;</entry>
+ <entry>Beach and snow. This mode compensates for all-white or
+bright scenes, which tend to look gray and low contrast, when camera's automatic
+exposure is based on an average scene brightness. To compensate, this mode
+automatically slightly overexposes the frames. The white balance may also be
+adjusted to compensate for the fact that reflected snow looks bluish rather
+than white.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_CANDLELIGHT</constant>&nbsp;</entry>
+ <entry>Candle light. The camera generally raises the ISO
+sensitivity and lowers the shutter speed. This mode compensates for relatively
+close subject in the scene. The flash is disabled in order to preserve the
+ambiance of the light.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_DAWN_DUSK</constant>&nbsp;</entry>
+ <entry>Dawn and dusk. Preserves the colors seen in low
+natural light before dusk and after down. The camera may turn off the flash,
+and automatically focus at infinity. It will usually boost saturation and
+lower the shutter speed.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_FALL_COLORS</constant>&nbsp;</entry>
+ <entry>Fall colors. Increases saturation and adjusts white
+balance for color enhancement. Pictures of autumn leaves get saturated reds
+and yellows.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_FIREWORKS</constant>&nbsp;</entry>
+ <entry>Fireworks. Long exposure times are used to capture
+the expanding burst of light from a firework. The camera may invoke image
+stabilization.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_LANDSCAPE</constant>&nbsp;</entry>
+ <entry>Landscape. The camera may choose a small aperture to
+provide deep depth of field and long exposure duration to help capture detail
+in dim light conditions. The focus is fixed at infinity. Suitable for distant
+and wide scenery.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_NIGHT</constant>&nbsp;</entry>
+ <entry>Night, also known as Night Landscape. Designed for low
+light conditions, it preserves detail in the dark areas without blowing out bright
+objects. The camera generally sets itself to a medium-to-high ISO sensitivity,
+with a relatively long exposure time, and turns flash off. As such, there will be
+increased image noise and the possibility of blurred image.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_PARTY_INDOOR</constant>&nbsp;</entry>
+ <entry>Party and indoor. Designed to capture indoor scenes
+that are lit by indoor background lighting as well as the flash. The camera
+usually increases ISO sensitivity, and adjusts exposure for the low light
+conditions.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_PORTRAIT</constant>&nbsp;</entry>
+ <entry>Portrait. The camera adjusts the aperture so that the
+depth of field is reduced, which helps to isolate the subject against a smooth
+background. Most cameras recognize the presence of faces in the scene and focus
+on them. The color hue is adjusted to enhance skin tones. The intensity of the
+flash is often reduced.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_SPORTS</constant>&nbsp;</entry>
+ <entry>Sports. Significantly increases ISO and uses a fast
+shutter speed to freeze motion of rapidly-moving subjects. Increased image
+noise may be seen in this mode.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_SUNSET</constant>&nbsp;</entry>
+ <entry>Sunset. Preserves deep hues seen in sunsets and
+sunrises. It bumps up the saturation.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SCENE_MODE_TEXT</constant>&nbsp;</entry>
+ <entry>Text. It applies extra contrast and sharpness, it is
+typically a black-and-white mode optimized for readability. Automatic focus
+may be switched to close-up mode and this setting may also involve some
+lens-distortion correction.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_3A_LOCK</constant></entry>
+ <entry>bitmask</entry>
+ </row>
+ <row>
+ <entry spanname="descr">This control locks or unlocks the automatic
+focus, exposure and white balance. The automatic adjustments can be paused
+independently by setting the corresponding lock bit to 1. The camera then retains
+the settings until the lock bit is cleared. The following lock bits are defined:
+</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_LOCK_EXPOSURE</constant></entry>
+ <entry>Automatic exposure adjustments lock.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_LOCK_WHITE_BALANCE</constant></entry>
+ <entry>Automatic white balance adjustments lock.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_LOCK_FOCUS</constant></entry>
+ <entry>Automatic focus lock.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry spanname="descr">
+When a given algorithm is not enabled, drivers should ignore requests
+to lock it and should return no error. An example might be an application
+setting bit <constant>V4L2_LOCK_WHITE_BALANCE</constant> when the
+<constant>V4L2_CID_AUTO_WHITE_BALANCE</constant> control is set to
+<constant>FALSE</constant>. The value of this control may be changed
+by exposure, white balance or focus controls.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_PAN_SPEED</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">This control turns the
+camera horizontally at the specific speed. The unit is undefined. A
+positive value moves the camera to the right (clockwise when viewed
+from above), a negative value to the left. A value of zero stops the motion
+if one is in progress and has no effect otherwise.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TILT_SPEED</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row><row><entry spanname="descr">This control turns the
+camera vertically at the specified speed. The unit is undefined. A
+positive value moves the camera up, a negative value down. A value of zero
+stops the motion if one is in progress and has no effect otherwise.</entry>
+ </row>
+ <row><entry></entry></row>
+
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section id="fm-tx-controls">
+ <title>FM Transmitter Control Reference</title>
+
+ <para>The FM Transmitter (FM_TX) class includes controls for common features of
+FM transmissions capable devices. Currently this class includes parameters for audio
+compression, pilot tone generation, audio deviation limiter, RDS transmission and
+tuning power features.</para>
+
+ <table pgwide="1" frame="none" id="fm-tx-control-id">
+ <title>FM_TX Control IDs</title>
+
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FM_TX_CLASS</constant>&nbsp;</entry>
+ <entry>class</entry>
+ </row><row><entry spanname="descr">The FM_TX class
+descriptor. Calling &VIDIOC-QUERYCTRL; for this control will return a
+description of this control class.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_DEVIATION</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Configures RDS signal frequency deviation level in Hz.
+The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_PI</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Sets the RDS Programme Identification field
+for transmission.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_PTY</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Sets the RDS Programme Type field for transmission.
+This encodes up to 31 pre-defined programme types.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_PS_NAME</constant>&nbsp;</entry>
+ <entry>string</entry>
+ </row>
+ <row><entry spanname="descr">Sets the Programme Service name (PS_NAME) for transmission.
+It is intended for static display on a receiver. It is the primary aid to listeners in programme service
+identification and selection. In Annex E of <xref linkend="iec62106" />, the RDS specification,
+there is a full description of the correct character encoding for Programme Service name strings.
+Also from RDS specification, PS is usually a single eight character text. However, it is also possible
+to find receivers which can scroll strings sized as 8 x N characters. So, this control must be configured
+with steps of 8 characters. The result is it must always contain a string with size multiple of 8.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_RADIO_TEXT</constant>&nbsp;</entry>
+ <entry>string</entry>
+ </row>
+ <row><entry spanname="descr">Sets the Radio Text info for transmission. It is a textual description of
+what is being broadcasted. RDS Radio Text can be applied when broadcaster wishes to transmit longer PS names,
+programme-related information or any other text. In these cases, RadioText should be used in addition to
+<constant>V4L2_CID_RDS_TX_PS_NAME</constant>. The encoding for Radio Text strings is also fully described
+in Annex E of <xref linkend="iec62106" />. The length of Radio Text strings depends on which RDS Block is being
+used to transmit it, either 32 (2A block) or 64 (2B block). However, it is also possible
+to find receivers which can scroll strings sized as 32 x N or 64 x N characters. So, this control must be configured
+with steps of 32 or 64 characters. The result is it must always contain a string with size multiple of 32 or 64. </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_MONO_STEREO</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Sets the Mono/Stereo bit of the Decoder Identification code. If set,
+then the audio was recorded as stereo.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_ARTIFICIAL_HEAD</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Sets the
+<ulink url="http://en.wikipedia.org/wiki/Artificial_head">Artificial Head</ulink> bit of the Decoder
+Identification code. If set, then the audio was recorded using an artificial head.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_COMPRESSED</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Sets the Compressed bit of the Decoder Identification code. If set,
+then the audio is compressed.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_DYNAMIC_PTY</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Sets the Dynamic PTY bit of the Decoder Identification code. If set,
+then the PTY code is dynamically switched.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_TRAFFIC_ANNOUNCEMENT</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">If set, then a traffic announcement is in progress.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_TRAFFIC_PROGRAM</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">If set, then the tuned programme carries traffic announcements.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_MUSIC_SPEECH</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">If set, then this channel broadcasts music. If cleared, then it
+broadcasts speech. If the transmitter doesn't make this distinction, then it should be set.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_ALT_FREQS_ENABLE</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">If set, then transmit alternate frequencies.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_TX_ALT_FREQS</constant>&nbsp;</entry>
+ <entry>__u32 array</entry>
+ </row>
+ <row><entry spanname="descr">The alternate frequencies in kHz units. The RDS standard allows
+for up to 25 frequencies to be defined. Drivers may support fewer frequencies so check
+the array size.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_AUDIO_LIMITER_ENABLED</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Enables or disables the audio deviation limiter feature.
+The limiter is useful when trying to maximize the audio volume, minimize receiver-generated
+distortion and prevent overmodulation.
+</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_AUDIO_LIMITER_RELEASE_TIME</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Sets the audio deviation limiter feature release time.
+Unit is in useconds. Step and range are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_AUDIO_LIMITER_DEVIATION</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Configures audio frequency deviation level in Hz.
+The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_AUDIO_COMPRESSION_ENABLED</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Enables or disables the audio compression feature.
+This feature amplifies signals below the threshold by a fixed gain and compresses audio
+signals above the threshold by the ratio of Threshold/(Gain + Threshold).</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_AUDIO_COMPRESSION_GAIN</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Sets the gain for audio compression feature. It is
+a dB value. The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_AUDIO_COMPRESSION_THRESHOLD</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Sets the threshold level for audio compression freature.
+It is a dB value. The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_AUDIO_COMPRESSION_ATTACK_TIME</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Sets the attack time for audio compression feature.
+It is a useconds value. The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_AUDIO_COMPRESSION_RELEASE_TIME</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Sets the release time for audio compression feature.
+It is a useconds value. The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_PILOT_TONE_ENABLED</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Enables or disables the pilot tone generation feature.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_PILOT_TONE_DEVIATION</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Configures pilot tone frequency deviation level. Unit is
+in Hz. The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_PILOT_TONE_FREQUENCY</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Configures pilot tone frequency value. Unit is
+in Hz. The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TUNE_PREEMPHASIS</constant>&nbsp;</entry>
+ <entry>enum v4l2_preemphasis</entry>
+ </row>
+ <row id="v4l2-preemphasis"><entry spanname="descr">Configures the pre-emphasis value for broadcasting.
+A pre-emphasis filter is applied to the broadcast to accentuate the high audio frequencies.
+Depending on the region, a time constant of either 50 or 75 useconds is used. The enum&nbsp;v4l2_preemphasis
+defines possible values for pre-emphasis. Here they are:</entry>
+ </row><row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_PREEMPHASIS_DISABLED</constant>&nbsp;</entry>
+ <entry>No pre-emphasis is applied.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_PREEMPHASIS_50_uS</constant>&nbsp;</entry>
+ <entry>A pre-emphasis of 50 uS is used.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_PREEMPHASIS_75_uS</constant>&nbsp;</entry>
+ <entry>A pre-emphasis of 75 uS is used.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TUNE_POWER_LEVEL</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Sets the output power level for signal transmission.
+Unit is in dBuV. Range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TUNE_ANTENNA_CAPACITOR</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">This selects the value of antenna tuning capacitor
+manually or automatically if set to zero. Unit, range and step are driver-specific.</entry>
+ </row>
+ <row><entry></entry></row>
+ </tbody>
+ </tgroup>
+ </table>
+
+<para>For more details about RDS specification, refer to
+<xref linkend="iec62106" /> document, from CENELEC.</para>
+ </section>
+
+ <section id="flash-controls">
+ <title>Flash Control Reference</title>
+
+ <note>
+ <title>Experimental</title>
+
+ <para>This is an <link linkend="experimental">experimental</link>
+interface and may change in the future.</para>
+ </note>
+
+ <para>
+ The V4L2 flash controls are intended to provide generic access
+ to flash controller devices. Flash controller devices are
+ typically used in digital cameras.
+ </para>
+
+ <para>
+ The interface can support both LED and xenon flash devices. As
+ of writing this, there is no xenon flash driver using this
+ interface.
+ </para>
+
+ <section id="flash-controls-use-cases">
+ <title>Supported use cases</title>
+
+ <section>
+ <title>Unsynchronised LED flash (software strobe)</title>
+
+ <para>
+ Unsynchronised LED flash is controlled directly by the
+ host as the sensor. The flash must be enabled by the host
+ before the exposure of the image starts and disabled once
+ it ends. The host is fully responsible for the timing of
+ the flash.
+ </para>
+
+ <para>Example of such device: Nokia N900.</para>
+ </section>
+
+ <section>
+ <title>Synchronised LED flash (hardware strobe)</title>
+
+ <para>
+ The synchronised LED flash is pre-programmed by the host
+ (power and timeout) but controlled by the sensor through a
+ strobe signal from the sensor to the flash.
+ </para>
+
+ <para>
+ The sensor controls the flash duration and timing. This
+ information typically must be made available to the
+ sensor.
+ </para>
+
+ </section>
+
+ <section>
+ <title>LED flash as torch</title>
+
+ <para>
+ LED flash may be used as torch in conjunction with another
+ use case involving camera or individually.
+ </para>
+
+
+ <table pgwide="1" frame="none" id="flash-control-id">
+ <title>Flash Control IDs</title>
+
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_CLASS</constant></entry>
+ <entry>class</entry>
+ </row>
+ <row>
+ <entry spanname="descr">The FLASH class descriptor.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_LED_MODE</constant></entry>
+ <entry>menu</entry>
+ </row>
+ <row id="v4l2-flash-led-mode">
+ <entry spanname="descr">Defines the mode of the flash LED,
+ the high-power white LED attached to the flash controller.
+ Setting this control may not be possible in presence of
+ some faults. See V4L2_CID_FLASH_FAULT.</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_FLASH_LED_MODE_NONE</constant></entry>
+ <entry>Off.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_LED_MODE_FLASH</constant></entry>
+ <entry>Flash mode.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_LED_MODE_TORCH</constant></entry>
+ <entry>Torch mode. See V4L2_CID_FLASH_TORCH_INTENSITY.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_STROBE_SOURCE</constant></entry>
+ <entry>menu</entry>
+ </row>
+ <row id="v4l2-flash-strobe-source"><entry
+ spanname="descr">Defines the source of the flash LED
+ strobe.</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_FLASH_STROBE_SOURCE_SOFTWARE</constant></entry>
+ <entry>The flash strobe is triggered by using
+ the V4L2_CID_FLASH_STROBE control.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_STROBE_SOURCE_EXTERNAL</constant></entry>
+ <entry>The flash strobe is triggered by an
+ external source. Typically this is a sensor,
+ which makes it possible to synchronises the
+ flash strobe start to exposure start.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_STROBE</constant></entry>
+ <entry>button</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Strobe flash. Valid when
+ V4L2_CID_FLASH_LED_MODE is set to
+ V4L2_FLASH_LED_MODE_FLASH and V4L2_CID_FLASH_STROBE_SOURCE
+ is set to V4L2_FLASH_STROBE_SOURCE_SOFTWARE. Setting this
+ control may not be possible in presence of some faults.
+ See V4L2_CID_FLASH_FAULT.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_STROBE_STOP</constant></entry>
+ <entry>button</entry>
+ </row>
+ <row><entry spanname="descr">Stop flash strobe immediately.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_STROBE_STATUS</constant></entry>
+ <entry>boolean</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Strobe status: whether the flash
+ is strobing at the moment or not. This is a read-only
+ control.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_TIMEOUT</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Hardware timeout for flash. The
+ flash strobe is stopped after this period of time has
+ passed from the start of the strobe.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_INTENSITY</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Intensity of the flash strobe when
+ the flash LED is in flash mode
+ (V4L2_FLASH_LED_MODE_FLASH). The unit should be milliamps
+ (mA) if possible.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_TORCH_INTENSITY</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Intensity of the flash LED in
+ torch mode (V4L2_FLASH_LED_MODE_TORCH). The unit should be
+ milliamps (mA) if possible. Setting this control may not
+ be possible in presence of some faults. See
+ V4L2_CID_FLASH_FAULT.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_INDICATOR_INTENSITY</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Intensity of the indicator LED.
+ The indicator LED may be fully independent of the flash
+ LED. The unit should be microamps (uA) if possible.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_FAULT</constant></entry>
+ <entry>bitmask</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Faults related to the flash. The
+ faults tell about specific problems in the flash chip
+ itself or the LEDs attached to it. Faults may prevent
+ further use of some of the flash controls. In particular,
+ V4L2_CID_FLASH_LED_MODE is set to V4L2_FLASH_LED_MODE_NONE
+ if the fault affects the flash LED. Exactly which faults
+ have such an effect is chip dependent. Reading the faults
+ resets the control and returns the chip to a usable state
+ if possible.</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_FLASH_FAULT_OVER_VOLTAGE</constant></entry>
+ <entry>Flash controller voltage to the flash LED
+ has exceeded the limit specific to the flash
+ controller.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_FAULT_TIMEOUT</constant></entry>
+ <entry>The flash strobe was still on when
+ the timeout set by the user ---
+ V4L2_CID_FLASH_TIMEOUT control --- has expired.
+ Not all flash controllers may set this in all
+ such conditions.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_FAULT_OVER_TEMPERATURE</constant></entry>
+ <entry>The flash controller has overheated.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_FAULT_SHORT_CIRCUIT</constant></entry>
+ <entry>The short circuit protection of the flash
+ controller has been triggered.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_FAULT_OVER_CURRENT</constant></entry>
+ <entry>Current in the LED power supply has exceeded the limit
+ specific to the flash controller.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_FAULT_INDICATOR</constant></entry>
+ <entry>The flash controller has detected a short or open
+ circuit condition on the indicator LED.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_FAULT_UNDER_VOLTAGE</constant></entry>
+ <entry>Flash controller voltage to the flash LED
+ has been below the minimum limit specific to the flash
+ controller.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_FAULT_INPUT_VOLTAGE</constant></entry>
+ <entry>The input voltage of the flash controller is below
+ the limit under which strobing the flash at full current
+ will not be possible.The condition persists until this flag
+ is no longer set.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FLASH_FAULT_LED_OVER_TEMPERATURE</constant></entry>
+ <entry>The temperature of the LED has exceeded its
+ allowed upper limit.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_CHARGE</constant></entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">Enable or disable charging of the xenon
+ flash capacitor.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FLASH_READY</constant></entry>
+ <entry>boolean</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Is the flash ready to strobe?
+ Xenon flashes require their capacitors charged before
+ strobing. LED flashes often require a cooldown period
+ after strobe during which another strobe will not be
+ possible. This is a read-only control.</entry>
+ </row>
+ <row><entry></entry></row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+ </section>
+ </section>
+
+ <section id="jpeg-controls">
+ <title>JPEG Control Reference</title>
+ <para>The JPEG class includes controls for common features of JPEG
+ encoders and decoders. Currently it includes features for codecs
+ implementing progressive baseline DCT compression process with
+ Huffman entrophy coding.</para>
+ <table pgwide="1" frame="none" id="jpeg-control-id">
+ <title>JPEG Control IDs</title>
+
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_JPEG_CLASS</constant>&nbsp;</entry>
+ <entry>class</entry>
+ </row><row><entry spanname="descr">The JPEG class descriptor. Calling
+ &VIDIOC-QUERYCTRL; for this control will return a description of this
+ control class.
+
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_JPEG_CHROMA_SUBSAMPLING</constant></entry>
+ <entry>menu</entry>
+ </row>
+ <row id="v4l2-jpeg-chroma-subsampling">
+ <entry spanname="descr">The chroma subsampling factors describe how
+ each component of an input image is sampled, in respect to maximum
+ sample rate in each spatial dimension. See <xref linkend="itu-t81"/>,
+ clause A.1.1. for more details. The <constant>
+ V4L2_CID_JPEG_CHROMA_SUBSAMPLING</constant> control determines how
+ Cb and Cr components are downsampled after coverting an input image
+ from RGB to Y'CbCr color space.
+ </entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_JPEG_CHROMA_SUBSAMPLING_444</constant>
+ </entry><entry>No chroma subsampling, each pixel has
+ Y, Cr and Cb values.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_JPEG_CHROMA_SUBSAMPLING_422</constant>
+ </entry><entry>Horizontally subsample Cr, Cb components
+ by a factor of 2.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_JPEG_CHROMA_SUBSAMPLING_420</constant>
+ </entry><entry>Subsample Cr, Cb components horizontally
+ and vertically by 2.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_JPEG_CHROMA_SUBSAMPLING_411</constant>
+ </entry><entry>Horizontally subsample Cr, Cb components
+ by a factor of 4.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_JPEG_CHROMA_SUBSAMPLING_410</constant>
+ </entry><entry>Subsample Cr, Cb components horizontally
+ by 4 and vertically by 2.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_JPEG_CHROMA_SUBSAMPLING_GRAY</constant>
+ </entry><entry>Use only luminance component.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_JPEG_RESTART_INTERVAL</constant>
+ </entry><entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">
+ The restart interval determines an interval of inserting RSTm
+ markers (m = 0..7). The purpose of these markers is to additionally
+ reinitialize the encoder process, in order to process blocks of
+ an image independently.
+ For the lossy compression processes the restart interval unit is
+ MCU (Minimum Coded Unit) and its value is contained in DRI
+ (Define Restart Interval) marker. If <constant>
+ V4L2_CID_JPEG_RESTART_INTERVAL</constant> control is set to 0,
+ DRI and RSTm markers will not be inserted.
+ </entry>
+ </row>
+ <row id="jpeg-quality-control">
+ <entry spanname="id"><constant>V4L2_CID_JPEG_COMPRESSION_QUALITY</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">
+ <constant>V4L2_CID_JPEG_COMPRESSION_QUALITY</constant> control
+ determines trade-off between image quality and size.
+ It provides simpler method for applications to control image quality,
+ without a need for direct reconfiguration of luminance and chrominance
+ quantization tables.
+
+ In cases where a driver uses quantization tables configured directly
+ by an application, using interfaces defined elsewhere, <constant>
+ V4L2_CID_JPEG_COMPRESSION_QUALITY</constant> control should be set
+ by driver to 0.
+
+ <para>The value range of this control is driver-specific. Only
+ positive, non-zero values are meaningful. The recommended range
+ is 1 - 100, where larger values correspond to better image quality.
+ </para>
+ </entry>
+ </row>
+ <row id="jpeg-active-marker-control">
+ <entry spanname="id"><constant>V4L2_CID_JPEG_ACTIVE_MARKER</constant></entry>
+ <entry>bitmask</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Specify which JPEG markers are included
+ in compressed stream. This control is valid only for encoders.
+ </entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_JPEG_ACTIVE_MARKER_APP0</constant></entry>
+ <entry>Application data segment APP<subscript>0</subscript>.</entry>
+ </row><row>
+ <entry><constant>V4L2_JPEG_ACTIVE_MARKER_APP1</constant></entry>
+ <entry>Application data segment APP<subscript>1</subscript>.</entry>
+ </row><row>
+ <entry><constant>V4L2_JPEG_ACTIVE_MARKER_COM</constant></entry>
+ <entry>Comment segment.</entry>
+ </row><row>
+ <entry><constant>V4L2_JPEG_ACTIVE_MARKER_DQT</constant></entry>
+ <entry>Quantization tables segment.</entry>
+ </row><row>
+ <entry><constant>V4L2_JPEG_ACTIVE_MARKER_DHT</constant></entry>
+ <entry>Huffman tables segment.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row><entry></entry></row>
+ </tbody>
+ </tgroup>
+ </table>
+ <para>For more details about JPEG specification, refer
+ to <xref linkend="itu-t81"/>, <xref linkend="jfif"/>,
+ <xref linkend="w3c-jpeg-jfif"/>.</para>
+ </section>
+
+ <section id="image-source-controls">
+ <title>Image Source Control Reference</title>
+
+ <note>
+ <title>Experimental</title>
+
+ <para>This is an <link
+ linkend="experimental">experimental</link> interface and may
+ change in the future.</para>
+ </note>
+
+ <para>
+ The Image Source control class is intended for low-level
+ control of image source devices such as image sensors. The
+ devices feature an analogue to digital converter and a bus
+ transmitter to transmit the image data out of the device.
+ </para>
+
+ <table pgwide="1" frame="none" id="image-source-control-id">
+ <title>Image Source Control IDs</title>
+
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_IMAGE_SOURCE_CLASS</constant></entry>
+ <entry>class</entry>
+ </row>
+ <row>
+ <entry spanname="descr">The IMAGE_SOURCE class descriptor.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_VBLANK</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Vertical blanking. The idle period
+ after every frame during which no image data is produced.
+ The unit of vertical blanking is a line. Every line has
+ length of the image width plus horizontal blanking at the
+ pixel rate defined by
+ <constant>V4L2_CID_PIXEL_RATE</constant> control in the
+ same sub-device.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_HBLANK</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Horizontal blanking. The idle
+ period after every line of image data during which no
+ image data is produced. The unit of horizontal blanking is
+ pixels.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_ANALOGUE_GAIN</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Analogue gain is gain affecting
+ all colour components in the pixel matrix. The gain
+ operation is performed in the analogue domain before A/D
+ conversion.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TEST_PATTERN_RED</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Test pattern red colour component.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TEST_PATTERN_GREENR</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Test pattern green (next to red)
+ colour component.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TEST_PATTERN_BLUE</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Test pattern blue colour component.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TEST_PATTERN_GREENB</constant></entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Test pattern green (next to blue)
+ colour component.
+ </entry>
+ </row>
+ <row><entry></entry></row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </section>
+
+ <section id="image-process-controls">
+ <title>Image Process Control Reference</title>
+
+ <note>
+ <title>Experimental</title>
+
+ <para>This is an <link
+ linkend="experimental">experimental</link> interface and may
+ change in the future.</para>
+ </note>
+
+ <para>
+ The Image Source control class is intended for low-level control of
+ image processing functions. Unlike
+ <constant>V4L2_CID_IMAGE_SOURCE_CLASS</constant>, the controls in
+ this class affect processing the image, and do not control capturing
+ of it.
+ </para>
+
+ <table pgwide="1" frame="none" id="image-process-control-id">
+ <title>Image Source Control IDs</title>
+
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_IMAGE_PROC_CLASS</constant></entry>
+ <entry>class</entry>
+ </row>
+ <row>
+ <entry spanname="descr">The IMAGE_PROC class descriptor.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_LINK_FREQ</constant></entry>
+ <entry>integer menu</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Data bus frequency. Together with the
+ media bus pixel code, bus type (clock cycles per sample), the
+ data bus frequency defines the pixel rate
+ (<constant>V4L2_CID_PIXEL_RATE</constant>) in the
+ pixel array (or possibly elsewhere, if the device is not an
+ image sensor). The frame rate can be calculated from the pixel
+ clock, image width and height and horizontal and vertical
+ blanking. While the pixel rate control may be defined elsewhere
+ than in the subdev containing the pixel array, the frame rate
+ cannot be obtained from that information. This is because only
+ on the pixel array it can be assumed that the vertical and
+ horizontal blanking information is exact: no other blanking is
+ allowed in the pixel array. The selection of frame rate is
+ performed by selecting the desired horizontal and vertical
+ blanking. The unit of this control is Hz. </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_PIXEL_RATE</constant></entry>
+ <entry>64-bit integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Pixel rate in the source pads of
+ the subdev. This control is read-only and its unit is
+ pixels / second.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TEST_PATTERN</constant></entry>
+ <entry>menu</entry>
+ </row>
+ <row id="v4l2-test-pattern">
+ <entry spanname="descr"> Some capture/display/sensor devices have
+ the capability to generate test pattern images. These hardware
+ specific test patterns can be used to test if a device is working
+ properly.</entry>
+ </row>
+ <row><entry></entry></row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </section>
+
+ <section id="dv-controls">
+ <title>Digital Video Control Reference</title>
+
+ <note>
+ <title>Experimental</title>
+
+ <para>This is an <link
+ linkend="experimental">experimental</link> interface and may
+ change in the future.</para>
+ </note>
+
+ <para>
+ The Digital Video control class is intended to control receivers
+ and transmitters for <ulink url="http://en.wikipedia.org/wiki/Vga">VGA</ulink>,
+ <ulink url="http://en.wikipedia.org/wiki/Digital_Visual_Interface">DVI</ulink>
+ (Digital Visual Interface), HDMI (<xref linkend="hdmi" />) and DisplayPort (<xref linkend="dp" />).
+ These controls are generally expected to be private to the receiver or transmitter
+ subdevice that implements them, so they are only exposed on the
+ <filename>/dev/v4l-subdev*</filename> device node.
+ </para>
+
+ <para>Note that these devices can have multiple input or output pads which are
+ hooked up to e.g. HDMI connectors. Even though the subdevice will receive or
+ transmit video from/to only one of those pads, the other pads can still be
+ active when it comes to EDID (Extended Display Identification Data,
+ <xref linkend="vesaedid" />) and HDCP (High-bandwidth Digital Content
+ Protection System, <xref linkend="hdcp" />) processing, allowing the device
+ to do the fairly slow EDID/HDCP handling in advance. This allows for quick
+ switching between connectors.</para>
+
+ <para>These pads appear in several of the controls in this section as
+ bitmasks, one bit for each pad. Bit 0 corresponds to pad 0, bit 1 to pad 1,
+ etc. The maximum value of the control is the set of valid pads.</para>
+
+ <table pgwide="1" frame="none" id="dv-control-id">
+ <title>Digital Video Control IDs</title>
+
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DV_CLASS</constant></entry>
+ <entry>class</entry>
+ </row>
+ <row>
+ <entry spanname="descr">The Digital Video class descriptor.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DV_TX_HOTPLUG</constant></entry>
+ <entry>bitmask</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Many connectors have a hotplug pin which is high
+ if EDID information is available from the source. This control shows the
+ state of the hotplug pin as seen by the transmitter.
+ Each bit corresponds to an output pad on the transmitter. If an output pad
+ does not have an associated hotplug pin, then the bit for that pad will be 0.
+ This read-only control is applicable to DVI-D, HDMI and DisplayPort connectors.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DV_TX_RXSENSE</constant></entry>
+ <entry>bitmask</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Rx Sense is the detection of pull-ups on the TMDS
+ clock lines. This normally means that the sink has left/entered standby (i.e.
+ the transmitter can sense that the receiver is ready to receive video).
+ Each bit corresponds to an output pad on the transmitter. If an output pad
+ does not have an associated Rx Sense, then the bit for that pad will be 0.
+ This read-only control is applicable to DVI-D and HDMI devices.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DV_TX_EDID_PRESENT</constant></entry>
+ <entry>bitmask</entry>
+ </row>
+ <row>
+ <entry spanname="descr">When the transmitter sees the hotplug signal from the
+ receiver it will attempt to read the EDID. If set, then the transmitter has read
+ at least the first block (= 128 bytes).
+ Each bit corresponds to an output pad on the transmitter. If an output pad
+ does not support EDIDs, then the bit for that pad will be 0.
+ This read-only control is applicable to VGA, DVI-A/D, HDMI and DisplayPort connectors.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DV_TX_MODE</constant></entry>
+ <entry id="v4l2-dv-tx-mode">enum v4l2_dv_tx_mode</entry>
+ </row>
+ <row>
+ <entry spanname="descr">HDMI transmitters can transmit in DVI-D mode (just video)
+ or in HDMI mode (video + audio + auxiliary data). This control selects which mode
+ to use: V4L2_DV_TX_MODE_DVI_D or V4L2_DV_TX_MODE_HDMI.
+ This control is applicable to HDMI connectors.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DV_TX_RGB_RANGE</constant></entry>
+ <entry id="v4l2-dv-rgb-range">enum v4l2_dv_rgb_range</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Select the quantization range for RGB output. V4L2_DV_RANGE_AUTO
+ follows the RGB quantization range specified in the standard for the video interface
+ (ie. <xref linkend="cea861" /> for HDMI). V4L2_DV_RANGE_LIMITED and V4L2_DV_RANGE_FULL override the standard
+ to be compatible with sinks that have not implemented the standard correctly
+ (unfortunately quite common for HDMI and DVI-D). Full range allows all possible values to be
+ used whereas limited range sets the range to (16 &lt;&lt; (N-8)) - (235 &lt;&lt; (N-8))
+ where N is the number of bits per component.
+ This control is applicable to VGA, DVI-A/D, HDMI and DisplayPort connectors.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DV_RX_POWER_PRESENT</constant></entry>
+ <entry>bitmask</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Detects whether the receiver receives power from the source
+ (e.g. HDMI carries 5V on one of the pins). This is often used to power an eeprom
+ which contains EDID information, such that the source can read the EDID even if
+ the sink is in standby/power off.
+ Each bit corresponds to an input pad on the transmitter. If an input pad
+ cannot detect whether power is present, then the bit for that pad will be 0.
+ This read-only control is applicable to DVI-D, HDMI and DisplayPort connectors.
+ </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DV_RX_RGB_RANGE</constant></entry>
+ <entry>enum v4l2_dv_rgb_range</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Select the quantization range for RGB input. V4L2_DV_RANGE_AUTO
+ follows the RGB quantization range specified in the standard for the video interface
+ (ie. <xref linkend="cea861" /> for HDMI). V4L2_DV_RANGE_LIMITED and V4L2_DV_RANGE_FULL override the standard
+ to be compatible with sources that have not implemented the standard correctly
+ (unfortunately quite common for HDMI and DVI-D). Full range allows all possible values to be
+ used whereas limited range sets the range to (16 &lt;&lt; (N-8)) - (235 &lt;&lt; (N-8))
+ where N is the number of bits per component.
+ This control is applicable to VGA, DVI-A/D, HDMI and DisplayPort connectors.
+ </entry>
+ </row>
+ <row><entry></entry></row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </section>
+
+ <section id="fm-rx-controls">
+ <title>FM Receiver Control Reference</title>
+
+ <para>The FM Receiver (FM_RX) class includes controls for common features of
+ FM Reception capable devices.</para>
+
+ <table pgwide="1" frame="none" id="fm-rx-control-id">
+ <title>FM_RX Control IDs</title>
+
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_FM_RX_CLASS</constant>&nbsp;</entry>
+ <entry>class</entry>
+ </row><row><entry spanname="descr">The FM_RX class
+descriptor. Calling &VIDIOC-QUERYCTRL; for this control will return a
+description of this control class.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_RECEPTION</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row><row><entry spanname="descr">Enables/disables RDS
+ reception by the radio tuner</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_RX_PTY</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Gets RDS Programme Type field.
+This encodes up to 31 pre-defined programme types.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_RX_PS_NAME</constant>&nbsp;</entry>
+ <entry>string</entry>
+ </row>
+ <row><entry spanname="descr">Gets the Programme Service name (PS_NAME).
+It is intended for static display on a receiver. It is the primary aid to listeners in programme service
+identification and selection. In Annex E of <xref linkend="iec62106" />, the RDS specification,
+there is a full description of the correct character encoding for Programme Service name strings.
+Also from RDS specification, PS is usually a single eight character text. However, it is also possible
+to find receivers which can scroll strings sized as 8 x N characters. So, this control must be configured
+with steps of 8 characters. The result is it must always contain a string with size multiple of 8.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_RX_RADIO_TEXT</constant>&nbsp;</entry>
+ <entry>string</entry>
+ </row>
+ <row><entry spanname="descr">Gets the Radio Text info. It is a textual description of
+what is being broadcasted. RDS Radio Text can be applied when broadcaster wishes to transmit longer PS names,
+programme-related information or any other text. In these cases, RadioText can be used in addition to
+<constant>V4L2_CID_RDS_RX_PS_NAME</constant>. The encoding for Radio Text strings is also fully described
+in Annex E of <xref linkend="iec62106" />. The length of Radio Text strings depends on which RDS Block is being
+used to transmit it, either 32 (2A block) or 64 (2B block). However, it is also possible
+to find receivers which can scroll strings sized as 32 x N or 64 x N characters. So, this control must be configured
+with steps of 32 or 64 characters. The result is it must always contain a string with size multiple of 32 or 64. </entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_RX_TRAFFIC_ANNOUNCEMENT</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">If set, then a traffic announcement is in progress.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_RX_TRAFFIC_PROGRAM</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">If set, then the tuned programme carries traffic announcements.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RDS_RX_MUSIC_SPEECH</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row><entry spanname="descr">If set, then this channel broadcasts music. If cleared, then it
+broadcasts speech. If the transmitter doesn't make this distinction, then it will be set.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_TUNE_DEEMPHASIS</constant>&nbsp;</entry>
+ <entry>enum v4l2_deemphasis</entry>
+ </row>
+ <row id="v4l2-deemphasis"><entry spanname="descr">Configures the de-emphasis value for reception.
+A de-emphasis filter is applied to the broadcast to accentuate the high audio frequencies.
+Depending on the region, a time constant of either 50 or 75 useconds is used. The enum&nbsp;v4l2_deemphasis
+defines possible values for de-emphasis. Here they are:</entry>
+ </row><row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_DEEMPHASIS_DISABLED</constant>&nbsp;</entry>
+ <entry>No de-emphasis is applied.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_DEEMPHASIS_50_uS</constant>&nbsp;</entry>
+ <entry>A de-emphasis of 50 uS is used.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_DEEMPHASIS_75_uS</constant>&nbsp;</entry>
+ <entry>A de-emphasis of 75 uS is used.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+
+ </row>
+ <row><entry></entry></row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section id="detect-controls">
+ <title>Detect Control Reference</title>
+
+ <para>The Detect class includes controls for common features of
+ various motion or object detection capable devices.</para>
+
+ <table pgwide="1" frame="none" id="detect-control-id">
+ <title>Detect Control IDs</title>
+
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row><row rowsep="1"><entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DETECT_CLASS</constant>&nbsp;</entry>
+ <entry>class</entry>
+ </row><row><entry spanname="descr">The Detect class
+descriptor. Calling &VIDIOC-QUERYCTRL; for this control will return a
+description of this control class.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DETECT_MD_MODE</constant>&nbsp;</entry>
+ <entry>menu</entry>
+ </row><row><entry spanname="descr">Sets the motion detection mode.</entry>
+ </row>
+ <row>
+ <entrytbl spanname="descr" cols="2">
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_DETECT_MD_MODE_DISABLED</constant>
+ </entry><entry>Disable motion detection.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_DETECT_MD_MODE_GLOBAL</constant>
+ </entry><entry>Use a single motion detection threshold.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_DETECT_MD_MODE_THRESHOLD_GRID</constant>
+ </entry><entry>The image is divided into a grid, each cell with its own
+ motion detection threshold. These thresholds are set through the
+ <constant>V4L2_CID_DETECT_MD_THRESHOLD_GRID</constant> matrix control.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_DETECT_MD_MODE_REGION_GRID</constant>
+ </entry><entry>The image is divided into a grid, each cell with its own
+ region value that specifies which per-region motion detection thresholds
+ should be used. Each region has its own thresholds. How these per-region
+ thresholds are set up is driver-specific. The region values for the grid are set
+ through the <constant>V4L2_CID_DETECT_MD_REGION_GRID</constant> matrix
+ control.</entry>
+ </row>
+ </tbody>
+ </entrytbl>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DETECT_MD_GLOBAL_THRESHOLD</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row><entry spanname="descr">Sets the global motion detection threshold to be
+ used with the <constant>V4L2_DETECT_MD_MODE_GLOBAL</constant> motion detection mode.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DETECT_MD_THRESHOLD_GRID</constant>&nbsp;</entry>
+ <entry>__u16 matrix</entry>
+ </row>
+ <row><entry spanname="descr">Sets the motion detection thresholds for each cell in the grid.
+ To be used with the <constant>V4L2_DETECT_MD_MODE_THRESHOLD_GRID</constant>
+ motion detection mode. Matrix element (0, 0) represents the cell at the top-left of the
+ grid.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_DETECT_MD_REGION_GRID</constant>&nbsp;</entry>
+ <entry>__u8 matrix</entry>
+ </row>
+ <row><entry spanname="descr">Sets the motion detection region value for each cell in the grid.
+ To be used with the <constant>V4L2_DETECT_MD_MODE_REGION_GRID</constant>
+ motion detection mode. Matrix element (0, 0) represents the cell at the top-left of the
+ grid.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </section>
+
+ <section id="rf-tuner-controls">
+ <title>RF Tuner Control Reference</title>
+
+ <para>
+The RF Tuner (RF_TUNER) class includes controls for common features of devices
+having RF tuner.
+ </para>
+ <para>
+In this context, RF tuner is radio receiver circuit between antenna and
+demodulator. It receives radio frequency (RF) from the antenna and converts that
+received signal to lower intermediate frequency (IF) or baseband frequency (BB).
+Tuners that could do baseband output are often called Zero-IF tuners. Older
+tuners were typically simple PLL tuners inside a metal box, whilst newer ones
+are highly integrated chips without a metal box "silicon tuners". These controls
+are mostly applicable for new feature rich silicon tuners, just because older
+tuners does not have much adjustable features.
+ </para>
+ <para>
+For more information about RF tuners see
+<ulink url="http://en.wikipedia.org/wiki/Tuner_%28radio%29">Tuner (radio)</ulink>
+and
+<ulink url="http://en.wikipedia.org/wiki/RF_front_end">RF front end</ulink>
+from Wikipedia.
+ </para>
+
+ <table pgwide="1" frame="none" id="rf-tuner-control-id">
+ <title>RF_TUNER Control IDs</title>
+
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="6*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="6*" />
+ <spanspec namest="c1" nameend="c2" spanname="id" />
+ <spanspec namest="c2" nameend="c4" spanname="descr" />
+ <thead>
+ <row>
+ <entry spanname="id" align="left">ID</entry>
+ <entry align="left">Type</entry>
+ </row>
+ <row rowsep="1">
+ <entry spanname="descr" align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row><entry></entry></row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RF_TUNER_CLASS</constant>&nbsp;</entry>
+ <entry>class</entry>
+ </row><row><entry spanname="descr">The RF_TUNER class
+descriptor. Calling &VIDIOC-QUERYCTRL; for this control will return a
+description of this control class.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RF_TUNER_BANDWIDTH_AUTO</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Enables/disables tuner radio channel
+bandwidth configuration. In automatic mode bandwidth configuration is performed
+by the driver.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RF_TUNER_BANDWIDTH</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Filter(s) on tuner signal path are used to
+filter signal according to receiving party needs. Driver configures filters to
+fulfill desired bandwidth requirement. Used when V4L2_CID_RF_TUNER_BANDWIDTH_AUTO is not
+set. Unit is in Hz. The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RF_TUNER_LNA_GAIN_AUTO</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Enables/disables LNA automatic gain control (AGC)</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Enables/disables mixer automatic gain control (AGC)</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RF_TUNER_IF_GAIN_AUTO</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Enables/disables IF automatic gain control (AGC)</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RF_TUNER_LNA_GAIN</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">LNA (low noise amplifier) gain is first
+gain stage on the RF tuner signal path. It is located very close to tuner
+antenna input. Used when <constant>V4L2_CID_RF_TUNER_LNA_GAIN_AUTO</constant> is not set.
+The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RF_TUNER_MIXER_GAIN</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Mixer gain is second gain stage on the RF
+tuner signal path. It is located inside mixer block, where RF signal is
+down-converted by the mixer. Used when <constant>V4L2_CID_RF_TUNER_MIXER_GAIN_AUTO</constant>
+is not set. The range and step are driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RF_TUNER_IF_GAIN</constant>&nbsp;</entry>
+ <entry>integer</entry>
+ </row>
+ <row>
+ <entry spanname="descr">IF gain is last gain stage on the RF tuner
+signal path. It is located on output of RF tuner. It controls signal level of
+intermediate frequency output or baseband output. Used when
+<constant>V4L2_CID_RF_TUNER_IF_GAIN_AUTO</constant> is not set. The range and step are
+driver-specific.</entry>
+ </row>
+ <row>
+ <entry spanname="id"><constant>V4L2_CID_RF_TUNER_PLL_LOCK</constant>&nbsp;</entry>
+ <entry>boolean</entry>
+ </row>
+ <row>
+ <entry spanname="descr">Is synthesizer PLL locked? RF tuner is
+receiving given frequency when that control is set. This is a read-only control.
+</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+</section>
diff --git a/Documentation/DocBook/media/v4l/dev-capture.xml b/Documentation/DocBook/media/v4l/dev-capture.xml
new file mode 100644
index 000000000..e1c5f9406
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-capture.xml
@@ -0,0 +1,110 @@
+ <title>Video Capture Interface</title>
+
+ <para>Video capture devices sample an analog video signal and store
+the digitized images in memory. Today nearly all devices can capture
+at full 25 or 30 frames/second. With this interface applications can
+control the capture process and move images from the driver into user
+space.</para>
+
+ <para>Conventionally V4L2 video capture devices are accessed through
+character device special files named <filename>/dev/video</filename>
+and <filename>/dev/video0</filename> to
+<filename>/dev/video63</filename> with major number 81 and minor
+numbers 0 to 63. <filename>/dev/video</filename> is typically a
+symbolic link to the preferred video device. Note the same device
+files are used for video output devices.</para>
+
+ <section>
+ <title>Querying Capabilities</title>
+
+ <para>Devices supporting the video capture interface set the
+<constant>V4L2_CAP_VIDEO_CAPTURE</constant> or
+<constant>V4L2_CAP_VIDEO_CAPTURE_MPLANE</constant> flag in the
+<structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl. As secondary device functions
+they may also support the <link linkend="overlay">video overlay</link>
+(<constant>V4L2_CAP_VIDEO_OVERLAY</constant>) and the <link
+linkend="raw-vbi">raw VBI capture</link>
+(<constant>V4L2_CAP_VBI_CAPTURE</constant>) interface. At least one of
+the read/write or streaming I/O methods must be supported. Tuners and
+audio inputs are optional.</para>
+ </section>
+
+ <section>
+ <title>Supplemental Functions</title>
+
+ <para>Video capture devices shall support <link
+linkend="audio">audio input</link>, <link
+linkend="tuner">tuner</link>, <link linkend="control">controls</link>,
+<link linkend="crop">cropping and scaling</link> and <link
+linkend="streaming-par">streaming parameter</link> ioctls as needed.
+The <link linkend="video">video input</link> and <link
+linkend="standard">video standard</link> ioctls must be supported by
+all video capture devices.</para>
+ </section>
+
+ <section>
+ <title>Image Format Negotiation</title>
+
+ <para>The result of a capture operation is determined by
+cropping and image format parameters. The former select an area of the
+video picture to capture, the latter how images are stored in memory,
+&ie; in RGB or YUV format, the number of bits per pixel or width and
+height. Together they also define how images are scaled in the
+process.</para>
+
+ <para>As usual these parameters are <emphasis>not</emphasis> reset
+at &func-open; time to permit Unix tool chains, programming a device
+and then reading from it as if it was a plain file. Well written V4L2
+applications ensure they really get what they want, including cropping
+and scaling.</para>
+
+ <para>Cropping initialization at minimum requires to reset the
+parameters to defaults. An example is given in <xref
+linkend="crop" />.</para>
+
+ <para>To query the current image format applications set the
+<structfield>type</structfield> field of a &v4l2-format; to
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant> or
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE</constant> and call the
+&VIDIOC-G-FMT; ioctl with a pointer to this structure. Drivers fill
+the &v4l2-pix-format; <structfield>pix</structfield> or the
+&v4l2-pix-format-mplane; <structfield>pix_mp</structfield> member of the
+<structfield>fmt</structfield> union.</para>
+
+ <para>To request different parameters applications set the
+<structfield>type</structfield> field of a &v4l2-format; as above and
+initialize all fields of the &v4l2-pix-format;
+<structfield>vbi</structfield> member of the
+<structfield>fmt</structfield> union, or better just modify the
+results of <constant>VIDIOC_G_FMT</constant>, and call the
+&VIDIOC-S-FMT; ioctl with a pointer to this structure. Drivers may
+adjust the parameters and finally return the actual parameters as
+<constant>VIDIOC_G_FMT</constant> does.</para>
+
+ <para>Like <constant>VIDIOC_S_FMT</constant> the
+&VIDIOC-TRY-FMT; ioctl can be used to learn about hardware limitations
+without disabling I/O or possibly time consuming hardware
+preparations.</para>
+
+ <para>The contents of &v4l2-pix-format; and &v4l2-pix-format-mplane;
+are discussed in <xref linkend="pixfmt" />. See also the specification of the
+<constant>VIDIOC_G_FMT</constant>, <constant>VIDIOC_S_FMT</constant>
+and <constant>VIDIOC_TRY_FMT</constant> ioctls for details. Video
+capture devices must implement both the
+<constant>VIDIOC_G_FMT</constant> and
+<constant>VIDIOC_S_FMT</constant> ioctl, even if
+<constant>VIDIOC_S_FMT</constant> ignores all requests and always
+returns default parameters as <constant>VIDIOC_G_FMT</constant> does.
+<constant>VIDIOC_TRY_FMT</constant> is optional.</para>
+ </section>
+
+ <section>
+ <title>Reading Images</title>
+
+ <para>A video capture device may support the <link
+linkend="rw">read() function</link> and/or streaming (<link
+linkend="mmap">memory mapping</link> or <link
+linkend="userp">user pointer</link>) I/O. See <xref
+linkend="io" /> for details.</para>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/dev-codec.xml b/Documentation/DocBook/media/v4l/dev-codec.xml
new file mode 100644
index 000000000..ff44c16fc
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-codec.xml
@@ -0,0 +1,27 @@
+ <title>Codec Interface</title>
+
+ <para>A V4L2 codec can compress, decompress, transform, or otherwise
+convert video data from one format into another format, in memory. Typically
+such devices are memory-to-memory devices (i.e. devices with the
+<constant>V4L2_CAP_VIDEO_M2M</constant> or <constant>V4L2_CAP_VIDEO_M2M_MPLANE</constant>
+capability set).
+</para>
+
+ <para>A memory-to-memory video node acts just like a normal video node, but it
+supports both output (sending frames from memory to the codec hardware) and
+capture (receiving the processed frames from the codec hardware into memory)
+stream I/O. An application will have to setup the stream
+I/O for both sides and finally call &VIDIOC-STREAMON; for both capture and output
+to start the codec.</para>
+
+ <para>Video compression codecs use the MPEG controls to setup their codec parameters
+(note that the MPEG controls actually support many more codecs than just MPEG).
+See <xref linkend="mpeg-controls"></xref>.</para>
+
+ <para>Memory-to-memory devices can often be used as a shared resource: you can
+open the video node multiple times, each application setting up their own codec properties
+that are local to the file handle, and each can use it independently from the others.
+The driver will arbitrate access to the codec and reprogram it whenever another file
+handler gets access. This is different from the usual video node behavior where the video properties
+are global to the device (i.e. changing something through one file handle is visible
+through another file handle).</para>
diff --git a/Documentation/DocBook/media/v4l/dev-effect.xml b/Documentation/DocBook/media/v4l/dev-effect.xml
new file mode 100644
index 000000000..2350a67c0
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-effect.xml
@@ -0,0 +1,17 @@
+ <title>Effect Devices Interface</title>
+
+ <note>
+ <title>Suspended</title>
+
+ <para>This interface has been be suspended from the V4L2 API
+implemented in Linux 2.6 until we have more experience with effect
+device interfaces.</para>
+ </note>
+
+ <para>A V4L2 video effect device can do image effects, filtering, or
+combine two or more images or image streams. For example video
+transitions or wipes. Applications send data to be processed and
+receive the result data either with &func-read; and &func-write;
+functions, or through the streaming I/O mechanism.</para>
+
+ <para>[to do]</para>
diff --git a/Documentation/DocBook/media/v4l/dev-event.xml b/Documentation/DocBook/media/v4l/dev-event.xml
new file mode 100644
index 000000000..19f4becfa
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-event.xml
@@ -0,0 +1,43 @@
+ <title>Event Interface</title>
+
+ <para>The V4L2 event interface provides a means for a user to get
+ immediately notified on certain conditions taking place on a device.
+ This might include start of frame or loss of signal events, for
+ example. Changes in the value or state of a V4L2 control can also be
+ reported through events.
+ </para>
+
+ <para>To receive events, the events the user is interested in first must
+ be subscribed using the &VIDIOC-SUBSCRIBE-EVENT; ioctl. Once an event is
+ subscribed, the events of subscribed types are dequeueable using the
+ &VIDIOC-DQEVENT; ioctl. Events may be unsubscribed using
+ VIDIOC_UNSUBSCRIBE_EVENT ioctl. The special event type V4L2_EVENT_ALL may
+ be used to unsubscribe all the events the driver supports.</para>
+
+ <para>The event subscriptions and event queues are specific to file
+ handles. Subscribing an event on one file handle does not affect
+ other file handles.</para>
+
+ <para>The information on dequeueable events is obtained by using select or
+ poll system calls on video devices. The V4L2 events use POLLPRI events on
+ poll system call and exceptions on select system call.</para>
+
+ <para>Starting with kernel 3.1 certain guarantees can be given with
+ regards to events:<orderedlist>
+ <listitem>
+ <para>Each subscribed event has its own internal dedicated event queue.
+This means that flooding of one event type will not interfere with other
+event types.</para>
+ </listitem>
+ <listitem>
+ <para>If the internal event queue for a particular subscribed event
+becomes full, then the oldest event in that queue will be dropped.</para>
+ </listitem>
+ <listitem>
+ <para>Where applicable, certain event types can ensure that the payload
+of the oldest event that is about to be dropped will be merged with the payload
+of the next oldest event. Thus ensuring that no information is lost, but only an
+intermediate step leading up to that information. See the documentation for the
+event you want to subscribe to whether this is applicable for that event or not.</para>
+ </listitem>
+ </orderedlist></para>
diff --git a/Documentation/DocBook/media/v4l/dev-osd.xml b/Documentation/DocBook/media/v4l/dev-osd.xml
new file mode 100644
index 000000000..548533291
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-osd.xml
@@ -0,0 +1,149 @@
+ <title>Video Output Overlay Interface</title>
+ <subtitle>Also known as On-Screen Display (OSD)</subtitle>
+
+ <para>Some video output devices can overlay a framebuffer image onto
+the outgoing video signal. Applications can set up such an overlay
+using this interface, which borrows structures and ioctls of the <link
+linkend="overlay">Video Overlay</link> interface.</para>
+
+ <para>The OSD function is accessible through the same character
+special file as the <link linkend="capture">Video Output</link> function.
+Note the default function of such a <filename>/dev/video</filename> device
+is video capturing or output. The OSD function is only available after
+calling the &VIDIOC-S-FMT; ioctl.</para>
+
+ <section>
+ <title>Querying Capabilities</title>
+
+ <para>Devices supporting the <wordasword>Video Output
+Overlay</wordasword> interface set the
+<constant>V4L2_CAP_VIDEO_OUTPUT_OVERLAY</constant> flag in the
+<structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl.</para>
+ </section>
+
+ <section>
+ <title>Framebuffer</title>
+
+ <para>Contrary to the <wordasword>Video Overlay</wordasword>
+interface the framebuffer is normally implemented on the TV card and
+not the graphics card. On Linux it is accessible as a framebuffer
+device (<filename>/dev/fbN</filename>). Given a V4L2 device,
+applications can find the corresponding framebuffer device by calling
+the &VIDIOC-G-FBUF; ioctl. It returns, amongst other information, the
+physical address of the framebuffer in the
+<structfield>base</structfield> field of &v4l2-framebuffer;. The
+framebuffer device ioctl <constant>FBIOGET_FSCREENINFO</constant>
+returns the same address in the <structfield>smem_start</structfield>
+field of struct <structname>fb_fix_screeninfo</structname>. The
+<constant>FBIOGET_FSCREENINFO</constant> ioctl and struct
+<structname>fb_fix_screeninfo</structname> are defined in the
+<filename>linux/fb.h</filename> header file.</para>
+
+ <para>The width and height of the framebuffer depends on the
+current video standard. A V4L2 driver may reject attempts to change
+the video standard (or any other ioctl which would imply a framebuffer
+size change) with an &EBUSY; until all applications closed the
+framebuffer device.</para>
+
+ <example>
+ <title>Finding a framebuffer device for OSD</title>
+
+ <programlisting>
+#include &lt;linux/fb.h&gt;
+
+&v4l2-framebuffer; fbuf;
+unsigned int i;
+int fb_fd;
+
+if (-1 == ioctl(fd, VIDIOC_G_FBUF, &amp;fbuf)) {
+ perror("VIDIOC_G_FBUF");
+ exit(EXIT_FAILURE);
+}
+
+for (i = 0; i &lt; 30; i++) {
+ char dev_name[16];
+ struct fb_fix_screeninfo si;
+
+ snprintf(dev_name, sizeof(dev_name), "/dev/fb%u", i);
+
+ fb_fd = open(dev_name, O_RDWR);
+ if (-1 == fb_fd) {
+ switch (errno) {
+ case ENOENT: /* no such file */
+ case ENXIO: /* no driver */
+ continue;
+
+ default:
+ perror("open");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if (0 == ioctl(fb_fd, FBIOGET_FSCREENINFO, &amp;si)) {
+ if (si.smem_start == (unsigned long)fbuf.base)
+ break;
+ } else {
+ /* Apparently not a framebuffer device. */
+ }
+
+ close(fb_fd);
+ fb_fd = -1;
+}
+
+/* fb_fd is the file descriptor of the framebuffer device
+ for the video output overlay, or -1 if no device was found. */
+</programlisting>
+ </example>
+ </section>
+
+ <section>
+ <title>Overlay Window and Scaling</title>
+
+ <para>The overlay is controlled by source and target rectangles.
+The source rectangle selects a subsection of the framebuffer image to
+be overlaid, the target rectangle an area in the outgoing video signal
+where the image will appear. Drivers may or may not support scaling,
+and arbitrary sizes and positions of these rectangles. Further drivers
+may support any (or none) of the clipping/blending methods defined for
+the <link linkend="overlay">Video Overlay</link> interface.</para>
+
+ <para>A &v4l2-window; defines the size of the source rectangle,
+its position in the framebuffer and the clipping/blending method to be
+used for the overlay. To get the current parameters applications set
+the <structfield>type</structfield> field of a &v4l2-format; to
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY</constant> and call the
+&VIDIOC-G-FMT; ioctl. The driver fills the
+<structname>v4l2_window</structname> substructure named
+<structfield>win</structfield>. It is not possible to retrieve a
+previously programmed clipping list or bitmap.</para>
+
+ <para>To program the source rectangle applications set the
+<structfield>type</structfield> field of a &v4l2-format; to
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY</constant>, initialize
+the <structfield>win</structfield> substructure and call the
+&VIDIOC-S-FMT; ioctl. The driver adjusts the parameters against
+hardware limits and returns the actual parameters as
+<constant>VIDIOC_G_FMT</constant> does. Like
+<constant>VIDIOC_S_FMT</constant>, the &VIDIOC-TRY-FMT; ioctl can be
+used to learn about driver capabilities without actually changing
+driver state. Unlike <constant>VIDIOC_S_FMT</constant> this also works
+after the overlay has been enabled.</para>
+
+ <para>A &v4l2-crop; defines the size and position of the target
+rectangle. The scaling factor of the overlay is implied by the width
+and height given in &v4l2-window; and &v4l2-crop;. The cropping API
+applies to <wordasword>Video Output</wordasword> and <wordasword>Video
+Output Overlay</wordasword> devices in the same way as to
+<wordasword>Video Capture</wordasword> and <wordasword>Video
+Overlay</wordasword> devices, merely reversing the direction of the
+data flow. For more information see <xref linkend="crop" />.</para>
+ </section>
+
+ <section>
+ <title>Enabling Overlay</title>
+
+ <para>There is no V4L2 ioctl to enable or disable the overlay,
+however the framebuffer interface of the driver may support the
+<constant>FBIOBLANK</constant> ioctl.</para>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/dev-output.xml b/Documentation/DocBook/media/v4l/dev-output.xml
new file mode 100644
index 000000000..9130a3dc7
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-output.xml
@@ -0,0 +1,106 @@
+ <title>Video Output Interface</title>
+
+ <para>Video output devices encode stills or image sequences as
+analog video signal. With this interface applications can
+control the encoding process and move images from user space to
+the driver.</para>
+
+ <para>Conventionally V4L2 video output devices are accessed through
+character device special files named <filename>/dev/video</filename>
+and <filename>/dev/video0</filename> to
+<filename>/dev/video63</filename> with major number 81 and minor
+numbers 0 to 63. <filename>/dev/video</filename> is typically a
+symbolic link to the preferred video device. Note the same device
+files are used for video capture devices.</para>
+
+ <section>
+ <title>Querying Capabilities</title>
+
+ <para>Devices supporting the video output interface set the
+<constant>V4L2_CAP_VIDEO_OUTPUT</constant> or
+<constant>V4L2_CAP_VIDEO_OUTPUT_MPLANE</constant> flag in the
+<structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl. As secondary device functions
+they may also support the <link linkend="raw-vbi">raw VBI
+output</link> (<constant>V4L2_CAP_VBI_OUTPUT</constant>) interface. At
+least one of the read/write or streaming I/O methods must be
+supported. Modulators and audio outputs are optional.</para>
+ </section>
+
+ <section>
+ <title>Supplemental Functions</title>
+
+ <para>Video output devices shall support <link
+linkend="audio">audio output</link>, <link
+linkend="tuner">modulator</link>, <link linkend="control">controls</link>,
+<link linkend="crop">cropping and scaling</link> and <link
+linkend="streaming-par">streaming parameter</link> ioctls as needed.
+The <link linkend="video">video output</link> and <link
+linkend="standard">video standard</link> ioctls must be supported by
+all video output devices.</para>
+ </section>
+
+ <section>
+ <title>Image Format Negotiation</title>
+
+ <para>The output is determined by cropping and image format
+parameters. The former select an area of the video picture where the
+image will appear, the latter how images are stored in memory, &ie; in
+RGB or YUV format, the number of bits per pixel or width and height.
+Together they also define how images are scaled in the process.</para>
+
+ <para>As usual these parameters are <emphasis>not</emphasis> reset
+at &func-open; time to permit Unix tool chains, programming a device
+and then writing to it as if it was a plain file. Well written V4L2
+applications ensure they really get what they want, including cropping
+and scaling.</para>
+
+ <para>Cropping initialization at minimum requires to reset the
+parameters to defaults. An example is given in <xref
+linkend="crop" />.</para>
+
+ <para>To query the current image format applications set the
+<structfield>type</structfield> field of a &v4l2-format; to
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant> or
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE</constant> and call the
+&VIDIOC-G-FMT; ioctl with a pointer to this structure. Drivers fill
+the &v4l2-pix-format; <structfield>pix</structfield> or the
+&v4l2-pix-format-mplane; <structfield>pix_mp</structfield> member of the
+<structfield>fmt</structfield> union.</para>
+
+ <para>To request different parameters applications set the
+<structfield>type</structfield> field of a &v4l2-format; as above and
+initialize all fields of the &v4l2-pix-format;
+<structfield>vbi</structfield> member of the
+<structfield>fmt</structfield> union, or better just modify the
+results of <constant>VIDIOC_G_FMT</constant>, and call the
+&VIDIOC-S-FMT; ioctl with a pointer to this structure. Drivers may
+adjust the parameters and finally return the actual parameters as
+<constant>VIDIOC_G_FMT</constant> does.</para>
+
+ <para>Like <constant>VIDIOC_S_FMT</constant> the
+&VIDIOC-TRY-FMT; ioctl can be used to learn about hardware limitations
+without disabling I/O or possibly time consuming hardware
+preparations.</para>
+
+ <para>The contents of &v4l2-pix-format; and &v4l2-pix-format-mplane;
+are discussed in <xref linkend="pixfmt" />. See also the specification of the
+<constant>VIDIOC_G_FMT</constant>, <constant>VIDIOC_S_FMT</constant>
+and <constant>VIDIOC_TRY_FMT</constant> ioctls for details. Video
+output devices must implement both the
+<constant>VIDIOC_G_FMT</constant> and
+<constant>VIDIOC_S_FMT</constant> ioctl, even if
+<constant>VIDIOC_S_FMT</constant> ignores all requests and always
+returns default parameters as <constant>VIDIOC_G_FMT</constant> does.
+<constant>VIDIOC_TRY_FMT</constant> is optional.</para>
+ </section>
+
+ <section>
+ <title>Writing Images</title>
+
+ <para>A video output device may support the <link
+linkend="rw">write() function</link> and/or streaming (<link
+linkend="mmap">memory mapping</link> or <link
+linkend="userp">user pointer</link>) I/O. See <xref
+linkend="io" /> for details.</para>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/dev-overlay.xml b/Documentation/DocBook/media/v4l/dev-overlay.xml
new file mode 100644
index 000000000..cc6e0c5c9
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-overlay.xml
@@ -0,0 +1,368 @@
+ <title>Video Overlay Interface</title>
+ <subtitle>Also known as Framebuffer Overlay or Previewing</subtitle>
+
+ <para>Video overlay devices have the ability to genlock (TV-)video
+into the (VGA-)video signal of a graphics card, or to store captured
+images directly in video memory of a graphics card, typically with
+clipping. This can be considerable more efficient than capturing
+images and displaying them by other means. In the old days when only
+nuclear power plants needed cooling towers this used to be the only
+way to put live video into a window.</para>
+
+ <para>Video overlay devices are accessed through the same character
+special files as <link linkend="capture">video capture</link> devices.
+Note the default function of a <filename>/dev/video</filename> device
+is video capturing. The overlay function is only available after
+calling the &VIDIOC-S-FMT; ioctl.</para>
+
+ <para>The driver may support simultaneous overlay and capturing
+using the read/write and streaming I/O methods. If so, operation at
+the nominal frame rate of the video standard is not guaranteed. Frames
+may be directed away from overlay to capture, or one field may be used
+for overlay and the other for capture if the capture parameters permit
+this.</para>
+
+ <para>Applications should use different file descriptors for
+capturing and overlay. This must be supported by all drivers capable
+of simultaneous capturing and overlay. Optionally these drivers may
+also permit capturing and overlay with a single file descriptor for
+compatibility with V4L and earlier versions of V4L2.<footnote>
+ <para>A common application of two file descriptors is the
+XFree86 <link linkend="xvideo">Xv/V4L</link> interface driver and
+a V4L2 application. While the X server controls video overlay, the
+application can take advantage of memory mapping and DMA.</para>
+ <para>In the opinion of the designers of this API, no driver
+writer taking the efforts to support simultaneous capturing and
+overlay will restrict this ability by requiring a single file
+descriptor, as in V4L and earlier versions of V4L2. Making this
+optional means applications depending on two file descriptors need
+backup routines to be compatible with all drivers, which is
+considerable more work than using two fds in applications which do
+not. Also two fd's fit the general concept of one file descriptor for
+each logical stream. Hence as a complexity trade-off drivers
+<emphasis>must</emphasis> support two file descriptors and
+<emphasis>may</emphasis> support single fd operation.</para>
+ </footnote></para>
+
+ <section>
+ <title>Querying Capabilities</title>
+
+ <para>Devices supporting the video overlay interface set the
+<constant>V4L2_CAP_VIDEO_OVERLAY</constant> flag in the
+<structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl. The overlay I/O method specified
+below must be supported. Tuners and audio inputs are optional.</para>
+ </section>
+
+ <section>
+ <title>Supplemental Functions</title>
+
+ <para>Video overlay devices shall support <link
+linkend="audio">audio input</link>, <link
+linkend="tuner">tuner</link>, <link linkend="control">controls</link>,
+<link linkend="crop">cropping and scaling</link> and <link
+linkend="streaming-par">streaming parameter</link> ioctls as needed.
+The <link linkend="video">video input</link> and <link
+linkend="standard">video standard</link> ioctls must be supported by
+all video overlay devices.</para>
+ </section>
+
+ <section>
+ <title>Setup</title>
+
+ <para>Before overlay can commence applications must program the
+driver with frame buffer parameters, namely the address and size of
+the frame buffer and the image format, for example RGB 5:6:5. The
+&VIDIOC-G-FBUF; and &VIDIOC-S-FBUF; ioctls are available to get
+and set these parameters, respectively. The
+<constant>VIDIOC_S_FBUF</constant> ioctl is privileged because it
+allows to set up DMA into physical memory, bypassing the memory
+protection mechanisms of the kernel. Only the superuser can change the
+frame buffer address and size. Users are not supposed to run TV
+applications as root or with SUID bit set. A small helper application
+with suitable privileges should query the graphics system and program
+the V4L2 driver at the appropriate time.</para>
+
+ <para>Some devices add the video overlay to the output signal
+of the graphics card. In this case the frame buffer is not modified by
+the video device, and the frame buffer address and pixel format are
+not needed by the driver. The <constant>VIDIOC_S_FBUF</constant> ioctl
+is not privileged. An application can check for this type of device by
+calling the <constant>VIDIOC_G_FBUF</constant> ioctl.</para>
+
+ <para>A driver may support any (or none) of five clipping/blending
+methods:<orderedlist>
+ <listitem>
+ <para>Chroma-keying displays the overlaid image only where
+pixels in the primary graphics surface assume a certain color.</para>
+ </listitem>
+ <listitem>
+ <para>A bitmap can be specified where each bit corresponds
+to a pixel in the overlaid image. When the bit is set, the
+corresponding video pixel is displayed, otherwise a pixel of the
+graphics surface.</para>
+ </listitem>
+ <listitem>
+ <para>A list of clipping rectangles can be specified. In
+these regions <emphasis>no</emphasis> video is displayed, so the
+graphics surface can be seen here.</para>
+ </listitem>
+ <listitem>
+ <para>The framebuffer has an alpha channel that can be used
+to clip or blend the framebuffer with the video.</para>
+ </listitem>
+ <listitem>
+ <para>A global alpha value can be specified to blend the
+framebuffer contents with video images.</para>
+ </listitem>
+ </orderedlist></para>
+
+ <para>When simultaneous capturing and overlay is supported and
+the hardware prohibits different image and frame buffer formats, the
+format requested first takes precedence. The attempt to capture
+(&VIDIOC-S-FMT;) or overlay (&VIDIOC-S-FBUF;) may fail with an
+&EBUSY; or return accordingly modified parameters..</para>
+ </section>
+
+ <section>
+ <title>Overlay Window</title>
+
+ <para>The overlaid image is determined by cropping and overlay
+window parameters. The former select an area of the video picture to
+capture, the latter how images are overlaid and clipped. Cropping
+initialization at minimum requires to reset the parameters to
+defaults. An example is given in <xref linkend="crop" />.</para>
+
+ <para>The overlay window is described by a &v4l2-window;. It
+defines the size of the image, its position over the graphics surface
+and the clipping to be applied. To get the current parameters
+applications set the <structfield>type</structfield> field of a
+&v4l2-format; to <constant>V4L2_BUF_TYPE_VIDEO_OVERLAY</constant> and
+call the &VIDIOC-G-FMT; ioctl. The driver fills the
+<structname>v4l2_window</structname> substructure named
+<structfield>win</structfield>. It is not possible to retrieve a
+previously programmed clipping list or bitmap.</para>
+
+ <para>To program the overlay window applications set the
+<structfield>type</structfield> field of a &v4l2-format; to
+<constant>V4L2_BUF_TYPE_VIDEO_OVERLAY</constant>, initialize the
+<structfield>win</structfield> substructure and call the
+&VIDIOC-S-FMT; ioctl. The driver adjusts the parameters against
+hardware limits and returns the actual parameters as
+<constant>VIDIOC_G_FMT</constant> does. Like
+<constant>VIDIOC_S_FMT</constant>, the &VIDIOC-TRY-FMT; ioctl can be
+used to learn about driver capabilities without actually changing
+driver state. Unlike <constant>VIDIOC_S_FMT</constant> this also works
+after the overlay has been enabled.</para>
+
+ <para>The scaling factor of the overlaid image is implied by the
+width and height given in &v4l2-window; and the size of the cropping
+rectangle. For more information see <xref linkend="crop" />.</para>
+
+ <para>When simultaneous capturing and overlay is supported and
+the hardware prohibits different image and window sizes, the size
+requested first takes precedence. The attempt to capture or overlay as
+well (&VIDIOC-S-FMT;) may fail with an &EBUSY; or return accordingly
+modified parameters.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-window">
+ <title>struct <structname>v4l2_window</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>&v4l2-rect;</entry>
+ <entry><structfield>w</structfield></entry>
+ <entry>Size and position of the window relative to the
+top, left corner of the frame buffer defined with &VIDIOC-S-FBUF;. The
+window can extend the frame buffer width and height, the
+<structfield>x</structfield> and <structfield>y</structfield>
+coordinates can be negative, and it can lie completely outside the
+frame buffer. The driver clips the window accordingly, or if that is
+not possible, modifies its size and/or position.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-field;</entry>
+ <entry><structfield>field</structfield></entry>
+ <entry>Applications set this field to determine which
+video field shall be overlaid, typically one of
+<constant>V4L2_FIELD_ANY</constant> (0),
+<constant>V4L2_FIELD_TOP</constant>,
+<constant>V4L2_FIELD_BOTTOM</constant> or
+<constant>V4L2_FIELD_INTERLACED</constant>. Drivers may have to choose
+a different field order and return the actual setting here.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>chromakey</structfield></entry>
+ <entry>When chroma-keying has been negotiated with
+&VIDIOC-S-FBUF; applications set this field to the desired pixel value
+for the chroma key. The format is the same as the pixel format of the
+framebuffer (&v4l2-framebuffer;
+<structfield>fmt.pixelformat</structfield> field), with bytes in host
+order. E.&nbsp;g. for <link
+linkend="V4L2-PIX-FMT-BGR32"><constant>V4L2_PIX_FMT_BGR24</constant></link>
+the value should be 0xRRGGBB on a little endian, 0xBBGGRR on a big
+endian host.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-clip; *</entry>
+ <entry><structfield>clips</structfield></entry>
+ <entry>When chroma-keying has <emphasis>not</emphasis>
+been negotiated and &VIDIOC-G-FBUF; indicated this capability,
+applications can set this field to point to an array of
+clipping rectangles.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>Like the window coordinates
+<structfield>w</structfield>, clipping rectangles are defined relative
+to the top, left corner of the frame buffer. However clipping
+rectangles must not extend the frame buffer width and height, and they
+must not overlap. If possible applications should merge adjacent
+rectangles. Whether this must create x-y or y-x bands, or the order of
+rectangles, is not defined. When clip lists are not supported the
+driver ignores this field. Its contents after calling &VIDIOC-S-FMT;
+are undefined.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>clipcount</structfield></entry>
+ <entry>When the application set the
+<structfield>clips</structfield> field, this field must contain the
+number of clipping rectangles in the list. When clip lists are not
+supported the driver ignores this field, its contents after calling
+<constant>VIDIOC_S_FMT</constant> are undefined. When clip lists are
+supported but no clipping is desired this field must be set to
+zero.</entry>
+ </row>
+ <row>
+ <entry>void *</entry>
+ <entry><structfield>bitmap</structfield></entry>
+ <entry>When chroma-keying has
+<emphasis>not</emphasis> been negotiated and &VIDIOC-G-FBUF; indicated
+this capability, applications can set this field to point to a
+clipping bit mask.</entry>
+ </row>
+ <row>
+ <entry spanname="hspan"><para>It must be of the same size
+as the window, <structfield>w.width</structfield> and
+<structfield>w.height</structfield>. Each bit corresponds to a pixel
+in the overlaid image, which is displayed only when the bit is
+<emphasis>set</emphasis>. Pixel coordinates translate to bits like:
+<programlisting>
+((__u8 *) <structfield>bitmap</structfield>)[<structfield>w.width</structfield> * y + x / 8] &amp; (1 &lt;&lt; (x &amp; 7))</programlisting></para><para>where <structfield>0</structfield> &le; x &lt;
+<structfield>w.width</structfield> and <structfield>0</structfield> &le;
+y &lt;<structfield>w.height</structfield>.<footnote>
+ <para>Should we require
+ <structfield>w.width</structfield> to be a multiple of
+ eight?</para>
+ </footnote></para><para>When a clipping
+bit mask is not supported the driver ignores this field, its contents
+after calling &VIDIOC-S-FMT; are undefined. When a bit mask is supported
+but no clipping is desired this field must be set to
+<constant>NULL</constant>.</para><para>Applications need not create a
+clip list or bit mask. When they pass both, or despite negotiating
+chroma-keying, the results are undefined. Regardless of the chosen
+method, the clipping abilities of the hardware may be limited in
+quantity or quality. The results when these limits are exceeded are
+undefined.<footnote>
+ <para>When the image is written into frame buffer
+memory it will be undesirable if the driver clips out less pixels
+than expected, because the application and graphics system are not
+aware these regions need to be refreshed. The driver should clip out
+more pixels or not write the image at all.</para>
+ </footnote></para></entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>global_alpha</structfield></entry>
+ <entry>The global alpha value used to blend the
+framebuffer with video images, if global alpha blending has been
+negotiated (<constant>V4L2_FBUF_FLAG_GLOBAL_ALPHA</constant>, see
+&VIDIOC-S-FBUF;, <xref linkend="framebuffer-flags" />).</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>Note this field was added in Linux 2.6.23, extending the structure. However
+the <link linkend="vidioc-g-fmt">VIDIOC_G/S/TRY_FMT</link> ioctls,
+which take a pointer to a <link
+linkend="v4l2-format">v4l2_format</link> parent structure with padding
+bytes at the end, are not affected.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-clip">
+ <title>struct <structname>v4l2_clip</structname><footnote>
+ <para>The X Window system defines "regions" which are
+vectors of struct BoxRec { short x1, y1, x2, y2; } with width = x2 -
+x1 and height = y2 - y1, so one cannot pass X11 clip lists
+directly.</para>
+ </footnote></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>&v4l2-rect;</entry>
+ <entry><structfield>c</structfield></entry>
+ <entry>Coordinates of the clipping rectangle, relative to
+the top, left corner of the frame buffer. Only window pixels
+<emphasis>outside</emphasis> all clipping rectangles are
+displayed.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-clip; *</entry>
+ <entry><structfield>next</structfield></entry>
+ <entry>Pointer to the next clipping rectangle, NULL when
+this is the last rectangle. Drivers ignore this field, it cannot be
+used to pass a linked list of clipping rectangles.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <!-- NB for easier reading this table is duplicated
+ in the vidioc-cropcap chapter.-->
+
+ <table pgwide="1" frame="none" id="v4l2-rect">
+ <title>struct <structname>v4l2_rect</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>left</structfield></entry>
+ <entry>Horizontal offset of the top, left corner of the
+rectangle, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>top</structfield></entry>
+ <entry>Vertical offset of the top, left corner of the
+rectangle, in pixels. Offsets increase to the right and down.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>width</structfield></entry>
+ <entry>Width of the rectangle, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>height</structfield></entry>
+ <entry>Height of the rectangle, in pixels.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section>
+ <title>Enabling Overlay</title>
+
+ <para>To start or stop the frame buffer overlay applications call
+the &VIDIOC-OVERLAY; ioctl.</para>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/dev-radio.xml b/Documentation/DocBook/media/v4l/dev-radio.xml
new file mode 100644
index 000000000..3e6ac73b3
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-radio.xml
@@ -0,0 +1,49 @@
+ <title>Radio Interface</title>
+
+ <para>This interface is intended for AM and FM (analog) radio
+receivers and transmitters.</para>
+
+ <para>Conventionally V4L2 radio devices are accessed through
+character device special files named <filename>/dev/radio</filename>
+and <filename>/dev/radio0</filename> to
+<filename>/dev/radio63</filename> with major number 81 and minor
+numbers 64 to 127.</para>
+
+ <section>
+ <title>Querying Capabilities</title>
+
+ <para>Devices supporting the radio interface set the
+<constant>V4L2_CAP_RADIO</constant> and
+<constant>V4L2_CAP_TUNER</constant> or
+<constant>V4L2_CAP_MODULATOR</constant> flag in the
+<structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl. Other combinations of
+capability flags are reserved for future extensions.</para>
+ </section>
+
+ <section>
+ <title>Supplemental Functions</title>
+
+ <para>Radio devices can support <link
+linkend="control">controls</link>, and must support the <link
+linkend="tuner">tuner or modulator</link> ioctls.</para>
+
+ <para>They do not support the video input or output, audio input
+or output, video standard, cropping and scaling, compression and
+streaming parameter, or overlay ioctls. All other ioctls and I/O
+methods are reserved for future extensions.</para>
+ </section>
+
+ <section>
+ <title>Programming</title>
+
+ <para>Radio devices may have a couple audio controls (as discussed
+in <xref linkend="control" />) such as a volume control, possibly custom
+controls. Further all radio devices have one tuner or modulator (these are
+discussed in <xref linkend="tuner" />) with index number zero to select
+the radio frequency and to determine if a monaural or FM stereo
+program is received/emitted. Drivers switch automatically between AM and FM
+depending on the selected frequency. The &VIDIOC-G-TUNER; or
+&VIDIOC-G-MODULATOR; ioctl
+reports the supported frequency range.</para>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/dev-raw-vbi.xml b/Documentation/DocBook/media/v4l/dev-raw-vbi.xml
new file mode 100644
index 000000000..f4b61b6ce
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-raw-vbi.xml
@@ -0,0 +1,345 @@
+ <title>Raw VBI Data Interface</title>
+
+ <para>VBI is an abbreviation of Vertical Blanking Interval, a gap
+in the sequence of lines of an analog video signal. During VBI
+no picture information is transmitted, allowing some time while the
+electron beam of a cathode ray tube TV returns to the top of the
+screen. Using an oscilloscope you will find here the vertical
+synchronization pulses and short data packages ASK
+modulated<footnote><para>ASK: Amplitude-Shift Keying. A high signal
+level represents a '1' bit, a low level a '0' bit.</para></footnote>
+onto the video signal. These are transmissions of services such as
+Teletext or Closed Caption.</para>
+
+ <para>Subject of this interface type is raw VBI data, as sampled off
+a video signal, or to be added to a signal for output.
+The data format is similar to uncompressed video images, a number of
+lines times a number of samples per line, we call this a VBI image.</para>
+
+ <para>Conventionally V4L2 VBI devices are accessed through character
+device special files named <filename>/dev/vbi</filename> and
+<filename>/dev/vbi0</filename> to <filename>/dev/vbi31</filename> with
+major number 81 and minor numbers 224 to 255.
+<filename>/dev/vbi</filename> is typically a symbolic link to the
+preferred VBI device. This convention applies to both input and output
+devices.</para>
+
+ <para>To address the problems of finding related video and VBI
+devices VBI capturing and output is also available as device function
+under <filename>/dev/video</filename>. To capture or output raw VBI
+data with these devices applications must call the &VIDIOC-S-FMT;
+ioctl. Accessed as <filename>/dev/vbi</filename>, raw VBI capturing
+or output is the default device function.</para>
+
+ <section>
+ <title>Querying Capabilities</title>
+
+ <para>Devices supporting the raw VBI capturing or output API set
+the <constant>V4L2_CAP_VBI_CAPTURE</constant> or
+<constant>V4L2_CAP_VBI_OUTPUT</constant> flags, respectively, in the
+<structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl. At least one of the
+read/write, streaming or asynchronous I/O methods must be
+supported. VBI devices may or may not have a tuner or modulator.</para>
+ </section>
+
+ <section>
+ <title>Supplemental Functions</title>
+
+ <para>VBI devices shall support <link linkend="video">video
+input or output</link>, <link linkend="tuner">tuner or
+modulator</link>, and <link linkend="control">controls</link> ioctls
+as needed. The <link linkend="standard">video standard</link> ioctls provide
+information vital to program a VBI device, therefore must be
+supported.</para>
+ </section>
+
+ <section>
+ <title>Raw VBI Format Negotiation</title>
+
+ <para>Raw VBI sampling abilities can vary, in particular the
+sampling frequency. To properly interpret the data V4L2 specifies an
+ioctl to query the sampling parameters. Moreover, to allow for some
+flexibility applications can also suggest different parameters.</para>
+
+ <para>As usual these parameters are <emphasis>not</emphasis>
+reset at &func-open; time to permit Unix tool chains, programming a
+device and then reading from it as if it was a plain file. Well
+written V4L2 applications should always ensure they really get what
+they want, requesting reasonable parameters and then checking if the
+actual parameters are suitable.</para>
+
+ <para>To query the current raw VBI capture parameters
+applications set the <structfield>type</structfield> field of a
+&v4l2-format; to <constant>V4L2_BUF_TYPE_VBI_CAPTURE</constant> or
+<constant>V4L2_BUF_TYPE_VBI_OUTPUT</constant>, and call the
+&VIDIOC-G-FMT; ioctl with a pointer to this structure. Drivers fill
+the &v4l2-vbi-format; <structfield>vbi</structfield> member of the
+<structfield>fmt</structfield> union.</para>
+
+ <para>To request different parameters applications set the
+<structfield>type</structfield> field of a &v4l2-format; as above and
+initialize all fields of the &v4l2-vbi-format;
+<structfield>vbi</structfield> member of the
+<structfield>fmt</structfield> union, or better just modify the
+results of <constant>VIDIOC_G_FMT</constant>, and call the
+&VIDIOC-S-FMT; ioctl with a pointer to this structure. Drivers return
+an &EINVAL; only when the given parameters are ambiguous, otherwise
+they modify the parameters according to the hardware capabilites and
+return the actual parameters. When the driver allocates resources at
+this point, it may return an &EBUSY; to indicate the returned
+parameters are valid but the required resources are currently not
+available. That may happen for instance when the video and VBI areas
+to capture would overlap, or when the driver supports multiple opens
+and another process already requested VBI capturing or output. Anyway,
+applications must expect other resource allocation points which may
+return <errorcode>EBUSY</errorcode>, at the &VIDIOC-STREAMON; ioctl
+and the first read(), write() and select() call.</para>
+
+ <para>VBI devices must implement both the
+<constant>VIDIOC_G_FMT</constant> and
+<constant>VIDIOC_S_FMT</constant> ioctl, even if
+<constant>VIDIOC_S_FMT</constant> ignores all requests and always
+returns default parameters as <constant>VIDIOC_G_FMT</constant> does.
+<constant>VIDIOC_TRY_FMT</constant> is optional.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-vbi-format">
+ <title>struct <structname>v4l2_vbi_format</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>sampling_rate</structfield></entry>
+ <entry>Samples per second, i.&nbsp;e. unit 1 Hz.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>offset</structfield></entry>
+ <entry><para>Horizontal offset of the VBI image,
+relative to the leading edge of the line synchronization pulse and
+counted in samples: The first sample in the VBI image will be located
+<structfield>offset</structfield> /
+<structfield>sampling_rate</structfield> seconds following the leading
+edge. See also <xref linkend="vbi-hsync" />.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>samples_per_line</structfield></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>sample_format</structfield></entry>
+ <entry><para>Defines the sample format as in <xref
+linkend="pixfmt" />, a four-character-code.<footnote>
+ <para>A few devices may be unable to
+sample VBI data at all but can extend the video capture window to the
+VBI region.</para>
+ </footnote> Usually this is
+<constant>V4L2_PIX_FMT_GREY</constant>, i.&nbsp;e. each sample
+consists of 8 bits with lower values oriented towards the black level.
+Do not assume any other correlation of values with the signal level.
+For example, the MSB does not necessarily indicate if the signal is
+'high' or 'low' because 128 may not be the mean value of the
+signal. Drivers shall not convert the sample format by software.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>start</structfield>[2]</entry>
+ <entry>This is the scanning system line number
+associated with the first line of the VBI image, of the first and the
+second field respectively. See <xref linkend="vbi-525" /> and
+<xref linkend="vbi-625" /> for valid values.
+The <constant>V4L2_VBI_ITU_525_F1_START</constant>,
+<constant>V4L2_VBI_ITU_525_F2_START</constant>,
+<constant>V4L2_VBI_ITU_625_F1_START</constant> and
+<constant>V4L2_VBI_ITU_625_F2_START</constant> defines give the start line
+numbers for each field for each 525 or 625 line format as a convenience.
+Don't forget that ITU line numbering starts at 1, not 0.
+VBI input drivers can return start values 0 if the hardware cannot
+reliable identify scanning lines, VBI acquisition may not require this
+information.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>count</structfield>[2]</entry>
+ <entry>The number of lines in the first and second
+field image, respectively.</entry>
+ </row>
+ <row>
+ <entry spanname="hspan"><para>Drivers should be as
+flexibility as possible. For example, it may be possible to extend or
+move the VBI capture window down to the picture area, implementing a
+'full field mode' to capture data service transmissions embedded in
+the picture.</para><para>An application can set the first or second
+<structfield>count</structfield> value to zero if no data is required
+from the respective field; <structfield>count</structfield>[1] if the
+scanning system is progressive, &ie; not interlaced. The
+corresponding start value shall be ignored by the application and
+driver. Anyway, drivers may not support single field capturing and
+return both count values non-zero.</para><para>Both
+<structfield>count</structfield> values set to zero, or line numbers
+outside the bounds depicted in <xref linkend="vbi-525" /> and <xref
+ linkend="vbi-625" />, or a field image covering
+lines of two fields, are invalid and shall not be returned by the
+driver.</para><para>To initialize the <structfield>start</structfield>
+and <structfield>count</structfield> fields, applications must first
+determine the current video standard selection. The &v4l2-std-id; or
+the <structfield>framelines</structfield> field of &v4l2-standard; can
+be evaluated for this purpose.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>See <xref linkend="vbifmt-flags" /> below. Currently
+only drivers set flags, applications must set this field to
+zero.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[2]</entry>
+ <entry>This array is reserved for future extensions.
+Drivers and applications must set it to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="vbifmt-flags">
+ <title>Raw VBI Format Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_VBI_UNSYNC</constant></entry>
+ <entry>0x0001</entry>
+ <entry><para>This flag indicates hardware which does not
+properly distinguish between fields. Normally the VBI image stores the
+first field (lower scanning line numbers) first in memory. This may be
+a top or bottom field depending on the video standard. When this flag
+is set the first or second field may be stored first, however the
+fields are still in correct temporal order with the older field first
+in memory.<footnote>
+ <para>Most VBI services transmit on both fields, but
+some have different semantics depending on the field number. These
+cannot be reliable decoded or encoded when
+<constant>V4L2_VBI_UNSYNC</constant> is set.</para>
+ </footnote></para></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_VBI_INTERLACED</constant></entry>
+ <entry>0x0002</entry>
+ <entry>By default the two field images will be passed
+sequentially; all lines of the first field followed by all lines of
+the second field (compare <xref linkend="field-order" />
+<constant>V4L2_FIELD_SEQ_TB</constant> and
+<constant>V4L2_FIELD_SEQ_BT</constant>, whether the top or bottom
+field is first in memory depends on the video standard). When this
+flag is set, the two fields are interlaced (cf.
+<constant>V4L2_FIELD_INTERLACED</constant>). The first line of the
+first field followed by the first line of the second field, then the
+two second lines, and so on. Such a layout may be necessary when the
+hardware has been programmed to capture or output interlaced video
+images and is unable to separate the fields for VBI capturing at
+the same time. For simplicity setting this flag implies that both
+<structfield>count</structfield> values are equal and non-zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <figure id="vbi-hsync">
+ <title>Line synchronization</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="vbi_hsync.pdf" format="PS" />
+ </imageobject>
+ <imageobject>
+ <imagedata fileref="vbi_hsync.gif" format="GIF" />
+ </imageobject>
+ <textobject>
+ <phrase>Line synchronization diagram</phrase>
+ </textobject>
+ </mediaobject>
+ </figure>
+
+ <figure id="vbi-525">
+ <title>ITU-R 525 line numbering (M/NTSC and M/PAL)</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="vbi_525.pdf" format="PS" />
+ </imageobject>
+ <imageobject>
+ <imagedata fileref="vbi_525.gif" format="GIF" />
+ </imageobject>
+ <textobject>
+ <phrase>NTSC field synchronization diagram</phrase>
+ </textobject>
+ <caption>
+ <para>(1) For the purpose of this specification field 2
+starts in line 264 and not 263.5 because half line capturing is not
+supported.</para>
+ </caption>
+ </mediaobject>
+ </figure>
+
+ <figure id="vbi-625">
+ <title>ITU-R 625 line numbering</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="vbi_625.pdf" format="PS" />
+ </imageobject>
+ <imageobject>
+ <imagedata fileref="vbi_625.gif" format="GIF" />
+ </imageobject>
+ <textobject>
+ <phrase>PAL/SECAM field synchronization diagram</phrase>
+ </textobject>
+ <caption>
+ <para>(1) For the purpose of this specification field 2
+starts in line 314 and not 313.5 because half line capturing is not
+supported.</para>
+ </caption>
+ </mediaobject>
+ </figure>
+
+ <para>Remember the VBI image format depends on the selected
+video standard, therefore the application must choose a new standard or
+query the current standard first. Attempts to read or write data ahead
+of format negotiation, or after switching the video standard which may
+invalidate the negotiated VBI parameters, should be refused by the
+driver. A format change during active I/O is not permitted.</para>
+ </section>
+
+ <section>
+ <title>Reading and writing VBI images</title>
+
+ <para>To assure synchronization with the field number and easier
+implementation, the smallest unit of data passed at a time is one
+frame, consisting of two fields of VBI images immediately following in
+memory.</para>
+
+ <para>The total size of a frame computes as follows:</para>
+
+ <programlisting>
+(<structfield>count</structfield>[0] + <structfield>count</structfield>[1]) *
+<structfield>samples_per_line</structfield> * sample size in bytes</programlisting>
+
+ <para>The sample size is most likely always one byte,
+applications must check the <structfield>sample_format</structfield>
+field though, to function properly with other drivers.</para>
+
+ <para>A VBI device may support <link
+ linkend="rw">read/write</link> and/or streaming (<link
+ linkend="mmap">memory mapping</link> or <link
+ linkend="userp">user pointer</link>) I/O. The latter bears the
+possibility of synchronizing video and
+VBI data by using buffer timestamps.</para>
+
+ <para>Remember the &VIDIOC-STREAMON; ioctl and the first read(),
+write() and select() call can be resource allocation points returning
+an &EBUSY; if the required hardware resources are temporarily
+unavailable, for example the device is already in use by another
+process.</para>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/dev-rds.xml b/Documentation/DocBook/media/v4l/dev-rds.xml
new file mode 100644
index 000000000..be2f33737
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-rds.xml
@@ -0,0 +1,196 @@
+ <title>RDS Interface</title>
+
+ <para>The Radio Data System transmits supplementary
+information in binary format, for example the station name or travel
+information, on an inaudible audio subcarrier of a radio program. This
+interface is aimed at devices capable of receiving and/or transmitting RDS
+information.</para>
+
+ <para>For more information see the core RDS standard <xref linkend="iec62106" />
+and the RBDS standard <xref linkend="nrsc4" />.</para>
+
+ <para>Note that the RBDS standard as is used in the USA is almost identical
+to the RDS standard. Any RDS decoder/encoder can also handle RBDS. Only some of the
+fields have slightly different meanings. See the RBDS standard for more
+information.</para>
+
+ <para>The RBDS standard also specifies support for MMBS (Modified Mobile Search).
+This is a proprietary format which seems to be discontinued. The RDS interface does not
+support this format. Should support for MMBS (or the so-called 'E blocks' in general)
+be needed, then please contact the linux-media mailing list: &v4l-ml;.</para>
+
+ <section>
+ <title>Querying Capabilities</title>
+
+ <para>Devices supporting the RDS capturing API set
+the <constant>V4L2_CAP_RDS_CAPTURE</constant> flag in
+the <structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl. Any tuner that supports RDS
+will set the <constant>V4L2_TUNER_CAP_RDS</constant> flag in
+the <structfield>capability</structfield> field of &v4l2-tuner;. If
+the driver only passes RDS blocks without interpreting the data
+the <constant>V4L2_TUNER_CAP_RDS_BLOCK_IO</constant> flag has to be
+set, see <link linkend="reading-rds-data">Reading RDS data</link>.
+For future use the
+flag <constant>V4L2_TUNER_CAP_RDS_CONTROLS</constant> has also been
+defined. However, a driver for a radio tuner with this capability does
+not yet exist, so if you are planning to write such a driver you
+should discuss this on the linux-media mailing list: &v4l-ml;.</para>
+
+ <para> Whether an RDS signal is present can be detected by looking
+at the <structfield>rxsubchans</structfield> field of &v4l2-tuner;:
+the <constant>V4L2_TUNER_SUB_RDS</constant> will be set if RDS data
+was detected.</para>
+
+ <para>Devices supporting the RDS output API
+set the <constant>V4L2_CAP_RDS_OUTPUT</constant> flag in
+the <structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl.
+Any modulator that supports RDS will set the
+<constant>V4L2_TUNER_CAP_RDS</constant> flag in the <structfield>capability</structfield>
+field of &v4l2-modulator;.
+In order to enable the RDS transmission one must set the <constant>V4L2_TUNER_SUB_RDS</constant>
+bit in the <structfield>txsubchans</structfield> field of &v4l2-modulator;.
+If the driver only passes RDS blocks without interpreting the data
+the <constant>V4L2_TUNER_CAP_RDS_BLOCK_IO</constant> flag has to be set. If the
+tuner is capable of handling RDS entities like program identification codes and radio
+text, the flag <constant>V4L2_TUNER_CAP_RDS_CONTROLS</constant> should be set,
+see <link linkend="writing-rds-data">Writing RDS data</link> and
+<link linkend="fm-tx-controls">FM Transmitter Control Reference</link>.</para>
+ </section>
+
+ <section id="reading-rds-data">
+ <title>Reading RDS data</title>
+
+ <para>RDS data can be read from the radio device
+with the &func-read; function. The data is packed in groups of three bytes.</para>
+ </section>
+
+ <section id="writing-rds-data">
+ <title>Writing RDS data</title>
+
+ <para>RDS data can be written to the radio device
+with the &func-write; function. The data is packed in groups of three bytes,
+as follows:</para>
+ </section>
+
+ <section>
+ <title>RDS datastructures</title>
+ <table frame="none" pgwide="1" id="v4l2-rds-data">
+ <title>struct
+<structname>v4l2_rds_data</structname></title>
+ <tgroup cols="3">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="1*" />
+ <colspec colname="c3" colwidth="5*" />
+ <tbody valign="top">
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>lsb</structfield></entry>
+ <entry>Least Significant Byte of RDS Block</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>msb</structfield></entry>
+ <entry>Most Significant Byte of RDS Block</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>block</structfield></entry>
+ <entry>Block description</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <table frame="none" pgwide="1" id="v4l2-rds-block">
+ <title>Block description</title>
+ <tgroup cols="2">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="5*" />
+ <tbody valign="top">
+ <row>
+ <entry>Bits 0-2</entry>
+ <entry>Block (aka offset) of the received data.</entry>
+ </row>
+ <row>
+ <entry>Bits 3-5</entry>
+ <entry>Deprecated. Currently identical to bits 0-2. Do not use these bits.</entry>
+ </row>
+ <row>
+ <entry>Bit 6</entry>
+ <entry>Corrected bit. Indicates that an error was corrected for this data block.</entry>
+ </row>
+ <row>
+ <entry>Bit 7</entry>
+ <entry>Error bit. Indicates that an uncorrectable error occurred during reception of this block.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-rds-block-codes">
+ <title>Block defines</title>
+ <tgroup cols="4">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="1*" />
+ <colspec colname="c3" colwidth="1*" />
+ <colspec colname="c4" colwidth="5*" />
+ <tbody valign="top">
+ <row>
+ <entry>V4L2_RDS_BLOCK_MSK</entry>
+ <entry> </entry>
+ <entry>7</entry>
+ <entry>Mask for bits 0-2 to get the block ID.</entry>
+ </row>
+ <row>
+ <entry>V4L2_RDS_BLOCK_A</entry>
+ <entry> </entry>
+ <entry>0</entry>
+ <entry>Block A.</entry>
+ </row>
+ <row>
+ <entry>V4L2_RDS_BLOCK_B</entry>
+ <entry> </entry>
+ <entry>1</entry>
+ <entry>Block B.</entry>
+ </row>
+ <row>
+ <entry>V4L2_RDS_BLOCK_C</entry>
+ <entry> </entry>
+ <entry>2</entry>
+ <entry>Block C.</entry>
+ </row>
+ <row>
+ <entry>V4L2_RDS_BLOCK_D</entry>
+ <entry> </entry>
+ <entry>3</entry>
+ <entry>Block D.</entry>
+ </row>
+ <row>
+ <entry>V4L2_RDS_BLOCK_C_ALT</entry>
+ <entry> </entry>
+ <entry>4</entry>
+ <entry>Block C'.</entry>
+ </row>
+ <row>
+ <entry>V4L2_RDS_BLOCK_INVALID</entry>
+ <entry>read-only</entry>
+ <entry>7</entry>
+ <entry>An invalid block.</entry>
+ </row>
+ <row>
+ <entry>V4L2_RDS_BLOCK_CORRECTED</entry>
+ <entry>read-only</entry>
+ <entry>0x40</entry>
+ <entry>A bit error was detected but corrected.</entry>
+ </row>
+ <row>
+ <entry>V4L2_RDS_BLOCK_ERROR</entry>
+ <entry>read-only</entry>
+ <entry>0x80</entry>
+ <entry>An uncorrectable error occurred.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/dev-sdr.xml b/Documentation/DocBook/media/v4l/dev-sdr.xml
new file mode 100644
index 000000000..f8903568a
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-sdr.xml
@@ -0,0 +1,120 @@
+ <title>Software Defined Radio Interface (SDR)</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental"> experimental </link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>
+SDR is an abbreviation of Software Defined Radio, the radio device
+which uses application software for modulation or demodulation. This interface
+is intended for controlling and data streaming of such devices.
+ </para>
+
+ <para>
+SDR devices are accessed through character device special files named
+<filename>/dev/swradio0</filename> to <filename>/dev/swradio255</filename>
+with major number 81 and dynamically allocated minor numbers 0 to 255.
+ </para>
+
+ <section>
+ <title>Querying Capabilities</title>
+
+ <para>
+Devices supporting the SDR receiver interface set the
+<constant>V4L2_CAP_SDR_CAPTURE</constant> and
+<constant>V4L2_CAP_TUNER</constant> flag in the
+<structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl. That flag means the device has an
+Analog to Digital Converter (ADC), which is a mandatory element for the SDR receiver.
+At least one of the read/write, streaming or asynchronous I/O methods must
+be supported.
+ </para>
+ </section>
+
+ <section>
+ <title>Supplemental Functions</title>
+
+ <para>
+SDR devices can support <link linkend="control">controls</link>, and must
+support the <link linkend="tuner">tuner</link> ioctls. Tuner ioctls are used
+for setting the ADC sampling rate (sampling frequency) and the possible RF tuner
+frequency.
+ </para>
+
+ <para>
+The <constant>V4L2_TUNER_ADC</constant> tuner type is used for ADC tuners, and
+the <constant>V4L2_TUNER_RF</constant> tuner type is used for RF tuners. The
+tuner index of the RF tuner (if any) must always follow the ADC tuner index.
+Normally the ADC tuner is #0 and the RF tuner is #1.
+ </para>
+
+ <para>
+The &VIDIOC-S-HW-FREQ-SEEK; ioctl is not supported.
+ </para>
+ </section>
+
+ <section>
+ <title>Data Format Negotiation</title>
+
+ <para>
+The SDR capture device uses the <link linkend="format">format</link> ioctls to
+select the capture format. Both the sampling resolution and the data streaming
+format are bound to that selectable format. In addition to the basic
+<link linkend="format">format</link> ioctls, the &VIDIOC-ENUM-FMT; ioctl
+must be supported as well.
+ </para>
+
+ <para>
+To use the <link linkend="format">format</link> ioctls applications set the
+<structfield>type</structfield> field of a &v4l2-format; to
+<constant>V4L2_BUF_TYPE_SDR_CAPTURE</constant> and use the &v4l2-sdr-format;
+<structfield>sdr</structfield> member of the <structfield>fmt</structfield>
+union as needed per the desired operation.
+Currently there is two fields, <structfield>pixelformat</structfield> and
+<structfield>buffersize</structfield>, of struct &v4l2-sdr-format; which are
+used. Content of the <structfield>pixelformat</structfield> is V4L2 FourCC
+code of the data format. The <structfield>buffersize</structfield> field is
+maximum buffer size in bytes required for data transfer, set by the driver in
+order to inform application.
+ </para>
+
+ <table pgwide="1" frame="none" id="v4l2-sdr-format">
+ <title>struct <structname>v4l2_sdr_format</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pixelformat</structfield></entry>
+ <entry>
+The data format or type of compression, set by the application. This is a
+little endian <link linkend="v4l2-fourcc">four character code</link>.
+V4L2 defines SDR formats in <xref linkend="sdr-formats" />.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>buffersize</structfield></entry>
+ <entry>
+Maximum size in bytes required for data. Value is set by the driver.
+ </entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>reserved[24]</structfield></entry>
+ <entry>This array is reserved for future extensions.
+Drivers and applications must set it to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>
+An SDR device may support <link linkend="rw">read/write</link>
+and/or streaming (<link linkend="mmap">memory mapping</link>
+or <link linkend="userp">user pointer</link>) I/O.
+ </para>
+
+ </section>
diff --git a/Documentation/DocBook/media/v4l/dev-sliced-vbi.xml b/Documentation/DocBook/media/v4l/dev-sliced-vbi.xml
new file mode 100644
index 000000000..0aec62ed2
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-sliced-vbi.xml
@@ -0,0 +1,706 @@
+ <title>Sliced VBI Data Interface</title>
+
+ <para>VBI stands for Vertical Blanking Interval, a gap in the
+sequence of lines of an analog video signal. During VBI no picture
+information is transmitted, allowing some time while the electron beam
+of a cathode ray tube TV returns to the top of the screen.</para>
+
+ <para>Sliced VBI devices use hardware to demodulate data transmitted
+in the VBI. V4L2 drivers shall <emphasis>not</emphasis> do this by
+software, see also the <link linkend="raw-vbi">raw VBI
+interface</link>. The data is passed as short packets of fixed size,
+covering one scan line each. The number of packets per video frame is
+variable.</para>
+
+ <para>Sliced VBI capture and output devices are accessed through the
+same character special files as raw VBI devices. When a driver
+supports both interfaces, the default function of a
+<filename>/dev/vbi</filename> device is <emphasis>raw</emphasis> VBI
+capturing or output, and the sliced VBI function is only available
+after calling the &VIDIOC-S-FMT; ioctl as defined below. Likewise a
+<filename>/dev/video</filename> device may support the sliced VBI API,
+however the default function here is video capturing or output.
+Different file descriptors must be used to pass raw and sliced VBI
+data simultaneously, if this is supported by the driver.</para>
+
+ <section>
+ <title>Querying Capabilities</title>
+
+ <para>Devices supporting the sliced VBI capturing or output API
+set the <constant>V4L2_CAP_SLICED_VBI_CAPTURE</constant> or
+<constant>V4L2_CAP_SLICED_VBI_OUTPUT</constant> flag respectively, in
+the <structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl. At least one of the
+read/write, streaming or asynchronous <link linkend="io">I/O
+methods</link> must be supported. Sliced VBI devices may have a tuner
+or modulator.</para>
+ </section>
+
+ <section>
+ <title>Supplemental Functions</title>
+
+ <para>Sliced VBI devices shall support <link linkend="video">video
+input or output</link> and <link linkend="tuner">tuner or
+modulator</link> ioctls if they have these capabilities, and they may
+support <link linkend="control">control</link> ioctls. The <link
+linkend="standard">video standard</link> ioctls provide information
+vital to program a sliced VBI device, therefore must be
+supported.</para>
+ </section>
+
+ <section id="sliced-vbi-format-negotitation">
+ <title>Sliced VBI Format Negotiation</title>
+
+ <para>To find out which data services are supported by the
+hardware applications can call the &VIDIOC-G-SLICED-VBI-CAP; ioctl.
+All drivers implementing the sliced VBI interface must support this
+ioctl. The results may differ from those of the &VIDIOC-S-FMT; ioctl
+when the number of VBI lines the hardware can capture or output per
+frame, or the number of services it can identify on a given line are
+limited. For example on PAL line 16 the hardware may be able to look
+for a VPS or Teletext signal, but not both at the same time.</para>
+
+ <para>To determine the currently selected services applications
+set the <structfield>type </structfield> field of &v4l2-format; to
+<constant> V4L2_BUF_TYPE_SLICED_VBI_CAPTURE</constant> or <constant>
+V4L2_BUF_TYPE_SLICED_VBI_OUTPUT</constant>, and the &VIDIOC-G-FMT;
+ioctl fills the <structfield>fmt.sliced</structfield> member, a
+&v4l2-sliced-vbi-format;.</para>
+
+ <para>Applications can request different parameters by
+initializing or modifying the <structfield>fmt.sliced</structfield>
+member and calling the &VIDIOC-S-FMT; ioctl with a pointer to the
+<structname>v4l2_format</structname> structure.</para>
+
+ <para>The sliced VBI API is more complicated than the raw VBI API
+because the hardware must be told which VBI service to expect on each
+scan line. Not all services may be supported by the hardware on all
+lines (this is especially true for VBI output where Teletext is often
+unsupported and other services can only be inserted in one specific
+line). In many cases, however, it is sufficient to just set the
+<structfield>service_set</structfield> field to the required services
+and let the driver fill the <structfield>service_lines</structfield>
+array according to hardware capabilities. Only if more precise control
+is needed should the programmer set the
+<structfield>service_lines</structfield> array explicitly.</para>
+
+ <para>The &VIDIOC-S-FMT; ioctl modifies the parameters
+according to hardware capabilities. When the driver allocates
+resources at this point, it may return an &EBUSY; if the required
+resources are temporarily unavailable. Other resource allocation
+points which may return <errorcode>EBUSY</errorcode> can be the
+&VIDIOC-STREAMON; ioctl and the first &func-read;, &func-write; and
+&func-select; call.</para>
+
+ <table frame="none" pgwide="1" id="v4l2-sliced-vbi-format">
+ <title>struct
+<structname>v4l2_sliced_vbi_format</structname></title>
+ <tgroup cols="5">
+ <colspec colname="c1" colwidth="3*" />
+ <colspec colname="c2" colwidth="3*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="2*" />
+ <colspec colname="c5" colwidth="2*" />
+ <spanspec namest="c3" nameend="c5" spanname="hspan" />
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>service_set</structfield></entry>
+ <entry spanname="hspan"><para>If
+<structfield>service_set</structfield> is non-zero when passed with
+&VIDIOC-S-FMT; or &VIDIOC-TRY-FMT;, the
+<structfield>service_lines</structfield> array will be filled by the
+driver according to the services specified in this field. For example,
+if <structfield>service_set</structfield> is initialized with
+<constant>V4L2_SLICED_TELETEXT_B | V4L2_SLICED_WSS_625</constant>, a
+driver for the cx25840 video decoder sets lines 7-22 of both
+fields<footnote><para>According to <link
+linkend="ets300706">ETS&nbsp;300&nbsp;706</link> lines 6-22 of the
+first field and lines 5-22 of the second field may carry Teletext
+data.</para></footnote> to <constant>V4L2_SLICED_TELETEXT_B</constant>
+and line 23 of the first field to
+<constant>V4L2_SLICED_WSS_625</constant>. If
+<structfield>service_set</structfield> is set to zero, then the values
+of <structfield>service_lines</structfield> will be used instead.
+</para><para>On return the driver sets this field to the union of all
+elements of the returned <structfield>service_lines</structfield>
+array. It may contain less services than requested, perhaps just one,
+if the hardware cannot handle more services simultaneously. It may be
+empty (zero) if none of the requested services are supported by the
+hardware.</para></entry>
+ </row>
+ <row>
+ <entry>__u16</entry>
+ <entry><structfield>service_lines</structfield>[2][24]</entry>
+ <entry spanname="hspan"><para>Applications initialize this
+array with sets of data services the driver shall look for or insert
+on the respective scan line. Subject to hardware capabilities drivers
+return the requested set, a subset, which may be just a single
+service, or an empty set. When the hardware cannot handle multiple
+services on the same line the driver shall choose one. No assumptions
+can be made on which service the driver chooses.</para><para>Data
+services are defined in <xref linkend="vbi-services2" />. Array indices
+map to ITU-R line numbers (see also <xref linkend="vbi-525" /> and <xref
+ linkend="vbi-625" />) as follows: <!-- No nested
+tables, sigh. --></para></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>Element</entry>
+ <entry>525 line systems</entry>
+ <entry>625 line systems</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><structfield>service_lines</structfield>[0][1]</entry>
+ <entry align="center">1</entry>
+ <entry align="center">1</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><structfield>service_lines</structfield>[0][23]</entry>
+ <entry align="center">23</entry>
+ <entry align="center">23</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><structfield>service_lines</structfield>[1][1]</entry>
+ <entry align="center">264</entry>
+ <entry align="center">314</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><structfield>service_lines</structfield>[1][23]</entry>
+ <entry align="center">286</entry>
+ <entry align="center">336</entry>
+ </row>
+ <!-- End of line numbers table. -->
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry spanname="hspan">Drivers must set
+<structfield>service_lines</structfield>[0][0] and
+<structfield>service_lines</structfield>[1][0] to zero.
+The <constant>V4L2_VBI_ITU_525_F1_START</constant>,
+<constant>V4L2_VBI_ITU_525_F2_START</constant>,
+<constant>V4L2_VBI_ITU_625_F1_START</constant> and
+<constant>V4L2_VBI_ITU_625_F2_START</constant> defines give the start
+line numbers for each field for each 525 or 625 line format as a
+convenience. Don't forget that ITU line numbering starts at 1, not 0.
+</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>io_size</structfield></entry>
+ <entry spanname="hspan">Maximum number of bytes passed by
+one &func-read; or &func-write; call, and the buffer size in bytes for
+the &VIDIOC-QBUF; and &VIDIOC-DQBUF; ioctl. Drivers set this field to
+the size of &v4l2-sliced-vbi-data; times the number of non-zero
+elements in the returned <structfield>service_lines</structfield>
+array (that is the number of lines potentially carrying data).</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[2]</entry>
+ <entry spanname="hspan">This array is reserved for future
+extensions. Applications and drivers must set it to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <!-- See also vidioc-g-sliced-vbi-cap.sgml -->
+ <table frame="none" pgwide="1" id="vbi-services2">
+ <title>Sliced VBI services</title>
+ <tgroup cols="5">
+ <colspec colname="c1" colwidth="2*" />
+ <colspec colname="c2" colwidth="1*" />
+ <colspec colname="c3" colwidth="1*" />
+ <colspec colname="c4" colwidth="2*" />
+ <colspec colname="c5" colwidth="2*" />
+ <spanspec namest="c3" nameend="c5" spanname="rlp" />
+ <thead>
+ <row>
+ <entry>Symbol</entry>
+ <entry>Value</entry>
+ <entry>Reference</entry>
+ <entry>Lines, usually</entry>
+ <entry>Payload</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_SLICED_TELETEXT_B</constant>
+(Teletext System B)</entry>
+ <entry>0x0001</entry>
+ <entry><xref linkend="ets300706" />, <xref linkend="itu653" /></entry>
+ <entry>PAL/SECAM line 7-22, 320-335 (second field 7-22)</entry>
+ <entry>Last 42 of the 45 byte Teletext packet, that is
+without clock run-in and framing code, lsb first transmitted.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SLICED_VPS</constant></entry>
+ <entry>0x0400</entry>
+ <entry><xref linkend="ets300231" /></entry>
+ <entry>PAL line 16</entry>
+ <entry>Byte number 3 to 15 according to Figure 9 of
+ETS&nbsp;300&nbsp;231, lsb first transmitted.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SLICED_CAPTION_525</constant></entry>
+ <entry>0x1000</entry>
+ <entry><xref linkend="cea608" /></entry>
+ <entry>NTSC line 21, 284 (second field 21)</entry>
+ <entry>Two bytes in transmission order, including parity
+bit, lsb first transmitted.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SLICED_WSS_625</constant></entry>
+ <entry>0x4000</entry>
+ <entry><xref linkend="itu1119" />, <xref linkend="en300294" /></entry>
+ <entry>PAL/SECAM line 23</entry>
+ <entry><screen>
+Byte 0 1
+ msb lsb msb lsb
+ Bit 7 6 5 4 3 2 1 0 x x 13 12 11 10 9
+</screen></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SLICED_VBI_525</constant></entry>
+ <entry>0x1000</entry>
+ <entry spanname="rlp">Set of services applicable to 525
+line systems.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SLICED_VBI_625</constant></entry>
+ <entry>0x4401</entry>
+ <entry spanname="rlp">Set of services applicable to 625
+line systems.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>Drivers may return an &EINVAL; when applications attempt to
+read or write data without prior format negotiation, after switching
+the video standard (which may invalidate the negotiated VBI
+parameters) and after switching the video input (which may change the
+video standard as a side effect). The &VIDIOC-S-FMT; ioctl may return
+an &EBUSY; when applications attempt to change the format while i/o is
+in progress (between a &VIDIOC-STREAMON; and &VIDIOC-STREAMOFF; call,
+and after the first &func-read; or &func-write; call).</para>
+ </section>
+
+ <section>
+ <title>Reading and writing sliced VBI data</title>
+
+ <para>A single &func-read; or &func-write; call must pass all data
+belonging to one video frame. That is an array of
+<structname>v4l2_sliced_vbi_data</structname> structures with one or
+more elements and a total size not exceeding
+<structfield>io_size</structfield> bytes. Likewise in streaming I/O
+mode one buffer of <structfield>io_size</structfield> bytes must
+contain data of one video frame. The <structfield>id</structfield> of
+unused <structname>v4l2_sliced_vbi_data</structname> elements must be
+zero.</para>
+
+ <table frame="none" pgwide="1" id="v4l2-sliced-vbi-data">
+ <title>struct
+<structname>v4l2_sliced_vbi_data</structname></title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>id</structfield></entry>
+ <entry>A flag from <xref linkend="vbi-services" />
+identifying the type of data in this packet. Only a single bit must be
+set. When the <structfield>id</structfield> of a captured packet is
+zero, the packet is empty and the contents of other fields are
+undefined. Applications shall ignore empty packets. When the
+<structfield>id</structfield> of a packet for output is zero the
+contents of the <structfield>data</structfield> field are undefined
+and the driver must no longer insert data on the requested
+<structfield>field</structfield> and
+<structfield>line</structfield>.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>field</structfield></entry>
+ <entry>The video field number this data has been captured
+from, or shall be inserted at. <constant>0</constant> for the first
+field, <constant>1</constant> for the second field.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>line</structfield></entry>
+ <entry>The field (as opposed to frame) line number this
+data has been captured from, or shall be inserted at. See <xref
+ linkend="vbi-525" /> and <xref linkend="vbi-625" /> for valid
+values. Sliced VBI capture devices can set the line number of all
+packets to <constant>0</constant> if the hardware cannot reliably
+identify scan lines. The field number must always be valid.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield></entry>
+ <entry>This field is reserved for future extensions.
+Applications and drivers must set it to zero.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>data</structfield>[48]</entry>
+ <entry>The packet payload. See <xref
+ linkend="vbi-services" /> for the contents and number of
+bytes passed for each data type. The contents of padding bytes at the
+end of this array are undefined, drivers and applications shall ignore
+them.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>Packets are always passed in ascending line number order,
+without duplicate line numbers. The &func-write; function and the
+&VIDIOC-QBUF; ioctl must return an &EINVAL; when applications violate
+this rule. They must also return an &EINVAL; when applications pass an
+incorrect field or line number, or a combination of
+<structfield>field</structfield>, <structfield>line</structfield> and
+<structfield>id</structfield> which has not been negotiated with the
+&VIDIOC-G-FMT; or &VIDIOC-S-FMT; ioctl. When the line numbers are
+unknown the driver must pass the packets in transmitted order. The
+driver can insert empty packets with <structfield>id</structfield> set
+to zero anywhere in the packet array.</para>
+
+ <para>To assure synchronization and to distinguish from frame
+dropping, when a captured frame does not carry any of the requested
+data services drivers must pass one or more empty packets. When an
+application fails to pass VBI data in time for output, the driver
+must output the last VPS and WSS packet again, and disable the output
+of Closed Caption and Teletext data, or output data which is ignored
+by Closed Caption and Teletext decoders.</para>
+
+ <para>A sliced VBI device may support <link
+linkend="rw">read/write</link> and/or streaming (<link
+linkend="mmap">memory mapping</link> and/or <link linkend="userp">user
+pointer</link>) I/O. The latter bears the possibility of synchronizing
+video and VBI data by using buffer timestamps.</para>
+
+ </section>
+
+ <section>
+ <title>Sliced VBI Data in MPEG Streams</title>
+
+ <para>If a device can produce an MPEG output stream, it may be
+capable of providing <link
+linkend="sliced-vbi-format-negotitation">negotiated sliced VBI
+services</link> as data embedded in the MPEG stream. Users or
+applications control this sliced VBI data insertion with the <link
+linkend="v4l2-mpeg-stream-vbi-fmt">V4L2_CID_MPEG_STREAM_VBI_FMT</link>
+control.</para>
+
+ <para>If the driver does not provide the <link
+linkend="v4l2-mpeg-stream-vbi-fmt">V4L2_CID_MPEG_STREAM_VBI_FMT</link>
+control, or only allows that control to be set to <link
+linkend="v4l2-mpeg-stream-vbi-fmt"><constant>
+V4L2_MPEG_STREAM_VBI_FMT_NONE</constant></link>, then the device
+cannot embed sliced VBI data in the MPEG stream.</para>
+
+ <para>The <link linkend="v4l2-mpeg-stream-vbi-fmt">
+V4L2_CID_MPEG_STREAM_VBI_FMT</link> control does not implicitly set
+the device driver to capture nor cease capturing sliced VBI data. The
+control only indicates to embed sliced VBI data in the MPEG stream, if
+an application has negotiated sliced VBI service be captured.</para>
+
+ <para>It may also be the case that a device can embed sliced VBI
+data in only certain types of MPEG streams: for example in an MPEG-2
+PS but not an MPEG-2 TS. In this situation, if sliced VBI data
+insertion is requested, the sliced VBI data will be embedded in MPEG
+stream types when supported, and silently omitted from MPEG stream
+types where sliced VBI data insertion is not supported by the device.
+</para>
+
+ <para>The following subsections specify the format of the
+embedded sliced VBI data.</para>
+
+ <section>
+ <title>MPEG Stream Embedded, Sliced VBI Data Format: NONE</title>
+ <para>The <link linkend="v4l2-mpeg-stream-vbi-fmt"><constant>
+V4L2_MPEG_STREAM_VBI_FMT_NONE</constant></link> embedded sliced VBI
+format shall be interpreted by drivers as a control to cease
+embedding sliced VBI data in MPEG streams. Neither the device nor
+driver shall insert "empty" embedded sliced VBI data packets in the
+MPEG stream when this format is set. No MPEG stream data structures
+are specified for this format.</para>
+ </section>
+
+ <section>
+ <title>MPEG Stream Embedded, Sliced VBI Data Format: IVTV</title>
+ <para>The <link linkend="v4l2-mpeg-stream-vbi-fmt"><constant>
+V4L2_MPEG_STREAM_VBI_FMT_IVTV</constant></link> embedded sliced VBI
+format, when supported, indicates to the driver to embed up to 36
+lines of sliced VBI data per frame in an MPEG-2 <emphasis>Private
+Stream 1 PES</emphasis> packet encapsulated in an MPEG-2 <emphasis>
+Program Pack</emphasis> in the MPEG stream.</para>
+
+ <para><emphasis>Historical context</emphasis>: This format
+specification originates from a custom, embedded, sliced VBI data
+format used by the <filename>ivtv</filename> driver. This format
+has already been informally specified in the kernel sources in the
+file <filename>Documentation/video4linux/cx2341x/README.vbi</filename>
+. The maximum size of the payload and other aspects of this format
+are driven by the CX23415 MPEG decoder's capabilities and limitations
+with respect to extracting, decoding, and displaying sliced VBI data
+embedded within an MPEG stream.</para>
+
+ <para>This format's use is <emphasis>not</emphasis> exclusive to
+the <filename>ivtv</filename> driver <emphasis>nor</emphasis>
+exclusive to CX2341x devices, as the sliced VBI data packet insertion
+into the MPEG stream is implemented in driver software. At least the
+<filename>cx18</filename> driver provides sliced VBI data insertion
+into an MPEG-2 PS in this format as well.</para>
+
+ <para>The following definitions specify the payload of the
+MPEG-2 <emphasis>Private Stream 1 PES</emphasis> packets that contain
+sliced VBI data when <link linkend="v4l2-mpeg-stream-vbi-fmt">
+<constant>V4L2_MPEG_STREAM_VBI_FMT_IVTV</constant></link> is set.
+(The MPEG-2 <emphasis>Private Stream 1 PES</emphasis> packet header
+and encapsulating MPEG-2 <emphasis>Program Pack</emphasis> header are
+not detailed here. Please refer to the MPEG-2 specifications for
+details on those packet headers.)</para>
+
+ <para>The payload of the MPEG-2 <emphasis>Private Stream 1 PES
+</emphasis> packets that contain sliced VBI data is specified by
+&v4l2-mpeg-vbi-fmt-ivtv;. The payload is variable
+length, depending on the actual number of lines of sliced VBI data
+present in a video frame. The payload may be padded at the end with
+unspecified fill bytes to align the end of the payload to a 4-byte
+boundary. The payload shall never exceed 1552 bytes (2 fields with
+18 lines/field with 43 bytes of data/line and a 4 byte magic number).
+</para>
+
+ <table frame="none" pgwide="1" id="v4l2-mpeg-vbi-fmt-ivtv">
+ <title>struct <structname>v4l2_mpeg_vbi_fmt_ivtv</structname>
+ </title>
+ <tgroup cols="4">
+ &cs-ustr;
+ <tbody valign="top">
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>magic</structfield>[4]</entry>
+ <entry></entry>
+ <entry>A "magic" constant from <xref
+ linkend="v4l2-mpeg-vbi-fmt-ivtv-magic" /> that indicates
+this is a valid sliced VBI data payload and also indicates which
+member of the anonymous union, <structfield>itv0</structfield> or
+<structfield>ITV0</structfield>, to use for the payload data.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry>(anonymous)</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>struct <link linkend="v4l2-mpeg-vbi-itv0">
+ <structname>v4l2_mpeg_vbi_itv0</structname></link>
+ </entry>
+ <entry><structfield>itv0</structfield></entry>
+ <entry>The primary form of the sliced VBI data payload
+that contains anywhere from 1 to 35 lines of sliced VBI data.
+Line masks are provided in this form of the payload indicating
+which VBI lines are provided.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>struct <link linkend="v4l2-mpeg-vbi-itv0-1">
+ <structname>v4l2_mpeg_vbi_ITV0</structname></link>
+ </entry>
+ <entry><structfield>ITV0</structfield></entry>
+ <entry>An alternate form of the sliced VBI data payload
+used when 36 lines of sliced VBI data are present. No line masks are
+provided in this form of the payload; all valid line mask bits are
+implcitly set.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-mpeg-vbi-fmt-ivtv-magic">
+ <title>Magic Constants for &v4l2-mpeg-vbi-fmt-ivtv;
+ <structfield>magic</structfield> field</title>
+ <tgroup cols="3">
+ &cs-def;
+ <thead>
+ <row>
+ <entry align="left">Defined Symbol</entry>
+ <entry align="left">Value</entry>
+ <entry align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VBI_IVTV_MAGIC0</constant>
+ </entry>
+ <entry>"itv0"</entry>
+ <entry>Indicates the <structfield>itv0</structfield>
+member of the union in &v4l2-mpeg-vbi-fmt-ivtv; is valid.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VBI_IVTV_MAGIC1</constant>
+ </entry>
+ <entry>"ITV0"</entry>
+ <entry>Indicates the <structfield>ITV0</structfield>
+member of the union in &v4l2-mpeg-vbi-fmt-ivtv; is valid and
+that 36 lines of sliced VBI data are present.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-mpeg-vbi-itv0">
+ <title>struct <structname>v4l2_mpeg_vbi_itv0</structname>
+ </title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__le32</entry>
+ <entry><structfield>linemask</structfield>[2]</entry>
+ <entry><para>Bitmasks indicating the VBI service lines
+present. These <structfield>linemask</structfield> values are stored
+in little endian byte order in the MPEG stream. Some reference
+<structfield>linemask</structfield> bit positions with their
+corresponding VBI line number and video field are given below.
+b<subscript>0</subscript> indicates the least significant bit of a
+<structfield>linemask</structfield> value:<screen>
+<structfield>linemask</structfield>[0] b<subscript>0</subscript>: line 6 first field
+<structfield>linemask</structfield>[0] b<subscript>17</subscript>: line 23 first field
+<structfield>linemask</structfield>[0] b<subscript>18</subscript>: line 6 second field
+<structfield>linemask</structfield>[0] b<subscript>31</subscript>: line 19 second field
+<structfield>linemask</structfield>[1] b<subscript>0</subscript>: line 20 second field
+<structfield>linemask</structfield>[1] b<subscript>3</subscript>: line 23 second field
+<structfield>linemask</structfield>[1] b<subscript>4</subscript>-b<subscript>31</subscript>: unused and set to 0</screen></para></entry>
+ </row>
+ <row>
+ <entry>struct <link linkend="v4l2-mpeg-vbi-itv0-line">
+ <structname>v4l2_mpeg_vbi_itv0_line</structname></link>
+ </entry>
+ <entry><structfield>line</structfield>[35]</entry>
+ <entry>This is a variable length array that holds from 1
+to 35 lines of sliced VBI data. The sliced VBI data lines present
+correspond to the bits set in the <structfield>linemask</structfield>
+array, starting from b<subscript>0</subscript> of <structfield>
+linemask</structfield>[0] up through b<subscript>31</subscript> of
+<structfield>linemask</structfield>[0], and from b<subscript>0
+</subscript> of <structfield>linemask</structfield>[1] up through b
+<subscript>3</subscript> of <structfield>linemask</structfield>[1].
+<structfield>line</structfield>[0] corresponds to the first bit
+found set in the <structfield>linemask</structfield> array,
+<structfield>line</structfield>[1] corresponds to the second bit
+found set in the <structfield>linemask</structfield> array, etc.
+If no <structfield>linemask</structfield> array bits are set, then
+<structfield>line</structfield>[0] may contain one line of
+unspecified data that should be ignored by applications.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-mpeg-vbi-itv0-1">
+ <title>struct <structname>v4l2_mpeg_vbi_ITV0</structname>
+ </title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>struct <link linkend="v4l2-mpeg-vbi-itv0-line">
+ <structname>v4l2_mpeg_vbi_itv0_line</structname></link>
+ </entry>
+ <entry><structfield>line</structfield>[36]</entry>
+ <entry>A fixed length array of 36 lines of sliced VBI
+data. <structfield>line</structfield>[0] through <structfield>line
+</structfield>[17] correspond to lines 6 through 23 of the
+first field. <structfield>line</structfield>[18] through
+<structfield>line</structfield>[35] corresponds to lines 6
+through 23 of the second field.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-mpeg-vbi-itv0-line">
+ <title>struct <structname>v4l2_mpeg_vbi_itv0_line</structname>
+ </title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>id</structfield></entry>
+ <entry>A line identifier value from
+<xref linkend="ITV0-Line-Identifier-Constants" /> that indicates
+the type of sliced VBI data stored on this line.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>data</structfield>[42]</entry>
+ <entry>The sliced VBI data for the line.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="ITV0-Line-Identifier-Constants">
+ <title>Line Identifiers for struct <link
+ linkend="v4l2-mpeg-vbi-itv0-line"><structname>
+v4l2_mpeg_vbi_itv0_line</structname></link> <structfield>id
+</structfield> field</title>
+ <tgroup cols="3">
+ &cs-def;
+ <thead>
+ <row>
+ <entry align="left">Defined Symbol</entry>
+ <entry align="left">Value</entry>
+ <entry align="left">Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MPEG_VBI_IVTV_TELETEXT_B</constant>
+ </entry>
+ <entry>1</entry>
+ <entry>Refer to <link linkend="vbi-services2">
+Sliced VBI services</link> for a description of the line payload.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VBI_IVTV_CAPTION_525</constant>
+ </entry>
+ <entry>4</entry>
+ <entry>Refer to <link linkend="vbi-services2">
+Sliced VBI services</link> for a description of the line payload.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VBI_IVTV_WSS_625</constant>
+ </entry>
+ <entry>5</entry>
+ <entry>Refer to <link linkend="vbi-services2">
+Sliced VBI services</link> for a description of the line payload.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MPEG_VBI_IVTV_VPS</constant>
+ </entry>
+ <entry>7</entry>
+ <entry>Refer to <link linkend="vbi-services2">
+Sliced VBI services</link> for a description of the line payload.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </section>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/dev-subdev.xml b/Documentation/DocBook/media/v4l/dev-subdev.xml
new file mode 100644
index 000000000..4f0ba58c9
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-subdev.xml
@@ -0,0 +1,484 @@
+ <title>Sub-device Interface</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental">experimental</link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>The complex nature of V4L2 devices, where hardware is often made of
+ several integrated circuits that need to interact with each other in a
+ controlled way, leads to complex V4L2 drivers. The drivers usually reflect
+ the hardware model in software, and model the different hardware components
+ as software blocks called sub-devices.</para>
+
+ <para>V4L2 sub-devices are usually kernel-only objects. If the V4L2 driver
+ implements the media device API, they will automatically inherit from media
+ entities. Applications will be able to enumerate the sub-devices and discover
+ the hardware topology using the media entities, pads and links enumeration
+ API.</para>
+
+ <para>In addition to make sub-devices discoverable, drivers can also choose
+ to make them directly configurable by applications. When both the sub-device
+ driver and the V4L2 device driver support this, sub-devices will feature a
+ character device node on which ioctls can be called to
+ <itemizedlist>
+ <listitem><para>query, read and write sub-devices controls</para></listitem>
+ <listitem><para>subscribe and unsubscribe to events and retrieve them</para></listitem>
+ <listitem><para>negotiate image formats on individual pads</para></listitem>
+ </itemizedlist>
+ </para>
+
+ <para>Sub-device character device nodes, conventionally named
+ <filename>/dev/v4l-subdev*</filename>, use major number 81.</para>
+
+ <section>
+ <title>Controls</title>
+ <para>Most V4L2 controls are implemented by sub-device hardware. Drivers
+ usually merge all controls and expose them through video device nodes.
+ Applications can control all sub-devices through a single interface.</para>
+
+ <para>Complex devices sometimes implement the same control in different
+ pieces of hardware. This situation is common in embedded platforms, where
+ both sensors and image processing hardware implement identical functions,
+ such as contrast adjustment, white balance or faulty pixels correction. As
+ the V4L2 controls API doesn't support several identical controls in a single
+ device, all but one of the identical controls are hidden.</para>
+
+ <para>Applications can access those hidden controls through the sub-device
+ node with the V4L2 control API described in <xref linkend="control" />. The
+ ioctls behave identically as when issued on V4L2 device nodes, with the
+ exception that they deal only with controls implemented in the sub-device.
+ </para>
+
+ <para>Depending on the driver, those controls might also be exposed through
+ one (or several) V4L2 device nodes.</para>
+ </section>
+
+ <section>
+ <title>Events</title>
+ <para>V4L2 sub-devices can notify applications of events as described in
+ <xref linkend="event" />. The API behaves identically as when used on V4L2
+ device nodes, with the exception that it only deals with events generated by
+ the sub-device. Depending on the driver, those events might also be reported
+ on one (or several) V4L2 device nodes.</para>
+ </section>
+
+ <section id="pad-level-formats">
+ <title>Pad-level Formats</title>
+
+ <warning><para>Pad-level formats are only applicable to very complex device that
+ need to expose low-level format configuration to user space. Generic V4L2
+ applications do <emphasis>not</emphasis> need to use the API described in
+ this section.</para></warning>
+
+ <note><para>For the purpose of this section, the term
+ <wordasword>format</wordasword> means the combination of media bus data
+ format, frame width and frame height.</para></note>
+
+ <para>Image formats are typically negotiated on video capture and
+ output devices using the format and <link
+ linkend="vidioc-subdev-g-selection">selection</link> ioctls. The
+ driver is responsible for configuring every block in the video
+ pipeline according to the requested format at the pipeline input
+ and/or output.</para>
+
+ <para>For complex devices, such as often found in embedded systems,
+ identical image sizes at the output of a pipeline can be achieved using
+ different hardware configurations. One such example is shown on
+ <xref linkend="pipeline-scaling" />, where
+ image scaling can be performed on both the video sensor and the host image
+ processing hardware.</para>
+
+ <figure id="pipeline-scaling">
+ <title>Image Format Negotiation on Pipelines</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="pipeline.pdf" format="PS" />
+ </imageobject>
+ <imageobject>
+ <imagedata fileref="pipeline.png" format="PNG" />
+ </imageobject>
+ <textobject>
+ <phrase>High quality and high speed pipeline configuration</phrase>
+ </textobject>
+ </mediaobject>
+ </figure>
+
+ <para>The sensor scaler is usually of less quality than the host scaler, but
+ scaling on the sensor is required to achieve higher frame rates. Depending
+ on the use case (quality vs. speed), the pipeline must be configured
+ differently. Applications need to configure the formats at every point in
+ the pipeline explicitly.</para>
+
+ <para>Drivers that implement the <link linkend="media-controller-intro">media
+ API</link> can expose pad-level image format configuration to applications.
+ When they do, applications can use the &VIDIOC-SUBDEV-G-FMT; and
+ &VIDIOC-SUBDEV-S-FMT; ioctls. to negotiate formats on a per-pad basis.</para>
+
+ <para>Applications are responsible for configuring coherent parameters on
+ the whole pipeline and making sure that connected pads have compatible
+ formats. The pipeline is checked for formats mismatch at &VIDIOC-STREAMON;
+ time, and an &EPIPE; is then returned if the configuration is
+ invalid.</para>
+
+ <para>Pad-level image format configuration support can be tested by calling
+ the &VIDIOC-SUBDEV-G-FMT; ioctl on pad 0. If the driver returns an &EINVAL;
+ pad-level format configuration is not supported by the sub-device.</para>
+
+ <section>
+ <title>Format Negotiation</title>
+
+ <para>Acceptable formats on pads can (and usually do) depend on a number
+ of external parameters, such as formats on other pads, active links, or
+ even controls. Finding a combination of formats on all pads in a video
+ pipeline, acceptable to both application and driver, can't rely on formats
+ enumeration only. A format negotiation mechanism is required.</para>
+
+ <para>Central to the format negotiation mechanism are the get/set format
+ operations. When called with the <structfield>which</structfield> argument
+ set to <constant>V4L2_SUBDEV_FORMAT_TRY</constant>, the
+ &VIDIOC-SUBDEV-G-FMT; and &VIDIOC-SUBDEV-S-FMT; ioctls operate on a set of
+ formats parameters that are not connected to the hardware configuration.
+ Modifying those 'try' formats leaves the device state untouched (this
+ applies to both the software state stored in the driver and the hardware
+ state stored in the device itself).</para>
+
+ <para>While not kept as part of the device state, try formats are stored
+ in the sub-device file handles. A &VIDIOC-SUBDEV-G-FMT; call will return
+ the last try format set <emphasis>on the same sub-device file
+ handle</emphasis>. Several applications querying the same sub-device at
+ the same time will thus not interact with each other.</para>
+
+ <para>To find out whether a particular format is supported by the device,
+ applications use the &VIDIOC-SUBDEV-S-FMT; ioctl. Drivers verify and, if
+ needed, change the requested <structfield>format</structfield> based on
+ device requirements and return the possibly modified value. Applications
+ can then choose to try a different format or accept the returned value and
+ continue.</para>
+
+ <para>Formats returned by the driver during a negotiation iteration are
+ guaranteed to be supported by the device. In particular, drivers guarantee
+ that a returned format will not be further changed if passed to an
+ &VIDIOC-SUBDEV-S-FMT; call as-is (as long as external parameters, such as
+ formats on other pads or links' configuration are not changed).</para>
+
+ <para>Drivers automatically propagate formats inside sub-devices. When a
+ try or active format is set on a pad, corresponding formats on other pads
+ of the same sub-device can be modified by the driver. Drivers are free to
+ modify formats as required by the device. However, they should comply with
+ the following rules when possible:
+ <itemizedlist>
+ <listitem><para>Formats should be propagated from sink pads to source pads.
+ Modifying a format on a source pad should not modify the format on any
+ sink pad.</para></listitem>
+ <listitem><para>Sub-devices that scale frames using variable scaling factors
+ should reset the scale factors to default values when sink pads formats
+ are modified. If the 1:1 scaling ratio is supported, this means that
+ source pads formats should be reset to the sink pads formats.</para></listitem>
+ </itemizedlist>
+ </para>
+
+ <para>Formats are not propagated across links, as that would involve
+ propagating them from one sub-device file handle to another. Applications
+ must then take care to configure both ends of every link explicitly with
+ compatible formats. Identical formats on the two ends of a link are
+ guaranteed to be compatible. Drivers are free to accept different formats
+ matching device requirements as being compatible.</para>
+
+ <para><xref linkend="sample-pipeline-config" />
+ shows a sample configuration sequence for the pipeline described in
+ <xref linkend="pipeline-scaling" /> (table
+ columns list entity names and pad numbers).</para>
+
+ <table pgwide="0" frame="none" id="sample-pipeline-config">
+ <title>Sample Pipeline Configuration</title>
+ <tgroup cols="3">
+ <colspec colname="what"/>
+ <colspec colname="sensor-0 format" />
+ <colspec colname="frontend-0 format" />
+ <colspec colname="frontend-1 format" />
+ <colspec colname="scaler-0 format" />
+ <colspec colname="scaler-0 compose" />
+ <colspec colname="scaler-1 format" />
+ <thead>
+ <row>
+ <entry></entry>
+ <entry>Sensor/0 format</entry>
+ <entry>Frontend/0 format</entry>
+ <entry>Frontend/1 format</entry>
+ <entry>Scaler/0 format</entry>
+ <entry>Scaler/0 compose selection rectangle</entry>
+ <entry>Scaler/1 format</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Initial state</entry>
+ <entry>2048x1536/SGRBG8_1X8</entry>
+ <entry>(default)</entry>
+ <entry>(default)</entry>
+ <entry>(default)</entry>
+ <entry>(default)</entry>
+ <entry>(default)</entry>
+ </row>
+ <row>
+ <entry>Configure frontend sink format</entry>
+ <entry>2048x1536/SGRBG8_1X8</entry>
+ <entry><emphasis>2048x1536/SGRBG8_1X8</emphasis></entry>
+ <entry><emphasis>2046x1534/SGRBG8_1X8</emphasis></entry>
+ <entry>(default)</entry>
+ <entry>(default)</entry>
+ <entry>(default)</entry>
+ </row>
+ <row>
+ <entry>Configure scaler sink format</entry>
+ <entry>2048x1536/SGRBG8_1X8</entry>
+ <entry>2048x1536/SGRBG8_1X8</entry>
+ <entry>2046x1534/SGRBG8_1X8</entry>
+ <entry><emphasis>2046x1534/SGRBG8_1X8</emphasis></entry>
+ <entry><emphasis>0,0/2046x1534</emphasis></entry>
+ <entry><emphasis>2046x1534/SGRBG8_1X8</emphasis></entry>
+ </row>
+ <row>
+ <entry>Configure scaler sink compose selection</entry>
+ <entry>2048x1536/SGRBG8_1X8</entry>
+ <entry>2048x1536/SGRBG8_1X8</entry>
+ <entry>2046x1534/SGRBG8_1X8</entry>
+ <entry>2046x1534/SGRBG8_1X8</entry>
+ <entry><emphasis>0,0/1280x960</emphasis></entry>
+ <entry><emphasis>1280x960/SGRBG8_1X8</emphasis></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>
+ <orderedlist>
+ <listitem><para>Initial state. The sensor source pad format is
+ set to its native 3MP size and V4L2_MBUS_FMT_SGRBG8_1X8
+ media bus code. Formats on the host frontend and scaler sink
+ and source pads have the default values, as well as the
+ compose rectangle on the scaler's sink pad.</para></listitem>
+
+ <listitem><para>The application configures the frontend sink
+ pad format's size to 2048x1536 and its media bus code to
+ V4L2_MBUS_FMT_SGRBG_1X8. The driver propagates the format to
+ the frontend source pad.</para></listitem>
+
+ <listitem><para>The application configures the scaler sink pad
+ format's size to 2046x1534 and the media bus code to
+ V4L2_MBUS_FMT_SGRBG_1X8 to match the frontend source size and
+ media bus code. The media bus code on the sink pad is set to
+ V4L2_MBUS_FMT_SGRBG_1X8. The driver propagates the size to the
+ compose selection rectangle on the scaler's sink pad, and the
+ format to the scaler source pad.</para></listitem>
+
+ <listitem><para>The application configures the size of the compose
+ selection rectangle of the scaler's sink pad 1280x960. The driver
+ propagates the size to the scaler's source pad
+ format.</para></listitem>
+
+ </orderedlist>
+ </para>
+
+ <para>When satisfied with the try results, applications can set the active
+ formats by setting the <structfield>which</structfield> argument to
+ <constant>V4L2_SUBDEV_FORMAT_ACTIVE</constant>. Active formats are changed
+ exactly as try formats by drivers. To avoid modifying the hardware state
+ during format negotiation, applications should negotiate try formats first
+ and then modify the active settings using the try formats returned during
+ the last negotiation iteration. This guarantees that the active format
+ will be applied as-is by the driver without being modified.
+ </para>
+ </section>
+
+ <section id="v4l2-subdev-selections">
+ <title>Selections: cropping, scaling and composition</title>
+
+ <para>Many sub-devices support cropping frames on their input or output
+ pads (or possible even on both). Cropping is used to select the area of
+ interest in an image, typically on an image sensor or a video decoder. It can
+ also be used as part of digital zoom implementations to select the area of
+ the image that will be scaled up.</para>
+
+ <para>Crop settings are defined by a crop rectangle and represented in a
+ &v4l2-rect; by the coordinates of the top left corner and the rectangle
+ size. Both the coordinates and sizes are expressed in pixels.</para>
+
+ <para>As for pad formats, drivers store try and active
+ rectangles for the selection targets <xref
+ linkend="v4l2-selections-common" />.</para>
+
+ <para>On sink pads, cropping is applied relative to the
+ current pad format. The pad format represents the image size as
+ received by the sub-device from the previous block in the
+ pipeline, and the crop rectangle represents the sub-image that
+ will be transmitted further inside the sub-device for
+ processing.</para>
+
+ <para>The scaling operation changes the size of the image by
+ scaling it to new dimensions. The scaling ratio isn't specified
+ explicitly, but is implied from the original and scaled image
+ sizes. Both sizes are represented by &v4l2-rect;.</para>
+
+ <para>Scaling support is optional. When supported by a subdev,
+ the crop rectangle on the subdev's sink pad is scaled to the
+ size configured using the &VIDIOC-SUBDEV-S-SELECTION; IOCTL
+ using <constant>V4L2_SEL_TGT_COMPOSE</constant>
+ selection target on the same pad. If the subdev supports scaling
+ but not composing, the top and left values are not used and must
+ always be set to zero.</para>
+
+ <para>On source pads, cropping is similar to sink pads, with the
+ exception that the source size from which the cropping is
+ performed, is the COMPOSE rectangle on the sink pad. In both
+ sink and source pads, the crop rectangle must be entirely
+ contained inside the source image size for the crop
+ operation.</para>
+
+ <para>The drivers should always use the closest possible
+ rectangle the user requests on all selection targets, unless
+ specifically told otherwise.
+ <constant>V4L2_SEL_FLAG_GE</constant> and
+ <constant>V4L2_SEL_FLAG_LE</constant> flags may be
+ used to round the image size either up or down. <xref
+ linkend="v4l2-selection-flags" /></para>
+ </section>
+
+ <section>
+ <title>Types of selection targets</title>
+
+ <section>
+ <title>Actual targets</title>
+
+ <para>Actual targets (without a postfix) reflect the actual
+ hardware configuration at any point of time. There is a BOUNDS
+ target corresponding to every actual target.</para>
+ </section>
+
+ <section>
+ <title>BOUNDS targets</title>
+
+ <para>BOUNDS targets is the smallest rectangle that contains all
+ valid actual rectangles. It may not be possible to set the actual
+ rectangle as large as the BOUNDS rectangle, however. This may be
+ because e.g. a sensor's pixel array is not rectangular but
+ cross-shaped or round. The maximum size may also be smaller than the
+ BOUNDS rectangle.</para>
+ </section>
+
+ </section>
+
+ <section>
+ <title>Order of configuration and format propagation</title>
+
+ <para>Inside subdevs, the order of image processing steps will
+ always be from the sink pad towards the source pad. This is also
+ reflected in the order in which the configuration must be
+ performed by the user: the changes made will be propagated to
+ any subsequent stages. If this behaviour is not desired, the
+ user must set
+ <constant>V4L2_SEL_FLAG_KEEP_CONFIG</constant> flag. This
+ flag causes no propagation of the changes are allowed in any
+ circumstances. This may also cause the accessed rectangle to be
+ adjusted by the driver, depending on the properties of the
+ underlying hardware.</para>
+
+ <para>The coordinates to a step always refer to the actual size
+ of the previous step. The exception to this rule is the source
+ compose rectangle, which refers to the sink compose bounds
+ rectangle --- if it is supported by the hardware.</para>
+
+ <orderedlist>
+ <listitem><para>Sink pad format. The user configures the sink pad
+ format. This format defines the parameters of the image the
+ entity receives through the pad for further processing.</para></listitem>
+
+ <listitem><para>Sink pad actual crop selection. The sink pad crop
+ defines the crop performed to the sink pad format.</para></listitem>
+
+ <listitem><para>Sink pad actual compose selection. The size of the
+ sink pad compose rectangle defines the scaling ratio compared
+ to the size of the sink pad crop rectangle. The location of
+ the compose rectangle specifies the location of the actual
+ sink compose rectangle in the sink compose bounds
+ rectangle.</para></listitem>
+
+ <listitem><para>Source pad actual crop selection. Crop on the source
+ pad defines crop performed to the image in the sink compose
+ bounds rectangle.</para></listitem>
+
+ <listitem><para>Source pad format. The source pad format defines the
+ output pixel format of the subdev, as well as the other
+ parameters with the exception of the image width and height.
+ Width and height are defined by the size of the source pad
+ actual crop selection.</para></listitem>
+ </orderedlist>
+
+ <para>Accessing any of the above rectangles not supported by the
+ subdev will return <constant>EINVAL</constant>. Any rectangle
+ referring to a previous unsupported rectangle coordinates will
+ instead refer to the previous supported rectangle. For example,
+ if sink crop is not supported, the compose selection will refer
+ to the sink pad format dimensions instead.</para>
+
+ <figure id="subdev-image-processing-crop">
+ <title>Image processing in subdevs: simple crop example</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="subdev-image-processing-crop.svg"
+ format="SVG" scale="200" />
+ </imageobject>
+ </mediaobject>
+ </figure>
+
+ <para>In the above example, the subdev supports cropping on its
+ sink pad. To configure it, the user sets the media bus format on
+ the subdev's sink pad. Now the actual crop rectangle can be set
+ on the sink pad --- the location and size of this rectangle
+ reflect the location and size of a rectangle to be cropped from
+ the sink format. The size of the sink crop rectangle will also
+ be the size of the format of the subdev's source pad.</para>
+
+ <figure id="subdev-image-processing-scaling-multi-source">
+ <title>Image processing in subdevs: scaling with multiple sources</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="subdev-image-processing-scaling-multi-source.svg"
+ format="SVG" scale="200" />
+ </imageobject>
+ </mediaobject>
+ </figure>
+
+ <para>In this example, the subdev is capable of first cropping,
+ then scaling and finally cropping for two source pads
+ individually from the resulting scaled image. The location of
+ the scaled image in the cropped image is ignored in sink compose
+ target. Both of the locations of the source crop rectangles
+ refer to the sink scaling rectangle, independently cropping an
+ area at location specified by the source crop rectangle from
+ it.</para>
+
+ <figure id="subdev-image-processing-full">
+ <title>Image processing in subdevs: scaling and composition
+ with multiple sinks and sources</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="subdev-image-processing-full.svg"
+ format="SVG" scale="200" />
+ </imageobject>
+ </mediaobject>
+ </figure>
+
+ <para>The subdev driver supports two sink pads and two source
+ pads. The images from both of the sink pads are individually
+ cropped, then scaled and further composed on the composition
+ bounds rectangle. From that, two independent streams are cropped
+ and sent out of the subdev from the source pads.</para>
+
+ </section>
+
+ </section>
+
+ &sub-subdev-formats;
diff --git a/Documentation/DocBook/media/v4l/dev-teletext.xml b/Documentation/DocBook/media/v4l/dev-teletext.xml
new file mode 100644
index 000000000..bd21c64d7
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/dev-teletext.xml
@@ -0,0 +1,29 @@
+ <title>Teletext Interface</title>
+
+ <para>This interface was aimed at devices receiving and demodulating
+Teletext data [<xref linkend="ets300706" />, <xref linkend="itu653" />], evaluating the
+Teletext packages and storing formatted pages in cache memory. Such
+devices are usually implemented as microcontrollers with serial
+interface (I<superscript>2</superscript>C) and could be found on old
+TV cards, dedicated Teletext decoding cards and home-brew devices
+connected to the PC parallel port.</para>
+
+ <para>The Teletext API was designed by Martin Buck. It was defined in
+the kernel header file <filename>linux/videotext.h</filename>, the
+specification is available from <ulink url="ftp://ftp.gwdg.de/pub/linux/misc/videotext/">
+ftp://ftp.gwdg.de/pub/linux/misc/videotext/</ulink>. (Videotext is the name of
+the German public television Teletext service.)</para>
+
+ <para>Eventually the Teletext API was integrated into the V4L API
+with character device file names <filename>/dev/vtx0</filename> to
+<filename>/dev/vtx31</filename>, device major number 81, minor numbers
+192 to 223.</para>
+
+ <para>However, teletext decoders were quickly replaced by more
+generic VBI demodulators and those dedicated teletext decoders no longer exist.
+For many years the vtx devices were still around, even though nobody used
+them. So the decision was made to finally remove support for the Teletext API in
+kernel 2.6.37.</para>
+
+ <para>Modern devices all use the <link linkend="raw-vbi">raw</link> or
+<link linkend="sliced">sliced</link> VBI API.</para>
diff --git a/Documentation/DocBook/media/v4l/driver.xml b/Documentation/DocBook/media/v4l/driver.xml
new file mode 100644
index 000000000..7c6638bac
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/driver.xml
@@ -0,0 +1,200 @@
+ <title>V4L2 Driver Programming</title>
+
+ <!-- This part defines the interface between the "videodev"
+ module and individual drivers. -->
+
+ <para>to do</para>
+<!--
+ <para>V4L2 is a two-layer driver system. The top layer is the "videodev"
+kernel module. When videodev initializes it registers as character device
+with major number 81, and it registers a set of file operations. All V4L2
+drivers are really clients of videodev, which calls V4L2 drivers through
+driver method functions. V4L2 drivers are also written as kernel modules.
+After probing the hardware they register one or more devices with
+videodev.</para>
+
+ <section id="driver-modules">
+ <title>Driver Modules</title>
+
+ <para>V4L2 driver modules must have an initialization function which is
+called after the module was loaded into kernel, an exit function whis is
+called before the module is removed. When the driver is compiled into the
+kernel these functions called at system boot and shutdown time.</para>
+
+ <informalexample>
+ <programlisting>
+#include &lt;linux/module.h&gt;
+
+/* Export information about this module. For details and other useful
+ macros see <filename>linux/module.h</filename>. */
+MODULE_DESCRIPTION("my - driver for my hardware");
+MODULE_AUTHOR("Your name here");
+MODULE_LICENSE("GPL");
+
+static void
+my_module_exit (void)
+{
+ /* Free all resources allocated by my_module_init(). */
+}
+
+static int
+my_module_init (void)
+{
+ /* Bind the driver to the supported hardware, see
+ <link linkend="driver-pci"> and
+ <link linkend="driver-usb"> for examples. */
+
+ return 0; /* a negative value on error, 0 on success. */
+}
+
+/* Export module functions. */
+module_init (my_module_init);
+module_exit (my_module_exit);
+</programlisting>
+ </informalexample>
+
+ <para>Users can add parameters when kernel modules are inserted:</para>
+
+ <informalexample>
+ <programlisting>
+include &lt;linux/moduleparam.h&gt;
+
+static int my_option = 123;
+static int my_option_array[47];
+
+/* Export the symbol, an int, with access permissions 0664.
+ See <filename>linux/moduleparam.h</filename> for other types. */
+module_param (my_option, int, 0644);
+module_param_array (my_option_array, int, NULL, 0644);
+
+MODULE_PARM_DESC (my_option, "Does magic things, default 123");
+</programlisting>
+ </informalexample>
+
+ <para>One parameter should be supported by all V4L2 drivers, the minor
+number of the device it will register. Purpose is to predictably link V4L2
+drivers to device nodes if more than one video device is installed. Use the
+name of the device node followed by a "_nr" suffix, for example "video_nr"
+for <filename>/dev/video</filename>.</para>
+
+ <informalexample>
+ <programlisting>
+/* Minor number of the device, -1 to allocate the first unused. */
+static int video_nr = -1;
+
+module_param (video_nr, int, 0444);
+</programlisting>
+ </informalexample>
+ </section>
+
+ <section id="driver-pci">
+ <title>PCI Devices</title>
+
+ <para>PCI devices are initialized like this:</para>
+
+ <informalexample>
+ <programlisting>
+typedef struct {
+ /* State of one physical device. */
+} my_device;
+
+static int
+my_resume (struct pci_dev * pci_dev)
+{
+ /* Restore the suspended device to working state. */
+}
+
+static int
+my_suspend (struct pci_dev * pci_dev,
+ pm_message_t state)
+{
+ /* This function is called before the system goes to sleep.
+ Stop all DMAs and disable interrupts, then put the device
+ into a low power state. For details see the kernel
+ sources under <filename>Documentation/power</filename>. */
+
+ return 0; /* a negative value on error, 0 on success. */
+}
+
+static void
+my_remove (struct pci_dev * pci_dev)
+{
+ my_device *my = pci_get_drvdata (pci_dev);
+
+ /* Describe me. */
+}
+
+static int
+my_probe (struct pci_dev * pci_dev,
+ const struct pci_device_id * pci_id)
+{
+ my_device *my;
+
+ /* Describe me. */
+
+ /* You can allocate per-device data here and store a pointer
+ to it in the pci_dev structure. */
+ my = ...;
+ pci_set_drvdata (pci_dev, my);
+
+ return 0; /* a negative value on error, 0 on success. */
+}
+
+/* A list of supported PCI devices. */
+static struct pci_device_id
+my_pci_device_ids [] = {
+ { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
+ { 0 } /* end of list */
+};
+
+/* Load our module if supported PCI devices are installed. */
+MODULE_DEVICE_TABLE (pci, my_pci_device_ids);
+
+static struct pci_driver
+my_pci_driver = {
+ .name = "my",
+ .id_table = my_pci_device_ids,
+
+ .probe = my_probe,
+ .remove = my_remove,
+
+ /* Power management functions. */
+ .suspend = my_suspend,
+ .resume = my_resume,
+};
+
+static void
+my_module_exit (void)
+{
+ pci_unregister_driver (&my_pci_driver);
+}
+
+static int
+my_module_init (void)
+{
+ return pci_register_driver (&my_pci_driver);
+}
+</programlisting>
+ </informalexample>
+ </section>
+
+ <section id="driver-usb">
+ <title>USB Devices</title>
+ <para>to do</para>
+ </section>
+ <section id="driver-registering">
+ <title>Registering V4L2 Drivers</title>
+
+ <para>After a V4L2 driver probed the hardware it registers one or more
+devices with the videodev module.</para>
+ </section>
+ <section id="driver-file-ops">
+ <title>File Operations</title>
+ <para>to do</para>
+ </section>
+ <section id="driver-internal-api">
+ <title>Internal API</title>
+ <para>to do</para>
+ </section>
+-->
diff --git a/Documentation/DocBook/media/v4l/fdl-appendix.xml b/Documentation/DocBook/media/v4l/fdl-appendix.xml
new file mode 100644
index 000000000..ae22394ba
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/fdl-appendix.xml
@@ -0,0 +1,671 @@
+<!--
+ The GNU Free Documentation License 1.1 in DocBook
+ Markup by Eric Baudais <baudais@okstate.edu>
+ Maintained by the GNOME Documentation Project
+ http://live.gnome.org/DocumentationProject
+ Version: 1.0.1
+ Last Modified: Nov 16, 2000
+-->
+
+<appendix id="fdl">
+ <appendixinfo>
+ <releaseinfo>
+ Version 1.1, March 2000
+ </releaseinfo>
+ <copyright>
+ <year>2000</year><holder>Free Software Foundation, Inc.</holder>
+ </copyright>
+ <legalnotice id="fdl-legalnotice">
+ <para>
+ <address>Free Software Foundation, Inc. <street>59 Temple Place,
+ Suite 330</street>, <city>Boston</city>, <state>MA</state>
+ <postcode>02111-1307</postcode> <country>USA</country></address>
+ Everyone is permitted to copy and distribute verbatim copies of this
+ license document, but changing it is not allowed.
+ </para>
+ </legalnotice>
+ </appendixinfo>
+ <title>GNU Free Documentation License</title>
+
+ <sect1 id="fdl-preamble">
+ <title>0. PREAMBLE</title>
+ <para>
+ The purpose of this License is to make a manual, textbook, or
+ other written document <quote>free</quote> in the sense of
+ freedom: to assure everyone the effective freedom to copy and
+ redistribute it, with or without modifying it, either
+ commercially or noncommercially. Secondarily, this License
+ preserves for the author and publisher a way to get credit for
+ their work, while not being considered responsible for
+ modifications made by others.
+ </para>
+
+ <para>
+ This License is a kind of <quote>copyleft</quote>, which means
+ that derivative works of the document must themselves be free in
+ the same sense. It complements the GNU General Public License,
+ which is a copyleft license designed for free software.
+ </para>
+
+ <para>
+ We have designed this License in order to use it for manuals for
+ free software, because free software needs free documentation: a
+ free program should come with manuals providing the same
+ freedoms that the software does. But this License is not limited
+ to software manuals; it can be used for any textual work,
+ regardless of subject matter or whether it is published as a
+ printed book. We recommend this License principally for works
+ whose purpose is instruction or reference.
+ </para>
+ </sect1>
+ <sect1 id="fdl-section1">
+ <title>1. APPLICABILITY AND DEFINITIONS</title>
+ <para id="fdl-document">
+ This License applies to any manual or other work that contains a
+ notice placed by the copyright holder saying it can be
+ distributed under the terms of this License. The
+ <quote>Document</quote>, below, refers to any such manual or
+ work. Any member of the public is a licensee, and is addressed
+ as <quote>you</quote>.
+ </para>
+
+ <para id="fdl-modified">
+ A <quote>Modified Version</quote> of the Document means any work
+ containing the Document or a portion of it, either copied
+ verbatim, or with modifications and/or translated into another
+ language.
+ </para>
+
+ <para id="fdl-secondary">
+ A <quote>Secondary Section</quote> is a named appendix or a
+ front-matter section of the <link
+ linkend="fdl-document">Document</link> that deals exclusively
+ with the relationship of the publishers or authors of the
+ Document to the Document's overall subject (or to related
+ matters) and contains nothing that could fall directly within
+ that overall subject. (For example, if the Document is in part a
+ textbook of mathematics, a Secondary Section may not explain any
+ mathematics.) The relationship could be a matter of historical
+ connection with the subject or with related matters, or of
+ legal, commercial, philosophical, ethical or political position
+ regarding them.
+ </para>
+
+ <para id="fdl-invariant">
+ The <quote>Invariant Sections</quote> are certain <link
+ linkend="fdl-secondary"> Secondary Sections</link> whose titles
+ are designated, as being those of Invariant Sections, in the
+ notice that says that the <link
+ linkend="fdl-document">Document</link> is released under this
+ License.
+ </para>
+
+ <para id="fdl-cover-texts">
+ The <quote>Cover Texts</quote> are certain short passages of
+ text that are listed, as Front-Cover Texts or Back-Cover Texts,
+ in the notice that says that the <link
+ linkend="fdl-document">Document</link> is released under this
+ License.
+ </para>
+
+ <para id="fdl-transparent">
+ A <quote>Transparent</quote> copy of the <link
+ linkend="fdl-document"> Document</link> means a machine-readable
+ copy, represented in a format whose specification is available
+ to the general public, whose contents can be viewed and edited
+ directly and straightforwardly with generic text editors or (for
+ images composed of pixels) generic paint programs or (for
+ drawings) some widely available drawing editor, and that is
+ suitable for input to text formatters or for automatic
+ translation to a variety of formats suitable for input to text
+ formatters. A copy made in an otherwise Transparent file format
+ whose markup has been designed to thwart or discourage
+ subsequent modification by readers is not Transparent. A copy
+ that is not <quote>Transparent</quote> is called
+ <quote>Opaque</quote>.
+ </para>
+
+ <para>
+ Examples of suitable formats for Transparent copies include
+ plain ASCII without markup, Texinfo input format, LaTeX input
+ format, SGML or XML using a publicly available DTD, and
+ standard-conforming simple HTML designed for human
+ modification. Opaque formats include PostScript, PDF,
+ proprietary formats that can be read and edited only by
+ proprietary word processors, SGML or XML for which the DTD
+ and/or processing tools are not generally available, and the
+ machine-generated HTML produced by some word processors for
+ output purposes only.
+ </para>
+
+ <para id="fdl-title-page">
+ The <quote>Title Page</quote> means, for a printed book, the
+ title page itself, plus such following pages as are needed to
+ hold, legibly, the material this License requires to appear in
+ the title page. For works in formats which do not have any title
+ page as such, <quote>Title Page</quote> means the text near the
+ most prominent appearance of the work's title, preceding the
+ beginning of the body of the text.
+ </para>
+ </sect1>
+
+ <sect1 id="fdl-section2">
+ <title>2. VERBATIM COPYING</title>
+ <para>
+ You may copy and distribute the <link
+ linkend="fdl-document">Document</link> in any medium, either
+ commercially or noncommercially, provided that this License, the
+ copyright notices, and the license notice saying this License
+ applies to the Document are reproduced in all copies, and that
+ you add no other conditions whatsoever to those of this
+ License. You may not use technical measures to obstruct or
+ control the reading or further copying of the copies you make or
+ distribute. However, you may accept compensation in exchange for
+ copies. If you distribute a large enough number of copies you
+ must also follow the conditions in <link
+ linkend="fdl-section3">section 3</link>.
+ </para>
+
+ <para>
+ You may also lend copies, under the same conditions stated
+ above, and you may publicly display copies.
+ </para>
+ </sect1>
+
+ <sect1 id="fdl-section3">
+ <title>3. COPYING IN QUANTITY</title>
+ <para>
+ If you publish printed copies of the <link
+ linkend="fdl-document">Document</link> numbering more than 100,
+ and the Document's license notice requires <link
+ linkend="fdl-cover-texts">Cover Texts</link>, you must enclose
+ the copies in covers that carry, clearly and legibly, all these
+ Cover Texts: Front-Cover Texts on the front cover, and
+ Back-Cover Texts on the back cover. Both covers must also
+ clearly and legibly identify you as the publisher of these
+ copies. The front cover must present the full title with all
+ words of the title equally prominent and visible. You may add
+ other material on the covers in addition. Copying with changes
+ limited to the covers, as long as they preserve the title of the
+ <link linkend="fdl-document">Document</link> and satisfy these
+ conditions, can be treated as verbatim copying in other
+ respects.
+ </para>
+
+ <para>
+ If the required texts for either cover are too voluminous to fit
+ legibly, you should put the first ones listed (as many as fit
+ reasonably) on the actual cover, and continue the rest onto
+ adjacent pages.
+ </para>
+
+ <para>
+ If you publish or distribute <link
+ linkend="fdl-transparent">Opaque</link> copies of the <link
+ linkend="fdl-document">Document</link> numbering more than 100,
+ you must either include a machine-readable <link
+ linkend="fdl-transparent">Transparent</link> copy along with
+ each Opaque copy, or state in or with each Opaque copy a
+ publicly-accessible computer-network location containing a
+ complete Transparent copy of the Document, free of added
+ material, which the general network-using public has access to
+ download anonymously at no charge using public-standard network
+ protocols. If you use the latter option, you must take
+ reasonably prudent steps, when you begin distribution of Opaque
+ copies in quantity, to ensure that this Transparent copy will
+ remain thus accessible at the stated location until at least one
+ year after the last time you distribute an Opaque copy (directly
+ or through your agents or retailers) of that edition to the
+ public.
+ </para>
+
+ <para>
+ It is requested, but not required, that you contact the authors
+ of the <link linkend="fdl-document">Document</link> well before
+ redistributing any large number of copies, to give them a chance
+ to provide you with an updated version of the Document.
+ </para>
+ </sect1>
+
+ <sect1 id="fdl-section4">
+ <title>4. MODIFICATIONS</title>
+ <para>
+ You may copy and distribute a <link
+ linkend="fdl-modified">Modified Version</link> of the <link
+ linkend="fdl-document">Document</link> under the conditions of
+ sections <link linkend="fdl-section2">2</link> and <link
+ linkend="fdl-section3">3</link> above, provided that you release
+ the Modified Version under precisely this License, with the
+ Modified Version filling the role of the Document, thus
+ licensing distribution and modification of the Modified Version
+ to whoever possesses a copy of it. In addition, you must do
+ these things in the Modified Version:
+ </para>
+
+ <itemizedlist mark="opencircle">
+ <listitem>
+ <formalpara>
+ <title>A</title>
+ <para>
+ Use in the <link linkend="fdl-title-page">Title
+ Page</link> (and on the covers, if any) a title distinct
+ from that of the <link
+ linkend="fdl-document">Document</link>, and from those of
+ previous versions (which should, if there were any, be
+ listed in the History section of the Document). You may
+ use the same title as a previous version if the original
+ publisher of that version gives permission.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>B</title>
+ <para>
+ List on the <link linkend="fdl-title-page">Title
+ Page</link>, as authors, one or more persons or entities
+ responsible for authorship of the modifications in the
+ <link linkend="fdl-modified">Modified Version</link>,
+ together with at least five of the principal authors of
+ the <link linkend="fdl-document">Document</link> (all of
+ its principal authors, if it has less than five).
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>C</title>
+ <para>
+ State on the <link linkend="fdl-title-page">Title
+ Page</link> the name of the publisher of the <link
+ linkend="fdl-modified">Modified Version</link>, as the
+ publisher.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>D</title>
+ <para>
+ Preserve all the copyright notices of the <link
+ linkend="fdl-document">Document</link>.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>E</title>
+ <para>
+ Add an appropriate copyright notice for your modifications
+ adjacent to the other copyright notices.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>F</title>
+ <para>
+ Include, immediately after the copyright notices, a
+ license notice giving the public permission to use the
+ <link linkend="fdl-modified">Modified Version</link> under
+ the terms of this License, in the form shown in the
+ Addendum below.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>G</title>
+ <para>
+ Preserve in that license notice the full lists of <link
+ linkend="fdl-invariant"> Invariant Sections</link> and
+ required <link linkend="fdl-cover-texts">Cover
+ Texts</link> given in the <link
+ linkend="fdl-document">Document's</link> license notice.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>H</title>
+ <para>
+ Include an unaltered copy of this License.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>I</title>
+ <para>
+ Preserve the section entitled <quote>History</quote>, and
+ its title, and add to it an item stating at least the
+ title, year, new authors, and publisher of the <link
+ linkend="fdl-modified">Modified Version </link>as given on
+ the <link linkend="fdl-title-page">Title Page</link>. If
+ there is no section entitled <quote>History</quote> in the
+ <link linkend="fdl-document">Document</link>, create one
+ stating the title, year, authors, and publisher of the
+ Document as given on its Title Page, then add an item
+ describing the Modified Version as stated in the previous
+ sentence.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>J</title>
+ <para>
+ Preserve the network location, if any, given in the <link
+ linkend="fdl-document">Document</link> for public access
+ to a <link linkend="fdl-transparent">Transparent</link>
+ copy of the Document, and likewise the network locations
+ given in the Document for previous versions it was based
+ on. These may be placed in the <quote>History</quote>
+ section. You may omit a network location for a work that
+ was published at least four years before the Document
+ itself, or if the original publisher of the version it
+ refers to gives permission.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>K</title>
+ <para>
+ In any section entitled <quote>Acknowledgements</quote> or
+ <quote>Dedications</quote>, preserve the section's title,
+ and preserve in the section all the substance and tone of
+ each of the contributor acknowledgements and/or
+ dedications given therein.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>L</title>
+ <para>
+ Preserve all the <link linkend="fdl-invariant">Invariant
+ Sections</link> of the <link
+ linkend="fdl-document">Document</link>, unaltered in their
+ text and in their titles. Section numbers or the
+ equivalent are not considered part of the section titles.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>M</title>
+ <para>
+ Delete any section entitled
+ <quote>Endorsements</quote>. Such a section may not be
+ included in the <link linkend="fdl-modified">Modified
+ Version</link>.
+ </para>
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+ <title>N</title>
+ <para>
+ Do not retitle any existing section as
+ <quote>Endorsements</quote> or to conflict in title with
+ any <link linkend="fdl-invariant">Invariant
+ Section</link>.
+ </para>
+ </formalpara>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ If the <link linkend="fdl-modified">Modified Version</link>
+ includes new front-matter sections or appendices that qualify as
+ <link linkend="fdl-secondary">Secondary Sections</link> and
+ contain no material copied from the Document, you may at your
+ option designate some or all of these sections as invariant. To
+ do this, add their titles to the list of <link
+ linkend="fdl-invariant">Invariant Sections</link> in the
+ Modified Version's license notice. These titles must be
+ distinct from any other section titles.
+ </para>
+
+ <para>
+ You may add a section entitled <quote>Endorsements</quote>,
+ provided it contains nothing but endorsements of your <link
+ linkend="fdl-modified">Modified Version</link> by various
+ parties--for example, statements of peer review or that the text
+ has been approved by an organization as the authoritative
+ definition of a standard.
+ </para>
+
+ <para>
+ You may add a passage of up to five words as a <link
+ linkend="fdl-cover-texts">Front-Cover Text</link>, and a passage
+ of up to 25 words as a <link
+ linkend="fdl-cover-texts">Back-Cover Text</link>, to the end of
+ the list of <link linkend="fdl-cover-texts">Cover Texts</link>
+ in the <link linkend="fdl-modified">Modified Version</link>.
+ Only one passage of Front-Cover Text and one of Back-Cover Text
+ may be added by (or through arrangements made by) any one
+ entity. If the <link linkend="fdl-document">Document</link>
+ already includes a cover text for the same cover, previously
+ added by you or by arrangement made by the same entity you are
+ acting on behalf of, you may not add another; but you may
+ replace the old one, on explicit permission from the previous
+ publisher that added the old one.
+ </para>
+
+ <para>
+ The author(s) and publisher(s) of the <link
+ linkend="fdl-document">Document</link> do not by this License
+ give permission to use their names for publicity for or to
+ assert or imply endorsement of any <link
+ linkend="fdl-modified">Modified Version </link>.
+ </para>
+ </sect1>
+
+ <sect1 id="fdl-section5">
+ <title>5. COMBINING DOCUMENTS</title>
+ <para>
+ You may combine the <link linkend="fdl-document">Document</link>
+ with other documents released under this License, under the
+ terms defined in <link linkend="fdl-section4">section 4</link>
+ above for modified versions, provided that you include in the
+ combination all of the <link linkend="fdl-invariant">Invariant
+ Sections</link> of all of the original documents, unmodified,
+ and list them all as Invariant Sections of your combined work in
+ its license notice.
+ </para>
+
+ <para>
+ The combined work need only contain one copy of this License,
+ and multiple identical <link linkend="fdl-invariant">Invariant
+ Sections</link> may be replaced with a single copy. If there are
+ multiple Invariant Sections with the same name but different
+ contents, make the title of each such section unique by adding
+ at the end of it, in parentheses, the name of the original
+ author or publisher of that section if known, or else a unique
+ number. Make the same adjustment to the section titles in the
+ list of Invariant Sections in the license notice of the combined
+ work.
+ </para>
+
+ <para>
+ In the combination, you must combine any sections entitled
+ <quote>History</quote> in the various original documents,
+ forming one section entitled <quote>History</quote>; likewise
+ combine any sections entitled <quote>Acknowledgements</quote>,
+ and any sections entitled <quote>Dedications</quote>. You must
+ delete all sections entitled <quote>Endorsements.</quote>
+ </para>
+ </sect1>
+
+ <sect1 id="fdl-section6">
+ <title>6. COLLECTIONS OF DOCUMENTS</title>
+ <para>
+ You may make a collection consisting of the <link
+ linkend="fdl-document">Document</link> and other documents
+ released under this License, and replace the individual copies
+ of this License in the various documents with a single copy that
+ is included in the collection, provided that you follow the
+ rules of this License for verbatim copying of each of the
+ documents in all other respects.
+ </para>
+
+ <para>
+ You may extract a single document from such a collection, and
+ dispbibute it individually under this License, provided you
+ insert a copy of this License into the extracted document, and
+ follow this License in all other respects regarding verbatim
+ copying of that document.
+ </para>
+ </sect1>
+
+ <sect1 id="fdl-section7">
+ <title>7. AGGREGATION WITH INDEPENDENT WORKS</title>
+ <para>
+ A compilation of the <link
+ linkend="fdl-document">Document</link> or its derivatives with
+ other separate and independent documents or works, in or on a
+ volume of a storage or distribution medium, does not as a whole
+ count as a <link linkend="fdl-modified">Modified Version</link>
+ of the Document, provided no compilation copyright is claimed
+ for the compilation. Such a compilation is called an
+ <quote>aggregate</quote>, and this License does not apply to the
+ other self-contained works thus compiled with the Document , on
+ account of their being thus compiled, if they are not themselves
+ derivative works of the Document. If the <link
+ linkend="fdl-cover-texts">Cover Text</link> requirement of <link
+ linkend="fdl-section3">section 3</link> is applicable to these
+ copies of the Document, then if the Document is less than one
+ quarter of the entire aggregate, the Document's Cover Texts may
+ be placed on covers that surround only the Document within the
+ aggregate. Otherwise they must appear on covers around the whole
+ aggregate.
+ </para>
+ </sect1>
+
+ <sect1 id="fdl-section8">
+ <title>8. TRANSLATION</title>
+ <para>
+ Translation is considered a kind of modification, so you may
+ distribute translations of the <link
+ linkend="fdl-document">Document</link> under the terms of <link
+ linkend="fdl-section4">section 4</link>. Replacing <link
+ linkend="fdl-invariant"> Invariant Sections</link> with
+ translations requires special permission from their copyright
+ holders, but you may include translations of some or all
+ Invariant Sections in addition to the original versions of these
+ Invariant Sections. You may include a translation of this
+ License provided that you also include the original English
+ version of this License. In case of a disagreement between the
+ translation and the original English version of this License,
+ the original English version will prevail.
+ </para>
+ </sect1>
+
+ <sect1 id="fdl-section9">
+ <title>9. TERMINATION</title>
+ <para>
+ You may not copy, modify, sublicense, or distribute the <link
+ linkend="fdl-document">Document</link> except as expressly
+ provided for under this License. Any other attempt to copy,
+ modify, sublicense or distribute the Document is void, and will
+ automatically terminate your rights under this License. However,
+ parties who have received copies, or rights, from you under this
+ License will not have their licenses terminated so long as such
+ parties remain in full compliance.
+ </para>
+ </sect1>
+
+ <sect1 id="fdl-section10">
+ <title>10. FUTURE REVISIONS OF THIS LICENSE</title>
+ <para>
+ The <ulink type="http"
+ url="http://www.gnu.org/fsf/fsf.html">Free Software
+ Foundation</ulink> may publish new, revised versions of the GNU
+ Free Documentation License from time to time. Such new versions
+ will be similar in spirit to the present version, but may differ
+ in detail to address new problems or concerns. See <ulink
+ type="http"
+ url="http://www.gnu.org/copyleft">http://www.gnu.org/copyleft/</ulink>.
+ </para>
+
+ <para>
+ Each version of the License is given a distinguishing version
+ number. If the <link linkend="fdl-document">Document</link>
+ specifies that a particular numbered version of this License
+ <quote>or any later version</quote> applies to it, you have the
+ option of following the terms and conditions either of that
+ specified version or of any later version that has been
+ published (not as a draft) by the Free Software Foundation. If
+ the Document does not specify a version number of this License,
+ you may choose any version ever published (not as a draft) by
+ the Free Software Foundation.
+ </para>
+ </sect1>
+
+ <sect1 id="fdl-using">
+ <title>Addendum</title>
+ <para>
+ To use this License in a document you have written, include a copy of
+ the License in the document and put the following copyright and
+ license notices just after the title page:
+ </para>
+
+ <blockquote>
+ <para>
+ Copyright &copy; YEAR YOUR NAME.
+ </para>
+ <para>
+ Permission is granted to copy, distribute and/or modify this
+ document under the terms of the GNU Free Documentation
+ License, Version 1.1 or any later version published by the
+ Free Software Foundation; with the <link
+ linkend="fdl-invariant">Invariant Sections</link> being LIST
+ THEIR TITLES, with the <link
+ linkend="fdl-cover-texts">Front-Cover Texts</link> being LIST,
+ and with the <link linkend="fdl-cover-texts">Back-Cover
+ Texts</link> being LIST. A copy of the license is included in
+ the section entitled <quote>GNU Free Documentation
+ License</quote>.
+ </para>
+ </blockquote>
+
+ <para>
+ If you have no <link linkend="fdl-invariant">Invariant
+ Sections</link>, write <quote>with no Invariant Sections</quote>
+ instead of saying which ones are invariant. If you have no
+ <link linkend="fdl-cover-texts">Front-Cover Texts</link>, write
+ <quote>no Front-Cover Texts</quote> instead of
+ <quote>Front-Cover Texts being LIST</quote>; likewise for <link
+ linkend="fdl-cover-texts">Back-Cover Texts</link>.
+ </para>
+
+ <para>
+ If your document contains nontrivial examples of program code,
+ we recommend releasing these examples in parallel under your
+ choice of free software license, such as the <ulink type="http"
+ url="http://www.gnu.org/copyleft/gpl.html"> GNU General Public
+ License</ulink>, to permit their use in free software.
+ </para>
+ </sect1>
+</appendix>
+
+
+
+
+
+
diff --git a/Documentation/DocBook/media/v4l/func-close.xml b/Documentation/DocBook/media/v4l/func-close.xml
new file mode 100644
index 000000000..232920d2f
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/func-close.xml
@@ -0,0 +1,62 @@
+<refentry id="func-close">
+ <refmeta>
+ <refentrytitle>V4L2 close()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>v4l2-close</refname>
+ <refpurpose>Close a V4L2 device</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>#include &lt;unistd.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>int <function>close</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>Closes the device. Any I/O in progress is terminated and
+resources associated with the file descriptor are freed. However data
+format parameters, current input or output, control values or other
+properties remain unchanged.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>Return Value</title>
+
+ <para>The function returns <returnvalue>0</returnvalue> on
+success, <returnvalue>-1</returnvalue> on failure and the
+<varname>errno</varname> is set appropriately. Possible error
+codes:</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EBADF</errorcode></term>
+ <listitem>
+ <para><parameter>fd</parameter> is not a valid open file
+descriptor.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/func-ioctl.xml b/Documentation/DocBook/media/v4l/func-ioctl.xml
new file mode 100644
index 000000000..4394184a1
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/func-ioctl.xml
@@ -0,0 +1,71 @@
+<refentry id="func-ioctl">
+ <refmeta>
+ <refentrytitle>V4L2 ioctl()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>v4l2-ioctl</refname>
+ <refpurpose>Program a V4L2 device</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>#include &lt;sys/ioctl.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>void *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>V4L2 ioctl request code as defined in the <filename>videodev2.h</filename> header file, for example
+VIDIOC_QUERYCAP.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para>Pointer to a function parameter, usually a structure.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>The <function>ioctl()</function> function is used to program
+V4L2 devices. The argument <parameter>fd</parameter> must be an open
+file descriptor. An ioctl <parameter>request</parameter> has encoded
+in it whether the argument is an input, output or read/write
+parameter, and the size of the argument <parameter>argp</parameter> in
+bytes. Macros and defines specifying V4L2 ioctl requests are located
+in the <filename>videodev2.h</filename> header file.
+Applications should use their own copy, not include the version in the
+kernel sources on the system they compile on. All V4L2 ioctl requests,
+their respective function and parameters are specified in <xref
+ linkend="user-func" />.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ <para>When an ioctl that takes an output or read/write parameter fails,
+ the parameter remains unmodified.</para>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/func-mmap.xml b/Documentation/DocBook/media/v4l/func-mmap.xml
new file mode 100644
index 000000000..f31ad71bf
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/func-mmap.xml
@@ -0,0 +1,183 @@
+<refentry id="func-mmap">
+ <refmeta>
+ <refentrytitle>V4L2 mmap()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>v4l2-mmap</refname>
+ <refpurpose>Map device memory into application address space</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>
+#include &lt;unistd.h&gt;
+#include &lt;sys/mman.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>void *<function>mmap</function></funcdef>
+ <paramdef>void *<parameter>start</parameter></paramdef>
+ <paramdef>size_t <parameter>length</parameter></paramdef>
+ <paramdef>int <parameter>prot</parameter></paramdef>
+ <paramdef>int <parameter>flags</parameter></paramdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>off_t <parameter>offset</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+ <variablelist>
+ <varlistentry>
+ <term><parameter>start</parameter></term>
+ <listitem>
+ <para>Map the buffer to this address in the
+application's address space. When the <constant>MAP_FIXED</constant>
+flag is specified, <parameter>start</parameter> must be a multiple of the
+pagesize and mmap will fail when the specified address
+cannot be used. Use of this option is discouraged; applications should
+just specify a <constant>NULL</constant> pointer here.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>length</parameter></term>
+ <listitem>
+ <para>Length of the memory area to map. This must be the
+same value as returned by the driver in the &v4l2-buffer;
+<structfield>length</structfield> field for the
+single-planar API, and the same value as returned by the driver
+in the &v4l2-plane; <structfield>length</structfield> field for the
+multi-planar API.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>prot</parameter></term>
+ <listitem>
+ <para>The <parameter>prot</parameter> argument describes the
+desired memory protection. Regardless of the device type and the
+direction of data exchange it should be set to
+<constant>PROT_READ</constant> | <constant>PROT_WRITE</constant>,
+permitting read and write access to image buffers. Drivers should
+support at least this combination of flags. Note the Linux
+<filename>video-buf</filename> kernel module, which is used by the
+bttv, saa7134, saa7146, cx88 and vivi driver supports only
+<constant>PROT_READ</constant> | <constant>PROT_WRITE</constant>. When
+the driver does not support the desired protection the
+<function>mmap()</function> function fails.</para>
+ <para>Note device memory accesses (&eg; the memory on a
+graphics card with video capturing hardware) may incur a performance
+penalty compared to main memory accesses, or reads may be
+significantly slower than writes or vice versa. Other I/O methods may
+be more efficient in this case.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>flags</parameter></term>
+ <listitem>
+ <para>The <parameter>flags</parameter> parameter
+specifies the type of the mapped object, mapping options and whether
+modifications made to the mapped copy of the page are private to the
+process or are to be shared with other references.</para>
+ <para><constant>MAP_FIXED</constant> requests that the
+driver selects no other address than the one specified. If the
+specified address cannot be used, <function>mmap()</function> will fail. If
+<constant>MAP_FIXED</constant> is specified,
+<parameter>start</parameter> must be a multiple of the pagesize. Use
+of this option is discouraged.</para>
+ <para>One of the <constant>MAP_SHARED</constant> or
+<constant>MAP_PRIVATE</constant> flags must be set.
+<constant>MAP_SHARED</constant> allows applications to share the
+mapped memory with other (&eg; child-) processes. Note the Linux
+<filename>video-buf</filename> module which is used by the bttv,
+saa7134, saa7146, cx88 and vivi driver supports only
+<constant>MAP_SHARED</constant>. <constant>MAP_PRIVATE</constant>
+requests copy-on-write semantics. V4L2 applications should not set the
+<constant>MAP_PRIVATE</constant>, <constant>MAP_DENYWRITE</constant>,
+<constant>MAP_EXECUTABLE</constant> or <constant>MAP_ANON</constant>
+flag.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>offset</parameter></term>
+ <listitem>
+ <para>Offset of the buffer in device memory. This must be the
+same value as returned by the driver in the &v4l2-buffer;
+<structfield>m</structfield> union <structfield>offset</structfield> field for
+the single-planar API, and the same value as returned by the driver
+in the &v4l2-plane; <structfield>m</structfield> union
+<structfield>mem_offset</structfield> field for the multi-planar API.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>The <function>mmap()</function> function asks to map
+<parameter>length</parameter> bytes starting at
+<parameter>offset</parameter> in the memory of the device specified by
+<parameter>fd</parameter> into the application address space,
+preferably at address <parameter>start</parameter>. This latter
+address is a hint only, and is usually specified as 0.</para>
+
+ <para>Suitable length and offset parameters are queried with the
+&VIDIOC-QUERYBUF; ioctl. Buffers must be allocated with the
+&VIDIOC-REQBUFS; ioctl before they can be queried.</para>
+
+ <para>To unmap buffers the &func-munmap; function is used.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>Return Value</title>
+
+ <para>On success <function>mmap()</function> returns a pointer to
+the mapped buffer. On error <constant>MAP_FAILED</constant> (-1) is
+returned, and the <varname>errno</varname> variable is set
+appropriately. Possible error codes are:</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EBADF</errorcode></term>
+ <listitem>
+ <para><parameter>fd</parameter> is not a valid file
+descriptor.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EACCES</errorcode></term>
+ <listitem>
+ <para><parameter>fd</parameter> is
+not open for reading and writing.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <parameter>start</parameter> or
+<parameter>length</parameter> or <parameter>offset</parameter> are not
+suitable. (E.&nbsp;g. they are too large, or not aligned on a
+<constant>PAGESIZE</constant> boundary.)</para>
+ <para>The <parameter>flags</parameter> or
+<parameter>prot</parameter> value is not supported.</para>
+ <para>No buffers have been allocated with the
+&VIDIOC-REQBUFS; ioctl.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENOMEM</errorcode></term>
+ <listitem>
+ <para>Not enough physical or virtual memory was available to
+complete the request.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/func-munmap.xml b/Documentation/DocBook/media/v4l/func-munmap.xml
new file mode 100644
index 000000000..860d49ca5
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/func-munmap.xml
@@ -0,0 +1,76 @@
+<refentry id="func-munmap">
+ <refmeta>
+ <refentrytitle>V4L2 munmap()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>v4l2-munmap</refname>
+ <refpurpose>Unmap device memory</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>
+#include &lt;unistd.h&gt;
+#include &lt;sys/mman.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>int <function>munmap</function></funcdef>
+ <paramdef>void *<parameter>start</parameter></paramdef>
+ <paramdef>size_t <parameter>length</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+ <refsect1>
+ <title>Arguments</title>
+ <variablelist>
+ <varlistentry>
+ <term><parameter>start</parameter></term>
+ <listitem>
+ <para>Address of the mapped buffer as returned by the
+&func-mmap; function.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>length</parameter></term>
+ <listitem>
+ <para>Length of the mapped buffer. This must be the same
+value as given to <function>mmap()</function> and returned by the
+driver in the &v4l2-buffer; <structfield>length</structfield>
+field for the single-planar API and in the &v4l2-plane;
+<structfield>length</structfield> field for the multi-planar API.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>Unmaps a previously with the &func-mmap; function mapped
+buffer and frees it, if possible. <!-- ? This function (not freeing)
+has no impact on I/O in progress, specifically it does not imply
+&VIDIOC-STREAMOFF; to terminate I/O. Unmapped buffers can still be
+enqueued, dequeued or queried, they are just not accessible by the
+application.--></para>
+ </refsect1>
+
+ <refsect1>
+ <title>Return Value</title>
+
+ <para>On success <function>munmap()</function> returns 0, on
+failure -1 and the <varname>errno</varname> variable is set
+appropriately:</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <parameter>start</parameter> or
+<parameter>length</parameter> is incorrect, or no buffers have been
+mapped yet.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/func-open.xml b/Documentation/DocBook/media/v4l/func-open.xml
new file mode 100644
index 000000000..cf64e207c
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/func-open.xml
@@ -0,0 +1,113 @@
+<refentry id="func-open">
+ <refmeta>
+ <refentrytitle>V4L2 open()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>v4l2-open</refname>
+ <refpurpose>Open a V4L2 device</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>#include &lt;fcntl.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>int <function>open</function></funcdef>
+ <paramdef>const char *<parameter>device_name</parameter></paramdef>
+ <paramdef>int <parameter>flags</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>device_name</parameter></term>
+ <listitem>
+ <para>Device to be opened.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>flags</parameter></term>
+ <listitem>
+ <para>Open flags. Access mode must be
+<constant>O_RDWR</constant>. This is just a technicality, input devices
+still support only reading and output devices only writing.</para>
+ <para>When the <constant>O_NONBLOCK</constant> flag is
+given, the read() function and the &VIDIOC-DQBUF; ioctl will return
+the &EAGAIN; when no data is available or no buffer is in the driver
+outgoing queue, otherwise these functions block until data becomes
+available. All V4L2 drivers exchanging data with applications must
+support the <constant>O_NONBLOCK</constant> flag.</para>
+ <para>Other flags have no effect.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+ <refsect1>
+ <title>Description</title>
+
+ <para>To open a V4L2 device applications call
+<function>open()</function> with the desired device name. This
+function has no side effects; all data format parameters, current
+input or output, control values or other properties remain unchanged.
+At the first <function>open()</function> call after loading the driver
+they will be reset to default values, drivers are never in an
+undefined state.</para>
+ </refsect1>
+ <refsect1>
+ <title>Return Value</title>
+
+ <para>On success <function>open</function> returns the new file
+descriptor. On error -1 is returned, and the <varname>errno</varname>
+variable is set appropriately. Possible error codes are:</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EACCES</errorcode></term>
+ <listitem>
+ <para>The caller has no permission to access the
+device.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The driver does not support multiple opens and the
+device is already in use.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENXIO</errorcode></term>
+ <listitem>
+ <para>No device corresponding to this device special file
+exists.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENOMEM</errorcode></term>
+ <listitem>
+ <para>Not enough kernel memory was available to complete the
+request.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EMFILE</errorcode></term>
+ <listitem>
+ <para>The process already has the maximum number of
+files open.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENFILE</errorcode></term>
+ <listitem>
+ <para>The limit on the total number of files open on the
+system has been reached.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/func-poll.xml b/Documentation/DocBook/media/v4l/func-poll.xml
new file mode 100644
index 000000000..4c73f1152
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/func-poll.xml
@@ -0,0 +1,142 @@
+<refentry id="func-poll">
+ <refmeta>
+ <refentrytitle>V4L2 poll()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>v4l2-poll</refname>
+ <refpurpose>Wait for some event on a file descriptor</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>#include &lt;sys/poll.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>int <function>poll</function></funcdef>
+ <paramdef>struct pollfd *<parameter>ufds</parameter></paramdef>
+ <paramdef>unsigned int <parameter>nfds</parameter></paramdef>
+ <paramdef>int <parameter>timeout</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>With the <function>poll()</function> function applications
+can suspend execution until the driver has captured data or is ready
+to accept data for output.</para>
+
+ <para>When streaming I/O has been negotiated this function waits
+until a buffer has been filled by the capture device and can be dequeued
+with the &VIDIOC-DQBUF; ioctl. For output devices this function waits
+until the device is ready to accept a new buffer to be queued up with
+the &VIDIOC-QBUF; ioctl for display. When buffers are already in the outgoing
+queue of the driver (capture) or the incoming queue isn't full (display)
+the function returns immediately.</para>
+
+ <para>On success <function>poll()</function> returns the number of
+file descriptors that have been selected (that is, file descriptors
+for which the <structfield>revents</structfield> field of the
+respective <structname>pollfd</structname> structure is non-zero).
+Capture devices set the <constant>POLLIN</constant> and
+<constant>POLLRDNORM</constant> flags in the
+<structfield>revents</structfield> field, output devices the
+<constant>POLLOUT</constant> and <constant>POLLWRNORM</constant>
+flags. When the function timed out it returns a value of zero, on
+failure it returns <returnvalue>-1</returnvalue> and the
+<varname>errno</varname> variable is set appropriately. When the
+application did not call &VIDIOC-STREAMON; the
+<function>poll()</function> function succeeds, but sets the
+<constant>POLLERR</constant> flag in the
+<structfield>revents</structfield> field. When the
+application has called &VIDIOC-STREAMON; for a capture device but hasn't
+yet called &VIDIOC-QBUF;, the <function>poll()</function> function
+succeeds and sets the <constant>POLLERR</constant> flag in the
+<structfield>revents</structfield> field. For output devices this
+same situation will cause <function>poll()</function> to succeed
+as well, but it sets the <constant>POLLOUT</constant> and
+<constant>POLLWRNORM</constant> flags in the <structfield>revents</structfield>
+field.</para>
+
+ <para>If an event occurred (see &VIDIOC-DQEVENT;) then
+<constant>POLLPRI</constant> will be set in the <structfield>revents</structfield>
+field and <function>poll()</function> will return.</para>
+
+ <para>When use of the <function>read()</function> function has
+been negotiated and the driver does not capture yet, the
+<function>poll</function> function starts capturing. When that fails
+it returns a <constant>POLLERR</constant> as above. Otherwise it waits
+until data has been captured and can be read. When the driver captures
+continuously (as opposed to, for example, still images) the function
+may return immediately.</para>
+
+ <para>When use of the <function>write()</function> function has
+been negotiated and the driver does not stream yet, the
+<function>poll</function> function starts streaming. When that fails
+it returns a <constant>POLLERR</constant> as above. Otherwise it waits
+until the driver is ready for a non-blocking
+<function>write()</function> call.</para>
+
+ <para>If the caller is only interested in events (just
+<constant>POLLPRI</constant> is set in the <structfield>events</structfield>
+field), then <function>poll()</function> will <emphasis>not</emphasis>
+start streaming if the driver does not stream yet. This makes it
+possible to just poll for events and not for buffers.</para>
+
+ <para>All drivers implementing the <function>read()</function> or
+<function>write()</function> function or streaming I/O must also
+support the <function>poll()</function> function.</para>
+
+ <para>For more details see the
+<function>poll()</function> manual page.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>Return Value</title>
+
+ <para>On success, <function>poll()</function> returns the number
+structures which have non-zero <structfield>revents</structfield>
+fields, or zero if the call timed out. On error
+<returnvalue>-1</returnvalue> is returned, and the
+<varname>errno</varname> variable is set appropriately:</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EBADF</errorcode></term>
+ <listitem>
+ <para>One or more of the <parameter>ufds</parameter> members
+specify an invalid file descriptor.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The driver does not support multiple read or write
+streams and the device is already in use.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EFAULT</errorcode></term>
+ <listitem>
+ <para><parameter>ufds</parameter> references an inaccessible
+memory area.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINTR</errorcode></term>
+ <listitem>
+ <para>The call was interrupted by a signal.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <parameter>nfds</parameter> argument is greater
+than <constant>OPEN_MAX</constant>.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/func-read.xml b/Documentation/DocBook/media/v4l/func-read.xml
new file mode 100644
index 000000000..e218bbfbd
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/func-read.xml
@@ -0,0 +1,181 @@
+<refentry id="func-read">
+ <refmeta>
+ <refentrytitle>V4L2 read()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>v4l2-read</refname>
+ <refpurpose>Read from a V4L2 device</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>#include &lt;unistd.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>ssize_t <function>read</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>void *<parameter>buf</parameter></paramdef>
+ <paramdef>size_t <parameter>count</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>buf</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>count</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para><function>read()</function> attempts to read up to
+<parameter>count</parameter> bytes from file descriptor
+<parameter>fd</parameter> into the buffer starting at
+<parameter>buf</parameter>. The layout of the data in the buffer is
+discussed in the respective device interface section, see ##. If <parameter>count</parameter> is zero,
+<function>read()</function> returns zero and has no other results. If
+<parameter>count</parameter> is greater than
+<constant>SSIZE_MAX</constant>, the result is unspecified. Regardless
+of the <parameter>count</parameter> value each
+<function>read()</function> call will provide at most one frame (two
+fields) worth of data.</para>
+
+ <para>By default <function>read()</function> blocks until data
+becomes available. When the <constant>O_NONBLOCK</constant> flag was
+given to the &func-open; function it
+returns immediately with an &EAGAIN; when no data is available. The
+&func-select; or &func-poll; functions
+can always be used to suspend execution until data becomes available. All
+drivers supporting the <function>read()</function> function must also
+support <function>select()</function> and
+<function>poll()</function>.</para>
+
+ <para>Drivers can implement read functionality in different
+ways, using a single or multiple buffers and discarding the oldest or
+newest frames once the internal buffers are filled.</para>
+
+ <para><function>read()</function> never returns a "snapshot" of a
+buffer being filled. Using a single buffer the driver will stop
+capturing when the application starts reading the buffer until the
+read is finished. Thus only the period of the vertical blanking
+interval is available for reading, or the capture rate must fall below
+the nominal frame rate of the video standard.</para>
+
+<para>The behavior of
+<function>read()</function> when called during the active picture
+period or the vertical blanking separating the top and bottom field
+depends on the discarding policy. A driver discarding the oldest
+frames keeps capturing into an internal buffer, continuously
+overwriting the previously, not read frame, and returns the frame
+being received at the time of the <function>read()</function> call as
+soon as it is complete.</para>
+
+ <para>A driver discarding the newest frames stops capturing until
+the next <function>read()</function> call. The frame being received at
+<function>read()</function> time is discarded, returning the following
+frame instead. Again this implies a reduction of the capture rate to
+one half or less of the nominal frame rate. An example of this model
+is the video read mode of the bttv driver, initiating a DMA to user
+memory when <function>read()</function> is called and returning when
+the DMA finished.</para>
+
+ <para>In the multiple buffer model drivers maintain a ring of
+internal buffers, automatically advancing to the next free buffer.
+This allows continuous capturing when the application can empty the
+buffers fast enough. Again, the behavior when the driver runs out of
+free buffers depends on the discarding policy.</para>
+
+ <para>Applications can get and set the number of buffers used
+internally by the driver with the &VIDIOC-G-PARM; and &VIDIOC-S-PARM;
+ioctls. They are optional, however. The discarding policy is not
+reported and cannot be changed. For minimum requirements see <xref
+ linkend="devices" />.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>Return Value</title>
+
+ <para>On success, the number of bytes read is returned. It is not
+an error if this number is smaller than the number of bytes requested,
+or the amount of data required for one frame. This may happen for
+example because <function>read()</function> was interrupted by a
+signal. On error, -1 is returned, and the <varname>errno</varname>
+variable is set appropriately. In this case the next read will start
+at the beginning of a new frame. Possible error codes are:</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EAGAIN</errorcode></term>
+ <listitem>
+ <para>Non-blocking I/O has been selected using
+O_NONBLOCK and no data was immediately available for reading.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBADF</errorcode></term>
+ <listitem>
+ <para><parameter>fd</parameter> is not a valid file
+descriptor or is not open for reading, or the process already has the
+maximum number of files open.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The driver does not support multiple read streams and the
+device is already in use.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EFAULT</errorcode></term>
+ <listitem>
+ <para><parameter>buf</parameter> references an inaccessible
+memory area.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINTR</errorcode></term>
+ <listitem>
+ <para>The call was interrupted by a signal before any
+data was read.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EIO</errorcode></term>
+ <listitem>
+ <para>I/O error. This indicates some hardware problem or a
+failure to communicate with a remote device (USB camera etc.).</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <function>read()</function> function is not
+supported by this driver, not on this device, or generally not on this
+type of device.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/func-select.xml b/Documentation/DocBook/media/v4l/func-select.xml
new file mode 100644
index 000000000..e12a60d9b
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/func-select.xml
@@ -0,0 +1,130 @@
+<refentry id="func-select">
+ <refmeta>
+ <refentrytitle>V4L2 select()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>v4l2-select</refname>
+ <refpurpose>Synchronous I/O multiplexing</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>
+#include &lt;sys/time.h&gt;
+#include &lt;sys/types.h&gt;
+#include &lt;unistd.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>int <function>select</function></funcdef>
+ <paramdef>int <parameter>nfds</parameter></paramdef>
+ <paramdef>fd_set *<parameter>readfds</parameter></paramdef>
+ <paramdef>fd_set *<parameter>writefds</parameter></paramdef>
+ <paramdef>fd_set *<parameter>exceptfds</parameter></paramdef>
+ <paramdef>struct timeval *<parameter>timeout</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>With the <function>select()</function> function applications
+can suspend execution until the driver has captured data or is ready
+to accept data for output.</para>
+
+ <para>When streaming I/O has been negotiated this function waits
+until a buffer has been filled or displayed and can be dequeued with
+the &VIDIOC-DQBUF; ioctl. When buffers are already in the outgoing
+queue of the driver the function returns immediately.</para>
+
+ <para>On success <function>select()</function> returns the total
+number of bits set in the <structname>fd_set</structname>s. When the
+function timed out it returns a value of zero. On failure it returns
+<returnvalue>-1</returnvalue> and the <varname>errno</varname>
+variable is set appropriately. When the application did not call
+&VIDIOC-QBUF; or &VIDIOC-STREAMON; yet the
+<function>select()</function> function succeeds, setting the bit of
+the file descriptor in <parameter>readfds</parameter> or
+<parameter>writefds</parameter>, but subsequent &VIDIOC-DQBUF; calls
+will fail.<footnote><para>The Linux kernel implements
+<function>select()</function> like the &func-poll; function, but
+<function>select()</function> cannot return a
+<constant>POLLERR</constant>.</para>
+ </footnote></para>
+
+ <para>When use of the <function>read()</function> function has
+been negotiated and the driver does not capture yet, the
+<function>select()</function> function starts capturing. When that
+fails, <function>select()</function> returns successful and a
+subsequent <function>read()</function> call, which also attempts to
+start capturing, will return an appropriate error code. When the
+driver captures continuously (as opposed to, for example, still
+images) and data is already available the
+<function>select()</function> function returns immediately.</para>
+
+ <para>When use of the <function>write()</function> function has
+been negotiated the <function>select()</function> function just waits
+until the driver is ready for a non-blocking
+<function>write()</function> call.</para>
+
+ <para>All drivers implementing the <function>read()</function> or
+<function>write()</function> function or streaming I/O must also
+support the <function>select()</function> function.</para>
+
+ <para>For more details see the <function>select()</function>
+manual page.</para>
+
+ </refsect1>
+
+ <refsect1>
+ <title>Return Value</title>
+
+ <para>On success, <function>select()</function> returns the number
+of descriptors contained in the three returned descriptor sets, which
+will be zero if the timeout expired. On error
+<returnvalue>-1</returnvalue> is returned, and the
+<varname>errno</varname> variable is set appropriately; the sets and
+<parameter>timeout</parameter> are undefined. Possible error codes
+are:</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EBADF</errorcode></term>
+ <listitem>
+ <para>One or more of the file descriptor sets specified a
+file descriptor that is not open.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The driver does not support multiple read or write
+streams and the device is already in use.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EFAULT</errorcode></term>
+ <listitem>
+ <para>The <parameter>readfds</parameter>,
+<parameter>writefds</parameter>, <parameter>exceptfds</parameter> or
+<parameter>timeout</parameter> pointer references an inaccessible memory
+area.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINTR</errorcode></term>
+ <listitem>
+ <para>The call was interrupted by a signal.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <parameter>nfds</parameter> argument is less than
+zero or greater than <constant>FD_SETSIZE</constant>.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/func-write.xml b/Documentation/DocBook/media/v4l/func-write.xml
new file mode 100644
index 000000000..575207885
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/func-write.xml
@@ -0,0 +1,128 @@
+<refentry id="func-write">
+ <refmeta>
+ <refentrytitle>V4L2 write()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>v4l2-write</refname>
+ <refpurpose>Write to a V4L2 device</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>#include &lt;unistd.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>ssize_t <function>write</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>void *<parameter>buf</parameter></paramdef>
+ <paramdef>size_t <parameter>count</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>buf</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>count</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para><function>write()</function> writes up to
+<parameter>count</parameter> bytes to the device referenced by the
+file descriptor <parameter>fd</parameter> from the buffer starting at
+<parameter>buf</parameter>. When the hardware outputs are not active
+yet, this function enables them. When <parameter>count</parameter> is
+zero, <function>write()</function> returns
+<returnvalue>0</returnvalue> without any other effect.</para>
+
+ <para>When the application does not provide more data in time, the
+previous video frame, raw VBI image, sliced VPS or WSS data is
+displayed again. Sliced Teletext or Closed Caption data is not
+repeated, the driver inserts a blank line instead.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>Return Value</title>
+
+ <para>On success, the number of bytes written are returned. Zero
+indicates nothing was written. On error, <returnvalue>-1</returnvalue>
+is returned, and the <varname>errno</varname> variable is set
+appropriately. In this case the next write will start at the beginning
+of a new frame. Possible error codes are:</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EAGAIN</errorcode></term>
+ <listitem>
+ <para>Non-blocking I/O has been selected using the <link
+linkend="func-open"><constant>O_NONBLOCK</constant></link> flag and no
+buffer space was available to write the data immediately.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBADF</errorcode></term>
+ <listitem>
+ <para><parameter>fd</parameter> is not a valid file
+descriptor or is not open for writing.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The driver does not support multiple write streams and the
+device is already in use.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EFAULT</errorcode></term>
+ <listitem>
+ <para><parameter>buf</parameter> references an inaccessible
+memory area.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINTR</errorcode></term>
+ <listitem>
+ <para>The call was interrupted by a signal before any
+data was written.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EIO</errorcode></term>
+ <listitem>
+ <para>I/O error. This indicates some hardware problem.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <function>write()</function> function is not
+supported by this driver, not on this device, or generally not on this
+type of device.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/gen-errors.xml b/Documentation/DocBook/media/v4l/gen-errors.xml
new file mode 100644
index 000000000..7e29a4e1f
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/gen-errors.xml
@@ -0,0 +1,77 @@
+<title>Generic Error Codes</title>
+
+<table frame="none" pgwide="1" id="gen-errors">
+ <title>Generic error codes</title>
+ <tgroup cols="2">
+ &cs-str;
+ <tbody valign="top">
+ <!-- Keep it ordered alphabetically -->
+ <row>
+ <entry>EAGAIN (aka EWOULDBLOCK)</entry>
+ <entry>The ioctl can't be handled because the device is in state where
+ it can't perform it. This could happen for example in case where
+ device is sleeping and ioctl is performed to query statistics.
+ It is also returned when the ioctl would need to wait
+ for an event, but the device was opened in non-blocking mode.
+ </entry>
+ </row>
+ <row>
+ <entry>EBADF</entry>
+ <entry>The file descriptor is not a valid.</entry>
+ </row>
+ <row>
+ <entry>EBUSY</entry>
+ <entry>The ioctl can't be handled because the device is busy. This is
+ typically return while device is streaming, and an ioctl tried to
+ change something that would affect the stream, or would require the
+ usage of a hardware resource that was already allocated. The ioctl
+ must not be retried without performing another action to fix the
+ problem first (typically: stop the stream before retrying).</entry>
+ </row>
+ <row>
+ <entry>EFAULT</entry>
+ <entry>There was a failure while copying data from/to userspace,
+ probably caused by an invalid pointer reference.</entry>
+ </row>
+ <row>
+ <entry>EINVAL</entry>
+ <entry>One or more of the ioctl parameters are invalid or out of the
+ allowed range. This is a widely used error code. See the individual
+ ioctl requests for specific causes.</entry>
+ </row>
+ <row>
+ <entry>ENODEV</entry>
+ <entry>Device not found or was removed.</entry>
+ </row>
+ <row>
+ <entry>ENOMEM</entry>
+ <entry>There's not enough memory to handle the desired operation.</entry>
+ </row>
+ <row>
+ <entry>ENOTTY</entry>
+ <entry>The ioctl is not supported by the driver, actually meaning that
+ the required functionality is not available, or the file
+ descriptor is not for a media device.</entry>
+ </row>
+ <row>
+ <entry>ENOSPC</entry>
+ <entry>On USB devices, the stream ioctl's can return this error, meaning
+ that this request would overcommit the usb bandwidth reserved
+ for periodic transfers (up to 80% of the USB bandwidth).</entry>
+ </row>
+ <row>
+ <entry>EPERM</entry>
+ <entry>Permission denied. Can be returned if the device needs write
+ permission, or some special capabilities is needed
+ (e. g. root)</entry>
+ </row>
+ </tbody>
+ </tgroup>
+</table>
+
+<para>Note 1: ioctls may return other error codes. Since errors may have side
+effects such as a driver reset, applications should abort on unexpected errors.
+</para>
+
+<para>Note 2: Request-specific error codes are listed in the individual
+requests descriptions.</para>
diff --git a/Documentation/DocBook/media/v4l/io.xml b/Documentation/DocBook/media/v4l/io.xml
new file mode 100644
index 000000000..1c17f802b
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/io.xml
@@ -0,0 +1,1533 @@
+ <title>Input/Output</title>
+
+ <para>The V4L2 API defines several different methods to read from or
+write to a device. All drivers exchanging data with applications must
+support at least one of them.</para>
+
+ <para>The classic I/O method using the <function>read()</function>
+and <function>write()</function> function is automatically selected
+after opening a V4L2 device. When the driver does not support this
+method attempts to read or write will fail at any time.</para>
+
+ <para>Other methods must be negotiated. To select the streaming I/O
+method with memory mapped or user buffers applications call the
+&VIDIOC-REQBUFS; ioctl. The asynchronous I/O method is not defined
+yet.</para>
+
+ <para>Video overlay can be considered another I/O method, although
+the application does not directly receive the image data. It is
+selected by initiating video overlay with the &VIDIOC-S-FMT; ioctl.
+For more information see <xref linkend="overlay" />.</para>
+
+ <para>Generally exactly one I/O method, including overlay, is
+associated with each file descriptor. The only exceptions are
+applications not exchanging data with a driver ("panel applications",
+see <xref linkend="open" />) and drivers permitting simultaneous video capturing
+and overlay using the same file descriptor, for compatibility with V4L
+and earlier versions of V4L2.</para>
+
+ <para><constant>VIDIOC_S_FMT</constant> and
+<constant>VIDIOC_REQBUFS</constant> would permit this to some degree,
+but for simplicity drivers need not support switching the I/O method
+(after first switching away from read/write) other than by closing
+and reopening the device.</para>
+
+ <para>The following sections describe the various I/O methods in
+more detail.</para>
+
+ <section id="rw">
+ <title>Read/Write</title>
+
+ <para>Input and output devices support the
+<function>read()</function> and <function>write()</function> function,
+respectively, when the <constant>V4L2_CAP_READWRITE</constant> flag in
+the <structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl is set.</para>
+
+ <para>Drivers may need the CPU to copy the data, but they may also
+support DMA to or from user memory, so this I/O method is not
+necessarily less efficient than other methods merely exchanging buffer
+pointers. It is considered inferior though because no meta-information
+like frame counters or timestamps are passed. This information is
+necessary to recognize frame dropping and to synchronize with other
+data streams. However this is also the simplest I/O method, requiring
+little or no setup to exchange data. It permits command line stunts
+like this (the <application>vidctrl</application> tool is
+fictitious):</para>
+
+ <informalexample>
+ <screen>
+&gt; vidctrl /dev/video --input=0 --format=YUYV --size=352x288
+&gt; dd if=/dev/video of=myimage.422 bs=202752 count=1
+</screen>
+ </informalexample>
+
+ <para>To read from the device applications use the
+&func-read; function, to write the &func-write; function.
+Drivers must implement one I/O method if they
+exchange data with applications, but it need not be this.<footnote>
+ <para>It would be desirable if applications could depend on
+drivers supporting all I/O interfaces, but as much as the complex
+memory mapping I/O can be inadequate for some devices we have no
+reason to require this interface, which is most useful for simple
+applications capturing still images.</para>
+ </footnote> When reading or writing is supported, the driver
+must also support the &func-select; and &func-poll;
+function.<footnote>
+ <para>At the driver level <function>select()</function> and
+<function>poll()</function> are the same, and
+<function>select()</function> is too important to be optional.</para>
+ </footnote></para>
+ </section>
+
+ <section id="mmap">
+ <title>Streaming I/O (Memory Mapping)</title>
+
+ <para>Input and output devices support this I/O method when the
+<constant>V4L2_CAP_STREAMING</constant> flag in the
+<structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl is set. There are two
+streaming methods, to determine if the memory mapping flavor is
+supported applications must call the &VIDIOC-REQBUFS; ioctl.</para>
+
+ <para>Streaming is an I/O method where only pointers to buffers
+are exchanged between application and driver, the data itself is not
+copied. Memory mapping is primarily intended to map buffers in device
+memory into the application's address space. Device memory can be for
+example the video memory on a graphics card with a video capture
+add-on. However, being the most efficient I/O method available for a
+long time, many other drivers support streaming as well, allocating
+buffers in DMA-able main memory.</para>
+
+ <para>A driver can support many sets of buffers. Each set is
+identified by a unique buffer type value. The sets are independent and
+each set can hold a different type of data. To access different sets
+at the same time different file descriptors must be used.<footnote>
+ <para>One could use one file descriptor and set the buffer
+type field accordingly when calling &VIDIOC-QBUF; etc., but it makes
+the <function>select()</function> function ambiguous. We also like the
+clean approach of one file descriptor per logical stream. Video
+overlay for example is also a logical stream, although the CPU is not
+needed for continuous operation.</para>
+ </footnote></para>
+
+ <para>To allocate device buffers applications call the
+&VIDIOC-REQBUFS; ioctl with the desired number of buffers and buffer
+type, for example <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant>.
+This ioctl can also be used to change the number of buffers or to free
+the allocated memory, provided none of the buffers are still
+mapped.</para>
+
+ <para>Before applications can access the buffers they must map
+them into their address space with the &func-mmap; function. The
+location of the buffers in device memory can be determined with the
+&VIDIOC-QUERYBUF; ioctl. In the single-planar API case, the
+<structfield>m.offset</structfield> and <structfield>length</structfield>
+returned in a &v4l2-buffer; are passed as sixth and second parameter to the
+<function>mmap()</function> function. When using the multi-planar API,
+&v4l2-buffer; contains an array of &v4l2-plane; structures, each
+containing its own <structfield>m.offset</structfield> and
+<structfield>length</structfield>. When using the multi-planar API, every
+plane of every buffer has to be mapped separately, so the number of
+calls to &func-mmap; should be equal to number of buffers times number of
+planes in each buffer. The offset and length values must not be modified.
+Remember, the buffers are allocated in physical memory, as opposed to virtual
+memory, which can be swapped out to disk. Applications should free the buffers
+as soon as possible with the &func-munmap; function.</para>
+
+ <example>
+ <title>Mapping buffers in the single-planar API</title>
+ <programlisting>
+&v4l2-requestbuffers; reqbuf;
+struct {
+ void *start;
+ size_t length;
+} *buffers;
+unsigned int i;
+
+memset(&amp;reqbuf, 0, sizeof(reqbuf));
+reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+reqbuf.memory = V4L2_MEMORY_MMAP;
+reqbuf.count = 20;
+
+if (-1 == ioctl (fd, &VIDIOC-REQBUFS;, &amp;reqbuf)) {
+ if (errno == EINVAL)
+ printf("Video capturing or mmap-streaming is not supported\n");
+ else
+ perror("VIDIOC_REQBUFS");
+
+ exit(EXIT_FAILURE);
+}
+
+/* We want at least five buffers. */
+
+if (reqbuf.count &lt; 5) {
+ /* You may need to free the buffers here. */
+ printf("Not enough buffer memory\n");
+ exit(EXIT_FAILURE);
+}
+
+buffers = calloc(reqbuf.count, sizeof(*buffers));
+assert(buffers != NULL);
+
+for (i = 0; i &lt; reqbuf.count; i++) {
+ &v4l2-buffer; buffer;
+
+ memset(&amp;buffer, 0, sizeof(buffer));
+ buffer.type = reqbuf.type;
+ buffer.memory = V4L2_MEMORY_MMAP;
+ buffer.index = i;
+
+ if (-1 == ioctl (fd, &VIDIOC-QUERYBUF;, &amp;buffer)) {
+ perror("VIDIOC_QUERYBUF");
+ exit(EXIT_FAILURE);
+ }
+
+ buffers[i].length = buffer.length; /* remember for munmap() */
+
+ buffers[i].start = mmap(NULL, buffer.length,
+ PROT_READ | PROT_WRITE, /* recommended */
+ MAP_SHARED, /* recommended */
+ fd, buffer.m.offset);
+
+ if (MAP_FAILED == buffers[i].start) {
+ /* If you do not exit here you should unmap() and free()
+ the buffers mapped so far. */
+ perror("mmap");
+ exit(EXIT_FAILURE);
+ }
+}
+
+/* Cleanup. */
+
+for (i = 0; i &lt; reqbuf.count; i++)
+ munmap(buffers[i].start, buffers[i].length);
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Mapping buffers in the multi-planar API</title>
+ <programlisting>
+&v4l2-requestbuffers; reqbuf;
+/* Our current format uses 3 planes per buffer */
+#define FMT_NUM_PLANES = 3
+
+struct {
+ void *start[FMT_NUM_PLANES];
+ size_t length[FMT_NUM_PLANES];
+} *buffers;
+unsigned int i, j;
+
+memset(&amp;reqbuf, 0, sizeof(reqbuf));
+reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
+reqbuf.memory = V4L2_MEMORY_MMAP;
+reqbuf.count = 20;
+
+if (ioctl(fd, &VIDIOC-REQBUFS;, &amp;reqbuf) &lt; 0) {
+ if (errno == EINVAL)
+ printf("Video capturing or mmap-streaming is not supported\n");
+ else
+ perror("VIDIOC_REQBUFS");
+
+ exit(EXIT_FAILURE);
+}
+
+/* We want at least five buffers. */
+
+if (reqbuf.count &lt; 5) {
+ /* You may need to free the buffers here. */
+ printf("Not enough buffer memory\n");
+ exit(EXIT_FAILURE);
+}
+
+buffers = calloc(reqbuf.count, sizeof(*buffers));
+assert(buffers != NULL);
+
+for (i = 0; i &lt; reqbuf.count; i++) {
+ &v4l2-buffer; buffer;
+ &v4l2-plane; planes[FMT_NUM_PLANES];
+
+ memset(&amp;buffer, 0, sizeof(buffer));
+ buffer.type = reqbuf.type;
+ buffer.memory = V4L2_MEMORY_MMAP;
+ buffer.index = i;
+ /* length in struct v4l2_buffer in multi-planar API stores the size
+ * of planes array. */
+ buffer.length = FMT_NUM_PLANES;
+ buffer.m.planes = planes;
+
+ if (ioctl(fd, &VIDIOC-QUERYBUF;, &amp;buffer) &lt; 0) {
+ perror("VIDIOC_QUERYBUF");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Every plane has to be mapped separately */
+ for (j = 0; j &lt; FMT_NUM_PLANES; j++) {
+ buffers[i].length[j] = buffer.m.planes[j].length; /* remember for munmap() */
+
+ buffers[i].start[j] = mmap(NULL, buffer.m.planes[j].length,
+ PROT_READ | PROT_WRITE, /* recommended */
+ MAP_SHARED, /* recommended */
+ fd, buffer.m.planes[j].m.offset);
+
+ if (MAP_FAILED == buffers[i].start[j]) {
+ /* If you do not exit here you should unmap() and free()
+ the buffers and planes mapped so far. */
+ perror("mmap");
+ exit(EXIT_FAILURE);
+ }
+ }
+}
+
+/* Cleanup. */
+
+for (i = 0; i &lt; reqbuf.count; i++)
+ for (j = 0; j &lt; FMT_NUM_PLANES; j++)
+ munmap(buffers[i].start[j], buffers[i].length[j]);
+ </programlisting>
+ </example>
+
+ <para>Conceptually streaming drivers maintain two buffer queues, an incoming
+and an outgoing queue. They separate the synchronous capture or output
+operation locked to a video clock from the application which is
+subject to random disk or network delays and preemption by
+other processes, thereby reducing the probability of data loss.
+The queues are organized as FIFOs, buffers will be
+output in the order enqueued in the incoming FIFO, and were
+captured in the order dequeued from the outgoing FIFO.</para>
+
+ <para>The driver may require a minimum number of buffers enqueued
+at all times to function, apart of this no limit exists on the number
+of buffers applications can enqueue in advance, or dequeue and
+process. They can also enqueue in a different order than buffers have
+been dequeued, and the driver can <emphasis>fill</emphasis> enqueued
+<emphasis>empty</emphasis> buffers in any order. <footnote>
+ <para>Random enqueue order permits applications processing
+images out of order (such as video codecs) to return buffers earlier,
+reducing the probability of data loss. Random fill order allows
+drivers to reuse buffers on a LIFO-basis, taking advantage of caches
+holding scatter-gather lists and the like.</para>
+ </footnote> The index number of a buffer (&v4l2-buffer;
+<structfield>index</structfield>) plays no role here, it only
+identifies the buffer.</para>
+
+ <para>Initially all mapped buffers are in dequeued state,
+inaccessible by the driver. For capturing applications it is customary
+to first enqueue all mapped buffers, then to start capturing and enter
+the read loop. Here the application waits until a filled buffer can be
+dequeued, and re-enqueues the buffer when the data is no longer
+needed. Output applications fill and enqueue buffers, when enough
+buffers are stacked up the output is started with
+<constant>VIDIOC_STREAMON</constant>. In the write loop, when
+the application runs out of free buffers, it must wait until an empty
+buffer can be dequeued and reused.</para>
+
+ <para>To enqueue and dequeue a buffer applications use the
+&VIDIOC-QBUF; and &VIDIOC-DQBUF; ioctl. The status of a buffer being
+mapped, enqueued, full or empty can be determined at any time using the
+&VIDIOC-QUERYBUF; ioctl. Two methods exist to suspend execution of the
+application until one or more buffers can be dequeued. By default
+<constant>VIDIOC_DQBUF</constant> blocks when no buffer is in the
+outgoing queue. When the <constant>O_NONBLOCK</constant> flag was
+given to the &func-open; function, <constant>VIDIOC_DQBUF</constant>
+returns immediately with an &EAGAIN; when no buffer is available. The
+&func-select; or &func-poll; functions are always available.</para>
+
+ <para>To start and stop capturing or output applications call the
+&VIDIOC-STREAMON; and &VIDIOC-STREAMOFF; ioctl. Note
+<constant>VIDIOC_STREAMOFF</constant> removes all buffers from both
+queues as a side effect. Since there is no notion of doing anything
+"now" on a multitasking system, if an application needs to synchronize
+with another event it should examine the &v4l2-buffer;
+<structfield>timestamp</structfield> of captured or outputted buffers.
+</para>
+
+ <para>Drivers implementing memory mapping I/O must
+support the <constant>VIDIOC_REQBUFS</constant>,
+<constant>VIDIOC_QUERYBUF</constant>,
+<constant>VIDIOC_QBUF</constant>, <constant>VIDIOC_DQBUF</constant>,
+<constant>VIDIOC_STREAMON</constant> and
+<constant>VIDIOC_STREAMOFF</constant> ioctl, the
+<function>mmap()</function>, <function>munmap()</function>,
+<function>select()</function> and <function>poll()</function>
+function.<footnote>
+ <para>At the driver level <function>select()</function> and
+<function>poll()</function> are the same, and
+<function>select()</function> is too important to be optional. The
+rest should be evident.</para>
+ </footnote></para>
+
+ <para>[capture example]</para>
+
+ </section>
+
+ <section id="userp">
+ <title>Streaming I/O (User Pointers)</title>
+
+ <para>Input and output devices support this I/O method when the
+<constant>V4L2_CAP_STREAMING</constant> flag in the
+<structfield>capabilities</structfield> field of &v4l2-capability;
+returned by the &VIDIOC-QUERYCAP; ioctl is set. If the particular user
+pointer method (not only memory mapping) is supported must be
+determined by calling the &VIDIOC-REQBUFS; ioctl.</para>
+
+ <para>This I/O method combines advantages of the read/write and
+memory mapping methods. Buffers (planes) are allocated by the application
+itself, and can reside for example in virtual or shared memory. Only
+pointers to data are exchanged, these pointers and meta-information
+are passed in &v4l2-buffer; (or in &v4l2-plane; in the multi-planar API case).
+The driver must be switched into user pointer I/O mode by calling the
+&VIDIOC-REQBUFS; with the desired buffer type. No buffers (planes) are allocated
+beforehand, consequently they are not indexed and cannot be queried like mapped
+buffers with the <constant>VIDIOC_QUERYBUF</constant> ioctl.</para>
+
+ <example>
+ <title>Initiating streaming I/O with user pointers</title>
+
+ <programlisting>
+&v4l2-requestbuffers; reqbuf;
+
+memset (&amp;reqbuf, 0, sizeof (reqbuf));
+reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+reqbuf.memory = V4L2_MEMORY_USERPTR;
+
+if (ioctl (fd, &VIDIOC-REQBUFS;, &amp;reqbuf) == -1) {
+ if (errno == EINVAL)
+ printf ("Video capturing or user pointer streaming is not supported\n");
+ else
+ perror ("VIDIOC_REQBUFS");
+
+ exit (EXIT_FAILURE);
+}
+ </programlisting>
+ </example>
+
+ <para>Buffer (plane) addresses and sizes are passed on the fly with the
+&VIDIOC-QBUF; ioctl. Although buffers are commonly cycled,
+applications can pass different addresses and sizes at each
+<constant>VIDIOC_QBUF</constant> call. If required by the hardware the
+driver swaps memory pages within physical memory to create a
+continuous area of memory. This happens transparently to the
+application in the virtual memory subsystem of the kernel. When buffer
+pages have been swapped out to disk they are brought back and finally
+locked in physical memory for DMA.<footnote>
+ <para>We expect that frequently used buffers are typically not
+swapped out. Anyway, the process of swapping, locking or generating
+scatter-gather lists may be time consuming. The delay can be masked by
+the depth of the incoming buffer queue, and perhaps by maintaining
+caches assuming a buffer will be soon enqueued again. On the other
+hand, to optimize memory usage drivers can limit the number of buffers
+locked in advance and recycle the most recently used buffers first. Of
+course, the pages of empty buffers in the incoming queue need not be
+saved to disk. Output buffers must be saved on the incoming and
+outgoing queue because an application may share them with other
+processes.</para>
+ </footnote></para>
+
+ <para>Filled or displayed buffers are dequeued with the
+&VIDIOC-DQBUF; ioctl. The driver can unlock the memory pages at any
+time between the completion of the DMA and this ioctl. The memory is
+also unlocked when &VIDIOC-STREAMOFF; is called, &VIDIOC-REQBUFS;, or
+when the device is closed. Applications must take care not to free
+buffers without dequeuing. For once, the buffers remain locked until
+further, wasting physical memory. Second the driver will not be
+notified when the memory is returned to the application's free list
+and subsequently reused for other purposes, possibly completing the
+requested DMA and overwriting valuable data.</para>
+
+ <para>For capturing applications it is customary to enqueue a
+number of empty buffers, to start capturing and enter the read loop.
+Here the application waits until a filled buffer can be dequeued, and
+re-enqueues the buffer when the data is no longer needed. Output
+applications fill and enqueue buffers, when enough buffers are stacked
+up output is started. In the write loop, when the application
+runs out of free buffers it must wait until an empty buffer can be
+dequeued and reused. Two methods exist to suspend execution of the
+application until one or more buffers can be dequeued. By default
+<constant>VIDIOC_DQBUF</constant> blocks when no buffer is in the
+outgoing queue. When the <constant>O_NONBLOCK</constant> flag was
+given to the &func-open; function, <constant>VIDIOC_DQBUF</constant>
+returns immediately with an &EAGAIN; when no buffer is available. The
+&func-select; or &func-poll; function are always available.</para>
+
+ <para>To start and stop capturing or output applications call the
+&VIDIOC-STREAMON; and &VIDIOC-STREAMOFF; ioctl. Note
+<constant>VIDIOC_STREAMOFF</constant> removes all buffers from both
+queues and unlocks all buffers as a side effect. Since there is no
+notion of doing anything "now" on a multitasking system, if an
+application needs to synchronize with another event it should examine
+the &v4l2-buffer; <structfield>timestamp</structfield> of captured
+or outputted buffers.</para>
+
+ <para>Drivers implementing user pointer I/O must
+support the <constant>VIDIOC_REQBUFS</constant>,
+<constant>VIDIOC_QBUF</constant>, <constant>VIDIOC_DQBUF</constant>,
+<constant>VIDIOC_STREAMON</constant> and
+<constant>VIDIOC_STREAMOFF</constant> ioctl, the
+<function>select()</function> and <function>poll()</function> function.<footnote>
+ <para>At the driver level <function>select()</function> and
+<function>poll()</function> are the same, and
+<function>select()</function> is too important to be optional. The
+rest should be evident.</para>
+ </footnote></para>
+ </section>
+
+ <section id="dmabuf">
+ <title>Streaming I/O (DMA buffer importing)</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental">experimental</link>
+ interface and may change in the future.</para>
+ </note>
+
+<para>The DMABUF framework provides a generic method for sharing buffers
+between multiple devices. Device drivers that support DMABUF can export a DMA
+buffer to userspace as a file descriptor (known as the exporter role), import a
+DMA buffer from userspace using a file descriptor previously exported for a
+different or the same device (known as the importer role), or both. This
+section describes the DMABUF importer role API in V4L2.</para>
+
+ <para>Refer to <link linkend="vidioc-expbuf">DMABUF exporting</link> for
+details about exporting V4L2 buffers as DMABUF file descriptors.</para>
+
+<para>Input and output devices support the streaming I/O method when the
+<constant>V4L2_CAP_STREAMING</constant> flag in the
+<structfield>capabilities</structfield> field of &v4l2-capability; returned by
+the &VIDIOC-QUERYCAP; ioctl is set. Whether importing DMA buffers through
+DMABUF file descriptors is supported is determined by calling the
+&VIDIOC-REQBUFS; ioctl with the memory type set to
+<constant>V4L2_MEMORY_DMABUF</constant>.</para>
+
+ <para>This I/O method is dedicated to sharing DMA buffers between different
+devices, which may be V4L devices or other video-related devices (e.g. DRM).
+Buffers (planes) are allocated by a driver on behalf of an application. Next,
+these buffers are exported to the application as file descriptors using an API
+which is specific for an allocator driver. Only such file descriptor are
+exchanged. The descriptors and meta-information are passed in &v4l2-buffer; (or
+in &v4l2-plane; in the multi-planar API case). The driver must be switched
+into DMABUF I/O mode by calling the &VIDIOC-REQBUFS; with the desired buffer
+type.</para>
+
+ <example>
+ <title>Initiating streaming I/O with DMABUF file descriptors</title>
+
+ <programlisting>
+&v4l2-requestbuffers; reqbuf;
+
+memset(&amp;reqbuf, 0, sizeof (reqbuf));
+reqbuf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+reqbuf.memory = V4L2_MEMORY_DMABUF;
+reqbuf.count = 1;
+
+if (ioctl(fd, &VIDIOC-REQBUFS;, &amp;reqbuf) == -1) {
+ if (errno == EINVAL)
+ printf("Video capturing or DMABUF streaming is not supported\n");
+ else
+ perror("VIDIOC_REQBUFS");
+
+ exit(EXIT_FAILURE);
+}
+ </programlisting>
+ </example>
+
+ <para>The buffer (plane) file descriptor is passed on the fly with the
+&VIDIOC-QBUF; ioctl. In case of multiplanar buffers, every plane can be
+associated with a different DMABUF descriptor. Although buffers are commonly
+cycled, applications can pass a different DMABUF descriptor at each
+<constant>VIDIOC_QBUF</constant> call.</para>
+
+ <example>
+ <title>Queueing DMABUF using single plane API</title>
+
+ <programlisting>
+int buffer_queue(int v4lfd, int index, int dmafd)
+{
+ &v4l2-buffer; buf;
+
+ memset(&amp;buf, 0, sizeof buf);
+ buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ buf.memory = V4L2_MEMORY_DMABUF;
+ buf.index = index;
+ buf.m.fd = dmafd;
+
+ if (ioctl(v4lfd, &VIDIOC-QBUF;, &amp;buf) == -1) {
+ perror("VIDIOC_QBUF");
+ return -1;
+ }
+
+ return 0;
+}
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Queueing DMABUF using multi plane API</title>
+
+ <programlisting>
+int buffer_queue_mp(int v4lfd, int index, int dmafd[], int n_planes)
+{
+ &v4l2-buffer; buf;
+ &v4l2-plane; planes[VIDEO_MAX_PLANES];
+ int i;
+
+ memset(&amp;buf, 0, sizeof buf);
+ buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
+ buf.memory = V4L2_MEMORY_DMABUF;
+ buf.index = index;
+ buf.m.planes = planes;
+ buf.length = n_planes;
+
+ memset(&amp;planes, 0, sizeof planes);
+
+ for (i = 0; i &lt; n_planes; ++i)
+ buf.m.planes[i].m.fd = dmafd[i];
+
+ if (ioctl(v4lfd, &VIDIOC-QBUF;, &amp;buf) == -1) {
+ perror("VIDIOC_QBUF");
+ return -1;
+ }
+
+ return 0;
+}
+ </programlisting>
+ </example>
+
+ <para>Captured or displayed buffers are dequeued with the
+&VIDIOC-DQBUF; ioctl. The driver can unlock the buffer at any
+time between the completion of the DMA and this ioctl. The memory is
+also unlocked when &VIDIOC-STREAMOFF; is called, &VIDIOC-REQBUFS;, or
+when the device is closed.</para>
+
+ <para>For capturing applications it is customary to enqueue a
+number of empty buffers, to start capturing and enter the read loop.
+Here the application waits until a filled buffer can be dequeued, and
+re-enqueues the buffer when the data is no longer needed. Output
+applications fill and enqueue buffers, when enough buffers are stacked
+up output is started. In the write loop, when the application
+runs out of free buffers it must wait until an empty buffer can be
+dequeued and reused. Two methods exist to suspend execution of the
+application until one or more buffers can be dequeued. By default
+<constant>VIDIOC_DQBUF</constant> blocks when no buffer is in the
+outgoing queue. When the <constant>O_NONBLOCK</constant> flag was
+given to the &func-open; function, <constant>VIDIOC_DQBUF</constant>
+returns immediately with an &EAGAIN; when no buffer is available. The
+&func-select; and &func-poll; functions are always available.</para>
+
+ <para>To start and stop capturing or displaying applications call the
+&VIDIOC-STREAMON; and &VIDIOC-STREAMOFF; ioctls. Note that
+<constant>VIDIOC_STREAMOFF</constant> removes all buffers from both queues and
+unlocks all buffers as a side effect. Since there is no notion of doing
+anything "now" on a multitasking system, if an application needs to synchronize
+with another event it should examine the &v4l2-buffer;
+<structfield>timestamp</structfield> of captured or outputted buffers.</para>
+
+ <para>Drivers implementing DMABUF importing I/O must support the
+<constant>VIDIOC_REQBUFS</constant>, <constant>VIDIOC_QBUF</constant>,
+<constant>VIDIOC_DQBUF</constant>, <constant>VIDIOC_STREAMON</constant> and
+<constant>VIDIOC_STREAMOFF</constant> ioctls, and the
+<function>select()</function> and <function>poll()</function> functions.</para>
+
+ </section>
+
+ <section id="async">
+ <title>Asynchronous I/O</title>
+
+ <para>This method is not defined yet.</para>
+ </section>
+
+ <section id="buffer">
+ <title>Buffers</title>
+
+ <para>A buffer contains data exchanged by application and
+driver using one of the Streaming I/O methods. In the multi-planar API, the
+data is held in planes, while the buffer structure acts as a container
+for the planes. Only pointers to buffers (planes) are exchanged, the data
+itself is not copied. These pointers, together with meta-information like
+timestamps or field parity, are stored in a struct
+<structname>v4l2_buffer</structname>, argument to
+the &VIDIOC-QUERYBUF;, &VIDIOC-QBUF; and &VIDIOC-DQBUF; ioctl.
+In the multi-planar API, some plane-specific members of struct
+<structname>v4l2_buffer</structname>, such as pointers and sizes for each
+plane, are stored in struct <structname>v4l2_plane</structname> instead.
+In that case, struct <structname>v4l2_buffer</structname> contains an array of
+plane structures.</para>
+
+ <para>Dequeued video buffers come with timestamps. The driver
+ decides at which part of the frame and with which clock the
+ timestamp is taken. Please see flags in the masks
+ <constant>V4L2_BUF_FLAG_TIMESTAMP_MASK</constant> and
+ <constant>V4L2_BUF_FLAG_TSTAMP_SRC_MASK</constant> in <xref
+ linkend="buffer-flags" />. These flags are always valid and constant
+ across all buffers during the whole video stream. Changes in these
+ flags may take place as a side effect of &VIDIOC-S-INPUT; or
+ &VIDIOC-S-OUTPUT; however. The
+ <constant>V4L2_BUF_FLAG_TIMESTAMP_COPY</constant> timestamp type
+ which is used by e.g. on mem-to-mem devices is an exception to the
+ rule: the timestamp source flags are copied from the OUTPUT video
+ buffer to the CAPTURE video buffer.</para>
+
+ <table frame="none" pgwide="1" id="v4l2-buffer">
+ <title>struct <structname>v4l2_buffer</structname></title>
+ <tgroup cols="4">
+ &cs-ustr;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry></entry>
+ <entry>Number of the buffer, set by the application except
+when calling &VIDIOC-DQBUF;, then it is set by the driver.
+This field can range from zero to the number of buffers allocated
+with the &VIDIOC-REQBUFS; ioctl (&v4l2-requestbuffers; <structfield>count</structfield>),
+plus any buffers allocated with &VIDIOC-CREATE-BUFS; minus one.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry></entry>
+ <entry>Type of the buffer, same as &v4l2-format;
+<structfield>type</structfield> or &v4l2-requestbuffers;
+<structfield>type</structfield>, set by the application. See <xref
+linkend="v4l2-buf-type" /></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>bytesused</structfield></entry>
+ <entry></entry>
+ <entry>The number of bytes occupied by the data in the
+buffer. It depends on the negotiated data format and may change with
+each buffer for compressed variable size data like JPEG images.
+Drivers must set this field when <structfield>type</structfield>
+refers to an input stream, applications when it refers to an output stream.
+If the application sets this to 0 for an output stream, then
+<structfield>bytesused</structfield> will be set to the size of the
+buffer (see the <structfield>length</structfield> field of this struct) by
+the driver. For multiplanar formats this field is ignored and the
+<structfield>planes</structfield> pointer is used instead.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry></entry>
+ <entry>Flags set by the application or driver, see <xref
+linkend="buffer-flags" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>field</structfield></entry>
+ <entry></entry>
+ <entry>Indicates the field order of the image in the
+buffer, see <xref linkend="v4l2-field" />. This field is not used when
+the buffer contains VBI data. Drivers must set it when
+<structfield>type</structfield> refers to an input stream,
+applications when it refers to an output stream.</entry>
+ </row>
+ <row>
+ <entry>struct timeval</entry>
+ <entry><structfield>timestamp</structfield></entry>
+ <entry></entry>
+ <entry><para>For input streams this is time when the first data
+ byte was captured, as returned by the
+ <function>clock_gettime()</function> function for the relevant
+ clock id; see <constant>V4L2_BUF_FLAG_TIMESTAMP_*</constant> in
+ <xref linkend="buffer-flags" />. For output streams the driver
+ stores the time at which the last data byte was actually sent out
+ in the <structfield>timestamp</structfield> field. This permits
+ applications to monitor the drift between the video and system
+ clock. For output streams that use <constant>V4L2_BUF_FLAG_TIMESTAMP_COPY</constant>
+ the application has to fill in the timestamp which will be copied
+ by the driver to the capture stream.</para></entry>
+ </row>
+ <row>
+ <entry>&v4l2-timecode;</entry>
+ <entry><structfield>timecode</structfield></entry>
+ <entry></entry>
+ <entry>When <structfield>type</structfield> is
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant> and the
+<constant>V4L2_BUF_FLAG_TIMECODE</constant> flag is set in
+<structfield>flags</structfield>, this structure contains a frame
+timecode. In <link linkend="v4l2-field">V4L2_FIELD_ALTERNATE</link>
+mode the top and bottom field contain the same timecode.
+Timecodes are intended to help video editing and are typically recorded on
+video tapes, but also embedded in compressed formats like MPEG. This
+field is independent of the <structfield>timestamp</structfield> and
+<structfield>sequence</structfield> fields.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>sequence</structfield></entry>
+ <entry></entry>
+ <entry>Set by the driver, counting the frames (not fields!) in
+sequence. This field is set for both input and output devices.</entry>
+ </row>
+ <row>
+ <entry spanname="hspan"><para>In <link
+linkend="v4l2-field">V4L2_FIELD_ALTERNATE</link> mode the top and
+bottom field have the same sequence number. The count starts at zero
+and includes dropped or repeated frames. A dropped frame was received
+by an input device but could not be stored due to lack of free buffer
+space. A repeated frame was displayed again by an output device
+because the application did not pass new data in
+time.</para><para>Note this may count the frames received
+e.g. over USB, without taking into account the frames dropped by the
+remote hardware due to limited compression throughput or bus
+bandwidth. These devices identify by not enumerating any video
+standards, see <xref linkend="standard" />.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>memory</structfield></entry>
+ <entry></entry>
+ <entry>This field must be set by applications and/or drivers
+in accordance with the selected I/O method. See <xref linkend="v4l2-memory"
+ /></entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry><structfield>m</structfield></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>offset</structfield></entry>
+ <entry>For the single-planar API and when
+<structfield>memory</structfield> is <constant>V4L2_MEMORY_MMAP</constant> this
+is the offset of the buffer from the start of the device memory. The value is
+returned by the driver and apart of serving as parameter to the &func-mmap;
+function not useful for applications. See <xref linkend="mmap" /> for details
+ </entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>unsigned long</entry>
+ <entry><structfield>userptr</structfield></entry>
+ <entry>For the single-planar API and when
+<structfield>memory</structfield> is <constant>V4L2_MEMORY_USERPTR</constant>
+this is a pointer to the buffer (casted to unsigned long type) in virtual
+memory, set by the application. See <xref linkend="userp" /> for details.
+ </entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>struct v4l2_plane</entry>
+ <entry><structfield>*planes</structfield></entry>
+ <entry>When using the multi-planar API, contains a userspace pointer
+ to an array of &v4l2-plane;. The size of the array should be put
+ in the <structfield>length</structfield> field of this
+ <structname>v4l2_buffer</structname> structure.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>int</entry>
+ <entry><structfield>fd</structfield></entry>
+ <entry>For the single-plane API and when
+<structfield>memory</structfield> is <constant>V4L2_MEMORY_DMABUF</constant> this
+is the file descriptor associated with a DMABUF buffer.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>length</structfield></entry>
+ <entry></entry>
+ <entry>Size of the buffer (not the payload) in bytes for the
+ single-planar API. This is set by the driver based on the calls to
+ &VIDIOC-REQBUFS; and/or &VIDIOC-CREATE-BUFS;. For the multi-planar API the application sets
+ this to the number of elements in the <structfield>planes</structfield>
+ array. The driver will fill in the actual number of valid elements in
+ that array.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved2</structfield></entry>
+ <entry></entry>
+ <entry>A place holder for future extensions. Applications
+should set this to 0.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield></entry>
+ <entry></entry>
+ <entry>A place holder for future extensions. Applications
+should set this to 0.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-plane">
+ <title>struct <structname>v4l2_plane</structname></title>
+ <tgroup cols="4">
+ &cs-ustr;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>bytesused</structfield></entry>
+ <entry></entry>
+ <entry>The number of bytes occupied by data in the plane
+ (its payload). Drivers must set this field when <structfield>type</structfield>
+ refers to an input stream, applications when it refers to an output stream.
+ If the application sets this to 0 for an output stream, then
+ <structfield>bytesused</structfield> will be set to the size of the
+ plane (see the <structfield>length</structfield> field of this struct)
+ by the driver. Note that the actual image data starts at
+ <structfield>data_offset</structfield> which may not be 0.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>length</structfield></entry>
+ <entry></entry>
+ <entry>Size in bytes of the plane (not its payload). This is set by the driver
+ based on the calls to &VIDIOC-REQBUFS; and/or &VIDIOC-CREATE-BUFS;.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry><structfield>m</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>mem_offset</structfield></entry>
+ <entry>When the memory type in the containing &v4l2-buffer; is
+ <constant>V4L2_MEMORY_MMAP</constant>, this is the value that
+ should be passed to &func-mmap;, similar to the
+ <structfield>offset</structfield> field in &v4l2-buffer;.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>unsigned long</entry>
+ <entry><structfield>userptr</structfield></entry>
+ <entry>When the memory type in the containing &v4l2-buffer; is
+ <constant>V4L2_MEMORY_USERPTR</constant>, this is a userspace
+ pointer to the memory allocated for this plane by an application.
+ </entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>int</entry>
+ <entry><structfield>fd</structfield></entry>
+ <entry>When the memory type in the containing &v4l2-buffer; is
+ <constant>V4L2_MEMORY_DMABUF</constant>, this is a file
+ descriptor associated with a DMABUF buffer, similar to the
+ <structfield>fd</structfield> field in &v4l2-buffer;.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>data_offset</structfield></entry>
+ <entry></entry>
+ <entry>Offset in bytes to video data in the plane.
+ Drivers must set this field when <structfield>type</structfield>
+ refers to an input stream, applications when it refers to an output stream.
+ Note that data_offset is included in <structfield>bytesused</structfield>.
+ So the size of the image in the plane is
+ <structfield>bytesused</structfield>-<structfield>data_offset</structfield> at
+ offset <structfield>data_offset</structfield> from the start of the plane.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved[11]</structfield></entry>
+ <entry></entry>
+ <entry>Reserved for future use. Should be zeroed by an
+ application.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-buf-type">
+ <title>enum v4l2_buf_type</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant></entry>
+ <entry>1</entry>
+ <entry>Buffer of a single-planar video capture stream, see <xref
+ linkend="capture" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE</constant>
+ </entry>
+ <entry>9</entry>
+ <entry>Buffer of a multi-planar video capture stream, see <xref
+ linkend="capture" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant></entry>
+ <entry>2</entry>
+ <entry>Buffer of a single-planar video output stream, see <xref
+ linkend="output" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE</constant>
+ </entry>
+ <entry>10</entry>
+ <entry>Buffer of a multi-planar video output stream, see <xref
+ linkend="output" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_VIDEO_OVERLAY</constant></entry>
+ <entry>3</entry>
+ <entry>Buffer for video overlay, see <xref linkend="overlay" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_VBI_CAPTURE</constant></entry>
+ <entry>4</entry>
+ <entry>Buffer of a raw VBI capture stream, see <xref
+ linkend="raw-vbi" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_VBI_OUTPUT</constant></entry>
+ <entry>5</entry>
+ <entry>Buffer of a raw VBI output stream, see <xref
+ linkend="raw-vbi" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_SLICED_VBI_CAPTURE</constant></entry>
+ <entry>6</entry>
+ <entry>Buffer of a sliced VBI capture stream, see <xref
+ linkend="sliced" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_SLICED_VBI_OUTPUT</constant></entry>
+ <entry>7</entry>
+ <entry>Buffer of a sliced VBI output stream, see <xref
+ linkend="sliced" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY</constant></entry>
+ <entry>8</entry>
+ <entry>Buffer for video output overlay (OSD), see <xref
+ linkend="osd" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_TYPE_SDR_CAPTURE</constant></entry>
+ <entry>11</entry>
+ <entry>Buffer for Software Defined Radio (SDR), see <xref
+ linkend="sdr" />.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="buffer-flags">
+ <title>Buffer Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_MAPPED</constant></entry>
+ <entry>0x00000001</entry>
+ <entry>The buffer resides in device memory and has been mapped
+into the application's address space, see <xref linkend="mmap" /> for details.
+Drivers set or clear this flag when the
+<link linkend="vidioc-querybuf">VIDIOC_QUERYBUF</link>, <link
+ linkend="vidioc-qbuf">VIDIOC_QBUF</link> or <link
+ linkend="vidioc-qbuf">VIDIOC_DQBUF</link> ioctl is called. Set by the driver.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_QUEUED</constant></entry>
+ <entry>0x00000002</entry>
+ <entry>Internally drivers maintain two buffer queues, an
+incoming and outgoing queue. When this flag is set, the buffer is
+currently on the incoming queue. It automatically moves to the
+outgoing queue after the buffer has been filled (capture devices) or
+displayed (output devices). Drivers set or clear this flag when the
+<constant>VIDIOC_QUERYBUF</constant> ioctl is called. After
+(successful) calling the <constant>VIDIOC_QBUF </constant>ioctl it is
+always set and after <constant>VIDIOC_DQBUF</constant> always
+cleared.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_DONE</constant></entry>
+ <entry>0x00000004</entry>
+ <entry>When this flag is set, the buffer is currently on
+the outgoing queue, ready to be dequeued from the driver. Drivers set
+or clear this flag when the <constant>VIDIOC_QUERYBUF</constant> ioctl
+is called. After calling the <constant>VIDIOC_QBUF</constant> or
+<constant>VIDIOC_DQBUF</constant> it is always cleared. Of course a
+buffer cannot be on both queues at the same time, the
+<constant>V4L2_BUF_FLAG_QUEUED</constant> and
+<constant>V4L2_BUF_FLAG_DONE</constant> flag are mutually exclusive.
+They can be both cleared however, then the buffer is in "dequeued"
+state, in the application domain so to say.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_ERROR</constant></entry>
+ <entry>0x00000040</entry>
+ <entry>When this flag is set, the buffer has been dequeued
+ successfully, although the data might have been corrupted.
+ This is recoverable, streaming may continue as normal and
+ the buffer may be reused normally.
+ Drivers set this flag when the <constant>VIDIOC_DQBUF</constant>
+ ioctl is called.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_KEYFRAME</constant></entry>
+ <entry>0x00000008</entry>
+ <entry>Drivers set or clear this flag when calling the
+<constant>VIDIOC_DQBUF</constant> ioctl. It may be set by video
+capture devices when the buffer contains a compressed image which is a
+key frame (or field), &ie; can be decompressed on its own. Also known as
+an I-frame. Applications can set this bit when <structfield>type</structfield>
+refers to an output stream.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_PFRAME</constant></entry>
+ <entry>0x00000010</entry>
+ <entry>Similar to <constant>V4L2_BUF_FLAG_KEYFRAME</constant>
+this flags predicted frames or fields which contain only differences to a
+previous key frame. Applications can set this bit when <structfield>type</structfield>
+refers to an output stream.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_BFRAME</constant></entry>
+ <entry>0x00000020</entry>
+ <entry>Similar to <constant>V4L2_BUF_FLAG_KEYFRAME</constant>
+this flags a bi-directional predicted frame or field which contains only
+the differences between the current frame and both the preceding and following
+key frames to specify its content. Applications can set this bit when
+<structfield>type</structfield> refers to an output stream.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_TIMECODE</constant></entry>
+ <entry>0x00000100</entry>
+ <entry>The <structfield>timecode</structfield> field is valid.
+Drivers set or clear this flag when the <constant>VIDIOC_DQBUF</constant>
+ioctl is called. Applications can set this bit and the corresponding
+<structfield>timecode</structfield> structure when <structfield>type</structfield>
+refers to an output stream.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_PREPARED</constant></entry>
+ <entry>0x00000400</entry>
+ <entry>The buffer has been prepared for I/O and can be queued by the
+application. Drivers set or clear this flag when the
+<link linkend="vidioc-querybuf">VIDIOC_QUERYBUF</link>, <link
+ linkend="vidioc-qbuf">VIDIOC_PREPARE_BUF</link>, <link
+ linkend="vidioc-qbuf">VIDIOC_QBUF</link> or <link
+ linkend="vidioc-qbuf">VIDIOC_DQBUF</link> ioctl is called.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_NO_CACHE_INVALIDATE</constant></entry>
+ <entry>0x00000800</entry>
+ <entry>Caches do not have to be invalidated for this buffer.
+Typically applications shall use this flag if the data captured in the buffer
+is not going to be touched by the CPU, instead the buffer will, probably, be
+passed on to a DMA-capable hardware unit for further processing or output.
+</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_NO_CACHE_CLEAN</constant></entry>
+ <entry>0x00001000</entry>
+ <entry>Caches do not have to be cleaned for this buffer.
+Typically applications shall use this flag for output buffers if the data
+in this buffer has not been created by the CPU but by some DMA-capable unit,
+in which case caches have not been used.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_TIMESTAMP_MASK</constant></entry>
+ <entry>0x0000e000</entry>
+ <entry>Mask for timestamp types below. To test the
+ timestamp type, mask out bits not belonging to timestamp
+ type by performing a logical and operation with buffer
+ flags and timestamp mask.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN</constant></entry>
+ <entry>0x00000000</entry>
+ <entry>Unknown timestamp type. This type is used by
+ drivers before Linux 3.9 and may be either monotonic (see
+ below) or realtime (wall clock). Monotonic clock has been
+ favoured in embedded systems whereas most of the drivers
+ use the realtime clock. Either kinds of timestamps are
+ available in user space via
+ <function>clock_gettime(2)</function> using clock IDs
+ <constant>CLOCK_MONOTONIC</constant> and
+ <constant>CLOCK_REALTIME</constant>, respectively.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC</constant></entry>
+ <entry>0x00002000</entry>
+ <entry>The buffer timestamp has been taken from the
+ <constant>CLOCK_MONOTONIC</constant> clock. To access the
+ same clock outside V4L2, use
+ <function>clock_gettime(2)</function> .</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_TIMESTAMP_COPY</constant></entry>
+ <entry>0x00004000</entry>
+ <entry>The CAPTURE buffer timestamp has been taken from the
+ corresponding OUTPUT buffer. This flag applies only to mem2mem devices.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_TSTAMP_SRC_MASK</constant></entry>
+ <entry>0x00070000</entry>
+ <entry>Mask for timestamp sources below. The timestamp source
+ defines the point of time the timestamp is taken in relation to
+ the frame. Logical 'and' operation between the
+ <structfield>flags</structfield> field and
+ <constant>V4L2_BUF_FLAG_TSTAMP_SRC_MASK</constant> produces the
+ value of the timestamp source. Applications must set the timestamp
+ source when <structfield>type</structfield> refers to an output stream
+ and <constant>V4L2_BUF_FLAG_TIMESTAMP_COPY</constant> is set.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_TSTAMP_SRC_EOF</constant></entry>
+ <entry>0x00000000</entry>
+ <entry>End Of Frame. The buffer timestamp has been taken
+ when the last pixel of the frame has been received or the
+ last pixel of the frame has been transmitted. In practice,
+ software generated timestamps will typically be read from
+ the clock a small amount of time after the last pixel has
+ been received or transmitten, depending on the system and
+ other activity in it.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BUF_FLAG_TSTAMP_SRC_SOE</constant></entry>
+ <entry>0x00010000</entry>
+ <entry>Start Of Exposure. The buffer timestamp has been
+ taken when the exposure of the frame has begun. This is
+ only valid for the
+ <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant> buffer
+ type.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-memory">
+ <title>enum v4l2_memory</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MEMORY_MMAP</constant></entry>
+ <entry>1</entry>
+ <entry>The buffer is used for <link linkend="mmap">memory
+mapping</link> I/O.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MEMORY_USERPTR</constant></entry>
+ <entry>2</entry>
+ <entry>The buffer is used for <link linkend="userp">user
+pointer</link> I/O.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MEMORY_OVERLAY</constant></entry>
+ <entry>3</entry>
+ <entry>[to do]</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_MEMORY_DMABUF</constant></entry>
+ <entry>4</entry>
+ <entry>The buffer is used for <link linkend="dmabuf">DMA shared
+buffer</link> I/O.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <section>
+ <title>Timecodes</title>
+
+ <para>The <structname>v4l2_timecode</structname> structure is
+designed to hold a <xref linkend="smpte12m" /> or similar timecode.
+(struct <structname>timeval</structname> timestamps are stored in
+&v4l2-buffer; field <structfield>timestamp</structfield>.)</para>
+
+ <table frame="none" pgwide="1" id="v4l2-timecode">
+ <title>struct <structname>v4l2_timecode</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Frame rate the timecodes are based on, see <xref
+ linkend="timecode-type" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Timecode flags, see <xref linkend="timecode-flags" />.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>frames</structfield></entry>
+ <entry>Frame count, 0 ... 23/24/29/49/59, depending on the
+ type of timecode.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>seconds</structfield></entry>
+ <entry>Seconds count, 0 ... 59. This is a binary, not BCD number.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>minutes</structfield></entry>
+ <entry>Minutes count, 0 ... 59. This is a binary, not BCD number.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>hours</structfield></entry>
+ <entry>Hours count, 0 ... 29. This is a binary, not BCD number.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>userbits</structfield>[4]</entry>
+ <entry>The "user group" bits from the timecode.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="timecode-type">
+ <title>Timecode Types</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_TC_TYPE_24FPS</constant></entry>
+ <entry>1</entry>
+ <entry>24 frames per second, i.&nbsp;e. film.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TC_TYPE_25FPS</constant></entry>
+ <entry>2</entry>
+ <entry>25 frames per second, &ie; PAL or SECAM video.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TC_TYPE_30FPS</constant></entry>
+ <entry>3</entry>
+ <entry>30 frames per second, &ie; NTSC video.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TC_TYPE_50FPS</constant></entry>
+ <entry>4</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TC_TYPE_60FPS</constant></entry>
+ <entry>5</entry>
+ <entry></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="timecode-flags">
+ <title>Timecode Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_TC_FLAG_DROPFRAME</constant></entry>
+ <entry>0x0001</entry>
+ <entry>Indicates "drop frame" semantics for counting frames
+in 29.97 fps material. When set, frame numbers 0 and 1 at the start of
+each minute, except minutes 0, 10, 20, 30, 40, 50 are omitted from the
+count.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TC_FLAG_COLORFRAME</constant></entry>
+ <entry>0x0002</entry>
+ <entry>The "color frame" flag.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TC_USERBITS_field</constant></entry>
+ <entry>0x000C</entry>
+ <entry>Field mask for the "binary group flags".</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TC_USERBITS_USERDEFINED</constant></entry>
+ <entry>0x0000</entry>
+ <entry>Unspecified format.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TC_USERBITS_8BITCHARS</constant></entry>
+ <entry>0x0008</entry>
+ <entry>8-bit ISO characters.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+ </section>
+
+ <section id="field-order">
+ <title>Field Order</title>
+
+ <para>We have to distinguish between progressive and interlaced
+video. Progressive video transmits all lines of a video image
+sequentially. Interlaced video divides an image into two fields,
+containing only the odd and even lines of the image, respectively.
+Alternating the so called odd and even field are transmitted, and due
+to a small delay between fields a cathode ray TV displays the lines
+interleaved, yielding the original frame. This curious technique was
+invented because at refresh rates similar to film the image would
+fade out too quickly. Transmitting fields reduces the flicker without
+the necessity of doubling the frame rate and with it the bandwidth
+required for each channel.</para>
+
+ <para>It is important to understand a video camera does not expose
+one frame at a time, merely transmitting the frames separated into
+fields. The fields are in fact captured at two different instances in
+time. An object on screen may well move between one field and the
+next. For applications analysing motion it is of paramount importance
+to recognize which field of a frame is older, the <emphasis>temporal
+order</emphasis>.</para>
+
+ <para>When the driver provides or accepts images field by field
+rather than interleaved, it is also important applications understand
+how the fields combine to frames. We distinguish between top (aka odd) and
+bottom (aka even) fields, the <emphasis>spatial order</emphasis>: The first line
+of the top field is the first line of an interlaced frame, the first
+line of the bottom field is the second line of that frame.</para>
+
+ <para>However because fields were captured one after the other,
+arguing whether a frame commences with the top or bottom field is
+pointless. Any two successive top and bottom, or bottom and top fields
+yield a valid frame. Only when the source was progressive to begin
+with, &eg; when transferring film to video, two fields may come from
+the same frame, creating a natural order.</para>
+
+ <para>Counter to intuition the top field is not necessarily the
+older field. Whether the older field contains the top or bottom lines
+is a convention determined by the video standard. Hence the
+distinction between temporal and spatial order of fields. The diagrams
+below should make this clearer.</para>
+
+ <para>All video capture and output devices must report the current
+field order. Some drivers may permit the selection of a different
+order, to this end applications initialize the
+<structfield>field</structfield> field of &v4l2-pix-format; before
+calling the &VIDIOC-S-FMT; ioctl. If this is not desired it should
+have the value <constant>V4L2_FIELD_ANY</constant> (0).</para>
+
+ <table frame="none" pgwide="1" id="v4l2-field">
+ <title>enum v4l2_field</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_FIELD_ANY</constant></entry>
+ <entry>0</entry>
+ <entry>Applications request this field order when any
+one of the <constant>V4L2_FIELD_NONE</constant>,
+<constant>V4L2_FIELD_TOP</constant>,
+<constant>V4L2_FIELD_BOTTOM</constant>, or
+<constant>V4L2_FIELD_INTERLACED</constant> formats is acceptable.
+Drivers choose depending on hardware capabilities or e.&nbsp;g. the
+requested image size, and return the actual field order. Drivers must
+never return <constant>V4L2_FIELD_ANY</constant>. If multiple
+field orders are possible the driver must choose one of the possible
+field orders during &VIDIOC-S-FMT; or &VIDIOC-TRY-FMT;. &v4l2-buffer;
+<structfield>field</structfield> can never be
+<constant>V4L2_FIELD_ANY</constant>.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FIELD_NONE</constant></entry>
+ <entry>1</entry>
+ <entry>Images are in progressive format, not interlaced.
+The driver may also indicate this order when it cannot distinguish
+between <constant>V4L2_FIELD_TOP</constant> and
+<constant>V4L2_FIELD_BOTTOM</constant>.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FIELD_TOP</constant></entry>
+ <entry>2</entry>
+ <entry>Images consist of the top (aka odd) field only.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FIELD_BOTTOM</constant></entry>
+ <entry>3</entry>
+ <entry>Images consist of the bottom (aka even) field only.
+Applications may wish to prevent a device from capturing interlaced
+images because they will have "comb" or "feathering" artefacts around
+moving objects.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FIELD_INTERLACED</constant></entry>
+ <entry>4</entry>
+ <entry>Images contain both fields, interleaved line by
+line. The temporal order of the fields (whether the top or bottom
+field is first transmitted) depends on the current video standard.
+M/NTSC transmits the bottom field first, all other standards the top
+field first.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FIELD_SEQ_TB</constant></entry>
+ <entry>5</entry>
+ <entry>Images contain both fields, the top field lines
+are stored first in memory, immediately followed by the bottom field
+lines. Fields are always stored in temporal order, the older one first
+in memory. Image sizes refer to the frame, not fields.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FIELD_SEQ_BT</constant></entry>
+ <entry>6</entry>
+ <entry>Images contain both fields, the bottom field
+lines are stored first in memory, immediately followed by the top
+field lines. Fields are always stored in temporal order, the older one
+first in memory. Image sizes refer to the frame, not fields.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FIELD_ALTERNATE</constant></entry>
+ <entry>7</entry>
+ <entry>The two fields of a frame are passed in separate
+buffers, in temporal order, &ie; the older one first. To indicate the field
+parity (whether the current field is a top or bottom field) the driver
+or application, depending on data direction, must set &v4l2-buffer;
+<structfield>field</structfield> to
+<constant>V4L2_FIELD_TOP</constant> or
+<constant>V4L2_FIELD_BOTTOM</constant>. Any two successive fields pair
+to build a frame. If fields are successive, without any dropped fields
+between them (fields can drop individually), can be determined from
+the &v4l2-buffer; <structfield>sequence</structfield> field. This format
+cannot be selected when using the read/write I/O method since there
+is no way to communicate if a field was a top or bottom field.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FIELD_INTERLACED_TB</constant></entry>
+ <entry>8</entry>
+ <entry>Images contain both fields, interleaved line by
+line, top field first. The top field is transmitted first.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FIELD_INTERLACED_BT</constant></entry>
+ <entry>9</entry>
+ <entry>Images contain both fields, interleaved line by
+line, top field first. The bottom field is transmitted first.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <figure id="fieldseq-tb">
+ <title>Field Order, Top Field First Transmitted</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="fieldseq_tb.pdf" format="PS" />
+ </imageobject>
+ <imageobject>
+ <imagedata fileref="fieldseq_tb.gif" format="GIF" />
+ </imageobject>
+ </mediaobject>
+ </figure>
+
+ <figure id="fieldseq-bt">
+ <title>Field Order, Bottom Field First Transmitted</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="fieldseq_bt.pdf" format="PS" />
+ </imageobject>
+ <imageobject>
+ <imagedata fileref="fieldseq_bt.gif" format="GIF" />
+ </imageobject>
+ </mediaobject>
+ </figure>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/keytable.c.xml b/Documentation/DocBook/media/v4l/keytable.c.xml
new file mode 100644
index 000000000..d53254a3b
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/keytable.c.xml
@@ -0,0 +1,172 @@
+<programlisting>
+/* keytable.c - This program allows checking/replacing keys at IR
+
+ Copyright (C) 2006-2009 Mauro Carvalho Chehab &lt;mchehab@infradead.org&gt;
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ */
+
+#include &lt;ctype.h&gt;
+#include &lt;errno.h&gt;
+#include &lt;fcntl.h&gt;
+#include &lt;stdio.h&gt;
+#include &lt;stdlib.h&gt;
+#include &lt;string.h&gt;
+#include &lt;linux/input.h&gt;
+#include &lt;sys/ioctl.h&gt;
+
+#include "parse.h"
+
+void prtcode (int *codes)
+{
+ struct parse_key *p;
+
+ for (p=keynames;p-&gt;name!=NULL;p++) {
+ if (p-&gt;value == (unsigned)codes[1]) {
+ printf("scancode 0x%04x = %s (0x%02x)\n", codes[0], p-&gt;name, codes[1]);
+ return;
+ }
+ }
+
+ if (isprint (codes[1]))
+ printf("scancode %d = '%c' (0x%02x)\n", codes[0], codes[1], codes[1]);
+ else
+ printf("scancode %d = 0x%02x\n", codes[0], codes[1]);
+}
+
+int parse_code(char *string)
+{
+ struct parse_key *p;
+
+ for (p=keynames;p-&gt;name!=NULL;p++) {
+ if (!strcasecmp(p-&gt;name, string)) {
+ return p-&gt;value;
+ }
+ }
+ return -1;
+}
+
+int main (int argc, char *argv[])
+{
+ int fd;
+ unsigned int i, j;
+ int codes[2];
+
+ if (argc&lt;2 || argc&gt;4) {
+ printf ("usage: %s &lt;device&gt; to get table; or\n"
+ " %s &lt;device&gt; &lt;scancode&gt; &lt;keycode&gt;\n"
+ " %s &lt;device&gt; &lt;keycode_file&gt;\n",*argv,*argv,*argv);
+ return -1;
+ }
+
+ if ((fd = open(argv[1], O_RDONLY)) &lt; 0) {
+ perror("Couldn't open input device");
+ return(-1);
+ }
+
+ if (argc==4) {
+ int value;
+
+ value=parse_code(argv[3]);
+
+ if (value==-1) {
+ value = strtol(argv[3], NULL, 0);
+ if (errno)
+ perror("value");
+ }
+
+ codes [0] = (unsigned) strtol(argv[2], NULL, 0);
+ codes [1] = (unsigned) value;
+
+ if(ioctl(fd, EVIOCSKEYCODE, codes))
+ perror ("EVIOCSKEYCODE");
+
+ if(ioctl(fd, EVIOCGKEYCODE, codes)==0)
+ prtcode(codes);
+ return 0;
+ }
+
+ if (argc==3) {
+ FILE *fin;
+ int value;
+ char *scancode, *keycode, s[2048];
+
+ fin=fopen(argv[2],"r");
+ if (fin==NULL) {
+ perror ("opening keycode file");
+ return -1;
+ }
+
+ /* Clears old table */
+ for (j = 0; j &lt; 256; j++) {
+ for (i = 0; i &lt; 256; i++) {
+ codes[0] = (j &lt;&lt; 8) | i;
+ codes[1] = KEY_RESERVED;
+ ioctl(fd, EVIOCSKEYCODE, codes);
+ }
+ }
+
+ while (fgets(s,sizeof(s),fin)) {
+ scancode=strtok(s,"\n\t =:");
+ if (!scancode) {
+ perror ("parsing input file scancode");
+ return -1;
+ }
+ if (!strcasecmp(scancode, "scancode")) {
+ scancode = strtok(NULL,"\n\t =:");
+ if (!scancode) {
+ perror ("parsing input file scancode");
+ return -1;
+ }
+ }
+
+ keycode=strtok(NULL,"\n\t =:(");
+ if (!keycode) {
+ perror ("parsing input file keycode");
+ return -1;
+ }
+
+ // printf ("parsing %s=%s:", scancode, keycode);
+ value=parse_code(keycode);
+ // printf ("\tvalue=%d\n",value);
+
+ if (value==-1) {
+ value = strtol(keycode, NULL, 0);
+ if (errno)
+ perror("value");
+ }
+
+ codes [0] = (unsigned) strtol(scancode, NULL, 0);
+ codes [1] = (unsigned) value;
+
+ // printf("\t%04x=%04x\n",codes[0], codes[1]);
+ if(ioctl(fd, EVIOCSKEYCODE, codes)) {
+ fprintf(stderr, "Setting scancode 0x%04x with 0x%04x via ",codes[0], codes[1]);
+ perror ("EVIOCSKEYCODE");
+ }
+
+ if(ioctl(fd, EVIOCGKEYCODE, codes)==0)
+ prtcode(codes);
+ }
+ return 0;
+ }
+
+ /* Get scancode table */
+ for (j = 0; j &lt; 256; j++) {
+ for (i = 0; i &lt; 256; i++) {
+ codes[0] = (j &lt;&lt; 8) | i;
+ if (!ioctl(fd, EVIOCGKEYCODE, codes) &amp;&amp; codes[1] != KEY_RESERVED)
+ prtcode(codes);
+ }
+ }
+ return 0;
+}
+
+</programlisting>
diff --git a/Documentation/DocBook/media/v4l/libv4l.xml b/Documentation/DocBook/media/v4l/libv4l.xml
new file mode 100644
index 000000000..d3b71e200
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/libv4l.xml
@@ -0,0 +1,160 @@
+<title>Libv4l Userspace Library</title>
+<section id="libv4l-introduction">
+ <title>Introduction</title>
+
+ <para>libv4l is a collection of libraries which adds a thin abstraction
+layer on top of video4linux2 devices. The purpose of this (thin) layer
+is to make it easy for application writers to support a wide variety of
+devices without having to write separate code for different devices in the
+same class.</para>
+<para>An example of using libv4l is provided by
+<link linkend='v4l2grab-example'>v4l2grab</link>.
+</para>
+
+ <para>libv4l consists of 3 different libraries:</para>
+ <section>
+ <title>libv4lconvert</title>
+
+ <para>libv4lconvert is a library that converts several
+different pixelformats found in V4L2 drivers into a few common RGB and
+YUY formats.</para>
+ <para>It currently accepts the following V4L2 driver formats:
+<link linkend="V4L2-PIX-FMT-BGR24"><constant>V4L2_PIX_FMT_BGR24</constant></link>,
+<link linkend="V4L2-PIX-FMT-HM12"><constant>V4L2_PIX_FMT_HM12</constant></link>,
+<link linkend="V4L2-PIX-FMT-JPEG"><constant>V4L2_PIX_FMT_JPEG</constant></link>,
+<link linkend="V4L2-PIX-FMT-MJPEG"><constant>V4L2_PIX_FMT_MJPEG</constant></link>,
+<link linkend="V4L2-PIX-FMT-MR97310A"><constant>V4L2_PIX_FMT_MR97310A</constant></link>,
+<link linkend="V4L2-PIX-FMT-OV511"><constant>V4L2_PIX_FMT_OV511</constant></link>,
+<link linkend="V4L2-PIX-FMT-OV518"><constant>V4L2_PIX_FMT_OV518</constant></link>,
+<link linkend="V4L2-PIX-FMT-PAC207"><constant>V4L2_PIX_FMT_PAC207</constant></link>,
+<link linkend="V4L2-PIX-FMT-PJPG"><constant>V4L2_PIX_FMT_PJPG</constant></link>,
+<link linkend="V4L2-PIX-FMT-RGB24"><constant>V4L2_PIX_FMT_RGB24</constant></link>,
+<link linkend="V4L2-PIX-FMT-SBGGR8"><constant>V4L2_PIX_FMT_SBGGR8</constant></link>,
+<link linkend="V4L2-PIX-FMT-SGBRG8"><constant>V4L2_PIX_FMT_SGBRG8</constant></link>,
+<link linkend="V4L2-PIX-FMT-SGRBG8"><constant>V4L2_PIX_FMT_SGRBG8</constant></link>,
+<link linkend="V4L2-PIX-FMT-SN9C10X"><constant>V4L2_PIX_FMT_SN9C10X</constant></link>,
+<link linkend="V4L2-PIX-FMT-SN9C20X-I420"><constant>V4L2_PIX_FMT_SN9C20X_I420</constant></link>,
+<link linkend="V4L2-PIX-FMT-SPCA501"><constant>V4L2_PIX_FMT_SPCA501</constant></link>,
+<link linkend="V4L2-PIX-FMT-SPCA505"><constant>V4L2_PIX_FMT_SPCA505</constant></link>,
+<link linkend="V4L2-PIX-FMT-SPCA508"><constant>V4L2_PIX_FMT_SPCA508</constant></link>,
+<link linkend="V4L2-PIX-FMT-SPCA561"><constant>V4L2_PIX_FMT_SPCA561</constant></link>,
+<link linkend="V4L2-PIX-FMT-SQ905C"><constant>V4L2_PIX_FMT_SQ905C</constant></link>,
+<constant>V4L2_PIX_FMT_SRGGB8</constant>,
+<link linkend="V4L2-PIX-FMT-UYVY"><constant>V4L2_PIX_FMT_UYVY</constant></link>,
+<link linkend="V4L2-PIX-FMT-YUV420"><constant>V4L2_PIX_FMT_YUV420</constant></link>,
+<link linkend="V4L2-PIX-FMT-YUYV"><constant>V4L2_PIX_FMT_YUYV</constant></link>,
+<link linkend="V4L2-PIX-FMT-YVU420"><constant>V4L2_PIX_FMT_YVU420</constant></link>,
+and <link linkend="V4L2-PIX-FMT-YVYU"><constant>V4L2_PIX_FMT_YVYU</constant></link>.
+</para>
+ <para>Later on libv4lconvert was expanded to also be able to do
+various video processing functions to improve webcam video quality.
+The video processing is split in to 2 parts: libv4lconvert/control and
+libv4lconvert/processing.</para>
+
+ <para>The control part is used to offer video controls which can
+be used to control the video processing functions made available by
+ libv4lconvert/processing. These controls are stored application wide
+(until reboot) by using a persistent shared memory object.</para>
+
+ <para>libv4lconvert/processing offers the actual video
+processing functionality.</para>
+ </section>
+ <section>
+ <title>libv4l1</title>
+ <para>This library offers functions that can be used to quickly
+make v4l1 applications work with v4l2 devices. These functions work exactly
+like the normal open/close/etc, except that libv4l1 does full emulation of
+the v4l1 api on top of v4l2 drivers, in case of v4l1 drivers it
+will just pass calls through.</para>
+ <para>Since those functions are emulations of the old V4L1 API,
+it shouldn't be used for new applications.</para>
+ </section>
+ <section>
+ <title>libv4l2</title>
+ <para>This library should be used for all modern V4L2
+applications.</para>
+ <para>It provides handles to call V4L2 open/ioctl/close/poll
+methods. Instead of just providing the raw output of the device, it enhances
+the calls in the sense that it will use libv4lconvert to provide more video
+formats and to enhance the image quality.</para>
+ <para>In most cases, libv4l2 just passes the calls directly
+through to the v4l2 driver, intercepting the calls to
+<link linkend='vidioc-g-fmt'><constant>VIDIOC_TRY_FMT</constant></link>,
+<link linkend='vidioc-g-fmt'><constant>VIDIOC_G_FMT</constant></link>
+<link linkend='vidioc-g-fmt'><constant>VIDIOC_S_FMT</constant></link>
+<link linkend='vidioc-enum-framesizes'><constant>VIDIOC_ENUM_FRAMESIZES</constant></link>
+and <link linkend='vidioc-enum-frameintervals'><constant>VIDIOC_ENUM_FRAMEINTERVALS</constant></link>
+in order to emulate the formats
+<link linkend="V4L2-PIX-FMT-BGR24"><constant>V4L2_PIX_FMT_BGR24</constant></link>,
+<link linkend="V4L2-PIX-FMT-RGB24"><constant>V4L2_PIX_FMT_RGB24</constant></link>,
+<link linkend="V4L2-PIX-FMT-YUV420"><constant>V4L2_PIX_FMT_YUV420</constant></link>,
+and <link linkend="V4L2-PIX-FMT-YVU420"><constant>V4L2_PIX_FMT_YVU420</constant></link>,
+if they aren't available in the driver.
+<link linkend='vidioc-enum-fmt'><constant>VIDIOC_ENUM_FMT</constant></link>
+keeps enumerating the hardware supported formats, plus the emulated formats
+offered by libv4l at the end.
+</para>
+ <section id="libv4l-ops">
+ <title>Libv4l device control functions</title>
+ <para>The common file operation methods are provided by
+libv4l.</para>
+ <para>Those functions operate just like glibc
+open/close/dup/ioctl/read/mmap/munmap:</para>
+<itemizedlist><listitem>
+ <para>int v4l2_open(const char *file, int oflag,
+...) -
+operates like the standard <link linkend='func-open'>open()</link> function.
+</para></listitem><listitem>
+ <para>int v4l2_close(int fd) -
+operates like the standard <link linkend='func-close'>close()</link> function.
+</para></listitem><listitem>
+ <para>int v4l2_dup(int fd) -
+operates like the standard dup() function, duplicating a file handler.
+</para></listitem><listitem>
+ <para>int v4l2_ioctl (int fd, unsigned long int request, ...) -
+operates like the standard <link linkend='func-ioctl'>ioctl()</link> function.
+</para></listitem><listitem>
+ <para>int v4l2_read (int fd, void* buffer, size_t n) -
+operates like the standard <link linkend='func-read'>read()</link> function.
+</para></listitem><listitem>
+ <para>void v4l2_mmap(void *start, size_t length, int prot, int flags, int fd, int64_t offset); -
+operates like the standard <link linkend='func-mmap'>mmap()</link> function.
+</para></listitem><listitem>
+ <para>int v4l2_munmap(void *_start, size_t length); -
+operates like the standard <link linkend='func-munmap'>munmap()</link> function.
+</para></listitem>
+</itemizedlist>
+ <para>Those functions provide additional control:</para>
+<itemizedlist><listitem>
+ <para>int v4l2_fd_open(int fd, int v4l2_flags) -
+opens an already opened fd for further use through v4l2lib and possibly
+modify libv4l2's default behavior through the v4l2_flags argument.
+Currently, v4l2_flags can be <constant>V4L2_DISABLE_CONVERSION</constant>,
+to disable format conversion.
+</para></listitem><listitem>
+ <para>int v4l2_set_control(int fd, int cid, int value) -
+This function takes a value of 0 - 65535, and then scales that range to
+the actual range of the given v4l control id, and then if the cid exists
+and is not locked sets the cid to the scaled value.
+</para></listitem><listitem>
+ <para>int v4l2_get_control(int fd, int cid) -
+This function returns a value of 0 - 65535, scaled to from the actual range
+of the given v4l control id. when the cid does not exist, could not be
+accessed for some reason, or some error occurred 0 is returned.
+</para></listitem>
+</itemizedlist>
+ </section>
+ </section>
+ <section>
+
+ <title>v4l1compat.so wrapper library</title>
+
+ <para>This library intercepts calls to
+open/close/ioctl/mmap/mmunmap operations and redirects them to the libv4l
+counterparts, by using LD_PRELOAD=/usr/lib/v4l1compat.so. It also
+emulates V4L1 calls via V4L2 API.</para>
+ <para>It allows usage of binary legacy applications that
+still don't use libv4l.</para>
+ </section>
+
+</section>
diff --git a/Documentation/DocBook/media/v4l/lirc_device_interface.xml b/Documentation/DocBook/media/v4l/lirc_device_interface.xml
new file mode 100644
index 000000000..34cada2ca
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/lirc_device_interface.xml
@@ -0,0 +1,255 @@
+<section id="lirc_dev">
+<title>LIRC Device Interface</title>
+
+
+<section id="lirc_dev_intro">
+<title>Introduction</title>
+
+<para>The LIRC device interface is a bi-directional interface for
+transporting raw IR data between userspace and kernelspace. Fundamentally,
+it is just a chardev (/dev/lircX, for X = 0, 1, 2, ...), with a number
+of standard struct file_operations defined on it. With respect to
+transporting raw IR data to and fro, the essential fops are read, write
+and ioctl.</para>
+
+<para>Example dmesg output upon a driver registering w/LIRC:</para>
+ <blockquote>
+ <para>$ dmesg |grep lirc_dev</para>
+ <para>lirc_dev: IR Remote Control driver registered, major 248</para>
+ <para>rc rc0: lirc_dev: driver ir-lirc-codec (mceusb) registered at minor = 0</para>
+ </blockquote>
+
+<para>What you should see for a chardev:</para>
+ <blockquote>
+ <para>$ ls -l /dev/lirc*</para>
+ <para>crw-rw---- 1 root root 248, 0 Jul 2 22:20 /dev/lirc0</para>
+ </blockquote>
+</section>
+
+<section id="lirc_read">
+<title>LIRC read fop</title>
+
+<para>The lircd userspace daemon reads raw IR data from the LIRC chardev. The
+exact format of the data depends on what modes a driver supports, and what
+mode has been selected. lircd obtains supported modes and sets the active mode
+via the ioctl interface, detailed at <xref linkend="lirc_ioctl"/>. The generally
+preferred mode is LIRC_MODE_MODE2, in which packets containing an int value
+describing an IR signal are read from the chardev.</para>
+
+<para>See also <ulink url="http://www.lirc.org/html/technical.html">http://www.lirc.org/html/technical.html</ulink> for more info.</para>
+</section>
+
+<section id="lirc_write">
+<title>LIRC write fop</title>
+
+<para>The data written to the chardev is a pulse/space sequence of integer
+values. Pulses and spaces are only marked implicitly by their position. The
+data must start and end with a pulse, therefore, the data must always include
+an uneven number of samples. The write function must block until the data has
+been transmitted by the hardware. If more data is provided than the hardware
+can send, the driver returns EINVAL.</para>
+
+</section>
+
+<section id="lirc_ioctl">
+<title>LIRC ioctl fop</title>
+
+<para>The LIRC device's ioctl definition is bound by the ioctl function
+definition of struct file_operations, leaving us with an unsigned int
+for the ioctl command and an unsigned long for the arg. For the purposes
+of ioctl portability across 32-bit and 64-bit, these values are capped
+to their 32-bit sizes.</para>
+
+<para>The following ioctls can be used to change specific hardware settings.
+In general each driver should have a default set of settings. The driver
+implementation is expected to re-apply the default settings when the device
+is closed by user-space, so that every application opening the device can rely
+on working with the default settings initially.</para>
+
+<variablelist>
+ <varlistentry>
+ <term>LIRC_GET_FEATURES</term>
+ <listitem>
+ <para>Obviously, get the underlying hardware device's features. If a driver
+ does not announce support of certain features, calling of the corresponding
+ ioctls is undefined.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_GET_SEND_MODE</term>
+ <listitem>
+ <para>Get supported transmit mode. Only LIRC_MODE_PULSE is supported by lircd.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_GET_REC_MODE</term>
+ <listitem>
+ <para>Get supported receive modes. Only LIRC_MODE_MODE2 and LIRC_MODE_LIRCCODE
+ are supported by lircd.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_GET_SEND_CARRIER</term>
+ <listitem>
+ <para>Get carrier frequency (in Hz) currently used for transmit.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_GET_REC_CARRIER</term>
+ <listitem>
+ <para>Get carrier frequency (in Hz) currently used for IR reception.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_{G,S}ET_{SEND,REC}_DUTY_CYCLE</term>
+ <listitem>
+ <para>Get/set the duty cycle (from 0 to 100) of the carrier signal. Currently,
+ no special meaning is defined for 0 or 100, but this could be used to switch
+ off carrier generation in the future, so these values should be reserved.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_GET_REC_RESOLUTION</term>
+ <listitem>
+ <para>Some receiver have maximum resolution which is defined by internal
+ sample rate or data format limitations. E.g. it's common that signals can
+ only be reported in 50 microsecond steps. This integer value is used by
+ lircd to automatically adjust the aeps tolerance value in the lircd
+ config file.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_GET_M{IN,AX}_TIMEOUT</term>
+ <listitem>
+ <para>Some devices have internal timers that can be used to detect when
+ there's no IR activity for a long time. This can help lircd in detecting
+ that a IR signal is finished and can speed up the decoding process.
+ Returns an integer value with the minimum/maximum timeout that can be
+ set. Some devices have a fixed timeout, in that case both ioctls will
+ return the same value even though the timeout cannot be changed.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_GET_M{IN,AX}_FILTER_{PULSE,SPACE}</term>
+ <listitem>
+ <para>Some devices are able to filter out spikes in the incoming signal
+ using given filter rules. These ioctls return the hardware capabilities
+ that describe the bounds of the possible filters. Filter settings depend
+ on the IR protocols that are expected. lircd derives the settings from
+ all protocols definitions found in its config file.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_GET_LENGTH</term>
+ <listitem>
+ <para>Retrieves the code length in bits (only for LIRC_MODE_LIRCCODE).
+ Reads on the device must be done in blocks matching the bit count.
+ The bit could should be rounded up so that it matches full bytes.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_SET_{SEND,REC}_MODE</term>
+ <listitem>
+ <para>Set send/receive mode. Largely obsolete for send, as only
+ LIRC_MODE_PULSE is supported.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_SET_{SEND,REC}_CARRIER</term>
+ <listitem>
+ <para>Set send/receive carrier (in Hz).</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_SET_TRANSMITTER_MASK</term>
+ <listitem>
+ <para>This enables the given set of transmitters. The first transmitter
+ is encoded by the least significant bit, etc. When an invalid bit mask
+ is given, i.e. a bit is set, even though the device does not have so many
+ transitters, then this ioctl returns the number of available transitters
+ and does nothing otherwise.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_SET_REC_TIMEOUT</term>
+ <listitem>
+ <para>Sets the integer value for IR inactivity timeout (cf.
+ LIRC_GET_MIN_TIMEOUT and LIRC_GET_MAX_TIMEOUT). A value of 0 (if
+ supported by the hardware) disables all hardware timeouts and data should
+ be reported as soon as possible. If the exact value cannot be set, then
+ the next possible value _greater_ than the given value should be set.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_SET_REC_TIMEOUT_REPORTS</term>
+ <listitem>
+ <para>Enable (1) or disable (0) timeout reports in LIRC_MODE_MODE2. By
+ default, timeout reports should be turned off.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_SET_REC_FILTER_{,PULSE,SPACE}</term>
+ <listitem>
+ <para>Pulses/spaces shorter than this are filtered out by hardware. If
+ filters cannot be set independently for pulse/space, the corresponding
+ ioctls must return an error and LIRC_SET_REC_FILTER shall be used instead.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_SET_MEASURE_CARRIER_MODE</term>
+ <listitem>
+ <para>Enable (1)/disable (0) measure mode. If enabled, from the next key
+ press on, the driver will send LIRC_MODE2_FREQUENCY packets. By default
+ this should be turned off.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_SET_REC_{DUTY_CYCLE,CARRIER}_RANGE</term>
+ <listitem>
+ <para>To set a range use LIRC_SET_REC_DUTY_CYCLE_RANGE/LIRC_SET_REC_CARRIER_RANGE
+ with the lower bound first and later LIRC_SET_REC_DUTY_CYCLE/LIRC_SET_REC_CARRIER
+ with the upper bound.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_NOTIFY_DECODE</term>
+ <listitem>
+ <para>This ioctl is called by lircd whenever a successful decoding of an
+ incoming IR signal could be done. This can be used by supporting hardware
+ to give visual feedback to the user e.g. by flashing a LED.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_SETUP_{START,END}</term>
+ <listitem>
+ <para>Setting of several driver parameters can be optimized by encapsulating
+ the according ioctl calls with LIRC_SETUP_START/LIRC_SETUP_END. When a
+ driver receives a LIRC_SETUP_START ioctl it can choose to not commit
+ further setting changes to the hardware until a LIRC_SETUP_END is received.
+ But this is open to the driver implementation and every driver must also
+ handle parameter changes which are not encapsulated by LIRC_SETUP_START
+ and LIRC_SETUP_END. Drivers can also choose to ignore these ioctls.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>LIRC_SET_WIDEBAND_RECEIVER</term>
+ <listitem>
+ <para>Some receivers are equipped with special wide band receiver which is intended
+ to be used to learn output of existing remote.
+ Calling that ioctl with (1) will enable it, and with (0) disable it.
+ This might be useful of receivers that have otherwise narrow band receiver
+ that prevents them to be used with some remotes.
+ Wide band receiver might also be more precise
+ On the other hand its disadvantage it usually reduced range of reception.
+ Note: wide band receiver might be implictly enabled if you enable
+ carrier reports. In that case it will be disabled as soon as you disable
+ carrier reports. Trying to disable wide band receiver while carrier
+ reports are active will do nothing.</para>
+ </listitem>
+ </varlistentry>
+</variablelist>
+<section id="lirc_dev_errors">
+ &return-value;
+</section>
+</section>
+</section>
diff --git a/Documentation/DocBook/media/v4l/media-controller.xml b/Documentation/DocBook/media/v4l/media-controller.xml
new file mode 100644
index 000000000..873ac3a62
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/media-controller.xml
@@ -0,0 +1,89 @@
+<partinfo>
+ <authorgroup>
+ <author>
+ <firstname>Laurent</firstname>
+ <surname>Pinchart</surname>
+ <affiliation><address><email>laurent.pinchart@ideasonboard.com</email></address></affiliation>
+ <contrib>Initial version.</contrib>
+ </author>
+ </authorgroup>
+ <copyright>
+ <year>2010</year>
+ <holder>Laurent Pinchart</holder>
+ </copyright>
+
+ <revhistory>
+ <!-- Put document revisions here, newest first. -->
+ <revision>
+ <revnumber>1.0.0</revnumber>
+ <date>2010-11-10</date>
+ <authorinitials>lp</authorinitials>
+ <revremark>Initial revision</revremark>
+ </revision>
+ </revhistory>
+</partinfo>
+
+<title>Media Controller API</title>
+
+<chapter id="media_controller">
+ <title>Media Controller</title>
+
+ <section id="media-controller-intro">
+ <title>Introduction</title>
+ <para>Media devices increasingly handle multiple related functions. Many USB
+ cameras include microphones, video capture hardware can also output video,
+ or SoC camera interfaces also perform memory-to-memory operations similar to
+ video codecs.</para>
+ <para>Independent functions, even when implemented in the same hardware, can
+ be modelled as separate devices. A USB camera with a microphone will be
+ presented to userspace applications as V4L2 and ALSA capture devices. The
+ devices' relationships (when using a webcam, end-users shouldn't have to
+ manually select the associated USB microphone), while not made available
+ directly to applications by the drivers, can usually be retrieved from
+ sysfs.</para>
+ <para>With more and more advanced SoC devices being introduced, the current
+ approach will not scale. Device topologies are getting increasingly complex
+ and can't always be represented by a tree structure. Hardware blocks are
+ shared between different functions, creating dependencies between seemingly
+ unrelated devices.</para>
+ <para>Kernel abstraction APIs such as V4L2 and ALSA provide means for
+ applications to access hardware parameters. As newer hardware expose an
+ increasingly high number of those parameters, drivers need to guess what
+ applications really require based on limited information, thereby
+ implementing policies that belong to userspace.</para>
+ <para>The media controller API aims at solving those problems.</para>
+ </section>
+
+ <section id="media-controller-model">
+ <title>Media device model</title>
+ <para>Discovering a device internal topology, and configuring it at runtime,
+ is one of the goals of the media controller API. To achieve this, hardware
+ devices are modelled as an oriented graph of building blocks called entities
+ connected through pads.</para>
+ <para>An entity is a basic media hardware or software building block. It can
+ correspond to a large variety of logical blocks such as physical hardware
+ devices (CMOS sensor for instance), logical hardware devices (a building
+ block in a System-on-Chip image processing pipeline), DMA channels or
+ physical connectors.</para>
+ <para>A pad is a connection endpoint through which an entity can interact
+ with other entities. Data (not restricted to video) produced by an entity
+ flows from the entity's output to one or more entity inputs. Pads should not
+ be confused with physical pins at chip boundaries.</para>
+ <para>A link is a point-to-point oriented connection between two pads,
+ either on the same entity or on different entities. Data flows from a source
+ pad to a sink pad.</para>
+ </section>
+</chapter>
+
+<appendix id="media-user-func">
+ <title>Function Reference</title>
+ <!-- Keep this alphabetically sorted. -->
+ &sub-media-func-open;
+ &sub-media-func-close;
+ &sub-media-func-ioctl;
+ <!-- All ioctls go here. -->
+ &sub-media-ioc-device-info;
+ &sub-media-ioc-enum-entities;
+ &sub-media-ioc-enum-links;
+ &sub-media-ioc-setup-link;
+</appendix>
diff --git a/Documentation/DocBook/media/v4l/media-func-close.xml b/Documentation/DocBook/media/v4l/media-func-close.xml
new file mode 100644
index 000000000..be149c802
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/media-func-close.xml
@@ -0,0 +1,59 @@
+<refentry id="media-func-close">
+ <refmeta>
+ <refentrytitle>media close()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>media-close</refname>
+ <refpurpose>Close a media device</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>#include &lt;unistd.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>int <function>close</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>Closes the media device. Resources associated with the file descriptor
+ are freed. The device configuration remain unchanged.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>Return Value</title>
+
+ <para><function>close</function> returns 0 on success. On error, -1 is
+ returned, and <varname>errno</varname> is set appropriately. Possible error
+ codes are:</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EBADF</errorcode></term>
+ <listitem>
+ <para><parameter>fd</parameter> is not a valid open file descriptor.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/media-func-ioctl.xml b/Documentation/DocBook/media/v4l/media-func-ioctl.xml
new file mode 100644
index 000000000..39478d0fb
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/media-func-ioctl.xml
@@ -0,0 +1,73 @@
+<refentry id="media-func-ioctl">
+ <refmeta>
+ <refentrytitle>media ioctl()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>media-ioctl</refname>
+ <refpurpose>Control a media device</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>#include &lt;sys/ioctl.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>void *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>Media ioctl request code as defined in the media.h header file,
+ for example MEDIA_IOC_SETUP_LINK.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para>Pointer to a request-specific structure.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+ <para>The <function>ioctl()</function> function manipulates media device
+ parameters. The argument <parameter>fd</parameter> must be an open file
+ descriptor.</para>
+ <para>The ioctl <parameter>request</parameter> code specifies the media
+ function to be called. It has encoded in it whether the argument is an
+ input, output or read/write parameter, and the size of the argument
+ <parameter>argp</parameter> in bytes.</para>
+ <para>Macros and structures definitions specifying media ioctl requests and
+ their parameters are located in the media.h header file. All media ioctl
+ requests, their respective function and parameters are specified in
+ <xref linkend="media-user-func" />.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <para>Request-specific error codes are listed in the
+ individual requests descriptions.</para>
+ <para>When an ioctl that takes an output or read/write parameter fails,
+ the parameter remains unmodified.</para>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/media-func-open.xml b/Documentation/DocBook/media/v4l/media-func-open.xml
new file mode 100644
index 000000000..f7df034dc
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/media-func-open.xml
@@ -0,0 +1,94 @@
+<refentry id="media-func-open">
+ <refmeta>
+ <refentrytitle>media open()</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>media-open</refname>
+ <refpurpose>Open a media device</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcsynopsisinfo>#include &lt;fcntl.h&gt;</funcsynopsisinfo>
+ <funcprototype>
+ <funcdef>int <function>open</function></funcdef>
+ <paramdef>const char *<parameter>device_name</parameter></paramdef>
+ <paramdef>int <parameter>flags</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>device_name</parameter></term>
+ <listitem>
+ <para>Device to be opened.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>flags</parameter></term>
+ <listitem>
+ <para>Open flags. Access mode must be either <constant>O_RDONLY</constant>
+ or <constant>O_RDWR</constant>. Other flags have no effect.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+ <refsect1>
+ <title>Description</title>
+ <para>To open a media device applications call <function>open()</function>
+ with the desired device name. The function has no side effects; the device
+ configuration remain unchanged.</para>
+ <para>When the device is opened in read-only mode, attemps to modify its
+ configuration will result in an error, and <varname>errno</varname> will be
+ set to <errorcode>EBADF</errorcode>.</para>
+ </refsect1>
+ <refsect1>
+ <title>Return Value</title>
+
+ <para><function>open</function> returns the new file descriptor on success.
+ On error, -1 is returned, and <varname>errno</varname> is set appropriately.
+ Possible error codes are:</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EACCES</errorcode></term>
+ <listitem>
+ <para>The requested access to the file is not allowed.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EMFILE</errorcode></term>
+ <listitem>
+ <para>The process already has the maximum number of files open.
+ </para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENFILE</errorcode></term>
+ <listitem>
+ <para>The system limit on the total number of open files has been
+ reached.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENOMEM</errorcode></term>
+ <listitem>
+ <para>Insufficient kernel memory was available.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENXIO</errorcode></term>
+ <listitem>
+ <para>No device corresponding to this device special file exists.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/media-ioc-device-info.xml b/Documentation/DocBook/media/v4l/media-ioc-device-info.xml
new file mode 100644
index 000000000..2ce521419
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/media-ioc-device-info.xml
@@ -0,0 +1,132 @@
+<refentry id="media-ioc-device-info">
+ <refmeta>
+ <refentrytitle>ioctl MEDIA_IOC_DEVICE_INFO</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>MEDIA_IOC_DEVICE_INFO</refname>
+ <refpurpose>Query device information</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct media_device_info *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>File descriptor returned by
+ <link linkend='media-func-open'><function>open()</function></link>.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>MEDIA_IOC_DEVICE_INFO</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>All media devices must support the <constant>MEDIA_IOC_DEVICE_INFO</constant>
+ ioctl. To query device information, applications call the ioctl with a
+ pointer to a &media-device-info;. The driver fills the structure and returns
+ the information to the application.
+ The ioctl never fails.</para>
+
+ <table pgwide="1" frame="none" id="media-device-info">
+ <title>struct <structname>media_device_info</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>char</entry>
+ <entry><structfield>driver</structfield>[16]</entry>
+ <entry><para>Name of the driver implementing the media API as a
+ NUL-terminated ASCII string. The driver version is stored in the
+ <structfield>driver_version</structfield> field.</para>
+ <para>Driver specific applications can use this information to
+ verify the driver identity. It is also useful to work around
+ known bugs, or to identify drivers in error reports.</para></entry>
+ </row>
+ <row>
+ <entry>char</entry>
+ <entry><structfield>model</structfield>[32]</entry>
+ <entry>Device model name as a NUL-terminated UTF-8 string. The
+ device version is stored in the <structfield>device_version</structfield>
+ field and is not be appended to the model name.</entry>
+ </row>
+ <row>
+ <entry>char</entry>
+ <entry><structfield>serial</structfield>[40]</entry>
+ <entry>Serial number as a NUL-terminated ASCII string.</entry>
+ </row>
+ <row>
+ <entry>char</entry>
+ <entry><structfield>bus_info</structfield>[32]</entry>
+ <entry>Location of the device in the system as a NUL-terminated
+ ASCII string. This includes the bus type name (PCI, USB, ...) and a
+ bus-specific identifier.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>media_version</structfield></entry>
+ <entry>Media API version, formatted with the
+ <constant>KERNEL_VERSION()</constant> macro.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>hw_revision</structfield></entry>
+ <entry>Hardware device revision in a driver-specific format.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>media_version</structfield></entry>
+ <entry>Media device driver version, formatted with the
+ <constant>KERNEL_VERSION()</constant> macro. Together with the
+ <structfield>driver</structfield> field this identifies a particular
+ driver.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[31]</entry>
+ <entry>Reserved for future extensions. Drivers and applications must
+ set this array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <para>The <structfield>serial</structfield> and <structfield>bus_info</structfield>
+ fields can be used to distinguish between multiple instances of otherwise
+ identical hardware. The serial number takes precedence when provided and can
+ be assumed to be unique. If the serial number is an empty string, the
+ <structfield>bus_info</structfield> field can be used instead. The
+ <structfield>bus_info</structfield> field is guaranteed to be unique, but
+ can vary across reboots or device unplug/replug.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml b/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml
new file mode 100644
index 000000000..5872f8bbf
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml
@@ -0,0 +1,280 @@
+<refentry id="media-ioc-enum-entities">
+ <refmeta>
+ <refentrytitle>ioctl MEDIA_IOC_ENUM_ENTITIES</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>MEDIA_IOC_ENUM_ENTITIES</refname>
+ <refpurpose>Enumerate entities and their properties</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct media_entity_desc *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>File descriptor returned by
+ <link linkend='media-func-open'><function>open()</function></link>.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>MEDIA_IOC_ENUM_ENTITIES</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+ <para>To query the attributes of an entity, applications set the id field
+ of a &media-entity-desc; structure and call the MEDIA_IOC_ENUM_ENTITIES
+ ioctl with a pointer to this structure. The driver fills the rest of the
+ structure or returns an &EINVAL; when the id is invalid.</para>
+ <para>Entities can be enumerated by or'ing the id with the
+ <constant>MEDIA_ENT_ID_FLAG_NEXT</constant> flag. The driver will return
+ information about the entity with the smallest id strictly larger than the
+ requested one ('next entity'), or the &EINVAL; if there is none.</para>
+ <para>Entity IDs can be non-contiguous. Applications must
+ <emphasis>not</emphasis> try to enumerate entities by calling
+ MEDIA_IOC_ENUM_ENTITIES with increasing id's until they get an error.</para>
+ <para>Two or more entities that share a common non-zero
+ <structfield>group_id</structfield> value are considered as logically
+ grouped. Groups are used to report
+ <itemizedlist>
+ <listitem><para>ALSA, VBI and video nodes that carry the same media
+ stream</para></listitem>
+ <listitem><para>lens and flash controllers associated with a sensor</para></listitem>
+ </itemizedlist>
+ </para>
+
+ <table pgwide="1" frame="none" id="media-entity-desc">
+ <title>struct <structname>media_entity_desc</structname></title>
+ <tgroup cols="5">
+ <colspec colname="c1" />
+ <colspec colname="c2" />
+ <colspec colname="c3" />
+ <colspec colname="c4" />
+ <colspec colname="c5" />
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>id</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>Entity id, set by the application. When the id is or'ed with
+ <constant>MEDIA_ENT_ID_FLAG_NEXT</constant>, the driver clears the
+ flag and returns the first entity with a larger id.</entry>
+ </row>
+ <row>
+ <entry>char</entry>
+ <entry><structfield>name</structfield>[32]</entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>Entity name as an UTF-8 NULL-terminated string.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>Entity type, see <xref linkend="media-entity-type" /> for details.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>revision</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>Entity revision in a driver/hardware specific format.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>Entity flags, see <xref linkend="media-entity-flag" /> for details.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>group_id</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>Entity group ID</entry>
+ </row>
+ <row>
+ <entry>__u16</entry>
+ <entry><structfield>pads</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>Number of pads</entry>
+ </row>
+ <row>
+ <entry>__u16</entry>
+ <entry><structfield>links</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>Total number of outbound links. Inbound links are not counted
+ in this field.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>struct</entry>
+ <entry><structfield>dev</structfield></entry>
+ <entry></entry>
+ <entry>Valid for (sub-)devices that create a single device node.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>major</structfield></entry>
+ <entry>Device node major number.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>minor</structfield></entry>
+ <entry>Device node minor number.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u8</entry>
+ <entry><structfield>raw</structfield>[184]</entry>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="media-entity-type">
+ <title>Media entity types</title>
+ <tgroup cols="2">
+ <colspec colname="c1"/>
+ <colspec colname="c2"/>
+ <tbody valign="top">
+ <row>
+ <entry><constant>MEDIA_ENT_T_DEVNODE</constant></entry>
+ <entry>Unknown device node</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_DEVNODE_V4L</constant></entry>
+ <entry>V4L video, radio or vbi device node</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_DEVNODE_FB</constant></entry>
+ <entry>Frame buffer device node</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_DEVNODE_ALSA</constant></entry>
+ <entry>ALSA card</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_DEVNODE_DVB_FE</constant></entry>
+ <entry>DVB frontend devnode</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_DEVNODE_DVB_DEMUX</constant></entry>
+ <entry>DVB demux devnode</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_DEVNODE_DVB_DVR</constant></entry>
+ <entry>DVB DVR devnode</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_DEVNODE_DVB_CA</constant></entry>
+ <entry>DVB CAM devnode</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_DEVNODE_DVB_NET</constant></entry>
+ <entry>DVB network devnode</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_V4L2_SUBDEV</constant></entry>
+ <entry>Unknown V4L sub-device</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_V4L2_SUBDEV_SENSOR</constant></entry>
+ <entry>Video sensor</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_V4L2_SUBDEV_FLASH</constant></entry>
+ <entry>Flash controller</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_V4L2_SUBDEV_LENS</constant></entry>
+ <entry>Lens controller</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_V4L2_SUBDEV_DECODER</constant></entry>
+ <entry>Video decoder, the basic function of the video decoder is to
+ accept analogue video from a wide variety of sources such as
+ broadcast, DVD players, cameras and video cassette recorders, in
+ either NTSC, PAL or HD format and still occasionally SECAM, separate
+ it into its component parts, luminance and chrominance, and output
+ it in some digital video standard, with appropriate embedded timing
+ signals.</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_ENT_T_V4L2_SUBDEV_TUNER</constant></entry>
+ <entry>TV and/or radio tuner</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="media-entity-flag">
+ <title>Media entity flags</title>
+ <tgroup cols="2">
+ <colspec colname="c1"/>
+ <colspec colname="c2"/>
+ <tbody valign="top">
+ <row>
+ <entry><constant>MEDIA_ENT_FL_DEFAULT</constant></entry>
+ <entry>Default entity for its type. Used to discover the default
+ audio, VBI and video devices, the default camera sensor, ...</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &media-entity-desc; <structfield>id</structfield> references
+ a non-existing entity.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/media-ioc-enum-links.xml b/Documentation/DocBook/media/v4l/media-ioc-enum-links.xml
new file mode 100644
index 000000000..74fb394ec
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/media-ioc-enum-links.xml
@@ -0,0 +1,216 @@
+<refentry id="media-ioc-enum-links">
+ <refmeta>
+ <refentrytitle>ioctl MEDIA_IOC_ENUM_LINKS</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>MEDIA_IOC_ENUM_LINKS</refname>
+ <refpurpose>Enumerate all pads and links for a given entity</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct media_links_enum *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>File descriptor returned by
+ <link linkend='media-func-open'><function>open()</function></link>.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>MEDIA_IOC_ENUM_LINKS</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To enumerate pads and/or links for a given entity, applications set
+ the entity field of a &media-links-enum; structure and initialize the
+ &media-pad-desc; and &media-link-desc; structure arrays pointed by the
+ <structfield>pads</structfield> and <structfield>links</structfield> fields.
+ They then call the MEDIA_IOC_ENUM_LINKS ioctl with a pointer to this
+ structure.</para>
+ <para>If the <structfield>pads</structfield> field is not NULL, the driver
+ fills the <structfield>pads</structfield> array with information about the
+ entity's pads. The array must have enough room to store all the entity's
+ pads. The number of pads can be retrieved with the &MEDIA-IOC-ENUM-ENTITIES;
+ ioctl.</para>
+ <para>If the <structfield>links</structfield> field is not NULL, the driver
+ fills the <structfield>links</structfield> array with information about the
+ entity's outbound links. The array must have enough room to store all the
+ entity's outbound links. The number of outbound links can be retrieved with
+ the &MEDIA-IOC-ENUM-ENTITIES; ioctl.</para>
+ <para>Only forward links that originate at one of the entity's source pads
+ are returned during the enumeration process.</para>
+
+ <table pgwide="1" frame="none" id="media-links-enum">
+ <title>struct <structname>media_links_enum</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>entity</structfield></entry>
+ <entry>Entity id, set by the application.</entry>
+ </row>
+ <row>
+ <entry>&media-pad-desc;</entry>
+ <entry>*<structfield>pads</structfield></entry>
+ <entry>Pointer to a pads array allocated by the application. Ignored
+ if NULL.</entry>
+ </row>
+ <row>
+ <entry>&media-link-desc;</entry>
+ <entry>*<structfield>links</structfield></entry>
+ <entry>Pointer to a links array allocated by the application. Ignored
+ if NULL.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="media-pad-desc">
+ <title>struct <structname>media_pad_desc</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>entity</structfield></entry>
+ <entry>ID of the entity this pad belongs to.</entry>
+ </row>
+ <row>
+ <entry>__u16</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>0-based pad index.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Pad flags, see <xref linkend="media-pad-flag" /> for more details.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="media-pad-flag">
+ <title>Media pad flags</title>
+ <tgroup cols="2">
+ <colspec colname="c1"/>
+ <colspec colname="c2"/>
+ <tbody valign="top">
+ <row>
+ <entry><constant>MEDIA_PAD_FL_SINK</constant></entry>
+ <entry>Input pad, relative to the entity. Input pads sink data and
+ are targets of links.</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_PAD_FL_SOURCE</constant></entry>
+ <entry>Output pad, relative to the entity. Output pads source data
+ and are origins of links.</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_PAD_FL_MUST_CONNECT</constant></entry>
+ <entry>If this flag is set and the pad is linked to any other
+ pad, then at least one of those links must be enabled for the
+ entity to be able to stream. There could be temporary reasons
+ (e.g. device configuration dependent) for the pad to need
+ enabled links even when this flag isn't set; the absence of the
+ flag doesn't imply there is none.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="media-link-desc">
+ <title>struct <structname>media_link_desc</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>&media-pad-desc;</entry>
+ <entry><structfield>source</structfield></entry>
+ <entry>Pad at the origin of this link.</entry>
+ </row>
+ <row>
+ <entry>&media-pad-desc;</entry>
+ <entry><structfield>sink</structfield></entry>
+ <entry>Pad at the target of this link.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Link flags, see <xref linkend="media-link-flag" /> for more details.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="media-link-flag">
+ <title>Media link flags</title>
+ <tgroup cols="2">
+ <colspec colname="c1"/>
+ <colspec colname="c2"/>
+ <tbody valign="top">
+ <row>
+ <entry><constant>MEDIA_LNK_FL_ENABLED</constant></entry>
+ <entry>The link is enabled and can be used to transfer media data.
+ When two or more links target a sink pad, only one of them can be
+ enabled at a time.</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_LNK_FL_IMMUTABLE</constant></entry>
+ <entry>The link enabled state can't be modified at runtime. An
+ immutable link is always enabled.</entry>
+ </row>
+ <row>
+ <entry><constant>MEDIA_LNK_FL_DYNAMIC</constant></entry>
+ <entry>The link enabled state can be modified during streaming. This
+ flag is set by drivers and is read-only for applications.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <para>One and only one of <constant>MEDIA_PAD_FL_SINK</constant> and
+ <constant>MEDIA_PAD_FL_SOURCE</constant> must be set for every pad.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &media-links-enum; <structfield>id</structfield> references
+ a non-existing entity.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/media-ioc-setup-link.xml b/Documentation/DocBook/media/v4l/media-ioc-setup-link.xml
new file mode 100644
index 000000000..fc2e522ee
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/media-ioc-setup-link.xml
@@ -0,0 +1,84 @@
+<refentry id="media-ioc-setup-link">
+ <refmeta>
+ <refentrytitle>ioctl MEDIA_IOC_SETUP_LINK</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>MEDIA_IOC_SETUP_LINK</refname>
+ <refpurpose>Modify the properties of a link</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct media_link_desc *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>File descriptor returned by
+ <link linkend='media-func-open'><function>open()</function></link>.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>MEDIA_IOC_SETUP_LINK</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To change link properties applications fill a &media-link-desc; with
+ link identification information (source and sink pad) and the new requested
+ link flags. They then call the MEDIA_IOC_SETUP_LINK ioctl with a pointer to
+ that structure.</para>
+ <para>The only configurable property is the <constant>ENABLED</constant>
+ link flag to enable/disable a link. Links marked with the
+ <constant>IMMUTABLE</constant> link flag can not be enabled or disabled.
+ </para>
+ <para>Link configuration has no side effect on other links. If an enabled
+ link at the sink pad prevents the link from being enabled, the driver
+ returns with an &EBUSY;.</para>
+ <para>Only links marked with the <constant>DYNAMIC</constant> link flag can
+ be enabled/disabled while streaming media data. Attempting to enable or
+ disable a streaming non-dynamic link will return an &EBUSY;.</para>
+ <para>If the specified link can't be found the driver returns with an
+ &EINVAL;.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &media-link-desc; references a non-existing link, or the
+ link is immutable and an attempt to modify its configuration was made.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-grey.xml b/Documentation/DocBook/media/v4l/pixfmt-grey.xml
new file mode 100644
index 000000000..bee970d3f
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-grey.xml
@@ -0,0 +1,62 @@
+ <refentry id="V4L2-PIX-FMT-GREY">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_GREY ('GREY')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_GREY</constant></refname>
+ <refpurpose>Grey-scale image</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is a grey-scale image. It is really a degenerate
+Y'CbCr format which simply contains no Cb or Cr data.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_GREY</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-m420.xml b/Documentation/DocBook/media/v4l/pixfmt-m420.xml
new file mode 100644
index 000000000..aadae92c5
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-m420.xml
@@ -0,0 +1,139 @@
+ <refentry id="V4L2-PIX-FMT-M420">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_M420 ('M420')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_M420</constant></refname>
+ <refpurpose>Format with &frac12; horizontal and vertical chroma
+ resolution, also known as YUV 4:2:0. Hybrid plane line-interleaved
+ layout.</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>M420 is a YUV format with &frac12; horizontal and vertical chroma
+ subsampling (YUV 4:2:0). Pixels are organized as interleaved luma and
+ chroma planes. Two lines of luma data are followed by one line of chroma
+ data.</para>
+ <para>The luma plane has one byte per pixel. The chroma plane contains
+ interleaved CbCr pixels subsampled by &frac12; in the horizontal and
+ vertical directions. Each CbCr pair belongs to four pixels. For example,
+Cb<subscript>0</subscript>/Cr<subscript>0</subscript> belongs to
+Y'<subscript>00</subscript>, Y'<subscript>01</subscript>,
+Y'<subscript>10</subscript>, Y'<subscript>11</subscript>.</para>
+
+ <para>All line lengths are identical: if the Y lines include pad bytes
+ so do the CbCr lines.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_M420</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;20:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-nv12.xml b/Documentation/DocBook/media/v4l/pixfmt-nv12.xml
new file mode 100644
index 000000000..84dd4fd7c
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-nv12.xml
@@ -0,0 +1,143 @@
+ <refentry>
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_NV12 ('NV12'), V4L2_PIX_FMT_NV21 ('NV21')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-NV12"><constant>V4L2_PIX_FMT_NV12</constant></refname>
+ <refname id="V4L2-PIX-FMT-NV21"><constant>V4L2_PIX_FMT_NV21</constant></refname>
+ <refpurpose>Formats with &frac12; horizontal and vertical
+chroma resolution, also known as YUV 4:2:0. One luminance and one
+chrominance plane with alternating chroma samples as opposed to
+<constant>V4L2_PIX_FMT_YVU420</constant></refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>These are two-plane versions of the YUV 4:2:0 format.
+The three components are separated into two sub-images or planes. The
+Y plane is first. The Y plane has one byte per pixel. For
+<constant>V4L2_PIX_FMT_NV12</constant>, a combined CbCr plane
+immediately follows the Y plane in memory. The CbCr plane is the same
+width, in bytes, as the Y plane (and of the image), but is half as
+tall in pixels. Each CbCr pair belongs to four pixels. For example,
+Cb<subscript>0</subscript>/Cr<subscript>0</subscript> belongs to
+Y'<subscript>00</subscript>, Y'<subscript>01</subscript>,
+Y'<subscript>10</subscript>, Y'<subscript>11</subscript>.
+<constant>V4L2_PIX_FMT_NV21</constant> is the same except the Cb and
+Cr bytes are swapped, the CrCb plane starts with a Cr byte.</para>
+
+ <para>If the Y plane has pad bytes after each row, then the
+CbCr plane has as many pad bytes after its rows.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_NV12</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;20:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-nv12m.xml b/Documentation/DocBook/media/v4l/pixfmt-nv12m.xml
new file mode 100644
index 000000000..f3a3d459f
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-nv12m.xml
@@ -0,0 +1,153 @@
+ <refentry>
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_NV12M ('NM12'), V4L2_PIX_FMT_NV21M ('NM21'), V4L2_PIX_FMT_NV12MT_16X16</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-NV12M"><constant>V4L2_PIX_FMT_NV12M</constant></refname>
+ <refname id="V4L2-PIX-FMT-NV21M"><constant>V4L2_PIX_FMT_NV21M</constant></refname>
+ <refname id="V4L2-PIX-FMT-NV12MT-16X16"><constant>V4L2_PIX_FMT_NV12MT_16X16</constant></refname>
+ <refpurpose>Variation of <constant>V4L2_PIX_FMT_NV12</constant> and <constant>V4L2_PIX_FMT_NV21</constant> with planes
+ non contiguous in memory. </refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is a multi-planar, two-plane version of the YUV 4:2:0 format.
+The three components are separated into two sub-images or planes.
+<constant>V4L2_PIX_FMT_NV12M</constant> differs from <constant>V4L2_PIX_FMT_NV12
+</constant> in that the two planes are non-contiguous in memory, i.e. the chroma
+plane do not necessarily immediately follows the luma plane.
+The luminance data occupies the first plane. The Y plane has one byte per pixel.
+In the second plane there is a chrominance data with alternating chroma samples.
+The CbCr plane is the same width, in bytes, as the Y plane (and of the image),
+but is half as tall in pixels. Each CbCr pair belongs to four pixels. For example,
+Cb<subscript>0</subscript>/Cr<subscript>0</subscript> belongs to
+Y'<subscript>00</subscript>, Y'<subscript>01</subscript>,
+Y'<subscript>10</subscript>, Y'<subscript>11</subscript>.
+<constant>V4L2_PIX_FMT_NV12MT_16X16</constant> is the tiled version of
+<constant>V4L2_PIX_FMT_NV12M</constant> with 16x16 macroblock tiles. Here pixels
+are arranged in 16x16 2D tiles and tiles are arranged in linear order in memory.
+<constant>V4L2_PIX_FMT_NV21M</constant> is the same as <constant>V4L2_PIX_FMT_NV12M</constant>
+except the Cb and Cr bytes are swapped, the CrCb plane starts with a Cr byte.</para>
+
+ <para><constant>V4L2_PIX_FMT_NV12M</constant> is intended to be
+used only in drivers and applications that support the multi-planar API,
+described in <xref linkend="planar-apis"/>. </para>
+
+ <para>If the Y plane has pad bytes after each row, then the
+CbCr plane has as many pad bytes after its rows.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_NV12M</constant> 4 &times; 4 pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start0&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>start1&nbsp;+&nbsp;0:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start1&nbsp;+&nbsp;4:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-nv12mt.xml b/Documentation/DocBook/media/v4l/pixfmt-nv12mt.xml
new file mode 100644
index 000000000..8a70a1707
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-nv12mt.xml
@@ -0,0 +1,66 @@
+ <refentry>
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_NV12MT ('TM12')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-NV12MT"><constant>V4L2_PIX_FMT_NV12MT
+</constant></refname>
+ <refpurpose>Formats with &frac12; horizontal and vertical
+chroma resolution. This format has two planes - one for luminance and one for
+chrominance. Chroma samples are interleaved. The difference to
+<constant>V4L2_PIX_FMT_NV12</constant> is the memory layout. Pixels are
+grouped in macroblocks of 64x32 size. The order of macroblocks in memory is
+also not standard.
+ </refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is the two-plane versions of the YUV 4:2:0 format where data
+is grouped into 64x32 macroblocks. The three components are separated into two
+sub-images or planes. The Y plane has one byte per pixel and pixels are grouped
+into 64x32 macroblocks. The CbCr plane has the same width, in bytes, as the Y
+plane (and the image), but is half as tall in pixels. The chroma plane is also
+grouped into 64x32 macroblocks.</para>
+ <para>Width of the buffer has to be aligned to the multiple of 128, and
+height alignment is 32. Every four adjacent buffers - two horizontally and two
+vertically are grouped together and are located in memory in Z or flipped Z
+order. </para>
+ <para>Layout of macroblocks in memory is presented in the following
+figure.</para>
+ <para><figure id="nv12mt">
+ <title><constant>V4L2_PIX_FMT_NV12MT</constant> macroblock Z shape
+memory layout</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="nv12mt.gif" format="GIF" />
+ </imageobject>
+ </mediaobject>
+ </figure>
+ The requirement that width is multiple of 128 is implemented because,
+the Z shape cannot be cut in half horizontally. In case the vertical resolution
+of macroblocks is odd then the last row of macroblocks is arranged in a linear
+order. </para>
+ <para>In case of chroma the layout is identical. Cb and Cr samples are
+interleaved. Height of the buffer is aligned to 32.
+ </para>
+ <example>
+ <title>Memory layout of macroblocks in <constant>V4L2_PIX_FMT_NV12
+</constant> format pixel image - extreme case</title>
+ <para>
+ <figure id="nv12mt_ex">
+ <title>Example <constant>V4L2_PIX_FMT_NV12MT</constant> memory
+layout of macroblocks</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="nv12mt_example.gif" format="GIF" />
+ </imageobject>
+ </mediaobject>
+ </figure>
+ Memory layout of macroblocks of <constant>V4L2_PIX_FMT_NV12MT
+</constant> format in most extreme case.
+ </para>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-nv16.xml b/Documentation/DocBook/media/v4l/pixfmt-nv16.xml
new file mode 100644
index 000000000..8ae1f8a81
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-nv16.xml
@@ -0,0 +1,166 @@
+ <refentry>
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_NV16 ('NV16'), V4L2_PIX_FMT_NV61 ('NV61')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-NV16"><constant>V4L2_PIX_FMT_NV16</constant></refname>
+ <refname id="V4L2-PIX-FMT-NV61"><constant>V4L2_PIX_FMT_NV61</constant></refname>
+ <refpurpose>Formats with &frac12; horizontal
+chroma resolution, also known as YUV 4:2:2. One luminance and one
+chrominance plane with alternating chroma samples as opposed to
+<constant>V4L2_PIX_FMT_YVU420</constant></refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>These are two-plane versions of the YUV 4:2:2 format.
+The three components are separated into two sub-images or planes. The
+Y plane is first. The Y plane has one byte per pixel. For
+<constant>V4L2_PIX_FMT_NV16</constant>, a combined CbCr plane
+immediately follows the Y plane in memory. The CbCr plane is the same
+width and height, in bytes, as the Y plane (and of the image).
+Each CbCr pair belongs to two pixels. For example,
+Cb<subscript>0</subscript>/Cr<subscript>0</subscript> belongs to
+Y'<subscript>00</subscript>, Y'<subscript>01</subscript>.
+<constant>V4L2_PIX_FMT_NV61</constant> is the same except the Cb and
+Cr bytes are swapped, the CrCb plane starts with a Cr byte.</para>
+
+ <para>If the Y plane has pad bytes after each row, then the
+CbCr plane has as many pad bytes after its rows.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_NV16</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;20:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ <entry>Cb<subscript>21</subscript></entry>
+ <entry>Cr<subscript>21</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;28:</entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ <entry>Cb<subscript>31</subscript></entry>
+ <entry>Cr<subscript>31</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-nv16m.xml b/Documentation/DocBook/media/v4l/pixfmt-nv16m.xml
new file mode 100644
index 000000000..fb2b5e35d
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-nv16m.xml
@@ -0,0 +1,170 @@
+ <refentry>
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_NV16M ('NM16'), V4L2_PIX_FMT_NV61M ('NM61')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-NV16M"><constant>V4L2_PIX_FMT_NV16M</constant></refname>
+ <refname id="V4L2-PIX-FMT-NV61M"><constant>V4L2_PIX_FMT_NV61M</constant></refname>
+ <refpurpose>Variation of <constant>V4L2_PIX_FMT_NV16</constant> and <constant>V4L2_PIX_FMT_NV61</constant> with planes
+ non contiguous in memory. </refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is a multi-planar, two-plane version of the YUV 4:2:2 format.
+The three components are separated into two sub-images or planes.
+<constant>V4L2_PIX_FMT_NV16M</constant> differs from <constant>V4L2_PIX_FMT_NV16
+</constant> in that the two planes are non-contiguous in memory, i.e. the chroma
+plane does not necessarily immediately follow the luma plane.
+The luminance data occupies the first plane. The Y plane has one byte per pixel.
+In the second plane there is chrominance data with alternating chroma samples.
+The CbCr plane is the same width and height, in bytes, as the Y plane.
+Each CbCr pair belongs to two pixels. For example,
+Cb<subscript>0</subscript>/Cr<subscript>0</subscript> belongs to
+Y'<subscript>00</subscript>, Y'<subscript>01</subscript>.
+<constant>V4L2_PIX_FMT_NV61M</constant> is the same as <constant>V4L2_PIX_FMT_NV16M</constant>
+except the Cb and Cr bytes are swapped, the CrCb plane starts with a Cr byte.</para>
+
+ <para><constant>V4L2_PIX_FMT_NV16M</constant> and
+<constant>V4L2_PIX_FMT_NV61M</constant> are intended to be used only in drivers
+and applications that support the multi-planar API, described in
+<xref linkend="planar-apis"/>. </para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_NV16M</constant> 4 &times; 4 pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start0&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>start1&nbsp;+&nbsp;0:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cb<subscript>02</subscript></entry>
+ <entry>Cr<subscript>02</subscript></entry>
+ </row>
+ <row>
+ <entry>start1&nbsp;+&nbsp;4:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cb<subscript>12</subscript></entry>
+ <entry>Cr<subscript>12</subscript></entry>
+ </row>
+ <row>
+ <entry>start1&nbsp;+&nbsp;8:</entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ <entry>Cb<subscript>22</subscript></entry>
+ <entry>Cr<subscript>22</subscript></entry>
+ </row>
+ <row>
+ <entry>start1&nbsp;+&nbsp;12:</entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ <entry>Cb<subscript>32</subscript></entry>
+ <entry>Cr<subscript>32</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-nv24.xml b/Documentation/DocBook/media/v4l/pixfmt-nv24.xml
new file mode 100644
index 000000000..fb255f2ca
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-nv24.xml
@@ -0,0 +1,121 @@
+ <refentry>
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_NV24 ('NV24'), V4L2_PIX_FMT_NV42 ('NV42')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-NV24"><constant>V4L2_PIX_FMT_NV24</constant></refname>
+ <refname id="V4L2-PIX-FMT-NV42"><constant>V4L2_PIX_FMT_NV42</constant></refname>
+ <refpurpose>Formats with full horizontal and vertical
+chroma resolutions, also known as YUV 4:4:4. One luminance and one
+chrominance plane with alternating chroma samples as opposed to
+<constant>V4L2_PIX_FMT_YVU420</constant></refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>These are two-plane versions of the YUV 4:4:4 format. The three
+ components are separated into two sub-images or planes. The Y plane is
+ first, with each Y sample stored in one byte per pixel. For
+ <constant>V4L2_PIX_FMT_NV24</constant>, a combined CbCr plane
+ immediately follows the Y plane in memory. The CbCr plane has the same
+ width and height, in pixels, as the Y plane (and the image). Each line
+ contains one CbCr pair per pixel, with each Cb and Cr sample stored in
+ one byte. <constant>V4L2_PIX_FMT_NV42</constant> is the same except that
+ the Cb and Cr samples are swapped, the CrCb plane starts with a Cr
+ sample.</para>
+
+ <para>If the Y plane has pad bytes after each row, then the CbCr plane
+ has twice as many pad bytes after its rows.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_NV24</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="9" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ <entry>Cb<subscript>02</subscript></entry>
+ <entry>Cr<subscript>02</subscript></entry>
+ <entry>Cb<subscript>03</subscript></entry>
+ <entry>Cr<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ <entry>Cb<subscript>12</subscript></entry>
+ <entry>Cr<subscript>12</subscript></entry>
+ <entry>Cb<subscript>13</subscript></entry>
+ <entry>Cr<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;32:</entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ <entry>Cb<subscript>21</subscript></entry>
+ <entry>Cr<subscript>21</subscript></entry>
+ <entry>Cb<subscript>22</subscript></entry>
+ <entry>Cr<subscript>22</subscript></entry>
+ <entry>Cb<subscript>23</subscript></entry>
+ <entry>Cr<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;40:</entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ <entry>Cb<subscript>31</subscript></entry>
+ <entry>Cr<subscript>31</subscript></entry>
+ <entry>Cb<subscript>32</subscript></entry>
+ <entry>Cr<subscript>32</subscript></entry>
+ <entry>Cb<subscript>33</subscript></entry>
+ <entry>Cr<subscript>33</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
new file mode 100644
index 000000000..b60fb935b
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-packed-rgb.xml
@@ -0,0 +1,937 @@
+<refentry id="packed-rgb">
+ <refmeta>
+ <refentrytitle>Packed RGB formats</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname>Packed RGB formats</refname>
+ <refpurpose>Packed RGB formats</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>These formats are designed to match the pixel formats of
+typical PC graphics frame buffers. They occupy 8, 16, 24 or 32 bits
+per pixel. These are all packed-pixel formats, meaning all the data
+for a pixel lie next to each other in memory.</para>
+
+ <table pgwide="1" frame="none" id="rgb-formats">
+ <title>Packed RGB Image Formats</title>
+ <tgroup cols="37" align="center">
+ <colspec colname="id" align="left" />
+ <colspec colname="fourcc" />
+ <colspec colname="bit" />
+
+ <colspec colnum="4" colname="b07" align="center" />
+ <colspec colnum="5" colname="b06" align="center" />
+ <colspec colnum="6" colname="b05" align="center" />
+ <colspec colnum="7" colname="b04" align="center" />
+ <colspec colnum="8" colname="b03" align="center" />
+ <colspec colnum="9" colname="b02" align="center" />
+ <colspec colnum="10" colname="b01" align="center" />
+ <colspec colnum="11" colname="b00" align="center" />
+
+ <colspec colnum="13" colname="b17" align="center" />
+ <colspec colnum="14" colname="b16" align="center" />
+ <colspec colnum="15" colname="b15" align="center" />
+ <colspec colnum="16" colname="b14" align="center" />
+ <colspec colnum="17" colname="b13" align="center" />
+ <colspec colnum="18" colname="b12" align="center" />
+ <colspec colnum="19" colname="b11" align="center" />
+ <colspec colnum="20" colname="b10" align="center" />
+
+ <colspec colnum="22" colname="b27" align="center" />
+ <colspec colnum="23" colname="b26" align="center" />
+ <colspec colnum="24" colname="b25" align="center" />
+ <colspec colnum="25" colname="b24" align="center" />
+ <colspec colnum="26" colname="b23" align="center" />
+ <colspec colnum="27" colname="b22" align="center" />
+ <colspec colnum="28" colname="b21" align="center" />
+ <colspec colnum="29" colname="b20" align="center" />
+
+ <colspec colnum="31" colname="b37" align="center" />
+ <colspec colnum="32" colname="b36" align="center" />
+ <colspec colnum="33" colname="b35" align="center" />
+ <colspec colnum="34" colname="b34" align="center" />
+ <colspec colnum="35" colname="b33" align="center" />
+ <colspec colnum="36" colname="b32" align="center" />
+ <colspec colnum="37" colname="b31" align="center" />
+ <colspec colnum="38" colname="b30" align="center" />
+
+ <spanspec namest="b07" nameend="b00" spanname="b0" />
+ <spanspec namest="b17" nameend="b10" spanname="b1" />
+ <spanspec namest="b27" nameend="b20" spanname="b2" />
+ <spanspec namest="b37" nameend="b30" spanname="b3" />
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry>&nbsp;</entry>
+ <entry spanname="b0">Byte&nbsp;0 in memory</entry>
+ <entry spanname="b1">Byte&nbsp;1</entry>
+ <entry spanname="b2">Byte&nbsp;2</entry>
+ <entry spanname="b3">Byte&nbsp;3</entry>
+ </row>
+ <row>
+ <entry>&nbsp;</entry>
+ <entry>&nbsp;</entry>
+ <entry>Bit</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>&nbsp;</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>&nbsp;</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>&nbsp;</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="V4L2-PIX-FMT-RGB332">
+ <entry><constant>V4L2_PIX_FMT_RGB332</constant></entry>
+ <entry>'RGB1'</entry>
+ <entry></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-ARGB444">
+ <entry><constant>V4L2_PIX_FMT_ARGB444</constant></entry>
+ <entry>'AR12'</entry>
+ <entry></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-XRGB444">
+ <entry><constant>V4L2_PIX_FMT_XRGB444</constant></entry>
+ <entry>'XR12'</entry>
+ <entry></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-ARGB555">
+ <entry><constant>V4L2_PIX_FMT_ARGB555</constant></entry>
+ <entry>'AR15'</entry>
+ <entry></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>a</entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-XRGB555">
+ <entry><constant>V4L2_PIX_FMT_XRGB555</constant></entry>
+ <entry>'XR15'</entry>
+ <entry></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-RGB565">
+ <entry><constant>V4L2_PIX_FMT_RGB565</constant></entry>
+ <entry>'RGBP'</entry>
+ <entry></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-ARGB555X">
+ <entry><constant>V4L2_PIX_FMT_ARGB555X</constant></entry>
+ <entry>'AR15' | (1 &lt;&lt; 31)</entry>
+ <entry></entry>
+ <entry>a</entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-XRGB555X">
+ <entry><constant>V4L2_PIX_FMT_XRGB555X</constant></entry>
+ <entry>'XR15' | (1 &lt;&lt; 31)</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-RGB565X">
+ <entry><constant>V4L2_PIX_FMT_RGB565X</constant></entry>
+ <entry>'RGBR'</entry>
+ <entry></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-BGR24">
+ <entry><constant>V4L2_PIX_FMT_BGR24</constant></entry>
+ <entry>'BGR3'</entry>
+ <entry></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-RGB24">
+ <entry><constant>V4L2_PIX_FMT_RGB24</constant></entry>
+ <entry>'RGB3'</entry>
+ <entry></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-BGR666">
+ <entry><constant>V4L2_PIX_FMT_BGR666</constant></entry>
+ <entry>'BGRH'</entry>
+ <entry></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-ABGR32">
+ <entry><constant>V4L2_PIX_FMT_ABGR32</constant></entry>
+ <entry>'AR24'</entry>
+ <entry></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>a<subscript>7</subscript></entry>
+ <entry>a<subscript>6</subscript></entry>
+ <entry>a<subscript>5</subscript></entry>
+ <entry>a<subscript>4</subscript></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-XBGR32">
+ <entry><constant>V4L2_PIX_FMT_XBGR32</constant></entry>
+ <entry>'XR24'</entry>
+ <entry></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-ARGB32">
+ <entry><constant>V4L2_PIX_FMT_ARGB32</constant></entry>
+ <entry>'BA24'</entry>
+ <entry></entry>
+ <entry>a<subscript>7</subscript></entry>
+ <entry>a<subscript>6</subscript></entry>
+ <entry>a<subscript>5</subscript></entry>
+ <entry>a<subscript>4</subscript></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-XRGB32">
+ <entry><constant>V4L2_PIX_FMT_XRGB32</constant></entry>
+ <entry>'BX24'</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>Bit 7 is the most significant bit.</para>
+
+ <para>The usage and value of the alpha bits (a) in the ARGB and ABGR formats
+ (collectively referred to as alpha formats) depend on the device type and
+ hardware operation. <link linkend="capture">Capture</link> devices
+ (including capture queues of mem-to-mem devices) fill the alpha component in
+ memory. When the device outputs an alpha channel the alpha component will
+ have a meaningful value. Otherwise, when the device doesn't output an alpha
+ channel but can set the alpha bit to a user-configurable value, the <link
+ linkend="v4l2-alpha-component"><constant>V4L2_CID_ALPHA_COMPONENT</constant>
+ </link> control is used to specify that alpha value, and the alpha component
+ of all pixels will be set to the value specified by that control. Otherwise
+ a corresponding format without an alpha component (XRGB or XBGR) must be
+ used instead of an alpha format.</para>
+
+ <para><link linkend="output">Output</link> devices (including output queues
+ of mem-to-mem devices and <link linkend="osd">video output overlay</link>
+ devices) read the alpha component from memory. When the device processes the
+ alpha channel the alpha component must be filled with meaningful values by
+ applications. Otherwise a corresponding format without an alpha component
+ (XRGB or XBGR) must be used instead of an alpha format.</para>
+
+ <para>The XRGB and XBGR formats contain undefined bits (-). Applications,
+ devices and drivers must ignore those bits, for both <link
+ linkend="capture">capture</link> and <link linkend="output">output</link>
+ devices.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_BGR24</constant> 4 &times; 4 pixel
+image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="13" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>B<subscript>00</subscript></entry>
+ <entry>G<subscript>00</subscript></entry>
+ <entry>R<subscript>00</subscript></entry>
+ <entry>B<subscript>01</subscript></entry>
+ <entry>G<subscript>01</subscript></entry>
+ <entry>R<subscript>01</subscript></entry>
+ <entry>B<subscript>02</subscript></entry>
+ <entry>G<subscript>02</subscript></entry>
+ <entry>R<subscript>02</subscript></entry>
+ <entry>B<subscript>03</subscript></entry>
+ <entry>G<subscript>03</subscript></entry>
+ <entry>R<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>B<subscript>10</subscript></entry>
+ <entry>G<subscript>10</subscript></entry>
+ <entry>R<subscript>10</subscript></entry>
+ <entry>B<subscript>11</subscript></entry>
+ <entry>G<subscript>11</subscript></entry>
+ <entry>R<subscript>11</subscript></entry>
+ <entry>B<subscript>12</subscript></entry>
+ <entry>G<subscript>12</subscript></entry>
+ <entry>R<subscript>12</subscript></entry>
+ <entry>B<subscript>13</subscript></entry>
+ <entry>G<subscript>13</subscript></entry>
+ <entry>R<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>B<subscript>20</subscript></entry>
+ <entry>G<subscript>20</subscript></entry>
+ <entry>R<subscript>20</subscript></entry>
+ <entry>B<subscript>21</subscript></entry>
+ <entry>G<subscript>21</subscript></entry>
+ <entry>R<subscript>21</subscript></entry>
+ <entry>B<subscript>22</subscript></entry>
+ <entry>G<subscript>22</subscript></entry>
+ <entry>R<subscript>22</subscript></entry>
+ <entry>B<subscript>23</subscript></entry>
+ <entry>G<subscript>23</subscript></entry>
+ <entry>R<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;36:</entry>
+ <entry>B<subscript>30</subscript></entry>
+ <entry>G<subscript>30</subscript></entry>
+ <entry>R<subscript>30</subscript></entry>
+ <entry>B<subscript>31</subscript></entry>
+ <entry>G<subscript>31</subscript></entry>
+ <entry>R<subscript>31</subscript></entry>
+ <entry>B<subscript>32</subscript></entry>
+ <entry>G<subscript>32</subscript></entry>
+ <entry>R<subscript>32</subscript></entry>
+ <entry>B<subscript>33</subscript></entry>
+ <entry>G<subscript>33</subscript></entry>
+ <entry>R<subscript>33</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+
+ <para>Formats defined in <xref linkend="rgb-formats-deprecated"/> are
+ deprecated and must not be used by new drivers. They are documented here for
+ reference. The meaning of their alpha bits (a) is ill-defined and
+ interpreted as in either the corresponding ARGB or XRGB format, depending on
+ the driver.</para>
+
+ <table pgwide="1" frame="none" id="rgb-formats-deprecated">
+ <title>Deprecated Packed RGB Image Formats</title>
+ <tgroup cols="37" align="center">
+ <colspec colname="id" align="left" />
+ <colspec colname="fourcc" />
+ <colspec colname="bit" />
+
+ <colspec colnum="4" colname="b07" align="center" />
+ <colspec colnum="5" colname="b06" align="center" />
+ <colspec colnum="6" colname="b05" align="center" />
+ <colspec colnum="7" colname="b04" align="center" />
+ <colspec colnum="8" colname="b03" align="center" />
+ <colspec colnum="9" colname="b02" align="center" />
+ <colspec colnum="10" colname="b01" align="center" />
+ <colspec colnum="11" colname="b00" align="center" />
+
+ <colspec colnum="13" colname="b17" align="center" />
+ <colspec colnum="14" colname="b16" align="center" />
+ <colspec colnum="15" colname="b15" align="center" />
+ <colspec colnum="16" colname="b14" align="center" />
+ <colspec colnum="17" colname="b13" align="center" />
+ <colspec colnum="18" colname="b12" align="center" />
+ <colspec colnum="19" colname="b11" align="center" />
+ <colspec colnum="20" colname="b10" align="center" />
+
+ <colspec colnum="22" colname="b27" align="center" />
+ <colspec colnum="23" colname="b26" align="center" />
+ <colspec colnum="24" colname="b25" align="center" />
+ <colspec colnum="25" colname="b24" align="center" />
+ <colspec colnum="26" colname="b23" align="center" />
+ <colspec colnum="27" colname="b22" align="center" />
+ <colspec colnum="28" colname="b21" align="center" />
+ <colspec colnum="29" colname="b20" align="center" />
+
+ <colspec colnum="31" colname="b37" align="center" />
+ <colspec colnum="32" colname="b36" align="center" />
+ <colspec colnum="33" colname="b35" align="center" />
+ <colspec colnum="34" colname="b34" align="center" />
+ <colspec colnum="35" colname="b33" align="center" />
+ <colspec colnum="36" colname="b32" align="center" />
+ <colspec colnum="37" colname="b31" align="center" />
+ <colspec colnum="38" colname="b30" align="center" />
+
+ <spanspec namest="b07" nameend="b00" spanname="b0" />
+ <spanspec namest="b17" nameend="b10" spanname="b1" />
+ <spanspec namest="b27" nameend="b20" spanname="b2" />
+ <spanspec namest="b37" nameend="b30" spanname="b3" />
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry>&nbsp;</entry>
+ <entry spanname="b0">Byte&nbsp;0 in memory</entry>
+ <entry spanname="b1">Byte&nbsp;1</entry>
+ <entry spanname="b2">Byte&nbsp;2</entry>
+ <entry spanname="b3">Byte&nbsp;3</entry>
+ </row>
+ <row>
+ <entry>&nbsp;</entry>
+ <entry>&nbsp;</entry>
+ <entry>Bit</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>&nbsp;</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>&nbsp;</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>&nbsp;</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ </row>
+ </thead>
+ <tbody>
+ <row id="V4L2-PIX-FMT-RGB444">
+ <entry><constant>V4L2_PIX_FMT_RGB444</constant></entry>
+ <entry>'R444'</entry>
+ <entry></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-RGB555">
+ <entry><constant>V4L2_PIX_FMT_RGB555</constant></entry>
+ <entry>'RGBO'</entry>
+ <entry></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>a</entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-RGB555X">
+ <entry><constant>V4L2_PIX_FMT_RGB555X</constant></entry>
+ <entry>'RGBQ'</entry>
+ <entry></entry>
+ <entry>a</entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-BGR32">
+ <entry><constant>V4L2_PIX_FMT_BGR32</constant></entry>
+ <entry>'BGR4'</entry>
+ <entry></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>a<subscript>7</subscript></entry>
+ <entry>a<subscript>6</subscript></entry>
+ <entry>a<subscript>5</subscript></entry>
+ <entry>a<subscript>4</subscript></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-RGB32">
+ <entry><constant>V4L2_PIX_FMT_RGB32</constant></entry>
+ <entry>'RGB4'</entry>
+ <entry></entry>
+ <entry>a<subscript>7</subscript></entry>
+ <entry>a<subscript>6</subscript></entry>
+ <entry>a<subscript>5</subscript></entry>
+ <entry>a<subscript>4</subscript></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>A test utility to determine which RGB formats a driver
+actually supports is available from the LinuxTV v4l-dvb repository.
+See &v4l-dvb; for access instructions.</para>
+
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-packed-yuv.xml b/Documentation/DocBook/media/v4l/pixfmt-packed-yuv.xml
new file mode 100644
index 000000000..33fa5a47a
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-packed-yuv.xml
@@ -0,0 +1,236 @@
+<refentry id="packed-yuv">
+ <refmeta>
+ <refentrytitle>Packed YUV formats</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname>Packed YUV formats</refname>
+ <refpurpose>Packed YUV formats</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>Similar to the packed RGB formats these formats store
+the Y, Cb and Cr component of each pixel in one 16 or 32 bit
+word.</para>
+
+ <table pgwide="1" frame="none">
+ <title>Packed YUV Image Formats</title>
+ <tgroup cols="37" align="center">
+ <colspec colname="id" align="left" />
+ <colspec colname="fourcc" />
+ <colspec colname="bit" />
+
+ <colspec colnum="4" colname="b07" align="center" />
+ <colspec colnum="5" colname="b06" align="center" />
+ <colspec colnum="6" colname="b05" align="center" />
+ <colspec colnum="7" colname="b04" align="center" />
+ <colspec colnum="8" colname="b03" align="center" />
+ <colspec colnum="9" colname="b02" align="center" />
+ <colspec colnum="10" colname="b01" align="center" />
+ <colspec colnum="11" colname="b00" align="center" />
+
+ <colspec colnum="13" colname="b17" align="center" />
+ <colspec colnum="14" colname="b16" align="center" />
+ <colspec colnum="15" colname="b15" align="center" />
+ <colspec colnum="16" colname="b14" align="center" />
+ <colspec colnum="17" colname="b13" align="center" />
+ <colspec colnum="18" colname="b12" align="center" />
+ <colspec colnum="19" colname="b11" align="center" />
+ <colspec colnum="20" colname="b10" align="center" />
+
+ <colspec colnum="22" colname="b27" align="center" />
+ <colspec colnum="23" colname="b26" align="center" />
+ <colspec colnum="24" colname="b25" align="center" />
+ <colspec colnum="25" colname="b24" align="center" />
+ <colspec colnum="26" colname="b23" align="center" />
+ <colspec colnum="27" colname="b22" align="center" />
+ <colspec colnum="28" colname="b21" align="center" />
+ <colspec colnum="29" colname="b20" align="center" />
+
+ <colspec colnum="31" colname="b37" align="center" />
+ <colspec colnum="32" colname="b36" align="center" />
+ <colspec colnum="33" colname="b35" align="center" />
+ <colspec colnum="34" colname="b34" align="center" />
+ <colspec colnum="35" colname="b33" align="center" />
+ <colspec colnum="36" colname="b32" align="center" />
+ <colspec colnum="37" colname="b31" align="center" />
+ <colspec colnum="38" colname="b30" align="center" />
+
+ <spanspec namest="b07" nameend="b00" spanname="b0" />
+ <spanspec namest="b17" nameend="b10" spanname="b1" />
+ <spanspec namest="b27" nameend="b20" spanname="b2" />
+ <spanspec namest="b37" nameend="b30" spanname="b3" />
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry>&nbsp;</entry>
+ <entry spanname="b0">Byte&nbsp;0 in memory</entry>
+ <entry spanname="b1">Byte&nbsp;1</entry>
+ <entry spanname="b2">Byte&nbsp;2</entry>
+ <entry spanname="b3">Byte&nbsp;3</entry>
+ </row>
+ <row>
+ <entry>&nbsp;</entry>
+ <entry>&nbsp;</entry>
+ <entry>Bit</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>&nbsp;</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>&nbsp;</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ <entry>&nbsp;</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="V4L2-PIX-FMT-YUV444">
+ <entry><constant>V4L2_PIX_FMT_YUV444</constant></entry>
+ <entry>'Y444'</entry>
+ <entry></entry>
+ <entry>Cb<subscript>3</subscript></entry>
+ <entry>Cb<subscript>2</subscript></entry>
+ <entry>Cb<subscript>1</subscript></entry>
+ <entry>Cb<subscript>0</subscript></entry>
+ <entry>Cr<subscript>3</subscript></entry>
+ <entry>Cr<subscript>2</subscript></entry>
+ <entry>Cr<subscript>1</subscript></entry>
+ <entry>Cr<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ <entry>Y'<subscript>3</subscript></entry>
+ <entry>Y'<subscript>2</subscript></entry>
+ <entry>Y'<subscript>1</subscript></entry>
+ <entry>Y'<subscript>0</subscript></entry>
+ </row>
+
+ <row id="V4L2-PIX-FMT-YUV555">
+ <entry><constant>V4L2_PIX_FMT_YUV555</constant></entry>
+ <entry>'YUVO'</entry>
+ <entry></entry>
+ <entry>Cb<subscript>2</subscript></entry>
+ <entry>Cb<subscript>1</subscript></entry>
+ <entry>Cb<subscript>0</subscript></entry>
+ <entry>Cr<subscript>4</subscript></entry>
+ <entry>Cr<subscript>3</subscript></entry>
+ <entry>Cr<subscript>2</subscript></entry>
+ <entry>Cr<subscript>1</subscript></entry>
+ <entry>Cr<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>a</entry>
+ <entry>Y'<subscript>4</subscript></entry>
+ <entry>Y'<subscript>3</subscript></entry>
+ <entry>Y'<subscript>2</subscript></entry>
+ <entry>Y'<subscript>1</subscript></entry>
+ <entry>Y'<subscript>0</subscript></entry>
+ <entry>Cb<subscript>4</subscript></entry>
+ <entry>Cb<subscript>3</subscript></entry>
+ </row>
+
+ <row id="V4L2-PIX-FMT-YUV565">
+ <entry><constant>V4L2_PIX_FMT_YUV565</constant></entry>
+ <entry>'YUVP'</entry>
+ <entry></entry>
+ <entry>Cb<subscript>2</subscript></entry>
+ <entry>Cb<subscript>1</subscript></entry>
+ <entry>Cb<subscript>0</subscript></entry>
+ <entry>Cr<subscript>4</subscript></entry>
+ <entry>Cr<subscript>3</subscript></entry>
+ <entry>Cr<subscript>2</subscript></entry>
+ <entry>Cr<subscript>1</subscript></entry>
+ <entry>Cr<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>Y'<subscript>4</subscript></entry>
+ <entry>Y'<subscript>3</subscript></entry>
+ <entry>Y'<subscript>2</subscript></entry>
+ <entry>Y'<subscript>1</subscript></entry>
+ <entry>Y'<subscript>0</subscript></entry>
+ <entry>Cb<subscript>5</subscript></entry>
+ <entry>Cb<subscript>4</subscript></entry>
+ <entry>Cb<subscript>3</subscript></entry>
+ </row>
+
+ <row id="V4L2-PIX-FMT-YUV32">
+ <entry><constant>V4L2_PIX_FMT_YUV32</constant></entry>
+ <entry>'YUV4'</entry>
+ <entry></entry>
+ <entry>a<subscript>7</subscript></entry>
+ <entry>a<subscript>6</subscript></entry>
+ <entry>a<subscript>5</subscript></entry>
+ <entry>a<subscript>4</subscript></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>Y'<subscript>7</subscript></entry>
+ <entry>Y'<subscript>6</subscript></entry>
+ <entry>Y'<subscript>5</subscript></entry>
+ <entry>Y'<subscript>4</subscript></entry>
+ <entry>Y'<subscript>3</subscript></entry>
+ <entry>Y'<subscript>2</subscript></entry>
+ <entry>Y'<subscript>1</subscript></entry>
+ <entry>Y'<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>Cb<subscript>7</subscript></entry>
+ <entry>Cb<subscript>6</subscript></entry>
+ <entry>Cb<subscript>5</subscript></entry>
+ <entry>Cb<subscript>4</subscript></entry>
+ <entry>Cb<subscript>3</subscript></entry>
+ <entry>Cb<subscript>2</subscript></entry>
+ <entry>Cb<subscript>1</subscript></entry>
+ <entry>Cb<subscript>0</subscript></entry>
+ <entry></entry>
+ <entry>Cr<subscript>7</subscript></entry>
+ <entry>Cr<subscript>6</subscript></entry>
+ <entry>Cr<subscript>5</subscript></entry>
+ <entry>Cr<subscript>4</subscript></entry>
+ <entry>Cr<subscript>3</subscript></entry>
+ <entry>Cr<subscript>2</subscript></entry>
+ <entry>Cr<subscript>1</subscript></entry>
+ <entry>Cr<subscript>0</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>Bit 7 is the most significant bit. The value of a = alpha
+bits is undefined when reading from the driver, ignored when writing
+to the driver, except when alpha blending has been negotiated for a
+<link linkend="overlay">Video Overlay</link> or <link
+linkend="osd">Video Output Overlay</link>.</para>
+
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-sbggr16.xml b/Documentation/DocBook/media/v4l/pixfmt-sbggr16.xml
new file mode 100644
index 000000000..6494b05d8
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-sbggr16.xml
@@ -0,0 +1,83 @@
+<refentry id="V4L2-PIX-FMT-SBGGR16">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_SBGGR16 ('BYR2')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_SBGGR16</constant></refname>
+ <refpurpose>Bayer RGB format</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This format is similar to <link
+linkend="V4L2-PIX-FMT-SBGGR8">
+<constant>V4L2_PIX_FMT_SBGGR8</constant></link>, except each pixel has
+a depth of 16 bits. The least significant byte is stored at lower
+memory addresses (little-endian). Note the actual sampling precision
+may be lower than 16 bits, for example 10 bits per pixel with values
+in range 0 to 1023.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_SBGGR16</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>B<subscript>00low</subscript></entry>
+ <entry>B<subscript>00high</subscript></entry>
+ <entry>G<subscript>01low</subscript></entry>
+ <entry>G<subscript>01high</subscript></entry>
+ <entry>B<subscript>02low</subscript></entry>
+ <entry>B<subscript>02high</subscript></entry>
+ <entry>G<subscript>03low</subscript></entry>
+ <entry>G<subscript>03high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>G<subscript>10low</subscript></entry>
+ <entry>G<subscript>10high</subscript></entry>
+ <entry>R<subscript>11low</subscript></entry>
+ <entry>R<subscript>11high</subscript></entry>
+ <entry>G<subscript>12low</subscript></entry>
+ <entry>G<subscript>12high</subscript></entry>
+ <entry>R<subscript>13low</subscript></entry>
+ <entry>R<subscript>13high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>B<subscript>20low</subscript></entry>
+ <entry>B<subscript>20high</subscript></entry>
+ <entry>G<subscript>21low</subscript></entry>
+ <entry>G<subscript>21high</subscript></entry>
+ <entry>B<subscript>22low</subscript></entry>
+ <entry>B<subscript>22high</subscript></entry>
+ <entry>G<subscript>23low</subscript></entry>
+ <entry>G<subscript>23high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>G<subscript>30low</subscript></entry>
+ <entry>G<subscript>30high</subscript></entry>
+ <entry>R<subscript>31low</subscript></entry>
+ <entry>R<subscript>31high</subscript></entry>
+ <entry>G<subscript>32low</subscript></entry>
+ <entry>G<subscript>32high</subscript></entry>
+ <entry>R<subscript>33low</subscript></entry>
+ <entry>R<subscript>33high</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-sbggr8.xml b/Documentation/DocBook/media/v4l/pixfmt-sbggr8.xml
new file mode 100644
index 000000000..5eaf2b42d
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-sbggr8.xml
@@ -0,0 +1,67 @@
+ <refentry id="V4L2-PIX-FMT-SBGGR8">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_SBGGR8 ('BA81')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_SBGGR8</constant></refname>
+ <refpurpose>Bayer RGB format</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is commonly the native format of digital cameras,
+reflecting the arrangement of sensors on the CCD device. Only one red,
+green or blue value is given for each pixel. Missing components must
+be interpolated from neighbouring pixels. From left to right the first
+row consists of a blue and green value, the second row of a green and
+red value. This scheme repeats to the right and down for every two
+columns and rows.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_SBGGR8</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>B<subscript>00</subscript></entry>
+ <entry>G<subscript>01</subscript></entry>
+ <entry>B<subscript>02</subscript></entry>
+ <entry>G<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>G<subscript>10</subscript></entry>
+ <entry>R<subscript>11</subscript></entry>
+ <entry>G<subscript>12</subscript></entry>
+ <entry>R<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>B<subscript>20</subscript></entry>
+ <entry>G<subscript>21</subscript></entry>
+ <entry>B<subscript>22</subscript></entry>
+ <entry>G<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>G<subscript>30</subscript></entry>
+ <entry>R<subscript>31</subscript></entry>
+ <entry>G<subscript>32</subscript></entry>
+ <entry>R<subscript>33</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-sdr-cs08.xml b/Documentation/DocBook/media/v4l/pixfmt-sdr-cs08.xml
new file mode 100644
index 000000000..6118d8f7a
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-sdr-cs08.xml
@@ -0,0 +1,44 @@
+<refentry id="V4L2-SDR-FMT-CS08">
+ <refmeta>
+ <refentrytitle>V4L2_SDR_FMT_CS8 ('CS08')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname>
+ <constant>V4L2_SDR_FMT_CS8</constant>
+ </refname>
+ <refpurpose>Complex signed 8-bit IQ sample</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+ <para>
+This format contains sequence of complex number samples. Each complex number
+consist two parts, called In-phase and Quadrature (IQ). Both I and Q are
+represented as a 8 bit signed number. I value comes first and Q value after
+that.
+ </para>
+ <example>
+ <title><constant>V4L2_SDR_FMT_CS8</constant> 1 sample</title>
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="2" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>I'<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;1:</entry>
+ <entry>Q'<subscript>0</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-sdr-cs14le.xml b/Documentation/DocBook/media/v4l/pixfmt-sdr-cs14le.xml
new file mode 100644
index 000000000..e4b494ce1
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-sdr-cs14le.xml
@@ -0,0 +1,47 @@
+<refentry id="V4L2-SDR-FMT-CS14LE">
+ <refmeta>
+ <refentrytitle>V4L2_SDR_FMT_CS14LE ('CS14')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname>
+ <constant>V4L2_SDR_FMT_CS14LE</constant>
+ </refname>
+ <refpurpose>Complex signed 14-bit little endian IQ sample</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+ <para>
+This format contains sequence of complex number samples. Each complex number
+consist two parts, called In-phase and Quadrature (IQ). Both I and Q are
+represented as a 14 bit signed little endian number. I value comes first
+and Q value after that. 14 bit value is stored in 16 bit space with unused
+high bits padded with 0.
+ </para>
+ <example>
+ <title><constant>V4L2_SDR_FMT_CS14LE</constant> 1 sample</title>
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="3" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>I'<subscript>0[7:0]</subscript></entry>
+ <entry>I'<subscript>0[13:8]</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;2:</entry>
+ <entry>Q'<subscript>0[7:0]</subscript></entry>
+ <entry>Q'<subscript>0[13:8]</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-sdr-cu08.xml b/Documentation/DocBook/media/v4l/pixfmt-sdr-cu08.xml
new file mode 100644
index 000000000..2d80104c1
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-sdr-cu08.xml
@@ -0,0 +1,44 @@
+<refentry id="V4L2-SDR-FMT-CU08">
+ <refmeta>
+ <refentrytitle>V4L2_SDR_FMT_CU8 ('CU08')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname>
+ <constant>V4L2_SDR_FMT_CU8</constant>
+ </refname>
+ <refpurpose>Complex unsigned 8-bit IQ sample</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+ <para>
+This format contains sequence of complex number samples. Each complex number
+consist two parts, called In-phase and Quadrature (IQ). Both I and Q are
+represented as a 8 bit unsigned number. I value comes first and Q value after
+that.
+ </para>
+ <example>
+ <title><constant>V4L2_SDR_FMT_CU8</constant> 1 sample</title>
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="2" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>I'<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;1:</entry>
+ <entry>Q'<subscript>0</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-sdr-cu16le.xml b/Documentation/DocBook/media/v4l/pixfmt-sdr-cu16le.xml
new file mode 100644
index 000000000..26288ffa9
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-sdr-cu16le.xml
@@ -0,0 +1,46 @@
+<refentry id="V4L2-SDR-FMT-CU16LE">
+ <refmeta>
+ <refentrytitle>V4L2_SDR_FMT_CU16LE ('CU16')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname>
+ <constant>V4L2_SDR_FMT_CU16LE</constant>
+ </refname>
+ <refpurpose>Complex unsigned 16-bit little endian IQ sample</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+ <para>
+This format contains sequence of complex number samples. Each complex number
+consist two parts, called In-phase and Quadrature (IQ). Both I and Q are
+represented as a 16 bit unsigned little endian number. I value comes first
+and Q value after that.
+ </para>
+ <example>
+ <title><constant>V4L2_SDR_FMT_CU16LE</constant> 1 sample</title>
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="3" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>I'<subscript>0[7:0]</subscript></entry>
+ <entry>I'<subscript>0[15:8]</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;2:</entry>
+ <entry>Q'<subscript>0[7:0]</subscript></entry>
+ <entry>Q'<subscript>0[15:8]</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-sdr-ru12le.xml b/Documentation/DocBook/media/v4l/pixfmt-sdr-ru12le.xml
new file mode 100644
index 000000000..3df076b99
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-sdr-ru12le.xml
@@ -0,0 +1,40 @@
+<refentry id="V4L2-SDR-FMT-RU12LE">
+ <refmeta>
+ <refentrytitle>V4L2_SDR_FMT_RU12LE ('RU12')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname>
+ <constant>V4L2_SDR_FMT_RU12LE</constant>
+ </refname>
+ <refpurpose>Real unsigned 12-bit little endian sample</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+ <para>
+This format contains sequence of real number samples. Each sample is
+represented as a 12 bit unsigned little endian number. Sample is stored
+in 16 bit space with unused high bits padded with 0.
+ </para>
+ <example>
+ <title><constant>V4L2_SDR_FMT_RU12LE</constant> 1 sample</title>
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="3" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>I'<subscript>0[7:0]</subscript></entry>
+ <entry>I'<subscript>0[11:8]</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-sgbrg8.xml b/Documentation/DocBook/media/v4l/pixfmt-sgbrg8.xml
new file mode 100644
index 000000000..fee65dca7
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-sgbrg8.xml
@@ -0,0 +1,67 @@
+ <refentry id="V4L2-PIX-FMT-SGBRG8">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_SGBRG8 ('GBRG')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_SGBRG8</constant></refname>
+ <refpurpose>Bayer RGB format</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is commonly the native format of digital cameras,
+reflecting the arrangement of sensors on the CCD device. Only one red,
+green or blue value is given for each pixel. Missing components must
+be interpolated from neighbouring pixels. From left to right the first
+row consists of a green and blue value, the second row of a red and
+green value. This scheme repeats to the right and down for every two
+columns and rows.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_SGBRG8</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>G<subscript>00</subscript></entry>
+ <entry>B<subscript>01</subscript></entry>
+ <entry>G<subscript>02</subscript></entry>
+ <entry>B<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>R<subscript>10</subscript></entry>
+ <entry>G<subscript>11</subscript></entry>
+ <entry>R<subscript>12</subscript></entry>
+ <entry>G<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>G<subscript>20</subscript></entry>
+ <entry>B<subscript>21</subscript></entry>
+ <entry>G<subscript>22</subscript></entry>
+ <entry>B<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>R<subscript>30</subscript></entry>
+ <entry>G<subscript>31</subscript></entry>
+ <entry>R<subscript>32</subscript></entry>
+ <entry>G<subscript>33</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-sgrbg8.xml b/Documentation/DocBook/media/v4l/pixfmt-sgrbg8.xml
new file mode 100644
index 000000000..7803b8c41
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-sgrbg8.xml
@@ -0,0 +1,67 @@
+ <refentry id="V4L2-PIX-FMT-SGRBG8">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_SGRBG8 ('GRBG')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_SGRBG8</constant></refname>
+ <refpurpose>Bayer RGB format</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is commonly the native format of digital cameras,
+reflecting the arrangement of sensors on the CCD device. Only one red,
+green or blue value is given for each pixel. Missing components must
+be interpolated from neighbouring pixels. From left to right the first
+row consists of a green and blue value, the second row of a red and
+green value. This scheme repeats to the right and down for every two
+columns and rows.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_SGRBG8</constant> 4 &times;
+4 pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>G<subscript>00</subscript></entry>
+ <entry>R<subscript>01</subscript></entry>
+ <entry>G<subscript>02</subscript></entry>
+ <entry>R<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>B<subscript>10</subscript></entry>
+ <entry>G<subscript>11</subscript></entry>
+ <entry>B<subscript>12</subscript></entry>
+ <entry>G<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>G<subscript>20</subscript></entry>
+ <entry>R<subscript>21</subscript></entry>
+ <entry>G<subscript>22</subscript></entry>
+ <entry>R<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>B<subscript>30</subscript></entry>
+ <entry>G<subscript>31</subscript></entry>
+ <entry>B<subscript>32</subscript></entry>
+ <entry>G<subscript>33</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-srggb10.xml b/Documentation/DocBook/media/v4l/pixfmt-srggb10.xml
new file mode 100644
index 000000000..f34d03ebd
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-srggb10.xml
@@ -0,0 +1,90 @@
+ <refentry id="pixfmt-srggb10">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_SRGGB10 ('RG10'),
+ V4L2_PIX_FMT_SGRBG10 ('BA10'),
+ V4L2_PIX_FMT_SGBRG10 ('GB10'),
+ V4L2_PIX_FMT_SBGGR10 ('BG10'),
+ </refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-SRGGB10"><constant>V4L2_PIX_FMT_SRGGB10</constant></refname>
+ <refname id="V4L2-PIX-FMT-SGRBG10"><constant>V4L2_PIX_FMT_SGRBG10</constant></refname>
+ <refname id="V4L2-PIX-FMT-SGBRG10"><constant>V4L2_PIX_FMT_SGBRG10</constant></refname>
+ <refname id="V4L2-PIX-FMT-SBGGR10"><constant>V4L2_PIX_FMT_SBGGR10</constant></refname>
+ <refpurpose>10-bit Bayer formats expanded to 16 bits</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>These four pixel formats are raw sRGB / Bayer formats with
+10 bits per colour. Each colour component is stored in a 16-bit word, with 6
+unused high bits filled with zeros. Each n-pixel row contains n/2 green samples
+and n/2 blue or red samples, with alternating red and blue rows. Bytes are
+stored in memory in little endian order. They are conventionally described
+as GRGR... BGBG..., RGRG... GBGB..., etc. Below is an example of one of these
+formats</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_SBGGR10</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte, high 6 bits in high bytes are 0.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>B<subscript>00low</subscript></entry>
+ <entry>B<subscript>00high</subscript></entry>
+ <entry>G<subscript>01low</subscript></entry>
+ <entry>G<subscript>01high</subscript></entry>
+ <entry>B<subscript>02low</subscript></entry>
+ <entry>B<subscript>02high</subscript></entry>
+ <entry>G<subscript>03low</subscript></entry>
+ <entry>G<subscript>03high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>G<subscript>10low</subscript></entry>
+ <entry>G<subscript>10high</subscript></entry>
+ <entry>R<subscript>11low</subscript></entry>
+ <entry>R<subscript>11high</subscript></entry>
+ <entry>G<subscript>12low</subscript></entry>
+ <entry>G<subscript>12high</subscript></entry>
+ <entry>R<subscript>13low</subscript></entry>
+ <entry>R<subscript>13high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>B<subscript>20low</subscript></entry>
+ <entry>B<subscript>20high</subscript></entry>
+ <entry>G<subscript>21low</subscript></entry>
+ <entry>G<subscript>21high</subscript></entry>
+ <entry>B<subscript>22low</subscript></entry>
+ <entry>B<subscript>22high</subscript></entry>
+ <entry>G<subscript>23low</subscript></entry>
+ <entry>G<subscript>23high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>G<subscript>30low</subscript></entry>
+ <entry>G<subscript>30high</subscript></entry>
+ <entry>R<subscript>31low</subscript></entry>
+ <entry>R<subscript>31high</subscript></entry>
+ <entry>G<subscript>32low</subscript></entry>
+ <entry>G<subscript>32high</subscript></entry>
+ <entry>R<subscript>33low</subscript></entry>
+ <entry>R<subscript>33high</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml b/Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml
new file mode 100644
index 000000000..d2e5845e5
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-srggb10alaw8.xml
@@ -0,0 +1,34 @@
+ <refentry>
+ <refmeta>
+ <refentrytitle>
+ V4L2_PIX_FMT_SBGGR10ALAW8 ('aBA8'),
+ V4L2_PIX_FMT_SGBRG10ALAW8 ('aGA8'),
+ V4L2_PIX_FMT_SGRBG10ALAW8 ('agA8'),
+ V4L2_PIX_FMT_SRGGB10ALAW8 ('aRA8'),
+ </refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-SBGGR10ALAW8">
+ <constant>V4L2_PIX_FMT_SBGGR10ALAW8</constant>
+ </refname>
+ <refname id="V4L2-PIX-FMT-SGBRG10ALAW8">
+ <constant>V4L2_PIX_FMT_SGBRG10ALAW8</constant>
+ </refname>
+ <refname id="V4L2-PIX-FMT-SGRBG10ALAW8">
+ <constant>V4L2_PIX_FMT_SGRBG10ALAW8</constant>
+ </refname>
+ <refname id="V4L2-PIX-FMT-SRGGB10ALAW8">
+ <constant>V4L2_PIX_FMT_SRGGB10ALAW8</constant>
+ </refname>
+ <refpurpose>10-bit Bayer formats compressed to 8 bits</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+ <para>These four pixel formats are raw sRGB / Bayer
+ formats with 10 bits per color compressed to 8 bits each,
+ using the A-LAW algorithm. Each color component consumes 8
+ bits of memory. In other respects this format is similar to
+ <xref linkend="V4L2-PIX-FMT-SRGGB8"></xref>.</para>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-srggb10dpcm8.xml b/Documentation/DocBook/media/v4l/pixfmt-srggb10dpcm8.xml
new file mode 100644
index 000000000..bde89878c
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-srggb10dpcm8.xml
@@ -0,0 +1,28 @@
+ <refentry id="pixfmt-srggb10dpcm8">
+ <refmeta>
+ <refentrytitle>
+ V4L2_PIX_FMT_SBGGR10DPCM8 ('bBA8'),
+ V4L2_PIX_FMT_SGBRG10DPCM8 ('bGA8'),
+ V4L2_PIX_FMT_SGRBG10DPCM8 ('BD10'),
+ V4L2_PIX_FMT_SRGGB10DPCM8 ('bRA8'),
+ </refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-SBGGR10DPCM8"><constant>V4L2_PIX_FMT_SBGGR10DPCM8</constant></refname>
+ <refname id="V4L2-PIX-FMT-SGBRG10DPCM8"><constant>V4L2_PIX_FMT_SGBRG10DPCM8</constant></refname>
+ <refname id="V4L2-PIX-FMT-SGRBG10DPCM8"><constant>V4L2_PIX_FMT_SGRBG10DPCM8</constant></refname>
+ <refname id="V4L2-PIX-FMT-SRGGB10DPCM8"><constant>V4L2_PIX_FMT_SRGGB10DPCM8</constant></refname>
+ <refpurpose>10-bit Bayer formats compressed to 8 bits</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>These four pixel formats are raw sRGB / Bayer formats
+ with 10 bits per colour compressed to 8 bits each, using DPCM
+ compression. DPCM, differential pulse-code modulation, is lossy.
+ Each colour component consumes 8 bits of memory. In other respects
+ this format is similar to <xref linkend="pixfmt-srggb10" />.</para>
+
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-srggb10p.xml b/Documentation/DocBook/media/v4l/pixfmt-srggb10p.xml
new file mode 100644
index 000000000..a8cc102cd
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-srggb10p.xml
@@ -0,0 +1,99 @@
+ <refentry id="pixfmt-srggb10p">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_SRGGB10P ('pRAA'),
+ V4L2_PIX_FMT_SGRBG10P ('pgAA'),
+ V4L2_PIX_FMT_SGBRG10P ('pGAA'),
+ V4L2_PIX_FMT_SBGGR10P ('pBAA'),
+ </refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-SRGGB10P"><constant>V4L2_PIX_FMT_SRGGB10P</constant></refname>
+ <refname id="V4L2-PIX-FMT-SGRBG10P"><constant>V4L2_PIX_FMT_SGRBG10P</constant></refname>
+ <refname id="V4L2-PIX-FMT-SGBRG10P"><constant>V4L2_PIX_FMT_SGBRG10P</constant></refname>
+ <refname id="V4L2-PIX-FMT-SBGGR10P"><constant>V4L2_PIX_FMT_SBGGR10P</constant></refname>
+ <refpurpose>10-bit packed Bayer formats</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>These four pixel formats are packed raw sRGB /
+ Bayer formats with 10 bits per colour. Every four consecutive
+ colour components are packed into 5 bytes. Each of the first 4
+ bytes contain the 8 high order bits of the pixels, and the
+ fifth byte contains the two least significants bits of each
+ pixel, in the same order.</para>
+
+ <para>Each n-pixel row contains n/2 green samples and n/2 blue
+ or red samples, with alternating green-red and green-blue
+ rows. They are conventionally described as GRGR... BGBG...,
+ RGRG... GBGB..., etc. Below is an example of one of these
+ formats:</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_SBGGR10P</constant> 4 &times; 4
+ pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="topbot" colsep="1" rowsep="1">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>B<subscript>00high</subscript></entry>
+ <entry>G<subscript>01high</subscript></entry>
+ <entry>B<subscript>02high</subscript></entry>
+ <entry>G<subscript>03high</subscript></entry>
+ <entry>B<subscript>00low</subscript>(bits 7--6)
+ G<subscript>01low</subscript>(bits 5--4)
+ B<subscript>02low</subscript>(bits 3--2)
+ G<subscript>03low</subscript>(bits 1--0)
+ </entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;5:</entry>
+ <entry>G<subscript>10high</subscript></entry>
+ <entry>R<subscript>11high</subscript></entry>
+ <entry>G<subscript>12high</subscript></entry>
+ <entry>R<subscript>13high</subscript></entry>
+ <entry>G<subscript>10low</subscript>(bits 7--6)
+ R<subscript>11low</subscript>(bits 5--4)
+ G<subscript>12low</subscript>(bits 3--2)
+ R<subscript>13low</subscript>(bits 1--0)
+ </entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;10:</entry>
+ <entry>B<subscript>20high</subscript></entry>
+ <entry>G<subscript>21high</subscript></entry>
+ <entry>B<subscript>22high</subscript></entry>
+ <entry>G<subscript>23high</subscript></entry>
+ <entry>B<subscript>20low</subscript>(bits 7--6)
+ G<subscript>21low</subscript>(bits 5--4)
+ B<subscript>22low</subscript>(bits 3--2)
+ G<subscript>23low</subscript>(bits 1--0)
+ </entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;15:</entry>
+ <entry>G<subscript>30high</subscript></entry>
+ <entry>R<subscript>31high</subscript></entry>
+ <entry>G<subscript>32high</subscript></entry>
+ <entry>R<subscript>33high</subscript></entry>
+ <entry>G<subscript>30low</subscript>(bits 7--6)
+ R<subscript>31low</subscript>(bits 5--4)
+ G<subscript>32low</subscript>(bits 3--2)
+ R<subscript>33low</subscript>(bits 1--0)
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-srggb12.xml b/Documentation/DocBook/media/v4l/pixfmt-srggb12.xml
new file mode 100644
index 000000000..0c8e4adf4
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-srggb12.xml
@@ -0,0 +1,90 @@
+ <refentry>
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_SRGGB12 ('RG12'),
+ V4L2_PIX_FMT_SGRBG12 ('BA12'),
+ V4L2_PIX_FMT_SGBRG12 ('GB12'),
+ V4L2_PIX_FMT_SBGGR12 ('BG12'),
+ </refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-SRGGB12"><constant>V4L2_PIX_FMT_SRGGB12</constant></refname>
+ <refname id="V4L2-PIX-FMT-SGRBG12"><constant>V4L2_PIX_FMT_SGRBG12</constant></refname>
+ <refname id="V4L2-PIX-FMT-SGBRG12"><constant>V4L2_PIX_FMT_SGBRG12</constant></refname>
+ <refname id="V4L2-PIX-FMT-SBGGR12"><constant>V4L2_PIX_FMT_SBGGR12</constant></refname>
+ <refpurpose>12-bit Bayer formats expanded to 16 bits</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>These four pixel formats are raw sRGB / Bayer formats with
+12 bits per colour. Each colour component is stored in a 16-bit word, with 4
+unused high bits filled with zeros. Each n-pixel row contains n/2 green samples
+and n/2 blue or red samples, with alternating red and blue rows. Bytes are
+stored in memory in little endian order. They are conventionally described
+as GRGR... BGBG..., RGRG... GBGB..., etc. Below is an example of one of these
+formats</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_SBGGR12</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte, high 6 bits in high bytes are 0.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>B<subscript>00low</subscript></entry>
+ <entry>B<subscript>00high</subscript></entry>
+ <entry>G<subscript>01low</subscript></entry>
+ <entry>G<subscript>01high</subscript></entry>
+ <entry>B<subscript>02low</subscript></entry>
+ <entry>B<subscript>02high</subscript></entry>
+ <entry>G<subscript>03low</subscript></entry>
+ <entry>G<subscript>03high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>G<subscript>10low</subscript></entry>
+ <entry>G<subscript>10high</subscript></entry>
+ <entry>R<subscript>11low</subscript></entry>
+ <entry>R<subscript>11high</subscript></entry>
+ <entry>G<subscript>12low</subscript></entry>
+ <entry>G<subscript>12high</subscript></entry>
+ <entry>R<subscript>13low</subscript></entry>
+ <entry>R<subscript>13high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>B<subscript>20low</subscript></entry>
+ <entry>B<subscript>20high</subscript></entry>
+ <entry>G<subscript>21low</subscript></entry>
+ <entry>G<subscript>21high</subscript></entry>
+ <entry>B<subscript>22low</subscript></entry>
+ <entry>B<subscript>22high</subscript></entry>
+ <entry>G<subscript>23low</subscript></entry>
+ <entry>G<subscript>23high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>G<subscript>30low</subscript></entry>
+ <entry>G<subscript>30high</subscript></entry>
+ <entry>R<subscript>31low</subscript></entry>
+ <entry>R<subscript>31high</subscript></entry>
+ <entry>G<subscript>32low</subscript></entry>
+ <entry>G<subscript>32high</subscript></entry>
+ <entry>R<subscript>33low</subscript></entry>
+ <entry>R<subscript>33high</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-srggb8.xml b/Documentation/DocBook/media/v4l/pixfmt-srggb8.xml
new file mode 100644
index 000000000..2570e3be3
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-srggb8.xml
@@ -0,0 +1,67 @@
+ <refentry id="V4L2-PIX-FMT-SRGGB8">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_SRGGB8 ('RGGB')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_SRGGB8</constant></refname>
+ <refpurpose>Bayer RGB format</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is commonly the native format of digital cameras,
+reflecting the arrangement of sensors on the CCD device. Only one red,
+green or blue value is given for each pixel. Missing components must
+be interpolated from neighbouring pixels. From left to right the first
+row consists of a red and green value, the second row of a green and
+blue value. This scheme repeats to the right and down for every two
+columns and rows.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_SRGGB8</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>R<subscript>00</subscript></entry>
+ <entry>G<subscript>01</subscript></entry>
+ <entry>R<subscript>02</subscript></entry>
+ <entry>G<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>G<subscript>10</subscript></entry>
+ <entry>B<subscript>11</subscript></entry>
+ <entry>G<subscript>12</subscript></entry>
+ <entry>B<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>R<subscript>20</subscript></entry>
+ <entry>G<subscript>21</subscript></entry>
+ <entry>R<subscript>22</subscript></entry>
+ <entry>G<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>G<subscript>30</subscript></entry>
+ <entry>B<subscript>31</subscript></entry>
+ <entry>G<subscript>32</subscript></entry>
+ <entry>B<subscript>33</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-uv8.xml b/Documentation/DocBook/media/v4l/pixfmt-uv8.xml
new file mode 100644
index 000000000..c507c1f73
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-uv8.xml
@@ -0,0 +1,62 @@
+ <refentry id="V4L2-PIX-FMT-UV8">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_UV8 ('UV8')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_UV8</constant></refname>
+ <refpurpose>UV plane interleaved</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+ <para>In this format there is no Y plane, Only CbCr plane. ie
+ (UV interleaved)</para>
+ <example>
+ <title>
+ <constant>V4L2_PIX_FMT_UV8</constant>
+ pixel image
+ </title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ <entry>Cb<subscript>21</subscript></entry>
+ <entry>Cr<subscript>21</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ <entry>Cb<subscript>31</subscript></entry>
+ <entry>Cr<subscript>31</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-uyvy.xml b/Documentation/DocBook/media/v4l/pixfmt-uyvy.xml
new file mode 100644
index 000000000..b1f6801a1
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-uyvy.xml
@@ -0,0 +1,120 @@
+ <refentry id="V4L2-PIX-FMT-UYVY">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_UYVY ('UYVY')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_UYVY</constant></refname>
+ <refpurpose>Variation of
+<constant>V4L2_PIX_FMT_YUYV</constant> with different order of samples
+in memory</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>In this format each four bytes is two pixels. Each four
+bytes is two Y's, a Cb and a Cr. Each Y goes to one of the pixels, and
+the Cb and Cr belong to both pixels. As you can see, the Cr and Cb
+components have half the horizontal resolution of the Y
+component.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_UYVY</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="9" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Cb<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Cr<subscript>21</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Cb<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Cr<subscript>31</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-vyuy.xml b/Documentation/DocBook/media/v4l/pixfmt-vyuy.xml
new file mode 100644
index 000000000..82803408b
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-vyuy.xml
@@ -0,0 +1,120 @@
+ <refentry id="V4L2-PIX-FMT-VYUY">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_VYUY ('VYUY')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_VYUY</constant></refname>
+ <refpurpose>Variation of
+<constant>V4L2_PIX_FMT_YUYV</constant> with different order of samples
+in memory</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>In this format each four bytes is two pixels. Each four
+bytes is two Y's, a Cb and a Cr. Each Y goes to one of the pixels, and
+the Cb and Cr belong to both pixels. As you can see, the Cr and Cb
+components have half the horizontal resolution of the Y
+component.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_VYUY</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="9" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Cr<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Cb<subscript>21</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Cr<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Cb<subscript>31</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-y10.xml b/Documentation/DocBook/media/v4l/pixfmt-y10.xml
new file mode 100644
index 000000000..d065043db
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-y10.xml
@@ -0,0 +1,79 @@
+<refentry id="V4L2-PIX-FMT-Y10">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_Y10 ('Y10 ')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_Y10</constant></refname>
+ <refpurpose>Grey-scale image</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is a grey-scale image with a depth of 10 bits per pixel. Pixels
+are stored in 16-bit words with unused high bits padded with 0. The least
+significant byte is stored at lower memory addresses (little-endian).</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_Y10</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="9" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00low</subscript></entry>
+ <entry>Y'<subscript>00high</subscript></entry>
+ <entry>Y'<subscript>01low</subscript></entry>
+ <entry>Y'<subscript>01high</subscript></entry>
+ <entry>Y'<subscript>02low</subscript></entry>
+ <entry>Y'<subscript>02high</subscript></entry>
+ <entry>Y'<subscript>03low</subscript></entry>
+ <entry>Y'<subscript>03high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>10low</subscript></entry>
+ <entry>Y'<subscript>10high</subscript></entry>
+ <entry>Y'<subscript>11low</subscript></entry>
+ <entry>Y'<subscript>11high</subscript></entry>
+ <entry>Y'<subscript>12low</subscript></entry>
+ <entry>Y'<subscript>12high</subscript></entry>
+ <entry>Y'<subscript>13low</subscript></entry>
+ <entry>Y'<subscript>13high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Y'<subscript>20low</subscript></entry>
+ <entry>Y'<subscript>20high</subscript></entry>
+ <entry>Y'<subscript>21low</subscript></entry>
+ <entry>Y'<subscript>21high</subscript></entry>
+ <entry>Y'<subscript>22low</subscript></entry>
+ <entry>Y'<subscript>22high</subscript></entry>
+ <entry>Y'<subscript>23low</subscript></entry>
+ <entry>Y'<subscript>23high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Y'<subscript>30low</subscript></entry>
+ <entry>Y'<subscript>30high</subscript></entry>
+ <entry>Y'<subscript>31low</subscript></entry>
+ <entry>Y'<subscript>31high</subscript></entry>
+ <entry>Y'<subscript>32low</subscript></entry>
+ <entry>Y'<subscript>32high</subscript></entry>
+ <entry>Y'<subscript>33low</subscript></entry>
+ <entry>Y'<subscript>33high</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-y10b.xml b/Documentation/DocBook/media/v4l/pixfmt-y10b.xml
new file mode 100644
index 000000000..adb0ad808
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-y10b.xml
@@ -0,0 +1,43 @@
+<refentry id="V4L2-PIX-FMT-Y10BPACK">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_Y10BPACK ('Y10B')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_Y10BPACK</constant></refname>
+ <refpurpose>Grey-scale image as a bit-packed array</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is a packed grey-scale image format with a depth of 10 bits per
+ pixel. Pixels are stored in a bit-packed array of 10bit bits per pixel,
+ with no padding between them and with the most significant bits coming
+ first from the left.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_Y10BPACK</constant> 4 pixel data stream taking 5 bytes</title>
+
+ <formalpara>
+ <title>Bit-packed representation</title>
+ <para>pixels cross the byte boundary and have a ratio of 5 bytes for each 4
+ pixels.
+ <informaltable frame="all">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>Y'<subscript>00[9:2]</subscript></entry>
+ <entry>Y'<subscript>00[1:0]</subscript>Y'<subscript>01[9:4]</subscript></entry>
+ <entry>Y'<subscript>01[3:0]</subscript>Y'<subscript>02[9:6]</subscript></entry>
+ <entry>Y'<subscript>02[5:0]</subscript>Y'<subscript>03[9:8]</subscript></entry>
+ <entry>Y'<subscript>03[7:0]</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-y12.xml b/Documentation/DocBook/media/v4l/pixfmt-y12.xml
new file mode 100644
index 000000000..ff417b858
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-y12.xml
@@ -0,0 +1,79 @@
+<refentry id="V4L2-PIX-FMT-Y12">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_Y12 ('Y12 ')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_Y12</constant></refname>
+ <refpurpose>Grey-scale image</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is a grey-scale image with a depth of 12 bits per pixel. Pixels
+are stored in 16-bit words with unused high bits padded with 0. The least
+significant byte is stored at lower memory addresses (little-endian).</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_Y12</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="9" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00low</subscript></entry>
+ <entry>Y'<subscript>00high</subscript></entry>
+ <entry>Y'<subscript>01low</subscript></entry>
+ <entry>Y'<subscript>01high</subscript></entry>
+ <entry>Y'<subscript>02low</subscript></entry>
+ <entry>Y'<subscript>02high</subscript></entry>
+ <entry>Y'<subscript>03low</subscript></entry>
+ <entry>Y'<subscript>03high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>10low</subscript></entry>
+ <entry>Y'<subscript>10high</subscript></entry>
+ <entry>Y'<subscript>11low</subscript></entry>
+ <entry>Y'<subscript>11high</subscript></entry>
+ <entry>Y'<subscript>12low</subscript></entry>
+ <entry>Y'<subscript>12high</subscript></entry>
+ <entry>Y'<subscript>13low</subscript></entry>
+ <entry>Y'<subscript>13high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Y'<subscript>20low</subscript></entry>
+ <entry>Y'<subscript>20high</subscript></entry>
+ <entry>Y'<subscript>21low</subscript></entry>
+ <entry>Y'<subscript>21high</subscript></entry>
+ <entry>Y'<subscript>22low</subscript></entry>
+ <entry>Y'<subscript>22high</subscript></entry>
+ <entry>Y'<subscript>23low</subscript></entry>
+ <entry>Y'<subscript>23high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Y'<subscript>30low</subscript></entry>
+ <entry>Y'<subscript>30high</subscript></entry>
+ <entry>Y'<subscript>31low</subscript></entry>
+ <entry>Y'<subscript>31high</subscript></entry>
+ <entry>Y'<subscript>32low</subscript></entry>
+ <entry>Y'<subscript>32high</subscript></entry>
+ <entry>Y'<subscript>33low</subscript></entry>
+ <entry>Y'<subscript>33high</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-y16.xml b/Documentation/DocBook/media/v4l/pixfmt-y16.xml
new file mode 100644
index 000000000..ff4f727d5
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-y16.xml
@@ -0,0 +1,81 @@
+<refentry id="V4L2-PIX-FMT-Y16">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_Y16 ('Y16 ')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_Y16</constant></refname>
+ <refpurpose>Grey-scale image</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is a grey-scale image with a depth of 16 bits per
+pixel. The least significant byte is stored at lower memory addresses
+(little-endian). Note the actual sampling precision may be lower than
+16 bits, for example 10 bits per pixel with values in range 0 to
+1023.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_Y16</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="9" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00low</subscript></entry>
+ <entry>Y'<subscript>00high</subscript></entry>
+ <entry>Y'<subscript>01low</subscript></entry>
+ <entry>Y'<subscript>01high</subscript></entry>
+ <entry>Y'<subscript>02low</subscript></entry>
+ <entry>Y'<subscript>02high</subscript></entry>
+ <entry>Y'<subscript>03low</subscript></entry>
+ <entry>Y'<subscript>03high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>10low</subscript></entry>
+ <entry>Y'<subscript>10high</subscript></entry>
+ <entry>Y'<subscript>11low</subscript></entry>
+ <entry>Y'<subscript>11high</subscript></entry>
+ <entry>Y'<subscript>12low</subscript></entry>
+ <entry>Y'<subscript>12high</subscript></entry>
+ <entry>Y'<subscript>13low</subscript></entry>
+ <entry>Y'<subscript>13high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Y'<subscript>20low</subscript></entry>
+ <entry>Y'<subscript>20high</subscript></entry>
+ <entry>Y'<subscript>21low</subscript></entry>
+ <entry>Y'<subscript>21high</subscript></entry>
+ <entry>Y'<subscript>22low</subscript></entry>
+ <entry>Y'<subscript>22high</subscript></entry>
+ <entry>Y'<subscript>23low</subscript></entry>
+ <entry>Y'<subscript>23high</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Y'<subscript>30low</subscript></entry>
+ <entry>Y'<subscript>30high</subscript></entry>
+ <entry>Y'<subscript>31low</subscript></entry>
+ <entry>Y'<subscript>31high</subscript></entry>
+ <entry>Y'<subscript>32low</subscript></entry>
+ <entry>Y'<subscript>32high</subscript></entry>
+ <entry>Y'<subscript>33low</subscript></entry>
+ <entry>Y'<subscript>33high</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-y41p.xml b/Documentation/DocBook/media/v4l/pixfmt-y41p.xml
new file mode 100644
index 000000000..98dcb91d2
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-y41p.xml
@@ -0,0 +1,149 @@
+ <refentry id="V4L2-PIX-FMT-Y41P">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_Y41P ('Y41P')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_Y41P</constant></refname>
+ <refpurpose>Format with &frac14; horizontal chroma
+resolution, also known as YUV 4:1:1</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>In this format each 12 bytes is eight pixels. In the
+twelve bytes are two CbCr pairs and eight Y's. The first CbCr pair
+goes with the first four Y's, and the second CbCr pair goes with the
+other four Y's. The Cb and Cr components have one fourth the
+horizontal resolution of the Y component.</para>
+
+ <para>Do not confuse this format with <link
+linkend="V4L2-PIX-FMT-YUV411P"><constant>V4L2_PIX_FMT_YUV411P</constant></link>.
+Y41P is derived from "YUV 4:1:1 <emphasis>packed</emphasis>", while
+YUV411P stands for "YUV 4:1:1 <emphasis>planar</emphasis>".</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_Y41P</constant> 8 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="13" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ <entry>Y'<subscript>04</subscript></entry>
+ <entry>Y'<subscript>05</subscript></entry>
+ <entry>Y'<subscript>06</subscript></entry>
+ <entry>Y'<subscript>07</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ <entry>Y'<subscript>14</subscript></entry>
+ <entry>Y'<subscript>15</subscript></entry>
+ <entry>Y'<subscript>16</subscript></entry>
+ <entry>Y'<subscript>17</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Cb<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Cr<subscript>21</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ <entry>Y'<subscript>24</subscript></entry>
+ <entry>Y'<subscript>25</subscript></entry>
+ <entry>Y'<subscript>26</subscript></entry>
+ <entry>Y'<subscript>27</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;36:</entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Cb<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Cr<subscript>31</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ <entry>Y'<subscript>34</subscript></entry>
+ <entry>Y'<subscript>35</subscript></entry>
+ <entry>Y'<subscript>36</subscript></entry>
+ <entry>Y'<subscript>37</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable></para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="15" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry><entry></entry>
+ <entry>4</entry><entry></entry><entry>5</entry><entry></entry>
+ <entry>6</entry><entry></entry><entry>7</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-yuv410.xml b/Documentation/DocBook/media/v4l/pixfmt-yuv410.xml
new file mode 100644
index 000000000..0869dce5f
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-yuv410.xml
@@ -0,0 +1,133 @@
+ <refentry>
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_YVU410 ('YVU9'), V4L2_PIX_FMT_YUV410 ('YUV9')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-YVU410"><constant>V4L2_PIX_FMT_YVU410</constant></refname>
+ <refname id="V4L2-PIX-FMT-YUV410"><constant>V4L2_PIX_FMT_YUV410</constant></refname>
+ <refpurpose>Planar formats with &frac14; horizontal and
+vertical chroma resolution, also known as YUV 4:1:0</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>These are planar formats, as opposed to a packed format.
+The three components are separated into three sub-images or planes.
+The Y plane is first. The Y plane has one byte per pixel. For
+<constant>V4L2_PIX_FMT_YVU410</constant>, the Cr plane immediately
+follows the Y plane in memory. The Cr plane is &frac14; the width and
+&frac14; the height of the Y plane (and of the image). Each Cr belongs
+to 16 pixels, a four-by-four square of the image. Following the Cr
+plane is the Cb plane, just like the Cr plane.
+<constant>V4L2_PIX_FMT_YUV410</constant> is the same, except the Cb
+plane comes first, then the Cr plane.</para>
+
+ <para>If the Y plane has pad bytes after each row, then the Cr
+and Cb planes have &frac14; as many pad bytes after their rows. In
+other words, four Cx rows (including padding) are exactly as long as
+one Y row (including padding).</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_YVU410</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;17:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry></entry><entry></entry><entry>C</entry>
+ <entry></entry><entry></entry><entry></entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-yuv411p.xml b/Documentation/DocBook/media/v4l/pixfmt-yuv411p.xml
new file mode 100644
index 000000000..086dc731b
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-yuv411p.xml
@@ -0,0 +1,147 @@
+ <refentry id="V4L2-PIX-FMT-YUV411P">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_YUV411P ('411P')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_YUV411P</constant></refname>
+ <refpurpose>Format with &frac14; horizontal chroma resolution,
+also known as YUV 4:1:1. Planar layout as opposed to
+<constant>V4L2_PIX_FMT_Y41P</constant></refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This format is not commonly used. This is a planar
+format similar to the 4:2:2 planar format except with half as many
+chroma. The three components are separated into three sub-images or
+planes. The Y plane is first. The Y plane has one byte per pixel. The
+Cb plane immediately follows the Y plane in memory. The Cb plane is
+&frac14; the width of the Y plane (and of the image). Each Cb belongs
+to 4 pixels all on the same row. For example,
+Cb<subscript>0</subscript> belongs to Y'<subscript>00</subscript>,
+Y'<subscript>01</subscript>, Y'<subscript>02</subscript> and
+Y'<subscript>03</subscript>. Following the Cb plane is the Cr plane,
+just like the Cb plane.</para>
+
+ <para>If the Y plane has pad bytes after each row, then the Cr
+and Cb planes have &frac14; as many pad bytes after their rows. In
+other words, four C x rows (including padding) is exactly as long as
+one Y row (including padding).</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_YUV411P</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;17:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;18:</entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;19:</entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;20:</entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;21:</entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;22:</entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;23:</entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry>C</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-yuv420.xml b/Documentation/DocBook/media/v4l/pixfmt-yuv420.xml
new file mode 100644
index 000000000..48649fac1
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-yuv420.xml
@@ -0,0 +1,149 @@
+ <refentry>
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_YVU420 ('YV12'), V4L2_PIX_FMT_YUV420 ('YU12')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname id="V4L2-PIX-FMT-YVU420"><constant>V4L2_PIX_FMT_YVU420</constant></refname>
+ <refname id="V4L2-PIX-FMT-YUV420"><constant>V4L2_PIX_FMT_YUV420</constant></refname>
+ <refpurpose>Planar formats with &frac12; horizontal and
+vertical chroma resolution, also known as YUV 4:2:0</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>These are planar formats, as opposed to a packed format.
+The three components are separated into three sub- images or planes.
+The Y plane is first. The Y plane has one byte per pixel. For
+<constant>V4L2_PIX_FMT_YVU420</constant>, the Cr plane immediately
+follows the Y plane in memory. The Cr plane is half the width and half
+the height of the Y plane (and of the image). Each Cr belongs to four
+pixels, a two-by-two square of the image. For example,
+Cr<subscript>0</subscript> belongs to Y'<subscript>00</subscript>,
+Y'<subscript>01</subscript>, Y'<subscript>10</subscript>, and
+Y'<subscript>11</subscript>. Following the Cr plane is the Cb plane,
+just like the Cr plane. <constant>V4L2_PIX_FMT_YUV420</constant> is
+the same except the Cb plane comes first, then the Cr plane.</para>
+
+ <para>If the Y plane has pad bytes after each row, then the Cr
+and Cb planes have half as many pad bytes after their rows. In other
+words, two Cx rows (including padding) is exactly as long as one Y row
+(including padding).</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_YVU420</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;18:</entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;20:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;22:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-yuv420m.xml b/Documentation/DocBook/media/v4l/pixfmt-yuv420m.xml
new file mode 100644
index 000000000..e781cc617
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-yuv420m.xml
@@ -0,0 +1,154 @@
+ <refentry id="V4L2-PIX-FMT-YUV420M">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_YUV420M ('YM12')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname> <constant>V4L2_PIX_FMT_YUV420M</constant></refname>
+ <refpurpose>Variation of <constant>V4L2_PIX_FMT_YUV420</constant>
+ with planes non contiguous in memory. </refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is a multi-planar format, as opposed to a packed format.
+The three components are separated into three sub- images or planes.
+
+The Y plane is first. The Y plane has one byte per pixel. The Cb data
+constitutes the second plane which is half the width and half
+the height of the Y plane (and of the image). Each Cb belongs to four
+pixels, a two-by-two square of the image. For example,
+Cb<subscript>0</subscript> belongs to Y'<subscript>00</subscript>,
+Y'<subscript>01</subscript>, Y'<subscript>10</subscript>, and
+Y'<subscript>11</subscript>. The Cr data, just like the Cb plane, is
+in the third plane. </para>
+
+ <para>If the Y plane has pad bytes after each row, then the Cb
+and Cr planes have half as many pad bytes after their rows. In other
+words, two Cx rows (including padding) is exactly as long as one Y row
+(including padding).</para>
+
+ <para><constant>V4L2_PIX_FMT_YUV420M</constant> is intended to be
+used only in drivers and applications that support the multi-planar API,
+described in <xref linkend="planar-apis"/>. </para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_YUV420M</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start0&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry>start1&nbsp;+&nbsp;0:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start1&nbsp;+&nbsp;2:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry>start2&nbsp;+&nbsp;0:</entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start2&nbsp;+&nbsp;2:</entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-yuv422p.xml b/Documentation/DocBook/media/v4l/pixfmt-yuv422p.xml
new file mode 100644
index 000000000..4ce6463fe
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-yuv422p.xml
@@ -0,0 +1,153 @@
+ <refentry id="V4L2-PIX-FMT-YUV422P">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_YUV422P ('422P')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_YUV422P</constant></refname>
+ <refpurpose>Format with &frac12; horizontal chroma resolution,
+also known as YUV 4:2:2. Planar layout as opposed to
+<constant>V4L2_PIX_FMT_YUYV</constant></refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>This format is not commonly used. This is a planar
+version of the YUYV format. The three components are separated into
+three sub-images or planes. The Y plane is first. The Y plane has one
+byte per pixel. The Cb plane immediately follows the Y plane in
+memory. The Cb plane is half the width of the Y plane (and of the
+image). Each Cb belongs to two pixels. For example,
+Cb<subscript>0</subscript> belongs to Y'<subscript>00</subscript>,
+Y'<subscript>01</subscript>. Following the Cb plane is the Cr plane,
+just like the Cb plane.</para>
+
+ <para>If the Y plane has pad bytes after each row, then the Cr
+and Cb planes have half as many pad bytes after their rows. In other
+words, two Cx rows (including padding) is exactly as long as one Y row
+(including padding).</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_YUV422P</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;18:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;20:</entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ <entry>Cb<subscript>21</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;22:</entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ <entry>Cb<subscript>31</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;26:</entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;28:</entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ <entry>Cr<subscript>21</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;30:</entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ <entry>Cr<subscript>31</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-yuyv.xml b/Documentation/DocBook/media/v4l/pixfmt-yuyv.xml
new file mode 100644
index 000000000..583840922
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-yuyv.xml
@@ -0,0 +1,120 @@
+ <refentry id="V4L2-PIX-FMT-YUYV">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_YUYV ('YUYV')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_YUYV</constant></refname>
+ <refpurpose>Packed format with &frac12; horizontal chroma
+resolution, also known as YUV 4:2:2</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>In this format each four bytes is two pixels. Each four
+bytes is two Y's, a Cb and a Cr. Each Y goes to one of the pixels, and
+the Cb and Cr belong to both pixels. As you can see, the Cr and Cb
+components have half the horizontal resolution of the Y component.
+<constant>V4L2_PIX_FMT_YUYV </constant> is known in the Windows
+environment as YUY2.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_YUYV</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="9" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Cb<subscript>21</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ <entry>Cr<subscript>21</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Cb<subscript>31</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ <entry>Cr<subscript>31</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-yvu420m.xml b/Documentation/DocBook/media/v4l/pixfmt-yvu420m.xml
new file mode 100644
index 000000000..233066790
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-yvu420m.xml
@@ -0,0 +1,154 @@
+ <refentry id="V4L2-PIX-FMT-YVU420M">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_YVU420M ('YM21')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname> <constant>V4L2_PIX_FMT_YVU420M</constant></refname>
+ <refpurpose>Variation of <constant>V4L2_PIX_FMT_YVU420</constant>
+ with planes non contiguous in memory. </refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>This is a multi-planar format, as opposed to a packed format.
+The three components are separated into three sub-images or planes.
+
+The Y plane is first. The Y plane has one byte per pixel. The Cr data
+constitutes the second plane which is half the width and half
+the height of the Y plane (and of the image). Each Cr belongs to four
+pixels, a two-by-two square of the image. For example,
+Cr<subscript>0</subscript> belongs to Y'<subscript>00</subscript>,
+Y'<subscript>01</subscript>, Y'<subscript>10</subscript>, and
+Y'<subscript>11</subscript>. The Cb data, just like the Cr plane, constitutes
+the third plane. </para>
+
+ <para>If the Y plane has pad bytes after each row, then the Cr
+and Cb planes have half as many pad bytes after their rows. In other
+words, two Cx rows (including padding) is exactly as long as one Y row
+(including padding).</para>
+
+ <para><constant>V4L2_PIX_FMT_YVU420M</constant> is intended to be
+used only in drivers and applications that support the multi-planar API,
+described in <xref linkend="planar-apis"/>. </para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_YVU420M</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="5" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start0&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;4:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ </row>
+ <row>
+ <entry>start0&nbsp;+&nbsp;12:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry>start1&nbsp;+&nbsp;0:</entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start1&nbsp;+&nbsp;2:</entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ </row>
+ <row><entry></entry></row>
+ <row>
+ <entry>start2&nbsp;+&nbsp;0:</entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start2&nbsp;+&nbsp;2:</entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry><entry>C</entry><entry></entry><entry></entry>
+ <entry></entry><entry>C</entry><entry></entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry></entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt-yvyu.xml b/Documentation/DocBook/media/v4l/pixfmt-yvyu.xml
new file mode 100644
index 000000000..bfffdc76d
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt-yvyu.xml
@@ -0,0 +1,120 @@
+ <refentry id="V4L2-PIX-FMT-YVYU">
+ <refmeta>
+ <refentrytitle>V4L2_PIX_FMT_YVYU ('YVYU')</refentrytitle>
+ &manvol;
+ </refmeta>
+ <refnamediv>
+ <refname><constant>V4L2_PIX_FMT_YVYU</constant></refname>
+ <refpurpose>Variation of
+<constant>V4L2_PIX_FMT_YUYV</constant> with different order of samples
+in memory</refpurpose>
+ </refnamediv>
+ <refsect1>
+ <title>Description</title>
+
+ <para>In this format each four bytes is two pixels. Each four
+bytes is two Y's, a Cb and a Cr. Each Y goes to one of the pixels, and
+the Cb and Cr belong to both pixels. As you can see, the Cr and Cb
+components have half the horizontal resolution of the Y
+component.</para>
+
+ <example>
+ <title><constant>V4L2_PIX_FMT_YVYU</constant> 4 &times; 4
+pixel image</title>
+
+ <formalpara>
+ <title>Byte Order.</title>
+ <para>Each cell is one byte.
+ <informaltable frame="none">
+ <tgroup cols="9" align="center">
+ <colspec align="left" colwidth="2*" />
+ <tbody valign="top">
+ <row>
+ <entry>start&nbsp;+&nbsp;0:</entry>
+ <entry>Y'<subscript>00</subscript></entry>
+ <entry>Cr<subscript>00</subscript></entry>
+ <entry>Y'<subscript>01</subscript></entry>
+ <entry>Cb<subscript>00</subscript></entry>
+ <entry>Y'<subscript>02</subscript></entry>
+ <entry>Cr<subscript>01</subscript></entry>
+ <entry>Y'<subscript>03</subscript></entry>
+ <entry>Cb<subscript>01</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;8:</entry>
+ <entry>Y'<subscript>10</subscript></entry>
+ <entry>Cr<subscript>10</subscript></entry>
+ <entry>Y'<subscript>11</subscript></entry>
+ <entry>Cb<subscript>10</subscript></entry>
+ <entry>Y'<subscript>12</subscript></entry>
+ <entry>Cr<subscript>11</subscript></entry>
+ <entry>Y'<subscript>13</subscript></entry>
+ <entry>Cb<subscript>11</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;16:</entry>
+ <entry>Y'<subscript>20</subscript></entry>
+ <entry>Cr<subscript>20</subscript></entry>
+ <entry>Y'<subscript>21</subscript></entry>
+ <entry>Cb<subscript>20</subscript></entry>
+ <entry>Y'<subscript>22</subscript></entry>
+ <entry>Cr<subscript>21</subscript></entry>
+ <entry>Y'<subscript>23</subscript></entry>
+ <entry>Cb<subscript>21</subscript></entry>
+ </row>
+ <row>
+ <entry>start&nbsp;+&nbsp;24:</entry>
+ <entry>Y'<subscript>30</subscript></entry>
+ <entry>Cr<subscript>30</subscript></entry>
+ <entry>Y'<subscript>31</subscript></entry>
+ <entry>Cb<subscript>30</subscript></entry>
+ <entry>Y'<subscript>32</subscript></entry>
+ <entry>Cr<subscript>31</subscript></entry>
+ <entry>Y'<subscript>33</subscript></entry>
+ <entry>Cb<subscript>31</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+
+ <formalpara>
+ <title>Color Sample Location.</title>
+ <para>
+ <informaltable frame="none">
+ <tgroup cols="7" align="center">
+ <tbody valign="top">
+ <row>
+ <entry></entry>
+ <entry>0</entry><entry></entry><entry>1</entry><entry></entry>
+ <entry>2</entry><entry></entry><entry>3</entry>
+ </row>
+ <row>
+ <entry>0</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>1</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>2</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ <row>
+ <entry>3</entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry><entry></entry>
+ <entry>Y</entry><entry>C</entry><entry>Y</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+ </para>
+ </formalpara>
+ </example>
+ </refsect1>
+ </refentry>
diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
new file mode 100644
index 000000000..fcde4e202
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -0,0 +1,1801 @@
+ <title>Image Formats</title>
+
+ <para>The V4L2 API was primarily designed for devices exchanging
+image data with applications. The
+<structname>v4l2_pix_format</structname> and <structname>v4l2_pix_format_mplane
+</structname> structures define the format and layout of an image in memory.
+The former is used with the single-planar API, while the latter is used with the
+multi-planar version (see <xref linkend="planar-apis"/>). Image formats are
+negotiated with the &VIDIOC-S-FMT; ioctl. (The explanations here focus on video
+capturing and output, for overlay frame buffer formats see also
+&VIDIOC-G-FBUF;.)</para>
+
+<section>
+ <title>Single-planar format structure</title>
+ <table pgwide="1" frame="none" id="v4l2-pix-format">
+ <title>struct <structname>v4l2_pix_format</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>width</structfield></entry>
+ <entry>Image width in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>height</structfield></entry>
+ <entry>Image height in pixels. If <structfield>field</structfield> is
+ one of <constant>V4L2_FIELD_TOP</constant>, <constant>V4L2_FIELD_BOTTOM</constant>
+ or <constant>V4L2_FIELD_ALTERNATE</constant> then height refers to the
+ number of lines in the field, otherwise it refers to the number of
+ lines in the frame (which is twice the field height for interlaced
+ formats).</entry>
+ </row>
+ <row>
+ <entry spanname="hspan">Applications set these fields to
+request an image size, drivers return the closest possible values. In
+case of planar formats the <structfield>width</structfield> and
+<structfield>height</structfield> applies to the largest plane. To
+avoid ambiguities drivers must return values rounded up to a multiple
+of the scale factor of any smaller planes. For example when the image
+format is YUV 4:2:0, <structfield>width</structfield> and
+<structfield>height</structfield> must be multiples of two.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pixelformat</structfield></entry>
+ <entry>The pixel format or type of compression, set by the
+application. This is a little endian <link
+linkend="v4l2-fourcc">four character code</link>. V4L2 defines
+standard RGB formats in <xref linkend="rgb-formats" />, YUV formats in <xref
+linkend="yuv-formats" />, and reserved codes in <xref
+linkend="reserved-formats" /></entry>
+ </row>
+ <row>
+ <entry>&v4l2-field;</entry>
+ <entry><structfield>field</structfield></entry>
+ <entry>Video images are typically interlaced. Applications
+can request to capture or output only the top or bottom field, or both
+fields interlaced or sequentially stored in one buffer or alternating
+in separate buffers. Drivers return the actual field order selected.
+For more details on fields see <xref linkend="field-order" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>bytesperline</structfield></entry>
+ <entry>Distance in bytes between the leftmost pixels in two
+adjacent lines.</entry>
+ </row>
+ <row>
+ <entry spanname="hspan"><para>Both applications and drivers
+can set this field to request padding bytes at the end of each line.
+Drivers however may ignore the value requested by the application,
+returning <structfield>width</structfield> times bytes per pixel or a
+larger value required by the hardware. That implies applications can
+just set this field to zero to get a reasonable
+default.</para><para>Video hardware may access padding bytes,
+therefore they must reside in accessible memory. Consider cases where
+padding bytes after the last line of an image cross a system page
+boundary. Input devices may write padding bytes, the value is
+undefined. Output devices ignore the contents of padding
+bytes.</para><para>When the image format is planar the
+<structfield>bytesperline</structfield> value applies to the first
+plane and is divided by the same factor as the
+<structfield>width</structfield> field for the other planes. For
+example the Cb and Cr planes of a YUV 4:2:0 image have half as many
+padding bytes following each line as the Y plane. To avoid ambiguities
+drivers must return a <structfield>bytesperline</structfield> value
+rounded up to a multiple of the scale factor.</para>
+<para>For compressed formats the <structfield>bytesperline</structfield>
+value makes no sense. Applications and drivers must set this to 0 in
+that case.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>sizeimage</structfield></entry>
+ <entry>Size in bytes of the buffer to hold a complete image,
+set by the driver. Usually this is
+<structfield>bytesperline</structfield> times
+<structfield>height</structfield>. When the image consists of variable
+length compressed data this is the maximum number of bytes required to
+hold an image.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-colorspace;</entry>
+ <entry><structfield>colorspace</structfield></entry>
+ <entry>This information supplements the
+<structfield>pixelformat</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>priv</structfield></entry>
+ <entry><para>This field indicates whether the remaining fields of the
+<structname>v4l2_pix_format</structname> structure, also called the extended
+fields, are valid. When set to <constant>V4L2_PIX_FMT_PRIV_MAGIC</constant>, it
+indicates that the extended fields have been correctly initialized. When set to
+any other value it indicates that the extended fields contain undefined values.
+</para>
+<para>Applications that wish to use the pixel format extended fields must first
+ensure that the feature is supported by querying the device for the
+<link linkend="querycap"><constant>V4L2_CAP_EXT_PIX_FORMAT</constant></link>
+capability. If the capability isn't set the pixel format extended fields are not
+supported and using the extended fields will lead to undefined results.</para>
+<para>To use the extended fields, applications must set the
+<structfield>priv</structfield> field to
+<constant>V4L2_PIX_FMT_PRIV_MAGIC</constant>, initialize all the extended fields
+and zero the unused bytes of the <structname>v4l2_format</structname>
+<structfield>raw_data</structfield> field.</para>
+<para>When the <structfield>priv</structfield> field isn't set to
+<constant>V4L2_PIX_FMT_PRIV_MAGIC</constant> drivers must act as if all the
+extended fields were set to zero. On return drivers must set the
+<structfield>priv</structfield> field to
+<constant>V4L2_PIX_FMT_PRIV_MAGIC</constant> and all the extended fields to
+applicable values.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Flags set by the application or driver, see <xref
+linkend="format-flags" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-ycbcr-encoding;</entry>
+ <entry><structfield>ycbcr_enc</structfield></entry>
+ <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-quantization;</entry>
+ <entry><structfield>quantization</structfield></entry>
+ <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+</section>
+
+<section>
+ <title>Multi-planar format structures</title>
+ <para>The <structname>v4l2_plane_pix_format</structname> structures define
+ size and layout for each of the planes in a multi-planar format.
+ The <structname>v4l2_pix_format_mplane</structname> structure contains
+ information common to all planes (such as image width and height) and
+ an array of <structname>v4l2_plane_pix_format</structname> structures,
+ describing all planes of that format.</para>
+ <table pgwide="1" frame="none" id="v4l2-plane-pix-format">
+ <title>struct <structname>v4l2_plane_pix_format</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>sizeimage</structfield></entry>
+ <entry>Maximum size in bytes required for image data in this plane.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>bytesperline</structfield></entry>
+ <entry>Distance in bytes between the leftmost pixels in two adjacent
+ lines. See &v4l2-pix-format;.</entry>
+ </row>
+ <row>
+ <entry>__u16</entry>
+ <entry><structfield>reserved[6]</structfield></entry>
+ <entry>Reserved for future extensions. Should be zeroed by the
+ application.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <table pgwide="1" frame="none" id="v4l2-pix-format-mplane">
+ <title>struct <structname>v4l2_pix_format_mplane</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>width</structfield></entry>
+ <entry>Image width in pixels. See &v4l2-pix-format;.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>height</structfield></entry>
+ <entry>Image height in pixels. See &v4l2-pix-format;.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pixelformat</structfield></entry>
+ <entry>The pixel format. Both single- and multi-planar four character
+codes can be used.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-field;</entry>
+ <entry><structfield>field</structfield></entry>
+ <entry>See &v4l2-pix-format;.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-colorspace;</entry>
+ <entry><structfield>colorspace</structfield></entry>
+ <entry>See &v4l2-pix-format;.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-plane-pix-format;</entry>
+ <entry><structfield>plane_fmt[VIDEO_MAX_PLANES]</structfield></entry>
+ <entry>An array of structures describing format of each plane this
+ pixel format consists of. The number of valid entries in this array
+ has to be put in the <structfield>num_planes</structfield>
+ field.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>num_planes</structfield></entry>
+ <entry>Number of planes (i.e. separate memory buffers) for this format
+ and the number of valid entries in the
+ <structfield>plane_fmt</structfield> array.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Flags set by the application or driver, see <xref
+linkend="format-flags" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-ycbcr-encoding;</entry>
+ <entry><structfield>ycbcr_enc</structfield></entry>
+ <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-quantization;</entry>
+ <entry><structfield>quantization</structfield></entry>
+ <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>reserved[8]</structfield></entry>
+ <entry>Reserved for future extensions. Should be zeroed by the
+ application.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+</section>
+
+ <section>
+ <title>Standard Image Formats</title>
+
+ <para>In order to exchange images between drivers and
+applications, it is necessary to have standard image data formats
+which both sides will interpret the same way. V4L2 includes several
+such formats, and this section is intended to be an unambiguous
+specification of the standard image data formats in V4L2.</para>
+
+ <para>V4L2 drivers are not limited to these formats, however.
+Driver-specific formats are possible. In that case the application may
+depend on a codec to convert images to one of the standard formats
+when needed. But the data can still be stored and retrieved in the
+proprietary format. For example, a device may support a proprietary
+compressed format. Applications can still capture and save the data in
+the compressed format, saving much disk space, and later use a codec
+to convert the images to the X Windows screen format when the video is
+to be displayed.</para>
+
+ <para>Even so, ultimately, some standard formats are needed, so
+the V4L2 specification would not be complete without well-defined
+standard formats.</para>
+
+ <para>The V4L2 standard formats are mainly uncompressed formats. The
+pixels are always arranged in memory from left to right, and from top
+to bottom. The first byte of data in the image buffer is always for
+the leftmost pixel of the topmost row. Following that is the pixel
+immediately to its right, and so on until the end of the top row of
+pixels. Following the rightmost pixel of the row there may be zero or
+more bytes of padding to guarantee that each row of pixel data has a
+certain alignment. Following the pad bytes, if any, is data for the
+leftmost pixel of the second row from the top, and so on. The last row
+has just as many pad bytes after it as the other rows.</para>
+
+ <para>In V4L2 each format has an identifier which looks like
+<constant>PIX_FMT_XXX</constant>, defined in the <link
+linkend="videodev">videodev2.h</link> header file. These identifiers
+represent <link linkend="v4l2-fourcc">four character (FourCC) codes</link>
+which are also listed below, however they are not the same as those
+used in the Windows world.</para>
+
+ <para>For some formats, data is stored in separate, discontiguous
+memory buffers. Those formats are identified by a separate set of FourCC codes
+and are referred to as "multi-planar formats". For example, a YUV422 frame is
+normally stored in one memory buffer, but it can also be placed in two or three
+separate buffers, with Y component in one buffer and CbCr components in another
+in the 2-planar version or with each component in its own buffer in the
+3-planar case. Those sub-buffers are referred to as "planes".</para>
+ </section>
+
+ <section id="colorspaces">
+ <title>Colorspaces</title>
+
+ <para>'Color' is a very complex concept and depends on physics, chemistry and
+biology. Just because you have three numbers that describe the 'red', 'green'
+and 'blue' components of the color of a pixel does not mean that you can accurately
+display that color. A colorspace defines what it actually <emphasis>means</emphasis>
+to have an RGB value of e.g. (255,&nbsp;0,&nbsp;0). That is, which color should be
+reproduced on the screen in a perfectly calibrated environment.</para>
+
+ <para>In order to do that we first need to have a good definition of
+color, i.e. some way to uniquely and unambiguously define a color so that someone
+else can reproduce it. Human color vision is trichromatic since the human eye has
+color receptors that are sensitive to three different wavelengths of light. Hence
+the need to use three numbers to describe color. Be glad you are not a mantis shrimp
+as those are sensitive to 12 different wavelengths, so instead of RGB we would be
+using the ABCDEFGHIJKL colorspace...</para>
+
+ <para>Color exists only in the eye and brain and is the result of how strongly
+color receptors are stimulated. This is based on the Spectral
+Power Distribution (SPD) which is a graph showing the intensity (radiant power)
+of the light at wavelengths covering the visible spectrum as it enters the eye.
+The science of colorimetry is about the relationship between the SPD and color as
+perceived by the human brain.</para>
+
+ <para>Since the human eye has only three color receptors it is perfectly
+possible that different SPDs will result in the same stimulation of those receptors
+and are perceived as the same color, even though the SPD of the light is
+different.</para>
+
+ <para>In the 1920s experiments were devised to determine the relationship
+between SPDs and the perceived color and that resulted in the CIE 1931 standard
+that defines spectral weighting functions that model the perception of color.
+Specifically that standard defines functions that can take an SPD and calculate
+the stimulus for each color receptor. After some further mathematical transforms
+these stimuli are known as the <emphasis>CIE XYZ tristimulus</emphasis> values
+and these X, Y and Z values describe a color as perceived by a human unambiguously.
+These X, Y and Z values are all in the range [0&hellip;1].</para>
+
+ <para>The Y value in the CIE XYZ colorspace corresponds to luminance. Often
+the CIE XYZ colorspace is transformed to the normalized CIE xyY colorspace:</para>
+
+ <para>x = X / (X + Y + Z)</para>
+ <para>y = Y / (X + Y + Z)</para>
+
+ <para>The x and y values are the chromaticity coordinates and can be used to
+define a color without the luminance component Y. It is very confusing to
+have such similar names for these colorspaces. Just be aware that if colors
+are specified with lower case 'x' and 'y', then the CIE xyY colorspace is
+used. Upper case 'X' and 'Y' refer to the CIE XYZ colorspace. Also, y has nothing
+to do with luminance. Together x and y specify a color, and Y the luminance.
+That is really all you need to remember from a practical point of view. At
+the end of this section you will find reading resources that go into much more
+detail if you are interested.
+</para>
+
+ <para>A monitor or TV will reproduce colors by emitting light at three
+different wavelengths, the combination of which will stimulate the color receptors
+in the eye and thus cause the perception of color. Historically these wavelengths
+were defined by the red, green and blue phosphors used in the displays. These
+<emphasis>color primaries</emphasis> are part of what defines a colorspace.</para>
+
+ <para>Different display devices will have different primaries and some
+primaries are more suitable for some display technologies than others. This has
+resulted in a variety of colorspaces that are used for different display
+technologies or uses. To define a colorspace you need to define the three
+color primaries (these are typically defined as x,&nbsp;y chromaticity coordinates
+from the CIE xyY colorspace) but also the white reference: that is the color obtained
+when all three primaries are at maximum power. This determines the relative power
+or energy of the primaries. This is usually chosen to be close to daylight which has
+been defined as the CIE D65 Illuminant.</para>
+
+ <para>To recapitulate: the CIE XYZ colorspace uniquely identifies colors.
+Other colorspaces are defined by three chromaticity coordinates defined in the
+CIE xyY colorspace. Based on those a 3x3 matrix can be constructed that
+transforms CIE XYZ colors to colors in the new colorspace.
+</para>
+
+ <para>Both the CIE XYZ and the RGB colorspace that are derived from the
+specific chromaticity primaries are linear colorspaces. But neither the eye,
+nor display technology is linear. Doubling the values of all components in
+the linear colorspace will not be perceived as twice the intensity of the color.
+So each colorspace also defines a transfer function that takes a linear color
+component value and transforms it to the non-linear component value, which is a
+closer match to the non-linear performance of both the eye and displays. Linear
+component values are denoted RGB, non-linear are denoted as R'G'B'. In general
+colors used in graphics are all R'G'B', except in openGL which uses linear RGB.
+Special care should be taken when dealing with openGL to provide linear RGB colors
+or to use the built-in openGL support to apply the inverse transfer function.</para>
+
+ <para>The final piece that defines a colorspace is a function that
+transforms non-linear R'G'B' to non-linear Y'CbCr. This function is determined
+by the so-called luma coefficients. There may be multiple possible Y'CbCr
+encodings allowed for the same colorspace. Many encodings of color
+prefer to use luma (Y') and chroma (CbCr) instead of R'G'B'. Since the human
+eye is more sensitive to differences in luminance than in color this encoding
+allows one to reduce the amount of color information compared to the luma
+data. Note that the luma (Y') is unrelated to the Y in the CIE XYZ colorspace.
+Also note that Y'CbCr is often called YCbCr or YUV even though these are
+strictly speaking wrong.</para>
+
+ <para>Sometimes people confuse Y'CbCr as being a colorspace. This is not
+correct, it is just an encoding of an R'G'B' color into luma and chroma
+values. The underlying colorspace that is associated with the R'G'B' color
+is also associated with the Y'CbCr color.</para>
+
+ <para>The final step is how the RGB, R'G'B' or Y'CbCr values are
+quantized. The CIE XYZ colorspace where X, Y and Z are in the range
+[0&hellip;1] describes all colors that humans can perceive, but the transform to
+another colorspace will produce colors that are outside the [0&hellip;1] range.
+Once clamped to the [0&hellip;1] range those colors can no longer be reproduced
+in that colorspace. This clamping is what reduces the extent or gamut of the
+colorspace. How the range of [0&hellip;1] is translated to integer values in the
+range of [0&hellip;255] (or higher, depending on the color depth) is called the
+quantization. This is <emphasis>not</emphasis> part of the colorspace
+definition. In practice RGB or R'G'B' values are full range, i.e. they
+use the full [0&hellip;255] range. Y'CbCr values on the other hand are limited
+range with Y' using [16&hellip;235] and Cb and Cr using [16&hellip;240].</para>
+
+ <para>Unfortunately, in some cases limited range RGB is also used
+where the components use the range [16&hellip;235]. And full range Y'CbCr also exists
+using the [0&hellip;255] range.</para>
+
+ <para>In order to correctly interpret a color you need to know the
+quantization range, whether it is R'G'B' or Y'CbCr, the used Y'CbCr encoding
+and the colorspace.
+From that information you can calculate the corresponding CIE XYZ color
+and map that again to whatever colorspace your display device uses.</para>
+
+ <para>The colorspace definition itself consists of the three
+chromaticity primaries, the white reference chromaticity, a transfer
+function and the luma coefficients needed to transform R'G'B' to Y'CbCr. While
+some colorspace standards correctly define all four, quite often the colorspace
+standard only defines some, and you have to rely on other standards for
+the missing pieces. The fact that colorspaces are often a mix of different
+standards also led to very confusing naming conventions where the name of
+a standard was used to name a colorspace when in fact that standard was
+part of various other colorspaces as well.</para>
+
+ <para>If you want to read more about colors and colorspaces, then the
+following resources are useful: <xref linkend="poynton" /> is a good practical
+book for video engineers, <xref linkend="colimg" /> has a much broader scope and
+describes many more aspects of color (physics, chemistry, biology, etc.).
+The <ulink url="http://www.brucelindbloom.com">http://www.brucelindbloom.com</ulink>
+website is an excellent resource, especially with respect to the mathematics behind
+colorspace conversions. The wikipedia <ulink url="http://en.wikipedia.org/wiki/CIE_1931_color_space#CIE_xy_chromaticity_diagram_and_the_CIE_xyY_color_space">CIE 1931 colorspace</ulink> article
+is also very useful.</para>
+ </section>
+
+ <section>
+ <title>Defining Colorspaces in V4L2</title>
+ <para>In V4L2 colorspaces are defined by three values. The first is the colorspace
+identifier (&v4l2-colorspace;) which defines the chromaticities, the transfer
+function, the default Y'CbCr encoding and the default quantization method. The second
+is the Y'CbCr encoding identifier (&v4l2-ycbcr-encoding;) to specify non-standard
+Y'CbCr encodings and the third is the quantization identifier (&v4l2-quantization;)
+to specify non-standard quantization methods. Most of the time only the colorspace
+field of &v4l2-pix-format; or &v4l2-pix-format-mplane; needs to be filled in. Note
+that the default R'G'B' quantization is full range for all colorspaces except for
+BT.2020 which uses limited range R'G'B' quantization.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-colorspace">
+ <title>V4L2 Colorspaces</title>
+ <tgroup cols="2" align="left">
+ &cs-def;
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Details</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_COLORSPACE_SMPTE170M</constant></entry>
+ <entry>See <xref linkend="col-smpte-170m" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORSPACE_REC709</constant></entry>
+ <entry>See <xref linkend="col-rec709" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORSPACE_SRGB</constant></entry>
+ <entry>See <xref linkend="col-srgb" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORSPACE_ADOBERGB</constant></entry>
+ <entry>See <xref linkend="col-adobergb" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORSPACE_BT2020</constant></entry>
+ <entry>See <xref linkend="col-bt2020" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORSPACE_SMPTE240M</constant></entry>
+ <entry>See <xref linkend="col-smpte-240m" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORSPACE_470_SYSTEM_M</constant></entry>
+ <entry>See <xref linkend="col-sysm" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORSPACE_470_SYSTEM_BG</constant></entry>
+ <entry>See <xref linkend="col-sysbg" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_COLORSPACE_JPEG</constant></entry>
+ <entry>See <xref linkend="col-jpeg" />.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-ycbcr-encoding">
+ <title>V4L2 Y'CbCr Encodings</title>
+ <tgroup cols="2" align="left">
+ &cs-def;
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Details</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_YCBCR_ENC_DEFAULT</constant></entry>
+ <entry>Use the default Y'CbCr encoding as defined by the colorspace.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_YCBCR_ENC_601</constant></entry>
+ <entry>Use the BT.601 Y'CbCr encoding.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_YCBCR_ENC_709</constant></entry>
+ <entry>Use the Rec. 709 Y'CbCr encoding.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_YCBCR_ENC_XV601</constant></entry>
+ <entry>Use the extended gamut xvYCC BT.601 encoding.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_YCBCR_ENC_XV709</constant></entry>
+ <entry>Use the extended gamut xvYCC Rec. 709 encoding.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_YCBCR_ENC_SYCC</constant></entry>
+ <entry>Use the extended gamut sYCC encoding.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_YCBCR_ENC_BT2020</constant></entry>
+ <entry>Use the default non-constant luminance BT.2020 Y'CbCr encoding.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_YCBCR_ENC_BT2020_CONST_LUM</constant></entry>
+ <entry>Use the constant luminance BT.2020 Yc'CbcCrc encoding.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-quantization">
+ <title>V4L2 Quantization Methods</title>
+ <tgroup cols="2" align="left">
+ &cs-def;
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Details</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_QUANTIZATION_DEFAULT</constant></entry>
+ <entry>Use the default quantization encoding as defined by the colorspace.
+This is always full range for R'G'B' (except for the BT.2020 colorspace) and usually
+limited range for Y'CbCr.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_QUANTIZATION_FULL_RANGE</constant></entry>
+ <entry>Use the full range quantization encoding. I.e. the range [0&hellip;1]
+is mapped to [0&hellip;255] (with possible clipping to [1&hellip;254] to avoid the
+0x00 and 0xff values). Cb and Cr are mapped from [-0.5&hellip;0.5] to [0&hellip;255]
+(with possible clipping to [1&hellip;254] to avoid the 0x00 and 0xff values).</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_QUANTIZATION_LIM_RANGE</constant></entry>
+ <entry>Use the limited range quantization encoding. I.e. the range [0&hellip;1]
+is mapped to [16&hellip;235]. Cb and Cr are mapped from [-0.5&hellip;0.5] to [16&hellip;240].
+</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section>
+ <title>Detailed Colorspace Descriptions</title>
+ <section id="col-smpte-170m">
+ <title>Colorspace SMPTE 170M (<constant>V4L2_COLORSPACE_SMPTE170M</constant>)</title>
+ <para>The <xref linkend="smpte170m" /> standard defines the colorspace used by NTSC and PAL and by SDTV
+in general. The default Y'CbCr encoding is <constant>V4L2_YCBCR_ENC_601</constant>.
+The default Y'CbCr quantization is limited range. The chromaticities of the primary colors and
+the white reference are:</para>
+ <table frame="none">
+ <title>SMPTE 170M Chromaticities</title>
+ <tgroup cols="3" align="left">
+ &cs-str;
+ <thead>
+ <row>
+ <entry>Color</entry>
+ <entry>x</entry>
+ <entry>y</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Red</entry>
+ <entry>0.630</entry>
+ <entry>0.340</entry>
+ </row>
+ <row>
+ <entry>Green</entry>
+ <entry>0.310</entry>
+ <entry>0.595</entry>
+ </row>
+ <row>
+ <entry>Blue</entry>
+ <entry>0.155</entry>
+ <entry>0.070</entry>
+ </row>
+ <row>
+ <entry>White Reference (D65)</entry>
+ <entry>0.3127</entry>
+ <entry>0.3290</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <para>The red, green and blue chromaticities are also often referred to
+as the SMPTE C set, so this colorspace is sometimes called SMPTE C as well.</para>
+ <variablelist>
+ <varlistentry>
+ <term>The transfer function defined for SMPTE 170M is the same as the
+one defined in Rec. 709.</term>
+ <listitem>
+ <para>L' = -1.099(-L)<superscript>0.45</superscript>&nbsp;+&nbsp;0.099&nbsp;for&nbsp;L&nbsp;&le;&nbsp;-0.018</para>
+ <para>L' = 4.5L&nbsp;for&nbsp;-0.018&nbsp;&lt;&nbsp;L&nbsp;&lt;&nbsp;0.018</para>
+ <para>L' = 1.099L<superscript>0.45</superscript>&nbsp;-&nbsp;0.099&nbsp;for&nbsp;L&nbsp;&ge;&nbsp;0.018</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>Inverse Transfer function:</term>
+ <listitem>
+ <para>L = -((L'&nbsp;-&nbsp;0.099)&nbsp;/&nbsp;-1.099)<superscript>1/0.45</superscript>&nbsp;for&nbsp;L'&nbsp;&le;&nbsp;-0.081</para>
+ <para>L = L'&nbsp;/&nbsp;4.5&nbsp;for&nbsp;-0.081&nbsp;&lt;&nbsp;L'&nbsp;&lt;&nbsp;0.081</para>
+ <para>L = ((L'&nbsp;+&nbsp;0.099)&nbsp;/&nbsp;1.099)<superscript>1/0.45</superscript>&nbsp;for&nbsp;L'&nbsp;&ge;&nbsp;0.081</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>The luminance (Y') and color difference (Cb and Cr) are obtained with
+the following <constant>V4L2_YCBCR_ENC_601</constant> encoding:</term>
+ <listitem>
+ <para>Y'&nbsp;=&nbsp;0.299R'&nbsp;+&nbsp;0.587G'&nbsp;+&nbsp;0.114B'</para>
+ <para>Cb&nbsp;=&nbsp;-0.169R'&nbsp;-&nbsp;0.331G'&nbsp;+&nbsp;0.5B'</para>
+ <para>Cr&nbsp;=&nbsp;0.5R'&nbsp;-&nbsp;0.419G'&nbsp;-&nbsp;0.081B'</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>Y' is clamped to the range [0&hellip;1] and Cb and Cr are
+clamped to the range [-0.5&hellip;0.5]. This conversion to Y'CbCr is identical to the one
+defined in the <xref linkend="itu601" /> standard and this colorspace is sometimes called BT.601 as well, even
+though BT.601 does not mention any color primaries.</para>
+ <para>The default quantization is limited range, but full range is possible although
+rarely seen.</para>
+ </section>
+
+ <section id="col-rec709">
+ <title>Colorspace Rec. 709 (<constant>V4L2_COLORSPACE_REC709</constant>)</title>
+ <para>The <xref linkend="itu709" /> standard defines the colorspace used by HDTV in general. The default
+Y'CbCr encoding is <constant>V4L2_YCBCR_ENC_709</constant>. The default Y'CbCr quantization is
+limited range. The chromaticities of the primary colors and the white reference are:</para>
+ <table frame="none">
+ <title>Rec. 709 Chromaticities</title>
+ <tgroup cols="3" align="left">
+ &cs-str;
+ <thead>
+ <row>
+ <entry>Color</entry>
+ <entry>x</entry>
+ <entry>y</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Red</entry>
+ <entry>0.640</entry>
+ <entry>0.330</entry>
+ </row>
+ <row>
+ <entry>Green</entry>
+ <entry>0.300</entry>
+ <entry>0.600</entry>
+ </row>
+ <row>
+ <entry>Blue</entry>
+ <entry>0.150</entry>
+ <entry>0.060</entry>
+ </row>
+ <row>
+ <entry>White Reference (D65)</entry>
+ <entry>0.3127</entry>
+ <entry>0.3290</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <para>The full name of this standard is Rec. ITU-R BT.709-5.</para>
+ <variablelist>
+ <varlistentry>
+ <term>Transfer function. Normally L is in the range [0&hellip;1], but for the extended
+gamut xvYCC encoding values outside that range are allowed.</term>
+ <listitem>
+ <para>L' = -1.099(-L)<superscript>0.45</superscript>&nbsp;+&nbsp;0.099&nbsp;for&nbsp;L&nbsp;&le;&nbsp;-0.018</para>
+ <para>L' = 4.5L&nbsp;for&nbsp;-0.018&nbsp;&lt;&nbsp;L&nbsp;&lt;&nbsp;0.018</para>
+ <para>L' = 1.099L<superscript>0.45</superscript>&nbsp;-&nbsp;0.099&nbsp;for&nbsp;L&nbsp;&ge;&nbsp;0.018</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>Inverse Transfer function:</term>
+ <listitem>
+ <para>L = -((L'&nbsp;-&nbsp;0.099)&nbsp;/&nbsp;-1.099)<superscript>1/0.45</superscript>&nbsp;for&nbsp;L'&nbsp;&le;&nbsp;-0.081</para>
+ <para>L = L'&nbsp;/&nbsp;4.5&nbsp;for&nbsp;-0.081&nbsp;&lt;&nbsp;L'&nbsp;&lt;&nbsp;0.081</para>
+ <para>L = ((L'&nbsp;+&nbsp;0.099)&nbsp;/&nbsp;1.099)<superscript>1/0.45</superscript>&nbsp;for&nbsp;L'&nbsp;&ge;&nbsp;0.081</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>The luminance (Y') and color difference (Cb and Cr) are obtained with the following
+<constant>V4L2_YCBCR_ENC_709</constant> encoding:</term>
+ <listitem>
+ <para>Y'&nbsp;=&nbsp;0.2126R'&nbsp;+&nbsp;0.7152G'&nbsp;+&nbsp;0.0722B'</para>
+ <para>Cb&nbsp;=&nbsp;-0.1146R'&nbsp;-&nbsp;0.3854G'&nbsp;+&nbsp;0.5B'</para>
+ <para>Cr&nbsp;=&nbsp;0.5R'&nbsp;-&nbsp;0.4542G'&nbsp;-&nbsp;0.0458B'</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>Y' is clamped to the range [0&hellip;1] and Cb and Cr are
+clamped to the range [-0.5&hellip;0.5].</para>
+ <para>The default quantization is limited range, but full range is possible although
+rarely seen.</para>
+ <para>The <constant>V4L2_YCBCR_ENC_709</constant> encoding described above is the default
+for this colorspace, but it can be overridden with <constant>V4L2_YCBCR_ENC_601</constant>, in which
+case the BT.601 Y'CbCr encoding is used.</para>
+ <para>Two additional extended gamut Y'CbCr encodings are also possible with this colorspace:</para>
+ <variablelist>
+ <varlistentry>
+ <term>The xvYCC 709 encoding (<constant>V4L2_YCBCR_ENC_XV709</constant>, <xref linkend="xvycc" />)
+is similar to the Rec. 709 encoding, but it allows for R', G' and B' values that are outside the range
+[0&hellip;1]. The resulting Y', Cb and Cr values are scaled and offset:</term>
+ <listitem>
+ <para>Y'&nbsp;=&nbsp;(219&nbsp;/&nbsp;256)&nbsp;*&nbsp;(0.2126R'&nbsp;+&nbsp;0.7152G'&nbsp;+&nbsp;0.0722B')&nbsp;+&nbsp;(16&nbsp;/&nbsp;256)</para>
+ <para>Cb&nbsp;=&nbsp;(224&nbsp;/&nbsp;256)&nbsp;*&nbsp;(-0.1146R'&nbsp;-&nbsp;0.3854G'&nbsp;+&nbsp;0.5B')</para>
+ <para>Cr&nbsp;=&nbsp;(224&nbsp;/&nbsp;256)&nbsp;*&nbsp;(0.5R'&nbsp;-&nbsp;0.4542G'&nbsp;-&nbsp;0.0458B')</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>The xvYCC 601 encoding (<constant>V4L2_YCBCR_ENC_XV601</constant>, <xref linkend="xvycc" />) is similar
+to the BT.601 encoding, but it allows for R', G' and B' values that are outside the range
+[0&hellip;1]. The resulting Y', Cb and Cr values are scaled and offset:</term>
+ <listitem>
+ <para>Y'&nbsp;=&nbsp;(219&nbsp;/&nbsp;256)&nbsp;*&nbsp;(0.299R'&nbsp;+&nbsp;0.587G'&nbsp;+&nbsp;0.114B')&nbsp;+&nbsp;(16&nbsp;/&nbsp;256)</para>
+ <para>Cb&nbsp;=&nbsp;(224&nbsp;/&nbsp;256)&nbsp;*&nbsp;(-0.169R'&nbsp;-&nbsp;0.331G'&nbsp;+&nbsp;0.5B')</para>
+ <para>Cr&nbsp;=&nbsp;(224&nbsp;/&nbsp;256)&nbsp;*&nbsp;(0.5R'&nbsp;-&nbsp;0.419G'&nbsp;-&nbsp;0.081B')</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>Y' is clamped to the range [0&hellip;1] and Cb and Cr are clamped
+to the range [-0.5&hellip;0.5]. The non-standard xvYCC 709 or xvYCC 601 encodings can be used by
+selecting <constant>V4L2_YCBCR_ENC_XV709</constant> or <constant>V4L2_YCBCR_ENC_XV601</constant>.
+The xvYCC encodings always use full range quantization.</para>
+ </section>
+
+ <section id="col-srgb">
+ <title>Colorspace sRGB (<constant>V4L2_COLORSPACE_SRGB</constant>)</title>
+ <para>The <xref linkend="srgb" /> standard defines the colorspace used by most webcams and computer graphics. The
+default Y'CbCr encoding is <constant>V4L2_YCBCR_ENC_SYCC</constant>. The default Y'CbCr quantization
+is full range. The chromaticities of the primary colors and the white reference are:</para>
+ <table frame="none">
+ <title>sRGB Chromaticities</title>
+ <tgroup cols="3" align="left">
+ &cs-str;
+ <thead>
+ <row>
+ <entry>Color</entry>
+ <entry>x</entry>
+ <entry>y</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Red</entry>
+ <entry>0.640</entry>
+ <entry>0.330</entry>
+ </row>
+ <row>
+ <entry>Green</entry>
+ <entry>0.300</entry>
+ <entry>0.600</entry>
+ </row>
+ <row>
+ <entry>Blue</entry>
+ <entry>0.150</entry>
+ <entry>0.060</entry>
+ </row>
+ <row>
+ <entry>White Reference (D65)</entry>
+ <entry>0.3127</entry>
+ <entry>0.3290</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <para>These chromaticities are identical to the Rec. 709 colorspace.</para>
+ <variablelist>
+ <varlistentry>
+ <term>Transfer function. Note that negative values for L are only used by the Y'CbCr conversion.</term>
+ <listitem>
+ <para>L' = -1.055(-L)<superscript>1/2.4</superscript>&nbsp;+&nbsp;0.055&nbsp;for&nbsp;L&nbsp;&lt;&nbsp;-0.0031308</para>
+ <para>L' = 12.92L&nbsp;for&nbsp;-0.0031308&nbsp;&le;&nbsp;L&nbsp;&le;&nbsp;0.0031308</para>
+ <para>L' = 1.055L<superscript>1/2.4</superscript>&nbsp;-&nbsp;0.055&nbsp;for&nbsp;0.0031308&nbsp;&lt;&nbsp;L&nbsp;&le;&nbsp;1</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>Inverse Transfer function:</term>
+ <listitem>
+ <para>L = -((-L'&nbsp;+&nbsp;0.055)&nbsp;/&nbsp;1.055)<superscript>2.4</superscript>&nbsp;for&nbsp;L'&nbsp;&lt;&nbsp;-0.04045</para>
+ <para>L = L'&nbsp;/&nbsp;12.92&nbsp;for&nbsp;-0.04045&nbsp;&le;&nbsp;L'&nbsp;&le;&nbsp;0.04045</para>
+ <para>L = ((L'&nbsp;+&nbsp;0.055)&nbsp;/&nbsp;1.055)<superscript>2.4</superscript>&nbsp;for&nbsp;L'&nbsp;&gt;&nbsp;0.04045</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>The luminance (Y') and color difference (Cb and Cr) are obtained with the following
+<constant>V4L2_YCBCR_ENC_SYCC</constant> encoding as defined by <xref linkend="sycc" />:</term>
+ <listitem>
+ <para>Y'&nbsp;=&nbsp;0.2990R'&nbsp;+&nbsp;0.5870G'&nbsp;+&nbsp;0.1140B'</para>
+ <para>Cb&nbsp;=&nbsp;-0.1687R'&nbsp;-&nbsp;0.3313G'&nbsp;+&nbsp;0.5B'</para>
+ <para>Cr&nbsp;=&nbsp;0.5R'&nbsp;-&nbsp;0.4187G'&nbsp;-&nbsp;0.0813B'</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>Y' is clamped to the range [0&hellip;1] and Cb and Cr are clamped
+to the range [-0.5&hellip;0.5]. The <constant>V4L2_YCBCR_ENC_SYCC</constant> quantization is always
+full range. Although this Y'CbCr encoding looks very similar to the <constant>V4L2_YCBCR_ENC_XV601</constant>
+encoding, it is not. The <constant>V4L2_YCBCR_ENC_XV601</constant> scales and offsets the Y'CbCr
+values before quantization, but this encoding does not do that.</para>
+ </section>
+
+ <section id="col-adobergb">
+ <title>Colorspace Adobe RGB (<constant>V4L2_COLORSPACE_ADOBERGB</constant>)</title>
+ <para>The <xref linkend="adobergb" /> standard defines the colorspace used by computer graphics
+that use the AdobeRGB colorspace. This is also known as the <xref linkend="oprgb" /> standard.
+The default Y'CbCr encoding is <constant>V4L2_YCBCR_ENC_601</constant>. The default Y'CbCr
+quantization is limited range. The chromaticities of the primary colors and the white reference
+are:</para>
+ <table frame="none">
+ <title>Adobe RGB Chromaticities</title>
+ <tgroup cols="3" align="left">
+ &cs-str;
+ <thead>
+ <row>
+ <entry>Color</entry>
+ <entry>x</entry>
+ <entry>y</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Red</entry>
+ <entry>0.6400</entry>
+ <entry>0.3300</entry>
+ </row>
+ <row>
+ <entry>Green</entry>
+ <entry>0.2100</entry>
+ <entry>0.7100</entry>
+ </row>
+ <row>
+ <entry>Blue</entry>
+ <entry>0.1500</entry>
+ <entry>0.0600</entry>
+ </row>
+ <row>
+ <entry>White Reference (D65)</entry>
+ <entry>0.3127</entry>
+ <entry>0.3290</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <variablelist>
+ <varlistentry>
+ <term>Transfer function:</term>
+ <listitem>
+ <para>L' = L<superscript>1/2.19921875</superscript></para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>Inverse Transfer function:</term>
+ <listitem>
+ <para>L = L'<superscript>2.19921875</superscript></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>The luminance (Y') and color difference (Cb and Cr) are obtained with the
+following <constant>V4L2_YCBCR_ENC_601</constant> encoding:</term>
+ <listitem>
+ <para>Y'&nbsp;=&nbsp;0.299R'&nbsp;+&nbsp;0.587G'&nbsp;+&nbsp;0.114B'</para>
+ <para>Cb&nbsp;=&nbsp;-0.169R'&nbsp;-&nbsp;0.331G'&nbsp;+&nbsp;0.5B'</para>
+ <para>Cr&nbsp;=&nbsp;0.5R'&nbsp;-&nbsp;0.419G'&nbsp;-&nbsp;0.081B'</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>Y' is clamped to the range [0&hellip;1] and Cb and Cr are
+clamped to the range [-0.5&hellip;0.5]. This transform is identical to one defined in
+SMPTE 170M/BT.601. The Y'CbCr quantization is limited range.</para>
+ </section>
+
+ <section id="col-bt2020">
+ <title>Colorspace BT.2020 (<constant>V4L2_COLORSPACE_BT2020</constant>)</title>
+ <para>The <xref linkend="itu2020" /> standard defines the colorspace used by Ultra-high definition
+television (UHDTV). The default Y'CbCr encoding is <constant>V4L2_YCBCR_ENC_BT2020</constant>.
+The default R'G'B' quantization is limited range (!), and so is the default Y'CbCr quantization.
+The chromaticities of the primary colors and the white reference are:</para>
+ <table frame="none">
+ <title>BT.2020 Chromaticities</title>
+ <tgroup cols="3" align="left">
+ &cs-str;
+ <thead>
+ <row>
+ <entry>Color</entry>
+ <entry>x</entry>
+ <entry>y</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Red</entry>
+ <entry>0.708</entry>
+ <entry>0.292</entry>
+ </row>
+ <row>
+ <entry>Green</entry>
+ <entry>0.170</entry>
+ <entry>0.797</entry>
+ </row>
+ <row>
+ <entry>Blue</entry>
+ <entry>0.131</entry>
+ <entry>0.046</entry>
+ </row>
+ <row>
+ <entry>White Reference (D65)</entry>
+ <entry>0.3127</entry>
+ <entry>0.3290</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <variablelist>
+ <varlistentry>
+ <term>Transfer function (same as Rec. 709):</term>
+ <listitem>
+ <para>L' = 4.5L&nbsp;for&nbsp;0&nbsp;&le;&nbsp;L&nbsp;&lt;&nbsp;0.018</para>
+ <para>L' = 1.099L<superscript>0.45</superscript>&nbsp;-&nbsp;0.099&nbsp;for&nbsp;0.018&nbsp;&le;&nbsp;L&nbsp;&le;&nbsp;1</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>Inverse Transfer function:</term>
+ <listitem>
+ <para>L = L'&nbsp;/&nbsp;4.5&nbsp;for&nbsp;L'&nbsp;&lt;&nbsp;0.081</para>
+ <para>L = ((L'&nbsp;+&nbsp;0.099)&nbsp;/&nbsp;1.099)<superscript>1/0.45</superscript>&nbsp;for&nbsp;L'&nbsp;&ge;&nbsp;0.081</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>The luminance (Y') and color difference (Cb and Cr) are obtained with the
+following <constant>V4L2_YCBCR_ENC_BT2020</constant> encoding:</term>
+ <listitem>
+ <para>Y'&nbsp;=&nbsp;0.2627R'&nbsp;+&nbsp;0.6780G'&nbsp;+&nbsp;0.0593B'</para>
+ <para>Cb&nbsp;=&nbsp;-0.1396R'&nbsp;-&nbsp;0.3604G'&nbsp;+&nbsp;0.5B'</para>
+ <para>Cr&nbsp;=&nbsp;0.5R'&nbsp;-&nbsp;0.4598G'&nbsp;-&nbsp;0.0402B'</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>Y' is clamped to the range [0&hellip;1] and Cb and Cr are
+clamped to the range [-0.5&hellip;0.5]. The Y'CbCr quantization is limited range.</para>
+ <para>There is also an alternate constant luminance R'G'B' to Yc'CbcCrc
+(<constant>V4L2_YCBCR_ENC_BT2020_CONST_LUM</constant>) encoding:</para>
+ <variablelist>
+ <varlistentry>
+ <term>Luma:</term>
+ <listitem>
+ <para>Yc'&nbsp;=&nbsp;(0.2627R&nbsp;+&nbsp;0.6780G&nbsp;+&nbsp;0.0593B)'</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>B'&nbsp;-&nbsp;Yc'&nbsp;&le;&nbsp;0:</term>
+ <listitem>
+ <para>Cbc&nbsp;=&nbsp;(B'&nbsp;-&nbsp;Yc')&nbsp;/&nbsp;1.9404</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>B'&nbsp;-&nbsp;Yc'&nbsp;&gt;&nbsp;0:</term>
+ <listitem>
+ <para>Cbc&nbsp;=&nbsp;(B'&nbsp;-&nbsp;Yc')&nbsp;/&nbsp;1.5816</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>R'&nbsp;-&nbsp;Yc'&nbsp;&le;&nbsp;0:</term>
+ <listitem>
+ <para>Crc&nbsp;=&nbsp;(R'&nbsp;-&nbsp;Y')&nbsp;/&nbsp;1.7184</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>R'&nbsp;-&nbsp;Yc'&nbsp;&gt;&nbsp;0:</term>
+ <listitem>
+ <para>Crc&nbsp;=&nbsp;(R'&nbsp;-&nbsp;Y')&nbsp;/&nbsp;0.9936</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>Yc' is clamped to the range [0&hellip;1] and Cbc and Crc are
+clamped to the range [-0.5&hellip;0.5]. The Yc'CbcCrc quantization is limited range.</para>
+ </section>
+
+ <section id="col-smpte-240m">
+ <title>Colorspace SMPTE 240M (<constant>V4L2_COLORSPACE_SMPTE240M</constant>)</title>
+ <para>The <xref linkend="smpte240m" /> standard was an interim standard used during the early days of HDTV (1988-1998).
+It has been superseded by Rec. 709. The default Y'CbCr encoding is <constant>V4L2_YCBCR_ENC_SMPTE240M</constant>.
+The default Y'CbCr quantization is limited range. The chromaticities of the primary colors and the
+white reference are:</para>
+ <table frame="none">
+ <title>SMPTE 240M Chromaticities</title>
+ <tgroup cols="3" align="left">
+ &cs-str;
+ <thead>
+ <row>
+ <entry>Color</entry>
+ <entry>x</entry>
+ <entry>y</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Red</entry>
+ <entry>0.630</entry>
+ <entry>0.340</entry>
+ </row>
+ <row>
+ <entry>Green</entry>
+ <entry>0.310</entry>
+ <entry>0.595</entry>
+ </row>
+ <row>
+ <entry>Blue</entry>
+ <entry>0.155</entry>
+ <entry>0.070</entry>
+ </row>
+ <row>
+ <entry>White Reference (D65)</entry>
+ <entry>0.3127</entry>
+ <entry>0.3290</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <para>These chromaticities are identical to the SMPTE 170M colorspace.</para>
+ <variablelist>
+ <varlistentry>
+ <term>Transfer function:</term>
+ <listitem>
+ <para>L' = 4L&nbsp;for&nbsp;0&nbsp;&le;&nbsp;L&nbsp;&lt;&nbsp;0.0228</para>
+ <para>L' = 1.1115L<superscript>0.45</superscript>&nbsp;-&nbsp;0.1115&nbsp;for&nbsp;0.0228&nbsp;&le;&nbsp;L&nbsp;&le;&nbsp;1</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>Inverse Transfer function:</term>
+ <listitem>
+ <para>L = L'&nbsp;/&nbsp;4&nbsp;for&nbsp;0&nbsp;&le;&nbsp;L'&nbsp;&lt;&nbsp;0.0913</para>
+ <para>L = ((L'&nbsp;+&nbsp;0.1115)&nbsp;/&nbsp;1.1115)<superscript>1/0.45</superscript>&nbsp;for&nbsp;L'&nbsp;&ge;&nbsp;0.0913</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>The luminance (Y') and color difference (Cb and Cr) are obtained with the
+following <constant>V4L2_YCBCR_ENC_SMPTE240M</constant> encoding:</term>
+ <listitem>
+ <para>Y'&nbsp;=&nbsp;0.2122R'&nbsp;+&nbsp;0.7013G'&nbsp;+&nbsp;0.0865B'</para>
+ <para>Cb&nbsp;=&nbsp;-0.1161R'&nbsp;-&nbsp;0.3839G'&nbsp;+&nbsp;0.5B'</para>
+ <para>Cr&nbsp;=&nbsp;0.5R'&nbsp;-&nbsp;0.4451G'&nbsp;-&nbsp;0.0549B'</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>Yc' is clamped to the range [0&hellip;1] and Cbc and Crc are
+clamped to the range [-0.5&hellip;0.5]. The Y'CbCr quantization is limited range.</para>
+ </section>
+
+ <section id="col-sysm">
+ <title>Colorspace NTSC 1953 (<constant>V4L2_COLORSPACE_470_SYSTEM_M</constant>)</title>
+ <para>This standard defines the colorspace used by NTSC in 1953. In practice this
+colorspace is obsolete and SMPTE 170M should be used instead. The default Y'CbCr encoding
+is <constant>V4L2_YCBCR_ENC_601</constant>. The default Y'CbCr quantization is limited range.
+The chromaticities of the primary colors and the white reference are:</para>
+ <table frame="none">
+ <title>NTSC 1953 Chromaticities</title>
+ <tgroup cols="3" align="left">
+ &cs-str;
+ <thead>
+ <row>
+ <entry>Color</entry>
+ <entry>x</entry>
+ <entry>y</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Red</entry>
+ <entry>0.67</entry>
+ <entry>0.33</entry>
+ </row>
+ <row>
+ <entry>Green</entry>
+ <entry>0.21</entry>
+ <entry>0.71</entry>
+ </row>
+ <row>
+ <entry>Blue</entry>
+ <entry>0.14</entry>
+ <entry>0.08</entry>
+ </row>
+ <row>
+ <entry>White Reference (C)</entry>
+ <entry>0.310</entry>
+ <entry>0.316</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <para>Note that this colorspace uses Illuminant C instead of D65 as the
+white reference. To correctly convert an image in this colorspace to another
+that uses D65 you need to apply a chromatic adaptation algorithm such as the
+Bradford method.</para>
+ <variablelist>
+ <varlistentry>
+ <term>The transfer function was never properly defined for NTSC 1953. The
+Rec. 709 transfer function is recommended in the literature:</term>
+ <listitem>
+ <para>L' = 4.5L&nbsp;for&nbsp;0&nbsp;&le;&nbsp;L&nbsp;&lt;&nbsp;0.018</para>
+ <para>L' = 1.099L<superscript>0.45</superscript>&nbsp;-&nbsp;0.099&nbsp;for&nbsp;0.018&nbsp;&le;&nbsp;L&nbsp;&le;&nbsp;1</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>Inverse Transfer function:</term>
+ <listitem>
+ <para>L = L'&nbsp;/&nbsp;4.5&nbsp;for&nbsp;L'&nbsp;&lt;&nbsp;0.081</para>
+ <para>L = ((L'&nbsp;+&nbsp;0.099)&nbsp;/&nbsp;1.099)<superscript>1/0.45</superscript>&nbsp;for&nbsp;L'&nbsp;&ge;&nbsp;0.081</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>The luminance (Y') and color difference (Cb and Cr) are obtained with the
+following <constant>V4L2_YCBCR_ENC_601</constant> encoding:</term>
+ <listitem>
+ <para>Y'&nbsp;=&nbsp;0.299R'&nbsp;+&nbsp;0.587G'&nbsp;+&nbsp;0.114B'</para>
+ <para>Cb&nbsp;=&nbsp;-0.169R'&nbsp;-&nbsp;0.331G'&nbsp;+&nbsp;0.5B'</para>
+ <para>Cr&nbsp;=&nbsp;0.5R'&nbsp;-&nbsp;0.419G'&nbsp;-&nbsp;0.081B'</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>Y' is clamped to the range [0&hellip;1] and Cb and Cr are
+clamped to the range [-0.5&hellip;0.5]. The Y'CbCr quantization is limited range.
+This transform is identical to one defined in SMPTE 170M/BT.601.</para>
+ </section>
+
+ <section id="col-sysbg">
+ <title>Colorspace EBU Tech. 3213 (<constant>V4L2_COLORSPACE_470_SYSTEM_BG</constant>)</title>
+ <para>The <xref linkend="tech3213" /> standard defines the colorspace used by PAL/SECAM in 1975. In practice this
+colorspace is obsolete and SMPTE 170M should be used instead. The default Y'CbCr encoding
+is <constant>V4L2_YCBCR_ENC_601</constant>. The default Y'CbCr quantization is limited range.
+The chromaticities of the primary colors and the white reference are:</para>
+ <table frame="none">
+ <title>EBU Tech. 3213 Chromaticities</title>
+ <tgroup cols="3" align="left">
+ &cs-str;
+ <thead>
+ <row>
+ <entry>Color</entry>
+ <entry>x</entry>
+ <entry>y</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Red</entry>
+ <entry>0.64</entry>
+ <entry>0.33</entry>
+ </row>
+ <row>
+ <entry>Green</entry>
+ <entry>0.29</entry>
+ <entry>0.60</entry>
+ </row>
+ <row>
+ <entry>Blue</entry>
+ <entry>0.15</entry>
+ <entry>0.06</entry>
+ </row>
+ <row>
+ <entry>White Reference (D65)</entry>
+ <entry>0.3127</entry>
+ <entry>0.3290</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <variablelist>
+ <varlistentry>
+ <term>The transfer function was never properly defined for this colorspace.
+The Rec. 709 transfer function is recommended in the literature:</term>
+ <listitem>
+ <para>L' = 4.5L&nbsp;for&nbsp;0&nbsp;&le;&nbsp;L&nbsp;&lt;&nbsp;0.018</para>
+ <para>L' = 1.099L<superscript>0.45</superscript>&nbsp;-&nbsp;0.099&nbsp;for&nbsp;0.018&nbsp;&le;&nbsp;L&nbsp;&le;&nbsp;1</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>Inverse Transfer function:</term>
+ <listitem>
+ <para>L = L'&nbsp;/&nbsp;4.5&nbsp;for&nbsp;L'&nbsp;&lt;&nbsp;0.081</para>
+ <para>L = ((L'&nbsp;+&nbsp;0.099)&nbsp;/&nbsp;1.099)<superscript>1/0.45</superscript>&nbsp;for&nbsp;L'&nbsp;&ge;&nbsp;0.081</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <variablelist>
+ <varlistentry>
+ <term>The luminance (Y') and color difference (Cb and Cr) are obtained with the
+following <constant>V4L2_YCBCR_ENC_601</constant> encoding:</term>
+ <listitem>
+ <para>Y'&nbsp;=&nbsp;0.299R'&nbsp;+&nbsp;0.587G'&nbsp;+&nbsp;0.114B'</para>
+ <para>Cb&nbsp;=&nbsp;-0.169R'&nbsp;-&nbsp;0.331G'&nbsp;+&nbsp;0.5B'</para>
+ <para>Cr&nbsp;=&nbsp;0.5R'&nbsp;-&nbsp;0.419G'&nbsp;-&nbsp;0.081B'</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>Y' is clamped to the range [0&hellip;1] and Cb and Cr are
+clamped to the range [-0.5&hellip;0.5]. The Y'CbCr quantization is limited range.
+This transform is identical to one defined in SMPTE 170M/BT.601.</para>
+ </section>
+
+ <section id="col-jpeg">
+ <title>Colorspace JPEG (<constant>V4L2_COLORSPACE_JPEG</constant>)</title>
+ <para>This colorspace defines the colorspace used by most (Motion-)JPEG formats. The chromaticities
+of the primary colors and the white reference are identical to sRGB. The Y'CbCr encoding is
+<constant>V4L2_YCBCR_ENC_601</constant> with full range quantization where
+Y' is scaled to [0&hellip;255] and Cb/Cr are scaled to [-128&hellip;128] and
+then clipped to [-128&hellip;127].</para>
+ <para>Note that the JPEG standard does not actually store colorspace information.
+So if something other than sRGB is used, then the driver will have to set that information
+explicitly. Effectively <constant>V4L2_COLORSPACE_JPEG</constant> can be considered to be
+an abbreviation for <constant>V4L2_COLORSPACE_SRGB</constant>, <constant>V4L2_YCBCR_ENC_601</constant>
+and <constant>V4L2_QUANTIZATION_FULL_RANGE</constant>.</para>
+ </section>
+
+ </section>
+
+ <section id="pixfmt-indexed">
+ <title>Indexed Format</title>
+
+ <para>In this format each pixel is represented by an 8 bit index
+into a 256 entry ARGB palette. It is intended for <link
+linkend="osd">Video Output Overlays</link> only. There are no ioctls to
+access the palette, this must be done with ioctls of the Linux framebuffer API.</para>
+
+ <table pgwide="0" frame="none">
+ <title>Indexed Image Format</title>
+ <tgroup cols="37" align="center">
+ <colspec colname="id" align="left" />
+ <colspec colname="fourcc" />
+ <colspec colname="bit" />
+
+ <colspec colnum="4" colname="b07" align="center" />
+ <colspec colnum="5" colname="b06" align="center" />
+ <colspec colnum="6" colname="b05" align="center" />
+ <colspec colnum="7" colname="b04" align="center" />
+ <colspec colnum="8" colname="b03" align="center" />
+ <colspec colnum="9" colname="b02" align="center" />
+ <colspec colnum="10" colname="b01" align="center" />
+ <colspec colnum="11" colname="b00" align="center" />
+
+ <spanspec namest="b07" nameend="b00" spanname="b0" />
+ <spanspec namest="b17" nameend="b10" spanname="b1" />
+ <spanspec namest="b27" nameend="b20" spanname="b2" />
+ <spanspec namest="b37" nameend="b30" spanname="b3" />
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry>&nbsp;</entry>
+ <entry spanname="b0">Byte&nbsp;0</entry>
+ </row>
+ <row>
+ <entry>&nbsp;</entry>
+ <entry>&nbsp;</entry>
+ <entry>Bit</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="V4L2-PIX-FMT-PAL8">
+ <entry><constant>V4L2_PIX_FMT_PAL8</constant></entry>
+ <entry>'PAL8'</entry>
+ <entry></entry>
+ <entry>i<subscript>7</subscript></entry>
+ <entry>i<subscript>6</subscript></entry>
+ <entry>i<subscript>5</subscript></entry>
+ <entry>i<subscript>4</subscript></entry>
+ <entry>i<subscript>3</subscript></entry>
+ <entry>i<subscript>2</subscript></entry>
+ <entry>i<subscript>1</subscript></entry>
+ <entry>i<subscript>0</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section id="pixfmt-rgb">
+ <title>RGB Formats</title>
+
+ &sub-packed-rgb;
+ &sub-sbggr8;
+ &sub-sgbrg8;
+ &sub-sgrbg8;
+ &sub-srggb8;
+ &sub-sbggr16;
+ &sub-srggb10;
+ &sub-srggb10p;
+ &sub-srggb10alaw8;
+ &sub-srggb10dpcm8;
+ &sub-srggb12;
+ </section>
+
+ <section id="yuv-formats">
+ <title>YUV Formats</title>
+
+ <para>YUV is the format native to TV broadcast and composite video
+signals. It separates the brightness information (Y) from the color
+information (U and V or Cb and Cr). The color information consists of
+red and blue <emphasis>color difference</emphasis> signals, this way
+the green component can be reconstructed by subtracting from the
+brightness component. See <xref linkend="colorspaces" /> for conversion
+examples. YUV was chosen because early television would only transmit
+brightness information. To add color in a way compatible with existing
+receivers a new signal carrier was added to transmit the color
+difference signals. Secondary in the YUV format the U and V components
+usually have lower resolution than the Y component. This is an analog
+video compression technique taking advantage of a property of the
+human visual system, being more sensitive to brightness
+information.</para>
+
+ &sub-packed-yuv;
+ &sub-grey;
+ &sub-y10;
+ &sub-y12;
+ &sub-y10b;
+ &sub-y16;
+ &sub-uv8;
+ &sub-yuyv;
+ &sub-uyvy;
+ &sub-yvyu;
+ &sub-vyuy;
+ &sub-y41p;
+ &sub-yuv420;
+ &sub-yuv420m;
+ &sub-yvu420m;
+ &sub-yuv410;
+ &sub-yuv422p;
+ &sub-yuv411p;
+ &sub-nv12;
+ &sub-nv12m;
+ &sub-nv12mt;
+ &sub-nv16;
+ &sub-nv16m;
+ &sub-nv24;
+ &sub-m420;
+ </section>
+
+ <section>
+ <title>Compressed Formats</title>
+
+ <table pgwide="1" frame="none" id="compressed-formats">
+ <title>Compressed Image Formats</title>
+ <tgroup cols="3" align="left">
+ &cs-def;
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry>Details</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="V4L2-PIX-FMT-JPEG">
+ <entry><constant>V4L2_PIX_FMT_JPEG</constant></entry>
+ <entry>'JPEG'</entry>
+ <entry>TBD. See also &VIDIOC-G-JPEGCOMP;,
+ &VIDIOC-S-JPEGCOMP;.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-MPEG">
+ <entry><constant>V4L2_PIX_FMT_MPEG</constant></entry>
+ <entry>'MPEG'</entry>
+ <entry>MPEG multiplexed stream. The actual format is determined by
+extended control <constant>V4L2_CID_MPEG_STREAM_TYPE</constant>, see
+<xref linkend="mpeg-control-id" />.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-H264">
+ <entry><constant>V4L2_PIX_FMT_H264</constant></entry>
+ <entry>'H264'</entry>
+ <entry>H264 video elementary stream with start codes.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-H264-NO-SC">
+ <entry><constant>V4L2_PIX_FMT_H264_NO_SC</constant></entry>
+ <entry>'AVC1'</entry>
+ <entry>H264 video elementary stream without start codes.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-H264-MVC">
+ <entry><constant>V4L2_PIX_FMT_H264_MVC</constant></entry>
+ <entry>'M264'</entry>
+ <entry>H264 MVC video elementary stream.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-H263">
+ <entry><constant>V4L2_PIX_FMT_H263</constant></entry>
+ <entry>'H263'</entry>
+ <entry>H263 video elementary stream.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-MPEG1">
+ <entry><constant>V4L2_PIX_FMT_MPEG1</constant></entry>
+ <entry>'MPG1'</entry>
+ <entry>MPEG1 video elementary stream.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-MPEG2">
+ <entry><constant>V4L2_PIX_FMT_MPEG2</constant></entry>
+ <entry>'MPG2'</entry>
+ <entry>MPEG2 video elementary stream.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-MPEG4">
+ <entry><constant>V4L2_PIX_FMT_MPEG4</constant></entry>
+ <entry>'MPG4'</entry>
+ <entry>MPEG4 video elementary stream.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-XVID">
+ <entry><constant>V4L2_PIX_FMT_XVID</constant></entry>
+ <entry>'XVID'</entry>
+ <entry>Xvid video elementary stream.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-VC1-ANNEX-G">
+ <entry><constant>V4L2_PIX_FMT_VC1_ANNEX_G</constant></entry>
+ <entry>'VC1G'</entry>
+ <entry>VC1, SMPTE 421M Annex G compliant stream.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-VC1-ANNEX-L">
+ <entry><constant>V4L2_PIX_FMT_VC1_ANNEX_L</constant></entry>
+ <entry>'VC1L'</entry>
+ <entry>VC1, SMPTE 421M Annex L compliant stream.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-VP8">
+ <entry><constant>V4L2_PIX_FMT_VP8</constant></entry>
+ <entry>'VP80'</entry>
+ <entry>VP8 video elementary stream.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section id="sdr-formats">
+ <title>SDR Formats</title>
+
+ <para>These formats are used for <link linkend="sdr">SDR Capture</link>
+interface only.</para>
+
+ &sub-sdr-cu08;
+ &sub-sdr-cu16le;
+ &sub-sdr-cs08;
+ &sub-sdr-cs14le;
+ &sub-sdr-ru12le;
+
+ </section>
+
+ <section id="pixfmt-reserved">
+ <title>Reserved Format Identifiers</title>
+
+ <para>These formats are not defined by this specification, they
+are just listed for reference and to avoid naming conflicts. If you
+want to register your own format, send an e-mail to the linux-media mailing
+list &v4l-ml; for inclusion in the <filename>videodev2.h</filename>
+file. If you want to share your format with other developers add a
+link to your documentation and send a copy to the linux-media mailing list
+for inclusion in this section. If you think your format should be listed
+in a standard format section please make a proposal on the linux-media mailing
+list.</para>
+
+ <table pgwide="1" frame="none" id="reserved-formats">
+ <title>Reserved Image Formats</title>
+ <tgroup cols="3" align="left">
+ &cs-def;
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry>Details</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="V4L2-PIX-FMT-DV">
+ <entry><constant>V4L2_PIX_FMT_DV</constant></entry>
+ <entry>'dvsd'</entry>
+ <entry>unknown</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-ET61X251">
+ <entry><constant>V4L2_PIX_FMT_ET61X251</constant></entry>
+ <entry>'E625'</entry>
+ <entry>Compressed format of the ET61X251 driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-HI240">
+ <entry><constant>V4L2_PIX_FMT_HI240</constant></entry>
+ <entry>'HI24'</entry>
+ <entry><para>8 bit RGB format used by the BTTV driver.</para></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-HM12">
+ <entry><constant>V4L2_PIX_FMT_HM12</constant></entry>
+ <entry>'HM12'</entry>
+ <entry><para>YUV 4:2:0 format used by the
+IVTV driver, <ulink url="http://www.ivtvdriver.org/">
+http://www.ivtvdriver.org/</ulink></para><para>The format is documented in the
+kernel sources in the file <filename>Documentation/video4linux/cx2341x/README.hm12</filename>
+</para></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-CPIA1">
+ <entry><constant>V4L2_PIX_FMT_CPIA1</constant></entry>
+ <entry>'CPIA'</entry>
+ <entry>YUV format used by the gspca cpia1 driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-JPGL">
+ <entry><constant>V4L2_PIX_FMT_JPGL</constant></entry>
+ <entry>'JPGL'</entry>
+ <entry>JPEG-Light format (Pegasus Lossless JPEG)
+ used in Divio webcams NW 80x.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-SPCA501">
+ <entry><constant>V4L2_PIX_FMT_SPCA501</constant></entry>
+ <entry>'S501'</entry>
+ <entry>YUYV per line used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-SPCA505">
+ <entry><constant>V4L2_PIX_FMT_SPCA505</constant></entry>
+ <entry>'S505'</entry>
+ <entry>YYUV per line used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-SPCA508">
+ <entry><constant>V4L2_PIX_FMT_SPCA508</constant></entry>
+ <entry>'S508'</entry>
+ <entry>YUVY per line used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-SPCA561">
+ <entry><constant>V4L2_PIX_FMT_SPCA561</constant></entry>
+ <entry>'S561'</entry>
+ <entry>Compressed GBRG Bayer format used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-PAC207">
+ <entry><constant>V4L2_PIX_FMT_PAC207</constant></entry>
+ <entry>'P207'</entry>
+ <entry>Compressed BGGR Bayer format used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-MR97310A">
+ <entry><constant>V4L2_PIX_FMT_MR97310A</constant></entry>
+ <entry>'M310'</entry>
+ <entry>Compressed BGGR Bayer format used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-JL2005BCD">
+ <entry><constant>V4L2_PIX_FMT_JL2005BCD</constant></entry>
+ <entry>'JL20'</entry>
+ <entry>JPEG compressed RGGB Bayer format used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-OV511">
+ <entry><constant>V4L2_PIX_FMT_OV511</constant></entry>
+ <entry>'O511'</entry>
+ <entry>OV511 JPEG format used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-OV518">
+ <entry><constant>V4L2_PIX_FMT_OV518</constant></entry>
+ <entry>'O518'</entry>
+ <entry>OV518 JPEG format used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-PJPG">
+ <entry><constant>V4L2_PIX_FMT_PJPG</constant></entry>
+ <entry>'PJPG'</entry>
+ <entry>Pixart 73xx JPEG format used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-SE401">
+ <entry><constant>V4L2_PIX_FMT_SE401</constant></entry>
+ <entry>'S401'</entry>
+ <entry>Compressed RGB format used by the gspca se401 driver</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-SQ905C">
+ <entry><constant>V4L2_PIX_FMT_SQ905C</constant></entry>
+ <entry>'905C'</entry>
+ <entry>Compressed RGGB bayer format used by the gspca driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-MJPEG">
+ <entry><constant>V4L2_PIX_FMT_MJPEG</constant></entry>
+ <entry>'MJPG'</entry>
+ <entry>Compressed format used by the Zoran driver</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-PWC1">
+ <entry><constant>V4L2_PIX_FMT_PWC1</constant></entry>
+ <entry>'PWC1'</entry>
+ <entry>Compressed format of the PWC driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-PWC2">
+ <entry><constant>V4L2_PIX_FMT_PWC2</constant></entry>
+ <entry>'PWC2'</entry>
+ <entry>Compressed format of the PWC driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-SN9C10X">
+ <entry><constant>V4L2_PIX_FMT_SN9C10X</constant></entry>
+ <entry>'S910'</entry>
+ <entry>Compressed format of the SN9C102 driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-SN9C20X-I420">
+ <entry><constant>V4L2_PIX_FMT_SN9C20X_I420</constant></entry>
+ <entry>'S920'</entry>
+ <entry>YUV 4:2:0 format of the gspca sn9c20x driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-SN9C2028">
+ <entry><constant>V4L2_PIX_FMT_SN9C2028</constant></entry>
+ <entry>'SONX'</entry>
+ <entry>Compressed GBRG bayer format of the gspca sn9c2028 driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-STV0680">
+ <entry><constant>V4L2_PIX_FMT_STV0680</constant></entry>
+ <entry>'S680'</entry>
+ <entry>Bayer format of the gspca stv0680 driver.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-WNVA">
+ <entry><constant>V4L2_PIX_FMT_WNVA</constant></entry>
+ <entry>'WNVA'</entry>
+ <entry><para>Used by the Winnov Videum driver, <ulink
+url="http://www.thedirks.org/winnov/">
+http://www.thedirks.org/winnov/</ulink></para></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-TM6000">
+ <entry><constant>V4L2_PIX_FMT_TM6000</constant></entry>
+ <entry>'TM60'</entry>
+ <entry><para>Used by Trident tm6000</para></entry>
+ </row>
+ <row id="V4L2-PIX-FMT-CIT-YYVYUY">
+ <entry><constant>V4L2_PIX_FMT_CIT_YYVYUY</constant></entry>
+ <entry>'CITV'</entry>
+ <entry><para>Used by xirlink CIT, found at IBM webcams.</para>
+ <para>Uses one line of Y then 1 line of VYUY</para>
+ </entry>
+ </row>
+ <row id="V4L2-PIX-FMT-KONICA420">
+ <entry><constant>V4L2_PIX_FMT_KONICA420</constant></entry>
+ <entry>'KONI'</entry>
+ <entry><para>Used by Konica webcams.</para>
+ <para>YUV420 planar in blocks of 256 pixels.</para>
+ </entry>
+ </row>
+ <row id="V4L2-PIX-FMT-YYUV">
+ <entry><constant>V4L2_PIX_FMT_YYUV</constant></entry>
+ <entry>'YYUV'</entry>
+ <entry>unknown</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-Y4">
+ <entry><constant>V4L2_PIX_FMT_Y4</constant></entry>
+ <entry>'Y04 '</entry>
+ <entry>Old 4-bit greyscale format. Only the most significant 4 bits of each byte are used,
+the other bits are set to 0.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-Y6">
+ <entry><constant>V4L2_PIX_FMT_Y6</constant></entry>
+ <entry>'Y06 '</entry>
+ <entry>Old 6-bit greyscale format. Only the most significant 6 bits of each byte are used,
+the other bits are set to 0.</entry>
+ </row>
+ <row id="V4L2-PIX-FMT-S5C-UYVY-JPG">
+ <entry><constant>V4L2_PIX_FMT_S5C_UYVY_JPG</constant></entry>
+ <entry>'S5CI'</entry>
+ <entry>Two-planar format used by Samsung S5C73MX cameras. The
+first plane contains interleaved JPEG and UYVY image data, followed by meta data
+in form of an array of offsets to the UYVY data blocks. The actual pointer array
+follows immediately the interleaved JPEG/UYVY data, the number of entries in
+this array equals the height of the UYVY image. Each entry is a 4-byte unsigned
+integer in big endian order and it's an offset to a single pixel line of the
+UYVY image. The first plane can start either with JPEG or UYVY data chunk. The
+size of a single UYVY block equals the UYVY image's width multiplied by 2. The
+size of a JPEG chunk depends on the image and can vary with each line.
+<para>The second plane, at an offset of 4084 bytes, contains a 4-byte offset to
+the pointer array in the first plane. This offset is followed by a 4-byte value
+indicating size of the pointer array. All numbers in the second plane are also
+in big endian order. Remaining data in the second plane is undefined. The
+information in the second plane allows to easily find location of the pointer
+array, which can be different for each frame. The size of the pointer array is
+constant for given UYVY image height.</para>
+<para>In order to extract UYVY and JPEG frames an application can initially set
+a data pointer to the start of first plane and then add an offset from the first
+entry of the pointers table. Such a pointer indicates start of an UYVY image
+pixel line. Whole UYVY line can be copied to a separate buffer. These steps
+should be repeated for each line, i.e. the number of entries in the pointer
+array. Anything what's in between the UYVY lines is JPEG data and should be
+concatenated to form the JPEG stream. </para>
+</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="format-flags">
+ <title>Format Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_PIX_FMT_FLAG_PREMUL_ALPHA</constant></entry>
+ <entry>0x00000001</entry>
+ <entry>The color values are premultiplied by the alpha channel
+value. For example, if a light blue pixel with 50% transparency was described by
+RGBA values (128, 192, 255, 128), the same pixel described with premultiplied
+colors would be described by RGBA values (64, 96, 128, 128) </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
diff --git a/Documentation/DocBook/media/v4l/planar-apis.xml b/Documentation/DocBook/media/v4l/planar-apis.xml
new file mode 100644
index 000000000..878ce2040
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/planar-apis.xml
@@ -0,0 +1,62 @@
+<section id="planar-apis">
+ <title>Single- and multi-planar APIs</title>
+
+ <para>Some devices require data for each input or output video frame
+ to be placed in discontiguous memory buffers. In such cases, one
+ video frame has to be addressed using more than one memory address, i.e. one
+ pointer per "plane". A plane is a sub-buffer of the current frame. For
+ examples of such formats see <xref linkend="pixfmt" />.</para>
+
+ <para>Initially, V4L2 API did not support multi-planar buffers and a set of
+ extensions has been introduced to handle them. Those extensions constitute
+ what is being referred to as the "multi-planar API".</para>
+
+ <para>Some of the V4L2 API calls and structures are interpreted differently,
+ depending on whether single- or multi-planar API is being used. An application
+ can choose whether to use one or the other by passing a corresponding buffer
+ type to its ioctl calls. Multi-planar versions of buffer types are suffixed
+ with an `_MPLANE' string. For a list of available multi-planar buffer types
+ see &v4l2-buf-type;.
+ </para>
+
+ <section>
+ <title>Multi-planar formats</title>
+ <para>Multi-planar API introduces new multi-planar formats. Those formats
+ use a separate set of FourCC codes. It is important to distinguish between
+ the multi-planar API and a multi-planar format. Multi-planar API calls can
+ handle all single-planar formats as well (as long as they are passed in
+ multi-planar API structures), while the single-planar API cannot
+ handle multi-planar formats.</para>
+ </section>
+
+ <section>
+ <title>Calls that distinguish between single and multi-planar APIs</title>
+ <variablelist>
+ <varlistentry>
+ <term>&VIDIOC-QUERYCAP;</term>
+ <listitem><para>Two additional multi-planar capabilities are added. They can
+ be set together with non-multi-planar ones for devices that handle
+ both single- and multi-planar formats.</para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>&VIDIOC-G-FMT;, &VIDIOC-S-FMT;, &VIDIOC-TRY-FMT;</term>
+ <listitem><para>New structures for describing multi-planar formats are added:
+ &v4l2-pix-format-mplane; and &v4l2-plane-pix-format;. Drivers may
+ define new multi-planar formats, which have distinct FourCC codes from
+ the existing single-planar ones.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>&VIDIOC-QBUF;, &VIDIOC-DQBUF;, &VIDIOC-QUERYBUF;</term>
+ <listitem><para>A new &v4l2-plane; structure for describing planes is added.
+ Arrays of this structure are passed in the new
+ <structfield>m.planes</structfield> field of &v4l2-buffer;.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term>&VIDIOC-REQBUFS;</term>
+ <listitem><para>Will allocate multi-planar buffers as requested.</para></listitem>
+ </varlistentry>
+ </variablelist>
+ </section>
+</section>
diff --git a/Documentation/DocBook/media/v4l/remote_controllers.xml b/Documentation/DocBook/media/v4l/remote_controllers.xml
new file mode 100644
index 000000000..5124a6c4d
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/remote_controllers.xml
@@ -0,0 +1,320 @@
+<partinfo>
+<authorgroup>
+<author>
+<firstname>Mauro</firstname>
+<surname>Chehab</surname>
+<othername role="mi">Carvalho</othername>
+<affiliation><address><email>m.chehab@samsung.com</email></address></affiliation>
+<contrib>Initial version.</contrib>
+</author>
+</authorgroup>
+<copyright>
+ <year>2009-2014</year>
+ <holder>Mauro Carvalho Chehab</holder>
+</copyright>
+
+<revhistory>
+<!-- Put document revisions here, newest first. -->
+<revision>
+<revnumber>3.15</revnumber>
+<date>2014-02-06</date>
+<authorinitials>mcc</authorinitials>
+<revremark>Added the interface description and the RC sysfs class description.</revremark>
+</revision>
+<revision>
+<revnumber>1.0</revnumber>
+<date>2009-09-06</date>
+<authorinitials>mcc</authorinitials>
+<revremark>Initial revision</revremark>
+</revision>
+</revhistory>
+</partinfo>
+
+ <title>Remote Controller API</title>
+ <chapter id="remote_controllers">
+
+<title>Remote Controllers</title>
+
+<section id="Remote_controllers_Intro">
+<title>Introduction</title>
+
+<para>Currently, most analog and digital devices have a Infrared input for remote controllers. Each
+manufacturer has their own type of control. It is not rare for the same manufacturer to ship different
+types of controls, depending on the device.</para>
+<para>A Remote Controller interface is mapped as a normal evdev/input interface, just like a keyboard or a mouse.
+So, it uses all ioctls already defined for any other input devices.</para>
+<para>However, remove controllers are more flexible than a normal input device, as the IR
+receiver (and/or transmitter) can be used in conjunction with a wide variety of different IR remotes.</para>
+<para>In order to allow flexibility, the Remote Controller subsystem allows controlling the
+RC-specific attributes via <link linkend="remote_controllers_sysfs_nodes">the sysfs class nodes</link>.</para>
+</section>
+
+<section id="remote_controllers_sysfs_nodes">
+<title>Remote Controller's sysfs nodes</title>
+<para>As defined at <constant>Documentation/ABI/testing/sysfs-class-rc</constant>, those are the sysfs nodes that control the Remote Controllers:</para>
+
+<section id="sys_class_rc">
+<title>/sys/class/rc/</title>
+<para>The <constant>/sys/class/rc/</constant> class sub-directory belongs to the Remote Controller
+core and provides a sysfs interface for configuring infrared remote controller receivers.
+</para>
+
+</section>
+<section id="sys_class_rc_rcN">
+<title>/sys/class/rc/rcN/</title>
+<para>A <constant>/sys/class/rc/rcN</constant> directory is created for each remote
+ control receiver device where N is the number of the receiver.</para>
+
+</section>
+<section id="sys_class_rc_rcN_protocols">
+<title>/sys/class/rc/rcN/protocols</title>
+<para>Reading this file returns a list of available protocols, something like:</para>
+<para><constant>rc5 [rc6] nec jvc [sony]</constant></para>
+<para>Enabled protocols are shown in [] brackets.</para>
+<para>Writing "+proto" will add a protocol to the list of enabled protocols.</para>
+<para>Writing "-proto" will remove a protocol from the list of enabled protocols.</para>
+<para>Writing "proto" will enable only "proto".</para>
+<para>Writing "none" will disable all protocols.</para>
+<para>Write fails with EINVAL if an invalid protocol combination or unknown protocol name is used.</para>
+
+</section>
+<section id="sys_class_rc_rcN_filter">
+<title>/sys/class/rc/rcN/filter</title>
+<para>Sets the scancode filter expected value.</para>
+<para>Use in combination with <constant>/sys/class/rc/rcN/filter_mask</constant> to set the
+expected value of the bits set in the filter mask.
+If the hardware supports it then scancodes which do not match
+the filter will be ignored. Otherwise the write will fail with
+an error.</para>
+<para>This value may be reset to 0 if the current protocol is altered.</para>
+
+</section>
+<section id="sys_class_rc_rcN_filter_mask">
+<title>/sys/class/rc/rcN/filter_mask</title>
+<para>Sets the scancode filter mask of bits to compare.
+Use in combination with <constant>/sys/class/rc/rcN/filter</constant> to set the bits
+of the scancode which should be compared against the expected
+value. A value of 0 disables the filter to allow all valid
+scancodes to be processed.</para>
+<para>If the hardware supports it then scancodes which do not match
+the filter will be ignored. Otherwise the write will fail with
+an error.</para>
+<para>This value may be reset to 0 if the current protocol is altered.</para>
+
+</section>
+<section id="sys_class_rc_rcN_wakeup_protocols">
+<title>/sys/class/rc/rcN/wakeup_protocols</title>
+<para>Reading this file returns a list of available protocols to use for the
+wakeup filter, something like:</para>
+<para><constant>rc5 rc6 nec jvc [sony]</constant></para>
+<para>The enabled wakeup protocol is shown in [] brackets.</para>
+<para>Writing "+proto" will add a protocol to the list of enabled wakeup
+protocols.</para>
+<para>Writing "-proto" will remove a protocol from the list of enabled wakeup
+protocols.</para>
+<para>Writing "proto" will use "proto" for wakeup events.</para>
+<para>Writing "none" will disable wakeup.</para>
+<para>Write fails with EINVAL if an invalid protocol combination or unknown
+protocol name is used, or if wakeup is not supported by the hardware.</para>
+
+</section>
+<section id="sys_class_rc_rcN_wakeup_filter">
+<title>/sys/class/rc/rcN/wakeup_filter</title>
+<para>Sets the scancode wakeup filter expected value.
+Use in combination with <constant>/sys/class/rc/rcN/wakeup_filter_mask</constant> to
+set the expected value of the bits set in the wakeup filter mask
+to trigger a system wake event.</para>
+<para>If the hardware supports it and wakeup_filter_mask is not 0 then
+scancodes which match the filter will wake the system from e.g.
+suspend to RAM or power off.
+Otherwise the write will fail with an error.</para>
+<para>This value may be reset to 0 if the wakeup protocol is altered.</para>
+
+</section>
+<section id="sys_class_rc_rcN_wakeup_filter_mask">
+<title>/sys/class/rc/rcN/wakeup_filter_mask</title>
+<para>Sets the scancode wakeup filter mask of bits to compare.
+Use in combination with <constant>/sys/class/rc/rcN/wakeup_filter</constant> to set
+the bits of the scancode which should be compared against the
+expected value to trigger a system wake event.</para>
+<para>If the hardware supports it and wakeup_filter_mask is not 0 then
+scancodes which match the filter will wake the system from e.g.
+suspend to RAM or power off.
+Otherwise the write will fail with an error.</para>
+<para>This value may be reset to 0 if the wakeup protocol is altered.</para>
+</section>
+</section>
+
+<section id="Remote_controllers_tables">
+<title>Remote controller tables</title>
+<para>Unfortunately, for several years, there was no effort to create uniform IR keycodes for
+different devices. This caused the same IR keyname to be mapped completely differently on
+different IR devices. This resulted that the same IR keyname to be mapped completely different on
+different IR's. Due to that, V4L2 API now specifies a standard for mapping Media keys on IR.</para>
+<para>This standard should be used by both V4L/DVB drivers and userspace applications</para>
+<para>The modules register the remote as keyboard within the linux input layer. This means that the IR key strokes will look like normal keyboard key strokes (if CONFIG_INPUT_KEYBOARD is enabled). Using the event devices (CONFIG_INPUT_EVDEV) it is possible for applications to access the remote via /dev/input/event devices.</para>
+
+<table pgwide="1" frame="none" id="rc_standard_keymap">
+<title>IR default keymapping</title>
+<tgroup cols="3">
+&cs-str;
+<tbody valign="top">
+<row>
+<entry>Key code</entry>
+<entry>Meaning</entry>
+<entry>Key examples on IR</entry>
+</row>
+
+<row><entry><emphasis role="bold">Numeric keys</emphasis></entry></row>
+
+<row><entry><constant>KEY_0</constant></entry><entry>Keyboard digit 0</entry><entry>0</entry></row>
+<row><entry><constant>KEY_1</constant></entry><entry>Keyboard digit 1</entry><entry>1</entry></row>
+<row><entry><constant>KEY_2</constant></entry><entry>Keyboard digit 2</entry><entry>2</entry></row>
+<row><entry><constant>KEY_3</constant></entry><entry>Keyboard digit 3</entry><entry>3</entry></row>
+<row><entry><constant>KEY_4</constant></entry><entry>Keyboard digit 4</entry><entry>4</entry></row>
+<row><entry><constant>KEY_5</constant></entry><entry>Keyboard digit 5</entry><entry>5</entry></row>
+<row><entry><constant>KEY_6</constant></entry><entry>Keyboard digit 6</entry><entry>6</entry></row>
+<row><entry><constant>KEY_7</constant></entry><entry>Keyboard digit 7</entry><entry>7</entry></row>
+<row><entry><constant>KEY_8</constant></entry><entry>Keyboard digit 8</entry><entry>8</entry></row>
+<row><entry><constant>KEY_9</constant></entry><entry>Keyboard digit 9</entry><entry>9</entry></row>
+
+<row><entry><emphasis role="bold">Movie play control</emphasis></entry></row>
+
+<row><entry><constant>KEY_FORWARD</constant></entry><entry>Instantly advance in time</entry><entry>&gt;&gt; / FORWARD</entry></row>
+<row><entry><constant>KEY_BACK</constant></entry><entry>Instantly go back in time</entry><entry>&lt;&lt;&lt; / BACK</entry></row>
+<row><entry><constant>KEY_FASTFORWARD</constant></entry><entry>Play movie faster</entry><entry>&gt;&gt;&gt; / FORWARD</entry></row>
+<row><entry><constant>KEY_REWIND</constant></entry><entry>Play movie back</entry><entry>REWIND / BACKWARD</entry></row>
+<row><entry><constant>KEY_NEXT</constant></entry><entry>Select next chapter / sub-chapter / interval</entry><entry>NEXT / SKIP</entry></row>
+<row><entry><constant>KEY_PREVIOUS</constant></entry><entry>Select previous chapter / sub-chapter / interval</entry><entry>&lt;&lt; / PREV / PREVIOUS</entry></row>
+<row><entry><constant>KEY_AGAIN</constant></entry><entry>Repeat the video or a video interval</entry><entry>REPEAT / LOOP / RECALL</entry></row>
+<row><entry><constant>KEY_PAUSE</constant></entry><entry>Pause sroweam</entry><entry>PAUSE / FREEZE</entry></row>
+<row><entry><constant>KEY_PLAY</constant></entry><entry>Play movie at the normal timeshift</entry><entry>NORMAL TIMESHIFT / LIVE / &gt;</entry></row>
+<row><entry><constant>KEY_PLAYPAUSE</constant></entry><entry>Alternate between play and pause</entry><entry>PLAY / PAUSE</entry></row>
+<row><entry><constant>KEY_STOP</constant></entry><entry>Stop sroweam</entry><entry>STOP</entry></row>
+<row><entry><constant>KEY_RECORD</constant></entry><entry>Start/stop recording sroweam</entry><entry>CAPTURE / REC / RECORD/PAUSE</entry></row>
+<row><entry><constant>KEY_CAMERA</constant></entry><entry>Take a picture of the image</entry><entry>CAMERA ICON / CAPTURE / SNAPSHOT</entry></row>
+<row><entry><constant>KEY_SHUFFLE</constant></entry><entry>Enable shuffle mode</entry><entry>SHUFFLE</entry></row>
+<row><entry><constant>KEY_TIME</constant></entry><entry>Activate time shift mode</entry><entry>TIME SHIFT</entry></row>
+<row><entry><constant>KEY_TITLE</constant></entry><entry>Allow changing the chapter</entry><entry>CHAPTER</entry></row>
+<row><entry><constant>KEY_SUBTITLE</constant></entry><entry>Allow changing the subtitle</entry><entry>SUBTITLE</entry></row>
+
+<row><entry><emphasis role="bold">Image control</emphasis></entry></row>
+
+<row><entry><constant>KEY_BRIGHTNESSDOWN</constant></entry><entry>Decrease Brightness</entry><entry>BRIGHTNESS DECREASE</entry></row>
+<row><entry><constant>KEY_BRIGHTNESSUP</constant></entry><entry>Increase Brightness</entry><entry>BRIGHTNESS INCREASE</entry></row>
+
+<row><entry><constant>KEY_ANGLE</constant></entry><entry>Switch video camera angle (on videos with more than one angle stored)</entry><entry>ANGLE / SWAP</entry></row>
+<row><entry><constant>KEY_EPG</constant></entry><entry>Open the Elecrowonic Play Guide (EPG)</entry><entry>EPG / GUIDE</entry></row>
+<row><entry><constant>KEY_TEXT</constant></entry><entry>Activate/change closed caption mode</entry><entry>CLOSED CAPTION/TELETEXT / DVD TEXT / TELETEXT / TTX</entry></row>
+
+<row><entry><emphasis role="bold">Audio control</emphasis></entry></row>
+
+<row><entry><constant>KEY_AUDIO</constant></entry><entry>Change audio source</entry><entry>AUDIO SOURCE / AUDIO / MUSIC</entry></row>
+<row><entry><constant>KEY_MUTE</constant></entry><entry>Mute/unmute audio</entry><entry>MUTE / DEMUTE / UNMUTE</entry></row>
+<row><entry><constant>KEY_VOLUMEDOWN</constant></entry><entry>Decrease volume</entry><entry>VOLUME- / VOLUME DOWN</entry></row>
+<row><entry><constant>KEY_VOLUMEUP</constant></entry><entry>Increase volume</entry><entry>VOLUME+ / VOLUME UP</entry></row>
+<row><entry><constant>KEY_MODE</constant></entry><entry>Change sound mode</entry><entry>MONO/STEREO</entry></row>
+<row><entry><constant>KEY_LANGUAGE</constant></entry><entry>Select Language</entry><entry>1ST / 2ND LANGUAGE / DVD LANG / MTS/SAP / MTS SEL</entry></row>
+
+<row><entry><emphasis role="bold">Channel control</emphasis></entry></row>
+
+<row><entry><constant>KEY_CHANNEL</constant></entry><entry>Go to the next favorite channel</entry><entry>ALT / CHANNEL / CH SURFING / SURF / FAV</entry></row>
+<row><entry><constant>KEY_CHANNELDOWN</constant></entry><entry>Decrease channel sequencially</entry><entry>CHANNEL - / CHANNEL DOWN / DOWN</entry></row>
+<row><entry><constant>KEY_CHANNELUP</constant></entry><entry>Increase channel sequencially</entry><entry>CHANNEL + / CHANNEL UP / UP</entry></row>
+<row><entry><constant>KEY_DIGITS</constant></entry><entry>Use more than one digit for channel</entry><entry>PLUS / 100/ 1xx / xxx / -/-- / Single Double Triple Digit</entry></row>
+<row><entry><constant>KEY_SEARCH</constant></entry><entry>Start channel autoscan</entry><entry>SCAN / AUTOSCAN</entry></row>
+
+<row><entry><emphasis role="bold">Colored keys</emphasis></entry></row>
+
+<row><entry><constant>KEY_BLUE</constant></entry><entry>IR Blue key</entry><entry>BLUE</entry></row>
+<row><entry><constant>KEY_GREEN</constant></entry><entry>IR Green Key</entry><entry>GREEN</entry></row>
+<row><entry><constant>KEY_RED</constant></entry><entry>IR Red key</entry><entry>RED</entry></row>
+<row><entry><constant>KEY_YELLOW</constant></entry><entry>IR Yellow key</entry><entry> YELLOW</entry></row>
+
+<row><entry><emphasis role="bold">Media selection</emphasis></entry></row>
+
+<row><entry><constant>KEY_CD</constant></entry><entry>Change input source to Compact Disc</entry><entry>CD</entry></row>
+<row><entry><constant>KEY_DVD</constant></entry><entry>Change input to DVD</entry><entry>DVD / DVD MENU</entry></row>
+<row><entry><constant>KEY_EJECTCLOSECD</constant></entry><entry>Open/close the CD/DVD player</entry><entry>-&gt; ) / CLOSE / OPEN</entry></row>
+
+<row><entry><constant>KEY_MEDIA</constant></entry><entry>Turn on/off Media application</entry><entry>PC/TV / TURN ON/OFF APP</entry></row>
+<row><entry><constant>KEY_PC</constant></entry><entry>Selects from TV to PC</entry><entry>PC</entry></row>
+<row><entry><constant>KEY_RADIO</constant></entry><entry>Put into AM/FM radio mode</entry><entry>RADIO / TV/FM / TV/RADIO / FM / FM/RADIO</entry></row>
+<row><entry><constant>KEY_TV</constant></entry><entry>Select tv mode</entry><entry>TV / LIVE TV</entry></row>
+<row><entry><constant>KEY_TV2</constant></entry><entry>Select Cable mode</entry><entry>AIR/CBL</entry></row>
+<row><entry><constant>KEY_VCR</constant></entry><entry>Select VCR mode</entry><entry>VCR MODE / DTR</entry></row>
+<row><entry><constant>KEY_VIDEO</constant></entry><entry>Alternate between input modes</entry><entry>SOURCE / SELECT / DISPLAY / SWITCH INPUTS / VIDEO</entry></row>
+
+<row><entry><emphasis role="bold">Power control</emphasis></entry></row>
+
+<row><entry><constant>KEY_POWER</constant></entry><entry>Turn on/off computer</entry><entry>SYSTEM POWER / COMPUTER POWER</entry></row>
+<row><entry><constant>KEY_POWER2</constant></entry><entry>Turn on/off application</entry><entry>TV ON/OFF / POWER</entry></row>
+<row><entry><constant>KEY_SLEEP</constant></entry><entry>Activate sleep timer</entry><entry>SLEEP / SLEEP TIMER</entry></row>
+<row><entry><constant>KEY_SUSPEND</constant></entry><entry>Put computer into suspend mode</entry><entry>STANDBY / SUSPEND</entry></row>
+
+<row><entry><emphasis role="bold">Window control</emphasis></entry></row>
+
+<row><entry><constant>KEY_CLEAR</constant></entry><entry>Stop sroweam and return to default input video/audio</entry><entry>CLEAR / RESET / BOSS KEY</entry></row>
+<row><entry><constant>KEY_CYCLEWINDOWS</constant></entry><entry>Minimize windows and move to the next one</entry><entry>ALT-TAB / MINIMIZE / DESKTOP</entry></row>
+<row><entry><constant>KEY_FAVORITES</constant></entry><entry>Open the favorites sroweam window</entry><entry>TV WALL / Favorites</entry></row>
+<row><entry><constant>KEY_MENU</constant></entry><entry>Call application menu</entry><entry>2ND CONTROLS (USA: MENU) / DVD/MENU / SHOW/HIDE CTRL</entry></row>
+<row><entry><constant>KEY_NEW</constant></entry><entry>Open/Close Picture in Picture</entry><entry>PIP</entry></row>
+<row><entry><constant>KEY_OK</constant></entry><entry>Send a confirmation code to application</entry><entry>OK / ENTER / RETURN</entry></row>
+<row><entry><constant>KEY_SCREEN</constant></entry><entry>Select screen aspect ratio</entry><entry>4:3 16:9 SELECT</entry></row>
+<row><entry><constant>KEY_ZOOM</constant></entry><entry>Put device into zoom/full screen mode</entry><entry>ZOOM / FULL SCREEN / ZOOM+ / HIDE PANNEL / SWITCH</entry></row>
+
+<row><entry><emphasis role="bold">Navigation keys</emphasis></entry></row>
+
+<row><entry><constant>KEY_ESC</constant></entry><entry>Cancel current operation</entry><entry>CANCEL / BACK</entry></row>
+<row><entry><constant>KEY_HELP</constant></entry><entry>Open a Help window</entry><entry>HELP</entry></row>
+<row><entry><constant>KEY_HOMEPAGE</constant></entry><entry>Navigate to Homepage</entry><entry>HOME</entry></row>
+<row><entry><constant>KEY_INFO</constant></entry><entry>Open On Screen Display</entry><entry>DISPLAY INFORMATION / OSD</entry></row>
+<row><entry><constant>KEY_WWW</constant></entry><entry>Open the default browser</entry><entry>WEB</entry></row>
+<row><entry><constant>KEY_UP</constant></entry><entry>Up key</entry><entry>UP</entry></row>
+<row><entry><constant>KEY_DOWN</constant></entry><entry>Down key</entry><entry>DOWN</entry></row>
+<row><entry><constant>KEY_LEFT</constant></entry><entry>Left key</entry><entry>LEFT</entry></row>
+<row><entry><constant>KEY_RIGHT</constant></entry><entry>Right key</entry><entry>RIGHT</entry></row>
+
+<row><entry><emphasis role="bold">Miscellaneous keys</emphasis></entry></row>
+
+<row><entry><constant>KEY_DOT</constant></entry><entry>Return a dot</entry><entry>.</entry></row>
+<row><entry><constant>KEY_FN</constant></entry><entry>Select a function</entry><entry>FUNCTION</entry></row>
+
+</tbody>
+</tgroup>
+</table>
+
+<para>It should be noticed that, sometimes, there some fundamental missing keys at some cheaper IR's. Due to that, it is recommended to:</para>
+
+<table pgwide="1" frame="none" id="rc_keymap_notes">
+<title>Notes</title>
+<tgroup cols="1">
+&cs-str;
+<tbody valign="top">
+<row>
+<entry>On simpler IR's, without separate channel keys, you need to map UP as <constant>KEY_CHANNELUP</constant></entry>
+</row><row>
+<entry>On simpler IR's, without separate channel keys, you need to map DOWN as <constant>KEY_CHANNELDOWN</constant></entry>
+</row><row>
+<entry>On simpler IR's, without separate volume keys, you need to map LEFT as <constant>KEY_VOLUMEDOWN</constant></entry>
+</row><row>
+<entry>On simpler IR's, without separate volume keys, you need to map RIGHT as <constant>KEY_VOLUMEUP</constant></entry>
+</row>
+</tbody>
+</tgroup>
+</table>
+
+</section>
+
+<section id="Remote_controllers_table_change">
+<title>Changing default Remote Controller mappings</title>
+<para>The event interface provides two ioctls to be used against
+the /dev/input/event device, to allow changing the default
+keymapping.</para>
+
+<para>This program demonstrates how to replace the keymap tables.</para>
+&sub-keytable-c;
+</section>
+
+&sub-lirc_device_interface;
+</chapter>
diff --git a/Documentation/DocBook/media/v4l/selection-api.xml b/Documentation/DocBook/media/v4l/selection-api.xml
new file mode 100644
index 000000000..28cbded76
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/selection-api.xml
@@ -0,0 +1,324 @@
+<section id="selection-api">
+
+ <title>Experimental API for cropping, composing and scaling</title>
+
+ <note>
+ <title>Experimental</title>
+
+ <para>This is an <link linkend="experimental">experimental</link>
+interface and may change in the future.</para>
+ </note>
+
+ <section>
+ <title>Introduction</title>
+
+<para>Some video capture devices can sample a subsection of a picture and
+shrink or enlarge it to an image of arbitrary size. Next, the devices can
+insert the image into larger one. Some video output devices can crop part of an
+input image, scale it up or down and insert it at an arbitrary scan line and
+horizontal offset into a video signal. We call these abilities cropping,
+scaling and composing.</para>
+
+<para>On a video <emphasis>capture</emphasis> device the source is a video
+signal, and the cropping target determine the area actually sampled. The sink
+is an image stored in a memory buffer. The composing area specifies which part
+of the buffer is actually written to by the hardware. </para>
+
+<para>On a video <emphasis>output</emphasis> device the source is an image in a
+memory buffer, and the cropping target is a part of an image to be shown on a
+display. The sink is the display or the graphics screen. The application may
+select the part of display where the image should be displayed. The size and
+position of such a window is controlled by the compose target.</para>
+
+<para>Rectangles for all cropping and composing targets are defined even if the
+device does supports neither cropping nor composing. Their size and position
+will be fixed in such a case. If the device does not support scaling then the
+cropping and composing rectangles have the same size.</para>
+
+ </section>
+
+ <section>
+ <title>Selection targets</title>
+
+ <para>
+ <figure id="sel-targets-capture">
+ <title>Cropping and composing targets</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="selection.png" format="PNG" />
+ </imageobject>
+ <textobject>
+ <phrase>Targets used by a cropping, composing and scaling
+ process</phrase>
+ </textobject>
+ </mediaobject>
+ </figure>
+ </para>
+
+ <para>See <xref linkend="v4l2-selection-targets" /> for more
+ information.</para>
+ </section>
+
+ <section>
+
+ <title>Configuration</title>
+
+<para>Applications can use the <link linkend="vidioc-g-selection">selection
+API</link> to select an area in a video signal or a buffer, and to query for
+default settings and hardware limits.</para>
+
+<para>Video hardware can have various cropping, composing and scaling
+limitations. It may only scale up or down, support only discrete scaling
+factors, or have different scaling abilities in the horizontal and vertical
+directions. Also it may not support scaling at all. At the same time the
+cropping/composing rectangles may have to be aligned, and both the source and
+the sink may have arbitrary upper and lower size limits. Therefore, as usual,
+drivers are expected to adjust the requested parameters and return the actual
+values selected. An application can control the rounding behaviour using <link
+linkend="v4l2-selection-flags"> constraint flags </link>.</para>
+
+ <section>
+
+ <title>Configuration of video capture</title>
+
+<para>See figure <xref linkend="sel-targets-capture" /> for examples of the
+selection targets available for a video capture device. It is recommended to
+configure the cropping targets before to the composing targets.</para>
+
+<para>The range of coordinates of the top left corner, width and height of
+areas that can be sampled is given by the <constant>V4L2_SEL_TGT_CROP_BOUNDS</constant>
+target. It is recommended for the driver developers to put the
+top/left corner at position <constant>(0,0)</constant>. The rectangle's
+coordinates are expressed in pixels.</para>
+
+<para>The top left corner, width and height of the source rectangle, that is
+the area actually sampled, is given by the <constant>V4L2_SEL_TGT_CROP</constant>
+target. It uses the same coordinate system as <constant>V4L2_SEL_TGT_CROP_BOUNDS</constant>.
+The active cropping area must lie completely inside the capture boundaries. The
+driver may further adjust the requested size and/or position according to hardware
+limitations.</para>
+
+<para>Each capture device has a default source rectangle, given by the
+<constant>V4L2_SEL_TGT_CROP_DEFAULT</constant> target. This rectangle shall
+over what the driver writer considers the complete picture. Drivers shall set
+the active crop rectangle to the default when the driver is first loaded, but
+not later.</para>
+
+<para>The composing targets refer to a memory buffer. The limits of composing
+coordinates are obtained using <constant>V4L2_SEL_TGT_COMPOSE_BOUNDS</constant>.
+All coordinates are expressed in pixels. The rectangle's top/left
+corner must be located at position <constant>(0,0)</constant>. The width and
+height are equal to the image size set by <constant>VIDIOC_S_FMT</constant>.
+</para>
+
+<para>The part of a buffer into which the image is inserted by the hardware is
+controlled by the <constant>V4L2_SEL_TGT_COMPOSE</constant> target.
+The rectangle's coordinates are also expressed in the same coordinate system as
+the bounds rectangle. The composing rectangle must lie completely inside bounds
+rectangle. The driver must adjust the composing rectangle to fit to the
+bounding limits. Moreover, the driver can perform other adjustments according
+to hardware limitations. The application can control rounding behaviour using
+<link linkend="v4l2-selection-flags"> constraint flags</link>.</para>
+
+<para>For capture devices the default composing rectangle is queried using
+<constant>V4L2_SEL_TGT_COMPOSE_DEFAULT</constant>. It is usually equal to the
+bounding rectangle.</para>
+
+<para>The part of a buffer that is modified by the hardware is given by
+<constant>V4L2_SEL_TGT_COMPOSE_PADDED</constant>. It contains all pixels
+defined using <constant>V4L2_SEL_TGT_COMPOSE</constant> plus all
+padding data modified by hardware during insertion process. All pixels outside
+this rectangle <emphasis>must not</emphasis> be changed by the hardware. The
+content of pixels that lie inside the padded area but outside active area is
+undefined. The application can use the padded and active rectangles to detect
+where the rubbish pixels are located and remove them if needed.</para>
+
+ </section>
+
+ <section>
+
+ <title>Configuration of video output</title>
+
+<para>For output devices targets and ioctls are used similarly to the video
+capture case. The <emphasis>composing</emphasis> rectangle refers to the
+insertion of an image into a video signal. The cropping rectangles refer to a
+memory buffer. It is recommended to configure the composing targets before to
+the cropping targets.</para>
+
+<para>The cropping targets refer to the memory buffer that contains an image to
+be inserted into a video signal or graphical screen. The limits of cropping
+coordinates are obtained using <constant>V4L2_SEL_TGT_CROP_BOUNDS</constant>.
+All coordinates are expressed in pixels. The top/left corner is always point
+<constant>(0,0)</constant>. The width and height is equal to the image size
+specified using <constant>VIDIOC_S_FMT</constant> ioctl.</para>
+
+<para>The top left corner, width and height of the source rectangle, that is
+the area from which image date are processed by the hardware, is given by the
+<constant>V4L2_SEL_TGT_CROP</constant>. Its coordinates are expressed
+in in the same coordinate system as the bounds rectangle. The active cropping
+area must lie completely inside the crop boundaries and the driver may further
+adjust the requested size and/or position according to hardware
+limitations.</para>
+
+<para>For output devices the default cropping rectangle is queried using
+<constant>V4L2_SEL_TGT_CROP_DEFAULT</constant>. It is usually equal to the
+bounding rectangle.</para>
+
+<para>The part of a video signal or graphics display where the image is
+inserted by the hardware is controlled by <constant>V4L2_SEL_TGT_COMPOSE</constant>
+target. The rectangle's coordinates are expressed in pixels. The composing
+rectangle must lie completely inside the bounds rectangle. The driver must
+adjust the area to fit to the bounding limits. Moreover, the driver can
+perform other adjustments according to hardware limitations.</para>
+
+<para>The device has a default composing rectangle, given by the
+<constant>V4L2_SEL_TGT_COMPOSE_DEFAULT</constant> target. This rectangle shall cover what
+the driver writer considers the complete picture. It is recommended for the
+driver developers to put the top/left corner at position <constant>(0,0)</constant>.
+Drivers shall set the active composing rectangle to the default
+one when the driver is first loaded.</para>
+
+<para>The devices may introduce additional content to video signal other than
+an image from memory buffers. It includes borders around an image. However,
+such a padded area is driver-dependent feature not covered by this document.
+Driver developers are encouraged to keep padded rectangle equal to active one.
+The padded target is accessed by the <constant>V4L2_SEL_TGT_COMPOSE_PADDED</constant>
+identifier. It must contain all pixels from the <constant>V4L2_SEL_TGT_COMPOSE</constant>
+target.</para>
+
+ </section>
+
+ <section>
+
+ <title>Scaling control</title>
+
+<para>An application can detect if scaling is performed by comparing the width
+and the height of rectangles obtained using <constant>V4L2_SEL_TGT_CROP</constant>
+and <constant>V4L2_SEL_TGT_COMPOSE</constant> targets. If
+these are not equal then the scaling is applied. The application can compute
+the scaling ratios using these values.</para>
+
+ </section>
+
+ </section>
+
+ <section>
+
+ <title>Comparison with old cropping API</title>
+
+<para>The selection API was introduced to cope with deficiencies of previous
+<link linkend="crop"> API</link>, that was designed to control simple capture
+devices. Later the cropping API was adopted by video output drivers. The ioctls
+are used to select a part of the display were the video signal is inserted. It
+should be considered as an API abuse because the described operation is
+actually the composing. The selection API makes a clear distinction between
+composing and cropping operations by setting the appropriate targets. The V4L2
+API lacks any support for composing to and cropping from an image inside a
+memory buffer. The application could configure a capture device to fill only a
+part of an image by abusing V4L2 API. Cropping a smaller image from a larger
+one is achieved by setting the field
+&v4l2-pix-format;<structfield>::bytesperline</structfield>. Introducing an image offsets
+could be done by modifying field &v4l2-buffer;<structfield>::m_userptr</structfield>
+before calling <constant>VIDIOC_QBUF</constant>. Those
+operations should be avoided because they are not portable (endianness), and do
+not work for macroblock and Bayer formats and mmap buffers. The selection API
+deals with configuration of buffer cropping/composing in a clear, intuitive and
+portable way. Next, with the selection API the concepts of the padded target
+and constraints flags are introduced. Finally, &v4l2-crop; and &v4l2-cropcap;
+have no reserved fields. Therefore there is no way to extend their functionality.
+The new &v4l2-selection; provides a lot of place for future
+extensions. Driver developers are encouraged to implement only selection API.
+The former cropping API would be simulated using the new one.</para>
+
+ </section>
+
+ <section>
+ <title>Examples</title>
+ <example>
+ <title>Resetting the cropping parameters</title>
+
+ <para>(A video capture device is assumed; change
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant> for other devices; change target to
+<constant>V4L2_SEL_TGT_COMPOSE_*</constant> family to configure composing
+area)</para>
+
+ <programlisting>
+
+ &v4l2-selection; sel = {
+ .type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
+ .target = V4L2_SEL_TGT_CROP_DEFAULT,
+ };
+ ret = ioctl(fd, &VIDIOC-G-SELECTION;, &amp;sel);
+ if (ret)
+ exit(-1);
+ sel.target = V4L2_SEL_TGT_CROP;
+ ret = ioctl(fd, &VIDIOC-S-SELECTION;, &amp;sel);
+ if (ret)
+ exit(-1);
+
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Simple downscaling</title>
+ <para>Setting a composing area on output of size of <emphasis> at most
+</emphasis> half of limit placed at a center of a display.</para>
+ <programlisting>
+
+ &v4l2-selection; sel = {
+ .type = V4L2_BUF_TYPE_VIDEO_OUTPUT,
+ .target = V4L2_SEL_TGT_COMPOSE_BOUNDS,
+ };
+ struct v4l2_rect r;
+
+ ret = ioctl(fd, &VIDIOC-G-SELECTION;, &amp;sel);
+ if (ret)
+ exit(-1);
+ /* setting smaller compose rectangle */
+ r.width = sel.r.width / 2;
+ r.height = sel.r.height / 2;
+ r.left = sel.r.width / 4;
+ r.top = sel.r.height / 4;
+ sel.r = r;
+ sel.target = V4L2_SEL_TGT_COMPOSE;
+ sel.flags = V4L2_SEL_FLAG_LE;
+ ret = ioctl(fd, &VIDIOC-S-SELECTION;, &amp;sel);
+ if (ret)
+ exit(-1);
+
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Querying for scaling factors</title>
+ <para>A video output device is assumed; change
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant> for other devices</para>
+ <programlisting>
+
+ &v4l2-selection; compose = {
+ .type = V4L2_BUF_TYPE_VIDEO_OUTPUT,
+ .target = V4L2_SEL_TGT_COMPOSE,
+ };
+ &v4l2-selection; crop = {
+ .type = V4L2_BUF_TYPE_VIDEO_OUTPUT,
+ .target = V4L2_SEL_TGT_CROP,
+ };
+ double hscale, vscale;
+
+ ret = ioctl(fd, &VIDIOC-G-SELECTION;, &amp;compose);
+ if (ret)
+ exit(-1);
+ ret = ioctl(fd, &VIDIOC-G-SELECTION;, &amp;crop);
+ if (ret)
+ exit(-1);
+
+ /* computing scaling factors */
+ hscale = (double)compose.r.width / crop.r.width;
+ vscale = (double)compose.r.height / crop.r.height;
+
+ </programlisting>
+ </example>
+
+ </section>
+
+</section>
diff --git a/Documentation/DocBook/media/v4l/selections-common.xml b/Documentation/DocBook/media/v4l/selections-common.xml
new file mode 100644
index 000000000..d6d56fb6f
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/selections-common.xml
@@ -0,0 +1,180 @@
+<section id="v4l2-selections-common">
+
+ <title>Common selection definitions</title>
+
+ <para>While the <link linkend="selection-api">V4L2 selection
+ API</link> and <link linkend="v4l2-subdev-selections">V4L2 subdev
+ selection APIs</link> are very similar, there's one fundamental
+ difference between the two. On sub-device API, the selection
+ rectangle refers to the media bus format, and is bound to a
+ sub-device's pad. On the V4L2 interface the selection rectangles
+ refer to the in-memory pixel format.</para>
+
+ <para>This section defines the common definitions of the
+ selection interfaces on the two APIs.</para>
+
+ <section id="v4l2-selection-targets">
+
+ <title>Selection targets</title>
+
+ <para>The precise meaning of the selection targets may be
+ dependent on which of the two interfaces they are used.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-selection-targets-table">
+ <title>Selection target definitions</title>
+ <tgroup cols="5">
+ <colspec colname="c1" />
+ <colspec colname="c2" />
+ <colspec colname="c3" />
+ <colspec colname="c4" />
+ <colspec colname="c5" />
+ &cs-def;
+ <thead>
+ <row rowsep="1">
+ <entry align="left">Target name</entry>
+ <entry align="left">id</entry>
+ <entry align="left">Definition</entry>
+ <entry align="left">Valid for V4L2</entry>
+ <entry align="left">Valid for V4L2 subdev</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_SEL_TGT_CROP</constant></entry>
+ <entry>0x0000</entry>
+ <entry>Crop rectangle. Defines the cropped area.</entry>
+ <entry>Yes</entry>
+ <entry>Yes</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SEL_TGT_CROP_DEFAULT</constant></entry>
+ <entry>0x0001</entry>
+ <entry>Suggested cropping rectangle that covers the "whole picture".</entry>
+ <entry>Yes</entry>
+ <entry>No</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SEL_TGT_CROP_BOUNDS</constant></entry>
+ <entry>0x0002</entry>
+ <entry>Bounds of the crop rectangle. All valid crop
+ rectangles fit inside the crop bounds rectangle.
+ </entry>
+ <entry>Yes</entry>
+ <entry>Yes</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SEL_TGT_NATIVE_SIZE</constant></entry>
+ <entry>0x0003</entry>
+ <entry>The native size of the device, e.g. a sensor's
+ pixel array. <structfield>left</structfield> and
+ <structfield>top</structfield> fields are zero for this
+ target. Setting the native size will generally only make
+ sense for memory to memory devices where the software can
+ create a canvas of a given size in which for example a
+ video frame can be composed. In that case
+ V4L2_SEL_TGT_NATIVE_SIZE can be used to configure the size
+ of that canvas.
+ </entry>
+ <entry>Yes</entry>
+ <entry>Yes</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SEL_TGT_COMPOSE</constant></entry>
+ <entry>0x0100</entry>
+ <entry>Compose rectangle. Used to configure scaling
+ and composition.</entry>
+ <entry>Yes</entry>
+ <entry>Yes</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SEL_TGT_COMPOSE_DEFAULT</constant></entry>
+ <entry>0x0101</entry>
+ <entry>Suggested composition rectangle that covers the "whole picture".</entry>
+ <entry>Yes</entry>
+ <entry>No</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SEL_TGT_COMPOSE_BOUNDS</constant></entry>
+ <entry>0x0102</entry>
+ <entry>Bounds of the compose rectangle. All valid compose
+ rectangles fit inside the compose bounds rectangle.</entry>
+ <entry>Yes</entry>
+ <entry>Yes</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SEL_TGT_COMPOSE_PADDED</constant></entry>
+ <entry>0x0103</entry>
+ <entry>The active area and all padding pixels that are inserted or
+ modified by hardware.</entry>
+ <entry>Yes</entry>
+ <entry>No</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </section>
+
+ <section id="v4l2-selection-flags">
+
+ <title>Selection flags</title>
+
+ <table pgwide="1" frame="none" id="v4l2-selection-flags-table">
+ <title>Selection flag definitions</title>
+ <tgroup cols="5">
+ <colspec colname="c1" />
+ <colspec colname="c2" />
+ <colspec colname="c3" />
+ <colspec colname="c4" />
+ <colspec colname="c5" />
+ &cs-def;
+ <thead>
+ <row rowsep="1">
+ <entry align="left">Flag name</entry>
+ <entry align="left">id</entry>
+ <entry align="left">Definition</entry>
+ <entry align="left">Valid for V4L2</entry>
+ <entry align="left">Valid for V4L2 subdev</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_SEL_FLAG_GE</constant></entry>
+ <entry>(1 &lt;&lt; 0)</entry>
+ <entry>Suggest the driver it should choose greater or
+ equal rectangle (in size) than was requested. Albeit the
+ driver may choose a lesser size, it will only do so due to
+ hardware limitations. Without this flag (and
+ <constant>V4L2_SEL_FLAG_LE</constant>) the
+ behaviour is to choose the closest possible
+ rectangle.</entry>
+ <entry>Yes</entry>
+ <entry>Yes</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SEL_FLAG_LE</constant></entry>
+ <entry>(1 &lt;&lt; 1)</entry>
+ <entry>Suggest the driver it
+ should choose lesser or equal rectangle (in size) than was
+ requested. Albeit the driver may choose a greater size, it
+ will only do so due to hardware limitations.</entry>
+ <entry>Yes</entry>
+ <entry>Yes</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SEL_FLAG_KEEP_CONFIG</constant></entry>
+ <entry>(1 &lt;&lt; 2)</entry>
+ <entry>The configuration must not be propagated to any
+ further processing steps. If this flag is not given, the
+ configuration is propagated inside the subdevice to all
+ further processing steps.</entry>
+ <entry>No</entry>
+ <entry>Yes</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </section>
+
+</section>
diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
new file mode 100644
index 000000000..2588ad781
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -0,0 +1,4038 @@
+<section id="v4l2-mbus-format">
+ <title>Media Bus Formats</title>
+
+ <table pgwide="1" frame="none" id="v4l2-mbus-framefmt">
+ <title>struct <structname>v4l2_mbus_framefmt</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>width</structfield></entry>
+ <entry>Image width, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>height</structfield></entry>
+ <entry>Image height, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>code</structfield></entry>
+ <entry>Format code, from &v4l2-mbus-pixelcode;.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>field</structfield></entry>
+ <entry>Field order, from &v4l2-field;. See
+ <xref linkend="field-order" /> for details.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>colorspace</structfield></entry>
+ <entry>Image colorspace, from &v4l2-colorspace;. See
+ <xref linkend="colorspaces" /> for details.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-ycbcr-encoding;</entry>
+ <entry><structfield>ycbcr_enc</structfield></entry>
+ <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-quantization;</entry>
+ <entry><structfield>quantization</structfield></entry>
+ <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[6]</entry>
+ <entry>Reserved for future extensions. Applications and drivers must
+ set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <section id="v4l2-mbus-pixelcode">
+ <title>Media Bus Pixel Codes</title>
+
+ <para>The media bus pixel codes describe image formats as flowing over
+ physical busses (both between separate physical components and inside SoC
+ devices). This should not be confused with the V4L2 pixel formats that
+ describe, using four character codes, image formats as stored in memory.
+ </para>
+
+ <para>While there is a relationship between image formats on busses and
+ image formats in memory (a raw Bayer image won't be magically converted to
+ JPEG just by storing it to memory), there is no one-to-one correspondance
+ between them.</para>
+
+ <section>
+ <title>Packed RGB Formats</title>
+
+ <para>Those formats transfer pixel data as red, green and blue components.
+ The format code is made of the following information.
+ <itemizedlist>
+ <listitem><para>The red, green and blue components order code, as encoded in a
+ pixel sample. Possible values are RGB and BGR.</para></listitem>
+ <listitem><para>The number of bits per component, for each component. The values
+ can be different for all components. Common values are 555 and 565.</para>
+ </listitem>
+ <listitem><para>The number of bus samples per pixel. Pixels that are wider than
+ the bus width must be transferred in multiple samples. Common values are
+ 1 and 2.</para></listitem>
+ <listitem><para>The bus width.</para></listitem>
+ <listitem><para>For formats where the total number of bits per pixel is smaller
+ than the number of bus samples per pixel times the bus width, a padding
+ value stating if the bytes are padded in their most high order bits
+ (PADHI) or low order bits (PADLO). A "C" prefix is used for component-wise
+ padding in the most high order bits (CPADHI) or low order bits (CPADLO)
+ of each separate component.</para></listitem>
+ <listitem><para>For formats where the number of bus samples per pixel is larger
+ than 1, an endianness value stating if the pixel is transferred MSB first
+ (BE) or LSB first (LE).</para></listitem>
+ </itemizedlist>
+ </para>
+
+ <para>For instance, a format where pixels are encoded as 5-bits red, 5-bits
+ green and 5-bit blue values padded on the high bit, transferred as 2 8-bit
+ samples per pixel with the most significant bits (padding, red and half of
+ the green value) transferred first will be named
+ <constant>MEDIA_BUS_FMT_RGB555_2X8_PADHI_BE</constant>.
+ </para>
+
+ <para>The following tables list existing packed RGB formats.</para>
+
+ <table pgwide="0" frame="none" id="v4l2-mbus-pixelcode-rgb">
+ <title>RGB formats</title>
+ <tgroup cols="27">
+ <colspec colname="id" align="left" />
+ <colspec colname="code" align="center"/>
+ <colspec colname="bit" />
+ <colspec colnum="4" colname="b31" align="center" />
+ <colspec colnum="5" colname="b20" align="center" />
+ <colspec colnum="6" colname="b29" align="center" />
+ <colspec colnum="7" colname="b28" align="center" />
+ <colspec colnum="8" colname="b27" align="center" />
+ <colspec colnum="9" colname="b26" align="center" />
+ <colspec colnum="10" colname="b25" align="center" />
+ <colspec colnum="11" colname="b24" align="center" />
+ <colspec colnum="12" colname="b23" align="center" />
+ <colspec colnum="13" colname="b22" align="center" />
+ <colspec colnum="14" colname="b21" align="center" />
+ <colspec colnum="15" colname="b20" align="center" />
+ <colspec colnum="16" colname="b19" align="center" />
+ <colspec colnum="17" colname="b18" align="center" />
+ <colspec colnum="18" colname="b17" align="center" />
+ <colspec colnum="19" colname="b16" align="center" />
+ <colspec colnum="20" colname="b15" align="center" />
+ <colspec colnum="21" colname="b14" align="center" />
+ <colspec colnum="22" colname="b13" align="center" />
+ <colspec colnum="23" colname="b12" align="center" />
+ <colspec colnum="24" colname="b11" align="center" />
+ <colspec colnum="25" colname="b10" align="center" />
+ <colspec colnum="26" colname="b09" align="center" />
+ <colspec colnum="27" colname="b08" align="center" />
+ <colspec colnum="28" colname="b07" align="center" />
+ <colspec colnum="29" colname="b06" align="center" />
+ <colspec colnum="30" colname="b05" align="center" />
+ <colspec colnum="31" colname="b04" align="center" />
+ <colspec colnum="32" colname="b03" align="center" />
+ <colspec colnum="33" colname="b02" align="center" />
+ <colspec colnum="34" colname="b01" align="center" />
+ <colspec colnum="35" colname="b00" align="center" />
+ <spanspec namest="b31" nameend="b00" spanname="b0" />
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry></entry>
+ <entry spanname="b0">Data organization</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>Bit</entry>
+ <entry>31</entry>
+ <entry>30</entry>
+ <entry>29</entry>
+ <entry>28</entry>
+ <entry>27</entry>
+ <entry>26</entry>
+ <entry>25</entry>
+ <entry>24</entry>
+ <entry>23</entry>
+ <entry>22</entry>
+ <entry>21</entry>
+ <entry>20</entry>
+ <entry>19</entry>
+ <entry>18</entry>
+ <entry>17</entry>
+ <entry>16</entry>
+ <entry>15</entry>
+ <entry>14</entry>
+ <entry>13</entry>
+ <entry>12</entry>
+ <entry>11</entry>
+ <entry>10</entry>
+ <entry>9</entry>
+ <entry>8</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="MEDIA-BUS-FMT-RGB444-1X12">
+ <entry>MEDIA_BUS_FMT_RGB444_1X12</entry>
+ <entry>0x1016</entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB444-2X8-PADHI-BE">
+ <entry>MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE</entry>
+ <entry>0x1001</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB444-2X8-PADHI-LE">
+ <entry>MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE</entry>
+ <entry>0x1002</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB555-2X8-PADHI-BE">
+ <entry>MEDIA_BUS_FMT_RGB555_2X8_PADHI_BE</entry>
+ <entry>0x1003</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>0</entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB555-2X8-PADHI-LE">
+ <entry>MEDIA_BUS_FMT_RGB555_2X8_PADHI_LE</entry>
+ <entry>0x1004</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>0</entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB565-1X16">
+ <entry>MEDIA_BUS_FMT_RGB565_1X16</entry>
+ <entry>0x1017</entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-BGR565-2X8-BE">
+ <entry>MEDIA_BUS_FMT_BGR565_2X8_BE</entry>
+ <entry>0x1005</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-BGR565-2X8-LE">
+ <entry>MEDIA_BUS_FMT_BGR565_2X8_LE</entry>
+ <entry>0x1006</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB565-2X8-BE">
+ <entry>MEDIA_BUS_FMT_RGB565_2X8_BE</entry>
+ <entry>0x1007</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB565-2X8-LE">
+ <entry>MEDIA_BUS_FMT_RGB565_2X8_LE</entry>
+ <entry>0x1008</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB666-1X18">
+ <entry>MEDIA_BUS_FMT_RGB666_1X18</entry>
+ <entry>0x1009</entry>
+ <entry></entry>
+ &dash-ent-14;
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RBG888-1X24">
+ <entry>MEDIA_BUS_FMT_RBG888_1X24</entry>
+ <entry>0x100e</entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB666-1X24_CPADHI">
+ <entry>MEDIA_BUS_FMT_RGB666_1X24_CPADHI</entry>
+ <entry>0x1015</entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-BGR888-1X24">
+ <entry>MEDIA_BUS_FMT_BGR888_1X24</entry>
+ <entry>0x1013</entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-GBR888-1X24">
+ <entry>MEDIA_BUS_FMT_GBR888_1X24</entry>
+ <entry>0x1014</entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB888-1X24">
+ <entry>MEDIA_BUS_FMT_RGB888_1X24</entry>
+ <entry>0x100a</entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB888-2X12-BE">
+ <entry>MEDIA_BUS_FMT_RGB888_2X12_BE</entry>
+ <entry>0x100b</entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB888-2X12-LE">
+ <entry>MEDIA_BUS_FMT_RGB888_2X12_LE</entry>
+ <entry>0x100c</entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-ARGB888-1X32">
+ <entry>MEDIA_BUS_FMT_ARGB888_1X32</entry>
+ <entry>0x100d</entry>
+ <entry></entry>
+ <entry>a<subscript>7</subscript></entry>
+ <entry>a<subscript>6</subscript></entry>
+ <entry>a<subscript>5</subscript></entry>
+ <entry>a<subscript>4</subscript></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB888-1X32-PADHI">
+ <entry>MEDIA_BUS_FMT_RGB888_1X32_PADHI</entry>
+ <entry>0x100f</entry>
+ <entry></entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para>On LVDS buses, usually each sample is transferred serialized in
+ seven time slots per pixel clock, on three (18-bit) or four (24-bit)
+ differential data pairs at the same time. The remaining bits are used for
+ control signals as defined by SPWG/PSWG/VESA or JEIDA standards.
+ The 24-bit RGB format serialized in seven time slots on four lanes using
+ JEIDA defined bit mapping will be named
+ <constant>MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA</constant>, for example.
+ </para>
+
+ <table pgwide="0" frame="none" id="v4l2-mbus-pixelcode-rgb-lvds">
+ <title>LVDS RGB formats</title>
+ <tgroup cols="8">
+ <colspec colname="id" align="left" />
+ <colspec colname="code" align="center" />
+ <colspec colname="slot" align="center" />
+ <colspec colname="lane" />
+ <colspec colnum="5" colname="l03" align="center" />
+ <colspec colnum="6" colname="l02" align="center" />
+ <colspec colnum="7" colname="l01" align="center" />
+ <colspec colnum="8" colname="l00" align="center" />
+ <spanspec namest="l03" nameend="l00" spanname="l0" />
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry></entry>
+ <entry></entry>
+ <entry spanname="l0">Data organization</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>Timeslot</entry>
+ <entry>Lane</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="MEDIA-BUS-FMT-RGB666-1X7X3-SPWG">
+ <entry>MEDIA_BUS_FMT_RGB666_1X7X3_SPWG</entry>
+ <entry>0x1010</entry>
+ <entry>0</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>d</entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>1</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>d</entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>2</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>d</entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>3</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>4</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>5</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>6</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB888-1X7X4-SPWG">
+ <entry>MEDIA_BUS_FMT_RGB888_1X7X4_SPWG</entry>
+ <entry>0x1011</entry>
+ <entry>0</entry>
+ <entry></entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>1</entry>
+ <entry></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>d</entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>2</entry>
+ <entry></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>d</entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>3</entry>
+ <entry></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>4</entry>
+ <entry></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>5</entry>
+ <entry></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>6</entry>
+ <entry></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-RGB888-1X7X4-JEIDA">
+ <entry>MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA</entry>
+ <entry>0x1012</entry>
+ <entry>0</entry>
+ <entry></entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>1</entry>
+ <entry></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>d</entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>r<subscript>7</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>2</entry>
+ <entry></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>d</entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>3</entry>
+ <entry></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>4</entry>
+ <entry></entry>
+ <entry>g<subscript>0</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>5</entry>
+ <entry></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>6</entry>
+ <entry></entry>
+ <entry>r<subscript>0</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section>
+ <title>Bayer Formats</title>
+
+ <para>Those formats transfer pixel data as red, green and blue components.
+ The format code is made of the following information.
+ <itemizedlist>
+ <listitem><para>The red, green and blue components order code, as encoded in a
+ pixel sample. The possible values are shown in <xref
+ linkend="bayer-patterns" />.</para></listitem>
+ <listitem><para>The number of bits per pixel component. All components are
+ transferred on the same number of bits. Common values are 8, 10 and 12.</para>
+ </listitem>
+ <listitem><para>The compression (optional). If the pixel components are
+ ALAW- or DPCM-compressed, a mention of the compression scheme and the
+ number of bits per compressed pixel component.</para></listitem>
+ <listitem><para>The number of bus samples per pixel. Pixels that are wider than
+ the bus width must be transferred in multiple samples. Common values are
+ 1 and 2.</para></listitem>
+ <listitem><para>The bus width.</para></listitem>
+ <listitem><para>For formats where the total number of bits per pixel is smaller
+ than the number of bus samples per pixel times the bus width, a padding
+ value stating if the bytes are padded in their most high order bits
+ (PADHI) or low order bits (PADLO).</para></listitem>
+ <listitem><para>For formats where the number of bus samples per pixel is larger
+ than 1, an endianness value stating if the pixel is transferred MSB first
+ (BE) or LSB first (LE).</para></listitem>
+ </itemizedlist>
+ </para>
+
+ <para>For instance, a format with uncompressed 10-bit Bayer components
+ arranged in a red, green, green, blue pattern transferred as 2 8-bit
+ samples per pixel with the least significant bits transferred first will
+ be named <constant>MEDIA_BUS_FMT_SRGGB10_2X8_PADHI_LE</constant>.
+ </para>
+
+ <figure id="bayer-patterns">
+ <title>Bayer Patterns</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="bayer.png" format="PNG" />
+ </imageobject>
+ <textobject>
+ <phrase>Bayer filter color patterns</phrase>
+ </textobject>
+ </mediaobject>
+ </figure>
+
+ <para>The following table lists existing packed Bayer formats. The data
+ organization is given as an example for the first pixel only.</para>
+
+ <table pgwide="0" frame="none" id="v4l2-mbus-pixelcode-bayer">
+ <title>Bayer Formats</title>
+ <tgroup cols="15">
+ <colspec colname="id" align="left" />
+ <colspec colname="code" align="center"/>
+ <colspec colname="bit" />
+ <colspec colnum="4" colname="b11" align="center" />
+ <colspec colnum="5" colname="b10" align="center" />
+ <colspec colnum="6" colname="b09" align="center" />
+ <colspec colnum="7" colname="b08" align="center" />
+ <colspec colnum="8" colname="b07" align="center" />
+ <colspec colnum="9" colname="b06" align="center" />
+ <colspec colnum="10" colname="b05" align="center" />
+ <colspec colnum="11" colname="b04" align="center" />
+ <colspec colnum="12" colname="b03" align="center" />
+ <colspec colnum="13" colname="b02" align="center" />
+ <colspec colnum="14" colname="b01" align="center" />
+ <colspec colnum="15" colname="b00" align="center" />
+ <spanspec namest="b11" nameend="b00" spanname="b0" />
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry></entry>
+ <entry spanname="b0">Data organization</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>Bit</entry>
+ <entry>11</entry>
+ <entry>10</entry>
+ <entry>9</entry>
+ <entry>8</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="MEDIA-BUS-FMT-SBGGR8-1X8">
+ <entry>MEDIA_BUS_FMT_SBGGR8_1X8</entry>
+ <entry>0x3001</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SGBRG8-1X8">
+ <entry>MEDIA_BUS_FMT_SGBRG8_1X8</entry>
+ <entry>0x3013</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SGRBG8-1X8">
+ <entry>MEDIA_BUS_FMT_SGRBG8_1X8</entry>
+ <entry>0x3002</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SRGGB8-1X8">
+ <entry>MEDIA_BUS_FMT_SRGGB8_1X8</entry>
+ <entry>0x3014</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SBGGR10-ALAW8-1X8">
+ <entry>MEDIA_BUS_FMT_SBGGR10_ALAW8_1X8</entry>
+ <entry>0x3015</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SGBRG10-ALAW8-1X8">
+ <entry>MEDIA_BUS_FMT_SGBRG10_ALAW8_1X8</entry>
+ <entry>0x3016</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SGRBG10-ALAW8-1X8">
+ <entry>MEDIA_BUS_FMT_SGRBG10_ALAW8_1X8</entry>
+ <entry>0x3017</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SRGGB10-ALAW8-1X8">
+ <entry>MEDIA_BUS_FMT_SRGGB10_ALAW8_1X8</entry>
+ <entry>0x3018</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SBGGR10-DPCM8-1X8">
+ <entry>MEDIA_BUS_FMT_SBGGR10_DPCM8_1X8</entry>
+ <entry>0x300b</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SGBRG10-DPCM8-1X8">
+ <entry>MEDIA_BUS_FMT_SGBRG10_DPCM8_1X8</entry>
+ <entry>0x300c</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SGRBG10-DPCM8-1X8">
+ <entry>MEDIA_BUS_FMT_SGRBG10_DPCM8_1X8</entry>
+ <entry>0x3009</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SRGGB10-DPCM8-1X8">
+ <entry>MEDIA_BUS_FMT_SRGGB10_DPCM8_1X8</entry>
+ <entry>0x300d</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SBGGR10-2X8-PADHI-BE">
+ <entry>MEDIA_BUS_FMT_SBGGR10_2X8_PADHI_BE</entry>
+ <entry>0x3003</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>b<subscript>9</subscript></entry>
+ <entry>b<subscript>8</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SBGGR10-2X8-PADHI-LE">
+ <entry>MEDIA_BUS_FMT_SBGGR10_2X8_PADHI_LE</entry>
+ <entry>0x3004</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>b<subscript>9</subscript></entry>
+ <entry>b<subscript>8</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SBGGR10-2X8-PADLO-BE">
+ <entry>MEDIA_BUS_FMT_SBGGR10_2X8_PADLO_BE</entry>
+ <entry>0x3005</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>b<subscript>9</subscript></entry>
+ <entry>b<subscript>8</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SBGGR10-2X8-PADLO-LE">
+ <entry>MEDIA_BUS_FMT_SBGGR10_2X8_PADLO_LE</entry>
+ <entry>0x3006</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>b<subscript>9</subscript></entry>
+ <entry>b<subscript>8</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SBGGR10-1X10">
+ <entry>MEDIA_BUS_FMT_SBGGR10_1X10</entry>
+ <entry>0x3007</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>b<subscript>9</subscript></entry>
+ <entry>b<subscript>8</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SGBRG10-1X10">
+ <entry>MEDIA_BUS_FMT_SGBRG10_1X10</entry>
+ <entry>0x300e</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>g<subscript>9</subscript></entry>
+ <entry>g<subscript>8</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SGRBG10-1X10">
+ <entry>MEDIA_BUS_FMT_SGRBG10_1X10</entry>
+ <entry>0x300a</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>g<subscript>9</subscript></entry>
+ <entry>g<subscript>8</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SRGGB10-1X10">
+ <entry>MEDIA_BUS_FMT_SRGGB10_1X10</entry>
+ <entry>0x300f</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>r<subscript>9</subscript></entry>
+ <entry>r<subscript>8</subscript></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SBGGR12-1X12">
+ <entry>MEDIA_BUS_FMT_SBGGR12_1X12</entry>
+ <entry>0x3008</entry>
+ <entry></entry>
+ <entry>b<subscript>11</subscript></entry>
+ <entry>b<subscript>10</subscript></entry>
+ <entry>b<subscript>9</subscript></entry>
+ <entry>b<subscript>8</subscript></entry>
+ <entry>b<subscript>7</subscript></entry>
+ <entry>b<subscript>6</subscript></entry>
+ <entry>b<subscript>5</subscript></entry>
+ <entry>b<subscript>4</subscript></entry>
+ <entry>b<subscript>3</subscript></entry>
+ <entry>b<subscript>2</subscript></entry>
+ <entry>b<subscript>1</subscript></entry>
+ <entry>b<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SGBRG12-1X12">
+ <entry>MEDIA_BUS_FMT_SGBRG12_1X12</entry>
+ <entry>0x3010</entry>
+ <entry></entry>
+ <entry>g<subscript>11</subscript></entry>
+ <entry>g<subscript>10</subscript></entry>
+ <entry>g<subscript>9</subscript></entry>
+ <entry>g<subscript>8</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SGRBG12-1X12">
+ <entry>MEDIA_BUS_FMT_SGRBG12_1X12</entry>
+ <entry>0x3011</entry>
+ <entry></entry>
+ <entry>g<subscript>11</subscript></entry>
+ <entry>g<subscript>10</subscript></entry>
+ <entry>g<subscript>9</subscript></entry>
+ <entry>g<subscript>8</subscript></entry>
+ <entry>g<subscript>7</subscript></entry>
+ <entry>g<subscript>6</subscript></entry>
+ <entry>g<subscript>5</subscript></entry>
+ <entry>g<subscript>4</subscript></entry>
+ <entry>g<subscript>3</subscript></entry>
+ <entry>g<subscript>2</subscript></entry>
+ <entry>g<subscript>1</subscript></entry>
+ <entry>g<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-SRGGB12-1X12">
+ <entry>MEDIA_BUS_FMT_SRGGB12_1X12</entry>
+ <entry>0x3012</entry>
+ <entry></entry>
+ <entry>r<subscript>11</subscript></entry>
+ <entry>r<subscript>10</subscript></entry>
+ <entry>r<subscript>9</subscript></entry>
+ <entry>r<subscript>8</subscript></entry>
+ <entry>r<subscript>7</subscript></entry>
+ <entry>r<subscript>6</subscript></entry>
+ <entry>r<subscript>5</subscript></entry>
+ <entry>r<subscript>4</subscript></entry>
+ <entry>r<subscript>3</subscript></entry>
+ <entry>r<subscript>2</subscript></entry>
+ <entry>r<subscript>1</subscript></entry>
+ <entry>r<subscript>0</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section>
+ <title>Packed YUV Formats</title>
+
+ <para>Those data formats transfer pixel data as (possibly downsampled) Y, U
+ and V components. Some formats include dummy bits in some of their samples
+ and are collectively referred to as "YDYC" (Y-Dummy-Y-Chroma) formats.
+ One cannot rely on the values of these dummy bits as those are undefined.
+ </para>
+ <para>The format code is made of the following information.
+ <itemizedlist>
+ <listitem><para>The Y, U and V components order code, as transferred on the
+ bus. Possible values are YUYV, UYVY, YVYU and VYUY for formats with no
+ dummy bit, and YDYUYDYV, YDYVYDYU, YUYDYVYD and YVYDYUYD for YDYC formats.
+ </para></listitem>
+ <listitem><para>The number of bits per pixel component. All components are
+ transferred on the same number of bits. Common values are 8, 10 and 12.</para>
+ </listitem>
+ <listitem><para>The number of bus samples per pixel. Pixels that are wider than
+ the bus width must be transferred in multiple samples. Common values are
+ 1, 1.5 (encoded as 1_5) and 2.</para></listitem>
+ <listitem><para>The bus width. When the bus width is larger than the number of
+ bits per pixel component, several components are packed in a single bus
+ sample. The components are ordered as specified by the order code, with
+ components on the left of the code transferred in the high order bits.
+ Common values are 8 and 16.</para>
+ </listitem>
+ </itemizedlist>
+ </para>
+
+ <para>For instance, a format where pixels are encoded as 8-bit YUV values
+ downsampled to 4:2:2 and transferred as 2 8-bit bus samples per pixel in the
+ U, Y, V, Y order will be named <constant>MEDIA_BUS_FMT_UYVY8_2X8</constant>.
+ </para>
+
+ <para><xref linkend="v4l2-mbus-pixelcode-yuv8"/> lists existing packed YUV
+ formats and describes the organization of each pixel data in each sample.
+ When a format pattern is split across multiple samples each of the samples
+ in the pattern is described.</para>
+
+ <para>The role of each bit transferred over the bus is identified by one
+ of the following codes.</para>
+
+ <itemizedlist>
+ <listitem><para>y<subscript>x</subscript> for luma component bit number x</para></listitem>
+ <listitem><para>u<subscript>x</subscript> for blue chroma component bit number x</para></listitem>
+ <listitem><para>v<subscript>x</subscript> for red chroma component bit number x</para></listitem>
+ <listitem><para>a<subscript>x</subscript> for alpha component bit number x</para></listitem>
+ <listitem><para>- for non-available bits (for positions higher than the bus width)</para></listitem>
+ <listitem><para>d for dummy bits</para></listitem>
+ </itemizedlist>
+
+ <table pgwide="0" frame="none" id="v4l2-mbus-pixelcode-yuv8">
+ <title>YUV Formats</title>
+ <tgroup cols="23">
+ <colspec colname="id" align="left" />
+ <colspec colname="code" align="center"/>
+ <colspec colname="bit" />
+ <colspec colnum="4" colname="b31" align="center" />
+ <colspec colnum="5" colname="b20" align="center" />
+ <colspec colnum="6" colname="b29" align="center" />
+ <colspec colnum="7" colname="b28" align="center" />
+ <colspec colnum="8" colname="b27" align="center" />
+ <colspec colnum="9" colname="b26" align="center" />
+ <colspec colnum="10" colname="b25" align="center" />
+ <colspec colnum="11" colname="b24" align="center" />
+ <colspec colnum="12" colname="b23" align="center" />
+ <colspec colnum="13" colname="b22" align="center" />
+ <colspec colnum="14" colname="b21" align="center" />
+ <colspec colnum="15" colname="b20" align="center" />
+ <colspec colnum="16" colname="b19" align="center" />
+ <colspec colnum="17" colname="b18" align="center" />
+ <colspec colnum="18" colname="b17" align="center" />
+ <colspec colnum="19" colname="b16" align="center" />
+ <colspec colnum="20" colname="b15" align="center" />
+ <colspec colnum="21" colname="b14" align="center" />
+ <colspec colnum="22" colname="b13" align="center" />
+ <colspec colnum="23" colname="b12" align="center" />
+ <colspec colnum="24" colname="b11" align="center" />
+ <colspec colnum="25" colname="b10" align="center" />
+ <colspec colnum="26" colname="b09" align="center" />
+ <colspec colnum="27" colname="b08" align="center" />
+ <colspec colnum="28" colname="b07" align="center" />
+ <colspec colnum="29" colname="b06" align="center" />
+ <colspec colnum="30" colname="b05" align="center" />
+ <colspec colnum="31" colname="b04" align="center" />
+ <colspec colnum="32" colname="b03" align="center" />
+ <colspec colnum="33" colname="b02" align="center" />
+ <colspec colnum="34" colname="b01" align="center" />
+ <colspec colnum="35" colname="b00" align="center" />
+ <spanspec namest="b31" nameend="b00" spanname="b0" />
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry></entry>
+ <entry spanname="b0">Data organization</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>Bit</entry>
+ <entry>31</entry>
+ <entry>30</entry>
+ <entry>29</entry>
+ <entry>28</entry>
+ <entry>27</entry>
+ <entry>26</entry>
+ <entry>25</entry>
+ <entry>24</entry>
+ <entry>23</entry>
+ <entry>22</entry>
+ <entry>21</entry>
+ <entry>10</entry>
+ <entry>19</entry>
+ <entry>18</entry>
+ <entry>17</entry>
+ <entry>16</entry>
+ <entry>15</entry>
+ <entry>14</entry>
+ <entry>13</entry>
+ <entry>12</entry>
+ <entry>11</entry>
+ <entry>10</entry>
+ <entry>9</entry>
+ <entry>8</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="MEDIA-BUS-FMT-Y8-1X8">
+ <entry>MEDIA_BUS_FMT_Y8_1X8</entry>
+ <entry>0x2001</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-UV8-1X8">
+ <entry>MEDIA_BUS_FMT_UV8_1X8</entry>
+ <entry>0x2015</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-UYVY8-1_5X8">
+ <entry>MEDIA_BUS_FMT_UYVY8_1_5X8</entry>
+ <entry>0x2002</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-VYUY8-1_5X8">
+ <entry>MEDIA_BUS_FMT_VYUY8_1_5X8</entry>
+ <entry>0x2003</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YUYV8-1_5X8">
+ <entry>MEDIA_BUS_FMT_YUYV8_1_5X8</entry>
+ <entry>0x2004</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YVYU8-1_5X8">
+ <entry>MEDIA_BUS_FMT_YVYU8_1_5X8</entry>
+ <entry>0x2005</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-UYVY8-2X8">
+ <entry>MEDIA_BUS_FMT_UYVY8_2X8</entry>
+ <entry>0x2006</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-VYUY8-2X8">
+ <entry>MEDIA_BUS_FMT_VYUY8_2X8</entry>
+ <entry>0x2007</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YUYV8-2X8">
+ <entry>MEDIA_BUS_FMT_YUYV8_2X8</entry>
+ <entry>0x2008</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YVYU8-2X8">
+ <entry>MEDIA_BUS_FMT_YVYU8_2X8</entry>
+ <entry>0x2009</entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-24;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-Y10-1X10">
+ <entry>MEDIA_BUS_FMT_Y10_1X10</entry>
+ <entry>0x200a</entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-UYVY10-2X10">
+ <entry>MEDIA_BUS_FMT_UYVY10_2X10</entry>
+ <entry>0x2018</entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-VYUY10-2X10">
+ <entry>MEDIA_BUS_FMT_VYUY10_2X10</entry>
+ <entry>0x2019</entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YUYV10-2X10">
+ <entry>MEDIA_BUS_FMT_YUYV10_2X10</entry>
+ <entry>0x200b</entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YVYU10-2X10">
+ <entry>MEDIA_BUS_FMT_YVYU10_2X10</entry>
+ <entry>0x200c</entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-22;
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-Y12-1X12">
+ <entry>MEDIA_BUS_FMT_Y12_1X12</entry>
+ <entry>0x2013</entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-UYVY12-2X12">
+ <entry>MEDIA_BUS_FMT_UYVY12_2X12</entry>
+ <entry>0x201c</entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>u<subscript>11</subscript></entry>
+ <entry>u<subscript>10</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>v<subscript>11</subscript></entry>
+ <entry>v<subscript>10</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-VYUY12-2X12">
+ <entry>MEDIA_BUS_FMT_VYUY12_2X12</entry>
+ <entry>0x201d</entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>v<subscript>11</subscript></entry>
+ <entry>v<subscript>10</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>u<subscript>11</subscript></entry>
+ <entry>u<subscript>10</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YUYV12-2X12">
+ <entry>MEDIA_BUS_FMT_YUYV12_2X12</entry>
+ <entry>0x201e</entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>u<subscript>11</subscript></entry>
+ <entry>u<subscript>10</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>v<subscript>11</subscript></entry>
+ <entry>v<subscript>10</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YVYU12-2X12">
+ <entry>MEDIA_BUS_FMT_YVYU12_2X12</entry>
+ <entry>0x201f</entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>v<subscript>11</subscript></entry>
+ <entry>v<subscript>10</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-20;
+ <entry>u<subscript>11</subscript></entry>
+ <entry>u<subscript>10</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-UYVY8-1X16">
+ <entry>MEDIA_BUS_FMT_UYVY8_1X16</entry>
+ <entry>0x200f</entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-VYUY8-1X16">
+ <entry>MEDIA_BUS_FMT_VYUY8_1X16</entry>
+ <entry>0x2010</entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YUYV8-1X16">
+ <entry>MEDIA_BUS_FMT_YUYV8_1X16</entry>
+ <entry>0x2011</entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YVYU8-1X16">
+ <entry>MEDIA_BUS_FMT_YVYU8_1X16</entry>
+ <entry>0x2012</entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YDYUYDYV8-1X16">
+ <entry>MEDIA_BUS_FMT_YDYUYDYV8_1X16</entry>
+ <entry>0x2014</entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ <entry>d</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-16;
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-UYVY10-1X20">
+ <entry>MEDIA_BUS_FMT_UYVY10_1X20</entry>
+ <entry>0x201a</entry>
+ <entry></entry>
+ &dash-ent-12;
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-12;
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-VYUY10-1X20">
+ <entry>MEDIA_BUS_FMT_VYUY10_1X20</entry>
+ <entry>0x201b</entry>
+ <entry></entry>
+ &dash-ent-12;
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-12;
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YUYV10-1X20">
+ <entry>MEDIA_BUS_FMT_YUYV10_1X20</entry>
+ <entry>0x200d</entry>
+ <entry></entry>
+ &dash-ent-12;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-12;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YVYU10-1X20">
+ <entry>MEDIA_BUS_FMT_YVYU10_1X20</entry>
+ <entry>0x200e</entry>
+ <entry></entry>
+ &dash-ent-12;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-12;
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-VUY8-1X24">
+ <entry>MEDIA_BUS_FMT_VUY8_1X24</entry>
+ <entry>0x201a</entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YUV8-1X24">
+ <entry>MEDIA_BUS_FMT_YUV8_1X24</entry>
+ <entry>0x2025</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-UYVY12-1X24">
+ <entry>MEDIA_BUS_FMT_UYVY12_1X24</entry>
+ <entry>0x2020</entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>u<subscript>11</subscript></entry>
+ <entry>u<subscript>10</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>v<subscript>11</subscript></entry>
+ <entry>v<subscript>10</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-VYUY12-1X24">
+ <entry>MEDIA_BUS_FMT_VYUY12_1X24</entry>
+ <entry>0x2021</entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>v<subscript>11</subscript></entry>
+ <entry>v<subscript>10</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>u<subscript>11</subscript></entry>
+ <entry>u<subscript>10</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YUYV12-1X24">
+ <entry>MEDIA_BUS_FMT_YUYV12_1X24</entry>
+ <entry>0x2022</entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>u<subscript>11</subscript></entry>
+ <entry>u<subscript>10</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>v<subscript>11</subscript></entry>
+ <entry>v<subscript>10</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YVYU12-1X24">
+ <entry>MEDIA_BUS_FMT_YVYU12_1X24</entry>
+ <entry>0x2023</entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>v<subscript>11</subscript></entry>
+ <entry>v<subscript>10</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ &dash-ent-8;
+ <entry>y<subscript>11</subscript></entry>
+ <entry>y<subscript>10</subscript></entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>u<subscript>11</subscript></entry>
+ <entry>u<subscript>10</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-YUV10-1X30">
+ <entry>MEDIA_BUS_FMT_YUV10_1X30</entry>
+ <entry>0x2016</entry>
+ <entry></entry>
+ <entry>-</entry>
+ <entry>-</entry>
+ <entry>y<subscript>9</subscript></entry>
+ <entry>y<subscript>8</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>u<subscript>9</subscript></entry>
+ <entry>u<subscript>8</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ <entry>v<subscript>9</subscript></entry>
+ <entry>v<subscript>8</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ <row id="MEDIA-BUS-FMT-AYUV8-1X32">
+ <entry>MEDIA_BUS_FMT_AYUV8_1X32</entry>
+ <entry>0x2017</entry>
+ <entry></entry>
+ <entry>a<subscript>7</subscript></entry>
+ <entry>a<subscript>6</subscript></entry>
+ <entry>a<subscript>5</subscript></entry>
+ <entry>a<subscript>4</subscript></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ <entry>y<subscript>7</subscript></entry>
+ <entry>y<subscript>6</subscript></entry>
+ <entry>y<subscript>5</subscript></entry>
+ <entry>y<subscript>4</subscript></entry>
+ <entry>y<subscript>3</subscript></entry>
+ <entry>y<subscript>2</subscript></entry>
+ <entry>y<subscript>1</subscript></entry>
+ <entry>y<subscript>0</subscript></entry>
+ <entry>u<subscript>7</subscript></entry>
+ <entry>u<subscript>6</subscript></entry>
+ <entry>u<subscript>5</subscript></entry>
+ <entry>u<subscript>4</subscript></entry>
+ <entry>u<subscript>3</subscript></entry>
+ <entry>u<subscript>2</subscript></entry>
+ <entry>u<subscript>1</subscript></entry>
+ <entry>u<subscript>0</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section>
+ <title>HSV/HSL Formats</title>
+
+ <para>Those formats transfer pixel data as RGB values in a cylindrical-coordinate
+ system using Hue-Saturation-Value or Hue-Saturation-Lightness components. The
+ format code is made of the following information.
+ <itemizedlist>
+ <listitem><para>The hue, saturation, value or lightness and optional alpha
+ components order code, as encoded in a pixel sample. The only currently
+ supported value is AHSV.
+ </para></listitem>
+ <listitem><para>The number of bits per component, for each component. The values
+ can be different for all components. The only currently supported value is 8888.
+ </para></listitem>
+ <listitem><para>The number of bus samples per pixel. Pixels that are wider than
+ the bus width must be transferred in multiple samples. The only currently
+ supported value is 1.</para></listitem>
+ <listitem><para>The bus width.</para></listitem>
+ <listitem><para>For formats where the total number of bits per pixel is smaller
+ than the number of bus samples per pixel times the bus width, a padding
+ value stating if the bytes are padded in their most high order bits
+ (PADHI) or low order bits (PADLO).</para></listitem>
+ <listitem><para>For formats where the number of bus samples per pixel is larger
+ than 1, an endianness value stating if the pixel is transferred MSB first
+ (BE) or LSB first (LE).</para></listitem>
+ </itemizedlist>
+ </para>
+
+ <para>The following table lists existing HSV/HSL formats.</para>
+
+ <table pgwide="0" frame="none" id="v4l2-mbus-pixelcode-hsv">
+ <title>HSV/HSL formats</title>
+ <tgroup cols="27">
+ <colspec colname="id" align="left" />
+ <colspec colname="code" align="center"/>
+ <colspec colname="bit" />
+ <colspec colnum="4" colname="b31" align="center" />
+ <colspec colnum="5" colname="b20" align="center" />
+ <colspec colnum="6" colname="b29" align="center" />
+ <colspec colnum="7" colname="b28" align="center" />
+ <colspec colnum="8" colname="b27" align="center" />
+ <colspec colnum="9" colname="b26" align="center" />
+ <colspec colnum="10" colname="b25" align="center" />
+ <colspec colnum="11" colname="b24" align="center" />
+ <colspec colnum="12" colname="b23" align="center" />
+ <colspec colnum="13" colname="b22" align="center" />
+ <colspec colnum="14" colname="b21" align="center" />
+ <colspec colnum="15" colname="b20" align="center" />
+ <colspec colnum="16" colname="b19" align="center" />
+ <colspec colnum="17" colname="b18" align="center" />
+ <colspec colnum="18" colname="b17" align="center" />
+ <colspec colnum="19" colname="b16" align="center" />
+ <colspec colnum="20" colname="b15" align="center" />
+ <colspec colnum="21" colname="b14" align="center" />
+ <colspec colnum="22" colname="b13" align="center" />
+ <colspec colnum="23" colname="b12" align="center" />
+ <colspec colnum="24" colname="b11" align="center" />
+ <colspec colnum="25" colname="b10" align="center" />
+ <colspec colnum="26" colname="b09" align="center" />
+ <colspec colnum="27" colname="b08" align="center" />
+ <colspec colnum="28" colname="b07" align="center" />
+ <colspec colnum="29" colname="b06" align="center" />
+ <colspec colnum="30" colname="b05" align="center" />
+ <colspec colnum="31" colname="b04" align="center" />
+ <colspec colnum="32" colname="b03" align="center" />
+ <colspec colnum="33" colname="b02" align="center" />
+ <colspec colnum="34" colname="b01" align="center" />
+ <colspec colnum="35" colname="b00" align="center" />
+ <spanspec namest="b31" nameend="b00" spanname="b0" />
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry></entry>
+ <entry spanname="b0">Data organization</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>Bit</entry>
+ <entry>31</entry>
+ <entry>30</entry>
+ <entry>29</entry>
+ <entry>28</entry>
+ <entry>27</entry>
+ <entry>26</entry>
+ <entry>25</entry>
+ <entry>24</entry>
+ <entry>23</entry>
+ <entry>22</entry>
+ <entry>21</entry>
+ <entry>20</entry>
+ <entry>19</entry>
+ <entry>18</entry>
+ <entry>17</entry>
+ <entry>16</entry>
+ <entry>15</entry>
+ <entry>14</entry>
+ <entry>13</entry>
+ <entry>12</entry>
+ <entry>11</entry>
+ <entry>10</entry>
+ <entry>9</entry>
+ <entry>8</entry>
+ <entry>7</entry>
+ <entry>6</entry>
+ <entry>5</entry>
+ <entry>4</entry>
+ <entry>3</entry>
+ <entry>2</entry>
+ <entry>1</entry>
+ <entry>0</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="MEDIA-BUS-FMT-AHSV8888-1X32">
+ <entry>MEDIA_BUS_FMT_AHSV8888_1X32</entry>
+ <entry>0x6001</entry>
+ <entry></entry>
+ <entry>a<subscript>7</subscript></entry>
+ <entry>a<subscript>6</subscript></entry>
+ <entry>a<subscript>5</subscript></entry>
+ <entry>a<subscript>4</subscript></entry>
+ <entry>a<subscript>3</subscript></entry>
+ <entry>a<subscript>2</subscript></entry>
+ <entry>a<subscript>1</subscript></entry>
+ <entry>a<subscript>0</subscript></entry>
+ <entry>h<subscript>7</subscript></entry>
+ <entry>h<subscript>6</subscript></entry>
+ <entry>h<subscript>5</subscript></entry>
+ <entry>h<subscript>4</subscript></entry>
+ <entry>h<subscript>3</subscript></entry>
+ <entry>h<subscript>2</subscript></entry>
+ <entry>h<subscript>1</subscript></entry>
+ <entry>h<subscript>0</subscript></entry>
+ <entry>s<subscript>7</subscript></entry>
+ <entry>s<subscript>6</subscript></entry>
+ <entry>s<subscript>5</subscript></entry>
+ <entry>s<subscript>4</subscript></entry>
+ <entry>s<subscript>3</subscript></entry>
+ <entry>s<subscript>2</subscript></entry>
+ <entry>s<subscript>1</subscript></entry>
+ <entry>s<subscript>0</subscript></entry>
+ <entry>v<subscript>7</subscript></entry>
+ <entry>v<subscript>6</subscript></entry>
+ <entry>v<subscript>5</subscript></entry>
+ <entry>v<subscript>4</subscript></entry>
+ <entry>v<subscript>3</subscript></entry>
+ <entry>v<subscript>2</subscript></entry>
+ <entry>v<subscript>1</subscript></entry>
+ <entry>v<subscript>0</subscript></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section>
+ <title>JPEG Compressed Formats</title>
+
+ <para>Those data formats consist of an ordered sequence of 8-bit bytes
+ obtained from JPEG compression process. Additionally to the
+ <constant>_JPEG</constant> postfix the format code is made of
+ the following information.
+ <itemizedlist>
+ <listitem><para>The number of bus samples per entropy encoded byte.</para></listitem>
+ <listitem><para>The bus width.</para></listitem>
+ </itemizedlist>
+ </para>
+
+ <para>For instance, for a JPEG baseline process and an 8-bit bus width
+ the format will be named <constant>MEDIA_BUS_FMT_JPEG_1X8</constant>.
+ </para>
+
+ <para>The following table lists existing JPEG compressed formats.</para>
+
+ <table pgwide="0" frame="none" id="v4l2-mbus-pixelcode-jpeg">
+ <title>JPEG Formats</title>
+ <tgroup cols="3">
+ <colspec colname="id" align="left" />
+ <colspec colname="code" align="left"/>
+ <colspec colname="remarks" align="left"/>
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry>Remarks</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="MEDIA-BUS-FMT-JPEG-1X8">
+ <entry>MEDIA_BUS_FMT_JPEG_1X8</entry>
+ <entry>0x4001</entry>
+ <entry>Besides of its usage for the parallel bus this format is
+ recommended for transmission of JPEG data over MIPI CSI bus
+ using the User Defined 8-bit Data types.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ <section id="v4l2-mbus-vendor-spec-fmts">
+ <title>Vendor and Device Specific Formats</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental">experimental</link>
+interface and may change in the future.</para>
+ </note>
+
+ <para>This section lists complex data formats that are either vendor or
+ device specific.
+ </para>
+
+ <para>The following table lists the existing vendor and device specific
+ formats.</para>
+
+ <table pgwide="0" frame="none" id="v4l2-mbus-pixelcode-vendor-specific">
+ <title>Vendor and device specific formats</title>
+ <tgroup cols="3">
+ <colspec colname="id" align="left" />
+ <colspec colname="code" align="left"/>
+ <colspec colname="remarks" align="left"/>
+ <thead>
+ <row>
+ <entry>Identifier</entry>
+ <entry>Code</entry>
+ <entry>Comments</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row id="MEDIA-BUS-FMT-S5C-UYVY-JPEG-1X8">
+ <entry>MEDIA_BUS_FMT_S5C_UYVY_JPEG_1X8</entry>
+ <entry>0x5001</entry>
+ <entry>
+ Interleaved raw UYVY and JPEG image format with embedded
+ meta-data used by Samsung S3C73MX camera sensors.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </section>
+
+ </section>
+</section>
diff --git a/Documentation/DocBook/media/v4l/subdev-image-processing-crop.dia b/Documentation/DocBook/media/v4l/subdev-image-processing-crop.dia
new file mode 100644
index 000000000..e32ba5362
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/subdev-image-processing-crop.dia
@@ -0,0 +1,614 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dia:diagram xmlns:dia="http://www.lysator.liu.se/~alla/dia/">
+ <dia:diagramdata>
+ <dia:attribute name="background">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="pagebreak">
+ <dia:color val="#000099"/>
+ </dia:attribute>
+ <dia:attribute name="paper">
+ <dia:composite type="paper">
+ <dia:attribute name="name">
+ <dia:string>#A4#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="tmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="bmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="lmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="rmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="is_portrait">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="scaling">
+ <dia:real val="0.49000000953674316"/>
+ </dia:attribute>
+ <dia:attribute name="fitto">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="grid">
+ <dia:composite type="grid">
+ <dia:attribute name="width_x">
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="width_y">
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="visible_x">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="visible_y">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:composite type="color"/>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#d8e5e5"/>
+ </dia:attribute>
+ <dia:attribute name="guides">
+ <dia:composite type="guides">
+ <dia:attribute name="hguides"/>
+ <dia:attribute name="vguides"/>
+ </dia:composite>
+ </dia:attribute>
+ </dia:diagramdata>
+ <dia:layer name="Background" visible="true" active="true">
+ <dia:object type="Standard - Box" version="0" id="O0">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-0.4,6.5"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-0.45,6.45;23.1387,16.2"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="-0.4,6.5"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="23.48871579904775"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="9.6500000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O1">
+ <dia:attribute name="obj_pos">
+ <dia:point val="0.225,9.45"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="0.175,9.4;8.225,14.7"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="0.225,9.45"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="7.9499999999999975"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="5.1999999999999975"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#a52a2a"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O2">
+ <dia:attribute name="obj_pos">
+ <dia:point val="3.175,10.55"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="3.125,10.5;7.925,14.45"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="3.175,10.55"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="4.6999999999999975"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.8499999999999979"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#0000ff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O3">
+ <dia:attribute name="obj_pos">
+ <dia:point val="3.725,11.3875"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="3.725,10.7925;6.6025,13.14"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#sink
+crop
+selection#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="3.725,11.3875"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#0000ff"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O4">
+ <dia:attribute name="obj_pos">
+ <dia:point val="1.475,7.9"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="1.475,7.305;1.475,8.0525"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>##</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="1.475,7.9"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O5">
+ <dia:attribute name="obj_pos">
+ <dia:point val="0.426918,7.89569"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="0.426918,7.30069;3.90942,8.84819"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#sink media
+bus format#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="0.426918,7.89569"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#a52a2a"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O6">
+ <dia:attribute name="obj_pos">
+ <dia:point val="17.4887,7.75"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="17.4887,7.155;21.8112,8.7025"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#source media
+bus format#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="17.4887,7.75"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#8b6914"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O7">
+ <dia:attribute name="obj_pos">
+ <dia:point val="17.5244,9.5417"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="17.4744,9.4917;22.2387,13.35"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="17.5244,9.5417"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="4.6643157990477508"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.758300000000002"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#8b6914"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O8">
+ <dia:attribute name="obj_pos">
+ <dia:point val="17.5244,13.3"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="3.12132,13.2463;17.5781,14.4537"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="17.5244,13.3"/>
+ <dia:point val="3.175,14.4"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O7" connection="5"/>
+ <dia:connection handle="1" to="O2" connection="5"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O9">
+ <dia:attribute name="obj_pos">
+ <dia:point val="17.5244,9.5417"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="3.12162,9.48832;17.5778,10.6034"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="17.5244,9.5417"/>
+ <dia:point val="3.175,10.55"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O7" connection="0"/>
+ <dia:connection handle="1" to="O2" connection="0"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O10">
+ <dia:attribute name="obj_pos">
+ <dia:point val="22.1887,13.3"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="7.82132,13.2463;22.2424,14.4537"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="22.1887,13.3"/>
+ <dia:point val="7.875,14.4"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O7" connection="7"/>
+ <dia:connection handle="1" to="O2" connection="7"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O11">
+ <dia:attribute name="obj_pos">
+ <dia:point val="22.1887,9.5417"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="7.82161,9.48831;22.2421,10.6034"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="22.1887,9.5417"/>
+ <dia:point val="7.875,10.55"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O7" connection="2"/>
+ <dia:connection handle="1" to="O2" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Geometric - Perfect Circle" version="1" id="O12">
+ <dia:attribute name="obj_pos">
+ <dia:point val="23.23,10.5742"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="23.18,10.5242;24.13,11.4742"/>
+ </dia:attribute>
+ <dia:attribute name="meta">
+ <dia:composite type="dict"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="23.23,10.5742"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="line_width">
+ <dia:real val="0.10000000000000001"/>
+ </dia:attribute>
+ <dia:attribute name="line_colour">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="fill_colour">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="0"/>
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="flip_horizontal">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="flip_vertical">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="subscale">
+ <dia:real val="1"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O13">
+ <dia:attribute name="obj_pos">
+ <dia:point val="24.08,10.9992"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="24.03,10.6388;32.4953,11.3624"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="24.08,10.9992"/>
+ <dia:point val="32.3835,11.0007"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O12" connection="3"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O14">
+ <dia:attribute name="obj_pos">
+ <dia:point val="25.3454,10.49"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="25.3454,9.895;29.9904,10.6425"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#pad 1 (source)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="25.3454,10.49"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Geometric - Perfect Circle" version="1" id="O15">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-1.44491,11.6506"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-1.49491,11.6006;-0.54491,12.5506"/>
+ </dia:attribute>
+ <dia:attribute name="meta">
+ <dia:composite type="dict"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="-1.44491,11.6506"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="line_width">
+ <dia:real val="0.10000000000000001"/>
+ </dia:attribute>
+ <dia:attribute name="line_colour">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="fill_colour">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="0"/>
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="flip_horizontal">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="flip_vertical">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="subscale">
+ <dia:real val="1"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O16">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-9.61991,12.09"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-9.67,11.7149;-1.33311,12.4385"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="-9.61991,12.09"/>
+ <dia:point val="-1.44491,12.0756"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="1" to="O15" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O17">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-7.39291,11.49"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-7.39291,10.895;-3.58791,11.6425"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#pad 0 (sink)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="-7.39291,11.49"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ </dia:layer>
+</dia:diagram>
diff --git a/Documentation/DocBook/media/v4l/subdev-image-processing-full.dia b/Documentation/DocBook/media/v4l/subdev-image-processing-full.dia
new file mode 100644
index 000000000..a0d782927
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/subdev-image-processing-full.dia
@@ -0,0 +1,1588 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dia:diagram xmlns:dia="http://www.lysator.liu.se/~alla/dia/">
+ <dia:diagramdata>
+ <dia:attribute name="background">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="pagebreak">
+ <dia:color val="#000099"/>
+ </dia:attribute>
+ <dia:attribute name="paper">
+ <dia:composite type="paper">
+ <dia:attribute name="name">
+ <dia:string>#A4#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="tmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="bmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="lmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="rmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="is_portrait">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="scaling">
+ <dia:real val="0.49000000953674316"/>
+ </dia:attribute>
+ <dia:attribute name="fitto">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="grid">
+ <dia:composite type="grid">
+ <dia:attribute name="width_x">
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="width_y">
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="visible_x">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="visible_y">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:composite type="color"/>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#d8e5e5"/>
+ </dia:attribute>
+ <dia:attribute name="guides">
+ <dia:composite type="guides">
+ <dia:attribute name="hguides"/>
+ <dia:attribute name="vguides"/>
+ </dia:composite>
+ </dia:attribute>
+ </dia:diagramdata>
+ <dia:layer name="Background" visible="true" active="true">
+ <dia:object type="Standard - Box" version="0" id="O0">
+ <dia:attribute name="obj_pos">
+ <dia:point val="15.945,6.45"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="15.895,6.4;26.4,18.95"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="15.945,6.45"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="10.404999999254942"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="12.449999999999992"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#ff765a"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O1">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-0.1,3.65"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-0.15,3.6;40.25,20.85"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="-0.1,3.65"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="40.300000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="17.149999999999999"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Geometric - Perfect Circle" version="1" id="O2">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-1.05,7.9106"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-1.1,7.8606;-0.15,8.8106"/>
+ </dia:attribute>
+ <dia:attribute name="meta">
+ <dia:composite type="dict"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="-1.05,7.9106"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="line_width">
+ <dia:real val="0.10000000000000001"/>
+ </dia:attribute>
+ <dia:attribute name="line_colour">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="fill_colour">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="0"/>
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="flip_horizontal">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="flip_vertical">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="subscale">
+ <dia:real val="1"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Geometric - Perfect Circle" version="1" id="O3">
+ <dia:attribute name="obj_pos">
+ <dia:point val="40.3366,9.8342"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="40.2866,9.7842;41.2366,10.7342"/>
+ </dia:attribute>
+ <dia:attribute name="meta">
+ <dia:composite type="dict"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="40.3366,9.8342"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="line_width">
+ <dia:real val="0.10000000000000001"/>
+ </dia:attribute>
+ <dia:attribute name="line_colour">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="fill_colour">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="0"/>
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="flip_horizontal">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="flip_vertical">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="subscale">
+ <dia:real val="1"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O4">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-9.225,8.35"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-9.27509,7.97487;-0.938197,8.69848"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="-9.225,8.35"/>
+ <dia:point val="-1.05,8.3356"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="1" to="O2" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O5">
+ <dia:attribute name="obj_pos">
+ <dia:point val="41.1866,10.2592"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="41.1366,9.89879;49.6019,10.6224"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="41.1866,10.2592"/>
+ <dia:point val="49.4901,10.2607"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O3" connection="3"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O6">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-6.998,7.75"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-6.998,7.155;-3.193,7.9025"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#pad 0 (sink)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="-6.998,7.75"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O7">
+ <dia:attribute name="obj_pos">
+ <dia:point val="42.452,9.75"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="42.452,9.155;47.097,9.9025"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#pad 2 (source)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="42.452,9.75"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O8">
+ <dia:attribute name="obj_pos">
+ <dia:point val="0.275,6"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="0.225,5.95;8.275,11.25"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="0.275,6"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="7.9499999999999975"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="5.1999999999999975"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#a52a2a"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O9">
+ <dia:attribute name="obj_pos">
+ <dia:point val="3.125,6.8"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="3.075,6.75;7.875,10.7"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="3.125,6.8"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="4.6999999999999975"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.8499999999999979"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#0000ff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O10">
+ <dia:attribute name="obj_pos">
+ <dia:point val="1.525,4.45"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="1.525,3.855;1.525,4.6025"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>##</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="1.525,4.45"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O11">
+ <dia:attribute name="obj_pos">
+ <dia:point val="0.476918,4.44569"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="0.476918,3.85069;3.95942,5.39819"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#sink media
+bus format#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="0.476918,4.44569"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#a52a2a"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O12">
+ <dia:attribute name="obj_pos">
+ <dia:point val="16.6822,9.28251"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="16.6322,9.23251;24.9922,17.9564"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="16.6822,9.28251"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="8.2600228398861297"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="8.6238900617957164"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#00ff00"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O13">
+ <dia:attribute name="obj_pos">
+ <dia:point val="16.6822,17.9064"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="3.05732,10.5823;16.7499,17.9741"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="16.6822,17.9064"/>
+ <dia:point val="3.125,10.65"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O12" connection="5"/>
+ <dia:connection handle="1" to="O9" connection="5"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O14">
+ <dia:attribute name="obj_pos">
+ <dia:point val="16.6822,9.28251"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="3.06681,6.74181;16.7404,9.3407"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="16.6822,9.28251"/>
+ <dia:point val="3.125,6.8"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O12" connection="0"/>
+ <dia:connection handle="1" to="O9" connection="0"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O15">
+ <dia:attribute name="obj_pos">
+ <dia:point val="24.9422,17.9064"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="7.75945,10.5845;25.0077,17.9719"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="24.9422,17.9064"/>
+ <dia:point val="7.825,10.65"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O12" connection="7"/>
+ <dia:connection handle="1" to="O9" connection="7"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O16">
+ <dia:attribute name="obj_pos">
+ <dia:point val="24.9422,9.28251"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="7.76834,6.74334;24.9989,9.33917"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="24.9422,9.28251"/>
+ <dia:point val="7.825,6.8"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O12" connection="2"/>
+ <dia:connection handle="1" to="O9" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O17">
+ <dia:attribute name="obj_pos">
+ <dia:point val="16.7352,7.47209"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="16.7352,6.87709;22.5602,8.42459"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#sink compose
+selection (scaling)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="16.7352,7.47209"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#00ff00"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O18">
+ <dia:attribute name="obj_pos">
+ <dia:point val="20.4661,9.72825"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="20.4161,9.67825;25.5254,13.3509"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="20.4661,9.72825"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="5.009308462554376"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.5726155970598077"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#a020f0"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O19">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.475,5.2564"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="34.475,4.6614;38.7975,6.2089"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#source media
+bus format#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="34.475,5.2564"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#8b6914"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O20">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.4244,8.6917"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="34.3744,8.6417;39.4837,12.3143"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="34.4244,8.6917"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="5.009308462554376"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.5726155970598077"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#8b6914"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O21">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.4244,12.2643"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="20.4125,12.2107;34.478,13.3545"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="34.4244,12.2643"/>
+ <dia:point val="20.4661,13.3009"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O20" connection="5"/>
+ <dia:connection handle="1" to="O18" connection="5"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O22">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.4244,8.6917"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="20.4125,8.63813;34.478,9.78182"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="34.4244,8.6917"/>
+ <dia:point val="20.4661,9.72825"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O20" connection="0"/>
+ <dia:connection handle="1" to="O18" connection="0"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O23">
+ <dia:attribute name="obj_pos">
+ <dia:point val="39.4337,12.2643"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="25.4218,12.2107;39.4873,13.3545"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="39.4337,12.2643"/>
+ <dia:point val="25.4754,13.3009"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O20" connection="7"/>
+ <dia:connection handle="1" to="O18" connection="7"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O24">
+ <dia:attribute name="obj_pos">
+ <dia:point val="39.4337,8.6917"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="25.4218,8.63813;39.4873,9.78182"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="39.4337,8.6917"/>
+ <dia:point val="25.4754,9.72825"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O20" connection="2"/>
+ <dia:connection handle="1" to="O18" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O25">
+ <dia:attribute name="obj_pos">
+ <dia:point val="16.25,5.15"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="16.25,4.555;21.68,6.1025"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#sink compose
+bounds selection#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="16.25,5.15"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#ff765a"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Geometric - Perfect Circle" version="1" id="O26">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-1.02991,16.6506"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-1.07991,16.6006;-0.12991,17.5506"/>
+ </dia:attribute>
+ <dia:attribute name="meta">
+ <dia:composite type="dict"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="-1.02991,16.6506"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="line_width">
+ <dia:real val="0.10000000000000001"/>
+ </dia:attribute>
+ <dia:attribute name="line_colour">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="fill_colour">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="0"/>
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="flip_horizontal">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="flip_vertical">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="subscale">
+ <dia:real val="1"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O27">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-9.20491,17.09"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-9.255,16.7149;-0.918107,17.4385"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="-9.20491,17.09"/>
+ <dia:point val="-1.02991,17.0756"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="1" to="O26" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O28">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-6.95,16.45"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-6.95,15.855;-3.145,16.6025"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#pad 1 (sink)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="-6.95,16.45"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O29">
+ <dia:attribute name="obj_pos">
+ <dia:point val="0.390412,14.64"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="0.340412,14.59;6.045,18.8"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="0.390412,14.64"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="5.604587512785236"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="4.1099999999999994"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#a52a2a"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O30">
+ <dia:attribute name="obj_pos">
+ <dia:point val="2.645,15.74"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="2.595,15.69;5.6,18.3"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="2.645,15.74"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="2.904999999254942"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="2.5100000000000016"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#0000ff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O31">
+ <dia:attribute name="obj_pos">
+ <dia:point val="1.595,12.99"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="1.595,12.395;1.595,13.1425"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>##</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="1.595,12.99"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O32">
+ <dia:attribute name="obj_pos">
+ <dia:point val="17.945,12.595"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="2.58596,12.536;18.004,15.799"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="17.945,12.595"/>
+ <dia:point val="2.645,15.74"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O36" connection="0"/>
+ <dia:connection handle="1" to="O30" connection="0"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O33">
+ <dia:attribute name="obj_pos">
+ <dia:point val="17.945,15.8"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="2.58772,15.7427;18.0023,18.3073"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="17.945,15.8"/>
+ <dia:point val="2.645,18.25"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O36" connection="5"/>
+ <dia:connection handle="1" to="O30" connection="5"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O34">
+ <dia:attribute name="obj_pos">
+ <dia:point val="21.7,15.8"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="5.49307,15.7431;21.7569,18.3069"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="21.7,15.8"/>
+ <dia:point val="5.55,18.25"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O36" connection="7"/>
+ <dia:connection handle="1" to="O30" connection="7"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O35">
+ <dia:attribute name="obj_pos">
+ <dia:point val="21.7,12.595"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="5.49136,12.5364;21.7586,15.7986"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="21.7,12.595"/>
+ <dia:point val="5.55,15.74"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O36" connection="2"/>
+ <dia:connection handle="1" to="O30" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O36">
+ <dia:attribute name="obj_pos">
+ <dia:point val="17.945,12.595"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="17.895,12.545;21.75,15.85"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="17.945,12.595"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="3.7549999992549452"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.2049999992549427"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#00ff00"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O37">
+ <dia:attribute name="obj_pos">
+ <dia:point val="22.1631,14.2233"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="22.1131,14.1733;25.45,16.7"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="22.1631,14.2233"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="3.2369000000000021"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="2.4267000000000003"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#a020f0"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O38">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.6714,16.2367"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="34.6214,16.1867;37.9,18.75"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="34.6714,16.2367"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="3.178600000000003"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="2.4632999999999967"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#8b6914"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O39">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.6714,18.7"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="22.1057,16.5926;34.7288,18.7574"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="34.6714,18.7"/>
+ <dia:point val="22.1631,16.65"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O38" connection="5"/>
+ <dia:connection handle="1" to="O37" connection="5"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O40">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.6714,16.2367"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="22.1058,14.166;34.7287,16.294"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="34.6714,16.2367"/>
+ <dia:point val="22.1631,14.2233"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O38" connection="0"/>
+ <dia:connection handle="1" to="O37" connection="0"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O41">
+ <dia:attribute name="obj_pos">
+ <dia:point val="37.85,18.7"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="25.3425,16.5925;37.9075,18.7575"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="37.85,18.7"/>
+ <dia:point val="25.4,16.65"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O38" connection="7"/>
+ <dia:connection handle="1" to="O37" connection="7"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O42">
+ <dia:attribute name="obj_pos">
+ <dia:point val="37.85,16.2367"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="25.3427,14.166;37.9073,16.294"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="37.85,16.2367"/>
+ <dia:point val="25.4,14.2233"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O38" connection="2"/>
+ <dia:connection handle="1" to="O37" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Geometric - Perfect Circle" version="1" id="O43">
+ <dia:attribute name="obj_pos">
+ <dia:point val="40.347,16.7742"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="40.297,16.7242;41.247,17.6742"/>
+ </dia:attribute>
+ <dia:attribute name="meta">
+ <dia:composite type="dict"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="40.347,16.7742"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="line_width">
+ <dia:real val="0.10000000000000001"/>
+ </dia:attribute>
+ <dia:attribute name="line_colour">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="fill_colour">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="0"/>
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="flip_horizontal">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="flip_vertical">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="subscale">
+ <dia:real val="1"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O44">
+ <dia:attribute name="obj_pos">
+ <dia:point val="41.197,17.1992"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="41.147,16.8388;49.6123,17.5624"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="41.197,17.1992"/>
+ <dia:point val="49.5005,17.2007"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O43" connection="3"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O45">
+ <dia:attribute name="obj_pos">
+ <dia:point val="42.4624,16.69"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="42.4624,16.095;47.1074,16.8425"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#pad 3 (source)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="42.4624,16.69"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O46">
+ <dia:attribute name="obj_pos">
+ <dia:point val="9.85,4.55"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="9.85,3.955;12.7275,6.3025"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#sink
+crop
+selection#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="9.85,4.55"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#0000ff"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O47">
+ <dia:attribute name="obj_pos">
+ <dia:point val="27.65,4.75"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="27.65,4.155;30.5275,6.5025"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#source
+crop
+selection#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="27.65,4.75"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#a020f0"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O48">
+ <dia:attribute name="obj_pos">
+ <dia:point val="10.55,6.6"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="7.7135,6.39438;10.6035,7.11605"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="10.55,6.6"/>
+ <dia:point val="7.825,6.8"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#0000ff"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="1" to="O9" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O49">
+ <dia:attribute name="obj_pos">
+ <dia:point val="10.45,6.55"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="5.48029,6.48236;10.5176,15.8387"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="10.45,6.55"/>
+ <dia:point val="5.55,15.74"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#0000ff"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="1" to="O30" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O50">
+ <dia:attribute name="obj_pos">
+ <dia:point val="27.5246,6.66071"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="25.406,6.59136;27.594,9.82122"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="27.5246,6.66071"/>
+ <dia:point val="25.4754,9.72825"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#a020f0"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="1" to="O18" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O51">
+ <dia:attribute name="obj_pos">
+ <dia:point val="27.5036,6.68935"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="25.2161,6.62775;27.5652,14.331"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="27.5036,6.68935"/>
+ <dia:point val="25.4,14.2233"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#a020f0"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="1" to="O37" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ </dia:layer>
+</dia:diagram>
diff --git a/Documentation/DocBook/media/v4l/subdev-image-processing-scaling-multi-source.dia b/Documentation/DocBook/media/v4l/subdev-image-processing-scaling-multi-source.dia
new file mode 100644
index 000000000..0cd50a7bd
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/subdev-image-processing-scaling-multi-source.dia
@@ -0,0 +1,1152 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dia:diagram xmlns:dia="http://www.lysator.liu.se/~alla/dia/">
+ <dia:diagramdata>
+ <dia:attribute name="background">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="pagebreak">
+ <dia:color val="#000099"/>
+ </dia:attribute>
+ <dia:attribute name="paper">
+ <dia:composite type="paper">
+ <dia:attribute name="name">
+ <dia:string>#A4#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="tmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="bmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="lmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="rmargin">
+ <dia:real val="2.8222000598907471"/>
+ </dia:attribute>
+ <dia:attribute name="is_portrait">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="scaling">
+ <dia:real val="0.49000000953674316"/>
+ </dia:attribute>
+ <dia:attribute name="fitto">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="grid">
+ <dia:composite type="grid">
+ <dia:attribute name="width_x">
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="width_y">
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="visible_x">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="visible_y">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:composite type="color"/>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#d8e5e5"/>
+ </dia:attribute>
+ <dia:attribute name="guides">
+ <dia:composite type="guides">
+ <dia:attribute name="hguides"/>
+ <dia:attribute name="vguides"/>
+ </dia:composite>
+ </dia:attribute>
+ </dia:diagramdata>
+ <dia:layer name="Background" visible="true" active="true">
+ <dia:object type="Standard - Box" version="0" id="O0">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-0.4,6.5"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-0.45,6.45;39.95,22.9"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="-0.4,6.5"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="40.299999999999997"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="16.349999999999998"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O1">
+ <dia:attribute name="obj_pos">
+ <dia:point val="0.225,9.45"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="0.175,9.4;8.225,14.7"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="0.225,9.45"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="7.9499999999999975"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="5.1999999999999975"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#a52a2a"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O2">
+ <dia:attribute name="obj_pos">
+ <dia:point val="2.475,10.2"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="2.425,10.15;7.225,14.1"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="2.475,10.2"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="4.6999999999999975"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.8499999999999979"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#0000ff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O3">
+ <dia:attribute name="obj_pos">
+ <dia:point val="3,11.2"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="3,10.605;5.8775,12.9525"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#sink
+crop
+selection#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="3,11.2"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#0000ff"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O4">
+ <dia:attribute name="obj_pos">
+ <dia:point val="1.475,7.9"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="1.475,7.305;1.475,8.0525"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>##</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="1.475,7.9"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O5">
+ <dia:attribute name="obj_pos">
+ <dia:point val="0.426918,7.89569"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="0.426918,7.30069;3.90942,8.84819"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#sink media
+bus format#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="0.426918,7.89569"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#a52a2a"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O6">
+ <dia:attribute name="obj_pos">
+ <dia:point val="16.6822,9.28251"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="16.6322,9.23251;24.9922,17.9564"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="16.6822,9.28251"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="8.2600228398861297"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="8.6238900617957164"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#00ff00"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O7">
+ <dia:attribute name="obj_pos">
+ <dia:point val="16.6822,17.9064"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="2.41365,13.9886;16.7436,17.9678"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="16.6822,17.9064"/>
+ <dia:point val="2.475,14.05"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O6" connection="5"/>
+ <dia:connection handle="1" to="O2" connection="5"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O8">
+ <dia:attribute name="obj_pos">
+ <dia:point val="16.6822,9.28251"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="2.42188,9.22939;16.7353,10.2531"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="16.6822,9.28251"/>
+ <dia:point val="2.475,10.2"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O6" connection="0"/>
+ <dia:connection handle="1" to="O2" connection="0"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O9">
+ <dia:attribute name="obj_pos">
+ <dia:point val="24.9422,17.9064"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="7.11553,13.9905;25.0017,17.9659"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="24.9422,17.9064"/>
+ <dia:point val="7.175,14.05"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O6" connection="7"/>
+ <dia:connection handle="1" to="O2" connection="7"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O10">
+ <dia:attribute name="obj_pos">
+ <dia:point val="24.9422,9.28251"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="7.12249,9.23;24.9947,10.2525"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="24.9422,9.28251"/>
+ <dia:point val="7.175,10.2"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O6" connection="2"/>
+ <dia:connection handle="1" to="O2" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O11">
+ <dia:attribute name="obj_pos">
+ <dia:point val="16.7352,7.47209"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="16.7352,6.87709;22.5602,8.42459"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#sink compose
+selection (scaling)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="16.7352,7.47209"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#00ff00"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O12">
+ <dia:attribute name="obj_pos">
+ <dia:point val="19.1161,9.97825"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="19.0661,9.92825;24.1754,13.6009"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="19.1161,9.97825"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="5.009308462554376"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.5726155970598077"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#a020f0"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O13">
+ <dia:attribute name="obj_pos">
+ <dia:point val="27.1661,7.47209"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="27.1661,6.87709;30.0436,9.22459"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#source
+crop
+selection#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="27.1661,7.47209"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#a020f0"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O14">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.575,7.8564"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="34.575,7.2614;38.8975,8.8089"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#source media
+bus format#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="34.575,7.8564"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#8b6914"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O15">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.5244,11.2917"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="34.4744,11.2417;39.5837,14.9143"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="34.5244,11.2917"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="5.009308462554376"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.5726155970598077"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#8b6914"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O16">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.5244,14.8643"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="19.062,13.4968;34.5785,14.9184"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="34.5244,14.8643"/>
+ <dia:point val="19.1161,13.5509"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O15" connection="5"/>
+ <dia:connection handle="1" to="O12" connection="5"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O17">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.5244,11.2917"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="19.062,9.92418;34.5785,11.3458"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="34.5244,11.2917"/>
+ <dia:point val="19.1161,9.97825"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O15" connection="0"/>
+ <dia:connection handle="1" to="O12" connection="0"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O18">
+ <dia:attribute name="obj_pos">
+ <dia:point val="39.5337,14.8643"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="24.0713,13.4968;39.5878,14.9184"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="39.5337,14.8643"/>
+ <dia:point val="24.1254,13.5509"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O15" connection="7"/>
+ <dia:connection handle="1" to="O12" connection="7"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O19">
+ <dia:attribute name="obj_pos">
+ <dia:point val="39.5337,11.2917"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="24.0713,9.92418;39.5878,11.3458"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="39.5337,11.2917"/>
+ <dia:point val="24.1254,9.97825"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O15" connection="2"/>
+ <dia:connection handle="1" to="O12" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Geometric - Perfect Circle" version="1" id="O20">
+ <dia:attribute name="obj_pos">
+ <dia:point val="39.98,12.0742"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="39.93,12.0242;40.88,12.9742"/>
+ </dia:attribute>
+ <dia:attribute name="meta">
+ <dia:composite type="dict"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="39.98,12.0742"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="line_width">
+ <dia:real val="0.10000000000000001"/>
+ </dia:attribute>
+ <dia:attribute name="line_colour">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="fill_colour">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="0"/>
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="flip_horizontal">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="flip_vertical">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="subscale">
+ <dia:real val="1"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O21">
+ <dia:attribute name="obj_pos">
+ <dia:point val="40.83,12.4992"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="40.78,12.1388;49.2453,12.8624"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="40.83,12.4992"/>
+ <dia:point val="49.1335,12.5007"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O20" connection="3"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O22">
+ <dia:attribute name="obj_pos">
+ <dia:point val="42.0954,11.99"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="42.0954,11.395;46.7404,12.1425"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#pad 1 (source)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="42.0954,11.99"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Geometric - Perfect Circle" version="1" id="O23">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-1.44491,11.6506"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-1.49491,11.6006;-0.54491,12.5506"/>
+ </dia:attribute>
+ <dia:attribute name="meta">
+ <dia:composite type="dict"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="-1.44491,11.6506"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="line_width">
+ <dia:real val="0.10000000000000001"/>
+ </dia:attribute>
+ <dia:attribute name="line_colour">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="fill_colour">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="0"/>
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="flip_horizontal">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="flip_vertical">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="subscale">
+ <dia:real val="1"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O24">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-9.61991,12.09"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-9.67,11.7149;-1.33311,12.4385"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="-9.61991,12.09"/>
+ <dia:point val="-1.44491,12.0756"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="1" to="O23" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O25">
+ <dia:attribute name="obj_pos">
+ <dia:point val="-7.39291,11.49"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="-7.39291,10.895;-3.58791,11.6425"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#pad 0 (sink)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="-7.39291,11.49"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O26">
+ <dia:attribute name="obj_pos">
+ <dia:point val="19.4911,13.8333"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="19.4411,13.7833;24.5504,17.4559"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="19.4911,13.8333"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="5.009308462554376"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.5726155970598077"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#a020f0"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Box" version="0" id="O27">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.4994,17.2967"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="34.4494,17.2467;39.5587,20.9193"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="34.4994,17.2967"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="5.009308462554376"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="3.5726155970598077"/>
+ </dia:attribute>
+ <dia:attribute name="border_width">
+ <dia:real val="0.10000000149011612"/>
+ </dia:attribute>
+ <dia:attribute name="border_color">
+ <dia:color val="#8b6914"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O28">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.4994,20.8693"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="19.4311,17.3459;34.5594,20.9293"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="34.4994,20.8693"/>
+ <dia:point val="19.4911,17.4059"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O27" connection="5"/>
+ <dia:connection handle="1" to="O26" connection="5"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O29">
+ <dia:attribute name="obj_pos">
+ <dia:point val="34.4994,17.2967"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="19.4311,13.7733;34.5594,17.3567"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="34.4994,17.2967"/>
+ <dia:point val="19.4911,13.8333"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O27" connection="0"/>
+ <dia:connection handle="1" to="O26" connection="0"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O30">
+ <dia:attribute name="obj_pos">
+ <dia:point val="39.5087,20.8693"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="24.4404,17.3459;39.5687,20.9293"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="39.5087,20.8693"/>
+ <dia:point val="24.5004,17.4059"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O27" connection="7"/>
+ <dia:connection handle="1" to="O26" connection="7"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O31">
+ <dia:attribute name="obj_pos">
+ <dia:point val="39.5087,17.2967"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="24.4404,13.7733;39.5687,17.3567"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="39.5087,17.2967"/>
+ <dia:point val="24.5004,13.8333"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#e60505"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="4"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O27" connection="2"/>
+ <dia:connection handle="1" to="O26" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Geometric - Perfect Circle" version="1" id="O32">
+ <dia:attribute name="obj_pos">
+ <dia:point val="39.855,18.7792"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="39.805,18.7292;40.755,19.6792"/>
+ </dia:attribute>
+ <dia:attribute name="meta">
+ <dia:composite type="dict"/>
+ </dia:attribute>
+ <dia:attribute name="elem_corner">
+ <dia:point val="39.855,18.7792"/>
+ </dia:attribute>
+ <dia:attribute name="elem_width">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="elem_height">
+ <dia:real val="0.84999999999999787"/>
+ </dia:attribute>
+ <dia:attribute name="line_width">
+ <dia:real val="0.10000000000000001"/>
+ </dia:attribute>
+ <dia:attribute name="line_colour">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="fill_colour">
+ <dia:color val="#ffffff"/>
+ </dia:attribute>
+ <dia:attribute name="show_background">
+ <dia:boolean val="true"/>
+ </dia:attribute>
+ <dia:attribute name="line_style">
+ <dia:enum val="0"/>
+ <dia:real val="1"/>
+ </dia:attribute>
+ <dia:attribute name="flip_horizontal">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="flip_vertical">
+ <dia:boolean val="false"/>
+ </dia:attribute>
+ <dia:attribute name="subscale">
+ <dia:real val="1"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O33">
+ <dia:attribute name="obj_pos">
+ <dia:point val="40.705,19.2042"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="40.655,18.8438;49.1203,19.5674"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="40.705,19.2042"/>
+ <dia:point val="49.0085,19.2057"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="0" to="O32" connection="3"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Text" version="1" id="O34">
+ <dia:attribute name="obj_pos">
+ <dia:point val="41.9704,18.695"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="41.9704,18.1;46.6154,18.8475"/>
+ </dia:attribute>
+ <dia:attribute name="text">
+ <dia:composite type="text">
+ <dia:attribute name="string">
+ <dia:string>#pad 2 (source)#</dia:string>
+ </dia:attribute>
+ <dia:attribute name="font">
+ <dia:font family="sans" style="0" name="Helvetica"/>
+ </dia:attribute>
+ <dia:attribute name="height">
+ <dia:real val="0.80000000000000004"/>
+ </dia:attribute>
+ <dia:attribute name="pos">
+ <dia:point val="41.9704,18.695"/>
+ </dia:attribute>
+ <dia:attribute name="color">
+ <dia:color val="#000000"/>
+ </dia:attribute>
+ <dia:attribute name="alignment">
+ <dia:enum val="0"/>
+ </dia:attribute>
+ </dia:composite>
+ </dia:attribute>
+ <dia:attribute name="valign">
+ <dia:enum val="3"/>
+ </dia:attribute>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O35">
+ <dia:attribute name="obj_pos">
+ <dia:point val="27.3,9.55"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="24.0146,9.49376;27.3562,10.255"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="27.3,9.55"/>
+ <dia:point val="24.1254,9.97825"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#a020f0"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="1" to="O12" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ <dia:object type="Standard - Line" version="0" id="O36">
+ <dia:attribute name="obj_pos">
+ <dia:point val="27.3454,9.53624"/>
+ </dia:attribute>
+ <dia:attribute name="obj_bb">
+ <dia:rectangle val="24.4311,9.46695;27.4147,13.9265"/>
+ </dia:attribute>
+ <dia:attribute name="conn_endpoints">
+ <dia:point val="27.3454,9.53624"/>
+ <dia:point val="24.5004,13.8333"/>
+ </dia:attribute>
+ <dia:attribute name="numcp">
+ <dia:int val="1"/>
+ </dia:attribute>
+ <dia:attribute name="line_color">
+ <dia:color val="#a020f0"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow">
+ <dia:enum val="22"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_length">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:attribute name="end_arrow_width">
+ <dia:real val="0.5"/>
+ </dia:attribute>
+ <dia:connections>
+ <dia:connection handle="1" to="O26" connection="2"/>
+ </dia:connections>
+ </dia:object>
+ </dia:layer>
+</dia:diagram>
diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml
new file mode 100644
index 000000000..e98caa1c3
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/v4l2.xml
@@ -0,0 +1,709 @@
+ <partinfo>
+ <authorgroup>
+ <author>
+ <firstname>Michael</firstname>
+ <surname>Schimek</surname>
+ <othername role="mi">H</othername>
+ <affiliation>
+ <address>
+ <email>mschimek@gmx.at</email>
+ </address>
+ </affiliation>
+ </author>
+
+ <author>
+ <firstname>Bill</firstname>
+ <surname>Dirks</surname>
+ <!-- Commented until Bill opts in to be spammed.
+ <affiliation>
+ <address>
+ <email>bill@thedirks.org</email>
+ </address>
+ </affiliation> -->
+ <contrib>Original author of the V4L2 API and
+documentation.</contrib>
+ </author>
+
+ <author>
+ <firstname>Hans</firstname>
+ <surname>Verkuil</surname>
+ <contrib>Designed and documented the VIDIOC_LOG_STATUS ioctl,
+the extended control ioctls, major parts of the sliced VBI API, the
+MPEG encoder and decoder APIs and the DV Timings API.</contrib>
+ <affiliation>
+ <address>
+ <email>hverkuil@xs4all.nl</email>
+ </address>
+ </affiliation>
+ </author>
+
+ <author>
+ <firstname>Martin</firstname>
+ <surname>Rubli</surname>
+ <!--
+ <affiliation>
+ <address>
+ <email>martin_rubli@logitech.com</email>
+ </address>
+ </affiliation> -->
+ <contrib>Designed and documented the VIDIOC_ENUM_FRAMESIZES
+and VIDIOC_ENUM_FRAMEINTERVALS ioctls.</contrib>
+ </author>
+
+ <author>
+ <firstname>Andy</firstname>
+ <surname>Walls</surname>
+ <contrib>Documented the fielded V4L2_MPEG_STREAM_VBI_FMT_IVTV
+MPEG stream embedded, sliced VBI data format in this specification.
+</contrib>
+ <affiliation>
+ <address>
+ <email>awalls@md.metrocast.net</email>
+ </address>
+ </affiliation>
+ </author>
+
+ <author>
+ <firstname>Mauro</firstname>
+ <surname>Carvalho Chehab</surname>
+ <contrib>Documented libv4l, designed and added v4l2grab example,
+Remote Controller chapter.</contrib>
+ <affiliation>
+ <address>
+ <email>m.chehab@samsung.com</email>
+ </address>
+ </affiliation>
+ </author>
+
+ <author>
+ <firstname>Muralidharan</firstname>
+ <surname>Karicheri</surname>
+ <contrib>Documented the Digital Video timings API.</contrib>
+ <affiliation>
+ <address>
+ <email>m-karicheri2@ti.com</email>
+ </address>
+ </affiliation>
+ </author>
+
+ <author>
+ <firstname>Pawel</firstname>
+ <surname>Osciak</surname>
+ <contrib>Designed and documented the multi-planar API.</contrib>
+ <affiliation>
+ <address>
+ <email>pawel AT osciak.com</email>
+ </address>
+ </affiliation>
+ </author>
+
+ <author>
+ <firstname>Sakari</firstname>
+ <surname>Ailus</surname>
+ <contrib>Subdev selections API.</contrib>
+ <affiliation>
+ <address>
+ <email>sakari.ailus@iki.fi</email>
+ </address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Antti</firstname>
+ <surname>Palosaari</surname>
+ <contrib>SDR API.</contrib>
+ <affiliation>
+ <address>
+ <email>crope@iki.fi</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>1999</year>
+ <year>2000</year>
+ <year>2001</year>
+ <year>2002</year>
+ <year>2003</year>
+ <year>2004</year>
+ <year>2005</year>
+ <year>2006</year>
+ <year>2007</year>
+ <year>2008</year>
+ <year>2009</year>
+ <year>2010</year>
+ <year>2011</year>
+ <year>2012</year>
+ <year>2013</year>
+ <year>2014</year>
+ <year>2015</year>
+ <holder>Bill Dirks, Michael H. Schimek, Hans Verkuil, Martin
+Rubli, Andy Walls, Muralidharan Karicheri, Mauro Carvalho Chehab,
+ Pawel Osciak</holder>
+ </copyright>
+ <legalnotice>
+ <para>Except when explicitly stated as GPL, programming examples within
+ this part can be used and distributed without restrictions.</para>
+ </legalnotice>
+ <revhistory>
+ <!-- Put document revisions here, newest first. -->
+ <!-- API revisions (changes and additions of defines, enums,
+structs, ioctls) must be noted in more detail in the history chapter
+(compat.xml), along with the possible impact on existing drivers and
+applications. -->
+
+ <revision>
+ <revnumber>3.21</revnumber>
+ <date>2015-02-13</date>
+ <authorinitials>mcc</authorinitials>
+ <revremark>Fix documentation for media controller device nodes and add support for DVB device nodes.
+Add support for Tuner sub-device.
+ </revremark>
+ </revision>
+ <revision>
+ <revnumber>3.19</revnumber>
+ <date>2014-12-05</date>
+ <authorinitials>hv</authorinitials>
+ <revremark>Rewrote Colorspace chapter, added new &v4l2-ycbcr-encoding; and &v4l2-quantization; fields
+to &v4l2-pix-format;, &v4l2-pix-format-mplane; and &v4l2-mbus-framefmt;.
+ </revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.17</revnumber>
+ <date>2014-08-04</date>
+ <authorinitials>lp, hv</authorinitials>
+ <revremark>Extended &v4l2-pix-format;. Added format flags. Added compound control types
+and VIDIOC_QUERY_EXT_CTRL.
+ </revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.15</revnumber>
+ <date>2014-02-03</date>
+ <authorinitials>hv, ap</authorinitials>
+ <revremark>Update several sections of "Common API Elements": "Opening and Closing Devices"
+"Querying Capabilities", "Application Priority", "Video Inputs and Outputs", "Audio Inputs and Outputs"
+"Tuners and Modulators", "Video Standards" and "Digital Video (DV) Timings". Added SDR API.
+ </revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.14</revnumber>
+ <date>2013-11-25</date>
+ <authorinitials>rr</authorinitials>
+ <revremark>Set width and height as unsigned on v4l2_rect.
+ </revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.11</revnumber>
+ <date>2013-05-26</date>
+ <authorinitials>hv</authorinitials>
+ <revremark>Remove obsolete VIDIOC_DBG_G_CHIP_IDENT ioctl.
+ </revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.10</revnumber>
+ <date>2013-03-25</date>
+ <authorinitials>hv</authorinitials>
+ <revremark>Remove obsolete and unused DV_PRESET ioctls:
+ VIDIOC_G_DV_PRESET, VIDIOC_S_DV_PRESET, VIDIOC_QUERY_DV_PRESET and
+ VIDIOC_ENUM_DV_PRESET. Remove the related v4l2_input/output capability
+ flags V4L2_IN_CAP_PRESETS and V4L2_OUT_CAP_PRESETS. Added VIDIOC_DBG_G_CHIP_INFO.
+ </revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.9</revnumber>
+ <date>2012-12-03</date>
+ <authorinitials>sa, sn</authorinitials>
+ <revremark>Added timestamp types to v4l2_buffer.
+ Added V4L2_EVENT_CTRL_CH_RANGE control event changes flag.
+ </revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.6</revnumber>
+ <date>2012-07-02</date>
+ <authorinitials>hv</authorinitials>
+ <revremark>Added VIDIOC_ENUM_FREQ_BANDS.
+ </revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.5</revnumber>
+ <date>2012-05-07</date>
+ <authorinitials>sa, sn, hv</authorinitials>
+ <revremark>Added V4L2_CTRL_TYPE_INTEGER_MENU and V4L2 subdev
+ selections API. Improved the description of V4L2_CID_COLORFX
+ control, added V4L2_CID_COLORFX_CBCR control.
+ Added camera controls V4L2_CID_AUTO_EXPOSURE_BIAS,
+ V4L2_CID_AUTO_N_PRESET_WHITE_BALANCE, V4L2_CID_IMAGE_STABILIZATION,
+ V4L2_CID_ISO_SENSITIVITY, V4L2_CID_ISO_SENSITIVITY_AUTO,
+ V4L2_CID_EXPOSURE_METERING, V4L2_CID_SCENE_MODE,
+ V4L2_CID_3A_LOCK, V4L2_CID_AUTO_FOCUS_START,
+ V4L2_CID_AUTO_FOCUS_STOP, V4L2_CID_AUTO_FOCUS_STATUS
+ and V4L2_CID_AUTO_FOCUS_RANGE.
+ Added VIDIOC_ENUM_DV_TIMINGS, VIDIOC_QUERY_DV_TIMINGS and
+ VIDIOC_DV_TIMINGS_CAP.
+ </revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.4</revnumber>
+ <date>2012-01-25</date>
+ <authorinitials>sn</authorinitials>
+ <revremark>Added <link linkend="jpeg-controls">JPEG compression
+ control class.</link>
+ </revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.3</revnumber>
+ <date>2012-01-11</date>
+ <authorinitials>hv</authorinitials>
+ <revremark>Added device_caps field to struct v4l2_capabilities.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.2</revnumber>
+ <date>2011-08-26</date>
+ <authorinitials>hv</authorinitials>
+ <revremark>Added V4L2_CTRL_FLAG_VOLATILE.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>3.1</revnumber>
+ <date>2011-06-27</date>
+ <authorinitials>mcc, po, hv</authorinitials>
+ <revremark>Documented that VIDIOC_QUERYCAP now returns a per-subsystem version instead of a per-driver one.
+ Standardize an error code for invalid ioctl.
+ Added V4L2_CTRL_TYPE_BITMASK.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>2.6.39</revnumber>
+ <date>2011-03-01</date>
+ <authorinitials>mcc, po</authorinitials>
+ <revremark>Removed VIDIOC_*_OLD from videodev2.h header and update it to reflect latest changes. Added the <link linkend="planar-apis">multi-planar API</link>.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>2.6.37</revnumber>
+ <date>2010-08-06</date>
+ <authorinitials>hv</authorinitials>
+ <revremark>Removed obsolete vtx (videotext) API.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>2.6.33</revnumber>
+ <date>2009-12-03</date>
+ <authorinitials>mk</authorinitials>
+ <revremark>Added documentation for the Digital Video timings API.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>2.6.32</revnumber>
+ <date>2009-08-31</date>
+ <authorinitials>mcc</authorinitials>
+ <revremark>Now, revisions will match the kernel version where
+the V4L2 API changes will be used by the Linux Kernel.
+Also added Remote Controller chapter.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.29</revnumber>
+ <date>2009-08-26</date>
+ <authorinitials>ev</authorinitials>
+ <revremark>Added documentation for string controls and for FM Transmitter controls.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.28</revnumber>
+ <date>2009-08-26</date>
+ <authorinitials>gl</authorinitials>
+ <revremark>Added V4L2_CID_BAND_STOP_FILTER documentation.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.27</revnumber>
+ <date>2009-08-15</date>
+ <authorinitials>mcc</authorinitials>
+ <revremark>Added libv4l and Remote Controller documentation;
+added v4l2grab and keytable application examples.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.26</revnumber>
+ <date>2009-07-23</date>
+ <authorinitials>hv</authorinitials>
+ <revremark>Finalized the RDS capture API. Added modulator and RDS encoder
+capabilities. Added support for string controls.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.25</revnumber>
+ <date>2009-01-18</date>
+ <authorinitials>hv</authorinitials>
+ <revremark>Added pixel formats VYUY, NV16 and NV61, and changed
+the debug ioctls VIDIOC_DBG_G/S_REGISTER and VIDIOC_DBG_G_CHIP_IDENT.
+Added camera controls V4L2_CID_ZOOM_ABSOLUTE, V4L2_CID_ZOOM_RELATIVE,
+V4L2_CID_ZOOM_CONTINUOUS and V4L2_CID_PRIVACY.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.24</revnumber>
+ <date>2008-03-04</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Added pixel formats Y16 and SBGGR16, new controls
+and a camera controls class. Removed VIDIOC_G/S_MPEGCOMP.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.23</revnumber>
+ <date>2007-08-30</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Fixed a typo in VIDIOC_DBG_G/S_REGISTER.
+Clarified the byte order of packed pixel formats.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.22</revnumber>
+ <date>2007-08-29</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Added the Video Output Overlay interface, new MPEG
+controls, V4L2_FIELD_INTERLACED_TB and V4L2_FIELD_INTERLACED_BT,
+VIDIOC_DBG_G/S_REGISTER, VIDIOC_(TRY_)ENCODER_CMD,
+VIDIOC_G_CHIP_IDENT, VIDIOC_G_ENC_INDEX, new pixel formats.
+Clarifications in the cropping chapter, about RGB pixel formats, the
+mmap(), poll(), select(), read() and write() functions. Typographical
+fixes.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.21</revnumber>
+ <date>2006-12-19</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Fixed a link in the VIDIOC_G_EXT_CTRLS section.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.20</revnumber>
+ <date>2006-11-24</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Clarified the purpose of the audioset field in
+struct v4l2_input and v4l2_output.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.19</revnumber>
+ <date>2006-10-19</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Documented V4L2_PIX_FMT_RGB444.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.18</revnumber>
+ <date>2006-10-18</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Added the description of extended controls by Hans
+Verkuil. Linked V4L2_PIX_FMT_MPEG to V4L2_CID_MPEG_STREAM_TYPE.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.17</revnumber>
+ <date>2006-10-12</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Corrected V4L2_PIX_FMT_HM12 description.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.16</revnumber>
+ <date>2006-10-08</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>VIDIOC_ENUM_FRAMESIZES and
+VIDIOC_ENUM_FRAMEINTERVALS are now part of the API.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.15</revnumber>
+ <date>2006-09-23</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Cleaned up the bibliography, added BT.653 and
+BT.1119. capture.c/start_capturing() for user pointer I/O did not
+initialize the buffer index. Documented the V4L MPEG and MJPEG
+VID_TYPEs and V4L2_PIX_FMT_SBGGR8. Updated the list of reserved pixel
+formats. See the history chapter for API changes.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.14</revnumber>
+ <date>2006-09-14</date>
+ <authorinitials>mr</authorinitials>
+ <revremark>Added VIDIOC_ENUM_FRAMESIZES and
+VIDIOC_ENUM_FRAMEINTERVALS proposal for frame format enumeration of
+digital devices.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.13</revnumber>
+ <date>2006-04-07</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Corrected the description of struct v4l2_window
+clips. New V4L2_STD_ and V4L2_TUNER_MODE_LANG1_LANG2
+defines.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.12</revnumber>
+ <date>2006-02-03</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Corrected the description of struct
+v4l2_captureparm and v4l2_outputparm.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.11</revnumber>
+ <date>2006-01-27</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Improved the description of struct
+v4l2_tuner.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.10</revnumber>
+ <date>2006-01-10</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>VIDIOC_G_INPUT and VIDIOC_S_PARM
+clarifications.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.9</revnumber>
+ <date>2005-11-27</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Improved the 525 line numbering diagram. Hans
+Verkuil and I rewrote the sliced VBI section. He also contributed a
+VIDIOC_LOG_STATUS page. Fixed VIDIOC_S_STD call in the video standard
+selection example. Various updates.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.8</revnumber>
+ <date>2004-10-04</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Somehow a piece of junk slipped into the capture
+example, removed.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.7</revnumber>
+ <date>2004-09-19</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Fixed video standard selection, control
+enumeration, downscaling and aspect example. Added read and user
+pointer i/o to video capture example.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.6</revnumber>
+ <date>2004-08-01</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>v4l2_buffer changes, added video capture example,
+various corrections.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.5</revnumber>
+ <date>2003-11-05</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Pixel format erratum.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.4</revnumber>
+ <date>2003-09-17</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Corrected source and Makefile to generate a PDF.
+SGML fixes. Added latest API changes. Closed gaps in the history
+chapter.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.3</revnumber>
+ <date>2003-02-05</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Another draft, more corrections.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.2</revnumber>
+ <date>2003-01-15</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>Second draft, with corrections pointed out by Gerd
+Knorr.</revremark>
+ </revision>
+
+ <revision>
+ <revnumber>0.1</revnumber>
+ <date>2002-12-01</date>
+ <authorinitials>mhs</authorinitials>
+ <revremark>First draft, based on documentation by Bill Dirks
+and discussions on the V4L mailing list.</revremark>
+ </revision>
+ </revhistory>
+</partinfo>
+
+<title>Video for Linux Two API Specification</title>
+ <subtitle>Revision 3.19</subtitle>
+
+ <chapter id="common">
+ &sub-common;
+ </chapter>
+
+ <chapter id="pixfmt">
+ &sub-pixfmt;
+ </chapter>
+
+ <chapter id="io">
+ &sub-io;
+ </chapter>
+
+ <chapter id="devices">
+ <title>Interfaces</title>
+
+ <section id="capture"> &sub-dev-capture; </section>
+ <section id="overlay"> &sub-dev-overlay; </section>
+ <section id="output"> &sub-dev-output; </section>
+ <section id="osd"> &sub-dev-osd; </section>
+ <section id="codec"> &sub-dev-codec; </section>
+ <section id="effect"> &sub-dev-effect; </section>
+ <section id="raw-vbi"> &sub-dev-raw-vbi; </section>
+ <section id="sliced"> &sub-dev-sliced-vbi; </section>
+ <section id="ttx"> &sub-dev-teletext; </section>
+ <section id="radio"> &sub-dev-radio; </section>
+ <section id="rds"> &sub-dev-rds; </section>
+ <section id="sdr"> &sub-dev-sdr; </section>
+ <section id="event"> &sub-dev-event; </section>
+ <section id="subdev"> &sub-dev-subdev; </section>
+ </chapter>
+
+ <chapter id="driver">
+ &sub-driver;
+ </chapter>
+
+ <chapter id="libv4l">
+ &sub-libv4l;
+ </chapter>
+
+ <chapter id="compat">
+ &sub-compat;
+ </chapter>
+
+ <appendix id="user-func">
+ <title>Function Reference</title>
+
+ <!-- Keep this alphabetically sorted. -->
+
+ &sub-close;
+ &sub-ioctl;
+ <!-- All ioctls go here. -->
+ &sub-create-bufs;
+ &sub-cropcap;
+ &sub-dbg-g-chip-info;
+ &sub-dbg-g-register;
+ &sub-decoder-cmd;
+ &sub-dqevent;
+ &sub-dv-timings-cap;
+ &sub-encoder-cmd;
+ &sub-enumaudio;
+ &sub-enumaudioout;
+ &sub-enum-dv-timings;
+ &sub-enum-fmt;
+ &sub-enum-framesizes;
+ &sub-enum-frameintervals;
+ &sub-enum-freq-bands;
+ &sub-enuminput;
+ &sub-enumoutput;
+ &sub-enumstd;
+ &sub-expbuf;
+ &sub-g-audio;
+ &sub-g-audioout;
+ &sub-g-crop;
+ &sub-g-ctrl;
+ &sub-g-dv-timings;
+ &sub-g-edid;
+ &sub-g-enc-index;
+ &sub-g-ext-ctrls;
+ &sub-g-fbuf;
+ &sub-g-fmt;
+ &sub-g-frequency;
+ &sub-g-input;
+ &sub-g-jpegcomp;
+ &sub-g-modulator;
+ &sub-g-output;
+ &sub-g-parm;
+ &sub-g-priority;
+ &sub-g-selection;
+ &sub-g-sliced-vbi-cap;
+ &sub-g-std;
+ &sub-g-tuner;
+ &sub-log-status;
+ &sub-overlay;
+ &sub-prepare-buf;
+ &sub-qbuf;
+ &sub-querybuf;
+ &sub-querycap;
+ &sub-queryctrl;
+ &sub-query-dv-timings;
+ &sub-querystd;
+ &sub-reqbufs;
+ &sub-s-hw-freq-seek;
+ &sub-streamon;
+ &sub-subdev-enum-frame-interval;
+ &sub-subdev-enum-frame-size;
+ &sub-subdev-enum-mbus-code;
+ &sub-subdev-g-crop;
+ &sub-subdev-g-fmt;
+ &sub-subdev-g-frame-interval;
+ &sub-subdev-g-selection;
+ &sub-subscribe-event;
+ <!-- End of ioctls. -->
+ &sub-mmap;
+ &sub-munmap;
+ &sub-open;
+ &sub-poll;
+ &sub-read;
+ &sub-select;
+ &sub-write;
+ </appendix>
+
+ <appendix>
+ <title>Common definitions for V4L2 and V4L2 subdev interfaces</title>
+ &sub-selections-common;
+ </appendix>
+
+ <appendix id="videodev">
+ <title>Video For Linux Two Header File</title>
+ &sub-videodev2-h;
+ </appendix>
+
+ <appendix id="capture-example">
+ <title>Video Capture Example</title>
+ &sub-capture-c;
+ </appendix>
+
+ <appendix id="v4l2grab-example">
+ <title>Video Grabber example using libv4l</title>
+ <para>This program demonstrates how to grab V4L2 images in ppm format by
+using libv4l handlers. The advantage is that this grabber can potentially work
+with any V4L2 driver.</para>
+ &sub-v4l2grab-c;
+ </appendix>
+
+ &sub-media-indices;
+
+ &sub-biblio;
+
diff --git a/Documentation/DocBook/media/v4l/v4l2grab.c.xml b/Documentation/DocBook/media/v4l/v4l2grab.c.xml
new file mode 100644
index 000000000..bed12e40b
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/v4l2grab.c.xml
@@ -0,0 +1,164 @@
+<programlisting>
+/* V4L2 video picture grabber
+ Copyright (C) 2009 Mauro Carvalho Chehab &lt;mchehab@infradead.org&gt;
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation version 2 of the License.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ */
+
+#include &lt;stdio.h&gt;
+#include &lt;stdlib.h&gt;
+#include &lt;string.h&gt;
+#include &lt;fcntl.h&gt;
+#include &lt;errno.h&gt;
+#include &lt;sys/ioctl.h&gt;
+#include &lt;sys/types.h&gt;
+#include &lt;sys/time.h&gt;
+#include &lt;sys/mman.h&gt;
+#include &lt;linux/videodev2.h&gt;
+#include "../libv4l/include/libv4l2.h"
+
+#define CLEAR(x) memset(&amp;(x), 0, sizeof(x))
+
+struct buffer {
+ void *start;
+ size_t length;
+};
+
+static void xioctl(int fh, int request, void *arg)
+{
+ int r;
+
+ do {
+ r = v4l2_ioctl(fh, request, arg);
+ } while (r == -1 &amp;&amp; ((errno == EINTR) || (errno == EAGAIN)));
+
+ if (r == -1) {
+ fprintf(stderr, "error %d, %s\n", errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ struct <link linkend="v4l2-format">v4l2_format</link> fmt;
+ struct <link linkend="v4l2-buffer">v4l2_buffer</link> buf;
+ struct <link linkend="v4l2-requestbuffers">v4l2_requestbuffers</link> req;
+ enum <link linkend="v4l2-buf-type">v4l2_buf_type</link> type;
+ fd_set fds;
+ struct timeval tv;
+ int r, fd = -1;
+ unsigned int i, n_buffers;
+ char *dev_name = "/dev/video0";
+ char out_name[256];
+ FILE *fout;
+ struct buffer *buffers;
+
+ fd = v4l2_open(dev_name, O_RDWR | O_NONBLOCK, 0);
+ if (fd &lt; 0) {
+ perror("Cannot open device");
+ exit(EXIT_FAILURE);
+ }
+
+ CLEAR(fmt);
+ fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ fmt.fmt.pix.width = 640;
+ fmt.fmt.pix.height = 480;
+ fmt.fmt.pix.pixelformat = V4L2_PIX_FMT_RGB24;
+ fmt.fmt.pix.field = V4L2_FIELD_INTERLACED;
+ xioctl(fd, VIDIOC_S_FMT, &amp;fmt);
+ if (fmt.fmt.pix.pixelformat != V4L2_PIX_FMT_RGB24) {
+ printf("Libv4l didn't accept RGB24 format. Can't proceed.\n");
+ exit(EXIT_FAILURE);
+ }
+ if ((fmt.fmt.pix.width != 640) || (fmt.fmt.pix.height != 480))
+ printf("Warning: driver is sending image at %dx%d\n",
+ fmt.fmt.pix.width, fmt.fmt.pix.height);
+
+ CLEAR(req);
+ req.count = 2;
+ req.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ req.memory = V4L2_MEMORY_MMAP;
+ xioctl(fd, VIDIOC_REQBUFS, &amp;req);
+
+ buffers = calloc(req.count, sizeof(*buffers));
+ for (n_buffers = 0; n_buffers &lt; req.count; ++n_buffers) {
+ CLEAR(buf);
+
+ buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ buf.memory = V4L2_MEMORY_MMAP;
+ buf.index = n_buffers;
+
+ xioctl(fd, VIDIOC_QUERYBUF, &amp;buf);
+
+ buffers[n_buffers].length = buf.length;
+ buffers[n_buffers].start = v4l2_mmap(NULL, buf.length,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ fd, buf.m.offset);
+
+ if (MAP_FAILED == buffers[n_buffers].start) {
+ perror("mmap");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ for (i = 0; i &lt; n_buffers; ++i) {
+ CLEAR(buf);
+ buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ buf.memory = V4L2_MEMORY_MMAP;
+ buf.index = i;
+ xioctl(fd, VIDIOC_QBUF, &amp;buf);
+ }
+ type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+
+ xioctl(fd, VIDIOC_STREAMON, &amp;type);
+ for (i = 0; i &lt; 20; i++) {
+ do {
+ FD_ZERO(&amp;fds);
+ FD_SET(fd, &amp;fds);
+
+ /* Timeout. */
+ tv.tv_sec = 2;
+ tv.tv_usec = 0;
+
+ r = select(fd + 1, &amp;fds, NULL, NULL, &amp;tv);
+ } while ((r == -1 &amp;&amp; (errno = EINTR)));
+ if (r == -1) {
+ perror("select");
+ return errno;
+ }
+
+ CLEAR(buf);
+ buf.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ buf.memory = V4L2_MEMORY_MMAP;
+ xioctl(fd, VIDIOC_DQBUF, &amp;buf);
+
+ sprintf(out_name, "out%03d.ppm", i);
+ fout = fopen(out_name, "w");
+ if (!fout) {
+ perror("Cannot open image");
+ exit(EXIT_FAILURE);
+ }
+ fprintf(fout, "P6\n%d %d 255\n",
+ fmt.fmt.pix.width, fmt.fmt.pix.height);
+ fwrite(buffers[buf.index].start, buf.bytesused, 1, fout);
+ fclose(fout);
+
+ xioctl(fd, VIDIOC_QBUF, &amp;buf);
+ }
+
+ type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ xioctl(fd, VIDIOC_STREAMOFF, &amp;type);
+ for (i = 0; i &lt; n_buffers; ++i)
+ v4l2_munmap(buffers[i].start, buffers[i].length);
+ v4l2_close(fd);
+
+ return 0;
+}
+</programlisting>
diff --git a/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml b/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml
new file mode 100644
index 000000000..9b700a5f4
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-create-bufs.xml
@@ -0,0 +1,165 @@
+<refentry id="vidioc-create-bufs">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_CREATE_BUFS</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_CREATE_BUFS</refname>
+ <refpurpose>Create buffers for Memory Mapped or User Pointer or DMA Buffer
+ I/O</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_create_buffers *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_CREATE_BUFS</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental"> experimental </link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>This ioctl is used to create buffers for <link linkend="mmap">memory
+mapped</link> or <link linkend="userp">user pointer</link> or <link
+linkend="dmabuf">DMA buffer</link> I/O. It can be used as an alternative or in
+addition to the <constant>VIDIOC_REQBUFS</constant> ioctl, when a tighter
+control over buffers is required. This ioctl can be called multiple times to
+create buffers of different sizes.</para>
+
+ <para>To allocate the device buffers applications must initialize the
+relevant fields of the <structname>v4l2_create_buffers</structname> structure.
+The <structfield>count</structfield> field must be set to the number of
+requested buffers, the <structfield>memory</structfield> field specifies the
+requested I/O method and the <structfield>reserved</structfield> array must be
+zeroed.</para>
+
+ <para>The <structfield>format</structfield> field specifies the image format
+that the buffers must be able to handle. The application has to fill in this
+&v4l2-format;. Usually this will be done using the
+<constant>VIDIOC_TRY_FMT</constant> or <constant>VIDIOC_G_FMT</constant> ioctl()
+to ensure that the requested format is supported by the driver. Unsupported
+formats will result in an error.</para>
+
+ <para>The buffers created by this ioctl will have as minimum size the size
+defined by the <structfield>format.pix.sizeimage</structfield> field. If the
+<structfield>format.pix.sizeimage</structfield> field is less than the minimum
+required for the given format, then <structfield>sizeimage</structfield> will be
+increased by the driver to that minimum to allocate the buffers. If it is
+larger, then the value will be used as-is. The same applies to the
+<structfield>sizeimage</structfield> field of the
+<structname>v4l2_plane_pix_format</structname> structure in the case of
+multiplanar formats.</para>
+
+ <para>When the ioctl is called with a pointer to this structure the driver
+will attempt to allocate up to the requested number of buffers and store the
+actual number allocated and the starting index in the
+<structfield>count</structfield> and the <structfield>index</structfield> fields
+respectively. On return <structfield>count</structfield> can be smaller than
+the number requested. The driver may also increase buffer sizes if required,
+however, it will not update <structfield>sizeimage</structfield> field values.
+The user has to use <constant>VIDIOC_QUERYBUF</constant> to retrieve that
+information.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-create-buffers">
+ <title>struct <structname>v4l2_create_buffers</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>The starting buffer index, returned by the driver.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>count</structfield></entry>
+ <entry>The number of buffers requested or granted. If count == 0, then
+ <constant>VIDIOC_CREATE_BUFS</constant> will set <structfield>index</structfield>
+ to the current number of created buffers, and it will check the validity of
+ <structfield>memory</structfield> and <structfield>format.type</structfield>.
+ If those are invalid -1 is returned and errno is set to &EINVAL;,
+ otherwise <constant>VIDIOC_CREATE_BUFS</constant> returns 0. It will
+ never set errno to &EBUSY; in this particular case.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>memory</structfield></entry>
+ <entry>Applications set this field to
+<constant>V4L2_MEMORY_MMAP</constant>,
+<constant>V4L2_MEMORY_DMABUF</constant> or
+<constant>V4L2_MEMORY_USERPTR</constant>. See <xref linkend="v4l2-memory"
+/></entry>
+ </row>
+ <row>
+ <entry>&v4l2-format;</entry>
+ <entry><structfield>format</structfield></entry>
+ <entry>Filled in by the application, preserved by the driver.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[8]</entry>
+ <entry>A place holder for future extensions.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>ENOMEM</errorcode></term>
+ <listitem>
+ <para>No memory to allocate buffers for <link linkend="mmap">memory
+mapped</link> I/O.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The buffer type (<structfield>format.type</structfield> field),
+requested I/O method (<structfield>memory</structfield>) or format
+(<structfield>format</structfield> field) is not valid.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-cropcap.xml b/Documentation/DocBook/media/v4l/vidioc-cropcap.xml
new file mode 100644
index 000000000..50cb940cb
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-cropcap.xml
@@ -0,0 +1,166 @@
+<refentry id="vidioc-cropcap">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_CROPCAP</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_CROPCAP</refname>
+ <refpurpose>Information about the video cropping and scaling abilities</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_cropcap
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_CROPCAP</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>Applications use this function to query the cropping
+limits, the pixel aspect of images and to calculate scale factors.
+They set the <structfield>type</structfield> field of a v4l2_cropcap
+structure to the respective buffer (stream) type and call the
+<constant>VIDIOC_CROPCAP</constant> ioctl with a pointer to this
+structure. Drivers fill the rest of the structure. The results are
+constant except when switching the video standard. Remember this
+switch can occur implicit when switching the video input or
+output.</para>
+
+<para>Do not use the multiplanar buffer types. Use <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant>
+instead of <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE</constant>
+and use <constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant> instead of
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE</constant>.</para>
+
+ <para>This ioctl must be implemented for video capture or output devices that
+support cropping and/or scaling and/or have non-square pixels, and for overlay devices.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-cropcap">
+ <title>struct <structname>v4l2_cropcap</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of the data stream, set by the application.
+Only these types are valid here:
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant>,
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant> and
+<constant>V4L2_BUF_TYPE_VIDEO_OVERLAY</constant>. See <xref linkend="v4l2-buf-type" />.</entry>
+ </row>
+ <row>
+ <entry>struct <link linkend="v4l2-rect-crop">v4l2_rect</link></entry>
+ <entry><structfield>bounds</structfield></entry>
+ <entry>Defines the window within capturing or output is
+possible, this may exclude for example the horizontal and vertical
+blanking areas. The cropping rectangle cannot exceed these limits.
+Width and height are defined in pixels, the driver writer is free to
+choose origin and units of the coordinate system in the analog
+domain.</entry>
+ </row>
+ <row>
+ <entry>struct <link linkend="v4l2-rect-crop">v4l2_rect</link></entry>
+ <entry><structfield>defrect</structfield></entry>
+ <entry>Default cropping rectangle, it shall cover the
+"whole picture". Assuming pixel aspect 1/1 this could be for example a
+640&nbsp;&times;&nbsp;480 rectangle for NTSC, a
+768&nbsp;&times;&nbsp;576 rectangle for PAL and SECAM centered over
+the active picture area. The same co-ordinate system as for
+ <structfield>bounds</structfield> is used.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-fract;</entry>
+ <entry><structfield>pixelaspect</structfield></entry>
+ <entry><para>This is the pixel aspect (y / x) when no
+scaling is applied, the ratio of the actual sampling
+frequency and the frequency required to get square
+pixels.</para><para>When cropping coordinates refer to square pixels,
+the driver sets <structfield>pixelaspect</structfield> to 1/1. Other
+common values are 54/59 for PAL and SECAM, 11/10 for NTSC sampled
+according to [<xref linkend="itu601" />].</para></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <!-- NB this table is duplicated in the overlay chapter. -->
+
+ <table pgwide="1" frame="none" id="v4l2-rect-crop">
+ <title>struct <structname>v4l2_rect</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>left</structfield></entry>
+ <entry>Horizontal offset of the top, left corner of the
+rectangle, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>top</structfield></entry>
+ <entry>Vertical offset of the top, left corner of the
+rectangle, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>width</structfield></entry>
+ <entry>Width of the rectangle, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>height</structfield></entry>
+ <entry>Height of the rectangle, in pixels.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-cropcap; <structfield>type</structfield> is
+invalid.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml
new file mode 100644
index 000000000..4c4603c13
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-dbg-g-chip-info.xml
@@ -0,0 +1,207 @@
+<refentry id="vidioc-dbg-g-chip-info">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_DBG_G_CHIP_INFO</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_DBG_G_CHIP_INFO</refname>
+ <refpurpose>Identify the chips on a TV card</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_dbg_chip_info
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_DBG_G_CHIP_INFO</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+
+ <para>This is an <link
+linkend="experimental">experimental</link> interface and may change in
+the future.</para>
+ </note>
+
+ <para>For driver debugging purposes this ioctl allows test
+applications to query the driver about the chips present on the TV
+card. Regular applications must not use it. When you found a chip
+specific bug, please contact the linux-media mailing list (&v4l-ml;)
+so it can be fixed.</para>
+
+ <para>Additionally the Linux kernel must be compiled with the
+<constant>CONFIG_VIDEO_ADV_DEBUG</constant> option to enable this ioctl.</para>
+
+ <para>To query the driver applications must initialize the
+<structfield>match.type</structfield> and
+<structfield>match.addr</structfield> or <structfield>match.name</structfield>
+fields of a &v4l2-dbg-chip-info;
+and call <constant>VIDIOC_DBG_G_CHIP_INFO</constant> with a pointer to
+this structure. On success the driver stores information about the
+selected chip in the <structfield>name</structfield> and
+<structfield>flags</structfield> fields.</para>
+
+ <para>When <structfield>match.type</structfield> is
+<constant>V4L2_CHIP_MATCH_BRIDGE</constant>,
+<structfield>match.addr</structfield> selects the nth bridge 'chip'
+on the TV card. You can enumerate all chips by starting at zero and
+incrementing <structfield>match.addr</structfield> by one until
+<constant>VIDIOC_DBG_G_CHIP_INFO</constant> fails with an &EINVAL;.
+The number zero always selects the bridge chip itself, &eg; the chip
+connected to the PCI or USB bus. Non-zero numbers identify specific
+parts of the bridge chip such as an AC97 register block.</para>
+
+ <para>When <structfield>match.type</structfield> is
+<constant>V4L2_CHIP_MATCH_SUBDEV</constant>,
+<structfield>match.addr</structfield> selects the nth sub-device. This
+allows you to enumerate over all sub-devices.</para>
+
+ <para>On success, the <structfield>name</structfield> field will
+contain a chip name and the <structfield>flags</structfield> field will
+contain <constant>V4L2_CHIP_FL_READABLE</constant> if the driver supports
+reading registers from the device or <constant>V4L2_CHIP_FL_WRITABLE</constant>
+if the driver supports writing registers to the device.</para>
+
+ <para>We recommended the <application>v4l2-dbg</application>
+utility over calling this ioctl directly. It is available from the
+LinuxTV v4l-dvb repository; see <ulink
+url="http://linuxtv.org/repo/">http://linuxtv.org/repo/</ulink> for
+access instructions.</para>
+
+ <!-- Note for convenience vidioc-dbg-g-register.sgml
+ contains a duplicate of this table. -->
+ <table pgwide="1" frame="none" id="name-v4l2-dbg-match">
+ <title>struct <structname>v4l2_dbg_match</structname></title>
+ <tgroup cols="4">
+ &cs-ustr;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>See <xref linkend="name-chip-match-types" /> for a list of
+possible types.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry>(anonymous)</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>addr</structfield></entry>
+ <entry>Match a chip by this number, interpreted according
+to the <structfield>type</structfield> field.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>char</entry>
+ <entry><structfield>name[32]</structfield></entry>
+ <entry>Match a chip by this name, interpreted according
+to the <structfield>type</structfield> field. Currently unused.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-dbg-chip-info">
+ <title>struct <structname>v4l2_dbg_chip_info</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>struct v4l2_dbg_match</entry>
+ <entry><structfield>match</structfield></entry>
+ <entry>How to match the chip, see <xref linkend="name-v4l2-dbg-match" />.</entry>
+ </row>
+ <row>
+ <entry>char</entry>
+ <entry><structfield>name[32]</structfield></entry>
+ <entry>The name of the chip.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Set by the driver. If <constant>V4L2_CHIP_FL_READABLE</constant>
+is set, then the driver supports reading registers from the device. If
+<constant>V4L2_CHIP_FL_WRITABLE</constant> is set, then it supports writing registers.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved[8]</structfield></entry>
+ <entry>Reserved fields, both application and driver must set these to 0.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <!-- Note for convenience vidioc-dbg-g-register.sgml
+ contains a duplicate of this table. -->
+ <table pgwide="1" frame="none" id="name-chip-match-types">
+ <title>Chip Match Types</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CHIP_MATCH_BRIDGE</constant></entry>
+ <entry>0</entry>
+ <entry>Match the nth chip on the card, zero for the
+ bridge chip. Does not match sub-devices.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CHIP_MATCH_SUBDEV</constant></entry>
+ <entry>4</entry>
+ <entry>Match the nth sub-device.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <structfield>match_type</structfield> is invalid or
+no device could be matched.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml b/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml
new file mode 100644
index 000000000..3d038e75d
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-dbg-g-register.xml
@@ -0,0 +1,227 @@
+<refentry id="vidioc-dbg-g-register">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_DBG_G_REGISTER, VIDIOC_DBG_S_REGISTER</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_DBG_G_REGISTER</refname>
+ <refname>VIDIOC_DBG_S_REGISTER</refname>
+ <refpurpose>Read or write hardware registers</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_dbg_register *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const struct v4l2_dbg_register
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_DBG_G_REGISTER, VIDIOC_DBG_S_REGISTER</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+
+ <para>This is an <link linkend="experimental">experimental</link>
+interface and may change in the future.</para>
+ </note>
+
+ <para>For driver debugging purposes these ioctls allow test
+applications to access hardware registers directly. Regular
+applications must not use them.</para>
+
+ <para>Since writing or even reading registers can jeopardize the
+system security, its stability and damage the hardware, both ioctls
+require superuser privileges. Additionally the Linux kernel must be
+compiled with the <constant>CONFIG_VIDEO_ADV_DEBUG</constant> option
+to enable these ioctls.</para>
+
+ <para>To write a register applications must initialize all fields
+of a &v4l2-dbg-register; except for <structfield>size</structfield> and call
+<constant>VIDIOC_DBG_S_REGISTER</constant> with a pointer to this
+structure. The <structfield>match.type</structfield> and
+<structfield>match.addr</structfield> or <structfield>match.name</structfield>
+fields select a chip on the TV
+card, the <structfield>reg</structfield> field specifies a register
+number and the <structfield>val</structfield> field the value to be
+written into the register.</para>
+
+ <para>To read a register applications must initialize the
+<structfield>match.type</structfield>,
+<structfield>match.addr</structfield> or <structfield>match.name</structfield> and
+<structfield>reg</structfield> fields, and call
+<constant>VIDIOC_DBG_G_REGISTER</constant> with a pointer to this
+structure. On success the driver stores the register value in the
+<structfield>val</structfield> field and the size (in bytes) of the
+value in <structfield>size</structfield>.</para>
+
+ <para>When <structfield>match.type</structfield> is
+<constant>V4L2_CHIP_MATCH_BRIDGE</constant>,
+<structfield>match.addr</structfield> selects the nth non-sub-device chip
+on the TV card. The number zero always selects the host chip, &eg; the
+chip connected to the PCI or USB bus. You can find out which chips are
+present with the &VIDIOC-DBG-G-CHIP-INFO; ioctl.</para>
+
+ <para>When <structfield>match.type</structfield> is
+<constant>V4L2_CHIP_MATCH_SUBDEV</constant>,
+<structfield>match.addr</structfield> selects the nth sub-device.</para>
+
+ <para>These ioctls are optional, not all drivers may support them.
+However when a driver supports these ioctls it must also support
+&VIDIOC-DBG-G-CHIP-INFO;. Conversely it may support
+<constant>VIDIOC_DBG_G_CHIP_INFO</constant> but not these ioctls.</para>
+
+ <para><constant>VIDIOC_DBG_G_REGISTER</constant> and
+<constant>VIDIOC_DBG_S_REGISTER</constant> were introduced in Linux
+2.6.21, but their API was changed to the one described here in kernel 2.6.29.</para>
+
+ <para>We recommended the <application>v4l2-dbg</application>
+utility over calling these ioctls directly. It is available from the
+LinuxTV v4l-dvb repository; see <ulink
+url="http://linuxtv.org/repo/">http://linuxtv.org/repo/</ulink> for
+access instructions.</para>
+
+ <!-- Note for convenience vidioc-dbg-g-chip-info.sgml
+ contains a duplicate of this table. -->
+ <table pgwide="1" frame="none" id="v4l2-dbg-match">
+ <title>struct <structname>v4l2_dbg_match</structname></title>
+ <tgroup cols="4">
+ &cs-ustr;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>See <xref linkend="chip-match-types" /> for a list of
+possible types.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry>(anonymous)</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>addr</structfield></entry>
+ <entry>Match a chip by this number, interpreted according
+to the <structfield>type</structfield> field.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>char</entry>
+ <entry><structfield>name[32]</structfield></entry>
+ <entry>Match a chip by this name, interpreted according
+to the <structfield>type</structfield> field. Currently unused.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+
+ <table pgwide="1" frame="none" id="v4l2-dbg-register">
+ <title>struct <structname>v4l2_dbg_register</structname></title>
+ <tgroup cols="4">
+ <colspec colname="c1" />
+ <colspec colname="c2" />
+ <colspec colname="c4" />
+ <tbody valign="top">
+ <row>
+ <entry>struct v4l2_dbg_match</entry>
+ <entry><structfield>match</structfield></entry>
+ <entry>How to match the chip, see <xref linkend="v4l2-dbg-match" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>size</structfield></entry>
+ <entry>The register size in bytes.</entry>
+ </row>
+ <row>
+ <entry>__u64</entry>
+ <entry><structfield>reg</structfield></entry>
+ <entry>A register number.</entry>
+ </row>
+ <row>
+ <entry>__u64</entry>
+ <entry><structfield>val</structfield></entry>
+ <entry>The value read from, or to be written into the
+register.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <!-- Note for convenience vidioc-dbg-g-chip-info.sgml
+ contains a duplicate of this table. -->
+ <table pgwide="1" frame="none" id="chip-match-types">
+ <title>Chip Match Types</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CHIP_MATCH_BRIDGE</constant></entry>
+ <entry>0</entry>
+ <entry>Match the nth chip on the card, zero for the
+ bridge chip. Does not match sub-devices.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CHIP_MATCH_SUBDEV</constant></entry>
+ <entry>4</entry>
+ <entry>Match the nth sub-device.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EPERM</errorcode></term>
+ <listitem>
+ <para>Insufficient permissions. Root privileges are required
+to execute these ioctls.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-decoder-cmd.xml b/Documentation/DocBook/media/v4l/vidioc-decoder-cmd.xml
new file mode 100644
index 000000000..9215627b0
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-decoder-cmd.xml
@@ -0,0 +1,249 @@
+<refentry id="vidioc-decoder-cmd">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_DECODER_CMD, VIDIOC_TRY_DECODER_CMD</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_DECODER_CMD</refname>
+ <refname>VIDIOC_TRY_DECODER_CMD</refname>
+ <refpurpose>Execute an decoder command</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_decoder_cmd *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_DECODER_CMD, VIDIOC_TRY_DECODER_CMD</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>These ioctls control an audio/video (usually MPEG-) decoder.
+<constant>VIDIOC_DECODER_CMD</constant> sends a command to the
+decoder, <constant>VIDIOC_TRY_DECODER_CMD</constant> can be used to
+try a command without actually executing it. To send a command applications
+must initialize all fields of a &v4l2-decoder-cmd; and call
+<constant>VIDIOC_DECODER_CMD</constant> or <constant>VIDIOC_TRY_DECODER_CMD</constant>
+with a pointer to this structure.</para>
+
+ <para>The <structfield>cmd</structfield> field must contain the
+command code. Some commands use the <structfield>flags</structfield> field for
+additional information.
+</para>
+
+ <para>A <function>write</function>() or &VIDIOC-STREAMON; call sends an implicit
+START command to the decoder if it has not been started yet.
+</para>
+
+ <para>A <function>close</function>() or &VIDIOC-STREAMOFF; call of a streaming
+file descriptor sends an implicit immediate STOP command to the decoder, and all
+buffered data is discarded.</para>
+
+ <para>These ioctls are optional, not all drivers may support
+them. They were introduced in Linux 3.3.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-decoder-cmd">
+ <title>struct <structname>v4l2_decoder_cmd</structname></title>
+ <tgroup cols="5">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>cmd</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>The decoder command, see <xref linkend="decoder-cmds" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>Flags to go with the command. If no flags are defined for
+this command, drivers and applications must set this field to zero.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry>(anonymous)</entry>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>struct</entry>
+ <entry><structfield>start</structfield></entry>
+ <entry></entry>
+ <entry>Structure containing additional data for the
+<constant>V4L2_DEC_CMD_START</constant> command.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>__s32</entry>
+ <entry><structfield>speed</structfield></entry>
+ <entry>Playback speed and direction. The playback speed is defined as
+<structfield>speed</structfield>/1000 of the normal speed. So 1000 is normal playback.
+Negative numbers denote reverse playback, so -1000 does reverse playback at normal
+speed. Speeds -1, 0 and 1 have special meanings: speed 0 is shorthand for 1000
+(normal playback). A speed of 1 steps just one frame forward, a speed of -1 steps
+just one frame back.
+ </entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>format</structfield></entry>
+ <entry>Format restrictions. This field is set by the driver, not the
+application. Possible values are <constant>V4L2_DEC_START_FMT_NONE</constant> if
+there are no format restrictions or <constant>V4L2_DEC_START_FMT_GOP</constant>
+if the decoder operates on full GOPs (<wordasword>Group Of Pictures</wordasword>).
+This is usually the case for reverse playback: the decoder needs full GOPs, which
+it can then play in reverse order. So to implement reverse playback the application
+must feed the decoder the last GOP in the video file, then the GOP before that, etc. etc.
+ </entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>struct</entry>
+ <entry><structfield>stop</structfield></entry>
+ <entry></entry>
+ <entry>Structure containing additional data for the
+<constant>V4L2_DEC_CMD_STOP</constant> command.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>__u64</entry>
+ <entry><structfield>pts</structfield></entry>
+ <entry>Stop playback at this <structfield>pts</structfield> or immediately
+if the playback is already past that timestamp. Leave to 0 if you want to stop after the
+last frame was decoded.
+ </entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>struct</entry>
+ <entry><structfield>raw</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>data</structfield>[16]</entry>
+ <entry>Reserved for future extensions. Drivers and
+applications must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="decoder-cmds">
+ <title>Decoder Commands</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_DEC_CMD_START</constant></entry>
+ <entry>0</entry>
+ <entry>Start the decoder. When the decoder is already
+running or paused, this command will just change the playback speed.
+That means that calling <constant>V4L2_DEC_CMD_START</constant> when
+the decoder was paused will <emphasis>not</emphasis> resume the decoder.
+You have to explicitly call <constant>V4L2_DEC_CMD_RESUME</constant> for that.
+This command has one flag:
+<constant>V4L2_DEC_CMD_START_MUTE_AUDIO</constant>. If set, then audio will
+be muted when playing back at a non-standard speed.
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_DEC_CMD_STOP</constant></entry>
+ <entry>1</entry>
+ <entry>Stop the decoder. When the decoder is already stopped,
+this command does nothing. This command has two flags:
+if <constant>V4L2_DEC_CMD_STOP_TO_BLACK</constant> is set, then the decoder will
+set the picture to black after it stopped decoding. Otherwise the last image will
+repeat. If <constant>V4L2_DEC_CMD_STOP_IMMEDIATELY</constant> is set, then the decoder
+stops immediately (ignoring the <structfield>pts</structfield> value), otherwise it
+will keep decoding until timestamp >= pts or until the last of the pending data from
+its internal buffers was decoded.
+</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_DEC_CMD_PAUSE</constant></entry>
+ <entry>2</entry>
+ <entry>Pause the decoder. When the decoder has not been
+started yet, the driver will return an &EPERM;. When the decoder is
+already paused, this command does nothing. This command has one flag:
+if <constant>V4L2_DEC_CMD_PAUSE_TO_BLACK</constant> is set, then set the
+decoder output to black when paused.
+</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_DEC_CMD_RESUME</constant></entry>
+ <entry>3</entry>
+ <entry>Resume decoding after a PAUSE command. When the
+decoder has not been started yet, the driver will return an &EPERM;.
+When the decoder is already running, this command does nothing. No
+flags are defined for this command.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <structfield>cmd</structfield> field is invalid.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EPERM</errorcode></term>
+ <listitem>
+ <para>The application sent a PAUSE or RESUME command when
+the decoder was not running.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-dqevent.xml b/Documentation/DocBook/media/v4l/vidioc-dqevent.xml
new file mode 100644
index 000000000..50ccd3394
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-dqevent.xml
@@ -0,0 +1,468 @@
+<refentry id="vidioc-dqevent">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_DQEVENT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_DQEVENT</refname>
+ <refpurpose>Dequeue event</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_event
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_DQEVENT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>Dequeue an event from a video device. No input is required
+ for this ioctl. All the fields of the &v4l2-event; structure are
+ filled by the driver. The file handle will also receive exceptions
+ which the application may get by e.g. using the select system
+ call.</para>
+
+ <table frame="none" pgwide="1" id="v4l2-event">
+ <title>struct <structname>v4l2_event</structname></title>
+ <tgroup cols="4">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry></entry>
+ <entry>Type of the event, see <xref linkend="event-type" />.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry><structfield>u</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-event-vsync;</entry>
+ <entry><structfield>vsync</structfield></entry>
+ <entry>Event data for event <constant>V4L2_EVENT_VSYNC</constant>.
+ </entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-event-ctrl;</entry>
+ <entry><structfield>ctrl</structfield></entry>
+ <entry>Event data for event <constant>V4L2_EVENT_CTRL</constant>.
+ </entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-event-frame-sync;</entry>
+ <entry><structfield>frame_sync</structfield></entry>
+ <entry>Event data for event
+ <constant>V4L2_EVENT_FRAME_SYNC</constant>.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-event-motion-det;</entry>
+ <entry><structfield>motion_det</structfield></entry>
+ <entry>Event data for event V4L2_EVENT_MOTION_DET.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-event-src-change;</entry>
+ <entry><structfield>src_change</structfield></entry>
+ <entry>Event data for event V4L2_EVENT_SOURCE_CHANGE.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u8</entry>
+ <entry><structfield>data</structfield>[64]</entry>
+ <entry>Event data. Defined by the event type. The union
+ should be used to define easily accessible type for
+ events.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pending</structfield></entry>
+ <entry></entry>
+ <entry>Number of pending events excluding this one.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>sequence</structfield></entry>
+ <entry></entry>
+ <entry>Event sequence number. The sequence number is
+ incremented for every subscribed event that takes place.
+ If sequence numbers are not contiguous it means that
+ events have been lost.
+ </entry>
+ </row>
+ <row>
+ <entry>struct timespec</entry>
+ <entry><structfield>timestamp</structfield></entry>
+ <entry></entry>
+ <entry>Event timestamp.</entry>
+ </row>
+ <row>
+ <entry>u32</entry>
+ <entry><structfield>id</structfield></entry>
+ <entry></entry>
+ <entry>The ID associated with the event source. If the event does not
+ have an associated ID (this depends on the event type), then this
+ is 0.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[8]</entry>
+ <entry></entry>
+ <entry>Reserved for future extensions. Drivers must set
+ the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="event-type">
+ <title>Event Types</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_EVENT_ALL</constant></entry>
+ <entry>0</entry>
+ <entry>All events. V4L2_EVENT_ALL is valid only for
+ VIDIOC_UNSUBSCRIBE_EVENT for unsubscribing all events at once.
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EVENT_VSYNC</constant></entry>
+ <entry>1</entry>
+ <entry>This event is triggered on the vertical sync.
+ This event has a &v4l2-event-vsync; associated with it.
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EVENT_EOS</constant></entry>
+ <entry>2</entry>
+ <entry>This event is triggered when the end of a stream is reached.
+ This is typically used with MPEG decoders to report to the application
+ when the last of the MPEG stream has been decoded.
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EVENT_CTRL</constant></entry>
+ <entry>3</entry>
+ <entry><para>This event requires that the <structfield>id</structfield>
+ matches the control ID from which you want to receive events.
+ This event is triggered if the control's value changes, if a
+ button control is pressed or if the control's flags change.
+ This event has a &v4l2-event-ctrl; associated with it. This struct
+ contains much of the same information as &v4l2-queryctrl; and
+ &v4l2-control;.</para>
+
+ <para>If the event is generated due to a call to &VIDIOC-S-CTRL; or
+ &VIDIOC-S-EXT-CTRLS;, then the event will <emphasis>not</emphasis> be sent to
+ the file handle that called the ioctl function. This prevents
+ nasty feedback loops. If you <emphasis>do</emphasis> want to get the
+ event, then set the <constant>V4L2_EVENT_SUB_FL_ALLOW_FEEDBACK</constant>
+ flag.
+ </para>
+
+ <para>This event type will ensure that no information is lost when
+ more events are raised than there is room internally. In that
+ case the &v4l2-event-ctrl; of the second-oldest event is kept,
+ but the <structfield>changes</structfield> field of the
+ second-oldest event is ORed with the <structfield>changes</structfield>
+ field of the oldest event.</para>
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EVENT_FRAME_SYNC</constant></entry>
+ <entry>4</entry>
+ <entry>
+ <para>Triggered immediately when the reception of a
+ frame has begun. This event has a
+ &v4l2-event-frame-sync; associated with it.</para>
+
+ <para>If the hardware needs to be stopped in the case of a
+ buffer underrun it might not be able to generate this event.
+ In such cases the <structfield>frame_sequence</structfield>
+ field in &v4l2-event-frame-sync; will not be incremented. This
+ causes two consecutive frame sequence numbers to have n times
+ frame interval in between them.</para>
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EVENT_SOURCE_CHANGE</constant></entry>
+ <entry>5</entry>
+ <entry>
+ <para>This event is triggered when a source parameter change is
+ detected during runtime by the video device. It can be a
+ runtime resolution change triggered by a video decoder or the
+ format change happening on an input connector.
+ This event requires that the <structfield>id</structfield>
+ matches the input index (when used with a video device node)
+ or the pad index (when used with a subdevice node) from which
+ you want to receive events.</para>
+
+ <para>This event has a &v4l2-event-src-change; associated
+ with it. The <structfield>changes</structfield> bitfield denotes
+ what has changed for the subscribed pad. If multiple events
+ occurred before application could dequeue them, then the changes
+ will have the ORed value of all the events generated.</para>
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EVENT_MOTION_DET</constant></entry>
+ <entry>6</entry>
+ <entry>
+ <para>Triggered whenever the motion detection state for one or more of the regions
+ changes. This event has a &v4l2-event-motion-det; associated with it.</para>
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EVENT_PRIVATE_START</constant></entry>
+ <entry>0x08000000</entry>
+ <entry>Base event number for driver-private events.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-event-vsync">
+ <title>struct <structname>v4l2_event_vsync</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>field</structfield></entry>
+ <entry>The upcoming field. See &v4l2-field;.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-event-ctrl">
+ <title>struct <structname>v4l2_event_ctrl</structname></title>
+ <tgroup cols="4">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>changes</structfield></entry>
+ <entry></entry>
+ <entry>A bitmask that tells what has changed. See <xref linkend="ctrl-changes-flags" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry></entry>
+ <entry>The type of the control. See &v4l2-ctrl-type;.</entry>
+ </row>
+ <row>
+ <entry>union (anonymous)</entry>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__s32</entry>
+ <entry><structfield>value</structfield></entry>
+ <entry>The 32-bit value of the control for 32-bit control types.
+ This is 0 for string controls since the value of a string
+ cannot be passed using &VIDIOC-DQEVENT;.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__s64</entry>
+ <entry><structfield>value64</structfield></entry>
+ <entry>The 64-bit value of the control for 64-bit control types.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry></entry>
+ <entry>The control flags. See <xref linkend="control-flags" />.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>minimum</structfield></entry>
+ <entry></entry>
+ <entry>The minimum value of the control. See &v4l2-queryctrl;.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>maximum</structfield></entry>
+ <entry></entry>
+ <entry>The maximum value of the control. See &v4l2-queryctrl;.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>step</structfield></entry>
+ <entry></entry>
+ <entry>The step value of the control. See &v4l2-queryctrl;.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>default_value</structfield></entry>
+ <entry></entry>
+ <entry>The default value value of the control. See &v4l2-queryctrl;.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-event-frame-sync">
+ <title>struct <structname>v4l2_event_frame_sync</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>frame_sequence</structfield></entry>
+ <entry>
+ The sequence number of the frame being received.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-event-src-change">
+ <title>struct <structname>v4l2_event_src_change</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>changes</structfield></entry>
+ <entry>
+ A bitmask that tells what has changed. See <xref linkend="src-changes-flags" />.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="v4l2-event-motion-det">
+ <title>struct <structname>v4l2_event_motion_det</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>
+ Currently only one flag is available: if <constant>V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ</constant>
+ is set, then the <structfield>frame_sequence</structfield> field is valid,
+ otherwise that field should be ignored.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>frame_sequence</structfield></entry>
+ <entry>
+ The sequence number of the frame being received. Only valid if the
+ <constant>V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ</constant> flag was set.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>region_mask</structfield></entry>
+ <entry>
+ The bitmask of the regions that reported motion. There is at least one
+ region. If this field is 0, then no motion was detected at all.
+ If there is no <constant>V4L2_CID_DETECT_MD_REGION_GRID</constant> control
+ (see <xref linkend="detect-controls" />) to assign a different region
+ to each cell in the motion detection grid, then that all cells
+ are automatically assigned to the default region 0.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="ctrl-changes-flags">
+ <title>Control Changes</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_EVENT_CTRL_CH_VALUE</constant></entry>
+ <entry>0x0001</entry>
+ <entry>This control event was triggered because the value of the control
+ changed. Special cases: Volatile controls do no generate this event;
+ If a control has the <constant>V4L2_CTRL_FLAG_EXECUTE_ON_WRITE</constant>
+ flag set, then this event is sent as well, regardless its value.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EVENT_CTRL_CH_FLAGS</constant></entry>
+ <entry>0x0002</entry>
+ <entry>This control event was triggered because the control flags
+ changed.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EVENT_CTRL_CH_RANGE</constant></entry>
+ <entry>0x0004</entry>
+ <entry>This control event was triggered because the minimum,
+ maximum, step or the default value of the control changed.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="src-changes-flags">
+ <title>Source Changes</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_EVENT_SRC_CH_RESOLUTION</constant></entry>
+ <entry>0x0001</entry>
+ <entry>This event gets triggered when a resolution change is
+ detected at an input. This can come from an input connector or
+ from a video decoder.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-dv-timings-cap.xml b/Documentation/DocBook/media/v4l/vidioc-dv-timings-cap.xml
new file mode 100644
index 000000000..a2017bfca
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-dv-timings-cap.xml
@@ -0,0 +1,214 @@
+<refentry id="vidioc-dv-timings-cap">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_DV_TIMINGS_CAP, VIDIOC_SUBDEV_DV_TIMINGS_CAP</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_DV_TIMINGS_CAP</refname>
+ <refname>VIDIOC_SUBDEV_DV_TIMINGS_CAP</refname>
+ <refpurpose>The capabilities of the Digital Video receiver/transmitter</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_dv_timings_cap *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_DV_TIMINGS_CAP, VIDIOC_SUBDEV_DV_TIMINGS_CAP</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental"> experimental </link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>To query the capabilities of the DV receiver/transmitter applications
+can call the <constant>VIDIOC_DV_TIMINGS_CAP</constant> ioctl on a video node
+and the driver will fill in the structure. Note that drivers may return
+different values after switching the video input or output.</para>
+
+ <para>When implemented by the driver DV capabilities of subdevices can be
+queried by calling the <constant>VIDIOC_SUBDEV_DV_TIMINGS_CAP</constant> ioctl
+directly on a subdevice node. The capabilities are specific to inputs (for DV
+receivers) or outputs (for DV transmitters), applications must specify the
+desired pad number in the &v4l2-dv-timings-cap; <structfield>pad</structfield>
+field. Attempts to query capabilities on a pad that doesn't support them will
+return an &EINVAL;.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-bt-timings-cap">
+ <title>struct <structname>v4l2_bt_timings_cap</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>min_width</structfield></entry>
+ <entry>Minimum width of the active video in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>max_width</structfield></entry>
+ <entry>Maximum width of the active video in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>min_height</structfield></entry>
+ <entry>Minimum height of the active video in lines.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>max_height</structfield></entry>
+ <entry>Maximum height of the active video in lines.</entry>
+ </row>
+ <row>
+ <entry>__u64</entry>
+ <entry><structfield>min_pixelclock</structfield></entry>
+ <entry>Minimum pixelclock frequency in Hz.</entry>
+ </row>
+ <row>
+ <entry>__u64</entry>
+ <entry><structfield>max_pixelclock</structfield></entry>
+ <entry>Maximum pixelclock frequency in Hz.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>standards</structfield></entry>
+ <entry>The video standard(s) supported by the hardware.
+ See <xref linkend="dv-bt-standards"/> for a list of standards.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capabilities</structfield></entry>
+ <entry>Several flags giving more information about the capabilities.
+ See <xref linkend="dv-bt-cap-capabilities"/> for a description of the flags.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[16]</entry>
+ <entry>Reserved for future extensions. Drivers must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-dv-timings-cap">
+ <title>struct <structname>v4l2_dv_timings_cap</structname></title>
+ <tgroup cols="4">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of DV timings as listed in <xref linkend="dv-timing-types"/>.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pad</structfield></entry>
+ <entry>Pad number as reported by the media controller API. This field
+ is only used when operating on a subdevice node. When operating on a
+ video node applications must set this field to zero.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[2]</entry>
+ <entry>Reserved for future extensions. Drivers must set the array to zero.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry><structfield></structfield></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-bt-timings-cap;</entry>
+ <entry><structfield>bt</structfield></entry>
+ <entry>BT.656/1120 timings capabilities of the hardware.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>raw_data</structfield>[32]</entry>
+ <entry></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="dv-bt-cap-capabilities">
+ <title>DV BT Timing capabilities</title>
+ <tgroup cols="2">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>Flag</entry>
+ <entry>Description</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_BT_CAP_INTERLACED</entry>
+ <entry>Interlaced formats are supported.
+ </entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_BT_CAP_PROGRESSIVE</entry>
+ <entry>Progressive formats are supported.
+ </entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_BT_CAP_REDUCED_BLANKING</entry>
+ <entry>CVT/GTF specific: the timings can make use of reduced blanking (CVT)
+or the 'Secondary GTF' curve (GTF).
+ </entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_BT_CAP_CUSTOM</entry>
+ <entry>Can support non-standard timings, i.e. timings not belonging to the
+standards set in the <structfield>standards</structfield> field.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-encoder-cmd.xml b/Documentation/DocBook/media/v4l/vidioc-encoder-cmd.xml
new file mode 100644
index 000000000..0619ca5d2
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-encoder-cmd.xml
@@ -0,0 +1,189 @@
+<refentry id="vidioc-encoder-cmd">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENCODER_CMD, VIDIOC_TRY_ENCODER_CMD</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENCODER_CMD</refname>
+ <refname>VIDIOC_TRY_ENCODER_CMD</refname>
+ <refpurpose>Execute an encoder command</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_encoder_cmd *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENCODER_CMD, VIDIOC_TRY_ENCODER_CMD</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>These ioctls control an audio/video (usually MPEG-) encoder.
+<constant>VIDIOC_ENCODER_CMD</constant> sends a command to the
+encoder, <constant>VIDIOC_TRY_ENCODER_CMD</constant> can be used to
+try a command without actually executing it.</para>
+
+ <para>To send a command applications must initialize all fields of a
+ &v4l2-encoder-cmd; and call
+ <constant>VIDIOC_ENCODER_CMD</constant> or
+ <constant>VIDIOC_TRY_ENCODER_CMD</constant> with a pointer to this
+ structure.</para>
+
+ <para>The <structfield>cmd</structfield> field must contain the
+command code. The <structfield>flags</structfield> field is currently
+only used by the STOP command and contains one bit: If the
+<constant>V4L2_ENC_CMD_STOP_AT_GOP_END</constant> flag is set,
+encoding will continue until the end of the current <wordasword>Group
+Of Pictures</wordasword>, otherwise it will stop immediately.</para>
+
+ <para>A <function>read</function>() or &VIDIOC-STREAMON; call sends an implicit
+START command to the encoder if it has not been started yet. After a STOP command,
+<function>read</function>() calls will read the remaining data
+buffered by the driver. When the buffer is empty,
+<function>read</function>() will return zero and the next
+<function>read</function>() call will restart the encoder.</para>
+
+ <para>A <function>close</function>() or &VIDIOC-STREAMOFF; call of a streaming
+file descriptor sends an implicit immediate STOP to the encoder, and all buffered
+data is discarded.</para>
+
+ <para>These ioctls are optional, not all drivers may support
+them. They were introduced in Linux 2.6.21.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-encoder-cmd">
+ <title>struct <structname>v4l2_encoder_cmd</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>cmd</structfield></entry>
+ <entry>The encoder command, see <xref linkend="encoder-cmds" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Flags to go with the command, see <xref
+ linkend="encoder-flags" />. If no flags are defined for
+this command, drivers and applications must set this field to
+zero.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>data</structfield>[8]</entry>
+ <entry>Reserved for future extensions. Drivers and
+applications must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="encoder-cmds">
+ <title>Encoder Commands</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_ENC_CMD_START</constant></entry>
+ <entry>0</entry>
+ <entry>Start the encoder. When the encoder is already
+running or paused, this command does nothing. No flags are defined for
+this command.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_ENC_CMD_STOP</constant></entry>
+ <entry>1</entry>
+ <entry>Stop the encoder. When the
+<constant>V4L2_ENC_CMD_STOP_AT_GOP_END</constant> flag is set,
+encoding will continue until the end of the current <wordasword>Group
+Of Pictures</wordasword>, otherwise encoding will stop immediately.
+When the encoder is already stopped, this command does
+nothing.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_ENC_CMD_PAUSE</constant></entry>
+ <entry>2</entry>
+ <entry>Pause the encoder. When the encoder has not been
+started yet, the driver will return an &EPERM;. When the encoder is
+already paused, this command does nothing. No flags are defined for
+this command.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_ENC_CMD_RESUME</constant></entry>
+ <entry>3</entry>
+ <entry>Resume encoding after a PAUSE command. When the
+encoder has not been started yet, the driver will return an &EPERM;.
+When the encoder is already running, this command does nothing. No
+flags are defined for this command.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="encoder-flags">
+ <title>Encoder Command Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_ENC_CMD_STOP_AT_GOP_END</constant></entry>
+ <entry>0x0001</entry>
+ <entry>Stop encoding at the end of the current <wordasword>Group Of
+Pictures</wordasword>, rather than immediately.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <structfield>cmd</structfield> field is invalid.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EPERM</errorcode></term>
+ <listitem>
+ <para>The application sent a PAUSE or RESUME command when
+the encoder was not running.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enum-dv-timings.xml b/Documentation/DocBook/media/v4l/vidioc-enum-dv-timings.xml
new file mode 100644
index 000000000..6e3cadd4e
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-enum-dv-timings.xml
@@ -0,0 +1,133 @@
+<refentry id="vidioc-enum-dv-timings">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENUM_DV_TIMINGS, VIDIOC_SUBDEV_ENUM_DV_TIMINGS</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENUM_DV_TIMINGS</refname>
+ <refname>VIDIOC_SUBDEV_ENUM_DV_TIMINGS</refname>
+ <refpurpose>Enumerate supported Digital Video timings</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_enum_dv_timings *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENUM_DV_TIMINGS, VIDIOC_SUBDEV_ENUM_DV_TIMINGS</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental"> experimental </link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>While some DV receivers or transmitters support a wide range of timings, others
+support only a limited number of timings. With this ioctl applications can enumerate a list
+of known supported timings. Call &VIDIOC-DV-TIMINGS-CAP; to check if it also supports other
+standards or even custom timings that are not in this list.</para>
+
+ <para>To query the available timings, applications initialize the
+<structfield>index</structfield> field and zero the reserved array of &v4l2-enum-dv-timings;
+and call the <constant>VIDIOC_ENUM_DV_TIMINGS</constant> ioctl on a video node with a
+pointer to this structure. Drivers fill the rest of the structure or return an
+&EINVAL; when the index is out of bounds. To enumerate all supported DV timings,
+applications shall begin at index zero, incrementing by one until the
+driver returns <errorcode>EINVAL</errorcode>. Note that drivers may enumerate a
+different set of DV timings after switching the video input or
+output.</para>
+
+ <para>When implemented by the driver DV timings of subdevices can be queried
+by calling the <constant>VIDIOC_SUBDEV_ENUM_DV_TIMINGS</constant> ioctl directly
+on a subdevice node. The DV timings are specific to inputs (for DV receivers) or
+outputs (for DV transmitters), applications must specify the desired pad number
+in the &v4l2-enum-dv-timings; <structfield>pad</structfield> field. Attempts to
+enumerate timings on a pad that doesn't support them will return an &EINVAL;.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-enum-dv-timings">
+ <title>struct <structname>v4l2_enum_dv_timings</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Number of the DV timings, set by the
+application.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pad</structfield></entry>
+ <entry>Pad number as reported by the media controller API. This field
+ is only used when operating on a subdevice node. When operating on a
+ video node applications must set this field to zero.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[2]</entry>
+ <entry>Reserved for future extensions. Drivers and applications must
+ set the array to zero.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-dv-timings;</entry>
+ <entry><structfield>timings</structfield></entry>
+ <entry>The timings.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-enum-dv-timings; <structfield>index</structfield>
+is out of bounds or the <structfield>pad</structfield> number is invalid.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENODATA</errorcode></term>
+ <listitem>
+ <para>Digital video presets are not supported for this input or output.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enum-fmt.xml b/Documentation/DocBook/media/v4l/vidioc-enum-fmt.xml
new file mode 100644
index 000000000..f8dfeed34
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-enum-fmt.xml
@@ -0,0 +1,159 @@
+<refentry id="vidioc-enum-fmt">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENUM_FMT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENUM_FMT</refname>
+ <refpurpose>Enumerate image formats</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_fmtdesc
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENUM_FMT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To enumerate image formats applications initialize the
+<structfield>type</structfield> and <structfield>index</structfield>
+field of &v4l2-fmtdesc; and call the
+<constant>VIDIOC_ENUM_FMT</constant> ioctl with a pointer to this
+structure. Drivers fill the rest of the structure or return an
+&EINVAL;. All formats are enumerable by beginning at index zero and
+incrementing by one until <errorcode>EINVAL</errorcode> is
+returned.</para>
+
+ <para>Note that after switching input or output the list of enumerated image
+formats may be different.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-fmtdesc">
+ <title>struct <structname>v4l2_fmtdesc</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Number of the format in the enumeration, set by
+the application. This is in no way related to the <structfield>
+pixelformat</structfield> field.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of the data stream, set by the application.
+Only these types are valid here:
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant>,
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE</constant>,
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant>,
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE</constant> and
+<constant>V4L2_BUF_TYPE_VIDEO_OVERLAY</constant>. See <xref linkend="v4l2-buf-type" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>See <xref linkend="fmtdesc-flags" /></entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>description</structfield>[32]</entry>
+ <entry>Description of the format, a NUL-terminated ASCII
+string. This information is intended for the user, for example: "YUV
+4:2:2".</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pixelformat</structfield></entry>
+ <entry>The image format identifier. This is a
+four character code as computed by the v4l2_fourcc()
+macro:</entry>
+ </row>
+ <row>
+ <entry spanname="hspan"><para><programlisting id="v4l2-fourcc">
+#define v4l2_fourcc(a,b,c,d) (((__u32)(a)&lt;&lt;0)|((__u32)(b)&lt;&lt;8)|((__u32)(c)&lt;&lt;16)|((__u32)(d)&lt;&lt;24))
+</programlisting></para><para>Several image formats are already
+defined by this specification in <xref linkend="pixfmt" />. Note these
+codes are not the same as those used in the Windows world.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[4]</entry>
+ <entry>Reserved for future extensions. Drivers must set
+the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="fmtdesc-flags">
+ <title>Image Format Description Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_FMT_FLAG_COMPRESSED</constant></entry>
+ <entry>0x0001</entry>
+ <entry>This is a compressed format.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FMT_FLAG_EMULATED</constant></entry>
+ <entry>0x0002</entry>
+ <entry>This format is not native to the device but emulated
+through software (usually libv4l2), where possible try to use a native format
+instead for better performance.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-fmtdesc; <structfield>type</structfield>
+is not supported or the <structfield>index</structfield> is out of
+bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enum-frameintervals.xml b/Documentation/DocBook/media/v4l/vidioc-enum-frameintervals.xml
new file mode 100644
index 000000000..5fd72c4c3
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-enum-frameintervals.xml
@@ -0,0 +1,259 @@
+<refentry id="vidioc-enum-frameintervals">
+
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENUM_FRAMEINTERVALS</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENUM_FRAMEINTERVALS</refname>
+ <refpurpose>Enumerate frame intervals</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_frmivalenum *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENUM_FRAMEINTERVALS</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para>Pointer to a &v4l2-frmivalenum; structure that
+contains a pixel format and size and receives a frame interval.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>This ioctl allows applications to enumerate all frame
+intervals that the device supports for the given pixel format and
+frame size.</para>
+ <para>The supported pixel formats and frame sizes can be obtained
+by using the &VIDIOC-ENUM-FMT; and &VIDIOC-ENUM-FRAMESIZES;
+functions.</para>
+ <para>The return value and the content of the
+<structfield>v4l2_frmivalenum.type</structfield> field depend on the
+type of frame intervals the device supports. Here are the semantics of
+the function for the different cases:</para>
+ <itemizedlist>
+ <listitem>
+ <para><emphasis role="bold">Discrete:</emphasis> The function
+returns success if the given index value (zero-based) is valid. The
+application should increase the index by one for each call until
+<constant>EINVAL</constant> is returned. The `v4l2_frmivalenum.type`
+field is set to `V4L2_FRMIVAL_TYPE_DISCRETE` by the driver. Of the
+union only the `discrete` member is valid.</para>
+ </listitem>
+ <listitem>
+ <para><emphasis role="bold">Step-wise:</emphasis> The function
+returns success if the given index value is zero and
+<constant>EINVAL</constant> for any other index value. The
+<structfield>v4l2_frmivalenum.type</structfield> field is set to
+<constant>V4L2_FRMIVAL_TYPE_STEPWISE</constant> by the driver. Of the
+union only the <structfield>stepwise</structfield> member is
+valid.</para>
+ </listitem>
+ <listitem>
+ <para><emphasis role="bold">Continuous:</emphasis> This is a
+special case of the step-wise type above. The function returns success
+if the given index value is zero and <constant>EINVAL</constant> for
+any other index value. The
+<structfield>v4l2_frmivalenum.type</structfield> field is set to
+<constant>V4L2_FRMIVAL_TYPE_CONTINUOUS</constant> by the driver. Of
+the union only the <structfield>stepwise</structfield> member is valid
+and the <structfield>step</structfield> value is set to 1.</para>
+ </listitem>
+ </itemizedlist>
+
+ <para>When the application calls the function with index zero, it
+must check the <structfield>type</structfield> field to determine the
+type of frame interval enumeration the device supports. Only for the
+<constant>V4L2_FRMIVAL_TYPE_DISCRETE</constant> type does it make
+sense to increase the index value to receive more frame
+intervals.</para>
+ <para>Note that the order in which the frame intervals are
+returned has no special meaning. In particular does it not say
+anything about potential default frame intervals.</para>
+ <para>Applications can assume that the enumeration data does not
+change without any interaction from the application itself. This means
+that the enumeration data is consistent if the application does not
+perform any other ioctl calls while it runs the frame interval
+enumeration.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>Notes</title>
+
+ <itemizedlist>
+ <listitem>
+ <para><emphasis role="bold">Frame intervals and frame
+rates:</emphasis> The V4L2 API uses frame intervals instead of frame
+rates. Given the frame interval the frame rate can be computed as
+follows:<screen>frame_rate = 1 / frame_interval</screen></para>
+ </listitem>
+ </itemizedlist>
+
+ </refsect1>
+
+ <refsect1>
+ <title>Structs</title>
+
+ <para>In the structs below, <emphasis>IN</emphasis> denotes a
+value that has to be filled in by the application,
+<emphasis>OUT</emphasis> denotes values that the driver fills in. The
+application should zero out all members except for the
+<emphasis>IN</emphasis> fields.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-frmival-stepwise">
+ <title>struct <structname>v4l2_frmival_stepwise</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>&v4l2-fract;</entry>
+ <entry><structfield>min</structfield></entry>
+ <entry>Minimum frame interval [s].</entry>
+ </row>
+ <row>
+ <entry>&v4l2-fract;</entry>
+ <entry><structfield>max</structfield></entry>
+ <entry>Maximum frame interval [s].</entry>
+ </row>
+ <row>
+ <entry>&v4l2-fract;</entry>
+ <entry><structfield>step</structfield></entry>
+ <entry>Frame interval step size [s].</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-frmivalenum">
+ <title>struct <structname>v4l2_frmivalenum</structname></title>
+ <tgroup cols="4">
+ <colspec colname="c1" />
+ <colspec colname="c2" />
+ <colspec colname="c3" />
+ <colspec colname="c4" />
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry></entry>
+ <entry>IN: Index of the given frame interval in the
+enumeration.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pixel_format</structfield></entry>
+ <entry></entry>
+ <entry>IN: Pixel format for which the frame intervals are
+enumerated.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>width</structfield></entry>
+ <entry></entry>
+ <entry>IN: Frame width for which the frame intervals are
+enumerated.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>height</structfield></entry>
+ <entry></entry>
+ <entry>IN: Frame height for which the frame intervals are
+enumerated.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry></entry>
+ <entry>OUT: Frame interval type the device supports.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>OUT: Frame interval with the given index.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-fract;</entry>
+ <entry><structfield>discrete</structfield></entry>
+ <entry>Frame interval [s].</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-frmival-stepwise;</entry>
+ <entry><structfield>stepwise</structfield></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved[2]</structfield></entry>
+ <entry></entry>
+ <entry>Reserved space for future use.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ <title>Enums</title>
+
+ <table pgwide="1" frame="none" id="v4l2-frmivaltypes">
+ <title>enum <structname>v4l2_frmivaltypes</structname></title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_FRMIVAL_TYPE_DISCRETE</constant></entry>
+ <entry>1</entry>
+ <entry>Discrete frame interval.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FRMIVAL_TYPE_CONTINUOUS</constant></entry>
+ <entry>2</entry>
+ <entry>Continuous frame interval.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FRMIVAL_TYPE_STEPWISE</constant></entry>
+ <entry>3</entry>
+ <entry>Step-wise defined frame interval.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ </refsect1>
+
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enum-framesizes.xml b/Documentation/DocBook/media/v4l/vidioc-enum-framesizes.xml
new file mode 100644
index 000000000..a78454b5a
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-enum-framesizes.xml
@@ -0,0 +1,264 @@
+<refentry id="vidioc-enum-framesizes">
+
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENUM_FRAMESIZES</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENUM_FRAMESIZES</refname>
+ <refpurpose>Enumerate frame sizes</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_frmsizeenum *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENUM_FRAMESIZES</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para>Pointer to a &v4l2-frmsizeenum; that contains an index
+and pixel format and receives a frame width and height.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>This ioctl allows applications to enumerate all frame sizes
+(&ie; width and height in pixels) that the device supports for the
+given pixel format.</para>
+ <para>The supported pixel formats can be obtained by using the
+&VIDIOC-ENUM-FMT; function.</para>
+ <para>The return value and the content of the
+<structfield>v4l2_frmsizeenum.type</structfield> field depend on the
+type of frame sizes the device supports. Here are the semantics of the
+function for the different cases:</para>
+
+ <itemizedlist>
+ <listitem>
+ <para><emphasis role="bold">Discrete:</emphasis> The function
+returns success if the given index value (zero-based) is valid. The
+application should increase the index by one for each call until
+<constant>EINVAL</constant> is returned. The
+<structfield>v4l2_frmsizeenum.type</structfield> field is set to
+<constant>V4L2_FRMSIZE_TYPE_DISCRETE</constant> by the driver. Of the
+union only the <structfield>discrete</structfield> member is
+valid.</para>
+ </listitem>
+ <listitem>
+ <para><emphasis role="bold">Step-wise:</emphasis> The function
+returns success if the given index value is zero and
+<constant>EINVAL</constant> for any other index value. The
+<structfield>v4l2_frmsizeenum.type</structfield> field is set to
+<constant>V4L2_FRMSIZE_TYPE_STEPWISE</constant> by the driver. Of the
+union only the <structfield>stepwise</structfield> member is
+valid.</para>
+ </listitem>
+ <listitem>
+ <para><emphasis role="bold">Continuous:</emphasis> This is a
+special case of the step-wise type above. The function returns success
+if the given index value is zero and <constant>EINVAL</constant> for
+any other index value. The
+<structfield>v4l2_frmsizeenum.type</structfield> field is set to
+<constant>V4L2_FRMSIZE_TYPE_CONTINUOUS</constant> by the driver. Of
+the union only the <structfield>stepwise</structfield> member is valid
+and the <structfield>step_width</structfield> and
+<structfield>step_height</structfield> values are set to 1.</para>
+ </listitem>
+ </itemizedlist>
+
+ <para>When the application calls the function with index zero, it
+must check the <structfield>type</structfield> field to determine the
+type of frame size enumeration the device supports. Only for the
+<constant>V4L2_FRMSIZE_TYPE_DISCRETE</constant> type does it make
+sense to increase the index value to receive more frame sizes.</para>
+ <para>Note that the order in which the frame sizes are returned
+has no special meaning. In particular does it not say anything about
+potential default format sizes.</para>
+ <para>Applications can assume that the enumeration data does not
+change without any interaction from the application itself. This means
+that the enumeration data is consistent if the application does not
+perform any other ioctl calls while it runs the frame size
+enumeration.</para>
+ </refsect1>
+
+ <refsect1>
+ <title>Structs</title>
+
+ <para>In the structs below, <emphasis>IN</emphasis> denotes a
+value that has to be filled in by the application,
+<emphasis>OUT</emphasis> denotes values that the driver fills in. The
+application should zero out all members except for the
+<emphasis>IN</emphasis> fields.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-frmsize-discrete">
+ <title>struct <structname>v4l2_frmsize_discrete</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>width</structfield></entry>
+ <entry>Width of the frame [pixel].</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>height</structfield></entry>
+ <entry>Height of the frame [pixel].</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-frmsize-stepwise">
+ <title>struct <structname>v4l2_frmsize_stepwise</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>min_width</structfield></entry>
+ <entry>Minimum frame width [pixel].</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>max_width</structfield></entry>
+ <entry>Maximum frame width [pixel].</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>step_width</structfield></entry>
+ <entry>Frame width step size [pixel].</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>min_height</structfield></entry>
+ <entry>Minimum frame height [pixel].</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>max_height</structfield></entry>
+ <entry>Maximum frame height [pixel].</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>step_height</structfield></entry>
+ <entry>Frame height step size [pixel].</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-frmsizeenum">
+ <title>struct <structname>v4l2_frmsizeenum</structname></title>
+ <tgroup cols="4">
+ <colspec colname="c1" />
+ <colspec colname="c2" />
+ <colspec colname="c3" />
+ <colspec colname="c4" />
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry></entry>
+ <entry>IN: Index of the given frame size in the enumeration.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pixel_format</structfield></entry>
+ <entry></entry>
+ <entry>IN: Pixel format for which the frame sizes are enumerated.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry></entry>
+ <entry>OUT: Frame size type the device supports.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>OUT: Frame size with the given index.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-frmsize-discrete;</entry>
+ <entry><structfield>discrete</structfield></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-frmsize-stepwise;</entry>
+ <entry><structfield>stepwise</structfield></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved[2]</structfield></entry>
+ <entry></entry>
+ <entry>Reserved space for future use.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ <title>Enums</title>
+
+ <table pgwide="1" frame="none" id="v4l2-frmsizetypes">
+ <title>enum <structname>v4l2_frmsizetypes</structname></title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_FRMSIZE_TYPE_DISCRETE</constant></entry>
+ <entry>1</entry>
+ <entry>Discrete frame size.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FRMSIZE_TYPE_CONTINUOUS</constant></entry>
+ <entry>2</entry>
+ <entry>Continuous frame size.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FRMSIZE_TYPE_STEPWISE</constant></entry>
+ <entry>3</entry>
+ <entry>Step-wise defined frame size.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enum-freq-bands.xml b/Documentation/DocBook/media/v4l/vidioc-enum-freq-bands.xml
new file mode 100644
index 000000000..4e8ea65f7
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-enum-freq-bands.xml
@@ -0,0 +1,181 @@
+<refentry id="vidioc-enum-freq-bands">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENUM_FREQ_BANDS</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENUM_FREQ_BANDS</refname>
+ <refpurpose>Enumerate supported frequency bands</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_frequency_band
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENUM_FREQ_BANDS</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental"> experimental </link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>Enumerates the frequency bands that a tuner or modulator supports.
+To do this applications initialize the <structfield>tuner</structfield>,
+<structfield>type</structfield> and <structfield>index</structfield> fields,
+and zero out the <structfield>reserved</structfield> array of a &v4l2-frequency-band; and
+call the <constant>VIDIOC_ENUM_FREQ_BANDS</constant> ioctl with a pointer
+to this structure.</para>
+
+ <para>This ioctl is supported if the <constant>V4L2_TUNER_CAP_FREQ_BANDS</constant> capability
+ of the corresponding tuner/modulator is set.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-frequency-band">
+ <title>struct <structname>v4l2_frequency_band</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>tuner</structfield></entry>
+ <entry>The tuner or modulator index number. This is the
+same value as in the &v4l2-input; <structfield>tuner</structfield>
+field and the &v4l2-tuner; <structfield>index</structfield> field, or
+the &v4l2-output; <structfield>modulator</structfield> field and the
+&v4l2-modulator; <structfield>index</structfield> field.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>The tuner type. This is the same value as in the
+&v4l2-tuner; <structfield>type</structfield> field. The type must be set
+to <constant>V4L2_TUNER_RADIO</constant> for <filename>/dev/radioX</filename>
+device nodes, and to <constant>V4L2_TUNER_ANALOG_TV</constant>
+for all others. Set this field to <constant>V4L2_TUNER_RADIO</constant> for
+modulators (currently only radio modulators are supported).
+See <xref linkend="v4l2-tuner-type" /></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Identifies the frequency band, set by the application.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capability</structfield></entry>
+ <entry spanname="hspan">The tuner/modulator capability flags for
+this frequency band, see <xref linkend="tuner-capability" />. The <constant>V4L2_TUNER_CAP_LOW</constant>
+or <constant>V4L2_TUNER_CAP_1HZ</constant> capability must be the same for all frequency bands of the selected tuner/modulator.
+So either all bands have that capability set, or none of them have that capability.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>rangelow</structfield></entry>
+ <entry spanname="hspan">The lowest tunable frequency in
+units of 62.5 kHz, or if the <structfield>capability</structfield>
+flag <constant>V4L2_TUNER_CAP_LOW</constant> is set, in units of 62.5
+Hz, for this frequency band. A 1 Hz unit is used when the <structfield>capability</structfield> flag
+<constant>V4L2_TUNER_CAP_1HZ</constant> is set.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>rangehigh</structfield></entry>
+ <entry spanname="hspan">The highest tunable frequency in
+units of 62.5 kHz, or if the <structfield>capability</structfield>
+flag <constant>V4L2_TUNER_CAP_LOW</constant> is set, in units of 62.5
+Hz, for this frequency band. A 1 Hz unit is used when the <structfield>capability</structfield> flag
+<constant>V4L2_TUNER_CAP_1HZ</constant> is set.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>modulation</structfield></entry>
+ <entry spanname="hspan">The supported modulation systems of this frequency band.
+ See <xref linkend="band-modulation" />. Note that currently only one
+ modulation system per frequency band is supported. More work will need to
+ be done if multiple modulation systems are possible. Contact the
+ linux-media mailing list (&v4l-ml;) if you need that functionality.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[9]</entry>
+ <entry>Reserved for future extensions. Applications and drivers
+ must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="band-modulation">
+ <title>Band Modulation Systems</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_BAND_MODULATION_VSB</constant></entry>
+ <entry>0x02</entry>
+ <entry>Vestigial Sideband modulation, used for analog TV.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BAND_MODULATION_FM</constant></entry>
+ <entry>0x04</entry>
+ <entry>Frequency Modulation, commonly used for analog radio.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_BAND_MODULATION_AM</constant></entry>
+ <entry>0x08</entry>
+ <entry>Amplitude Modulation, commonly used for analog radio.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <structfield>tuner</structfield> or <structfield>index</structfield>
+is out of bounds or the <structfield>type</structfield> field is wrong.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enumaudio.xml b/Documentation/DocBook/media/v4l/vidioc-enumaudio.xml
new file mode 100644
index 000000000..ea816ab2e
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-enumaudio.xml
@@ -0,0 +1,76 @@
+<refentry id="vidioc-enumaudio">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENUMAUDIO</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENUMAUDIO</refname>
+ <refpurpose>Enumerate audio inputs</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_audio *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENUMAUDIO</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the attributes of an audio input applications
+initialize the <structfield>index</structfield> field and zero out the
+<structfield>reserved</structfield> array of a &v4l2-audio;
+and call the <constant>VIDIOC_ENUMAUDIO</constant> ioctl with a pointer
+to this structure. Drivers fill the rest of the structure or return an
+&EINVAL; when the index is out of bounds. To enumerate all audio
+inputs applications shall begin at index zero, incrementing by one
+until the driver returns <errorcode>EINVAL</errorcode>.</para>
+
+ <para>See <xref linkend="vidioc-g-audio" /> for a description of
+&v4l2-audio;.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The number of the audio input is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enumaudioout.xml b/Documentation/DocBook/media/v4l/vidioc-enumaudioout.xml
new file mode 100644
index 000000000..2e87cedb0
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-enumaudioout.xml
@@ -0,0 +1,79 @@
+<refentry id="vidioc-enumaudioout">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENUMAUDOUT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENUMAUDOUT</refname>
+ <refpurpose>Enumerate audio outputs</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_audioout *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENUMAUDOUT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the attributes of an audio output applications
+initialize the <structfield>index</structfield> field and zero out the
+<structfield>reserved</structfield> array of a &v4l2-audioout; and
+call the <constant>VIDIOC_G_AUDOUT</constant> ioctl with a pointer
+to this structure. Drivers fill the rest of the structure or return an
+&EINVAL; when the index is out of bounds. To enumerate all audio
+outputs applications shall begin at index zero, incrementing by one
+until the driver returns <errorcode>EINVAL</errorcode>.</para>
+
+ <para>Note connectors on a TV card to loop back the received audio
+signal to a sound card are not audio outputs in this sense.</para>
+
+ <para>See <xref linkend="vidioc-g-audioout" /> for a description of
+&v4l2-audioout;.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The number of the audio output is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enuminput.xml b/Documentation/DocBook/media/v4l/vidioc-enuminput.xml
new file mode 100644
index 000000000..603fecef9
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-enuminput.xml
@@ -0,0 +1,316 @@
+<refentry id="vidioc-enuminput">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENUMINPUT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENUMINPUT</refname>
+ <refpurpose>Enumerate video inputs</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_input
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENUMINPUT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the attributes of a video input applications
+initialize the <structfield>index</structfield> field of &v4l2-input;
+and call the <constant>VIDIOC_ENUMINPUT</constant> ioctl with a
+pointer to this structure. Drivers fill the rest of the structure or
+return an &EINVAL; when the index is out of bounds. To enumerate all
+inputs applications shall begin at index zero, incrementing by one
+until the driver returns <errorcode>EINVAL</errorcode>.</para>
+
+ <table frame="none" pgwide="1" id="v4l2-input">
+ <title>struct <structname>v4l2_input</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Identifies the input, set by the
+application.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>name</structfield>[32]</entry>
+ <entry>Name of the video input, a NUL-terminated ASCII
+string, for example: "Vin (Composite 2)". This information is intended
+for the user, preferably the connector label on the device itself.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of the input, see <xref
+ linkend="input-type" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>audioset</structfield></entry>
+ <entry><para>Drivers can enumerate up to 32 video and
+audio inputs. This field shows which audio inputs were selectable as
+audio source if this was the currently selected video input. It is a
+bit mask. The LSB corresponds to audio input 0, the MSB to input 31.
+Any number of bits can be set, or none.</para><para>When the driver
+does not enumerate audio inputs no bits must be set. Applications
+shall not interpret this as lack of audio support. Some drivers
+automatically select audio sources and do not enumerate them since
+there is no choice anyway.</para><para>For details on audio inputs and
+how to select the current input see <xref
+ linkend="audio" />.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>tuner</structfield></entry>
+ <entry>Capture devices can have zero or more tuners (RF
+demodulators). When the <structfield>type</structfield> is set to
+<constant>V4L2_INPUT_TYPE_TUNER</constant> this is an RF connector and
+this field identifies the tuner. It corresponds to
+&v4l2-tuner; field <structfield>index</structfield>. For details on
+tuners see <xref linkend="tuner" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-std-id;</entry>
+ <entry><structfield>std</structfield></entry>
+ <entry>Every video input supports one or more different
+video standards. This field is a set of all supported standards. For
+details on video standards and how to switch see <xref
+linkend="standard" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>status</structfield></entry>
+ <entry>This field provides status information about the
+input. See <xref linkend="input-status" /> for flags.
+With the exception of the sensor orientation bits <structfield>status</structfield> is only valid when this is the
+current input.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capabilities</structfield></entry>
+ <entry>This field provides capabilities for the
+input. See <xref linkend="input-capabilities" /> for flags.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[3]</entry>
+ <entry>Reserved for future extensions. Drivers must set
+the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="input-type">
+ <title>Input Types</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_INPUT_TYPE_TUNER</constant></entry>
+ <entry>1</entry>
+ <entry>This input uses a tuner (RF demodulator).</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_INPUT_TYPE_CAMERA</constant></entry>
+ <entry>2</entry>
+ <entry>Analog baseband input, for example CVBS /
+Composite Video, S-Video, RGB.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <!-- Status flags based on proposal by Mark McClelland,
+video4linux-list@redhat.com on 18 Oct 2002, subject "Re: [V4L] Re:
+v4l2 api". "Why are some of them inverted? So that the driver doesn't
+have to lie about the status in cases where it can't tell one way or
+the other. Plus, a status of zero would generally mean that everything
+is OK." -->
+
+ <table frame="none" pgwide="1" id="input-status">
+ <title>Input Status Flags</title>
+ <tgroup cols="3">
+ <colspec colname="c1" />
+ <colspec colname="c2" align="center" />
+ <colspec colname="c3" />
+ <spanspec namest="c1" nameend="c3" spanname="hspan"
+ align="left" />
+ <tbody valign="top">
+ <row>
+ <entry spanname="hspan">General</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_NO_POWER</constant></entry>
+ <entry>0x00000001</entry>
+ <entry>Attached device is off.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_NO_SIGNAL</constant></entry>
+ <entry>0x00000002</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_NO_COLOR</constant></entry>
+ <entry>0x00000004</entry>
+ <entry>The hardware supports color decoding, but does not
+detect color modulation in the signal.</entry>
+ </row>
+ <row>
+ <entry spanname="hspan">Sensor Orientation</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_HFLIP</constant></entry>
+ <entry>0x00000010</entry>
+ <entry>The input is connected to a device that produces a signal
+that is flipped horizontally and does not correct this before passing the
+signal to userspace.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_VFLIP</constant></entry>
+ <entry>0x00000020</entry>
+ <entry>The input is connected to a device that produces a signal
+that is flipped vertically and does not correct this before passing the
+signal to userspace. Note that a 180 degree rotation is the same as HFLIP | VFLIP</entry>
+ </row>
+ <row>
+ <entry spanname="hspan">Analog Video</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_NO_H_LOCK</constant></entry>
+ <entry>0x00000100</entry>
+ <entry>No horizontal sync lock.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_COLOR_KILL</constant></entry>
+ <entry>0x00000200</entry>
+ <entry>A color killer circuit automatically disables color
+decoding when it detects no color modulation. When this flag is set
+the color killer is enabled <emphasis>and</emphasis> has shut off
+color decoding.</entry>
+ </row>
+ <row>
+ <entry spanname="hspan">Digital Video</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_NO_SYNC</constant></entry>
+ <entry>0x00010000</entry>
+ <entry>No synchronization lock.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_NO_EQU</constant></entry>
+ <entry>0x00020000</entry>
+ <entry>No equalizer lock.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_NO_CARRIER</constant></entry>
+ <entry>0x00040000</entry>
+ <entry>Carrier recovery failed.</entry>
+ </row>
+ <row>
+ <entry spanname="hspan">VCR and Set-Top Box</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_MACROVISION</constant></entry>
+ <entry>0x01000000</entry>
+ <entry>Macrovision is an analog copy prevention system
+mangling the video signal to confuse video recorders. When this
+flag is set Macrovision has been detected.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_NO_ACCESS</constant></entry>
+ <entry>0x02000000</entry>
+ <entry>Conditional access denied.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_ST_VTR</constant></entry>
+ <entry>0x04000000</entry>
+ <entry>VTR time constant. [?]</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <!-- Capability flags based on video timings RFC by Muralidharan
+Karicheri, titled RFC (v1.2): V4L - Support for video timings at the
+input/output interface to linux-media@vger.kernel.org on 19 Oct 2009.
+ -->
+ <table frame="none" pgwide="1" id="input-capabilities">
+ <title>Input capabilities</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_IN_CAP_DV_TIMINGS</constant></entry>
+ <entry>0x00000002</entry>
+ <entry>This input supports setting video timings by using VIDIOC_S_DV_TIMINGS.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_CAP_STD</constant></entry>
+ <entry>0x00000004</entry>
+ <entry>This input supports setting the TV standard by using VIDIOC_S_STD.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_IN_CAP_NATIVE_SIZE</constant></entry>
+ <entry>0x00000008</entry>
+ <entry>This input supports setting the native size using
+ the <constant>V4L2_SEL_TGT_NATIVE_SIZE</constant>
+ selection target, see <xref
+ linkend="v4l2-selections-common"/>.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-input; <structfield>index</structfield> is
+out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enumoutput.xml b/Documentation/DocBook/media/v4l/vidioc-enumoutput.xml
new file mode 100644
index 000000000..773fb1258
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-enumoutput.xml
@@ -0,0 +1,201 @@
+<refentry id="vidioc-enumoutput">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENUMOUTPUT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENUMOUTPUT</refname>
+ <refpurpose>Enumerate video outputs</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_output *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENUMOUTPUT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the attributes of a video outputs applications
+initialize the <structfield>index</structfield> field of &v4l2-output;
+and call the <constant>VIDIOC_ENUMOUTPUT</constant> ioctl with a
+pointer to this structure. Drivers fill the rest of the structure or
+return an &EINVAL; when the index is out of bounds. To enumerate all
+outputs applications shall begin at index zero, incrementing by one
+until the driver returns <errorcode>EINVAL</errorcode>.</para>
+
+ <table frame="none" pgwide="1" id="v4l2-output">
+ <title>struct <structname>v4l2_output</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Identifies the output, set by the
+application.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>name</structfield>[32]</entry>
+ <entry>Name of the video output, a NUL-terminated ASCII
+string, for example: "Vout". This information is intended for the
+user, preferably the connector label on the device itself.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of the output, see <xref
+ linkend="output-type" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>audioset</structfield></entry>
+ <entry><para>Drivers can enumerate up to 32 video and
+audio outputs. This field shows which audio outputs were
+selectable as the current output if this was the currently selected
+video output. It is a bit mask. The LSB corresponds to audio output 0,
+the MSB to output 31. Any number of bits can be set, or
+none.</para><para>When the driver does not enumerate audio outputs no
+bits must be set. Applications shall not interpret this as lack of
+audio support. Drivers may automatically select audio outputs without
+enumerating them.</para><para>For details on audio outputs and how to
+select the current output see <xref linkend="audio" />.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>modulator</structfield></entry>
+ <entry>Output devices can have zero or more RF modulators.
+When the <structfield>type</structfield> is
+<constant>V4L2_OUTPUT_TYPE_MODULATOR</constant> this is an RF
+connector and this field identifies the modulator. It corresponds to
+&v4l2-modulator; field <structfield>index</structfield>. For details
+on modulators see <xref linkend="tuner" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-std-id;</entry>
+ <entry><structfield>std</structfield></entry>
+ <entry>Every video output supports one or more different
+video standards. This field is a set of all supported standards. For
+details on video standards and how to switch see <xref
+ linkend="standard" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capabilities</structfield></entry>
+ <entry>This field provides capabilities for the
+output. See <xref linkend="output-capabilities" /> for flags.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[3]</entry>
+ <entry>Reserved for future extensions. Drivers must set
+the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table frame="none" pgwide="1" id="output-type">
+ <title>Output Type</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_OUTPUT_TYPE_MODULATOR</constant></entry>
+ <entry>1</entry>
+ <entry>This output is an analog TV modulator.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_OUTPUT_TYPE_ANALOG</constant></entry>
+ <entry>2</entry>
+ <entry>Analog baseband output, for example Composite /
+CVBS, S-Video, RGB.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_OUTPUT_TYPE_ANALOGVGAOVERLAY</constant></entry>
+ <entry>3</entry>
+ <entry>[?]</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <!-- Capabilities flags based on video timings RFC by Muralidharan
+Karicheri, titled RFC (v1.2): V4L - Support for video timings at the
+input/output interface to linux-media@vger.kernel.org on 19 Oct 2009.
+ -->
+ <table frame="none" pgwide="1" id="output-capabilities">
+ <title>Output capabilities</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_OUT_CAP_DV_TIMINGS</constant></entry>
+ <entry>0x00000002</entry>
+ <entry>This output supports setting video timings by using VIDIOC_S_DV_TIMINGS.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_OUT_CAP_STD</constant></entry>
+ <entry>0x00000004</entry>
+ <entry>This output supports setting the TV standard by using VIDIOC_S_STD.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_OUT_CAP_NATIVE_SIZE</constant></entry>
+ <entry>0x00000008</entry>
+ <entry>This output supports setting the native size using
+ the <constant>V4L2_SEL_TGT_NATIVE_SIZE</constant>
+ selection target, see <xref
+ linkend="v4l2-selections-common"/>.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </refsect1>
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-output; <structfield>index</structfield>
+is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-enumstd.xml b/Documentation/DocBook/media/v4l/vidioc-enumstd.xml
new file mode 100644
index 000000000..806509940
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-enumstd.xml
@@ -0,0 +1,389 @@
+<refentry id="vidioc-enumstd">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_ENUMSTD</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_ENUMSTD</refname>
+ <refpurpose>Enumerate supported video standards</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_standard *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_ENUMSTD</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the attributes of a video standard,
+especially a custom (driver defined) one, applications initialize the
+<structfield>index</structfield> field of &v4l2-standard; and call the
+<constant>VIDIOC_ENUMSTD</constant> ioctl with a pointer to this
+structure. Drivers fill the rest of the structure or return an
+&EINVAL; when the index is out of bounds. To enumerate all standards
+applications shall begin at index zero, incrementing by one until the
+driver returns <errorcode>EINVAL</errorcode>. Drivers may enumerate a
+different set of standards after switching the video input or
+output.<footnote>
+ <para>The supported standards may overlap and we need an
+unambiguous set to find the current standard returned by
+<constant>VIDIOC_G_STD</constant>.</para>
+ </footnote></para>
+
+ <table pgwide="1" frame="none" id="v4l2-standard">
+ <title>struct <structname>v4l2_standard</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Number of the video standard, set by the
+application.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-std-id;</entry>
+ <entry><structfield>id</structfield></entry>
+ <entry>The bits in this field identify the standard as
+one of the common standards listed in <xref linkend="v4l2-std-id" />,
+or if bits 32 to 63 are set as custom standards. Multiple bits can be
+set if the hardware does not distinguish between these standards,
+however separate indices do not indicate the opposite. The
+<structfield>id</structfield> must be unique. No other enumerated
+<structname>v4l2_standard</structname> structure, for this input or
+output anyway, can contain the same set of bits.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>name</structfield>[24]</entry>
+ <entry>Name of the standard, a NUL-terminated ASCII
+string, for example: "PAL-B/G", "NTSC Japan". This information is
+intended for the user.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-fract;</entry>
+ <entry><structfield>frameperiod</structfield></entry>
+ <entry>The frame period (not field period) is numerator
+/ denominator. For example M/NTSC has a frame period of 1001 /
+30000 seconds.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>framelines</structfield></entry>
+ <entry>Total lines per frame including blanking,
+e.&nbsp;g. 625 for B/PAL.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[4]</entry>
+ <entry>Reserved for future extensions. Drivers must set
+the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-fract">
+ <title>struct <structname>v4l2_fract</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>numerator</structfield></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>denominator</structfield></entry>
+ <entry></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-std-id">
+ <title>typedef <structname>v4l2_std_id</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u64</entry>
+ <entry><structfield>v4l2_std_id</structfield></entry>
+ <entry>This type is a set, each bit representing another
+video standard as listed below and in <xref
+linkend="video-standards" />. The 32 most significant bits are reserved
+for custom (driver defined) video standards.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <para><programlisting>
+#define V4L2_STD_PAL_B ((v4l2_std_id)0x00000001)
+#define V4L2_STD_PAL_B1 ((v4l2_std_id)0x00000002)
+#define V4L2_STD_PAL_G ((v4l2_std_id)0x00000004)
+#define V4L2_STD_PAL_H ((v4l2_std_id)0x00000008)
+#define V4L2_STD_PAL_I ((v4l2_std_id)0x00000010)
+#define V4L2_STD_PAL_D ((v4l2_std_id)0x00000020)
+#define V4L2_STD_PAL_D1 ((v4l2_std_id)0x00000040)
+#define V4L2_STD_PAL_K ((v4l2_std_id)0x00000080)
+
+#define V4L2_STD_PAL_M ((v4l2_std_id)0x00000100)
+#define V4L2_STD_PAL_N ((v4l2_std_id)0x00000200)
+#define V4L2_STD_PAL_Nc ((v4l2_std_id)0x00000400)
+#define V4L2_STD_PAL_60 ((v4l2_std_id)0x00000800)
+</programlisting></para><para><constant>V4L2_STD_PAL_60</constant> is
+a hybrid standard with 525 lines, 60 Hz refresh rate, and PAL color
+modulation with a 4.43 MHz color subcarrier. Some PAL video recorders
+can play back NTSC tapes in this mode for display on a 50/60 Hz agnostic
+PAL TV.</para><para><programlisting>
+#define V4L2_STD_NTSC_M ((v4l2_std_id)0x00001000)
+#define V4L2_STD_NTSC_M_JP ((v4l2_std_id)0x00002000)
+#define V4L2_STD_NTSC_443 ((v4l2_std_id)0x00004000)
+</programlisting></para><para><constant>V4L2_STD_NTSC_443</constant>
+is a hybrid standard with 525 lines, 60 Hz refresh rate, and NTSC
+color modulation with a 4.43 MHz color
+subcarrier.</para><para><programlisting>
+#define V4L2_STD_NTSC_M_KR ((v4l2_std_id)0x00008000)
+
+#define V4L2_STD_SECAM_B ((v4l2_std_id)0x00010000)
+#define V4L2_STD_SECAM_D ((v4l2_std_id)0x00020000)
+#define V4L2_STD_SECAM_G ((v4l2_std_id)0x00040000)
+#define V4L2_STD_SECAM_H ((v4l2_std_id)0x00080000)
+#define V4L2_STD_SECAM_K ((v4l2_std_id)0x00100000)
+#define V4L2_STD_SECAM_K1 ((v4l2_std_id)0x00200000)
+#define V4L2_STD_SECAM_L ((v4l2_std_id)0x00400000)
+#define V4L2_STD_SECAM_LC ((v4l2_std_id)0x00800000)
+
+/* ATSC/HDTV */
+#define V4L2_STD_ATSC_8_VSB ((v4l2_std_id)0x01000000)
+#define V4L2_STD_ATSC_16_VSB ((v4l2_std_id)0x02000000)
+</programlisting></para><para><!-- ATSC proposal by Mark McClelland,
+video4linux-list@redhat.com on 17 Oct 2002
+--><constant>V4L2_STD_ATSC_8_VSB</constant> and
+<constant>V4L2_STD_ATSC_16_VSB</constant> are U.S. terrestrial digital
+TV standards. Presently the V4L2 API does not support digital TV. See
+also the Linux DVB API at <ulink
+url="http://linuxtv.org">http://linuxtv.org</ulink>.</para>
+<para><programlisting>
+#define V4L2_STD_PAL_BG (V4L2_STD_PAL_B |\
+ V4L2_STD_PAL_B1 |\
+ V4L2_STD_PAL_G)
+#define V4L2_STD_B (V4L2_STD_PAL_B |\
+ V4L2_STD_PAL_B1 |\
+ V4L2_STD_SECAM_B)
+#define V4L2_STD_GH (V4L2_STD_PAL_G |\
+ V4L2_STD_PAL_H |\
+ V4L2_STD_SECAM_G |\
+ V4L2_STD_SECAM_H)
+#define V4L2_STD_PAL_DK (V4L2_STD_PAL_D |\
+ V4L2_STD_PAL_D1 |\
+ V4L2_STD_PAL_K)
+#define V4L2_STD_PAL (V4L2_STD_PAL_BG |\
+ V4L2_STD_PAL_DK |\
+ V4L2_STD_PAL_H |\
+ V4L2_STD_PAL_I)
+#define V4L2_STD_NTSC (V4L2_STD_NTSC_M |\
+ V4L2_STD_NTSC_M_JP |\
+ V4L2_STD_NTSC_M_KR)
+#define V4L2_STD_MN (V4L2_STD_PAL_M |\
+ V4L2_STD_PAL_N |\
+ V4L2_STD_PAL_Nc |\
+ V4L2_STD_NTSC)
+#define V4L2_STD_SECAM_DK (V4L2_STD_SECAM_D |\
+ V4L2_STD_SECAM_K |\
+ V4L2_STD_SECAM_K1)
+#define V4L2_STD_DK (V4L2_STD_PAL_DK |\
+ V4L2_STD_SECAM_DK)
+
+#define V4L2_STD_SECAM (V4L2_STD_SECAM_B |\
+ V4L2_STD_SECAM_G |\
+ V4L2_STD_SECAM_H |\
+ V4L2_STD_SECAM_DK |\
+ V4L2_STD_SECAM_L |\
+ V4L2_STD_SECAM_LC)
+
+#define V4L2_STD_525_60 (V4L2_STD_PAL_M |\
+ V4L2_STD_PAL_60 |\
+ V4L2_STD_NTSC |\
+ V4L2_STD_NTSC_443)
+#define V4L2_STD_625_50 (V4L2_STD_PAL |\
+ V4L2_STD_PAL_N |\
+ V4L2_STD_PAL_Nc |\
+ V4L2_STD_SECAM)
+
+#define V4L2_STD_UNKNOWN 0
+#define V4L2_STD_ALL (V4L2_STD_525_60 |\
+ V4L2_STD_625_50)
+</programlisting></para>
+
+ <table pgwide="1" id="video-standards" orient="land">
+ <title>Video Standards (based on [<xref linkend="itu470" />])</title>
+ <tgroup cols="12" colsep="1" rowsep="1" align="center">
+ <colspec colname="c1" align="left" />
+ <colspec colname="c2" />
+ <colspec colname="c3" />
+ <colspec colname="c4" />
+ <colspec colname="c5" />
+ <colspec colnum="7" colname="c7" />
+ <colspec colnum="9" colname="c9" />
+ <colspec colnum="12" colname="c12" />
+ <spanspec namest="c2" nameend="c3" spanname="m" align="center" />
+ <spanspec namest="c4" nameend="c12" spanname="x" align="center" />
+ <spanspec namest="c5" nameend="c7" spanname="b" align="center" />
+ <spanspec namest="c9" nameend="c12" spanname="s" align="center" />
+ <thead>
+ <row>
+ <entry>Characteristics</entry>
+ <entry><para>M/NTSC<footnote><para>Japan uses a standard
+similar to M/NTSC
+(V4L2_STD_NTSC_M_JP).</para></footnote></para></entry>
+ <entry>M/PAL</entry>
+ <entry><para>N/PAL<footnote><para> The values in
+brackets apply to the combination N/PAL a.k.a.
+N<subscript>C</subscript> used in Argentina
+(V4L2_STD_PAL_Nc).</para></footnote></para></entry>
+ <entry align="center">B, B1, G/PAL</entry>
+ <entry align="center">D, D1, K/PAL</entry>
+ <entry align="center">H/PAL</entry>
+ <entry align="center">I/PAL</entry>
+ <entry align="center">B, G/SECAM</entry>
+ <entry align="center">D, K/SECAM</entry>
+ <entry align="center">K1/SECAM</entry>
+ <entry align="center">L/SECAM</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry>Frame lines</entry>
+ <entry spanname="m">525</entry>
+ <entry spanname="x">625</entry>
+ </row>
+ <row>
+ <entry>Frame period (s)</entry>
+ <entry spanname="m">1001/30000</entry>
+ <entry spanname="x">1/25</entry>
+ </row>
+ <row>
+ <entry>Chrominance sub-carrier frequency (Hz)</entry>
+ <entry>3579545 &plusmn;&nbsp;10</entry>
+ <entry>3579611.49 &plusmn;&nbsp;10</entry>
+ <entry>4433618.75 &plusmn;&nbsp;5 (3582056.25
+&plusmn;&nbsp;5)</entry>
+ <entry spanname="b">4433618.75 &plusmn;&nbsp;5</entry>
+ <entry>4433618.75 &plusmn;&nbsp;1</entry>
+ <entry spanname="s">f<subscript>OR</subscript>&nbsp;=
+4406250 &plusmn;&nbsp;2000, f<subscript>OB</subscript>&nbsp;= 4250000
+&plusmn;&nbsp;2000</entry>
+ </row>
+ <row>
+ <entry>Nominal radio-frequency channel bandwidth
+(MHz)</entry>
+ <entry>6</entry>
+ <entry>6</entry>
+ <entry>6</entry>
+ <entry>B: 7; B1, G: 8</entry>
+ <entry>8</entry>
+ <entry>8</entry>
+ <entry>8</entry>
+ <entry>8</entry>
+ <entry>8</entry>
+ <entry>8</entry>
+ <entry>8</entry>
+ </row>
+ <row>
+ <entry>Sound carrier relative to vision carrier
+(MHz)</entry>
+ <entry>+&nbsp;4.5</entry>
+ <entry>+&nbsp;4.5</entry>
+ <entry>+&nbsp;4.5</entry>
+ <entry><para>+&nbsp;5.5 &plusmn;&nbsp;0.001
+<footnote><para>In the Federal Republic of Germany, Austria, Italy,
+the Netherlands, Slovakia and Switzerland a system of two sound
+carriers is used, the frequency of the second carrier being
+242.1875&nbsp;kHz above the frequency of the first sound carrier. For
+stereophonic sound transmissions a similar system is used in
+Australia.</para></footnote> <footnote><para>New Zealand uses a sound
+carrier displaced 5.4996 &plusmn;&nbsp;0.0005 MHz from the vision
+carrier.</para></footnote> <footnote><para>In Denmark, Finland, New
+Zealand, Sweden and Spain a system of two sound carriers is used. In
+Iceland, Norway and Poland the same system is being introduced. The
+second carrier is 5.85&nbsp;MHz above the vision carrier and is DQPSK
+modulated with 728&nbsp;kbit/s sound and data multiplex. (NICAM
+system)</para></footnote> <footnote><para>In the United Kingdom, a
+system of two sound carriers is used. The second sound carrier is
+6.552&nbsp;MHz above the vision carrier and is DQPSK modulated with a
+728&nbsp;kbit/s sound and data multiplex able to carry two sound
+channels. (NICAM system)</para></footnote></para></entry>
+ <entry>+&nbsp;6.5 &plusmn;&nbsp;0.001</entry>
+ <entry>+&nbsp;5.5</entry>
+ <entry>+&nbsp;5.9996 &plusmn;&nbsp;0.0005</entry>
+ <entry>+&nbsp;5.5 &plusmn;&nbsp;0.001</entry>
+ <entry>+&nbsp;6.5 &plusmn;&nbsp;0.001</entry>
+ <entry>+&nbsp;6.5</entry>
+ <entry><para>+&nbsp;6.5 <footnote><para>In France, a
+digital carrier 5.85 MHz away from the vision carrier may be used in
+addition to the main sound carrier. It is modulated in differentially
+encoded QPSK with a 728 kbit/s sound and data multiplexer capable of
+carrying two sound channels. (NICAM
+system)</para></footnote></para></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-standard; <structfield>index</structfield>
+is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENODATA</errorcode></term>
+ <listitem>
+ <para>Standard video timings are not supported for this input or output.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-expbuf.xml b/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
new file mode 100644
index 000000000..4165e7bfa
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-expbuf.xml
@@ -0,0 +1,210 @@
+<refentry id="vidioc-expbuf">
+
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_EXPBUF</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_EXPBUF</refname>
+ <refpurpose>Export a buffer as a DMABUF file descriptor.</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_exportbuffer *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_EXPBUF</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental"> experimental </link>
+ interface and may change in the future.</para>
+ </note>
+
+<para>This ioctl is an extension to the <link linkend="mmap">memory
+mapping</link> I/O method, therefore it is available only for
+<constant>V4L2_MEMORY_MMAP</constant> buffers. It can be used to export a
+buffer as a DMABUF file at any time after buffers have been allocated with the
+&VIDIOC-REQBUFS; ioctl.</para>
+
+<para> To export a buffer, applications fill &v4l2-exportbuffer;. The
+<structfield> type </structfield> field is set to the same buffer type as was
+previously used with &v4l2-requestbuffers;<structfield> type </structfield>.
+Applications must also set the <structfield> index </structfield> field. Valid
+index numbers range from zero to the number of buffers allocated with
+&VIDIOC-REQBUFS; (&v4l2-requestbuffers;<structfield> count </structfield>)
+minus one. For the multi-planar API, applications set the <structfield> plane
+</structfield> field to the index of the plane to be exported. Valid planes
+range from zero to the maximal number of valid planes for the currently active
+format. For the single-planar API, applications must set <structfield> plane
+</structfield> to zero. Additional flags may be posted in the <structfield>
+flags </structfield> field. Refer to a manual for open() for details.
+Currently only O_CLOEXEC, O_RDONLY, O_WRONLY, and O_RDWR are supported. All
+other fields must be set to zero.
+In the case of multi-planar API, every plane is exported separately using
+multiple <constant> VIDIOC_EXPBUF </constant> calls. </para>
+
+<para> After calling <constant>VIDIOC_EXPBUF</constant> the <structfield> fd
+</structfield> field will be set by a driver. This is a DMABUF file
+descriptor. The application may pass it to other DMABUF-aware devices. Refer to
+<link linkend="dmabuf">DMABUF importing</link> for details about importing
+DMABUF files into V4L2 nodes. It is recommended to close a DMABUF file when it
+is no longer used to allow the associated memory to be reclaimed. </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Examples</title>
+
+ <example>
+ <title>Exporting a buffer.</title>
+ <programlisting>
+int buffer_export(int v4lfd, &v4l2-buf-type; bt, int index, int *dmafd)
+{
+ &v4l2-exportbuffer; expbuf;
+
+ memset(&amp;expbuf, 0, sizeof(expbuf));
+ expbuf.type = bt;
+ expbuf.index = index;
+ if (ioctl(v4lfd, &VIDIOC-EXPBUF;, &amp;expbuf) == -1) {
+ perror("VIDIOC_EXPBUF");
+ return -1;
+ }
+
+ *dmafd = expbuf.fd;
+
+ return 0;
+}
+ </programlisting>
+ </example>
+
+ <example>
+ <title>Exporting a buffer using the multi-planar API.</title>
+ <programlisting>
+int buffer_export_mp(int v4lfd, &v4l2-buf-type; bt, int index,
+ int dmafd[], int n_planes)
+{
+ int i;
+
+ for (i = 0; i &lt; n_planes; ++i) {
+ &v4l2-exportbuffer; expbuf;
+
+ memset(&amp;expbuf, 0, sizeof(expbuf));
+ expbuf.type = bt;
+ expbuf.index = index;
+ expbuf.plane = i;
+ if (ioctl(v4lfd, &VIDIOC-EXPBUF;, &amp;expbuf) == -1) {
+ perror("VIDIOC_EXPBUF");
+ while (i)
+ close(dmafd[--i]);
+ return -1;
+ }
+ dmafd[i] = expbuf.fd;
+ }
+
+ return 0;
+}
+ </programlisting>
+ </example>
+
+ <table pgwide="1" frame="none" id="v4l2-exportbuffer">
+ <title>struct <structname>v4l2_exportbuffer</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of the buffer, same as &v4l2-format;
+<structfield>type</structfield> or &v4l2-requestbuffers;
+<structfield>type</structfield>, set by the application. See <xref
+linkend="v4l2-buf-type" /></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Number of the buffer, set by the application. This field is
+only used for <link linkend="mmap">memory mapping</link> I/O and can range from
+zero to the number of buffers allocated with the &VIDIOC-REQBUFS; and/or
+&VIDIOC-CREATE-BUFS; ioctls. </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>plane</structfield></entry>
+ <entry>Index of the plane to be exported when using the
+multi-planar API. Otherwise this value must be set to zero. </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Flags for the newly created file, currently only <constant>
+O_CLOEXEC </constant>, <constant>O_RDONLY</constant>, <constant>O_WRONLY
+</constant>, and <constant>O_RDWR</constant> are supported, refer to the manual
+of open() for more details.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>fd</structfield></entry>
+ <entry>The DMABUF file descriptor associated with a buffer. Set by
+ the driver.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved[11]</structfield></entry>
+ <entry>Reserved field for future use. Must be set to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>A queue is not in MMAP mode or DMABUF exporting is not
+supported or <structfield> flags </structfield> or <structfield> type
+</structfield> or <structfield> index </structfield> or <structfield> plane
+</structfield> fields are invalid.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-audio.xml b/Documentation/DocBook/media/v4l/vidioc-g-audio.xml
new file mode 100644
index 000000000..d7bb9b373
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-audio.xml
@@ -0,0 +1,172 @@
+<refentry id="vidioc-g-audio">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_AUDIO, VIDIOC_S_AUDIO</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_AUDIO</refname>
+ <refname>VIDIOC_S_AUDIO</refname>
+ <refpurpose>Query or select the current audio input and its
+attributes</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_audio *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const struct v4l2_audio *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_AUDIO, VIDIOC_S_AUDIO</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the current audio input applications zero out the
+<structfield>reserved</structfield> array of a &v4l2-audio;
+and call the <constant>VIDIOC_G_AUDIO</constant> ioctl with a pointer
+to this structure. Drivers fill the rest of the structure or return an
+&EINVAL; when the device has no audio inputs, or none which combine
+with the current video input.</para>
+
+ <para>Audio inputs have one writable property, the audio mode. To
+select the current audio input <emphasis>and</emphasis> change the
+audio mode, applications initialize the
+<structfield>index</structfield> and <structfield>mode</structfield>
+fields, and the
+<structfield>reserved</structfield> array of a
+<structname>v4l2_audio</structname> structure and call the
+<constant>VIDIOC_S_AUDIO</constant> ioctl. Drivers may switch to a
+different audio mode if the request cannot be satisfied. However, this
+is a write-only ioctl, it does not return the actual new audio
+mode.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-audio">
+ <title>struct <structname>v4l2_audio</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Identifies the audio input, set by the
+driver or application.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>name</structfield>[32]</entry>
+ <entry>Name of the audio input, a NUL-terminated ASCII
+string, for example: "Line In". This information is intended for the
+user, preferably the connector label on the device itself.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capability</structfield></entry>
+ <entry>Audio capability flags, see <xref
+ linkend="audio-capability" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>mode</structfield></entry>
+ <entry>Audio mode flags set by drivers and applications (on
+ <constant>VIDIOC_S_AUDIO</constant> ioctl), see <xref linkend="audio-mode" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[2]</entry>
+ <entry>Reserved for future extensions. Drivers and
+applications must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="audio-capability">
+ <title>Audio Capability Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_AUDCAP_STEREO</constant></entry>
+ <entry>0x00001</entry>
+ <entry>This is a stereo input. The flag is intended to
+automatically disable stereo recording etc. when the signal is always
+monaural. The API provides no means to detect if stereo is
+<emphasis>received</emphasis>, unless the audio input belongs to a
+tuner.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_AUDCAP_AVL</constant></entry>
+ <entry>0x00002</entry>
+ <entry>Automatic Volume Level mode is supported.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="audio-mode">
+ <title>Audio Mode Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_AUDMODE_AVL</constant></entry>
+ <entry>0x00001</entry>
+ <entry>AVL mode is on.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>No audio inputs combine with the current video input,
+or the number of the selected audio input is out of bounds or it does
+not combine.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-audioout.xml b/Documentation/DocBook/media/v4l/vidioc-g-audioout.xml
new file mode 100644
index 000000000..200a2704a
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-audioout.xml
@@ -0,0 +1,138 @@
+<refentry id="vidioc-g-audioout">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_AUDOUT, VIDIOC_S_AUDOUT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_AUDOUT</refname>
+ <refname>VIDIOC_S_AUDOUT</refname>
+ <refpurpose>Query or select the current audio output</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_audioout *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const struct v4l2_audioout *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_AUDOUT, VIDIOC_S_AUDOUT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the current audio output applications zero out the
+<structfield>reserved</structfield> array of a &v4l2-audioout; and
+call the <constant>VIDIOC_G_AUDOUT</constant> ioctl with a pointer
+to this structure. Drivers fill the rest of the structure or return an
+&EINVAL; when the device has no audio inputs, or none which combine
+with the current video output.</para>
+
+ <para>Audio outputs have no writable properties. Nevertheless, to
+select the current audio output applications can initialize the
+<structfield>index</structfield> field and
+<structfield>reserved</structfield> array (which in the future may
+contain writable properties) of a
+<structname>v4l2_audioout</structname> structure and call the
+<constant>VIDIOC_S_AUDOUT</constant> ioctl. Drivers switch to the
+requested output or return the &EINVAL; when the index is out of
+bounds. This is a write-only ioctl, it does not return the current
+audio output attributes as <constant>VIDIOC_G_AUDOUT</constant>
+does.</para>
+
+ <para>Note connectors on a TV card to loop back the received audio
+signal to a sound card are not audio outputs in this sense.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-audioout">
+ <title>struct <structname>v4l2_audioout</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Identifies the audio output, set by the
+driver or application.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>name</structfield>[32]</entry>
+ <entry>Name of the audio output, a NUL-terminated ASCII
+string, for example: "Line Out". This information is intended for the
+user, preferably the connector label on the device itself.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capability</structfield></entry>
+ <entry>Audio capability flags, none defined yet. Drivers
+must set this field to zero.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>mode</structfield></entry>
+ <entry>Audio mode, none defined yet. Drivers and
+applications (on <constant>VIDIOC_S_AUDOUT</constant>) must set this
+field to zero.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[2]</entry>
+ <entry>Reserved for future extensions. Drivers and
+applications must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>No audio outputs combine with the current video
+output, or the number of the selected audio output is out of bounds or
+it does not combine.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-crop.xml b/Documentation/DocBook/media/v4l/vidioc-g-crop.xml
new file mode 100644
index 000000000..e6c4efb9e
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-crop.xml
@@ -0,0 +1,129 @@
+<refentry id="vidioc-g-crop">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_CROP, VIDIOC_S_CROP</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_CROP</refname>
+ <refname>VIDIOC_S_CROP</refname>
+ <refpurpose>Get or set the current cropping rectangle</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_crop *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const struct v4l2_crop *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_CROP, VIDIOC_S_CROP</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the cropping rectangle size and position
+applications set the <structfield>type</structfield> field of a
+<structname>v4l2_crop</structname> structure to the respective buffer
+(stream) type and call the <constant>VIDIOC_G_CROP</constant> ioctl
+with a pointer to this structure. The driver fills the rest of the
+structure or returns the &EINVAL; if cropping is not supported.</para>
+
+ <para>To change the cropping rectangle applications initialize the
+<structfield>type</structfield> and &v4l2-rect; substructure named
+<structfield>c</structfield> of a v4l2_crop structure and call the
+<constant>VIDIOC_S_CROP</constant> ioctl with a pointer to this
+structure.</para>
+
+<para>Do not use the multiplanar buffer types. Use <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant>
+instead of <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE</constant>
+and use <constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant> instead of
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE</constant>.</para>
+
+ <para>The driver first adjusts the requested dimensions against
+hardware limits, &ie; the bounds given by the capture/output window,
+and it rounds to the closest possible values of horizontal and
+vertical offset, width and height. In particular the driver must round
+the vertical offset of the cropping rectangle to frame lines modulo
+two, such that the field order cannot be confused.</para>
+
+ <para>Second the driver adjusts the image size (the opposite
+rectangle of the scaling process, source or target depending on the
+data direction) to the closest size possible while maintaining the
+current horizontal and vertical scaling factor.</para>
+
+ <para>Finally the driver programs the hardware with the actual
+cropping and image parameters. <constant>VIDIOC_S_CROP</constant> is a
+write-only ioctl, it does not return the actual parameters. To query
+them applications must call <constant>VIDIOC_G_CROP</constant> and
+&VIDIOC-G-FMT;. When the parameters are unsuitable the application may
+modify the cropping or image parameters and repeat the cycle until
+satisfactory parameters have been negotiated.</para>
+
+ <para>When cropping is not supported then no parameters are
+changed and <constant>VIDIOC_S_CROP</constant> returns the
+&EINVAL;.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-crop">
+ <title>struct <structname>v4l2_crop</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of the data stream, set by the application.
+Only these types are valid here: <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant>,
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant> and
+<constant>V4L2_BUF_TYPE_VIDEO_OVERLAY</constant>. See <xref linkend="v4l2-buf-type" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-rect;</entry>
+ <entry><structfield>c</structfield></entry>
+ <entry>Cropping rectangle. The same co-ordinate system as
+for &v4l2-cropcap; <structfield>bounds</structfield> is used.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-ctrl.xml b/Documentation/DocBook/media/v4l/vidioc-g-ctrl.xml
new file mode 100644
index 000000000..ee2820d6c
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-ctrl.xml
@@ -0,0 +1,133 @@
+<refentry id="vidioc-g-ctrl">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_CTRL, VIDIOC_S_CTRL</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_CTRL</refname>
+ <refname>VIDIOC_S_CTRL</refname>
+ <refpurpose>Get or set the value of a control</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_control
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_CTRL, VIDIOC_S_CTRL</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To get the current value of a control applications
+initialize the <structfield>id</structfield> field of a struct
+<structname>v4l2_control</structname> and call the
+<constant>VIDIOC_G_CTRL</constant> ioctl with a pointer to this
+structure. To change the value of a control applications initialize
+the <structfield>id</structfield> and <structfield>value</structfield>
+fields of a struct <structname>v4l2_control</structname> and call the
+<constant>VIDIOC_S_CTRL</constant> ioctl.</para>
+
+ <para>When the <structfield>id</structfield> is invalid drivers
+return an &EINVAL;. When the <structfield>value</structfield> is out
+of bounds drivers can choose to take the closest valid value or return
+an &ERANGE;, whatever seems more appropriate. However,
+<constant>VIDIOC_S_CTRL</constant> is a write-only ioctl, it does not
+return the actual new value. If the <structfield>value</structfield>
+is inappropriate for the control (e.g. if it refers to an unsupported
+menu index of a menu control), then &EINVAL; is returned as well.</para>
+
+ <para>These ioctls work only with user controls. For other
+control classes the &VIDIOC-G-EXT-CTRLS;, &VIDIOC-S-EXT-CTRLS; or
+&VIDIOC-TRY-EXT-CTRLS; must be used.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-control">
+ <title>struct <structname>v4l2_control</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>id</structfield></entry>
+ <entry>Identifies the control, set by the
+application.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>value</structfield></entry>
+ <entry>New value or current value.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-control; <structfield>id</structfield> is
+invalid or the <structfield>value</structfield> is inappropriate for
+the given control (i.e. if a menu item is selected that is not supported
+by the driver according to &VIDIOC-QUERYMENU;).</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ERANGE</errorcode></term>
+ <listitem>
+ <para>The &v4l2-control; <structfield>value</structfield>
+is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The control is temporarily not changeable, possibly
+because another applications took over control of the device function
+this control belongs to.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EACCES</errorcode></term>
+ <listitem>
+ <para>Attempt to set a read-only control or to get a
+ write-only control.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-dv-timings.xml b/Documentation/DocBook/media/v4l/vidioc-g-dv-timings.xml
new file mode 100644
index 000000000..764b635ed
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-dv-timings.xml
@@ -0,0 +1,341 @@
+<refentry id="vidioc-g-dv-timings">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_DV_TIMINGS, VIDIOC_S_DV_TIMINGS</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_DV_TIMINGS</refname>
+ <refname>VIDIOC_S_DV_TIMINGS</refname>
+ <refpurpose>Get or set DV timings for input or output</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_dv_timings *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_DV_TIMINGS, VIDIOC_S_DV_TIMINGS</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+ <para>To set DV timings for the input or output, applications use the
+<constant>VIDIOC_S_DV_TIMINGS</constant> ioctl and to get the current timings,
+applications use the <constant>VIDIOC_G_DV_TIMINGS</constant> ioctl. The detailed timing
+information is filled in using the structure &v4l2-dv-timings;. These ioctls take
+a pointer to the &v4l2-dv-timings; structure as argument. If the ioctl is not supported
+or the timing values are not correct, the driver returns &EINVAL;.</para>
+<para>The <filename>linux/v4l2-dv-timings.h</filename> header can be used to get the
+timings of the formats in the <xref linkend="cea861" /> and <xref linkend="vesadmt" />
+standards. If the current input or output does not support DV timings (e.g. if
+&VIDIOC-ENUMINPUT; does not set the <constant>V4L2_IN_CAP_DV_TIMINGS</constant> flag), then
+&ENODATA; is returned.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>This ioctl is not supported, or the
+<constant>VIDIOC_S_DV_TIMINGS</constant> parameter was unsuitable.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENODATA</errorcode></term>
+ <listitem>
+ <para>Digital video timings are not supported for this input or output.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The device is busy and therefore can not change the timings.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <table pgwide="1" frame="none" id="v4l2-bt-timings">
+ <title>struct <structname>v4l2_bt_timings</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>width</structfield></entry>
+ <entry>Width of the active video in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>height</structfield></entry>
+ <entry>Height of the active video frame in lines. So for interlaced formats the
+ height of the active video in each field is <structfield>height</structfield>/2.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>interlaced</structfield></entry>
+ <entry>Progressive (0) or interlaced (1)</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>polarities</structfield></entry>
+ <entry>This is a bit mask that defines polarities of sync signals.
+bit 0 (V4L2_DV_VSYNC_POS_POL) is for vertical sync polarity and bit 1 (V4L2_DV_HSYNC_POS_POL) is for horizontal sync polarity. If the bit is set
+(1) it is positive polarity and if is cleared (0), it is negative polarity.</entry>
+ </row>
+ <row>
+ <entry>__u64</entry>
+ <entry><structfield>pixelclock</structfield></entry>
+ <entry>Pixel clock in Hz. Ex. 74.25MHz->74250000</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>hfrontporch</structfield></entry>
+ <entry>Horizontal front porch in pixels</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>hsync</structfield></entry>
+ <entry>Horizontal sync length in pixels</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>hbackporch</structfield></entry>
+ <entry>Horizontal back porch in pixels</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>vfrontporch</structfield></entry>
+ <entry>Vertical front porch in lines. For interlaced formats this refers to the
+ odd field (aka field 1).</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>vsync</structfield></entry>
+ <entry>Vertical sync length in lines. For interlaced formats this refers to the
+ odd field (aka field 1).</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>vbackporch</structfield></entry>
+ <entry>Vertical back porch in lines. For interlaced formats this refers to the
+ odd field (aka field 1).</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>il_vfrontporch</structfield></entry>
+ <entry>Vertical front porch in lines for the even field (aka field 2) of
+ interlaced field formats. Must be 0 for progressive formats.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>il_vsync</structfield></entry>
+ <entry>Vertical sync length in lines for the even field (aka field 2) of
+ interlaced field formats. Must be 0 for progressive formats.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>il_vbackporch</structfield></entry>
+ <entry>Vertical back porch in lines for the even field (aka field 2) of
+ interlaced field formats. Must be 0 for progressive formats.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>standards</structfield></entry>
+ <entry>The video standard(s) this format belongs to. This will be filled in by
+ the driver. Applications must set this to 0. See <xref linkend="dv-bt-standards"/>
+ for a list of standards.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Several flags giving more information about the format.
+ See <xref linkend="dv-bt-flags"/> for a description of the flags.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-dv-timings">
+ <title>struct <structname>v4l2_dv_timings</structname></title>
+ <tgroup cols="4">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry></entry>
+ <entry>Type of DV timings as listed in <xref linkend="dv-timing-types"/>.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry><structfield></structfield></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-bt-timings;</entry>
+ <entry><structfield>bt</structfield></entry>
+ <entry>Timings defined by BT.656/1120 specifications</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[32]</entry>
+ <entry></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="dv-timing-types">
+ <title>DV Timing types</title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>Timing type</entry>
+ <entry>value</entry>
+ <entry>Description</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_BT_656_1120</entry>
+ <entry>0</entry>
+ <entry>BT.656/1120 timings</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <table pgwide="1" frame="none" id="dv-bt-standards">
+ <title>DV BT Timing standards</title>
+ <tgroup cols="2">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>Timing standard</entry>
+ <entry>Description</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_BT_STD_CEA861</entry>
+ <entry>The timings follow the CEA-861 Digital TV Profile standard</entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_BT_STD_DMT</entry>
+ <entry>The timings follow the VESA Discrete Monitor Timings standard</entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_BT_STD_CVT</entry>
+ <entry>The timings follow the VESA Coordinated Video Timings standard</entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_BT_STD_GTF</entry>
+ <entry>The timings follow the VESA Generalized Timings Formula standard</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ <table pgwide="1" frame="none" id="dv-bt-flags">
+ <title>DV BT Timing flags</title>
+ <tgroup cols="2">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>Flag</entry>
+ <entry>Description</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_FL_REDUCED_BLANKING</entry>
+ <entry>CVT/GTF specific: the timings use reduced blanking (CVT) or the 'Secondary
+GTF' curve (GTF). In both cases the horizontal and/or vertical blanking
+intervals are reduced, allowing a higher resolution over the same
+bandwidth. This is a read-only flag, applications must not set this.
+ </entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_FL_CAN_REDUCE_FPS</entry>
+ <entry>CEA-861 specific: set for CEA-861 formats with a framerate that is a multiple
+of six. These formats can be optionally played at 1 / 1.001 speed to
+be compatible with 60 Hz based standards such as NTSC and PAL-M that use a framerate of
+29.97 frames per second. If the transmitter can't generate such frequencies, then the
+flag will also be cleared. This is a read-only flag, applications must not set this.
+ </entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_FL_REDUCED_FPS</entry>
+ <entry>CEA-861 specific: only valid for video transmitters, the flag is cleared
+by receivers. It is also only valid for formats with the V4L2_DV_FL_CAN_REDUCE_FPS flag
+set, for other formats the flag will be cleared by the driver.
+
+If the application sets this flag, then the pixelclock used to set up the transmitter is
+divided by 1.001 to make it compatible with NTSC framerates. If the transmitter
+can't generate such frequencies, then the flag will also be cleared.
+ </entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_FL_HALF_LINE</entry>
+ <entry>Specific to interlaced formats: if set, then the vertical frontporch
+of field 1 (aka the odd field) is really one half-line longer and the vertical backporch
+of field 2 (aka the even field) is really one half-line shorter, so each field has exactly
+the same number of half-lines. Whether half-lines can be detected or used depends on
+the hardware.
+ </entry>
+ </row>
+ <row>
+ <entry>V4L2_DV_FL_IS_CE_VIDEO</entry>
+ <entry>If set, then this is a Consumer Electronics (CE) video format.
+Such formats differ from other formats (commonly called IT formats) in that if
+R'G'B' encoding is used then by default the R'G'B' values use limited range
+(i.e. 16-235) as opposed to full range (i.e. 0-255). All formats defined in CEA-861
+except for the 640x480p59.94 format are CE formats.
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-edid.xml b/Documentation/DocBook/media/v4l/vidioc-g-edid.xml
new file mode 100644
index 000000000..6df40db4c
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-edid.xml
@@ -0,0 +1,162 @@
+<refentry id="vidioc-g-edid">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_EDID, VIDIOC_S_EDID</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_EDID</refname>
+ <refname>VIDIOC_S_EDID</refname>
+ <refpurpose>Get or set the EDID of a video receiver/transmitter</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_edid *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_edid *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_EDID, VIDIOC_S_EDID</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+ <para>These ioctls can be used to get or set an EDID associated with an input
+ from a receiver or an output of a transmitter device. They can be
+ used with subdevice nodes (/dev/v4l-subdevX) or with video nodes (/dev/videoX).</para>
+
+ <para>When used with video nodes the <structfield>pad</structfield> field represents the
+ input (for video capture devices) or output (for video output devices) index as
+ is returned by &VIDIOC-ENUMINPUT; and &VIDIOC-ENUMOUTPUT; respectively. When used
+ with subdevice nodes the <structfield>pad</structfield> field represents the
+ input or output pad of the subdevice. If there is no EDID support for the given
+ <structfield>pad</structfield> value, then the &EINVAL; will be returned.</para>
+
+ <para>To get the EDID data the application has to fill in the <structfield>pad</structfield>,
+ <structfield>start_block</structfield>, <structfield>blocks</structfield> and <structfield>edid</structfield>
+ fields and call <constant>VIDIOC_G_EDID</constant>. The current EDID from block
+ <structfield>start_block</structfield> and of size <structfield>blocks</structfield>
+ will be placed in the memory <structfield>edid</structfield> points to. The <structfield>edid</structfield>
+ pointer must point to memory at least <structfield>blocks</structfield>&nbsp;*&nbsp;128 bytes
+ large (the size of one block is 128 bytes).</para>
+
+ <para>If there are fewer blocks than specified, then the driver will set <structfield>blocks</structfield>
+ to the actual number of blocks. If there are no EDID blocks available at all, then the error code
+ ENODATA is set.</para>
+
+ <para>If blocks have to be retrieved from the sink, then this call will block until they
+ have been read.</para>
+
+ <para>To set the EDID blocks of a receiver the application has to fill in the <structfield>pad</structfield>,
+ <structfield>blocks</structfield> and <structfield>edid</structfield> fields and set
+ <structfield>start_block</structfield> to 0. It is not possible to set part of an EDID,
+ it is always all or nothing. Setting the EDID data is only valid for receivers as it makes
+ no sense for a transmitter.</para>
+
+ <para>The driver assumes that the full EDID is passed in. If there are more EDID blocks than
+ the hardware can handle then the EDID is not written, but instead the error code E2BIG is set
+ and <structfield>blocks</structfield> is set to the maximum that the hardware supports.
+ If <structfield>start_block</structfield> is any
+ value other than 0 then the error code EINVAL is set.</para>
+
+ <para>To disable an EDID you set <structfield>blocks</structfield> to 0. Depending on the
+ hardware this will drive the hotplug pin low and/or block the source from reading the EDID
+ data in some way. In any case, the end result is the same: the EDID is no longer available.
+ </para>
+
+ <table pgwide="1" frame="none" id="v4l2-edid">
+ <title>struct <structname>v4l2_edid</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pad</structfield></entry>
+ <entry>Pad for which to get/set the EDID blocks. When used with a video device
+ node the pad represents the input or output index as returned by
+ &VIDIOC-ENUMINPUT; and &VIDIOC-ENUMOUTPUT; respectively.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>start_block</structfield></entry>
+ <entry>Read the EDID from starting with this block. Must be 0 when setting
+ the EDID.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>blocks</structfield></entry>
+ <entry>The number of blocks to get or set. Must be less or equal to 256 (the
+ maximum number of blocks as defined by the standard). When you set the EDID and
+ <structfield>blocks</structfield> is 0, then the EDID is disabled or erased.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[5]</entry>
+ <entry>Reserved for future extensions. Applications and drivers must
+ set the array to zero.</entry>
+ </row>
+ <row>
+ <entry>__u8&nbsp;*</entry>
+ <entry><structfield>edid</structfield></entry>
+ <entry>Pointer to memory that contains the EDID. The minimum size is
+ <structfield>blocks</structfield>&nbsp;*&nbsp;128.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>ENODATA</errorcode></term>
+ <listitem>
+ <para>The EDID data is not available.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>E2BIG</errorcode></term>
+ <listitem>
+ <para>The EDID data you provided is more than the hardware can handle.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-enc-index.xml b/Documentation/DocBook/media/v4l/vidioc-g-enc-index.xml
new file mode 100644
index 000000000..be25029a1
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-enc-index.xml
@@ -0,0 +1,189 @@
+<refentry id="vidioc-g-enc-index">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_ENC_INDEX</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_ENC_INDEX</refname>
+ <refpurpose>Get meta data about a compressed video stream</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_enc_idx *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_ENC_INDEX</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>The <constant>VIDIOC_G_ENC_INDEX</constant> ioctl provides
+meta data about a compressed video stream the same or another
+application currently reads from the driver, which is useful for
+random access into the stream without decoding it.</para>
+
+ <para>To read the data applications must call
+<constant>VIDIOC_G_ENC_INDEX</constant> with a pointer to a
+&v4l2-enc-idx;. On success the driver fills the
+<structfield>entry</structfield> array, stores the number of elements
+written in the <structfield>entries</structfield> field, and
+initializes the <structfield>entries_cap</structfield> field.</para>
+
+ <para>Each element of the <structfield>entry</structfield> array
+contains meta data about one picture. A
+<constant>VIDIOC_G_ENC_INDEX</constant> call reads up to
+<constant>V4L2_ENC_IDX_ENTRIES</constant> entries from a driver
+buffer, which can hold up to <structfield>entries_cap</structfield>
+entries. This number can be lower or higher than
+<constant>V4L2_ENC_IDX_ENTRIES</constant>, but not zero. When the
+application fails to read the meta data in time the oldest entries
+will be lost. When the buffer is empty or no capturing/encoding is in
+progress, <structfield>entries</structfield> will be zero.</para>
+
+ <para>Currently this ioctl is only defined for MPEG-2 program
+streams and video elementary streams.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-enc-idx">
+ <title>struct <structname>v4l2_enc_idx</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>entries</structfield></entry>
+ <entry>The number of entries the driver stored in the
+<structfield>entry</structfield> array.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>entries_cap</structfield></entry>
+ <entry>The number of entries the driver can
+buffer. Must be greater than zero.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[4]</entry>
+ <entry spanname="hspan">Reserved for future extensions.
+Drivers must set the array to zero.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-enc-idx-entry;</entry>
+ <entry><structfield>entry</structfield>[<constant>V4L2_ENC_IDX_ENTRIES</constant>]</entry>
+ <entry>Meta data about a compressed video stream. Each
+element of the array corresponds to one picture, sorted in ascending
+order by their <structfield>offset</structfield>.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-enc-idx-entry">
+ <title>struct <structname>v4l2_enc_idx_entry</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u64</entry>
+ <entry><structfield>offset</structfield></entry>
+ <entry>The offset in bytes from the beginning of the
+compressed video stream to the beginning of this picture, that is a
+<wordasword>PES packet header</wordasword> as defined in <xref
+ linkend="mpeg2part1" /> or a <wordasword>picture
+header</wordasword> as defined in <xref linkend="mpeg2part2" />. When
+the encoder is stopped, the driver resets the offset to zero.</entry>
+ </row>
+ <row>
+ <entry>__u64</entry>
+ <entry><structfield>pts</structfield></entry>
+ <entry>The 33 bit <wordasword>Presentation Time
+Stamp</wordasword> of this picture as defined in <xref
+ linkend="mpeg2part1" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>length</structfield></entry>
+ <entry>The length of this picture in bytes.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Flags containing the coding type of this picture, see <xref
+ linkend="enc-idx-flags" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[2]</entry>
+ <entry>Reserved for future extensions.
+Drivers must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="enc-idx-flags">
+ <title>Index Entry Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_ENC_IDX_FRAME_I</constant></entry>
+ <entry>0x00</entry>
+ <entry>This is an Intra-coded picture.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_ENC_IDX_FRAME_P</constant></entry>
+ <entry>0x01</entry>
+ <entry>This is a Predictive-coded picture.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_ENC_IDX_FRAME_B</constant></entry>
+ <entry>0x02</entry>
+ <entry>This is a Bidirectionally predictive-coded
+picture.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_ENC_IDX_FRAME_MASK</constant></entry>
+ <entry>0x0F</entry>
+ <entry><wordasword>AND</wordasword> the flags field with
+this mask to obtain the picture coding type.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-ext-ctrls.xml b/Documentation/DocBook/media/v4l/vidioc-g-ext-ctrls.xml
new file mode 100644
index 000000000..c5bdbfcc4
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-ext-ctrls.xml
@@ -0,0 +1,427 @@
+<refentry id="vidioc-g-ext-ctrls">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_EXT_CTRLS, VIDIOC_S_EXT_CTRLS,
+VIDIOC_TRY_EXT_CTRLS</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_EXT_CTRLS</refname>
+ <refname>VIDIOC_S_EXT_CTRLS</refname>
+ <refname>VIDIOC_TRY_EXT_CTRLS</refname>
+ <refpurpose>Get or set the value of several controls, try control
+values</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_ext_controls
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_EXT_CTRLS, VIDIOC_S_EXT_CTRLS,
+VIDIOC_TRY_EXT_CTRLS</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>These ioctls allow the caller to get or set multiple
+controls atomically. Control IDs are grouped into control classes (see
+<xref linkend="ctrl-class" />) and all controls in the control array
+must belong to the same control class.</para>
+
+ <para>Applications must always fill in the
+<structfield>count</structfield>,
+<structfield>ctrl_class</structfield>,
+<structfield>controls</structfield> and
+<structfield>reserved</structfield> fields of &v4l2-ext-controls;, and
+initialize the &v4l2-ext-control; array pointed to by the
+<structfield>controls</structfield> fields.</para>
+
+ <para>To get the current value of a set of controls applications
+initialize the <structfield>id</structfield>,
+<structfield>size</structfield> and <structfield>reserved2</structfield> fields
+of each &v4l2-ext-control; and call the
+<constant>VIDIOC_G_EXT_CTRLS</constant> ioctl. String controls controls
+must also set the <structfield>string</structfield> field. Controls
+of compound types (<constant>V4L2_CTRL_FLAG_HAS_PAYLOAD</constant> is set)
+must set the <structfield>ptr</structfield> field.</para>
+
+ <para>If the <structfield>size</structfield> is too small to
+receive the control result (only relevant for pointer-type controls
+like strings), then the driver will set <structfield>size</structfield>
+to a valid value and return an &ENOSPC;. You should re-allocate the
+memory to this new size and try again. For the string type it is possible that
+the same issue occurs again if the string has grown in the meantime. It is
+recommended to call &VIDIOC-QUERYCTRL; first and use
+<structfield>maximum</structfield>+1 as the new <structfield>size</structfield>
+value. It is guaranteed that that is sufficient memory.
+</para>
+
+ <para>N-dimensional arrays are set and retrieved row-by-row. You cannot set a partial
+array, all elements have to be set or retrieved. The total size is calculated
+as <structfield>elems</structfield> * <structfield>elem_size</structfield>.
+These values can be obtained by calling &VIDIOC-QUERY-EXT-CTRL;.</para>
+
+ <para>To change the value of a set of controls applications
+initialize the <structfield>id</structfield>, <structfield>size</structfield>,
+<structfield>reserved2</structfield> and
+<structfield>value/value64/string/ptr</structfield> fields of each &v4l2-ext-control; and
+call the <constant>VIDIOC_S_EXT_CTRLS</constant> ioctl. The controls
+will only be set if <emphasis>all</emphasis> control values are
+valid.</para>
+
+ <para>To check if a set of controls have correct values applications
+initialize the <structfield>id</structfield>, <structfield>size</structfield>,
+<structfield>reserved2</structfield> and
+<structfield>value/value64/string/ptr</structfield> fields of each &v4l2-ext-control; and
+call the <constant>VIDIOC_TRY_EXT_CTRLS</constant> ioctl. It is up to
+the driver whether wrong values are automatically adjusted to a valid
+value or if an error is returned.</para>
+
+ <para>When the <structfield>id</structfield> or
+<structfield>ctrl_class</structfield> is invalid drivers return an
+&EINVAL;. When the value is out of bounds drivers can choose to take
+the closest valid value or return an &ERANGE;, whatever seems more
+appropriate. In the first case the new value is set in
+&v4l2-ext-control;. If the new control value is inappropriate (e.g. the
+given menu index is not supported by the menu control), then this will
+also result in an &EINVAL; error.</para>
+
+ <para>The driver will only set/get these controls if all control
+values are correct. This prevents the situation where only some of the
+controls were set/get. Only low-level errors (&eg; a failed i2c
+command) can still cause this situation.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-ext-control">
+ <title>struct <structname>v4l2_ext_control</structname></title>
+ <tgroup cols="4">
+ &cs-ustr;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>id</structfield></entry>
+ <entry></entry>
+ <entry>Identifies the control, set by the
+application.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>size</structfield></entry>
+ <entry></entry>
+ <entry>The total size in bytes of the payload of this
+control. This is normally 0, but for pointer controls this should be
+set to the size of the memory containing the payload, or that will
+receive the payload. If <constant>VIDIOC_G_EXT_CTRLS</constant> finds
+that this value is less than is required to store
+the payload result, then it is set to a value large enough to store the
+payload result and ENOSPC is returned. Note that for string controls
+this <structfield>size</structfield> field should not be confused with the length of the string.
+This field refers to the size of the memory that contains the string.
+The actual <emphasis>length</emphasis> of the string may well be much smaller.
+</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved2</structfield>[1]</entry>
+ <entry></entry>
+ <entry>Reserved for future extensions. Drivers and
+applications must set the array to zero.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry>(anonymous)</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__s32</entry>
+ <entry><structfield>value</structfield></entry>
+ <entry>New value or current value. Valid if this control is not of
+type <constant>V4L2_CTRL_TYPE_INTEGER64</constant> and
+<constant>V4L2_CTRL_FLAG_HAS_PAYLOAD</constant> is not set.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__s64</entry>
+ <entry><structfield>value64</structfield></entry>
+ <entry>New value or current value. Valid if this control is of
+type <constant>V4L2_CTRL_TYPE_INTEGER64</constant> and
+<constant>V4L2_CTRL_FLAG_HAS_PAYLOAD</constant> is not set.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>char *</entry>
+ <entry><structfield>string</structfield></entry>
+ <entry>A pointer to a string. Valid if this control is of
+type <constant>V4L2_CTRL_TYPE_STRING</constant>.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u8 *</entry>
+ <entry><structfield>p_u8</structfield></entry>
+ <entry>A pointer to a matrix control of unsigned 8-bit values.
+Valid if this control is of type <constant>V4L2_CTRL_TYPE_U8</constant>.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u16 *</entry>
+ <entry><structfield>p_u16</structfield></entry>
+ <entry>A pointer to a matrix control of unsigned 16-bit values.
+Valid if this control is of type <constant>V4L2_CTRL_TYPE_U16</constant>.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>void *</entry>
+ <entry><structfield>ptr</structfield></entry>
+ <entry>A pointer to a compound type which can be an N-dimensional array and/or a
+compound type (the control's type is >= <constant>V4L2_CTRL_COMPOUND_TYPES</constant>).
+Valid if <constant>V4L2_CTRL_FLAG_HAS_PAYLOAD</constant> is set for this control.
+</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-ext-controls">
+ <title>struct <structname>v4l2_ext_controls</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>ctrl_class</structfield></entry>
+ <entry>The control class to which all controls belong, see
+<xref linkend="ctrl-class" />. Drivers that use a kernel framework for handling
+controls will also accept a value of 0 here, meaning that the controls can
+belong to any control class. Whether drivers support this can be tested by setting
+<structfield>ctrl_class</structfield> to 0 and calling <constant>VIDIOC_TRY_EXT_CTRLS</constant>
+with a <structfield>count</structfield> of 0. If that succeeds, then the driver
+supports this feature.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>count</structfield></entry>
+ <entry>The number of controls in the controls array. May
+also be zero.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>error_idx</structfield></entry>
+ <entry><para>Set by the driver in case of an error. If the error is
+associated with a particular control, then <structfield>error_idx</structfield>
+is set to the index of that control. If the error is not related to a specific
+control, or the validation step failed (see below), then
+<structfield>error_idx</structfield> is set to <structfield>count</structfield>.
+The value is undefined if the ioctl returned 0 (success).</para>
+
+<para>Before controls are read from/written to hardware a validation step
+takes place: this checks if all controls in the list are valid controls,
+if no attempt is made to write to a read-only control or read from a write-only
+control, and any other up-front checks that can be done without accessing the
+hardware. The exact validations done during this step are driver dependent
+since some checks might require hardware access for some devices, thus making
+it impossible to do those checks up-front. However, drivers should make a
+best-effort to do as many up-front checks as possible.</para>
+
+<para>This check is done to avoid leaving the hardware in an inconsistent state due
+to easy-to-avoid problems. But it leads to another problem: the application needs to
+know whether an error came from the validation step (meaning that the hardware
+was not touched) or from an error during the actual reading from/writing to hardware.</para>
+
+<para>The, in hindsight quite poor, solution for that is to set <structfield>error_idx</structfield>
+to <structfield>count</structfield> if the validation failed. This has the
+unfortunate side-effect that it is not possible to see which control failed the
+validation. If the validation was successful and the error happened while
+accessing the hardware, then <structfield>error_idx</structfield> is less than
+<structfield>count</structfield> and only the controls up to
+<structfield>error_idx-1</structfield> were read or written correctly, and the
+state of the remaining controls is undefined.</para>
+
+<para>Since <constant>VIDIOC_TRY_EXT_CTRLS</constant> does not access hardware
+there is also no need to handle the validation step in this special way,
+so <structfield>error_idx</structfield> will just be set to the control that
+failed the validation step instead of to <structfield>count</structfield>.
+This means that if <constant>VIDIOC_S_EXT_CTRLS</constant> fails with
+<structfield>error_idx</structfield> set to <structfield>count</structfield>,
+then you can call <constant>VIDIOC_TRY_EXT_CTRLS</constant> to try to discover
+the actual control that failed the validation step. Unfortunately, there
+is no <constant>TRY</constant> equivalent for <constant>VIDIOC_G_EXT_CTRLS</constant>.
+</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[2]</entry>
+ <entry>Reserved for future extensions. Drivers and
+applications must set the array to zero.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-ext-control; *</entry>
+ <entry><structfield>controls</structfield></entry>
+ <entry>Pointer to an array of
+<structfield>count</structfield> v4l2_ext_control structures. Ignored
+if <structfield>count</structfield> equals zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="ctrl-class">
+ <title>Control classes</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CTRL_CLASS_USER</constant></entry>
+ <entry>0x980000</entry>
+ <entry>The class containing user controls. These controls
+are described in <xref linkend="control" />. All controls that can be set
+using the &VIDIOC-S-CTRL; and &VIDIOC-G-CTRL; ioctl belong to this
+class.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_CLASS_MPEG</constant></entry>
+ <entry>0x990000</entry>
+ <entry>The class containing MPEG compression controls.
+These controls are described in <xref
+ linkend="mpeg-controls" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_CLASS_CAMERA</constant></entry>
+ <entry>0x9a0000</entry>
+ <entry>The class containing camera controls.
+These controls are described in <xref
+ linkend="camera-controls" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_CLASS_FM_TX</constant></entry>
+ <entry>0x9b0000</entry>
+ <entry>The class containing FM Transmitter (FM TX) controls.
+These controls are described in <xref
+ linkend="fm-tx-controls" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_CLASS_FLASH</constant></entry>
+ <entry>0x9c0000</entry>
+ <entry>The class containing flash device controls.
+These controls are described in <xref
+ linkend="flash-controls" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_CLASS_JPEG</constant></entry>
+ <entry>0x9d0000</entry>
+ <entry>The class containing JPEG compression controls.
+These controls are described in <xref
+ linkend="jpeg-controls" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_CLASS_IMAGE_SOURCE</constant></entry>
+ <entry>0x9e0000</entry> <entry>The class containing image
+ source controls. These controls are described in <xref
+ linkend="image-source-controls" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_CLASS_IMAGE_PROC</constant></entry>
+ <entry>0x9f0000</entry> <entry>The class containing image
+ processing controls. These controls are described in <xref
+ linkend="image-process-controls" />.</entry>
+ </row>
+
+ <row>
+ <entry><constant>V4L2_CTRL_CLASS_FM_RX</constant></entry>
+ <entry>0xa10000</entry>
+ <entry>The class containing FM Receiver (FM RX) controls.
+These controls are described in <xref
+ linkend="fm-rx-controls" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_CLASS_RF_TUNER</constant></entry>
+ <entry>0xa20000</entry>
+ <entry>The class containing RF tuner controls.
+These controls are described in <xref linkend="rf-tuner-controls" />.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-ext-control; <structfield>id</structfield>
+is invalid, the &v4l2-ext-controls;
+<structfield>ctrl_class</structfield> is invalid, or the &v4l2-ext-control;
+<structfield>value</structfield> was inappropriate (e.g. the given menu
+index is not supported by the driver). This error code is
+also returned by the <constant>VIDIOC_S_EXT_CTRLS</constant> and
+<constant>VIDIOC_TRY_EXT_CTRLS</constant> ioctls if two or more
+control values are in conflict.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ERANGE</errorcode></term>
+ <listitem>
+ <para>The &v4l2-ext-control; <structfield>value</structfield>
+is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The control is temporarily not changeable, possibly
+because another applications took over control of the device function
+this control belongs to.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENOSPC</errorcode></term>
+ <listitem>
+ <para>The space reserved for the control's payload is insufficient.
+The field <structfield>size</structfield> is set to a value that is enough
+to store the payload and this error code is returned.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EACCES</errorcode></term>
+ <listitem>
+ <para>Attempt to try or set a read-only control or to get a
+ write-only control.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
+
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml b/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml
new file mode 100644
index 000000000..77607cc19
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-fbuf.xml
@@ -0,0 +1,459 @@
+<refentry id="vidioc-g-fbuf">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_FBUF, VIDIOC_S_FBUF</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_FBUF</refname>
+ <refname>VIDIOC_S_FBUF</refname>
+ <refpurpose>Get or set frame buffer overlay parameters</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_framebuffer *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const struct v4l2_framebuffer *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_FBUF, VIDIOC_S_FBUF</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>Applications can use the <constant>VIDIOC_G_FBUF</constant> and
+<constant>VIDIOC_S_FBUF</constant> ioctl to get and set the
+framebuffer parameters for a <link linkend="overlay">Video
+Overlay</link> or <link linkend="osd">Video Output Overlay</link>
+(OSD). The type of overlay is implied by the device type (capture or
+output device) and can be determined with the &VIDIOC-QUERYCAP; ioctl.
+One <filename>/dev/videoN</filename> device must not support both
+kinds of overlay.</para>
+
+ <para>The V4L2 API distinguishes destructive and non-destructive
+overlays. A destructive overlay copies captured video images into the
+video memory of a graphics card. A non-destructive overlay blends
+video images into a VGA signal or graphics into a video signal.
+<wordasword>Video Output Overlays</wordasword> are always
+non-destructive.</para>
+
+ <para>To get the current parameters applications call the
+<constant>VIDIOC_G_FBUF</constant> ioctl with a pointer to a
+<structname>v4l2_framebuffer</structname> structure. The driver fills
+all fields of the structure or returns an &EINVAL; when overlays are
+not supported.</para>
+
+ <para>To set the parameters for a <wordasword>Video Output
+Overlay</wordasword>, applications must initialize the
+<structfield>flags</structfield> field of a struct
+<structname>v4l2_framebuffer</structname>. Since the framebuffer is
+implemented on the TV card all other parameters are determined by the
+driver. When an application calls <constant>VIDIOC_S_FBUF</constant>
+with a pointer to this structure, the driver prepares for the overlay
+and returns the framebuffer parameters as
+<constant>VIDIOC_G_FBUF</constant> does, or it returns an error
+code.</para>
+
+ <para>To set the parameters for a <wordasword>non-destructive
+Video Overlay</wordasword>, applications must initialize the
+<structfield>flags</structfield> field, the
+<structfield>fmt</structfield> substructure, and call
+<constant>VIDIOC_S_FBUF</constant>. Again the driver prepares for the
+overlay and returns the framebuffer parameters as
+<constant>VIDIOC_G_FBUF</constant> does, or it returns an error
+code.</para>
+
+ <para>For a <wordasword>destructive Video Overlay</wordasword>
+applications must additionally provide a
+<structfield>base</structfield> address. Setting up a DMA to a
+random memory location can jeopardize the system security, its
+stability or even damage the hardware, therefore only the superuser
+can set the parameters for a destructive video overlay.</para>
+
+ <!-- NB v4l2_pix_format is also specified in pixfmt.sgml.-->
+
+ <table pgwide="1" frame="none" id="v4l2-framebuffer">
+ <title>struct <structname>v4l2_framebuffer</structname></title>
+ <tgroup cols="4">
+ &cs-ustr;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capability</structfield></entry>
+ <entry></entry>
+ <entry>Overlay capability flags set by the driver, see
+<xref linkend="framebuffer-cap" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry></entry>
+ <entry>Overlay control flags set by application and
+driver, see <xref linkend="framebuffer-flags" /></entry>
+ </row>
+ <row>
+ <entry>void *</entry>
+ <entry><structfield>base</structfield></entry>
+ <entry></entry>
+ <entry>Physical base address of the framebuffer,
+that is the address of the pixel in the top left corner of the
+framebuffer.<footnote><para>A physical base address may not suit all
+platforms. GK notes in theory we should pass something like PCI device
++ memory region + offset instead. If you encounter problems please
+discuss on the linux-media mailing list: &v4l-ml;.</para></footnote></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>This field is irrelevant to
+<wordasword>non-destructive Video Overlays</wordasword>. For
+<wordasword>destructive Video Overlays</wordasword> applications must
+provide a base address. The driver may accept only base addresses
+which are a multiple of two, four or eight bytes. For
+<wordasword>Video Output Overlays</wordasword> the driver must return
+a valid base address, so applications can find the corresponding Linux
+framebuffer device (see <xref linkend="osd" />).</entry>
+ </row>
+ <row>
+ <entry>struct</entry>
+ <entry><structfield>fmt</structfield></entry>
+ <entry></entry>
+ <entry>Layout of the frame buffer.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>width</structfield></entry>
+ <entry>Width of the frame buffer in pixels.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>height</structfield></entry>
+ <entry>Height of the frame buffer in pixels.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>pixelformat</structfield></entry>
+ <entry>The pixel format of the
+framebuffer.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>For <wordasword>non-destructive Video
+Overlays</wordasword> this field only defines a format for the
+&v4l2-window; <structfield>chromakey</structfield> field.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>For <wordasword>destructive Video
+Overlays</wordasword> applications must initialize this field. For
+<wordasword>Video Output Overlays</wordasword> the driver must return
+a valid format.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ <entry>Usually this is an RGB format (for example
+<link linkend="V4L2-PIX-FMT-RGB565"><constant>V4L2_PIX_FMT_RGB565</constant></link>)
+but YUV formats (only packed YUV formats when chroma keying is used,
+not including <constant>V4L2_PIX_FMT_YUYV</constant> and
+<constant>V4L2_PIX_FMT_UYVY</constant>) and the
+<constant>V4L2_PIX_FMT_PAL8</constant> format are also permitted. The
+behavior of the driver when an application requests a compressed
+format is undefined. See <xref linkend="pixfmt" /> for information on
+pixel formats.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-field;</entry>
+ <entry><structfield>field</structfield></entry>
+ <entry>Drivers and applications shall ignore this field.
+If applicable, the field order is selected with the &VIDIOC-S-FMT;
+ioctl, using the <structfield>field</structfield> field of
+&v4l2-window;.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>bytesperline</structfield></entry>
+ <entry>Distance in bytes between the leftmost pixels in
+two adjacent lines.</entry>
+ </row>
+ <row>
+ <entry spanname="hspan"><para>This field is irrelevant to
+<wordasword>non-destructive Video
+Overlays</wordasword>.</para><para>For <wordasword>destructive Video
+Overlays</wordasword> both applications and drivers can set this field
+to request padding bytes at the end of each line. Drivers however may
+ignore the requested value, returning <structfield>width</structfield>
+times bytes-per-pixel or a larger value required by the hardware. That
+implies applications can just set this field to zero to get a
+reasonable default.</para><para>For <wordasword>Video Output
+Overlays</wordasword> the driver must return a valid
+value.</para><para>Video hardware may access padding bytes, therefore
+they must reside in accessible memory. Consider for example the case
+where padding bytes after the last line of an image cross a system
+page boundary. Capture devices may write padding bytes, the value is
+undefined. Output devices ignore the contents of padding
+bytes.</para><para>When the image format is planar the
+<structfield>bytesperline</structfield> value applies to the first
+plane and is divided by the same factor as the
+<structfield>width</structfield> field for the other planes. For
+example the Cb and Cr planes of a YUV 4:2:0 image have half as many
+padding bytes following each line as the Y plane. To avoid ambiguities
+drivers must return a <structfield>bytesperline</structfield> value
+rounded up to a multiple of the scale factor.</para></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>sizeimage</structfield></entry>
+ <entry><para>This field is irrelevant to
+<wordasword>non-destructive Video Overlays</wordasword>. For
+<wordasword>destructive Video Overlays</wordasword> applications must
+initialize this field. For <wordasword>Video Output
+Overlays</wordasword> the driver must return a valid
+format.</para><para>Together with <structfield>base</structfield> it
+defines the framebuffer memory accessible by the
+driver.</para></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-colorspace;</entry>
+ <entry><structfield>colorspace</structfield></entry>
+ <entry>This information supplements the
+<structfield>pixelformat</structfield> and must be set by the driver,
+see <xref linkend="colorspaces" />.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u32</entry>
+ <entry><structfield>priv</structfield></entry>
+ <entry>Reserved. Drivers and applications must set this field to
+zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="framebuffer-cap">
+ <title>Frame Buffer Capability Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_FBUF_CAP_EXTERNOVERLAY</constant></entry>
+ <entry>0x0001</entry>
+ <entry>The device is capable of non-destructive overlays.
+When the driver clears this flag, only destructive overlays are
+supported. There are no drivers yet which support both destructive and
+non-destructive overlays. Video Output Overlays are in practice always
+non-destructive.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_CAP_CHROMAKEY</constant></entry>
+ <entry>0x0002</entry>
+ <entry>The device supports clipping by chroma-keying the
+images. That is, image pixels replace pixels in the VGA or video
+signal only where the latter assume a certain color. Chroma-keying
+makes no sense for destructive overlays.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_CAP_LIST_CLIPPING</constant></entry>
+ <entry>0x0004</entry>
+ <entry>The device supports clipping using a list of clip
+rectangles.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_CAP_BITMAP_CLIPPING</constant></entry>
+ <entry>0x0008</entry>
+ <entry>The device supports clipping using a bit mask.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_CAP_LOCAL_ALPHA</constant></entry>
+ <entry>0x0010</entry>
+ <entry>The device supports clipping/blending using the
+alpha channel of the framebuffer or VGA signal. Alpha blending makes
+no sense for destructive overlays.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_CAP_GLOBAL_ALPHA</constant></entry>
+ <entry>0x0020</entry>
+ <entry>The device supports alpha blending using a global
+alpha value. Alpha blending makes no sense for destructive overlays.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_CAP_LOCAL_INV_ALPHA</constant></entry>
+ <entry>0x0040</entry>
+ <entry>The device supports clipping/blending using the
+inverted alpha channel of the framebuffer or VGA signal. Alpha
+blending makes no sense for destructive overlays.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_CAP_SRC_CHROMAKEY</constant></entry>
+ <entry>0x0080</entry>
+ <entry>The device supports Source Chroma-keying. Video pixels
+with the chroma-key colors are replaced by framebuffer pixels, which is exactly opposite of
+<constant>V4L2_FBUF_CAP_CHROMAKEY</constant></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="framebuffer-flags">
+ <title>Frame Buffer Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_FBUF_FLAG_PRIMARY</constant></entry>
+ <entry>0x0001</entry>
+ <entry>The framebuffer is the primary graphics surface.
+In other words, the overlay is destructive. This flag is typically set by any
+driver that doesn't have the <constant>V4L2_FBUF_CAP_EXTERNOVERLAY</constant>
+capability and it is cleared otherwise.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_FLAG_OVERLAY</constant></entry>
+ <entry>0x0002</entry>
+ <entry>If this flag is set for a video capture device, then the
+driver will set the initial overlay size to cover the full framebuffer size,
+otherwise the existing overlay size (as set by &VIDIOC-S-FMT;) will be used.
+
+Only one video capture driver (bttv) supports this flag. The use of this flag
+for capture devices is deprecated. There is no way to detect which drivers
+support this flag, so the only reliable method of setting the overlay size is
+through &VIDIOC-S-FMT;.
+
+If this flag is set for a video output device, then the video output overlay
+window is relative to the top-left corner of the framebuffer and restricted
+to the size of the framebuffer. If it is cleared, then the video output
+overlay window is relative to the video output display.
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_FLAG_CHROMAKEY</constant></entry>
+ <entry>0x0004</entry>
+ <entry>Use chroma-keying. The chroma-key color is
+determined by the <structfield>chromakey</structfield> field of
+&v4l2-window; and negotiated with the &VIDIOC-S-FMT; ioctl, see <xref
+ linkend="overlay" />
+and
+ <xref linkend="osd" />.</entry>
+ </row>
+ <row>
+ <entry spanname="hspan">There are no flags to enable
+clipping using a list of clip rectangles or a bitmap. These methods
+are negotiated with the &VIDIOC-S-FMT; ioctl, see <xref
+ linkend="overlay" /> and <xref linkend="osd" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_FLAG_LOCAL_ALPHA</constant></entry>
+ <entry>0x0008</entry>
+ <entry>Use the alpha channel of the framebuffer to clip or
+blend framebuffer pixels with video images. The blend
+function is: output = framebuffer pixel * alpha + video pixel * (1 -
+alpha). The actual alpha depth depends on the framebuffer pixel
+format.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_FLAG_GLOBAL_ALPHA</constant></entry>
+ <entry>0x0010</entry>
+ <entry>Use a global alpha value to blend the framebuffer
+with video images. The blend function is: output = (framebuffer pixel
+* alpha + video pixel * (255 - alpha)) / 255. The alpha value is
+determined by the <structfield>global_alpha</structfield> field of
+&v4l2-window; and negotiated with the &VIDIOC-S-FMT; ioctl, see <xref
+ linkend="overlay" />
+and <xref linkend="osd" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_FLAG_LOCAL_INV_ALPHA</constant></entry>
+ <entry>0x0020</entry>
+ <entry>Like
+<constant>V4L2_FBUF_FLAG_LOCAL_ALPHA</constant>, use the alpha channel
+of the framebuffer to clip or blend framebuffer pixels with video
+images, but with an inverted alpha value. The blend function is:
+output = framebuffer pixel * (1 - alpha) + video pixel * alpha. The
+actual alpha depth depends on the framebuffer pixel format.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_FBUF_FLAG_SRC_CHROMAKEY</constant></entry>
+ <entry>0x0040</entry>
+ <entry>Use source chroma-keying. The source chroma-key color is
+determined by the <structfield>chromakey</structfield> field of
+&v4l2-window; and negotiated with the &VIDIOC-S-FMT; ioctl, see <xref
+linkend="overlay" /> and <xref linkend="osd" />.
+Both chroma-keying are mutual exclusive to each other, so same
+<structfield>chromakey</structfield> field of &v4l2-window; is being used.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EPERM</errorcode></term>
+ <listitem>
+ <para><constant>VIDIOC_S_FBUF</constant> can only be called
+by a privileged user to negotiate the parameters for a destructive
+overlay.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <constant>VIDIOC_S_FBUF</constant> parameters are unsuitable.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-fmt.xml b/Documentation/DocBook/media/v4l/vidioc-g-fmt.xml
new file mode 100644
index 000000000..4fe19a7a9
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-fmt.xml
@@ -0,0 +1,204 @@
+<refentry id="vidioc-g-fmt">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_FMT, VIDIOC_S_FMT,
+VIDIOC_TRY_FMT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_FMT</refname>
+ <refname>VIDIOC_S_FMT</refname>
+ <refname>VIDIOC_TRY_FMT</refname>
+ <refpurpose>Get or set the data format, try a format</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_format
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_FMT, VIDIOC_S_FMT, VIDIOC_TRY_FMT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>These ioctls are used to negotiate the format of data
+(typically image format) exchanged between driver and
+application.</para>
+
+ <para>To query the current parameters applications set the
+<structfield>type</structfield> field of a struct
+<structname>v4l2_format</structname> to the respective buffer (stream)
+type. For example video capture devices use
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant> or
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE</constant>. When the application
+calls the <constant>VIDIOC_G_FMT</constant> ioctl with a pointer to
+this structure the driver fills the respective member of the
+<structfield>fmt</structfield> union. In case of video capture devices
+that is either the &v4l2-pix-format; <structfield>pix</structfield> or
+the &v4l2-pix-format-mplane; <structfield>pix_mp</structfield> member.
+When the requested buffer type is not supported drivers return an
+&EINVAL;.</para>
+
+ <para>To change the current format parameters applications
+initialize the <structfield>type</structfield> field and all
+fields of the respective <structfield>fmt</structfield>
+union member. For details see the documentation of the various devices
+types in <xref linkend="devices" />. Good practice is to query the
+current parameters first, and to
+modify only those parameters not suitable for the application. When
+the application calls the <constant>VIDIOC_S_FMT</constant> ioctl
+with a pointer to a <structname>v4l2_format</structname> structure
+the driver checks
+and adjusts the parameters against hardware abilities. Drivers
+should not return an error code unless the <structfield>type</structfield> field is invalid, this is
+a mechanism to fathom device capabilities and to approach parameters
+acceptable for both the application and driver. On success the driver
+may program the hardware, allocate resources and generally prepare for
+data exchange.
+Finally the <constant>VIDIOC_S_FMT</constant> ioctl returns the
+current format parameters as <constant>VIDIOC_G_FMT</constant> does.
+Very simple, inflexible devices may even ignore all input and always
+return the default parameters. However all V4L2 devices exchanging
+data with the application must implement the
+<constant>VIDIOC_G_FMT</constant> and
+<constant>VIDIOC_S_FMT</constant> ioctl. When the requested buffer
+type is not supported drivers return an &EINVAL; on a
+<constant>VIDIOC_S_FMT</constant> attempt. When I/O is already in
+progress or the resource is not available for other reasons drivers
+return the &EBUSY;.</para>
+
+ <para>The <constant>VIDIOC_TRY_FMT</constant> ioctl is equivalent
+to <constant>VIDIOC_S_FMT</constant> with one exception: it does not
+change driver state. It can also be called at any time, never
+returning <errorcode>EBUSY</errorcode>. This function is provided to
+negotiate parameters, to learn about hardware limitations, without
+disabling I/O or possibly time consuming hardware preparations.
+Although strongly recommended drivers are not required to implement
+this ioctl.</para>
+
+ <para>The format as returned by <constant>VIDIOC_TRY_FMT</constant>
+must be identical to what <constant>VIDIOC_S_FMT</constant> returns for
+the same input or output.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-format">
+ <title>struct <structname>v4l2_format</structname></title>
+ <tgroup cols="4">
+ <colspec colname="c1" />
+ <colspec colname="c2" />
+ <colspec colname="c3" />
+ <colspec colname="c4" />
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry></entry>
+ <entry>Type of the data stream, see <xref
+ linkend="v4l2-buf-type" />.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry><structfield>fmt</structfield></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-pix-format;</entry>
+ <entry><structfield>pix</structfield></entry>
+ <entry>Definition of an image format, see <xref
+ linkend="pixfmt" />, used by video capture and output
+devices.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-pix-format-mplane;</entry>
+ <entry><structfield>pix_mp</structfield></entry>
+ <entry>Definition of an image format, see <xref
+ linkend="pixfmt" />, used by video capture and output
+devices that support the <link linkend="planar-apis">multi-planar
+version of the API</link>.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-window;</entry>
+ <entry><structfield>win</structfield></entry>
+ <entry>Definition of an overlaid image, see <xref
+ linkend="overlay" />, used by video overlay devices.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-vbi-format;</entry>
+ <entry><structfield>vbi</structfield></entry>
+ <entry>Raw VBI capture or output parameters. This is
+discussed in more detail in <xref linkend="raw-vbi" />. Used by raw VBI
+capture and output devices.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-sliced-vbi-format;</entry>
+ <entry><structfield>sliced</structfield></entry>
+ <entry>Sliced VBI capture or output parameters. See
+<xref linkend="sliced" /> for details. Used by sliced VBI
+capture and output devices.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-sdr-format;</entry>
+ <entry><structfield>sdr</structfield></entry>
+ <entry>Definition of a data format, see
+<xref linkend="pixfmt" />, used by SDR capture devices.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u8</entry>
+ <entry><structfield>raw_data</structfield>[200]</entry>
+ <entry>Place holder for future extensions.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-format; <structfield>type</structfield>
+field is invalid or the requested buffer type not supported.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-frequency.xml b/Documentation/DocBook/media/v4l/vidioc-g-frequency.xml
new file mode 100644
index 000000000..d1034fb61
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-frequency.xml
@@ -0,0 +1,148 @@
+<refentry id="vidioc-g-frequency">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_FREQUENCY, VIDIOC_S_FREQUENCY</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_FREQUENCY</refname>
+ <refname>VIDIOC_S_FREQUENCY</refname>
+ <refpurpose>Get or set tuner or modulator radio
+frequency</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_frequency
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const struct v4l2_frequency
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_FREQUENCY, VIDIOC_S_FREQUENCY</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To get the current tuner or modulator radio frequency
+applications set the <structfield>tuner</structfield> field of a
+&v4l2-frequency; to the respective tuner or modulator number (only
+input devices have tuners, only output devices have modulators), zero
+out the <structfield>reserved</structfield> array and
+call the <constant>VIDIOC_G_FREQUENCY</constant> ioctl with a pointer
+to this structure. The driver stores the current frequency in the
+<structfield>frequency</structfield> field.</para>
+
+ <para>To change the current tuner or modulator radio frequency
+applications initialize the <structfield>tuner</structfield>,
+<structfield>type</structfield> and
+<structfield>frequency</structfield> fields, and the
+<structfield>reserved</structfield> array of a &v4l2-frequency; and
+call the <constant>VIDIOC_S_FREQUENCY</constant> ioctl with a pointer
+to this structure. When the requested frequency is not possible the
+driver assumes the closest possible value. However
+<constant>VIDIOC_S_FREQUENCY</constant> is a write-only ioctl, it does
+not return the actual new frequency.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-frequency">
+ <title>struct <structname>v4l2_frequency</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>tuner</structfield></entry>
+ <entry>The tuner or modulator index number. This is the
+same value as in the &v4l2-input; <structfield>tuner</structfield>
+field and the &v4l2-tuner; <structfield>index</structfield> field, or
+the &v4l2-output; <structfield>modulator</structfield> field and the
+&v4l2-modulator; <structfield>index</structfield> field.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>The tuner type. This is the same value as in the
+&v4l2-tuner; <structfield>type</structfield> field. The type must be set
+to <constant>V4L2_TUNER_RADIO</constant> for <filename>/dev/radioX</filename>
+device nodes, and to <constant>V4L2_TUNER_ANALOG_TV</constant>
+for all others. Set this field to <constant>V4L2_TUNER_RADIO</constant> for
+modulators (currently only radio modulators are supported).
+See <xref linkend="v4l2-tuner-type" /></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>frequency</structfield></entry>
+ <entry>Tuning frequency in units of 62.5 kHz, or if the
+&v4l2-tuner; or &v4l2-modulator; <structfield>capability</structfield> flag
+<constant>V4L2_TUNER_CAP_LOW</constant> is set, in units of 62.5
+Hz. A 1 Hz unit is used when the <structfield>capability</structfield> flag
+<constant>V4L2_TUNER_CAP_1HZ</constant> is set.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[8]</entry>
+ <entry>Reserved for future extensions. Drivers and
+ applications must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <structfield>tuner</structfield> index is out of
+bounds or the value in the <structfield>type</structfield> field is
+wrong.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>A hardware seek is in progress.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-input.xml b/Documentation/DocBook/media/v4l/vidioc-g-input.xml
new file mode 100644
index 000000000..1d4306509
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-input.xml
@@ -0,0 +1,83 @@
+<refentry id="vidioc-g-input">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_INPUT, VIDIOC_S_INPUT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_INPUT</refname>
+ <refname>VIDIOC_S_INPUT</refname>
+ <refpurpose>Query or select the current video input</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>int *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_INPUT, VIDIOC_S_INPUT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the current video input applications call the
+<constant>VIDIOC_G_INPUT</constant> ioctl with a pointer to an integer
+where the driver stores the number of the input, as in the
+&v4l2-input; <structfield>index</structfield> field. This ioctl will
+fail only when there are no video inputs, returning
+<errorcode>EINVAL</errorcode>.</para>
+
+ <para>To select a video input applications store the number of the
+desired input in an integer and call the
+<constant>VIDIOC_S_INPUT</constant> ioctl with a pointer to this
+integer. Side effects are possible. For example inputs may support
+different video standards, so the driver may implicitly switch the
+current standard. Because of these possible side effects applications
+must select an input before querying or negotiating any other parameters.</para>
+
+ <para>Information about video inputs is available using the
+&VIDIOC-ENUMINPUT; ioctl.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The number of the video input is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-jpegcomp.xml b/Documentation/DocBook/media/v4l/vidioc-g-jpegcomp.xml
new file mode 100644
index 000000000..098ff4838
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-jpegcomp.xml
@@ -0,0 +1,175 @@
+<refentry id="vidioc-g-jpegcomp">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_JPEGCOMP, VIDIOC_S_JPEGCOMP</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_JPEGCOMP</refname>
+ <refname>VIDIOC_S_JPEGCOMP</refname>
+ <refpurpose></refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>v4l2_jpegcompression *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const v4l2_jpegcompression *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_JPEGCOMP, VIDIOC_S_JPEGCOMP</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>These ioctls are <emphasis role="bold">deprecated</emphasis>.
+ New drivers and applications should use <link linkend="jpeg-controls">
+ JPEG class controls</link> for image quality and JPEG markers control.
+ </para>
+
+ <para>[to do]</para>
+
+ <para>Ronald Bultje elaborates:</para>
+
+ <!-- See video4linux-list@redhat.com on 16 Oct 2002, subject
+"Re: [V4L] Re: v4l2 api / Zoran v4l2_jpegcompression" -->
+
+ <para>APP is some application-specific information. The
+application can set it itself, and it'll be stored in the JPEG-encoded
+fields (eg; interlacing information for in an AVI or so). COM is the
+same, but it's comments, like 'encoded by me' or so.</para>
+
+ <para>jpeg_markers describes whether the huffman tables,
+quantization tables and the restart interval information (all
+JPEG-specific stuff) should be stored in the JPEG-encoded fields.
+These define how the JPEG field is encoded. If you omit them,
+applications assume you've used standard encoding. You usually do want
+to add them.</para>
+
+ <!-- NB VIDIOC_S_JPEGCOMP is w/o. -->
+
+ <table pgwide="1" frame="none" id="v4l2-jpegcompression">
+ <title>struct <structname>v4l2_jpegcompression</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>int</entry>
+ <entry><structfield>quality</structfield></entry>
+ <entry>Deprecated. If <link linkend="jpeg-quality-control"><constant>
+ V4L2_CID_JPEG_COMPRESSION_QUALITY</constant></link> control is exposed
+ by a driver applications should use it instead and ignore this field.
+ </entry>
+ </row>
+ <row>
+ <entry>int</entry>
+ <entry><structfield>APPn</structfield></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>int</entry>
+ <entry><structfield>APP_len</structfield></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>char</entry>
+ <entry><structfield>APP_data</structfield>[60]</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>int</entry>
+ <entry><structfield>COM_len</structfield></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>char</entry>
+ <entry><structfield>COM_data</structfield>[60]</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>jpeg_markers</structfield></entry>
+ <entry>See <xref linkend="jpeg-markers"/>. Deprecated.
+ If <link linkend="jpeg-active-marker-control"><constant>
+ V4L2_CID_JPEG_ACTIVE_MARKER</constant></link> control
+ is exposed by a driver applications should use it instead
+ and ignore this field.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="jpeg-markers">
+ <title>JPEG Markers Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_JPEG_MARKER_DHT</constant></entry>
+ <entry>(1&lt;&lt;3)</entry>
+ <entry>Define Huffman Tables</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_JPEG_MARKER_DQT</constant></entry>
+ <entry>(1&lt;&lt;4)</entry>
+ <entry>Define Quantization Tables</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_JPEG_MARKER_DRI</constant></entry>
+ <entry>(1&lt;&lt;5)</entry>
+ <entry>Define Restart Interval</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_JPEG_MARKER_COM</constant></entry>
+ <entry>(1&lt;&lt;6)</entry>
+ <entry>Comment segment</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_JPEG_MARKER_APP</constant></entry>
+ <entry>(1&lt;&lt;7)</entry>
+ <entry>App segment, driver will always use APP0</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-modulator.xml b/Documentation/DocBook/media/v4l/vidioc-g-modulator.xml
new file mode 100644
index 000000000..7068b599a
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-modulator.xml
@@ -0,0 +1,240 @@
+<refentry id="vidioc-g-modulator">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_MODULATOR, VIDIOC_S_MODULATOR</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_MODULATOR</refname>
+ <refname>VIDIOC_S_MODULATOR</refname>
+ <refpurpose>Get or set modulator attributes</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_modulator
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const struct v4l2_modulator
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_MODULATOR, VIDIOC_S_MODULATOR</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the attributes of a modulator applications initialize
+the <structfield>index</structfield> field and zero out the
+<structfield>reserved</structfield> array of a &v4l2-modulator; and
+call the <constant>VIDIOC_G_MODULATOR</constant> ioctl with a pointer
+to this structure. Drivers fill the rest of the structure or return an
+&EINVAL; when the index is out of bounds. To enumerate all modulators
+applications shall begin at index zero, incrementing by one until the
+driver returns <errorcode>EINVAL</errorcode>.</para>
+
+ <para>Modulators have two writable properties, an audio
+modulation set and the radio frequency. To change the modulated audio
+subprograms, applications initialize the <structfield>index
+</structfield> and <structfield>txsubchans</structfield> fields and the
+<structfield>reserved</structfield> array and call the
+<constant>VIDIOC_S_MODULATOR</constant> ioctl. Drivers may choose a
+different audio modulation if the request cannot be satisfied. However
+this is a write-only ioctl, it does not return the actual audio
+modulation selected.</para>
+
+ <para>To change the radio frequency the &VIDIOC-S-FREQUENCY; ioctl
+is available.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-modulator">
+ <title>struct <structname>v4l2_modulator</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Identifies the modulator, set by the
+application.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>name</structfield>[32]</entry>
+ <entry>Name of the modulator, a NUL-terminated ASCII
+string. This information is intended for the user.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capability</structfield></entry>
+ <entry>Modulator capability flags. No flags are defined
+for this field, the tuner flags in &v4l2-tuner;
+are used accordingly. The audio flags indicate the ability
+to encode audio subprograms. They will <emphasis>not</emphasis>
+change for example with the current video standard.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>rangelow</structfield></entry>
+ <entry>The lowest tunable frequency in units of 62.5
+KHz, or if the <structfield>capability</structfield> flag
+<constant>V4L2_TUNER_CAP_LOW</constant> is set, in units of 62.5
+Hz, or if the <structfield>capability</structfield> flag
+<constant>V4L2_TUNER_CAP_1HZ</constant> is set, in units of 1 Hz.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>rangehigh</structfield></entry>
+ <entry>The highest tunable frequency in units of 62.5
+KHz, or if the <structfield>capability</structfield> flag
+<constant>V4L2_TUNER_CAP_LOW</constant> is set, in units of 62.5
+Hz, or if the <structfield>capability</structfield> flag
+<constant>V4L2_TUNER_CAP_1HZ</constant> is set, in units of 1 Hz.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>txsubchans</structfield></entry>
+ <entry>With this field applications can determine how
+audio sub-carriers shall be modulated. It contains a set of flags as
+defined in <xref linkend="modulator-txsubchans" />. Note the tuner
+<structfield>rxsubchans</structfield> flags are reused, but the
+semantics are different. Video output devices are assumed to have an
+analog or PCM audio input with 1-3 channels. The
+<structfield>txsubchans</structfield> flags select one or more
+channels for modulation, together with some audio subprogram
+indicator, for example a stereo pilot tone.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[4]</entry>
+ <entry>Reserved for future extensions. Drivers and
+applications must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="modulator-txsubchans">
+ <title>Modulator Audio Transmission Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_MONO</constant></entry>
+ <entry>0x0001</entry>
+ <entry>Modulate channel 1 as mono audio, when the input
+has more channels, a down-mix of channel 1 and 2. This flag does not
+combine with <constant>V4L2_TUNER_SUB_STEREO</constant> or
+<constant>V4L2_TUNER_SUB_LANG1</constant>.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_STEREO</constant></entry>
+ <entry>0x0002</entry>
+ <entry>Modulate channel 1 and 2 as left and right
+channel of a stereo audio signal. When the input has only one channel
+or two channels and <constant>V4L2_TUNER_SUB_SAP</constant> is also
+set, channel 1 is encoded as left and right channel. This flag does
+not combine with <constant>V4L2_TUNER_SUB_MONO</constant> or
+<constant>V4L2_TUNER_SUB_LANG1</constant>. When the driver does not
+support stereo audio it shall fall back to mono.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_LANG1</constant></entry>
+ <entry>0x0008</entry>
+ <entry>Modulate channel 1 and 2 as primary and secondary
+language of a bilingual audio signal. When the input has only one
+channel it is used for both languages. It is not possible to encode
+the primary or secondary language only. This flag does not combine
+with <constant>V4L2_TUNER_SUB_MONO</constant>,
+<constant>V4L2_TUNER_SUB_STEREO</constant> or
+<constant>V4L2_TUNER_SUB_SAP</constant>. If the hardware does not
+support the respective audio matrix, or the current video standard
+does not permit bilingual audio the
+<constant>VIDIOC_S_MODULATOR</constant> ioctl shall return an &EINVAL;
+and the driver shall fall back to mono or stereo mode.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_LANG2</constant></entry>
+ <entry>0x0004</entry>
+ <entry>Same effect as
+<constant>V4L2_TUNER_SUB_SAP</constant>.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_SAP</constant></entry>
+ <entry>0x0004</entry>
+ <entry>When combined with <constant>V4L2_TUNER_SUB_MONO
+</constant> the first channel is encoded as mono audio, the last
+channel as Second Audio Program. When the input has only one channel
+it is used for both audio tracks. When the input has three channels
+the mono track is a down-mix of channel 1 and 2. When combined with
+<constant>V4L2_TUNER_SUB_STEREO</constant> channel 1 and 2 are
+encoded as left and right stereo audio, channel 3 as Second Audio
+Program. When the input has only two channels, the first is encoded as
+left and right channel and the second as SAP. When the input has only
+one channel it is used for all audio tracks. It is not possible to
+encode a Second Audio Program only. This flag must combine with
+<constant>V4L2_TUNER_SUB_MONO</constant> or
+<constant>V4L2_TUNER_SUB_STEREO</constant>. If the hardware does not
+support the respective audio matrix, or the current video standard
+does not permit SAP the <constant>VIDIOC_S_MODULATOR</constant> ioctl
+shall return an &EINVAL; and driver shall fall back to mono or stereo
+mode.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_RDS</constant></entry>
+ <entry>0x0010</entry>
+ <entry>Enable the RDS encoder for a radio FM transmitter.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-modulator;
+<structfield>index</structfield> is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-output.xml b/Documentation/DocBook/media/v4l/vidioc-g-output.xml
new file mode 100644
index 000000000..4533068ec
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-output.xml
@@ -0,0 +1,85 @@
+<refentry id="vidioc-g-output">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_OUTPUT, VIDIOC_S_OUTPUT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_OUTPUT</refname>
+ <refname>VIDIOC_S_OUTPUT</refname>
+ <refpurpose>Query or select the current video output</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>int *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_OUTPUT, VIDIOC_S_OUTPUT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the current video output applications call the
+<constant>VIDIOC_G_OUTPUT</constant> ioctl with a pointer to an integer
+where the driver stores the number of the output, as in the
+&v4l2-output; <structfield>index</structfield> field. This ioctl
+will fail only when there are no video outputs, returning the
+&EINVAL;.</para>
+
+ <para>To select a video output applications store the number of the
+desired output in an integer and call the
+<constant>VIDIOC_S_OUTPUT</constant> ioctl with a pointer to this integer.
+Side effects are possible. For example outputs may support different
+video standards, so the driver may implicitly switch the current
+standard.
+standard. Because of these possible side effects applications
+must select an output before querying or negotiating any other parameters.</para>
+
+ <para>Information about video outputs is available using the
+&VIDIOC-ENUMOUTPUT; ioctl.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The number of the video output is out of bounds, or
+there are no video outputs at all.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-parm.xml b/Documentation/DocBook/media/v4l/vidioc-g-parm.xml
new file mode 100644
index 000000000..f4e28e7d4
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-parm.xml
@@ -0,0 +1,314 @@
+<refentry id="vidioc-g-parm">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_PARM, VIDIOC_S_PARM</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_PARM</refname>
+ <refname>VIDIOC_S_PARM</refname>
+ <refpurpose>Get or set streaming parameters</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>v4l2_streamparm *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_PARM, VIDIOC_S_PARM</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>The current video standard determines a nominal number of
+frames per second. If less than this number of frames is to be
+captured or output, applications can request frame skipping or
+duplicating on the driver side. This is especially useful when using
+the <function>read()</function> or <function>write()</function>, which
+are not augmented by timestamps or sequence counters, and to avoid
+unnecessary data copying.</para>
+
+ <para>Further these ioctls can be used to determine the number of
+buffers used internally by a driver in read/write mode. For
+implications see the section discussing the &func-read;
+function.</para>
+
+ <para>To get and set the streaming parameters applications call
+the <constant>VIDIOC_G_PARM</constant> and
+<constant>VIDIOC_S_PARM</constant> ioctl, respectively. They take a
+pointer to a struct <structname>v4l2_streamparm</structname> which
+contains a union holding separate parameters for input and output
+devices.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-streamparm">
+ <title>struct <structname>v4l2_streamparm</structname></title>
+ <tgroup cols="4">
+ &cs-ustr;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry></entry>
+ <entry>The buffer (stream) type, same as &v4l2-format;
+<structfield>type</structfield>, set by the application. See <xref
+ linkend="v4l2-buf-type" /></entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry><structfield>parm</structfield></entry>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-captureparm;</entry>
+ <entry><structfield>capture</structfield></entry>
+ <entry>Parameters for capture devices, used when
+<structfield>type</structfield> is
+<constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant>.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>&v4l2-outputparm;</entry>
+ <entry><structfield>output</structfield></entry>
+ <entry>Parameters for output devices, used when
+<structfield>type</structfield> is
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant>.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u8</entry>
+ <entry><structfield>raw_data</structfield>[200]</entry>
+ <entry>A place holder for future extensions.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-captureparm">
+ <title>struct <structname>v4l2_captureparm</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capability</structfield></entry>
+ <entry>See <xref linkend="parm-caps" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capturemode</structfield></entry>
+ <entry>Set by drivers and applications, see <xref linkend="parm-flags" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-fract;</entry>
+ <entry><structfield>timeperframe</structfield></entry>
+ <entry><para>This is the desired period between
+successive frames captured by the driver, in seconds. The
+field is intended to skip frames on the driver side, saving I/O
+bandwidth.</para><para>Applications store here the desired frame
+period, drivers return the actual frame period, which must be greater
+or equal to the nominal frame period determined by the current video
+standard (&v4l2-standard; <structfield>frameperiod</structfield>
+field). Changing the video standard (also implicitly by switching the
+video input) may reset this parameter to the nominal frame period. To
+reset manually applications can just set this field to
+zero.</para><para>Drivers support this function only when they set the
+<constant>V4L2_CAP_TIMEPERFRAME</constant> flag in the
+<structfield>capability</structfield> field.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>extendedmode</structfield></entry>
+ <entry>Custom (driver specific) streaming parameters. When
+unused, applications and drivers must set this field to zero.
+Applications using this field should check the driver name and
+version, see <xref linkend="querycap" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>readbuffers</structfield></entry>
+ <entry>Applications set this field to the desired number
+of buffers used internally by the driver in &func-read; mode. Drivers
+return the actual number of buffers. When an application requests zero
+buffers, drivers should just return the current setting rather than
+the minimum or an error code. For details see <xref
+ linkend="rw" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[4]</entry>
+ <entry>Reserved for future extensions. Drivers and
+applications must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-outputparm">
+ <title>struct <structname>v4l2_outputparm</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capability</structfield></entry>
+ <entry>See <xref linkend="parm-caps" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>outputmode</structfield></entry>
+ <entry>Set by drivers and applications, see <xref
+ linkend="parm-flags" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-fract;</entry>
+ <entry><structfield>timeperframe</structfield></entry>
+ <entry>This is the desired period between
+successive frames output by the driver, in seconds.</entry>
+ </row>
+ <row>
+ <entry spanname="hspan"><para>The field is intended to
+repeat frames on the driver side in &func-write; mode (in streaming
+mode timestamps can be used to throttle the output), saving I/O
+bandwidth.</para><para>Applications store here the desired frame
+period, drivers return the actual frame period, which must be greater
+or equal to the nominal frame period determined by the current video
+standard (&v4l2-standard; <structfield>frameperiod</structfield>
+field). Changing the video standard (also implicitly by switching the
+video output) may reset this parameter to the nominal frame period. To
+reset manually applications can just set this field to
+zero.</para><para>Drivers support this function only when they set the
+<constant>V4L2_CAP_TIMEPERFRAME</constant> flag in the
+<structfield>capability</structfield> field.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>extendedmode</structfield></entry>
+ <entry>Custom (driver specific) streaming parameters. When
+unused, applications and drivers must set this field to zero.
+Applications using this field should check the driver name and
+version, see <xref linkend="querycap" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>writebuffers</structfield></entry>
+ <entry>Applications set this field to the desired number
+of buffers used internally by the driver in
+<function>write()</function> mode. Drivers return the actual number of
+buffers. When an application requests zero buffers, drivers should
+just return the current setting rather than the minimum or an error
+code. For details see <xref linkend="rw" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[4]</entry>
+ <entry>Reserved for future extensions. Drivers and
+applications must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="parm-caps">
+ <title>Streaming Parameters Capabilites</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CAP_TIMEPERFRAME</constant></entry>
+ <entry>0x1000</entry>
+ <entry>The frame skipping/repeating controlled by the
+<structfield>timeperframe</structfield> field is supported.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="parm-flags">
+ <title>Capture Parameters Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_MODE_HIGHQUALITY</constant></entry>
+ <entry>0x0001</entry>
+ <entry><para>High quality imaging mode. High quality mode
+is intended for still imaging applications. The idea is to get the
+best possible image quality that the hardware can deliver. It is not
+defined how the driver writer may achieve that; it will depend on the
+hardware and the ingenuity of the driver writer. High quality mode is
+a different mode from the the regular motion video capture modes. In
+high quality mode:<itemizedlist>
+ <listitem>
+ <para>The driver may be able to capture higher
+resolutions than for motion capture.</para>
+ </listitem>
+ <listitem>
+ <para>The driver may support fewer pixel formats
+than motion capture (eg; true color).</para>
+ </listitem>
+ <listitem>
+ <para>The driver may capture and arithmetically
+combine multiple successive fields or frames to remove color edge
+artifacts and reduce the noise in the video data.
+</para>
+ </listitem>
+ <listitem>
+ <para>The driver may capture images in slices like
+a scanner in order to handle larger format images than would otherwise
+be possible. </para>
+ </listitem>
+ <listitem>
+ <para>An image capture operation may be
+significantly slower than motion capture. </para>
+ </listitem>
+ <listitem>
+ <para>Moving objects in the image might have
+excessive motion blur. </para>
+ </listitem>
+ <listitem>
+ <para>Capture might only work through the
+<function>read()</function> call.</para>
+ </listitem>
+ </itemizedlist></para></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-priority.xml b/Documentation/DocBook/media/v4l/vidioc-g-priority.xml
new file mode 100644
index 000000000..6a81b4fe9
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-priority.xml
@@ -0,0 +1,135 @@
+<refentry id="vidioc-g-priority">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_PRIORITY, VIDIOC_S_PRIORITY</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_PRIORITY</refname>
+ <refname>VIDIOC_S_PRIORITY</refname>
+ <refpurpose>Query or request the access priority associated with a
+file descriptor</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>enum v4l2_priority *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const enum v4l2_priority *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_PRIORITY, VIDIOC_S_PRIORITY</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para>Pointer to an enum v4l2_priority type.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the current access priority
+applications call the <constant>VIDIOC_G_PRIORITY</constant> ioctl
+with a pointer to an enum v4l2_priority variable where the driver stores
+the current priority.</para>
+
+ <para>To request an access priority applications store the
+desired priority in an enum v4l2_priority variable and call
+<constant>VIDIOC_S_PRIORITY</constant> ioctl with a pointer to this
+variable.</para>
+
+ <table frame="none" pgwide="1" id="v4l2-priority">
+ <title>enum v4l2_priority</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_PRIORITY_UNSET</constant></entry>
+ <entry>0</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_PRIORITY_BACKGROUND</constant></entry>
+ <entry>1</entry>
+ <entry>Lowest priority, usually applications running in
+background, for example monitoring VBI transmissions. A proxy
+application running in user space will be necessary if multiple
+applications want to read from a device at this priority.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_PRIORITY_INTERACTIVE</constant></entry>
+ <entry>2</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_PRIORITY_DEFAULT</constant></entry>
+ <entry>2</entry>
+ <entry>Medium priority, usually applications started and
+interactively controlled by the user. For example TV viewers, Teletext
+browsers, or just "panel" applications to change the channel or video
+controls. This is the default priority unless an application requests
+another.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_PRIORITY_RECORD</constant></entry>
+ <entry>3</entry>
+ <entry>Highest priority. Only one file descriptor can have
+this priority, it blocks any other fd from changing device properties.
+Usually applications which must not be interrupted, like video
+recording.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The requested priority value is invalid.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>Another application already requested higher
+priority.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-selection.xml b/Documentation/DocBook/media/v4l/vidioc-g-selection.xml
new file mode 100644
index 000000000..0bb5c060d
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-selection.xml
@@ -0,0 +1,239 @@
+<refentry id="vidioc-g-selection">
+
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_SELECTION, VIDIOC_S_SELECTION</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_SELECTION</refname>
+ <refname>VIDIOC_S_SELECTION</refname>
+ <refpurpose>Get or set one of the selection rectangles</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_selection *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_SELECTION, VIDIOC_S_SELECTION</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental"> experimental </link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>The ioctls are used to query and configure selection rectangles.</para>
+
+<para>To query the cropping (composing) rectangle set &v4l2-selection;
+<structfield> type </structfield> field to the respective buffer type.
+Do not use the multiplanar buffer types. Use <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant>
+instead of <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE</constant> and use
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant> instead of
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE</constant>. The next step is
+setting the value of &v4l2-selection; <structfield>target</structfield> field
+to <constant>V4L2_SEL_TGT_CROP</constant> (<constant>V4L2_SEL_TGT_COMPOSE</constant>).
+Please refer to table <xref linkend="v4l2-selections-common" /> or <xref linkend="selection-api" />
+for additional targets. The <structfield>flags</structfield> and <structfield>reserved
+</structfield> fields of &v4l2-selection; are ignored and they must be filled
+with zeros. The driver fills the rest of the structure or
+returns &EINVAL; if incorrect buffer type or target was used. If cropping
+(composing) is not supported then the active rectangle is not mutable and it is
+always equal to the bounds rectangle. Finally, the &v4l2-rect;
+<structfield>r</structfield> rectangle is filled with the current cropping
+(composing) coordinates. The coordinates are expressed in driver-dependent
+units. The only exception are rectangles for images in raw formats, whose
+coordinates are always expressed in pixels.</para>
+
+<para>To change the cropping (composing) rectangle set the &v4l2-selection;
+<structfield>type</structfield> field to the respective buffer type. Do not
+use multiplanar buffers. Use <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE</constant>
+instead of <constant>V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE</constant>. Use
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant> instead of
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE</constant>. The next step is
+setting the value of &v4l2-selection; <structfield>target</structfield> to
+<constant>V4L2_SEL_TGT_CROP</constant> (<constant>V4L2_SEL_TGT_COMPOSE</constant>).
+Please refer to table <xref linkend="v4l2-selections-common" /> or <xref linkend="selection-api" />
+for additional targets. The &v4l2-rect; <structfield>r</structfield> rectangle need to be
+set to the desired active area. Field &v4l2-selection; <structfield> reserved
+</structfield> is ignored and must be filled with zeros. The driver may adjust
+coordinates of the requested rectangle. An application may
+introduce constraints to control rounding behaviour. The &v4l2-selection;
+<structfield>flags</structfield> field must be set to one of the following:
+
+<itemizedlist>
+ <listitem>
+<para><constant>0</constant> - The driver can adjust the rectangle size freely
+and shall choose a crop/compose rectangle as close as possible to the requested
+one.</para>
+ </listitem>
+ <listitem>
+<para><constant>V4L2_SEL_FLAG_GE</constant> - The driver is not allowed to
+shrink the rectangle. The original rectangle must lay inside the adjusted
+one.</para>
+ </listitem>
+ <listitem>
+<para><constant>V4L2_SEL_FLAG_LE</constant> - The driver is not allowed to
+enlarge the rectangle. The adjusted rectangle must lay inside the original
+one.</para>
+ </listitem>
+ <listitem>
+<para><constant>V4L2_SEL_FLAG_GE | V4L2_SEL_FLAG_LE</constant> - The driver
+must choose the size exactly the same as in the requested rectangle.</para>
+ </listitem>
+</itemizedlist>
+
+Please refer to <xref linkend="sel-const-adjust" />.
+
+</para>
+
+<para> The driver may have to adjusts the requested dimensions against hardware
+limits and other parts as the pipeline, i.e. the bounds given by the
+capture/output window or TV display. The closest possible values of horizontal
+and vertical offset and sizes are chosen according to following priority:
+
+<orderedlist>
+ <listitem>
+ <para>Satisfy constraints from &v4l2-selection; <structfield>flags</structfield>.</para>
+ </listitem>
+ <listitem>
+ <para>Adjust width, height, left, and top to hardware limits and alignments.</para>
+ </listitem>
+ <listitem>
+ <para>Keep center of adjusted rectangle as close as possible to the original one.</para>
+ </listitem>
+ <listitem>
+ <para>Keep width and height as close as possible to original ones.</para>
+ </listitem>
+ <listitem>
+ <para>Keep horizontal and vertical offset as close as possible to original ones.</para>
+ </listitem>
+</orderedlist>
+
+On success the &v4l2-rect; <structfield>r</structfield> field contains
+the adjusted rectangle. When the parameters are unsuitable the application may
+modify the cropping (composing) or image parameters and repeat the cycle until
+satisfactory parameters have been negotiated. If constraints flags have to be
+violated at then ERANGE is returned. The error indicates that <emphasis>there
+exist no rectangle</emphasis> that satisfies the constraints.</para>
+
+ <para>Selection targets and flags are documented in <xref
+ linkend="v4l2-selections-common"/>.</para>
+
+ <para>
+ <figure id="sel-const-adjust">
+ <title>Size adjustments with constraint flags.</title>
+ <mediaobject>
+ <imageobject>
+ <imagedata fileref="constraints.png" format="PNG" />
+ </imageobject>
+ <textobject>
+ <phrase>Behaviour of rectangle adjustment for different constraint
+ flags.</phrase>
+ </textobject>
+ </mediaobject>
+ </figure>
+ </para>
+
+ <para>
+ <table pgwide="1" frame="none" id="v4l2-selection">
+ <title>struct <structname>v4l2_selection</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of the buffer (from &v4l2-buf-type;).</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>target</structfield></entry>
+ <entry>Used to select between <link linkend="v4l2-selections-common"> cropping
+ and composing rectangles</link>.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Flags controlling the selection rectangle adjustments, refer to
+ <link linkend="v4l2-selection-flags">selection flags</link>.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-rect;</entry>
+ <entry><structfield>r</structfield></entry>
+ <entry>The selection rectangle.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved[9]</structfield></entry>
+ <entry>Reserved fields for future use.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>Given buffer type <structfield>type</structfield> or
+the selection target <structfield>target</structfield> is not supported,
+or the <structfield>flags</structfield> argument is not valid.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ERANGE</errorcode></term>
+ <listitem>
+ <para>It is not possible to adjust &v4l2-rect; <structfield>
+r</structfield> rectangle to satisfy all contraints given in the
+<structfield>flags</structfield> argument.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>It is not possible to apply change of the selection rectangle
+at the moment. Usually because streaming is in progress.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-sliced-vbi-cap.xml b/Documentation/DocBook/media/v4l/vidioc-g-sliced-vbi-cap.xml
new file mode 100644
index 000000000..d05623c55
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-sliced-vbi-cap.xml
@@ -0,0 +1,255 @@
+<refentry id="vidioc-g-sliced-vbi-cap">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_SLICED_VBI_CAP</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_SLICED_VBI_CAP</refname>
+ <refpurpose>Query sliced VBI capabilities</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_sliced_vbi_cap *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_SLICED_VBI_CAP</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To find out which data services are supported by a sliced
+VBI capture or output device, applications initialize the
+<structfield>type</structfield> field of a &v4l2-sliced-vbi-cap;,
+clear the <structfield>reserved</structfield> array and
+call the <constant>VIDIOC_G_SLICED_VBI_CAP</constant> ioctl. The
+driver fills in the remaining fields or returns an &EINVAL; if the
+sliced VBI API is unsupported or <structfield>type</structfield>
+is invalid.</para>
+
+ <para>Note the <structfield>type</structfield> field was added,
+and the ioctl changed from read-only to write-read, in Linux 2.6.19.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-sliced-vbi-cap">
+ <title>struct <structname>v4l2_sliced_vbi_cap</structname></title>
+ <tgroup cols="5">
+ <colspec colname="c1" colwidth="3*" />
+ <colspec colname="c2" colwidth="3*" />
+ <colspec colname="c3" colwidth="2*" />
+ <colspec colname="c4" colwidth="2*" />
+ <colspec colname="c5" colwidth="2*" />
+ <spanspec spanname="hspan" namest="c3" nameend="c5" />
+ <tbody valign="top">
+ <row>
+ <entry>__u16</entry>
+ <entry><structfield>service_set</structfield></entry>
+ <entry spanname="hspan">A set of all data services
+supported by the driver. Equal to the union of all elements of the
+<structfield>service_lines </structfield> array.</entry>
+ </row>
+ <row>
+ <entry>__u16</entry>
+ <entry><structfield>service_lines</structfield>[2][24]</entry>
+ <entry spanname="hspan">Each element of this array
+contains a set of data services the hardware can look for or insert
+into a particular scan line. Data services are defined in <xref
+ linkend="vbi-services" />. Array indices map to ITU-R
+line numbers (see also <xref
+ linkend="vbi-525" /> and <xref
+linkend="vbi-625" />) as follows:</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry>Element</entry>
+ <entry>525 line systems</entry>
+ <entry>625 line systems</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><structfield>service_lines</structfield>[0][1]</entry>
+ <entry align="center">1</entry>
+ <entry align="center">1</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><structfield>service_lines</structfield>[0][23]</entry>
+ <entry align="center">23</entry>
+ <entry align="center">23</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><structfield>service_lines</structfield>[1][1]</entry>
+ <entry align="center">264</entry>
+ <entry align="center">314</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><structfield>service_lines</structfield>[1][23]</entry>
+ <entry align="center">286</entry>
+ <entry align="center">336</entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry spanname="hspan">The number of VBI lines the
+hardware can capture or output per frame, or the number of services it
+can identify on a given line may be limited. For example on PAL line
+16 the hardware may be able to look for a VPS or Teletext signal, but
+not both at the same time. Applications can learn about these limits
+using the &VIDIOC-S-FMT; ioctl as described in <xref
+ linkend="sliced" />.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry spanname="hspan">Drivers must set
+<structfield>service_lines</structfield>[0][0] and
+<structfield>service_lines</structfield>[1][0] to zero.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of the data stream, see <xref
+ linkend="v4l2-buf-type" />. Should be
+<constant>V4L2_BUF_TYPE_SLICED_VBI_CAPTURE</constant> or
+<constant>V4L2_BUF_TYPE_SLICED_VBI_OUTPUT</constant>.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[3]</entry>
+ <entry spanname="hspan">This array is reserved for future
+extensions. Applications and drivers must set it to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <!-- See also dev-sliced-vbi.sgml -->
+ <table pgwide="1" frame="none" id="vbi-services">
+ <title>Sliced VBI services</title>
+ <tgroup cols="5">
+ <colspec colname="c1" colwidth="2*" />
+ <colspec colname="c2" colwidth="1*" />
+ <colspec colname="c3" colwidth="1*" />
+ <colspec colname="c4" colwidth="2*" />
+ <colspec colname="c5" colwidth="2*" />
+ <spanspec spanname='rlp' namest='c3' nameend='c5' />
+ <thead>
+ <row>
+ <entry>Symbol</entry>
+ <entry>Value</entry>
+ <entry>Reference</entry>
+ <entry>Lines, usually</entry>
+ <entry>Payload</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_SLICED_TELETEXT_B</constant> (Teletext
+System B)</entry>
+ <entry>0x0001</entry>
+ <entry><xref linkend="ets300706" />, <xref linkend="itu653" /></entry>
+ <entry>PAL/SECAM line 7-22, 320-335 (second field 7-22)</entry>
+ <entry>Last 42 of the 45 byte Teletext packet, that is
+without clock run-in and framing code, lsb first transmitted.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SLICED_VPS</constant></entry>
+ <entry>0x0400</entry>
+ <entry><xref linkend="ets300231" /></entry>
+ <entry>PAL line 16</entry>
+ <entry>Byte number 3 to 15 according to Figure 9 of
+ETS&nbsp;300&nbsp;231, lsb first transmitted.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SLICED_CAPTION_525</constant></entry>
+ <entry>0x1000</entry>
+ <entry><xref linkend="cea608" /></entry>
+ <entry>NTSC line 21, 284 (second field 21)</entry>
+ <entry>Two bytes in transmission order, including parity
+bit, lsb first transmitted.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SLICED_WSS_625</constant></entry>
+ <entry>0x4000</entry>
+ <entry><xref linkend="en300294" />, <xref linkend="itu1119" /></entry>
+ <entry>PAL/SECAM line 23</entry>
+ <entry><screen>
+Byte 0 1
+ msb lsb msb lsb
+Bit 7 6 5 4 3 2 1 0 x x 13 12 11 10 9
+</screen></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SLICED_VBI_525</constant></entry>
+ <entry>0x1000</entry>
+ <entry spanname="rlp">Set of services applicable to 525
+line systems.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_SLICED_VBI_625</constant></entry>
+ <entry>0x4401</entry>
+ <entry spanname="rlp">Set of services applicable to 625
+line systems.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The value in the <structfield>type</structfield> field is
+wrong.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-std.xml b/Documentation/DocBook/media/v4l/vidioc-g-std.xml
new file mode 100644
index 000000000..4a898417d
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-std.xml
@@ -0,0 +1,98 @@
+<refentry id="vidioc-g-std">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_STD, VIDIOC_S_STD</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_STD</refname>
+ <refname>VIDIOC_S_STD</refname>
+ <refpurpose>Query or select the video standard of the current input</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>v4l2_std_id
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const v4l2_std_id
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_STD, VIDIOC_S_STD</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query and select the current video standard applications
+use the <constant>VIDIOC_G_STD</constant> and <constant>VIDIOC_S_STD</constant> ioctls which take a pointer to a
+&v4l2-std-id; type as argument. <constant>VIDIOC_G_STD</constant> can
+return a single flag or a set of flags as in &v4l2-standard; field
+<structfield>id</structfield>. The flags must be unambiguous such
+that they appear in only one enumerated <structname>v4l2_standard</structname> structure.</para>
+
+ <para><constant>VIDIOC_S_STD</constant> accepts one or more
+flags, being a write-only ioctl it does not return the actual new standard as
+<constant>VIDIOC_G_STD</constant> does. When no flags are given or
+the current input does not support the requested standard the driver
+returns an &EINVAL;. When the standard set is ambiguous drivers may
+return <errorcode>EINVAL</errorcode> or choose any of the requested
+standards. If the current input or output does not support standard video timings (e.g. if
+&VIDIOC-ENUMINPUT; does not set the <constant>V4L2_IN_CAP_STD</constant> flag), then
+&ENODATA; is returned.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <constant>VIDIOC_S_STD</constant> parameter was unsuitable.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENODATA</errorcode></term>
+ <listitem>
+ <para>Standard video timings are not supported for this input or output.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-g-tuner.xml b/Documentation/DocBook/media/v4l/vidioc-g-tuner.xml
new file mode 100644
index 000000000..b0d865933
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-g-tuner.xml
@@ -0,0 +1,578 @@
+<refentry id="vidioc-g-tuner">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_G_TUNER, VIDIOC_S_TUNER</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_G_TUNER</refname>
+ <refname>VIDIOC_S_TUNER</refname>
+ <refpurpose>Get or set tuner attributes</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_tuner
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const struct v4l2_tuner
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_G_TUNER, VIDIOC_S_TUNER</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the attributes of a tuner applications initialize the
+<structfield>index</structfield> field and zero out the
+<structfield>reserved</structfield> array of a &v4l2-tuner; and call the
+<constant>VIDIOC_G_TUNER</constant> ioctl with a pointer to this
+structure. Drivers fill the rest of the structure or return an
+&EINVAL; when the index is out of bounds. To enumerate all tuners
+applications shall begin at index zero, incrementing by one until the
+driver returns <errorcode>EINVAL</errorcode>.</para>
+
+ <para>Tuners have two writable properties, the audio mode and
+the radio frequency. To change the audio mode, applications initialize
+the <structfield>index</structfield>,
+<structfield>audmode</structfield> and
+<structfield>reserved</structfield> fields and call the
+<constant>VIDIOC_S_TUNER</constant> ioctl. This will
+<emphasis>not</emphasis> change the current tuner, which is determined
+by the current video input. Drivers may choose a different audio mode
+if the requested mode is invalid or unsupported. Since this is a
+<!-- FIXME -->write-only ioctl, it does not return the actually
+selected audio mode.</para>
+
+ <para>To change the radio frequency the &VIDIOC-S-FREQUENCY; ioctl
+is available.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-tuner">
+ <title>struct <structname>v4l2_tuner</structname></title>
+ <tgroup cols="3">
+ <colspec colname="c1" colwidth="1*" />
+ <colspec colname="c2" colwidth="1*" />
+ <colspec colname="c3" colwidth="1*" />
+ <colspec colname="c4" colwidth="1*" />
+ <spanspec spanname="hspan" namest="c3" nameend="c4" />
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry spanname="hspan">Identifies the tuner, set by the
+application.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>name</structfield>[32]</entry>
+ <entry spanname="hspan"><para>Name of the tuner, a
+NUL-terminated ASCII string. This information is intended for the
+user.<!-- FIXME Video inputs already have a name, the purpose of this
+field is not quite clear.--></para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry spanname="hspan">Type of the tuner, see <xref
+ linkend="v4l2-tuner-type" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capability</structfield></entry>
+ <entry spanname="hspan"><para>Tuner capability flags, see
+<xref linkend="tuner-capability" />. Audio flags indicate the ability
+to decode audio subprograms. They will <emphasis>not</emphasis>
+change, for example with the current video standard.</para><para>When
+the structure refers to a radio tuner the
+<constant>V4L2_TUNER_CAP_LANG1</constant>,
+<constant>V4L2_TUNER_CAP_LANG2</constant> and
+<constant>V4L2_TUNER_CAP_NORM</constant> flags can't be used.</para>
+<para>If multiple frequency bands are supported, then
+<structfield>capability</structfield> is the union of all
+<structfield>capability</structfield> fields of each &v4l2-frequency-band;.
+</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>rangelow</structfield></entry>
+ <entry spanname="hspan">The lowest tunable frequency in
+units of 62.5 kHz, or if the <structfield>capability</structfield>
+flag <constant>V4L2_TUNER_CAP_LOW</constant> is set, in units of 62.5
+Hz, or if the <structfield>capability</structfield> flag
+<constant>V4L2_TUNER_CAP_1HZ</constant> is set, in units of 1 Hz.
+If multiple frequency bands are supported, then
+<structfield>rangelow</structfield> is the lowest frequency
+of all the frequency bands.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>rangehigh</structfield></entry>
+ <entry spanname="hspan">The highest tunable frequency in
+units of 62.5 kHz, or if the <structfield>capability</structfield>
+flag <constant>V4L2_TUNER_CAP_LOW</constant> is set, in units of 62.5
+Hz, or if the <structfield>capability</structfield> flag
+<constant>V4L2_TUNER_CAP_1HZ</constant> is set, in units of 1 Hz.
+If multiple frequency bands are supported, then
+<structfield>rangehigh</structfield> is the highest frequency
+of all the frequency bands.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>rxsubchans</structfield></entry>
+ <entry spanname="hspan"><para>Some tuners or audio
+decoders can determine the received audio subprograms by analyzing
+audio carriers, pilot tones or other indicators. To pass this
+information drivers set flags defined in <xref
+ linkend="tuner-rxsubchans" /> in this field. For
+example:</para></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><constant>V4L2_TUNER_SUB_MONO</constant></entry>
+ <entry>receiving mono audio</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><constant>STEREO | SAP</constant></entry>
+ <entry>receiving stereo audio and a secondary audio
+program</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><constant>MONO | STEREO</constant></entry>
+ <entry>receiving mono or stereo audio, the hardware cannot
+distinguish</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><constant>LANG1 | LANG2</constant></entry>
+ <entry>receiving bilingual audio</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry><constant>MONO | STEREO | LANG1 | LANG2</constant></entry>
+ <entry>receiving mono, stereo or bilingual
+audio</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry></entry>
+ <entry spanname="hspan"><para>When the
+<constant>V4L2_TUNER_CAP_STEREO</constant>,
+<constant>_LANG1</constant>, <constant>_LANG2</constant> or
+<constant>_SAP</constant> flag is cleared in the
+<structfield>capability</structfield> field, the corresponding
+<constant>V4L2_TUNER_SUB_</constant> flag must not be set
+here.</para><para>This field is valid only if this is the tuner of the
+current video input, or when the structure refers to a radio
+tuner.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>audmode</structfield></entry>
+ <entry spanname="hspan"><para>The selected audio mode, see
+<xref linkend="tuner-audmode" /> for valid values. The audio mode does
+not affect audio subprogram detection, and like a <link
+linkend="control">control</link> it does not automatically change
+unless the requested mode is invalid or unsupported. See <xref
+ linkend="tuner-matrix" /> for possible results when
+the selected and received audio programs do not
+match.</para><para>Currently this is the only field of struct
+<structname>v4l2_tuner</structname> applications can
+change.</para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>signal</structfield></entry>
+ <entry spanname="hspan">The signal strength if known, ranging
+from 0 to 65535. Higher values indicate a better signal.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>afc</structfield></entry>
+ <entry spanname="hspan">Automatic frequency control: When the
+<structfield>afc</structfield> value is negative, the frequency is too
+low, when positive too high.<!-- FIXME need example what to do when it never
+settles at zero, &ie; range is what? --></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[4]</entry>
+ <entry spanname="hspan">Reserved for future extensions. Drivers and
+applications must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-tuner-type">
+ <title>enum v4l2_tuner_type</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_TUNER_RADIO</constant></entry>
+ <entry>1</entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_ANALOG_TV</constant></entry>
+ <entry>2</entry>
+ <entry></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="tuner-capability">
+ <title>Tuner and Modulator Capability Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_LOW</constant></entry>
+ <entry>0x0001</entry>
+ <entry>When set, tuning frequencies are expressed in units of
+62.5 Hz instead of 62.5 kHz.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_NORM</constant></entry>
+ <entry>0x0002</entry>
+ <entry>This is a multi-standard tuner; the video standard
+can or must be switched. (B/G PAL tuners for example are typically not
+ considered multi-standard because the video standard is automatically
+ determined from the frequency band.) The set of supported video
+ standards is available from the &v4l2-input; pointing to this tuner,
+ see the description of ioctl &VIDIOC-ENUMINPUT; for details. Only
+ <constant>V4L2_TUNER_ANALOG_TV</constant> tuners can have this capability.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_HWSEEK_BOUNDED</constant></entry>
+ <entry>0x0004</entry>
+ <entry>If set, then this tuner supports the hardware seek functionality
+ where the seek stops when it reaches the end of the frequency range.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_HWSEEK_WRAP</constant></entry>
+ <entry>0x0008</entry>
+ <entry>If set, then this tuner supports the hardware seek functionality
+ where the seek wraps around when it reaches the end of the frequency range.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_STEREO</constant></entry>
+ <entry>0x0010</entry>
+ <entry>Stereo audio reception is supported.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_LANG1</constant></entry>
+ <entry>0x0040</entry>
+ <entry>Reception of the primary language of a bilingual
+audio program is supported. Bilingual audio is a feature of
+two-channel systems, transmitting the primary language monaural on the
+main audio carrier and a secondary language monaural on a second
+carrier. Only
+ <constant>V4L2_TUNER_ANALOG_TV</constant> tuners can have this capability.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_LANG2</constant></entry>
+ <entry>0x0020</entry>
+ <entry>Reception of the secondary language of a bilingual
+audio program is supported. Only
+ <constant>V4L2_TUNER_ANALOG_TV</constant> tuners can have this capability.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_SAP</constant></entry>
+ <entry>0x0020</entry>
+ <entry><para>Reception of a secondary audio program is
+supported. This is a feature of the BTSC system which accompanies the
+NTSC video standard. Two audio carriers are available for mono or
+stereo transmissions of a primary language, and an independent third
+carrier for a monaural secondary language. Only
+ <constant>V4L2_TUNER_ANALOG_TV</constant> tuners can have this capability.</para><para>Note the
+<constant>V4L2_TUNER_CAP_LANG2</constant> and
+<constant>V4L2_TUNER_CAP_SAP</constant> flags are synonyms.
+<constant>V4L2_TUNER_CAP_SAP</constant> applies when the tuner
+supports the <constant>V4L2_STD_NTSC_M</constant> video
+standard.</para><!-- FIXME what if PAL+NTSC and Bi but not SAP? --></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_RDS</constant></entry>
+ <entry>0x0080</entry>
+ <entry>RDS capture is supported. This capability is only valid for
+radio tuners.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_RDS_BLOCK_IO</constant></entry>
+ <entry>0x0100</entry>
+ <entry>The RDS data is passed as unparsed RDS blocks.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_RDS_CONTROLS</constant></entry>
+ <entry>0x0200</entry>
+ <entry>The RDS data is parsed by the hardware and set via controls.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_FREQ_BANDS</constant></entry>
+ <entry>0x0400</entry>
+ <entry>The &VIDIOC-ENUM-FREQ-BANDS; ioctl can be used to enumerate
+ the available frequency bands.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_HWSEEK_PROG_LIM</constant></entry>
+ <entry>0x0800</entry>
+ <entry>The range to search when using the hardware seek functionality
+ is programmable, see &VIDIOC-S-HW-FREQ-SEEK; for details.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_CAP_1HZ</constant></entry>
+ <entry>0x1000</entry>
+ <entry>When set, tuning frequencies are expressed in units of 1 Hz instead of 62.5 kHz.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="tuner-rxsubchans">
+ <title>Tuner Audio Reception Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_MONO</constant></entry>
+ <entry>0x0001</entry>
+ <entry>The tuner receives a mono audio signal.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_STEREO</constant></entry>
+ <entry>0x0002</entry>
+ <entry>The tuner receives a stereo audio signal.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_LANG1</constant></entry>
+ <entry>0x0008</entry>
+ <entry>The tuner receives the primary language of a
+bilingual audio signal. Drivers must clear this flag when the current
+video standard is <constant>V4L2_STD_NTSC_M</constant>.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_LANG2</constant></entry>
+ <entry>0x0004</entry>
+ <entry>The tuner receives the secondary language of a
+bilingual audio signal (or a second audio program).</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_SAP</constant></entry>
+ <entry>0x0004</entry>
+ <entry>The tuner receives a Second Audio Program. Note the
+<constant>V4L2_TUNER_SUB_LANG2</constant> and
+<constant>V4L2_TUNER_SUB_SAP</constant> flags are synonyms. The
+<constant>V4L2_TUNER_SUB_SAP</constant> flag applies when the
+current video standard is <constant>V4L2_STD_NTSC_M</constant>.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_SUB_RDS</constant></entry>
+ <entry>0x0010</entry>
+ <entry>The tuner receives an RDS channel.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="tuner-audmode">
+ <title>Tuner Audio Modes</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_TUNER_MODE_MONO</constant></entry>
+ <entry>0</entry>
+ <entry>Play mono audio. When the tuner receives a stereo
+signal this a down-mix of the left and right channel. When the tuner
+receives a bilingual or SAP signal this mode selects the primary
+language.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_MODE_STEREO</constant></entry>
+ <entry>1</entry>
+ <entry><para>Play stereo audio. When the tuner receives
+bilingual audio it may play different languages on the left and right
+channel or the primary language is played on both channels.</para><para>Playing
+different languages in this mode is
+deprecated. New drivers should do this only in
+<constant>MODE_LANG1_LANG2</constant>.</para><para>When the tuner
+receives no stereo signal or does not support stereo reception the
+driver shall fall back to <constant>MODE_MONO</constant>.</para></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_MODE_LANG1</constant></entry>
+ <entry>3</entry>
+ <entry>Play the primary language, mono or stereo. Only
+<constant>V4L2_TUNER_ANALOG_TV</constant> tuners support this
+mode.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_MODE_LANG2</constant></entry>
+ <entry>2</entry>
+ <entry>Play the secondary language, mono. When the tuner
+receives no bilingual audio or SAP, or their reception is not
+supported the driver shall fall back to mono or stereo mode. Only
+<constant>V4L2_TUNER_ANALOG_TV</constant> tuners support this
+mode.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_MODE_SAP</constant></entry>
+ <entry>2</entry>
+ <entry>Play the Second Audio Program. When the tuner
+receives no bilingual audio or SAP, or their reception is not
+supported the driver shall fall back to mono or stereo mode. Only
+<constant>V4L2_TUNER_ANALOG_TV</constant> tuners support this mode.
+Note the <constant>V4L2_TUNER_MODE_LANG2</constant> and
+<constant>V4L2_TUNER_MODE_SAP</constant> are synonyms.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_TUNER_MODE_LANG1_LANG2</constant></entry>
+ <entry>4</entry>
+ <entry>Play the primary language on the left channel, the
+secondary language on the right channel. When the tuner receives no
+bilingual audio or SAP, it shall fall back to
+<constant>MODE_LANG1</constant> or <constant>MODE_MONO</constant>.
+Only <constant>V4L2_TUNER_ANALOG_TV</constant> tuners support this
+mode.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="all" id="tuner-matrix">
+ <title>Tuner Audio Matrix</title>
+ <tgroup cols="6" align="center">
+ <colspec align="left" />
+ <colspec colname="c2" colwidth="1*" />
+ <colspec colwidth="1*" />
+ <colspec colwidth="1*" />
+ <colspec colnum="6" colname="c6" colwidth="1*" />
+ <spanspec namest="c2" nameend="c6" spanname="hspan" align="center" />
+ <thead>
+ <row>
+ <entry></entry>
+ <entry spanname="hspan">Selected
+<constant>V4L2_TUNER_MODE_</constant></entry>
+ </row>
+ <row>
+ <entry>Received <constant>V4L2_TUNER_SUB_</constant></entry>
+ <entry><constant>MONO</constant></entry>
+ <entry><constant>STEREO</constant></entry>
+ <entry><constant>LANG1</constant></entry>
+ <entry><constant>LANG2 = SAP</constant></entry>
+ <entry><constant>LANG1_LANG2</constant><footnote><para>This
+mode has been added in Linux 2.6.17 and may not be supported by older
+drivers.</para></footnote></entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>MONO</constant></entry>
+ <entry>Mono</entry>
+ <entry>Mono/Mono</entry>
+ <entry>Mono</entry>
+ <entry>Mono</entry>
+ <entry>Mono/Mono</entry>
+ </row>
+ <row>
+ <entry><constant>MONO | SAP</constant></entry>
+ <entry>Mono</entry>
+ <entry>Mono/Mono</entry>
+ <entry>Mono</entry>
+ <entry>SAP</entry>
+ <entry>Mono/SAP (preferred) or Mono/Mono</entry>
+ </row>
+ <row>
+ <entry><constant>STEREO</constant></entry>
+ <entry>L+R</entry>
+ <entry>L/R</entry>
+ <entry>Stereo L/R (preferred) or Mono L+R</entry>
+ <entry>Stereo L/R (preferred) or Mono L+R</entry>
+ <entry>L/R (preferred) or L+R/L+R</entry>
+ </row>
+ <row>
+ <entry><constant>STEREO | SAP</constant></entry>
+ <entry>L+R</entry>
+ <entry>L/R</entry>
+ <entry>Stereo L/R (preferred) or Mono L+R</entry>
+ <entry>SAP</entry>
+ <entry>L+R/SAP (preferred) or L/R or L+R/L+R</entry>
+ </row>
+ <row>
+ <entry><constant>LANG1 | LANG2</constant></entry>
+ <entry>Language&nbsp;1</entry>
+ <entry>Lang1/Lang2 (deprecated<footnote><para>Playback of
+both languages in <constant>MODE_STEREO</constant> is deprecated. In
+the future drivers should produce only the primary language in this
+mode. Applications should request
+<constant>MODE_LANG1_LANG2</constant> to record both languages or a
+stereo signal.</para></footnote>) or
+Lang1/Lang1</entry>
+ <entry>Language&nbsp;1</entry>
+ <entry>Language&nbsp;2</entry>
+ <entry>Lang1/Lang2 (preferred) or Lang1/Lang1</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-tuner; <structfield>index</structfield> is
+out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-log-status.xml b/Documentation/DocBook/media/v4l/vidioc-log-status.xml
new file mode 100644
index 000000000..5ded7d35e
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-log-status.xml
@@ -0,0 +1,41 @@
+<refentry id="vidioc-log-status">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_LOG_STATUS</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_LOG_STATUS</refname>
+ <refpurpose>Log driver status information</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>As the video/audio devices become more complicated it
+becomes harder to debug problems. When this ioctl is called the driver
+will output the current device status to the kernel log. This is
+particular useful when dealing with problems like no sound, no video
+and incorrectly tuned channels. Also many modern devices autodetect
+video and audio standards and this ioctl will report what the device
+thinks what the standard is. Mismatches may give an indication where
+the problem is.</para>
+
+ <para>This ioctl is optional and not all drivers support it. It
+was introduced in Linux 2.6.15.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-overlay.xml b/Documentation/DocBook/media/v4l/vidioc-overlay.xml
new file mode 100644
index 000000000..250a7de18
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-overlay.xml
@@ -0,0 +1,74 @@
+<refentry id="vidioc-overlay">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_OVERLAY</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_OVERLAY</refname>
+ <refpurpose>Start or stop video overlay</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const int *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_OVERLAY</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>This ioctl is part of the <link linkend="overlay">video
+ overlay</link> I/O method. Applications call
+ <constant>VIDIOC_OVERLAY</constant> to start or stop the
+ overlay. It takes a pointer to an integer which must be set to
+ zero by the application to stop overlay, to one to start.</para>
+
+ <para>Drivers do not support &VIDIOC-STREAMON; or
+&VIDIOC-STREAMOFF; with <constant>V4L2_BUF_TYPE_VIDEO_OVERLAY</constant>.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The overlay parameters have not been set up. See <xref
+linkend="overlay" /> for the necessary steps.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-prepare-buf.xml b/Documentation/DocBook/media/v4l/vidioc-prepare-buf.xml
new file mode 100644
index 000000000..fa7ad7e33
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-prepare-buf.xml
@@ -0,0 +1,94 @@
+<refentry id="vidioc-prepare-buf">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_PREPARE_BUF</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_PREPARE_BUF</refname>
+ <refpurpose>Prepare a buffer for I/O</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_buffer *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_PREPARE_BUF</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental"> experimental </link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>Applications can optionally call the
+<constant>VIDIOC_PREPARE_BUF</constant> ioctl to pass ownership of the buffer
+to the driver before actually enqueuing it, using the
+<constant>VIDIOC_QBUF</constant> ioctl, and to prepare it for future I/O.
+Such preparations may include cache invalidation or cleaning. Performing them
+in advance saves time during the actual I/O. In case such cache operations are
+not required, the application can use one of
+<constant>V4L2_BUF_FLAG_NO_CACHE_INVALIDATE</constant> and
+<constant>V4L2_BUF_FLAG_NO_CACHE_CLEAN</constant> flags to skip the respective
+step.</para>
+
+ <para>The <structname>v4l2_buffer</structname> structure is
+specified in <xref linkend="buffer" />.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>File I/O is in progress.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The buffer <structfield>type</structfield> is not
+supported, or the <structfield>index</structfield> is out of bounds,
+or no buffers have been allocated yet, or the
+<structfield>userptr</structfield> or
+<structfield>length</structfield> are invalid.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-qbuf.xml b/Documentation/DocBook/media/v4l/vidioc-qbuf.xml
new file mode 100644
index 000000000..3504a7f2f
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-qbuf.xml
@@ -0,0 +1,192 @@
+<refentry id="vidioc-qbuf">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_QBUF, VIDIOC_DQBUF</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_QBUF</refname>
+ <refname>VIDIOC_DQBUF</refname>
+ <refpurpose>Exchange a buffer with the driver</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_buffer *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_QBUF, VIDIOC_DQBUF</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>Applications call the <constant>VIDIOC_QBUF</constant> ioctl
+to enqueue an empty (capturing) or filled (output) buffer in the
+driver's incoming queue. The semantics depend on the selected I/O
+method.</para>
+
+ <para>To enqueue a buffer applications set the <structfield>type</structfield>
+field of a &v4l2-buffer; to the same buffer type as was previously used
+with &v4l2-format; <structfield>type</structfield> and &v4l2-requestbuffers;
+<structfield>type</structfield>. Applications must also set the
+<structfield>index</structfield> field. Valid index numbers range from
+zero to the number of buffers allocated with &VIDIOC-REQBUFS;
+(&v4l2-requestbuffers; <structfield>count</structfield>) minus one. The
+contents of the struct <structname>v4l2_buffer</structname> returned
+by a &VIDIOC-QUERYBUF; ioctl will do as well. When the buffer is
+intended for output (<structfield>type</structfield> is
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT</constant>,
+<constant>V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE</constant>, or
+<constant>V4L2_BUF_TYPE_VBI_OUTPUT</constant>) applications must also
+initialize the <structfield>bytesused</structfield>,
+<structfield>field</structfield> and
+<structfield>timestamp</structfield> fields, see <xref
+linkend="buffer" /> for details.
+Applications must also set <structfield>flags</structfield> to 0.
+The <structfield>reserved2</structfield> and
+<structfield>reserved</structfield> fields must be set to 0. When using
+the <link linkend="planar-apis">multi-planar API</link>, the
+<structfield>m.planes</structfield> field must contain a userspace pointer
+to a filled-in array of &v4l2-plane; and the <structfield>length</structfield>
+field must be set to the number of elements in that array.
+</para>
+
+ <para>To enqueue a <link linkend="mmap">memory mapped</link>
+buffer applications set the <structfield>memory</structfield>
+field to <constant>V4L2_MEMORY_MMAP</constant>. When
+<constant>VIDIOC_QBUF</constant> is called with a pointer to this
+structure the driver sets the
+<constant>V4L2_BUF_FLAG_MAPPED</constant> and
+<constant>V4L2_BUF_FLAG_QUEUED</constant> flags and clears the
+<constant>V4L2_BUF_FLAG_DONE</constant> flag in the
+<structfield>flags</structfield> field, or it returns an
+&EINVAL;.</para>
+
+ <para>To enqueue a <link linkend="userp">user pointer</link>
+buffer applications set the <structfield>memory</structfield>
+field to <constant>V4L2_MEMORY_USERPTR</constant>, the
+<structfield>m.userptr</structfield> field to the address of the
+buffer and <structfield>length</structfield> to its size. When the multi-planar
+API is used, <structfield>m.userptr</structfield> and
+<structfield>length</structfield> members of the passed array of &v4l2-plane;
+have to be used instead. When <constant>VIDIOC_QBUF</constant> is called with
+a pointer to this structure the driver sets the
+<constant>V4L2_BUF_FLAG_QUEUED</constant> flag and clears the
+<constant>V4L2_BUF_FLAG_MAPPED</constant> and
+<constant>V4L2_BUF_FLAG_DONE</constant> flags in the
+<structfield>flags</structfield> field, or it returns an error code.
+This ioctl locks the memory pages of the buffer in physical memory,
+they cannot be swapped out to disk. Buffers remain locked until
+dequeued, until the &VIDIOC-STREAMOFF; or &VIDIOC-REQBUFS; ioctl is
+called, or until the device is closed.</para>
+
+ <para>To enqueue a <link linkend="dmabuf">DMABUF</link> buffer applications
+set the <structfield>memory</structfield> field to
+<constant>V4L2_MEMORY_DMABUF</constant> and the <structfield>m.fd</structfield>
+field to a file descriptor associated with a DMABUF buffer. When the
+multi-planar API is used the <structfield>m.fd</structfield> fields of the
+passed array of &v4l2-plane; have to be used instead. When
+<constant>VIDIOC_QBUF</constant> is called with a pointer to this structure the
+driver sets the <constant>V4L2_BUF_FLAG_QUEUED</constant> flag and clears the
+<constant>V4L2_BUF_FLAG_MAPPED</constant> and
+<constant>V4L2_BUF_FLAG_DONE</constant> flags in the
+<structfield>flags</structfield> field, or it returns an error code. This
+ioctl locks the buffer. Locking a buffer means passing it to a driver for a
+hardware access (usually DMA). If an application accesses (reads/writes) a
+locked buffer then the result is undefined. Buffers remain locked until
+dequeued, until the &VIDIOC-STREAMOFF; or &VIDIOC-REQBUFS; ioctl is called, or
+until the device is closed.</para>
+
+ <para>Applications call the <constant>VIDIOC_DQBUF</constant>
+ioctl to dequeue a filled (capturing) or displayed (output) buffer
+from the driver's outgoing queue. They just set the
+<structfield>type</structfield>, <structfield>memory</structfield>
+and <structfield>reserved</structfield>
+fields of a &v4l2-buffer; as above, when <constant>VIDIOC_DQBUF</constant>
+is called with a pointer to this structure the driver fills the
+remaining fields or returns an error code. The driver may also set
+<constant>V4L2_BUF_FLAG_ERROR</constant> in the <structfield>flags</structfield>
+field. It indicates a non-critical (recoverable) streaming error. In such case
+the application may continue as normal, but should be aware that data in the
+dequeued buffer might be corrupted. When using the multi-planar API, the
+planes array must be passed in as well.</para>
+
+ <para>By default <constant>VIDIOC_DQBUF</constant> blocks when no
+buffer is in the outgoing queue. When the
+<constant>O_NONBLOCK</constant> flag was given to the &func-open;
+function, <constant>VIDIOC_DQBUF</constant> returns immediately
+with an &EAGAIN; when no buffer is available.</para>
+
+ <para>The <structname>v4l2_buffer</structname> structure is
+specified in <xref linkend="buffer" />.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EAGAIN</errorcode></term>
+ <listitem>
+ <para>Non-blocking I/O has been selected using
+<constant>O_NONBLOCK</constant> and no buffer was in the outgoing
+queue.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The buffer <structfield>type</structfield> is not
+supported, or the <structfield>index</structfield> is out of bounds,
+or no buffers have been allocated yet, or the
+<structfield>userptr</structfield> or
+<structfield>length</structfield> are invalid.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EIO</errorcode></term>
+ <listitem>
+ <para><constant>VIDIOC_DQBUF</constant> failed due to an
+internal error. Can also indicate temporary problems like signal
+loss. Note the driver might dequeue an (empty) buffer despite
+returning an error, or even stop capturing. Reusing such buffer may be unsafe
+though and its details (e.g. <structfield>index</structfield>) may not be
+returned either. It is recommended that drivers indicate recoverable errors
+by setting the <constant>V4L2_BUF_FLAG_ERROR</constant> and returning 0 instead.
+In that case the application should be able to safely reuse the buffer and
+continue streaming.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-query-dv-timings.xml b/Documentation/DocBook/media/v4l/vidioc-query-dv-timings.xml
new file mode 100644
index 000000000..e185f149e
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-query-dv-timings.xml
@@ -0,0 +1,110 @@
+<refentry id="vidioc-query-dv-timings">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_QUERY_DV_TIMINGS</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_QUERY_DV_TIMINGS</refname>
+ <refpurpose>Sense the DV preset received by the current
+input</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_dv_timings *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_QUERY_DV_TIMINGS</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental"> experimental </link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>The hardware may be able to detect the current DV timings
+automatically, similar to sensing the video standard. To do so, applications
+call <constant>VIDIOC_QUERY_DV_TIMINGS</constant> with a pointer to a
+&v4l2-dv-timings;. Once the hardware detects the timings, it will fill in the
+timings structure.
+
+If the timings could not be detected because there was no signal, then
+<errorcode>ENOLINK</errorcode> is returned. If a signal was detected, but
+it was unstable and the receiver could not lock to the signal, then
+<errorcode>ENOLCK</errorcode> is returned. If the receiver could lock to the signal,
+but the format is unsupported (e.g. because the pixelclock is out of range
+of the hardware capabilities), then the driver fills in whatever timings it
+could find and returns <errorcode>ERANGE</errorcode>. In that case the application
+can call &VIDIOC-DV-TIMINGS-CAP; to compare the found timings with the hardware's
+capabilities in order to give more precise feedback to the user.
+</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>ENODATA</errorcode></term>
+ <listitem>
+ <para>Digital video timings are not supported for this input or output.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENOLINK</errorcode></term>
+ <listitem>
+ <para>No timings could be detected because no signal was found.
+</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENOLCK</errorcode></term>
+ <listitem>
+ <para>The signal was unstable and the hardware could not lock on to it.
+</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ERANGE</errorcode></term>
+ <listitem>
+ <para>Timings were found, but they are out of range of the hardware
+capabilities.
+</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-querybuf.xml b/Documentation/DocBook/media/v4l/vidioc-querybuf.xml
new file mode 100644
index 000000000..a597155c0
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-querybuf.xml
@@ -0,0 +1,105 @@
+<refentry id="vidioc-querybuf">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_QUERYBUF</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_QUERYBUF</refname>
+ <refpurpose>Query the status of a buffer</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_buffer *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_QUERYBUF</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>This ioctl is part of the <link linkend="mmap">streaming
+</link> I/O method. It can be used to query the status of a
+buffer at any time after buffers have been allocated with the
+&VIDIOC-REQBUFS; ioctl.</para>
+
+ <para>Applications set the <structfield>type</structfield> field
+ of a &v4l2-buffer; to the same buffer type as was previously used with
+&v4l2-format; <structfield>type</structfield> and &v4l2-requestbuffers;
+<structfield>type</structfield>, and the <structfield>index</structfield>
+ field. Valid index numbers range from zero
+to the number of buffers allocated with &VIDIOC-REQBUFS;
+ (&v4l2-requestbuffers; <structfield>count</structfield>) minus one.
+The <structfield>reserved</structfield> field should to set to 0.
+When using the <link linkend="planar-apis">multi-planar API</link>, the
+<structfield>m.planes</structfield> field must contain a userspace pointer to an
+array of &v4l2-plane; and the <structfield>length</structfield> field has
+to be set to the number of elements in that array.
+After calling <constant>VIDIOC_QUERYBUF</constant> with a pointer to
+ this structure drivers return an error code or fill the rest of
+the structure.</para>
+
+ <para>In the <structfield>flags</structfield> field the
+<constant>V4L2_BUF_FLAG_MAPPED</constant>,
+<constant>V4L2_BUF_FLAG_PREPARED</constant>,
+<constant>V4L2_BUF_FLAG_QUEUED</constant> and
+<constant>V4L2_BUF_FLAG_DONE</constant> flags will be valid. The
+<structfield>memory</structfield> field will be set to the current
+I/O method. For the single-planar API, the <structfield>m.offset</structfield>
+contains the offset of the buffer from the start of the device memory,
+the <structfield>length</structfield> field its size. For the multi-planar API,
+fields <structfield>m.mem_offset</structfield> and
+<structfield>length</structfield> in the <structfield>m.planes</structfield>
+array elements will be used instead and the <structfield>length</structfield>
+field of &v4l2-buffer; is set to the number of filled-in array elements.
+The driver may or may not set the remaining fields and flags, they are
+meaningless in this context.</para>
+
+ <para>The <structname>v4l2_buffer</structname> structure is
+ specified in <xref linkend="buffer" />.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The buffer <structfield>type</structfield> is not
+supported, or the <structfield>index</structfield> is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-querycap.xml b/Documentation/DocBook/media/v4l/vidioc-querycap.xml
new file mode 100644
index 000000000..20fda75a0
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-querycap.xml
@@ -0,0 +1,344 @@
+<refentry id="vidioc-querycap">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_QUERYCAP</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_QUERYCAP</refname>
+ <refpurpose>Query device capabilities</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_capability *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_QUERYCAP</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>All V4L2 devices support the
+<constant>VIDIOC_QUERYCAP</constant> ioctl. It is used to identify
+kernel devices compatible with this specification and to obtain
+information about driver and hardware capabilities. The ioctl takes a
+pointer to a &v4l2-capability; which is filled by the driver. When the
+driver is not compatible with this specification the ioctl returns an
+&EINVAL;.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-capability">
+ <title>struct <structname>v4l2_capability</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>driver</structfield>[16]</entry>
+ <entry><para>Name of the driver, a unique NUL-terminated
+ASCII string. For example: "bttv". Driver specific applications can
+use this information to verify the driver identity. It is also useful
+to work around known bugs, or to identify drivers in error reports.</para>
+<para>Storing strings in fixed sized arrays is bad
+practice but unavoidable here. Drivers and applications should take
+precautions to never read or write beyond the end of the array and to
+make sure the strings are properly NUL-terminated.</para></entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>card</structfield>[32]</entry>
+ <entry>Name of the device, a NUL-terminated UTF-8 string.
+For example: "Yoyodyne TV/FM". One driver may support different brands
+or models of video hardware. This information is intended for users,
+for example in a menu of available devices. Since multiple TV cards of
+the same brand may be installed which are supported by the same
+driver, this name should be combined with the character device file
+name (&eg; <filename>/dev/video2</filename>) or the
+<structfield>bus_info</structfield> string to avoid
+ambiguities.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>bus_info</structfield>[32]</entry>
+ <entry>Location of the device in the system, a
+NUL-terminated ASCII string. For example: "PCI:0000:05:06.0". This
+information is intended for users, to distinguish multiple
+identical devices. If no such information is available the field must
+simply count the devices controlled by the driver ("platform:vivi-000").
+The bus_info must start with "PCI:" for PCI boards, "PCIe:" for PCI Express boards,
+"usb-" for USB devices, "I2C:" for i2c devices, "ISA:" for ISA devices,
+"parport" for parallel port devices and "platform:" for platform devices.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>version</structfield></entry>
+ <entry><para>Version number of the driver.</para>
+<para>Starting with kernel 3.1, the version reported is provided by the
+V4L2 subsystem following the kernel numbering scheme. However, it
+may not always return the same version as the kernel if, for example,
+a stable or distribution-modified kernel uses the V4L2 stack from a
+newer kernel.</para>
+<para>The version number is formatted using the
+<constant>KERNEL_VERSION()</constant> macro:</para></entry>
+ </row>
+ <row>
+ <entry spanname="hspan"><para>
+<programlisting>
+#define KERNEL_VERSION(a,b,c) (((a) &lt;&lt; 16) + ((b) &lt;&lt; 8) + (c))
+
+__u32 version = KERNEL_VERSION(0, 8, 1);
+
+printf ("Version: %u.%u.%u\n",
+ (version &gt;&gt; 16) &amp; 0xFF,
+ (version &gt;&gt; 8) &amp; 0xFF,
+ version &amp; 0xFF);
+</programlisting></para></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>capabilities</structfield></entry>
+ <entry>Available capabilities of the physical device as a whole, see <xref
+ linkend="device-capabilities" />. The same physical device can export
+ multiple devices in /dev (e.g. /dev/videoX, /dev/vbiY and /dev/radioZ).
+ The <structfield>capabilities</structfield> field should contain a union
+ of all capabilities available around the several V4L2 devices exported
+ to userspace.
+ For all those devices the <structfield>capabilities</structfield> field
+ returns the same set of capabilities. This allows applications to open
+ just one of the devices (typically the video device) and discover whether
+ video, vbi and/or radio are also supported.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>device_caps</structfield></entry>
+ <entry>Device capabilities of the opened device, see <xref
+ linkend="device-capabilities" />. Should contain the available capabilities
+ of that specific device node. So, for example, <structfield>device_caps</structfield>
+ of a radio device will only contain radio related capabilities and
+ no video or vbi capabilities. This field is only set if the <structfield>capabilities</structfield>
+ field contains the <constant>V4L2_CAP_DEVICE_CAPS</constant> capability.
+ Only the <structfield>capabilities</structfield> field can have the
+ <constant>V4L2_CAP_DEVICE_CAPS</constant> capability, <structfield>device_caps</structfield>
+ will never set <constant>V4L2_CAP_DEVICE_CAPS</constant>.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[3]</entry>
+ <entry>Reserved for future extensions. Drivers must set
+this array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="device-capabilities">
+ <title>Device Capabilities Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CAP_VIDEO_CAPTURE</constant></entry>
+ <entry>0x00000001</entry>
+ <entry>The device supports the single-planar API through the <link
+linkend="capture">Video Capture</link> interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_VIDEO_CAPTURE_MPLANE</constant></entry>
+ <entry>0x00001000</entry>
+ <entry>The device supports the
+ <link linkend="planar-apis">multi-planar API</link> through the
+ <link linkend="capture">Video Capture</link> interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_VIDEO_OUTPUT</constant></entry>
+ <entry>0x00000002</entry>
+ <entry>The device supports the single-planar API through the <link
+linkend="output">Video Output</link> interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_VIDEO_OUTPUT_MPLANE</constant></entry>
+ <entry>0x00002000</entry>
+ <entry>The device supports the
+ <link linkend="planar-apis">multi-planar API</link> through the
+ <link linkend="output">Video Output</link> interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_VIDEO_M2M</constant></entry>
+ <entry>0x00004000</entry>
+ <entry>The device supports the single-planar API through the
+ Video Memory-To-Memory interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_VIDEO_M2M_MPLANE</constant></entry>
+ <entry>0x00008000</entry>
+ <entry>The device supports the
+ <link linkend="planar-apis">multi-planar API</link> through the
+ Video Memory-To-Memory interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_VIDEO_OVERLAY</constant></entry>
+ <entry>0x00000004</entry>
+ <entry>The device supports the <link
+linkend="overlay">Video Overlay</link> interface. A video overlay device
+typically stores captured images directly in the video memory of a
+graphics card, with hardware clipping and scaling.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_VBI_CAPTURE</constant></entry>
+ <entry>0x00000010</entry>
+ <entry>The device supports the <link linkend="raw-vbi">Raw
+VBI Capture</link> interface, providing Teletext and Closed Caption
+data.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_VBI_OUTPUT</constant></entry>
+ <entry>0x00000020</entry>
+ <entry>The device supports the <link linkend="raw-vbi">Raw VBI Output</link> interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_SLICED_VBI_CAPTURE</constant></entry>
+ <entry>0x00000040</entry>
+ <entry>The device supports the <link linkend="sliced">Sliced VBI Capture</link> interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_SLICED_VBI_OUTPUT</constant></entry>
+ <entry>0x00000080</entry>
+ <entry>The device supports the <link linkend="sliced">Sliced VBI Output</link> interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_RDS_CAPTURE</constant></entry>
+ <entry>0x00000100</entry>
+ <entry>The device supports the <link linkend="rds">RDS</link> capture interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_VIDEO_OUTPUT_OVERLAY</constant></entry>
+ <entry>0x00000200</entry>
+ <entry>The device supports the <link linkend="osd">Video
+Output Overlay</link> (OSD) interface. Unlike the <wordasword>Video
+Overlay</wordasword> interface, this is a secondary function of video
+output devices and overlays an image onto an outgoing video signal.
+When the driver sets this flag, it must clear the
+<constant>V4L2_CAP_VIDEO_OVERLAY</constant> flag and vice
+versa.<footnote><para>The &v4l2-framebuffer; lacks an
+&v4l2-buf-type; field, therefore the type of overlay is implied by the
+driver capabilities.</para></footnote></entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_HW_FREQ_SEEK</constant></entry>
+ <entry>0x00000400</entry>
+ <entry>The device supports the &VIDIOC-S-HW-FREQ-SEEK; ioctl for
+hardware frequency seeking.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_RDS_OUTPUT</constant></entry>
+ <entry>0x00000800</entry>
+ <entry>The device supports the <link linkend="rds">RDS</link> output interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_TUNER</constant></entry>
+ <entry>0x00010000</entry>
+ <entry>The device has some sort of tuner to
+receive RF-modulated video signals. For more information about
+tuner programming see
+<xref linkend="tuner" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_AUDIO</constant></entry>
+ <entry>0x00020000</entry>
+ <entry>The device has audio inputs or outputs. It may or
+may not support audio recording or playback, in PCM or compressed
+formats. PCM audio support must be implemented as ALSA or OSS
+interface. For more information on audio inputs and outputs see <xref
+ linkend="audio" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_RADIO</constant></entry>
+ <entry>0x00040000</entry>
+ <entry>This is a radio receiver.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_MODULATOR</constant></entry>
+ <entry>0x00080000</entry>
+ <entry>The device has some sort of modulator to
+emit RF-modulated video/audio signals. For more information about
+modulator programming see
+<xref linkend="tuner" />.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_SDR_CAPTURE</constant></entry>
+ <entry>0x00100000</entry>
+ <entry>The device supports the
+<link linkend="sdr">SDR Capture</link> interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_EXT_PIX_FORMAT</constant></entry>
+ <entry>0x00200000</entry>
+ <entry>The device supports the &v4l2-pix-format; extended
+fields.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_READWRITE</constant></entry>
+ <entry>0x01000000</entry>
+ <entry>The device supports the <link
+linkend="rw">read()</link> and/or <link linkend="rw">write()</link>
+I/O methods.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_ASYNCIO</constant></entry>
+ <entry>0x02000000</entry>
+ <entry>The device supports the <link
+linkend="async">asynchronous</link> I/O methods.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_STREAMING</constant></entry>
+ <entry>0x04000000</entry>
+ <entry>The device supports the <link
+linkend="mmap">streaming</link> I/O method.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CAP_DEVICE_CAPS</constant></entry>
+ <entry>0x80000000</entry>
+ <entry>The driver fills the <structfield>device_caps</structfield>
+ field. This capability can only appear in the <structfield>capabilities</structfield>
+ field and never in the <structfield>device_caps</structfield> field.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-queryctrl.xml b/Documentation/DocBook/media/v4l/vidioc-queryctrl.xml
new file mode 100644
index 000000000..dc83ad70f
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-queryctrl.xml
@@ -0,0 +1,650 @@
+<refentry id="vidioc-queryctrl">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_QUERYCTRL, VIDIOC_QUERY_EXT_CTRL, VIDIOC_QUERYMENU</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_QUERYCTRL</refname>
+ <refname>VIDIOC_QUERY_EXT_CTRL</refname>
+ <refname>VIDIOC_QUERYMENU</refname>
+ <refpurpose>Enumerate controls and menu control items</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_queryctrl *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_query_ext_ctrl *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_querymenu *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_QUERYCTRL, VIDIOC_QUERY_EXT_CTRL, VIDIOC_QUERYMENU</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>To query the attributes of a control applications set the
+<structfield>id</structfield> field of a &v4l2-queryctrl; and call the
+<constant>VIDIOC_QUERYCTRL</constant> ioctl with a pointer to this
+structure. The driver fills the rest of the structure or returns an
+&EINVAL; when the <structfield>id</structfield> is invalid.</para>
+
+ <para>It is possible to enumerate controls by calling
+<constant>VIDIOC_QUERYCTRL</constant> with successive
+<structfield>id</structfield> values starting from
+<constant>V4L2_CID_BASE</constant> up to and exclusive
+<constant>V4L2_CID_LASTP1</constant>. Drivers may return
+<errorcode>EINVAL</errorcode> if a control in this range is not
+supported. Further applications can enumerate private controls, which
+are not defined in this specification, by starting at
+<constant>V4L2_CID_PRIVATE_BASE</constant> and incrementing
+<structfield>id</structfield> until the driver returns
+<errorcode>EINVAL</errorcode>.</para>
+
+ <para>In both cases, when the driver sets the
+<constant>V4L2_CTRL_FLAG_DISABLED</constant> flag in the
+<structfield>flags</structfield> field this control is permanently
+disabled and should be ignored by the application.<footnote>
+ <para><constant>V4L2_CTRL_FLAG_DISABLED</constant> was
+intended for two purposes: Drivers can skip predefined controls not
+supported by the hardware (although returning EINVAL would do as
+well), or disable predefined and private controls after hardware
+detection without the trouble of reordering control arrays and indices
+(EINVAL cannot be used to skip private controls because it would
+prematurely end the enumeration).</para></footnote></para>
+
+ <para>When the application ORs <structfield>id</structfield> with
+<constant>V4L2_CTRL_FLAG_NEXT_CTRL</constant> the driver returns the
+next supported non-compound control, or <errorcode>EINVAL</errorcode>
+if there is none. In addition, the <constant>V4L2_CTRL_FLAG_NEXT_COMPOUND</constant>
+flag can be specified to enumerate all compound controls (i.e. controls
+with type &ge; <constant>V4L2_CTRL_COMPOUND_TYPES</constant>). Specify both
+<constant>V4L2_CTRL_FLAG_NEXT_CTRL</constant> and
+<constant>V4L2_CTRL_FLAG_NEXT_COMPOUND</constant> in order to enumerate
+all controls, compound or not. Drivers which do not support these flags yet
+always return <errorcode>EINVAL</errorcode>.</para>
+
+ <para>The <constant>VIDIOC_QUERY_EXT_CTRL</constant> ioctl was
+introduced in order to better support controls that can use compound
+types, and to expose additional control information that cannot be
+returned in &v4l2-queryctrl; since that structure is full.</para>
+
+ <para><constant>VIDIOC_QUERY_EXT_CTRL</constant> is used in the
+same way as <constant>VIDIOC_QUERYCTRL</constant>, except that the
+<structfield>reserved</structfield> array must be zeroed as well.</para>
+
+ <para>Additional information is required for menu controls: the
+names of the menu items. To query them applications set the
+<structfield>id</structfield> and <structfield>index</structfield>
+fields of &v4l2-querymenu; and call the
+<constant>VIDIOC_QUERYMENU</constant> ioctl with a pointer to this
+structure. The driver fills the rest of the structure or returns an
+&EINVAL; when the <structfield>id</structfield> or
+<structfield>index</structfield> is invalid. Menu items are enumerated
+by calling <constant>VIDIOC_QUERYMENU</constant> with successive
+<structfield>index</structfield> values from &v4l2-queryctrl;
+<structfield>minimum</structfield> to
+<structfield>maximum</structfield>, inclusive. Note that it is possible
+for <constant>VIDIOC_QUERYMENU</constant> to return an &EINVAL; for some
+indices between <structfield>minimum</structfield> and <structfield>maximum</structfield>.
+In that case that particular menu item is not supported by this driver. Also note that
+the <structfield>minimum</structfield> value is not necessarily 0.</para>
+
+ <para>See also the examples in <xref linkend="control" />.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-queryctrl">
+ <title>struct <structname>v4l2_queryctrl</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>id</structfield></entry>
+ <entry>Identifies the control, set by the application. See
+<xref linkend="control-id" /> for predefined IDs. When the ID is ORed
+with V4L2_CTRL_FLAG_NEXT_CTRL the driver clears the flag and returns
+the first control with a higher ID. Drivers which do not support this
+flag yet always return an &EINVAL;.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of control, see <xref
+ linkend="v4l2-ctrl-type" />.</entry>
+ </row>
+ <row>
+ <entry>__u8</entry>
+ <entry><structfield>name</structfield>[32]</entry>
+ <entry>Name of the control, a NUL-terminated ASCII
+string. This information is intended for the user.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>minimum</structfield></entry>
+ <entry>Minimum value, inclusive. This field gives a lower
+bound for the control. See &v4l2-ctrl-type; how the minimum value is to
+be used for each possible control type. Note that this a signed 32-bit value.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>maximum</structfield></entry>
+ <entry>Maximum value, inclusive. This field gives an upper
+bound for the control. See &v4l2-ctrl-type; how the maximum value is to
+be used for each possible control type. Note that this a signed 32-bit value.</entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>step</structfield></entry>
+ <entry><para>This field gives a step size for the control.
+See &v4l2-ctrl-type; how the step value is to be used for each possible
+control type. Note that this an unsigned 32-bit value.
+</para><para>Generally drivers should not scale hardware
+control values. It may be necessary for example when the
+<structfield>name</structfield> or <structfield>id</structfield> imply
+a particular unit and the hardware actually accepts only multiples of
+said unit. If so, drivers must take care values are properly rounded
+when scaling, such that errors will not accumulate on repeated
+read-write cycles.</para><para>This field gives the smallest change of
+an integer control actually affecting hardware. Often the information
+is needed when the user can change controls by keyboard or GUI
+buttons, rather than a slider. When for example a hardware register
+accepts values 0-511 and the driver reports 0-65535, step should be
+128.</para><para>Note that although signed, the step value is supposed to
+be always positive.</para></entry>
+ </row>
+ <row>
+ <entry>__s32</entry>
+ <entry><structfield>default_value</structfield></entry>
+ <entry>The default value of a
+<constant>V4L2_CTRL_TYPE_INTEGER</constant>,
+<constant>_BOOLEAN</constant>, <constant>_BITMASK</constant>,
+<constant>_MENU</constant> or <constant>_INTEGER_MENU</constant> control.
+Not valid for other types of controls.
+Note that drivers reset controls to their default value only when the
+driver is first loaded, never afterwards.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Control flags, see <xref
+ linkend="control-flags" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[2]</entry>
+ <entry>Reserved for future extensions. Drivers must set
+the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-query-ext-ctrl">
+ <title>struct <structname>v4l2_query_ext_ctrl</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>id</structfield></entry>
+ <entry>Identifies the control, set by the application. See
+<xref linkend="control-id" /> for predefined IDs. When the ID is ORed
+with <constant>V4L2_CTRL_FLAG_NEXT_CTRL</constant> the driver clears the
+flag and returns the first non-compound control with a higher ID. When the
+ID is ORed with <constant>V4L2_CTRL_FLAG_NEXT_COMPOUND</constant> the driver
+clears the flag and returns the first compound control with a higher ID.
+Set both to get the first control (compound or not) with a higher ID.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of control, see <xref
+ linkend="v4l2-ctrl-type" />.</entry>
+ </row>
+ <row>
+ <entry>char</entry>
+ <entry><structfield>name</structfield>[32]</entry>
+ <entry>Name of the control, a NUL-terminated ASCII
+string. This information is intended for the user.</entry>
+ </row>
+ <row>
+ <entry>__s64</entry>
+ <entry><structfield>minimum</structfield></entry>
+ <entry>Minimum value, inclusive. This field gives a lower
+bound for the control. See &v4l2-ctrl-type; how the minimum value is to
+be used for each possible control type. Note that this a signed 64-bit value.</entry>
+ </row>
+ <row>
+ <entry>__s64</entry>
+ <entry><structfield>maximum</structfield></entry>
+ <entry>Maximum value, inclusive. This field gives an upper
+bound for the control. See &v4l2-ctrl-type; how the maximum value is to
+be used for each possible control type. Note that this a signed 64-bit value.</entry>
+ </row>
+ <row>
+ <entry>__u64</entry>
+ <entry><structfield>step</structfield></entry>
+ <entry><para>This field gives a step size for the control.
+See &v4l2-ctrl-type; how the step value is to be used for each possible
+control type. Note that this an unsigned 64-bit value.
+</para><para>Generally drivers should not scale hardware
+control values. It may be necessary for example when the
+<structfield>name</structfield> or <structfield>id</structfield> imply
+a particular unit and the hardware actually accepts only multiples of
+said unit. If so, drivers must take care values are properly rounded
+when scaling, such that errors will not accumulate on repeated
+read-write cycles.</para><para>This field gives the smallest change of
+an integer control actually affecting hardware. Often the information
+is needed when the user can change controls by keyboard or GUI
+buttons, rather than a slider. When for example a hardware register
+accepts values 0-511 and the driver reports 0-65535, step should be
+128.</para></entry>
+ </row>
+ <row>
+ <entry>__s64</entry>
+ <entry><structfield>default_value</structfield></entry>
+ <entry>The default value of a
+<constant>V4L2_CTRL_TYPE_INTEGER</constant>, <constant>_INTEGER64</constant>,
+<constant>_BOOLEAN</constant>, <constant>_BITMASK</constant>,
+<constant>_MENU</constant>, <constant>_INTEGER_MENU</constant>,
+<constant>_U8</constant> or <constant>_U16</constant> control.
+Not valid for other types of controls.
+Note that drivers reset controls to their default value only when the
+driver is first loaded, never afterwards.
+</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Control flags, see <xref
+ linkend="control-flags" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>elem_size</structfield></entry>
+ <entry>The size in bytes of a single element of the array.
+Given a char pointer <constant>p</constant> to a 3-dimensional array you can find the
+position of cell <constant>(z, y, x)</constant> as follows:
+<constant>p + ((z * dims[1] + y) * dims[0] + x) * elem_size</constant>. <structfield>elem_size</structfield>
+is always valid, also when the control isn't an array. For string controls
+<structfield>elem_size</structfield> is equal to <structfield>maximum + 1</structfield>.
+</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>elems</structfield></entry>
+ <entry>The number of elements in the N-dimensional array. If this control
+is not an array, then <structfield>elems</structfield> is 1. The <structfield>elems</structfield>
+field can never be 0.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>nr_of_dims</structfield></entry>
+ <entry>The number of dimension in the N-dimensional array. If this control
+is not an array, then this field is 0.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>dims[V4L2_CTRL_MAX_DIMS]</structfield></entry>
+ <entry>The size of each dimension. The first <structfield>nr_of_dims</structfield>
+elements of this array must be non-zero, all remaining elements must be zero.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[32]</entry>
+ <entry>Reserved for future extensions. Applications and drivers
+must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-querymenu">
+ <title>struct <structname>v4l2_querymenu</structname></title>
+ <tgroup cols="4">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry></entry>
+ <entry><structfield>id</structfield></entry>
+ <entry>Identifies the control, set by the application
+from the respective &v4l2-queryctrl;
+<structfield>id</structfield>.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry></entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Index of the menu item, starting at zero, set by
+ the application.</entry>
+ </row>
+ <row>
+ <entry>union</entry>
+ <entry></entry>
+ <entry></entry>
+ <entry></entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__u8</entry>
+ <entry><structfield>name</structfield>[32]</entry>
+ <entry>Name of the menu item, a NUL-terminated ASCII
+string. This information is intended for the user. This field is valid
+for <constant>V4L2_CTRL_FLAG_MENU</constant> type controls.</entry>
+ </row>
+ <row>
+ <entry></entry>
+ <entry>__s64</entry>
+ <entry><structfield>value</structfield></entry>
+ <entry>
+ Value of the integer menu item. This field is valid for
+ <constant>V4L2_CTRL_FLAG_INTEGER_MENU</constant> type
+ controls.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry></entry>
+ <entry><structfield>reserved</structfield></entry>
+ <entry>Reserved for future extensions. Drivers must set
+the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-ctrl-type">
+ <title>enum v4l2_ctrl_type</title>
+ <tgroup cols="5" align="left">
+ <colspec colwidth="30*" />
+ <colspec colwidth="5*" align="center" />
+ <colspec colwidth="5*" align="center" />
+ <colspec colwidth="5*" align="center" />
+ <colspec colwidth="55*" />
+ <thead>
+ <row>
+ <entry>Type</entry>
+ <entry><structfield>minimum</structfield></entry>
+ <entry><structfield>step</structfield></entry>
+ <entry><structfield>maximum</structfield></entry>
+ <entry>Description</entry>
+ </row>
+ </thead>
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_INTEGER</constant></entry>
+ <entry>any</entry>
+ <entry>any</entry>
+ <entry>any</entry>
+ <entry>An integer-valued control ranging from minimum to
+maximum inclusive. The step value indicates the increment between
+values which are actually different on the hardware.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_BOOLEAN</constant></entry>
+ <entry>0</entry>
+ <entry>1</entry>
+ <entry>1</entry>
+ <entry>A boolean-valued control. Zero corresponds to
+"disabled", and one means "enabled".</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_MENU</constant></entry>
+ <entry>&ge; 0</entry>
+ <entry>1</entry>
+ <entry>N-1</entry>
+ <entry>The control has a menu of N choices. The names of
+the menu items can be enumerated with the
+<constant>VIDIOC_QUERYMENU</constant> ioctl.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_INTEGER_MENU</constant></entry>
+ <entry>&ge; 0</entry>
+ <entry>1</entry>
+ <entry>N-1</entry>
+ <entry>
+ The control has a menu of N choices. The values of the
+ menu items can be enumerated with the
+ <constant>VIDIOC_QUERYMENU</constant> ioctl. This is
+ similar to <constant>V4L2_CTRL_TYPE_MENU</constant>
+ except that instead of strings, the menu items are
+ signed 64-bit integers.
+ </entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_BITMASK</constant></entry>
+ <entry>0</entry>
+ <entry>n/a</entry>
+ <entry>any</entry>
+ <entry>A bitmask field. The maximum value is the set of bits that can
+be used, all other bits are to be 0. The maximum value is interpreted as a __u32,
+allowing the use of bit 31 in the bitmask.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_BUTTON</constant></entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>0</entry>
+ <entry>A control which performs an action when set.
+Drivers must ignore the value passed with
+<constant>VIDIOC_S_CTRL</constant> and return an &EINVAL; on a
+<constant>VIDIOC_G_CTRL</constant> attempt.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_INTEGER64</constant></entry>
+ <entry>any</entry>
+ <entry>any</entry>
+ <entry>any</entry>
+ <entry>A 64-bit integer valued control. Minimum, maximum
+and step size cannot be queried using <constant>VIDIOC_QUERYCTRL</constant>.
+Only <constant>VIDIOC_QUERY_EXT_CTRL</constant> can retrieve the 64-bit
+min/max/step values, they should be interpreted as n/a when using
+<constant>VIDIOC_QUERYCTRL</constant>.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_STRING</constant></entry>
+ <entry>&ge; 0</entry>
+ <entry>&ge; 1</entry>
+ <entry>&ge; 0</entry>
+ <entry>The minimum and maximum string lengths. The step size
+means that the string must be (minimum + N * step) characters long for
+N &ge; 0. These lengths do not include the terminating zero, so in order to
+pass a string of length 8 to &VIDIOC-S-EXT-CTRLS; you need to set the
+<structfield>size</structfield> field of &v4l2-ext-control; to 9. For &VIDIOC-G-EXT-CTRLS; you can
+set the <structfield>size</structfield> field to <structfield>maximum</structfield> + 1.
+Which character encoding is used will depend on the string control itself and
+should be part of the control documentation.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_CTRL_CLASS</constant></entry>
+ <entry>n/a</entry>
+ <entry>n/a</entry>
+ <entry>n/a</entry>
+ <entry>This is not a control. When
+<constant>VIDIOC_QUERYCTRL</constant> is called with a control ID
+equal to a control class code (see <xref linkend="ctrl-class" />) + 1, the
+ioctl returns the name of the control class and this control type.
+Older drivers which do not support this feature return an
+&EINVAL;.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_U8</constant></entry>
+ <entry>any</entry>
+ <entry>any</entry>
+ <entry>any</entry>
+ <entry>An unsigned 8-bit valued control ranging from minimum to
+maximum inclusive. The step value indicates the increment between
+values which are actually different on the hardware.
+</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_TYPE_U16</constant></entry>
+ <entry>any</entry>
+ <entry>any</entry>
+ <entry>any</entry>
+ <entry>An unsigned 16-bit valued control ranging from minimum to
+maximum inclusive. The step value indicates the increment between
+values which are actually different on the hardware.
+</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="control-flags">
+ <title>Control Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_CTRL_FLAG_DISABLED</constant></entry>
+ <entry>0x0001</entry>
+ <entry>This control is permanently disabled and should be
+ignored by the application. Any attempt to change the control will
+result in an &EINVAL;.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_FLAG_GRABBED</constant></entry>
+ <entry>0x0002</entry>
+ <entry>This control is temporarily unchangeable, for
+example because another application took over control of the
+respective resource. Such controls may be displayed specially in a
+user interface. Attempts to change the control may result in an
+&EBUSY;.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_FLAG_READ_ONLY</constant></entry>
+ <entry>0x0004</entry>
+ <entry>This control is permanently readable only. Any
+attempt to change the control will result in an &EINVAL;.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_FLAG_UPDATE</constant></entry>
+ <entry>0x0008</entry>
+ <entry>A hint that changing this control may affect the
+value of other controls within the same control class. Applications
+should update their user interface accordingly.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_FLAG_INACTIVE</constant></entry>
+ <entry>0x0010</entry>
+ <entry>This control is not applicable to the current
+configuration and should be displayed accordingly in a user interface.
+For example the flag may be set on a MPEG audio level 2 bitrate
+control when MPEG audio encoding level 1 was selected with another
+control.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_FLAG_SLIDER</constant></entry>
+ <entry>0x0020</entry>
+ <entry>A hint that this control is best represented as a
+slider-like element in a user interface.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_FLAG_WRITE_ONLY</constant></entry>
+ <entry>0x0040</entry>
+ <entry>This control is permanently writable only. Any
+attempt to read the control will result in an &EACCES; error code. This
+flag is typically present for relative controls or action controls where
+writing a value will cause the device to carry out a given action
+(&eg; motor control) but no meaningful value can be returned.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_FLAG_VOLATILE</constant></entry>
+ <entry>0x0080</entry>
+ <entry>This control is volatile, which means that the value of the control
+changes continuously. A typical example would be the current gain value if the device
+is in auto-gain mode. In such a case the hardware calculates the gain value based on
+the lighting conditions which can change over time. Note that setting a new value for
+a volatile control will have no effect and no <constant>V4L2_EVENT_CTRL_CH_VALUE</constant>
+will be sent, unless the <constant>V4L2_CTRL_FLAG_EXECUTE_ON_WRITE</constant> flag
+(see below) is also set. Otherwise the new value will just be ignored.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_FLAG_HAS_PAYLOAD</constant></entry>
+ <entry>0x0100</entry>
+ <entry>This control has a pointer type, so its value has to be accessed
+using one of the pointer fields of &v4l2-ext-control;. This flag is set for controls
+that are an array, string, or have a compound type. In all cases you have to set a
+pointer to memory containing the payload of the control.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_CTRL_FLAG_EXECUTE_ON_WRITE</constant></entry>
+ <entry>0x0200</entry>
+ <entry>The value provided to the control will be propagated to the driver
+even if remains constant. This is required when the control represents an action
+on the hardware. For example: clearing an error flag or triggering the flash. All the
+controls of the type <constant>V4L2_CTRL_TYPE_BUTTON</constant> have this flag set.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-queryctrl; <structfield>id</structfield>
+is invalid. The &v4l2-querymenu; <structfield>id</structfield> is
+invalid or <structfield>index</structfield> is out of range (less than
+<structfield>minimum</structfield> or greater than <structfield>maximum</structfield>)
+or this particular menu item is not supported by the driver.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EACCES</errorcode></term>
+ <listitem>
+ <para>An attempt was made to read a write-only control.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-querystd.xml b/Documentation/DocBook/media/v4l/vidioc-querystd.xml
new file mode 100644
index 000000000..222348542
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-querystd.xml
@@ -0,0 +1,75 @@
+<refentry id="vidioc-querystd">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_QUERYSTD</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_QUERYSTD</refname>
+ <refpurpose>Sense the video standard received by the current
+input</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>v4l2_std_id *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_QUERYSTD</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>The hardware may be able to detect the current video
+standard automatically. To do so, applications call <constant>
+VIDIOC_QUERYSTD</constant> with a pointer to a &v4l2-std-id; type. The
+driver stores here a set of candidates, this can be a single flag or a
+set of supported standards if for example the hardware can only
+distinguish between 50 and 60 Hz systems. If no signal was detected,
+then the driver will return V4L2_STD_UNKNOWN. When detection is not
+possible or fails, the set must contain all standards supported by the
+current video input or output.</para>
+
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>ENODATA</errorcode></term>
+ <listitem>
+ <para>Standard video timings are not supported for this input or output.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-reqbufs.xml b/Documentation/DocBook/media/v4l/vidioc-reqbufs.xml
new file mode 100644
index 000000000..78a06a9a5
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-reqbufs.xml
@@ -0,0 +1,137 @@
+<refentry id="vidioc-reqbufs">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_REQBUFS</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_REQBUFS</refname>
+ <refpurpose>Initiate Memory Mapping or User Pointer I/O</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_requestbuffers *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_REQBUFS</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+<para>This ioctl is used to initiate <link linkend="mmap">memory mapped</link>,
+<link linkend="userp">user pointer</link> or <link
+linkend="dmabuf">DMABUF</link> based I/O. Memory mapped buffers are located in
+device memory and must be allocated with this ioctl before they can be mapped
+into the application's address space. User buffers are allocated by
+applications themselves, and this ioctl is merely used to switch the driver
+into user pointer I/O mode and to setup some internal structures.
+Similarly, DMABUF buffers are allocated by applications through a device
+driver, and this ioctl only configures the driver into DMABUF I/O mode without
+performing any direct allocation.</para>
+
+ <para>To allocate device buffers applications initialize all fields of the
+<structname>v4l2_requestbuffers</structname> structure. They set the
+<structfield>type</structfield> field to the respective stream or buffer type,
+the <structfield>count</structfield> field to the desired number of buffers,
+<structfield>memory</structfield> must be set to the requested I/O method and
+the <structfield>reserved</structfield> array must be zeroed. When the ioctl is
+called with a pointer to this structure the driver will attempt to allocate the
+requested number of buffers and it stores the actual number allocated in the
+<structfield>count</structfield> field. It can be smaller than the number
+requested, even zero, when the driver runs out of free memory. A larger number
+is also possible when the driver requires more buffers to function correctly.
+For example video output requires at least two buffers, one displayed and one
+filled by the application.</para>
+ <para>When the I/O method is not supported the ioctl
+returns an &EINVAL;.</para>
+
+ <para>Applications can call <constant>VIDIOC_REQBUFS</constant>
+again to change the number of buffers, however this cannot succeed
+when any buffers are still mapped. A <structfield>count</structfield>
+value of zero frees all buffers, after aborting or finishing any DMA
+in progress, an implicit &VIDIOC-STREAMOFF;. <!-- mhs: I see no
+reason why munmap()ping one or even all buffers must imply
+streamoff.--></para>
+
+ <table pgwide="1" frame="none" id="v4l2-requestbuffers">
+ <title>struct <structname>v4l2_requestbuffers</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>count</structfield></entry>
+ <entry>The number of buffers requested or granted.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of the stream or buffers, this is the same
+as the &v4l2-format; <structfield>type</structfield> field. See <xref
+ linkend="v4l2-buf-type" /> for valid values.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>memory</structfield></entry>
+ <entry>Applications set this field to
+<constant>V4L2_MEMORY_MMAP</constant>,
+<constant>V4L2_MEMORY_DMABUF</constant> or
+<constant>V4L2_MEMORY_USERPTR</constant>. See <xref linkend="v4l2-memory"
+/>.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[2]</entry>
+ <entry>A place holder for future extensions. This array should
+be zeroed by applications.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The buffer type (<structfield>type</structfield> field) or the
+requested I/O method (<structfield>memory</structfield>) is not
+supported.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-s-hw-freq-seek.xml b/Documentation/DocBook/media/v4l/vidioc-s-hw-freq-seek.xml
new file mode 100644
index 000000000..a5fc4c488
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-s-hw-freq-seek.xml
@@ -0,0 +1,188 @@
+<refentry id="vidioc-s-hw-freq-seek">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_S_HW_FREQ_SEEK</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_S_HW_FREQ_SEEK</refname>
+ <refpurpose>Perform a hardware frequency seek</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_hw_freq_seek
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_S_HW_FREQ_SEEK</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>Start a hardware frequency seek from the current frequency.
+To do this applications initialize the <structfield>tuner</structfield>,
+<structfield>type</structfield>, <structfield>seek_upward</structfield>,
+<structfield>wrap_around</structfield>, <structfield>spacing</structfield>,
+<structfield>rangelow</structfield> and <structfield>rangehigh</structfield>
+fields, and zero out the <structfield>reserved</structfield> array of a
+&v4l2-hw-freq-seek; and call the <constant>VIDIOC_S_HW_FREQ_SEEK</constant>
+ioctl with a pointer to this structure.</para>
+
+ <para>The <structfield>rangelow</structfield> and
+<structfield>rangehigh</structfield> fields can be set to a non-zero value to
+tell the driver to search a specific band. If the &v4l2-tuner;
+<structfield>capability</structfield> field has the
+<constant>V4L2_TUNER_CAP_HWSEEK_PROG_LIM</constant> flag set, these values
+must fall within one of the bands returned by &VIDIOC-ENUM-FREQ-BANDS;. If
+the <constant>V4L2_TUNER_CAP_HWSEEK_PROG_LIM</constant> flag is not set,
+then these values must exactly match those of one of the bands returned by
+&VIDIOC-ENUM-FREQ-BANDS;. If the current frequency of the tuner does not fall
+within the selected band it will be clamped to fit in the band before the
+seek is started.</para>
+
+ <para>If an error is returned, then the original frequency will
+ be restored.</para>
+
+ <para>This ioctl is supported if the <constant>V4L2_CAP_HW_FREQ_SEEK</constant> capability is set.</para>
+
+ <para>If this ioctl is called from a non-blocking filehandle, then &EAGAIN; is
+ returned and no seek takes place.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-hw-freq-seek">
+ <title>struct <structname>v4l2_hw_freq_seek</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>tuner</structfield></entry>
+ <entry>The tuner index number. This is the
+same value as in the &v4l2-input; <structfield>tuner</structfield>
+field and the &v4l2-tuner; <structfield>index</structfield> field.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>The tuner type. This is the same value as in the
+&v4l2-tuner; <structfield>type</structfield> field. See <xref
+ linkend="v4l2-tuner-type" /></entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>seek_upward</structfield></entry>
+ <entry>If non-zero, seek upward from the current frequency, else seek downward.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>wrap_around</structfield></entry>
+ <entry>If non-zero, wrap around when at the end of the frequency range, else stop seeking.
+ The &v4l2-tuner; <structfield>capability</structfield> field will tell you what the
+ hardware supports.
+ </entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>spacing</structfield></entry>
+ <entry>If non-zero, defines the hardware seek resolution in Hz. The driver selects the nearest value that is supported by the device. If spacing is zero a reasonable default value is used.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>rangelow</structfield></entry>
+ <entry>If non-zero, the lowest tunable frequency of the band to
+search in units of 62.5 kHz, or if the &v4l2-tuner;
+<structfield>capability</structfield> field has the
+<constant>V4L2_TUNER_CAP_LOW</constant> flag set, in units of 62.5 Hz or if the &v4l2-tuner;
+<structfield>capability</structfield> field has the
+<constant>V4L2_TUNER_CAP_1HZ</constant> flag set, in units of 1 Hz.
+If <structfield>rangelow</structfield> is zero a reasonable default value
+is used.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>rangehigh</structfield></entry>
+ <entry>If non-zero, the highest tunable frequency of the band to
+search in units of 62.5 kHz, or if the &v4l2-tuner;
+<structfield>capability</structfield> field has the
+<constant>V4L2_TUNER_CAP_LOW</constant> flag set, in units of 62.5 Hz or if the &v4l2-tuner;
+<structfield>capability</structfield> field has the
+<constant>V4L2_TUNER_CAP_1HZ</constant> flag set, in units of 1 Hz.
+If <structfield>rangehigh</structfield> is zero a reasonable default value
+is used.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[5]</entry>
+ <entry>Reserved for future extensions. Applications
+ must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The <structfield>tuner</structfield> index is out of
+bounds, the <structfield>wrap_around</structfield> value is not supported or
+one of the values in the <structfield>type</structfield>,
+<structfield>rangelow</structfield> or <structfield>rangehigh</structfield>
+fields is wrong.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EAGAIN</errorcode></term>
+ <listitem>
+ <para>Attempted to call <constant>VIDIOC_S_HW_FREQ_SEEK</constant>
+ with the filehandle in non-blocking mode.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>ENODATA</errorcode></term>
+ <listitem>
+ <para>The hardware seek found no channels.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>Another hardware seek is already in progress.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-streamon.xml b/Documentation/DocBook/media/v4l/vidioc-streamon.xml
new file mode 100644
index 000000000..df2c63d07
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-streamon.xml
@@ -0,0 +1,128 @@
+<refentry id="vidioc-streamon">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_STREAMON, VIDIOC_STREAMOFF</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_STREAMON</refname>
+ <refname>VIDIOC_STREAMOFF</refname>
+ <refpurpose>Start or stop streaming I/O</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const int *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_STREAMON, VIDIOC_STREAMOFF</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>The <constant>VIDIOC_STREAMON</constant> and
+<constant>VIDIOC_STREAMOFF</constant> ioctl start and stop the capture
+or output process during streaming (<link linkend="mmap">memory
+mapping</link>, <link linkend="userp">user pointer</link> or
+<link linkend="dmabuf">DMABUF</link>) I/O.</para>
+
+ <para>Capture hardware is disabled and no input
+buffers are filled (if there are any empty buffers in the incoming
+queue) until <constant>VIDIOC_STREAMON</constant> has been called.
+Output hardware is disabled and no video signal is
+produced until <constant>VIDIOC_STREAMON</constant> has been called.
+The ioctl will succeed when at least one output buffer is in the
+incoming queue.</para>
+
+ <para>Memory-to-memory devices will not start until
+<constant>VIDIOC_STREAMON</constant> has been called for both the capture
+and output stream types.</para>
+
+ <para>If <constant>VIDIOC_STREAMON</constant> fails then any already
+queued buffers will remain queued.</para>
+
+ <para>The <constant>VIDIOC_STREAMOFF</constant> ioctl, apart of
+aborting or finishing any DMA in progress, unlocks any user pointer
+buffers locked in physical memory, and it removes all buffers from the
+incoming and outgoing queues. That means all images captured but not
+dequeued yet will be lost, likewise all images enqueued for output but
+not transmitted yet. I/O returns to the same state as after calling
+&VIDIOC-REQBUFS; and can be restarted accordingly.</para>
+
+ <para>If buffers have been queued with &VIDIOC-QBUF; and
+<constant>VIDIOC_STREAMOFF</constant> is called without ever having
+called <constant>VIDIOC_STREAMON</constant>, then those queued buffers
+will also be removed from the incoming queue and all are returned to the
+same state as after calling &VIDIOC-REQBUFS; and can be restarted
+accordingly.</para>
+
+ <para>Both ioctls take a pointer to an integer, the desired buffer or
+stream type. This is the same as &v4l2-requestbuffers;
+<structfield>type</structfield>.</para>
+
+ <para>If <constant>VIDIOC_STREAMON</constant> is called when streaming
+is already in progress, or if <constant>VIDIOC_STREAMOFF</constant> is called
+when streaming is already stopped, then 0 is returned. Nothing happens in the
+case of <constant>VIDIOC_STREAMON</constant>, but <constant>VIDIOC_STREAMOFF</constant>
+will return queued buffers to their starting state as mentioned above.</para>
+
+ <para>Note that applications can be preempted for unknown periods right
+before or after the <constant>VIDIOC_STREAMON</constant> or
+<constant>VIDIOC_STREAMOFF</constant> calls, there is no notion of
+starting or stopping "now". Buffer timestamps can be used to
+synchronize with other events.</para>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The buffer <structfield>type</structfield> is not supported,
+ or no buffers have been allocated (memory mapping) or enqueued
+ (output) yet.</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EPIPE</errorcode></term>
+ <listitem>
+ <para>The driver implements <link
+ linkend="pad-level-formats">pad-level format configuration</link> and
+ the pipeline configuration is invalid.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-interval.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-interval.xml
new file mode 100644
index 000000000..cff59f5cb
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-interval.xml
@@ -0,0 +1,157 @@
+<refentry id="vidioc-subdev-enum-frame-interval">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL</refname>
+ <refpurpose>Enumerate frame intervals</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_subdev_frame_interval_enum *
+ <parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental">experimental</link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>This ioctl lets applications enumerate available frame intervals on a
+ given sub-device pad. Frame intervals only makes sense for sub-devices that
+ can control the frame period on their own. This includes, for instance,
+ image sensors and TV tuners.</para>
+
+ <para>For the common use case of image sensors, the frame intervals
+ available on the sub-device output pad depend on the frame format and size
+ on the same pad. Applications must thus specify the desired format and size
+ when enumerating frame intervals.</para>
+
+ <para>To enumerate frame intervals applications initialize the
+ <structfield>index</structfield>, <structfield>pad</structfield>,
+ <structfield>which</structfield>, <structfield>code</structfield>,
+ <structfield>width</structfield> and <structfield>height</structfield>
+ fields of &v4l2-subdev-frame-interval-enum; and call the
+ <constant>VIDIOC_SUBDEV_ENUM_FRAME_INTERVAL</constant> ioctl with a pointer
+ to this structure. Drivers fill the rest of the structure or return
+ an &EINVAL; if one of the input fields is invalid. All frame intervals are
+ enumerable by beginning at index zero and incrementing by one until
+ <errorcode>EINVAL</errorcode> is returned.</para>
+
+ <para>Available frame intervals may depend on the current 'try' formats
+ at other pads of the sub-device, as well as on the current active links. See
+ &VIDIOC-SUBDEV-G-FMT; for more information about the try formats.</para>
+
+ <para>Sub-devices that support the frame interval enumeration ioctl should
+ implemented it on a single pad only. Its behaviour when supported on
+ multiple pads of the same sub-device is not defined.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-subdev-frame-interval-enum">
+ <title>struct <structname>v4l2_subdev_frame_interval_enum</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Number of the format in the enumeration, set by the
+ application.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pad</structfield></entry>
+ <entry>Pad number as reported by the media controller API.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>code</structfield></entry>
+ <entry>The media bus format code, as defined in
+ <xref linkend="v4l2-mbus-format" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>width</structfield></entry>
+ <entry>Frame width, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>height</structfield></entry>
+ <entry>Frame height, in pixels.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-fract;</entry>
+ <entry><structfield>interval</structfield></entry>
+ <entry>Period, in seconds, between consecutive video frames.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>which</structfield></entry>
+ <entry>Frame intervals to be enumerated, from &v4l2-subdev-format-whence;.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[8]</entry>
+ <entry>Reserved for future extensions. Applications and drivers must
+ set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-subdev-frame-interval-enum;
+ <structfield>pad</structfield> references a non-existing pad, one of
+ the <structfield>code</structfield>, <structfield>width</structfield>
+ or <structfield>height</structfield> fields are invalid for the given
+ pad or the <structfield>index</structfield> field is out of bounds.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-size.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-size.xml
new file mode 100644
index 000000000..abd545ede
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-frame-size.xml
@@ -0,0 +1,159 @@
+<refentry id="vidioc-subdev-enum-frame-size">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_SUBDEV_ENUM_FRAME_SIZE</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_SUBDEV_ENUM_FRAME_SIZE</refname>
+ <refpurpose>Enumerate media bus frame sizes</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_subdev_frame_size_enum *
+ <parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_SUBDEV_ENUM_FRAME_SIZE</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental">experimental</link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>This ioctl allows applications to enumerate all frame sizes
+ supported by a sub-device on the given pad for the given media bus format.
+ Supported formats can be retrieved with the &VIDIOC-SUBDEV-ENUM-MBUS-CODE;
+ ioctl.</para>
+
+ <para>To enumerate frame sizes applications initialize the
+ <structfield>pad</structfield>, <structfield>which</structfield> ,
+ <structfield>code</structfield> and <structfield>index</structfield>
+ fields of the &v4l2-subdev-mbus-code-enum; and call the
+ <constant>VIDIOC_SUBDEV_ENUM_FRAME_SIZE</constant> ioctl with a pointer to
+ the structure. Drivers fill the minimum and maximum frame sizes or return
+ an &EINVAL; if one of the input parameters is invalid.</para>
+
+ <para>Sub-devices that only support discrete frame sizes (such as most
+ sensors) will return one or more frame sizes with identical minimum and
+ maximum values.</para>
+
+ <para>Not all possible sizes in given [minimum, maximum] ranges need to be
+ supported. For instance, a scaler that uses a fixed-point scaling ratio
+ might not be able to produce every frame size between the minimum and
+ maximum values. Applications must use the &VIDIOC-SUBDEV-S-FMT; ioctl to
+ try the sub-device for an exact supported frame size.</para>
+
+ <para>Available frame sizes may depend on the current 'try' formats at other
+ pads of the sub-device, as well as on the current active links and the
+ current values of V4L2 controls. See &VIDIOC-SUBDEV-G-FMT; for more
+ information about try formats.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-subdev-frame-size-enum">
+ <title>struct <structname>v4l2_subdev_frame_size_enum</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Number of the format in the enumeration, set by the
+ application.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pad</structfield></entry>
+ <entry>Pad number as reported by the media controller API.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>code</structfield></entry>
+ <entry>The media bus format code, as defined in
+ <xref linkend="v4l2-mbus-format" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>min_width</structfield></entry>
+ <entry>Minimum frame width, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>max_width</structfield></entry>
+ <entry>Maximum frame width, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>min_height</structfield></entry>
+ <entry>Minimum frame height, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>max_height</structfield></entry>
+ <entry>Maximum frame height, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>which</structfield></entry>
+ <entry>Frame sizes to be enumerated, from &v4l2-subdev-format-whence;.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[8]</entry>
+ <entry>Reserved for future extensions. Applications and drivers must
+ set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-subdev-frame-size-enum; <structfield>pad</structfield>
+ references a non-existing pad, the <structfield>code</structfield> is
+ invalid for the given pad or the <structfield>index</structfield>
+ field is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-enum-mbus-code.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-mbus-code.xml
new file mode 100644
index 000000000..0bcb278fd
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-enum-mbus-code.xml
@@ -0,0 +1,124 @@
+<refentry id="vidioc-subdev-enum-mbus-code">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_SUBDEV_ENUM_MBUS_CODE</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_SUBDEV_ENUM_MBUS_CODE</refname>
+ <refpurpose>Enumerate media bus formats</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_subdev_mbus_code_enum *
+ <parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_SUBDEV_ENUM_MBUS_CODE</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental">experimental</link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>To enumerate media bus formats available at a given sub-device pad
+ applications initialize the <structfield>pad</structfield>, <structfield>which</structfield>
+ and <structfield>index</structfield> fields of &v4l2-subdev-mbus-code-enum; and
+ call the <constant>VIDIOC_SUBDEV_ENUM_MBUS_CODE</constant> ioctl with a
+ pointer to this structure. Drivers fill the rest of the structure or return
+ an &EINVAL; if either the <structfield>pad</structfield> or
+ <structfield>index</structfield> are invalid. All media bus formats are
+ enumerable by beginning at index zero and incrementing by one until
+ <errorcode>EINVAL</errorcode> is returned.</para>
+
+ <para>Available media bus formats may depend on the current 'try' formats
+ at other pads of the sub-device, as well as on the current active links. See
+ &VIDIOC-SUBDEV-G-FMT; for more information about the try formats.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-subdev-mbus-code-enum">
+ <title>struct <structname>v4l2_subdev_mbus_code_enum</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pad</structfield></entry>
+ <entry>Pad number as reported by the media controller API.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>index</structfield></entry>
+ <entry>Number of the format in the enumeration, set by the
+ application.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>code</structfield></entry>
+ <entry>The media bus format code, as defined in
+ <xref linkend="v4l2-mbus-format" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>which</structfield></entry>
+ <entry>Media bus format codes to be enumerated, from &v4l2-subdev-format-whence;.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[8]</entry>
+ <entry>Reserved for future extensions. Applications and drivers must
+ set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-subdev-mbus-code-enum; <structfield>pad</structfield>
+ references a non-existing pad, or the <structfield>index</structfield>
+ field is out of bounds.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-g-crop.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-g-crop.xml
new file mode 100644
index 000000000..4cddd788c
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-g-crop.xml
@@ -0,0 +1,158 @@
+<refentry id="vidioc-subdev-g-crop">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_SUBDEV_G_CROP, VIDIOC_SUBDEV_S_CROP</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_SUBDEV_G_CROP</refname>
+ <refname>VIDIOC_SUBDEV_S_CROP</refname>
+ <refpurpose>Get or set the crop rectangle on a subdev pad</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_subdev_crop *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>const struct v4l2_subdev_crop *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_SUBDEV_G_CROP, VIDIOC_SUBDEV_S_CROP</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Obsolete</title>
+
+ <para>This is an <link linkend="obsolete">obsolete</link>
+ interface and may be removed in the future. It is superseded by
+ <link linkend="vidioc-subdev-g-selection">the selection
+ API</link>.</para>
+ </note>
+
+ <para>To retrieve the current crop rectangle applications set the
+ <structfield>pad</structfield> field of a &v4l2-subdev-crop; to the
+ desired pad number as reported by the media API and the
+ <structfield>which</structfield> field to
+ <constant>V4L2_SUBDEV_FORMAT_ACTIVE</constant>. They then call the
+ <constant>VIDIOC_SUBDEV_G_CROP</constant> ioctl with a pointer to this
+ structure. The driver fills the members of the <structfield>rect</structfield>
+ field or returns &EINVAL; if the input arguments are invalid, or if cropping
+ is not supported on the given pad.</para>
+
+ <para>To change the current crop rectangle applications set both the
+ <structfield>pad</structfield> and <structfield>which</structfield> fields
+ and all members of the <structfield>rect</structfield> field. They then call
+ the <constant>VIDIOC_SUBDEV_S_CROP</constant> ioctl with a pointer to this
+ structure. The driver verifies the requested crop rectangle, adjusts it
+ based on the hardware capabilities and configures the device. Upon return
+ the &v4l2-subdev-crop; contains the current format as would be returned
+ by a <constant>VIDIOC_SUBDEV_G_CROP</constant> call.</para>
+
+ <para>Applications can query the device capabilities by setting the
+ <structfield>which</structfield> to
+ <constant>V4L2_SUBDEV_FORMAT_TRY</constant>. When set, 'try' crop
+ rectangles are not applied to the device by the driver, but are mangled
+ exactly as active crop rectangles and stored in the sub-device file handle.
+ Two applications querying the same sub-device would thus not interact with
+ each other.</para>
+
+ <para>Drivers must not return an error solely because the requested crop
+ rectangle doesn't match the device capabilities. They must instead modify
+ the rectangle to match what the hardware can provide. The modified format
+ should be as close as possible to the original request.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-subdev-crop">
+ <title>struct <structname>v4l2_subdev_crop</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pad</structfield></entry>
+ <entry>Pad number as reported by the media framework.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>which</structfield></entry>
+ <entry>Crop rectangle to get or set, from
+ &v4l2-subdev-format-whence;.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-rect;</entry>
+ <entry><structfield>rect</structfield></entry>
+ <entry>Crop rectangle boundaries, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[8]</entry>
+ <entry>Reserved for future extensions. Applications and drivers must
+ set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The crop rectangle can't be changed because the pad is currently
+ busy. This can be caused, for instance, by an active video stream on
+ the pad. The ioctl must not be retried without performing another
+ action to fix the problem first. Only returned by
+ <constant>VIDIOC_SUBDEV_S_CROP</constant></para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-subdev-crop; <structfield>pad</structfield>
+ references a non-existing pad, the <structfield>which</structfield>
+ field references a non-existing format, or cropping is not supported
+ on the given subdev pad.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-g-fmt.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-g-fmt.xml
new file mode 100644
index 000000000..a67cde6f8
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-g-fmt.xml
@@ -0,0 +1,183 @@
+<refentry id="vidioc-subdev-g-fmt">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_SUBDEV_G_FMT, VIDIOC_SUBDEV_S_FMT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_SUBDEV_G_FMT</refname>
+ <refname>VIDIOC_SUBDEV_S_FMT</refname>
+ <refpurpose>Get or set the data format on a subdev pad</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_subdev_format *<parameter>argp</parameter>
+ </paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_SUBDEV_G_FMT, VIDIOC_SUBDEV_S_FMT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental">experimental</link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>These ioctls are used to negotiate the frame format at specific
+ subdev pads in the image pipeline.</para>
+
+ <para>To retrieve the current format applications set the
+ <structfield>pad</structfield> field of a &v4l2-subdev-format; to the
+ desired pad number as reported by the media API and the
+ <structfield>which</structfield> field to
+ <constant>V4L2_SUBDEV_FORMAT_ACTIVE</constant>. When they call the
+ <constant>VIDIOC_SUBDEV_G_FMT</constant> ioctl with a pointer to this
+ structure the driver fills the members of the <structfield>format</structfield>
+ field.</para>
+
+ <para>To change the current format applications set both the
+ <structfield>pad</structfield> and <structfield>which</structfield> fields
+ and all members of the <structfield>format</structfield> field. When they
+ call the <constant>VIDIOC_SUBDEV_S_FMT</constant> ioctl with a pointer to this
+ structure the driver verifies the requested format, adjusts it based on the
+ hardware capabilities and configures the device. Upon return the
+ &v4l2-subdev-format; contains the current format as would be returned by a
+ <constant>VIDIOC_SUBDEV_G_FMT</constant> call.</para>
+
+ <para>Applications can query the device capabilities by setting the
+ <structfield>which</structfield> to
+ <constant>V4L2_SUBDEV_FORMAT_TRY</constant>. When set, 'try' formats are not
+ applied to the device by the driver, but are changed exactly as active
+ formats and stored in the sub-device file handle. Two applications querying
+ the same sub-device would thus not interact with each other.</para>
+
+ <para>For instance, to try a format at the output pad of a sub-device,
+ applications would first set the try format at the sub-device input with the
+ <constant>VIDIOC_SUBDEV_S_FMT</constant> ioctl. They would then either
+ retrieve the default format at the output pad with the
+ <constant>VIDIOC_SUBDEV_G_FMT</constant> ioctl, or set the desired output
+ pad format with the <constant>VIDIOC_SUBDEV_S_FMT</constant> ioctl and check
+ the returned value.</para>
+
+ <para>Try formats do not depend on active formats, but can depend on the
+ current links configuration or sub-device controls value. For instance, a
+ low-pass noise filter might crop pixels at the frame boundaries, modifying
+ its output frame size.</para>
+
+ <para>Drivers must not return an error solely because the requested format
+ doesn't match the device capabilities. They must instead modify the format
+ to match what the hardware can provide. The modified format should be as
+ close as possible to the original request.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-subdev-format">
+ <title>struct <structname>v4l2_subdev_format</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pad</structfield></entry>
+ <entry>Pad number as reported by the media controller API.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>which</structfield></entry>
+ <entry>Format to modified, from &v4l2-subdev-format-whence;.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-mbus-framefmt;</entry>
+ <entry><structfield>format</structfield></entry>
+ <entry>Definition of an image format, see <xref
+ linkend="v4l2-mbus-framefmt" /> for details.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[8]</entry>
+ <entry>Reserved for future extensions. Applications and drivers must
+ set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="v4l2-subdev-format-whence">
+ <title>enum <structname>v4l2_subdev_format_whence</structname></title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry>V4L2_SUBDEV_FORMAT_TRY</entry>
+ <entry>0</entry>
+ <entry>Try formats, used for querying device capabilities.</entry>
+ </row>
+ <row>
+ <entry>V4L2_SUBDEV_FORMAT_ACTIVE</entry>
+ <entry>1</entry>
+ <entry>Active formats, applied to the hardware.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The format can't be changed because the pad is currently busy.
+ This can be caused, for instance, by an active video stream on the
+ pad. The ioctl must not be retried without performing another action
+ to fix the problem first. Only returned by
+ <constant>VIDIOC_SUBDEV_S_FMT</constant></para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-subdev-format; <structfield>pad</structfield>
+ references a non-existing pad, or the <structfield>which</structfield>
+ field references a non-existing format.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-g-frame-interval.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-g-frame-interval.xml
new file mode 100644
index 000000000..0bc3ea22d
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-g-frame-interval.xml
@@ -0,0 +1,141 @@
+<refentry id="vidioc-subdev-g-frame-interval">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_SUBDEV_G_FRAME_INTERVAL, VIDIOC_SUBDEV_S_FRAME_INTERVAL</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_SUBDEV_G_FRAME_INTERVAL</refname>
+ <refname>VIDIOC_SUBDEV_S_FRAME_INTERVAL</refname>
+ <refpurpose>Get or set the frame interval on a subdev pad</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_subdev_frame_interval *<parameter>argp</parameter>
+ </paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_SUBDEV_G_FRAME_INTERVAL, VIDIOC_SUBDEV_S_FRAME_INTERVAL</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental">experimental</link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>These ioctls are used to get and set the frame interval at specific
+ subdev pads in the image pipeline. The frame interval only makes sense for
+ sub-devices that can control the frame period on their own. This includes,
+ for instance, image sensors and TV tuners. Sub-devices that don't support
+ frame intervals must not implement these ioctls.</para>
+
+ <para>To retrieve the current frame interval applications set the
+ <structfield>pad</structfield> field of a &v4l2-subdev-frame-interval; to
+ the desired pad number as reported by the media controller API. When they
+ call the <constant>VIDIOC_SUBDEV_G_FRAME_INTERVAL</constant> ioctl with a
+ pointer to this structure the driver fills the members of the
+ <structfield>interval</structfield> field.</para>
+
+ <para>To change the current frame interval applications set both the
+ <structfield>pad</structfield> field and all members of the
+ <structfield>interval</structfield> field. When they call the
+ <constant>VIDIOC_SUBDEV_S_FRAME_INTERVAL</constant> ioctl with a pointer to
+ this structure the driver verifies the requested interval, adjusts it based
+ on the hardware capabilities and configures the device. Upon return the
+ &v4l2-subdev-frame-interval; contains the current frame interval as would be
+ returned by a <constant>VIDIOC_SUBDEV_G_FRAME_INTERVAL</constant> call.
+ </para>
+
+ <para>Drivers must not return an error solely because the requested interval
+ doesn't match the device capabilities. They must instead modify the interval
+ to match what the hardware can provide. The modified interval should be as
+ close as possible to the original request.</para>
+
+ <para>Sub-devices that support the frame interval ioctls should implement
+ them on a single pad only. Their behaviour when supported on multiple pads
+ of the same sub-device is not defined.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-subdev-frame-interval">
+ <title>struct <structname>v4l2_subdev_frame_interval</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pad</structfield></entry>
+ <entry>Pad number as reported by the media controller API.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-fract;</entry>
+ <entry><structfield>interval</structfield></entry>
+ <entry>Period, in seconds, between consecutive video frames.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[9]</entry>
+ <entry>Reserved for future extensions. Applications and drivers must
+ set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The frame interval can't be changed because the pad is currently
+ busy. This can be caused, for instance, by an active video stream on
+ the pad. The ioctl must not be retried without performing another
+ action to fix the problem first. Only returned by
+ <constant>VIDIOC_SUBDEV_S_FRAME_INTERVAL</constant></para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-subdev-frame-interval; <structfield>pad</structfield>
+ references a non-existing pad, or the pad doesn't support frame
+ intervals.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-subdev-g-selection.xml b/Documentation/DocBook/media/v4l/vidioc-subdev-g-selection.xml
new file mode 100644
index 000000000..c62a73607
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-subdev-g-selection.xml
@@ -0,0 +1,165 @@
+<refentry id="vidioc-subdev-g-selection">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_SUBDEV_G_SELECTION, VIDIOC_SUBDEV_S_SELECTION</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_SUBDEV_G_SELECTION</refname>
+ <refname>VIDIOC_SUBDEV_S_SELECTION</refname>
+ <refpurpose>Get or set selection rectangles on a subdev pad</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_subdev_selection *<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_SUBDEV_G_SELECTION, VIDIOC_SUBDEV_S_SELECTION</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <note>
+ <title>Experimental</title>
+ <para>This is an <link linkend="experimental">experimental</link>
+ interface and may change in the future.</para>
+ </note>
+
+ <para>The selections are used to configure various image
+ processing functionality performed by the subdevs which affect the
+ image size. This currently includes cropping, scaling and
+ composition.</para>
+
+ <para>The selection API replaces <link
+ linkend="vidioc-subdev-g-crop">the old subdev crop API</link>. All
+ the function of the crop API, and more, are supported by the
+ selections API.</para>
+
+ <para>See <xref linkend="subdev"></xref> for
+ more information on how each selection target affects the image
+ processing pipeline inside the subdevice.</para>
+
+ <refsect2>
+ <title>Types of selection targets</title>
+
+ <para>There are two types of selection targets: actual and bounds. The
+ actual targets are the targets which configure the hardware. The BOUNDS
+ target will return a rectangle that contain all possible actual
+ rectangles.</para>
+ </refsect2>
+
+ <refsect2>
+ <title>Discovering supported features</title>
+
+ <para>To discover which targets are supported, the user can
+ perform <constant>VIDIOC_SUBDEV_G_SELECTION</constant> on them.
+ Any unsupported target will return
+ <constant>EINVAL</constant>.</para>
+
+ <para>Selection targets and flags are documented in <xref
+ linkend="v4l2-selections-common"/>.</para>
+
+ <table pgwide="1" frame="none" id="v4l2-subdev-selection">
+ <title>struct <structname>v4l2_subdev_selection</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>which</structfield></entry>
+ <entry>Active or try selection, from
+ &v4l2-subdev-format-whence;.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>pad</structfield></entry>
+ <entry>Pad number as reported by the media framework.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>target</structfield></entry>
+ <entry>Target selection rectangle. See
+ <xref linkend="v4l2-selections-common" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Flags. See
+ <xref linkend="v4l2-selection-flags" />.</entry>
+ </row>
+ <row>
+ <entry>&v4l2-rect;</entry>
+ <entry><structfield>r</structfield></entry>
+ <entry>Selection rectangle, in pixels.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[8]</entry>
+ <entry>Reserved for future extensions. Applications and drivers must
+ set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+ </refsect2>
+
+ </refsect1>
+
+ <refsect1>
+ &return-value;
+
+ <variablelist>
+ <varlistentry>
+ <term><errorcode>EBUSY</errorcode></term>
+ <listitem>
+ <para>The selection rectangle can't be changed because the
+ pad is currently busy. This can be caused, for instance, by
+ an active video stream on the pad. The ioctl must not be
+ retried without performing another action to fix the problem
+ first. Only returned by
+ <constant>VIDIOC_SUBDEV_S_SELECTION</constant></para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><errorcode>EINVAL</errorcode></term>
+ <listitem>
+ <para>The &v4l2-subdev-selection;
+ <structfield>pad</structfield> references a non-existing
+ pad, the <structfield>which</structfield> field references a
+ non-existing format, or the selection target is not
+ supported on the given subdev pad.</para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/v4l/vidioc-subscribe-event.xml b/Documentation/DocBook/media/v4l/vidioc-subscribe-event.xml
new file mode 100644
index 000000000..d0332f610
--- /dev/null
+++ b/Documentation/DocBook/media/v4l/vidioc-subscribe-event.xml
@@ -0,0 +1,129 @@
+<refentry id="vidioc-subscribe-event">
+ <refmeta>
+ <refentrytitle>ioctl VIDIOC_SUBSCRIBE_EVENT, VIDIOC_UNSUBSCRIBE_EVENT</refentrytitle>
+ &manvol;
+ </refmeta>
+
+ <refnamediv>
+ <refname>VIDIOC_SUBSCRIBE_EVENT, VIDIOC_UNSUBSCRIBE_EVENT</refname>
+ <refpurpose>Subscribe or unsubscribe event</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+ <funcsynopsis>
+ <funcprototype>
+ <funcdef>int <function>ioctl</function></funcdef>
+ <paramdef>int <parameter>fd</parameter></paramdef>
+ <paramdef>int <parameter>request</parameter></paramdef>
+ <paramdef>struct v4l2_event_subscription
+*<parameter>argp</parameter></paramdef>
+ </funcprototype>
+ </funcsynopsis>
+ </refsynopsisdiv>
+
+ <refsect1>
+ <title>Arguments</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><parameter>fd</parameter></term>
+ <listitem>
+ <para>&fd;</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>request</parameter></term>
+ <listitem>
+ <para>VIDIOC_SUBSCRIBE_EVENT, VIDIOC_UNSUBSCRIBE_EVENT</para>
+ </listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><parameter>argp</parameter></term>
+ <listitem>
+ <para></para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>Subscribe or unsubscribe V4L2 event. Subscribed events are
+ dequeued by using the &VIDIOC-DQEVENT; ioctl.</para>
+
+ <table frame="none" pgwide="1" id="v4l2-event-subscription">
+ <title>struct <structname>v4l2_event_subscription</structname></title>
+ <tgroup cols="3">
+ &cs-str;
+ <tbody valign="top">
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>type</structfield></entry>
+ <entry>Type of the event, see <xref linkend="event-type" />. Note that
+<constant>V4L2_EVENT_ALL</constant> can be used with VIDIOC_UNSUBSCRIBE_EVENT
+for unsubscribing all events at once.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>id</structfield></entry>
+ <entry>ID of the event source. If there is no ID associated with
+ the event source, then set this to 0. Whether or not an event
+ needs an ID depends on the event type.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>flags</structfield></entry>
+ <entry>Event flags, see <xref linkend="event-flags" />.</entry>
+ </row>
+ <row>
+ <entry>__u32</entry>
+ <entry><structfield>reserved</structfield>[5]</entry>
+ <entry>Reserved for future extensions. Drivers and applications
+ must set the array to zero.</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ <table pgwide="1" frame="none" id="event-flags">
+ <title>Event Flags</title>
+ <tgroup cols="3">
+ &cs-def;
+ <tbody valign="top">
+ <row>
+ <entry><constant>V4L2_EVENT_SUB_FL_SEND_INITIAL</constant></entry>
+ <entry>0x0001</entry>
+ <entry>When this event is subscribed an initial event will be sent
+ containing the current status. This only makes sense for events
+ that are triggered by a status change such as <constant>V4L2_EVENT_CTRL</constant>.
+ Other events will ignore this flag.</entry>
+ </row>
+ <row>
+ <entry><constant>V4L2_EVENT_SUB_FL_ALLOW_FEEDBACK</constant></entry>
+ <entry>0x0002</entry>
+ <entry><para>If set, then events directly caused by an ioctl will also be sent to
+ the filehandle that called that ioctl. For example, changing a control using
+ &VIDIOC-S-CTRL; will cause a V4L2_EVENT_CTRL to be sent back to that same
+ filehandle. Normally such events are suppressed to prevent feedback loops
+ where an application changes a control to a one value and then another, and
+ then receives an event telling it that that control has changed to the first
+ value.</para>
+
+ <para>Since it can't tell whether that event was caused by another application
+ or by the &VIDIOC-S-CTRL; call it is hard to decide whether to set the
+ control to the value in the event, or ignore it.</para>
+
+ <para>Think carefully when you set this flag so you won't get into situations
+ like that.</para>
+ </entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </table>
+
+ </refsect1>
+ <refsect1>
+ &return-value;
+ </refsect1>
+</refentry>
diff --git a/Documentation/DocBook/media/vbi_525.gif.b64 b/Documentation/DocBook/media/vbi_525.gif.b64
new file mode 100644
index 000000000..d5dcf06f2
--- /dev/null
+++ b/Documentation/DocBook/media/vbi_525.gif.b64
@@ -0,0 +1,84 @@
+R0lGODlhKgPIAIAAAAAAAP///yH5BAEAAAEALAAAAAAqA8gAAAL+jI+py+0Po5y02ouz3rz7D4bi
+SJbmiabqWgJs475LLCt0fdy4oeN9/QPuEEFZkXVcJZXDXNP5pC0TgGrMSrRMidhA1/uNbB9j2CZ8
+Kc+qHDXDTT2jK3BuPau13vFpdmc/p6Uh5SeYoXMHyFNomEeYiNEVKCFFx8Wz2Eh56YWp2bfnGXk1
+OEhaKnem2rYa6vp3KIqaBhULmsk4Ufc1KTbq4rfbhxkcOQx22limZ4P8STYH3PsGu8pqe439aw36
+eji9qT1rGCpraf5MkQynyJeuG0c73imvLYzuUAwF/P6WTK8vHDdj2Qia8hYL4bF2o/CpmydOXa6I
+uqQNPFepny/+d+cM0qsH8qNGCI8M3gvG7KG8iSJJVoNIp1w5h/C+gSPjgWE9hR0Lqmzp0RFPjLV+
+hoRki2XNPJyCVmy2U6KnHm6WnboRcOPFkS59xqQpEKZRpkDHfi1rdqlXgTMVKVVL7h/cnmi1rtxq
+t27Yn1n5xrySUi81iYAlvR2MN23Fm/nkyHzp9G9iSof3Ps1pE3PmyV2dhaSL1Jiee3/ZjI5Mkhlj
+xDPXGnkClgns1pxV0K6d4rbYF7pRv44CW7Dtojt6f/YxO7hxrrmVJ3/eZDnd4tCjVw+OPbv27dy7
+e/8OPrz48eTLmz+PPr369ezbu38PP778+fTr27+PP7/+/fz++/v/D2CAAg5IYIEGHohgggouSNFv
+1l2HHIRCACehgw9eOIR0001I4YVq8MJIVZItUpJiG564GG75VJaXb5aVthtljwnV1mauyXijVqtB
+FVRoK7Foxi0kNphaYdhYNRUxQMZDWZKd9IXTQTmmFluUDQln5TcqBrnlYEOhaGJXNZrUpR24sLPN
+kC6uaBGWMywERpWISeUZacIE5iZH8OApJ3FrtvhnY5AdR1iZVOw4p1BTZhljlGNG1aijfgIKl4+f
+kNZjoIL2ySOacX4kYlyyfDgooWBSWmikOH15mU5ksfqiqUVqNsySXN7FqZ5jWdoTr7sSqaOtTH6Y
+EajMNZX+kbC53qopDDMuymhprgLbGaTUbgrtm8smCqOqQRYbZrV58vijtzZgNW2TTHZEag7rHFuU
+Pp4aSq6sc9EJa7jinpVuq/Ruy+xSj9KibL0YyRXrXr7WlC+242qrDMJsEYYSVvAiUzGJwg7c7BqI
+GjyiuQ5f7PG/7j57VqkpqryyyJ0WDDBxC29ymr3+YFEzyRpLE5qG91qYYYVAR4hh0B0WTbTRR1Mn
+NBKTDs0h0lErTTXTSyddNdZabw311ET7nLDTTct2tddmn82bc2V3zbbYazMId9xyz0133XbfjXfe
+eu/Nd99+/w144IIPTnjhhh+OeOKKczcR2CYvDnnkkgf+XoTF2eUCs9uTb85554MrVUjmJGDuuMue
+n4566gKyxM+T2L37cNqqz0577QG2/ikpVxEie7LflW578MIPL1vroVdifOy3outkscD/THz00k+v
+ne46ApQT70o2ZWz1RT5Pffji2w4YWcqLkrzvMhNT/Wjuvy/6+PLPL/w/854vr+t58gP+vufySb8A
+CnB8phEBmo7nhDHwz3vQGKADH0jAT4UgVGZQILjeBsEManB6GqKgP+h0vtFtcIQk5KAJpqAa/znL
+Xc4CXv9KCMP2fMyA8fvDDCdYwzbg7IQbwZ0IqeHCGArRbj4UwgvxgDJSHXEfIUQVEpuIqiLycIhU
+jJv+FNO2RCeJQ4kPuuIHUMi+Kb4piFUso4K8yIQsYm8cIlKj9VrQQyiqUH9mrOPm0DgcN8YsXoLQ
+Ix1HAMY/ArKCdiyk5PDYHD+6qo1dlOPItIXIG0XSkJT02yR5qEg2EqyRHYyjzyrnyEqK8oyhTEgj
+7bFJo13SI2EwzCdDhDP4yXKWtKxYLWWJsVu+L5e6rFkv4bezX9pSmDd0XzdgZkwa7SJnFDMNMX35
+TFdGM5jE5GU1o4kn1WDzmXbg2TaFaSZrgvNks+ymOL9Jy3DesGUiSd5wmEhGt5SiHUipp+naCZL7
+6ZOV+WyixMJhT1MKlJ+CFCP2nmexf9plCZZbJWT+3Cm7MJIxSfGcp0WTglGC9CtL+9RERz3aT3pm
+FFeiuShBHcqNN75ToqjkaBhXqr8XJnSPIC0oHP2JU5FqdKQ2g5jyLNerfgo1qDolKTlMmsqTlrJa
+Km1OAmOGCKa+1KkstRBEUdDQpUpqoEk1KlF2ei2fftQoYyVrSFERUK9aQp4tRakmbXrTqtbUpXD9
+oVw1d9UTZLWiXO0jWnn61Y7xca5mJWxhifpXsKr1IWxV6kQPitc1GnZOTcVqFhRq0Lxmdqp6palb
+L5vYxQL0nkA9rGnVgql9FvWoiu2qX9uqVWxVtrNP/em6lsdZ2t6VbE9ap1B9y9qS9jWwwS2uzvD+
+OdmFDjWoIF0tcZ+7VqTWFLjMpS5Ri6krsaoJpt6M2hFLK7bGuha6DAPsqSi7XNSmV73NDa1xVSLe
+1xLUqlaLbViWCF7vJu27ns2pe8k72rCSq6z3XW+B22ve8rZWvuM9LW/xm13LPo2q9mUufScU3+gm
+OMCiDRtukytVEIcYsRuO44I1LNz5RrTCytXvfo/G3wnTNsOM/S98S+zED1vYwS0WsWxxGkLMbjXF
+DWbvhV185CS/GMm9ky6KOywmHM/xxz7WMY97bFbn3vjENR7ulSVM05QumcljXnGMabwnGysYylO2
+spG/TOUqo1fLa35vl4ksZ7uyeMRmrq8akav+5OI5+c5sFlRaezpgA/P5zXDGLZ05bOc0e5nRD/Zz
+mfscHWYiQdNKAK6n4wfAxSTi09wk5zipqctunvqct1T1L8P5i1GLLtTsdMRBrBvrHNoE18fEL6dH
+CexgC3vYxC62sY+N7GQre9nMbraznw3taEt72tSutrWvje1sa3vb3O62t78N7nCLe9zkLre5z43u
+dKt73exut7vfDe94y3ve9K63ve9t7SBkNdH47re/9Qq6CAP63wQvuGZ2mYneFoPWBm+4w8VUWiMB
+5IIPr7jFX2a/YCZ8zxfvuLnf1VB5QcnjJDd4YTKucN3xuuQsb7nLXw7zmMt85jSvuc1vjvP+nOt8
+5zzvuc9/DvSgC33oRC+60Y+O9KQrfelMb7rTnw71qEt96lSvutWvjvWsa33rXO+6178O9rCLfexk
+L7vZz472scG0vllD24rZzrW28bbtcl873N2uObUfqkQzJFaJPAO9Fm53W34/mcbO+7/t9j1ksfzY
+MiUO+DaXDPCLT9VpKr8yZnpQDM50JcmkyTOdNT5Enx8mxhAPaxApq/CULxjFV9S8kT9yhWts0zL4
+JVnX44uigl1481Cf8KsI3Kf+Er6biMXS18/+gy2JJfBzFw/Mc35U0NcXJxAh+4A1ENC69xdoER38
+34Mf+sZvF/5OP3yQ+QKAt8+14Z9/2dH+H3dnh4d/Als5f1MzMcdsCoj5SfwwqXVb/Mca6qd9WBaA
+R/J+1qddDHeAUZZy85c+mOcp/ndc5QMqGyMawrd5ACVx/8dYKrcsFQg7DAhEu6NAG7g9q3cU3RN4
+zBJV9jdwsXM/GQiCRuZWNWh7Msh3QmaAhoYSIyhja1ALbQJ/obM+L0iExvJry8d8LpiAuPdSN7h9
+3VOD3kdHW1AVsOOAxEclTySEIIQOHViF7XSFZQgUVFiGj8CCYpiGR+g8Axgt24c8Q9gpvTJbHjZg
+IjguFJQVZChbH2h/2rODJjgqxieDGTiFevgyFKWGAYOBj8gtVPF564IpLRKJgziAgAj+ieFniNxX
+fUo4LPcXhn2YEqMnif+TMYNHgKoWeTTYTGoifZzXeAsoivpXJ2f4PaHHik7oMZ1ni4yIi8fDib+I
+gen3g6pohE34gMa4cbO4ixJkh8m4d0HYi5Lniq1XjMqojcqgd2AmNXVnd3g3juRIYXT3dnGXjuZ4
+jl/zjboVjuvIjvB4d/NoUOiYd+qYj/Z4j+6IQXNXj/IojuAYkAK5j/yoZwV5kAa5kA2Zdg8JkREp
+kRNJkRVpkT73ZxwnjASpjwCJkIP0jv3Yke34kSAZjww5kPQ4kiSZkipZkhOkNifpkOWIkjQ5kzZJ
+NqyXi9uYeIrXho8TZtTlCjnEMfn+Z07jN3n3hIuC1ZNKeY2JiD6Zs0gC5iWzliav+Inv51vKx3wo
+WIrTV3uh2IqC9zjZN5ZL2DBgSZW+iI2GBpTT2IwmtpajqJSGIY232JRbuQ1myZZoKZZZmTt8ySV3
+ggapWEHRAJjU2JaL6YVMKYepMpe/GJlH6ZTI2Jdu6ZRcuZGQBJePqTCTmYRG2XyO6Q52Ui5QuJn7
+sA4amC2XOYeJCWukWVugeX2y+ZeiGZSO0ZrncpdGWYKwOZq2mV94SXwzEyymCULIo4u0h5rt95ZD
+uZuuyS2xSJuNeZZ3WJlhBmRQBAhCGVrLmRfGCXF1yTyg2ThkQlZ5eJ3lWYipOZ3+UKmd/uSDrwmf
+ciSY76kuacmY+Hk9lWmEwumJ8BmDSBl9/zKgpEmI6CkjGcOM/MmN3QicnRmX0OBpuvmW3GlD4jkr
+QEmUFuqfHXokUjkPGtoYDSqd+meiE+qMehmf0ZmQComTHtmScSWTMWqjHPmSMPmPMhpRGemjMYmP
+N4mjM0qjMHqjLkmkL5qjIPCjLXqhLqqkSWqSQXqkSFqTLHmlVpqlIrmkF+mlXwqmYSqmY7puiEim
+Zzogj4GEaMqmAIIQmtmmcTofbyqhcqp0GSlD1gCndvpvuqYldSU3dOqkfJpun/VFt1md5sFQjOKn
+hFpu+dObKVMXUnSMx5AfDBX+agfqqH0qQQtkCrMZf81gqBvnmemBTZtacuCyp98yFbyAD/NJSLiD
+p4dKoSuHqu62qJHqlpTYJ5AgcvKBqbfqclroUOUZBynoFP/pHrMqrI8KL2CErB1YQPHBrM06bjwJ
+lxsDJCkkqgD3WNZ6Ro16lT5gq0JCnBPGrfs5SerJcaOKm+BaH+4KC5kkZoR2nTTBrixToKCESTwK
+r2mqkatySi1lr/uJr7nFpJ6kooMWpf8KsHpErwQraed6sIAKLez6SQHrsHAjr6wQsSpGMzzIqp0U
+ZfwKR9W6sfzRsarwsXnWrYDJryurohjbWSibsvohs5MmaBI7se45qQhLq5L+YrIiZLM3ix85i2e/
+oRMHJLJesmfoArVPyWqldnivNrW1hGqvhk5Xi7VcW05ei0u9JrbKNLbS8nioyE1bC7bAtLYIt7Xo
+BLfmBLdWW7Vz20vq9E2mFrZ1u2qihrcdRHq19Vj5CoaFVqIMC2kAdq/U57KWqGh0hWBJu2WG67Q6
+y11AO6WEq6O71WjIhbRSBaubG1OVZrH7R7lAhLhyGWmLO4MHtmOUhoDqhWaJO7mru34YorlBC1mV
+Frr8RmWf61K9q7uaRaO5K1m26xKzq7qKa7CM+7qu27nadVaWC4GnCxXKS2HG+1CYm7nHG717FVnC
+Syuje7mlq0XIK7DUO2T+6Luwvhu97gu7iya7qVu97Fu5khtZ5ju+2ru94uu8v6ux1Oe/BUG8ema8
+A+y9T8Zg9suZCGqZjtu4pfm4wUu/68u8FqzAFwa8H7bBjgZVyAi+vDuo8xvAIVy/F5y++Eu7dZaI
+wym/sQvDL6xc2IvBLFy7C6zBJfxECPV9BIZe+ru/CZy96DfEWHm/DDxGFYyZ1luqcfa+EPy8MQy6
+SsyqXLbCPeti5fq74gq62JWtSMTFwavFUgyPFShlKVxkV7y8ienCkPvEEhzBEkzDS4zEBaq+ZXxp
++RtopEs1MQYwCIzAQJzEZ1zFBPq/8evGiOzEWUbFR4zChZzG5bvHkoz+aWRmyZRsw5mMxRl8w51M
+sYcMvYrsZqFMwiq8xpp8yptcyavMynw8yXrcyqksy7d7x5D8yA46ymScyzKcyKUcySfsyWpMy5Z2
+yZjsyrGMzOBoxlYcsrXsyMHMum28yKSsyz8cub9cw8Kczc1MzK+szHl8zMX8zXVcuNh8uIT8zJ/c
+utUsvVHMyxTszA3MxOWMw8mMx+BcxOIczsY8y9s8zOZsy9DcvOv8zrvcy+zcgI0sz+RsugBdvPic
+z/Z8zxmSoqNT0aq4a1JiI92Q0bm2aqeqt3cb0qk20q1W0iYttbR4ax3N0RsNBBdNQ114QjCNQzLd
+AjRttDmt0zvN0z1u7dM/DdRBLdRDTdRFbdRHjdRJrdRLzdRN7dRPDdVRLdVTTdVVbdVXjdVfVBkx
++APSnNU5bZaaCsVfPdQnR8TkJwlnTdZAnSwXJIidutZBHbhrqpqnuKpx/a9c3RdvndZ43dO+pCSY
+E9gqF8bNWgAAOw==
diff --git a/Documentation/DocBook/media/vbi_625.gif.b64 b/Documentation/DocBook/media/vbi_625.gif.b64
new file mode 100644
index 000000000..831f49a02
--- /dev/null
+++ b/Documentation/DocBook/media/vbi_625.gif.b64
@@ -0,0 +1,90 @@
+R0lGODlhKgPIAIAAAAAAAP///yH5BAEAAAEALAAAAAAqA8gAAAL+jI+py+0Po5y02ouz3rz7D4bi
+SJbmiabqWgJs475LLCt0fdy4oeN9/QPuEEFZkXVcJZXDXNP5pC0TgGrOCqVMidhAVdqVbLmx73Wc
+FXfNabGFzfbG3Rz0bDO/2G1hzJ7o8ceT56dB+Gb4JciD16fnh3VI97bmOCE4tyhVUSbHKOlg1xnp
+6aWFKDfaecrqQlrK2vqK2bjImPFaiLuKuxvY+2HLq1tniHcLzFmWy6mnitxMeWs5iaZo0xZhTahj
+rdzXHa3m6Eod+h1+LW7MXpx83P7962y+ju4O//5oGr8PHUvs36VjoCBsujTsxp5t0MIB1MZLYb07
+CBt+QlWRHz/+Zto62NLYD+Ouj7Q+ZlMj0J80kCr1iaSHT6WmeAXPAXOVzNs0hw8fHAwzkeLATz9E
+xVo2qCa2o7AA9Wz5cmXIgFAhKu2Yb2q1rFSrDmUZFeUgrQaLdhWriFZKGKt6LNTSlopXthevrIUB
+d9rSp6FGcbnLwCRYe2ELo+VK+CxEwF9XkoypeCtZn05dTiqlNupMxnyWxXkL17OVtHz7loMTdO+4
+pGsMsz0dKbVcyK7LXsWbyKSweTA95qatDHho4T7TqqsdWN1toaFbExNMHMkTzimgR2cSZfpgI9qt
+T8aePbz4IQebeLcsZDz56ecjv2g/9z37+fTNd6+vPb/+/fz++/v/D2CAAg5IYIEGHohgggouyGCD
+Dj4IYYQSTkhhhRZeiGGGGm7IYYcefghiiCKOSGKJJp6IYooqrsjidyrAh9yL+K2nng/31WgjjtzN
+mKOO8lFHxhlJxRjkkEY2tloWy51k2mxAVoaQQkImRiRuIyEmD5ZIomeVYMLIZhMkS6rWm4vJecZl
+cWBsRomUz+Vlymg4bWflYnGWo5FOGZ02FphPYmbkmHQmRxRSgzJXpntl/UlmcIca5ItvilJJx2OS
+TkrZo5k6CgemfBDFKJPF7ZRTIZsMgxUip4qKKFN5UropSKD54xasW9p6a65VBiYmb/dc2qZuwMaH
+laXvZEb+FbKPCKpkm68KutBoTshZWpN6MRqtm6H+8ZmTulabqplhXikuNtBhgqqnM6SLa7jE2nZd
+rGzK5CeUqMxJq6l2YavvTn6yGVG7zGn77aZgvOvuruvGexnCndXLq5YCC2Vsmg2LUzGcTSm8r7fg
+0pUKxMgwdOdY/O4JaMkFf/pqyiv/Jau9CY/asqatOlwnzuM6JvHMOsPsZaQZ/3zzV0NfdnS4HL3c
+KsBZpnIk01NCHbXP1o4MsSjgyAzp0xsddzHRHqOz2289d83wmb46e/aibauZNhXGMWuz3KjNG6Vz
++fooHY/p8Q0ejYDL6PeO9hX+4+DVsRr4DjByPMLjE5v+ILnUJ1Qe9t+Cb855j4d/jrnVfSuOQuii
+N+5555qrbjjrrTt+Y4uyz0577bbfjnvuuu/Oe+++/w588MIPT3zxxh+PfPLKL8+87rWGYLqI0TdP
+ffWwM249oXKDgC/y02cPfvgkkPJ97t137075HKovfvvuQ1KXh9zKJ6V37A7P/vv6739Oa0BFnoRK
+QG9+2PlJMLDnu/zxb4EMxJPJ/DLA/sXvF0EogsgG5hQDkupeCOydAhkIwvcdAYJeqYdfymOMCvLK
+Swe7yKqgkLU4dZB3AaRbCG8YwhrOEGazUaHJNuKboqjQaRBMSDrqBkOu4W9uTAQbDp8IRSV2jFtm
+2Y7+thwIDyzi64VIBKIMvQip+/Gwit5Tkw2jiMbsGcVRPfyhBTdGq7gY6ovoG1UL6ximJSwtVLjT
+YRr/mMZZFctJRZSgLswiR73gMWcsqw0Jx0a8DwJyksAj4CCjRr7T2aSCiQTiIiMGsvg8UorBkyQl
+T7k7S3aNXQJEm2lWxcl9bRGFnWFM2TAIyuOZEpUpOqNHLhgMX9ahXqq02xZTQrCdRQyWdpolq+Yk
+uTdqMoG8BOEnZSSsHYLRRmukFAnFGKOA2ayVsBjhNkUgTVcab5fVNNE1F5fNk33wnY2y2iOBWbQ2
+8rFj9axLNBmZy3W2c4H0vFwXcTmUeXaxmBmUlf3+LkmSdJprn5kb50AvWruCUu6g3gKNQrtZmns+
+dJUU/WE/6bjRgAIUoyx1J0e599I0eNQ+INXVPaEH0ZTeAZzE2QI7WwrU7Hw0KzNdT00rOkqckjSm
+9jynUvMJyaBKVX5MDSJN9jHUj+UzqTCdGtWcOECJyAmf8CqSbWDTxLSiVa1MZA1b5+bWt5ImZHI1
+Dj2YZddgiSyvel1rXc3w17bSNbCiIWxhDUsGwyoWbNdYrGITO1jCJjatRXIsYs/gV7betbJkhZtM
+ndqChkaPJ6fYTdk2g9pyQUmVrJVJQDS6Qnak9pBX1RxXxyfa2o4LmoG7LW6nVdJjgfa3imzc/Ez+
+K9ubKNdiuWytSJz7XKbCliKzxapuE+fJ3k5wHVOoX3AB4tvIAYKnxEUp4Yp7Xj5Od6LLtS5tmYtQ
+8Lo2uq5Fbns5+N7Xei68T82ufl3J2/Tyt78Bxm6BS5fb9HJ0vXI57X2jcUv50pe7842uffOLX/f+
+t3UDPmAS59Xd8X63MR32sD9tO1zxfti4y0phcjEMYdV+dsISpnB9XfzgVuS4xgberk79S+Pdphid
+CRbwkEML3KpKmMH6OC6OYaxjKGtVNdDlMYn1e2ENZ3jLQdbuFxe34grL68hdRa+RyaviQo02g51F
+kpN74WApV0rGFumy0sQs3yxzOcpatjOY/eX+Zbols06wCXSbrwzWPyt5w9hdsHQfHVM0L5POMfPz
+mC09Zj3HWM6XZPToFo3nT7Nv0F7e3KhJ+WNHa5rPe04opUkN4FDf+cZwfnGfWY3pH59am2UGda51
+PZ5dj7glb+4Xp5d66yl3VNax/nVzHx3nZM9ZuCiutrV7vN9gZ3t1xW7xjqct7YoK2dlUZnasV+3q
+Y2cqwsL2tY2vLerrDfu68ea2t40dbmS32nIzfreVkYblJ+d73d8GOLxLzeFtHzzhC1e0qgW+705H
+fJrlJveyLb5sdIN74gSnNsM/DvJ6N1zk2H5dt1Vla45v8tWofjbG+01hjUt80wO/dMgRXvL+nOsc
+CHM1Qs/fw9fhkEtMmrBhovMW2Mn+Vel1Zbpcnf50r7KN6CMpOj6DjoSfZ/3o1dG6Erz+da5Pdexk
+L7vZz472tKt97Wxvu9vfDve4y33udK+73e+O97zrfe9877vf/w74wAt+8IQvvOEPj/jEK37xjG+8
+4x8P+chLfvKUr7zlL4/5zGt+85zvvOfx7sNrXfzzpC89gyQB6zqbfvWsL9Bh7xgyNbd+9rT3zxwr
+3aly1n73vAcdMw7rxt4Lf/iE4+LX2rJH4it/+bLNvSI7JXbmS3/61K++9a+P/exrf/vc7773vw/+
+8It//OQvv/nPj/70q3/97G+/+98P//j+y3/+9K+//e+P//zrf//877///w+AASiAA0iABWiAB4iA
+CaiAC8iADeiADwiBtoc4n+Y6FChvFYg6qaOBG/g6HNiBq3OBE7gua1I1FCd1JKhsXkVa4jaCPRRD
+XoOCKUg1MMeCtVQZ0RdVZQVD/+I1dzImWsMT0AKDUmeCR3I3HHOELXdSahMoP/g0n/GCUdKETvgn
+5MMnJ3MYX4VFRQgoUChIboMmybdSIHOFYqhSfQFoJlWDQGOEYjMLs2A5b7iC6kQzaCJ6ayhLX6VN
+JONAgHVUdSiHu2KFPoaHD5QykrZDsYEq3VQSUzQ5qzUyMniDOTiGNoeFGPE8/DZjQjj+XzhIiXfm
+ibymegeFLBqkiZFYM4XoMXqjiqNHiskSikqIKIX2iDA3K9mSJ9QiiZmAiq3YhrIIjCoYjOrFilQo
+dGamibzoMlxoViozBrhIg8yojDOYjM6hi9XoXZcohf/whVaBWYi4LZXQh7WYhNsiil9Gi6eIe4lY
+KsP4Um6yV+04jKVIV7U4ilVIVKkYKzXGUAZHS3QoGbEniRv0j/tYWpmojqT1h+5yTANZaY5Whc8g
+Q8QEJ/AIjlrTi+aIMkn0M7lgKAupPQTTjWiIexfpDBZhhp+4PQ/Zj2TYUNpYh81CkRsJezKYSUt4
+hi6piDBJkuOYkji5ks5nSUA4JZz+uI1KMpPHyBIjeTVqBpKvcYNRmCTRCJBNmYtPaZV22Ip5cHv8
+xpVEWJVQiZRMKZakYZRS+HNkyYRaqJYtaIRS6Y0zGI/zRmlEJoIKFoIeaIF6mYEg6Jcf+JeNlpd/
+Y0qFGTsY2JeCGZiKCZiNuZeO+ZiMCZnnZZikg2CWaVCYiWSaWV6I6XB8mZiRKZmiGYGlaZqniZqp
+qZqryZqt2WuDOZl4uZikKZux+ZmzGZq5WZu2mZmc2ZueeZm+aZfC2V+wyZupZpy0eZu4uZzHuZlE
+OYUK85UlaJA6uJTSuTXU6IvTeJbwpUw9CDluKTZAWZ3N8TZiWZdulZ7UaY9s6Z3+NqidDjmNmFiR
+ntAtKRiI9qknh+GFgoh842iTqvCR7QmWDmmI79mT6hJKCgpVBkpm5RmewQWODRqSP5mTMWmhFLow
+XyOPzdBCC/VfBVmJBqOS5BlfIPomJeqOGvqd40mX71gL53km8RQscdOi6siRCHqiOMqNDGouwCSi
+TUKCSXmUYLSfRzmHYYmeD3mK98meI+qLKgqhUbqWBEqIDpqhUOqS63mOXfqkPJp6SgpgF+RgTnNv
+6Uil8MiOKcpr9AhHzNgsUjpiSZMRXGqidzqCV7c2ERqkVLqicroXdEozb5qQZNSeikimiSiROEGk
+YMhm+FifPTo5v7dPGNkyWTr+pzJ6oQ6ahy76p16KqSy6oYLqp6DqpTB6qqU4oeeIkBjzhDv5iNMZ
+n1NapUlKq/DplOT4P1+6qTwqXbEoqp7lqakao5qKqz66klwqTFQkWJAzV0Z3V31KosT5msmpm7up
+nMH5OcCprdaKrdn6m9yqU5W5reK6meUKms05mteqruwart7aru46rncZr99qr/farelar/mqr+/K
+nPvqr//qmgNLsAVrsAeLsAl7O8ansNP3U9ZjKaHasID0sNxTsc3Dbi86sfxzaPzRsZOUse62sR9y
+Ho8BI+RUp1KhhlMVshc7sgMSG8N0pUGZi8HET2KRYUxGSS37sh60jMuCZgD+Sqgn6U+xtLLTJqIS
+5bInEkD7+LE9qyASQShBCBX3g0j66KHFZbRDS3CkhkfQtLQu9UqGKrJQmyD+s1O1MpciRrYn9opm
+xkrPMkO0VEVqe7QNdFlm2yIFpoxusap1ezO8lTWdFJVu25U3qjKpeDBhWyI1BKx6CyJJJWltyahW
+dCrRgowf9kKH26s3qXrSAkV+BLm086EvKaYNirIZpyqlK2Lsxbmiij5xG7qjKzwh9oxA8k8eCmtf
+m10+pTFXyrgkEry0GyDd5Q2ykbtmtE1DtFN2YUGY2ranyjzDq3ePi05PO3U+IEzF6rsV8byg25mT
+BpJS+0aryqnTe33mC1P+WUVv+iYE6otUMzss4utNpuu6yGlN6auxWWtUMbFGWZW8S6Gza1hiXHJg
+w4lD1Jt38EtBNOW/NMdN+ysqBYwwFDwXB1ycxCsgDGxV/du+7ssdHAyhFtwuJFy/Ioy4GuyxEjwQ
+7OtpMxfCLEwnJvwyNGxTD6qjKkwjLvy++QjBPVy2UmTD0zTETYXCWqrD9MHDMexxuMbEAdxGAZwJ
+sNoCQOGH2MtZjhVZSWdZr7d0W9x00cqseAV2Z7VXz2pZYNx0XRxXSafGXRzGUwjHbwVZcxzHscfG
+39hEWWzHalXH2/saYsWrxYqSMnxxA6xyhoRviTxpyMqkV/Zy9+iPEMf+v+q2cqaGw8BSxEsGaZyR
+jWsWZmdmyM92xLOGyD9cyfdWc7iBN5Dsb678b6ZMyaWVcqjcY6XcbKfMySAGiqO8iUFMaJncaxh8
+rpucboucyoxMXTksybP2ygZnYzIXRrXsxLfsy3Wmy5A8wGH6Wbh8admMaNesusCMS+AMw7RcawUH
+wgm5otzscs8sy+mMzNW8cSjmzeNmzrkcaUr4yYFGzhh0z738z4c80PaLcvK8yo08nu68o84cy/qM
+0Adtyay8rcRcXsY8yW56buKsptPTzwkX0C6Xzx03zy1MzcccngxdcfDcbNE8yyatziSdbSFdzgX9
+yxqdaRxdZIpm0b/+iaY+PcgeJs2UEW3KjKeQGMmPDM2cHNHJbMv1DNKAbMpYLNKJ2kH1I9W5TNWk
+nNWwTHJ9M9SKnNDL7Mgq7YpevdTa/NJuUNRPjXNvbWQKt3NwPdc8nSNhjRdtTc9wqtQOjdZ+PclN
+jc4TrdBy/dV0bdcjp62SZNYEdtdr3RF6jdKH2s6VLYqN/cuCDdOETdYX2G6f7dmGfdg3F9c7gtex
+FdOXvNCWrV6sDZF3KNGqbNT6FNqKDWyiXdqkXdeL/diazdYnDdXsfNmuXWVq7duRDdznPNqJrdvM
+vdu8XdG4DWan3bypTdFlTdzmNm4ufdzTbN2FbdvFLN3OvdzkHd7RF93bJf3b393ZKZ3dSY3Z2AzZ
+3s3ZAhzd551mNv3Ozw3U5lHGpfPfl3NGA351Rmfgj6XHd7xYUKdZCR51rGE2vVJ1E04eAU45Fl7F
+1htMGv5LHN7hXZ3EIS7iI07iJW7iJ47iKa7iK87iLe7iLw7jMS7jM07jNW7jN47jOa7jO87jPe7j
+Pw7kQV68E+EQhqrAQs6aZmirzYzkQC4aAmmIygHlTS7kP0G3gRJ8VB7kAGCRbQB8uqflTu6Ci4jl
+ehjmPs7laf58XB7Fau6DR56aBQAAOw==
diff --git a/Documentation/DocBook/media/vbi_hsync.gif.b64 b/Documentation/DocBook/media/vbi_hsync.gif.b64
new file mode 100644
index 000000000..cdafabed5
--- /dev/null
+++ b/Documentation/DocBook/media/vbi_hsync.gif.b64
@@ -0,0 +1,43 @@
+R0lGODlhBwHJAOcAAAAAAAEBAQICAgMDAwQEBAUFBQYGBgcHBwgICAkJCQoKCgsLCwwMDA0NDQ4O
+Dg8PDxAQEBERERISEhMTExQUFBUVFRYWFhcXFxgYGBkZGRoaGhsbGxwcHB0dHR4eHh8fHyAgICEh
+ISIiIiMjIyQkJCUlJSYmJicnJygoKCkpKSoqKisrKywsLC0tLS4uLi8vLzAwMDExMTIyMjMzMzQ0
+NDU1NTY2Njc3Nzg4ODk5OTo6Ojs7Ozw8PD09PT4+Pj8/P0BAQEFBQUJCQkNDQ0REREVFRUZGRkdH
+R0hISElJSUpKSktLS0xMTE1NTU5OTk9PT1BQUFFRUVJSUlNTU1RUVFVVVVZWVldXV1hYWFlZWVpa
+WltbW1xcXF1dXV5eXl9fX2BgYGFhYWJiYmNjY2RkZGVlZWZmZmdnZ2hoaGlpaWpqamtra2xsbG1t
+bW5ubm9vb3BwcHFxcXJycnNzc3R0dHV1dXZ2dnd3d3h4eHl5eXp6ent7e3x8fH19fX5+fn9/f4CA
+gIGBgYKCgoODg4SEhIWFhYaGhoeHh4iIiImJiYqKiouLi4yMjI2NjY6Ojo+Pj5CQkJGRkZKSkpOT
+k5SUlJWVlZaWlpeXl5iYmJmZmZqampubm5ycnJ2dnZ6enp+fn6CgoKGhoaKioqOjo6SkpKWlpaam
+pqenp6ioqKmpqaqqqqurq6ysrK2tra6urq+vr7CwsLGxsbKysrOzs7S0tLW1tba2tre3t7i4uLm5
+ubq6uru7u7y8vL29vb6+vr+/v8DAwMHBwcLCwsPDw8TExMXFxcbGxsfHx8jIyMnJycrKysvLy8zM
+zM3Nzc7Ozs/Pz9DQ0NHR0dLS0tPT09TU1NXV1dbW1tfX19jY2NnZ2dra2tvb29zc3N3d3d7e3t/f
+3+Dg4OHh4eLi4uPj4+Tk5OXl5ebm5ufn5+jo6Onp6erq6uvr6+zs7O3t7e7u7u/v7/Dw8PHx8fLy
+8vPz8/T09PX19fb29vf39/j4+Pn5+fr6+vv7+/z8/P39/f7+/v///ywAAAAABwHJAAAI/gD/CRxI
+sKDBgwgTKlzIsKHDhxAjSpxIsaLFixgzatzIsaPHjyBDihxJsqTJkyhTqlzJsqXLlzBjypxJs6bN
+mzhz6tzJs6fPn0CDCh1KtKjRo0iTKl3KtKnTp1CjSp1KtarVqyQBYN1aVSvXr1C9gh2rVOxCsV4B
+mE2b0GxDt2TjtnWo9l9du2rrar2bl+BavQL3ApZLeC5du3j77g2MF/FAtIv1AoZb+Gfey5gza97M
+ua/ByJ4XI8b8+PHl0ZkrE6XsuCDr1xD5ip7d2m9pv6IZqxYK+zPC3g/T0mabGLdk4YEH7wYK3PZB
+yqyXSw/++3l139OzS4R+Hbtr7eCp/nv/bp18+PMKuZcfj7792fXm47ufz/52fd308zu/X3u/fv3N
++Sfgf/MFaJ98BLpnIH4IJojegv0d6GB7EEI4oXYVdnfhgxoOyOCG4WXIH4jTidggiSV2KOGHKGa3
+oIUtqvaiijEuNyN8NUp344g5EqYef9H1KNePJwYpJFlEehjhkT7iuCKLTMZl4olRgjWlklV+deWT
+WWpJ45JgdrnVllCKOeaXMJrZFZpfqmkVmWG6SRWcRsoZFZl12hkWmzxemCdXeAr555lOgjnof4de
+tSOVG0KWaFl3GVponH52ZumlmGaq6aaY0pjmhJppmRqQbTaKm6gewgnio2uSOumq/jpO+qmDrE5F
+p6AtSZZeSrf2WOtEoZEmm2C/Astnn6CapKtjbClWZki95lhsbLcRtxmlHkVb47TBWcuYcGvxeiyj
+fp7kGbOJEZscStrGyK1T7bb4blPxojgvU4Hiulu+vto4Lpck3rvUoljCuq+npZp6cKGz0uovwwmX
+u3CRESc7sZINJyhwWbJW7PDFXGZM4MZI1WsvyCF7rDHKZYqMKMuSvmqwS5yOypHJAcP0K8k4z5xr
+RTz/C7DPLO2crdDPEr2S0R31rDDNQB/dMbISQ01R0FOT+/TPV0vtqtZVc21s0wjLLONFJG8XNdkQ
+y5z2UNy+TW3XbN8Ho9xBxa3z/to3lz0i3nljBPhbfG+UZMoqG5db2+KJ9O7gDDHd99dUstpscsgR
+x6CzqC0O0uN70z05xVlHdNpwgvUHGWrFef5RppGHPjawNddue3nB5nYufsKmu/vrhL/3kuRqq1Tr
+6pd/G+6HymGLdvC7Dl+46cYD7aywoSleXGOtj5RnnZALP3vx7Bb2J/iyk6++subTZanz2ZJ2te2R
+st8+9NaFHx/x1Jff5GFz0Z9/+Dc3c9EnSK4ryfLG1z89GaY6AjwQARvnQLfBr24XpFrizGSk+tlv
+aOJbXwULxj3/gTB6DBwhCD2oQLBtkIR66mAEVTe9AqqQhCzMigvNhsIbrnCG/m6ZIAB9+MPqwfCB
+IryhDI14QiQ2kIiUyqH3dqhBHtoJfSZs4gu16CYsGpCKYDyinLz4QS5W8YwcjF0WkxbCJxKRjC0M
+oxnlmCU46tA19BPiCO04xZjM8IBq/GL63hjIMloNitiS4uv+aMUxRk5/ihQXIhMJSUaiUUzgq6RM
+LEmhR5qLk2LsoieVBco5YnKUCiwlG2OIyqyoMoNpPIsm/TjJRMKya698JYZiB7kELq2W6OvlLT8H
+TF62MJfM+R3+lnnIAB5zk8zBHOZks7/BqEuXwXwmLS1DzestDnmNud5MsqlDZPKGWMkzT+9CBc33
+5PGd8IynPOfJwkilLp37/gniN8dZyDgOcienCadudnc6anavnT30p/SKokvH9fOO/+RmqxIK0YUi
+EosBNVz2tnnRR9KzUxyFYjAzqpHehZSQbdxYEBEqUhcVM0WTbGhNZBor+7xNj8SMaT7TJc1Tgcug
+Bf2LNZnlKODp1KYCbR64ujcZ0OBxe5FR3jAfqsSdNiujucMnPnl3uaxiraNI3ep3hro8161uNLbB
+G00fNk3abG+aAiXqcKqlGG8Oy6hgLang+HnUjERyiBFV4VpZitKa5rWEgKJjldgpKs5d9KOQjeym
+XkrSMdnzpYatpWY3y1l6NXGB3RlsZ9eDzp7ydKmnW1dAlTnaQ94zruEkS2tUnfra1iIUdRvlHueu
+iS7N2daic1VncEEz3N/6MbVyNU1TV0tUdL3VuF6aKnQhJdrpWve62M2udrfL3e5697vgDa94x0ve
+8lIkIAA7
diff --git a/Documentation/DocBook/media_api.tmpl b/Documentation/DocBook/media_api.tmpl
new file mode 100644
index 000000000..03f9a1f8d
--- /dev/null
+++ b/Documentation/DocBook/media_api.tmpl
@@ -0,0 +1,88 @@
+<?xml version="1.0"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [
+<!ENTITY % media-entities SYSTEM "./media-entities.tmpl"> %media-entities;
+<!ENTITY media-indices SYSTEM "./media-indices.tmpl">
+
+<!ENTITY eg "e.&nbsp;g.">
+<!ENTITY ie "i.&nbsp;e.">
+<!ENTITY fd "File descriptor returned by <link linkend='func-open'><function>open()</function></link>.">
+<!ENTITY i2c "I<superscript>2</superscript>C">
+<!ENTITY return-value "<title>Return Value</title><para>On success <returnvalue>0</returnvalue> is returned, on error <returnvalue>-1</returnvalue> and the <varname>errno</varname> variable is set appropriately. The generic error codes are described at the <link linkend='gen-errors'>Generic Error Codes</link> chapter.</para>">
+<!ENTITY return-value-dvb "<para>RETURN VALUE</para><para>On success <returnvalue>0</returnvalue> is returned, on error <returnvalue>-1</returnvalue> and the <varname>errno</varname> variable is set appropriately. The generic error codes are described at the <link linkend='gen-errors'>Generic Error Codes</link> chapter.</para>">
+<!ENTITY manvol "<manvolnum>2</manvolnum>">
+
+<!-- Table templates: structs, structs w/union, defines. -->
+<!ENTITY cs-str "<colspec colname='c1' colwidth='1*' /><colspec colname='c2' colwidth='1*' /><colspec colname='c3' colwidth='2*' /><spanspec spanname='hspan' namest='c1' nameend='c3' />">
+<!ENTITY cs-ustr "<colspec colname='c1' colwidth='1*' /><colspec colname='c2' colwidth='1*' /><colspec colname='c3' colwidth='1*' /><colspec colname='c4' colwidth='2*' /><spanspec spanname='hspan' namest='c1' nameend='c4' />">
+<!ENTITY cs-def "<colspec colname='c1' colwidth='3*' /><colspec colname='c2' colwidth='1*' /><colspec colname='c3' colwidth='4*' /><spanspec spanname='hspan' namest='c1' nameend='c3' />">
+
+<!-- Video for Linux mailing list address. -->
+<!ENTITY v4l-ml "<ulink url='http://www.linuxtv.org/lists.php'>http://www.linuxtv.org/lists.php</ulink>">
+
+<!-- LinuxTV v4l-dvb repository. -->
+<!ENTITY v4l-dvb "<ulink url='http://linuxtv.org/repo/'>http://linuxtv.org/repo/</ulink>">
+<!ENTITY dash-ent-8 "<entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry>">
+<!ENTITY dash-ent-10 "<entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry>">
+<!ENTITY dash-ent-12 "<entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry>">
+<!ENTITY dash-ent-14 "<entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry>">
+<!ENTITY dash-ent-16 "<entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry>">
+<!ENTITY dash-ent-20 "<entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry>">
+<!ENTITY dash-ent-22 "<entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry>">
+<!ENTITY dash-ent-24 "<entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry><entry>-</entry>">
+]>
+
+<book id="media_api">
+<bookinfo>
+ <title>LINUX MEDIA INFRASTRUCTURE API</title>
+
+ <copyright>
+ <year>2009-2014</year>
+ <holder>LinuxTV Developers</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>Permission is granted to copy, distribute and/or modify
+ this document under the terms of the GNU Free Documentation License,
+ Version 1.1 or any later version published by the Free Software
+ Foundation. A copy of the license is included in the chapter entitled
+ "GNU Free Documentation License"</para>
+ </legalnotice>
+</bookinfo>
+
+<toc></toc> <!-- autogenerated -->
+
+<preface>
+ <title>Introduction</title>
+
+ <para>This document covers the Linux Kernel to Userspace API's used by
+ video and radio streaming devices, including video cameras,
+ analog and digital TV receiver cards, AM/FM receiver cards,
+ streaming capture and output devices, codec devices and remote
+ controllers.</para>
+ <para>It is divided into four parts.</para>
+ <para>The first part covers radio, video capture and output,
+ cameras, analog TV devices and codecs.</para>
+ <para>The second part covers the
+ API used for digital TV and Internet reception via one of the
+ several digital tv standards. While it is called as DVB API,
+ in fact it covers several different video standards including
+ DVB-T, DVB-S, DVB-C and ATSC. The API is currently being updated
+ to document support also for DVB-S2, ISDB-T and ISDB-S.</para>
+ <para>The third part covers the Remote Controller API.</para>
+ <para>The fourth part covers the Media Controller API.</para>
+ <para>For additional information and for the latest development code,
+ see: <ulink url="http://linuxtv.org">http://linuxtv.org</ulink>.</para>
+ <para>For discussing improvements, reporting troubles, sending new drivers, etc, please mail to: <ulink url="http://vger.kernel.org/vger-lists.html#linux-media">Linux Media Mailing List (LMML).</ulink>.</para>
+</preface>
+
+<part id="v4l2spec">&sub-v4l2;</part>
+<part id="dvbapi">&sub-dvbapi;</part>
+<part id="remotes">&sub-remote_controllers;</part>
+<part id="media_common">&sub-media-controller;</part>
+
+<chapter id="gen_errors">&sub-gen-errors;</chapter>
+
+&sub-fdl-appendix;
+
+</book>
diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
new file mode 100644
index 000000000..7da8f0402
--- /dev/null
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -0,0 +1,1292 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="MTD-NAND-Guide">
+ <bookinfo>
+ <title>MTD NAND Driver Programming Interface</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Thomas</firstname>
+ <surname>Gleixner</surname>
+ <affiliation>
+ <address>
+ <email>tglx@linutronix.de</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2004</year>
+ <holder>Thomas Gleixner</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ The generic NAND driver supports almost all NAND and AG-AND based
+ chips and connects them to the Memory Technology Devices (MTD)
+ subsystem of the Linux Kernel.
+ </para>
+ <para>
+ This documentation is provided for developers who want to implement
+ board drivers or filesystem drivers suitable for NAND devices.
+ </para>
+ </chapter>
+
+ <chapter id="bugs">
+ <title>Known Bugs And Assumptions</title>
+ <para>
+ None.
+ </para>
+ </chapter>
+
+ <chapter id="dochints">
+ <title>Documentation hints</title>
+ <para>
+ The function and structure docs are autogenerated. Each function and
+ struct member has a short description which is marked with an [XXX] identifier.
+ The following chapters explain the meaning of those identifiers.
+ </para>
+ <sect1 id="Function_identifiers_XXX">
+ <title>Function identifiers [XXX]</title>
+ <para>
+ The functions are marked with [XXX] identifiers in the short
+ comment. The identifiers explain the usage and scope of the
+ functions. Following identifiers are used:
+ </para>
+ <itemizedlist>
+ <listitem><para>
+ [MTD Interface]</para><para>
+ These functions provide the interface to the MTD kernel API.
+ They are not replaceable and provide functionality
+ which is complete hardware independent.
+ </para></listitem>
+ <listitem><para>
+ [NAND Interface]</para><para>
+ These functions are exported and provide the interface to the NAND kernel API.
+ </para></listitem>
+ <listitem><para>
+ [GENERIC]</para><para>
+ Generic functions are not replaceable and provide functionality
+ which is complete hardware independent.
+ </para></listitem>
+ <listitem><para>
+ [DEFAULT]</para><para>
+ Default functions provide hardware related functionality which is suitable
+ for most of the implementations. These functions can be replaced by the
+ board driver if necessary. Those functions are called via pointers in the
+ NAND chip description structure. The board driver can set the functions which
+ should be replaced by board dependent functions before calling nand_scan().
+ If the function pointer is NULL on entry to nand_scan() then the pointer
+ is set to the default function which is suitable for the detected chip type.
+ </para></listitem>
+ </itemizedlist>
+ </sect1>
+ <sect1 id="Struct_member_identifiers_XXX">
+ <title>Struct member identifiers [XXX]</title>
+ <para>
+ The struct members are marked with [XXX] identifiers in the
+ comment. The identifiers explain the usage and scope of the
+ members. Following identifiers are used:
+ </para>
+ <itemizedlist>
+ <listitem><para>
+ [INTERN]</para><para>
+ These members are for NAND driver internal use only and must not be
+ modified. Most of these values are calculated from the chip geometry
+ information which is evaluated during nand_scan().
+ </para></listitem>
+ <listitem><para>
+ [REPLACEABLE]</para><para>
+ Replaceable members hold hardware related functions which can be
+ provided by the board driver. The board driver can set the functions which
+ should be replaced by board dependent functions before calling nand_scan().
+ If the function pointer is NULL on entry to nand_scan() then the pointer
+ is set to the default function which is suitable for the detected chip type.
+ </para></listitem>
+ <listitem><para>
+ [BOARDSPECIFIC]</para><para>
+ Board specific members hold hardware related information which must
+ be provided by the board driver. The board driver must set the function
+ pointers and datafields before calling nand_scan().
+ </para></listitem>
+ <listitem><para>
+ [OPTIONAL]</para><para>
+ Optional members can hold information relevant for the board driver. The
+ generic NAND driver code does not use this information.
+ </para></listitem>
+ </itemizedlist>
+ </sect1>
+ </chapter>
+
+ <chapter id="basicboarddriver">
+ <title>Basic board driver</title>
+ <para>
+ For most boards it will be sufficient to provide just the
+ basic functions and fill out some really board dependent
+ members in the nand chip description structure.
+ </para>
+ <sect1 id="Basic_defines">
+ <title>Basic defines</title>
+ <para>
+ At least you have to provide a mtd structure and
+ a storage for the ioremap'ed chip address.
+ You can allocate the mtd structure using kmalloc
+ or you can allocate it statically.
+ In case of static allocation you have to allocate
+ a nand_chip structure too.
+ </para>
+ <para>
+ Kmalloc based example
+ </para>
+ <programlisting>
+static struct mtd_info *board_mtd;
+static void __iomem *baseaddr;
+ </programlisting>
+ <para>
+ Static example
+ </para>
+ <programlisting>
+static struct mtd_info board_mtd;
+static struct nand_chip board_chip;
+static void __iomem *baseaddr;
+ </programlisting>
+ </sect1>
+ <sect1 id="Partition_defines">
+ <title>Partition defines</title>
+ <para>
+ If you want to divide your device into partitions, then
+ define a partitioning scheme suitable to your board.
+ </para>
+ <programlisting>
+#define NUM_PARTITIONS 2
+static struct mtd_partition partition_info[] = {
+ { .name = "Flash partition 1",
+ .offset = 0,
+ .size = 8 * 1024 * 1024 },
+ { .name = "Flash partition 2",
+ .offset = MTDPART_OFS_NEXT,
+ .size = MTDPART_SIZ_FULL },
+};
+ </programlisting>
+ </sect1>
+ <sect1 id="Hardware_control_functions">
+ <title>Hardware control function</title>
+ <para>
+ The hardware control function provides access to the
+ control pins of the NAND chip(s).
+ The access can be done by GPIO pins or by address lines.
+ If you use address lines, make sure that the timing
+ requirements are met.
+ </para>
+ <para>
+ <emphasis>GPIO based example</emphasis>
+ </para>
+ <programlisting>
+static void board_hwcontrol(struct mtd_info *mtd, int cmd)
+{
+ switch(cmd){
+ case NAND_CTL_SETCLE: /* Set CLE pin high */ break;
+ case NAND_CTL_CLRCLE: /* Set CLE pin low */ break;
+ case NAND_CTL_SETALE: /* Set ALE pin high */ break;
+ case NAND_CTL_CLRALE: /* Set ALE pin low */ break;
+ case NAND_CTL_SETNCE: /* Set nCE pin low */ break;
+ case NAND_CTL_CLRNCE: /* Set nCE pin high */ break;
+ }
+}
+ </programlisting>
+ <para>
+ <emphasis>Address lines based example.</emphasis> It's assumed that the
+ nCE pin is driven by a chip select decoder.
+ </para>
+ <programlisting>
+static void board_hwcontrol(struct mtd_info *mtd, int cmd)
+{
+ struct nand_chip *this = (struct nand_chip *) mtd->priv;
+ switch(cmd){
+ case NAND_CTL_SETCLE: this->IO_ADDR_W |= CLE_ADRR_BIT; break;
+ case NAND_CTL_CLRCLE: this->IO_ADDR_W &amp;= ~CLE_ADRR_BIT; break;
+ case NAND_CTL_SETALE: this->IO_ADDR_W |= ALE_ADRR_BIT; break;
+ case NAND_CTL_CLRALE: this->IO_ADDR_W &amp;= ~ALE_ADRR_BIT; break;
+ }
+}
+ </programlisting>
+ </sect1>
+ <sect1 id="Device_ready_function">
+ <title>Device ready function</title>
+ <para>
+ If the hardware interface has the ready busy pin of the NAND chip connected to a
+ GPIO or other accessible I/O pin, this function is used to read back the state of the
+ pin. The function has no arguments and should return 0, if the device is busy (R/B pin
+ is low) and 1, if the device is ready (R/B pin is high).
+ If the hardware interface does not give access to the ready busy pin, then
+ the function must not be defined and the function pointer this->dev_ready is set to NULL.
+ </para>
+ </sect1>
+ <sect1 id="Init_function">
+ <title>Init function</title>
+ <para>
+ The init function allocates memory and sets up all the board
+ specific parameters and function pointers. When everything
+ is set up nand_scan() is called. This function tries to
+ detect and identify then chip. If a chip is found all the
+ internal data fields are initialized accordingly.
+ The structure(s) have to be zeroed out first and then filled with the necessary
+ information about the device.
+ </para>
+ <programlisting>
+static int __init board_init (void)
+{
+ struct nand_chip *this;
+ int err = 0;
+
+ /* Allocate memory for MTD device structure and private data */
+ board_mtd = kzalloc(sizeof(struct mtd_info) + sizeof(struct nand_chip), GFP_KERNEL);
+ if (!board_mtd) {
+ printk ("Unable to allocate NAND MTD device structure.\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /* map physical address */
+ baseaddr = ioremap(CHIP_PHYSICAL_ADDRESS, 1024);
+ if (!baseaddr) {
+ printk("Ioremap to access NAND chip failed\n");
+ err = -EIO;
+ goto out_mtd;
+ }
+
+ /* Get pointer to private data */
+ this = (struct nand_chip *) ();
+ /* Link the private data with the MTD structure */
+ board_mtd->priv = this;
+
+ /* Set address of NAND IO lines */
+ this->IO_ADDR_R = baseaddr;
+ this->IO_ADDR_W = baseaddr;
+ /* Reference hardware control function */
+ this->hwcontrol = board_hwcontrol;
+ /* Set command delay time, see datasheet for correct value */
+ this->chip_delay = CHIP_DEPENDEND_COMMAND_DELAY;
+ /* Assign the device ready function, if available */
+ this->dev_ready = board_dev_ready;
+ this->eccmode = NAND_ECC_SOFT;
+
+ /* Scan to find existence of the device */
+ if (nand_scan (board_mtd, 1)) {
+ err = -ENXIO;
+ goto out_ior;
+ }
+
+ add_mtd_partitions(board_mtd, partition_info, NUM_PARTITIONS);
+ goto out;
+
+out_ior:
+ iounmap(baseaddr);
+out_mtd:
+ kfree (board_mtd);
+out:
+ return err;
+}
+module_init(board_init);
+ </programlisting>
+ </sect1>
+ <sect1 id="Exit_function">
+ <title>Exit function</title>
+ <para>
+ The exit function is only necessary if the driver is
+ compiled as a module. It releases all resources which
+ are held by the chip driver and unregisters the partitions
+ in the MTD layer.
+ </para>
+ <programlisting>
+#ifdef MODULE
+static void __exit board_cleanup (void)
+{
+ /* Release resources, unregister device */
+ nand_release (board_mtd);
+
+ /* unmap physical address */
+ iounmap(baseaddr);
+
+ /* Free the MTD device structure */
+ kfree (board_mtd);
+}
+module_exit(board_cleanup);
+#endif
+ </programlisting>
+ </sect1>
+ </chapter>
+
+ <chapter id="boarddriversadvanced">
+ <title>Advanced board driver functions</title>
+ <para>
+ This chapter describes the advanced functionality of the NAND
+ driver. For a list of functions which can be overridden by the board
+ driver see the documentation of the nand_chip structure.
+ </para>
+ <sect1 id="Multiple_chip_control">
+ <title>Multiple chip control</title>
+ <para>
+ The nand driver can control chip arrays. Therefore the
+ board driver must provide an own select_chip function. This
+ function must (de)select the requested chip.
+ The function pointer in the nand_chip structure must
+ be set before calling nand_scan(). The maxchip parameter
+ of nand_scan() defines the maximum number of chips to
+ scan for. Make sure that the select_chip function can
+ handle the requested number of chips.
+ </para>
+ <para>
+ The nand driver concatenates the chips to one virtual
+ chip and provides this virtual chip to the MTD layer.
+ </para>
+ <para>
+ <emphasis>Note: The driver can only handle linear chip arrays
+ of equally sized chips. There is no support for
+ parallel arrays which extend the buswidth.</emphasis>
+ </para>
+ <para>
+ <emphasis>GPIO based example</emphasis>
+ </para>
+ <programlisting>
+static void board_select_chip (struct mtd_info *mtd, int chip)
+{
+ /* Deselect all chips, set all nCE pins high */
+ GPIO(BOARD_NAND_NCE) |= 0xff;
+ if (chip >= 0)
+ GPIO(BOARD_NAND_NCE) &amp;= ~ (1 &lt;&lt; chip);
+}
+ </programlisting>
+ <para>
+ <emphasis>Address lines based example.</emphasis>
+ Its assumed that the nCE pins are connected to an
+ address decoder.
+ </para>
+ <programlisting>
+static void board_select_chip (struct mtd_info *mtd, int chip)
+{
+ struct nand_chip *this = (struct nand_chip *) mtd->priv;
+
+ /* Deselect all chips */
+ this->IO_ADDR_R &amp;= ~BOARD_NAND_ADDR_MASK;
+ this->IO_ADDR_W &amp;= ~BOARD_NAND_ADDR_MASK;
+ switch (chip) {
+ case 0:
+ this->IO_ADDR_R |= BOARD_NAND_ADDR_CHIP0;
+ this->IO_ADDR_W |= BOARD_NAND_ADDR_CHIP0;
+ break;
+ ....
+ case n:
+ this->IO_ADDR_R |= BOARD_NAND_ADDR_CHIPn;
+ this->IO_ADDR_W |= BOARD_NAND_ADDR_CHIPn;
+ break;
+ }
+}
+ </programlisting>
+ </sect1>
+ <sect1 id="Hardware_ECC_support">
+ <title>Hardware ECC support</title>
+ <sect2 id="Functions_and_constants">
+ <title>Functions and constants</title>
+ <para>
+ The nand driver supports three different types of
+ hardware ECC.
+ <itemizedlist>
+ <listitem><para>NAND_ECC_HW3_256</para><para>
+ Hardware ECC generator providing 3 bytes ECC per
+ 256 byte.
+ </para> </listitem>
+ <listitem><para>NAND_ECC_HW3_512</para><para>
+ Hardware ECC generator providing 3 bytes ECC per
+ 512 byte.
+ </para> </listitem>
+ <listitem><para>NAND_ECC_HW6_512</para><para>
+ Hardware ECC generator providing 6 bytes ECC per
+ 512 byte.
+ </para> </listitem>
+ <listitem><para>NAND_ECC_HW8_512</para><para>
+ Hardware ECC generator providing 6 bytes ECC per
+ 512 byte.
+ </para> </listitem>
+ </itemizedlist>
+ If your hardware generator has a different functionality
+ add it at the appropriate place in nand_base.c
+ </para>
+ <para>
+ The board driver must provide following functions:
+ <itemizedlist>
+ <listitem><para>enable_hwecc</para><para>
+ This function is called before reading / writing to
+ the chip. Reset or initialize the hardware generator
+ in this function. The function is called with an
+ argument which let you distinguish between read
+ and write operations.
+ </para> </listitem>
+ <listitem><para>calculate_ecc</para><para>
+ This function is called after read / write from / to
+ the chip. Transfer the ECC from the hardware to
+ the buffer. If the option NAND_HWECC_SYNDROME is set
+ then the function is only called on write. See below.
+ </para> </listitem>
+ <listitem><para>correct_data</para><para>
+ In case of an ECC error this function is called for
+ error detection and correction. Return 1 respectively 2
+ in case the error can be corrected. If the error is
+ not correctable return -1. If your hardware generator
+ matches the default algorithm of the nand_ecc software
+ generator then use the correction function provided
+ by nand_ecc instead of implementing duplicated code.
+ </para> </listitem>
+ </itemizedlist>
+ </para>
+ </sect2>
+ <sect2 id="Hardware_ECC_with_syndrome_calculation">
+ <title>Hardware ECC with syndrome calculation</title>
+ <para>
+ Many hardware ECC implementations provide Reed-Solomon
+ codes and calculate an error syndrome on read. The syndrome
+ must be converted to a standard Reed-Solomon syndrome
+ before calling the error correction code in the generic
+ Reed-Solomon library.
+ </para>
+ <para>
+ The ECC bytes must be placed immediately after the data
+ bytes in order to make the syndrome generator work. This
+ is contrary to the usual layout used by software ECC. The
+ separation of data and out of band area is not longer
+ possible. The nand driver code handles this layout and
+ the remaining free bytes in the oob area are managed by
+ the autoplacement code. Provide a matching oob-layout
+ in this case. See rts_from4.c and diskonchip.c for
+ implementation reference. In those cases we must also
+ use bad block tables on FLASH, because the ECC layout is
+ interfering with the bad block marker positions.
+ See bad block table support for details.
+ </para>
+ </sect2>
+ </sect1>
+ <sect1 id="Bad_Block_table_support">
+ <title>Bad block table support</title>
+ <para>
+ Most NAND chips mark the bad blocks at a defined
+ position in the spare area. Those blocks must
+ not be erased under any circumstances as the bad
+ block information would be lost.
+ It is possible to check the bad block mark each
+ time when the blocks are accessed by reading the
+ spare area of the first page in the block. This
+ is time consuming so a bad block table is used.
+ </para>
+ <para>
+ The nand driver supports various types of bad block
+ tables.
+ <itemizedlist>
+ <listitem><para>Per device</para><para>
+ The bad block table contains all bad block information
+ of the device which can consist of multiple chips.
+ </para> </listitem>
+ <listitem><para>Per chip</para><para>
+ A bad block table is used per chip and contains the
+ bad block information for this particular chip.
+ </para> </listitem>
+ <listitem><para>Fixed offset</para><para>
+ The bad block table is located at a fixed offset
+ in the chip (device). This applies to various
+ DiskOnChip devices.
+ </para> </listitem>
+ <listitem><para>Automatic placed</para><para>
+ The bad block table is automatically placed and
+ detected either at the end or at the beginning
+ of a chip (device)
+ </para> </listitem>
+ <listitem><para>Mirrored tables</para><para>
+ The bad block table is mirrored on the chip (device) to
+ allow updates of the bad block table without data loss.
+ </para> </listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ nand_scan() calls the function nand_default_bbt().
+ nand_default_bbt() selects appropriate default
+ bad block table descriptors depending on the chip information
+ which was retrieved by nand_scan().
+ </para>
+ <para>
+ The standard policy is scanning the device for bad
+ blocks and build a ram based bad block table which
+ allows faster access than always checking the
+ bad block information on the flash chip itself.
+ </para>
+ <sect2 id="Flash_based_tables">
+ <title>Flash based tables</title>
+ <para>
+ It may be desired or necessary to keep a bad block table in FLASH.
+ For AG-AND chips this is mandatory, as they have no factory marked
+ bad blocks. They have factory marked good blocks. The marker pattern
+ is erased when the block is erased to be reused. So in case of
+ powerloss before writing the pattern back to the chip this block
+ would be lost and added to the bad blocks. Therefore we scan the
+ chip(s) when we detect them the first time for good blocks and
+ store this information in a bad block table before erasing any
+ of the blocks.
+ </para>
+ <para>
+ The blocks in which the tables are stored are protected against
+ accidental access by marking them bad in the memory bad block
+ table. The bad block table management functions are allowed
+ to circumvent this protection.
+ </para>
+ <para>
+ The simplest way to activate the FLASH based bad block table support
+ is to set the option NAND_BBT_USE_FLASH in the bbt_option field of
+ the nand chip structure before calling nand_scan(). For AG-AND
+ chips is this done by default.
+ This activates the default FLASH based bad block table functionality
+ of the NAND driver. The default bad block table options are
+ <itemizedlist>
+ <listitem><para>Store bad block table per chip</para></listitem>
+ <listitem><para>Use 2 bits per block</para></listitem>
+ <listitem><para>Automatic placement at the end of the chip</para></listitem>
+ <listitem><para>Use mirrored tables with version numbers</para></listitem>
+ <listitem><para>Reserve 4 blocks at the end of the chip</para></listitem>
+ </itemizedlist>
+ </para>
+ </sect2>
+ <sect2 id="User_defined_tables">
+ <title>User defined tables</title>
+ <para>
+ User defined tables are created by filling out a
+ nand_bbt_descr structure and storing the pointer in the
+ nand_chip structure member bbt_td before calling nand_scan().
+ If a mirror table is necessary a second structure must be
+ created and a pointer to this structure must be stored
+ in bbt_md inside the nand_chip structure. If the bbt_md
+ member is set to NULL then only the main table is used
+ and no scan for the mirrored table is performed.
+ </para>
+ <para>
+ The most important field in the nand_bbt_descr structure
+ is the options field. The options define most of the
+ table properties. Use the predefined constants from
+ nand.h to define the options.
+ <itemizedlist>
+ <listitem><para>Number of bits per block</para>
+ <para>The supported number of bits is 1, 2, 4, 8.</para></listitem>
+ <listitem><para>Table per chip</para>
+ <para>Setting the constant NAND_BBT_PERCHIP selects that
+ a bad block table is managed for each chip in a chip array.
+ If this option is not set then a per device bad block table
+ is used.</para></listitem>
+ <listitem><para>Table location is absolute</para>
+ <para>Use the option constant NAND_BBT_ABSPAGE and
+ define the absolute page number where the bad block
+ table starts in the field pages. If you have selected bad block
+ tables per chip and you have a multi chip array then the start page
+ must be given for each chip in the chip array. Note: there is no scan
+ for a table ident pattern performed, so the fields
+ pattern, veroffs, offs, len can be left uninitialized</para></listitem>
+ <listitem><para>Table location is automatically detected</para>
+ <para>The table can either be located in the first or the last good
+ blocks of the chip (device). Set NAND_BBT_LASTBLOCK to place
+ the bad block table at the end of the chip (device). The
+ bad block tables are marked and identified by a pattern which
+ is stored in the spare area of the first page in the block which
+ holds the bad block table. Store a pointer to the pattern
+ in the pattern field. Further the length of the pattern has to be
+ stored in len and the offset in the spare area must be given
+ in the offs member of the nand_bbt_descr structure. For mirrored
+ bad block tables different patterns are mandatory.</para></listitem>
+ <listitem><para>Table creation</para>
+ <para>Set the option NAND_BBT_CREATE to enable the table creation
+ if no table can be found during the scan. Usually this is done only
+ once if a new chip is found. </para></listitem>
+ <listitem><para>Table write support</para>
+ <para>Set the option NAND_BBT_WRITE to enable the table write support.
+ This allows the update of the bad block table(s) in case a block has
+ to be marked bad due to wear. The MTD interface function block_markbad
+ is calling the update function of the bad block table. If the write
+ support is enabled then the table is updated on FLASH.</para>
+ <para>
+ Note: Write support should only be enabled for mirrored tables with
+ version control.
+ </para></listitem>
+ <listitem><para>Table version control</para>
+ <para>Set the option NAND_BBT_VERSION to enable the table version control.
+ It's highly recommended to enable this for mirrored tables with write
+ support. It makes sure that the risk of losing the bad block
+ table information is reduced to the loss of the information about the
+ one worn out block which should be marked bad. The version is stored in
+ 4 consecutive bytes in the spare area of the device. The position of
+ the version number is defined by the member veroffs in the bad block table
+ descriptor.</para></listitem>
+ <listitem><para>Save block contents on write</para>
+ <para>
+ In case that the block which holds the bad block table does contain
+ other useful information, set the option NAND_BBT_SAVECONTENT. When
+ the bad block table is written then the whole block is read the bad
+ block table is updated and the block is erased and everything is
+ written back. If this option is not set only the bad block table
+ is written and everything else in the block is ignored and erased.
+ </para></listitem>
+ <listitem><para>Number of reserved blocks</para>
+ <para>
+ For automatic placement some blocks must be reserved for
+ bad block table storage. The number of reserved blocks is defined
+ in the maxblocks member of the bad block table description structure.
+ Reserving 4 blocks for mirrored tables should be a reasonable number.
+ This also limits the number of blocks which are scanned for the bad
+ block table ident pattern.
+ </para></listitem>
+ </itemizedlist>
+ </para>
+ </sect2>
+ </sect1>
+ <sect1 id="Spare_area_placement">
+ <title>Spare area (auto)placement</title>
+ <para>
+ The nand driver implements different possibilities for
+ placement of filesystem data in the spare area,
+ <itemizedlist>
+ <listitem><para>Placement defined by fs driver</para></listitem>
+ <listitem><para>Automatic placement</para></listitem>
+ </itemizedlist>
+ The default placement function is automatic placement. The
+ nand driver has built in default placement schemes for the
+ various chiptypes. If due to hardware ECC functionality the
+ default placement does not fit then the board driver can
+ provide a own placement scheme.
+ </para>
+ <para>
+ File system drivers can provide a own placement scheme which
+ is used instead of the default placement scheme.
+ </para>
+ <para>
+ Placement schemes are defined by a nand_oobinfo structure
+ <programlisting>
+struct nand_oobinfo {
+ int useecc;
+ int eccbytes;
+ int eccpos[24];
+ int oobfree[8][2];
+};
+ </programlisting>
+ <itemizedlist>
+ <listitem><para>useecc</para><para>
+ The useecc member controls the ecc and placement function. The header
+ file include/mtd/mtd-abi.h contains constants to select ecc and
+ placement. MTD_NANDECC_OFF switches off the ecc complete. This is
+ not recommended and available for testing and diagnosis only.
+ MTD_NANDECC_PLACE selects caller defined placement, MTD_NANDECC_AUTOPLACE
+ selects automatic placement.
+ </para></listitem>
+ <listitem><para>eccbytes</para><para>
+ The eccbytes member defines the number of ecc bytes per page.
+ </para></listitem>
+ <listitem><para>eccpos</para><para>
+ The eccpos array holds the byte offsets in the spare area where
+ the ecc codes are placed.
+ </para></listitem>
+ <listitem><para>oobfree</para><para>
+ The oobfree array defines the areas in the spare area which can be
+ used for automatic placement. The information is given in the format
+ {offset, size}. offset defines the start of the usable area, size the
+ length in bytes. More than one area can be defined. The list is terminated
+ by an {0, 0} entry.
+ </para></listitem>
+ </itemizedlist>
+ </para>
+ <sect2 id="Placement_defined_by_fs_driver">
+ <title>Placement defined by fs driver</title>
+ <para>
+ The calling function provides a pointer to a nand_oobinfo
+ structure which defines the ecc placement. For writes the
+ caller must provide a spare area buffer along with the
+ data buffer. The spare area buffer size is (number of pages) *
+ (size of spare area). For reads the buffer size is
+ (number of pages) * ((size of spare area) + (number of ecc
+ steps per page) * sizeof (int)). The driver stores the
+ result of the ecc check for each tuple in the spare buffer.
+ The storage sequence is
+ </para>
+ <para>
+ &lt;spare data page 0&gt;&lt;ecc result 0&gt;...&lt;ecc result n&gt;
+ </para>
+ <para>
+ ...
+ </para>
+ <para>
+ &lt;spare data page n&gt;&lt;ecc result 0&gt;...&lt;ecc result n&gt;
+ </para>
+ <para>
+ This is a legacy mode used by YAFFS1.
+ </para>
+ <para>
+ If the spare area buffer is NULL then only the ECC placement is
+ done according to the given scheme in the nand_oobinfo structure.
+ </para>
+ </sect2>
+ <sect2 id="Automatic_placement">
+ <title>Automatic placement</title>
+ <para>
+ Automatic placement uses the built in defaults to place the
+ ecc bytes in the spare area. If filesystem data have to be stored /
+ read into the spare area then the calling function must provide a
+ buffer. The buffer size per page is determined by the oobfree array in
+ the nand_oobinfo structure.
+ </para>
+ <para>
+ If the spare area buffer is NULL then only the ECC placement is
+ done according to the default builtin scheme.
+ </para>
+ </sect2>
+ </sect1>
+ <sect1 id="Spare_area_autoplacement_default">
+ <title>Spare area autoplacement default schemes</title>
+ <sect2 id="pagesize_256">
+ <title>256 byte pagesize</title>
+<informaltable><tgroup cols="3"><tbody>
+<row>
+<entry>Offset</entry>
+<entry>Content</entry>
+<entry>Comment</entry>
+</row>
+<row>
+<entry>0x00</entry>
+<entry>ECC byte 0</entry>
+<entry>Error correction code byte 0</entry>
+</row>
+<row>
+<entry>0x01</entry>
+<entry>ECC byte 1</entry>
+<entry>Error correction code byte 1</entry>
+</row>
+<row>
+<entry>0x02</entry>
+<entry>ECC byte 2</entry>
+<entry>Error correction code byte 2</entry>
+</row>
+<row>
+<entry>0x03</entry>
+<entry>Autoplace 0</entry>
+<entry></entry>
+</row>
+<row>
+<entry>0x04</entry>
+<entry>Autoplace 1</entry>
+<entry></entry>
+</row>
+<row>
+<entry>0x05</entry>
+<entry>Bad block marker</entry>
+<entry>If any bit in this byte is zero, then this block is bad.
+This applies only to the first page in a block. In the remaining
+pages this byte is reserved</entry>
+</row>
+<row>
+<entry>0x06</entry>
+<entry>Autoplace 2</entry>
+<entry></entry>
+</row>
+<row>
+<entry>0x07</entry>
+<entry>Autoplace 3</entry>
+<entry></entry>
+</row>
+</tbody></tgroup></informaltable>
+ </sect2>
+ <sect2 id="pagesize_512">
+ <title>512 byte pagesize</title>
+<informaltable><tgroup cols="3"><tbody>
+<row>
+<entry>Offset</entry>
+<entry>Content</entry>
+<entry>Comment</entry>
+</row>
+<row>
+<entry>0x00</entry>
+<entry>ECC byte 0</entry>
+<entry>Error correction code byte 0 of the lower 256 Byte data in
+this page</entry>
+</row>
+<row>
+<entry>0x01</entry>
+<entry>ECC byte 1</entry>
+<entry>Error correction code byte 1 of the lower 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x02</entry>
+<entry>ECC byte 2</entry>
+<entry>Error correction code byte 2 of the lower 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x03</entry>
+<entry>ECC byte 3</entry>
+<entry>Error correction code byte 0 of the upper 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x04</entry>
+<entry>reserved</entry>
+<entry>reserved</entry>
+</row>
+<row>
+<entry>0x05</entry>
+<entry>Bad block marker</entry>
+<entry>If any bit in this byte is zero, then this block is bad.
+This applies only to the first page in a block. In the remaining
+pages this byte is reserved</entry>
+</row>
+<row>
+<entry>0x06</entry>
+<entry>ECC byte 4</entry>
+<entry>Error correction code byte 1 of the upper 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x07</entry>
+<entry>ECC byte 5</entry>
+<entry>Error correction code byte 2 of the upper 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x08 - 0x0F</entry>
+<entry>Autoplace 0 - 7</entry>
+<entry></entry>
+</row>
+</tbody></tgroup></informaltable>
+ </sect2>
+ <sect2 id="pagesize_2048">
+ <title>2048 byte pagesize</title>
+<informaltable><tgroup cols="3"><tbody>
+<row>
+<entry>Offset</entry>
+<entry>Content</entry>
+<entry>Comment</entry>
+</row>
+<row>
+<entry>0x00</entry>
+<entry>Bad block marker</entry>
+<entry>If any bit in this byte is zero, then this block is bad.
+This applies only to the first page in a block. In the remaining
+pages this byte is reserved</entry>
+</row>
+<row>
+<entry>0x01</entry>
+<entry>Reserved</entry>
+<entry>Reserved</entry>
+</row>
+<row>
+<entry>0x02-0x27</entry>
+<entry>Autoplace 0 - 37</entry>
+<entry></entry>
+</row>
+<row>
+<entry>0x28</entry>
+<entry>ECC byte 0</entry>
+<entry>Error correction code byte 0 of the first 256 Byte data in
+this page</entry>
+</row>
+<row>
+<entry>0x29</entry>
+<entry>ECC byte 1</entry>
+<entry>Error correction code byte 1 of the first 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x2A</entry>
+<entry>ECC byte 2</entry>
+<entry>Error correction code byte 2 of the first 256 Bytes data in
+this page</entry>
+</row>
+<row>
+<entry>0x2B</entry>
+<entry>ECC byte 3</entry>
+<entry>Error correction code byte 0 of the second 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x2C</entry>
+<entry>ECC byte 4</entry>
+<entry>Error correction code byte 1 of the second 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x2D</entry>
+<entry>ECC byte 5</entry>
+<entry>Error correction code byte 2 of the second 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x2E</entry>
+<entry>ECC byte 6</entry>
+<entry>Error correction code byte 0 of the third 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x2F</entry>
+<entry>ECC byte 7</entry>
+<entry>Error correction code byte 1 of the third 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x30</entry>
+<entry>ECC byte 8</entry>
+<entry>Error correction code byte 2 of the third 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x31</entry>
+<entry>ECC byte 9</entry>
+<entry>Error correction code byte 0 of the fourth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x32</entry>
+<entry>ECC byte 10</entry>
+<entry>Error correction code byte 1 of the fourth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x33</entry>
+<entry>ECC byte 11</entry>
+<entry>Error correction code byte 2 of the fourth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x34</entry>
+<entry>ECC byte 12</entry>
+<entry>Error correction code byte 0 of the fifth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x35</entry>
+<entry>ECC byte 13</entry>
+<entry>Error correction code byte 1 of the fifth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x36</entry>
+<entry>ECC byte 14</entry>
+<entry>Error correction code byte 2 of the fifth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x37</entry>
+<entry>ECC byte 15</entry>
+<entry>Error correction code byte 0 of the sixt 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x38</entry>
+<entry>ECC byte 16</entry>
+<entry>Error correction code byte 1 of the sixt 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x39</entry>
+<entry>ECC byte 17</entry>
+<entry>Error correction code byte 2 of the sixt 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x3A</entry>
+<entry>ECC byte 18</entry>
+<entry>Error correction code byte 0 of the seventh 256 Bytes of
+data in this page</entry>
+</row>
+<row>
+<entry>0x3B</entry>
+<entry>ECC byte 19</entry>
+<entry>Error correction code byte 1 of the seventh 256 Bytes of
+data in this page</entry>
+</row>
+<row>
+<entry>0x3C</entry>
+<entry>ECC byte 20</entry>
+<entry>Error correction code byte 2 of the seventh 256 Bytes of
+data in this page</entry>
+</row>
+<row>
+<entry>0x3D</entry>
+<entry>ECC byte 21</entry>
+<entry>Error correction code byte 0 of the eighth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x3E</entry>
+<entry>ECC byte 22</entry>
+<entry>Error correction code byte 1 of the eighth 256 Bytes of data
+in this page</entry>
+</row>
+<row>
+<entry>0x3F</entry>
+<entry>ECC byte 23</entry>
+<entry>Error correction code byte 2 of the eighth 256 Bytes of data
+in this page</entry>
+</row>
+</tbody></tgroup></informaltable>
+ </sect2>
+ </sect1>
+ </chapter>
+
+ <chapter id="filesystems">
+ <title>Filesystem support</title>
+ <para>
+ The NAND driver provides all necessary functions for a
+ filesystem via the MTD interface.
+ </para>
+ <para>
+ Filesystems must be aware of the NAND peculiarities and
+ restrictions. One major restrictions of NAND Flash is, that you cannot
+ write as often as you want to a page. The consecutive writes to a page,
+ before erasing it again, are restricted to 1-3 writes, depending on the
+ manufacturers specifications. This applies similar to the spare area.
+ </para>
+ <para>
+ Therefore NAND aware filesystems must either write in page size chunks
+ or hold a writebuffer to collect smaller writes until they sum up to
+ pagesize. Available NAND aware filesystems: JFFS2, YAFFS.
+ </para>
+ <para>
+ The spare area usage to store filesystem data is controlled by
+ the spare area placement functionality which is described in one
+ of the earlier chapters.
+ </para>
+ </chapter>
+ <chapter id="tools">
+ <title>Tools</title>
+ <para>
+ The MTD project provides a couple of helpful tools to handle NAND Flash.
+ <itemizedlist>
+ <listitem><para>flasherase, flasheraseall: Erase and format FLASH partitions</para></listitem>
+ <listitem><para>nandwrite: write filesystem images to NAND FLASH</para></listitem>
+ <listitem><para>nanddump: dump the contents of a NAND FLASH partitions</para></listitem>
+ </itemizedlist>
+ </para>
+ <para>
+ These tools are aware of the NAND restrictions. Please use those tools
+ instead of complaining about errors which are caused by non NAND aware
+ access methods.
+ </para>
+ </chapter>
+
+ <chapter id="defines">
+ <title>Constants</title>
+ <para>
+ This chapter describes the constants which might be relevant for a driver developer.
+ </para>
+ <sect1 id="Chip_option_constants">
+ <title>Chip option constants</title>
+ <sect2 id="Constants_for_chip_id_table">
+ <title>Constants for chip id table</title>
+ <para>
+ These constants are defined in nand.h. They are ored together to describe
+ the chip functionality.
+ <programlisting>
+/* Buswitdh is 16 bit */
+#define NAND_BUSWIDTH_16 0x00000002
+/* Device supports partial programming without padding */
+#define NAND_NO_PADDING 0x00000004
+/* Chip has cache program function */
+#define NAND_CACHEPRG 0x00000008
+/* Chip has copy back function */
+#define NAND_COPYBACK 0x00000010
+/* AND Chip which has 4 banks and a confusing page / block
+ * assignment. See Renesas datasheet for further information */
+#define NAND_IS_AND 0x00000020
+/* Chip has a array of 4 pages which can be read without
+ * additional ready /busy waits */
+#define NAND_4PAGE_ARRAY 0x00000040
+ </programlisting>
+ </para>
+ </sect2>
+ <sect2 id="Constants_for_runtime_options">
+ <title>Constants for runtime options</title>
+ <para>
+ These constants are defined in nand.h. They are ored together to describe
+ the functionality.
+ <programlisting>
+/* The hw ecc generator provides a syndrome instead a ecc value on read
+ * This can only work if we have the ecc bytes directly behind the
+ * data bytes. Applies for DOC and AG-AND Renesas HW Reed Solomon generators */
+#define NAND_HWECC_SYNDROME 0x00020000
+ </programlisting>
+ </para>
+ </sect2>
+ </sect1>
+
+ <sect1 id="EEC_selection_constants">
+ <title>ECC selection constants</title>
+ <para>
+ Use these constants to select the ECC algorithm.
+ <programlisting>
+/* No ECC. Usage is not recommended ! */
+#define NAND_ECC_NONE 0
+/* Software ECC 3 byte ECC per 256 Byte data */
+#define NAND_ECC_SOFT 1
+/* Hardware ECC 3 byte ECC per 256 Byte data */
+#define NAND_ECC_HW3_256 2
+/* Hardware ECC 3 byte ECC per 512 Byte data */
+#define NAND_ECC_HW3_512 3
+/* Hardware ECC 6 byte ECC per 512 Byte data */
+#define NAND_ECC_HW6_512 4
+/* Hardware ECC 6 byte ECC per 512 Byte data */
+#define NAND_ECC_HW8_512 6
+ </programlisting>
+ </para>
+ </sect1>
+
+ <sect1 id="Hardware_control_related_constants">
+ <title>Hardware control related constants</title>
+ <para>
+ These constants describe the requested hardware access function when
+ the boardspecific hardware control function is called
+ <programlisting>
+/* Select the chip by setting nCE to low */
+#define NAND_CTL_SETNCE 1
+/* Deselect the chip by setting nCE to high */
+#define NAND_CTL_CLRNCE 2
+/* Select the command latch by setting CLE to high */
+#define NAND_CTL_SETCLE 3
+/* Deselect the command latch by setting CLE to low */
+#define NAND_CTL_CLRCLE 4
+/* Select the address latch by setting ALE to high */
+#define NAND_CTL_SETALE 5
+/* Deselect the address latch by setting ALE to low */
+#define NAND_CTL_CLRALE 6
+/* Set write protection by setting WP to high. Not used! */
+#define NAND_CTL_SETWP 7
+/* Clear write protection by setting WP to low. Not used! */
+#define NAND_CTL_CLRWP 8
+ </programlisting>
+ </para>
+ </sect1>
+
+ <sect1 id="Bad_block_table_constants">
+ <title>Bad block table related constants</title>
+ <para>
+ These constants describe the options used for bad block
+ table descriptors.
+ <programlisting>
+/* Options for the bad block table descriptors */
+
+/* The number of bits used per block in the bbt on the device */
+#define NAND_BBT_NRBITS_MSK 0x0000000F
+#define NAND_BBT_1BIT 0x00000001
+#define NAND_BBT_2BIT 0x00000002
+#define NAND_BBT_4BIT 0x00000004
+#define NAND_BBT_8BIT 0x00000008
+/* The bad block table is in the last good block of the device */
+#define NAND_BBT_LASTBLOCK 0x00000010
+/* The bbt is at the given page, else we must scan for the bbt */
+#define NAND_BBT_ABSPAGE 0x00000020
+/* bbt is stored per chip on multichip devices */
+#define NAND_BBT_PERCHIP 0x00000080
+/* bbt has a version counter at offset veroffs */
+#define NAND_BBT_VERSION 0x00000100
+/* Create a bbt if none axists */
+#define NAND_BBT_CREATE 0x00000200
+/* Write bbt if necessary */
+#define NAND_BBT_WRITE 0x00001000
+/* Read and write back block contents when writing bbt */
+#define NAND_BBT_SAVECONTENT 0x00002000
+ </programlisting>
+ </para>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="structs">
+ <title>Structures</title>
+ <para>
+ This chapter contains the autogenerated documentation of the structures which are
+ used in the NAND driver and might be relevant for a driver developer. Each
+ struct member has a short description which is marked with an [XXX] identifier.
+ See the chapter "Documentation hints" for an explanation.
+ </para>
+!Iinclude/linux/mtd/nand.h
+ </chapter>
+
+ <chapter id="pubfunctions">
+ <title>Public Functions Provided</title>
+ <para>
+ This chapter contains the autogenerated documentation of the NAND kernel API functions
+ which are exported. Each function has a short description which is marked with an [XXX] identifier.
+ See the chapter "Documentation hints" for an explanation.
+ </para>
+!Edrivers/mtd/nand/nand_base.c
+!Edrivers/mtd/nand/nand_bbt.c
+!Edrivers/mtd/nand/nand_ecc.c
+ </chapter>
+
+ <chapter id="intfunctions">
+ <title>Internal Functions Provided</title>
+ <para>
+ This chapter contains the autogenerated documentation of the NAND driver internal functions.
+ Each function has a short description which is marked with an [XXX] identifier.
+ See the chapter "Documentation hints" for an explanation.
+ The functions marked with [DEFAULT] might be relevant for a board driver developer.
+ </para>
+!Idrivers/mtd/nand/nand_base.c
+!Idrivers/mtd/nand/nand_bbt.c
+<!-- No internal functions for kernel-doc:
+X!Idrivers/mtd/nand/nand_ecc.c
+-->
+ </chapter>
+
+ <chapter id="credits">
+ <title>Credits</title>
+ <para>
+ The following people have contributed to the NAND driver:
+ <orderedlist>
+ <listitem><para>Steven J. Hill<email>sjhill@realitydiluted.com</email></para></listitem>
+ <listitem><para>David Woodhouse<email>dwmw2@infradead.org</email></para></listitem>
+ <listitem><para>Thomas Gleixner<email>tglx@linutronix.de</email></para></listitem>
+ </orderedlist>
+ A lot of users have provided bugfixes, improvements and helping hands for testing.
+ Thanks a lot.
+ </para>
+ <para>
+ The following people have contributed to this document:
+ <orderedlist>
+ <listitem><para>Thomas Gleixner<email>tglx@linutronix.de</email></para></listitem>
+ </orderedlist>
+ </para>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/networking.tmpl b/Documentation/DocBook/networking.tmpl
new file mode 100644
index 000000000..29df25016
--- /dev/null
+++ b/Documentation/DocBook/networking.tmpl
@@ -0,0 +1,111 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="LinuxNetworking">
+ <bookinfo>
+ <title>Linux Networking and Network Devices APIs</title>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="netcore">
+ <title>Linux Networking</title>
+ <sect1><title>Networking Base Types</title>
+!Iinclude/linux/net.h
+ </sect1>
+ <sect1><title>Socket Buffer Functions</title>
+!Iinclude/linux/skbuff.h
+!Iinclude/net/sock.h
+!Enet/socket.c
+!Enet/core/skbuff.c
+!Enet/core/sock.c
+!Enet/core/datagram.c
+!Enet/core/stream.c
+ </sect1>
+ <sect1><title>Socket Filter</title>
+!Enet/core/filter.c
+ </sect1>
+ <sect1><title>Generic Network Statistics</title>
+!Iinclude/uapi/linux/gen_stats.h
+!Enet/core/gen_stats.c
+!Enet/core/gen_estimator.c
+ </sect1>
+ <sect1><title>SUN RPC subsystem</title>
+<!-- The !D functionality is not perfect, garbage has to be protected by comments
+!Dnet/sunrpc/sunrpc_syms.c
+-->
+!Enet/sunrpc/xdr.c
+!Enet/sunrpc/svc_xprt.c
+!Enet/sunrpc/xprt.c
+!Enet/sunrpc/sched.c
+!Enet/sunrpc/socklib.c
+!Enet/sunrpc/stats.c
+!Enet/sunrpc/rpc_pipe.c
+!Enet/sunrpc/rpcb_clnt.c
+!Enet/sunrpc/clnt.c
+ </sect1>
+ <sect1><title>WiMAX</title>
+!Enet/wimax/op-msg.c
+!Enet/wimax/op-reset.c
+!Enet/wimax/op-rfkill.c
+!Enet/wimax/stack.c
+!Iinclude/net/wimax.h
+!Iinclude/uapi/linux/wimax.h
+ </sect1>
+ </chapter>
+
+ <chapter id="netdev">
+ <title>Network device support</title>
+ <sect1><title>Driver Support</title>
+!Enet/core/dev.c
+!Enet/ethernet/eth.c
+!Enet/sched/sch_generic.c
+!Iinclude/linux/etherdevice.h
+!Iinclude/linux/netdevice.h
+ </sect1>
+ <sect1><title>PHY Support</title>
+!Edrivers/net/phy/phy.c
+!Idrivers/net/phy/phy.c
+!Edrivers/net/phy/phy_device.c
+!Idrivers/net/phy/phy_device.c
+!Edrivers/net/phy/mdio_bus.c
+!Idrivers/net/phy/mdio_bus.c
+ </sect1>
+<!-- FIXME: Removed for now since no structured comments in source
+ <sect1><title>Wireless</title>
+X!Enet/core/wireless.c
+ </sect1>
+-->
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/rapidio.tmpl b/Documentation/DocBook/rapidio.tmpl
new file mode 100644
index 000000000..50479360d
--- /dev/null
+++ b/Documentation/DocBook/rapidio.tmpl
@@ -0,0 +1,158 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
+ <!ENTITY rapidio SYSTEM "rapidio.xml">
+ ]>
+
+<book id="RapidIO-Guide">
+ <bookinfo>
+ <title>RapidIO Subsystem Guide</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Matt</firstname>
+ <surname>Porter</surname>
+ <affiliation>
+ <address>
+ <email>mporter@kernel.crashing.org</email>
+ <email>mporter@mvista.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2005</year>
+ <holder>MontaVista Software, Inc.</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ RapidIO is a high speed switched fabric interconnect with
+ features aimed at the embedded market. RapidIO provides
+ support for memory-mapped I/O as well as message-based
+ transactions over the switched fabric network. RapidIO has
+ a standardized discovery mechanism not unlike the PCI bus
+ standard that allows simple detection of devices in a
+ network.
+ </para>
+ <para>
+ This documentation is provided for developers intending
+ to support RapidIO on new architectures, write new drivers,
+ or to understand the subsystem internals.
+ </para>
+ </chapter>
+
+ <chapter id="bugs">
+ <title>Known Bugs and Limitations</title>
+
+ <sect1 id="known_bugs">
+ <title>Bugs</title>
+ <para>None. ;)</para>
+ </sect1>
+ <sect1 id="Limitations">
+ <title>Limitations</title>
+ <para>
+ <orderedlist>
+ <listitem><para>Access/management of RapidIO memory regions is not supported</para></listitem>
+ <listitem><para>Multiple host enumeration is not supported</para></listitem>
+ </orderedlist>
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="drivers">
+ <title>RapidIO driver interface</title>
+ <para>
+ Drivers are provided a set of calls in order
+ to interface with the subsystem to gather info
+ on devices, request/map memory region resources,
+ and manage mailboxes/doorbells.
+ </para>
+ <sect1 id="Functions">
+ <title>Functions</title>
+!Iinclude/linux/rio_drv.h
+!Edrivers/rapidio/rio-driver.c
+!Edrivers/rapidio/rio.c
+ </sect1>
+ </chapter>
+
+ <chapter id="internals">
+ <title>Internals</title>
+
+ <para>
+ This chapter contains the autogenerated documentation of the RapidIO
+ subsystem.
+ </para>
+
+ <sect1 id="Structures"><title>Structures</title>
+!Iinclude/linux/rio.h
+ </sect1>
+ <sect1 id="Enumeration_and_Discovery"><title>Enumeration and Discovery</title>
+!Idrivers/rapidio/rio-scan.c
+ </sect1>
+ <sect1 id="Driver_functionality"><title>Driver functionality</title>
+!Idrivers/rapidio/rio.c
+!Idrivers/rapidio/rio-access.c
+ </sect1>
+ <sect1 id="Device_model_support"><title>Device model support</title>
+!Idrivers/rapidio/rio-driver.c
+ </sect1>
+ <sect1 id="Sysfs_support"><title>Sysfs support</title>
+!Idrivers/rapidio/rio-sysfs.c
+ </sect1>
+ <sect1 id="PPC32_support"><title>PPC32 support</title>
+!Iarch/powerpc/sysdev/fsl_rio.c
+ </sect1>
+ </chapter>
+
+ <chapter id="credits">
+ <title>Credits</title>
+ <para>
+ The following people have contributed to the RapidIO
+ subsystem directly or indirectly:
+ <orderedlist>
+ <listitem><para>Matt Porter<email>mporter@kernel.crashing.org</email></para></listitem>
+ <listitem><para>Randy Vinson<email>rvinson@mvista.com</email></para></listitem>
+ <listitem><para>Dan Malek<email>dan@embeddedalley.com</email></para></listitem>
+ </orderedlist>
+ </para>
+ <para>
+ The following people have contributed to this document:
+ <orderedlist>
+ <listitem><para>Matt Porter<email>mporter@kernel.crashing.org</email></para></listitem>
+ </orderedlist>
+ </para>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/regulator.tmpl b/Documentation/DocBook/regulator.tmpl
new file mode 100644
index 000000000..3b08a085d
--- /dev/null
+++ b/Documentation/DocBook/regulator.tmpl
@@ -0,0 +1,304 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="regulator-api">
+ <bookinfo>
+ <title>Voltage and current regulator API</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Liam</firstname>
+ <surname>Girdwood</surname>
+ <affiliation>
+ <address>
+ <email>lrg@slimlogic.co.uk</email>
+ </address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>Mark</firstname>
+ <surname>Brown</surname>
+ <affiliation>
+ <orgname>Wolfson Microelectronics</orgname>
+ <address>
+ <email>broonie@opensource.wolfsonmicro.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2007-2008</year>
+ <holder>Wolfson Microelectronics</holder>
+ </copyright>
+ <copyright>
+ <year>2008</year>
+ <holder>Liam Girdwood</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ This framework is designed to provide a standard kernel
+ interface to control voltage and current regulators.
+ </para>
+ <para>
+ The intention is to allow systems to dynamically control
+ regulator power output in order to save power and prolong
+ battery life. This applies to both voltage regulators (where
+ voltage output is controllable) and current sinks (where current
+ limit is controllable).
+ </para>
+ <para>
+ Note that additional (and currently more complete) documentation
+ is available in the Linux kernel source under
+ <filename>Documentation/power/regulator</filename>.
+ </para>
+
+ <sect1 id="glossary">
+ <title>Glossary</title>
+ <para>
+ The regulator API uses a number of terms which may not be
+ familiar:
+ </para>
+ <glossary>
+
+ <glossentry>
+ <glossterm>Regulator</glossterm>
+ <glossdef>
+ <para>
+ Electronic device that supplies power to other devices. Most
+ regulators can enable and disable their output and some can also
+ control their output voltage or current.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry>
+ <glossterm>Consumer</glossterm>
+ <glossdef>
+ <para>
+ Electronic device which consumes power provided by a regulator.
+ These may either be static, requiring only a fixed supply, or
+ dynamic, requiring active management of the regulator at
+ runtime.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry>
+ <glossterm>Power Domain</glossterm>
+ <glossdef>
+ <para>
+ The electronic circuit supplied by a given regulator, including
+ the regulator and all consumer devices. The configuration of
+ the regulator is shared between all the components in the
+ circuit.
+ </para>
+ </glossdef>
+ </glossentry>
+
+ <glossentry>
+ <glossterm>Power Management Integrated Circuit</glossterm>
+ <acronym>PMIC</acronym>
+ <glossdef>
+ <para>
+ An IC which contains numerous regulators and often also other
+ subsystems. In an embedded system the primary PMIC is often
+ equivalent to a combination of the PSU and southbridge in a
+ desktop system.
+ </para>
+ </glossdef>
+ </glossentry>
+ </glossary>
+ </sect1>
+ </chapter>
+
+ <chapter id="consumer">
+ <title>Consumer driver interface</title>
+ <para>
+ This offers a similar API to the kernel clock framework.
+ Consumer drivers use <link
+ linkend='API-regulator-get'>get</link> and <link
+ linkend='API-regulator-put'>put</link> operations to acquire and
+ release regulators. Functions are
+ provided to <link linkend='API-regulator-enable'>enable</link>
+ and <link linkend='API-regulator-disable'>disable</link> the
+ regulator and to get and set the runtime parameters of the
+ regulator.
+ </para>
+ <para>
+ When requesting regulators consumers use symbolic names for their
+ supplies, such as "Vcc", which are mapped into actual regulator
+ devices by the machine interface.
+ </para>
+ <para>
+ A stub version of this API is provided when the regulator
+ framework is not in use in order to minimise the need to use
+ ifdefs.
+ </para>
+
+ <sect1 id="consumer-enable">
+ <title>Enabling and disabling</title>
+ <para>
+ The regulator API provides reference counted enabling and
+ disabling of regulators. Consumer devices use the <function><link
+ linkend='API-regulator-enable'>regulator_enable</link></function>
+ and <function><link
+ linkend='API-regulator-disable'>regulator_disable</link>
+ </function> functions to enable and disable regulators. Calls
+ to the two functions must be balanced.
+ </para>
+ <para>
+ Note that since multiple consumers may be using a regulator and
+ machine constraints may not allow the regulator to be disabled
+ there is no guarantee that calling
+ <function>regulator_disable</function> will actually cause the
+ supply provided by the regulator to be disabled. Consumer
+ drivers should assume that the regulator may be enabled at all
+ times.
+ </para>
+ </sect1>
+
+ <sect1 id="consumer-config">
+ <title>Configuration</title>
+ <para>
+ Some consumer devices may need to be able to dynamically
+ configure their supplies. For example, MMC drivers may need to
+ select the correct operating voltage for their cards. This may
+ be done while the regulator is enabled or disabled.
+ </para>
+ <para>
+ The <function><link
+ linkend='API-regulator-set-voltage'>regulator_set_voltage</link>
+ </function> and <function><link
+ linkend='API-regulator-set-current-limit'
+ >regulator_set_current_limit</link>
+ </function> functions provide the primary interface for this.
+ Both take ranges of voltages and currents, supporting drivers
+ that do not require a specific value (eg, CPU frequency scaling
+ normally permits the CPU to use a wider range of supply
+ voltages at lower frequencies but does not require that the
+ supply voltage be lowered). Where an exact value is required
+ both minimum and maximum values should be identical.
+ </para>
+ </sect1>
+
+ <sect1 id="consumer-callback">
+ <title>Callbacks</title>
+ <para>
+ Callbacks may also be <link
+ linkend='API-regulator-register-notifier'>registered</link>
+ for events such as regulation failures.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="driver">
+ <title>Regulator driver interface</title>
+ <para>
+ Drivers for regulator chips <link
+ linkend='API-regulator-register'>register</link> the regulators
+ with the regulator core, providing operations structures to the
+ core. A <link
+ linkend='API-regulator-notifier-call-chain'>notifier</link> interface
+ allows error conditions to be reported to the core.
+ </para>
+ <para>
+ Registration should be triggered by explicit setup done by the
+ platform, supplying a <link
+ linkend='API-struct-regulator-init-data'>struct
+ regulator_init_data</link> for the regulator containing
+ <link linkend='machine-constraint'>constraint</link> and
+ <link linkend='machine-supply'>supply</link> information.
+ </para>
+ </chapter>
+
+ <chapter id="machine">
+ <title>Machine interface</title>
+ <para>
+ This interface provides a way to define how regulators are
+ connected to consumers on a given system and what the valid
+ operating parameters are for the system.
+ </para>
+
+ <sect1 id="machine-supply">
+ <title>Supplies</title>
+ <para>
+ Regulator supplies are specified using <link
+ linkend='API-struct-regulator-consumer-supply'>struct
+ regulator_consumer_supply</link>. This is done at
+ <link linkend='driver'>driver registration
+ time</link> as part of the machine constraints.
+ </para>
+ </sect1>
+
+ <sect1 id="machine-constraint">
+ <title>Constraints</title>
+ <para>
+ As well as defining the connections the machine interface
+ also provides constraints defining the operations that
+ clients are allowed to perform and the parameters that may be
+ set. This is required since generally regulator devices will
+ offer more flexibility than it is safe to use on a given
+ system, for example supporting higher supply voltages than the
+ consumers are rated for.
+ </para>
+ <para>
+ This is done at <link linkend='driver'>driver
+ registration time</link> by providing a <link
+ linkend='API-struct-regulation-constraints'>struct
+ regulation_constraints</link>.
+ </para>
+ <para>
+ The constraints may also specify an initial configuration for the
+ regulator in the constraints, which is particularly useful for
+ use with static consumers.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="api">
+ <title>API reference</title>
+ <para>
+ Due to limitations of the kernel documentation framework and the
+ existing layout of the source code the entire regulator API is
+ documented here.
+ </para>
+!Iinclude/linux/regulator/consumer.h
+!Iinclude/linux/regulator/machine.h
+!Iinclude/linux/regulator/driver.h
+!Edrivers/regulator/core.c
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/s390-drivers.tmpl b/Documentation/DocBook/s390-drivers.tmpl
new file mode 100644
index 000000000..95bfc12e5
--- /dev/null
+++ b/Documentation/DocBook/s390-drivers.tmpl
@@ -0,0 +1,161 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="s390drivers">
+ <bookinfo>
+ <title>Writing s390 channel device drivers</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Cornelia</firstname>
+ <surname>Huck</surname>
+ <affiliation>
+ <address>
+ <email>cornelia.huck@de.ibm.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2007</year>
+ <holder>IBM Corp.</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ This document describes the interfaces available for device drivers that
+ drive s390 based channel attached I/O devices. This includes interfaces for
+ interaction with the hardware and interfaces for interacting with the
+ common driver core. Those interfaces are provided by the s390 common I/O
+ layer.
+ </para>
+ <para>
+ The document assumes a familarity with the technical terms associated
+ with the s390 channel I/O architecture. For a description of this
+ architecture, please refer to the "z/Architecture: Principles of
+ Operation", IBM publication no. SA22-7832.
+ </para>
+ <para>
+ While most I/O devices on a s390 system are typically driven through the
+ channel I/O mechanism described here, there are various other methods
+ (like the diag interface). These are out of the scope of this document.
+ </para>
+ <para>
+ Some additional information can also be found in the kernel source
+ under Documentation/s390/driver-model.txt.
+ </para>
+ </chapter>
+ <chapter id="ccw">
+ <title>The ccw bus</title>
+ <para>
+ The ccw bus typically contains the majority of devices available to
+ a s390 system. Named after the channel command word (ccw), the basic
+ command structure used to address its devices, the ccw bus contains
+ so-called channel attached devices. They are addressed via I/O
+ subchannels, visible on the css bus. A device driver for
+ channel-attached devices, however, will never interact with the
+ subchannel directly, but only via the I/O device on the ccw bus,
+ the ccw device.
+ </para>
+ <sect1 id="channelIO">
+ <title>I/O functions for channel-attached devices</title>
+ <para>
+ Some hardware structures have been translated into C structures for use
+ by the common I/O layer and device drivers. For more information on
+ the hardware structures represented here, please consult the Principles
+ of Operation.
+ </para>
+!Iarch/s390/include/asm/cio.h
+ </sect1>
+ <sect1 id="ccwdev">
+ <title>ccw devices</title>
+ <para>
+ Devices that want to initiate channel I/O need to attach to the ccw bus.
+ Interaction with the driver core is done via the common I/O layer, which
+ provides the abstractions of ccw devices and ccw device drivers.
+ </para>
+ <para>
+ The functions that initiate or terminate channel I/O all act upon a
+ ccw device structure. Device drivers must not bypass those functions
+ or strange side effects may happen.
+ </para>
+!Iarch/s390/include/asm/ccwdev.h
+!Edrivers/s390/cio/device.c
+!Edrivers/s390/cio/device_ops.c
+ </sect1>
+ <sect1 id="cmf">
+ <title>The channel-measurement facility</title>
+ <para>
+ The channel-measurement facility provides a means to collect
+ measurement data which is made available by the channel subsystem
+ for each channel attached device.
+ </para>
+!Iarch/s390/include/asm/cmb.h
+!Edrivers/s390/cio/cmf.c
+ </sect1>
+ </chapter>
+
+ <chapter id="ccwgroup">
+ <title>The ccwgroup bus</title>
+ <para>
+ The ccwgroup bus only contains artificial devices, created by the user.
+ Many networking devices (e.g. qeth) are in fact composed of several
+ ccw devices (like read, write and data channel for qeth). The
+ ccwgroup bus provides a mechanism to create a meta-device which
+ contains those ccw devices as slave devices and can be associated
+ with the netdevice.
+ </para>
+ <sect1 id="ccwgroupdevices">
+ <title>ccw group devices</title>
+!Iarch/s390/include/asm/ccwgroup.h
+!Edrivers/s390/cio/ccwgroup.c
+ </sect1>
+ </chapter>
+
+ <chapter id="genericinterfaces">
+ <title>Generic interfaces</title>
+ <para>
+ Some interfaces are available to other drivers that do not necessarily
+ have anything to do with the busses described above, but still are
+ indirectly using basic infrastructure in the common I/O layer.
+ One example is the support for adapter interrupts.
+ </para>
+!Edrivers/s390/cio/airq.c
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/scsi.tmpl b/Documentation/DocBook/scsi.tmpl
new file mode 100644
index 000000000..324b53494
--- /dev/null
+++ b/Documentation/DocBook/scsi.tmpl
@@ -0,0 +1,409 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="scsimid">
+ <bookinfo>
+ <title>SCSI Interfaces Guide</title>
+
+ <authorgroup>
+ <author>
+ <firstname>James</firstname>
+ <surname>Bottomley</surname>
+ <affiliation>
+ <address>
+ <email>James.Bottomley@hansenpartnership.com</email>
+ </address>
+ </affiliation>
+ </author>
+
+ <author>
+ <firstname>Rob</firstname>
+ <surname>Landley</surname>
+ <affiliation>
+ <address>
+ <email>rob@landley.net</email>
+ </address>
+ </affiliation>
+ </author>
+
+ </authorgroup>
+
+ <copyright>
+ <year>2007</year>
+ <holder>Linux Foundation</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <sect1 id="protocol_vs_bus">
+ <title>Protocol vs bus</title>
+ <para>
+ Once upon a time, the Small Computer Systems Interface defined both
+ a parallel I/O bus and a data protocol to connect a wide variety of
+ peripherals (disk drives, tape drives, modems, printers, scanners,
+ optical drives, test equipment, and medical devices) to a host
+ computer.
+ </para>
+ <para>
+ Although the old parallel (fast/wide/ultra) SCSI bus has largely
+ fallen out of use, the SCSI command set is more widely used than ever
+ to communicate with devices over a number of different busses.
+ </para>
+ <para>
+ The <ulink url='http://www.t10.org/scsi-3.htm'>SCSI protocol</ulink>
+ is a big-endian peer-to-peer packet based protocol. SCSI commands
+ are 6, 10, 12, or 16 bytes long, often followed by an associated data
+ payload.
+ </para>
+ <para>
+ SCSI commands can be transported over just about any kind of bus, and
+ are the default protocol for storage devices attached to USB, SATA,
+ SAS, Fibre Channel, FireWire, and ATAPI devices. SCSI packets are
+ also commonly exchanged over Infiniband,
+ <ulink url='http://i2o.shadowconnect.com/faq.php'>I20</ulink>, TCP/IP
+ (<ulink url='http://en.wikipedia.org/wiki/ISCSI'>iSCSI</ulink>), even
+ <ulink url='http://cyberelk.net/tim/parport/parscsi.html'>Parallel
+ ports</ulink>.
+ </para>
+ </sect1>
+ <sect1 id="subsystem_design">
+ <title>Design of the Linux SCSI subsystem</title>
+ <para>
+ The SCSI subsystem uses a three layer design, with upper, mid, and low
+ layers. Every operation involving the SCSI subsystem (such as reading
+ a sector from a disk) uses one driver at each of the 3 levels: one
+ upper layer driver, one lower layer driver, and the SCSI midlayer.
+ </para>
+ <para>
+ The SCSI upper layer provides the interface between userspace and the
+ kernel, in the form of block and char device nodes for I/O and
+ ioctl(). The SCSI lower layer contains drivers for specific hardware
+ devices.
+ </para>
+ <para>
+ In between is the SCSI mid-layer, analogous to a network routing
+ layer such as the IPv4 stack. The SCSI mid-layer routes a packet
+ based data protocol between the upper layer's /dev nodes and the
+ corresponding devices in the lower layer. It manages command queues,
+ provides error handling and power management functions, and responds
+ to ioctl() requests.
+ </para>
+ </sect1>
+ </chapter>
+
+ <chapter id="upper_layer">
+ <title>SCSI upper layer</title>
+ <para>
+ The upper layer supports the user-kernel interface by providing
+ device nodes.
+ </para>
+ <sect1 id="sd">
+ <title>sd (SCSI Disk)</title>
+ <para>sd (sd_mod.o)</para>
+<!-- !Idrivers/scsi/sd.c -->
+ </sect1>
+ <sect1 id="sr">
+ <title>sr (SCSI CD-ROM)</title>
+ <para>sr (sr_mod.o)</para>
+ </sect1>
+ <sect1 id="st">
+ <title>st (SCSI Tape)</title>
+ <para>st (st.o)</para>
+ </sect1>
+ <sect1 id="sg">
+ <title>sg (SCSI Generic)</title>
+ <para>sg (sg.o)</para>
+ </sect1>
+ <sect1 id="ch">
+ <title>ch (SCSI Media Changer)</title>
+ <para>ch (ch.c)</para>
+ </sect1>
+ </chapter>
+
+ <chapter id="mid_layer">
+ <title>SCSI mid layer</title>
+
+ <sect1 id="midlayer_implementation">
+ <title>SCSI midlayer implementation</title>
+ <sect2 id="scsi_device.h">
+ <title>include/scsi/scsi_device.h</title>
+ <para>
+ </para>
+!Iinclude/scsi/scsi_device.h
+ </sect2>
+
+ <sect2 id="scsi.c">
+ <title>drivers/scsi/scsi.c</title>
+ <para>Main file for the SCSI midlayer.</para>
+!Edrivers/scsi/scsi.c
+ </sect2>
+ <sect2 id="scsicam.c">
+ <title>drivers/scsi/scsicam.c</title>
+ <para>
+ <ulink url='http://www.t10.org/ftp/t10/drafts/cam/cam-r12b.pdf'>SCSI
+ Common Access Method</ulink> support functions, for use with
+ HDIO_GETGEO, etc.
+ </para>
+!Edrivers/scsi/scsicam.c
+ </sect2>
+ <sect2 id="scsi_error.c">
+ <title>drivers/scsi/scsi_error.c</title>
+ <para>Common SCSI error/timeout handling routines.</para>
+!Edrivers/scsi/scsi_error.c
+ </sect2>
+ <sect2 id="scsi_devinfo.c">
+ <title>drivers/scsi/scsi_devinfo.c</title>
+ <para>
+ Manage scsi_dev_info_list, which tracks blacklisted and whitelisted
+ devices.
+ </para>
+!Idrivers/scsi/scsi_devinfo.c
+ </sect2>
+ <sect2 id="scsi_ioctl.c">
+ <title>drivers/scsi/scsi_ioctl.c</title>
+ <para>
+ Handle ioctl() calls for SCSI devices.
+ </para>
+!Edrivers/scsi/scsi_ioctl.c
+ </sect2>
+ <sect2 id="scsi_lib.c">
+ <title>drivers/scsi/scsi_lib.c</title>
+ <para>
+ SCSI queuing library.
+ </para>
+!Edrivers/scsi/scsi_lib.c
+ </sect2>
+ <sect2 id="scsi_lib_dma.c">
+ <title>drivers/scsi/scsi_lib_dma.c</title>
+ <para>
+ SCSI library functions depending on DMA
+ (map and unmap scatter-gather lists).
+ </para>
+!Edrivers/scsi/scsi_lib_dma.c
+ </sect2>
+ <sect2 id="scsi_module.c">
+ <title>drivers/scsi/scsi_module.c</title>
+ <para>
+ The file drivers/scsi/scsi_module.c contains legacy support for
+ old-style host templates. It should never be used by any new driver.
+ </para>
+ </sect2>
+ <sect2 id="scsi_proc.c">
+ <title>drivers/scsi/scsi_proc.c</title>
+ <para>
+ The functions in this file provide an interface between
+ the PROC file system and the SCSI device drivers
+ It is mainly used for debugging, statistics and to pass
+ information directly to the lowlevel driver.
+
+ I.E. plumbing to manage /proc/scsi/*
+ </para>
+!Idrivers/scsi/scsi_proc.c
+ </sect2>
+ <sect2 id="scsi_netlink.c">
+ <title>drivers/scsi/scsi_netlink.c</title>
+ <para>
+ Infrastructure to provide async events from transports to userspace
+ via netlink, using a single NETLINK_SCSITRANSPORT protocol for all
+ transports.
+
+ See <ulink url='http://marc.info/?l=linux-scsi&amp;m=115507374832500&amp;w=2'>the
+ original patch submission</ulink> for more details.
+ </para>
+!Idrivers/scsi/scsi_netlink.c
+ </sect2>
+ <sect2 id="scsi_scan.c">
+ <title>drivers/scsi/scsi_scan.c</title>
+ <para>
+ Scan a host to determine which (if any) devices are attached.
+
+ The general scanning/probing algorithm is as follows, exceptions are
+ made to it depending on device specific flags, compilation options,
+ and global variable (boot or module load time) settings.
+
+ A specific LUN is scanned via an INQUIRY command; if the LUN has a
+ device attached, a scsi_device is allocated and setup for it.
+
+ For every id of every channel on the given host, start by scanning
+ LUN 0. Skip hosts that don't respond at all to a scan of LUN 0.
+ Otherwise, if LUN 0 has a device attached, allocate and setup a
+ scsi_device for it. If target is SCSI-3 or up, issue a REPORT LUN,
+ and scan all of the LUNs returned by the REPORT LUN; else,
+ sequentially scan LUNs up until some maximum is reached, or a LUN is
+ seen that cannot have a device attached to it.
+ </para>
+!Idrivers/scsi/scsi_scan.c
+ </sect2>
+ <sect2 id="scsi_sysctl.c">
+ <title>drivers/scsi/scsi_sysctl.c</title>
+ <para>
+ Set up the sysctl entry: "/dev/scsi/logging_level"
+ (DEV_SCSI_LOGGING_LEVEL) which sets/returns scsi_logging_level.
+ </para>
+ </sect2>
+ <sect2 id="scsi_sysfs.c">
+ <title>drivers/scsi/scsi_sysfs.c</title>
+ <para>
+ SCSI sysfs interface routines.
+ </para>
+!Edrivers/scsi/scsi_sysfs.c
+ </sect2>
+ <sect2 id="hosts.c">
+ <title>drivers/scsi/hosts.c</title>
+ <para>
+ mid to lowlevel SCSI driver interface
+ </para>
+!Edrivers/scsi/hosts.c
+ </sect2>
+ <sect2 id="constants.c">
+ <title>drivers/scsi/constants.c</title>
+ <para>
+ mid to lowlevel SCSI driver interface
+ </para>
+!Edrivers/scsi/constants.c
+ </sect2>
+ </sect1>
+
+ <sect1 id="Transport_classes">
+ <title>Transport classes</title>
+ <para>
+ Transport classes are service libraries for drivers in the SCSI
+ lower layer, which expose transport attributes in sysfs.
+ </para>
+ <sect2 id="Fibre_Channel_transport">
+ <title>Fibre Channel transport</title>
+ <para>
+ The file drivers/scsi/scsi_transport_fc.c defines transport attributes
+ for Fibre Channel.
+ </para>
+!Edrivers/scsi/scsi_transport_fc.c
+ </sect2>
+ <sect2 id="iSCSI_transport">
+ <title>iSCSI transport class</title>
+ <para>
+ The file drivers/scsi/scsi_transport_iscsi.c defines transport
+ attributes for the iSCSI class, which sends SCSI packets over TCP/IP
+ connections.
+ </para>
+!Edrivers/scsi/scsi_transport_iscsi.c
+ </sect2>
+ <sect2 id="SAS_transport">
+ <title>Serial Attached SCSI (SAS) transport class</title>
+ <para>
+ The file drivers/scsi/scsi_transport_sas.c defines transport
+ attributes for Serial Attached SCSI, a variant of SATA aimed at
+ large high-end systems.
+ </para>
+ <para>
+ The SAS transport class contains common code to deal with SAS HBAs,
+ an aproximated representation of SAS topologies in the driver model,
+ and various sysfs attributes to expose these topologies and management
+ interfaces to userspace.
+ </para>
+ <para>
+ In addition to the basic SCSI core objects this transport class
+ introduces two additional intermediate objects: The SAS PHY
+ as represented by struct sas_phy defines an "outgoing" PHY on
+ a SAS HBA or Expander, and the SAS remote PHY represented by
+ struct sas_rphy defines an "incoming" PHY on a SAS Expander or
+ end device. Note that this is purely a software concept, the
+ underlying hardware for a PHY and a remote PHY is the exactly
+ the same.
+ </para>
+ <para>
+ There is no concept of a SAS port in this code, users can see
+ what PHYs form a wide port based on the port_identifier attribute,
+ which is the same for all PHYs in a port.
+ </para>
+!Edrivers/scsi/scsi_transport_sas.c
+ </sect2>
+ <sect2 id="SATA_transport">
+ <title>SATA transport class</title>
+ <para>
+ The SATA transport is handled by libata, which has its own book of
+ documentation in this directory.
+ </para>
+ </sect2>
+ <sect2 id="SPI_transport">
+ <title>Parallel SCSI (SPI) transport class</title>
+ <para>
+ The file drivers/scsi/scsi_transport_spi.c defines transport
+ attributes for traditional (fast/wide/ultra) SCSI busses.
+ </para>
+!Edrivers/scsi/scsi_transport_spi.c
+ </sect2>
+ <sect2 id="SRP_transport">
+ <title>SCSI RDMA (SRP) transport class</title>
+ <para>
+ The file drivers/scsi/scsi_transport_srp.c defines transport
+ attributes for SCSI over Remote Direct Memory Access.
+ </para>
+!Edrivers/scsi/scsi_transport_srp.c
+ </sect2>
+ </sect1>
+
+ </chapter>
+
+ <chapter id="lower_layer">
+ <title>SCSI lower layer</title>
+ <sect1 id="hba_drivers">
+ <title>Host Bus Adapter transport types</title>
+ <para>
+ Many modern device controllers use the SCSI command set as a protocol to
+ communicate with their devices through many different types of physical
+ connections.
+ </para>
+ <para>
+ In SCSI language a bus capable of carrying SCSI commands is
+ called a "transport", and a controller connecting to such a bus is
+ called a "host bus adapter" (HBA).
+ </para>
+ <sect2 id="scsi_debug.c">
+ <title>Debug transport</title>
+ <para>
+ The file drivers/scsi/scsi_debug.c simulates a host adapter with a
+ variable number of disks (or disk like devices) attached, sharing a
+ common amount of RAM. Does a lot of checking to make sure that we are
+ not getting blocks mixed up, and panics the kernel if anything out of
+ the ordinary is seen.
+ </para>
+ <para>
+ To be more realistic, the simulated devices have the transport
+ attributes of SAS disks.
+ </para>
+ <para>
+ For documentation see
+ <ulink url='http://sg.danny.cz/sg/sdebug26.html'>http://sg.danny.cz/sg/sdebug26.html</ulink>
+ </para>
+<!-- !Edrivers/scsi/scsi_debug.c -->
+ </sect2>
+ <sect2 id="todo">
+ <title>todo</title>
+ <para>Parallel (fast/wide/ultra) SCSI, USB, SATA,
+ SAS, Fibre Channel, FireWire, ATAPI devices, Infiniband,
+ I20, iSCSI, Parallel ports, netlink...
+ </para>
+ </sect2>
+ </sect1>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/sh.tmpl b/Documentation/DocBook/sh.tmpl
new file mode 100644
index 000000000..4a38f604f
--- /dev/null
+++ b/Documentation/DocBook/sh.tmpl
@@ -0,0 +1,105 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="sh-drivers">
+ <bookinfo>
+ <title>SuperH Interfaces Guide</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Paul</firstname>
+ <surname>Mundt</surname>
+ <affiliation>
+ <address>
+ <email>lethal@linux-sh.org</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2008-2010</year>
+ <holder>Paul Mundt</holder>
+ </copyright>
+ <copyright>
+ <year>2008-2010</year>
+ <holder>Renesas Technology Corp.</holder>
+ </copyright>
+ <copyright>
+ <year>2010</year>
+ <holder>Renesas Electronics Corp.</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2 as published by the Free Software Foundation.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="mm">
+ <title>Memory Management</title>
+ <sect1 id="sh4">
+ <title>SH-4</title>
+ <sect2 id="sq">
+ <title>Store Queue API</title>
+!Earch/sh/kernel/cpu/sh4/sq.c
+ </sect2>
+ </sect1>
+ <sect1 id="sh5">
+ <title>SH-5</title>
+ <sect2 id="tlb">
+ <title>TLB Interfaces</title>
+!Iarch/sh/mm/tlb-sh5.c
+!Iarch/sh/include/asm/tlb_64.h
+ </sect2>
+ </sect1>
+ </chapter>
+ <chapter id="mach">
+ <title>Machine Specific Interfaces</title>
+ <sect1 id="dreamcast">
+ <title>mach-dreamcast</title>
+!Iarch/sh/boards/mach-dreamcast/rtc.c
+ </sect1>
+ <sect1 id="x3proto">
+ <title>mach-x3proto</title>
+!Earch/sh/boards/mach-x3proto/ilsel.c
+ </sect1>
+ </chapter>
+ <chapter id="busses">
+ <title>Busses</title>
+ <sect1 id="superhyway">
+ <title>SuperHyway</title>
+!Edrivers/sh/superhyway/superhyway.c
+ </sect1>
+
+ <sect1 id="maple">
+ <title>Maple</title>
+!Edrivers/sh/maple/maple.c
+ </sect1>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/stylesheet.xsl b/Documentation/DocBook/stylesheet.xsl
new file mode 100644
index 000000000..85b252751
--- /dev/null
+++ b/Documentation/DocBook/stylesheet.xsl
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<stylesheet xmlns="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<param name="chunk.quietly">1</param>
+<param name="funcsynopsis.style">ansi</param>
+<param name="funcsynopsis.tabular.threshold">80</param>
+<param name="callout.graphics">0</param>
+<!-- <param name="paper.type">A4</param> -->
+<param name="generate.section.toc.level">2</param>
+<param name="use.id.as.filename">1</param>
+</stylesheet>
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
new file mode 100644
index 000000000..b57a9ede3
--- /dev/null
+++ b/Documentation/DocBook/tracepoint.tmpl
@@ -0,0 +1,112 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Tracepoints">
+ <bookinfo>
+ <title>The Linux Kernel Tracepoint API</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Jason</firstname>
+ <surname>Baron</surname>
+ <affiliation>
+ <address>
+ <email>jbaron@redhat.com</email>
+ </address>
+ </affiliation>
+ </author>
+ <author>
+ <firstname>William</firstname>
+ <surname>Cohen</surname>
+ <affiliation>
+ <address>
+ <email>wcohen@redhat.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ Tracepoints are static probe points that are located in strategic points
+ throughout the kernel. 'Probes' register/unregister with tracepoints
+ via a callback mechanism. The 'probes' are strictly typed functions that
+ are passed a unique set of parameters defined by each tracepoint.
+ </para>
+
+ <para>
+ From this simple callback mechanism, 'probes' can be used to profile, debug,
+ and understand kernel behavior. There are a number of tools that provide a
+ framework for using 'probes'. These tools include Systemtap, ftrace, and
+ LTTng.
+ </para>
+
+ <para>
+ Tracepoints are defined in a number of header files via various macros. Thus,
+ the purpose of this document is to provide a clear accounting of the available
+ tracepoints. The intention is to understand not only what tracepoints are
+ available but also to understand where future tracepoints might be added.
+ </para>
+
+ <para>
+ The API presented has functions of the form:
+ <function>trace_tracepointname(function parameters)</function>. These are the
+ tracepoints callbacks that are found throughout the code. Registering and
+ unregistering probes with these callback sites is covered in the
+ <filename>Documentation/trace/*</filename> directory.
+ </para>
+ </chapter>
+
+ <chapter id="irq">
+ <title>IRQ</title>
+!Iinclude/trace/events/irq.h
+ </chapter>
+
+ <chapter id="signal">
+ <title>SIGNAL</title>
+!Iinclude/trace/events/signal.h
+ </chapter>
+
+ <chapter id="block">
+ <title>Block IO</title>
+!Iinclude/trace/events/block.h
+ </chapter>
+
+ <chapter id="workqueue">
+ <title>Workqueue</title>
+!Iinclude/trace/events/workqueue.h
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl
new file mode 100644
index 000000000..cd0e452df
--- /dev/null
+++ b/Documentation/DocBook/uio-howto.tmpl
@@ -0,0 +1,1050 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" []>
+
+<book id="index">
+<bookinfo>
+<title>The Userspace I/O HOWTO</title>
+
+<author>
+ <firstname>Hans-Jürgen</firstname>
+ <surname>Koch</surname>
+ <authorblurb><para>Linux developer, Linutronix</para></authorblurb>
+ <affiliation>
+ <orgname>
+ <ulink url="http://www.linutronix.de">Linutronix</ulink>
+ </orgname>
+
+ <address>
+ <email>hjk@hansjkoch.de</email>
+ </address>
+ </affiliation>
+</author>
+
+<copyright>
+ <year>2006-2008</year>
+ <holder>Hans-Jürgen Koch.</holder>
+</copyright>
+<copyright>
+ <year>2009</year>
+ <holder>Red Hat Inc, Michael S. Tsirkin (mst@redhat.com)</holder>
+</copyright>
+
+<legalnotice>
+<para>
+This documentation is Free Software licensed under the terms of the
+GPL version 2.
+</para>
+</legalnotice>
+
+<pubdate>2006-12-11</pubdate>
+
+<abstract>
+ <para>This HOWTO describes concept and usage of Linux kernel's
+ Userspace I/O system.</para>
+</abstract>
+
+<revhistory>
+ <revision>
+ <revnumber>0.9</revnumber>
+ <date>2009-07-16</date>
+ <authorinitials>mst</authorinitials>
+ <revremark>Added generic pci driver
+ </revremark>
+ </revision>
+ <revision>
+ <revnumber>0.8</revnumber>
+ <date>2008-12-24</date>
+ <authorinitials>hjk</authorinitials>
+ <revremark>Added name attributes in mem and portio sysfs directories.
+ </revremark>
+ </revision>
+ <revision>
+ <revnumber>0.7</revnumber>
+ <date>2008-12-23</date>
+ <authorinitials>hjk</authorinitials>
+ <revremark>Added generic platform drivers and offset attribute.</revremark>
+ </revision>
+ <revision>
+ <revnumber>0.6</revnumber>
+ <date>2008-12-05</date>
+ <authorinitials>hjk</authorinitials>
+ <revremark>Added description of portio sysfs attributes.</revremark>
+ </revision>
+ <revision>
+ <revnumber>0.5</revnumber>
+ <date>2008-05-22</date>
+ <authorinitials>hjk</authorinitials>
+ <revremark>Added description of write() function.</revremark>
+ </revision>
+ <revision>
+ <revnumber>0.4</revnumber>
+ <date>2007-11-26</date>
+ <authorinitials>hjk</authorinitials>
+ <revremark>Removed section about uio_dummy.</revremark>
+ </revision>
+ <revision>
+ <revnumber>0.3</revnumber>
+ <date>2007-04-29</date>
+ <authorinitials>hjk</authorinitials>
+ <revremark>Added section about userspace drivers.</revremark>
+ </revision>
+ <revision>
+ <revnumber>0.2</revnumber>
+ <date>2007-02-13</date>
+ <authorinitials>hjk</authorinitials>
+ <revremark>Update after multiple mappings were added.</revremark>
+ </revision>
+ <revision>
+ <revnumber>0.1</revnumber>
+ <date>2006-12-11</date>
+ <authorinitials>hjk</authorinitials>
+ <revremark>First draft.</revremark>
+ </revision>
+</revhistory>
+</bookinfo>
+
+<chapter id="aboutthisdoc">
+<?dbhtml filename="aboutthis.html"?>
+<title>About this document</title>
+
+<sect1 id="translations">
+<?dbhtml filename="translations.html"?>
+<title>Translations</title>
+
+<para>If you know of any translations for this document, or you are
+interested in translating it, please email me
+<email>hjk@hansjkoch.de</email>.
+</para>
+</sect1>
+
+<sect1 id="preface">
+<title>Preface</title>
+ <para>
+ For many types of devices, creating a Linux kernel driver is
+ overkill. All that is really needed is some way to handle an
+ interrupt and provide access to the memory space of the
+ device. The logic of controlling the device does not
+ necessarily have to be within the kernel, as the device does
+ not need to take advantage of any of other resources that the
+ kernel provides. One such common class of devices that are
+ like this are for industrial I/O cards.
+ </para>
+ <para>
+ To address this situation, the userspace I/O system (UIO) was
+ designed. For typical industrial I/O cards, only a very small
+ kernel module is needed. The main part of the driver will run in
+ user space. This simplifies development and reduces the risk of
+ serious bugs within a kernel module.
+ </para>
+ <para>
+ Please note that UIO is not an universal driver interface. Devices
+ that are already handled well by other kernel subsystems (like
+ networking or serial or USB) are no candidates for an UIO driver.
+ Hardware that is ideally suited for an UIO driver fulfills all of
+ the following:
+ </para>
+<itemizedlist>
+<listitem>
+ <para>The device has memory that can be mapped. The device can be
+ controlled completely by writing to this memory.</para>
+</listitem>
+<listitem>
+ <para>The device usually generates interrupts.</para>
+</listitem>
+<listitem>
+ <para>The device does not fit into one of the standard kernel
+ subsystems.</para>
+</listitem>
+</itemizedlist>
+</sect1>
+
+<sect1 id="thanks">
+<title>Acknowledgments</title>
+ <para>I'd like to thank Thomas Gleixner and Benedikt Spranger of
+ Linutronix, who have not only written most of the UIO code, but also
+ helped greatly writing this HOWTO by giving me all kinds of background
+ information.</para>
+</sect1>
+
+<sect1 id="feedback">
+<title>Feedback</title>
+ <para>Find something wrong with this document? (Or perhaps something
+ right?) I would love to hear from you. Please email me at
+ <email>hjk@hansjkoch.de</email>.</para>
+</sect1>
+</chapter>
+
+<chapter id="about">
+<?dbhtml filename="about.html"?>
+<title>About UIO</title>
+
+<para>If you use UIO for your card's driver, here's what you get:</para>
+
+<itemizedlist>
+<listitem>
+ <para>only one small kernel module to write and maintain.</para>
+</listitem>
+<listitem>
+ <para>develop the main part of your driver in user space,
+ with all the tools and libraries you're used to.</para>
+</listitem>
+<listitem>
+ <para>bugs in your driver won't crash the kernel.</para>
+</listitem>
+<listitem>
+ <para>updates of your driver can take place without recompiling
+ the kernel.</para>
+</listitem>
+</itemizedlist>
+
+<sect1 id="how_uio_works">
+<title>How UIO works</title>
+ <para>
+ Each UIO device is accessed through a device file and several
+ sysfs attribute files. The device file will be called
+ <filename>/dev/uio0</filename> for the first device, and
+ <filename>/dev/uio1</filename>, <filename>/dev/uio2</filename>
+ and so on for subsequent devices.
+ </para>
+
+ <para><filename>/dev/uioX</filename> is used to access the
+ address space of the card. Just use
+ <function>mmap()</function> to access registers or RAM
+ locations of your card.
+ </para>
+
+ <para>
+ Interrupts are handled by reading from
+ <filename>/dev/uioX</filename>. A blocking
+ <function>read()</function> from
+ <filename>/dev/uioX</filename> will return as soon as an
+ interrupt occurs. You can also use
+ <function>select()</function> on
+ <filename>/dev/uioX</filename> to wait for an interrupt. The
+ integer value read from <filename>/dev/uioX</filename>
+ represents the total interrupt count. You can use this number
+ to figure out if you missed some interrupts.
+ </para>
+ <para>
+ For some hardware that has more than one interrupt source internally,
+ but not separate IRQ mask and status registers, there might be
+ situations where userspace cannot determine what the interrupt source
+ was if the kernel handler disables them by writing to the chip's IRQ
+ register. In such a case, the kernel has to disable the IRQ completely
+ to leave the chip's register untouched. Now the userspace part can
+ determine the cause of the interrupt, but it cannot re-enable
+ interrupts. Another cornercase is chips where re-enabling interrupts
+ is a read-modify-write operation to a combined IRQ status/acknowledge
+ register. This would be racy if a new interrupt occurred
+ simultaneously.
+ </para>
+ <para>
+ To address these problems, UIO also implements a write() function. It
+ is normally not used and can be ignored for hardware that has only a
+ single interrupt source or has separate IRQ mask and status registers.
+ If you need it, however, a write to <filename>/dev/uioX</filename>
+ will call the <function>irqcontrol()</function> function implemented
+ by the driver. You have to write a 32-bit value that is usually either
+ 0 or 1 to disable or enable interrupts. If a driver does not implement
+ <function>irqcontrol()</function>, <function>write()</function> will
+ return with <varname>-ENOSYS</varname>.
+ </para>
+
+ <para>
+ To handle interrupts properly, your custom kernel module can
+ provide its own interrupt handler. It will automatically be
+ called by the built-in handler.
+ </para>
+
+ <para>
+ For cards that don't generate interrupts but need to be
+ polled, there is the possibility to set up a timer that
+ triggers the interrupt handler at configurable time intervals.
+ This interrupt simulation is done by calling
+ <function>uio_event_notify()</function>
+ from the timer's event handler.
+ </para>
+
+ <para>
+ Each driver provides attributes that are used to read or write
+ variables. These attributes are accessible through sysfs
+ files. A custom kernel driver module can add its own
+ attributes to the device owned by the uio driver, but not added
+ to the UIO device itself at this time. This might change in the
+ future if it would be found to be useful.
+ </para>
+
+ <para>
+ The following standard attributes are provided by the UIO
+ framework:
+ </para>
+<itemizedlist>
+<listitem>
+ <para>
+ <filename>name</filename>: The name of your device. It is
+ recommended to use the name of your kernel module for this.
+ </para>
+</listitem>
+<listitem>
+ <para>
+ <filename>version</filename>: A version string defined by your
+ driver. This allows the user space part of your driver to deal
+ with different versions of the kernel module.
+ </para>
+</listitem>
+<listitem>
+ <para>
+ <filename>event</filename>: The total number of interrupts
+ handled by the driver since the last time the device node was
+ read.
+ </para>
+</listitem>
+</itemizedlist>
+<para>
+ These attributes appear under the
+ <filename>/sys/class/uio/uioX</filename> directory. Please
+ note that this directory might be a symlink, and not a real
+ directory. Any userspace code that accesses it must be able
+ to handle this.
+</para>
+<para>
+ Each UIO device can make one or more memory regions available for
+ memory mapping. This is necessary because some industrial I/O cards
+ require access to more than one PCI memory region in a driver.
+</para>
+<para>
+ Each mapping has its own directory in sysfs, the first mapping
+ appears as <filename>/sys/class/uio/uioX/maps/map0/</filename>.
+ Subsequent mappings create directories <filename>map1/</filename>,
+ <filename>map2/</filename>, and so on. These directories will only
+ appear if the size of the mapping is not 0.
+</para>
+<para>
+ Each <filename>mapX/</filename> directory contains four read-only files
+ that show attributes of the memory:
+</para>
+<itemizedlist>
+<listitem>
+ <para>
+ <filename>name</filename>: A string identifier for this mapping. This
+ is optional, the string can be empty. Drivers can set this to make it
+ easier for userspace to find the correct mapping.
+ </para>
+</listitem>
+<listitem>
+ <para>
+ <filename>addr</filename>: The address of memory that can be mapped.
+ </para>
+</listitem>
+<listitem>
+ <para>
+ <filename>size</filename>: The size, in bytes, of the memory
+ pointed to by addr.
+ </para>
+</listitem>
+<listitem>
+ <para>
+ <filename>offset</filename>: The offset, in bytes, that has to be
+ added to the pointer returned by <function>mmap()</function> to get
+ to the actual device memory. This is important if the device's memory
+ is not page aligned. Remember that pointers returned by
+ <function>mmap()</function> are always page aligned, so it is good
+ style to always add this offset.
+ </para>
+</listitem>
+</itemizedlist>
+
+<para>
+ From userspace, the different mappings are distinguished by adjusting
+ the <varname>offset</varname> parameter of the
+ <function>mmap()</function> call. To map the memory of mapping N, you
+ have to use N times the page size as your offset:
+</para>
+<programlisting format="linespecific">
+offset = N * getpagesize();
+</programlisting>
+
+<para>
+ Sometimes there is hardware with memory-like regions that can not be
+ mapped with the technique described here, but there are still ways to
+ access them from userspace. The most common example are x86 ioports.
+ On x86 systems, userspace can access these ioports using
+ <function>ioperm()</function>, <function>iopl()</function>,
+ <function>inb()</function>, <function>outb()</function>, and similar
+ functions.
+</para>
+<para>
+ Since these ioport regions can not be mapped, they will not appear under
+ <filename>/sys/class/uio/uioX/maps/</filename> like the normal memory
+ described above. Without information about the port regions a hardware
+ has to offer, it becomes difficult for the userspace part of the
+ driver to find out which ports belong to which UIO device.
+</para>
+<para>
+ To address this situation, the new directory
+ <filename>/sys/class/uio/uioX/portio/</filename> was added. It only
+ exists if the driver wants to pass information about one or more port
+ regions to userspace. If that is the case, subdirectories named
+ <filename>port0</filename>, <filename>port1</filename>, and so on,
+ will appear underneath
+ <filename>/sys/class/uio/uioX/portio/</filename>.
+</para>
+<para>
+ Each <filename>portX/</filename> directory contains four read-only
+ files that show name, start, size, and type of the port region:
+</para>
+<itemizedlist>
+<listitem>
+ <para>
+ <filename>name</filename>: A string identifier for this port region.
+ The string is optional and can be empty. Drivers can set it to make it
+ easier for userspace to find a certain port region.
+ </para>
+</listitem>
+<listitem>
+ <para>
+ <filename>start</filename>: The first port of this region.
+ </para>
+</listitem>
+<listitem>
+ <para>
+ <filename>size</filename>: The number of ports in this region.
+ </para>
+</listitem>
+<listitem>
+ <para>
+ <filename>porttype</filename>: A string describing the type of port.
+ </para>
+</listitem>
+</itemizedlist>
+
+
+</sect1>
+</chapter>
+
+<chapter id="custom_kernel_module" xreflabel="Writing your own kernel module">
+<?dbhtml filename="custom_kernel_module.html"?>
+<title>Writing your own kernel module</title>
+ <para>
+ Please have a look at <filename>uio_cif.c</filename> as an
+ example. The following paragraphs explain the different
+ sections of this file.
+ </para>
+
+<sect1 id="uio_info">
+<title>struct uio_info</title>
+ <para>
+ This structure tells the framework the details of your driver,
+ Some of the members are required, others are optional.
+ </para>
+
+<itemizedlist>
+<listitem><para>
+<varname>const char *name</varname>: Required. The name of your driver as
+it will appear in sysfs. I recommend using the name of your module for this.
+</para></listitem>
+
+<listitem><para>
+<varname>const char *version</varname>: Required. This string appears in
+<filename>/sys/class/uio/uioX/version</filename>.
+</para></listitem>
+
+<listitem><para>
+<varname>struct uio_mem mem[ MAX_UIO_MAPS ]</varname>: Required if you
+have memory that can be mapped with <function>mmap()</function>. For each
+mapping you need to fill one of the <varname>uio_mem</varname> structures.
+See the description below for details.
+</para></listitem>
+
+<listitem><para>
+<varname>struct uio_port port[ MAX_UIO_PORTS_REGIONS ]</varname>: Required
+if you want to pass information about ioports to userspace. For each port
+region you need to fill one of the <varname>uio_port</varname> structures.
+See the description below for details.
+</para></listitem>
+
+<listitem><para>
+<varname>long irq</varname>: Required. If your hardware generates an
+interrupt, it's your modules task to determine the irq number during
+initialization. If you don't have a hardware generated interrupt but
+want to trigger the interrupt handler in some other way, set
+<varname>irq</varname> to <varname>UIO_IRQ_CUSTOM</varname>.
+If you had no interrupt at all, you could set
+<varname>irq</varname> to <varname>UIO_IRQ_NONE</varname>, though this
+rarely makes sense.
+</para></listitem>
+
+<listitem><para>
+<varname>unsigned long irq_flags</varname>: Required if you've set
+<varname>irq</varname> to a hardware interrupt number. The flags given
+here will be used in the call to <function>request_irq()</function>.
+</para></listitem>
+
+<listitem><para>
+<varname>int (*mmap)(struct uio_info *info, struct vm_area_struct
+*vma)</varname>: Optional. If you need a special
+<function>mmap()</function> function, you can set it here. If this
+pointer is not NULL, your <function>mmap()</function> will be called
+instead of the built-in one.
+</para></listitem>
+
+<listitem><para>
+<varname>int (*open)(struct uio_info *info, struct inode *inode)
+</varname>: Optional. You might want to have your own
+<function>open()</function>, e.g. to enable interrupts only when your
+device is actually used.
+</para></listitem>
+
+<listitem><para>
+<varname>int (*release)(struct uio_info *info, struct inode *inode)
+</varname>: Optional. If you define your own
+<function>open()</function>, you will probably also want a custom
+<function>release()</function> function.
+</para></listitem>
+
+<listitem><para>
+<varname>int (*irqcontrol)(struct uio_info *info, s32 irq_on)
+</varname>: Optional. If you need to be able to enable or disable
+interrupts from userspace by writing to <filename>/dev/uioX</filename>,
+you can implement this function. The parameter <varname>irq_on</varname>
+will be 0 to disable interrupts and 1 to enable them.
+</para></listitem>
+</itemizedlist>
+
+<para>
+Usually, your device will have one or more memory regions that can be mapped
+to user space. For each region, you have to set up a
+<varname>struct uio_mem</varname> in the <varname>mem[]</varname> array.
+Here's a description of the fields of <varname>struct uio_mem</varname>:
+</para>
+
+<itemizedlist>
+<listitem><para>
+<varname>const char *name</varname>: Optional. Set this to help identify
+the memory region, it will show up in the corresponding sysfs node.
+</para></listitem>
+
+<listitem><para>
+<varname>int memtype</varname>: Required if the mapping is used. Set this to
+<varname>UIO_MEM_PHYS</varname> if you you have physical memory on your
+card to be mapped. Use <varname>UIO_MEM_LOGICAL</varname> for logical
+memory (e.g. allocated with <function>kmalloc()</function>). There's also
+<varname>UIO_MEM_VIRTUAL</varname> for virtual memory.
+</para></listitem>
+
+<listitem><para>
+<varname>phys_addr_t addr</varname>: Required if the mapping is used.
+Fill in the address of your memory block. This address is the one that
+appears in sysfs.
+</para></listitem>
+
+<listitem><para>
+<varname>resource_size_t size</varname>: Fill in the size of the
+memory block that <varname>addr</varname> points to. If <varname>size</varname>
+is zero, the mapping is considered unused. Note that you
+<emphasis>must</emphasis> initialize <varname>size</varname> with zero for
+all unused mappings.
+</para></listitem>
+
+<listitem><para>
+<varname>void *internal_addr</varname>: If you have to access this memory
+region from within your kernel module, you will want to map it internally by
+using something like <function>ioremap()</function>. Addresses
+returned by this function cannot be mapped to user space, so you must not
+store it in <varname>addr</varname>. Use <varname>internal_addr</varname>
+instead to remember such an address.
+</para></listitem>
+</itemizedlist>
+
+<para>
+Please do not touch the <varname>map</varname> element of
+<varname>struct uio_mem</varname>! It is used by the UIO framework
+to set up sysfs files for this mapping. Simply leave it alone.
+</para>
+
+<para>
+Sometimes, your device can have one or more port regions which can not be
+mapped to userspace. But if there are other possibilities for userspace to
+access these ports, it makes sense to make information about the ports
+available in sysfs. For each region, you have to set up a
+<varname>struct uio_port</varname> in the <varname>port[]</varname> array.
+Here's a description of the fields of <varname>struct uio_port</varname>:
+</para>
+
+<itemizedlist>
+<listitem><para>
+<varname>char *porttype</varname>: Required. Set this to one of the predefined
+constants. Use <varname>UIO_PORT_X86</varname> for the ioports found in x86
+architectures.
+</para></listitem>
+
+<listitem><para>
+<varname>unsigned long start</varname>: Required if the port region is used.
+Fill in the number of the first port of this region.
+</para></listitem>
+
+<listitem><para>
+<varname>unsigned long size</varname>: Fill in the number of ports in this
+region. If <varname>size</varname> is zero, the region is considered unused.
+Note that you <emphasis>must</emphasis> initialize <varname>size</varname>
+with zero for all unused regions.
+</para></listitem>
+</itemizedlist>
+
+<para>
+Please do not touch the <varname>portio</varname> element of
+<varname>struct uio_port</varname>! It is used internally by the UIO
+framework to set up sysfs files for this region. Simply leave it alone.
+</para>
+
+</sect1>
+
+<sect1 id="adding_irq_handler">
+<title>Adding an interrupt handler</title>
+ <para>
+ What you need to do in your interrupt handler depends on your
+ hardware and on how you want to handle it. You should try to
+ keep the amount of code in your kernel interrupt handler low.
+ If your hardware requires no action that you
+ <emphasis>have</emphasis> to perform after each interrupt,
+ then your handler can be empty.</para> <para>If, on the other
+ hand, your hardware <emphasis>needs</emphasis> some action to
+ be performed after each interrupt, then you
+ <emphasis>must</emphasis> do it in your kernel module. Note
+ that you cannot rely on the userspace part of your driver. Your
+ userspace program can terminate at any time, possibly leaving
+ your hardware in a state where proper interrupt handling is
+ still required.
+ </para>
+
+ <para>
+ There might also be applications where you want to read data
+ from your hardware at each interrupt and buffer it in a piece
+ of kernel memory you've allocated for that purpose. With this
+ technique you could avoid loss of data if your userspace
+ program misses an interrupt.
+ </para>
+
+ <para>
+ A note on shared interrupts: Your driver should support
+ interrupt sharing whenever this is possible. It is possible if
+ and only if your driver can detect whether your hardware has
+ triggered the interrupt or not. This is usually done by looking
+ at an interrupt status register. If your driver sees that the
+ IRQ bit is actually set, it will perform its actions, and the
+ handler returns IRQ_HANDLED. If the driver detects that it was
+ not your hardware that caused the interrupt, it will do nothing
+ and return IRQ_NONE, allowing the kernel to call the next
+ possible interrupt handler.
+ </para>
+
+ <para>
+ If you decide not to support shared interrupts, your card
+ won't work in computers with no free interrupts. As this
+ frequently happens on the PC platform, you can save yourself a
+ lot of trouble by supporting interrupt sharing.
+ </para>
+</sect1>
+
+<sect1 id="using_uio_pdrv">
+<title>Using uio_pdrv for platform devices</title>
+ <para>
+ In many cases, UIO drivers for platform devices can be handled in a
+ generic way. In the same place where you define your
+ <varname>struct platform_device</varname>, you simply also implement
+ your interrupt handler and fill your
+ <varname>struct uio_info</varname>. A pointer to this
+ <varname>struct uio_info</varname> is then used as
+ <varname>platform_data</varname> for your platform device.
+ </para>
+ <para>
+ You also need to set up an array of <varname>struct resource</varname>
+ containing addresses and sizes of your memory mappings. This
+ information is passed to the driver using the
+ <varname>.resource</varname> and <varname>.num_resources</varname>
+ elements of <varname>struct platform_device</varname>.
+ </para>
+ <para>
+ You now have to set the <varname>.name</varname> element of
+ <varname>struct platform_device</varname> to
+ <varname>"uio_pdrv"</varname> to use the generic UIO platform device
+ driver. This driver will fill the <varname>mem[]</varname> array
+ according to the resources given, and register the device.
+ </para>
+ <para>
+ The advantage of this approach is that you only have to edit a file
+ you need to edit anyway. You do not have to create an extra driver.
+ </para>
+</sect1>
+
+<sect1 id="using_uio_pdrv_genirq">
+<title>Using uio_pdrv_genirq for platform devices</title>
+ <para>
+ Especially in embedded devices, you frequently find chips where the
+ irq pin is tied to its own dedicated interrupt line. In such cases,
+ where you can be really sure the interrupt is not shared, we can take
+ the concept of <varname>uio_pdrv</varname> one step further and use a
+ generic interrupt handler. That's what
+ <varname>uio_pdrv_genirq</varname> does.
+ </para>
+ <para>
+ The setup for this driver is the same as described above for
+ <varname>uio_pdrv</varname>, except that you do not implement an
+ interrupt handler. The <varname>.handler</varname> element of
+ <varname>struct uio_info</varname> must remain
+ <varname>NULL</varname>. The <varname>.irq_flags</varname> element
+ must not contain <varname>IRQF_SHARED</varname>.
+ </para>
+ <para>
+ You will set the <varname>.name</varname> element of
+ <varname>struct platform_device</varname> to
+ <varname>"uio_pdrv_genirq"</varname> to use this driver.
+ </para>
+ <para>
+ The generic interrupt handler of <varname>uio_pdrv_genirq</varname>
+ will simply disable the interrupt line using
+ <function>disable_irq_nosync()</function>. After doing its work,
+ userspace can reenable the interrupt by writing 0x00000001 to the UIO
+ device file. The driver already implements an
+ <function>irq_control()</function> to make this possible, you must not
+ implement your own.
+ </para>
+ <para>
+ Using <varname>uio_pdrv_genirq</varname> not only saves a few lines of
+ interrupt handler code. You also do not need to know anything about
+ the chip's internal registers to create the kernel part of the driver.
+ All you need to know is the irq number of the pin the chip is
+ connected to.
+ </para>
+</sect1>
+
+<sect1 id="using-uio_dmem_genirq">
+<title>Using uio_dmem_genirq for platform devices</title>
+ <para>
+ In addition to statically allocated memory ranges, they may also be
+ a desire to use dynamically allocated regions in a user space driver.
+ In particular, being able to access memory made available through the
+ dma-mapping API, may be particularly useful. The
+ <varname>uio_dmem_genirq</varname> driver provides a way to accomplish
+ this.
+ </para>
+ <para>
+ This driver is used in a similar manner to the
+ <varname>"uio_pdrv_genirq"</varname> driver with respect to interrupt
+ configuration and handling.
+ </para>
+ <para>
+ Set the <varname>.name</varname> element of
+ <varname>struct platform_device</varname> to
+ <varname>"uio_dmem_genirq"</varname> to use this driver.
+ </para>
+ <para>
+ When using this driver, fill in the <varname>.platform_data</varname>
+ element of <varname>struct platform_device</varname>, which is of type
+ <varname>struct uio_dmem_genirq_pdata</varname> and which contains the
+ following elements:
+ </para>
+ <itemizedlist>
+ <listitem><para><varname>struct uio_info uioinfo</varname>: The same
+ structure used as the <varname>uio_pdrv_genirq</varname> platform
+ data</para></listitem>
+ <listitem><para><varname>unsigned int *dynamic_region_sizes</varname>:
+ Pointer to list of sizes of dynamic memory regions to be mapped into
+ user space.
+ </para></listitem>
+ <listitem><para><varname>unsigned int num_dynamic_regions</varname>:
+ Number of elements in <varname>dynamic_region_sizes</varname> array.
+ </para></listitem>
+ </itemizedlist>
+ <para>
+ The dynamic regions defined in the platform data will be appended to
+ the <varname> mem[] </varname> array after the platform device
+ resources, which implies that the total number of static and dynamic
+ memory regions cannot exceed <varname>MAX_UIO_MAPS</varname>.
+ </para>
+ <para>
+ The dynamic memory regions will be allocated when the UIO device file,
+ <varname>/dev/uioX</varname> is opened.
+ Similar to static memory resources, the memory region information for
+ dynamic regions is then visible via sysfs at
+ <varname>/sys/class/uio/uioX/maps/mapY/*</varname>.
+ The dynamic memory regions will be freed when the UIO device file is
+ closed. When no processes are holding the device file open, the address
+ returned to userspace is ~0.
+ </para>
+</sect1>
+
+</chapter>
+
+<chapter id="userspace_driver" xreflabel="Writing a driver in user space">
+<?dbhtml filename="userspace_driver.html"?>
+<title>Writing a driver in userspace</title>
+ <para>
+ Once you have a working kernel module for your hardware, you can
+ write the userspace part of your driver. You don't need any special
+ libraries, your driver can be written in any reasonable language,
+ you can use floating point numbers and so on. In short, you can
+ use all the tools and libraries you'd normally use for writing a
+ userspace application.
+ </para>
+
+<sect1 id="getting_uio_information">
+<title>Getting information about your UIO device</title>
+ <para>
+ Information about all UIO devices is available in sysfs. The
+ first thing you should do in your driver is check
+ <varname>name</varname> and <varname>version</varname> to
+ make sure your talking to the right device and that its kernel
+ driver has the version you expect.
+ </para>
+ <para>
+ You should also make sure that the memory mapping you need
+ exists and has the size you expect.
+ </para>
+ <para>
+ There is a tool called <varname>lsuio</varname> that lists
+ UIO devices and their attributes. It is available here:
+ </para>
+ <para>
+ <ulink url="http://www.osadl.org/projects/downloads/UIO/user/">
+ http://www.osadl.org/projects/downloads/UIO/user/</ulink>
+ </para>
+ <para>
+ With <varname>lsuio</varname> you can quickly check if your
+ kernel module is loaded and which attributes it exports.
+ Have a look at the manpage for details.
+ </para>
+ <para>
+ The source code of <varname>lsuio</varname> can serve as an
+ example for getting information about an UIO device.
+ The file <filename>uio_helper.c</filename> contains a lot of
+ functions you could use in your userspace driver code.
+ </para>
+</sect1>
+
+<sect1 id="mmap_device_memory">
+<title>mmap() device memory</title>
+ <para>
+ After you made sure you've got the right device with the
+ memory mappings you need, all you have to do is to call
+ <function>mmap()</function> to map the device's memory
+ to userspace.
+ </para>
+ <para>
+ The parameter <varname>offset</varname> of the
+ <function>mmap()</function> call has a special meaning
+ for UIO devices: It is used to select which mapping of
+ your device you want to map. To map the memory of
+ mapping N, you have to use N times the page size as
+ your offset:
+ </para>
+<programlisting format="linespecific">
+ offset = N * getpagesize();
+</programlisting>
+ <para>
+ N starts from zero, so if you've got only one memory
+ range to map, set <varname>offset = 0</varname>.
+ A drawback of this technique is that memory is always
+ mapped beginning with its start address.
+ </para>
+</sect1>
+
+<sect1 id="wait_for_interrupts">
+<title>Waiting for interrupts</title>
+ <para>
+ After you successfully mapped your devices memory, you
+ can access it like an ordinary array. Usually, you will
+ perform some initialization. After that, your hardware
+ starts working and will generate an interrupt as soon
+ as it's finished, has some data available, or needs your
+ attention because an error occurred.
+ </para>
+ <para>
+ <filename>/dev/uioX</filename> is a read-only file. A
+ <function>read()</function> will always block until an
+ interrupt occurs. There is only one legal value for the
+ <varname>count</varname> parameter of
+ <function>read()</function>, and that is the size of a
+ signed 32 bit integer (4). Any other value for
+ <varname>count</varname> causes <function>read()</function>
+ to fail. The signed 32 bit integer read is the interrupt
+ count of your device. If the value is one more than the value
+ you read the last time, everything is OK. If the difference
+ is greater than one, you missed interrupts.
+ </para>
+ <para>
+ You can also use <function>select()</function> on
+ <filename>/dev/uioX</filename>.
+ </para>
+</sect1>
+
+</chapter>
+
+<chapter id="uio_pci_generic" xreflabel="Using Generic driver for PCI cards">
+<?dbhtml filename="uio_pci_generic.html"?>
+<title>Generic PCI UIO driver</title>
+ <para>
+ The generic driver is a kernel module named uio_pci_generic.
+ It can work with any device compliant to PCI 2.3 (circa 2002) and
+ any compliant PCI Express device. Using this, you only need to
+ write the userspace driver, removing the need to write
+ a hardware-specific kernel module.
+ </para>
+
+<sect1 id="uio_pci_generic_binding">
+<title>Making the driver recognize the device</title>
+ <para>
+Since the driver does not declare any device ids, it will not get loaded
+automatically and will not automatically bind to any devices, you must load it
+and allocate id to the driver yourself. For example:
+ <programlisting>
+ modprobe uio_pci_generic
+ echo &quot;8086 10f5&quot; &gt; /sys/bus/pci/drivers/uio_pci_generic/new_id
+ </programlisting>
+ </para>
+ <para>
+If there already is a hardware specific kernel driver for your device, the
+generic driver still won't bind to it, in this case if you want to use the
+generic driver (why would you?) you'll have to manually unbind the hardware
+specific driver and bind the generic driver, like this:
+ <programlisting>
+ echo -n 0000:00:19.0 &gt; /sys/bus/pci/drivers/e1000e/unbind
+ echo -n 0000:00:19.0 &gt; /sys/bus/pci/drivers/uio_pci_generic/bind
+ </programlisting>
+ </para>
+ <para>
+You can verify that the device has been bound to the driver
+by looking for it in sysfs, for example like the following:
+ <programlisting>
+ ls -l /sys/bus/pci/devices/0000:00:19.0/driver
+ </programlisting>
+Which if successful should print
+ <programlisting>
+ .../0000:00:19.0/driver -&gt; ../../../bus/pci/drivers/uio_pci_generic
+ </programlisting>
+Note that the generic driver will not bind to old PCI 2.2 devices.
+If binding the device failed, run the following command:
+ <programlisting>
+ dmesg
+ </programlisting>
+and look in the output for failure reasons
+ </para>
+</sect1>
+
+<sect1 id="uio_pci_generic_internals">
+<title>Things to know about uio_pci_generic</title>
+ <para>
+Interrupts are handled using the Interrupt Disable bit in the PCI command
+register and Interrupt Status bit in the PCI status register. All devices
+compliant to PCI 2.3 (circa 2002) and all compliant PCI Express devices should
+support these bits. uio_pci_generic detects this support, and won't bind to
+devices which do not support the Interrupt Disable Bit in the command register.
+ </para>
+ <para>
+On each interrupt, uio_pci_generic sets the Interrupt Disable bit.
+This prevents the device from generating further interrupts
+until the bit is cleared. The userspace driver should clear this
+bit before blocking and waiting for more interrupts.
+ </para>
+</sect1>
+<sect1 id="uio_pci_generic_userspace">
+<title>Writing userspace driver using uio_pci_generic</title>
+ <para>
+Userspace driver can use pci sysfs interface, or the
+libpci libray that wraps it, to talk to the device and to
+re-enable interrupts by writing to the command register.
+ </para>
+</sect1>
+<sect1 id="uio_pci_generic_example">
+<title>Example code using uio_pci_generic</title>
+ <para>
+Here is some sample userspace driver code using uio_pci_generic:
+<programlisting>
+#include &lt;stdlib.h&gt;
+#include &lt;stdio.h&gt;
+#include &lt;unistd.h&gt;
+#include &lt;sys/types.h&gt;
+#include &lt;sys/stat.h&gt;
+#include &lt;fcntl.h&gt;
+#include &lt;errno.h&gt;
+
+int main()
+{
+ int uiofd;
+ int configfd;
+ int err;
+ int i;
+ unsigned icount;
+ unsigned char command_high;
+
+ uiofd = open(&quot;/dev/uio0&quot;, O_RDONLY);
+ if (uiofd &lt; 0) {
+ perror(&quot;uio open:&quot;);
+ return errno;
+ }
+ configfd = open(&quot;/sys/class/uio/uio0/device/config&quot;, O_RDWR);
+ if (configfd &lt; 0) {
+ perror(&quot;config open:&quot;);
+ return errno;
+ }
+
+ /* Read and cache command value */
+ err = pread(configfd, &amp;command_high, 1, 5);
+ if (err != 1) {
+ perror(&quot;command config read:&quot;);
+ return errno;
+ }
+ command_high &amp;= ~0x4;
+
+ for(i = 0;; ++i) {
+ /* Print out a message, for debugging. */
+ if (i == 0)
+ fprintf(stderr, &quot;Started uio test driver.\n&quot;);
+ else
+ fprintf(stderr, &quot;Interrupts: %d\n&quot;, icount);
+
+ /****************************************/
+ /* Here we got an interrupt from the
+ device. Do something to it. */
+ /****************************************/
+
+ /* Re-enable interrupts. */
+ err = pwrite(configfd, &amp;command_high, 1, 5);
+ if (err != 1) {
+ perror(&quot;config write:&quot;);
+ break;
+ }
+
+ /* Wait for next interrupt. */
+ err = read(uiofd, &amp;icount, 4);
+ if (err != 4) {
+ perror(&quot;uio read:&quot;);
+ break;
+ }
+
+ }
+ return errno;
+}
+
+</programlisting>
+ </para>
+</sect1>
+
+</chapter>
+
+<appendix id="app1">
+<title>Further information</title>
+<itemizedlist>
+ <listitem><para>
+ <ulink url="http://www.osadl.org">
+ OSADL homepage.</ulink>
+ </para></listitem>
+ <listitem><para>
+ <ulink url="http://www.linutronix.de">
+ Linutronix homepage.</ulink>
+ </para></listitem>
+</itemizedlist>
+</appendix>
+
+</book>
diff --git a/Documentation/DocBook/usb.tmpl b/Documentation/DocBook/usb.tmpl
new file mode 100644
index 000000000..4cd5b2cd0
--- /dev/null
+++ b/Documentation/DocBook/usb.tmpl
@@ -0,0 +1,980 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Linux-USB-API">
+ <bookinfo>
+ <title>The Linux-USB Host Side API</title>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+<chapter id="intro">
+ <title>Introduction to USB on Linux</title>
+
+ <para>A Universal Serial Bus (USB) is used to connect a host,
+ such as a PC or workstation, to a number of peripheral
+ devices. USB uses a tree structure, with the host as the
+ root (the system's master), hubs as interior nodes, and
+ peripherals as leaves (and slaves).
+ Modern PCs support several such trees of USB devices, usually
+ one USB 2.0 tree (480 Mbit/sec each) with
+ a few USB 1.1 trees (12 Mbit/sec each) that are used when you
+ connect a USB 1.1 device directly to the machine's "root hub".
+ </para>
+
+ <para>That master/slave asymmetry was designed-in for a number of
+ reasons, one being ease of use. It is not physically possible to
+ assemble (legal) USB cables incorrectly: all upstream "to the host"
+ connectors are the rectangular type (matching the sockets on
+ root hubs), and all downstream connectors are the squarish type
+ (or they are built into the peripheral).
+ Also, the host software doesn't need to deal with distributed
+ auto-configuration since the pre-designated master node manages all that.
+ And finally, at the electrical level, bus protocol overhead is reduced by
+ eliminating arbitration and moving scheduling into the host software.
+ </para>
+
+ <para>USB 1.0 was announced in January 1996 and was revised
+ as USB 1.1 (with improvements in hub specification and
+ support for interrupt-out transfers) in September 1998.
+ USB 2.0 was released in April 2000, adding high-speed
+ transfers and transaction-translating hubs (used for USB 1.1
+ and 1.0 backward compatibility).
+ </para>
+
+ <para>Kernel developers added USB support to Linux early in the 2.2 kernel
+ series, shortly before 2.3 development forked. Updates from 2.3 were
+ regularly folded back into 2.2 releases, which improved reliability and
+ brought <filename>/sbin/hotplug</filename> support as well more drivers.
+ Such improvements were continued in the 2.5 kernel series, where they added
+ USB 2.0 support, improved performance, and made the host controller drivers
+ (HCDs) more consistent. They also simplified the API (to make bugs less
+ likely) and added internal "kerneldoc" documentation.
+ </para>
+
+ <para>Linux can run inside USB devices as well as on
+ the hosts that control the devices.
+ But USB device drivers running inside those peripherals
+ don't do the same things as the ones running inside hosts,
+ so they've been given a different name:
+ <emphasis>gadget drivers</emphasis>.
+ This document does not cover gadget drivers.
+ </para>
+
+ </chapter>
+
+<chapter id="host">
+ <title>USB Host-Side API Model</title>
+
+ <para>Host-side drivers for USB devices talk to the "usbcore" APIs.
+ There are two. One is intended for
+ <emphasis>general-purpose</emphasis> drivers (exposed through
+ driver frameworks), and the other is for drivers that are
+ <emphasis>part of the core</emphasis>.
+ Such core drivers include the <emphasis>hub</emphasis> driver
+ (which manages trees of USB devices) and several different kinds
+ of <emphasis>host controller drivers</emphasis>,
+ which control individual busses.
+ </para>
+
+ <para>The device model seen by USB drivers is relatively complex.
+ </para>
+
+ <itemizedlist>
+
+ <listitem><para>USB supports four kinds of data transfers
+ (control, bulk, interrupt, and isochronous). Two of them (control
+ and bulk) use bandwidth as it's available,
+ while the other two (interrupt and isochronous)
+ are scheduled to provide guaranteed bandwidth.
+ </para></listitem>
+
+ <listitem><para>The device description model includes one or more
+ "configurations" per device, only one of which is active at a time.
+ Devices that are capable of high-speed operation must also support
+ full-speed configurations, along with a way to ask about the
+ "other speed" configurations which might be used.
+ </para></listitem>
+
+ <listitem><para>Configurations have one or more "interfaces", each
+ of which may have "alternate settings". Interfaces may be
+ standardized by USB "Class" specifications, or may be specific to
+ a vendor or device.</para>
+
+ <para>USB device drivers actually bind to interfaces, not devices.
+ Think of them as "interface drivers", though you
+ may not see many devices where the distinction is important.
+ <emphasis>Most USB devices are simple, with only one configuration,
+ one interface, and one alternate setting.</emphasis>
+ </para></listitem>
+
+ <listitem><para>Interfaces have one or more "endpoints", each of
+ which supports one type and direction of data transfer such as
+ "bulk out" or "interrupt in". The entire configuration may have
+ up to sixteen endpoints in each direction, allocated as needed
+ among all the interfaces.
+ </para></listitem>
+
+ <listitem><para>Data transfer on USB is packetized; each endpoint
+ has a maximum packet size.
+ Drivers must often be aware of conventions such as flagging the end
+ of bulk transfers using "short" (including zero length) packets.
+ </para></listitem>
+
+ <listitem><para>The Linux USB API supports synchronous calls for
+ control and bulk messages.
+ It also supports asynchronous calls for all kinds of data transfer,
+ using request structures called "URBs" (USB Request Blocks).
+ </para></listitem>
+
+ </itemizedlist>
+
+ <para>Accordingly, the USB Core API exposed to device drivers
+ covers quite a lot of territory. You'll probably need to consult
+ the USB 2.0 specification, available online from www.usb.org at
+ no cost, as well as class or device specifications.
+ </para>
+
+ <para>The only host-side drivers that actually touch hardware
+ (reading/writing registers, handling IRQs, and so on) are the HCDs.
+ In theory, all HCDs provide the same functionality through the same
+ API. In practice, that's becoming more true on the 2.5 kernels,
+ but there are still differences that crop up especially with
+ fault handling. Different controllers don't necessarily report
+ the same aspects of failures, and recovery from faults (including
+ software-induced ones like unlinking an URB) isn't yet fully
+ consistent.
+ Device driver authors should make a point of doing disconnect
+ testing (while the device is active) with each different host
+ controller driver, to make sure drivers don't have bugs of
+ their own as well as to make sure they aren't relying on some
+ HCD-specific behavior.
+ (You will need external USB 1.1 and/or
+ USB 2.0 hubs to perform all those tests.)
+ </para>
+
+ </chapter>
+
+<chapter id="types"><title>USB-Standard Types</title>
+
+ <para>In <filename>&lt;linux/usb/ch9.h&gt;</filename> you will find
+ the USB data types defined in chapter 9 of the USB specification.
+ These data types are used throughout USB, and in APIs including
+ this host side API, gadget APIs, and usbfs.
+ </para>
+
+!Iinclude/linux/usb/ch9.h
+
+ </chapter>
+
+<chapter id="hostside"><title>Host-Side Data Types and Macros</title>
+
+ <para>The host side API exposes several layers to drivers, some of
+ which are more necessary than others.
+ These support lifecycle models for host side drivers
+ and devices, and support passing buffers through usbcore to
+ some HCD that performs the I/O for the device driver.
+ </para>
+
+
+!Iinclude/linux/usb.h
+
+ </chapter>
+
+ <chapter id="usbcore"><title>USB Core APIs</title>
+
+ <para>There are two basic I/O models in the USB API.
+ The most elemental one is asynchronous: drivers submit requests
+ in the form of an URB, and the URB's completion callback
+ handle the next step.
+ All USB transfer types support that model, although there
+ are special cases for control URBs (which always have setup
+ and status stages, but may not have a data stage) and
+ isochronous URBs (which allow large packets and include
+ per-packet fault reports).
+ Built on top of that is synchronous API support, where a
+ driver calls a routine that allocates one or more URBs,
+ submits them, and waits until they complete.
+ There are synchronous wrappers for single-buffer control
+ and bulk transfers (which are awkward to use in some
+ driver disconnect scenarios), and for scatterlist based
+ streaming i/o (bulk or interrupt).
+ </para>
+
+ <para>USB drivers need to provide buffers that can be
+ used for DMA, although they don't necessarily need to
+ provide the DMA mapping themselves.
+ There are APIs to use used when allocating DMA buffers,
+ which can prevent use of bounce buffers on some systems.
+ In some cases, drivers may be able to rely on 64bit DMA
+ to eliminate another kind of bounce buffer.
+ </para>
+
+!Edrivers/usb/core/urb.c
+!Edrivers/usb/core/message.c
+!Edrivers/usb/core/file.c
+!Edrivers/usb/core/driver.c
+!Edrivers/usb/core/usb.c
+!Edrivers/usb/core/hub.c
+ </chapter>
+
+ <chapter id="hcd"><title>Host Controller APIs</title>
+
+ <para>These APIs are only for use by host controller drivers,
+ most of which implement standard register interfaces such as
+ EHCI, OHCI, or UHCI.
+ UHCI was one of the first interfaces, designed by Intel and
+ also used by VIA; it doesn't do much in hardware.
+ OHCI was designed later, to have the hardware do more work
+ (bigger transfers, tracking protocol state, and so on).
+ EHCI was designed with USB 2.0; its design has features that
+ resemble OHCI (hardware does much more work) as well as
+ UHCI (some parts of ISO support, TD list processing).
+ </para>
+
+ <para>There are host controllers other than the "big three",
+ although most PCI based controllers (and a few non-PCI based
+ ones) use one of those interfaces.
+ Not all host controllers use DMA; some use PIO, and there
+ is also a simulator.
+ </para>
+
+ <para>The same basic APIs are available to drivers for all
+ those controllers.
+ For historical reasons they are in two layers:
+ <structname>struct usb_bus</structname> is a rather thin
+ layer that became available in the 2.2 kernels, while
+ <structname>struct usb_hcd</structname> is a more featureful
+ layer (available in later 2.4 kernels and in 2.5) that
+ lets HCDs share common code, to shrink driver size
+ and significantly reduce hcd-specific behaviors.
+ </para>
+
+!Edrivers/usb/core/hcd.c
+!Edrivers/usb/core/hcd-pci.c
+!Idrivers/usb/core/buffer.c
+ </chapter>
+
+ <chapter id="usbfs">
+ <title>The USB Filesystem (usbfs)</title>
+
+ <para>This chapter presents the Linux <emphasis>usbfs</emphasis>.
+ You may prefer to avoid writing new kernel code for your
+ USB driver; that's the problem that usbfs set out to solve.
+ User mode device drivers are usually packaged as applications
+ or libraries, and may use usbfs through some programming library
+ that wraps it. Such libraries include
+ <ulink url="http://libusb.sourceforge.net">libusb</ulink>
+ for C/C++, and
+ <ulink url="http://jUSB.sourceforge.net">jUSB</ulink> for Java.
+ </para>
+
+ <note><title>Unfinished</title>
+ <para>This particular documentation is incomplete,
+ especially with respect to the asynchronous mode.
+ As of kernel 2.5.66 the code and this (new) documentation
+ need to be cross-reviewed.
+ </para>
+ </note>
+
+ <para>Configure usbfs into Linux kernels by enabling the
+ <emphasis>USB filesystem</emphasis> option (CONFIG_USB_DEVICEFS),
+ and you get basic support for user mode USB device drivers.
+ Until relatively recently it was often (confusingly) called
+ <emphasis>usbdevfs</emphasis> although it wasn't solving what
+ <emphasis>devfs</emphasis> was.
+ Every USB device will appear in usbfs, regardless of whether or
+ not it has a kernel driver.
+ </para>
+
+ <sect1 id="usbfs-files">
+ <title>What files are in "usbfs"?</title>
+
+ <para>Conventionally mounted at
+ <filename>/proc/bus/usb</filename>, usbfs
+ features include:
+ <itemizedlist>
+ <listitem><para><filename>/proc/bus/usb/devices</filename>
+ ... a text file
+ showing each of the USB devices on known to the kernel,
+ and their configuration descriptors.
+ You can also poll() this to learn about new devices.
+ </para></listitem>
+ <listitem><para><filename>/proc/bus/usb/BBB/DDD</filename>
+ ... magic files
+ exposing the each device's configuration descriptors, and
+ supporting a series of ioctls for making device requests,
+ including I/O to devices. (Purely for access by programs.)
+ </para></listitem>
+ </itemizedlist>
+ </para>
+
+ <para> Each bus is given a number (BBB) based on when it was
+ enumerated; within each bus, each device is given a similar
+ number (DDD).
+ Those BBB/DDD paths are not "stable" identifiers;
+ expect them to change even if you always leave the devices
+ plugged in to the same hub port.
+ <emphasis>Don't even think of saving these in application
+ configuration files.</emphasis>
+ Stable identifiers are available, for user mode applications
+ that want to use them. HID and networking devices expose
+ these stable IDs, so that for example you can be sure that
+ you told the right UPS to power down its second server.
+ "usbfs" doesn't (yet) expose those IDs.
+ </para>
+
+ </sect1>
+
+ <sect1 id="usbfs-fstab">
+ <title>Mounting and Access Control</title>
+
+ <para>There are a number of mount options for usbfs, which will
+ be of most interest to you if you need to override the default
+ access control policy.
+ That policy is that only root may read or write device files
+ (<filename>/proc/bus/BBB/DDD</filename>) although anyone may read
+ the <filename>devices</filename>
+ or <filename>drivers</filename> files.
+ I/O requests to the device also need the CAP_SYS_RAWIO capability,
+ </para>
+
+ <para>The significance of that is that by default, all user mode
+ device drivers need super-user privileges.
+ You can change modes or ownership in a driver setup
+ when the device hotplugs, or maye just start the
+ driver right then, as a privileged server (or some activity
+ within one).
+ That's the most secure approach for multi-user systems,
+ but for single user systems ("trusted" by that user)
+ it's more convenient just to grant everyone all access
+ (using the <emphasis>devmode=0666</emphasis> option)
+ so the driver can start whenever it's needed.
+ </para>
+
+ <para>The mount options for usbfs, usable in /etc/fstab or
+ in command line invocations of <emphasis>mount</emphasis>, are:
+
+ <variablelist>
+ <varlistentry>
+ <term><emphasis>busgid</emphasis>=NNNNN</term>
+ <listitem><para>Controls the GID used for the
+ /proc/bus/usb/BBB
+ directories. (Default: 0)</para></listitem></varlistentry>
+ <varlistentry><term><emphasis>busmode</emphasis>=MMM</term>
+ <listitem><para>Controls the file mode used for the
+ /proc/bus/usb/BBB
+ directories. (Default: 0555)
+ </para></listitem></varlistentry>
+ <varlistentry><term><emphasis>busuid</emphasis>=NNNNN</term>
+ <listitem><para>Controls the UID used for the
+ /proc/bus/usb/BBB
+ directories. (Default: 0)</para></listitem></varlistentry>
+
+ <varlistentry><term><emphasis>devgid</emphasis>=NNNNN</term>
+ <listitem><para>Controls the GID used for the
+ /proc/bus/usb/BBB/DDD
+ files. (Default: 0)</para></listitem></varlistentry>
+ <varlistentry><term><emphasis>devmode</emphasis>=MMM</term>
+ <listitem><para>Controls the file mode used for the
+ /proc/bus/usb/BBB/DDD
+ files. (Default: 0644)</para></listitem></varlistentry>
+ <varlistentry><term><emphasis>devuid</emphasis>=NNNNN</term>
+ <listitem><para>Controls the UID used for the
+ /proc/bus/usb/BBB/DDD
+ files. (Default: 0)</para></listitem></varlistentry>
+
+ <varlistentry><term><emphasis>listgid</emphasis>=NNNNN</term>
+ <listitem><para>Controls the GID used for the
+ /proc/bus/usb/devices and drivers files.
+ (Default: 0)</para></listitem></varlistentry>
+ <varlistentry><term><emphasis>listmode</emphasis>=MMM</term>
+ <listitem><para>Controls the file mode used for the
+ /proc/bus/usb/devices and drivers files.
+ (Default: 0444)</para></listitem></varlistentry>
+ <varlistentry><term><emphasis>listuid</emphasis>=NNNNN</term>
+ <listitem><para>Controls the UID used for the
+ /proc/bus/usb/devices and drivers files.
+ (Default: 0)</para></listitem></varlistentry>
+ </variablelist>
+
+ </para>
+
+ <para>Note that many Linux distributions hard-wire the mount options
+ for usbfs in their init scripts, such as
+ <filename>/etc/rc.d/rc.sysinit</filename>,
+ rather than making it easy to set this per-system
+ policy in <filename>/etc/fstab</filename>.
+ </para>
+
+ </sect1>
+
+ <sect1 id="usbfs-devices">
+ <title>/proc/bus/usb/devices</title>
+
+ <para>This file is handy for status viewing tools in user
+ mode, which can scan the text format and ignore most of it.
+ More detailed device status (including class and vendor
+ status) is available from device-specific files.
+ For information about the current format of this file,
+ see the
+ <filename>Documentation/usb/proc_usb_info.txt</filename>
+ file in your Linux kernel sources.
+ </para>
+
+ <para>This file, in combination with the poll() system call, can
+ also be used to detect when devices are added or removed:
+<programlisting>int fd;
+struct pollfd pfd;
+
+fd = open("/proc/bus/usb/devices", O_RDONLY);
+pfd = { fd, POLLIN, 0 };
+for (;;) {
+ /* The first time through, this call will return immediately. */
+ poll(&amp;pfd, 1, -1);
+
+ /* To see what's changed, compare the file's previous and current
+ contents or scan the filesystem. (Scanning is more precise.) */
+}</programlisting>
+ Note that this behavior is intended to be used for informational
+ and debug purposes. It would be more appropriate to use programs
+ such as udev or HAL to initialize a device or start a user-mode
+ helper program, for instance.
+ </para>
+ </sect1>
+
+ <sect1 id="usbfs-bbbddd">
+ <title>/proc/bus/usb/BBB/DDD</title>
+
+ <para>Use these files in one of these basic ways:
+ </para>
+
+ <para><emphasis>They can be read,</emphasis>
+ producing first the device descriptor
+ (18 bytes) and then the descriptors for the current configuration.
+ See the USB 2.0 spec for details about those binary data formats.
+ You'll need to convert most multibyte values from little endian
+ format to your native host byte order, although a few of the
+ fields in the device descriptor (both of the BCD-encoded fields,
+ and the vendor and product IDs) will be byteswapped for you.
+ Note that configuration descriptors include descriptors for
+ interfaces, altsettings, endpoints, and maybe additional
+ class descriptors.
+ </para>
+
+ <para><emphasis>Perform USB operations</emphasis> using
+ <emphasis>ioctl()</emphasis> requests to make endpoint I/O
+ requests (synchronously or asynchronously) or manage
+ the device.
+ These requests need the CAP_SYS_RAWIO capability,
+ as well as filesystem access permissions.
+ Only one ioctl request can be made on one of these
+ device files at a time.
+ This means that if you are synchronously reading an endpoint
+ from one thread, you won't be able to write to a different
+ endpoint from another thread until the read completes.
+ This works for <emphasis>half duplex</emphasis> protocols,
+ but otherwise you'd use asynchronous i/o requests.
+ </para>
+
+ </sect1>
+
+
+ <sect1 id="usbfs-lifecycle">
+ <title>Life Cycle of User Mode Drivers</title>
+
+ <para>Such a driver first needs to find a device file
+ for a device it knows how to handle.
+ Maybe it was told about it because a
+ <filename>/sbin/hotplug</filename> event handling agent
+ chose that driver to handle the new device.
+ Or maybe it's an application that scans all the
+ /proc/bus/usb device files, and ignores most devices.
+ In either case, it should <function>read()</function> all
+ the descriptors from the device file,
+ and check them against what it knows how to handle.
+ It might just reject everything except a particular
+ vendor and product ID, or need a more complex policy.
+ </para>
+
+ <para>Never assume there will only be one such device
+ on the system at a time!
+ If your code can't handle more than one device at
+ a time, at least detect when there's more than one, and
+ have your users choose which device to use.
+ </para>
+
+ <para>Once your user mode driver knows what device to use,
+ it interacts with it in either of two styles.
+ The simple style is to make only control requests; some
+ devices don't need more complex interactions than those.
+ (An example might be software using vendor-specific control
+ requests for some initialization or configuration tasks,
+ with a kernel driver for the rest.)
+ </para>
+
+ <para>More likely, you need a more complex style driver:
+ one using non-control endpoints, reading or writing data
+ and claiming exclusive use of an interface.
+ <emphasis>Bulk</emphasis> transfers are easiest to use,
+ but only their sibling <emphasis>interrupt</emphasis> transfers
+ work with low speed devices.
+ Both interrupt and <emphasis>isochronous</emphasis> transfers
+ offer service guarantees because their bandwidth is reserved.
+ Such "periodic" transfers are awkward to use through usbfs,
+ unless you're using the asynchronous calls. However, interrupt
+ transfers can also be used in a synchronous "one shot" style.
+ </para>
+
+ <para>Your user-mode driver should never need to worry
+ about cleaning up request state when the device is
+ disconnected, although it should close its open file
+ descriptors as soon as it starts seeing the ENODEV
+ errors.
+ </para>
+
+ </sect1>
+
+ <sect1 id="usbfs-ioctl"><title>The ioctl() Requests</title>
+
+ <para>To use these ioctls, you need to include the following
+ headers in your userspace program:
+<programlisting>#include &lt;linux/usb.h&gt;
+#include &lt;linux/usbdevice_fs.h&gt;
+#include &lt;asm/byteorder.h&gt;</programlisting>
+ The standard USB device model requests, from "Chapter 9" of
+ the USB 2.0 specification, are automatically included from
+ the <filename>&lt;linux/usb/ch9.h&gt;</filename> header.
+ </para>
+
+ <para>Unless noted otherwise, the ioctl requests
+ described here will
+ update the modification time on the usbfs file to which
+ they are applied (unless they fail).
+ A return of zero indicates success; otherwise, a
+ standard USB error code is returned. (These are
+ documented in
+ <filename>Documentation/usb/error-codes.txt</filename>
+ in your kernel sources.)
+ </para>
+
+ <para>Each of these files multiplexes access to several
+ I/O streams, one per endpoint.
+ Each device has one control endpoint (endpoint zero)
+ which supports a limited RPC style RPC access.
+ Devices are configured
+ by hub_wq (in the kernel) setting a device-wide
+ <emphasis>configuration</emphasis> that affects things
+ like power consumption and basic functionality.
+ The endpoints are part of USB <emphasis>interfaces</emphasis>,
+ which may have <emphasis>altsettings</emphasis>
+ affecting things like which endpoints are available.
+ Many devices only have a single configuration and interface,
+ so drivers for them will ignore configurations and altsettings.
+ </para>
+
+
+ <sect2 id="usbfs-mgmt">
+ <title>Management/Status Requests</title>
+
+ <para>A number of usbfs requests don't deal very directly
+ with device I/O.
+ They mostly relate to device management and status.
+ These are all synchronous requests.
+ </para>
+
+ <variablelist>
+
+ <varlistentry><term>USBDEVFS_CLAIMINTERFACE</term>
+ <listitem><para>This is used to force usbfs to
+ claim a specific interface,
+ which has not previously been claimed by usbfs or any other
+ kernel driver.
+ The ioctl parameter is an integer holding the number of
+ the interface (bInterfaceNumber from descriptor).
+ </para><para>
+ Note that if your driver doesn't claim an interface
+ before trying to use one of its endpoints, and no
+ other driver has bound to it, then the interface is
+ automatically claimed by usbfs.
+ </para><para>
+ This claim will be released by a RELEASEINTERFACE ioctl,
+ or by closing the file descriptor.
+ File modification time is not updated by this request.
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_CONNECTINFO</term>
+ <listitem><para>Says whether the device is lowspeed.
+ The ioctl parameter points to a structure like this:
+<programlisting>struct usbdevfs_connectinfo {
+ unsigned int devnum;
+ unsigned char slow;
+}; </programlisting>
+ File modification time is not updated by this request.
+ </para><para>
+ <emphasis>You can't tell whether a "not slow"
+ device is connected at high speed (480 MBit/sec)
+ or just full speed (12 MBit/sec).</emphasis>
+ You should know the devnum value already,
+ it's the DDD value of the device file name.
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_GETDRIVER</term>
+ <listitem><para>Returns the name of the kernel driver
+ bound to a given interface (a string). Parameter
+ is a pointer to this structure, which is modified:
+<programlisting>struct usbdevfs_getdriver {
+ unsigned int interface;
+ char driver[USBDEVFS_MAXDRIVERNAME + 1];
+};</programlisting>
+ File modification time is not updated by this request.
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_IOCTL</term>
+ <listitem><para>Passes a request from userspace through
+ to a kernel driver that has an ioctl entry in the
+ <emphasis>struct usb_driver</emphasis> it registered.
+<programlisting>struct usbdevfs_ioctl {
+ int ifno;
+ int ioctl_code;
+ void *data;
+};
+
+/* user mode call looks like this.
+ * 'request' becomes the driver->ioctl() 'code' parameter.
+ * the size of 'param' is encoded in 'request', and that data
+ * is copied to or from the driver->ioctl() 'buf' parameter.
+ */
+static int
+usbdev_ioctl (int fd, int ifno, unsigned request, void *param)
+{
+ struct usbdevfs_ioctl wrapper;
+
+ wrapper.ifno = ifno;
+ wrapper.ioctl_code = request;
+ wrapper.data = param;
+
+ return ioctl (fd, USBDEVFS_IOCTL, &amp;wrapper);
+} </programlisting>
+ File modification time is not updated by this request.
+ </para><para>
+ This request lets kernel drivers talk to user mode code
+ through filesystem operations even when they don't create
+ a character or block special device.
+ It's also been used to do things like ask devices what
+ device special file should be used.
+ Two pre-defined ioctls are used
+ to disconnect and reconnect kernel drivers, so
+ that user mode code can completely manage binding
+ and configuration of devices.
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_RELEASEINTERFACE</term>
+ <listitem><para>This is used to release the claim usbfs
+ made on interface, either implicitly or because of a
+ USBDEVFS_CLAIMINTERFACE call, before the file
+ descriptor is closed.
+ The ioctl parameter is an integer holding the number of
+ the interface (bInterfaceNumber from descriptor);
+ File modification time is not updated by this request.
+ </para><warning><para>
+ <emphasis>No security check is made to ensure
+ that the task which made the claim is the one
+ which is releasing it.
+ This means that user mode driver may interfere
+ other ones. </emphasis>
+ </para></warning></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_RESETEP</term>
+ <listitem><para>Resets the data toggle value for an endpoint
+ (bulk or interrupt) to DATA0.
+ The ioctl parameter is an integer endpoint number
+ (1 to 15, as identified in the endpoint descriptor),
+ with USB_DIR_IN added if the device's endpoint sends
+ data to the host.
+ </para><warning><para>
+ <emphasis>Avoid using this request.
+ It should probably be removed.</emphasis>
+ Using it typically means the device and driver will lose
+ toggle synchronization. If you really lost synchronization,
+ you likely need to completely handshake with the device,
+ using a request like CLEAR_HALT
+ or SET_INTERFACE.
+ </para></warning></listitem></varlistentry>
+
+ </variablelist>
+
+ </sect2>
+
+ <sect2 id="usbfs-sync">
+ <title>Synchronous I/O Support</title>
+
+ <para>Synchronous requests involve the kernel blocking
+ until the user mode request completes, either by
+ finishing successfully or by reporting an error.
+ In most cases this is the simplest way to use usbfs,
+ although as noted above it does prevent performing I/O
+ to more than one endpoint at a time.
+ </para>
+
+ <variablelist>
+
+ <varlistentry><term>USBDEVFS_BULK</term>
+ <listitem><para>Issues a bulk read or write request to the
+ device.
+ The ioctl parameter is a pointer to this structure:
+<programlisting>struct usbdevfs_bulktransfer {
+ unsigned int ep;
+ unsigned int len;
+ unsigned int timeout; /* in milliseconds */
+ void *data;
+};</programlisting>
+ </para><para>The "ep" value identifies a
+ bulk endpoint number (1 to 15, as identified in an endpoint
+ descriptor),
+ masked with USB_DIR_IN when referring to an endpoint which
+ sends data to the host from the device.
+ The length of the data buffer is identified by "len";
+ Recent kernels support requests up to about 128KBytes.
+ <emphasis>FIXME say how read length is returned,
+ and how short reads are handled.</emphasis>.
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_CLEAR_HALT</term>
+ <listitem><para>Clears endpoint halt (stall) and
+ resets the endpoint toggle. This is only
+ meaningful for bulk or interrupt endpoints.
+ The ioctl parameter is an integer endpoint number
+ (1 to 15, as identified in an endpoint descriptor),
+ masked with USB_DIR_IN when referring to an endpoint which
+ sends data to the host from the device.
+ </para><para>
+ Use this on bulk or interrupt endpoints which have
+ stalled, returning <emphasis>-EPIPE</emphasis> status
+ to a data transfer request.
+ Do not issue the control request directly, since
+ that could invalidate the host's record of the
+ data toggle.
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_CONTROL</term>
+ <listitem><para>Issues a control request to the device.
+ The ioctl parameter points to a structure like this:
+<programlisting>struct usbdevfs_ctrltransfer {
+ __u8 bRequestType;
+ __u8 bRequest;
+ __u16 wValue;
+ __u16 wIndex;
+ __u16 wLength;
+ __u32 timeout; /* in milliseconds */
+ void *data;
+};</programlisting>
+ </para><para>
+ The first eight bytes of this structure are the contents
+ of the SETUP packet to be sent to the device; see the
+ USB 2.0 specification for details.
+ The bRequestType value is composed by combining a
+ USB_TYPE_* value, a USB_DIR_* value, and a
+ USB_RECIP_* value (from
+ <emphasis>&lt;linux/usb.h&gt;</emphasis>).
+ If wLength is nonzero, it describes the length of the data
+ buffer, which is either written to the device
+ (USB_DIR_OUT) or read from the device (USB_DIR_IN).
+ </para><para>
+ At this writing, you can't transfer more than 4 KBytes
+ of data to or from a device; usbfs has a limit, and
+ some host controller drivers have a limit.
+ (That's not usually a problem.)
+ <emphasis>Also</emphasis> there's no way to say it's
+ not OK to get a short read back from the device.
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_RESET</term>
+ <listitem><para>Does a USB level device reset.
+ The ioctl parameter is ignored.
+ After the reset, this rebinds all device interfaces.
+ File modification time is not updated by this request.
+ </para><warning><para>
+ <emphasis>Avoid using this call</emphasis>
+ until some usbcore bugs get fixed,
+ since it does not fully synchronize device, interface,
+ and driver (not just usbfs) state.
+ </para></warning></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_SETINTERFACE</term>
+ <listitem><para>Sets the alternate setting for an
+ interface. The ioctl parameter is a pointer to a
+ structure like this:
+<programlisting>struct usbdevfs_setinterface {
+ unsigned int interface;
+ unsigned int altsetting;
+}; </programlisting>
+ File modification time is not updated by this request.
+ </para><para>
+ Those struct members are from some interface descriptor
+ applying to the current configuration.
+ The interface number is the bInterfaceNumber value, and
+ the altsetting number is the bAlternateSetting value.
+ (This resets each endpoint in the interface.)
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_SETCONFIGURATION</term>
+ <listitem><para>Issues the
+ <function>usb_set_configuration</function> call
+ for the device.
+ The parameter is an integer holding the number of
+ a configuration (bConfigurationValue from descriptor).
+ File modification time is not updated by this request.
+ </para><warning><para>
+ <emphasis>Avoid using this call</emphasis>
+ until some usbcore bugs get fixed,
+ since it does not fully synchronize device, interface,
+ and driver (not just usbfs) state.
+ </para></warning></listitem></varlistentry>
+
+ </variablelist>
+ </sect2>
+
+ <sect2 id="usbfs-async">
+ <title>Asynchronous I/O Support</title>
+
+ <para>As mentioned above, there are situations where it may be
+ important to initiate concurrent operations from user mode code.
+ This is particularly important for periodic transfers
+ (interrupt and isochronous), but it can be used for other
+ kinds of USB requests too.
+ In such cases, the asynchronous requests described here
+ are essential. Rather than submitting one request and having
+ the kernel block until it completes, the blocking is separate.
+ </para>
+
+ <para>These requests are packaged into a structure that
+ resembles the URB used by kernel device drivers.
+ (No POSIX Async I/O support here, sorry.)
+ It identifies the endpoint type (USBDEVFS_URB_TYPE_*),
+ endpoint (number, masked with USB_DIR_IN as appropriate),
+ buffer and length, and a user "context" value serving to
+ uniquely identify each request.
+ (It's usually a pointer to per-request data.)
+ Flags can modify requests (not as many as supported for
+ kernel drivers).
+ </para>
+
+ <para>Each request can specify a realtime signal number
+ (between SIGRTMIN and SIGRTMAX, inclusive) to request a
+ signal be sent when the request completes.
+ </para>
+
+ <para>When usbfs returns these urbs, the status value
+ is updated, and the buffer may have been modified.
+ Except for isochronous transfers, the actual_length is
+ updated to say how many bytes were transferred; if the
+ USBDEVFS_URB_DISABLE_SPD flag is set
+ ("short packets are not OK"), if fewer bytes were read
+ than were requested then you get an error report.
+ </para>
+
+<programlisting>struct usbdevfs_iso_packet_desc {
+ unsigned int length;
+ unsigned int actual_length;
+ unsigned int status;
+};
+
+struct usbdevfs_urb {
+ unsigned char type;
+ unsigned char endpoint;
+ int status;
+ unsigned int flags;
+ void *buffer;
+ int buffer_length;
+ int actual_length;
+ int start_frame;
+ int number_of_packets;
+ int error_count;
+ unsigned int signr;
+ void *usercontext;
+ struct usbdevfs_iso_packet_desc iso_frame_desc[];
+};</programlisting>
+
+ <para> For these asynchronous requests, the file modification
+ time reflects when the request was initiated.
+ This contrasts with their use with the synchronous requests,
+ where it reflects when requests complete.
+ </para>
+
+ <variablelist>
+
+ <varlistentry><term>USBDEVFS_DISCARDURB</term>
+ <listitem><para>
+ <emphasis>TBS</emphasis>
+ File modification time is not updated by this request.
+ </para><para>
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_DISCSIGNAL</term>
+ <listitem><para>
+ <emphasis>TBS</emphasis>
+ File modification time is not updated by this request.
+ </para><para>
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_REAPURB</term>
+ <listitem><para>
+ <emphasis>TBS</emphasis>
+ File modification time is not updated by this request.
+ </para><para>
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_REAPURBNDELAY</term>
+ <listitem><para>
+ <emphasis>TBS</emphasis>
+ File modification time is not updated by this request.
+ </para><para>
+ </para></listitem></varlistentry>
+
+ <varlistentry><term>USBDEVFS_SUBMITURB</term>
+ <listitem><para>
+ <emphasis>TBS</emphasis>
+ </para><para>
+ </para></listitem></varlistentry>
+
+ </variablelist>
+ </sect2>
+
+ </sect1>
+
+ </chapter>
+
+</book>
+<!-- vim:syntax=sgml:sw=4
+-->
diff --git a/Documentation/DocBook/w1.tmpl b/Documentation/DocBook/w1.tmpl
new file mode 100644
index 000000000..b0228d4c8
--- /dev/null
+++ b/Documentation/DocBook/w1.tmpl
@@ -0,0 +1,101 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="w1id">
+ <bookinfo>
+ <title>W1: Dallas' 1-wire bus</title>
+
+ <authorgroup>
+ <author>
+ <firstname>David</firstname>
+ <surname>Fries</surname>
+ <affiliation>
+ <address>
+ <email>David@Fries.net</email>
+ </address>
+ </affiliation>
+ </author>
+
+ </authorgroup>
+
+ <copyright>
+ <year>2013</year>
+ <!--
+ <holder></holder>
+ -->
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License version 2.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+
+ <chapter id="w1_internal">
+ <title>W1 API internal to the kernel</title>
+
+ <sect1 id="w1_internal_api">
+ <title>W1 API internal to the kernel</title>
+ <sect2 id="w1.h">
+ <title>drivers/w1/w1.h</title>
+ <para>W1 core functions.</para>
+!Idrivers/w1/w1.h
+ </sect2>
+
+ <sect2 id="w1.c">
+ <title>drivers/w1/w1.c</title>
+ <para>W1 core functions.</para>
+!Idrivers/w1/w1.c
+ </sect2>
+
+ <sect2 id="w1_family.h">
+ <title>drivers/w1/w1_family.h</title>
+ <para>Allows registering device family operations.</para>
+!Idrivers/w1/w1_family.h
+ </sect2>
+
+ <sect2 id="w1_family.c">
+ <title>drivers/w1/w1_family.c</title>
+ <para>Allows registering device family operations.</para>
+!Edrivers/w1/w1_family.c
+ </sect2>
+
+ <sect2 id="w1_int.c">
+ <title>drivers/w1/w1_int.c</title>
+ <para>W1 internal initialization for master devices.</para>
+!Edrivers/w1/w1_int.c
+ </sect2>
+
+ <sect2 id="w1_netlink.h">
+ <title>drivers/w1/w1_netlink.h</title>
+ <para>W1 external netlink API structures and commands.</para>
+!Idrivers/w1/w1_netlink.h
+ </sect2>
+
+ <sect2 id="w1_io.c">
+ <title>drivers/w1/w1_io.c</title>
+ <para>W1 input/output.</para>
+!Edrivers/w1/w1_io.c
+!Idrivers/w1/w1_io.c
+ </sect2>
+
+ </sect1>
+
+
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/writing-an-alsa-driver.tmpl b/Documentation/DocBook/writing-an-alsa-driver.tmpl
new file mode 100644
index 000000000..84ef6a901
--- /dev/null
+++ b/Documentation/DocBook/writing-an-alsa-driver.tmpl
@@ -0,0 +1,6221 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<!-- ****************************************************** -->
+<!-- Header -->
+<!-- ****************************************************** -->
+<book id="Writing-an-ALSA-Driver">
+ <bookinfo>
+ <title>Writing an ALSA Driver</title>
+ <author>
+ <firstname>Takashi</firstname>
+ <surname>Iwai</surname>
+ <affiliation>
+ <address>
+ <email>tiwai@suse.de</email>
+ </address>
+ </affiliation>
+ </author>
+
+ <date>Oct 15, 2007</date>
+ <edition>0.3.7</edition>
+
+ <abstract>
+ <para>
+ This document describes how to write an ALSA (Advanced Linux
+ Sound Architecture) driver.
+ </para>
+ </abstract>
+
+ <legalnotice>
+ <para>
+ Copyright (c) 2002-2005 Takashi Iwai <email>tiwai@suse.de</email>
+ </para>
+
+ <para>
+ This document is free; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+ </para>
+
+ <para>
+ This document is distributed in the hope that it will be useful,
+ but <emphasis>WITHOUT ANY WARRANTY</emphasis>; without even the
+ implied warranty of <emphasis>MERCHANTABILITY or FITNESS FOR A
+ PARTICULAR PURPOSE</emphasis>. See the GNU General Public License
+ for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+ </legalnotice>
+
+ </bookinfo>
+
+<!-- ****************************************************** -->
+<!-- Preface -->
+<!-- ****************************************************** -->
+ <preface id="preface">
+ <title>Preface</title>
+ <para>
+ This document describes how to write an
+ <ulink url="http://www.alsa-project.org/"><citetitle>
+ ALSA (Advanced Linux Sound Architecture)</citetitle></ulink>
+ driver. The document focuses mainly on PCI soundcards.
+ In the case of other device types, the API might
+ be different, too. However, at least the ALSA kernel API is
+ consistent, and therefore it would be still a bit help for
+ writing them.
+ </para>
+
+ <para>
+ This document targets people who already have enough
+ C language skills and have basic linux kernel programming
+ knowledge. This document doesn't explain the general
+ topic of linux kernel coding and doesn't cover low-level
+ driver implementation details. It only describes
+ the standard way to write a PCI sound driver on ALSA.
+ </para>
+
+ <para>
+ If you are already familiar with the older ALSA ver.0.5.x API, you
+ can check the drivers such as <filename>sound/pci/es1938.c</filename> or
+ <filename>sound/pci/maestro3.c</filename> which have also almost the same
+ code-base in the ALSA 0.5.x tree, so you can compare the differences.
+ </para>
+
+ <para>
+ This document is still a draft version. Any feedback and
+ corrections, please!!
+ </para>
+ </preface>
+
+
+<!-- ****************************************************** -->
+<!-- File Tree Structure -->
+<!-- ****************************************************** -->
+ <chapter id="file-tree">
+ <title>File Tree Structure</title>
+
+ <section id="file-tree-general">
+ <title>General</title>
+ <para>
+ The ALSA drivers are provided in two ways.
+ </para>
+
+ <para>
+ One is the trees provided as a tarball or via cvs from the
+ ALSA's ftp site, and another is the 2.6 (or later) Linux kernel
+ tree. To synchronize both, the ALSA driver tree is split into
+ two different trees: alsa-kernel and alsa-driver. The former
+ contains purely the source code for the Linux 2.6 (or later)
+ tree. This tree is designed only for compilation on 2.6 or
+ later environment. The latter, alsa-driver, contains many subtle
+ files for compiling ALSA drivers outside of the Linux kernel tree,
+ wrapper functions for older 2.2 and 2.4 kernels, to adapt the latest kernel API,
+ and additional drivers which are still in development or in
+ tests. The drivers in alsa-driver tree will be moved to
+ alsa-kernel (and eventually to the 2.6 kernel tree) when they are
+ finished and confirmed to work fine.
+ </para>
+
+ <para>
+ The file tree structure of ALSA driver is depicted below. Both
+ alsa-kernel and alsa-driver have almost the same file
+ structure, except for <quote>core</quote> directory. It's
+ named as <quote>acore</quote> in alsa-driver tree.
+
+ <example>
+ <title>ALSA File Tree Structure</title>
+ <literallayout>
+ sound
+ /core
+ /oss
+ /seq
+ /oss
+ /instr
+ /ioctl32
+ /include
+ /drivers
+ /mpu401
+ /opl3
+ /i2c
+ /l3
+ /synth
+ /emux
+ /pci
+ /(cards)
+ /isa
+ /(cards)
+ /arm
+ /ppc
+ /sparc
+ /usb
+ /pcmcia /(cards)
+ /oss
+ </literallayout>
+ </example>
+ </para>
+ </section>
+
+ <section id="file-tree-core-directory">
+ <title>core directory</title>
+ <para>
+ This directory contains the middle layer which is the heart
+ of ALSA drivers. In this directory, the native ALSA modules are
+ stored. The sub-directories contain different modules and are
+ dependent upon the kernel config.
+ </para>
+
+ <section id="file-tree-core-directory-oss">
+ <title>core/oss</title>
+
+ <para>
+ The codes for PCM and mixer OSS emulation modules are stored
+ in this directory. The rawmidi OSS emulation is included in
+ the ALSA rawmidi code since it's quite small. The sequencer
+ code is stored in <filename>core/seq/oss</filename> directory (see
+ <link linkend="file-tree-core-directory-seq-oss"><citetitle>
+ below</citetitle></link>).
+ </para>
+ </section>
+
+ <section id="file-tree-core-directory-ioctl32">
+ <title>core/ioctl32</title>
+
+ <para>
+ This directory contains the 32bit-ioctl wrappers for 64bit
+ architectures such like x86-64, ppc64 and sparc64. For 32bit
+ and alpha architectures, these are not compiled.
+ </para>
+ </section>
+
+ <section id="file-tree-core-directory-seq">
+ <title>core/seq</title>
+ <para>
+ This directory and its sub-directories are for the ALSA
+ sequencer. This directory contains the sequencer core and
+ primary sequencer modules such like snd-seq-midi,
+ snd-seq-virmidi, etc. They are compiled only when
+ <constant>CONFIG_SND_SEQUENCER</constant> is set in the kernel
+ config.
+ </para>
+ </section>
+
+ <section id="file-tree-core-directory-seq-oss">
+ <title>core/seq/oss</title>
+ <para>
+ This contains the OSS sequencer emulation codes.
+ </para>
+ </section>
+
+ <section id="file-tree-core-directory-deq-instr">
+ <title>core/seq/instr</title>
+ <para>
+ This directory contains the modules for the sequencer
+ instrument layer.
+ </para>
+ </section>
+ </section>
+
+ <section id="file-tree-include-directory">
+ <title>include directory</title>
+ <para>
+ This is the place for the public header files of ALSA drivers,
+ which are to be exported to user-space, or included by
+ several files at different directories. Basically, the private
+ header files should not be placed in this directory, but you may
+ still find files there, due to historical reasons :)
+ </para>
+ </section>
+
+ <section id="file-tree-drivers-directory">
+ <title>drivers directory</title>
+ <para>
+ This directory contains code shared among different drivers
+ on different architectures. They are hence supposed not to be
+ architecture-specific.
+ For example, the dummy pcm driver and the serial MIDI
+ driver are found in this directory. In the sub-directories,
+ there is code for components which are independent from
+ bus and cpu architectures.
+ </para>
+
+ <section id="file-tree-drivers-directory-mpu401">
+ <title>drivers/mpu401</title>
+ <para>
+ The MPU401 and MPU401-UART modules are stored here.
+ </para>
+ </section>
+
+ <section id="file-tree-drivers-directory-opl3">
+ <title>drivers/opl3 and opl4</title>
+ <para>
+ The OPL3 and OPL4 FM-synth stuff is found here.
+ </para>
+ </section>
+ </section>
+
+ <section id="file-tree-i2c-directory">
+ <title>i2c directory</title>
+ <para>
+ This contains the ALSA i2c components.
+ </para>
+
+ <para>
+ Although there is a standard i2c layer on Linux, ALSA has its
+ own i2c code for some cards, because the soundcard needs only a
+ simple operation and the standard i2c API is too complicated for
+ such a purpose.
+ </para>
+
+ <section id="file-tree-i2c-directory-l3">
+ <title>i2c/l3</title>
+ <para>
+ This is a sub-directory for ARM L3 i2c.
+ </para>
+ </section>
+ </section>
+
+ <section id="file-tree-synth-directory">
+ <title>synth directory</title>
+ <para>
+ This contains the synth middle-level modules.
+ </para>
+
+ <para>
+ So far, there is only Emu8000/Emu10k1 synth driver under
+ the <filename>synth/emux</filename> sub-directory.
+ </para>
+ </section>
+
+ <section id="file-tree-pci-directory">
+ <title>pci directory</title>
+ <para>
+ This directory and its sub-directories hold the top-level card modules
+ for PCI soundcards and the code specific to the PCI BUS.
+ </para>
+
+ <para>
+ The drivers compiled from a single file are stored directly
+ in the pci directory, while the drivers with several source files are
+ stored on their own sub-directory (e.g. emu10k1, ice1712).
+ </para>
+ </section>
+
+ <section id="file-tree-isa-directory">
+ <title>isa directory</title>
+ <para>
+ This directory and its sub-directories hold the top-level card modules
+ for ISA soundcards.
+ </para>
+ </section>
+
+ <section id="file-tree-arm-ppc-sparc-directories">
+ <title>arm, ppc, and sparc directories</title>
+ <para>
+ They are used for top-level card modules which are
+ specific to one of these architectures.
+ </para>
+ </section>
+
+ <section id="file-tree-usb-directory">
+ <title>usb directory</title>
+ <para>
+ This directory contains the USB-audio driver. In the latest version, the
+ USB MIDI driver is integrated in the usb-audio driver.
+ </para>
+ </section>
+
+ <section id="file-tree-pcmcia-directory">
+ <title>pcmcia directory</title>
+ <para>
+ The PCMCIA, especially PCCard drivers will go here. CardBus
+ drivers will be in the pci directory, because their API is identical
+ to that of standard PCI cards.
+ </para>
+ </section>
+
+ <section id="file-tree-oss-directory">
+ <title>oss directory</title>
+ <para>
+ The OSS/Lite source files are stored here in Linux 2.6 (or
+ later) tree. In the ALSA driver tarball, this directory is empty,
+ of course :)
+ </para>
+ </section>
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- Basic Flow for PCI Drivers -->
+<!-- ****************************************************** -->
+ <chapter id="basic-flow">
+ <title>Basic Flow for PCI Drivers</title>
+
+ <section id="basic-flow-outline">
+ <title>Outline</title>
+ <para>
+ The minimum flow for PCI soundcards is as follows:
+
+ <itemizedlist>
+ <listitem><para>define the PCI ID table (see the section
+ <link linkend="pci-resource-entries"><citetitle>PCI Entries
+ </citetitle></link>).</para></listitem>
+ <listitem><para>create <function>probe()</function> callback.</para></listitem>
+ <listitem><para>create <function>remove()</function> callback.</para></listitem>
+ <listitem><para>create a <structname>pci_driver</structname> structure
+ containing the three pointers above.</para></listitem>
+ <listitem><para>create an <function>init()</function> function just calling
+ the <function>pci_register_driver()</function> to register the pci_driver table
+ defined above.</para></listitem>
+ <listitem><para>create an <function>exit()</function> function to call
+ the <function>pci_unregister_driver()</function> function.</para></listitem>
+ </itemizedlist>
+ </para>
+ </section>
+
+ <section id="basic-flow-example">
+ <title>Full Code Example</title>
+ <para>
+ The code example is shown below. Some parts are kept
+ unimplemented at this moment but will be filled in the
+ next sections. The numbers in the comment lines of the
+ <function>snd_mychip_probe()</function> function
+ refer to details explained in the following section.
+
+ <example>
+ <title>Basic Flow for PCI Drivers - Example</title>
+ <programlisting>
+<![CDATA[
+ #include <linux/init.h>
+ #include <linux/pci.h>
+ #include <linux/slab.h>
+ #include <sound/core.h>
+ #include <sound/initval.h>
+
+ /* module parameters (see "Module Parameters") */
+ /* SNDRV_CARDS: maximum number of cards supported by this module */
+ static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
+ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
+ static bool enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+
+ /* definition of the chip-specific record */
+ struct mychip {
+ struct snd_card *card;
+ /* the rest of the implementation will be in section
+ * "PCI Resource Management"
+ */
+ };
+
+ /* chip-specific destructor
+ * (see "PCI Resource Management")
+ */
+ static int snd_mychip_free(struct mychip *chip)
+ {
+ .... /* will be implemented later... */
+ }
+
+ /* component-destructor
+ * (see "Management of Cards and Components")
+ */
+ static int snd_mychip_dev_free(struct snd_device *device)
+ {
+ return snd_mychip_free(device->device_data);
+ }
+
+ /* chip-specific constructor
+ * (see "Management of Cards and Components")
+ */
+ static int snd_mychip_create(struct snd_card *card,
+ struct pci_dev *pci,
+ struct mychip **rchip)
+ {
+ struct mychip *chip;
+ int err;
+ static struct snd_device_ops ops = {
+ .dev_free = snd_mychip_dev_free,
+ };
+
+ *rchip = NULL;
+
+ /* check PCI availability here
+ * (see "PCI Resource Management")
+ */
+ ....
+
+ /* allocate a chip-specific data with zero filled */
+ chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+ if (chip == NULL)
+ return -ENOMEM;
+
+ chip->card = card;
+
+ /* rest of initialization here; will be implemented
+ * later, see "PCI Resource Management"
+ */
+ ....
+
+ err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
+ if (err < 0) {
+ snd_mychip_free(chip);
+ return err;
+ }
+
+ *rchip = chip;
+ return 0;
+ }
+
+ /* constructor -- see "Constructor" sub-section */
+ static int snd_mychip_probe(struct pci_dev *pci,
+ const struct pci_device_id *pci_id)
+ {
+ static int dev;
+ struct snd_card *card;
+ struct mychip *chip;
+ int err;
+
+ /* (1) */
+ if (dev >= SNDRV_CARDS)
+ return -ENODEV;
+ if (!enable[dev]) {
+ dev++;
+ return -ENOENT;
+ }
+
+ /* (2) */
+ err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
+ 0, &card);
+ if (err < 0)
+ return err;
+
+ /* (3) */
+ err = snd_mychip_create(card, pci, &chip);
+ if (err < 0) {
+ snd_card_free(card);
+ return err;
+ }
+
+ /* (4) */
+ strcpy(card->driver, "My Chip");
+ strcpy(card->shortname, "My Own Chip 123");
+ sprintf(card->longname, "%s at 0x%lx irq %i",
+ card->shortname, chip->ioport, chip->irq);
+
+ /* (5) */
+ .... /* implemented later */
+
+ /* (6) */
+ err = snd_card_register(card);
+ if (err < 0) {
+ snd_card_free(card);
+ return err;
+ }
+
+ /* (7) */
+ pci_set_drvdata(pci, card);
+ dev++;
+ return 0;
+ }
+
+ /* destructor -- see the "Destructor" sub-section */
+ static void snd_mychip_remove(struct pci_dev *pci)
+ {
+ snd_card_free(pci_get_drvdata(pci));
+ pci_set_drvdata(pci, NULL);
+ }
+]]>
+ </programlisting>
+ </example>
+ </para>
+ </section>
+
+ <section id="basic-flow-constructor">
+ <title>Constructor</title>
+ <para>
+ The real constructor of PCI drivers is the <function>probe</function> callback.
+ The <function>probe</function> callback and other component-constructors which are called
+ from the <function>probe</function> callback cannot be used with
+ the <parameter>__init</parameter> prefix
+ because any PCI device could be a hotplug device.
+ </para>
+
+ <para>
+ In the <function>probe</function> callback, the following scheme is often used.
+ </para>
+
+ <section id="basic-flow-constructor-device-index">
+ <title>1) Check and increment the device index.</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int dev;
+ ....
+ if (dev >= SNDRV_CARDS)
+ return -ENODEV;
+ if (!enable[dev]) {
+ dev++;
+ return -ENOENT;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+
+ where enable[dev] is the module option.
+ </para>
+
+ <para>
+ Each time the <function>probe</function> callback is called, check the
+ availability of the device. If not available, simply increment
+ the device index and returns. dev will be incremented also
+ later (<link
+ linkend="basic-flow-constructor-set-pci"><citetitle>step
+ 7</citetitle></link>).
+ </para>
+ </section>
+
+ <section id="basic-flow-constructor-create-card">
+ <title>2) Create a card instance</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_card *card;
+ int err;
+ ....
+ err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
+ 0, &card);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The details will be explained in the section
+ <link linkend="card-management-card-instance"><citetitle>
+ Management of Cards and Components</citetitle></link>.
+ </para>
+ </section>
+
+ <section id="basic-flow-constructor-create-main">
+ <title>3) Create a main component</title>
+ <para>
+ In this part, the PCI resources are allocated.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct mychip *chip;
+ ....
+ err = snd_mychip_create(card, pci, &chip);
+ if (err < 0) {
+ snd_card_free(card);
+ return err;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+
+ The details will be explained in the section <link
+ linkend="pci-resource"><citetitle>PCI Resource
+ Management</citetitle></link>.
+ </para>
+ </section>
+
+ <section id="basic-flow-constructor-main-component">
+ <title>4) Set the driver ID and name strings.</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ strcpy(card->driver, "My Chip");
+ strcpy(card->shortname, "My Own Chip 123");
+ sprintf(card->longname, "%s at 0x%lx irq %i",
+ card->shortname, chip->ioport, chip->irq);
+]]>
+ </programlisting>
+ </informalexample>
+
+ The driver field holds the minimal ID string of the
+ chip. This is used by alsa-lib's configurator, so keep it
+ simple but unique.
+ Even the same driver can have different driver IDs to
+ distinguish the functionality of each chip type.
+ </para>
+
+ <para>
+ The shortname field is a string shown as more verbose
+ name. The longname field contains the information
+ shown in <filename>/proc/asound/cards</filename>.
+ </para>
+ </section>
+
+ <section id="basic-flow-constructor-create-other">
+ <title>5) Create other components, such as mixer, MIDI, etc.</title>
+ <para>
+ Here you define the basic components such as
+ <link linkend="pcm-interface"><citetitle>PCM</citetitle></link>,
+ mixer (e.g. <link linkend="api-ac97"><citetitle>AC97</citetitle></link>),
+ MIDI (e.g. <link linkend="midi-interface"><citetitle>MPU-401</citetitle></link>),
+ and other interfaces.
+ Also, if you want a <link linkend="proc-interface"><citetitle>proc
+ file</citetitle></link>, define it here, too.
+ </para>
+ </section>
+
+ <section id="basic-flow-constructor-register-card">
+ <title>6) Register the card instance.</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ err = snd_card_register(card);
+ if (err < 0) {
+ snd_card_free(card);
+ return err;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Will be explained in the section <link
+ linkend="card-management-registration"><citetitle>Management
+ of Cards and Components</citetitle></link>, too.
+ </para>
+ </section>
+
+ <section id="basic-flow-constructor-set-pci">
+ <title>7) Set the PCI driver data and return zero.</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ pci_set_drvdata(pci, card);
+ dev++;
+ return 0;
+]]>
+ </programlisting>
+ </informalexample>
+
+ In the above, the card record is stored. This pointer is
+ used in the remove callback and power-management
+ callbacks, too.
+ </para>
+ </section>
+ </section>
+
+ <section id="basic-flow-destructor">
+ <title>Destructor</title>
+ <para>
+ The destructor, remove callback, simply releases the card
+ instance. Then the ALSA middle layer will release all the
+ attached components automatically.
+ </para>
+
+ <para>
+ It would be typically like the following:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static void snd_mychip_remove(struct pci_dev *pci)
+ {
+ snd_card_free(pci_get_drvdata(pci));
+ pci_set_drvdata(pci, NULL);
+ }
+]]>
+ </programlisting>
+ </informalexample>
+
+ The above code assumes that the card pointer is set to the PCI
+ driver data.
+ </para>
+ </section>
+
+ <section id="basic-flow-header-files">
+ <title>Header Files</title>
+ <para>
+ For the above example, at least the following include files
+ are necessary.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ #include <linux/init.h>
+ #include <linux/pci.h>
+ #include <linux/slab.h>
+ #include <sound/core.h>
+ #include <sound/initval.h>
+]]>
+ </programlisting>
+ </informalexample>
+
+ where the last one is necessary only when module options are
+ defined in the source file. If the code is split into several
+ files, the files without module options don't need them.
+ </para>
+
+ <para>
+ In addition to these headers, you'll need
+ <filename>&lt;linux/interrupt.h&gt;</filename> for interrupt
+ handling, and <filename>&lt;asm/io.h&gt;</filename> for I/O
+ access. If you use the <function>mdelay()</function> or
+ <function>udelay()</function> functions, you'll need to include
+ <filename>&lt;linux/delay.h&gt;</filename> too.
+ </para>
+
+ <para>
+ The ALSA interfaces like the PCM and control APIs are defined in other
+ <filename>&lt;sound/xxx.h&gt;</filename> header files.
+ They have to be included after
+ <filename>&lt;sound/core.h&gt;</filename>.
+ </para>
+
+ </section>
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- Management of Cards and Components -->
+<!-- ****************************************************** -->
+ <chapter id="card-management">
+ <title>Management of Cards and Components</title>
+
+ <section id="card-management-card-instance">
+ <title>Card Instance</title>
+ <para>
+ For each soundcard, a <quote>card</quote> record must be allocated.
+ </para>
+
+ <para>
+ A card record is the headquarters of the soundcard. It manages
+ the whole list of devices (components) on the soundcard, such as
+ PCM, mixers, MIDI, synthesizer, and so on. Also, the card
+ record holds the ID and the name strings of the card, manages
+ the root of proc files, and controls the power-management states
+ and hotplug disconnections. The component list on the card
+ record is used to manage the correct release of resources at
+ destruction.
+ </para>
+
+ <para>
+ As mentioned above, to create a card instance, call
+ <function>snd_card_new()</function>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_card *card;
+ int err;
+ err = snd_card_new(&pci->dev, index, id, module, extra_size, &card);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The function takes six arguments: the parent device pointer,
+ the card-index number, the id string, the module pointer (usually
+ <constant>THIS_MODULE</constant>),
+ the size of extra-data space, and the pointer to return the
+ card instance. The extra_size argument is used to
+ allocate card-&gt;private_data for the
+ chip-specific data. Note that these data
+ are allocated by <function>snd_card_new()</function>.
+ </para>
+
+ <para>
+ The first argument, the pointer of struct
+ <structname>device</structname>, specifies the parent device.
+ For PCI devices, typically &amp;pci-&gt; is passed there.
+ </para>
+ </section>
+
+ <section id="card-management-component">
+ <title>Components</title>
+ <para>
+ After the card is created, you can attach the components
+ (devices) to the card instance. In an ALSA driver, a component is
+ represented as a struct <structname>snd_device</structname> object.
+ A component can be a PCM instance, a control interface, a raw
+ MIDI interface, etc. Each such instance has one component
+ entry.
+ </para>
+
+ <para>
+ A component can be created via
+ <function>snd_device_new()</function> function.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_device_new(card, SNDRV_DEV_XXX, chip, &ops);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ This takes the card pointer, the device-level
+ (<constant>SNDRV_DEV_XXX</constant>), the data pointer, and the
+ callback pointers (<parameter>&amp;ops</parameter>). The
+ device-level defines the type of components and the order of
+ registration and de-registration. For most components, the
+ device-level is already defined. For a user-defined component,
+ you can use <constant>SNDRV_DEV_LOWLEVEL</constant>.
+ </para>
+
+ <para>
+ This function itself doesn't allocate the data space. The data
+ must be allocated manually beforehand, and its pointer is passed
+ as the argument. This pointer (<parameter>chip</parameter> in the
+ above example) is used as the identifier for the instance.
+ </para>
+
+ <para>
+ Each pre-defined ALSA component such as ac97 and pcm calls
+ <function>snd_device_new()</function> inside its
+ constructor. The destructor for each component is defined in the
+ callback pointers. Hence, you don't need to take care of
+ calling a destructor for such a component.
+ </para>
+
+ <para>
+ If you wish to create your own component, you need to
+ set the destructor function to the dev_free callback in
+ the <parameter>ops</parameter>, so that it can be released
+ automatically via <function>snd_card_free()</function>.
+ The next example will show an implementation of chip-specific
+ data.
+ </para>
+ </section>
+
+ <section id="card-management-chip-specific">
+ <title>Chip-Specific Data</title>
+ <para>
+ Chip-specific information, e.g. the I/O port address, its
+ resource pointer, or the irq number, is stored in the
+ chip-specific record.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct mychip {
+ ....
+ };
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ In general, there are two ways of allocating the chip record.
+ </para>
+
+ <section id="card-management-chip-specific-snd-card-new">
+ <title>1. Allocating via <function>snd_card_new()</function>.</title>
+ <para>
+ As mentioned above, you can pass the extra-data-length
+ to the 5th argument of <function>snd_card_new()</function>, i.e.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
+ sizeof(struct mychip), &card);
+]]>
+ </programlisting>
+ </informalexample>
+
+ struct <structname>mychip</structname> is the type of the chip record.
+ </para>
+
+ <para>
+ In return, the allocated record can be accessed as
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct mychip *chip = card->private_data;
+]]>
+ </programlisting>
+ </informalexample>
+
+ With this method, you don't have to allocate twice.
+ The record is released together with the card instance.
+ </para>
+ </section>
+
+ <section id="card-management-chip-specific-allocate-extra">
+ <title>2. Allocating an extra device.</title>
+
+ <para>
+ After allocating a card instance via
+ <function>snd_card_new()</function> (with
+ <constant>0</constant> on the 4th arg), call
+ <function>kzalloc()</function>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_card *card;
+ struct mychip *chip;
+ err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
+ 0, &card);
+ .....
+ chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The chip record should have the field to hold the card
+ pointer at least,
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct mychip {
+ struct snd_card *card;
+ ....
+ };
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Then, set the card pointer in the returned chip instance.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ chip->card = card;
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Next, initialize the fields, and register this chip
+ record as a low-level device with a specified
+ <parameter>ops</parameter>,
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static struct snd_device_ops ops = {
+ .dev_free = snd_mychip_dev_free,
+ };
+ ....
+ snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
+]]>
+ </programlisting>
+ </informalexample>
+
+ <function>snd_mychip_dev_free()</function> is the
+ device-destructor function, which will call the real
+ destructor.
+ </para>
+
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_mychip_dev_free(struct snd_device *device)
+ {
+ return snd_mychip_free(device->device_data);
+ }
+]]>
+ </programlisting>
+ </informalexample>
+
+ where <function>snd_mychip_free()</function> is the real destructor.
+ </para>
+ </section>
+ </section>
+
+ <section id="card-management-registration">
+ <title>Registration and Release</title>
+ <para>
+ After all components are assigned, register the card instance
+ by calling <function>snd_card_register()</function>. Access
+ to the device files is enabled at this point. That is, before
+ <function>snd_card_register()</function> is called, the
+ components are safely inaccessible from external side. If this
+ call fails, exit the probe function after releasing the card via
+ <function>snd_card_free()</function>.
+ </para>
+
+ <para>
+ For releasing the card instance, you can call simply
+ <function>snd_card_free()</function>. As mentioned earlier, all
+ components are released automatically by this call.
+ </para>
+
+ <para>
+ For a device which allows hotplugging, you can use
+ <function>snd_card_free_when_closed</function>. This one will
+ postpone the destruction until all devices are closed.
+ </para>
+
+ </section>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- PCI Resource Management -->
+<!-- ****************************************************** -->
+ <chapter id="pci-resource">
+ <title>PCI Resource Management</title>
+
+ <section id="pci-resource-example">
+ <title>Full Code Example</title>
+ <para>
+ In this section, we'll complete the chip-specific constructor,
+ destructor and PCI entries. Example code is shown first,
+ below.
+
+ <example>
+ <title>PCI Resource Management Example</title>
+ <programlisting>
+<![CDATA[
+ struct mychip {
+ struct snd_card *card;
+ struct pci_dev *pci;
+
+ unsigned long port;
+ int irq;
+ };
+
+ static int snd_mychip_free(struct mychip *chip)
+ {
+ /* disable hardware here if any */
+ .... /* (not implemented in this document) */
+
+ /* release the irq */
+ if (chip->irq >= 0)
+ free_irq(chip->irq, chip);
+ /* release the I/O ports & memory */
+ pci_release_regions(chip->pci);
+ /* disable the PCI entry */
+ pci_disable_device(chip->pci);
+ /* release the data */
+ kfree(chip);
+ return 0;
+ }
+
+ /* chip-specific constructor */
+ static int snd_mychip_create(struct snd_card *card,
+ struct pci_dev *pci,
+ struct mychip **rchip)
+ {
+ struct mychip *chip;
+ int err;
+ static struct snd_device_ops ops = {
+ .dev_free = snd_mychip_dev_free,
+ };
+
+ *rchip = NULL;
+
+ /* initialize the PCI entry */
+ err = pci_enable_device(pci);
+ if (err < 0)
+ return err;
+ /* check PCI availability (28bit DMA) */
+ if (pci_set_dma_mask(pci, DMA_BIT_MASK(28)) < 0 ||
+ pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(28)) < 0) {
+ printk(KERN_ERR "error to set 28bit mask DMA\n");
+ pci_disable_device(pci);
+ return -ENXIO;
+ }
+
+ chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+ if (chip == NULL) {
+ pci_disable_device(pci);
+ return -ENOMEM;
+ }
+
+ /* initialize the stuff */
+ chip->card = card;
+ chip->pci = pci;
+ chip->irq = -1;
+
+ /* (1) PCI resource allocation */
+ err = pci_request_regions(pci, "My Chip");
+ if (err < 0) {
+ kfree(chip);
+ pci_disable_device(pci);
+ return err;
+ }
+ chip->port = pci_resource_start(pci, 0);
+ if (request_irq(pci->irq, snd_mychip_interrupt,
+ IRQF_SHARED, KBUILD_MODNAME, chip)) {
+ printk(KERN_ERR "cannot grab irq %d\n", pci->irq);
+ snd_mychip_free(chip);
+ return -EBUSY;
+ }
+ chip->irq = pci->irq;
+
+ /* (2) initialization of the chip hardware */
+ .... /* (not implemented in this document) */
+
+ err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
+ if (err < 0) {
+ snd_mychip_free(chip);
+ return err;
+ }
+
+ *rchip = chip;
+ return 0;
+ }
+
+ /* PCI IDs */
+ static struct pci_device_id snd_mychip_ids[] = {
+ { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
+ ....
+ { 0, }
+ };
+ MODULE_DEVICE_TABLE(pci, snd_mychip_ids);
+
+ /* pci_driver definition */
+ static struct pci_driver driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_mychip_ids,
+ .probe = snd_mychip_probe,
+ .remove = snd_mychip_remove,
+ };
+
+ /* module initialization */
+ static int __init alsa_card_mychip_init(void)
+ {
+ return pci_register_driver(&driver);
+ }
+
+ /* module clean up */
+ static void __exit alsa_card_mychip_exit(void)
+ {
+ pci_unregister_driver(&driver);
+ }
+
+ module_init(alsa_card_mychip_init)
+ module_exit(alsa_card_mychip_exit)
+
+ EXPORT_NO_SYMBOLS; /* for old kernels only */
+]]>
+ </programlisting>
+ </example>
+ </para>
+ </section>
+
+ <section id="pci-resource-some-haftas">
+ <title>Some Hafta's</title>
+ <para>
+ The allocation of PCI resources is done in the
+ <function>probe()</function> function, and usually an extra
+ <function>xxx_create()</function> function is written for this
+ purpose.
+ </para>
+
+ <para>
+ In the case of PCI devices, you first have to call
+ the <function>pci_enable_device()</function> function before
+ allocating resources. Also, you need to set the proper PCI DMA
+ mask to limit the accessed I/O range. In some cases, you might
+ need to call <function>pci_set_master()</function> function,
+ too.
+ </para>
+
+ <para>
+ Suppose the 28bit mask, and the code to be added would be like:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ err = pci_enable_device(pci);
+ if (err < 0)
+ return err;
+ if (pci_set_dma_mask(pci, DMA_BIT_MASK(28)) < 0 ||
+ pci_set_consistent_dma_mask(pci, DMA_BIT_MASK(28)) < 0) {
+ printk(KERN_ERR "error to set 28bit mask DMA\n");
+ pci_disable_device(pci);
+ return -ENXIO;
+ }
+
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+ </section>
+
+ <section id="pci-resource-resource-allocation">
+ <title>Resource Allocation</title>
+ <para>
+ The allocation of I/O ports and irqs is done via standard kernel
+ functions. Unlike ALSA ver.0.5.x., there are no helpers for
+ that. And these resources must be released in the destructor
+ function (see below). Also, on ALSA 0.9.x, you don't need to
+ allocate (pseudo-)DMA for PCI like in ALSA 0.5.x.
+ </para>
+
+ <para>
+ Now assume that the PCI device has an I/O port with 8 bytes
+ and an interrupt. Then struct <structname>mychip</structname> will have the
+ following fields:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct mychip {
+ struct snd_card *card;
+
+ unsigned long port;
+ int irq;
+ };
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ For an I/O port (and also a memory region), you need to have
+ the resource pointer for the standard resource management. For
+ an irq, you have to keep only the irq number (integer). But you
+ need to initialize this number as -1 before actual allocation,
+ since irq 0 is valid. The port address and its resource pointer
+ can be initialized as null by
+ <function>kzalloc()</function> automatically, so you
+ don't have to take care of resetting them.
+ </para>
+
+ <para>
+ The allocation of an I/O port is done like this:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ err = pci_request_regions(pci, "My Chip");
+ if (err < 0) {
+ kfree(chip);
+ pci_disable_device(pci);
+ return err;
+ }
+ chip->port = pci_resource_start(pci, 0);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ <!-- obsolete -->
+ It will reserve the I/O port region of 8 bytes of the given
+ PCI device. The returned value, chip-&gt;res_port, is allocated
+ via <function>kmalloc()</function> by
+ <function>request_region()</function>. The pointer must be
+ released via <function>kfree()</function>, but there is a
+ problem with this. This issue will be explained later.
+ </para>
+
+ <para>
+ The allocation of an interrupt source is done like this:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ if (request_irq(pci->irq, snd_mychip_interrupt,
+ IRQF_SHARED, KBUILD_MODNAME, chip)) {
+ printk(KERN_ERR "cannot grab irq %d\n", pci->irq);
+ snd_mychip_free(chip);
+ return -EBUSY;
+ }
+ chip->irq = pci->irq;
+]]>
+ </programlisting>
+ </informalexample>
+
+ where <function>snd_mychip_interrupt()</function> is the
+ interrupt handler defined <link
+ linkend="pcm-interface-interrupt-handler"><citetitle>later</citetitle></link>.
+ Note that chip-&gt;irq should be defined
+ only when <function>request_irq()</function> succeeded.
+ </para>
+
+ <para>
+ On the PCI bus, interrupts can be shared. Thus,
+ <constant>IRQF_SHARED</constant> is used as the interrupt flag of
+ <function>request_irq()</function>.
+ </para>
+
+ <para>
+ The last argument of <function>request_irq()</function> is the
+ data pointer passed to the interrupt handler. Usually, the
+ chip-specific record is used for that, but you can use what you
+ like, too.
+ </para>
+
+ <para>
+ I won't give details about the interrupt handler at this
+ point, but at least its appearance can be explained now. The
+ interrupt handler looks usually like the following:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static irqreturn_t snd_mychip_interrupt(int irq, void *dev_id)
+ {
+ struct mychip *chip = dev_id;
+ ....
+ return IRQ_HANDLED;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Now let's write the corresponding destructor for the resources
+ above. The role of destructor is simple: disable the hardware
+ (if already activated) and release the resources. So far, we
+ have no hardware part, so the disabling code is not written here.
+ </para>
+
+ <para>
+ To release the resources, the <quote>check-and-release</quote>
+ method is a safer way. For the interrupt, do like this:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ if (chip->irq >= 0)
+ free_irq(chip->irq, chip);
+]]>
+ </programlisting>
+ </informalexample>
+
+ Since the irq number can start from 0, you should initialize
+ chip-&gt;irq with a negative value (e.g. -1), so that you can
+ check the validity of the irq number as above.
+ </para>
+
+ <para>
+ When you requested I/O ports or memory regions via
+ <function>pci_request_region()</function> or
+ <function>pci_request_regions()</function> like in this example,
+ release the resource(s) using the corresponding function,
+ <function>pci_release_region()</function> or
+ <function>pci_release_regions()</function>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ pci_release_regions(chip->pci);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ When you requested manually via <function>request_region()</function>
+ or <function>request_mem_region</function>, you can release it via
+ <function>release_resource()</function>. Suppose that you keep
+ the resource pointer returned from <function>request_region()</function>
+ in chip-&gt;res_port, the release procedure looks like:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ release_and_free_resource(chip->res_port);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Don't forget to call <function>pci_disable_device()</function>
+ before the end.
+ </para>
+
+ <para>
+ And finally, release the chip-specific record.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ kfree(chip);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ We didn't implement the hardware disabling part in the above.
+ If you need to do this, please note that the destructor may be
+ called even before the initialization of the chip is completed.
+ It would be better to have a flag to skip hardware disabling
+ if the hardware was not initialized yet.
+ </para>
+
+ <para>
+ When the chip-data is assigned to the card using
+ <function>snd_device_new()</function> with
+ <constant>SNDRV_DEV_LOWLELVEL</constant> , its destructor is
+ called at the last. That is, it is assured that all other
+ components like PCMs and controls have already been released.
+ You don't have to stop PCMs, etc. explicitly, but just
+ call low-level hardware stopping.
+ </para>
+
+ <para>
+ The management of a memory-mapped region is almost as same as
+ the management of an I/O port. You'll need three fields like
+ the following:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct mychip {
+ ....
+ unsigned long iobase_phys;
+ void __iomem *iobase_virt;
+ };
+]]>
+ </programlisting>
+ </informalexample>
+
+ and the allocation would be like below:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ if ((err = pci_request_regions(pci, "My Chip")) < 0) {
+ kfree(chip);
+ return err;
+ }
+ chip->iobase_phys = pci_resource_start(pci, 0);
+ chip->iobase_virt = ioremap_nocache(chip->iobase_phys,
+ pci_resource_len(pci, 0));
+]]>
+ </programlisting>
+ </informalexample>
+
+ and the corresponding destructor would be:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_mychip_free(struct mychip *chip)
+ {
+ ....
+ if (chip->iobase_virt)
+ iounmap(chip->iobase_virt);
+ ....
+ pci_release_regions(chip->pci);
+ ....
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ </section>
+
+ <section id="pci-resource-entries">
+ <title>PCI Entries</title>
+ <para>
+ So far, so good. Let's finish the missing PCI
+ stuff. At first, we need a
+ <structname>pci_device_id</structname> table for this
+ chipset. It's a table of PCI vendor/device ID number, and some
+ masks.
+ </para>
+
+ <para>
+ For example,
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static struct pci_device_id snd_mychip_ids[] = {
+ { PCI_VENDOR_ID_FOO, PCI_DEVICE_ID_BAR,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, },
+ ....
+ { 0, }
+ };
+ MODULE_DEVICE_TABLE(pci, snd_mychip_ids);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The first and second fields of
+ the <structname>pci_device_id</structname> structure are the vendor and
+ device IDs. If you have no reason to filter the matching
+ devices, you can leave the remaining fields as above. The last
+ field of the <structname>pci_device_id</structname> struct contains
+ private data for this entry. You can specify any value here, for
+ example, to define specific operations for supported device IDs.
+ Such an example is found in the intel8x0 driver.
+ </para>
+
+ <para>
+ The last entry of this list is the terminator. You must
+ specify this all-zero entry.
+ </para>
+
+ <para>
+ Then, prepare the <structname>pci_driver</structname> record:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static struct pci_driver driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_mychip_ids,
+ .probe = snd_mychip_probe,
+ .remove = snd_mychip_remove,
+ };
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The <structfield>probe</structfield> and
+ <structfield>remove</structfield> functions have already
+ been defined in the previous sections.
+ The <structfield>name</structfield>
+ field is the name string of this device. Note that you must not
+ use a slash <quote>/</quote> in this string.
+ </para>
+
+ <para>
+ And at last, the module entries:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int __init alsa_card_mychip_init(void)
+ {
+ return pci_register_driver(&driver);
+ }
+
+ static void __exit alsa_card_mychip_exit(void)
+ {
+ pci_unregister_driver(&driver);
+ }
+
+ module_init(alsa_card_mychip_init)
+ module_exit(alsa_card_mychip_exit)
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Note that these module entries are tagged with
+ <parameter>__init</parameter> and
+ <parameter>__exit</parameter> prefixes.
+ </para>
+
+ <para>
+ Oh, one thing was forgotten. If you have no exported symbols,
+ you need to declare it in 2.2 or 2.4 kernels (it's not necessary in 2.6 kernels).
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ EXPORT_NO_SYMBOLS;
+]]>
+ </programlisting>
+ </informalexample>
+
+ That's all!
+ </para>
+ </section>
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- PCM Interface -->
+<!-- ****************************************************** -->
+ <chapter id="pcm-interface">
+ <title>PCM Interface</title>
+
+ <section id="pcm-interface-general">
+ <title>General</title>
+ <para>
+ The PCM middle layer of ALSA is quite powerful and it is only
+ necessary for each driver to implement the low-level functions
+ to access its hardware.
+ </para>
+
+ <para>
+ For accessing to the PCM layer, you need to include
+ <filename>&lt;sound/pcm.h&gt;</filename> first. In addition,
+ <filename>&lt;sound/pcm_params.h&gt;</filename> might be needed
+ if you access to some functions related with hw_param.
+ </para>
+
+ <para>
+ Each card device can have up to four pcm instances. A pcm
+ instance corresponds to a pcm device file. The limitation of
+ number of instances comes only from the available bit size of
+ the Linux's device numbers. Once when 64bit device number is
+ used, we'll have more pcm instances available.
+ </para>
+
+ <para>
+ A pcm instance consists of pcm playback and capture streams,
+ and each pcm stream consists of one or more pcm substreams. Some
+ soundcards support multiple playback functions. For example,
+ emu10k1 has a PCM playback of 32 stereo substreams. In this case, at
+ each open, a free substream is (usually) automatically chosen
+ and opened. Meanwhile, when only one substream exists and it was
+ already opened, the successful open will either block
+ or error with <constant>EAGAIN</constant> according to the
+ file open mode. But you don't have to care about such details in your
+ driver. The PCM middle layer will take care of such work.
+ </para>
+ </section>
+
+ <section id="pcm-interface-example">
+ <title>Full Code Example</title>
+ <para>
+ The example code below does not include any hardware access
+ routines but shows only the skeleton, how to build up the PCM
+ interfaces.
+
+ <example>
+ <title>PCM Example Code</title>
+ <programlisting>
+<![CDATA[
+ #include <sound/pcm.h>
+ ....
+
+ /* hardware definition */
+ static struct snd_pcm_hardware snd_mychip_playback_hw = {
+ .info = (SNDRV_PCM_INFO_MMAP |
+ SNDRV_PCM_INFO_INTERLEAVED |
+ SNDRV_PCM_INFO_BLOCK_TRANSFER |
+ SNDRV_PCM_INFO_MMAP_VALID),
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
+ .rates = SNDRV_PCM_RATE_8000_48000,
+ .rate_min = 8000,
+ .rate_max = 48000,
+ .channels_min = 2,
+ .channels_max = 2,
+ .buffer_bytes_max = 32768,
+ .period_bytes_min = 4096,
+ .period_bytes_max = 32768,
+ .periods_min = 1,
+ .periods_max = 1024,
+ };
+
+ /* hardware definition */
+ static struct snd_pcm_hardware snd_mychip_capture_hw = {
+ .info = (SNDRV_PCM_INFO_MMAP |
+ SNDRV_PCM_INFO_INTERLEAVED |
+ SNDRV_PCM_INFO_BLOCK_TRANSFER |
+ SNDRV_PCM_INFO_MMAP_VALID),
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
+ .rates = SNDRV_PCM_RATE_8000_48000,
+ .rate_min = 8000,
+ .rate_max = 48000,
+ .channels_min = 2,
+ .channels_max = 2,
+ .buffer_bytes_max = 32768,
+ .period_bytes_min = 4096,
+ .period_bytes_max = 32768,
+ .periods_min = 1,
+ .periods_max = 1024,
+ };
+
+ /* open callback */
+ static int snd_mychip_playback_open(struct snd_pcm_substream *substream)
+ {
+ struct mychip *chip = snd_pcm_substream_chip(substream);
+ struct snd_pcm_runtime *runtime = substream->runtime;
+
+ runtime->hw = snd_mychip_playback_hw;
+ /* more hardware-initialization will be done here */
+ ....
+ return 0;
+ }
+
+ /* close callback */
+ static int snd_mychip_playback_close(struct snd_pcm_substream *substream)
+ {
+ struct mychip *chip = snd_pcm_substream_chip(substream);
+ /* the hardware-specific codes will be here */
+ ....
+ return 0;
+
+ }
+
+ /* open callback */
+ static int snd_mychip_capture_open(struct snd_pcm_substream *substream)
+ {
+ struct mychip *chip = snd_pcm_substream_chip(substream);
+ struct snd_pcm_runtime *runtime = substream->runtime;
+
+ runtime->hw = snd_mychip_capture_hw;
+ /* more hardware-initialization will be done here */
+ ....
+ return 0;
+ }
+
+ /* close callback */
+ static int snd_mychip_capture_close(struct snd_pcm_substream *substream)
+ {
+ struct mychip *chip = snd_pcm_substream_chip(substream);
+ /* the hardware-specific codes will be here */
+ ....
+ return 0;
+
+ }
+
+ /* hw_params callback */
+ static int snd_mychip_pcm_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *hw_params)
+ {
+ return snd_pcm_lib_malloc_pages(substream,
+ params_buffer_bytes(hw_params));
+ }
+
+ /* hw_free callback */
+ static int snd_mychip_pcm_hw_free(struct snd_pcm_substream *substream)
+ {
+ return snd_pcm_lib_free_pages(substream);
+ }
+
+ /* prepare callback */
+ static int snd_mychip_pcm_prepare(struct snd_pcm_substream *substream)
+ {
+ struct mychip *chip = snd_pcm_substream_chip(substream);
+ struct snd_pcm_runtime *runtime = substream->runtime;
+
+ /* set up the hardware with the current configuration
+ * for example...
+ */
+ mychip_set_sample_format(chip, runtime->format);
+ mychip_set_sample_rate(chip, runtime->rate);
+ mychip_set_channels(chip, runtime->channels);
+ mychip_set_dma_setup(chip, runtime->dma_addr,
+ chip->buffer_size,
+ chip->period_size);
+ return 0;
+ }
+
+ /* trigger callback */
+ static int snd_mychip_pcm_trigger(struct snd_pcm_substream *substream,
+ int cmd)
+ {
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ /* do something to start the PCM engine */
+ ....
+ break;
+ case SNDRV_PCM_TRIGGER_STOP:
+ /* do something to stop the PCM engine */
+ ....
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ /* pointer callback */
+ static snd_pcm_uframes_t
+ snd_mychip_pcm_pointer(struct snd_pcm_substream *substream)
+ {
+ struct mychip *chip = snd_pcm_substream_chip(substream);
+ unsigned int current_ptr;
+
+ /* get the current hardware pointer */
+ current_ptr = mychip_get_hw_pointer(chip);
+ return current_ptr;
+ }
+
+ /* operators */
+ static struct snd_pcm_ops snd_mychip_playback_ops = {
+ .open = snd_mychip_playback_open,
+ .close = snd_mychip_playback_close,
+ .ioctl = snd_pcm_lib_ioctl,
+ .hw_params = snd_mychip_pcm_hw_params,
+ .hw_free = snd_mychip_pcm_hw_free,
+ .prepare = snd_mychip_pcm_prepare,
+ .trigger = snd_mychip_pcm_trigger,
+ .pointer = snd_mychip_pcm_pointer,
+ };
+
+ /* operators */
+ static struct snd_pcm_ops snd_mychip_capture_ops = {
+ .open = snd_mychip_capture_open,
+ .close = snd_mychip_capture_close,
+ .ioctl = snd_pcm_lib_ioctl,
+ .hw_params = snd_mychip_pcm_hw_params,
+ .hw_free = snd_mychip_pcm_hw_free,
+ .prepare = snd_mychip_pcm_prepare,
+ .trigger = snd_mychip_pcm_trigger,
+ .pointer = snd_mychip_pcm_pointer,
+ };
+
+ /*
+ * definitions of capture are omitted here...
+ */
+
+ /* create a pcm device */
+ static int snd_mychip_new_pcm(struct mychip *chip)
+ {
+ struct snd_pcm *pcm;
+ int err;
+
+ err = snd_pcm_new(chip->card, "My Chip", 0, 1, 1, &pcm);
+ if (err < 0)
+ return err;
+ pcm->private_data = chip;
+ strcpy(pcm->name, "My Chip");
+ chip->pcm = pcm;
+ /* set operators */
+ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK,
+ &snd_mychip_playback_ops);
+ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE,
+ &snd_mychip_capture_ops);
+ /* pre-allocation of buffers */
+ /* NOTE: this may fail */
+ snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+ snd_dma_pci_data(chip->pci),
+ 64*1024, 64*1024);
+ return 0;
+ }
+]]>
+ </programlisting>
+ </example>
+ </para>
+ </section>
+
+ <section id="pcm-interface-constructor">
+ <title>Constructor</title>
+ <para>
+ A pcm instance is allocated by the <function>snd_pcm_new()</function>
+ function. It would be better to create a constructor for pcm,
+ namely,
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_mychip_new_pcm(struct mychip *chip)
+ {
+ struct snd_pcm *pcm;
+ int err;
+
+ err = snd_pcm_new(chip->card, "My Chip", 0, 1, 1, &pcm);
+ if (err < 0)
+ return err;
+ pcm->private_data = chip;
+ strcpy(pcm->name, "My Chip");
+ chip->pcm = pcm;
+ ....
+ return 0;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The <function>snd_pcm_new()</function> function takes four
+ arguments. The first argument is the card pointer to which this
+ pcm is assigned, and the second is the ID string.
+ </para>
+
+ <para>
+ The third argument (<parameter>index</parameter>, 0 in the
+ above) is the index of this new pcm. It begins from zero. If
+ you create more than one pcm instances, specify the
+ different numbers in this argument. For example,
+ <parameter>index</parameter> = 1 for the second PCM device.
+ </para>
+
+ <para>
+ The fourth and fifth arguments are the number of substreams
+ for playback and capture, respectively. Here 1 is used for
+ both arguments. When no playback or capture substreams are available,
+ pass 0 to the corresponding argument.
+ </para>
+
+ <para>
+ If a chip supports multiple playbacks or captures, you can
+ specify more numbers, but they must be handled properly in
+ open/close, etc. callbacks. When you need to know which
+ substream you are referring to, then it can be obtained from
+ struct <structname>snd_pcm_substream</structname> data passed to each callback
+ as follows:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_pcm_substream *substream;
+ int index = substream->number;
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ After the pcm is created, you need to set operators for each
+ pcm stream.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK,
+ &snd_mychip_playback_ops);
+ snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE,
+ &snd_mychip_capture_ops);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The operators are defined typically like this:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static struct snd_pcm_ops snd_mychip_playback_ops = {
+ .open = snd_mychip_pcm_open,
+ .close = snd_mychip_pcm_close,
+ .ioctl = snd_pcm_lib_ioctl,
+ .hw_params = snd_mychip_pcm_hw_params,
+ .hw_free = snd_mychip_pcm_hw_free,
+ .prepare = snd_mychip_pcm_prepare,
+ .trigger = snd_mychip_pcm_trigger,
+ .pointer = snd_mychip_pcm_pointer,
+ };
+]]>
+ </programlisting>
+ </informalexample>
+
+ All the callbacks are described in the
+ <link linkend="pcm-interface-operators"><citetitle>
+ Operators</citetitle></link> subsection.
+ </para>
+
+ <para>
+ After setting the operators, you probably will want to
+ pre-allocate the buffer. For the pre-allocation, simply call
+ the following:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+ snd_dma_pci_data(chip->pci),
+ 64*1024, 64*1024);
+]]>
+ </programlisting>
+ </informalexample>
+
+ It will allocate a buffer up to 64kB as default.
+ Buffer management details will be described in the later section <link
+ linkend="buffer-and-memory"><citetitle>Buffer and Memory
+ Management</citetitle></link>.
+ </para>
+
+ <para>
+ Additionally, you can set some extra information for this pcm
+ in pcm-&gt;info_flags.
+ The available values are defined as
+ <constant>SNDRV_PCM_INFO_XXX</constant> in
+ <filename>&lt;sound/asound.h&gt;</filename>, which is used for
+ the hardware definition (described later). When your soundchip
+ supports only half-duplex, specify like this:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ pcm->info_flags = SNDRV_PCM_INFO_HALF_DUPLEX;
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+ </section>
+
+ <section id="pcm-interface-destructor">
+ <title>... And the Destructor?</title>
+ <para>
+ The destructor for a pcm instance is not always
+ necessary. Since the pcm device will be released by the middle
+ layer code automatically, you don't have to call the destructor
+ explicitly.
+ </para>
+
+ <para>
+ The destructor would be necessary if you created
+ special records internally and needed to release them. In such a
+ case, set the destructor function to
+ pcm-&gt;private_free:
+
+ <example>
+ <title>PCM Instance with a Destructor</title>
+ <programlisting>
+<![CDATA[
+ static void mychip_pcm_free(struct snd_pcm *pcm)
+ {
+ struct mychip *chip = snd_pcm_chip(pcm);
+ /* free your own data */
+ kfree(chip->my_private_pcm_data);
+ /* do what you like else */
+ ....
+ }
+
+ static int snd_mychip_new_pcm(struct mychip *chip)
+ {
+ struct snd_pcm *pcm;
+ ....
+ /* allocate your own data */
+ chip->my_private_pcm_data = kmalloc(...);
+ /* set the destructor */
+ pcm->private_data = chip;
+ pcm->private_free = mychip_pcm_free;
+ ....
+ }
+]]>
+ </programlisting>
+ </example>
+ </para>
+ </section>
+
+ <section id="pcm-interface-runtime">
+ <title>Runtime Pointer - The Chest of PCM Information</title>
+ <para>
+ When the PCM substream is opened, a PCM runtime instance is
+ allocated and assigned to the substream. This pointer is
+ accessible via <constant>substream-&gt;runtime</constant>.
+ This runtime pointer holds most information you need
+ to control the PCM: the copy of hw_params and sw_params configurations, the buffer
+ pointers, mmap records, spinlocks, etc.
+ </para>
+
+ <para>
+ The definition of runtime instance is found in
+ <filename>&lt;sound/pcm.h&gt;</filename>. Here are
+ the contents of this file:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+struct _snd_pcm_runtime {
+ /* -- Status -- */
+ struct snd_pcm_substream *trigger_master;
+ snd_timestamp_t trigger_tstamp; /* trigger timestamp */
+ int overrange;
+ snd_pcm_uframes_t avail_max;
+ snd_pcm_uframes_t hw_ptr_base; /* Position at buffer restart */
+ snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time*/
+
+ /* -- HW params -- */
+ snd_pcm_access_t access; /* access mode */
+ snd_pcm_format_t format; /* SNDRV_PCM_FORMAT_* */
+ snd_pcm_subformat_t subformat; /* subformat */
+ unsigned int rate; /* rate in Hz */
+ unsigned int channels; /* channels */
+ snd_pcm_uframes_t period_size; /* period size */
+ unsigned int periods; /* periods */
+ snd_pcm_uframes_t buffer_size; /* buffer size */
+ unsigned int tick_time; /* tick time */
+ snd_pcm_uframes_t min_align; /* Min alignment for the format */
+ size_t byte_align;
+ unsigned int frame_bits;
+ unsigned int sample_bits;
+ unsigned int info;
+ unsigned int rate_num;
+ unsigned int rate_den;
+
+ /* -- SW params -- */
+ struct timespec tstamp_mode; /* mmap timestamp is updated */
+ unsigned int period_step;
+ unsigned int sleep_min; /* min ticks to sleep */
+ snd_pcm_uframes_t start_threshold;
+ snd_pcm_uframes_t stop_threshold;
+ snd_pcm_uframes_t silence_threshold; /* Silence filling happens when
+ noise is nearest than this */
+ snd_pcm_uframes_t silence_size; /* Silence filling size */
+ snd_pcm_uframes_t boundary; /* pointers wrap point */
+
+ snd_pcm_uframes_t silenced_start;
+ snd_pcm_uframes_t silenced_size;
+
+ snd_pcm_sync_id_t sync; /* hardware synchronization ID */
+
+ /* -- mmap -- */
+ volatile struct snd_pcm_mmap_status *status;
+ volatile struct snd_pcm_mmap_control *control;
+ atomic_t mmap_count;
+
+ /* -- locking / scheduling -- */
+ spinlock_t lock;
+ wait_queue_head_t sleep;
+ struct timer_list tick_timer;
+ struct fasync_struct *fasync;
+
+ /* -- private section -- */
+ void *private_data;
+ void (*private_free)(struct snd_pcm_runtime *runtime);
+
+ /* -- hardware description -- */
+ struct snd_pcm_hardware hw;
+ struct snd_pcm_hw_constraints hw_constraints;
+
+ /* -- interrupt callbacks -- */
+ void (*transfer_ack_begin)(struct snd_pcm_substream *substream);
+ void (*transfer_ack_end)(struct snd_pcm_substream *substream);
+
+ /* -- timer -- */
+ unsigned int timer_resolution; /* timer resolution */
+
+ /* -- DMA -- */
+ unsigned char *dma_area; /* DMA area */
+ dma_addr_t dma_addr; /* physical bus address (not accessible from main CPU) */
+ size_t dma_bytes; /* size of DMA area */
+
+ struct snd_dma_buffer *dma_buffer_p; /* allocated buffer */
+
+#if defined(CONFIG_SND_PCM_OSS) || defined(CONFIG_SND_PCM_OSS_MODULE)
+ /* -- OSS things -- */
+ struct snd_pcm_oss_runtime oss;
+#endif
+};
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ For the operators (callbacks) of each sound driver, most of
+ these records are supposed to be read-only. Only the PCM
+ middle-layer changes / updates them. The exceptions are
+ the hardware description (hw), interrupt callbacks
+ (transfer_ack_xxx), DMA buffer information, and the private
+ data. Besides, if you use the standard buffer allocation
+ method via <function>snd_pcm_lib_malloc_pages()</function>,
+ you don't need to set the DMA buffer information by yourself.
+ </para>
+
+ <para>
+ In the sections below, important records are explained.
+ </para>
+
+ <section id="pcm-interface-runtime-hw">
+ <title>Hardware Description</title>
+ <para>
+ The hardware descriptor (struct <structname>snd_pcm_hardware</structname>)
+ contains the definitions of the fundamental hardware
+ configuration. Above all, you'll need to define this in
+ <link linkend="pcm-interface-operators-open-callback"><citetitle>
+ the open callback</citetitle></link>.
+ Note that the runtime instance holds the copy of the
+ descriptor, not the pointer to the existing descriptor. That
+ is, in the open callback, you can modify the copied descriptor
+ (<constant>runtime-&gt;hw</constant>) as you need. For example, if the maximum
+ number of channels is 1 only on some chip models, you can
+ still use the same hardware descriptor and change the
+ channels_max later:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_pcm_runtime *runtime = substream->runtime;
+ ...
+ runtime->hw = snd_mychip_playback_hw; /* common definition */
+ if (chip->model == VERY_OLD_ONE)
+ runtime->hw.channels_max = 1;
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Typically, you'll have a hardware descriptor as below:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static struct snd_pcm_hardware snd_mychip_playback_hw = {
+ .info = (SNDRV_PCM_INFO_MMAP |
+ SNDRV_PCM_INFO_INTERLEAVED |
+ SNDRV_PCM_INFO_BLOCK_TRANSFER |
+ SNDRV_PCM_INFO_MMAP_VALID),
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
+ .rates = SNDRV_PCM_RATE_8000_48000,
+ .rate_min = 8000,
+ .rate_max = 48000,
+ .channels_min = 2,
+ .channels_max = 2,
+ .buffer_bytes_max = 32768,
+ .period_bytes_min = 4096,
+ .period_bytes_max = 32768,
+ .periods_min = 1,
+ .periods_max = 1024,
+ };
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ <itemizedlist>
+ <listitem><para>
+ The <structfield>info</structfield> field contains the type and
+ capabilities of this pcm. The bit flags are defined in
+ <filename>&lt;sound/asound.h&gt;</filename> as
+ <constant>SNDRV_PCM_INFO_XXX</constant>. Here, at least, you
+ have to specify whether the mmap is supported and which
+ interleaved format is supported.
+ When the hardware supports mmap, add the
+ <constant>SNDRV_PCM_INFO_MMAP</constant> flag here. When the
+ hardware supports the interleaved or the non-interleaved
+ formats, <constant>SNDRV_PCM_INFO_INTERLEAVED</constant> or
+ <constant>SNDRV_PCM_INFO_NONINTERLEAVED</constant> flag must
+ be set, respectively. If both are supported, you can set both,
+ too.
+ </para>
+
+ <para>
+ In the above example, <constant>MMAP_VALID</constant> and
+ <constant>BLOCK_TRANSFER</constant> are specified for the OSS mmap
+ mode. Usually both are set. Of course,
+ <constant>MMAP_VALID</constant> is set only if the mmap is
+ really supported.
+ </para>
+
+ <para>
+ The other possible flags are
+ <constant>SNDRV_PCM_INFO_PAUSE</constant> and
+ <constant>SNDRV_PCM_INFO_RESUME</constant>. The
+ <constant>PAUSE</constant> bit means that the pcm supports the
+ <quote>pause</quote> operation, while the
+ <constant>RESUME</constant> bit means that the pcm supports
+ the full <quote>suspend/resume</quote> operation.
+ If the <constant>PAUSE</constant> flag is set,
+ the <structfield>trigger</structfield> callback below
+ must handle the corresponding (pause push/release) commands.
+ The suspend/resume trigger commands can be defined even without
+ the <constant>RESUME</constant> flag. See <link
+ linkend="power-management"><citetitle>
+ Power Management</citetitle></link> section for details.
+ </para>
+
+ <para>
+ When the PCM substreams can be synchronized (typically,
+ synchronized start/stop of a playback and a capture streams),
+ you can give <constant>SNDRV_PCM_INFO_SYNC_START</constant>,
+ too. In this case, you'll need to check the linked-list of
+ PCM substreams in the trigger callback. This will be
+ described in the later section.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <structfield>formats</structfield> field contains the bit-flags
+ of supported formats (<constant>SNDRV_PCM_FMTBIT_XXX</constant>).
+ If the hardware supports more than one format, give all or'ed
+ bits. In the example above, the signed 16bit little-endian
+ format is specified.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <structfield>rates</structfield> field contains the bit-flags of
+ supported rates (<constant>SNDRV_PCM_RATE_XXX</constant>).
+ When the chip supports continuous rates, pass
+ <constant>CONTINUOUS</constant> bit additionally.
+ The pre-defined rate bits are provided only for typical
+ rates. If your chip supports unconventional rates, you need to add
+ the <constant>KNOT</constant> bit and set up the hardware
+ constraint manually (explained later).
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <structfield>rate_min</structfield> and
+ <structfield>rate_max</structfield> define the minimum and
+ maximum sample rate. This should correspond somehow to
+ <structfield>rates</structfield> bits.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <structfield>channel_min</structfield> and
+ <structfield>channel_max</structfield>
+ define, as you might already expected, the minimum and maximum
+ number of channels.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <structfield>buffer_bytes_max</structfield> defines the
+ maximum buffer size in bytes. There is no
+ <structfield>buffer_bytes_min</structfield> field, since
+ it can be calculated from the minimum period size and the
+ minimum number of periods.
+ Meanwhile, <structfield>period_bytes_min</structfield> and
+ define the minimum and maximum size of the period in bytes.
+ <structfield>periods_max</structfield> and
+ <structfield>periods_min</structfield> define the maximum and
+ minimum number of periods in the buffer.
+ </para>
+
+ <para>
+ The <quote>period</quote> is a term that corresponds to
+ a fragment in the OSS world. The period defines the size at
+ which a PCM interrupt is generated. This size strongly
+ depends on the hardware.
+ Generally, the smaller period size will give you more
+ interrupts, that is, more controls.
+ In the case of capture, this size defines the input latency.
+ On the other hand, the whole buffer size defines the
+ output latency for the playback direction.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ There is also a field <structfield>fifo_size</structfield>.
+ This specifies the size of the hardware FIFO, but currently it
+ is neither used in the driver nor in the alsa-lib. So, you
+ can ignore this field.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </para>
+ </section>
+
+ <section id="pcm-interface-runtime-config">
+ <title>PCM Configurations</title>
+ <para>
+ Ok, let's go back again to the PCM runtime records.
+ The most frequently referred records in the runtime instance are
+ the PCM configurations.
+ The PCM configurations are stored in the runtime instance
+ after the application sends <type>hw_params</type> data via
+ alsa-lib. There are many fields copied from hw_params and
+ sw_params structs. For example,
+ <structfield>format</structfield> holds the format type
+ chosen by the application. This field contains the enum value
+ <constant>SNDRV_PCM_FORMAT_XXX</constant>.
+ </para>
+
+ <para>
+ One thing to be noted is that the configured buffer and period
+ sizes are stored in <quote>frames</quote> in the runtime.
+ In the ALSA world, 1 frame = channels * samples-size.
+ For conversion between frames and bytes, you can use the
+ <function>frames_to_bytes()</function> and
+ <function>bytes_to_frames()</function> helper functions.
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ period_bytes = frames_to_bytes(runtime, runtime->period_size);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Also, many software parameters (sw_params) are
+ stored in frames, too. Please check the type of the field.
+ <type>snd_pcm_uframes_t</type> is for the frames as unsigned
+ integer while <type>snd_pcm_sframes_t</type> is for the frames
+ as signed integer.
+ </para>
+ </section>
+
+ <section id="pcm-interface-runtime-dma">
+ <title>DMA Buffer Information</title>
+ <para>
+ The DMA buffer is defined by the following four fields,
+ <structfield>dma_area</structfield>,
+ <structfield>dma_addr</structfield>,
+ <structfield>dma_bytes</structfield> and
+ <structfield>dma_private</structfield>.
+ The <structfield>dma_area</structfield> holds the buffer
+ pointer (the logical address). You can call
+ <function>memcpy</function> from/to
+ this pointer. Meanwhile, <structfield>dma_addr</structfield>
+ holds the physical address of the buffer. This field is
+ specified only when the buffer is a linear buffer.
+ <structfield>dma_bytes</structfield> holds the size of buffer
+ in bytes. <structfield>dma_private</structfield> is used for
+ the ALSA DMA allocator.
+ </para>
+
+ <para>
+ If you use a standard ALSA function,
+ <function>snd_pcm_lib_malloc_pages()</function>, for
+ allocating the buffer, these fields are set by the ALSA middle
+ layer, and you should <emphasis>not</emphasis> change them by
+ yourself. You can read them but not write them.
+ On the other hand, if you want to allocate the buffer by
+ yourself, you'll need to manage it in hw_params callback.
+ At least, <structfield>dma_bytes</structfield> is mandatory.
+ <structfield>dma_area</structfield> is necessary when the
+ buffer is mmapped. If your driver doesn't support mmap, this
+ field is not necessary. <structfield>dma_addr</structfield>
+ is also optional. You can use
+ <structfield>dma_private</structfield> as you like, too.
+ </para>
+ </section>
+
+ <section id="pcm-interface-runtime-status">
+ <title>Running Status</title>
+ <para>
+ The running status can be referred via <constant>runtime-&gt;status</constant>.
+ This is the pointer to the struct <structname>snd_pcm_mmap_status</structname>
+ record. For example, you can get the current DMA hardware
+ pointer via <constant>runtime-&gt;status-&gt;hw_ptr</constant>.
+ </para>
+
+ <para>
+ The DMA application pointer can be referred via
+ <constant>runtime-&gt;control</constant>, which points to the
+ struct <structname>snd_pcm_mmap_control</structname> record.
+ However, accessing directly to this value is not recommended.
+ </para>
+ </section>
+
+ <section id="pcm-interface-runtime-private">
+ <title>Private Data</title>
+ <para>
+ You can allocate a record for the substream and store it in
+ <constant>runtime-&gt;private_data</constant>. Usually, this
+ is done in
+ <link linkend="pcm-interface-operators-open-callback"><citetitle>
+ the open callback</citetitle></link>.
+ Don't mix this with <constant>pcm-&gt;private_data</constant>.
+ The <constant>pcm-&gt;private_data</constant> usually points to the
+ chip instance assigned statically at the creation of PCM, while the
+ <constant>runtime-&gt;private_data</constant> points to a dynamic
+ data structure created at the PCM open callback.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_open(struct snd_pcm_substream *substream)
+ {
+ struct my_pcm_data *data;
+ ....
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ substream->runtime->private_data = data;
+ ....
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The allocated object must be released in
+ <link linkend="pcm-interface-operators-open-callback"><citetitle>
+ the close callback</citetitle></link>.
+ </para>
+ </section>
+
+ <section id="pcm-interface-runtime-intr">
+ <title>Interrupt Callbacks</title>
+ <para>
+ The field <structfield>transfer_ack_begin</structfield> and
+ <structfield>transfer_ack_end</structfield> are called at
+ the beginning and at the end of
+ <function>snd_pcm_period_elapsed()</function>, respectively.
+ </para>
+ </section>
+
+ </section>
+
+ <section id="pcm-interface-operators">
+ <title>Operators</title>
+ <para>
+ OK, now let me give details about each pcm callback
+ (<parameter>ops</parameter>). In general, every callback must
+ return 0 if successful, or a negative error number
+ such as <constant>-EINVAL</constant>. To choose an appropriate
+ error number, it is advised to check what value other parts of
+ the kernel return when the same kind of request fails.
+ </para>
+
+ <para>
+ The callback function takes at least the argument with
+ <structname>snd_pcm_substream</structname> pointer. To retrieve
+ the chip record from the given substream instance, you can use the
+ following macro.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ int xxx() {
+ struct mychip *chip = snd_pcm_substream_chip(substream);
+ ....
+ }
+]]>
+ </programlisting>
+ </informalexample>
+
+ The macro reads <constant>substream-&gt;private_data</constant>,
+ which is a copy of <constant>pcm-&gt;private_data</constant>.
+ You can override the former if you need to assign different data
+ records per PCM substream. For example, the cmi8330 driver assigns
+ different private_data for playback and capture directions,
+ because it uses two different codecs (SB- and AD-compatible) for
+ different directions.
+ </para>
+
+ <section id="pcm-interface-operators-open-callback">
+ <title>open callback</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_open(struct snd_pcm_substream *substream);
+]]>
+ </programlisting>
+ </informalexample>
+
+ This is called when a pcm substream is opened.
+ </para>
+
+ <para>
+ At least, here you have to initialize the runtime-&gt;hw
+ record. Typically, this is done by like this:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_open(struct snd_pcm_substream *substream)
+ {
+ struct mychip *chip = snd_pcm_substream_chip(substream);
+ struct snd_pcm_runtime *runtime = substream->runtime;
+
+ runtime->hw = snd_mychip_playback_hw;
+ return 0;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+
+ where <parameter>snd_mychip_playback_hw</parameter> is the
+ pre-defined hardware description.
+ </para>
+
+ <para>
+ You can allocate a private data in this callback, as described
+ in <link linkend="pcm-interface-runtime-private"><citetitle>
+ Private Data</citetitle></link> section.
+ </para>
+
+ <para>
+ If the hardware configuration needs more constraints, set the
+ hardware constraints here, too.
+ See <link linkend="pcm-interface-constraints"><citetitle>
+ Constraints</citetitle></link> for more details.
+ </para>
+ </section>
+
+ <section id="pcm-interface-operators-close-callback">
+ <title>close callback</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_close(struct snd_pcm_substream *substream);
+]]>
+ </programlisting>
+ </informalexample>
+
+ Obviously, this is called when a pcm substream is closed.
+ </para>
+
+ <para>
+ Any private instance for a pcm substream allocated in the
+ open callback will be released here.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_close(struct snd_pcm_substream *substream)
+ {
+ ....
+ kfree(substream->runtime->private_data);
+ ....
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+ </section>
+
+ <section id="pcm-interface-operators-ioctl-callback">
+ <title>ioctl callback</title>
+ <para>
+ This is used for any special call to pcm ioctls. But
+ usually you can pass a generic ioctl callback,
+ <function>snd_pcm_lib_ioctl</function>.
+ </para>
+ </section>
+
+ <section id="pcm-interface-operators-hw-params-callback">
+ <title>hw_params callback</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_hw_params(struct snd_pcm_substream *substream,
+ struct snd_pcm_hw_params *hw_params);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ This is called when the hardware parameter
+ (<structfield>hw_params</structfield>) is set
+ up by the application,
+ that is, once when the buffer size, the period size, the
+ format, etc. are defined for the pcm substream.
+ </para>
+
+ <para>
+ Many hardware setups should be done in this callback,
+ including the allocation of buffers.
+ </para>
+
+ <para>
+ Parameters to be initialized are retrieved by
+ <function>params_xxx()</function> macros. To allocate
+ buffer, you can call a helper function,
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_pcm_lib_malloc_pages(substream, params_buffer_bytes(hw_params));
+]]>
+ </programlisting>
+ </informalexample>
+
+ <function>snd_pcm_lib_malloc_pages()</function> is available
+ only when the DMA buffers have been pre-allocated.
+ See the section <link
+ linkend="buffer-and-memory-buffer-types"><citetitle>
+ Buffer Types</citetitle></link> for more details.
+ </para>
+
+ <para>
+ Note that this and <structfield>prepare</structfield> callbacks
+ may be called multiple times per initialization.
+ For example, the OSS emulation may
+ call these callbacks at each change via its ioctl.
+ </para>
+
+ <para>
+ Thus, you need to be careful not to allocate the same buffers
+ many times, which will lead to memory leaks! Calling the
+ helper function above many times is OK. It will release the
+ previous buffer automatically when it was already allocated.
+ </para>
+
+ <para>
+ Another note is that this callback is non-atomic
+ (schedulable) as default, i.e. when no
+ <structfield>nonatomic</structfield> flag set.
+ This is important, because the
+ <structfield>trigger</structfield> callback
+ is atomic (non-schedulable). That is, mutexes or any
+ schedule-related functions are not available in
+ <structfield>trigger</structfield> callback.
+ Please see the subsection
+ <link linkend="pcm-interface-atomicity"><citetitle>
+ Atomicity</citetitle></link> for details.
+ </para>
+ </section>
+
+ <section id="pcm-interface-operators-hw-free-callback">
+ <title>hw_free callback</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_hw_free(struct snd_pcm_substream *substream);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ This is called to release the resources allocated via
+ <structfield>hw_params</structfield>. For example, releasing the
+ buffer via
+ <function>snd_pcm_lib_malloc_pages()</function> is done by
+ calling the following:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_pcm_lib_free_pages(substream);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ This function is always called before the close callback is called.
+ Also, the callback may be called multiple times, too.
+ Keep track whether the resource was already released.
+ </para>
+ </section>
+
+ <section id="pcm-interface-operators-prepare-callback">
+ <title>prepare callback</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_prepare(struct snd_pcm_substream *substream);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ This callback is called when the pcm is
+ <quote>prepared</quote>. You can set the format type, sample
+ rate, etc. here. The difference from
+ <structfield>hw_params</structfield> is that the
+ <structfield>prepare</structfield> callback will be called each
+ time
+ <function>snd_pcm_prepare()</function> is called, i.e. when
+ recovering after underruns, etc.
+ </para>
+
+ <para>
+ Note that this callback is now non-atomic.
+ You can use schedule-related functions safely in this callback.
+ </para>
+
+ <para>
+ In this and the following callbacks, you can refer to the
+ values via the runtime record,
+ substream-&gt;runtime.
+ For example, to get the current
+ rate, format or channels, access to
+ runtime-&gt;rate,
+ runtime-&gt;format or
+ runtime-&gt;channels, respectively.
+ The physical address of the allocated buffer is set to
+ runtime-&gt;dma_area. The buffer and period sizes are
+ in runtime-&gt;buffer_size and runtime-&gt;period_size,
+ respectively.
+ </para>
+
+ <para>
+ Be careful that this callback will be called many times at
+ each setup, too.
+ </para>
+ </section>
+
+ <section id="pcm-interface-operators-trigger-callback">
+ <title>trigger callback</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_trigger(struct snd_pcm_substream *substream, int cmd);
+]]>
+ </programlisting>
+ </informalexample>
+
+ This is called when the pcm is started, stopped or paused.
+ </para>
+
+ <para>
+ Which action is specified in the second argument,
+ <constant>SNDRV_PCM_TRIGGER_XXX</constant> in
+ <filename>&lt;sound/pcm.h&gt;</filename>. At least,
+ the <constant>START</constant> and <constant>STOP</constant>
+ commands must be defined in this callback.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ switch (cmd) {
+ case SNDRV_PCM_TRIGGER_START:
+ /* do something to start the PCM engine */
+ break;
+ case SNDRV_PCM_TRIGGER_STOP:
+ /* do something to stop the PCM engine */
+ break;
+ default:
+ return -EINVAL;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ When the pcm supports the pause operation (given in the info
+ field of the hardware table), the <constant>PAUSE_PUSH</constant>
+ and <constant>PAUSE_RELEASE</constant> commands must be
+ handled here, too. The former is the command to pause the pcm,
+ and the latter to restart the pcm again.
+ </para>
+
+ <para>
+ When the pcm supports the suspend/resume operation,
+ regardless of full or partial suspend/resume support,
+ the <constant>SUSPEND</constant> and <constant>RESUME</constant>
+ commands must be handled, too.
+ These commands are issued when the power-management status is
+ changed. Obviously, the <constant>SUSPEND</constant> and
+ <constant>RESUME</constant> commands
+ suspend and resume the pcm substream, and usually, they
+ are identical to the <constant>STOP</constant> and
+ <constant>START</constant> commands, respectively.
+ See the <link linkend="power-management"><citetitle>
+ Power Management</citetitle></link> section for details.
+ </para>
+
+ <para>
+ As mentioned, this callback is atomic as default unless
+ <structfield>nonatomic</structfield> flag set, and
+ you cannot call functions which may sleep.
+ The trigger callback should be as minimal as possible,
+ just really triggering the DMA. The other stuff should be
+ initialized hw_params and prepare callbacks properly
+ beforehand.
+ </para>
+ </section>
+
+ <section id="pcm-interface-operators-pointer-callback">
+ <title>pointer callback</title>
+ <para>
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static snd_pcm_uframes_t snd_xxx_pointer(struct snd_pcm_substream *substream)
+]]>
+ </programlisting>
+ </informalexample>
+
+ This callback is called when the PCM middle layer inquires
+ the current hardware position on the buffer. The position must
+ be returned in frames,
+ ranging from 0 to buffer_size - 1.
+ </para>
+
+ <para>
+ This is called usually from the buffer-update routine in the
+ pcm middle layer, which is invoked when
+ <function>snd_pcm_period_elapsed()</function> is called in the
+ interrupt routine. Then the pcm middle layer updates the
+ position and calculates the available space, and wakes up the
+ sleeping poll threads, etc.
+ </para>
+
+ <para>
+ This callback is also atomic as default.
+ </para>
+ </section>
+
+ <section id="pcm-interface-operators-copy-silence">
+ <title>copy and silence callbacks</title>
+ <para>
+ These callbacks are not mandatory, and can be omitted in
+ most cases. These callbacks are used when the hardware buffer
+ cannot be in the normal memory space. Some chips have their
+ own buffer on the hardware which is not mappable. In such a
+ case, you have to transfer the data manually from the memory
+ buffer to the hardware buffer. Or, if the buffer is
+ non-contiguous on both physical and virtual memory spaces,
+ these callbacks must be defined, too.
+ </para>
+
+ <para>
+ If these two callbacks are defined, copy and set-silence
+ operations are done by them. The detailed will be described in
+ the later section <link
+ linkend="buffer-and-memory"><citetitle>Buffer and Memory
+ Management</citetitle></link>.
+ </para>
+ </section>
+
+ <section id="pcm-interface-operators-ack">
+ <title>ack callback</title>
+ <para>
+ This callback is also not mandatory. This callback is called
+ when the appl_ptr is updated in read or write operations.
+ Some drivers like emu10k1-fx and cs46xx need to track the
+ current appl_ptr for the internal buffer, and this callback
+ is useful only for such a purpose.
+ </para>
+ <para>
+ This callback is atomic as default.
+ </para>
+ </section>
+
+ <section id="pcm-interface-operators-page-callback">
+ <title>page callback</title>
+
+ <para>
+ This callback is optional too. This callback is used
+ mainly for non-contiguous buffers. The mmap calls this
+ callback to get the page address. Some examples will be
+ explained in the later section <link
+ linkend="buffer-and-memory"><citetitle>Buffer and Memory
+ Management</citetitle></link>, too.
+ </para>
+ </section>
+ </section>
+
+ <section id="pcm-interface-interrupt-handler">
+ <title>Interrupt Handler</title>
+ <para>
+ The rest of pcm stuff is the PCM interrupt handler. The
+ role of PCM interrupt handler in the sound driver is to update
+ the buffer position and to tell the PCM middle layer when the
+ buffer position goes across the prescribed period size. To
+ inform this, call the <function>snd_pcm_period_elapsed()</function>
+ function.
+ </para>
+
+ <para>
+ There are several types of sound chips to generate the interrupts.
+ </para>
+
+ <section id="pcm-interface-interrupt-handler-boundary">
+ <title>Interrupts at the period (fragment) boundary</title>
+ <para>
+ This is the most frequently found type: the hardware
+ generates an interrupt at each period boundary.
+ In this case, you can call
+ <function>snd_pcm_period_elapsed()</function> at each
+ interrupt.
+ </para>
+
+ <para>
+ <function>snd_pcm_period_elapsed()</function> takes the
+ substream pointer as its argument. Thus, you need to keep the
+ substream pointer accessible from the chip instance. For
+ example, define substream field in the chip record to hold the
+ current running substream pointer, and set the pointer value
+ at open callback (and reset at close callback).
+ </para>
+
+ <para>
+ If you acquire a spinlock in the interrupt handler, and the
+ lock is used in other pcm callbacks, too, then you have to
+ release the lock before calling
+ <function>snd_pcm_period_elapsed()</function>, because
+ <function>snd_pcm_period_elapsed()</function> calls other pcm
+ callbacks inside.
+ </para>
+
+ <para>
+ Typical code would be like:
+
+ <example>
+ <title>Interrupt Handler Case #1</title>
+ <programlisting>
+<![CDATA[
+ static irqreturn_t snd_mychip_interrupt(int irq, void *dev_id)
+ {
+ struct mychip *chip = dev_id;
+ spin_lock(&chip->lock);
+ ....
+ if (pcm_irq_invoked(chip)) {
+ /* call updater, unlock before it */
+ spin_unlock(&chip->lock);
+ snd_pcm_period_elapsed(chip->substream);
+ spin_lock(&chip->lock);
+ /* acknowledge the interrupt if necessary */
+ }
+ ....
+ spin_unlock(&chip->lock);
+ return IRQ_HANDLED;
+ }
+]]>
+ </programlisting>
+ </example>
+ </para>
+ </section>
+
+ <section id="pcm-interface-interrupt-handler-timer">
+ <title>High frequency timer interrupts</title>
+ <para>
+ This happens when the hardware doesn't generate interrupts
+ at the period boundary but issues timer interrupts at a fixed
+ timer rate (e.g. es1968 or ymfpci drivers).
+ In this case, you need to check the current hardware
+ position and accumulate the processed sample length at each
+ interrupt. When the accumulated size exceeds the period
+ size, call
+ <function>snd_pcm_period_elapsed()</function> and reset the
+ accumulator.
+ </para>
+
+ <para>
+ Typical code would be like the following.
+
+ <example>
+ <title>Interrupt Handler Case #2</title>
+ <programlisting>
+<![CDATA[
+ static irqreturn_t snd_mychip_interrupt(int irq, void *dev_id)
+ {
+ struct mychip *chip = dev_id;
+ spin_lock(&chip->lock);
+ ....
+ if (pcm_irq_invoked(chip)) {
+ unsigned int last_ptr, size;
+ /* get the current hardware pointer (in frames) */
+ last_ptr = get_hw_ptr(chip);
+ /* calculate the processed frames since the
+ * last update
+ */
+ if (last_ptr < chip->last_ptr)
+ size = runtime->buffer_size + last_ptr
+ - chip->last_ptr;
+ else
+ size = last_ptr - chip->last_ptr;
+ /* remember the last updated point */
+ chip->last_ptr = last_ptr;
+ /* accumulate the size */
+ chip->size += size;
+ /* over the period boundary? */
+ if (chip->size >= runtime->period_size) {
+ /* reset the accumulator */
+ chip->size %= runtime->period_size;
+ /* call updater */
+ spin_unlock(&chip->lock);
+ snd_pcm_period_elapsed(substream);
+ spin_lock(&chip->lock);
+ }
+ /* acknowledge the interrupt if necessary */
+ }
+ ....
+ spin_unlock(&chip->lock);
+ return IRQ_HANDLED;
+ }
+]]>
+ </programlisting>
+ </example>
+ </para>
+ </section>
+
+ <section id="pcm-interface-interrupt-handler-both">
+ <title>On calling <function>snd_pcm_period_elapsed()</function></title>
+ <para>
+ In both cases, even if more than one period are elapsed, you
+ don't have to call
+ <function>snd_pcm_period_elapsed()</function> many times. Call
+ only once. And the pcm layer will check the current hardware
+ pointer and update to the latest status.
+ </para>
+ </section>
+ </section>
+
+ <section id="pcm-interface-atomicity">
+ <title>Atomicity</title>
+ <para>
+ One of the most important (and thus difficult to debug) problems
+ in kernel programming are race conditions.
+ In the Linux kernel, they are usually avoided via spin-locks, mutexes
+ or semaphores. In general, if a race condition can happen
+ in an interrupt handler, it has to be managed atomically, and you
+ have to use a spinlock to protect the critical session. If the
+ critical section is not in interrupt handler code and
+ if taking a relatively long time to execute is acceptable, you
+ should use mutexes or semaphores instead.
+ </para>
+
+ <para>
+ As already seen, some pcm callbacks are atomic and some are
+ not. For example, the <parameter>hw_params</parameter> callback is
+ non-atomic, while <parameter>trigger</parameter> callback is
+ atomic. This means, the latter is called already in a spinlock
+ held by the PCM middle layer. Please take this atomicity into
+ account when you choose a locking scheme in the callbacks.
+ </para>
+
+ <para>
+ In the atomic callbacks, you cannot use functions which may call
+ <function>schedule</function> or go to
+ <function>sleep</function>. Semaphores and mutexes can sleep,
+ and hence they cannot be used inside the atomic callbacks
+ (e.g. <parameter>trigger</parameter> callback).
+ To implement some delay in such a callback, please use
+ <function>udelay()</function> or <function>mdelay()</function>.
+ </para>
+
+ <para>
+ All three atomic callbacks (trigger, pointer, and ack) are
+ called with local interrupts disabled.
+ </para>
+
+ <para>
+ The recent changes in PCM core code, however, allow all PCM
+ operations to be non-atomic. This assumes that the all caller
+ sides are in non-atomic contexts. For example, the function
+ <function>snd_pcm_period_elapsed()</function> is called
+ typically from the interrupt handler. But, if you set up the
+ driver to use a threaded interrupt handler, this call can be in
+ non-atomic context, too. In such a case, you can set
+ <structfield>nonatomic</structfield> filed of
+ <structname>snd_pcm</structname> object after creating it.
+ When this flag is set, mutex and rwsem are used internally in
+ the PCM core instead of spin and rwlocks, so that you can call
+ all PCM functions safely in a non-atomic context.
+ </para>
+
+ </section>
+ <section id="pcm-interface-constraints">
+ <title>Constraints</title>
+ <para>
+ If your chip supports unconventional sample rates, or only the
+ limited samples, you need to set a constraint for the
+ condition.
+ </para>
+
+ <para>
+ For example, in order to restrict the sample rates in the some
+ supported values, use
+ <function>snd_pcm_hw_constraint_list()</function>.
+ You need to call this function in the open callback.
+
+ <example>
+ <title>Example of Hardware Constraints</title>
+ <programlisting>
+<![CDATA[
+ static unsigned int rates[] =
+ {4000, 10000, 22050, 44100};
+ static struct snd_pcm_hw_constraint_list constraints_rates = {
+ .count = ARRAY_SIZE(rates),
+ .list = rates,
+ .mask = 0,
+ };
+
+ static int snd_mychip_pcm_open(struct snd_pcm_substream *substream)
+ {
+ int err;
+ ....
+ err = snd_pcm_hw_constraint_list(substream->runtime, 0,
+ SNDRV_PCM_HW_PARAM_RATE,
+ &constraints_rates);
+ if (err < 0)
+ return err;
+ ....
+ }
+]]>
+ </programlisting>
+ </example>
+ </para>
+
+ <para>
+ There are many different constraints.
+ Look at <filename>sound/pcm.h</filename> for a complete list.
+ You can even define your own constraint rules.
+ For example, let's suppose my_chip can manage a substream of 1 channel
+ if and only if the format is S16_LE, otherwise it supports any format
+ specified in the <structname>snd_pcm_hardware</structname> structure (or in any
+ other constraint_list). You can build a rule like this:
+
+ <example>
+ <title>Example of Hardware Constraints for Channels</title>
+ <programlisting>
+<![CDATA[
+ static int hw_rule_channels_by_format(struct snd_pcm_hw_params *params,
+ struct snd_pcm_hw_rule *rule)
+ {
+ struct snd_interval *c = hw_param_interval(params,
+ SNDRV_PCM_HW_PARAM_CHANNELS);
+ struct snd_mask *f = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+ struct snd_interval ch;
+
+ snd_interval_any(&ch);
+ if (f->bits[0] == SNDRV_PCM_FMTBIT_S16_LE) {
+ ch.min = ch.max = 1;
+ ch.integer = 1;
+ return snd_interval_refine(c, &ch);
+ }
+ return 0;
+ }
+]]>
+ </programlisting>
+ </example>
+ </para>
+
+ <para>
+ Then you need to call this function to add your rule:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_pcm_hw_rule_add(substream->runtime, 0, SNDRV_PCM_HW_PARAM_CHANNELS,
+ hw_rule_channels_by_format, NULL,
+ SNDRV_PCM_HW_PARAM_FORMAT, -1);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The rule function is called when an application sets the PCM
+ format, and it refines the number of channels accordingly.
+ But an application may set the number of channels before
+ setting the format. Thus you also need to define the inverse rule:
+
+ <example>
+ <title>Example of Hardware Constraints for Formats</title>
+ <programlisting>
+<![CDATA[
+ static int hw_rule_format_by_channels(struct snd_pcm_hw_params *params,
+ struct snd_pcm_hw_rule *rule)
+ {
+ struct snd_interval *c = hw_param_interval(params,
+ SNDRV_PCM_HW_PARAM_CHANNELS);
+ struct snd_mask *f = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+ struct snd_mask fmt;
+
+ snd_mask_any(&fmt); /* Init the struct */
+ if (c->min < 2) {
+ fmt.bits[0] &= SNDRV_PCM_FMTBIT_S16_LE;
+ return snd_mask_refine(f, &fmt);
+ }
+ return 0;
+ }
+]]>
+ </programlisting>
+ </example>
+ </para>
+
+ <para>
+ ...and in the open callback:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_pcm_hw_rule_add(substream->runtime, 0, SNDRV_PCM_HW_PARAM_FORMAT,
+ hw_rule_format_by_channels, NULL,
+ SNDRV_PCM_HW_PARAM_CHANNELS, -1);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ I won't give more details here, rather I
+ would like to say, <quote>Luke, use the source.</quote>
+ </para>
+ </section>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- Control Interface -->
+<!-- ****************************************************** -->
+ <chapter id="control-interface">
+ <title>Control Interface</title>
+
+ <section id="control-interface-general">
+ <title>General</title>
+ <para>
+ The control interface is used widely for many switches,
+ sliders, etc. which are accessed from user-space. Its most
+ important use is the mixer interface. In other words, since ALSA
+ 0.9.x, all the mixer stuff is implemented on the control kernel API.
+ </para>
+
+ <para>
+ ALSA has a well-defined AC97 control module. If your chip
+ supports only the AC97 and nothing else, you can skip this
+ section.
+ </para>
+
+ <para>
+ The control API is defined in
+ <filename>&lt;sound/control.h&gt;</filename>.
+ Include this file if you want to add your own controls.
+ </para>
+ </section>
+
+ <section id="control-interface-definition">
+ <title>Definition of Controls</title>
+ <para>
+ To create a new control, you need to define the
+ following three
+ callbacks: <structfield>info</structfield>,
+ <structfield>get</structfield> and
+ <structfield>put</structfield>. Then, define a
+ struct <structname>snd_kcontrol_new</structname> record, such as:
+
+ <example>
+ <title>Definition of a Control</title>
+ <programlisting>
+<![CDATA[
+ static struct snd_kcontrol_new my_control = {
+ .iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+ .name = "PCM Playback Switch",
+ .index = 0,
+ .access = SNDRV_CTL_ELEM_ACCESS_READWRITE,
+ .private_value = 0xffff,
+ .info = my_control_info,
+ .get = my_control_get,
+ .put = my_control_put
+ };
+]]>
+ </programlisting>
+ </example>
+ </para>
+
+ <para>
+ The <structfield>iface</structfield> field specifies the control
+ type, <constant>SNDRV_CTL_ELEM_IFACE_XXX</constant>, which
+ is usually <constant>MIXER</constant>.
+ Use <constant>CARD</constant> for global controls that are not
+ logically part of the mixer.
+ If the control is closely associated with some specific device on
+ the sound card, use <constant>HWDEP</constant>,
+ <constant>PCM</constant>, <constant>RAWMIDI</constant>,
+ <constant>TIMER</constant>, or <constant>SEQUENCER</constant>, and
+ specify the device number with the
+ <structfield>device</structfield> and
+ <structfield>subdevice</structfield> fields.
+ </para>
+
+ <para>
+ The <structfield>name</structfield> is the name identifier
+ string. Since ALSA 0.9.x, the control name is very important,
+ because its role is classified from its name. There are
+ pre-defined standard control names. The details are described in
+ the <link linkend="control-interface-control-names"><citetitle>
+ Control Names</citetitle></link> subsection.
+ </para>
+
+ <para>
+ The <structfield>index</structfield> field holds the index number
+ of this control. If there are several different controls with
+ the same name, they can be distinguished by the index
+ number. This is the case when
+ several codecs exist on the card. If the index is zero, you can
+ omit the definition above.
+ </para>
+
+ <para>
+ The <structfield>access</structfield> field contains the access
+ type of this control. Give the combination of bit masks,
+ <constant>SNDRV_CTL_ELEM_ACCESS_XXX</constant>, there.
+ The details will be explained in
+ the <link linkend="control-interface-access-flags"><citetitle>
+ Access Flags</citetitle></link> subsection.
+ </para>
+
+ <para>
+ The <structfield>private_value</structfield> field contains
+ an arbitrary long integer value for this record. When using
+ the generic <structfield>info</structfield>,
+ <structfield>get</structfield> and
+ <structfield>put</structfield> callbacks, you can pass a value
+ through this field. If several small numbers are necessary, you can
+ combine them in bitwise. Or, it's possible to give a pointer
+ (casted to unsigned long) of some record to this field, too.
+ </para>
+
+ <para>
+ The <structfield>tlv</structfield> field can be used to provide
+ metadata about the control; see the
+ <link linkend="control-interface-tlv">
+ <citetitle>Metadata</citetitle></link> subsection.
+ </para>
+
+ <para>
+ The other three are
+ <link linkend="control-interface-callbacks"><citetitle>
+ callback functions</citetitle></link>.
+ </para>
+ </section>
+
+ <section id="control-interface-control-names">
+ <title>Control Names</title>
+ <para>
+ There are some standards to define the control names. A
+ control is usually defined from the three parts as
+ <quote>SOURCE DIRECTION FUNCTION</quote>.
+ </para>
+
+ <para>
+ The first, <constant>SOURCE</constant>, specifies the source
+ of the control, and is a string such as <quote>Master</quote>,
+ <quote>PCM</quote>, <quote>CD</quote> and
+ <quote>Line</quote>. There are many pre-defined sources.
+ </para>
+
+ <para>
+ The second, <constant>DIRECTION</constant>, is one of the
+ following strings according to the direction of the control:
+ <quote>Playback</quote>, <quote>Capture</quote>, <quote>Bypass
+ Playback</quote> and <quote>Bypass Capture</quote>. Or, it can
+ be omitted, meaning both playback and capture directions.
+ </para>
+
+ <para>
+ The third, <constant>FUNCTION</constant>, is one of the
+ following strings according to the function of the control:
+ <quote>Switch</quote>, <quote>Volume</quote> and
+ <quote>Route</quote>.
+ </para>
+
+ <para>
+ The example of control names are, thus, <quote>Master Capture
+ Switch</quote> or <quote>PCM Playback Volume</quote>.
+ </para>
+
+ <para>
+ There are some exceptions:
+ </para>
+
+ <section id="control-interface-control-names-global">
+ <title>Global capture and playback</title>
+ <para>
+ <quote>Capture Source</quote>, <quote>Capture Switch</quote>
+ and <quote>Capture Volume</quote> are used for the global
+ capture (input) source, switch and volume. Similarly,
+ <quote>Playback Switch</quote> and <quote>Playback
+ Volume</quote> are used for the global output gain switch and
+ volume.
+ </para>
+ </section>
+
+ <section id="control-interface-control-names-tone">
+ <title>Tone-controls</title>
+ <para>
+ tone-control switch and volumes are specified like
+ <quote>Tone Control - XXX</quote>, e.g. <quote>Tone Control -
+ Switch</quote>, <quote>Tone Control - Bass</quote>,
+ <quote>Tone Control - Center</quote>.
+ </para>
+ </section>
+
+ <section id="control-interface-control-names-3d">
+ <title>3D controls</title>
+ <para>
+ 3D-control switches and volumes are specified like <quote>3D
+ Control - XXX</quote>, e.g. <quote>3D Control -
+ Switch</quote>, <quote>3D Control - Center</quote>, <quote>3D
+ Control - Space</quote>.
+ </para>
+ </section>
+
+ <section id="control-interface-control-names-mic">
+ <title>Mic boost</title>
+ <para>
+ Mic-boost switch is set as <quote>Mic Boost</quote> or
+ <quote>Mic Boost (6dB)</quote>.
+ </para>
+
+ <para>
+ More precise information can be found in
+ <filename>Documentation/sound/alsa/ControlNames.txt</filename>.
+ </para>
+ </section>
+ </section>
+
+ <section id="control-interface-access-flags">
+ <title>Access Flags</title>
+
+ <para>
+ The access flag is the bitmask which specifies the access type
+ of the given control. The default access type is
+ <constant>SNDRV_CTL_ELEM_ACCESS_READWRITE</constant>,
+ which means both read and write are allowed to this control.
+ When the access flag is omitted (i.e. = 0), it is
+ considered as <constant>READWRITE</constant> access as default.
+ </para>
+
+ <para>
+ When the control is read-only, pass
+ <constant>SNDRV_CTL_ELEM_ACCESS_READ</constant> instead.
+ In this case, you don't have to define
+ the <structfield>put</structfield> callback.
+ Similarly, when the control is write-only (although it's a rare
+ case), you can use the <constant>WRITE</constant> flag instead, and
+ you don't need the <structfield>get</structfield> callback.
+ </para>
+
+ <para>
+ If the control value changes frequently (e.g. the VU meter),
+ <constant>VOLATILE</constant> flag should be given. This means
+ that the control may be changed without
+ <link linkend="control-interface-change-notification"><citetitle>
+ notification</citetitle></link>. Applications should poll such
+ a control constantly.
+ </para>
+
+ <para>
+ When the control is inactive, set
+ the <constant>INACTIVE</constant> flag, too.
+ There are <constant>LOCK</constant> and
+ <constant>OWNER</constant> flags to change the write
+ permissions.
+ </para>
+
+ </section>
+
+ <section id="control-interface-callbacks">
+ <title>Callbacks</title>
+
+ <section id="control-interface-callbacks-info">
+ <title>info callback</title>
+ <para>
+ The <structfield>info</structfield> callback is used to get
+ detailed information on this control. This must store the
+ values of the given struct <structname>snd_ctl_elem_info</structname>
+ object. For example, for a boolean control with a single
+ element:
+
+ <example>
+ <title>Example of info callback</title>
+ <programlisting>
+<![CDATA[
+ static int snd_myctl_mono_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+ {
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_BOOLEAN;
+ uinfo->count = 1;
+ uinfo->value.integer.min = 0;
+ uinfo->value.integer.max = 1;
+ return 0;
+ }
+]]>
+ </programlisting>
+ </example>
+ </para>
+
+ <para>
+ The <structfield>type</structfield> field specifies the type
+ of the control. There are <constant>BOOLEAN</constant>,
+ <constant>INTEGER</constant>, <constant>ENUMERATED</constant>,
+ <constant>BYTES</constant>, <constant>IEC958</constant> and
+ <constant>INTEGER64</constant>. The
+ <structfield>count</structfield> field specifies the
+ number of elements in this control. For example, a stereo
+ volume would have count = 2. The
+ <structfield>value</structfield> field is a union, and
+ the values stored are depending on the type. The boolean and
+ integer types are identical.
+ </para>
+
+ <para>
+ The enumerated type is a bit different from others. You'll
+ need to set the string for the currently given item index.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_myctl_enum_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+ {
+ static char *texts[4] = {
+ "First", "Second", "Third", "Fourth"
+ };
+ uinfo->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED;
+ uinfo->count = 1;
+ uinfo->value.enumerated.items = 4;
+ if (uinfo->value.enumerated.item > 3)
+ uinfo->value.enumerated.item = 3;
+ strcpy(uinfo->value.enumerated.name,
+ texts[uinfo->value.enumerated.item]);
+ return 0;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The above callback can be simplified with a helper function,
+ <function>snd_ctl_enum_info</function>. The final code
+ looks like below.
+ (You can pass ARRAY_SIZE(texts) instead of 4 in the third
+ argument; it's a matter of taste.)
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_myctl_enum_info(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_info *uinfo)
+ {
+ static char *texts[4] = {
+ "First", "Second", "Third", "Fourth"
+ };
+ return snd_ctl_enum_info(uinfo, 1, 4, texts);
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Some common info callbacks are available for your convenience:
+ <function>snd_ctl_boolean_mono_info()</function> and
+ <function>snd_ctl_boolean_stereo_info()</function>.
+ Obviously, the former is an info callback for a mono channel
+ boolean item, just like <function>snd_myctl_mono_info</function>
+ above, and the latter is for a stereo channel boolean item.
+ </para>
+
+ </section>
+
+ <section id="control-interface-callbacks-get">
+ <title>get callback</title>
+
+ <para>
+ This callback is used to read the current value of the
+ control and to return to user-space.
+ </para>
+
+ <para>
+ For example,
+
+ <example>
+ <title>Example of get callback</title>
+ <programlisting>
+<![CDATA[
+ static int snd_myctl_get(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct mychip *chip = snd_kcontrol_chip(kcontrol);
+ ucontrol->value.integer.value[0] = get_some_value(chip);
+ return 0;
+ }
+]]>
+ </programlisting>
+ </example>
+ </para>
+
+ <para>
+ The <structfield>value</structfield> field depends on
+ the type of control as well as on the info callback. For example,
+ the sb driver uses this field to store the register offset,
+ the bit-shift and the bit-mask. The
+ <structfield>private_value</structfield> field is set as follows:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ .private_value = reg | (shift << 16) | (mask << 24)
+]]>
+ </programlisting>
+ </informalexample>
+ and is retrieved in callbacks like
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_sbmixer_get_single(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+ {
+ int reg = kcontrol->private_value & 0xff;
+ int shift = (kcontrol->private_value >> 16) & 0xff;
+ int mask = (kcontrol->private_value >> 24) & 0xff;
+ ....
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ In the <structfield>get</structfield> callback,
+ you have to fill all the elements if the
+ control has more than one elements,
+ i.e. <structfield>count</structfield> &gt; 1.
+ In the example above, we filled only one element
+ (<structfield>value.integer.value[0]</structfield>) since it's
+ assumed as <structfield>count</structfield> = 1.
+ </para>
+ </section>
+
+ <section id="control-interface-callbacks-put">
+ <title>put callback</title>
+
+ <para>
+ This callback is used to write a value from user-space.
+ </para>
+
+ <para>
+ For example,
+
+ <example>
+ <title>Example of put callback</title>
+ <programlisting>
+<![CDATA[
+ static int snd_myctl_put(struct snd_kcontrol *kcontrol,
+ struct snd_ctl_elem_value *ucontrol)
+ {
+ struct mychip *chip = snd_kcontrol_chip(kcontrol);
+ int changed = 0;
+ if (chip->current_value !=
+ ucontrol->value.integer.value[0]) {
+ change_current_value(chip,
+ ucontrol->value.integer.value[0]);
+ changed = 1;
+ }
+ return changed;
+ }
+]]>
+ </programlisting>
+ </example>
+
+ As seen above, you have to return 1 if the value is
+ changed. If the value is not changed, return 0 instead.
+ If any fatal error happens, return a negative error code as
+ usual.
+ </para>
+
+ <para>
+ As in the <structfield>get</structfield> callback,
+ when the control has more than one elements,
+ all elements must be evaluated in this callback, too.
+ </para>
+ </section>
+
+ <section id="control-interface-callbacks-all">
+ <title>Callbacks are not atomic</title>
+ <para>
+ All these three callbacks are basically not atomic.
+ </para>
+ </section>
+ </section>
+
+ <section id="control-interface-constructor">
+ <title>Constructor</title>
+ <para>
+ When everything is ready, finally we can create a new
+ control. To create a control, there are two functions to be
+ called, <function>snd_ctl_new1()</function> and
+ <function>snd_ctl_add()</function>.
+ </para>
+
+ <para>
+ In the simplest way, you can do like this:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ err = snd_ctl_add(card, snd_ctl_new1(&my_control, chip));
+ if (err < 0)
+ return err;
+]]>
+ </programlisting>
+ </informalexample>
+
+ where <parameter>my_control</parameter> is the
+ struct <structname>snd_kcontrol_new</structname> object defined above, and chip
+ is the object pointer to be passed to
+ kcontrol-&gt;private_data
+ which can be referred to in callbacks.
+ </para>
+
+ <para>
+ <function>snd_ctl_new1()</function> allocates a new
+ <structname>snd_kcontrol</structname> instance,
+ and <function>snd_ctl_add</function> assigns the given
+ control component to the card.
+ </para>
+ </section>
+
+ <section id="control-interface-change-notification">
+ <title>Change Notification</title>
+ <para>
+ If you need to change and update a control in the interrupt
+ routine, you can call <function>snd_ctl_notify()</function>. For
+ example,
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_ctl_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, id_pointer);
+]]>
+ </programlisting>
+ </informalexample>
+
+ This function takes the card pointer, the event-mask, and the
+ control id pointer for the notification. The event-mask
+ specifies the types of notification, for example, in the above
+ example, the change of control values is notified.
+ The id pointer is the pointer of struct <structname>snd_ctl_elem_id</structname>
+ to be notified.
+ You can find some examples in <filename>es1938.c</filename> or
+ <filename>es1968.c</filename> for hardware volume interrupts.
+ </para>
+ </section>
+
+ <section id="control-interface-tlv">
+ <title>Metadata</title>
+ <para>
+ To provide information about the dB values of a mixer control, use
+ on of the <constant>DECLARE_TLV_xxx</constant> macros from
+ <filename>&lt;sound/tlv.h&gt;</filename> to define a variable
+ containing this information, set the<structfield>tlv.p
+ </structfield> field to point to this variable, and include the
+ <constant>SNDRV_CTL_ELEM_ACCESS_TLV_READ</constant> flag in the
+ <structfield>access</structfield> field; like this:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static DECLARE_TLV_DB_SCALE(db_scale_my_control, -4050, 150, 0);
+
+ static struct snd_kcontrol_new my_control = {
+ ...
+ .access = SNDRV_CTL_ELEM_ACCESS_READWRITE |
+ SNDRV_CTL_ELEM_ACCESS_TLV_READ,
+ ...
+ .tlv.p = db_scale_my_control,
+ };
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The <function>DECLARE_TLV_DB_SCALE</function> macro defines
+ information about a mixer control where each step in the control's
+ value changes the dB value by a constant dB amount.
+ The first parameter is the name of the variable to be defined.
+ The second parameter is the minimum value, in units of 0.01 dB.
+ The third parameter is the step size, in units of 0.01 dB.
+ Set the fourth parameter to 1 if the minimum value actually mutes
+ the control.
+ </para>
+
+ <para>
+ The <function>DECLARE_TLV_DB_LINEAR</function> macro defines
+ information about a mixer control where the control's value affects
+ the output linearly.
+ The first parameter is the name of the variable to be defined.
+ The second parameter is the minimum value, in units of 0.01 dB.
+ The third parameter is the maximum value, in units of 0.01 dB.
+ If the minimum value mutes the control, set the second parameter to
+ <constant>TLV_DB_GAIN_MUTE</constant>.
+ </para>
+ </section>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- API for AC97 Codec -->
+<!-- ****************************************************** -->
+ <chapter id="api-ac97">
+ <title>API for AC97 Codec</title>
+
+ <section>
+ <title>General</title>
+ <para>
+ The ALSA AC97 codec layer is a well-defined one, and you don't
+ have to write much code to control it. Only low-level control
+ routines are necessary. The AC97 codec API is defined in
+ <filename>&lt;sound/ac97_codec.h&gt;</filename>.
+ </para>
+ </section>
+
+ <section id="api-ac97-example">
+ <title>Full Code Example</title>
+ <para>
+ <example>
+ <title>Example of AC97 Interface</title>
+ <programlisting>
+<![CDATA[
+ struct mychip {
+ ....
+ struct snd_ac97 *ac97;
+ ....
+ };
+
+ static unsigned short snd_mychip_ac97_read(struct snd_ac97 *ac97,
+ unsigned short reg)
+ {
+ struct mychip *chip = ac97->private_data;
+ ....
+ /* read a register value here from the codec */
+ return the_register_value;
+ }
+
+ static void snd_mychip_ac97_write(struct snd_ac97 *ac97,
+ unsigned short reg, unsigned short val)
+ {
+ struct mychip *chip = ac97->private_data;
+ ....
+ /* write the given register value to the codec */
+ }
+
+ static int snd_mychip_ac97(struct mychip *chip)
+ {
+ struct snd_ac97_bus *bus;
+ struct snd_ac97_template ac97;
+ int err;
+ static struct snd_ac97_bus_ops ops = {
+ .write = snd_mychip_ac97_write,
+ .read = snd_mychip_ac97_read,
+ };
+
+ err = snd_ac97_bus(chip->card, 0, &ops, NULL, &bus);
+ if (err < 0)
+ return err;
+ memset(&ac97, 0, sizeof(ac97));
+ ac97.private_data = chip;
+ return snd_ac97_mixer(bus, &ac97, &chip->ac97);
+ }
+
+]]>
+ </programlisting>
+ </example>
+ </para>
+ </section>
+
+ <section id="api-ac97-constructor">
+ <title>Constructor</title>
+ <para>
+ To create an ac97 instance, first call <function>snd_ac97_bus</function>
+ with an <type>ac97_bus_ops_t</type> record with callback functions.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_ac97_bus *bus;
+ static struct snd_ac97_bus_ops ops = {
+ .write = snd_mychip_ac97_write,
+ .read = snd_mychip_ac97_read,
+ };
+
+ snd_ac97_bus(card, 0, &ops, NULL, &pbus);
+]]>
+ </programlisting>
+ </informalexample>
+
+ The bus record is shared among all belonging ac97 instances.
+ </para>
+
+ <para>
+ And then call <function>snd_ac97_mixer()</function> with an
+ struct <structname>snd_ac97_template</structname>
+ record together with the bus pointer created above.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_ac97_template ac97;
+ int err;
+
+ memset(&ac97, 0, sizeof(ac97));
+ ac97.private_data = chip;
+ snd_ac97_mixer(bus, &ac97, &chip->ac97);
+]]>
+ </programlisting>
+ </informalexample>
+
+ where chip-&gt;ac97 is a pointer to a newly created
+ <type>ac97_t</type> instance.
+ In this case, the chip pointer is set as the private data, so that
+ the read/write callback functions can refer to this chip instance.
+ This instance is not necessarily stored in the chip
+ record. If you need to change the register values from the
+ driver, or need the suspend/resume of ac97 codecs, keep this
+ pointer to pass to the corresponding functions.
+ </para>
+ </section>
+
+ <section id="api-ac97-callbacks">
+ <title>Callbacks</title>
+ <para>
+ The standard callbacks are <structfield>read</structfield> and
+ <structfield>write</structfield>. Obviously they
+ correspond to the functions for read and write accesses to the
+ hardware low-level codes.
+ </para>
+
+ <para>
+ The <structfield>read</structfield> callback returns the
+ register value specified in the argument.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static unsigned short snd_mychip_ac97_read(struct snd_ac97 *ac97,
+ unsigned short reg)
+ {
+ struct mychip *chip = ac97->private_data;
+ ....
+ return the_register_value;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+
+ Here, the chip can be cast from ac97-&gt;private_data.
+ </para>
+
+ <para>
+ Meanwhile, the <structfield>write</structfield> callback is
+ used to set the register value.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static void snd_mychip_ac97_write(struct snd_ac97 *ac97,
+ unsigned short reg, unsigned short val)
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ These callbacks are non-atomic like the control API callbacks.
+ </para>
+
+ <para>
+ There are also other callbacks:
+ <structfield>reset</structfield>,
+ <structfield>wait</structfield> and
+ <structfield>init</structfield>.
+ </para>
+
+ <para>
+ The <structfield>reset</structfield> callback is used to reset
+ the codec. If the chip requires a special kind of reset, you can
+ define this callback.
+ </para>
+
+ <para>
+ The <structfield>wait</structfield> callback is used to
+ add some waiting time in the standard initialization of the codec. If the
+ chip requires the extra waiting time, define this callback.
+ </para>
+
+ <para>
+ The <structfield>init</structfield> callback is used for
+ additional initialization of the codec.
+ </para>
+ </section>
+
+ <section id="api-ac97-updating-registers">
+ <title>Updating Registers in The Driver</title>
+ <para>
+ If you need to access to the codec from the driver, you can
+ call the following functions:
+ <function>snd_ac97_write()</function>,
+ <function>snd_ac97_read()</function>,
+ <function>snd_ac97_update()</function> and
+ <function>snd_ac97_update_bits()</function>.
+ </para>
+
+ <para>
+ Both <function>snd_ac97_write()</function> and
+ <function>snd_ac97_update()</function> functions are used to
+ set a value to the given register
+ (<constant>AC97_XXX</constant>). The difference between them is
+ that <function>snd_ac97_update()</function> doesn't write a
+ value if the given value has been already set, while
+ <function>snd_ac97_write()</function> always rewrites the
+ value.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_ac97_write(ac97, AC97_MASTER, 0x8080);
+ snd_ac97_update(ac97, AC97_MASTER, 0x8080);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ <function>snd_ac97_read()</function> is used to read the value
+ of the given register. For example,
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ value = snd_ac97_read(ac97, AC97_MASTER);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ <function>snd_ac97_update_bits()</function> is used to update
+ some bits in the given register.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_ac97_update_bits(ac97, reg, mask, value);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Also, there is a function to change the sample rate (of a
+ given register such as
+ <constant>AC97_PCM_FRONT_DAC_RATE</constant>) when VRA or
+ DRA is supported by the codec:
+ <function>snd_ac97_set_rate()</function>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_ac97_set_rate(ac97, AC97_PCM_FRONT_DAC_RATE, 44100);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The following registers are available to set the rate:
+ <constant>AC97_PCM_MIC_ADC_RATE</constant>,
+ <constant>AC97_PCM_FRONT_DAC_RATE</constant>,
+ <constant>AC97_PCM_LR_ADC_RATE</constant>,
+ <constant>AC97_SPDIF</constant>. When
+ <constant>AC97_SPDIF</constant> is specified, the register is
+ not really changed but the corresponding IEC958 status bits will
+ be updated.
+ </para>
+ </section>
+
+ <section id="api-ac97-clock-adjustment">
+ <title>Clock Adjustment</title>
+ <para>
+ In some chips, the clock of the codec isn't 48000 but using a
+ PCI clock (to save a quartz!). In this case, change the field
+ bus-&gt;clock to the corresponding
+ value. For example, intel8x0
+ and es1968 drivers have their own function to read from the clock.
+ </para>
+ </section>
+
+ <section id="api-ac97-proc-files">
+ <title>Proc Files</title>
+ <para>
+ The ALSA AC97 interface will create a proc file such as
+ <filename>/proc/asound/card0/codec97#0/ac97#0-0</filename> and
+ <filename>ac97#0-0+regs</filename>. You can refer to these files to
+ see the current status and registers of the codec.
+ </para>
+ </section>
+
+ <section id="api-ac97-multiple-codecs">
+ <title>Multiple Codecs</title>
+ <para>
+ When there are several codecs on the same card, you need to
+ call <function>snd_ac97_mixer()</function> multiple times with
+ ac97.num=1 or greater. The <structfield>num</structfield> field
+ specifies the codec number.
+ </para>
+
+ <para>
+ If you set up multiple codecs, you either need to write
+ different callbacks for each codec or check
+ ac97-&gt;num in the callback routines.
+ </para>
+ </section>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- MIDI (MPU401-UART) Interface -->
+<!-- ****************************************************** -->
+ <chapter id="midi-interface">
+ <title>MIDI (MPU401-UART) Interface</title>
+
+ <section id="midi-interface-general">
+ <title>General</title>
+ <para>
+ Many soundcards have built-in MIDI (MPU401-UART)
+ interfaces. When the soundcard supports the standard MPU401-UART
+ interface, most likely you can use the ALSA MPU401-UART API. The
+ MPU401-UART API is defined in
+ <filename>&lt;sound/mpu401.h&gt;</filename>.
+ </para>
+
+ <para>
+ Some soundchips have a similar but slightly different
+ implementation of mpu401 stuff. For example, emu10k1 has its own
+ mpu401 routines.
+ </para>
+ </section>
+
+ <section id="midi-interface-constructor">
+ <title>Constructor</title>
+ <para>
+ To create a rawmidi object, call
+ <function>snd_mpu401_uart_new()</function>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_rawmidi *rmidi;
+ snd_mpu401_uart_new(card, 0, MPU401_HW_MPU401, port, info_flags,
+ irq, &rmidi);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The first argument is the card pointer, and the second is the
+ index of this component. You can create up to 8 rawmidi
+ devices.
+ </para>
+
+ <para>
+ The third argument is the type of the hardware,
+ <constant>MPU401_HW_XXX</constant>. If it's not a special one,
+ you can use <constant>MPU401_HW_MPU401</constant>.
+ </para>
+
+ <para>
+ The 4th argument is the I/O port address. Many
+ backward-compatible MPU401 have an I/O port such as 0x330. Or, it
+ might be a part of its own PCI I/O region. It depends on the
+ chip design.
+ </para>
+
+ <para>
+ The 5th argument is a bitflag for additional information.
+ When the I/O port address above is part of the PCI I/O
+ region, the MPU401 I/O port might have been already allocated
+ (reserved) by the driver itself. In such a case, pass a bit flag
+ <constant>MPU401_INFO_INTEGRATED</constant>,
+ and the mpu401-uart layer will allocate the I/O ports by itself.
+ </para>
+
+ <para>
+ When the controller supports only the input or output MIDI stream,
+ pass the <constant>MPU401_INFO_INPUT</constant> or
+ <constant>MPU401_INFO_OUTPUT</constant> bitflag, respectively.
+ Then the rawmidi instance is created as a single stream.
+ </para>
+
+ <para>
+ <constant>MPU401_INFO_MMIO</constant> bitflag is used to change
+ the access method to MMIO (via readb and writeb) instead of
+ iob and outb. In this case, you have to pass the iomapped address
+ to <function>snd_mpu401_uart_new()</function>.
+ </para>
+
+ <para>
+ When <constant>MPU401_INFO_TX_IRQ</constant> is set, the output
+ stream isn't checked in the default interrupt handler. The driver
+ needs to call <function>snd_mpu401_uart_interrupt_tx()</function>
+ by itself to start processing the output stream in the irq handler.
+ </para>
+
+ <para>
+ If the MPU-401 interface shares its interrupt with the other logical
+ devices on the card, set <constant>MPU401_INFO_IRQ_HOOK</constant>
+ (see <link linkend="midi-interface-interrupt-handler"><citetitle>
+ below</citetitle></link>).
+ </para>
+
+ <para>
+ Usually, the port address corresponds to the command port and
+ port + 1 corresponds to the data port. If not, you may change
+ the <structfield>cport</structfield> field of
+ struct <structname>snd_mpu401</structname> manually
+ afterward. However, <structname>snd_mpu401</structname> pointer is not
+ returned explicitly by
+ <function>snd_mpu401_uart_new()</function>. You need to cast
+ rmidi-&gt;private_data to
+ <structname>snd_mpu401</structname> explicitly,
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_mpu401 *mpu;
+ mpu = rmidi->private_data;
+]]>
+ </programlisting>
+ </informalexample>
+
+ and reset the cport as you like:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ mpu->cport = my_own_control_port;
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The 6th argument specifies the ISA irq number that will be
+ allocated. If no interrupt is to be allocated (because your
+ code is already allocating a shared interrupt, or because the
+ device does not use interrupts), pass -1 instead.
+ For a MPU-401 device without an interrupt, a polling timer
+ will be used instead.
+ </para>
+ </section>
+
+ <section id="midi-interface-interrupt-handler">
+ <title>Interrupt Handler</title>
+ <para>
+ When the interrupt is allocated in
+ <function>snd_mpu401_uart_new()</function>, an exclusive ISA
+ interrupt handler is automatically used, hence you don't have
+ anything else to do than creating the mpu401 stuff. Otherwise, you
+ have to set <constant>MPU401_INFO_IRQ_HOOK</constant>, and call
+ <function>snd_mpu401_uart_interrupt()</function> explicitly from your
+ own interrupt handler when it has determined that a UART interrupt
+ has occurred.
+ </para>
+
+ <para>
+ In this case, you need to pass the private_data of the
+ returned rawmidi object from
+ <function>snd_mpu401_uart_new()</function> as the second
+ argument of <function>snd_mpu401_uart_interrupt()</function>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_mpu401_uart_interrupt(irq, rmidi->private_data, regs);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+ </section>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- RawMIDI Interface -->
+<!-- ****************************************************** -->
+ <chapter id="rawmidi-interface">
+ <title>RawMIDI Interface</title>
+
+ <section id="rawmidi-interface-overview">
+ <title>Overview</title>
+
+ <para>
+ The raw MIDI interface is used for hardware MIDI ports that can
+ be accessed as a byte stream. It is not used for synthesizer
+ chips that do not directly understand MIDI.
+ </para>
+
+ <para>
+ ALSA handles file and buffer management. All you have to do is
+ to write some code to move data between the buffer and the
+ hardware.
+ </para>
+
+ <para>
+ The rawmidi API is defined in
+ <filename>&lt;sound/rawmidi.h&gt;</filename>.
+ </para>
+ </section>
+
+ <section id="rawmidi-interface-constructor">
+ <title>Constructor</title>
+
+ <para>
+ To create a rawmidi device, call the
+ <function>snd_rawmidi_new</function> function:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_rawmidi *rmidi;
+ err = snd_rawmidi_new(chip->card, "MyMIDI", 0, outs, ins, &rmidi);
+ if (err < 0)
+ return err;
+ rmidi->private_data = chip;
+ strcpy(rmidi->name, "My MIDI");
+ rmidi->info_flags = SNDRV_RAWMIDI_INFO_OUTPUT |
+ SNDRV_RAWMIDI_INFO_INPUT |
+ SNDRV_RAWMIDI_INFO_DUPLEX;
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The first argument is the card pointer, the second argument is
+ the ID string.
+ </para>
+
+ <para>
+ The third argument is the index of this component. You can
+ create up to 8 rawmidi devices.
+ </para>
+
+ <para>
+ The fourth and fifth arguments are the number of output and
+ input substreams, respectively, of this device (a substream is
+ the equivalent of a MIDI port).
+ </para>
+
+ <para>
+ Set the <structfield>info_flags</structfield> field to specify
+ the capabilities of the device.
+ Set <constant>SNDRV_RAWMIDI_INFO_OUTPUT</constant> if there is
+ at least one output port,
+ <constant>SNDRV_RAWMIDI_INFO_INPUT</constant> if there is at
+ least one input port,
+ and <constant>SNDRV_RAWMIDI_INFO_DUPLEX</constant> if the device
+ can handle output and input at the same time.
+ </para>
+
+ <para>
+ After the rawmidi device is created, you need to set the
+ operators (callbacks) for each substream. There are helper
+ functions to set the operators for all the substreams of a device:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_OUTPUT, &snd_mymidi_output_ops);
+ snd_rawmidi_set_ops(rmidi, SNDRV_RAWMIDI_STREAM_INPUT, &snd_mymidi_input_ops);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The operators are usually defined like this:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static struct snd_rawmidi_ops snd_mymidi_output_ops = {
+ .open = snd_mymidi_output_open,
+ .close = snd_mymidi_output_close,
+ .trigger = snd_mymidi_output_trigger,
+ };
+]]>
+ </programlisting>
+ </informalexample>
+ These callbacks are explained in the <link
+ linkend="rawmidi-interface-callbacks"><citetitle>Callbacks</citetitle></link>
+ section.
+ </para>
+
+ <para>
+ If there are more than one substream, you should give a
+ unique name to each of them:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_rawmidi_substream *substream;
+ list_for_each_entry(substream,
+ &rmidi->streams[SNDRV_RAWMIDI_STREAM_OUTPUT].substreams,
+ list {
+ sprintf(substream->name, "My MIDI Port %d", substream->number + 1);
+ }
+ /* same for SNDRV_RAWMIDI_STREAM_INPUT */
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+ </section>
+
+ <section id="rawmidi-interface-callbacks">
+ <title>Callbacks</title>
+
+ <para>
+ In all the callbacks, the private data that you've set for the
+ rawmidi device can be accessed as
+ substream-&gt;rmidi-&gt;private_data.
+ <!-- <code> isn't available before DocBook 4.3 -->
+ </para>
+
+ <para>
+ If there is more than one port, your callbacks can determine the
+ port index from the struct snd_rawmidi_substream data passed to each
+ callback:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_rawmidi_substream *substream;
+ int index = substream->number;
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <section id="rawmidi-interface-op-open">
+ <title><function>open</function> callback</title>
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_open(struct snd_rawmidi_substream *substream);
+]]>
+ </programlisting>
+ </informalexample>
+
+ <para>
+ This is called when a substream is opened.
+ You can initialize the hardware here, but you shouldn't
+ start transmitting/receiving data yet.
+ </para>
+ </section>
+
+ <section id="rawmidi-interface-op-close">
+ <title><function>close</function> callback</title>
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_xxx_close(struct snd_rawmidi_substream *substream);
+]]>
+ </programlisting>
+ </informalexample>
+
+ <para>
+ Guess what.
+ </para>
+
+ <para>
+ The <function>open</function> and <function>close</function>
+ callbacks of a rawmidi device are serialized with a mutex,
+ and can sleep.
+ </para>
+ </section>
+
+ <section id="rawmidi-interface-op-trigger-out">
+ <title><function>trigger</function> callback for output
+ substreams</title>
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static void snd_xxx_output_trigger(struct snd_rawmidi_substream *substream, int up);
+]]>
+ </programlisting>
+ </informalexample>
+
+ <para>
+ This is called with a nonzero <parameter>up</parameter>
+ parameter when there is some data in the substream buffer that
+ must be transmitted.
+ </para>
+
+ <para>
+ To read data from the buffer, call
+ <function>snd_rawmidi_transmit_peek</function>. It will
+ return the number of bytes that have been read; this will be
+ less than the number of bytes requested when there are no more
+ data in the buffer.
+ After the data have been transmitted successfully, call
+ <function>snd_rawmidi_transmit_ack</function> to remove the
+ data from the substream buffer:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ unsigned char data;
+ while (snd_rawmidi_transmit_peek(substream, &data, 1) == 1) {
+ if (snd_mychip_try_to_transmit(data))
+ snd_rawmidi_transmit_ack(substream, 1);
+ else
+ break; /* hardware FIFO full */
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ If you know beforehand that the hardware will accept data, you
+ can use the <function>snd_rawmidi_transmit</function> function
+ which reads some data and removes them from the buffer at once:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ while (snd_mychip_transmit_possible()) {
+ unsigned char data;
+ if (snd_rawmidi_transmit(substream, &data, 1) != 1)
+ break; /* no more data */
+ snd_mychip_transmit(data);
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ If you know beforehand how many bytes you can accept, you can
+ use a buffer size greater than one with the
+ <function>snd_rawmidi_transmit*</function> functions.
+ </para>
+
+ <para>
+ The <function>trigger</function> callback must not sleep. If
+ the hardware FIFO is full before the substream buffer has been
+ emptied, you have to continue transmitting data later, either
+ in an interrupt handler, or with a timer if the hardware
+ doesn't have a MIDI transmit interrupt.
+ </para>
+
+ <para>
+ The <function>trigger</function> callback is called with a
+ zero <parameter>up</parameter> parameter when the transmission
+ of data should be aborted.
+ </para>
+ </section>
+
+ <section id="rawmidi-interface-op-trigger-in">
+ <title><function>trigger</function> callback for input
+ substreams</title>
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static void snd_xxx_input_trigger(struct snd_rawmidi_substream *substream, int up);
+]]>
+ </programlisting>
+ </informalexample>
+
+ <para>
+ This is called with a nonzero <parameter>up</parameter>
+ parameter to enable receiving data, or with a zero
+ <parameter>up</parameter> parameter do disable receiving data.
+ </para>
+
+ <para>
+ The <function>trigger</function> callback must not sleep; the
+ actual reading of data from the device is usually done in an
+ interrupt handler.
+ </para>
+
+ <para>
+ When data reception is enabled, your interrupt handler should
+ call <function>snd_rawmidi_receive</function> for all received
+ data:
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ void snd_mychip_midi_interrupt(...)
+ {
+ while (mychip_midi_available()) {
+ unsigned char data;
+ data = mychip_midi_read();
+ snd_rawmidi_receive(substream, &data, 1);
+ }
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+ </section>
+
+ <section id="rawmidi-interface-op-drain">
+ <title><function>drain</function> callback</title>
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static void snd_xxx_drain(struct snd_rawmidi_substream *substream);
+]]>
+ </programlisting>
+ </informalexample>
+
+ <para>
+ This is only used with output substreams. This function should wait
+ until all data read from the substream buffer have been transmitted.
+ This ensures that the device can be closed and the driver unloaded
+ without losing data.
+ </para>
+
+ <para>
+ This callback is optional. If you do not set
+ <structfield>drain</structfield> in the struct snd_rawmidi_ops
+ structure, ALSA will simply wait for 50&nbsp;milliseconds
+ instead.
+ </para>
+ </section>
+ </section>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- Miscellaneous Devices -->
+<!-- ****************************************************** -->
+ <chapter id="misc-devices">
+ <title>Miscellaneous Devices</title>
+
+ <section id="misc-devices-opl3">
+ <title>FM OPL3</title>
+ <para>
+ The FM OPL3 is still used in many chips (mainly for backward
+ compatibility). ALSA has a nice OPL3 FM control layer, too. The
+ OPL3 API is defined in
+ <filename>&lt;sound/opl3.h&gt;</filename>.
+ </para>
+
+ <para>
+ FM registers can be directly accessed through the direct-FM API,
+ defined in <filename>&lt;sound/asound_fm.h&gt;</filename>. In
+ ALSA native mode, FM registers are accessed through
+ the Hardware-Dependent Device direct-FM extension API, whereas in
+ OSS compatible mode, FM registers can be accessed with the OSS
+ direct-FM compatible API in <filename>/dev/dmfmX</filename> device.
+ </para>
+
+ <para>
+ To create the OPL3 component, you have two functions to
+ call. The first one is a constructor for the <type>opl3_t</type>
+ instance.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_opl3 *opl3;
+ snd_opl3_create(card, lport, rport, OPL3_HW_OPL3_XXX,
+ integrated, &opl3);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The first argument is the card pointer, the second one is the
+ left port address, and the third is the right port address. In
+ most cases, the right port is placed at the left port + 2.
+ </para>
+
+ <para>
+ The fourth argument is the hardware type.
+ </para>
+
+ <para>
+ When the left and right ports have been already allocated by
+ the card driver, pass non-zero to the fifth argument
+ (<parameter>integrated</parameter>). Otherwise, the opl3 module will
+ allocate the specified ports by itself.
+ </para>
+
+ <para>
+ When the accessing the hardware requires special method
+ instead of the standard I/O access, you can create opl3 instance
+ separately with <function>snd_opl3_new()</function>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_opl3 *opl3;
+ snd_opl3_new(card, OPL3_HW_OPL3_XXX, &opl3);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Then set <structfield>command</structfield>,
+ <structfield>private_data</structfield> and
+ <structfield>private_free</structfield> for the private
+ access function, the private data and the destructor.
+ The l_port and r_port are not necessarily set. Only the
+ command must be set properly. You can retrieve the data
+ from the opl3-&gt;private_data field.
+ </para>
+
+ <para>
+ After creating the opl3 instance via <function>snd_opl3_new()</function>,
+ call <function>snd_opl3_init()</function> to initialize the chip to the
+ proper state. Note that <function>snd_opl3_create()</function> always
+ calls it internally.
+ </para>
+
+ <para>
+ If the opl3 instance is created successfully, then create a
+ hwdep device for this opl3.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_hwdep *opl3hwdep;
+ snd_opl3_hwdep_new(opl3, 0, 1, &opl3hwdep);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The first argument is the <type>opl3_t</type> instance you
+ created, and the second is the index number, usually 0.
+ </para>
+
+ <para>
+ The third argument is the index-offset for the sequencer
+ client assigned to the OPL3 port. When there is an MPU401-UART,
+ give 1 for here (UART always takes 0).
+ </para>
+ </section>
+
+ <section id="misc-devices-hardware-dependent">
+ <title>Hardware-Dependent Devices</title>
+ <para>
+ Some chips need user-space access for special
+ controls or for loading the micro code. In such a case, you can
+ create a hwdep (hardware-dependent) device. The hwdep API is
+ defined in <filename>&lt;sound/hwdep.h&gt;</filename>. You can
+ find examples in opl3 driver or
+ <filename>isa/sb/sb16_csp.c</filename>.
+ </para>
+
+ <para>
+ The creation of the <type>hwdep</type> instance is done via
+ <function>snd_hwdep_new()</function>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_hwdep *hw;
+ snd_hwdep_new(card, "My HWDEP", 0, &hw);
+]]>
+ </programlisting>
+ </informalexample>
+
+ where the third argument is the index number.
+ </para>
+
+ <para>
+ You can then pass any pointer value to the
+ <parameter>private_data</parameter>.
+ If you assign a private data, you should define the
+ destructor, too. The destructor function is set in
+ the <structfield>private_free</structfield> field.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct mydata *p = kmalloc(sizeof(*p), GFP_KERNEL);
+ hw->private_data = p;
+ hw->private_free = mydata_free;
+]]>
+ </programlisting>
+ </informalexample>
+
+ and the implementation of the destructor would be:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static void mydata_free(struct snd_hwdep *hw)
+ {
+ struct mydata *p = hw->private_data;
+ kfree(p);
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The arbitrary file operations can be defined for this
+ instance. The file operators are defined in
+ the <parameter>ops</parameter> table. For example, assume that
+ this chip needs an ioctl.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ hw->ops.open = mydata_open;
+ hw->ops.ioctl = mydata_ioctl;
+ hw->ops.release = mydata_release;
+]]>
+ </programlisting>
+ </informalexample>
+
+ And implement the callback functions as you like.
+ </para>
+ </section>
+
+ <section id="misc-devices-IEC958">
+ <title>IEC958 (S/PDIF)</title>
+ <para>
+ Usually the controls for IEC958 devices are implemented via
+ the control interface. There is a macro to compose a name string for
+ IEC958 controls, <function>SNDRV_CTL_NAME_IEC958()</function>
+ defined in <filename>&lt;include/asound.h&gt;</filename>.
+ </para>
+
+ <para>
+ There are some standard controls for IEC958 status bits. These
+ controls use the type <type>SNDRV_CTL_ELEM_TYPE_IEC958</type>,
+ and the size of element is fixed as 4 bytes array
+ (value.iec958.status[x]). For the <structfield>info</structfield>
+ callback, you don't specify
+ the value field for this type (the count field must be set,
+ though).
+ </para>
+
+ <para>
+ <quote>IEC958 Playback Con Mask</quote> is used to return the
+ bit-mask for the IEC958 status bits of consumer mode. Similarly,
+ <quote>IEC958 Playback Pro Mask</quote> returns the bitmask for
+ professional mode. They are read-only controls, and are defined
+ as MIXER controls (iface =
+ <constant>SNDRV_CTL_ELEM_IFACE_MIXER</constant>).
+ </para>
+
+ <para>
+ Meanwhile, <quote>IEC958 Playback Default</quote> control is
+ defined for getting and setting the current default IEC958
+ bits. Note that this one is usually defined as a PCM control
+ (iface = <constant>SNDRV_CTL_ELEM_IFACE_PCM</constant>),
+ although in some places it's defined as a MIXER control.
+ </para>
+
+ <para>
+ In addition, you can define the control switches to
+ enable/disable or to set the raw bit mode. The implementation
+ will depend on the chip, but the control should be named as
+ <quote>IEC958 xxx</quote>, preferably using
+ the <function>SNDRV_CTL_NAME_IEC958()</function> macro.
+ </para>
+
+ <para>
+ You can find several cases, for example,
+ <filename>pci/emu10k1</filename>,
+ <filename>pci/ice1712</filename>, or
+ <filename>pci/cmipci.c</filename>.
+ </para>
+ </section>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- Buffer and Memory Management -->
+<!-- ****************************************************** -->
+ <chapter id="buffer-and-memory">
+ <title>Buffer and Memory Management</title>
+
+ <section id="buffer-and-memory-buffer-types">
+ <title>Buffer Types</title>
+ <para>
+ ALSA provides several different buffer allocation functions
+ depending on the bus and the architecture. All these have a
+ consistent API. The allocation of physically-contiguous pages is
+ done via
+ <function>snd_malloc_xxx_pages()</function> function, where xxx
+ is the bus type.
+ </para>
+
+ <para>
+ The allocation of pages with fallback is
+ <function>snd_malloc_xxx_pages_fallback()</function>. This
+ function tries to allocate the specified pages but if the pages
+ are not available, it tries to reduce the page sizes until
+ enough space is found.
+ </para>
+
+ <para>
+ The release the pages, call
+ <function>snd_free_xxx_pages()</function> function.
+ </para>
+
+ <para>
+ Usually, ALSA drivers try to allocate and reserve
+ a large contiguous physical space
+ at the time the module is loaded for the later use.
+ This is called <quote>pre-allocation</quote>.
+ As already written, you can call the following function at
+ pcm instance construction time (in the case of PCI bus).
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+ snd_dma_pci_data(pci), size, max);
+]]>
+ </programlisting>
+ </informalexample>
+
+ where <parameter>size</parameter> is the byte size to be
+ pre-allocated and the <parameter>max</parameter> is the maximum
+ size to be changed via the <filename>prealloc</filename> proc file.
+ The allocator will try to get an area as large as possible
+ within the given size.
+ </para>
+
+ <para>
+ The second argument (type) and the third argument (device pointer)
+ are dependent on the bus.
+ In the case of the ISA bus, pass <function>snd_dma_isa_data()</function>
+ as the third argument with <constant>SNDRV_DMA_TYPE_DEV</constant> type.
+ For the continuous buffer unrelated to the bus can be pre-allocated
+ with <constant>SNDRV_DMA_TYPE_CONTINUOUS</constant> type and the
+ <function>snd_dma_continuous_data(GFP_KERNEL)</function> device pointer,
+ where <constant>GFP_KERNEL</constant> is the kernel allocation flag to
+ use.
+ For the PCI scatter-gather buffers, use
+ <constant>SNDRV_DMA_TYPE_DEV_SG</constant> with
+ <function>snd_dma_pci_data(pci)</function>
+ (see the
+ <link linkend="buffer-and-memory-non-contiguous"><citetitle>Non-Contiguous Buffers
+ </citetitle></link> section).
+ </para>
+
+ <para>
+ Once the buffer is pre-allocated, you can use the
+ allocator in the <structfield>hw_params</structfield> callback:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_pcm_lib_malloc_pages(substream, size);
+]]>
+ </programlisting>
+ </informalexample>
+
+ Note that you have to pre-allocate to use this function.
+ </para>
+ </section>
+
+ <section id="buffer-and-memory-external-hardware">
+ <title>External Hardware Buffers</title>
+ <para>
+ Some chips have their own hardware buffers and the DMA
+ transfer from the host memory is not available. In such a case,
+ you need to either 1) copy/set the audio data directly to the
+ external hardware buffer, or 2) make an intermediate buffer and
+ copy/set the data from it to the external hardware buffer in
+ interrupts (or in tasklets, preferably).
+ </para>
+
+ <para>
+ The first case works fine if the external hardware buffer is large
+ enough. This method doesn't need any extra buffers and thus is
+ more effective. You need to define the
+ <structfield>copy</structfield> and
+ <structfield>silence</structfield> callbacks for
+ the data transfer. However, there is a drawback: it cannot
+ be mmapped. The examples are GUS's GF1 PCM or emu8000's
+ wavetable PCM.
+ </para>
+
+ <para>
+ The second case allows for mmap on the buffer, although you have
+ to handle an interrupt or a tasklet to transfer the data
+ from the intermediate buffer to the hardware buffer. You can find an
+ example in the vxpocket driver.
+ </para>
+
+ <para>
+ Another case is when the chip uses a PCI memory-map
+ region for the buffer instead of the host memory. In this case,
+ mmap is available only on certain architectures like the Intel one.
+ In non-mmap mode, the data cannot be transferred as in the normal
+ way. Thus you need to define the <structfield>copy</structfield> and
+ <structfield>silence</structfield> callbacks as well,
+ as in the cases above. The examples are found in
+ <filename>rme32.c</filename> and <filename>rme96.c</filename>.
+ </para>
+
+ <para>
+ The implementation of the <structfield>copy</structfield> and
+ <structfield>silence</structfield> callbacks depends upon
+ whether the hardware supports interleaved or non-interleaved
+ samples. The <structfield>copy</structfield> callback is
+ defined like below, a bit
+ differently depending whether the direction is playback or
+ capture:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int playback_copy(struct snd_pcm_substream *substream, int channel,
+ snd_pcm_uframes_t pos, void *src, snd_pcm_uframes_t count);
+ static int capture_copy(struct snd_pcm_substream *substream, int channel,
+ snd_pcm_uframes_t pos, void *dst, snd_pcm_uframes_t count);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ In the case of interleaved samples, the second argument
+ (<parameter>channel</parameter>) is not used. The third argument
+ (<parameter>pos</parameter>) points the
+ current position offset in frames.
+ </para>
+
+ <para>
+ The meaning of the fourth argument is different between
+ playback and capture. For playback, it holds the source data
+ pointer, and for capture, it's the destination data pointer.
+ </para>
+
+ <para>
+ The last argument is the number of frames to be copied.
+ </para>
+
+ <para>
+ What you have to do in this callback is again different
+ between playback and capture directions. In the
+ playback case, you copy the given amount of data
+ (<parameter>count</parameter>) at the specified pointer
+ (<parameter>src</parameter>) to the specified offset
+ (<parameter>pos</parameter>) on the hardware buffer. When
+ coded like memcpy-like way, the copy would be like:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ my_memcpy(my_buffer + frames_to_bytes(runtime, pos), src,
+ frames_to_bytes(runtime, count));
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ For the capture direction, you copy the given amount of
+ data (<parameter>count</parameter>) at the specified offset
+ (<parameter>pos</parameter>) on the hardware buffer to the
+ specified pointer (<parameter>dst</parameter>).
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ my_memcpy(dst, my_buffer + frames_to_bytes(runtime, pos),
+ frames_to_bytes(runtime, count));
+]]>
+ </programlisting>
+ </informalexample>
+
+ Note that both the position and the amount of data are given
+ in frames.
+ </para>
+
+ <para>
+ In the case of non-interleaved samples, the implementation
+ will be a bit more complicated.
+ </para>
+
+ <para>
+ You need to check the channel argument, and if it's -1, copy
+ the whole channels. Otherwise, you have to copy only the
+ specified channel. Please check
+ <filename>isa/gus/gus_pcm.c</filename> as an example.
+ </para>
+
+ <para>
+ The <structfield>silence</structfield> callback is also
+ implemented in a similar way.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int silence(struct snd_pcm_substream *substream, int channel,
+ snd_pcm_uframes_t pos, snd_pcm_uframes_t count);
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The meanings of arguments are the same as in the
+ <structfield>copy</structfield>
+ callback, although there is no <parameter>src/dst</parameter>
+ argument. In the case of interleaved samples, the channel
+ argument has no meaning, as well as on
+ <structfield>copy</structfield> callback.
+ </para>
+
+ <para>
+ The role of <structfield>silence</structfield> callback is to
+ set the given amount
+ (<parameter>count</parameter>) of silence data at the
+ specified offset (<parameter>pos</parameter>) on the hardware
+ buffer. Suppose that the data format is signed (that is, the
+ silent-data is 0), and the implementation using a memset-like
+ function would be like:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ my_memcpy(my_buffer + frames_to_bytes(runtime, pos), 0,
+ frames_to_bytes(runtime, count));
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ In the case of non-interleaved samples, again, the
+ implementation becomes a bit more complicated. See, for example,
+ <filename>isa/gus/gus_pcm.c</filename>.
+ </para>
+ </section>
+
+ <section id="buffer-and-memory-non-contiguous">
+ <title>Non-Contiguous Buffers</title>
+ <para>
+ If your hardware supports the page table as in emu10k1 or the
+ buffer descriptors as in via82xx, you can use the scatter-gather
+ (SG) DMA. ALSA provides an interface for handling SG-buffers.
+ The API is provided in <filename>&lt;sound/pcm.h&gt;</filename>.
+ </para>
+
+ <para>
+ For creating the SG-buffer handler, call
+ <function>snd_pcm_lib_preallocate_pages()</function> or
+ <function>snd_pcm_lib_preallocate_pages_for_all()</function>
+ with <constant>SNDRV_DMA_TYPE_DEV_SG</constant>
+ in the PCM constructor like other PCI pre-allocator.
+ You need to pass <function>snd_dma_pci_data(pci)</function>,
+ where pci is the struct <structname>pci_dev</structname> pointer
+ of the chip as well.
+ The <type>struct snd_sg_buf</type> instance is created as
+ substream-&gt;dma_private. You can cast
+ the pointer like:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_sg_buf *sgbuf = (struct snd_sg_buf *)substream->dma_private;
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Then call <function>snd_pcm_lib_malloc_pages()</function>
+ in the <structfield>hw_params</structfield> callback
+ as well as in the case of normal PCI buffer.
+ The SG-buffer handler will allocate the non-contiguous kernel
+ pages of the given size and map them onto the virtually contiguous
+ memory. The virtual pointer is addressed in runtime-&gt;dma_area.
+ The physical address (runtime-&gt;dma_addr) is set to zero,
+ because the buffer is physically non-contiguous.
+ The physical address table is set up in sgbuf-&gt;table.
+ You can get the physical address at a certain offset via
+ <function>snd_pcm_sgbuf_get_addr()</function>.
+ </para>
+
+ <para>
+ When a SG-handler is used, you need to set
+ <function>snd_pcm_sgbuf_ops_page</function> as
+ the <structfield>page</structfield> callback.
+ (See <link linkend="pcm-interface-operators-page-callback">
+ <citetitle>page callback section</citetitle></link>.)
+ </para>
+
+ <para>
+ To release the data, call
+ <function>snd_pcm_lib_free_pages()</function> in the
+ <structfield>hw_free</structfield> callback as usual.
+ </para>
+ </section>
+
+ <section id="buffer-and-memory-vmalloced">
+ <title>Vmalloc'ed Buffers</title>
+ <para>
+ It's possible to use a buffer allocated via
+ <function>vmalloc</function>, for example, for an intermediate
+ buffer. Since the allocated pages are not contiguous, you need
+ to set the <structfield>page</structfield> callback to obtain
+ the physical address at every offset.
+ </para>
+
+ <para>
+ The implementation of <structfield>page</structfield> callback
+ would be like this:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ #include <linux/vmalloc.h>
+
+ /* get the physical page pointer on the given offset */
+ static struct page *mychip_page(struct snd_pcm_substream *substream,
+ unsigned long offset)
+ {
+ void *pageptr = substream->runtime->dma_area + offset;
+ return vmalloc_to_page(pageptr);
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+ </section>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- Proc Interface -->
+<!-- ****************************************************** -->
+ <chapter id="proc-interface">
+ <title>Proc Interface</title>
+ <para>
+ ALSA provides an easy interface for procfs. The proc files are
+ very useful for debugging. I recommend you set up proc files if
+ you write a driver and want to get a running status or register
+ dumps. The API is found in
+ <filename>&lt;sound/info.h&gt;</filename>.
+ </para>
+
+ <para>
+ To create a proc file, call
+ <function>snd_card_proc_new()</function>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ struct snd_info_entry *entry;
+ int err = snd_card_proc_new(card, "my-file", &entry);
+]]>
+ </programlisting>
+ </informalexample>
+
+ where the second argument specifies the name of the proc file to be
+ created. The above example will create a file
+ <filename>my-file</filename> under the card directory,
+ e.g. <filename>/proc/asound/card0/my-file</filename>.
+ </para>
+
+ <para>
+ Like other components, the proc entry created via
+ <function>snd_card_proc_new()</function> will be registered and
+ released automatically in the card registration and release
+ functions.
+ </para>
+
+ <para>
+ When the creation is successful, the function stores a new
+ instance in the pointer given in the third argument.
+ It is initialized as a text proc file for read only. To use
+ this proc file as a read-only text file as it is, set the read
+ callback with a private data via
+ <function>snd_info_set_text_ops()</function>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_info_set_text_ops(entry, chip, my_proc_read);
+]]>
+ </programlisting>
+ </informalexample>
+
+ where the second argument (<parameter>chip</parameter>) is the
+ private data to be used in the callbacks. The third parameter
+ specifies the read buffer size and the fourth
+ (<parameter>my_proc_read</parameter>) is the callback function, which
+ is defined like
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static void my_proc_read(struct snd_info_entry *entry,
+ struct snd_info_buffer *buffer);
+]]>
+ </programlisting>
+ </informalexample>
+
+ </para>
+
+ <para>
+ In the read callback, use <function>snd_iprintf()</function> for
+ output strings, which works just like normal
+ <function>printf()</function>. For example,
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static void my_proc_read(struct snd_info_entry *entry,
+ struct snd_info_buffer *buffer)
+ {
+ struct my_chip *chip = entry->private_data;
+
+ snd_iprintf(buffer, "This is my chip!\n");
+ snd_iprintf(buffer, "Port = %ld\n", chip->port);
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The file permissions can be changed afterwards. As default, it's
+ set as read only for all users. If you want to add write
+ permission for the user (root as default), do as follows:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ entry->mode = S_IFREG | S_IRUGO | S_IWUSR;
+]]>
+ </programlisting>
+ </informalexample>
+
+ and set the write buffer size and the callback
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ entry->c.text.write = my_proc_write;
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ For the write callback, you can use
+ <function>snd_info_get_line()</function> to get a text line, and
+ <function>snd_info_get_str()</function> to retrieve a string from
+ the line. Some examples are found in
+ <filename>core/oss/mixer_oss.c</filename>, core/oss/and
+ <filename>pcm_oss.c</filename>.
+ </para>
+
+ <para>
+ For a raw-data proc-file, set the attributes as follows:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static struct snd_info_entry_ops my_file_io_ops = {
+ .read = my_file_io_read,
+ };
+
+ entry->content = SNDRV_INFO_CONTENT_DATA;
+ entry->private_data = chip;
+ entry->c.ops = &my_file_io_ops;
+ entry->size = 4096;
+ entry->mode = S_IFREG | S_IRUGO;
+]]>
+ </programlisting>
+ </informalexample>
+
+ For the raw data, <structfield>size</structfield> field must be
+ set properly. This specifies the maximum size of the proc file access.
+ </para>
+
+ <para>
+ The read/write callbacks of raw mode are more direct than the text mode.
+ You need to use a low-level I/O functions such as
+ <function>copy_from/to_user()</function> to transfer the
+ data.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static ssize_t my_file_io_read(struct snd_info_entry *entry,
+ void *file_private_data,
+ struct file *file,
+ char *buf,
+ size_t count,
+ loff_t pos)
+ {
+ if (copy_to_user(buf, local_data + pos, count))
+ return -EFAULT;
+ return count;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+
+ If the size of the info entry has been set up properly,
+ <structfield>count</structfield> and <structfield>pos</structfield> are
+ guaranteed to fit within 0 and the given size.
+ You don't have to check the range in the callbacks unless any
+ other condition is required.
+
+ </para>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- Power Management -->
+<!-- ****************************************************** -->
+ <chapter id="power-management">
+ <title>Power Management</title>
+ <para>
+ If the chip is supposed to work with suspend/resume
+ functions, you need to add power-management code to the
+ driver. The additional code for power-management should be
+ <function>ifdef</function>'ed with
+ <constant>CONFIG_PM</constant>.
+ </para>
+
+ <para>
+ If the driver <emphasis>fully</emphasis> supports suspend/resume
+ that is, the device can be
+ properly resumed to its state when suspend was called,
+ you can set the <constant>SNDRV_PCM_INFO_RESUME</constant> flag
+ in the pcm info field. Usually, this is possible when the
+ registers of the chip can be safely saved and restored to
+ RAM. If this is set, the trigger callback is called with
+ <constant>SNDRV_PCM_TRIGGER_RESUME</constant> after the resume
+ callback completes.
+ </para>
+
+ <para>
+ Even if the driver doesn't support PM fully but
+ partial suspend/resume is still possible, it's still worthy to
+ implement suspend/resume callbacks. In such a case, applications
+ would reset the status by calling
+ <function>snd_pcm_prepare()</function> and restart the stream
+ appropriately. Hence, you can define suspend/resume callbacks
+ below but don't set <constant>SNDRV_PCM_INFO_RESUME</constant>
+ info flag to the PCM.
+ </para>
+
+ <para>
+ Note that the trigger with SUSPEND can always be called when
+ <function>snd_pcm_suspend_all</function> is called,
+ regardless of the <constant>SNDRV_PCM_INFO_RESUME</constant> flag.
+ The <constant>RESUME</constant> flag affects only the behavior
+ of <function>snd_pcm_resume()</function>.
+ (Thus, in theory,
+ <constant>SNDRV_PCM_TRIGGER_RESUME</constant> isn't needed
+ to be handled in the trigger callback when no
+ <constant>SNDRV_PCM_INFO_RESUME</constant> flag is set. But,
+ it's better to keep it for compatibility reasons.)
+ </para>
+ <para>
+ In the earlier version of ALSA drivers, a common
+ power-management layer was provided, but it has been removed.
+ The driver needs to define the suspend/resume hooks according to
+ the bus the device is connected to. In the case of PCI drivers, the
+ callbacks look like below:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ #ifdef CONFIG_PM
+ static int snd_my_suspend(struct pci_dev *pci, pm_message_t state)
+ {
+ .... /* do things for suspend */
+ return 0;
+ }
+ static int snd_my_resume(struct pci_dev *pci)
+ {
+ .... /* do things for suspend */
+ return 0;
+ }
+ #endif
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The scheme of the real suspend job is as follows.
+
+ <orderedlist>
+ <listitem><para>Retrieve the card and the chip data.</para></listitem>
+ <listitem><para>Call <function>snd_power_change_state()</function> with
+ <constant>SNDRV_CTL_POWER_D3hot</constant> to change the
+ power status.</para></listitem>
+ <listitem><para>Call <function>snd_pcm_suspend_all()</function> to suspend the running PCM streams.</para></listitem>
+ <listitem><para>If AC97 codecs are used, call
+ <function>snd_ac97_suspend()</function> for each codec.</para></listitem>
+ <listitem><para>Save the register values if necessary.</para></listitem>
+ <listitem><para>Stop the hardware if necessary.</para></listitem>
+ <listitem><para>Disable the PCI device by calling
+ <function>pci_disable_device()</function>. Then, call
+ <function>pci_save_state()</function> at last.</para></listitem>
+ </orderedlist>
+ </para>
+
+ <para>
+ A typical code would be like:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int mychip_suspend(struct pci_dev *pci, pm_message_t state)
+ {
+ /* (1) */
+ struct snd_card *card = pci_get_drvdata(pci);
+ struct mychip *chip = card->private_data;
+ /* (2) */
+ snd_power_change_state(card, SNDRV_CTL_POWER_D3hot);
+ /* (3) */
+ snd_pcm_suspend_all(chip->pcm);
+ /* (4) */
+ snd_ac97_suspend(chip->ac97);
+ /* (5) */
+ snd_mychip_save_registers(chip);
+ /* (6) */
+ snd_mychip_stop_hardware(chip);
+ /* (7) */
+ pci_disable_device(pci);
+ pci_save_state(pci);
+ return 0;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ The scheme of the real resume job is as follows.
+
+ <orderedlist>
+ <listitem><para>Retrieve the card and the chip data.</para></listitem>
+ <listitem><para>Set up PCI. First, call <function>pci_restore_state()</function>.
+ Then enable the pci device again by calling <function>pci_enable_device()</function>.
+ Call <function>pci_set_master()</function> if necessary, too.</para></listitem>
+ <listitem><para>Re-initialize the chip.</para></listitem>
+ <listitem><para>Restore the saved registers if necessary.</para></listitem>
+ <listitem><para>Resume the mixer, e.g. calling
+ <function>snd_ac97_resume()</function>.</para></listitem>
+ <listitem><para>Restart the hardware (if any).</para></listitem>
+ <listitem><para>Call <function>snd_power_change_state()</function> with
+ <constant>SNDRV_CTL_POWER_D0</constant> to notify the processes.</para></listitem>
+ </orderedlist>
+ </para>
+
+ <para>
+ A typical code would be like:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int mychip_resume(struct pci_dev *pci)
+ {
+ /* (1) */
+ struct snd_card *card = pci_get_drvdata(pci);
+ struct mychip *chip = card->private_data;
+ /* (2) */
+ pci_restore_state(pci);
+ pci_enable_device(pci);
+ pci_set_master(pci);
+ /* (3) */
+ snd_mychip_reinit_chip(chip);
+ /* (4) */
+ snd_mychip_restore_registers(chip);
+ /* (5) */
+ snd_ac97_resume(chip->ac97);
+ /* (6) */
+ snd_mychip_restart_chip(chip);
+ /* (7) */
+ snd_power_change_state(card, SNDRV_CTL_POWER_D0);
+ return 0;
+ }
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ As shown in the above, it's better to save registers after
+ suspending the PCM operations via
+ <function>snd_pcm_suspend_all()</function> or
+ <function>snd_pcm_suspend()</function>. It means that the PCM
+ streams are already stopped when the register snapshot is
+ taken. But, remember that you don't have to restart the PCM
+ stream in the resume callback. It'll be restarted via
+ trigger call with <constant>SNDRV_PCM_TRIGGER_RESUME</constant>
+ when necessary.
+ </para>
+
+ <para>
+ OK, we have all callbacks now. Let's set them up. In the
+ initialization of the card, make sure that you can get the chip
+ data from the card instance, typically via
+ <structfield>private_data</structfield> field, in case you
+ created the chip data individually.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_mychip_probe(struct pci_dev *pci,
+ const struct pci_device_id *pci_id)
+ {
+ ....
+ struct snd_card *card;
+ struct mychip *chip;
+ int err;
+ ....
+ err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
+ 0, &card);
+ ....
+ chip = kzalloc(sizeof(*chip), GFP_KERNEL);
+ ....
+ card->private_data = chip;
+ ....
+ }
+]]>
+ </programlisting>
+ </informalexample>
+
+ When you created the chip data with
+ <function>snd_card_new()</function>, it's anyway accessible
+ via <structfield>private_data</structfield> field.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int snd_mychip_probe(struct pci_dev *pci,
+ const struct pci_device_id *pci_id)
+ {
+ ....
+ struct snd_card *card;
+ struct mychip *chip;
+ int err;
+ ....
+ err = snd_card_new(&pci->dev, index[dev], id[dev], THIS_MODULE,
+ sizeof(struct mychip), &card);
+ ....
+ chip = card->private_data;
+ ....
+ }
+]]>
+ </programlisting>
+ </informalexample>
+
+ </para>
+
+ <para>
+ If you need a space to save the registers, allocate the
+ buffer for it here, too, since it would be fatal
+ if you cannot allocate a memory in the suspend phase.
+ The allocated buffer should be released in the corresponding
+ destructor.
+ </para>
+
+ <para>
+ And next, set suspend/resume callbacks to the pci_driver.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static struct pci_driver driver = {
+ .name = KBUILD_MODNAME,
+ .id_table = snd_my_ids,
+ .probe = snd_my_probe,
+ .remove = snd_my_remove,
+ #ifdef CONFIG_PM
+ .suspend = snd_my_suspend,
+ .resume = snd_my_resume,
+ #endif
+ };
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- Module Parameters -->
+<!-- ****************************************************** -->
+ <chapter id="module-parameters">
+ <title>Module Parameters</title>
+ <para>
+ There are standard module options for ALSA. At least, each
+ module should have the <parameter>index</parameter>,
+ <parameter>id</parameter> and <parameter>enable</parameter>
+ options.
+ </para>
+
+ <para>
+ If the module supports multiple cards (usually up to
+ 8 = <constant>SNDRV_CARDS</constant> cards), they should be
+ arrays. The default initial values are defined already as
+ constants for easier programming:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
+ static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
+ static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ If the module supports only a single card, they could be single
+ variables, instead. <parameter>enable</parameter> option is not
+ always necessary in this case, but it would be better to have a
+ dummy option for compatibility.
+ </para>
+
+ <para>
+ The module parameters must be declared with the standard
+ <function>module_param()()</function>,
+ <function>module_param_array()()</function> and
+ <function>MODULE_PARM_DESC()</function> macros.
+ </para>
+
+ <para>
+ The typical coding would be like below:
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ #define CARD_NAME "My Chip"
+
+ module_param_array(index, int, NULL, 0444);
+ MODULE_PARM_DESC(index, "Index value for " CARD_NAME " soundcard.");
+ module_param_array(id, charp, NULL, 0444);
+ MODULE_PARM_DESC(id, "ID string for " CARD_NAME " soundcard.");
+ module_param_array(enable, bool, NULL, 0444);
+ MODULE_PARM_DESC(enable, "Enable " CARD_NAME " soundcard.");
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ Also, don't forget to define the module description, classes,
+ license and devices. Especially, the recent modprobe requires to
+ define the module license as GPL, etc., otherwise the system is
+ shown as <quote>tainted</quote>.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ MODULE_DESCRIPTION("My Chip");
+ MODULE_LICENSE("GPL");
+ MODULE_SUPPORTED_DEVICE("{{Vendor,My Chip Name}}");
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- How To Put Your Driver -->
+<!-- ****************************************************** -->
+ <chapter id="how-to-put-your-driver">
+ <title>How To Put Your Driver Into ALSA Tree</title>
+ <section>
+ <title>General</title>
+ <para>
+ So far, you've learned how to write the driver codes.
+ And you might have a question now: how to put my own
+ driver into the ALSA driver tree?
+ Here (finally :) the standard procedure is described briefly.
+ </para>
+
+ <para>
+ Suppose that you create a new PCI driver for the card
+ <quote>xyz</quote>. The card module name would be
+ snd-xyz. The new driver is usually put into the alsa-driver
+ tree, <filename>alsa-driver/pci</filename> directory in
+ the case of PCI cards.
+ Then the driver is evaluated, audited and tested
+ by developers and users. After a certain time, the driver
+ will go to the alsa-kernel tree (to the corresponding directory,
+ such as <filename>alsa-kernel/pci</filename>) and eventually
+ will be integrated into the Linux 2.6 tree (the directory would be
+ <filename>linux/sound/pci</filename>).
+ </para>
+
+ <para>
+ In the following sections, the driver code is supposed
+ to be put into alsa-driver tree. The two cases are covered:
+ a driver consisting of a single source file and one consisting
+ of several source files.
+ </para>
+ </section>
+
+ <section>
+ <title>Driver with A Single Source File</title>
+ <para>
+ <orderedlist>
+ <listitem>
+ <para>
+ Modify alsa-driver/pci/Makefile
+ </para>
+
+ <para>
+ Suppose you have a file xyz.c. Add the following
+ two lines
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd-xyz-objs := xyz.o
+ obj-$(CONFIG_SND_XYZ) += snd-xyz.o
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Create the Kconfig entry
+ </para>
+
+ <para>
+ Add the new entry of Kconfig for your xyz driver.
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ config SND_XYZ
+ tristate "Foobar XYZ"
+ depends on SND
+ select SND_PCM
+ help
+ Say Y here to include support for Foobar XYZ soundcard.
+
+ To compile this driver as a module, choose M here: the module
+ will be called snd-xyz.
+]]>
+ </programlisting>
+ </informalexample>
+
+ the line, select SND_PCM, specifies that the driver xyz supports
+ PCM. In addition to SND_PCM, the following components are
+ supported for select command:
+ SND_RAWMIDI, SND_TIMER, SND_HWDEP, SND_MPU401_UART,
+ SND_OPL3_LIB, SND_OPL4_LIB, SND_VX_LIB, SND_AC97_CODEC.
+ Add the select command for each supported component.
+ </para>
+
+ <para>
+ Note that some selections imply the lowlevel selections.
+ For example, PCM includes TIMER, MPU401_UART includes RAWMIDI,
+ AC97_CODEC includes PCM, and OPL3_LIB includes HWDEP.
+ You don't need to give the lowlevel selections again.
+ </para>
+
+ <para>
+ For the details of Kconfig script, refer to the kbuild
+ documentation.
+ </para>
+
+ </listitem>
+
+ <listitem>
+ <para>
+ Run cvscompile script to re-generate the configure script and
+ build the whole stuff again.
+ </para>
+ </listitem>
+ </orderedlist>
+ </para>
+ </section>
+
+ <section>
+ <title>Drivers with Several Source Files</title>
+ <para>
+ Suppose that the driver snd-xyz have several source files.
+ They are located in the new subdirectory,
+ pci/xyz.
+
+ <orderedlist>
+ <listitem>
+ <para>
+ Add a new directory (<filename>xyz</filename>) in
+ <filename>alsa-driver/pci/Makefile</filename> as below
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ obj-$(CONFIG_SND) += xyz/
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Under the directory <filename>xyz</filename>, create a Makefile
+
+ <example>
+ <title>Sample Makefile for a driver xyz</title>
+ <programlisting>
+<![CDATA[
+ ifndef SND_TOPDIR
+ SND_TOPDIR=../..
+ endif
+
+ include $(SND_TOPDIR)/toplevel.config
+ include $(SND_TOPDIR)/Makefile.conf
+
+ snd-xyz-objs := xyz.o abc.o def.o
+
+ obj-$(CONFIG_SND_XYZ) += snd-xyz.o
+
+ include $(SND_TOPDIR)/Rules.make
+]]>
+ </programlisting>
+ </example>
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Create the Kconfig entry
+ </para>
+
+ <para>
+ This procedure is as same as in the last section.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Run cvscompile script to re-generate the configure script and
+ build the whole stuff again.
+ </para>
+ </listitem>
+ </orderedlist>
+ </para>
+ </section>
+
+ </chapter>
+
+<!-- ****************************************************** -->
+<!-- Useful Functions -->
+<!-- ****************************************************** -->
+ <chapter id="useful-functions">
+ <title>Useful Functions</title>
+
+ <section id="useful-functions-snd-printk">
+ <title><function>snd_printk()</function> and friends</title>
+ <para>
+ ALSA provides a verbose version of the
+ <function>printk()</function> function. If a kernel config
+ <constant>CONFIG_SND_VERBOSE_PRINTK</constant> is set, this
+ function prints the given message together with the file name
+ and the line of the caller. The <constant>KERN_XXX</constant>
+ prefix is processed as
+ well as the original <function>printk()</function> does, so it's
+ recommended to add this prefix, e.g.
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_printk(KERN_ERR "Oh my, sorry, it's extremely bad!\n");
+]]>
+ </programlisting>
+ </informalexample>
+ </para>
+
+ <para>
+ There are also <function>printk()</function>'s for
+ debugging. <function>snd_printd()</function> can be used for
+ general debugging purposes. If
+ <constant>CONFIG_SND_DEBUG</constant> is set, this function is
+ compiled, and works just like
+ <function>snd_printk()</function>. If the ALSA is compiled
+ without the debugging flag, it's ignored.
+ </para>
+
+ <para>
+ <function>snd_printdd()</function> is compiled in only when
+ <constant>CONFIG_SND_DEBUG_VERBOSE</constant> is set. Please note
+ that <constant>CONFIG_SND_DEBUG_VERBOSE</constant> is not set as default
+ even if you configure the alsa-driver with
+ <option>--with-debug=full</option> option. You need to give
+ explicitly <option>--with-debug=detect</option> option instead.
+ </para>
+ </section>
+
+ <section id="useful-functions-snd-bug">
+ <title><function>snd_BUG()</function></title>
+ <para>
+ It shows the <computeroutput>BUG?</computeroutput> message and
+ stack trace as well as <function>snd_BUG_ON</function> at the point.
+ It's useful to show that a fatal error happens there.
+ </para>
+ <para>
+ When no debug flag is set, this macro is ignored.
+ </para>
+ </section>
+
+ <section id="useful-functions-snd-bug-on">
+ <title><function>snd_BUG_ON()</function></title>
+ <para>
+ <function>snd_BUG_ON()</function> macro is similar with
+ <function>WARN_ON()</function> macro. For example,
+
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ snd_BUG_ON(!pointer);
+]]>
+ </programlisting>
+ </informalexample>
+
+ or it can be used as the condition,
+ <informalexample>
+ <programlisting>
+<![CDATA[
+ if (snd_BUG_ON(non_zero_is_bug))
+ return -EINVAL;
+]]>
+ </programlisting>
+ </informalexample>
+
+ </para>
+
+ <para>
+ The macro takes an conditional expression to evaluate.
+ When <constant>CONFIG_SND_DEBUG</constant>, is set, if the
+ expression is non-zero, it shows the warning message such as
+ <computeroutput>BUG? (xxx)</computeroutput>
+ normally followed by stack trace.
+
+ In both cases it returns the evaluated value.
+ </para>
+
+ </section>
+
+ </chapter>
+
+
+<!-- ****************************************************** -->
+<!-- Acknowledgments -->
+<!-- ****************************************************** -->
+ <chapter id="acknowledgments">
+ <title>Acknowledgments</title>
+ <para>
+ I would like to thank Phil Kerr for his help for improvement and
+ corrections of this document.
+ </para>
+ <para>
+ Kevin Conder reformatted the original plain-text to the
+ DocBook format.
+ </para>
+ <para>
+ Giuliano Pochini corrected typos and contributed the example codes
+ in the hardware constraints section.
+ </para>
+ </chapter>
+</book>
diff --git a/Documentation/DocBook/writing_musb_glue_layer.tmpl b/Documentation/DocBook/writing_musb_glue_layer.tmpl
new file mode 100644
index 000000000..837eca77f
--- /dev/null
+++ b/Documentation/DocBook/writing_musb_glue_layer.tmpl
@@ -0,0 +1,873 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Writing-MUSB-Glue-Layer">
+ <bookinfo>
+ <title>Writing an MUSB Glue Layer</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Apelete</firstname>
+ <surname>Seketeli</surname>
+ <affiliation>
+ <address>
+ <email>apelete at seketeli.net</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2014</year>
+ <holder>Apelete Seketeli</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute it
+ and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later version.
+ </para>
+
+ <para>
+ This documentation is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public License
+ along with this documentation; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the Linux kernel source
+ tree.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="introduction">
+ <title>Introduction</title>
+ <para>
+ The Linux MUSB subsystem is part of the larger Linux USB
+ subsystem. It provides support for embedded USB Device Controllers
+ (UDC) that do not use Universal Host Controller Interface (UHCI)
+ or Open Host Controller Interface (OHCI).
+ </para>
+ <para>
+ Instead, these embedded UDC rely on the USB On-the-Go (OTG)
+ specification which they implement at least partially. The silicon
+ reference design used in most cases is the Multipoint USB
+ Highspeed Dual-Role Controller (MUSB HDRC) found in the Mentor
+ Graphics Inventra™ design.
+ </para>
+ <para>
+ As a self-taught exercise I have written an MUSB glue layer for
+ the Ingenic JZ4740 SoC, modelled after the many MUSB glue layers
+ in the kernel source tree. This layer can be found at
+ drivers/usb/musb/jz4740.c. In this documentation I will walk
+ through the basics of the jz4740.c glue layer, explaining the
+ different pieces and what needs to be done in order to write your
+ own device glue layer.
+ </para>
+ </chapter>
+
+ <chapter id="linux-musb-basics">
+ <title>Linux MUSB Basics</title>
+ <para>
+ To get started on the topic, please read USB On-the-Go Basics (see
+ Resources) which provides an introduction of USB OTG operation at
+ the hardware level. A couple of wiki pages by Texas Instruments
+ and Analog Devices also provide an overview of the Linux kernel
+ MUSB configuration, albeit focused on some specific devices
+ provided by these companies. Finally, getting acquainted with the
+ USB specification at USB home page may come in handy, with
+ practical instance provided through the Writing USB Device Drivers
+ documentation (again, see Resources).
+ </para>
+ <para>
+ Linux USB stack is a layered architecture in which the MUSB
+ controller hardware sits at the lowest. The MUSB controller driver
+ abstract the MUSB controller hardware to the Linux USB stack.
+ </para>
+ <programlisting>
+ ------------------------
+ | | &lt;------- drivers/usb/gadget
+ | Linux USB Core Stack | &lt;------- drivers/usb/host
+ | | &lt;------- drivers/usb/core
+ ------------------------
+ ⬍
+ --------------------------
+ | | &lt;------ drivers/usb/musb/musb_gadget.c
+ | MUSB Controller driver | &lt;------ drivers/usb/musb/musb_host.c
+ | | &lt;------ drivers/usb/musb/musb_core.c
+ --------------------------
+ ⬍
+ ---------------------------------
+ | MUSB Platform Specific Driver |
+ | | &lt;-- drivers/usb/musb/jz4740.c
+ | aka &quot;Glue Layer&quot; |
+ ---------------------------------
+ ⬍
+ ---------------------------------
+ | MUSB Controller Hardware |
+ ---------------------------------
+ </programlisting>
+ <para>
+ As outlined above, the glue layer is actually the platform
+ specific code sitting in between the controller driver and the
+ controller hardware.
+ </para>
+ <para>
+ Just like a Linux USB driver needs to register itself with the
+ Linux USB subsystem, the MUSB glue layer needs first to register
+ itself with the MUSB controller driver. This will allow the
+ controller driver to know about which device the glue layer
+ supports and which functions to call when a supported device is
+ detected or released; remember we are talking about an embedded
+ controller chip here, so no insertion or removal at run-time.
+ </para>
+ <para>
+ All of this information is passed to the MUSB controller driver
+ through a platform_driver structure defined in the glue layer as:
+ </para>
+ <programlisting linenumbering="numbered">
+static struct platform_driver jz4740_driver = {
+ .probe = jz4740_probe,
+ .remove = jz4740_remove,
+ .driver = {
+ .name = "musb-jz4740",
+ },
+};
+ </programlisting>
+ <para>
+ The probe and remove function pointers are called when a matching
+ device is detected and, respectively, released. The name string
+ describes the device supported by this glue layer. In the current
+ case it matches a platform_device structure declared in
+ arch/mips/jz4740/platform.c. Note that we are not using device
+ tree bindings here.
+ </para>
+ <para>
+ In order to register itself to the controller driver, the glue
+ layer goes through a few steps, basically allocating the
+ controller hardware resources and initialising a couple of
+ circuits. To do so, it needs to keep track of the information used
+ throughout these steps. This is done by defining a private
+ jz4740_glue structure:
+ </para>
+ <programlisting linenumbering="numbered">
+struct jz4740_glue {
+ struct device *dev;
+ struct platform_device *musb;
+ struct clk *clk;
+};
+ </programlisting>
+ <para>
+ The dev and musb members are both device structure variables. The
+ first one holds generic information about the device, since it's
+ the basic device structure, and the latter holds information more
+ closely related to the subsystem the device is registered to. The
+ clk variable keeps information related to the device clock
+ operation.
+ </para>
+ <para>
+ Let's go through the steps of the probe function that leads the
+ glue layer to register itself to the controller driver.
+ </para>
+ <para>
+ N.B.: For the sake of readability each function will be split in
+ logical parts, each part being shown as if it was independent from
+ the others.
+ </para>
+ <programlisting linenumbering="numbered">
+static int jz4740_probe(struct platform_device *pdev)
+{
+ struct platform_device *musb;
+ struct jz4740_glue *glue;
+ struct clk *clk;
+ int ret;
+
+ glue = devm_kzalloc(&amp;pdev->dev, sizeof(*glue), GFP_KERNEL);
+ if (!glue)
+ return -ENOMEM;
+
+ musb = platform_device_alloc("musb-hdrc", PLATFORM_DEVID_AUTO);
+ if (!musb) {
+ dev_err(&amp;pdev->dev, "failed to allocate musb device\n");
+ return -ENOMEM;
+ }
+
+ clk = devm_clk_get(&amp;pdev->dev, "udc");
+ if (IS_ERR(clk)) {
+ dev_err(&amp;pdev->dev, "failed to get clock\n");
+ ret = PTR_ERR(clk);
+ goto err_platform_device_put;
+ }
+
+ ret = clk_prepare_enable(clk);
+ if (ret) {
+ dev_err(&amp;pdev->dev, "failed to enable clock\n");
+ goto err_platform_device_put;
+ }
+
+ musb->dev.parent = &amp;pdev->dev;
+
+ glue->dev = &amp;pdev->dev;
+ glue->musb = musb;
+ glue->clk = clk;
+
+ return 0;
+
+err_platform_device_put:
+ platform_device_put(musb);
+ return ret;
+}
+ </programlisting>
+ <para>
+ The first few lines of the probe function allocate and assign the
+ glue, musb and clk variables. The GFP_KERNEL flag (line 8) allows
+ the allocation process to sleep and wait for memory, thus being
+ usable in a blocking situation. The PLATFORM_DEVID_AUTO flag (line
+ 12) allows automatic allocation and management of device IDs in
+ order to avoid device namespace collisions with explicit IDs. With
+ devm_clk_get() (line 18) the glue layer allocates the clock -- the
+ <literal>devm_</literal> prefix indicates that clk_get() is
+ managed: it automatically frees the allocated clock resource data
+ when the device is released -- and enable it.
+ </para>
+ <para>
+ Then comes the registration steps:
+ </para>
+ <programlisting linenumbering="numbered">
+static int jz4740_probe(struct platform_device *pdev)
+{
+ struct musb_hdrc_platform_data *pdata = &amp;jz4740_musb_platform_data;
+
+ pdata->platform_ops = &amp;jz4740_musb_ops;
+
+ platform_set_drvdata(pdev, glue);
+
+ ret = platform_device_add_resources(musb, pdev->resource,
+ pdev->num_resources);
+ if (ret) {
+ dev_err(&amp;pdev->dev, "failed to add resources\n");
+ goto err_clk_disable;
+ }
+
+ ret = platform_device_add_data(musb, pdata, sizeof(*pdata));
+ if (ret) {
+ dev_err(&amp;pdev->dev, "failed to add platform_data\n");
+ goto err_clk_disable;
+ }
+
+ return 0;
+
+err_clk_disable:
+ clk_disable_unprepare(clk);
+err_platform_device_put:
+ platform_device_put(musb);
+ return ret;
+}
+ </programlisting>
+ <para>
+ The first step is to pass the device data privately held by the
+ glue layer on to the controller driver through
+ platform_set_drvdata() (line 7). Next is passing on the device
+ resources information, also privately held at that point, through
+ platform_device_add_resources() (line 9).
+ </para>
+ <para>
+ Finally comes passing on the platform specific data to the
+ controller driver (line 16). Platform data will be discussed in
+ <link linkend="device-platform-data">Chapter 4</link>, but here
+ we are looking at the platform_ops function pointer (line 5) in
+ musb_hdrc_platform_data structure (line 3). This function
+ pointer allows the MUSB controller driver to know which function
+ to call for device operation:
+ </para>
+ <programlisting linenumbering="numbered">
+static const struct musb_platform_ops jz4740_musb_ops = {
+ .init = jz4740_musb_init,
+ .exit = jz4740_musb_exit,
+};
+ </programlisting>
+ <para>
+ Here we have the minimal case where only init and exit functions
+ are called by the controller driver when needed. Fact is the
+ JZ4740 MUSB controller is a basic controller, lacking some
+ features found in other controllers, otherwise we may also have
+ pointers to a few other functions like a power management function
+ or a function to switch between OTG and non-OTG modes, for
+ instance.
+ </para>
+ <para>
+ At that point of the registration process, the controller driver
+ actually calls the init function:
+ </para>
+ <programlisting linenumbering="numbered">
+static int jz4740_musb_init(struct musb *musb)
+{
+ musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2);
+ if (!musb->xceiv) {
+ pr_err("HS UDC: no transceiver configured\n");
+ return -ENODEV;
+ }
+
+ /* Silicon does not implement ConfigData register.
+ * Set dyn_fifo to avoid reading EP config from hardware.
+ */
+ musb->dyn_fifo = true;
+
+ musb->isr = jz4740_musb_interrupt;
+
+ return 0;
+}
+ </programlisting>
+ <para>
+ The goal of jz4740_musb_init() is to get hold of the transceiver
+ driver data of the MUSB controller hardware and pass it on to the
+ MUSB controller driver, as usual. The transceiver is the circuitry
+ inside the controller hardware responsible for sending/receiving
+ the USB data. Since it is an implementation of the physical layer
+ of the OSI model, the transceiver is also referred to as PHY.
+ </para>
+ <para>
+ Getting hold of the MUSB PHY driver data is done with
+ usb_get_phy() which returns a pointer to the structure
+ containing the driver instance data. The next couple of
+ instructions (line 12 and 14) are used as a quirk and to setup
+ IRQ handling respectively. Quirks and IRQ handling will be
+ discussed later in <link linkend="device-quirks">Chapter
+ 5</link> and <link linkend="handling-irqs">Chapter 3</link>.
+ </para>
+ <programlisting linenumbering="numbered">
+static int jz4740_musb_exit(struct musb *musb)
+{
+ usb_put_phy(musb->xceiv);
+
+ return 0;
+}
+ </programlisting>
+ <para>
+ Acting as the counterpart of init, the exit function releases the
+ MUSB PHY driver when the controller hardware itself is about to be
+ released.
+ </para>
+ <para>
+ Again, note that init and exit are fairly simple in this case due
+ to the basic set of features of the JZ4740 controller hardware.
+ When writing an musb glue layer for a more complex controller
+ hardware, you might need to take care of more processing in those
+ two functions.
+ </para>
+ <para>
+ Returning from the init function, the MUSB controller driver jumps
+ back into the probe function:
+ </para>
+ <programlisting linenumbering="numbered">
+static int jz4740_probe(struct platform_device *pdev)
+{
+ ret = platform_device_add(musb);
+ if (ret) {
+ dev_err(&amp;pdev->dev, "failed to register musb device\n");
+ goto err_clk_disable;
+ }
+
+ return 0;
+
+err_clk_disable:
+ clk_disable_unprepare(clk);
+err_platform_device_put:
+ platform_device_put(musb);
+ return ret;
+}
+ </programlisting>
+ <para>
+ This is the last part of the device registration process where the
+ glue layer adds the controller hardware device to Linux kernel
+ device hierarchy: at this stage, all known information about the
+ device is passed on to the Linux USB core stack.
+ </para>
+ <programlisting linenumbering="numbered">
+static int jz4740_remove(struct platform_device *pdev)
+{
+ struct jz4740_glue *glue = platform_get_drvdata(pdev);
+
+ platform_device_unregister(glue->musb);
+ clk_disable_unprepare(glue->clk);
+
+ return 0;
+}
+ </programlisting>
+ <para>
+ Acting as the counterpart of probe, the remove function unregister
+ the MUSB controller hardware (line 5) and disable the clock (line
+ 6), allowing it to be gated.
+ </para>
+ </chapter>
+
+ <chapter id="handling-irqs">
+ <title>Handling IRQs</title>
+ <para>
+ Additionally to the MUSB controller hardware basic setup and
+ registration, the glue layer is also responsible for handling the
+ IRQs:
+ </para>
+ <programlisting linenumbering="numbered">
+static irqreturn_t jz4740_musb_interrupt(int irq, void *__hci)
+{
+ unsigned long flags;
+ irqreturn_t retval = IRQ_NONE;
+ struct musb *musb = __hci;
+
+ spin_lock_irqsave(&amp;musb->lock, flags);
+
+ musb->int_usb = musb_readb(musb->mregs, MUSB_INTRUSB);
+ musb->int_tx = musb_readw(musb->mregs, MUSB_INTRTX);
+ musb->int_rx = musb_readw(musb->mregs, MUSB_INTRRX);
+
+ /*
+ * The controller is gadget only, the state of the host mode IRQ bits is
+ * undefined. Mask them to make sure that the musb driver core will
+ * never see them set
+ */
+ musb->int_usb &amp;= MUSB_INTR_SUSPEND | MUSB_INTR_RESUME |
+ MUSB_INTR_RESET | MUSB_INTR_SOF;
+
+ if (musb->int_usb || musb->int_tx || musb->int_rx)
+ retval = musb_interrupt(musb);
+
+ spin_unlock_irqrestore(&amp;musb->lock, flags);
+
+ return retval;
+}
+ </programlisting>
+ <para>
+ Here the glue layer mostly has to read the relevant hardware
+ registers and pass their values on to the controller driver which
+ will handle the actual event that triggered the IRQ.
+ </para>
+ <para>
+ The interrupt handler critical section is protected by the
+ spin_lock_irqsave() and counterpart spin_unlock_irqrestore()
+ functions (line 7 and 24 respectively), which prevent the
+ interrupt handler code to be run by two different threads at the
+ same time.
+ </para>
+ <para>
+ Then the relevant interrupt registers are read (line 9 to 11):
+ </para>
+ <itemizedlist>
+ <listitem>
+ <para>
+ MUSB_INTRUSB: indicates which USB interrupts are currently
+ active,
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ MUSB_INTRTX: indicates which of the interrupts for TX
+ endpoints are currently active,
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ MUSB_INTRRX: indicates which of the interrupts for TX
+ endpoints are currently active.
+ </para>
+ </listitem>
+ </itemizedlist>
+ <para>
+ Note that musb_readb() is used to read 8-bit registers at most,
+ while musb_readw() allows us to read at most 16-bit registers.
+ There are other functions that can be used depending on the size
+ of your device registers. See musb_io.h for more information.
+ </para>
+ <para>
+ Instruction on line 18 is another quirk specific to the JZ4740
+ USB device controller, which will be discussed later in <link
+ linkend="device-quirks">Chapter 5</link>.
+ </para>
+ <para>
+ The glue layer still needs to register the IRQ handler though.
+ Remember the instruction on line 14 of the init function:
+ </para>
+ <programlisting linenumbering="numbered">
+static int jz4740_musb_init(struct musb *musb)
+{
+ musb->isr = jz4740_musb_interrupt;
+
+ return 0;
+}
+ </programlisting>
+ <para>
+ This instruction sets a pointer to the glue layer IRQ handler
+ function, in order for the controller hardware to call the handler
+ back when an IRQ comes from the controller hardware. The interrupt
+ handler is now implemented and registered.
+ </para>
+ </chapter>
+
+ <chapter id="device-platform-data">
+ <title>Device Platform Data</title>
+ <para>
+ In order to write an MUSB glue layer, you need to have some data
+ describing the hardware capabilities of your controller hardware,
+ which is called the platform data.
+ </para>
+ <para>
+ Platform data is specific to your hardware, though it may cover a
+ broad range of devices, and is generally found somewhere in the
+ arch/ directory, depending on your device architecture.
+ </para>
+ <para>
+ For instance, platform data for the JZ4740 SoC is found in
+ arch/mips/jz4740/platform.c. In the platform.c file each device of
+ the JZ4740 SoC is described through a set of structures.
+ </para>
+ <para>
+ Here is the part of arch/mips/jz4740/platform.c that covers the
+ USB Device Controller (UDC):
+ </para>
+ <programlisting linenumbering="numbered">
+/* USB Device Controller */
+struct platform_device jz4740_udc_xceiv_device = {
+ .name = "usb_phy_gen_xceiv",
+ .id = 0,
+};
+
+static struct resource jz4740_udc_resources[] = {
+ [0] = {
+ .start = JZ4740_UDC_BASE_ADDR,
+ .end = JZ4740_UDC_BASE_ADDR + 0x10000 - 1,
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = JZ4740_IRQ_UDC,
+ .end = JZ4740_IRQ_UDC,
+ .flags = IORESOURCE_IRQ,
+ .name = "mc",
+ },
+};
+
+struct platform_device jz4740_udc_device = {
+ .name = "musb-jz4740",
+ .id = -1,
+ .dev = {
+ .dma_mask = &amp;jz4740_udc_device.dev.coherent_dma_mask,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
+ },
+ .num_resources = ARRAY_SIZE(jz4740_udc_resources),
+ .resource = jz4740_udc_resources,
+};
+ </programlisting>
+ <para>
+ The jz4740_udc_xceiv_device platform device structure (line 2)
+ describes the UDC transceiver with a name and id number.
+ </para>
+ <para>
+ At the time of this writing, note that
+ &quot;usb_phy_gen_xceiv&quot; is the specific name to be used for
+ all transceivers that are either built-in with reference USB IP or
+ autonomous and doesn't require any PHY programming. You will need
+ to set CONFIG_NOP_USB_XCEIV=y in the kernel configuration to make
+ use of the corresponding transceiver driver. The id field could be
+ set to -1 (equivalent to PLATFORM_DEVID_NONE), -2 (equivalent to
+ PLATFORM_DEVID_AUTO) or start with 0 for the first device of this
+ kind if we want a specific id number.
+ </para>
+ <para>
+ The jz4740_udc_resources resource structure (line 7) defines the
+ UDC registers base addresses.
+ </para>
+ <para>
+ The first array (line 9 to 11) defines the UDC registers base
+ memory addresses: start points to the first register memory
+ address, end points to the last register memory address and the
+ flags member defines the type of resource we are dealing with. So
+ IORESOURCE_MEM is used to define the registers memory addresses.
+ The second array (line 14 to 17) defines the UDC IRQ registers
+ addresses. Since there is only one IRQ register available for the
+ JZ4740 UDC, start and end point at the same address. The
+ IORESOURCE_IRQ flag tells that we are dealing with IRQ resources,
+ and the name &quot;mc&quot; is in fact hard-coded in the MUSB core
+ in order for the controller driver to retrieve this IRQ resource
+ by querying it by its name.
+ </para>
+ <para>
+ Finally, the jz4740_udc_device platform device structure (line 21)
+ describes the UDC itself.
+ </para>
+ <para>
+ The &quot;musb-jz4740&quot; name (line 22) defines the MUSB
+ driver that is used for this device; remember this is in fact
+ the name that we used in the jz4740_driver platform driver
+ structure in <link linkend="linux-musb-basics">Chapter
+ 2</link>. The id field (line 23) is set to -1 (equivalent to
+ PLATFORM_DEVID_NONE) since we do not need an id for the device:
+ the MUSB controller driver was already set to allocate an
+ automatic id in <link linkend="linux-musb-basics">Chapter
+ 2</link>. In the dev field we care for DMA related information
+ here. The dma_mask field (line 25) defines the width of the DMA
+ mask that is going to be used, and coherent_dma_mask (line 26)
+ has the same purpose but for the alloc_coherent DMA mappings: in
+ both cases we are using a 32 bits mask. Then the resource field
+ (line 29) is simply a pointer to the resource structure defined
+ before, while the num_resources field (line 28) keeps track of
+ the number of arrays defined in the resource structure (in this
+ case there were two resource arrays defined before).
+ </para>
+ <para>
+ With this quick overview of the UDC platform data at the arch/
+ level now done, let's get back to the MUSB glue layer specific
+ platform data in drivers/usb/musb/jz4740.c:
+ </para>
+ <programlisting linenumbering="numbered">
+static struct musb_hdrc_config jz4740_musb_config = {
+ /* Silicon does not implement USB OTG. */
+ .multipoint = 0,
+ /* Max EPs scanned, driver will decide which EP can be used. */
+ .num_eps = 4,
+ /* RAMbits needed to configure EPs from table */
+ .ram_bits = 9,
+ .fifo_cfg = jz4740_musb_fifo_cfg,
+ .fifo_cfg_size = ARRAY_SIZE(jz4740_musb_fifo_cfg),
+};
+
+static struct musb_hdrc_platform_data jz4740_musb_platform_data = {
+ .mode = MUSB_PERIPHERAL,
+ .config = &amp;jz4740_musb_config,
+};
+ </programlisting>
+ <para>
+ First the glue layer configures some aspects of the controller
+ driver operation related to the controller hardware specifics.
+ This is done through the jz4740_musb_config musb_hdrc_config
+ structure.
+ </para>
+ <para>
+ Defining the OTG capability of the controller hardware, the
+ multipoint member (line 3) is set to 0 (equivalent to false)
+ since the JZ4740 UDC is not OTG compatible. Then num_eps (line
+ 5) defines the number of USB endpoints of the controller
+ hardware, including endpoint 0: here we have 3 endpoints +
+ endpoint 0. Next is ram_bits (line 7) which is the width of the
+ RAM address bus for the MUSB controller hardware. This
+ information is needed when the controller driver cannot
+ automatically configure endpoints by reading the relevant
+ controller hardware registers. This issue will be discussed when
+ we get to device quirks in <link linkend="device-quirks">Chapter
+ 5</link>. Last two fields (line 8 and 9) are also about device
+ quirks: fifo_cfg points to the USB endpoints configuration table
+ and fifo_cfg_size keeps track of the size of the number of
+ entries in that configuration table. More on that later in <link
+ linkend="device-quirks">Chapter 5</link>.
+ </para>
+ <para>
+ Then this configuration is embedded inside
+ jz4740_musb_platform_data musb_hdrc_platform_data structure (line
+ 11): config is a pointer to the configuration structure itself,
+ and mode tells the controller driver if the controller hardware
+ may be used as MUSB_HOST only, MUSB_PERIPHERAL only or MUSB_OTG
+ which is a dual mode.
+ </para>
+ <para>
+ Remember that jz4740_musb_platform_data is then used to convey
+ platform data information as we have seen in the probe function
+ in <link linkend="linux-musb-basics">Chapter 2</link>
+ </para>
+ </chapter>
+
+ <chapter id="device-quirks">
+ <title>Device Quirks</title>
+ <para>
+ Completing the platform data specific to your device, you may also
+ need to write some code in the glue layer to work around some
+ device specific limitations. These quirks may be due to some
+ hardware bugs, or simply be the result of an incomplete
+ implementation of the USB On-the-Go specification.
+ </para>
+ <para>
+ The JZ4740 UDC exhibits such quirks, some of which we will discuss
+ here for the sake of insight even though these might not be found
+ in the controller hardware you are working on.
+ </para>
+ <para>
+ Let's get back to the init function first:
+ </para>
+ <programlisting linenumbering="numbered">
+static int jz4740_musb_init(struct musb *musb)
+{
+ musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2);
+ if (!musb->xceiv) {
+ pr_err("HS UDC: no transceiver configured\n");
+ return -ENODEV;
+ }
+
+ /* Silicon does not implement ConfigData register.
+ * Set dyn_fifo to avoid reading EP config from hardware.
+ */
+ musb->dyn_fifo = true;
+
+ musb->isr = jz4740_musb_interrupt;
+
+ return 0;
+}
+ </programlisting>
+ <para>
+ Instruction on line 12 helps the MUSB controller driver to work
+ around the fact that the controller hardware is missing registers
+ that are used for USB endpoints configuration.
+ </para>
+ <para>
+ Without these registers, the controller driver is unable to read
+ the endpoints configuration from the hardware, so we use line 12
+ instruction to bypass reading the configuration from silicon, and
+ rely on a hard-coded table that describes the endpoints
+ configuration instead:
+ </para>
+ <programlisting linenumbering="numbered">
+static struct musb_fifo_cfg jz4740_musb_fifo_cfg[] = {
+{ .hw_ep_num = 1, .style = FIFO_TX, .maxpacket = 512, },
+{ .hw_ep_num = 1, .style = FIFO_RX, .maxpacket = 512, },
+{ .hw_ep_num = 2, .style = FIFO_TX, .maxpacket = 64, },
+};
+ </programlisting>
+ <para>
+ Looking at the configuration table above, we see that each
+ endpoints is described by three fields: hw_ep_num is the endpoint
+ number, style is its direction (either FIFO_TX for the controller
+ driver to send packets in the controller hardware, or FIFO_RX to
+ receive packets from hardware), and maxpacket defines the maximum
+ size of each data packet that can be transmitted over that
+ endpoint. Reading from the table, the controller driver knows that
+ endpoint 1 can be used to send and receive USB data packets of 512
+ bytes at once (this is in fact a bulk in/out endpoint), and
+ endpoint 2 can be used to send data packets of 64 bytes at once
+ (this is in fact an interrupt endpoint).
+ </para>
+ <para>
+ Note that there is no information about endpoint 0 here: that one
+ is implemented by default in every silicon design, with a
+ predefined configuration according to the USB specification. For
+ more examples of endpoint configuration tables, see musb_core.c.
+ </para>
+ <para>
+ Let's now get back to the interrupt handler function:
+ </para>
+ <programlisting linenumbering="numbered">
+static irqreturn_t jz4740_musb_interrupt(int irq, void *__hci)
+{
+ unsigned long flags;
+ irqreturn_t retval = IRQ_NONE;
+ struct musb *musb = __hci;
+
+ spin_lock_irqsave(&amp;musb->lock, flags);
+
+ musb->int_usb = musb_readb(musb->mregs, MUSB_INTRUSB);
+ musb->int_tx = musb_readw(musb->mregs, MUSB_INTRTX);
+ musb->int_rx = musb_readw(musb->mregs, MUSB_INTRRX);
+
+ /*
+ * The controller is gadget only, the state of the host mode IRQ bits is
+ * undefined. Mask them to make sure that the musb driver core will
+ * never see them set
+ */
+ musb->int_usb &amp;= MUSB_INTR_SUSPEND | MUSB_INTR_RESUME |
+ MUSB_INTR_RESET | MUSB_INTR_SOF;
+
+ if (musb->int_usb || musb->int_tx || musb->int_rx)
+ retval = musb_interrupt(musb);
+
+ spin_unlock_irqrestore(&amp;musb->lock, flags);
+
+ return retval;
+}
+ </programlisting>
+ <para>
+ Instruction on line 18 above is a way for the controller driver to
+ work around the fact that some interrupt bits used for USB host
+ mode operation are missing in the MUSB_INTRUSB register, thus left
+ in an undefined hardware state, since this MUSB controller
+ hardware is used in peripheral mode only. As a consequence, the
+ glue layer masks these missing bits out to avoid parasite
+ interrupts by doing a logical AND operation between the value read
+ from MUSB_INTRUSB and the bits that are actually implemented in
+ the register.
+ </para>
+ <para>
+ These are only a couple of the quirks found in the JZ4740 USB
+ device controller. Some others were directly addressed in the MUSB
+ core since the fixes were generic enough to provide a better
+ handling of the issues for others controller hardware eventually.
+ </para>
+ </chapter>
+
+ <chapter id="conclusion">
+ <title>Conclusion</title>
+ <para>
+ Writing a Linux MUSB glue layer should be a more accessible task,
+ as this documentation tries to show the ins and outs of this
+ exercise.
+ </para>
+ <para>
+ The JZ4740 USB device controller being fairly simple, I hope its
+ glue layer serves as a good example for the curious mind. Used
+ with the current MUSB glue layers, this documentation should
+ provide enough guidance to get started; should anything gets out
+ of hand, the linux-usb mailing list archive is another helpful
+ resource to browse through.
+ </para>
+ </chapter>
+
+ <chapter id="acknowledgements">
+ <title>Acknowledgements</title>
+ <para>
+ Many thanks to Lars-Peter Clausen and Maarten ter Huurne for
+ answering my questions while I was writing the JZ4740 glue layer
+ and for helping me out getting the code in good shape.
+ </para>
+ <para>
+ I would also like to thank the Qi-Hardware community at large for
+ its cheerful guidance and support.
+ </para>
+ </chapter>
+
+ <chapter id="resources">
+ <title>Resources</title>
+ <para>
+ USB Home Page:
+ <ulink url="http://www.usb.org">http://www.usb.org</ulink>
+ </para>
+ <para>
+ linux-usb Mailing List Archives:
+ <ulink url="http://marc.info/?l=linux-usb">http://marc.info/?l=linux-usb</ulink>
+ </para>
+ <para>
+ USB On-the-Go Basics:
+ <ulink url="http://www.maximintegrated.com/app-notes/index.mvp/id/1822">http://www.maximintegrated.com/app-notes/index.mvp/id/1822</ulink>
+ </para>
+ <para>
+ Writing USB Device Drivers:
+ <ulink url="https://www.kernel.org/doc/htmldocs/writing_usb_driver/index.html">https://www.kernel.org/doc/htmldocs/writing_usb_driver/index.html</ulink>
+ </para>
+ <para>
+ Texas Instruments USB Configuration Wiki Page:
+ <ulink url="http://processors.wiki.ti.com/index.php/Usbgeneralpage">http://processors.wiki.ti.com/index.php/Usbgeneralpage</ulink>
+ </para>
+ <para>
+ Analog Devices Blackfin MUSB Configuration:
+ <ulink url="http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:drivers:musb">http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:drivers:musb</ulink>
+ </para>
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/writing_usb_driver.tmpl b/Documentation/DocBook/writing_usb_driver.tmpl
new file mode 100644
index 000000000..3210dcf74
--- /dev/null
+++ b/Documentation/DocBook/writing_usb_driver.tmpl
@@ -0,0 +1,412 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="USBDeviceDriver">
+ <bookinfo>
+ <title>Writing USB Device Drivers</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Greg</firstname>
+ <surname>Kroah-Hartman</surname>
+ <affiliation>
+ <address>
+ <email>greg@kroah.com</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2001-2002</year>
+ <holder>Greg Kroah-Hartman</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+
+ <para>
+ This documentation is based on an article published in
+ Linux Journal Magazine, October 2001, Issue 90.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ The Linux USB subsystem has grown from supporting only two different
+ types of devices in the 2.2.7 kernel (mice and keyboards), to over 20
+ different types of devices in the 2.4 kernel. Linux currently supports
+ almost all USB class devices (standard types of devices like keyboards,
+ mice, modems, printers and speakers) and an ever-growing number of
+ vendor-specific devices (such as USB to serial converters, digital
+ cameras, Ethernet devices and MP3 players). For a full list of the
+ different USB devices currently supported, see Resources.
+ </para>
+ <para>
+ The remaining kinds of USB devices that do not have support on Linux are
+ almost all vendor-specific devices. Each vendor decides to implement a
+ custom protocol to talk to their device, so a custom driver usually needs
+ to be created. Some vendors are open with their USB protocols and help
+ with the creation of Linux drivers, while others do not publish them, and
+ developers are forced to reverse-engineer. See Resources for some links
+ to handy reverse-engineering tools.
+ </para>
+ <para>
+ Because each different protocol causes a new driver to be created, I have
+ written a generic USB driver skeleton, modelled after the pci-skeleton.c
+ file in the kernel source tree upon which many PCI network drivers have
+ been based. This USB skeleton can be found at drivers/usb/usb-skeleton.c
+ in the kernel source tree. In this article I will walk through the basics
+ of the skeleton driver, explaining the different pieces and what needs to
+ be done to customize it to your specific device.
+ </para>
+ </chapter>
+
+ <chapter id="basics">
+ <title>Linux USB Basics</title>
+ <para>
+ If you are going to write a Linux USB driver, please become familiar with
+ the USB protocol specification. It can be found, along with many other
+ useful documents, at the USB home page (see Resources). An excellent
+ introduction to the Linux USB subsystem can be found at the USB Working
+ Devices List (see Resources). It explains how the Linux USB subsystem is
+ structured and introduces the reader to the concept of USB urbs
+ (USB Request Blocks), which are essential to USB drivers.
+ </para>
+ <para>
+ The first thing a Linux USB driver needs to do is register itself with
+ the Linux USB subsystem, giving it some information about which devices
+ the driver supports and which functions to call when a device supported
+ by the driver is inserted or removed from the system. All of this
+ information is passed to the USB subsystem in the usb_driver structure.
+ The skeleton driver declares a usb_driver as:
+ </para>
+ <programlisting>
+static struct usb_driver skel_driver = {
+ .name = "skeleton",
+ .probe = skel_probe,
+ .disconnect = skel_disconnect,
+ .fops = &amp;skel_fops,
+ .minor = USB_SKEL_MINOR_BASE,
+ .id_table = skel_table,
+};
+ </programlisting>
+ <para>
+ The variable name is a string that describes the driver. It is used in
+ informational messages printed to the system log. The probe and
+ disconnect function pointers are called when a device that matches the
+ information provided in the id_table variable is either seen or removed.
+ </para>
+ <para>
+ The fops and minor variables are optional. Most USB drivers hook into
+ another kernel subsystem, such as the SCSI, network or TTY subsystem.
+ These types of drivers register themselves with the other kernel
+ subsystem, and any user-space interactions are provided through that
+ interface. But for drivers that do not have a matching kernel subsystem,
+ such as MP3 players or scanners, a method of interacting with user space
+ is needed. The USB subsystem provides a way to register a minor device
+ number and a set of file_operations function pointers that enable this
+ user-space interaction. The skeleton driver needs this kind of interface,
+ so it provides a minor starting number and a pointer to its
+ file_operations functions.
+ </para>
+ <para>
+ The USB driver is then registered with a call to usb_register, usually in
+ the driver's init function, as shown here:
+ </para>
+ <programlisting>
+static int __init usb_skel_init(void)
+{
+ int result;
+
+ /* register this driver with the USB subsystem */
+ result = usb_register(&amp;skel_driver);
+ if (result &lt; 0) {
+ err(&quot;usb_register failed for the &quot;__FILE__ &quot;driver.&quot;
+ &quot;Error number %d&quot;, result);
+ return -1;
+ }
+
+ return 0;
+}
+module_init(usb_skel_init);
+ </programlisting>
+ <para>
+ When the driver is unloaded from the system, it needs to deregister
+ itself with the USB subsystem. This is done with the usb_deregister
+ function:
+ </para>
+ <programlisting>
+static void __exit usb_skel_exit(void)
+{
+ /* deregister this driver with the USB subsystem */
+ usb_deregister(&amp;skel_driver);
+}
+module_exit(usb_skel_exit);
+ </programlisting>
+ <para>
+ To enable the linux-hotplug system to load the driver automatically when
+ the device is plugged in, you need to create a MODULE_DEVICE_TABLE. The
+ following code tells the hotplug scripts that this module supports a
+ single device with a specific vendor and product ID:
+ </para>
+ <programlisting>
+/* table of devices that work with this driver */
+static struct usb_device_id skel_table [] = {
+ { USB_DEVICE(USB_SKEL_VENDOR_ID, USB_SKEL_PRODUCT_ID) },
+ { } /* Terminating entry */
+};
+MODULE_DEVICE_TABLE (usb, skel_table);
+ </programlisting>
+ <para>
+ There are other macros that can be used in describing a usb_device_id for
+ drivers that support a whole class of USB drivers. See usb.h for more
+ information on this.
+ </para>
+ </chapter>
+
+ <chapter id="device">
+ <title>Device operation</title>
+ <para>
+ When a device is plugged into the USB bus that matches the device ID
+ pattern that your driver registered with the USB core, the probe function
+ is called. The usb_device structure, interface number and the interface ID
+ are passed to the function:
+ </para>
+ <programlisting>
+static int skel_probe(struct usb_interface *interface,
+ const struct usb_device_id *id)
+ </programlisting>
+ <para>
+ The driver now needs to verify that this device is actually one that it
+ can accept. If so, it returns 0.
+ If not, or if any error occurs during initialization, an errorcode
+ (such as <literal>-ENOMEM</literal> or <literal>-ENODEV</literal>)
+ is returned from the probe function.
+ </para>
+ <para>
+ In the skeleton driver, we determine what end points are marked as bulk-in
+ and bulk-out. We create buffers to hold the data that will be sent and
+ received from the device, and a USB urb to write data to the device is
+ initialized.
+ </para>
+ <para>
+ Conversely, when the device is removed from the USB bus, the disconnect
+ function is called with the device pointer. The driver needs to clean any
+ private data that has been allocated at this time and to shut down any
+ pending urbs that are in the USB system.
+ </para>
+ <para>
+ Now that the device is plugged into the system and the driver is bound to
+ the device, any of the functions in the file_operations structure that
+ were passed to the USB subsystem will be called from a user program trying
+ to talk to the device. The first function called will be open, as the
+ program tries to open the device for I/O. We increment our private usage
+ count and save a pointer to our internal structure in the file
+ structure. This is done so that future calls to file operations will
+ enable the driver to determine which device the user is addressing. All
+ of this is done with the following code:
+ </para>
+ <programlisting>
+/* increment our usage count for the module */
+++skel->open_count;
+
+/* save our object in the file's private structure */
+file->private_data = dev;
+ </programlisting>
+ <para>
+ After the open function is called, the read and write functions are called
+ to receive and send data to the device. In the skel_write function, we
+ receive a pointer to some data that the user wants to send to the device
+ and the size of the data. The function determines how much data it can
+ send to the device based on the size of the write urb it has created (this
+ size depends on the size of the bulk out end point that the device has).
+ Then it copies the data from user space to kernel space, points the urb to
+ the data and submits the urb to the USB subsystem. This can be seen in
+ the following code:
+ </para>
+ <programlisting>
+/* we can only write as much as 1 urb will hold */
+bytes_written = (count > skel->bulk_out_size) ? skel->bulk_out_size : count;
+
+/* copy the data from user space into our urb */
+copy_from_user(skel->write_urb->transfer_buffer, buffer, bytes_written);
+
+/* set up our urb */
+usb_fill_bulk_urb(skel->write_urb,
+ skel->dev,
+ usb_sndbulkpipe(skel->dev, skel->bulk_out_endpointAddr),
+ skel->write_urb->transfer_buffer,
+ bytes_written,
+ skel_write_bulk_callback,
+ skel);
+
+/* send the data out the bulk port */
+result = usb_submit_urb(skel->write_urb);
+if (result) {
+ err(&quot;Failed submitting write urb, error %d&quot;, result);
+}
+ </programlisting>
+ <para>
+ When the write urb is filled up with the proper information using the
+ usb_fill_bulk_urb function, we point the urb's completion callback to call our
+ own skel_write_bulk_callback function. This function is called when the
+ urb is finished by the USB subsystem. The callback function is called in
+ interrupt context, so caution must be taken not to do very much processing
+ at that time. Our implementation of skel_write_bulk_callback merely
+ reports if the urb was completed successfully or not and then returns.
+ </para>
+ <para>
+ The read function works a bit differently from the write function in that
+ we do not use an urb to transfer data from the device to the driver.
+ Instead we call the usb_bulk_msg function, which can be used to send or
+ receive data from a device without having to create urbs and handle
+ urb completion callback functions. We call the usb_bulk_msg function,
+ giving it a buffer into which to place any data received from the device
+ and a timeout value. If the timeout period expires without receiving any
+ data from the device, the function will fail and return an error message.
+ This can be shown with the following code:
+ </para>
+ <programlisting>
+/* do an immediate bulk read to get data from the device */
+retval = usb_bulk_msg (skel->dev,
+ usb_rcvbulkpipe (skel->dev,
+ skel->bulk_in_endpointAddr),
+ skel->bulk_in_buffer,
+ skel->bulk_in_size,
+ &amp;count, HZ*10);
+/* if the read was successful, copy the data to user space */
+if (!retval) {
+ if (copy_to_user (buffer, skel->bulk_in_buffer, count))
+ retval = -EFAULT;
+ else
+ retval = count;
+}
+ </programlisting>
+ <para>
+ The usb_bulk_msg function can be very useful for doing single reads or
+ writes to a device; however, if you need to read or write constantly to a
+ device, it is recommended to set up your own urbs and submit them to the
+ USB subsystem.
+ </para>
+ <para>
+ When the user program releases the file handle that it has been using to
+ talk to the device, the release function in the driver is called. In this
+ function we decrement our private usage count and wait for possible
+ pending writes:
+ </para>
+ <programlisting>
+/* decrement our usage count for the device */
+--skel->open_count;
+ </programlisting>
+ <para>
+ One of the more difficult problems that USB drivers must be able to handle
+ smoothly is the fact that the USB device may be removed from the system at
+ any point in time, even if a program is currently talking to it. It needs
+ to be able to shut down any current reads and writes and notify the
+ user-space programs that the device is no longer there. The following
+ code (function <function>skel_delete</function>)
+ is an example of how to do this: </para>
+ <programlisting>
+static inline void skel_delete (struct usb_skel *dev)
+{
+ kfree (dev->bulk_in_buffer);
+ if (dev->bulk_out_buffer != NULL)
+ usb_free_coherent (dev->udev, dev->bulk_out_size,
+ dev->bulk_out_buffer,
+ dev->write_urb->transfer_dma);
+ usb_free_urb (dev->write_urb);
+ kfree (dev);
+}
+ </programlisting>
+ <para>
+ If a program currently has an open handle to the device, we reset the flag
+ <literal>device_present</literal>. For
+ every read, write, release and other functions that expect a device to be
+ present, the driver first checks this flag to see if the device is
+ still present. If not, it releases that the device has disappeared, and a
+ -ENODEV error is returned to the user-space program. When the release
+ function is eventually called, it determines if there is no device
+ and if not, it does the cleanup that the skel_disconnect
+ function normally does if there are no open files on the device (see
+ Listing 5).
+ </para>
+ </chapter>
+
+ <chapter id="iso">
+ <title>Isochronous Data</title>
+ <para>
+ This usb-skeleton driver does not have any examples of interrupt or
+ isochronous data being sent to or from the device. Interrupt data is sent
+ almost exactly as bulk data is, with a few minor exceptions. Isochronous
+ data works differently with continuous streams of data being sent to or
+ from the device. The audio and video camera drivers are very good examples
+ of drivers that handle isochronous data and will be useful if you also
+ need to do this.
+ </para>
+ </chapter>
+
+ <chapter id="Conclusion">
+ <title>Conclusion</title>
+ <para>
+ Writing Linux USB device drivers is not a difficult task as the
+ usb-skeleton driver shows. This driver, combined with the other current
+ USB drivers, should provide enough examples to help a beginning author
+ create a working driver in a minimal amount of time. The linux-usb-devel
+ mailing list archives also contain a lot of helpful information.
+ </para>
+ </chapter>
+
+ <chapter id="resources">
+ <title>Resources</title>
+ <para>
+ The Linux USB Project: <ulink url="http://www.linux-usb.org">http://www.linux-usb.org/</ulink>
+ </para>
+ <para>
+ Linux Hotplug Project: <ulink url="http://linux-hotplug.sourceforge.net">http://linux-hotplug.sourceforge.net/</ulink>
+ </para>
+ <para>
+ Linux USB Working Devices List: <ulink url="http://www.qbik.ch/usb/devices">http://www.qbik.ch/usb/devices/</ulink>
+ </para>
+ <para>
+ linux-usb-devel Mailing List Archives: <ulink url="http://marc.theaimsgroup.com/?l=linux-usb-devel">http://marc.theaimsgroup.com/?l=linux-usb-devel</ulink>
+ </para>
+ <para>
+ Programming Guide for Linux USB Device Drivers: <ulink url="http://usb.cs.tum.edu/usbdoc">http://usb.cs.tum.edu/usbdoc</ulink>
+ </para>
+ <para>
+ USB Home Page: <ulink url="http://www.usb.org">http://www.usb.org</ulink>
+ </para>
+ </chapter>
+
+</book>
diff --git a/Documentation/DocBook/z8530book.tmpl b/Documentation/DocBook/z8530book.tmpl
new file mode 100644
index 000000000..6f3883be8
--- /dev/null
+++ b/Documentation/DocBook/z8530book.tmpl
@@ -0,0 +1,371 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Z85230Guide">
+ <bookinfo>
+ <title>Z8530 Programming Guide</title>
+
+ <authorgroup>
+ <author>
+ <firstname>Alan</firstname>
+ <surname>Cox</surname>
+ <affiliation>
+ <address>
+ <email>alan@lxorguk.ukuu.org.uk</email>
+ </address>
+ </affiliation>
+ </author>
+ </authorgroup>
+
+ <copyright>
+ <year>2000</year>
+ <holder>Alan Cox</holder>
+ </copyright>
+
+ <legalnotice>
+ <para>
+ This documentation is free software; you can redistribute
+ it and/or modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 2 of the License, or (at your option) any later
+ version.
+ </para>
+
+ <para>
+ This program is distributed in the hope that it will be
+ useful, but WITHOUT ANY WARRANTY; without even the implied
+ warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ See the GNU General Public License for more details.
+ </para>
+
+ <para>
+ You should have received a copy of the GNU General Public
+ License along with this program; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ MA 02111-1307 USA
+ </para>
+
+ <para>
+ For more details see the file COPYING in the source
+ distribution of Linux.
+ </para>
+ </legalnotice>
+ </bookinfo>
+
+<toc></toc>
+
+ <chapter id="intro">
+ <title>Introduction</title>
+ <para>
+ The Z85x30 family synchronous/asynchronous controller chips are
+ used on a large number of cheap network interface cards. The
+ kernel provides a core interface layer that is designed to make
+ it easy to provide WAN services using this chip.
+ </para>
+ <para>
+ The current driver only support synchronous operation. Merging the
+ asynchronous driver support into this code to allow any Z85x30
+ device to be used as both a tty interface and as a synchronous
+ controller is a project for Linux post the 2.4 release
+ </para>
+ </chapter>
+
+ <chapter id="Driver_Modes">
+ <title>Driver Modes</title>
+ <para>
+ The Z85230 driver layer can drive Z8530, Z85C30 and Z85230 devices
+ in three different modes. Each mode can be applied to an individual
+ channel on the chip (each chip has two channels).
+ </para>
+ <para>
+ The PIO synchronous mode supports the most common Z8530 wiring. Here
+ the chip is interface to the I/O and interrupt facilities of the
+ host machine but not to the DMA subsystem. When running PIO the
+ Z8530 has extremely tight timing requirements. Doing high speeds,
+ even with a Z85230 will be tricky. Typically you should expect to
+ achieve at best 9600 baud with a Z8C530 and 64Kbits with a Z85230.
+ </para>
+ <para>
+ The DMA mode supports the chip when it is configured to use dual DMA
+ channels on an ISA bus. The better cards tend to support this mode
+ of operation for a single channel. With DMA running the Z85230 tops
+ out when it starts to hit ISA DMA constraints at about 512Kbits. It
+ is worth noting here that many PC machines hang or crash when the
+ chip is driven fast enough to hold the ISA bus solid.
+ </para>
+ <para>
+ Transmit DMA mode uses a single DMA channel. The DMA channel is used
+ for transmission as the transmit FIFO is smaller than the receive
+ FIFO. it gives better performance than pure PIO mode but is nowhere
+ near as ideal as pure DMA mode.
+ </para>
+ </chapter>
+
+ <chapter id="Using_the_Z85230_driver">
+ <title>Using the Z85230 driver</title>
+ <para>
+ The Z85230 driver provides the back end interface to your board. To
+ configure a Z8530 interface you need to detect the board and to
+ identify its ports and interrupt resources. It is also your problem
+ to verify the resources are available.
+ </para>
+ <para>
+ Having identified the chip you need to fill in a struct z8530_dev,
+ which describes each chip. This object must exist until you finally
+ shutdown the board. Firstly zero the active field. This ensures
+ nothing goes off without you intending it. The irq field should
+ be set to the interrupt number of the chip. (Each chip has a single
+ interrupt source rather than each channel). You are responsible
+ for allocating the interrupt line. The interrupt handler should be
+ set to <function>z8530_interrupt</function>. The device id should
+ be set to the z8530_dev structure pointer. Whether the interrupt can
+ be shared or not is board dependent, and up to you to initialise.
+ </para>
+ <para>
+ The structure holds two channel structures.
+ Initialise chanA.ctrlio and chanA.dataio with the address of the
+ control and data ports. You can or this with Z8530_PORT_SLEEP to
+ indicate your interface needs the 5uS delay for chip settling done
+ in software. The PORT_SLEEP option is architecture specific. Other
+ flags may become available on future platforms, eg for MMIO.
+ Initialise the chanA.irqs to &amp;z8530_nop to start the chip up
+ as disabled and discarding interrupt events. This ensures that
+ stray interrupts will be mopped up and not hang the bus. Set
+ chanA.dev to point to the device structure itself. The
+ private and name field you may use as you wish. The private field
+ is unused by the Z85230 layer. The name is used for error reporting
+ and it may thus make sense to make it match the network name.
+ </para>
+ <para>
+ Repeat the same operation with the B channel if your chip has
+ both channels wired to something useful. This isn't always the
+ case. If it is not wired then the I/O values do not matter, but
+ you must initialise chanB.dev.
+ </para>
+ <para>
+ If your board has DMA facilities then initialise the txdma and
+ rxdma fields for the relevant channels. You must also allocate the
+ ISA DMA channels and do any necessary board level initialisation
+ to configure them. The low level driver will do the Z8530 and
+ DMA controller programming but not board specific magic.
+ </para>
+ <para>
+ Having initialised the device you can then call
+ <function>z8530_init</function>. This will probe the chip and
+ reset it into a known state. An identification sequence is then
+ run to identify the chip type. If the checks fail to pass the
+ function returns a non zero error code. Typically this indicates
+ that the port given is not valid. After this call the
+ type field of the z8530_dev structure is initialised to either
+ Z8530, Z85C30 or Z85230 according to the chip found.
+ </para>
+ <para>
+ Once you have called z8530_init you can also make use of the utility
+ function <function>z8530_describe</function>. This provides a
+ consistent reporting format for the Z8530 devices, and allows all
+ the drivers to provide consistent reporting.
+ </para>
+ </chapter>
+
+ <chapter id="Attaching_Network_Interfaces">
+ <title>Attaching Network Interfaces</title>
+ <para>
+ If you wish to use the network interface facilities of the driver,
+ then you need to attach a network device to each channel that is
+ present and in use. In addition to use the generic HDLC
+ you need to follow some additional plumbing rules. They may seem
+ complex but a look at the example hostess_sv11 driver should
+ reassure you.
+ </para>
+ <para>
+ The network device used for each channel should be pointed to by
+ the netdevice field of each channel. The hdlc-&gt; priv field of the
+ network device points to your private data - you will need to be
+ able to find your private data from this.
+ </para>
+ <para>
+ The way most drivers approach this particular problem is to
+ create a structure holding the Z8530 device definition and
+ put that into the private field of the network device. The
+ network device fields of the channels then point back to the
+ network devices.
+ </para>
+ <para>
+ If you wish to use the generic HDLC then you need to register
+ the HDLC device.
+ </para>
+ <para>
+ Before you register your network device you will also need to
+ provide suitable handlers for most of the network device callbacks.
+ See the network device documentation for more details on this.
+ </para>
+ </chapter>
+
+ <chapter id="Configuring_And_Activating_The_Port">
+ <title>Configuring And Activating The Port</title>
+ <para>
+ The Z85230 driver provides helper functions and tables to load the
+ port registers on the Z8530 chips. When programming the register
+ settings for a channel be aware that the documentation recommends
+ initialisation orders. Strange things happen when these are not
+ followed.
+ </para>
+ <para>
+ <function>z8530_channel_load</function> takes an array of
+ pairs of initialisation values in an array of u8 type. The first
+ value is the Z8530 register number. Add 16 to indicate the alternate
+ register bank on the later chips. The array is terminated by a 255.
+ </para>
+ <para>
+ The driver provides a pair of public tables. The
+ z8530_hdlc_kilostream table is for the UK 'Kilostream' service and
+ also happens to cover most other end host configurations. The
+ z8530_hdlc_kilostream_85230 table is the same configuration using
+ the enhancements of the 85230 chip. The configuration loaded is
+ standard NRZ encoded synchronous data with HDLC bitstuffing. All
+ of the timing is taken from the other end of the link.
+ </para>
+ <para>
+ When writing your own tables be aware that the driver internally
+ tracks register values. It may need to reload values. You should
+ therefore be sure to set registers 1-7, 9-11, 14 and 15 in all
+ configurations. Where the register settings depend on DMA selection
+ the driver will update the bits itself when you open or close.
+ Loading a new table with the interface open is not recommended.
+ </para>
+ <para>
+ There are three standard configurations supported by the core
+ code. In PIO mode the interface is programmed up to use
+ interrupt driven PIO. This places high demands on the host processor
+ to avoid latency. The driver is written to take account of latency
+ issues but it cannot avoid latencies caused by other drivers,
+ notably IDE in PIO mode. Because the drivers allocate buffers you
+ must also prevent MTU changes while the port is open.
+ </para>
+ <para>
+ Once the port is open it will call the rx_function of each channel
+ whenever a completed packet arrived. This is invoked from
+ interrupt context and passes you the channel and a network
+ buffer (struct sk_buff) holding the data. The data includes
+ the CRC bytes so most users will want to trim the last two
+ bytes before processing the data. This function is very timing
+ critical. When you wish to simply discard data the support
+ code provides the function <function>z8530_null_rx</function>
+ to discard the data.
+ </para>
+ <para>
+ To active PIO mode sending and receiving the <function>
+ z8530_sync_open</function> is called. This expects to be passed
+ the network device and the channel. Typically this is called from
+ your network device open callback. On a failure a non zero error
+ status is returned. The <function>z8530_sync_close</function>
+ function shuts down a PIO channel. This must be done before the
+ channel is opened again and before the driver shuts down
+ and unloads.
+ </para>
+ <para>
+ The ideal mode of operation is dual channel DMA mode. Here the
+ kernel driver will configure the board for DMA in both directions.
+ The driver also handles ISA DMA issues such as controller
+ programming and the memory range limit for you. This mode is
+ activated by calling the <function>z8530_sync_dma_open</function>
+ function. On failure a non zero error value is returned.
+ Once this mode is activated it can be shut down by calling the
+ <function>z8530_sync_dma_close</function>. You must call the close
+ function matching the open mode you used.
+ </para>
+ <para>
+ The final supported mode uses a single DMA channel to drive the
+ transmit side. As the Z85C30 has a larger FIFO on the receive
+ channel this tends to increase the maximum speed a little.
+ This is activated by calling the <function>z8530_sync_txdma_open
+ </function>. This returns a non zero error code on failure. The
+ <function>z8530_sync_txdma_close</function> function closes down
+ the Z8530 interface from this mode.
+ </para>
+ </chapter>
+
+ <chapter id="Network_Layer_Functions">
+ <title>Network Layer Functions</title>
+ <para>
+ The Z8530 layer provides functions to queue packets for
+ transmission. The driver internally buffers the frame currently
+ being transmitted and one further frame (in order to keep back
+ to back transmission running). Any further buffering is up to
+ the caller.
+ </para>
+ <para>
+ The function <function>z8530_queue_xmit</function> takes a network
+ buffer in sk_buff format and queues it for transmission. The
+ caller must provide the entire packet with the exception of the
+ bitstuffing and CRC. This is normally done by the caller via
+ the generic HDLC interface layer. It returns 0 if the buffer has been
+ queued and non zero values for queue full. If the function accepts
+ the buffer it becomes property of the Z8530 layer and the caller
+ should not free it.
+ </para>
+ <para>
+ The function <function>z8530_get_stats</function> returns a pointer
+ to an internally maintained per interface statistics block. This
+ provides most of the interface code needed to implement the network
+ layer get_stats callback.
+ </para>
+ </chapter>
+
+ <chapter id="Porting_The_Z8530_Driver">
+ <title>Porting The Z8530 Driver</title>
+ <para>
+ The Z8530 driver is written to be portable. In DMA mode it makes
+ assumptions about the use of ISA DMA. These are probably warranted
+ in most cases as the Z85230 in particular was designed to glue to PC
+ type machines. The PIO mode makes no real assumptions.
+ </para>
+ <para>
+ Should you need to retarget the Z8530 driver to another architecture
+ the only code that should need changing are the port I/O functions.
+ At the moment these assume PC I/O port accesses. This may not be
+ appropriate for all platforms. Replacing
+ <function>z8530_read_port</function> and <function>z8530_write_port
+ </function> is intended to be all that is required to port this
+ driver layer.
+ </para>
+ </chapter>
+
+ <chapter id="bugs">
+ <title>Known Bugs And Assumptions</title>
+ <para>
+ <variablelist>
+ <varlistentry><term>Interrupt Locking</term>
+ <listitem>
+ <para>
+ The locking in the driver is done via the global cli/sti lock. This
+ makes for relatively poor SMP performance. Switching this to use a
+ per device spin lock would probably materially improve performance.
+ </para>
+ </listitem></varlistentry>
+
+ <varlistentry><term>Occasional Failures</term>
+ <listitem>
+ <para>
+ We have reports of occasional failures when run for very long
+ periods of time and the driver starts to receive junk frames. At
+ the moment the cause of this is not clear.
+ </para>
+ </listitem></varlistentry>
+ </variablelist>
+
+ </para>
+ </chapter>
+
+ <chapter id="pubfunctions">
+ <title>Public Functions Provided</title>
+!Edrivers/net/wan/z85230.c
+ </chapter>
+
+ <chapter id="intfunctions">
+ <title>Internal Functions</title>
+!Idrivers/net/wan/z85230.c
+ </chapter>
+
+</book>
diff --git a/Documentation/EDID/1024x768.S b/Documentation/EDID/1024x768.S
new file mode 100644
index 000000000..6f3e4b75e
--- /dev/null
+++ b/Documentation/EDID/1024x768.S
@@ -0,0 +1,44 @@
+/*
+ 1024x768.S: EDID data set for standard 1024x768 60 Hz monitor
+
+ Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 65000 /* kHz */
+#define XPIX 1024
+#define YPIX 768
+#define XY_RATIO XY_RATIO_4_3
+#define XBLANK 320
+#define YBLANK 38
+#define XOFFSET 8
+#define XPULSE 144
+#define YOFFSET (63+3)
+#define YPULSE (63+6)
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux XGA"
+#define ESTABLISHED_TIMING2_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */
+#define HSYNC_POL 0
+#define VSYNC_POL 0
+#define CRC 0x55
+
+#include "edid.S"
diff --git a/Documentation/EDID/1280x1024.S b/Documentation/EDID/1280x1024.S
new file mode 100644
index 000000000..bd9bef2a6
--- /dev/null
+++ b/Documentation/EDID/1280x1024.S
@@ -0,0 +1,44 @@
+/*
+ 1280x1024.S: EDID data set for standard 1280x1024 60 Hz monitor
+
+ Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 108000 /* kHz */
+#define XPIX 1280
+#define YPIX 1024
+#define XY_RATIO XY_RATIO_5_4
+#define XBLANK 408
+#define YBLANK 42
+#define XOFFSET 48
+#define XPULSE 112
+#define YOFFSET (63+1)
+#define YPULSE (63+3)
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux SXGA"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+#define CRC 0xa0
+
+#include "edid.S"
diff --git a/Documentation/EDID/1600x1200.S b/Documentation/EDID/1600x1200.S
new file mode 100644
index 000000000..a45101c61
--- /dev/null
+++ b/Documentation/EDID/1600x1200.S
@@ -0,0 +1,44 @@
+/*
+ 1600x1200.S: EDID data set for standard 1600x1200 60 Hz monitor
+
+ Copyright (C) 2013 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 162000 /* kHz */
+#define XPIX 1600
+#define YPIX 1200
+#define XY_RATIO XY_RATIO_4_3
+#define XBLANK 560
+#define YBLANK 50
+#define XOFFSET 64
+#define XPULSE 192
+#define YOFFSET (63+1)
+#define YPULSE (63+3)
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux UXGA"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+#define CRC 0x9d
+
+#include "edid.S"
diff --git a/Documentation/EDID/1680x1050.S b/Documentation/EDID/1680x1050.S
new file mode 100644
index 000000000..b0d7c6928
--- /dev/null
+++ b/Documentation/EDID/1680x1050.S
@@ -0,0 +1,44 @@
+/*
+ 1680x1050.S: EDID data set for standard 1680x1050 60 Hz monitor
+
+ Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 146250 /* kHz */
+#define XPIX 1680
+#define YPIX 1050
+#define XY_RATIO XY_RATIO_16_10
+#define XBLANK 560
+#define YBLANK 39
+#define XOFFSET 104
+#define XPULSE 176
+#define YOFFSET (63+3)
+#define YPULSE (63+6)
+#define DPI 96
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux WSXGA"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+#define CRC 0x26
+
+#include "edid.S"
diff --git a/Documentation/EDID/1920x1080.S b/Documentation/EDID/1920x1080.S
new file mode 100644
index 000000000..3084355e8
--- /dev/null
+++ b/Documentation/EDID/1920x1080.S
@@ -0,0 +1,44 @@
+/*
+ 1920x1080.S: EDID data set for standard 1920x1080 60 Hz monitor
+
+ Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 148500 /* kHz */
+#define XPIX 1920
+#define YPIX 1080
+#define XY_RATIO XY_RATIO_16_9
+#define XBLANK 280
+#define YBLANK 45
+#define XOFFSET 88
+#define XPULSE 44
+#define YOFFSET (63+4)
+#define YPULSE (63+5)
+#define DPI 96
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux FHD"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+#define CRC 0x05
+
+#include "edid.S"
diff --git a/Documentation/EDID/800x600.S b/Documentation/EDID/800x600.S
new file mode 100644
index 000000000..6644e26d5
--- /dev/null
+++ b/Documentation/EDID/800x600.S
@@ -0,0 +1,41 @@
+/*
+ 800x600.S: EDID data set for standard 800x600 60 Hz monitor
+
+ Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
+ Copyright (C) 2014 Linaro Limited
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 40000 /* kHz */
+#define XPIX 800
+#define YPIX 600
+#define XY_RATIO XY_RATIO_4_3
+#define XBLANK 256
+#define YBLANK 28
+#define XOFFSET 40
+#define XPULSE 128
+#define YOFFSET (63+1)
+#define YPULSE (63+4)
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux SVGA"
+#define ESTABLISHED_TIMING1_BITS 0x01 /* Bit 0: 800x600 @ 60Hz */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+#define CRC 0xc2
+
+#include "edid.S"
diff --git a/Documentation/EDID/HOWTO.txt b/Documentation/EDID/HOWTO.txt
new file mode 100644
index 000000000..835db3322
--- /dev/null
+++ b/Documentation/EDID/HOWTO.txt
@@ -0,0 +1,58 @@
+In the good old days when graphics parameters were configured explicitly
+in a file called xorg.conf, even broken hardware could be managed.
+
+Today, with the advent of Kernel Mode Setting, a graphics board is
+either correctly working because all components follow the standards -
+or the computer is unusable, because the screen remains dark after
+booting or it displays the wrong area. Cases when this happens are:
+- The graphics board does not recognize the monitor.
+- The graphics board is unable to detect any EDID data.
+- The graphics board incorrectly forwards EDID data to the driver.
+- The monitor sends no or bogus EDID data.
+- A KVM sends its own EDID data instead of querying the connected monitor.
+Adding the kernel parameter "nomodeset" helps in most cases, but causes
+restrictions later on.
+
+As a remedy for such situations, the kernel configuration item
+CONFIG_DRM_LOAD_EDID_FIRMWARE was introduced. It allows to provide an
+individually prepared or corrected EDID data set in the /lib/firmware
+directory from where it is loaded via the firmware interface. The code
+(see drivers/gpu/drm/drm_edid_load.c) contains built-in data sets for
+commonly used screen resolutions (800x600, 1024x768, 1280x1024, 1600x1200,
+1680x1050, 1920x1080) as binary blobs, but the kernel source tree does
+not contain code to create these data. In order to elucidate the origin
+of the built-in binary EDID blobs and to facilitate the creation of
+individual data for a specific misbehaving monitor, commented sources
+and a Makefile environment are given here.
+
+To create binary EDID and C source code files from the existing data
+material, simply type "make".
+
+If you want to create your own EDID file, copy the file 1024x768.S,
+replace the settings with your own data and add a new target to the
+Makefile. Please note that the EDID data structure expects the timing
+values in a different way as compared to the standard X11 format.
+
+X11:
+HTimings: hdisp hsyncstart hsyncend htotal
+VTimings: vdisp vsyncstart vsyncend vtotal
+
+EDID:
+#define XPIX hdisp
+#define XBLANK htotal-hdisp
+#define XOFFSET hsyncstart-hdisp
+#define XPULSE hsyncend-hsyncstart
+
+#define YPIX vdisp
+#define YBLANK vtotal-vdisp
+#define YOFFSET (63+(vsyncstart-vdisp))
+#define YPULSE (63+(vsyncend-vsyncstart))
+
+The CRC value in the last line
+ #define CRC 0x55
+also is a bit tricky. After a first version of the binary data set is
+created, it must be checked with the "edid-decode" utility which will
+most probably complain about a wrong CRC. Fortunately, the utility also
+displays the correct CRC which must then be inserted into the source
+file. After the make procedure is repeated, the EDID data set is ready
+to be used.
diff --git a/Documentation/EDID/Makefile b/Documentation/EDID/Makefile
new file mode 100644
index 000000000..17763ca3f
--- /dev/null
+++ b/Documentation/EDID/Makefile
@@ -0,0 +1,26 @@
+
+SOURCES := $(wildcard [0-9]*x[0-9]*.S)
+
+BIN := $(patsubst %.S, %.bin, $(SOURCES))
+
+IHEX := $(patsubst %.S, %.bin.ihex, $(SOURCES))
+
+CODE := $(patsubst %.S, %.c, $(SOURCES))
+
+all: $(BIN) $(IHEX) $(CODE)
+
+clean:
+ @rm -f *.o *.bin.ihex *.bin *.c
+
+%.o: %.S
+ @cc -c $^
+
+%.bin: %.o
+ @objcopy -Obinary $^ $@
+
+%.bin.ihex: %.o
+ @objcopy -Oihex $^ $@
+ @dos2unix $@ 2>/dev/null
+
+%.c: %.bin
+ @echo "{" >$@; hexdump -f hex $^ >>$@; echo "};" >>$@
diff --git a/Documentation/EDID/edid.S b/Documentation/EDID/edid.S
new file mode 100644
index 000000000..7ac03276d
--- /dev/null
+++ b/Documentation/EDID/edid.S
@@ -0,0 +1,272 @@
+/*
+ edid.S: EDID data template
+
+ Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+
+/* Manufacturer */
+#define MFG_LNX1 'L'
+#define MFG_LNX2 'N'
+#define MFG_LNX3 'X'
+#define SERIAL 0
+#define YEAR 2012
+#define WEEK 5
+
+/* EDID 1.3 standard definitions */
+#define XY_RATIO_16_10 0b00
+#define XY_RATIO_4_3 0b01
+#define XY_RATIO_5_4 0b10
+#define XY_RATIO_16_9 0b11
+
+/* Provide defaults for the timing bits */
+#ifndef ESTABLISHED_TIMING1_BITS
+#define ESTABLISHED_TIMING1_BITS 0x00
+#endif
+#ifndef ESTABLISHED_TIMING2_BITS
+#define ESTABLISHED_TIMING2_BITS 0x00
+#endif
+#ifndef ESTABLISHED_TIMING3_BITS
+#define ESTABLISHED_TIMING3_BITS 0x00
+#endif
+
+#define mfgname2id(v1,v2,v3) \
+ ((((v1-'@')&0x1f)<<10)+(((v2-'@')&0x1f)<<5)+((v3-'@')&0x1f))
+#define swap16(v1) ((v1>>8)+((v1&0xff)<<8))
+#define msbs2(v1,v2) ((((v1>>8)&0x0f)<<4)+((v2>>8)&0x0f))
+#define msbs4(v1,v2,v3,v4) \
+ (((v1&0x03)>>2)+((v2&0x03)>>4)+((v3&0x03)>>6)+((v4&0x03)>>8))
+#define pixdpi2mm(pix,dpi) ((pix*25)/dpi)
+#define xsize pixdpi2mm(XPIX,DPI)
+#define ysize pixdpi2mm(YPIX,DPI)
+
+ .data
+
+/* Fixed header pattern */
+header: .byte 0x00,0xff,0xff,0xff,0xff,0xff,0xff,0x00
+
+mfg_id: .word swap16(mfgname2id(MFG_LNX1, MFG_LNX2, MFG_LNX3))
+
+prod_code: .word 0
+
+/* Serial number. 32 bits, little endian. */
+serial_number: .long SERIAL
+
+/* Week of manufacture */
+week: .byte WEEK
+
+/* Year of manufacture, less 1990. (1990-2245)
+ If week=255, it is the model year instead */
+year: .byte YEAR-1990
+
+version: .byte VERSION /* EDID version, usually 1 (for 1.3) */
+revision: .byte REVISION /* EDID revision, usually 3 (for 1.3) */
+
+/* If Bit 7=1 Digital input. If set, the following bit definitions apply:
+ Bits 6-1 Reserved, must be 0
+ Bit 0 Signal is compatible with VESA DFP 1.x TMDS CRGB,
+ 1 pixel per clock, up to 8 bits per color, MSB aligned,
+ If Bit 7=0 Analog input. If clear, the following bit definitions apply:
+ Bits 6-5 Video white and sync levels, relative to blank
+ 00=+0.7/-0.3 V; 01=+0.714/-0.286 V;
+ 10=+1.0/-0.4 V; 11=+0.7/0 V
+ Bit 4 Blank-to-black setup (pedestal) expected
+ Bit 3 Separate sync supported
+ Bit 2 Composite sync (on HSync) supported
+ Bit 1 Sync on green supported
+ Bit 0 VSync pulse must be serrated when somposite or
+ sync-on-green is used. */
+video_parms: .byte 0x6d
+
+/* Maximum horizontal image size, in centimetres
+ (max 292 cm/115 in at 16:9 aspect ratio) */
+max_hor_size: .byte xsize/10
+
+/* Maximum vertical image size, in centimetres.
+ If either byte is 0, undefined (e.g. projector) */
+max_vert_size: .byte ysize/10
+
+/* Display gamma, minus 1, times 100 (range 1.00-3.5 */
+gamma: .byte 120
+
+/* Bit 7 DPMS standby supported
+ Bit 6 DPMS suspend supported
+ Bit 5 DPMS active-off supported
+ Bits 4-3 Display type: 00=monochrome; 01=RGB colour;
+ 10=non-RGB multicolour; 11=undefined
+ Bit 2 Standard sRGB colour space. Bytes 25-34 must contain
+ sRGB standard values.
+ Bit 1 Preferred timing mode specified in descriptor block 1.
+ Bit 0 GTF supported with default parameter values. */
+dsp_features: .byte 0xea
+
+/* Chromaticity coordinates. */
+/* Red and green least-significant bits
+ Bits 7-6 Red x value least-significant 2 bits
+ Bits 5-4 Red y value least-significant 2 bits
+ Bits 3-2 Green x value lst-significant 2 bits
+ Bits 1-0 Green y value least-significant 2 bits */
+red_green_lsb: .byte 0x5e
+
+/* Blue and white least-significant 2 bits */
+blue_white_lsb: .byte 0xc0
+
+/* Red x value most significant 8 bits.
+ 0-255 encodes 0-0.996 (255/256); 0-0.999 (1023/1024) with lsbits */
+red_x_msb: .byte 0xa4
+
+/* Red y value most significant 8 bits */
+red_y_msb: .byte 0x59
+
+/* Green x and y value most significant 8 bits */
+green_x_y_msb: .byte 0x4a,0x98
+
+/* Blue x and y value most significant 8 bits */
+blue_x_y_msb: .byte 0x25,0x20
+
+/* Default white point x and y value most significant 8 bits */
+white_x_y_msb: .byte 0x50,0x54
+
+/* Established timings */
+/* Bit 7 720x400 @ 70 Hz
+ Bit 6 720x400 @ 88 Hz
+ Bit 5 640x480 @ 60 Hz
+ Bit 4 640x480 @ 67 Hz
+ Bit 3 640x480 @ 72 Hz
+ Bit 2 640x480 @ 75 Hz
+ Bit 1 800x600 @ 56 Hz
+ Bit 0 800x600 @ 60 Hz */
+estbl_timing1: .byte ESTABLISHED_TIMING1_BITS
+
+/* Bit 7 800x600 @ 72 Hz
+ Bit 6 800x600 @ 75 Hz
+ Bit 5 832x624 @ 75 Hz
+ Bit 4 1024x768 @ 87 Hz, interlaced (1024x768)
+ Bit 3 1024x768 @ 60 Hz
+ Bit 2 1024x768 @ 72 Hz
+ Bit 1 1024x768 @ 75 Hz
+ Bit 0 1280x1024 @ 75 Hz */
+estbl_timing2: .byte ESTABLISHED_TIMING2_BITS
+
+/* Bit 7 1152x870 @ 75 Hz (Apple Macintosh II)
+ Bits 6-0 Other manufacturer-specific display mod */
+estbl_timing3: .byte ESTABLISHED_TIMING3_BITS
+
+/* Standard timing */
+/* X resolution, less 31, divided by 8 (256-2288 pixels) */
+std_xres: .byte (XPIX/8)-31
+/* Y resolution, X:Y pixel ratio
+ Bits 7-6 X:Y pixel ratio: 00=16:10; 01=4:3; 10=5:4; 11=16:9.
+ Bits 5-0 Vertical frequency, less 60 (60-123 Hz) */
+std_vres: .byte (XY_RATIO<<6)+VFREQ-60
+ .fill 7,2,0x0101 /* Unused */
+
+descriptor1:
+/* Pixel clock in 10 kHz units. (0.-655.35 MHz, little-endian) */
+clock: .word CLOCK/10
+
+/* Horizontal active pixels 8 lsbits (0-4095) */
+x_act_lsb: .byte XPIX&0xff
+/* Horizontal blanking pixels 8 lsbits (0-4095)
+ End of active to start of next active. */
+x_blk_lsb: .byte XBLANK&0xff
+/* Bits 7-4 Horizontal active pixels 4 msbits
+ Bits 3-0 Horizontal blanking pixels 4 msbits */
+x_msbs: .byte msbs2(XPIX,XBLANK)
+
+/* Vertical active lines 8 lsbits (0-4095) */
+y_act_lsb: .byte YPIX&0xff
+/* Vertical blanking lines 8 lsbits (0-4095) */
+y_blk_lsb: .byte YBLANK&0xff
+/* Bits 7-4 Vertical active lines 4 msbits
+ Bits 3-0 Vertical blanking lines 4 msbits */
+y_msbs: .byte msbs2(YPIX,YBLANK)
+
+/* Horizontal sync offset pixels 8 lsbits (0-1023) From blanking start */
+x_snc_off_lsb: .byte XOFFSET&0xff
+/* Horizontal sync pulse width pixels 8 lsbits (0-1023) */
+x_snc_pls_lsb: .byte XPULSE&0xff
+/* Bits 7-4 Vertical sync offset lines 4 lsbits -63)
+ Bits 3-0 Vertical sync pulse width lines 4 lsbits -63) */
+y_snc_lsb: .byte ((YOFFSET-63)<<4)+(YPULSE-63)
+/* Bits 7-6 Horizontal sync offset pixels 2 msbits
+ Bits 5-4 Horizontal sync pulse width pixels 2 msbits
+ Bits 3-2 Vertical sync offset lines 2 msbits
+ Bits 1-0 Vertical sync pulse width lines 2 msbits */
+xy_snc_msbs: .byte msbs4(XOFFSET,XPULSE,YOFFSET,YPULSE)
+
+/* Horizontal display size, mm, 8 lsbits (0-4095 mm, 161 in) */
+x_dsp_size: .byte xsize&0xff
+
+/* Vertical display size, mm, 8 lsbits (0-4095 mm, 161 in) */
+y_dsp_size: .byte ysize&0xff
+
+/* Bits 7-4 Horizontal display size, mm, 4 msbits
+ Bits 3-0 Vertical display size, mm, 4 msbits */
+dsp_size_mbsb: .byte msbs2(xsize,ysize)
+
+/* Horizontal border pixels (each side; total is twice this) */
+x_border: .byte 0
+/* Vertical border lines (each side; total is twice this) */
+y_border: .byte 0
+
+/* Bit 7 Interlaced
+ Bits 6-5 Stereo mode: 00=No stereo; other values depend on bit 0:
+ Bit 0=0: 01=Field sequential, sync=1 during right; 10=similar,
+ sync=1 during left; 11=4-way interleaved stereo
+ Bit 0=1 2-way interleaved stereo: 01=Right image on even lines;
+ 10=Left image on even lines; 11=side-by-side
+ Bits 4-3 Sync type: 00=Analog composite; 01=Bipolar analog composite;
+ 10=Digital composite (on HSync); 11=Digital separate
+ Bit 2 If digital separate: Vertical sync polarity (1=positive)
+ Other types: VSync serrated (HSync during VSync)
+ Bit 1 If analog sync: Sync on all 3 RGB lines (else green only)
+ Digital: HSync polarity (1=positive)
+ Bit 0 2-way line-interleaved stereo, if bits 4-3 are not 00. */
+features: .byte 0x18+(VSYNC_POL<<2)+(HSYNC_POL<<1)
+
+descriptor2: .byte 0,0 /* Not a detailed timing descriptor */
+ .byte 0 /* Must be zero */
+ .byte 0xff /* Descriptor is monitor serial number (text) */
+ .byte 0 /* Must be zero */
+start1: .ascii "Linux #0"
+end1: .byte 0x0a /* End marker */
+ .fill 12-(end1-start1), 1, 0x20 /* Padded spaces */
+descriptor3: .byte 0,0 /* Not a detailed timing descriptor */
+ .byte 0 /* Must be zero */
+ .byte 0xfd /* Descriptor is monitor range limits */
+ .byte 0 /* Must be zero */
+start2: .byte VFREQ-1 /* Minimum vertical field rate (1-255 Hz) */
+ .byte VFREQ+1 /* Maximum vertical field rate (1-255 Hz) */
+ .byte (CLOCK/(XPIX+XBLANK))-1 /* Minimum horizontal line rate
+ (1-255 kHz) */
+ .byte (CLOCK/(XPIX+XBLANK))+1 /* Maximum horizontal line rate
+ (1-255 kHz) */
+ .byte (CLOCK/10000)+1 /* Maximum pixel clock rate, rounded up
+ to 10 MHz multiple (10-2550 MHz) */
+ .byte 0 /* No extended timing information type */
+end2: .byte 0x0a /* End marker */
+ .fill 12-(end2-start2), 1, 0x20 /* Padded spaces */
+descriptor4: .byte 0,0 /* Not a detailed timing descriptor */
+ .byte 0 /* Must be zero */
+ .byte 0xfc /* Descriptor is text */
+ .byte 0 /* Must be zero */
+start3: .ascii TIMING_NAME
+end3: .byte 0x0a /* End marker */
+ .fill 12-(end3-start3), 1, 0x20 /* Padded spaces */
+extensions: .byte 0 /* Number of extensions to follow */
+checksum: .byte CRC /* Sum of all bytes must be 0 */
diff --git a/Documentation/EDID/hex b/Documentation/EDID/hex
new file mode 100644
index 000000000..8873ebb61
--- /dev/null
+++ b/Documentation/EDID/hex
@@ -0,0 +1 @@
+"\t" 8/1 "0x%02x, " "\n"
diff --git a/Documentation/HOWTO b/Documentation/HOWTO
new file mode 100644
index 000000000..93aa86046
--- /dev/null
+++ b/Documentation/HOWTO
@@ -0,0 +1,609 @@
+HOWTO do Linux kernel development
+---------------------------------
+
+This is the be-all, end-all document on this topic. It contains
+instructions on how to become a Linux kernel developer and how to learn
+to work with the Linux kernel development community. It tries to not
+contain anything related to the technical aspects of kernel programming,
+but will help point you in the right direction for that.
+
+If anything in this document becomes out of date, please send in patches
+to the maintainer of this file, who is listed at the bottom of the
+document.
+
+
+Introduction
+------------
+
+So, you want to learn how to become a Linux kernel developer? Or you
+have been told by your manager, "Go write a Linux driver for this
+device." This document's goal is to teach you everything you need to
+know to achieve this by describing the process you need to go through,
+and hints on how to work with the community. It will also try to
+explain some of the reasons why the community works like it does.
+
+The kernel is written mostly in C, with some architecture-dependent
+parts written in assembly. A good understanding of C is required for
+kernel development. Assembly (any architecture) is not required unless
+you plan to do low-level development for that architecture. Though they
+are not a good substitute for a solid C education and/or years of
+experience, the following books are good for, if anything, reference:
+ - "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
+ - "Practical C Programming" by Steve Oualline [O'Reilly]
+ - "C: A Reference Manual" by Harbison and Steele [Prentice Hall]
+
+The kernel is written using GNU C and the GNU toolchain. While it
+adheres to the ISO C89 standard, it uses a number of extensions that are
+not featured in the standard. The kernel is a freestanding C
+environment, with no reliance on the standard C library, so some
+portions of the C standard are not supported. Arbitrary long long
+divisions and floating point are not allowed. It can sometimes be
+difficult to understand the assumptions the kernel has on the toolchain
+and the extensions that it uses, and unfortunately there is no
+definitive reference for them. Please check the gcc info pages (`info
+gcc`) for some information on them.
+
+Please remember that you are trying to learn how to work with the
+existing development community. It is a diverse group of people, with
+high standards for coding, style and procedure. These standards have
+been created over time based on what they have found to work best for
+such a large and geographically dispersed team. Try to learn as much as
+possible about these standards ahead of time, as they are well
+documented; do not expect people to adapt to you or your company's way
+of doing things.
+
+
+Legal Issues
+------------
+
+The Linux kernel source code is released under the GPL. Please see the
+file, COPYING, in the main directory of the source tree, for details on
+the license. If you have further questions about the license, please
+contact a lawyer, and do not ask on the Linux kernel mailing list. The
+people on the mailing lists are not lawyers, and you should not rely on
+their statements on legal matters.
+
+For common questions and answers about the GPL, please see:
+ http://www.gnu.org/licenses/gpl-faq.html
+
+
+Documentation
+------------
+
+The Linux kernel source tree has a large range of documents that are
+invaluable for learning how to interact with the kernel community. When
+new features are added to the kernel, it is recommended that new
+documentation files are also added which explain how to use the feature.
+When a kernel change causes the interface that the kernel exposes to
+userspace to change, it is recommended that you send the information or
+a patch to the manual pages explaining the change to the manual pages
+maintainer at mtk.manpages@gmail.com, and CC the list
+linux-api@vger.kernel.org.
+
+Here is a list of files that are in the kernel source tree that are
+required reading:
+ README
+ This file gives a short background on the Linux kernel and describes
+ what is necessary to do to configure and build the kernel. People
+ who are new to the kernel should start here.
+
+ Documentation/Changes
+ This file gives a list of the minimum levels of various software
+ packages that are necessary to build and run the kernel
+ successfully.
+
+ Documentation/CodingStyle
+ This describes the Linux kernel coding style, and some of the
+ rationale behind it. All new code is expected to follow the
+ guidelines in this document. Most maintainers will only accept
+ patches if these rules are followed, and many people will only
+ review code if it is in the proper style.
+
+ Documentation/SubmittingPatches
+ Documentation/SubmittingDrivers
+ These files describe in explicit detail how to successfully create
+ and send a patch, including (but not limited to):
+ - Email contents
+ - Email format
+ - Who to send it to
+ Following these rules will not guarantee success (as all patches are
+ subject to scrutiny for content and style), but not following them
+ will almost always prevent it.
+
+ Other excellent descriptions of how to create patches properly are:
+ "The Perfect Patch"
+ http://www.ozlabs.org/~akpm/stuff/tpp.txt
+ "Linux kernel patch submission format"
+ http://linux.yyz.us/patch-format.html
+
+ Documentation/stable_api_nonsense.txt
+ This file describes the rationale behind the conscious decision to
+ not have a stable API within the kernel, including things like:
+ - Subsystem shim-layers (for compatibility?)
+ - Driver portability between Operating Systems.
+ - Mitigating rapid change within the kernel source tree (or
+ preventing rapid change)
+ This document is crucial for understanding the Linux development
+ philosophy and is very important for people moving to Linux from
+ development on other Operating Systems.
+
+ Documentation/SecurityBugs
+ If you feel you have found a security problem in the Linux kernel,
+ please follow the steps in this document to help notify the kernel
+ developers, and help solve the issue.
+
+ Documentation/ManagementStyle
+ This document describes how Linux kernel maintainers operate and the
+ shared ethos behind their methodologies. This is important reading
+ for anyone new to kernel development (or anyone simply curious about
+ it), as it resolves a lot of common misconceptions and confusion
+ about the unique behavior of kernel maintainers.
+
+ Documentation/stable_kernel_rules.txt
+ This file describes the rules on how the stable kernel releases
+ happen, and what to do if you want to get a change into one of these
+ releases.
+
+ Documentation/kernel-docs.txt
+ A list of external documentation that pertains to kernel
+ development. Please consult this list if you do not find what you
+ are looking for within the in-kernel documentation.
+
+ Documentation/applying-patches.txt
+ A good introduction describing exactly what a patch is and how to
+ apply it to the different development branches of the kernel.
+
+The kernel also has a large number of documents that can be
+automatically generated from the source code itself. This includes a
+full description of the in-kernel API, and rules on how to handle
+locking properly. The documents will be created in the
+Documentation/DocBook/ directory and can be generated as PDF,
+Postscript, HTML, and man pages by running:
+ make pdfdocs
+ make psdocs
+ make htmldocs
+ make mandocs
+respectively from the main kernel source directory.
+
+
+Becoming A Kernel Developer
+---------------------------
+
+If you do not know anything about Linux kernel development, you should
+look at the Linux KernelNewbies project:
+ http://kernelnewbies.org
+It consists of a helpful mailing list where you can ask almost any type
+of basic kernel development question (make sure to search the archives
+first, before asking something that has already been answered in the
+past.) It also has an IRC channel that you can use to ask questions in
+real-time, and a lot of helpful documentation that is useful for
+learning about Linux kernel development.
+
+The website has basic information about code organization, subsystems,
+and current projects (both in-tree and out-of-tree). It also describes
+some basic logistical information, like how to compile a kernel and
+apply a patch.
+
+If you do not know where you want to start, but you want to look for
+some task to start doing to join into the kernel development community,
+go to the Linux Kernel Janitor's project:
+ http://kernelnewbies.org/KernelJanitors
+It is a great place to start. It describes a list of relatively simple
+problems that need to be cleaned up and fixed within the Linux kernel
+source tree. Working with the developers in charge of this project, you
+will learn the basics of getting your patch into the Linux kernel tree,
+and possibly be pointed in the direction of what to go work on next, if
+you do not already have an idea.
+
+If you already have a chunk of code that you want to put into the kernel
+tree, but need some help getting it in the proper form, the
+kernel-mentors project was created to help you out with this. It is a
+mailing list, and can be found at:
+ http://selenic.com/mailman/listinfo/kernel-mentors
+
+Before making any actual modifications to the Linux kernel code, it is
+imperative to understand how the code in question works. For this
+purpose, nothing is better than reading through it directly (most tricky
+bits are commented well), perhaps even with the help of specialized
+tools. One such tool that is particularly recommended is the Linux
+Cross-Reference project, which is able to present source code in a
+self-referential, indexed webpage format. An excellent up-to-date
+repository of the kernel code may be found at:
+ http://lxr.linux.no/+trees
+
+
+The development process
+-----------------------
+
+Linux kernel development process currently consists of a few different
+main kernel "branches" and lots of different subsystem-specific kernel
+branches. These different branches are:
+ - main 3.x kernel tree
+ - 3.x.y -stable kernel tree
+ - 3.x -git kernel patches
+ - subsystem specific kernel trees and patches
+ - the 3.x -next kernel tree for integration tests
+
+3.x kernel tree
+-----------------
+3.x kernels are maintained by Linus Torvalds, and can be found on
+kernel.org in the pub/linux/kernel/v3.x/ directory. Its development
+process is as follows:
+ - As soon as a new kernel is released a two weeks window is open,
+ during this period of time maintainers can submit big diffs to
+ Linus, usually the patches that have already been included in the
+ -next kernel for a few weeks. The preferred way to submit big changes
+ is using git (the kernel's source management tool, more information
+ can be found at http://git-scm.com/) but plain patches are also just
+ fine.
+ - After two weeks a -rc1 kernel is released it is now possible to push
+ only patches that do not include new features that could affect the
+ stability of the whole kernel. Please note that a whole new driver
+ (or filesystem) might be accepted after -rc1 because there is no
+ risk of causing regressions with such a change as long as the change
+ is self-contained and does not affect areas outside of the code that
+ is being added. git can be used to send patches to Linus after -rc1
+ is released, but the patches need to also be sent to a public
+ mailing list for review.
+ - A new -rc is released whenever Linus deems the current git tree to
+ be in a reasonably sane state adequate for testing. The goal is to
+ release a new -rc kernel every week.
+ - Process continues until the kernel is considered "ready", the
+ process should last around 6 weeks.
+ - Known regressions in each release are periodically posted to the
+ linux-kernel mailing list. The goal is to reduce the length of
+ that list to zero before declaring the kernel to be "ready," but, in
+ the real world, a small number of regressions often remain at
+ release time.
+
+It is worth mentioning what Andrew Morton wrote on the linux-kernel
+mailing list about kernel releases:
+ "Nobody knows when a kernel will be released, because it's
+ released according to perceived bug status, not according to a
+ preconceived timeline."
+
+3.x.y -stable kernel tree
+---------------------------
+Kernels with 3-part versions are -stable kernels. They contain
+relatively small and critical fixes for security problems or significant
+regressions discovered in a given 3.x kernel.
+
+This is the recommended branch for users who want the most recent stable
+kernel and are not interested in helping test development/experimental
+versions.
+
+If no 3.x.y kernel is available, then the highest numbered 3.x
+kernel is the current stable kernel.
+
+3.x.y are maintained by the "stable" team <stable@vger.kernel.org>, and
+are released as needs dictate. The normal release period is approximately
+two weeks, but it can be longer if there are no pressing problems. A
+security-related problem, instead, can cause a release to happen almost
+instantly.
+
+The file Documentation/stable_kernel_rules.txt in the kernel tree
+documents what kinds of changes are acceptable for the -stable tree, and
+how the release process works.
+
+3.x -git patches
+------------------
+These are daily snapshots of Linus' kernel tree which are managed in a
+git repository (hence the name.) These patches are usually released
+daily and represent the current state of Linus' tree. They are more
+experimental than -rc kernels since they are generated automatically
+without even a cursory glance to see if they are sane.
+
+Subsystem Specific kernel trees and patches
+-------------------------------------------
+The maintainers of the various kernel subsystems --- and also many
+kernel subsystem developers --- expose their current state of
+development in source repositories. That way, others can see what is
+happening in the different areas of the kernel. In areas where
+development is rapid, a developer may be asked to base his submissions
+onto such a subsystem kernel tree so that conflicts between the
+submission and other already ongoing work are avoided.
+
+Most of these repositories are git trees, but there are also other SCMs
+in use, or patch queues being published as quilt series. Addresses of
+these subsystem repositories are listed in the MAINTAINERS file. Many
+of them can be browsed at http://git.kernel.org/.
+
+Before a proposed patch is committed to such a subsystem tree, it is
+subject to review which primarily happens on mailing lists (see the
+respective section below). For several kernel subsystems, this review
+process is tracked with the tool patchwork. Patchwork offers a web
+interface which shows patch postings, any comments on a patch or
+revisions to it, and maintainers can mark patches as under review,
+accepted, or rejected. Most of these patchwork sites are listed at
+http://patchwork.kernel.org/.
+
+3.x -next kernel tree for integration tests
+---------------------------------------------
+Before updates from subsystem trees are merged into the mainline 3.x
+tree, they need to be integration-tested. For this purpose, a special
+testing repository exists into which virtually all subsystem trees are
+pulled on an almost daily basis:
+ http://git.kernel.org/?p=linux/kernel/git/next/linux-next.git
+
+This way, the -next kernel gives a summary outlook onto what will be
+expected to go into the mainline kernel at the next merge period.
+Adventurous testers are very welcome to runtime-test the -next kernel.
+
+
+Bug Reporting
+-------------
+
+bugzilla.kernel.org is where the Linux kernel developers track kernel
+bugs. Users are encouraged to report all bugs that they find in this
+tool. For details on how to use the kernel bugzilla, please see:
+ http://bugzilla.kernel.org/page.cgi?id=faq.html
+
+The file REPORTING-BUGS in the main kernel source directory has a good
+template for how to report a possible kernel bug, and details what kind
+of information is needed by the kernel developers to help track down the
+problem.
+
+
+Managing bug reports
+--------------------
+
+One of the best ways to put into practice your hacking skills is by fixing
+bugs reported by other people. Not only you will help to make the kernel
+more stable, you'll learn to fix real world problems and you will improve
+your skills, and other developers will be aware of your presence. Fixing
+bugs is one of the best ways to get merits among other developers, because
+not many people like wasting time fixing other people's bugs.
+
+To work in the already reported bug reports, go to http://bugzilla.kernel.org.
+If you want to be advised of the future bug reports, you can subscribe to the
+bugme-new mailing list (only new bug reports are mailed here) or to the
+bugme-janitor mailing list (every change in the bugzilla is mailed here)
+
+ http://lists.linux-foundation.org/mailman/listinfo/bugme-new
+ http://lists.linux-foundation.org/mailman/listinfo/bugme-janitors
+
+
+
+Mailing lists
+-------------
+
+As some of the above documents describe, the majority of the core kernel
+developers participate on the Linux Kernel Mailing list. Details on how
+to subscribe and unsubscribe from the list can be found at:
+ http://vger.kernel.org/vger-lists.html#linux-kernel
+There are archives of the mailing list on the web in many different
+places. Use a search engine to find these archives. For example:
+ http://dir.gmane.org/gmane.linux.kernel
+It is highly recommended that you search the archives about the topic
+you want to bring up, before you post it to the list. A lot of things
+already discussed in detail are only recorded at the mailing list
+archives.
+
+Most of the individual kernel subsystems also have their own separate
+mailing list where they do their development efforts. See the
+MAINTAINERS file for a list of what these lists are for the different
+groups.
+
+Many of the lists are hosted on kernel.org. Information on them can be
+found at:
+ http://vger.kernel.org/vger-lists.html
+
+Please remember to follow good behavioral habits when using the lists.
+Though a bit cheesy, the following URL has some simple guidelines for
+interacting with the list (or any list):
+ http://www.albion.com/netiquette/
+
+If multiple people respond to your mail, the CC: list of recipients may
+get pretty large. Don't remove anybody from the CC: list without a good
+reason, or don't reply only to the list address. Get used to receiving the
+mail twice, one from the sender and the one from the list, and don't try
+to tune that by adding fancy mail-headers, people will not like it.
+
+Remember to keep the context and the attribution of your replies intact,
+keep the "John Kernelhacker wrote ...:" lines at the top of your reply, and
+add your statements between the individual quoted sections instead of
+writing at the top of the mail.
+
+If you add patches to your mail, make sure they are plain readable text
+as stated in Documentation/SubmittingPatches. Kernel developers don't
+want to deal with attachments or compressed patches; they may want
+to comment on individual lines of your patch, which works only that way.
+Make sure you use a mail program that does not mangle spaces and tab
+characters. A good first test is to send the mail to yourself and try
+to apply your own patch by yourself. If that doesn't work, get your
+mail program fixed or change it until it works.
+
+Above all, please remember to show respect to other subscribers.
+
+
+Working with the community
+--------------------------
+
+The goal of the kernel community is to provide the best possible kernel
+there is. When you submit a patch for acceptance, it will be reviewed
+on its technical merits and those alone. So, what should you be
+expecting?
+ - criticism
+ - comments
+ - requests for change
+ - requests for justification
+ - silence
+
+Remember, this is part of getting your patch into the kernel. You have
+to be able to take criticism and comments about your patches, evaluate
+them at a technical level and either rework your patches or provide
+clear and concise reasoning as to why those changes should not be made.
+If there are no responses to your posting, wait a few days and try
+again, sometimes things get lost in the huge volume.
+
+What should you not do?
+ - expect your patch to be accepted without question
+ - become defensive
+ - ignore comments
+ - resubmit the patch without making any of the requested changes
+
+In a community that is looking for the best technical solution possible,
+there will always be differing opinions on how beneficial a patch is.
+You have to be cooperative, and willing to adapt your idea to fit within
+the kernel. Or at least be willing to prove your idea is worth it.
+Remember, being wrong is acceptable as long as you are willing to work
+toward a solution that is right.
+
+It is normal that the answers to your first patch might simply be a list
+of a dozen things you should correct. This does _not_ imply that your
+patch will not be accepted, and it is _not_ meant against you
+personally. Simply correct all issues raised against your patch and
+resend it.
+
+
+Differences between the kernel community and corporate structures
+-----------------------------------------------------------------
+
+The kernel community works differently than most traditional corporate
+development environments. Here are a list of things that you can try to
+do to avoid problems:
+ Good things to say regarding your proposed changes:
+ - "This solves multiple problems."
+ - "This deletes 2000 lines of code."
+ - "Here is a patch that explains what I am trying to describe."
+ - "I tested it on 5 different architectures..."
+ - "Here is a series of small patches that..."
+ - "This increases performance on typical machines..."
+
+ Bad things you should avoid saying:
+ - "We did it this way in AIX/ptx/Solaris, so therefore it must be
+ good..."
+ - "I've being doing this for 20 years, so..."
+ - "This is required for my company to make money"
+ - "This is for our Enterprise product line."
+ - "Here is my 1000 page design document that describes my idea"
+ - "I've been working on this for 6 months..."
+ - "Here's a 5000 line patch that..."
+ - "I rewrote all of the current mess, and here it is..."
+ - "I have a deadline, and this patch needs to be applied now."
+
+Another way the kernel community is different than most traditional
+software engineering work environments is the faceless nature of
+interaction. One benefit of using email and irc as the primary forms of
+communication is the lack of discrimination based on gender or race.
+The Linux kernel work environment is accepting of women and minorities
+because all you are is an email address. The international aspect also
+helps to level the playing field because you can't guess gender based on
+a person's name. A man may be named Andrea and a woman may be named Pat.
+Most women who have worked in the Linux kernel and have expressed an
+opinion have had positive experiences.
+
+The language barrier can cause problems for some people who are not
+comfortable with English. A good grasp of the language can be needed in
+order to get ideas across properly on mailing lists, so it is
+recommended that you check your emails to make sure they make sense in
+English before sending them.
+
+
+Break up your changes
+---------------------
+
+The Linux kernel community does not gladly accept large chunks of code
+dropped on it all at once. The changes need to be properly introduced,
+discussed, and broken up into tiny, individual portions. This is almost
+the exact opposite of what companies are used to doing. Your proposal
+should also be introduced very early in the development process, so that
+you can receive feedback on what you are doing. It also lets the
+community feel that you are working with them, and not simply using them
+as a dumping ground for your feature. However, don't send 50 emails at
+one time to a mailing list, your patch series should be smaller than
+that almost all of the time.
+
+The reasons for breaking things up are the following:
+
+1) Small patches increase the likelihood that your patches will be
+ applied, since they don't take much time or effort to verify for
+ correctness. A 5 line patch can be applied by a maintainer with
+ barely a second glance. However, a 500 line patch may take hours to
+ review for correctness (the time it takes is exponentially
+ proportional to the size of the patch, or something).
+
+ Small patches also make it very easy to debug when something goes
+ wrong. It's much easier to back out patches one by one than it is
+ to dissect a very large patch after it's been applied (and broken
+ something).
+
+2) It's important not only to send small patches, but also to rewrite
+ and simplify (or simply re-order) patches before submitting them.
+
+Here is an analogy from kernel developer Al Viro:
+ "Think of a teacher grading homework from a math student. The
+ teacher does not want to see the student's trials and errors
+ before they came up with the solution. They want to see the
+ cleanest, most elegant answer. A good student knows this, and
+ would never submit her intermediate work before the final
+ solution."
+
+ The same is true of kernel development. The maintainers and
+ reviewers do not want to see the thought process behind the
+ solution to the problem one is solving. They want to see a
+ simple and elegant solution."
+
+It may be challenging to keep the balance between presenting an elegant
+solution and working together with the community and discussing your
+unfinished work. Therefore it is good to get early in the process to
+get feedback to improve your work, but also keep your changes in small
+chunks that they may get already accepted, even when your whole task is
+not ready for inclusion now.
+
+Also realize that it is not acceptable to send patches for inclusion
+that are unfinished and will be "fixed up later."
+
+
+Justify your change
+-------------------
+
+Along with breaking up your patches, it is very important for you to let
+the Linux community know why they should add this change. New features
+must be justified as being needed and useful.
+
+
+Document your change
+--------------------
+
+When sending in your patches, pay special attention to what you say in
+the text in your email. This information will become the ChangeLog
+information for the patch, and will be preserved for everyone to see for
+all time. It should describe the patch completely, containing:
+ - why the change is necessary
+ - the overall design approach in the patch
+ - implementation details
+ - testing results
+
+For more details on what this should all look like, please see the
+ChangeLog section of the document:
+ "The Perfect Patch"
+ http://www.ozlabs.org/~akpm/stuff/tpp.txt
+
+
+
+
+All of these things are sometimes very hard to do. It can take years to
+perfect these practices (if at all). It's a continuous process of
+improvement that requires a lot of patience and determination. But
+don't give up, it's possible. Many have done it before, and each had to
+start exactly where you are now.
+
+
+
+
+----------
+Thanks to Paolo Ciarrocchi who allowed the "Development Process"
+(http://lwn.net/Articles/94386/) section
+to be based on text he had written, and to Randy Dunlap and Gerrit
+Huizenga for some of the list of things you should and should not say.
+Also thanks to Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
+Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi
+Kleen, Vadim Lobanov, Jesper Juhl, Adrian Bunk, Keri Harris, Frans Pop,
+David A. Wheeler, Junio Hamano, Michael Kerrisk, and Alex Shepard for
+their review, comments, and contributions. Without their help, this
+document would not have been possible.
+
+
+
+Maintainer: Greg Kroah-Hartman <greg@kroah.com>
diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt
new file mode 100644
index 000000000..31d1d6588
--- /dev/null
+++ b/Documentation/IPMI.txt
@@ -0,0 +1,722 @@
+
+ The Linux IPMI Driver
+ ---------------------
+ Corey Minyard
+ <minyard@mvista.com>
+ <minyard@acm.org>
+
+The Intelligent Platform Management Interface, or IPMI, is a
+standard for controlling intelligent devices that monitor a system.
+It provides for dynamic discovery of sensors in the system and the
+ability to monitor the sensors and be informed when the sensor's
+values change or go outside certain boundaries. It also has a
+standardized database for field-replaceable units (FRUs) and a watchdog
+timer.
+
+To use this, you need an interface to an IPMI controller in your
+system (called a Baseboard Management Controller, or BMC) and
+management software that can use the IPMI system.
+
+This document describes how to use the IPMI driver for Linux. If you
+are not familiar with IPMI itself, see the web site at
+http://www.intel.com/design/servers/ipmi/index.htm. IPMI is a big
+subject and I can't cover it all here!
+
+Configuration
+-------------
+
+The Linux IPMI driver is modular, which means you have to pick several
+things to have it work right depending on your hardware. Most of
+these are available in the 'Character Devices' menu then the IPMI
+menu.
+
+No matter what, you must pick 'IPMI top-level message handler' to use
+IPMI. What you do beyond that depends on your needs and hardware.
+
+The message handler does not provide any user-level interfaces.
+Kernel code (like the watchdog) can still use it. If you need access
+from userland, you need to select 'Device interface for IPMI' if you
+want access through a device driver.
+
+The driver interface depends on your hardware. If your system
+properly provides the SMBIOS info for IPMI, the driver will detect it
+and just work. If you have a board with a standard interface (These
+will generally be either "KCS", "SMIC", or "BT", consult your hardware
+manual), choose the 'IPMI SI handler' option. A driver also exists
+for direct I2C access to the IPMI management controller. Some boards
+support this, but it is unknown if it will work on every board. For
+this, choose 'IPMI SMBus handler', but be ready to try to do some
+figuring to see if it will work on your system if the SMBIOS/APCI
+information is wrong or not present. It is fairly safe to have both
+these enabled and let the drivers auto-detect what is present.
+
+You should generally enable ACPI on your system, as systems with IPMI
+can have ACPI tables describing them.
+
+If you have a standard interface and the board manufacturer has done
+their job correctly, the IPMI controller should be automatically
+detected (via ACPI or SMBIOS tables) and should just work. Sadly,
+many boards do not have this information. The driver attempts
+standard defaults, but they may not work. If you fall into this
+situation, you need to read the section below named 'The SI Driver' or
+"The SMBus Driver" on how to hand-configure your system.
+
+IPMI defines a standard watchdog timer. You can enable this with the
+'IPMI Watchdog Timer' config option. If you compile the driver into
+the kernel, then via a kernel command-line option you can have the
+watchdog timer start as soon as it initializes. It also have a lot
+of other options, see the 'Watchdog' section below for more details.
+Note that you can also have the watchdog continue to run if it is
+closed (by default it is disabled on close). Go into the 'Watchdog
+Cards' menu, enable 'Watchdog Timer Support', and enable the option
+'Disable watchdog shutdown on close'.
+
+IPMI systems can often be powered off using IPMI commands. Select
+'IPMI Poweroff' to do this. The driver will auto-detect if the system
+can be powered off by IPMI. It is safe to enable this even if your
+system doesn't support this option. This works on ATCA systems, the
+Radisys CPI1 card, and any IPMI system that supports standard chassis
+management commands.
+
+If you want the driver to put an event into the event log on a panic,
+enable the 'Generate a panic event to all BMCs on a panic' option. If
+you want the whole panic string put into the event log using OEM
+events, enable the 'Generate OEM events containing the panic string'
+option.
+
+Basic Design
+------------
+
+The Linux IPMI driver is designed to be very modular and flexible, you
+only need to take the pieces you need and you can use it in many
+different ways. Because of that, it's broken into many chunks of
+code. These chunks (by module name) are:
+
+ipmi_msghandler - This is the central piece of software for the IPMI
+system. It handles all messages, message timing, and responses. The
+IPMI users tie into this, and the IPMI physical interfaces (called
+System Management Interfaces, or SMIs) also tie in here. This
+provides the kernelland interface for IPMI, but does not provide an
+interface for use by application processes.
+
+ipmi_devintf - This provides a userland IOCTL interface for the IPMI
+driver, each open file for this device ties in to the message handler
+as an IPMI user.
+
+ipmi_si - A driver for various system interfaces. This supports KCS,
+SMIC, and BT interfaces. Unless you have an SMBus interface or your
+own custom interface, you probably need to use this.
+
+ipmi_ssif - A driver for accessing BMCs on the SMBus. It uses the
+I2C kernel driver's SMBus interfaces to send and receive IPMI messages
+over the SMBus.
+
+ipmi_watchdog - IPMI requires systems to have a very capable watchdog
+timer. This driver implements the standard Linux watchdog timer
+interface on top of the IPMI message handler.
+
+ipmi_poweroff - Some systems support the ability to be turned off via
+IPMI commands.
+
+These are all individually selectable via configuration options.
+
+Note that the KCS-only interface has been removed. The af_ipmi driver
+is no longer supported and has been removed because it was impossible
+to do 32 bit emulation on 64-bit kernels with it.
+
+Much documentation for the interface is in the include files. The
+IPMI include files are:
+
+net/af_ipmi.h - Contains the socket interface.
+
+linux/ipmi.h - Contains the user interface and IOCTL interface for IPMI.
+
+linux/ipmi_smi.h - Contains the interface for system management interfaces
+(things that interface to IPMI controllers) to use.
+
+linux/ipmi_msgdefs.h - General definitions for base IPMI messaging.
+
+
+Addressing
+----------
+
+The IPMI addressing works much like IP addresses, you have an overlay
+to handle the different address types. The overlay is:
+
+ struct ipmi_addr
+ {
+ int addr_type;
+ short channel;
+ char data[IPMI_MAX_ADDR_SIZE];
+ };
+
+The addr_type determines what the address really is. The driver
+currently understands two different types of addresses.
+
+"System Interface" addresses are defined as:
+
+ struct ipmi_system_interface_addr
+ {
+ int addr_type;
+ short channel;
+ };
+
+and the type is IPMI_SYSTEM_INTERFACE_ADDR_TYPE. This is used for talking
+straight to the BMC on the current card. The channel must be
+IPMI_BMC_CHANNEL.
+
+Messages that are destined to go out on the IPMB bus use the
+IPMI_IPMB_ADDR_TYPE address type. The format is
+
+ struct ipmi_ipmb_addr
+ {
+ int addr_type;
+ short channel;
+ unsigned char slave_addr;
+ unsigned char lun;
+ };
+
+The "channel" here is generally zero, but some devices support more
+than one channel, it corresponds to the channel as defined in the IPMI
+spec.
+
+
+Messages
+--------
+
+Messages are defined as:
+
+struct ipmi_msg
+{
+ unsigned char netfn;
+ unsigned char lun;
+ unsigned char cmd;
+ unsigned char *data;
+ int data_len;
+};
+
+The driver takes care of adding/stripping the header information. The
+data portion is just the data to be send (do NOT put addressing info
+here) or the response. Note that the completion code of a response is
+the first item in "data", it is not stripped out because that is how
+all the messages are defined in the spec (and thus makes counting the
+offsets a little easier :-).
+
+When using the IOCTL interface from userland, you must provide a block
+of data for "data", fill it, and set data_len to the length of the
+block of data, even when receiving messages. Otherwise the driver
+will have no place to put the message.
+
+Messages coming up from the message handler in kernelland will come in
+as:
+
+ struct ipmi_recv_msg
+ {
+ struct list_head link;
+
+ /* The type of message as defined in the "Receive Types"
+ defines above. */
+ int recv_type;
+
+ ipmi_user_t *user;
+ struct ipmi_addr addr;
+ long msgid;
+ struct ipmi_msg msg;
+
+ /* Call this when done with the message. It will presumably free
+ the message and do any other necessary cleanup. */
+ void (*done)(struct ipmi_recv_msg *msg);
+
+ /* Place-holder for the data, don't make any assumptions about
+ the size or existence of this, since it may change. */
+ unsigned char msg_data[IPMI_MAX_MSG_LENGTH];
+ };
+
+You should look at the receive type and handle the message
+appropriately.
+
+
+The Upper Layer Interface (Message Handler)
+-------------------------------------------
+
+The upper layer of the interface provides the users with a consistent
+view of the IPMI interfaces. It allows multiple SMI interfaces to be
+addressed (because some boards actually have multiple BMCs on them)
+and the user should not have to care what type of SMI is below them.
+
+
+Creating the User
+
+To user the message handler, you must first create a user using
+ipmi_create_user. The interface number specifies which SMI you want
+to connect to, and you must supply callback functions to be called
+when data comes in. The callback function can run at interrupt level,
+so be careful using the callbacks. This also allows to you pass in a
+piece of data, the handler_data, that will be passed back to you on
+all calls.
+
+Once you are done, call ipmi_destroy_user() to get rid of the user.
+
+From userland, opening the device automatically creates a user, and
+closing the device automatically destroys the user.
+
+
+Messaging
+
+To send a message from kernel-land, the ipmi_request() call does
+pretty much all message handling. Most of the parameter are
+self-explanatory. However, it takes a "msgid" parameter. This is NOT
+the sequence number of messages. It is simply a long value that is
+passed back when the response for the message is returned. You may
+use it for anything you like.
+
+Responses come back in the function pointed to by the ipmi_recv_hndl
+field of the "handler" that you passed in to ipmi_create_user().
+Remember again, these may be running at interrupt level. Remember to
+look at the receive type, too.
+
+From userland, you fill out an ipmi_req_t structure and use the
+IPMICTL_SEND_COMMAND ioctl. For incoming stuff, you can use select()
+or poll() to wait for messages to come in. However, you cannot use
+read() to get them, you must call the IPMICTL_RECEIVE_MSG with the
+ipmi_recv_t structure to actually get the message. Remember that you
+must supply a pointer to a block of data in the msg.data field, and
+you must fill in the msg.data_len field with the size of the data.
+This gives the receiver a place to actually put the message.
+
+If the message cannot fit into the data you provide, you will get an
+EMSGSIZE error and the driver will leave the data in the receive
+queue. If you want to get it and have it truncate the message, us
+the IPMICTL_RECEIVE_MSG_TRUNC ioctl.
+
+When you send a command (which is defined by the lowest-order bit of
+the netfn per the IPMI spec) on the IPMB bus, the driver will
+automatically assign the sequence number to the command and save the
+command. If the response is not receive in the IPMI-specified 5
+seconds, it will generate a response automatically saying the command
+timed out. If an unsolicited response comes in (if it was after 5
+seconds, for instance), that response will be ignored.
+
+In kernelland, after you receive a message and are done with it, you
+MUST call ipmi_free_recv_msg() on it, or you will leak messages. Note
+that you should NEVER mess with the "done" field of a message, that is
+required to properly clean up the message.
+
+Note that when sending, there is an ipmi_request_supply_msgs() call
+that lets you supply the smi and receive message. This is useful for
+pieces of code that need to work even if the system is out of buffers
+(the watchdog timer uses this, for instance). You supply your own
+buffer and own free routines. This is not recommended for normal use,
+though, since it is tricky to manage your own buffers.
+
+
+Events and Incoming Commands
+
+The driver takes care of polling for IPMI events and receiving
+commands (commands are messages that are not responses, they are
+commands that other things on the IPMB bus have sent you). To receive
+these, you must register for them, they will not automatically be sent
+to you.
+
+To receive events, you must call ipmi_set_gets_events() and set the
+"val" to non-zero. Any events that have been received by the driver
+since startup will immediately be delivered to the first user that
+registers for events. After that, if multiple users are registered
+for events, they will all receive all events that come in.
+
+For receiving commands, you have to individually register commands you
+want to receive. Call ipmi_register_for_cmd() and supply the netfn
+and command name for each command you want to receive. You also
+specify a bitmask of the channels you want to receive the command from
+(or use IPMI_CHAN_ALL for all channels if you don't care). Only one
+user may be registered for each netfn/cmd/channel, but different users
+may register for different commands, or the same command if the
+channel bitmasks do not overlap.
+
+From userland, equivalent IOCTLs are provided to do these functions.
+
+
+The Lower Layer (SMI) Interface
+-------------------------------
+
+As mentioned before, multiple SMI interfaces may be registered to the
+message handler, each of these is assigned an interface number when
+they register with the message handler. They are generally assigned
+in the order they register, although if an SMI unregisters and then
+another one registers, all bets are off.
+
+The ipmi_smi.h defines the interface for management interfaces, see
+that for more details.
+
+
+The SI Driver
+-------------
+
+The SI driver allows up to 4 KCS or SMIC interfaces to be configured
+in the system. By default, scan the ACPI tables for interfaces, and
+if it doesn't find any the driver will attempt to register one KCS
+interface at the spec-specified I/O port 0xca2 without interrupts.
+You can change this at module load time (for a module) with:
+
+ modprobe ipmi_si.o type=<type1>,<type2>....
+ ports=<port1>,<port2>... addrs=<addr1>,<addr2>...
+ irqs=<irq1>,<irq2>...
+ regspacings=<sp1>,<sp2>,... regsizes=<size1>,<size2>,...
+ regshifts=<shift1>,<shift2>,...
+ slave_addrs=<addr1>,<addr2>,...
+ force_kipmid=<enable1>,<enable2>,...
+ kipmid_max_busy_us=<ustime1>,<ustime2>,...
+ unload_when_empty=[0|1]
+ trydefaults=[0|1] trydmi=[0|1] tryacpi=[0|1]
+ tryplatform=[0|1] trypci=[0|1]
+
+Each of these except try... items is a list, the first item for the
+first interface, second item for the second interface, etc.
+
+The si_type may be either "kcs", "smic", or "bt". If you leave it blank, it
+defaults to "kcs".
+
+If you specify addrs as non-zero for an interface, the driver will
+use the memory address given as the address of the device. This
+overrides si_ports.
+
+If you specify ports as non-zero for an interface, the driver will
+use the I/O port given as the device address.
+
+If you specify irqs as non-zero for an interface, the driver will
+attempt to use the given interrupt for the device.
+
+trydefaults sets whether the standard IPMI interface at 0xca2 and
+any interfaces specified by ACPE are tried. By default, the driver
+tries it, set this value to zero to turn this off.
+
+The other try... items disable discovery by their corresponding
+names. These are all enabled by default, set them to zero to disable
+them. The tryplatform disables openfirmware.
+
+The next three parameters have to do with register layout. The
+registers used by the interfaces may not appear at successive
+locations and they may not be in 8-bit registers. These parameters
+allow the layout of the data in the registers to be more precisely
+specified.
+
+The regspacings parameter give the number of bytes between successive
+register start addresses. For instance, if the regspacing is set to 4
+and the start address is 0xca2, then the address for the second
+register would be 0xca6. This defaults to 1.
+
+The regsizes parameter gives the size of a register, in bytes. The
+data used by IPMI is 8-bits wide, but it may be inside a larger
+register. This parameter allows the read and write type to specified.
+It may be 1, 2, 4, or 8. The default is 1.
+
+Since the register size may be larger than 32 bits, the IPMI data may not
+be in the lower 8 bits. The regshifts parameter give the amount to shift
+the data to get to the actual IPMI data.
+
+The slave_addrs specifies the IPMI address of the local BMC. This is
+usually 0x20 and the driver defaults to that, but in case it's not, it
+can be specified when the driver starts up.
+
+The force_ipmid parameter forcefully enables (if set to 1) or disables
+(if set to 0) the kernel IPMI daemon. Normally this is auto-detected
+by the driver, but systems with broken interrupts might need an enable,
+or users that don't want the daemon (don't need the performance, don't
+want the CPU hit) can disable it.
+
+If unload_when_empty is set to 1, the driver will be unloaded if it
+doesn't find any interfaces or all the interfaces fail to work. The
+default is one. Setting to 0 is useful with the hotmod, but is
+obviously only useful for modules.
+
+When compiled into the kernel, the parameters can be specified on the
+kernel command line as:
+
+ ipmi_si.type=<type1>,<type2>...
+ ipmi_si.ports=<port1>,<port2>... ipmi_si.addrs=<addr1>,<addr2>...
+ ipmi_si.irqs=<irq1>,<irq2>... ipmi_si.trydefaults=[0|1]
+ ipmi_si.regspacings=<sp1>,<sp2>,...
+ ipmi_si.regsizes=<size1>,<size2>,...
+ ipmi_si.regshifts=<shift1>,<shift2>,...
+ ipmi_si.slave_addrs=<addr1>,<addr2>,...
+ ipmi_si.force_kipmid=<enable1>,<enable2>,...
+ ipmi_si.kipmid_max_busy_us=<ustime1>,<ustime2>,...
+
+It works the same as the module parameters of the same names.
+
+By default, the driver will attempt to detect any device specified by
+ACPI, and if none of those then a KCS device at the spec-specified
+0xca2. If you want to turn this off, set the "trydefaults" option to
+false.
+
+If your IPMI interface does not support interrupts and is a KCS or
+SMIC interface, the IPMI driver will start a kernel thread for the
+interface to help speed things up. This is a low-priority kernel
+thread that constantly polls the IPMI driver while an IPMI operation
+is in progress. The force_kipmid module parameter will all the user to
+force this thread on or off. If you force it off and don't have
+interrupts, the driver will run VERY slowly. Don't blame me,
+these interfaces suck.
+
+Unfortunately, this thread can use a lot of CPU depending on the
+interface's performance. This can waste a lot of CPU and cause
+various issues with detecting idle CPU and using extra power. To
+avoid this, the kipmid_max_busy_us sets the maximum amount of time, in
+microseconds, that kipmid will spin before sleeping for a tick. This
+value sets a balance between performance and CPU waste and needs to be
+tuned to your needs. Maybe, someday, auto-tuning will be added, but
+that's not a simple thing and even the auto-tuning would need to be
+tuned to the user's desired performance.
+
+The driver supports a hot add and remove of interfaces. This way,
+interfaces can be added or removed after the kernel is up and running.
+This is done using /sys/modules/ipmi_si/parameters/hotmod, which is a
+write-only parameter. You write a string to this interface. The string
+has the format:
+ <op1>[:op2[:op3...]]
+The "op"s are:
+ add|remove,kcs|bt|smic,mem|i/o,<address>[,<opt1>[,<opt2>[,...]]]
+You can specify more than one interface on the line. The "opt"s are:
+ rsp=<regspacing>
+ rsi=<regsize>
+ rsh=<regshift>
+ irq=<irq>
+ ipmb=<ipmb slave addr>
+and these have the same meanings as discussed above. Note that you
+can also use this on the kernel command line for a more compact format
+for specifying an interface. Note that when removing an interface,
+only the first three parameters (si type, address type, and address)
+are used for the comparison. Any options are ignored for removing.
+
+The SMBus Driver (SSIF)
+-----------------------
+
+The SMBus driver allows up to 4 SMBus devices to be configured in the
+system. By default, the driver will only register with something it
+finds in DMI or ACPI tables. You can change this
+at module load time (for a module) with:
+
+ modprobe ipmi_ssif.o
+ addr=<i2caddr1>[,<i2caddr2>[,...]]
+ adapter=<adapter1>[,<adapter2>[...]]
+ dbg=<flags1>,<flags2>...
+ slave_addrs=<addr1>,<addr2>,...
+ [dbg_probe=1]
+
+The addresses are normal I2C addresses. The adapter is the string
+name of the adapter, as shown in /sys/class/i2c-adapter/i2c-<n>/name.
+It is *NOT* i2c-<n> itself. Also, the comparison is done ignoring
+spaces, so if the name is "This is an I2C chip" you can say
+adapter_name=ThisisanI2cchip. This is because it's hard to pass in
+spaces in kernel parameters.
+
+The debug flags are bit flags for each BMC found, they are:
+IPMI messages: 1, driver state: 2, timing: 4, I2C probe: 8
+
+Setting dbg_probe to 1 will enable debugging of the probing and
+detection process for BMCs on the SMBusses.
+
+The slave_addrs specifies the IPMI address of the local BMC. This is
+usually 0x20 and the driver defaults to that, but in case it's not, it
+can be specified when the driver starts up.
+
+Discovering the IPMI compliant BMC on the SMBus can cause devices on
+the I2C bus to fail. The SMBus driver writes a "Get Device ID" IPMI
+message as a block write to the I2C bus and waits for a response.
+This action can be detrimental to some I2C devices. It is highly
+recommended that the known I2C address be given to the SMBus driver in
+the smb_addr parameter unless you have DMI or ACPI data to tell the
+driver what to use.
+
+When compiled into the kernel, the addresses can be specified on the
+kernel command line as:
+
+ ipmb_ssif.addr=<i2caddr1>[,<i2caddr2>[...]]
+ ipmi_ssif.adapter=<adapter1>[,<adapter2>[...]]
+ ipmi_ssif.dbg=<flags1>[,<flags2>[...]]
+ ipmi_ssif.dbg_probe=1
+ ipmi_ssif.slave_addrs=<addr1>[,<addr2>[...]]
+
+These are the same options as on the module command line.
+
+The I2C driver does not support non-blocking access or polling, so
+this driver cannod to IPMI panic events, extend the watchdog at panic
+time, or other panic-related IPMI functions without special kernel
+patches and driver modifications. You can get those at the openipmi
+web page.
+
+The driver supports a hot add and remove of interfaces through the I2C
+sysfs interface.
+
+Other Pieces
+------------
+
+Get the detailed info related with the IPMI device
+--------------------------------------------------
+
+Some users need more detailed information about a device, like where
+the address came from or the raw base device for the IPMI interface.
+You can use the IPMI smi_watcher to catch the IPMI interfaces as they
+come or go, and to grab the information, you can use the function
+ipmi_get_smi_info(), which returns the following structure:
+
+struct ipmi_smi_info {
+ enum ipmi_addr_src addr_src;
+ struct device *dev;
+ union {
+ struct {
+ void *acpi_handle;
+ } acpi_info;
+ } addr_info;
+};
+
+Currently special info for only for SI_ACPI address sources is
+returned. Others may be added as necessary.
+
+Note that the dev pointer is included in the above structure, and
+assuming ipmi_smi_get_info returns success, you must call put_device
+on the dev pointer.
+
+
+Watchdog
+--------
+
+A watchdog timer is provided that implements the Linux-standard
+watchdog timer interface. It has three module parameters that can be
+used to control it:
+
+ modprobe ipmi_watchdog timeout=<t> pretimeout=<t> action=<action type>
+ preaction=<preaction type> preop=<preop type> start_now=x
+ nowayout=x ifnum_to_use=n
+
+ifnum_to_use specifies which interface the watchdog timer should use.
+The default is -1, which means to pick the first one registered.
+
+The timeout is the number of seconds to the action, and the pretimeout
+is the amount of seconds before the reset that the pre-timeout panic will
+occur (if pretimeout is zero, then pretimeout will not be enabled). Note
+that the pretimeout is the time before the final timeout. So if the
+timeout is 50 seconds and the pretimeout is 10 seconds, then the pretimeout
+will occur in 40 second (10 seconds before the timeout).
+
+The action may be "reset", "power_cycle", or "power_off", and
+specifies what to do when the timer times out, and defaults to
+"reset".
+
+The preaction may be "pre_smi" for an indication through the SMI
+interface, "pre_int" for an indication through the SMI with an
+interrupts, and "pre_nmi" for a NMI on a preaction. This is how
+the driver is informed of the pretimeout.
+
+The preop may be set to "preop_none" for no operation on a pretimeout,
+"preop_panic" to set the preoperation to panic, or "preop_give_data"
+to provide data to read from the watchdog device when the pretimeout
+occurs. A "pre_nmi" setting CANNOT be used with "preop_give_data"
+because you can't do data operations from an NMI.
+
+When preop is set to "preop_give_data", one byte comes ready to read
+on the device when the pretimeout occurs. Select and fasync work on
+the device, as well.
+
+If start_now is set to 1, the watchdog timer will start running as
+soon as the driver is loaded.
+
+If nowayout is set to 1, the watchdog timer will not stop when the
+watchdog device is closed. The default value of nowayout is true
+if the CONFIG_WATCHDOG_NOWAYOUT option is enabled, or false if not.
+
+When compiled into the kernel, the kernel command line is available
+for configuring the watchdog:
+
+ ipmi_watchdog.timeout=<t> ipmi_watchdog.pretimeout=<t>
+ ipmi_watchdog.action=<action type>
+ ipmi_watchdog.preaction=<preaction type>
+ ipmi_watchdog.preop=<preop type>
+ ipmi_watchdog.start_now=x
+ ipmi_watchdog.nowayout=x
+
+The options are the same as the module parameter options.
+
+The watchdog will panic and start a 120 second reset timeout if it
+gets a pre-action. During a panic or a reboot, the watchdog will
+start a 120 timer if it is running to make sure the reboot occurs.
+
+Note that if you use the NMI preaction for the watchdog, you MUST NOT
+use the nmi watchdog. There is no reasonable way to tell if an NMI
+comes from the IPMI controller, so it must assume that if it gets an
+otherwise unhandled NMI, it must be from IPMI and it will panic
+immediately.
+
+Once you open the watchdog timer, you must write a 'V' character to the
+device to close it, or the timer will not stop. This is a new semantic
+for the driver, but makes it consistent with the rest of the watchdog
+drivers in Linux.
+
+
+Panic Timeouts
+--------------
+
+The OpenIPMI driver supports the ability to put semi-custom and custom
+events in the system event log if a panic occurs. if you enable the
+'Generate a panic event to all BMCs on a panic' option, you will get
+one event on a panic in a standard IPMI event format. If you enable
+the 'Generate OEM events containing the panic string' option, you will
+also get a bunch of OEM events holding the panic string.
+
+
+The field settings of the events are:
+* Generator ID: 0x21 (kernel)
+* EvM Rev: 0x03 (this event is formatting in IPMI 1.0 format)
+* Sensor Type: 0x20 (OS critical stop sensor)
+* Sensor #: The first byte of the panic string (0 if no panic string)
+* Event Dir | Event Type: 0x6f (Assertion, sensor-specific event info)
+* Event Data 1: 0xa1 (Runtime stop in OEM bytes 2 and 3)
+* Event data 2: second byte of panic string
+* Event data 3: third byte of panic string
+See the IPMI spec for the details of the event layout. This event is
+always sent to the local management controller. It will handle routing
+the message to the right place
+
+Other OEM events have the following format:
+Record ID (bytes 0-1): Set by the SEL.
+Record type (byte 2): 0xf0 (OEM non-timestamped)
+byte 3: The slave address of the card saving the panic
+byte 4: A sequence number (starting at zero)
+The rest of the bytes (11 bytes) are the panic string. If the panic string
+is longer than 11 bytes, multiple messages will be sent with increasing
+sequence numbers.
+
+Because you cannot send OEM events using the standard interface, this
+function will attempt to find an SEL and add the events there. It
+will first query the capabilities of the local management controller.
+If it has an SEL, then they will be stored in the SEL of the local
+management controller. If not, and the local management controller is
+an event generator, the event receiver from the local management
+controller will be queried and the events sent to the SEL on that
+device. Otherwise, the events go nowhere since there is nowhere to
+send them.
+
+
+Poweroff
+--------
+
+If the poweroff capability is selected, the IPMI driver will install
+a shutdown function into the standard poweroff function pointer. This
+is in the ipmi_poweroff module. When the system requests a powerdown,
+it will send the proper IPMI commands to do this. This is supported on
+several platforms.
+
+There is a module parameter named "poweroff_powercycle" that may
+either be zero (do a power down) or non-zero (do a power cycle, power
+the system off, then power it on in a few seconds). Setting
+ipmi_poweroff.poweroff_control=x will do the same thing on the kernel
+command line. The parameter is also available via the proc filesystem
+in /proc/sys/dev/ipmi/poweroff_powercycle. Note that if the system
+does not support power cycling, it will always do the power off.
+
+The "ifnum_to_use" parameter specifies which interface the poweroff
+code should use. The default is -1, which means to pick the first one
+registered.
+
+Note that if you have ACPI enabled, the system will prefer using ACPI to
+power off.
diff --git a/Documentation/IRQ-affinity.txt b/Documentation/IRQ-affinity.txt
new file mode 100644
index 000000000..01a675175
--- /dev/null
+++ b/Documentation/IRQ-affinity.txt
@@ -0,0 +1,65 @@
+ChangeLog:
+ Started by Ingo Molnar <mingo@redhat.com>
+ Update by Max Krasnyansky <maxk@qualcomm.com>
+
+SMP IRQ affinity
+
+/proc/irq/IRQ#/smp_affinity and /proc/irq/IRQ#/smp_affinity_list specify
+which target CPUs are permitted for a given IRQ source. It's a bitmask
+(smp_affinity) or cpu list (smp_affinity_list) of allowed CPUs. It's not
+allowed to turn off all CPUs, and if an IRQ controller does not support
+IRQ affinity then the value will not change from the default of all cpus.
+
+/proc/irq/default_smp_affinity specifies default affinity mask that applies
+to all non-active IRQs. Once IRQ is allocated/activated its affinity bitmask
+will be set to the default mask. It can then be changed as described above.
+Default mask is 0xffffffff.
+
+Here is an example of restricting IRQ44 (eth1) to CPU0-3 then restricting
+it to CPU4-7 (this is an 8-CPU SMP box):
+
+[root@moon 44]# cd /proc/irq/44
+[root@moon 44]# cat smp_affinity
+ffffffff
+
+[root@moon 44]# echo 0f > smp_affinity
+[root@moon 44]# cat smp_affinity
+0000000f
+[root@moon 44]# ping -f h
+PING hell (195.4.7.3): 56 data bytes
+...
+--- hell ping statistics ---
+6029 packets transmitted, 6027 packets received, 0% packet loss
+round-trip min/avg/max = 0.1/0.1/0.4 ms
+[root@moon 44]# cat /proc/interrupts | grep 'CPU\|44:'
+ CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7
+ 44: 1068 1785 1785 1783 0 0 0 0 IO-APIC-level eth1
+
+As can be seen from the line above IRQ44 was delivered only to the first four
+processors (0-3).
+Now lets restrict that IRQ to CPU(4-7).
+
+[root@moon 44]# echo f0 > smp_affinity
+[root@moon 44]# cat smp_affinity
+000000f0
+[root@moon 44]# ping -f h
+PING hell (195.4.7.3): 56 data bytes
+..
+--- hell ping statistics ---
+2779 packets transmitted, 2777 packets received, 0% packet loss
+round-trip min/avg/max = 0.1/0.5/585.4 ms
+[root@moon 44]# cat /proc/interrupts | 'CPU\|44:'
+ CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 CPU6 CPU7
+ 44: 1068 1785 1785 1783 1784 1069 1070 1069 IO-APIC-level eth1
+
+This time around IRQ44 was delivered only to the last four processors.
+i.e counters for the CPU0-3 did not change.
+
+Here is an example of limiting that same irq (44) to cpus 1024 to 1031:
+
+[root@moon 44]# echo 1024-1031 > smp_affinity_list
+[root@moon 44]# cat smp_affinity_list
+1024-1031
+
+Note that to do this with a bitmask would require 32 bitmasks of zero
+to follow the pertinent one.
diff --git a/Documentation/IRQ-domain.txt b/Documentation/IRQ-domain.txt
new file mode 100644
index 000000000..3a8e15cba
--- /dev/null
+++ b/Documentation/IRQ-domain.txt
@@ -0,0 +1,223 @@
+irq_domain interrupt number mapping library
+
+The current design of the Linux kernel uses a single large number
+space where each separate IRQ source is assigned a different number.
+This is simple when there is only one interrupt controller, but in
+systems with multiple interrupt controllers the kernel must ensure
+that each one gets assigned non-overlapping allocations of Linux
+IRQ numbers.
+
+The number of interrupt controllers registered as unique irqchips
+show a rising tendency: for example subdrivers of different kinds
+such as GPIO controllers avoid reimplementing identical callback
+mechanisms as the IRQ core system by modelling their interrupt
+handlers as irqchips, i.e. in effect cascading interrupt controllers.
+
+Here the interrupt number loose all kind of correspondence to
+hardware interrupt numbers: whereas in the past, IRQ numbers could
+be chosen so they matched the hardware IRQ line into the root
+interrupt controller (i.e. the component actually fireing the
+interrupt line to the CPU) nowadays this number is just a number.
+
+For this reason we need a mechanism to separate controller-local
+interrupt numbers, called hardware irq's, from Linux IRQ numbers.
+
+The irq_alloc_desc*() and irq_free_desc*() APIs provide allocation of
+irq numbers, but they don't provide any support for reverse mapping of
+the controller-local IRQ (hwirq) number into the Linux IRQ number
+space.
+
+The irq_domain library adds mapping between hwirq and IRQ numbers on
+top of the irq_alloc_desc*() API. An irq_domain to manage mapping is
+preferred over interrupt controller drivers open coding their own
+reverse mapping scheme.
+
+irq_domain also implements translation from Device Tree interrupt
+specifiers to hwirq numbers, and can be easily extended to support
+other IRQ topology data sources.
+
+=== irq_domain usage ===
+An interrupt controller driver creates and registers an irq_domain by
+calling one of the irq_domain_add_*() functions (each mapping method
+has a different allocator function, more on that later). The function
+will return a pointer to the irq_domain on success. The caller must
+provide the allocator function with an irq_domain_ops structure.
+
+In most cases, the irq_domain will begin empty without any mappings
+between hwirq and IRQ numbers. Mappings are added to the irq_domain
+by calling irq_create_mapping() which accepts the irq_domain and a
+hwirq number as arguments. If a mapping for the hwirq doesn't already
+exist then it will allocate a new Linux irq_desc, associate it with
+the hwirq, and call the .map() callback so the driver can perform any
+required hardware setup.
+
+When an interrupt is received, irq_find_mapping() function should
+be used to find the Linux IRQ number from the hwirq number.
+
+The irq_create_mapping() function must be called *atleast once*
+before any call to irq_find_mapping(), lest the descriptor will not
+be allocated.
+
+If the driver has the Linux IRQ number or the irq_data pointer, and
+needs to know the associated hwirq number (such as in the irq_chip
+callbacks) then it can be directly obtained from irq_data->hwirq.
+
+=== Types of irq_domain mappings ===
+There are several mechanisms available for reverse mapping from hwirq
+to Linux irq, and each mechanism uses a different allocation function.
+Which reverse map type should be used depends on the use case. Each
+of the reverse map types are described below:
+
+==== Linear ====
+irq_domain_add_linear()
+
+The linear reverse map maintains a fixed size table indexed by the
+hwirq number. When a hwirq is mapped, an irq_desc is allocated for
+the hwirq, and the IRQ number is stored in the table.
+
+The Linear map is a good choice when the maximum number of hwirqs is
+fixed and a relatively small number (~ < 256). The advantages of this
+map are fixed time lookup for IRQ numbers, and irq_descs are only
+allocated for in-use IRQs. The disadvantage is that the table must be
+as large as the largest possible hwirq number.
+
+The majority of drivers should use the linear map.
+
+==== Tree ====
+irq_domain_add_tree()
+
+The irq_domain maintains a radix tree map from hwirq numbers to Linux
+IRQs. When an hwirq is mapped, an irq_desc is allocated and the
+hwirq is used as the lookup key for the radix tree.
+
+The tree map is a good choice if the hwirq number can be very large
+since it doesn't need to allocate a table as large as the largest
+hwirq number. The disadvantage is that hwirq to IRQ number lookup is
+dependent on how many entries are in the table.
+
+Very few drivers should need this mapping.
+
+==== No Map ===-
+irq_domain_add_nomap()
+
+The No Map mapping is to be used when the hwirq number is
+programmable in the hardware. In this case it is best to program the
+Linux IRQ number into the hardware itself so that no mapping is
+required. Calling irq_create_direct_mapping() will allocate a Linux
+IRQ number and call the .map() callback so that driver can program the
+Linux IRQ number into the hardware.
+
+Most drivers cannot use this mapping.
+
+==== Legacy ====
+irq_domain_add_simple()
+irq_domain_add_legacy()
+irq_domain_add_legacy_isa()
+
+The Legacy mapping is a special case for drivers that already have a
+range of irq_descs allocated for the hwirqs. It is used when the
+driver cannot be immediately converted to use the linear mapping. For
+example, many embedded system board support files use a set of #defines
+for IRQ numbers that are passed to struct device registrations. In that
+case the Linux IRQ numbers cannot be dynamically assigned and the legacy
+mapping should be used.
+
+The legacy map assumes a contiguous range of IRQ numbers has already
+been allocated for the controller and that the IRQ number can be
+calculated by adding a fixed offset to the hwirq number, and
+visa-versa. The disadvantage is that it requires the interrupt
+controller to manage IRQ allocations and it requires an irq_desc to be
+allocated for every hwirq, even if it is unused.
+
+The legacy map should only be used if fixed IRQ mappings must be
+supported. For example, ISA controllers would use the legacy map for
+mapping Linux IRQs 0-15 so that existing ISA drivers get the correct IRQ
+numbers.
+
+Most users of legacy mappings should use irq_domain_add_simple() which
+will use a legacy domain only if an IRQ range is supplied by the
+system and will otherwise use a linear domain mapping. The semantics
+of this call are such that if an IRQ range is specified then
+descriptors will be allocated on-the-fly for it, and if no range is
+specified it will fall through to irq_domain_add_linear() which means
+*no* irq descriptors will be allocated.
+
+A typical use case for simple domains is where an irqchip provider
+is supporting both dynamic and static IRQ assignments.
+
+In order to avoid ending up in a situation where a linear domain is
+used and no descriptor gets allocated it is very important to make sure
+that the driver using the simple domain call irq_create_mapping()
+before any irq_find_mapping() since the latter will actually work
+for the static IRQ assignment case.
+
+==== Hierarchy IRQ domain ====
+On some architectures, there may be multiple interrupt controllers
+involved in delivering an interrupt from the device to the target CPU.
+Let's look at a typical interrupt delivering path on x86 platforms:
+
+Device --> IOAPIC -> Interrupt remapping Controller -> Local APIC -> CPU
+
+There are three interrupt controllers involved:
+1) IOAPIC controller
+2) Interrupt remapping controller
+3) Local APIC controller
+
+To support such a hardware topology and make software architecture match
+hardware architecture, an irq_domain data structure is built for each
+interrupt controller and those irq_domains are organized into hierarchy.
+When building irq_domain hierarchy, the irq_domain near to the device is
+child and the irq_domain near to CPU is parent. So a hierarchy structure
+as below will be built for the example above.
+ CPU Vector irq_domain (root irq_domain to manage CPU vectors)
+ ^
+ |
+ Interrupt Remapping irq_domain (manage irq_remapping entries)
+ ^
+ |
+ IOAPIC irq_domain (manage IOAPIC delivery entries/pins)
+
+There are four major interfaces to use hierarchy irq_domain:
+1) irq_domain_alloc_irqs(): allocate IRQ descriptors and interrupt
+ controller related resources to deliver these interrupts.
+2) irq_domain_free_irqs(): free IRQ descriptors and interrupt controller
+ related resources associated with these interrupts.
+3) irq_domain_activate_irq(): activate interrupt controller hardware to
+ deliver the interrupt.
+3) irq_domain_deactivate_irq(): deactivate interrupt controller hardware
+ to stop delivering the interrupt.
+
+Following changes are needed to support hierarchy irq_domain.
+1) a new field 'parent' is added to struct irq_domain; it's used to
+ maintain irq_domain hierarchy information.
+2) a new field 'parent_data' is added to struct irq_data; it's used to
+ build hierarchy irq_data to match hierarchy irq_domains. The irq_data
+ is used to store irq_domain pointer and hardware irq number.
+3) new callbacks are added to struct irq_domain_ops to support hierarchy
+ irq_domain operations.
+
+With support of hierarchy irq_domain and hierarchy irq_data ready, an
+irq_domain structure is built for each interrupt controller, and an
+irq_data structure is allocated for each irq_domain associated with an
+IRQ. Now we could go one step further to support stacked(hierarchy)
+irq_chip. That is, an irq_chip is associated with each irq_data along
+the hierarchy. A child irq_chip may implement a required action by
+itself or by cooperating with its parent irq_chip.
+
+With stacked irq_chip, interrupt controller driver only needs to deal
+with the hardware managed by itself and may ask for services from its
+parent irq_chip when needed. So we could achieve a much cleaner
+software architecture.
+
+For an interrupt controller driver to support hierarchy irq_domain, it
+needs to:
+1) Implement irq_domain_ops.alloc and irq_domain_ops.free
+2) Optionally implement irq_domain_ops.activate and
+ irq_domain_ops.deactivate.
+3) Optionally implement an irq_chip to manage the interrupt controller
+ hardware.
+4) No need to implement irq_domain_ops.map and irq_domain_ops.unmap,
+ they are unused with hierarchy irq_domain.
+
+Hierarchy irq_domain may also be used to support other architectures,
+such as ARM, ARM64 etc.
diff --git a/Documentation/IRQ.txt b/Documentation/IRQ.txt
new file mode 100644
index 000000000..1011e7175
--- /dev/null
+++ b/Documentation/IRQ.txt
@@ -0,0 +1,22 @@
+What is an IRQ?
+
+An IRQ is an interrupt request from a device.
+Currently they can come in over a pin, or over a packet.
+Several devices may be connected to the same pin thus
+sharing an IRQ.
+
+An IRQ number is a kernel identifier used to talk about a hardware
+interrupt source. Typically this is an index into the global irq_desc
+array, but except for what linux/interrupt.h implements the details
+are architecture specific.
+
+An IRQ number is an enumeration of the possible interrupt sources on a
+machine. Typically what is enumerated is the number of input pins on
+all of the interrupt controller in the system. In the case of ISA
+what is enumerated are the 16 input pins on the two i8259 interrupt
+controllers.
+
+Architectures can assign additional meaning to the IRQ numbers, and
+are encouraged to in the case where there is any manual configuration
+of the hardware involved. The ISA IRQs are a classic example of
+assigning this kind of additional meaning.
diff --git a/Documentation/Intel-IOMMU.txt b/Documentation/Intel-IOMMU.txt
new file mode 100644
index 000000000..cf9431db8
--- /dev/null
+++ b/Documentation/Intel-IOMMU.txt
@@ -0,0 +1,111 @@
+Linux IOMMU Support
+===================
+
+The architecture spec can be obtained from the below location.
+
+http://www.intel.com/technology/virtualization/
+
+This guide gives a quick cheat sheet for some basic understanding.
+
+Some Keywords
+
+DMAR - DMA remapping
+DRHD - DMA Engine Reporting Structure
+RMRR - Reserved memory Region Reporting Structure
+ZLR - Zero length reads from PCI devices
+IOVA - IO Virtual address.
+
+Basic stuff
+-----------
+
+ACPI enumerates and lists the different DMA engines in the platform, and
+device scope relationships between PCI devices and which DMA engine controls
+them.
+
+What is RMRR?
+-------------
+
+There are some devices the BIOS controls, for e.g USB devices to perform
+PS2 emulation. The regions of memory used for these devices are marked
+reserved in the e820 map. When we turn on DMA translation, DMA to those
+regions will fail. Hence BIOS uses RMRR to specify these regions along with
+devices that need to access these regions. OS is expected to setup
+unity mappings for these regions for these devices to access these regions.
+
+How is IOVA generated?
+---------------------
+
+Well behaved drivers call pci_map_*() calls before sending command to device
+that needs to perform DMA. Once DMA is completed and mapping is no longer
+required, device performs a pci_unmap_*() calls to unmap the region.
+
+The Intel IOMMU driver allocates a virtual address per domain. Each PCIE
+device has its own domain (hence protection). Devices under p2p bridges
+share the virtual address with all devices under the p2p bridge due to
+transaction id aliasing for p2p bridges.
+
+IOVA generation is pretty generic. We used the same technique as vmalloc()
+but these are not global address spaces, but separate for each domain.
+Different DMA engines may support different number of domains.
+
+We also allocate guard pages with each mapping, so we can attempt to catch
+any overflow that might happen.
+
+
+Graphics Problems?
+------------------
+If you encounter issues with graphics devices, you can try adding
+option intel_iommu=igfx_off to turn off the integrated graphics engine.
+If this fixes anything, please ensure you file a bug reporting the problem.
+
+Some exceptions to IOVA
+-----------------------
+Interrupt ranges are not address translated, (0xfee00000 - 0xfeefffff).
+The same is true for peer to peer transactions. Hence we reserve the
+address from PCI MMIO ranges so they are not allocated for IOVA addresses.
+
+
+Fault reporting
+---------------
+When errors are reported, the DMA engine signals via an interrupt. The fault
+reason and device that caused it with fault reason is printed on console.
+
+See below for sample.
+
+
+Boot Message Sample
+-------------------
+
+Something like this gets printed indicating presence of DMAR tables
+in ACPI.
+
+ACPI: DMAR (v001 A M I OEMDMAR 0x00000001 MSFT 0x00000097) @ 0x000000007f5b5ef0
+
+When DMAR is being processed and initialized by ACPI, prints DMAR locations
+and any RMRR's processed.
+
+ACPI DMAR:Host address width 36
+ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed90000
+ACPI DMAR:DRHD (flags: 0x00000000)base: 0x00000000fed91000
+ACPI DMAR:DRHD (flags: 0x00000001)base: 0x00000000fed93000
+ACPI DMAR:RMRR base: 0x00000000000ed000 end: 0x00000000000effff
+ACPI DMAR:RMRR base: 0x000000007f600000 end: 0x000000007fffffff
+
+When DMAR is enabled for use, you will notice..
+
+PCI-DMA: Using DMAR IOMMU
+
+Fault reporting
+---------------
+
+DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+DMAR:[fault reason 05] PTE Write access is not set
+DMAR:[DMA Write] Request device [00:02.0] fault addr 6df084000
+DMAR:[fault reason 05] PTE Write access is not set
+
+TBD
+----
+
+- For compatibility testing, could use unity map domain for all devices, just
+ provide a 1-1 for all useful memory under a single domain for all devices.
+- API for paravirt ops for abstracting functionality for VMM folks.
diff --git a/Documentation/Makefile b/Documentation/Makefile
new file mode 100644
index 000000000..e2127a76b
--- /dev/null
+++ b/Documentation/Makefile
@@ -0,0 +1,4 @@
+subdir-y := accounting auxdisplay blackfin connector \
+ filesystems filesystems ia64 kdbus laptops mic misc-devices \
+ networking pcmcia prctl ptp spi timers vDSO video4linux \
+ watchdog
diff --git a/Documentation/ManagementStyle b/Documentation/ManagementStyle
new file mode 100644
index 000000000..a211ee8d8
--- /dev/null
+++ b/Documentation/ManagementStyle
@@ -0,0 +1,276 @@
+
+ Linux kernel management style
+
+This is a short document describing the preferred (or made up, depending
+on who you ask) management style for the linux kernel. It's meant to
+mirror the CodingStyle document to some degree, and mainly written to
+avoid answering (*) the same (or similar) questions over and over again.
+
+Management style is very personal and much harder to quantify than
+simple coding style rules, so this document may or may not have anything
+to do with reality. It started as a lark, but that doesn't mean that it
+might not actually be true. You'll have to decide for yourself.
+
+Btw, when talking about "kernel manager", it's all about the technical
+lead persons, not the people who do traditional management inside
+companies. If you sign purchase orders or you have any clue about the
+budget of your group, you're almost certainly not a kernel manager.
+These suggestions may or may not apply to you.
+
+First off, I'd suggest buying "Seven Habits of Highly Effective
+People", and NOT read it. Burn it, it's a great symbolic gesture.
+
+(*) This document does so not so much by answering the question, but by
+making it painfully obvious to the questioner that we don't have a clue
+to what the answer is.
+
+Anyway, here goes:
+
+
+ Chapter 1: Decisions
+
+Everybody thinks managers make decisions, and that decision-making is
+important. The bigger and more painful the decision, the bigger the
+manager must be to make it. That's very deep and obvious, but it's not
+actually true.
+
+The name of the game is to _avoid_ having to make a decision. In
+particular, if somebody tells you "choose (a) or (b), we really need you
+to decide on this", you're in trouble as a manager. The people you
+manage had better know the details better than you, so if they come to
+you for a technical decision, you're screwed. You're clearly not
+competent to make that decision for them.
+
+(Corollary:if the people you manage don't know the details better than
+you, you're also screwed, although for a totally different reason.
+Namely that you are in the wrong job, and that _they_ should be managing
+your brilliance instead).
+
+So the name of the game is to _avoid_ decisions, at least the big and
+painful ones. Making small and non-consequential decisions is fine, and
+makes you look like you know what you're doing, so what a kernel manager
+needs to do is to turn the big and painful ones into small things where
+nobody really cares.
+
+It helps to realize that the key difference between a big decision and a
+small one is whether you can fix your decision afterwards. Any decision
+can be made small by just always making sure that if you were wrong (and
+you _will_ be wrong), you can always undo the damage later by
+backtracking. Suddenly, you get to be doubly managerial for making
+_two_ inconsequential decisions - the wrong one _and_ the right one.
+
+And people will even see that as true leadership (*cough* bullshit
+*cough*).
+
+Thus the key to avoiding big decisions becomes to just avoiding to do
+things that can't be undone. Don't get ushered into a corner from which
+you cannot escape. A cornered rat may be dangerous - a cornered manager
+is just pitiful.
+
+It turns out that since nobody would be stupid enough to ever really let
+a kernel manager have huge fiscal responsibility _anyway_, it's usually
+fairly easy to backtrack. Since you're not going to be able to waste
+huge amounts of money that you might not be able to repay, the only
+thing you can backtrack on is a technical decision, and there
+back-tracking is very easy: just tell everybody that you were an
+incompetent nincompoop, say you're sorry, and undo all the worthless
+work you had people work on for the last year. Suddenly the decision
+you made a year ago wasn't a big decision after all, since it could be
+easily undone.
+
+It turns out that some people have trouble with this approach, for two
+reasons:
+ - admitting you were an idiot is harder than it looks. We all like to
+ maintain appearances, and coming out in public to say that you were
+ wrong is sometimes very hard indeed.
+ - having somebody tell you that what you worked on for the last year
+ wasn't worthwhile after all can be hard on the poor lowly engineers
+ too, and while the actual _work_ was easy enough to undo by just
+ deleting it, you may have irrevocably lost the trust of that
+ engineer. And remember: "irrevocable" was what we tried to avoid in
+ the first place, and your decision ended up being a big one after
+ all.
+
+Happily, both of these reasons can be mitigated effectively by just
+admitting up-front that you don't have a friggin' clue, and telling
+people ahead of the fact that your decision is purely preliminary, and
+might be the wrong thing. You should always reserve the right to change
+your mind, and make people very _aware_ of that. And it's much easier
+to admit that you are stupid when you haven't _yet_ done the really
+stupid thing.
+
+Then, when it really does turn out to be stupid, people just roll their
+eyes and say "Oops, he did it again".
+
+This preemptive admission of incompetence might also make the people who
+actually do the work also think twice about whether it's worth doing or
+not. After all, if _they_ aren't certain whether it's a good idea, you
+sure as hell shouldn't encourage them by promising them that what they
+work on will be included. Make them at least think twice before they
+embark on a big endeavor.
+
+Remember: they'd better know more about the details than you do, and
+they usually already think they have the answer to everything. The best
+thing you can do as a manager is not to instill confidence, but rather a
+healthy dose of critical thinking on what they do.
+
+Btw, another way to avoid a decision is to plaintively just whine "can't
+we just do both?" and look pitiful. Trust me, it works. If it's not
+clear which approach is better, they'll eventually figure it out. The
+answer may end up being that both teams get so frustrated by the
+situation that they just give up.
+
+That may sound like a failure, but it's usually a sign that there was
+something wrong with both projects, and the reason the people involved
+couldn't decide was that they were both wrong. You end up coming up
+smelling like roses, and you avoided yet another decision that you could
+have screwed up on.
+
+
+ Chapter 2: People
+
+Most people are idiots, and being a manager means you'll have to deal
+with it, and perhaps more importantly, that _they_ have to deal with
+_you_.
+
+It turns out that while it's easy to undo technical mistakes, it's not
+as easy to undo personality disorders. You just have to live with
+theirs - and yours.
+
+However, in order to prepare yourself as a kernel manager, it's best to
+remember not to burn any bridges, bomb any innocent villagers, or
+alienate too many kernel developers. It turns out that alienating people
+is fairly easy, and un-alienating them is hard. Thus "alienating"
+immediately falls under the heading of "not reversible", and becomes a
+no-no according to Chapter 1.
+
+There's just a few simple rules here:
+ (1) don't call people d*ckheads (at least not in public)
+ (2) learn how to apologize when you forgot rule (1)
+
+The problem with #1 is that it's very easy to do, since you can say
+"you're a d*ckhead" in millions of different ways (*), sometimes without
+even realizing it, and almost always with a white-hot conviction that
+you are right.
+
+And the more convinced you are that you are right (and let's face it,
+you can call just about _anybody_ a d*ckhead, and you often _will_ be
+right), the harder it ends up being to apologize afterwards.
+
+To solve this problem, you really only have two options:
+ - get really good at apologies
+ - spread the "love" out so evenly that nobody really ends up feeling
+ like they get unfairly targeted. Make it inventive enough, and they
+ might even be amused.
+
+The option of being unfailingly polite really doesn't exist. Nobody will
+trust somebody who is so clearly hiding his true character.
+
+(*) Paul Simon sang "Fifty Ways to Leave Your Lover", because quite
+frankly, "A Million Ways to Tell a Developer He Is a D*ckhead" doesn't
+scan nearly as well. But I'm sure he thought about it.
+
+
+ Chapter 3: People II - the Good Kind
+
+While it turns out that most people are idiots, the corollary to that is
+sadly that you are one too, and that while we can all bask in the secure
+knowledge that we're better than the average person (let's face it,
+nobody ever believes that they're average or below-average), we should
+also admit that we're not the sharpest knife around, and there will be
+other people that are less of an idiot than you are.
+
+Some people react badly to smart people. Others take advantage of them.
+
+Make sure that you, as a kernel maintainer, are in the second group.
+Suck up to them, because they are the people who will make your job
+easier. In particular, they'll be able to make your decisions for you,
+which is what the game is all about.
+
+So when you find somebody smarter than you are, just coast along. Your
+management responsibilities largely become ones of saying "Sounds like a
+good idea - go wild", or "That sounds good, but what about xxx?". The
+second version in particular is a great way to either learn something
+new about "xxx" or seem _extra_ managerial by pointing out something the
+smarter person hadn't thought about. In either case, you win.
+
+One thing to look out for is to realize that greatness in one area does
+not necessarily translate to other areas. So you might prod people in
+specific directions, but let's face it, they might be good at what they
+do, and suck at everything else. The good news is that people tend to
+naturally gravitate back to what they are good at, so it's not like you
+are doing something irreversible when you _do_ prod them in some
+direction, just don't push too hard.
+
+
+ Chapter 4: Placing blame
+
+Things will go wrong, and people want somebody to blame. Tag, you're it.
+
+It's not actually that hard to accept the blame, especially if people
+kind of realize that it wasn't _all_ your fault. Which brings us to the
+best way of taking the blame: do it for another guy. You'll feel good
+for taking the fall, he'll feel good about not getting blamed, and the
+guy who lost his whole 36GB porn-collection because of your incompetence
+will grudgingly admit that you at least didn't try to weasel out of it.
+
+Then make the developer who really screwed up (if you can find him) know
+_in_private_ that he screwed up. Not just so he can avoid it in the
+future, but so that he knows he owes you one. And, perhaps even more
+importantly, he's also likely the person who can fix it. Because, let's
+face it, it sure ain't you.
+
+Taking the blame is also why you get to be manager in the first place.
+It's part of what makes people trust you, and allow you the potential
+glory, because you're the one who gets to say "I screwed up". And if
+you've followed the previous rules, you'll be pretty good at saying that
+by now.
+
+
+ Chapter 5: Things to avoid
+
+There's one thing people hate even more than being called "d*ckhead",
+and that is being called a "d*ckhead" in a sanctimonious voice. The
+first you can apologize for, the second one you won't really get the
+chance. They likely will no longer be listening even if you otherwise
+do a good job.
+
+We all think we're better than anybody else, which means that when
+somebody else puts on airs, it _really_ rubs us the wrong way. You may
+be morally and intellectually superior to everybody around you, but
+don't try to make it too obvious unless you really _intend_ to irritate
+somebody (*).
+
+Similarly, don't be too polite or subtle about things. Politeness easily
+ends up going overboard and hiding the problem, and as they say, "On the
+internet, nobody can hear you being subtle". Use a big blunt object to
+hammer the point in, because you can't really depend on people getting
+your point otherwise.
+
+Some humor can help pad both the bluntness and the moralizing. Going
+overboard to the point of being ridiculous can drive a point home
+without making it painful to the recipient, who just thinks you're being
+silly. It can thus help get through the personal mental block we all
+have about criticism.
+
+(*) Hint: internet newsgroups that are not directly related to your work
+are great ways to take out your frustrations at other people. Write
+insulting posts with a sneer just to get into a good flame every once in
+a while, and you'll feel cleansed. Just don't crap too close to home.
+
+
+ Chapter 6: Why me?
+
+Since your main responsibility seems to be to take the blame for other
+peoples mistakes, and make it painfully obvious to everybody else that
+you're incompetent, the obvious question becomes one of why do it in the
+first place?
+
+First off, while you may or may not get screaming teenage girls (or
+boys, let's not be judgmental or sexist here) knocking on your dressing
+room door, you _will_ get an immense feeling of personal accomplishment
+for being "in charge". Never mind the fact that you're really leading
+by trying to keep up with everybody else and running after them as fast
+as you can. Everybody will still think you're the person in charge.
+
+It's a great job if you can hack it.
diff --git a/Documentation/PCI/00-INDEX b/Documentation/PCI/00-INDEX
new file mode 100644
index 000000000..147231f16
--- /dev/null
+++ b/Documentation/PCI/00-INDEX
@@ -0,0 +1,14 @@
+00-INDEX
+ - this file
+MSI-HOWTO.txt
+ - the Message Signaled Interrupts (MSI) Driver Guide HOWTO and FAQ.
+PCIEBUS-HOWTO.txt
+ - a guide describing the PCI Express Port Bus driver
+pci-error-recovery.txt
+ - info on PCI error recovery
+pci-iov-howto.txt
+ - the PCI Express I/O Virtualization HOWTO
+pci.txt
+ - info on the PCI subsystem for device driver authors
+pcieaer-howto.txt
+ - the PCI Express Advanced Error Reporting Driver Guide HOWTO
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
new file mode 100644
index 000000000..1179850f4
--- /dev/null
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -0,0 +1,587 @@
+ The MSI Driver Guide HOWTO
+ Tom L Nguyen tom.l.nguyen@intel.com
+ 10/03/2003
+ Revised Feb 12, 2004 by Martine Silbermann
+ email: Martine.Silbermann@hp.com
+ Revised Jun 25, 2004 by Tom L Nguyen
+ Revised Jul 9, 2008 by Matthew Wilcox <willy@linux.intel.com>
+ Copyright 2003, 2008 Intel Corporation
+
+1. About this guide
+
+This guide describes the basics of Message Signaled Interrupts (MSIs),
+the advantages of using MSI over traditional interrupt mechanisms, how
+to change your driver to use MSI or MSI-X and some basic diagnostics to
+try if a device doesn't support MSIs.
+
+
+2. What are MSIs?
+
+A Message Signaled Interrupt is a write from the device to a special
+address which causes an interrupt to be received by the CPU.
+
+The MSI capability was first specified in PCI 2.2 and was later enhanced
+in PCI 3.0 to allow each interrupt to be masked individually. The MSI-X
+capability was also introduced with PCI 3.0. It supports more interrupts
+per device than MSI and allows interrupts to be independently configured.
+
+Devices may support both MSI and MSI-X, but only one can be enabled at
+a time.
+
+
+3. Why use MSIs?
+
+There are three reasons why using MSIs can give an advantage over
+traditional pin-based interrupts.
+
+Pin-based PCI interrupts are often shared amongst several devices.
+To support this, the kernel must call each interrupt handler associated
+with an interrupt, which leads to reduced performance for the system as
+a whole. MSIs are never shared, so this problem cannot arise.
+
+When a device writes data to memory, then raises a pin-based interrupt,
+it is possible that the interrupt may arrive before all the data has
+arrived in memory (this becomes more likely with devices behind PCI-PCI
+bridges). In order to ensure that all the data has arrived in memory,
+the interrupt handler must read a register on the device which raised
+the interrupt. PCI transaction ordering rules require that all the data
+arrive in memory before the value may be returned from the register.
+Using MSIs avoids this problem as the interrupt-generating write cannot
+pass the data writes, so by the time the interrupt is raised, the driver
+knows that all the data has arrived in memory.
+
+PCI devices can only support a single pin-based interrupt per function.
+Often drivers have to query the device to find out what event has
+occurred, slowing down interrupt handling for the common case. With
+MSIs, a device can support more interrupts, allowing each interrupt
+to be specialised to a different purpose. One possible design gives
+infrequent conditions (such as errors) their own interrupt which allows
+the driver to handle the normal interrupt handling path more efficiently.
+Other possible designs include giving one interrupt to each packet queue
+in a network card or each port in a storage controller.
+
+
+4. How to use MSIs
+
+PCI devices are initialised to use pin-based interrupts. The device
+driver has to set up the device to use MSI or MSI-X. Not all machines
+support MSIs correctly, and for those machines, the APIs described below
+will simply fail and the device will continue to use pin-based interrupts.
+
+4.1 Include kernel support for MSIs
+
+To support MSI or MSI-X, the kernel must be built with the CONFIG_PCI_MSI
+option enabled. This option is only available on some architectures,
+and it may depend on some other options also being set. For example,
+on x86, you must also enable X86_UP_APIC or SMP in order to see the
+CONFIG_PCI_MSI option.
+
+4.2 Using MSI
+
+Most of the hard work is done for the driver in the PCI layer. It simply
+has to request that the PCI layer set up the MSI capability for this
+device.
+
+4.2.1 pci_enable_msi
+
+int pci_enable_msi(struct pci_dev *dev)
+
+A successful call allocates ONE interrupt to the device, regardless
+of how many MSIs the device supports. The device is switched from
+pin-based interrupt mode to MSI mode. The dev->irq number is changed
+to a new number which represents the message signaled interrupt;
+consequently, this function should be called before the driver calls
+request_irq(), because an MSI is delivered via a vector that is
+different from the vector of a pin-based interrupt.
+
+4.2.2 pci_enable_msi_range
+
+int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
+
+This function allows a device driver to request any number of MSI
+interrupts within specified range from 'minvec' to 'maxvec'.
+
+If this function returns a positive number it indicates the number of
+MSI interrupts that have been successfully allocated. In this case
+the device is switched from pin-based interrupt mode to MSI mode and
+updates dev->irq to be the lowest of the new interrupts assigned to it.
+The other interrupts assigned to the device are in the range dev->irq
+to dev->irq + returned value - 1. Device driver can use the returned
+number of successfully allocated MSI interrupts to further allocate
+and initialize device resources.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to request any more MSI interrupts for
+this device.
+
+This function should be called before the driver calls request_irq(),
+because MSI interrupts are delivered via vectors that are different
+from the vector of a pin-based interrupt.
+
+It is ideal if drivers can cope with a variable number of MSI interrupts;
+there are many reasons why the platform may not be able to provide the
+exact number that a driver asks for.
+
+There could be devices that can not operate with just any number of MSI
+interrupts within a range. See chapter 4.3.1.3 to get the idea how to
+handle such devices for MSI-X - the same logic applies to MSI.
+
+4.2.1.1 Maximum possible number of MSI interrupts
+
+The typical usage of MSI interrupts is to allocate as many vectors as
+possible, likely up to the limit returned by pci_msi_vec_count() function:
+
+static int foo_driver_enable_msi(struct pci_dev *pdev, int nvec)
+{
+ return pci_enable_msi_range(pdev, 1, nvec);
+}
+
+Note the value of 'minvec' parameter is 1. As 'minvec' is inclusive,
+the value of 0 would be meaningless and could result in error.
+
+Some devices have a minimal limit on number of MSI interrupts.
+In this case the function could look like this:
+
+static int foo_driver_enable_msi(struct pci_dev *pdev, int nvec)
+{
+ return pci_enable_msi_range(pdev, FOO_DRIVER_MINIMUM_NVEC, nvec);
+}
+
+4.2.1.2 Exact number of MSI interrupts
+
+If a driver is unable or unwilling to deal with a variable number of MSI
+interrupts it could request a particular number of interrupts by passing
+that number to pci_enable_msi_range() function as both 'minvec' and 'maxvec'
+parameters:
+
+static int foo_driver_enable_msi(struct pci_dev *pdev, int nvec)
+{
+ return pci_enable_msi_range(pdev, nvec, nvec);
+}
+
+Note, unlike pci_enable_msi_exact() function, which could be also used to
+enable a particular number of MSI-X interrupts, pci_enable_msi_range()
+returns either a negative errno or 'nvec' (not negative errno or 0 - as
+pci_enable_msi_exact() does).
+
+4.2.1.3 Single MSI mode
+
+The most notorious example of the request type described above is
+enabling the single MSI mode for a device. It could be done by passing
+two 1s as 'minvec' and 'maxvec':
+
+static int foo_driver_enable_single_msi(struct pci_dev *pdev)
+{
+ return pci_enable_msi_range(pdev, 1, 1);
+}
+
+Note, unlike pci_enable_msi() function, which could be also used to
+enable the single MSI mode, pci_enable_msi_range() returns either a
+negative errno or 1 (not negative errno or 0 - as pci_enable_msi()
+does).
+
+4.2.3 pci_enable_msi_exact
+
+int pci_enable_msi_exact(struct pci_dev *dev, int nvec)
+
+This variation on pci_enable_msi_range() call allows a device driver to
+request exactly 'nvec' MSIs.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to request any more MSI interrupts for
+this device.
+
+By contrast with pci_enable_msi_range() function, pci_enable_msi_exact()
+returns zero in case of success, which indicates MSI interrupts have been
+successfully allocated.
+
+4.2.4 pci_disable_msi
+
+void pci_disable_msi(struct pci_dev *dev)
+
+This function should be used to undo the effect of pci_enable_msi_range().
+Calling it restores dev->irq to the pin-based interrupt number and frees
+the previously allocated MSIs. The interrupts may subsequently be assigned
+to another device, so drivers should not cache the value of dev->irq.
+
+Before calling this function, a device driver must always call free_irq()
+on any interrupt for which it previously called request_irq().
+Failure to do so results in a BUG_ON(), leaving the device with
+MSI enabled and thus leaking its vector.
+
+4.2.4 pci_msi_vec_count
+
+int pci_msi_vec_count(struct pci_dev *dev)
+
+This function could be used to retrieve the number of MSI vectors the
+device requested (via the Multiple Message Capable register). The MSI
+specification only allows the returned value to be a power of two,
+up to a maximum of 2^5 (32).
+
+If this function returns a negative number, it indicates the device is
+not capable of sending MSIs.
+
+If this function returns a positive number, it indicates the maximum
+number of MSI interrupt vectors that could be allocated.
+
+4.3 Using MSI-X
+
+The MSI-X capability is much more flexible than the MSI capability.
+It supports up to 2048 interrupts, each of which can be controlled
+independently. To support this flexibility, drivers must use an array of
+`struct msix_entry':
+
+struct msix_entry {
+ u16 vector; /* kernel uses to write alloc vector */
+ u16 entry; /* driver uses to specify entry */
+};
+
+This allows for the device to use these interrupts in a sparse fashion;
+for example, it could use interrupts 3 and 1027 and yet allocate only a
+two-element array. The driver is expected to fill in the 'entry' value
+in each element of the array to indicate for which entries the kernel
+should assign interrupts; it is invalid to fill in two entries with the
+same number.
+
+4.3.1 pci_enable_msix_range
+
+int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
+ int minvec, int maxvec)
+
+Calling this function asks the PCI subsystem to allocate any number of
+MSI-X interrupts within specified range from 'minvec' to 'maxvec'.
+The 'entries' argument is a pointer to an array of msix_entry structs
+which should be at least 'maxvec' entries in size.
+
+On success, the device is switched into MSI-X mode and the function
+returns the number of MSI-X interrupts that have been successfully
+allocated. In this case the 'vector' member in entries numbered from
+0 to the returned value - 1 is populated with the interrupt number;
+the driver should then call request_irq() for each 'vector' that it
+decides to use. The device driver is responsible for keeping track of the
+interrupts assigned to the MSI-X vectors so it can free them again later.
+Device driver can use the returned number of successfully allocated MSI-X
+interrupts to further allocate and initialize device resources.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to allocate any more MSI-X interrupts for
+this device.
+
+This function, in contrast with pci_enable_msi_range(), does not adjust
+dev->irq. The device will not generate interrupts for this interrupt
+number once MSI-X is enabled.
+
+Device drivers should normally call this function once per device
+during the initialization phase.
+
+It is ideal if drivers can cope with a variable number of MSI-X interrupts;
+there are many reasons why the platform may not be able to provide the
+exact number that a driver asks for.
+
+There could be devices that can not operate with just any number of MSI-X
+interrupts within a range. E.g., an network adapter might need let's say
+four vectors per each queue it provides. Therefore, a number of MSI-X
+interrupts allocated should be a multiple of four. In this case interface
+pci_enable_msix_range() can not be used alone to request MSI-X interrupts
+(since it can allocate any number within the range, without any notion of
+the multiple of four) and the device driver should master a custom logic
+to request the required number of MSI-X interrupts.
+
+4.3.1.1 Maximum possible number of MSI-X interrupts
+
+The typical usage of MSI-X interrupts is to allocate as many vectors as
+possible, likely up to the limit returned by pci_msix_vec_count() function:
+
+static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
+{
+ return pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+ 1, nvec);
+}
+
+Note the value of 'minvec' parameter is 1. As 'minvec' is inclusive,
+the value of 0 would be meaningless and could result in error.
+
+Some devices have a minimal limit on number of MSI-X interrupts.
+In this case the function could look like this:
+
+static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
+{
+ return pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+ FOO_DRIVER_MINIMUM_NVEC, nvec);
+}
+
+4.3.1.2 Exact number of MSI-X interrupts
+
+If a driver is unable or unwilling to deal with a variable number of MSI-X
+interrupts it could request a particular number of interrupts by passing
+that number to pci_enable_msix_range() function as both 'minvec' and 'maxvec'
+parameters:
+
+static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec)
+{
+ return pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+ nvec, nvec);
+}
+
+Note, unlike pci_enable_msix_exact() function, which could be also used to
+enable a particular number of MSI-X interrupts, pci_enable_msix_range()
+returns either a negative errno or 'nvec' (not negative errno or 0 - as
+pci_enable_msix_exact() does).
+
+4.3.1.3 Specific requirements to the number of MSI-X interrupts
+
+As noted above, there could be devices that can not operate with just any
+number of MSI-X interrupts within a range. E.g., let's assume a device that
+is only capable sending the number of MSI-X interrupts which is a power of
+two. A routine that enables MSI-X mode for such device might look like this:
+
+/*
+ * Assume 'minvec' and 'maxvec' are non-zero
+ */
+static int foo_driver_enable_msix(struct foo_adapter *adapter,
+ int minvec, int maxvec)
+{
+ int rc;
+
+ minvec = roundup_pow_of_two(minvec);
+ maxvec = rounddown_pow_of_two(maxvec);
+
+ if (minvec > maxvec)
+ return -ERANGE;
+
+retry:
+ rc = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
+ maxvec, maxvec);
+ /*
+ * -ENOSPC is the only error code allowed to be analyzed
+ */
+ if (rc == -ENOSPC) {
+ if (maxvec == 1)
+ return -ENOSPC;
+
+ maxvec /= 2;
+
+ if (minvec > maxvec)
+ return -ENOSPC;
+
+ goto retry;
+ }
+
+ return rc;
+}
+
+Note how pci_enable_msix_range() return value is analyzed for a fallback -
+any error code other than -ENOSPC indicates a fatal error and should not
+be retried.
+
+4.3.2 pci_enable_msix_exact
+
+int pci_enable_msix_exact(struct pci_dev *dev,
+ struct msix_entry *entries, int nvec)
+
+This variation on pci_enable_msix_range() call allows a device driver to
+request exactly 'nvec' MSI-Xs.
+
+If this function returns a negative number, it indicates an error and
+the driver should not attempt to allocate any more MSI-X interrupts for
+this device.
+
+By contrast with pci_enable_msix_range() function, pci_enable_msix_exact()
+returns zero in case of success, which indicates MSI-X interrupts have been
+successfully allocated.
+
+Another version of a routine that enables MSI-X mode for a device with
+specific requirements described in chapter 4.3.1.3 might look like this:
+
+/*
+ * Assume 'minvec' and 'maxvec' are non-zero
+ */
+static int foo_driver_enable_msix(struct foo_adapter *adapter,
+ int minvec, int maxvec)
+{
+ int rc;
+
+ minvec = roundup_pow_of_two(minvec);
+ maxvec = rounddown_pow_of_two(maxvec);
+
+ if (minvec > maxvec)
+ return -ERANGE;
+
+retry:
+ rc = pci_enable_msix_exact(adapter->pdev,
+ adapter->msix_entries, maxvec);
+
+ /*
+ * -ENOSPC is the only error code allowed to be analyzed
+ */
+ if (rc == -ENOSPC) {
+ if (maxvec == 1)
+ return -ENOSPC;
+
+ maxvec /= 2;
+
+ if (minvec > maxvec)
+ return -ENOSPC;
+
+ goto retry;
+ } else if (rc < 0) {
+ return rc;
+ }
+
+ return maxvec;
+}
+
+4.3.3 pci_disable_msix
+
+void pci_disable_msix(struct pci_dev *dev)
+
+This function should be used to undo the effect of pci_enable_msix_range().
+It frees the previously allocated MSI-X interrupts. The interrupts may
+subsequently be assigned to another device, so drivers should not cache
+the value of the 'vector' elements over a call to pci_disable_msix().
+
+Before calling this function, a device driver must always call free_irq()
+on any interrupt for which it previously called request_irq().
+Failure to do so results in a BUG_ON(), leaving the device with
+MSI-X enabled and thus leaking its vector.
+
+4.3.3 The MSI-X Table
+
+The MSI-X capability specifies a BAR and offset within that BAR for the
+MSI-X Table. This address is mapped by the PCI subsystem, and should not
+be accessed directly by the device driver. If the driver wishes to
+mask or unmask an interrupt, it should call disable_irq() / enable_irq().
+
+4.3.4 pci_msix_vec_count
+
+int pci_msix_vec_count(struct pci_dev *dev)
+
+This function could be used to retrieve number of entries in the device
+MSI-X table.
+
+If this function returns a negative number, it indicates the device is
+not capable of sending MSI-Xs.
+
+If this function returns a positive number, it indicates the maximum
+number of MSI-X interrupt vectors that could be allocated.
+
+4.4 Handling devices implementing both MSI and MSI-X capabilities
+
+If a device implements both MSI and MSI-X capabilities, it can
+run in either MSI mode or MSI-X mode, but not both simultaneously.
+This is a requirement of the PCI spec, and it is enforced by the
+PCI layer. Calling pci_enable_msi_range() when MSI-X is already
+enabled or pci_enable_msix_range() when MSI is already enabled
+results in an error. If a device driver wishes to switch between MSI
+and MSI-X at runtime, it must first quiesce the device, then switch
+it back to pin-interrupt mode, before calling pci_enable_msi_range()
+or pci_enable_msix_range() and resuming operation. This is not expected
+to be a common operation but may be useful for debugging or testing
+during development.
+
+4.5 Considerations when using MSIs
+
+4.5.1 Choosing between MSI-X and MSI
+
+If your device supports both MSI-X and MSI capabilities, you should use
+the MSI-X facilities in preference to the MSI facilities. As mentioned
+above, MSI-X supports any number of interrupts between 1 and 2048.
+In contrast, MSI is restricted to a maximum of 32 interrupts (and
+must be a power of two). In addition, the MSI interrupt vectors must
+be allocated consecutively, so the system might not be able to allocate
+as many vectors for MSI as it could for MSI-X. On some platforms, MSI
+interrupts must all be targeted at the same set of CPUs whereas MSI-X
+interrupts can all be targeted at different CPUs.
+
+4.5.2 Spinlocks
+
+Most device drivers have a per-device spinlock which is taken in the
+interrupt handler. With pin-based interrupts or a single MSI, it is not
+necessary to disable interrupts (Linux guarantees the same interrupt will
+not be re-entered). If a device uses multiple interrupts, the driver
+must disable interrupts while the lock is held. If the device sends
+a different interrupt, the driver will deadlock trying to recursively
+acquire the spinlock. Such deadlocks can be avoided by using
+spin_lock_irqsave() or spin_lock_irq() which disable local interrupts
+and acquire the lock (see Documentation/DocBook/kernel-locking).
+
+4.6 How to tell whether MSI/MSI-X is enabled on a device
+
+Using 'lspci -v' (as root) may show some devices with "MSI", "Message
+Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities
+has an 'Enable' flag which is followed with either "+" (enabled)
+or "-" (disabled).
+
+
+5. MSI quirks
+
+Several PCI chipsets or devices are known not to support MSIs.
+The PCI stack provides three ways to disable MSIs:
+
+1. globally
+2. on all devices behind a specific bridge
+3. on a single device
+
+5.1. Disabling MSIs globally
+
+Some host chipsets simply don't support MSIs properly. If we're
+lucky, the manufacturer knows this and has indicated it in the ACPI
+FADT table. In this case, Linux automatically disables MSIs.
+Some boards don't include this information in the table and so we have
+to detect them ourselves. The complete list of these is found near the
+quirk_disable_all_msi() function in drivers/pci/quirks.c.
+
+If you have a board which has problems with MSIs, you can pass pci=nomsi
+on the kernel command line to disable MSIs on all devices. It would be
+in your best interests to report the problem to linux-pci@vger.kernel.org
+including a full 'lspci -v' so we can add the quirks to the kernel.
+
+5.2. Disabling MSIs below a bridge
+
+Some PCI bridges are not able to route MSIs between busses properly.
+In this case, MSIs must be disabled on all devices behind the bridge.
+
+Some bridges allow you to enable MSIs by changing some bits in their
+PCI configuration space (especially the Hypertransport chipsets such
+as the nVidia nForce and Serverworks HT2000). As with host chipsets,
+Linux mostly knows about them and automatically enables MSIs if it can.
+If you have a bridge unknown to Linux, you can enable
+MSIs in configuration space using whatever method you know works, then
+enable MSIs on that bridge by doing:
+
+ echo 1 > /sys/bus/pci/devices/$bridge/msi_bus
+
+where $bridge is the PCI address of the bridge you've enabled (eg
+0000:00:0e.0).
+
+To disable MSIs, echo 0 instead of 1. Changing this value should be
+done with caution as it could break interrupt handling for all devices
+below this bridge.
+
+Again, please notify linux-pci@vger.kernel.org of any bridges that need
+special handling.
+
+5.3. Disabling MSIs on a single device
+
+Some devices are known to have faulty MSI implementations. Usually this
+is handled in the individual device driver, but occasionally it's necessary
+to handle this with a quirk. Some drivers have an option to disable use
+of MSI. While this is a convenient workaround for the driver author,
+it is not good practice, and should not be emulated.
+
+5.4. Finding why MSIs are disabled on a device
+
+From the above three sections, you can see that there are many reasons
+why MSIs may not be enabled for a given device. Your first step should
+be to examine your dmesg carefully to determine whether MSIs are enabled
+for your machine. You should also check your .config to be sure you
+have enabled CONFIG_PCI_MSI.
+
+Then, 'lspci -t' gives the list of bridges above a device. Reading
+/sys/bus/pci/devices/*/msi_bus will tell you whether MSIs are enabled (1)
+or disabled (0). If 0 is found in any of the msi_bus files belonging
+to bridges between the PCI root and the device, MSIs are disabled.
+
+It is also worth checking the device driver to see whether it supports MSIs.
+For example, it may contain calls to pci_enable_msi_range() or
+pci_enable_msix_range().
diff --git a/Documentation/PCI/PCIEBUS-HOWTO.txt b/Documentation/PCI/PCIEBUS-HOWTO.txt
new file mode 100644
index 000000000..6bd5f372a
--- /dev/null
+++ b/Documentation/PCI/PCIEBUS-HOWTO.txt
@@ -0,0 +1,217 @@
+ The PCI Express Port Bus Driver Guide HOWTO
+ Tom L Nguyen tom.l.nguyen@intel.com
+ 11/03/2004
+
+1. About this guide
+
+This guide describes the basics of the PCI Express Port Bus driver
+and provides information on how to enable the service drivers to
+register/unregister with the PCI Express Port Bus Driver.
+
+2. Copyright 2004 Intel Corporation
+
+3. What is the PCI Express Port Bus Driver
+
+A PCI Express Port is a logical PCI-PCI Bridge structure. There
+are two types of PCI Express Port: the Root Port and the Switch
+Port. The Root Port originates a PCI Express link from a PCI Express
+Root Complex and the Switch Port connects PCI Express links to
+internal logical PCI buses. The Switch Port, which has its secondary
+bus representing the switch's internal routing logic, is called the
+switch's Upstream Port. The switch's Downstream Port is bridging from
+switch's internal routing bus to a bus representing the downstream
+PCI Express link from the PCI Express Switch.
+
+A PCI Express Port can provide up to four distinct functions,
+referred to in this document as services, depending on its port type.
+PCI Express Port's services include native hotplug support (HP),
+power management event support (PME), advanced error reporting
+support (AER), and virtual channel support (VC). These services may
+be handled by a single complex driver or be individually distributed
+and handled by corresponding service drivers.
+
+4. Why use the PCI Express Port Bus Driver?
+
+In existing Linux kernels, the Linux Device Driver Model allows a
+physical device to be handled by only a single driver. The PCI
+Express Port is a PCI-PCI Bridge device with multiple distinct
+services. To maintain a clean and simple solution each service
+may have its own software service driver. In this case several
+service drivers will compete for a single PCI-PCI Bridge device.
+For example, if the PCI Express Root Port native hotplug service
+driver is loaded first, it claims a PCI-PCI Bridge Root Port. The
+kernel therefore does not load other service drivers for that Root
+Port. In other words, it is impossible to have multiple service
+drivers load and run on a PCI-PCI Bridge device simultaneously
+using the current driver model.
+
+To enable multiple service drivers running simultaneously requires
+having a PCI Express Port Bus driver, which manages all populated
+PCI Express Ports and distributes all provided service requests
+to the corresponding service drivers as required. Some key
+advantages of using the PCI Express Port Bus driver are listed below:
+
+ - Allow multiple service drivers to run simultaneously on
+ a PCI-PCI Bridge Port device.
+
+ - Allow service drivers implemented in an independent
+ staged approach.
+
+ - Allow one service driver to run on multiple PCI-PCI Bridge
+ Port devices.
+
+ - Manage and distribute resources of a PCI-PCI Bridge Port
+ device to requested service drivers.
+
+5. Configuring the PCI Express Port Bus Driver vs. Service Drivers
+
+5.1 Including the PCI Express Port Bus Driver Support into the Kernel
+
+Including the PCI Express Port Bus driver depends on whether the PCI
+Express support is included in the kernel config. The kernel will
+automatically include the PCI Express Port Bus driver as a kernel
+driver when the PCI Express support is enabled in the kernel.
+
+5.2 Enabling Service Driver Support
+
+PCI device drivers are implemented based on Linux Device Driver Model.
+All service drivers are PCI device drivers. As discussed above, it is
+impossible to load any service driver once the kernel has loaded the
+PCI Express Port Bus Driver. To meet the PCI Express Port Bus Driver
+Model requires some minimal changes on existing service drivers that
+imposes no impact on the functionality of existing service drivers.
+
+A service driver is required to use the two APIs shown below to
+register its service with the PCI Express Port Bus driver (see
+section 5.2.1 & 5.2.2). It is important that a service driver
+initializes the pcie_port_service_driver data structure, included in
+header file /include/linux/pcieport_if.h, before calling these APIs.
+Failure to do so will result an identity mismatch, which prevents
+the PCI Express Port Bus driver from loading a service driver.
+
+5.2.1 pcie_port_service_register
+
+int pcie_port_service_register(struct pcie_port_service_driver *new)
+
+This API replaces the Linux Driver Model's pci_register_driver API. A
+service driver should always calls pcie_port_service_register at
+module init. Note that after service driver being loaded, calls
+such as pci_enable_device(dev) and pci_set_master(dev) are no longer
+necessary since these calls are executed by the PCI Port Bus driver.
+
+5.2.2 pcie_port_service_unregister
+
+void pcie_port_service_unregister(struct pcie_port_service_driver *new)
+
+pcie_port_service_unregister replaces the Linux Driver Model's
+pci_unregister_driver. It's always called by service driver when a
+module exits.
+
+5.2.3 Sample Code
+
+Below is sample service driver code to initialize the port service
+driver data structure.
+
+static struct pcie_port_service_id service_id[] = { {
+ .vendor = PCI_ANY_ID,
+ .device = PCI_ANY_ID,
+ .port_type = PCIE_RC_PORT,
+ .service_type = PCIE_PORT_SERVICE_AER,
+ }, { /* end: all zeroes */ }
+};
+
+static struct pcie_port_service_driver root_aerdrv = {
+ .name = (char *)device_name,
+ .id_table = &service_id[0],
+
+ .probe = aerdrv_load,
+ .remove = aerdrv_unload,
+
+ .suspend = aerdrv_suspend,
+ .resume = aerdrv_resume,
+};
+
+Below is a sample code for registering/unregistering a service
+driver.
+
+static int __init aerdrv_service_init(void)
+{
+ int retval = 0;
+
+ retval = pcie_port_service_register(&root_aerdrv);
+ if (!retval) {
+ /*
+ * FIX ME
+ */
+ }
+ return retval;
+}
+
+static void __exit aerdrv_service_exit(void)
+{
+ pcie_port_service_unregister(&root_aerdrv);
+}
+
+module_init(aerdrv_service_init);
+module_exit(aerdrv_service_exit);
+
+6. Possible Resource Conflicts
+
+Since all service drivers of a PCI-PCI Bridge Port device are
+allowed to run simultaneously, below lists a few of possible resource
+conflicts with proposed solutions.
+
+6.1 MSI Vector Resource
+
+The MSI capability structure enables a device software driver to call
+pci_enable_msi to request MSI based interrupts. Once MSI interrupts
+are enabled on a device, it stays in this mode until a device driver
+calls pci_disable_msi to disable MSI interrupts and revert back to
+INTx emulation mode. Since service drivers of the same PCI-PCI Bridge
+port share the same physical device, if an individual service driver
+calls pci_enable_msi/pci_disable_msi it may result unpredictable
+behavior. For example, two service drivers run simultaneously on the
+same physical Root Port. Both service drivers call pci_enable_msi to
+request MSI based interrupts. A service driver may not know whether
+any other service drivers have run on this Root Port. If either one
+of them calls pci_disable_msi, it puts the other service driver
+in a wrong interrupt mode.
+
+To avoid this situation all service drivers are not permitted to
+switch interrupt mode on its device. The PCI Express Port Bus driver
+is responsible for determining the interrupt mode and this should be
+transparent to service drivers. Service drivers need to know only
+the vector IRQ assigned to the field irq of struct pcie_device, which
+is passed in when the PCI Express Port Bus driver probes each service
+driver. Service drivers should use (struct pcie_device*)dev->irq to
+call request_irq/free_irq. In addition, the interrupt mode is stored
+in the field interrupt_mode of struct pcie_device.
+
+6.2 MSI-X Vector Resources
+
+Similar to the MSI a device driver for an MSI-X capable device can
+call pci_enable_msix to request MSI-X interrupts. All service drivers
+are not permitted to switch interrupt mode on its device. The PCI
+Express Port Bus driver is responsible for determining the interrupt
+mode and this should be transparent to service drivers. Any attempt
+by service driver to call pci_enable_msix/pci_disable_msix may
+result unpredictable behavior. Service drivers should use
+(struct pcie_device*)dev->irq and call request_irq/free_irq.
+
+6.3 PCI Memory/IO Mapped Regions
+
+Service drivers for PCI Express Power Management (PME), Advanced
+Error Reporting (AER), Hot-Plug (HP) and Virtual Channel (VC) access
+PCI configuration space on the PCI Express port. In all cases the
+registers accessed are independent of each other. This patch assumes
+that all service drivers will be well behaved and not overwrite
+other service driver's configuration settings.
+
+6.4 PCI Config Registers
+
+Each service driver runs its PCI config operations on its own
+capability structure except the PCI Express capability structure, in
+which Root Control register and Device Control register are shared
+between PME and AER. This patch assumes that all service drivers
+will be well behaved and not overwrite other service driver's
+configuration settings.
diff --git a/Documentation/PCI/pci-error-recovery.txt b/Documentation/PCI/pci-error-recovery.txt
new file mode 100644
index 000000000..ac26869c7
--- /dev/null
+++ b/Documentation/PCI/pci-error-recovery.txt
@@ -0,0 +1,431 @@
+
+ PCI Error Recovery
+ ------------------
+ February 2, 2006
+
+ Current document maintainer:
+ Linas Vepstas <linasvepstas@gmail.com>
+ updated by Richard Lary <rlary@us.ibm.com>
+ and Mike Mason <mmlnx@us.ibm.com> on 27-Jul-2009
+
+
+Many PCI bus controllers are able to detect a variety of hardware
+PCI errors on the bus, such as parity errors on the data and address
+busses, as well as SERR and PERR errors. Some of the more advanced
+chipsets are able to deal with these errors; these include PCI-E chipsets,
+and the PCI-host bridges found on IBM Power4, Power5 and Power6-based
+pSeries boxes. A typical action taken is to disconnect the affected device,
+halting all I/O to it. The goal of a disconnection is to avoid system
+corruption; for example, to halt system memory corruption due to DMA's
+to "wild" addresses. Typically, a reconnection mechanism is also
+offered, so that the affected PCI device(s) are reset and put back
+into working condition. The reset phase requires coordination
+between the affected device drivers and the PCI controller chip.
+This document describes a generic API for notifying device drivers
+of a bus disconnection, and then performing error recovery.
+This API is currently implemented in the 2.6.16 and later kernels.
+
+Reporting and recovery is performed in several steps. First, when
+a PCI hardware error has resulted in a bus disconnect, that event
+is reported as soon as possible to all affected device drivers,
+including multiple instances of a device driver on multi-function
+cards. This allows device drivers to avoid deadlocking in spinloops,
+waiting for some i/o-space register to change, when it never will.
+It also gives the drivers a chance to defer incoming I/O as
+needed.
+
+Next, recovery is performed in several stages. Most of the complexity
+is forced by the need to handle multi-function devices, that is,
+devices that have multiple device drivers associated with them.
+In the first stage, each driver is allowed to indicate what type
+of reset it desires, the choices being a simple re-enabling of I/O
+or requesting a slot reset.
+
+If any driver requests a slot reset, that is what will be done.
+
+After a reset and/or a re-enabling of I/O, all drivers are
+again notified, so that they may then perform any device setup/config
+that may be required. After these have all completed, a final
+"resume normal operations" event is sent out.
+
+The biggest reason for choosing a kernel-based implementation rather
+than a user-space implementation was the need to deal with bus
+disconnects of PCI devices attached to storage media, and, in particular,
+disconnects from devices holding the root file system. If the root
+file system is disconnected, a user-space mechanism would have to go
+through a large number of contortions to complete recovery. Almost all
+of the current Linux file systems are not tolerant of disconnection
+from/reconnection to their underlying block device. By contrast,
+bus errors are easy to manage in the device driver. Indeed, most
+device drivers already handle very similar recovery procedures;
+for example, the SCSI-generic layer already provides significant
+mechanisms for dealing with SCSI bus errors and SCSI bus resets.
+
+
+Detailed Design
+---------------
+Design and implementation details below, based on a chain of
+public email discussions with Ben Herrenschmidt, circa 5 April 2005.
+
+The error recovery API support is exposed to the driver in the form of
+a structure of function pointers pointed to by a new field in struct
+pci_driver. A driver that fails to provide the structure is "non-aware",
+and the actual recovery steps taken are platform dependent. The
+arch/powerpc implementation will simulate a PCI hotplug remove/add.
+
+This structure has the form:
+struct pci_error_handlers
+{
+ int (*error_detected)(struct pci_dev *dev, enum pci_channel_state);
+ int (*mmio_enabled)(struct pci_dev *dev);
+ int (*link_reset)(struct pci_dev *dev);
+ int (*slot_reset)(struct pci_dev *dev);
+ void (*resume)(struct pci_dev *dev);
+};
+
+The possible channel states are:
+enum pci_channel_state {
+ pci_channel_io_normal, /* I/O channel is in normal state */
+ pci_channel_io_frozen, /* I/O to channel is blocked */
+ pci_channel_io_perm_failure, /* PCI card is dead */
+};
+
+Possible return values are:
+enum pci_ers_result {
+ PCI_ERS_RESULT_NONE, /* no result/none/not supported in device driver */
+ PCI_ERS_RESULT_CAN_RECOVER, /* Device driver can recover without slot reset */
+ PCI_ERS_RESULT_NEED_RESET, /* Device driver wants slot to be reset. */
+ PCI_ERS_RESULT_DISCONNECT, /* Device has completely failed, is unrecoverable */
+ PCI_ERS_RESULT_RECOVERED, /* Device driver is fully recovered and operational */
+};
+
+A driver does not have to implement all of these callbacks; however,
+if it implements any, it must implement error_detected(). If a callback
+is not implemented, the corresponding feature is considered unsupported.
+For example, if mmio_enabled() and resume() aren't there, then it
+is assumed that the driver is not doing any direct recovery and requires
+a slot reset. If link_reset() is not implemented, the card is assumed to
+not care about link resets. Typically a driver will want to know about
+a slot_reset().
+
+The actual steps taken by a platform to recover from a PCI error
+event will be platform-dependent, but will follow the general
+sequence described below.
+
+STEP 0: Error Event
+-------------------
+A PCI bus error is detected by the PCI hardware. On powerpc, the slot
+is isolated, in that all I/O is blocked: all reads return 0xffffffff,
+all writes are ignored.
+
+
+STEP 1: Notification
+--------------------
+Platform calls the error_detected() callback on every instance of
+every driver affected by the error.
+
+At this point, the device might not be accessible anymore, depending on
+the platform (the slot will be isolated on powerpc). The driver may
+already have "noticed" the error because of a failing I/O, but this
+is the proper "synchronization point", that is, it gives the driver
+a chance to cleanup, waiting for pending stuff (timers, whatever, etc...)
+to complete; it can take semaphores, schedule, etc... everything but
+touch the device. Within this function and after it returns, the driver
+shouldn't do any new IOs. Called in task context. This is sort of a
+"quiesce" point. See note about interrupts at the end of this doc.
+
+All drivers participating in this system must implement this call.
+The driver must return one of the following result codes:
+ - PCI_ERS_RESULT_CAN_RECOVER:
+ Driver returns this if it thinks it might be able to recover
+ the HW by just banging IOs or if it wants to be given
+ a chance to extract some diagnostic information (see
+ mmio_enable, below).
+ - PCI_ERS_RESULT_NEED_RESET:
+ Driver returns this if it can't recover without a
+ slot reset.
+ - PCI_ERS_RESULT_DISCONNECT:
+ Driver returns this if it doesn't want to recover at all.
+
+The next step taken will depend on the result codes returned by the
+drivers.
+
+If all drivers on the segment/slot return PCI_ERS_RESULT_CAN_RECOVER,
+then the platform should re-enable IOs on the slot (or do nothing in
+particular, if the platform doesn't isolate slots), and recovery
+proceeds to STEP 2 (MMIO Enable).
+
+If any driver requested a slot reset (by returning PCI_ERS_RESULT_NEED_RESET),
+then recovery proceeds to STEP 4 (Slot Reset).
+
+If the platform is unable to recover the slot, the next step
+is STEP 6 (Permanent Failure).
+
+>>> The current powerpc implementation assumes that a device driver will
+>>> *not* schedule or semaphore in this routine; the current powerpc
+>>> implementation uses one kernel thread to notify all devices;
+>>> thus, if one device sleeps/schedules, all devices are affected.
+>>> Doing better requires complex multi-threaded logic in the error
+>>> recovery implementation (e.g. waiting for all notification threads
+>>> to "join" before proceeding with recovery.) This seems excessively
+>>> complex and not worth implementing.
+
+>>> The current powerpc implementation doesn't much care if the device
+>>> attempts I/O at this point, or not. I/O's will fail, returning
+>>> a value of 0xff on read, and writes will be dropped. If more than
+>>> EEH_MAX_FAILS I/O's are attempted to a frozen adapter, EEH
+>>> assumes that the device driver has gone into an infinite loop
+>>> and prints an error to syslog. A reboot is then required to
+>>> get the device working again.
+
+STEP 2: MMIO Enabled
+-------------------
+The platform re-enables MMIO to the device (but typically not the
+DMA), and then calls the mmio_enabled() callback on all affected
+device drivers.
+
+This is the "early recovery" call. IOs are allowed again, but DMA is
+not, with some restrictions. This is NOT a callback for the driver to
+start operations again, only to peek/poke at the device, extract diagnostic
+information, if any, and eventually do things like trigger a device local
+reset or some such, but not restart operations. This callback is made if
+all drivers on a segment agree that they can try to recover and if no automatic
+link reset was performed by the HW. If the platform can't just re-enable IOs
+without a slot reset or a link reset, it will not call this callback, and
+instead will have gone directly to STEP 3 (Link Reset) or STEP 4 (Slot Reset)
+
+>>> The following is proposed; no platform implements this yet:
+>>> Proposal: All I/O's should be done _synchronously_ from within
+>>> this callback, errors triggered by them will be returned via
+>>> the normal pci_check_whatever() API, no new error_detected()
+>>> callback will be issued due to an error happening here. However,
+>>> such an error might cause IOs to be re-blocked for the whole
+>>> segment, and thus invalidate the recovery that other devices
+>>> on the same segment might have done, forcing the whole segment
+>>> into one of the next states, that is, link reset or slot reset.
+
+The driver should return one of the following result codes:
+ - PCI_ERS_RESULT_RECOVERED
+ Driver returns this if it thinks the device is fully
+ functional and thinks it is ready to start
+ normal driver operations again. There is no
+ guarantee that the driver will actually be
+ allowed to proceed, as another driver on the
+ same segment might have failed and thus triggered a
+ slot reset on platforms that support it.
+
+ - PCI_ERS_RESULT_NEED_RESET
+ Driver returns this if it thinks the device is not
+ recoverable in its current state and it needs a slot
+ reset to proceed.
+
+ - PCI_ERS_RESULT_DISCONNECT
+ Same as above. Total failure, no recovery even after
+ reset driver dead. (To be defined more precisely)
+
+The next step taken depends on the results returned by the drivers.
+If all drivers returned PCI_ERS_RESULT_RECOVERED, then the platform
+proceeds to either STEP3 (Link Reset) or to STEP 5 (Resume Operations).
+
+If any driver returned PCI_ERS_RESULT_NEED_RESET, then the platform
+proceeds to STEP 4 (Slot Reset)
+
+STEP 3: Link Reset
+------------------
+The platform resets the link, and then calls the link_reset() callback
+on all affected device drivers. This is a PCI-Express specific state
+and is done whenever a non-fatal error has been detected that can be
+"solved" by resetting the link. This call informs the driver of the
+reset and the driver should check to see if the device appears to be
+in working condition.
+
+The driver is not supposed to restart normal driver I/O operations
+at this point. It should limit itself to "probing" the device to
+check its recoverability status. If all is right, then the platform
+will call resume() once all drivers have ack'd link_reset().
+
+ Result codes:
+ (identical to STEP 3 (MMIO Enabled)
+
+The platform then proceeds to either STEP 4 (Slot Reset) or STEP 5
+(Resume Operations).
+
+>>> The current powerpc implementation does not implement this callback.
+
+STEP 4: Slot Reset
+------------------
+
+In response to a return value of PCI_ERS_RESULT_NEED_RESET, the
+the platform will perform a slot reset on the requesting PCI device(s).
+The actual steps taken by a platform to perform a slot reset
+will be platform-dependent. Upon completion of slot reset, the
+platform will call the device slot_reset() callback.
+
+Powerpc platforms implement two levels of slot reset:
+soft reset(default) and fundamental(optional) reset.
+
+Powerpc soft reset consists of asserting the adapter #RST line and then
+restoring the PCI BAR's and PCI configuration header to a state
+that is equivalent to what it would be after a fresh system
+power-on followed by power-on BIOS/system firmware initialization.
+Soft reset is also known as hot-reset.
+
+Powerpc fundamental reset is supported by PCI Express cards only
+and results in device's state machines, hardware logic, port states and
+configuration registers to initialize to their default conditions.
+
+For most PCI devices, a soft reset will be sufficient for recovery.
+Optional fundamental reset is provided to support a limited number
+of PCI Express PCI devices for which a soft reset is not sufficient
+for recovery.
+
+If the platform supports PCI hotplug, then the reset might be
+performed by toggling the slot electrical power off/on.
+
+It is important for the platform to restore the PCI config space
+to the "fresh poweron" state, rather than the "last state". After
+a slot reset, the device driver will almost always use its standard
+device initialization routines, and an unusual config space setup
+may result in hung devices, kernel panics, or silent data corruption.
+
+This call gives drivers the chance to re-initialize the hardware
+(re-download firmware, etc.). At this point, the driver may assume
+that the card is in a fresh state and is fully functional. The slot
+is unfrozen and the driver has full access to PCI config space,
+memory mapped I/O space and DMA. Interrupts (Legacy, MSI, or MSI-X)
+will also be available.
+
+Drivers should not restart normal I/O processing operations
+at this point. If all device drivers report success on this
+callback, the platform will call resume() to complete the sequence,
+and let the driver restart normal I/O processing.
+
+A driver can still return a critical failure for this function if
+it can't get the device operational after reset. If the platform
+previously tried a soft reset, it might now try a hard reset (power
+cycle) and then call slot_reset() again. It the device still can't
+be recovered, there is nothing more that can be done; the platform
+will typically report a "permanent failure" in such a case. The
+device will be considered "dead" in this case.
+
+Drivers for multi-function cards will need to coordinate among
+themselves as to which driver instance will perform any "one-shot"
+or global device initialization. For example, the Symbios sym53cxx2
+driver performs device init only from PCI function 0:
+
++ if (PCI_FUNC(pdev->devfn) == 0)
++ sym_reset_scsi_bus(np, 0);
+
+ Result codes:
+ - PCI_ERS_RESULT_DISCONNECT
+ Same as above.
+
+Drivers for PCI Express cards that require a fundamental reset must
+set the needs_freset bit in the pci_dev structure in their probe function.
+For example, the QLogic qla2xxx driver sets the needs_freset bit for certain
+PCI card types:
+
++ /* Set EEH reset type to fundamental if required by hba */
++ if (IS_QLA24XX(ha) || IS_QLA25XX(ha) || IS_QLA81XX(ha))
++ pdev->needs_freset = 1;
++
+
+Platform proceeds either to STEP 5 (Resume Operations) or STEP 6 (Permanent
+Failure).
+
+>>> The current powerpc implementation does not try a power-cycle
+>>> reset if the driver returned PCI_ERS_RESULT_DISCONNECT.
+>>> However, it probably should.
+
+
+STEP 5: Resume Operations
+-------------------------
+The platform will call the resume() callback on all affected device
+drivers if all drivers on the segment have returned
+PCI_ERS_RESULT_RECOVERED from one of the 3 previous callbacks.
+The goal of this callback is to tell the driver to restart activity,
+that everything is back and running. This callback does not return
+a result code.
+
+At this point, if a new error happens, the platform will restart
+a new error recovery sequence.
+
+STEP 6: Permanent Failure
+-------------------------
+A "permanent failure" has occurred, and the platform cannot recover
+the device. The platform will call error_detected() with a
+pci_channel_state value of pci_channel_io_perm_failure.
+
+The device driver should, at this point, assume the worst. It should
+cancel all pending I/O, refuse all new I/O, returning -EIO to
+higher layers. The device driver should then clean up all of its
+memory and remove itself from kernel operations, much as it would
+during system shutdown.
+
+The platform will typically notify the system operator of the
+permanent failure in some way. If the device is hotplug-capable,
+the operator will probably want to remove and replace the device.
+Note, however, not all failures are truly "permanent". Some are
+caused by over-heating, some by a poorly seated card. Many
+PCI error events are caused by software bugs, e.g. DMA's to
+wild addresses or bogus split transactions due to programming
+errors. See the discussion in powerpc/eeh-pci-error-recovery.txt
+for additional detail on real-life experience of the causes of
+software errors.
+
+
+Conclusion; General Remarks
+---------------------------
+The way the callbacks are called is platform policy. A platform with
+no slot reset capability may want to just "ignore" drivers that can't
+recover (disconnect them) and try to let other cards on the same segment
+recover. Keep in mind that in most real life cases, though, there will
+be only one driver per segment.
+
+Now, a note about interrupts. If you get an interrupt and your
+device is dead or has been isolated, there is a problem :)
+The current policy is to turn this into a platform policy.
+That is, the recovery API only requires that:
+
+ - There is no guarantee that interrupt delivery can proceed from any
+device on the segment starting from the error detection and until the
+slot_reset callback is called, at which point interrupts are expected
+to be fully operational.
+
+ - There is no guarantee that interrupt delivery is stopped, that is,
+a driver that gets an interrupt after detecting an error, or that detects
+an error within the interrupt handler such that it prevents proper
+ack'ing of the interrupt (and thus removal of the source) should just
+return IRQ_NOTHANDLED. It's up to the platform to deal with that
+condition, typically by masking the IRQ source during the duration of
+the error handling. It is expected that the platform "knows" which
+interrupts are routed to error-management capable slots and can deal
+with temporarily disabling that IRQ number during error processing (this
+isn't terribly complex). That means some IRQ latency for other devices
+sharing the interrupt, but there is simply no other way. High end
+platforms aren't supposed to share interrupts between many devices
+anyway :)
+
+>>> Implementation details for the powerpc platform are discussed in
+>>> the file Documentation/powerpc/eeh-pci-error-recovery.txt
+
+>>> As of this writing, there is a growing list of device drivers with
+>>> patches implementing error recovery. Not all of these patches are in
+>>> mainline yet. These may be used as "examples":
+>>>
+>>> drivers/scsi/ipr
+>>> drivers/scsi/sym53c8xx_2
+>>> drivers/scsi/qla2xxx
+>>> drivers/scsi/lpfc
+>>> drivers/next/bnx2.c
+>>> drivers/next/e100.c
+>>> drivers/net/e1000
+>>> drivers/net/e1000e
+>>> drivers/net/ixgb
+>>> drivers/net/ixgbe
+>>> drivers/net/cxgb3
+>>> drivers/net/s2io.c
+>>> drivers/net/qlge
+
+The End
+-------
diff --git a/Documentation/PCI/pci-iov-howto.txt b/Documentation/PCI/pci-iov-howto.txt
new file mode 100644
index 000000000..2d91ae251
--- /dev/null
+++ b/Documentation/PCI/pci-iov-howto.txt
@@ -0,0 +1,135 @@
+ PCI Express I/O Virtualization Howto
+ Copyright (C) 2009 Intel Corporation
+ Yu Zhao <yu.zhao@intel.com>
+
+ Update: November 2012
+ -- sysfs-based SRIOV enable-/disable-ment
+ Donald Dutile <ddutile@redhat.com>
+
+1. Overview
+
+1.1 What is SR-IOV
+
+Single Root I/O Virtualization (SR-IOV) is a PCI Express Extended
+capability which makes one physical device appear as multiple virtual
+devices. The physical device is referred to as Physical Function (PF)
+while the virtual devices are referred to as Virtual Functions (VF).
+Allocation of the VF can be dynamically controlled by the PF via
+registers encapsulated in the capability. By default, this feature is
+not enabled and the PF behaves as traditional PCIe device. Once it's
+turned on, each VF's PCI configuration space can be accessed by its own
+Bus, Device and Function Number (Routing ID). And each VF also has PCI
+Memory Space, which is used to map its register set. VF device driver
+operates on the register set so it can be functional and appear as a
+real existing PCI device.
+
+2. User Guide
+
+2.1 How can I enable SR-IOV capability
+
+Multiple methods are available for SR-IOV enablement.
+In the first method, the device driver (PF driver) will control the
+enabling and disabling of the capability via API provided by SR-IOV core.
+If the hardware has SR-IOV capability, loading its PF driver would
+enable it and all VFs associated with the PF. Some PF drivers require
+a module parameter to be set to determine the number of VFs to enable.
+In the second method, a write to the sysfs file sriov_numvfs will
+enable and disable the VFs associated with a PCIe PF. This method
+enables per-PF, VF enable/disable values versus the first method,
+which applies to all PFs of the same device. Additionally, the
+PCI SRIOV core support ensures that enable/disable operations are
+valid to reduce duplication in multiple drivers for the same
+checks, e.g., check numvfs == 0 if enabling VFs, ensure
+numvfs <= totalvfs.
+The second method is the recommended method for new/future VF devices.
+
+2.2 How can I use the Virtual Functions
+
+The VF is treated as hot-plugged PCI devices in the kernel, so they
+should be able to work in the same way as real PCI devices. The VF
+requires device driver that is same as a normal PCI device's.
+
+3. Developer Guide
+
+3.1 SR-IOV API
+
+To enable SR-IOV capability:
+(a) For the first method, in the driver:
+ int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn);
+ 'nr_virtfn' is number of VFs to be enabled.
+(b) For the second method, from sysfs:
+ echo 'nr_virtfn' > \
+ /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
+
+To disable SR-IOV capability:
+(a) For the first method, in the driver:
+ void pci_disable_sriov(struct pci_dev *dev);
+(b) For the second method, from sysfs:
+ echo 0 > \
+ /sys/bus/pci/devices/<DOMAIN:BUS:DEVICE.FUNCTION>/sriov_numvfs
+
+3.2 Usage example
+
+Following piece of code illustrates the usage of the SR-IOV API.
+
+static int dev_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ pci_enable_sriov(dev, NR_VIRTFN);
+
+ ...
+
+ return 0;
+}
+
+static void dev_remove(struct pci_dev *dev)
+{
+ pci_disable_sriov(dev);
+
+ ...
+}
+
+static int dev_suspend(struct pci_dev *dev, pm_message_t state)
+{
+ ...
+
+ return 0;
+}
+
+static int dev_resume(struct pci_dev *dev)
+{
+ ...
+
+ return 0;
+}
+
+static void dev_shutdown(struct pci_dev *dev)
+{
+ ...
+}
+
+static int dev_sriov_configure(struct pci_dev *dev, int numvfs)
+{
+ if (numvfs > 0) {
+ ...
+ pci_enable_sriov(dev, numvfs);
+ ...
+ return numvfs;
+ }
+ if (numvfs == 0) {
+ ....
+ pci_disable_sriov(dev);
+ ...
+ return 0;
+ }
+}
+
+static struct pci_driver dev_driver = {
+ .name = "SR-IOV Physical Function driver",
+ .id_table = dev_id_table,
+ .probe = dev_probe,
+ .remove = dev_remove,
+ .suspend = dev_suspend,
+ .resume = dev_resume,
+ .shutdown = dev_shutdown,
+ .sriov_configure = dev_sriov_configure,
+};
diff --git a/Documentation/PCI/pci.txt b/Documentation/PCI/pci.txt
new file mode 100644
index 000000000..123881f62
--- /dev/null
+++ b/Documentation/PCI/pci.txt
@@ -0,0 +1,635 @@
+
+ How To Write Linux PCI Drivers
+
+ by Martin Mares <mj@ucw.cz> on 07-Feb-2000
+ updated by Grant Grundler <grundler@parisc-linux.org> on 23-Dec-2006
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The world of PCI is vast and full of (mostly unpleasant) surprises.
+Since each CPU architecture implements different chip-sets and PCI devices
+have different requirements (erm, "features"), the result is the PCI support
+in the Linux kernel is not as trivial as one would wish. This short paper
+tries to introduce all potential driver authors to Linux APIs for
+PCI device drivers.
+
+A more complete resource is the third edition of "Linux Device Drivers"
+by Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman.
+LDD3 is available for free (under Creative Commons License) from:
+
+ http://lwn.net/Kernel/LDD3/
+
+However, keep in mind that all documents are subject to "bit rot".
+Refer to the source code if things are not working as described here.
+
+Please send questions/comments/patches about Linux PCI API to the
+"Linux PCI" <linux-pci@atrey.karlin.mff.cuni.cz> mailing list.
+
+
+
+0. Structure of PCI drivers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+PCI drivers "discover" PCI devices in a system via pci_register_driver().
+Actually, it's the other way around. When the PCI generic code discovers
+a new device, the driver with a matching "description" will be notified.
+Details on this below.
+
+pci_register_driver() leaves most of the probing for devices to
+the PCI layer and supports online insertion/removal of devices [thus
+supporting hot-pluggable PCI, CardBus, and Express-Card in a single driver].
+pci_register_driver() call requires passing in a table of function
+pointers and thus dictates the high level structure of a driver.
+
+Once the driver knows about a PCI device and takes ownership, the
+driver generally needs to perform the following initialization:
+
+ Enable the device
+ Request MMIO/IOP resources
+ Set the DMA mask size (for both coherent and streaming DMA)
+ Allocate and initialize shared control data (pci_allocate_coherent())
+ Access device configuration space (if needed)
+ Register IRQ handler (request_irq())
+ Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
+ Enable DMA/processing engines
+
+When done using the device, and perhaps the module needs to be unloaded,
+the driver needs to take the follow steps:
+ Disable the device from generating IRQs
+ Release the IRQ (free_irq())
+ Stop all DMA activity
+ Release DMA buffers (both streaming and coherent)
+ Unregister from other subsystems (e.g. scsi or netdev)
+ Release MMIO/IOP resources
+ Disable the device
+
+Most of these topics are covered in the following sections.
+For the rest look at LDD3 or <linux/pci.h> .
+
+If the PCI subsystem is not configured (CONFIG_PCI is not set), most of
+the PCI functions described below are defined as inline functions either
+completely empty or just returning an appropriate error codes to avoid
+lots of ifdefs in the drivers.
+
+
+
+1. pci_register_driver() call
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+PCI device drivers call pci_register_driver() during their
+initialization with a pointer to a structure describing the driver
+(struct pci_driver):
+
+ field name Description
+ ---------- ------------------------------------------------------
+ id_table Pointer to table of device ID's the driver is
+ interested in. Most drivers should export this
+ table using MODULE_DEVICE_TABLE(pci,...).
+
+ probe This probing function gets called (during execution
+ of pci_register_driver() for already existing
+ devices or later if a new device gets inserted) for
+ all PCI devices which match the ID table and are not
+ "owned" by the other drivers yet. This function gets
+ passed a "struct pci_dev *" for each device whose
+ entry in the ID table matches the device. The probe
+ function returns zero when the driver chooses to
+ take "ownership" of the device or an error code
+ (negative number) otherwise.
+ The probe function always gets called from process
+ context, so it can sleep.
+
+ remove The remove() function gets called whenever a device
+ being handled by this driver is removed (either during
+ deregistration of the driver or when it's manually
+ pulled out of a hot-pluggable slot).
+ The remove function always gets called from process
+ context, so it can sleep.
+
+ suspend Put device into low power state.
+ suspend_late Put device into low power state.
+
+ resume_early Wake device from low power state.
+ resume Wake device from low power state.
+
+ (Please see Documentation/power/pci.txt for descriptions
+ of PCI Power Management and the related functions.)
+
+ shutdown Hook into reboot_notifier_list (kernel/sys.c).
+ Intended to stop any idling DMA operations.
+ Useful for enabling wake-on-lan (NIC) or changing
+ the power state of a device before reboot.
+ e.g. drivers/net/e100.c.
+
+ err_handler See Documentation/PCI/pci-error-recovery.txt
+
+
+The ID table is an array of struct pci_device_id entries ending with an
+all-zero entry. Definitions with static const are generally preferred.
+Use of the deprecated macro DEFINE_PCI_DEVICE_TABLE should be avoided.
+
+Each entry consists of:
+
+ vendor,device Vendor and device ID to match (or PCI_ANY_ID)
+
+ subvendor, Subsystem vendor and device ID to match (or PCI_ANY_ID)
+ subdevice,
+
+ class Device class, subclass, and "interface" to match.
+ See Appendix D of the PCI Local Bus Spec or
+ include/linux/pci_ids.h for a full list of classes.
+ Most drivers do not need to specify class/class_mask
+ as vendor/device is normally sufficient.
+
+ class_mask limit which sub-fields of the class field are compared.
+ See drivers/scsi/sym53c8xx_2/ for example of usage.
+
+ driver_data Data private to the driver.
+ Most drivers don't need to use driver_data field.
+ Best practice is to use driver_data as an index
+ into a static list of equivalent device types,
+ instead of using it as a pointer.
+
+
+Most drivers only need PCI_DEVICE() or PCI_DEVICE_CLASS() to set up
+a pci_device_id table.
+
+New PCI IDs may be added to a device driver pci_ids table at runtime
+as shown below:
+
+echo "vendor device subvendor subdevice class class_mask driver_data" > \
+/sys/bus/pci/drivers/{driver}/new_id
+
+All fields are passed in as hexadecimal values (no leading 0x).
+The vendor and device fields are mandatory, the others are optional. Users
+need pass only as many optional fields as necessary:
+ o subvendor and subdevice fields default to PCI_ANY_ID (FFFFFFFF)
+ o class and classmask fields default to 0
+ o driver_data defaults to 0UL.
+
+Note that driver_data must match the value used by any of the pci_device_id
+entries defined in the driver. This makes the driver_data field mandatory
+if all the pci_device_id entries have a non-zero driver_data value.
+
+Once added, the driver probe routine will be invoked for any unclaimed
+PCI devices listed in its (newly updated) pci_ids list.
+
+When the driver exits, it just calls pci_unregister_driver() and the PCI layer
+automatically calls the remove hook for all devices handled by the driver.
+
+
+1.1 "Attributes" for driver functions/data
+
+Please mark the initialization and cleanup functions where appropriate
+(the corresponding macros are defined in <linux/init.h>):
+
+ __init Initialization code. Thrown away after the driver
+ initializes.
+ __exit Exit code. Ignored for non-modular drivers.
+
+Tips on when/where to use the above attributes:
+ o The module_init()/module_exit() functions (and all
+ initialization functions called _only_ from these)
+ should be marked __init/__exit.
+
+ o Do not mark the struct pci_driver.
+
+ o Do NOT mark a function if you are not sure which mark to use.
+ Better to not mark the function than mark the function wrong.
+
+
+
+2. How to find PCI devices manually
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+PCI drivers should have a really good reason for not using the
+pci_register_driver() interface to search for PCI devices.
+The main reason PCI devices are controlled by multiple drivers
+is because one PCI device implements several different HW services.
+E.g. combined serial/parallel port/floppy controller.
+
+A manual search may be performed using the following constructs:
+
+Searching by vendor and device ID:
+
+ struct pci_dev *dev = NULL;
+ while (dev = pci_get_device(VENDOR_ID, DEVICE_ID, dev))
+ configure_device(dev);
+
+Searching by class ID (iterate in a similar way):
+
+ pci_get_class(CLASS_ID, dev)
+
+Searching by both vendor/device and subsystem vendor/device ID:
+
+ pci_get_subsys(VENDOR_ID,DEVICE_ID, SUBSYS_VENDOR_ID, SUBSYS_DEVICE_ID, dev).
+
+You can use the constant PCI_ANY_ID as a wildcard replacement for
+VENDOR_ID or DEVICE_ID. This allows searching for any device from a
+specific vendor, for example.
+
+These functions are hotplug-safe. They increment the reference count on
+the pci_dev that they return. You must eventually (possibly at module unload)
+decrement the reference count on these devices by calling pci_dev_put().
+
+
+
+3. Device Initialization Steps
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+As noted in the introduction, most PCI drivers need the following steps
+for device initialization:
+
+ Enable the device
+ Request MMIO/IOP resources
+ Set the DMA mask size (for both coherent and streaming DMA)
+ Allocate and initialize shared control data (pci_allocate_coherent())
+ Access device configuration space (if needed)
+ Register IRQ handler (request_irq())
+ Initialize non-PCI (i.e. LAN/SCSI/etc parts of the chip)
+ Enable DMA/processing engines.
+
+The driver can access PCI config space registers at any time.
+(Well, almost. When running BIST, config space can go away...but
+that will just result in a PCI Bus Master Abort and config reads
+will return garbage).
+
+
+3.1 Enable the PCI device
+~~~~~~~~~~~~~~~~~~~~~~~~~
+Before touching any device registers, the driver needs to enable
+the PCI device by calling pci_enable_device(). This will:
+ o wake up the device if it was in suspended state,
+ o allocate I/O and memory regions of the device (if BIOS did not),
+ o allocate an IRQ (if BIOS did not).
+
+NOTE: pci_enable_device() can fail! Check the return value.
+
+[ OS BUG: we don't check resource allocations before enabling those
+ resources. The sequence would make more sense if we called
+ pci_request_resources() before calling pci_enable_device().
+ Currently, the device drivers can't detect the bug when when two
+ devices have been allocated the same range. This is not a common
+ problem and unlikely to get fixed soon.
+
+ This has been discussed before but not changed as of 2.6.19:
+ http://lkml.org/lkml/2006/3/2/194
+]
+
+pci_set_master() will enable DMA by setting the bus master bit
+in the PCI_COMMAND register. It also fixes the latency timer value if
+it's set to something bogus by the BIOS. pci_clear_master() will
+disable DMA by clearing the bus master bit.
+
+If the PCI device can use the PCI Memory-Write-Invalidate transaction,
+call pci_set_mwi(). This enables the PCI_COMMAND bit for Mem-Wr-Inval
+and also ensures that the cache line size register is set correctly.
+Check the return value of pci_set_mwi() as not all architectures
+or chip-sets may support Memory-Write-Invalidate. Alternatively,
+if Mem-Wr-Inval would be nice to have but is not required, call
+pci_try_set_mwi() to have the system do its best effort at enabling
+Mem-Wr-Inval.
+
+
+3.2 Request MMIO/IOP resources
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Memory (MMIO), and I/O port addresses should NOT be read directly
+from the PCI device config space. Use the values in the pci_dev structure
+as the PCI "bus address" might have been remapped to a "host physical"
+address by the arch/chip-set specific kernel support.
+
+See Documentation/io-mapping.txt for how to access device registers
+or device memory.
+
+The device driver needs to call pci_request_region() to verify
+no other device is already using the same address resource.
+Conversely, drivers should call pci_release_region() AFTER
+calling pci_disable_device().
+The idea is to prevent two devices colliding on the same address range.
+
+[ See OS BUG comment above. Currently (2.6.19), The driver can only
+ determine MMIO and IO Port resource availability _after_ calling
+ pci_enable_device(). ]
+
+Generic flavors of pci_request_region() are request_mem_region()
+(for MMIO ranges) and request_region() (for IO Port ranges).
+Use these for address resources that are not described by "normal" PCI
+BARs.
+
+Also see pci_request_selected_regions() below.
+
+
+3.3 Set the DMA mask size
+~~~~~~~~~~~~~~~~~~~~~~~~~
+[ If anything below doesn't make sense, please refer to
+ Documentation/DMA-API.txt. This section is just a reminder that
+ drivers need to indicate DMA capabilities of the device and is not
+ an authoritative source for DMA interfaces. ]
+
+While all drivers should explicitly indicate the DMA capability
+(e.g. 32 or 64 bit) of the PCI bus master, devices with more than
+32-bit bus master capability for streaming data need the driver
+to "register" this capability by calling pci_set_dma_mask() with
+appropriate parameters. In general this allows more efficient DMA
+on systems where System RAM exists above 4G _physical_ address.
+
+Drivers for all PCI-X and PCIe compliant devices must call
+pci_set_dma_mask() as they are 64-bit DMA devices.
+
+Similarly, drivers must also "register" this capability if the device
+can directly address "consistent memory" in System RAM above 4G physical
+address by calling pci_set_consistent_dma_mask().
+Again, this includes drivers for all PCI-X and PCIe compliant devices.
+Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
+64-bit DMA capable for payload ("streaming") data but not control
+("consistent") data.
+
+
+3.4 Setup shared control data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared)
+memory. See Documentation/DMA-API.txt for a full description of
+the DMA APIs. This section is just a reminder that it needs to be done
+before enabling DMA on the device.
+
+
+3.5 Initialize device registers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Some drivers will need specific "capability" fields programmed
+or other "vendor specific" register initialized or reset.
+E.g. clearing pending interrupts.
+
+
+3.6 Register IRQ handler
+~~~~~~~~~~~~~~~~~~~~~~~~
+While calling request_irq() is the last step described here,
+this is often just another intermediate step to initialize a device.
+This step can often be deferred until the device is opened for use.
+
+All interrupt handlers for IRQ lines should be registered with IRQF_SHARED
+and use the devid to map IRQs to devices (remember that all PCI IRQ lines
+can be shared).
+
+request_irq() will associate an interrupt handler and device handle
+with an interrupt number. Historically interrupt numbers represent
+IRQ lines which run from the PCI device to the Interrupt controller.
+With MSI and MSI-X (more below) the interrupt number is a CPU "vector".
+
+request_irq() also enables the interrupt. Make sure the device is
+quiesced and does not have any interrupts pending before registering
+the interrupt handler.
+
+MSI and MSI-X are PCI capabilities. Both are "Message Signaled Interrupts"
+which deliver interrupts to the CPU via a DMA write to a Local APIC.
+The fundamental difference between MSI and MSI-X is how multiple
+"vectors" get allocated. MSI requires contiguous blocks of vectors
+while MSI-X can allocate several individual ones.
+
+MSI capability can be enabled by calling pci_enable_msi() or
+pci_enable_msix() before calling request_irq(). This causes
+the PCI support to program CPU vector data into the PCI device
+capability registers.
+
+If your PCI device supports both, try to enable MSI-X first.
+Only one can be enabled at a time. Many architectures, chip-sets,
+or BIOSes do NOT support MSI or MSI-X and the call to pci_enable_msi/msix
+will fail. This is important to note since many drivers have
+two (or more) interrupt handlers: one for MSI/MSI-X and another for IRQs.
+They choose which handler to register with request_irq() based on the
+return value from pci_enable_msi/msix().
+
+There are (at least) two really good reasons for using MSI:
+1) MSI is an exclusive interrupt vector by definition.
+ This means the interrupt handler doesn't have to verify
+ its device caused the interrupt.
+
+2) MSI avoids DMA/IRQ race conditions. DMA to host memory is guaranteed
+ to be visible to the host CPU(s) when the MSI is delivered. This
+ is important for both data coherency and avoiding stale control data.
+ This guarantee allows the driver to omit MMIO reads to flush
+ the DMA stream.
+
+See drivers/infiniband/hw/mthca/ or drivers/net/tg3.c for examples
+of MSI/MSI-X usage.
+
+
+
+4. PCI device shutdown
+~~~~~~~~~~~~~~~~~~~~~~~
+
+When a PCI device driver is being unloaded, most of the following
+steps need to be performed:
+
+ Disable the device from generating IRQs
+ Release the IRQ (free_irq())
+ Stop all DMA activity
+ Release DMA buffers (both streaming and consistent)
+ Unregister from other subsystems (e.g. scsi or netdev)
+ Disable device from responding to MMIO/IO Port addresses
+ Release MMIO/IO Port resource(s)
+
+
+4.1 Stop IRQs on the device
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+How to do this is chip/device specific. If it's not done, it opens
+the possibility of a "screaming interrupt" if (and only if)
+the IRQ is shared with another device.
+
+When the shared IRQ handler is "unhooked", the remaining devices
+using the same IRQ line will still need the IRQ enabled. Thus if the
+"unhooked" device asserts IRQ line, the system will respond assuming
+it was one of the remaining devices asserted the IRQ line. Since none
+of the other devices will handle the IRQ, the system will "hang" until
+it decides the IRQ isn't going to get handled and masks the IRQ (100,000
+iterations later). Once the shared IRQ is masked, the remaining devices
+will stop functioning properly. Not a nice situation.
+
+This is another reason to use MSI or MSI-X if it's available.
+MSI and MSI-X are defined to be exclusive interrupts and thus
+are not susceptible to the "screaming interrupt" problem.
+
+
+4.2 Release the IRQ
+~~~~~~~~~~~~~~~~~~~
+Once the device is quiesced (no more IRQs), one can call free_irq().
+This function will return control once any pending IRQs are handled,
+"unhook" the drivers IRQ handler from that IRQ, and finally release
+the IRQ if no one else is using it.
+
+
+4.3 Stop all DMA activity
+~~~~~~~~~~~~~~~~~~~~~~~~~
+It's extremely important to stop all DMA operations BEFORE attempting
+to deallocate DMA control data. Failure to do so can result in memory
+corruption, hangs, and on some chip-sets a hard crash.
+
+Stopping DMA after stopping the IRQs can avoid races where the
+IRQ handler might restart DMA engines.
+
+While this step sounds obvious and trivial, several "mature" drivers
+didn't get this step right in the past.
+
+
+4.4 Release DMA buffers
+~~~~~~~~~~~~~~~~~~~~~~~
+Once DMA is stopped, clean up streaming DMA first.
+I.e. unmap data buffers and return buffers to "upstream"
+owners if there is one.
+
+Then clean up "consistent" buffers which contain the control data.
+
+See Documentation/DMA-API.txt for details on unmapping interfaces.
+
+
+4.5 Unregister from other subsystems
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Most low level PCI device drivers support some other subsystem
+like USB, ALSA, SCSI, NetDev, Infiniband, etc. Make sure your
+driver isn't losing resources from that other subsystem.
+If this happens, typically the symptom is an Oops (panic) when
+the subsystem attempts to call into a driver that has been unloaded.
+
+
+4.6 Disable Device from responding to MMIO/IO Port addresses
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+io_unmap() MMIO or IO Port resources and then call pci_disable_device().
+This is the symmetric opposite of pci_enable_device().
+Do not access device registers after calling pci_disable_device().
+
+
+4.7 Release MMIO/IO Port Resource(s)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Call pci_release_region() to mark the MMIO or IO Port range as available.
+Failure to do so usually results in the inability to reload the driver.
+
+
+
+5. How to access PCI config space
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can use pci_(read|write)_config_(byte|word|dword) to access the config
+space of a device represented by struct pci_dev *. All these functions return 0
+when successful or an error code (PCIBIOS_...) which can be translated to a text
+string by pcibios_strerror. Most drivers expect that accesses to valid PCI
+devices don't fail.
+
+If you don't have a struct pci_dev available, you can call
+pci_bus_(read|write)_config_(byte|word|dword) to access a given device
+and function on that bus.
+
+If you access fields in the standard portion of the config header, please
+use symbolic names of locations and bits declared in <linux/pci.h>.
+
+If you need to access Extended PCI Capability registers, just call
+pci_find_capability() for the particular capability and it will find the
+corresponding register block for you.
+
+
+
+6. Other interesting functions
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+pci_get_domain_bus_and_slot() Find pci_dev corresponding to given domain,
+ bus and slot and number. If the device is
+ found, its reference count is increased.
+pci_set_power_state() Set PCI Power Management state (0=D0 ... 3=D3)
+pci_find_capability() Find specified capability in device's capability
+ list.
+pci_resource_start() Returns bus start address for a given PCI region
+pci_resource_end() Returns bus end address for a given PCI region
+pci_resource_len() Returns the byte length of a PCI region
+pci_set_drvdata() Set private driver data pointer for a pci_dev
+pci_get_drvdata() Return private driver data pointer for a pci_dev
+pci_set_mwi() Enable Memory-Write-Invalidate transactions.
+pci_clear_mwi() Disable Memory-Write-Invalidate transactions.
+
+
+
+7. Miscellaneous hints
+~~~~~~~~~~~~~~~~~~~~~~
+
+When displaying PCI device names to the user (for example when a driver wants
+to tell the user what card has it found), please use pci_name(pci_dev).
+
+Always refer to the PCI devices by a pointer to the pci_dev structure.
+All PCI layer functions use this identification and it's the only
+reasonable one. Don't use bus/slot/function numbers except for very
+special purposes -- on systems with multiple primary buses their semantics
+can be pretty complex.
+
+Don't try to turn on Fast Back to Back writes in your driver. All devices
+on the bus need to be capable of doing it, so this is something which needs
+to be handled by platform and generic code, not individual drivers.
+
+
+
+8. Vendor and device identifications
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Do not add new device or vendor IDs to include/linux/pci_ids.h unless they
+are shared across multiple drivers. You can add private definitions in
+your driver if they're helpful, or just use plain hex constants.
+
+The device IDs are arbitrary hex numbers (vendor controlled) and normally used
+only in a single location, the pci_device_id table.
+
+Please DO submit new vendor/device IDs to http://pciids.sourceforge.net/.
+
+
+
+9. Obsolete functions
+~~~~~~~~~~~~~~~~~~~~~
+
+There are several functions which you might come across when trying to
+port an old driver to the new PCI interface. They are no longer present
+in the kernel as they aren't compatible with hotplug or PCI domains or
+having sane locking.
+
+pci_find_device() Superseded by pci_get_device()
+pci_find_subsys() Superseded by pci_get_subsys()
+pci_find_slot() Superseded by pci_get_domain_bus_and_slot()
+pci_get_slot() Superseded by pci_get_domain_bus_and_slot()
+
+
+The alternative is the traditional PCI device driver that walks PCI
+device lists. This is still possible but discouraged.
+
+
+
+10. MMIO Space and "Write Posting"
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Converting a driver from using I/O Port space to using MMIO space
+often requires some additional changes. Specifically, "write posting"
+needs to be handled. Many drivers (e.g. tg3, acenic, sym53c8xx_2)
+already do this. I/O Port space guarantees write transactions reach the PCI
+device before the CPU can continue. Writes to MMIO space allow the CPU
+to continue before the transaction reaches the PCI device. HW weenies
+call this "Write Posting" because the write completion is "posted" to
+the CPU before the transaction has reached its destination.
+
+Thus, timing sensitive code should add readl() where the CPU is
+expected to wait before doing other work. The classic "bit banging"
+sequence works fine for I/O Port space:
+
+ for (i = 8; --i; val >>= 1) {
+ outb(val & 1, ioport_reg); /* write bit */
+ udelay(10);
+ }
+
+The same sequence for MMIO space should be:
+
+ for (i = 8; --i; val >>= 1) {
+ writeb(val & 1, mmio_reg); /* write bit */
+ readb(safe_mmio_reg); /* flush posted write */
+ udelay(10);
+ }
+
+It is important that "safe_mmio_reg" not have any side effects that
+interferes with the correct operation of the device.
+
+Another case to watch out for is when resetting a PCI device. Use PCI
+Configuration space reads to flush the writel(). This will gracefully
+handle the PCI master abort on all platforms if the PCI device is
+expected to not respond to a readl(). Most x86 platforms will allow
+MMIO reads to master abort (a.k.a. "Soft Fail") and return garbage
+(e.g. ~0). But many RISC platforms will crash (a.k.a."Hard Fail").
+
diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt
new file mode 100644
index 000000000..b4987c0bc
--- /dev/null
+++ b/Documentation/PCI/pcieaer-howto.txt
@@ -0,0 +1,270 @@
+ The PCI Express Advanced Error Reporting Driver Guide HOWTO
+ T. Long Nguyen <tom.l.nguyen@intel.com>
+ Yanmin Zhang <yanmin.zhang@intel.com>
+ 07/29/2006
+
+
+1. Overview
+
+1.1 About this guide
+
+This guide describes the basics of the PCI Express Advanced Error
+Reporting (AER) driver and provides information on how to use it, as
+well as how to enable the drivers of endpoint devices to conform with
+PCI Express AER driver.
+
+1.2 Copyright (C) Intel Corporation 2006.
+
+1.3 What is the PCI Express AER Driver?
+
+PCI Express error signaling can occur on the PCI Express link itself
+or on behalf of transactions initiated on the link. PCI Express
+defines two error reporting paradigms: the baseline capability and
+the Advanced Error Reporting capability. The baseline capability is
+required of all PCI Express components providing a minimum defined
+set of error reporting requirements. Advanced Error Reporting
+capability is implemented with a PCI Express advanced error reporting
+extended capability structure providing more robust error reporting.
+
+The PCI Express AER driver provides the infrastructure to support PCI
+Express Advanced Error Reporting capability. The PCI Express AER
+driver provides three basic functions:
+
+- Gathers the comprehensive error information if errors occurred.
+- Reports error to the users.
+- Performs error recovery actions.
+
+AER driver only attaches root ports which support PCI-Express AER
+capability.
+
+
+2. User Guide
+
+2.1 Include the PCI Express AER Root Driver into the Linux Kernel
+
+The PCI Express AER Root driver is a Root Port service driver attached
+to the PCI Express Port Bus driver. If a user wants to use it, the driver
+has to be compiled. Option CONFIG_PCIEAER supports this capability. It
+depends on CONFIG_PCIEPORTBUS, so pls. set CONFIG_PCIEPORTBUS=y and
+CONFIG_PCIEAER = y.
+
+2.2 Load PCI Express AER Root Driver
+There is a case where a system has AER support in BIOS. Enabling the AER
+Root driver and having AER support in BIOS may result unpredictable
+behavior. To avoid this conflict, a successful load of the AER Root driver
+requires ACPI _OSC support in the BIOS to allow the AER Root driver to
+request for native control of AER. See the PCI FW 3.0 Specification for
+details regarding OSC usage. Currently, lots of firmwares don't provide
+_OSC support while they use PCI Express. To support such firmwares,
+forceload, a parameter of type bool, could enable AER to continue to
+be initiated although firmwares have no _OSC support. To enable the
+walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line
+when booting kernel. Note that forceload=n by default.
+
+nosourceid, another parameter of type bool, can be used when broken
+hardware (mostly chipsets) has root ports that cannot obtain the reporting
+source ID. nosourceid=n by default.
+
+2.3 AER error output
+When a PCI-E AER error is captured, an error message will be outputted to
+console. If it's a correctable error, it is outputted as a warning.
+Otherwise, it is printed as an error. So users could choose different
+log level to filter out correctable error messages.
+
+Below shows an example:
+0000:50:00.0: PCIe Bus Error: severity=Uncorrected (Fatal), type=Transaction Layer, id=0500(Requester ID)
+0000:50:00.0: device [8086:0329] error status/mask=00100000/00000000
+0000:50:00.0: [20] Unsupported Request (First)
+0000:50:00.0: TLP Header: 04000001 00200a03 05010000 00050100
+
+In the example, 'Requester ID' means the ID of the device who sends
+the error message to root port. Pls. refer to pci express specs for
+other fields.
+
+
+3. Developer Guide
+
+To enable AER aware support requires a software driver to configure
+the AER capability structure within its device and to provide callbacks.
+
+To support AER better, developers need understand how AER does work
+firstly.
+
+PCI Express errors are classified into two types: correctable errors
+and uncorrectable errors. This classification is based on the impacts
+of those errors, which may result in degraded performance or function
+failure.
+
+Correctable errors pose no impacts on the functionality of the
+interface. The PCI Express protocol can recover without any software
+intervention or any loss of data. These errors are detected and
+corrected by hardware. Unlike correctable errors, uncorrectable
+errors impact functionality of the interface. Uncorrectable errors
+can cause a particular transaction or a particular PCI Express link
+to be unreliable. Depending on those error conditions, uncorrectable
+errors are further classified into non-fatal errors and fatal errors.
+Non-fatal errors cause the particular transaction to be unreliable,
+but the PCI Express link itself is fully functional. Fatal errors, on
+the other hand, cause the link to be unreliable.
+
+When AER is enabled, a PCI Express device will automatically send an
+error message to the PCIe root port above it when the device captures
+an error. The Root Port, upon receiving an error reporting message,
+internally processes and logs the error message in its PCI Express
+capability structure. Error information being logged includes storing
+the error reporting agent's requestor ID into the Error Source
+Identification Registers and setting the error bits of the Root Error
+Status Register accordingly. If AER error reporting is enabled in Root
+Error Command Register, the Root Port generates an interrupt if an
+error is detected.
+
+Note that the errors as described above are related to the PCI Express
+hierarchy and links. These errors do not include any device specific
+errors because device specific errors will still get sent directly to
+the device driver.
+
+3.1 Configure the AER capability structure
+
+AER aware drivers of PCI Express component need change the device
+control registers to enable AER. They also could change AER registers,
+including mask and severity registers. Helper function
+pci_enable_pcie_error_reporting could be used to enable AER. See
+section 3.3.
+
+3.2. Provide callbacks
+
+3.2.1 callback reset_link to reset pci express link
+
+This callback is used to reset the pci express physical link when a
+fatal error happens. The root port aer service driver provides a
+default reset_link function, but different upstream ports might
+have different specifications to reset pci express link, so all
+upstream ports should provide their own reset_link functions.
+
+In struct pcie_port_service_driver, a new pointer, reset_link, is
+added.
+
+pci_ers_result_t (*reset_link) (struct pci_dev *dev);
+
+Section 3.2.2.2 provides more detailed info on when to call
+reset_link.
+
+3.2.2 PCI error-recovery callbacks
+
+The PCI Express AER Root driver uses error callbacks to coordinate
+with downstream device drivers associated with a hierarchy in question
+when performing error recovery actions.
+
+Data struct pci_driver has a pointer, err_handler, to point to
+pci_error_handlers who consists of a couple of callback function
+pointers. AER driver follows the rules defined in
+pci-error-recovery.txt except pci express specific parts (e.g.
+reset_link). Pls. refer to pci-error-recovery.txt for detailed
+definitions of the callbacks.
+
+Below sections specify when to call the error callback functions.
+
+3.2.2.1 Correctable errors
+
+Correctable errors pose no impacts on the functionality of
+the interface. The PCI Express protocol can recover without any
+software intervention or any loss of data. These errors do not
+require any recovery actions. The AER driver clears the device's
+correctable error status register accordingly and logs these errors.
+
+3.2.2.2 Non-correctable (non-fatal and fatal) errors
+
+If an error message indicates a non-fatal error, performing link reset
+at upstream is not required. The AER driver calls error_detected(dev,
+pci_channel_io_normal) to all drivers associated within a hierarchy in
+question. for example,
+EndPoint<==>DownstreamPort B<==>UpstreamPort A<==>RootPort.
+If Upstream port A captures an AER error, the hierarchy consists of
+Downstream port B and EndPoint.
+
+A driver may return PCI_ERS_RESULT_CAN_RECOVER,
+PCI_ERS_RESULT_DISCONNECT, or PCI_ERS_RESULT_NEED_RESET, depending on
+whether it can recover or the AER driver calls mmio_enabled as next.
+
+If an error message indicates a fatal error, kernel will broadcast
+error_detected(dev, pci_channel_io_frozen) to all drivers within
+a hierarchy in question. Then, performing link reset at upstream is
+necessary. As different kinds of devices might use different approaches
+to reset link, AER port service driver is required to provide the
+function to reset link. Firstly, kernel looks for if the upstream
+component has an aer driver. If it has, kernel uses the reset_link
+callback of the aer driver. If the upstream component has no aer driver
+and the port is downstream port, we will perform a hot reset as the
+default by setting the Secondary Bus Reset bit of the Bridge Control
+register associated with the downstream port. As for upstream ports,
+they should provide their own aer service drivers with reset_link
+function. If error_detected returns PCI_ERS_RESULT_CAN_RECOVER and
+reset_link returns PCI_ERS_RESULT_RECOVERED, the error handling goes
+to mmio_enabled.
+
+3.3 helper functions
+
+3.3.1 int pci_enable_pcie_error_reporting(struct pci_dev *dev);
+pci_enable_pcie_error_reporting enables the device to send error
+messages to root port when an error is detected. Note that devices
+don't enable the error reporting by default, so device drivers need
+call this function to enable it.
+
+3.3.2 int pci_disable_pcie_error_reporting(struct pci_dev *dev);
+pci_disable_pcie_error_reporting disables the device to send error
+messages to root port when an error is detected.
+
+3.3.3 int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev);
+pci_cleanup_aer_uncorrect_error_status cleanups the uncorrectable
+error status register.
+
+3.4 Frequent Asked Questions
+
+Q: What happens if a PCI Express device driver does not provide an
+error recovery handler (pci_driver->err_handler is equal to NULL)?
+
+A: The devices attached with the driver won't be recovered. If the
+error is fatal, kernel will print out warning messages. Please refer
+to section 3 for more information.
+
+Q: What happens if an upstream port service driver does not provide
+callback reset_link?
+
+A: Fatal error recovery will fail if the errors are reported by the
+upstream ports who are attached by the service driver.
+
+Q: How does this infrastructure deal with driver that is not PCI
+Express aware?
+
+A: This infrastructure calls the error callback functions of the
+driver when an error happens. But if the driver is not aware of
+PCI Express, the device might not report its own errors to root
+port.
+
+Q: What modifications will that driver need to make it compatible
+with the PCI Express AER Root driver?
+
+A: It could call the helper functions to enable AER in devices and
+cleanup uncorrectable status register. Pls. refer to section 3.3.
+
+
+4. Software error injection
+
+Debugging PCIe AER error recovery code is quite difficult because it
+is hard to trigger real hardware errors. Software based error
+injection can be used to fake various kinds of PCIe errors.
+
+First you should enable PCIe AER software error injection in kernel
+configuration, that is, following item should be in your .config.
+
+CONFIG_PCIEAER_INJECT=y or CONFIG_PCIEAER_INJECT=m
+
+After reboot with new kernel or insert the module, a device file named
+/dev/aer_inject should be created.
+
+Then, you need a user space tool named aer-inject, which can be gotten
+from:
+ http://www.kernel.org/pub/linux/utils/pci/aer-inject/
+
+More information about aer-inject can be found in the document comes
+with its source code.
diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
new file mode 100644
index 000000000..f773a264a
--- /dev/null
+++ b/Documentation/RCU/00-INDEX
@@ -0,0 +1,36 @@
+00-INDEX
+ - This file
+arrayRCU.txt
+ - Using RCU to Protect Read-Mostly Arrays
+checklist.txt
+ - Review Checklist for RCU Patches
+listRCU.txt
+ - Using RCU to Protect Read-Mostly Linked Lists
+lockdep.txt
+ - RCU and lockdep checking
+lockdep-splat.txt
+ - RCU Lockdep splats explained.
+NMI-RCU.txt
+ - Using RCU to Protect Dynamic NMI Handlers
+rcu_dereference.txt
+ - Proper care and feeding of return values from rcu_dereference()
+rcubarrier.txt
+ - RCU and Unloadable Modules
+rculist_nulls.txt
+ - RCU list primitives for use with SLAB_DESTROY_BY_RCU
+rcuref.txt
+ - Reference-count design for elements of lists/arrays protected by RCU
+rcu.txt
+ - RCU Concepts
+RTFP.txt
+ - List of RCU papers (bibliography) going back to 1980.
+stallwarn.txt
+ - RCU CPU stall warnings (module parameter rcu_cpu_stall_suppress)
+torture.txt
+ - RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
+trace.txt
+ - CONFIG_RCU_TRACE debugfs files and formats
+UP.txt
+ - RCU on Uniprocessor Systems
+whatisRCU.txt
+ - What is RCU?
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
new file mode 100644
index 000000000..687777f83
--- /dev/null
+++ b/Documentation/RCU/NMI-RCU.txt
@@ -0,0 +1,120 @@
+Using RCU to Protect Dynamic NMI Handlers
+
+
+Although RCU is usually used to protect read-mostly data structures,
+it is possible to use RCU to provide dynamic non-maskable interrupt
+handlers, as well as dynamic irq handlers. This document describes
+how to do this, drawing loosely from Zwane Mwaikambo's NMI-timer
+work in "arch/x86/oprofile/nmi_timer_int.c" and in
+"arch/x86/kernel/traps.c".
+
+The relevant pieces of code are listed below, each followed by a
+brief explanation.
+
+ static int dummy_nmi_callback(struct pt_regs *regs, int cpu)
+ {
+ return 0;
+ }
+
+The dummy_nmi_callback() function is a "dummy" NMI handler that does
+nothing, but returns zero, thus saying that it did nothing, allowing
+the NMI handler to take the default machine-specific action.
+
+ static nmi_callback_t nmi_callback = dummy_nmi_callback;
+
+This nmi_callback variable is a global function pointer to the current
+NMI handler.
+
+ void do_nmi(struct pt_regs * regs, long error_code)
+ {
+ int cpu;
+
+ nmi_enter();
+
+ cpu = smp_processor_id();
+ ++nmi_count(cpu);
+
+ if (!rcu_dereference_sched(nmi_callback)(regs, cpu))
+ default_do_nmi(regs);
+
+ nmi_exit();
+ }
+
+The do_nmi() function processes each NMI. It first disables preemption
+in the same way that a hardware irq would, then increments the per-CPU
+count of NMIs. It then invokes the NMI handler stored in the nmi_callback
+function pointer. If this handler returns zero, do_nmi() invokes the
+default_do_nmi() function to handle a machine-specific NMI. Finally,
+preemption is restored.
+
+In theory, rcu_dereference_sched() is not needed, since this code runs
+only on i386, which in theory does not need rcu_dereference_sched()
+anyway. However, in practice it is a good documentation aid, particularly
+for anyone attempting to do something similar on Alpha or on systems
+with aggressive optimizing compilers.
+
+Quick Quiz: Why might the rcu_dereference_sched() be necessary on Alpha,
+ given that the code referenced by the pointer is read-only?
+
+
+Back to the discussion of NMI and RCU...
+
+ void set_nmi_callback(nmi_callback_t callback)
+ {
+ rcu_assign_pointer(nmi_callback, callback);
+ }
+
+The set_nmi_callback() function registers an NMI handler. Note that any
+data that is to be used by the callback must be initialized up -before-
+the call to set_nmi_callback(). On architectures that do not order
+writes, the rcu_assign_pointer() ensures that the NMI handler sees the
+initialized values.
+
+ void unset_nmi_callback(void)
+ {
+ rcu_assign_pointer(nmi_callback, dummy_nmi_callback);
+ }
+
+This function unregisters an NMI handler, restoring the original
+dummy_nmi_handler(). However, there may well be an NMI handler
+currently executing on some other CPU. We therefore cannot free
+up any data structures used by the old NMI handler until execution
+of it completes on all other CPUs.
+
+One way to accomplish this is via synchronize_sched(), perhaps as
+follows:
+
+ unset_nmi_callback();
+ synchronize_sched();
+ kfree(my_nmi_data);
+
+This works because synchronize_sched() blocks until all CPUs complete
+any preemption-disabled segments of code that they were executing.
+Since NMI handlers disable preemption, synchronize_sched() is guaranteed
+not to return until all ongoing NMI handlers exit. It is therefore safe
+to free up the handler's data as soon as synchronize_sched() returns.
+
+Important note: for this to work, the architecture in question must
+invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively.
+
+
+Answer to Quick Quiz
+
+ Why might the rcu_dereference_sched() be necessary on Alpha, given
+ that the code referenced by the pointer is read-only?
+
+ Answer: The caller to set_nmi_callback() might well have
+ initialized some data that is to be used by the new NMI
+ handler. In this case, the rcu_dereference_sched() would
+ be needed, because otherwise a CPU that received an NMI
+ just after the new handler was set might see the pointer
+ to the new NMI handler, but the old pre-initialized
+ version of the handler's data.
+
+ This same sad story can happen on other CPUs when using
+ a compiler with aggressive pointer-value speculation
+ optimizations.
+
+ More important, the rcu_dereference_sched() makes it
+ clear to someone reading the code that the pointer is
+ being protected by RCU-sched.
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
new file mode 100644
index 000000000..f29bcbc46
--- /dev/null
+++ b/Documentation/RCU/RTFP.txt
@@ -0,0 +1,2812 @@
+Read the Fscking Papers!
+
+
+This document describes RCU-related publications, and is followed by
+the corresponding bibtex entries. A number of the publications may
+be found at http://www.rdrop.com/users/paulmck/RCU/. For others, browsers
+and search engines will usually find what you are looking for.
+
+The first thing resembling RCU was published in 1980, when Kung and Lehman
+[Kung80] recommended use of a garbage collector to defer destruction
+of nodes in a parallel binary search tree in order to simplify its
+implementation. This works well in environments that have garbage
+collectors, but most production garbage collectors incur significant
+overhead.
+
+In 1982, Manber and Ladner [Manber82,Manber84] recommended deferring
+destruction until all threads running at that time have terminated, again
+for a parallel binary search tree. This approach works well in systems
+with short-lived threads, such as the K42 research operating system.
+However, Linux has long-lived tasks, so more is needed.
+
+In 1986, Hennessy, Osisek, and Seigh [Hennessy89] introduced passive
+serialization, which is an RCU-like mechanism that relies on the presence
+of "quiescent states" in the VM/XA hypervisor that are guaranteed not
+to be referencing the data structure. However, this mechanism was not
+optimized for modern computer systems, which is not surprising given
+that these overheads were not so expensive in the mid-80s. Nonetheless,
+passive serialization appears to be the first deferred-destruction
+mechanism to be used in production. Furthermore, the relevant patent
+has lapsed, so this approach may be used in non-GPL software, if desired.
+(In contrast, implementation of RCU is permitted only in software licensed
+under either GPL or LGPL. Sorry!!!)
+
+In 1987, Rashid et al. described lazy TLB-flush [RichardRashid87a].
+At first glance, this has nothing to do with RCU, but nevertheless
+this paper helped inspire the update-side batching used in the later
+RCU implementation in DYNIX/ptx. In 1988, Barbara Liskov published
+a description of Argus that noted that use of out-of-date values can
+be tolerated in some situations. Thus, this paper provides some early
+theoretical justification for use of stale data.
+
+In 1990, Pugh [Pugh90] noted that explicitly tracking which threads
+were reading a given data structure permitted deferred free to operate
+in the presence of non-terminating threads. However, this explicit
+tracking imposes significant read-side overhead, which is undesirable
+in read-mostly situations. This algorithm does take pains to avoid
+write-side contention and parallelize the other write-side overheads by
+providing a fine-grained locking design, however, it would be interesting
+to see how much of the performance advantage reported in 1990 remains
+today.
+
+At about this same time, Andrews [Andrews91textbook] described ``chaotic
+relaxation'', where the normal barriers between successive iterations
+of convergent numerical algorithms are relaxed, so that iteration $n$
+might use data from iteration $n-1$ or even $n-2$. This introduces
+error, which typically slows convergence and thus increases the number of
+iterations required. However, this increase is sometimes more than made
+up for by a reduction in the number of expensive barrier operations,
+which are otherwise required to synchronize the threads at the end
+of each iteration. Unfortunately, chaotic relaxation requires highly
+structured data, such as the matrices used in scientific programs, and
+is thus inapplicable to most data structures in operating-system kernels.
+
+In 1992, Henry (now Alexia) Massalin completed a dissertation advising
+parallel programmers to defer processing when feasible to simplify
+synchronization [HMassalinPhD]. RCU makes extremely heavy use of
+this advice.
+
+In 1993, Jacobson [Jacobson93] verbally described what is perhaps the
+simplest deferred-free technique: simply waiting a fixed amount of time
+before freeing blocks awaiting deferred free. Jacobson did not describe
+any write-side changes he might have made in this work using SGI's Irix
+kernel. Aju John published a similar technique in 1995 [AjuJohn95].
+This works well if there is a well-defined upper bound on the length of
+time that reading threads can hold references, as there might well be in
+hard real-time systems. However, if this time is exceeded, perhaps due
+to preemption, excessive interrupts, or larger-than-anticipated load,
+memory corruption can ensue, with no reasonable means of diagnosis.
+Jacobson's technique is therefore inappropriate for use in production
+operating-system kernels, except when such kernels can provide hard
+real-time response guarantees for all operations.
+
+Also in 1995, Pu et al. [Pu95a] applied a technique similar to that of Pugh's
+read-side-tracking to permit replugging of algorithms within a commercial
+Unix operating system. However, this replugging permitted only a single
+reader at a time. The following year, this same group of researchers
+extended their technique to allow for multiple readers [Cowan96a].
+Their approach requires memory barriers (and thus pipeline stalls),
+but reduces memory latency, contention, and locking overheads.
+
+1995 also saw the first publication of DYNIX/ptx's RCU mechanism
+[Slingwine95], which was optimized for modern CPU architectures,
+and was successfully applied to a number of situations within the
+DYNIX/ptx kernel. The corresponding conference paper appeared in 1998
+[McKenney98].
+
+In 1999, the Tornado and K42 groups described their "generations"
+mechanism, which is quite similar to RCU [Gamsa99]. These operating
+systems made pervasive use of RCU in place of "existence locks", which
+greatly simplifies locking hierarchies and helps avoid deadlocks.
+
+The year 2000 saw an email exchange that would likely have
+led to yet another independent invention of something like RCU
+[RustyRussell2000a,RustyRussell2000b]. Instead, 2001 saw the first
+RCU presentation involving Linux [McKenney01a] at OLS. The resulting
+abundance of RCU patches was presented the following year [McKenney02a],
+and use of RCU in dcache was first described that same year [Linder02a].
+
+Also in 2002, Michael [Michael02b,Michael02a] presented "hazard-pointer"
+techniques that defer the destruction of data structures to simplify
+non-blocking synchronization (wait-free synchronization, lock-free
+synchronization, and obstruction-free synchronization are all examples of
+non-blocking synchronization). The corresponding journal article appeared
+in 2004 [MagedMichael04a]. This technique eliminates locking, reduces
+contention, reduces memory latency for readers, and parallelizes pipeline
+stalls and memory latency for writers. However, these techniques still
+impose significant read-side overhead in the form of memory barriers.
+Researchers at Sun worked along similar lines in the same timeframe
+[HerlihyLM02]. These techniques can be thought of as inside-out reference
+counts, where the count is represented by the number of hazard pointers
+referencing a given data structure rather than the more conventional
+counter field within the data structure itself. The key advantage
+of inside-out reference counts is that they can be stored in immortal
+variables, thus allowing races between access and deletion to be avoided.
+
+By the same token, RCU can be thought of as a "bulk reference count",
+where some form of reference counter covers all reference by a given CPU
+or thread during a set timeframe. This timeframe is related to, but
+not necessarily exactly the same as, an RCU grace period. In classic
+RCU, the reference counter is the per-CPU bit in the "bitmask" field,
+and each such bit covers all references that might have been made by
+the corresponding CPU during the prior grace period. Of course, RCU
+can be thought of in other terms as well.
+
+In 2003, the K42 group described how RCU could be used to create
+hot-pluggable implementations of operating-system functions [Appavoo03a].
+Later that year saw a paper describing an RCU implementation
+of System V IPC [Arcangeli03] (following up on a suggestion by
+Hugh Dickins [Dickins02a] and an implementation by Mingming Cao
+[MingmingCao2002IPCRCU]), and an introduction to RCU in Linux Journal
+[McKenney03a].
+
+2004 has seen a Linux-Journal article on use of RCU in dcache
+[McKenney04a], a performance comparison of locking to RCU on several
+different CPUs [McKenney04b], a dissertation describing use of RCU in a
+number of operating-system kernels [PaulEdwardMcKenneyPhD], a paper
+describing how to make RCU safe for soft-realtime applications [Sarma04c],
+and a paper describing SELinux performance with RCU [JamesMorris04b].
+
+2005 brought further adaptation of RCU to realtime use, permitting
+preemption of RCU realtime critical sections [PaulMcKenney05a,
+PaulMcKenney05b].
+
+2006 saw the first best-paper award for an RCU paper [ThomasEHart2006a],
+as well as further work on efficient implementations of preemptible
+RCU [PaulEMcKenney2006b], but priority-boosting of RCU read-side critical
+sections proved elusive. An RCU implementation permitting general
+blocking in read-side critical sections appeared [PaulEMcKenney2006c],
+Robert Olsson described an RCU-protected trie-hash combination
+[RobertOlsson2006a].
+
+2007 saw the journal version of the award-winning RCU paper from 2006
+[ThomasEHart2007a], as well as a paper demonstrating use of Promela
+and Spin to mechanically verify an optimization to Oleg Nesterov's
+QRCU [PaulEMcKenney2007QRCUspin], a design document describing
+preemptible RCU [PaulEMcKenney2007PreemptibleRCU], and the three-part
+LWN "What is RCU?" series [PaulEMcKenney2007WhatIsRCUFundamentally,
+PaulEMcKenney2008WhatIsRCUUsage, and PaulEMcKenney2008WhatIsRCUAPI].
+
+2008 saw a journal paper on real-time RCU [DinakarGuniguntala2008IBMSysJ],
+a history of how Linux changed RCU more than RCU changed Linux
+[PaulEMcKenney2008RCUOSR], and a design overview of hierarchical RCU
+[PaulEMcKenney2008HierarchicalRCU].
+
+2009 introduced user-level RCU algorithms [PaulEMcKenney2009MaliciousURCU],
+which Mathieu Desnoyers is now maintaining [MathieuDesnoyers2009URCU]
+[MathieuDesnoyersPhD]. TINY_RCU [PaulEMcKenney2009BloatWatchRCU] made
+its appearance, as did expedited RCU [PaulEMcKenney2009expeditedRCU].
+The problem of resizeable RCU-protected hash tables may now be on a path
+to a solution [JoshTriplett2009RPHash]. A few academic researchers are now
+using RCU to solve their parallel problems [HariKannan2009DynamicAnalysisRCU].
+
+2010 produced a simpler preemptible-RCU implementation
+based on TREE_RCU [PaulEMcKenney2010SimpleOptRCU], lockdep-RCU
+[PaulEMcKenney2010LockdepRCU], another resizeable RCU-protected hash
+table [HerbertXu2010RCUResizeHash] (this one consuming more memory,
+but allowing arbitrary changes in hash function, as required for DoS
+avoidance in the networking code), realization of the 2009 RCU-protected
+hash table with atomic node move [JoshTriplett2010RPHash], an update on
+the RCU API [PaulEMcKenney2010RCUAPI].
+
+2011 marked the inclusion of Nick Piggin's fully lockless dentry search
+[LinusTorvalds2011Linux2:6:38:rc1:NPigginVFS], an RCU-protected red-black
+tree using software transactional memory to protect concurrent updates
+(strange, but true!) [PhilHoward2011RCUTMRBTree], yet another variant of
+RCU-protected resizeable hash tables [Triplett:2011:RPHash], the 3.0 RCU
+trainwreck [PaulEMcKenney2011RCU3.0trainwreck], and Neil Brown's "Meet the
+Lockers" LWN article [NeilBrown2011MeetTheLockers]. Some academic
+work looked at debugging uses of RCU [Seyster:2011:RFA:2075416.2075425].
+
+In 2012, Josh Triplett received his Ph.D. with his dissertation
+covering RCU-protected resizable hash tables and the relationship
+between memory barriers and read-side traversal order: If the updater
+is making changes in the opposite direction from the read-side traveral
+order, the updater need only execute a memory-barrier instruction,
+but if in the same direction, the updater needs to wait for a grace
+period between the individual updates [JoshTriplettPhD]. Also in 2012,
+after seventeen years of attempts, an RCU paper made it into a top-flight
+academic journal, IEEE Transactions on Parallel and Distributed Systems
+[MathieuDesnoyers2012URCU]. A group of researchers in Spain applied
+user-level RCU to crowd simulation [GuillermoVigueras2012RCUCrowd], and
+another group of researchers in Europe produced a formal description of
+RCU based on separation logic [AlexeyGotsman2012VerifyGraceExtended],
+which was published in the 2013 European Symposium on Programming
+[AlexeyGotsman2013ESOPRCU].
+
+
+
+Bibtex Entries
+
+@article{Kung80
+,author="H. T. Kung and Q. Lehman"
+,title="Concurrent Manipulation of Binary Search Trees"
+,Year="1980"
+,Month="September"
+,journal="ACM Transactions on Database Systems"
+,volume="5"
+,number="3"
+,pages="354-382"
+,annotation={
+ Use garbage collector to clean up data after everyone is done with it.
+ .
+ Oldest use of something vaguely resembling RCU that I have found.
+ http://portal.acm.org/citation.cfm?id=320619&dl=GUIDE,
+ [Viewed December 3, 2007]
+}
+}
+
+@techreport{Manber82
+,author="Udi Manber and Richard E. Ladner"
+,title="Concurrency Control in a Dynamic Search Structure"
+,institution="Department of Computer Science, University of Washington"
+,address="Seattle, Washington"
+,year="1982"
+,number="82-01-01"
+,month="January"
+,pages="28"
+,annotation={
+ .
+ Superseded by Manber84.
+ .
+ Describes concurrent AVL tree implementation. Uses a
+ garbage-collection mechanism to handle concurrent use and deletion
+ of nodes in the tree, but lacks the summary-of-execution-history
+ concept of read-copy locking.
+ .
+ Keeps full list of processes that were active when a given
+ node was to be deleted, and waits until all such processes have
+ -terminated- before allowing this node to be reused. This is
+ not described in great detail -- one could imagine using process
+ IDs for this if the ID space was large enough that overlapping
+ never occurred.
+ .
+ This restriction makes this algorithm unsuitable for use in
+ systems comprised of long-lived processes. It also produces
+ completely unacceptable overhead in systems with large numbers
+ of processes. Finally, it is specific to AVL trees.
+ .
+ Cites Kung80, so not an independent invention, but the first
+ RCU-like usage that does not rely on an automatic garbage
+ collector.
+}
+}
+
+@article{Manber84
+,author="Udi Manber and Richard E. Ladner"
+,title="Concurrency Control in a Dynamic Search Structure"
+,Year="1984"
+,Month="September"
+,journal="ACM Transactions on Database Systems"
+,volume="9"
+,number="3"
+,pages="439-455"
+,annotation={
+ Describes concurrent AVL tree implementation. Uses a
+ garbage-collection mechanism to handle concurrent use and deletion
+ of nodes in the tree, but lacks the summary-of-execution-history
+ concept of read-copy locking.
+ .
+ Keeps full list of processes that were active when a given
+ node was to be deleted, and waits until all such processes have
+ -terminated- before allowing this node to be reused. This is
+ not described in great detail -- one could imagine using process
+ IDs for this if the ID space was large enough that overlapping
+ never occurred.
+ .
+ This restriction makes this algorithm unsuitable for use in
+ systems comprised of long-lived processes. It also produces
+ completely unacceptable overhead in systems with large numbers
+ of processes. Finally, it is specific to AVL trees.
+}
+}
+
+@Conference{RichardRashid87a
+,Author="Richard Rashid and Avadis Tevanian and Michael Young and
+David Golub and Robert Baron and David Black and William Bolosky and
+Jonathan Chew"
+,Title="Machine-Independent Virtual Memory Management for Paged
+Uniprocessor and Multiprocessor Architectures"
+,Booktitle="{2\textsuperscript{nd} Symposium on Architectural Support
+for Programming Languages and Operating Systems}"
+,Publisher="Association for Computing Machinery"
+,Month="October"
+,Year="1987"
+,pages="31-39"
+,Address="Palo Alto, CA"
+,note="Available:
+\url{http://www.cse.ucsc.edu/~randal/221/rashid-machvm.pdf}
+[Viewed February 17, 2005]"
+,annotation={
+ Describes lazy TLB flush, where one waits for each CPU to pass
+ through a scheduling-clock interrupt before reusing a given range
+ of virtual address. Does not describe how one determines that
+ all CPUs have in fact taken such an interrupt, though there are
+ no shortage of straightforward methods for accomplishing this.
+ .
+ Note that it does not make sense to just wait a fixed amount of
+ time, since a given CPU might have interrupts disabled for an
+ extended amount of time.
+}
+}
+
+@article{BarbaraLiskov1988ArgusCACM
+,author = {Barbara Liskov}
+,title = {Distributed programming in {Argus}}
+,journal = {Commun. ACM}
+,volume = {31}
+,number = {3}
+,year = {1988}
+,issn = {0001-0782}
+,pages = {300--312}
+,doi = {http://doi.acm.org/10.1145/42392.42399}
+,publisher = {ACM}
+,address = {New York, NY, USA}
+,annotation={
+ At the top of page 307: "Conflicts with deposits and withdrawals
+ are necessary if the reported total is to be up to date. They
+ could be avoided by having total return a sum that is slightly
+ out of date." Relies on semantics -- approximate numerical
+ values sometimes OK.
+}
+}
+
+@techreport{Hennessy89
+,author="James P. Hennessy and Damian L. Osisek and Joseph W. {Seigh II}"
+,title="Passive Serialization in a Multitasking Environment"
+,institution="US Patent and Trademark Office"
+,address="Washington, DC"
+,year="1989"
+,number="US Patent 4,809,168 (lapsed)"
+,month="February"
+,pages="11"
+}
+
+@techreport{Pugh90
+,author="William Pugh"
+,title="Concurrent Maintenance of Skip Lists"
+,institution="Institute of Advanced Computer Science Studies, Department of Computer Science, University of Maryland"
+,address="College Park, Maryland"
+,year="1990"
+,number="CS-TR-2222.1"
+,month="June"
+,annotation={
+ Concurrent access to skip lists. Has both weak and strong search.
+ Uses concept of ``garbage queue'', but has no real way of cleaning
+ the garbage efficiently.
+ .
+ Appears to be an independent invention of an RCU-like mechanism.
+}
+}
+
+# Was Adams91, see also syncrefs.bib.
+@Book{Andrews91textbook
+,Author="Gregory R. Andrews"
+,title="Concurrent Programming, Principles, and Practices"
+,Publisher="Benjamin Cummins"
+,Year="1991"
+,annotation={
+ Has a few paragraphs describing ``chaotic relaxation'', a
+ numerical analysis technique that allows multiprocessors to
+ avoid synchronization overhead by using possibly-stale data.
+ .
+ Seems like this is descended from yet another independent
+ invention of RCU-like function -- but this is restricted
+ in that reclamation is not necessary.
+}
+}
+
+@phdthesis{HMassalinPhD
+,author="H. Massalin"
+,title="Synthesis: An Efficient Implementation of Fundamental Operating
+System Services"
+,school="Columbia University"
+,address="New York, NY"
+,year="1992"
+,annotation={
+ Mondo optimizing compiler.
+ Wait-free stuff.
+ Good advice: defer work to avoid synchronization. See page 90
+ (PDF page 106), Section 5.4, fourth bullet point.
+}
+}
+
+@unpublished{Jacobson93
+,author="Van Jacobson"
+,title="Avoid Read-Side Locking Via Delayed Free"
+,year="1993"
+,month="September"
+,note="private communication"
+,annotation={
+ Use fixed time delay to approximate grace period. Very simple,
+ but subject to random memory corruption under heavy load.
+ .
+ Independent invention of RCU-like mechanism.
+}
+}
+
+@Conference{AjuJohn95
+,Author="Aju John"
+,Title="Dynamic vnodes -- Design and Implementation"
+,Booktitle="{USENIX Winter 1995}"
+,Publisher="USENIX Association"
+,Month="January"
+,Year="1995"
+,pages="11-23"
+,Address="New Orleans, LA"
+,note="Available:
+\url{https://www.usenix.org/publications/library/proceedings/neworl/full_papers/john.a}
+[Viewed October 1, 2010]"
+,annotation={
+ Age vnodes out of the cache, and have a fixed time set by a kernel
+ parameter. Not clear that all races were in fact correctly handled.
+ Used a 20-minute time by default, which would most definitely not
+ be suitable during DoS attacks or virus scans.
+ .
+ Apparently independent invention of RCU-like mechanism.
+}
+}
+
+@conference{Pu95a
+,Author = "Calton Pu and Tito Autrey and Andrew Black and Charles Consel and
+Crispin Cowan and Jon Inouye and Lakshmi Kethana and Jonathan Walpole and
+Ke Zhang"
+,Title = "Optimistic Incremental Specialization: Streamlining a Commercial
+,Operating System"
+,Booktitle = "15\textsuperscript{th} ACM Symposium on
+,Operating Systems Principles (SOSP'95)"
+,address = "Copper Mountain, CO"
+,month="December"
+,year="1995"
+,pages="314-321"
+,annotation={
+ Uses a replugger, but with a flag to signal when people are
+ using the resource at hand. Only one reader at a time.
+}
+}
+
+@conference{Cowan96a
+,Author = "Crispin Cowan and Tito Autrey and Charles Krasic and
+,Calton Pu and Jonathan Walpole"
+,Title = "Fast Concurrent Dynamic Linking for an Adaptive Operating System"
+,Booktitle = "International Conference on Configurable Distributed Systems
+(ICCDS'96)"
+,address = "Annapolis, MD"
+,month="May"
+,year="1996"
+,pages="108"
+,isbn="0-8186-7395-8"
+,annotation={
+ Uses a replugger, but with a counter to signal when people are
+ using the resource at hand. Allows multiple readers.
+}
+}
+
+@techreport{Slingwine95
+,author="John D. Slingwine and Paul E. McKenney"
+,title="Apparatus and Method for Achieving Reduced Overhead Mutual
+Exclusion and Maintaining Coherency in a Multiprocessor System
+Utilizing Execution History and Thread Monitoring"
+,institution="US Patent and Trademark Office"
+,address="Washington, DC"
+,year="1995"
+,number="US Patent 5,442,758"
+,month="August"
+,annotation={
+ Describes the parallel RCU infrastructure. Includes NUMA aspect
+ (structure of bitmap can reflect bus structure of computer system).
+ .
+ Another independent invention of an RCU-like mechanism, but the
+ "real" RCU this time!
+}
+}
+
+@techreport{Slingwine97
+,author="John D. Slingwine and Paul E. McKenney"
+,title="Method for Maintaining Data Coherency Using Thread Activity
+Summaries in a Multicomputer System"
+,institution="US Patent and Trademark Office"
+,address="Washington, DC"
+,year="1997"
+,number="US Patent 5,608,893"
+,month="March"
+,pages="19"
+,annotation={
+ Describes use of RCU to synchronize data between a pair of
+ SMP/NUMA computer systems.
+}
+}
+
+@techreport{Slingwine98
+,author="John D. Slingwine and Paul E. McKenney"
+,title="Apparatus and Method for Achieving Reduced Overhead Mutual
+Exclusion and Maintaining Coherency in a Multiprocessor System
+Utilizing Execution History and Thread Monitoring"
+,institution="US Patent and Trademark Office"
+,address="Washington, DC"
+,year="1998"
+,number="US Patent 5,727,209"
+,month="March"
+,annotation={
+ Describes doing an atomic update by copying the data item and
+ then substituting it into the data structure.
+}
+}
+
+@Conference{McKenney98
+,Author="Paul E. McKenney and John D. Slingwine"
+,Title="Read-Copy Update: Using Execution History to Solve Concurrency
+Problems"
+,Booktitle="{Parallel and Distributed Computing and Systems}"
+,Month="October"
+,Year="1998"
+,pages="509-518"
+,Address="Las Vegas, NV"
+,annotation={
+ Describes and analyzes RCU mechanism in DYNIX/ptx. Describes
+ application to linked list update and log-buffer flushing.
+ Defines 'quiescent state'. Includes both measured and analytic
+ evaluation.
+ http://www.rdrop.com/users/paulmck/RCU/rclockpdcsproof.pdf
+ [Viewed December 3, 2007]
+}
+}
+
+@Conference{Gamsa99
+,Author="Ben Gamsa and Orran Krieger and Jonathan Appavoo and Michael Stumm"
+,Title="Tornado: Maximizing Locality and Concurrency in a Shared Memory
+Multiprocessor Operating System"
+,Booktitle="{Proceedings of the 3\textsuperscript{rd} Symposium on
+Operating System Design and Implementation}"
+,Month="February"
+,Year="1999"
+,pages="87-100"
+,Address="New Orleans, LA"
+,annotation={
+ Use of RCU-like facility in K42/Tornado. Another independent
+ invention of RCU.
+ See especially pages 7-9 (Section 5).
+ http://www.usenix.org/events/osdi99/full_papers/gamsa/gamsa.pdf
+ [Viewed August 30, 2006]
+}
+}
+
+@unpublished{RustyRussell2000a
+,Author="Rusty Russell"
+,Title="Re: modular net drivers"
+,month="June"
+,year="2000"
+,day="23"
+,note="Available:
+\url{http://oss.sgi.com/projects/netdev/archive/2000-06/msg00250.html}
+[Viewed April 10, 2006]"
+,annotation={
+ Proto-RCU proposal from Phil Rumpf and Rusty Russell.
+ Yet another independent invention of RCU.
+ Outline of algorithm to unload modules...
+ .
+ Appeared on net-dev mailing list.
+}
+}
+
+@unpublished{RustyRussell2000b
+,Author="Rusty Russell"
+,Title="Re: modular net drivers"
+,month="June"
+,year="2000"
+,day="24"
+,note="Available:
+\url{http://oss.sgi.com/projects/netdev/archive/2000-06/msg00254.html}
+[Viewed April 10, 2006]"
+,annotation={
+ Proto-RCU proposal from Phil Rumpf and Rusty Russell.
+ .
+ Appeared on net-dev mailing list.
+}
+}
+
+@unpublished{McKenney01b
+,Author="Paul E. McKenney and Dipankar Sarma"
+,Title="Read-Copy Update Mutual Exclusion in {Linux}"
+,month="February"
+,year="2001"
+,note="Available:
+\url{http://lse.sourceforge.net/locking/rcu/rcupdate_doc.html}
+[Viewed October 18, 2004]"
+,annotation={
+ Prototypical Linux documentation for RCU.
+}
+}
+
+@techreport{Slingwine01
+,author="John D. Slingwine and Paul E. McKenney"
+,title="Apparatus and Method for Achieving Reduced Overhead Mutual
+Exclusion and Maintaining Coherency in a Multiprocessor System
+Utilizing Execution History and Thread Monitoring"
+,institution="US Patent and Trademark Office"
+,address="Washington, DC"
+,year="2001"
+,number="US Patent 6,219,690"
+,month="April"
+,annotation={
+ 'Change in mode' aspect of RCU. Can be thought of as a lazy barrier.
+}
+}
+
+@Conference{McKenney01a
+,Author="Paul E. McKenney and Jonathan Appavoo and Andi Kleen and
+Orran Krieger and Rusty Russell and Dipankar Sarma and Maneesh Soni"
+,Title="Read-Copy Update"
+,Booktitle="{Ottawa Linux Symposium}"
+,Month="July"
+,Year="2001"
+,note="Available:
+\url{http://www.linuxsymposium.org/2001/abstracts/readcopy.php}
+\url{http://www.rdrop.com/users/paulmck/RCU/rclock_OLS.2001.05.01c.pdf}
+[Viewed June 23, 2004]"
+,annotation={
+ Described RCU, and presented some patches implementing and using
+ it in the Linux kernel.
+}
+}
+
+@unpublished{McKenney01f
+,Author="Paul E. McKenney"
+,Title="{RFC:} patch to allow lock-free traversal of lists with insertion"
+,month="October"
+,year="2001"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=100259266316456&w=2}
+[Viewed June 23, 2004]"
+,annotation={
+ Memory-barrier and Alpha thread. 100 messages, not too bad...
+}
+}
+
+@unpublished{Spraul01
+,Author="Manfred Spraul"
+,Title="Re: {RFC:} patch to allow lock-free traversal of lists with insertion"
+,month="October"
+,year="2001"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=100264675012867&w=2}
+[Viewed June 23, 2004]"
+,annotation={
+ Suggested burying memory barriers in Linux's list-manipulation
+ primitives.
+}
+}
+
+@unpublished{LinusTorvalds2001a
+,Author="Linus Torvalds"
+,Title="{Re:} {[Lse-tech]} {Re:} {RFC:} patch to allow lock-free traversal of lists with insertion"
+,month="October"
+,year="2001"
+,note="Available:
+\url{http://lkml.org/lkml/2001/10/13/105}
+[Viewed August 21, 2004]"
+,annotation={
+}
+}
+
+@unpublished{Blanchard02a
+,Author="Anton Blanchard"
+,Title="some RCU dcache and ratcache results"
+,month="March"
+,year="2002"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=101637107412972&w=2}
+[Viewed October 18, 2004]"
+}
+
+@conference{Michael02b
+,author="Maged M. Michael"
+,title="High Performance Dynamic Lock-Free Hash Tables and List-Based Sets"
+,Year="2002"
+,Month="August"
+,booktitle="{Proceedings of the 14\textsuperscript{th} Annual ACM
+Symposium on Parallel
+Algorithms and Architecture}"
+,pages="73-82"
+,annotation={
+Like the title says...
+}
+}
+
+@Conference{Linder02a
+,Author="Hanna Linder and Dipankar Sarma and Maneesh Soni"
+,Title="Scalability of the Directory Entry Cache"
+,Booktitle="{Ottawa Linux Symposium}"
+,Month="June"
+,Year="2002"
+,pages="289-300"
+,annotation={
+ Measured scalability of Linux 2.4 kernel's directory-entry cache
+ (dcache), and measured some scalability enhancements.
+}
+}
+
+@Conference{McKenney02a
+,Author="Paul E. McKenney and Dipankar Sarma and
+Andrea Arcangeli and Andi Kleen and Orran Krieger and Rusty Russell"
+,Title="Read-Copy Update"
+,Booktitle="{Ottawa Linux Symposium}"
+,Month="June"
+,Year="2002"
+,pages="338-367"
+,note="Available:
+\url{http://www.linux.org.uk/~ajh/ols2002_proceedings.pdf.gz}
+[Viewed June 23, 2004]"
+,annotation={
+ Presented and compared a number of RCU implementations for the
+ Linux kernel.
+}
+}
+
+@unpublished{Sarma02a
+,Author="Dipankar Sarma"
+,Title="specweb99: dcache scalability results"
+,month="July"
+,year="2002"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=102645767914212&w=2}
+[Viewed June 23, 2004]"
+,annotation={
+ Compare fastwalk and RCU for dcache. RCU won.
+}
+}
+
+@unpublished{Barbieri02
+,Author="Luca Barbieri"
+,Title="Re: {[PATCH]} Initial support for struct {vfs\_cred}"
+,month="August"
+,year="2002"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=103082050621241&w=2}
+[Viewed: June 23, 2004]"
+,annotation={
+ Suggested RCU for vfs\_shared\_cred.
+}
+}
+
+@conference{Michael02a
+,author="Maged M. Michael"
+,title="Safe Memory Reclamation for Dynamic Lock-Free Objects Using Atomic
+Reads and Writes"
+,Year="2002"
+,Month="August"
+,booktitle="{Proceedings of the 21\textsuperscript{st} Annual ACM
+Symposium on Principles of Distributed Computing}"
+,pages="21-30"
+,annotation={
+ Each thread keeps an array of pointers to items that it is
+ currently referencing. Sort of an inside-out garbage collection
+ mechanism, but one that requires the accessing code to explicitly
+ state its needs. Also requires read-side memory barriers on
+ most architectures.
+}
+}
+
+@unpublished{Dickins02a
+,author="Hugh Dickins"
+,title="Use RCU for System-V IPC"
+,year="2002"
+,month="October"
+,note="private communication"
+}
+
+@InProceedings{HerlihyLM02
+,author={Maurice Herlihy and Victor Luchangco and Mark Moir}
+,title="The Repeat Offender Problem: A Mechanism for Supporting Dynamic-Sized,
+Lock-Free Data Structures"
+,booktitle={Proceedings of 16\textsuperscript{th} International
+Symposium on Distributed Computing}
+,year=2002
+,month="October"
+,pages="339-353"
+}
+
+@unpublished{Sarma02b
+,Author="Dipankar Sarma"
+,Title="Some dcache\_rcu benchmark numbers"
+,month="October"
+,year="2002"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=103462075416638&w=2}
+[Viewed June 23, 2004]"
+,annotation={
+ Performance of dcache RCU on kernbench for 16x NUMA-Q and 1x,
+ 2x, and 4x systems. RCU does no harm, and helps on 16x.
+}
+}
+
+@unpublished{MingmingCao2002IPCRCU
+,Author="Mingming Cao"
+,Title="[PATCH]updated ipc lock patch"
+,month="October"
+,year="2002"
+,note="Available:
+\url{https://lkml.org/lkml/2002/10/24/262}
+[Viewed February 15, 2014]"
+,annotation={
+ Mingming Cao's patch to introduce RCU to SysV IPC.
+}
+}
+
+@unpublished{LinusTorvalds2003a
+,Author="Linus Torvalds"
+,Title="Re: {[PATCH]} small fixes in brlock.h"
+,month="March"
+,year="2003"
+,note="Available:
+\url{http://lkml.org/lkml/2003/3/9/205}
+[Viewed March 13, 2006]"
+,annotation={
+ Linus suggests replacing brlock with RCU and/or seqlocks:
+ .
+ 'It's entirely possible that the current user could be replaced
+ by RCU and/or seqlocks, and we could get rid of brlocks entirely.'
+ .
+ Steve Hemminger responds by replacing them with RCU.
+}
+}
+
+@article{Appavoo03a
+,author="J. Appavoo and K. Hui and C. A. N. Soules and R. W. Wisniewski and
+D. M. {Da Silva} and O. Krieger and M. A. Auslander and D. J. Edelsohn and
+B. Gamsa and G. R. Ganger and P. McKenney and M. Ostrowski and
+B. Rosenburg and M. Stumm and J. Xenidis"
+,title="Enabling Autonomic Behavior in Systems Software With Hot Swapping"
+,Year="2003"
+,Month="January"
+,journal="IBM Systems Journal"
+,volume="42"
+,number="1"
+,pages="60-76"
+,annotation={
+ Use of RCU to enable hot-swapping for autonomic behavior in K42.
+}
+}
+
+@unpublished{Seigh03
+,author="Joseph W. {Seigh II}"
+,title="Read Copy Update"
+,Year="2003"
+,Month="March"
+,note="email correspondence"
+,annotation={
+ Described the relationship of the VM/XA passive serialization to RCU.
+}
+}
+
+@Conference{Arcangeli03
+,Author="Andrea Arcangeli and Mingming Cao and Paul E. McKenney and
+Dipankar Sarma"
+,Title="Using Read-Copy Update Techniques for {System V IPC} in the
+{Linux} 2.5 Kernel"
+,Booktitle="Proceedings of the 2003 USENIX Annual Technical Conference
+(FREENIX Track)"
+,Publisher="USENIX Association"
+,year="2003"
+,month="June"
+,pages="297-310"
+,annotation={
+ Compared updated RCU implementations for the Linux kernel, and
+ described System V IPC use of RCU, including order-of-magnitude
+ performance improvements.
+ http://www.rdrop.com/users/paulmck/RCU/rcu.FREENIX.2003.06.14.pdf
+}
+}
+
+@Conference{Soules03a
+,Author="Craig A. N. Soules and Jonathan Appavoo and Kevin Hui and
+Dilma {Da Silva} and Gregory R. Ganger and Orran Krieger and
+Michael Stumm and Robert W. Wisniewski and Marc Auslander and
+Michal Ostrowski and Bryan Rosenburg and Jimi Xenidis"
+,Title="System Support for Online Reconfiguration"
+,Booktitle="Proceedings of the 2003 USENIX Annual Technical Conference"
+,Publisher="USENIX Association"
+,year="2003"
+,month="June"
+,pages="141-154"
+}
+
+@article{McKenney03a
+,author="Paul E. McKenney"
+,title="Using {RCU} in the {Linux} 2.5 Kernel"
+,Year="2003"
+,Month="October"
+,journal="Linux Journal"
+,volume="1"
+,number="114"
+,pages="18-26"
+,note="Available:
+\url{http://www.linuxjournal.com/article/6993}
+[Viewed November 14, 2007]"
+,annotation={
+ Reader-friendly intro to RCU, with the infamous old-man-and-brat
+ cartoon.
+}
+}
+
+@unpublished{Sarma03a
+,Author="Dipankar Sarma"
+,Title="RCU low latency patches"
+,month="December"
+,year="2003"
+,note="Message ID: 20031222180114.GA2248@in.ibm.com"
+,annotation={
+ dipankar/ct.2004.03.27/RCUll.2003.12.22.patch
+}
+}
+
+@techreport{Friedberg03a
+,author="Stuart A. Friedberg"
+,title="Lock-Free Wild Card Search Data Structure and Method"
+,institution="US Patent and Trademark Office"
+,address="Washington, DC"
+,year="2003"
+,number="US Patent 6,662,184"
+,month="December"
+,pages="112"
+,annotation={
+ Applies RCU to a wildcard-search Patricia tree in order to permit
+ synchronization-free lookup. RCU is used to retain removed nodes
+ for a grace period before freeing them.
+}
+}
+
+@article{McKenney04a
+,author="Paul E. McKenney and Dipankar Sarma and Maneesh Soni"
+,title="Scaling dcache with {RCU}"
+,Year="2004"
+,Month="January"
+,journal="Linux Journal"
+,volume="1"
+,number="118"
+,pages="38-46"
+,annotation={
+ Reader friendly intro to dcache and RCU.
+ http://www.linuxjournal.com/node/7124
+ [Viewed December 26, 2010]
+}
+}
+
+@Conference{McKenney04b
+,Author="Paul E. McKenney"
+,Title="{RCU} vs. Locking Performance on Different {CPUs}"
+,Booktitle="{linux.conf.au}"
+,Month="January"
+,Year="2004"
+,Address="Adelaide, Australia"
+,note="Available:
+\url{http://www.linux.org.au/conf/2004/abstracts.html#90}
+\url{http://www.rdrop.com/users/paulmck/RCU/lockperf.2004.01.17a.pdf}
+[Viewed June 23, 2004]"
+,annotation={
+ Compares performance of RCU to that of other locking primitives
+ over a number of CPUs (x86, Opteron, Itanium, and PPC).
+}
+}
+
+@unpublished{Sarma04a
+,Author="Dipankar Sarma"
+,Title="{[PATCH]} {RCU} for low latency (experimental)"
+,month="March"
+,year="2004"
+,note="\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=108003746402892&w=2}"
+,annotation={
+ Head of thread: dipankar/2004.03.23/rcu-low-lat.1.patch
+}
+}
+
+@unpublished{Sarma04b
+,Author="Dipankar Sarma"
+,Title="Re: {[PATCH]} {RCU} for low latency (experimental)"
+,month="March"
+,year="2004"
+,note="\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=108016474829546&w=2}"
+,annotation={
+ dipankar/rcuth.2004.03.24/rcu-throttle.patch
+}
+}
+
+@unpublished{Spraul04a
+,Author="Manfred Spraul"
+,Title="[RFC] 0/5 rcu lock update"
+,month="May"
+,year="2004"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=108546407726602&w=2}
+[Viewed June 23, 2004]"
+,annotation={
+ Hierarchical-bitmap patch for RCU infrastructure.
+}
+}
+
+@unpublished{Steiner04a
+,Author="Jack Steiner"
+,Title="Re: [Lse-tech] [RFC, PATCH] 1/5 rcu lock update:
+Add per-cpu batch counter"
+,month="May"
+,year="2004"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=108551764515332&w=2}
+[Viewed June 23, 2004]"
+,annotation={
+ RCU runs reasonably on a 512-CPU SGI using Manfred Spraul's patches,
+ which may be found at:
+ https://lkml.org/lkml/2004/5/20/49 (split vars into cachelines)
+ https://lkml.org/lkml/2004/5/22/114 (cpu_quiet() patch)
+ https://lkml.org/lkml/2004/5/25/24 (0/5)
+ https://lkml.org/lkml/2004/5/25/23 (1/5)
+ https://lkml.org/lkml/2004/5/25/265 (works for Jack)
+ https://lkml.org/lkml/2004/5/25/20 (2/5)
+ https://lkml.org/lkml/2004/5/25/22 (3/5)
+ https://lkml.org/lkml/2004/5/25/19 (4/5)
+ https://lkml.org/lkml/2004/5/25/21 (5/5)
+}
+}
+
+@Conference{Sarma04c
+,Author="Dipankar Sarma and Paul E. McKenney"
+,Title="Making {RCU} Safe for Deep Sub-Millisecond Response
+Realtime Applications"
+,Booktitle="Proceedings of the 2004 USENIX Annual Technical Conference
+(FREENIX Track)"
+,Publisher="USENIX Association"
+,year="2004"
+,month="June"
+,pages="182-191"
+,annotation={
+ Describes and compares a number of modifications to the Linux RCU
+ implementation that make it friendly to realtime applications.
+ https://www.usenix.org/conference/2004-usenix-annual-technical-conference/making-rcu-safe-deep-sub-millisecond-response
+ [Viewed July 26, 2012]
+}
+}
+
+@article{MagedMichael04a
+,author="Maged M. Michael"
+,title="Hazard Pointers: Safe Memory Reclamation for Lock-Free Objects"
+,Year="2004"
+,Month="June"
+,journal="IEEE Transactions on Parallel and Distributed Systems"
+,volume="15"
+,number="6"
+,pages="491-504"
+,url="Available:
+\url{http://www.research.ibm.com/people/m/michael/ieeetpds-2004.pdf}
+[Viewed March 1, 2005]"
+,annotation={
+ New canonical hazard-pointer citation.
+}
+}
+
+@phdthesis{PaulEdwardMcKenneyPhD
+,author="Paul E. McKenney"
+,title="Exploiting Deferred Destruction:
+An Analysis of Read-Copy-Update Techniques
+in Operating System Kernels"
+,school="OGI School of Science and Engineering at
+Oregon Health and Sciences University"
+,year="2004"
+,annotation={
+ Describes RCU implementations and presents design patterns
+ corresponding to common uses of RCU in several operating-system
+ kernels.
+ http://www.rdrop.com/users/paulmck/RCU/RCUdissertation.2004.07.14e1.pdf
+ [Viewed October 15, 2004]
+}
+}
+
+@unpublished{PaulEMcKenney2004rcu:dereference
+,Author="Dipankar Sarma"
+,Title="{Re: RCU : Abstracted RCU dereferencing [5/5]}"
+,month="August"
+,year="2004"
+,note="Available:
+\url{http://lkml.org/lkml/2004/8/6/237}
+[Viewed June 8, 2010]"
+,annotation={
+ Introduce rcu_dereference().
+}
+}
+
+@unpublished{JimHouston04a
+,Author="Jim Houston"
+,Title="{[RFC\&PATCH] Alternative {RCU} implementation}"
+,month="August"
+,year="2004"
+,note="Available:
+\url{http://lkml.org/lkml/2004/8/30/87}
+[Viewed February 17, 2005]"
+,annotation={
+ Uses active code in rcu_read_lock() and rcu_read_unlock() to
+ make RCU happen, allowing RCU to function on CPUs that do not
+ receive a scheduling-clock interrupt.
+}
+}
+
+@unpublished{TomHart04a
+,Author="Thomas E. Hart"
+,Title="Master's Thesis: Applying Lock-free Techniques to the {Linux} Kernel"
+,month="October"
+,year="2004"
+,note="Available:
+\url{http://www.cs.toronto.edu/~tomhart/masters_thesis.html}
+[Viewed October 15, 2004]"
+,annotation={
+ Proposes comparing RCU to lock-free methods for the Linux kernel.
+}
+}
+
+@unpublished{Vaddagiri04a
+,Author="Srivatsa Vaddagiri"
+,Title="Subject: [RFC] Use RCU for tcp\_ehash lookup"
+,month="October"
+,year="2004"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?t=109395731700004&r=1&w=2}
+[Viewed October 18, 2004]"
+,annotation={
+ Srivatsa's RCU patch for tcp_ehash lookup.
+}
+}
+
+@unpublished{Thirumalai04a
+,Author="Ravikiran Thirumalai"
+,Title="Subject: [patchset] Lockfree fd lookup 0 of 5"
+,month="October"
+,year="2004"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?t=109144217400003&r=1&w=2}
+[Viewed October 18, 2004]"
+,annotation={
+ Ravikiran's lockfree FD patch.
+}
+}
+
+@unpublished{Thirumalai04b
+,Author="Ravikiran Thirumalai"
+,Title="Subject: Re: [patchset] Lockfree fd lookup 0 of 5"
+,month="October"
+,year="2004"
+,note="Available:
+\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=109152521410459&w=2}
+[Viewed October 18, 2004]"
+,annotation={
+ Ravikiran's lockfree FD patch.
+}
+}
+
+@unpublished{PaulEMcKenney2004rcu:assign:pointer
+,Author="Paul E. McKenney"
+,Title="{[PATCH 1/3] RCU: \url{rcu_assign_pointer()} removal of memory barriers}"
+,month="October"
+,year="2004"
+,note="Available:
+\url{http://lkml.org/lkml/2004/10/23/241}
+[Viewed June 8, 2010]"
+,annotation={
+ Introduce rcu_assign_pointer().
+}
+}
+
+@unpublished{JamesMorris04a
+,Author="James Morris"
+,Title="{[PATCH 2/3] SELinux} scalability - convert {AVC} to {RCU}"
+,day="15"
+,month="November"
+,year="2004"
+,note="\url{http://marc.theaimsgroup.com/?l=linux-kernel&m=110054979416004&w=2}"
+,annotation={
+ James Morris posts Kaigai Kohei's patch to LKML.
+ [Viewed December 10, 2004]
+ Kaigai's patch is at https://lkml.org/lkml/2004/9/27/52
+}
+}
+
+@unpublished{JamesMorris04b
+,Author="James Morris"
+,Title="Recent Developments in {SELinux} Kernel Performance"
+,month="December"
+,year="2004"
+,note="Available:
+\url{http://www.livejournal.com/users/james_morris/2153.html}
+[Viewed December 10, 2004]"
+,annotation={
+ RCU helps SELinux performance. ;-) Made LWN.
+}
+}
+
+@unpublished{PaulMcKenney2005RCUSemantics
+,Author="Paul E. McKenney and Jonathan Walpole"
+,Title="{RCU} Semantics: A First Attempt"
+,month="January"
+,year="2005"
+,day="30"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/rcu-semantics.2005.01.30a.pdf}
+[Viewed December 6, 2009]"
+,annotation={
+ Early derivation of RCU semantics.
+}
+}
+
+@unpublished{PaulMcKenney2005e
+,Author="Paul E. McKenney"
+,Title="Real-Time Preemption and {RCU}"
+,month="March"
+,year="2005"
+,day="17"
+,note="Available:
+\url{http://lkml.org/lkml/2005/3/17/199}
+[Viewed September 5, 2005]"
+,annotation={
+ First posting showing how RCU can be safely adapted for
+ preemptable RCU read side critical sections.
+}
+}
+
+@unpublished{EsbenNeilsen2005a
+,Author="Esben Neilsen"
+,Title="Re: Real-Time Preemption and {RCU}"
+,month="March"
+,year="2005"
+,day="18"
+,note="Available:
+\url{http://lkml.org/lkml/2005/3/18/122}
+[Viewed March 30, 2006]"
+,annotation={
+ Esben Neilsen suggests read-side suppression of grace-period
+ processing for crude-but-workable realtime RCU. The downside
+ is indefinite grace periods... But this is OK for experimentation
+ and testing.
+}
+}
+
+@unpublished{TomHart05a
+,Author="Thomas E. Hart and Paul E. McKenney and Angela Demke Brown"
+,Title="Efficient Memory Reclamation is Necessary for Fast Lock-Free
+Data Structures"
+,month="March"
+,year="2005"
+,note="Available:
+\url{ftp://ftp.cs.toronto.edu/csrg-technical-reports/515/}
+[Viewed March 4, 2005]"
+,annotation={
+ Comparison of RCU, QBSR, and EBSR. RCU wins for read-mostly
+ workloads. ;-)
+}
+}
+
+@unpublished{JonCorbet2005DeprecateSyncKernel
+,Author="Jonathan Corbet"
+,Title="API change: synchronize_kernel() deprecated"
+,month="May"
+,day="3"
+,year="2005"
+,note="Available:
+\url{http://lwn.net/Articles/134484/}
+[Viewed May 3, 2005]"
+,annotation={
+ Jon Corbet describes deprecation of synchronize_kernel()
+ in favor of synchronize_rcu() and synchronize_sched().
+}
+}
+
+@unpublished{PaulMcKenney05a
+,Author="Paul E. McKenney"
+,Title="{[RFC]} {RCU} and {CONFIG\_PREEMPT\_RT} progress"
+,month="May"
+,year="2005"
+,note="Available:
+\url{http://lkml.org/lkml/2005/5/9/185}
+[Viewed May 13, 2005]"
+,annotation={
+ First publication of working lock-based deferred free patches
+ for the CONFIG_PREEMPT_RT environment.
+}
+}
+
+@conference{PaulMcKenney05b
+,Author="Paul E. McKenney and Dipankar Sarma"
+,Title="Towards Hard Realtime Response from the {Linux} Kernel on {SMP} Hardware"
+,Booktitle="linux.conf.au 2005"
+,month="April"
+,year="2005"
+,address="Canberra, Australia"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/realtimeRCU.2005.04.23a.pdf}
+[Viewed May 13, 2005]"
+,annotation={
+ Realtime turns into making RCU yet more realtime friendly.
+ http://lca2005.linux.org.au/Papers/Paul%20McKenney/Towards%20Hard%20Realtime%20Response%20from%20the%20Linux%20Kernel/LKS.2005.04.22a.pdf
+}
+}
+
+@unpublished{PaulEMcKenneyHomePage
+,Author="Paul E. McKenney"
+,Title="{Paul} {E.} {McKenney}"
+,month="May"
+,year="2005"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/}
+[Viewed May 25, 2005]"
+,annotation={
+ Paul McKenney's home page.
+}
+}
+
+@unpublished{PaulEMcKenneyRCUPage
+,Author="Paul E. McKenney"
+,Title="Read-Copy Update {(RCU)}"
+,month="May"
+,year="2005"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU}
+[Viewed May 25, 2005]"
+,annotation={
+ Paul McKenney's RCU page.
+}
+}
+
+@unpublished{JosephSeigh2005a
+,Author="Joseph Seigh"
+,Title="{RCU}+{SMR} (hazard pointers)"
+,month="July"
+,year="2005"
+,note="Personal communication"
+,annotation={
+ Joe Seigh announcing his atomic-ptr-plus project.
+ http://sourceforge.net/projects/atomic-ptr-plus/
+}
+}
+
+@unpublished{JosephSeigh2005b
+,Author="Joseph Seigh"
+,Title="Lock-free synchronization primitives"
+,month="July"
+,day="6"
+,year="2005"
+,note="Available:
+\url{http://sourceforge.net/projects/atomic-ptr-plus/}
+[Viewed August 8, 2005]"
+,annotation={
+ Joe Seigh's atomic-ptr-plus project.
+}
+}
+
+@unpublished{PaulMcKenney2005c
+,Author="Paul E.McKenney"
+,Title="{[RFC,PATCH] RCU} and {CONFIG\_PREEMPT\_RT} sane patch"
+,month="August"
+,day="1"
+,year="2005"
+,note="Available:
+\url{http://lkml.org/lkml/2005/8/1/155}
+[Viewed March 14, 2006]"
+,annotation={
+ First operating counter-based realtime RCU patch posted to LKML.
+}
+}
+
+@unpublished{PaulMcKenney2005d
+,Author="Paul E. McKenney"
+,Title="Re: [Fwd: Re: [patch] Real-Time Preemption, -RT-2.6.13-rc4-V0.7.52-01]"
+,month="August"
+,day="8"
+,year="2005"
+,note="Available:
+\url{http://lkml.org/lkml/2005/8/8/108}
+[Viewed March 14, 2006]"
+,annotation={
+ First operating counter-based realtime RCU patch posted to LKML,
+ but fixed so that various unusual combinations of configuration
+ parameters all function properly.
+}
+}
+
+@unpublished{PaulMcKenney2005rcutorture
+,Author="Paul E. McKenney"
+,Title="{[PATCH]} {RCU} torture testing"
+,month="October"
+,day="1"
+,year="2005"
+,note="Available:
+\url{http://lkml.org/lkml/2005/10/1/70}
+[Viewed March 14, 2006]"
+,annotation={
+ First rcutorture patch.
+}
+}
+
+@unpublished{DavidSMiller2006HashedLocking
+,Author="David S. Miller"
+,Title="Re: [{PATCH}, {RFC}] {RCU} : {OOM} avoidance and lower latency"
+,month="January"
+,day="6"
+,year="2006"
+,note="Available:
+\url{https://lkml.org/lkml/2006/1/7/22}
+[Viewed February 29, 2012]"
+,annotation={
+ David Miller's view on hashed arrays of locks: used to really
+ like it, but time he saw an opportunity for this technique,
+ something else always proved superior. Partitioning or RCU. ;-)
+}
+}
+
+@conference{ThomasEHart2006a
+,Author="Thomas E. Hart and Paul E. McKenney and Angela Demke Brown"
+,Title="Making Lockless Synchronization Fast: Performance Implications
+of Memory Reclamation"
+,Booktitle="20\textsuperscript{th} {IEEE} International Parallel and
+Distributed Processing Symposium"
+,month="April"
+,year="2006"
+,day="25-29"
+,address="Rhodes, Greece"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/hart_ipdps06.pdf}
+[Viewed April 28, 2008]"
+,annotation={
+ Compares QSBR, HPBR, EBR, and lock-free reference counting.
+ http://www.cs.toronto.edu/~tomhart/perflab/ipdps06.tgz
+}
+}
+
+@unpublished{NickPiggin2006radixtree
+,Author="Nick Piggin"
+,Title="[patch 3/3] radix-tree: {RCU} lockless readside"
+,month="June"
+,day="20"
+,year="2006"
+,note="Available:
+\url{http://lkml.org/lkml/2006/6/20/238}
+[Viewed March 25, 2008]"
+,annotation={
+ RCU-protected radix tree.
+}
+}
+
+@Conference{PaulEMcKenney2006b
+,Author="Paul E. McKenney and Dipankar Sarma and Ingo Molnar and
+Suparna Bhattacharya"
+,Title="Extending {RCU} for Realtime and Embedded Workloads"
+,Booktitle="{Ottawa Linux Symposium}"
+,Month="July"
+,Year="2006"
+,pages="v2 123-138"
+,note="Available:
+\url{http://www.linuxsymposium.org/2006/view_abstract.php?content_key=184}
+\url{http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf}
+[Viewed January 1, 2007]"
+,annotation={
+ Described how to improve the -rt implementation of realtime RCU.
+}
+}
+
+@unpublished{WikipediaRCU
+,Author="Paul E. McKenney and Chris Purcell and Algae and Ben Schumin and
+Gaius Cornelius and Qwertyus and Neil Conway and Sbw and Blainster and
+Canis Rufus and Zoicon5 and Anome and Hal Eisen"
+,Title="Read-Copy Update"
+,month="July"
+,day="8"
+,year="2006"
+,note="\url{http://en.wikipedia.org/wiki/Read-copy-update}"
+,annotation={
+ Wikipedia RCU page as of July 8 2006.
+ [Viewed August 21, 2006]
+}
+}
+
+@Conference{NickPiggin2006LocklessPageCache
+,Author="Nick Piggin"
+,Title="A Lockless Pagecache in Linux---Introduction, Progress, Performance"
+,Booktitle="{Ottawa Linux Symposium}"
+,Month="July"
+,Year="2006"
+,pages="v2 249-254"
+,note="Available:
+\url{http://www.linuxsymposium.org/2006/view_abstract.php?content_key=184}
+[Viewed January 11, 2009]"
+,annotation={
+ Uses RCU-protected radix tree for a lockless page cache.
+}
+}
+
+@unpublished{PaulEMcKenney2006c
+,Author="Paul E. McKenney"
+,Title="Sleepable {RCU}"
+,month="October"
+,day="9"
+,year="2006"
+,note="Available:
+\url{http://lwn.net/Articles/202847/}
+Revised:
+\url{http://www.rdrop.com/users/paulmck/RCU/srcu.2007.01.14a.pdf}
+[Viewed August 21, 2006]"
+,annotation={
+ LWN article introducing SRCU.
+}
+}
+
+@unpublished{RobertOlsson2006a
+,Author="Robert Olsson and Stefan Nilsson"
+,Title="{TRASH}: A dynamic {LC}-trie and hash data structure"
+,month="August"
+,day="18"
+,year="2006"
+,note="\url{http://www.nada.kth.se/~snilsson/publications/TRASH/trash.pdf}"
+,annotation={
+ RCU-protected dynamic trie-hash combination.
+ [Viewed March 4, 2011]
+}
+}
+
+@unpublished{ChristophHellwig2006RCU2SRCU
+,Author="Christoph Hellwig"
+,Title="Re: {[-mm PATCH 1/4]} {RCU}: split classic rcu"
+,month="September"
+,day="28"
+,year="2006"
+,note="Available:
+\url{http://lkml.org/lkml/2006/9/28/160}
+[Viewed March 27, 2008]"
+}
+
+@unpublished{PaulEMcKenneyRCUusagePage
+,Author="Paul E. McKenney"
+,Title="{RCU} {Linux} Usage"
+,month="October"
+,year="2006"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/linuxusage.html}
+[Viewed January 14, 2007]"
+,annotation={
+ Paul McKenney's RCU page showing graphs plotting Linux-kernel
+ usage of RCU.
+}
+}
+
+@unpublished{PaulEMcKenneyRCUusageRawDataPage
+,Author="Paul E. McKenney"
+,Title="Read-Copy Update {(RCU)} Usage in {Linux} Kernel"
+,month="October"
+,year="2006"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html}
+[Viewed January 14, 2007]"
+,annotation={
+ Paul McKenney's RCU page showing Linux usage of RCU in tabular
+ form, with links to corresponding cscope databases.
+}
+}
+
+@unpublished{GauthamShenoy2006RCUrwlock
+,Author="Gautham R. Shenoy"
+,Title="[PATCH 4/5] lock\_cpu\_hotplug: Redesign - Lightweight implementation of lock\_cpu\_hotplug"
+,month="October"
+,year="2006"
+,day=26
+,note="Available:
+\url{http://lkml.org/lkml/2006/10/26/73}
+[Viewed January 26, 2009]"
+,annotation={
+ RCU-based reader-writer lock that allows readers to proceed with
+ no memory barriers or atomic instruction in absence of writers.
+ If writer do show up, readers must of course wait as required by
+ the semantics of reader-writer locking. This is a recursive
+ lock.
+}
+}
+
+@unpublished{JensAxboe2006SlowSRCU
+,Author="Jens Axboe"
+,Title="Re: [patch] cpufreq: mark \url{cpufreq_tsc()} as
+\url{core_initcall_sync}"
+,month="November"
+,year="2006"
+,day=17
+,note="Available:
+\url{http://lkml.org/lkml/2006/11/17/56}
+[Viewed May 28, 2007]"
+,annotation={
+ SRCU's grace periods are too slow for Jens, even after a
+ factor-of-three speedup.
+ Sped-up version of SRCU at http://lkml.org/lkml/2006/11/17/359.
+}
+}
+
+@unpublished{OlegNesterov2006QRCU
+,Author="Oleg Nesterov"
+,Title="Re: [patch] cpufreq: mark {\tt cpufreq\_tsc()} as
+{\tt core\_initcall\_sync}"
+,month="November"
+,year="2006"
+,day=19
+,note="Available:
+\url{http://lkml.org/lkml/2006/11/19/69}
+[Viewed May 28, 2007]"
+,annotation={
+ First cut of QRCU. Expanded/corrected versions followed.
+ Used to be OlegNesterov2007QRCU, now time-corrected.
+}
+}
+
+@unpublished{OlegNesterov2006aQRCU
+,Author="Oleg Nesterov"
+,Title="Re: [RFC, PATCH 1/2] qrcu: {"quick"} srcu implementation"
+,month="November"
+,year="2006"
+,day=30
+,note="Available:
+\url{http://lkml.org/lkml/2006/11/29/330}
+[Viewed November 26, 2008]"
+,annotation={
+ Expanded/corrected version of QRCU.
+ Used to be OlegNesterov2007aQRCU, now time-corrected.
+}
+}
+
+@unpublished{EvgeniyPolyakov2006RCUslowdown
+,Author="Evgeniy Polyakov"
+,Title="Badness in postponing work"
+,month="December"
+,year="2006"
+,day=05
+,note="Available:
+\url{http://www.ioremap.net/node/41}
+[Viewed October 28, 2008]"
+,annotation={
+ Using RCU as a pure delay leads to a 2.5x slowdown in skbs in
+ the Linux kernel.
+}
+}
+
+@inproceedings{ChrisMatthews2006ClusteredObjectsRCU
+,author = {Matthews, Chris and Coady, Yvonne and Appavoo, Jonathan}
+,title = {Portability events: a programming model for scalable system infrastructures}
+,booktitle = {PLOS '06: Proceedings of the 3rd workshop on Programming languages and operating systems}
+,year = {2006}
+,isbn = {1-59593-577-0}
+,pages = {11}
+,location = {San Jose, California}
+,doi = {http://doi.acm.org/10.1145/1215995.1216006}
+,publisher = {ACM}
+,address = {New York, NY, USA}
+,annotation={
+ Uses K42's RCU-like functionality to manage clustered-object
+ lifetimes.
+}
+}
+
+@article{DilmaDaSilva2006K42
+,author = {Silva, Dilma Da and Krieger, Orran and Wisniewski, Robert W. and Waterland, Amos and Tam, David and Baumann, Andrew}
+,title = {K42: an infrastructure for operating system research}
+,journal = {SIGOPS Oper. Syst. Rev.}
+,volume = {40}
+,number = {2}
+,year = {2006}
+,issn = {0163-5980}
+,pages = {34--42}
+,doi = {http://doi.acm.org/10.1145/1131322.1131333}
+,publisher = {ACM}
+,address = {New York, NY, USA}
+,annotation={
+ Describes relationship of K42 generations to RCU.
+}
+}
+
+# CoreyMinyard2007list_splice_rcu
+@unpublished{CoreyMinyard2007list:splice:rcu
+,Author="Corey Minyard and Paul E. McKenney"
+,Title="{[PATCH]} add an {RCU} version of list splicing"
+,month="January"
+,year="2007"
+,day=3
+,note="Available:
+\url{http://lkml.org/lkml/2007/1/3/112}
+[Viewed May 28, 2007]"
+,annotation={
+ Patch for list_splice_rcu().
+}
+}
+
+@unpublished{PaulEMcKenney2007rcubarrier
+,Author="Paul E. McKenney"
+,Title="{RCU} and Unloadable Modules"
+,month="January"
+,day="14"
+,year="2007"
+,note="Available:
+\url{http://lwn.net/Articles/217484/}
+[Viewed November 22, 2007]"
+,annotation={
+ LWN article introducing the rcu_barrier() primitive.
+}
+}
+
+@unpublished{PeterZijlstra2007SyncBarrier
+,Author="Peter Zijlstra and Ingo Molnar"
+,Title="{[PATCH 3/7]} barrier: a scalable synchonisation barrier"
+,month="January"
+,year="2007"
+,day=28
+,note="Available:
+\url{http://lkml.org/lkml/2007/1/28/34}
+[Viewed March 27, 2008]"
+,annotation={
+ RCU-like implementation for frequent updaters and rare readers(!).
+ Subsumed into QRCU. Maybe...
+}
+}
+
+@unpublished{PaulEMcKenney2007BoostRCU
+,Author="Paul E. McKenney"
+,Title="Priority-Boosting {RCU} Read-Side Critical Sections"
+,month="February"
+,day="5"
+,year="2007"
+,note="\url{http://lwn.net/Articles/220677/}"
+,annotation={
+ LWN article introducing RCU priority boosting.
+ Revised:
+ http://www.rdrop.com/users/paulmck/RCU/RCUbooststate.2007.04.16a.pdf
+ [Viewed September 7, 2007]
+}
+}
+
+@unpublished{PaulMcKenney2007QRCUpatch
+,Author="Paul E. McKenney"
+,Title="{[PATCH]} {QRCU} with lockless fastpath"
+,month="February"
+,year="2007"
+,day=24
+,note="Available:
+\url{http://lkml.org/lkml/2007/2/25/18}
+[Viewed March 27, 2008]"
+,annotation={
+ Patch for QRCU supplying lock-free fast path.
+}
+}
+
+@article{JonathanAppavoo2007K42RCU
+,author = {Appavoo, Jonathan and Silva, Dilma Da and Krieger, Orran and Auslander, Marc and Ostrowski, Michal and Rosenburg, Bryan and Waterland, Amos and Wisniewski, Robert W. and Xenidis, Jimi and Stumm, Michael and Soares, Livio}
+,title = {Experience distributing objects in an SMMP OS}
+,journal = {ACM Trans. Comput. Syst.}
+,volume = {25}
+,number = {3}
+,year = {2007}
+,issn = {0734-2071}
+,pages = {6/1--6/52}
+,doi = {http://doi.acm.org/10.1145/1275517.1275518}
+,publisher = {ACM}
+,address = {New York, NY, USA}
+,annotation={
+ Role of RCU in K42.
+}
+}
+
+@conference{RobertOlsson2007Trash
+,Author="Robert Olsson and Stefan Nilsson"
+,Title="{TRASH}: A dynamic {LC}-trie and hash data structure"
+,booktitle="Workshop on High Performance Switching and Routing (HPSR'07)"
+,month="May"
+,year="2007"
+,note="Available:
+\url{http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4281239}
+[Viewed October 1, 2010]"
+,annotation={
+ RCU-protected dynamic trie-hash combination.
+}
+}
+
+@conference{PeterZijlstra2007ConcurrentPagecacheRCU
+,Author="Peter Zijlstra"
+,Title="Concurrent Pagecache"
+,Booktitle="Linux Symposium"
+,month="June"
+,year="2007"
+,address="Ottawa, Canada"
+,note="Available:
+\url{http://ols.108.redhat.com/2007/Reprints/zijlstra-Reprint.pdf}
+[Viewed April 14, 2008]"
+,annotation={
+ Page-cache modifications permitting RCU readers and concurrent
+ updates.
+}
+}
+
+@unpublished{PaulEMcKenney2007whatisRCU
+,Author="Paul E. McKenney"
+,Title="What is {RCU}?"
+,year="2007"
+,month="07"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/whatisRCU.html}
+[Viewed July 6, 2007]"
+,annotation={
+ Describes RCU in Linux kernel.
+}
+}
+
+@unpublished{PaulEMcKenney2007QRCUspin
+,Author="Paul E. McKenney"
+,Title="Using {Promela} and {Spin} to verify parallel algorithms"
+,month="August"
+,day="1"
+,year="2007"
+,note="Available:
+\url{http://lwn.net/Articles/243851/}
+[Viewed September 8, 2007]"
+,annotation={
+ LWN article describing Promela and spin, and also using Oleg
+ Nesterov's QRCU as an example (with Paul McKenney's fastpath).
+ Merged patch at: http://lkml.org/lkml/2007/2/25/18
+}
+}
+
+@unpublished{PaulEMcKenney2007WG21DDOatomics
+,Author="Paul E. McKenney and Hans-J. Boehm and Lawrence Crowl"
+,Title="C++ Data-Dependency Ordering: Atomics and Memory Model"
+,month="August"
+,day="3"
+,year="2007"
+,note="Available:
+\url{http://open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2664.htm}
+[Viewed December 7, 2009]"
+,annotation={
+ RCU for C++, parts 1 and 2.
+}
+}
+
+@unpublished{PaulEMcKenney2007WG21DDOannotation
+,Author="Paul E. McKenney and Lawrence Crowl"
+,Title="C++ Data-Dependency Ordering: Function Annotation"
+,month="September"
+,day="18"
+,year="2008"
+,note="Available:
+\url{http://open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2782.htm}
+[Viewed December 7, 2009]"
+,annotation={
+ RCU for C++, part 2, updated many times.
+}
+}
+
+@unpublished{PaulEMcKenney2007PreemptibleRCUPatch
+,Author="Paul E. McKenney"
+,Title="[PATCH RFC 0/9] {RCU}: Preemptible {RCU}"
+,month="September"
+,day="10"
+,year="2007"
+,note="Available:
+\url{http://lkml.org/lkml/2007/9/10/213}
+[Viewed October 25, 2007]"
+,annotation={
+ Final patch for preemptable RCU to -rt. (Later patches were
+ to mainline, eventually incorporated.)
+}
+}
+
+@unpublished{PaulEMcKenney2007PreemptibleRCU
+,Author="Paul E. McKenney"
+,Title="The design of preemptible read-copy-update"
+,month="October"
+,day="8"
+,year="2007"
+,note="Available:
+\url{http://lwn.net/Articles/253651/}
+[Viewed October 25, 2007]"
+,annotation={
+ LWN article describing the design of preemptible RCU.
+}
+}
+
+@article{ThomasEHart2007a
+,Author="Thomas E. Hart and Paul E. McKenney and Angela Demke Brown and Jonathan Walpole"
+,Title="Performance of memory reclamation for lockless synchronization"
+,journal="J. Parallel Distrib. Comput."
+,volume={67}
+,number="12"
+,year="2007"
+,issn="0743-7315"
+,pages="1270--1285"
+,doi="http://dx.doi.org/10.1016/j.jpdc.2007.04.010"
+,publisher="Academic Press, Inc."
+,address="Orlando, FL, USA"
+,annotation={
+ Compares QSBR, HPBR, EBR, and lock-free reference counting.
+ Journal version of ThomasEHart2006a.
+}
+}
+
+# MathieuDesnoyers2007call_rcu_schedNeeded
+@unpublished{MathieuDesnoyers2007call:rcu:schedNeeded
+,Author="Mathieu Desnoyers"
+,Title="Re: [patch 1/2] {Linux} Kernel Markers - Support Multiple Probes"
+,month="December"
+,day="20"
+,year="2007"
+,note="Available:
+\url{http://lkml.org/lkml/2007/12/20/244}
+[Viewed March 27, 2008]"
+,annotation={
+ Request for call_rcu_sched() and rcu_barrier_sched().
+}
+}
+
+
+########################################################################
+#
+# "What is RCU?" LWN series.
+#
+# http://lwn.net/Articles/262464/ (What is RCU, Fundamentally?)
+# http://lwn.net/Articles/263130/ (What is RCU's Usage?)
+# http://lwn.net/Articles/264090/ (What is RCU's API?)
+
+@unpublished{PaulEMcKenney2007WhatIsRCUFundamentally
+,Author="Paul E. McKenney and Jonathan Walpole"
+,Title="What is {RCU}, Fundamentally?"
+,month="December"
+,day="17"
+,year="2007"
+,note="Available:
+\url{http://lwn.net/Articles/262464/}
+[Viewed December 27, 2007]"
+,annotation={
+ Lays out the three basic components of RCU: (1) publish-subscribe,
+ (2) wait for pre-existing readers to complete, and (2) maintain
+ multiple versions.
+}
+}
+
+@unpublished{PaulEMcKenney2008WhatIsRCUUsage
+,Author="Paul E. McKenney"
+,Title="What is {RCU}? Part 2: Usage"
+,month="January"
+,day="4"
+,year="2008"
+,note="Available:
+\url{http://lwn.net/Articles/263130/}
+[Viewed January 4, 2008]"
+,annotation={
+ Lays out six uses of RCU:
+ 1. RCU is a Reader-Writer Lock Replacement
+ 2. RCU is a Restricted Reference-Counting Mechanism
+ 3. RCU is a Bulk Reference-Counting Mechanism
+ 4. RCU is a Poor Man's Garbage Collector
+ 5. RCU is a Way of Providing Existence Guarantees
+ 6. RCU is a Way of Waiting for Things to Finish
+}
+}
+
+@unpublished{PaulEMcKenney2008WhatIsRCUAPI
+,Author="Paul E. McKenney"
+,Title="{RCU} part 3: the {RCU} {API}"
+,month="January"
+,day="17"
+,year="2008"
+,note="Available:
+\url{http://lwn.net/Articles/264090/}
+[Viewed January 10, 2008]"
+,annotation={
+ Gives an overview of the Linux-kernel RCU API and a brief annotated RCU
+ bibliography.
+}
+}
+
+#
+# "What is RCU?" LWN series.
+#
+########################################################################
+
+
+@unpublished{SteveRostedt2008dyntickRCUpatch
+,Author="Steven Rostedt and Paul E. McKenney"
+,Title="{[PATCH]} add support for dynamic ticks and preempt rcu"
+,month="January"
+,day="29"
+,year="2008"
+,note="Available:
+\url{http://lkml.org/lkml/2008/1/29/208}
+[Viewed March 27, 2008]"
+,annotation={
+ Patch that prevents preemptible RCU from unnecessarily waking
+ up dynticks-idle CPUs.
+}
+}
+
+@unpublished{PaulEMcKenney2008LKMLDependencyOrdering
+,Author="Paul E. McKenney"
+,Title="Re: [PATCH 02/22 -v7] Add basic support for gcc profiler instrumentation"
+,month="February"
+,day="1"
+,year="2008"
+,note="Available:
+\url{http://lkml.org/lkml/2008/2/2/255}
+[Viewed October 18, 2008]"
+,annotation={
+ Explanation of compilers violating dependency ordering.
+}
+}
+
+@Conference{PaulEMcKenney2008Beijing
+,Author="Paul E. McKenney"
+,Title="Introducing Technology Into {Linux} Or:
+Introducing your technology Into {Linux} will require introducing a
+lot of {Linux} into your technology!!!"
+,Booktitle="2008 Linux Developer Symposium - China"
+,Publisher="OSS China"
+,Month="February"
+,Year="2008"
+,Address="Beijing, China"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/TechIntroLinux.2008.02.19a.pdf}
+[Viewed August 12, 2008]"
+}
+
+@unpublished{PaulEMcKenney2008dynticksRCU
+,Author="Paul E. McKenney and Steven Rostedt"
+,Title="Integrating and Validating dynticks and Preemptable RCU"
+,month="April"
+,day="24"
+,year="2008"
+,note="Available:
+\url{http://lwn.net/Articles/279077/}
+[Viewed April 24, 2008]"
+,annotation={
+ Describes use of Promela and Spin to validate (and fix!) the
+ dynticks/RCU interface.
+}
+}
+
+@article{DinakarGuniguntala2008IBMSysJ
+,author="D. Guniguntala and P. E. McKenney and J. Triplett and J. Walpole"
+,title="The read-copy-update mechanism for supporting real-time applications on shared-memory multiprocessor systems with {Linux}"
+,Year="2008"
+,Month="May"
+,journal="IBM Systems Journal"
+,volume="47"
+,number="2"
+,pages="221-236"
+,annotation={
+ RCU, realtime RCU, sleepable RCU, performance.
+ http://www.research.ibm.com/journal/sj/472/guniguntala.pdf
+ [Viewed April 24, 2008]
+}
+}
+
+@unpublished{LaiJiangshan2008NewClassicAlgorithm
+,Author="Lai Jiangshan"
+,Title="[{RFC}][{PATCH}] rcu classic: new algorithm for callbacks-processing"
+,month="June"
+,day="3"
+,year="2008"
+,note="Available:
+\url{http://lkml.org/lkml/2008/6/2/539}
+[Viewed December 10, 2008]"
+,annotation={
+ Updated RCU classic algorithm. Introduced multi-tailed list
+ for RCU callbacks and also pulling common code into
+ __call_rcu().
+}
+}
+
+@article{PaulEMcKenney2008RCUOSR
+,author="Paul E. McKenney and Jonathan Walpole"
+,title="Introducing technology into the {Linux} kernel: a case study"
+,Year="2008"
+,journal="SIGOPS Oper. Syst. Rev."
+,volume="42"
+,number="5"
+,pages="4--17"
+,issn="0163-5980"
+,doi={http://doi.acm.org/10.1145/1400097.1400099}
+,publisher="ACM"
+,address="New York, NY, USA"
+,annotation={
+ Linux changed RCU to a far greater degree than RCU has changed Linux.
+ http://portal.acm.org/citation.cfm?doid=1400097.1400099
+}
+}
+
+@unpublished{ManfredSpraul2008StateMachineRCU
+,Author="Manfred Spraul"
+,Title="[{RFC}, {PATCH}] state machine based rcu"
+,month="August"
+,day="21"
+,year="2008"
+,note="Available:
+\url{http://lkml.org/lkml/2008/8/21/336}
+[Viewed December 8, 2008]"
+,annotation={
+ State-based RCU. One key thing that this patch does is to
+ separate the dynticks handling of NMIs and IRQs.
+}
+}
+
+@unpublished{ManfredSpraul2008dyntickIRQNMI
+,Author="Manfred Spraul"
+,Title="Re: [{RFC}, {PATCH}] v4 scalable classic {RCU} implementation"
+,month="September"
+,day="6"
+,year="2008"
+,note="Available:
+\url{http://lkml.org/lkml/2008/9/6/86}
+[Viewed December 8, 2008]"
+,annotation={
+ Manfred notes a fix required to my attempt to separate irq
+ and NMI processing for hierarchical RCU's dynticks interface.
+}
+}
+
+# Was PaulEMcKenney2011cyclicRCU
+@techreport{PaulEMcKenney2008cyclicRCU
+,author="Paul E. McKenney"
+,title="Efficient Support of Consistent Cyclic Search With Read-Copy Update"
+,institution="US Patent and Trademark Office"
+,address="Washington, DC"
+,year="2008"
+,number="US Patent 7,426,511"
+,month="September"
+,pages="23"
+,annotation={
+ Maintains an additional level of indirection to allow
+ readers to confine themselves to the desired snapshot of the
+ data structure. Only permits one update at a time.
+}
+}
+
+@unpublished{PaulEMcKenney2008HierarchicalRCU
+,Author="Paul E. McKenney"
+,Title="Hierarchical {RCU}"
+,month="November"
+,day="3"
+,year="2008"
+,note="\url{http://lwn.net/Articles/305782/}"
+,annotation={
+ RCU with combining-tree-based grace-period detection,
+ permitting it to handle thousands of CPUs.
+ [Viewed November 6, 2008]
+}
+}
+
+@unpublished{PaulEMcKenney2009BloatwatchRCU
+,Author="Paul E. McKenney"
+,Title="Re: [PATCH fyi] RCU: the bloatwatch edition"
+,month="January"
+,day="14"
+,year="2009"
+,note="Available:
+\url{http://lkml.org/lkml/2009/1/14/449}
+[Viewed January 15, 2009]"
+,annotation={
+ Small-footprint implementation of RCU for uniprocessor
+ embedded applications -- and also for exposition purposes.
+}
+}
+
+@conference{PaulEMcKenney2009MaliciousURCU
+,Author="Paul E. McKenney"
+,Title="Using a Malicious User-Level {RCU} to Torture {RCU}-Based Algorithms"
+,Booktitle="linux.conf.au 2009"
+,month="January"
+,year="2009"
+,address="Hobart, Australia"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/urcutorture.2009.01.22a.pdf}
+[Viewed February 2, 2009]"
+,annotation={
+ Realtime RCU and torture-testing RCU uses.
+}
+}
+
+@unpublished{MathieuDesnoyers2009URCU
+,Author="Mathieu Desnoyers"
+,Title="[{RFC} git tree] Userspace {RCU} (urcu) for {Linux}"
+,month="February"
+,day="5"
+,year="2009"
+,note="\url{http://lttng.org/urcu}"
+,annotation={
+ Mathieu Desnoyers's user-space RCU implementation.
+ git://lttng.org/userspace-rcu.git
+ http://lttng.org/cgi-bin/gitweb.cgi?p=userspace-rcu.git
+ http://lttng.org/urcu
+ http://lkml.org/lkml/2009/2/5/572
+}
+}
+
+@unpublished{PaulEMcKenney2009LWNBloatWatchRCU
+,Author="Paul E. McKenney"
+,Title="{RCU}: The {Bloatwatch} Edition"
+,month="March"
+,day="17"
+,year="2009"
+,note="Available:
+\url{http://lwn.net/Articles/323929/}
+[Viewed March 20, 2009]"
+,annotation={
+ Uniprocessor assumptions allow simplified RCU implementation.
+}
+}
+
+@unpublished{EvgeniyPolyakov2009EllipticsNetwork
+,Author="Evgeniy Polyakov"
+,Title="The Elliptics Network"
+,month="April"
+,day="17"
+,year="2009"
+,note="Available:
+\url{http://www.ioremap.net/projects/elliptics}
+[Viewed April 30, 2009]"
+,annotation={
+ Distributed hash table with transactions, using elliptic
+ hash functions to distribute data.
+}
+}
+
+@unpublished{PaulEMcKenney2009expeditedRCU
+,Author="Paul E. McKenney"
+,Title="[{PATCH} -tip 0/3] expedited 'big hammer' {RCU} grace periods"
+,month="June"
+,day="25"
+,year="2009"
+,note="Available:
+\url{http://lkml.org/lkml/2009/6/25/306}
+[Viewed August 16, 2009]"
+,annotation={
+ First posting of expedited RCU to be accepted into -tip.
+}
+}
+
+@unpublished{PaulEMcKenney2009fastRTRCU
+,Author="Paul E. McKenney"
+,Title="[{PATCH} {RFC} -tip 0/4] {RCU} cleanups and simplified preemptable {RCU}"
+,month="July"
+,day="23"
+,year="2009"
+,note="Available:
+\url{http://lkml.org/lkml/2009/7/23/294}
+[Viewed August 15, 2009]"
+,annotation={
+ First posting of simple and fast preemptable RCU.
+}
+}
+
+@unpublished{JoshTriplett2009RPHash
+,Author="Josh Triplett"
+,Title="Scalable concurrent hash tables via relativistic programming"
+,month="September"
+,year="2009"
+,note="Linux Plumbers Conference presentation"
+,annotation={
+ RP fun with hash tables.
+ Superseded by JoshTriplett2010RPHash
+}
+}
+
+@phdthesis{MathieuDesnoyersPhD
+, title = "Low-Impact Operating System Tracing"
+, author = "Mathieu Desnoyers"
+, school = "Ecole Polytechnique de Montr\'{e}al"
+, month = "December"
+, year = 2009
+,note="Available:
+\url{http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf}
+[Viewed December 9, 2009]"
+,annotation={
+ Chapter 6 (page 97) covers user-level RCU.
+}
+}
+
+@unpublished{RelativisticProgrammingWiki
+,Author="Josh Triplett and Paul E. McKenney and Jonathan Walpole"
+,Title="Relativistic Programming"
+,month="September"
+,year="2009"
+,note="Available:
+\url{http://wiki.cs.pdx.edu/rp/}
+[Viewed December 9, 2009]"
+,annotation={
+ Main Relativistic Programming Wiki.
+}
+}
+
+@conference{PaulEMcKenney2009DeterministicRCU
+,Author="Paul E. McKenney"
+,Title="Deterministic Synchronization in Multicore Systems: the Role of {RCU}"
+,Booktitle="Eleventh Real Time Linux Workshop"
+,month="September"
+,year="2009"
+,address="Dresden, Germany"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/realtime/paper/DetSyncRCU.2009.08.18a.pdf}
+[Viewed January 14, 2009]"
+}
+
+@unpublished{PaulEMcKenney2009HuntingHeisenbugs
+,Author="Paul E. McKenney"
+,Title="Hunting Heisenbugs"
+,month="November"
+,year="2009"
+,day="1"
+,note="Available:
+\url{http://paulmck.livejournal.com/14639.html}
+[Viewed June 4, 2010]"
+,annotation={
+ Day-one bug in Tree RCU that took forever to track down.
+}
+}
+
+@unpublished{MathieuDesnoyers2009defer:rcu
+,Author="Mathieu Desnoyers"
+,Title="Kernel RCU: shrink the size of the struct rcu\_head"
+,month="December"
+,year="2009"
+,note="Available:
+\url{http://lkml.org/lkml/2009/10/18/129}
+[Viewed December 29, 2009]"
+,annotation={
+ Mathieu proposed defer_rcu() with fixed-size per-thread pool
+ of RCU callbacks.
+}
+}
+
+@unpublished{MathieuDesnoyers2009VerifPrePub
+,Author="Mathieu Desnoyers and Paul E. McKenney and Michel R. Dagenais"
+,Title="Multi-Core Systems Modeling for Formal Verification of Parallel Algorithms"
+,month="December"
+,year="2009"
+,note="Submitted to IEEE TPDS"
+,annotation={
+ OOMem model for Mathieu's user-level RCU mechanical proof of
+ correctness.
+}
+}
+
+@unpublished{MathieuDesnoyers2009URCUPrePub
+,Author="Mathieu Desnoyers and Paul E. McKenney and Alan Stern and Michel R. Dagenais and Jonathan Walpole"
+,Title="User-Level Implementations of Read-Copy Update"
+,month="December"
+,year="2010"
+,url={\url{http://www.computer.org/csdl/trans/td/2012/02/ttd2012020375-abs.html}}
+,annotation={
+ RCU overview, desiderata, semi-formal semantics, user-level RCU
+ usage scenarios, three classes of RCU implementation, wait-free
+ RCU updates, RCU grace-period batching, update overhead,
+ http://www.rdrop.com/users/paulmck/RCU/urcu-main-accepted.2011.08.30a.pdf
+ http://www.rdrop.com/users/paulmck/RCU/urcu-supp-accepted.2011.08.30a.pdf
+ Superseded by MathieuDesnoyers2012URCU.
+}
+}
+
+@inproceedings{HariKannan2009DynamicAnalysisRCU
+,author = {Kannan, Hari}
+,title = {Ordering decoupled metadata accesses in multiprocessors}
+,booktitle = {MICRO 42: Proceedings of the 42nd Annual IEEE/ACM International Symposium on Microarchitecture}
+,year = {2009}
+,isbn = {978-1-60558-798-1}
+,pages = {381--390}
+,location = {New York, New York}
+,doi = {http://doi.acm.org/10.1145/1669112.1669161}
+,publisher = {ACM}
+,address = {New York, NY, USA}
+,annotation={
+ Uses RCU to protect metadata used in dynamic analysis.
+}
+}
+
+@conference{PaulEMcKenney2010SimpleOptRCU
+,Author="Paul E. McKenney"
+,Title="Simplicity Through Optimization"
+,Booktitle="linux.conf.au 2010"
+,month="January"
+,year="2010"
+,address="Wellington, New Zealand"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/SimplicityThruOptimization.2010.01.21f.pdf}
+[Viewed October 10, 2010]"
+,annotation={
+ TREE_PREEMPT_RCU optimizations greatly simplified the old
+ PREEMPT_RCU implementation.
+}
+}
+
+@unpublished{PaulEMcKenney2010LockdepRCU
+,Author="Paul E. McKenney"
+,Title="Lockdep-{RCU}"
+,month="February"
+,year="2010"
+,day="1"
+,note="\url{https://lwn.net/Articles/371986/}"
+,annotation={
+ CONFIG_PROVE_RCU, or at least an early version.
+ [Viewed June 4, 2010]
+}
+}
+
+@unpublished{AviKivity2010KVM2RCU
+,Author="Avi Kivity"
+,Title="[{PATCH} 37/40] {KVM}: Bump maximum vcpu count to 64"
+,month="February"
+,year="2010"
+,note="Available:
+\url{http://www.mail-archive.com/kvm@vger.kernel.org/msg28640.html}
+[Viewed March 20, 2010]"
+,annotation={
+ Use of RCU permits KVM to increase the size of guest OSes from
+ 16 CPUs to 64 CPUs.
+}
+}
+
+@unpublished{HerbertXu2010RCUResizeHash
+,Author="Herbert Xu"
+,Title="bridge: Add core IGMP snooping support"
+,month="February"
+,year="2010"
+,note="Available:
+\url{http://thread.gmane.org/gmane.linux.network/153338}
+[Viewed June 9, 2014]"
+,annotation={
+ Use a pair of list_head structures to support RCU-protected
+ resizable hash tables.
+}
+}
+
+@mastersthesis{AbhinavDuggal2010Masters
+,author="Abhinav Duggal"
+,title="Stopping Data Races Using Redflag"
+,school="Stony Brook University"
+,year="2010"
+,annotation={
+ Data-race detector incorporating RCU.
+ http://www.filesystems.org/docs/abhinav-thesis/abhinav_thesis.pdf
+}
+}
+
+@article{JoshTriplett2010RPHash
+,author="Josh Triplett and Paul E. McKenney and Jonathan Walpole"
+,title="Scalable Concurrent Hash Tables via Relativistic Programming"
+,journal="ACM Operating Systems Review"
+,year=2010
+,volume=44
+,number=3
+,month="July"
+,annotation={
+ RP fun with hash tables.
+ http://portal.acm.org/citation.cfm?id=1842733.1842750
+}
+}
+
+@unpublished{PaulEMcKenney2010RCUAPI
+,Author="Paul E. McKenney"
+,Title="The {RCU} {API}, 2010 Edition"
+,month="December"
+,day="8"
+,year="2010"
+,note="\url{http://lwn.net/Articles/418853/}"
+,annotation={
+ Includes updated software-engineering features.
+ [Viewed December 8, 2010]
+}
+}
+
+@mastersthesis{AndrejPodzimek2010masters
+,author="Andrej Podzimek"
+,title="Read-Copy-Update for OpenSolaris"
+,school="Charles University in Prague"
+,year="2010"
+,note="Available:
+\url{https://andrej.podzimek.org/thesis.pdf}
+[Viewed January 31, 2011]"
+,annotation={
+ Reviews RCU implementations and creates a few for OpenSolaris.
+ Drives quiescent-state detection from RCU read-side primitives,
+ in a manner roughly similar to that of Jim Houston.
+}
+}
+
+@unpublished{LinusTorvalds2011Linux2:6:38:rc1:NPigginVFS
+,Author="Linus Torvalds"
+,Title="Linux 2.6.38-rc1"
+,month="January"
+,year="2011"
+,note="Available:
+\url{https://lkml.org/lkml/2011/1/18/322}
+[Viewed March 4, 2011]"
+,annotation={
+ "The RCU-based name lookup is at the other end of the spectrum - the
+ absolute anti-gimmick. It's some seriously good stuff, and gets rid of
+ the last main global lock that really tends to hurt some kernel loads.
+ The dentry lock is no longer a big serializing issue. What's really
+ nice about it is that it actually improves performance a lot even for
+ single-threaded loads (on an SMP kernel), because it gets rid of some
+ of the most expensive parts of path component lookup, which was the
+ d_lock on every component lookup. So I'm seeing improvements of 30-50%
+ on some seriously pathname-lookup intensive loads."
+}
+}
+
+@techreport{JoshTriplett2011RPScalableCorrectOrdering
+,author = {Josh Triplett and Philip W. Howard and Paul E. McKenney and Jonathan Walpole}
+,title = {Scalable Correct Memory Ordering via Relativistic Programming}
+,year = {2011}
+,number = {11-03}
+,institution = {Portland State University}
+,note = {\url{http://www.cs.pdx.edu/pdfs/tr1103.pdf}}
+}
+
+@inproceedings{PhilHoward2011RCUTMRBTree
+,author = {Philip W. Howard and Jonathan Walpole}
+,title = {A Relativistic Enhancement to Software Transactional Memory}
+,booktitle = {Proceedings of the 3rd USENIX conference on Hot topics in parallelism}
+,series = {HotPar'11}
+,year = {2011}
+,location = {Berkeley, CA}
+,pages = {1--6}
+,numpages = {6}
+,url = {http://www.usenix.org/event/hotpar11/tech/final_files/Howard.pdf}
+,publisher = {USENIX Association}
+,address = {Berkeley, CA, USA}
+}
+
+@techreport{PaulEMcKenney2011cyclicparallelRCU
+,author="Paul E. McKenney and Jonathan Walpole"
+,title="Efficient Support of Consistent Cyclic Search With Read-Copy Update and Parallel Updates"
+,institution="US Patent and Trademark Office"
+,address="Washington, DC"
+,year="2011"
+,number="US Patent 7,953,778"
+,month="May"
+,pages="34"
+,annotation={
+ Maintains an array of generation numbers to track in-flight
+ updates and keeps an additional level of indirection to allow
+ readers to confine themselves to the desired snapshot of the
+ data structure.
+}
+}
+
+@inproceedings{Triplett:2011:RPHash
+,author = {Triplett, Josh and McKenney, Paul E. and Walpole, Jonathan}
+,title = {Resizable, Scalable, Concurrent Hash Tables via Relativistic Programming}
+,booktitle = {Proceedings of the 2011 USENIX Annual Technical Conference}
+,month = {June}
+,year = {2011}
+,pages = {145--158}
+,numpages = {14}
+,url={http://www.usenix.org/event/atc11/tech/final_files/Triplett.pdf}
+,publisher = {The USENIX Association}
+,address = {Portland, OR USA}
+}
+
+@unpublished{PaulEMcKenney2011RCU3.0trainwreck
+,Author="Paul E. McKenney"
+,Title="3.0 and {RCU:} what went wrong"
+,month="July"
+,day="27"
+,year="2011"
+,note="\url{http://lwn.net/Articles/453002/}"
+,annotation={
+ Analysis of the RCU trainwreck in Linux kernel 3.0.
+ [Viewed July 27, 2011]
+}
+}
+
+@unpublished{NeilBrown2011MeetTheLockers
+,Author="Neil Brown"
+,Title="Meet the {Lockers}"
+,month="August"
+,day="3"
+,year="2011"
+,note="Available:
+\url{http://lwn.net/Articles/453685/}
+[Viewed September 2, 2011]"
+,annotation={
+ The Locker family as an analogy for locking, reference counting,
+ RCU, and seqlock.
+}
+}
+
+@inproceedings{Seyster:2011:RFA:2075416.2075425
+,author = {Seyster, Justin and Radhakrishnan, Prabakar and Katoch, Samriti and Duggal, Abhinav and Stoller, Scott D. and Zadok, Erez}
+,title = {Redflag: a framework for analysis of Kernel-level concurrency}
+,booktitle = {Proceedings of the 11th international conference on Algorithms and architectures for parallel processing - Volume Part I}
+,series = {ICA3PP'11}
+,year = {2011}
+,isbn = {978-3-642-24649-4}
+,location = {Melbourne, Australia}
+,pages = {66--79}
+,numpages = {14}
+,url = {http://dl.acm.org/citation.cfm?id=2075416.2075425}
+,acmid = {2075425}
+,publisher = {Springer-Verlag}
+,address = {Berlin, Heidelberg}
+}
+
+@phdthesis{JoshTriplettPhD
+,author="Josh Triplett"
+,title="Relativistic Causal Ordering: A Memory Model for Scalable Concurrent Data Structures"
+,school="Portland State University"
+,year="2012"
+,annotation={
+ RCU-protected hash tables, barriers vs. read-side traversal order.
+ .
+ If the updater is making changes in the opposite direction from
+ the read-side traveral order, the updater need only execute a
+ memory-barrier instruction, but if in the same direction, the
+ updater needs to wait for a grace period between the individual
+ updates.
+}
+}
+
+@article{MathieuDesnoyers2012URCU
+,Author="Mathieu Desnoyers and Paul E. McKenney and Alan Stern and Michel R. Dagenais and Jonathan Walpole"
+,Title="User-Level Implementations of Read-Copy Update"
+,journal="IEEE Transactions on Parallel and Distributed Systems"
+,volume={23}
+,year="2012"
+,issn="1045-9219"
+,pages="375-382"
+,doi="http://doi.ieeecomputersociety.org/10.1109/TPDS.2011.159"
+,publisher="IEEE Computer Society"
+,address="Los Alamitos, CA, USA"
+,annotation={
+ RCU overview, desiderata, semi-formal semantics, user-level RCU
+ usage scenarios, three classes of RCU implementation, wait-free
+ RCU updates, RCU grace-period batching, update overhead,
+ http://www.rdrop.com/users/paulmck/RCU/urcu-main-accepted.2011.08.30a.pdf
+ http://www.rdrop.com/users/paulmck/RCU/urcu-supp-accepted.2011.08.30a.pdf
+ http://www.computer.org/cms/Computer.org/dl/trans/td/2012/02/extras/ttd2012020375s.pdf
+}
+}
+
+@inproceedings{AustinClements2012RCULinux:mmapsem
+,author = {Austin Clements and Frans Kaashoek and Nickolai Zeldovich}
+,title = {Scalable Address Spaces Using {RCU} Balanced Trees}
+,booktitle = {Architectural Support for Programming Languages and Operating Systems (ASPLOS 2012)}
+,month = {March}
+,year = {2012}
+,pages = {199--210}
+,numpages = {12}
+,publisher = {ACM}
+,address = {London, UK}
+,url="http://people.csail.mit.edu/nickolai/papers/clements-bonsai.pdf"
+}
+
+@unpublished{PaulEMcKenney2012ELCbattery
+,Author="Paul E. McKenney"
+,Title="Making {RCU} Safe For Battery-Powered Devices"
+,month="February"
+,day="15"
+,year="2012"
+,note="Available:
+\url{http://www.rdrop.com/users/paulmck/RCU/RCUdynticks.2012.02.15b.pdf}
+[Viewed March 1, 2012]"
+,annotation={
+ RCU_FAST_NO_HZ, round 2.
+}
+}
+
+@article{GuillermoVigueras2012RCUCrowd
+,author = {Vigueras, Guillermo and Ordu\~{n}a, Juan M. and Lozano, Miguel}
+,day = {25}
+,doi = {10.1007/s11227-012-0766-x}
+,issn = {0920-8542}
+,journal = {The Journal of Supercomputing}
+,keywords = {linux, simulation}
+,month = apr
+,posted-at = {2012-05-03 09:12:04}
+,priority = {2}
+,title = {{A Read-Copy Update based parallel server for distributed crowd simulations}}
+,url = {http://dx.doi.org/10.1007/s11227-012-0766-x}
+,year = {2012}
+}
+
+
+@unpublished{JonCorbet2012ACCESS:ONCE
+,Author="Jon Corbet"
+,Title="{ACCESS\_ONCE()}"
+,month="August"
+,day="1"
+,year="2012"
+,note="\url{http://lwn.net/Articles/508991/}"
+,annotation={
+ A couple of simple specific compiler optimizations that motivate
+ ACCESS_ONCE().
+}
+}
+
+@unpublished{AlexeyGotsman2012VerifyGraceExtended
+,Author="Alexey Gotsman and Noam Rinetzky and Hongseok Yang"
+,Title="Verifying Highly Concurrent Algorithms with Grace (extended version)"
+,month="July"
+,day="10"
+,year="2012"
+,note="\url{http://software.imdea.org/~gotsman/papers/recycling-esop13-ext.pdf}"
+,annotation={
+ Separation-logic formulation of RCU uses.
+}
+}
+
+@unpublished{PaulMcKenney2012RCUUsage
+,Author="Paul E. McKenney and Silas Boyd-Wickizer and Jonathan Walpole"
+,Title="{RCU} Usage In the Linux Kernel: One Decade Later"
+,month="September"
+,day="17"
+,year="2012"
+,url=http://rdrop.com/users/paulmck/techreports/survey.2012.09.17a.pdf
+,note="Technical report paulmck.2012.09.17"
+,annotation={
+ Overview of the first variant of no-CBs CPUs for RCU.
+}
+}
+
+@unpublished{JonCorbet2012NOCB
+,Author="Jon Corbet"
+,Title="Relocating RCU callbacks"
+,month="October"
+,day="31"
+,year="2012"
+,note="\url{http://lwn.net/Articles/522262/}"
+,annotation={
+ Overview of the first variant of no-CBs CPUs for RCU.
+}
+}
+
+@phdthesis{JustinSeyster2012PhD
+,author="Justin Seyster"
+,title="Runtime Verification of Kernel-Level Concurrency Using Compiler-Based Instrumentation"
+,school="Stony Brook University"
+,year="2012"
+,annotation={
+ Looking for data races, including those involving RCU.
+ Proposal:
+ http://www.fsl.cs.sunysb.edu/docs/jseyster-proposal/redflag.pdf
+ Dissertation:
+ http://www.fsl.cs.sunysb.edu/docs/jseyster-dissertation/redflag.pdf
+}
+}
+
+@unpublished{PaulEMcKenney2013RCUUsage
+,Author="Paul E. McKenney and Silas Boyd-Wickizer and Jonathan Walpole"
+,Title="{RCU} Usage in the {Linux} Kernel: One Decade Later"
+,month="February"
+,day="24"
+,year="2013"
+,note="\url{http://rdrop.com/users/paulmck/techreports/RCUUsage.2013.02.24a.pdf}"
+,annotation={
+ Usage of RCU within the Linux kernel.
+}
+}
+
+@inproceedings{AlexeyGotsman2013ESOPRCU
+,author = {Alexey Gotsman and Noam Rinetzky and Hongseok Yang}
+,title = {Verifying concurrent memory reclamation algorithms with grace}
+,booktitle = {ESOP'13: European Symposium on Programming}
+,year = {2013}
+,pages = {249--269}
+,publisher = {Springer}
+,address = {Rome, Italy}
+,annotation={
+ http://software.imdea.org/~gotsman/papers/recycling-esop13.pdf
+}
+}
+
+@unpublished{PaulEMcKenney2013NoTinyPreempt
+,Author="Paul E. McKenney"
+,Title="Simplifying RCU"
+,month="March"
+,day="6"
+,year="2013"
+,note="\url{http://lwn.net/Articles/541037/}"
+,annotation={
+ Getting rid of TINY_PREEMPT_RCU.
+}
+}
diff --git a/Documentation/RCU/UP.txt b/Documentation/RCU/UP.txt
new file mode 100644
index 000000000..90ec5341e
--- /dev/null
+++ b/Documentation/RCU/UP.txt
@@ -0,0 +1,135 @@
+RCU on Uniprocessor Systems
+
+
+A common misconception is that, on UP systems, the call_rcu() primitive
+may immediately invoke its function. The basis of this misconception
+is that since there is only one CPU, it should not be necessary to
+wait for anything else to get done, since there are no other CPUs for
+anything else to be happening on. Although this approach will -sort- -of-
+work a surprising amount of the time, it is a very bad idea in general.
+This document presents three examples that demonstrate exactly how bad
+an idea this is.
+
+
+Example 1: softirq Suicide
+
+Suppose that an RCU-based algorithm scans a linked list containing
+elements A, B, and C in process context, and can delete elements from
+this same list in softirq context. Suppose that the process-context scan
+is referencing element B when it is interrupted by softirq processing,
+which deletes element B, and then invokes call_rcu() to free element B
+after a grace period.
+
+Now, if call_rcu() were to directly invoke its arguments, then upon return
+from softirq, the list scan would find itself referencing a newly freed
+element B. This situation can greatly decrease the life expectancy of
+your kernel.
+
+This same problem can occur if call_rcu() is invoked from a hardware
+interrupt handler.
+
+
+Example 2: Function-Call Fatality
+
+Of course, one could avert the suicide described in the preceding example
+by having call_rcu() directly invoke its arguments only if it was called
+from process context. However, this can fail in a similar manner.
+
+Suppose that an RCU-based algorithm again scans a linked list containing
+elements A, B, and C in process contexts, but that it invokes a function
+on each element as it is scanned. Suppose further that this function
+deletes element B from the list, then passes it to call_rcu() for deferred
+freeing. This may be a bit unconventional, but it is perfectly legal
+RCU usage, since call_rcu() must wait for a grace period to elapse.
+Therefore, in this case, allowing call_rcu() to immediately invoke
+its arguments would cause it to fail to make the fundamental guarantee
+underlying RCU, namely that call_rcu() defers invoking its arguments until
+all RCU read-side critical sections currently executing have completed.
+
+Quick Quiz #1: why is it -not- legal to invoke synchronize_rcu() in
+ this case?
+
+
+Example 3: Death by Deadlock
+
+Suppose that call_rcu() is invoked while holding a lock, and that the
+callback function must acquire this same lock. In this case, if
+call_rcu() were to directly invoke the callback, the result would
+be self-deadlock.
+
+In some cases, it would possible to restructure to code so that
+the call_rcu() is delayed until after the lock is released. However,
+there are cases where this can be quite ugly:
+
+1. If a number of items need to be passed to call_rcu() within
+ the same critical section, then the code would need to create
+ a list of them, then traverse the list once the lock was
+ released.
+
+2. In some cases, the lock will be held across some kernel API,
+ so that delaying the call_rcu() until the lock is released
+ requires that the data item be passed up via a common API.
+ It is far better to guarantee that callbacks are invoked
+ with no locks held than to have to modify such APIs to allow
+ arbitrary data items to be passed back up through them.
+
+If call_rcu() directly invokes the callback, painful locking restrictions
+or API changes would be required.
+
+Quick Quiz #2: What locking restriction must RCU callbacks respect?
+
+
+Summary
+
+Permitting call_rcu() to immediately invoke its arguments breaks RCU,
+even on a UP system. So do not do it! Even on a UP system, the RCU
+infrastructure -must- respect grace periods, and -must- invoke callbacks
+from a known environment in which no locks are held.
+
+It -is- safe for synchronize_sched() and synchronize_rcu_bh() to return
+immediately on an UP system. It is also safe for synchronize_rcu()
+to return immediately on UP systems, except when running preemptable
+RCU.
+
+Quick Quiz #3: Why can't synchronize_rcu() return immediately on
+ UP systems running preemptable RCU?
+
+
+Answer to Quick Quiz #1:
+ Why is it -not- legal to invoke synchronize_rcu() in this case?
+
+ Because the calling function is scanning an RCU-protected linked
+ list, and is therefore within an RCU read-side critical section.
+ Therefore, the called function has been invoked within an RCU
+ read-side critical section, and is not permitted to block.
+
+Answer to Quick Quiz #2:
+ What locking restriction must RCU callbacks respect?
+
+ Any lock that is acquired within an RCU callback must be
+ acquired elsewhere using an _irq variant of the spinlock
+ primitive. For example, if "mylock" is acquired by an
+ RCU callback, then a process-context acquisition of this
+ lock must use something like spin_lock_irqsave() to
+ acquire the lock.
+
+ If the process-context code were to simply use spin_lock(),
+ then, since RCU callbacks can be invoked from softirq context,
+ the callback might be called from a softirq that interrupted
+ the process-context critical section. This would result in
+ self-deadlock.
+
+ This restriction might seem gratuitous, since very few RCU
+ callbacks acquire locks directly. However, a great many RCU
+ callbacks do acquire locks -indirectly-, for example, via
+ the kfree() primitive.
+
+Answer to Quick Quiz #3:
+ Why can't synchronize_rcu() return immediately on UP systems
+ running preemptable RCU?
+
+ Because some other task might have been preempted in the middle
+ of an RCU read-side critical section. If synchronize_rcu()
+ simply immediately returned, it would prematurely signal the
+ end of the grace period, which would come as a nasty shock to
+ that other thread when it started running again.
diff --git a/Documentation/RCU/arrayRCU.txt b/Documentation/RCU/arrayRCU.txt
new file mode 100644
index 000000000..453ebe695
--- /dev/null
+++ b/Documentation/RCU/arrayRCU.txt
@@ -0,0 +1,141 @@
+Using RCU to Protect Read-Mostly Arrays
+
+
+Although RCU is more commonly used to protect linked lists, it can
+also be used to protect arrays. Three situations are as follows:
+
+1. Hash Tables
+
+2. Static Arrays
+
+3. Resizeable Arrays
+
+Each of these situations are discussed below.
+
+
+Situation 1: Hash Tables
+
+Hash tables are often implemented as an array, where each array entry
+has a linked-list hash chain. Each hash chain can be protected by RCU
+as described in the listRCU.txt document. This approach also applies
+to other array-of-list situations, such as radix trees.
+
+
+Situation 2: Static Arrays
+
+Static arrays, where the data (rather than a pointer to the data) is
+located in each array element, and where the array is never resized,
+have not been used with RCU. Rik van Riel recommends using seqlock in
+this situation, which would also have minimal read-side overhead as long
+as updates are rare.
+
+Quick Quiz: Why is it so important that updates be rare when
+ using seqlock?
+
+
+Situation 3: Resizeable Arrays
+
+Use of RCU for resizeable arrays is demonstrated by the grow_ary()
+function used by the System V IPC code. The array is used to map from
+semaphore, message-queue, and shared-memory IDs to the data structure
+that represents the corresponding IPC construct. The grow_ary()
+function does not acquire any locks; instead its caller must hold the
+ids->sem semaphore.
+
+The grow_ary() function, shown below, does some limit checks, allocates a
+new ipc_id_ary, copies the old to the new portion of the new, initializes
+the remainder of the new, updates the ids->entries pointer to point to
+the new array, and invokes ipc_rcu_putref() to free up the old array.
+Note that rcu_assign_pointer() is used to update the ids->entries pointer,
+which includes any memory barriers required on whatever architecture
+you are running on.
+
+ static int grow_ary(struct ipc_ids* ids, int newsize)
+ {
+ struct ipc_id_ary* new;
+ struct ipc_id_ary* old;
+ int i;
+ int size = ids->entries->size;
+
+ if(newsize > IPCMNI)
+ newsize = IPCMNI;
+ if(newsize <= size)
+ return newsize;
+
+ new = ipc_rcu_alloc(sizeof(struct kern_ipc_perm *)*newsize +
+ sizeof(struct ipc_id_ary));
+ if(new == NULL)
+ return size;
+ new->size = newsize;
+ memcpy(new->p, ids->entries->p,
+ sizeof(struct kern_ipc_perm *)*size +
+ sizeof(struct ipc_id_ary));
+ for(i=size;i<newsize;i++) {
+ new->p[i] = NULL;
+ }
+ old = ids->entries;
+
+ /*
+ * Use rcu_assign_pointer() to make sure the memcpyed
+ * contents of the new array are visible before the new
+ * array becomes visible.
+ */
+ rcu_assign_pointer(ids->entries, new);
+
+ ipc_rcu_putref(old);
+ return newsize;
+ }
+
+The ipc_rcu_putref() function decrements the array's reference count
+and then, if the reference count has dropped to zero, uses call_rcu()
+to free the array after a grace period has elapsed.
+
+The array is traversed by the ipc_lock() function. This function
+indexes into the array under the protection of rcu_read_lock(),
+using rcu_dereference() to pick up the pointer to the array so
+that it may later safely be dereferenced -- memory barriers are
+required on the Alpha CPU. Since the size of the array is stored
+with the array itself, there can be no array-size mismatches, so
+a simple check suffices. The pointer to the structure corresponding
+to the desired IPC object is placed in "out", with NULL indicating
+a non-existent entry. After acquiring "out->lock", the "out->deleted"
+flag indicates whether the IPC object is in the process of being
+deleted, and, if not, the pointer is returned.
+
+ struct kern_ipc_perm* ipc_lock(struct ipc_ids* ids, int id)
+ {
+ struct kern_ipc_perm* out;
+ int lid = id % SEQ_MULTIPLIER;
+ struct ipc_id_ary* entries;
+
+ rcu_read_lock();
+ entries = rcu_dereference(ids->entries);
+ if(lid >= entries->size) {
+ rcu_read_unlock();
+ return NULL;
+ }
+ out = entries->p[lid];
+ if(out == NULL) {
+ rcu_read_unlock();
+ return NULL;
+ }
+ spin_lock(&out->lock);
+
+ /* ipc_rmid() may have already freed the ID while ipc_lock
+ * was spinning: here verify that the structure is still valid
+ */
+ if (out->deleted) {
+ spin_unlock(&out->lock);
+ rcu_read_unlock();
+ return NULL;
+ }
+ return out;
+ }
+
+
+Answer to Quick Quiz:
+
+ The reason that it is important that updates be rare when
+ using seqlock is that frequent updates can livelock readers.
+ One way to avoid this problem is to assign a seqlock for
+ each array entry rather than to the entire array.
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
new file mode 100644
index 000000000..877947130
--- /dev/null
+++ b/Documentation/RCU/checklist.txt
@@ -0,0 +1,437 @@
+Review Checklist for RCU Patches
+
+
+This document contains a checklist for producing and reviewing patches
+that make use of RCU. Violating any of the rules listed below will
+result in the same sorts of problems that leaving out a locking primitive
+would cause. This list is based on experiences reviewing such patches
+over a rather long period of time, but improvements are always welcome!
+
+0. Is RCU being applied to a read-mostly situation? If the data
+ structure is updated more than about 10% of the time, then you
+ should strongly consider some other approach, unless detailed
+ performance measurements show that RCU is nonetheless the right
+ tool for the job. Yes, RCU does reduce read-side overhead by
+ increasing write-side overhead, which is exactly why normal uses
+ of RCU will do much more reading than updating.
+
+ Another exception is where performance is not an issue, and RCU
+ provides a simpler implementation. An example of this situation
+ is the dynamic NMI code in the Linux 2.6 kernel, at least on
+ architectures where NMIs are rare.
+
+ Yet another exception is where the low real-time latency of RCU's
+ read-side primitives is critically important.
+
+1. Does the update code have proper mutual exclusion?
+
+ RCU does allow -readers- to run (almost) naked, but -writers- must
+ still use some sort of mutual exclusion, such as:
+
+ a. locking,
+ b. atomic operations, or
+ c. restricting updates to a single task.
+
+ If you choose #b, be prepared to describe how you have handled
+ memory barriers on weakly ordered machines (pretty much all of
+ them -- even x86 allows later loads to be reordered to precede
+ earlier stores), and be prepared to explain why this added
+ complexity is worthwhile. If you choose #c, be prepared to
+ explain how this single task does not become a major bottleneck on
+ big multiprocessor machines (for example, if the task is updating
+ information relating to itself that other tasks can read, there
+ by definition can be no bottleneck).
+
+2. Do the RCU read-side critical sections make proper use of
+ rcu_read_lock() and friends? These primitives are needed
+ to prevent grace periods from ending prematurely, which
+ could result in data being unceremoniously freed out from
+ under your read-side code, which can greatly increase the
+ actuarial risk of your kernel.
+
+ As a rough rule of thumb, any dereference of an RCU-protected
+ pointer must be covered by rcu_read_lock(), rcu_read_lock_bh(),
+ rcu_read_lock_sched(), or by the appropriate update-side lock.
+ Disabling of preemption can serve as rcu_read_lock_sched(), but
+ is less readable.
+
+3. Does the update code tolerate concurrent accesses?
+
+ The whole point of RCU is to permit readers to run without
+ any locks or atomic operations. This means that readers will
+ be running while updates are in progress. There are a number
+ of ways to handle this concurrency, depending on the situation:
+
+ a. Use the RCU variants of the list and hlist update
+ primitives to add, remove, and replace elements on
+ an RCU-protected list. Alternatively, use the other
+ RCU-protected data structures that have been added to
+ the Linux kernel.
+
+ This is almost always the best approach.
+
+ b. Proceed as in (a) above, but also maintain per-element
+ locks (that are acquired by both readers and writers)
+ that guard per-element state. Of course, fields that
+ the readers refrain from accessing can be guarded by
+ some other lock acquired only by updaters, if desired.
+
+ This works quite well, also.
+
+ c. Make updates appear atomic to readers. For example,
+ pointer updates to properly aligned fields will
+ appear atomic, as will individual atomic primitives.
+ Sequences of perations performed under a lock will -not-
+ appear to be atomic to RCU readers, nor will sequences
+ of multiple atomic primitives.
+
+ This can work, but is starting to get a bit tricky.
+
+ d. Carefully order the updates and the reads so that
+ readers see valid data at all phases of the update.
+ This is often more difficult than it sounds, especially
+ given modern CPUs' tendency to reorder memory references.
+ One must usually liberally sprinkle memory barriers
+ (smp_wmb(), smp_rmb(), smp_mb()) through the code,
+ making it difficult to understand and to test.
+
+ It is usually better to group the changing data into
+ a separate structure, so that the change may be made
+ to appear atomic by updating a pointer to reference
+ a new structure containing updated values.
+
+4. Weakly ordered CPUs pose special challenges. Almost all CPUs
+ are weakly ordered -- even x86 CPUs allow later loads to be
+ reordered to precede earlier stores. RCU code must take all of
+ the following measures to prevent memory-corruption problems:
+
+ a. Readers must maintain proper ordering of their memory
+ accesses. The rcu_dereference() primitive ensures that
+ the CPU picks up the pointer before it picks up the data
+ that the pointer points to. This really is necessary
+ on Alpha CPUs. If you don't believe me, see:
+
+ http://www.openvms.compaq.com/wizard/wiz_2637.html
+
+ The rcu_dereference() primitive is also an excellent
+ documentation aid, letting the person reading the
+ code know exactly which pointers are protected by RCU.
+ Please note that compilers can also reorder code, and
+ they are becoming increasingly aggressive about doing
+ just that. The rcu_dereference() primitive therefore also
+ prevents destructive compiler optimizations. However,
+ with a bit of devious creativity, it is possible to
+ mishandle the return value from rcu_dereference().
+ Please see rcu_dereference.txt in this directory for
+ more information.
+
+ The rcu_dereference() primitive is used by the
+ various "_rcu()" list-traversal primitives, such
+ as the list_for_each_entry_rcu(). Note that it is
+ perfectly legal (if redundant) for update-side code to
+ use rcu_dereference() and the "_rcu()" list-traversal
+ primitives. This is particularly useful in code that
+ is common to readers and updaters. However, lockdep
+ will complain if you access rcu_dereference() outside
+ of an RCU read-side critical section. See lockdep.txt
+ to learn what to do about this.
+
+ Of course, neither rcu_dereference() nor the "_rcu()"
+ list-traversal primitives can substitute for a good
+ concurrency design coordinating among multiple updaters.
+
+ b. If the list macros are being used, the list_add_tail_rcu()
+ and list_add_rcu() primitives must be used in order
+ to prevent weakly ordered machines from misordering
+ structure initialization and pointer planting.
+ Similarly, if the hlist macros are being used, the
+ hlist_add_head_rcu() primitive is required.
+
+ c. If the list macros are being used, the list_del_rcu()
+ primitive must be used to keep list_del()'s pointer
+ poisoning from inflicting toxic effects on concurrent
+ readers. Similarly, if the hlist macros are being used,
+ the hlist_del_rcu() primitive is required.
+
+ The list_replace_rcu() and hlist_replace_rcu() primitives
+ may be used to replace an old structure with a new one
+ in their respective types of RCU-protected lists.
+
+ d. Rules similar to (4b) and (4c) apply to the "hlist_nulls"
+ type of RCU-protected linked lists.
+
+ e. Updates must ensure that initialization of a given
+ structure happens before pointers to that structure are
+ publicized. Use the rcu_assign_pointer() primitive
+ when publicizing a pointer to a structure that can
+ be traversed by an RCU read-side critical section.
+
+5. If call_rcu(), or a related primitive such as call_rcu_bh(),
+ call_rcu_sched(), or call_srcu() is used, the callback function
+ must be written to be called from softirq context. In particular,
+ it cannot block.
+
+6. Since synchronize_rcu() can block, it cannot be called from
+ any sort of irq context. The same rule applies for
+ synchronize_rcu_bh(), synchronize_sched(), synchronize_srcu(),
+ synchronize_rcu_expedited(), synchronize_rcu_bh_expedited(),
+ synchronize_sched_expedite(), and synchronize_srcu_expedited().
+
+ The expedited forms of these primitives have the same semantics
+ as the non-expedited forms, but expediting is both expensive
+ and unfriendly to real-time workloads. Use of the expedited
+ primitives should be restricted to rare configuration-change
+ operations that would not normally be undertaken while a real-time
+ workload is running.
+
+ In particular, if you find yourself invoking one of the expedited
+ primitives repeatedly in a loop, please do everyone a favor:
+ Restructure your code so that it batches the updates, allowing
+ a single non-expedited primitive to cover the entire batch.
+ This will very likely be faster than the loop containing the
+ expedited primitive, and will be much much easier on the rest
+ of the system, especially to real-time workloads running on
+ the rest of the system.
+
+ In addition, it is illegal to call the expedited forms from
+ a CPU-hotplug notifier, or while holding a lock that is acquired
+ by a CPU-hotplug notifier. Failing to observe this restriction
+ will result in deadlock.
+
+7. If the updater uses call_rcu() or synchronize_rcu(), then the
+ corresponding readers must use rcu_read_lock() and
+ rcu_read_unlock(). If the updater uses call_rcu_bh() or
+ synchronize_rcu_bh(), then the corresponding readers must
+ use rcu_read_lock_bh() and rcu_read_unlock_bh(). If the
+ updater uses call_rcu_sched() or synchronize_sched(), then
+ the corresponding readers must disable preemption, possibly
+ by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
+ If the updater uses synchronize_srcu() or call_srcu(), then
+ the corresponding readers must use srcu_read_lock() and
+ srcu_read_unlock(), and with the same srcu_struct. The rules for
+ the expedited primitives are the same as for their non-expedited
+ counterparts. Mixing things up will result in confusion and
+ broken kernels.
+
+ One exception to this rule: rcu_read_lock() and rcu_read_unlock()
+ may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
+ in cases where local bottom halves are already known to be
+ disabled, for example, in irq or softirq context. Commenting
+ such cases is a must, of course! And the jury is still out on
+ whether the increased speed is worth it.
+
+8. Although synchronize_rcu() is slower than is call_rcu(), it
+ usually results in simpler code. So, unless update performance is
+ critically important, the updaters cannot block, or the latency of
+ synchronize_rcu() is visible from userspace, synchronize_rcu()
+ should be used in preference to call_rcu(). Furthermore,
+ kfree_rcu() usually results in even simpler code than does
+ synchronize_rcu() without synchronize_rcu()'s multi-millisecond
+ latency. So please take advantage of kfree_rcu()'s "fire and
+ forget" memory-freeing capabilities where it applies.
+
+ An especially important property of the synchronize_rcu()
+ primitive is that it automatically self-limits: if grace periods
+ are delayed for whatever reason, then the synchronize_rcu()
+ primitive will correspondingly delay updates. In contrast,
+ code using call_rcu() should explicitly limit update rate in
+ cases where grace periods are delayed, as failing to do so can
+ result in excessive realtime latencies or even OOM conditions.
+
+ Ways of gaining this self-limiting property when using call_rcu()
+ include:
+
+ a. Keeping a count of the number of data-structure elements
+ used by the RCU-protected data structure, including
+ those waiting for a grace period to elapse. Enforce a
+ limit on this number, stalling updates as needed to allow
+ previously deferred frees to complete. Alternatively,
+ limit only the number awaiting deferred free rather than
+ the total number of elements.
+
+ One way to stall the updates is to acquire the update-side
+ mutex. (Don't try this with a spinlock -- other CPUs
+ spinning on the lock could prevent the grace period
+ from ever ending.) Another way to stall the updates
+ is for the updates to use a wrapper function around
+ the memory allocator, so that this wrapper function
+ simulates OOM when there is too much memory awaiting an
+ RCU grace period. There are of course many other
+ variations on this theme.
+
+ b. Limiting update rate. For example, if updates occur only
+ once per hour, then no explicit rate limiting is
+ required, unless your system is already badly broken.
+ Older versions of the dcache subsystem take this approach,
+ guarding updates with a global lock, limiting their rate.
+
+ c. Trusted update -- if updates can only be done manually by
+ superuser or some other trusted user, then it might not
+ be necessary to automatically limit them. The theory
+ here is that superuser already has lots of ways to crash
+ the machine.
+
+ d. Use call_rcu_bh() rather than call_rcu(), in order to take
+ advantage of call_rcu_bh()'s faster grace periods. (This
+ is only a partial solution, though.)
+
+ e. Periodically invoke synchronize_rcu(), permitting a limited
+ number of updates per grace period.
+
+ The same cautions apply to call_rcu_bh(), call_rcu_sched(),
+ call_srcu(), and kfree_rcu().
+
+ Note that although these primitives do take action to avoid memory
+ exhaustion when any given CPU has too many callbacks, a determined
+ user could still exhaust memory. This is especially the case
+ if a system with a large number of CPUs has been configured to
+ offload all of its RCU callbacks onto a single CPU, or if the
+ system has relatively little free memory.
+
+9. All RCU list-traversal primitives, which include
+ rcu_dereference(), list_for_each_entry_rcu(), and
+ list_for_each_safe_rcu(), must be either within an RCU read-side
+ critical section or must be protected by appropriate update-side
+ locks. RCU read-side critical sections are delimited by
+ rcu_read_lock() and rcu_read_unlock(), or by similar primitives
+ such as rcu_read_lock_bh() and rcu_read_unlock_bh(), in which
+ case the matching rcu_dereference() primitive must be used in
+ order to keep lockdep happy, in this case, rcu_dereference_bh().
+
+ The reason that it is permissible to use RCU list-traversal
+ primitives when the update-side lock is held is that doing so
+ can be quite helpful in reducing code bloat when common code is
+ shared between readers and updaters. Additional primitives
+ are provided for this case, as discussed in lockdep.txt.
+
+10. Conversely, if you are in an RCU read-side critical section,
+ and you don't hold the appropriate update-side lock, you -must-
+ use the "_rcu()" variants of the list macros. Failing to do so
+ will break Alpha, cause aggressive compilers to generate bad code,
+ and confuse people trying to read your code.
+
+11. Note that synchronize_rcu() -only- guarantees to wait until
+ all currently executing rcu_read_lock()-protected RCU read-side
+ critical sections complete. It does -not- necessarily guarantee
+ that all currently running interrupts, NMIs, preempt_disable()
+ code, or idle loops will complete. Therefore, if your
+ read-side critical sections are protected by something other
+ than rcu_read_lock(), do -not- use synchronize_rcu().
+
+ Similarly, disabling preemption is not an acceptable substitute
+ for rcu_read_lock(). Code that attempts to use preemption
+ disabling where it should be using rcu_read_lock() will break
+ in real-time kernel builds.
+
+ If you want to wait for interrupt handlers, NMI handlers, and
+ code under the influence of preempt_disable(), you instead
+ need to use synchronize_irq() or synchronize_sched().
+
+ This same limitation also applies to synchronize_rcu_bh()
+ and synchronize_srcu(), as well as to the asynchronous and
+ expedited forms of the three primitives, namely call_rcu(),
+ call_rcu_bh(), call_srcu(), synchronize_rcu_expedited(),
+ synchronize_rcu_bh_expedited(), and synchronize_srcu_expedited().
+
+12. Any lock acquired by an RCU callback must be acquired elsewhere
+ with softirq disabled, e.g., via spin_lock_irqsave(),
+ spin_lock_bh(), etc. Failing to disable irq on a given
+ acquisition of that lock will result in deadlock as soon as
+ the RCU softirq handler happens to run your RCU callback while
+ interrupting that acquisition's critical section.
+
+13. RCU callbacks can be and are executed in parallel. In many cases,
+ the callback code simply wrappers around kfree(), so that this
+ is not an issue (or, more accurately, to the extent that it is
+ an issue, the memory-allocator locking handles it). However,
+ if the callbacks do manipulate a shared data structure, they
+ must use whatever locking or other synchronization is required
+ to safely access and/or modify that data structure.
+
+ RCU callbacks are -usually- executed on the same CPU that executed
+ the corresponding call_rcu(), call_rcu_bh(), or call_rcu_sched(),
+ but are by -no- means guaranteed to be. For example, if a given
+ CPU goes offline while having an RCU callback pending, then that
+ RCU callback will execute on some surviving CPU. (If this was
+ not the case, a self-spawning RCU callback would prevent the
+ victim CPU from ever going offline.)
+
+14. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(),
+ synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu())
+ may only be invoked from process context. Unlike other forms of
+ RCU, it -is- permissible to block in an SRCU read-side critical
+ section (demarked by srcu_read_lock() and srcu_read_unlock()),
+ hence the "SRCU": "sleepable RCU". Please note that if you
+ don't need to sleep in read-side critical sections, you should be
+ using RCU rather than SRCU, because RCU is almost always faster
+ and easier to use than is SRCU.
+
+ Also unlike other forms of RCU, explicit initialization
+ and cleanup is required via init_srcu_struct() and
+ cleanup_srcu_struct(). These are passed a "struct srcu_struct"
+ that defines the scope of a given SRCU domain. Once initialized,
+ the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock()
+ synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu().
+ A given synchronize_srcu() waits only for SRCU read-side critical
+ sections governed by srcu_read_lock() and srcu_read_unlock()
+ calls that have been passed the same srcu_struct. This property
+ is what makes sleeping read-side critical sections tolerable --
+ a given subsystem delays only its own updates, not those of other
+ subsystems using SRCU. Therefore, SRCU is less prone to OOM the
+ system than RCU would be if RCU's read-side critical sections
+ were permitted to sleep.
+
+ The ability to sleep in read-side critical sections does not
+ come for free. First, corresponding srcu_read_lock() and
+ srcu_read_unlock() calls must be passed the same srcu_struct.
+ Second, grace-period-detection overhead is amortized only
+ over those updates sharing a given srcu_struct, rather than
+ being globally amortized as they are for other forms of RCU.
+ Therefore, SRCU should be used in preference to rw_semaphore
+ only in extremely read-intensive situations, or in situations
+ requiring SRCU's read-side deadlock immunity or low read-side
+ realtime latency.
+
+ Note that, rcu_assign_pointer() relates to SRCU just as it does
+ to other forms of RCU.
+
+15. The whole point of call_rcu(), synchronize_rcu(), and friends
+ is to wait until all pre-existing readers have finished before
+ carrying out some otherwise-destructive operation. It is
+ therefore critically important to -first- remove any path
+ that readers can follow that could be affected by the
+ destructive operation, and -only- -then- invoke call_rcu(),
+ synchronize_rcu(), or friends.
+
+ Because these primitives only wait for pre-existing readers, it
+ is the caller's responsibility to guarantee that any subsequent
+ readers will execute safely.
+
+16. The various RCU read-side primitives do -not- necessarily contain
+ memory barriers. You should therefore plan for the CPU
+ and the compiler to freely reorder code into and out of RCU
+ read-side critical sections. It is the responsibility of the
+ RCU update-side primitives to deal with this.
+
+17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
+ __rcu sparse checks (enabled by CONFIG_SPARSE_RCU_POINTER) to
+ validate your RCU code. These can help find problems as follows:
+
+ CONFIG_PROVE_RCU: check that accesses to RCU-protected data
+ structures are carried out under the proper RCU
+ read-side critical section, while holding the right
+ combination of locks, or whatever other conditions
+ are appropriate.
+
+ CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the
+ same object to call_rcu() (or friends) before an RCU
+ grace period has elapsed since the last time that you
+ passed that same object to call_rcu() (or friends).
+
+ __rcu sparse checks: tag the pointer to the RCU-protected data
+ structure with __rcu, and sparse will warn you if you
+ access that pointer without the services of one of the
+ variants of rcu_dereference().
+
+ These debugging aids can help you find problems that are
+ otherwise extremely difficult to spot.
diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt
new file mode 100644
index 000000000..adb5a3782
--- /dev/null
+++ b/Documentation/RCU/listRCU.txt
@@ -0,0 +1,315 @@
+Using RCU to Protect Read-Mostly Linked Lists
+
+
+One of the best applications of RCU is to protect read-mostly linked lists
+("struct list_head" in list.h). One big advantage of this approach
+is that all of the required memory barriers are included for you in
+the list macros. This document describes several applications of RCU,
+with the best fits first.
+
+
+Example 1: Read-Side Action Taken Outside of Lock, No In-Place Updates
+
+The best applications are cases where, if reader-writer locking were
+used, the read-side lock would be dropped before taking any action
+based on the results of the search. The most celebrated example is
+the routing table. Because the routing table is tracking the state of
+equipment outside of the computer, it will at times contain stale data.
+Therefore, once the route has been computed, there is no need to hold
+the routing table static during transmission of the packet. After all,
+you can hold the routing table static all you want, but that won't keep
+the external Internet from changing, and it is the state of the external
+Internet that really matters. In addition, routing entries are typically
+added or deleted, rather than being modified in place.
+
+A straightforward example of this use of RCU may be found in the
+system-call auditing support. For example, a reader-writer locked
+implementation of audit_filter_task() might be as follows:
+
+ static enum audit_state audit_filter_task(struct task_struct *tsk)
+ {
+ struct audit_entry *e;
+ enum audit_state state;
+
+ read_lock(&auditsc_lock);
+ /* Note: audit_netlink_sem held by caller. */
+ list_for_each_entry(e, &audit_tsklist, list) {
+ if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+ read_unlock(&auditsc_lock);
+ return state;
+ }
+ }
+ read_unlock(&auditsc_lock);
+ return AUDIT_BUILD_CONTEXT;
+ }
+
+Here the list is searched under the lock, but the lock is dropped before
+the corresponding value is returned. By the time that this value is acted
+on, the list may well have been modified. This makes sense, since if
+you are turning auditing off, it is OK to audit a few extra system calls.
+
+This means that RCU can be easily applied to the read side, as follows:
+
+ static enum audit_state audit_filter_task(struct task_struct *tsk)
+ {
+ struct audit_entry *e;
+ enum audit_state state;
+
+ rcu_read_lock();
+ /* Note: audit_netlink_sem held by caller. */
+ list_for_each_entry_rcu(e, &audit_tsklist, list) {
+ if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+ rcu_read_unlock();
+ return state;
+ }
+ }
+ rcu_read_unlock();
+ return AUDIT_BUILD_CONTEXT;
+ }
+
+The read_lock() and read_unlock() calls have become rcu_read_lock()
+and rcu_read_unlock(), respectively, and the list_for_each_entry() has
+become list_for_each_entry_rcu(). The _rcu() list-traversal primitives
+insert the read-side memory barriers that are required on DEC Alpha CPUs.
+
+The changes to the update side are also straightforward. A reader-writer
+lock might be used as follows for deletion and insertion:
+
+ static inline int audit_del_rule(struct audit_rule *rule,
+ struct list_head *list)
+ {
+ struct audit_entry *e;
+
+ write_lock(&auditsc_lock);
+ list_for_each_entry(e, list, list) {
+ if (!audit_compare_rule(rule, &e->rule)) {
+ list_del(&e->list);
+ write_unlock(&auditsc_lock);
+ return 0;
+ }
+ }
+ write_unlock(&auditsc_lock);
+ return -EFAULT; /* No matching rule */
+ }
+
+ static inline int audit_add_rule(struct audit_entry *entry,
+ struct list_head *list)
+ {
+ write_lock(&auditsc_lock);
+ if (entry->rule.flags & AUDIT_PREPEND) {
+ entry->rule.flags &= ~AUDIT_PREPEND;
+ list_add(&entry->list, list);
+ } else {
+ list_add_tail(&entry->list, list);
+ }
+ write_unlock(&auditsc_lock);
+ return 0;
+ }
+
+Following are the RCU equivalents for these two functions:
+
+ static inline int audit_del_rule(struct audit_rule *rule,
+ struct list_head *list)
+ {
+ struct audit_entry *e;
+
+ /* Do not use the _rcu iterator here, since this is the only
+ * deletion routine. */
+ list_for_each_entry(e, list, list) {
+ if (!audit_compare_rule(rule, &e->rule)) {
+ list_del_rcu(&e->list);
+ call_rcu(&e->rcu, audit_free_rule);
+ return 0;
+ }
+ }
+ return -EFAULT; /* No matching rule */
+ }
+
+ static inline int audit_add_rule(struct audit_entry *entry,
+ struct list_head *list)
+ {
+ if (entry->rule.flags & AUDIT_PREPEND) {
+ entry->rule.flags &= ~AUDIT_PREPEND;
+ list_add_rcu(&entry->list, list);
+ } else {
+ list_add_tail_rcu(&entry->list, list);
+ }
+ return 0;
+ }
+
+Normally, the write_lock() and write_unlock() would be replaced by
+a spin_lock() and a spin_unlock(), but in this case, all callers hold
+audit_netlink_sem, so no additional locking is required. The auditsc_lock
+can therefore be eliminated, since use of RCU eliminates the need for
+writers to exclude readers. Normally, the write_lock() calls would
+be converted into spin_lock() calls.
+
+The list_del(), list_add(), and list_add_tail() primitives have been
+replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu().
+The _rcu() list-manipulation primitives add memory barriers that are
+needed on weakly ordered CPUs (most of them!). The list_del_rcu()
+primitive omits the pointer poisoning debug-assist code that would
+otherwise cause concurrent readers to fail spectacularly.
+
+So, when readers can tolerate stale data and when entries are either added
+or deleted, without in-place modification, it is very easy to use RCU!
+
+
+Example 2: Handling In-Place Updates
+
+The system-call auditing code does not update auditing rules in place.
+However, if it did, reader-writer-locked code to do so might look as
+follows (presumably, the field_count is only permitted to decrease,
+otherwise, the added fields would need to be filled in):
+
+ static inline int audit_upd_rule(struct audit_rule *rule,
+ struct list_head *list,
+ __u32 newaction,
+ __u32 newfield_count)
+ {
+ struct audit_entry *e;
+ struct audit_newentry *ne;
+
+ write_lock(&auditsc_lock);
+ /* Note: audit_netlink_sem held by caller. */
+ list_for_each_entry(e, list, list) {
+ if (!audit_compare_rule(rule, &e->rule)) {
+ e->rule.action = newaction;
+ e->rule.file_count = newfield_count;
+ write_unlock(&auditsc_lock);
+ return 0;
+ }
+ }
+ write_unlock(&auditsc_lock);
+ return -EFAULT; /* No matching rule */
+ }
+
+The RCU version creates a copy, updates the copy, then replaces the old
+entry with the newly updated entry. This sequence of actions, allowing
+concurrent reads while doing a copy to perform an update, is what gives
+RCU ("read-copy update") its name. The RCU code is as follows:
+
+ static inline int audit_upd_rule(struct audit_rule *rule,
+ struct list_head *list,
+ __u32 newaction,
+ __u32 newfield_count)
+ {
+ struct audit_entry *e;
+ struct audit_newentry *ne;
+
+ list_for_each_entry(e, list, list) {
+ if (!audit_compare_rule(rule, &e->rule)) {
+ ne = kmalloc(sizeof(*entry), GFP_ATOMIC);
+ if (ne == NULL)
+ return -ENOMEM;
+ audit_copy_rule(&ne->rule, &e->rule);
+ ne->rule.action = newaction;
+ ne->rule.file_count = newfield_count;
+ list_replace_rcu(&e->list, &ne->list);
+ call_rcu(&e->rcu, audit_free_rule);
+ return 0;
+ }
+ }
+ return -EFAULT; /* No matching rule */
+ }
+
+Again, this assumes that the caller holds audit_netlink_sem. Normally,
+the reader-writer lock would become a spinlock in this sort of code.
+
+
+Example 3: Eliminating Stale Data
+
+The auditing examples above tolerate stale data, as do most algorithms
+that are tracking external state. Because there is a delay from the
+time the external state changes before Linux becomes aware of the change,
+additional RCU-induced staleness is normally not a problem.
+
+However, there are many examples where stale data cannot be tolerated.
+One example in the Linux kernel is the System V IPC (see the ipc_lock()
+function in ipc/util.c). This code checks a "deleted" flag under a
+per-entry spinlock, and, if the "deleted" flag is set, pretends that the
+entry does not exist. For this to be helpful, the search function must
+return holding the per-entry spinlock, as ipc_lock() does in fact do.
+
+Quick Quiz: Why does the search function need to return holding the
+ per-entry lock for this deleted-flag technique to be helpful?
+
+If the system-call audit module were to ever need to reject stale data,
+one way to accomplish this would be to add a "deleted" flag and a "lock"
+spinlock to the audit_entry structure, and modify audit_filter_task()
+as follows:
+
+ static enum audit_state audit_filter_task(struct task_struct *tsk)
+ {
+ struct audit_entry *e;
+ enum audit_state state;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(e, &audit_tsklist, list) {
+ if (audit_filter_rules(tsk, &e->rule, NULL, &state)) {
+ spin_lock(&e->lock);
+ if (e->deleted) {
+ spin_unlock(&e->lock);
+ rcu_read_unlock();
+ return AUDIT_BUILD_CONTEXT;
+ }
+ rcu_read_unlock();
+ return state;
+ }
+ }
+ rcu_read_unlock();
+ return AUDIT_BUILD_CONTEXT;
+ }
+
+Note that this example assumes that entries are only added and deleted.
+Additional mechanism is required to deal correctly with the
+update-in-place performed by audit_upd_rule(). For one thing,
+audit_upd_rule() would need additional memory barriers to ensure
+that the list_add_rcu() was really executed before the list_del_rcu().
+
+The audit_del_rule() function would need to set the "deleted"
+flag under the spinlock as follows:
+
+ static inline int audit_del_rule(struct audit_rule *rule,
+ struct list_head *list)
+ {
+ struct audit_entry *e;
+
+ /* Do not need to use the _rcu iterator here, since this
+ * is the only deletion routine. */
+ list_for_each_entry(e, list, list) {
+ if (!audit_compare_rule(rule, &e->rule)) {
+ spin_lock(&e->lock);
+ list_del_rcu(&e->list);
+ e->deleted = 1;
+ spin_unlock(&e->lock);
+ call_rcu(&e->rcu, audit_free_rule);
+ return 0;
+ }
+ }
+ return -EFAULT; /* No matching rule */
+ }
+
+
+Summary
+
+Read-mostly list-based data structures that can tolerate stale data are
+the most amenable to use of RCU. The simplest case is where entries are
+either added or deleted from the data structure (or atomically modified
+in place), but non-atomic in-place modifications can be handled by making
+a copy, updating the copy, then replacing the original with the copy.
+If stale data cannot be tolerated, then a "deleted" flag may be used
+in conjunction with a per-entry spinlock in order to allow the search
+function to reject newly deleted data.
+
+
+Answer to Quick Quiz
+ Why does the search function need to return holding the per-entry
+ lock for this deleted-flag technique to be helpful?
+
+ If the search function drops the per-entry lock before returning,
+ then the caller will be processing stale data in any case. If it
+ is really OK to be processing stale data, then you don't need a
+ "deleted" flag. If processing stale data really is a problem,
+ then you need to hold the per-entry lock across all of the code
+ that uses the value that was returned.
diff --git a/Documentation/RCU/lockdep-splat.txt b/Documentation/RCU/lockdep-splat.txt
new file mode 100644
index 000000000..bf9061142
--- /dev/null
+++ b/Documentation/RCU/lockdep-splat.txt
@@ -0,0 +1,110 @@
+Lockdep-RCU was added to the Linux kernel in early 2010
+(http://lwn.net/Articles/371986/). This facility checks for some common
+misuses of the RCU API, most notably using one of the rcu_dereference()
+family to access an RCU-protected pointer without the proper protection.
+When such misuse is detected, an lockdep-RCU splat is emitted.
+
+The usual cause of a lockdep-RCU slat is someone accessing an
+RCU-protected data structure without either (1) being in the right kind of
+RCU read-side critical section or (2) holding the right update-side lock.
+This problem can therefore be serious: it might result in random memory
+overwriting or worse. There can of course be false positives, this
+being the real world and all that.
+
+So let's look at an example RCU lockdep splat from 3.0-rc5, one that
+has long since been fixed:
+
+===============================
+[ INFO: suspicious RCU usage. ]
+-------------------------------
+block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
+
+other info that might help us debug this:
+
+
+rcu_scheduler_active = 1, debug_locks = 0
+3 locks held by scsi_scan_6/1552:
+ #0: (&shost->scan_mutex){+.+.+.}, at: [<ffffffff8145efca>]
+scsi_scan_host_selected+0x5a/0x150
+ #1: (&eq->sysfs_lock){+.+...}, at: [<ffffffff812a5032>]
+elevator_exit+0x22/0x60
+ #2: (&(&q->__queue_lock)->rlock){-.-...}, at: [<ffffffff812b6233>]
+cfq_exit_queue+0x43/0x190
+
+stack backtrace:
+Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17
+Call Trace:
+ [<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0
+ [<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120
+ [<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190
+ [<ffffffff812a5046>] elevator_exit+0x36/0x60
+ [<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60
+ [<ffffffff8145cc09>] scsi_free_queue+0x9/0x10
+ [<ffffffff81460944>] __scsi_remove_device+0x84/0xd0
+ [<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10
+ [<ffffffff817da069>] ? error_exit+0x29/0xb0
+ [<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80
+ [<ffffffff8145e722>] __scsi_scan_target+0x112/0x680
+ [<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
+ [<ffffffff817da069>] ? error_exit+0x29/0xb0
+ [<ffffffff812bcc60>] ? kobject_del+0x40/0x40
+ [<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0
+ [<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150
+ [<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90
+ [<ffffffff8145f170>] do_scan_async+0x20/0x160
+ [<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90
+ [<ffffffff810975b6>] kthread+0xa6/0xb0
+ [<ffffffff817db154>] kernel_thread_helper+0x4/0x10
+ [<ffffffff81066430>] ? finish_task_switch+0x80/0x110
+ [<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe
+ [<ffffffff81097510>] ? __init_kthread_worker+0x70/0x70
+ [<ffffffff817db150>] ? gs_change+0xb/0xb
+
+Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows:
+
+ if (rcu_dereference(ioc->ioc_data) == cic) {
+
+This form says that it must be in a plain vanilla RCU read-side critical
+section, but the "other info" list above shows that this is not the
+case. Instead, we hold three locks, one of which might be RCU related.
+And maybe that lock really does protect this reference. If so, the fix
+is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to
+take the struct request_queue "q" from cfq_exit_queue() as an argument,
+which would permit us to invoke rcu_dereference_protected as follows:
+
+ if (rcu_dereference_protected(ioc->ioc_data,
+ lockdep_is_held(&q->queue_lock)) == cic) {
+
+With this change, there would be no lockdep-RCU splat emitted if this
+code was invoked either from within an RCU read-side critical section
+or with the ->queue_lock held. In particular, this would have suppressed
+the above lockdep-RCU splat because ->queue_lock is held (see #2 in the
+list above).
+
+On the other hand, perhaps we really do need an RCU read-side critical
+section. In this case, the critical section must span the use of the
+return value from rcu_dereference(), or at least until there is some
+reference count incremented or some such. One way to handle this is to
+add rcu_read_lock() and rcu_read_unlock() as follows:
+
+ rcu_read_lock();
+ if (rcu_dereference(ioc->ioc_data) == cic) {
+ spin_lock(&ioc->lock);
+ rcu_assign_pointer(ioc->ioc_data, NULL);
+ spin_unlock(&ioc->lock);
+ }
+ rcu_read_unlock();
+
+With this change, the rcu_dereference() is always within an RCU
+read-side critical section, which again would have suppressed the
+above lockdep-RCU splat.
+
+But in this particular case, we don't actually deference the pointer
+returned from rcu_dereference(). Instead, that pointer is just compared
+to the cic pointer, which means that the rcu_dereference() can be replaced
+by rcu_access_pointer() as follows:
+
+ if (rcu_access_pointer(ioc->ioc_data) == cic) {
+
+Because it is legal to invoke rcu_access_pointer() without protection,
+this change would also suppress the above lockdep-RCU splat.
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
new file mode 100644
index 000000000..cd83d2348
--- /dev/null
+++ b/Documentation/RCU/lockdep.txt
@@ -0,0 +1,112 @@
+RCU and lockdep checking
+
+All flavors of RCU have lockdep checking available, so that lockdep is
+aware of when each task enters and leaves any flavor of RCU read-side
+critical section. Each flavor of RCU is tracked separately (but note
+that this is not the case in 2.6.32 and earlier). This allows lockdep's
+tracking to include RCU state, which can sometimes help when debugging
+deadlocks and the like.
+
+In addition, RCU provides the following primitives that check lockdep's
+state:
+
+ rcu_read_lock_held() for normal RCU.
+ rcu_read_lock_bh_held() for RCU-bh.
+ rcu_read_lock_sched_held() for RCU-sched.
+ srcu_read_lock_held() for SRCU.
+
+These functions are conservative, and will therefore return 1 if they
+aren't certain (for example, if CONFIG_DEBUG_LOCK_ALLOC is not set).
+This prevents things like WARN_ON(!rcu_read_lock_held()) from giving false
+positives when lockdep is disabled.
+
+In addition, a separate kernel config parameter CONFIG_PROVE_RCU enables
+checking of rcu_dereference() primitives:
+
+ rcu_dereference(p):
+ Check for RCU read-side critical section.
+ rcu_dereference_bh(p):
+ Check for RCU-bh read-side critical section.
+ rcu_dereference_sched(p):
+ Check for RCU-sched read-side critical section.
+ srcu_dereference(p, sp):
+ Check for SRCU read-side critical section.
+ rcu_dereference_check(p, c):
+ Use explicit check expression "c" along with
+ rcu_read_lock_held(). This is useful in code that is
+ invoked by both RCU readers and updaters.
+ rcu_dereference_bh_check(p, c):
+ Use explicit check expression "c" along with
+ rcu_read_lock_bh_held(). This is useful in code that
+ is invoked by both RCU-bh readers and updaters.
+ rcu_dereference_sched_check(p, c):
+ Use explicit check expression "c" along with
+ rcu_read_lock_sched_held(). This is useful in code that
+ is invoked by both RCU-sched readers and updaters.
+ srcu_dereference_check(p, c):
+ Use explicit check expression "c" along with
+ srcu_read_lock_held()(). This is useful in code that
+ is invoked by both SRCU readers and updaters.
+ rcu_dereference_index_check(p, c):
+ Use explicit check expression "c", but the caller
+ must supply one of the rcu_read_lock_held() functions.
+ This is useful in code that uses RCU-protected arrays
+ that is invoked by both RCU readers and updaters.
+ rcu_dereference_raw(p):
+ Don't check. (Use sparingly, if at all.)
+ rcu_dereference_protected(p, c):
+ Use explicit check expression "c", and omit all barriers
+ and compiler constraints. This is useful when the data
+ structure cannot change, for example, in code that is
+ invoked only by updaters.
+ rcu_access_pointer(p):
+ Return the value of the pointer and omit all barriers,
+ but retain the compiler constraints that prevent duplicating
+ or coalescsing. This is useful when when testing the
+ value of the pointer itself, for example, against NULL.
+ rcu_access_index(idx):
+ Return the value of the index and omit all barriers, but
+ retain the compiler constraints that prevent duplicating
+ or coalescsing. This is useful when when testing the
+ value of the index itself, for example, against -1.
+
+The rcu_dereference_check() check expression can be any boolean
+expression, but would normally include a lockdep expression. However,
+any boolean expression can be used. For a moderately ornate example,
+consider the following:
+
+ file = rcu_dereference_check(fdt->fd[fd],
+ lockdep_is_held(&files->file_lock) ||
+ atomic_read(&files->count) == 1);
+
+This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner,
+and, if CONFIG_PROVE_RCU is configured, verifies that this expression
+is used in:
+
+1. An RCU read-side critical section (implicit), or
+2. with files->file_lock held, or
+3. on an unshared files_struct.
+
+In case (1), the pointer is picked up in an RCU-safe manner for vanilla
+RCU read-side critical sections, in case (2) the ->file_lock prevents
+any change from taking place, and finally, in case (3) the current task
+is the only task accessing the file_struct, again preventing any change
+from taking place. If the above statement was invoked only from updater
+code, it could instead be written as follows:
+
+ file = rcu_dereference_protected(fdt->fd[fd],
+ lockdep_is_held(&files->file_lock) ||
+ atomic_read(&files->count) == 1);
+
+This would verify cases #2 and #3 above, and furthermore lockdep would
+complain if this was used in an RCU read-side critical section unless one
+of these two cases held. Because rcu_dereference_protected() omits all
+barriers and compiler constraints, it generates better code than do the
+other flavors of rcu_dereference(). On the other hand, it is illegal
+to use rcu_dereference_protected() if either the RCU-protected pointer
+or the RCU-protected data that it points to can change concurrently.
+
+There are currently only "universal" versions of the rcu_assign_pointer()
+and RCU list-/tree-traversal primitives, which do not (yet) check for
+being in an RCU read-side critical section. In the future, separate
+versions of these primitives might be created.
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt
new file mode 100644
index 000000000..745f429fd
--- /dev/null
+++ b/Documentation/RCU/rcu.txt
@@ -0,0 +1,96 @@
+RCU Concepts
+
+
+The basic idea behind RCU (read-copy update) is to split destructive
+operations into two parts, one that prevents anyone from seeing the data
+item being destroyed, and one that actually carries out the destruction.
+A "grace period" must elapse between the two parts, and this grace period
+must be long enough that any readers accessing the item being deleted have
+since dropped their references. For example, an RCU-protected deletion
+from a linked list would first remove the item from the list, wait for
+a grace period to elapse, then free the element. See the listRCU.txt
+file for more information on using RCU with linked lists.
+
+
+Frequently Asked Questions
+
+o Why would anyone want to use RCU?
+
+ The advantage of RCU's two-part approach is that RCU readers need
+ not acquire any locks, perform any atomic instructions, write to
+ shared memory, or (on CPUs other than Alpha) execute any memory
+ barriers. The fact that these operations are quite expensive
+ on modern CPUs is what gives RCU its performance advantages
+ in read-mostly situations. The fact that RCU readers need not
+ acquire locks can also greatly simplify deadlock-avoidance code.
+
+o How can the updater tell when a grace period has completed
+ if the RCU readers give no indication when they are done?
+
+ Just as with spinlocks, RCU readers are not permitted to
+ block, switch to user-mode execution, or enter the idle loop.
+ Therefore, as soon as a CPU is seen passing through any of these
+ three states, we know that that CPU has exited any previous RCU
+ read-side critical sections. So, if we remove an item from a
+ linked list, and then wait until all CPUs have switched context,
+ executed in user mode, or executed in the idle loop, we can
+ safely free up that item.
+
+ Preemptible variants of RCU (CONFIG_PREEMPT_RCU) get the
+ same effect, but require that the readers manipulate CPU-local
+ counters. These counters allow limited types of blocking within
+ RCU read-side critical sections. SRCU also uses CPU-local
+ counters, and permits general blocking within RCU read-side
+ critical sections. These variants of RCU detect grace periods
+ by sampling these counters.
+
+o If I am running on a uniprocessor kernel, which can only do one
+ thing at a time, why should I wait for a grace period?
+
+ See the UP.txt file in this directory.
+
+o How can I see where RCU is currently used in the Linux kernel?
+
+ Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu",
+ "rcu_read_lock_bh", "rcu_read_unlock_bh", "call_rcu_bh",
+ "srcu_read_lock", "srcu_read_unlock", "synchronize_rcu",
+ "synchronize_net", "synchronize_srcu", and the other RCU
+ primitives. Or grab one of the cscope databases from:
+
+ http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html
+
+o What guidelines should I follow when writing code that uses RCU?
+
+ See the checklist.txt file in this directory.
+
+o Why the name "RCU"?
+
+ "RCU" stands for "read-copy update". The file listRCU.txt has
+ more information on where this name came from, search for
+ "read-copy update" to find it.
+
+o I hear that RCU is patented? What is with that?
+
+ Yes, it is. There are several known patents related to RCU,
+ search for the string "Patent" in RTFP.txt to find them.
+ Of these, one was allowed to lapse by the assignee, and the
+ others have been contributed to the Linux kernel under GPL.
+ There are now also LGPL implementations of user-level RCU
+ available (http://lttng.org/?q=node/18).
+
+o I hear that RCU needs work in order to support realtime kernels?
+
+ This work is largely completed. Realtime-friendly RCU can be
+ enabled via the CONFIG_PREEMPT_RCU kernel configuration
+ parameter. However, work is in progress for enabling priority
+ boosting of preempted RCU read-side critical sections. This is
+ needed if you have CPU-bound realtime threads.
+
+o Where can I find more information on RCU?
+
+ See the RTFP.txt file in this directory.
+ Or point your browser at http://www.rdrop.com/users/paulmck/RCU/.
+
+o What are all these files in this directory?
+
+ See 00-INDEX for the list.
diff --git a/Documentation/RCU/rcu_dereference.txt b/Documentation/RCU/rcu_dereference.txt
new file mode 100644
index 000000000..ceb05da5a
--- /dev/null
+++ b/Documentation/RCU/rcu_dereference.txt
@@ -0,0 +1,371 @@
+PROPER CARE AND FEEDING OF RETURN VALUES FROM rcu_dereference()
+
+Most of the time, you can use values from rcu_dereference() or one of
+the similar primitives without worries. Dereferencing (prefix "*"),
+field selection ("->"), assignment ("="), address-of ("&"), addition and
+subtraction of constants, and casts all work quite naturally and safely.
+
+It is nevertheless possible to get into trouble with other operations.
+Follow these rules to keep your RCU code working properly:
+
+o You must use one of the rcu_dereference() family of primitives
+ to load an RCU-protected pointer, otherwise CONFIG_PROVE_RCU
+ will complain. Worse yet, your code can see random memory-corruption
+ bugs due to games that compilers and DEC Alpha can play.
+ Without one of the rcu_dereference() primitives, compilers
+ can reload the value, and won't your code have fun with two
+ different values for a single pointer! Without rcu_dereference(),
+ DEC Alpha can load a pointer, dereference that pointer, and
+ return data preceding initialization that preceded the store of
+ the pointer.
+
+ In addition, the volatile cast in rcu_dereference() prevents the
+ compiler from deducing the resulting pointer value. Please see
+ the section entitled "EXAMPLE WHERE THE COMPILER KNOWS TOO MUCH"
+ for an example where the compiler can in fact deduce the exact
+ value of the pointer, and thus cause misordering.
+
+o Do not use single-element RCU-protected arrays. The compiler
+ is within its right to assume that the value of an index into
+ such an array must necessarily evaluate to zero. The compiler
+ could then substitute the constant zero for the computation, so
+ that the array index no longer depended on the value returned
+ by rcu_dereference(). If the array index no longer depends
+ on rcu_dereference(), then both the compiler and the CPU
+ are within their rights to order the array access before the
+ rcu_dereference(), which can cause the array access to return
+ garbage.
+
+o Avoid cancellation when using the "+" and "-" infix arithmetic
+ operators. For example, for a given variable "x", avoid
+ "(x-x)". There are similar arithmetic pitfalls from other
+ arithmetic operatiors, such as "(x*0)", "(x/(x+1))" or "(x%1)".
+ The compiler is within its rights to substitute zero for all of
+ these expressions, so that subsequent accesses no longer depend
+ on the rcu_dereference(), again possibly resulting in bugs due
+ to misordering.
+
+ Of course, if "p" is a pointer from rcu_dereference(), and "a"
+ and "b" are integers that happen to be equal, the expression
+ "p+a-b" is safe because its value still necessarily depends on
+ the rcu_dereference(), thus maintaining proper ordering.
+
+o Avoid all-zero operands to the bitwise "&" operator, and
+ similarly avoid all-ones operands to the bitwise "|" operator.
+ If the compiler is able to deduce the value of such operands,
+ it is within its rights to substitute the corresponding constant
+ for the bitwise operation. Once again, this causes subsequent
+ accesses to no longer depend on the rcu_dereference(), causing
+ bugs due to misordering.
+
+ Please note that single-bit operands to bitwise "&" can also
+ be dangerous. At this point, the compiler knows that the
+ resulting value can only take on one of two possible values.
+ Therefore, a very small amount of additional information will
+ allow the compiler to deduce the exact value, which again can
+ result in misordering.
+
+o If you are using RCU to protect JITed functions, so that the
+ "()" function-invocation operator is applied to a value obtained
+ (directly or indirectly) from rcu_dereference(), you may need to
+ interact directly with the hardware to flush instruction caches.
+ This issue arises on some systems when a newly JITed function is
+ using the same memory that was used by an earlier JITed function.
+
+o Do not use the results from the boolean "&&" and "||" when
+ dereferencing. For example, the following (rather improbable)
+ code is buggy:
+
+ int a[2];
+ int index;
+ int force_zero_index = 1;
+
+ ...
+
+ r1 = rcu_dereference(i1)
+ r2 = a[r1 && force_zero_index]; /* BUGGY!!! */
+
+ The reason this is buggy is that "&&" and "||" are often compiled
+ using branches. While weak-memory machines such as ARM or PowerPC
+ do order stores after such branches, they can speculate loads,
+ which can result in misordering bugs.
+
+o Do not use the results from relational operators ("==", "!=",
+ ">", ">=", "<", or "<=") when dereferencing. For example,
+ the following (quite strange) code is buggy:
+
+ int a[2];
+ int index;
+ int flip_index = 0;
+
+ ...
+
+ r1 = rcu_dereference(i1)
+ r2 = a[r1 != flip_index]; /* BUGGY!!! */
+
+ As before, the reason this is buggy is that relational operators
+ are often compiled using branches. And as before, although
+ weak-memory machines such as ARM or PowerPC do order stores
+ after such branches, but can speculate loads, which can again
+ result in misordering bugs.
+
+o Be very careful about comparing pointers obtained from
+ rcu_dereference() against non-NULL values. As Linus Torvalds
+ explained, if the two pointers are equal, the compiler could
+ substitute the pointer you are comparing against for the pointer
+ obtained from rcu_dereference(). For example:
+
+ p = rcu_dereference(gp);
+ if (p == &default_struct)
+ do_default(p->a);
+
+ Because the compiler now knows that the value of "p" is exactly
+ the address of the variable "default_struct", it is free to
+ transform this code into the following:
+
+ p = rcu_dereference(gp);
+ if (p == &default_struct)
+ do_default(default_struct.a);
+
+ On ARM and Power hardware, the load from "default_struct.a"
+ can now be speculated, such that it might happen before the
+ rcu_dereference(). This could result in bugs due to misordering.
+
+ However, comparisons are OK in the following cases:
+
+ o The comparison was against the NULL pointer. If the
+ compiler knows that the pointer is NULL, you had better
+ not be dereferencing it anyway. If the comparison is
+ non-equal, the compiler is none the wiser. Therefore,
+ it is safe to compare pointers from rcu_dereference()
+ against NULL pointers.
+
+ o The pointer is never dereferenced after being compared.
+ Since there are no subsequent dereferences, the compiler
+ cannot use anything it learned from the comparison
+ to reorder the non-existent subsequent dereferences.
+ This sort of comparison occurs frequently when scanning
+ RCU-protected circular linked lists.
+
+ o The comparison is against a pointer that references memory
+ that was initialized "a long time ago." The reason
+ this is safe is that even if misordering occurs, the
+ misordering will not affect the accesses that follow
+ the comparison. So exactly how long ago is "a long
+ time ago"? Here are some possibilities:
+
+ o Compile time.
+
+ o Boot time.
+
+ o Module-init time for module code.
+
+ o Prior to kthread creation for kthread code.
+
+ o During some prior acquisition of the lock that
+ we now hold.
+
+ o Before mod_timer() time for a timer handler.
+
+ There are many other possibilities involving the Linux
+ kernel's wide array of primitives that cause code to
+ be invoked at a later time.
+
+ o The pointer being compared against also came from
+ rcu_dereference(). In this case, both pointers depend
+ on one rcu_dereference() or another, so you get proper
+ ordering either way.
+
+ That said, this situation can make certain RCU usage
+ bugs more likely to happen. Which can be a good thing,
+ at least if they happen during testing. An example
+ of such an RCU usage bug is shown in the section titled
+ "EXAMPLE OF AMPLIFIED RCU-USAGE BUG".
+
+ o All of the accesses following the comparison are stores,
+ so that a control dependency preserves the needed ordering.
+ That said, it is easy to get control dependencies wrong.
+ Please see the "CONTROL DEPENDENCIES" section of
+ Documentation/memory-barriers.txt for more details.
+
+ o The pointers are not equal -and- the compiler does
+ not have enough information to deduce the value of the
+ pointer. Note that the volatile cast in rcu_dereference()
+ will normally prevent the compiler from knowing too much.
+
+o Disable any value-speculation optimizations that your compiler
+ might provide, especially if you are making use of feedback-based
+ optimizations that take data collected from prior runs. Such
+ value-speculation optimizations reorder operations by design.
+
+ There is one exception to this rule: Value-speculation
+ optimizations that leverage the branch-prediction hardware are
+ safe on strongly ordered systems (such as x86), but not on weakly
+ ordered systems (such as ARM or Power). Choose your compiler
+ command-line options wisely!
+
+
+EXAMPLE OF AMPLIFIED RCU-USAGE BUG
+
+Because updaters can run concurrently with RCU readers, RCU readers can
+see stale and/or inconsistent values. If RCU readers need fresh or
+consistent values, which they sometimes do, they need to take proper
+precautions. To see this, consider the following code fragment:
+
+ struct foo {
+ int a;
+ int b;
+ int c;
+ };
+ struct foo *gp1;
+ struct foo *gp2;
+
+ void updater(void)
+ {
+ struct foo *p;
+
+ p = kmalloc(...);
+ if (p == NULL)
+ deal_with_it();
+ p->a = 42; /* Each field in its own cache line. */
+ p->b = 43;
+ p->c = 44;
+ rcu_assign_pointer(gp1, p);
+ p->b = 143;
+ p->c = 144;
+ rcu_assign_pointer(gp2, p);
+ }
+
+ void reader(void)
+ {
+ struct foo *p;
+ struct foo *q;
+ int r1, r2;
+
+ p = rcu_dereference(gp2);
+ if (p == NULL)
+ return;
+ r1 = p->b; /* Guaranteed to get 143. */
+ q = rcu_dereference(gp1); /* Guaranteed non-NULL. */
+ if (p == q) {
+ /* The compiler decides that q->c is same as p->c. */
+ r2 = p->c; /* Could get 44 on weakly order system. */
+ }
+ do_something_with(r1, r2);
+ }
+
+You might be surprised that the outcome (r1 == 143 && r2 == 44) is possible,
+but you should not be. After all, the updater might have been invoked
+a second time between the time reader() loaded into "r1" and the time
+that it loaded into "r2". The fact that this same result can occur due
+to some reordering from the compiler and CPUs is beside the point.
+
+But suppose that the reader needs a consistent view?
+
+Then one approach is to use locking, for example, as follows:
+
+ struct foo {
+ int a;
+ int b;
+ int c;
+ spinlock_t lock;
+ };
+ struct foo *gp1;
+ struct foo *gp2;
+
+ void updater(void)
+ {
+ struct foo *p;
+
+ p = kmalloc(...);
+ if (p == NULL)
+ deal_with_it();
+ spin_lock(&p->lock);
+ p->a = 42; /* Each field in its own cache line. */
+ p->b = 43;
+ p->c = 44;
+ spin_unlock(&p->lock);
+ rcu_assign_pointer(gp1, p);
+ spin_lock(&p->lock);
+ p->b = 143;
+ p->c = 144;
+ spin_unlock(&p->lock);
+ rcu_assign_pointer(gp2, p);
+ }
+
+ void reader(void)
+ {
+ struct foo *p;
+ struct foo *q;
+ int r1, r2;
+
+ p = rcu_dereference(gp2);
+ if (p == NULL)
+ return;
+ spin_lock(&p->lock);
+ r1 = p->b; /* Guaranteed to get 143. */
+ q = rcu_dereference(gp1); /* Guaranteed non-NULL. */
+ if (p == q) {
+ /* The compiler decides that q->c is same as p->c. */
+ r2 = p->c; /* Locking guarantees r2 == 144. */
+ }
+ spin_unlock(&p->lock);
+ do_something_with(r1, r2);
+ }
+
+As always, use the right tool for the job!
+
+
+EXAMPLE WHERE THE COMPILER KNOWS TOO MUCH
+
+If a pointer obtained from rcu_dereference() compares not-equal to some
+other pointer, the compiler normally has no clue what the value of the
+first pointer might be. This lack of knowledge prevents the compiler
+from carrying out optimizations that otherwise might destroy the ordering
+guarantees that RCU depends on. And the volatile cast in rcu_dereference()
+should prevent the compiler from guessing the value.
+
+But without rcu_dereference(), the compiler knows more than you might
+expect. Consider the following code fragment:
+
+ struct foo {
+ int a;
+ int b;
+ };
+ static struct foo variable1;
+ static struct foo variable2;
+ static struct foo *gp = &variable1;
+
+ void updater(void)
+ {
+ initialize_foo(&variable2);
+ rcu_assign_pointer(gp, &variable2);
+ /*
+ * The above is the only store to gp in this translation unit,
+ * and the address of gp is not exported in any way.
+ */
+ }
+
+ int reader(void)
+ {
+ struct foo *p;
+
+ p = gp;
+ barrier();
+ if (p == &variable1)
+ return p->a; /* Must be variable1.a. */
+ else
+ return p->b; /* Must be variable2.b. */
+ }
+
+Because the compiler can see all stores to "gp", it knows that the only
+possible values of "gp" are "variable1" on the one hand and "variable2"
+on the other. The comparison in reader() therefore tells the compiler
+the exact value of "p" even in the not-equals case. This allows the
+compiler to make the return values independent of the load from "gp",
+in turn destroying the ordering between this load and the loads of the
+return values. This can result in "p->b" returning pre-initialization
+garbage values.
+
+In short, rcu_dereference() is -not- optional when you are going to
+dereference the resulting pointer.
diff --git a/Documentation/RCU/rcubarrier.txt b/Documentation/RCU/rcubarrier.txt
new file mode 100644
index 000000000..b10cfe711
--- /dev/null
+++ b/Documentation/RCU/rcubarrier.txt
@@ -0,0 +1,321 @@
+RCU and Unloadable Modules
+
+[Originally published in LWN Jan. 14, 2007: http://lwn.net/Articles/217484/]
+
+RCU (read-copy update) is a synchronization mechanism that can be thought
+of as a replacement for read-writer locking (among other things), but with
+very low-overhead readers that are immune to deadlock, priority inversion,
+and unbounded latency. RCU read-side critical sections are delimited
+by rcu_read_lock() and rcu_read_unlock(), which, in non-CONFIG_PREEMPT
+kernels, generate no code whatsoever.
+
+This means that RCU writers are unaware of the presence of concurrent
+readers, so that RCU updates to shared data must be undertaken quite
+carefully, leaving an old version of the data structure in place until all
+pre-existing readers have finished. These old versions are needed because
+such readers might hold a reference to them. RCU updates can therefore be
+rather expensive, and RCU is thus best suited for read-mostly situations.
+
+How can an RCU writer possibly determine when all readers are finished,
+given that readers might well leave absolutely no trace of their
+presence? There is a synchronize_rcu() primitive that blocks until all
+pre-existing readers have completed. An updater wishing to delete an
+element p from a linked list might do the following, while holding an
+appropriate lock, of course:
+
+ list_del_rcu(p);
+ synchronize_rcu();
+ kfree(p);
+
+But the above code cannot be used in IRQ context -- the call_rcu()
+primitive must be used instead. This primitive takes a pointer to an
+rcu_head struct placed within the RCU-protected data structure and
+another pointer to a function that may be invoked later to free that
+structure. Code to delete an element p from the linked list from IRQ
+context might then be as follows:
+
+ list_del_rcu(p);
+ call_rcu(&p->rcu, p_callback);
+
+Since call_rcu() never blocks, this code can safely be used from within
+IRQ context. The function p_callback() might be defined as follows:
+
+ static void p_callback(struct rcu_head *rp)
+ {
+ struct pstruct *p = container_of(rp, struct pstruct, rcu);
+
+ kfree(p);
+ }
+
+
+Unloading Modules That Use call_rcu()
+
+But what if p_callback is defined in an unloadable module?
+
+If we unload the module while some RCU callbacks are pending,
+the CPUs executing these callbacks are going to be severely
+disappointed when they are later invoked, as fancifully depicted at
+http://lwn.net/images/ns/kernel/rcu-drop.jpg.
+
+We could try placing a synchronize_rcu() in the module-exit code path,
+but this is not sufficient. Although synchronize_rcu() does wait for a
+grace period to elapse, it does not wait for the callbacks to complete.
+
+One might be tempted to try several back-to-back synchronize_rcu()
+calls, but this is still not guaranteed to work. If there is a very
+heavy RCU-callback load, then some of the callbacks might be deferred
+in order to allow other processing to proceed. Such deferral is required
+in realtime kernels in order to avoid excessive scheduling latencies.
+
+
+rcu_barrier()
+
+We instead need the rcu_barrier() primitive. Rather than waiting for
+a grace period to elapse, rcu_barrier() waits for all outstanding RCU
+callbacks to complete. Please note that rcu_barrier() does -not- imply
+synchronize_rcu(), in particular, if there are no RCU callbacks queued
+anywhere, rcu_barrier() is within its rights to return immediately,
+without waiting for a grace period to elapse.
+
+Pseudo-code using rcu_barrier() is as follows:
+
+ 1. Prevent any new RCU callbacks from being posted.
+ 2. Execute rcu_barrier().
+ 3. Allow the module to be unloaded.
+
+There are also rcu_barrier_bh(), rcu_barrier_sched(), and srcu_barrier()
+functions for the other flavors of RCU, and you of course must match
+the flavor of rcu_barrier() with that of call_rcu(). If your module
+uses multiple flavors of call_rcu(), then it must also use multiple
+flavors of rcu_barrier() when unloading that module. For example, if
+it uses call_rcu_bh(), call_srcu() on srcu_struct_1, and call_srcu() on
+srcu_struct_2(), then the following three lines of code will be required
+when unloading:
+
+ 1 rcu_barrier_bh();
+ 2 srcu_barrier(&srcu_struct_1);
+ 3 srcu_barrier(&srcu_struct_2);
+
+The rcutorture module makes use of rcu_barrier() in its exit function
+as follows:
+
+ 1 static void
+ 2 rcu_torture_cleanup(void)
+ 3 {
+ 4 int i;
+ 5
+ 6 fullstop = 1;
+ 7 if (shuffler_task != NULL) {
+ 8 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
+ 9 kthread_stop(shuffler_task);
+10 }
+11 shuffler_task = NULL;
+12
+13 if (writer_task != NULL) {
+14 VERBOSE_PRINTK_STRING("Stopping rcu_torture_writer task");
+15 kthread_stop(writer_task);
+16 }
+17 writer_task = NULL;
+18
+19 if (reader_tasks != NULL) {
+20 for (i = 0; i < nrealreaders; i++) {
+21 if (reader_tasks[i] != NULL) {
+22 VERBOSE_PRINTK_STRING(
+23 "Stopping rcu_torture_reader task");
+24 kthread_stop(reader_tasks[i]);
+25 }
+26 reader_tasks[i] = NULL;
+27 }
+28 kfree(reader_tasks);
+29 reader_tasks = NULL;
+30 }
+31 rcu_torture_current = NULL;
+32
+33 if (fakewriter_tasks != NULL) {
+34 for (i = 0; i < nfakewriters; i++) {
+35 if (fakewriter_tasks[i] != NULL) {
+36 VERBOSE_PRINTK_STRING(
+37 "Stopping rcu_torture_fakewriter task");
+38 kthread_stop(fakewriter_tasks[i]);
+39 }
+40 fakewriter_tasks[i] = NULL;
+41 }
+42 kfree(fakewriter_tasks);
+43 fakewriter_tasks = NULL;
+44 }
+45
+46 if (stats_task != NULL) {
+47 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stats task");
+48 kthread_stop(stats_task);
+49 }
+50 stats_task = NULL;
+51
+52 /* Wait for all RCU callbacks to fire. */
+53 rcu_barrier();
+54
+55 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
+56
+57 if (cur_ops->cleanup != NULL)
+58 cur_ops->cleanup();
+59 if (atomic_read(&n_rcu_torture_error))
+60 rcu_torture_print_module_parms("End of test: FAILURE");
+61 else
+62 rcu_torture_print_module_parms("End of test: SUCCESS");
+63 }
+
+Line 6 sets a global variable that prevents any RCU callbacks from
+re-posting themselves. This will not be necessary in most cases, since
+RCU callbacks rarely include calls to call_rcu(). However, the rcutorture
+module is an exception to this rule, and therefore needs to set this
+global variable.
+
+Lines 7-50 stop all the kernel tasks associated with the rcutorture
+module. Therefore, once execution reaches line 53, no more rcutorture
+RCU callbacks will be posted. The rcu_barrier() call on line 53 waits
+for any pre-existing callbacks to complete.
+
+Then lines 55-62 print status and do operation-specific cleanup, and
+then return, permitting the module-unload operation to be completed.
+
+Quick Quiz #1: Is there any other situation where rcu_barrier() might
+ be required?
+
+Your module might have additional complications. For example, if your
+module invokes call_rcu() from timers, you will need to first cancel all
+the timers, and only then invoke rcu_barrier() to wait for any remaining
+RCU callbacks to complete.
+
+Of course, if you module uses call_rcu_bh(), you will need to invoke
+rcu_barrier_bh() before unloading. Similarly, if your module uses
+call_rcu_sched(), you will need to invoke rcu_barrier_sched() before
+unloading. If your module uses call_rcu(), call_rcu_bh(), -and-
+call_rcu_sched(), then you will need to invoke each of rcu_barrier(),
+rcu_barrier_bh(), and rcu_barrier_sched().
+
+
+Implementing rcu_barrier()
+
+Dipankar Sarma's implementation of rcu_barrier() makes use of the fact
+that RCU callbacks are never reordered once queued on one of the per-CPU
+queues. His implementation queues an RCU callback on each of the per-CPU
+callback queues, and then waits until they have all started executing, at
+which point, all earlier RCU callbacks are guaranteed to have completed.
+
+The original code for rcu_barrier() was as follows:
+
+ 1 void rcu_barrier(void)
+ 2 {
+ 3 BUG_ON(in_interrupt());
+ 4 /* Take cpucontrol mutex to protect against CPU hotplug */
+ 5 mutex_lock(&rcu_barrier_mutex);
+ 6 init_completion(&rcu_barrier_completion);
+ 7 atomic_set(&rcu_barrier_cpu_count, 0);
+ 8 on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+ 9 wait_for_completion(&rcu_barrier_completion);
+10 mutex_unlock(&rcu_barrier_mutex);
+11 }
+
+Line 3 verifies that the caller is in process context, and lines 5 and 10
+use rcu_barrier_mutex to ensure that only one rcu_barrier() is using the
+global completion and counters at a time, which are initialized on lines
+6 and 7. Line 8 causes each CPU to invoke rcu_barrier_func(), which is
+shown below. Note that the final "1" in on_each_cpu()'s argument list
+ensures that all the calls to rcu_barrier_func() will have completed
+before on_each_cpu() returns. Line 9 then waits for the completion.
+
+This code was rewritten in 2008 to support rcu_barrier_bh() and
+rcu_barrier_sched() in addition to the original rcu_barrier().
+
+The rcu_barrier_func() runs on each CPU, where it invokes call_rcu()
+to post an RCU callback, as follows:
+
+ 1 static void rcu_barrier_func(void *notused)
+ 2 {
+ 3 int cpu = smp_processor_id();
+ 4 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+ 5 struct rcu_head *head;
+ 6
+ 7 head = &rdp->barrier;
+ 8 atomic_inc(&rcu_barrier_cpu_count);
+ 9 call_rcu(head, rcu_barrier_callback);
+10 }
+
+Lines 3 and 4 locate RCU's internal per-CPU rcu_data structure,
+which contains the struct rcu_head that needed for the later call to
+call_rcu(). Line 7 picks up a pointer to this struct rcu_head, and line
+8 increments a global counter. This counter will later be decremented
+by the callback. Line 9 then registers the rcu_barrier_callback() on
+the current CPU's queue.
+
+The rcu_barrier_callback() function simply atomically decrements the
+rcu_barrier_cpu_count variable and finalizes the completion when it
+reaches zero, as follows:
+
+ 1 static void rcu_barrier_callback(struct rcu_head *notused)
+ 2 {
+ 3 if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+ 4 complete(&rcu_barrier_completion);
+ 5 }
+
+Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
+ immediately (thus incrementing rcu_barrier_cpu_count to the
+ value one), but the other CPU's rcu_barrier_func() invocations
+ are delayed for a full grace period? Couldn't this result in
+ rcu_barrier() returning prematurely?
+
+
+rcu_barrier() Summary
+
+The rcu_barrier() primitive has seen relatively little use, since most
+code using RCU is in the core kernel rather than in modules. However, if
+you are using RCU from an unloadable module, you need to use rcu_barrier()
+so that your module may be safely unloaded.
+
+
+Answers to Quick Quizzes
+
+Quick Quiz #1: Is there any other situation where rcu_barrier() might
+ be required?
+
+Answer: Interestingly enough, rcu_barrier() was not originally
+ implemented for module unloading. Nikita Danilov was using
+ RCU in a filesystem, which resulted in a similar situation at
+ filesystem-unmount time. Dipankar Sarma coded up rcu_barrier()
+ in response, so that Nikita could invoke it during the
+ filesystem-unmount process.
+
+ Much later, yours truly hit the RCU module-unload problem when
+ implementing rcutorture, and found that rcu_barrier() solves
+ this problem as well.
+
+Quick Quiz #2: What happens if CPU 0's rcu_barrier_func() executes
+ immediately (thus incrementing rcu_barrier_cpu_count to the
+ value one), but the other CPU's rcu_barrier_func() invocations
+ are delayed for a full grace period? Couldn't this result in
+ rcu_barrier() returning prematurely?
+
+Answer: This cannot happen. The reason is that on_each_cpu() has its last
+ argument, the wait flag, set to "1". This flag is passed through
+ to smp_call_function() and further to smp_call_function_on_cpu(),
+ causing this latter to spin until the cross-CPU invocation of
+ rcu_barrier_func() has completed. This by itself would prevent
+ a grace period from completing on non-CONFIG_PREEMPT kernels,
+ since each CPU must undergo a context switch (or other quiescent
+ state) before the grace period can complete. However, this is
+ of no use in CONFIG_PREEMPT kernels.
+
+ Therefore, on_each_cpu() disables preemption across its call
+ to smp_call_function() and also across the local call to
+ rcu_barrier_func(). This prevents the local CPU from context
+ switching, again preventing grace periods from completing. This
+ means that all CPUs have executed rcu_barrier_func() before
+ the first rcu_barrier_callback() can possibly execute, in turn
+ preventing rcu_barrier_cpu_count from prematurely reaching zero.
+
+ Currently, -rt implementations of RCU keep but a single global
+ queue for RCU callbacks, and thus do not suffer from this
+ problem. However, when the -rt RCU eventually does have per-CPU
+ callback queues, things will have to change. One simple change
+ is to add an rcu_read_lock() before line 8 of rcu_barrier()
+ and an rcu_read_unlock() after line 8 of this same function. If
+ you can think of a better change, please let me know!
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
new file mode 100644
index 000000000..18f9651ff
--- /dev/null
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -0,0 +1,172 @@
+Using hlist_nulls to protect read-mostly linked lists and
+objects using SLAB_DESTROY_BY_RCU allocations.
+
+Please read the basics in Documentation/RCU/listRCU.txt
+
+Using special makers (called 'nulls') is a convenient way
+to solve following problem :
+
+A typical RCU linked list managing objects which are
+allocated with SLAB_DESTROY_BY_RCU kmem_cache can
+use following algos :
+
+1) Lookup algo
+--------------
+rcu_read_lock()
+begin:
+obj = lockless_lookup(key);
+if (obj) {
+ if (!try_get_ref(obj)) // might fail for free objects
+ goto begin;
+ /*
+ * Because a writer could delete object, and a writer could
+ * reuse these object before the RCU grace period, we
+ * must check key after getting the reference on object
+ */
+ if (obj->key != key) { // not the object we expected
+ put_ref(obj);
+ goto begin;
+ }
+}
+rcu_read_unlock();
+
+Beware that lockless_lookup(key) cannot use traditional hlist_for_each_entry_rcu()
+but a version with an additional memory barrier (smp_rmb())
+
+lockless_lookup(key)
+{
+ struct hlist_node *node, *next;
+ for (pos = rcu_dereference((head)->first);
+ pos && ({ next = pos->next; smp_rmb(); prefetch(next); 1; }) &&
+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+ pos = rcu_dereference(next))
+ if (obj->key == key)
+ return obj;
+ return NULL;
+
+And note the traditional hlist_for_each_entry_rcu() misses this smp_rmb() :
+
+ struct hlist_node *node;
+ for (pos = rcu_dereference((head)->first);
+ pos && ({ prefetch(pos->next); 1; }) &&
+ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });
+ pos = rcu_dereference(pos->next))
+ if (obj->key == key)
+ return obj;
+ return NULL;
+}
+
+Quoting Corey Minyard :
+
+"If the object is moved from one list to another list in-between the
+ time the hash is calculated and the next field is accessed, and the
+ object has moved to the end of a new list, the traversal will not
+ complete properly on the list it should have, since the object will
+ be on the end of the new list and there's not a way to tell it's on a
+ new list and restart the list traversal. I think that this can be
+ solved by pre-fetching the "next" field (with proper barriers) before
+ checking the key."
+
+2) Insert algo :
+----------------
+
+We need to make sure a reader cannot read the new 'obj->obj_next' value
+and previous value of 'obj->key'. Or else, an item could be deleted
+from a chain, and inserted into another chain. If new chain was empty
+before the move, 'next' pointer is NULL, and lockless reader can
+not detect it missed following items in original chain.
+
+/*
+ * Please note that new inserts are done at the head of list,
+ * not in the middle or end.
+ */
+obj = kmem_cache_alloc(...);
+lock_chain(); // typically a spin_lock()
+obj->key = key;
+/*
+ * we need to make sure obj->key is updated before obj->next
+ * or obj->refcnt
+ */
+smp_wmb();
+atomic_set(&obj->refcnt, 1);
+hlist_add_head_rcu(&obj->obj_node, list);
+unlock_chain(); // typically a spin_unlock()
+
+
+3) Remove algo
+--------------
+Nothing special here, we can use a standard RCU hlist deletion.
+But thanks to SLAB_DESTROY_BY_RCU, beware a deleted object can be reused
+very very fast (before the end of RCU grace period)
+
+if (put_last_reference_on(obj) {
+ lock_chain(); // typically a spin_lock()
+ hlist_del_init_rcu(&obj->obj_node);
+ unlock_chain(); // typically a spin_unlock()
+ kmem_cache_free(cachep, obj);
+}
+
+
+
+--------------------------------------------------------------------------
+With hlist_nulls we can avoid extra smp_rmb() in lockless_lookup()
+and extra smp_wmb() in insert function.
+
+For example, if we choose to store the slot number as the 'nulls'
+end-of-list marker for each slot of the hash table, we can detect
+a race (some writer did a delete and/or a move of an object
+to another chain) checking the final 'nulls' value if
+the lookup met the end of chain. If final 'nulls' value
+is not the slot number, then we must restart the lookup at
+the beginning. If the object was moved to the same chain,
+then the reader doesn't care : It might eventually
+scan the list again without harm.
+
+
+1) lookup algo
+
+ head = &table[slot];
+ rcu_read_lock();
+begin:
+ hlist_nulls_for_each_entry_rcu(obj, node, head, member) {
+ if (obj->key == key) {
+ if (!try_get_ref(obj)) // might fail for free objects
+ goto begin;
+ if (obj->key != key) { // not the object we expected
+ put_ref(obj);
+ goto begin;
+ }
+ goto out;
+ }
+/*
+ * if the nulls value we got at the end of this lookup is
+ * not the expected one, we must restart lookup.
+ * We probably met an item that was moved to another chain.
+ */
+ if (get_nulls_value(node) != slot)
+ goto begin;
+ obj = NULL;
+
+out:
+ rcu_read_unlock();
+
+2) Insert function :
+--------------------
+
+/*
+ * Please note that new inserts are done at the head of list,
+ * not in the middle or end.
+ */
+obj = kmem_cache_alloc(cachep);
+lock_chain(); // typically a spin_lock()
+obj->key = key;
+/*
+ * changes to obj->key must be visible before refcnt one
+ */
+smp_wmb();
+atomic_set(&obj->refcnt, 1);
+/*
+ * insert obj in RCU way (readers might be traversing chain)
+ */
+hlist_nulls_add_head_rcu(&obj->obj_node, list);
+unlock_chain(); // typically a spin_unlock()
diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt
new file mode 100644
index 000000000..613033ff2
--- /dev/null
+++ b/Documentation/RCU/rcuref.txt
@@ -0,0 +1,132 @@
+Reference-count design for elements of lists/arrays protected by RCU.
+
+
+Please note that the percpu-ref feature is likely your first
+stop if you need to combine reference counts and RCU. Please see
+include/linux/percpu-refcount.h for more information. However, in
+those unusual cases where percpu-ref would consume too much memory,
+please read on.
+
+------------------------------------------------------------------------
+
+Reference counting on elements of lists which are protected by traditional
+reader/writer spinlocks or semaphores are straightforward:
+
+1. 2.
+add() search_and_reference()
+{ {
+ alloc_object read_lock(&list_lock);
+ ... search_for_element
+ atomic_set(&el->rc, 1); atomic_inc(&el->rc);
+ write_lock(&list_lock); ...
+ add_element read_unlock(&list_lock);
+ ... ...
+ write_unlock(&list_lock); }
+}
+
+3. 4.
+release_referenced() delete()
+{ {
+ ... write_lock(&list_lock);
+ atomic_dec(&el->rc, relfunc) ...
+ ... remove_element
+} write_unlock(&list_lock);
+ ...
+ if (atomic_dec_and_test(&el->rc))
+ kfree(el);
+ ...
+ }
+
+If this list/array is made lock free using RCU as in changing the
+write_lock() in add() and delete() to spin_lock() and changing read_lock()
+in search_and_reference() to rcu_read_lock(), the atomic_inc() in
+search_and_reference() could potentially hold reference to an element which
+has already been deleted from the list/array. Use atomic_inc_not_zero()
+in this scenario as follows:
+
+1. 2.
+add() search_and_reference()
+{ {
+ alloc_object rcu_read_lock();
+ ... search_for_element
+ atomic_set(&el->rc, 1); if (!atomic_inc_not_zero(&el->rc)) {
+ spin_lock(&list_lock); rcu_read_unlock();
+ return FAIL;
+ add_element }
+ ... ...
+ spin_unlock(&list_lock); rcu_read_unlock();
+} }
+3. 4.
+release_referenced() delete()
+{ {
+ ... spin_lock(&list_lock);
+ if (atomic_dec_and_test(&el->rc)) ...
+ call_rcu(&el->head, el_free); remove_element
+ ... spin_unlock(&list_lock);
+} ...
+ if (atomic_dec_and_test(&el->rc))
+ call_rcu(&el->head, el_free);
+ ...
+ }
+
+Sometimes, a reference to the element needs to be obtained in the
+update (write) stream. In such cases, atomic_inc_not_zero() might be
+overkill, since we hold the update-side spinlock. One might instead
+use atomic_inc() in such cases.
+
+It is not always convenient to deal with "FAIL" in the
+search_and_reference() code path. In such cases, the
+atomic_dec_and_test() may be moved from delete() to el_free()
+as follows:
+
+1. 2.
+add() search_and_reference()
+{ {
+ alloc_object rcu_read_lock();
+ ... search_for_element
+ atomic_set(&el->rc, 1); atomic_inc(&el->rc);
+ spin_lock(&list_lock); ...
+
+ add_element rcu_read_unlock();
+ ... }
+ spin_unlock(&list_lock); 4.
+} delete()
+3. {
+release_referenced() spin_lock(&list_lock);
+{ ...
+ ... remove_element
+ if (atomic_dec_and_test(&el->rc)) spin_unlock(&list_lock);
+ kfree(el); ...
+ ... call_rcu(&el->head, el_free);
+} ...
+5. }
+void el_free(struct rcu_head *rhp)
+{
+ release_referenced();
+}
+
+The key point is that the initial reference added by add() is not removed
+until after a grace period has elapsed following removal. This means that
+search_and_reference() cannot find this element, which means that the value
+of el->rc cannot increase. Thus, once it reaches zero, there are no
+readers that can or ever will be able to reference the element. The
+element can therefore safely be freed. This in turn guarantees that if
+any reader finds the element, that reader may safely acquire a reference
+without checking the value of the reference counter.
+
+In cases where delete() can sleep, synchronize_rcu() can be called from
+delete(), so that el_free() can be subsumed into delete as follows:
+
+4.
+delete()
+{
+ spin_lock(&list_lock);
+ ...
+ remove_element
+ spin_unlock(&list_lock);
+ ...
+ synchronize_rcu();
+ if (atomic_dec_and_test(&el->rc))
+ kfree(el);
+ ...
+}
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
new file mode 100644
index 000000000..b57c0c1cd
--- /dev/null
+++ b/Documentation/RCU/stallwarn.txt
@@ -0,0 +1,250 @@
+Using RCU's CPU Stall Detector
+
+The rcu_cpu_stall_suppress module parameter enables RCU's CPU stall
+detector, which detects conditions that unduly delay RCU grace periods.
+This module parameter enables CPU stall detection by default, but
+may be overridden via boot-time parameter or at runtime via sysfs.
+The stall detector's idea of what constitutes "unduly delayed" is
+controlled by a set of kernel configuration variables and cpp macros:
+
+CONFIG_RCU_CPU_STALL_TIMEOUT
+
+ This kernel configuration parameter defines the period of time
+ that RCU will wait from the beginning of a grace period until it
+ issues an RCU CPU stall warning. This time period is normally
+ 21 seconds.
+
+ This configuration parameter may be changed at runtime via the
+ /sys/module/rcupdate/parameters/rcu_cpu_stall_timeout, however
+ this parameter is checked only at the beginning of a cycle.
+ So if you are 10 seconds into a 40-second stall, setting this
+ sysfs parameter to (say) five will shorten the timeout for the
+ -next- stall, or the following warning for the current stall
+ (assuming the stall lasts long enough). It will not affect the
+ timing of the next warning for the current stall.
+
+ Stall-warning messages may be enabled and disabled completely via
+ /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress.
+
+CONFIG_RCU_CPU_STALL_INFO
+
+ This kernel configuration parameter causes the stall warning to
+ print out additional per-CPU diagnostic information, including
+ information on scheduling-clock ticks and RCU's idle-CPU tracking.
+
+RCU_STALL_DELAY_DELTA
+
+ Although the lockdep facility is extremely useful, it does add
+ some overhead. Therefore, under CONFIG_PROVE_RCU, the
+ RCU_STALL_DELAY_DELTA macro allows five extra seconds before
+ giving an RCU CPU stall warning message. (This is a cpp
+ macro, not a kernel configuration parameter.)
+
+RCU_STALL_RAT_DELAY
+
+ The CPU stall detector tries to make the offending CPU print its
+ own warnings, as this often gives better-quality stack traces.
+ However, if the offending CPU does not detect its own stall in
+ the number of jiffies specified by RCU_STALL_RAT_DELAY, then
+ some other CPU will complain. This delay is normally set to
+ two jiffies. (This is a cpp macro, not a kernel configuration
+ parameter.)
+
+rcupdate.rcu_task_stall_timeout
+
+ This boot/sysfs parameter controls the RCU-tasks stall warning
+ interval. A value of zero or less suppresses RCU-tasks stall
+ warnings. A positive value sets the stall-warning interval
+ in jiffies. An RCU-tasks stall warning starts wtih the line:
+
+ INFO: rcu_tasks detected stalls on tasks:
+
+ And continues with the output of sched_show_task() for each
+ task stalling the current RCU-tasks grace period.
+
+For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling,
+it will print a message similar to the following:
+
+INFO: rcu_sched_state detected stall on CPU 5 (t=2500 jiffies)
+
+This message indicates that CPU 5 detected that it was causing a stall,
+and that the stall was affecting RCU-sched. This message will normally be
+followed by a stack dump of the offending CPU. On TREE_RCU kernel builds,
+RCU and RCU-sched are implemented by the same underlying mechanism,
+while on PREEMPT_RCU kernel builds, RCU is instead implemented
+by rcu_preempt_state.
+
+On the other hand, if the offending CPU fails to print out a stall-warning
+message quickly enough, some other CPU will print a message similar to
+the following:
+
+INFO: rcu_bh_state detected stalls on CPUs/tasks: { 3 5 } (detected by 2, 2502 jiffies)
+
+This message indicates that CPU 2 detected that CPUs 3 and 5 were both
+causing stalls, and that the stall was affecting RCU-bh. This message
+will normally be followed by stack dumps for each CPU. Please note that
+PREEMPT_RCU builds can be stalled by tasks as well as by CPUs,
+and that the tasks will be indicated by PID, for example, "P3421".
+It is even possible for a rcu_preempt_state stall to be caused by both
+CPUs -and- tasks, in which case the offending CPUs and tasks will all
+be called out in the list.
+
+Finally, if the grace period ends just as the stall warning starts
+printing, there will be a spurious stall-warning message:
+
+INFO: rcu_bh_state detected stalls on CPUs/tasks: { } (detected by 4, 2502 jiffies)
+
+This is rare, but does happen from time to time in real life. It is also
+possible for a zero-jiffy stall to be flagged in this case, depending
+on how the stall warning and the grace-period initialization happen to
+interact. Please note that it is not possible to entirely eliminate this
+sort of false positive without resorting to things like stop_machine(),
+which is overkill for this sort of problem.
+
+If the CONFIG_RCU_CPU_STALL_INFO kernel configuration parameter is set,
+more information is printed with the stall-warning message, for example:
+
+ INFO: rcu_preempt detected stall on CPU
+ 0: (63959 ticks this GP) idle=241/3fffffffffffffff/0 softirq=82/543
+ (t=65000 jiffies)
+
+In kernels with CONFIG_RCU_FAST_NO_HZ, even more information is
+printed:
+
+ INFO: rcu_preempt detected stall on CPU
+ 0: (64628 ticks this GP) idle=dd5/3fffffffffffffff/0 softirq=82/543 last_accelerate: a345/d342 nonlazy_posted: 25 .D
+ (t=65000 jiffies)
+
+The "(64628 ticks this GP)" indicates that this CPU has taken more
+than 64,000 scheduling-clock interrupts during the current stalled
+grace period. If the CPU was not yet aware of the current grace
+period (for example, if it was offline), then this part of the message
+indicates how many grace periods behind the CPU is.
+
+The "idle=" portion of the message prints the dyntick-idle state.
+The hex number before the first "/" is the low-order 12 bits of the
+dynticks counter, which will have an even-numbered value if the CPU is
+in dyntick-idle mode and an odd-numbered value otherwise. The hex
+number between the two "/"s is the value of the nesting, which will
+be a small positive number if in the idle loop and a very large positive
+number (as shown above) otherwise.
+
+The "softirq=" portion of the message tracks the number of RCU softirq
+handlers that the stalled CPU has executed. The number before the "/"
+is the number that had executed since boot at the time that this CPU
+last noted the beginning of a grace period, which might be the current
+(stalled) grace period, or it might be some earlier grace period (for
+example, if the CPU might have been in dyntick-idle mode for an extended
+time period. The number after the "/" is the number that have executed
+since boot until the current time. If this latter number stays constant
+across repeated stall-warning messages, it is possible that RCU's softirq
+handlers are no longer able to execute on this CPU. This can happen if
+the stalled CPU is spinning with interrupts are disabled, or, in -rt
+kernels, if a high-priority process is starving RCU's softirq handler.
+
+For CONFIG_RCU_FAST_NO_HZ kernels, the "last_accelerate:" prints the
+low-order 16 bits (in hex) of the jiffies counter when this CPU last
+invoked rcu_try_advance_all_cbs() from rcu_needs_cpu() or last invoked
+rcu_accelerate_cbs() from rcu_prepare_for_idle(). The "nonlazy_posted:"
+prints the number of non-lazy callbacks posted since the last call to
+rcu_needs_cpu(). Finally, an "L" indicates that there are currently
+no non-lazy callbacks ("." is printed otherwise, as shown above) and
+"D" indicates that dyntick-idle processing is enabled ("." is printed
+otherwise, for example, if disabled via the "nohz=" kernel boot parameter).
+
+If the relevant grace-period kthread has been unable to run prior to
+the stall warning, the following additional line is printed:
+
+ rcu_preempt kthread starved for 2023 jiffies!
+
+Starving the grace-period kthreads of CPU time can of course result in
+RCU CPU stall warnings even when all CPUs and tasks have passed through
+the required quiescent states.
+
+
+Multiple Warnings From One Stall
+
+If a stall lasts long enough, multiple stall-warning messages will be
+printed for it. The second and subsequent messages are printed at
+longer intervals, so that the time between (say) the first and second
+message will be about three times the interval between the beginning
+of the stall and the first message.
+
+
+What Causes RCU CPU Stall Warnings?
+
+So your kernel printed an RCU CPU stall warning. The next question is
+"What caused it?" The following problems can result in RCU CPU stall
+warnings:
+
+o A CPU looping in an RCU read-side critical section.
+
+o A CPU looping with interrupts disabled. This condition can
+ result in RCU-sched and RCU-bh stalls.
+
+o A CPU looping with preemption disabled. This condition can
+ result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh
+ stalls.
+
+o A CPU looping with bottom halves disabled. This condition can
+ result in RCU-sched and RCU-bh stalls.
+
+o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the
+ kernel without invoking schedule(). Note that cond_resched()
+ does not necessarily prevent RCU CPU stall warnings. Therefore,
+ if the looping in the kernel is really expected and desirable
+ behavior, you might need to replace some of the cond_resched()
+ calls with calls to cond_resched_rcu_qs().
+
+o Anything that prevents RCU's grace-period kthreads from running.
+ This can result in the "All QSes seen" console-log message.
+ This message will include information on when the kthread last
+ ran and how often it should be expected to run.
+
+o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
+ happen to preempt a low-priority task in the middle of an RCU
+ read-side critical section. This is especially damaging if
+ that low-priority task is not permitted to run on any other CPU,
+ in which case the next RCU grace period can never complete, which
+ will eventually cause the system to run out of memory and hang.
+ While the system is in the process of running itself out of
+ memory, you might see stall-warning messages.
+
+o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
+ is running at a higher priority than the RCU softirq threads.
+ This will prevent RCU callbacks from ever being invoked,
+ and in a CONFIG_PREEMPT_RCU kernel will further prevent
+ RCU grace periods from ever completing. Either way, the
+ system will eventually run out of memory and hang. In the
+ CONFIG_PREEMPT_RCU case, you might see stall-warning
+ messages.
+
+o A hardware or software issue shuts off the scheduler-clock
+ interrupt on a CPU that is not in dyntick-idle mode. This
+ problem really has happened, and seems to be most likely to
+ result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels.
+
+o A bug in the RCU implementation.
+
+o A hardware failure. This is quite unlikely, but has occurred
+ at least once in real life. A CPU failed in a running system,
+ becoming unresponsive, but not causing an immediate crash.
+ This resulted in a series of RCU CPU stall warnings, eventually
+ leading the realization that the CPU had failed.
+
+The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall
+warning. Note that SRCU does -not- have CPU stall warnings. Please note
+that RCU only detects CPU stalls when there is a grace period in progress.
+No grace period, no CPU stall warnings.
+
+To diagnose the cause of the stall, inspect the stack traces.
+The offending function will usually be near the top of the stack.
+If you have a series of stall warnings from a single extended stall,
+comparing the stack traces can often help determine where the stall
+is occurring, which will usually be in the function nearest the top of
+that portion of the stack which remains the same from trace to trace.
+If you can reliably trigger the stall, ftrace can be quite helpful.
+
+RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE
+and with RCU's event tracing. For information on RCU's event tracing,
+see include/trace/events/rcu.h.
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
new file mode 100644
index 000000000..dac02a621
--- /dev/null
+++ b/Documentation/RCU/torture.txt
@@ -0,0 +1,334 @@
+RCU Torture Test Operation
+
+
+CONFIG_RCU_TORTURE_TEST
+
+The CONFIG_RCU_TORTURE_TEST config option is available for all RCU
+implementations. It creates an rcutorture kernel module that can
+be loaded to run a torture test. The test periodically outputs
+status messages via printk(), which can be examined via the dmesg
+command (perhaps grepping for "torture"). The test is started
+when the module is loaded, and stops when the module is unloaded.
+
+CONFIG_RCU_TORTURE_TEST_RUNNABLE
+
+It is also possible to specify CONFIG_RCU_TORTURE_TEST=y, which will
+result in the tests being loaded into the base kernel. In this case,
+the CONFIG_RCU_TORTURE_TEST_RUNNABLE config option is used to specify
+whether the RCU torture tests are to be started immediately during
+boot or whether the /proc/sys/kernel/rcutorture_runnable file is used
+to enable them. This /proc file can be used to repeatedly pause and
+restart the tests, regardless of the initial state specified by the
+CONFIG_RCU_TORTURE_TEST_RUNNABLE config option.
+
+You will normally -not- want to start the RCU torture tests during boot
+(and thus the default is CONFIG_RCU_TORTURE_TEST_RUNNABLE=n), but doing
+this can sometimes be useful in finding boot-time bugs.
+
+
+MODULE PARAMETERS
+
+This module has the following parameters:
+
+fqs_duration Duration (in microseconds) of artificially induced bursts
+ of force_quiescent_state() invocations. In RCU
+ implementations having force_quiescent_state(), these
+ bursts help force races between forcing a given grace
+ period and that grace period ending on its own.
+
+fqs_holdoff Holdoff time (in microseconds) between consecutive calls
+ to force_quiescent_state() within a burst.
+
+fqs_stutter Wait time (in seconds) between consecutive bursts
+ of calls to force_quiescent_state().
+
+gp_normal Make the fake writers use normal synchronous grace-period
+ primitives.
+
+gp_exp Make the fake writers use expedited synchronous grace-period
+ primitives. If both gp_normal and gp_exp are set, or
+ if neither gp_normal nor gp_exp are set, then randomly
+ choose the primitive so that about 50% are normal and
+ 50% expedited. By default, neither are set, which
+ gives best overall test coverage.
+
+irqreader Says to invoke RCU readers from irq level. This is currently
+ done via timers. Defaults to "1" for variants of RCU that
+ permit this. (Or, more accurately, variants of RCU that do
+ -not- permit this know to ignore this variable.)
+
+n_barrier_cbs If this is nonzero, RCU barrier testing will be conducted,
+ in which case n_barrier_cbs specifies the number of
+ RCU callbacks (and corresponding kthreads) to use for
+ this testing. The value cannot be negative. If you
+ specify this to be non-zero when torture_type indicates a
+ synchronous RCU implementation (one for which a member of
+ the synchronize_rcu() rather than the call_rcu() family is
+ used -- see the documentation for torture_type below), an
+ error will be reported and no testing will be carried out.
+
+nfakewriters This is the number of RCU fake writer threads to run. Fake
+ writer threads repeatedly use the synchronous "wait for
+ current readers" function of the interface selected by
+ torture_type, with a delay between calls to allow for various
+ different numbers of writers running in parallel.
+ nfakewriters defaults to 4, which provides enough parallelism
+ to trigger special cases caused by multiple writers, such as
+ the synchronize_srcu() early return optimization.
+
+nreaders This is the number of RCU reading threads supported.
+ The default is twice the number of CPUs. Why twice?
+ To properly exercise RCU implementations with preemptible
+ read-side critical sections.
+
+onoff_interval
+ The number of seconds between each attempt to execute a
+ randomly selected CPU-hotplug operation. Defaults to
+ zero, which disables CPU hotplugging. In HOTPLUG_CPU=n
+ kernels, rcutorture will silently refuse to do any
+ CPU-hotplug operations regardless of what value is
+ specified for onoff_interval.
+
+onoff_holdoff The number of seconds to wait until starting CPU-hotplug
+ operations. This would normally only be used when
+ rcutorture was built into the kernel and started
+ automatically at boot time, in which case it is useful
+ in order to avoid confusing boot-time code with CPUs
+ coming and going.
+
+shuffle_interval
+ The number of seconds to keep the test threads affinitied
+ to a particular subset of the CPUs, defaults to 3 seconds.
+ Used in conjunction with test_no_idle_hz.
+
+shutdown_secs The number of seconds to run the test before terminating
+ the test and powering off the system. The default is
+ zero, which disables test termination and system shutdown.
+ This capability is useful for automated testing.
+
+stall_cpu The number of seconds that a CPU should be stalled while
+ within both an rcu_read_lock() and a preempt_disable().
+ This stall happens only once per rcutorture run.
+ If you need multiple stalls, use modprobe and rmmod to
+ repeatedly run rcutorture. The default for stall_cpu
+ is zero, which prevents rcutorture from stalling a CPU.
+
+ Note that attempts to rmmod rcutorture while the stall
+ is ongoing will hang, so be careful what value you
+ choose for this module parameter! In addition, too-large
+ values for stall_cpu might well induce failures and
+ warnings in other parts of the kernel. You have been
+ warned!
+
+stall_cpu_holdoff
+ The number of seconds to wait after rcutorture starts
+ before stalling a CPU. Defaults to 10 seconds.
+
+stat_interval The number of seconds between output of torture
+ statistics (via printk()). Regardless of the interval,
+ statistics are printed when the module is unloaded.
+ Setting the interval to zero causes the statistics to
+ be printed -only- when the module is unloaded, and this
+ is the default.
+
+stutter The length of time to run the test before pausing for this
+ same period of time. Defaults to "stutter=5", so as
+ to run and pause for (roughly) five-second intervals.
+ Specifying "stutter=0" causes the test to run continuously
+ without pausing, which is the old default behavior.
+
+test_boost Whether or not to test the ability of RCU to do priority
+ boosting. Defaults to "test_boost=1", which performs
+ RCU priority-inversion testing only if the selected
+ RCU implementation supports priority boosting. Specifying
+ "test_boost=0" never performs RCU priority-inversion
+ testing. Specifying "test_boost=2" performs RCU
+ priority-inversion testing even if the selected RCU
+ implementation does not support RCU priority boosting,
+ which can be used to test rcutorture's ability to
+ carry out RCU priority-inversion testing.
+
+test_boost_interval
+ The number of seconds in an RCU priority-inversion test
+ cycle. Defaults to "test_boost_interval=7". It is
+ usually wise for this value to be relatively prime to
+ the value selected for "stutter".
+
+test_boost_duration
+ The number of seconds to do RCU priority-inversion testing
+ within any given "test_boost_interval". Defaults to
+ "test_boost_duration=4".
+
+test_no_idle_hz Whether or not to test the ability of RCU to operate in
+ a kernel that disables the scheduling-clock interrupt to
+ idle CPUs. Boolean parameter, "1" to test, "0" otherwise.
+ Defaults to omitting this test.
+
+torture_type The type of RCU to test, with string values as follows:
+
+ "rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu().
+
+ "rcu_sync": rcu_read_lock(), rcu_read_unlock(), and
+ synchronize_rcu().
+
+ "rcu_expedited": rcu_read_lock(), rcu_read_unlock(), and
+ synchronize_rcu_expedited().
+
+ "rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and
+ call_rcu_bh().
+
+ "rcu_bh_sync": rcu_read_lock_bh(), rcu_read_unlock_bh(),
+ and synchronize_rcu_bh().
+
+ "rcu_bh_expedited": rcu_read_lock_bh(), rcu_read_unlock_bh(),
+ and synchronize_rcu_bh_expedited().
+
+ "srcu": srcu_read_lock(), srcu_read_unlock() and
+ call_srcu().
+
+ "srcu_sync": srcu_read_lock(), srcu_read_unlock() and
+ synchronize_srcu().
+
+ "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
+ synchronize_srcu_expedited().
+
+ "sched": preempt_disable(), preempt_enable(), and
+ call_rcu_sched().
+
+ "sched_sync": preempt_disable(), preempt_enable(), and
+ synchronize_sched().
+
+ "sched_expedited": preempt_disable(), preempt_enable(), and
+ synchronize_sched_expedited().
+
+ Defaults to "rcu".
+
+verbose Enable debug printk()s. Default is disabled.
+
+
+OUTPUT
+
+The statistics output is as follows:
+
+ rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
+ rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
+ rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0
+ rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0
+ rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
+ rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
+
+The command "dmesg | grep torture:" will extract this information on
+most systems. On more esoteric configurations, it may be necessary to
+use other commands to access the output of the printk()s used by
+the RCU torture test. The printk()s use KERN_ALERT, so they should
+be evident. ;-)
+
+The first and last lines show the rcutorture module parameters, and the
+last line shows either "SUCCESS" or "FAILURE", based on rcutorture's
+automatic determination as to whether RCU operated correctly.
+
+The entries are as follows:
+
+o "rtc": The hexadecimal address of the structure currently visible
+ to readers.
+
+o "ver": The number of times since boot that the RCU writer task
+ has changed the structure visible to readers.
+
+o "tfle": If non-zero, indicates that the "torture freelist"
+ containing structures to be placed into the "rtc" area is empty.
+ This condition is important, since it can fool you into thinking
+ that RCU is working when it is not. :-/
+
+o "rta": Number of structures allocated from the torture freelist.
+
+o "rtaf": Number of allocations from the torture freelist that have
+ failed due to the list being empty. It is not unusual for this
+ to be non-zero, but it is bad for it to be a large fraction of
+ the value indicated by "rta".
+
+o "rtf": Number of frees into the torture freelist.
+
+o "rtmbe": A non-zero value indicates that rcutorture believes that
+ rcu_assign_pointer() and rcu_dereference() are not working
+ correctly. This value should be zero.
+
+o "rtbe": A non-zero value indicates that one of the rcu_barrier()
+ family of functions is not working correctly.
+
+o "rtbke": rcutorture was unable to create the real-time kthreads
+ used to force RCU priority inversion. This value should be zero.
+
+o "rtbre": Although rcutorture successfully created the kthreads
+ used to force RCU priority inversion, it was unable to set them
+ to the real-time priority level of 1. This value should be zero.
+
+o "rtbf": The number of times that RCU priority boosting failed
+ to resolve RCU priority inversion.
+
+o "rtb": The number of times that rcutorture attempted to force
+ an RCU priority inversion condition. If you are testing RCU
+ priority boosting via the "test_boost" module parameter, this
+ value should be non-zero.
+
+o "nt": The number of times rcutorture ran RCU read-side code from
+ within a timer handler. This value should be non-zero only
+ if you specified the "irqreader" module parameter.
+
+o "Reader Pipe": Histogram of "ages" of structures seen by readers.
+ If any entries past the first two are non-zero, RCU is broken.
+ And rcutorture prints the error flag string "!!!" to make sure
+ you notice. The age of a newly allocated structure is zero,
+ it becomes one when removed from reader visibility, and is
+ incremented once per grace period subsequently -- and is freed
+ after passing through (RCU_TORTURE_PIPE_LEN-2) grace periods.
+
+ The output displayed above was taken from a correctly working
+ RCU. If you want to see what it looks like when broken, break
+ it yourself. ;-)
+
+o "Reader Batch": Another histogram of "ages" of structures seen
+ by readers, but in terms of counter flips (or batches) rather
+ than in terms of grace periods. The legal number of non-zero
+ entries is again two. The reason for this separate view is that
+ it is sometimes easier to get the third entry to show up in the
+ "Reader Batch" list than in the "Reader Pipe" list.
+
+o "Free-Block Circulation": Shows the number of torture structures
+ that have reached a given point in the pipeline. The first element
+ should closely correspond to the number of structures allocated,
+ the second to the number that have been removed from reader view,
+ and all but the last remaining to the corresponding number of
+ passes through a grace period. The last entry should be zero,
+ as it is only incremented if a torture structure's counter
+ somehow gets incremented farther than it should.
+
+Different implementations of RCU can provide implementation-specific
+additional information. For example, SRCU provides the following
+additional line:
+
+ srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1)
+
+This line shows the per-CPU counter state. The numbers in parentheses are
+the values of the "old" and "current" counters for the corresponding CPU.
+The "idx" value maps the "old" and "current" values to the underlying
+array, and is useful for debugging.
+
+
+USAGE
+
+The following script may be used to torture RCU:
+
+ #!/bin/sh
+
+ modprobe rcutorture
+ sleep 3600
+ rmmod rcutorture
+ dmesg | grep torture:
+
+The output can be manually inspected for the error flag of "!!!".
+One could of course create a more elaborate script that automatically
+checked for such errors. The "rmmod" command forces a "SUCCESS",
+"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed. The first
+two are self-explanatory, while the last indicates that while there
+were no RCU failures, CPU-hotplug problems were detected.
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
new file mode 100644
index 000000000..08651da15
--- /dev/null
+++ b/Documentation/RCU/trace.txt
@@ -0,0 +1,554 @@
+CONFIG_RCU_TRACE debugfs Files and Formats
+
+
+The rcutree and rcutiny implementations of RCU provide debugfs trace
+output that summarizes counters and state. This information is useful for
+debugging RCU itself, and can sometimes also help to debug abuses of RCU.
+The following sections describe the debugfs files and formats, first
+for rcutree and next for rcutiny.
+
+
+CONFIG_TREE_RCU and CONFIG_PREEMPT_RCU debugfs Files and Formats
+
+These implementations of RCU provide several debugfs directories under the
+top-level directory "rcu":
+
+rcu/rcu_bh
+rcu/rcu_preempt
+rcu/rcu_sched
+
+Each directory contains files for the corresponding flavor of RCU.
+Note that rcu/rcu_preempt is only present for CONFIG_PREEMPT_RCU.
+For CONFIG_TREE_RCU, the RCU flavor maps onto the RCU-sched flavor,
+so that activity for both appears in rcu/rcu_sched.
+
+In addition, the following file appears in the top-level directory:
+rcu/rcutorture. This file displays rcutorture test progress. The output
+of "cat rcu/rcutorture" looks as follows:
+
+rcutorture test sequence: 0 (test in progress)
+rcutorture update version number: 615
+
+The first line shows the number of rcutorture tests that have completed
+since boot. If a test is currently running, the "(test in progress)"
+string will appear as shown above. The second line shows the number of
+update cycles that the current test has started, or zero if there is
+no test in progress.
+
+
+Within each flavor directory (rcu/rcu_bh, rcu/rcu_sched, and possibly
+also rcu/rcu_preempt) the following files will be present:
+
+rcudata:
+ Displays fields in struct rcu_data.
+rcuexp:
+ Displays statistics for expedited grace periods.
+rcugp:
+ Displays grace-period counters.
+rcuhier:
+ Displays the struct rcu_node hierarchy.
+rcu_pending:
+ Displays counts of the reasons rcu_pending() decided that RCU had
+ work to do.
+rcuboost:
+ Displays RCU boosting statistics. Only present if
+ CONFIG_RCU_BOOST=y.
+
+The output of "cat rcu/rcu_preempt/rcudata" looks as follows:
+
+ 0!c=30455 g=30456 pq=1/0 qp=1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716
+ 1!c=30719 g=30720 pq=1/0 qp=0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982
+ 2!c=30150 g=30151 pq=1/1 qp=1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458
+ 3 c=31249 g=31250 pq=1/1 qp=0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622
+ 4!c=29502 g=29503 pq=1/0 qp=1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521
+ 5 c=31201 g=31202 pq=1/0 qp=1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698
+ 6!c=30253 g=30254 pq=1/0 qp=1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353
+ 7 c=31178 g=31178 pq=1/0 qp=0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969
+
+This file has one line per CPU, or eight for this 8-CPU system.
+The fields are as follows:
+
+o The number at the beginning of each line is the CPU number.
+ CPUs numbers followed by an exclamation mark are offline,
+ but have been online at least once since boot. There will be
+ no output for CPUs that have never been online, which can be
+ a good thing in the surprisingly common case where NR_CPUS is
+ substantially larger than the number of actual CPUs.
+
+o "c" is the count of grace periods that this CPU believes have
+ completed. Offlined CPUs and CPUs in dynticks idle mode may lag
+ quite a ways behind, for example, CPU 4 under "rcu_sched" above,
+ which has been offline through 16 RCU grace periods. It is not
+ unusual to see offline CPUs lagging by thousands of grace periods.
+ Note that although the grace-period number is an unsigned long,
+ it is printed out as a signed long to allow more human-friendly
+ representation near boot time.
+
+o "g" is the count of grace periods that this CPU believes have
+ started. Again, offlined CPUs and CPUs in dynticks idle mode
+ may lag behind. If the "c" and "g" values are equal, this CPU
+ has already reported a quiescent state for the last RCU grace
+ period that it is aware of, otherwise, the CPU believes that it
+ owes RCU a quiescent state.
+
+o "pq" indicates that this CPU has passed through a quiescent state
+ for the current grace period. It is possible for "pq" to be
+ "1" and "c" different than "g", which indicates that although
+ the CPU has passed through a quiescent state, either (1) this
+ CPU has not yet reported that fact, (2) some other CPU has not
+ yet reported for this grace period, or (3) both.
+
+o "qp" indicates that RCU still expects a quiescent state from
+ this CPU. Offlined CPUs and CPUs in dyntick idle mode might
+ well have qp=1, which is OK: RCU is still ignoring them.
+
+o "dt" is the current value of the dyntick counter that is incremented
+ when entering or leaving idle, either due to a context switch or
+ due to an interrupt. This number is even if the CPU is in idle
+ from RCU's viewpoint and odd otherwise. The number after the
+ first "/" is the interrupt nesting depth when in idle state,
+ or a large number added to the interrupt-nesting depth when
+ running a non-idle task. Some architectures do not accurately
+ count interrupt nesting when running in non-idle kernel context,
+ which can result in interesting anomalies such as negative
+ interrupt-nesting levels. The number after the second "/"
+ is the NMI nesting depth.
+
+o "df" is the number of times that some other CPU has forced a
+ quiescent state on behalf of this CPU due to this CPU being in
+ idle state.
+
+o "of" is the number of times that some other CPU has forced a
+ quiescent state on behalf of this CPU due to this CPU being
+ offline. In a perfect world, this might never happen, but it
+ turns out that offlining and onlining a CPU can take several grace
+ periods, and so there is likely to be an extended period of time
+ when RCU believes that the CPU is online when it really is not.
+ Please note that erring in the other direction (RCU believing a
+ CPU is offline when it is really alive and kicking) is a fatal
+ error, so it makes sense to err conservatively.
+
+o "ql" is the number of RCU callbacks currently residing on
+ this CPU. The first number is the number of "lazy" callbacks
+ that are known to RCU to only be freeing memory, and the number
+ after the "/" is the total number of callbacks, lazy or not.
+ These counters count callbacks regardless of what phase of
+ grace-period processing that they are in (new, waiting for
+ grace period to start, waiting for grace period to end, ready
+ to invoke).
+
+o "qs" gives an indication of the state of the callback queue
+ with four characters:
+
+ "N" Indicates that there are callbacks queued that are not
+ ready to be handled by the next grace period, and thus
+ will be handled by the grace period following the next
+ one.
+
+ "R" Indicates that there are callbacks queued that are
+ ready to be handled by the next grace period.
+
+ "W" Indicates that there are callbacks queued that are
+ waiting on the current grace period.
+
+ "D" Indicates that there are callbacks queued that have
+ already been handled by a prior grace period, and are
+ thus waiting to be invoked. Note that callbacks in
+ the process of being invoked are not counted here.
+ Callbacks in the process of being invoked are those
+ that have been removed from the rcu_data structures
+ queues by rcu_do_batch(), but which have not yet been
+ invoked.
+
+ If there are no callbacks in a given one of the above states,
+ the corresponding character is replaced by ".".
+
+o "b" is the batch limit for this CPU. If more than this number
+ of RCU callbacks is ready to invoke, then the remainder will
+ be deferred.
+
+o "ci" is the number of RCU callbacks that have been invoked for
+ this CPU. Note that ci+nci+ql is the number of callbacks that have
+ been registered in absence of CPU-hotplug activity.
+
+o "nci" is the number of RCU callbacks that have been offloaded from
+ this CPU. This will always be zero unless the kernel was built
+ with CONFIG_RCU_NOCB_CPU=y and the "rcu_nocbs=" kernel boot
+ parameter was specified.
+
+o "co" is the number of RCU callbacks that have been orphaned due to
+ this CPU going offline. These orphaned callbacks have been moved
+ to an arbitrarily chosen online CPU.
+
+o "ca" is the number of RCU callbacks that have been adopted by this
+ CPU due to other CPUs going offline. Note that ci+co-ca+ql is
+ the number of RCU callbacks registered on this CPU.
+
+
+Kernels compiled with CONFIG_RCU_BOOST=y display the following from
+/debug/rcu/rcu_preempt/rcudata:
+
+ 0!c=12865 g=12866 pq=1/0 qp=1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871
+ 1 c=14407 g=14408 pq=1/0 qp=0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485
+ 2 c=14407 g=14408 pq=1/0 qp=0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490
+ 3 c=14407 g=14408 pq=1/0 qp=0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290
+ 4 c=14405 g=14406 pq=1/0 qp=1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114
+ 5!c=14168 g=14169 pq=1/0 qp=0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722
+ 6 c=14404 g=14405 pq=1/0 qp=0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811
+ 7 c=14407 g=14408 pq=1/0 qp=1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042
+
+This is similar to the output discussed above, but contains the following
+additional fields:
+
+o "kt" is the per-CPU kernel-thread state. The digit preceding
+ the first slash is zero if there is no work pending and 1
+ otherwise. The character between the first pair of slashes is
+ as follows:
+
+ "S" The kernel thread is stopped, in other words, all
+ CPUs corresponding to this rcu_node structure are
+ offline.
+
+ "R" The kernel thread is running.
+
+ "W" The kernel thread is waiting because there is no work
+ for it to do.
+
+ "O" The kernel thread is waiting because it has been
+ forced off of its designated CPU or because its
+ ->cpus_allowed mask permits it to run on other than
+ its designated CPU.
+
+ "Y" The kernel thread is yielding to avoid hogging CPU.
+
+ "?" Unknown value, indicates a bug.
+
+ The number after the final slash is the CPU that the kthread
+ is actually running on.
+
+ This field is displayed only for CONFIG_RCU_BOOST kernels.
+
+o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
+ the number of times that this CPU's per-CPU kthread has gone
+ through its loop servicing invoke_rcu_cpu_kthread() requests.
+
+ This field is displayed only for CONFIG_RCU_BOOST kernels.
+
+
+The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
+
+s=21872 d=21872 w=0 tf=0 wd1=0 wd2=0 n=0 sc=21872 dt=21872 dl=0 dx=21872
+
+These fields are as follows:
+
+o "s" is the starting sequence number.
+
+o "d" is the ending sequence number. When the starting and ending
+ numbers differ, there is an expedited grace period in progress.
+
+o "w" is the number of times that the sequence numbers have been
+ in danger of wrapping.
+
+o "tf" is the number of times that contention has resulted in a
+ failure to begin an expedited grace period.
+
+o "wd1" and "wd2" are the number of times that an attempt to
+ start an expedited grace period found that someone else had
+ completed an expedited grace period that satisfies the
+ attempted request. "Our work is done."
+
+o "n" is number of times that contention was so great that
+ the request was demoted from an expedited grace period to
+ a normal grace period.
+
+o "sc" is the number of times that the attempt to start a
+ new expedited grace period succeeded.
+
+o "dt" is the number of times that we attempted to update
+ the "d" counter.
+
+o "dl" is the number of times that we failed to update the "d"
+ counter.
+
+o "dx" is the number of times that we succeeded in updating
+ the "d" counter.
+
+
+The output of "cat rcu/rcu_preempt/rcugp" looks as follows:
+
+completed=31249 gpnum=31250 age=1 max=18
+
+These fields are taken from the rcu_state structure, and are as follows:
+
+o "completed" is the number of grace periods that have completed.
+ It is comparable to the "c" field from rcu/rcudata in that a
+ CPU whose "c" field matches the value of "completed" is aware
+ that the corresponding RCU grace period has completed.
+
+o "gpnum" is the number of grace periods that have started. It is
+ similarly comparable to the "g" field from rcu/rcudata in that
+ a CPU whose "g" field matches the value of "gpnum" is aware that
+ the corresponding RCU grace period has started.
+
+ If these two fields are equal, then there is no grace period
+ in progress, in other words, RCU is idle. On the other hand,
+ if the two fields differ (as they are above), then an RCU grace
+ period is in progress.
+
+o "age" is the number of jiffies that the current grace period
+ has extended for, or zero if there is no grace period currently
+ in effect.
+
+o "max" is the age in jiffies of the longest-duration grace period
+ thus far.
+
+The output of "cat rcu/rcu_preempt/rcuhier" looks as follows:
+
+c=14407 g=14408 s=0 jfq=2 j=c863 nfqs=12040/nfqsng=0(12040) fqlh=1051 oqlen=0/0
+3/3 ..>. 0:7 ^0
+e/e ..>. 0:3 ^0 d/d ..>. 4:7 ^1
+
+The fields are as follows:
+
+o "c" is exactly the same as "completed" under rcu/rcu_preempt/rcugp.
+
+o "g" is exactly the same as "gpnum" under rcu/rcu_preempt/rcugp.
+
+o "s" is the current state of the force_quiescent_state()
+ state machine.
+
+o "jfq" is the number of jiffies remaining for this grace period
+ before force_quiescent_state() is invoked to help push things
+ along. Note that CPUs in idle mode throughout the grace period
+ will not report on their own, but rather must be check by some
+ other CPU via force_quiescent_state().
+
+o "j" is the low-order four hex digits of the jiffies counter.
+ Yes, Paul did run into a number of problems that turned out to
+ be due to the jiffies counter no longer counting. Why do you ask?
+
+o "nfqs" is the number of calls to force_quiescent_state() since
+ boot.
+
+o "nfqsng" is the number of useless calls to force_quiescent_state(),
+ where there wasn't actually a grace period active. This can
+ no longer happen due to grace-period processing being pushed
+ into a kthread. The number in parentheses is the difference
+ between "nfqs" and "nfqsng", or the number of times that
+ force_quiescent_state() actually did some real work.
+
+o "fqlh" is the number of calls to force_quiescent_state() that
+ exited immediately (without even being counted in nfqs above)
+ due to contention on ->fqslock.
+
+o Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node
+ structure. Each line represents one level of the hierarchy,
+ from root to leaves. It is best to think of the rcu_data
+ structures as forming yet another level after the leaves.
+ Note that there might be either one, two, three, or even four
+ levels of rcu_node structures, depending on the relationship
+ between CONFIG_RCU_FANOUT, CONFIG_RCU_FANOUT_LEAF (possibly
+ adjusted using the rcu_fanout_leaf kernel boot parameter), and
+ CONFIG_NR_CPUS (possibly adjusted using the nr_cpu_ids count of
+ possible CPUs for the booting hardware).
+
+ o The numbers separated by the "/" are the qsmask followed
+ by the qsmaskinit. The qsmask will have one bit
+ set for each entity in the next lower level that has
+ not yet checked in for the current grace period ("e"
+ indicating CPUs 5, 6, and 7 in the example above).
+ The qsmaskinit will have one bit for each entity that is
+ currently expected to check in during each grace period.
+ The value of qsmaskinit is assigned to that of qsmask
+ at the beginning of each grace period.
+
+ o The characters separated by the ">" indicate the state
+ of the blocked-tasks lists. A "G" preceding the ">"
+ indicates that at least one task blocked in an RCU
+ read-side critical section blocks the current grace
+ period, while a "E" preceding the ">" indicates that
+ at least one task blocked in an RCU read-side critical
+ section blocks the current expedited grace period.
+ A "T" character following the ">" indicates that at
+ least one task is blocked within an RCU read-side
+ critical section, regardless of whether any current
+ grace period (expedited or normal) is inconvenienced.
+ A "." character appears if the corresponding condition
+ does not hold, so that "..>." indicates that no tasks
+ are blocked. In contrast, "GE>T" indicates maximal
+ inconvenience from blocked tasks. CONFIG_TREE_RCU
+ builds of the kernel will always show "..>.".
+
+ o The numbers separated by the ":" are the range of CPUs
+ served by this struct rcu_node. This can be helpful
+ in working out how the hierarchy is wired together.
+
+ For example, the example rcu_node structure shown above
+ has "0:7", indicating that it covers CPUs 0 through 7.
+
+ o The number after the "^" indicates the bit in the
+ next higher level rcu_node structure that this rcu_node
+ structure corresponds to. For example, the "d/d ..>. 4:7
+ ^1" has a "1" in this position, indicating that it
+ corresponds to the "1" bit in the "3" shown in the
+ "3/3 ..>. 0:7 ^0" entry on the next level up.
+
+
+The output of "cat rcu/rcu_sched/rcu_pending" looks as follows:
+
+ 0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 ndw=0
+ 1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 ndw=0
+ 2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 ndw=0
+ 3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 ndw=0
+ 4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 ndw=0
+ 5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 ndw=0
+ 6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 ndw=0
+ 7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 ndw=0
+
+The fields are as follows:
+
+o The leading number is the CPU number, with "!" indicating
+ an offline CPU.
+
+o "np" is the number of times that __rcu_pending() has been invoked
+ for the corresponding flavor of RCU.
+
+o "qsp" is the number of times that the RCU was waiting for a
+ quiescent state from this CPU.
+
+o "rpq" is the number of times that the CPU had passed through
+ a quiescent state, but not yet reported it to RCU.
+
+o "cbr" is the number of times that this CPU had RCU callbacks
+ that had passed through a grace period, and were thus ready
+ to be invoked.
+
+o "cng" is the number of times that this CPU needed another
+ grace period while RCU was idle.
+
+o "gpc" is the number of times that an old grace period had
+ completed, but this CPU was not yet aware of it.
+
+o "gps" is the number of times that a new grace period had started,
+ but this CPU was not yet aware of it.
+
+o "ndw" is the number of times that a wakeup of an rcuo
+ callback-offload kthread had to be deferred in order to avoid
+ deadlock.
+
+o "nn" is the number of times that this CPU needed nothing.
+
+
+The output of "cat rcu/rcuboost" looks as follows:
+
+0:3 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894
+ balk: nt=0 egt=4695 bt=0 nb=0 ny=56 nos=0
+4:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894
+ balk: nt=0 egt=6541 bt=0 nb=0 ny=126 nos=0
+
+This information is output only for rcu_preempt. Each two-line entry
+corresponds to a leaf rcu_node structure. The fields are as follows:
+
+o "n:m" is the CPU-number range for the corresponding two-line
+ entry. In the sample output above, the first entry covers
+ CPUs zero through three and the second entry covers CPUs four
+ through seven.
+
+o "tasks=TNEB" gives the state of the various segments of the
+ rnp->blocked_tasks list:
+
+ "T" This indicates that there are some tasks that blocked
+ while running on one of the corresponding CPUs while
+ in an RCU read-side critical section.
+
+ "N" This indicates that some of the blocked tasks are preventing
+ the current normal (non-expedited) grace period from
+ completing.
+
+ "E" This indicates that some of the blocked tasks are preventing
+ the current expedited grace period from completing.
+
+ "B" This indicates that some of the blocked tasks are in
+ need of RCU priority boosting.
+
+ Each character is replaced with "." if the corresponding
+ condition does not hold.
+
+o "kt" is the state of the RCU priority-boosting kernel
+ thread associated with the corresponding rcu_node structure.
+ The state can be one of the following:
+
+ "S" The kernel thread is stopped, in other words, all
+ CPUs corresponding to this rcu_node structure are
+ offline.
+
+ "R" The kernel thread is running.
+
+ "W" The kernel thread is waiting because there is no work
+ for it to do.
+
+ "Y" The kernel thread is yielding to avoid hogging CPU.
+
+ "?" Unknown value, indicates a bug.
+
+o "ntb" is the number of tasks boosted.
+
+o "neb" is the number of tasks boosted in order to complete an
+ expedited grace period.
+
+o "nnb" is the number of tasks boosted in order to complete a
+ normal (non-expedited) grace period. When boosting a task
+ that was blocking both an expedited and a normal grace period,
+ it is counted against the expedited total above.
+
+o "j" is the low-order 16 bits of the jiffies counter in
+ hexadecimal.
+
+o "bt" is the low-order 16 bits of the value that the jiffies
+ counter will have when we next start boosting, assuming that
+ the current grace period does not end beforehand. This is
+ also in hexadecimal.
+
+o "balk: nt" counts the number of times we didn't boost (in
+ other words, we balked) even though it was time to boost because
+ there were no blocked tasks to boost. This situation occurs
+ when there is one blocked task on one rcu_node structure and
+ none on some other rcu_node structure.
+
+o "egt" counts the number of times we balked because although
+ there were blocked tasks, none of them were blocking the
+ current grace period, whether expedited or otherwise.
+
+o "bt" counts the number of times we balked because boosting
+ had already been initiated for the current grace period.
+
+o "nb" counts the number of times we balked because there
+ was at least one task blocking the current non-expedited grace
+ period that never had blocked. If it is already running, it
+ just won't help to boost its priority!
+
+o "ny" counts the number of times we balked because it was
+ not yet time to start boosting.
+
+o "nos" counts the number of times we balked for other
+ reasons, e.g., the grace period ended first.
+
+
+CONFIG_TINY_RCU debugfs Files and Formats
+
+These implementations of RCU provides a single debugfs file under the
+top-level directory RCU, namely rcu/rcudata, which displays fields in
+rcu_bh_ctrlblk and rcu_sched_ctrlblk.
+
+The output of "cat rcu/rcudata" is as follows:
+
+rcu_sched: qlen: 0
+rcu_bh: qlen: 0
+
+This is split into rcu_sched and rcu_bh sections. The field is as
+follows:
+
+o "qlen" is the number of RCU callbacks currently waiting either
+ for an RCU grace period or waiting to be invoked. This is the
+ only field present for rcu_sched and rcu_bh, due to the
+ short-circuiting of grace period in those two cases.
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
new file mode 100644
index 000000000..88dfce182
--- /dev/null
+++ b/Documentation/RCU/whatisRCU.txt
@@ -0,0 +1,1031 @@
+Please note that the "What is RCU?" LWN series is an excellent place
+to start learning about RCU:
+
+1. What is RCU, Fundamentally? http://lwn.net/Articles/262464/
+2. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/
+3. RCU part 3: the RCU API http://lwn.net/Articles/264090/
+4. The RCU API, 2010 Edition http://lwn.net/Articles/418853/
+
+
+What is RCU?
+
+RCU is a synchronization mechanism that was added to the Linux kernel
+during the 2.5 development effort that is optimized for read-mostly
+situations. Although RCU is actually quite simple once you understand it,
+getting there can sometimes be a challenge. Part of the problem is that
+most of the past descriptions of RCU have been written with the mistaken
+assumption that there is "one true way" to describe RCU. Instead,
+the experience has been that different people must take different paths
+to arrive at an understanding of RCU. This document provides several
+different paths, as follows:
+
+1. RCU OVERVIEW
+2. WHAT IS RCU'S CORE API?
+3. WHAT ARE SOME EXAMPLE USES OF CORE RCU API?
+4. WHAT IF MY UPDATING THREAD CANNOT BLOCK?
+5. WHAT ARE SOME SIMPLE IMPLEMENTATIONS OF RCU?
+6. ANALOGY WITH READER-WRITER LOCKING
+7. FULL LIST OF RCU APIs
+8. ANSWERS TO QUICK QUIZZES
+
+People who prefer starting with a conceptual overview should focus on
+Section 1, though most readers will profit by reading this section at
+some point. People who prefer to start with an API that they can then
+experiment with should focus on Section 2. People who prefer to start
+with example uses should focus on Sections 3 and 4. People who need to
+understand the RCU implementation should focus on Section 5, then dive
+into the kernel source code. People who reason best by analogy should
+focus on Section 6. Section 7 serves as an index to the docbook API
+documentation, and Section 8 is the traditional answer key.
+
+So, start with the section that makes the most sense to you and your
+preferred method of learning. If you need to know everything about
+everything, feel free to read the whole thing -- but if you are really
+that type of person, you have perused the source code and will therefore
+never need this document anyway. ;-)
+
+
+1. RCU OVERVIEW
+
+The basic idea behind RCU is to split updates into "removal" and
+"reclamation" phases. The removal phase removes references to data items
+within a data structure (possibly by replacing them with references to
+new versions of these data items), and can run concurrently with readers.
+The reason that it is safe to run the removal phase concurrently with
+readers is the semantics of modern CPUs guarantee that readers will see
+either the old or the new version of the data structure rather than a
+partially updated reference. The reclamation phase does the work of reclaiming
+(e.g., freeing) the data items removed from the data structure during the
+removal phase. Because reclaiming data items can disrupt any readers
+concurrently referencing those data items, the reclamation phase must
+not start until readers no longer hold references to those data items.
+
+Splitting the update into removal and reclamation phases permits the
+updater to perform the removal phase immediately, and to defer the
+reclamation phase until all readers active during the removal phase have
+completed, either by blocking until they finish or by registering a
+callback that is invoked after they finish. Only readers that are active
+during the removal phase need be considered, because any reader starting
+after the removal phase will be unable to gain a reference to the removed
+data items, and therefore cannot be disrupted by the reclamation phase.
+
+So the typical RCU update sequence goes something like the following:
+
+a. Remove pointers to a data structure, so that subsequent
+ readers cannot gain a reference to it.
+
+b. Wait for all previous readers to complete their RCU read-side
+ critical sections.
+
+c. At this point, there cannot be any readers who hold references
+ to the data structure, so it now may safely be reclaimed
+ (e.g., kfree()d).
+
+Step (b) above is the key idea underlying RCU's deferred destruction.
+The ability to wait until all readers are done allows RCU readers to
+use much lighter-weight synchronization, in some cases, absolutely no
+synchronization at all. In contrast, in more conventional lock-based
+schemes, readers must use heavy-weight synchronization in order to
+prevent an updater from deleting the data structure out from under them.
+This is because lock-based updaters typically update data items in place,
+and must therefore exclude readers. In contrast, RCU-based updaters
+typically take advantage of the fact that writes to single aligned
+pointers are atomic on modern CPUs, allowing atomic insertion, removal,
+and replacement of data items in a linked structure without disrupting
+readers. Concurrent RCU readers can then continue accessing the old
+versions, and can dispense with the atomic operations, memory barriers,
+and communications cache misses that are so expensive on present-day
+SMP computer systems, even in absence of lock contention.
+
+In the three-step procedure shown above, the updater is performing both
+the removal and the reclamation step, but it is often helpful for an
+entirely different thread to do the reclamation, as is in fact the case
+in the Linux kernel's directory-entry cache (dcache). Even if the same
+thread performs both the update step (step (a) above) and the reclamation
+step (step (c) above), it is often helpful to think of them separately.
+For example, RCU readers and updaters need not communicate at all,
+but RCU provides implicit low-overhead communication between readers
+and reclaimers, namely, in step (b) above.
+
+So how the heck can a reclaimer tell when a reader is done, given
+that readers are not doing any sort of synchronization operations???
+Read on to learn about how RCU's API makes this easy.
+
+
+2. WHAT IS RCU'S CORE API?
+
+The core RCU API is quite small:
+
+a. rcu_read_lock()
+b. rcu_read_unlock()
+c. synchronize_rcu() / call_rcu()
+d. rcu_assign_pointer()
+e. rcu_dereference()
+
+There are many other members of the RCU API, but the rest can be
+expressed in terms of these five, though most implementations instead
+express synchronize_rcu() in terms of the call_rcu() callback API.
+
+The five core RCU APIs are described below, the other 18 will be enumerated
+later. See the kernel docbook documentation for more info, or look directly
+at the function header comments.
+
+rcu_read_lock()
+
+ void rcu_read_lock(void);
+
+ Used by a reader to inform the reclaimer that the reader is
+ entering an RCU read-side critical section. It is illegal
+ to block while in an RCU read-side critical section, though
+ kernels built with CONFIG_PREEMPT_RCU can preempt RCU
+ read-side critical sections. Any RCU-protected data structure
+ accessed during an RCU read-side critical section is guaranteed to
+ remain unreclaimed for the full duration of that critical section.
+ Reference counts may be used in conjunction with RCU to maintain
+ longer-term references to data structures.
+
+rcu_read_unlock()
+
+ void rcu_read_unlock(void);
+
+ Used by a reader to inform the reclaimer that the reader is
+ exiting an RCU read-side critical section. Note that RCU
+ read-side critical sections may be nested and/or overlapping.
+
+synchronize_rcu()
+
+ void synchronize_rcu(void);
+
+ Marks the end of updater code and the beginning of reclaimer
+ code. It does this by blocking until all pre-existing RCU
+ read-side critical sections on all CPUs have completed.
+ Note that synchronize_rcu() will -not- necessarily wait for
+ any subsequent RCU read-side critical sections to complete.
+ For example, consider the following sequence of events:
+
+ CPU 0 CPU 1 CPU 2
+ ----------------- ------------------------- ---------------
+ 1. rcu_read_lock()
+ 2. enters synchronize_rcu()
+ 3. rcu_read_lock()
+ 4. rcu_read_unlock()
+ 5. exits synchronize_rcu()
+ 6. rcu_read_unlock()
+
+ To reiterate, synchronize_rcu() waits only for ongoing RCU
+ read-side critical sections to complete, not necessarily for
+ any that begin after synchronize_rcu() is invoked.
+
+ Of course, synchronize_rcu() does not necessarily return
+ -immediately- after the last pre-existing RCU read-side critical
+ section completes. For one thing, there might well be scheduling
+ delays. For another thing, many RCU implementations process
+ requests in batches in order to improve efficiencies, which can
+ further delay synchronize_rcu().
+
+ Since synchronize_rcu() is the API that must figure out when
+ readers are done, its implementation is key to RCU. For RCU
+ to be useful in all but the most read-intensive situations,
+ synchronize_rcu()'s overhead must also be quite small.
+
+ The call_rcu() API is a callback form of synchronize_rcu(),
+ and is described in more detail in a later section. Instead of
+ blocking, it registers a function and argument which are invoked
+ after all ongoing RCU read-side critical sections have completed.
+ This callback variant is particularly useful in situations where
+ it is illegal to block or where update-side performance is
+ critically important.
+
+ However, the call_rcu() API should not be used lightly, as use
+ of the synchronize_rcu() API generally results in simpler code.
+ In addition, the synchronize_rcu() API has the nice property
+ of automatically limiting update rate should grace periods
+ be delayed. This property results in system resilience in face
+ of denial-of-service attacks. Code using call_rcu() should limit
+ update rate in order to gain this same sort of resilience. See
+ checklist.txt for some approaches to limiting the update rate.
+
+rcu_assign_pointer()
+
+ typeof(p) rcu_assign_pointer(p, typeof(p) v);
+
+ Yes, rcu_assign_pointer() -is- implemented as a macro, though it
+ would be cool to be able to declare a function in this manner.
+ (Compiler experts will no doubt disagree.)
+
+ The updater uses this function to assign a new value to an
+ RCU-protected pointer, in order to safely communicate the change
+ in value from the updater to the reader. This function returns
+ the new value, and also executes any memory-barrier instructions
+ required for a given CPU architecture.
+
+ Perhaps just as important, it serves to document (1) which
+ pointers are protected by RCU and (2) the point at which a
+ given structure becomes accessible to other CPUs. That said,
+ rcu_assign_pointer() is most frequently used indirectly, via
+ the _rcu list-manipulation primitives such as list_add_rcu().
+
+rcu_dereference()
+
+ typeof(p) rcu_dereference(p);
+
+ Like rcu_assign_pointer(), rcu_dereference() must be implemented
+ as a macro.
+
+ The reader uses rcu_dereference() to fetch an RCU-protected
+ pointer, which returns a value that may then be safely
+ dereferenced. Note that rcu_deference() does not actually
+ dereference the pointer, instead, it protects the pointer for
+ later dereferencing. It also executes any needed memory-barrier
+ instructions for a given CPU architecture. Currently, only Alpha
+ needs memory barriers within rcu_dereference() -- on other CPUs,
+ it compiles to nothing, not even a compiler directive.
+
+ Common coding practice uses rcu_dereference() to copy an
+ RCU-protected pointer to a local variable, then dereferences
+ this local variable, for example as follows:
+
+ p = rcu_dereference(head.next);
+ return p->data;
+
+ However, in this case, one could just as easily combine these
+ into one statement:
+
+ return rcu_dereference(head.next)->data;
+
+ If you are going to be fetching multiple fields from the
+ RCU-protected structure, using the local variable is of
+ course preferred. Repeated rcu_dereference() calls look
+ ugly and incur unnecessary overhead on Alpha CPUs.
+
+ Note that the value returned by rcu_dereference() is valid
+ only within the enclosing RCU read-side critical section.
+ For example, the following is -not- legal:
+
+ rcu_read_lock();
+ p = rcu_dereference(head.next);
+ rcu_read_unlock();
+ x = p->address; /* BUG!!! */
+ rcu_read_lock();
+ y = p->data; /* BUG!!! */
+ rcu_read_unlock();
+
+ Holding a reference from one RCU read-side critical section
+ to another is just as illegal as holding a reference from
+ one lock-based critical section to another! Similarly,
+ using a reference outside of the critical section in which
+ it was acquired is just as illegal as doing so with normal
+ locking.
+
+ As with rcu_assign_pointer(), an important function of
+ rcu_dereference() is to document which pointers are protected by
+ RCU, in particular, flagging a pointer that is subject to changing
+ at any time, including immediately after the rcu_dereference().
+ And, again like rcu_assign_pointer(), rcu_dereference() is
+ typically used indirectly, via the _rcu list-manipulation
+ primitives, such as list_for_each_entry_rcu().
+
+The following diagram shows how each API communicates among the
+reader, updater, and reclaimer.
+
+
+ rcu_assign_pointer()
+ +--------+
+ +---------------------->| reader |---------+
+ | +--------+ |
+ | | |
+ | | | Protect:
+ | | | rcu_read_lock()
+ | | | rcu_read_unlock()
+ | rcu_dereference() | |
+ +---------+ | |
+ | updater |<---------------------+ |
+ +---------+ V
+ | +-----------+
+ +----------------------------------->| reclaimer |
+ +-----------+
+ Defer:
+ synchronize_rcu() & call_rcu()
+
+
+The RCU infrastructure observes the time sequence of rcu_read_lock(),
+rcu_read_unlock(), synchronize_rcu(), and call_rcu() invocations in
+order to determine when (1) synchronize_rcu() invocations may return
+to their callers and (2) call_rcu() callbacks may be invoked. Efficient
+implementations of the RCU infrastructure make heavy use of batching in
+order to amortize their overhead over many uses of the corresponding APIs.
+
+There are no fewer than three RCU mechanisms in the Linux kernel; the
+diagram above shows the first one, which is by far the most commonly used.
+The rcu_dereference() and rcu_assign_pointer() primitives are used for
+all three mechanisms, but different defer and protect primitives are
+used as follows:
+
+ Defer Protect
+
+a. synchronize_rcu() rcu_read_lock() / rcu_read_unlock()
+ call_rcu() rcu_dereference()
+
+b. synchronize_rcu_bh() rcu_read_lock_bh() / rcu_read_unlock_bh()
+ call_rcu_bh() rcu_dereference_bh()
+
+c. synchronize_sched() rcu_read_lock_sched() / rcu_read_unlock_sched()
+ call_rcu_sched() preempt_disable() / preempt_enable()
+ local_irq_save() / local_irq_restore()
+ hardirq enter / hardirq exit
+ NMI enter / NMI exit
+ rcu_dereference_sched()
+
+These three mechanisms are used as follows:
+
+a. RCU applied to normal data structures.
+
+b. RCU applied to networking data structures that may be subjected
+ to remote denial-of-service attacks.
+
+c. RCU applied to scheduler and interrupt/NMI-handler tasks.
+
+Again, most uses will be of (a). The (b) and (c) cases are important
+for specialized uses, but are relatively uncommon.
+
+
+3. WHAT ARE SOME EXAMPLE USES OF CORE RCU API?
+
+This section shows a simple use of the core RCU API to protect a
+global pointer to a dynamically allocated structure. More-typical
+uses of RCU may be found in listRCU.txt, arrayRCU.txt, and NMI-RCU.txt.
+
+ struct foo {
+ int a;
+ char b;
+ long c;
+ };
+ DEFINE_SPINLOCK(foo_mutex);
+
+ struct foo *gbl_foo;
+
+ /*
+ * Create a new struct foo that is the same as the one currently
+ * pointed to by gbl_foo, except that field "a" is replaced
+ * with "new_a". Points gbl_foo to the new structure, and
+ * frees up the old structure after a grace period.
+ *
+ * Uses rcu_assign_pointer() to ensure that concurrent readers
+ * see the initialized version of the new structure.
+ *
+ * Uses synchronize_rcu() to ensure that any readers that might
+ * have references to the old structure complete before freeing
+ * the old structure.
+ */
+ void foo_update_a(int new_a)
+ {
+ struct foo *new_fp;
+ struct foo *old_fp;
+
+ new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
+ spin_lock(&foo_mutex);
+ old_fp = gbl_foo;
+ *new_fp = *old_fp;
+ new_fp->a = new_a;
+ rcu_assign_pointer(gbl_foo, new_fp);
+ spin_unlock(&foo_mutex);
+ synchronize_rcu();
+ kfree(old_fp);
+ }
+
+ /*
+ * Return the value of field "a" of the current gbl_foo
+ * structure. Use rcu_read_lock() and rcu_read_unlock()
+ * to ensure that the structure does not get deleted out
+ * from under us, and use rcu_dereference() to ensure that
+ * we see the initialized version of the structure (important
+ * for DEC Alpha and for people reading the code).
+ */
+ int foo_get_a(void)
+ {
+ int retval;
+
+ rcu_read_lock();
+ retval = rcu_dereference(gbl_foo)->a;
+ rcu_read_unlock();
+ return retval;
+ }
+
+So, to sum up:
+
+o Use rcu_read_lock() and rcu_read_unlock() to guard RCU
+ read-side critical sections.
+
+o Within an RCU read-side critical section, use rcu_dereference()
+ to dereference RCU-protected pointers.
+
+o Use some solid scheme (such as locks or semaphores) to
+ keep concurrent updates from interfering with each other.
+
+o Use rcu_assign_pointer() to update an RCU-protected pointer.
+ This primitive protects concurrent readers from the updater,
+ -not- concurrent updates from each other! You therefore still
+ need to use locking (or something similar) to keep concurrent
+ rcu_assign_pointer() primitives from interfering with each other.
+
+o Use synchronize_rcu() -after- removing a data element from an
+ RCU-protected data structure, but -before- reclaiming/freeing
+ the data element, in order to wait for the completion of all
+ RCU read-side critical sections that might be referencing that
+ data item.
+
+See checklist.txt for additional rules to follow when using RCU.
+And again, more-typical uses of RCU may be found in listRCU.txt,
+arrayRCU.txt, and NMI-RCU.txt.
+
+
+4. WHAT IF MY UPDATING THREAD CANNOT BLOCK?
+
+In the example above, foo_update_a() blocks until a grace period elapses.
+This is quite simple, but in some cases one cannot afford to wait so
+long -- there might be other high-priority work to be done.
+
+In such cases, one uses call_rcu() rather than synchronize_rcu().
+The call_rcu() API is as follows:
+
+ void call_rcu(struct rcu_head * head,
+ void (*func)(struct rcu_head *head));
+
+This function invokes func(head) after a grace period has elapsed.
+This invocation might happen from either softirq or process context,
+so the function is not permitted to block. The foo struct needs to
+have an rcu_head structure added, perhaps as follows:
+
+ struct foo {
+ int a;
+ char b;
+ long c;
+ struct rcu_head rcu;
+ };
+
+The foo_update_a() function might then be written as follows:
+
+ /*
+ * Create a new struct foo that is the same as the one currently
+ * pointed to by gbl_foo, except that field "a" is replaced
+ * with "new_a". Points gbl_foo to the new structure, and
+ * frees up the old structure after a grace period.
+ *
+ * Uses rcu_assign_pointer() to ensure that concurrent readers
+ * see the initialized version of the new structure.
+ *
+ * Uses call_rcu() to ensure that any readers that might have
+ * references to the old structure complete before freeing the
+ * old structure.
+ */
+ void foo_update_a(int new_a)
+ {
+ struct foo *new_fp;
+ struct foo *old_fp;
+
+ new_fp = kmalloc(sizeof(*new_fp), GFP_KERNEL);
+ spin_lock(&foo_mutex);
+ old_fp = gbl_foo;
+ *new_fp = *old_fp;
+ new_fp->a = new_a;
+ rcu_assign_pointer(gbl_foo, new_fp);
+ spin_unlock(&foo_mutex);
+ call_rcu(&old_fp->rcu, foo_reclaim);
+ }
+
+The foo_reclaim() function might appear as follows:
+
+ void foo_reclaim(struct rcu_head *rp)
+ {
+ struct foo *fp = container_of(rp, struct foo, rcu);
+
+ foo_cleanup(fp->a);
+
+ kfree(fp);
+ }
+
+The container_of() primitive is a macro that, given a pointer into a
+struct, the type of the struct, and the pointed-to field within the
+struct, returns a pointer to the beginning of the struct.
+
+The use of call_rcu() permits the caller of foo_update_a() to
+immediately regain control, without needing to worry further about the
+old version of the newly updated element. It also clearly shows the
+RCU distinction between updater, namely foo_update_a(), and reclaimer,
+namely foo_reclaim().
+
+The summary of advice is the same as for the previous section, except
+that we are now using call_rcu() rather than synchronize_rcu():
+
+o Use call_rcu() -after- removing a data element from an
+ RCU-protected data structure in order to register a callback
+ function that will be invoked after the completion of all RCU
+ read-side critical sections that might be referencing that
+ data item.
+
+If the callback for call_rcu() is not doing anything more than calling
+kfree() on the structure, you can use kfree_rcu() instead of call_rcu()
+to avoid having to write your own callback:
+
+ kfree_rcu(old_fp, rcu);
+
+Again, see checklist.txt for additional rules governing the use of RCU.
+
+
+5. WHAT ARE SOME SIMPLE IMPLEMENTATIONS OF RCU?
+
+One of the nice things about RCU is that it has extremely simple "toy"
+implementations that are a good first step towards understanding the
+production-quality implementations in the Linux kernel. This section
+presents two such "toy" implementations of RCU, one that is implemented
+in terms of familiar locking primitives, and another that more closely
+resembles "classic" RCU. Both are way too simple for real-world use,
+lacking both functionality and performance. However, they are useful
+in getting a feel for how RCU works. See kernel/rcupdate.c for a
+production-quality implementation, and see:
+
+ http://www.rdrop.com/users/paulmck/RCU
+
+for papers describing the Linux kernel RCU implementation. The OLS'01
+and OLS'02 papers are a good introduction, and the dissertation provides
+more details on the current implementation as of early 2004.
+
+
+5A. "TOY" IMPLEMENTATION #1: LOCKING
+
+This section presents a "toy" RCU implementation that is based on
+familiar locking primitives. Its overhead makes it a non-starter for
+real-life use, as does its lack of scalability. It is also unsuitable
+for realtime use, since it allows scheduling latency to "bleed" from
+one read-side critical section to another.
+
+However, it is probably the easiest implementation to relate to, so is
+a good starting point.
+
+It is extremely simple:
+
+ static DEFINE_RWLOCK(rcu_gp_mutex);
+
+ void rcu_read_lock(void)
+ {
+ read_lock(&rcu_gp_mutex);
+ }
+
+ void rcu_read_unlock(void)
+ {
+ read_unlock(&rcu_gp_mutex);
+ }
+
+ void synchronize_rcu(void)
+ {
+ write_lock(&rcu_gp_mutex);
+ write_unlock(&rcu_gp_mutex);
+ }
+
+[You can ignore rcu_assign_pointer() and rcu_dereference() without
+missing much. But here they are anyway. And whatever you do, don't
+forget about them when submitting patches making use of RCU!]
+
+ #define rcu_assign_pointer(p, v) ({ \
+ smp_wmb(); \
+ (p) = (v); \
+ })
+
+ #define rcu_dereference(p) ({ \
+ typeof(p) _________p1 = p; \
+ smp_read_barrier_depends(); \
+ (_________p1); \
+ })
+
+
+The rcu_read_lock() and rcu_read_unlock() primitive read-acquire
+and release a global reader-writer lock. The synchronize_rcu()
+primitive write-acquires this same lock, then immediately releases
+it. This means that once synchronize_rcu() exits, all RCU read-side
+critical sections that were in progress before synchronize_rcu() was
+called are guaranteed to have completed -- there is no way that
+synchronize_rcu() would have been able to write-acquire the lock
+otherwise.
+
+It is possible to nest rcu_read_lock(), since reader-writer locks may
+be recursively acquired. Note also that rcu_read_lock() is immune
+from deadlock (an important property of RCU). The reason for this is
+that the only thing that can block rcu_read_lock() is a synchronize_rcu().
+But synchronize_rcu() does not acquire any locks while holding rcu_gp_mutex,
+so there can be no deadlock cycle.
+
+Quick Quiz #1: Why is this argument naive? How could a deadlock
+ occur when using this algorithm in a real-world Linux
+ kernel? How could this deadlock be avoided?
+
+
+5B. "TOY" EXAMPLE #2: CLASSIC RCU
+
+This section presents a "toy" RCU implementation that is based on
+"classic RCU". It is also short on performance (but only for updates) and
+on features such as hotplug CPU and the ability to run in CONFIG_PREEMPT
+kernels. The definitions of rcu_dereference() and rcu_assign_pointer()
+are the same as those shown in the preceding section, so they are omitted.
+
+ void rcu_read_lock(void) { }
+
+ void rcu_read_unlock(void) { }
+
+ void synchronize_rcu(void)
+ {
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ run_on(cpu);
+ }
+
+Note that rcu_read_lock() and rcu_read_unlock() do absolutely nothing.
+This is the great strength of classic RCU in a non-preemptive kernel:
+read-side overhead is precisely zero, at least on non-Alpha CPUs.
+And there is absolutely no way that rcu_read_lock() can possibly
+participate in a deadlock cycle!
+
+The implementation of synchronize_rcu() simply schedules itself on each
+CPU in turn. The run_on() primitive can be implemented straightforwardly
+in terms of the sched_setaffinity() primitive. Of course, a somewhat less
+"toy" implementation would restore the affinity upon completion rather
+than just leaving all tasks running on the last CPU, but when I said
+"toy", I meant -toy-!
+
+So how the heck is this supposed to work???
+
+Remember that it is illegal to block while in an RCU read-side critical
+section. Therefore, if a given CPU executes a context switch, we know
+that it must have completed all preceding RCU read-side critical sections.
+Once -all- CPUs have executed a context switch, then -all- preceding
+RCU read-side critical sections will have completed.
+
+So, suppose that we remove a data item from its structure and then invoke
+synchronize_rcu(). Once synchronize_rcu() returns, we are guaranteed
+that there are no RCU read-side critical sections holding a reference
+to that data item, so we can safely reclaim it.
+
+Quick Quiz #2: Give an example where Classic RCU's read-side
+ overhead is -negative-.
+
+Quick Quiz #3: If it is illegal to block in an RCU read-side
+ critical section, what the heck do you do in
+ PREEMPT_RT, where normal spinlocks can block???
+
+
+6. ANALOGY WITH READER-WRITER LOCKING
+
+Although RCU can be used in many different ways, a very common use of
+RCU is analogous to reader-writer locking. The following unified
+diff shows how closely related RCU and reader-writer locking can be.
+
+ @@ -13,15 +14,15 @@
+ struct list_head *lp;
+ struct el *p;
+
+ - read_lock();
+ - list_for_each_entry(p, head, lp) {
+ + rcu_read_lock();
+ + list_for_each_entry_rcu(p, head, lp) {
+ if (p->key == key) {
+ *result = p->data;
+ - read_unlock();
+ + rcu_read_unlock();
+ return 1;
+ }
+ }
+ - read_unlock();
+ + rcu_read_unlock();
+ return 0;
+ }
+
+ @@ -29,15 +30,16 @@
+ {
+ struct el *p;
+
+ - write_lock(&listmutex);
+ + spin_lock(&listmutex);
+ list_for_each_entry(p, head, lp) {
+ if (p->key == key) {
+ - list_del(&p->list);
+ - write_unlock(&listmutex);
+ + list_del_rcu(&p->list);
+ + spin_unlock(&listmutex);
+ + synchronize_rcu();
+ kfree(p);
+ return 1;
+ }
+ }
+ - write_unlock(&listmutex);
+ + spin_unlock(&listmutex);
+ return 0;
+ }
+
+Or, for those who prefer a side-by-side listing:
+
+ 1 struct el { 1 struct el {
+ 2 struct list_head list; 2 struct list_head list;
+ 3 long key; 3 long key;
+ 4 spinlock_t mutex; 4 spinlock_t mutex;
+ 5 int data; 5 int data;
+ 6 /* Other data fields */ 6 /* Other data fields */
+ 7 }; 7 };
+ 8 spinlock_t listmutex; 8 spinlock_t listmutex;
+ 9 struct el head; 9 struct el head;
+
+ 1 int search(long key, int *result) 1 int search(long key, int *result)
+ 2 { 2 {
+ 3 struct list_head *lp; 3 struct list_head *lp;
+ 4 struct el *p; 4 struct el *p;
+ 5 5
+ 6 read_lock(); 6 rcu_read_lock();
+ 7 list_for_each_entry(p, head, lp) { 7 list_for_each_entry_rcu(p, head, lp) {
+ 8 if (p->key == key) { 8 if (p->key == key) {
+ 9 *result = p->data; 9 *result = p->data;
+10 read_unlock(); 10 rcu_read_unlock();
+11 return 1; 11 return 1;
+12 } 12 }
+13 } 13 }
+14 read_unlock(); 14 rcu_read_unlock();
+15 return 0; 15 return 0;
+16 } 16 }
+
+ 1 int delete(long key) 1 int delete(long key)
+ 2 { 2 {
+ 3 struct el *p; 3 struct el *p;
+ 4 4
+ 5 write_lock(&listmutex); 5 spin_lock(&listmutex);
+ 6 list_for_each_entry(p, head, lp) { 6 list_for_each_entry(p, head, lp) {
+ 7 if (p->key == key) { 7 if (p->key == key) {
+ 8 list_del(&p->list); 8 list_del_rcu(&p->list);
+ 9 write_unlock(&listmutex); 9 spin_unlock(&listmutex);
+ 10 synchronize_rcu();
+10 kfree(p); 11 kfree(p);
+11 return 1; 12 return 1;
+12 } 13 }
+13 } 14 }
+14 write_unlock(&listmutex); 15 spin_unlock(&listmutex);
+15 return 0; 16 return 0;
+16 } 17 }
+
+Either way, the differences are quite small. Read-side locking moves
+to rcu_read_lock() and rcu_read_unlock, update-side locking moves from
+a reader-writer lock to a simple spinlock, and a synchronize_rcu()
+precedes the kfree().
+
+However, there is one potential catch: the read-side and update-side
+critical sections can now run concurrently. In many cases, this will
+not be a problem, but it is necessary to check carefully regardless.
+For example, if multiple independent list updates must be seen as
+a single atomic update, converting to RCU will require special care.
+
+Also, the presence of synchronize_rcu() means that the RCU version of
+delete() can now block. If this is a problem, there is a callback-based
+mechanism that never blocks, namely call_rcu() or kfree_rcu(), that can
+be used in place of synchronize_rcu().
+
+
+7. FULL LIST OF RCU APIs
+
+The RCU APIs are documented in docbook-format header comments in the
+Linux-kernel source code, but it helps to have a full list of the
+APIs, since there does not appear to be a way to categorize them
+in docbook. Here is the list, by category.
+
+RCU list traversal:
+
+ list_entry_rcu
+ list_first_entry_rcu
+ list_next_rcu
+ list_for_each_entry_rcu
+ list_for_each_entry_continue_rcu
+ hlist_first_rcu
+ hlist_next_rcu
+ hlist_pprev_rcu
+ hlist_for_each_entry_rcu
+ hlist_for_each_entry_rcu_bh
+ hlist_for_each_entry_continue_rcu
+ hlist_for_each_entry_continue_rcu_bh
+ hlist_nulls_first_rcu
+ hlist_nulls_for_each_entry_rcu
+ hlist_bl_first_rcu
+ hlist_bl_for_each_entry_rcu
+
+RCU pointer/list update:
+
+ rcu_assign_pointer
+ list_add_rcu
+ list_add_tail_rcu
+ list_del_rcu
+ list_replace_rcu
+ hlist_add_behind_rcu
+ hlist_add_before_rcu
+ hlist_add_head_rcu
+ hlist_del_rcu
+ hlist_del_init_rcu
+ hlist_replace_rcu
+ list_splice_init_rcu()
+ hlist_nulls_del_init_rcu
+ hlist_nulls_del_rcu
+ hlist_nulls_add_head_rcu
+ hlist_bl_add_head_rcu
+ hlist_bl_del_init_rcu
+ hlist_bl_del_rcu
+ hlist_bl_set_first_rcu
+
+RCU: Critical sections Grace period Barrier
+
+ rcu_read_lock synchronize_net rcu_barrier
+ rcu_read_unlock synchronize_rcu
+ rcu_dereference synchronize_rcu_expedited
+ rcu_read_lock_held call_rcu
+ rcu_dereference_check kfree_rcu
+ rcu_dereference_protected
+
+bh: Critical sections Grace period Barrier
+
+ rcu_read_lock_bh call_rcu_bh rcu_barrier_bh
+ rcu_read_unlock_bh synchronize_rcu_bh
+ rcu_dereference_bh synchronize_rcu_bh_expedited
+ rcu_dereference_bh_check
+ rcu_dereference_bh_protected
+ rcu_read_lock_bh_held
+
+sched: Critical sections Grace period Barrier
+
+ rcu_read_lock_sched synchronize_sched rcu_barrier_sched
+ rcu_read_unlock_sched call_rcu_sched
+ [preempt_disable] synchronize_sched_expedited
+ [and friends]
+ rcu_read_lock_sched_notrace
+ rcu_read_unlock_sched_notrace
+ rcu_dereference_sched
+ rcu_dereference_sched_check
+ rcu_dereference_sched_protected
+ rcu_read_lock_sched_held
+
+
+SRCU: Critical sections Grace period Barrier
+
+ srcu_read_lock synchronize_srcu srcu_barrier
+ srcu_read_unlock call_srcu
+ srcu_dereference synchronize_srcu_expedited
+ srcu_dereference_check
+ srcu_read_lock_held
+
+SRCU: Initialization/cleanup
+ init_srcu_struct
+ cleanup_srcu_struct
+
+All: lockdep-checked RCU-protected pointer access
+
+ rcu_access_index
+ rcu_access_pointer
+ rcu_dereference_index_check
+ rcu_dereference_raw
+ rcu_lockdep_assert
+ rcu_sleep_check
+ RCU_NONIDLE
+
+See the comment headers in the source code (or the docbook generated
+from them) for more information.
+
+However, given that there are no fewer than four families of RCU APIs
+in the Linux kernel, how do you choose which one to use? The following
+list can be helpful:
+
+a. Will readers need to block? If so, you need SRCU.
+
+b. What about the -rt patchset? If readers would need to block
+ in an non-rt kernel, you need SRCU. If readers would block
+ in a -rt kernel, but not in a non-rt kernel, SRCU is not
+ necessary.
+
+c. Do you need to treat NMI handlers, hardirq handlers,
+ and code segments with preemption disabled (whether
+ via preempt_disable(), local_irq_save(), local_bh_disable(),
+ or some other mechanism) as if they were explicit RCU readers?
+ If so, RCU-sched is the only choice that will work for you.
+
+d. Do you need RCU grace periods to complete even in the face
+ of softirq monopolization of one or more of the CPUs? For
+ example, is your code subject to network-based denial-of-service
+ attacks? If so, you need RCU-bh.
+
+e. Is your workload too update-intensive for normal use of
+ RCU, but inappropriate for other synchronization mechanisms?
+ If so, consider SLAB_DESTROY_BY_RCU. But please be careful!
+
+f. Do you need read-side critical sections that are respected
+ even though they are in the middle of the idle loop, during
+ user-mode execution, or on an offlined CPU? If so, SRCU is the
+ only choice that will work for you.
+
+g. Otherwise, use RCU.
+
+Of course, this all assumes that you have determined that RCU is in fact
+the right tool for your job.
+
+
+8. ANSWERS TO QUICK QUIZZES
+
+Quick Quiz #1: Why is this argument naive? How could a deadlock
+ occur when using this algorithm in a real-world Linux
+ kernel? [Referring to the lock-based "toy" RCU
+ algorithm.]
+
+Answer: Consider the following sequence of events:
+
+ 1. CPU 0 acquires some unrelated lock, call it
+ "problematic_lock", disabling irq via
+ spin_lock_irqsave().
+
+ 2. CPU 1 enters synchronize_rcu(), write-acquiring
+ rcu_gp_mutex.
+
+ 3. CPU 0 enters rcu_read_lock(), but must wait
+ because CPU 1 holds rcu_gp_mutex.
+
+ 4. CPU 1 is interrupted, and the irq handler
+ attempts to acquire problematic_lock.
+
+ The system is now deadlocked.
+
+ One way to avoid this deadlock is to use an approach like
+ that of CONFIG_PREEMPT_RT, where all normal spinlocks
+ become blocking locks, and all irq handlers execute in
+ the context of special tasks. In this case, in step 4
+ above, the irq handler would block, allowing CPU 1 to
+ release rcu_gp_mutex, avoiding the deadlock.
+
+ Even in the absence of deadlock, this RCU implementation
+ allows latency to "bleed" from readers to other
+ readers through synchronize_rcu(). To see this,
+ consider task A in an RCU read-side critical section
+ (thus read-holding rcu_gp_mutex), task B blocked
+ attempting to write-acquire rcu_gp_mutex, and
+ task C blocked in rcu_read_lock() attempting to
+ read_acquire rcu_gp_mutex. Task A's RCU read-side
+ latency is holding up task C, albeit indirectly via
+ task B.
+
+ Realtime RCU implementations therefore use a counter-based
+ approach where tasks in RCU read-side critical sections
+ cannot be blocked by tasks executing synchronize_rcu().
+
+Quick Quiz #2: Give an example where Classic RCU's read-side
+ overhead is -negative-.
+
+Answer: Imagine a single-CPU system with a non-CONFIG_PREEMPT
+ kernel where a routing table is used by process-context
+ code, but can be updated by irq-context code (for example,
+ by an "ICMP REDIRECT" packet). The usual way of handling
+ this would be to have the process-context code disable
+ interrupts while searching the routing table. Use of
+ RCU allows such interrupt-disabling to be dispensed with.
+ Thus, without RCU, you pay the cost of disabling interrupts,
+ and with RCU you don't.
+
+ One can argue that the overhead of RCU in this
+ case is negative with respect to the single-CPU
+ interrupt-disabling approach. Others might argue that
+ the overhead of RCU is merely zero, and that replacing
+ the positive overhead of the interrupt-disabling scheme
+ with the zero-overhead RCU scheme does not constitute
+ negative overhead.
+
+ In real life, of course, things are more complex. But
+ even the theoretical possibility of negative overhead for
+ a synchronization primitive is a bit unexpected. ;-)
+
+Quick Quiz #3: If it is illegal to block in an RCU read-side
+ critical section, what the heck do you do in
+ PREEMPT_RT, where normal spinlocks can block???
+
+Answer: Just as PREEMPT_RT permits preemption of spinlock
+ critical sections, it permits preemption of RCU
+ read-side critical sections. It also permits
+ spinlocks blocking while in RCU read-side critical
+ sections.
+
+ Why the apparent inconsistency? Because it is it
+ possible to use priority boosting to keep the RCU
+ grace periods short if need be (for example, if running
+ short of memory). In contrast, if blocking waiting
+ for (say) network reception, there is no way to know
+ what should be boosted. Especially given that the
+ process we need to boost might well be a human being
+ who just went out for a pizza or something. And although
+ a computer-operated cattle prod might arouse serious
+ interest, it might also provoke serious objections.
+ Besides, how does the computer know what pizza parlor
+ the human being went to???
+
+
+ACKNOWLEDGEMENTS
+
+My thanks to the people who helped make this human-readable, including
+Jon Walpole, Josh Triplett, Serge Hallyn, Suzanne Wood, and Alan Stern.
+
+
+For more information, see http://www.rdrop.com/users/paulmck/RCU.
diff --git a/Documentation/SAK.txt b/Documentation/SAK.txt
new file mode 100644
index 000000000..74be14679
--- /dev/null
+++ b/Documentation/SAK.txt
@@ -0,0 +1,88 @@
+Linux 2.4.2 Secure Attention Key (SAK) handling
+18 March 2001, Andrew Morton
+
+An operating system's Secure Attention Key is a security tool which is
+provided as protection against trojan password capturing programs. It
+is an undefeatable way of killing all programs which could be
+masquerading as login applications. Users need to be taught to enter
+this key sequence before they log in to the system.
+
+From the PC keyboard, Linux has two similar but different ways of
+providing SAK. One is the ALT-SYSRQ-K sequence. You shouldn't use
+this sequence. It is only available if the kernel was compiled with
+sysrq support.
+
+The proper way of generating a SAK is to define the key sequence using
+`loadkeys'. This will work whether or not sysrq support is compiled
+into the kernel.
+
+SAK works correctly when the keyboard is in raw mode. This means that
+once defined, SAK will kill a running X server. If the system is in
+run level 5, the X server will restart. This is what you want to
+happen.
+
+What key sequence should you use? Well, CTRL-ALT-DEL is used to reboot
+the machine. CTRL-ALT-BACKSPACE is magical to the X server. We'll
+choose CTRL-ALT-PAUSE.
+
+In your rc.sysinit (or rc.local) file, add the command
+
+ echo "control alt keycode 101 = SAK" | /bin/loadkeys
+
+And that's it! Only the superuser may reprogram the SAK key.
+
+
+NOTES
+=====
+
+1: Linux SAK is said to be not a "true SAK" as is required by
+ systems which implement C2 level security. This author does not
+ know why.
+
+
+2: On the PC keyboard, SAK kills all applications which have
+ /dev/console opened.
+
+ Unfortunately this includes a number of things which you don't
+ actually want killed. This is because these applications are
+ incorrectly holding /dev/console open. Be sure to complain to your
+ Linux distributor about this!
+
+ You can identify processes which will be killed by SAK with the
+ command
+
+ # ls -l /proc/[0-9]*/fd/* | grep console
+ l-wx------ 1 root root 64 Mar 18 00:46 /proc/579/fd/0 -> /dev/console
+
+ Then:
+
+ # ps aux|grep 579
+ root 579 0.0 0.1 1088 436 ? S 00:43 0:00 gpm -t ps/2
+
+ So `gpm' will be killed by SAK. This is a bug in gpm. It should
+ be closing standard input. You can work around this by finding the
+ initscript which launches gpm and changing it thusly:
+
+ Old:
+
+ daemon gpm
+
+ New:
+
+ daemon gpm < /dev/null
+
+ Vixie cron also seems to have this problem, and needs the same treatment.
+
+ Also, one prominent Linux distribution has the following three
+ lines in its rc.sysinit and rc scripts:
+
+ exec 3<&0
+ exec 4>&1
+ exec 5>&2
+
+ These commands cause *all* daemons which are launched by the
+ initscripts to have file descriptors 3, 4 and 5 attached to
+ /dev/console. So SAK kills them all. A workaround is to simply
+ delete these lines, but this may cause system management
+ applications to malfunction - test everything well.
+
diff --git a/Documentation/SM501.txt b/Documentation/SM501.txt
new file mode 100644
index 000000000..561826f82
--- /dev/null
+++ b/Documentation/SM501.txt
@@ -0,0 +1,71 @@
+ SM501 Driver
+ ============
+
+Copyright 2006, 2007 Simtec Electronics
+
+The Silicon Motion SM501 multimedia companion chip is a multifunction device
+which may provide numerous interfaces including USB host controller USB gadget,
+asynchronous serial ports, audio functions, and a dual display video interface.
+The device may be connected by PCI or local bus with varying functions enabled.
+
+Core
+----
+
+The core driver in drivers/mfd provides common services for the
+drivers which manage the specific hardware blocks. These services
+include locking for common registers, clock control and resource
+management.
+
+The core registers drivers for both PCI and generic bus based
+chips via the platform device and driver system.
+
+On detection of a device, the core initialises the chip (which may
+be specified by the platform data) and then exports the selected
+peripheral set as platform devices for the specific drivers.
+
+The core re-uses the platform device system as the platform device
+system provides enough features to support the drivers without the
+need to create a new bus-type and the associated code to go with it.
+
+
+Resources
+---------
+
+Each peripheral has a view of the device which is implicitly narrowed to
+the specific set of resources that peripheral requires in order to
+function correctly.
+
+The centralised memory allocation allows the driver to ensure that the
+maximum possible resource allocation can be made to the video subsystem
+as this is by-far the most resource-sensitive of the on-chip functions.
+
+The primary issue with memory allocation is that of moving the video
+buffers once a display mode is chosen. Indeed when a video mode change
+occurs the memory footprint of the video subsystem changes.
+
+Since video memory is difficult to move without changing the display
+(unless sufficient contiguous memory can be provided for the old and new
+modes simultaneously) the video driver fully utilises the memory area
+given to it by aligning fb0 to the start of the area and fb1 to the end
+of it. Any memory left over in the middle is used for the acceleration
+functions, which are transient and thus their location is less critical
+as it can be moved.
+
+
+Configuration
+-------------
+
+The platform device driver uses a set of platform data to pass
+configurations through to the core and the subsidiary drivers
+so that there can be support for more than one system carrying
+an SM501 built into a single kernel image.
+
+The PCI driver assumes that the PCI card behaves as per the Silicon
+Motion reference design.
+
+There is an errata (AB-5) affecting the selection of the
+of the M1XCLK and M1CLK frequencies. These two clocks
+must be sourced from the same PLL, although they can then
+be divided down individually. If this is not set, then SM501 may
+lock and hang the whole system. The driver will refuse to
+attach if the PLL selection is different.
diff --git a/Documentation/SecurityBugs b/Documentation/SecurityBugs
new file mode 100644
index 000000000..a660d494c
--- /dev/null
+++ b/Documentation/SecurityBugs
@@ -0,0 +1,38 @@
+Linux kernel developers take security very seriously. As such, we'd
+like to know when a security bug is found so that it can be fixed and
+disclosed as quickly as possible. Please report security bugs to the
+Linux kernel security team.
+
+1) Contact
+
+The Linux kernel security team can be contacted by email at
+<security@kernel.org>. This is a private list of security officers
+who will help verify the bug report and develop and release a fix.
+It is possible that the security team will bring in extra help from
+area maintainers to understand and fix the security vulnerability.
+
+As it is with any bug, the more information provided the easier it
+will be to diagnose and fix. Please review the procedure outlined in
+REPORTING-BUGS if you are unclear about what information is helpful.
+Any exploit code is very helpful and will not be released without
+consent from the reporter unless it has already been made public.
+
+2) Disclosure
+
+The goal of the Linux kernel security team is to work with the
+bug submitter to bug resolution as well as disclosure. We prefer
+to fully disclose the bug as soon as possible. It is reasonable to
+delay disclosure when the bug or the fix is not yet fully understood,
+the solution is not well-tested or for vendor coordination. However, we
+expect these delays to be short, measurable in days, not weeks or months.
+A disclosure date is negotiated by the security team working with the
+bug submitter as well as vendors. However, the kernel security team
+holds the final say when setting a disclosure date. The timeframe for
+disclosure is from immediate (esp. if it's already publicly known)
+to a few weeks. As a basic default policy, we expect report date to
+disclosure date to be on the order of 7 days.
+
+3) Non-disclosure agreements
+
+The Linux kernel security team is not a formal body and therefore unable
+to enter any non-disclosure agreements.
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
new file mode 100644
index 000000000..2b7e32dfe
--- /dev/null
+++ b/Documentation/SubmitChecklist
@@ -0,0 +1,109 @@
+Linux Kernel patch submission checklist
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Here are some basic things that developers should do if they want to see their
+kernel patch submissions accepted more quickly.
+
+These are all above and beyond the documentation that is provided in
+Documentation/SubmittingPatches and elsewhere regarding submitting Linux
+kernel patches.
+
+
+1: If you use a facility then #include the file that defines/declares
+ that facility. Don't depend on other header files pulling in ones
+ that you use.
+
+2: Builds cleanly with applicable or modified CONFIG options =y, =m, and
+ =n. No gcc warnings/errors, no linker warnings/errors.
+
+2b: Passes allnoconfig, allmodconfig
+
+2c: Builds successfully when using O=builddir
+
+3: Builds on multiple CPU architectures by using local cross-compile tools
+ or some other build farm.
+
+4: ppc64 is a good architecture for cross-compilation checking because it
+ tends to use `unsigned long' for 64-bit quantities.
+
+5: Check your patch for general style as detailed in
+ Documentation/CodingStyle. Check for trivial violations with the
+ patch style checker prior to submission (scripts/checkpatch.pl).
+ You should be able to justify all violations that remain in
+ your patch.
+
+6: Any new or modified CONFIG options don't muck up the config menu.
+
+7: All new Kconfig options have help text.
+
+8: Has been carefully reviewed with respect to relevant Kconfig
+ combinations. This is very hard to get right with testing -- brainpower
+ pays off here.
+
+9: Check cleanly with sparse.
+
+10: Use 'make checkstack' and 'make namespacecheck' and fix any problems
+ that they find. Note: checkstack does not point out problems explicitly,
+ but any one function that uses more than 512 bytes on the stack is a
+ candidate for change.
+
+11: Include kernel-doc to document global kernel APIs. (Not required for
+ static functions, but OK there also.) Use 'make htmldocs' or 'make
+ mandocs' to check the kernel-doc and fix any issues.
+
+12: Has been tested with CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT,
+ CONFIG_DEBUG_SLAB, CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES,
+ CONFIG_DEBUG_SPINLOCK, CONFIG_DEBUG_ATOMIC_SLEEP, CONFIG_PROVE_RCU
+ and CONFIG_DEBUG_OBJECTS_RCU_HEAD all simultaneously enabled.
+
+13: Has been build- and runtime tested with and without CONFIG_SMP and
+ CONFIG_PREEMPT.
+
+14: If the patch affects IO/Disk, etc: has been tested with and without
+ CONFIG_LBDAF.
+
+15: All codepaths have been exercised with all lockdep features enabled.
+
+16: All new /proc entries are documented under Documentation/
+
+17: All new kernel boot parameters are documented in
+ Documentation/kernel-parameters.txt.
+
+18: All new module parameters are documented with MODULE_PARM_DESC()
+
+19: All new userspace interfaces are documented in Documentation/ABI/.
+ See Documentation/ABI/README for more information.
+ Patches that change userspace interfaces should be CCed to
+ linux-api@vger.kernel.org.
+
+20: Check that it all passes `make headers_check'.
+
+21: Has been checked with injection of at least slab and page-allocation
+ failures. See Documentation/fault-injection/.
+
+ If the new code is substantial, addition of subsystem-specific fault
+ injection might be appropriate.
+
+22: Newly-added code has been compiled with `gcc -W' (use "make
+ EXTRA_CFLAGS=-W"). This will generate lots of noise, but is good for
+ finding bugs like "warning: comparison between signed and unsigned".
+
+23: Tested after it has been merged into the -mm patchset to make sure
+ that it still works with all of the other queued patches and various
+ changes in the VM, VFS, and other subsystems.
+
+24: All memory barriers {e.g., barrier(), rmb(), wmb()} need a comment in the
+ source code that explains the logic of what they are doing and why.
+
+25: If any ioctl's are added by the patch, then also update
+ Documentation/ioctl/ioctl-number.txt.
+
+26: If your modified source code depends on or uses any of the kernel
+ APIs or features that are related to the following kconfig symbols,
+ then test multiple builds with the related kconfig symbols disabled
+ and/or =m (if that option is available) [not all of these at the
+ same time, just various/random combinations of them]:
+
+ CONFIG_SMP, CONFIG_SYSFS, CONFIG_PROC_FS, CONFIG_INPUT, CONFIG_PCI,
+ CONFIG_BLOCK, CONFIG_PM, CONFIG_MAGIC_SYSRQ,
+ CONFIG_NET, CONFIG_INET=n (but latter with CONFIG_NET=y)
diff --git a/Documentation/SubmittingDrivers b/Documentation/SubmittingDrivers
new file mode 100644
index 000000000..31d372609
--- /dev/null
+++ b/Documentation/SubmittingDrivers
@@ -0,0 +1,163 @@
+Submitting Drivers For The Linux Kernel
+---------------------------------------
+
+This document is intended to explain how to submit device drivers to the
+various kernel trees. Note that if you are interested in video card drivers
+you should probably talk to XFree86 (http://www.xfree86.org/) and/or X.Org
+(http://x.org/) instead.
+
+Also read the Documentation/SubmittingPatches document.
+
+
+Allocating Device Numbers
+-------------------------
+
+Major and minor numbers for block and character devices are allocated
+by the Linux assigned name and number authority (currently this is
+Torben Mathiasen). The site is http://www.lanana.org/. This
+also deals with allocating numbers for devices that are not going to
+be submitted to the mainstream kernel.
+See Documentation/devices.txt for more information on this.
+
+If you don't use assigned numbers then when your device is submitted it will
+be given an assigned number even if that is different from values you may
+have shipped to customers before.
+
+Who To Submit Drivers To
+------------------------
+
+Linux 2.0:
+ No new drivers are accepted for this kernel tree.
+
+Linux 2.2:
+ No new drivers are accepted for this kernel tree.
+
+Linux 2.4:
+ If the code area has a general maintainer then please submit it to
+ the maintainer listed in MAINTAINERS in the kernel file. If the
+ maintainer does not respond or you cannot find the appropriate
+ maintainer then please contact Willy Tarreau <w@1wt.eu>.
+
+Linux 2.6:
+ The same rules apply as 2.4 except that you should follow linux-kernel
+ to track changes in API's. The final contact point for Linux 2.6
+ submissions is Andrew Morton.
+
+What Criteria Determine Acceptance
+----------------------------------
+
+Licensing: The code must be released to us under the
+ GNU General Public License. We don't insist on any kind
+ of exclusive GPL licensing, and if you wish the driver
+ to be useful to other communities such as BSD you may well
+ wish to release under multiple licenses.
+ See accepted licenses at include/linux/module.h
+
+Copyright: The copyright owner must agree to use of GPL.
+ It's best if the submitter and copyright owner
+ are the same person/entity. If not, the name of
+ the person/entity authorizing use of GPL should be
+ listed in case it's necessary to verify the will of
+ the copyright owner.
+
+Interfaces: If your driver uses existing interfaces and behaves like
+ other drivers in the same class it will be much more likely
+ to be accepted than if it invents gratuitous new ones.
+ If you need to implement a common API over Linux and NT
+ drivers do it in userspace.
+
+Code: Please use the Linux style of code formatting as documented
+ in Documentation/CodingStyle. If you have sections of code
+ that need to be in other formats, for example because they
+ are shared with a windows driver kit and you want to
+ maintain them just once separate them out nicely and note
+ this fact.
+
+Portability: Pointers are not always 32bits, not all computers are little
+ endian, people do not all have floating point and you
+ shouldn't use inline x86 assembler in your driver without
+ careful thought. Pure x86 drivers generally are not popular.
+ If you only have x86 hardware it is hard to test portability
+ but it is easy to make sure the code can easily be made
+ portable.
+
+Clarity: It helps if anyone can see how to fix the driver. It helps
+ you because you get patches not bug reports. If you submit a
+ driver that intentionally obfuscates how the hardware works
+ it will go in the bitbucket.
+
+PM support: Since Linux is used on many portable and desktop systems, your
+ driver is likely to be used on such a system and therefore it
+ should support basic power management by implementing, if
+ necessary, the .suspend and .resume methods used during the
+ system-wide suspend and resume transitions. You should verify
+ that your driver correctly handles the suspend and resume, but
+ if you are unable to ensure that, please at least define the
+ .suspend method returning the -ENOSYS ("Function not
+ implemented") error. You should also try to make sure that your
+ driver uses as little power as possible when it's not doing
+ anything. For the driver testing instructions see
+ Documentation/power/drivers-testing.txt and for a relatively
+ complete overview of the power management issues related to
+ drivers see Documentation/power/devices.txt .
+
+Control: In general if there is active maintenance of a driver by
+ the author then patches will be redirected to them unless
+ they are totally obvious and without need of checking.
+ If you want to be the contact and update point for the
+ driver it is a good idea to state this in the comments,
+ and include an entry in MAINTAINERS for your driver.
+
+What Criteria Do Not Determine Acceptance
+-----------------------------------------
+
+Vendor: Being the hardware vendor and maintaining the driver is
+ often a good thing. If there is a stable working driver from
+ other people already in the tree don't expect 'we are the
+ vendor' to get your driver chosen. Ideally work with the
+ existing driver author to build a single perfect driver.
+
+Author: It doesn't matter if a large Linux company wrote the driver,
+ or you did. Nobody has any special access to the kernel
+ tree. Anyone who tells you otherwise isn't telling the
+ whole story.
+
+
+Resources
+---------
+
+Linux kernel master tree:
+ ftp.??.kernel.org:/pub/linux/kernel/...
+ ?? == your country code, such as "us", "uk", "fr", etc.
+
+ http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git
+
+Linux kernel mailing list:
+ linux-kernel@vger.kernel.org
+ [mail majordomo@vger.kernel.org to subscribe]
+
+Linux Device Drivers, Third Edition (covers 2.6.10):
+ http://lwn.net/Kernel/LDD3/ (free version)
+
+LWN.net:
+ Weekly summary of kernel development activity - http://lwn.net/
+ 2.6 API changes:
+ http://lwn.net/Articles/2.6-kernel-api/
+ Porting drivers from prior kernels to 2.6:
+ http://lwn.net/Articles/driver-porting/
+
+KernelNewbies:
+ Documentation and assistance for new kernel programmers
+ http://kernelnewbies.org/
+
+Linux USB project:
+ http://www.linux-usb.org/
+
+How to NOT write kernel driver by Arjan van de Ven:
+ http://www.fenrus.org/how-to-not-write-a-device-driver-paper.pdf
+
+Kernel Janitor:
+ http://kernelnewbies.org/KernelJanitors
+
+GIT, Fast Version Control System:
+ http://git-scm.com/
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
new file mode 100644
index 000000000..b03a832a0
--- /dev/null
+++ b/Documentation/SubmittingPatches
@@ -0,0 +1,806 @@
+
+ How to Get Your Change Into the Linux Kernel
+ or
+ Care And Operation Of Your Linus Torvalds
+
+
+
+For a person or company who wishes to submit a change to the Linux
+kernel, the process can sometimes be daunting if you're not familiar
+with "the system." This text is a collection of suggestions which
+can greatly increase the chances of your change being accepted.
+
+This document contains a large number of suggestions in a relatively terse
+format. For detailed information on how the kernel development process
+works, see Documentation/development-process. Also, read
+Documentation/SubmitChecklist for a list of items to check before
+submitting code. If you are submitting a driver, also read
+Documentation/SubmittingDrivers; for device tree binding patches, read
+Documentation/devicetree/bindings/submitting-patches.txt.
+
+Many of these steps describe the default behavior of the git version
+control system; if you use git to prepare your patches, you'll find much
+of the mechanical work done for you, though you'll still need to prepare
+and document a sensible set of patches. In general, use of git will make
+your life as a kernel developer easier.
+
+--------------------------------------------
+SECTION 1 - CREATING AND SENDING YOUR CHANGE
+--------------------------------------------
+
+
+0) Obtain a current source tree
+-------------------------------
+
+If you do not have a repository with the current kernel source handy, use
+git to obtain one. You'll want to start with the mainline repository,
+which can be grabbed with:
+
+ git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
+
+Note, however, that you may not want to develop against the mainline tree
+directly. Most subsystem maintainers run their own trees and want to see
+patches prepared against those trees. See the "T:" entry for the subsystem
+in the MAINTAINERS file to find that tree, or simply ask the maintainer if
+the tree is not listed there.
+
+It is still possible to download kernel releases via tarballs (as described
+in the next section), but that is the hard way to do kernel development.
+
+1) "diff -up"
+------------
+
+If you must generate your patches by hand, use "diff -up" or "diff -uprN"
+to create patches. Git generates patches in this form by default; if
+you're using git, you can skip this section entirely.
+
+All changes to the Linux kernel occur in the form of patches, as
+generated by diff(1). When creating your patch, make sure to create it
+in "unified diff" format, as supplied by the '-u' argument to diff(1).
+Also, please use the '-p' argument which shows which C function each
+change is in - that makes the resultant diff a lot easier to read.
+Patches should be based in the root kernel source directory,
+not in any lower subdirectory.
+
+To create a patch for a single file, it is often sufficient to do:
+
+ SRCTREE= linux
+ MYFILE= drivers/net/mydriver.c
+
+ cd $SRCTREE
+ cp $MYFILE $MYFILE.orig
+ vi $MYFILE # make your change
+ cd ..
+ diff -up $SRCTREE/$MYFILE{.orig,} > /tmp/patch
+
+To create a patch for multiple files, you should unpack a "vanilla",
+or unmodified kernel source tree, and generate a diff against your
+own source tree. For example:
+
+ MYSRC= /devel/linux
+
+ tar xvfz linux-3.19.tar.gz
+ mv linux-3.19 linux-3.19-vanilla
+ diff -uprN -X linux-3.19-vanilla/Documentation/dontdiff \
+ linux-3.19-vanilla $MYSRC > /tmp/patch
+
+"dontdiff" is a list of files which are generated by the kernel during
+the build process, and should be ignored in any diff(1)-generated
+patch.
+
+Make sure your patch does not include any extra files which do not
+belong in a patch submission. Make sure to review your patch -after-
+generated it with diff(1), to ensure accuracy.
+
+If your changes produce a lot of deltas, you need to split them into
+individual patches which modify things in logical stages; see section
+#3. This will facilitate easier reviewing by other kernel developers,
+very important if you want your patch accepted.
+
+If you're using git, "git rebase -i" can help you with this process. If
+you're not using git, quilt <http://savannah.nongnu.org/projects/quilt>
+is another popular alternative.
+
+
+
+2) Describe your changes.
+-------------------------
+
+Describe your problem. Whether your patch is a one-line bug fix or
+5000 lines of a new feature, there must be an underlying problem that
+motivated you to do this work. Convince the reviewer that there is a
+problem worth fixing and that it makes sense for them to read past the
+first paragraph.
+
+Describe user-visible impact. Straight up crashes and lockups are
+pretty convincing, but not all bugs are that blatant. Even if the
+problem was spotted during code review, describe the impact you think
+it can have on users. Keep in mind that the majority of Linux
+installations run kernels from secondary stable trees or
+vendor/product-specific trees that cherry-pick only specific patches
+from upstream, so include anything that could help route your change
+downstream: provoking circumstances, excerpts from dmesg, crash
+descriptions, performance regressions, latency spikes, lockups, etc.
+
+Quantify optimizations and trade-offs. If you claim improvements in
+performance, memory consumption, stack footprint, or binary size,
+include numbers that back them up. But also describe non-obvious
+costs. Optimizations usually aren't free but trade-offs between CPU,
+memory, and readability; or, when it comes to heuristics, between
+different workloads. Describe the expected downsides of your
+optimization so that the reviewer can weigh costs against benefits.
+
+Once the problem is established, describe what you are actually doing
+about it in technical detail. It's important to describe the change
+in plain English for the reviewer to verify that the code is behaving
+as you intend it to.
+
+The maintainer will thank you if you write your patch description in a
+form which can be easily pulled into Linux's source code management
+system, git, as a "commit log". See #15, below.
+
+Solve only one problem per patch. If your description starts to get
+long, that's a sign that you probably need to split up your patch.
+See #3, next.
+
+When you submit or resubmit a patch or patch series, include the
+complete patch description and justification for it. Don't just
+say that this is version N of the patch (series). Don't expect the
+subsystem maintainer to refer back to earlier patch versions or referenced
+URLs to find the patch description and put that into the patch.
+I.e., the patch (series) and its description should be self-contained.
+This benefits both the maintainers and reviewers. Some reviewers
+probably didn't even receive earlier versions of the patch.
+
+Describe your changes in imperative mood, e.g. "make xyzzy do frotz"
+instead of "[This patch] makes xyzzy do frotz" or "[I] changed xyzzy
+to do frotz", as if you are giving orders to the codebase to change
+its behaviour.
+
+If the patch fixes a logged bug entry, refer to that bug entry by
+number and URL. If the patch follows from a mailing list discussion,
+give a URL to the mailing list archive; use the https://lkml.kernel.org/
+redirector with a Message-Id, to ensure that the links cannot become
+stale.
+
+However, try to make your explanation understandable without external
+resources. In addition to giving a URL to a mailing list archive or
+bug, summarize the relevant points of the discussion that led to the
+patch as submitted.
+
+If you want to refer to a specific commit, don't just refer to the
+SHA-1 ID of the commit. Please also include the oneline summary of
+the commit, to make it easier for reviewers to know what it is about.
+Example:
+
+ Commit e21d2170f36602ae2708 ("video: remove unnecessary
+ platform_set_drvdata()") removed the unnecessary
+ platform_set_drvdata(), but left the variable "dev" unused,
+ delete it.
+
+You should also be sure to use at least the first twelve characters of the
+SHA-1 ID. The kernel repository holds a *lot* of objects, making
+collisions with shorter IDs a real possibility. Bear in mind that, even if
+there is no collision with your six-character ID now, that condition may
+change five years from now.
+
+If your patch fixes a bug in a specific commit, e.g. you found an issue using
+git-bisect, please use the 'Fixes:' tag with the first 12 characters of the
+SHA-1 ID, and the one line summary. For example:
+
+ Fixes: e21d2170f366 ("video: remove unnecessary platform_set_drvdata()")
+
+The following git-config settings can be used to add a pretty format for
+outputting the above style in the git log or git show commands
+
+ [core]
+ abbrev = 12
+ [pretty]
+ fixes = Fixes: %h (\"%s\")
+
+3) Separate your changes.
+-------------------------
+
+Separate each _logical change_ into a separate patch.
+
+For example, if your changes include both bug fixes and performance
+enhancements for a single driver, separate those changes into two
+or more patches. If your changes include an API update, and a new
+driver which uses that new API, separate those into two patches.
+
+On the other hand, if you make a single change to numerous files,
+group those changes into a single patch. Thus a single logical change
+is contained within a single patch.
+
+The point to remember is that each patch should make an easily understood
+change that can be verified by reviewers. Each patch should be justifiable
+on its own merits.
+
+If one patch depends on another patch in order for a change to be
+complete, that is OK. Simply note "this patch depends on patch X"
+in your patch description.
+
+When dividing your change into a series of patches, take special care to
+ensure that the kernel builds and runs properly after each patch in the
+series. Developers using "git bisect" to track down a problem can end up
+splitting your patch series at any point; they will not thank you if you
+introduce bugs in the middle.
+
+If you cannot condense your patch set into a smaller set of patches,
+then only post say 15 or so at a time and wait for review and integration.
+
+
+
+4) Style-check your changes.
+----------------------------
+
+Check your patch for basic style violations, details of which can be
+found in Documentation/CodingStyle. Failure to do so simply wastes
+the reviewers time and will get your patch rejected, probably
+without even being read.
+
+One significant exception is when moving code from one file to
+another -- in this case you should not modify the moved code at all in
+the same patch which moves it. This clearly delineates the act of
+moving the code and your changes. This greatly aids review of the
+actual differences and allows tools to better track the history of
+the code itself.
+
+Check your patches with the patch style checker prior to submission
+(scripts/checkpatch.pl). Note, though, that the style checker should be
+viewed as a guide, not as a replacement for human judgment. If your code
+looks better with a violation then its probably best left alone.
+
+The checker reports at three levels:
+ - ERROR: things that are very likely to be wrong
+ - WARNING: things requiring careful review
+ - CHECK: things requiring thought
+
+You should be able to justify all violations that remain in your
+patch.
+
+
+5) Select the recipients for your patch.
+----------------------------------------
+
+You should always copy the appropriate subsystem maintainer(s) on any patch
+to code that they maintain; look through the MAINTAINERS file and the
+source code revision history to see who those maintainers are. The
+script scripts/get_maintainer.pl can be very useful at this step. If you
+cannot find a maintainer for the subsystem your are working on, Andrew
+Morton (akpm@linux-foundation.org) serves as a maintainer of last resort.
+
+You should also normally choose at least one mailing list to receive a copy
+of your patch set. linux-kernel@vger.kernel.org functions as a list of
+last resort, but the volume on that list has caused a number of developers
+to tune it out. Look in the MAINTAINERS file for a subsystem-specific
+list; your patch will probably get more attention there. Please do not
+spam unrelated lists, though.
+
+Many kernel-related lists are hosted on vger.kernel.org; you can find a
+list of them at http://vger.kernel.org/vger-lists.html. There are
+kernel-related lists hosted elsewhere as well, though.
+
+Do not send more than 15 patches at once to the vger mailing lists!!!
+
+Linus Torvalds is the final arbiter of all changes accepted into the
+Linux kernel. His e-mail address is <torvalds@linux-foundation.org>.
+He gets a lot of e-mail, and, at this point, very few patches go through
+Linus directly, so typically you should do your best to -avoid-
+sending him e-mail.
+
+If you have a patch that fixes an exploitable security bug, send that patch
+to security@kernel.org. For severe bugs, a short embargo may be considered
+to allow distrbutors to get the patch out to users; in such cases,
+obviously, the patch should not be sent to any public lists.
+
+Patches that fix a severe bug in a released kernel should be directed
+toward the stable maintainers by putting a line like this:
+
+ Cc: stable@vger.kernel.org
+
+into your patch.
+
+Note, however, that some subsystem maintainers want to come to their own
+conclusions on which patches should go to the stable trees. The networking
+maintainer, in particular, would rather not see individual developers
+adding lines like the above to their patches.
+
+If changes affect userland-kernel interfaces, please send the MAN-PAGES
+maintainer (as listed in the MAINTAINERS file) a man-pages patch, or at
+least a notification of the change, so that some information makes its way
+into the manual pages. User-space API changes should also be copied to
+linux-api@vger.kernel.org.
+
+For small patches you may want to CC the Trivial Patch Monkey
+trivial@kernel.org which collects "trivial" patches. Have a look
+into the MAINTAINERS file for its current manager.
+Trivial patches must qualify for one of the following rules:
+ Spelling fixes in documentation
+ Spelling fixes for errors which could break grep(1)
+ Warning fixes (cluttering with useless warnings is bad)
+ Compilation fixes (only if they are actually correct)
+ Runtime fixes (only if they actually fix things)
+ Removing use of deprecated functions/macros
+ Contact detail and documentation fixes
+ Non-portable code replaced by portable code (even in arch-specific,
+ since people copy, as long as it's trivial)
+ Any fix by the author/maintainer of the file (ie. patch monkey
+ in re-transmission mode)
+
+
+
+6) No MIME, no links, no compression, no attachments. Just plain text.
+-----------------------------------------------------------------------
+
+Linus and other kernel developers need to be able to read and comment
+on the changes you are submitting. It is important for a kernel
+developer to be able to "quote" your changes, using standard e-mail
+tools, so that they may comment on specific portions of your code.
+
+For this reason, all patches should be submitting e-mail "inline".
+WARNING: Be wary of your editor's word-wrap corrupting your patch,
+if you choose to cut-n-paste your patch.
+
+Do not attach the patch as a MIME attachment, compressed or not.
+Many popular e-mail applications will not always transmit a MIME
+attachment as plain text, making it impossible to comment on your
+code. A MIME attachment also takes Linus a bit more time to process,
+decreasing the likelihood of your MIME-attached change being accepted.
+
+Exception: If your mailer is mangling patches then someone may ask
+you to re-send them using MIME.
+
+See Documentation/email-clients.txt for hints about configuring
+your e-mail client so that it sends your patches untouched.
+
+7) E-mail size.
+---------------
+
+Large changes are not appropriate for mailing lists, and some
+maintainers. If your patch, uncompressed, exceeds 300 kB in size,
+it is preferred that you store your patch on an Internet-accessible
+server, and provide instead a URL (link) pointing to your patch. But note
+that if your patch exceeds 300 kB, it almost certainly needs to be broken up
+anyway.
+
+8) Respond to review comments.
+------------------------------
+
+Your patch will almost certainly get comments from reviewers on ways in
+which the patch can be improved. You must respond to those comments;
+ignoring reviewers is a good way to get ignored in return. Review comments
+or questions that do not lead to a code change should almost certainly
+bring about a comment or changelog entry so that the next reviewer better
+understands what is going on.
+
+Be sure to tell the reviewers what changes you are making and to thank them
+for their time. Code review is a tiring and time-consuming process, and
+reviewers sometimes get grumpy. Even in that case, though, respond
+politely and address the problems they have pointed out.
+
+
+9) Don't get discouraged - or impatient.
+----------------------------------------
+
+After you have submitted your change, be patient and wait. Reviewers are
+busy people and may not get to your patch right away.
+
+Once upon a time, patches used to disappear into the void without comment,
+but the development process works more smoothly than that now. You should
+receive comments within a week or so; if that does not happen, make sure
+that you have sent your patches to the right place. Wait for a minimum of
+one week before resubmitting or pinging reviewers - possibly longer during
+busy times like merge windows.
+
+
+10) Include PATCH in the subject
+--------------------------------
+
+Due to high e-mail traffic to Linus, and to linux-kernel, it is common
+convention to prefix your subject line with [PATCH]. This lets Linus
+and other kernel developers more easily distinguish patches from other
+e-mail discussions.
+
+
+
+11) Sign your work
+------------------
+
+To improve tracking of who did what, especially with patches that can
+percolate to their final resting place in the kernel through several
+layers of maintainers, we've introduced a "sign-off" procedure on
+patches that are being emailed around.
+
+The sign-off is a simple line at the end of the explanation for the
+patch, which certifies that you wrote it or otherwise have the right to
+pass it on as an open-source patch. The rules are pretty simple: if you
+can certify the below:
+
+ Developer's Certificate of Origin 1.1
+
+ By making a contribution to this project, I certify that:
+
+ (a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+ (b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+ (c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+ (d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
+
+then you just add a line saying
+
+ Signed-off-by: Random J Developer <random@developer.example.org>
+
+using your real name (sorry, no pseudonyms or anonymous contributions.)
+
+Some people also put extra tags at the end. They'll just be ignored for
+now, but you can do this to mark internal company procedures or just
+point out some special detail about the sign-off.
+
+If you are a subsystem or branch maintainer, sometimes you need to slightly
+modify patches you receive in order to merge them, because the code is not
+exactly the same in your tree and the submitters'. If you stick strictly to
+rule (c), you should ask the submitter to rediff, but this is a totally
+counter-productive waste of time and energy. Rule (b) allows you to adjust
+the code, but then it is very impolite to change one submitter's code and
+make him endorse your bugs. To solve this problem, it is recommended that
+you add a line between the last Signed-off-by header and yours, indicating
+the nature of your changes. While there is nothing mandatory about this, it
+seems like prepending the description with your mail and/or name, all
+enclosed in square brackets, is noticeable enough to make it obvious that
+you are responsible for last-minute changes. Example :
+
+ Signed-off-by: Random J Developer <random@developer.example.org>
+ [lucky@maintainer.example.org: struct foo moved from foo.c to foo.h]
+ Signed-off-by: Lucky K Maintainer <lucky@maintainer.example.org>
+
+This practice is particularly helpful if you maintain a stable branch and
+want at the same time to credit the author, track changes, merge the fix,
+and protect the submitter from complaints. Note that under no circumstances
+can you change the author's identity (the From header), as it is the one
+which appears in the changelog.
+
+Special note to back-porters: It seems to be a common and useful practice
+to insert an indication of the origin of a patch at the top of the commit
+message (just after the subject line) to facilitate tracking. For instance,
+here's what we see in a 3.x-stable release:
+
+Date: Tue Oct 7 07:26:38 2014 -0400
+
+ libata: Un-break ATA blacklist
+
+ commit 1c40279960bcd7d52dbdf1d466b20d24b99176c8 upstream.
+
+And here's what might appear in an older kernel once a patch is backported:
+
+ Date: Tue May 13 22:12:27 2008 +0200
+
+ wireless, airo: waitbusy() won't delay
+
+ [backport of 2.6 commit b7acbdfbd1f277c1eb23f344f899cfa4cd0bf36a]
+
+Whatever the format, this information provides a valuable help to people
+tracking your trees, and to people trying to troubleshoot bugs in your
+tree.
+
+
+12) When to use Acked-by: and Cc:
+---------------------------------
+
+The Signed-off-by: tag indicates that the signer was involved in the
+development of the patch, or that he/she was in the patch's delivery path.
+
+If a person was not directly involved in the preparation or handling of a
+patch but wishes to signify and record their approval of it then they can
+ask to have an Acked-by: line added to the patch's changelog.
+
+Acked-by: is often used by the maintainer of the affected code when that
+maintainer neither contributed to nor forwarded the patch.
+
+Acked-by: is not as formal as Signed-off-by:. It is a record that the acker
+has at least reviewed the patch and has indicated acceptance. Hence patch
+mergers will sometimes manually convert an acker's "yep, looks good to me"
+into an Acked-by: (but note that it is usually better to ask for an
+explicit ack).
+
+Acked-by: does not necessarily indicate acknowledgement of the entire patch.
+For example, if a patch affects multiple subsystems and has an Acked-by: from
+one subsystem maintainer then this usually indicates acknowledgement of just
+the part which affects that maintainer's code. Judgement should be used here.
+When in doubt people should refer to the original discussion in the mailing
+list archives.
+
+If a person has had the opportunity to comment on a patch, but has not
+provided such comments, you may optionally add a "Cc:" tag to the patch.
+This is the only tag which might be added without an explicit action by the
+person it names - but it should indicate that this person was copied on the
+patch. This tag documents that potentially interested parties
+have been included in the discussion.
+
+
+13) Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes:
+--------------------------------------------------------------------------
+
+The Reported-by tag gives credit to people who find bugs and report them and it
+hopefully inspires them to help us again in the future. Please note that if
+the bug was reported in private, then ask for permission first before using the
+Reported-by tag.
+
+A Tested-by: tag indicates that the patch has been successfully tested (in
+some environment) by the person named. This tag informs maintainers that
+some testing has been performed, provides a means to locate testers for
+future patches, and ensures credit for the testers.
+
+Reviewed-by:, instead, indicates that the patch has been reviewed and found
+acceptable according to the Reviewer's Statement:
+
+ Reviewer's statement of oversight
+
+ By offering my Reviewed-by: tag, I state that:
+
+ (a) I have carried out a technical review of this patch to
+ evaluate its appropriateness and readiness for inclusion into
+ the mainline kernel.
+
+ (b) Any problems, concerns, or questions relating to the patch
+ have been communicated back to the submitter. I am satisfied
+ with the submitter's response to my comments.
+
+ (c) While there may be things that could be improved with this
+ submission, I believe that it is, at this time, (1) a
+ worthwhile modification to the kernel, and (2) free of known
+ issues which would argue against its inclusion.
+
+ (d) While I have reviewed the patch and believe it to be sound, I
+ do not (unless explicitly stated elsewhere) make any
+ warranties or guarantees that it will achieve its stated
+ purpose or function properly in any given situation.
+
+A Reviewed-by tag is a statement of opinion that the patch is an
+appropriate modification of the kernel without any remaining serious
+technical issues. Any interested reviewer (who has done the work) can
+offer a Reviewed-by tag for a patch. This tag serves to give credit to
+reviewers and to inform maintainers of the degree of review which has been
+done on the patch. Reviewed-by: tags, when supplied by reviewers known to
+understand the subject area and to perform thorough reviews, will normally
+increase the likelihood of your patch getting into the kernel.
+
+A Suggested-by: tag indicates that the patch idea is suggested by the person
+named and ensures credit to the person for the idea. Please note that this
+tag should not be added without the reporter's permission, especially if the
+idea was not posted in a public forum. That said, if we diligently credit our
+idea reporters, they will, hopefully, be inspired to help us again in the
+future.
+
+A Fixes: tag indicates that the patch fixes an issue in a previous commit. It
+is used to make it easy to determine where a bug originated, which can help
+review a bug fix. This tag also assists the stable kernel team in determining
+which stable kernel versions should receive your fix. This is the preferred
+method for indicating a bug fixed by the patch. See #2 above for more details.
+
+
+14) The canonical patch format
+------------------------------
+
+This section describes how the patch itself should be formatted. Note
+that, if you have your patches stored in a git repository, proper patch
+formatting can be had with "git format-patch". The tools cannot create
+the necessary text, though, so read the instructions below anyway.
+
+The canonical patch subject line is:
+
+ Subject: [PATCH 001/123] subsystem: summary phrase
+
+The canonical patch message body contains the following:
+
+ - A "from" line specifying the patch author (only needed if the person
+ sending the patch is not the author).
+
+ - An empty line.
+
+ - The body of the explanation, line wrapped at 75 columns, which will
+ be copied to the permanent changelog to describe this patch.
+
+ - The "Signed-off-by:" lines, described above, which will
+ also go in the changelog.
+
+ - A marker line containing simply "---".
+
+ - Any additional comments not suitable for the changelog.
+
+ - The actual patch (diff output).
+
+The Subject line format makes it very easy to sort the emails
+alphabetically by subject line - pretty much any email reader will
+support that - since because the sequence number is zero-padded,
+the numerical and alphabetic sort is the same.
+
+The "subsystem" in the email's Subject should identify which
+area or subsystem of the kernel is being patched.
+
+The "summary phrase" in the email's Subject should concisely
+describe the patch which that email contains. The "summary
+phrase" should not be a filename. Do not use the same "summary
+phrase" for every patch in a whole patch series (where a "patch
+series" is an ordered sequence of multiple, related patches).
+
+Bear in mind that the "summary phrase" of your email becomes a
+globally-unique identifier for that patch. It propagates all the way
+into the git changelog. The "summary phrase" may later be used in
+developer discussions which refer to the patch. People will want to
+google for the "summary phrase" to read discussion regarding that
+patch. It will also be the only thing that people may quickly see
+when, two or three months later, they are going through perhaps
+thousands of patches using tools such as "gitk" or "git log
+--oneline".
+
+For these reasons, the "summary" must be no more than 70-75
+characters, and it must describe both what the patch changes, as well
+as why the patch might be necessary. It is challenging to be both
+succinct and descriptive, but that is what a well-written summary
+should do.
+
+The "summary phrase" may be prefixed by tags enclosed in square
+brackets: "Subject: [PATCH tag] <summary phrase>". The tags are not
+considered part of the summary phrase, but describe how the patch
+should be treated. Common tags might include a version descriptor if
+the multiple versions of the patch have been sent out in response to
+comments (i.e., "v1, v2, v3"), or "RFC" to indicate a request for
+comments. If there are four patches in a patch series the individual
+patches may be numbered like this: 1/4, 2/4, 3/4, 4/4. This assures
+that developers understand the order in which the patches should be
+applied and that they have reviewed or applied all of the patches in
+the patch series.
+
+A couple of example Subjects:
+
+ Subject: [patch 2/5] ext2: improve scalability of bitmap searching
+ Subject: [PATCHv2 001/207] x86: fix eflags tracking
+
+The "from" line must be the very first line in the message body,
+and has the form:
+
+ From: Original Author <author@example.com>
+
+The "from" line specifies who will be credited as the author of the
+patch in the permanent changelog. If the "from" line is missing,
+then the "From:" line from the email header will be used to determine
+the patch author in the changelog.
+
+The explanation body will be committed to the permanent source
+changelog, so should make sense to a competent reader who has long
+since forgotten the immediate details of the discussion that might
+have led to this patch. Including symptoms of the failure which the
+patch addresses (kernel log messages, oops messages, etc.) is
+especially useful for people who might be searching the commit logs
+looking for the applicable patch. If a patch fixes a compile failure,
+it may not be necessary to include _all_ of the compile failures; just
+enough that it is likely that someone searching for the patch can find
+it. As in the "summary phrase", it is important to be both succinct as
+well as descriptive.
+
+The "---" marker line serves the essential purpose of marking for patch
+handling tools where the changelog message ends.
+
+One good use for the additional comments after the "---" marker is for
+a diffstat, to show what files have changed, and the number of
+inserted and deleted lines per file. A diffstat is especially useful
+on bigger patches. Other comments relevant only to the moment or the
+maintainer, not suitable for the permanent changelog, should also go
+here. A good example of such comments might be "patch changelogs"
+which describe what has changed between the v1 and v2 version of the
+patch.
+
+If you are going to include a diffstat after the "---" marker, please
+use diffstat options "-p 1 -w 70" so that filenames are listed from
+the top of the kernel source tree and don't use too much horizontal
+space (easily fit in 80 columns, maybe with some indentation). (git
+generates appropriate diffstats by default.)
+
+See more details on the proper patch format in the following
+references.
+
+
+15) Sending "git pull" requests
+-------------------------------
+
+If you have a series of patches, it may be most convenient to have the
+maintainer pull them directly into the subsystem repository with a
+"git pull" operation. Note, however, that pulling patches from a developer
+requires a higher degree of trust than taking patches from a mailing list.
+As a result, many subsystem maintainers are reluctant to take pull
+requests, especially from new, unknown developers. If in doubt you can use
+the pull request as the cover letter for a normal posting of the patch
+series, giving the maintainer the option of using either.
+
+A pull request should have [GIT] or [PULL] in the subject line. The
+request itself should include the repository name and the branch of
+interest on a single line; it should look something like:
+
+ Please pull from
+
+ git://jdelvare.pck.nerim.net/jdelvare-2.6 i2c-for-linus
+
+ to get these changes:"
+
+A pull request should also include an overall message saying what will be
+included in the request, a "git shortlog" listing of the patches
+themselves, and a diffstat showing the overall effect of the patch series.
+The easiest way to get all this information together is, of course, to let
+git do it for you with the "git request-pull" command.
+
+Some maintainers (including Linus) want to see pull requests from signed
+commits; that increases their confidence that the request actually came
+from you. Linus, in particular, will not pull from public hosting sites
+like GitHub in the absence of a signed tag.
+
+The first step toward creating such tags is to make a GNUPG key and get it
+signed by one or more core kernel developers. This step can be hard for
+new developers, but there is no way around it. Attending conferences can
+be a good way to find developers who can sign your key.
+
+Once you have prepared a patch series in git that you wish to have somebody
+pull, create a signed tag with "git tag -s". This will create a new tag
+identifying the last commit in the series and containing a signature
+created with your private key. You will also have the opportunity to add a
+changelog-style message to the tag; this is an ideal place to describe the
+effects of the pull request as a whole.
+
+If the tree the maintainer will be pulling from is not the repository you
+are working from, don't forget to push the signed tag explicitly to the
+public tree.
+
+When generating your pull request, use the signed tag as the target. A
+command like this will do the trick:
+
+ git request-pull master git://my.public.tree/linux.git my-signed-tag
+
+
+----------------------
+SECTION 2 - REFERENCES
+----------------------
+
+Andrew Morton, "The perfect patch" (tpp).
+ <http://www.ozlabs.org/~akpm/stuff/tpp.txt>
+
+Jeff Garzik, "Linux kernel patch submission format".
+ <http://linux.yyz.us/patch-format.html>
+
+Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
+ <http://www.kroah.com/log/linux/maintainer.html>
+ <http://www.kroah.com/log/linux/maintainer-02.html>
+ <http://www.kroah.com/log/linux/maintainer-03.html>
+ <http://www.kroah.com/log/linux/maintainer-04.html>
+ <http://www.kroah.com/log/linux/maintainer-05.html>
+ <http://www.kroah.com/log/linux/maintainer-06.html>
+
+NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
+ <https://lkml.org/lkml/2005/7/11/336>
+
+Kernel Documentation/CodingStyle:
+ <http://users.sosdg.org/~qiyong/lxr/source/Documentation/CodingStyle>
+
+Linus Torvalds's mail on the canonical patch format:
+ <http://lkml.org/lkml/2005/4/7/183>
+
+Andi Kleen, "On submitting kernel patches"
+ Some strategies to get difficult or controversial changes in.
+ http://halobates.de/on-submitting-patches.pdf
+
+--
diff --git a/Documentation/VGA-softcursor.txt b/Documentation/VGA-softcursor.txt
new file mode 100644
index 000000000..70acfbf39
--- /dev/null
+++ b/Documentation/VGA-softcursor.txt
@@ -0,0 +1,39 @@
+Software cursor for VGA by Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>
+======================= and Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+
+ Linux now has some ability to manipulate cursor appearance. Normally, you
+can set the size of hardware cursor (and also work around some ugly bugs in
+those miserable Trident cards--see #define TRIDENT_GLITCH in drivers/video/
+vgacon.c). You can now play a few new tricks: you can make your cursor look
+like a non-blinking red block, make it inverse background of the character it's
+over or to highlight that character and still choose whether the original
+hardware cursor should remain visible or not. There may be other things I have
+never thought of.
+
+ The cursor appearance is controlled by a "<ESC>[?1;2;3c" escape sequence
+where 1, 2 and 3 are parameters described below. If you omit any of them,
+they will default to zeroes.
+
+ Parameter 1 specifies cursor size (0=default, 1=invisible, 2=underline, ...,
+8=full block) + 16 if you want the software cursor to be applied + 32 if you
+want to always change the background color + 64 if you dislike having the
+background the same as the foreground. Highlights are ignored for the last two
+flags.
+
+ The second parameter selects character attribute bits you want to change
+(by simply XORing them with the value of this parameter). On standard VGA,
+the high four bits specify background and the low four the foreground. In both
+groups, low three bits set color (as in normal color codes used by the console)
+and the most significant one turns on highlight (or sometimes blinking--it
+depends on the configuration of your VGA).
+
+ The third parameter consists of character attribute bits you want to set.
+Bit setting takes place before bit toggling, so you can simply clear a bit by
+including it in both the set mask and the toggle mask.
+
+Examples:
+=========
+
+To get normal blinking underline, use: echo -e '\033[?2c'
+To get blinking block, use: echo -e '\033[?6c'
+To get red non-blinking block, use: echo -e '\033[?17;0;64c'
diff --git a/Documentation/accounting/.gitignore b/Documentation/accounting/.gitignore
new file mode 100644
index 000000000..86485203c
--- /dev/null
+++ b/Documentation/accounting/.gitignore
@@ -0,0 +1 @@
+getdelays
diff --git a/Documentation/accounting/Makefile b/Documentation/accounting/Makefile
new file mode 100644
index 000000000..7e232cb6f
--- /dev/null
+++ b/Documentation/accounting/Makefile
@@ -0,0 +1,7 @@
+# List of programs to build
+hostprogs-y := getdelays
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_getdelays.o += -I$(objtree)/usr/include
diff --git a/Documentation/accounting/cgroupstats.txt b/Documentation/accounting/cgroupstats.txt
new file mode 100644
index 000000000..d16a9849e
--- /dev/null
+++ b/Documentation/accounting/cgroupstats.txt
@@ -0,0 +1,27 @@
+Control Groupstats is inspired by the discussion at
+http://lkml.org/lkml/2007/4/11/187 and implements per cgroup statistics as
+suggested by Andrew Morton in http://lkml.org/lkml/2007/4/11/263.
+
+Per cgroup statistics infrastructure re-uses code from the taskstats
+interface. A new set of cgroup operations are registered with commands
+and attributes specific to cgroups. It should be very easy to
+extend per cgroup statistics, by adding members to the cgroupstats
+structure.
+
+The current model for cgroupstats is a pull, a push model (to post
+statistics on interesting events), should be very easy to add. Currently
+user space requests for statistics by passing the cgroup path.
+Statistics about the state of all the tasks in the cgroup is returned to
+user space.
+
+NOTE: We currently rely on delay accounting for extracting information
+about tasks blocked on I/O. If CONFIG_TASK_DELAY_ACCT is disabled, this
+information will not be available.
+
+To extract cgroup statistics a utility very similar to getdelays.c
+has been developed, the sample output of the utility is shown below
+
+~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup/a"
+sleeping 1, blocked 0, running 1, stopped 0, uninterruptible 0
+~/balbir/cgroupstats # ./getdelays -C "/sys/fs/cgroup"
+sleeping 155, blocked 0, running 1, stopped 0, uninterruptible 2
diff --git a/Documentation/accounting/delay-accounting.txt b/Documentation/accounting/delay-accounting.txt
new file mode 100644
index 000000000..8a12f0730
--- /dev/null
+++ b/Documentation/accounting/delay-accounting.txt
@@ -0,0 +1,117 @@
+Delay accounting
+----------------
+
+Tasks encounter delays in execution when they wait
+for some kernel resource to become available e.g. a
+runnable task may wait for a free CPU to run on.
+
+The per-task delay accounting functionality measures
+the delays experienced by a task while
+
+a) waiting for a CPU (while being runnable)
+b) completion of synchronous block I/O initiated by the task
+c) swapping in pages
+d) memory reclaim
+
+and makes these statistics available to userspace through
+the taskstats interface.
+
+Such delays provide feedback for setting a task's cpu priority,
+io priority and rss limit values appropriately. Long delays for
+important tasks could be a trigger for raising its corresponding priority.
+
+The functionality, through its use of the taskstats interface, also provides
+delay statistics aggregated for all tasks (or threads) belonging to a
+thread group (corresponding to a traditional Unix process). This is a commonly
+needed aggregation that is more efficiently done by the kernel.
+
+Userspace utilities, particularly resource management applications, can also
+aggregate delay statistics into arbitrary groups. To enable this, delay
+statistics of a task are available both during its lifetime as well as on its
+exit, ensuring continuous and complete monitoring can be done.
+
+
+Interface
+---------
+
+Delay accounting uses the taskstats interface which is described
+in detail in a separate document in this directory. Taskstats returns a
+generic data structure to userspace corresponding to per-pid and per-tgid
+statistics. The delay accounting functionality populates specific fields of
+this structure. See
+ include/linux/taskstats.h
+for a description of the fields pertaining to delay accounting.
+It will generally be in the form of counters returning the cumulative
+delay seen for cpu, sync block I/O, swapin, memory reclaim etc.
+
+Taking the difference of two successive readings of a given
+counter (say cpu_delay_total) for a task will give the delay
+experienced by the task waiting for the corresponding resource
+in that interval.
+
+When a task exits, records containing the per-task statistics
+are sent to userspace without requiring a command. If it is the last exiting
+task of a thread group, the per-tgid statistics are also sent. More details
+are given in the taskstats interface description.
+
+The getdelays.c userspace utility in this directory allows simple commands to
+be run and the corresponding delay statistics to be displayed. It also serves
+as an example of using the taskstats interface.
+
+Usage
+-----
+
+Compile the kernel with
+ CONFIG_TASK_DELAY_ACCT=y
+ CONFIG_TASKSTATS=y
+
+Delay accounting is enabled by default at boot up.
+To disable, add
+ nodelayacct
+to the kernel boot options. The rest of the instructions
+below assume this has not been done.
+
+After the system has booted up, use a utility
+similar to getdelays.c to access the delays
+seen by a given task or a task group (tgid).
+The utility also allows a given command to be
+executed and the corresponding delays to be
+seen.
+
+General format of the getdelays command
+
+getdelays [-t tgid] [-p pid] [-c cmd...]
+
+
+Get delays, since system boot, for pid 10
+# ./getdelays -p 10
+(output similar to next case)
+
+Get sum of delays, since system boot, for all pids with tgid 5
+# ./getdelays -t 5
+
+
+CPU count real total virtual total delay total
+ 7876 92005750 100000000 24001500
+IO count delay total
+ 0 0
+SWAP count delay total
+ 0 0
+RECLAIM count delay total
+ 0 0
+
+Get delays seen in executing a given simple command
+# ./getdelays -c ls /
+
+bin data1 data3 data5 dev home media opt root srv sys usr
+boot data2 data4 data6 etc lib mnt proc sbin subdomain tmp var
+
+
+CPU count real total virtual total delay total
+ 6 4000250 4000000 0
+IO count delay total
+ 0 0
+SWAP count delay total
+ 0 0
+RECLAIM count delay total
+ 0 0
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
new file mode 100644
index 000000000..f40578026
--- /dev/null
+++ b/Documentation/accounting/getdelays.c
@@ -0,0 +1,546 @@
+/* getdelays.c
+ *
+ * Utility to get per-pid and per-tgid delay accounting statistics
+ * Also illustrates usage of the taskstats interface
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2005
+ * Copyright (C) Balbir Singh, IBM Corp. 2006
+ * Copyright (c) Jay Lan, SGI. 2006
+ *
+ * Compile with
+ * gcc -I/usr/src/linux/include getdelays.c -o getdelays
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <poll.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <signal.h>
+
+#include <linux/genetlink.h>
+#include <linux/taskstats.h>
+#include <linux/cgroupstats.h>
+
+/*
+ * Generic macros for dealing with netlink sockets. Might be duplicated
+ * elsewhere. It is recommended that commercial grade applications use
+ * libnl or libnetlink and use the interfaces provided by the library
+ */
+#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
+#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
+#define NLA_DATA(na) ((void *)((char*)(na) + NLA_HDRLEN))
+#define NLA_PAYLOAD(len) (len - NLA_HDRLEN)
+
+#define err(code, fmt, arg...) \
+ do { \
+ fprintf(stderr, fmt, ##arg); \
+ exit(code); \
+ } while (0)
+
+int done;
+int rcvbufsz;
+char name[100];
+int dbg;
+int print_delays;
+int print_io_accounting;
+int print_task_context_switch_counts;
+
+#define PRINTF(fmt, arg...) { \
+ if (dbg) { \
+ printf(fmt, ##arg); \
+ } \
+ }
+
+/* Maximum size of response requested or message sent */
+#define MAX_MSG_SIZE 1024
+/* Maximum number of cpus expected to be specified in a cpumask */
+#define MAX_CPUS 32
+
+struct msgtemplate {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[MAX_MSG_SIZE];
+};
+
+char cpumask[100+6*MAX_CPUS];
+
+static void usage(void)
+{
+ fprintf(stderr, "getdelays [-dilv] [-w logfile] [-r bufsize] "
+ "[-m cpumask] [-t tgid] [-p pid]\n");
+ fprintf(stderr, " -d: print delayacct stats\n");
+ fprintf(stderr, " -i: print IO accounting (works only with -p)\n");
+ fprintf(stderr, " -l: listen forever\n");
+ fprintf(stderr, " -v: debug on\n");
+ fprintf(stderr, " -C: container path\n");
+}
+
+/*
+ * Create a raw netlink socket and bind
+ */
+static int create_nl_socket(int protocol)
+{
+ int fd;
+ struct sockaddr_nl local;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, protocol);
+ if (fd < 0)
+ return -1;
+
+ if (rcvbufsz)
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
+ &rcvbufsz, sizeof(rcvbufsz)) < 0) {
+ fprintf(stderr, "Unable to set socket rcv buf size to %d\n",
+ rcvbufsz);
+ goto error;
+ }
+
+ memset(&local, 0, sizeof(local));
+ local.nl_family = AF_NETLINK;
+
+ if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
+ goto error;
+
+ return fd;
+error:
+ close(fd);
+ return -1;
+}
+
+
+static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
+ __u8 genl_cmd, __u16 nla_type,
+ void *nla_data, int nla_len)
+{
+ struct nlattr *na;
+ struct sockaddr_nl nladdr;
+ int r, buflen;
+ char *buf;
+
+ struct msgtemplate msg;
+
+ msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ msg.n.nlmsg_type = nlmsg_type;
+ msg.n.nlmsg_flags = NLM_F_REQUEST;
+ msg.n.nlmsg_seq = 0;
+ msg.n.nlmsg_pid = nlmsg_pid;
+ msg.g.cmd = genl_cmd;
+ msg.g.version = 0x1;
+ na = (struct nlattr *) GENLMSG_DATA(&msg);
+ na->nla_type = nla_type;
+ na->nla_len = nla_len + 1 + NLA_HDRLEN;
+ memcpy(NLA_DATA(na), nla_data, nla_len);
+ msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
+
+ buf = (char *) &msg;
+ buflen = msg.n.nlmsg_len ;
+ memset(&nladdr, 0, sizeof(nladdr));
+ nladdr.nl_family = AF_NETLINK;
+ while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
+ sizeof(nladdr))) < buflen) {
+ if (r > 0) {
+ buf += r;
+ buflen -= r;
+ } else if (errno != EAGAIN)
+ return -1;
+ }
+ return 0;
+}
+
+
+/*
+ * Probe the controller in genetlink to find the family id
+ * for the TASKSTATS family
+ */
+static int get_family_id(int sd)
+{
+ struct {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[256];
+ } ans;
+
+ int id = 0, rc;
+ struct nlattr *na;
+ int rep_len;
+
+ strcpy(name, TASKSTATS_GENL_NAME);
+ rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
+ CTRL_ATTR_FAMILY_NAME, (void *)name,
+ strlen(TASKSTATS_GENL_NAME)+1);
+ if (rc < 0)
+ return 0; /* sendto() failure? */
+
+ rep_len = recv(sd, &ans, sizeof(ans), 0);
+ if (ans.n.nlmsg_type == NLMSG_ERROR ||
+ (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
+ return 0;
+
+ na = (struct nlattr *) GENLMSG_DATA(&ans);
+ na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
+ if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
+ id = *(__u16 *) NLA_DATA(na);
+ }
+ return id;
+}
+
+#define average_ms(t, c) (t / 1000000ULL / (c ? c : 1))
+
+static void print_delayacct(struct taskstats *t)
+{
+ printf("\n\nCPU %15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15llu%15llu%15.3fms\n"
+ "IO %15s%15s%15s\n"
+ " %15llu%15llu%15llums\n"
+ "SWAP %15s%15s%15s\n"
+ " %15llu%15llu%15llums\n"
+ "RECLAIM %12s%15s%15s\n"
+ " %15llu%15llu%15llums\n",
+ "count", "real total", "virtual total",
+ "delay total", "delay average",
+ (unsigned long long)t->cpu_count,
+ (unsigned long long)t->cpu_run_real_total,
+ (unsigned long long)t->cpu_run_virtual_total,
+ (unsigned long long)t->cpu_delay_total,
+ average_ms((double)t->cpu_delay_total, t->cpu_count),
+ "count", "delay total", "delay average",
+ (unsigned long long)t->blkio_count,
+ (unsigned long long)t->blkio_delay_total,
+ average_ms(t->blkio_delay_total, t->blkio_count),
+ "count", "delay total", "delay average",
+ (unsigned long long)t->swapin_count,
+ (unsigned long long)t->swapin_delay_total,
+ average_ms(t->swapin_delay_total, t->swapin_count),
+ "count", "delay total", "delay average",
+ (unsigned long long)t->freepages_count,
+ (unsigned long long)t->freepages_delay_total,
+ average_ms(t->freepages_delay_total, t->freepages_count));
+}
+
+static void task_context_switch_counts(struct taskstats *t)
+{
+ printf("\n\nTask %15s%15s\n"
+ " %15llu%15llu\n",
+ "voluntary", "nonvoluntary",
+ (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw);
+}
+
+static void print_cgroupstats(struct cgroupstats *c)
+{
+ printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
+ "uninterruptible %llu\n", (unsigned long long)c->nr_sleeping,
+ (unsigned long long)c->nr_io_wait,
+ (unsigned long long)c->nr_running,
+ (unsigned long long)c->nr_stopped,
+ (unsigned long long)c->nr_uninterruptible);
+}
+
+
+static void print_ioacct(struct taskstats *t)
+{
+ printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
+ t->ac_comm,
+ (unsigned long long)t->read_bytes,
+ (unsigned long long)t->write_bytes,
+ (unsigned long long)t->cancelled_write_bytes);
+}
+
+int main(int argc, char *argv[])
+{
+ int c, rc, rep_len, aggr_len, len2;
+ int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC;
+ __u16 id;
+ __u32 mypid;
+
+ struct nlattr *na;
+ int nl_sd = -1;
+ int len = 0;
+ pid_t tid = 0;
+ pid_t rtid = 0;
+
+ int fd = 0;
+ int count = 0;
+ int write_file = 0;
+ int maskset = 0;
+ char *logfile = NULL;
+ int loop = 0;
+ int containerset = 0;
+ char *containerpath = NULL;
+ int cfd = 0;
+ int forking = 0;
+ sigset_t sigset;
+
+ struct msgtemplate msg;
+
+ while (!forking) {
+ c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:c:");
+ if (c < 0)
+ break;
+
+ switch (c) {
+ case 'd':
+ printf("print delayacct stats ON\n");
+ print_delays = 1;
+ break;
+ case 'i':
+ printf("printing IO accounting\n");
+ print_io_accounting = 1;
+ break;
+ case 'q':
+ printf("printing task/process context switch rates\n");
+ print_task_context_switch_counts = 1;
+ break;
+ case 'C':
+ containerset = 1;
+ containerpath = optarg;
+ break;
+ case 'w':
+ logfile = strdup(optarg);
+ printf("write to file %s\n", logfile);
+ write_file = 1;
+ break;
+ case 'r':
+ rcvbufsz = atoi(optarg);
+ printf("receive buf size %d\n", rcvbufsz);
+ if (rcvbufsz < 0)
+ err(1, "Invalid rcv buf size\n");
+ break;
+ case 'm':
+ strncpy(cpumask, optarg, sizeof(cpumask));
+ cpumask[sizeof(cpumask) - 1] = '\0';
+ maskset = 1;
+ printf("cpumask %s maskset %d\n", cpumask, maskset);
+ break;
+ case 't':
+ tid = atoi(optarg);
+ if (!tid)
+ err(1, "Invalid tgid\n");
+ cmd_type = TASKSTATS_CMD_ATTR_TGID;
+ break;
+ case 'p':
+ tid = atoi(optarg);
+ if (!tid)
+ err(1, "Invalid pid\n");
+ cmd_type = TASKSTATS_CMD_ATTR_PID;
+ break;
+ case 'c':
+
+ /* Block SIGCHLD for sigwait() later */
+ if (sigemptyset(&sigset) == -1)
+ err(1, "Failed to empty sigset");
+ if (sigaddset(&sigset, SIGCHLD))
+ err(1, "Failed to set sigchld in sigset");
+ sigprocmask(SIG_BLOCK, &sigset, NULL);
+
+ /* fork/exec a child */
+ tid = fork();
+ if (tid < 0)
+ err(1, "Fork failed\n");
+ if (tid == 0)
+ if (execvp(argv[optind - 1],
+ &argv[optind - 1]) < 0)
+ exit(-1);
+
+ /* Set the command type and avoid further processing */
+ cmd_type = TASKSTATS_CMD_ATTR_PID;
+ forking = 1;
+ break;
+ case 'v':
+ printf("debug on\n");
+ dbg = 1;
+ break;
+ case 'l':
+ printf("listen forever\n");
+ loop = 1;
+ break;
+ default:
+ usage();
+ exit(-1);
+ }
+ }
+
+ if (write_file) {
+ fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd == -1) {
+ perror("Cannot open output file\n");
+ exit(1);
+ }
+ }
+
+ if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0)
+ err(1, "error creating Netlink socket\n");
+
+
+ mypid = getpid();
+ id = get_family_id(nl_sd);
+ if (!id) {
+ fprintf(stderr, "Error getting family id, errno %d\n", errno);
+ goto err;
+ }
+ PRINTF("family id %d\n", id);
+
+ if (maskset) {
+ rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
+ TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
+ &cpumask, strlen(cpumask) + 1);
+ PRINTF("Sent register cpumask, retval %d\n", rc);
+ if (rc < 0) {
+ fprintf(stderr, "error sending register cpumask\n");
+ goto err;
+ }
+ }
+
+ if (tid && containerset) {
+ fprintf(stderr, "Select either -t or -C, not both\n");
+ goto err;
+ }
+
+ /*
+ * If we forked a child, wait for it to exit. Cannot use waitpid()
+ * as all the delicious data would be reaped as part of the wait
+ */
+ if (tid && forking) {
+ int sig_received;
+ sigwait(&sigset, &sig_received);
+ }
+
+ if (tid) {
+ rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
+ cmd_type, &tid, sizeof(__u32));
+ PRINTF("Sent pid/tgid, retval %d\n", rc);
+ if (rc < 0) {
+ fprintf(stderr, "error sending tid/tgid cmd\n");
+ goto done;
+ }
+ }
+
+ if (containerset) {
+ cfd = open(containerpath, O_RDONLY);
+ if (cfd < 0) {
+ perror("error opening container file");
+ goto err;
+ }
+ rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
+ CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32));
+ if (rc < 0) {
+ perror("error sending cgroupstats command");
+ goto err;
+ }
+ }
+ if (!maskset && !tid && !containerset) {
+ usage();
+ goto err;
+ }
+
+ do {
+ rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
+ PRINTF("received %d bytes\n", rep_len);
+
+ if (rep_len < 0) {
+ fprintf(stderr, "nonfatal reply error: errno %d\n",
+ errno);
+ continue;
+ }
+ if (msg.n.nlmsg_type == NLMSG_ERROR ||
+ !NLMSG_OK((&msg.n), rep_len)) {
+ struct nlmsgerr *err = NLMSG_DATA(&msg);
+ fprintf(stderr, "fatal reply error, errno %d\n",
+ err->error);
+ goto done;
+ }
+
+ PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
+ sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
+
+
+ rep_len = GENLMSG_PAYLOAD(&msg.n);
+
+ na = (struct nlattr *) GENLMSG_DATA(&msg);
+ len = 0;
+ while (len < rep_len) {
+ len += NLA_ALIGN(na->nla_len);
+ switch (na->nla_type) {
+ case TASKSTATS_TYPE_AGGR_TGID:
+ /* Fall through */
+ case TASKSTATS_TYPE_AGGR_PID:
+ aggr_len = NLA_PAYLOAD(na->nla_len);
+ len2 = 0;
+ /* For nested attributes, na follows */
+ na = (struct nlattr *) NLA_DATA(na);
+ done = 0;
+ while (len2 < aggr_len) {
+ switch (na->nla_type) {
+ case TASKSTATS_TYPE_PID:
+ rtid = *(int *) NLA_DATA(na);
+ if (print_delays)
+ printf("PID\t%d\n", rtid);
+ break;
+ case TASKSTATS_TYPE_TGID:
+ rtid = *(int *) NLA_DATA(na);
+ if (print_delays)
+ printf("TGID\t%d\n", rtid);
+ break;
+ case TASKSTATS_TYPE_STATS:
+ count++;
+ if (print_delays)
+ print_delayacct((struct taskstats *) NLA_DATA(na));
+ if (print_io_accounting)
+ print_ioacct((struct taskstats *) NLA_DATA(na));
+ if (print_task_context_switch_counts)
+ task_context_switch_counts((struct taskstats *) NLA_DATA(na));
+ if (fd) {
+ if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
+ err(1,"write error\n");
+ }
+ }
+ if (!loop)
+ goto done;
+ break;
+ default:
+ fprintf(stderr, "Unknown nested"
+ " nla_type %d\n",
+ na->nla_type);
+ break;
+ }
+ len2 += NLA_ALIGN(na->nla_len);
+ na = (struct nlattr *) ((char *) na + len2);
+ }
+ break;
+
+ case CGROUPSTATS_TYPE_CGROUP_STATS:
+ print_cgroupstats(NLA_DATA(na));
+ break;
+ default:
+ fprintf(stderr, "Unknown nla_type %d\n",
+ na->nla_type);
+ case TASKSTATS_TYPE_NULL:
+ break;
+ }
+ na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
+ }
+ } while (loop);
+done:
+ if (maskset) {
+ rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
+ TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
+ &cpumask, strlen(cpumask) + 1);
+ printf("Sent deregister mask, retval %d\n", rc);
+ if (rc < 0)
+ err(rc, "error sending deregister cpumask\n");
+ }
+err:
+ close(nl_sd);
+ if (fd)
+ close(fd);
+ if (cfd)
+ close(cfd);
+ return 0;
+}
diff --git a/Documentation/accounting/taskstats-struct.txt b/Documentation/accounting/taskstats-struct.txt
new file mode 100644
index 000000000..e7512c061
--- /dev/null
+++ b/Documentation/accounting/taskstats-struct.txt
@@ -0,0 +1,180 @@
+The struct taskstats
+--------------------
+
+This document contains an explanation of the struct taskstats fields.
+
+There are three different groups of fields in the struct taskstats:
+
+1) Common and basic accounting fields
+ If CONFIG_TASKSTATS is set, the taskstats interface is enabled and
+ the common fields and basic accounting fields are collected for
+ delivery at do_exit() of a task.
+2) Delay accounting fields
+ These fields are placed between
+ /* Delay accounting fields start */
+ and
+ /* Delay accounting fields end */
+ Their values are collected if CONFIG_TASK_DELAY_ACCT is set.
+3) Extended accounting fields
+ These fields are placed between
+ /* Extended accounting fields start */
+ and
+ /* Extended accounting fields end */
+ Their values are collected if CONFIG_TASK_XACCT is set.
+
+4) Per-task and per-thread context switch count statistics
+
+5) Time accounting for SMT machines
+
+6) Extended delay accounting fields for memory reclaim
+
+Future extension should add fields to the end of the taskstats struct, and
+should not change the relative position of each field within the struct.
+
+
+struct taskstats {
+
+1) Common and basic accounting fields:
+ /* The version number of this struct. This field is always set to
+ * TAKSTATS_VERSION, which is defined in <linux/taskstats.h>.
+ * Each time the struct is changed, the value should be incremented.
+ */
+ __u16 version;
+
+ /* The exit code of a task. */
+ __u32 ac_exitcode; /* Exit status */
+
+ /* The accounting flags of a task as defined in <linux/acct.h>
+ * Defined values are AFORK, ASU, ACOMPAT, ACORE, and AXSIG.
+ */
+ __u8 ac_flag; /* Record flags */
+
+ /* The value of task_nice() of a task. */
+ __u8 ac_nice; /* task_nice */
+
+ /* The name of the command that started this task. */
+ char ac_comm[TS_COMM_LEN]; /* Command name */
+
+ /* The scheduling discipline as set in task->policy field. */
+ __u8 ac_sched; /* Scheduling discipline */
+
+ __u8 ac_pad[3];
+ __u32 ac_uid; /* User ID */
+ __u32 ac_gid; /* Group ID */
+ __u32 ac_pid; /* Process ID */
+ __u32 ac_ppid; /* Parent process ID */
+
+ /* The time when a task begins, in [secs] since 1970. */
+ __u32 ac_btime; /* Begin time [sec since 1970] */
+
+ /* The elapsed time of a task, in [usec]. */
+ __u64 ac_etime; /* Elapsed time [usec] */
+
+ /* The user CPU time of a task, in [usec]. */
+ __u64 ac_utime; /* User CPU time [usec] */
+
+ /* The system CPU time of a task, in [usec]. */
+ __u64 ac_stime; /* System CPU time [usec] */
+
+ /* The minor page fault count of a task, as set in task->min_flt. */
+ __u64 ac_minflt; /* Minor Page Fault Count */
+
+ /* The major page fault count of a task, as set in task->maj_flt. */
+ __u64 ac_majflt; /* Major Page Fault Count */
+
+
+2) Delay accounting fields:
+ /* Delay accounting fields start
+ *
+ * All values, until the comment "Delay accounting fields end" are
+ * available only if delay accounting is enabled, even though the last
+ * few fields are not delays
+ *
+ * xxx_count is the number of delay values recorded
+ * xxx_delay_total is the corresponding cumulative delay in nanoseconds
+ *
+ * xxx_delay_total wraps around to zero on overflow
+ * xxx_count incremented regardless of overflow
+ */
+
+ /* Delay waiting for cpu, while runnable
+ * count, delay_total NOT updated atomically
+ */
+ __u64 cpu_count;
+ __u64 cpu_delay_total;
+
+ /* Following four fields atomically updated using task->delays->lock */
+
+ /* Delay waiting for synchronous block I/O to complete
+ * does not account for delays in I/O submission
+ */
+ __u64 blkio_count;
+ __u64 blkio_delay_total;
+
+ /* Delay waiting for page fault I/O (swap in only) */
+ __u64 swapin_count;
+ __u64 swapin_delay_total;
+
+ /* cpu "wall-clock" running time
+ * On some architectures, value will adjust for cpu time stolen
+ * from the kernel in involuntary waits due to virtualization.
+ * Value is cumulative, in nanoseconds, without a corresponding count
+ * and wraps around to zero silently on overflow
+ */
+ __u64 cpu_run_real_total;
+
+ /* cpu "virtual" running time
+ * Uses time intervals seen by the kernel i.e. no adjustment
+ * for kernel's involuntary waits due to virtualization.
+ * Value is cumulative, in nanoseconds, without a corresponding count
+ * and wraps around to zero silently on overflow
+ */
+ __u64 cpu_run_virtual_total;
+ /* Delay accounting fields end */
+ /* version 1 ends here */
+
+
+3) Extended accounting fields
+ /* Extended accounting fields start */
+
+ /* Accumulated RSS usage in duration of a task, in MBytes-usecs.
+ * The current rss usage is added to this counter every time
+ * a tick is charged to a task's system time. So, at the end we
+ * will have memory usage multiplied by system time. Thus an
+ * average usage per system time unit can be calculated.
+ */
+ __u64 coremem; /* accumulated RSS usage in MB-usec */
+
+ /* Accumulated virtual memory usage in duration of a task.
+ * Same as acct_rss_mem1 above except that we keep track of VM usage.
+ */
+ __u64 virtmem; /* accumulated VM usage in MB-usec */
+
+ /* High watermark of RSS usage in duration of a task, in KBytes. */
+ __u64 hiwater_rss; /* High-watermark of RSS usage */
+
+ /* High watermark of VM usage in duration of a task, in KBytes. */
+ __u64 hiwater_vm; /* High-water virtual memory usage */
+
+ /* The following four fields are I/O statistics of a task. */
+ __u64 read_char; /* bytes read */
+ __u64 write_char; /* bytes written */
+ __u64 read_syscalls; /* read syscalls */
+ __u64 write_syscalls; /* write syscalls */
+
+ /* Extended accounting fields end */
+
+4) Per-task and per-thread statistics
+ __u64 nvcsw; /* Context voluntary switch counter */
+ __u64 nivcsw; /* Context involuntary switch counter */
+
+5) Time accounting for SMT machines
+ __u64 ac_utimescaled; /* utime scaled on frequency etc */
+ __u64 ac_stimescaled; /* stime scaled on frequency etc */
+ __u64 cpu_scaled_run_real_total; /* scaled cpu_run_real_total */
+
+6) Extended delay accounting fields for memory reclaim
+ /* Delay waiting for memory reclaim */
+ __u64 freepages_count;
+ __u64 freepages_delay_total;
+}
diff --git a/Documentation/accounting/taskstats.txt b/Documentation/accounting/taskstats.txt
new file mode 100644
index 000000000..ff06b738b
--- /dev/null
+++ b/Documentation/accounting/taskstats.txt
@@ -0,0 +1,181 @@
+Per-task statistics interface
+-----------------------------
+
+
+Taskstats is a netlink-based interface for sending per-task and
+per-process statistics from the kernel to userspace.
+
+Taskstats was designed for the following benefits:
+
+- efficiently provide statistics during lifetime of a task and on its exit
+- unified interface for multiple accounting subsystems
+- extensibility for use by future accounting patches
+
+Terminology
+-----------
+
+"pid", "tid" and "task" are used interchangeably and refer to the standard
+Linux task defined by struct task_struct. per-pid stats are the same as
+per-task stats.
+
+"tgid", "process" and "thread group" are used interchangeably and refer to the
+tasks that share an mm_struct i.e. the traditional Unix process. Despite the
+use of tgid, there is no special treatment for the task that is thread group
+leader - a process is deemed alive as long as it has any task belonging to it.
+
+Usage
+-----
+
+To get statistics during a task's lifetime, userspace opens a unicast netlink
+socket (NETLINK_GENERIC family) and sends commands specifying a pid or a tgid.
+The response contains statistics for a task (if pid is specified) or the sum of
+statistics for all tasks of the process (if tgid is specified).
+
+To obtain statistics for tasks which are exiting, the userspace listener
+sends a register command and specifies a cpumask. Whenever a task exits on
+one of the cpus in the cpumask, its per-pid statistics are sent to the
+registered listener. Using cpumasks allows the data received by one listener
+to be limited and assists in flow control over the netlink interface and is
+explained in more detail below.
+
+If the exiting task is the last thread exiting its thread group,
+an additional record containing the per-tgid stats is also sent to userspace.
+The latter contains the sum of per-pid stats for all threads in the thread
+group, both past and present.
+
+getdelays.c is a simple utility demonstrating usage of the taskstats interface
+for reporting delay accounting statistics. Users can register cpumasks,
+send commands and process responses, listen for per-tid/tgid exit data,
+write the data received to a file and do basic flow control by increasing
+receive buffer sizes.
+
+Interface
+---------
+
+The user-kernel interface is encapsulated in include/linux/taskstats.h
+
+To avoid this documentation becoming obsolete as the interface evolves, only
+an outline of the current version is given. taskstats.h always overrides the
+description here.
+
+struct taskstats is the common accounting structure for both per-pid and
+per-tgid data. It is versioned and can be extended by each accounting subsystem
+that is added to the kernel. The fields and their semantics are defined in the
+taskstats.h file.
+
+The data exchanged between user and kernel space is a netlink message belonging
+to the NETLINK_GENERIC family and using the netlink attributes interface.
+The messages are in the format
+
+ +----------+- - -+-------------+-------------------+
+ | nlmsghdr | Pad | genlmsghdr | taskstats payload |
+ +----------+- - -+-------------+-------------------+
+
+
+The taskstats payload is one of the following three kinds:
+
+1. Commands: Sent from user to kernel. Commands to get data on
+a pid/tgid consist of one attribute, of type TASKSTATS_CMD_ATTR_PID/TGID,
+containing a u32 pid or tgid in the attribute payload. The pid/tgid denotes
+the task/process for which userspace wants statistics.
+
+Commands to register/deregister interest in exit data from a set of cpus
+consist of one attribute, of type
+TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK and contain a cpumask in the
+attribute payload. The cpumask is specified as an ascii string of
+comma-separated cpu ranges e.g. to listen to exit data from cpus 1,2,3,5,7,8
+the cpumask would be "1-3,5,7-8". If userspace forgets to deregister interest
+in cpus before closing the listening socket, the kernel cleans up its interest
+set over time. However, for the sake of efficiency, an explicit deregistration
+is advisable.
+
+2. Response for a command: sent from the kernel in response to a userspace
+command. The payload is a series of three attributes of type:
+
+a) TASKSTATS_TYPE_AGGR_PID/TGID : attribute containing no payload but indicates
+a pid/tgid will be followed by some stats.
+
+b) TASKSTATS_TYPE_PID/TGID: attribute whose payload is the pid/tgid whose stats
+are being returned.
+
+c) TASKSTATS_TYPE_STATS: attribute with a struct taskstats as payload. The
+same structure is used for both per-pid and per-tgid stats.
+
+3. New message sent by kernel whenever a task exits. The payload consists of a
+ series of attributes of the following type:
+
+a) TASKSTATS_TYPE_AGGR_PID: indicates next two attributes will be pid+stats
+b) TASKSTATS_TYPE_PID: contains exiting task's pid
+c) TASKSTATS_TYPE_STATS: contains the exiting task's per-pid stats
+d) TASKSTATS_TYPE_AGGR_TGID: indicates next two attributes will be tgid+stats
+e) TASKSTATS_TYPE_TGID: contains tgid of process to which task belongs
+f) TASKSTATS_TYPE_STATS: contains the per-tgid stats for exiting task's process
+
+
+per-tgid stats
+--------------
+
+Taskstats provides per-process stats, in addition to per-task stats, since
+resource management is often done at a process granularity and aggregating task
+stats in userspace alone is inefficient and potentially inaccurate (due to lack
+of atomicity).
+
+However, maintaining per-process, in addition to per-task stats, within the
+kernel has space and time overheads. To address this, the taskstats code
+accumulates each exiting task's statistics into a process-wide data structure.
+When the last task of a process exits, the process level data accumulated also
+gets sent to userspace (along with the per-task data).
+
+When a user queries to get per-tgid data, the sum of all other live threads in
+the group is added up and added to the accumulated total for previously exited
+threads of the same thread group.
+
+Extending taskstats
+-------------------
+
+There are two ways to extend the taskstats interface to export more
+per-task/process stats as patches to collect them get added to the kernel
+in future:
+
+1. Adding more fields to the end of the existing struct taskstats. Backward
+ compatibility is ensured by the version number within the
+ structure. Userspace will use only the fields of the struct that correspond
+ to the version its using.
+
+2. Defining separate statistic structs and using the netlink attributes
+ interface to return them. Since userspace processes each netlink attribute
+ independently, it can always ignore attributes whose type it does not
+ understand (because it is using an older version of the interface).
+
+
+Choosing between 1. and 2. is a matter of trading off flexibility and
+overhead. If only a few fields need to be added, then 1. is the preferable
+path since the kernel and userspace don't need to incur the overhead of
+processing new netlink attributes. But if the new fields expand the existing
+struct too much, requiring disparate userspace accounting utilities to
+unnecessarily receive large structures whose fields are of no interest, then
+extending the attributes structure would be worthwhile.
+
+Flow control for taskstats
+--------------------------
+
+When the rate of task exits becomes large, a listener may not be able to keep
+up with the kernel's rate of sending per-tid/tgid exit data leading to data
+loss. This possibility gets compounded when the taskstats structure gets
+extended and the number of cpus grows large.
+
+To avoid losing statistics, userspace should do one or more of the following:
+
+- increase the receive buffer sizes for the netlink sockets opened by
+listeners to receive exit data.
+
+- create more listeners and reduce the number of cpus being listened to by
+each listener. In the extreme case, there could be one listener for each cpu.
+Users may also consider setting the cpu affinity of the listener to the subset
+of cpus to which it listens, especially if they are listening to just one cpu.
+
+Despite these measures, if the userspace receives ENOBUFS error messages
+indicated overflow of receive buffers, it should take measures to handle the
+loss of data.
+
+----
diff --git a/Documentation/acpi/apei/einj.txt b/Documentation/acpi/apei/einj.txt
new file mode 100644
index 000000000..e550c8b98
--- /dev/null
+++ b/Documentation/acpi/apei/einj.txt
@@ -0,0 +1,177 @@
+ APEI Error INJection
+ ~~~~~~~~~~~~~~~~~~~~
+
+EINJ provides a hardware error injection mechanism. It is very useful
+for debugging and testing APEI and RAS features in general.
+
+You need to check whether your BIOS supports EINJ first. For that, look
+for early boot messages similar to this one:
+
+ACPI: EINJ 0x000000007370A000 000150 (v01 INTEL 00000001 INTL 00000001)
+
+which shows that the BIOS is exposing an EINJ table - it is the
+mechanism through which the injection is done.
+
+Alternatively, look in /sys/firmware/acpi/tables for an "EINJ" file,
+which is a different representation of the same thing.
+
+It doesn't necessarily mean that EINJ is not supported if those above
+don't exist: before you give up, go into BIOS setup to see if the BIOS
+has an option to enable error injection. Look for something called WHEA
+or similar. Often, you need to enable an ACPI5 support option prior, in
+order to see the APEI,EINJ,... functionality supported and exposed by
+the BIOS menu.
+
+To use EINJ, make sure the following are options enabled in your kernel
+configuration:
+
+CONFIG_DEBUG_FS
+CONFIG_ACPI_APEI
+CONFIG_ACPI_APEI_EINJ
+
+The EINJ user interface is in <debugfs mount point>/apei/einj.
+
+The following files belong to it:
+
+- available_error_type
+
+ This file shows which error types are supported:
+
+ Error Type Value Error Description
+ ================ =================
+ 0x00000001 Processor Correctable
+ 0x00000002 Processor Uncorrectable non-fatal
+ 0x00000004 Processor Uncorrectable fatal
+ 0x00000008 Memory Correctable
+ 0x00000010 Memory Uncorrectable non-fatal
+ 0x00000020 Memory Uncorrectable fatal
+ 0x00000040 PCI Express Correctable
+ 0x00000080 PCI Express Uncorrectable fatal
+ 0x00000100 PCI Express Uncorrectable non-fatal
+ 0x00000200 Platform Correctable
+ 0x00000400 Platform Uncorrectable non-fatal
+ 0x00000800 Platform Uncorrectable fatal
+
+ The format of the file contents are as above, except present are only
+ the available error types.
+
+- error_type
+
+ Set the value of the error type being injected. Possible error types
+ are defined in the file available_error_type above.
+
+- error_inject
+
+ Write any integer to this file to trigger the error injection. Make
+ sure you have specified all necessary error parameters, i.e. this
+ write should be the last step when injecting errors.
+
+- flags
+
+ Present for kernel versions 3.13 and above. Used to specify which
+ of param{1..4} are valid and should be used by the firmware during
+ injection. Value is a bitmask as specified in ACPI5.0 spec for the
+ SET_ERROR_TYPE_WITH_ADDRESS data structure:
+
+ Bit 0 - Processor APIC field valid (see param3 below).
+ Bit 1 - Memory address and mask valid (param1 and param2).
+ Bit 2 - PCIe (seg,bus,dev,fn) valid (see param4 below).
+
+ If set to zero, legacy behavior is mimicked where the type of
+ injection specifies just one bit set, and param1 is multiplexed.
+
+- param1
+
+ This file is used to set the first error parameter value. Its effect
+ depends on the error type specified in error_type. For example, if
+ error type is memory related type, the param1 should be a valid
+ physical memory address. [Unless "flag" is set - see above]
+
+- param2
+
+ Same use as param1 above. For example, if error type is of memory
+ related type, then param2 should be a physical memory address mask.
+ Linux requires page or narrower granularity, say, 0xfffffffffffff000.
+
+- param3
+
+ Used when the 0x1 bit is set in "flags" to specify the APIC id
+
+- param4
+ Used when the 0x4 bit is set in "flags" to specify target PCIe device
+
+- notrigger
+
+ The error injection mechanism is a two-step process. First inject the
+ error, then perform some actions to trigger it. Setting "notrigger"
+ to 1 skips the trigger phase, which *may* allow the user to cause the
+ error in some other context by a simple access to the CPU, memory
+ location, or device that is the target of the error injection. Whether
+ this actually works depends on what operations the BIOS actually
+ includes in the trigger phase.
+
+BIOS versions based on the ACPI 4.0 specification have limited options
+in controlling where the errors are injected. Your BIOS may support an
+extension (enabled with the param_extension=1 module parameter, or boot
+command line einj.param_extension=1). This allows the address and mask
+for memory injections to be specified by the param1 and param2 files in
+apei/einj.
+
+BIOS versions based on the ACPI 5.0 specification have more control over
+the target of the injection. For processor-related errors (type 0x1, 0x2
+and 0x4), you can set flags to 0x3 (param3 for bit 0, and param1 and
+param2 for bit 1) so that you have more information added to the error
+signature being injected. The actual data passed is this:
+
+ memory_address = param1;
+ memory_address_range = param2;
+ apicid = param3;
+ pcie_sbdf = param4;
+
+For memory errors (type 0x8, 0x10 and 0x20) the address is set using
+param1 with a mask in param2 (0x0 is equivalent to all ones). For PCI
+express errors (type 0x40, 0x80 and 0x100) the segment, bus, device and
+function are specified using param1:
+
+ 31 24 23 16 15 11 10 8 7 0
+ +-------------------------------------------------+
+ | segment | bus | device | function | reserved |
+ +-------------------------------------------------+
+
+Anyway, you get the idea, if there's doubt just take a look at the code
+in drivers/acpi/apei/einj.c.
+
+An ACPI 5.0 BIOS may also allow vendor-specific errors to be injected.
+In this case a file named vendor will contain identifying information
+from the BIOS that hopefully will allow an application wishing to use
+the vendor-specific extension to tell that they are running on a BIOS
+that supports it. All vendor extensions have the 0x80000000 bit set in
+error_type. A file vendor_flags controls the interpretation of param1
+and param2 (1 = PROCESSOR, 2 = MEMORY, 4 = PCI). See your BIOS vendor
+documentation for details (and expect changes to this API if vendors
+creativity in using this feature expands beyond our expectations).
+
+
+An error injection example:
+
+# cd /sys/kernel/debug/apei/einj
+# cat available_error_type # See which errors can be injected
+0x00000002 Processor Uncorrectable non-fatal
+0x00000008 Memory Correctable
+0x00000010 Memory Uncorrectable non-fatal
+# echo 0x12345000 > param1 # Set memory address for injection
+# echo $((-1 << 12)) > param2 # Mask 0xfffffffffffff000 - anywhere in this page
+# echo 0x8 > error_type # Choose correctable memory error
+# echo 1 > error_inject # Inject now
+
+You should see something like this in dmesg:
+
+[22715.830801] EDAC sbridge MC3: HANDLING MCE MEMORY ERROR
+[22715.834759] EDAC sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090
+[22715.834759] EDAC sbridge MC3: TSC 0
+[22715.834759] EDAC sbridge MC3: ADDR 12345000 EDAC sbridge MC3: MISC 144780c86
+[22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0
+[22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 - area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
+
+For more information about EINJ, please refer to ACPI specification
+version 4.0, section 17.5 and ACPI 5.0, section 18.6.
diff --git a/Documentation/acpi/apei/output_format.txt b/Documentation/acpi/apei/output_format.txt
new file mode 100644
index 000000000..0c49c197c
--- /dev/null
+++ b/Documentation/acpi/apei/output_format.txt
@@ -0,0 +1,147 @@
+ APEI output format
+ ~~~~~~~~~~~~~~~~~~
+
+APEI uses printk as hardware error reporting interface, the output
+format is as follow.
+
+<error record> :=
+APEI generic hardware error status
+severity: <integer>, <severity string>
+section: <integer>, severity: <integer>, <severity string>
+flags: <integer>
+<section flags strings>
+fru_id: <uuid string>
+fru_text: <string>
+section_type: <section type string>
+<section data>
+
+<severity string>* := recoverable | fatal | corrected | info
+
+<section flags strings># :=
+[primary][, containment warning][, reset][, threshold exceeded]\
+[, resource not accessible][, latent error]
+
+<section type string> := generic processor error | memory error | \
+PCIe error | unknown, <uuid string>
+
+<section data> :=
+<generic processor section data> | <memory section data> | \
+<pcie section data> | <null>
+
+<generic processor section data> :=
+[processor_type: <integer>, <proc type string>]
+[processor_isa: <integer>, <proc isa string>]
+[error_type: <integer>
+<proc error type strings>]
+[operation: <integer>, <proc operation string>]
+[flags: <integer>
+<proc flags strings>]
+[level: <integer>]
+[version_info: <integer>]
+[processor_id: <integer>]
+[target_address: <integer>]
+[requestor_id: <integer>]
+[responder_id: <integer>]
+[IP: <integer>]
+
+<proc type string>* := IA32/X64 | IA64
+
+<proc isa string>* := IA32 | IA64 | X64
+
+<processor error type strings># :=
+[cache error][, TLB error][, bus error][, micro-architectural error]
+
+<proc operation string>* := unknown or generic | data read | data write | \
+instruction execution
+
+<proc flags strings># :=
+[restartable][, precise IP][, overflow][, corrected]
+
+<memory section data> :=
+[error_status: <integer>]
+[physical_address: <integer>]
+[physical_address_mask: <integer>]
+[node: <integer>]
+[card: <integer>]
+[module: <integer>]
+[bank: <integer>]
+[device: <integer>]
+[row: <integer>]
+[column: <integer>]
+[bit_position: <integer>]
+[requestor_id: <integer>]
+[responder_id: <integer>]
+[target_id: <integer>]
+[error_type: <integer>, <mem error type string>]
+
+<mem error type string>* :=
+unknown | no error | single-bit ECC | multi-bit ECC | \
+single-symbol chipkill ECC | multi-symbol chipkill ECC | master abort | \
+target abort | parity error | watchdog timeout | invalid address | \
+mirror Broken | memory sparing | scrub corrected error | \
+scrub uncorrected error
+
+<pcie section data> :=
+[port_type: <integer>, <pcie port type string>]
+[version: <integer>.<integer>]
+[command: <integer>, status: <integer>]
+[device_id: <integer>:<integer>:<integer>.<integer>
+slot: <integer>
+secondary_bus: <integer>
+vendor_id: <integer>, device_id: <integer>
+class_code: <integer>]
+[serial number: <integer>, <integer>]
+[bridge: secondary_status: <integer>, control: <integer>]
+[aer_status: <integer>, aer_mask: <integer>
+<aer status string>
+[aer_uncor_severity: <integer>]
+aer_layer=<aer layer string>, aer_agent=<aer agent string>
+aer_tlp_header: <integer> <integer> <integer> <integer>]
+
+<pcie port type string>* := PCIe end point | legacy PCI end point | \
+unknown | unknown | root port | upstream switch port | \
+downstream switch port | PCIe to PCI/PCI-X bridge | \
+PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \
+root complex event collector
+
+if section severity is fatal or recoverable
+<aer status string># :=
+unknown | unknown | unknown | unknown | Data Link Protocol | \
+unknown | unknown | unknown | unknown | unknown | unknown | unknown | \
+Poisoned TLP | Flow Control Protocol | Completion Timeout | \
+Completer Abort | Unexpected Completion | Receiver Overflow | \
+Malformed TLP | ECRC | Unsupported Request
+else
+<aer status string># :=
+Receiver Error | unknown | unknown | unknown | unknown | unknown | \
+Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \
+Replay Timer Timeout | Advisory Non-Fatal
+fi
+
+<aer layer string> :=
+Physical Layer | Data Link Layer | Transaction Layer
+
+<aer agent string> :=
+Receiver ID | Requester ID | Completer ID | Transmitter ID
+
+Where, [] designate corresponding content is optional
+
+All <field string> description with * has the following format:
+
+field: <integer>, <field string>
+
+Where value of <integer> should be the position of "string" in <field
+string> description. Otherwise, <field string> will be "unknown".
+
+All <field strings> description with # has the following format:
+
+field: <integer>
+<field strings>
+
+Where each string in <fields strings> corresponding to one set bit of
+<integer>. The bit position is the position of "string" in <field
+strings> description.
+
+For more detailed explanation of every field, please refer to UEFI
+specification version 2.3 or later, section Appendix N: Common
+Platform Error Record.
diff --git a/Documentation/acpi/debug.txt b/Documentation/acpi/debug.txt
new file mode 100644
index 000000000..65bf47c46
--- /dev/null
+++ b/Documentation/acpi/debug.txt
@@ -0,0 +1,148 @@
+ ACPI Debug Output
+
+
+The ACPI CA, the Linux ACPI core, and some ACPI drivers can generate debug
+output. This document describes how to use this facility.
+
+Compile-time configuration
+--------------------------
+
+ACPI debug output is globally enabled by CONFIG_ACPI_DEBUG. If this config
+option is turned off, the debug messages are not even built into the
+kernel.
+
+Boot- and run-time configuration
+--------------------------------
+
+When CONFIG_ACPI_DEBUG=y, you can select the component and level of messages
+you're interested in. At boot-time, use the acpi.debug_layer and
+acpi.debug_level kernel command line options. After boot, you can use the
+debug_layer and debug_level files in /sys/module/acpi/parameters/ to control
+the debug messages.
+
+debug_layer (component)
+-----------------------
+
+The "debug_layer" is a mask that selects components of interest, e.g., a
+specific driver or part of the ACPI interpreter. To build the debug_layer
+bitmask, look for the "#define _COMPONENT" in an ACPI source file.
+
+You can set the debug_layer mask at boot-time using the acpi.debug_layer
+command line argument, and you can change it after boot by writing values
+to /sys/module/acpi/parameters/debug_layer.
+
+The possible components are defined in include/acpi/acoutput.h and
+include/acpi/acpi_drivers.h. Reading /sys/module/acpi/parameters/debug_layer
+shows the supported mask values, currently these:
+
+ ACPI_UTILITIES 0x00000001
+ ACPI_HARDWARE 0x00000002
+ ACPI_EVENTS 0x00000004
+ ACPI_TABLES 0x00000008
+ ACPI_NAMESPACE 0x00000010
+ ACPI_PARSER 0x00000020
+ ACPI_DISPATCHER 0x00000040
+ ACPI_EXECUTER 0x00000080
+ ACPI_RESOURCES 0x00000100
+ ACPI_CA_DEBUGGER 0x00000200
+ ACPI_OS_SERVICES 0x00000400
+ ACPI_CA_DISASSEMBLER 0x00000800
+ ACPI_COMPILER 0x00001000
+ ACPI_TOOLS 0x00002000
+ ACPI_BUS_COMPONENT 0x00010000
+ ACPI_AC_COMPONENT 0x00020000
+ ACPI_BATTERY_COMPONENT 0x00040000
+ ACPI_BUTTON_COMPONENT 0x00080000
+ ACPI_SBS_COMPONENT 0x00100000
+ ACPI_FAN_COMPONENT 0x00200000
+ ACPI_PCI_COMPONENT 0x00400000
+ ACPI_POWER_COMPONENT 0x00800000
+ ACPI_CONTAINER_COMPONENT 0x01000000
+ ACPI_SYSTEM_COMPONENT 0x02000000
+ ACPI_THERMAL_COMPONENT 0x04000000
+ ACPI_MEMORY_DEVICE_COMPONENT 0x08000000
+ ACPI_VIDEO_COMPONENT 0x10000000
+ ACPI_PROCESSOR_COMPONENT 0x20000000
+
+debug_level
+-----------
+
+The "debug_level" is a mask that selects different types of messages, e.g.,
+those related to initialization, method execution, informational messages, etc.
+To build debug_level, look at the level specified in an ACPI_DEBUG_PRINT()
+statement.
+
+The ACPI interpreter uses several different levels, but the Linux
+ACPI core and ACPI drivers generally only use ACPI_LV_INFO.
+
+You can set the debug_level mask at boot-time using the acpi.debug_level
+command line argument, and you can change it after boot by writing values
+to /sys/module/acpi/parameters/debug_level.
+
+The possible levels are defined in include/acpi/acoutput.h. Reading
+/sys/module/acpi/parameters/debug_level shows the supported mask values,
+currently these:
+
+ ACPI_LV_INIT 0x00000001
+ ACPI_LV_DEBUG_OBJECT 0x00000002
+ ACPI_LV_INFO 0x00000004
+ ACPI_LV_INIT_NAMES 0x00000020
+ ACPI_LV_PARSE 0x00000040
+ ACPI_LV_LOAD 0x00000080
+ ACPI_LV_DISPATCH 0x00000100
+ ACPI_LV_EXEC 0x00000200
+ ACPI_LV_NAMES 0x00000400
+ ACPI_LV_OPREGION 0x00000800
+ ACPI_LV_BFIELD 0x00001000
+ ACPI_LV_TABLES 0x00002000
+ ACPI_LV_VALUES 0x00004000
+ ACPI_LV_OBJECTS 0x00008000
+ ACPI_LV_RESOURCES 0x00010000
+ ACPI_LV_USER_REQUESTS 0x00020000
+ ACPI_LV_PACKAGE 0x00040000
+ ACPI_LV_ALLOCATIONS 0x00100000
+ ACPI_LV_FUNCTIONS 0x00200000
+ ACPI_LV_OPTIMIZATIONS 0x00400000
+ ACPI_LV_MUTEX 0x01000000
+ ACPI_LV_THREADS 0x02000000
+ ACPI_LV_IO 0x04000000
+ ACPI_LV_INTERRUPTS 0x08000000
+ ACPI_LV_AML_DISASSEMBLE 0x10000000
+ ACPI_LV_VERBOSE_INFO 0x20000000
+ ACPI_LV_FULL_TABLES 0x40000000
+ ACPI_LV_EVENTS 0x80000000
+
+Examples
+--------
+
+For example, drivers/acpi/bus.c contains this:
+
+ #define _COMPONENT ACPI_BUS_COMPONENT
+ ...
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Device insertion detected\n"));
+
+To turn on this message, set the ACPI_BUS_COMPONENT bit in acpi.debug_layer
+and the ACPI_LV_INFO bit in acpi.debug_level. (The ACPI_DEBUG_PRINT
+statement uses ACPI_DB_INFO, which is macro based on the ACPI_LV_INFO
+definition.)
+
+Enable all AML "Debug" output (stores to the Debug object while interpreting
+AML) during boot:
+
+ acpi.debug_layer=0xffffffff acpi.debug_level=0x2
+
+Enable PCI and PCI interrupt routing debug messages:
+
+ acpi.debug_layer=0x400000 acpi.debug_level=0x4
+
+Enable all ACPI hardware-related messages:
+
+ acpi.debug_layer=0x2 acpi.debug_level=0xffffffff
+
+Enable all ACPI_DB_INFO messages after boot:
+
+ # echo 0x4 > /sys/module/acpi/parameters/debug_level
+
+Show all valid component values:
+
+ # cat /sys/module/acpi/parameters/debug_layer
diff --git a/Documentation/acpi/dsdt-override.txt b/Documentation/acpi/dsdt-override.txt
new file mode 100644
index 000000000..784841caa
--- /dev/null
+++ b/Documentation/acpi/dsdt-override.txt
@@ -0,0 +1,7 @@
+Linux supports a method of overriding the BIOS DSDT:
+
+CONFIG_ACPI_CUSTOM_DSDT builds the image into the kernel.
+
+When to use this method is described in detail on the
+Linux/ACPI home page:
+https://01.org/linux-acpi/documentation/overriding-dsdt
diff --git a/Documentation/acpi/enumeration.txt b/Documentation/acpi/enumeration.txt
new file mode 100644
index 000000000..15dfce708
--- /dev/null
+++ b/Documentation/acpi/enumeration.txt
@@ -0,0 +1,361 @@
+ACPI based device enumeration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ACPI 5 introduced a set of new resources (UartTSerialBus, I2cSerialBus,
+SpiSerialBus, GpioIo and GpioInt) which can be used in enumerating slave
+devices behind serial bus controllers.
+
+In addition we are starting to see peripherals integrated in the
+SoC/Chipset to appear only in ACPI namespace. These are typically devices
+that are accessed through memory-mapped registers.
+
+In order to support this and re-use the existing drivers as much as
+possible we decided to do following:
+
+ o Devices that have no bus connector resource are represented as
+ platform devices.
+
+ o Devices behind real busses where there is a connector resource
+ are represented as struct spi_device or struct i2c_device
+ (standard UARTs are not busses so there is no struct uart_device).
+
+As both ACPI and Device Tree represent a tree of devices (and their
+resources) this implementation follows the Device Tree way as much as
+possible.
+
+The ACPI implementation enumerates devices behind busses (platform, SPI and
+I2C), creates the physical devices and binds them to their ACPI handle in
+the ACPI namespace.
+
+This means that when ACPI_HANDLE(dev) returns non-NULL the device was
+enumerated from ACPI namespace. This handle can be used to extract other
+device-specific configuration. There is an example of this below.
+
+Platform bus support
+~~~~~~~~~~~~~~~~~~~~
+Since we are using platform devices to represent devices that are not
+connected to any physical bus we only need to implement a platform driver
+for the device and add supported ACPI IDs. If this same IP-block is used on
+some other non-ACPI platform, the driver might work out of the box or needs
+some minor changes.
+
+Adding ACPI support for an existing driver should be pretty
+straightforward. Here is the simplest example:
+
+ #ifdef CONFIG_ACPI
+ static struct acpi_device_id mydrv_acpi_match[] = {
+ /* ACPI IDs here */
+ { }
+ };
+ MODULE_DEVICE_TABLE(acpi, mydrv_acpi_match);
+ #endif
+
+ static struct platform_driver my_driver = {
+ ...
+ .driver = {
+ .acpi_match_table = ACPI_PTR(mydrv_acpi_match),
+ },
+ };
+
+If the driver needs to perform more complex initialization like getting and
+configuring GPIOs it can get its ACPI handle and extract this information
+from ACPI tables.
+
+DMA support
+~~~~~~~~~~~
+DMA controllers enumerated via ACPI should be registered in the system to
+provide generic access to their resources. For example, a driver that would
+like to be accessible to slave devices via generic API call
+dma_request_slave_channel() must register itself at the end of the probe
+function like this:
+
+ err = devm_acpi_dma_controller_register(dev, xlate_func, dw);
+ /* Handle the error if it's not a case of !CONFIG_ACPI */
+
+and implement custom xlate function if needed (usually acpi_dma_simple_xlate()
+is enough) which converts the FixedDMA resource provided by struct
+acpi_dma_spec into the corresponding DMA channel. A piece of code for that case
+could look like:
+
+ #ifdef CONFIG_ACPI
+ struct filter_args {
+ /* Provide necessary information for the filter_func */
+ ...
+ };
+
+ static bool filter_func(struct dma_chan *chan, void *param)
+ {
+ /* Choose the proper channel */
+ ...
+ }
+
+ static struct dma_chan *xlate_func(struct acpi_dma_spec *dma_spec,
+ struct acpi_dma *adma)
+ {
+ dma_cap_mask_t cap;
+ struct filter_args args;
+
+ /* Prepare arguments for filter_func */
+ ...
+ return dma_request_channel(cap, filter_func, &args);
+ }
+ #else
+ static struct dma_chan *xlate_func(struct acpi_dma_spec *dma_spec,
+ struct acpi_dma *adma)
+ {
+ return NULL;
+ }
+ #endif
+
+dma_request_slave_channel() will call xlate_func() for each registered DMA
+controller. In the xlate function the proper channel must be chosen based on
+information in struct acpi_dma_spec and the properties of the controller
+provided by struct acpi_dma.
+
+Clients must call dma_request_slave_channel() with the string parameter that
+corresponds to a specific FixedDMA resource. By default "tx" means the first
+entry of the FixedDMA resource array, "rx" means the second entry. The table
+below shows a layout:
+
+ Device (I2C0)
+ {
+ ...
+ Method (_CRS, 0, NotSerialized)
+ {
+ Name (DBUF, ResourceTemplate ()
+ {
+ FixedDMA (0x0018, 0x0004, Width32bit, _Y48)
+ FixedDMA (0x0019, 0x0005, Width32bit, )
+ })
+ ...
+ }
+ }
+
+So, the FixedDMA with request line 0x0018 is "tx" and next one is "rx" in
+this example.
+
+In robust cases the client unfortunately needs to call
+acpi_dma_request_slave_chan_by_index() directly and therefore choose the
+specific FixedDMA resource by its index.
+
+SPI serial bus support
+~~~~~~~~~~~~~~~~~~~~~~
+Slave devices behind SPI bus have SpiSerialBus resource attached to them.
+This is extracted automatically by the SPI core and the slave devices are
+enumerated once spi_register_master() is called by the bus driver.
+
+Here is what the ACPI namespace for a SPI slave might look like:
+
+ Device (EEP0)
+ {
+ Name (_ADR, 1)
+ Name (_CID, Package() {
+ "ATML0025",
+ "AT25",
+ })
+ ...
+ Method (_CRS, 0, NotSerialized)
+ {
+ SPISerialBus(1, PolarityLow, FourWireMode, 8,
+ ControllerInitiated, 1000000, ClockPolarityLow,
+ ClockPhaseFirst, "\\_SB.PCI0.SPI1",)
+ }
+ ...
+
+The SPI device drivers only need to add ACPI IDs in a similar way than with
+the platform device drivers. Below is an example where we add ACPI support
+to at25 SPI eeprom driver (this is meant for the above ACPI snippet):
+
+ #ifdef CONFIG_ACPI
+ static struct acpi_device_id at25_acpi_match[] = {
+ { "AT25", 0 },
+ { },
+ };
+ MODULE_DEVICE_TABLE(acpi, at25_acpi_match);
+ #endif
+
+ static struct spi_driver at25_driver = {
+ .driver = {
+ ...
+ .acpi_match_table = ACPI_PTR(at25_acpi_match),
+ },
+ };
+
+Note that this driver actually needs more information like page size of the
+eeprom etc. but at the time writing this there is no standard way of
+passing those. One idea is to return this in _DSM method like:
+
+ Device (EEP0)
+ {
+ ...
+ Method (_DSM, 4, NotSerialized)
+ {
+ Store (Package (6)
+ {
+ "byte-len", 1024,
+ "addr-mode", 2,
+ "page-size, 32
+ }, Local0)
+
+ // Check UUIDs etc.
+
+ Return (Local0)
+ }
+
+Then the at25 SPI driver can get this configuration by calling _DSM on its
+ACPI handle like:
+
+ struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_object_list input;
+ acpi_status status;
+
+ /* Fill in the input buffer */
+
+ status = acpi_evaluate_object(ACPI_HANDLE(&spi->dev), "_DSM",
+ &input, &output);
+ if (ACPI_FAILURE(status))
+ /* Handle the error */
+
+ /* Extract the data here */
+
+ kfree(output.pointer);
+
+I2C serial bus support
+~~~~~~~~~~~~~~~~~~~~~~
+The slaves behind I2C bus controller only need to add the ACPI IDs like
+with the platform and SPI drivers. The I2C core automatically enumerates
+any slave devices behind the controller device once the adapter is
+registered.
+
+Below is an example of how to add ACPI support to the existing mpu3050
+input driver:
+
+ #ifdef CONFIG_ACPI
+ static struct acpi_device_id mpu3050_acpi_match[] = {
+ { "MPU3050", 0 },
+ { },
+ };
+ MODULE_DEVICE_TABLE(acpi, mpu3050_acpi_match);
+ #endif
+
+ static struct i2c_driver mpu3050_i2c_driver = {
+ .driver = {
+ .name = "mpu3050",
+ .owner = THIS_MODULE,
+ .pm = &mpu3050_pm,
+ .of_match_table = mpu3050_of_match,
+ .acpi_match_table = ACPI_PTR(mpu3050_acpi_match),
+ },
+ .probe = mpu3050_probe,
+ .remove = mpu3050_remove,
+ .id_table = mpu3050_ids,
+ };
+
+GPIO support
+~~~~~~~~~~~~
+ACPI 5 introduced two new resources to describe GPIO connections: GpioIo
+and GpioInt. These resources can be used to pass GPIO numbers used by
+the device to the driver. ACPI 5.1 extended this with _DSD (Device
+Specific Data) which made it possible to name the GPIOs among other things.
+
+For example:
+
+Device (DEV)
+{
+ Method (_CRS, 0, NotSerialized)
+ {
+ Name (SBUF, ResourceTemplate()
+ {
+ ...
+ // Used to power on/off the device
+ GpioIo (Exclusive, PullDefault, 0x0000, 0x0000,
+ IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0",
+ 0x00, ResourceConsumer,,)
+ {
+ // Pin List
+ 0x0055
+ }
+
+ // Interrupt for the device
+ GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone,
+ 0x0000, "\\_SB.PCI0.GPI0", 0x00, ResourceConsumer,,)
+ {
+ // Pin list
+ 0x0058
+ }
+
+ ...
+
+ }
+
+ Return (SBUF)
+ }
+
+ // ACPI 5.1 _DSD used for naming the GPIOs
+ Name (_DSD, Package ()
+ {
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package ()
+ {
+ Package () {"power-gpios", Package() {^DEV, 0, 0, 0 }},
+ Package () {"irq-gpios", Package() {^DEV, 1, 0, 0 }},
+ }
+ })
+ ...
+
+These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0"
+specifies the path to the controller. In order to use these GPIOs in Linux
+we need to translate them to the corresponding Linux GPIO descriptors.
+
+There is a standard GPIO API for that and is documented in
+Documentation/gpio/.
+
+In the above example we can get the corresponding two GPIO descriptors with
+a code like this:
+
+ #include <linux/gpio/consumer.h>
+ ...
+
+ struct gpio_desc *irq_desc, *power_desc;
+
+ irq_desc = gpiod_get(dev, "irq");
+ if (IS_ERR(irq_desc))
+ /* handle error */
+
+ power_desc = gpiod_get(dev, "power");
+ if (IS_ERR(power_desc))
+ /* handle error */
+
+ /* Now we can use the GPIO descriptors */
+
+There are also devm_* versions of these functions which release the
+descriptors once the device is released.
+
+See Documentation/acpi/gpio-properties.txt for more information about the
+_DSD binding related to GPIOs.
+
+MFD devices
+~~~~~~~~~~~
+The MFD devices register their children as platform devices. For the child
+devices there needs to be an ACPI handle that they can use to reference
+parts of the ACPI namespace that relate to them. In the Linux MFD subsystem
+we provide two ways:
+
+ o The children share the parent ACPI handle.
+ o The MFD cell can specify the ACPI id of the device.
+
+For the first case, the MFD drivers do not need to do anything. The
+resulting child platform device will have its ACPI_COMPANION() set to point
+to the parent device.
+
+If the ACPI namespace has a device that we can match using an ACPI id,
+the id should be set like:
+
+ static struct mfd_cell my_subdevice_cell = {
+ .name = "my_subdevice",
+ /* set the resources relative to the parent */
+ .acpi_pnpid = "XYZ0001",
+ };
+
+The ACPI id "XYZ0001" is then used to lookup an ACPI device directly under
+the MFD device and if found, that ACPI companion device is bound to the
+resulting child platform device.
diff --git a/Documentation/acpi/gpio-properties.txt b/Documentation/acpi/gpio-properties.txt
new file mode 100644
index 000000000..f35dad11f
--- /dev/null
+++ b/Documentation/acpi/gpio-properties.txt
@@ -0,0 +1,96 @@
+_DSD Device Properties Related to GPIO
+--------------------------------------
+
+With the release of ACPI 5.1, the _DSD configuration object finally
+allows names to be given to GPIOs (and other things as well) returned
+by _CRS. Previously, we were only able to use an integer index to find
+the corresponding GPIO, which is pretty error prone (it depends on
+the _CRS output ordering, for example).
+
+With _DSD we can now query GPIOs using a name instead of an integer
+index, like the ASL example below shows:
+
+ // Bluetooth device with reset and shutdown GPIOs
+ Device (BTH)
+ {
+ Name (_HID, ...)
+
+ Name (_CRS, ResourceTemplate ()
+ {
+ GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionInputOnly,
+ "\\_SB.GPO0", 0, ResourceConsumer) {15}
+ GpioIo (Exclusive, PullUp, 0, 0, IoRestrictionInputOnly,
+ "\\_SB.GPO0", 0, ResourceConsumer) {27, 31}
+ })
+
+ Name (_DSD, Package ()
+ {
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package ()
+ {
+ Package () {"reset-gpio", Package() {^BTH, 1, 1, 0 }},
+ Package () {"shutdown-gpio", Package() {^BTH, 0, 0, 0 }},
+ }
+ })
+ }
+
+The format of the supported GPIO property is:
+
+ Package () { "name", Package () { ref, index, pin, active_low }}
+
+ ref - The device that has _CRS containing GpioIo()/GpioInt() resources,
+ typically this is the device itself (BTH in our case).
+ index - Index of the GpioIo()/GpioInt() resource in _CRS starting from zero.
+ pin - Pin in the GpioIo()/GpioInt() resource. Typically this is zero.
+ active_low - If 1 the GPIO is marked as active_low.
+
+Since ACPI GpioIo() resource does not have a field saying whether it is
+active low or high, the "active_low" argument can be used here. Setting
+it to 1 marks the GPIO as active low.
+
+In our Bluetooth example the "reset-gpio" refers to the second GpioIo()
+resource, second pin in that resource with the GPIO number of 31.
+
+ACPI GPIO Mappings Provided by Drivers
+--------------------------------------
+
+There are systems in which the ACPI tables do not contain _DSD but provide _CRS
+with GpioIo()/GpioInt() resources and device drivers still need to work with
+them.
+
+In those cases ACPI device identification objects, _HID, _CID, _CLS, _SUB, _HRV,
+available to the driver can be used to identify the device and that is supposed
+to be sufficient to determine the meaning and purpose of all of the GPIO lines
+listed by the GpioIo()/GpioInt() resources returned by _CRS. In other words,
+the driver is supposed to know what to use the GpioIo()/GpioInt() resources for
+once it has identified the device. Having done that, it can simply assign names
+to the GPIO lines it is going to use and provide the GPIO subsystem with a
+mapping between those names and the ACPI GPIO resources corresponding to them.
+
+To do that, the driver needs to define a mapping table as a NULL-terminated
+array of struct acpi_gpio_mapping objects that each contain a name, a pointer
+to an array of line data (struct acpi_gpio_params) objects and the size of that
+array. Each struct acpi_gpio_params object consists of three fields,
+crs_entry_index, line_index, active_low, representing the index of the target
+GpioIo()/GpioInt() resource in _CRS starting from zero, the index of the target
+line in that resource starting from zero, and the active-low flag for that line,
+respectively, in analogy with the _DSD GPIO property format specified above.
+
+For the example Bluetooth device discussed previously the data structures in
+question would look like this:
+
+static const struct acpi_gpio_params reset_gpio = { 1, 1, false };
+static const struct acpi_gpio_params shutdown_gpio = { 0, 0, false };
+
+static const struct acpi_gpio_mapping bluetooth_acpi_gpios[] = {
+ { "reset-gpio", &reset_gpio, 1 },
+ { "shutdown-gpio", &shutdown_gpio, 1 },
+ { },
+};
+
+Next, the mapping table needs to be passed as the second argument to
+acpi_dev_add_driver_gpios() that will register it with the ACPI device object
+pointed to by its first argument. That should be done in the driver's .probe()
+routine. On removal, the driver should unregister its GPIO mapping table by
+calling acpi_dev_remove_driver_gpios() on the ACPI device object where that
+table was previously registered.
diff --git a/Documentation/acpi/initrd_table_override.txt b/Documentation/acpi/initrd_table_override.txt
new file mode 100644
index 000000000..35c3f5415
--- /dev/null
+++ b/Documentation/acpi/initrd_table_override.txt
@@ -0,0 +1,94 @@
+Overriding ACPI tables via initrd
+=================================
+
+1) Introduction (What is this about)
+2) What is this for
+3) How does it work
+4) References (Where to retrieve userspace tools)
+
+1) What is this about
+---------------------
+
+If the ACPI_INITRD_TABLE_OVERRIDE compile option is true, it is possible to
+override nearly any ACPI table provided by the BIOS with an instrumented,
+modified one.
+
+For a full list of ACPI tables that can be overridden, take a look at
+the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in drivers/acpi/osl.c
+All ACPI tables iasl (Intel's ACPI compiler and disassembler) knows should
+be overridable, except:
+ - ACPI_SIG_RSDP (has a signature of 6 bytes)
+ - ACPI_SIG_FACS (does not have an ordinary ACPI table header)
+Both could get implemented as well.
+
+
+2) What is this for
+-------------------
+
+Please keep in mind that this is a debug option.
+ACPI tables should not get overridden for productive use.
+If BIOS ACPI tables are overridden the kernel will get tainted with the
+TAINT_OVERRIDDEN_ACPI_TABLE flag.
+Complain to your platform/BIOS vendor if you find a bug which is so sever
+that a workaround is not accepted in the Linux kernel.
+
+Still, it can and should be enabled in any kernel, because:
+ - There is no functional change with not instrumented initrds
+ - It provides a powerful feature to easily debug and test ACPI BIOS table
+ compatibility with the Linux kernel.
+
+
+3) How does it work
+-------------------
+
+# Extract the machine's ACPI tables:
+cd /tmp
+acpidump >acpidump
+acpixtract -a acpidump
+# Disassemble, modify and recompile them:
+iasl -d *.dat
+# For example add this statement into a _PRT (PCI Routing Table) function
+# of the DSDT:
+Store("HELLO WORLD", debug)
+iasl -sa dsdt.dsl
+# Add the raw ACPI tables to an uncompressed cpio archive.
+# They must be put into a /kernel/firmware/acpi directory inside the
+# cpio archive.
+# The uncompressed cpio archive must be the first.
+# Other, typically compressed cpio archives, must be
+# concatenated on top of the uncompressed one.
+mkdir -p kernel/firmware/acpi
+cp dsdt.aml kernel/firmware/acpi
+# A maximum of: #define ACPI_OVERRIDE_TABLES 10
+# tables are currently allowed (see osl.c):
+iasl -sa facp.dsl
+iasl -sa ssdt1.dsl
+cp facp.aml kernel/firmware/acpi
+cp ssdt1.aml kernel/firmware/acpi
+# Create the uncompressed cpio archive and concatenate the original initrd
+# on top:
+find kernel | cpio -H newc --create > /boot/instrumented_initrd
+cat /boot/initrd >>/boot/instrumented_initrd
+# reboot with increased acpi debug level, e.g. boot params:
+acpi.debug_level=0x2 acpi.debug_layer=0xFFFFFFFF
+# and check your syslog:
+[ 1.268089] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
+[ 1.272091] [ACPI Debug] String [0x0B] "HELLO WORLD"
+
+iasl is able to disassemble and recompile quite a lot different,
+also static ACPI tables.
+
+
+4) Where to retrieve userspace tools
+------------------------------------
+
+iasl and acpixtract are part of Intel's ACPICA project:
+http://acpica.org/
+and should be packaged by distributions (for example in the acpica package
+on SUSE).
+
+acpidump can be found in Len Browns pmtools:
+ftp://kernel.org/pub/linux/kernel/people/lenb/acpi/utils/pmtools/acpidump
+This tool is also part of the acpica package on SUSE.
+Alternatively, used ACPI tables can be retrieved via sysfs in latest kernels:
+/sys/firmware/acpi/tables
diff --git a/Documentation/acpi/method-customizing.txt b/Documentation/acpi/method-customizing.txt
new file mode 100644
index 000000000..5f55373dd
--- /dev/null
+++ b/Documentation/acpi/method-customizing.txt
@@ -0,0 +1,73 @@
+Linux ACPI Custom Control Method How To
+=======================================
+
+Written by Zhang Rui <rui.zhang@intel.com>
+
+
+Linux supports customizing ACPI control methods at runtime.
+
+Users can use this to
+1. override an existing method which may not work correctly,
+ or just for debugging purposes.
+2. insert a completely new method in order to create a missing
+ method such as _OFF, _ON, _STA, _INI, etc.
+For these cases, it is far simpler to dynamically install a single
+control method rather than override the entire DSDT, because kernel
+rebuild/reboot is not needed and test result can be got in minutes.
+
+Note: Only ACPI METHOD can be overridden, any other object types like
+ "Device", "OperationRegion", are not recognized.
+Note: The same ACPI control method can be overridden for many times,
+ and it's always the latest one that used by Linux/kernel.
+Note: To get the ACPI debug object output (Store (AAAA, Debug)),
+ please run "echo 1 > /sys/module/acpi/parameters/aml_debug_output".
+
+1. override an existing method
+ a) get the ACPI table via ACPI sysfs I/F. e.g. to get the DSDT,
+ just run "cat /sys/firmware/acpi/tables/DSDT > /tmp/dsdt.dat"
+ b) disassemble the table by running "iasl -d dsdt.dat".
+ c) rewrite the ASL code of the method and save it in a new file,
+ d) package the new file (psr.asl) to an ACPI table format.
+ Here is an example of a customized \_SB._AC._PSR method,
+
+ DefinitionBlock ("", "SSDT", 1, "", "", 0x20080715)
+ {
+ External (ACON)
+
+ Method (\_SB_.AC._PSR, 0, NotSerialized)
+ {
+ Store ("In AC _PSR", Debug)
+ Return (ACON)
+ }
+ }
+ Note that the full pathname of the method in ACPI namespace
+ should be used.
+ And remember to use "External" to declare external objects.
+ e) assemble the file to generate the AML code of the method.
+ e.g. "iasl psr.asl" (psr.aml is generated as a result)
+ f) mount debugfs by "mount -t debugfs none /sys/kernel/debug"
+ g) override the old method via the debugfs by running
+ "cat /tmp/psr.aml > /sys/kernel/debug/acpi/custom_method"
+
+2. insert a new method
+ This is easier than overriding an existing method.
+ We just need to create the ASL code of the method we want to
+ insert and then follow the step c) ~ g) in section 1.
+
+3. undo your changes
+ The "undo" operation is not supported for a new inserted method
+ right now, i.e. we can not remove a method currently.
+ For an overrided method, in order to undo your changes, please
+ save a copy of the method original ASL code in step c) section 1,
+ and redo step c) ~ g) to override the method with the original one.
+
+
+Note: We can use a kernel with multiple custom ACPI method running,
+ But each individual write to debugfs can implement a SINGLE
+ method override. i.e. if we want to insert/override multiple
+ ACPI methods, we need to redo step c) ~ g) for multiple times.
+
+Note: Be aware that root can mis-use this driver to modify arbitrary
+ memory and gain additional rights, if root's privileges got
+ restricted (for example if root is not allowed to load additional
+ modules after boot).
diff --git a/Documentation/acpi/method-tracing.txt b/Documentation/acpi/method-tracing.txt
new file mode 100644
index 000000000..f6efb1ea5
--- /dev/null
+++ b/Documentation/acpi/method-tracing.txt
@@ -0,0 +1,26 @@
+/sys/module/acpi/parameters/:
+
+trace_method_name
+ The AML method name that the user wants to trace
+
+trace_debug_layer
+ The temporary debug_layer used when tracing the method.
+ Using 0xffffffff by default if it is 0.
+
+trace_debug_level
+ The temporary debug_level used when tracing the method.
+ Using 0x00ffffff by default if it is 0.
+
+trace_state
+ The status of the tracing feature.
+
+ "enabled" means this feature is enabled
+ and the AML method is traced every time it's executed.
+
+ "1" means this feature is enabled and the AML method
+ will only be traced during the next execution.
+
+ "disabled" means this feature is disabled.
+ Users can enable/disable this debug tracing feature by
+ "echo string > /sys/module/acpi/parameters/trace_state".
+ "string" should be one of "enable", "disable" and "1".
diff --git a/Documentation/acpi/namespace.txt b/Documentation/acpi/namespace.txt
new file mode 100644
index 000000000..1860cb386
--- /dev/null
+++ b/Documentation/acpi/namespace.txt
@@ -0,0 +1,388 @@
+ACPI Device Tree - Representation of ACPI Namespace
+
+Copyright (C) 2013, Intel Corporation
+Author: Lv Zheng <lv.zheng@intel.com>
+
+
+Abstract:
+
+The Linux ACPI subsystem converts ACPI namespace objects into a Linux
+device tree under the /sys/devices/LNXSYSTEM:00 and updates it upon
+receiving ACPI hotplug notification events. For each device object in this
+hierarchy there is a corresponding symbolic link in the
+/sys/bus/acpi/devices.
+This document illustrates the structure of the ACPI device tree.
+
+
+Credit:
+
+Thanks for the help from Zhang Rui <rui.zhang@intel.com> and Rafael J.
+Wysocki <rafael.j.wysocki@intel.com>.
+
+
+1. ACPI Definition Blocks
+
+ The ACPI firmware sets up RSDP (Root System Description Pointer) in the
+ system memory address space pointing to the XSDT (Extended System
+ Description Table). The XSDT always points to the FADT (Fixed ACPI
+ Description Table) using its first entry, the data within the FADT
+ includes various fixed-length entries that describe fixed ACPI features
+ of the hardware. The FADT contains a pointer to the DSDT
+ (Differentiated System Descripition Table). The XSDT also contains
+ entries pointing to possibly multiple SSDTs (Secondary System
+ Description Table).
+
+ The DSDT and SSDT data is organized in data structures called definition
+ blocks that contain definitions of various objects, including ACPI
+ control methods, encoded in AML (ACPI Machine Language). The data block
+ of the DSDT along with the contents of SSDTs represents a hierarchical
+ data structure called the ACPI namespace whose topology reflects the
+ structure of the underlying hardware platform.
+
+ The relationships between ACPI System Definition Tables described above
+ are illustrated in the following diagram.
+
+ +---------+ +-------+ +--------+ +------------------------+
+ | RSDP | +->| XSDT | +->| FADT | | +-------------------+ |
+ +---------+ | +-------+ | +--------+ +-|->| DSDT | |
+ | Pointer | | | Entry |-+ | ...... | | | +-------------------+ |
+ +---------+ | +-------+ | X_DSDT |--+ | | Definition Blocks | |
+ | Pointer |-+ | ..... | | ...... | | +-------------------+ |
+ +---------+ +-------+ +--------+ | +-------------------+ |
+ | Entry |------------------|->| SSDT | |
+ +- - - -+ | +-------------------| |
+ | Entry | - - - - - - - -+ | | Definition Blocks | |
+ +- - - -+ | | +-------------------+ |
+ | | +- - - - - - - - - -+ |
+ +-|->| SSDT | |
+ | +-------------------+ |
+ | | Definition Blocks | |
+ | +- - - - - - - - - -+ |
+ +------------------------+
+ |
+ OSPM Loading |
+ \|/
+ +----------------+
+ | ACPI Namespace |
+ +----------------+
+
+ Figure 1. ACPI Definition Blocks
+
+ NOTE: RSDP can also contain a pointer to the RSDT (Root System
+ Description Table). Platforms provide RSDT to enable
+ compatibility with ACPI 1.0 operating systems. The OS is expected
+ to use XSDT, if present.
+
+
+2. Example ACPI Namespace
+
+ All definition blocks are loaded into a single namespace. The namespace
+ is a hierarchy of objects identified by names and paths.
+ The following naming conventions apply to object names in the ACPI
+ namespace:
+ 1. All names are 32 bits long.
+ 2. The first byte of a name must be one of 'A' - 'Z', '_'.
+ 3. Each of the remaining bytes of a name must be one of 'A' - 'Z', '0'
+ - '9', '_'.
+ 4. Names starting with '_' are reserved by the ACPI specification.
+ 5. The '\' symbol represents the root of the namespace (i.e. names
+ prepended with '\' are relative to the namespace root).
+ 6. The '^' symbol represents the parent of the current namespace node
+ (i.e. names prepended with '^' are relative to the parent of the
+ current namespace node).
+
+ The figure below shows an example ACPI namespace.
+
+ +------+
+ | \ | Root
+ +------+
+ |
+ | +------+
+ +-| _PR | Scope(_PR): the processor namespace
+ | +------+
+ | |
+ | | +------+
+ | +-| CPU0 | Processor(CPU0): the first processor
+ | +------+
+ |
+ | +------+
+ +-| _SB | Scope(_SB): the system bus namespace
+ | +------+
+ | |
+ | | +------+
+ | +-| LID0 | Device(LID0); the lid device
+ | | +------+
+ | | |
+ | | | +------+
+ | | +-| _HID | Name(_HID, "PNP0C0D"): the hardware ID
+ | | | +------+
+ | | |
+ | | | +------+
+ | | +-| _STA | Method(_STA): the status control method
+ | | +------+
+ | |
+ | | +------+
+ | +-| PCI0 | Device(PCI0); the PCI root bridge
+ | +------+
+ | |
+ | | +------+
+ | +-| _HID | Name(_HID, "PNP0A08"): the hardware ID
+ | | +------+
+ | |
+ | | +------+
+ | +-| _CID | Name(_CID, "PNP0A03"): the compatible ID
+ | | +------+
+ | |
+ | | +------+
+ | +-| RP03 | Scope(RP03): the PCI0 power scope
+ | | +------+
+ | | |
+ | | | +------+
+ | | +-| PXP3 | PowerResource(PXP3): the PCI0 power resource
+ | | +------+
+ | |
+ | | +------+
+ | +-| GFX0 | Device(GFX0): the graphics adapter
+ | +------+
+ | |
+ | | +------+
+ | +-| _ADR | Name(_ADR, 0x00020000): the PCI bus address
+ | | +------+
+ | |
+ | | +------+
+ | +-| DD01 | Device(DD01): the LCD output device
+ | +------+
+ | |
+ | | +------+
+ | +-| _BCL | Method(_BCL): the backlight control method
+ | +------+
+ |
+ | +------+
+ +-| _TZ | Scope(_TZ): the thermal zone namespace
+ | +------+
+ | |
+ | | +------+
+ | +-| FN00 | PowerResource(FN00): the FAN0 power resource
+ | | +------+
+ | |
+ | | +------+
+ | +-| FAN0 | Device(FAN0): the FAN0 cooling device
+ | | +------+
+ | | |
+ | | | +------+
+ | | +-| _HID | Name(_HID, "PNP0A0B"): the hardware ID
+ | | +------+
+ | |
+ | | +------+
+ | +-| TZ00 | ThermalZone(TZ00); the FAN thermal zone
+ | +------+
+ |
+ | +------+
+ +-| _GPE | Scope(_GPE): the GPE namespace
+ +------+
+
+ Figure 2. Example ACPI Namespace
+
+
+3. Linux ACPI Device Objects
+
+ The Linux kernel's core ACPI subsystem creates struct acpi_device
+ objects for ACPI namespace objects representing devices, power resources
+ processors, thermal zones. Those objects are exported to user space via
+ sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00. The
+ format of their names is <bus_id:instance>, where 'bus_id' refers to the
+ ACPI namespace representation of the given object and 'instance' is used
+ for distinguishing different object of the same 'bus_id' (it is
+ two-digit decimal representation of an unsigned integer).
+
+ The value of 'bus_id' depends on the type of the object whose name it is
+ part of as listed in the table below.
+
+ +---+-----------------+-------+----------+
+ | | Object/Feature | Table | bus_id |
+ +---+-----------------+-------+----------+
+ | N | Root | xSDT | LNXSYSTM |
+ +---+-----------------+-------+----------+
+ | N | Device | xSDT | _HID |
+ +---+-----------------+-------+----------+
+ | N | Processor | xSDT | LNXCPU |
+ +---+-----------------+-------+----------+
+ | N | ThermalZone | xSDT | LNXTHERM |
+ +---+-----------------+-------+----------+
+ | N | PowerResource | xSDT | LNXPOWER |
+ +---+-----------------+-------+----------+
+ | N | Other Devices | xSDT | device |
+ +---+-----------------+-------+----------+
+ | F | PWR_BUTTON | FADT | LNXPWRBN |
+ +---+-----------------+-------+----------+
+ | F | SLP_BUTTON | FADT | LNXSLPBN |
+ +---+-----------------+-------+----------+
+ | M | Video Extension | xSDT | LNXVIDEO |
+ +---+-----------------+-------+----------+
+ | M | ATA Controller | xSDT | LNXIOBAY |
+ +---+-----------------+-------+----------+
+ | M | Docking Station | xSDT | LNXDOCK |
+ +---+-----------------+-------+----------+
+
+ Table 1. ACPI Namespace Objects Mapping
+
+ The following rules apply when creating struct acpi_device objects on
+ the basis of the contents of ACPI System Description Tables (as
+ indicated by the letter in the first column and the notation in the
+ second column of the table above):
+ N:
+ The object's source is an ACPI namespace node (as indicated by the
+ named object's type in the second column). In that case the object's
+ directory in sysfs will contain the 'path' attribute whose value is
+ the full path to the node from the namespace root.
+ F:
+ The struct acpi_device object is created for a fixed hardware
+ feature (as indicated by the fixed feature flag's name in the second
+ column), so its sysfs directory will not contain the 'path'
+ attribute.
+ M:
+ The struct acpi_device object is created for an ACPI namespace node
+ with specific control methods (as indicated by the ACPI defined
+ device's type in the second column). The 'path' attribute containing
+ its namespace path will be present in its sysfs directory. For
+ example, if the _BCL method is present for an ACPI namespace node, a
+ struct acpi_device object with LNXVIDEO 'bus_id' will be created for
+ it.
+
+ The third column of the above table indicates which ACPI System
+ Description Tables contain information used for the creation of the
+ struct acpi_device objects represented by the given row (xSDT means DSDT
+ or SSDT).
+
+ The forth column of the above table indicates the 'bus_id' generation
+ rule of the struct acpi_device object:
+ _HID:
+ _HID in the last column of the table means that the object's bus_id
+ is derived from the _HID/_CID identification objects present under
+ the corresponding ACPI namespace node. The object's sysfs directory
+ will then contain the 'hid' and 'modalias' attributes that can be
+ used to retrieve the _HID and _CIDs of that object.
+ LNXxxxxx:
+ The 'modalias' attribute is also present for struct acpi_device
+ objects having bus_id of the "LNXxxxxx" form (pseudo devices), in
+ which cases it contains the bus_id string itself.
+ device:
+ 'device' in the last column of the table indicates that the object's
+ bus_id cannot be determined from _HID/_CID of the corresponding
+ ACPI namespace node, although that object represents a device (for
+ example, it may be a PCI device with _ADR defined and without _HID
+ or _CID). In that case the string 'device' will be used as the
+ object's bus_id.
+
+
+4. Linux ACPI Physical Device Glue
+
+ ACPI device (i.e. struct acpi_device) objects may be linked to other
+ objects in the Linux' device hierarchy that represent "physical" devices
+ (for example, devices on the PCI bus). If that happens, it means that
+ the ACPI device object is a "companion" of a device otherwise
+ represented in a different way and is used (1) to provide configuration
+ information on that device which cannot be obtained by other means and
+ (2) to do specific things to the device with the help of its ACPI
+ control methods. One ACPI device object may be linked this way to
+ multiple "physical" devices.
+
+ If an ACPI device object is linked to a "physical" device, its sysfs
+ directory contains the "physical_node" symbolic link to the sysfs
+ directory of the target device object. In turn, the target device's
+ sysfs directory will then contain the "firmware_node" symbolic link to
+ the sysfs directory of the companion ACPI device object.
+ The linking mechanism relies on device identification provided by the
+ ACPI namespace. For example, if there's an ACPI namespace object
+ representing a PCI device (i.e. a device object under an ACPI namespace
+ object representing a PCI bridge) whose _ADR returns 0x00020000 and the
+ bus number of the parent PCI bridge is 0, the sysfs directory
+ representing the struct acpi_device object created for that ACPI
+ namespace object will contain the 'physical_node' symbolic link to the
+ /sys/devices/pci0000:00/0000:00:02:0/ sysfs directory of the
+ corresponding PCI device.
+
+ The linking mechanism is generally bus-specific. The core of its
+ implementation is located in the drivers/acpi/glue.c file, but there are
+ complementary parts depending on the bus types in question located
+ elsewhere. For example, the PCI-specific part of it is located in
+ drivers/pci/pci-acpi.c.
+
+
+5. Example Linux ACPI Device Tree
+
+ The sysfs hierarchy of struct acpi_device objects corresponding to the
+ example ACPI namespace illustrated in Figure 2 with the addition of
+ fixed PWR_BUTTON/SLP_BUTTON devices is shown below.
+
+ +--------------+---+-----------------+
+ | LNXSYSTEM:00 | \ | acpi:LNXSYSTEM: |
+ +--------------+---+-----------------+
+ |
+ | +-------------+-----+----------------+
+ +-| LNXPWRBN:00 | N/A | acpi:LNXPWRBN: |
+ | +-------------+-----+----------------+
+ |
+ | +-------------+-----+----------------+
+ +-| LNXSLPBN:00 | N/A | acpi:LNXSLPBN: |
+ | +-------------+-----+----------------+
+ |
+ | +-----------+------------+--------------+
+ +-| LNXCPU:00 | \_PR_.CPU0 | acpi:LNXCPU: |
+ | +-----------+------------+--------------+
+ |
+ | +-------------+-------+----------------+
+ +-| LNXSYBUS:00 | \_SB_ | acpi:LNXSYBUS: |
+ | +-------------+-------+----------------+
+ | |
+ | | +- - - - - - - +- - - - - - +- - - - - - - -+
+ | +-| PNP0C0D:00 | \_SB_.LID0 | acpi:PNP0C0D: |
+ | | +- - - - - - - +- - - - - - +- - - - - - - -+
+ | |
+ | | +------------+------------+-----------------------+
+ | +-| PNP0A08:00 | \_SB_.PCI0 | acpi:PNP0A08:PNP0A03: |
+ | +------------+------------+-----------------------+
+ | |
+ | | +-----------+-----------------+-----+
+ | +-| device:00 | \_SB_.PCI0.RP03 | N/A |
+ | | +-----------+-----------------+-----+
+ | | |
+ | | | +-------------+----------------------+----------------+
+ | | +-| LNXPOWER:00 | \_SB_.PCI0.RP03.PXP3 | acpi:LNXPOWER: |
+ | | +-------------+----------------------+----------------+
+ | |
+ | | +-------------+-----------------+----------------+
+ | +-| LNXVIDEO:00 | \_SB_.PCI0.GFX0 | acpi:LNXVIDEO: |
+ | +-------------+-----------------+----------------+
+ | |
+ | | +-----------+-----------------+-----+
+ | +-| device:01 | \_SB_.PCI0.DD01 | N/A |
+ | +-----------+-----------------+-----+
+ |
+ | +-------------+-------+----------------+
+ +-| LNXSYBUS:01 | \_TZ_ | acpi:LNXSYBUS: |
+ +-------------+-------+----------------+
+ |
+ | +-------------+------------+----------------+
+ +-| LNXPOWER:0a | \_TZ_.FN00 | acpi:LNXPOWER: |
+ | +-------------+------------+----------------+
+ |
+ | +------------+------------+---------------+
+ +-| PNP0C0B:00 | \_TZ_.FAN0 | acpi:PNP0C0B: |
+ | +------------+------------+---------------+
+ |
+ | +-------------+------------+----------------+
+ +-| LNXTHERM:00 | \_TZ_.TZ00 | acpi:LNXTHERM: |
+ +-------------+------------+----------------+
+
+ Figure 3. Example Linux ACPI Device Tree
+
+ NOTE: Each node is represented as "object/path/modalias", where:
+ 1. 'object' is the name of the object's directory in sysfs.
+ 2. 'path' is the ACPI namespace path of the corresponding
+ ACPI namespace object, as returned by the object's 'path'
+ sysfs attribute.
+ 3. 'modalias' is the value of the object's 'modalias' sysfs
+ attribute (as described earlier in this document).
+ NOTE: N/A indicates the device object does not have the 'path' or the
+ 'modalias' attribute.
diff --git a/Documentation/acpi/scan_handlers.txt b/Documentation/acpi/scan_handlers.txt
new file mode 100644
index 000000000..3246ccf15
--- /dev/null
+++ b/Documentation/acpi/scan_handlers.txt
@@ -0,0 +1,77 @@
+ACPI Scan Handlers
+
+Copyright (C) 2012, Intel Corporation
+Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+During system initialization and ACPI-based device hot-add, the ACPI namespace
+is scanned in search of device objects that generally represent various pieces
+of hardware. This causes a struct acpi_device object to be created and
+registered with the driver core for every device object in the ACPI namespace
+and the hierarchy of those struct acpi_device objects reflects the namespace
+layout (i.e. parent device objects in the namespace are represented by parent
+struct acpi_device objects and analogously for their children). Those struct
+acpi_device objects are referred to as "device nodes" in what follows, but they
+should not be confused with struct device_node objects used by the Device Trees
+parsing code (although their role is analogous to the role of those objects).
+
+During ACPI-based device hot-remove device nodes representing pieces of hardware
+being removed are unregistered and deleted.
+
+The core ACPI namespace scanning code in drivers/acpi/scan.c carries out basic
+initialization of device nodes, such as retrieving common configuration
+information from the device objects represented by them and populating them with
+appropriate data, but some of them require additional handling after they have
+been registered. For example, if the given device node represents a PCI host
+bridge, its registration should cause the PCI bus under that bridge to be
+enumerated and PCI devices on that bus to be registered with the driver core.
+Similarly, if the device node represents a PCI interrupt link, it is necessary
+to configure that link so that the kernel can use it.
+
+Those additional configuration tasks usually depend on the type of the hardware
+component represented by the given device node which can be determined on the
+basis of the device node's hardware ID (HID). They are performed by objects
+called ACPI scan handlers represented by the following structure:
+
+struct acpi_scan_handler {
+ const struct acpi_device_id *ids;
+ struct list_head list_node;
+ int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id);
+ void (*detach)(struct acpi_device *dev);
+};
+
+where ids is the list of IDs of device nodes the given handler is supposed to
+take care of, list_node is the hook to the global list of ACPI scan handlers
+maintained by the ACPI core and the .attach() and .detach() callbacks are
+executed, respectively, after registration of new device nodes and before
+unregistration of device nodes the handler attached to previously.
+
+The namespace scanning function, acpi_bus_scan(), first registers all of the
+device nodes in the given namespace scope with the driver core. Then, it tries
+to match a scan handler against each of them using the ids arrays of the
+available scan handlers. If a matching scan handler is found, its .attach()
+callback is executed for the given device node. If that callback returns 1,
+that means that the handler has claimed the device node and is now responsible
+for carrying out any additional configuration tasks related to it. It also will
+be responsible for preparing the device node for unregistration in that case.
+The device node's handler field is then populated with the address of the scan
+handler that has claimed it.
+
+If the .attach() callback returns 0, it means that the device node is not
+interesting to the given scan handler and may be matched against the next scan
+handler in the list. If it returns a (negative) error code, that means that
+the namespace scan should be terminated due to a serious error. The error code
+returned should then reflect the type of the error.
+
+The namespace trimming function, acpi_bus_trim(), first executes .detach()
+callbacks from the scan handlers of all device nodes in the given namespace
+scope (if they have scan handlers). Next, it unregisters all of the device
+nodes in that scope.
+
+ACPI scan handlers can be added to the list maintained by the ACPI core with the
+help of the acpi_scan_add_handler() function taking a pointer to the new scan
+handler as an argument. The order in which scan handlers are added to the list
+is the order in which they are matched against device nodes during namespace
+scans.
+
+All scan handles must be added to the list before acpi_bus_scan() is run for the
+first time and they cannot be removed from it.
diff --git a/Documentation/acpi/video_extension.txt b/Documentation/acpi/video_extension.txt
new file mode 100644
index 000000000..78b32ac02
--- /dev/null
+++ b/Documentation/acpi/video_extension.txt
@@ -0,0 +1,106 @@
+ACPI video extensions
+~~~~~~~~~~~~~~~~~~~~~
+
+This driver implement the ACPI Extensions For Display Adapters for
+integrated graphics devices on motherboard, as specified in ACPI 2.0
+Specification, Appendix B, allowing to perform some basic control like
+defining the video POST device, retrieving EDID information or to
+setup a video output, etc. Note that this is an ref. implementation
+only. It may or may not work for your integrated video device.
+
+The ACPI video driver does 3 things regarding backlight control:
+
+1 Export a sysfs interface for user space to control backlight level
+
+If the ACPI table has a video device, and acpi_backlight=vendor kernel
+command line is not present, the driver will register a backlight device
+and set the required backlight operation structure for it for the sysfs
+interface control. For every registered class device, there will be a
+directory named acpi_videoX under /sys/class/backlight.
+
+The backlight sysfs interface has a standard definition here:
+Documentation/ABI/stable/sysfs-class-backlight.
+
+And what ACPI video driver does is:
+actual_brightness: on read, control method _BQC will be evaluated to
+get the brightness level the firmware thinks it is at;
+bl_power: not implemented, will set the current brightness instead;
+brightness: on write, control method _BCM will run to set the requested
+brightness level;
+max_brightness: Derived from the _BCL package(see below);
+type: firmware
+
+Note that ACPI video backlight driver will always use index for
+brightness, actual_brightness and max_brightness. So if we have
+the following _BCL package:
+
+Method (_BCL, 0, NotSerialized)
+{
+ Return (Package (0x0C)
+ {
+ 0x64,
+ 0x32,
+ 0x0A,
+ 0x14,
+ 0x1E,
+ 0x28,
+ 0x32,
+ 0x3C,
+ 0x46,
+ 0x50,
+ 0x5A,
+ 0x64
+ })
+}
+
+The first two levels are for when laptop are on AC or on battery and are
+not used by Linux currently. The remaining 10 levels are supported levels
+that we can choose from. The applicable index values are from 0 (that
+corresponds to the 0x0A brightness value) to 9 (that corresponds to the
+0x64 brightness value) inclusive. Each of those index values is regarded
+as a "brightness level" indicator. Thus from the user space perspective
+the range of available brightness levels is from 0 to 9 (max_brightness)
+inclusive.
+
+2 Notify user space about hotkey event
+
+There are generally two cases for hotkey event reporting:
+i) For some laptops, when user presses the hotkey, a scancode will be
+ generated and sent to user space through the input device created by
+ the keyboard driver as a key type input event, with proper remap, the
+ following key code will appear to user space:
+
+ EV_KEY, KEY_BRIGHTNESSUP
+ EV_KEY, KEY_BRIGHTNESSDOWN
+ etc.
+
+For this case, ACPI video driver does not need to do anything(actually,
+it doesn't even know this happened).
+
+ii) For some laptops, the press of the hotkey will not generate the
+ scancode, instead, firmware will notify the video device ACPI node
+ about the event. The event value is defined in the ACPI spec. ACPI
+ video driver will generate an key type input event according to the
+ notify value it received and send the event to user space through the
+ input device it created:
+
+ event keycode
+ 0x86 KEY_BRIGHTNESSUP
+ 0x87 KEY_BRIGHTNESSDOWN
+ etc.
+
+so this would lead to the same effect as case i) now.
+
+Once user space tool receives this event, it can modify the backlight
+level through the sysfs interface.
+
+3 Change backlight level in the kernel
+
+This works for machines covered by case ii) in Section 2. Once the driver
+received a notification, it will set the backlight level accordingly. This does
+not affect the sending of event to user space, they are always sent to user
+space regardless of whether or not the video module controls the backlight level
+directly. This behaviour can be controlled through the brightness_switch_enabled
+module parameter as documented in kernel-parameters.txt. It is recommended to
+disable this behaviour once a GUI environment starts up and wants to have full
+control of the backlight level.
diff --git a/Documentation/aoe/aoe.txt b/Documentation/aoe/aoe.txt
new file mode 100644
index 000000000..c71487d39
--- /dev/null
+++ b/Documentation/aoe/aoe.txt
@@ -0,0 +1,143 @@
+ATA over Ethernet is a network protocol that provides simple access to
+block storage on the LAN.
+
+ http://support.coraid.com/documents/AoEr11.txt
+
+The EtherDrive (R) HOWTO for 2.6 and 3.x kernels is found at ...
+
+ http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO.html
+
+It has many tips and hints! Please see, especially, recommended
+tunings for virtual memory:
+
+ http://support.coraid.com/support/linux/EtherDrive-2.6-HOWTO-5.html#ss5.19
+
+The aoetools are userland programs that are designed to work with this
+driver. The aoetools are on sourceforge.
+
+ http://aoetools.sourceforge.net/
+
+The scripts in this Documentation/aoe directory are intended to
+document the use of the driver and are not necessary if you install
+the aoetools.
+
+
+CREATING DEVICE NODES
+
+ Users of udev should find the block device nodes created
+ automatically, but to create all the necessary device nodes, use the
+ udev configuration rules provided in udev.txt (in this directory).
+
+ There is a udev-install.sh script that shows how to install these
+ rules on your system.
+
+ There is also an autoload script that shows how to edit
+ /etc/modprobe.d/aoe.conf to ensure that the aoe module is loaded when
+ necessary. Preloading the aoe module is preferable to autoloading,
+ however, because AoE discovery takes a few seconds. It can be
+ confusing when an AoE device is not present the first time the a
+ command is run but appears a second later.
+
+USING DEVICE NODES
+
+ "cat /dev/etherd/err" blocks, waiting for error diagnostic output,
+ like any retransmitted packets.
+
+ "echo eth2 eth4 > /dev/etherd/interfaces" tells the aoe driver to
+ limit ATA over Ethernet traffic to eth2 and eth4. AoE traffic from
+ untrusted networks should be ignored as a matter of security. See
+ also the aoe_iflist driver option described below.
+
+ "echo > /dev/etherd/discover" tells the driver to find out what AoE
+ devices are available.
+
+ In the future these character devices may disappear and be replaced
+ by sysfs counterparts. Using the commands in aoetools insulates
+ users from these implementation details.
+
+ The block devices are named like this:
+
+ e{shelf}.{slot}
+ e{shelf}.{slot}p{part}
+
+ ... so that "e0.2" is the third blade from the left (slot 2) in the
+ first shelf (shelf address zero). That's the whole disk. The first
+ partition on that disk would be "e0.2p1".
+
+USING SYSFS
+
+ Each aoe block device in /sys/block has the extra attributes of
+ state, mac, and netif. The state attribute is "up" when the device
+ is ready for I/O and "down" if detected but unusable. The
+ "down,closewait" state shows that the device is still open and
+ cannot come up again until it has been closed.
+
+ The mac attribute is the ethernet address of the remote AoE device.
+ The netif attribute is the network interface on the localhost
+ through which we are communicating with the remote AoE device.
+
+ There is a script in this directory that formats this information in
+ a convenient way. Users with aoetools should use the aoe-stat
+ command.
+
+ root@makki root# sh Documentation/aoe/status.sh
+ e10.0 eth3 up
+ e10.1 eth3 up
+ e10.2 eth3 up
+ e10.3 eth3 up
+ e10.4 eth3 up
+ e10.5 eth3 up
+ e10.6 eth3 up
+ e10.7 eth3 up
+ e10.8 eth3 up
+ e10.9 eth3 up
+ e4.0 eth1 up
+ e4.1 eth1 up
+ e4.2 eth1 up
+ e4.3 eth1 up
+ e4.4 eth1 up
+ e4.5 eth1 up
+ e4.6 eth1 up
+ e4.7 eth1 up
+ e4.8 eth1 up
+ e4.9 eth1 up
+
+ Use /sys/module/aoe/parameters/aoe_iflist (or better, the driver
+ option discussed below) instead of /dev/etherd/interfaces to limit
+ AoE traffic to the network interfaces in the given
+ whitespace-separated list. Unlike the old character device, the
+ sysfs entry can be read from as well as written to.
+
+ It's helpful to trigger discovery after setting the list of allowed
+ interfaces. The aoetools package provides an aoe-discover script
+ for this purpose. You can also directly use the
+ /dev/etherd/discover special file described above.
+
+DRIVER OPTIONS
+
+ There is a boot option for the built-in aoe driver and a
+ corresponding module parameter, aoe_iflist. Without this option,
+ all network interfaces may be used for ATA over Ethernet. Here is a
+ usage example for the module parameter.
+
+ modprobe aoe_iflist="eth1 eth3"
+
+ The aoe_deadsecs module parameter determines the maximum number of
+ seconds that the driver will wait for an AoE device to provide a
+ response to an AoE command. After aoe_deadsecs seconds have
+ elapsed, the AoE device will be marked as "down". A value of zero
+ is supported for testing purposes and makes the aoe driver keep
+ trying AoE commands forever.
+
+ The aoe_maxout module parameter has a default of 128. This is the
+ maximum number of unresponded packets that will be sent to an AoE
+ target at one time.
+
+ The aoe_dyndevs module parameter defaults to 1, meaning that the
+ driver will assign a block device minor number to a discovered AoE
+ target based on the order of its discovery. With dynamic minor
+ device numbers in use, a greater range of AoE shelf and slot
+ addresses can be supported. Users with udev will never have to
+ think about minor numbers. Using aoe_dyndevs=0 allows device nodes
+ to be pre-created using a static minor-number scheme with the
+ aoe-mkshelf script in the aoetools.
diff --git a/Documentation/aoe/autoload.sh b/Documentation/aoe/autoload.sh
new file mode 100644
index 000000000..815dff469
--- /dev/null
+++ b/Documentation/aoe/autoload.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+# set aoe to autoload by installing the
+# aliases in /etc/modprobe.d/
+
+f=/etc/modprobe.d/aoe.conf
+
+if test ! -r $f || test ! -w $f; then
+ echo "cannot configure $f for module autoloading" 1>&2
+ exit 1
+fi
+
+grep major-152 $f >/dev/null
+if [ $? = 1 ]; then
+ echo alias block-major-152 aoe >> $f
+ echo alias char-major-152 aoe >> $f
+fi
+
diff --git a/Documentation/aoe/status.sh b/Documentation/aoe/status.sh
new file mode 100644
index 000000000..eeec7baae
--- /dev/null
+++ b/Documentation/aoe/status.sh
@@ -0,0 +1,30 @@
+#! /bin/sh
+# collate and present sysfs information about AoE storage
+#
+# A more complete version of this script is aoe-stat, in the
+# aoetools.
+
+set -e
+format="%8s\t%8s\t%8s\n"
+me=`basename $0`
+sysd=${sysfs_dir:-/sys}
+
+# printf "$format" device mac netif state
+
+# Suse 9.1 Pro doesn't put /sys in /etc/mtab
+#test -z "`mount | grep sysfs`" && {
+test ! -d "$sysd/block" && {
+ echo "$me Error: sysfs is not mounted" 1>&2
+ exit 1
+}
+
+for d in `ls -d $sysd/block/etherd* 2>/dev/null | grep -v p` end; do
+ # maybe ls comes up empty, so we use "end"
+ test $d = end && continue
+
+ dev=`echo "$d" | sed 's/.*!//'`
+ printf "$format" \
+ "$dev" \
+ "`cat \"$d/netif\"`" \
+ "`cat \"$d/state\"`"
+done | sort
diff --git a/Documentation/aoe/todo.txt b/Documentation/aoe/todo.txt
new file mode 100644
index 000000000..c09dfad4a
--- /dev/null
+++ b/Documentation/aoe/todo.txt
@@ -0,0 +1,14 @@
+There is a potential for deadlock when allocating a struct sk_buff for
+data that needs to be written out to aoe storage. If the data is
+being written from a dirty page in order to free that page, and if
+there are no other pages available, then deadlock may occur when a
+free page is needed for the sk_buff allocation. This situation has
+not been observed, but it would be nice to eliminate any potential for
+deadlock under memory pressure.
+
+Because ATA over Ethernet is not fragmented by the kernel's IP code,
+the destructor member of the struct sk_buff is available to the aoe
+driver. By using a mempool for allocating all but the first few
+sk_buffs, and by registering a destructor, we should be able to
+efficiently allocate sk_buffs without introducing any potential for
+deadlock.
diff --git a/Documentation/aoe/udev-install.sh b/Documentation/aoe/udev-install.sh
new file mode 100644
index 000000000..15e86f58c
--- /dev/null
+++ b/Documentation/aoe/udev-install.sh
@@ -0,0 +1,33 @@
+# install the aoe-specific udev rules from udev.txt into
+# the system's udev configuration
+#
+
+me="`basename $0`"
+
+# find udev.conf, often /etc/udev/udev.conf
+# (or environment can specify where to find udev.conf)
+#
+if test -z "$conf"; then
+ if test -r /etc/udev/udev.conf; then
+ conf=/etc/udev/udev.conf
+ else
+ conf="`find /etc -type f -name udev.conf 2> /dev/null`"
+ if test -z "$conf" || test ! -r "$conf"; then
+ echo "$me Error: no udev.conf found" 1>&2
+ exit 1
+ fi
+ fi
+fi
+
+# find the directory where udev rules are stored, often
+# /etc/udev/rules.d
+#
+rules_d="`sed -n '/^udev_rules=/{ s!udev_rules=!!; s!\"!!g; p; }' $conf`"
+if test -z "$rules_d" ; then
+ rules_d=/etc/udev/rules.d
+fi
+if test ! -d "$rules_d"; then
+ echo "$me Error: cannot find udev rules directory" 1>&2
+ exit 1
+fi
+sh -xc "cp `dirname $0`/udev.txt $rules_d/60-aoe.rules"
diff --git a/Documentation/aoe/udev.txt b/Documentation/aoe/udev.txt
new file mode 100644
index 000000000..1f06daf03
--- /dev/null
+++ b/Documentation/aoe/udev.txt
@@ -0,0 +1,26 @@
+# These rules tell udev what device nodes to create for aoe support.
+# They may be installed along the following lines. Check the section
+# 8 udev manpage to see whether your udev supports SUBSYSTEM, and
+# whether it uses one or two equal signs for SUBSYSTEM and KERNEL.
+#
+# ecashin@makki ~$ su
+# Password:
+# bash# find /etc -type f -name udev.conf
+# /etc/udev/udev.conf
+# bash# grep udev_rules= /etc/udev/udev.conf
+# udev_rules="/etc/udev/rules.d/"
+# bash# ls /etc/udev/rules.d/
+# 10-wacom.rules 50-udev.rules
+# bash# cp /path/to/linux-2.6.xx/Documentation/aoe/udev.txt \
+# /etc/udev/rules.d/60-aoe.rules
+#
+
+# aoe char devices
+SUBSYSTEM=="aoe", KERNEL=="discover", NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="err", NAME="etherd/%k", GROUP="disk", MODE="0440"
+SUBSYSTEM=="aoe", KERNEL=="interfaces", NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="revalidate", NAME="etherd/%k", GROUP="disk", MODE="0220"
+SUBSYSTEM=="aoe", KERNEL=="flush", NAME="etherd/%k", GROUP="disk", MODE="0220"
+
+# aoe block devices
+KERNEL=="etherd*", GROUP="disk"
diff --git a/Documentation/applying-patches.txt b/Documentation/applying-patches.txt
new file mode 100644
index 000000000..77df55b02
--- /dev/null
+++ b/Documentation/applying-patches.txt
@@ -0,0 +1,454 @@
+
+ Applying Patches To The Linux Kernel
+ ------------------------------------
+
+ Original by: Jesper Juhl, August 2005
+ Last update: 2006-01-05
+
+
+A frequently asked question on the Linux Kernel Mailing List is how to apply
+a patch to the kernel or, more specifically, what base kernel a patch for
+one of the many trees/branches should be applied to. Hopefully this document
+will explain this to you.
+
+In addition to explaining how to apply and revert patches, a brief
+description of the different kernel trees (and examples of how to apply
+their specific patches) is also provided.
+
+
+What is a patch?
+---
+ A patch is a small text document containing a delta of changes between two
+different versions of a source tree. Patches are created with the `diff'
+program.
+To correctly apply a patch you need to know what base it was generated from
+and what new version the patch will change the source tree into. These
+should both be present in the patch file metadata or be possible to deduce
+from the filename.
+
+
+How do I apply or revert a patch?
+---
+ You apply a patch with the `patch' program. The patch program reads a diff
+(or patch) file and makes the changes to the source tree described in it.
+
+Patches for the Linux kernel are generated relative to the parent directory
+holding the kernel source dir.
+
+This means that paths to files inside the patch file contain the name of the
+kernel source directories it was generated against (or some other directory
+names like "a/" and "b/").
+Since this is unlikely to match the name of the kernel source dir on your
+local machine (but is often useful info to see what version an otherwise
+unlabeled patch was generated against) you should change into your kernel
+source directory and then strip the first element of the path from filenames
+in the patch file when applying it (the -p1 argument to `patch' does this).
+
+To revert a previously applied patch, use the -R argument to patch.
+So, if you applied a patch like this:
+ patch -p1 < ../patch-x.y.z
+
+You can revert (undo) it like this:
+ patch -R -p1 < ../patch-x.y.z
+
+
+How do I feed a patch/diff file to `patch'?
+---
+ This (as usual with Linux and other UNIX like operating systems) can be
+done in several different ways.
+In all the examples below I feed the file (in uncompressed form) to patch
+via stdin using the following syntax:
+ patch -p1 < path/to/patch-x.y.z
+
+If you just want to be able to follow the examples below and don't want to
+know of more than one way to use patch, then you can stop reading this
+section here.
+
+Patch can also get the name of the file to use via the -i argument, like
+this:
+ patch -p1 -i path/to/patch-x.y.z
+
+If your patch file is compressed with gzip or bzip2 and you don't want to
+uncompress it before applying it, then you can feed it to patch like this
+instead:
+ zcat path/to/patch-x.y.z.gz | patch -p1
+ bzcat path/to/patch-x.y.z.bz2 | patch -p1
+
+If you wish to uncompress the patch file by hand first before applying it
+(what I assume you've done in the examples below), then you simply run
+gunzip or bunzip2 on the file -- like this:
+ gunzip patch-x.y.z.gz
+ bunzip2 patch-x.y.z.bz2
+
+Which will leave you with a plain text patch-x.y.z file that you can feed to
+patch via stdin or the -i argument, as you prefer.
+
+A few other nice arguments for patch are -s which causes patch to be silent
+except for errors which is nice to prevent errors from scrolling out of the
+screen too fast, and --dry-run which causes patch to just print a listing of
+what would happen, but doesn't actually make any changes. Finally --verbose
+tells patch to print more information about the work being done.
+
+
+Common errors when patching
+---
+ When patch applies a patch file it attempts to verify the sanity of the
+file in different ways.
+Checking that the file looks like a valid patch file and checking the code
+around the bits being modified matches the context provided in the patch are
+just two of the basic sanity checks patch does.
+
+If patch encounters something that doesn't look quite right it has two
+options. It can either refuse to apply the changes and abort or it can try
+to find a way to make the patch apply with a few minor changes.
+
+One example of something that's not 'quite right' that patch will attempt to
+fix up is if all the context matches, the lines being changed match, but the
+line numbers are different. This can happen, for example, if the patch makes
+a change in the middle of the file but for some reasons a few lines have
+been added or removed near the beginning of the file. In that case
+everything looks good it has just moved up or down a bit, and patch will
+usually adjust the line numbers and apply the patch.
+
+Whenever patch applies a patch that it had to modify a bit to make it fit
+it'll tell you about it by saying the patch applied with 'fuzz'.
+You should be wary of such changes since even though patch probably got it
+right it doesn't /always/ get it right, and the result will sometimes be
+wrong.
+
+When patch encounters a change that it can't fix up with fuzz it rejects it
+outright and leaves a file with a .rej extension (a reject file). You can
+read this file to see exactly what change couldn't be applied, so you can
+go fix it up by hand if you wish.
+
+If you don't have any third-party patches applied to your kernel source, but
+only patches from kernel.org and you apply the patches in the correct order,
+and have made no modifications yourself to the source files, then you should
+never see a fuzz or reject message from patch. If you do see such messages
+anyway, then there's a high risk that either your local source tree or the
+patch file is corrupted in some way. In that case you should probably try
+re-downloading the patch and if things are still not OK then you'd be advised
+to start with a fresh tree downloaded in full from kernel.org.
+
+Let's look a bit more at some of the messages patch can produce.
+
+If patch stops and presents a "File to patch:" prompt, then patch could not
+find a file to be patched. Most likely you forgot to specify -p1 or you are
+in the wrong directory. Less often, you'll find patches that need to be
+applied with -p0 instead of -p1 (reading the patch file should reveal if
+this is the case -- if so, then this is an error by the person who created
+the patch but is not fatal).
+
+If you get "Hunk #2 succeeded at 1887 with fuzz 2 (offset 7 lines)." or a
+message similar to that, then it means that patch had to adjust the location
+of the change (in this example it needed to move 7 lines from where it
+expected to make the change to make it fit).
+The resulting file may or may not be OK, depending on the reason the file
+was different than expected.
+This often happens if you try to apply a patch that was generated against a
+different kernel version than the one you are trying to patch.
+
+If you get a message like "Hunk #3 FAILED at 2387.", then it means that the
+patch could not be applied correctly and the patch program was unable to
+fuzz its way through. This will generate a .rej file with the change that
+caused the patch to fail and also a .orig file showing you the original
+content that couldn't be changed.
+
+If you get "Reversed (or previously applied) patch detected! Assume -R? [n]"
+then patch detected that the change contained in the patch seems to have
+already been made.
+If you actually did apply this patch previously and you just re-applied it
+in error, then just say [n]o and abort this patch. If you applied this patch
+previously and actually intended to revert it, but forgot to specify -R,
+then you can say [y]es here to make patch revert it for you.
+This can also happen if the creator of the patch reversed the source and
+destination directories when creating the patch, and in that case reverting
+the patch will in fact apply it.
+
+A message similar to "patch: **** unexpected end of file in patch" or "patch
+unexpectedly ends in middle of line" means that patch could make no sense of
+the file you fed to it. Either your download is broken, you tried to feed
+patch a compressed patch file without uncompressing it first, or the patch
+file that you are using has been mangled by a mail client or mail transfer
+agent along the way somewhere, e.g., by splitting a long line into two lines.
+Often these warnings can easily be fixed by joining (concatenating) the
+two lines that had been split.
+
+As I already mentioned above, these errors should never happen if you apply
+a patch from kernel.org to the correct version of an unmodified source tree.
+So if you get these errors with kernel.org patches then you should probably
+assume that either your patch file or your tree is broken and I'd advise you
+to start over with a fresh download of a full kernel tree and the patch you
+wish to apply.
+
+
+Are there any alternatives to `patch'?
+---
+ Yes there are alternatives.
+
+ You can use the `interdiff' program (http://cyberelk.net/tim/patchutils/) to
+generate a patch representing the differences between two patches and then
+apply the result.
+This will let you move from something like 2.6.12.2 to 2.6.12.3 in a single
+step. The -z flag to interdiff will even let you feed it patches in gzip or
+bzip2 compressed form directly without the use of zcat or bzcat or manual
+decompression.
+
+Here's how you'd go from 2.6.12.2 to 2.6.12.3 in a single step:
+ interdiff -z ../patch-2.6.12.2.bz2 ../patch-2.6.12.3.gz | patch -p1
+
+Although interdiff may save you a step or two you are generally advised to
+do the additional steps since interdiff can get things wrong in some cases.
+
+ Another alternative is `ketchup', which is a python script for automatic
+downloading and applying of patches (http://www.selenic.com/ketchup/).
+
+ Other nice tools are diffstat, which shows a summary of changes made by a
+patch; lsdiff, which displays a short listing of affected files in a patch
+file, along with (optionally) the line numbers of the start of each patch;
+and grepdiff, which displays a list of the files modified by a patch where
+the patch contains a given regular expression.
+
+
+Where can I download the patches?
+---
+ The patches are available at http://kernel.org/
+Most recent patches are linked from the front page, but they also have
+specific homes.
+
+The 2.6.x.y (-stable) and 2.6.x patches live at
+ ftp://ftp.kernel.org/pub/linux/kernel/v2.6/
+
+The -rc patches live at
+ ftp://ftp.kernel.org/pub/linux/kernel/v2.6/testing/
+
+The -git patches live at
+ ftp://ftp.kernel.org/pub/linux/kernel/v2.6/snapshots/
+
+The -mm kernels live at
+ ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/
+
+In place of ftp.kernel.org you can use ftp.cc.kernel.org, where cc is a
+country code. This way you'll be downloading from a mirror site that's most
+likely geographically closer to you, resulting in faster downloads for you,
+less bandwidth used globally and less load on the main kernel.org servers --
+these are good things, so do use mirrors when possible.
+
+
+The 2.6.x kernels
+---
+ These are the base stable releases released by Linus. The highest numbered
+release is the most recent.
+
+If regressions or other serious flaws are found, then a -stable fix patch
+will be released (see below) on top of this base. Once a new 2.6.x base
+kernel is released, a patch is made available that is a delta between the
+previous 2.6.x kernel and the new one.
+
+To apply a patch moving from 2.6.11 to 2.6.12, you'd do the following (note
+that such patches do *NOT* apply on top of 2.6.x.y kernels but on top of the
+base 2.6.x kernel -- if you need to move from 2.6.x.y to 2.6.x+1 you need to
+first revert the 2.6.x.y patch).
+
+Here are some examples:
+
+# moving from 2.6.11 to 2.6.12
+$ cd ~/linux-2.6.11 # change to kernel source dir
+$ patch -p1 < ../patch-2.6.12 # apply the 2.6.12 patch
+$ cd ..
+$ mv linux-2.6.11 linux-2.6.12 # rename source dir
+
+# moving from 2.6.11.1 to 2.6.12
+$ cd ~/linux-2.6.11.1 # change to kernel source dir
+$ patch -p1 -R < ../patch-2.6.11.1 # revert the 2.6.11.1 patch
+ # source dir is now 2.6.11
+$ patch -p1 < ../patch-2.6.12 # apply new 2.6.12 patch
+$ cd ..
+$ mv linux-2.6.11.1 linux-2.6.12 # rename source dir
+
+
+The 2.6.x.y kernels
+---
+ Kernels with 4-digit versions are -stable kernels. They contain small(ish)
+critical fixes for security problems or significant regressions discovered
+in a given 2.6.x kernel.
+
+This is the recommended branch for users who want the most recent stable
+kernel and are not interested in helping test development/experimental
+versions.
+
+If no 2.6.x.y kernel is available, then the highest numbered 2.6.x kernel is
+the current stable kernel.
+
+ note: the -stable team usually do make incremental patches available as well
+ as patches against the latest mainline release, but I only cover the
+ non-incremental ones below. The incremental ones can be found at
+ ftp://ftp.kernel.org/pub/linux/kernel/v2.6/incr/
+
+These patches are not incremental, meaning that for example the 2.6.12.3
+patch does not apply on top of the 2.6.12.2 kernel source, but rather on top
+of the base 2.6.12 kernel source .
+So, in order to apply the 2.6.12.3 patch to your existing 2.6.12.2 kernel
+source you have to first back out the 2.6.12.2 patch (so you are left with a
+base 2.6.12 kernel source) and then apply the new 2.6.12.3 patch.
+
+Here's a small example:
+
+$ cd ~/linux-2.6.12.2 # change into the kernel source dir
+$ patch -p1 -R < ../patch-2.6.12.2 # revert the 2.6.12.2 patch
+$ patch -p1 < ../patch-2.6.12.3 # apply the new 2.6.12.3 patch
+$ cd ..
+$ mv linux-2.6.12.2 linux-2.6.12.3 # rename the kernel source dir
+
+
+The -rc kernels
+---
+ These are release-candidate kernels. These are development kernels released
+by Linus whenever he deems the current git (the kernel's source management
+tool) tree to be in a reasonably sane state adequate for testing.
+
+These kernels are not stable and you should expect occasional breakage if
+you intend to run them. This is however the most stable of the main
+development branches and is also what will eventually turn into the next
+stable kernel, so it is important that it be tested by as many people as
+possible.
+
+This is a good branch to run for people who want to help out testing
+development kernels but do not want to run some of the really experimental
+stuff (such people should see the sections about -git and -mm kernels below).
+
+The -rc patches are not incremental, they apply to a base 2.6.x kernel, just
+like the 2.6.x.y patches described above. The kernel version before the -rcN
+suffix denotes the version of the kernel that this -rc kernel will eventually
+turn into.
+So, 2.6.13-rc5 means that this is the fifth release candidate for the 2.6.13
+kernel and the patch should be applied on top of the 2.6.12 kernel source.
+
+Here are 3 examples of how to apply these patches:
+
+# first an example of moving from 2.6.12 to 2.6.13-rc3
+$ cd ~/linux-2.6.12 # change into the 2.6.12 source dir
+$ patch -p1 < ../patch-2.6.13-rc3 # apply the 2.6.13-rc3 patch
+$ cd ..
+$ mv linux-2.6.12 linux-2.6.13-rc3 # rename the source dir
+
+# now let's move from 2.6.13-rc3 to 2.6.13-rc5
+$ cd ~/linux-2.6.13-rc3 # change into the 2.6.13-rc3 dir
+$ patch -p1 -R < ../patch-2.6.13-rc3 # revert the 2.6.13-rc3 patch
+$ patch -p1 < ../patch-2.6.13-rc5 # apply the new 2.6.13-rc5 patch
+$ cd ..
+$ mv linux-2.6.13-rc3 linux-2.6.13-rc5 # rename the source dir
+
+# finally let's try and move from 2.6.12.3 to 2.6.13-rc5
+$ cd ~/linux-2.6.12.3 # change to the kernel source dir
+$ patch -p1 -R < ../patch-2.6.12.3 # revert the 2.6.12.3 patch
+$ patch -p1 < ../patch-2.6.13-rc5 # apply new 2.6.13-rc5 patch
+$ cd ..
+$ mv linux-2.6.12.3 linux-2.6.13-rc5 # rename the kernel source dir
+
+
+The -git kernels
+---
+ These are daily snapshots of Linus' kernel tree (managed in a git
+repository, hence the name).
+
+These patches are usually released daily and represent the current state of
+Linus's tree. They are more experimental than -rc kernels since they are
+generated automatically without even a cursory glance to see if they are
+sane.
+
+-git patches are not incremental and apply either to a base 2.6.x kernel or
+a base 2.6.x-rc kernel -- you can see which from their name.
+A patch named 2.6.12-git1 applies to the 2.6.12 kernel source and a patch
+named 2.6.13-rc3-git2 applies to the source of the 2.6.13-rc3 kernel.
+
+Here are some examples of how to apply these patches:
+
+# moving from 2.6.12 to 2.6.12-git1
+$ cd ~/linux-2.6.12 # change to the kernel source dir
+$ patch -p1 < ../patch-2.6.12-git1 # apply the 2.6.12-git1 patch
+$ cd ..
+$ mv linux-2.6.12 linux-2.6.12-git1 # rename the kernel source dir
+
+# moving from 2.6.12-git1 to 2.6.13-rc2-git3
+$ cd ~/linux-2.6.12-git1 # change to the kernel source dir
+$ patch -p1 -R < ../patch-2.6.12-git1 # revert the 2.6.12-git1 patch
+ # we now have a 2.6.12 kernel
+$ patch -p1 < ../patch-2.6.13-rc2 # apply the 2.6.13-rc2 patch
+ # the kernel is now 2.6.13-rc2
+$ patch -p1 < ../patch-2.6.13-rc2-git3 # apply the 2.6.13-rc2-git3 patch
+ # the kernel is now 2.6.13-rc2-git3
+$ cd ..
+$ mv linux-2.6.12-git1 linux-2.6.13-rc2-git3 # rename source dir
+
+
+The -mm kernels
+---
+ These are experimental kernels released by Andrew Morton.
+
+The -mm tree serves as a sort of proving ground for new features and other
+experimental patches.
+Once a patch has proved its worth in -mm for a while Andrew pushes it on to
+Linus for inclusion in mainline.
+
+Although it's encouraged that patches flow to Linus via the -mm tree, this
+is not always enforced.
+Subsystem maintainers (or individuals) sometimes push their patches directly
+to Linus, even though (or after) they have been merged and tested in -mm (or
+sometimes even without prior testing in -mm).
+
+You should generally strive to get your patches into mainline via -mm to
+ensure maximum testing.
+
+This branch is in constant flux and contains many experimental features, a
+lot of debugging patches not appropriate for mainline etc., and is the most
+experimental of the branches described in this document.
+
+These kernels are not appropriate for use on systems that are supposed to be
+stable and they are more risky to run than any of the other branches (make
+sure you have up-to-date backups -- that goes for any experimental kernel but
+even more so for -mm kernels).
+
+These kernels in addition to all the other experimental patches they contain
+usually also contain any changes in the mainline -git kernels available at
+the time of release.
+
+Testing of -mm kernels is greatly appreciated since the whole point of the
+tree is to weed out regressions, crashes, data corruption bugs, build
+breakage (and any other bug in general) before changes are merged into the
+more stable mainline Linus tree.
+But testers of -mm should be aware that breakage in this tree is more common
+than in any other tree.
+
+The -mm kernels are not released on a fixed schedule, but usually a few -mm
+kernels are released in between each -rc kernel (1 to 3 is common).
+The -mm kernels apply to either a base 2.6.x kernel (when no -rc kernels
+have been released yet) or to a Linus -rc kernel.
+
+Here are some examples of applying the -mm patches:
+
+# moving from 2.6.12 to 2.6.12-mm1
+$ cd ~/linux-2.6.12 # change to the 2.6.12 source dir
+$ patch -p1 < ../2.6.12-mm1 # apply the 2.6.12-mm1 patch
+$ cd ..
+$ mv linux-2.6.12 linux-2.6.12-mm1 # rename the source appropriately
+
+# moving from 2.6.12-mm1 to 2.6.13-rc3-mm3
+$ cd ~/linux-2.6.12-mm1
+$ patch -p1 -R < ../2.6.12-mm1 # revert the 2.6.12-mm1 patch
+ # we now have a 2.6.12 source
+$ patch -p1 < ../patch-2.6.13-rc3 # apply the 2.6.13-rc3 patch
+ # we now have a 2.6.13-rc3 source
+$ patch -p1 < ../2.6.13-rc3-mm3 # apply the 2.6.13-rc3-mm3 patch
+$ cd ..
+$ mv linux-2.6.12-mm1 linux-2.6.13-rc3-mm3 # rename the source dir
+
+
+This concludes this list of explanations of the various kernel trees.
+I hope you are now clear on how to apply the various patches and help testing
+the kernel.
+
+Thank you's to Randy Dunlap, Rolf Eike Beer, Linus Torvalds, Bodo Eggert,
+Johannes Stezenbach, Grant Coady, Pavel Machek and others that I may have
+forgotten for their reviews and contributions to this document.
+
diff --git a/Documentation/arm/00-INDEX b/Documentation/arm/00-INDEX
new file mode 100644
index 000000000..dea011c8d
--- /dev/null
+++ b/Documentation/arm/00-INDEX
@@ -0,0 +1,52 @@
+00-INDEX
+ - this file
+Booting
+ - requirements for booting
+CCN.txt
+ - Cache Coherent Network ring-bus and perf PMU driver.
+Interrupts
+ - ARM Interrupt subsystem documentation
+IXP4xx
+ - Intel IXP4xx Network processor.
+Makefile
+ - Build sourcefiles as part of the Documentation-build for arm
+Netwinder
+ - Netwinder specific documentation
+Porting
+ - Symbol definitions for porting Linux to a new ARM machine.
+Setup
+ - Kernel initialization parameters on ARM Linux
+README
+ - General ARM documentation
+SA1100/
+ - SA1100 documentation
+Samsung-S3C24XX/
+ - S3C24XX ARM Linux Overview
+SPEAr/
+ - ST SPEAr platform Linux Overview
+VFP/
+ - Release notes for Linux Kernel Vector Floating Point support code
+cluster-pm-race-avoidance.txt
+ - Algorithm for CPU and Cluster setup/teardown
+empeg/
+ - Ltd's Empeg MP3 Car Audio Player
+firmware.txt
+ - Secure firmware registration and calling.
+kernel_mode_neon.txt
+ - How to use NEON instructions in kernel mode
+kernel_user_helpers.txt
+ - Helper functions in kernel space made available for userspace.
+mem_alignment
+ - alignment abort handler documentation
+memory.txt
+ - description of the virtual memory layout
+nwfpe/
+ - NWFPE floating point emulator documentation
+swp_emulation
+ - SWP/SWPB emulation handler/logging description
+tcm.txt
+ - ARM Tightly Coupled Memory
+uefi.txt
+ - [U]EFI configuration and runtime services documentation
+vlocks.txt
+ - Voting locks, low-level mechanism relying on memory system atomic writes.
diff --git a/Documentation/arm/Atmel/README b/Documentation/arm/Atmel/README
new file mode 100644
index 000000000..c53a19b4a
--- /dev/null
+++ b/Documentation/arm/Atmel/README
@@ -0,0 +1,124 @@
+ARM Atmel SoCs (aka AT91)
+=========================
+
+
+Introduction
+------------
+This document gives useful information about the ARM Atmel SoCs that are
+currently supported in Linux Mainline (you know, the one on kernel.org).
+
+It is important to note that the Atmel | SMART ARM-based MPU product line is
+historically named "AT91" or "at91" throughout the Linux kernel development
+process even if this product prefix has completely disappeared from the
+official Atmel product name. Anyway, files, directories, git trees,
+git branches/tags and email subject always contain this "at91" sub-string.
+
+
+AT91 SoCs
+---------
+Documentation and detailled datasheet for each product are available on
+the Atmel website: http://www.atmel.com.
+
+ Flavors:
+ * ARM 920 based SoC
+ - at91rm9200
+ + Datasheet
+ http://www.atmel.com/Images/doc1768.pdf
+
+ * ARM 926 based SoCs
+ - at91sam9260
+ + Datasheet
+ http://www.atmel.com/Images/doc6221.pdf
+
+ - at91sam9xe
+ + Datasheet
+ http://www.atmel.com/Images/Atmel-6254-32-bit-ARM926EJ-S-Embedded-Microprocessor-SAM9XE_Datasheet.pdf
+
+ - at91sam9261
+ + Datasheet
+ http://www.atmel.com/Images/doc6062.pdf
+
+ - at91sam9263
+ + Datasheet
+ http://www.atmel.com/Images/Atmel_6249_32-bit-ARM926EJ-S-Microcontroller_SAM9263_Datasheet.pdf
+
+ - at91sam9rl
+ + Datasheet
+ http://www.atmel.com/Images/doc6289.pdf
+
+ - at91sam9g20
+ + Datasheet
+ http://www.atmel.com/Images/doc6384.pdf
+
+ - at91sam9g45 family
+ - at91sam9g45
+ - at91sam9g46
+ - at91sam9m10
+ - at91sam9m11 (device superset)
+ + Datasheet
+ http://www.atmel.com/Images/Atmel-6437-32-bit-ARM926-Embedded-Microprocessor-SAM9M11_Datasheet.pdf
+
+ - at91sam9x5 family (aka "The 5 series")
+ - at91sam9g15
+ - at91sam9g25
+ - at91sam9g35
+ - at91sam9x25
+ - at91sam9x35
+ + Datasheet (can be considered as covering the whole family)
+ http://www.atmel.com/Images/Atmel_11055_32-bit-ARM926EJ-S-Microcontroller_SAM9X35_Datasheet.pdf
+
+ - at91sam9n12
+ + Datasheet
+ http://www.atmel.com/Images/Atmel_11063_32-bit-ARM926EJ-S-Microcontroller_SAM9N12CN11CN12_Datasheet.pdf
+
+ * ARM Cortex-A5 based SoCs
+ - sama5d3 family
+ - sama5d31
+ - sama5d33
+ - sama5d34
+ - sama5d35
+ - sama5d36 (device superset)
+ + Datasheet
+ http://www.atmel.com/Images/Atmel-11121-32-bit-Cortex-A5-Microcontroller-SAMA5D3_Datasheet.pdf
+
+ * ARM Cortex-A5 + NEON based SoCs
+ - sama5d4 family
+ - sama5d41
+ - sama5d42
+ - sama5d43
+ - sama5d44 (device superset)
+ + Datasheet
+ http://www.atmel.com/Images/Atmel-11238-32-bit-Cortex-A5-Microcontroller-SAMA5D4_Datasheet.pdf
+
+
+Linux kernel information
+------------------------
+Linux kernel mach directory: arch/arm/mach-at91
+MAINTAINERS entry is: "ARM/ATMEL AT91RM9200 AND AT91SAM ARM ARCHITECTURES"
+
+
+Device Tree for AT91 SoCs and boards
+------------------------------------
+All AT91 SoCs are converted to Device Tree. Since Linux 3.19, these products
+must use this method to boot the Linux kernel.
+
+Work In Progress statement:
+Device Tree files and Device Tree bindings that apply to AT91 SoCs and boards are
+considered as "Unstable". To be completely clear, any at91 binding can change at
+any time. So, be sure to use a Device Tree Binary and a Kernel Image generated from
+the same source tree.
+Please refer to the Documentation/devicetree/bindings/ABI.txt file for a
+definition of a "Stable" binding/ABI.
+This statement will be removed by AT91 MAINTAINERS when appropriate.
+
+Naming conventions and best practice:
+- SoCs Device Tree Source Include files are named after the official name of
+ the product (at91sam9g20.dtsi or sama5d33.dtsi for instance).
+- Device Tree Source Include files (.dtsi) are used to collect common nodes that can be
+ shared across SoCs or boards (sama5d3.dtsi or at91sam9x5cm.dtsi for instance).
+ When collecting nodes for a particular peripheral or topic, the identifier have to
+ be placed at the end of the file name, separated with a "_" (at91sam9x5_can.dtsi
+ or sama5d3_gmac.dtsi for example).
+- board Device Tree Source files (.dts) are prefixed by the string "at91-" so
+ that they can be identified easily. Note that some files are historical exceptions
+ to this rule (sama5d3[13456]ek.dts, usb_a9g20.dts or animeo_ip.dts for example).
diff --git a/Documentation/arm/Booting b/Documentation/arm/Booting
new file mode 100644
index 000000000..83c1df2fc
--- /dev/null
+++ b/Documentation/arm/Booting
@@ -0,0 +1,218 @@
+ Booting ARM Linux
+ =================
+
+Author: Russell King
+Date : 18 May 2002
+
+The following documentation is relevant to 2.4.18-rmk6 and beyond.
+
+In order to boot ARM Linux, you require a boot loader, which is a small
+program that runs before the main kernel. The boot loader is expected
+to initialise various devices, and eventually call the Linux kernel,
+passing information to the kernel.
+
+Essentially, the boot loader should provide (as a minimum) the
+following:
+
+1. Setup and initialise the RAM.
+2. Initialise one serial port.
+3. Detect the machine type.
+4. Setup the kernel tagged list.
+5. Load initramfs.
+6. Call the kernel image.
+
+
+1. Setup and initialise RAM
+---------------------------
+
+Existing boot loaders: MANDATORY
+New boot loaders: MANDATORY
+
+The boot loader is expected to find and initialise all RAM that the
+kernel will use for volatile data storage in the system. It performs
+this in a machine dependent manner. (It may use internal algorithms
+to automatically locate and size all RAM, or it may use knowledge of
+the RAM in the machine, or any other method the boot loader designer
+sees fit.)
+
+
+2. Initialise one serial port
+-----------------------------
+
+Existing boot loaders: OPTIONAL, RECOMMENDED
+New boot loaders: OPTIONAL, RECOMMENDED
+
+The boot loader should initialise and enable one serial port on the
+target. This allows the kernel serial driver to automatically detect
+which serial port it should use for the kernel console (generally
+used for debugging purposes, or communication with the target.)
+
+As an alternative, the boot loader can pass the relevant 'console='
+option to the kernel via the tagged lists specifying the port, and
+serial format options as described in
+
+ Documentation/kernel-parameters.txt.
+
+
+3. Detect the machine type
+--------------------------
+
+Existing boot loaders: OPTIONAL
+New boot loaders: MANDATORY except for DT-only platforms
+
+The boot loader should detect the machine type its running on by some
+method. Whether this is a hard coded value or some algorithm that
+looks at the connected hardware is beyond the scope of this document.
+The boot loader must ultimately be able to provide a MACH_TYPE_xxx
+value to the kernel. (see linux/arch/arm/tools/mach-types). This
+should be passed to the kernel in register r1.
+
+For DT-only platforms, the machine type will be determined by device
+tree. set the machine type to all ones (~0). This is not strictly
+necessary, but assures that it will not match any existing types.
+
+4. Setup boot data
+------------------
+
+Existing boot loaders: OPTIONAL, HIGHLY RECOMMENDED
+New boot loaders: MANDATORY
+
+The boot loader must provide either a tagged list or a dtb image for
+passing configuration data to the kernel. The physical address of the
+boot data is passed to the kernel in register r2.
+
+4a. Setup the kernel tagged list
+--------------------------------
+
+The boot loader must create and initialise the kernel tagged list.
+A valid tagged list starts with ATAG_CORE and ends with ATAG_NONE.
+The ATAG_CORE tag may or may not be empty. An empty ATAG_CORE tag
+has the size field set to '2' (0x00000002). The ATAG_NONE must set
+the size field to zero.
+
+Any number of tags can be placed in the list. It is undefined
+whether a repeated tag appends to the information carried by the
+previous tag, or whether it replaces the information in its
+entirety; some tags behave as the former, others the latter.
+
+The boot loader must pass at a minimum the size and location of
+the system memory, and root filesystem location. Therefore, the
+minimum tagged list should look:
+
+ +-----------+
+base -> | ATAG_CORE | |
+ +-----------+ |
+ | ATAG_MEM | | increasing address
+ +-----------+ |
+ | ATAG_NONE | |
+ +-----------+ v
+
+The tagged list should be stored in system RAM.
+
+The tagged list must be placed in a region of memory where neither
+the kernel decompressor nor initrd 'bootp' program will overwrite
+it. The recommended placement is in the first 16KiB of RAM.
+
+4b. Setup the device tree
+-------------------------
+
+The boot loader must load a device tree image (dtb) into system ram
+at a 64bit aligned address and initialize it with the boot data. The
+dtb format is documented in Documentation/devicetree/booting-without-of.txt.
+The kernel will look for the dtb magic value of 0xd00dfeed at the dtb
+physical address to determine if a dtb has been passed instead of a
+tagged list.
+
+The boot loader must pass at a minimum the size and location of the
+system memory, and the root filesystem location. The dtb must be
+placed in a region of memory where the kernel decompressor will not
+overwrite it, whilst remaining within the region which will be covered
+by the kernel's low-memory mapping.
+
+A safe location is just above the 128MiB boundary from start of RAM.
+
+5. Load initramfs.
+------------------
+
+Existing boot loaders: OPTIONAL
+New boot loaders: OPTIONAL
+
+If an initramfs is in use then, as with the dtb, it must be placed in
+a region of memory where the kernel decompressor will not overwrite it
+while also with the region which will be covered by the kernel's
+low-memory mapping.
+
+A safe location is just above the device tree blob which itself will
+be loaded just above the 128MiB boundary from the start of RAM as
+recommended above.
+
+6. Calling the kernel image
+---------------------------
+
+Existing boot loaders: MANDATORY
+New boot loaders: MANDATORY
+
+There are two options for calling the kernel zImage. If the zImage
+is stored in flash, and is linked correctly to be run from flash,
+then it is legal for the boot loader to call the zImage in flash
+directly.
+
+The zImage may also be placed in system RAM and called there. The
+kernel should be placed in the first 128MiB of RAM. It is recommended
+that it is loaded above 32MiB in order to avoid the need to relocate
+prior to decompression, which will make the boot process slightly
+faster.
+
+When booting a raw (non-zImage) kernel the constraints are tighter.
+In this case the kernel must be loaded at an offset into system equal
+to TEXT_OFFSET - PAGE_OFFSET.
+
+In any case, the following conditions must be met:
+
+- Quiesce all DMA capable devices so that memory does not get
+ corrupted by bogus network packets or disk data. This will save
+ you many hours of debug.
+
+- CPU register settings
+ r0 = 0,
+ r1 = machine type number discovered in (3) above.
+ r2 = physical address of tagged list in system RAM, or
+ physical address of device tree block (dtb) in system RAM
+
+- CPU mode
+ All forms of interrupts must be disabled (IRQs and FIQs)
+
+ For CPUs which do not include the ARM virtualization extensions, the
+ CPU must be in SVC mode. (A special exception exists for Angel)
+
+ CPUs which include support for the virtualization extensions can be
+ entered in HYP mode in order to enable the kernel to make full use of
+ these extensions. This is the recommended boot method for such CPUs,
+ unless the virtualisations are already in use by a pre-installed
+ hypervisor.
+
+ If the kernel is not entered in HYP mode for any reason, it must be
+ entered in SVC mode.
+
+- Caches, MMUs
+ The MMU must be off.
+ Instruction cache may be on or off.
+ Data cache must be off.
+
+ If the kernel is entered in HYP mode, the above requirements apply to
+ the HYP mode configuration in addition to the ordinary PL1 (privileged
+ kernel modes) configuration. In addition, all traps into the
+ hypervisor must be disabled, and PL1 access must be granted for all
+ peripherals and CPU resources for which this is architecturally
+ possible. Except for entering in HYP mode, the system configuration
+ should be such that a kernel which does not include support for the
+ virtualization extensions can boot correctly without extra help.
+
+- The boot loader is expected to call the kernel image by jumping
+ directly to the first instruction of the kernel image.
+
+ On CPUs supporting the ARM instruction set, the entry must be
+ made in ARM state, even for a Thumb-2 kernel.
+
+ On CPUs supporting only the Thumb instruction set such as
+ Cortex-M class CPUs, the entry must be made in Thumb state.
diff --git a/Documentation/arm/CCN.txt b/Documentation/arm/CCN.txt
new file mode 100644
index 000000000..0632b3aad
--- /dev/null
+++ b/Documentation/arm/CCN.txt
@@ -0,0 +1,52 @@
+ARM Cache Coherent Network
+==========================
+
+CCN-504 is a ring-bus interconnect consisting of 11 crosspoints
+(XPs), with each crosspoint supporting up to two device ports,
+so nodes (devices) 0 and 1 are connected to crosspoint 0,
+nodes 2 and 3 to crosspoint 1 etc.
+
+PMU (perf) driver
+-----------------
+
+The CCN driver registers a perf PMU driver, which provides
+description of available events and configuration options
+in sysfs, see /sys/bus/event_source/devices/ccn*.
+
+The "format" directory describes format of the config, config1
+and config2 fields of the perf_event_attr structure. The "events"
+directory provides configuration templates for all documented
+events, that can be used with perf tool. For example "xp_valid_flit"
+is an equivalent of "type=0x8,event=0x4". Other parameters must be
+explicitly specified. For events originating from device, "node"
+defines its index. All crosspoint events require "xp" (index),
+"port" (device port number) and "vc" (virtual channel ID) and
+"dir" (direction). Watchpoints (special "event" value 0xfe) also
+require comparator values ("cmp_l" and "cmp_h") and "mask", being
+index of the comparator mask.
+
+Masks are defined separately from the event description
+(due to limited number of the config values) in the "cmp_mask"
+directory, with first 8 configurable by user and additional
+4 hardcoded for the most frequent use cases.
+
+Cycle counter is described by a "type" value 0xff and does
+not require any other settings.
+
+Example of perf tool use:
+
+/ # perf list | grep ccn
+ ccn/cycles/ [Kernel PMU event]
+<...>
+ ccn/xp_valid_flit/ [Kernel PMU event]
+<...>
+
+/ # perf stat -C 0 -e ccn/cycles/,ccn/xp_valid_flit,xp=1,port=0,vc=1,dir=1/ \
+ sleep 1
+
+The driver does not support sampling, therefore "perf record" will
+not work. Also notice that only single cpu is being selected
+("-C 0") - this is because perf framework does not support
+"non-CPU related" counters (yet?) so system-wide session ("-a")
+would try (and in most cases fail) to set up the same event
+per each CPU.
diff --git a/Documentation/arm/IXP4xx b/Documentation/arm/IXP4xx
new file mode 100644
index 000000000..e7331d5b1
--- /dev/null
+++ b/Documentation/arm/IXP4xx
@@ -0,0 +1,168 @@
+
+-------------------------------------------------------------------------
+Release Notes for Linux on Intel's IXP4xx Network Processor
+
+Maintained by Deepak Saxena <dsaxena@plexity.net>
+-------------------------------------------------------------------------
+
+1. Overview
+
+Intel's IXP4xx network processor is a highly integrated SOC that
+is targeted for network applications, though it has become popular
+in industrial control and other areas due to low cost and power
+consumption. The IXP4xx family currently consists of several processors
+that support different network offload functions such as encryption,
+routing, firewalling, etc. The IXP46x family is an updated version which
+supports faster speeds, new memory and flash configurations, and more
+integration such as an on-chip I2C controller.
+
+For more information on the various versions of the CPU, see:
+
+ http://developer.intel.com/design/network/products/npfamily/ixp4xx.htm
+
+Intel also made the IXCP1100 CPU for sometime which is an IXP4xx
+stripped of much of the network intelligence.
+
+2. Linux Support
+
+Linux currently supports the following features on the IXP4xx chips:
+
+- Dual serial ports
+- PCI interface
+- Flash access (MTD/JFFS)
+- I2C through GPIO on IXP42x
+- GPIO for input/output/interrupts
+ See arch/arm/mach-ixp4xx/include/mach/platform.h for access functions.
+- Timers (watchdog, OS)
+
+The following components of the chips are not supported by Linux /*(DEBLOBBED)*/:
+
+- USB device interface
+- Network interfaces (HSS, Utopia, NPEs, etc)
+- Network offload functionality
+
+/*(DEBLOBBED)*/
+
+DO NOT POST QUESTIONS TO THE LINUX MAILING LISTS REGARDING THE PROPRIETARY
+SOFTWARE.
+
+There are several websites that provide directions/pointers on using
+Intel's software:
+
+ http://sourceforge.net/projects/ixp4xx-osdg/
+ Open Source Developer's Guide for using uClinux and the Intel libraries
+
+http://gatewaymaker.sourceforge.net/
+ Simple one page summary of building a gateway using an IXP425 and Linux
+
+http://ixp425.sourceforge.net/
+ ATM device driver for IXP425 that relies on Intel's libraries
+
+3. Known Issues/Limitations
+
+3a. Limited inbound PCI window
+
+The IXP4xx family allows for up to 256MB of memory but the PCI interface
+can only expose 64MB of that memory to the PCI bus. This means that if
+you are running with > 64MB, all PCI buffers outside of the accessible
+range will be bounced using the routines in arch/arm/common/dmabounce.c.
+
+3b. Limited outbound PCI window
+
+IXP4xx provides two methods of accessing PCI memory space:
+
+1) A direct mapped window from 0x48000000 to 0x4bffffff (64MB).
+ To access PCI via this space, we simply ioremap() the BAR
+ into the kernel and we can use the standard read[bwl]/write[bwl]
+ macros. This is the preffered method due to speed but it
+ limits the system to just 64MB of PCI memory. This can be
+ problamatic if using video cards and other memory-heavy devices.
+
+2) If > 64MB of memory space is required, the IXP4xx can be
+ configured to use indirect registers to access PCI This allows
+ for up to 128MB (0x48000000 to 0x4fffffff) of memory on the bus.
+ The disadvantage of this is that every PCI access requires
+ three local register accesses plus a spinlock, but in some
+ cases the performance hit is acceptable. In addition, you cannot
+ mmap() PCI devices in this case due to the indirect nature
+ of the PCI window.
+
+By default, the direct method is used for performance reasons. If
+you need more PCI memory, enable the IXP4XX_INDIRECT_PCI config option.
+
+3c. GPIO as Interrupts
+
+Currently the code only handles level-sensitive GPIO interrupts
+
+4. Supported platforms
+
+ADI Engineering Coyote Gateway Reference Platform
+http://www.adiengineering.com/productsCoyote.html
+
+ The ADI Coyote platform is reference design for those building
+ small residential/office gateways. One NPE is connected to a 10/100
+ interface, one to 4-port 10/100 switch, and the third to and ADSL
+ interface. In addition, it also supports to POTs interfaces connected
+ via SLICs. Note that those are not supported by Linux ATM. Finally,
+ the platform has two mini-PCI slots used for 802.11[bga] cards.
+ Finally, there is an IDE port hanging off the expansion bus.
+
+Gateworks Avila Network Platform
+http://www.gateworks.com/support/overview.php
+
+ The Avila platform is basically and IXDP425 with the 4 PCI slots
+ replaced with mini-PCI slots and a CF IDE interface hanging off
+ the expansion bus.
+
+Intel IXDP425 Development Platform
+http://www.intel.com/design/network/products/npfamily/ixdpg425.htm
+
+ This is Intel's standard reference platform for the IXDP425 and is
+ also known as the Richfield board. It contains 4 PCI slots, 16MB
+ of flash, two 10/100 ports and one ADSL port.
+
+Intel IXDP465 Development Platform
+http://www.intel.com/design/network/products/npfamily/ixdp465.htm
+
+ This is basically an IXDP425 with an IXP465 and 32M of flash instead
+ of just 16.
+
+Intel IXDPG425 Development Platform
+
+ This is basically and ADI Coyote board with a NEC EHCI controller
+ added. One issue with this board is that the mini-PCI slots only
+ have the 3.3v line connected, so you can't use a PCI to mini-PCI
+ adapter with an E100 card. So to NFS root you need to use either
+ the CSR or a WiFi card and a ramdisk that BOOTPs and then does
+ a pivot_root to NFS.
+
+Motorola PrPMC1100 Processor Mezanine Card
+http://www.fountainsys.com
+
+ The PrPMC1100 is based on the IXCP1100 and is meant to plug into
+ and IXP2400/2800 system to act as the system controller. It simply
+ contains a CPU and 16MB of flash on the board and needs to be
+ plugged into a carrier board to function. Currently Linux only
+ supports the Motorola PrPMC carrier board for this platform.
+
+5. TODO LIST
+
+- Add support for Coyote IDE
+- Add support for edge-based GPIO interrupts
+- Add support for CF IDE on expansion bus
+
+6. Thanks
+
+The IXP4xx work has been funded by Intel Corp. and MontaVista Software, Inc.
+
+The following people have contributed patches/comments/etc:
+
+Lennerty Buytenhek
+Lutz Jaenicke
+Justin Mayfield
+Robert E. Ranslam
+[I know I've forgotten others, please email me to be added]
+
+-------------------------------------------------------------------------
+
+Last Update: 01/04/2005
diff --git a/Documentation/arm/Interrupts b/Documentation/arm/Interrupts
new file mode 100644
index 000000000..f09ab1b90
--- /dev/null
+++ b/Documentation/arm/Interrupts
@@ -0,0 +1,167 @@
+2.5.2-rmk5
+----------
+
+This is the first kernel that contains a major shake up of some of the
+major architecture-specific subsystems.
+
+Firstly, it contains some pretty major changes to the way we handle the
+MMU TLB. Each MMU TLB variant is now handled completely separately -
+we have TLB v3, TLB v4 (without write buffer), TLB v4 (with write buffer),
+and finally TLB v4 (with write buffer, with I TLB invalidate entry).
+There is more assembly code inside each of these functions, mainly to
+allow more flexible TLB handling for the future.
+
+Secondly, the IRQ subsystem.
+
+The 2.5 kernels will be having major changes to the way IRQs are handled.
+Unfortunately, this means that machine types that touch the irq_desc[]
+array (basically all machine types) will break, and this means every
+machine type that we currently have.
+
+Lets take an example. On the Assabet with Neponset, we have:
+
+ GPIO25 IRR:2
+ SA1100 ------------> Neponset -----------> SA1111
+ IIR:1
+ -----------> USAR
+ IIR:0
+ -----------> SMC9196
+
+The way stuff currently works, all SA1111 interrupts are mutually
+exclusive of each other - if you're processing one interrupt from the
+SA1111 and another comes in, you have to wait for that interrupt to
+finish processing before you can service the new interrupt. Eg, an
+IDE PIO-based interrupt on the SA1111 excludes all other SA1111 and
+SMC9196 interrupts until it has finished transferring its multi-sector
+data, which can be a long time. Note also that since we loop in the
+SA1111 IRQ handler, SA1111 IRQs can hold off SMC9196 IRQs indefinitely.
+
+
+The new approach brings several new ideas...
+
+We introduce the concept of a "parent" and a "child". For example,
+to the Neponset handler, the "parent" is GPIO25, and the "children"d
+are SA1111, SMC9196 and USAR.
+
+We also bring the idea of an IRQ "chip" (mainly to reduce the size of
+the irqdesc array). This doesn't have to be a real "IC"; indeed the
+SA11x0 IRQs are handled by two separate "chip" structures, one for
+GPIO0-10, and another for all the rest. It is just a container for
+the various operations (maybe this'll change to a better name).
+This structure has the following operations:
+
+struct irqchip {
+ /*
+ * Acknowledge the IRQ.
+ * If this is a level-based IRQ, then it is expected to mask the IRQ
+ * as well.
+ */
+ void (*ack)(unsigned int irq);
+ /*
+ * Mask the IRQ in hardware.
+ */
+ void (*mask)(unsigned int irq);
+ /*
+ * Unmask the IRQ in hardware.
+ */
+ void (*unmask)(unsigned int irq);
+ /*
+ * Re-run the IRQ
+ */
+ void (*rerun)(unsigned int irq);
+ /*
+ * Set the type of the IRQ.
+ */
+ int (*type)(unsigned int irq, unsigned int, type);
+};
+
+ack - required. May be the same function as mask for IRQs
+ handled by do_level_IRQ.
+mask - required.
+unmask - required.
+rerun - optional. Not required if you're using do_level_IRQ for all
+ IRQs that use this 'irqchip'. Generally expected to re-trigger
+ the hardware IRQ if possible. If not, may call the handler
+ directly.
+type - optional. If you don't support changing the type of an IRQ,
+ it should be null so people can detect if they are unable to
+ set the IRQ type.
+
+For each IRQ, we keep the following information:
+
+ - "disable" depth (number of disable_irq()s without enable_irq()s)
+ - flags indicating what we can do with this IRQ (valid, probe,
+ noautounmask) as before
+ - status of the IRQ (probing, enable, etc)
+ - chip
+ - per-IRQ handler
+ - irqaction structure list
+
+The handler can be one of the 3 standard handlers - "level", "edge" and
+"simple", or your own specific handler if you need to do something special.
+
+The "level" handler is what we currently have - its pretty simple.
+"edge" knows about the brokenness of such IRQ implementations - that you
+need to leave the hardware IRQ enabled while processing it, and queueing
+further IRQ events should the IRQ happen again while processing. The
+"simple" handler is very basic, and does not perform any hardware
+manipulation, nor state tracking. This is useful for things like the
+SMC9196 and USAR above.
+
+So, what's changed?
+
+1. Machine implementations must not write to the irqdesc array.
+
+2. New functions to manipulate the irqdesc array. The first 4 are expected
+ to be useful only to machine specific code. The last is recommended to
+ only be used by machine specific code, but may be used in drivers if
+ absolutely necessary.
+
+ set_irq_chip(irq,chip)
+
+ Set the mask/unmask methods for handling this IRQ
+
+ set_irq_handler(irq,handler)
+
+ Set the handler for this IRQ (level, edge, simple)
+
+ set_irq_chained_handler(irq,handler)
+
+ Set a "chained" handler for this IRQ - automatically
+ enables this IRQ (eg, Neponset and SA1111 handlers).
+
+ set_irq_flags(irq,flags)
+
+ Set the valid/probe/noautoenable flags.
+
+ set_irq_type(irq,type)
+
+ Set active the IRQ edge(s)/level. This replaces the
+ SA1111 INTPOL manipulation, and the set_GPIO_IRQ_edge()
+ function. Type should be one of IRQ_TYPE_xxx defined in
+ <linux/irq.h>
+
+3. set_GPIO_IRQ_edge() is obsolete, and should be replaced by set_irq_type.
+
+4. Direct access to SA1111 INTPOL is deprecated. Use set_irq_type instead.
+
+5. A handler is expected to perform any necessary acknowledgement of the
+ parent IRQ via the correct chip specific function. For instance, if
+ the SA1111 is directly connected to a SA1110 GPIO, then you should
+ acknowledge the SA1110 IRQ each time you re-read the SA1111 IRQ status.
+
+6. For any child which doesn't have its own IRQ enable/disable controls
+ (eg, SMC9196), the handler must mask or acknowledge the parent IRQ
+ while the child handler is called, and the child handler should be the
+ "simple" handler (not "edge" nor "level"). After the handler completes,
+ the parent IRQ should be unmasked, and the status of all children must
+ be re-checked for pending events. (see the Neponset IRQ handler for
+ details).
+
+7. fixup_irq() is gone, as is arch/arm/mach-*/include/mach/irq.h
+
+Please note that this will not solve all problems - some of them are
+hardware based. Mixing level-based and edge-based IRQs on the same
+parent signal (eg neponset) is one such area where a software based
+solution can't provide the full answer to low IRQ latency.
+
diff --git a/Documentation/arm/Marvell/README b/Documentation/arm/Marvell/README
new file mode 100644
index 000000000..18a775d10
--- /dev/null
+++ b/Documentation/arm/Marvell/README
@@ -0,0 +1,284 @@
+ARM Marvell SoCs
+================
+
+This document lists all the ARM Marvell SoCs that are currently
+supported in mainline by the Linux kernel. As the Marvell families of
+SoCs are large and complex, it is hard to understand where the support
+for a particular SoC is available in the Linux kernel. This document
+tries to help in understanding where those SoCs are supported, and to
+match them with their corresponding public datasheet, when available.
+
+Orion family
+------------
+
+ Flavors:
+ 88F5082
+ 88F5181
+ 88F5181L
+ 88F5182
+ Datasheet : http://www.embeddedarm.com/documentation/third-party/MV88F5182-datasheet.pdf
+ Programmer's User Guide : http://www.embeddedarm.com/documentation/third-party/MV88F5182-opensource-manual.pdf
+ User Manual : http://www.embeddedarm.com/documentation/third-party/MV88F5182-usermanual.pdf
+ 88F5281
+ Datasheet : http://www.ocmodshop.com/images/reviews/networking/qnap_ts409u/marvel_88f5281_data_sheet.pdf
+ 88F6183
+ Core: Feroceon ARMv5 compatible
+ Linux kernel mach directory: arch/arm/mach-orion5x
+ Linux kernel plat directory: arch/arm/plat-orion
+
+Kirkwood family
+---------------
+
+ Flavors:
+ 88F6282 a.k.a Armada 300
+ Product Brief : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
+ 88F6283 a.k.a Armada 310
+ Product Brief : http://www.marvell.com/embedded-processors/armada-300/assets/armada_310.pdf
+ 88F6190
+ Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6190-003_WEB.pdf
+ Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
+ Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ 88F6192
+ Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6192-003_ver1.pdf
+ Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F619x_OpenSource.pdf
+ Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ 88F6182
+ 88F6180
+ Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6180-003_ver1.pdf
+ Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6180_OpenSource.pdf
+ Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ 88F6281
+ Product Brief : http://www.marvell.com/embedded-processors/kirkwood/assets/88F6281-004_ver1.pdf
+ Hardware Spec : http://www.marvell.com/embedded-processors/kirkwood/assets/HW_88F6281_OpenSource.pdf
+ Functional Spec: http://www.marvell.com/embedded-processors/kirkwood/assets/FS_88F6180_9x_6281_OpenSource.pdf
+ Homepage: http://www.marvell.com/embedded-processors/kirkwood/
+ Core: Feroceon ARMv5 compatible
+ Linux kernel mach directory: arch/arm/mach-mvebu
+ Linux kernel plat directory: none
+
+Discovery family
+----------------
+
+ Flavors:
+ MV78100
+ Product Brief : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78100-003_WEB.pdf
+ Hardware Spec : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78100_OpenSource.pdf
+ Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
+ MV78200
+ Product Brief : http://www.marvell.com/embedded-processors/discovery-innovation/assets/MV78200-002_WEB.pdf
+ Hardware Spec : http://www.marvell.com/embedded-processors/discovery-innovation/assets/HW_MV78200_OpenSource.pdf
+ Functional Spec: http://www.marvell.com/embedded-processors/discovery-innovation/assets/FS_MV76100_78100_78200_OpenSource.pdf
+ MV76100
+ Not supported by the Linux kernel.
+
+ Core: Feroceon ARMv5 compatible
+
+ Linux kernel mach directory: arch/arm/mach-mv78xx0
+ Linux kernel plat directory: arch/arm/plat-orion
+
+EBU Armada family
+-----------------
+
+ Armada 370 Flavors:
+ 88F6710
+ 88F6707
+ 88F6W11
+ Product Brief: http://www.marvell.com/embedded-processors/armada-300/assets/Marvell_ARMADA_370_SoC.pdf
+ Hardware Spec: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-datasheet.pdf
+ Functional Spec: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA370-FunctionalSpec-datasheet.pdf
+
+ Armada 375 Flavors:
+ 88F6720
+ Product Brief: http://www.marvell.com/embedded-processors/armada-300/assets/ARMADA_375_SoC-01_product_brief.pdf
+
+ Armada 380/385 Flavors:
+ 88F6810
+ 88F6820
+ 88F6828
+
+ Armada 390/398 Flavors:
+ 88F6920
+ 88F6928
+ Product infos: http://www.marvell.com/embedded-processors/armada-39x/
+
+ Armada XP Flavors:
+ MV78230
+ MV78260
+ MV78460
+ NOTE: not to be confused with the non-SMP 78xx0 SoCs
+ Product Brief: http://www.marvell.com/embedded-processors/armada-xp/assets/Marvell-ArmadaXP-SoC-product%20brief.pdf
+ Functional Spec: http://www.marvell.com/embedded-processors/armada-xp/assets/ARMADA-XP-Functional-SpecDatasheet.pdf
+ Hardware Specs:
+ http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78230_OS.PDF
+ http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78260_OS.PDF
+ http://www.marvell.com/embedded-processors/armada-xp/assets/HW_MV78460_OS.PDF
+
+ Core: Sheeva ARMv7 compatible
+
+ Linux kernel mach directory: arch/arm/mach-mvebu
+ Linux kernel plat directory: none
+
+Avanta family
+-------------
+
+ Flavors:
+ 88F6510
+ 88F6530P
+ 88F6550
+ 88F6560
+ Homepage : http://www.marvell.com/broadband/
+ Product Brief: http://www.marvell.com/broadband/assets/Marvell_Avanta_88F6510_305_060-001_product_brief.pdf
+ No public datasheet available.
+
+ Core: ARMv5 compatible
+
+ Linux kernel mach directory: no code in mainline yet, planned for the future
+ Linux kernel plat directory: no code in mainline yet, planned for the future
+
+Dove family (application processor)
+-----------------------------------
+
+ Flavors:
+ 88AP510 a.k.a Armada 510
+ Product Brief : http://www.marvell.com/application-processors/armada-500/assets/Marvell_Armada510_SoC.pdf
+ Hardware Spec : http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Hardware-Spec.pdf
+ Functional Spec : http://www.marvell.com/application-processors/armada-500/assets/Armada-510-Functional-Spec.pdf
+ Homepage: http://www.marvell.com/application-processors/armada-500/
+ Core: ARMv7 compatible
+
+ Directory: arch/arm/mach-mvebu (DT enabled platforms)
+ arch/arm/mach-dove (non-DT enabled platforms)
+
+PXA 2xx/3xx/93x/95x family
+--------------------------
+
+ Flavors:
+ PXA21x, PXA25x, PXA26x
+ Application processor only
+ Core: ARMv5 XScale core
+ PXA270, PXA271, PXA272
+ Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_pb.pdf
+ Design guide : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_design_guide.pdf
+ Developers manual : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_dev_man.pdf
+ Specification : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_emts.pdf
+ Specification update : http://www.marvell.com/application-processors/pxa-family/assets/pxa_27x_spec_update.pdf
+ Application processor only
+ Core: ARMv5 XScale core
+ PXA300, PXA310, PXA320
+ PXA 300 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA300_PB_R4.pdf
+ PXA 310 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA310_PB_R4.pdf
+ PXA 320 Product Brief : http://www.marvell.com/application-processors/pxa-family/assets/PXA320_PB_R4.pdf
+ Design guide : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Design_Guide.pdf
+ Developers manual : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Developers_Manual.zip
+ Specifications : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_EMTS.pdf
+ Specification Update : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_Spec_Update.zip
+ Reference Manual : http://www.marvell.com/application-processors/pxa-family/assets/PXA3xx_TavorP_BootROM_Ref_Manual.pdf
+ Application processor only
+ Core: ARMv5 XScale core
+ PXA930, PXA935
+ Application processor with Communication processor
+ Core: ARMv5 XScale core
+ PXA955
+ Application processor with Communication processor
+ Core: ARMv7 compatible Sheeva PJ4 core
+
+ Comments:
+
+ * This line of SoCs originates from the XScale family developed by
+ Intel and acquired by Marvell in ~2006. The PXA21x, PXA25x,
+ PXA26x, PXA27x, PXA3xx and PXA93x were developed by Intel, while
+ the later PXA95x were developed by Marvell.
+
+ * Due to their XScale origin, these SoCs have virtually nothing in
+ common with the other (Kirkwood, Dove, etc.) families of Marvell
+ SoCs, except with the MMP/MMP2 family of SoCs.
+
+ Linux kernel mach directory: arch/arm/mach-pxa
+ Linux kernel plat directory: arch/arm/plat-pxa
+
+MMP/MMP2 family (communication processor)
+-----------------------------------------
+
+ Flavors:
+ PXA168, a.k.a Armada 168
+ Homepage : http://www.marvell.com/application-processors/armada-100/armada-168.jsp
+ Product brief : http://www.marvell.com/application-processors/armada-100/assets/pxa_168_pb.pdf
+ Hardware manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_datasheet.pdf
+ Software manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_software_manual.pdf
+ Specification update : http://www.marvell.com/application-processors/armada-100/assets/ARMADA16x_Spec_update.pdf
+ Boot ROM manual : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_ref_manual.pdf
+ App node package : http://www.marvell.com/application-processors/armada-100/assets/armada_16x_app_note_package.pdf
+ Application processor only
+ Core: ARMv5 compatible Marvell PJ1 (Mohawk)
+ PXA910
+ Homepage : http://www.marvell.com/communication-processors/pxa910/
+ Product Brief : http://www.marvell.com/communication-processors/pxa910/assets/Marvell_PXA910_Platform-001_PB_final.pdf
+ Application processor with Communication processor
+ Core: ARMv5 compatible Marvell PJ1 (Mohawk)
+ MMP2, a.k.a Armada 610
+ Product Brief : http://www.marvell.com/application-processors/armada-600/assets/armada610_pb.pdf
+ Application processor only
+ Core: ARMv7 compatible Sheeva PJ4 core
+
+ Comments:
+
+ * This line of SoCs originates from the XScale family developed by
+ Intel and acquired by Marvell in ~2006. All the processors of
+ this MMP/MMP2 family were developed by Marvell.
+
+ * Due to their XScale origin, these SoCs have virtually nothing in
+ common with the other (Kirkwood, Dove, etc.) families of Marvell
+ SoCs, except with the PXA family of SoCs listed above.
+
+ Linux kernel mach directory: arch/arm/mach-mmp
+ Linux kernel plat directory: arch/arm/plat-pxa
+
+Berlin family (Digital Entertainment)
+-------------------------------------
+
+ Flavors:
+ 88DE3005, Armada 1500-mini
+ Design name: BG2CD
+ Core: ARM Cortex-A9, PL310 L2CC
+ Homepage: http://www.marvell.com/digital-entertainment/armada-1500-mini/
+ 88DE3100, Armada 1500
+ Design name: BG2
+ Core: Marvell PJ4B (ARMv7), Tauros3 L2CC
+ Homepage: http://www.marvell.com/digital-entertainment/armada-1500/
+ Product Brief: http://www.marvell.com/digital-entertainment/armada-1500/assets/Marvell-ARMADA-1500-Product-Brief.pdf
+ 88DE3114, Armada 1500 Pro
+ Design name: BG2-Q
+ Core: Quad Core ARM Cortex-A9, PL310 L2CC
+ Homepage: http://www.marvell.com/digital-entertainment/armada-1500-pro/
+ Product Brief: http://www.marvell.com/digital-entertainment/armada-1500-pro/assets/Marvell_ARMADA_1500_PRO-01_product_brief.pdf
+ 88DE????
+ Design name: BG3
+ Core: ARM Cortex-A15, CA15 integrated L2CC
+
+ Homepage: http://www.marvell.com/digital-entertainment/
+ Directory: arch/arm/mach-berlin
+
+ Comments:
+ * This line of SoCs is based on Marvell Sheeva or ARM Cortex CPUs
+ with Synopsys DesignWare (IRQ, GPIO, Timers, ...) and PXA IP (SDHCI, USB, ETH, ...).
+
+Long-term plans
+---------------
+
+ * Unify the mach-dove/, mach-mv78xx0/, mach-orion5x/ into the
+ mach-mvebu/ to support all SoCs from the Marvell EBU (Engineering
+ Business Unit) in a single mach-<foo> directory. The plat-orion/
+ would therefore disappear.
+
+ * Unify the mach-mmp/ and mach-pxa/ into the same mach-pxa
+ directory. The plat-pxa/ would therefore disappear.
+
+Credits
+-------
+
+ Maen Suleiman <maen@marvell.com>
+ Lior Amsalem <alior@marvell.com>
+ Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
+ Andrew Lunn <andrew@lunn.ch>
+ Nicolas Pitre <nico@fluxnic.net>
+ Eric Miao <eric.y.miao@gmail.com>
diff --git a/Documentation/arm/Netwinder b/Documentation/arm/Netwinder
new file mode 100644
index 000000000..f1b457fbd
--- /dev/null
+++ b/Documentation/arm/Netwinder
@@ -0,0 +1,78 @@
+NetWinder specific documentation
+================================
+
+The NetWinder is a small low-power computer, primarily designed
+to run Linux. It is based around the StrongARM RISC processor,
+DC21285 PCI bridge, with PC-type hardware glued around it.
+
+Port usage
+==========
+
+Min - Max Description
+---------------------------
+0x0000 - 0x000f DMA1
+0x0020 - 0x0021 PIC1
+0x0060 - 0x006f Keyboard
+0x0070 - 0x007f RTC
+0x0080 - 0x0087 DMA1
+0x0088 - 0x008f DMA2
+0x00a0 - 0x00a3 PIC2
+0x00c0 - 0x00df DMA2
+0x0180 - 0x0187 IRDA
+0x01f0 - 0x01f6 ide0
+0x0201 Game port
+0x0203 RWA010 configuration read
+0x0220 - ? SoundBlaster
+0x0250 - ? WaveArtist
+0x0279 RWA010 configuration index
+0x02f8 - 0x02ff Serial ttyS1
+0x0300 - 0x031f Ether10
+0x0338 GPIO1
+0x033a GPIO2
+0x0370 - 0x0371 W83977F configuration registers
+0x0388 - ? AdLib
+0x03c0 - 0x03df VGA
+0x03f6 ide0
+0x03f8 - 0x03ff Serial ttyS0
+0x0400 - 0x0408 DC21143
+0x0480 - 0x0487 DMA1
+0x0488 - 0x048f DMA2
+0x0a79 RWA010 configuration write
+0xe800 - 0xe80f ide0/ide1 BM DMA
+
+
+Interrupt usage
+===============
+
+IRQ type Description
+---------------------------
+ 0 ISA 100Hz timer
+ 1 ISA Keyboard
+ 2 ISA cascade
+ 3 ISA Serial ttyS1
+ 4 ISA Serial ttyS0
+ 5 ISA PS/2 mouse
+ 6 ISA IRDA
+ 7 ISA Printer
+ 8 ISA RTC alarm
+ 9 ISA
+10 ISA GP10 (Orange reset button)
+11 ISA
+12 ISA WaveArtist
+13 ISA
+14 ISA hda1
+15 ISA
+
+DMA usage
+=========
+
+DMA type Description
+---------------------------
+ 0 ISA IRDA
+ 1 ISA
+ 2 ISA cascade
+ 3 ISA WaveArtist
+ 4 ISA
+ 5 ISA
+ 6 ISA
+ 7 ISA WaveArtist
diff --git a/Documentation/arm/OMAP/DSS b/Documentation/arm/OMAP/DSS
new file mode 100644
index 000000000..4484e0212
--- /dev/null
+++ b/Documentation/arm/OMAP/DSS
@@ -0,0 +1,362 @@
+OMAP2/3 Display Subsystem
+-------------------------
+
+This is an almost total rewrite of the OMAP FB driver in drivers/video/omap
+(let's call it DSS1). The main differences between DSS1 and DSS2 are DSI,
+TV-out and multiple display support, but there are lots of small improvements
+also.
+
+The DSS2 driver (omapdss module) is in arch/arm/plat-omap/dss/, and the FB,
+panel and controller drivers are in drivers/video/omap2/. DSS1 and DSS2 live
+currently side by side, you can choose which one to use.
+
+Features
+--------
+
+Working and tested features include:
+
+- MIPI DPI (parallel) output
+- MIPI DSI output in command mode
+- MIPI DBI (RFBI) output
+- SDI output
+- TV output
+- All pieces can be compiled as a module or inside kernel
+- Use DISPC to update any of the outputs
+- Use CPU to update RFBI or DSI output
+- OMAP DISPC planes
+- RGB16, RGB24 packed, RGB24 unpacked
+- YUV2, UYVY
+- Scaling
+- Adjusting DSS FCK to find a good pixel clock
+- Use DSI DPLL to create DSS FCK
+
+Tested boards include:
+- OMAP3 SDP board
+- Beagle board
+- N810
+
+omapdss driver
+--------------
+
+The DSS driver does not itself have any support for Linux framebuffer, V4L or
+such like the current ones, but it has an internal kernel API that upper level
+drivers can use.
+
+The DSS driver models OMAP's overlays, overlay managers and displays in a
+flexible way to enable non-common multi-display configuration. In addition to
+modelling the hardware overlays, omapdss supports virtual overlays and overlay
+managers. These can be used when updating a display with CPU or system DMA.
+
+omapdss driver support for audio
+--------------------------------
+There exist several display technologies and standards that support audio as
+well. Hence, it is relevant to update the DSS device driver to provide an audio
+interface that may be used by an audio driver or any other driver interested in
+the functionality.
+
+The audio_enable function is intended to prepare the relevant
+IP for playback (e.g., enabling an audio FIFO, taking in/out of reset
+some IP, enabling companion chips, etc). It is intended to be called before
+audio_start. The audio_disable function performs the reverse operation and is
+intended to be called after audio_stop.
+
+While a given DSS device driver may support audio, it is possible that for
+certain configurations audio is not supported (e.g., an HDMI display using a
+VESA video timing). The audio_supported function is intended to query whether
+the current configuration of the display supports audio.
+
+The audio_config function is intended to configure all the relevant audio
+parameters of the display. In order to make the function independent of any
+specific DSS device driver, a struct omap_dss_audio is defined. Its purpose
+is to contain all the required parameters for audio configuration. At the
+moment, such structure contains pointers to IEC-60958 channel status word
+and CEA-861 audio infoframe structures. This should be enough to support
+HDMI and DisplayPort, as both are based on CEA-861 and IEC-60958.
+
+The audio_enable/disable, audio_config and audio_supported functions could be
+implemented as functions that may sleep. Hence, they should not be called
+while holding a spinlock or a readlock.
+
+The audio_start/audio_stop function is intended to effectively start/stop audio
+playback after the configuration has taken place. These functions are designed
+to be used in an atomic context. Hence, audio_start should return quickly and be
+called only after all the needed resources for audio playback (audio FIFOs,
+DMA channels, companion chips, etc) have been enabled to begin data transfers.
+audio_stop is designed to only stop the audio transfers. The resources used
+for playback are released using audio_disable.
+
+The enum omap_dss_audio_state may be used to help the implementations of
+the interface to keep track of the audio state. The initial state is _DISABLED;
+then, the state transitions to _CONFIGURED, and then, when it is ready to
+play audio, to _ENABLED. The state _PLAYING is used when the audio is being
+rendered.
+
+
+Panel and controller drivers
+----------------------------
+
+The drivers implement panel or controller specific functionality and are not
+usually visible to users except through omapfb driver. They register
+themselves to the DSS driver.
+
+omapfb driver
+-------------
+
+The omapfb driver implements arbitrary number of standard linux framebuffers.
+These framebuffers can be routed flexibly to any overlays, thus allowing very
+dynamic display architecture.
+
+The driver exports some omapfb specific ioctls, which are compatible with the
+ioctls in the old driver.
+
+The rest of the non standard features are exported via sysfs. Whether the final
+implementation will use sysfs, or ioctls, is still open.
+
+V4L2 drivers
+------------
+
+V4L2 is being implemented in TI.
+
+From omapdss point of view the V4L2 drivers should be similar to framebuffer
+driver.
+
+Architecture
+--------------------
+
+Some clarification what the different components do:
+
+ - Framebuffer is a memory area inside OMAP's SRAM/SDRAM that contains the
+ pixel data for the image. Framebuffer has width and height and color
+ depth.
+ - Overlay defines where the pixels are read from and where they go on the
+ screen. The overlay may be smaller than framebuffer, thus displaying only
+ part of the framebuffer. The position of the overlay may be changed if
+ the overlay is smaller than the display.
+ - Overlay manager combines the overlays in to one image and feeds them to
+ display.
+ - Display is the actual physical display device.
+
+A framebuffer can be connected to multiple overlays to show the same pixel data
+on all of the overlays. Note that in this case the overlay input sizes must be
+the same, but, in case of video overlays, the output size can be different. Any
+framebuffer can be connected to any overlay.
+
+An overlay can be connected to one overlay manager. Also DISPC overlays can be
+connected only to DISPC overlay managers, and virtual overlays can be only
+connected to virtual overlays.
+
+An overlay manager can be connected to one display. There are certain
+restrictions which kinds of displays an overlay manager can be connected:
+
+ - DISPC TV overlay manager can be only connected to TV display.
+ - Virtual overlay managers can only be connected to DBI or DSI displays.
+ - DISPC LCD overlay manager can be connected to all displays, except TV
+ display.
+
+Sysfs
+-----
+The sysfs interface is mainly used for testing. I don't think sysfs
+interface is the best for this in the final version, but I don't quite know
+what would be the best interfaces for these things.
+
+The sysfs interface is divided to two parts: DSS and FB.
+
+/sys/class/graphics/fb? directory:
+mirror 0=off, 1=on
+rotate Rotation 0-3 for 0, 90, 180, 270 degrees
+rotate_type 0 = DMA rotation, 1 = VRFB rotation
+overlays List of overlay numbers to which framebuffer pixels go
+phys_addr Physical address of the framebuffer
+virt_addr Virtual address of the framebuffer
+size Size of the framebuffer
+
+/sys/devices/platform/omapdss/overlay? directory:
+enabled 0=off, 1=on
+input_size width,height (ie. the framebuffer size)
+manager Destination overlay manager name
+name
+output_size width,height
+position x,y
+screen_width width
+global_alpha global alpha 0-255 0=transparent 255=opaque
+
+/sys/devices/platform/omapdss/manager? directory:
+display Destination display
+name
+alpha_blending_enabled 0=off, 1=on
+trans_key_enabled 0=off, 1=on
+trans_key_type gfx-destination, video-source
+trans_key_value transparency color key (RGB24)
+default_color default background color (RGB24)
+
+/sys/devices/platform/omapdss/display? directory:
+ctrl_name Controller name
+mirror 0=off, 1=on
+update_mode 0=off, 1=auto, 2=manual
+enabled 0=off, 1=on
+name
+rotate Rotation 0-3 for 0, 90, 180, 270 degrees
+timings Display timings (pixclock,xres/hfp/hbp/hsw,yres/vfp/vbp/vsw)
+ When writing, two special timings are accepted for tv-out:
+ "pal" and "ntsc"
+panel_name
+tear_elim Tearing elimination 0=off, 1=on
+output_type Output type (video encoder only): "composite" or "svideo"
+
+There are also some debugfs files at <debugfs>/omapdss/ which show information
+about clocks and registers.
+
+Examples
+--------
+
+The following definitions have been made for the examples below:
+
+ovl0=/sys/devices/platform/omapdss/overlay0
+ovl1=/sys/devices/platform/omapdss/overlay1
+ovl2=/sys/devices/platform/omapdss/overlay2
+
+mgr0=/sys/devices/platform/omapdss/manager0
+mgr1=/sys/devices/platform/omapdss/manager1
+
+lcd=/sys/devices/platform/omapdss/display0
+dvi=/sys/devices/platform/omapdss/display1
+tv=/sys/devices/platform/omapdss/display2
+
+fb0=/sys/class/graphics/fb0
+fb1=/sys/class/graphics/fb1
+fb2=/sys/class/graphics/fb2
+
+Default setup on OMAP3 SDP
+--------------------------
+
+Here's the default setup on OMAP3 SDP board. All planes go to LCD. DVI
+and TV-out are not in use. The columns from left to right are:
+framebuffers, overlays, overlay managers, displays. Framebuffers are
+handled by omapfb, and the rest by the DSS.
+
+FB0 --- GFX -\ DVI
+FB1 --- VID1 --+- LCD ---- LCD
+FB2 --- VID2 -/ TV ----- TV
+
+Example: Switch from LCD to DVI
+----------------------
+
+w=`cat $dvi/timings | cut -d "," -f 2 | cut -d "/" -f 1`
+h=`cat $dvi/timings | cut -d "," -f 3 | cut -d "/" -f 1`
+
+echo "0" > $lcd/enabled
+echo "" > $mgr0/display
+fbset -fb /dev/fb0 -xres $w -yres $h -vxres $w -vyres $h
+# at this point you have to switch the dvi/lcd dip-switch from the omap board
+echo "dvi" > $mgr0/display
+echo "1" > $dvi/enabled
+
+After this the configuration looks like:
+
+FB0 --- GFX -\ -- DVI
+FB1 --- VID1 --+- LCD -/ LCD
+FB2 --- VID2 -/ TV ----- TV
+
+Example: Clone GFX overlay to LCD and TV
+-------------------------------
+
+w=`cat $tv/timings | cut -d "," -f 2 | cut -d "/" -f 1`
+h=`cat $tv/timings | cut -d "," -f 3 | cut -d "/" -f 1`
+
+echo "0" > $ovl0/enabled
+echo "0" > $ovl1/enabled
+
+echo "" > $fb1/overlays
+echo "0,1" > $fb0/overlays
+
+echo "$w,$h" > $ovl1/output_size
+echo "tv" > $ovl1/manager
+
+echo "1" > $ovl0/enabled
+echo "1" > $ovl1/enabled
+
+echo "1" > $tv/enabled
+
+After this the configuration looks like (only relevant parts shown):
+
+FB0 +-- GFX ---- LCD ---- LCD
+ \- VID1 ---- TV ---- TV
+
+Misc notes
+----------
+
+OMAP FB allocates the framebuffer memory using the standard dma allocator. You
+can enable Contiguous Memory Allocator (CONFIG_CMA) to improve the dma
+allocator, and if CMA is enabled, you use "cma=" kernel parameter to increase
+the global memory area for CMA.
+
+Using DSI DPLL to generate pixel clock it is possible produce the pixel clock
+of 86.5MHz (max possible), and with that you get 1280x1024@57 output from DVI.
+
+Rotation and mirroring currently only supports RGB565 and RGB8888 modes. VRFB
+does not support mirroring.
+
+VRFB rotation requires much more memory than non-rotated framebuffer, so you
+probably need to increase your vram setting before using VRFB rotation. Also,
+many applications may not work with VRFB if they do not pay attention to all
+framebuffer parameters.
+
+Kernel boot arguments
+---------------------
+
+omapfb.mode=<display>:<mode>[,...]
+ - Default video mode for specified displays. For example,
+ "dvi:800x400MR-24@60". See drivers/video/modedb.c.
+ There are also two special modes: "pal" and "ntsc" that
+ can be used to tv out.
+
+omapfb.vram=<fbnum>:<size>[@<physaddr>][,...]
+ - VRAM allocated for a framebuffer. Normally omapfb allocates vram
+ depending on the display size. With this you can manually allocate
+ more or define the physical address of each framebuffer. For example,
+ "1:4M" to allocate 4M for fb1.
+
+omapfb.debug=<y|n>
+ - Enable debug printing. You have to have OMAPFB debug support enabled
+ in kernel config.
+
+omapfb.test=<y|n>
+ - Draw test pattern to framebuffer whenever framebuffer settings change.
+ You need to have OMAPFB debug support enabled in kernel config.
+
+omapfb.vrfb=<y|n>
+ - Use VRFB rotation for all framebuffers.
+
+omapfb.rotate=<angle>
+ - Default rotation applied to all framebuffers.
+ 0 - 0 degree rotation
+ 1 - 90 degree rotation
+ 2 - 180 degree rotation
+ 3 - 270 degree rotation
+
+omapfb.mirror=<y|n>
+ - Default mirror for all framebuffers. Only works with DMA rotation.
+
+omapdss.def_disp=<display>
+ - Name of default display, to which all overlays will be connected.
+ Common examples are "lcd" or "tv".
+
+omapdss.debug=<y|n>
+ - Enable debug printing. You have to have DSS debug support enabled in
+ kernel config.
+
+TODO
+----
+
+DSS locking
+
+Error checking
+- Lots of checks are missing or implemented just as BUG()
+
+System DMA update for DSI
+- Can be used for RGB16 and RGB24P modes. Probably not for RGB24U (how
+ to skip the empty byte?)
+
+OMAP1 support
+- Not sure if needed
+
diff --git a/Documentation/arm/OMAP/omap_pm b/Documentation/arm/OMAP/omap_pm
new file mode 100644
index 000000000..4ae915a9f
--- /dev/null
+++ b/Documentation/arm/OMAP/omap_pm
@@ -0,0 +1,154 @@
+
+The OMAP PM interface
+=====================
+
+This document describes the temporary OMAP PM interface. Driver
+authors use these functions to communicate minimum latency or
+throughput constraints to the kernel power management code.
+Over time, the intention is to merge features from the OMAP PM
+interface into the Linux PM QoS code.
+
+Drivers need to express PM parameters which:
+
+- support the range of power management parameters present in the TI SRF;
+
+- separate the drivers from the underlying PM parameter
+ implementation, whether it is the TI SRF or Linux PM QoS or Linux
+ latency framework or something else;
+
+- specify PM parameters in terms of fundamental units, such as
+ latency and throughput, rather than units which are specific to OMAP
+ or to particular OMAP variants;
+
+- allow drivers which are shared with other architectures (e.g.,
+ DaVinci) to add these constraints in a way which won't affect non-OMAP
+ systems,
+
+- can be implemented immediately with minimal disruption of other
+ architectures.
+
+
+This document proposes the OMAP PM interface, including the following
+five power management functions for driver code:
+
+1. Set the maximum MPU wakeup latency:
+ (*pdata->set_max_mpu_wakeup_lat)(struct device *dev, unsigned long t)
+
+2. Set the maximum device wakeup latency:
+ (*pdata->set_max_dev_wakeup_lat)(struct device *dev, unsigned long t)
+
+3. Set the maximum system DMA transfer start latency (CORE pwrdm):
+ (*pdata->set_max_sdma_lat)(struct device *dev, long t)
+
+4. Set the minimum bus throughput needed by a device:
+ (*pdata->set_min_bus_tput)(struct device *dev, u8 agent_id, unsigned long r)
+
+5. Return the number of times the device has lost context
+ (*pdata->get_dev_context_loss_count)(struct device *dev)
+
+
+Further documentation for all OMAP PM interface functions can be
+found in arch/arm/plat-omap/include/mach/omap-pm.h.
+
+
+The OMAP PM layer is intended to be temporary
+---------------------------------------------
+
+The intention is that eventually the Linux PM QoS layer should support
+the range of power management features present in OMAP3. As this
+happens, existing drivers using the OMAP PM interface can be modified
+to use the Linux PM QoS code; and the OMAP PM interface can disappear.
+
+
+Driver usage of the OMAP PM functions
+-------------------------------------
+
+As the 'pdata' in the above examples indicates, these functions are
+exposed to drivers through function pointers in driver .platform_data
+structures. The function pointers are initialized by the board-*.c
+files to point to the corresponding OMAP PM functions:
+.set_max_dev_wakeup_lat will point to
+omap_pm_set_max_dev_wakeup_lat(), etc. Other architectures which do
+not support these functions should leave these function pointers set
+to NULL. Drivers should use the following idiom:
+
+ if (pdata->set_max_dev_wakeup_lat)
+ (*pdata->set_max_dev_wakeup_lat)(dev, t);
+
+The most common usage of these functions will probably be to specify
+the maximum time from when an interrupt occurs, to when the device
+becomes accessible. To accomplish this, driver writers should use the
+set_max_mpu_wakeup_lat() function to constrain the MPU wakeup
+latency, and the set_max_dev_wakeup_lat() function to constrain the
+device wakeup latency (from clk_enable() to accessibility). For
+example,
+
+ /* Limit MPU wakeup latency */
+ if (pdata->set_max_mpu_wakeup_lat)
+ (*pdata->set_max_mpu_wakeup_lat)(dev, tc);
+
+ /* Limit device powerdomain wakeup latency */
+ if (pdata->set_max_dev_wakeup_lat)
+ (*pdata->set_max_dev_wakeup_lat)(dev, td);
+
+ /* total wakeup latency in this example: (tc + td) */
+
+The PM parameters can be overwritten by calling the function again
+with the new value. The settings can be removed by calling the
+function with a t argument of -1 (except in the case of
+set_max_bus_tput(), which should be called with an r argument of 0).
+
+The fifth function above, omap_pm_get_dev_context_loss_count(),
+is intended as an optimization to allow drivers to determine whether the
+device has lost its internal context. If context has been lost, the
+driver must restore its internal context before proceeding.
+
+
+Other specialized interface functions
+-------------------------------------
+
+The five functions listed above are intended to be usable by any
+device driver. DSPBridge and CPUFreq have a few special requirements.
+DSPBridge expresses target DSP performance levels in terms of OPP IDs.
+CPUFreq expresses target MPU performance levels in terms of MPU
+frequency. The OMAP PM interface contains functions for these
+specialized cases to convert that input information (OPPs/MPU
+frequency) into the form that the underlying power management
+implementation needs:
+
+6. (*pdata->dsp_get_opp_table)(void)
+
+7. (*pdata->dsp_set_min_opp)(u8 opp_id)
+
+8. (*pdata->dsp_get_opp)(void)
+
+9. (*pdata->cpu_get_freq_table)(void)
+
+10. (*pdata->cpu_set_freq)(unsigned long f)
+
+11. (*pdata->cpu_get_freq)(void)
+
+Customizing OPP for platform
+============================
+Defining CONFIG_PM should enable OPP layer for the silicon
+and the registration of OPP table should take place automatically.
+However, in special cases, the default OPP table may need to be
+tweaked, for e.g.:
+ * enable default OPPs which are disabled by default, but which
+ could be enabled on a platform
+ * Disable an unsupported OPP on the platform
+ * Define and add a custom opp table entry
+in these cases, the board file needs to do additional steps as follows:
+arch/arm/mach-omapx/board-xyz.c
+ #include "pm.h"
+ ....
+ static void __init omap_xyz_init_irq(void)
+ {
+ ....
+ /* Initialize the default table */
+ omapx_opp_init();
+ /* Do customization to the defaults */
+ ....
+ }
+NOTE: omapx_opp_init will be omap3_opp_init or as required
+based on the omap family.
diff --git a/Documentation/arm/Porting b/Documentation/arm/Porting
new file mode 100644
index 000000000..a49223393
--- /dev/null
+++ b/Documentation/arm/Porting
@@ -0,0 +1,135 @@
+Taken from list archive at http://lists.arm.linux.org.uk/pipermail/linux-arm-kernel/2001-July/004064.html
+
+Initial definitions
+-------------------
+
+The following symbol definitions rely on you knowing the translation that
+__virt_to_phys() does for your machine. This macro converts the passed
+virtual address to a physical address. Normally, it is simply:
+
+ phys = virt - PAGE_OFFSET + PHYS_OFFSET
+
+
+Decompressor Symbols
+--------------------
+
+ZTEXTADDR
+ Start address of decompressor. There's no point in talking about
+ virtual or physical addresses here, since the MMU will be off at
+ the time when you call the decompressor code. You normally call
+ the kernel at this address to start it booting. This doesn't have
+ to be located in RAM, it can be in flash or other read-only or
+ read-write addressable medium.
+
+ZBSSADDR
+ Start address of zero-initialised work area for the decompressor.
+ This must be pointing at RAM. The decompressor will zero initialise
+ this for you. Again, the MMU will be off.
+
+ZRELADDR
+ This is the address where the decompressed kernel will be written,
+ and eventually executed. The following constraint must be valid:
+
+ __virt_to_phys(TEXTADDR) == ZRELADDR
+
+ The initial part of the kernel is carefully coded to be position
+ independent.
+
+INITRD_PHYS
+ Physical address to place the initial RAM disk. Only relevant if
+ you are using the bootpImage stuff (which only works on the old
+ struct param_struct).
+
+INITRD_VIRT
+ Virtual address of the initial RAM disk. The following constraint
+ must be valid:
+
+ __virt_to_phys(INITRD_VIRT) == INITRD_PHYS
+
+PARAMS_PHYS
+ Physical address of the struct param_struct or tag list, giving the
+ kernel various parameters about its execution environment.
+
+
+Kernel Symbols
+--------------
+
+PHYS_OFFSET
+ Physical start address of the first bank of RAM.
+
+PAGE_OFFSET
+ Virtual start address of the first bank of RAM. During the kernel
+ boot phase, virtual address PAGE_OFFSET will be mapped to physical
+ address PHYS_OFFSET, along with any other mappings you supply.
+ This should be the same value as TASK_SIZE.
+
+TASK_SIZE
+ The maximum size of a user process in bytes. Since user space
+ always starts at zero, this is the maximum address that a user
+ process can access+1. The user space stack grows down from this
+ address.
+
+ Any virtual address below TASK_SIZE is deemed to be user process
+ area, and therefore managed dynamically on a process by process
+ basis by the kernel. I'll call this the user segment.
+
+ Anything above TASK_SIZE is common to all processes. I'll call
+ this the kernel segment.
+
+ (In other words, you can't put IO mappings below TASK_SIZE, and
+ hence PAGE_OFFSET).
+
+TEXTADDR
+ Virtual start address of kernel, normally PAGE_OFFSET + 0x8000.
+ This is where the kernel image ends up. With the latest kernels,
+ it must be located at 32768 bytes into a 128MB region. Previous
+ kernels placed a restriction of 256MB here.
+
+DATAADDR
+ Virtual address for the kernel data segment. Must not be defined
+ when using the decompressor.
+
+VMALLOC_START
+VMALLOC_END
+ Virtual addresses bounding the vmalloc() area. There must not be
+ any static mappings in this area; vmalloc will overwrite them.
+ The addresses must also be in the kernel segment (see above).
+ Normally, the vmalloc() area starts VMALLOC_OFFSET bytes above the
+ last virtual RAM address (found using variable high_memory).
+
+VMALLOC_OFFSET
+ Offset normally incorporated into VMALLOC_START to provide a hole
+ between virtual RAM and the vmalloc area. We do this to allow
+ out of bounds memory accesses (eg, something writing off the end
+ of the mapped memory map) to be caught. Normally set to 8MB.
+
+Architecture Specific Macros
+----------------------------
+
+BOOT_MEM(pram,pio,vio)
+ `pram' specifies the physical start address of RAM. Must always
+ be present, and should be the same as PHYS_OFFSET.
+
+ `pio' is the physical address of an 8MB region containing IO for
+ use with the debugging macros in arch/arm/kernel/debug-armv.S.
+
+ `vio' is the virtual address of the 8MB debugging region.
+
+ It is expected that the debugging region will be re-initialised
+ by the architecture specific code later in the code (via the
+ MAPIO function).
+
+BOOT_PARAMS
+ Same as, and see PARAMS_PHYS.
+
+FIXUP(func)
+ Machine specific fixups, run before memory subsystems have been
+ initialised.
+
+MAPIO(func)
+ Machine specific function to map IO areas (including the debug
+ region above).
+
+INITIRQ(func)
+ Machine specific function to initialise interrupts.
+
diff --git a/Documentation/arm/README b/Documentation/arm/README
new file mode 100644
index 000000000..9d1e5b2c9
--- /dev/null
+++ b/Documentation/arm/README
@@ -0,0 +1,204 @@
+ ARM Linux 2.6
+ =============
+
+ Please check <ftp://ftp.arm.linux.org.uk/pub/armlinux> for
+ updates.
+
+Compilation of kernel
+---------------------
+
+ In order to compile ARM Linux, you will need a compiler capable of
+ generating ARM ELF code with GNU extensions. GCC 3.3 is known to be
+ a good compiler. Fortunately, you needn't guess. The kernel will report
+ an error if your compiler is a recognized offender.
+
+ To build ARM Linux natively, you shouldn't have to alter the ARCH = line
+ in the top level Makefile. However, if you don't have the ARM Linux ELF
+ tools installed as default, then you should change the CROSS_COMPILE
+ line as detailed below.
+
+ If you wish to cross-compile, then alter the following lines in the top
+ level make file:
+
+ ARCH = <whatever>
+ with
+ ARCH = arm
+
+ and
+
+ CROSS_COMPILE=
+ to
+ CROSS_COMPILE=<your-path-to-your-compiler-without-gcc>
+ eg.
+ CROSS_COMPILE=arm-linux-
+
+ Do a 'make config', followed by 'make Image' to build the kernel
+ (arch/arm/boot/Image). A compressed image can be built by doing a
+ 'make zImage' instead of 'make Image'.
+
+
+Bug reports etc
+---------------
+
+ Please send patches to the patch system. For more information, see
+ http://www.arm.linux.org.uk/developer/patches/info.php Always include some
+ explanation as to what the patch does and why it is needed.
+
+ Bug reports should be sent to linux-arm-kernel@lists.arm.linux.org.uk,
+ or submitted through the web form at
+ http://www.arm.linux.org.uk/developer/
+
+ When sending bug reports, please ensure that they contain all relevant
+ information, eg. the kernel messages that were printed before/during
+ the problem, what you were doing, etc.
+
+
+Include files
+-------------
+
+ Several new include directories have been created under include/asm-arm,
+ which are there to reduce the clutter in the top-level directory. These
+ directories, and their purpose is listed below:
+
+ arch-* machine/platform specific header files
+ hardware driver-internal ARM specific data structures/definitions
+ mach descriptions of generic ARM to specific machine interfaces
+ proc-* processor dependent header files (currently only two
+ categories)
+
+
+Machine/Platform support
+------------------------
+
+ The ARM tree contains support for a lot of different machine types. To
+ continue supporting these differences, it has become necessary to split
+ machine-specific parts by directory. For this, the machine category is
+ used to select which directories and files get included (we will use
+ $(MACHINE) to refer to the category)
+
+ To this end, we now have arch/arm/mach-$(MACHINE) directories which are
+ designed to house the non-driver files for a particular machine (eg, PCI,
+ memory management, architecture definitions etc). For all future
+ machines, there should be a corresponding arch/arm/mach-$(MACHINE)/include/mach
+ directory.
+
+
+Modules
+-------
+
+ Although modularisation is supported (and required for the FP emulator),
+ each module on an ARM2/ARM250/ARM3 machine when is loaded will take
+ memory up to the next 32k boundary due to the size of the pages.
+ Therefore, is modularisation on these machines really worth it?
+
+ However, ARM6 and up machines allow modules to take multiples of 4k, and
+ as such Acorn RiscPCs and other architectures using these processors can
+ make good use of modularisation.
+
+
+ADFS Image files
+----------------
+
+ You can access image files on your ADFS partitions by mounting the ADFS
+ partition, and then using the loopback device driver. You must have
+ losetup installed.
+
+ Please note that the PCEmulator DOS partitions have a partition table at
+ the start, and as such, you will have to give '-o offset' to losetup.
+
+
+Request to developers
+---------------------
+
+ When writing device drivers which include a separate assembler file, please
+ include it in with the C file, and not the arch/arm/lib directory. This
+ allows the driver to be compiled as a loadable module without requiring
+ half the code to be compiled into the kernel image.
+
+ In general, try to avoid using assembler unless it is really necessary. It
+ makes drivers far less easy to port to other hardware.
+
+
+ST506 hard drives
+-----------------
+
+ The ST506 hard drive controllers seem to be working fine (if a little
+ slowly). At the moment they will only work off the controllers on an
+ A4x0's motherboard, but for it to work off a Podule just requires
+ someone with a podule to add the addresses for the IRQ mask and the
+ HDC base to the source.
+
+ As of 31/3/96 it works with two drives (you should get the ADFS
+ *configure harddrive set to 2). I've got an internal 20MB and a great
+ big external 5.25" FH 64MB drive (who could ever want more :-) ).
+
+ I've just got 240K/s off it (a dd with bs=128k); thats about half of what
+ RiscOS gets; but it's a heck of a lot better than the 50K/s I was getting
+ last week :-)
+
+ Known bug: Drive data errors can cause a hang; including cases where
+ the controller has fixed the error using ECC. (Possibly ONLY
+ in that case...hmm).
+
+
+1772 Floppy
+-----------
+ This also seems to work OK, but hasn't been stressed much lately. It
+ hasn't got any code for disc change detection in there at the moment which
+ could be a bit of a problem! Suggestions on the correct way to do this
+ are welcome.
+
+
+CONFIG_MACH_ and CONFIG_ARCH_
+-----------------------------
+ A change was made in 2003 to the macro names for new machines.
+ Historically, CONFIG_ARCH_ was used for the bonafide architecture,
+ e.g. SA1100, as well as implementations of the architecture,
+ e.g. Assabet. It was decided to change the implementation macros
+ to read CONFIG_MACH_ for clarity. Moreover, a retroactive fixup has
+ not been made because it would complicate patching.
+
+ Previous registrations may be found online.
+
+ <http://www.arm.linux.org.uk/developer/machines/>
+
+Kernel entry (head.S)
+--------------------------
+ The initial entry into the kernel is via head.S, which uses machine
+ independent code. The machine is selected by the value of 'r1' on
+ entry, which must be kept unique.
+
+ Due to the large number of machines which the ARM port of Linux provides
+ for, we have a method to manage this which ensures that we don't end up
+ duplicating large amounts of code.
+
+ We group machine (or platform) support code into machine classes. A
+ class typically based around one or more system on a chip devices, and
+ acts as a natural container around the actual implementations. These
+ classes are given directories - arch/arm/mach-<class> and
+ arch/arm/mach-<class> - which contain the source files to/include/mach
+ support the machine class. This directories also contain any machine
+ specific supporting code.
+
+ For example, the SA1100 class is based upon the SA1100 and SA1110 SoC
+ devices, and contains the code to support the way the on-board and off-
+ board devices are used, or the device is setup, and provides that
+ machine specific "personality."
+
+ For platforms that support device tree (DT), the machine selection is
+ controlled at runtime by passing the device tree blob to the kernel. At
+ compile-time, support for the machine type must be selected. This allows for
+ a single multiplatform kernel build to be used for several machine types.
+
+ For platforms that do not use device tree, this machine selection is
+ controlled by the machine type ID, which acts both as a run-time and a
+ compile-time code selection method. You can register a new machine via the
+ web site at:
+
+ <http://www.arm.linux.org.uk/developer/machines/>
+
+ Note: Please do not register a machine type for DT-only platforms. If your
+ platform is DT-only, you do not need a registered machine type.
+
+---
+Russell King (15/03/2004)
diff --git a/Documentation/arm/SA1100/ADSBitsy b/Documentation/arm/SA1100/ADSBitsy
new file mode 100644
index 000000000..f9f62e8c0
--- /dev/null
+++ b/Documentation/arm/SA1100/ADSBitsy
@@ -0,0 +1,43 @@
+ADS Bitsy Single Board Computer
+(It is different from Bitsy(iPAQ) of Compaq)
+
+For more details, contact Applied Data Systems or see
+http://www.applieddata.net/products.html
+
+The Linux support for this product has been provided by
+Woojung Huh <whuh@applieddata.net>
+
+Use 'make adsbitsy_config' before any 'make config'.
+This will set up defaults for ADS Bitsy support.
+
+The kernel zImage is linked to be loaded and executed at 0xc0400000.
+
+Linux can be used with the ADS BootLoader that ships with the
+newer rev boards. See their documentation on how to load Linux.
+
+Supported peripherals:
+- SA1100 LCD frame buffer (8/16bpp...sort of)
+- SA1111 USB Master
+- SA1100 serial port
+- pcmcia, compact flash
+- touchscreen(ucb1200)
+- console on LCD screen
+- serial ports (ttyS[0-2])
+ - ttyS0 is default for serial console
+
+To do:
+- everything else! :-)
+
+Notes:
+
+- The flash on board is divided into 3 partitions.
+ You should be careful to use flash on board.
+ Its partition is different from GraphicsClient Plus and GraphicsMaster
+
+- 16bpp mode requires a different cable than what ships with the board.
+ Contact ADS or look through the manual to wire your own. Currently,
+ if you compile with 16bit mode support and switch into a lower bpp
+ mode, the timing is off so the image is corrupted. This will be
+ fixed soon.
+
+Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
diff --git a/Documentation/arm/SA1100/Assabet b/Documentation/arm/SA1100/Assabet
new file mode 100644
index 000000000..08b885d35
--- /dev/null
+++ b/Documentation/arm/SA1100/Assabet
@@ -0,0 +1,300 @@
+The Intel Assabet (SA-1110 evaluation) board
+============================================
+
+Please see:
+http://developer.intel.com
+
+Also some notes from John G Dorsey <jd5q@andrew.cmu.edu>:
+http://www.cs.cmu.edu/~wearable/software/assabet.html
+
+
+Building the kernel
+-------------------
+
+To build the kernel with current defaults:
+
+ make assabet_config
+ make oldconfig
+ make zImage
+
+The resulting kernel image should be available in linux/arch/arm/boot/zImage.
+
+
+Installing a bootloader
+-----------------------
+
+A couple of bootloaders able to boot Linux on Assabet are available:
+
+BLOB (http://www.lartmaker.nl/lartware/blob/)
+
+ BLOB is a bootloader used within the LART project. Some contributed
+ patches were merged into BLOB to add support for Assabet.
+
+Compaq's Bootldr + John Dorsey's patch for Assabet support
+(http://www.handhelds.org/Compaq/bootldr.html)
+(http://www.wearablegroup.org/software/bootldr/)
+
+ Bootldr is the bootloader developed by Compaq for the iPAQ Pocket PC.
+ John Dorsey has produced add-on patches to add support for Assabet and
+ the JFFS filesystem.
+
+RedBoot (http://sources.redhat.com/redboot/)
+
+ RedBoot is a bootloader developed by Red Hat based on the eCos RTOS
+ hardware abstraction layer. It supports Assabet amongst many other
+ hardware platforms.
+
+RedBoot is currently the recommended choice since it's the only one to have
+networking support, and is the most actively maintained.
+
+Brief examples on how to boot Linux with RedBoot are shown below. But first
+you need to have RedBoot installed in your flash memory. A known to work
+precompiled RedBoot binary is available from the following location:
+
+ftp://ftp.netwinder.org/users/n/nico/
+ftp://ftp.arm.linux.org.uk/pub/linux/arm/people/nico/
+ftp://ftp.handhelds.org/pub/linux/arm/sa-1100-patches/
+
+Look for redboot-assabet*.tgz. Some installation infos are provided in
+redboot-assabet*.txt.
+
+
+Initial RedBoot configuration
+-----------------------------
+
+The commands used here are explained in The RedBoot User's Guide available
+on-line at http://sources.redhat.com/ecos/docs.html.
+Please refer to it for explanations.
+
+If you have a CF network card (my Assabet kit contained a CF+ LP-E from
+Socket Communications Inc.), you should strongly consider using it for TFTP
+file transfers. You must insert it before RedBoot runs since it can't detect
+it dynamically.
+
+To initialize the flash directory:
+
+ fis init -f
+
+To initialize the non-volatile settings, like whether you want to use BOOTP or
+a static IP address, etc, use this command:
+
+ fconfig -i
+
+
+Writing a kernel image into flash
+---------------------------------
+
+First, the kernel image must be loaded into RAM. If you have the zImage file
+available on a TFTP server:
+
+ load zImage -r -b 0x100000
+
+If you rather want to use Y-Modem upload over the serial port:
+
+ load -m ymodem -r -b 0x100000
+
+To write it to flash:
+
+ fis create "Linux kernel" -b 0x100000 -l 0xc0000
+
+
+Booting the kernel
+------------------
+
+The kernel still requires a filesystem to boot. A ramdisk image can be loaded
+as follows:
+
+ load ramdisk_image.gz -r -b 0x800000
+
+Again, Y-Modem upload can be used instead of TFTP by replacing the file name
+by '-y ymodem'.
+
+Now the kernel can be retrieved from flash like this:
+
+ fis load "Linux kernel"
+
+or loaded as described previously. To boot the kernel:
+
+ exec -b 0x100000 -l 0xc0000
+
+The ramdisk image could be stored into flash as well, but there are better
+solutions for on-flash filesystems as mentioned below.
+
+
+Using JFFS2
+-----------
+
+Using JFFS2 (the Second Journalling Flash File System) is probably the most
+convenient way to store a writable filesystem into flash. JFFS2 is used in
+conjunction with the MTD layer which is responsible for low-level flash
+management. More information on the Linux MTD can be found on-line at:
+http://www.linux-mtd.infradead.org/. A JFFS howto with some infos about
+creating JFFS/JFFS2 images is available from the same site.
+
+For instance, a sample JFFS2 image can be retrieved from the same FTP sites
+mentioned below for the precompiled RedBoot image.
+
+To load this file:
+
+ load sample_img.jffs2 -r -b 0x100000
+
+The result should look like:
+
+RedBoot> load sample_img.jffs2 -r -b 0x100000
+Raw file loaded 0x00100000-0x00377424
+
+Now we must know the size of the unallocated flash:
+
+ fis free
+
+Result:
+
+RedBoot> fis free
+ 0x500E0000 .. 0x503C0000
+
+The values above may be different depending on the size of the filesystem and
+the type of flash. See their usage below as an example and take care of
+substituting yours appropriately.
+
+We must determine some values:
+
+size of unallocated flash: 0x503c0000 - 0x500e0000 = 0x2e0000
+size of the filesystem image: 0x00377424 - 0x00100000 = 0x277424
+
+We want to fit the filesystem image of course, but we also want to give it all
+the remaining flash space as well. To write it:
+
+ fis unlock -f 0x500E0000 -l 0x2e0000
+ fis erase -f 0x500E0000 -l 0x2e0000
+ fis write -b 0x100000 -l 0x277424 -f 0x500E0000
+ fis create "JFFS2" -n -f 0x500E0000 -l 0x2e0000
+
+Now the filesystem is associated to a MTD "partition" once Linux has discovered
+what they are in the boot process. From Redboot, the 'fis list' command
+displays them:
+
+RedBoot> fis list
+Name FLASH addr Mem addr Length Entry point
+RedBoot 0x50000000 0x50000000 0x00020000 0x00000000
+RedBoot config 0x503C0000 0x503C0000 0x00020000 0x00000000
+FIS directory 0x503E0000 0x503E0000 0x00020000 0x00000000
+Linux kernel 0x50020000 0x00100000 0x000C0000 0x00000000
+JFFS2 0x500E0000 0x500E0000 0x002E0000 0x00000000
+
+However Linux should display something like:
+
+SA1100 flash: probing 32-bit flash bus
+SA1100 flash: Found 2 x16 devices at 0x0 in 32-bit mode
+Using RedBoot partition definition
+Creating 5 MTD partitions on "SA1100 flash":
+0x00000000-0x00020000 : "RedBoot"
+0x00020000-0x000e0000 : "Linux kernel"
+0x000e0000-0x003c0000 : "JFFS2"
+0x003c0000-0x003e0000 : "RedBoot config"
+0x003e0000-0x00400000 : "FIS directory"
+
+What's important here is the position of the partition we are interested in,
+which is the third one. Within Linux, this correspond to /dev/mtdblock2.
+Therefore to boot Linux with the kernel and its root filesystem in flash, we
+need this RedBoot command:
+
+ fis load "Linux kernel"
+ exec -b 0x100000 -l 0xc0000 -c "root=/dev/mtdblock2"
+
+Of course other filesystems than JFFS might be used, like cramfs for example.
+You might want to boot with a root filesystem over NFS, etc. It is also
+possible, and sometimes more convenient, to flash a filesystem directly from
+within Linux while booted from a ramdisk or NFS. The Linux MTD repository has
+many tools to deal with flash memory as well, to erase it for example. JFFS2
+can then be mounted directly on a freshly erased partition and files can be
+copied over directly. Etc...
+
+
+RedBoot scripting
+-----------------
+
+All the commands above aren't so useful if they have to be typed in every
+time the Assabet is rebooted. Therefore it's possible to automatize the boot
+process using RedBoot's scripting capability.
+
+For example, I use this to boot Linux with both the kernel and the ramdisk
+images retrieved from a TFTP server on the network:
+
+RedBoot> fconfig
+Run script at boot: false true
+Boot script:
+Enter script, terminate with empty line
+>> load zImage -r -b 0x100000
+>> load ramdisk_ks.gz -r -b 0x800000
+>> exec -b 0x100000 -l 0xc0000
+>>
+Boot script timeout (1000ms resolution): 3
+Use BOOTP for network configuration: true
+GDB connection port: 9000
+Network debug at boot time: false
+Update RedBoot non-volatile configuration - are you sure (y/n)? y
+
+Then, rebooting the Assabet is just a matter of waiting for the login prompt.
+
+
+
+Nicolas Pitre
+nico@fluxnic.net
+June 12, 2001
+
+
+Status of peripherals in -rmk tree (updated 14/10/2001)
+-------------------------------------------------------
+
+Assabet:
+ Serial ports:
+ Radio: TX, RX, CTS, DSR, DCD, RI
+ PM: Not tested.
+ COM: TX, RX, CTS, DSR, DCD, RTS, DTR, PM
+ PM: Not tested.
+ I2C: Implemented, not fully tested.
+ L3: Fully tested, pass.
+ PM: Not tested.
+
+ Video:
+ LCD: Fully tested. PM
+ (LCD doesn't like being blanked with
+ neponset connected)
+ Video out: Not fully
+
+ Audio:
+ UDA1341:
+ Playback: Fully tested, pass.
+ Record: Implemented, not tested.
+ PM: Not tested.
+
+ UCB1200:
+ Audio play: Implemented, not heavily tested.
+ Audio rec: Implemented, not heavily tested.
+ Telco audio play: Implemented, not heavily tested.
+ Telco audio rec: Implemented, not heavily tested.
+ POTS control: No
+ Touchscreen: Yes
+ PM: Not tested.
+
+ Other:
+ PCMCIA:
+ LPE: Fully tested, pass.
+ USB: No
+ IRDA:
+ SIR: Fully tested, pass.
+ FIR: Fully tested, pass.
+ PM: Not tested.
+
+Neponset:
+ Serial ports:
+ COM1,2: TX, RX, CTS, DSR, DCD, RTS, DTR
+ PM: Not tested.
+ USB: Implemented, not heavily tested.
+ PCMCIA: Implemented, not heavily tested.
+ PM: Not tested.
+ CF: Implemented, not heavily tested.
+ PM: Not tested.
+
+More stuff can be found in the -np (Nicolas Pitre's) tree.
+
diff --git a/Documentation/arm/SA1100/Brutus b/Documentation/arm/SA1100/Brutus
new file mode 100644
index 000000000..6a3aa95e9
--- /dev/null
+++ b/Documentation/arm/SA1100/Brutus
@@ -0,0 +1,66 @@
+Brutus is an evaluation platform for the SA1100 manufactured by Intel.
+For more details, see:
+
+http://developer.intel.com
+
+To compile for Brutus, you must issue the following commands:
+
+ make brutus_config
+ make config
+ [accept all the defaults]
+ make zImage
+
+The resulting kernel will end up in linux/arch/arm/boot/zImage. This file
+must be loaded at 0xc0008000 in Brutus's memory and execution started at
+0xc0008000 as well with the value of registers r0 = 0 and r1 = 16 upon
+entry.
+
+But prior to execute the kernel, a ramdisk image must also be loaded in
+memory. Use memory address 0xd8000000 for this. Note that the file
+containing the (compressed) ramdisk image must not exceed 4 MB.
+
+Typically, you'll need angelboot to load the kernel.
+The following angelboot.opt file should be used:
+
+----- begin angelboot.opt -----
+base 0xc0008000
+entry 0xc0008000
+r0 0x00000000
+r1 0x00000010
+device /dev/ttyS0
+options "9600 8N1"
+baud 115200
+otherfile ramdisk_img.gz
+otherbase 0xd8000000
+----- end angelboot.opt -----
+
+Then load the kernel and ramdisk with:
+
+ angelboot -f angelboot.opt zImage
+
+The first Brutus serial port (assumed to be linked to /dev/ttyS0 on your
+host PC) is used by angel to load the kernel and ramdisk image. The serial
+console is provided through the second Brutus serial port. To access it,
+you may use minicom configured with /dev/ttyS1, 9600 baud, 8N1, no flow
+control.
+
+Currently supported:
+ - RS232 serial ports
+ - audio output
+ - LCD screen
+ - keyboard
+
+The actual Brutus support may not be complete without extra patches.
+If such patches exist, they should be found from
+ftp.netwinder.org/users/n/nico.
+
+A full PCMCIA support is still missing, although it's possible to hack
+some drivers in order to drive already inserted cards at boot time with
+little modifications.
+
+Any contribution is welcome.
+
+Please send patches to nico@fluxnic.net
+
+Have Fun !
+
diff --git a/Documentation/arm/SA1100/CERF b/Documentation/arm/SA1100/CERF
new file mode 100644
index 000000000..b3d845301
--- /dev/null
+++ b/Documentation/arm/SA1100/CERF
@@ -0,0 +1,29 @@
+*** The StrongARM version of the CerfBoard/Cube has been discontinued ***
+
+The Intrinsyc CerfBoard is a StrongARM 1110-based computer on a board
+that measures approximately 2" square. It includes an Ethernet
+controller, an RS232-compatible serial port, a USB function port, and
+one CompactFlash+ slot on the back. Pictures can be found at the
+Intrinsyc website, http://www.intrinsyc.com.
+
+This document describes the support in the Linux kernel for the
+Intrinsyc CerfBoard.
+
+Supported in this version:
+ - CompactFlash+ slot (select PCMCIA in General Setup and any options
+ that may be required)
+ - Onboard Crystal CS8900 Ethernet controller (Cerf CS8900A support in
+ Network Devices)
+ - Serial ports with a serial console (hardcoded to 38400 8N1)
+
+In order to get this kernel onto your Cerf, you need a server that runs
+both BOOTP and TFTP. Detailed instructions should have come with your
+evaluation kit on how to use the bootloader. This series of commands
+will suffice:
+
+ make ARCH=arm CROSS_COMPILE=arm-linux- cerfcube_defconfig
+ make ARCH=arm CROSS_COMPILE=arm-linux- zImage
+ make ARCH=arm CROSS_COMPILE=arm-linux- modules
+ cp arch/arm/boot/zImage <TFTP directory>
+
+support@intrinsyc.com
diff --git a/Documentation/arm/SA1100/FreeBird b/Documentation/arm/SA1100/FreeBird
new file mode 100644
index 000000000..ab9193663
--- /dev/null
+++ b/Documentation/arm/SA1100/FreeBird
@@ -0,0 +1,21 @@
+Freebird-1.1 is produced by Legend(C), Inc.
+http://web.archive.org/web/*/http://www.legend.com.cn
+and software/linux maintained by Coventive(C), Inc.
+(http://www.coventive.com)
+
+Based on the Nicolas's strongarm kernel tree.
+
+===============================================================
+Maintainer:
+
+Chester Kuo <chester@coventive.com>
+ <chester@linux.org.tw>
+
+Author :
+Tim wu <timwu@coventive.com>
+CIH <cih@coventive.com>
+Eric Peng <ericpeng@coventive.com>
+Jeff Lee <jeff_lee@coventive.com>
+Allen Cheng
+Tony Liu <tonyliu@coventive.com>
+
diff --git a/Documentation/arm/SA1100/GraphicsClient b/Documentation/arm/SA1100/GraphicsClient
new file mode 100644
index 000000000..867bb3594
--- /dev/null
+++ b/Documentation/arm/SA1100/GraphicsClient
@@ -0,0 +1,98 @@
+ADS GraphicsClient Plus Single Board Computer
+
+For more details, contact Applied Data Systems or see
+http://www.applieddata.net/products.html
+
+The original Linux support for this product has been provided by
+Nicolas Pitre <nico@fluxnic.net>. Continued development work by
+Woojung Huh <whuh@applieddata.net>
+
+It's currently possible to mount a root filesystem via NFS providing a
+complete Linux environment. Otherwise a ramdisk image may be used. The
+board supports MTD/JFFS, so you could also mount something on there.
+
+Use 'make graphicsclient_config' before any 'make config'. This will set up
+defaults for GraphicsClient Plus support.
+
+The kernel zImage is linked to be loaded and executed at 0xc0200000.
+Also the following registers should have the specified values upon entry:
+
+ r0 = 0
+ r1 = 29 (this is the GraphicsClient architecture number)
+
+Linux can be used with the ADS BootLoader that ships with the
+newer rev boards. See their documentation on how to load Linux.
+Angel is not available for the GraphicsClient Plus AFAIK.
+
+There is a board known as just the GraphicsClient that ADS used to
+produce but has end of lifed. This code will not work on the older
+board with the ADS bootloader, but should still work with Angel,
+as outlined below. In any case, if you're planning on deploying
+something en masse, you should probably get the newer board.
+
+If using Angel on the older boards, here is a typical angel.opt option file
+if the kernel is loaded through the Angel Debug Monitor:
+
+----- begin angelboot.opt -----
+base 0xc0200000
+entry 0xc0200000
+r0 0x00000000
+r1 0x0000001d
+device /dev/ttyS1
+options "38400 8N1"
+baud 115200
+#otherfile ramdisk.gz
+#otherbase 0xc0800000
+exec minicom
+----- end angelboot.opt -----
+
+Then the kernel (and ramdisk if otherfile/otherbase lines above are
+uncommented) would be loaded with:
+
+ angelboot -f angelboot.opt zImage
+
+Here it is assumed that the board is connected to ttyS1 on your PC
+and that minicom is preconfigured with /dev/ttyS1, 38400 baud, 8N1, no flow
+control by default.
+
+If any other bootloader is used, ensure it accomplish the same, especially
+for r0/r1 register values before jumping into the kernel.
+
+
+Supported peripherals:
+- SA1100 LCD frame buffer (8/16bpp...sort of)
+- on-board SMC 92C96 ethernet NIC
+- SA1100 serial port
+- flash memory access (MTD/JFFS)
+- pcmcia
+- touchscreen(ucb1200)
+- ps/2 keyboard
+- console on LCD screen
+- serial ports (ttyS[0-2])
+ - ttyS0 is default for serial console
+- Smart I/O (ADC, keypad, digital inputs, etc)
+ See http://www.eurotech-inc.com/linux-sbc.asp for IOCTL documentation
+ and example user space code. ps/2 keybd is multiplexed through this driver
+
+To do:
+- UCB1200 audio with new ucb_generic layer
+- everything else! :-)
+
+Notes:
+
+- The flash on board is divided into 3 partitions. mtd0 is where
+ the ADS boot ROM and zImage is stored. It's been marked as
+ read-only to keep you from blasting over the bootloader. :) mtd1 is
+ for the ramdisk.gz image. mtd2 is user flash space and can be
+ utilized for either JFFS or if you're feeling crazy, running ext2
+ on top of it. If you're not using the ADS bootloader, you're
+ welcome to blast over the mtd1 partition also.
+
+- 16bpp mode requires a different cable than what ships with the board.
+ Contact ADS or look through the manual to wire your own. Currently,
+ if you compile with 16bit mode support and switch into a lower bpp
+ mode, the timing is off so the image is corrupted. This will be
+ fixed soon.
+
+Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
+
diff --git a/Documentation/arm/SA1100/GraphicsMaster b/Documentation/arm/SA1100/GraphicsMaster
new file mode 100644
index 000000000..9145088a0
--- /dev/null
+++ b/Documentation/arm/SA1100/GraphicsMaster
@@ -0,0 +1,53 @@
+ADS GraphicsMaster Single Board Computer
+
+For more details, contact Applied Data Systems or see
+http://www.applieddata.net/products.html
+
+The original Linux support for this product has been provided by
+Nicolas Pitre <nico@fluxnic.net>. Continued development work by
+Woojung Huh <whuh@applieddata.net>
+
+Use 'make graphicsmaster_config' before any 'make config'.
+This will set up defaults for GraphicsMaster support.
+
+The kernel zImage is linked to be loaded and executed at 0xc0400000.
+
+Linux can be used with the ADS BootLoader that ships with the
+newer rev boards. See their documentation on how to load Linux.
+
+Supported peripherals:
+- SA1100 LCD frame buffer (8/16bpp...sort of)
+- SA1111 USB Master
+- on-board SMC 92C96 ethernet NIC
+- SA1100 serial port
+- flash memory access (MTD/JFFS)
+- pcmcia, compact flash
+- touchscreen(ucb1200)
+- ps/2 keyboard
+- console on LCD screen
+- serial ports (ttyS[0-2])
+ - ttyS0 is default for serial console
+- Smart I/O (ADC, keypad, digital inputs, etc)
+ See http://www.eurotech-inc.com/linux-sbc.asp for IOCTL documentation
+ and example user space code. ps/2 keybd is multiplexed through this driver
+
+To do:
+- everything else! :-)
+
+Notes:
+
+- The flash on board is divided into 3 partitions. mtd0 is where
+ the zImage is stored. It's been marked as read-only to keep you
+ from blasting over the bootloader. :) mtd1 is
+ for the ramdisk.gz image. mtd2 is user flash space and can be
+ utilized for either JFFS or if you're feeling crazy, running ext2
+ on top of it. If you're not using the ADS bootloader, you're
+ welcome to blast over the mtd1 partition also.
+
+- 16bpp mode requires a different cable than what ships with the board.
+ Contact ADS or look through the manual to wire your own. Currently,
+ if you compile with 16bit mode support and switch into a lower bpp
+ mode, the timing is off so the image is corrupted. This will be
+ fixed soon.
+
+Any contribution can be sent to nico@fluxnic.net and will be greatly welcome!
diff --git a/Documentation/arm/SA1100/HUW_WEBPANEL b/Documentation/arm/SA1100/HUW_WEBPANEL
new file mode 100644
index 000000000..fd56b48d4
--- /dev/null
+++ b/Documentation/arm/SA1100/HUW_WEBPANEL
@@ -0,0 +1,17 @@
+The HUW_WEBPANEL is a product of the german company Hoeft & Wessel AG
+
+If you want more information, please visit
+http://www.hoeft-wessel.de
+
+To build the kernel:
+ make huw_webpanel_config
+ make oldconfig
+ [accept all defaults]
+ make zImage
+
+Mostly of the work is done by:
+Roman Jordan jor@hoeft-wessel.de
+Christoph Schulz schu@hoeft-wessel.de
+
+2000/12/18/
+
diff --git a/Documentation/arm/SA1100/Itsy b/Documentation/arm/SA1100/Itsy
new file mode 100644
index 000000000..44b94997f
--- /dev/null
+++ b/Documentation/arm/SA1100/Itsy
@@ -0,0 +1,39 @@
+Itsy is a research project done by the Western Research Lab, and Systems
+Research Center in Palo Alto, CA. The Itsy project is one of several
+research projects at Compaq that are related to pocket computing.
+
+For more information, see:
+
+ http://www.hpl.hp.com/downloads/crl/itsy/
+
+Notes on initial 2.4 Itsy support (8/27/2000) :
+The port was done on an Itsy version 1.5 machine with a daughtercard with
+64 Meg of DRAM and 32 Meg of Flash. The initial work includes support for
+serial console (to see what you're doing). No other devices have been
+enabled.
+
+To build, do a "make menuconfig" (or xmenuconfig) and select Itsy support.
+Disable Flash and LCD support. and then do a make zImage.
+Finally, you will need to cd to arch/arm/boot/tools and execute a make there
+to build the params-itsy program used to boot the kernel.
+
+In order to install the port of 2.4 to the itsy, You will need to set the
+configuration parameters in the monitor as follows:
+Arg 1:0x08340000, Arg2: 0xC0000000, Arg3:18 (0x12), Arg4:0
+Make sure the start-routine address is set to 0x00060000.
+
+Next, flash the params-itsy program to 0x00060000 ("p 1 0x00060000" in the
+flash menu) Flash the kernel in arch/arm/boot/zImage into 0x08340000
+("p 1 0x00340000"). Finally flash an initial ramdisk into 0xC8000000
+("p 2 0x0") We used ramdisk-2-30.gz from the 0.11 version directory on
+handhelds.org.
+
+The serial connection we established was at:
+ 8-bit data, no parity, 1 stop bit(s), 115200.00 b/s. in the monitor, in the
+params-itsy program, and in the kernel itself. This can be changed, but
+not easily. The monitor parameters are easily changed, the params program
+setup is assembly outl's, and the kernel is a configuration item specific to
+the itsy. (i.e. grep for CONFIG_SA1100_ITSY and you'll find where it is.)
+
+
+This should get you a properly booting 2.4 kernel on the itsy.
diff --git a/Documentation/arm/SA1100/LART b/Documentation/arm/SA1100/LART
new file mode 100644
index 000000000..6d412b685
--- /dev/null
+++ b/Documentation/arm/SA1100/LART
@@ -0,0 +1,14 @@
+Linux Advanced Radio Terminal (LART)
+------------------------------------
+
+The LART is a small (7.5 x 10cm) SA-1100 board, designed for embedded
+applications. It has 32 MB DRAM, 4MB Flash ROM, double RS232 and all
+other StrongARM-gadgets. Almost all SA signals are directly accessible
+through a number of connectors. The powersupply accepts voltages
+between 3.5V and 16V and is overdimensioned to support a range of
+daughterboards. A quad Ethernet / IDE / PS2 / sound daughterboard
+is under development, with plenty of others in different stages of
+planning.
+
+The hardware designs for this board have been released under an open license;
+see the LART page at http://www.lartmaker.nl/ for more information.
diff --git a/Documentation/arm/SA1100/PLEB b/Documentation/arm/SA1100/PLEB
new file mode 100644
index 000000000..b9c8a631a
--- /dev/null
+++ b/Documentation/arm/SA1100/PLEB
@@ -0,0 +1,11 @@
+The PLEB project was started as a student initiative at the School of
+Computer Science and Engineering, University of New South Wales to make a
+pocket computer capable of running the Linux Kernel.
+
+PLEB support has yet to be fully integrated.
+
+For more information, see:
+
+ http://www.cse.unsw.edu.au
+
+
diff --git a/Documentation/arm/SA1100/Pangolin b/Documentation/arm/SA1100/Pangolin
new file mode 100644
index 000000000..077a6120e
--- /dev/null
+++ b/Documentation/arm/SA1100/Pangolin
@@ -0,0 +1,23 @@
+Pangolin is a StrongARM 1110-based evaluation platform produced
+by Dialogue Technology (http://www.dialogue.com.tw/).
+It has EISA slots for ease of configuration with SDRAM/Flash
+memory card, USB/Serial/Audio card, Compact Flash card,
+PCMCIA/IDE card and TFT-LCD card.
+
+To compile for Pangolin, you must issue the following commands:
+
+ make pangolin_config
+ make oldconfig
+ make zImage
+
+Supported peripherals:
+- SA1110 serial port (UART1/UART2/UART3)
+- flash memory access
+- compact flash driver
+- UDA1341 sound driver
+- SA1100 LCD controller for 800x600 16bpp TFT-LCD
+- MQ-200 driver for 800x600 16bpp TFT-LCD
+- Penmount(touch panel) driver
+- PCMCIA driver
+- SMC91C94 LAN driver
+- IDE driver (experimental)
diff --git a/Documentation/arm/SA1100/Tifon b/Documentation/arm/SA1100/Tifon
new file mode 100644
index 000000000..dd1934d9c
--- /dev/null
+++ b/Documentation/arm/SA1100/Tifon
@@ -0,0 +1,7 @@
+Tifon
+-----
+
+More info has to come...
+
+Contact: Peter Danielsson <peter.danielsson@era-t.ericsson.se>
+
diff --git a/Documentation/arm/SA1100/Victor b/Documentation/arm/SA1100/Victor
new file mode 100644
index 000000000..9cff415da
--- /dev/null
+++ b/Documentation/arm/SA1100/Victor
@@ -0,0 +1,16 @@
+Victor is known as a "digital talking book player" manufactured by
+VisuAide, Inc. to be used by blind people.
+
+For more information related to Victor, see:
+
+ http://www.humanware.com/en-usa/products
+
+Of course Victor is using Linux as its main operating system.
+The Victor implementation for Linux is maintained by Nicolas Pitre:
+
+ nico@visuaide.com
+ nico@fluxnic.net
+
+For any comments, please feel free to contact me through the above
+addresses.
+
diff --git a/Documentation/arm/SA1100/Yopy b/Documentation/arm/SA1100/Yopy
new file mode 100644
index 000000000..e14f16d83
--- /dev/null
+++ b/Documentation/arm/SA1100/Yopy
@@ -0,0 +1,2 @@
+See http://www.yopydeveloper.org for more.
+
diff --git a/Documentation/arm/SA1100/empeg b/Documentation/arm/SA1100/empeg
new file mode 100644
index 000000000..4ece4849a
--- /dev/null
+++ b/Documentation/arm/SA1100/empeg
@@ -0,0 +1,2 @@
+See ../empeg/README
+
diff --git a/Documentation/arm/SA1100/nanoEngine b/Documentation/arm/SA1100/nanoEngine
new file mode 100644
index 000000000..48a7934f9
--- /dev/null
+++ b/Documentation/arm/SA1100/nanoEngine
@@ -0,0 +1,11 @@
+nanoEngine
+----------
+
+"nanoEngine" is a SA1110 based single board computer from
+Bright Star Engineering Inc. See www.brightstareng.com/arm
+for more info.
+(Ref: Stuart Adams <sja@brightstareng.com>)
+
+Also visit Larry Doolittle's "Linux for the nanoEngine" site:
+http://www.brightstareng.com/arm/nanoeng.htm
+
diff --git a/Documentation/arm/SA1100/serial_UART b/Documentation/arm/SA1100/serial_UART
new file mode 100644
index 000000000..a63966f1d
--- /dev/null
+++ b/Documentation/arm/SA1100/serial_UART
@@ -0,0 +1,47 @@
+The SA1100 serial port had its major/minor numbers officially assigned:
+
+> Date: Sun, 24 Sep 2000 21:40:27 -0700
+> From: H. Peter Anvin <hpa@transmeta.com>
+> To: Nicolas Pitre <nico@CAM.ORG>
+> Cc: Device List Maintainer <device@lanana.org>
+> Subject: Re: device
+>
+> Okay. Note that device numbers 204 and 205 are used for "low density
+> serial devices", so you will have a range of minors on those majors (the
+> tty device layer handles this just fine, so you don't have to worry about
+> doing anything special.)
+>
+> So your assignments are:
+>
+> 204 char Low-density serial ports
+> 5 = /dev/ttySA0 SA1100 builtin serial port 0
+> 6 = /dev/ttySA1 SA1100 builtin serial port 1
+> 7 = /dev/ttySA2 SA1100 builtin serial port 2
+>
+> 205 char Low-density serial ports (alternate device)
+> 5 = /dev/cusa0 Callout device for ttySA0
+> 6 = /dev/cusa1 Callout device for ttySA1
+> 7 = /dev/cusa2 Callout device for ttySA2
+>
+
+You must create those inodes in /dev on the root filesystem used
+by your SA1100-based device:
+
+ mknod ttySA0 c 204 5
+ mknod ttySA1 c 204 6
+ mknod ttySA2 c 204 7
+ mknod cusa0 c 205 5
+ mknod cusa1 c 205 6
+ mknod cusa2 c 205 7
+
+In addition to the creation of the appropriate device nodes above, you
+must ensure your user space applications make use of the correct device
+name. The classic example is the content of the /etc/inittab file where
+you might have a getty process started on ttyS0. In this case:
+
+- replace occurrences of ttyS0 with ttySA0, ttyS1 with ttySA1, etc.
+
+- don't forget to add 'ttySA0', 'console', or the appropriate tty name
+ in /etc/securetty for root to be allowed to login as well.
+
+
diff --git a/Documentation/arm/SH-Mobile/.gitignore b/Documentation/arm/SH-Mobile/.gitignore
new file mode 100644
index 000000000..c928dbf3c
--- /dev/null
+++ b/Documentation/arm/SH-Mobile/.gitignore
@@ -0,0 +1 @@
+vrl4
diff --git a/Documentation/arm/SPEAr/overview.txt b/Documentation/arm/SPEAr/overview.txt
new file mode 100644
index 000000000..65610bf52
--- /dev/null
+++ b/Documentation/arm/SPEAr/overview.txt
@@ -0,0 +1,63 @@
+ SPEAr ARM Linux Overview
+ ==========================
+
+Introduction
+------------
+
+ SPEAr (Structured Processor Enhanced Architecture).
+ weblink : http://www.st.com/spear
+
+ The ST Microelectronics SPEAr range of ARM9/CortexA9 System-on-Chip CPUs are
+ supported by the 'spear' platform of ARM Linux. Currently SPEAr1310,
+ SPEAr1340, SPEAr300, SPEAr310, SPEAr320 and SPEAr600 SOCs are supported.
+
+ Hierarchy in SPEAr is as follows:
+
+ SPEAr (Platform)
+ - SPEAr3XX (3XX SOC series, based on ARM9)
+ - SPEAr300 (SOC)
+ - SPEAr300 Evaluation Board
+ - SPEAr310 (SOC)
+ - SPEAr310 Evaluation Board
+ - SPEAr320 (SOC)
+ - SPEAr320 Evaluation Board
+ - SPEAr6XX (6XX SOC series, based on ARM9)
+ - SPEAr600 (SOC)
+ - SPEAr600 Evaluation Board
+ - SPEAr13XX (13XX SOC series, based on ARM CORTEXA9)
+ - SPEAr1310 (SOC)
+ - SPEAr1310 Evaluation Board
+ - SPEAr1340 (SOC)
+ - SPEAr1340 Evaluation Board
+
+ Configuration
+ -------------
+
+ A generic configuration is provided for each machine, and can be used as the
+ default by
+ make spear13xx_defconfig
+ make spear3xx_defconfig
+ make spear6xx_defconfig
+
+ Layout
+ ------
+
+ The common files for multiple machine families (SPEAr3xx, SPEAr6xx and
+ SPEAr13xx) are located in the platform code contained in arch/arm/plat-spear
+ with headers in plat/.
+
+ Each machine series have a directory with name arch/arm/mach-spear followed by
+ series name. Like mach-spear3xx, mach-spear6xx and mach-spear13xx.
+
+ Common file for machines of spear3xx family is mach-spear3xx/spear3xx.c, for
+ spear6xx is mach-spear6xx/spear6xx.c and for spear13xx family is
+ mach-spear13xx/spear13xx.c. mach-spear* also contain soc/machine specific
+ files, like spear1310.c, spear1340.c spear300.c, spear310.c, spear320.c and
+ spear600.c. mach-spear* doesn't contains board specific files as they fully
+ support Flattened Device Tree.
+
+
+ Document Author
+ ---------------
+
+ Viresh Kumar <viresh.linux@gmail.com>, (c) 2010-2012 ST Microelectronics
diff --git a/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
new file mode 100644
index 000000000..fa968aa99
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/CPUfreq.txt
@@ -0,0 +1,75 @@
+ S3C24XX CPUfreq support
+ =======================
+
+Introduction
+------------
+
+ The S3C24XX series support a number of power saving systems, such as
+ the ability to change the core, memory and peripheral operating
+ frequencies. The core control is exported via the CPUFreq driver
+ which has a number of different manual or automatic controls over the
+ rate the core is running at.
+
+ There are two forms of the driver depending on the specific CPU and
+ how the clocks are arranged. The first implementation used as single
+ PLL to feed the ARM, memory and peripherals via a series of dividers
+ and muxes and this is the implementation that is documented here. A
+ newer version where there is a separate PLL and clock divider for the
+ ARM core is available as a separate driver.
+
+
+Layout
+------
+
+ The code core manages the CPU specific drivers, any data that they
+ need to register and the interface to the generic drivers/cpufreq
+ system. Each CPU registers a driver to control the PLL, clock dividers
+ and anything else associated with it. Any board that wants to use this
+ framework needs to supply at least basic details of what is required.
+
+ The core registers with drivers/cpufreq at init time if all the data
+ necessary has been supplied.
+
+
+CPU support
+-----------
+
+ The support for each CPU depends on the facilities provided by the
+ SoC and the driver as each device has different PLL and clock chains
+ associated with it.
+
+
+Slow Mode
+---------
+
+ The SLOW mode where the PLL is turned off altogether and the
+ system is fed by the external crystal input is currently not
+ supported.
+
+
+sysfs
+-----
+
+ The core code exports extra information via sysfs in the directory
+ devices/system/cpu/cpu0/arch-freq.
+
+
+Board Support
+-------------
+
+ Each board that wants to use the cpufreq code must register some basic
+ information with the core driver to provide information about what the
+ board requires and any restrictions being placed on it.
+
+ The board needs to supply information about whether it needs the IO bank
+ timings changing, any maximum frequency limits and information about the
+ SDRAM refresh rate.
+
+
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2009 Simtec Electronics
+Licensed under GPLv2
diff --git a/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt b/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt
new file mode 100644
index 000000000..b87292e05
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/EB2410ITX.txt
@@ -0,0 +1,58 @@
+ Simtec Electronics EB2410ITX (BAST)
+ ===================================
+
+ http://www.simtec.co.uk/products/EB2410ITX/
+
+Introduction
+------------
+
+ The EB2410ITX is a S3C2410 based development board with a variety of
+ peripherals and expansion connectors. This board is also known by
+ the shortened name of Bast.
+
+
+Configuration
+-------------
+
+ To set the default configuration, use `make bast_defconfig` which
+ supports the commonly used features of this board.
+
+
+Support
+-------
+
+ Official support information can be found on the Simtec Electronics
+ website, at the product page http://www.simtec.co.uk/products/EB2410ITX/
+
+ Useful links:
+
+ - Resources Page http://www.simtec.co.uk/products/EB2410ITX/resources.html
+
+ - Board FAQ at http://www.simtec.co.uk/products/EB2410ITX/faq.html
+
+ - Bootloader info http://www.simtec.co.uk/products/SWABLE/resources.html
+ and FAQ http://www.simtec.co.uk/products/SWABLE/faq.html
+
+
+MTD
+---
+
+ The NAND and NOR support has been merged from the linux-mtd project.
+ Any problems, see http://www.linux-mtd.infradead.org/ for more
+ information or up-to-date versions of linux-mtd.
+
+
+IDE
+---
+
+ Both onboard IDE ports are supported, however there is no support for
+ changing speed of devices, PIO Mode 4 capable drives should be used.
+
+
+Maintainers
+-----------
+
+ This board is maintained by Simtec Electronics.
+
+
+Copyright 2004 Ben Dooks, Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/GPIO.txt b/Documentation/arm/Samsung-S3C24XX/GPIO.txt
new file mode 100644
index 000000000..0ebd7e224
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/GPIO.txt
@@ -0,0 +1,171 @@
+ S3C24XX GPIO Control
+ ====================
+
+Introduction
+------------
+
+ The s3c2410 kernel provides an interface to configure and
+ manipulate the state of the GPIO pins, and find out other
+ information about them.
+
+ There are a number of conditions attached to the configuration
+ of the s3c2410 GPIO system, please read the Samsung provided
+ data-sheet/users manual to find out the complete list.
+
+ See Documentation/arm/Samsung/GPIO.txt for the core implementation.
+
+
+GPIOLIB
+-------
+
+ With the event of the GPIOLIB in drivers/gpio, support for some
+ of the GPIO functions such as reading and writing a pin will
+ be removed in favour of this common access method.
+
+ Once all the extant drivers have been converted, the functions
+ listed below will be removed (they may be marked as __deprecated
+ in the near future).
+
+ The following functions now either have a s3c_ specific variant
+ or are merged into gpiolib. See the definitions in
+ arch/arm/plat-samsung/include/plat/gpio-cfg.h:
+
+ s3c2410_gpio_setpin() gpio_set_value() or gpio_direction_output()
+ s3c2410_gpio_getpin() gpio_get_value() or gpio_direction_input()
+ s3c2410_gpio_getirq() gpio_to_irq()
+ s3c2410_gpio_cfgpin() s3c_gpio_cfgpin()
+ s3c2410_gpio_getcfg() s3c_gpio_getcfg()
+ s3c2410_gpio_pullup() s3c_gpio_setpull()
+
+
+GPIOLIB conversion
+------------------
+
+If you need to convert your board or driver to use gpiolib from the phased
+out s3c2410 API, then here are some notes on the process.
+
+1) If your board is exclusively using an GPIO, say to control peripheral
+ power, then it will require to claim the gpio with gpio_request() before
+ it can use it.
+
+ It is recommended to check the return value, with at least WARN_ON()
+ during initialisation.
+
+2) The s3c2410_gpio_cfgpin() can be directly replaced with s3c_gpio_cfgpin()
+ as they have the same arguments, and can either take the pin specific
+ values, or the more generic special-function-number arguments.
+
+3) s3c2410_gpio_pullup() changes have the problem that whilst the
+ s3c2410_gpio_pullup(x, 1) can be easily translated to the
+ s3c_gpio_setpull(x, S3C_GPIO_PULL_NONE), the s3c2410_gpio_pullup(x, 0)
+ are not so easy.
+
+ The s3c2410_gpio_pullup(x, 0) case enables the pull-up (or in the case
+ of some of the devices, a pull-down) and as such the new API distinguishes
+ between the UP and DOWN case. There is currently no 'just turn on' setting
+ which may be required if this becomes a problem.
+
+4) s3c2410_gpio_setpin() can be replaced by gpio_set_value(), the old call
+ does not implicitly configure the relevant gpio to output. The gpio
+ direction should be changed before using gpio_set_value().
+
+5) s3c2410_gpio_getpin() is replaceable by gpio_get_value() if the pin
+ has been set to input. It is currently unknown what the behaviour is
+ when using gpio_get_value() on an output pin (s3c2410_gpio_getpin
+ would return the value the pin is supposed to be outputting).
+
+6) s3c2410_gpio_getirq() should be directly replaceable with the
+ gpio_to_irq() call.
+
+The s3c2410_gpio and gpio_ calls have always operated on the same gpio
+numberspace, so there is no problem with converting the gpio numbering
+between the calls.
+
+
+Headers
+-------
+
+ See arch/arm/mach-s3c24xx/include/mach/regs-gpio.h for the list
+ of GPIO pins, and the configuration values for them. This
+ is included by using #include <mach/regs-gpio.h>
+
+
+PIN Numbers
+-----------
+
+ Each pin has an unique number associated with it in regs-gpio.h,
+ e.g. S3C2410_GPA(0) or S3C2410_GPF(1). These defines are used to tell
+ the GPIO functions which pin is to be used.
+
+ With the conversion to gpiolib, there is no longer a direct conversion
+ from gpio pin number to register base address as in earlier kernels. This
+ is due to the number space required for newer SoCs where the later
+ GPIOs are not contiguous.
+
+
+Configuring a pin
+-----------------
+
+ The following function allows the configuration of a given pin to
+ be changed.
+
+ void s3c_gpio_cfgpin(unsigned int pin, unsigned int function);
+
+ e.g.:
+
+ s3c_gpio_cfgpin(S3C2410_GPA(0), S3C_GPIO_SFN(1));
+ s3c_gpio_cfgpin(S3C2410_GPE(8), S3C_GPIO_SFN(2));
+
+ which would turn GPA(0) into the lowest Address line A0, and set
+ GPE(8) to be connected to the SDIO/MMC controller's SDDAT1 line.
+
+
+Reading the current configuration
+---------------------------------
+
+ The current configuration of a pin can be read by using standard
+ gpiolib function:
+
+ s3c_gpio_getcfg(unsigned int pin);
+
+ The return value will be from the same set of values which can be
+ passed to s3c_gpio_cfgpin().
+
+
+Configuring a pull-up resistor
+------------------------------
+
+ A large proportion of the GPIO pins on the S3C2410 can have weak
+ pull-up resistors enabled. This can be configured by the following
+ function:
+
+ void s3c_gpio_setpull(unsigned int pin, unsigned int to);
+
+ Where the to value is S3C_GPIO_PULL_NONE to set the pull-up off,
+ and S3C_GPIO_PULL_UP to enable the specified pull-up. Any other
+ values are currently undefined.
+
+
+Getting and setting the state of a PIN
+--------------------------------------
+
+ These calls are now implemented by the relevant gpiolib calls, convert
+ your board or driver to use gpiolib.
+
+
+Getting the IRQ number associated with a PIN
+--------------------------------------------
+
+ A standard gpiolib function can map the given pin number to an IRQ
+ number to pass to the IRQ system.
+
+ int gpio_to_irq(unsigned int pin);
+
+ Note, not all pins have an IRQ.
+
+
+Author
+-------
+
+Ben Dooks, 03 October 2004
+Copyright 2004 Ben Dooks, Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/H1940.txt b/Documentation/arm/Samsung-S3C24XX/H1940.txt
new file mode 100644
index 000000000..b738859b1
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/H1940.txt
@@ -0,0 +1,40 @@
+ HP IPAQ H1940
+ =============
+
+http://www.handhelds.org/projects/h1940.html
+
+Introduction
+------------
+
+ The HP H1940 is a S3C2410 based handheld device, with
+ bluetooth connectivity.
+
+
+Support
+-------
+
+ A variety of information is available
+
+ handhelds.org project page:
+
+ http://www.handhelds.org/projects/h1940.html
+
+ handhelds.org wiki page:
+
+ http://handhelds.org/moin/moin.cgi/HpIpaqH1940
+
+ Herbert Pötzl pages:
+
+ http://vserver.13thfloor.at/H1940/
+
+
+Maintainers
+-----------
+
+ This project is being maintained and developed by a variety
+ of people, including Ben Dooks, Arnaud Patard, and Herbert Pötzl.
+
+ Thanks to the many others who have also provided support.
+
+
+(c) 2005 Ben Dooks
diff --git a/Documentation/arm/Samsung-S3C24XX/NAND.txt b/Documentation/arm/Samsung-S3C24XX/NAND.txt
new file mode 100644
index 000000000..bc478a340
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/NAND.txt
@@ -0,0 +1,30 @@
+ S3C24XX NAND Support
+ ====================
+
+Introduction
+------------
+
+Small Page NAND
+---------------
+
+The driver uses a 512 byte (1 page) ECC code for this setup. The
+ECC code is not directly compatible with the default kernel ECC
+code, so the driver enforces its own OOB layout and ECC parameters
+
+Large Page NAND
+---------------
+
+The driver is capable of handling NAND flash with a 2KiB page
+size, with support for hardware ECC generation and correction.
+
+Unlike the 512byte page mode, the driver generates ECC data for
+each 256 byte block in an 2KiB page. This means that more than
+one error in a page can be rectified. It also means that the
+OOB layout remains the default kernel layout for these flashes.
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2007 Simtec Electronics
+
diff --git a/Documentation/arm/Samsung-S3C24XX/Overview.txt b/Documentation/arm/Samsung-S3C24XX/Overview.txt
new file mode 100644
index 000000000..359587b23
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/Overview.txt
@@ -0,0 +1,318 @@
+ S3C24XX ARM Linux Overview
+ ==========================
+
+
+
+Introduction
+------------
+
+ The Samsung S3C24XX range of ARM9 System-on-Chip CPUs are supported
+ by the 's3c2410' architecture of ARM Linux. Currently the S3C2410,
+ S3C2412, S3C2413, S3C2416, S3C2440, S3C2442, S3C2443 and S3C2450 devices
+ are supported.
+
+ Support for the S3C2400 and S3C24A0 series was never completed and the
+ corresponding code has been removed after a while. If someone wishes to
+ revive this effort, partial support can be retrieved from earlier Linux
+ versions.
+
+ The S3C2416 and S3C2450 devices are very similar and S3C2450 support is
+ included under the arch/arm/mach-s3c2416 directory. Note, whilst core
+ support for these SoCs is in, work on some of the extra peripherals
+ and extra interrupts is still ongoing.
+
+
+Configuration
+-------------
+
+ A generic S3C2410 configuration is provided, and can be used as the
+ default by `make s3c2410_defconfig`. This configuration has support
+ for all the machines, and the commonly used features on them.
+
+ Certain machines may have their own default configurations as well,
+ please check the machine specific documentation.
+
+
+Layout
+------
+
+ The core support files are located in the platform code contained in
+ arch/arm/plat-s3c24xx with headers in include/asm-arm/plat-s3c24xx.
+ This directory should be kept to items shared between the platform
+ code (arch/arm/plat-s3c24xx) and the arch/arm/mach-s3c24* code.
+
+ Each cpu has a directory with the support files for it, and the
+ machines that carry the device. For example S3C2410 is contained
+ in arch/arm/mach-s3c2410 and S3C2440 in arch/arm/mach-s3c2440
+
+ Register, kernel and platform data definitions are held in the
+ arch/arm/mach-s3c2410 directory./include/mach
+
+arch/arm/plat-s3c24xx:
+
+ Files in here are either common to all the s3c24xx family,
+ or are common to only some of them with names to indicate this
+ status. The files that are not common to all are generally named
+ with the initial cpu they support in the series to ensure a short
+ name without any possibility of confusion with newer devices.
+
+ As an example, initially s3c244x would cover s3c2440 and s3c2442, but
+ with the s3c2443 which does not share many of the same drivers in
+ this directory, the name becomes invalid. We stick to s3c2440-<x>
+ to indicate a driver that is s3c2440 and s3c2442 compatible.
+
+ This does mean that to find the status of any given SoC, a number
+ of directories may need to be searched.
+
+
+Machines
+--------
+
+ The currently supported machines are as follows:
+
+ Simtec Electronics EB2410ITX (BAST)
+
+ A general purpose development board, see EB2410ITX.txt for further
+ details
+
+ Simtec Electronics IM2440D20 (Osiris)
+
+ CPU Module from Simtec Electronics, with a S3C2440A CPU, nand flash
+ and a PCMCIA controller.
+
+ Samsung SMDK2410
+
+ Samsung's own development board, geared for PDA work.
+
+ Samsung/Aiji SMDK2412
+
+ The S3C2412 version of the SMDK2440.
+
+ Samsung/Aiji SMDK2413
+
+ The S3C2412 version of the SMDK2440.
+
+ Samsung/Meritech SMDK2440
+
+ The S3C2440 compatible version of the SMDK2440, which has the
+ option of an S3C2440 or S3C2442 CPU module.
+
+ Thorcom VR1000
+
+ Custom embedded board
+
+ HP IPAQ 1940
+
+ Handheld (IPAQ), available in several varieties
+
+ HP iPAQ rx3715
+
+ S3C2440 based IPAQ, with a number of variations depending on
+ features shipped.
+
+ Acer N30
+
+ A S3C2410 based PDA from Acer. There is a Wiki page at
+ http://handhelds.org/moin/moin.cgi/AcerN30Documentation .
+
+ AML M5900
+
+ American Microsystems' M5900
+
+ Nex Vision Nexcoder
+ Nex Vision Otom
+
+ Two machines by Nex Vision
+
+
+Adding New Machines
+-------------------
+
+ The architecture has been designed to support as many machines as can
+ be configured for it in one kernel build, and any future additions
+ should keep this in mind before altering items outside of their own
+ machine files.
+
+ Machine definitions should be kept in linux/arch/arm/mach-s3c2410,
+ and there are a number of examples that can be looked at.
+
+ Read the kernel patch submission policies as well as the
+ Documentation/arm directory before submitting patches. The
+ ARM kernel series is managed by Russell King, and has a patch system
+ located at http://www.arm.linux.org.uk/developer/patches/
+ as well as mailing lists that can be found from the same site.
+
+ As a courtesy, please notify <ben-linux@fluff.org> of any new
+ machines or other modifications.
+
+ Any large scale modifications, or new drivers should be discussed
+ on the ARM kernel mailing list (linux-arm-kernel) before being
+ attempted. See http://www.arm.linux.org.uk/mailinglists/ for the
+ mailing list information.
+
+
+I2C
+---
+
+ The hardware I2C core in the CPU is supported in single master
+ mode, and can be configured via platform data.
+
+
+RTC
+---
+
+ Support for the onboard RTC unit, including alarm function.
+
+ This has recently been upgraded to use the new RTC core,
+ and the module has been renamed to rtc-s3c to fit in with
+ the new rtc naming scheme.
+
+
+Watchdog
+--------
+
+ The onchip watchdog is available via the standard watchdog
+ interface.
+
+
+NAND
+----
+
+ The current kernels now have support for the s3c2410 NAND
+ controller. If there are any problems the latest linux-mtd
+ code can be found from http://www.linux-mtd.infradead.org/
+
+ For more information see Documentation/arm/Samsung-S3C24XX/NAND.txt
+
+
+SD/MMC
+------
+
+ The SD/MMC hardware pre S3C2443 is supported in the current
+ kernel, the driver is drivers/mmc/host/s3cmci.c and supports
+ 1 and 4 bit SD or MMC cards.
+
+ The SDIO behaviour of this driver has not been fully tested. There is no
+ current support for hardware SDIO interrupts.
+
+
+Serial
+------
+
+ The s3c2410 serial driver provides support for the internal
+ serial ports. These devices appear as /dev/ttySAC0 through 3.
+
+ To create device nodes for these, use the following commands
+
+ mknod ttySAC0 c 204 64
+ mknod ttySAC1 c 204 65
+ mknod ttySAC2 c 204 66
+
+
+GPIO
+----
+
+ The core contains support for manipulating the GPIO, see the
+ documentation in GPIO.txt in the same directory as this file.
+
+ Newer kernels carry GPIOLIB, and support is being moved towards
+ this with some of the older support in line to be removed.
+
+ As of v2.6.34, the move towards using gpiolib support is almost
+ complete, and very little of the old calls are left.
+
+ See Documentation/arm/Samsung-S3C24XX/GPIO.txt for the S3C24XX specific
+ support and Documentation/arm/Samsung/GPIO.txt for the core Samsung
+ implementation.
+
+
+Clock Management
+----------------
+
+ The core provides the interface defined in the header file
+ include/asm-arm/hardware/clock.h, to allow control over the
+ various clock units
+
+
+Suspend to RAM
+--------------
+
+ For boards that provide support for suspend to RAM, the
+ system can be placed into low power suspend.
+
+ See Suspend.txt for more information.
+
+
+SPI
+---
+
+ SPI drivers are available for both the in-built hardware
+ (although there is no DMA support yet) and a generic
+ GPIO based solution.
+
+
+LEDs
+----
+
+ There is support for GPIO based LEDs via a platform driver
+ in the LED subsystem.
+
+
+Platform Data
+-------------
+
+ Whenever a device has platform specific data that is specified
+ on a per-machine basis, care should be taken to ensure the
+ following:
+
+ 1) that default data is not left in the device to confuse the
+ driver if a machine does not set it at startup
+
+ 2) the data should (if possible) be marked as __initdata,
+ to ensure that the data is thrown away if the machine is
+ not the one currently in use.
+
+ The best way of doing this is to make a function that
+ kmalloc()s an area of memory, and copies the __initdata
+ and then sets the relevant device's platform data. Making
+ the function `__init` takes care of ensuring it is discarded
+ with the rest of the initialisation code
+
+ static __init void s3c24xx_xxx_set_platdata(struct xxx_data *pd)
+ {
+ struct s3c2410_xxx_mach_info *npd;
+
+ npd = kmalloc(sizeof(struct s3c2410_xxx_mach_info), GFP_KERNEL);
+ if (npd) {
+ memcpy(npd, pd, sizeof(struct s3c2410_xxx_mach_info));
+ s3c_device_xxx.dev.platform_data = npd;
+ } else {
+ printk(KERN_ERR "no memory for xxx platform data\n");
+ }
+ }
+
+ Note, since the code is marked as __init, it should not be
+ exported outside arch/arm/mach-s3c2410/, or exported to
+ modules via EXPORT_SYMBOL() and related functions.
+
+
+Port Contributors
+-----------------
+
+ Ben Dooks (BJD)
+ Vincent Sanders
+ Herbert Potzl
+ Arnaud Patard (RTP)
+ Roc Wu
+ Klaus Fetscher
+ Dimitry Andric
+ Shannon Holland
+ Guillaume Gourat (NexVision)
+ Christer Weinigel (wingel) (Acer N30)
+ Lucas Correia Villa Real (S3C2400 port)
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2004-2006 Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2412.txt b/Documentation/arm/Samsung-S3C24XX/S3C2412.txt
new file mode 100644
index 000000000..f057876b9
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/S3C2412.txt
@@ -0,0 +1,120 @@
+ S3C2412 ARM Linux Overview
+ ==========================
+
+Introduction
+------------
+
+ The S3C2412 is part of the S3C24XX range of ARM9 System-on-Chip CPUs
+ from Samsung. This part has an ARM926-EJS core, capable of running up
+ to 266MHz (see data-sheet for more information)
+
+
+Clock
+-----
+
+ The core clock code provides a set of clocks to the drivers, and allows
+ for source selection and a number of other features.
+
+
+Power
+-----
+
+ No support for suspend/resume to RAM in the current system.
+
+
+DMA
+---
+
+ No current support for DMA.
+
+
+GPIO
+----
+
+ There is support for setting the GPIO to input/output/special function
+ and reading or writing to them.
+
+
+UART
+----
+
+ The UART hardware is similar to the S3C2440, and is supported by the
+ s3c2410 driver in the drivers/serial directory.
+
+
+NAND
+----
+
+ The NAND hardware is similar to the S3C2440, and is supported by the
+ s3c2410 driver in the drivers/mtd/nand directory.
+
+
+USB Host
+--------
+
+ The USB hardware is similar to the S3C2410, with extended clock source
+ control. The OHCI portion is supported by the ohci-s3c2410 driver, and
+ the clock control selection is supported by the core clock code.
+
+
+USB Device
+----------
+
+ No current support in the kernel
+
+
+IRQs
+----
+
+ All the standard, and external interrupt sources are supported. The
+ extra sub-sources are not yet supported.
+
+
+RTC
+---
+
+ The RTC hardware is similar to the S3C2410, and is supported by the
+ s3c2410-rtc driver.
+
+
+Watchdog
+--------
+
+ The watchdog hardware is the same as the S3C2410, and is supported by
+ the s3c2410_wdt driver.
+
+
+MMC/SD/SDIO
+-----------
+
+ No current support for the MMC/SD/SDIO block.
+
+IIC
+---
+
+ The IIC hardware is the same as the S3C2410, and is supported by the
+ i2c-s3c24xx driver.
+
+
+IIS
+---
+
+ No current support for the IIS interface.
+
+
+SPI
+---
+
+ No current support for the SPI interfaces.
+
+
+ATA
+---
+
+ No current support for the on-board ATA block.
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2006 Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/S3C2413.txt b/Documentation/arm/Samsung-S3C24XX/S3C2413.txt
new file mode 100644
index 000000000..909bdc7dd
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/S3C2413.txt
@@ -0,0 +1,21 @@
+ S3C2413 ARM Linux Overview
+ ==========================
+
+Introduction
+------------
+
+ The S3C2413 is an extended version of the S3C2412, with an camera
+ interface and mobile DDR memory support. See the S3C2412 support
+ documentation for more information.
+
+
+Camera Interface
+---------------
+
+ This block is currently not supported.
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2006 Simtec Electronics
diff --git a/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt b/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt
new file mode 100644
index 000000000..429390bd4
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/SMDK2440.txt
@@ -0,0 +1,56 @@
+ Samsung/Meritech SMDK2440
+ =========================
+
+Introduction
+------------
+
+ The SMDK2440 is a two part evaluation board for the Samsung S3C2440
+ processor. It includes support for LCD, SmartMedia, Audio, SD and
+ 10MBit Ethernet, and expansion headers for various signals, including
+ the camera and unused GPIO.
+
+
+Configuration
+-------------
+
+ To set the default configuration, use `make smdk2440_defconfig` which
+ will configure the common features of this board, or use
+ `make s3c2410_config` to include support for all s3c2410/s3c2440 machines
+
+
+Support
+-------
+
+ Ben Dooks' SMDK2440 site at http://www.fluff.org/ben/smdk2440/ which
+ includes linux based USB download tools.
+
+ Some of the h1940 patches that can be found from the H1940 project
+ site at http://www.handhelds.org/projects/h1940.html can also be
+ applied to this board.
+
+
+Peripherals
+-----------
+
+ There is no current support for any of the extra peripherals on the
+ base-board itself.
+
+
+MTD
+---
+
+ The NAND flash should be supported by the in kernel MTD NAND support,
+ NOR flash will be added later.
+
+
+Maintainers
+-----------
+
+ This board is being maintained by Ben Dooks, for more info, see
+ http://www.fluff.org/ben/smdk2440/
+
+ Many thanks to Dimitry Andric of TomTom for the loan of the SMDK2440,
+ and to Simtec Electronics for allowing me time to work on this.
+
+
+(c) 2004 Ben Dooks
diff --git a/Documentation/arm/Samsung-S3C24XX/Suspend.txt b/Documentation/arm/Samsung-S3C24XX/Suspend.txt
new file mode 100644
index 000000000..1ca63b3e5
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/Suspend.txt
@@ -0,0 +1,137 @@
+ S3C24XX Suspend Support
+ =======================
+
+
+Introduction
+------------
+
+ The S3C24XX supports a low-power suspend mode, where the SDRAM is kept
+ in Self-Refresh mode, and all but the essential peripheral blocks are
+ powered down. For more information on how this works, please look
+ at the relevant CPU datasheet from Samsung.
+
+
+Requirements
+------------
+
+ 1) A bootloader that can support the necessary resume operation
+
+ 2) Support for at least 1 source for resume
+
+ 3) CONFIG_PM enabled in the kernel
+
+ 4) Any peripherals that are going to be powered down at the same
+ time require suspend/resume support.
+
+
+Resuming
+--------
+
+ The S3C2410 user manual defines the process of sending the CPU to
+ sleep and how it resumes. The default behaviour of the Linux code
+ is to set the GSTATUS3 register to the physical address of the
+ code to resume Linux operation.
+
+ GSTATUS4 is currently left alone by the sleep code, and is free to
+ use for any other purposes (for example, the EB2410ITX uses this to
+ save memory configuration in).
+
+
+Machine Support
+---------------
+
+ The machine specific functions must call the s3c_pm_init() function
+ to say that its bootloader is capable of resuming. This can be as
+ simple as adding the following to the machine's definition:
+
+ INITMACHINE(s3c_pm_init)
+
+ A board can do its own setup before calling s3c_pm_init, if it
+ needs to setup anything else for power management support.
+
+ There is currently no support for over-riding the default method of
+ saving the resume address, if your board requires it, then contact
+ the maintainer and discuss what is required.
+
+ Note, the original method of adding an late_initcall() is wrong,
+ and will end up initialising all compiled machines' pm init!
+
+ The following is an example of code used for testing wakeup from
+ an falling edge on IRQ_EINT0:
+
+
+static irqreturn_t button_irq(int irq, void *pw)
+{
+ return IRQ_HANDLED;
+}
+
+statuc void __init machine_init(void)
+{
+ ...
+
+ request_irq(IRQ_EINT0, button_irq, IRQF_TRIGGER_FALLING,
+ "button-irq-eint0", NULL);
+
+ enable_irq_wake(IRQ_EINT0);
+
+ s3c_pm_init();
+}
+
+
+Debugging
+---------
+
+ There are several important things to remember when using PM suspend:
+
+ 1) The uart drivers will disable the clocks to the UART blocks when
+ suspending, which means that use of printascii() or similar direct
+ access to the UARTs will cause the debug to stop.
+
+ 2) Whilst the pm code itself will attempt to re-enable the UART clocks,
+ care should be taken that any external clock sources that the UARTs
+ rely on are still enabled at that point.
+
+ 3) If any debugging is placed in the resume path, then it must have the
+ relevant clocks and peripherals setup before use (ie, bootloader).
+
+ For example, if you transmit a character from the UART, the baud
+ rate and uart controls must be setup beforehand.
+
+
+Configuration
+-------------
+
+ The S3C2410 specific configuration in `System Type` defines various
+ aspects of how the S3C2410 suspend and resume support is configured
+
+ `S3C2410 PM Suspend debug`
+
+ This option prints messages to the serial console before and after
+ the actual suspend, giving detailed information on what is
+ happening
+
+
+ `S3C2410 PM Suspend Memory CRC`
+
+ Allows the entire memory to be checksummed before and after the
+ suspend to see if there has been any corruption of the contents.
+
+ Note, the time to calculate the CRC is dependent on the CPU speed
+ and the size of memory. For an 64Mbyte RAM area on an 200MHz
+ S3C2410, this can take approximately 4 seconds to complete.
+
+ This support requires the CRC32 function to be enabled.
+
+
+ `S3C2410 PM Suspend CRC Chunksize (KiB)`
+
+ Defines the size of memory each CRC chunk covers. A smaller value
+ will mean that the CRC data block will take more memory, but will
+ identify any faults with better precision
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2004 Simtec Electronics
+
diff --git a/Documentation/arm/Samsung-S3C24XX/USB-Host.txt b/Documentation/arm/Samsung-S3C24XX/USB-Host.txt
new file mode 100644
index 000000000..f82b1faef
--- /dev/null
+++ b/Documentation/arm/Samsung-S3C24XX/USB-Host.txt
@@ -0,0 +1,93 @@
+ S3C24XX USB Host support
+ ========================
+
+
+
+Introduction
+------------
+
+ This document details the S3C2410/S3C2440 in-built OHCI USB host support.
+
+Configuration
+-------------
+
+ Enable at least the following kernel options:
+
+ menuconfig:
+
+ Device Drivers --->
+ USB support --->
+ <*> Support for Host-side USB
+ <*> OHCI HCD support
+
+
+ .config:
+ CONFIG_USB
+ CONFIG_USB_OHCI_HCD
+
+
+ Once these options are configured, the standard set of USB device
+ drivers can be configured and used.
+
+
+Board Support
+-------------
+
+ The driver attaches to a platform device, which will need to be
+ added by the board specific support file in linux/arch/arm/mach-s3c2410,
+ such as mach-bast.c or mach-smdk2410.c
+
+ The platform device's platform_data field is only needed if the
+ board implements extra power control or over-current monitoring.
+
+ The OHCI driver does not ensure the state of the S3C2410's MISCCTRL
+ register, so if both ports are to be used for the host, then it is
+ the board support file's responsibility to ensure that the second
+ port is configured to be connected to the OHCI core.
+
+
+Platform Data
+-------------
+
+ See arch/arm/mach-s3c2410/include/mach/usb-control.h for the
+ descriptions of the platform device data. An implementation
+ can be found in linux/arch/arm/mach-s3c2410/usb-simtec.c .
+
+ The `struct s3c2410_hcd_info` contains a pair of functions
+ that get called to enable over-current detection, and to
+ control the port power status.
+
+ The ports are numbered 0 and 1.
+
+ power_control:
+
+ Called to enable or disable the power on the port.
+
+ enable_oc:
+
+ Called to enable or disable the over-current monitoring.
+ This should claim or release the resources being used to
+ check the power condition on the port, such as an IRQ.
+
+ report_oc:
+
+ The OHCI driver fills this field in for the over-current code
+ to call when there is a change to the over-current state on
+ an port. The ports argument is a bitmask of 1 bit per port,
+ with bit X being 1 for an over-current on port X.
+
+ The function s3c2410_usb_report_oc() has been provided to
+ ensure this is called correctly.
+
+ port[x]:
+
+ This is struct describes each port, 0 or 1. The platform driver
+ should set the flags field of each port to S3C_HCDFLG_USED if
+ the port is enabled.
+
+
+
+Document Author
+---------------
+
+Ben Dooks, Copyright 2005 Simtec Electronics
diff --git a/Documentation/arm/Samsung/GPIO.txt b/Documentation/arm/Samsung/GPIO.txt
new file mode 100644
index 000000000..795adfd88
--- /dev/null
+++ b/Documentation/arm/Samsung/GPIO.txt
@@ -0,0 +1,40 @@
+ Samsung GPIO implementation
+ ===========================
+
+Introduction
+------------
+
+This outlines the Samsung GPIO implementation and the architecture
+specific calls provided alongside the drivers/gpio core.
+
+
+S3C24XX (Legacy)
+----------------
+
+See Documentation/arm/Samsung-S3C24XX/GPIO.txt for more information
+about these devices. Their implementation has been brought into line
+with the core samsung implementation described in this document.
+
+
+GPIOLIB integration
+-------------------
+
+The gpio implementation uses gpiolib as much as possible, only providing
+specific calls for the items that require Samsung specific handling, such
+as pin special-function or pull resistor control.
+
+GPIO numbering is synchronised between the Samsung and gpiolib system.
+
+
+PIN configuration
+-----------------
+
+Pin configuration is specific to the Samsung architecture, with each SoC
+registering the necessary information for the core gpio configuration
+implementation to configure pins as necessary.
+
+The s3c_gpio_cfgpin() and s3c_gpio_setpull() provide the means for a
+driver or machine to change gpio configuration.
+
+See arch/arm/plat-samsung/include/plat/gpio-cfg.h for more information
+on these functions.
diff --git a/Documentation/arm/Samsung/Overview.txt b/Documentation/arm/Samsung/Overview.txt
new file mode 100644
index 000000000..8f7309bad
--- /dev/null
+++ b/Documentation/arm/Samsung/Overview.txt
@@ -0,0 +1,86 @@
+ Samsung ARM Linux Overview
+ ==========================
+
+Introduction
+------------
+
+ The Samsung range of ARM SoCs spans many similar devices, from the initial
+ ARM9 through to the newest ARM cores. This document shows an overview of
+ the current kernel support, how to use it and where to find the code
+ that supports this.
+
+ The currently supported SoCs are:
+
+ - S3C24XX: See Documentation/arm/Samsung-S3C24XX/Overview.txt for full list
+ - S3C64XX: S3C6400 and S3C6410
+ - S5PC110 / S5PV210
+
+
+S3C24XX Systems
+---------------
+
+ There is still documentation in Documnetation/arm/Samsung-S3C24XX/ which
+ deals with the architecture and drivers specific to these devices.
+
+ See Documentation/arm/Samsung-S3C24XX/Overview.txt for more information
+ on the implementation details and specific support.
+
+
+Configuration
+-------------
+
+ A number of configurations are supplied, as there is no current way of
+ unifying all the SoCs into one kernel.
+
+ s5pc110_defconfig - S5PC110 specific default configuration
+ s5pv210_defconfig - S5PV210 specific default configuration
+
+
+Layout
+------
+
+ The directory layout is currently being restructured, and consists of
+ several platform directories and then the machine specific directories
+ of the CPUs being built for.
+
+ plat-samsung provides the base for all the implementations, and is the
+ last in the line of include directories that are processed for the build
+ specific information. It contains the base clock, GPIO and device definitions
+ to get the system running.
+
+ plat-s3c24xx is for s3c24xx specific builds, see the S3C24XX docs.
+
+ plat-s5p is for s5p specific builds, and contains common support for the
+ S5P specific systems. Not all S5Ps use all the features in this directory
+ due to differences in the hardware.
+
+
+Layout changes
+--------------
+
+ The old plat-s3c and plat-s5pc1xx directories have been removed, with
+ support moved to either plat-samsung or plat-s5p as necessary. These moves
+ where to simplify the include and dependency issues involved with having
+ so many different platform directories.
+
+
+Port Contributors
+-----------------
+
+ Ben Dooks (BJD)
+ Vincent Sanders
+ Herbert Potzl
+ Arnaud Patard (RTP)
+ Roc Wu
+ Klaus Fetscher
+ Dimitry Andric
+ Shannon Holland
+ Guillaume Gourat (NexVision)
+ Christer Weinigel (wingel) (Acer N30)
+ Lucas Correia Villa Real (S3C2400 port)
+
+
+Document Author
+---------------
+
+Copyright 2009-2010 Ben Dooks <ben-linux@fluff.org>
diff --git a/Documentation/arm/Samsung/clksrc-change-registers.awk b/Documentation/arm/Samsung/clksrc-change-registers.awk
new file mode 100755
index 000000000..d9174fabe
--- /dev/null
+++ b/Documentation/arm/Samsung/clksrc-change-registers.awk
@@ -0,0 +1,166 @@
+#!/usr/bin/awk -f
+#
+# Copyright 2010 Ben Dooks <ben-linux@fluff.org>
+#
+# Released under GPLv2
+
+# example usage
+# ./clksrc-change-registers.awk arch/arm/plat-s5pc1xx/include/plat/regs-clock.h < src > dst
+
+function extract_value(s)
+{
+ eqat = index(s, "=")
+ comat = index(s, ",")
+ return substr(s, eqat+2, (comat-eqat)-2)
+}
+
+function remove_brackets(b)
+{
+ return substr(b, 2, length(b)-2)
+}
+
+function splitdefine(l, p)
+{
+ r = split(l, tp)
+
+ p[0] = tp[2]
+ p[1] = remove_brackets(tp[3])
+}
+
+function find_length(f)
+{
+ if (0)
+ printf "find_length " f "\n" > "/dev/stderr"
+
+ if (f ~ /0x1/)
+ return 1
+ else if (f ~ /0x3/)
+ return 2
+ else if (f ~ /0x7/)
+ return 3
+ else if (f ~ /0xf/)
+ return 4
+
+ printf "unknown legnth " f "\n" > "/dev/stderr"
+ exit
+}
+
+function find_shift(s)
+{
+ id = index(s, "<")
+ if (id <= 0) {
+ printf "cannot find shift " s "\n" > "/dev/stderr"
+ exit
+ }
+
+ return substr(s, id+2)
+}
+
+
+BEGIN {
+ if (ARGC < 2) {
+ print "too few arguments" > "/dev/stderr"
+ exit
+ }
+
+# read the header file and find the mask values that we will need
+# to replace and create an associative array of values
+
+ while (getline line < ARGV[1] > 0) {
+ if (line ~ /\#define.*_MASK/ &&
+ !(line ~ /USB_SIG_MASK/)) {
+ splitdefine(line, fields)
+ name = fields[0]
+ if (0)
+ printf "MASK " line "\n" > "/dev/stderr"
+ dmask[name,0] = find_length(fields[1])
+ dmask[name,1] = find_shift(fields[1])
+ if (0)
+ printf "=> '" name "' LENGTH=" dmask[name,0] " SHIFT=" dmask[name,1] "\n" > "/dev/stderr"
+ } else {
+ }
+ }
+
+ delete ARGV[1]
+}
+
+/clksrc_clk.*=.*{/ {
+ shift=""
+ mask=""
+ divshift=""
+ reg_div=""
+ reg_src=""
+ indent=1
+
+ print $0
+
+ for(; indent >= 1;) {
+ if ((getline line) <= 0) {
+ printf "unexpected end of file" > "/dev/stderr"
+ exit 1;
+ }
+
+ if (line ~ /\.shift/) {
+ shift = extract_value(line)
+ } else if (line ~ /\.mask/) {
+ mask = extract_value(line)
+ } else if (line ~ /\.reg_divider/) {
+ reg_div = extract_value(line)
+ } else if (line ~ /\.reg_source/) {
+ reg_src = extract_value(line)
+ } else if (line ~ /\.divider_shift/) {
+ divshift = extract_value(line)
+ } else if (line ~ /{/) {
+ indent++
+ print line
+ } else if (line ~ /}/) {
+ indent--
+
+ if (indent == 0) {
+ if (0) {
+ printf "shift '" shift "' ='" dmask[shift,0] "'\n" > "/dev/stderr"
+ printf "mask '" mask "'\n" > "/dev/stderr"
+ printf "dshft '" divshift "'\n" > "/dev/stderr"
+ printf "rdiv '" reg_div "'\n" > "/dev/stderr"
+ printf "rsrc '" reg_src "'\n" > "/dev/stderr"
+ }
+
+ generated = mask
+ sub(reg_src, reg_div, generated)
+
+ if (0) {
+ printf "/* rsrc " reg_src " */\n"
+ printf "/* rdiv " reg_div " */\n"
+ printf "/* shift " shift " */\n"
+ printf "/* mask " mask " */\n"
+ printf "/* generated " generated " */\n"
+ }
+
+ if (reg_div != "") {
+ printf "\t.reg_div = { "
+ printf ".reg = " reg_div ", "
+ printf ".shift = " dmask[generated,1] ", "
+ printf ".size = " dmask[generated,0] ", "
+ printf "},\n"
+ }
+
+ printf "\t.reg_src = { "
+ printf ".reg = " reg_src ", "
+ printf ".shift = " dmask[mask,1] ", "
+ printf ".size = " dmask[mask,0] ", "
+
+ printf "},\n"
+
+ }
+
+ print line
+ } else {
+ print line
+ }
+
+ if (0)
+ printf indent ":" line "\n" > "/dev/stderr"
+ }
+}
+
+// && ! /clksrc_clk.*=.*{/ { print $0 }
diff --git a/Documentation/arm/Setup b/Documentation/arm/Setup
new file mode 100644
index 000000000..0cb1e64bd
--- /dev/null
+++ b/Documentation/arm/Setup
@@ -0,0 +1,129 @@
+Kernel initialisation parameters on ARM Linux
+---------------------------------------------
+
+The following document describes the kernel initialisation parameter
+structure, otherwise known as 'struct param_struct' which is used
+for most ARM Linux architectures.
+
+This structure is used to pass initialisation parameters from the
+kernel loader to the Linux kernel proper, and may be short lived
+through the kernel initialisation process. As a general rule, it
+should not be referenced outside of arch/arm/kernel/setup.c:setup_arch().
+
+There are a lot of parameters listed in there, and they are described
+below:
+
+ page_size
+
+ This parameter must be set to the page size of the machine, and
+ will be checked by the kernel.
+
+ nr_pages
+
+ This is the total number of pages of memory in the system. If
+ the memory is banked, then this should contain the total number
+ of pages in the system.
+
+ If the system contains separate VRAM, this value should not
+ include this information.
+
+ ramdisk_size
+
+ This is now obsolete, and should not be used.
+
+ flags
+
+ Various kernel flags, including:
+ bit 0 - 1 = mount root read only
+ bit 1 - unused
+ bit 2 - 0 = load ramdisk
+ bit 3 - 0 = prompt for ramdisk
+
+ rootdev
+
+ major/minor number pair of device to mount as the root filesystem.
+
+ video_num_cols
+ video_num_rows
+
+ These two together describe the character size of the dummy console,
+ or VGA console character size. They should not be used for any other
+ purpose.
+
+ It's generally a good idea to set these to be either standard VGA, or
+ the equivalent character size of your fbcon display. This then allows
+ all the bootup messages to be displayed correctly.
+
+ video_x
+ video_y
+
+ This describes the character position of cursor on VGA console, and
+ is otherwise unused. (should not be used for other console types, and
+ should not be used for other purposes).
+
+ memc_control_reg
+
+ MEMC chip control register for Acorn Archimedes and Acorn A5000
+ based machines. May be used differently by different architectures.
+
+ sounddefault
+
+ Default sound setting on Acorn machines. May be used differently by
+ different architectures.
+
+ adfsdrives
+
+ Number of ADFS/MFM disks. May be used differently by different
+ architectures.
+
+ bytes_per_char_h
+ bytes_per_char_v
+
+ These are now obsolete, and should not be used.
+
+ pages_in_bank[4]
+
+ Number of pages in each bank of the systems memory (used for RiscPC).
+ This is intended to be used on systems where the physical memory
+ is non-contiguous from the processors point of view.
+
+ pages_in_vram
+
+ Number of pages in VRAM (used on Acorn RiscPC). This value may also
+ be used by loaders if the size of the video RAM can't be obtained
+ from the hardware.
+
+ initrd_start
+ initrd_size
+
+ This describes the kernel virtual start address and size of the
+ initial ramdisk.
+
+ rd_start
+
+ Start address in sectors of the ramdisk image on a floppy disk.
+
+ system_rev
+
+ system revision number.
+
+ system_serial_low
+ system_serial_high
+
+ system 64-bit serial number
+
+ mem_fclk_21285
+
+ The speed of the external oscillator to the 21285 (footbridge),
+ which control's the speed of the memory bus, timer & serial port.
+ Depending upon the speed of the cpu its value can be between
+ 0-66 MHz. If no params are passed or a value of zero is passed,
+ then a value of 50 Mhz is the default on 21285 architectures.
+
+ paths[8][128]
+
+ These are now obsolete, and should not be used.
+
+ commandline
+
+ Kernel command line parameters. Details can be found elsewhere.
diff --git a/Documentation/arm/VFP/release-notes.txt b/Documentation/arm/VFP/release-notes.txt
new file mode 100644
index 000000000..28a279570
--- /dev/null
+++ b/Documentation/arm/VFP/release-notes.txt
@@ -0,0 +1,55 @@
+Release notes for Linux Kernel VFP support code
+-----------------------------------------------
+
+Date: 20 May 2004
+Author: Russell King
+
+This is the first release of the Linux Kernel VFP support code. It
+provides support for the exceptions bounced from VFP hardware found
+on ARM926EJ-S.
+
+This release has been validated against the SoftFloat-2b library by
+John R. Hauser using the TestFloat-2a test suite. Details of this
+library and test suite can be found at:
+
+ http://www.jhauser.us/arithmetic/SoftFloat.html
+
+The operations which have been tested with this package are:
+
+ - fdiv
+ - fsub
+ - fadd
+ - fmul
+ - fcmp
+ - fcmpe
+ - fcvtd
+ - fcvts
+ - fsito
+ - ftosi
+ - fsqrt
+
+All the above pass softfloat tests with the following exceptions:
+
+- fadd/fsub shows some differences in the handling of +0 / -0 results
+ when input operands differ in signs.
+- the handling of underflow exceptions is slightly different. If a
+ result underflows before rounding, but becomes a normalised number
+ after rounding, we do not signal an underflow exception.
+
+Other operations which have been tested by basic assembly-only tests
+are:
+
+ - fcpy
+ - fabs
+ - fneg
+ - ftoui
+ - ftosiz
+ - ftouiz
+
+The combination operations have not been tested:
+
+ - fmac
+ - fnmac
+ - fmsc
+ - fnmsc
+ - fnmul
diff --git a/Documentation/arm/cluster-pm-race-avoidance.txt b/Documentation/arm/cluster-pm-race-avoidance.txt
new file mode 100644
index 000000000..750b6fc24
--- /dev/null
+++ b/Documentation/arm/cluster-pm-race-avoidance.txt
@@ -0,0 +1,498 @@
+Cluster-wide Power-up/power-down race avoidance algorithm
+=========================================================
+
+This file documents the algorithm which is used to coordinate CPU and
+cluster setup and teardown operations and to manage hardware coherency
+controls safely.
+
+The section "Rationale" explains what the algorithm is for and why it is
+needed. "Basic model" explains general concepts using a simplified view
+of the system. The other sections explain the actual details of the
+algorithm in use.
+
+
+Rationale
+---------
+
+In a system containing multiple CPUs, it is desirable to have the
+ability to turn off individual CPUs when the system is idle, reducing
+power consumption and thermal dissipation.
+
+In a system containing multiple clusters of CPUs, it is also desirable
+to have the ability to turn off entire clusters.
+
+Turning entire clusters off and on is a risky business, because it
+involves performing potentially destructive operations affecting a group
+of independently running CPUs, while the OS continues to run. This
+means that we need some coordination in order to ensure that critical
+cluster-level operations are only performed when it is truly safe to do
+so.
+
+Simple locking may not be sufficient to solve this problem, because
+mechanisms like Linux spinlocks may rely on coherency mechanisms which
+are not immediately enabled when a cluster powers up. Since enabling or
+disabling those mechanisms may itself be a non-atomic operation (such as
+writing some hardware registers and invalidating large caches), other
+methods of coordination are required in order to guarantee safe
+power-down and power-up at the cluster level.
+
+The mechanism presented in this document describes a coherent memory
+based protocol for performing the needed coordination. It aims to be as
+lightweight as possible, while providing the required safety properties.
+
+
+Basic model
+-----------
+
+Each cluster and CPU is assigned a state, as follows:
+
+ DOWN
+ COMING_UP
+ UP
+ GOING_DOWN
+
+ +---------> UP ----------+
+ | v
+
+ COMING_UP GOING_DOWN
+
+ ^ |
+ +--------- DOWN <--------+
+
+
+DOWN: The CPU or cluster is not coherent, and is either powered off or
+ suspended, or is ready to be powered off or suspended.
+
+COMING_UP: The CPU or cluster has committed to moving to the UP state.
+ It may be part way through the process of initialisation and
+ enabling coherency.
+
+UP: The CPU or cluster is active and coherent at the hardware
+ level. A CPU in this state is not necessarily being used
+ actively by the kernel.
+
+GOING_DOWN: The CPU or cluster has committed to moving to the DOWN
+ state. It may be part way through the process of teardown and
+ coherency exit.
+
+
+Each CPU has one of these states assigned to it at any point in time.
+The CPU states are described in the "CPU state" section, below.
+
+Each cluster is also assigned a state, but it is necessary to split the
+state value into two parts (the "cluster" state and "inbound" state) and
+to introduce additional states in order to avoid races between different
+CPUs in the cluster simultaneously modifying the state. The cluster-
+level states are described in the "Cluster state" section.
+
+To help distinguish the CPU states from cluster states in this
+discussion, the state names are given a CPU_ prefix for the CPU states,
+and a CLUSTER_ or INBOUND_ prefix for the cluster states.
+
+
+CPU state
+---------
+
+In this algorithm, each individual core in a multi-core processor is
+referred to as a "CPU". CPUs are assumed to be single-threaded:
+therefore, a CPU can only be doing one thing at a single point in time.
+
+This means that CPUs fit the basic model closely.
+
+The algorithm defines the following states for each CPU in the system:
+
+ CPU_DOWN
+ CPU_COMING_UP
+ CPU_UP
+ CPU_GOING_DOWN
+
+ cluster setup and
+ CPU setup complete policy decision
+ +-----------> CPU_UP ------------+
+ | v
+
+ CPU_COMING_UP CPU_GOING_DOWN
+
+ ^ |
+ +----------- CPU_DOWN <----------+
+ policy decision CPU teardown complete
+ or hardware event
+
+
+The definitions of the four states correspond closely to the states of
+the basic model.
+
+Transitions between states occur as follows.
+
+A trigger event (spontaneous) means that the CPU can transition to the
+next state as a result of making local progress only, with no
+requirement for any external event to happen.
+
+
+CPU_DOWN:
+
+ A CPU reaches the CPU_DOWN state when it is ready for
+ power-down. On reaching this state, the CPU will typically
+ power itself down or suspend itself, via a WFI instruction or a
+ firmware call.
+
+ Next state: CPU_COMING_UP
+ Conditions: none
+
+ Trigger events:
+
+ a) an explicit hardware power-up operation, resulting
+ from a policy decision on another CPU;
+
+ b) a hardware event, such as an interrupt.
+
+
+CPU_COMING_UP:
+
+ A CPU cannot start participating in hardware coherency until the
+ cluster is set up and coherent. If the cluster is not ready,
+ then the CPU will wait in the CPU_COMING_UP state until the
+ cluster has been set up.
+
+ Next state: CPU_UP
+ Conditions: The CPU's parent cluster must be in CLUSTER_UP.
+ Trigger events: Transition of the parent cluster to CLUSTER_UP.
+
+ Refer to the "Cluster state" section for a description of the
+ CLUSTER_UP state.
+
+
+CPU_UP:
+ When a CPU reaches the CPU_UP state, it is safe for the CPU to
+ start participating in local coherency.
+
+ This is done by jumping to the kernel's CPU resume code.
+
+ Note that the definition of this state is slightly different
+ from the basic model definition: CPU_UP does not mean that the
+ CPU is coherent yet, but it does mean that it is safe to resume
+ the kernel. The kernel handles the rest of the resume
+ procedure, so the remaining steps are not visible as part of the
+ race avoidance algorithm.
+
+ The CPU remains in this state until an explicit policy decision
+ is made to shut down or suspend the CPU.
+
+ Next state: CPU_GOING_DOWN
+ Conditions: none
+ Trigger events: explicit policy decision
+
+
+CPU_GOING_DOWN:
+
+ While in this state, the CPU exits coherency, including any
+ operations required to achieve this (such as cleaning data
+ caches).
+
+ Next state: CPU_DOWN
+ Conditions: local CPU teardown complete
+ Trigger events: (spontaneous)
+
+
+Cluster state
+-------------
+
+A cluster is a group of connected CPUs with some common resources.
+Because a cluster contains multiple CPUs, it can be doing multiple
+things at the same time. This has some implications. In particular, a
+CPU can start up while another CPU is tearing the cluster down.
+
+In this discussion, the "outbound side" is the view of the cluster state
+as seen by a CPU tearing the cluster down. The "inbound side" is the
+view of the cluster state as seen by a CPU setting the CPU up.
+
+In order to enable safe coordination in such situations, it is important
+that a CPU which is setting up the cluster can advertise its state
+independently of the CPU which is tearing down the cluster. For this
+reason, the cluster state is split into two parts:
+
+ "cluster" state: The global state of the cluster; or the state
+ on the outbound side:
+
+ CLUSTER_DOWN
+ CLUSTER_UP
+ CLUSTER_GOING_DOWN
+
+ "inbound" state: The state of the cluster on the inbound side.
+
+ INBOUND_NOT_COMING_UP
+ INBOUND_COMING_UP
+
+
+ The different pairings of these states results in six possible
+ states for the cluster as a whole:
+
+ CLUSTER_UP
+ +==========> INBOUND_NOT_COMING_UP -------------+
+ # |
+ |
+ CLUSTER_UP <----+ |
+ INBOUND_COMING_UP | v
+
+ ^ CLUSTER_GOING_DOWN CLUSTER_GOING_DOWN
+ # INBOUND_COMING_UP <=== INBOUND_NOT_COMING_UP
+
+ CLUSTER_DOWN | |
+ INBOUND_COMING_UP <----+ |
+ |
+ ^ |
+ +=========== CLUSTER_DOWN <------------+
+ INBOUND_NOT_COMING_UP
+
+ Transitions -----> can only be made by the outbound CPU, and
+ only involve changes to the "cluster" state.
+
+ Transitions ===##> can only be made by the inbound CPU, and only
+ involve changes to the "inbound" state, except where there is no
+ further transition possible on the outbound side (i.e., the
+ outbound CPU has put the cluster into the CLUSTER_DOWN state).
+
+ The race avoidance algorithm does not provide a way to determine
+ which exact CPUs within the cluster play these roles. This must
+ be decided in advance by some other means. Refer to the section
+ "Last man and first man selection" for more explanation.
+
+
+ CLUSTER_DOWN/INBOUND_NOT_COMING_UP is the only state where the
+ cluster can actually be powered down.
+
+ The parallelism of the inbound and outbound CPUs is observed by
+ the existence of two different paths from CLUSTER_GOING_DOWN/
+ INBOUND_NOT_COMING_UP (corresponding to GOING_DOWN in the basic
+ model) to CLUSTER_DOWN/INBOUND_COMING_UP (corresponding to
+ COMING_UP in the basic model). The second path avoids cluster
+ teardown completely.
+
+ CLUSTER_UP/INBOUND_COMING_UP is equivalent to UP in the basic
+ model. The final transition to CLUSTER_UP/INBOUND_NOT_COMING_UP
+ is trivial and merely resets the state machine ready for the
+ next cycle.
+
+ Details of the allowable transitions follow.
+
+ The next state in each case is notated
+
+ <cluster state>/<inbound state> (<transitioner>)
+
+ where the <transitioner> is the side on which the transition
+ can occur; either the inbound or the outbound side.
+
+
+CLUSTER_DOWN/INBOUND_NOT_COMING_UP:
+
+ Next state: CLUSTER_DOWN/INBOUND_COMING_UP (inbound)
+ Conditions: none
+ Trigger events:
+
+ a) an explicit hardware power-up operation, resulting
+ from a policy decision on another CPU;
+
+ b) a hardware event, such as an interrupt.
+
+
+CLUSTER_DOWN/INBOUND_COMING_UP:
+
+ In this state, an inbound CPU sets up the cluster, including
+ enabling of hardware coherency at the cluster level and any
+ other operations (such as cache invalidation) which are required
+ in order to achieve this.
+
+ The purpose of this state is to do sufficient cluster-level
+ setup to enable other CPUs in the cluster to enter coherency
+ safely.
+
+ Next state: CLUSTER_UP/INBOUND_COMING_UP (inbound)
+ Conditions: cluster-level setup and hardware coherency complete
+ Trigger events: (spontaneous)
+
+
+CLUSTER_UP/INBOUND_COMING_UP:
+
+ Cluster-level setup is complete and hardware coherency is
+ enabled for the cluster. Other CPUs in the cluster can safely
+ enter coherency.
+
+ This is a transient state, leading immediately to
+ CLUSTER_UP/INBOUND_NOT_COMING_UP. All other CPUs on the cluster
+ should consider treat these two states as equivalent.
+
+ Next state: CLUSTER_UP/INBOUND_NOT_COMING_UP (inbound)
+ Conditions: none
+ Trigger events: (spontaneous)
+
+
+CLUSTER_UP/INBOUND_NOT_COMING_UP:
+
+ Cluster-level setup is complete and hardware coherency is
+ enabled for the cluster. Other CPUs in the cluster can safely
+ enter coherency.
+
+ The cluster will remain in this state until a policy decision is
+ made to power the cluster down.
+
+ Next state: CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP (outbound)
+ Conditions: none
+ Trigger events: policy decision to power down the cluster
+
+
+CLUSTER_GOING_DOWN/INBOUND_NOT_COMING_UP:
+
+ An outbound CPU is tearing the cluster down. The selected CPU
+ must wait in this state until all CPUs in the cluster are in the
+ CPU_DOWN state.
+
+ When all CPUs are in the CPU_DOWN state, the cluster can be torn
+ down, for example by cleaning data caches and exiting
+ cluster-level coherency.
+
+ To avoid wasteful unnecessary teardown operations, the outbound
+ should check the inbound cluster state for asynchronous
+ transitions to INBOUND_COMING_UP. Alternatively, individual
+ CPUs can be checked for entry into CPU_COMING_UP or CPU_UP.
+
+
+ Next states:
+
+ CLUSTER_DOWN/INBOUND_NOT_COMING_UP (outbound)
+ Conditions: cluster torn down and ready to power off
+ Trigger events: (spontaneous)
+
+ CLUSTER_GOING_DOWN/INBOUND_COMING_UP (inbound)
+ Conditions: none
+ Trigger events:
+
+ a) an explicit hardware power-up operation,
+ resulting from a policy decision on another
+ CPU;
+
+ b) a hardware event, such as an interrupt.
+
+
+CLUSTER_GOING_DOWN/INBOUND_COMING_UP:
+
+ The cluster is (or was) being torn down, but another CPU has
+ come online in the meantime and is trying to set up the cluster
+ again.
+
+ If the outbound CPU observes this state, it has two choices:
+
+ a) back out of teardown, restoring the cluster to the
+ CLUSTER_UP state;
+
+ b) finish tearing the cluster down and put the cluster
+ in the CLUSTER_DOWN state; the inbound CPU will
+ set up the cluster again from there.
+
+ Choice (a) permits the removal of some latency by avoiding
+ unnecessary teardown and setup operations in situations where
+ the cluster is not really going to be powered down.
+
+
+ Next states:
+
+ CLUSTER_UP/INBOUND_COMING_UP (outbound)
+ Conditions: cluster-level setup and hardware
+ coherency complete
+ Trigger events: (spontaneous)
+
+ CLUSTER_DOWN/INBOUND_COMING_UP (outbound)
+ Conditions: cluster torn down and ready to power off
+ Trigger events: (spontaneous)
+
+
+Last man and First man selection
+--------------------------------
+
+The CPU which performs cluster tear-down operations on the outbound side
+is commonly referred to as the "last man".
+
+The CPU which performs cluster setup on the inbound side is commonly
+referred to as the "first man".
+
+The race avoidance algorithm documented above does not provide a
+mechanism to choose which CPUs should play these roles.
+
+
+Last man:
+
+When shutting down the cluster, all the CPUs involved are initially
+executing Linux and hence coherent. Therefore, ordinary spinlocks can
+be used to select a last man safely, before the CPUs become
+non-coherent.
+
+
+First man:
+
+Because CPUs may power up asynchronously in response to external wake-up
+events, a dynamic mechanism is needed to make sure that only one CPU
+attempts to play the first man role and do the cluster-level
+initialisation: any other CPUs must wait for this to complete before
+proceeding.
+
+Cluster-level initialisation may involve actions such as configuring
+coherency controls in the bus fabric.
+
+The current implementation in mcpm_head.S uses a separate mutual exclusion
+mechanism to do this arbitration. This mechanism is documented in
+detail in vlocks.txt.
+
+
+Features and Limitations
+------------------------
+
+Implementation:
+
+ The current ARM-based implementation is split between
+ arch/arm/common/mcpm_head.S (low-level inbound CPU operations) and
+ arch/arm/common/mcpm_entry.c (everything else):
+
+ __mcpm_cpu_going_down() signals the transition of a CPU to the
+ CPU_GOING_DOWN state.
+
+ __mcpm_cpu_down() signals the transition of a CPU to the CPU_DOWN
+ state.
+
+ A CPU transitions to CPU_COMING_UP and then to CPU_UP via the
+ low-level power-up code in mcpm_head.S. This could
+ involve CPU-specific setup code, but in the current
+ implementation it does not.
+
+ __mcpm_outbound_enter_critical() and __mcpm_outbound_leave_critical()
+ handle transitions from CLUSTER_UP to CLUSTER_GOING_DOWN
+ and from there to CLUSTER_DOWN or back to CLUSTER_UP (in
+ the case of an aborted cluster power-down).
+
+ These functions are more complex than the __mcpm_cpu_*()
+ functions due to the extra inter-CPU coordination which
+ is needed for safe transitions at the cluster level.
+
+ A cluster transitions from CLUSTER_DOWN back to CLUSTER_UP via
+ the low-level power-up code in mcpm_head.S. This
+ typically involves platform-specific setup code,
+ provided by the platform-specific power_up_setup
+ function registered via mcpm_sync_init.
+
+Deep topologies:
+
+ As currently described and implemented, the algorithm does not
+ support CPU topologies involving more than two levels (i.e.,
+ clusters of clusters are not supported). The algorithm could be
+ extended by replicating the cluster-level states for the
+ additional topological levels, and modifying the transition
+ rules for the intermediate (non-outermost) cluster levels.
+
+
+Colophon
+--------
+
+Originally created and documented by Dave Martin for Linaro Limited, in
+collaboration with Nicolas Pitre and Achin Gupta.
+
+Copyright (C) 2012-2013 Linaro Limited
+Distributed under the terms of Version 2 of the GNU General Public
+License, as defined in linux/COPYING.
diff --git a/Documentation/arm/firmware.txt b/Documentation/arm/firmware.txt
new file mode 100644
index 000000000..da6713ada
--- /dev/null
+++ b/Documentation/arm/firmware.txt
@@ -0,0 +1,70 @@
+Interface for registering and calling firmware-specific operations for ARM.
+----
+Written by Tomasz Figa <t.figa@samsung.com>
+
+Some boards are running with secure firmware running in TrustZone secure
+world, which changes the way some things have to be initialized. This makes
+a need to provide an interface for such platforms to specify available firmware
+operations and call them when needed.
+
+Firmware operations can be specified by filling in a struct firmware_ops
+with appropriate callbacks and then registering it with register_firmware_ops()
+function.
+
+ void register_firmware_ops(const struct firmware_ops *ops)
+
+The ops pointer must be non-NULL. More information about struct firmware_ops
+and its members can be found in arch/arm/include/asm/firmware.h header.
+
+There is a default, empty set of operations provided, so there is no need to
+set anything if platform does not require firmware operations.
+
+To call a firmware operation, a helper macro is provided
+
+ #define call_firmware_op(op, ...) \
+ ((firmware_ops->op) ? firmware_ops->op(__VA_ARGS__) : (-ENOSYS))
+
+the macro checks if the operation is provided and calls it or otherwise returns
+-ENOSYS to signal that given operation is not available (for example, to allow
+fallback to legacy operation).
+
+Example of registering firmware operations:
+
+ /* board file */
+
+ static int platformX_do_idle(void)
+ {
+ /* tell platformX firmware to enter idle */
+ return 0;
+ }
+
+ static int platformX_cpu_boot(int i)
+ {
+ /* tell platformX firmware to boot CPU i */
+ return 0;
+ }
+
+ static const struct firmware_ops platformX_firmware_ops = {
+ .do_idle = exynos_do_idle,
+ .cpu_boot = exynos_cpu_boot,
+ /* other operations not available on platformX */
+ };
+
+ /* init_early callback of machine descriptor */
+ static void __init board_init_early(void)
+ {
+ register_firmware_ops(&platformX_firmware_ops);
+ }
+
+Example of using a firmware operation:
+
+ /* some platform code, e.g. SMP initialization */
+
+ __raw_writel(virt_to_phys(exynos4_secondary_startup),
+ CPU1_BOOT_REG);
+
+ /* Call Exynos specific smc call */
+ if (call_firmware_op(cpu_boot, cpu) == -ENOSYS)
+ cpu_boot_legacy(...); /* Try legacy way */
+
+ gic_raise_softirq(cpumask_of(cpu), 1);
diff --git a/Documentation/arm/kernel_mode_neon.txt b/Documentation/arm/kernel_mode_neon.txt
new file mode 100644
index 000000000..525452726
--- /dev/null
+++ b/Documentation/arm/kernel_mode_neon.txt
@@ -0,0 +1,121 @@
+Kernel mode NEON
+================
+
+TL;DR summary
+-------------
+* Use only NEON instructions, or VFP instructions that don't rely on support
+ code
+* Isolate your NEON code in a separate compilation unit, and compile it with
+ '-mfpu=neon -mfloat-abi=softfp'
+* Put kernel_neon_begin() and kernel_neon_end() calls around the calls into your
+ NEON code
+* Don't sleep in your NEON code, and be aware that it will be executed with
+ preemption disabled
+
+
+Introduction
+------------
+It is possible to use NEON instructions (and in some cases, VFP instructions) in
+code that runs in kernel mode. However, for performance reasons, the NEON/VFP
+register file is not preserved and restored at every context switch or taken
+exception like the normal register file is, so some manual intervention is
+required. Furthermore, special care is required for code that may sleep [i.e.,
+may call schedule()], as NEON or VFP instructions will be executed in a
+non-preemptible section for reasons outlined below.
+
+
+Lazy preserve and restore
+-------------------------
+The NEON/VFP register file is managed using lazy preserve (on UP systems) and
+lazy restore (on both SMP and UP systems). This means that the register file is
+kept 'live', and is only preserved and restored when multiple tasks are
+contending for the NEON/VFP unit (or, in the SMP case, when a task migrates to
+another core). Lazy restore is implemented by disabling the NEON/VFP unit after
+every context switch, resulting in a trap when subsequently a NEON/VFP
+instruction is issued, allowing the kernel to step in and perform the restore if
+necessary.
+
+Any use of the NEON/VFP unit in kernel mode should not interfere with this, so
+it is required to do an 'eager' preserve of the NEON/VFP register file, and
+enable the NEON/VFP unit explicitly so no exceptions are generated on first
+subsequent use. This is handled by the function kernel_neon_begin(), which
+should be called before any kernel mode NEON or VFP instructions are issued.
+Likewise, the NEON/VFP unit should be disabled again after use to make sure user
+mode will hit the lazy restore trap upon next use. This is handled by the
+function kernel_neon_end().
+
+
+Interruptions in kernel mode
+----------------------------
+For reasons of performance and simplicity, it was decided that there shall be no
+preserve/restore mechanism for the kernel mode NEON/VFP register contents. This
+implies that interruptions of a kernel mode NEON section can only be allowed if
+they are guaranteed not to touch the NEON/VFP registers. For this reason, the
+following rules and restrictions apply in the kernel:
+* NEON/VFP code is not allowed in interrupt context;
+* NEON/VFP code is not allowed to sleep;
+* NEON/VFP code is executed with preemption disabled.
+
+If latency is a concern, it is possible to put back to back calls to
+kernel_neon_end() and kernel_neon_begin() in places in your code where none of
+the NEON registers are live. (Additional calls to kernel_neon_begin() should be
+reasonably cheap if no context switch occurred in the meantime)
+
+
+VFP and support code
+--------------------
+Earlier versions of VFP (prior to version 3) rely on software support for things
+like IEEE-754 compliant underflow handling etc. When the VFP unit needs such
+software assistance, it signals the kernel by raising an undefined instruction
+exception. The kernel responds by inspecting the VFP control registers and the
+current instruction and arguments, and emulates the instruction in software.
+
+Such software assistance is currently not implemented for VFP instructions
+executed in kernel mode. If such a condition is encountered, the kernel will
+fail and generate an OOPS.
+
+
+Separating NEON code from ordinary code
+---------------------------------------
+The compiler is not aware of the special significance of kernel_neon_begin() and
+kernel_neon_end(), i.e., that it is only allowed to issue NEON/VFP instructions
+between calls to these respective functions. Furthermore, GCC may generate NEON
+instructions of its own at -O3 level if -mfpu=neon is selected, and even if the
+kernel is currently compiled at -O2, future changes may result in NEON/VFP
+instructions appearing in unexpected places if no special care is taken.
+
+Therefore, the recommended and only supported way of using NEON/VFP in the
+kernel is by adhering to the following rules:
+* isolate the NEON code in a separate compilation unit and compile it with
+ '-mfpu=neon -mfloat-abi=softfp';
+* issue the calls to kernel_neon_begin(), kernel_neon_end() as well as the calls
+ into the unit containing the NEON code from a compilation unit which is *not*
+ built with the GCC flag '-mfpu=neon' set.
+
+As the kernel is compiled with '-msoft-float', the above will guarantee that
+both NEON and VFP instructions will only ever appear in designated compilation
+units at any optimization level.
+
+
+NEON assembler
+--------------
+NEON assembler is supported with no additional caveats as long as the rules
+above are followed.
+
+
+NEON code generated by GCC
+--------------------------
+The GCC option -ftree-vectorize (implied by -O3) tries to exploit implicit
+parallelism, and generates NEON code from ordinary C source code. This is fully
+supported as long as the rules above are followed.
+
+
+NEON intrinsics
+---------------
+NEON intrinsics are also supported. However, as code using NEON intrinsics
+relies on the GCC header <arm_neon.h>, (which #includes <stdint.h>), you should
+observe the following in addition to the rules above:
+* Compile the unit containing the NEON intrinsics with '-ffreestanding' so GCC
+ uses its builtin version of <stdint.h> (this is a C99 header which the kernel
+ does not supply);
+* Include <arm_neon.h> last, or at least after <linux/types.h>
diff --git a/Documentation/arm/kernel_user_helpers.txt b/Documentation/arm/kernel_user_helpers.txt
new file mode 100644
index 000000000..567359471
--- /dev/null
+++ b/Documentation/arm/kernel_user_helpers.txt
@@ -0,0 +1,267 @@
+Kernel-provided User Helpers
+============================
+
+These are segment of kernel provided user code reachable from user space
+at a fixed address in kernel memory. This is used to provide user space
+with some operations which require kernel help because of unimplemented
+native feature and/or instructions in many ARM CPUs. The idea is for this
+code to be executed directly in user mode for best efficiency but which is
+too intimate with the kernel counter part to be left to user libraries.
+In fact this code might even differ from one CPU to another depending on
+the available instruction set, or whether it is a SMP systems. In other
+words, the kernel reserves the right to change this code as needed without
+warning. Only the entry points and their results as documented here are
+guaranteed to be stable.
+
+This is different from (but doesn't preclude) a full blown VDSO
+implementation, however a VDSO would prevent some assembly tricks with
+constants that allows for efficient branching to those code segments. And
+since those code segments only use a few cycles before returning to user
+code, the overhead of a VDSO indirect far call would add a measurable
+overhead to such minimalistic operations.
+
+User space is expected to bypass those helpers and implement those things
+inline (either in the code emitted directly by the compiler, or part of
+the implementation of a library call) when optimizing for a recent enough
+processor that has the necessary native support, but only if resulting
+binaries are already to be incompatible with earlier ARM processors due to
+usage of similar native instructions for other things. In other words
+don't make binaries unable to run on earlier processors just for the sake
+of not using these kernel helpers if your compiled code is not going to
+use new instructions for other purpose.
+
+New helpers may be added over time, so an older kernel may be missing some
+helpers present in a newer kernel. For this reason, programs must check
+the value of __kuser_helper_version (see below) before assuming that it is
+safe to call any particular helper. This check should ideally be
+performed only once at process startup time, and execution aborted early
+if the required helpers are not provided by the kernel version that
+process is running on.
+
+kuser_helper_version
+--------------------
+
+Location: 0xffff0ffc
+
+Reference declaration:
+
+ extern int32_t __kuser_helper_version;
+
+Definition:
+
+ This field contains the number of helpers being implemented by the
+ running kernel. User space may read this to determine the availability
+ of a particular helper.
+
+Usage example:
+
+#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
+
+void check_kuser_version(void)
+{
+ if (__kuser_helper_version < 2) {
+ fprintf(stderr, "can't do atomic operations, kernel too old\n");
+ abort();
+ }
+}
+
+Notes:
+
+ User space may assume that the value of this field never changes
+ during the lifetime of any single process. This means that this
+ field can be read once during the initialisation of a library or
+ startup phase of a program.
+
+kuser_get_tls
+-------------
+
+Location: 0xffff0fe0
+
+Reference prototype:
+
+ void * __kuser_get_tls(void);
+
+Input:
+
+ lr = return address
+
+Output:
+
+ r0 = TLS value
+
+Clobbered registers:
+
+ none
+
+Definition:
+
+ Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
+
+Usage example:
+
+typedef void * (__kuser_get_tls_t)(void);
+#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
+
+void foo()
+{
+ void *tls = __kuser_get_tls();
+ printf("TLS = %p\n", tls);
+}
+
+Notes:
+
+ - Valid only if __kuser_helper_version >= 1 (from kernel version 2.6.12).
+
+kuser_cmpxchg
+-------------
+
+Location: 0xffff0fc0
+
+Reference prototype:
+
+ int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
+
+Input:
+
+ r0 = oldval
+ r1 = newval
+ r2 = ptr
+ lr = return address
+
+Output:
+
+ r0 = success code (zero or non-zero)
+ C flag = set if r0 == 0, clear if r0 != 0
+
+Clobbered registers:
+
+ r3, ip, flags
+
+Definition:
+
+ Atomically store newval in *ptr only if *ptr is equal to oldval.
+ Return zero if *ptr was changed or non-zero if no exchange happened.
+ The C flag is also set if *ptr was changed to allow for assembly
+ optimization in the calling code.
+
+Usage example:
+
+typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
+#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
+
+int atomic_add(volatile int *ptr, int val)
+{
+ int old, new;
+
+ do {
+ old = *ptr;
+ new = old + val;
+ } while(__kuser_cmpxchg(old, new, ptr));
+
+ return new;
+}
+
+Notes:
+
+ - This routine already includes memory barriers as needed.
+
+ - Valid only if __kuser_helper_version >= 2 (from kernel version 2.6.12).
+
+kuser_memory_barrier
+--------------------
+
+Location: 0xffff0fa0
+
+Reference prototype:
+
+ void __kuser_memory_barrier(void);
+
+Input:
+
+ lr = return address
+
+Output:
+
+ none
+
+Clobbered registers:
+
+ none
+
+Definition:
+
+ Apply any needed memory barrier to preserve consistency with data modified
+ manually and __kuser_cmpxchg usage.
+
+Usage example:
+
+typedef void (__kuser_dmb_t)(void);
+#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
+
+Notes:
+
+ - Valid only if __kuser_helper_version >= 3 (from kernel version 2.6.15).
+
+kuser_cmpxchg64
+---------------
+
+Location: 0xffff0f60
+
+Reference prototype:
+
+ int __kuser_cmpxchg64(const int64_t *oldval,
+ const int64_t *newval,
+ volatile int64_t *ptr);
+
+Input:
+
+ r0 = pointer to oldval
+ r1 = pointer to newval
+ r2 = pointer to target value
+ lr = return address
+
+Output:
+
+ r0 = success code (zero or non-zero)
+ C flag = set if r0 == 0, clear if r0 != 0
+
+Clobbered registers:
+
+ r3, lr, flags
+
+Definition:
+
+ Atomically store the 64-bit value pointed by *newval in *ptr only if *ptr
+ is equal to the 64-bit value pointed by *oldval. Return zero if *ptr was
+ changed or non-zero if no exchange happened.
+
+ The C flag is also set if *ptr was changed to allow for assembly
+ optimization in the calling code.
+
+Usage example:
+
+typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
+ const int64_t *newval,
+ volatile int64_t *ptr);
+#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
+
+int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
+{
+ int64_t old, new;
+
+ do {
+ old = *ptr;
+ new = old + val;
+ } while(__kuser_cmpxchg64(&old, &new, ptr));
+
+ return new;
+}
+
+Notes:
+
+ - This routine already includes memory barriers as needed.
+
+ - Due to the length of this sequence, this spans 2 conventional kuser
+ "slots", therefore 0xffff0f80 is not used as a valid entry point.
+
+ - Valid only if __kuser_helper_version >= 5 (from kernel version 3.1).
diff --git a/Documentation/arm/mem_alignment b/Documentation/arm/mem_alignment
new file mode 100644
index 000000000..c7c7a114c
--- /dev/null
+++ b/Documentation/arm/mem_alignment
@@ -0,0 +1,58 @@
+Too many problems poped up because of unnoticed misaligned memory access in
+kernel code lately. Therefore the alignment fixup is now unconditionally
+configured in for SA11x0 based targets. According to Alan Cox, this is a
+bad idea to configure it out, but Russell King has some good reasons for
+doing so on some f***ed up ARM architectures like the EBSA110. However
+this is not the case on many design I'm aware of, like all SA11x0 based
+ones.
+
+Of course this is a bad idea to rely on the alignment trap to perform
+unaligned memory access in general. If those access are predictable, you
+are better to use the macros provided by include/asm/unaligned.h. The
+alignment trap can fixup misaligned access for the exception cases, but at
+a high performance cost. It better be rare.
+
+Now for user space applications, it is possible to configure the alignment
+trap to SIGBUS any code performing unaligned access (good for debugging bad
+code), or even fixup the access by software like for kernel code. The later
+mode isn't recommended for performance reasons (just think about the
+floating point emulation that works about the same way). Fix your code
+instead!
+
+Please note that randomly changing the behaviour without good thought is
+real bad - it changes the behaviour of all unaligned instructions in user
+space, and might cause programs to fail unexpectedly.
+
+To change the alignment trap behavior, simply echo a number into
+/proc/cpu/alignment. The number is made up from various bits:
+
+bit behavior when set
+--- -----------------
+
+0 A user process performing an unaligned memory access
+ will cause the kernel to print a message indicating
+ process name, pid, pc, instruction, address, and the
+ fault code.
+
+1 The kernel will attempt to fix up the user process
+ performing the unaligned access. This is of course
+ slow (think about the floating point emulator) and
+ not recommended for production use.
+
+2 The kernel will send a SIGBUS signal to the user process
+ performing the unaligned access.
+
+Note that not all combinations are supported - only values 0 through 5.
+(6 and 7 don't make sense).
+
+For example, the following will turn on the warnings, but without
+fixing up or sending SIGBUS signals:
+
+ echo 1 > /proc/sys/debug/alignment
+
+You can also read the content of the same file to get statistical
+information on unaligned access occurrences plus the current mode of
+operation for user space code.
+
+
+Nicolas Pitre, Mar 13, 2001. Modified Russell King, Nov 30, 2001.
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
new file mode 100644
index 000000000..4178ebda6
--- /dev/null
+++ b/Documentation/arm/memory.txt
@@ -0,0 +1,88 @@
+ Kernel Memory Layout on ARM Linux
+
+ Russell King <rmk@arm.linux.org.uk>
+ November 17, 2005 (2.6.15)
+
+This document describes the virtual memory layout which the Linux
+kernel uses for ARM processors. It indicates which regions are
+free for platforms to use, and which are used by generic code.
+
+The ARM CPU is capable of addressing a maximum of 4GB virtual memory
+space, and this must be shared between user space processes, the
+kernel, and hardware devices.
+
+As the ARM architecture matures, it becomes necessary to reserve
+certain regions of VM space for use for new facilities; therefore
+this document may reserve more VM space over time.
+
+Start End Use
+--------------------------------------------------------------------------
+ffff8000 ffffffff copy_user_page / clear_user_page use.
+ For SA11xx and Xscale, this is used to
+ setup a minicache mapping.
+
+ffff4000 ffffffff cache aliasing on ARMv6 and later CPUs.
+
+ffff1000 ffff7fff Reserved.
+ Platforms must not use this address range.
+
+ffff0000 ffff0fff CPU vector page.
+ The CPU vectors are mapped here if the
+ CPU supports vector relocation (control
+ register V bit.)
+
+fffe0000 fffeffff XScale cache flush area. This is used
+ in proc-xscale.S to flush the whole data
+ cache. (XScale does not have TCM.)
+
+fffe8000 fffeffff DTCM mapping area for platforms with
+ DTCM mounted inside the CPU.
+
+fffe0000 fffe7fff ITCM mapping area for platforms with
+ ITCM mounted inside the CPU.
+
+ffc00000 ffefffff Fixmap mapping region. Addresses provided
+ by fix_to_virt() will be located here.
+
+fee00000 feffffff Mapping of PCI I/O space. This is a static
+ mapping within the vmalloc space.
+
+VMALLOC_START VMALLOC_END-1 vmalloc() / ioremap() space.
+ Memory returned by vmalloc/ioremap will
+ be dynamically placed in this region.
+ Machine specific static mappings are also
+ located here through iotable_init().
+ VMALLOC_START is based upon the value
+ of the high_memory variable, and VMALLOC_END
+ is equal to 0xff000000.
+
+PAGE_OFFSET high_memory-1 Kernel direct-mapped RAM region.
+ This maps the platforms RAM, and typically
+ maps all platform RAM in a 1:1 relationship.
+
+PKMAP_BASE PAGE_OFFSET-1 Permanent kernel mappings
+ One way of mapping HIGHMEM pages into kernel
+ space.
+
+MODULES_VADDR MODULES_END-1 Kernel module space
+ Kernel modules inserted via insmod are
+ placed here using dynamic mappings.
+
+00001000 TASK_SIZE-1 User space mappings
+ Per-thread mappings are placed here via
+ the mmap() system call.
+
+00000000 00000fff CPU vector page / null pointer trap
+ CPUs which do not support vector remapping
+ place their vector page here. NULL pointer
+ dereferences by both the kernel and user
+ space are also caught via this mapping.
+
+Please note that mappings which collide with the above areas may result
+in a non-bootable kernel, or may cause the kernel to (eventually) panic
+at run time.
+
+Since future CPUs may impact the kernel mapping layout, user programs
+must not access any memory which is not mapped inside their 0x0001000
+to TASK_SIZE address range. If they wish to access these areas, they
+must set up their own mappings using open() and mmap().
diff --git a/Documentation/arm/nwfpe/NOTES b/Documentation/arm/nwfpe/NOTES
new file mode 100644
index 000000000..40577b5a4
--- /dev/null
+++ b/Documentation/arm/nwfpe/NOTES
@@ -0,0 +1,29 @@
+There seems to be a problem with exp(double) and our emulator. I haven't
+been able to track it down yet. This does not occur with the emulator
+supplied by Russell King.
+
+I also found one oddity in the emulator. I don't think it is serious but
+will point it out. The ARM calling conventions require floating point
+registers f4-f7 to be preserved over a function call. The compiler quite
+often uses an stfe instruction to save f4 on the stack upon entry to a
+function, and an ldfe instruction to restore it before returning.
+
+I was looking at some code, that calculated a double result, stored it in f4
+then made a function call. Upon return from the function call the number in
+f4 had been converted to an extended value in the emulator.
+
+This is a side effect of the stfe instruction. The double in f4 had to be
+converted to extended, then stored. If an lfm/sfm combination had been used,
+then no conversion would occur. This has performance considerations. The
+result from the function call and f4 were used in a multiplication. If the
+emulator sees a multiply of a double and extended, it promotes the double to
+extended, then does the multiply in extended precision.
+
+This code will cause this problem:
+
+double x, y, z;
+z = log(x)/log(y);
+
+The result of log(x) (a double) will be calculated, returned in f0, then
+moved to f4 to preserve it over the log(y) call. The division will be done
+in extended precision, due to the stfe instruction used to save f4 in log(y).
diff --git a/Documentation/arm/nwfpe/README b/Documentation/arm/nwfpe/README
new file mode 100644
index 000000000..771871de0
--- /dev/null
+++ b/Documentation/arm/nwfpe/README
@@ -0,0 +1,70 @@
+This directory contains the version 0.92 test release of the NetWinder
+Floating Point Emulator.
+
+The majority of the code was written by me, Scott Bambrough It is
+written in C, with a small number of routines in inline assembler
+where required. It was written quickly, with a goal of implementing a
+working version of all the floating point instructions the compiler
+emits as the first target. I have attempted to be as optimal as
+possible, but there remains much room for improvement.
+
+I have attempted to make the emulator as portable as possible. One of
+the problems is with leading underscores on kernel symbols. Elf
+kernels have no leading underscores, a.out compiled kernels do. I
+have attempted to use the C_SYMBOL_NAME macro wherever this may be
+important.
+
+Another choice I made was in the file structure. I have attempted to
+contain all operating system specific code in one module (fpmodule.*).
+All the other files contain emulator specific code. This should allow
+others to port the emulator to NetBSD for instance relatively easily.
+
+The floating point operations are based on SoftFloat Release 2, by
+John Hauser. SoftFloat is a software implementation of floating-point
+that conforms to the IEC/IEEE Standard for Binary Floating-point
+Arithmetic. As many as four formats are supported: single precision,
+double precision, extended double precision, and quadruple precision.
+All operations required by the standard are implemented, except for
+conversions to and from decimal. We use only the single precision,
+double precision and extended double precision formats. The port of
+SoftFloat to the ARM was done by Phil Blundell, based on an earlier
+port of SoftFloat version 1 by Neil Carson for NetBSD/arm32.
+
+The file README.FPE contains a description of what has been implemented
+so far in the emulator. The file TODO contains a information on what
+remains to be done, and other ideas for the emulator.
+
+Bug reports, comments, suggestions should be directed to me at
+<scottb@netwinder.org>. General reports of "this program doesn't
+work correctly when your emulator is installed" are useful for
+determining that bugs still exist; but are virtually useless when
+attempting to isolate the problem. Please report them, but don't
+expect quick action. Bugs still exist. The problem remains in isolating
+which instruction contains the bug. Small programs illustrating a specific
+problem are a godsend.
+
+Legal Notices
+-------------
+
+The NetWinder Floating Point Emulator is free software. Everything Rebel.com
+has written is provided under the GNU GPL. See the file COPYING for copying
+conditions. Excluded from the above is the SoftFloat code. John Hauser's
+legal notice for SoftFloat is included below.
+
+-------------------------------------------------------------------------------
+SoftFloat Legal Notice
+
+SoftFloat was written by John R. Hauser. This work was made possible in
+part by the International Computer Science Institute, located at Suite 600,
+1947 Center Street, Berkeley, California 94704. Funding was partially
+provided by the National Science Foundation under grant MIP-9311980. The
+original version of this code was written as part of a project to build
+a fixed-point vector processor in collaboration with the University of
+California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
+has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
+TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
+PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
+AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
+-------------------------------------------------------------------------------
diff --git a/Documentation/arm/nwfpe/README.FPE b/Documentation/arm/nwfpe/README.FPE
new file mode 100644
index 000000000..26f5d7bb9
--- /dev/null
+++ b/Documentation/arm/nwfpe/README.FPE
@@ -0,0 +1,156 @@
+The following describes the current state of the NetWinder's floating point
+emulator.
+
+In the following nomenclature is used to describe the floating point
+instructions. It follows the conventions in the ARM manual.
+
+<S|D|E> = <single|double|extended>, no default
+{P|M|Z} = {round to +infinity,round to -infinity,round to zero},
+ default = round to nearest
+
+Note: items enclosed in {} are optional.
+
+Floating Point Coprocessor Data Transfer Instructions (CPDT)
+------------------------------------------------------------
+
+LDF/STF - load and store floating
+
+<LDF|STF>{cond}<S|D|E> Fd, Rn
+<LDF|STF>{cond}<S|D|E> Fd, [Rn, #<expression>]{!}
+<LDF|STF>{cond}<S|D|E> Fd, [Rn], #<expression>
+
+These instructions are fully implemented.
+
+LFM/SFM - load and store multiple floating
+
+Form 1 syntax:
+<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn]
+<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn, #<expression>]{!}
+<LFM|SFM>{cond}<S|D|E> Fd, <count>, [Rn], #<expression>
+
+Form 2 syntax:
+<LFM|SFM>{cond}<FD,EA> Fd, <count>, [Rn]{!}
+
+These instructions are fully implemented. They store/load three words
+for each floating point register into the memory location given in the
+instruction. The format in memory is unlikely to be compatible with
+other implementations, in particular the actual hardware. Specific
+mention of this is made in the ARM manuals.
+
+Floating Point Coprocessor Register Transfer Instructions (CPRT)
+----------------------------------------------------------------
+
+Conversions, read/write status/control register instructions
+
+FLT{cond}<S,D,E>{P,M,Z} Fn, Rd Convert integer to floating point
+FIX{cond}{P,M,Z} Rd, Fn Convert floating point to integer
+WFS{cond} Rd Write floating point status register
+RFS{cond} Rd Read floating point status register
+WFC{cond} Rd Write floating point control register
+RFC{cond} Rd Read floating point control register
+
+FLT/FIX are fully implemented.
+
+RFS/WFS are fully implemented.
+
+RFC/WFC are fully implemented. RFC/WFC are supervisor only instructions, and
+presently check the CPU mode, and do an invalid instruction trap if not called
+from supervisor mode.
+
+Compare instructions
+
+CMF{cond} Fn, Fm Compare floating
+CMFE{cond} Fn, Fm Compare floating with exception
+CNF{cond} Fn, Fm Compare negated floating
+CNFE{cond} Fn, Fm Compare negated floating with exception
+
+These are fully implemented.
+
+Floating Point Coprocessor Data Instructions (CPDT)
+---------------------------------------------------
+
+Dyadic operations:
+
+ADF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - add
+SUF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - subtract
+RSF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse subtract
+MUF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - multiply
+DVF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - divide
+RDV{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse divide
+
+These are fully implemented.
+
+FML{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast multiply
+FDV{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast divide
+FRD{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - fast reverse divide
+
+These are fully implemented as well. They use the same algorithm as the
+non-fast versions. Hence, in this implementation their performance is
+equivalent to the MUF/DVF/RDV instructions. This is acceptable according
+to the ARM manual. The manual notes these are defined only for single
+operands, on the actual FPA11 hardware they do not work for double or
+extended precision operands. The emulator currently does not check
+the requested permissions conditions, and performs the requested operation.
+
+RMF{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - IEEE remainder
+
+This is fully implemented.
+
+Monadic operations:
+
+MVF{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - move
+MNF{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - move negated
+
+These are fully implemented.
+
+ABS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - absolute value
+SQT{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - square root
+RND{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - round
+
+These are fully implemented.
+
+URD{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - unnormalized round
+NRM{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - normalize
+
+These are implemented. URD is implemented using the same code as the RND
+instruction. Since URD cannot return a unnormalized number, NRM becomes
+a NOP.
+
+Library calls:
+
+POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
+RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
+POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
+
+LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
+LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
+EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
+SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
+COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
+TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
+ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
+ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
+ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
+
+These are not implemented. They are not currently issued by the compiler,
+and are handled by routines in libc. These are not implemented by the FPA11
+hardware, but are handled by the floating point support code. They should
+be implemented in future versions.
+
+Signalling:
+
+Signals are implemented. However current ELF kernels produced by Rebel.com
+have a bug in them that prevents the module from generating a SIGFPE. This
+is caused by a failure to alias fp_current to the kernel variable
+current_set[0] correctly.
+
+The kernel provided with this distribution (vmlinux-nwfpe-0.93) contains
+a fix for this problem and also incorporates the current version of the
+emulator directly. It is possible to run with no floating point module
+loaded with this kernel. It is provided as a demonstration of the
+technology and for those who want to do floating point work that depends
+on signals. It is not strictly necessary to use the module.
+
+A module (either the one provided by Russell King, or the one in this
+distribution) can be loaded to replace the functionality of the emulator
+built into the kernel.
diff --git a/Documentation/arm/nwfpe/TODO b/Documentation/arm/nwfpe/TODO
new file mode 100644
index 000000000..8027061b6
--- /dev/null
+++ b/Documentation/arm/nwfpe/TODO
@@ -0,0 +1,67 @@
+TODO LIST
+---------
+
+POW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - power
+RPW{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - reverse power
+POL{cond}<S|D|E>{P,M,Z} Fd, Fn, <Fm,#value> - polar angle (arctan2)
+
+LOG{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base 10
+LGN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - logarithm to base e
+EXP{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - exponent
+SIN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - sine
+COS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - cosine
+TAN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - tangent
+ASN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arcsine
+ACS{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arccosine
+ATN{cond}<S|D|E>{P,M,Z} Fd, <Fm,#value> - arctangent
+
+These are not implemented. They are not currently issued by the compiler,
+and are handled by routines in libc. These are not implemented by the FPA11
+hardware, but are handled by the floating point support code. They should
+be implemented in future versions.
+
+There are a couple of ways to approach the implementation of these. One
+method would be to use accurate table methods for these routines. I have
+a couple of papers by S. Gal from IBM's research labs in Haifa, Israel that
+seem to promise extreme accuracy (in the order of 99.8%) and reasonable speed.
+These methods are used in GLIBC for some of the transcendental functions.
+
+Another approach, which I know little about is CORDIC. This stands for
+Coordinate Rotation Digital Computer, and is a method of computing
+transcendental functions using mostly shifts and adds and a few
+multiplications and divisions. The ARM excels at shifts and adds,
+so such a method could be promising, but requires more research to
+determine if it is feasible.
+
+Rounding Methods
+
+The IEEE standard defines 4 rounding modes. Round to nearest is the
+default, but rounding to + or - infinity or round to zero are also allowed.
+Many architectures allow the rounding mode to be specified by modifying bits
+in a control register. Not so with the ARM FPA11 architecture. To change
+the rounding mode one must specify it with each instruction.
+
+This has made porting some benchmarks difficult. It is possible to
+introduce such a capability into the emulator. The FPCR contains
+bits describing the rounding mode. The emulator could be altered to
+examine a flag, which if set forced it to ignore the rounding mode in
+the instruction, and use the mode specified in the bits in the FPCR.
+
+This would require a method of getting/setting the flag, and the bits
+in the FPCR. This requires a kernel call in ArmLinux, as WFC/RFC are
+supervisor only instructions. If anyone has any ideas or comments I
+would like to hear them.
+
+[NOTE: pulled out from some docs on ARM floating point, specifically
+ for the Acorn FPE, but not limited to it:
+
+ The floating point control register (FPCR) may only be present in some
+ implementations: it is there to control the hardware in an implementation-
+ specific manner, for example to disable the floating point system. The user
+ mode of the ARM is not permitted to use this register (since the right is
+ reserved to alter it between implementations) and the WFC and RFC
+ instructions will trap if tried in user mode.
+
+ Hence, the answer is yes, you could do this, but then you will run a high
+ risk of becoming isolated if and when hardware FP emulation comes out
+ -- Russell].
diff --git a/Documentation/arm/pxa/mfp.txt b/Documentation/arm/pxa/mfp.txt
new file mode 100644
index 000000000..a179e5bc0
--- /dev/null
+++ b/Documentation/arm/pxa/mfp.txt
@@ -0,0 +1,286 @@
+ MFP Configuration for PXA2xx/PXA3xx Processors
+
+ Eric Miao <eric.miao@marvell.com>
+
+MFP stands for Multi-Function Pin, which is the pin-mux logic on PXA3xx and
+later PXA series processors. This document describes the existing MFP API,
+and how board/platform driver authors could make use of it.
+
+ Basic Concept
+===============
+
+Unlike the GPIO alternate function settings on PXA25x and PXA27x, a new MFP
+mechanism is introduced from PXA3xx to completely move the pin-mux functions
+out of the GPIO controller. In addition to pin-mux configurations, the MFP
+also controls the low power state, driving strength, pull-up/down and event
+detection of each pin. Below is a diagram of internal connections between
+the MFP logic and the remaining SoC peripherals:
+
+ +--------+
+ | |--(GPIO19)--+
+ | GPIO | |
+ | |--(GPIO...) |
+ +--------+ |
+ | +---------+
+ +--------+ +------>| |
+ | PWM2 |--(PWM_OUT)-------->| MFP |
+ +--------+ +------>| |-------> to external PAD
+ | +---->| |
+ +--------+ | | +-->| |
+ | SSP2 |---(TXD)----+ | | +---------+
+ +--------+ | |
+ | |
+ +--------+ | |
+ | Keypad |--(MKOUT4)----+ |
+ +--------+ |
+ |
+ +--------+ |
+ | UART2 |---(TXD)--------+
+ +--------+
+
+NOTE: the external pad is named as MFP_PIN_GPIO19, it doesn't necessarily
+mean it's dedicated for GPIO19, only as a hint that internally this pin
+can be routed from GPIO19 of the GPIO controller.
+
+To better understand the change from PXA25x/PXA27x GPIO alternate function
+to this new MFP mechanism, here are several key points:
+
+ 1. GPIO controller on PXA3xx is now a dedicated controller, same as other
+ internal controllers like PWM, SSP and UART, with 128 internal signals
+ which can be routed to external through one or more MFPs (e.g. GPIO<0>
+ can be routed through either MFP_PIN_GPIO0 as well as MFP_PIN_GPIO0_2,
+ see arch/arm/mach-pxa/mach/include/mfp-pxa300.h)
+
+ 2. Alternate function configuration is removed from this GPIO controller,
+ the remaining functions are pure GPIO-specific, i.e.
+
+ - GPIO signal level control
+ - GPIO direction control
+ - GPIO level change detection
+
+ 3. Low power state for each pin is now controlled by MFP, this means the
+ PGSRx registers on PXA2xx are now useless on PXA3xx
+
+ 4. Wakeup detection is now controlled by MFP, PWER does not control the
+ wakeup from GPIO(s) any more, depending on the sleeping state, ADxER
+ (as defined in pxa3xx-regs.h) controls the wakeup from MFP
+
+NOTE: with such a clear separation of MFP and GPIO, by GPIO<xx> we normally
+mean it is a GPIO signal, and by MFP<xxx> or pin xxx, we mean a physical
+pad (or ball).
+
+ MFP API Usage
+===============
+
+For board code writers, here are some guidelines:
+
+1. include ONE of the following header files in your <board>.c:
+
+ - #include <mach/mfp-pxa25x.h>
+ - #include <mach/mfp-pxa27x.h>
+ - #include <mach/mfp-pxa300.h>
+ - #include <mach/mfp-pxa320.h>
+ - #include <mach/mfp-pxa930.h>
+
+ NOTE: only one file in your <board>.c, depending on the processors used,
+ because pin configuration definitions may conflict in these file (i.e.
+ same name, different meaning and settings on different processors). E.g.
+ for zylonite platform, which support both PXA300/PXA310 and PXA320, two
+ separate files are introduced: zylonite_pxa300.c and zylonite_pxa320.c
+ (in addition to handle MFP configuration differences, they also handle
+ the other differences between the two combinations).
+
+ NOTE: PXA300 and PXA310 are almost identical in pin configurations (with
+ PXA310 supporting some additional ones), thus the difference is actually
+ covered in a single mfp-pxa300.h.
+
+2. prepare an array for the initial pin configurations, e.g.:
+
+ static unsigned long mainstone_pin_config[] __initdata = {
+ /* Chip Select */
+ GPIO15_nCS_1,
+
+ /* LCD - 16bpp Active TFT */
+ GPIOxx_TFT_LCD_16BPP,
+ GPIO16_PWM0_OUT, /* Backlight */
+
+ /* MMC */
+ GPIO32_MMC_CLK,
+ GPIO112_MMC_CMD,
+ GPIO92_MMC_DAT_0,
+ GPIO109_MMC_DAT_1,
+ GPIO110_MMC_DAT_2,
+ GPIO111_MMC_DAT_3,
+
+ ...
+
+ /* GPIO */
+ GPIO1_GPIO | WAKEUP_ON_EDGE_BOTH,
+ };
+
+ a) once the pin configurations are passed to pxa{2xx,3xx}_mfp_config(),
+ and written to the actual registers, they are useless and may discard,
+ adding '__initdata' will help save some additional bytes here.
+
+ b) when there is only one possible pin configurations for a component,
+ some simplified definitions can be used, e.g. GPIOxx_TFT_LCD_16BPP on
+ PXA25x and PXA27x processors
+
+ c) if by board design, a pin can be configured to wake up the system
+ from low power state, it can be 'OR'ed with any of:
+
+ WAKEUP_ON_EDGE_BOTH
+ WAKEUP_ON_EDGE_RISE
+ WAKEUP_ON_EDGE_FALL
+ WAKEUP_ON_LEVEL_HIGH - specifically for enabling of keypad GPIOs,
+
+ to indicate that this pin has the capability of wake-up the system,
+ and on which edge(s). This, however, doesn't necessarily mean the
+ pin _will_ wakeup the system, it will only when set_irq_wake() is
+ invoked with the corresponding GPIO IRQ (GPIO_IRQ(xx) or gpio_to_irq())
+ and eventually calls gpio_set_wake() for the actual register setting.
+
+ d) although PXA3xx MFP supports edge detection on each pin, the
+ internal logic will only wakeup the system when those specific bits
+ in ADxER registers are set, which can be well mapped to the
+ corresponding peripheral, thus set_irq_wake() can be called with
+ the peripheral IRQ to enable the wakeup.
+
+
+ MFP on PXA3xx
+===============
+
+Every external I/O pad on PXA3xx (excluding those for special purpose) has
+one MFP logic associated, and is controlled by one MFP register (MFPR).
+
+The MFPR has the following bit definitions (for PXA300/PXA310/PXA320):
+
+ 31 16 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+ +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+ | RESERVED |PS|PU|PD| DRIVE |SS|SD|SO|EC|EF|ER|--| AF_SEL |
+ +-------------------------+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
+
+ Bit 3: RESERVED
+ Bit 4: EDGE_RISE_EN - enable detection of rising edge on this pin
+ Bit 5: EDGE_FALL_EN - enable detection of falling edge on this pin
+ Bit 6: EDGE_CLEAR - disable edge detection on this pin
+ Bit 7: SLEEP_OE_N - enable outputs during low power modes
+ Bit 8: SLEEP_DATA - output data on the pin during low power modes
+ Bit 9: SLEEP_SEL - selection control for low power modes signals
+ Bit 13: PULLDOWN_EN - enable the internal pull-down resistor on this pin
+ Bit 14: PULLUP_EN - enable the internal pull-up resistor on this pin
+ Bit 15: PULL_SEL - pull state controlled by selected alternate function
+ (0) or by PULL{UP,DOWN}_EN bits (1)
+
+ Bit 0 - 2: AF_SEL - alternate function selection, 8 possibilities, from 0-7
+ Bit 10-12: DRIVE - drive strength and slew rate
+ 0b000 - fast 1mA
+ 0b001 - fast 2mA
+ 0b002 - fast 3mA
+ 0b003 - fast 4mA
+ 0b004 - slow 6mA
+ 0b005 - fast 6mA
+ 0b006 - slow 10mA
+ 0b007 - fast 10mA
+
+ MFP Design for PXA2xx/PXA3xx
+==============================
+
+Due to the difference of pin-mux handling between PXA2xx and PXA3xx, a unified
+MFP API is introduced to cover both series of processors.
+
+The basic idea of this design is to introduce definitions for all possible pin
+configurations, these definitions are processor and platform independent, and
+the actual API invoked to convert these definitions into register settings and
+make them effective there-after.
+
+ Files Involved
+ --------------
+
+ - arch/arm/mach-pxa/include/mach/mfp.h
+
+ for
+ 1. Unified pin definitions - enum constants for all configurable pins
+ 2. processor-neutral bit definitions for a possible MFP configuration
+
+ - arch/arm/mach-pxa/include/mach/mfp-pxa3xx.h
+
+ for PXA3xx specific MFPR register bit definitions and PXA3xx common pin
+ configurations
+
+ - arch/arm/mach-pxa/include/mach/mfp-pxa2xx.h
+
+ for PXA2xx specific definitions and PXA25x/PXA27x common pin configurations
+
+ - arch/arm/mach-pxa/include/mach/mfp-pxa25x.h
+ arch/arm/mach-pxa/include/mach/mfp-pxa27x.h
+ arch/arm/mach-pxa/include/mach/mfp-pxa300.h
+ arch/arm/mach-pxa/include/mach/mfp-pxa320.h
+ arch/arm/mach-pxa/include/mach/mfp-pxa930.h
+
+ for processor specific definitions
+
+ - arch/arm/mach-pxa/mfp-pxa3xx.c
+ - arch/arm/mach-pxa/mfp-pxa2xx.c
+
+ for implementation of the pin configuration to take effect for the actual
+ processor.
+
+ Pin Configuration
+ -----------------
+
+ The following comments are copied from mfp.h (see the actual source code
+ for most updated info)
+
+ /*
+ * a possible MFP configuration is represented by a 32-bit integer
+ *
+ * bit 0.. 9 - MFP Pin Number (1024 Pins Maximum)
+ * bit 10..12 - Alternate Function Selection
+ * bit 13..15 - Drive Strength
+ * bit 16..18 - Low Power Mode State
+ * bit 19..20 - Low Power Mode Edge Detection
+ * bit 21..22 - Run Mode Pull State
+ *
+ * to facilitate the definition, the following macros are provided
+ *
+ * MFP_CFG_DEFAULT - default MFP configuration value, with
+ * alternate function = 0,
+ * drive strength = fast 3mA (MFP_DS03X)
+ * low power mode = default
+ * edge detection = none
+ *
+ * MFP_CFG - default MFPR value with alternate function
+ * MFP_CFG_DRV - default MFPR value with alternate function and
+ * pin drive strength
+ * MFP_CFG_LPM - default MFPR value with alternate function and
+ * low power mode
+ * MFP_CFG_X - default MFPR value with alternate function,
+ * pin drive strength and low power mode
+ */
+
+ Examples of pin configurations are:
+
+ #define GPIO94_SSP3_RXD MFP_CFG_X(GPIO94, AF1, DS08X, FLOAT)
+
+ which reads GPIO94 can be configured as SSP3_RXD, with alternate function
+ selection of 1, driving strength of 0b101, and a float state in low power
+ modes.
+
+ NOTE: this is the default setting of this pin being configured as SSP3_RXD
+ which can be modified a bit in board code, though it is not recommended to
+ do so, simply because this default setting is usually carefully encoded,
+ and is supposed to work in most cases.
+
+ Register Settings
+ -----------------
+
+ Register settings on PXA3xx for a pin configuration is actually very
+ straight-forward, most bits can be converted directly into MFPR value
+ in a easier way. Two sets of MFPR values are calculated: the run-time
+ ones and the low power mode ones, to allow different settings.
+
+ The conversion from a generic pin configuration to the actual register
+ settings on PXA2xx is a bit complicated: many registers are involved,
+ including GAFRx, GPDRx, PGSRx, PWER, PKWR, PFER and PRER. Please see
+ mfp-pxa2xx.c for how the conversion is made.
diff --git a/Documentation/arm/sti/overview.txt b/Documentation/arm/sti/overview.txt
new file mode 100644
index 000000000..1a4e93d60
--- /dev/null
+++ b/Documentation/arm/sti/overview.txt
@@ -0,0 +1,33 @@
+ STi ARM Linux Overview
+ ==========================
+
+Introduction
+------------
+
+ The ST Microelectronics Multimedia and Application Processors range of
+ CortexA9 System-on-Chip are supported by the 'STi' platform of
+ ARM Linux. Currently STiH415, STiH416 SOCs are supported with both
+ B2000 and B2020 Reference boards.
+
+
+ configuration
+ -------------
+
+ A generic configuration is provided for both STiH415/416, and can be used as the
+ default by
+ make stih41x_defconfig
+
+ Layout
+ ------
+ All the files for multiple machine families (STiH415, STiH416, and STiG125)
+ are located in the platform code contained in arch/arm/mach-sti
+
+ There is a generic board board-dt.c in the mach folder which support
+ Flattened Device Tree, which means, It works with any compatible board with
+ Device Trees.
+
+
+ Document Author
+ ---------------
+
+ Srinivas Kandagatla <srinivas.kandagatla@st.com>, (c) 2013 ST Microelectronics
diff --git a/Documentation/arm/sti/stih407-overview.txt b/Documentation/arm/sti/stih407-overview.txt
new file mode 100644
index 000000000..3343f32f5
--- /dev/null
+++ b/Documentation/arm/sti/stih407-overview.txt
@@ -0,0 +1,18 @@
+ STiH407 Overview
+ ================
+
+Introduction
+------------
+
+ The STiH407 is the new generation of SoC for Multi-HD, AVC set-top boxes
+ and server/connected client application for satellite, cable, terrestrial
+ and IP-STB markets.
+
+ Features
+ - ARM Cortex-A9 1.5 GHz dual core CPU (28nm)
+ - SATA2, USB 3.0, PCIe, Gbit Ethernet
+
+ Document Author
+ ---------------
+
+ Maxime Coquelin <maxime.coquelin@st.com>, (c) 2014 ST Microelectronics
diff --git a/Documentation/arm/sti/stih415-overview.txt b/Documentation/arm/sti/stih415-overview.txt
new file mode 100644
index 000000000..1383e33f2
--- /dev/null
+++ b/Documentation/arm/sti/stih415-overview.txt
@@ -0,0 +1,12 @@
+ STiH415 Overview
+ ================
+
+Introduction
+------------
+
+ The STiH415 is the next generation of HD, AVC set-top box processors
+ for satellite, cable, terrestrial and IP-STB markets.
+
+ Features
+ - ARM Cortex-A9 1.0 GHz, dual-core CPU
+ - SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/sti/stih416-overview.txt b/Documentation/arm/sti/stih416-overview.txt
new file mode 100644
index 000000000..558444c20
--- /dev/null
+++ b/Documentation/arm/sti/stih416-overview.txt
@@ -0,0 +1,12 @@
+ STiH416 Overview
+ ================
+
+Introduction
+------------
+
+ The STiH416 is the next generation of HD, AVC set-top box processors
+ for satellite, cable, terrestrial and IP-STB markets.
+
+ Features
+ - ARM Cortex-A9 1.2 GHz dual core CPU
+ - SATA2x2,USB 2.0x3, PCIe, Gbit Ethernet MACx2
diff --git a/Documentation/arm/sti/stih418-overview.txt b/Documentation/arm/sti/stih418-overview.txt
new file mode 100644
index 000000000..1cd8fc806
--- /dev/null
+++ b/Documentation/arm/sti/stih418-overview.txt
@@ -0,0 +1,20 @@
+ STiH418 Overview
+ ================
+
+Introduction
+------------
+
+ The STiH418 is the new generation of SoC for UHDp60 set-top boxes
+ and server/connected client application for satellite, cable, terrestrial
+ and IP-STB markets.
+
+ Features
+ - ARM Cortex-A9 1.5 GHz quad core CPU (28nm)
+ - SATA2, USB 3.0, PCIe, Gbit Ethernet
+ - HEVC L5.1 Main 10
+ - VP9
+
+ Document Author
+ ---------------
+
+ Maxime Coquelin <maxime.coquelin@st.com>, (c) 2015 ST Microelectronics
diff --git a/Documentation/arm/sunxi/README b/Documentation/arm/sunxi/README
new file mode 100644
index 000000000..1fe2d7fd4
--- /dev/null
+++ b/Documentation/arm/sunxi/README
@@ -0,0 +1,61 @@
+ARM Allwinner SoCs
+==================
+
+This document lists all the ARM Allwinner SoCs that are currently
+supported in mainline by the Linux kernel. This document will also
+provide links to documentation and/or datasheet for these SoCs.
+
+SunXi family
+------------
+ Linux kernel mach directory: arch/arm/mach-sunxi
+
+ Flavors:
+ * ARM926 based SoCs
+ - Allwinner F20 (sun3i)
+ + Not Supported
+
+ * ARM Cortex-A8 based SoCs
+ - Allwinner A10 (sun4i)
+ + Datasheet
+ http://dl.linux-sunxi.org/A10/A10%20Datasheet%20-%20v1.21%20%282012-04-06%29.pdf
+ + User Manual
+ http://dl.linux-sunxi.org/A10/A10%20User%20Manual%20-%20v1.20%20%282012-04-09%2c%20DECRYPTED%29.pdf
+
+ - Allwinner A10s (sun5i)
+ + Datasheet
+ http://dl.linux-sunxi.org/A10s/A10s%20Datasheet%20-%20v1.20%20%282012-03-27%29.pdf
+
+ - Allwinner A13 (sun5i)
+ + Datasheet
+ http://dl.linux-sunxi.org/A13/A13%20Datasheet%20-%20v1.12%20%282012-03-29%29.pdf
+ + User Manual
+ http://dl.linux-sunxi.org/A13/A13%20User%20Manual%20-%20v1.2%20%282013-01-08%29.pdf
+
+ * Dual ARM Cortex-A7 based SoCs
+ - Allwinner A20 (sun7i)
+ + User Manual
+ http://dl.linux-sunxi.org/A20/A20%20User%20Manual%202013-03-22.pdf
+
+ - Allwinner A23
+ + Datasheet
+ http://dl.linux-sunxi.org/A23/A23%20Datasheet%20V1.0%2020130830.pdf
+ + User Manual
+ http://dl.linux-sunxi.org/A23/A23%20User%20Manual%20V1.0%2020130830.pdf
+
+ * Quad ARM Cortex-A7 based SoCs
+ - Allwinner A31 (sun6i)
+ + Datasheet
+ http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20datasheet%20V1.3%2020131106.pdf
+ + User Manual
+ http://dl.linux-sunxi.org/A31/A3x_release_document/A31/IC/A31%20user%20manual%20V1.1%2020130630.pdf
+
+ - Allwinner A31s (sun6i)
+ + Datasheet
+ http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20datasheet%20V1.3%2020131106.pdf
+ + User Manual
+ http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20User%20Manual%20%20V1.0%2020130322.pdf
+
+ * Quad ARM Cortex-A15, Quad ARM Cortex-A7 based SoCs
+ - Allwinner A80
+ + Datasheet
+ http://dl.linux-sunxi.org/A80/A80_Datasheet_Revision_1.0_0404.pdf
diff --git a/Documentation/arm/sunxi/clocks.txt b/Documentation/arm/sunxi/clocks.txt
new file mode 100644
index 000000000..e09a88aa3
--- /dev/null
+++ b/Documentation/arm/sunxi/clocks.txt
@@ -0,0 +1,56 @@
+Frequently asked questions about the sunxi clock system
+=======================================================
+
+This document contains useful bits of information that people tend to ask
+about the sunxi clock system, as well as accompanying ASCII art when adequate.
+
+Q: Why is the main 24MHz oscillator gatable? Wouldn't that break the
+ system?
+
+A: The 24MHz oscillator allows gating to save power. Indeed, if gated
+ carelessly the system would stop functioning, but with the right
+ steps, one can gate it and keep the system running. Consider this
+ simplified suspend example:
+
+ While the system is operational, you would see something like
+
+ 24MHz 32kHz
+ |
+ PLL1
+ \
+ \_ CPU Mux
+ |
+ [CPU]
+
+ When you are about to suspend, you switch the CPU Mux to the 32kHz
+ oscillator:
+
+ 24Mhz 32kHz
+ | |
+ PLL1 |
+ /
+ CPU Mux _/
+ |
+ [CPU]
+
+ Finally you can gate the main oscillator
+
+ 32kHz
+ |
+ |
+ /
+ CPU Mux _/
+ |
+ [CPU]
+
+Q: Were can I learn more about the sunxi clocks?
+
+A: The linux-sunxi wiki contains a page documenting the clock registers,
+ you can find it at
+
+ http://linux-sunxi.org/A10/CCM
+
+ The authoritative source for information at this time is the ccmu driver
+ released by Allwinner, you can find it at
+
+ https://github.com/linux-sunxi/linux-sunxi/tree/sunxi-3.0/arch/arm/mach-sun4i/clock/ccmu
diff --git a/Documentation/arm/swp_emulation b/Documentation/arm/swp_emulation
new file mode 100644
index 000000000..af903d22f
--- /dev/null
+++ b/Documentation/arm/swp_emulation
@@ -0,0 +1,27 @@
+Software emulation of deprecated SWP instruction (CONFIG_SWP_EMULATE)
+---------------------------------------------------------------------
+
+ARMv6 architecture deprecates use of the SWP/SWPB instructions, and recommeds
+moving to the load-locked/store-conditional instructions LDREX and STREX.
+
+ARMv7 multiprocessing extensions introduce the ability to disable these
+instructions, triggering an undefined instruction exception when executed.
+Trapped instructions are emulated using an LDREX/STREX or LDREXB/STREXB
+sequence. If a memory access fault (an abort) occurs, a segmentation fault is
+signalled to the triggering process.
+
+/proc/cpu/swp_emulation holds some statistics/information, including the PID of
+the last process to trigger the emulation to be invocated. For example:
+---
+Emulated SWP: 12
+Emulated SWPB: 0
+Aborted SWP{B}: 1
+Last process: 314
+---
+
+NOTE: when accessing uncached shared regions, LDREX/STREX rely on an external
+transaction monitoring block called a global monitor to maintain update
+atomicity. If your system does not implement a global monitor, this option can
+cause programs that perform SWP operations to uncached memory to deadlock, as
+the STREX operation will always fail.
+
diff --git a/Documentation/arm/tcm.txt b/Documentation/arm/tcm.txt
new file mode 100644
index 000000000..7c15871c1
--- /dev/null
+++ b/Documentation/arm/tcm.txt
@@ -0,0 +1,155 @@
+ARM TCM (Tightly-Coupled Memory) handling in Linux
+----
+Written by Linus Walleij <linus.walleij@stericsson.com>
+
+Some ARM SoC:s have a so-called TCM (Tightly-Coupled Memory).
+This is usually just a few (4-64) KiB of RAM inside the ARM
+processor.
+
+Due to being embedded inside the CPU The TCM has a
+Harvard-architecture, so there is an ITCM (instruction TCM)
+and a DTCM (data TCM). The DTCM can not contain any
+instructions, but the ITCM can actually contain data.
+The size of DTCM or ITCM is minimum 4KiB so the typical
+minimum configuration is 4KiB ITCM and 4KiB DTCM.
+
+ARM CPU:s have special registers to read out status, physical
+location and size of TCM memories. arch/arm/include/asm/cputype.h
+defines a CPUID_TCM register that you can read out from the
+system control coprocessor. Documentation from ARM can be found
+at http://infocenter.arm.com, search for "TCM Status Register"
+to see documents for all CPUs. Reading this register you can
+determine if ITCM (bits 1-0) and/or DTCM (bit 17-16) is present
+in the machine.
+
+There is further a TCM region register (search for "TCM Region
+Registers" at the ARM site) that can report and modify the location
+size of TCM memories at runtime. This is used to read out and modify
+TCM location and size. Notice that this is not a MMU table: you
+actually move the physical location of the TCM around. At the
+place you put it, it will mask any underlying RAM from the
+CPU so it is usually wise not to overlap any physical RAM with
+the TCM.
+
+The TCM memory can then be remapped to another address again using
+the MMU, but notice that the TCM if often used in situations where
+the MMU is turned off. To avoid confusion the current Linux
+implementation will map the TCM 1 to 1 from physical to virtual
+memory in the location specified by the kernel. Currently Linux
+will map ITCM to 0xfffe0000 and on, and DTCM to 0xfffe8000 and
+on, supporting a maximum of 32KiB of ITCM and 32KiB of DTCM.
+
+Newer versions of the region registers also support dividing these
+TCMs in two separate banks, so for example an 8KiB ITCM is divided
+into two 4KiB banks with its own control registers. The idea is to
+be able to lock and hide one of the banks for use by the secure
+world (TrustZone).
+
+TCM is used for a few things:
+
+- FIQ and other interrupt handlers that need deterministic
+ timing and cannot wait for cache misses.
+
+- Idle loops where all external RAM is set to self-refresh
+ retention mode, so only on-chip RAM is accessible by
+ the CPU and then we hang inside ITCM waiting for an
+ interrupt.
+
+- Other operations which implies shutting off or reconfiguring
+ the external RAM controller.
+
+There is an interface for using TCM on the ARM architecture
+in <asm/tcm.h>. Using this interface it is possible to:
+
+- Define the physical address and size of ITCM and DTCM.
+
+- Tag functions to be compiled into ITCM.
+
+- Tag data and constants to be allocated to DTCM and ITCM.
+
+- Have the remaining TCM RAM added to a special
+ allocation pool with gen_pool_create() and gen_pool_add()
+ and provice tcm_alloc() and tcm_free() for this
+ memory. Such a heap is great for things like saving
+ device state when shutting off device power domains.
+
+A machine that has TCM memory shall select HAVE_TCM from
+arch/arm/Kconfig for itself. Code that needs to use TCM shall
+#include <asm/tcm.h>
+
+Functions to go into itcm can be tagged like this:
+int __tcmfunc foo(int bar);
+
+Since these are marked to become long_calls and you may want
+to have functions called locally inside the TCM without
+wasting space, there is also the __tcmlocalfunc prefix that
+will make the call relative.
+
+Variables to go into dtcm can be tagged like this:
+int __tcmdata foo;
+
+Constants can be tagged like this:
+int __tcmconst foo;
+
+To put assembler into TCM just use
+.section ".tcm.text" or .section ".tcm.data"
+respectively.
+
+Example code:
+
+#include <asm/tcm.h>
+
+/* Uninitialized data */
+static u32 __tcmdata tcmvar;
+/* Initialized data */
+static u32 __tcmdata tcmassigned = 0x2BADBABEU;
+/* Constant */
+static const u32 __tcmconst tcmconst = 0xCAFEBABEU;
+
+static void __tcmlocalfunc tcm_to_tcm(void)
+{
+ int i;
+ for (i = 0; i < 100; i++)
+ tcmvar ++;
+}
+
+static void __tcmfunc hello_tcm(void)
+{
+ /* Some abstract code that runs in ITCM */
+ int i;
+ for (i = 0; i < 100; i++) {
+ tcmvar ++;
+ }
+ tcm_to_tcm();
+}
+
+static void __init test_tcm(void)
+{
+ u32 *tcmem;
+ int i;
+
+ hello_tcm();
+ printk("Hello TCM executed from ITCM RAM\n");
+
+ printk("TCM variable from testrun: %u @ %p\n", tcmvar, &tcmvar);
+ tcmvar = 0xDEADBEEFU;
+ printk("TCM variable: 0x%x @ %p\n", tcmvar, &tcmvar);
+
+ printk("TCM assigned variable: 0x%x @ %p\n", tcmassigned, &tcmassigned);
+
+ printk("TCM constant: 0x%x @ %p\n", tcmconst, &tcmconst);
+
+ /* Allocate some TCM memory from the pool */
+ tcmem = tcm_alloc(20);
+ if (tcmem) {
+ printk("TCM Allocated 20 bytes of TCM @ %p\n", tcmem);
+ tcmem[0] = 0xDEADBEEFU;
+ tcmem[1] = 0x2BADBABEU;
+ tcmem[2] = 0xCAFEBABEU;
+ tcmem[3] = 0xDEADBEEFU;
+ tcmem[4] = 0x2BADBABEU;
+ for (i = 0; i < 5; i++)
+ printk("TCM tcmem[%d] = %08x\n", i, tcmem[i]);
+ tcm_free(tcmem, 20);
+ }
+}
diff --git a/Documentation/arm/uefi.txt b/Documentation/arm/uefi.txt
new file mode 100644
index 000000000..d60030a1b
--- /dev/null
+++ b/Documentation/arm/uefi.txt
@@ -0,0 +1,64 @@
+UEFI, the Unified Extensible Firmware Interface, is a specification
+governing the behaviours of compatible firmware interfaces. It is
+maintained by the UEFI Forum - http://www.uefi.org/.
+
+UEFI is an evolution of its predecessor 'EFI', so the terms EFI and
+UEFI are used somewhat interchangeably in this document and associated
+source code. As a rule, anything new uses 'UEFI', whereas 'EFI' refers
+to legacy code or specifications.
+
+UEFI support in Linux
+=====================
+Booting on a platform with firmware compliant with the UEFI specification
+makes it possible for the kernel to support additional features:
+- UEFI Runtime Services
+- Retrieving various configuration information through the standardised
+ interface of UEFI configuration tables. (ACPI, SMBIOS, ...)
+
+For actually enabling [U]EFI support, enable:
+- CONFIG_EFI=y
+- CONFIG_EFI_VARS=y or m
+
+The implementation depends on receiving information about the UEFI environment
+in a Flattened Device Tree (FDT) - so is only available with CONFIG_OF.
+
+UEFI stub
+=========
+The "stub" is a feature that extends the Image/zImage into a valid UEFI
+PE/COFF executable, including a loader application that makes it possible to
+load the kernel directly from the UEFI shell, boot menu, or one of the
+lightweight bootloaders like Gummiboot or rEFInd.
+
+The kernel image built with stub support remains a valid kernel image for
+booting in non-UEFI environments.
+
+UEFI kernel support on ARM
+==========================
+UEFI kernel support on the ARM architectures (arm and arm64) is only available
+when boot is performed through the stub.
+
+When booting in UEFI mode, the stub deletes any memory nodes from a provided DT.
+Instead, the kernel reads the UEFI memory map.
+
+The stub populates the FDT /chosen node with (and the kernel scans for) the
+following parameters:
+________________________________________________________________________________
+Name | Size | Description
+================================================================================
+linux,uefi-system-table | 64-bit | Physical address of the UEFI System Table.
+--------------------------------------------------------------------------------
+linux,uefi-mmap-start | 64-bit | Physical address of the UEFI memory map,
+ | | populated by the UEFI GetMemoryMap() call.
+--------------------------------------------------------------------------------
+linux,uefi-mmap-size | 32-bit | Size in bytes of the UEFI memory map
+ | | pointed to in previous entry.
+--------------------------------------------------------------------------------
+linux,uefi-mmap-desc-size | 32-bit | Size in bytes of each entry in the UEFI
+ | | memory map.
+--------------------------------------------------------------------------------
+linux,uefi-mmap-desc-ver | 32-bit | Version of the mmap descriptor format.
+--------------------------------------------------------------------------------
+linux,uefi-stub-kern-ver | string | Copy of linux_banner from build.
+--------------------------------------------------------------------------------
+
+For verbose debug messages, specify 'uefi_debug' on the kernel command line.
diff --git a/Documentation/arm/vlocks.txt b/Documentation/arm/vlocks.txt
new file mode 100644
index 000000000..415960a9b
--- /dev/null
+++ b/Documentation/arm/vlocks.txt
@@ -0,0 +1,211 @@
+vlocks for Bare-Metal Mutual Exclusion
+======================================
+
+Voting Locks, or "vlocks" provide a simple low-level mutual exclusion
+mechanism, with reasonable but minimal requirements on the memory
+system.
+
+These are intended to be used to coordinate critical activity among CPUs
+which are otherwise non-coherent, in situations where the hardware
+provides no other mechanism to support this and ordinary spinlocks
+cannot be used.
+
+
+vlocks make use of the atomicity provided by the memory system for
+writes to a single memory location. To arbitrate, every CPU "votes for
+itself", by storing a unique number to a common memory location. The
+final value seen in that memory location when all the votes have been
+cast identifies the winner.
+
+In order to make sure that the election produces an unambiguous result
+in finite time, a CPU will only enter the election in the first place if
+no winner has been chosen and the election does not appear to have
+started yet.
+
+
+Algorithm
+---------
+
+The easiest way to explain the vlocks algorithm is with some pseudo-code:
+
+
+ int currently_voting[NR_CPUS] = { 0, };
+ int last_vote = -1; /* no votes yet */
+
+ bool vlock_trylock(int this_cpu)
+ {
+ /* signal our desire to vote */
+ currently_voting[this_cpu] = 1;
+ if (last_vote != -1) {
+ /* someone already volunteered himself */
+ currently_voting[this_cpu] = 0;
+ return false; /* not ourself */
+ }
+
+ /* let's suggest ourself */
+ last_vote = this_cpu;
+ currently_voting[this_cpu] = 0;
+
+ /* then wait until everyone else is done voting */
+ for_each_cpu(i) {
+ while (currently_voting[i] != 0)
+ /* wait */;
+ }
+
+ /* result */
+ if (last_vote == this_cpu)
+ return true; /* we won */
+ return false;
+ }
+
+ bool vlock_unlock(void)
+ {
+ last_vote = -1;
+ }
+
+
+The currently_voting[] array provides a way for the CPUs to determine
+whether an election is in progress, and plays a role analogous to the
+"entering" array in Lamport's bakery algorithm [1].
+
+However, once the election has started, the underlying memory system
+atomicity is used to pick the winner. This avoids the need for a static
+priority rule to act as a tie-breaker, or any counters which could
+overflow.
+
+As long as the last_vote variable is globally visible to all CPUs, it
+will contain only one value that won't change once every CPU has cleared
+its currently_voting flag.
+
+
+Features and limitations
+------------------------
+
+ * vlocks are not intended to be fair. In the contended case, it is the
+ _last_ CPU which attempts to get the lock which will be most likely
+ to win.
+
+ vlocks are therefore best suited to situations where it is necessary
+ to pick a unique winner, but it does not matter which CPU actually
+ wins.
+
+ * Like other similar mechanisms, vlocks will not scale well to a large
+ number of CPUs.
+
+ vlocks can be cascaded in a voting hierarchy to permit better scaling
+ if necessary, as in the following hypothetical example for 4096 CPUs:
+
+ /* first level: local election */
+ my_town = towns[(this_cpu >> 4) & 0xf];
+ I_won = vlock_trylock(my_town, this_cpu & 0xf);
+ if (I_won) {
+ /* we won the town election, let's go for the state */
+ my_state = states[(this_cpu >> 8) & 0xf];
+ I_won = vlock_lock(my_state, this_cpu & 0xf));
+ if (I_won) {
+ /* and so on */
+ I_won = vlock_lock(the_whole_country, this_cpu & 0xf];
+ if (I_won) {
+ /* ... */
+ }
+ vlock_unlock(the_whole_country);
+ }
+ vlock_unlock(my_state);
+ }
+ vlock_unlock(my_town);
+
+
+ARM implementation
+------------------
+
+The current ARM implementation [2] contains some optimisations beyond
+the basic algorithm:
+
+ * By packing the members of the currently_voting array close together,
+ we can read the whole array in one transaction (providing the number
+ of CPUs potentially contending the lock is small enough). This
+ reduces the number of round-trips required to external memory.
+
+ In the ARM implementation, this means that we can use a single load
+ and comparison:
+
+ LDR Rt, [Rn]
+ CMP Rt, #0
+
+ ...in place of code equivalent to:
+
+ LDRB Rt, [Rn]
+ CMP Rt, #0
+ LDRBEQ Rt, [Rn, #1]
+ CMPEQ Rt, #0
+ LDRBEQ Rt, [Rn, #2]
+ CMPEQ Rt, #0
+ LDRBEQ Rt, [Rn, #3]
+ CMPEQ Rt, #0
+
+ This cuts down on the fast-path latency, as well as potentially
+ reducing bus contention in contended cases.
+
+ The optimisation relies on the fact that the ARM memory system
+ guarantees coherency between overlapping memory accesses of
+ different sizes, similarly to many other architectures. Note that
+ we do not care which element of currently_voting appears in which
+ bits of Rt, so there is no need to worry about endianness in this
+ optimisation.
+
+ If there are too many CPUs to read the currently_voting array in
+ one transaction then multiple transations are still required. The
+ implementation uses a simple loop of word-sized loads for this
+ case. The number of transactions is still fewer than would be
+ required if bytes were loaded individually.
+
+
+ In principle, we could aggregate further by using LDRD or LDM, but
+ to keep the code simple this was not attempted in the initial
+ implementation.
+
+
+ * vlocks are currently only used to coordinate between CPUs which are
+ unable to enable their caches yet. This means that the
+ implementation removes many of the barriers which would be required
+ when executing the algorithm in cached memory.
+
+ packing of the currently_voting array does not work with cached
+ memory unless all CPUs contending the lock are cache-coherent, due
+ to cache writebacks from one CPU clobbering values written by other
+ CPUs. (Though if all the CPUs are cache-coherent, you should be
+ probably be using proper spinlocks instead anyway).
+
+
+ * The "no votes yet" value used for the last_vote variable is 0 (not
+ -1 as in the pseudocode). This allows statically-allocated vlocks
+ to be implicitly initialised to an unlocked state simply by putting
+ them in .bss.
+
+ An offset is added to each CPU's ID for the purpose of setting this
+ variable, so that no CPU uses the value 0 for its ID.
+
+
+Colophon
+--------
+
+Originally created and documented by Dave Martin for Linaro Limited, for
+use in ARM-based big.LITTLE platforms, with review and input gratefully
+received from Nicolas Pitre and Achin Gupta. Thanks to Nicolas for
+grabbing most of this text out of the relevant mail thread and writing
+up the pseudocode.
+
+Copyright (C) 2012-2013 Linaro Limited
+Distributed under the terms of Version 2 of the GNU General Public
+License, as defined in linux/COPYING.
+
+
+References
+----------
+
+[1] Lamport, L. "A New Solution of Dijkstra's Concurrent Programming
+ Problem", Communications of the ACM 17, 8 (August 1974), 453-455.
+
+ http://en.wikipedia.org/wiki/Lamport%27s_bakery_algorithm
+
+[2] linux/arch/arm/common/vlock.S, www.kernel.org.
diff --git a/Documentation/arm64/acpi_object_usage.txt b/Documentation/arm64/acpi_object_usage.txt
new file mode 100644
index 000000000..a6e1a1805
--- /dev/null
+++ b/Documentation/arm64/acpi_object_usage.txt
@@ -0,0 +1,593 @@
+ACPI Tables
+-----------
+The expectations of individual ACPI tables are discussed in the list that
+follows.
+
+If a section number is used, it refers to a section number in the ACPI
+specification where the object is defined. If "Signature Reserved" is used,
+the table signature (the first four bytes of the table) is the only portion
+of the table recognized by the specification, and the actual table is defined
+outside of the UEFI Forum (see Section 5.2.6 of the specification).
+
+For ACPI on arm64, tables also fall into the following categories:
+
+ -- Required: DSDT, FADT, GTDT, MADT, MCFG, RSDP, SPCR, XSDT
+
+ -- Recommended: BERT, EINJ, ERST, HEST, SSDT
+
+ -- Optional: BGRT, CPEP, CSRT, DRTM, ECDT, FACS, FPDT, MCHI, MPST,
+ MSCT, RASF, SBST, SLIT, SPMI, SRAT, TCPA, TPM2, UEFI
+
+ -- Not supported: BOOT, DBG2, DBGP, DMAR, ETDT, HPET, IBFT, IVRS,
+ LPIT, MSDM, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
+
+
+Table Usage for ARMv8 Linux
+----- ----------------------------------------------------------------
+BERT Section 18.3 (signature == "BERT")
+ == Boot Error Record Table ==
+ Must be supplied if RAS support is provided by the platform. It
+ is recommended this table be supplied.
+
+BOOT Signature Reserved (signature == "BOOT")
+ == simple BOOT flag table ==
+ Microsoft only table, will not be supported.
+
+BGRT Section 5.2.22 (signature == "BGRT")
+ == Boot Graphics Resource Table ==
+ Optional, not currently supported, with no real use-case for an
+ ARM server.
+
+CPEP Section 5.2.18 (signature == "CPEP")
+ == Corrected Platform Error Polling table ==
+ Optional, not currently supported, and not recommended until such
+ time as ARM-compatible hardware is available, and the specification
+ suitably modified.
+
+CSRT Signature Reserved (signature == "CSRT")
+ == Core System Resources Table ==
+ Optional, not currently supported.
+
+DBG2 Signature Reserved (signature == "DBG2")
+ == DeBuG port table 2 ==
+ Microsoft only table, will not be supported.
+
+DBGP Signature Reserved (signature == "DBGP")
+ == DeBuG Port table ==
+ Microsoft only table, will not be supported.
+
+DSDT Section 5.2.11.1 (signature == "DSDT")
+ == Differentiated System Description Table ==
+ A DSDT is required; see also SSDT.
+
+ ACPI tables contain only one DSDT but can contain one or more SSDTs,
+ which are optional. Each SSDT can only add to the ACPI namespace,
+ but cannot modify or replace anything in the DSDT.
+
+DMAR Signature Reserved (signature == "DMAR")
+ == DMA Remapping table ==
+ x86 only table, will not be supported.
+
+DRTM Signature Reserved (signature == "DRTM")
+ == Dynamic Root of Trust for Measurement table ==
+ Optional, not currently supported.
+
+ECDT Section 5.2.16 (signature == "ECDT")
+ == Embedded Controller Description Table ==
+ Optional, not currently supported, but could be used on ARM if and
+ only if one uses the GPE_BIT field to represent an IRQ number, since
+ there are no GPE blocks defined in hardware reduced mode. This would
+ need to be modified in the ACPI specification.
+
+EINJ Section 18.6 (signature == "EINJ")
+ == Error Injection table ==
+ This table is very useful for testing platform response to error
+ conditions; it allows one to inject an error into the system as
+ if it had actually occurred. However, this table should not be
+ shipped with a production system; it should be dynamically loaded
+ and executed with the ACPICA tools only during testing.
+
+ERST Section 18.5 (signature == "ERST")
+ == Error Record Serialization Table ==
+ On a platform supports RAS, this table must be supplied if it is not
+ UEFI-based; if it is UEFI-based, this table may be supplied. When this
+ table is not present, UEFI run time service will be utilized to save
+ and retrieve hardware error information to and from a persistent store.
+
+ETDT Signature Reserved (signature == "ETDT")
+ == Event Timer Description Table ==
+ Obsolete table, will not be supported.
+
+FACS Section 5.2.10 (signature == "FACS")
+ == Firmware ACPI Control Structure ==
+ It is unlikely that this table will be terribly useful. If it is
+ provided, the Global Lock will NOT be used since it is not part of
+ the hardware reduced profile, and only 64-bit address fields will
+ be considered valid.
+
+FADT Section 5.2.9 (signature == "FACP")
+ == Fixed ACPI Description Table ==
+ Required for arm64.
+
+ The HW_REDUCED_ACPI flag must be set. All of the fields that are
+ to be ignored when HW_REDUCED_ACPI is set are expected to be set to
+ zero.
+
+ If an FACS table is provided, the X_FIRMWARE_CTRL field is to be
+ used, not FIRMWARE_CTRL.
+
+ If PSCI is used (as is recommended), make sure that ARM_BOOT_ARCH is
+ filled in properly -- that the PSCI_COMPLIANT flag is set and that
+ PSCI_USE_HVC is set or unset as needed (see table 5-37).
+
+ For the DSDT that is also required, the X_DSDT field is to be used,
+ not the DSDT field.
+
+FPDT Section 5.2.23 (signature == "FPDT")
+ == Firmware Performance Data Table ==
+ Optional, not currently supported.
+
+GTDT Section 5.2.24 (signature == "GTDT")
+ == Generic Timer Description Table ==
+ Required for arm64.
+
+HEST Section 18.3.2 (signature == "HEST")
+ == Hardware Error Source Table ==
+ Until further error source types are defined, use only types 6 (AER
+ Root Port), 7 (AER Endpoint), 8 (AER Bridge), or 9 (Generic Hardware
+ Error Source). Firmware first error handling is possible if and only
+ if Trusted Firmware is being used on arm64.
+
+ Must be supplied if RAS support is provided by the platform. It
+ is recommended this table be supplied.
+
+HPET Signature Reserved (signature == "HPET")
+ == High Precision Event timer Table ==
+ x86 only table, will not be supported.
+
+IBFT Signature Reserved (signature == "IBFT")
+ == iSCSI Boot Firmware Table ==
+ Microsoft defined table, support TBD.
+
+IVRS Signature Reserved (signature == "IVRS")
+ == I/O Virtualization Reporting Structure ==
+ x86_64 (AMD) only table, will not be supported.
+
+LPIT Signature Reserved (signature == "LPIT")
+ == Low Power Idle Table ==
+ x86 only table as of ACPI 5.1; future versions have been adapted for
+ use with ARM and will be recommended in order to support ACPI power
+ management.
+
+MADT Section 5.2.12 (signature == "APIC")
+ == Multiple APIC Description Table ==
+ Required for arm64. Only the GIC interrupt controller structures
+ should be used (types 0xA - 0xE).
+
+MCFG Signature Reserved (signature == "MCFG")
+ == Memory-mapped ConFiGuration space ==
+ If the platform supports PCI/PCIe, an MCFG table is required.
+
+MCHI Signature Reserved (signature == "MCHI")
+ == Management Controller Host Interface table ==
+ Optional, not currently supported.
+
+MPST Section 5.2.21 (signature == "MPST")
+ == Memory Power State Table ==
+ Optional, not currently supported.
+
+MSDM Signature Reserved (signature == "MSDM")
+ == Microsoft Data Management table ==
+ Microsoft only table, will not be supported.
+
+MSCT Section 5.2.19 (signature == "MSCT")
+ == Maximum System Characteristic Table ==
+ Optional, not currently supported.
+
+RASF Section 5.2.20 (signature == "RASF")
+ == RAS Feature table ==
+ Optional, not currently supported.
+
+RSDP Section 5.2.5 (signature == "RSD PTR")
+ == Root System Description PoinTeR ==
+ Required for arm64.
+
+RSDT Section 5.2.7 (signature == "RSDT")
+ == Root System Description Table ==
+ Since this table can only provide 32-bit addresses, it is deprecated
+ on arm64, and will not be used.
+
+SBST Section 5.2.14 (signature == "SBST")
+ == Smart Battery Subsystem Table ==
+ Optional, not currently supported.
+
+SLIC Signature Reserved (signature == "SLIC")
+ == Software LIcensing table ==
+ Microsoft only table, will not be supported.
+
+SLIT Section 5.2.17 (signature == "SLIT")
+ == System Locality distance Information Table ==
+ Optional in general, but required for NUMA systems.
+
+SPCR Signature Reserved (signature == "SPCR")
+ == Serial Port Console Redirection table ==
+ Required for arm64.
+
+SPMI Signature Reserved (signature == "SPMI")
+ == Server Platform Management Interface table ==
+ Optional, not currently supported.
+
+SRAT Section 5.2.16 (signature == "SRAT")
+ == System Resource Affinity Table ==
+ Optional, but if used, only the GICC Affinity structures are read.
+ To support NUMA, this table is required.
+
+SSDT Section 5.2.11.2 (signature == "SSDT")
+ == Secondary System Description Table ==
+ These tables are a continuation of the DSDT; these are recommended
+ for use with devices that can be added to a running system, but can
+ also serve the purpose of dividing up device descriptions into more
+ manageable pieces.
+
+ An SSDT can only ADD to the ACPI namespace. It cannot modify or
+ replace existing device descriptions already in the namespace.
+
+ These tables are optional, however. ACPI tables should contain only
+ one DSDT but can contain many SSDTs.
+
+TCPA Signature Reserved (signature == "TCPA")
+ == Trusted Computing Platform Alliance table ==
+ Optional, not currently supported, and may need changes to fully
+ interoperate with arm64.
+
+TPM2 Signature Reserved (signature == "TPM2")
+ == Trusted Platform Module 2 table ==
+ Optional, not currently supported, and may need changes to fully
+ interoperate with arm64.
+
+UEFI Signature Reserved (signature == "UEFI")
+ == UEFI ACPI data table ==
+ Optional, not currently supported. No known use case for arm64,
+ at present.
+
+WAET Signature Reserved (signature == "WAET")
+ == Windows ACPI Emulated devices Table ==
+ Microsoft only table, will not be supported.
+
+WDAT Signature Reserved (signature == "WDAT")
+ == Watch Dog Action Table ==
+ Microsoft only table, will not be supported.
+
+WDRT Signature Reserved (signature == "WDRT")
+ == Watch Dog Resource Table ==
+ Microsoft only table, will not be supported.
+
+WPBT Signature Reserved (signature == "WPBT")
+ == Windows Platform Binary Table ==
+ Microsoft only table, will not be supported.
+
+XSDT Section 5.2.8 (signature == "XSDT")
+ == eXtended System Description Table ==
+ Required for arm64.
+
+
+ACPI Objects
+------------
+The expectations on individual ACPI objects are discussed in the list that
+follows:
+
+Name Section Usage for ARMv8 Linux
+---- ------------ -------------------------------------------------
+_ADR 6.1.1 Use as needed.
+
+_BBN 6.5.5 Use as needed; PCI-specific.
+
+_BDN 6.5.3 Optional; not likely to be used on arm64.
+
+_CCA 6.2.17 This method should be defined for all bus masters
+ on arm64. While cache coherency is assumed, making
+ it explicit ensures the kernel will set up DMA as
+ it should.
+
+_CDM 6.2.1 Optional, to be used only for processor devices.
+
+_CID 6.1.2 Use as needed.
+
+_CLS 6.1.3 Use as needed.
+
+_CRS 6.2.2 Required on arm64.
+
+_DCK 6.5.2 Optional; not likely to be used on arm64.
+
+_DDN 6.1.4 This field can be used for a device name. However,
+ it is meant for DOS device names (e.g., COM1), so be
+ careful of its use across OSes.
+
+_DEP 6.5.8 Use as needed.
+
+_DIS 6.2.3 Optional, for power management use.
+
+_DLM 5.7.5 Optional.
+
+_DMA 6.2.4 Optional.
+
+_DSD 6.2.5 To be used with caution. If this object is used, try
+ to use it within the constraints already defined by the
+ Device Properties UUID. Only in rare circumstances
+ should it be necessary to create a new _DSD UUID.
+
+ In either case, submit the _DSD definition along with
+ any driver patches for discussion, especially when
+ device properties are used. A driver will not be
+ considered complete without a corresponding _DSD
+ description. Once approved by kernel maintainers,
+ the UUID or device properties must then be registered
+ with the UEFI Forum; this may cause some iteration as
+ more than one OS will be registering entries.
+
+_DSM Do not use this method. It is not standardized, the
+ return values are not well documented, and it is
+ currently a frequent source of error.
+
+_DSW 7.2.1 Use as needed; power management specific.
+
+_EDL 6.3.1 Optional.
+
+_EJD 6.3.2 Optional.
+
+_EJx 6.3.3 Optional.
+
+_FIX 6.2.7 x86 specific, not used on arm64.
+
+\_GL 5.7.1 This object is not to be used in hardware reduced
+ mode, and therefore should not be used on arm64.
+
+_GLK 6.5.7 This object requires a global lock be defined; there
+ is no global lock on arm64 since it runs in hardware
+ reduced mode. Hence, do not use this object on arm64.
+
+\_GPE 5.3.1 This namespace is for x86 use only. Do not use it
+ on arm64.
+
+_GSB 6.2.7 Optional.
+
+_HID 6.1.5 Use as needed. This is the primary object to use in
+ device probing, though _CID and _CLS may also be used.
+
+_HPP 6.2.8 Optional, PCI specific.
+
+_HPX 6.2.9 Optional, PCI specific.
+
+_HRV 6.1.6 Optional, use as needed to clarify device behavior; in
+ some cases, this may be easier to use than _DSD.
+
+_INI 6.5.1 Not required, but can be useful in setting up devices
+ when UEFI leaves them in a state that may not be what
+ the driver expects before it starts probing.
+
+_IRC 7.2.15 Use as needed; power management specific.
+
+_LCK 6.3.4 Optional.
+
+_MAT 6.2.10 Optional; see also the MADT.
+
+_MLS 6.1.7 Optional, but highly recommended for use in
+ internationalization.
+
+_OFF 7.1.2 It is recommended to define this method for any device
+ that can be turned on or off.
+
+_ON 7.1.3 It is recommended to define this method for any device
+ that can be turned on or off.
+
+\_OS 5.7.3 This method will return "Linux" by default (this is
+ the value of the macro ACPI_OS_NAME on Linux). The
+ command line parameter acpi_os=<string> can be used
+ to set it to some other value.
+
+_OSC 6.2.11 This method can be a global method in ACPI (i.e.,
+ \_SB._OSC), or it may be associated with a specific
+ device (e.g., \_SB.DEV0._OSC), or both. When used
+ as a global method, only capabilities published in
+ the ACPI specification are allowed. When used as
+ a device-specific method, the process described for
+ using _DSD MUST be used to create an _OSC definition;
+ out-of-process use of _OSC is not allowed. That is,
+ submit the device-specific _OSC usage description as
+ part of the kernel driver submission, get it approved
+ by the kernel community, then register it with the
+ UEFI Forum.
+
+\_OSI 5.7.2 Deprecated on ARM64. Any invocation of this method
+ will print a warning on the console and return false.
+ That is, as far as ACPI firmware is concerned, _OSI
+ cannot be used to determine what sort of system is
+ being used or what functionality is provided. The
+ _OSC method is to be used instead.
+
+_OST 6.3.5 Optional.
+
+_PDC 8.4.1 Deprecated, do not use on arm64.
+
+\_PIC 5.8.1 The method should not be used. On arm64, the only
+ interrupt model available is GIC.
+
+_PLD 6.1.8 Optional.
+
+\_PR 5.3.1 This namespace is for x86 use only on legacy systems.
+ Do not use it on arm64.
+
+_PRS 6.2.12 Optional.
+
+_PRT 6.2.13 Required as part of the definition of all PCI root
+ devices.
+
+_PRW 7.2.13 Use as needed; power management specific.
+
+_PRx 7.2.8-11 Use as needed; power management specific. If _PR0 is
+ defined, _PR3 must also be defined.
+
+_PSC 7.2.6 Use as needed; power management specific.
+
+_PSE 7.2.7 Use as needed; power management specific.
+
+_PSW 7.2.14 Use as needed; power management specific.
+
+_PSx 7.2.2-5 Use as needed; power management specific. If _PS0 is
+ defined, _PS3 must also be defined. If clocks or
+ regulators need adjusting to be consistent with power
+ usage, change them in these methods.
+
+\_PTS 7.3.1 Use as needed; power management specific.
+
+_PXM 6.2.14 Optional.
+
+_REG 6.5.4 Use as needed.
+
+\_REV 5.7.4 Always returns the latest version of ACPI supported.
+
+_RMV 6.3.6 Optional.
+
+\_SB 5.3.1 Required on arm64; all devices must be defined in this
+ namespace.
+
+_SEG 6.5.6 Use as needed; PCI-specific.
+
+\_SI 5.3.1, Optional.
+ 9.1
+
+_SLI 6.2.15 Optional; recommended when SLIT table is in use.
+
+_STA 6.3.7, It is recommended to define this method for any device
+ 7.1.4 that can be turned on or off.
+
+_SRS 6.2.16 Optional; see also _PRS.
+
+_STR 6.1.10 Recommended for conveying device names to end users;
+ this is preferred over using _DDN.
+
+_SUB 6.1.9 Use as needed; _HID or _CID are preferred.
+
+_SUN 6.1.11 Optional.
+
+\_Sx 7.3.2 Use as needed; power management specific.
+
+_SxD 7.2.16-19 Use as needed; power management specific.
+
+_SxW 7.2.20-24 Use as needed; power management specific.
+
+_SWS 7.3.3 Use as needed; power management specific; this may
+ require specification changes for use on arm64.
+
+\_TTS 7.3.4 Use as needed; power management specific.
+
+\_TZ 5.3.1 Optional.
+
+_UID 6.1.12 Recommended for distinguishing devices of the same
+ class; define it if at all possible.
+
+\_WAK 7.3.5 Use as needed; power management specific.
+
+
+ACPI Event Model
+----------------
+Do not use GPE block devices; these are not supported in the hardware reduced
+profile used by arm64. Since there are no GPE blocks defined for use on ARM
+platforms, GPIO-signaled interrupts should be used for creating system events.
+
+
+ACPI Processor Control
+----------------------
+Section 8 of the ACPI specification is currently undergoing change that
+should be completed in the 6.0 version of the specification. Processor
+performance control will be handled differently for arm64 at that point
+in time. Processor aggregator devices (section 8.5) will not be used,
+for example, but another similar mechanism instead.
+
+While UEFI constrains what we can say until the release of 6.0, it is
+recommended that CPPC (8.4.5) be used as the primary model. This will
+still be useful into the future. C-states and P-states will still be
+provided, but most of the current design work appears to favor CPPC.
+
+Further, it is essential that the ARMv8 SoC provide a fully functional
+implementation of PSCI; this will be the only mechanism supported by ACPI
+to control CPU power state (including secondary CPU booting).
+
+More details will be provided on the release of the ACPI 6.0 specification.
+
+
+ACPI System Address Map Interfaces
+----------------------------------
+In Section 15 of the ACPI specification, several methods are mentioned as
+possible mechanisms for conveying memory resource information to the kernel.
+For arm64, we will only support UEFI for booting with ACPI, hence the UEFI
+GetMemoryMap() boot service is the only mechanism that will be used.
+
+
+ACPI Platform Error Interfaces (APEI)
+-------------------------------------
+The APEI tables supported are described above.
+
+APEI requires the equivalent of an SCI and an NMI on ARMv8. The SCI is used
+to notify the OSPM of errors that have occurred but can be corrected and the
+system can continue correct operation, even if possibly degraded. The NMI is
+used to indicate fatal errors that cannot be corrected, and require immediate
+attention.
+
+Since there is no direct equivalent of the x86 SCI or NMI, arm64 handles
+these slightly differently. The SCI is handled as a normal GPIO-signaled
+interrupt; given that these are corrected (or correctable) errors being
+reported, this is sufficient. The NMI is emulated as the highest priority
+GPIO-signaled interrupt possible. This implies some caution must be used
+since there could be interrupts at higher privilege levels or even interrupts
+at the same priority as the emulated NMI. In Linux, this should not be the
+case but one should be aware it could happen.
+
+
+ACPI Objects Not Supported on ARM64
+-----------------------------------
+While this may change in the future, there are several classes of objects
+that can be defined, but are not currently of general interest to ARM servers.
+
+These are not supported:
+
+ -- Section 9.2: ambient light sensor devices
+
+ -- Section 9.3: battery devices
+
+ -- Section 9.4: lids (e.g., laptop lids)
+
+ -- Section 9.8.2: IDE controllers
+
+ -- Section 9.9: floppy controllers
+
+ -- Section 9.10: GPE block devices
+
+ -- Section 9.15: PC/AT RTC/CMOS devices
+
+ -- Section 9.16: user presence detection devices
+
+ -- Section 9.17: I/O APIC devices; all GICs must be enumerable via MADT
+
+ -- Section 9.18: time and alarm devices (see 9.15)
+
+
+ACPI Objects Not Yet Implemented
+--------------------------------
+While these objects have x86 equivalents, and they do make some sense in ARM
+servers, there is either no hardware available at present, or in some cases
+there may not yet be a non-ARM implementation. Hence, they are currently not
+implemented though that may change in the future.
+
+Not yet implemented are:
+
+ -- Section 10: power source and power meter devices
+
+ -- Section 11: thermal management
+
+ -- Section 12: embedded controllers interface
+
+ -- Section 13: SMBus interfaces
+
+ -- Section 17: NUMA support (prototypes have been submitted for
+ review)
diff --git a/Documentation/arm64/arm-acpi.txt b/Documentation/arm64/arm-acpi.txt
new file mode 100644
index 000000000..570a4f8e1
--- /dev/null
+++ b/Documentation/arm64/arm-acpi.txt
@@ -0,0 +1,505 @@
+ACPI on ARMv8 Servers
+---------------------
+ACPI can be used for ARMv8 general purpose servers designed to follow
+the ARM SBSA (Server Base System Architecture) [0] and SBBR (Server
+Base Boot Requirements) [1] specifications. Please note that the SBBR
+can be retrieved simply by visiting [1], but the SBSA is currently only
+available to those with an ARM login due to ARM IP licensing concerns.
+
+The ARMv8 kernel implements the reduced hardware model of ACPI version
+5.1 or later. Links to the specification and all external documents
+it refers to are managed by the UEFI Forum. The specification is
+available at http://www.uefi.org/specifications and documents referenced
+by the specification can be found via http://www.uefi.org/acpi.
+
+If an ARMv8 system does not meet the requirements of the SBSA and SBBR,
+or cannot be described using the mechanisms defined in the required ACPI
+specifications, then ACPI may not be a good fit for the hardware.
+
+While the documents mentioned above set out the requirements for building
+industry-standard ARMv8 servers, they also apply to more than one operating
+system. The purpose of this document is to describe the interaction between
+ACPI and Linux only, on an ARMv8 system -- that is, what Linux expects of
+ACPI and what ACPI can expect of Linux.
+
+
+Why ACPI on ARM?
+----------------
+Before examining the details of the interface between ACPI and Linux, it is
+useful to understand why ACPI is being used. Several technologies already
+exist in Linux for describing non-enumerable hardware, after all. In this
+section we summarize a blog post [2] from Grant Likely that outlines the
+reasoning behind ACPI on ARMv8 servers. Actually, we snitch a good portion
+of the summary text almost directly, to be honest.
+
+The short form of the rationale for ACPI on ARM is:
+
+-- ACPI’s bytecode (AML) allows the platform to encode hardware behavior,
+ while DT explicitly does not support this. For hardware vendors, being
+ able to encode behavior is a key tool used in supporting operating
+ system releases on new hardware.
+
+-- ACPI’s OSPM defines a power management model that constrains what the
+ platform is allowed to do into a specific model, while still providing
+ flexibility in hardware design.
+
+-- In the enterprise server environment, ACPI has established bindings (such
+ as for RAS) which are currently used in production systems. DT does not.
+ Such bindings could be defined in DT at some point, but doing so means ARM
+ and x86 would end up using completely different code paths in both firmware
+ and the kernel.
+
+-- Choosing a single interface to describe the abstraction between a platform
+ and an OS is important. Hardware vendors would not be required to implement
+ both DT and ACPI if they want to support multiple operating systems. And,
+ agreeing on a single interface instead of being fragmented into per OS
+ interfaces makes for better interoperability overall.
+
+-- The new ACPI governance process works well and Linux is now at the same
+ table as hardware vendors and other OS vendors. In fact, there is no
+ longer any reason to feel that ACPI is only belongs to Windows or that
+ Linux is in any way secondary to Microsoft in this arena. The move of
+ ACPI governance into the UEFI forum has significantly opened up the
+ specification development process, and currently, a large portion of the
+ changes being made to ACPI is being driven by Linux.
+
+Key to the use of ACPI is the support model. For servers in general, the
+responsibility for hardware behaviour cannot solely be the domain of the
+kernel, but rather must be split between the platform and the kernel, in
+order to allow for orderly change over time. ACPI frees the OS from needing
+to understand all the minute details of the hardware so that the OS doesn’t
+need to be ported to each and every device individually. It allows the
+hardware vendors to take responsibility for power management behaviour without
+depending on an OS release cycle which is not under their control.
+
+ACPI is also important because hardware and OS vendors have already worked
+out the mechanisms for supporting a general purpose computing ecosystem. The
+infrastructure is in place, the bindings are in place, and the processes are
+in place. DT does exactly what Linux needs it to when working with vertically
+integrated devices, but there are no good processes for supporting what the
+server vendors need. Linux could potentially get there with DT, but doing so
+really just duplicates something that already works. ACPI already does what
+the hardware vendors need, Microsoft won’t collaborate on DT, and hardware
+vendors would still end up providing two completely separate firmware
+interfaces -- one for Linux and one for Windows.
+
+
+Kernel Compatibility
+--------------------
+One of the primary motivations for ACPI is standardization, and using that
+to provide backward compatibility for Linux kernels. In the server market,
+software and hardware are often used for long periods. ACPI allows the
+kernel and firmware to agree on a consistent abstraction that can be
+maintained over time, even as hardware or software change. As long as the
+abstraction is supported, systems can be updated without necessarily having
+to replace the kernel.
+
+When a Linux driver or subsystem is first implemented using ACPI, it by
+definition ends up requiring a specific version of the ACPI specification
+-- it's baseline. ACPI firmware must continue to work, even though it may
+not be optimal, with the earliest kernel version that first provides support
+for that baseline version of ACPI. There may be a need for additional drivers,
+but adding new functionality (e.g., CPU power management) should not break
+older kernel versions. Further, ACPI firmware must also work with the most
+recent version of the kernel.
+
+
+Relationship with Device Tree
+-----------------------------
+ACPI support in drivers and subsystems for ARMv8 should never be mutually
+exclusive with DT support at compile time.
+
+At boot time the kernel will only use one description method depending on
+parameters passed from the bootloader (including kernel bootargs).
+
+Regardless of whether DT or ACPI is used, the kernel must always be capable
+of booting with either scheme (in kernels with both schemes enabled at compile
+time).
+
+
+Booting using ACPI tables
+-------------------------
+The only defined method for passing ACPI tables to the kernel on ARMv8
+is via the UEFI system configuration table. Just so it is explicit, this
+means that ACPI is only supported on platforms that boot via UEFI.
+
+When an ARMv8 system boots, it can either have DT information, ACPI tables,
+or in some very unusual cases, both. If no command line parameters are used,
+the kernel will try to use DT for device enumeration; if there is no DT
+present, the kernel will try to use ACPI tables, but only if they are present.
+In neither is available, the kernel will not boot. If acpi=force is used
+on the command line, the kernel will attempt to use ACPI tables first, but
+fall back to DT if there are no ACPI tables present. The basic idea is that
+the kernel will not fail to boot unless it absolutely has no other choice.
+
+Processing of ACPI tables may be disabled by passing acpi=off on the kernel
+command line; this is the default behavior.
+
+In order for the kernel to load and use ACPI tables, the UEFI implementation
+MUST set the ACPI_20_TABLE_GUID to point to the RSDP table (the table with
+the ACPI signature "RSD PTR "). If this pointer is incorrect and acpi=force
+is used, the kernel will disable ACPI and try to use DT to boot instead; the
+kernel has, in effect, determined that ACPI tables are not present at that
+point.
+
+If the pointer to the RSDP table is correct, the table will be mapped into
+the kernel by the ACPI core, using the address provided by UEFI.
+
+The ACPI core will then locate and map in all other ACPI tables provided by
+using the addresses in the RSDP table to find the XSDT (eXtended System
+Description Table). The XSDT in turn provides the addresses to all other
+ACPI tables provided by the system firmware; the ACPI core will then traverse
+this table and map in the tables listed.
+
+The ACPI core will ignore any provided RSDT (Root System Description Table).
+RSDTs have been deprecated and are ignored on arm64 since they only allow
+for 32-bit addresses.
+
+Further, the ACPI core will only use the 64-bit address fields in the FADT
+(Fixed ACPI Description Table). Any 32-bit address fields in the FADT will
+be ignored on arm64.
+
+Hardware reduced mode (see Section 4.1 of the ACPI 5.1 specification) will
+be enforced by the ACPI core on arm64. Doing so allows the ACPI core to
+run less complex code since it no longer has to provide support for legacy
+hardware from other architectures. Any fields that are not to be used for
+hardware reduced mode must be set to zero.
+
+For the ACPI core to operate properly, and in turn provide the information
+the kernel needs to configure devices, it expects to find the following
+tables (all section numbers refer to the ACPI 5.1 specfication):
+
+ -- RSDP (Root System Description Pointer), section 5.2.5
+
+ -- XSDT (eXtended System Description Table), section 5.2.8
+
+ -- FADT (Fixed ACPI Description Table), section 5.2.9
+
+ -- DSDT (Differentiated System Description Table), section
+ 5.2.11.1
+
+ -- MADT (Multiple APIC Description Table), section 5.2.12
+
+ -- GTDT (Generic Timer Description Table), section 5.2.24
+
+ -- If PCI is supported, the MCFG (Memory mapped ConFiGuration
+ Table), section 5.2.6, specifically Table 5-31.
+
+If the above tables are not all present, the kernel may or may not be
+able to boot properly since it may not be able to configure all of the
+devices available.
+
+
+ACPI Detection
+--------------
+Drivers should determine their probe() type by checking for a null
+value for ACPI_HANDLE, or checking .of_node, or other information in
+the device structure. This is detailed further in the "Driver
+Recommendations" section.
+
+In non-driver code, if the presence of ACPI needs to be detected at
+runtime, then check the value of acpi_disabled. If CONFIG_ACPI is not
+set, acpi_disabled will always be 1.
+
+
+Device Enumeration
+------------------
+Device descriptions in ACPI should use standard recognized ACPI interfaces.
+These may contain less information than is typically provided via a Device
+Tree description for the same device. This is also one of the reasons that
+ACPI can be useful -- the driver takes into account that it may have less
+detailed information about the device and uses sensible defaults instead.
+If done properly in the driver, the hardware can change and improve over
+time without the driver having to change at all.
+
+Clocks provide an excellent example. In DT, clocks need to be specified
+and the drivers need to take them into account. In ACPI, the assumption
+is that UEFI will leave the device in a reasonable default state, including
+any clock settings. If for some reason the driver needs to change a clock
+value, this can be done in an ACPI method; all the driver needs to do is
+invoke the method and not concern itself with what the method needs to do
+to change the clock. Changing the hardware can then take place over time
+by changing what the ACPI method does, and not the driver.
+
+In DT, the parameters needed by the driver to set up clocks as in the example
+above are known as "bindings"; in ACPI, these are known as "Device Properties"
+and provided to a driver via the _DSD object.
+
+ACPI tables are described with a formal language called ASL, the ACPI
+Source Language (section 19 of the specification). This means that there
+are always multiple ways to describe the same thing -- including device
+properties. For example, device properties could use an ASL construct
+that looks like this: Name(KEY0, "value0"). An ACPI device driver would
+then retrieve the value of the property by evaluating the KEY0 object.
+However, using Name() this way has multiple problems: (1) ACPI limits
+names ("KEY0") to four characters unlike DT; (2) there is no industry
+wide registry that maintains a list of names, minimzing re-use; (3)
+there is also no registry for the definition of property values ("value0"),
+again making re-use difficult; and (4) how does one maintain backward
+compatibility as new hardware comes out? The _DSD method was created
+to solve precisely these sorts of problems; Linux drivers should ALWAYS
+use the _DSD method for device properties and nothing else.
+
+The _DSM object (ACPI Section 9.14.1) could also be used for conveying
+device properties to a driver. Linux drivers should only expect it to
+be used if _DSD cannot represent the data required, and there is no way
+to create a new UUID for the _DSD object. Note that there is even less
+regulation of the use of _DSM than there is of _DSD. Drivers that depend
+on the contents of _DSM objects will be more difficult to maintain over
+time because of this; as of this writing, the use of _DSM is the cause
+of quite a few firmware problems and is not recommended.
+
+Drivers should look for device properties in the _DSD object ONLY; the _DSD
+object is described in the ACPI specification section 6.2.5, but this only
+describes how to define the structure of an object returned via _DSD, and
+how specific data structures are defined by specific UUIDs. Linux should
+only use the _DSD Device Properties UUID [5]:
+
+ -- UUID: daffd814-6eba-4d8c-8a91-bc9bbf4aa301
+
+ -- http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
+
+The UEFI Forum provides a mechanism for registering device properties [4]
+so that they may be used across all operating systems supporting ACPI.
+Device properties that have not been registered with the UEFI Forum should
+not be used.
+
+Before creating new device properties, check to be sure that they have not
+been defined before and either registered in the Linux kernel documentation
+as DT bindings, or the UEFI Forum as device properties. While we do not want
+to simply move all DT bindings into ACPI device properties, we can learn from
+what has been previously defined.
+
+If it is necessary to define a new device property, or if it makes sense to
+synthesize the definition of a binding so it can be used in any firmware,
+both DT bindings and ACPI device properties for device drivers have review
+processes. Use them both. When the driver itself is submitted for review
+to the Linux mailing lists, the device property definitions needed must be
+submitted at the same time. A driver that supports ACPI and uses device
+properties will not be considered complete without their definitions. Once
+the device property has been accepted by the Linux community, it must be
+registered with the UEFI Forum [4], which will review it again for consistency
+within the registry. This may require iteration. The UEFI Forum, though,
+will always be the canonical site for device property definitions.
+
+It may make sense to provide notice to the UEFI Forum that there is the
+intent to register a previously unused device property name as a means of
+reserving the name for later use. Other operating system vendors will
+also be submitting registration requests and this may help smooth the
+process.
+
+Once registration and review have been completed, the kernel provides an
+interface for looking up device properties in a manner independent of
+whether DT or ACPI is being used. This API should be used [6]; it can
+eliminate some duplication of code paths in driver probing functions and
+discourage divergence between DT bindings and ACPI device properties.
+
+
+Programmable Power Control Resources
+------------------------------------
+Programmable power control resources include such resources as voltage/current
+providers (regulators) and clock sources.
+
+With ACPI, the kernel clock and regulator framework is not expected to be used
+at all.
+
+The kernel assumes that power control of these resources is represented with
+Power Resource Objects (ACPI section 7.1). The ACPI core will then handle
+correctly enabling and disabling resources as they are needed. In order to
+get that to work, ACPI assumes each device has defined D-states and that these
+can be controlled through the optional ACPI methods _PS0, _PS1, _PS2, and _PS3;
+in ACPI, _PS0 is the method to invoke to turn a device full on, and _PS3 is for
+turning a device full off.
+
+There are two options for using those Power Resources. They can:
+
+ -- be managed in a _PSx method which gets called on entry to power
+ state Dx.
+
+ -- be declared separately as power resources with their own _ON and _OFF
+ methods. They are then tied back to D-states for a particular device
+ via _PRx which specifies which power resources a device needs to be on
+ while in Dx. Kernel then tracks number of devices using a power resource
+ and calls _ON/_OFF as needed.
+
+The kernel ACPI code will also assume that the _PSx methods follow the normal
+ACPI rules for such methods:
+
+ -- If either _PS0 or _PS3 is implemented, then the other method must also
+ be implemented.
+
+ -- If a device requires usage or setup of a power resource when on, the ASL
+ should organize that it is allocated/enabled using the _PS0 method.
+
+ -- Resources allocated or enabled in the _PS0 method should be disabled
+ or de-allocated in the _PS3 method.
+
+ -- Firmware will leave the resources in a reasonable state before handing
+ over control to the kernel.
+
+Such code in _PSx methods will of course be very platform specific. But,
+this allows the driver to abstract out the interface for operating the device
+and avoid having to read special non-standard values from ACPI tables. Further,
+abstracting the use of these resources allows the hardware to change over time
+without requiring updates to the driver.
+
+
+Clocks
+------
+ACPI makes the assumption that clocks are initialized by the firmware --
+UEFI, in this case -- to some working value before control is handed over
+to the kernel. This has implications for devices such as UARTs, or SoC-driven
+LCD displays, for example.
+
+When the kernel boots, the clocks are assumed to be set to reasonable
+working values. If for some reason the frequency needs to change -- e.g.,
+throttling for power management -- the device driver should expect that
+process to be abstracted out into some ACPI method that can be invoked
+(please see the ACPI specification for further recommendations on standard
+methods to be expected). The only exceptions to this are CPU clocks where
+CPPC provides a much richer interface than ACPI methods. If the clocks
+are not set, there is no direct way for Linux to control them.
+
+If an SoC vendor wants to provide fine-grained control of the system clocks,
+they could do so by providing ACPI methods that could be invoked by Linux
+drivers. However, this is NOT recommended and Linux drivers should NOT use
+such methods, even if they are provided. Such methods are not currently
+standardized in the ACPI specification, and using them could tie a kernel
+to a very specific SoC, or tie an SoC to a very specific version of the
+kernel, both of which we are trying to avoid.
+
+
+Driver Recommendations
+----------------------
+DO NOT remove any DT handling when adding ACPI support for a driver. The
+same device may be used on many different systems.
+
+DO try to structure the driver so that it is data-driven. That is, set up
+a struct containing internal per-device state based on defaults and whatever
+else must be discovered by the driver probe function. Then, have the rest
+of the driver operate off of the contents of that struct. Doing so should
+allow most divergence between ACPI and DT functionality to be kept local to
+the probe function instead of being scattered throughout the driver. For
+example:
+
+static int device_probe_dt(struct platform_device *pdev)
+{
+ /* DT specific functionality */
+ ...
+}
+
+static int device_probe_acpi(struct platform_device *pdev)
+{
+ /* ACPI specific functionality */
+ ...
+}
+
+static int device_probe(struct platform_device *pdev)
+{
+ ...
+ struct device_node node = pdev->dev.of_node;
+ ...
+
+ if (node)
+ ret = device_probe_dt(pdev);
+ else if (ACPI_HANDLE(&pdev->dev))
+ ret = device_probe_acpi(pdev);
+ else
+ /* other initialization */
+ ...
+ /* Continue with any generic probe operations */
+ ...
+}
+
+DO keep the MODULE_DEVICE_TABLE entries together in the driver to make it
+clear the different names the driver is probed for, both from DT and from
+ACPI:
+
+static struct of_device_id virtio_mmio_match[] = {
+ { .compatible = "virtio,mmio", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, virtio_mmio_match);
+
+static const struct acpi_device_id virtio_mmio_acpi_match[] = {
+ { "LNRO0005", },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, virtio_mmio_acpi_match);
+
+
+ASWG
+----
+The ACPI specification changes regularly. During the year 2014, for instance,
+version 5.1 was released and version 6.0 substantially completed, with most of
+the changes being driven by ARM-specific requirements. Proposed changes are
+presented and discussed in the ASWG (ACPI Specification Working Group) which
+is a part of the UEFI Forum.
+
+Participation in this group is open to all UEFI members. Please see
+http://www.uefi.org/workinggroup for details on group membership.
+
+It is the intent of the ARMv8 ACPI kernel code to follow the ACPI specification
+as closely as possible, and to only implement functionality that complies with
+the released standards from UEFI ASWG. As a practical matter, there will be
+vendors that provide bad ACPI tables or violate the standards in some way.
+If this is because of errors, quirks and fixups may be necessary, but will
+be avoided if possible. If there are features missing from ACPI that preclude
+it from being used on a platform, ECRs (Engineering Change Requests) should be
+submitted to ASWG and go through the normal approval process; for those that
+are not UEFI members, many other members of the Linux community are and would
+likely be willing to assist in submitting ECRs.
+
+
+Linux Code
+----------
+Individual items specific to Linux on ARM, contained in the the Linux
+source code, are in the list that follows:
+
+ACPI_OS_NAME This macro defines the string to be returned when
+ an ACPI method invokes the _OS method. On ARM64
+ systems, this macro will be "Linux" by default.
+ The command line parameter acpi_os=<string>
+ can be used to set it to some other value. The
+ default value for other architectures is "Microsoft
+ Windows NT", for example.
+
+ACPI Objects
+------------
+Detailed expectations for ACPI tables and object are listed in the file
+Documentation/arm64/acpi_object_usage.txt.
+
+
+References
+----------
+[0] http://silver.arm.com -- document ARM-DEN-0029, or newer
+ "Server Base System Architecture", version 2.3, dated 27 Mar 2014
+
+[1] http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf
+ Document ARM-DEN-0044A, or newer: "Server Base Boot Requirements, System
+ Software on ARM Platforms", dated 16 Aug 2014
+
+[2] http://www.secretlab.ca/archives/151, 10 Jan 2015, Copyright (c) 2015,
+ Linaro Ltd., written by Grant Likely. A copy of the verbatim text (apart
+ from formatting) is also in Documentation/arm64/why_use_acpi.txt.
+
+[3] AMD ACPI for Seattle platform documentation:
+ http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2012/10/Seattle_ACPI_Guide.pdf
+
+[4] http://www.uefi.org/acpi -- please see the link for the "ACPI _DSD Device
+ Property Registry Instructions"
+
+[5] http://www.uefi.org/acpi -- please see the link for the "_DSD (Device
+ Specific Data) Implementation Guide"
+
+[6] Kernel code for the unified device property interface can be found in
+ include/linux/property.h and drivers/base/property.c.
+
+
+Authors
+-------
+Al Stone <al.stone@linaro.org>
+Graeme Gregory <graeme.gregory@linaro.org>
+Hanjun Guo <hanjun.guo@linaro.org>
+
+Grant Likely <grant.likely@linaro.org>, for the "Why ACPI on ARM?" section
diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
new file mode 100644
index 000000000..f3c05b5f9
--- /dev/null
+++ b/Documentation/arm64/booting.txt
@@ -0,0 +1,224 @@
+ Booting AArch64 Linux
+ =====================
+
+Author: Will Deacon <will.deacon@arm.com>
+Date : 07 September 2012
+
+This document is based on the ARM booting document by Russell King and
+is relevant to all public releases of the AArch64 Linux kernel.
+
+The AArch64 exception model is made up of a number of exception levels
+(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
+counterpart. EL2 is the hypervisor level and exists only in non-secure
+mode. EL3 is the highest priority level and exists only in secure mode.
+
+For the purposes of this document, we will use the term `boot loader'
+simply to define all software that executes on the CPU(s) before control
+is passed to the Linux kernel. This may include secure monitor and
+hypervisor code, or it may just be a handful of instructions for
+preparing a minimal boot environment.
+
+Essentially, the boot loader should provide (as a minimum) the
+following:
+
+1. Setup and initialise the RAM
+2. Setup the device tree
+3. Decompress the kernel image
+4. Call the kernel image
+
+
+1. Setup and initialise RAM
+---------------------------
+
+Requirement: MANDATORY
+
+The boot loader is expected to find and initialise all RAM that the
+kernel will use for volatile data storage in the system. It performs
+this in a machine dependent manner. (It may use internal algorithms
+to automatically locate and size all RAM, or it may use knowledge of
+the RAM in the machine, or any other method the boot loader designer
+sees fit.)
+
+
+2. Setup the device tree
+-------------------------
+
+Requirement: MANDATORY
+
+The device tree blob (dtb) must be placed on an 8-byte boundary within
+the first 512 megabytes from the start of the kernel image and must not
+cross a 2-megabyte boundary. This is to allow the kernel to map the
+blob using a single section mapping in the initial page tables.
+
+
+3. Decompress the kernel image
+------------------------------
+
+Requirement: OPTIONAL
+
+The AArch64 kernel does not currently provide a decompressor and
+therefore requires decompression (gzip etc.) to be performed by the boot
+loader if a compressed Image target (e.g. Image.gz) is used. For
+bootloaders that do not implement this requirement, the uncompressed
+Image target is available instead.
+
+
+4. Call the kernel image
+------------------------
+
+Requirement: MANDATORY
+
+The decompressed kernel image contains a 64-byte header as follows:
+
+ u32 code0; /* Executable code */
+ u32 code1; /* Executable code */
+ u64 text_offset; /* Image load offset, little endian */
+ u64 image_size; /* Effective Image size, little endian */
+ u64 flags; /* kernel flags, little endian */
+ u64 res2 = 0; /* reserved */
+ u64 res3 = 0; /* reserved */
+ u64 res4 = 0; /* reserved */
+ u32 magic = 0x644d5241; /* Magic number, little endian, "ARM\x64" */
+ u32 res5; /* reserved (used for PE COFF offset) */
+
+
+Header notes:
+
+- As of v3.17, all fields are little endian unless stated otherwise.
+
+- code0/code1 are responsible for branching to stext.
+
+- when booting through EFI, code0/code1 are initially skipped.
+ res5 is an offset to the PE header and the PE header has the EFI
+ entry point (efi_stub_entry). When the stub has done its work, it
+ jumps to code0 to resume the normal boot process.
+
+- Prior to v3.17, the endianness of text_offset was not specified. In
+ these cases image_size is zero and text_offset is 0x80000 in the
+ endianness of the kernel. Where image_size is non-zero image_size is
+ little-endian and must be respected. Where image_size is zero,
+ text_offset can be assumed to be 0x80000.
+
+- The flags field (introduced in v3.17) is a little-endian 64-bit field
+ composed as follows:
+ Bit 0: Kernel endianness. 1 if BE, 0 if LE.
+ Bits 1-63: Reserved.
+
+- When image_size is zero, a bootloader should attempt to keep as much
+ memory as possible free for use by the kernel immediately after the
+ end of the kernel image. The amount of space required will vary
+ depending on selected features, and is effectively unbound.
+
+The Image must be placed text_offset bytes from a 2MB aligned base
+address near the start of usable system RAM and called there. Memory
+below that base address is currently unusable by Linux, and therefore it
+is strongly recommended that this location is the start of system RAM.
+At least image_size bytes from the start of the image must be free for
+use by the kernel.
+
+Any memory described to the kernel (even that below the 2MB aligned base
+address) which is not marked as reserved from the kernel e.g. with a
+memreserve region in the device tree) will be considered as available to
+the kernel.
+
+Before jumping into the kernel, the following conditions must be met:
+
+- Quiesce all DMA capable devices so that memory does not get
+ corrupted by bogus network packets or disk data. This will save
+ you many hours of debug.
+
+- Primary CPU general-purpose register settings
+ x0 = physical address of device tree blob (dtb) in system RAM.
+ x1 = 0 (reserved for future use)
+ x2 = 0 (reserved for future use)
+ x3 = 0 (reserved for future use)
+
+- CPU mode
+ All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
+ IRQ and FIQ).
+ The CPU must be in either EL2 (RECOMMENDED in order to have access to
+ the virtualisation extensions) or non-secure EL1.
+
+- Caches, MMUs
+ The MMU must be off.
+ Instruction cache may be on or off.
+ The address range corresponding to the loaded kernel image must be
+ cleaned to the PoC. In the presence of a system cache or other
+ coherent masters with caches enabled, this will typically require
+ cache maintenance by VA rather than set/way operations.
+ System caches which respect the architected cache maintenance by VA
+ operations must be configured and may be enabled.
+ System caches which do not respect architected cache maintenance by VA
+ operations (not recommended) must be configured and disabled.
+
+- Architected timers
+ CNTFRQ must be programmed with the timer frequency and CNTVOFF must
+ be programmed with a consistent value on all CPUs. If entering the
+ kernel at EL1, CNTHCTL_EL2 must have EL1PCTEN (bit 0) set where
+ available.
+
+- Coherency
+ All CPUs to be booted by the kernel must be part of the same coherency
+ domain on entry to the kernel. This may require IMPLEMENTATION DEFINED
+ initialisation to enable the receiving of maintenance operations on
+ each CPU.
+
+- System registers
+ All writable architected system registers at the exception level where
+ the kernel image will be entered must be initialised by software at a
+ higher exception level to prevent execution in an UNKNOWN state.
+
+ For systems with a GICv3 interrupt controller:
+ - If EL3 is present:
+ ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1.
+ ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1.
+ - If the kernel is entered at EL1:
+ ICC.SRE_EL2.Enable (bit 3) must be initialised to 0b1
+ ICC_SRE_EL2.SRE (bit 0) must be initialised to 0b1.
+
+The requirements described above for CPU mode, caches, MMUs, architected
+timers, coherency and system registers apply to all CPUs. All CPUs must
+enter the kernel in the same exception level.
+
+The boot loader is expected to enter the kernel on each CPU in the
+following manner:
+
+- The primary CPU must jump directly to the first instruction of the
+ kernel image. The device tree blob passed by this CPU must contain
+ an 'enable-method' property for each cpu node. The supported
+ enable-methods are described below.
+
+ It is expected that the bootloader will generate these device tree
+ properties and insert them into the blob prior to kernel entry.
+
+- CPUs with a "spin-table" enable-method must have a 'cpu-release-addr'
+ property in their cpu node. This property identifies a
+ naturally-aligned 64-bit zero-initalised memory location.
+
+ These CPUs should spin outside of the kernel in a reserved area of
+ memory (communicated to the kernel by a /memreserve/ region in the
+ device tree) polling their cpu-release-addr location, which must be
+ contained in the reserved region. A wfe instruction may be inserted
+ to reduce the overhead of the busy-loop and a sev will be issued by
+ the primary CPU. When a read of the location pointed to by the
+ cpu-release-addr returns a non-zero value, the CPU must jump to this
+ value. The value will be written as a single 64-bit little-endian
+ value, so CPUs must convert the read value to their native endianness
+ before jumping to it.
+
+- CPUs with a "psci" enable method should remain outside of
+ the kernel (i.e. outside of the regions of memory described to the
+ kernel in the memory node, or in a reserved area of memory described
+ to the kernel by a /memreserve/ region in the device tree). The
+ kernel will issue CPU_ON calls as described in ARM document number ARM
+ DEN 0022A ("Power State Coordination Interface System Software on ARM
+ processors") to bring CPUs into the kernel.
+
+ The device tree should contain a 'psci' node, as described in
+ Documentation/devicetree/bindings/arm/psci.txt.
+
+- Secondary CPU general-purpose register settings
+ x0 = 0 (reserved for future use)
+ x1 = 0 (reserved for future use)
+ x2 = 0 (reserved for future use)
+ x3 = 0 (reserved for future use)
diff --git a/Documentation/arm64/legacy_instructions.txt b/Documentation/arm64/legacy_instructions.txt
new file mode 100644
index 000000000..01bf3d9fa
--- /dev/null
+++ b/Documentation/arm64/legacy_instructions.txt
@@ -0,0 +1,57 @@
+The arm64 port of the Linux kernel provides infrastructure to support
+emulation of instructions which have been deprecated, or obsoleted in
+the architecture. The infrastructure code uses undefined instruction
+hooks to support emulation. Where available it also allows turning on
+the instruction execution in hardware.
+
+The emulation mode can be controlled by writing to sysctl nodes
+(/proc/sys/abi). The following explains the different execution
+behaviours and the corresponding values of the sysctl nodes -
+
+* Undef
+ Value: 0
+ Generates undefined instruction abort. Default for instructions that
+ have been obsoleted in the architecture, e.g., SWP
+
+* Emulate
+ Value: 1
+ Uses software emulation. To aid migration of software, in this mode
+ usage of emulated instruction is traced as well as rate limited
+ warnings are issued. This is the default for deprecated
+ instructions, .e.g., CP15 barriers
+
+* Hardware Execution
+ Value: 2
+ Although marked as deprecated, some implementations may support the
+ enabling/disabling of hardware support for the execution of these
+ instructions. Using hardware execution generally provides better
+ performance, but at the loss of ability to gather runtime statistics
+ about the use of the deprecated instructions.
+
+The default mode depends on the status of the instruction in the
+architecture. Deprecated instructions should default to emulation
+while obsolete instructions must be undefined by default.
+
+Note: Instruction emulation may not be possible in all cases. See
+individual instruction notes for further information.
+
+Supported legacy instructions
+-----------------------------
+* SWP{B}
+Node: /proc/sys/abi/swp
+Status: Obsolete
+Default: Undef (0)
+
+* CP15 Barriers
+Node: /proc/sys/abi/cp15_barrier
+Status: Deprecated
+Default: Emulate (1)
+
+* SETEND
+Node: /proc/sys/abi/setend
+Status: Deprecated
+Default: Emulate (1)*
+Note: All the cpus on the system must have mixed endian support at EL0
+for this feature to be enabled. If a new CPU - which doesn't support mixed
+endian - is hotplugged in after this feature has been enabled, there could
+be unexpected results in the application.
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
new file mode 100644
index 000000000..d7273a5f6
--- /dev/null
+++ b/Documentation/arm64/memory.txt
@@ -0,0 +1,94 @@
+ Memory Layout on AArch64 Linux
+ ==============================
+
+Author: Catalin Marinas <catalin.marinas@arm.com>
+
+This document describes the virtual memory layout used by the AArch64
+Linux kernel. The architecture allows up to 4 levels of translation
+tables with a 4KB page size and up to 3 levels with a 64KB page size.
+
+AArch64 Linux uses either 3 levels or 4 levels of translation tables
+with the 4KB page configuration, allowing 39-bit (512GB) or 48-bit
+(256TB) virtual addresses, respectively, for both user and kernel. With
+64KB pages, only 2 levels of translation tables, allowing 42-bit (4TB)
+virtual address, are used but the memory layout is the same.
+
+User addresses have bits 63:48 set to 0 while the kernel addresses have
+the same bits set to 1. TTBRx selection is given by bit 63 of the
+virtual address. The swapper_pg_dir contains only kernel (global)
+mappings while the user pgd contains only user (non-global) mappings.
+The swapper_pg_dir address is written to TTBR1 and never written to
+TTBR0.
+
+
+AArch64 Linux memory layout with 4KB pages + 3 levels:
+
+Start End Size Use
+-----------------------------------------------------------------------
+0000000000000000 0000007fffffffff 512GB user
+ffffff8000000000 ffffffffffffffff 512GB kernel
+
+
+AArch64 Linux memory layout with 4KB pages + 4 levels:
+
+Start End Size Use
+-----------------------------------------------------------------------
+0000000000000000 0000ffffffffffff 256TB user
+ffff000000000000 ffffffffffffffff 256TB kernel
+
+
+AArch64 Linux memory layout with 64KB pages + 2 levels:
+
+Start End Size Use
+-----------------------------------------------------------------------
+0000000000000000 000003ffffffffff 4TB user
+fffffc0000000000 ffffffffffffffff 4TB kernel
+
+
+AArch64 Linux memory layout with 64KB pages + 3 levels:
+
+Start End Size Use
+-----------------------------------------------------------------------
+0000000000000000 0000ffffffffffff 256TB user
+ffff000000000000 ffffffffffffffff 256TB kernel
+
+
+For details of the virtual kernel memory layout please see the kernel
+booting log.
+
+
+Translation table lookup with 4KB pages:
+
++--------+--------+--------+--------+--------+--------+--------+--------+
+|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
++--------+--------+--------+--------+--------+--------+--------+--------+
+ | | | | | |
+ | | | | | v
+ | | | | | [11:0] in-page offset
+ | | | | +-> [20:12] L3 index
+ | | | +-----------> [29:21] L2 index
+ | | +---------------------> [38:30] L1 index
+ | +-------------------------------> [47:39] L0 index
+ +-------------------------------------------------> [63] TTBR0/1
+
+
+Translation table lookup with 64KB pages:
+
++--------+--------+--------+--------+--------+--------+--------+--------+
+|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
++--------+--------+--------+--------+--------+--------+--------+--------+
+ | | | | |
+ | | | | v
+ | | | | [15:0] in-page offset
+ | | | +----------> [28:16] L3 index
+ | | +--------------------------> [41:29] L2 index
+ | +-------------------------------> [47:42] L1 index
+ +-------------------------------------------------> [63] TTBR0/1
+
+
+When using KVM, the hypervisor maps kernel pages in EL2, at a fixed
+offset from the kernel VA (top 24bits of the kernel VA set to zero):
+
+Start End Size Use
+-----------------------------------------------------------------------
+0000004000000000 0000007fffffffff 256GB kernel objects mapped in HYP
diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt
new file mode 100644
index 000000000..d9995f1f5
--- /dev/null
+++ b/Documentation/arm64/tagged-pointers.txt
@@ -0,0 +1,34 @@
+ Tagged virtual addresses in AArch64 Linux
+ =========================================
+
+Author: Will Deacon <will.deacon@arm.com>
+Date : 12 June 2013
+
+This document briefly describes the provision of tagged virtual
+addresses in the AArch64 translation system and their potential uses
+in AArch64 Linux.
+
+The kernel configures the translation tables so that translations made
+via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
+the virtual address ignored by the translation hardware. This frees up
+this byte for application use, with the following caveats:
+
+ (1) The kernel requires that all user addresses passed to EL1
+ are tagged with tag 0x00. This means that any syscall
+ parameters containing user virtual addresses *must* have
+ their top byte cleared before trapping to the kernel.
+
+ (2) Non-zero tags are not preserved when delivering signals.
+ This means that signal handlers in applications making use
+ of tags cannot rely on the tag information for user virtual
+ addresses being maintained for fields inside siginfo_t.
+ One exception to this rule is for signals raised in response
+ to watchpoint debug exceptions, where the tag information
+ will be preserved.
+
+ (3) Special care should be taken when using tagged pointers,
+ since it is likely that C compilers will not hazard two
+ virtual addresses differing only in the upper byte.
+
+The architecture prevents the use of a tagged PC, so the upper byte will
+be set to a sign-extension of bit 55 on exception return.
diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
new file mode 100644
index 000000000..2f2c6cdd7
--- /dev/null
+++ b/Documentation/assoc_array.txt
@@ -0,0 +1,574 @@
+ ========================================
+ GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION
+ ========================================
+
+Contents:
+
+ - Overview.
+
+ - The public API.
+ - Edit script.
+ - Operations table.
+ - Manipulation functions.
+ - Access functions.
+ - Index key form.
+
+ - Internal workings.
+ - Basic internal tree layout.
+ - Shortcuts.
+ - Splitting and collapsing nodes.
+ - Non-recursive iteration.
+ - Simultaneous alteration and iteration.
+
+
+========
+OVERVIEW
+========
+
+This associative array implementation is an object container with the following
+properties:
+
+ (1) Objects are opaque pointers. The implementation does not care where they
+ point (if anywhere) or what they point to (if anything).
+
+ [!] NOTE: Pointers to objects _must_ be zero in the least significant bit.
+
+ (2) Objects do not need to contain linkage blocks for use by the array. This
+ permits an object to be located in multiple arrays simultaneously.
+ Rather, the array is made up of metadata blocks that point to objects.
+
+ (3) Objects require index keys to locate them within the array.
+
+ (4) Index keys must be unique. Inserting an object with the same key as one
+ already in the array will replace the old object.
+
+ (5) Index keys can be of any length and can be of different lengths.
+
+ (6) Index keys should encode the length early on, before any variation due to
+ length is seen.
+
+ (7) Index keys can include a hash to scatter objects throughout the array.
+
+ (8) The array can iterated over. The objects will not necessarily come out in
+ key order.
+
+ (9) The array can be iterated over whilst it is being modified, provided the
+ RCU readlock is being held by the iterator. Note, however, under these
+ circumstances, some objects may be seen more than once. If this is a
+ problem, the iterator should lock against modification. Objects will not
+ be missed, however, unless deleted.
+
+(10) Objects in the array can be looked up by means of their index key.
+
+(11) Objects can be looked up whilst the array is being modified, provided the
+ RCU readlock is being held by the thread doing the look up.
+
+The implementation uses a tree of 16-pointer nodes internally that are indexed
+on each level by nibbles from the index key in the same manner as in a radix
+tree. To improve memory efficiency, shortcuts can be emplaced to skip over
+what would otherwise be a series of single-occupancy nodes. Further, nodes
+pack leaf object pointers into spare space in the node rather than making an
+extra branch until as such time an object needs to be added to a full node.
+
+
+==============
+THE PUBLIC API
+==============
+
+The public API can be found in <linux/assoc_array.h>. The associative array is
+rooted on the following structure:
+
+ struct assoc_array {
+ ...
+ };
+
+The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY.
+
+
+EDIT SCRIPT
+-----------
+
+The insertion and deletion functions produce an 'edit script' that can later be
+applied to effect the changes without risking ENOMEM. This retains the
+preallocated metadata blocks that will be installed in the internal tree and
+keeps track of the metadata blocks that will be removed from the tree when the
+script is applied.
+
+This is also used to keep track of dead blocks and dead objects after the
+script has been applied so that they can be freed later. The freeing is done
+after an RCU grace period has passed - thus allowing access functions to
+proceed under the RCU read lock.
+
+The script appears as outside of the API as a pointer of the type:
+
+ struct assoc_array_edit;
+
+There are two functions for dealing with the script:
+
+ (1) Apply an edit script.
+
+ void assoc_array_apply_edit(struct assoc_array_edit *edit);
+
+ This will perform the edit functions, interpolating various write barriers
+ to permit accesses under the RCU read lock to continue. The edit script
+ will then be passed to call_rcu() to free it and any dead stuff it points
+ to.
+
+ (2) Cancel an edit script.
+
+ void assoc_array_cancel_edit(struct assoc_array_edit *edit);
+
+ This frees the edit script and all preallocated memory immediately. If
+ this was for insertion, the new object is _not_ released by this function,
+ but must rather be released by the caller.
+
+These functions are guaranteed not to fail.
+
+
+OPERATIONS TABLE
+----------------
+
+Various functions take a table of operations:
+
+ struct assoc_array_ops {
+ ...
+ };
+
+This points to a number of methods, all of which need to be provided:
+
+ (1) Get a chunk of index key from caller data:
+
+ unsigned long (*get_key_chunk)(const void *index_key, int level);
+
+ This should return a chunk of caller-supplied index key starting at the
+ *bit* position given by the level argument. The level argument will be a
+ multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return
+ ASSOC_ARRAY_KEY_CHUNK_SIZE bits. No error is possible.
+
+
+ (2) Get a chunk of an object's index key.
+
+ unsigned long (*get_object_key_chunk)(const void *object, int level);
+
+ As the previous function, but gets its data from an object in the array
+ rather than from a caller-supplied index key.
+
+
+ (3) See if this is the object we're looking for.
+
+ bool (*compare_object)(const void *object, const void *index_key);
+
+ Compare the object against an index key and return true if it matches and
+ false if it doesn't.
+
+
+ (4) Diff the index keys of two objects.
+
+ int (*diff_objects)(const void *object, const void *index_key);
+
+ Return the bit position at which the index key of the specified object
+ differs from the given index key or -1 if they are the same.
+
+
+ (5) Free an object.
+
+ void (*free_object)(void *object);
+
+ Free the specified object. Note that this may be called an RCU grace
+ period after assoc_array_apply_edit() was called, so synchronize_rcu() may
+ be necessary on module unloading.
+
+
+MANIPULATION FUNCTIONS
+----------------------
+
+There are a number of functions for manipulating an associative array:
+
+ (1) Initialise an associative array.
+
+ void assoc_array_init(struct assoc_array *array);
+
+ This initialises the base structure for an associative array. It can't
+ fail.
+
+
+ (2) Insert/replace an object in an associative array.
+
+ struct assoc_array_edit *
+ assoc_array_insert(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key,
+ void *object);
+
+ This inserts the given object into the array. Note that the least
+ significant bit of the pointer must be zero as it's used to type-mark
+ pointers internally.
+
+ If an object already exists for that key then it will be replaced with the
+ new object and the old one will be freed automatically.
+
+ The index_key argument should hold index key information and is
+ passed to the methods in the ops table when they are called.
+
+ This function makes no alteration to the array itself, but rather returns
+ an edit script that must be applied. -ENOMEM is returned in the case of
+ an out-of-memory error.
+
+ The caller should lock exclusively against other modifiers of the array.
+
+
+ (3) Delete an object from an associative array.
+
+ struct assoc_array_edit *
+ assoc_array_delete(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key);
+
+ This deletes an object that matches the specified data from the array.
+
+ The index_key argument should hold index key information and is
+ passed to the methods in the ops table when they are called.
+
+ This function makes no alteration to the array itself, but rather returns
+ an edit script that must be applied. -ENOMEM is returned in the case of
+ an out-of-memory error. NULL will be returned if the specified object is
+ not found within the array.
+
+ The caller should lock exclusively against other modifiers of the array.
+
+
+ (4) Delete all objects from an associative array.
+
+ struct assoc_array_edit *
+ assoc_array_clear(struct assoc_array *array,
+ const struct assoc_array_ops *ops);
+
+ This deletes all the objects from an associative array and leaves it
+ completely empty.
+
+ This function makes no alteration to the array itself, but rather returns
+ an edit script that must be applied. -ENOMEM is returned in the case of
+ an out-of-memory error.
+
+ The caller should lock exclusively against other modifiers of the array.
+
+
+ (5) Destroy an associative array, deleting all objects.
+
+ void assoc_array_destroy(struct assoc_array *array,
+ const struct assoc_array_ops *ops);
+
+ This destroys the contents of the associative array and leaves it
+ completely empty. It is not permitted for another thread to be traversing
+ the array under the RCU read lock at the same time as this function is
+ destroying it as no RCU deferral is performed on memory release -
+ something that would require memory to be allocated.
+
+ The caller should lock exclusively against other modifiers and accessors
+ of the array.
+
+
+ (6) Garbage collect an associative array.
+
+ int assoc_array_gc(struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ bool (*iterator)(void *object, void *iterator_data),
+ void *iterator_data);
+
+ This iterates over the objects in an associative array and passes each one
+ to iterator(). If iterator() returns true, the object is kept. If it
+ returns false, the object will be freed. If the iterator() function
+ returns true, it must perform any appropriate refcount incrementing on the
+ object before returning.
+
+ The internal tree will be packed down if possible as part of the iteration
+ to reduce the number of nodes in it.
+
+ The iterator_data is passed directly to iterator() and is otherwise
+ ignored by the function.
+
+ The function will return 0 if successful and -ENOMEM if there wasn't
+ enough memory.
+
+ It is possible for other threads to iterate over or search the array under
+ the RCU read lock whilst this function is in progress. The caller should
+ lock exclusively against other modifiers of the array.
+
+
+ACCESS FUNCTIONS
+----------------
+
+There are two functions for accessing an associative array:
+
+ (1) Iterate over all the objects in an associative array.
+
+ int assoc_array_iterate(const struct assoc_array *array,
+ int (*iterator)(const void *object,
+ void *iterator_data),
+ void *iterator_data);
+
+ This passes each object in the array to the iterator callback function.
+ iterator_data is private data for that function.
+
+ This may be used on an array at the same time as the array is being
+ modified, provided the RCU read lock is held. Under such circumstances,
+ it is possible for the iteration function to see some objects twice. If
+ this is a problem, then modification should be locked against. The
+ iteration algorithm should not, however, miss any objects.
+
+ The function will return 0 if no objects were in the array or else it will
+ return the result of the last iterator function called. Iteration stops
+ immediately if any call to the iteration function results in a non-zero
+ return.
+
+
+ (2) Find an object in an associative array.
+
+ void *assoc_array_find(const struct assoc_array *array,
+ const struct assoc_array_ops *ops,
+ const void *index_key);
+
+ This walks through the array's internal tree directly to the object
+ specified by the index key..
+
+ This may be used on an array at the same time as the array is being
+ modified, provided the RCU read lock is held.
+
+ The function will return the object if found (and set *_type to the object
+ type) or will return NULL if the object was not found.
+
+
+INDEX KEY FORM
+--------------
+
+The index key can be of any form, but since the algorithms aren't told how long
+the key is, it is strongly recommended that the index key includes its length
+very early on before any variation due to the length would have an effect on
+comparisons.
+
+This will cause leaves with different length keys to scatter away from each
+other - and those with the same length keys to cluster together.
+
+It is also recommended that the index key begin with a hash of the rest of the
+key to maximise scattering throughout keyspace.
+
+The better the scattering, the wider and lower the internal tree will be.
+
+Poor scattering isn't too much of a problem as there are shortcuts and nodes
+can contain mixtures of leaves and metadata pointers.
+
+The index key is read in chunks of machine word. Each chunk is subdivided into
+one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and
+on a 64-bit CPU, 16 levels. Unless the scattering is really poor, it is
+unlikely that more than one word of any particular index key will have to be
+used.
+
+
+=================
+INTERNAL WORKINGS
+=================
+
+The associative array data structure has an internal tree. This tree is
+constructed of two types of metadata blocks: nodes and shortcuts.
+
+A node is an array of slots. Each slot can contain one of four things:
+
+ (*) A NULL pointer, indicating that the slot is empty.
+
+ (*) A pointer to an object (a leaf).
+
+ (*) A pointer to a node at the next level.
+
+ (*) A pointer to a shortcut.
+
+
+BASIC INTERNAL TREE LAYOUT
+--------------------------
+
+Ignoring shortcuts for the moment, the nodes form a multilevel tree. The index
+key space is strictly subdivided by the nodes in the tree and nodes occur on
+fixed levels. For example:
+
+ Level: 0 1 2 3
+ =============== =============== =============== ===============
+ NODE D
+ NODE B NODE C +------>+---+
+ +------>+---+ +------>+---+ | | 0 |
+ NODE A | | 0 | | | 0 | | +---+
+ +---+ | +---+ | +---+ | : :
+ | 0 | | : : | : : | +---+
+ +---+ | +---+ | +---+ | | f |
+ | 1 |---+ | 3 |---+ | 7 |---+ +---+
+ +---+ +---+ +---+
+ : : : : | 8 |---+
+ +---+ +---+ +---+ | NODE E
+ | e |---+ | f | : : +------>+---+
+ +---+ | +---+ +---+ | 0 |
+ | f | | | f | +---+
+ +---+ | +---+ : :
+ | NODE F +---+
+ +------>+---+ | f |
+ | 0 | NODE G +---+
+ +---+ +------>+---+
+ : : | | 0 |
+ +---+ | +---+
+ | 6 |---+ : :
+ +---+ +---+
+ : : | f |
+ +---+ +---+
+ | f |
+ +---+
+
+In the above example, there are 7 nodes (A-G), each with 16 slots (0-f).
+Assuming no other meta data nodes in the tree, the key space is divided thusly:
+
+ KEY PREFIX NODE
+ ========== ====
+ 137* D
+ 138* E
+ 13[0-69-f]* C
+ 1[0-24-f]* B
+ e6* G
+ e[0-57-f]* F
+ [02-df]* A
+
+So, for instance, keys with the following example index keys will be found in
+the appropriate nodes:
+
+ INDEX KEY PREFIX NODE
+ =============== ======= ====
+ 13694892892489 13 C
+ 13795289025897 137 D
+ 13889dde88793 138 E
+ 138bbb89003093 138 E
+ 1394879524789 12 C
+ 1458952489 1 B
+ 9431809de993ba - A
+ b4542910809cd - A
+ e5284310def98 e F
+ e68428974237 e6 G
+ e7fffcbd443 e F
+ f3842239082 - A
+
+To save memory, if a node can hold all the leaves in its portion of keyspace,
+then the node will have all those leaves in it and will not have any metadata
+pointers - even if some of those leaves would like to be in the same slot.
+
+A node can contain a heterogeneous mix of leaves and metadata pointers.
+Metadata pointers must be in the slots that match their subdivisions of key
+space. The leaves can be in any slot not occupied by a metadata pointer. It
+is guaranteed that none of the leaves in a node will match a slot occupied by a
+metadata pointer. If the metadata pointer is there, any leaf whose key matches
+the metadata key prefix must be in the subtree that the metadata pointer points
+to.
+
+In the above example list of index keys, node A will contain:
+
+ SLOT CONTENT INDEX KEY (PREFIX)
+ ==== =============== ==================
+ 1 PTR TO NODE B 1*
+ any LEAF 9431809de993ba
+ any LEAF b4542910809cd
+ e PTR TO NODE F e*
+ any LEAF f3842239082
+
+and node B:
+
+ 3 PTR TO NODE C 13*
+ any LEAF 1458952489
+
+
+SHORTCUTS
+---------
+
+Shortcuts are metadata records that jump over a piece of keyspace. A shortcut
+is a replacement for a series of single-occupancy nodes ascending through the
+levels. Shortcuts exist to save memory and to speed up traversal.
+
+It is possible for the root of the tree to be a shortcut - say, for example,
+the tree contains at least 17 nodes all with key prefix '1111'. The insertion
+algorithm will insert a shortcut to skip over the '1111' keyspace in a single
+bound and get to the fourth level where these actually become different.
+
+
+SPLITTING AND COLLAPSING NODES
+------------------------------
+
+Each node has a maximum capacity of 16 leaves and metadata pointers. If the
+insertion algorithm finds that it is trying to insert a 17th object into a
+node, that node will be split such that at least two leaves that have a common
+key segment at that level end up in a separate node rooted on that slot for
+that common key segment.
+
+If the leaves in a full node and the leaf that is being inserted are
+sufficiently similar, then a shortcut will be inserted into the tree.
+
+When the number of objects in the subtree rooted at a node falls to 16 or
+fewer, then the subtree will be collapsed down to a single node - and this will
+ripple towards the root if possible.
+
+
+NON-RECURSIVE ITERATION
+-----------------------
+
+Each node and shortcut contains a back pointer to its parent and the number of
+slot in that parent that points to it. None-recursive iteration uses these to
+proceed rootwards through the tree, going to the parent node, slot N + 1 to
+make sure progress is made without the need for a stack.
+
+The backpointers, however, make simultaneous alteration and iteration tricky.
+
+
+SIMULTANEOUS ALTERATION AND ITERATION
+-------------------------------------
+
+There are a number of cases to consider:
+
+ (1) Simple insert/replace. This involves simply replacing a NULL or old
+ matching leaf pointer with the pointer to the new leaf after a barrier.
+ The metadata blocks don't change otherwise. An old leaf won't be freed
+ until after the RCU grace period.
+
+ (2) Simple delete. This involves just clearing an old matching leaf. The
+ metadata blocks don't change otherwise. The old leaf won't be freed until
+ after the RCU grace period.
+
+ (3) Insertion replacing part of a subtree that we haven't yet entered. This
+ may involve replacement of part of that subtree - but that won't affect
+ the iteration as we won't have reached the pointer to it yet and the
+ ancestry blocks are not replaced (the layout of those does not change).
+
+ (4) Insertion replacing nodes that we're actively processing. This isn't a
+ problem as we've passed the anchoring pointer and won't switch onto the
+ new layout until we follow the back pointers - at which point we've
+ already examined the leaves in the replaced node (we iterate over all the
+ leaves in a node before following any of its metadata pointers).
+
+ We might, however, re-see some leaves that have been split out into a new
+ branch that's in a slot further along than we were at.
+
+ (5) Insertion replacing nodes that we're processing a dependent branch of.
+ This won't affect us until we follow the back pointers. Similar to (4).
+
+ (6) Deletion collapsing a branch under us. This doesn't affect us because the
+ back pointers will get us back to the parent of the new node before we
+ could see the new node. The entire collapsed subtree is thrown away
+ unchanged - and will still be rooted on the same slot, so we shouldn't
+ process it a second time as we'll go back to slot + 1.
+
+Note:
+
+ (*) Under some circumstances, we need to simultaneously change the parent
+ pointer and the parent slot pointer on a node (say, for example, we
+ inserted another node before it and moved it up a level). We cannot do
+ this without locking against a read - so we have to replace that node too.
+
+ However, when we're changing a shortcut into a node this isn't a problem
+ as shortcuts only have one slot and so the parent slot number isn't used
+ when traversing backwards over one. This means that it's okay to change
+ the slot number first - provided suitable barriers are used to make sure
+ the parent slot number is read after the back pointer.
+
+Obsolete blocks and leaves are freed up after an RCU grace period has passed,
+so as long as anyone doing walking or iteration holds the RCU read lock, the
+old superstructure should not go away on them.
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
new file mode 100644
index 000000000..dab6da338
--- /dev/null
+++ b/Documentation/atomic_ops.txt
@@ -0,0 +1,634 @@
+ Semantics and Behavior of Atomic and
+ Bitmask Operations
+
+ David S. Miller
+
+ This document is intended to serve as a guide to Linux port
+maintainers on how to implement atomic counter, bitops, and spinlock
+interfaces properly.
+
+ The atomic_t type should be defined as a signed integer and
+the atomic_long_t type as a signed long integer. Also, they should
+be made opaque such that any kind of cast to a normal C integer type
+will fail. Something like the following should suffice:
+
+ typedef struct { int counter; } atomic_t;
+ typedef struct { long counter; } atomic_long_t;
+
+Historically, counter has been declared volatile. This is now discouraged.
+See Documentation/volatile-considered-harmful.txt for the complete rationale.
+
+local_t is very similar to atomic_t. If the counter is per CPU and only
+updated by one CPU, local_t is probably more appropriate. Please see
+Documentation/local_ops.txt for the semantics of local_t.
+
+The first operations to implement for atomic_t's are the initializers and
+plain reads.
+
+ #define ATOMIC_INIT(i) { (i) }
+ #define atomic_set(v, i) ((v)->counter = (i))
+
+The first macro is used in definitions, such as:
+
+static atomic_t my_counter = ATOMIC_INIT(1);
+
+The initializer is atomic in that the return values of the atomic operations
+are guaranteed to be correct reflecting the initialized value if the
+initializer is used before runtime. If the initializer is used at runtime, a
+proper implicit or explicit read memory barrier is needed before reading the
+value with atomic_read from another thread.
+
+As with all of the atomic_ interfaces, replace the leading "atomic_"
+with "atomic_long_" to operate on atomic_long_t.
+
+The second interface can be used at runtime, as in:
+
+ struct foo { atomic_t counter; };
+ ...
+
+ struct foo *k;
+
+ k = kmalloc(sizeof(*k), GFP_KERNEL);
+ if (!k)
+ return -ENOMEM;
+ atomic_set(&k->counter, 0);
+
+The setting is atomic in that the return values of the atomic operations by
+all threads are guaranteed to be correct reflecting either the value that has
+been set with this operation or set with another operation. A proper implicit
+or explicit memory barrier is needed before the value set with the operation
+is guaranteed to be readable with atomic_read from another thread.
+
+Next, we have:
+
+ #define atomic_read(v) ((v)->counter)
+
+which simply reads the counter value currently visible to the calling thread.
+The read is atomic in that the return value is guaranteed to be one of the
+values initialized or modified with the interface operations if a proper
+implicit or explicit memory barrier is used after possible runtime
+initialization by any other thread and the value is modified only with the
+interface operations. atomic_read does not guarantee that the runtime
+initialization by any other thread is visible yet, so the user of the
+interface must take care of that with a proper implicit or explicit memory
+barrier.
+
+*** WARNING: atomic_read() and atomic_set() DO NOT IMPLY BARRIERS! ***
+
+Some architectures may choose to use the volatile keyword, barriers, or inline
+assembly to guarantee some degree of immediacy for atomic_read() and
+atomic_set(). This is not uniformly guaranteed, and may change in the future,
+so all users of atomic_t should treat atomic_read() and atomic_set() as simple
+C statements that may be reordered or optimized away entirely by the compiler
+or processor, and explicitly invoke the appropriate compiler and/or memory
+barrier for each use case. Failure to do so will result in code that may
+suddenly break when used with different architectures or compiler
+optimizations, or even changes in unrelated code which changes how the
+compiler optimizes the section accessing atomic_t variables.
+
+*** YOU HAVE BEEN WARNED! ***
+
+Properly aligned pointers, longs, ints, and chars (and unsigned
+equivalents) may be atomically loaded from and stored to in the same
+sense as described for atomic_read() and atomic_set(). The ACCESS_ONCE()
+macro should be used to prevent the compiler from using optimizations
+that might otherwise optimize accesses out of existence on the one hand,
+or that might create unsolicited accesses on the other.
+
+For example consider the following code:
+
+ while (a > 0)
+ do_something();
+
+If the compiler can prove that do_something() does not store to the
+variable a, then the compiler is within its rights transforming this to
+the following:
+
+ tmp = a;
+ if (a > 0)
+ for (;;)
+ do_something();
+
+If you don't want the compiler to do this (and you probably don't), then
+you should use something like the following:
+
+ while (ACCESS_ONCE(a) < 0)
+ do_something();
+
+Alternatively, you could place a barrier() call in the loop.
+
+For another example, consider the following code:
+
+ tmp_a = a;
+ do_something_with(tmp_a);
+ do_something_else_with(tmp_a);
+
+If the compiler can prove that do_something_with() does not store to the
+variable a, then the compiler is within its rights to manufacture an
+additional load as follows:
+
+ tmp_a = a;
+ do_something_with(tmp_a);
+ tmp_a = a;
+ do_something_else_with(tmp_a);
+
+This could fatally confuse your code if it expected the same value
+to be passed to do_something_with() and do_something_else_with().
+
+The compiler would be likely to manufacture this additional load if
+do_something_with() was an inline function that made very heavy use
+of registers: reloading from variable a could save a flush to the
+stack and later reload. To prevent the compiler from attacking your
+code in this manner, write the following:
+
+ tmp_a = ACCESS_ONCE(a);
+ do_something_with(tmp_a);
+ do_something_else_with(tmp_a);
+
+For a final example, consider the following code, assuming that the
+variable a is set at boot time before the second CPU is brought online
+and never changed later, so that memory barriers are not needed:
+
+ if (a)
+ b = 9;
+ else
+ b = 42;
+
+The compiler is within its rights to manufacture an additional store
+by transforming the above code into the following:
+
+ b = 42;
+ if (a)
+ b = 9;
+
+This could come as a fatal surprise to other code running concurrently
+that expected b to never have the value 42 if a was zero. To prevent
+the compiler from doing this, write something like:
+
+ if (a)
+ ACCESS_ONCE(b) = 9;
+ else
+ ACCESS_ONCE(b) = 42;
+
+Don't even -think- about doing this without proper use of memory barriers,
+locks, or atomic operations if variable a can change at runtime!
+
+*** WARNING: ACCESS_ONCE() DOES NOT IMPLY A BARRIER! ***
+
+Now, we move onto the atomic operation interfaces typically implemented with
+the help of assembly code.
+
+ void atomic_add(int i, atomic_t *v);
+ void atomic_sub(int i, atomic_t *v);
+ void atomic_inc(atomic_t *v);
+ void atomic_dec(atomic_t *v);
+
+These four routines add and subtract integral values to/from the given
+atomic_t value. The first two routines pass explicit integers by
+which to make the adjustment, whereas the latter two use an implicit
+adjustment value of "1".
+
+One very important aspect of these two routines is that they DO NOT
+require any explicit memory barriers. They need only perform the
+atomic_t counter update in an SMP safe manner.
+
+Next, we have:
+
+ int atomic_inc_return(atomic_t *v);
+ int atomic_dec_return(atomic_t *v);
+
+These routines add 1 and subtract 1, respectively, from the given
+atomic_t and return the new counter value after the operation is
+performed.
+
+Unlike the above routines, it is required that these primitives
+include explicit memory barriers that are performed before and after
+the operation. It must be done such that all memory operations before
+and after the atomic operation calls are strongly ordered with respect
+to the atomic operation itself.
+
+For example, it should behave as if a smp_mb() call existed both
+before and after the atomic operation.
+
+If the atomic instructions used in an implementation provide explicit
+memory barrier semantics which satisfy the above requirements, that is
+fine as well.
+
+Let's move on:
+
+ int atomic_add_return(int i, atomic_t *v);
+ int atomic_sub_return(int i, atomic_t *v);
+
+These behave just like atomic_{inc,dec}_return() except that an
+explicit counter adjustment is given instead of the implicit "1".
+This means that like atomic_{inc,dec}_return(), the memory barrier
+semantics are required.
+
+Next:
+
+ int atomic_inc_and_test(atomic_t *v);
+ int atomic_dec_and_test(atomic_t *v);
+
+These two routines increment and decrement by 1, respectively, the
+given atomic counter. They return a boolean indicating whether the
+resulting counter value was zero or not.
+
+Again, these primitives provide explicit memory barrier semantics around
+the atomic operation.
+
+ int atomic_sub_and_test(int i, atomic_t *v);
+
+This is identical to atomic_dec_and_test() except that an explicit
+decrement is given instead of the implicit "1". This primitive must
+provide explicit memory barrier semantics around the operation.
+
+ int atomic_add_negative(int i, atomic_t *v);
+
+The given increment is added to the given atomic counter value. A boolean
+is return which indicates whether the resulting counter value is negative.
+This primitive must provide explicit memory barrier semantics around
+the operation.
+
+Then:
+
+ int atomic_xchg(atomic_t *v, int new);
+
+This performs an atomic exchange operation on the atomic variable v, setting
+the given new value. It returns the old value that the atomic variable v had
+just before the operation.
+
+atomic_xchg must provide explicit memory barriers around the operation.
+
+ int atomic_cmpxchg(atomic_t *v, int old, int new);
+
+This performs an atomic compare exchange operation on the atomic value v,
+with the given old and new values. Like all atomic_xxx operations,
+atomic_cmpxchg will only satisfy its atomicity semantics as long as all
+other accesses of *v are performed through atomic_xxx operations.
+
+atomic_cmpxchg must provide explicit memory barriers around the operation.
+
+The semantics for atomic_cmpxchg are the same as those defined for 'cas'
+below.
+
+Finally:
+
+ int atomic_add_unless(atomic_t *v, int a, int u);
+
+If the atomic value v is not equal to u, this function adds a to v, and
+returns non zero. If v is equal to u then it returns zero. This is done as
+an atomic operation.
+
+atomic_add_unless must provide explicit memory barriers around the
+operation unless it fails (returns 0).
+
+atomic_inc_not_zero, equivalent to atomic_add_unless(v, 1, 0)
+
+
+If a caller requires memory barrier semantics around an atomic_t
+operation which does not return a value, a set of interfaces are
+defined which accomplish this:
+
+ void smp_mb__before_atomic(void);
+ void smp_mb__after_atomic(void);
+
+For example, smp_mb__before_atomic() can be used like so:
+
+ obj->dead = 1;
+ smp_mb__before_atomic();
+ atomic_dec(&obj->ref_count);
+
+It makes sure that all memory operations preceding the atomic_dec()
+call are strongly ordered with respect to the atomic counter
+operation. In the above example, it guarantees that the assignment of
+"1" to obj->dead will be globally visible to other cpus before the
+atomic counter decrement.
+
+Without the explicit smp_mb__before_atomic() call, the
+implementation could legally allow the atomic counter update visible
+to other cpus before the "obj->dead = 1;" assignment.
+
+A missing memory barrier in the cases where they are required by the
+atomic_t implementation above can have disastrous results. Here is
+an example, which follows a pattern occurring frequently in the Linux
+kernel. It is the use of atomic counters to implement reference
+counting, and it works such that once the counter falls to zero it can
+be guaranteed that no other entity can be accessing the object:
+
+static void obj_list_add(struct obj *obj, struct list_head *head)
+{
+ obj->active = 1;
+ list_add(&obj->list, head);
+}
+
+static void obj_list_del(struct obj *obj)
+{
+ list_del(&obj->list);
+ obj->active = 0;
+}
+
+static void obj_destroy(struct obj *obj)
+{
+ BUG_ON(obj->active);
+ kfree(obj);
+}
+
+struct obj *obj_list_peek(struct list_head *head)
+{
+ if (!list_empty(head)) {
+ struct obj *obj;
+
+ obj = list_entry(head->next, struct obj, list);
+ atomic_inc(&obj->refcnt);
+ return obj;
+ }
+ return NULL;
+}
+
+void obj_poke(void)
+{
+ struct obj *obj;
+
+ spin_lock(&global_list_lock);
+ obj = obj_list_peek(&global_list);
+ spin_unlock(&global_list_lock);
+
+ if (obj) {
+ obj->ops->poke(obj);
+ if (atomic_dec_and_test(&obj->refcnt))
+ obj_destroy(obj);
+ }
+}
+
+void obj_timeout(struct obj *obj)
+{
+ spin_lock(&global_list_lock);
+ obj_list_del(obj);
+ spin_unlock(&global_list_lock);
+
+ if (atomic_dec_and_test(&obj->refcnt))
+ obj_destroy(obj);
+}
+
+(This is a simplification of the ARP queue management in the
+ generic neighbour discover code of the networking. Olaf Kirch
+ found a bug wrt. memory barriers in kfree_skb() that exposed
+ the atomic_t memory barrier requirements quite clearly.)
+
+Given the above scheme, it must be the case that the obj->active
+update done by the obj list deletion be visible to other processors
+before the atomic counter decrement is performed.
+
+Otherwise, the counter could fall to zero, yet obj->active would still
+be set, thus triggering the assertion in obj_destroy(). The error
+sequence looks like this:
+
+ cpu 0 cpu 1
+ obj_poke() obj_timeout()
+ obj = obj_list_peek();
+ ... gains ref to obj, refcnt=2
+ obj_list_del(obj);
+ obj->active = 0 ...
+ ... visibility delayed ...
+ atomic_dec_and_test()
+ ... refcnt drops to 1 ...
+ atomic_dec_and_test()
+ ... refcount drops to 0 ...
+ obj_destroy()
+ BUG() triggers since obj->active
+ still seen as one
+ obj->active update visibility occurs
+
+With the memory barrier semantics required of the atomic_t operations
+which return values, the above sequence of memory visibility can never
+happen. Specifically, in the above case the atomic_dec_and_test()
+counter decrement would not become globally visible until the
+obj->active update does.
+
+As a historical note, 32-bit Sparc used to only allow usage of
+24-bits of its atomic_t type. This was because it used 8 bits
+as a spinlock for SMP safety. Sparc32 lacked a "compare and swap"
+type instruction. However, 32-bit Sparc has since been moved over
+to a "hash table of spinlocks" scheme, that allows the full 32-bit
+counter to be realized. Essentially, an array of spinlocks are
+indexed into based upon the address of the atomic_t being operated
+on, and that lock protects the atomic operation. Parisc uses the
+same scheme.
+
+Another note is that the atomic_t operations returning values are
+extremely slow on an old 386.
+
+We will now cover the atomic bitmask operations. You will find that
+their SMP and memory barrier semantics are similar in shape and scope
+to the atomic_t ops above.
+
+Native atomic bit operations are defined to operate on objects aligned
+to the size of an "unsigned long" C data type, and are least of that
+size. The endianness of the bits within each "unsigned long" are the
+native endianness of the cpu.
+
+ void set_bit(unsigned long nr, volatile unsigned long *addr);
+ void clear_bit(unsigned long nr, volatile unsigned long *addr);
+ void change_bit(unsigned long nr, volatile unsigned long *addr);
+
+These routines set, clear, and change, respectively, the bit number
+indicated by "nr" on the bit mask pointed to by "ADDR".
+
+They must execute atomically, yet there are no implicit memory barrier
+semantics required of these interfaces.
+
+ int test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
+ int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
+ int test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
+
+Like the above, except that these routines return a boolean which
+indicates whether the changed bit was set _BEFORE_ the atomic bit
+operation.
+
+WARNING! It is incredibly important that the value be a boolean,
+ie. "0" or "1". Do not try to be fancy and save a few instructions by
+declaring the above to return "long" and just returning something like
+"old_val & mask" because that will not work.
+
+For one thing, this return value gets truncated to int in many code
+paths using these interfaces, so on 64-bit if the bit is set in the
+upper 32-bits then testers will never see that.
+
+One great example of where this problem crops up are the thread_info
+flag operations. Routines such as test_and_set_ti_thread_flag() chop
+the return value into an int. There are other places where things
+like this occur as well.
+
+These routines, like the atomic_t counter operations returning values,
+must provide explicit memory barrier semantics around their execution.
+All memory operations before the atomic bit operation call must be
+made visible globally before the atomic bit operation is made visible.
+Likewise, the atomic bit operation must be visible globally before any
+subsequent memory operation is made visible. For example:
+
+ obj->dead = 1;
+ if (test_and_set_bit(0, &obj->flags))
+ /* ... */;
+ obj->killed = 1;
+
+The implementation of test_and_set_bit() must guarantee that
+"obj->dead = 1;" is visible to cpus before the atomic memory operation
+done by test_and_set_bit() becomes visible. Likewise, the atomic
+memory operation done by test_and_set_bit() must become visible before
+"obj->killed = 1;" is visible.
+
+Finally there is the basic operation:
+
+ int test_bit(unsigned long nr, __const__ volatile unsigned long *addr);
+
+Which returns a boolean indicating if bit "nr" is set in the bitmask
+pointed to by "addr".
+
+If explicit memory barriers are required around {set,clear}_bit() (which do
+not return a value, and thus does not need to provide memory barrier
+semantics), two interfaces are provided:
+
+ void smp_mb__before_atomic(void);
+ void smp_mb__after_atomic(void);
+
+They are used as follows, and are akin to their atomic_t operation
+brothers:
+
+ /* All memory operations before this call will
+ * be globally visible before the clear_bit().
+ */
+ smp_mb__before_atomic();
+ clear_bit( ... );
+
+ /* The clear_bit() will be visible before all
+ * subsequent memory operations.
+ */
+ smp_mb__after_atomic();
+
+There are two special bitops with lock barrier semantics (acquire/release,
+same as spinlocks). These operate in the same way as their non-_lock/unlock
+postfixed variants, except that they are to provide acquire/release semantics,
+respectively. This means they can be used for bit_spin_trylock and
+bit_spin_unlock type operations without specifying any more barriers.
+
+ int test_and_set_bit_lock(unsigned long nr, unsigned long *addr);
+ void clear_bit_unlock(unsigned long nr, unsigned long *addr);
+ void __clear_bit_unlock(unsigned long nr, unsigned long *addr);
+
+The __clear_bit_unlock version is non-atomic, however it still implements
+unlock barrier semantics. This can be useful if the lock itself is protecting
+the other bits in the word.
+
+Finally, there are non-atomic versions of the bitmask operations
+provided. They are used in contexts where some other higher-level SMP
+locking scheme is being used to protect the bitmask, and thus less
+expensive non-atomic operations may be used in the implementation.
+They have names similar to the above bitmask operation interfaces,
+except that two underscores are prefixed to the interface name.
+
+ void __set_bit(unsigned long nr, volatile unsigned long *addr);
+ void __clear_bit(unsigned long nr, volatile unsigned long *addr);
+ void __change_bit(unsigned long nr, volatile unsigned long *addr);
+ int __test_and_set_bit(unsigned long nr, volatile unsigned long *addr);
+ int __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr);
+ int __test_and_change_bit(unsigned long nr, volatile unsigned long *addr);
+
+These non-atomic variants also do not require any special memory
+barrier semantics.
+
+The routines xchg() and cmpxchg() must provide the same exact
+memory-barrier semantics as the atomic and bit operations returning
+values.
+
+Spinlocks and rwlocks have memory barrier expectations as well.
+The rule to follow is simple:
+
+1) When acquiring a lock, the implementation must make it globally
+ visible before any subsequent memory operation.
+
+2) When releasing a lock, the implementation must make it such that
+ all previous memory operations are globally visible before the
+ lock release.
+
+Which finally brings us to _atomic_dec_and_lock(). There is an
+architecture-neutral version implemented in lib/dec_and_lock.c,
+but most platforms will wish to optimize this in assembler.
+
+ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
+
+Atomically decrement the given counter, and if will drop to zero
+atomically acquire the given spinlock and perform the decrement
+of the counter to zero. If it does not drop to zero, do nothing
+with the spinlock.
+
+It is actually pretty simple to get the memory barrier correct.
+Simply satisfy the spinlock grab requirements, which is make
+sure the spinlock operation is globally visible before any
+subsequent memory operation.
+
+We can demonstrate this operation more clearly if we define
+an abstract atomic operation:
+
+ long cas(long *mem, long old, long new);
+
+"cas" stands for "compare and swap". It atomically:
+
+1) Compares "old" with the value currently at "mem".
+2) If they are equal, "new" is written to "mem".
+3) Regardless, the current value at "mem" is returned.
+
+As an example usage, here is what an atomic counter update
+might look like:
+
+void example_atomic_inc(long *counter)
+{
+ long old, new, ret;
+
+ while (1) {
+ old = *counter;
+ new = old + 1;
+
+ ret = cas(counter, old, new);
+ if (ret == old)
+ break;
+ }
+}
+
+Let's use cas() in order to build a pseudo-C atomic_dec_and_lock():
+
+int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
+{
+ long old, new, ret;
+ int went_to_zero;
+
+ went_to_zero = 0;
+ while (1) {
+ old = atomic_read(atomic);
+ new = old - 1;
+ if (new == 0) {
+ went_to_zero = 1;
+ spin_lock(lock);
+ }
+ ret = cas(atomic, old, new);
+ if (ret == old)
+ break;
+ if (went_to_zero) {
+ spin_unlock(lock);
+ went_to_zero = 0;
+ }
+ }
+
+ return went_to_zero;
+}
+
+Now, as far as memory barriers go, as long as spin_lock()
+strictly orders all subsequent memory operations (including
+the cas()) with respect to itself, things will be fine.
+
+Said another way, _atomic_dec_and_lock() must guarantee that
+a counter dropping to zero is never made visible before the
+spinlock being acquired.
+
+Note that this also means that for the case where the counter
+is not dropping to zero, there are no memory ordering
+requirements.
diff --git a/Documentation/auxdisplay/.gitignore b/Documentation/auxdisplay/.gitignore
new file mode 100644
index 000000000..7af222860
--- /dev/null
+++ b/Documentation/auxdisplay/.gitignore
@@ -0,0 +1 @@
+cfag12864b-example
diff --git a/Documentation/auxdisplay/Makefile b/Documentation/auxdisplay/Makefile
new file mode 100644
index 000000000..ada4dac99
--- /dev/null
+++ b/Documentation/auxdisplay/Makefile
@@ -0,0 +1,7 @@
+# List of programs to build
+hostprogs-y := cfag12864b-example
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_cfag12864b-example.o += -I$(objtree)/usr/include
diff --git a/Documentation/auxdisplay/cfag12864b b/Documentation/auxdisplay/cfag12864b
new file mode 100644
index 000000000..eb7be393a
--- /dev/null
+++ b/Documentation/auxdisplay/cfag12864b
@@ -0,0 +1,105 @@
+ ===================================
+ cfag12864b LCD Driver Documentation
+ ===================================
+
+License: GPLv2
+Author & Maintainer: Miguel Ojeda Sandonis
+Date: 2006-10-27
+
+
+
+--------
+0. INDEX
+--------
+
+ 1. DRIVER INFORMATION
+ 2. DEVICE INFORMATION
+ 3. WIRING
+ 4. USERSPACE PROGRAMMING
+
+
+---------------------
+1. DRIVER INFORMATION
+---------------------
+
+This driver supports a cfag12864b LCD.
+
+
+---------------------
+2. DEVICE INFORMATION
+---------------------
+
+Manufacturer: Crystalfontz
+Device Name: Crystalfontz 12864b LCD Series
+Device Code: cfag12864b
+Webpage: http://www.crystalfontz.com
+Device Webpage: http://www.crystalfontz.com/products/12864b/
+Type: LCD (Liquid Crystal Display)
+Width: 128
+Height: 64
+Colors: 2 (B/N)
+Controller: ks0108
+Controllers: 2
+Pages: 8 each controller
+Addresses: 64 each page
+Data size: 1 byte each address
+Memory size: 2 * 8 * 64 * 1 = 1024 bytes = 1 Kbyte
+
+
+---------
+3. WIRING
+---------
+
+The cfag12864b LCD Series don't have official wiring.
+
+The common wiring is done to the parallel port as shown:
+
+Parallel Port cfag12864b
+
+ Name Pin# Pin# Name
+
+Strobe ( 1)------------------------------(17) Enable
+Data 0 ( 2)------------------------------( 4) Data 0
+Data 1 ( 3)------------------------------( 5) Data 1
+Data 2 ( 4)------------------------------( 6) Data 2
+Data 3 ( 5)------------------------------( 7) Data 3
+Data 4 ( 6)------------------------------( 8) Data 4
+Data 5 ( 7)------------------------------( 9) Data 5
+Data 6 ( 8)------------------------------(10) Data 6
+Data 7 ( 9)------------------------------(11) Data 7
+ (10) [+5v]---( 1) Vdd
+ (11) [GND]---( 2) Ground
+ (12) [+5v]---(14) Reset
+ (13) [GND]---(15) Read / Write
+ Line (14)------------------------------(13) Controller Select 1
+ (15)
+ Init (16)------------------------------(12) Controller Select 2
+Select (17)------------------------------(16) Data / Instruction
+Ground (18)---[GND] [+5v]---(19) LED +
+Ground (19)---[GND]
+Ground (20)---[GND] E A Values:
+Ground (21)---[GND] [GND]---[P1]---(18) Vee - R = Resistor = 22 ohm
+Ground (22)---[GND] | - P1 = Preset = 10 Kohm
+Ground (23)---[GND] ---- S ------( 3) V0 - P2 = Preset = 1 Kohm
+Ground (24)---[GND] | |
+Ground (25)---[GND] [GND]---[P2]---[R]---(20) LED -
+
+
+------------------------
+4. USERSPACE PROGRAMMING
+------------------------
+
+The cfag12864bfb describes a framebuffer device (/dev/fbX).
+
+It has a size of 1024 bytes = 1 Kbyte.
+Each bit represents one pixel. If the bit is high, the pixel will
+turn on. If the pixel is low, the pixel will turn off.
+
+You can use the framebuffer as a file: fopen, fwrite, fclose...
+Although the LCD won't get updated until the next refresh time arrives.
+
+Also, you can mmap the framebuffer: open & mmap, munmap & close...
+which is the best option for most uses.
+
+Check Documentation/auxdisplay/cfag12864b-example.c
+for a real working userspace complete program with usage examples.
diff --git a/Documentation/auxdisplay/cfag12864b-example.c b/Documentation/auxdisplay/cfag12864b-example.c
new file mode 100644
index 000000000..e7823ffb1
--- /dev/null
+++ b/Documentation/auxdisplay/cfag12864b-example.c
@@ -0,0 +1,281 @@
+/*
+ * Filename: cfag12864b-example.c
+ * Version: 0.1.0
+ * Description: cfag12864b LCD userspace example program
+ * License: GPLv2
+ *
+ * Author: Copyright (C) Miguel Ojeda Sandonis
+ * Date: 2006-10-31
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+/*
+ * ------------------------
+ * start of cfag12864b code
+ * ------------------------
+ */
+
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+#define CFAG12864B_WIDTH (128)
+#define CFAG12864B_HEIGHT (64)
+#define CFAG12864B_SIZE (128 * 64 / 8)
+#define CFAG12864B_BPB (8)
+#define CFAG12864B_ADDRESS(x, y) ((y) * CFAG12864B_WIDTH / \
+ CFAG12864B_BPB + (x) / CFAG12864B_BPB)
+#define CFAG12864B_BIT(n) (((unsigned char) 1) << (n))
+
+#undef CFAG12864B_DOCHECK
+#ifdef CFAG12864B_DOCHECK
+ #define CFAG12864B_CHECK(x, y) ((x) < CFAG12864B_WIDTH && \
+ (y) < CFAG12864B_HEIGHT)
+#else
+ #define CFAG12864B_CHECK(x, y) (1)
+#endif
+
+int cfag12864b_fd;
+unsigned char * cfag12864b_mem;
+unsigned char cfag12864b_buffer[CFAG12864B_SIZE];
+
+/*
+ * init a cfag12864b framebuffer device
+ *
+ * No error: return = 0
+ * Unable to open: return = -1
+ * Unable to mmap: return = -2
+ */
+static int cfag12864b_init(char *path)
+{
+ cfag12864b_fd = open(path, O_RDWR);
+ if (cfag12864b_fd == -1)
+ return -1;
+
+ cfag12864b_mem = mmap(0, CFAG12864B_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, cfag12864b_fd, 0);
+ if (cfag12864b_mem == MAP_FAILED) {
+ close(cfag12864b_fd);
+ return -2;
+ }
+
+ return 0;
+}
+
+/*
+ * exit a cfag12864b framebuffer device
+ */
+static void cfag12864b_exit(void)
+{
+ munmap(cfag12864b_mem, CFAG12864B_SIZE);
+ close(cfag12864b_fd);
+}
+
+/*
+ * set (x, y) pixel
+ */
+static void cfag12864b_set(unsigned char x, unsigned char y)
+{
+ if (CFAG12864B_CHECK(x, y))
+ cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] |=
+ CFAG12864B_BIT(x % CFAG12864B_BPB);
+}
+
+/*
+ * unset (x, y) pixel
+ */
+static void cfag12864b_unset(unsigned char x, unsigned char y)
+{
+ if (CFAG12864B_CHECK(x, y))
+ cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] &=
+ ~CFAG12864B_BIT(x % CFAG12864B_BPB);
+}
+
+/*
+ * is set (x, y) pixel?
+ *
+ * Pixel off: return = 0
+ * Pixel on: return = 1
+ */
+static unsigned char cfag12864b_isset(unsigned char x, unsigned char y)
+{
+ if (CFAG12864B_CHECK(x, y))
+ if (cfag12864b_buffer[CFAG12864B_ADDRESS(x, y)] &
+ CFAG12864B_BIT(x % CFAG12864B_BPB))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * not (x, y) pixel
+ */
+static void cfag12864b_not(unsigned char x, unsigned char y)
+{
+ if (cfag12864b_isset(x, y))
+ cfag12864b_unset(x, y);
+ else
+ cfag12864b_set(x, y);
+}
+
+/*
+ * fill (set all pixels)
+ */
+static void cfag12864b_fill(void)
+{
+ unsigned short i;
+
+ for (i = 0; i < CFAG12864B_SIZE; i++)
+ cfag12864b_buffer[i] = 0xFF;
+}
+
+/*
+ * clear (unset all pixels)
+ */
+static void cfag12864b_clear(void)
+{
+ unsigned short i;
+
+ for (i = 0; i < CFAG12864B_SIZE; i++)
+ cfag12864b_buffer[i] = 0;
+}
+
+/*
+ * format a [128*64] matrix
+ *
+ * Pixel off: src[i] = 0
+ * Pixel on: src[i] > 0
+ */
+static void cfag12864b_format(unsigned char * matrix)
+{
+ unsigned char i, j, n;
+
+ for (i = 0; i < CFAG12864B_HEIGHT; i++)
+ for (j = 0; j < CFAG12864B_WIDTH / CFAG12864B_BPB; j++) {
+ cfag12864b_buffer[i * CFAG12864B_WIDTH / CFAG12864B_BPB +
+ j] = 0;
+ for (n = 0; n < CFAG12864B_BPB; n++)
+ if (matrix[i * CFAG12864B_WIDTH +
+ j * CFAG12864B_BPB + n])
+ cfag12864b_buffer[i * CFAG12864B_WIDTH /
+ CFAG12864B_BPB + j] |=
+ CFAG12864B_BIT(n);
+ }
+}
+
+/*
+ * blit buffer to lcd
+ */
+static void cfag12864b_blit(void)
+{
+ memcpy(cfag12864b_mem, cfag12864b_buffer, CFAG12864B_SIZE);
+}
+
+/*
+ * ----------------------
+ * end of cfag12864b code
+ * ----------------------
+ */
+
+#include <stdio.h>
+
+#define EXAMPLES 6
+
+static void example(unsigned char n)
+{
+ unsigned short i, j;
+ unsigned char matrix[CFAG12864B_WIDTH * CFAG12864B_HEIGHT];
+
+ if (n > EXAMPLES)
+ return;
+
+ printf("Example %i/%i - ", n, EXAMPLES);
+
+ switch (n) {
+ case 1:
+ printf("Draw points setting bits");
+ cfag12864b_clear();
+ for (i = 0; i < CFAG12864B_WIDTH; i += 2)
+ for (j = 0; j < CFAG12864B_HEIGHT; j += 2)
+ cfag12864b_set(i, j);
+ break;
+
+ case 2:
+ printf("Clear the LCD");
+ cfag12864b_clear();
+ break;
+
+ case 3:
+ printf("Draw rows formatting a [128*64] matrix");
+ memset(matrix, 0, CFAG12864B_WIDTH * CFAG12864B_HEIGHT);
+ for (i = 0; i < CFAG12864B_WIDTH; i++)
+ for (j = 0; j < CFAG12864B_HEIGHT; j += 2)
+ matrix[j * CFAG12864B_WIDTH + i] = 1;
+ cfag12864b_format(matrix);
+ break;
+
+ case 4:
+ printf("Fill the lcd");
+ cfag12864b_fill();
+ break;
+
+ case 5:
+ printf("Draw columns unsetting bits");
+ for (i = 0; i < CFAG12864B_WIDTH; i += 2)
+ for (j = 0; j < CFAG12864B_HEIGHT; j++)
+ cfag12864b_unset(i, j);
+ break;
+
+ case 6:
+ printf("Do negative not-ing all bits");
+ for (i = 0; i < CFAG12864B_WIDTH; i++)
+ for (j = 0; j < CFAG12864B_HEIGHT; j ++)
+ cfag12864b_not(i, j);
+ break;
+ }
+
+ puts(" - [Press Enter]");
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned char n;
+
+ if (argc != 2) {
+ printf(
+ "Sintax: %s fbdev\n"
+ "Usually: /dev/fb0, /dev/fb1...\n", argv[0]);
+ return -1;
+ }
+
+ if (cfag12864b_init(argv[1])) {
+ printf("Can't init %s fbdev\n", argv[1]);
+ return -2;
+ }
+
+ for (n = 1; n <= EXAMPLES; n++) {
+ example(n);
+ cfag12864b_blit();
+ while (getchar() != '\n');
+ }
+
+ cfag12864b_exit();
+
+ return 0;
+}
diff --git a/Documentation/auxdisplay/ks0108 b/Documentation/auxdisplay/ks0108
new file mode 100644
index 000000000..8ddda0c8c
--- /dev/null
+++ b/Documentation/auxdisplay/ks0108
@@ -0,0 +1,55 @@
+ ==========================================
+ ks0108 LCD Controller Driver Documentation
+ ==========================================
+
+License: GPLv2
+Author & Maintainer: Miguel Ojeda Sandonis
+Date: 2006-10-27
+
+
+
+--------
+0. INDEX
+--------
+
+ 1. DRIVER INFORMATION
+ 2. DEVICE INFORMATION
+ 3. WIRING
+
+
+---------------------
+1. DRIVER INFORMATION
+---------------------
+
+This driver supports the ks0108 LCD controller.
+
+
+---------------------
+2. DEVICE INFORMATION
+---------------------
+
+Manufacturer: Samsung
+Device Name: KS0108 LCD Controller
+Device Code: ks0108
+Webpage: -
+Device Webpage: -
+Type: LCD Controller (Liquid Crystal Display Controller)
+Width: 64
+Height: 64
+Colors: 2 (B/N)
+Pages: 8
+Addresses: 64 each page
+Data size: 1 byte each address
+Memory size: 8 * 64 * 1 = 512 bytes
+
+
+---------
+3. WIRING
+---------
+
+The driver supports data parallel port wiring.
+
+If you aren't building LCD related hardware, you should check
+your LCD specific wiring information in the same folder.
+
+For example, check Documentation/auxdisplay/cfag12864b.
diff --git a/Documentation/backlight/lp855x-driver.txt b/Documentation/backlight/lp855x-driver.txt
new file mode 100644
index 000000000..01bce243d
--- /dev/null
+++ b/Documentation/backlight/lp855x-driver.txt
@@ -0,0 +1,66 @@
+Kernel driver lp855x
+====================
+
+Backlight driver for LP855x ICs
+
+Supported chips:
+ Texas Instruments LP8550, LP8551, LP8552, LP8553, LP8555, LP8556 and
+ LP8557
+
+Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+-----------
+
+* Brightness control
+
+Brightness can be controlled by the pwm input or the i2c command.
+The lp855x driver supports both cases.
+
+* Device attributes
+
+1) bl_ctl_mode
+Backlight control mode.
+Value : pwm based or register based
+
+2) chip_id
+The lp855x chip id.
+Value : lp8550/lp8551/lp8552/lp8553/lp8555/lp8556/lp8557
+
+Platform data for lp855x
+------------------------
+
+For supporting platform specific data, the lp855x platform data can be used.
+
+* name : Backlight driver name. If it is not defined, default name is set.
+* device_control : Value of DEVICE CONTROL register.
+* initial_brightness : Initial value of backlight brightness.
+* period_ns : Platform specific PWM period value. unit is nano.
+ Only valid when brightness is pwm input mode.
+* size_program : Total size of lp855x_rom_data.
+* rom_data : List of new eeprom/eprom registers.
+
+example 1) lp8552 platform data : i2c register mode with new eeprom data
+
+#define EEPROM_A5_ADDR 0xA5
+#define EEPROM_A5_VAL 0x4f /* EN_VSYNC=0 */
+
+static struct lp855x_rom_data lp8552_eeprom_arr[] = {
+ {EEPROM_A5_ADDR, EEPROM_A5_VAL},
+};
+
+static struct lp855x_platform_data lp8552_pdata = {
+ .name = "lcd-bl",
+ .device_control = I2C_CONFIG(LP8552),
+ .initial_brightness = INITIAL_BRT,
+ .size_program = ARRAY_SIZE(lp8552_eeprom_arr),
+ .rom_data = lp8552_eeprom_arr,
+};
+
+example 2) lp8556 platform data : pwm input mode with default rom data
+
+static struct lp855x_platform_data lp8556_pdata = {
+ .device_control = PWM_CONFIG(LP8556),
+ .initial_brightness = INITIAL_BRT,
+ .period_ns = 1000000,
+};
diff --git a/Documentation/bad_memory.txt b/Documentation/bad_memory.txt
new file mode 100644
index 000000000..df8416213
--- /dev/null
+++ b/Documentation/bad_memory.txt
@@ -0,0 +1,45 @@
+March 2008
+Jan-Simon Moeller, dl9pf@gmx.de
+
+
+How to deal with bad memory e.g. reported by memtest86+ ?
+#########################################################
+
+There are three possibilities I know of:
+
+1) Reinsert/swap the memory modules
+
+2) Buy new modules (best!) or try to exchange the memory
+ if you have spare-parts
+
+3) Use BadRAM or memmap
+
+This Howto is about number 3) .
+
+
+BadRAM
+######
+BadRAM is the actively developed and available as kernel-patch
+here: http://rick.vanrein.org/linux/badram/
+
+For more details see the BadRAM documentation.
+
+memmap
+######
+
+memmap is already in the kernel and usable as kernel-parameter at
+boot-time. Its syntax is slightly strange and you may need to
+calculate the values by yourself!
+
+Syntax to exclude a memory area (see kernel-parameters.txt for details):
+memmap=<size>$<address>
+
+Example: memtest86+ reported here errors at address 0x18691458, 0x18698424 and
+ some others. All had 0x1869xxxx in common, so I chose a pattern of
+ 0x18690000,0xffff0000.
+
+With the numbers of the example above:
+memmap=64K$0x18690000
+ or
+memmap=0x10000$0x18690000
+
diff --git a/Documentation/basic_profiling.txt b/Documentation/basic_profiling.txt
new file mode 100644
index 000000000..8764e9f70
--- /dev/null
+++ b/Documentation/basic_profiling.txt
@@ -0,0 +1,56 @@
+These instructions are deliberately very basic. If you want something clever,
+go read the real docs ;-) Please don't add more stuff, but feel free to
+correct my mistakes ;-) (mbligh@aracnet.com)
+Thanks to John Levon, Dave Hansen, et al. for help writing this.
+
+<test> is the thing you're trying to measure.
+Make sure you have the correct System.map / vmlinux referenced!
+
+It is probably easiest to use "make install" for linux and hack
+/sbin/installkernel to copy vmlinux to /boot, in addition to vmlinuz,
+config, System.map, which are usually installed by default.
+
+Readprofile
+-----------
+A recent readprofile command is needed for 2.6, such as found in util-linux
+2.12a, which can be downloaded from:
+
+http://www.kernel.org/pub/linux/utils/util-linux/
+
+Most distributions will ship it already.
+
+Add "profile=2" to the kernel command line.
+
+clear readprofile -r
+ <test>
+dump output readprofile -m /boot/System.map > captured_profile
+
+Oprofile
+--------
+
+Get the source (see Changes for required version) from
+http://oprofile.sourceforge.net/ and add "idle=poll" to the kernel command
+line.
+
+Configure with CONFIG_PROFILING=y and CONFIG_OPROFILE=y & reboot on new kernel
+
+./configure --with-kernel-support
+make install
+
+For superior results, be sure to enable the local APIC. If opreport sees
+a 0Hz CPU, APIC was not on. Be aware that idle=poll may mean a performance
+penalty.
+
+One time setup:
+ opcontrol --setup --vmlinux=/boot/vmlinux
+
+clear opcontrol --reset
+start opcontrol --start
+ <test>
+stop opcontrol --stop
+dump output opreport > output_file
+
+To only report on the kernel, run opreport -l /boot/vmlinux > output_file
+
+A reset is needed to clear old statistics, which survive a reboot.
+
diff --git a/Documentation/bcache.txt b/Documentation/bcache.txt
new file mode 100644
index 000000000..32b6c3189
--- /dev/null
+++ b/Documentation/bcache.txt
@@ -0,0 +1,448 @@
+Say you've got a big slow raid 6, and an X-25E or three. Wouldn't it be
+nice if you could use them as cache... Hence bcache.
+
+Wiki and git repositories are at:
+ http://bcache.evilpiepirate.org
+ http://evilpiepirate.org/git/linux-bcache.git
+ http://evilpiepirate.org/git/bcache-tools.git
+
+It's designed around the performance characteristics of SSDs - it only allocates
+in erase block sized buckets, and it uses a hybrid btree/log to track cached
+extants (which can be anywhere from a single sector to the bucket size). It's
+designed to avoid random writes at all costs; it fills up an erase block
+sequentially, then issues a discard before reusing it.
+
+Both writethrough and writeback caching are supported. Writeback defaults to
+off, but can be switched on and off arbitrarily at runtime. Bcache goes to
+great lengths to protect your data - it reliably handles unclean shutdown. (It
+doesn't even have a notion of a clean shutdown; bcache simply doesn't return
+writes as completed until they're on stable storage).
+
+Writeback caching can use most of the cache for buffering writes - writing
+dirty data to the backing device is always done sequentially, scanning from the
+start to the end of the index.
+
+Since random IO is what SSDs excel at, there generally won't be much benefit
+to caching large sequential IO. Bcache detects sequential IO and skips it;
+it also keeps a rolling average of the IO sizes per task, and as long as the
+average is above the cutoff it will skip all IO from that task - instead of
+caching the first 512k after every seek. Backups and large file copies should
+thus entirely bypass the cache.
+
+In the event of a data IO error on the flash it will try to recover by reading
+from disk or invalidating cache entries. For unrecoverable errors (meta data
+or dirty data), caching is automatically disabled; if dirty data was present
+in the cache it first disables writeback caching and waits for all dirty data
+to be flushed.
+
+Getting started:
+You'll need make-bcache from the bcache-tools repository. Both the cache device
+and backing device must be formatted before use.
+ make-bcache -B /dev/sdb
+ make-bcache -C /dev/sdc
+
+make-bcache has the ability to format multiple devices at the same time - if
+you format your backing devices and cache device at the same time, you won't
+have to manually attach:
+ make-bcache -B /dev/sda /dev/sdb -C /dev/sdc
+
+bcache-tools now ships udev rules, and bcache devices are known to the kernel
+immediately. Without udev, you can manually register devices like this:
+
+ echo /dev/sdb > /sys/fs/bcache/register
+ echo /dev/sdc > /sys/fs/bcache/register
+
+Registering the backing device makes the bcache device show up in /dev; you can
+now format it and use it as normal. But the first time using a new bcache
+device, it'll be running in passthrough mode until you attach it to a cache.
+See the section on attaching.
+
+The devices show up as:
+
+ /dev/bcache<N>
+
+As well as (with udev):
+
+ /dev/bcache/by-uuid/<uuid>
+ /dev/bcache/by-label/<label>
+
+To get started:
+
+ mkfs.ext4 /dev/bcache0
+ mount /dev/bcache0 /mnt
+
+You can control bcache devices through sysfs at /sys/block/bcache<N>/bcache .
+
+Cache devices are managed as sets; multiple caches per set isn't supported yet
+but will allow for mirroring of metadata and dirty data in the future. Your new
+cache set shows up as /sys/fs/bcache/<UUID>
+
+ATTACHING:
+
+After your cache device and backing device are registered, the backing device
+must be attached to your cache set to enable caching. Attaching a backing
+device to a cache set is done thusly, with the UUID of the cache set in
+/sys/fs/bcache:
+
+ echo <CSET-UUID> > /sys/block/bcache0/bcache/attach
+
+This only has to be done once. The next time you reboot, just reregister all
+your bcache devices. If a backing device has data in a cache somewhere, the
+/dev/bcache<N> device won't be created until the cache shows up - particularly
+important if you have writeback caching turned on.
+
+If you're booting up and your cache device is gone and never coming back, you
+can force run the backing device:
+
+ echo 1 > /sys/block/sdb/bcache/running
+
+(You need to use /sys/block/sdb (or whatever your backing device is called), not
+/sys/block/bcache0, because bcache0 doesn't exist yet. If you're using a
+partition, the bcache directory would be at /sys/block/sdb/sdb2/bcache)
+
+The backing device will still use that cache set if it shows up in the future,
+but all the cached data will be invalidated. If there was dirty data in the
+cache, don't expect the filesystem to be recoverable - you will have massive
+filesystem corruption, though ext4's fsck does work miracles.
+
+ERROR HANDLING:
+
+Bcache tries to transparently handle IO errors to/from the cache device without
+affecting normal operation; if it sees too many errors (the threshold is
+configurable, and defaults to 0) it shuts down the cache device and switches all
+the backing devices to passthrough mode.
+
+ - For reads from the cache, if they error we just retry the read from the
+ backing device.
+
+ - For writethrough writes, if the write to the cache errors we just switch to
+ invalidating the data at that lba in the cache (i.e. the same thing we do for
+ a write that bypasses the cache)
+
+ - For writeback writes, we currently pass that error back up to the
+ filesystem/userspace. This could be improved - we could retry it as a write
+ that skips the cache so we don't have to error the write.
+
+ - When we detach, we first try to flush any dirty data (if we were running in
+ writeback mode). It currently doesn't do anything intelligent if it fails to
+ read some of the dirty data, though.
+
+TROUBLESHOOTING PERFORMANCE:
+
+Bcache has a bunch of config options and tunables. The defaults are intended to
+be reasonable for typical desktop and server workloads, but they're not what you
+want for getting the best possible numbers when benchmarking.
+
+ - Bad write performance
+
+ If write performance is not what you expected, you probably wanted to be
+ running in writeback mode, which isn't the default (not due to a lack of
+ maturity, but simply because in writeback mode you'll lose data if something
+ happens to your SSD)
+
+ # echo writeback > /sys/block/bcache0/cache_mode
+
+ - Bad performance, or traffic not going to the SSD that you'd expect
+
+ By default, bcache doesn't cache everything. It tries to skip sequential IO -
+ because you really want to be caching the random IO, and if you copy a 10
+ gigabyte file you probably don't want that pushing 10 gigabytes of randomly
+ accessed data out of your cache.
+
+ But if you want to benchmark reads from cache, and you start out with fio
+ writing an 8 gigabyte test file - so you want to disable that.
+
+ # echo 0 > /sys/block/bcache0/bcache/sequential_cutoff
+
+ To set it back to the default (4 mb), do
+
+ # echo 4M > /sys/block/bcache0/bcache/sequential_cutoff
+
+ - Traffic's still going to the spindle/still getting cache misses
+
+ In the real world, SSDs don't always keep up with disks - particularly with
+ slower SSDs, many disks being cached by one SSD, or mostly sequential IO. So
+ you want to avoid being bottlenecked by the SSD and having it slow everything
+ down.
+
+ To avoid that bcache tracks latency to the cache device, and gradually
+ throttles traffic if the latency exceeds a threshold (it does this by
+ cranking down the sequential bypass).
+
+ You can disable this if you need to by setting the thresholds to 0:
+
+ # echo 0 > /sys/fs/bcache/<cache set>/congested_read_threshold_us
+ # echo 0 > /sys/fs/bcache/<cache set>/congested_write_threshold_us
+
+ The default is 2000 us (2 milliseconds) for reads, and 20000 for writes.
+
+ - Still getting cache misses, of the same data
+
+ One last issue that sometimes trips people up is actually an old bug, due to
+ the way cache coherency is handled for cache misses. If a btree node is full,
+ a cache miss won't be able to insert a key for the new data and the data
+ won't be written to the cache.
+
+ In practice this isn't an issue because as soon as a write comes along it'll
+ cause the btree node to be split, and you need almost no write traffic for
+ this to not show up enough to be noticeable (especially since bcache's btree
+ nodes are huge and index large regions of the device). But when you're
+ benchmarking, if you're trying to warm the cache by reading a bunch of data
+ and there's no other traffic - that can be a problem.
+
+ Solution: warm the cache by doing writes, or use the testing branch (there's
+ a fix for the issue there).
+
+SYSFS - BACKING DEVICE:
+
+Available at /sys/block/<bdev>/bcache, /sys/block/bcache*/bcache and
+(if attached) /sys/fs/bcache/<cset-uuid>/bdev*
+
+attach
+ Echo the UUID of a cache set to this file to enable caching.
+
+cache_mode
+ Can be one of either writethrough, writeback, writearound or none.
+
+clear_stats
+ Writing to this file resets the running total stats (not the day/hour/5 minute
+ decaying versions).
+
+detach
+ Write to this file to detach from a cache set. If there is dirty data in the
+ cache, it will be flushed first.
+
+dirty_data
+ Amount of dirty data for this backing device in the cache. Continuously
+ updated unlike the cache set's version, but may be slightly off.
+
+label
+ Name of underlying device.
+
+readahead
+ Size of readahead that should be performed. Defaults to 0. If set to e.g.
+ 1M, it will round cache miss reads up to that size, but without overlapping
+ existing cache entries.
+
+running
+ 1 if bcache is running (i.e. whether the /dev/bcache device exists, whether
+ it's in passthrough mode or caching).
+
+sequential_cutoff
+ A sequential IO will bypass the cache once it passes this threshold; the
+ most recent 128 IOs are tracked so sequential IO can be detected even when
+ it isn't all done at once.
+
+sequential_merge
+ If non zero, bcache keeps a list of the last 128 requests submitted to compare
+ against all new requests to determine which new requests are sequential
+ continuations of previous requests for the purpose of determining sequential
+ cutoff. This is necessary if the sequential cutoff value is greater than the
+ maximum acceptable sequential size for any single request.
+
+state
+ The backing device can be in one of four different states:
+
+ no cache: Has never been attached to a cache set.
+
+ clean: Part of a cache set, and there is no cached dirty data.
+
+ dirty: Part of a cache set, and there is cached dirty data.
+
+ inconsistent: The backing device was forcibly run by the user when there was
+ dirty data cached but the cache set was unavailable; whatever data was on the
+ backing device has likely been corrupted.
+
+stop
+ Write to this file to shut down the bcache device and close the backing
+ device.
+
+writeback_delay
+ When dirty data is written to the cache and it previously did not contain
+ any, waits some number of seconds before initiating writeback. Defaults to
+ 30.
+
+writeback_percent
+ If nonzero, bcache tries to keep around this percentage of the cache dirty by
+ throttling background writeback and using a PD controller to smoothly adjust
+ the rate.
+
+writeback_rate
+ Rate in sectors per second - if writeback_percent is nonzero, background
+ writeback is throttled to this rate. Continuously adjusted by bcache but may
+ also be set by the user.
+
+writeback_running
+ If off, writeback of dirty data will not take place at all. Dirty data will
+ still be added to the cache until it is mostly full; only meant for
+ benchmarking. Defaults to on.
+
+SYSFS - BACKING DEVICE STATS:
+
+There are directories with these numbers for a running total, as well as
+versions that decay over the past day, hour and 5 minutes; they're also
+aggregated in the cache set directory as well.
+
+bypassed
+ Amount of IO (both reads and writes) that has bypassed the cache
+
+cache_hits
+cache_misses
+cache_hit_ratio
+ Hits and misses are counted per individual IO as bcache sees them; a
+ partial hit is counted as a miss.
+
+cache_bypass_hits
+cache_bypass_misses
+ Hits and misses for IO that is intended to skip the cache are still counted,
+ but broken out here.
+
+cache_miss_collisions
+ Counts instances where data was going to be inserted into the cache from a
+ cache miss, but raced with a write and data was already present (usually 0
+ since the synchronization for cache misses was rewritten)
+
+cache_readaheads
+ Count of times readahead occurred.
+
+SYSFS - CACHE SET:
+
+Available at /sys/fs/bcache/<cset-uuid>
+
+average_key_size
+ Average data per key in the btree.
+
+bdev<0..n>
+ Symlink to each of the attached backing devices.
+
+block_size
+ Block size of the cache devices.
+
+btree_cache_size
+ Amount of memory currently used by the btree cache
+
+bucket_size
+ Size of buckets
+
+cache<0..n>
+ Symlink to each of the cache devices comprising this cache set.
+
+cache_available_percent
+ Percentage of cache device which doesn't contain dirty data, and could
+ potentially be used for writeback. This doesn't mean this space isn't used
+ for clean cached data; the unused statistic (in priority_stats) is typically
+ much lower.
+
+clear_stats
+ Clears the statistics associated with this cache
+
+dirty_data
+ Amount of dirty data is in the cache (updated when garbage collection runs).
+
+flash_vol_create
+ Echoing a size to this file (in human readable units, k/M/G) creates a thinly
+ provisioned volume backed by the cache set.
+
+io_error_halflife
+io_error_limit
+ These determines how many errors we accept before disabling the cache.
+ Each error is decayed by the half life (in # ios). If the decaying count
+ reaches io_error_limit dirty data is written out and the cache is disabled.
+
+journal_delay_ms
+ Journal writes will delay for up to this many milliseconds, unless a cache
+ flush happens sooner. Defaults to 100.
+
+root_usage_percent
+ Percentage of the root btree node in use. If this gets too high the node
+ will split, increasing the tree depth.
+
+stop
+ Write to this file to shut down the cache set - waits until all attached
+ backing devices have been shut down.
+
+tree_depth
+ Depth of the btree (A single node btree has depth 0).
+
+unregister
+ Detaches all backing devices and closes the cache devices; if dirty data is
+ present it will disable writeback caching and wait for it to be flushed.
+
+SYSFS - CACHE SET INTERNAL:
+
+This directory also exposes timings for a number of internal operations, with
+separate files for average duration, average frequency, last occurrence and max
+duration: garbage collection, btree read, btree node sorts and btree splits.
+
+active_journal_entries
+ Number of journal entries that are newer than the index.
+
+btree_nodes
+ Total nodes in the btree.
+
+btree_used_percent
+ Average fraction of btree in use.
+
+bset_tree_stats
+ Statistics about the auxiliary search trees
+
+btree_cache_max_chain
+ Longest chain in the btree node cache's hash table
+
+cache_read_races
+ Counts instances where while data was being read from the cache, the bucket
+ was reused and invalidated - i.e. where the pointer was stale after the read
+ completed. When this occurs the data is reread from the backing device.
+
+trigger_gc
+ Writing to this file forces garbage collection to run.
+
+SYSFS - CACHE DEVICE:
+
+Available at /sys/block/<cdev>/bcache
+
+block_size
+ Minimum granularity of writes - should match hardware sector size.
+
+btree_written
+ Sum of all btree writes, in (kilo/mega/giga) bytes
+
+bucket_size
+ Size of buckets
+
+cache_replacement_policy
+ One of either lru, fifo or random.
+
+discard
+ Boolean; if on a discard/TRIM will be issued to each bucket before it is
+ reused. Defaults to off, since SATA TRIM is an unqueued command (and thus
+ slow).
+
+freelist_percent
+ Size of the freelist as a percentage of nbuckets. Can be written to to
+ increase the number of buckets kept on the freelist, which lets you
+ artificially reduce the size of the cache at runtime. Mostly for testing
+ purposes (i.e. testing how different size caches affect your hit rate), but
+ since buckets are discarded when they move on to the freelist will also make
+ the SSD's garbage collection easier by effectively giving it more reserved
+ space.
+
+io_errors
+ Number of errors that have occurred, decayed by io_error_halflife.
+
+metadata_written
+ Sum of all non data writes (btree writes and all other metadata).
+
+nbuckets
+ Total buckets in this cache
+
+priority_stats
+ Statistics about how recently data in the cache has been accessed.
+ This can reveal your working set size. Unused is the percentage of
+ the cache that doesn't contain any data. Metadata is bcache's
+ metadata overhead. Average is the average priority of cache buckets.
+ Next is a list of quantiles with the priority threshold of each.
+
+written
+ Sum of all data that has been written to the cache; comparison with
+ btree_written gives the amount of write inflation in bcache.
diff --git a/Documentation/binfmt_misc.txt b/Documentation/binfmt_misc.txt
new file mode 100644
index 000000000..6b1de7058
--- /dev/null
+++ b/Documentation/binfmt_misc.txt
@@ -0,0 +1,124 @@
+ Kernel Support for miscellaneous (your favourite) Binary Formats v1.1
+ =====================================================================
+
+This Kernel feature allows you to invoke almost (for restrictions see below)
+every program by simply typing its name in the shell.
+This includes for example compiled Java(TM), Python or Emacs programs.
+
+To achieve this you must tell binfmt_misc which interpreter has to be invoked
+with which binary. Binfmt_misc recognises the binary-type by matching some bytes
+at the beginning of the file with a magic byte sequence (masking out specified
+bits) you have supplied. Binfmt_misc can also recognise a filename extension
+aka '.com' or '.exe'.
+
+First you must mount binfmt_misc:
+ mount binfmt_misc -t binfmt_misc /proc/sys/fs/binfmt_misc
+
+To actually register a new binary type, you have to set up a string looking like
+:name:type:offset:magic:mask:interpreter:flags (where you can choose the ':'
+upon your needs) and echo it to /proc/sys/fs/binfmt_misc/register.
+
+Here is what the fields mean:
+ - 'name' is an identifier string. A new /proc file will be created with this
+ name below /proc/sys/fs/binfmt_misc; cannot contain slashes '/' for obvious
+ reasons.
+ - 'type' is the type of recognition. Give 'M' for magic and 'E' for extension.
+ - 'offset' is the offset of the magic/mask in the file, counted in bytes. This
+ defaults to 0 if you omit it (i.e. you write ':name:type::magic...'). Ignored
+ when using filename extension matching.
+ - 'magic' is the byte sequence binfmt_misc is matching for. The magic string
+ may contain hex-encoded characters like \x0a or \xA4. Note that you must
+ escape any NUL bytes; parsing halts at the first one. In a shell environment
+ you might have to write \\x0a to prevent the shell from eating your \.
+ If you chose filename extension matching, this is the extension to be
+ recognised (without the '.', the \x0a specials are not allowed). Extension
+ matching is case sensitive, and slashes '/' are not allowed!
+ - 'mask' is an (optional, defaults to all 0xff) mask. You can mask out some
+ bits from matching by supplying a string like magic and as long as magic.
+ The mask is anded with the byte sequence of the file. Note that you must
+ escape any NUL bytes; parsing halts at the first one. Ignored when using
+ filename extension matching.
+ - 'interpreter' is the program that should be invoked with the binary as first
+ argument (specify the full path)
+ - 'flags' is an optional field that controls several aspects of the invocation
+ of the interpreter. It is a string of capital letters, each controls a
+ certain aspect. The following flags are supported -
+ 'P' - preserve-argv[0]. Legacy behavior of binfmt_misc is to overwrite
+ the original argv[0] with the full path to the binary. When this
+ flag is included, binfmt_misc will add an argument to the argument
+ vector for this purpose, thus preserving the original argv[0].
+ e.g. If your interp is set to /bin/foo and you run `blah` (which is
+ in /usr/local/bin), then the kernel will execute /bin/foo with
+ argv[] set to ["/bin/foo", "/usr/local/bin/blah", "blah"]. The
+ interp has to be aware of this so it can execute /usr/local/bin/blah
+ with argv[] set to ["blah"].
+ 'O' - open-binary. Legacy behavior of binfmt_misc is to pass the full path
+ of the binary to the interpreter as an argument. When this flag is
+ included, binfmt_misc will open the file for reading and pass its
+ descriptor as an argument, instead of the full path, thus allowing
+ the interpreter to execute non-readable binaries. This feature
+ should be used with care - the interpreter has to be trusted not to
+ emit the contents of the non-readable binary.
+ 'C' - credentials. Currently, the behavior of binfmt_misc is to calculate
+ the credentials and security token of the new process according to
+ the interpreter. When this flag is included, these attributes are
+ calculated according to the binary. It also implies the 'O' flag.
+ This feature should be used with care as the interpreter
+ will run with root permissions when a setuid binary owned by root
+ is run with binfmt_misc.
+
+
+There are some restrictions:
+ - the whole register string may not exceed 1920 characters
+ - the magic must reside in the first 128 bytes of the file, i.e.
+ offset+size(magic) has to be less than 128
+ - the interpreter string may not exceed 127 characters
+
+To use binfmt_misc you have to mount it first. You can mount it with
+"mount -t binfmt_misc none /proc/sys/fs/binfmt_misc" command, or you can add
+a line "none /proc/sys/fs/binfmt_misc binfmt_misc defaults 0 0" to your
+/etc/fstab so it auto mounts on boot.
+
+You may want to add the binary formats in one of your /etc/rc scripts during
+boot-up. Read the manual of your init program to figure out how to do this
+right.
+
+Think about the order of adding entries! Later added entries are matched first!
+
+
+A few examples (assumed you are in /proc/sys/fs/binfmt_misc):
+
+- enable support for em86 (like binfmt_em86, for Alpha AXP only):
+ echo ':i386:M::\x7fELF\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfb\xff\xff:/bin/em86:' > register
+ echo ':i486:M::\x7fELF\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x06:\xff\xff\xff\xff\xff\xfe\xfe\xff\xff\xff\xff\xff\xff\xff\xff\xff\xfb\xff\xff:/bin/em86:' > register
+
+- enable support for packed DOS applications (pre-configured dosemu hdimages):
+ echo ':DEXE:M::\x0eDEX::/usr/bin/dosexec:' > register
+
+- enable support for Windows executables using wine:
+ echo ':DOSWin:M::MZ::/usr/local/bin/wine:' > register
+
+For java support see Documentation/java.txt
+
+
+You can enable/disable binfmt_misc or one binary type by echoing 0 (to disable)
+or 1 (to enable) to /proc/sys/fs/binfmt_misc/status or /proc/.../the_name.
+Catting the file tells you the current status of binfmt_misc/the entry.
+
+You can remove one entry or all entries by echoing -1 to /proc/.../the_name
+or /proc/sys/fs/binfmt_misc/status.
+
+
+HINTS:
+======
+
+If you want to pass special arguments to your interpreter, you can
+write a wrapper script for it. See Documentation/java.txt for an
+example.
+
+Your interpreter should NOT look in the PATH for the filename; the kernel
+passes it the full filename (or the file descriptor) to use. Using $PATH can
+cause unexpected behaviour and can be a security hazard.
+
+
+Richard Günther <rguenth@tat.physik.uni-tuebingen.de>
diff --git a/Documentation/blackfin/00-INDEX b/Documentation/blackfin/00-INDEX
new file mode 100644
index 000000000..c54fcdd4a
--- /dev/null
+++ b/Documentation/blackfin/00-INDEX
@@ -0,0 +1,10 @@
+00-INDEX
+ - This file
+Makefile
+ - Makefile for gptimers example file.
+bfin-gpio-notes.txt
+ - Notes in developing/using bfin-gpio driver.
+bfin-spi-notes.txt
+ - Notes for using bfin spi bus driver.
+gptimers-example.c
+ - gptimers example
diff --git a/Documentation/blackfin/Makefile b/Documentation/blackfin/Makefile
new file mode 100644
index 000000000..6782c58fb
--- /dev/null
+++ b/Documentation/blackfin/Makefile
@@ -0,0 +1,5 @@
+ifneq ($(CONFIG_BLACKFIN),)
+ifneq ($(CONFIG_BFIN_GPTIMERS),)
+obj-m := gptimers-example.o
+endif
+endif
diff --git a/Documentation/blackfin/bfin-gpio-notes.txt b/Documentation/blackfin/bfin-gpio-notes.txt
new file mode 100644
index 000000000..d245f39c3
--- /dev/null
+++ b/Documentation/blackfin/bfin-gpio-notes.txt
@@ -0,0 +1,71 @@
+/*
+ * File: Documentation/blackfin/bfin-gpio-notes.txt
+ * Based on:
+ * Author:
+ *
+ * Created: $Id: bfin-gpio-note.txt 2008-11-24 16:42 grafyang $
+ * Description: This file contains the notes in developing/using bfin-gpio.
+ *
+ *
+ * Rev:
+ *
+ * Modified:
+ * Copyright 2004-2008 Analog Devices Inc.
+ *
+ * Bugs: Enter bugs at http://blackfin.uclinux.org/
+ *
+ */
+
+
+1. Blackfin GPIO introduction
+
+ There are many GPIO pins on Blackfin. Most of these pins are muxed to
+ multi-functions. They can be configured as peripheral, or just as GPIO,
+ configured to input with interrupt enabled, or output.
+
+ For detailed information, please see "arch/blackfin/kernel/bfin_gpio.c",
+ or the relevant HRM.
+
+
+2. Avoiding resource conflict
+
+ Followed function groups are used to avoiding resource conflict,
+ - Use the pin as peripheral,
+ int peripheral_request(unsigned short per, const char *label);
+ int peripheral_request_list(const unsigned short per[], const char *label);
+ void peripheral_free(unsigned short per);
+ void peripheral_free_list(const unsigned short per[]);
+ - Use the pin as GPIO,
+ int bfin_gpio_request(unsigned gpio, const char *label);
+ void bfin_gpio_free(unsigned gpio);
+ - Use the pin as GPIO interrupt,
+ int bfin_gpio_irq_request(unsigned gpio, const char *label);
+ void bfin_gpio_irq_free(unsigned gpio);
+
+ The request functions will record the function state for a certain pin,
+ the free functions will clear its function state.
+ Once a pin is requested, it can't be requested again before it is freed by
+ previous caller, otherwise kernel will dump stacks, and the request
+ function fail.
+ These functions are wrapped by other functions, most of the users need not
+ care.
+
+
+3. But there are some exceptions
+ - Kernel permit the identical GPIO be requested both as GPIO and GPIO
+ interrupt.
+ Some drivers, like gpio-keys, need this behavior. Kernel only print out
+ warning messages like,
+ bfin-gpio: GPIO 24 is already reserved by gpio-keys: BTN0, and you are
+configuring it as IRQ!
+
+ Note: Consider the case that, if there are two drivers need the
+ identical GPIO, one of them use it as GPIO, the other use it as
+ GPIO interrupt. This will really cause resource conflict. So if
+ there is any abnormal driver behavior, please check the bfin-gpio
+ warning messages.
+
+ - Kernel permit the identical GPIO be requested from the same driver twice.
+
+
+
diff --git a/Documentation/blackfin/bfin-spi-notes.txt b/Documentation/blackfin/bfin-spi-notes.txt
new file mode 100644
index 000000000..eae6eaf2a
--- /dev/null
+++ b/Documentation/blackfin/bfin-spi-notes.txt
@@ -0,0 +1,16 @@
+SPI Chip Select behavior:
+
+With the Blackfin on-chip SPI peripheral, there is some logic tied to the CPHA
+bit whether the Slave Select Line is controlled by hardware (CPHA=0) or
+controlled by software (CPHA=1). However, the Linux SPI bus driver assumes that
+the Slave Select is always under software control and being asserted during
+the entire SPI transfer. - And not just bits_per_word duration.
+
+In most cases you can utilize SPI MODE_3 instead of MODE_0 to work-around this
+behavior. If your SPI slave device in question requires SPI MODE_0 or MODE_2
+timing, you can utilize the GPIO controlled SPI Slave Select option instead.
+In this case, you should use GPIO based CS for all of your slaves and not just
+the ones using mode 0 or 2 in order to guarantee correct CS toggling behavior.
+
+You can even use the same pin whose peripheral role is a SSEL,
+but use it as a GPIO instead.
diff --git a/Documentation/blackfin/gptimers-example.c b/Documentation/blackfin/gptimers-example.c
new file mode 100644
index 000000000..b1bd6340e
--- /dev/null
+++ b/Documentation/blackfin/gptimers-example.c
@@ -0,0 +1,83 @@
+/*
+ * Simple gptimers example
+ * http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:drivers:gptimers
+ *
+ * Copyright 2007-2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+
+#include <asm/gptimers.h>
+#include <asm/portmux.h>
+
+/* ... random driver includes ... */
+
+#define DRIVER_NAME "gptimer_example"
+
+struct gptimer_data {
+ uint32_t period, width;
+};
+static struct gptimer_data data;
+
+/* ... random driver state ... */
+
+static irqreturn_t gptimer_example_irq(int irq, void *dev_id)
+{
+ struct gptimer_data *data = dev_id;
+
+ /* make sure it was our timer which caused the interrupt */
+ if (!get_gptimer_intr(TIMER5_id))
+ return IRQ_NONE;
+
+ /* read the width/period values that were captured for the waveform */
+ data->width = get_gptimer_pwidth(TIMER5_id);
+ data->period = get_gptimer_period(TIMER5_id);
+
+ /* acknowledge the interrupt */
+ clear_gptimer_intr(TIMER5_id);
+
+ /* tell the upper layers we took care of things */
+ return IRQ_HANDLED;
+}
+
+/* ... random driver code ... */
+
+static int __init gptimer_example_init(void)
+{
+ int ret;
+
+ /* grab the peripheral pins */
+ ret = peripheral_request(P_TMR5, DRIVER_NAME);
+ if (ret) {
+ printk(KERN_NOTICE DRIVER_NAME ": peripheral request failed\n");
+ return ret;
+ }
+
+ /* grab the IRQ for the timer */
+ ret = request_irq(IRQ_TIMER5, gptimer_example_irq, IRQF_SHARED, DRIVER_NAME, &data);
+ if (ret) {
+ printk(KERN_NOTICE DRIVER_NAME ": IRQ request failed\n");
+ peripheral_free(P_TMR5);
+ return ret;
+ }
+
+ /* setup the timer and enable it */
+ set_gptimer_config(TIMER5_id, WDTH_CAP | PULSE_HI | PERIOD_CNT | IRQ_ENA);
+ enable_gptimers(TIMER5bit);
+
+ return 0;
+}
+module_init(gptimer_example_init);
+
+static void __exit gptimer_example_exit(void)
+{
+ disable_gptimers(TIMER5bit);
+ free_irq(IRQ_TIMER5, &data);
+ peripheral_free(P_TMR5);
+}
+module_exit(gptimer_example_exit);
+
+MODULE_LICENSE("BSD");
diff --git a/Documentation/block/00-INDEX b/Documentation/block/00-INDEX
new file mode 100644
index 000000000..e840b4761
--- /dev/null
+++ b/Documentation/block/00-INDEX
@@ -0,0 +1,28 @@
+00-INDEX
+ - This file
+biodoc.txt
+ - Notes on the Generic Block Layer Rewrite in Linux 2.5
+capability.txt
+ - Generic Block Device Capability (/sys/block/<device>/capability)
+cfq-iosched.txt
+ - CFQ IO scheduler tunables
+cmdline-partition.txt
+ - how to specify block device partitions on kernel command line
+data-integrity.txt
+ - Block data integrity
+deadline-iosched.txt
+ - Deadline IO scheduler tunables
+ioprio.txt
+ - Block io priorities (in CFQ scheduler)
+null_blk.txt
+ - Null block for block-layer benchmarking.
+queue-sysfs.txt
+ - Queue's sysfs entries
+request.txt
+ - The members of struct request (in include/linux/blkdev.h)
+stat.txt
+ - Block layer statistics in /sys/block/<device>/stat
+switching-sched.txt
+ - Switching I/O schedulers at runtime
+writeback_cache_control.txt
+ - Control of volatile write back caches
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
new file mode 100644
index 000000000..fd12c0d83
--- /dev/null
+++ b/Documentation/block/biodoc.txt
@@ -0,0 +1,1180 @@
+ Notes on the Generic Block Layer Rewrite in Linux 2.5
+ =====================================================
+
+Notes Written on Jan 15, 2002:
+ Jens Axboe <jens.axboe@oracle.com>
+ Suparna Bhattacharya <suparna@in.ibm.com>
+
+Last Updated May 2, 2002
+September 2003: Updated I/O Scheduler portions
+ Nick Piggin <npiggin@kernel.dk>
+
+Introduction:
+
+These are some notes describing some aspects of the 2.5 block layer in the
+context of the bio rewrite. The idea is to bring out some of the key
+changes and a glimpse of the rationale behind those changes.
+
+Please mail corrections & suggestions to suparna@in.ibm.com.
+
+Credits:
+---------
+
+2.5 bio rewrite:
+ Jens Axboe <jens.axboe@oracle.com>
+
+Many aspects of the generic block layer redesign were driven by and evolved
+over discussions, prior patches and the collective experience of several
+people. See sections 8 and 9 for a list of some related references.
+
+The following people helped with review comments and inputs for this
+document:
+ Christoph Hellwig <hch@infradead.org>
+ Arjan van de Ven <arjanv@redhat.com>
+ Randy Dunlap <rdunlap@xenotime.net>
+ Andre Hedrick <andre@linux-ide.org>
+
+The following people helped with fixes/contributions to the bio patches
+while it was still work-in-progress:
+ David S. Miller <davem@redhat.com>
+
+
+Description of Contents:
+------------------------
+
+1. Scope for tuning of logic to various needs
+ 1.1 Tuning based on device or low level driver capabilities
+ - Per-queue parameters
+ - Highmem I/O support
+ - I/O scheduler modularization
+ 1.2 Tuning based on high level requirements/capabilities
+ 1.2.1 Request Priority/Latency
+ 1.3 Direct access/bypass to lower layers for diagnostics and special
+ device operations
+ 1.3.1 Pre-built commands
+2. New flexible and generic but minimalist i/o structure or descriptor
+ (instead of using buffer heads at the i/o layer)
+ 2.1 Requirements/Goals addressed
+ 2.2 The bio struct in detail (multi-page io unit)
+ 2.3 Changes in the request structure
+3. Using bios
+ 3.1 Setup/teardown (allocation, splitting)
+ 3.2 Generic bio helper routines
+ 3.2.1 Traversing segments and completion units in a request
+ 3.2.2 Setting up DMA scatterlists
+ 3.2.3 I/O completion
+ 3.2.4 Implications for drivers that do not interpret bios (don't handle
+ multiple segments)
+ 3.2.5 Request command tagging
+ 3.3 I/O submission
+4. The I/O scheduler
+5. Scalability related changes
+ 5.1 Granular locking: Removal of io_request_lock
+ 5.2 Prepare for transition to 64 bit sector_t
+6. Other Changes/Implications
+ 6.1 Partition re-mapping handled by the generic block layer
+7. A few tips on migration of older drivers
+8. A list of prior/related/impacted patches/ideas
+9. Other References/Discussion Threads
+
+---------------------------------------------------------------------------
+
+Bio Notes
+--------
+
+Let us discuss the changes in the context of how some overall goals for the
+block layer are addressed.
+
+1. Scope for tuning the generic logic to satisfy various requirements
+
+The block layer design supports adaptable abstractions to handle common
+processing with the ability to tune the logic to an appropriate extent
+depending on the nature of the device and the requirements of the caller.
+One of the objectives of the rewrite was to increase the degree of tunability
+and to enable higher level code to utilize underlying device/driver
+capabilities to the maximum extent for better i/o performance. This is
+important especially in the light of ever improving hardware capabilities
+and application/middleware software designed to take advantage of these
+capabilities.
+
+1.1 Tuning based on low level device / driver capabilities
+
+Sophisticated devices with large built-in caches, intelligent i/o scheduling
+optimizations, high memory DMA support, etc may find some of the
+generic processing an overhead, while for less capable devices the
+generic functionality is essential for performance or correctness reasons.
+Knowledge of some of the capabilities or parameters of the device should be
+used at the generic block layer to take the right decisions on
+behalf of the driver.
+
+How is this achieved ?
+
+Tuning at a per-queue level:
+
+i. Per-queue limits/values exported to the generic layer by the driver
+
+Various parameters that the generic i/o scheduler logic uses are set at
+a per-queue level (e.g maximum request size, maximum number of segments in
+a scatter-gather list, hardsect size)
+
+Some parameters that were earlier available as global arrays indexed by
+major/minor are now directly associated with the queue. Some of these may
+move into the block device structure in the future. Some characteristics
+have been incorporated into a queue flags field rather than separate fields
+in themselves. There are blk_queue_xxx functions to set the parameters,
+rather than update the fields directly
+
+Some new queue property settings:
+
+ blk_queue_bounce_limit(q, u64 dma_address)
+ Enable I/O to highmem pages, dma_address being the
+ limit. No highmem default.
+
+ blk_queue_max_sectors(q, max_sectors)
+ Sets two variables that limit the size of the request.
+
+ - The request queue's max_sectors, which is a soft size in
+ units of 512 byte sectors, and could be dynamically varied
+ by the core kernel.
+
+ - The request queue's max_hw_sectors, which is a hard limit
+ and reflects the maximum size request a driver can handle
+ in units of 512 byte sectors.
+
+ The default for both max_sectors and max_hw_sectors is
+ 255. The upper limit of max_sectors is 1024.
+
+ blk_queue_max_phys_segments(q, max_segments)
+ Maximum physical segments you can handle in a request. 128
+ default (driver limit). (See 3.2.2)
+
+ blk_queue_max_hw_segments(q, max_segments)
+ Maximum dma segments the hardware can handle in a request. 128
+ default (host adapter limit, after dma remapping).
+ (See 3.2.2)
+
+ blk_queue_max_segment_size(q, max_seg_size)
+ Maximum size of a clustered segment, 64kB default.
+
+ blk_queue_hardsect_size(q, hardsect_size)
+ Lowest possible sector size that the hardware can operate
+ on, 512 bytes default.
+
+New queue flags:
+
+ QUEUE_FLAG_CLUSTER (see 3.2.2)
+ QUEUE_FLAG_QUEUED (see 3.2.4)
+
+
+ii. High-mem i/o capabilities are now considered the default
+
+The generic bounce buffer logic, present in 2.4, where the block layer would
+by default copyin/out i/o requests on high-memory buffers to low-memory buffers
+assuming that the driver wouldn't be able to handle it directly, has been
+changed in 2.5. The bounce logic is now applied only for memory ranges
+for which the device cannot handle i/o. A driver can specify this by
+setting the queue bounce limit for the request queue for the device
+(blk_queue_bounce_limit()). This avoids the inefficiencies of the copyin/out
+where a device is capable of handling high memory i/o.
+
+In order to enable high-memory i/o where the device is capable of supporting
+it, the pci dma mapping routines and associated data structures have now been
+modified to accomplish a direct page -> bus translation, without requiring
+a virtual address mapping (unlike the earlier scheme of virtual address
+-> bus translation). So this works uniformly for high-memory pages (which
+do not have a corresponding kernel virtual address space mapping) and
+low-memory pages.
+
+Note: Please refer to Documentation/DMA-API-HOWTO.txt for a discussion
+on PCI high mem DMA aspects and mapping of scatter gather lists, and support
+for 64 bit PCI.
+
+Special handling is required only for cases where i/o needs to happen on
+pages at physical memory addresses beyond what the device can support. In these
+cases, a bounce bio representing a buffer from the supported memory range
+is used for performing the i/o with copyin/copyout as needed depending on
+the type of the operation. For example, in case of a read operation, the
+data read has to be copied to the original buffer on i/o completion, so a
+callback routine is set up to do this, while for write, the data is copied
+from the original buffer to the bounce buffer prior to issuing the
+operation. Since an original buffer may be in a high memory area that's not
+mapped in kernel virtual addr, a kmap operation may be required for
+performing the copy, and special care may be needed in the completion path
+as it may not be in irq context. Special care is also required (by way of
+GFP flags) when allocating bounce buffers, to avoid certain highmem
+deadlock possibilities.
+
+It is also possible that a bounce buffer may be allocated from high-memory
+area that's not mapped in kernel virtual addr, but within the range that the
+device can use directly; so the bounce page may need to be kmapped during
+copy operations. [Note: This does not hold in the current implementation,
+though]
+
+There are some situations when pages from high memory may need to
+be kmapped, even if bounce buffers are not necessary. For example a device
+may need to abort DMA operations and revert to PIO for the transfer, in
+which case a virtual mapping of the page is required. For SCSI it is also
+done in some scenarios where the low level driver cannot be trusted to
+handle a single sg entry correctly. The driver is expected to perform the
+kmaps as needed on such occasions using the __bio_kmap_atomic and bio_kmap_irq
+routines as appropriate. A driver could also use the blk_queue_bounce()
+routine on its own to bounce highmem i/o to low memory for specific requests
+if so desired.
+
+iii. The i/o scheduler algorithm itself can be replaced/set as appropriate
+
+As in 2.4, it is possible to plugin a brand new i/o scheduler for a particular
+queue or pick from (copy) existing generic schedulers and replace/override
+certain portions of it. The 2.5 rewrite provides improved modularization
+of the i/o scheduler. There are more pluggable callbacks, e.g for init,
+add request, extract request, which makes it possible to abstract specific
+i/o scheduling algorithm aspects and details outside of the generic loop.
+It also makes it possible to completely hide the implementation details of
+the i/o scheduler from block drivers.
+
+I/O scheduler wrappers are to be used instead of accessing the queue directly.
+See section 4. The I/O scheduler for details.
+
+1.2 Tuning Based on High level code capabilities
+
+i. Application capabilities for raw i/o
+
+This comes from some of the high-performance database/middleware
+requirements where an application prefers to make its own i/o scheduling
+decisions based on an understanding of the access patterns and i/o
+characteristics
+
+ii. High performance filesystems or other higher level kernel code's
+capabilities
+
+Kernel components like filesystems could also take their own i/o scheduling
+decisions for optimizing performance. Journalling filesystems may need
+some control over i/o ordering.
+
+What kind of support exists at the generic block layer for this ?
+
+The flags and rw fields in the bio structure can be used for some tuning
+from above e.g indicating that an i/o is just a readahead request, or priority
+settings (currently unused). As far as user applications are concerned they
+would need an additional mechanism either via open flags or ioctls, or some
+other upper level mechanism to communicate such settings to block.
+
+1.2.1 Request Priority/Latency
+
+Todo/Under discussion:
+Arjan's proposed request priority scheme allows higher levels some broad
+ control (high/med/low) over the priority of an i/o request vs other pending
+ requests in the queue. For example it allows reads for bringing in an
+ executable page on demand to be given a higher priority over pending write
+ requests which haven't aged too much on the queue. Potentially this priority
+ could even be exposed to applications in some manner, providing higher level
+ tunability. Time based aging avoids starvation of lower priority
+ requests. Some bits in the bi_rw flags field in the bio structure are
+ intended to be used for this priority information.
+
+
+1.3 Direct Access to Low level Device/Driver Capabilities (Bypass mode)
+ (e.g Diagnostics, Systems Management)
+
+There are situations where high-level code needs to have direct access to
+the low level device capabilities or requires the ability to issue commands
+to the device bypassing some of the intermediate i/o layers.
+These could, for example, be special control commands issued through ioctl
+interfaces, or could be raw read/write commands that stress the drive's
+capabilities for certain kinds of fitness tests. Having direct interfaces at
+multiple levels without having to pass through upper layers makes
+it possible to perform bottom up validation of the i/o path, layer by
+layer, starting from the media.
+
+The normal i/o submission interfaces, e.g submit_bio, could be bypassed
+for specially crafted requests which such ioctl or diagnostics
+interfaces would typically use, and the elevator add_request routine
+can instead be used to directly insert such requests in the queue or preferably
+the blk_do_rq routine can be used to place the request on the queue and
+wait for completion. Alternatively, sometimes the caller might just
+invoke a lower level driver specific interface with the request as a
+parameter.
+
+If the request is a means for passing on special information associated with
+the command, then such information is associated with the request->special
+field (rather than misuse the request->buffer field which is meant for the
+request data buffer's virtual mapping).
+
+For passing request data, the caller must build up a bio descriptor
+representing the concerned memory buffer if the underlying driver interprets
+bio segments or uses the block layer end*request* functions for i/o
+completion. Alternatively one could directly use the request->buffer field to
+specify the virtual address of the buffer, if the driver expects buffer
+addresses passed in this way and ignores bio entries for the request type
+involved. In the latter case, the driver would modify and manage the
+request->buffer, request->sector and request->nr_sectors or
+request->current_nr_sectors fields itself rather than using the block layer
+end_request or end_that_request_first completion interfaces.
+(See 2.3 or Documentation/block/request.txt for a brief explanation of
+the request structure fields)
+
+[TBD: end_that_request_last should be usable even in this case;
+Perhaps an end_that_direct_request_first routine could be implemented to make
+handling direct requests easier for such drivers; Also for drivers that
+expect bios, a helper function could be provided for setting up a bio
+corresponding to a data buffer]
+
+<JENS: I dont understand the above, why is end_that_request_first() not
+usable? Or _last for that matter. I must be missing something>
+<SUP: What I meant here was that if the request doesn't have a bio, then
+ end_that_request_first doesn't modify nr_sectors or current_nr_sectors,
+ and hence can't be used for advancing request state settings on the
+ completion of partial transfers. The driver has to modify these fields
+ directly by hand.
+ This is because end_that_request_first only iterates over the bio list,
+ and always returns 0 if there are none associated with the request.
+ _last works OK in this case, and is not a problem, as I mentioned earlier
+>
+
+1.3.1 Pre-built Commands
+
+A request can be created with a pre-built custom command to be sent directly
+to the device. The cmd block in the request structure has room for filling
+in the command bytes. (i.e rq->cmd is now 16 bytes in size, and meant for
+command pre-building, and the type of the request is now indicated
+through rq->flags instead of via rq->cmd)
+
+The request structure flags can be set up to indicate the type of request
+in such cases (REQ_PC: direct packet command passed to driver, REQ_BLOCK_PC:
+packet command issued via blk_do_rq, REQ_SPECIAL: special request).
+
+It can help to pre-build device commands for requests in advance.
+Drivers can now specify a request prepare function (q->prep_rq_fn) that the
+block layer would invoke to pre-build device commands for a given request,
+or perform other preparatory processing for the request. This is routine is
+called by elv_next_request(), i.e. typically just before servicing a request.
+(The prepare function would not be called for requests that have REQ_DONTPREP
+enabled)
+
+Aside:
+ Pre-building could possibly even be done early, i.e before placing the
+ request on the queue, rather than construct the command on the fly in the
+ driver while servicing the request queue when it may affect latencies in
+ interrupt context or responsiveness in general. One way to add early
+ pre-building would be to do it whenever we fail to merge on a request.
+ Now REQ_NOMERGE is set in the request flags to skip this one in the future,
+ which means that it will not change before we feed it to the device. So
+ the pre-builder hook can be invoked there.
+
+
+2. Flexible and generic but minimalist i/o structure/descriptor.
+
+2.1 Reason for a new structure and requirements addressed
+
+Prior to 2.5, buffer heads were used as the unit of i/o at the generic block
+layer, and the low level request structure was associated with a chain of
+buffer heads for a contiguous i/o request. This led to certain inefficiencies
+when it came to large i/o requests and readv/writev style operations, as it
+forced such requests to be broken up into small chunks before being passed
+on to the generic block layer, only to be merged by the i/o scheduler
+when the underlying device was capable of handling the i/o in one shot.
+Also, using the buffer head as an i/o structure for i/os that didn't originate
+from the buffer cache unnecessarily added to the weight of the descriptors
+which were generated for each such chunk.
+
+The following were some of the goals and expectations considered in the
+redesign of the block i/o data structure in 2.5.
+
+i. Should be appropriate as a descriptor for both raw and buffered i/o -
+ avoid cache related fields which are irrelevant in the direct/page i/o path,
+ or filesystem block size alignment restrictions which may not be relevant
+ for raw i/o.
+ii. Ability to represent high-memory buffers (which do not have a virtual
+ address mapping in kernel address space).
+iii.Ability to represent large i/os w/o unnecessarily breaking them up (i.e
+ greater than PAGE_SIZE chunks in one shot)
+iv. At the same time, ability to retain independent identity of i/os from
+ different sources or i/o units requiring individual completion (e.g. for
+ latency reasons)
+v. Ability to represent an i/o involving multiple physical memory segments
+ (including non-page aligned page fragments, as specified via readv/writev)
+ without unnecessarily breaking it up, if the underlying device is capable of
+ handling it.
+vi. Preferably should be based on a memory descriptor structure that can be
+ passed around different types of subsystems or layers, maybe even
+ networking, without duplication or extra copies of data/descriptor fields
+ themselves in the process
+vii.Ability to handle the possibility of splits/merges as the structure passes
+ through layered drivers (lvm, md, evms), with minimal overhead.
+
+The solution was to define a new structure (bio) for the block layer,
+instead of using the buffer head structure (bh) directly, the idea being
+avoidance of some associated baggage and limitations. The bio structure
+is uniformly used for all i/o at the block layer ; it forms a part of the
+bh structure for buffered i/o, and in the case of raw/direct i/o kiobufs are
+mapped to bio structures.
+
+2.2 The bio struct
+
+The bio structure uses a vector representation pointing to an array of tuples
+of <page, offset, len> to describe the i/o buffer, and has various other
+fields describing i/o parameters and state that needs to be maintained for
+performing the i/o.
+
+Notice that this representation means that a bio has no virtual address
+mapping at all (unlike buffer heads).
+
+struct bio_vec {
+ struct page *bv_page;
+ unsigned short bv_len;
+ unsigned short bv_offset;
+};
+
+/*
+ * main unit of I/O for the block layer and lower layers (ie drivers)
+ */
+struct bio {
+ struct bio *bi_next; /* request queue link */
+ struct block_device *bi_bdev; /* target device */
+ unsigned long bi_flags; /* status, command, etc */
+ unsigned long bi_rw; /* low bits: r/w, high: priority */
+
+ unsigned int bi_vcnt; /* how may bio_vec's */
+ struct bvec_iter bi_iter; /* current index into bio_vec array */
+
+ unsigned int bi_size; /* total size in bytes */
+ unsigned short bi_phys_segments; /* segments after physaddr coalesce*/
+ unsigned short bi_hw_segments; /* segments after DMA remapping */
+ unsigned int bi_max; /* max bio_vecs we can hold
+ used as index into pool */
+ struct bio_vec *bi_io_vec; /* the actual vec list */
+ bio_end_io_t *bi_end_io; /* bi_end_io (bio) */
+ atomic_t bi_cnt; /* pin count: free when it hits zero */
+ void *bi_private;
+};
+
+With this multipage bio design:
+
+- Large i/os can be sent down in one go using a bio_vec list consisting
+ of an array of <page, offset, len> fragments (similar to the way fragments
+ are represented in the zero-copy network code)
+- Splitting of an i/o request across multiple devices (as in the case of
+ lvm or raid) is achieved by cloning the bio (where the clone points to
+ the same bi_io_vec array, but with the index and size accordingly modified)
+- A linked list of bios is used as before for unrelated merges (*) - this
+ avoids reallocs and makes independent completions easier to handle.
+- Code that traverses the req list can find all the segments of a bio
+ by using rq_for_each_segment. This handles the fact that a request
+ has multiple bios, each of which can have multiple segments.
+- Drivers which can't process a large bio in one shot can use the bi_iter
+ field to keep track of the next bio_vec entry to process.
+ (e.g a 1MB bio_vec needs to be handled in max 128kB chunks for IDE)
+ [TBD: Should preferably also have a bi_voffset and bi_vlen to avoid modifying
+ bi_offset an len fields]
+
+(*) unrelated merges -- a request ends up containing two or more bios that
+ didn't originate from the same place.
+
+bi_end_io() i/o callback gets called on i/o completion of the entire bio.
+
+At a lower level, drivers build a scatter gather list from the merged bios.
+The scatter gather list is in the form of an array of <page, offset, len>
+entries with their corresponding dma address mappings filled in at the
+appropriate time. As an optimization, contiguous physical pages can be
+covered by a single entry where <page> refers to the first page and <len>
+covers the range of pages (up to 16 contiguous pages could be covered this
+way). There is a helper routine (blk_rq_map_sg) which drivers can use to build
+the sg list.
+
+Note: Right now the only user of bios with more than one page is ll_rw_kio,
+which in turn means that only raw I/O uses it (direct i/o may not work
+right now). The intent however is to enable clustering of pages etc to
+become possible. The pagebuf abstraction layer from SGI also uses multi-page
+bios, but that is currently not included in the stock development kernels.
+The same is true of Andrew Morton's work-in-progress multipage bio writeout
+and readahead patches.
+
+2.3 Changes in the Request Structure
+
+The request structure is the structure that gets passed down to low level
+drivers. The block layer make_request function builds up a request structure,
+places it on the queue and invokes the drivers request_fn. The driver makes
+use of block layer helper routine elv_next_request to pull the next request
+off the queue. Control or diagnostic functions might bypass block and directly
+invoke underlying driver entry points passing in a specially constructed
+request structure.
+
+Only some relevant fields (mainly those which changed or may be referred
+to in some of the discussion here) are listed below, not necessarily in
+the order in which they occur in the structure (see include/linux/blkdev.h)
+Refer to Documentation/block/request.txt for details about all the request
+structure fields and a quick reference about the layers which are
+supposed to use or modify those fields.
+
+struct request {
+ struct list_head queuelist; /* Not meant to be directly accessed by
+ the driver.
+ Used by q->elv_next_request_fn
+ rq->queue is gone
+ */
+ .
+ .
+ unsigned char cmd[16]; /* prebuilt command data block */
+ unsigned long flags; /* also includes earlier rq->cmd settings */
+ .
+ .
+ sector_t sector; /* this field is now of type sector_t instead of int
+ preparation for 64 bit sectors */
+ .
+ .
+
+ /* Number of scatter-gather DMA addr+len pairs after
+ * physical address coalescing is performed.
+ */
+ unsigned short nr_phys_segments;
+
+ /* Number of scatter-gather addr+len pairs after
+ * physical and DMA remapping hardware coalescing is performed.
+ * This is the number of scatter-gather entries the driver
+ * will actually have to deal with after DMA mapping is done.
+ */
+ unsigned short nr_hw_segments;
+
+ /* Various sector counts */
+ unsigned long nr_sectors; /* no. of sectors left: driver modifiable */
+ unsigned long hard_nr_sectors; /* block internal copy of above */
+ unsigned int current_nr_sectors; /* no. of sectors left in the
+ current segment:driver modifiable */
+ unsigned long hard_cur_sectors; /* block internal copy of the above */
+ .
+ .
+ int tag; /* command tag associated with request */
+ void *special; /* same as before */
+ char *buffer; /* valid only for low memory buffers up to
+ current_nr_sectors */
+ .
+ .
+ struct bio *bio, *biotail; /* bio list instead of bh */
+ struct request_list *rl;
+}
+
+See the rq_flag_bits definitions for an explanation of the various flags
+available. Some bits are used by the block layer or i/o scheduler.
+
+The behaviour of the various sector counts are almost the same as before,
+except that since we have multi-segment bios, current_nr_sectors refers
+to the numbers of sectors in the current segment being processed which could
+be one of the many segments in the current bio (i.e i/o completion unit).
+The nr_sectors value refers to the total number of sectors in the whole
+request that remain to be transferred (no change). The purpose of the
+hard_xxx values is for block to remember these counts every time it hands
+over the request to the driver. These values are updated by block on
+end_that_request_first, i.e. every time the driver completes a part of the
+transfer and invokes block end*request helpers to mark this. The
+driver should not modify these values. The block layer sets up the
+nr_sectors and current_nr_sectors fields (based on the corresponding
+hard_xxx values and the number of bytes transferred) and updates it on
+every transfer that invokes end_that_request_first. It does the same for the
+buffer, bio, bio->bi_iter fields too.
+
+The buffer field is just a virtual address mapping of the current segment
+of the i/o buffer in cases where the buffer resides in low-memory. For high
+memory i/o, this field is not valid and must not be used by drivers.
+
+Code that sets up its own request structures and passes them down to
+a driver needs to be careful about interoperation with the block layer helper
+functions which the driver uses. (Section 1.3)
+
+3. Using bios
+
+3.1 Setup/Teardown
+
+There are routines for managing the allocation, and reference counting, and
+freeing of bios (bio_alloc, bio_get, bio_put).
+
+This makes use of Ingo Molnar's mempool implementation, which enables
+subsystems like bio to maintain their own reserve memory pools for guaranteed
+deadlock-free allocations during extreme VM load. For example, the VM
+subsystem makes use of the block layer to writeout dirty pages in order to be
+able to free up memory space, a case which needs careful handling. The
+allocation logic draws from the preallocated emergency reserve in situations
+where it cannot allocate through normal means. If the pool is empty and it
+can wait, then it would trigger action that would help free up memory or
+replenish the pool (without deadlocking) and wait for availability in the pool.
+If it is in IRQ context, and hence not in a position to do this, allocation
+could fail if the pool is empty. In general mempool always first tries to
+perform allocation without having to wait, even if it means digging into the
+pool as long it is not less that 50% full.
+
+On a free, memory is released to the pool or directly freed depending on
+the current availability in the pool. The mempool interface lets the
+subsystem specify the routines to be used for normal alloc and free. In the
+case of bio, these routines make use of the standard slab allocator.
+
+The caller of bio_alloc is expected to taken certain steps to avoid
+deadlocks, e.g. avoid trying to allocate more memory from the pool while
+already holding memory obtained from the pool.
+[TBD: This is a potential issue, though a rare possibility
+ in the bounce bio allocation that happens in the current code, since
+ it ends up allocating a second bio from the same pool while
+ holding the original bio ]
+
+Memory allocated from the pool should be released back within a limited
+amount of time (in the case of bio, that would be after the i/o is completed).
+This ensures that if part of the pool has been used up, some work (in this
+case i/o) must already be in progress and memory would be available when it
+is over. If allocating from multiple pools in the same code path, the order
+or hierarchy of allocation needs to be consistent, just the way one deals
+with multiple locks.
+
+The bio_alloc routine also needs to allocate the bio_vec_list (bvec_alloc())
+for a non-clone bio. There are the 6 pools setup for different size biovecs,
+so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the
+given size from these slabs.
+
+The bio_get() routine may be used to hold an extra reference on a bio prior
+to i/o submission, if the bio fields are likely to be accessed after the
+i/o is issued (since the bio may otherwise get freed in case i/o completion
+happens in the meantime).
+
+The bio_clone() routine may be used to duplicate a bio, where the clone
+shares the bio_vec_list with the original bio (i.e. both point to the
+same bio_vec_list). This would typically be used for splitting i/o requests
+in lvm or md.
+
+3.2 Generic bio helper Routines
+
+3.2.1 Traversing segments and completion units in a request
+
+The macro rq_for_each_segment() should be used for traversing the bios
+in the request list (drivers should avoid directly trying to do it
+themselves). Using these helpers should also make it easier to cope
+with block changes in the future.
+
+ struct req_iterator iter;
+ rq_for_each_segment(bio_vec, rq, iter)
+ /* bio_vec is now current segment */
+
+I/O completion callbacks are per-bio rather than per-segment, so drivers
+that traverse bio chains on completion need to keep that in mind. Drivers
+which don't make a distinction between segments and completion units would
+need to be reorganized to support multi-segment bios.
+
+3.2.2 Setting up DMA scatterlists
+
+The blk_rq_map_sg() helper routine would be used for setting up scatter
+gather lists from a request, so a driver need not do it on its own.
+
+ nr_segments = blk_rq_map_sg(q, rq, scatterlist);
+
+The helper routine provides a level of abstraction which makes it easier
+to modify the internals of request to scatterlist conversion down the line
+without breaking drivers. The blk_rq_map_sg routine takes care of several
+things like collapsing physically contiguous segments (if QUEUE_FLAG_CLUSTER
+is set) and correct segment accounting to avoid exceeding the limits which
+the i/o hardware can handle, based on various queue properties.
+
+- Prevents a clustered segment from crossing a 4GB mem boundary
+- Avoids building segments that would exceed the number of physical
+ memory segments that the driver can handle (phys_segments) and the
+ number that the underlying hardware can handle at once, accounting for
+ DMA remapping (hw_segments) (i.e. IOMMU aware limits).
+
+Routines which the low level driver can use to set up the segment limits:
+
+blk_queue_max_hw_segments() : Sets an upper limit of the maximum number of
+hw data segments in a request (i.e. the maximum number of address/length
+pairs the host adapter can actually hand to the device at once)
+
+blk_queue_max_phys_segments() : Sets an upper limit on the maximum number
+of physical data segments in a request (i.e. the largest sized scatter list
+a driver could handle)
+
+3.2.3 I/O completion
+
+The existing generic block layer helper routines end_request,
+end_that_request_first and end_that_request_last can be used for i/o
+completion (and setting things up so the rest of the i/o or the next
+request can be kicked of) as before. With the introduction of multi-page
+bio support, end_that_request_first requires an additional argument indicating
+the number of sectors completed.
+
+3.2.4 Implications for drivers that do not interpret bios (don't handle
+ multiple segments)
+
+Drivers that do not interpret bios e.g those which do not handle multiple
+segments and do not support i/o into high memory addresses (require bounce
+buffers) and expect only virtually mapped buffers, can access the rq->buffer
+field. As before the driver should use current_nr_sectors to determine the
+size of remaining data in the current segment (that is the maximum it can
+transfer in one go unless it interprets segments), and rely on the block layer
+end_request, or end_that_request_first/last to take care of all accounting
+and transparent mapping of the next bio segment when a segment boundary
+is crossed on completion of a transfer. (The end*request* functions should
+be used if only if the request has come down from block/bio path, not for
+direct access requests which only specify rq->buffer without a valid rq->bio)
+
+3.2.5 Generic request command tagging
+
+3.2.5.1 Tag helpers
+
+Block now offers some simple generic functionality to help support command
+queueing (typically known as tagged command queueing), ie manage more than
+one outstanding command on a queue at any given time.
+
+ blk_queue_init_tags(struct request_queue *q, int depth)
+
+ Initialize internal command tagging structures for a maximum
+ depth of 'depth'.
+
+ blk_queue_free_tags((struct request_queue *q)
+
+ Teardown tag info associated with the queue. This will be done
+ automatically by block if blk_queue_cleanup() is called on a queue
+ that is using tagging.
+
+The above are initialization and exit management, the main helpers during
+normal operations are:
+
+ blk_queue_start_tag(struct request_queue *q, struct request *rq)
+
+ Start tagged operation for this request. A free tag number between
+ 0 and 'depth' is assigned to the request (rq->tag holds this number),
+ and 'rq' is added to the internal tag management. If the maximum depth
+ for this queue is already achieved (or if the tag wasn't started for
+ some other reason), 1 is returned. Otherwise 0 is returned.
+
+ blk_queue_end_tag(struct request_queue *q, struct request *rq)
+
+ End tagged operation on this request. 'rq' is removed from the internal
+ book keeping structures.
+
+To minimize struct request and queue overhead, the tag helpers utilize some
+of the same request members that are used for normal request queue management.
+This means that a request cannot both be an active tag and be on the queue
+list at the same time. blk_queue_start_tag() will remove the request, but
+the driver must remember to call blk_queue_end_tag() before signalling
+completion of the request to the block layer. This means ending tag
+operations before calling end_that_request_last()! For an example of a user
+of these helpers, see the IDE tagged command queueing support.
+
+Certain hardware conditions may dictate a need to invalidate the block tag
+queue. For instance, on IDE any tagged request error needs to clear both
+the hardware and software block queue and enable the driver to sanely restart
+all the outstanding requests. There's a third helper to do that:
+
+ blk_queue_invalidate_tags(struct request_queue *q)
+
+ Clear the internal block tag queue and re-add all the pending requests
+ to the request queue. The driver will receive them again on the
+ next request_fn run, just like it did the first time it encountered
+ them.
+
+3.2.5.2 Tag info
+
+Some block functions exist to query current tag status or to go from a
+tag number to the associated request. These are, in no particular order:
+
+ blk_queue_tagged(q)
+
+ Returns 1 if the queue 'q' is using tagging, 0 if not.
+
+ blk_queue_tag_request(q, tag)
+
+ Returns a pointer to the request associated with tag 'tag'.
+
+ blk_queue_tag_depth(q)
+
+ Return current queue depth.
+
+ blk_queue_tag_queue(q)
+
+ Returns 1 if the queue can accept a new queued command, 0 if we are
+ at the maximum depth already.
+
+ blk_queue_rq_tagged(rq)
+
+ Returns 1 if the request 'rq' is tagged.
+
+3.2.5.2 Internal structure
+
+Internally, block manages tags in the blk_queue_tag structure:
+
+ struct blk_queue_tag {
+ struct request **tag_index; /* array or pointers to rq */
+ unsigned long *tag_map; /* bitmap of free tags */
+ struct list_head busy_list; /* fifo list of busy tags */
+ int busy; /* queue depth */
+ int max_depth; /* max queue depth */
+ };
+
+Most of the above is simple and straight forward, however busy_list may need
+a bit of explaining. Normally we don't care too much about request ordering,
+but in the event of any barrier requests in the tag queue we need to ensure
+that requests are restarted in the order they were queue. This may happen
+if the driver needs to use blk_queue_invalidate_tags().
+
+3.3 I/O Submission
+
+The routine submit_bio() is used to submit a single io. Higher level i/o
+routines make use of this:
+
+(a) Buffered i/o:
+The routine submit_bh() invokes submit_bio() on a bio corresponding to the
+bh, allocating the bio if required. ll_rw_block() uses submit_bh() as before.
+
+(b) Kiobuf i/o (for raw/direct i/o):
+The ll_rw_kio() routine breaks up the kiobuf into page sized chunks and
+maps the array to one or more multi-page bios, issuing submit_bio() to
+perform the i/o on each of these.
+
+The embedded bh array in the kiobuf structure has been removed and no
+preallocation of bios is done for kiobufs. [The intent is to remove the
+blocks array as well, but it's currently in there to kludge around direct i/o.]
+Thus kiobuf allocation has switched back to using kmalloc rather than vmalloc.
+
+Todo/Observation:
+
+ A single kiobuf structure is assumed to correspond to a contiguous range
+ of data, so brw_kiovec() invokes ll_rw_kio for each kiobuf in a kiovec.
+ So right now it wouldn't work for direct i/o on non-contiguous blocks.
+ This is to be resolved. The eventual direction is to replace kiobuf
+ by kvec's.
+
+ Badari Pulavarty has a patch to implement direct i/o correctly using
+ bio and kvec.
+
+
+(c) Page i/o:
+Todo/Under discussion:
+
+ Andrew Morton's multi-page bio patches attempt to issue multi-page
+ writeouts (and reads) from the page cache, by directly building up
+ large bios for submission completely bypassing the usage of buffer
+ heads. This work is still in progress.
+
+ Christoph Hellwig had some code that uses bios for page-io (rather than
+ bh). This isn't included in bio as yet. Christoph was also working on a
+ design for representing virtual/real extents as an entity and modifying
+ some of the address space ops interfaces to utilize this abstraction rather
+ than buffer_heads. (This is somewhat along the lines of the SGI XFS pagebuf
+ abstraction, but intended to be as lightweight as possible).
+
+(d) Direct access i/o:
+Direct access requests that do not contain bios would be submitted differently
+as discussed earlier in section 1.3.
+
+Aside:
+
+ Kvec i/o:
+
+ Ben LaHaise's aio code uses a slightly different structure instead
+ of kiobufs, called a kvec_cb. This contains an array of <page, offset, len>
+ tuples (very much like the networking code), together with a callback function
+ and data pointer. This is embedded into a brw_cb structure when passed
+ to brw_kvec_async().
+
+ Now it should be possible to directly map these kvecs to a bio. Just as while
+ cloning, in this case rather than PRE_BUILT bio_vecs, we set the bi_io_vec
+ array pointer to point to the veclet array in kvecs.
+
+ TBD: In order for this to work, some changes are needed in the way multi-page
+ bios are handled today. The values of the tuples in such a vector passed in
+ from higher level code should not be modified by the block layer in the course
+ of its request processing, since that would make it hard for the higher layer
+ to continue to use the vector descriptor (kvec) after i/o completes. Instead,
+ all such transient state should either be maintained in the request structure,
+ and passed on in some way to the endio completion routine.
+
+
+4. The I/O scheduler
+I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch
+queue and specific I/O schedulers. Unless stated otherwise, elevator is used
+to refer to both parts and I/O scheduler to specific I/O schedulers.
+
+Block layer implements generic dispatch queue in block/*.c.
+The generic dispatch queue is responsible for requeueing, handling non-fs
+requests and all other subtleties.
+
+Specific I/O schedulers are responsible for ordering normal filesystem
+requests. They can also choose to delay certain requests to improve
+throughput or whatever purpose. As the plural form indicates, there are
+multiple I/O schedulers. They can be built as modules but at least one should
+be built inside the kernel. Each queue can choose different one and can also
+change to another one dynamically.
+
+A block layer call to the i/o scheduler follows the convention elv_xxx(). This
+calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx
+and xxx might not match exactly, but use your imagination. If an elevator
+doesn't implement a function, the switch does nothing or some minimal house
+keeping work.
+
+4.1. I/O scheduler API
+
+The functions an elevator may implement are: (* are mandatory)
+elevator_merge_fn called to query requests for merge with a bio
+
+elevator_merge_req_fn called when two requests get merged. the one
+ which gets merged into the other one will be
+ never seen by I/O scheduler again. IOW, after
+ being merged, the request is gone.
+
+elevator_merged_fn called when a request in the scheduler has been
+ involved in a merge. It is used in the deadline
+ scheduler for example, to reposition the request
+ if its sorting order has changed.
+
+elevator_allow_merge_fn called whenever the block layer determines
+ that a bio can be merged into an existing
+ request safely. The io scheduler may still
+ want to stop a merge at this point if it
+ results in some sort of conflict internally,
+ this hook allows it to do that. Note however
+ that two *requests* can still be merged at later
+ time. Currently the io scheduler has no way to
+ prevent that. It can only learn about the fact
+ from elevator_merge_req_fn callback.
+
+elevator_dispatch_fn* fills the dispatch queue with ready requests.
+ I/O schedulers are free to postpone requests by
+ not filling the dispatch queue unless @force
+ is non-zero. Once dispatched, I/O schedulers
+ are not allowed to manipulate the requests -
+ they belong to generic dispatch queue.
+
+elevator_add_req_fn* called to add a new request into the scheduler
+
+elevator_former_req_fn
+elevator_latter_req_fn These return the request before or after the
+ one specified in disk sort order. Used by the
+ block layer to find merge possibilities.
+
+elevator_completed_req_fn called when a request is completed.
+
+elevator_may_queue_fn returns true if the scheduler wants to allow the
+ current context to queue a new request even if
+ it is over the queue limit. This must be used
+ very carefully!!
+
+elevator_set_req_fn
+elevator_put_req_fn Must be used to allocate and free any elevator
+ specific storage for a request.
+
+elevator_activate_req_fn Called when device driver first sees a request.
+ I/O schedulers can use this callback to
+ determine when actual execution of a request
+ starts.
+elevator_deactivate_req_fn Called when device driver decides to delay
+ a request by requeueing it.
+
+elevator_init_fn*
+elevator_exit_fn Allocate and free any elevator specific storage
+ for a queue.
+
+4.2 Request flows seen by I/O schedulers
+All requests seen by I/O schedulers strictly follow one of the following three
+flows.
+
+ set_req_fn ->
+
+ i. add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
+ (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
+ ii. add_req_fn -> (merged_fn ->)* -> merge_req_fn
+ iii. [none]
+
+ -> put_req_fn
+
+4.3 I/O scheduler implementation
+The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
+optimal disk scan and request servicing performance (based on generic
+principles and device capabilities), optimized for:
+i. improved throughput
+ii. improved latency
+iii. better utilization of h/w & CPU time
+
+Characteristics:
+
+i. Binary tree
+AS and deadline i/o schedulers use red black binary trees for disk position
+sorting and searching, and a fifo linked list for time-based searching. This
+gives good scalability and good availability of information. Requests are
+almost always dispatched in disk sort order, so a cache is kept of the next
+request in sort order to prevent binary tree lookups.
+
+This arrangement is not a generic block layer characteristic however, so
+elevators may implement queues as they please.
+
+ii. Merge hash
+AS and deadline use a hash table indexed by the last sector of a request. This
+enables merging code to quickly look up "back merge" candidates, even when
+multiple I/O streams are being performed at once on one disk.
+
+"Front merges", a new request being merged at the front of an existing request,
+are far less common than "back merges" due to the nature of most I/O patterns.
+Front merges are handled by the binary trees in AS and deadline schedulers.
+
+iii. Plugging the queue to batch requests in anticipation of opportunities for
+ merge/sort optimizations
+
+Plugging is an approach that the current i/o scheduling algorithm resorts to so
+that it collects up enough requests in the queue to be able to take
+advantage of the sorting/merging logic in the elevator. If the
+queue is empty when a request comes in, then it plugs the request queue
+(sort of like plugging the bath tub of a vessel to get fluid to build up)
+till it fills up with a few more requests, before starting to service
+the requests. This provides an opportunity to merge/sort the requests before
+passing them down to the device. There are various conditions when the queue is
+unplugged (to open up the flow again), either through a scheduled task or
+could be on demand. For example wait_on_buffer sets the unplugging going
+through sync_buffer() running blk_run_address_space(mapping). Or the caller
+can do it explicity through blk_unplug(bdev). So in the read case,
+the queue gets explicitly unplugged as part of waiting for completion on that
+buffer. For page driven IO, the address space ->sync_page() takes care of
+doing the blk_run_address_space().
+
+Aside:
+ This is kind of controversial territory, as it's not clear if plugging is
+ always the right thing to do. Devices typically have their own queues,
+ and allowing a big queue to build up in software, while letting the device be
+ idle for a while may not always make sense. The trick is to handle the fine
+ balance between when to plug and when to open up. Also now that we have
+ multi-page bios being queued in one shot, we may not need to wait to merge
+ a big request from the broken up pieces coming by.
+
+4.4 I/O contexts
+I/O contexts provide a dynamically allocated per process data area. They may
+be used in I/O schedulers, and in the block layer (could be used for IO statis,
+priorities for example). See *io_context in block/ll_rw_blk.c, and as-iosched.c
+for an example of usage in an i/o scheduler.
+
+
+5. Scalability related changes
+
+5.1 Granular Locking: io_request_lock replaced by a per-queue lock
+
+The global io_request_lock has been removed as of 2.5, to avoid
+the scalability bottleneck it was causing, and has been replaced by more
+granular locking. The request queue structure has a pointer to the
+lock to be used for that queue. As a result, locking can now be
+per-queue, with a provision for sharing a lock across queues if
+necessary (e.g the scsi layer sets the queue lock pointers to the
+corresponding adapter lock, which results in a per host locking
+granularity). The locking semantics are the same, i.e. locking is
+still imposed by the block layer, grabbing the lock before
+request_fn execution which it means that lots of older drivers
+should still be SMP safe. Drivers are free to drop the queue
+lock themselves, if required. Drivers that explicitly used the
+io_request_lock for serialization need to be modified accordingly.
+Usually it's as easy as adding a global lock:
+
+ static DEFINE_SPINLOCK(my_driver_lock);
+
+and passing the address to that lock to blk_init_queue().
+
+5.2 64 bit sector numbers (sector_t prepares for 64 bit support)
+
+The sector number used in the bio structure has been changed to sector_t,
+which could be defined as 64 bit in preparation for 64 bit sector support.
+
+6. Other Changes/Implications
+
+6.1 Partition re-mapping handled by the generic block layer
+
+In 2.5 some of the gendisk/partition related code has been reorganized.
+Now the generic block layer performs partition-remapping early and thus
+provides drivers with a sector number relative to whole device, rather than
+having to take partition number into account in order to arrive at the true
+sector number. The routine blk_partition_remap() is invoked by
+generic_make_request even before invoking the queue specific make_request_fn,
+so the i/o scheduler also gets to operate on whole disk sector numbers. This
+should typically not require changes to block drivers, it just never gets
+to invoke its own partition sector offset calculations since all bios
+sent are offset from the beginning of the device.
+
+
+7. A Few Tips on Migration of older drivers
+
+Old-style drivers that just use CURRENT and ignores clustered requests,
+may not need much change. The generic layer will automatically handle
+clustered requests, multi-page bios, etc for the driver.
+
+For a low performance driver or hardware that is PIO driven or just doesn't
+support scatter-gather changes should be minimal too.
+
+The following are some points to keep in mind when converting old drivers
+to bio.
+
+Drivers should use elv_next_request to pick up requests and are no longer
+supposed to handle looping directly over the request list.
+(struct request->queue has been removed)
+
+Now end_that_request_first takes an additional number_of_sectors argument.
+It used to handle always just the first buffer_head in a request, now
+it will loop and handle as many sectors (on a bio-segment granularity)
+as specified.
+
+Now bh->b_end_io is replaced by bio->bi_end_io, but most of the time the
+right thing to use is bio_endio(bio, uptodate) instead.
+
+If the driver is dropping the io_request_lock from its request_fn strategy,
+then it just needs to replace that with q->queue_lock instead.
+
+As described in Sec 1.1, drivers can set max sector size, max segment size
+etc per queue now. Drivers that used to define their own merge functions i
+to handle things like this can now just use the blk_queue_* functions at
+blk_init_queue time.
+
+Drivers no longer have to map a {partition, sector offset} into the
+correct absolute location anymore, this is done by the block layer, so
+where a driver received a request ala this before:
+
+ rq->rq_dev = mk_kdev(3, 5); /* /dev/hda5 */
+ rq->sector = 0; /* first sector on hda5 */
+
+ it will now see
+
+ rq->rq_dev = mk_kdev(3, 0); /* /dev/hda */
+ rq->sector = 123128; /* offset from start of disk */
+
+As mentioned, there is no virtual mapping of a bio. For DMA, this is
+not a problem as the driver probably never will need a virtual mapping.
+Instead it needs a bus mapping (dma_map_page for a single segment or
+use dma_map_sg for scatter gather) to be able to ship it to the driver. For
+PIO drivers (or drivers that need to revert to PIO transfer once in a
+while (IDE for example)), where the CPU is doing the actual data
+transfer a virtual mapping is needed. If the driver supports highmem I/O,
+(Sec 1.1, (ii) ) it needs to use __bio_kmap_atomic and bio_kmap_irq to
+temporarily map a bio into the virtual address space.
+
+
+8. Prior/Related/Impacted patches
+
+8.1. Earlier kiobuf patches (sct/axboe/chait/hch/mkp)
+- orig kiobuf & raw i/o patches (now in 2.4 tree)
+- direct kiobuf based i/o to devices (no intermediate bh's)
+- page i/o using kiobuf
+- kiobuf splitting for lvm (mkp)
+- elevator support for kiobuf request merging (axboe)
+8.2. Zero-copy networking (Dave Miller)
+8.3. SGI XFS - pagebuf patches - use of kiobufs
+8.4. Multi-page pioent patch for bio (Christoph Hellwig)
+8.5. Direct i/o implementation (Andrea Arcangeli) since 2.4.10-pre11
+8.6. Async i/o implementation patch (Ben LaHaise)
+8.7. EVMS layering design (IBM EVMS team)
+8.8. Larger page cache size patch (Ben LaHaise) and
+ Large page size (Daniel Phillips)
+ => larger contiguous physical memory buffers
+8.9. VM reservations patch (Ben LaHaise)
+8.10. Write clustering patches ? (Marcelo/Quintela/Riel ?)
+8.11. Block device in page cache patch (Andrea Archangeli) - now in 2.4.10+
+8.12. Multiple block-size transfers for faster raw i/o (Shailabh Nagar,
+ Badari)
+8.13 Priority based i/o scheduler - prepatches (Arjan van de Ven)
+8.14 IDE Taskfile i/o patch (Andre Hedrick)
+8.15 Multi-page writeout and readahead patches (Andrew Morton)
+8.16 Direct i/o patches for 2.5 using kvec and bio (Badari Pulavarthy)
+
+9. Other References:
+
+9.1 The Splice I/O Model - Larry McVoy (and subsequent discussions on lkml,
+and Linus' comments - Jan 2001)
+9.2 Discussions about kiobuf and bh design on lkml between sct, linus, alan
+et al - Feb-March 2001 (many of the initial thoughts that led to bio were
+brought up in this discussion thread)
+9.3 Discussions on mempool on lkml - Dec 2001.
+
diff --git a/Documentation/block/biovecs.txt b/Documentation/block/biovecs.txt
new file mode 100644
index 000000000..74a32ad52
--- /dev/null
+++ b/Documentation/block/biovecs.txt
@@ -0,0 +1,111 @@
+
+Immutable biovecs and biovec iterators:
+=======================================
+
+Kent Overstreet <kmo@daterainc.com>
+
+As of 3.13, biovecs should never be modified after a bio has been submitted.
+Instead, we have a new struct bvec_iter which represents a range of a biovec -
+the iterator will be modified as the bio is completed, not the biovec.
+
+More specifically, old code that needed to partially complete a bio would
+update bi_sector and bi_size, and advance bi_idx to the next biovec. If it
+ended up partway through a biovec, it would increment bv_offset and decrement
+bv_len by the number of bytes completed in that biovec.
+
+In the new scheme of things, everything that must be mutated in order to
+partially complete a bio is segregated into struct bvec_iter: bi_sector,
+bi_size and bi_idx have been moved there; and instead of modifying bv_offset
+and bv_len, struct bvec_iter has bi_bvec_done, which represents the number of
+bytes completed in the current bvec.
+
+There are a bunch of new helper macros for hiding the gory details - in
+particular, presenting the illusion of partially completed biovecs so that
+normal code doesn't have to deal with bi_bvec_done.
+
+ * Driver code should no longer refer to biovecs directly; we now have
+ bio_iovec() and bio_iovec_iter() macros that return literal struct biovecs,
+ constructed from the raw biovecs but taking into account bi_bvec_done and
+ bi_size.
+
+ bio_for_each_segment() has been updated to take a bvec_iter argument
+ instead of an integer (that corresponded to bi_idx); for a lot of code the
+ conversion just required changing the types of the arguments to
+ bio_for_each_segment().
+
+ * Advancing a bvec_iter is done with bio_advance_iter(); bio_advance() is a
+ wrapper around bio_advance_iter() that operates on bio->bi_iter, and also
+ advances the bio integrity's iter if present.
+
+ There is a lower level advance function - bvec_iter_advance() - which takes
+ a pointer to a biovec, not a bio; this is used by the bio integrity code.
+
+What's all this get us?
+=======================
+
+Having a real iterator, and making biovecs immutable, has a number of
+advantages:
+
+ * Before, iterating over bios was very awkward when you weren't processing
+ exactly one bvec at a time - for example, bio_copy_data() in fs/bio.c,
+ which copies the contents of one bio into another. Because the biovecs
+ wouldn't necessarily be the same size, the old code was tricky convoluted -
+ it had to walk two different bios at the same time, keeping both bi_idx and
+ and offset into the current biovec for each.
+
+ The new code is much more straightforward - have a look. This sort of
+ pattern comes up in a lot of places; a lot of drivers were essentially open
+ coding bvec iterators before, and having common implementation considerably
+ simplifies a lot of code.
+
+ * Before, any code that might need to use the biovec after the bio had been
+ completed (perhaps to copy the data somewhere else, or perhaps to resubmit
+ it somewhere else if there was an error) had to save the entire bvec array
+ - again, this was being done in a fair number of places.
+
+ * Biovecs can be shared between multiple bios - a bvec iter can represent an
+ arbitrary range of an existing biovec, both starting and ending midway
+ through biovecs. This is what enables efficient splitting of arbitrary
+ bios. Note that this means we _only_ use bi_size to determine when we've
+ reached the end of a bio, not bi_vcnt - and the bio_iovec() macro takes
+ bi_size into account when constructing biovecs.
+
+ * Splitting bios is now much simpler. The old bio_split() didn't even work on
+ bios with more than a single bvec! Now, we can efficiently split arbitrary
+ size bios - because the new bio can share the old bio's biovec.
+
+ Care must be taken to ensure the biovec isn't freed while the split bio is
+ still using it, in case the original bio completes first, though. Using
+ bio_chain() when splitting bios helps with this.
+
+ * Submitting partially completed bios is now perfectly fine - this comes up
+ occasionally in stacking block drivers and various code (e.g. md and
+ bcache) had some ugly workarounds for this.
+
+ It used to be the case that submitting a partially completed bio would work
+ fine to _most_ devices, but since accessing the raw bvec array was the
+ norm, not all drivers would respect bi_idx and those would break. Now,
+ since all drivers _must_ go through the bvec iterator - and have been
+ audited to make sure they are - submitting partially completed bios is
+ perfectly fine.
+
+Other implications:
+===================
+
+ * Almost all usage of bi_idx is now incorrect and has been removed; instead,
+ where previously you would have used bi_idx you'd now use a bvec_iter,
+ probably passing it to one of the helper macros.
+
+ I.e. instead of using bio_iovec_idx() (or bio->bi_iovec[bio->bi_idx]), you
+ now use bio_iter_iovec(), which takes a bvec_iter and returns a
+ literal struct bio_vec - constructed on the fly from the raw biovec but
+ taking into account bi_bvec_done (and bi_size).
+
+ * bi_vcnt can't be trusted or relied upon by driver code - i.e. anything that
+ doesn't actually own the bio. The reason is twofold: firstly, it's not
+ actually needed for iterating over the bio anymore - we only use bi_size.
+ Secondly, when cloning a bio and reusing (a portion of) the original bio's
+ biovec, in order to calculate bi_vcnt for the new bio we'd have to iterate
+ over all the biovecs in the new bio - which is silly as it's not needed.
+
+ So, don't use bi_vcnt anymore.
diff --git a/Documentation/block/capability.txt b/Documentation/block/capability.txt
new file mode 100644
index 000000000..2f1729424
--- /dev/null
+++ b/Documentation/block/capability.txt
@@ -0,0 +1,15 @@
+Generic Block Device Capability
+===============================================================================
+This file documents the sysfs file block/<disk>/capability
+
+capability is a hex word indicating which capabilities a specific disk
+supports. For more information on bits not listed here, see
+include/linux/genhd.h
+
+Capability Value
+-------------------------------------------------------------------------------
+GENHD_FL_MEDIA_CHANGE_NOTIFY 4
+ When this bit is set, the disk supports Asynchronous Notification
+ of media change events. These events will be broadcast to user
+ space via kernel uevent.
+
diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt
new file mode 100644
index 000000000..f3bc72945
--- /dev/null
+++ b/Documentation/block/cfq-iosched.txt
@@ -0,0 +1,292 @@
+CFQ (Complete Fairness Queueing)
+===============================
+
+The main aim of CFQ scheduler is to provide a fair allocation of the disk
+I/O bandwidth for all the processes which requests an I/O operation.
+
+CFQ maintains the per process queue for the processes which request I/O
+operation(synchronous requests). In case of asynchronous requests, all the
+requests from all the processes are batched together according to their
+process's I/O priority.
+
+CFQ ioscheduler tunables
+========================
+
+slice_idle
+----------
+This specifies how long CFQ should idle for next request on certain cfq queues
+(for sequential workloads) and service trees (for random workloads) before
+queue is expired and CFQ selects next queue to dispatch from.
+
+By default slice_idle is a non-zero value. That means by default we idle on
+queues/service trees. This can be very helpful on highly seeky media like
+single spindle SATA/SAS disks where we can cut down on overall number of
+seeks and see improved throughput.
+
+Setting slice_idle to 0 will remove all the idling on queues/service tree
+level and one should see an overall improved throughput on faster storage
+devices like multiple SATA/SAS disks in hardware RAID configuration. The down
+side is that isolation provided from WRITES also goes down and notion of
+IO priority becomes weaker.
+
+So depending on storage and workload, it might be useful to set slice_idle=0.
+In general I think for SATA/SAS disks and software RAID of SATA/SAS disks
+keeping slice_idle enabled should be useful. For any configurations where
+there are multiple spindles behind single LUN (Host based hardware RAID
+controller or for storage arrays), setting slice_idle=0 might end up in better
+throughput and acceptable latencies.
+
+back_seek_max
+-------------
+This specifies, given in Kbytes, the maximum "distance" for backward seeking.
+The distance is the amount of space from the current head location to the
+sectors that are backward in terms of distance.
+
+This parameter allows the scheduler to anticipate requests in the "backward"
+direction and consider them as being the "next" if they are within this
+distance from the current head location.
+
+back_seek_penalty
+-----------------
+This parameter is used to compute the cost of backward seeking. If the
+backward distance of request is just 1/back_seek_penalty from a "front"
+request, then the seeking cost of two requests is considered equivalent.
+
+So scheduler will not bias toward one or the other request (otherwise scheduler
+will bias toward front request). Default value of back_seek_penalty is 2.
+
+fifo_expire_async
+-----------------
+This parameter is used to set the timeout of asynchronous requests. Default
+value of this is 248ms.
+
+fifo_expire_sync
+----------------
+This parameter is used to set the timeout of synchronous requests. Default
+value of this is 124ms. In case to favor synchronous requests over asynchronous
+one, this value should be decreased relative to fifo_expire_async.
+
+group_idle
+-----------
+This parameter forces idling at the CFQ group level instead of CFQ
+queue level. This was introduced after a bottleneck was observed
+in higher end storage due to idle on sequential queue and allow dispatch
+from a single queue. The idea with this parameter is that it can be run with
+slice_idle=0 and group_idle=8, so that idling does not happen on individual
+queues in the group but happens overall on the group and thus still keeps the
+IO controller working.
+Not idling on individual queues in the group will dispatch requests from
+multiple queues in the group at the same time and achieve higher throughput
+on higher end storage.
+
+Default value for this parameter is 8ms.
+
+latency
+-------
+This parameter is used to enable/disable the latency mode of the CFQ
+scheduler. If latency mode (called low_latency) is enabled, CFQ tries
+to recompute the slice time for each process based on the target_latency set
+for the system. This favors fairness over throughput. Disabling low
+latency (setting it to 0) ignores target latency, allowing each process in the
+system to get a full time slice.
+
+By default low latency mode is enabled.
+
+target_latency
+--------------
+This parameter is used to calculate the time slice for a process if cfq's
+latency mode is enabled. It will ensure that sync requests have an estimated
+latency. But if sequential workload is higher(e.g. sequential read),
+then to meet the latency constraints, throughput may decrease because of less
+time for each process to issue I/O request before the cfq queue is switched.
+
+Though this can be overcome by disabling the latency_mode, it may increase
+the read latency for some applications. This parameter allows for changing
+target_latency through the sysfs interface which can provide the balanced
+throughput and read latency.
+
+Default value for target_latency is 300ms.
+
+slice_async
+-----------
+This parameter is same as of slice_sync but for asynchronous queue. The
+default value is 40ms.
+
+slice_async_rq
+--------------
+This parameter is used to limit the dispatching of asynchronous request to
+device request queue in queue's slice time. The maximum number of request that
+are allowed to be dispatched also depends upon the io priority. Default value
+for this is 2.
+
+slice_sync
+----------
+When a queue is selected for execution, the queues IO requests are only
+executed for a certain amount of time(time_slice) before switching to another
+queue. This parameter is used to calculate the time slice of synchronous
+queue.
+
+time_slice is computed using the below equation:-
+time_slice = slice_sync + (slice_sync/5 * (4 - prio)). To increase the
+time_slice of synchronous queue, increase the value of slice_sync. Default
+value is 100ms.
+
+quantum
+-------
+This specifies the number of request dispatched to the device queue. In a
+queue's time slice, a request will not be dispatched if the number of request
+in the device exceeds this parameter. This parameter is used for synchronous
+request.
+
+In case of storage with several disk, this setting can limit the parallel
+processing of request. Therefore, increasing the value can improve the
+performance although this can cause the latency of some I/O to increase due
+to more number of requests.
+
+CFQ Group scheduling
+====================
+
+CFQ supports blkio cgroup and has "blkio." prefixed files in each
+blkio cgroup directory. It is weight-based and there are four knobs
+for configuration - weight[_device] and leaf_weight[_device].
+Internal cgroup nodes (the ones with children) can also have tasks in
+them, so the former two configure how much proportion the cgroup as a
+whole is entitled to at its parent's level while the latter two
+configure how much proportion the tasks in the cgroup have compared to
+its direct children.
+
+Another way to think about it is assuming that each internal node has
+an implicit leaf child node which hosts all the tasks whose weight is
+configured by leaf_weight[_device]. Let's assume a blkio hierarchy
+composed of five cgroups - root, A, B, AA and AB - with the following
+weights where the names represent the hierarchy.
+
+ weight leaf_weight
+ root : 125 125
+ A : 500 750
+ B : 250 500
+ AA : 500 500
+ AB : 1000 500
+
+root never has a parent making its weight is meaningless. For backward
+compatibility, weight is always kept in sync with leaf_weight. B, AA
+and AB have no child and thus its tasks have no children cgroup to
+compete with. They always get 100% of what the cgroup won at the
+parent level. Considering only the weights which matter, the hierarchy
+looks like the following.
+
+ root
+ / | \
+ A B leaf
+ 500 250 125
+ / | \
+ AA AB leaf
+ 500 1000 750
+
+If all cgroups have active IOs and competing with each other, disk
+time will be distributed like the following.
+
+Distribution below root. The total active weight at this level is
+A:500 + B:250 + C:125 = 875.
+
+ root-leaf : 125 / 875 =~ 14%
+ A : 500 / 875 =~ 57%
+ B(-leaf) : 250 / 875 =~ 28%
+
+A has children and further distributes its 57% among the children and
+the implicit leaf node. The total active weight at this level is
+AA:500 + AB:1000 + A-leaf:750 = 2250.
+
+ A-leaf : ( 750 / 2250) * A =~ 19%
+ AA(-leaf) : ( 500 / 2250) * A =~ 12%
+ AB(-leaf) : (1000 / 2250) * A =~ 25%
+
+CFQ IOPS Mode for group scheduling
+===================================
+Basic CFQ design is to provide priority based time slices. Higher priority
+process gets bigger time slice and lower priority process gets smaller time
+slice. Measuring time becomes harder if storage is fast and supports NCQ and
+it would be better to dispatch multiple requests from multiple cfq queues in
+request queue at a time. In such scenario, it is not possible to measure time
+consumed by single queue accurately.
+
+What is possible though is to measure number of requests dispatched from a
+single queue and also allow dispatch from multiple cfq queue at the same time.
+This effectively becomes the fairness in terms of IOPS (IO operations per
+second).
+
+If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches
+to IOPS mode and starts providing fairness in terms of number of requests
+dispatched. Note that this mode switching takes effect only for group
+scheduling. For non-cgroup users nothing should change.
+
+CFQ IO scheduler Idling Theory
+===============================
+Idling on a queue is primarily about waiting for the next request to come
+on same queue after completion of a request. In this process CFQ will not
+dispatch requests from other cfq queues even if requests are pending there.
+
+The rationale behind idling is that it can cut down on number of seeks
+on rotational media. For example, if a process is doing dependent
+sequential reads (next read will come on only after completion of previous
+one), then not dispatching request from other queue should help as we
+did not move the disk head and kept on dispatching sequential IO from
+one queue.
+
+CFQ has following service trees and various queues are put on these trees.
+
+ sync-idle sync-noidle async
+
+All cfq queues doing synchronous sequential IO go on to sync-idle tree.
+On this tree we idle on each queue individually.
+
+All synchronous non-sequential queues go on sync-noidle tree. Also any
+request which are marked with REQ_NOIDLE go on this service tree. On this
+tree we do not idle on individual queues instead idle on the whole group
+of queues or the tree. So if there are 4 queues waiting for IO to dispatch
+we will idle only once last queue has dispatched the IO and there is
+no more IO on this service tree.
+
+All async writes go on async service tree. There is no idling on async
+queues.
+
+CFQ has some optimizations for SSDs and if it detects a non-rotational
+media which can support higher queue depth (multiple requests at in
+flight at a time), then it cuts down on idling of individual queues and
+all the queues move to sync-noidle tree and only tree idle remains. This
+tree idling provides isolation with buffered write queues on async tree.
+
+FAQ
+===
+Q1. Why to idle at all on queues marked with REQ_NOIDLE.
+
+A1. We only do tree idle (all queues on sync-noidle tree) on queues marked
+ with REQ_NOIDLE. This helps in providing isolation with all the sync-idle
+ queues. Otherwise in presence of many sequential readers, other
+ synchronous IO might not get fair share of disk.
+
+ For example, if there are 10 sequential readers doing IO and they get
+ 100ms each. If a REQ_NOIDLE request comes in, it will be scheduled
+ roughly after 1 second. If after completion of REQ_NOIDLE request we
+ do not idle, and after a couple of milli seconds a another REQ_NOIDLE
+ request comes in, again it will be scheduled after 1second. Repeat it
+ and notice how a workload can lose its disk share and suffer due to
+ multiple sequential readers.
+
+ fsync can generate dependent IO where bunch of data is written in the
+ context of fsync, and later some journaling data is written. Journaling
+ data comes in only after fsync has finished its IO (atleast for ext4
+ that seemed to be the case). Now if one decides not to idle on fsync
+ thread due to REQ_NOIDLE, then next journaling write will not get
+ scheduled for another second. A process doing small fsync, will suffer
+ badly in presence of multiple sequential readers.
+
+ Hence doing tree idling on threads using REQ_NOIDLE flag on requests
+ provides isolation from multiple sequential readers and at the same
+ time we do not idle on individual threads.
+
+Q2. When to specify REQ_NOIDLE
+A2. I would think whenever one is doing synchronous write and not expecting
+ more writes to be dispatched from same context soon, should be able
+ to specify REQ_NOIDLE on writes and that probably should work well for
+ most of the cases.
diff --git a/Documentation/block/cmdline-partition.txt b/Documentation/block/cmdline-partition.txt
new file mode 100644
index 000000000..525b9f6d7
--- /dev/null
+++ b/Documentation/block/cmdline-partition.txt
@@ -0,0 +1,39 @@
+Embedded device command line partition parsing
+=====================================================================
+
+Support for reading the block device partition table from the command line.
+It is typically used for fixed block (eMMC) embedded devices.
+It has no MBR, so saves storage space. Bootloader can be easily accessed
+by absolute address of data on the block device.
+Users can easily change the partition.
+
+The format for the command line is just like mtdparts:
+
+blkdevparts=<blkdev-def>[;<blkdev-def>]
+ <blkdev-def> := <blkdev-id>:<partdef>[,<partdef>]
+ <partdef> := <size>[@<offset>](part-name)
+
+<blkdev-id>
+ block device disk name, embedded device used fixed block device,
+ it's disk name also fixed. such as: mmcblk0, mmcblk1, mmcblk0boot0.
+
+<size>
+ partition size, in bytes, such as: 512, 1m, 1G.
+
+<offset>
+ partition start address, in bytes.
+
+(part-name)
+ partition name, kernel send uevent with "PARTNAME". application can create
+ a link to block device partition with the name "PARTNAME".
+ user space application can access partition by partition name.
+
+Example:
+ eMMC disk name is "mmcblk0" and "mmcblk0boot0"
+
+ bootargs:
+ 'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
+
+ dmesg:
+ mmcblk0: p1(data0) p2(data1) p3()
+ mmcblk0boot0: p1(boot) p2(kernel)
diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
new file mode 100644
index 000000000..f56ec97f0
--- /dev/null
+++ b/Documentation/block/data-integrity.txt
@@ -0,0 +1,283 @@
+----------------------------------------------------------------------
+1. INTRODUCTION
+
+Modern filesystems feature checksumming of data and metadata to
+protect against data corruption. However, the detection of the
+corruption is done at read time which could potentially be months
+after the data was written. At that point the original data that the
+application tried to write is most likely lost.
+
+The solution is to ensure that the disk is actually storing what the
+application meant it to. Recent additions to both the SCSI family
+protocols (SBC Data Integrity Field, SCC protection proposal) as well
+as SATA/T13 (External Path Protection) try to remedy this by adding
+support for appending integrity metadata to an I/O. The integrity
+metadata (or protection information in SCSI terminology) includes a
+checksum for each sector as well as an incrementing counter that
+ensures the individual sectors are written in the right order. And
+for some protection schemes also that the I/O is written to the right
+place on disk.
+
+Current storage controllers and devices implement various protective
+measures, for instance checksumming and scrubbing. But these
+technologies are working in their own isolated domains or at best
+between adjacent nodes in the I/O path. The interesting thing about
+DIF and the other integrity extensions is that the protection format
+is well defined and every node in the I/O path can verify the
+integrity of the I/O and reject it if corruption is detected. This
+allows not only corruption prevention but also isolation of the point
+of failure.
+
+----------------------------------------------------------------------
+2. THE DATA INTEGRITY EXTENSIONS
+
+As written, the protocol extensions only protect the path between
+controller and storage device. However, many controllers actually
+allow the operating system to interact with the integrity metadata
+(IMD). We have been working with several FC/SAS HBA vendors to enable
+the protection information to be transferred to and from their
+controllers.
+
+The SCSI Data Integrity Field works by appending 8 bytes of protection
+information to each sector. The data + integrity metadata is stored
+in 520 byte sectors on disk. Data + IMD are interleaved when
+transferred between the controller and target. The T13 proposal is
+similar.
+
+Because it is highly inconvenient for operating systems to deal with
+520 (and 4104) byte sectors, we approached several HBA vendors and
+encouraged them to allow separation of the data and integrity metadata
+scatter-gather lists.
+
+The controller will interleave the buffers on write and split them on
+read. This means that Linux can DMA the data buffers to and from
+host memory without changes to the page cache.
+
+Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs
+is somewhat heavy to compute in software. Benchmarks found that
+calculating this checksum had a significant impact on system
+performance for a number of workloads. Some controllers allow a
+lighter-weight checksum to be used when interfacing with the operating
+system. Emulex, for instance, supports the TCP/IP checksum instead.
+The IP checksum received from the OS is converted to the 16-bit CRC
+when writing and vice versa. This allows the integrity metadata to be
+generated by Linux or the application at very low cost (comparable to
+software RAID5).
+
+The IP checksum is weaker than the CRC in terms of detecting bit
+errors. However, the strength is really in the separation of the data
+buffers and the integrity metadata. These two distinct buffers must
+match up for an I/O to complete.
+
+The separation of the data and integrity metadata buffers as well as
+the choice in checksums is referred to as the Data Integrity
+Extensions. As these extensions are outside the scope of the protocol
+bodies (T10, T13), Oracle and its partners are trying to standardize
+them within the Storage Networking Industry Association.
+
+----------------------------------------------------------------------
+3. KERNEL CHANGES
+
+The data integrity framework in Linux enables protection information
+to be pinned to I/Os and sent to/received from controllers that
+support it.
+
+The advantage to the integrity extensions in SCSI and SATA is that
+they enable us to protect the entire path from application to storage
+device. However, at the same time this is also the biggest
+disadvantage. It means that the protection information must be in a
+format that can be understood by the disk.
+
+Generally Linux/POSIX applications are agnostic to the intricacies of
+the storage devices they are accessing. The virtual filesystem switch
+and the block layer make things like hardware sector size and
+transport protocols completely transparent to the application.
+
+However, this level of detail is required when preparing the
+protection information to send to a disk. Consequently, the very
+concept of an end-to-end protection scheme is a layering violation.
+It is completely unreasonable for an application to be aware whether
+it is accessing a SCSI or SATA disk.
+
+The data integrity support implemented in Linux attempts to hide this
+from the application. As far as the application (and to some extent
+the kernel) is concerned, the integrity metadata is opaque information
+that's attached to the I/O.
+
+The current implementation allows the block layer to automatically
+generate the protection information for any I/O. Eventually the
+intent is to move the integrity metadata calculation to userspace for
+user data. Metadata and other I/O that originates within the kernel
+will still use the automatic generation interface.
+
+Some storage devices allow each hardware sector to be tagged with a
+16-bit value. The owner of this tag space is the owner of the block
+device. I.e. the filesystem in most cases. The filesystem can use
+this extra space to tag sectors as they see fit. Because the tag
+space is limited, the block interface allows tagging bigger chunks by
+way of interleaving. This way, 8*16 bits of information can be
+attached to a typical 4KB filesystem block.
+
+This also means that applications such as fsck and mkfs will need
+access to manipulate the tags from user space. A passthrough
+interface for this is being worked on.
+
+
+----------------------------------------------------------------------
+4. BLOCK LAYER IMPLEMENTATION DETAILS
+
+4.1 BIO
+
+The data integrity patches add a new field to struct bio when
+CONFIG_BLK_DEV_INTEGRITY is enabled. bio_integrity(bio) returns a
+pointer to a struct bip which contains the bio integrity payload.
+Essentially a bip is a trimmed down struct bio which holds a bio_vec
+containing the integrity metadata and the required housekeeping
+information (bvec pool, vector count, etc.)
+
+A kernel subsystem can enable data integrity protection on a bio by
+calling bio_integrity_alloc(bio). This will allocate and attach the
+bip to the bio.
+
+Individual pages containing integrity metadata can subsequently be
+attached using bio_integrity_add_page().
+
+bio_free() will automatically free the bip.
+
+
+4.2 BLOCK DEVICE
+
+Because the format of the protection data is tied to the physical
+disk, each block device has been extended with a block integrity
+profile (struct blk_integrity). This optional profile is registered
+with the block layer using blk_integrity_register().
+
+The profile contains callback functions for generating and verifying
+the protection data, as well as getting and setting application tags.
+The profile also contains a few constants to aid in completing,
+merging and splitting the integrity metadata.
+
+Layered block devices will need to pick a profile that's appropriate
+for all subdevices. blk_integrity_compare() can help with that. DM
+and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6
+will require extra work due to the application tag.
+
+
+----------------------------------------------------------------------
+5.0 BLOCK LAYER INTEGRITY API
+
+5.1 NORMAL FILESYSTEM
+
+ The normal filesystem is unaware that the underlying block device
+ is capable of sending/receiving integrity metadata. The IMD will
+ be automatically generated by the block layer at submit_bio() time
+ in case of a WRITE. A READ request will cause the I/O integrity
+ to be verified upon completion.
+
+ IMD generation and verification can be toggled using the
+
+ /sys/block/<bdev>/integrity/write_generate
+
+ and
+
+ /sys/block/<bdev>/integrity/read_verify
+
+ flags.
+
+
+5.2 INTEGRITY-AWARE FILESYSTEM
+
+ A filesystem that is integrity-aware can prepare I/Os with IMD
+ attached. It can also use the application tag space if this is
+ supported by the block device.
+
+
+ int bio_integrity_prep(bio);
+
+ To generate IMD for WRITE and to set up buffers for READ, the
+ filesystem must call bio_integrity_prep(bio).
+
+ Prior to calling this function, the bio data direction and start
+ sector must be set, and the bio should have all data pages
+ added. It is up to the caller to ensure that the bio does not
+ change while I/O is in progress.
+
+ bio_integrity_prep() should only be called if
+ bio_integrity_enabled() returned 1.
+
+
+5.3 PASSING EXISTING INTEGRITY METADATA
+
+ Filesystems that either generate their own integrity metadata or
+ are capable of transferring IMD from user space can use the
+ following calls:
+
+
+ struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages);
+
+ Allocates the bio integrity payload and hangs it off of the bio.
+ nr_pages indicate how many pages of protection data need to be
+ stored in the integrity bio_vec list (similar to bio_alloc()).
+
+ The integrity payload will be freed at bio_free() time.
+
+
+ int bio_integrity_add_page(bio, page, len, offset);
+
+ Attaches a page containing integrity metadata to an existing
+ bio. The bio must have an existing bip,
+ i.e. bio_integrity_alloc() must have been called. For a WRITE,
+ the integrity metadata in the pages must be in a format
+ understood by the target device with the notable exception that
+ the sector numbers will be remapped as the request traverses the
+ I/O stack. This implies that the pages added using this call
+ will be modified during I/O! The first reference tag in the
+ integrity metadata must have a value of bip->bip_sector.
+
+ Pages can be added using bio_integrity_add_page() as long as
+ there is room in the bip bio_vec array (nr_pages).
+
+ Upon completion of a READ operation, the attached pages will
+ contain the integrity metadata received from the storage device.
+ It is up to the receiver to process them and verify data
+ integrity upon completion.
+
+
+5.4 REGISTERING A BLOCK DEVICE AS CAPABLE OF EXCHANGING INTEGRITY
+ METADATA
+
+ To enable integrity exchange on a block device the gendisk must be
+ registered as capable:
+
+ int blk_integrity_register(gendisk, blk_integrity);
+
+ The blk_integrity struct is a template and should contain the
+ following:
+
+ static struct blk_integrity my_profile = {
+ .name = "STANDARDSBODY-TYPE-VARIANT-CSUM",
+ .generate_fn = my_generate_fn,
+ .verify_fn = my_verify_fn,
+ .tuple_size = sizeof(struct my_tuple_size),
+ .tag_size = <tag bytes per hw sector>,
+ };
+
+ 'name' is a text string which will be visible in sysfs. This is
+ part of the userland API so chose it carefully and never change
+ it. The format is standards body-type-variant.
+ E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.
+
+ 'generate_fn' generates appropriate integrity metadata (for WRITE).
+
+ 'verify_fn' verifies that the data buffer matches the integrity
+ metadata.
+
+ 'tuple_size' must be set to match the size of the integrity
+ metadata per sector. I.e. 8 for DIF and EPP.
+
+ 'tag_size' must be set to identify how many bytes of tag space
+ are available per hardware sector. For DIF this is either 2 or
+ 0 depending on the value of the Control Mode Page ATO bit.
+
+----------------------------------------------------------------------
+2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/Documentation/block/deadline-iosched.txt b/Documentation/block/deadline-iosched.txt
new file mode 100644
index 000000000..2d82c8032
--- /dev/null
+++ b/Documentation/block/deadline-iosched.txt
@@ -0,0 +1,75 @@
+Deadline IO scheduler tunables
+==============================
+
+This little file attempts to document how the deadline io scheduler works.
+In particular, it will clarify the meaning of the exposed tunables that may be
+of interest to power users.
+
+Selecting IO schedulers
+-----------------------
+Refer to Documentation/block/switching-sched.txt for information on
+selecting an io scheduler on a per-device basis.
+
+
+********************************************************************************
+
+
+read_expire (in ms)
+-----------
+
+The goal of the deadline io scheduler is to attempt to guarantee a start
+service time for a request. As we focus mainly on read latencies, this is
+tunable. When a read request first enters the io scheduler, it is assigned
+a deadline that is the current time + the read_expire value in units of
+milliseconds.
+
+
+write_expire (in ms)
+-----------
+
+Similar to read_expire mentioned above, but for writes.
+
+
+fifo_batch (number of requests)
+----------
+
+Requests are grouped into ``batches'' of a particular data direction (read or
+write) which are serviced in increasing sector order. To limit extra seeking,
+deadline expiries are only checked between batches. fifo_batch controls the
+maximum number of requests per batch.
+
+This parameter tunes the balance between per-request latency and aggregate
+throughput. When low latency is the primary concern, smaller is better (where
+a value of 1 yields first-come first-served behaviour). Increasing fifo_batch
+generally improves throughput, at the cost of latency variation.
+
+
+writes_starved (number of dispatches)
+--------------
+
+When we have to move requests from the io scheduler queue to the block
+device dispatch queue, we always give a preference to reads. However, we
+don't want to starve writes indefinitely either. So writes_starved controls
+how many times we give preference to reads over writes. When that has been
+done writes_starved number of times, we dispatch some writes based on the
+same criteria as reads.
+
+
+front_merges (bool)
+------------
+
+Sometimes it happens that a request enters the io scheduler that is contiguous
+with a request that is already on the queue. Either it fits in the back of that
+request, or it fits at the front. That is called either a back merge candidate
+or a front merge candidate. Due to the way files are typically laid out,
+back merges are much more common than front merges. For some work loads, you
+may even know that it is a waste of time to spend any time attempting to
+front merge requests. Setting front_merges to 0 disables this functionality.
+Front merges may still occur due to the cached last_merge hint, but since
+that comes at basically 0 cost we leave that on. We simply disable the
+rbtree front sector lookup when the io scheduler merge function is called.
+
+
+Nov 11 2002, Jens Axboe <jens.axboe@oracle.com>
+
+
diff --git a/Documentation/block/ioprio.txt b/Documentation/block/ioprio.txt
new file mode 100644
index 000000000..8ed8c5938
--- /dev/null
+++ b/Documentation/block/ioprio.txt
@@ -0,0 +1,183 @@
+Block io priorities
+===================
+
+
+Intro
+-----
+
+With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
+priorities are supported for reads on files. This enables users to io nice
+processes or process groups, similar to what has been possible with cpu
+scheduling for ages. This document mainly details the current possibilities
+with cfq; other io schedulers do not support io priorities thus far.
+
+Scheduling classes
+------------------
+
+CFQ implements three generic scheduling classes that determine how io is
+served for a process.
+
+IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
+higher priority than any other in the system, processes from this class are
+given first access to the disk every time. Thus it needs to be used with some
+care, one io RT process can starve the entire system. Within the RT class,
+there are 8 levels of class data that determine exactly how much time this
+process needs the disk for on each service. In the future this might change
+to be more directly mappable to performance, by passing in a wanted data
+rate instead.
+
+IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
+for any process that hasn't set a specific io priority. The class data
+determines how much io bandwidth the process will get, it's directly mappable
+to the cpu nice levels just more coarsely implemented. 0 is the highest
+BE prio level, 7 is the lowest. The mapping between cpu nice level and io
+nice level is determined as: io_nice = (cpu_nice + 20) / 5.
+
+IOPRIO_CLASS_IDLE: This is the idle scheduling class, processes running at this
+level only get io time when no one else needs the disk. The idle class has no
+class data, since it doesn't really apply here.
+
+Tools
+-----
+
+See below for a sample ionice tool. Usage:
+
+# ionice -c<class> -n<level> -p<pid>
+
+If pid isn't given, the current process is assumed. IO priority settings
+are inherited on fork, so you can use ionice to start the process at a given
+level:
+
+# ionice -c2 -n0 /bin/ls
+
+will run ls at the best-effort scheduling class at the highest priority.
+For a running process, you can give the pid instead:
+
+# ionice -c1 -n2 -p100
+
+will change pid 100 to run at the realtime scheduling class, at priority 2.
+
+---> snip ionice.c tool <---
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <asm/unistd.h>
+
+extern int sys_ioprio_set(int, int, int);
+extern int sys_ioprio_get(int, int);
+
+#if defined(__i386__)
+#define __NR_ioprio_set 289
+#define __NR_ioprio_get 290
+#elif defined(__ppc__)
+#define __NR_ioprio_set 273
+#define __NR_ioprio_get 274
+#elif defined(__x86_64__)
+#define __NR_ioprio_set 251
+#define __NR_ioprio_get 252
+#elif defined(__ia64__)
+#define __NR_ioprio_set 1274
+#define __NR_ioprio_get 1275
+#else
+#error "Unsupported arch"
+#endif
+
+static inline int ioprio_set(int which, int who, int ioprio)
+{
+ return syscall(__NR_ioprio_set, which, who, ioprio);
+}
+
+static inline int ioprio_get(int which, int who)
+{
+ return syscall(__NR_ioprio_get, which, who);
+}
+
+enum {
+ IOPRIO_CLASS_NONE,
+ IOPRIO_CLASS_RT,
+ IOPRIO_CLASS_BE,
+ IOPRIO_CLASS_IDLE,
+};
+
+enum {
+ IOPRIO_WHO_PROCESS = 1,
+ IOPRIO_WHO_PGRP,
+ IOPRIO_WHO_USER,
+};
+
+#define IOPRIO_CLASS_SHIFT 13
+
+const char *to_prio[] = { "none", "realtime", "best-effort", "idle", };
+
+int main(int argc, char *argv[])
+{
+ int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
+ int c, pid = 0;
+
+ while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
+ switch (c) {
+ case 'n':
+ ioprio = strtol(optarg, NULL, 10);
+ set = 1;
+ break;
+ case 'c':
+ ioprio_class = strtol(optarg, NULL, 10);
+ set = 1;
+ break;
+ case 'p':
+ pid = strtol(optarg, NULL, 10);
+ break;
+ }
+ }
+
+ switch (ioprio_class) {
+ case IOPRIO_CLASS_NONE:
+ ioprio_class = IOPRIO_CLASS_BE;
+ break;
+ case IOPRIO_CLASS_RT:
+ case IOPRIO_CLASS_BE:
+ break;
+ case IOPRIO_CLASS_IDLE:
+ ioprio = 7;
+ break;
+ default:
+ printf("bad prio class %d\n", ioprio_class);
+ return 1;
+ }
+
+ if (!set) {
+ if (!pid && argv[optind])
+ pid = strtol(argv[optind], NULL, 10);
+
+ ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
+
+ printf("pid=%d, %d\n", pid, ioprio);
+
+ if (ioprio == -1)
+ perror("ioprio_get");
+ else {
+ ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
+ ioprio = ioprio & 0xff;
+ printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
+ }
+ } else {
+ if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
+ perror("ioprio_set");
+ return 1;
+ }
+
+ if (argv[optind])
+ execvp(argv[optind], &argv[optind]);
+ }
+
+ return 0;
+}
+
+---> snip ionice.c tool <---
+
+
+March 11 2005, Jens Axboe <jens.axboe@oracle.com>
diff --git a/Documentation/block/null_blk.txt b/Documentation/block/null_blk.txt
new file mode 100644
index 000000000..2f6c6ff71
--- /dev/null
+++ b/Documentation/block/null_blk.txt
@@ -0,0 +1,72 @@
+Null block device driver
+================================================================================
+
+I. Overview
+
+The null block device (/dev/nullb*) is used for benchmarking the various
+block-layer implementations. It emulates a block device of X gigabytes in size.
+The following instances are possible:
+
+ Single-queue block-layer
+ - Request-based.
+ - Single submission queue per device.
+ - Implements IO scheduling algorithms (CFQ, Deadline, noop).
+ Multi-queue block-layer
+ - Request-based.
+ - Configurable submission queues per device.
+ No block-layer (Known as bio-based)
+ - Bio-based. IO requests are submitted directly to the device driver.
+ - Directly accepts bio data structure and returns them.
+
+All of them have a completion queue for each core in the system.
+
+II. Module parameters applicable for all instances:
+
+queue_mode=[0-2]: Default: 2-Multi-queue
+ Selects which block-layer the module should instantiate with.
+
+ 0: Bio-based.
+ 1: Single-queue.
+ 2: Multi-queue.
+
+home_node=[0--nr_nodes]: Default: NUMA_NO_NODE
+ Selects what CPU node the data structures are allocated from.
+
+gb=[Size in GB]: Default: 250GB
+ The size of the device reported to the system.
+
+bs=[Block size (in bytes)]: Default: 512 bytes
+ The block size reported to the system.
+
+nr_devices=[Number of devices]: Default: 2
+ Number of block devices instantiated. They are instantiated as /dev/nullb0,
+ etc.
+
+irqmode=[0-2]: Default: 1-Soft-irq
+ The completion mode used for completing IOs to the block-layer.
+
+ 0: None.
+ 1: Soft-irq. Uses IPI to complete IOs across CPU nodes. Simulates the overhead
+ when IOs are issued from another CPU node than the home the device is
+ connected to.
+ 2: Timer: Waits a specific period (completion_nsec) for each IO before
+ completion.
+
+completion_nsec=[ns]: Default: 10.000ns
+ Combined with irqmode=2 (timer). The time each completion event must wait.
+
+submit_queues=[0..nr_cpus]:
+ The number of submission queues attached to the device driver. If unset, it
+ defaults to 1 on single-queue and bio-based instances. For multi-queue,
+ it is ignored when use_per_node_hctx module parameter is 1.
+
+hw_queue_depth=[0..qdepth]: Default: 64
+ The hardware queue depth of the device.
+
+III: Multi-queue specific parameters
+
+use_per_node_hctx=[0/1]: Default: 0
+ 0: The number of submit queues are set to the value of the submit_queues
+ parameter.
+ 1: The multi-queue block layer is instantiated with a hardware dispatch
+ queue for each CPU node in the system.
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
new file mode 100644
index 000000000..3a29f8914
--- /dev/null
+++ b/Documentation/block/queue-sysfs.txt
@@ -0,0 +1,138 @@
+Queue sysfs files
+=================
+
+This text file will detail the queue files that are located in the sysfs tree
+for each block device. Note that stacked devices typically do not export
+any settings, since their queue merely functions are a remapping target.
+These files are the ones found in the /sys/block/xxx/queue/ directory.
+
+Files denoted with a RO postfix are readonly and the RW postfix means
+read-write.
+
+add_random (RW)
+----------------
+This file allows to turn off the disk entropy contribution. Default
+value of this file is '1'(on).
+
+discard_granularity (RO)
+-----------------------
+This shows the size of internal allocation of the device in bytes, if
+reported by the device. A value of '0' means device does not support
+the discard functionality.
+
+discard_max_bytes (RO)
+----------------------
+Devices that support discard functionality may have internal limits on
+the number of bytes that can be trimmed or unmapped in a single operation.
+The discard_max_bytes parameter is set by the device driver to the maximum
+number of bytes that can be discarded in a single operation. Discard
+requests issued to the device must not exceed this limit. A discard_max_bytes
+value of 0 means that the device does not support discard functionality.
+
+discard_zeroes_data (RO)
+------------------------
+When read, this file will show if the discarded block are zeroed by the
+device or not. If its value is '1' the blocks are zeroed otherwise not.
+
+hw_sector_size (RO)
+-------------------
+This is the hardware sector size of the device, in bytes.
+
+iostats (RW)
+-------------
+This file is used to control (on/off) the iostats accounting of the
+disk.
+
+logical_block_size (RO)
+-----------------------
+This is the logcal block size of the device, in bytes.
+
+max_hw_sectors_kb (RO)
+----------------------
+This is the maximum number of kilobytes supported in a single data transfer.
+
+max_integrity_segments (RO)
+---------------------------
+When read, this file shows the max limit of integrity segments as
+set by block layer which a hardware controller can handle.
+
+max_sectors_kb (RW)
+-------------------
+This is the maximum number of kilobytes that the block layer will allow
+for a filesystem request. Must be smaller than or equal to the maximum
+size allowed by the hardware.
+
+max_segments (RO)
+-----------------
+Maximum number of segments of the device.
+
+max_segment_size (RO)
+---------------------
+Maximum segment size of the device.
+
+minimum_io_size (RO)
+--------------------
+This is the smallest preferred IO size reported by the device.
+
+nomerges (RW)
+-------------
+This enables the user to disable the lookup logic involved with IO
+merging requests in the block layer. By default (0) all merges are
+enabled. When set to 1 only simple one-hit merges will be tried. When
+set to 2 no merge algorithms will be tried (including one-hit or more
+complex tree/hash lookups).
+
+nr_requests (RW)
+----------------
+This controls how many requests may be allocated in the block layer for
+read or write requests. Note that the total allocated number may be twice
+this amount, since it applies only to reads or writes (not the accumulated
+sum).
+
+To avoid priority inversion through request starvation, a request
+queue maintains a separate request pool per each cgroup when
+CONFIG_BLK_CGROUP is enabled, and this parameter applies to each such
+per-block-cgroup request pool. IOW, if there are N block cgroups,
+each request queue may have up to N request pools, each independently
+regulated by nr_requests.
+
+optimal_io_size (RO)
+--------------------
+This is the optimal IO size reported by the device.
+
+physical_block_size (RO)
+------------------------
+This is the physical block size of device, in bytes.
+
+read_ahead_kb (RW)
+------------------
+Maximum number of kilobytes to read-ahead for filesystems on this block
+device.
+
+rotational (RW)
+---------------
+This file is used to stat if the device is of rotational type or
+non-rotational type.
+
+rq_affinity (RW)
+----------------
+If this option is '1', the block layer will migrate request completions to the
+cpu "group" that originally submitted the request. For some workloads this
+provides a significant reduction in CPU cycles due to caching effects.
+
+For storage configurations that need to maximize distribution of completion
+processing setting this option to '2' forces the completion to run on the
+requesting cpu (bypassing the "group" aggregation logic).
+
+scheduler (RW)
+--------------
+When read, this file will display the current and available IO schedulers
+for this block device. The currently active IO scheduler will be enclosed
+in [] brackets. Writing an IO scheduler name to this file will switch
+control of this block device to that new IO scheduler. Note that writing
+an IO scheduler name to this file will attempt to load that IO scheduler
+module, if it isn't already present in the system.
+
+
+
+Jens Axboe <jens.axboe@oracle.com>, February 2009
diff --git a/Documentation/block/request.txt b/Documentation/block/request.txt
new file mode 100644
index 000000000..754e104ed
--- /dev/null
+++ b/Documentation/block/request.txt
@@ -0,0 +1,88 @@
+
+struct request documentation
+
+Jens Axboe <jens.axboe@oracle.com> 27/05/02
+
+1.0
+Index
+
+2.0 Struct request members classification
+
+ 2.1 struct request members explanation
+
+3.0
+
+
+2.0
+Short explanation of request members
+
+Classification flags:
+
+ D driver member
+ B block layer member
+ I I/O scheduler member
+
+Unless an entry contains a D classification, a device driver must not access
+this member. Some members may contain D classifications, but should only be
+access through certain macros or functions (eg ->flags).
+
+<linux/blkdev.h>
+
+2.1
+Member Flag Comment
+------ ---- -------
+
+struct list_head queuelist BI Organization on various internal
+ queues
+
+void *elevator_private I I/O scheduler private data
+
+unsigned char cmd[16] D Driver can use this for setting up
+ a cdb before execution, see
+ blk_queue_prep_rq
+
+unsigned long flags DBI Contains info about data direction,
+ request type, etc.
+
+int rq_status D Request status bits
+
+kdev_t rq_dev DBI Target device
+
+int errors DB Error counts
+
+sector_t sector DBI Target location
+
+unsigned long hard_nr_sectors B Used to keep sector sane
+
+unsigned long nr_sectors DBI Total number of sectors in request
+
+unsigned long hard_nr_sectors B Used to keep nr_sectors sane
+
+unsigned short nr_phys_segments DB Number of physical scatter gather
+ segments in a request
+
+unsigned short nr_hw_segments DB Number of hardware scatter gather
+ segments in a request
+
+unsigned int current_nr_sectors DB Number of sectors in first segment
+ of request
+
+unsigned int hard_cur_sectors B Used to keep current_nr_sectors sane
+
+int tag DB TCQ tag, if assigned
+
+void *special D Free to be used by driver
+
+char *buffer D Map of first segment, also see
+ section on bouncing SECTION
+
+struct completion *waiting D Can be used by driver to get signalled
+ on request completion
+
+struct bio *bio DBI First bio in request
+
+struct bio *biotail DBI Last bio in request
+
+struct request_queue *q DB Request queue this request belongs to
+
+struct request_list *rl B Request list this request came from
diff --git a/Documentation/block/stat.txt b/Documentation/block/stat.txt
new file mode 100644
index 000000000..0dbc946de
--- /dev/null
+++ b/Documentation/block/stat.txt
@@ -0,0 +1,82 @@
+Block layer statistics in /sys/block/<dev>/stat
+===============================================
+
+This file documents the contents of the /sys/block/<dev>/stat file.
+
+The stat file provides several statistics about the state of block
+device <dev>.
+
+Q. Why are there multiple statistics in a single file? Doesn't sysfs
+ normally contain a single value per file?
+A. By having a single file, the kernel can guarantee that the statistics
+ represent a consistent snapshot of the state of the device. If the
+ statistics were exported as multiple files containing one statistic
+ each, it would be impossible to guarantee that a set of readings
+ represent a single point in time.
+
+The stat file consists of a single line of text containing 11 decimal
+values separated by whitespace. The fields are summarized in the
+following table, and described in more detail below.
+
+Name units description
+---- ----- -----------
+read I/Os requests number of read I/Os processed
+read merges requests number of read I/Os merged with in-queue I/O
+read sectors sectors number of sectors read
+read ticks milliseconds total wait time for read requests
+write I/Os requests number of write I/Os processed
+write merges requests number of write I/Os merged with in-queue I/O
+write sectors sectors number of sectors written
+write ticks milliseconds total wait time for write requests
+in_flight requests number of I/Os currently in flight
+io_ticks milliseconds total time this block device has been active
+time_in_queue milliseconds total wait time for all requests
+
+read I/Os, write I/Os
+=====================
+
+These values increment when an I/O request completes.
+
+read merges, write merges
+=========================
+
+These values increment when an I/O request is merged with an
+already-queued I/O request.
+
+read sectors, write sectors
+===========================
+
+These values count the number of sectors read from or written to this
+block device. The "sectors" in question are the standard UNIX 512-byte
+sectors, not any device- or filesystem-specific block size. The
+counters are incremented when the I/O completes.
+
+read ticks, write ticks
+=======================
+
+These values count the number of milliseconds that I/O requests have
+waited on this block device. If there are multiple I/O requests waiting,
+these values will increase at a rate greater than 1000/second; for
+example, if 60 read requests wait for an average of 30 ms, the read_ticks
+field will increase by 60*30 = 1800.
+
+in_flight
+=========
+
+This value counts the number of I/O requests that have been issued to
+the device driver but have not yet completed. It does not include I/O
+requests that are in the queue but not yet issued to the device driver.
+
+io_ticks
+========
+
+This value counts the number of milliseconds during which the device has
+had I/O requests queued.
+
+time_in_queue
+=============
+
+This value counts the number of milliseconds that I/O requests have waited
+on this block device. If there are multiple I/O requests waiting, this
+value will increase as the product of the number of milliseconds times the
+number of requests waiting (see "read ticks" above for an example).
diff --git a/Documentation/block/switching-sched.txt b/Documentation/block/switching-sched.txt
new file mode 100644
index 000000000..3b2612e34
--- /dev/null
+++ b/Documentation/block/switching-sched.txt
@@ -0,0 +1,37 @@
+To choose IO schedulers at boot time, use the argument 'elevator=deadline'.
+'noop' and 'cfq' (the default) are also available. IO schedulers are assigned
+globally at boot time only presently.
+
+Each io queue has a set of io scheduler tunables associated with it. These
+tunables control how the io scheduler works. You can find these entries
+in:
+
+/sys/block/<device>/queue/iosched
+
+assuming that you have sysfs mounted on /sys. If you don't have sysfs mounted,
+you can do so by typing:
+
+# mount none /sys -t sysfs
+
+As of the Linux 2.6.10 kernel, it is now possible to change the
+IO scheduler for a given block device on the fly (thus making it possible,
+for instance, to set the CFQ scheduler for the system default, but
+set a specific device to use the deadline or noop schedulers - which
+can improve that device's throughput).
+
+To set a specific scheduler, simply do this:
+
+echo SCHEDNAME > /sys/block/DEV/queue/scheduler
+
+where SCHEDNAME is the name of a defined IO scheduler, and DEV is the
+device name (hda, hdb, sga, or whatever you happen to have).
+
+The list of defined schedulers can be found by simply doing
+a "cat /sys/block/DEV/queue/scheduler" - the list of valid names
+will be displayed, with the currently selected scheduler in brackets:
+
+# cat /sys/block/hda/queue/scheduler
+noop deadline [cfq]
+# echo deadline > /sys/block/hda/queue/scheduler
+# cat /sys/block/hda/queue/scheduler
+noop [deadline] cfq
diff --git a/Documentation/block/writeback_cache_control.txt b/Documentation/block/writeback_cache_control.txt
new file mode 100644
index 000000000..83407d366
--- /dev/null
+++ b/Documentation/block/writeback_cache_control.txt
@@ -0,0 +1,86 @@
+
+Explicit volatile write back cache control
+=====================================
+
+Introduction
+------------
+
+Many storage devices, especially in the consumer market, come with volatile
+write back caches. That means the devices signal I/O completion to the
+operating system before data actually has hit the non-volatile storage. This
+behavior obviously speeds up various workloads, but it means the operating
+system needs to force data out to the non-volatile storage when it performs
+a data integrity operation like fsync, sync or an unmount.
+
+The Linux block layer provides two simple mechanisms that let filesystems
+control the caching behavior of the storage device. These mechanisms are
+a forced cache flush, and the Force Unit Access (FUA) flag for requests.
+
+
+Explicit cache flushes
+----------------------
+
+The REQ_FLUSH flag can be OR ed into the r/w flags of a bio submitted from
+the filesystem and will make sure the volatile cache of the storage device
+has been flushed before the actual I/O operation is started. This explicitly
+guarantees that previously completed write requests are on non-volatile
+storage before the flagged bio starts. In addition the REQ_FLUSH flag can be
+set on an otherwise empty bio structure, which causes only an explicit cache
+flush without any dependent I/O. It is recommend to use
+the blkdev_issue_flush() helper for a pure cache flush.
+
+
+Forced Unit Access
+-----------------
+
+The REQ_FUA flag can be OR ed into the r/w flags of a bio submitted from the
+filesystem and will make sure that I/O completion for this request is only
+signaled after the data has been committed to non-volatile storage.
+
+
+Implementation details for filesystems
+--------------------------------------
+
+Filesystems can simply set the REQ_FLUSH and REQ_FUA bits and do not have to
+worry if the underlying devices need any explicit cache flushing and how
+the Forced Unit Access is implemented. The REQ_FLUSH and REQ_FUA flags
+may both be set on a single bio.
+
+
+Implementation details for make_request_fn based block drivers
+--------------------------------------------------------------
+
+These drivers will always see the REQ_FLUSH and REQ_FUA bits as they sit
+directly below the submit_bio interface. For remapping drivers the REQ_FUA
+bits need to be propagated to underlying devices, and a global flush needs
+to be implemented for bios with the REQ_FLUSH bit set. For real device
+drivers that do not have a volatile cache the REQ_FLUSH and REQ_FUA bits
+on non-empty bios can simply be ignored, and REQ_FLUSH requests without
+data can be completed successfully without doing any work. Drivers for
+devices with volatile caches need to implement the support for these
+flags themselves without any help from the block layer.
+
+
+Implementation details for request_fn based block drivers
+--------------------------------------------------------------
+
+For devices that do not support volatile write caches there is no driver
+support required, the block layer completes empty REQ_FLUSH requests before
+entering the driver and strips off the REQ_FLUSH and REQ_FUA bits from
+requests that have a payload. For devices with volatile write caches the
+driver needs to tell the block layer that it supports flushing caches by
+doing:
+
+ blk_queue_flush(sdkp->disk->queue, REQ_FLUSH);
+
+and handle empty REQ_FLUSH requests in its prep_fn/request_fn. Note that
+REQ_FLUSH requests with a payload are automatically turned into a sequence
+of an empty REQ_FLUSH request followed by the actual write by the block
+layer. For devices that also support the FUA bit the block layer needs
+to be told to pass through the REQ_FUA bit using:
+
+ blk_queue_flush(sdkp->disk->queue, REQ_FLUSH | REQ_FUA);
+
+and the driver must handle write requests that have the REQ_FUA bit set
+in prep_fn/request_fn. If the FUA bit is not natively supported the block
+layer turns it into an empty REQ_FLUSH request after the actual write.
diff --git a/Documentation/blockdev/00-INDEX b/Documentation/blockdev/00-INDEX
new file mode 100644
index 000000000..c08df56dd
--- /dev/null
+++ b/Documentation/blockdev/00-INDEX
@@ -0,0 +1,18 @@
+00-INDEX
+ - this file
+README.DAC960
+ - info on Mylex DAC960/DAC1100 PCI RAID Controller Driver for Linux.
+cciss.txt
+ - info, major/minor #'s for Compaq's SMART Array Controllers.
+cpqarray.txt
+ - info on using Compaq's SMART2 Intelligent Disk Array Controllers.
+floppy.txt
+ - notes and driver options for the floppy disk driver.
+mflash.txt
+ - info on mGine m(g)flash driver for linux.
+nbd.txt
+ - info on a TCP implementation of a network block device.
+paride.txt
+ - information about the parallel port IDE subsystem.
+ramdisk.txt
+ - short guide on how to set up and use the RAM disk.
diff --git a/Documentation/blockdev/README.DAC960 b/Documentation/blockdev/README.DAC960
new file mode 100644
index 000000000..bd85fb9dc
--- /dev/null
+++ b/Documentation/blockdev/README.DAC960
@@ -0,0 +1,756 @@
+ Linux Driver for Mylex DAC960/AcceleRAID/eXtremeRAID PCI RAID Controllers
+
+ Version 2.2.11 for Linux 2.2.19
+ Version 2.4.11 for Linux 2.4.12
+
+ PRODUCTION RELEASE
+
+ 11 October 2001
+
+ Leonard N. Zubkoff
+ Dandelion Digital
+ lnz@dandelion.com
+
+ Copyright 1998-2001 by Leonard N. Zubkoff <lnz@dandelion.com>
+
+
+ INTRODUCTION
+
+Mylex, Inc. designs and manufactures a variety of high performance PCI RAID
+controllers. Mylex Corporation is located at 34551 Ardenwood Blvd., Fremont,
+California 94555, USA and can be reached at 510.796.6100 or on the World Wide
+Web at http://www.mylex.com. Mylex Technical Support can be reached by
+electronic mail at mylexsup@us.ibm.com, by voice at 510.608.2400, or by FAX at
+510.745.7715. Contact information for offices in Europe and Japan is available
+on their Web site.
+
+The latest information on Linux support for DAC960 PCI RAID Controllers, as
+well as the most recent release of this driver, will always be available from
+my Linux Home Page at URL "http://www.dandelion.com/Linux/". The Linux DAC960
+driver supports all current Mylex PCI RAID controllers including the new
+eXtremeRAID 2000/3000 and AcceleRAID 352/170/160 models which have an entirely
+new firmware interface from the older eXtremeRAID 1100, AcceleRAID 150/200/250,
+and DAC960PJ/PG/PU/PD/PL. See below for a complete controller list as well as
+minimum firmware version requirements. For simplicity, in most places this
+documentation refers to DAC960 generically rather than explicitly listing all
+the supported models.
+
+Driver bug reports should be sent via electronic mail to "lnz@dandelion.com".
+Please include with the bug report the complete configuration messages reported
+by the driver at startup, along with any subsequent system messages relevant to
+the controller's operation, and a detailed description of your system's
+hardware configuration. Driver bugs are actually quite rare; if you encounter
+problems with disks being marked offline, for example, please contact Mylex
+Technical Support as the problem is related to the hardware configuration
+rather than the Linux driver.
+
+Please consult the RAID controller documentation for detailed information
+regarding installation and configuration of the controllers. This document
+primarily provides information specific to the Linux support.
+
+
+ DRIVER FEATURES
+
+The DAC960 RAID controllers are supported solely as high performance RAID
+controllers, not as interfaces to arbitrary SCSI devices. The Linux DAC960
+driver operates at the block device level, the same level as the SCSI and IDE
+drivers. Unlike other RAID controllers currently supported on Linux, the
+DAC960 driver is not dependent on the SCSI subsystem, and hence avoids all the
+complexity and unnecessary code that would be associated with an implementation
+as a SCSI driver. The DAC960 driver is designed for as high a performance as
+possible with no compromises or extra code for compatibility with lower
+performance devices. The DAC960 driver includes extensive error logging and
+online configuration management capabilities. Except for initial configuration
+of the controller and adding new disk drives, most everything can be handled
+from Linux while the system is operational.
+
+The DAC960 driver is architected to support up to 8 controllers per system.
+Each DAC960 parallel SCSI controller can support up to 15 disk drives per
+channel, for a maximum of 60 drives on a four channel controller; the fibre
+channel eXtremeRAID 3000 controller supports up to 125 disk drives per loop for
+a total of 250 drives. The drives installed on a controller are divided into
+one or more "Drive Groups", and then each Drive Group is subdivided further
+into 1 to 32 "Logical Drives". Each Logical Drive has a specific RAID Level
+and caching policy associated with it, and it appears to Linux as a single
+block device. Logical Drives are further subdivided into up to 7 partitions
+through the normal Linux and PC disk partitioning schemes. Logical Drives are
+also known as "System Drives", and Drive Groups are also called "Packs". Both
+terms are in use in the Mylex documentation; I have chosen to standardize on
+the more generic "Logical Drive" and "Drive Group".
+
+DAC960 RAID disk devices are named in the style of the obsolete Device File
+System (DEVFS). The device corresponding to Logical Drive D on Controller C
+is referred to as /dev/rd/cCdD, and the partitions are called /dev/rd/cCdDp1
+through /dev/rd/cCdDp7. For example, partition 3 of Logical Drive 5 on
+Controller 2 is referred to as /dev/rd/c2d5p3. Note that unlike with SCSI
+disks the device names will not change in the event of a disk drive failure.
+The DAC960 driver is assigned major numbers 48 - 55 with one major number per
+controller. The 8 bits of minor number are divided into 5 bits for the Logical
+Drive and 3 bits for the partition.
+
+
+ SUPPORTED DAC960/AcceleRAID/eXtremeRAID PCI RAID CONTROLLERS
+
+The following list comprises the supported DAC960, AcceleRAID, and eXtremeRAID
+PCI RAID Controllers as of the date of this document. It is recommended that
+anyone purchasing a Mylex PCI RAID Controller not in the following table
+contact the author beforehand to verify that it is or will be supported.
+
+eXtremeRAID 3000
+ 1 Wide Ultra-2/LVD SCSI channel
+ 2 External Fibre FC-AL channels
+ 233MHz StrongARM SA 110 Processor
+ 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
+ 32MB/64MB ECC SDRAM Memory
+
+eXtremeRAID 2000
+ 4 Wide Ultra-160 LVD SCSI channels
+ 233MHz StrongARM SA 110 Processor
+ 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
+ 32MB/64MB ECC SDRAM Memory
+
+AcceleRAID 352
+ 2 Wide Ultra-160 LVD SCSI channels
+ 100MHz Intel i960RN RISC Processor
+ 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
+ 32MB/64MB ECC SDRAM Memory
+
+AcceleRAID 170
+ 1 Wide Ultra-160 LVD SCSI channel
+ 100MHz Intel i960RM RISC Processor
+ 16MB/32MB/64MB ECC SDRAM Memory
+
+AcceleRAID 160 (AcceleRAID 170LP)
+ 1 Wide Ultra-160 LVD SCSI channel
+ 100MHz Intel i960RS RISC Processor
+ Built in 16M ECC SDRAM Memory
+ PCI Low Profile Form Factor - fit for 2U height
+
+eXtremeRAID 1100 (DAC1164P)
+ 3 Wide Ultra-2/LVD SCSI channels
+ 233MHz StrongARM SA 110 Processor
+ 64 Bit 33MHz PCI (backward compatible with 32 Bit PCI slots)
+ 16MB/32MB/64MB Parity SDRAM Memory with Battery Backup
+
+AcceleRAID 250 (DAC960PTL1)
+ Uses onboard Symbios SCSI chips on certain motherboards
+ Also includes one onboard Wide Ultra-2/LVD SCSI Channel
+ 66MHz Intel i960RD RISC Processor
+ 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory
+
+AcceleRAID 200 (DAC960PTL0)
+ Uses onboard Symbios SCSI chips on certain motherboards
+ Includes no onboard SCSI Channels
+ 66MHz Intel i960RD RISC Processor
+ 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory
+
+AcceleRAID 150 (DAC960PRL)
+ Uses onboard Symbios SCSI chips on certain motherboards
+ Also includes one onboard Wide Ultra-2/LVD SCSI Channel
+ 33MHz Intel i960RP RISC Processor
+ 4MB Parity EDO Memory
+
+DAC960PJ 1/2/3 Wide Ultra SCSI-3 Channels
+ 66MHz Intel i960RD RISC Processor
+ 4MB/8MB/16MB/32MB/64MB/128MB ECC EDO Memory
+
+DAC960PG 1/2/3 Wide Ultra SCSI-3 Channels
+ 33MHz Intel i960RP RISC Processor
+ 4MB/8MB ECC EDO Memory
+
+DAC960PU 1/2/3 Wide Ultra SCSI-3 Channels
+ Intel i960CF RISC Processor
+ 4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory
+
+DAC960PD 1/2/3 Wide Fast SCSI-2 Channels
+ Intel i960CF RISC Processor
+ 4MB/8MB EDRAM or 2MB/4MB/8MB/16MB/32MB DRAM Memory
+
+DAC960PL 1/2/3 Wide Fast SCSI-2 Channels
+ Intel i960 RISC Processor
+ 2MB/4MB/8MB/16MB/32MB DRAM Memory
+
+DAC960P 1/2/3 Wide Fast SCSI-2 Channels
+ Intel i960 RISC Processor
+ 2MB/4MB/8MB/16MB/32MB DRAM Memory
+
+For the eXtremeRAID 2000/3000 and AcceleRAID 352/170/160, firmware version
+6.00-01 or above is required.
+
+For the eXtremeRAID 1100, firmware version 5.06-0-52 or above is required.
+
+For the AcceleRAID 250, 200, and 150, firmware version 4.06-0-57 or above is
+required.
+
+For the DAC960PJ and DAC960PG, firmware version 4.06-0-00 or above is required.
+
+For the DAC960PU, DAC960PD, DAC960PL, and DAC960P, either firmware version
+3.51-0-04 or above is required (for dual Flash ROM controllers), or firmware
+version 2.73-0-00 or above is required (for single Flash ROM controllers)
+
+Please note that not all SCSI disk drives are suitable for use with DAC960
+controllers, and only particular firmware versions of any given model may
+actually function correctly. Similarly, not all motherboards have a BIOS that
+properly initializes the AcceleRAID 250, AcceleRAID 200, AcceleRAID 150,
+DAC960PJ, and DAC960PG because the Intel i960RD/RP is a multi-function device.
+If in doubt, contact Mylex RAID Technical Support (mylexsup@us.ibm.com) to
+verify compatibility. Mylex makes available a hard disk compatibility list at
+http://www.mylex.com/support/hdcomp/hd-lists.html.
+
+
+ DRIVER INSTALLATION
+
+This distribution was prepared for Linux kernel version 2.2.19 or 2.4.12.
+
+To install the DAC960 RAID driver, you may use the following commands,
+replacing "/usr/src" with wherever you keep your Linux kernel source tree:
+
+ cd /usr/src
+ tar -xvzf DAC960-2.2.11.tar.gz (or DAC960-2.4.11.tar.gz)
+ mv README.DAC960 linux/Documentation
+ mv DAC960.[ch] linux/drivers/block
+ patch -p0 < DAC960.patch (if DAC960.patch is included)
+ cd linux
+ make config
+ make bzImage (or zImage)
+
+Then install "arch/x86/boot/bzImage" or "arch/x86/boot/zImage" as your
+standard kernel, run lilo if appropriate, and reboot.
+
+To create the necessary devices in /dev, the "make_rd" script included in
+"DAC960-Utilities.tar.gz" from http://www.dandelion.com/Linux/ may be used.
+LILO 21 and FDISK v2.9 include DAC960 support; also included in this archive
+are patches to LILO 20 and FDISK v2.8 that add DAC960 support, along with
+statically linked executables of LILO and FDISK. This modified version of LILO
+will allow booting from a DAC960 controller and/or mounting the root file
+system from a DAC960.
+
+Red Hat Linux 6.0 and SuSE Linux 6.1 include support for Mylex PCI RAID
+controllers. Installing directly onto a DAC960 may be problematic from other
+Linux distributions until their installation utilities are updated.
+
+
+ INSTALLATION NOTES
+
+Before installing Linux or adding DAC960 logical drives to an existing Linux
+system, the controller must first be configured to provide one or more logical
+drives using the BIOS Configuration Utility or DACCF. Please note that since
+there are only at most 6 usable partitions on each logical drive, systems
+requiring more partitions should subdivide a drive group into multiple logical
+drives, each of which can have up to 6 usable partitions. Also, note that with
+large disk arrays it is advisable to enable the 8GB BIOS Geometry (255/63)
+rather than accepting the default 2GB BIOS Geometry (128/32); failing to so do
+will cause the logical drive geometry to have more than 65535 cylinders which
+will make it impossible for FDISK to be used properly. The 8GB BIOS Geometry
+can be enabled by configuring the DAC960 BIOS, which is accessible via Alt-M
+during the BIOS initialization sequence.
+
+For maximum performance and the most efficient E2FSCK performance, it is
+recommended that EXT2 file systems be built with a 4KB block size and 16 block
+stride to match the DAC960 controller's 64KB default stripe size. The command
+"mke2fs -b 4096 -R stride=16 <device>" is appropriate. Unless there will be a
+large number of small files on the file systems, it is also beneficial to add
+the "-i 16384" option to increase the bytes per inode parameter thereby
+reducing the file system metadata. Finally, on systems that will only be run
+with Linux 2.2 or later kernels it is beneficial to enable sparse superblocks
+with the "-s 1" option.
+
+
+ DAC960 ANNOUNCEMENTS MAILING LIST
+
+The DAC960 Announcements Mailing List provides a forum for informing Linux
+users of new driver releases and other announcements regarding Linux support
+for DAC960 PCI RAID Controllers. To join the mailing list, send a message to
+"dac960-announce-request@dandelion.com" with the line "subscribe" in the
+message body.
+
+
+ CONTROLLER CONFIGURATION AND STATUS MONITORING
+
+The DAC960 RAID controllers running firmware 4.06 or above include a Background
+Initialization facility so that system downtime is minimized both for initial
+installation and subsequent configuration of additional storage. The BIOS
+Configuration Utility (accessible via Alt-R during the BIOS initialization
+sequence) is used to quickly configure the controller, and then the logical
+drives that have been created are available for immediate use even while they
+are still being initialized by the controller. The primary need for online
+configuration and status monitoring is then to avoid system downtime when disk
+drives fail and must be replaced. Mylex's online monitoring and configuration
+utilities are being ported to Linux and will become available at some point in
+the future. Note that with a SAF-TE (SCSI Accessed Fault-Tolerant Enclosure)
+enclosure, the controller is able to rebuild failed drives automatically as
+soon as a drive replacement is made available.
+
+The primary interfaces for controller configuration and status monitoring are
+special files created in the /proc/rd/... hierarchy along with the normal
+system console logging mechanism. Whenever the system is operating, the DAC960
+driver queries each controller for status information every 10 seconds, and
+checks for additional conditions every 60 seconds. The initial status of each
+controller is always available for controller N in /proc/rd/cN/initial_status,
+and the current status as of the last status monitoring query is available in
+/proc/rd/cN/current_status. In addition, status changes are also logged by the
+driver to the system console and will appear in the log files maintained by
+syslog. The progress of asynchronous rebuild or consistency check operations
+is also available in /proc/rd/cN/current_status, and progress messages are
+logged to the system console at most every 60 seconds.
+
+Starting with the 2.2.3/2.0.3 versions of the driver, the status information
+available in /proc/rd/cN/initial_status and /proc/rd/cN/current_status has been
+augmented to include the vendor, model, revision, and serial number (if
+available) for each physical device found connected to the controller:
+
+***** DAC960 RAID Driver Version 2.2.3 of 19 August 1999 *****
+Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
+Configuring Mylex DAC960PRL PCI RAID Controller
+ Firmware Version: 4.07-0-07, Channels: 1, Memory Size: 16MB
+ PCI Bus: 1, Device: 4, Function: 1, I/O Address: Unassigned
+ PCI Address: 0xFE300000 mapped at 0xA0800000, IRQ Channel: 21
+ Controller Queue Depth: 128, Maximum Blocks per Command: 128
+ Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
+ Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
+ SAF-TE Enclosure Management Enabled
+ Physical Devices:
+ 0:0 Vendor: IBM Model: DRVS09D Revision: 0270
+ Serial Number: 68016775HA
+ Disk Status: Online, 17928192 blocks
+ 0:1 Vendor: IBM Model: DRVS09D Revision: 0270
+ Serial Number: 68004E53HA
+ Disk Status: Online, 17928192 blocks
+ 0:2 Vendor: IBM Model: DRVS09D Revision: 0270
+ Serial Number: 13013935HA
+ Disk Status: Online, 17928192 blocks
+ 0:3 Vendor: IBM Model: DRVS09D Revision: 0270
+ Serial Number: 13016897HA
+ Disk Status: Online, 17928192 blocks
+ 0:4 Vendor: IBM Model: DRVS09D Revision: 0270
+ Serial Number: 68019905HA
+ Disk Status: Online, 17928192 blocks
+ 0:5 Vendor: IBM Model: DRVS09D Revision: 0270
+ Serial Number: 68012753HA
+ Disk Status: Online, 17928192 blocks
+ 0:6 Vendor: ESG-SHV Model: SCA HSBP M6 Revision: 0.61
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Online, 89640960 blocks, Write Thru
+ No Rebuild or Consistency Check in Progress
+
+To simplify the monitoring process for custom software, the special file
+/proc/rd/status returns "OK" when all DAC960 controllers in the system are
+operating normally and no failures have occurred, or "ALERT" if any logical
+drives are offline or critical or any non-standby physical drives are dead.
+
+Configuration commands for controller N are available via the special file
+/proc/rd/cN/user_command. A human readable command can be written to this
+special file to initiate a configuration operation, and the results of the
+operation can then be read back from the special file in addition to being
+logged to the system console. The shell command sequence
+
+ echo "<configuration-command>" > /proc/rd/c0/user_command
+ cat /proc/rd/c0/user_command
+
+is typically used to execute configuration commands. The configuration
+commands are:
+
+ flush-cache
+
+ The "flush-cache" command flushes the controller's cache. The system
+ automatically flushes the cache at shutdown or if the driver module is
+ unloaded, so this command is only needed to be certain a write back cache
+ is flushed to disk before the system is powered off by a command to a UPS.
+ Note that the flush-cache command also stops an asynchronous rebuild or
+ consistency check, so it should not be used except when the system is being
+ halted.
+
+ kill <channel>:<target-id>
+
+ The "kill" command marks the physical drive <channel>:<target-id> as DEAD.
+ This command is provided primarily for testing, and should not be used
+ during normal system operation.
+
+ make-online <channel>:<target-id>
+
+ The "make-online" command changes the physical drive <channel>:<target-id>
+ from status DEAD to status ONLINE. In cases where multiple physical drives
+ have been killed simultaneously, this command may be used to bring all but
+ one of them back online, after which a rebuild to the final drive is
+ necessary.
+
+ Warning: make-online should only be used on a dead physical drive that is
+ an active part of a drive group, never on a standby drive. The command
+ should never be used on a dead drive that is part of a critical logical
+ drive; rebuild should be used if only a single drive is dead.
+
+ make-standby <channel>:<target-id>
+
+ The "make-standby" command changes physical drive <channel>:<target-id>
+ from status DEAD to status STANDBY. It should only be used in cases where
+ a dead drive was replaced after an automatic rebuild was performed onto a
+ standby drive. It cannot be used to add a standby drive to the controller
+ configuration if one was not created initially; the BIOS Configuration
+ Utility must be used for that currently.
+
+ rebuild <channel>:<target-id>
+
+ The "rebuild" command initiates an asynchronous rebuild onto physical drive
+ <channel>:<target-id>. It should only be used when a dead drive has been
+ replaced.
+
+ check-consistency <logical-drive-number>
+
+ The "check-consistency" command initiates an asynchronous consistency check
+ of <logical-drive-number> with automatic restoration. It can be used
+ whenever it is desired to verify the consistency of the redundancy
+ information.
+
+ cancel-rebuild
+ cancel-consistency-check
+
+ The "cancel-rebuild" and "cancel-consistency-check" commands cancel any
+ rebuild or consistency check operations previously initiated.
+
+
+ EXAMPLE I - DRIVE FAILURE WITHOUT A STANDBY DRIVE
+
+The following annotated logs demonstrate the controller configuration and and
+online status monitoring capabilities of the Linux DAC960 Driver. The test
+configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a
+DAC960PJ controller. The physical drives are configured into a single drive
+group without a standby drive, and the drive group has been configured into two
+logical drives, one RAID-5 and one RAID-6. Note that these logs are from an
+earlier version of the driver and the messages have changed somewhat with newer
+releases, but the functionality remains similar. First, here is the current
+status of the RAID configuration:
+
+gwynedd:/u/lnz# cat /proc/rd/c0/current_status
+***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 *****
+Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
+Configuring Mylex DAC960PJ PCI RAID Controller
+ Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB
+ PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned
+ PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9
+ Controller Queue Depth: 128, Maximum Blocks per Command: 128
+ Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
+ Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
+ Physical Devices:
+ 0:1 - Disk: Online, 2201600 blocks
+ 0:2 - Disk: Online, 2201600 blocks
+ 0:3 - Disk: Online, 2201600 blocks
+ 1:1 - Disk: Online, 2201600 blocks
+ 1:2 - Disk: Online, 2201600 blocks
+ 1:3 - Disk: Online, 2201600 blocks
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru
+ /dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru
+ No Rebuild or Consistency Check in Progress
+
+gwynedd:/u/lnz# cat /proc/rd/status
+OK
+
+The above messages indicate that everything is healthy, and /proc/rd/status
+returns "OK" indicating that there are no problems with any DAC960 controller
+in the system. For demonstration purposes, while I/O is active Physical Drive
+1:1 is now disconnected, simulating a drive failure. The failure is noted by
+the driver within 10 seconds of the controller's having detected it, and the
+driver logs the following console status messages indicating that Logical
+Drives 0 and 1 are now CRITICAL as a result of Physical Drive 1:1 being DEAD:
+
+DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
+DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
+DAC960#0: Physical Drive 1:1 killed because of timeout on SCSI command
+DAC960#0: Physical Drive 1:1 is now DEAD
+DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL
+DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL
+
+The Sense Keys logged here are just Check Condition / Unit Attention conditions
+arising from a SCSI bus reset that is forced by the controller during its error
+recovery procedures. Concurrently with the above, the driver status available
+from /proc/rd also reflects the drive failure. The status message in
+/proc/rd/status has changed from "OK" to "ALERT":
+
+gwynedd:/u/lnz# cat /proc/rd/status
+ALERT
+
+and /proc/rd/c0/current_status has been updated:
+
+gwynedd:/u/lnz# cat /proc/rd/c0/current_status
+ ...
+ Physical Devices:
+ 0:1 - Disk: Online, 2201600 blocks
+ 0:2 - Disk: Online, 2201600 blocks
+ 0:3 - Disk: Online, 2201600 blocks
+ 1:1 - Disk: Dead, 2201600 blocks
+ 1:2 - Disk: Online, 2201600 blocks
+ 1:3 - Disk: Online, 2201600 blocks
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru
+ /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru
+ No Rebuild or Consistency Check in Progress
+
+Since there are no standby drives configured, the system can continue to access
+the logical drives in a performance degraded mode until the failed drive is
+replaced and a rebuild operation completed to restore the redundancy of the
+logical drives. Once Physical Drive 1:1 is replaced with a properly
+functioning drive, or if the physical drive was killed without having failed
+(e.g., due to electrical problems on the SCSI bus), the user can instruct the
+controller to initiate a rebuild operation onto the newly replaced drive:
+
+gwynedd:/u/lnz# echo "rebuild 1:1" > /proc/rd/c0/user_command
+gwynedd:/u/lnz# cat /proc/rd/c0/user_command
+Rebuild of Physical Drive 1:1 Initiated
+
+The echo command instructs the controller to initiate an asynchronous rebuild
+operation onto Physical Drive 1:1, and the status message that results from the
+operation is then available for reading from /proc/rd/c0/user_command, as well
+as being logged to the console by the driver.
+
+Within 10 seconds of this command the driver logs the initiation of the
+asynchronous rebuild operation:
+
+DAC960#0: Rebuild of Physical Drive 1:1 Initiated
+DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01
+DAC960#0: Physical Drive 1:1 is now WRITE-ONLY
+DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 1% completed
+
+and /proc/rd/c0/current_status is updated:
+
+gwynedd:/u/lnz# cat /proc/rd/c0/current_status
+ ...
+ Physical Devices:
+ 0:1 - Disk: Online, 2201600 blocks
+ 0:2 - Disk: Online, 2201600 blocks
+ 0:3 - Disk: Online, 2201600 blocks
+ 1:1 - Disk: Write-Only, 2201600 blocks
+ 1:2 - Disk: Online, 2201600 blocks
+ 1:3 - Disk: Online, 2201600 blocks
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru
+ /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru
+ Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 6% completed
+
+As the rebuild progresses, the current status in /proc/rd/c0/current_status is
+updated every 10 seconds:
+
+gwynedd:/u/lnz# cat /proc/rd/c0/current_status
+ ...
+ Physical Devices:
+ 0:1 - Disk: Online, 2201600 blocks
+ 0:2 - Disk: Online, 2201600 blocks
+ 0:3 - Disk: Online, 2201600 blocks
+ 1:1 - Disk: Write-Only, 2201600 blocks
+ 1:2 - Disk: Online, 2201600 blocks
+ 1:3 - Disk: Online, 2201600 blocks
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Critical, 5498880 blocks, Write Thru
+ /dev/rd/c0d1: RAID-6, Critical, 3305472 blocks, Write Thru
+ Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 15% completed
+
+and every minute a progress message is logged to the console by the driver:
+
+DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 32% completed
+DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 63% completed
+DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 94% completed
+DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 94% completed
+
+Finally, the rebuild completes successfully. The driver logs the status of the
+logical and physical drives and the rebuild completion:
+
+DAC960#0: Rebuild Completed Successfully
+DAC960#0: Physical Drive 1:1 is now ONLINE
+DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE
+DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE
+
+/proc/rd/c0/current_status is updated:
+
+gwynedd:/u/lnz# cat /proc/rd/c0/current_status
+ ...
+ Physical Devices:
+ 0:1 - Disk: Online, 2201600 blocks
+ 0:2 - Disk: Online, 2201600 blocks
+ 0:3 - Disk: Online, 2201600 blocks
+ 1:1 - Disk: Online, 2201600 blocks
+ 1:2 - Disk: Online, 2201600 blocks
+ 1:3 - Disk: Online, 2201600 blocks
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Online, 5498880 blocks, Write Thru
+ /dev/rd/c0d1: RAID-6, Online, 3305472 blocks, Write Thru
+ Rebuild Completed Successfully
+
+and /proc/rd/status indicates that everything is healthy once again:
+
+gwynedd:/u/lnz# cat /proc/rd/status
+OK
+
+
+ EXAMPLE II - DRIVE FAILURE WITH A STANDBY DRIVE
+
+The following annotated logs demonstrate the controller configuration and and
+online status monitoring capabilities of the Linux DAC960 Driver. The test
+configuration comprises 6 1GB Quantum Atlas I disk drives on two channels of a
+DAC960PJ controller. The physical drives are configured into a single drive
+group with a standby drive, and the drive group has been configured into two
+logical drives, one RAID-5 and one RAID-6. Note that these logs are from an
+earlier version of the driver and the messages have changed somewhat with newer
+releases, but the functionality remains similar. First, here is the current
+status of the RAID configuration:
+
+gwynedd:/u/lnz# cat /proc/rd/c0/current_status
+***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 *****
+Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
+Configuring Mylex DAC960PJ PCI RAID Controller
+ Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB
+ PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned
+ PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9
+ Controller Queue Depth: 128, Maximum Blocks per Command: 128
+ Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
+ Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
+ Physical Devices:
+ 0:1 - Disk: Online, 2201600 blocks
+ 0:2 - Disk: Online, 2201600 blocks
+ 0:3 - Disk: Online, 2201600 blocks
+ 1:1 - Disk: Online, 2201600 blocks
+ 1:2 - Disk: Online, 2201600 blocks
+ 1:3 - Disk: Standby, 2201600 blocks
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru
+ /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru
+ No Rebuild or Consistency Check in Progress
+
+gwynedd:/u/lnz# cat /proc/rd/status
+OK
+
+The above messages indicate that everything is healthy, and /proc/rd/status
+returns "OK" indicating that there are no problems with any DAC960 controller
+in the system. For demonstration purposes, while I/O is active Physical Drive
+1:2 is now disconnected, simulating a drive failure. The failure is noted by
+the driver within 10 seconds of the controller's having detected it, and the
+driver logs the following console status messages:
+
+DAC960#0: Physical Drive 1:1 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
+DAC960#0: Physical Drive 1:3 Error Log: Sense Key = 6, ASC = 29, ASCQ = 02
+DAC960#0: Physical Drive 1:2 killed because of timeout on SCSI command
+DAC960#0: Physical Drive 1:2 is now DEAD
+DAC960#0: Physical Drive 1:2 killed because it was removed
+DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now CRITICAL
+DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now CRITICAL
+
+Since a standby drive is configured, the controller automatically begins
+rebuilding onto the standby drive:
+
+DAC960#0: Physical Drive 1:3 is now WRITE-ONLY
+DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed
+
+Concurrently with the above, the driver status available from /proc/rd also
+reflects the drive failure and automatic rebuild. The status message in
+/proc/rd/status has changed from "OK" to "ALERT":
+
+gwynedd:/u/lnz# cat /proc/rd/status
+ALERT
+
+and /proc/rd/c0/current_status has been updated:
+
+gwynedd:/u/lnz# cat /proc/rd/c0/current_status
+ ...
+ Physical Devices:
+ 0:1 - Disk: Online, 2201600 blocks
+ 0:2 - Disk: Online, 2201600 blocks
+ 0:3 - Disk: Online, 2201600 blocks
+ 1:1 - Disk: Online, 2201600 blocks
+ 1:2 - Disk: Dead, 2201600 blocks
+ 1:3 - Disk: Write-Only, 2201600 blocks
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru
+ /dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru
+ Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 4% completed
+
+As the rebuild progresses, the current status in /proc/rd/c0/current_status is
+updated every 10 seconds:
+
+gwynedd:/u/lnz# cat /proc/rd/c0/current_status
+ ...
+ Physical Devices:
+ 0:1 - Disk: Online, 2201600 blocks
+ 0:2 - Disk: Online, 2201600 blocks
+ 0:3 - Disk: Online, 2201600 blocks
+ 1:1 - Disk: Online, 2201600 blocks
+ 1:2 - Disk: Dead, 2201600 blocks
+ 1:3 - Disk: Write-Only, 2201600 blocks
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Critical, 4399104 blocks, Write Thru
+ /dev/rd/c0d1: RAID-6, Critical, 2754560 blocks, Write Thru
+ Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed
+
+and every minute a progress message is logged on the console by the driver:
+
+DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 40% completed
+DAC960#0: Rebuild in Progress: Logical Drive 0 (/dev/rd/c0d0) 76% completed
+DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 66% completed
+DAC960#0: Rebuild in Progress: Logical Drive 1 (/dev/rd/c0d1) 84% completed
+
+Finally, the rebuild completes successfully. The driver logs the status of the
+logical and physical drives and the rebuild completion:
+
+DAC960#0: Rebuild Completed Successfully
+DAC960#0: Physical Drive 1:3 is now ONLINE
+DAC960#0: Logical Drive 0 (/dev/rd/c0d0) is now ONLINE
+DAC960#0: Logical Drive 1 (/dev/rd/c0d1) is now ONLINE
+
+/proc/rd/c0/current_status is updated:
+
+***** DAC960 RAID Driver Version 2.0.0 of 23 March 1999 *****
+Copyright 1998-1999 by Leonard N. Zubkoff <lnz@dandelion.com>
+Configuring Mylex DAC960PJ PCI RAID Controller
+ Firmware Version: 4.06-0-08, Channels: 3, Memory Size: 8MB
+ PCI Bus: 0, Device: 19, Function: 1, I/O Address: Unassigned
+ PCI Address: 0xFD4FC000 mapped at 0x8807000, IRQ Channel: 9
+ Controller Queue Depth: 128, Maximum Blocks per Command: 128
+ Driver Queue Depth: 127, Maximum Scatter/Gather Segments: 33
+ Stripe Size: 64KB, Segment Size: 8KB, BIOS Geometry: 255/63
+ Physical Devices:
+ 0:1 - Disk: Online, 2201600 blocks
+ 0:2 - Disk: Online, 2201600 blocks
+ 0:3 - Disk: Online, 2201600 blocks
+ 1:1 - Disk: Online, 2201600 blocks
+ 1:2 - Disk: Dead, 2201600 blocks
+ 1:3 - Disk: Online, 2201600 blocks
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru
+ /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru
+ Rebuild Completed Successfully
+
+and /proc/rd/status indicates that everything is healthy once again:
+
+gwynedd:/u/lnz# cat /proc/rd/status
+OK
+
+Note that the absence of a viable standby drive does not create an "ALERT"
+status. Once dead Physical Drive 1:2 has been replaced, the controller must be
+told that this has occurred and that the newly replaced drive should become the
+new standby drive:
+
+gwynedd:/u/lnz# echo "make-standby 1:2" > /proc/rd/c0/user_command
+gwynedd:/u/lnz# cat /proc/rd/c0/user_command
+Make Standby of Physical Drive 1:2 Succeeded
+
+The echo command instructs the controller to make Physical Drive 1:2 into a
+standby drive, and the status message that results from the operation is then
+available for reading from /proc/rd/c0/user_command, as well as being logged to
+the console by the driver. Within 60 seconds of this command the driver logs:
+
+DAC960#0: Physical Drive 1:2 Error Log: Sense Key = 6, ASC = 29, ASCQ = 01
+DAC960#0: Physical Drive 1:2 is now STANDBY
+DAC960#0: Make Standby of Physical Drive 1:2 Succeeded
+
+and /proc/rd/c0/current_status is updated:
+
+gwynedd:/u/lnz# cat /proc/rd/c0/current_status
+ ...
+ Physical Devices:
+ 0:1 - Disk: Online, 2201600 blocks
+ 0:2 - Disk: Online, 2201600 blocks
+ 0:3 - Disk: Online, 2201600 blocks
+ 1:1 - Disk: Online, 2201600 blocks
+ 1:2 - Disk: Standby, 2201600 blocks
+ 1:3 - Disk: Online, 2201600 blocks
+ Logical Drives:
+ /dev/rd/c0d0: RAID-5, Online, 4399104 blocks, Write Thru
+ /dev/rd/c0d1: RAID-6, Online, 2754560 blocks, Write Thru
+ Rebuild Completed Successfully
diff --git a/Documentation/blockdev/cciss.txt b/Documentation/blockdev/cciss.txt
new file mode 100644
index 000000000..b79d0a13e
--- /dev/null
+++ b/Documentation/blockdev/cciss.txt
@@ -0,0 +1,194 @@
+This driver is for Compaq's SMART Array Controllers.
+
+Supported Cards:
+----------------
+
+This driver is known to work with the following cards:
+
+ * SA 5300
+ * SA 5i
+ * SA 532
+ * SA 5312
+ * SA 641
+ * SA 642
+ * SA 6400
+ * SA 6400 U320 Expansion Module
+ * SA 6i
+ * SA P600
+ * SA P800
+ * SA E400
+ * SA P400i
+ * SA E200
+ * SA E200i
+ * SA E500
+ * SA P700m
+ * SA P212
+ * SA P410
+ * SA P410i
+ * SA P411
+ * SA P812
+ * SA P712m
+ * SA P711m
+
+Detecting drive failures:
+-------------------------
+
+To get the status of logical volumes and to detect physical drive
+failures, you can use the cciss_vol_status program found here:
+http://cciss.sourceforge.net/#cciss_utils
+
+Device Naming:
+--------------
+
+If nodes are not already created in the /dev/cciss directory, run as root:
+
+# cd /dev
+# ./MAKEDEV cciss
+
+You need some entries in /dev for the cciss device. The MAKEDEV script
+can make device nodes for you automatically. Currently the device setup
+is as follows:
+
+Major numbers:
+ 104 cciss0
+ 105 cciss1
+ 106 cciss2
+ 105 cciss3
+ 108 cciss4
+ 109 cciss5
+ 110 cciss6
+ 111 cciss7
+
+Minor numbers:
+ b7 b6 b5 b4 b3 b2 b1 b0
+ |----+----| |----+----|
+ | |
+ | +-------- Partition ID (0=wholedev, 1-15 partition)
+ |
+ +-------------------- Logical Volume number
+
+The device naming scheme is:
+/dev/cciss/c0d0 Controller 0, disk 0, whole device
+/dev/cciss/c0d0p1 Controller 0, disk 0, partition 1
+/dev/cciss/c0d0p2 Controller 0, disk 0, partition 2
+/dev/cciss/c0d0p3 Controller 0, disk 0, partition 3
+
+/dev/cciss/c1d1 Controller 1, disk 1, whole device
+/dev/cciss/c1d1p1 Controller 1, disk 1, partition 1
+/dev/cciss/c1d1p2 Controller 1, disk 1, partition 2
+/dev/cciss/c1d1p3 Controller 1, disk 1, partition 3
+
+CCISS simple mode support
+-------------------------
+
+The "cciss_simple_mode=1" boot parameter may be used to prevent the driver
+from putting the controller into "performant" mode. The difference is that
+with simple mode, each command completion requires an interrupt, while with
+"performant mode" (the default, and ordinarily better performing) it is
+possible to have multiple command completions indicated by a single
+interrupt.
+
+SCSI tape drive and medium changer support
+------------------------------------------
+
+SCSI sequential access devices and medium changer devices are supported and
+appropriate device nodes are automatically created. (e.g.
+/dev/st0, /dev/st1, etc. See the "st" man page for more details.)
+You must enable "SCSI tape drive support for Smart Array 5xxx" and
+"SCSI support" in your kernel configuration to be able to use SCSI
+tape drives with your Smart Array 5xxx controller.
+
+Additionally, note that the driver will engage the SCSI core at init
+time if any tape drives or medium changers are detected. The driver may
+also be directed to dynamically engage the SCSI core via the /proc filesystem
+entry which the "block" side of the driver creates as
+/proc/driver/cciss/cciss* at runtime. This is best done via a script.
+
+For example:
+
+ for x in /proc/driver/cciss/cciss[0-9]*
+ do
+ echo "engage scsi" > $x
+ done
+
+Once the SCSI core is engaged by the driver, it cannot be disengaged
+(except by unloading the driver, if it happens to be linked as a module.)
+
+Note also that if no sequential access devices or medium changers are
+detected, the SCSI core will not be engaged by the action of the above
+script.
+
+Hot plug support for SCSI tape drives
+-------------------------------------
+
+Hot plugging of SCSI tape drives is supported, with some caveats.
+The cciss driver must be informed that changes to the SCSI bus
+have been made. This may be done via the /proc filesystem.
+For example:
+
+ echo "rescan" > /proc/scsi/cciss0/1
+
+This causes the driver to query the adapter about changes to the
+physical SCSI buses and/or fibre channel arbitrated loop and the
+driver to make note of any new or removed sequential access devices
+or medium changers. The driver will output messages indicating what
+devices have been added or removed and the controller, bus, target and
+lun used to address the device. It then notifies the SCSI mid layer
+of these changes.
+
+Note that the naming convention of the /proc filesystem entries
+contains a number in addition to the driver name. (E.g. "cciss0"
+instead of just "cciss" which you might expect.)
+
+Note: ONLY sequential access devices and medium changers are presented
+as SCSI devices to the SCSI mid layer by the cciss driver. Specifically,
+physical SCSI disk drives are NOT presented to the SCSI mid layer. The
+physical SCSI disk drives are controlled directly by the array controller
+hardware and it is important to prevent the kernel from attempting to directly
+access these devices too, as if the array controller were merely a SCSI
+controller in the same way that we are allowing it to access SCSI tape drives.
+
+SCSI error handling for tape drives and medium changers
+-------------------------------------------------------
+
+The linux SCSI mid layer provides an error handling protocol which
+kicks into gear whenever a SCSI command fails to complete within a
+certain amount of time (which can vary depending on the command).
+The cciss driver participates in this protocol to some extent. The
+normal protocol is a four step process. First the device is told
+to abort the command. If that doesn't work, the device is reset.
+If that doesn't work, the SCSI bus is reset. If that doesn't work
+the host bus adapter is reset. Because the cciss driver is a block
+driver as well as a SCSI driver and only the tape drives and medium
+changers are presented to the SCSI mid layer, and unlike more
+straightforward SCSI drivers, disk i/o continues through the block
+side during the SCSI error recovery process, the cciss driver only
+implements the first two of these actions, aborting the command, and
+resetting the device. Additionally, most tape drives will not oblige
+in aborting commands, and sometimes it appears they will not even
+obey a reset command, though in most circumstances they will. In
+the case that the command cannot be aborted and the device cannot be
+reset, the device will be set offline.
+
+In the event the error handling code is triggered and a tape drive is
+successfully reset or the tardy command is successfully aborted, the
+tape drive may still not allow i/o to continue until some command
+is issued which positions the tape to a known position. Typically you
+must rewind the tape (by issuing "mt -f /dev/st0 rewind" for example)
+before i/o can proceed again to a tape drive which was reset.
+
+There is a cciss_tape_cmds module parameter which can be used to make cciss
+allocate more commands for use by tape drives. Ordinarily only a few commands
+(6) are allocated for tape drives because tape drives are slow and
+infrequently used and the primary purpose of Smart Array controllers is to
+act as a RAID controller for disk drives, so the vast majority of commands
+are allocated for disk devices. However, if you have more than a few tape
+drives attached to a smart array, the default number of commands may not be
+enought (for example, if you have 8 tape drives, you could only rewind 6
+at one time with the default number of commands.) The cciss_tape_cmds module
+parameter allows more commands (up to 16 more) to be allocated for use by
+tape drives. For example:
+
+ insmod cciss.ko cciss_tape_cmds=16
+
+Or, as a kernel boot parameter passed in via grub: cciss.cciss_tape_cmds=8
diff --git a/Documentation/blockdev/cpqarray.txt b/Documentation/blockdev/cpqarray.txt
new file mode 100644
index 000000000..c7154e20e
--- /dev/null
+++ b/Documentation/blockdev/cpqarray.txt
@@ -0,0 +1,93 @@
+This driver is for Compaq's SMART2 Intelligent Disk Array Controllers.
+
+Supported Cards:
+----------------
+
+This driver is known to work with the following cards:
+
+ * SMART (EISA)
+ * SMART-2/E (EISA)
+ * SMART-2/P
+ * SMART-2DH
+ * SMART-2SL
+ * SMART-221
+ * SMART-3100ES
+ * SMART-3200
+ * Integrated Smart Array Controller
+ * SA 4200
+ * SA 4250ES
+ * SA 431
+ * RAID LC2 Controller
+
+It should also work with some really old Disk array adapters, but I am
+unable to test against these cards:
+
+ * IDA
+ * IDA-2
+ * IAES
+
+
+EISA Controllers:
+-----------------
+
+If you want to use an EISA controller you'll have to supply some
+modprobe/lilo parameters. If the driver is compiled into the kernel, must
+give it the controller's IO port address at boot time (it is not
+necessary to specify the IRQ). For example, if you had two SMART-2/E
+controllers, in EISA slots 1 and 2 you'd give it a boot argument like
+this:
+
+ smart2=0x1000,0x2000
+
+If you were loading the driver as a module, you'd give load it like this:
+
+ modprobe cpqarray eisa=0x1000,0x2000
+
+You can use EISA and PCI adapters at the same time.
+
+
+Device Naming:
+--------------
+
+You need some entries in /dev for the ida device. MAKEDEV in the /dev
+directory can make device nodes for you automatically. The device setup is
+as follows:
+
+Major numbers:
+ 72 ida0
+ 73 ida1
+ 74 ida2
+ 75 ida3
+ 76 ida4
+ 77 ida5
+ 78 ida6
+ 79 ida7
+
+Minor numbers:
+ b7 b6 b5 b4 b3 b2 b1 b0
+ |----+----| |----+----|
+ | |
+ | +-------- Partition ID (0=wholedev, 1-15 partition)
+ |
+ +-------------------- Logical Volume number
+
+The device naming scheme is:
+/dev/ida/c0d0 Controller 0, disk 0, whole device
+/dev/ida/c0d0p1 Controller 0, disk 0, partition 1
+/dev/ida/c0d0p2 Controller 0, disk 0, partition 2
+/dev/ida/c0d0p3 Controller 0, disk 0, partition 3
+
+/dev/ida/c1d1 Controller 1, disk 1, whole device
+/dev/ida/c1d1p1 Controller 1, disk 1, partition 1
+/dev/ida/c1d1p2 Controller 1, disk 1, partition 2
+/dev/ida/c1d1p3 Controller 1, disk 1, partition 3
+
+
+Changelog:
+==========
+
+10-28-2004 : General cleanup, syntax fixes for in-kernel driver version.
+ James Nelson <james4765@gmail.com>
+
+
+1999 : Original Document
diff --git a/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg
new file mode 100644
index 000000000..f87cfa0dc
--- /dev/null
+++ b/Documentation/blockdev/drbd/DRBD-8.3-data-packets.svg
@@ -0,0 +1,588 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ version="1.0"
+ width="210mm"
+ height="297mm"
+ viewBox="0 0 21000 29700"
+ id="svg2"
+ style="fill-rule:evenodd">
+ <defs
+ id="defs4" />
+ <g
+ id="Default"
+ style="visibility:visible">
+ <desc
+ id="desc180">Master slide</desc>
+ </g>
+ <path
+ d="M 11999,8601 L 11899,8301 L 12099,8301 L 11999,8601 z"
+ id="path193"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 11999,7801 L 11999,8361"
+ id="path197"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <path
+ d="M 7999,10401 L 7899,10101 L 8099,10101 L 7999,10401 z"
+ id="path209"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 7999,9601 L 7999,10161"
+ id="path213"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <path
+ d="M 11999,7801 L 11685,7840 L 11724,7644 L 11999,7801 z"
+ id="path225"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 7999,7001 L 11764,7754"
+ id="path229"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <g
+ transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-1244.4792,1416.5139)"
+ id="g245"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <text
+ id="text247">
+ <tspan
+ x="9139 9368 9579 9808 9986 10075 10252 10481 10659 10837 10909"
+ y="9284"
+ id="tspan249">RSDataReply</tspan>
+ </text>
+ </g>
+ <path
+ d="M 7999,9601 L 8281,9458 L 8311,9655 L 7999,9601 z"
+ id="path259"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 11999,9001 L 8236,9565"
+ id="path263"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <g
+ transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,1620.9382,-1639.4947)"
+ id="g279"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <text
+ id="text281">
+ <tspan
+ x="8743 8972 9132 9310 9573 9801 10013 10242 10419 10597 10775 10953 11114"
+ y="7023"
+ id="tspan283">CsumRSRequest</tspan>
+ </text>
+ </g>
+ <text
+ id="text297"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
+ y="5707"
+ id="tspan299">w_make_resync_request()</tspan>
+ </text>
+ <text
+ id="text313"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
+ y="7806"
+ id="tspan315">receive_DataRequest()</tspan>
+ </text>
+ <text
+ id="text329"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
+ y="8606"
+ id="tspan331">drbd_endio_read_sec()</tspan>
+ </text>
+ <text
+ id="text345"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13825 13986 14164 14426 14604 14710 14871 15049 15154 15332 15510 15616"
+ y="9007"
+ id="tspan347">w_e_end_csum_rs_req()</tspan>
+ </text>
+ <text
+ id="text361"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4444 4550 4728 4889 5066 5138 5299 5477 5655 5883 6095 6324 6501 6590 6768 6997 7175 7352 7424 7585 7691"
+ y="9507"
+ id="tspan363">receive_RSDataReply()</tspan>
+ </text>
+ <text
+ id="text377"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4457 4635 4741 4918 5096 5274 5452 5630 5807 5879 6057 6235 6464 6569 6641 6730 6908 7086 7247 7425 7585 7691"
+ y="10407"
+ id="tspan379">drbd_endio_write_sec()</tspan>
+ </text>
+ <text
+ id="text393"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4647 4825 5003 5180 5358 5536 5714 5820 5997 6158 6319 6497 6658 6836 7013 7085 7263 7424 7585 7691"
+ y="10907"
+ id="tspan395">e_end_resync_block()</tspan>
+ </text>
+ <path
+ d="M 11999,11601 L 11685,11640 L 11724,11444 L 11999,11601 z"
+ id="path405"
+ style="fill:#000080;visibility:visible" />
+ <path
+ d="M 7999,10801 L 11764,11554"
+ id="path409"
+ style="fill:none;stroke:#000080;visibility:visible" />
+ <g
+ transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,2434.7562,-1674.649)"
+ id="g425"
+ style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+ <text
+ id="text427">
+ <tspan
+ x="9320 9621 9726 9798 9887 10065 10277 10438"
+ y="10943"
+ id="tspan429">WriteAck</tspan>
+ </text>
+ </g>
+ <text
+ id="text443"
+ style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12199 12377 12555 12644 12821 13033 13105 13283 13444 13604 13816 13977 14138 14244"
+ y="11559"
+ id="tspan445">got_BlockAck()</tspan>
+ </text>
+ <text
+ id="text459"
+ style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="7999 8304 8541 8778 8990 9201 9413 9650 10001 10120 10357 10594 10806 11043 11280 11398 11703 11940 12152 12364 12601 12812 12931 13049 13261 13498 13710 13947 14065 14302 14540 14658 14777 14870 15107 15225 15437 15649 15886"
+ y="4877"
+ id="tspan461">Checksum based Resync, case not in sync</tspan>
+ </text>
+ <text
+ id="text475"
+ style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="6961 7266 7571 7854 8159 8299 8536 8654 8891 9010 9247 9484 9603 9840 9958 10077 10170 10407"
+ y="2806"
+ id="tspan477">DRBD-8.3 data flow</tspan>
+ </text>
+ <text
+ id="text491"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="5190 5419 5596 5774 5952 6113 6291 6468 6646 6824 6985 7146 7324 7586 7692"
+ y="7005"
+ id="tspan493">w_e_send_csum()</tspan>
+ </text>
+ <path
+ d="M 11999,17601 L 11899,17301 L 12099,17301 L 11999,17601 z"
+ id="path503"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 11999,16801 L 11999,17361"
+ id="path507"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <path
+ d="M 11999,16801 L 11685,16840 L 11724,16644 L 11999,16801 z"
+ id="path519"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 7999,16001 L 11764,16754"
+ id="path523"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <g
+ transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-2539.5806,1529.3491)"
+ id="g539"
+ style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+ <text
+ id="text541">
+ <tspan
+ x="9269 9498 9709 9798 9959 10048 10226 10437 10598 10776"
+ y="18265"
+ id="tspan543">RSIsInSync</tspan>
+ </text>
+ </g>
+ <path
+ d="M 7999,18601 L 8281,18458 L 8311,18655 L 7999,18601 z"
+ id="path553"
+ style="fill:#000080;visibility:visible" />
+ <path
+ d="M 11999,18001 L 8236,18565"
+ id="path557"
+ style="fill:none;stroke:#000080;visibility:visible" />
+ <g
+ transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,3461.4027,-1449.3012)"
+ id="g573"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <text
+ id="text575">
+ <tspan
+ x="8743 8972 9132 9310 9573 9801 10013 10242 10419 10597 10775 10953 11114"
+ y="16023"
+ id="tspan577">CsumRSRequest</tspan>
+ </text>
+ </g>
+ <text
+ id="text591"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
+ y="16806"
+ id="tspan593">receive_DataRequest()</tspan>
+ </text>
+ <text
+ id="text607"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
+ y="17606"
+ id="tspan609">drbd_endio_read_sec()</tspan>
+ </text>
+ <text
+ id="text623"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13825 13986 14164 14426 14604 14710 14871 15049 15154 15332 15510 15616"
+ y="18007"
+ id="tspan625">w_e_end_csum_rs_req()</tspan>
+ </text>
+ <text
+ id="text639"
+ style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="5735 5913 6091 6180 6357 6446 6607 6696 6874 7085 7246 7424 7585 7691"
+ y="18507"
+ id="tspan641">got_IsInSync()</tspan>
+ </text>
+ <text
+ id="text655"
+ style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="7999 8304 8541 8778 8990 9201 9413 9650 10001 10120 10357 10594 10806 11043 11280 11398 11703 11940 12152 12364 12601 12812 12931 13049 13261 13498 13710 13947 14065 14159 14396 14514 14726 14937 15175"
+ y="13877"
+ id="tspan657">Checksum based Resync, case in sync</tspan>
+ </text>
+ <path
+ d="M 12000,24601 L 11900,24301 L 12100,24301 L 12000,24601 z"
+ id="path667"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 12000,23801 L 12000,24361"
+ id="path671"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <path
+ d="M 8000,26401 L 7900,26101 L 8100,26101 L 8000,26401 z"
+ id="path683"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 8000,25601 L 8000,26161"
+ id="path687"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <path
+ d="M 12000,23801 L 11686,23840 L 11725,23644 L 12000,23801 z"
+ id="path699"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 8000,23001 L 11765,23754"
+ id="path703"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <g
+ transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,-3543.8452,1630.5143)"
+ id="g719"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <text
+ id="text721">
+ <tspan
+ x="9464 9710 9921 10150 10328 10505 10577"
+ y="25236"
+ id="tspan723">OVReply</tspan>
+ </text>
+ </g>
+ <path
+ d="M 8000,25601 L 8282,25458 L 8312,25655 L 8000,25601 z"
+ id="path733"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 12000,25001 L 8237,25565"
+ id="path737"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <g
+ transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,4918.2801,-1381.2128)"
+ id="g753"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <text
+ id="text755">
+ <tspan
+ x="9142 9388 9599 9828 10006 10183 10361 10539 10700"
+ y="23106"
+ id="tspan757">OVRequest</tspan>
+ </text>
+ </g>
+ <text
+ id="text771"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13656 13868 14097 14274 14452 14630 14808 14969 15058 15163"
+ y="23806"
+ id="tspan773">receive_OVRequest()</tspan>
+ </text>
+ <text
+ id="text787"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14084 14262 14439 14617 14795 14956 15134 15295 15400"
+ y="24606"
+ id="tspan789">drbd_endio_read_sec()</tspan>
+ </text>
+ <text
+ id="text803"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12192 12421 12598 12776 12954 13132 13310 13487 13665 13843 14004 14182 14288 14465 14643 14749"
+ y="25007"
+ id="tspan805">w_e_end_ov_req()</tspan>
+ </text>
+ <text
+ id="text819"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="5101 5207 5385 5546 5723 5795 5956 6134 6312 6557 6769 6998 7175 7353 7425 7586 7692"
+ y="25507"
+ id="tspan821">receive_OVReply()</tspan>
+ </text>
+ <text
+ id="text835"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
+ y="26407"
+ id="tspan837">drbd_endio_read_sec()</tspan>
+ </text>
+ <text
+ id="text851"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4902 5131 5308 5486 5664 5842 6020 6197 6375 6553 6714 6892 6998 7175 7353 7425 7586 7692"
+ y="26907"
+ id="tspan853">w_e_end_ov_reply()</tspan>
+ </text>
+ <path
+ d="M 12000,27601 L 11686,27640 L 11725,27444 L 12000,27601 z"
+ id="path863"
+ style="fill:#000080;visibility:visible" />
+ <path
+ d="M 8000,26801 L 11765,27554"
+ id="path867"
+ style="fill:none;stroke:#000080;visibility:visible" />
+ <g
+ transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,5704.1907,-1328.312)"
+ id="g883"
+ style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+ <text
+ id="text885">
+ <tspan
+ x="9279 9525 9736 9965 10143 10303 10481 10553"
+ y="26935"
+ id="tspan887">OVResult</tspan>
+ </text>
+ </g>
+ <text
+ id="text901"
+ style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12200 12378 12556 12645 12822 13068 13280 13508 13686 13847 14025 14097 14185 14291"
+ y="27559"
+ id="tspan903">got_OVResult()</tspan>
+ </text>
+ <text
+ id="text917"
+ style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="8000 8330 8567 8660 8754 8991 9228 9346 9558 9795 9935 10028 10146"
+ y="21877"
+ id="tspan919">Online verify</tspan>
+ </text>
+ <text
+ id="text933"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4641 4870 5047 5310 5488 5649 5826 6004 6182 6343 6521 6626 6804 6982 7160 7338 7499 7587 7693"
+ y="23005"
+ id="tspan935">w_make_ov_request()</tspan>
+ </text>
+ <path
+ d="M 8000,6500 L 7900,6200 L 8100,6200 L 8000,6500 z"
+ id="path945"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 8000,5700 L 8000,6260"
+ id="path949"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <path
+ d="M 3900,5500 L 3700,5500 L 3700,11000 L 3900,11000"
+ id="path961"
+ style="fill:none;stroke:#000000;visibility:visible" />
+ <path
+ d="M 3900,14500 L 3700,14500 L 3700,18600 L 3900,18600"
+ id="path973"
+ style="fill:none;stroke:#000000;visibility:visible" />
+ <path
+ d="M 3900,22800 L 3700,22800 L 3700,26900 L 3900,26900"
+ id="path985"
+ style="fill:none;stroke:#000000;visibility:visible" />
+ <text
+ id="text1001"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
+ y="6506"
+ id="tspan1003">drbd_endio_read_sec()</tspan>
+ </text>
+ <text
+ id="text1017"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
+ y="14708"
+ id="tspan1019">w_make_resync_request()</tspan>
+ </text>
+ <text
+ id="text1033"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="5190 5419 5596 5774 5952 6113 6291 6468 6646 6824 6985 7146 7324 7586 7692"
+ y="16006"
+ id="tspan1035">w_e_send_csum()</tspan>
+ </text>
+ <path
+ d="M 8000,15501 L 7900,15201 L 8100,15201 L 8000,15501 z"
+ id="path1045"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 8000,14701 L 8000,15261"
+ id="path1049"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <text
+ id="text1065"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4492 4670 4776 4953 5131 5309 5487 5665 5842 5914 6092 6270 6376 6554 6731 6909 7087 7248 7426 7587 7692"
+ y="15507"
+ id="tspan1067">drbd_endio_read_sec()</tspan>
+ </text>
+ <path
+ d="M 16100,9000 L 16300,9000 L 16300,7500 L 16100,7500"
+ id="path1077"
+ style="fill:none;stroke:#000000;visibility:visible" />
+ <path
+ d="M 16100,18000 L 16300,18000 L 16300,16500 L 16100,16500"
+ id="path1089"
+ style="fill:none;stroke:#000000;visibility:visible" />
+ <path
+ d="M 16100,25000 L 16300,25000 L 16300,23500 L 16100,23500"
+ id="path1101"
+ style="fill:none;stroke:#000000;visibility:visible" />
+ <text
+ id="text1117"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="2026 2132 2293 2471 2648 2826 3004 3076 3254 3431 3503 3681 3787"
+ y="5402"
+ id="tspan1119">rs_begin_io()</tspan>
+ </text>
+ <text
+ id="text1133"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="2027 2133 2294 2472 2649 2827 3005 3077 3255 3432 3504 3682 3788"
+ y="14402"
+ id="tspan1135">rs_begin_io()</tspan>
+ </text>
+ <text
+ id="text1149"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="2026 2132 2293 2471 2648 2826 3004 3076 3254 3431 3503 3681 3787"
+ y="22602"
+ id="tspan1151">rs_begin_io()</tspan>
+ </text>
+ <text
+ id="text1165"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="1426 1532 1693 1871 2031 2209 2472 2649 2721 2899 2988 3166 3344 3416 3593 3699"
+ y="11302"
+ id="tspan1167">rs_complete_io()</tspan>
+ </text>
+ <text
+ id="text1181"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="1526 1632 1793 1971 2131 2309 2572 2749 2821 2999 3088 3266 3444 3516 3693 3799"
+ y="18931"
+ id="tspan1183">rs_complete_io()</tspan>
+ </text>
+ <text
+ id="text1197"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="1526 1632 1793 1971 2131 2309 2572 2749 2821 2999 3088 3266 3444 3516 3693 3799"
+ y="27231"
+ id="tspan1199">rs_complete_io()</tspan>
+ </text>
+ <text
+ id="text1213"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="16126 16232 16393 16571 16748 16926 17104 17176 17354 17531 17603 17781 17887"
+ y="7402"
+ id="tspan1215">rs_begin_io()</tspan>
+ </text>
+ <text
+ id="text1229"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="16127 16233 16394 16572 16749 16927 17105 17177 17355 17532 17604 17782 17888"
+ y="16331"
+ id="tspan1231">rs_begin_io()</tspan>
+ </text>
+ <text
+ id="text1245"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="16127 16233 16394 16572 16749 16927 17105 17177 17355 17532 17604 17782 17888"
+ y="23302"
+ id="tspan1247">rs_begin_io()</tspan>
+ </text>
+ <text
+ id="text1261"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
+ y="9302"
+ id="tspan1263">rs_complete_io()</tspan>
+ </text>
+ <text
+ id="text1277"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
+ y="18331"
+ id="tspan1279">rs_complete_io()</tspan>
+ </text>
+ <text
+ id="text1293"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="16126 16232 16393 16571 16731 16909 17172 17349 17421 17599 17688 17866 18044 18116 18293 18399"
+ y="25302"
+ id="tspan1295">rs_complete_io()</tspan>
+ </text>
+</svg>
diff --git a/Documentation/blockdev/drbd/DRBD-data-packets.svg b/Documentation/blockdev/drbd/DRBD-data-packets.svg
new file mode 100644
index 000000000..48a1e2165
--- /dev/null
+++ b/Documentation/blockdev/drbd/DRBD-data-packets.svg
@@ -0,0 +1,459 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+<svg
+ xmlns:svg="http://www.w3.org/2000/svg"
+ xmlns="http://www.w3.org/2000/svg"
+ version="1.0"
+ width="210mm"
+ height="297mm"
+ viewBox="0 0 21000 29700"
+ id="svg2"
+ style="fill-rule:evenodd">
+ <defs
+ id="defs4" />
+ <g
+ id="Default"
+ style="visibility:visible">
+ <desc
+ id="desc176">Master slide</desc>
+ </g>
+ <path
+ d="M 11999,19601 L 11899,19301 L 12099,19301 L 11999,19601 z"
+ id="path189"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 11999,18801 L 11999,19361"
+ id="path193"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <path
+ d="M 7999,21401 L 7899,21101 L 8099,21101 L 7999,21401 z"
+ id="path205"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 7999,20601 L 7999,21161"
+ id="path209"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <path
+ d="M 11999,18801 L 11685,18840 L 11724,18644 L 11999,18801 z"
+ id="path221"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 7999,18001 L 11764,18754"
+ id="path225"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <text
+ x="-3023.845"
+ y="1106.8124"
+ transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
+ id="text243"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="6115.1553 6344.1553 6555.1553 6784.1553 6962.1553 7051.1553 7228.1553 7457.1553 7635.1553 7813.1553 7885.1553"
+ y="21390.812"
+ id="tspan245">RSDataReply</tspan>
+ </text>
+ <path
+ d="M 7999,20601 L 8281,20458 L 8311,20655 L 7999,20601 z"
+ id="path255"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 11999,20001 L 8236,20565"
+ id="path259"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <text
+ x="3502.5356"
+ y="-2184.6621"
+ transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+ id="text277"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12321.536 12550.536 12761.536 12990.536 13168.536 13257.536 13434.536 13663.536 13841.536 14019.536 14196.536 14374.536 14535.536"
+ y="15854.338"
+ id="tspan279">RSDataRequest</tspan>
+ </text>
+ <text
+ id="text293"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4034 4263 4440 4703 4881 5042 5219 5397 5503 5681 5842 6003 6180 6341 6519 6625 6803 6980 7158 7336 7497 7586 7692"
+ y="17807"
+ id="tspan295">w_make_resync_request()</tspan>
+ </text>
+ <text
+ id="text309"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12199 12305 12483 12644 12821 12893 13054 13232 13410 13638 13816 13905 14083 14311 14489 14667 14845 15023 15184 15272 15378"
+ y="18806"
+ id="tspan311">receive_DataRequest()</tspan>
+ </text>
+ <text
+ id="text325"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12199 12377 12483 12660 12838 13016 13194 13372 13549 13621 13799 13977 14083 14261 14438 14616 14794 14955 15133 15294 15399"
+ y="19606"
+ id="tspan327">drbd_endio_read_sec()</tspan>
+ </text>
+ <text
+ id="text341"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12191 12420 12597 12775 12953 13131 13309 13486 13664 13770 13931 14109 14287 14375 14553 14731 14837 15015 15192 15298"
+ y="20007"
+ id="tspan343">w_e_end_rsdata_req()</tspan>
+ </text>
+ <text
+ id="text357"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4444 4550 4728 4889 5066 5138 5299 5477 5655 5883 6095 6324 6501 6590 6768 6997 7175 7352 7424 7585 7691"
+ y="20507"
+ id="tspan359">receive_RSDataReply()</tspan>
+ </text>
+ <text
+ id="text373"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4457 4635 4741 4918 5096 5274 5452 5630 5807 5879 6057 6235 6464 6569 6641 6730 6908 7086 7247 7425 7585 7691"
+ y="21407"
+ id="tspan375">drbd_endio_write_sec()</tspan>
+ </text>
+ <text
+ id="text389"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4647 4825 5003 5180 5358 5536 5714 5820 5997 6158 6319 6497 6658 6836 7013 7085 7263 7424 7585 7691"
+ y="21907"
+ id="tspan391">e_end_resync_block()</tspan>
+ </text>
+ <path
+ d="M 11999,22601 L 11685,22640 L 11724,22444 L 11999,22601 z"
+ id="path401"
+ style="fill:#000080;visibility:visible" />
+ <path
+ d="M 7999,21801 L 11764,22554"
+ id="path405"
+ style="fill:none;stroke:#000080;visibility:visible" />
+ <text
+ x="4290.3008"
+ y="-2369.6162"
+ transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+ id="text423"
+ style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="13610.301 13911.301 14016.301 14088.301 14177.301 14355.301 14567.301 14728.301"
+ y="19573.385"
+ id="tspan425">WriteAck</tspan>
+ </text>
+ <text
+ id="text439"
+ style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12199 12377 12555 12644 12821 13033 13105 13283 13444 13604 13816 13977 14138 14244"
+ y="22559"
+ id="tspan441">got_BlockAck()</tspan>
+ </text>
+ <text
+ id="text455"
+ style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="7999 8304 8541 8753 8964 9201 9413 9531 9769 9862 10099 10310 10522 10734 10852 10971 11208 11348 11585 11822"
+ y="16877"
+ id="tspan457">Resync blocks, 4-32K</tspan>
+ </text>
+ <path
+ d="M 12000,7601 L 11900,7301 L 12100,7301 L 12000,7601 z"
+ id="path467"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 12000,6801 L 12000,7361"
+ id="path471"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <path
+ d="M 12000,6801 L 11686,6840 L 11725,6644 L 12000,6801 z"
+ id="path483"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 8000,6001 L 11765,6754"
+ id="path487"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <text
+ x="-1288.1796"
+ y="1279.7666"
+ transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
+ id="text505"
+ style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="8174.8208 8475.8203 8580.8203 8652.8203 8741.8203 8919.8203 9131.8203 9292.8203"
+ y="9516.7666"
+ id="tspan507">WriteAck</tspan>
+ </text>
+ <path
+ d="M 8000,8601 L 8282,8458 L 8312,8655 L 8000,8601 z"
+ id="path517"
+ style="fill:#000080;visibility:visible" />
+ <path
+ d="M 12000,8001 L 8237,8565"
+ id="path521"
+ style="fill:none;stroke:#000080;visibility:visible" />
+ <text
+ x="1065.6655"
+ y="-2097.7664"
+ transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+ id="text539"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="10682.666 10911.666 11088.666 11177.666"
+ y="4107.2339"
+ id="tspan541">Data</tspan>
+ </text>
+ <text
+ id="text555"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4746 4924 5030 5207 5385 5563 5826 6003 6164 6342 6520 6626 6803 6981 7159 7337 7498 7587 7692"
+ y="5505"
+ id="tspan557">drbd_make_request()</tspan>
+ </text>
+ <text
+ id="text571"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13639 13817 13906 14084 14190"
+ y="6806"
+ id="tspan573">receive_Data()</tspan>
+ </text>
+ <text
+ id="text587"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14207 14312 14384 14473 14651 14829 14990 15168 15328 15434"
+ y="7606"
+ id="tspan589">drbd_endio_write_sec()</tspan>
+ </text>
+ <text
+ id="text603"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12192 12370 12548 12725 12903 13081 13259 13437 13509 13686 13847 14008 14114"
+ y="8007"
+ id="tspan605">e_end_block()</tspan>
+ </text>
+ <text
+ id="text619"
+ style="font-size:318px;font-weight:400;fill:#000080;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="5647 5825 6003 6092 6269 6481 6553 6731 6892 7052 7264 7425 7586 7692"
+ y="8606"
+ id="tspan621">got_BlockAck()</tspan>
+ </text>
+ <text
+ id="text635"
+ style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="8000 8305 8542 8779 9016 9109 9346 9486 9604 9956 10049 10189 10328 10565 10705 10942 11179 11298 11603 11742 11835 11954 12191 12310 12428 12665 12902 13139 13279 13516 13753"
+ y="4877"
+ id="tspan637">Regular mirrored write, 512-32K</tspan>
+ </text>
+ <text
+ id="text651"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="5381 5610 5787 5948 6126 6304 6482 6659 6837 7015 7087 7265 7426 7587 7692"
+ y="6003"
+ id="tspan653">w_send_dblock()</tspan>
+ </text>
+ <path
+ d="M 8000,6800 L 7900,6500 L 8100,6500 L 8000,6800 z"
+ id="path663"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 8000,6000 L 8000,6560"
+ id="path667"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <text
+ id="text683"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4602 4780 4886 5063 5241 5419 5597 5775 5952 6024 6202 6380 6609 6714 6786 6875 7053 7231 7409 7515 7587 7692"
+ y="6905"
+ id="tspan685">drbd_endio_write_pri()</tspan>
+ </text>
+ <path
+ d="M 12000,13602 L 11900,13302 L 12100,13302 L 12000,13602 z"
+ id="path695"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 12000,12802 L 12000,13362"
+ id="path699"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <path
+ d="M 12000,12802 L 11686,12841 L 11725,12645 L 12000,12802 z"
+ id="path711"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 8000,12002 L 11765,12755"
+ id="path715"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <text
+ x="-2155.5266"
+ y="1201.5964"
+ transform="matrix(0.9895258,-0.1443562,0.1443562,0.9895258,0,0)"
+ id="text733"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="7202.4736 7431.4736 7608.4736 7697.4736 7875.4736 8104.4736 8282.4736 8459.4736 8531.4736"
+ y="15454.597"
+ id="tspan735">DataReply</tspan>
+ </text>
+ <path
+ d="M 8000,14602 L 8282,14459 L 8312,14656 L 8000,14602 z"
+ id="path745"
+ style="fill:#008000;visibility:visible" />
+ <path
+ d="M 12000,14002 L 8237,14566"
+ id="path749"
+ style="fill:none;stroke:#008000;visibility:visible" />
+ <text
+ x="2280.3804"
+ y="-2103.2141"
+ transform="matrix(0.9788674,0.2044961,-0.2044961,0.9788674,0,0)"
+ id="text767"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="11316.381 11545.381 11722.381 11811.381 11989.381 12218.381 12396.381 12573.381 12751.381 12929.381 13090.381"
+ y="9981.7861"
+ id="tspan769">DataRequest</tspan>
+ </text>
+ <text
+ id="text783"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="4746 4924 5030 5207 5385 5563 5826 6003 6164 6342 6520 6626 6803 6981 7159 7337 7498 7587 7692"
+ y="11506"
+ id="tspan785">drbd_make_request()</tspan>
+ </text>
+ <text
+ id="text799"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12200 12306 12484 12645 12822 12894 13055 13233 13411 13639 13817 13906 14084 14312 14490 14668 14846 15024 15185 15273 15379"
+ y="12807"
+ id="tspan801">receive_DataRequest()</tspan>
+ </text>
+ <text
+ id="text815"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12200 12378 12484 12661 12839 13017 13195 13373 13550 13622 13800 13978 14084 14262 14439 14617 14795 14956 15134 15295 15400"
+ y="13607"
+ id="tspan817">drbd_endio_read_sec()</tspan>
+ </text>
+ <text
+ id="text831"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="12192 12421 12598 12776 12954 13132 13310 13487 13665 13843 14021 14110 14288 14465 14571 14749 14927 15033"
+ y="14008"
+ id="tspan833">w_e_end_data_req()</tspan>
+ </text>
+ <g
+ id="g835"
+ style="visibility:visible">
+ <desc
+ id="desc837">Drawing</desc>
+ <text
+ id="text847"
+ style="font-size:318px;font-weight:400;fill:#008000;font-family:Helvetica embedded">
+ <tspan
+ x="4885 4991 5169 5330 5507 5579 5740 5918 6096 6324 6502 6591 6769 6997 7175 7353 7425 7586 7692"
+ y="14607"
+ id="tspan849">receive_DataReply()</tspan>
+ </text>
+ </g>
+ <text
+ id="text863"
+ style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="8000 8305 8398 8610 8821 8914 9151 9363 9575 9693 9833 10070 10307 10544 10663 10781 11018 11255 11493 11632 11869 12106"
+ y="10878"
+ id="tspan865">Diskless read, 512-32K</tspan>
+ </text>
+ <text
+ id="text879"
+ style="font-size:318px;font-weight:400;fill:#008000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="5029 5258 5435 5596 5774 5952 6130 6307 6413 6591 6769 6947 7125 7230 7408 7586 7692"
+ y="12004"
+ id="tspan881">w_send_read_req()</tspan>
+ </text>
+ <text
+ id="text895"
+ style="font-size:423px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="6961 7266 7571 7854 8159 8278 8515 8633 8870 9107 9226 9463 9581 9700 9793 10030"
+ y="2806"
+ id="tspan897">DRBD 8 data flow</tspan>
+ </text>
+ <path
+ d="M 3900,5300 L 3700,5300 L 3700,7000 L 3900,7000"
+ id="path907"
+ style="fill:none;stroke:#000000;visibility:visible" />
+ <path
+ d="M 3900,17600 L 3700,17600 L 3700,22000 L 3900,22000"
+ id="path919"
+ style="fill:none;stroke:#000000;visibility:visible" />
+ <path
+ d="M 16100,20000 L 16300,20000 L 16300,18500 L 16100,18500"
+ id="path931"
+ style="fill:none;stroke:#000000;visibility:visible" />
+ <text
+ id="text947"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="2126 2304 2376 2554 2731 2909 3087 3159 3337 3515 3587 3764 3870"
+ y="5202"
+ id="tspan949">al_begin_io()</tspan>
+ </text>
+ <text
+ id="text963"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="1632 1810 1882 2060 2220 2398 2661 2839 2910 3088 3177 3355 3533 3605 3783 3888"
+ y="7331"
+ id="tspan965">al_complete_io()</tspan>
+ </text>
+ <text
+ id="text979"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="2126 2232 2393 2571 2748 2926 3104 3176 3354 3531 3603 3781 3887"
+ y="17431"
+ id="tspan981">rs_begin_io()</tspan>
+ </text>
+ <text
+ id="text995"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="1626 1732 1893 2071 2231 2409 2672 2849 2921 3099 3188 3366 3544 3616 3793 3899"
+ y="22331"
+ id="tspan997">rs_complete_io()</tspan>
+ </text>
+ <text
+ id="text1011"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="16027 16133 16294 16472 16649 16827 17005 17077 17255 17432 17504 17682 17788"
+ y="18402"
+ id="tspan1013">rs_begin_io()</tspan>
+ </text>
+ <text
+ id="text1027"
+ style="font-size:318px;font-weight:400;fill:#000000;visibility:visible;font-family:Helvetica embedded">
+ <tspan
+ x="16115 16221 16382 16560 16720 16898 17161 17338 17410 17588 17677 17855 18033 18105 18282 18388"
+ y="20331"
+ id="tspan1029">rs_complete_io()</tspan>
+ </text>
+</svg>
diff --git a/Documentation/blockdev/drbd/README.txt b/Documentation/blockdev/drbd/README.txt
new file mode 100644
index 000000000..627b0a1bf
--- /dev/null
+++ b/Documentation/blockdev/drbd/README.txt
@@ -0,0 +1,16 @@
+Description
+
+ DRBD is a shared-nothing, synchronously replicated block device. It
+ is designed to serve as a building block for high availability
+ clusters and in this context, is a "drop-in" replacement for shared
+ storage. Simplistically, you could see it as a network RAID 1.
+
+ Please visit http://www.drbd.org to find out more.
+
+The here included files are intended to help understand the implementation
+
+DRBD-8.3-data-packets.svg, DRBD-data-packets.svg
+ relates some functions, and write packets.
+
+conn-states-8.dot, disk-states-8.dot, node-states-8.dot
+ The sub graphs of DRBD's state transitions
diff --git a/Documentation/blockdev/drbd/conn-states-8.dot b/Documentation/blockdev/drbd/conn-states-8.dot
new file mode 100644
index 000000000..025e8cf5e
--- /dev/null
+++ b/Documentation/blockdev/drbd/conn-states-8.dot
@@ -0,0 +1,18 @@
+digraph conn_states {
+ StandAllone -> WFConnection [ label = "ioctl_set_net()" ]
+ WFConnection -> Unconnected [ label = "unable to bind()" ]
+ WFConnection -> WFReportParams [ label = "in connect() after accept" ]
+ WFReportParams -> StandAllone [ label = "checks in receive_param()" ]
+ WFReportParams -> Connected [ label = "in receive_param()" ]
+ WFReportParams -> WFBitMapS [ label = "sync_handshake()" ]
+ WFReportParams -> WFBitMapT [ label = "sync_handshake()" ]
+ WFBitMapS -> SyncSource [ label = "receive_bitmap()" ]
+ WFBitMapT -> SyncTarget [ label = "receive_bitmap()" ]
+ SyncSource -> Connected
+ SyncTarget -> Connected
+ SyncSource -> PausedSyncS
+ SyncTarget -> PausedSyncT
+ PausedSyncS -> SyncSource
+ PausedSyncT -> SyncTarget
+ Connected -> WFConnection [ label = "* on network error" ]
+}
diff --git a/Documentation/blockdev/drbd/data-structure-v9.txt b/Documentation/blockdev/drbd/data-structure-v9.txt
new file mode 100644
index 000000000..1e52a0e32
--- /dev/null
+++ b/Documentation/blockdev/drbd/data-structure-v9.txt
@@ -0,0 +1,38 @@
+This describes the in kernel data structure for DRBD-9. Starting with
+Linux v3.14 we are reorganizing DRBD to use this data structure.
+
+Basic Data Structure
+====================
+
+A node has a number of DRBD resources. Each such resource has a number of
+devices (aka volumes) and connections to other nodes ("peer nodes"). Each DRBD
+device is represented by a block device locally.
+
+The DRBD objects are interconnected to form a matrix as depicted below; a
+drbd_peer_device object sits at each intersection between a drbd_device and a
+drbd_connection:
+
+ /--------------+---------------+.....+---------------\
+ | resource | device | | device |
+ +--------------+---------------+.....+---------------+
+ | connection | peer_device | | peer_device |
+ +--------------+---------------+.....+---------------+
+ : : : : :
+ : : : : :
+ +--------------+---------------+.....+---------------+
+ | connection | peer_device | | peer_device |
+ \--------------+---------------+.....+---------------/
+
+In this table, horizontally, devices can be accessed from resources by their
+volume number. Likewise, peer_devices can be accessed from connections by
+their volume number. Objects in the vertical direction are connected by double
+linked lists. There are back pointers from peer_devices to their connections a
+devices, and from connections and devices to their resource.
+
+All resources are in the drbd_resources double-linked list. In addition, all
+devices can be accessed by their minor device number via the drbd_devices idr.
+
+The drbd_resource, drbd_connection, and drbd_device objects are reference
+counted. The peer_device objects only serve to establish the links between
+devices and connections; their lifetime is determined by the lifetime of the
+device and connection which they reference.
diff --git a/Documentation/blockdev/drbd/disk-states-8.dot b/Documentation/blockdev/drbd/disk-states-8.dot
new file mode 100644
index 000000000..d06cfb46f
--- /dev/null
+++ b/Documentation/blockdev/drbd/disk-states-8.dot
@@ -0,0 +1,16 @@
+digraph disk_states {
+ Diskless -> Inconsistent [ label = "ioctl_set_disk()" ]
+ Diskless -> Consistent [ label = "ioctl_set_disk()" ]
+ Diskless -> Outdated [ label = "ioctl_set_disk()" ]
+ Consistent -> Outdated [ label = "receive_param()" ]
+ Consistent -> UpToDate [ label = "receive_param()" ]
+ Consistent -> Inconsistent [ label = "start resync" ]
+ Outdated -> Inconsistent [ label = "start resync" ]
+ UpToDate -> Inconsistent [ label = "ioctl_replicate" ]
+ Inconsistent -> UpToDate [ label = "resync completed" ]
+ Consistent -> Failed [ label = "io completion error" ]
+ Outdated -> Failed [ label = "io completion error" ]
+ UpToDate -> Failed [ label = "io completion error" ]
+ Inconsistent -> Failed [ label = "io completion error" ]
+ Failed -> Diskless [ label = "sending notify to peer" ]
+}
diff --git a/Documentation/blockdev/drbd/drbd-connection-state-overview.dot b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot
new file mode 100644
index 000000000..6d9cf0a7b
--- /dev/null
+++ b/Documentation/blockdev/drbd/drbd-connection-state-overview.dot
@@ -0,0 +1,85 @@
+// vim: set sw=2 sts=2 :
+digraph {
+ rankdir=BT
+ bgcolor=white
+
+ node [shape=plaintext]
+ node [fontcolor=black]
+
+ StandAlone [ style=filled,fillcolor=gray,label=StandAlone ]
+
+ node [fontcolor=lightgray]
+
+ Unconnected [ label=Unconnected ]
+
+ CommTrouble [ shape=record,
+ label="{communication loss|{Timeout|BrokenPipe|NetworkFailure}}" ]
+
+ node [fontcolor=gray]
+
+ subgraph cluster_try_connect {
+ label="try to connect, handshake"
+ rank=max
+ WFConnection [ label=WFConnection ]
+ WFReportParams [ label=WFReportParams ]
+ }
+
+ TearDown [ label=TearDown ]
+
+ Connected [ label=Connected,style=filled,fillcolor=green,fontcolor=black ]
+
+ node [fontcolor=lightblue]
+
+ StartingSyncS [ label=StartingSyncS ]
+ StartingSyncT [ label=StartingSyncT ]
+
+ subgraph cluster_bitmap_exchange {
+ node [fontcolor=red]
+ fontcolor=red
+ label="new application (WRITE?) requests blocked\lwhile bitmap is exchanged"
+
+ WFBitMapT [ label=WFBitMapT ]
+ WFSyncUUID [ label=WFSyncUUID ]
+ WFBitMapS [ label=WFBitMapS ]
+ }
+
+ node [fontcolor=blue]
+
+ cluster_resync [ shape=record,label="{<any>resynchronisation process running\l'concurrent' application requests allowed|{{<T>PausedSyncT\nSyncTarget}|{<S>PausedSyncS\nSyncSource}}}" ]
+
+ node [shape=box,fontcolor=black]
+
+ // drbdadm [label="drbdadm connect"]
+ // handshake [label="drbd_connect()\ndrbd_do_handshake\ndrbd_sync_handshake() etc."]
+ // comm_error [label="communication trouble"]
+
+ //
+ // edges
+ // --------------------------------------
+
+ StandAlone -> Unconnected [ label="drbdadm connect" ]
+ Unconnected -> StandAlone [ label="drbdadm disconnect\lor serious communication trouble" ]
+ Unconnected -> WFConnection [ label="receiver thread is started" ]
+ WFConnection -> WFReportParams [ headlabel="accept()\land/or \lconnect()\l" ]
+
+ WFReportParams -> StandAlone [ label="during handshake\lpeers do not agree\labout something essential" ]
+ WFReportParams -> Connected [ label="data identical\lno sync needed",color=green,fontcolor=green ]
+
+ WFReportParams -> WFBitMapS
+ WFReportParams -> WFBitMapT
+ WFBitMapT -> WFSyncUUID [minlen=0.1,constraint=false]
+
+ WFBitMapS -> cluster_resync:S
+ WFSyncUUID -> cluster_resync:T
+
+ edge [color=green]
+ cluster_resync:any -> Connected [ label="resnyc done",fontcolor=green ]
+
+ edge [color=red]
+ WFReportParams -> CommTrouble
+ Connected -> CommTrouble
+ cluster_resync:any -> CommTrouble
+ edge [color=black]
+ CommTrouble -> Unconnected [label="receiver thread is stopped" ]
+
+}
diff --git a/Documentation/blockdev/drbd/node-states-8.dot b/Documentation/blockdev/drbd/node-states-8.dot
new file mode 100644
index 000000000..4a2b00c23
--- /dev/null
+++ b/Documentation/blockdev/drbd/node-states-8.dot
@@ -0,0 +1,14 @@
+digraph node_states {
+ Secondary -> Primary [ label = "ioctl_set_state()" ]
+ Primary -> Secondary [ label = "ioctl_set_state()" ]
+}
+
+digraph peer_states {
+ Secondary -> Primary [ label = "recv state packet" ]
+ Primary -> Secondary [ label = "recv state packet" ]
+ Primary -> Unknown [ label = "connection lost" ]
+ Secondary -> Unknown [ label = "connection lost" ]
+ Unknown -> Primary [ label = "connected" ]
+ Unknown -> Secondary [ label = "connected" ]
+}
+
diff --git a/Documentation/blockdev/floppy.txt b/Documentation/blockdev/floppy.txt
new file mode 100644
index 000000000..e2240f5ab
--- /dev/null
+++ b/Documentation/blockdev/floppy.txt
@@ -0,0 +1,245 @@
+This file describes the floppy driver.
+
+FAQ list:
+=========
+
+ A FAQ list may be found in the fdutils package (see below), and also
+at <http://fdutils.linux.lu/faq.html>.
+
+
+LILO configuration options (Thinkpad users, read this)
+======================================================
+
+ The floppy driver is configured using the 'floppy=' option in
+lilo. This option can be typed at the boot prompt, or entered in the
+lilo configuration file.
+
+ Example: If your kernel is called linux-2.6.9, type the following line
+at the lilo boot prompt (if you have a thinkpad):
+
+ linux-2.6.9 floppy=thinkpad
+
+You may also enter the following line in /etc/lilo.conf, in the description
+of linux-2.6.9:
+
+ append = "floppy=thinkpad"
+
+ Several floppy related options may be given, example:
+
+ linux-2.6.9 floppy=daring floppy=two_fdc
+ append = "floppy=daring floppy=two_fdc"
+
+ If you give options both in the lilo config file and on the boot
+prompt, the option strings of both places are concatenated, the boot
+prompt options coming last. That's why there are also options to
+restore the default behavior.
+
+
+Module configuration options
+============================
+
+ If you use the floppy driver as a module, use the following syntax:
+modprobe floppy floppy="<options>"
+
+Example:
+ modprobe floppy floppy="omnibook messages"
+
+ If you need certain options enabled every time you load the floppy driver,
+you can put:
+
+ options floppy floppy="omnibook messages"
+
+in a configuration file in /etc/modprobe.d/.
+
+
+ The floppy driver related options are:
+
+ floppy=asus_pci
+ Sets the bit mask to allow only units 0 and 1. (default)
+
+ floppy=daring
+ Tells the floppy driver that you have a well behaved floppy controller.
+ This allows more efficient and smoother operation, but may fail on
+ certain controllers. This may speed up certain operations.
+
+ floppy=0,daring
+ Tells the floppy driver that your floppy controller should be used
+ with caution.
+
+ floppy=one_fdc
+ Tells the floppy driver that you have only one floppy controller.
+ (default)
+
+ floppy=two_fdc
+ floppy=<address>,two_fdc
+ Tells the floppy driver that you have two floppy controllers.
+ The second floppy controller is assumed to be at <address>.
+ This option is not needed if the second controller is at address
+ 0x370, and if you use the 'cmos' option.
+
+ floppy=thinkpad
+ Tells the floppy driver that you have a Thinkpad. Thinkpads use an
+ inverted convention for the disk change line.
+
+ floppy=0,thinkpad
+ Tells the floppy driver that you don't have a Thinkpad.
+
+ floppy=omnibook
+ floppy=nodma
+ Tells the floppy driver not to use Dma for data transfers.
+ This is needed on HP Omnibooks, which don't have a workable
+ DMA channel for the floppy driver. This option is also useful
+ if you frequently get "Unable to allocate DMA memory" messages.
+ Indeed, dma memory needs to be continuous in physical memory,
+ and is thus harder to find, whereas non-dma buffers may be
+ allocated in virtual memory. However, I advise against this if
+ you have an FDC without a FIFO (8272A or 82072). 82072A and
+ later are OK. You also need at least a 486 to use nodma.
+ If you use nodma mode, I suggest you also set the FIFO
+ threshold to 10 or lower, in order to limit the number of data
+ transfer interrupts.
+
+ If you have a FIFO-able FDC, the floppy driver automatically
+ falls back on non DMA mode if no DMA-able memory can be found.
+ If you want to avoid this, explicitly ask for 'yesdma'.
+
+ floppy=yesdma
+ Tells the floppy driver that a workable DMA channel is available.
+ (default)
+
+ floppy=nofifo
+ Disables the FIFO entirely. This is needed if you get "Bus
+ master arbitration error" messages from your Ethernet card (or
+ from other devices) while accessing the floppy.
+
+ floppy=usefifo
+ Enables the FIFO. (default)
+
+ floppy=<threshold>,fifo_depth
+ Sets the FIFO threshold. This is mostly relevant in DMA
+ mode. If this is higher, the floppy driver tolerates more
+ interrupt latency, but it triggers more interrupts (i.e. it
+ imposes more load on the rest of the system). If this is
+ lower, the interrupt latency should be lower too (faster
+ processor). The benefit of a lower threshold is less
+ interrupts.
+
+ To tune the fifo threshold, switch on over/underrun messages
+ using 'floppycontrol --messages'. Then access a floppy
+ disk. If you get a huge amount of "Over/Underrun - retrying"
+ messages, then the fifo threshold is too low. Try with a
+ higher value, until you only get an occasional Over/Underrun.
+ It is a good idea to compile the floppy driver as a module
+ when doing this tuning. Indeed, it allows to try different
+ fifo values without rebooting the machine for each test. Note
+ that you need to do 'floppycontrol --messages' every time you
+ re-insert the module.
+
+ Usually, tuning the fifo threshold should not be needed, as
+ the default (0xa) is reasonable.
+
+ floppy=<drive>,<type>,cmos
+ Sets the CMOS type of <drive> to <type>. This is mandatory if
+ you have more than two floppy drives (only two can be
+ described in the physical CMOS), or if your BIOS uses
+ non-standard CMOS types. The CMOS types are:
+
+ 0 - Use the value of the physical CMOS
+ 1 - 5 1/4 DD
+ 2 - 5 1/4 HD
+ 3 - 3 1/2 DD
+ 4 - 3 1/2 HD
+ 5 - 3 1/2 ED
+ 6 - 3 1/2 ED
+ 16 - unknown or not installed
+
+ (Note: there are two valid types for ED drives. This is because 5 was
+ initially chosen to represent floppy *tapes*, and 6 for ED drives.
+ AMI ignored this, and used 5 for ED drives. That's why the floppy
+ driver handles both.)
+
+ floppy=unexpected_interrupts
+ Print a warning message when an unexpected interrupt is received.
+ (default)
+
+ floppy=no_unexpected_interrupts
+ floppy=L40SX
+ Don't print a message when an unexpected interrupt is received. This
+ is needed on IBM L40SX laptops in certain video modes. (There seems
+ to be an interaction between video and floppy. The unexpected
+ interrupts affect only performance, and can be safely ignored.)
+
+ floppy=broken_dcl
+ Don't use the disk change line, but assume that the disk was
+ changed whenever the device node is reopened. Needed on some
+ boxes where the disk change line is broken or unsupported.
+ This should be regarded as a stopgap measure, indeed it makes
+ floppy operation less efficient due to unneeded cache
+ flushings, and slightly more unreliable. Please verify your
+ cable, connection and jumper settings if you have any DCL
+ problems. However, some older drives, and also some laptops
+ are known not to have a DCL.
+
+ floppy=debug
+ Print debugging messages.
+
+ floppy=messages
+ Print informational messages for some operations (disk change
+ notifications, warnings about over and underruns, and about
+ autodetection).
+
+ floppy=silent_dcl_clear
+ Uses a less noisy way to clear the disk change line (which
+ doesn't involve seeks). Implied by 'daring' option.
+
+ floppy=<nr>,irq
+ Sets the floppy IRQ to <nr> instead of 6.
+
+ floppy=<nr>,dma
+ Sets the floppy DMA channel to <nr> instead of 2.
+
+ floppy=slow
+ Use PS/2 stepping rate:
+ " PS/2 floppies have much slower step rates than regular floppies.
+ It's been recommended that take about 1/4 of the default speed
+ in some more extreme cases."
+
+
+Supporting utilities and additional documentation:
+==================================================
+
+ Additional parameters of the floppy driver can be configured at
+runtime. Utilities which do this can be found in the fdutils package.
+This package also contains a new version of mtools which allows to
+access high capacity disks (up to 1992K on a high density 3 1/2 disk!).
+It also contains additional documentation about the floppy driver.
+
+The latest version can be found at fdutils homepage:
+ http://fdutils.linux.lu
+
+The fdutils releases can be found at:
+ http://fdutils.linux.lu/download.html
+ http://www.tux.org/pub/knaff/fdutils/
+ ftp://metalab.unc.edu/pub/Linux/utils/disk-management/
+
+Reporting problems about the floppy driver
+==========================================
+
+ If you have a question or a bug report about the floppy driver, mail
+me at Alain.Knaff@poboxes.com . If you post to Usenet, preferably use
+comp.os.linux.hardware. As the volume in these groups is rather high,
+be sure to include the word "floppy" (or "FLOPPY") in the subject
+line. If the reported problem happens when mounting floppy disks, be
+sure to mention also the type of the filesystem in the subject line.
+
+ Be sure to read the FAQ before mailing/posting any bug reports!
+
+ Alain
+
+Changelog
+=========
+
+10-30-2004 : Cleanup, updating, add reference to module configuration.
+ James Nelson <james4765@gmail.com>
+
+6-3-2000 : Original Document
diff --git a/Documentation/blockdev/mflash.txt b/Documentation/blockdev/mflash.txt
new file mode 100644
index 000000000..1f610ecf6
--- /dev/null
+++ b/Documentation/blockdev/mflash.txt
@@ -0,0 +1,84 @@
+This document describes m[g]flash support in linux.
+
+Contents
+ 1. Overview
+ 2. Reserved area configuration
+ 3. Example of mflash platform driver registration
+
+1. Overview
+
+Mflash and gflash are embedded flash drive. The only difference is mflash is
+MCP(Multi Chip Package) device. These two device operate exactly same way.
+So the rest mflash repersents mflash and gflash altogether.
+
+Internally, mflash has nand flash and other hardware logics and supports
+2 different operation (ATA, IO) modes. ATA mode doesn't need any new
+driver and currently works well under standard IDE subsystem. Actually it's
+one chip SSD. IO mode is ATA-like custom mode for the host that doesn't have
+IDE interface.
+
+Followings are brief descriptions about IO mode.
+A. IO mode based on ATA protocol and uses some custom command. (read confirm,
+write confirm)
+B. IO mode uses SRAM bus interface.
+C. IO mode supports 4kB boot area, so host can boot from mflash.
+
+2. Reserved area configuration
+If host boot from mflash, usually needs raw area for boot loader image. All of
+the mflash's block device operation will be taken this value as start offset.
+Note that boot loader's size of reserved area and kernel configuration value
+must be same.
+
+3. Example of mflash platform driver registration
+Working mflash is very straight forward. Adding platform device stuff to board
+configuration file is all. Here is some pseudo example.
+
+static struct mg_drv_data mflash_drv_data = {
+ /* If you want to polling driver set to 1 */
+ .use_polling = 0,
+ /* device attribution */
+ .dev_attr = MG_BOOT_DEV
+};
+
+static struct resource mg_mflash_rsc[] = {
+ /* Base address of mflash */
+ [0] = {
+ .start = 0x08000000,
+ .end = 0x08000000 + SZ_64K - 1,
+ .flags = IORESOURCE_MEM
+ },
+ /* mflash interrupt pin */
+ [1] = {
+ .start = IRQ_GPIO(84),
+ .end = IRQ_GPIO(84),
+ .flags = IORESOURCE_IRQ
+ },
+ /* mflash reset pin */
+ [2] = {
+ .start = 43,
+ .end = 43,
+ .name = MG_RST_PIN,
+ .flags = IORESOURCE_IO
+ },
+ /* mflash reset-out pin
+ * If you use mflash as storage device (i.e. other than MG_BOOT_DEV),
+ * should assign this */
+ [3] = {
+ .start = 51,
+ .end = 51,
+ .name = MG_RSTOUT_PIN,
+ .flags = IORESOURCE_IO
+ }
+};
+
+static struct platform_device mflash_dev = {
+ .name = MG_DEV_NAME,
+ .id = -1,
+ .dev = {
+ .platform_data = &mflash_drv_data,
+ },
+ .num_resources = ARRAY_SIZE(mg_mflash_rsc),
+ .resource = mg_mflash_rsc
+};
+
+platform_device_register(&mflash_dev);
diff --git a/Documentation/blockdev/nbd.txt b/Documentation/blockdev/nbd.txt
new file mode 100644
index 000000000..db242ea2b
--- /dev/null
+++ b/Documentation/blockdev/nbd.txt
@@ -0,0 +1,31 @@
+Network Block Device (TCP version)
+==================================
+
+1) Overview
+-----------
+
+What is it: With this compiled in the kernel (or as a module), Linux
+can use a remote server as one of its block devices. So every time
+the client computer wants to read, e.g., /dev/nb0, it sends a
+request over TCP to the server, which will reply with the data read.
+This can be used for stations with low disk space (or even diskless)
+to borrow disk space from another computer.
+Unlike NFS, it is possible to put any filesystem on it, etc.
+
+For more information, or to download the nbd-client and nbd-server
+tools, go to http://nbd.sf.net/.
+
+The nbd kernel module need only be installed on the client
+system, as the nbd-server is completely in userspace. In fact,
+the nbd-server has been successfully ported to other operating
+systems, including Windows.
+
+A) NBD parameters
+-----------------
+
+max_part
+ Number of partitions per device (default: 0).
+
+nbds_max
+ Number of block devices that should be initialized (default: 16).
+
diff --git a/Documentation/blockdev/paride.txt b/Documentation/blockdev/paride.txt
new file mode 100644
index 000000000..ee6717e37
--- /dev/null
+++ b/Documentation/blockdev/paride.txt
@@ -0,0 +1,417 @@
+
+ Linux and parallel port IDE devices
+
+PARIDE v1.03 (c) 1997-8 Grant Guenther <grant@torque.net>
+
+1. Introduction
+
+Owing to the simplicity and near universality of the parallel port interface
+to personal computers, many external devices such as portable hard-disk,
+CD-ROM, LS-120 and tape drives use the parallel port to connect to their
+host computer. While some devices (notably scanners) use ad-hoc methods
+to pass commands and data through the parallel port interface, most
+external devices are actually identical to an internal model, but with
+a parallel-port adapter chip added in. Some of the original parallel port
+adapters were little more than mechanisms for multiplexing a SCSI bus.
+(The Iomega PPA-3 adapter used in the ZIP drives is an example of this
+approach). Most current designs, however, take a different approach.
+The adapter chip reproduces a small ISA or IDE bus in the external device
+and the communication protocol provides operations for reading and writing
+device registers, as well as data block transfer functions. Sometimes,
+the device being addressed via the parallel cable is a standard SCSI
+controller like an NCR 5380. The "ditto" family of external tape
+drives use the ISA replicator to interface a floppy disk controller,
+which is then connected to a floppy-tape mechanism. The vast majority
+of external parallel port devices, however, are now based on standard
+IDE type devices, which require no intermediate controller. If one
+were to open up a parallel port CD-ROM drive, for instance, one would
+find a standard ATAPI CD-ROM drive, a power supply, and a single adapter
+that interconnected a standard PC parallel port cable and a standard
+IDE cable. It is usually possible to exchange the CD-ROM device with
+any other device using the IDE interface.
+
+The document describes the support in Linux for parallel port IDE
+devices. It does not cover parallel port SCSI devices, "ditto" tape
+drives or scanners. Many different devices are supported by the
+parallel port IDE subsystem, including:
+
+ MicroSolutions backpack CD-ROM
+ MicroSolutions backpack PD/CD
+ MicroSolutions backpack hard-drives
+ MicroSolutions backpack 8000t tape drive
+ SyQuest EZ-135, EZ-230 & SparQ drives
+ Avatar Shark
+ Imation Superdisk LS-120
+ Maxell Superdisk LS-120
+ FreeCom Power CD
+ Hewlett-Packard 5GB and 8GB tape drives
+ Hewlett-Packard 7100 and 7200 CD-RW drives
+
+as well as most of the clone and no-name products on the market.
+
+To support such a wide range of devices, PARIDE, the parallel port IDE
+subsystem, is actually structured in three parts. There is a base
+paride module which provides a registry and some common methods for
+accessing the parallel ports. The second component is a set of
+high-level drivers for each of the different types of supported devices:
+
+ pd IDE disk
+ pcd ATAPI CD-ROM
+ pf ATAPI disk
+ pt ATAPI tape
+ pg ATAPI generic
+
+(Currently, the pg driver is only used with CD-R drives).
+
+The high-level drivers function according to the relevant standards.
+The third component of PARIDE is a set of low-level protocol drivers
+for each of the parallel port IDE adapter chips. Thanks to the interest
+and encouragement of Linux users from many parts of the world,
+support is available for almost all known adapter protocols:
+
+ aten ATEN EH-100 (HK)
+ bpck Microsolutions backpack (US)
+ comm DataStor (old-type) "commuter" adapter (TW)
+ dstr DataStor EP-2000 (TW)
+ epat Shuttle EPAT (UK)
+ epia Shuttle EPIA (UK)
+ fit2 FIT TD-2000 (US)
+ fit3 FIT TD-3000 (US)
+ friq Freecom IQ cable (DE)
+ frpw Freecom Power (DE)
+ kbic KingByte KBIC-951A and KBIC-971A (TW)
+ ktti KT Technology PHd adapter (SG)
+ on20 OnSpec 90c20 (US)
+ on26 OnSpec 90c26 (US)
+
+
+2. Using the PARIDE subsystem
+
+While configuring the Linux kernel, you may choose either to build
+the PARIDE drivers into your kernel, or to build them as modules.
+
+In either case, you will need to select "Parallel port IDE device support"
+as well as at least one of the high-level drivers and at least one
+of the parallel port communication protocols. If you do not know
+what kind of parallel port adapter is used in your drive, you could
+begin by checking the file names and any text files on your DOS
+installation floppy. Alternatively, you can look at the markings on
+the adapter chip itself. That's usually sufficient to identify the
+correct device.
+
+You can actually select all the protocol modules, and allow the PARIDE
+subsystem to try them all for you.
+
+For the "brand-name" products listed above, here are the protocol
+and high-level drivers that you would use:
+
+ Manufacturer Model Driver Protocol
+
+ MicroSolutions CD-ROM pcd bpck
+ MicroSolutions PD drive pf bpck
+ MicroSolutions hard-drive pd bpck
+ MicroSolutions 8000t tape pt bpck
+ SyQuest EZ, SparQ pd epat
+ Imation Superdisk pf epat
+ Maxell Superdisk pf friq
+ Avatar Shark pd epat
+ FreeCom CD-ROM pcd frpw
+ Hewlett-Packard 5GB Tape pt epat
+ Hewlett-Packard 7200e (CD) pcd epat
+ Hewlett-Packard 7200e (CD-R) pg epat
+
+2.1 Configuring built-in drivers
+
+We recommend that you get to know how the drivers work and how to
+configure them as loadable modules, before attempting to compile a
+kernel with the drivers built-in.
+
+If you built all of your PARIDE support directly into your kernel,
+and you have just a single parallel port IDE device, your kernel should
+locate it automatically for you. If you have more than one device,
+you may need to give some command line options to your bootloader
+(eg: LILO), how to do that is beyond the scope of this document.
+
+The high-level drivers accept a number of command line parameters, all
+of which are documented in the source files in linux/drivers/block/paride.
+By default, each driver will automatically try all parallel ports it
+can find, and all protocol types that have been installed, until it finds
+a parallel port IDE adapter. Once it finds one, the probe stops. So,
+if you have more than one device, you will need to tell the drivers
+how to identify them. This requires specifying the port address, the
+protocol identification number and, for some devices, the drive's
+chain ID. While your system is booting, a number of messages are
+displayed on the console. Like all such messages, they can be
+reviewed with the 'dmesg' command. Among those messages will be
+some lines like:
+
+ paride: bpck registered as protocol 0
+ paride: epat registered as protocol 1
+
+The numbers will always be the same until you build a new kernel with
+different protocol selections. You should note these numbers as you
+will need them to identify the devices.
+
+If you happen to be using a MicroSolutions backpack device, you will
+also need to know the unit ID number for each drive. This is usually
+the last two digits of the drive's serial number (but read MicroSolutions'
+documentation about this).
+
+As an example, let's assume that you have a MicroSolutions PD/CD drive
+with unit ID number 36 connected to the parallel port at 0x378, a SyQuest
+EZ-135 connected to the chained port on the PD/CD drive and also an
+Imation Superdisk connected to port 0x278. You could give the following
+options on your boot command:
+
+ pd.drive0=0x378,1 pf.drive0=0x278,1 pf.drive1=0x378,0,36
+
+In the last option, pf.drive1 configures device /dev/pf1, the 0x378
+is the parallel port base address, the 0 is the protocol registration
+number and 36 is the chain ID.
+
+Please note: while PARIDE will work both with and without the
+PARPORT parallel port sharing system that is included by the
+"Parallel port support" option, PARPORT must be included and enabled
+if you want to use chains of devices on the same parallel port.
+
+2.2 Loading and configuring PARIDE as modules
+
+It is much faster and simpler to get to understand the PARIDE drivers
+if you use them as loadable kernel modules.
+
+Note 1: using these drivers with the "kerneld" automatic module loading
+system is not recommended for beginners, and is not documented here.
+
+Note 2: if you build PARPORT support as a loadable module, PARIDE must
+also be built as loadable modules, and PARPORT must be loaded before the
+PARIDE modules.
+
+To use PARIDE, you must begin by
+
+ insmod paride
+
+this loads a base module which provides a registry for the protocols,
+among other tasks.
+
+Then, load as many of the protocol modules as you think you might need.
+As you load each module, it will register the protocols that it supports,
+and print a log message to your kernel log file and your console. For
+example:
+
+ # insmod epat
+ paride: epat registered as protocol 0
+ # insmod kbic
+ paride: k951 registered as protocol 1
+ paride: k971 registered as protocol 2
+
+Finally, you can load high-level drivers for each kind of device that
+you have connected. By default, each driver will autoprobe for a single
+device, but you can support up to four similar devices by giving their
+individual co-ordinates when you load the driver.
+
+For example, if you had two no-name CD-ROM drives both using the
+KingByte KBIC-951A adapter, one on port 0x378 and the other on 0x3bc
+you could give the following command:
+
+ # insmod pcd drive0=0x378,1 drive1=0x3bc,1
+
+For most adapters, giving a port address and protocol number is sufficient,
+but check the source files in linux/drivers/block/paride for more
+information. (Hopefully someone will write some man pages one day !).
+
+As another example, here's what happens when PARPORT is installed, and
+a SyQuest EZ-135 is attached to port 0x378:
+
+ # insmod paride
+ paride: version 1.0 installed
+ # insmod epat
+ paride: epat registered as protocol 0
+ # insmod pd
+ pd: pd version 1.0, major 45, cluster 64, nice 0
+ pda: Sharing parport1 at 0x378
+ pda: epat 1.0, Shuttle EPAT chip c3 at 0x378, mode 5 (EPP-32), delay 1
+ pda: SyQuest EZ135A, 262144 blocks [128M], (512/16/32), removable media
+ pda: pda1
+
+Note that the last line is the output from the generic partition table
+scanner - in this case it reports that it has found a disk with one partition.
+
+2.3 Using a PARIDE device
+
+Once the drivers have been loaded, you can access PARIDE devices in the
+same way as their traditional counterparts. You will probably need to
+create the device "special files". Here is a simple script that you can
+cut to a file and execute:
+
+#!/bin/bash
+#
+# mkd -- a script to create the device special files for the PARIDE subsystem
+#
+function mkdev {
+ mknod $1 $2 $3 $4 ; chmod 0660 $1 ; chown root:disk $1
+}
+#
+function pd {
+ D=$( printf \\$( printf "x%03x" $[ $1 + 97 ] ) )
+ mkdev pd$D b 45 $[ $1 * 16 ]
+ for P in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+ do mkdev pd$D$P b 45 $[ $1 * 16 + $P ]
+ done
+}
+#
+cd /dev
+#
+for u in 0 1 2 3 ; do pd $u ; done
+for u in 0 1 2 3 ; do mkdev pcd$u b 46 $u ; done
+for u in 0 1 2 3 ; do mkdev pf$u b 47 $u ; done
+for u in 0 1 2 3 ; do mkdev pt$u c 96 $u ; done
+for u in 0 1 2 3 ; do mkdev npt$u c 96 $[ $u + 128 ] ; done
+for u in 0 1 2 3 ; do mkdev pg$u c 97 $u ; done
+#
+# end of mkd
+
+With the device files and drivers in place, you can access PARIDE devices
+like any other Linux device. For example, to mount a CD-ROM in pcd0, use:
+
+ mount /dev/pcd0 /cdrom
+
+If you have a fresh Avatar Shark cartridge, and the drive is pda, you
+might do something like:
+
+ fdisk /dev/pda -- make a new partition table with
+ partition 1 of type 83
+
+ mke2fs /dev/pda1 -- to build the file system
+
+ mkdir /shark -- make a place to mount the disk
+
+ mount /dev/pda1 /shark
+
+Devices like the Imation superdisk work in the same way, except that
+they do not have a partition table. For example to make a 120MB
+floppy that you could share with a DOS system:
+
+ mkdosfs /dev/pf0
+ mount /dev/pf0 /mnt
+
+
+2.4 The pf driver
+
+The pf driver is intended for use with parallel port ATAPI disk
+devices. The most common devices in this category are PD drives
+and LS-120 drives. Traditionally, media for these devices are not
+partitioned. Consequently, the pf driver does not support partitioned
+media. This may be changed in a future version of the driver.
+
+2.5 Using the pt driver
+
+The pt driver for parallel port ATAPI tape drives is a minimal driver.
+It does not yet support many of the standard tape ioctl operations.
+For best performance, a block size of 32KB should be used. You will
+probably want to set the parallel port delay to 0, if you can.
+
+2.6 Using the pg driver
+
+The pg driver can be used in conjunction with the cdrecord program
+to create CD-ROMs. Please get cdrecord version 1.6.1 or later
+from ftp://ftp.fokus.gmd.de/pub/unix/cdrecord/ . To record CD-R media
+your parallel port should ideally be set to EPP mode, and the "port delay"
+should be set to 0. With those settings it is possible to record at 2x
+speed without any buffer underruns. If you cannot get the driver to work
+in EPP mode, try to use "bidirectional" or "PS/2" mode and 1x speeds only.
+
+
+3. Troubleshooting
+
+3.1 Use EPP mode if you can
+
+The most common problems that people report with the PARIDE drivers
+concern the parallel port CMOS settings. At this time, none of the
+PARIDE protocol modules support ECP mode, or any ECP combination modes.
+If you are able to do so, please set your parallel port into EPP mode
+using your CMOS setup procedure.
+
+3.2 Check the port delay
+
+Some parallel ports cannot reliably transfer data at full speed. To
+offset the errors, the PARIDE protocol modules introduce a "port
+delay" between each access to the i/o ports. Each protocol sets
+a default value for this delay. In most cases, the user can override
+the default and set it to 0 - resulting in somewhat higher transfer
+rates. In some rare cases (especially with older 486 systems) the
+default delays are not long enough. if you experience corrupt data
+transfers, or unexpected failures, you may wish to increase the
+port delay. The delay can be programmed using the "driveN" parameters
+to each of the high-level drivers. Please see the notes above, or
+read the comments at the beginning of the driver source files in
+linux/drivers/block/paride.
+
+3.3 Some drives need a printer reset
+
+There appear to be a number of "noname" external drives on the market
+that do not always power up correctly. We have noticed this with some
+drives based on OnSpec and older Freecom adapters. In these rare cases,
+the adapter can often be reinitialised by issuing a "printer reset" on
+the parallel port. As the reset operation is potentially disruptive in
+multiple device environments, the PARIDE drivers will not do it
+automatically. You can however, force a printer reset by doing:
+
+ insmod lp reset=1
+ rmmod lp
+
+If you have one of these marginal cases, you should probably build
+your paride drivers as modules, and arrange to do the printer reset
+before loading the PARIDE drivers.
+
+3.4 Use the verbose option and dmesg if you need help
+
+While a lot of testing has gone into these drivers to make them work
+as smoothly as possible, problems will arise. If you do have problems,
+please check all the obvious things first: does the drive work in
+DOS with the manufacturer's drivers ? If that doesn't yield any useful
+clues, then please make sure that only one drive is hooked to your system,
+and that either (a) PARPORT is enabled or (b) no other device driver
+is using your parallel port (check in /proc/ioports). Then, load the
+appropriate drivers (you can load several protocol modules if you want)
+as in:
+
+ # insmod paride
+ # insmod epat
+ # insmod bpck
+ # insmod kbic
+ ...
+ # insmod pd verbose=1
+
+(using the correct driver for the type of device you have, of course).
+The verbose=1 parameter will cause the drivers to log a trace of their
+activity as they attempt to locate your drive.
+
+Use 'dmesg' to capture a log of all the PARIDE messages (any messages
+beginning with paride:, a protocol module's name or a driver's name) and
+include that with your bug report. You can submit a bug report in one
+of two ways. Either send it directly to the author of the PARIDE suite,
+by e-mail to grant@torque.net, or join the linux-parport mailing list
+and post your report there.
+
+3.5 For more information or help
+
+You can join the linux-parport mailing list by sending a mail message
+to
+ linux-parport-request@torque.net
+
+with the single word
+
+ subscribe
+
+in the body of the mail message (not in the subject line). Please be
+sure that your mail program is correctly set up when you do this, as
+the list manager is a robot that will subscribe you using the reply
+address in your mail headers. REMOVE any anti-spam gimmicks you may
+have in your mail headers, when sending mail to the list server.
+
+You might also find some useful information on the linux-parport
+web pages (although they are not always up to date) at
+
+ http://web.archive.org/web/*/http://www.torque.net/parport/
+
+
diff --git a/Documentation/blockdev/ramdisk.txt b/Documentation/blockdev/ramdisk.txt
new file mode 100644
index 000000000..fe2ef978d
--- /dev/null
+++ b/Documentation/blockdev/ramdisk.txt
@@ -0,0 +1,174 @@
+Using the RAM disk block device with Linux
+------------------------------------------
+
+Contents:
+
+ 1) Overview
+ 2) Kernel Command Line Parameters
+ 3) Using "rdev -r"
+ 4) An Example of Creating a Compressed RAM Disk
+
+
+1) Overview
+-----------
+
+The RAM disk driver is a way to use main system memory as a block device. It
+is required for initrd, an initial filesystem used if you need to load modules
+in order to access the root filesystem (see Documentation/initrd.txt). It can
+also be used for a temporary filesystem for crypto work, since the contents
+are erased on reboot.
+
+The RAM disk dynamically grows as more space is required. It does this by using
+RAM from the buffer cache. The driver marks the buffers it is using as dirty
+so that the VM subsystem does not try to reclaim them later.
+
+The RAM disk supports up to 16 RAM disks by default, and can be reconfigured
+to support an unlimited number of RAM disks (at your own risk). Just change
+the configuration symbol BLK_DEV_RAM_COUNT in the Block drivers config menu
+and (re)build the kernel.
+
+To use RAM disk support with your system, run './MAKEDEV ram' from the /dev
+directory. RAM disks are all major number 1, and start with minor number 0
+for /dev/ram0, etc. If used, modern kernels use /dev/ram0 for an initrd.
+
+The new RAM disk also has the ability to load compressed RAM disk images,
+allowing one to squeeze more programs onto an average installation or
+rescue floppy disk.
+
+
+2) Parameters
+---------------------------------
+
+2a) Kernel Command Line Parameters
+
+ ramdisk_size=N
+ ==============
+
+This parameter tells the RAM disk driver to set up RAM disks of N k size. The
+default is 4096 (4 MB).
+
+2b) Module parameters
+
+ rd_nr
+ =====
+ /dev/ramX devices created.
+
+ max_part
+ ========
+ Maximum partition number.
+
+ rd_size
+ =======
+ See ramdisk_size.
+
+3) Using "rdev -r"
+------------------
+
+The usage of the word (two bytes) that "rdev -r" sets in the kernel image is
+as follows. The low 11 bits (0 -> 10) specify an offset (in 1 k blocks) of up
+to 2 MB (2^11) of where to find the RAM disk (this used to be the size). Bit
+14 indicates that a RAM disk is to be loaded, and bit 15 indicates whether a
+prompt/wait sequence is to be given before trying to read the RAM disk. Since
+the RAM disk dynamically grows as data is being written into it, a size field
+is not required. Bits 11 to 13 are not currently used and may as well be zero.
+These numbers are no magical secrets, as seen below:
+
+./arch/x86/kernel/setup.c:#define RAMDISK_IMAGE_START_MASK 0x07FF
+./arch/x86/kernel/setup.c:#define RAMDISK_PROMPT_FLAG 0x8000
+./arch/x86/kernel/setup.c:#define RAMDISK_LOAD_FLAG 0x4000
+
+Consider a typical two floppy disk setup, where you will have the
+kernel on disk one, and have already put a RAM disk image onto disk #2.
+
+Hence you want to set bits 0 to 13 as 0, meaning that your RAM disk
+starts at an offset of 0 kB from the beginning of the floppy.
+The command line equivalent is: "ramdisk_start=0"
+
+You want bit 14 as one, indicating that a RAM disk is to be loaded.
+The command line equivalent is: "load_ramdisk=1"
+
+You want bit 15 as one, indicating that you want a prompt/keypress
+sequence so that you have a chance to switch floppy disks.
+The command line equivalent is: "prompt_ramdisk=1"
+
+Putting that together gives 2^15 + 2^14 + 0 = 49152 for an rdev word.
+So to create disk one of the set, you would do:
+
+ /usr/src/linux# cat arch/x86/boot/zImage > /dev/fd0
+ /usr/src/linux# rdev /dev/fd0 /dev/fd0
+ /usr/src/linux# rdev -r /dev/fd0 49152
+
+If you make a boot disk that has LILO, then for the above, you would use:
+ append = "ramdisk_start=0 load_ramdisk=1 prompt_ramdisk=1"
+Since the default start = 0 and the default prompt = 1, you could use:
+ append = "load_ramdisk=1"
+
+
+4) An Example of Creating a Compressed RAM Disk
+----------------------------------------------
+
+To create a RAM disk image, you will need a spare block device to
+construct it on. This can be the RAM disk device itself, or an
+unused disk partition (such as an unmounted swap partition). For this
+example, we will use the RAM disk device, "/dev/ram0".
+
+Note: This technique should not be done on a machine with less than 8 MB
+of RAM. If using a spare disk partition instead of /dev/ram0, then this
+restriction does not apply.
+
+a) Decide on the RAM disk size that you want. Say 2 MB for this example.
+ Create it by writing to the RAM disk device. (This step is not currently
+ required, but may be in the future.) It is wise to zero out the
+ area (esp. for disks) so that maximal compression is achieved for
+ the unused blocks of the image that you are about to create.
+
+ dd if=/dev/zero of=/dev/ram0 bs=1k count=2048
+
+b) Make a filesystem on it. Say ext2fs for this example.
+
+ mke2fs -vm0 /dev/ram0 2048
+
+c) Mount it, copy the files you want to it (eg: /etc/* /dev/* ...)
+ and unmount it again.
+
+d) Compress the contents of the RAM disk. The level of compression
+ will be approximately 50% of the space used by the files. Unused
+ space on the RAM disk will compress to almost nothing.
+
+ dd if=/dev/ram0 bs=1k count=2048 | gzip -v9 > /tmp/ram_image.gz
+
+e) Put the kernel onto the floppy
+
+ dd if=zImage of=/dev/fd0 bs=1k
+
+f) Put the RAM disk image onto the floppy, after the kernel. Use an offset
+ that is slightly larger than the kernel, so that you can put another
+ (possibly larger) kernel onto the same floppy later without overlapping
+ the RAM disk image. An offset of 400 kB for kernels about 350 kB in
+ size would be reasonable. Make sure offset+size of ram_image.gz is
+ not larger than the total space on your floppy (usually 1440 kB).
+
+ dd if=/tmp/ram_image.gz of=/dev/fd0 bs=1k seek=400
+
+g) Use "rdev" to set the boot device, RAM disk offset, prompt flag, etc.
+ For prompt_ramdisk=1, load_ramdisk=1, ramdisk_start=400, one would
+ have 2^15 + 2^14 + 400 = 49552.
+
+ rdev /dev/fd0 /dev/fd0
+ rdev -r /dev/fd0 49552
+
+That is it. You now have your boot/root compressed RAM disk floppy. Some
+users may wish to combine steps (d) and (f) by using a pipe.
+
+--------------------------------------------------------------------------
+ Paul Gortmaker 12/95
+
+Changelog:
+----------
+
+10-22-04 : Updated to reflect changes in command line options, remove
+ obsolete references, general cleanup.
+ James Nelson (james4765@gmail.com)
+
+
+12-95 : Original Document
diff --git a/Documentation/blockdev/zram.txt b/Documentation/blockdev/zram.txt
new file mode 100644
index 000000000..48a183e29
--- /dev/null
+++ b/Documentation/blockdev/zram.txt
@@ -0,0 +1,189 @@
+zram: Compressed RAM based block devices
+----------------------------------------
+
+* Introduction
+
+The zram module creates RAM based block devices named /dev/zram<id>
+(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
+in memory itself. These disks allow very fast I/O and compression provides
+good amounts of memory savings. Some of the usecases include /tmp storage,
+use as swap disks, various caches under /var and maybe many more :)
+
+Statistics for individual zram devices are exported through sysfs nodes at
+/sys/block/zram<id>/
+
+* Usage
+
+Following shows a typical sequence of steps for using zram.
+
+1) Load Module:
+ modprobe zram num_devices=4
+ This creates 4 devices: /dev/zram{0,1,2,3}
+ (num_devices parameter is optional. Default: 1)
+
+2) Set max number of compression streams
+ Compression backend may use up to max_comp_streams compression streams,
+ thus allowing up to max_comp_streams concurrent compression operations.
+ By default, compression backend uses single compression stream.
+
+ Examples:
+ #show max compression streams number
+ cat /sys/block/zram0/max_comp_streams
+
+ #set max compression streams number to 3
+ echo 3 > /sys/block/zram0/max_comp_streams
+
+Note:
+In order to enable compression backend's multi stream support max_comp_streams
+must be initially set to desired concurrency level before ZRAM device
+initialisation. Once the device initialised as a single stream compression
+backend (max_comp_streams equals to 1), you will see error if you try to change
+the value of max_comp_streams because single stream compression backend
+implemented as a special case by lock overhead issue and does not support
+dynamic max_comp_streams. Only multi stream backend supports dynamic
+max_comp_streams adjustment.
+
+3) Select compression algorithm
+ Using comp_algorithm device attribute one can see available and
+ currently selected (shown in square brackets) compression algortithms,
+ change selected compression algorithm (once the device is initialised
+ there is no way to change compression algorithm).
+
+ Examples:
+ #show supported compression algorithms
+ cat /sys/block/zram0/comp_algorithm
+ lzo [lz4]
+
+ #select lzo compression algorithm
+ echo lzo > /sys/block/zram0/comp_algorithm
+
+4) Set Disksize
+ Set disk size by writing the value to sysfs node 'disksize'.
+ The value can be either in bytes or you can use mem suffixes.
+ Examples:
+ # Initialize /dev/zram0 with 50MB disksize
+ echo $((50*1024*1024)) > /sys/block/zram0/disksize
+
+ # Using mem suffixes
+ echo 256K > /sys/block/zram0/disksize
+ echo 512M > /sys/block/zram0/disksize
+ echo 1G > /sys/block/zram0/disksize
+
+Note:
+There is little point creating a zram of greater than twice the size of memory
+since we expect a 2:1 compression ratio. Note that zram uses about 0.1% of the
+size of the disk when not in use so a huge zram is wasteful.
+
+5) Set memory limit: Optional
+ Set memory limit by writing the value to sysfs node 'mem_limit'.
+ The value can be either in bytes or you can use mem suffixes.
+ In addition, you could change the value in runtime.
+ Examples:
+ # limit /dev/zram0 with 50MB memory
+ echo $((50*1024*1024)) > /sys/block/zram0/mem_limit
+
+ # Using mem suffixes
+ echo 256K > /sys/block/zram0/mem_limit
+ echo 512M > /sys/block/zram0/mem_limit
+ echo 1G > /sys/block/zram0/mem_limit
+
+ # To disable memory limit
+ echo 0 > /sys/block/zram0/mem_limit
+
+6) Activate:
+ mkswap /dev/zram0
+ swapon /dev/zram0
+
+ mkfs.ext4 /dev/zram1
+ mount /dev/zram1 /tmp
+
+7) Stats:
+Per-device statistics are exported as various nodes under /sys/block/zram<id>/
+
+A brief description of exported device attritbutes. For more details please
+read Documentation/ABI/testing/sysfs-block-zram.
+
+Name access description
+---- ------ -----------
+disksize RW show and set the device's disk size
+initstate RO shows the initialization state of the device
+reset WO trigger device reset
+num_reads RO the number of reads
+failed_reads RO the number of failed reads
+num_write RO the number of writes
+failed_writes RO the number of failed writes
+invalid_io RO the number of non-page-size-aligned I/O requests
+max_comp_streams RW the number of possible concurrent compress operations
+comp_algorithm RW show and change the compression algorithm
+notify_free RO the number of notifications to free pages (either
+ slot free notifications or REQ_DISCARD requests)
+zero_pages RO the number of zero filled pages written to this disk
+orig_data_size RO uncompressed size of data stored in this disk
+compr_data_size RO compressed size of data stored in this disk
+mem_used_total RO the amount of memory allocated for this disk
+mem_used_max RW the maximum amount memory zram have consumed to
+ store compressed data
+mem_limit RW the maximum amount of memory ZRAM can use to store
+ the compressed data
+num_migrated RO the number of objects migrated migrated by compaction
+
+
+WARNING
+=======
+per-stat sysfs attributes are considered to be deprecated.
+The basic strategy is:
+-- the existing RW nodes will be downgraded to WO nodes (in linux 4.11)
+-- deprecated RO sysfs nodes will eventually be removed (in linux 4.11)
+
+The list of deprecated attributes can be found here:
+Documentation/ABI/obsolete/sysfs-block-zram
+
+Basically, every attribute that has its own read accessible sysfs node
+(e.g. num_reads) *AND* is accessible via one of the stat files (zram<id>/stat
+or zram<id>/io_stat or zram<id>/mm_stat) is considered to be deprecated.
+
+User space is advised to use the following files to read the device statistics.
+
+File /sys/block/zram<id>/stat
+
+Represents block layer statistics. Read Documentation/block/stat.txt for
+details.
+
+File /sys/block/zram<id>/io_stat
+
+The stat file represents device's I/O statistics not accounted by block
+layer and, thus, not available in zram<id>/stat file. It consists of a
+single line of text and contains the following stats separated by
+whitespace:
+ failed_reads
+ failed_writes
+ invalid_io
+ notify_free
+
+File /sys/block/zram<id>/mm_stat
+
+The stat file represents device's mm statistics. It consists of a single
+line of text and contains the following stats separated by whitespace:
+ orig_data_size
+ compr_data_size
+ mem_used_total
+ mem_limit
+ mem_used_max
+ zero_pages
+ num_migrated
+
+8) Deactivate:
+ swapoff /dev/zram0
+ umount /dev/zram1
+
+9) Reset:
+ Write any positive value to 'reset' sysfs node
+ echo 1 > /sys/block/zram0/reset
+ echo 1 > /sys/block/zram1/reset
+
+ This frees all the memory allocated for the given device and
+ resets the disksize to zero. You must set the disksize again
+ before reusing the device.
+
+Nitin Gupta
+ngupta@vflare.org
diff --git a/Documentation/braille-console.txt b/Documentation/braille-console.txt
new file mode 100644
index 000000000..d0d042c2f
--- /dev/null
+++ b/Documentation/braille-console.txt
@@ -0,0 +1,34 @@
+ Linux Braille Console
+
+To get early boot messages on a braille device (before userspace screen
+readers can start), you first need to compile the support for the usual serial
+console (see serial-console.txt), and for braille device (in Device Drivers -
+Accessibility).
+
+Then you need to specify a console=brl, option on the kernel command line, the
+format is:
+
+ console=brl,serial_options...
+
+where serial_options... are the same as described in serial-console.txt
+
+So for instance you can use console=brl,ttyS0 if the braille device is connected
+to the first serial port, and console=brl,ttyS0,115200 to override the baud rate
+to 115200, etc.
+
+By default, the braille device will just show the last kernel message (console
+mode). To review previous messages, press the Insert key to switch to the VT
+review mode. In review mode, the arrow keys permit to browse in the VT content,
+page up/down keys go at the top/bottom of the screen, and the home key goes back
+to the cursor, hence providing very basic screen reviewing facility.
+
+Sound feedback can be obtained by adding the braille_console.sound=1 kernel
+parameter.
+
+For simplicity, only one braille console can be enabled, other uses of
+console=brl,... will be discarded. Also note that it does not interfere with
+the console selection mechanism described in serial-console.txt
+
+For now, only the VisioBraille device is supported.
+
+Samuel Thibault <samuel.thibault@ens-lyon.org>
diff --git a/Documentation/bt8xxgpio.txt b/Documentation/bt8xxgpio.txt
new file mode 100644
index 000000000..d8297e4eb
--- /dev/null
+++ b/Documentation/bt8xxgpio.txt
@@ -0,0 +1,67 @@
+===============================================================
+== BT8XXGPIO driver ==
+== ==
+== A driver for a selfmade cheap BT8xx based PCI GPIO-card ==
+== ==
+== For advanced documentation, see ==
+== http://www.bu3sch.de/btgpio.php ==
+===============================================================
+
+
+A generic digital 24-port PCI GPIO card can be built out of an ordinary
+Brooktree bt848, bt849, bt878 or bt879 based analog TV tuner card. The
+Brooktree chip is used in old analog Hauppauge WinTV PCI cards. You can easily
+find them used for low prices on the net.
+
+The bt8xx chip does have 24 digital GPIO ports.
+These ports are accessible via 24 pins on the SMD chip package.
+
+
+==============================================
+== How to physically access the GPIO pins ==
+==============================================
+
+The are several ways to access these pins. One might unsolder the whole chip
+and put it on a custom PCI board, or one might only unsolder each individual
+GPIO pin and solder that to some tiny wire. As the chip package really is tiny
+there are some advanced soldering skills needed in any case.
+
+The physical pinouts are drawn in the following ASCII art.
+The GPIO pins are marked with G00-G23
+
+ G G G G G G G G G G G G G G G G G G
+ 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7
+ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+ ---------------------------------------------------------------------------
+ --| ^ ^ |--
+ --| pin 86 pin 67 |--
+ --| |--
+ --| pin 61 > |-- G18
+ --| |-- G19
+ --| |-- G20
+ --| |-- G21
+ --| |-- G22
+ --| pin 56 > |-- G23
+ --| |--
+ --| Brooktree 878/879 |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| |--
+ --| O |--
+ --| |--
+ ---------------------------------------------------------------------------
+ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
+ ^
+ This is pin 1
+
diff --git a/Documentation/btmrvl.txt b/Documentation/btmrvl.txt
new file mode 100644
index 000000000..63fd8e7a4
--- /dev/null
+++ b/Documentation/btmrvl.txt
@@ -0,0 +1,111 @@
+=======================================================================
+ README for btmrvl driver
+=======================================================================
+
+
+All commands are used via debugfs interface.
+
+=====================
+Set/get driver configurations:
+
+Path: /debug/btmrvl/config/
+
+gpiogap=[n]
+hscfgcmd
+ These commands are used to configure the host sleep parameters.
+ bit 8:0 -- Gap
+ bit 16:8 -- GPIO
+
+ where GPIO is the pin number of GPIO used to wake up the host.
+ It could be any valid GPIO pin# (e.g. 0-7) or 0xff (SDIO interface
+ wakeup will be used instead).
+
+ where Gap is the gap in milli seconds between wakeup signal and
+ wakeup event, or 0xff for special host sleep setting.
+
+ Usage:
+ # Use SDIO interface to wake up the host and set GAP to 0x80:
+ echo 0xff80 > /debug/btmrvl/config/gpiogap
+ echo 1 > /debug/btmrvl/config/hscfgcmd
+
+ # Use GPIO pin #3 to wake up the host and set GAP to 0xff:
+ echo 0x03ff > /debug/btmrvl/config/gpiogap
+ echo 1 > /debug/btmrvl/config/hscfgcmd
+
+psmode=[n]
+pscmd
+ These commands are used to enable/disable auto sleep mode
+
+ where the option is:
+ 1 -- Enable auto sleep mode
+ 0 -- Disable auto sleep mode
+
+ Usage:
+ # Enable auto sleep mode
+ echo 1 > /debug/btmrvl/config/psmode
+ echo 1 > /debug/btmrvl/config/pscmd
+
+ # Disable auto sleep mode
+ echo 0 > /debug/btmrvl/config/psmode
+ echo 1 > /debug/btmrvl/config/pscmd
+
+
+hsmode=[n]
+hscmd
+ These commands are used to enable host sleep or wake up firmware
+
+ where the option is:
+ 1 -- Enable host sleep
+ 0 -- Wake up firmware
+
+ Usage:
+ # Enable host sleep
+ echo 1 > /debug/btmrvl/config/hsmode
+ echo 1 > /debug/btmrvl/config/hscmd
+
+ # Wake up firmware
+ echo 0 > /debug/btmrvl/config/hsmode
+ echo 1 > /debug/btmrvl/config/hscmd
+
+
+======================
+Get driver status:
+
+Path: /debug/btmrvl/status/
+
+Usage:
+ cat /debug/btmrvl/status/<args>
+
+where the args are:
+
+curpsmode
+ This command displays current auto sleep status.
+
+psstate
+ This command display the power save state.
+
+hsstate
+ This command display the host sleep state.
+
+txdnldrdy
+ This command displays the value of Tx download ready flag.
+
+
+=====================
+
+Use hcitool to issue raw hci command, refer to hcitool manual
+
+ Usage: Hcitool cmd <ogf> <ocf> [Parameters]
+
+ Interface Control Command
+ hcitool cmd 0x3f 0x5b 0xf5 0x01 0x00 --Enable All interface
+ hcitool cmd 0x3f 0x5b 0xf5 0x01 0x01 --Enable Wlan interface
+ hcitool cmd 0x3f 0x5b 0xf5 0x01 0x02 --Enable BT interface
+ hcitool cmd 0x3f 0x5b 0xf5 0x00 0x00 --Disable All interface
+ hcitool cmd 0x3f 0x5b 0xf5 0x00 0x01 --Disable Wlan interface
+ hcitool cmd 0x3f 0x5b 0xf5 0x00 0x02 --Disable BT interface
+
+=======================================================================
+
+
+/*(DEBLOBBED)*/
diff --git a/Documentation/bus-devices/ti-gpmc.txt b/Documentation/bus-devices/ti-gpmc.txt
new file mode 100644
index 000000000..cc9ce57e0
--- /dev/null
+++ b/Documentation/bus-devices/ti-gpmc.txt
@@ -0,0 +1,122 @@
+GPMC (General Purpose Memory Controller):
+=========================================
+
+GPMC is an unified memory controller dedicated to interfacing external
+memory devices like
+ * Asynchronous SRAM like memories and application specific integrated
+ circuit devices.
+ * Asynchronous, synchronous, and page mode burst NOR flash devices
+ NAND flash
+ * Pseudo-SRAM devices
+
+GPMC is found on Texas Instruments SoC's (OMAP based)
+IP details: http://www.ti.com/lit/pdf/spruh73 section 7.1
+
+
+GPMC generic timing calculation:
+================================
+
+GPMC has certain timings that has to be programmed for proper
+functioning of the peripheral, while peripheral has another set of
+timings. To have peripheral work with gpmc, peripheral timings has to
+be translated to the form gpmc can understand. The way it has to be
+translated depends on the connected peripheral. Also there is a
+dependency for certain gpmc timings on gpmc clock frequency. Hence a
+generic timing routine was developed to achieve above requirements.
+
+Generic routine provides a generic method to calculate gpmc timings
+from gpmc peripheral timings. struct gpmc_device_timings fields has to
+be updated with timings from the datasheet of the peripheral that is
+connected to gpmc. A few of the peripheral timings can be fed either
+in time or in cycles, provision to handle this scenario has been
+provided (refer struct gpmc_device_timings definition). It may so
+happen that timing as specified by peripheral datasheet is not present
+in timing structure, in this scenario, try to correlate peripheral
+timing to the one available. If that doesn't work, try to add a new
+field as required by peripheral, educate generic timing routine to
+handle it, make sure that it does not break any of the existing.
+Then there may be cases where peripheral datasheet doesn't mention
+certain fields of struct gpmc_device_timings, zero those entries.
+
+Generic timing routine has been verified to work properly on
+multiple onenand's and tusb6010 peripherals.
+
+A word of caution: generic timing routine has been developed based
+on understanding of gpmc timings, peripheral timings, available
+custom timing routines, a kind of reverse engineering without
+most of the datasheets & hardware (to be exact none of those supported
+in mainline having custom timing routine) and by simulation.
+
+gpmc timing dependency on peripheral timings:
+[<gpmc_timing>: <peripheral timing1>, <peripheral timing2> ...]
+
+1. common
+cs_on: t_ceasu
+adv_on: t_avdasu, t_ceavd
+
+2. sync common
+sync_clk: clk
+page_burst_access: t_bacc
+clk_activation: t_ces, t_avds
+
+3. read async muxed
+adv_rd_off: t_avdp_r
+oe_on: t_oeasu, t_aavdh
+access: t_iaa, t_oe, t_ce, t_aa
+rd_cycle: t_rd_cycle, t_cez_r, t_oez
+
+4. read async non-muxed
+adv_rd_off: t_avdp_r
+oe_on: t_oeasu
+access: t_iaa, t_oe, t_ce, t_aa
+rd_cycle: t_rd_cycle, t_cez_r, t_oez
+
+5. read sync muxed
+adv_rd_off: t_avdp_r, t_avdh
+oe_on: t_oeasu, t_ach, cyc_aavdh_oe
+access: t_iaa, cyc_iaa, cyc_oe
+rd_cycle: t_cez_r, t_oez, t_ce_rdyz
+
+6. read sync non-muxed
+adv_rd_off: t_avdp_r
+oe_on: t_oeasu
+access: t_iaa, cyc_iaa, cyc_oe
+rd_cycle: t_cez_r, t_oez, t_ce_rdyz
+
+7. write async muxed
+adv_wr_off: t_avdp_w
+we_on, wr_data_mux_bus: t_weasu, t_aavdh, cyc_aavhd_we
+we_off: t_wpl
+cs_wr_off: t_wph
+wr_cycle: t_cez_w, t_wr_cycle
+
+8. write async non-muxed
+adv_wr_off: t_avdp_w
+we_on, wr_data_mux_bus: t_weasu
+we_off: t_wpl
+cs_wr_off: t_wph
+wr_cycle: t_cez_w, t_wr_cycle
+
+9. write sync muxed
+adv_wr_off: t_avdp_w, t_avdh
+we_on, wr_data_mux_bus: t_weasu, t_rdyo, t_aavdh, cyc_aavhd_we
+we_off: t_wpl, cyc_wpl
+cs_wr_off: t_wph
+wr_cycle: t_cez_w, t_ce_rdyz
+
+10. write sync non-muxed
+adv_wr_off: t_avdp_w
+we_on, wr_data_mux_bus: t_weasu, t_rdyo
+we_off: t_wpl, cyc_wpl
+cs_wr_off: t_wph
+wr_cycle: t_cez_w, t_ce_rdyz
+
+
+Note: Many of gpmc timings are dependent on other gpmc timings (a few
+gpmc timings purely dependent on other gpmc timings, a reason that
+some of the gpmc timings are missing above), and it will result in
+indirect dependency of peripheral timings to gpmc timings other than
+mentioned above, refer timing routine for more details. To know what
+these peripheral timings correspond to, please see explanations in
+struct gpmc_device_timings definition. And for gpmc timings refer
+IP details (link above).
diff --git a/Documentation/bus-virt-phys-mapping.txt b/Documentation/bus-virt-phys-mapping.txt
new file mode 100644
index 000000000..2bc55ff3b
--- /dev/null
+++ b/Documentation/bus-virt-phys-mapping.txt
@@ -0,0 +1,208 @@
+[ NOTE: The virt_to_bus() and bus_to_virt() functions have been
+ superseded by the functionality provided by the PCI DMA interface
+ (see Documentation/DMA-API-HOWTO.txt). They continue
+ to be documented below for historical purposes, but new code
+ must not use them. --davidm 00/12/12 ]
+
+[ This is a mail message in response to a query on IO mapping, thus the
+ strange format for a "document" ]
+
+The AHA-1542 is a bus-master device, and your patch makes the driver give the
+controller the physical address of the buffers, which is correct on x86
+(because all bus master devices see the physical memory mappings directly).
+
+However, on many setups, there are actually _three_ different ways of looking
+at memory addresses, and in this case we actually want the third, the
+so-called "bus address".
+
+Essentially, the three ways of addressing memory are (this is "real memory",
+that is, normal RAM--see later about other details):
+
+ - CPU untranslated. This is the "physical" address. Physical address
+ 0 is what the CPU sees when it drives zeroes on the memory bus.
+
+ - CPU translated address. This is the "virtual" address, and is
+ completely internal to the CPU itself with the CPU doing the appropriate
+ translations into "CPU untranslated".
+
+ - bus address. This is the address of memory as seen by OTHER devices,
+ not the CPU. Now, in theory there could be many different bus
+ addresses, with each device seeing memory in some device-specific way, but
+ happily most hardware designers aren't actually actively trying to make
+ things any more complex than necessary, so you can assume that all
+ external hardware sees the memory the same way.
+
+Now, on normal PCs the bus address is exactly the same as the physical
+address, and things are very simple indeed. However, they are that simple
+because the memory and the devices share the same address space, and that is
+not generally necessarily true on other PCI/ISA setups.
+
+Now, just as an example, on the PReP (PowerPC Reference Platform), the
+CPU sees a memory map something like this (this is from memory):
+
+ 0-2 GB "real memory"
+ 2 GB-3 GB "system IO" (inb/out and similar accesses on x86)
+ 3 GB-4 GB "IO memory" (shared memory over the IO bus)
+
+Now, that looks simple enough. However, when you look at the same thing from
+the viewpoint of the devices, you have the reverse, and the physical memory
+address 0 actually shows up as address 2 GB for any IO master.
+
+So when the CPU wants any bus master to write to physical memory 0, it
+has to give the master address 0x80000000 as the memory address.
+
+So, for example, depending on how the kernel is actually mapped on the
+PPC, you can end up with a setup like this:
+
+ physical address: 0
+ virtual address: 0xC0000000
+ bus address: 0x80000000
+
+where all the addresses actually point to the same thing. It's just seen
+through different translations..
+
+Similarly, on the Alpha, the normal translation is
+
+ physical address: 0
+ virtual address: 0xfffffc0000000000
+ bus address: 0x40000000
+
+(but there are also Alphas where the physical address and the bus address
+are the same).
+
+Anyway, the way to look up all these translations, you do
+
+ #include <asm/io.h>
+
+ phys_addr = virt_to_phys(virt_addr);
+ virt_addr = phys_to_virt(phys_addr);
+ bus_addr = virt_to_bus(virt_addr);
+ virt_addr = bus_to_virt(bus_addr);
+
+Now, when do you need these?
+
+You want the _virtual_ address when you are actually going to access that
+pointer from the kernel. So you can have something like this:
+
+ /*
+ * this is the hardware "mailbox" we use to communicate with
+ * the controller. The controller sees this directly.
+ */
+ struct mailbox {
+ __u32 status;
+ __u32 bufstart;
+ __u32 buflen;
+ ..
+ } mbox;
+
+ unsigned char * retbuffer;
+
+ /* get the address from the controller */
+ retbuffer = bus_to_virt(mbox.bufstart);
+ switch (retbuffer[0]) {
+ case STATUS_OK:
+ ...
+
+on the other hand, you want the bus address when you have a buffer that
+you want to give to the controller:
+
+ /* ask the controller to read the sense status into "sense_buffer" */
+ mbox.bufstart = virt_to_bus(&sense_buffer);
+ mbox.buflen = sizeof(sense_buffer);
+ mbox.status = 0;
+ notify_controller(&mbox);
+
+And you generally _never_ want to use the physical address, because you can't
+use that from the CPU (the CPU only uses translated virtual addresses), and
+you can't use it from the bus master.
+
+So why do we care about the physical address at all? We do need the physical
+address in some cases, it's just not very often in normal code. The physical
+address is needed if you use memory mappings, for example, because the
+"remap_pfn_range()" mm function wants the physical address of the memory to
+be remapped as measured in units of pages, a.k.a. the pfn (the memory
+management layer doesn't know about devices outside the CPU, so it
+shouldn't need to know about "bus addresses" etc).
+
+NOTE NOTE NOTE! The above is only one part of the whole equation. The above
+only talks about "real memory", that is, CPU memory (RAM).
+
+There is a completely different type of memory too, and that's the "shared
+memory" on the PCI or ISA bus. That's generally not RAM (although in the case
+of a video graphics card it can be normal DRAM that is just used for a frame
+buffer), but can be things like a packet buffer in a network card etc.
+
+This memory is called "PCI memory" or "shared memory" or "IO memory" or
+whatever, and there is only one way to access it: the readb/writeb and
+related functions. You should never take the address of such memory, because
+there is really nothing you can do with such an address: it's not
+conceptually in the same memory space as "real memory" at all, so you cannot
+just dereference a pointer. (Sadly, on x86 it _is_ in the same memory space,
+so on x86 it actually works to just deference a pointer, but it's not
+portable).
+
+For such memory, you can do things like
+
+ - reading:
+ /*
+ * read first 32 bits from ISA memory at 0xC0000, aka
+ * C000:0000 in DOS terms
+ */
+ unsigned int signature = isa_readl(0xC0000);
+
+ - remapping and writing:
+ /*
+ * remap framebuffer PCI memory area at 0xFC000000,
+ * size 1MB, so that we can access it: We can directly
+ * access only the 640k-1MB area, so anything else
+ * has to be remapped.
+ */
+ void __iomem *baseptr = ioremap(0xFC000000, 1024*1024);
+
+ /* write a 'A' to the offset 10 of the area */
+ writeb('A',baseptr+10);
+
+ /* unmap when we unload the driver */
+ iounmap(baseptr);
+
+ - copying and clearing:
+ /* get the 6-byte Ethernet address at ISA address E000:0040 */
+ memcpy_fromio(kernel_buffer, 0xE0040, 6);
+ /* write a packet to the driver */
+ memcpy_toio(0xE1000, skb->data, skb->len);
+ /* clear the frame buffer */
+ memset_io(0xA0000, 0, 0x10000);
+
+OK, that just about covers the basics of accessing IO portably. Questions?
+Comments? You may think that all the above is overly complex, but one day you
+might find yourself with a 500 MHz Alpha in front of you, and then you'll be
+happy that your driver works ;)
+
+Note that kernel versions 2.0.x (and earlier) mistakenly called the
+ioremap() function "vremap()". ioremap() is the proper name, but I
+didn't think straight when I wrote it originally. People who have to
+support both can do something like:
+
+ /* support old naming silliness */
+ #if LINUX_VERSION_CODE < 0x020100
+ #define ioremap vremap
+ #define iounmap vfree
+ #endif
+
+at the top of their source files, and then they can use the right names
+even on 2.0.x systems.
+
+And the above sounds worse than it really is. Most real drivers really
+don't do all that complex things (or rather: the complexity is not so
+much in the actual IO accesses as in error handling and timeouts etc).
+It's generally not hard to fix drivers, and in many cases the code
+actually looks better afterwards:
+
+ unsigned long signature = *(unsigned int *) 0xC0000;
+ vs
+ unsigned long signature = readl(0xC0000);
+
+I think the second version actually is more readable, no?
+
+ Linus
+
diff --git a/Documentation/cachetlb.txt b/Documentation/cachetlb.txt
new file mode 100644
index 000000000..3f9f808b5
--- /dev/null
+++ b/Documentation/cachetlb.txt
@@ -0,0 +1,403 @@
+ Cache and TLB Flushing
+ Under Linux
+
+ David S. Miller <davem@redhat.com>
+
+This document describes the cache/tlb flushing interfaces called
+by the Linux VM subsystem. It enumerates over each interface,
+describes its intended purpose, and what side effect is expected
+after the interface is invoked.
+
+The side effects described below are stated for a uniprocessor
+implementation, and what is to happen on that single processor. The
+SMP cases are a simple extension, in that you just extend the
+definition such that the side effect for a particular interface occurs
+on all processors in the system. Don't let this scare you into
+thinking SMP cache/tlb flushing must be so inefficient, this is in
+fact an area where many optimizations are possible. For example,
+if it can be proven that a user address space has never executed
+on a cpu (see mm_cpumask()), one need not perform a flush
+for this address space on that cpu.
+
+First, the TLB flushing interfaces, since they are the simplest. The
+"TLB" is abstracted under Linux as something the cpu uses to cache
+virtual-->physical address translations obtained from the software
+page tables. Meaning that if the software page tables change, it is
+possible for stale translations to exist in this "TLB" cache.
+Therefore when software page table changes occur, the kernel will
+invoke one of the following flush methods _after_ the page table
+changes occur:
+
+1) void flush_tlb_all(void)
+
+ The most severe flush of all. After this interface runs,
+ any previous page table modification whatsoever will be
+ visible to the cpu.
+
+ This is usually invoked when the kernel page tables are
+ changed, since such translations are "global" in nature.
+
+2) void flush_tlb_mm(struct mm_struct *mm)
+
+ This interface flushes an entire user address space from
+ the TLB. After running, this interface must make sure that
+ any previous page table modifications for the address space
+ 'mm' will be visible to the cpu. That is, after running,
+ there will be no entries in the TLB for 'mm'.
+
+ This interface is used to handle whole address space
+ page table operations such as what happens during
+ fork, and exec.
+
+3) void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+
+ Here we are flushing a specific range of (user) virtual
+ address translations from the TLB. After running, this
+ interface must make sure that any previous page table
+ modifications for the address space 'vma->vm_mm' in the range
+ 'start' to 'end-1' will be visible to the cpu. That is, after
+ running, there will be no entries in the TLB for 'mm' for
+ virtual addresses in the range 'start' to 'end-1'.
+
+ The "vma" is the backing store being used for the region.
+ Primarily, this is used for munmap() type operations.
+
+ The interface is provided in hopes that the port can find
+ a suitably efficient method for removing multiple page
+ sized translations from the TLB, instead of having the kernel
+ call flush_tlb_page (see below) for each entry which may be
+ modified.
+
+4) void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+
+ This time we need to remove the PAGE_SIZE sized translation
+ from the TLB. The 'vma' is the backing structure used by
+ Linux to keep track of mmap'd regions for a process, the
+ address space is available via vma->vm_mm. Also, one may
+ test (vma->vm_flags & VM_EXEC) to see if this region is
+ executable (and thus could be in the 'instruction TLB' in
+ split-tlb type setups).
+
+ After running, this interface must make sure that any previous
+ page table modification for address space 'vma->vm_mm' for
+ user virtual address 'addr' will be visible to the cpu. That
+ is, after running, there will be no entries in the TLB for
+ 'vma->vm_mm' for virtual address 'addr'.
+
+ This is used primarily during fault processing.
+
+5) void update_mmu_cache(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+
+ At the end of every page fault, this routine is invoked to
+ tell the architecture specific code that a translation
+ now exists at virtual address "address" for address space
+ "vma->vm_mm", in the software page tables.
+
+ A port may use this information in any way it so chooses.
+ For example, it could use this event to pre-load TLB
+ translations for software managed TLB configurations.
+ The sparc64 port currently does this.
+
+6) void tlb_migrate_finish(struct mm_struct *mm)
+
+ This interface is called at the end of an explicit
+ process migration. This interface provides a hook
+ to allow a platform to update TLB or context-specific
+ information for the address space.
+
+ The ia64 sn2 platform is one example of a platform
+ that uses this interface.
+
+Next, we have the cache flushing interfaces. In general, when Linux
+is changing an existing virtual-->physical mapping to a new value,
+the sequence will be in one of the following forms:
+
+ 1) flush_cache_mm(mm);
+ change_all_page_tables_of(mm);
+ flush_tlb_mm(mm);
+
+ 2) flush_cache_range(vma, start, end);
+ change_range_of_page_tables(mm, start, end);
+ flush_tlb_range(vma, start, end);
+
+ 3) flush_cache_page(vma, addr, pfn);
+ set_pte(pte_pointer, new_pte_val);
+ flush_tlb_page(vma, addr);
+
+The cache level flush will always be first, because this allows
+us to properly handle systems whose caches are strict and require
+a virtual-->physical translation to exist for a virtual address
+when that virtual address is flushed from the cache. The HyperSparc
+cpu is one such cpu with this attribute.
+
+The cache flushing routines below need only deal with cache flushing
+to the extent that it is necessary for a particular cpu. Mostly,
+these routines must be implemented for cpus which have virtually
+indexed caches which must be flushed when virtual-->physical
+translations are changed or removed. So, for example, the physically
+indexed physically tagged caches of IA32 processors have no need to
+implement these interfaces since the caches are fully synchronized
+and have no dependency on translation information.
+
+Here are the routines, one by one:
+
+1) void flush_cache_mm(struct mm_struct *mm)
+
+ This interface flushes an entire user address space from
+ the caches. That is, after running, there will be no cache
+ lines associated with 'mm'.
+
+ This interface is used to handle whole address space
+ page table operations such as what happens during exit and exec.
+
+2) void flush_cache_dup_mm(struct mm_struct *mm)
+
+ This interface flushes an entire user address space from
+ the caches. That is, after running, there will be no cache
+ lines associated with 'mm'.
+
+ This interface is used to handle whole address space
+ page table operations such as what happens during fork.
+
+ This option is separate from flush_cache_mm to allow some
+ optimizations for VIPT caches.
+
+3) void flush_cache_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+
+ Here we are flushing a specific range of (user) virtual
+ addresses from the cache. After running, there will be no
+ entries in the cache for 'vma->vm_mm' for virtual addresses in
+ the range 'start' to 'end-1'.
+
+ The "vma" is the backing store being used for the region.
+ Primarily, this is used for munmap() type operations.
+
+ The interface is provided in hopes that the port can find
+ a suitably efficient method for removing multiple page
+ sized regions from the cache, instead of having the kernel
+ call flush_cache_page (see below) for each entry which may be
+ modified.
+
+4) void flush_cache_page(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn)
+
+ This time we need to remove a PAGE_SIZE sized range
+ from the cache. The 'vma' is the backing structure used by
+ Linux to keep track of mmap'd regions for a process, the
+ address space is available via vma->vm_mm. Also, one may
+ test (vma->vm_flags & VM_EXEC) to see if this region is
+ executable (and thus could be in the 'instruction cache' in
+ "Harvard" type cache layouts).
+
+ The 'pfn' indicates the physical page frame (shift this value
+ left by PAGE_SHIFT to get the physical address) that 'addr'
+ translates to. It is this mapping which should be removed from
+ the cache.
+
+ After running, there will be no entries in the cache for
+ 'vma->vm_mm' for virtual address 'addr' which translates
+ to 'pfn'.
+
+ This is used primarily during fault processing.
+
+5) void flush_cache_kmaps(void)
+
+ This routine need only be implemented if the platform utilizes
+ highmem. It will be called right before all of the kmaps
+ are invalidated.
+
+ After running, there will be no entries in the cache for
+ the kernel virtual address range PKMAP_ADDR(0) to
+ PKMAP_ADDR(LAST_PKMAP).
+
+ This routing should be implemented in asm/highmem.h
+
+6) void flush_cache_vmap(unsigned long start, unsigned long end)
+ void flush_cache_vunmap(unsigned long start, unsigned long end)
+
+ Here in these two interfaces we are flushing a specific range
+ of (kernel) virtual addresses from the cache. After running,
+ there will be no entries in the cache for the kernel address
+ space for virtual addresses in the range 'start' to 'end-1'.
+
+ The first of these two routines is invoked after map_vm_area()
+ has installed the page table entries. The second is invoked
+ before unmap_kernel_range() deletes the page table entries.
+
+There exists another whole class of cpu cache issues which currently
+require a whole different set of interfaces to handle properly.
+The biggest problem is that of virtual aliasing in the data cache
+of a processor.
+
+Is your port susceptible to virtual aliasing in its D-cache?
+Well, if your D-cache is virtually indexed, is larger in size than
+PAGE_SIZE, and does not prevent multiple cache lines for the same
+physical address from existing at once, you have this problem.
+
+If your D-cache has this problem, first define asm/shmparam.h SHMLBA
+properly, it should essentially be the size of your virtually
+addressed D-cache (or if the size is variable, the largest possible
+size). This setting will force the SYSv IPC layer to only allow user
+processes to mmap shared memory at address which are a multiple of
+this value.
+
+NOTE: This does not fix shared mmaps, check out the sparc64 port for
+one way to solve this (in particular SPARC_FLAG_MMAPSHARED).
+
+Next, you have to solve the D-cache aliasing issue for all
+other cases. Please keep in mind that fact that, for a given page
+mapped into some user address space, there is always at least one more
+mapping, that of the kernel in its linear mapping starting at
+PAGE_OFFSET. So immediately, once the first user maps a given
+physical page into its address space, by implication the D-cache
+aliasing problem has the potential to exist since the kernel already
+maps this page at its virtual address.
+
+ void copy_user_page(void *to, void *from, unsigned long addr, struct page *page)
+ void clear_user_page(void *to, unsigned long addr, struct page *page)
+
+ These two routines store data in user anonymous or COW
+ pages. It allows a port to efficiently avoid D-cache alias
+ issues between userspace and the kernel.
+
+ For example, a port may temporarily map 'from' and 'to' to
+ kernel virtual addresses during the copy. The virtual address
+ for these two pages is chosen in such a way that the kernel
+ load/store instructions happen to virtual addresses which are
+ of the same "color" as the user mapping of the page. Sparc64
+ for example, uses this technique.
+
+ The 'addr' parameter tells the virtual address where the
+ user will ultimately have this page mapped, and the 'page'
+ parameter gives a pointer to the struct page of the target.
+
+ If D-cache aliasing is not an issue, these two routines may
+ simply call memcpy/memset directly and do nothing more.
+
+ void flush_dcache_page(struct page *page)
+
+ Any time the kernel writes to a page cache page, _OR_
+ the kernel is about to read from a page cache page and
+ user space shared/writable mappings of this page potentially
+ exist, this routine is called.
+
+ NOTE: This routine need only be called for page cache pages
+ which can potentially ever be mapped into the address
+ space of a user process. So for example, VFS layer code
+ handling vfs symlinks in the page cache need not call
+ this interface at all.
+
+ The phrase "kernel writes to a page cache page" means,
+ specifically, that the kernel executes store instructions
+ that dirty data in that page at the page->virtual mapping
+ of that page. It is important to flush here to handle
+ D-cache aliasing, to make sure these kernel stores are
+ visible to user space mappings of that page.
+
+ The corollary case is just as important, if there are users
+ which have shared+writable mappings of this file, we must make
+ sure that kernel reads of these pages will see the most recent
+ stores done by the user.
+
+ If D-cache aliasing is not an issue, this routine may
+ simply be defined as a nop on that architecture.
+
+ There is a bit set aside in page->flags (PG_arch_1) as
+ "architecture private". The kernel guarantees that,
+ for pagecache pages, it will clear this bit when such
+ a page first enters the pagecache.
+
+ This allows these interfaces to be implemented much more
+ efficiently. It allows one to "defer" (perhaps indefinitely)
+ the actual flush if there are currently no user processes
+ mapping this page. See sparc64's flush_dcache_page and
+ update_mmu_cache implementations for an example of how to go
+ about doing this.
+
+ The idea is, first at flush_dcache_page() time, if
+ page->mapping->i_mmap is an empty tree, just mark the architecture
+ private page flag bit. Later, in update_mmu_cache(), a check is
+ made of this flag bit, and if set the flush is done and the flag
+ bit is cleared.
+
+ IMPORTANT NOTE: It is often important, if you defer the flush,
+ that the actual flush occurs on the same CPU
+ as did the cpu stores into the page to make it
+ dirty. Again, see sparc64 for examples of how
+ to deal with this.
+
+ void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long user_vaddr,
+ void *dst, void *src, int len)
+ void copy_from_user_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long user_vaddr,
+ void *dst, void *src, int len)
+ When the kernel needs to copy arbitrary data in and out
+ of arbitrary user pages (f.e. for ptrace()) it will use
+ these two routines.
+
+ Any necessary cache flushing or other coherency operations
+ that need to occur should happen here. If the processor's
+ instruction cache does not snoop cpu stores, it is very
+ likely that you will need to flush the instruction cache
+ for copy_to_user_page().
+
+ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
+ unsigned long vmaddr)
+ When the kernel needs to access the contents of an anonymous
+ page, it calls this function (currently only
+ get_user_pages()). Note: flush_dcache_page() deliberately
+ doesn't work for an anonymous page. The default
+ implementation is a nop (and should remain so for all coherent
+ architectures). For incoherent architectures, it should flush
+ the cache of the page at vmaddr.
+
+ void flush_kernel_dcache_page(struct page *page)
+ When the kernel needs to modify a user page is has obtained
+ with kmap, it calls this function after all modifications are
+ complete (but before kunmapping it) to bring the underlying
+ page up to date. It is assumed here that the user has no
+ incoherent cached copies (i.e. the original page was obtained
+ from a mechanism like get_user_pages()). The default
+ implementation is a nop and should remain so on all coherent
+ architectures. On incoherent architectures, this should flush
+ the kernel cache for page (using page_address(page)).
+
+
+ void flush_icache_range(unsigned long start, unsigned long end)
+ When the kernel stores into addresses that it will execute
+ out of (eg when loading modules), this function is called.
+
+ If the icache does not snoop stores then this routine will need
+ to flush it.
+
+ void flush_icache_page(struct vm_area_struct *vma, struct page *page)
+ All the functionality of flush_icache_page can be implemented in
+ flush_dcache_page and update_mmu_cache. In the future, the hope
+ is to remove this interface completely.
+
+The final category of APIs is for I/O to deliberately aliased address
+ranges inside the kernel. Such aliases are set up by use of the
+vmap/vmalloc API. Since kernel I/O goes via physical pages, the I/O
+subsystem assumes that the user mapping and kernel offset mapping are
+the only aliases. This isn't true for vmap aliases, so anything in
+the kernel trying to do I/O to vmap areas must manually manage
+coherency. It must do this by flushing the vmap range before doing
+I/O and invalidating it after the I/O returns.
+
+ void flush_kernel_vmap_range(void *vaddr, int size)
+ flushes the kernel cache for a given virtual address range in
+ the vmap area. This is to make sure that any data the kernel
+ modified in the vmap range is made visible to the physical
+ page. The design is to make this area safe to perform I/O on.
+ Note that this API does *not* also flush the offset map alias
+ of the area.
+
+ void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates
+ the cache for a given virtual address range in the vmap area
+ which prevents the processor from making the cache stale by
+ speculatively reading data while the I/O was occurring to the
+ physical pages. This is only necessary for data reads into the
+ vmap area.
diff --git a/Documentation/cdrom/00-INDEX b/Documentation/cdrom/00-INDEX
new file mode 100644
index 000000000..433edf23d
--- /dev/null
+++ b/Documentation/cdrom/00-INDEX
@@ -0,0 +1,11 @@
+00-INDEX
+ - this file (info on CD-ROMs and Linux)
+Makefile
+ - only used to generate TeX output from the documentation.
+cdrom-standard.tex
+ - LaTeX document on standardizing the CD-ROM programming interface.
+ide-cd
+ - info on setting up and using ATAPI (aka IDE) CD-ROMs.
+packet-writing.txt
+ - Info on the CDRW packet writing module
+
diff --git a/Documentation/cdrom/Makefile b/Documentation/cdrom/Makefile
new file mode 100644
index 000000000..a19e32192
--- /dev/null
+++ b/Documentation/cdrom/Makefile
@@ -0,0 +1,21 @@
+LATEXFILE = cdrom-standard
+
+all:
+ make clean
+ latex $(LATEXFILE)
+ latex $(LATEXFILE)
+ @if [ -x `which gv` ]; then \
+ `dvips -q -t letter -o $(LATEXFILE).ps $(LATEXFILE).dvi` ;\
+ `gv -antialias -media letter -nocenter $(LATEXFILE).ps` ;\
+ else \
+ `xdvi $(LATEXFILE).dvi &` ;\
+ fi
+ make sortofclean
+
+clean:
+ rm -f $(LATEXFILE).ps $(LATEXFILE).dvi $(LATEXFILE).aux $(LATEXFILE).log
+
+sortofclean:
+ rm -f $(LATEXFILE).aux $(LATEXFILE).log
+
+
diff --git a/Documentation/cdrom/cdrom-standard.tex b/Documentation/cdrom/cdrom-standard.tex
new file mode 100644
index 000000000..c06233fe5
--- /dev/null
+++ b/Documentation/cdrom/cdrom-standard.tex
@@ -0,0 +1,1022 @@
+\documentclass{article}
+\def\version{$Id: cdrom-standard.tex,v 1.9 1997/12/28 15:42:49 david Exp $}
+\newcommand{\newsection}[1]{\newpage\section{#1}}
+
+\evensidemargin=0pt
+\oddsidemargin=0pt
+\topmargin=-\headheight \advance\topmargin by -\headsep
+\textwidth=15.99cm \textheight=24.62cm % normal A4, 1'' margin
+
+\def\linux{{\sc Linux}}
+\def\cdrom{{\sc cd-rom}}
+\def\UCD{{\sc Uniform cd-rom Driver}}
+\def\cdromc{{\tt {cdrom.c}}}
+\def\cdromh{{\tt {cdrom.h}}}
+\def\fo{\sl} % foreign words
+\def\ie{{\fo i.e.}}
+\def\eg{{\fo e.g.}}
+
+\everymath{\it} \everydisplay{\it}
+\catcode `\_=\active \def_{\_\penalty100 }
+\catcode`\<=\active \def<#1>{{\langle\hbox{\rm#1}\rangle}}
+
+\begin{document}
+\title{A \linux\ \cdrom\ standard}
+\author{David van Leeuwen\\{\normalsize\tt david@ElseWare.cistron.nl}
+\\{\footnotesize updated by Erik Andersen {\tt(andersee@debian.org)}}
+\\{\footnotesize updated by Jens Axboe {\tt(axboe@image.dk)}}}
+\date{12 March 1999}
+
+\maketitle
+
+\newsection{Introduction}
+
+\linux\ is probably the Unix-like operating system that supports
+the widest variety of hardware devices. The reasons for this are
+presumably
+\begin{itemize}
+\item
+ The large list of hardware devices available for the many platforms
+ that \linux\ now supports (\ie, i386-PCs, Sparc Suns, etc.)
+\item
+ The open design of the operating system, such that anybody can write a
+ driver for \linux.
+\item
+ There is plenty of source code around as examples of how to write a driver.
+\end{itemize}
+The openness of \linux, and the many different types of available
+hardware has allowed \linux\ to support many different hardware devices.
+Unfortunately, the very openness that has allowed \linux\ to support
+all these different devices has also allowed the behavior of each
+device driver to differ significantly from one device to another.
+This divergence of behavior has been very significant for \cdrom\
+devices; the way a particular drive reacts to a `standard' $ioctl()$
+call varies greatly from one device driver to another. To avoid making
+their drivers totally inconsistent, the writers of \linux\ \cdrom\
+drivers generally created new device drivers by understanding, copying,
+and then changing an existing one. Unfortunately, this practice did not
+maintain uniform behavior across all the \linux\ \cdrom\ drivers.
+
+This document describes an effort to establish Uniform behavior across
+all the different \cdrom\ device drivers for \linux. This document also
+defines the various $ioctl$s, and how the low-level \cdrom\ device
+drivers should implement them. Currently (as of the \linux\ 2.1.$x$
+development kernels) several low-level \cdrom\ device drivers, including
+both IDE/ATAPI and SCSI, now use this Uniform interface.
+
+When the \cdrom\ was developed, the interface between the \cdrom\ drive
+and the computer was not specified in the standards. As a result, many
+different \cdrom\ interfaces were developed. Some of them had their
+own proprietary design (Sony, Mitsumi, Panasonic, Philips), other
+manufacturers adopted an existing electrical interface and changed
+the functionality (CreativeLabs/SoundBlaster, Teac, Funai) or simply
+adapted their drives to one or more of the already existing electrical
+interfaces (Aztech, Sanyo, Funai, Vertos, Longshine, Optics Storage and
+most of the `NoName' manufacturers). In cases where a new drive really
+brought its own interface or used its own command set and flow control
+scheme, either a separate driver had to be written, or an existing
+driver had to be enhanced. History has delivered us \cdrom\ support for
+many of these different interfaces. Nowadays, almost all new \cdrom\
+drives are either IDE/ATAPI or SCSI, and it is very unlikely that any
+manufacturer will create a new interface. Even finding drives for the
+old proprietary interfaces is getting difficult.
+
+When (in the 1.3.70's) I looked at the existing software interface,
+which was expressed through \cdromh, it appeared to be a rather wild
+set of commands and data formats.\footnote{I cannot recollect what
+kernel version I looked at, then, presumably 1.2.13 and 1.3.34---the
+latest kernel that I was indirectly involved in.} It seemed that many
+features of the software interface had been added to accommodate the
+capabilities of a particular drive, in an {\fo ad hoc\/} manner. More
+importantly, it appeared that the behavior of the `standard' commands
+was different for most of the different drivers: \eg, some drivers
+close the tray if an $open()$ call occurs when the tray is open, while
+others do not. Some drivers lock the door upon opening the device, to
+prevent an incoherent file system, but others don't, to allow software
+ejection. Undoubtedly, the capabilities of the different drives vary,
+but even when two drives have the same capability their drivers'
+behavior was usually different.
+
+I decided to start a discussion on how to make all the \linux\ \cdrom\
+drivers behave more uniformly. I began by contacting the developers of
+the many \cdrom\ drivers found in the \linux\ kernel. Their reactions
+encouraged me to write the \UCD\ which this document is intended to
+describe. The implementation of the \UCD\ is in the file \cdromc. This
+driver is intended to be an additional software layer that sits on top
+of the low-level device drivers for each \cdrom\ drive. By adding this
+additional layer, it is possible to have all the different \cdrom\
+devices behave {\em exactly\/} the same (insofar as the underlying
+hardware will allow).
+
+The goal of the \UCD\ is {\em not\/} to alienate driver developers who
+have not yet taken steps to support this effort. The goal of \UCD\ is
+simply to give people writing application programs for \cdrom\ drives
+{\em one\/} \linux\ \cdrom\ interface with consistent behavior for all
+\cdrom\ devices. In addition, this also provides a consistent interface
+between the low-level device driver code and the \linux\ kernel. Care
+is taken that 100\,\% compatibility exists with the data structures and
+programmer's interface defined in \cdromh. This guide was written to
+help \cdrom\ driver developers adapt their code to use the \UCD\ code
+defined in \cdromc.
+
+Personally, I think that the most important hardware interfaces are
+the IDE/ATAPI drives and, of course, the SCSI drives, but as prices
+of hardware drop continuously, it is also likely that people may have
+more than one \cdrom\ drive, possibly of mixed types. It is important
+that these drives behave in the same way. In December 1994, one of the
+cheapest \cdrom\ drives was a Philips cm206, a double-speed proprietary
+drive. In the months that I was busy writing a \linux\ driver for it,
+proprietary drives became obsolete and IDE/ATAPI drives became the
+standard. At the time of the last update to this document (November
+1997) it is becoming difficult to even {\em find} anything less than a
+16 speed \cdrom\ drive, and 24 speed drives are common.
+
+\newsection{Standardizing through another software level}
+\label{cdrom.c}
+
+At the time this document was conceived, all drivers directly
+implemented the \cdrom\ $ioctl()$ calls through their own routines. This
+led to the danger of different drivers forgetting to do important things
+like checking that the user was giving the driver valid data. More
+importantly, this led to the divergence of behavior, which has already
+been discussed.
+
+For this reason, the \UCD\ was created to enforce consistent \cdrom\
+drive behavior, and to provide a common set of services to the various
+low-level \cdrom\ device drivers. The \UCD\ now provides another
+software-level, that separates the $ioctl()$ and $open()$ implementation
+from the actual hardware implementation. Note that this effort has
+made few changes which will affect a user's application programs. The
+greatest change involved moving the contents of the various low-level
+\cdrom\ drivers' header files to the kernel's cdrom directory. This was
+done to help ensure that the user is only presented with only one cdrom
+interface, the interface defined in \cdromh.
+
+\cdrom\ drives are specific enough (\ie, different from other
+block-devices such as floppy or hard disc drives), to define a set
+of common {\em \cdrom\ device operations}, $<cdrom-device>_dops$.
+These operations are different from the classical block-device file
+operations, $<block-device>_fops$.
+
+The routines for the \UCD\ interface level are implemented in the file
+\cdromc. In this file, the \UCD\ interfaces with the kernel as a block
+device by registering the following general $struct\ file_operations$:
+$$
+\halign{$#$\ \hfil&$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
+struct& file_operations\ cdrom_fops = \{\hidewidth\cr
+ &NULL, & lseek \cr
+ &block_read, & read---general block-dev read \cr
+ &block_write, & write---general block-dev write \cr
+ &NULL, & readdir \cr
+ &NULL, & select \cr
+ &cdrom_ioctl, & ioctl \cr
+ &NULL, & mmap \cr
+ &cdrom_open, & open \cr
+ &cdrom_release, & release \cr
+ &NULL, & fsync \cr
+ &NULL, & fasync \cr
+ &cdrom_media_changed, & media change \cr
+ &NULL & revalidate \cr
+\};\cr
+}
+$$
+
+Every active \cdrom\ device shares this $struct$. The routines
+declared above are all implemented in \cdromc, since this file is the
+place where the behavior of all \cdrom-devices is defined and
+standardized. The actual interface to the various types of \cdrom\
+hardware is still performed by various low-level \cdrom-device
+drivers. These routines simply implement certain {\em capabilities\/}
+that are common to all \cdrom\ (and really, all removable-media
+devices).
+
+Registration of a low-level \cdrom\ device driver is now done through
+the general routines in \cdromc, not through the Virtual File System
+(VFS) any more. The interface implemented in \cdromc\ is carried out
+through two general structures that contain information about the
+capabilities of the driver, and the specific drives on which the
+driver operates. The structures are:
+\begin{description}
+\item[$cdrom_device_ops$]
+ This structure contains information about the low-level driver for a
+ \cdrom\ device. This structure is conceptually connected to the major
+ number of the device (although some drivers may have different
+ major numbers, as is the case for the IDE driver).
+\item[$cdrom_device_info$]
+ This structure contains information about a particular \cdrom\ drive,
+ such as its device name, speed, etc. This structure is conceptually
+ connected to the minor number of the device.
+\end{description}
+
+Registering a particular \cdrom\ drive with the \UCD\ is done by the
+low-level device driver though a call to:
+$$register_cdrom(struct\ cdrom_device_info * <device>_info)
+$$
+The device information structure, $<device>_info$, contains all the
+information needed for the kernel to interface with the low-level
+\cdrom\ device driver. One of the most important entries in this
+structure is a pointer to the $cdrom_device_ops$ structure of the
+low-level driver.
+
+The device operations structure, $cdrom_device_ops$, contains a list
+of pointers to the functions which are implemented in the low-level
+device driver. When \cdromc\ accesses a \cdrom\ device, it does it
+through the functions in this structure. It is impossible to know all
+the capabilities of future \cdrom\ drives, so it is expected that this
+list may need to be expanded from time to time as new technologies are
+developed. For example, CD-R and CD-R/W drives are beginning to become
+popular, and support will soon need to be added for them. For now, the
+current $struct$ is:
+$$
+\halign{$#$\ \hfil&$#$\ \hfil&\hbox to 10em{$#$\hss}&
+ $/*$ \rm# $*/$\hfil\cr
+struct& cdrom_device_ops\ \{ \hidewidth\cr
+ &int& (* open)(struct\ cdrom_device_info *, int)\cr
+ &void& (* release)(struct\ cdrom_device_info *);\cr
+ &int& (* drive_status)(struct\ cdrom_device_info *, int);\cr
+ &int& (* media_changed)(struct\ cdrom_device_info *, int);\cr
+ &int& (* tray_move)(struct\ cdrom_device_info *, int);\cr
+ &int& (* lock_door)(struct\ cdrom_device_info *, int);\cr
+ &int& (* select_speed)(struct\ cdrom_device_info *, int);\cr
+ &int& (* select_disc)(struct\ cdrom_device_info *, int);\cr
+ &int& (* get_last_session) (struct\ cdrom_device_info *,
+ struct\ cdrom_multisession *{});\cr
+ &int& (* get_mcn)(struct\ cdrom_device_info *, struct\ cdrom_mcn *{});\cr
+ &int& (* reset)(struct\ cdrom_device_info *);\cr
+ &int& (* audio_ioctl)(struct\ cdrom_device_info *, unsigned\ int,
+ void *{});\cr
+ &int& (* dev_ioctl)(struct\ cdrom_device_info *, unsigned\ int,
+ unsigned\ long);\cr
+\noalign{\medskip}
+ &const\ int& capability;& capability flags \cr
+ &int& n_minors;& number of active minor devices \cr
+\};\cr
+}
+$$
+When a low-level device driver implements one of these capabilities,
+it should add a function pointer to this $struct$. When a particular
+function is not implemented, however, this $struct$ should contain a
+NULL instead. The $capability$ flags specify the capabilities of the
+\cdrom\ hardware and/or low-level \cdrom\ driver when a \cdrom\ drive
+is registered with the \UCD. The value $n_minors$ should be a positive
+value indicating the number of minor devices that are supported by
+the low-level device driver, normally~1. Although these two variables
+are `informative' rather than `operational,' they are included in
+$cdrom_device_ops$ because they describe the capability of the {\em
+driver\/} rather than the {\em drive}. Nomenclature has always been
+difficult in computer programming.
+
+Note that most functions have fewer parameters than their
+$blkdev_fops$ counterparts. This is because very little of the
+information in the structures $inode$ and $file$ is used. For most
+drivers, the main parameter is the $struct$ $cdrom_device_info$, from
+which the major and minor number can be extracted. (Most low-level
+\cdrom\ drivers don't even look at the major and minor number though,
+since many of them only support one device.) This will be available
+through $dev$ in $cdrom_device_info$ described below.
+
+The drive-specific, minor-like information that is registered with
+\cdromc, currently contains the following fields:
+$$
+\halign{$#$\ \hfil&$#$\ \hfil&\hbox to 10em{$#$\hss}&
+ $/*$ \rm# $*/$\hfil\cr
+struct& cdrom_device_info\ \{ \hidewidth\cr
+ & struct\ cdrom_device_ops *& ops;& device operations for this major\cr
+ & struct\ cdrom_device_info *& next;& next device_info for this major\cr
+ & void *& handle;& driver-dependent data\cr
+\noalign{\medskip}
+ & kdev_t& dev;& device number (incorporates minor)\cr
+ & int& mask;& mask of capability: disables them \cr
+ & int& speed;& maximum speed for reading data \cr
+ & int& capacity;& number of discs in a jukebox \cr
+\noalign{\medskip}
+ &int& options : 30;& options flags \cr
+ &unsigned& mc_flags : 2;& media-change buffer flags \cr
+ & int& use_count;& number of times device is opened\cr
+ & char& name[20];& name of the device type\cr
+\}\cr
+}$$
+Using this $struct$, a linked list of the registered minor devices is
+built, using the $next$ field. The device number, the device operations
+struct and specifications of properties of the drive are stored in this
+structure.
+
+The $mask$ flags can be used to mask out some of the capabilities listed
+in $ops\to capability$, if a specific drive doesn't support a feature
+of the driver. The value $speed$ specifies the maximum head-rate of the
+drive, measured in units of normal audio speed (176\,kB/sec raw data or
+150\,kB/sec file system data). The value $n_discs$ should reflect the
+number of discs the drive can hold simultaneously, if it is designed
+as a juke-box, or otherwise~1. The parameters are declared $const$
+because they describe properties of the drive, which don't change after
+registration.
+
+A few registers contain variables local to the \cdrom\ drive. The
+flags $options$ are used to specify how the general \cdrom\ routines
+should behave. These various flags registers should provide enough
+flexibility to adapt to the different users' wishes (and {\em not\/} the
+`arbitrary' wishes of the author of the low-level device driver, as is
+the case in the old scheme). The register $mc_flags$ is used to buffer
+the information from $media_changed()$ to two separate queues. Other
+data that is specific to a minor drive, can be accessed through $handle$,
+which can point to a data structure specific to the low-level driver.
+The fields $use_count$, $next$, $options$ and $mc_flags$ need not be
+initialized.
+
+The intermediate software layer that \cdromc\ forms will perform some
+additional bookkeeping. The use count of the device (the number of
+processes that have the device opened) is registered in $use_count$. The
+function $cdrom_ioctl()$ will verify the appropriate user-memory regions
+for read and write, and in case a location on the CD is transferred,
+it will `sanitize' the format by making requests to the low-level
+drivers in a standard format, and translating all formats between the
+user-software and low level drivers. This relieves much of the drivers'
+memory checking and format checking and translation. Also, the necessary
+structures will be declared on the program stack.
+
+The implementation of the functions should be as defined in the
+following sections. Two functions {\em must\/} be implemented, namely
+$open()$ and $release()$. Other functions may be omitted, their
+corresponding capability flags will be cleared upon registration.
+Generally, a function returns zero on success and negative on error. A
+function call should return only after the command has completed, but of
+course waiting for the device should not use processor time.
+
+\subsection{$Int\ open(struct\ cdrom_device_info * cdi, int\ purpose)$}
+
+$Open()$ should try to open the device for a specific $purpose$, which
+can be either:
+\begin{itemize}
+\item[0] Open for reading data, as done by {\tt {mount()}} (2), or the
+user commands {\tt {dd}} or {\tt {cat}}.
+\item[1] Open for $ioctl$ commands, as done by audio-CD playing
+programs.
+\end{itemize}
+Notice that any strategic code (closing tray upon $open()$, etc.)\ is
+done by the calling routine in \cdromc, so the low-level routine
+should only be concerned with proper initialization, such as spinning
+up the disc, etc. % and device-use count
+
+
+\subsection{$Void\ release(struct\ cdrom_device_info * cdi)$}
+
+
+Device-specific actions should be taken such as spinning down the device.
+However, strategic actions such as ejection of the tray, or unlocking
+the door, should be left over to the general routine $cdrom_release()$.
+This is the only function returning type $void$.
+
+\subsection{$Int\ drive_status(struct\ cdrom_device_info * cdi, int\ slot_nr)$}
+\label{drive status}
+
+The function $drive_status$, if implemented, should provide
+information on the status of the drive (not the status of the disc,
+which may or may not be in the drive). If the drive is not a changer,
+$slot_nr$ should be ignored. In \cdromh\ the possibilities are listed:
+$$
+\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
+CDS_NO_INFO& no information available\cr
+CDS_NO_DISC& no disc is inserted, tray is closed\cr
+CDS_TRAY_OPEN& tray is opened\cr
+CDS_DRIVE_NOT_READY& something is wrong, tray is moving?\cr
+CDS_DISC_OK& a disc is loaded and everything is fine\cr
+}
+$$
+
+\subsection{$Int\ media_changed(struct\ cdrom_device_info * cdi, int\ disc_nr)$}
+
+This function is very similar to the original function in $struct\
+file_operations$. It returns 1 if the medium of the device $cdi\to
+dev$ has changed since the last call, and 0 otherwise. The parameter
+$disc_nr$ identifies a specific slot in a juke-box, it should be
+ignored for single-disc drives. Note that by `re-routing' this
+function through $cdrom_media_changed()$, we can implement separate
+queues for the VFS and a new $ioctl()$ function that can report device
+changes to software (\eg, an auto-mounting daemon).
+
+\subsection{$Int\ tray_move(struct\ cdrom_device_info * cdi, int\ position)$}
+
+This function, if implemented, should control the tray movement. (No
+other function should control this.) The parameter $position$ controls
+the desired direction of movement:
+\begin{itemize}
+\item[0] Close tray
+\item[1] Open tray
+\end{itemize}
+This function returns 0 upon success, and a non-zero value upon
+error. Note that if the tray is already in the desired position, no
+action need be taken, and the return value should be 0.
+
+\subsection{$Int\ lock_door(struct\ cdrom_device_info * cdi, int\ lock)$}
+
+This function (and no other code) controls locking of the door, if the
+drive allows this. The value of $lock$ controls the desired locking
+state:
+\begin{itemize}
+\item[0] Unlock door, manual opening is allowed
+\item[1] Lock door, tray cannot be ejected manually
+\end{itemize}
+This function returns 0 upon success, and a non-zero value upon
+error. Note that if the door is already in the requested state, no
+action need be taken, and the return value should be 0.
+
+\subsection{$Int\ select_speed(struct\ cdrom_device_info * cdi, int\ speed)$}
+
+Some \cdrom\ drives are capable of changing their head-speed. There
+are several reasons for changing the speed of a \cdrom\ drive. Badly
+pressed \cdrom s may benefit from less-than-maximum head rate. Modern
+\cdrom\ drives can obtain very high head rates (up to $24\times$ is
+common). It has been reported that these drives can make reading
+errors at these high speeds, reducing the speed can prevent data loss
+in these circumstances. Finally, some of these drives can
+make an annoyingly loud noise, which a lower speed may reduce. %Finally,
+%although the audio-low-pass filters probably aren't designed for it,
+%more than real-time playback of audio might be used for high-speed
+%copying of audio tracks.
+
+This function specifies the speed at which data is read or audio is
+played back. The value of $speed$ specifies the head-speed of the
+drive, measured in units of standard cdrom speed (176\,kB/sec raw data
+or 150\,kB/sec file system data). So to request that a \cdrom\ drive
+operate at 300\,kB/sec you would call the CDROM_SELECT_SPEED $ioctl$
+with $speed=2$. The special value `0' means `auto-selection', \ie,
+maximum data-rate or real-time audio rate. If the drive doesn't have
+this `auto-selection' capability, the decision should be made on the
+current disc loaded and the return value should be positive. A negative
+return value indicates an error.
+
+\subsection{$Int\ select_disc(struct\ cdrom_device_info * cdi, int\ number)$}
+
+If the drive can store multiple discs (a juke-box) this function
+will perform disc selection. It should return the number of the
+selected disc on success, a negative value on error. Currently, only
+the ide-cd driver supports this functionality.
+
+\subsection{$Int\ get_last_session(struct\ cdrom_device_info * cdi, struct\
+ cdrom_multisession * ms_info)$}
+
+This function should implement the old corresponding $ioctl()$. For
+device $cdi\to dev$, the start of the last session of the current disc
+should be returned in the pointer argument $ms_info$. Note that
+routines in \cdromc\ have sanitized this argument: its requested
+format will {\em always\/} be of the type $CDROM_LBA$ (linear block
+addressing mode), whatever the calling software requested. But
+sanitization goes even further: the low-level implementation may
+return the requested information in $CDROM_MSF$ format if it wishes so
+(setting the $ms_info\rightarrow addr_format$ field appropriately, of
+course) and the routines in \cdromc\ will make the transformation if
+necessary. The return value is 0 upon success.
+
+\subsection{$Int\ get_mcn(struct\ cdrom_device_info * cdi, struct\
+ cdrom_mcn * mcn)$}
+
+Some discs carry a `Media Catalog Number' (MCN), also called
+`Universal Product Code' (UPC). This number should reflect the number
+that is generally found in the bar-code on the product. Unfortunately,
+the few discs that carry such a number on the disc don't even use the
+same format. The return argument to this function is a pointer to a
+pre-declared memory region of type $struct\ cdrom_mcn$. The MCN is
+expected as a 13-character string, terminated by a null-character.
+
+\subsection{$Int\ reset(struct\ cdrom_device_info * cdi)$}
+
+This call should perform a hard-reset on the drive (although in
+circumstances that a hard-reset is necessary, a drive may very well not
+listen to commands anymore). Preferably, control is returned to the
+caller only after the drive has finished resetting. If the drive is no
+longer listening, it may be wise for the underlying low-level cdrom
+driver to time out.
+
+\subsection{$Int\ audio_ioctl(struct\ cdrom_device_info * cdi, unsigned\
+ int\ cmd, void * arg)$}
+
+Some of the \cdrom-$ioctl$s defined in \cdromh\ can be
+implemented by the routines described above, and hence the function
+$cdrom_ioctl$ will use those. However, most $ioctl$s deal with
+audio-control. We have decided to leave these to be accessed through a
+single function, repeating the arguments $cmd$ and $arg$. Note that
+the latter is of type $void*{}$, rather than $unsigned\ long\
+int$. The routine $cdrom_ioctl()$ does do some useful things,
+though. It sanitizes the address format type to $CDROM_MSF$ (Minutes,
+Seconds, Frames) for all audio calls. It also verifies the memory
+location of $arg$, and reserves stack-memory for the argument. This
+makes implementation of the $audio_ioctl()$ much simpler than in the
+old driver scheme. For example, you may look up the function
+$cm206_audio_ioctl()$ in {\tt {cm206.c}} that should be updated with
+this documentation.
+
+An unimplemented ioctl should return $-ENOSYS$, but a harmless request
+(\eg, $CDROMSTART$) may be ignored by returning 0 (success). Other
+errors should be according to the standards, whatever they are. When
+an error is returned by the low-level driver, the \UCD\ tries whenever
+possible to return the error code to the calling program. (We may decide
+to sanitize the return value in $cdrom_ioctl()$ though, in order to
+guarantee a uniform interface to the audio-player software.)
+
+\subsection{$Int\ dev_ioctl(struct\ cdrom_device_info * cdi, unsigned\ int\
+ cmd, unsigned\ long\ arg)$}
+
+Some $ioctl$s seem to be specific to certain \cdrom\ drives. That is,
+they are introduced to service some capabilities of certain drives. In
+fact, there are 6 different $ioctl$s for reading data, either in some
+particular kind of format, or audio data. Not many drives support
+reading audio tracks as data, I believe this is because of protection
+of copyrights of artists. Moreover, I think that if audio-tracks are
+supported, it should be done through the VFS and not via $ioctl$s. A
+problem here could be the fact that audio-frames are 2352 bytes long,
+so either the audio-file-system should ask for 75264 bytes at once
+(the least common multiple of 512 and 2352), or the drivers should
+bend their backs to cope with this incoherence (to which I would be
+opposed). Furthermore, it is very difficult for the hardware to find
+the exact frame boundaries, since there are no synchronization headers
+in audio frames. Once these issues are resolved, this code should be
+standardized in \cdromc.
+
+Because there are so many $ioctl$s that seem to be introduced to
+satisfy certain drivers,\footnote{Is there software around that
+ actually uses these? I'd be interested!} any `non-standard' $ioctl$s
+are routed through the call $dev_ioctl()$. In principle, `private'
+$ioctl$s should be numbered after the device's major number, and not
+the general \cdrom\ $ioctl$ number, {\tt {0x53}}. Currently the
+non-supported $ioctl$s are: {\it CDROMREADMODE1, CDROMREADMODE2,
+ CDROMREADAUDIO, CDROMREADRAW, CDROMREADCOOKED, CDROMSEEK,
+ CDROMPLAY\-BLK and CDROM\-READALL}.
+
+
+\subsection{\cdrom\ capabilities}
+\label{capability}
+
+Instead of just implementing some $ioctl$ calls, the interface in
+\cdromc\ supplies the possibility to indicate the {\em capabilities\/}
+of a \cdrom\ drive. This can be done by ORing any number of
+capability-constants that are defined in \cdromh\ at the registration
+phase. Currently, the capabilities are any of:
+$$
+\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
+CDC_CLOSE_TRAY& can close tray by software control\cr
+CDC_OPEN_TRAY& can open tray\cr
+CDC_LOCK& can lock and unlock the door\cr
+CDC_SELECT_SPEED& can select speed, in units of $\sim$150\,kB/s\cr
+CDC_SELECT_DISC& drive is juke-box\cr
+CDC_MULTI_SESSION& can read sessions $>\rm1$\cr
+CDC_MCN& can read Media Catalog Number\cr
+CDC_MEDIA_CHANGED& can report if disc has changed\cr
+CDC_PLAY_AUDIO& can perform audio-functions (play, pause, etc)\cr
+CDC_RESET& hard reset device\cr
+CDC_IOCTLS& driver has non-standard ioctls\cr
+CDC_DRIVE_STATUS& driver implements drive status\cr
+}
+$$
+The capability flag is declared $const$, to prevent drivers from
+accidentally tampering with the contents. The capability fags actually
+inform \cdromc\ of what the driver can do. If the drive found
+by the driver does not have the capability, is can be masked out by
+the $cdrom_device_info$ variable $mask$. For instance, the SCSI \cdrom\
+driver has implemented the code for loading and ejecting \cdrom's, and
+hence its corresponding flags in $capability$ will be set. But a SCSI
+\cdrom\ drive might be a caddy system, which can't load the tray, and
+hence for this drive the $cdrom_device_info$ struct will have set
+the $CDC_CLOSE_TRAY$ bit in $mask$.
+
+In the file \cdromc\ you will encounter many constructions of the type
+$$\it
+if\ (cdo\rightarrow capability \mathrel\& \mathord{\sim} cdi\rightarrow mask
+ \mathrel{\&} CDC_<capability>) \ldots
+$$
+There is no $ioctl$ to set the mask\dots The reason is that
+I think it is better to control the {\em behavior\/} rather than the
+{\em capabilities}.
+
+\subsection{Options}
+
+A final flag register controls the {\em behavior\/} of the \cdrom\
+drives, in order to satisfy different users' wishes, hopefully
+independently of the ideas of the respective author who happened to
+have made the drive's support available to the \linux\ community. The
+current behavior options are:
+$$
+\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
+CDO_AUTO_CLOSE& try to close tray upon device $open()$\cr
+CDO_AUTO_EJECT& try to open tray on last device $close()$\cr
+CDO_USE_FFLAGS& use $file_pointer\rightarrow f_flags$ to indicate
+ purpose for $open()$\cr
+CDO_LOCK& try to lock door if device is opened\cr
+CDO_CHECK_TYPE& ensure disc type is data if opened for data\cr
+}
+$$
+
+The initial value of this register is $CDO_AUTO_CLOSE \mathrel|
+CDO_USE_FFLAGS \mathrel| CDO_LOCK$, reflecting my own view on user
+interface and software standards. Before you protest, there are two
+new $ioctl$s implemented in \cdromc, that allow you to control the
+behavior by software. These are:
+$$
+\halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
+CDROM_SET_OPTIONS& set options specified in $(int)\ arg$\cr
+CDROM_CLEAR_OPTIONS& clear options specified in $(int)\ arg$\cr
+}
+$$
+One option needs some more explanation: $CDO_USE_FFLAGS$. In the next
+newsection we explain what the need for this option is.
+
+A software package {\tt setcd}, available from the Debian distribution
+and {\tt sunsite.unc.edu}, allows user level control of these flags.
+
+\newsection{The need to know the purpose of opening the \cdrom\ device}
+
+Traditionally, Unix devices can be used in two different `modes',
+either by reading/writing to the device file, or by issuing
+controlling commands to the device, by the device's $ioctl()$
+call. The problem with \cdrom\ drives, is that they can be used for
+two entirely different purposes. One is to mount removable
+file systems, \cdrom s, the other is to play audio CD's. Audio commands
+are implemented entirely through $ioctl$s, presumably because the
+first implementation (SUN?) has been such. In principle there is
+nothing wrong with this, but a good control of the `CD player' demands
+that the device can {\em always\/} be opened in order to give the
+$ioctl$ commands, regardless of the state the drive is in.
+
+On the other hand, when used as a removable-media disc drive (what the
+original purpose of \cdrom s is) we would like to make sure that the
+disc drive is ready for operation upon opening the device. In the old
+scheme, some \cdrom\ drivers don't do any integrity checking, resulting
+in a number of i/o errors reported by the VFS to the kernel when an
+attempt for mounting a \cdrom\ on an empty drive occurs. This is not a
+particularly elegant way to find out that there is no \cdrom\ inserted;
+it more-or-less looks like the old IBM-PC trying to read an empty floppy
+drive for a couple of seconds, after which the system complains it
+can't read from it. Nowadays we can {\em sense\/} the existence of a
+removable medium in a drive, and we believe we should exploit that
+fact. An integrity check on opening of the device, that verifies the
+availability of a \cdrom\ and its correct type (data), would be
+desirable.
+
+These two ways of using a \cdrom\ drive, principally for data and
+secondarily for playing audio discs, have different demands for the
+behavior of the $open()$ call. Audio use simply wants to open the
+device in order to get a file handle which is needed for issuing
+$ioctl$ commands, while data use wants to open for correct and
+reliable data transfer. The only way user programs can indicate what
+their {\em purpose\/} of opening the device is, is through the $flags$
+parameter (see {\tt {open(2)}}). For \cdrom\ devices, these flags aren't
+implemented (some drivers implement checking for write-related flags,
+but this is not strictly necessary if the device file has correct
+permission flags). Most option flags simply don't make sense to
+\cdrom\ devices: $O_CREAT$, $O_NOCTTY$, $O_TRUNC$, $O_APPEND$, and
+$O_SYNC$ have no meaning to a \cdrom.
+
+We therefore propose to use the flag $O_NONBLOCK$ to indicate
+that the device is opened just for issuing $ioctl$
+commands. Strictly, the meaning of $O_NONBLOCK$ is that opening and
+subsequent calls to the device don't cause the calling process to
+wait. We could interpret this as ``don't wait until someone has
+inserted some valid data-\cdrom.'' Thus, our proposal of the
+implementation for the $open()$ call for \cdrom s is:
+\begin{itemize}
+\item If no other flags are set than $O_RDONLY$, the device is opened
+for data transfer, and the return value will be 0 only upon successful
+initialization of the transfer. The call may even induce some actions
+on the \cdrom, such as closing the tray.
+\item If the option flag $O_NONBLOCK$ is set, opening will always be
+successful, unless the whole device doesn't exist. The drive will take
+no actions whatsoever.
+\end{itemize}
+
+\subsection{And what about standards?}
+
+You might hesitate to accept this proposal as it comes from the
+\linux\ community, and not from some standardizing institute. What
+about SUN, SGI, HP and all those other Unix and hardware vendors?
+Well, these companies are in the lucky position that they generally
+control both the hardware and software of their supported products,
+and are large enough to set their own standard. They do not have to
+deal with a dozen or more different, competing hardware
+configurations.\footnote{Incidentally, I think that SUN's approach to
+mounting \cdrom s is very good in origin: under Solaris a
+volume-daemon automatically mounts a newly inserted \cdrom\ under {\tt
+{/cdrom/$<volume-name>$/}}. In my opinion they should have pushed this
+further and have {\em every\/} \cdrom\ on the local area network be
+mounted at the similar location, \ie, no matter in which particular
+machine you insert a \cdrom, it will always appear at the same
+position in the directory tree, on every system. When I wanted to
+implement such a user-program for \linux, I came across the
+differences in behavior of the various drivers, and the need for an
+$ioctl$ informing about media changes.}
+
+We believe that using $O_NONBLOCK$ to indicate that a device is being opened
+for $ioctl$ commands only can be easily introduced in the \linux\
+community. All the CD-player authors will have to be informed, we can
+even send in our own patches to the programs. The use of $O_NONBLOCK$
+has most likely no influence on the behavior of the CD-players on
+other operating systems than \linux. Finally, a user can always revert
+to old behavior by a call to $ioctl(file_descriptor, CDROM_CLEAR_OPTIONS,
+CDO_USE_FFLAGS)$.
+
+\subsection{The preferred strategy of $open()$}
+
+The routines in \cdromc\ are designed in such a way that run-time
+configuration of the behavior of \cdrom\ devices (of {\em any\/} type)
+can be carried out, by the $CDROM_SET/CLEAR_OPTIONS$ $ioctls$. Thus, various
+modes of operation can be set:
+\begin{description}
+\item[$CDO_AUTO_CLOSE \mathrel| CDO_USE_FFLAGS \mathrel| CDO_LOCK$] This
+is the default setting. (With $CDO_CHECK_TYPE$ it will be better, in the
+future.) If the device is not yet opened by any other process, and if
+the device is being opened for data ($O_NONBLOCK$ is not set) and the
+tray is found to be open, an attempt to close the tray is made. Then,
+it is verified that a disc is in the drive and, if $CDO_CHECK_TYPE$ is
+set, that it contains tracks of type `data mode 1.' Only if all tests
+are passed is the return value zero. The door is locked to prevent file
+system corruption. If the drive is opened for audio ($O_NONBLOCK$ is
+set), no actions are taken and a value of 0 will be returned.
+\item[$CDO_AUTO_CLOSE \mathrel| CDO_AUTO_EJECT \mathrel| CDO_LOCK$] This
+mimics the behavior of the current sbpcd-driver. The option flags are
+ignored, the tray is closed on the first open, if necessary. Similarly,
+the tray is opened on the last release, \ie, if a \cdrom\ is unmounted,
+it is automatically ejected, such that the user can replace it.
+\end{description}
+We hope that these option can convince everybody (both driver
+maintainers and user program developers) to adopt the new \cdrom\
+driver scheme and option flag interpretation.
+
+\newsection{Description of routines in \cdromc}
+
+Only a few routines in \cdromc\ are exported to the drivers. In this
+new section we will discuss these, as well as the functions that `take
+over' the \cdrom\ interface to the kernel. The header file belonging
+to \cdromc\ is called \cdromh. Formerly, some of the contents of this
+file were placed in the file {\tt {ucdrom.h}}, but this file has now been
+merged back into \cdromh.
+
+\subsection{$Struct\ file_operations\ cdrom_fops$}
+
+The contents of this structure were described in section~\ref{cdrom.c}.
+A pointer to this structure is assigned to the $fops$ field
+of the $struct gendisk$.
+
+\subsection{$Int\ register_cdrom( struct\ cdrom_device_info\ * cdi)$}
+
+This function is used in about the same way one registers $cdrom_fops$
+with the kernel, the device operations and information structures,
+as described in section~\ref{cdrom.c}, should be registered with the
+\UCD:
+$$
+register_cdrom(\&<device>_info));
+$$
+This function returns zero upon success, and non-zero upon
+failure. The structure $<device>_info$ should have a pointer to the
+driver's $<device>_dops$, as in
+$$
+\vbox{\halign{&$#$\hfil\cr
+struct\ &cdrom_device_info\ <device>_info = \{\cr
+& <device>_dops;\cr
+&\ldots\cr
+\}\cr
+}}$$
+Note that a driver must have one static structure, $<device>_dops$, while
+it may have as many structures $<device>_info$ as there are minor devices
+active. $Register_cdrom()$ builds a linked list from these.
+
+\subsection{$Void\ unregister_cdrom(struct\ cdrom_device_info * cdi)$}
+
+Unregistering device $cdi$ with minor number $MINOR(cdi\to dev)$ removes
+the minor device from the list. If it was the last registered minor for
+the low-level driver, this disconnects the registered device-operation
+routines from the \cdrom\ interface. This function returns zero upon
+success, and non-zero upon failure.
+
+\subsection{$Int\ cdrom_open(struct\ inode * ip, struct\ file * fp)$}
+
+This function is not called directly by the low-level drivers, it is
+listed in the standard $cdrom_fops$. If the VFS opens a file, this
+function becomes active. A strategy is implemented in this routine,
+taking care of all capabilities and options that are set in the
+$cdrom_device_ops$ connected to the device. Then, the program flow is
+transferred to the device_dependent $open()$ call.
+
+\subsection{$Void\ cdrom_release(struct\ inode *ip, struct\ file
+*fp)$}
+
+This function implements the reverse-logic of $cdrom_open()$, and then
+calls the device-dependent $release()$ routine. When the use-count has
+reached 0, the allocated buffers are flushed by calls to $sync_dev(dev)$
+and $invalidate_buffers(dev)$.
+
+
+\subsection{$Int\ cdrom_ioctl(struct\ inode *ip, struct\ file *fp,
+unsigned\ int\ cmd, unsigned\ long\ arg)$}
+\label{cdrom-ioctl}
+
+This function handles all the standard $ioctl$ requests for \cdrom\
+devices in a uniform way. The different calls fall into three
+categories: $ioctl$s that can be directly implemented by device
+operations, ones that are routed through the call $audio_ioctl()$, and
+the remaining ones, that are presumable device-dependent. Generally, a
+negative return value indicates an error.
+
+\subsubsection{Directly implemented $ioctl$s}
+\label{ioctl-direct}
+
+The following `old' \cdrom-$ioctl$s are implemented by directly
+calling device-operations in $cdrom_device_ops$, if implemented and
+not masked:
+\begin{description}
+\item[CDROMMULTISESSION] Requests the last session on a \cdrom.
+\item[CDROMEJECT] Open tray.
+\item[CDROMCLOSETRAY] Close tray.
+\item[CDROMEJECT_SW] If $arg\not=0$, set behavior to auto-close (close
+tray on first open) and auto-eject (eject on last release), otherwise
+set behavior to non-moving on $open()$ and $release()$ calls.
+\item[CDROM_GET_MCN] Get the Media Catalog Number from a CD.
+\end{description}
+
+\subsubsection{$Ioctl$s routed through $audio_ioctl()$}
+\label{ioctl-audio}
+
+The following set of $ioctl$s are all implemented through a call to
+the $cdrom_fops$ function $audio_ioctl()$. Memory checks and
+allocation are performed in $cdrom_ioctl()$, and also sanitization of
+address format ($CDROM_LBA$/$CDROM_MSF$) is done.
+\begin{description}
+\item[CDROMSUBCHNL] Get sub-channel data in argument $arg$ of type $struct\
+cdrom_subchnl *{}$.
+\item[CDROMREADTOCHDR] Read Table of Contents header, in $arg$ of type
+$struct\ cdrom_tochdr *{}$.
+\item[CDROMREADTOCENTRY] Read a Table of Contents entry in $arg$ and
+specified by $arg$ of type $struct\ cdrom_tocentry *{}$.
+\item[CDROMPLAYMSF] Play audio fragment specified in Minute, Second,
+Frame format, delimited by $arg$ of type $struct\ cdrom_msf *{}$.
+\item[CDROMPLAYTRKIND] Play audio fragment in track-index format
+delimited by $arg$ of type $struct\ \penalty-1000 cdrom_ti *{}$.
+\item[CDROMVOLCTRL] Set volume specified by $arg$ of type $struct\
+cdrom_volctrl *{}$.
+\item[CDROMVOLREAD] Read volume into by $arg$ of type $struct\
+cdrom_volctrl *{}$.
+\item[CDROMSTART] Spin up disc.
+\item[CDROMSTOP] Stop playback of audio fragment.
+\item[CDROMPAUSE] Pause playback of audio fragment.
+\item[CDROMRESUME] Resume playing.
+\end{description}
+
+\subsubsection{New $ioctl$s in \cdromc}
+
+The following $ioctl$s have been introduced to allow user programs to
+control the behavior of individual \cdrom\ devices. New $ioctl$
+commands can be identified by the underscores in their names.
+\begin{description}
+\item[CDROM_SET_OPTIONS] Set options specified by $arg$. Returns the
+option flag register after modification. Use $arg = \rm0$ for reading
+the current flags.
+\item[CDROM_CLEAR_OPTIONS] Clear options specified by $arg$. Returns
+ the option flag register after modification.
+\item[CDROM_SELECT_SPEED] Select head-rate speed of disc specified as
+ by $arg$ in units of standard cdrom speed (176\,kB/sec raw data or
+ 150\,kB/sec file system data). The value 0 means `auto-select', \ie,
+ play audio discs at real time and data discs at maximum speed. The value
+ $arg$ is checked against the maximum head rate of the drive found in the
+ $cdrom_dops$.
+\item[CDROM_SELECT_DISC] Select disc numbered $arg$ from a juke-box.
+ First disc is numbered 0. The number $arg$ is checked against the
+ maximum number of discs in the juke-box found in the $cdrom_dops$.
+\item[CDROM_MEDIA_CHANGED] Returns 1 if a disc has been changed since
+ the last call. Note that calls to $cdrom_media_changed$ by the VFS
+ are treated by an independent queue, so both mechanisms will detect
+ a media change once. For juke-boxes, an extra argument $arg$
+ specifies the slot for which the information is given. The special
+ value $CDSL_CURRENT$ requests that information about the currently
+ selected slot be returned.
+\item[CDROM_DRIVE_STATUS] Returns the status of the drive by a call to
+ $drive_status()$. Return values are defined in section~\ref{drive
+ status}. Note that this call doesn't return information on the
+ current playing activity of the drive; this can be polled through an
+ $ioctl$ call to $CDROMSUBCHNL$. For juke-boxes, an extra argument
+ $arg$ specifies the slot for which (possibly limited) information is
+ given. The special value $CDSL_CURRENT$ requests that information
+ about the currently selected slot be returned.
+\item[CDROM_DISC_STATUS] Returns the type of the disc currently in the
+ drive. It should be viewed as a complement to $CDROM_DRIVE_STATUS$.
+ This $ioctl$ can provide \emph {some} information about the current
+ disc that is inserted in the drive. This functionality used to be
+ implemented in the low level drivers, but is now carried out
+ entirely in \UCD.
+
+ The history of development of the CD's use as a carrier medium for
+ various digital information has lead to many different disc types.
+ This $ioctl$ is useful only in the case that CDs have \emph {only
+ one} type of data on them. While this is often the case, it is
+ also very common for CDs to have some tracks with data, and some
+ tracks with audio. Because this is an existing interface, rather
+ than fixing this interface by changing the assumptions it was made
+ under, thereby breaking all user applications that use this
+ function, the \UCD\ implements this $ioctl$ as follows: If the CD in
+ question has audio tracks on it, and it has absolutely no CD-I, XA,
+ or data tracks on it, it will be reported as $CDS_AUDIO$. If it has
+ both audio and data tracks, it will return $CDS_MIXED$. If there
+ are no audio tracks on the disc, and if the CD in question has any
+ CD-I tracks on it, it will be reported as $CDS_XA_2_2$. Failing
+ that, if the CD in question has any XA tracks on it, it will be
+ reported as $CDS_XA_2_1$. Finally, if the CD in question has any
+ data tracks on it, it will be reported as a data CD ($CDS_DATA_1$).
+
+ This $ioctl$ can return:
+ $$
+ \halign{$#$\ \hfil&$/*$ \rm# $*/$\hfil\cr
+ CDS_NO_INFO& no information available\cr
+ CDS_NO_DISC& no disc is inserted, or tray is opened\cr
+ CDS_AUDIO& Audio disc (2352 audio bytes/frame)\cr
+ CDS_DATA_1& data disc, mode 1 (2048 user bytes/frame)\cr
+ CDS_XA_2_1& mixed data (XA), mode 2, form 1 (2048 user bytes)\cr
+ CDS_XA_2_2& mixed data (XA), mode 2, form 1 (2324 user bytes)\cr
+ CDS_MIXED& mixed audio/data disc\cr
+ }
+ $$
+ For some information concerning frame layout of the various disc
+ types, see a recent version of \cdromh.
+
+\item[CDROM_CHANGER_NSLOTS] Returns the number of slots in a
+ juke-box.
+\item[CDROMRESET] Reset the drive.
+\item[CDROM_GET_CAPABILITY] Returns the $capability$ flags for the
+ drive. Refer to section \ref{capability} for more information on
+ these flags.
+\item[CDROM_LOCKDOOR] Locks the door of the drive. $arg == \rm0$
+ unlocks the door, any other value locks it.
+\item[CDROM_DEBUG] Turns on debugging info. Only root is allowed
+ to do this. Same semantics as CDROM_LOCKDOOR.
+\end{description}
+
+\subsubsection{Device dependent $ioctl$s}
+
+Finally, all other $ioctl$s are passed to the function $dev_ioctl()$,
+if implemented. No memory allocation or verification is carried out.
+
+\newsection{How to update your driver}
+
+\begin{enumerate}
+\item Make a backup of your current driver.
+\item Get hold of the files \cdromc\ and \cdromh, they should be in
+ the directory tree that came with this documentation.
+\item Make sure you include \cdromh.
+\item Change the 3rd argument of $register_blkdev$ from
+$\&<your-drive>_fops$ to $\&cdrom_fops$.
+\item Just after that line, add the following to register with the \UCD:
+ $$register_cdrom(\&<your-drive>_info);$$
+ Similarly, add a call to $unregister_cdrom()$ at the appropriate place.
+\item Copy an example of the device-operations $struct$ to your
+ source, \eg, from {\tt {cm206.c}} $cm206_dops$, and change all
+ entries to names corresponding to your driver, or names you just
+ happen to like. If your driver doesn't support a certain function,
+ make the entry $NULL$. At the entry $capability$ you should list all
+ capabilities your driver currently supports. If your driver
+ has a capability that is not listed, please send me a message.
+\item Copy the $cdrom_device_info$ declaration from the same example
+ driver, and modify the entries according to your needs. If your
+ driver dynamically determines the capabilities of the hardware, this
+ structure should also be declared dynamically.
+\item Implement all functions in your $<device>_dops$ structure,
+ according to prototypes listed in \cdromh, and specifications given
+ in section~\ref{cdrom.c}. Most likely you have already implemented
+ the code in a large part, and you will almost certainly need to adapt the
+ prototype and return values.
+\item Rename your $<device>_ioctl()$ function to $audio_ioctl$ and
+ change the prototype a little. Remove entries listed in the first
+ part in section~\ref{cdrom-ioctl}, if your code was OK, these are
+ just calls to the routines you adapted in the previous step.
+\item You may remove all remaining memory checking code in the
+ $audio_ioctl()$ function that deals with audio commands (these are
+ listed in the second part of section~\ref{cdrom-ioctl}). There is no
+ need for memory allocation either, so most $case$s in the $switch$
+ statement look similar to:
+ $$
+ case\ CDROMREADTOCENTRY\colon get_toc_entry\bigl((struct\
+ cdrom_tocentry *{})\ arg\bigr);
+ $$
+\item All remaining $ioctl$ cases must be moved to a separate
+ function, $<device>_ioctl$, the device-dependent $ioctl$s. Note that
+ memory checking and allocation must be kept in this code!
+\item Change the prototypes of $<device>_open()$ and
+ $<device>_release()$, and remove any strategic code (\ie, tray
+ movement, door locking, etc.).
+\item Try to recompile the drivers. We advise you to use modules, both
+ for {\tt {cdrom.o}} and your driver, as debugging is much easier this
+ way.
+\end{enumerate}
+
+\newsection{Thanks}
+
+Thanks to all the people involved. First, Erik Andersen, who has
+taken over the torch in maintaining \cdromc\ and integrating much
+\cdrom-related code in the 2.1-kernel. Thanks to Scott Snyder and
+Gerd Knorr, who were the first to implement this interface for SCSI
+and IDE-CD drivers and added many ideas for extension of the data
+structures relative to kernel~2.0. Further thanks to Heiko Ei{\sz}feldt,
+Thomas Quinot, Jon Tombs, Ken Pizzini, Eberhard M\"onkeberg and Andrew
+Kroll, the \linux\ \cdrom\ device driver developers who were kind
+enough to give suggestions and criticisms during the writing. Finally
+of course, I want to thank Linus Torvalds for making this possible in
+the first place.
+
+\vfill
+$ \version\ $
+\eject
+\end{document}
diff --git a/Documentation/cdrom/ide-cd b/Documentation/cdrom/ide-cd
new file mode 100644
index 000000000..f4dc9de26
--- /dev/null
+++ b/Documentation/cdrom/ide-cd
@@ -0,0 +1,538 @@
+IDE-CD driver documentation
+Originally by scott snyder <snyder@fnald0.fnal.gov> (19 May 1996)
+Carrying on the torch is: Erik Andersen <andersee@debian.org>
+New maintainers (19 Oct 1998): Jens Axboe <axboe@image.dk>
+
+1. Introduction
+---------------
+
+The ide-cd driver should work with all ATAPI ver 1.2 to ATAPI 2.6 compliant
+CDROM drives which attach to an IDE interface. Note that some CDROM vendors
+(including Mitsumi, Sony, Creative, Aztech, and Goldstar) have made
+both ATAPI-compliant drives and drives which use a proprietary
+interface. If your drive uses one of those proprietary interfaces,
+this driver will not work with it (but one of the other CDROM drivers
+probably will). This driver will not work with `ATAPI' drives which
+attach to the parallel port. In addition, there is at least one drive
+(CyCDROM CR520ie) which attaches to the IDE port but is not ATAPI;
+this driver will not work with drives like that either (but see the
+aztcd driver).
+
+This driver provides the following features:
+
+ - Reading from data tracks, and mounting ISO 9660 filesystems.
+
+ - Playing audio tracks. Most of the CDROM player programs floating
+ around should work; I usually use Workman.
+
+ - Multisession support.
+
+ - On drives which support it, reading digital audio data directly
+ from audio tracks. The program cdda2wav can be used for this.
+ Note, however, that only some drives actually support this.
+
+ - There is now support for CDROM changers which comply with the
+ ATAPI 2.6 draft standard (such as the NEC CDR-251). This additional
+ functionality includes a function call to query which slot is the
+ currently selected slot, a function call to query which slots contain
+ CDs, etc. A sample program which demonstrates this functionality is
+ appended to the end of this file. The Sanyo 3-disc changer
+ (which does not conform to the standard) is also now supported.
+ Please note the driver refers to the first CD as slot # 0.
+
+
+2. Installation
+---------------
+
+0. The ide-cd relies on the ide disk driver. See
+ Documentation/ide/ide.txt for up-to-date information on the ide
+ driver.
+
+1. Make sure that the ide and ide-cd drivers are compiled into the
+ kernel you're using. When configuring the kernel, in the section
+ entitled "Floppy, IDE, and other block devices", say either `Y'
+ (which will compile the support directly into the kernel) or `M'
+ (to compile support as a module which can be loaded and unloaded)
+ to the options:
+
+ Enhanced IDE/MFM/RLL disk/cdrom/tape/floppy support
+ Include IDE/ATAPI CDROM support
+
+ and `no' to
+
+ Use old disk-only driver on primary interface
+
+ Depending on what type of IDE interface you have, you may need to
+ specify additional configuration options. See
+ Documentation/ide/ide.txt.
+
+2. You should also ensure that the iso9660 filesystem is either
+ compiled into the kernel or available as a loadable module. You
+ can see if a filesystem is known to the kernel by catting
+ /proc/filesystems.
+
+3. The CDROM drive should be connected to the host on an IDE
+ interface. Each interface on a system is defined by an I/O port
+ address and an IRQ number, the standard assignments being
+ 0x1f0 and 14 for the primary interface and 0x170 and 15 for the
+ secondary interface. Each interface can control up to two devices,
+ where each device can be a hard drive, a CDROM drive, a floppy drive,
+ or a tape drive. The two devices on an interface are called `master'
+ and `slave'; this is usually selectable via a jumper on the drive.
+
+ Linux names these devices as follows. The master and slave devices
+ on the primary IDE interface are called `hda' and `hdb',
+ respectively. The drives on the secondary interface are called
+ `hdc' and `hdd'. (Interfaces at other locations get other letters
+ in the third position; see Documentation/ide/ide.txt.)
+
+ If you want your CDROM drive to be found automatically by the
+ driver, you should make sure your IDE interface uses either the
+ primary or secondary addresses mentioned above. In addition, if
+ the CDROM drive is the only device on the IDE interface, it should
+ be jumpered as `master'. (If for some reason you cannot configure
+ your system in this manner, you can probably still use the driver.
+ You may have to pass extra configuration information to the kernel
+ when you boot, however. See Documentation/ide/ide.txt for more
+ information.)
+
+4. Boot the system. If the drive is recognized, you should see a
+ message which looks like
+
+ hdb: NEC CD-ROM DRIVE:260, ATAPI CDROM drive
+
+ If you do not see this, see section 5 below.
+
+5. You may want to create a symbolic link /dev/cdrom pointing to the
+ actual device. You can do this with the command
+
+ ln -s /dev/hdX /dev/cdrom
+
+ where X should be replaced by the letter indicating where your
+ drive is installed.
+
+6. You should be able to see any error messages from the driver with
+ the `dmesg' command.
+
+
+3. Basic usage
+--------------
+
+An ISO 9660 CDROM can be mounted by putting the disc in the drive and
+typing (as root)
+
+ mount -t iso9660 /dev/cdrom /mnt/cdrom
+
+where it is assumed that /dev/cdrom is a link pointing to the actual
+device (as described in step 5 of the last section) and /mnt/cdrom is
+an empty directory. You should now be able to see the contents of the
+CDROM under the /mnt/cdrom directory. If you want to eject the CDROM,
+you must first dismount it with a command like
+
+ umount /mnt/cdrom
+
+Note that audio CDs cannot be mounted.
+
+Some distributions set up /etc/fstab to always try to mount a CDROM
+filesystem on bootup. It is not required to mount the CDROM in this
+manner, though, and it may be a nuisance if you change CDROMs often.
+You should feel free to remove the cdrom line from /etc/fstab and
+mount CDROMs manually if that suits you better.
+
+Multisession and photocd discs should work with no special handling.
+The hpcdtoppm package (ftp.gwdg.de:/pub/linux/hpcdtoppm/) may be
+useful for reading photocds.
+
+To play an audio CD, you should first unmount and remove any data
+CDROM. Any of the CDROM player programs should then work (workman,
+workbone, cdplayer, etc.).
+
+On a few drives, you can read digital audio directly using a program
+such as cdda2wav. The only types of drive which I've heard support
+this are Sony and Toshiba drives. You will get errors if you try to
+use this function on a drive which does not support it.
+
+For supported changers, you can use the `cdchange' program (appended to
+the end of this file) to switch between changer slots. Note that the
+drive should be unmounted before attempting this. The program takes
+two arguments: the CDROM device, and the slot number to which you wish
+to change. If the slot number is -1, the drive is unloaded.
+
+
+4. Common problems
+------------------
+
+This section discusses some common problems encountered when trying to
+use the driver, and some possible solutions. Note that if you are
+experiencing problems, you should probably also review
+Documentation/ide/ide.txt for current information about the underlying
+IDE support code. Some of these items apply only to earlier versions
+of the driver, but are mentioned here for completeness.
+
+In most cases, you should probably check with `dmesg' for any errors
+from the driver.
+
+a. Drive is not detected during booting.
+
+ - Review the configuration instructions above and in
+ Documentation/ide/ide.txt, and check how your hardware is
+ configured.
+
+ - If your drive is the only device on an IDE interface, it should
+ be jumpered as master, if at all possible.
+
+ - If your IDE interface is not at the standard addresses of 0x170
+ or 0x1f0, you'll need to explicitly inform the driver using a
+ lilo option. See Documentation/ide/ide.txt. (This feature was
+ added around kernel version 1.3.30.)
+
+ - If the autoprobing is not finding your drive, you can tell the
+ driver to assume that one exists by using a lilo option of the
+ form `hdX=cdrom', where X is the drive letter corresponding to
+ where your drive is installed. Note that if you do this and you
+ see a boot message like
+
+ hdX: ATAPI cdrom (?)
+
+ this does _not_ mean that the driver has successfully detected
+ the drive; rather, it means that the driver has not detected a
+ drive, but is assuming there's one there anyway because you told
+ it so. If you actually try to do I/O to a drive defined at a
+ nonexistent or nonresponding I/O address, you'll probably get
+ errors with a status value of 0xff.
+
+ - Some IDE adapters require a nonstandard initialization sequence
+ before they'll function properly. (If this is the case, there
+ will often be a separate MS-DOS driver just for the controller.)
+ IDE interfaces on sound cards often fall into this category.
+
+ Support for some interfaces needing extra initialization is
+ provided in later 1.3.x kernels. You may need to turn on
+ additional kernel configuration options to get them to work;
+ see Documentation/ide/ide.txt.
+
+ Even if support is not available for your interface, you may be
+ able to get it to work with the following procedure. First boot
+ MS-DOS and load the appropriate drivers. Then warm-boot linux
+ (i.e., without powering off). If this works, it can be automated
+ by running loadlin from the MS-DOS autoexec.
+
+
+b. Timeout/IRQ errors.
+
+ - If you always get timeout errors, interrupts from the drive are
+ probably not making it to the host.
+
+ - IRQ problems may also be indicated by the message
+ `IRQ probe failed (<n>)' while booting. If <n> is zero, that
+ means that the system did not see an interrupt from the drive when
+ it was expecting one (on any feasible IRQ). If <n> is negative,
+ that means the system saw interrupts on multiple IRQ lines, when
+ it was expecting to receive just one from the CDROM drive.
+
+ - Double-check your hardware configuration to make sure that the IRQ
+ number of your IDE interface matches what the driver expects.
+ (The usual assignments are 14 for the primary (0x1f0) interface
+ and 15 for the secondary (0x170) interface.) Also be sure that
+ you don't have some other hardware which might be conflicting with
+ the IRQ you're using. Also check the BIOS setup for your system;
+ some have the ability to disable individual IRQ levels, and I've
+ had one report of a system which was shipped with IRQ 15 disabled
+ by default.
+
+ - Note that many MS-DOS CDROM drivers will still function even if
+ there are hardware problems with the interrupt setup; they
+ apparently don't use interrupts.
+
+ - If you own a Pioneer DR-A24X, you _will_ get nasty error messages
+ on boot such as "irq timeout: status=0x50 { DriveReady SeekComplete }"
+ The Pioneer DR-A24X CDROM drives are fairly popular these days.
+ Unfortunately, these drives seem to become very confused when we perform
+ the standard Linux ATA disk drive probe. If you own one of these drives,
+ you can bypass the ATA probing which confuses these CDROM drives, by
+ adding `append="hdX=noprobe hdX=cdrom"' to your lilo.conf file and running
+ lilo (again where X is the drive letter corresponding to where your drive
+ is installed.)
+
+c. System hangups.
+
+ - If the system locks up when you try to access the CDROM, the most
+ likely cause is that you have a buggy IDE adapter which doesn't
+ properly handle simultaneous transactions on multiple interfaces.
+ The most notorious of these is the CMD640B chip. This problem can
+ be worked around by specifying the `serialize' option when
+ booting. Recent kernels should be able to detect the need for
+ this automatically in most cases, but the detection is not
+ foolproof. See Documentation/ide/ide.txt for more information
+ about the `serialize' option and the CMD640B.
+
+ - Note that many MS-DOS CDROM drivers will work with such buggy
+ hardware, apparently because they never attempt to overlap CDROM
+ operations with other disk activity.
+
+
+d. Can't mount a CDROM.
+
+ - If you get errors from mount, it may help to check `dmesg' to see
+ if there are any more specific errors from the driver or from the
+ filesystem.
+
+ - Make sure there's a CDROM loaded in the drive, and that's it's an
+ ISO 9660 disc. You can't mount an audio CD.
+
+ - With the CDROM in the drive and unmounted, try something like
+
+ cat /dev/cdrom | od | more
+
+ If you see a dump, then the drive and driver are probably working
+ OK, and the problem is at the filesystem level (i.e., the CDROM is
+ not ISO 9660 or has errors in the filesystem structure).
+
+ - If you see `not a block device' errors, check that the definitions
+ of the device special files are correct. They should be as
+ follows:
+
+ brw-rw---- 1 root disk 3, 0 Nov 11 18:48 /dev/hda
+ brw-rw---- 1 root disk 3, 64 Nov 11 18:48 /dev/hdb
+ brw-rw---- 1 root disk 22, 0 Nov 11 18:48 /dev/hdc
+ brw-rw---- 1 root disk 22, 64 Nov 11 18:48 /dev/hdd
+
+ Some early Slackware releases had these defined incorrectly. If
+ these are wrong, you can remake them by running the script
+ scripts/MAKEDEV.ide. (You may have to make it executable
+ with chmod first.)
+
+ If you have a /dev/cdrom symbolic link, check that it is pointing
+ to the correct device file.
+
+ If you hear people talking of the devices `hd1a' and `hd1b', these
+ were old names for what are now called hdc and hdd. Those names
+ should be considered obsolete.
+
+ - If mount is complaining that the iso9660 filesystem is not
+ available, but you know it is (check /proc/filesystems), you
+ probably need a newer version of mount. Early versions would not
+ always give meaningful error messages.
+
+
+e. Directory listings are unpredictably truncated, and `dmesg' shows
+ `buffer botch' error messages from the driver.
+
+ - There was a bug in the version of the driver in 1.2.x kernels
+ which could cause this. It was fixed in 1.3.0. If you can't
+ upgrade, you can probably work around the problem by specifying a
+ blocksize of 2048 when mounting. (Note that you won't be able to
+ directly execute binaries off the CDROM in that case.)
+
+ If you see this in kernels later than 1.3.0, please report it as a
+ bug.
+
+
+f. Data corruption.
+
+ - Random data corruption was occasionally observed with the Hitachi
+ CDR-7730 CDROM. If you experience data corruption, using "hdx=slow"
+ as a command line parameter may work around the problem, at the
+ expense of low system performance.
+
+
+5. cdchange.c
+-------------
+
+/*
+ * cdchange.c [-v] <device> [<slot>]
+ *
+ * This loads a CDROM from a specified slot in a changer, and displays
+ * information about the changer status. The drive should be unmounted before
+ * using this program.
+ *
+ * Changer information is displayed if either the -v flag is specified
+ * or no slot was specified.
+ *
+ * Based on code originally from Gerhard Zuber <zuber@berlin.snafu.de>.
+ * Changer status information, and rewrite for the new Uniform CDROM driver
+ * interface by Erik Andersen <andersee@debian.org>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/cdrom.h>
+
+
+int
+main (int argc, char **argv)
+{
+ char *program;
+ char *device;
+ int fd; /* file descriptor for CD-ROM device */
+ int status; /* return status for system calls */
+ int verbose = 0;
+ int slot=-1, x_slot;
+ int total_slots_available;
+
+ program = argv[0];
+
+ ++argv;
+ --argc;
+
+ if (argc < 1 || argc > 3) {
+ fprintf (stderr, "usage: %s [-v] <device> [<slot>]\n",
+ program);
+ fprintf (stderr, " Slots are numbered 1 -- n.\n");
+ exit (1);
+ }
+
+ if (strcmp (argv[0], "-v") == 0) {
+ verbose = 1;
+ ++argv;
+ --argc;
+ }
+
+ device = argv[0];
+
+ if (argc == 2)
+ slot = atoi (argv[1]) - 1;
+
+ /* open device */
+ fd = open(device, O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ fprintf (stderr, "%s: open failed for `%s': %s\n",
+ program, device, strerror (errno));
+ exit (1);
+ }
+
+ /* Check CD player status */
+ total_slots_available = ioctl (fd, CDROM_CHANGER_NSLOTS);
+ if (total_slots_available <= 1 ) {
+ fprintf (stderr, "%s: Device `%s' is not an ATAPI "
+ "compliant CD changer.\n", program, device);
+ exit (1);
+ }
+
+ if (slot >= 0) {
+ if (slot >= total_slots_available) {
+ fprintf (stderr, "Bad slot number. "
+ "Should be 1 -- %d.\n",
+ total_slots_available);
+ exit (1);
+ }
+
+ /* load */
+ slot=ioctl (fd, CDROM_SELECT_DISC, slot);
+ if (slot<0) {
+ fflush(stdout);
+ perror ("CDROM_SELECT_DISC ");
+ exit(1);
+ }
+ }
+
+ if (slot < 0 || verbose) {
+
+ status=ioctl (fd, CDROM_SELECT_DISC, CDSL_CURRENT);
+ if (status<0) {
+ fflush(stdout);
+ perror (" CDROM_SELECT_DISC");
+ exit(1);
+ }
+ slot=status;
+
+ printf ("Current slot: %d\n", slot+1);
+ printf ("Total slots available: %d\n",
+ total_slots_available);
+
+ printf ("Drive status: ");
+ status = ioctl (fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
+ if (status<0) {
+ perror(" CDROM_DRIVE_STATUS");
+ } else switch(status) {
+ case CDS_DISC_OK:
+ printf ("Ready.\n");
+ break;
+ case CDS_TRAY_OPEN:
+ printf ("Tray Open.\n");
+ break;
+ case CDS_DRIVE_NOT_READY:
+ printf ("Drive Not Ready.\n");
+ break;
+ default:
+ printf ("This Should not happen!\n");
+ break;
+ }
+
+ for (x_slot=0; x_slot<total_slots_available; x_slot++) {
+ printf ("Slot %2d: ", x_slot+1);
+ status = ioctl (fd, CDROM_DRIVE_STATUS, x_slot);
+ if (status<0) {
+ perror(" CDROM_DRIVE_STATUS");
+ } else switch(status) {
+ case CDS_DISC_OK:
+ printf ("Disc present.");
+ break;
+ case CDS_NO_DISC:
+ printf ("Empty slot.");
+ break;
+ case CDS_TRAY_OPEN:
+ printf ("CD-ROM tray open.\n");
+ break;
+ case CDS_DRIVE_NOT_READY:
+ printf ("CD-ROM drive not ready.\n");
+ break;
+ case CDS_NO_INFO:
+ printf ("No Information available.");
+ break;
+ default:
+ printf ("This Should not happen!\n");
+ break;
+ }
+ if (slot == x_slot) {
+ status = ioctl (fd, CDROM_DISC_STATUS);
+ if (status<0) {
+ perror(" CDROM_DISC_STATUS");
+ }
+ switch (status) {
+ case CDS_AUDIO:
+ printf ("\tAudio disc.\t");
+ break;
+ case CDS_DATA_1:
+ case CDS_DATA_2:
+ printf ("\tData disc type %d.\t", status-CDS_DATA_1+1);
+ break;
+ case CDS_XA_2_1:
+ case CDS_XA_2_2:
+ printf ("\tXA data disc type %d.\t", status-CDS_XA_2_1+1);
+ break;
+ default:
+ printf ("\tUnknown disc type 0x%x!\t", status);
+ break;
+ }
+ }
+ status = ioctl (fd, CDROM_MEDIA_CHANGED, x_slot);
+ if (status<0) {
+ perror(" CDROM_MEDIA_CHANGED");
+ }
+ switch (status) {
+ case 1:
+ printf ("Changed.\n");
+ break;
+ default:
+ printf ("\n");
+ break;
+ }
+ }
+ }
+
+ /* close device */
+ status = close (fd);
+ if (status != 0) {
+ fprintf (stderr, "%s: close failed for `%s': %s\n",
+ program, device, strerror (errno));
+ exit (1);
+ }
+
+ exit (0);
+}
diff --git a/Documentation/cdrom/packet-writing.txt b/Documentation/cdrom/packet-writing.txt
new file mode 100644
index 000000000..2834170d8
--- /dev/null
+++ b/Documentation/cdrom/packet-writing.txt
@@ -0,0 +1,132 @@
+Getting started quick
+---------------------
+
+- Select packet support in the block device section and UDF support in
+ the file system section.
+
+- Compile and install kernel and modules, reboot.
+
+- You need the udftools package (pktsetup, mkudffs, cdrwtool).
+ Download from http://sourceforge.net/projects/linux-udf/
+
+- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
+ as appropriate):
+ # cdrwtool -d /dev/hdc -q
+
+- Setup your writer
+ # pktsetup dev_name /dev/hdc
+
+- Now you can mount /dev/pktcdvd/dev_name and copy files to it. Enjoy!
+ # mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
+
+
+Packet writing for DVD-RW media
+-------------------------------
+
+DVD-RW discs can be written to much like CD-RW discs if they are in
+the so called "restricted overwrite" mode. To put a disc in restricted
+overwrite mode, run:
+
+ # dvd+rw-format /dev/hdc
+
+You can then use the disc the same way you would use a CD-RW disc:
+
+ # pktsetup dev_name /dev/hdc
+ # mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
+
+
+Packet writing for DVD+RW media
+-------------------------------
+
+According to the DVD+RW specification, a drive supporting DVD+RW discs
+shall implement "true random writes with 2KB granularity", which means
+that it should be possible to put any filesystem with a block size >=
+2KB on such a disc. For example, it should be possible to do:
+
+ # dvd+rw-format /dev/hdc (only needed if the disc has never
+ been formatted)
+ # mkudffs /dev/hdc
+ # mount /dev/hdc /cdrom -t udf -o rw,noatime
+
+However, some drives don't follow the specification and expect the
+host to perform aligned writes at 32KB boundaries. Other drives do
+follow the specification, but suffer bad performance problems if the
+writes are not 32KB aligned.
+
+Both problems can be solved by using the pktcdvd driver, which always
+generates aligned writes.
+
+ # dvd+rw-format /dev/hdc
+ # pktsetup dev_name /dev/hdc
+ # mkudffs /dev/pktcdvd/dev_name
+ # mount /dev/pktcdvd/dev_name /cdrom -t udf -o rw,noatime
+
+
+Packet writing for DVD-RAM media
+--------------------------------
+
+DVD-RAM discs are random writable, so using the pktcdvd driver is not
+necessary. However, using the pktcdvd driver can improve performance
+in the same way it does for DVD+RW media.
+
+
+Notes
+-----
+
+- CD-RW media can usually not be overwritten more than about 1000
+ times, so to avoid unnecessary wear on the media, you should always
+ use the noatime mount option.
+
+- Defect management (ie automatic remapping of bad sectors) has not
+ been implemented yet, so you are likely to get at least some
+ filesystem corruption if the disc wears out.
+
+- Since the pktcdvd driver makes the disc appear as a regular block
+ device with a 2KB block size, you can put any filesystem you like on
+ the disc. For example, run:
+
+ # /sbin/mke2fs /dev/pktcdvd/dev_name
+
+ to create an ext2 filesystem on the disc.
+
+
+Using the pktcdvd sysfs interface
+---------------------------------
+
+Since Linux 2.6.20, the pktcdvd module has a sysfs interface
+and can be controlled by it. For example the "pktcdvd" tool uses
+this interface. (see http://tom.ist-im-web.de/download/pktcdvd )
+
+"pktcdvd" works similar to "pktsetup", e.g.:
+
+ # pktcdvd -a dev_name /dev/hdc
+ # mkudffs /dev/pktcdvd/dev_name
+ # mount -t udf -o rw,noatime /dev/pktcdvd/dev_name /dvdram
+ # cp files /dvdram
+ # umount /dvdram
+ # pktcdvd -r dev_name
+
+
+For a description of the sysfs interface look into the file:
+
+ Documentation/ABI/testing/sysfs-class-pktcdvd
+
+
+Using the pktcdvd debugfs interface
+-----------------------------------
+
+To read pktcdvd device infos in human readable form, do:
+
+ # cat /sys/kernel/debug/pktcdvd/pktcdvd[0-7]/info
+
+For a description of the debugfs interface look into the file:
+
+ Documentation/ABI/testing/debugfs-pktcdvd
+
+
+
+Links
+-----
+
+See http://fy.chalmers.se/~appro/linux/DVD+RW/ for more information
+about DVD writing.
diff --git a/Documentation/cgroups/00-INDEX b/Documentation/cgroups/00-INDEX
new file mode 100644
index 000000000..96ce071a3
--- /dev/null
+++ b/Documentation/cgroups/00-INDEX
@@ -0,0 +1,28 @@
+00-INDEX
+ - this file
+blkio-controller.txt
+ - Description for Block IO Controller, implementation and usage details.
+cgroups.txt
+ - Control Groups definition, implementation details, examples and API.
+cpuacct.txt
+ - CPU Accounting Controller; account CPU usage for groups of tasks.
+cpusets.txt
+ - documents the cpusets feature; assign CPUs and Mem to a set of tasks.
+devices.txt
+ - Device Whitelist Controller; description, interface and security.
+freezer-subsystem.txt
+ - checkpointing; rationale to not use signals, interface.
+hugetlb.txt
+ - HugeTLB Controller implementation and usage details.
+memcg_test.txt
+ - Memory Resource Controller; implementation details.
+memory.txt
+ - Memory Resource Controller; design, accounting, interface, testing.
+net_cls.txt
+ - Network classifier cgroups details and usages.
+net_prio.txt
+ - Network priority cgroups details and usages.
+resource_counter.txt
+ - Resource Counter API.
+unified-hierarchy.txt
+ - Description the new/next cgroup interface.
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt
new file mode 100644
index 000000000..cd556b914
--- /dev/null
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -0,0 +1,394 @@
+ Block IO Controller
+ ===================
+Overview
+========
+cgroup subsys "blkio" implements the block io controller. There seems to be
+a need of various kinds of IO control policies (like proportional BW, max BW)
+both at leaf nodes as well as at intermediate nodes in a storage hierarchy.
+Plan is to use the same cgroup based management interface for blkio controller
+and based on user options switch IO policies in the background.
+
+Currently two IO control policies are implemented. First one is proportional
+weight time based division of disk policy. It is implemented in CFQ. Hence
+this policy takes effect only on leaf nodes when CFQ is being used. The second
+one is throttling policy which can be used to specify upper IO rate limits
+on devices. This policy is implemented in generic block layer and can be
+used on leaf nodes as well as higher level logical devices like device mapper.
+
+HOWTO
+=====
+Proportional Weight division of bandwidth
+-----------------------------------------
+You can do a very simple testing of running two dd threads in two different
+cgroups. Here is what you can do.
+
+- Enable Block IO controller
+ CONFIG_BLK_CGROUP=y
+
+- Enable group scheduling in CFQ
+ CONFIG_CFQ_GROUP_IOSCHED=y
+
+- Compile and boot into kernel and mount IO controller (blkio); see
+ cgroups.txt, Why are cgroups needed?.
+
+ mount -t tmpfs cgroup_root /sys/fs/cgroup
+ mkdir /sys/fs/cgroup/blkio
+ mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
+
+- Create two cgroups
+ mkdir -p /sys/fs/cgroup/blkio/test1/ /sys/fs/cgroup/blkio/test2
+
+- Set weights of group test1 and test2
+ echo 1000 > /sys/fs/cgroup/blkio/test1/blkio.weight
+ echo 500 > /sys/fs/cgroup/blkio/test2/blkio.weight
+
+- Create two same size files (say 512MB each) on same disk (file1, file2) and
+ launch two dd threads in different cgroup to read those files.
+
+ sync
+ echo 3 > /proc/sys/vm/drop_caches
+
+ dd if=/mnt/sdb/zerofile1 of=/dev/null &
+ echo $! > /sys/fs/cgroup/blkio/test1/tasks
+ cat /sys/fs/cgroup/blkio/test1/tasks
+
+ dd if=/mnt/sdb/zerofile2 of=/dev/null &
+ echo $! > /sys/fs/cgroup/blkio/test2/tasks
+ cat /sys/fs/cgroup/blkio/test2/tasks
+
+- At macro level, first dd should finish first. To get more precise data, keep
+ on looking at (with the help of script), at blkio.disk_time and
+ blkio.disk_sectors files of both test1 and test2 groups. This will tell how
+ much disk time (in milli seconds), each group got and how many secotors each
+ group dispatched to the disk. We provide fairness in terms of disk time, so
+ ideally io.disk_time of cgroups should be in proportion to the weight.
+
+Throttling/Upper Limit policy
+-----------------------------
+- Enable Block IO controller
+ CONFIG_BLK_CGROUP=y
+
+- Enable throttling in block layer
+ CONFIG_BLK_DEV_THROTTLING=y
+
+- Mount blkio controller (see cgroups.txt, Why are cgroups needed?)
+ mount -t cgroup -o blkio none /sys/fs/cgroup/blkio
+
+- Specify a bandwidth rate on particular device for root group. The format
+ for policy is "<major>:<minor> <bytes_per_second>".
+
+ echo "8:16 1048576" > /sys/fs/cgroup/blkio/blkio.throttle.read_bps_device
+
+ Above will put a limit of 1MB/second on reads happening for root group
+ on device having major/minor number 8:16.
+
+- Run dd to read a file and see if rate is throttled to 1MB/s or not.
+
+ # dd if=/mnt/common/zerofile of=/dev/null bs=4K count=1024
+ # iflag=direct
+ 1024+0 records in
+ 1024+0 records out
+ 4194304 bytes (4.2 MB) copied, 4.0001 s, 1.0 MB/s
+
+ Limits for writes can be put using blkio.throttle.write_bps_device file.
+
+Hierarchical Cgroups
+====================
+
+Both CFQ and throttling implement hierarchy support; however,
+throttling's hierarchy support is enabled iff "sane_behavior" is
+enabled from cgroup side, which currently is a development option and
+not publicly available.
+
+If somebody created a hierarchy like as follows.
+
+ root
+ / \
+ test1 test2
+ |
+ test3
+
+CFQ by default and throttling with "sane_behavior" will handle the
+hierarchy correctly. For details on CFQ hierarchy support, refer to
+Documentation/block/cfq-iosched.txt. For throttling, all limits apply
+to the whole subtree while all statistics are local to the IOs
+directly generated by tasks in that cgroup.
+
+Throttling without "sane_behavior" enabled from cgroup side will
+practically treat all groups at same level as if it looks like the
+following.
+
+ pivot
+ / / \ \
+ root test1 test2 test3
+
+Various user visible config options
+===================================
+CONFIG_BLK_CGROUP
+ - Block IO controller.
+
+CONFIG_DEBUG_BLK_CGROUP
+ - Debug help. Right now some additional stats file show up in cgroup
+ if this option is enabled.
+
+CONFIG_CFQ_GROUP_IOSCHED
+ - Enables group scheduling in CFQ. Currently only 1 level of group
+ creation is allowed.
+
+CONFIG_BLK_DEV_THROTTLING
+ - Enable block device throttling support in block layer.
+
+Details of cgroup files
+=======================
+Proportional weight policy files
+--------------------------------
+- blkio.weight
+ - Specifies per cgroup weight. This is default weight of the group
+ on all the devices until and unless overridden by per device rule.
+ (See blkio.weight_device).
+ Currently allowed range of weights is from 10 to 1000.
+
+- blkio.weight_device
+ - One can specify per cgroup per device rules using this interface.
+ These rules override the default value of group weight as specified
+ by blkio.weight.
+
+ Following is the format.
+
+ # echo dev_maj:dev_minor weight > blkio.weight_device
+ Configure weight=300 on /dev/sdb (8:16) in this cgroup
+ # echo 8:16 300 > blkio.weight_device
+ # cat blkio.weight_device
+ dev weight
+ 8:16 300
+
+ Configure weight=500 on /dev/sda (8:0) in this cgroup
+ # echo 8:0 500 > blkio.weight_device
+ # cat blkio.weight_device
+ dev weight
+ 8:0 500
+ 8:16 300
+
+ Remove specific weight for /dev/sda in this cgroup
+ # echo 8:0 0 > blkio.weight_device
+ # cat blkio.weight_device
+ dev weight
+ 8:16 300
+
+- blkio.leaf_weight[_device]
+ - Equivalents of blkio.weight[_device] for the purpose of
+ deciding how much weight tasks in the given cgroup has while
+ competing with the cgroup's child cgroups. For details,
+ please refer to Documentation/block/cfq-iosched.txt.
+
+- blkio.time
+ - disk time allocated to cgroup per device in milliseconds. First
+ two fields specify the major and minor number of the device and
+ third field specifies the disk time allocated to group in
+ milliseconds.
+
+- blkio.sectors
+ - number of sectors transferred to/from disk by the group. First
+ two fields specify the major and minor number of the device and
+ third field specifies the number of sectors transferred by the
+ group to/from the device.
+
+- blkio.io_service_bytes
+ - Number of bytes transferred to/from the disk by the group. These
+ are further divided by the type of operation - read or write, sync
+ or async. First two fields specify the major and minor number of the
+ device, third field specifies the operation type and the fourth field
+ specifies the number of bytes.
+
+- blkio.io_serviced
+ - Number of IOs completed to/from the disk by the group. These
+ are further divided by the type of operation - read or write, sync
+ or async. First two fields specify the major and minor number of the
+ device, third field specifies the operation type and the fourth field
+ specifies the number of IOs.
+
+- blkio.io_service_time
+ - Total amount of time between request dispatch and request completion
+ for the IOs done by this cgroup. This is in nanoseconds to make it
+ meaningful for flash devices too. For devices with queue depth of 1,
+ this time represents the actual service time. When queue_depth > 1,
+ that is no longer true as requests may be served out of order. This
+ may cause the service time for a given IO to include the service time
+ of multiple IOs when served out of order which may result in total
+ io_service_time > actual time elapsed. This time is further divided by
+ the type of operation - read or write, sync or async. First two fields
+ specify the major and minor number of the device, third field
+ specifies the operation type and the fourth field specifies the
+ io_service_time in ns.
+
+- blkio.io_wait_time
+ - Total amount of time the IOs for this cgroup spent waiting in the
+ scheduler queues for service. This can be greater than the total time
+ elapsed since it is cumulative io_wait_time for all IOs. It is not a
+ measure of total time the cgroup spent waiting but rather a measure of
+ the wait_time for its individual IOs. For devices with queue_depth > 1
+ this metric does not include the time spent waiting for service once
+ the IO is dispatched to the device but till it actually gets serviced
+ (there might be a time lag here due to re-ordering of requests by the
+ device). This is in nanoseconds to make it meaningful for flash
+ devices too. This time is further divided by the type of operation -
+ read or write, sync or async. First two fields specify the major and
+ minor number of the device, third field specifies the operation type
+ and the fourth field specifies the io_wait_time in ns.
+
+- blkio.io_merged
+ - Total number of bios/requests merged into requests belonging to this
+ cgroup. This is further divided by the type of operation - read or
+ write, sync or async.
+
+- blkio.io_queued
+ - Total number of requests queued up at any given instant for this
+ cgroup. This is further divided by the type of operation - read or
+ write, sync or async.
+
+- blkio.avg_queue_size
+ - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+ The average queue size for this cgroup over the entire time of this
+ cgroup's existence. Queue size samples are taken each time one of the
+ queues of this cgroup gets a timeslice.
+
+- blkio.group_wait_time
+ - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+ This is the amount of time the cgroup had to wait since it became busy
+ (i.e., went from 0 to 1 request queued) to get a timeslice for one of
+ its queues. This is different from the io_wait_time which is the
+ cumulative total of the amount of time spent by each IO in that cgroup
+ waiting in the scheduler queue. This is in nanoseconds. If this is
+ read when the cgroup is in a waiting (for timeslice) state, the stat
+ will only report the group_wait_time accumulated till the last time it
+ got a timeslice and will not include the current delta.
+
+- blkio.empty_time
+ - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+ This is the amount of time a cgroup spends without any pending
+ requests when not being served, i.e., it does not include any time
+ spent idling for one of the queues of the cgroup. This is in
+ nanoseconds. If this is read when the cgroup is in an empty state,
+ the stat will only report the empty_time accumulated till the last
+ time it had a pending request and will not include the current delta.
+
+- blkio.idle_time
+ - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y.
+ This is the amount of time spent by the IO scheduler idling for a
+ given cgroup in anticipation of a better request than the existing ones
+ from other queues/cgroups. This is in nanoseconds. If this is read
+ when the cgroup is in an idling state, the stat will only report the
+ idle_time accumulated till the last idle period and will not include
+ the current delta.
+
+- blkio.dequeue
+ - Debugging aid only enabled if CONFIG_DEBUG_BLK_CGROUP=y. This
+ gives the statistics about how many a times a group was dequeued
+ from service tree of the device. First two fields specify the major
+ and minor number of the device and third field specifies the number
+ of times a group was dequeued from a particular device.
+
+- blkio.*_recursive
+ - Recursive version of various stats. These files show the
+ same information as their non-recursive counterparts but
+ include stats from all the descendant cgroups.
+
+Throttling/Upper limit policy files
+-----------------------------------
+- blkio.throttle.read_bps_device
+ - Specifies upper limit on READ rate from the device. IO rate is
+ specified in bytes per second. Rules are per device. Following is
+ the format.
+
+ echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.read_bps_device
+
+- blkio.throttle.write_bps_device
+ - Specifies upper limit on WRITE rate to the device. IO rate is
+ specified in bytes per second. Rules are per device. Following is
+ the format.
+
+ echo "<major>:<minor> <rate_bytes_per_second>" > /cgrp/blkio.throttle.write_bps_device
+
+- blkio.throttle.read_iops_device
+ - Specifies upper limit on READ rate from the device. IO rate is
+ specified in IO per second. Rules are per device. Following is
+ the format.
+
+ echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.read_iops_device
+
+- blkio.throttle.write_iops_device
+ - Specifies upper limit on WRITE rate to the device. IO rate is
+ specified in io per second. Rules are per device. Following is
+ the format.
+
+ echo "<major>:<minor> <rate_io_per_second>" > /cgrp/blkio.throttle.write_iops_device
+
+Note: If both BW and IOPS rules are specified for a device, then IO is
+ subjected to both the constraints.
+
+- blkio.throttle.io_serviced
+ - Number of IOs (bio) completed to/from the disk by the group (as
+ seen by throttling policy). These are further divided by the type
+ of operation - read or write, sync or async. First two fields specify
+ the major and minor number of the device, third field specifies the
+ operation type and the fourth field specifies the number of IOs.
+
+ blkio.io_serviced does accounting as seen by CFQ and counts are in
+ number of requests (struct request). On the other hand,
+ blkio.throttle.io_serviced counts number of IO in terms of number
+ of bios as seen by throttling policy. These bios can later be
+ merged by elevator and total number of requests completed can be
+ lesser.
+
+- blkio.throttle.io_service_bytes
+ - Number of bytes transferred to/from the disk by the group. These
+ are further divided by the type of operation - read or write, sync
+ or async. First two fields specify the major and minor number of the
+ device, third field specifies the operation type and the fourth field
+ specifies the number of bytes.
+
+ These numbers should roughly be same as blkio.io_service_bytes as
+ updated by CFQ. The difference between two is that
+ blkio.io_service_bytes will not be updated if CFQ is not operating
+ on request queue.
+
+Common files among various policies
+-----------------------------------
+- blkio.reset_stats
+ - Writing an int to this file will result in resetting all the stats
+ for that cgroup.
+
+CFQ sysfs tunable
+=================
+/sys/block/<disk>/queue/iosched/slice_idle
+------------------------------------------
+On a faster hardware CFQ can be slow, especially with sequential workload.
+This happens because CFQ idles on a single queue and single queue might not
+drive deeper request queue depths to keep the storage busy. In such scenarios
+one can try setting slice_idle=0 and that would switch CFQ to IOPS
+(IO operations per second) mode on NCQ supporting hardware.
+
+That means CFQ will not idle between cfq queues of a cfq group and hence be
+able to driver higher queue depth and achieve better throughput. That also
+means that cfq provides fairness among groups in terms of IOPS and not in
+terms of disk time.
+
+/sys/block/<disk>/queue/iosched/group_idle
+------------------------------------------
+If one disables idling on individual cfq queues and cfq service trees by
+setting slice_idle=0, group_idle kicks in. That means CFQ will still idle
+on the group in an attempt to provide fairness among groups.
+
+By default group_idle is same as slice_idle and does not do anything if
+slice_idle is enabled.
+
+One can experience an overall throughput drop if you have created multiple
+groups and put applications in that group which are not driving enough
+IO to keep disk busy. In that case set group_idle=0, and CFQ will not idle
+on individual groups and throughput should improve.
+
+What works
+==========
+- Currently only sync IO queues are support. All the buffered writes are
+ still system wide and not per group. Hence we will not see service
+ differentiation between buffered writes between groups.
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
new file mode 100644
index 000000000..f935fac1e
--- /dev/null
+++ b/Documentation/cgroups/cgroups.txt
@@ -0,0 +1,678 @@
+ CGROUPS
+ -------
+
+Written by Paul Menage <menage@google.com> based on
+Documentation/cgroups/cpusets.txt
+
+Original copyright statements from cpusets.txt:
+Portions Copyright (C) 2004 BULL SA.
+Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+Modified by Paul Jackson <pj@sgi.com>
+Modified by Christoph Lameter <clameter@sgi.com>
+
+CONTENTS:
+=========
+
+1. Control Groups
+ 1.1 What are cgroups ?
+ 1.2 Why are cgroups needed ?
+ 1.3 How are cgroups implemented ?
+ 1.4 What does notify_on_release do ?
+ 1.5 What does clone_children do ?
+ 1.6 How do I use cgroups ?
+2. Usage Examples and Syntax
+ 2.1 Basic Usage
+ 2.2 Attaching processes
+ 2.3 Mounting hierarchies by name
+3. Kernel API
+ 3.1 Overview
+ 3.2 Synchronization
+ 3.3 Subsystem API
+4. Extended attributes usage
+5. Questions
+
+1. Control Groups
+=================
+
+1.1 What are cgroups ?
+----------------------
+
+Control Groups provide a mechanism for aggregating/partitioning sets of
+tasks, and all their future children, into hierarchical groups with
+specialized behaviour.
+
+Definitions:
+
+A *cgroup* associates a set of tasks with a set of parameters for one
+or more subsystems.
+
+A *subsystem* is a module that makes use of the task grouping
+facilities provided by cgroups to treat groups of tasks in
+particular ways. A subsystem is typically a "resource controller" that
+schedules a resource or applies per-cgroup limits, but it may be
+anything that wants to act on a group of processes, e.g. a
+virtualization subsystem.
+
+A *hierarchy* is a set of cgroups arranged in a tree, such that
+every task in the system is in exactly one of the cgroups in the
+hierarchy, and a set of subsystems; each subsystem has system-specific
+state attached to each cgroup in the hierarchy. Each hierarchy has
+an instance of the cgroup virtual filesystem associated with it.
+
+At any one time there may be multiple active hierarchies of task
+cgroups. Each hierarchy is a partition of all tasks in the system.
+
+User-level code may create and destroy cgroups by name in an
+instance of the cgroup virtual file system, specify and query to
+which cgroup a task is assigned, and list the task PIDs assigned to
+a cgroup. Those creations and assignments only affect the hierarchy
+associated with that instance of the cgroup file system.
+
+On their own, the only use for cgroups is for simple job
+tracking. The intention is that other subsystems hook into the generic
+cgroup support to provide new attributes for cgroups, such as
+accounting/limiting the resources which processes in a cgroup can
+access. For example, cpusets (see Documentation/cgroups/cpusets.txt) allow
+you to associate a set of CPUs and a set of memory nodes with the
+tasks in each cgroup.
+
+1.2 Why are cgroups needed ?
+----------------------------
+
+There are multiple efforts to provide process aggregations in the
+Linux kernel, mainly for resource-tracking purposes. Such efforts
+include cpusets, CKRM/ResGroups, UserBeanCounters, and virtual server
+namespaces. These all require the basic notion of a
+grouping/partitioning of processes, with newly forked processes ending
+up in the same group (cgroup) as their parent process.
+
+The kernel cgroup patch provides the minimum essential kernel
+mechanisms required to efficiently implement such groups. It has
+minimal impact on the system fast paths, and provides hooks for
+specific subsystems such as cpusets to provide additional behaviour as
+desired.
+
+Multiple hierarchy support is provided to allow for situations where
+the division of tasks into cgroups is distinctly different for
+different subsystems - having parallel hierarchies allows each
+hierarchy to be a natural division of tasks, without having to handle
+complex combinations of tasks that would be present if several
+unrelated subsystems needed to be forced into the same tree of
+cgroups.
+
+At one extreme, each resource controller or subsystem could be in a
+separate hierarchy; at the other extreme, all subsystems
+would be attached to the same hierarchy.
+
+As an example of a scenario (originally proposed by vatsa@in.ibm.com)
+that can benefit from multiple hierarchies, consider a large
+university server with various users - students, professors, system
+tasks etc. The resource planning for this server could be along the
+following lines:
+
+ CPU : "Top cpuset"
+ / \
+ CPUSet1 CPUSet2
+ | |
+ (Professors) (Students)
+
+ In addition (system tasks) are attached to topcpuset (so
+ that they can run anywhere) with a limit of 20%
+
+ Memory : Professors (50%), Students (30%), system (20%)
+
+ Disk : Professors (50%), Students (30%), system (20%)
+
+ Network : WWW browsing (20%), Network File System (60%), others (20%)
+ / \
+ Professors (15%) students (5%)
+
+Browsers like Firefox/Lynx go into the WWW network class, while (k)nfsd goes
+into the NFS network class.
+
+At the same time Firefox/Lynx will share an appropriate CPU/Memory class
+depending on who launched it (prof/student).
+
+With the ability to classify tasks differently for different resources
+(by putting those resource subsystems in different hierarchies),
+the admin can easily set up a script which receives exec notifications
+and depending on who is launching the browser he can
+
+ # echo browser_pid > /sys/fs/cgroup/<restype>/<userclass>/tasks
+
+With only a single hierarchy, he now would potentially have to create
+a separate cgroup for every browser launched and associate it with
+appropriate network and other resource class. This may lead to
+proliferation of such cgroups.
+
+Also let's say that the administrator would like to give enhanced network
+access temporarily to a student's browser (since it is night and the user
+wants to do online gaming :)) OR give one of the student's simulation
+apps enhanced CPU power.
+
+With ability to write PIDs directly to resource classes, it's just a
+matter of:
+
+ # echo pid > /sys/fs/cgroup/network/<new_class>/tasks
+ (after some time)
+ # echo pid > /sys/fs/cgroup/network/<orig_class>/tasks
+
+Without this ability, the administrator would have to split the cgroup into
+multiple separate ones and then associate the new cgroups with the
+new resource classes.
+
+
+
+1.3 How are cgroups implemented ?
+---------------------------------
+
+Control Groups extends the kernel as follows:
+
+ - Each task in the system has a reference-counted pointer to a
+ css_set.
+
+ - A css_set contains a set of reference-counted pointers to
+ cgroup_subsys_state objects, one for each cgroup subsystem
+ registered in the system. There is no direct link from a task to
+ the cgroup of which it's a member in each hierarchy, but this
+ can be determined by following pointers through the
+ cgroup_subsys_state objects. This is because accessing the
+ subsystem state is something that's expected to happen frequently
+ and in performance-critical code, whereas operations that require a
+ task's actual cgroup assignments (in particular, moving between
+ cgroups) are less common. A linked list runs through the cg_list
+ field of each task_struct using the css_set, anchored at
+ css_set->tasks.
+
+ - A cgroup hierarchy filesystem can be mounted for browsing and
+ manipulation from user space.
+
+ - You can list all the tasks (by PID) attached to any cgroup.
+
+The implementation of cgroups requires a few, simple hooks
+into the rest of the kernel, none in performance-critical paths:
+
+ - in init/main.c, to initialize the root cgroups and initial
+ css_set at system boot.
+
+ - in fork and exit, to attach and detach a task from its css_set.
+
+In addition, a new file system of type "cgroup" may be mounted, to
+enable browsing and modifying the cgroups presently known to the
+kernel. When mounting a cgroup hierarchy, you may specify a
+comma-separated list of subsystems to mount as the filesystem mount
+options. By default, mounting the cgroup filesystem attempts to
+mount a hierarchy containing all registered subsystems.
+
+If an active hierarchy with exactly the same set of subsystems already
+exists, it will be reused for the new mount. If no existing hierarchy
+matches, and any of the requested subsystems are in use in an existing
+hierarchy, the mount will fail with -EBUSY. Otherwise, a new hierarchy
+is activated, associated with the requested subsystems.
+
+It's not currently possible to bind a new subsystem to an active
+cgroup hierarchy, or to unbind a subsystem from an active cgroup
+hierarchy. This may be possible in future, but is fraught with nasty
+error-recovery issues.
+
+When a cgroup filesystem is unmounted, if there are any
+child cgroups created below the top-level cgroup, that hierarchy
+will remain active even though unmounted; if there are no
+child cgroups then the hierarchy will be deactivated.
+
+No new system calls are added for cgroups - all support for
+querying and modifying cgroups is via this cgroup file system.
+
+Each task under /proc has an added file named 'cgroup' displaying,
+for each active hierarchy, the subsystem names and the cgroup name
+as the path relative to the root of the cgroup file system.
+
+Each cgroup is represented by a directory in the cgroup file system
+containing the following files describing that cgroup:
+
+ - tasks: list of tasks (by PID) attached to that cgroup. This list
+ is not guaranteed to be sorted. Writing a thread ID into this file
+ moves the thread into this cgroup.
+ - cgroup.procs: list of thread group IDs in the cgroup. This list is
+ not guaranteed to be sorted or free of duplicate TGIDs, and userspace
+ should sort/uniquify the list if this property is required.
+ Writing a thread group ID into this file moves all threads in that
+ group into this cgroup.
+ - notify_on_release flag: run the release agent on exit?
+ - release_agent: the path to use for release notifications (this file
+ exists in the top cgroup only)
+
+Other subsystems such as cpusets may add additional files in each
+cgroup dir.
+
+New cgroups are created using the mkdir system call or shell
+command. The properties of a cgroup, such as its flags, are
+modified by writing to the appropriate file in that cgroups
+directory, as listed above.
+
+The named hierarchical structure of nested cgroups allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cgroup allows organizing the work load
+on a system into related sets of tasks. A task may be re-attached to
+any other cgroup, if allowed by the permissions on the necessary
+cgroup file system directories.
+
+When a task is moved from one cgroup to another, it gets a new
+css_set pointer - if there's an already existing css_set with the
+desired collection of cgroups then that group is reused, otherwise a new
+css_set is allocated. The appropriate existing css_set is located by
+looking into a hash table.
+
+To allow access from a cgroup to the css_sets (and hence tasks)
+that comprise it, a set of cg_cgroup_link objects form a lattice;
+each cg_cgroup_link is linked into a list of cg_cgroup_links for
+a single cgroup on its cgrp_link_list field, and a list of
+cg_cgroup_links for a single css_set on its cg_link_list.
+
+Thus the set of tasks in a cgroup can be listed by iterating over
+each css_set that references the cgroup, and sub-iterating over
+each css_set's task set.
+
+The use of a Linux virtual file system (vfs) to represent the
+cgroup hierarchy provides for a familiar permission and name space
+for cgroups, with a minimum of additional kernel code.
+
+1.4 What does notify_on_release do ?
+------------------------------------
+
+If the notify_on_release flag is enabled (1) in a cgroup, then
+whenever the last task in the cgroup leaves (exits or attaches to
+some other cgroup) and the last child cgroup of that cgroup
+is removed, then the kernel runs the command specified by the contents
+of the "release_agent" file in that hierarchy's root directory,
+supplying the pathname (relative to the mount point of the cgroup
+file system) of the abandoned cgroup. This enables automatic
+removal of abandoned cgroups. The default value of
+notify_on_release in the root cgroup at system boot is disabled
+(0). The default value of other cgroups at creation is the current
+value of their parents' notify_on_release settings. The default value of
+a cgroup hierarchy's release_agent path is empty.
+
+1.5 What does clone_children do ?
+---------------------------------
+
+This flag only affects the cpuset controller. If the clone_children
+flag is enabled (1) in a cgroup, a new cpuset cgroup will copy its
+configuration from the parent during initialization.
+
+1.6 How do I use cgroups ?
+--------------------------
+
+To start a new job that is to be contained within a cgroup, using
+the "cpuset" cgroup subsystem, the steps are something like:
+
+ 1) mount -t tmpfs cgroup_root /sys/fs/cgroup
+ 2) mkdir /sys/fs/cgroup/cpuset
+ 3) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ 4) Create the new cgroup by doing mkdir's and write's (or echo's) in
+ the /sys/fs/cgroup/cpuset virtual file system.
+ 5) Start a task that will be the "founding father" of the new job.
+ 6) Attach that task to the new cgroup by writing its PID to the
+ /sys/fs/cgroup/cpuset tasks file for that cgroup.
+ 7) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will setup a cgroup
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cgroup:
+
+ mount -t tmpfs cgroup_root /sys/fs/cgroup
+ mkdir /sys/fs/cgroup/cpuset
+ mount -t cgroup cpuset -ocpuset /sys/fs/cgroup/cpuset
+ cd /sys/fs/cgroup/cpuset
+ mkdir Charlie
+ cd Charlie
+ /bin/echo 2-3 > cpuset.cpus
+ /bin/echo 1 > cpuset.mems
+ /bin/echo $$ > tasks
+ sh
+ # The subshell 'sh' is now running in cgroup Charlie
+ # The next line should display '/Charlie'
+ cat /proc/self/cgroup
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, using cgroups can be done through the cgroup
+virtual filesystem.
+
+To mount a cgroup hierarchy with all available subsystems, type:
+# mount -t cgroup xxx /sys/fs/cgroup
+
+The "xxx" is not interpreted by the cgroup code, but will appear in
+/proc/mounts so may be any useful identifying string that you like.
+
+Note: Some subsystems do not work without some user input first. For instance,
+if cpusets are enabled the user will have to populate the cpus and mems files
+for each new cgroup created before that group can be used.
+
+As explained in section `1.2 Why are cgroups needed?' you should create
+different hierarchies of cgroups for each single resource or group of
+resources you want to control. Therefore, you should mount a tmpfs on
+/sys/fs/cgroup and create directories for each cgroup resource or resource
+group.
+
+# mount -t tmpfs cgroup_root /sys/fs/cgroup
+# mkdir /sys/fs/cgroup/rg1
+
+To mount a cgroup hierarchy with just the cpuset and memory
+subsystems, type:
+# mount -t cgroup -o cpuset,memory hier1 /sys/fs/cgroup/rg1
+
+While remounting cgroups is currently supported, it is not recommend
+to use it. Remounting allows changing bound subsystems and
+release_agent. Rebinding is hardly useful as it only works when the
+hierarchy is empty and release_agent itself should be replaced with
+conventional fsnotify. The support for remounting will be removed in
+the future.
+
+To Specify a hierarchy's release_agent:
+# mount -t cgroup -o cpuset,release_agent="/sbin/cpuset_release_agent" \
+ xxx /sys/fs/cgroup/rg1
+
+Note that specifying 'release_agent' more than once will return failure.
+
+Note that changing the set of subsystems is currently only supported
+when the hierarchy consists of a single (root) cgroup. Supporting
+the ability to arbitrarily bind/unbind subsystems from an existing
+cgroup hierarchy is intended to be implemented in the future.
+
+Then under /sys/fs/cgroup/rg1 you can find a tree that corresponds to the
+tree of the cgroups in the system. For instance, /sys/fs/cgroup/rg1
+is the cgroup that holds the whole system.
+
+If you want to change the value of release_agent:
+# echo "/sbin/new_release_agent" > /sys/fs/cgroup/rg1/release_agent
+
+It can also be changed via remount.
+
+If you want to create a new cgroup under /sys/fs/cgroup/rg1:
+# cd /sys/fs/cgroup/rg1
+# mkdir my_cgroup
+
+Now you want to do something with this cgroup.
+# cd my_cgroup
+
+In this directory you can find several files:
+# ls
+cgroup.procs notify_on_release tasks
+(plus whatever files added by the attached subsystems)
+
+Now attach your shell to this cgroup:
+# /bin/echo $$ > tasks
+
+You can also create cgroups inside your cgroup by using mkdir in this
+directory.
+# mkdir my_sub_cs
+
+To remove a cgroup, just use rmdir:
+# rmdir my_sub_cs
+
+This will fail if the cgroup is in use (has cgroups inside, or
+has processes attached, or is held alive by other subsystem-specific
+reference).
+
+2.2 Attaching processes
+-----------------------
+
+# /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another:
+
+# /bin/echo PID1 > tasks
+# /bin/echo PID2 > tasks
+ ...
+# /bin/echo PIDn > tasks
+
+You can attach the current shell task by echoing 0:
+
+# echo 0 > tasks
+
+You can use the cgroup.procs file instead of the tasks file to move all
+threads in a threadgroup at once. Echoing the PID of any task in a
+threadgroup to cgroup.procs causes all tasks in that threadgroup to be
+attached to the cgroup. Writing 0 to cgroup.procs moves all tasks
+in the writing task's threadgroup.
+
+Note: Since every task is always a member of exactly one cgroup in each
+mounted hierarchy, to remove a task from its current cgroup you must
+move it into a new cgroup (possibly the root cgroup) by writing to the
+new cgroup's tasks file.
+
+Note: Due to some restrictions enforced by some cgroup subsystems, moving
+a process to another cgroup can fail.
+
+2.3 Mounting hierarchies by name
+--------------------------------
+
+Passing the name=<x> option when mounting a cgroups hierarchy
+associates the given name with the hierarchy. This can be used when
+mounting a pre-existing hierarchy, in order to refer to it by name
+rather than by its set of active subsystems. Each hierarchy is either
+nameless, or has a unique name.
+
+The name should match [\w.-]+
+
+When passing a name=<x> option for a new hierarchy, you need to
+specify subsystems manually; the legacy behaviour of mounting all
+subsystems when none are explicitly specified is not supported when
+you give a subsystem a name.
+
+The name of the subsystem appears as part of the hierarchy description
+in /proc/mounts and /proc/<pid>/cgroups.
+
+
+3. Kernel API
+=============
+
+3.1 Overview
+------------
+
+Each kernel subsystem that wants to hook into the generic cgroup
+system needs to create a cgroup_subsys object. This contains
+various methods, which are callbacks from the cgroup system, along
+with a subsystem ID which will be assigned by the cgroup system.
+
+Other fields in the cgroup_subsys object include:
+
+- subsys_id: a unique array index for the subsystem, indicating which
+ entry in cgroup->subsys[] this subsystem should be managing.
+
+- name: should be initialized to a unique subsystem name. Should be
+ no longer than MAX_CGROUP_TYPE_NAMELEN.
+
+- early_init: indicate if the subsystem needs early initialization
+ at system boot.
+
+Each cgroup object created by the system has an array of pointers,
+indexed by subsystem ID; this pointer is entirely managed by the
+subsystem; the generic cgroup code will never touch this pointer.
+
+3.2 Synchronization
+-------------------
+
+There is a global mutex, cgroup_mutex, used by the cgroup
+system. This should be taken by anything that wants to modify a
+cgroup. It may also be taken to prevent cgroups from being
+modified, but more specific locks may be more appropriate in that
+situation.
+
+See kernel/cgroup.c for more details.
+
+Subsystems can take/release the cgroup_mutex via the functions
+cgroup_lock()/cgroup_unlock().
+
+Accessing a task's cgroup pointer may be done in the following ways:
+- while holding cgroup_mutex
+- while holding the task's alloc_lock (via task_lock())
+- inside an rcu_read_lock() section via rcu_dereference()
+
+3.3 Subsystem API
+-----------------
+
+Each subsystem should:
+
+- add an entry in linux/cgroup_subsys.h
+- define a cgroup_subsys object called <name>_subsys
+
+If a subsystem can be compiled as a module, it should also have in its
+module initcall a call to cgroup_load_subsys(), and in its exitcall a
+call to cgroup_unload_subsys(). It should also set its_subsys.module =
+THIS_MODULE in its .c file.
+
+Each subsystem may export the following methods. The only mandatory
+methods are css_alloc/free. Any others that are null are presumed to
+be successful no-ops.
+
+struct cgroup_subsys_state *css_alloc(struct cgroup *cgrp)
+(cgroup_mutex held by caller)
+
+Called to allocate a subsystem state object for a cgroup. The
+subsystem should allocate its subsystem state object for the passed
+cgroup, returning a pointer to the new object on success or a
+ERR_PTR() value. On success, the subsystem pointer should point to
+a structure of type cgroup_subsys_state (typically embedded in a
+larger subsystem-specific object), which will be initialized by the
+cgroup system. Note that this will be called at initialization to
+create the root subsystem state for this subsystem; this case can be
+identified by the passed cgroup object having a NULL parent (since
+it's the root of the hierarchy) and may be an appropriate place for
+initialization code.
+
+int css_online(struct cgroup *cgrp)
+(cgroup_mutex held by caller)
+
+Called after @cgrp successfully completed all allocations and made
+visible to cgroup_for_each_child/descendant_*() iterators. The
+subsystem may choose to fail creation by returning -errno. This
+callback can be used to implement reliable state sharing and
+propagation along the hierarchy. See the comment on
+cgroup_for_each_descendant_pre() for details.
+
+void css_offline(struct cgroup *cgrp);
+(cgroup_mutex held by caller)
+
+This is the counterpart of css_online() and called iff css_online()
+has succeeded on @cgrp. This signifies the beginning of the end of
+@cgrp. @cgrp is being removed and the subsystem should start dropping
+all references it's holding on @cgrp. When all references are dropped,
+cgroup removal will proceed to the next step - css_free(). After this
+callback, @cgrp should be considered dead to the subsystem.
+
+void css_free(struct cgroup *cgrp)
+(cgroup_mutex held by caller)
+
+The cgroup system is about to free @cgrp; the subsystem should free
+its subsystem state object. By the time this method is called, @cgrp
+is completely unused; @cgrp->parent is still valid. (Note - can also
+be called for a newly-created cgroup if an error occurs after this
+subsystem's create() method has been called for the new cgroup).
+
+int can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+(cgroup_mutex held by caller)
+
+Called prior to moving one or more tasks into a cgroup; if the
+subsystem returns an error, this will abort the attach operation.
+@tset contains the tasks to be attached and is guaranteed to have at
+least one task in it.
+
+If there are multiple tasks in the taskset, then:
+ - it's guaranteed that all are from the same thread group
+ - @tset contains all tasks from the thread group whether or not
+ they're switching cgroups
+ - the first task is the leader
+
+Each @tset entry also contains the task's old cgroup and tasks which
+aren't switching cgroup can be skipped easily using the
+cgroup_taskset_for_each() iterator. Note that this isn't called on a
+fork. If this method returns 0 (success) then this should remain valid
+while the caller holds cgroup_mutex and it is ensured that either
+attach() or cancel_attach() will be called in future.
+
+void css_reset(struct cgroup_subsys_state *css)
+(cgroup_mutex held by caller)
+
+An optional operation which should restore @css's configuration to the
+initial state. This is currently only used on the unified hierarchy
+when a subsystem is disabled on a cgroup through
+"cgroup.subtree_control" but should remain enabled because other
+subsystems depend on it. cgroup core makes such a css invisible by
+removing the associated interface files and invokes this callback so
+that the hidden subsystem can return to the initial neutral state.
+This prevents unexpected resource control from a hidden css and
+ensures that the configuration is in the initial state when it is made
+visible again later.
+
+void cancel_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+(cgroup_mutex held by caller)
+
+Called when a task attach operation has failed after can_attach() has succeeded.
+A subsystem whose can_attach() has some side-effects should provide this
+function, so that the subsystem can implement a rollback. If not, not necessary.
+This will be called only about subsystems whose can_attach() operation have
+succeeded. The parameters are identical to can_attach().
+
+void attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+(cgroup_mutex held by caller)
+
+Called after the task has been attached to the cgroup, to allow any
+post-attachment activity that requires memory allocations or blocking.
+The parameters are identical to can_attach().
+
+void fork(struct task_struct *task)
+
+Called when a task is forked into a cgroup.
+
+void exit(struct task_struct *task)
+
+Called during task exit.
+
+void bind(struct cgroup *root)
+(cgroup_mutex held by caller)
+
+Called when a cgroup subsystem is rebound to a different hierarchy
+and root cgroup. Currently this will only involve movement between
+the default hierarchy (which never has sub-cgroups) and a hierarchy
+that is being created/destroyed (and hence has no sub-cgroups).
+
+4. Extended attribute usage
+===========================
+
+cgroup filesystem supports certain types of extended attributes in its
+directories and files. The current supported types are:
+ - Trusted (XATTR_TRUSTED)
+ - Security (XATTR_SECURITY)
+
+Both require CAP_SYS_ADMIN capability to set.
+
+Like in tmpfs, the extended attributes in cgroup filesystem are stored
+using kernel memory and it's advised to keep the usage at minimum. This
+is the reason why user defined extended attributes are not supported, since
+any user can do it and there's no limit in the value size.
+
+The current known users for this feature are SELinux to limit cgroup usage
+in containers and systemd for assorted meta data like main PID in a cgroup
+(systemd creates a cgroup per service).
+
+5. Questions
+============
+
+Q: what's up with this '/bin/echo' ?
+A: bash's builtin 'echo' command does not check calls to write() against
+ errors. If you use it in the cgroup file system, you won't be
+ able to tell whether a command succeeded or failed.
+
+Q: When I attach processes, only the first of the line gets really attached !
+A: We can only return one error code per call to write(). So you should also
+ put only ONE PID.
+
diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt
new file mode 100644
index 000000000..9d73cc0ca
--- /dev/null
+++ b/Documentation/cgroups/cpuacct.txt
@@ -0,0 +1,49 @@
+CPU Accounting Controller
+-------------------------
+
+The CPU accounting controller is used to group tasks using cgroups and
+account the CPU usage of these groups of tasks.
+
+The CPU accounting controller supports multi-hierarchy groups. An accounting
+group accumulates the CPU usage of all of its child groups and the tasks
+directly present in its group.
+
+Accounting groups can be created by first mounting the cgroup filesystem.
+
+# mount -t cgroup -ocpuacct none /sys/fs/cgroup
+
+With the above step, the initial or the parent accounting group becomes
+visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
+the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
+/sys/fs/cgroup/cpuacct.usage gives the CPU time (in nanoseconds) obtained
+by this group which is essentially the CPU time obtained by all the tasks
+in the system.
+
+New accounting groups can be created under the parent group /sys/fs/cgroup.
+
+# cd /sys/fs/cgroup
+# mkdir g1
+# echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it. CPU time consumed by this bash and its children
+can be obtained from g1/cpuacct.usage and the same is accumulated in
+/sys/fs/cgroup/cpuacct.usage also.
+
+cpuacct.stat file lists a few statistics which further divide the
+CPU time obtained by the cgroup into user and system times. Currently
+the following statistics are supported:
+
+user: Time spent by tasks of the cgroup in user mode.
+system: Time spent by tasks of the cgroup in kernel mode.
+
+user and system are in USER_HZ unit.
+
+cpuacct controller uses percpu_counter interface to collect user and
+system times. This has two side effects:
+
+- It is theoretically possible to see wrong values for user and system times.
+ This is because percpu_counter_read() on 32bit systems isn't safe
+ against concurrent writes.
+- It is possible to see slightly outdated values for user and system times
+ due to the batch processing nature of percpu_counter.
diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
new file mode 100644
index 000000000..fdf7dff3f
--- /dev/null
+++ b/Documentation/cgroups/cpusets.txt
@@ -0,0 +1,839 @@
+ CPUSETS
+ -------
+
+Copyright (C) 2004 BULL SA.
+Written by Simon.Derr@bull.net
+
+Portions Copyright (c) 2004-2006 Silicon Graphics, Inc.
+Modified by Paul Jackson <pj@sgi.com>
+Modified by Christoph Lameter <clameter@sgi.com>
+Modified by Paul Menage <menage@google.com>
+Modified by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+
+CONTENTS:
+=========
+
+1. Cpusets
+ 1.1 What are cpusets ?
+ 1.2 Why are cpusets needed ?
+ 1.3 How are cpusets implemented ?
+ 1.4 What are exclusive cpusets ?
+ 1.5 What is memory_pressure ?
+ 1.6 What is memory spread ?
+ 1.7 What is sched_load_balance ?
+ 1.8 What is sched_relax_domain_level ?
+ 1.9 How do I use cpusets ?
+2. Usage Examples and Syntax
+ 2.1 Basic Usage
+ 2.2 Adding/removing cpus
+ 2.3 Setting flags
+ 2.4 Attaching processes
+3. Questions
+4. Contact
+
+1. Cpusets
+==========
+
+1.1 What are cpusets ?
+----------------------
+
+Cpusets provide a mechanism for assigning a set of CPUs and Memory
+Nodes to a set of tasks. In this document "Memory Node" refers to
+an on-line node that contains memory.
+
+Cpusets constrain the CPU and Memory placement of tasks to only
+the resources within a task's current cpuset. They form a nested
+hierarchy visible in a virtual file system. These are the essential
+hooks, beyond what is already present, required to manage dynamic
+job placement on large systems.
+
+Cpusets use the generic cgroup subsystem described in
+Documentation/cgroups/cgroups.txt.
+
+Requests by a task, using the sched_setaffinity(2) system call to
+include CPUs in its CPU affinity mask, and using the mbind(2) and
+set_mempolicy(2) system calls to include Memory Nodes in its memory
+policy, are both filtered through that task's cpuset, filtering out any
+CPUs or Memory Nodes not in that cpuset. The scheduler will not
+schedule a task on a CPU that is not allowed in its cpus_allowed
+vector, and the kernel page allocator will not allocate a page on a
+node that is not allowed in the requesting task's mems_allowed vector.
+
+User level code may create and destroy cpusets by name in the cgroup
+virtual file system, manage the attributes and permissions of these
+cpusets and which CPUs and Memory Nodes are assigned to each cpuset,
+specify and query to which cpuset a task is assigned, and list the
+task pids assigned to a cpuset.
+
+
+1.2 Why are cpusets needed ?
+----------------------------
+
+The management of large computer systems, with many processors (CPUs),
+complex memory cache hierarchies and multiple Memory Nodes having
+non-uniform access times (NUMA) presents additional challenges for
+the efficient scheduling and memory placement of processes.
+
+Frequently more modest sized systems can be operated with adequate
+efficiency just by letting the operating system automatically share
+the available CPU and Memory resources amongst the requesting tasks.
+
+But larger systems, which benefit more from careful processor and
+memory placement to reduce memory access times and contention,
+and which typically represent a larger investment for the customer,
+can benefit from explicitly placing jobs on properly sized subsets of
+the system.
+
+This can be especially valuable on:
+
+ * Web Servers running multiple instances of the same web application,
+ * Servers running different applications (for instance, a web server
+ and a database), or
+ * NUMA systems running large HPC applications with demanding
+ performance characteristics.
+
+These subsets, or "soft partitions" must be able to be dynamically
+adjusted, as the job mix changes, without impacting other concurrently
+executing jobs. The location of the running jobs pages may also be moved
+when the memory locations are changed.
+
+The kernel cpuset patch provides the minimum essential kernel
+mechanisms required to efficiently implement such subsets. It
+leverages existing CPU and Memory Placement facilities in the Linux
+kernel to avoid any additional impact on the critical scheduler or
+memory allocator code.
+
+
+1.3 How are cpusets implemented ?
+---------------------------------
+
+Cpusets provide a Linux kernel mechanism to constrain which CPUs and
+Memory Nodes are used by a process or set of processes.
+
+The Linux kernel already has a pair of mechanisms to specify on which
+CPUs a task may be scheduled (sched_setaffinity) and on which Memory
+Nodes it may obtain memory (mbind, set_mempolicy).
+
+Cpusets extends these two mechanisms as follows:
+
+ - Cpusets are sets of allowed CPUs and Memory Nodes, known to the
+ kernel.
+ - Each task in the system is attached to a cpuset, via a pointer
+ in the task structure to a reference counted cgroup structure.
+ - Calls to sched_setaffinity are filtered to just those CPUs
+ allowed in that task's cpuset.
+ - Calls to mbind and set_mempolicy are filtered to just
+ those Memory Nodes allowed in that task's cpuset.
+ - The root cpuset contains all the systems CPUs and Memory
+ Nodes.
+ - For any cpuset, one can define child cpusets containing a subset
+ of the parents CPU and Memory Node resources.
+ - The hierarchy of cpusets can be mounted at /dev/cpuset, for
+ browsing and manipulation from user space.
+ - A cpuset may be marked exclusive, which ensures that no other
+ cpuset (except direct ancestors and descendants) may contain
+ any overlapping CPUs or Memory Nodes.
+ - You can list all the tasks (by pid) attached to any cpuset.
+
+The implementation of cpusets requires a few, simple hooks
+into the rest of the kernel, none in performance critical paths:
+
+ - in init/main.c, to initialize the root cpuset at system boot.
+ - in fork and exit, to attach and detach a task from its cpuset.
+ - in sched_setaffinity, to mask the requested CPUs by what's
+ allowed in that task's cpuset.
+ - in sched.c migrate_live_tasks(), to keep migrating tasks within
+ the CPUs allowed by their cpuset, if possible.
+ - in the mbind and set_mempolicy system calls, to mask the requested
+ Memory Nodes by what's allowed in that task's cpuset.
+ - in page_alloc.c, to restrict memory to allowed nodes.
+ - in vmscan.c, to restrict page recovery to the current cpuset.
+
+You should mount the "cgroup" filesystem type in order to enable
+browsing and modifying the cpusets presently known to the kernel. No
+new system calls are added for cpusets - all support for querying and
+modifying cpusets is via this cpuset file system.
+
+The /proc/<pid>/status file for each task has four added lines,
+displaying the task's cpus_allowed (on which CPUs it may be scheduled)
+and mems_allowed (on which Memory Nodes it may obtain memory),
+in the two formats seen in the following example:
+
+ Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff
+ Cpus_allowed_list: 0-127
+ Mems_allowed: ffffffff,ffffffff
+ Mems_allowed_list: 0-63
+
+Each cpuset is represented by a directory in the cgroup file system
+containing (on top of the standard cgroup files) the following
+files describing that cpuset:
+
+ - cpuset.cpus: list of CPUs in that cpuset
+ - cpuset.mems: list of Memory Nodes in that cpuset
+ - cpuset.memory_migrate flag: if set, move pages to cpusets nodes
+ - cpuset.cpu_exclusive flag: is cpu placement exclusive?
+ - cpuset.mem_exclusive flag: is memory placement exclusive?
+ - cpuset.mem_hardwall flag: is memory allocation hardwalled
+ - cpuset.memory_pressure: measure of how much paging pressure in cpuset
+ - cpuset.memory_spread_page flag: if set, spread page cache evenly on allowed nodes
+ - cpuset.memory_spread_slab flag: if set, spread slab cache evenly on allowed nodes
+ - cpuset.sched_load_balance flag: if set, load balance within CPUs on that cpuset
+ - cpuset.sched_relax_domain_level: the searching range when migrating tasks
+
+In addition, only the root cpuset has the following file:
+ - cpuset.memory_pressure_enabled flag: compute memory_pressure?
+
+New cpusets are created using the mkdir system call or shell
+command. The properties of a cpuset, such as its flags, allowed
+CPUs and Memory Nodes, and attached tasks, are modified by writing
+to the appropriate file in that cpusets directory, as listed above.
+
+The named hierarchical structure of nested cpusets allows partitioning
+a large system into nested, dynamically changeable, "soft-partitions".
+
+The attachment of each task, automatically inherited at fork by any
+children of that task, to a cpuset allows organizing the work load
+on a system into related sets of tasks such that each set is constrained
+to using the CPUs and Memory Nodes of a particular cpuset. A task
+may be re-attached to any other cpuset, if allowed by the permissions
+on the necessary cpuset file system directories.
+
+Such management of a system "in the large" integrates smoothly with
+the detailed placement done on individual tasks and memory regions
+using the sched_setaffinity, mbind and set_mempolicy system calls.
+
+The following rules apply to each cpuset:
+
+ - Its CPUs and Memory Nodes must be a subset of its parents.
+ - It can't be marked exclusive unless its parent is.
+ - If its cpu or memory is exclusive, they may not overlap any sibling.
+
+These rules, and the natural hierarchy of cpusets, enable efficient
+enforcement of the exclusive guarantee, without having to scan all
+cpusets every time any of them change to ensure nothing overlaps a
+exclusive cpuset. Also, the use of a Linux virtual file system (vfs)
+to represent the cpuset hierarchy provides for a familiar permission
+and name space for cpusets, with a minimum of additional kernel code.
+
+The cpus and mems files in the root (top_cpuset) cpuset are
+read-only. The cpus file automatically tracks the value of
+cpu_online_mask using a CPU hotplug notifier, and the mems file
+automatically tracks the value of node_states[N_MEMORY]--i.e.,
+nodes with memory--using the cpuset_track_online_nodes() hook.
+
+
+1.4 What are exclusive cpusets ?
+--------------------------------
+
+If a cpuset is cpu or mem exclusive, no other cpuset, other than
+a direct ancestor or descendant, may share any of the same CPUs or
+Memory Nodes.
+
+A cpuset that is cpuset.mem_exclusive *or* cpuset.mem_hardwall is "hardwalled",
+i.e. it restricts kernel allocations for page, buffer and other data
+commonly shared by the kernel across multiple users. All cpusets,
+whether hardwalled or not, restrict allocations of memory for user
+space. This enables configuring a system so that several independent
+jobs can share common kernel data, such as file system pages, while
+isolating each job's user allocation in its own cpuset. To do this,
+construct a large mem_exclusive cpuset to hold all the jobs, and
+construct child, non-mem_exclusive cpusets for each individual job.
+Only a small amount of typical kernel memory, such as requests from
+interrupt handlers, is allowed to be taken outside even a
+mem_exclusive cpuset.
+
+
+1.5 What is memory_pressure ?
+-----------------------------
+The memory_pressure of a cpuset provides a simple per-cpuset metric
+of the rate that the tasks in a cpuset are attempting to free up in
+use memory on the nodes of the cpuset to satisfy additional memory
+requests.
+
+This enables batch managers monitoring jobs running in dedicated
+cpusets to efficiently detect what level of memory pressure that job
+is causing.
+
+This is useful both on tightly managed systems running a wide mix of
+submitted jobs, which may choose to terminate or re-prioritize jobs that
+are trying to use more memory than allowed on the nodes assigned to them,
+and with tightly coupled, long running, massively parallel scientific
+computing jobs that will dramatically fail to meet required performance
+goals if they start to use more memory than allowed to them.
+
+This mechanism provides a very economical way for the batch manager
+to monitor a cpuset for signs of memory pressure. It's up to the
+batch manager or other user code to decide what to do about it and
+take action.
+
+==> Unless this feature is enabled by writing "1" to the special file
+ /dev/cpuset/memory_pressure_enabled, the hook in the rebalance
+ code of __alloc_pages() for this metric reduces to simply noticing
+ that the cpuset_memory_pressure_enabled flag is zero. So only
+ systems that enable this feature will compute the metric.
+
+Why a per-cpuset, running average:
+
+ Because this meter is per-cpuset, rather than per-task or mm,
+ the system load imposed by a batch scheduler monitoring this
+ metric is sharply reduced on large systems, because a scan of
+ the tasklist can be avoided on each set of queries.
+
+ Because this meter is a running average, instead of an accumulating
+ counter, a batch scheduler can detect memory pressure with a
+ single read, instead of having to read and accumulate results
+ for a period of time.
+
+ Because this meter is per-cpuset rather than per-task or mm,
+ the batch scheduler can obtain the key information, memory
+ pressure in a cpuset, with a single read, rather than having to
+ query and accumulate results over all the (dynamically changing)
+ set of tasks in the cpuset.
+
+A per-cpuset simple digital filter (requires a spinlock and 3 words
+of data per-cpuset) is kept, and updated by any task attached to that
+cpuset, if it enters the synchronous (direct) page reclaim code.
+
+A per-cpuset file provides an integer number representing the recent
+(half-life of 10 seconds) rate of direct page reclaims caused by
+the tasks in the cpuset, in units of reclaims attempted per second,
+times 1000.
+
+
+1.6 What is memory spread ?
+---------------------------
+There are two boolean flag files per cpuset that control where the
+kernel allocates pages for the file system buffers and related in
+kernel data structures. They are called 'cpuset.memory_spread_page' and
+'cpuset.memory_spread_slab'.
+
+If the per-cpuset boolean flag file 'cpuset.memory_spread_page' is set, then
+the kernel will spread the file system buffers (page cache) evenly
+over all the nodes that the faulting task is allowed to use, instead
+of preferring to put those pages on the node where the task is running.
+
+If the per-cpuset boolean flag file 'cpuset.memory_spread_slab' is set,
+then the kernel will spread some file system related slab caches,
+such as for inodes and dentries evenly over all the nodes that the
+faulting task is allowed to use, instead of preferring to put those
+pages on the node where the task is running.
+
+The setting of these flags does not affect anonymous data segment or
+stack segment pages of a task.
+
+By default, both kinds of memory spreading are off, and memory
+pages are allocated on the node local to where the task is running,
+except perhaps as modified by the task's NUMA mempolicy or cpuset
+configuration, so long as sufficient free memory pages are available.
+
+When new cpusets are created, they inherit the memory spread settings
+of their parent.
+
+Setting memory spreading causes allocations for the affected page
+or slab caches to ignore the task's NUMA mempolicy and be spread
+instead. Tasks using mbind() or set_mempolicy() calls to set NUMA
+mempolicies will not notice any change in these calls as a result of
+their containing task's memory spread settings. If memory spreading
+is turned off, then the currently specified NUMA mempolicy once again
+applies to memory page allocations.
+
+Both 'cpuset.memory_spread_page' and 'cpuset.memory_spread_slab' are boolean flag
+files. By default they contain "0", meaning that the feature is off
+for that cpuset. If a "1" is written to that file, then that turns
+the named feature on.
+
+The implementation is simple.
+
+Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
+PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
+joins that cpuset. The page allocation calls for the page cache
+is modified to perform an inline check for this PFA_SPREAD_PAGE task
+flag, and if set, a call to a new routine cpuset_mem_spread_node()
+returns the node to prefer for the allocation.
+
+Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
+PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
+pages from the node returned by cpuset_mem_spread_node().
+
+The cpuset_mem_spread_node() routine is also simple. It uses the
+value of a per-task rotor cpuset_mem_spread_rotor to select the next
+node in the current task's mems_allowed to prefer for the allocation.
+
+This memory placement policy is also known (in other contexts) as
+round-robin or interleave.
+
+This policy can provide substantial improvements for jobs that need
+to place thread local data on the corresponding node, but that need
+to access large file system data sets that need to be spread across
+the several nodes in the jobs cpuset in order to fit. Without this
+policy, especially for jobs that might have one thread reading in the
+data set, the memory allocation across the nodes in the jobs cpuset
+can become very uneven.
+
+1.7 What is sched_load_balance ?
+--------------------------------
+
+The kernel scheduler (kernel/sched/core.c) automatically load balances
+tasks. If one CPU is underutilized, kernel code running on that
+CPU will look for tasks on other more overloaded CPUs and move those
+tasks to itself, within the constraints of such placement mechanisms
+as cpusets and sched_setaffinity.
+
+The algorithmic cost of load balancing and its impact on key shared
+kernel data structures such as the task list increases more than
+linearly with the number of CPUs being balanced. So the scheduler
+has support to partition the systems CPUs into a number of sched
+domains such that it only load balances within each sched domain.
+Each sched domain covers some subset of the CPUs in the system;
+no two sched domains overlap; some CPUs might not be in any sched
+domain and hence won't be load balanced.
+
+Put simply, it costs less to balance between two smaller sched domains
+than one big one, but doing so means that overloads in one of the
+two domains won't be load balanced to the other one.
+
+By default, there is one sched domain covering all CPUs, including those
+marked isolated using the kernel boot time "isolcpus=" argument. However,
+the isolated CPUs will not participate in load balancing, and will not
+have tasks running on them unless explicitly assigned.
+
+This default load balancing across all CPUs is not well suited for
+the following two situations:
+ 1) On large systems, load balancing across many CPUs is expensive.
+ If the system is managed using cpusets to place independent jobs
+ on separate sets of CPUs, full load balancing is unnecessary.
+ 2) Systems supporting realtime on some CPUs need to minimize
+ system overhead on those CPUs, including avoiding task load
+ balancing if that is not needed.
+
+When the per-cpuset flag "cpuset.sched_load_balance" is enabled (the default
+setting), it requests that all the CPUs in that cpusets allowed 'cpuset.cpus'
+be contained in a single sched domain, ensuring that load balancing
+can move a task (not otherwised pinned, as by sched_setaffinity)
+from any CPU in that cpuset to any other.
+
+When the per-cpuset flag "cpuset.sched_load_balance" is disabled, then the
+scheduler will avoid load balancing across the CPUs in that cpuset,
+--except-- in so far as is necessary because some overlapping cpuset
+has "sched_load_balance" enabled.
+
+So, for example, if the top cpuset has the flag "cpuset.sched_load_balance"
+enabled, then the scheduler will have one sched domain covering all
+CPUs, and the setting of the "cpuset.sched_load_balance" flag in any other
+cpusets won't matter, as we're already fully load balancing.
+
+Therefore in the above two situations, the top cpuset flag
+"cpuset.sched_load_balance" should be disabled, and only some of the smaller,
+child cpusets have this flag enabled.
+
+When doing this, you don't usually want to leave any unpinned tasks in
+the top cpuset that might use non-trivial amounts of CPU, as such tasks
+may be artificially constrained to some subset of CPUs, depending on
+the particulars of this flag setting in descendant cpusets. Even if
+such a task could use spare CPU cycles in some other CPUs, the kernel
+scheduler might not consider the possibility of load balancing that
+task to that underused CPU.
+
+Of course, tasks pinned to a particular CPU can be left in a cpuset
+that disables "cpuset.sched_load_balance" as those tasks aren't going anywhere
+else anyway.
+
+There is an impedance mismatch here, between cpusets and sched domains.
+Cpusets are hierarchical and nest. Sched domains are flat; they don't
+overlap and each CPU is in at most one sched domain.
+
+It is necessary for sched domains to be flat because load balancing
+across partially overlapping sets of CPUs would risk unstable dynamics
+that would be beyond our understanding. So if each of two partially
+overlapping cpusets enables the flag 'cpuset.sched_load_balance', then we
+form a single sched domain that is a superset of both. We won't move
+a task to a CPU outside its cpuset, but the scheduler load balancing
+code might waste some compute cycles considering that possibility.
+
+This mismatch is why there is not a simple one-to-one relation
+between which cpusets have the flag "cpuset.sched_load_balance" enabled,
+and the sched domain configuration. If a cpuset enables the flag, it
+will get balancing across all its CPUs, but if it disables the flag,
+it will only be assured of no load balancing if no other overlapping
+cpuset enables the flag.
+
+If two cpusets have partially overlapping 'cpuset.cpus' allowed, and only
+one of them has this flag enabled, then the other may find its
+tasks only partially load balanced, just on the overlapping CPUs.
+This is just the general case of the top_cpuset example given a few
+paragraphs above. In the general case, as in the top cpuset case,
+don't leave tasks that might use non-trivial amounts of CPU in
+such partially load balanced cpusets, as they may be artificially
+constrained to some subset of the CPUs allowed to them, for lack of
+load balancing to the other CPUs.
+
+CPUs in "cpuset.isolcpus" were excluded from load balancing by the
+isolcpus= kernel boot option, and will never be load balanced regardless
+of the value of "cpuset.sched_load_balance" in any cpuset.
+
+1.7.1 sched_load_balance implementation details.
+------------------------------------------------
+
+The per-cpuset flag 'cpuset.sched_load_balance' defaults to enabled (contrary
+to most cpuset flags.) When enabled for a cpuset, the kernel will
+ensure that it can load balance across all the CPUs in that cpuset
+(makes sure that all the CPUs in the cpus_allowed of that cpuset are
+in the same sched domain.)
+
+If two overlapping cpusets both have 'cpuset.sched_load_balance' enabled,
+then they will be (must be) both in the same sched domain.
+
+If, as is the default, the top cpuset has 'cpuset.sched_load_balance' enabled,
+then by the above that means there is a single sched domain covering
+the whole system, regardless of any other cpuset settings.
+
+The kernel commits to user space that it will avoid load balancing
+where it can. It will pick as fine a granularity partition of sched
+domains as it can while still providing load balancing for any set
+of CPUs allowed to a cpuset having 'cpuset.sched_load_balance' enabled.
+
+The internal kernel cpuset to scheduler interface passes from the
+cpuset code to the scheduler code a partition of the load balanced
+CPUs in the system. This partition is a set of subsets (represented
+as an array of struct cpumask) of CPUs, pairwise disjoint, that cover
+all the CPUs that must be load balanced.
+
+The cpuset code builds a new such partition and passes it to the
+scheduler sched domain setup code, to have the sched domains rebuilt
+as necessary, whenever:
+ - the 'cpuset.sched_load_balance' flag of a cpuset with non-empty CPUs changes,
+ - or CPUs come or go from a cpuset with this flag enabled,
+ - or 'cpuset.sched_relax_domain_level' value of a cpuset with non-empty CPUs
+ and with this flag enabled changes,
+ - or a cpuset with non-empty CPUs and with this flag enabled is removed,
+ - or a cpu is offlined/onlined.
+
+This partition exactly defines what sched domains the scheduler should
+setup - one sched domain for each element (struct cpumask) in the
+partition.
+
+The scheduler remembers the currently active sched domain partitions.
+When the scheduler routine partition_sched_domains() is invoked from
+the cpuset code to update these sched domains, it compares the new
+partition requested with the current, and updates its sched domains,
+removing the old and adding the new, for each change.
+
+
+1.8 What is sched_relax_domain_level ?
+--------------------------------------
+
+In sched domain, the scheduler migrates tasks in 2 ways; periodic load
+balance on tick, and at time of some schedule events.
+
+When a task is woken up, scheduler try to move the task on idle CPU.
+For example, if a task A running on CPU X activates another task B
+on the same CPU X, and if CPU Y is X's sibling and performing idle,
+then scheduler migrate task B to CPU Y so that task B can start on
+CPU Y without waiting task A on CPU X.
+
+And if a CPU run out of tasks in its runqueue, the CPU try to pull
+extra tasks from other busy CPUs to help them before it is going to
+be idle.
+
+Of course it takes some searching cost to find movable tasks and/or
+idle CPUs, the scheduler might not search all CPUs in the domain
+every time. In fact, in some architectures, the searching ranges on
+events are limited in the same socket or node where the CPU locates,
+while the load balance on tick searches all.
+
+For example, assume CPU Z is relatively far from CPU X. Even if CPU Z
+is idle while CPU X and the siblings are busy, scheduler can't migrate
+woken task B from X to Z since it is out of its searching range.
+As the result, task B on CPU X need to wait task A or wait load balance
+on the next tick. For some applications in special situation, waiting
+1 tick may be too long.
+
+The 'cpuset.sched_relax_domain_level' file allows you to request changing
+this searching range as you like. This file takes int value which
+indicates size of searching range in levels ideally as follows,
+otherwise initial value -1 that indicates the cpuset has no request.
+
+ -1 : no request. use system default or follow request of others.
+ 0 : no search.
+ 1 : search siblings (hyperthreads in a core).
+ 2 : search cores in a package.
+ 3 : search cpus in a node [= system wide on non-NUMA system]
+ 4 : search nodes in a chunk of node [on NUMA system]
+ 5 : search system wide [on NUMA system]
+
+The system default is architecture dependent. The system default
+can be changed using the relax_domain_level= boot parameter.
+
+This file is per-cpuset and affect the sched domain where the cpuset
+belongs to. Therefore if the flag 'cpuset.sched_load_balance' of a cpuset
+is disabled, then 'cpuset.sched_relax_domain_level' have no effect since
+there is no sched domain belonging the cpuset.
+
+If multiple cpusets are overlapping and hence they form a single sched
+domain, the largest value among those is used. Be careful, if one
+requests 0 and others are -1 then 0 is used.
+
+Note that modifying this file will have both good and bad effects,
+and whether it is acceptable or not depends on your situation.
+Don't modify this file if you are not sure.
+
+If your situation is:
+ - The migration costs between each cpu can be assumed considerably
+ small(for you) due to your special application's behavior or
+ special hardware support for CPU cache etc.
+ - The searching cost doesn't have impact(for you) or you can make
+ the searching cost enough small by managing cpuset to compact etc.
+ - The latency is required even it sacrifices cache hit rate etc.
+then increasing 'sched_relax_domain_level' would benefit you.
+
+
+1.9 How do I use cpusets ?
+--------------------------
+
+In order to minimize the impact of cpusets on critical kernel
+code, such as the scheduler, and due to the fact that the kernel
+does not support one task updating the memory placement of another
+task directly, the impact on a task of changing its cpuset CPU
+or Memory Node placement, or of changing to which cpuset a task
+is attached, is subtle.
+
+If a cpuset has its Memory Nodes modified, then for each task attached
+to that cpuset, the next time that the kernel attempts to allocate
+a page of memory for that task, the kernel will notice the change
+in the task's cpuset, and update its per-task memory placement to
+remain within the new cpusets memory placement. If the task was using
+mempolicy MPOL_BIND, and the nodes to which it was bound overlap with
+its new cpuset, then the task will continue to use whatever subset
+of MPOL_BIND nodes are still allowed in the new cpuset. If the task
+was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed
+in the new cpuset, then the task will be essentially treated as if it
+was MPOL_BIND bound to the new cpuset (even though its NUMA placement,
+as queried by get_mempolicy(), doesn't change). If a task is moved
+from one cpuset to another, then the kernel will adjust the task's
+memory placement, as above, the next time that the kernel attempts
+to allocate a page of memory for that task.
+
+If a cpuset has its 'cpuset.cpus' modified, then each task in that cpuset
+will have its allowed CPU placement changed immediately. Similarly,
+if a task's pid is written to another cpusets 'cpuset.tasks' file, then its
+allowed CPU placement is changed immediately. If such a task had been
+bound to some subset of its cpuset using the sched_setaffinity() call,
+the task will be allowed to run on any CPU allowed in its new cpuset,
+negating the effect of the prior sched_setaffinity() call.
+
+In summary, the memory placement of a task whose cpuset is changed is
+updated by the kernel, on the next allocation of a page for that task,
+and the processor placement is updated immediately.
+
+Normally, once a page is allocated (given a physical page
+of main memory) then that page stays on whatever node it
+was allocated, so long as it remains allocated, even if the
+cpusets memory placement policy 'cpuset.mems' subsequently changes.
+If the cpuset flag file 'cpuset.memory_migrate' is set true, then when
+tasks are attached to that cpuset, any pages that task had
+allocated to it on nodes in its previous cpuset are migrated
+to the task's new cpuset. The relative placement of the page within
+the cpuset is preserved during these migration operations if possible.
+For example if the page was on the second valid node of the prior cpuset
+then the page will be placed on the second valid node of the new cpuset.
+
+Also if 'cpuset.memory_migrate' is set true, then if that cpuset's
+'cpuset.mems' file is modified, pages allocated to tasks in that
+cpuset, that were on nodes in the previous setting of 'cpuset.mems',
+will be moved to nodes in the new setting of 'mems.'
+Pages that were not in the task's prior cpuset, or in the cpuset's
+prior 'cpuset.mems' setting, will not be moved.
+
+There is an exception to the above. If hotplug functionality is used
+to remove all the CPUs that are currently assigned to a cpuset,
+then all the tasks in that cpuset will be moved to the nearest ancestor
+with non-empty cpus. But the moving of some (or all) tasks might fail if
+cpuset is bound with another cgroup subsystem which has some restrictions
+on task attaching. In this failing case, those tasks will stay
+in the original cpuset, and the kernel will automatically update
+their cpus_allowed to allow all online CPUs. When memory hotplug
+functionality for removing Memory Nodes is available, a similar exception
+is expected to apply there as well. In general, the kernel prefers to
+violate cpuset placement, over starving a task that has had all
+its allowed CPUs or Memory Nodes taken offline.
+
+There is a second exception to the above. GFP_ATOMIC requests are
+kernel internal allocations that must be satisfied, immediately.
+The kernel may drop some request, in rare cases even panic, if a
+GFP_ATOMIC alloc fails. If the request cannot be satisfied within
+the current task's cpuset, then we relax the cpuset, and look for
+memory anywhere we can find it. It's better to violate the cpuset
+than stress the kernel.
+
+To start a new job that is to be contained within a cpuset, the steps are:
+
+ 1) mkdir /sys/fs/cgroup/cpuset
+ 2) mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ 3) Create the new cpuset by doing mkdir's and write's (or echo's) in
+ the /sys/fs/cgroup/cpuset virtual file system.
+ 4) Start a task that will be the "founding father" of the new job.
+ 5) Attach that task to the new cpuset by writing its pid to the
+ /sys/fs/cgroup/cpuset tasks file for that cpuset.
+ 6) fork, exec or clone the job tasks from this founding father task.
+
+For example, the following sequence of commands will setup a cpuset
+named "Charlie", containing just CPUs 2 and 3, and Memory Node 1,
+and then start a subshell 'sh' in that cpuset:
+
+ mount -t cgroup -ocpuset cpuset /sys/fs/cgroup/cpuset
+ cd /sys/fs/cgroup/cpuset
+ mkdir Charlie
+ cd Charlie
+ /bin/echo 2-3 > cpuset.cpus
+ /bin/echo 1 > cpuset.mems
+ /bin/echo $$ > tasks
+ sh
+ # The subshell 'sh' is now running in cpuset Charlie
+ # The next line should display '/Charlie'
+ cat /proc/self/cpuset
+
+There are ways to query or modify cpusets:
+ - via the cpuset file system directly, using the various cd, mkdir, echo,
+ cat, rmdir commands from the shell, or their equivalent from C.
+ - via the C library libcpuset.
+ - via the C library libcgroup.
+ (http://sourceforge.net/projects/libcg/)
+ - via the python application cset.
+ (http://code.google.com/p/cpuset/)
+
+The sched_setaffinity calls can also be done at the shell prompt using
+SGI's runon or Robert Love's taskset. The mbind and set_mempolicy
+calls can be done at the shell prompt using the numactl command
+(part of Andi Kleen's numa package).
+
+2. Usage Examples and Syntax
+============================
+
+2.1 Basic Usage
+---------------
+
+Creating, modifying, using the cpusets can be done through the cpuset
+virtual filesystem.
+
+To mount it, type:
+# mount -t cgroup -o cpuset cpuset /sys/fs/cgroup/cpuset
+
+Then under /sys/fs/cgroup/cpuset you can find a tree that corresponds to the
+tree of the cpusets in the system. For instance, /sys/fs/cgroup/cpuset
+is the cpuset that holds the whole system.
+
+If you want to create a new cpuset under /sys/fs/cgroup/cpuset:
+# cd /sys/fs/cgroup/cpuset
+# mkdir my_cpuset
+
+Now you want to do something with this cpuset.
+# cd my_cpuset
+
+In this directory you can find several files:
+# ls
+cgroup.clone_children cpuset.memory_pressure
+cgroup.event_control cpuset.memory_spread_page
+cgroup.procs cpuset.memory_spread_slab
+cpuset.cpu_exclusive cpuset.mems
+cpuset.cpus cpuset.sched_load_balance
+cpuset.mem_exclusive cpuset.sched_relax_domain_level
+cpuset.mem_hardwall notify_on_release
+cpuset.memory_migrate tasks
+
+Reading them will give you information about the state of this cpuset:
+the CPUs and Memory Nodes it can use, the processes that are using
+it, its properties. By writing to these files you can manipulate
+the cpuset.
+
+Set some flags:
+# /bin/echo 1 > cpuset.cpu_exclusive
+
+Add some cpus:
+# /bin/echo 0-7 > cpuset.cpus
+
+Add some mems:
+# /bin/echo 0-7 > cpuset.mems
+
+Now attach your shell to this cpuset:
+# /bin/echo $$ > tasks
+
+You can also create cpusets inside your cpuset by using mkdir in this
+directory.
+# mkdir my_sub_cs
+
+To remove a cpuset, just use rmdir:
+# rmdir my_sub_cs
+This will fail if the cpuset is in use (has cpusets inside, or has
+processes attached).
+
+Note that for legacy reasons, the "cpuset" filesystem exists as a
+wrapper around the cgroup filesystem.
+
+The command
+
+mount -t cpuset X /sys/fs/cgroup/cpuset
+
+is equivalent to
+
+mount -t cgroup -ocpuset,noprefix X /sys/fs/cgroup/cpuset
+echo "/sbin/cpuset_release_agent" > /sys/fs/cgroup/cpuset/release_agent
+
+2.2 Adding/removing cpus
+------------------------
+
+This is the syntax to use when writing in the cpus or mems files
+in cpuset directories:
+
+# /bin/echo 1-4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4
+# /bin/echo 1,2,3,4 > cpuset.cpus -> set cpus list to cpus 1,2,3,4
+
+To add a CPU to a cpuset, write the new list of CPUs including the
+CPU to be added. To add 6 to the above cpuset:
+
+# /bin/echo 1-4,6 > cpuset.cpus -> set cpus list to cpus 1,2,3,4,6
+
+Similarly to remove a CPU from a cpuset, write the new list of CPUs
+without the CPU to be removed.
+
+To remove all the CPUs:
+
+# /bin/echo "" > cpuset.cpus -> clear cpus list
+
+2.3 Setting flags
+-----------------
+
+The syntax is very simple:
+
+# /bin/echo 1 > cpuset.cpu_exclusive -> set flag 'cpuset.cpu_exclusive'
+# /bin/echo 0 > cpuset.cpu_exclusive -> unset flag 'cpuset.cpu_exclusive'
+
+2.4 Attaching processes
+-----------------------
+
+# /bin/echo PID > tasks
+
+Note that it is PID, not PIDs. You can only attach ONE task at a time.
+If you have several tasks to attach, you have to do it one after another:
+
+# /bin/echo PID1 > tasks
+# /bin/echo PID2 > tasks
+ ...
+# /bin/echo PIDn > tasks
+
+
+3. Questions
+============
+
+Q: what's up with this '/bin/echo' ?
+A: bash's builtin 'echo' command does not check calls to write() against
+ errors. If you use it in the cpuset file system, you won't be
+ able to tell whether a command succeeded or failed.
+
+Q: When I attach processes, only the first of the line gets really attached !
+A: We can only return one error code per call to write(). So you should also
+ put only ONE pid.
+
+4. Contact
+==========
+
+Web: http://www.bullopensource.org/cpuset
diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt
new file mode 100644
index 000000000..3c1095ca0
--- /dev/null
+++ b/Documentation/cgroups/devices.txt
@@ -0,0 +1,116 @@
+Device Whitelist Controller
+
+1. Description:
+
+Implement a cgroup to track and enforce open and mknod restrictions
+on device files. A device cgroup associates a device access
+whitelist with each cgroup. A whitelist entry has 4 fields.
+'type' is a (all), c (char), or b (block). 'all' means it applies
+to all types and all major and minor numbers. Major and minor are
+either an integer or * for all. Access is a composition of r
+(read), w (write), and m (mknod).
+
+The root device cgroup starts with rwm to 'all'. A child device
+cgroup gets a copy of the parent. Administrators can then remove
+devices from the whitelist or add new entries. A child cgroup can
+never receive a device access which is denied by its parent.
+
+2. User Interface
+
+An entry is added using devices.allow, and removed using
+devices.deny. For instance
+
+ echo 'c 1:3 mr' > /sys/fs/cgroup/1/devices.allow
+
+allows cgroup 1 to read and mknod the device usually known as
+/dev/null. Doing
+
+ echo a > /sys/fs/cgroup/1/devices.deny
+
+will remove the default 'a *:* rwm' entry. Doing
+
+ echo a > /sys/fs/cgroup/1/devices.allow
+
+will add the 'a *:* rwm' entry to the whitelist.
+
+3. Security
+
+Any task can move itself between cgroups. This clearly won't
+suffice, but we can decide the best way to adequately restrict
+movement as people get some experience with this. We may just want
+to require CAP_SYS_ADMIN, which at least is a separate bit from
+CAP_MKNOD. We may want to just refuse moving to a cgroup which
+isn't a descendant of the current one. Or we may want to use
+CAP_MAC_ADMIN, since we really are trying to lock down root.
+
+CAP_SYS_ADMIN is needed to modify the whitelist or move another
+task to a new cgroup. (Again we'll probably want to change that).
+
+A cgroup may not be granted more permissions than the cgroup's
+parent has.
+
+4. Hierarchy
+
+device cgroups maintain hierarchy by making sure a cgroup never has more
+access permissions than its parent. Every time an entry is written to
+a cgroup's devices.deny file, all its children will have that entry removed
+from their whitelist and all the locally set whitelist entries will be
+re-evaluated. In case one of the locally set whitelist entries would provide
+more access than the cgroup's parent, it'll be removed from the whitelist.
+
+Example:
+ A
+ / \
+ B
+
+ group behavior exceptions
+ A allow "b 8:* rwm", "c 116:1 rw"
+ B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm"
+
+If a device is denied in group A:
+ # echo "c 116:* r" > A/devices.deny
+it'll propagate down and after revalidating B's entries, the whitelist entry
+"c 116:2 rwm" will be removed:
+
+ group whitelist entries denied devices
+ A all "b 8:* rwm", "c 116:* rw"
+ B "c 1:3 rwm", "b 3:* rwm" all the rest
+
+In case parent's exceptions change and local exceptions are not allowed
+anymore, they'll be deleted.
+
+Notice that new whitelist entries will not be propagated:
+ A
+ / \
+ B
+
+ group whitelist entries denied devices
+ A "c 1:3 rwm", "c 1:5 r" all the rest
+ B "c 1:3 rwm", "c 1:5 r" all the rest
+
+when adding "c *:3 rwm":
+ # echo "c *:3 rwm" >A/devices.allow
+
+the result:
+ group whitelist entries denied devices
+ A "c *:3 rwm", "c 1:5 r" all the rest
+ B "c 1:3 rwm", "c 1:5 r" all the rest
+
+but now it'll be possible to add new entries to B:
+ # echo "c 2:3 rwm" >B/devices.allow
+ # echo "c 50:3 r" >B/devices.allow
+or even
+ # echo "c *:3 rwm" >B/devices.allow
+
+Allowing or denying all by writing 'a' to devices.allow or devices.deny will
+not be possible once the device cgroups has children.
+
+4.1 Hierarchy (internal implementation)
+
+device cgroups is implemented internally using a behavior (ALLOW, DENY) and a
+list of exceptions. The internal state is controlled using the same user
+interface to preserve compatibility with the previous whitelist-only
+implementation. Removal or addition of exceptions that will reduce the access
+to devices will be propagated down the hierarchy.
+For every propagated exception, the effective rules will be re-evaluated based
+on current parent's access rules.
diff --git a/Documentation/cgroups/freezer-subsystem.txt b/Documentation/cgroups/freezer-subsystem.txt
new file mode 100644
index 000000000..c96a72cbb
--- /dev/null
+++ b/Documentation/cgroups/freezer-subsystem.txt
@@ -0,0 +1,123 @@
+The cgroup freezer is useful to batch job management system which start
+and stop sets of tasks in order to schedule the resources of a machine
+according to the desires of a system administrator. This sort of program
+is often used on HPC clusters to schedule access to the cluster as a
+whole. The cgroup freezer uses cgroups to describe the set of tasks to
+be started/stopped by the batch job management system. It also provides
+a means to start and stop the tasks composing the job.
+
+The cgroup freezer will also be useful for checkpointing running groups
+of tasks. The freezer allows the checkpoint code to obtain a consistent
+image of the tasks by attempting to force the tasks in a cgroup into a
+quiescent state. Once the tasks are quiescent another task can
+walk /proc or invoke a kernel interface to gather information about the
+quiesced tasks. Checkpointed tasks can be restarted later should a
+recoverable error occur. This also allows the checkpointed tasks to be
+migrated between nodes in a cluster by copying the gathered information
+to another node and restarting the tasks there.
+
+Sequences of SIGSTOP and SIGCONT are not always sufficient for stopping
+and resuming tasks in userspace. Both of these signals are observable
+from within the tasks we wish to freeze. While SIGSTOP cannot be caught,
+blocked, or ignored it can be seen by waiting or ptracing parent tasks.
+SIGCONT is especially unsuitable since it can be caught by the task. Any
+programs designed to watch for SIGSTOP and SIGCONT could be broken by
+attempting to use SIGSTOP and SIGCONT to stop and resume tasks. We can
+demonstrate this problem using nested bash shells:
+
+ $ echo $$
+ 16644
+ $ bash
+ $ echo $$
+ 16690
+
+ From a second, unrelated bash shell:
+ $ kill -SIGSTOP 16690
+ $ kill -SIGCONT 16690
+
+ <at this point 16690 exits and causes 16644 to exit too>
+
+This happens because bash can observe both signals and choose how it
+responds to them.
+
+Another example of a program which catches and responds to these
+signals is gdb. In fact any program designed to use ptrace is likely to
+have a problem with this method of stopping and resuming tasks.
+
+In contrast, the cgroup freezer uses the kernel freezer code to
+prevent the freeze/unfreeze cycle from becoming visible to the tasks
+being frozen. This allows the bash example above and gdb to run as
+expected.
+
+The cgroup freezer is hierarchical. Freezing a cgroup freezes all
+tasks beloning to the cgroup and all its descendant cgroups. Each
+cgroup has its own state (self-state) and the state inherited from the
+parent (parent-state). Iff both states are THAWED, the cgroup is
+THAWED.
+
+The following cgroupfs files are created by cgroup freezer.
+
+* freezer.state: Read-write.
+
+ When read, returns the effective state of the cgroup - "THAWED",
+ "FREEZING" or "FROZEN". This is the combined self and parent-states.
+ If any is freezing, the cgroup is freezing (FREEZING or FROZEN).
+
+ FREEZING cgroup transitions into FROZEN state when all tasks
+ belonging to the cgroup and its descendants become frozen. Note that
+ a cgroup reverts to FREEZING from FROZEN after a new task is added
+ to the cgroup or one of its descendant cgroups until the new task is
+ frozen.
+
+ When written, sets the self-state of the cgroup. Two values are
+ allowed - "FROZEN" and "THAWED". If FROZEN is written, the cgroup,
+ if not already freezing, enters FREEZING state along with all its
+ descendant cgroups.
+
+ If THAWED is written, the self-state of the cgroup is changed to
+ THAWED. Note that the effective state may not change to THAWED if
+ the parent-state is still freezing. If a cgroup's effective state
+ becomes THAWED, all its descendants which are freezing because of
+ the cgroup also leave the freezing state.
+
+* freezer.self_freezing: Read only.
+
+ Shows the self-state. 0 if the self-state is THAWED; otherwise, 1.
+ This value is 1 iff the last write to freezer.state was "FROZEN".
+
+* freezer.parent_freezing: Read only.
+
+ Shows the parent-state. 0 if none of the cgroup's ancestors is
+ frozen; otherwise, 1.
+
+The root cgroup is non-freezable and the above interface files don't
+exist.
+
+* Examples of usage :
+
+ # mkdir /sys/fs/cgroup/freezer
+ # mount -t cgroup -ofreezer freezer /sys/fs/cgroup/freezer
+ # mkdir /sys/fs/cgroup/freezer/0
+ # echo $some_pid > /sys/fs/cgroup/freezer/0/tasks
+
+to get status of the freezer subsystem :
+
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
+ THAWED
+
+to freeze all tasks in the container :
+
+ # echo FROZEN > /sys/fs/cgroup/freezer/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
+ FREEZING
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
+ FROZEN
+
+to unfreeze all tasks in the container :
+
+ # echo THAWED > /sys/fs/cgroup/freezer/0/freezer.state
+ # cat /sys/fs/cgroup/freezer/0/freezer.state
+ THAWED
+
+This is the basic mechanism which should do the right thing for user space task
+in a simple scenario.
diff --git a/Documentation/cgroups/hugetlb.txt b/Documentation/cgroups/hugetlb.txt
new file mode 100644
index 000000000..106245c3a
--- /dev/null
+++ b/Documentation/cgroups/hugetlb.txt
@@ -0,0 +1,45 @@
+HugeTLB Controller
+-------------------
+
+The HugeTLB controller allows to limit the HugeTLB usage per control group and
+enforces the controller limit during page fault. Since HugeTLB doesn't
+support page reclaim, enforcing the limit at page fault time implies that,
+the application will get SIGBUS signal if it tries to access HugeTLB pages
+beyond its limit. This requires the application to know beforehand how much
+HugeTLB pages it would require for its use.
+
+HugeTLB controller can be created by first mounting the cgroup filesystem.
+
+# mount -t cgroup -o hugetlb none /sys/fs/cgroup
+
+With the above step, the initial or the parent HugeTLB group becomes
+visible at /sys/fs/cgroup. At bootup, this group includes all the tasks in
+the system. /sys/fs/cgroup/tasks lists the tasks in this cgroup.
+
+New groups can be created under the parent group /sys/fs/cgroup.
+
+# cd /sys/fs/cgroup
+# mkdir g1
+# echo $$ > g1/tasks
+
+The above steps create a new group g1 and move the current shell
+process (bash) into it.
+
+Brief summary of control files
+
+ hugetlb.<hugepagesize>.limit_in_bytes # set/show limit of "hugepagesize" hugetlb usage
+ hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded
+ hugetlb.<hugepagesize>.usage_in_bytes # show current usage for "hugepagesize" hugetlb
+ hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB limit
+
+For a system supporting two hugepage size (16M and 16G) the control
+files include:
+
+hugetlb.16GB.limit_in_bytes
+hugetlb.16GB.max_usage_in_bytes
+hugetlb.16GB.usage_in_bytes
+hugetlb.16GB.failcnt
+hugetlb.16MB.limit_in_bytes
+hugetlb.16MB.max_usage_in_bytes
+hugetlb.16MB.usage_in_bytes
+hugetlb.16MB.failcnt
diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt
new file mode 100644
index 000000000..8870b0212
--- /dev/null
+++ b/Documentation/cgroups/memcg_test.txt
@@ -0,0 +1,280 @@
+Memory Resource Controller(Memcg) Implementation Memo.
+Last Updated: 2010/2
+Base Kernel Version: based on 2.6.33-rc7-mm(candidate for 34).
+
+Because VM is getting complex (one of reasons is memcg...), memcg's behavior
+is complex. This is a document for memcg's internal behavior.
+Please note that implementation details can be changed.
+
+(*) Topics on API should be in Documentation/cgroups/memory.txt)
+
+0. How to record usage ?
+ 2 objects are used.
+
+ page_cgroup ....an object per page.
+ Allocated at boot or memory hotplug. Freed at memory hot removal.
+
+ swap_cgroup ... an entry per swp_entry.
+ Allocated at swapon(). Freed at swapoff().
+
+ The page_cgroup has USED bit and double count against a page_cgroup never
+ occurs. swap_cgroup is used only when a charged page is swapped-out.
+
+1. Charge
+
+ a page/swp_entry may be charged (usage += PAGE_SIZE) at
+
+ mem_cgroup_try_charge()
+
+2. Uncharge
+ a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by
+
+ mem_cgroup_uncharge()
+ Called when a page's refcount goes down to 0.
+
+ mem_cgroup_uncharge_swap()
+ Called when swp_entry's refcnt goes down to 0. A charge against swap
+ disappears.
+
+3. charge-commit-cancel
+ Memcg pages are charged in two steps:
+ mem_cgroup_try_charge()
+ mem_cgroup_commit_charge() or mem_cgroup_cancel_charge()
+
+ At try_charge(), there are no flags to say "this page is charged".
+ at this point, usage += PAGE_SIZE.
+
+ At commit(), the page is associated with the memcg.
+
+ At cancel(), simply usage -= PAGE_SIZE.
+
+Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
+
+4. Anonymous
+ Anonymous page is newly allocated at
+ - page fault into MAP_ANONYMOUS mapping.
+ - Copy-On-Write.
+
+ 4.1 Swap-in.
+ At swap-in, the page is taken from swap-cache. There are 2 cases.
+
+ (a) If the SwapCache is newly allocated and read, it has no charges.
+ (b) If the SwapCache has been mapped by processes, it has been
+ charged already.
+
+ 4.2 Swap-out.
+ At swap-out, typical state transition is below.
+
+ (a) add to swap cache. (marked as SwapCache)
+ swp_entry's refcnt += 1.
+ (b) fully unmapped.
+ swp_entry's refcnt += # of ptes.
+ (c) write back to swap.
+ (d) delete from swap cache. (remove from SwapCache)
+ swp_entry's refcnt -= 1.
+
+
+ Finally, at task exit,
+ (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0.
+
+5. Page Cache
+ Page Cache is charged at
+ - add_to_page_cache_locked().
+
+ The logic is very clear. (About migration, see below)
+ Note: __remove_from_page_cache() is called by remove_from_page_cache()
+ and __remove_mapping().
+
+6. Shmem(tmpfs) Page Cache
+ The best way to understand shmem's page state transition is to read
+ mm/shmem.c.
+ But brief explanation of the behavior of memcg around shmem will be
+ helpful to understand the logic.
+
+ Shmem's page (just leaf page, not direct/indirect block) can be on
+ - radix-tree of shmem's inode.
+ - SwapCache.
+ - Both on radix-tree and SwapCache. This happens at swap-in
+ and swap-out,
+
+ It's charged when...
+ - A new page is added to shmem's radix-tree.
+ - A swp page is read. (move a charge from swap_cgroup to page_cgroup)
+
+7. Page Migration
+
+ mem_cgroup_migrate()
+
+8. LRU
+ Each memcg has its own private LRU. Now, its handling is under global
+ VM's control (means that it's handled under global zone->lru_lock).
+ Almost all routines around memcg's LRU is called by global LRU's
+ list management functions under zone->lru_lock().
+
+ A special function is mem_cgroup_isolate_pages(). This scans
+ memcg's private LRU and call __isolate_lru_page() to extract a page
+ from LRU.
+ (By __isolate_lru_page(), the page is removed from both of global and
+ private LRU.)
+
+
+9. Typical Tests.
+
+ Tests for racy cases.
+
+ 9.1 Small limit to memcg.
+ When you do test to do racy case, it's good test to set memcg's limit
+ to be very small rather than GB. Many races found in the test under
+ xKB or xxMB limits.
+ (Memory behavior under GB and Memory behavior under MB shows very
+ different situation.)
+
+ 9.2 Shmem
+ Historically, memcg's shmem handling was poor and we saw some amount
+ of troubles here. This is because shmem is page-cache but can be
+ SwapCache. Test with shmem/tmpfs is always good test.
+
+ 9.3 Migration
+ For NUMA, migration is an another special case. To do easy test, cpuset
+ is useful. Following is a sample script to do migration.
+
+ mount -t cgroup -o cpuset none /opt/cpuset
+
+ mkdir /opt/cpuset/01
+ echo 1 > /opt/cpuset/01/cpuset.cpus
+ echo 0 > /opt/cpuset/01/cpuset.mems
+ echo 1 > /opt/cpuset/01/cpuset.memory_migrate
+ mkdir /opt/cpuset/02
+ echo 1 > /opt/cpuset/02/cpuset.cpus
+ echo 1 > /opt/cpuset/02/cpuset.mems
+ echo 1 > /opt/cpuset/02/cpuset.memory_migrate
+
+ In above set, when you moves a task from 01 to 02, page migration to
+ node 0 to node 1 will occur. Following is a script to migrate all
+ under cpuset.
+ --
+ move_task()
+ {
+ for pid in $1
+ do
+ /bin/echo $pid >$2/tasks 2>/dev/null
+ echo -n $pid
+ echo -n " "
+ done
+ echo END
+ }
+
+ G1_TASK=`cat ${G1}/tasks`
+ G2_TASK=`cat ${G2}/tasks`
+ move_task "${G1_TASK}" ${G2} &
+ --
+ 9.4 Memory hotplug.
+ memory hotplug test is one of good test.
+ to offline memory, do following.
+ # echo offline > /sys/devices/system/memory/memoryXXX/state
+ (XXX is the place of memory)
+ This is an easy way to test page migration, too.
+
+ 9.5 mkdir/rmdir
+ When using hierarchy, mkdir/rmdir test should be done.
+ Use tests like the following.
+
+ echo 1 >/opt/cgroup/01/memory/use_hierarchy
+ mkdir /opt/cgroup/01/child_a
+ mkdir /opt/cgroup/01/child_b
+
+ set limit to 01.
+ add limit to 01/child_b
+ run jobs under child_a and child_b
+
+ create/delete following groups at random while jobs are running.
+ /opt/cgroup/01/child_a/child_aa
+ /opt/cgroup/01/child_b/child_bb
+ /opt/cgroup/01/child_c
+
+ running new jobs in new group is also good.
+
+ 9.6 Mount with other subsystems.
+ Mounting with other subsystems is a good test because there is a
+ race and lock dependency with other cgroup subsystems.
+
+ example)
+ # mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices
+
+ and do task move, mkdir, rmdir etc...under this.
+
+ 9.7 swapoff.
+ Besides management of swap is one of complicated parts of memcg,
+ call path of swap-in at swapoff is not same as usual swap-in path..
+ It's worth to be tested explicitly.
+
+ For example, test like following is good.
+ (Shell-A)
+ # mount -t cgroup none /cgroup -o memory
+ # mkdir /cgroup/test
+ # echo 40M > /cgroup/test/memory.limit_in_bytes
+ # echo 0 > /cgroup/test/tasks
+ Run malloc(100M) program under this. You'll see 60M of swaps.
+ (Shell-B)
+ # move all tasks in /cgroup/test to /cgroup
+ # /sbin/swapoff -a
+ # rmdir /cgroup/test
+ # kill malloc task.
+
+ Of course, tmpfs v.s. swapoff test should be tested, too.
+
+ 9.8 OOM-Killer
+ Out-of-memory caused by memcg's limit will kill tasks under
+ the memcg. When hierarchy is used, a task under hierarchy
+ will be killed by the kernel.
+ In this case, panic_on_oom shouldn't be invoked and tasks
+ in other groups shouldn't be killed.
+
+ It's not difficult to cause OOM under memcg as following.
+ Case A) when you can swapoff
+ #swapoff -a
+ #echo 50M > /memory.limit_in_bytes
+ run 51M of malloc
+
+ Case B) when you use mem+swap limitation.
+ #echo 50M > memory.limit_in_bytes
+ #echo 50M > memory.memsw.limit_in_bytes
+ run 51M of malloc
+
+ 9.9 Move charges at task migration
+ Charges associated with a task can be moved along with task migration.
+
+ (Shell-A)
+ #mkdir /cgroup/A
+ #echo $$ >/cgroup/A/tasks
+ run some programs which uses some amount of memory in /cgroup/A.
+
+ (Shell-B)
+ #mkdir /cgroup/B
+ #echo 1 >/cgroup/B/memory.move_charge_at_immigrate
+ #echo "pid of the program running in group A" >/cgroup/B/tasks
+
+ You can see charges have been moved by reading *.usage_in_bytes or
+ memory.stat of both A and B.
+ See 8.2 of Documentation/cgroups/memory.txt to see what value should be
+ written to move_charge_at_immigrate.
+
+ 9.10 Memory thresholds
+ Memory controller implements memory thresholds using cgroups notification
+ API. You can use tools/cgroup/cgroup_event_listener.c to test it.
+
+ (Shell-A) Create cgroup and run event listener
+ # mkdir /cgroup/A
+ # ./cgroup_event_listener /cgroup/A/memory.usage_in_bytes 5M
+
+ (Shell-B) Add task to cgroup and try to allocate and free memory
+ # echo $$ >/cgroup/A/tasks
+ # a="$(dd if=/dev/zero bs=1M count=10)"
+ # a=
+
+ You will see message from cgroup_event_listener every time you cross
+ the thresholds.
+
+ Use /cgroup/A/memory.memsw.usage_in_bytes to test memsw thresholds.
+
+ It's good idea to test root cgroup as well.
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
new file mode 100644
index 000000000..f456b4315
--- /dev/null
+++ b/Documentation/cgroups/memory.txt
@@ -0,0 +1,875 @@
+Memory Resource Controller
+
+NOTE: This document is hopelessly outdated and it asks for a complete
+ rewrite. It still contains a useful information so we are keeping it
+ here but make sure to check the current code if you need a deeper
+ understanding.
+
+NOTE: The Memory Resource Controller has generically been referred to as the
+ memory controller in this document. Do not confuse memory controller
+ used here with the memory controller that is used in hardware.
+
+(For editors)
+In this document:
+ When we mention a cgroup (cgroupfs's directory) with memory controller,
+ we call it "memory cgroup". When you see git-log and source code, you'll
+ see patch's title and function names tend to use "memcg".
+ In this document, we avoid using it.
+
+Benefits and Purpose of the memory controller
+
+The memory controller isolates the memory behaviour of a group of tasks
+from the rest of the system. The article on LWN [12] mentions some probable
+uses of the memory controller. The memory controller can be used to
+
+a. Isolate an application or a group of applications
+ Memory-hungry applications can be isolated and limited to a smaller
+ amount of memory.
+b. Create a cgroup with a limited amount of memory; this can be used
+ as a good alternative to booting with mem=XXXX.
+c. Virtualization solutions can control the amount of memory they want
+ to assign to a virtual machine instance.
+d. A CD/DVD burner could control the amount of memory used by the
+ rest of the system to ensure that burning does not fail due to lack
+ of available memory.
+e. There are several other use cases; find one or use the controller just
+ for fun (to learn and hack on the VM subsystem).
+
+Current Status: linux-2.6.34-mmotm(development version of 2010/April)
+
+Features:
+ - accounting anonymous pages, file caches, swap caches usage and limiting them.
+ - pages are linked to per-memcg LRU exclusively, and there is no global LRU.
+ - optionally, memory+swap usage can be accounted and limited.
+ - hierarchical accounting
+ - soft limit
+ - moving (recharging) account at moving a task is selectable.
+ - usage threshold notifier
+ - memory pressure notifier
+ - oom-killer disable knob and oom-notifier
+ - Root cgroup has no limit controls.
+
+ Kernel memory support is a work in progress, and the current version provides
+ basically functionality. (See Section 2.7)
+
+Brief summary of control files.
+
+ tasks # attach a task(thread) and show list of threads
+ cgroup.procs # show list of processes
+ cgroup.event_control # an interface for event_fd()
+ memory.usage_in_bytes # show current usage for memory
+ (See 5.5 for details)
+ memory.memsw.usage_in_bytes # show current usage for memory+Swap
+ (See 5.5 for details)
+ memory.limit_in_bytes # set/show limit of memory usage
+ memory.memsw.limit_in_bytes # set/show limit of memory+Swap usage
+ memory.failcnt # show the number of memory usage hits limits
+ memory.memsw.failcnt # show the number of memory+Swap hits limits
+ memory.max_usage_in_bytes # show max memory usage recorded
+ memory.memsw.max_usage_in_bytes # show max memory+Swap usage recorded
+ memory.soft_limit_in_bytes # set/show soft limit of memory usage
+ memory.stat # show various statistics
+ memory.use_hierarchy # set/show hierarchical account enabled
+ memory.force_empty # trigger forced move charge to parent
+ memory.pressure_level # set memory pressure notifications
+ memory.swappiness # set/show swappiness parameter of vmscan
+ (See sysctl's vm.swappiness)
+ memory.move_charge_at_immigrate # set/show controls of moving charges
+ memory.oom_control # set/show oom controls.
+ memory.numa_stat # show the number of memory usage per numa node
+
+ memory.kmem.limit_in_bytes # set/show hard limit for kernel memory
+ memory.kmem.usage_in_bytes # show current kernel memory allocation
+ memory.kmem.failcnt # show the number of kernel memory usage hits limits
+ memory.kmem.max_usage_in_bytes # show max kernel memory usage recorded
+
+ memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory
+ memory.kmem.tcp.usage_in_bytes # show current tcp buf memory allocation
+ memory.kmem.tcp.failcnt # show the number of tcp buf memory usage hits limits
+ memory.kmem.tcp.max_usage_in_bytes # show max tcp buf memory usage recorded
+
+1. History
+
+The memory controller has a long history. A request for comments for the memory
+controller was posted by Balbir Singh [1]. At the time the RFC was posted
+there were several implementations for memory control. The goal of the
+RFC was to build consensus and agreement for the minimal features required
+for memory control. The first RSS controller was posted by Balbir Singh[2]
+in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the
+RSS controller. At OLS, at the resource management BoF, everyone suggested
+that we handle both page cache and RSS together. Another request was raised
+to allow user space handling of OOM. The current memory controller is
+at version 6; it combines both mapped (RSS) and unmapped Page
+Cache Control [11].
+
+2. Memory Control
+
+Memory is a unique resource in the sense that it is present in a limited
+amount. If a task requires a lot of CPU processing, the task can spread
+its processing over a period of hours, days, months or years, but with
+memory, the same physical memory needs to be reused to accomplish the task.
+
+The memory controller implementation has been divided into phases. These
+are:
+
+1. Memory controller
+2. mlock(2) controller
+3. Kernel user memory accounting and slab control
+4. user mappings length controller
+
+The memory controller is the first controller developed.
+
+2.1. Design
+
+The core of the design is a counter called the page_counter. The
+page_counter tracks the current memory usage and limit of the group of
+processes associated with the controller. Each cgroup has a memory controller
+specific data structure (mem_cgroup) associated with it.
+
+2.2. Accounting
+
+ +--------------------+
+ | mem_cgroup |
+ | (page_counter) |
+ +--------------------+
+ / ^ \
+ / | \
+ +---------------+ | +---------------+
+ | mm_struct | |.... | mm_struct |
+ | | | | |
+ +---------------+ | +---------------+
+ |
+ + --------------+
+ |
+ +---------------+ +------+--------+
+ | page +----------> page_cgroup|
+ | | | |
+ +---------------+ +---------------+
+
+ (Figure 1: Hierarchy of Accounting)
+
+
+Figure 1 shows the important aspects of the controller
+
+1. Accounting happens per cgroup
+2. Each mm_struct knows about which cgroup it belongs to
+3. Each page has a pointer to the page_cgroup, which in turn knows the
+ cgroup it belongs to
+
+The accounting is done as follows: mem_cgroup_charge_common() is invoked to
+set up the necessary data structures and check if the cgroup that is being
+charged is over its limit. If it is, then reclaim is invoked on the cgroup.
+More details can be found in the reclaim section of this document.
+If everything goes well, a page meta-data-structure called page_cgroup is
+updated. page_cgroup has its own LRU on cgroup.
+(*) page_cgroup structure is allocated at boot/memory-hotplug time.
+
+2.2.1 Accounting details
+
+All mapped anon pages (RSS) and cache pages (Page Cache) are accounted.
+Some pages which are never reclaimable and will not be on the LRU
+are not accounted. We just account pages under usual VM management.
+
+RSS pages are accounted at page_fault unless they've already been accounted
+for earlier. A file page will be accounted for as Page Cache when it's
+inserted into inode (radix-tree). While it's mapped into the page tables of
+processes, duplicate accounting is carefully avoided.
+
+An RSS page is unaccounted when it's fully unmapped. A PageCache page is
+unaccounted when it's removed from radix-tree. Even if RSS pages are fully
+unmapped (by kswapd), they may exist as SwapCache in the system until they
+are really freed. Such SwapCaches are also accounted.
+A swapped-in page is not accounted until it's mapped.
+
+Note: The kernel does swapin-readahead and reads multiple swaps at once.
+This means swapped-in pages may contain pages for other tasks than a task
+causing page fault. So, we avoid accounting at swap-in I/O.
+
+At page migration, accounting information is kept.
+
+Note: we just account pages-on-LRU because our purpose is to control amount
+of used pages; not-on-LRU pages tend to be out-of-control from VM view.
+
+2.3 Shared Page Accounting
+
+Shared pages are accounted on the basis of the first touch approach. The
+cgroup that first touches a page is accounted for the page. The principle
+behind this approach is that a cgroup that aggressively uses a shared
+page will eventually get charged for it (once it is uncharged from
+the cgroup that brought it in -- this will happen on memory pressure).
+
+But see section 8.2: when moving a task to another cgroup, its pages may
+be recharged to the new cgroup, if move_charge_at_immigrate has been chosen.
+
+Exception: If CONFIG_MEMCG_SWAP is not used.
+When you do swapoff and make swapped-out pages of shmem(tmpfs) to
+be backed into memory in force, charges for pages are accounted against the
+caller of swapoff rather than the users of shmem.
+
+2.4 Swap Extension (CONFIG_MEMCG_SWAP)
+
+Swap Extension allows you to record charge for swap. A swapped-in page is
+charged back to original page allocator if possible.
+
+When swap is accounted, following files are added.
+ - memory.memsw.usage_in_bytes.
+ - memory.memsw.limit_in_bytes.
+
+memsw means memory+swap. Usage of memory+swap is limited by
+memsw.limit_in_bytes.
+
+Example: Assume a system with 4G of swap. A task which allocates 6G of memory
+(by mistake) under 2G memory limitation will use all swap.
+In this case, setting memsw.limit_in_bytes=3G will prevent bad use of swap.
+By using the memsw limit, you can avoid system OOM which can be caused by swap
+shortage.
+
+* why 'memory+swap' rather than swap.
+The global LRU(kswapd) can swap out arbitrary pages. Swap-out means
+to move account from memory to swap...there is no change in usage of
+memory+swap. In other words, when we want to limit the usage of swap without
+affecting global LRU, memory+swap limit is better than just limiting swap from
+an OS point of view.
+
+* What happens when a cgroup hits memory.memsw.limit_in_bytes
+When a cgroup hits memory.memsw.limit_in_bytes, it's useless to do swap-out
+in this cgroup. Then, swap-out will not be done by cgroup routine and file
+caches are dropped. But as mentioned above, global LRU can do swapout memory
+from it for sanity of the system's memory management state. You can't forbid
+it by cgroup.
+
+2.5 Reclaim
+
+Each cgroup maintains a per cgroup LRU which has the same structure as
+global VM. When a cgroup goes over its limit, we first try
+to reclaim memory from the cgroup so as to make space for the new
+pages that the cgroup has touched. If the reclaim is unsuccessful,
+an OOM routine is invoked to select and kill the bulkiest task in the
+cgroup. (See 10. OOM Control below.)
+
+The reclaim algorithm has not been modified for cgroups, except that
+pages that are selected for reclaiming come from the per-cgroup LRU
+list.
+
+NOTE: Reclaim does not work for the root cgroup, since we cannot set any
+limits on the root cgroup.
+
+Note2: When panic_on_oom is set to "2", the whole system will panic.
+
+When oom event notifier is registered, event will be delivered.
+(See oom_control section)
+
+2.6 Locking
+
+ lock_page_cgroup()/unlock_page_cgroup() should not be called under
+ mapping->tree_lock.
+
+ Other lock order is following:
+ PG_locked.
+ mm->page_table_lock
+ zone->lru_lock
+ lock_page_cgroup.
+ In many cases, just lock_page_cgroup() is called.
+ per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by
+ zone->lru_lock, it has no lock of its own.
+
+2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM)
+
+With the Kernel memory extension, the Memory Controller is able to limit
+the amount of kernel memory used by the system. Kernel memory is fundamentally
+different than user memory, since it can't be swapped out, which makes it
+possible to DoS the system by consuming too much of this precious resource.
+
+Kernel memory won't be accounted at all until limit on a group is set. This
+allows for existing setups to continue working without disruption. The limit
+cannot be set if the cgroup have children, or if there are already tasks in the
+cgroup. Attempting to set the limit under those conditions will return -EBUSY.
+When use_hierarchy == 1 and a group is accounted, its children will
+automatically be accounted regardless of their limit value.
+
+After a group is first limited, it will be kept being accounted until it
+is removed. The memory limitation itself, can of course be removed by writing
+-1 to memory.kmem.limit_in_bytes. In this case, kmem will be accounted, but not
+limited.
+
+Kernel memory limits are not imposed for the root cgroup. Usage for the root
+cgroup may or may not be accounted. The memory used is accumulated into
+memory.kmem.usage_in_bytes, or in a separate counter when it makes sense.
+(currently only for tcp).
+The main "kmem" counter is fed into the main counter, so kmem charges will
+also be visible from the user counter.
+
+Currently no soft limit is implemented for kernel memory. It is future work
+to trigger slab reclaim when those limits are reached.
+
+2.7.1 Current Kernel Memory resources accounted
+
+* stack pages: every process consumes some stack pages. By accounting into
+kernel memory, we prevent new processes from being created when the kernel
+memory usage is too high.
+
+* slab pages: pages allocated by the SLAB or SLUB allocator are tracked. A copy
+of each kmem_cache is created every time the cache is touched by the first time
+from inside the memcg. The creation is done lazily, so some objects can still be
+skipped while the cache is being created. All objects in a slab page should
+belong to the same memcg. This only fails to hold when a task is migrated to a
+different memcg during the page allocation by the cache.
+
+* sockets memory pressure: some sockets protocols have memory pressure
+thresholds. The Memory Controller allows them to be controlled individually
+per cgroup, instead of globally.
+
+* tcp memory pressure: sockets memory pressure for the tcp protocol.
+
+2.7.2 Common use cases
+
+Because the "kmem" counter is fed to the main user counter, kernel memory can
+never be limited completely independently of user memory. Say "U" is the user
+limit, and "K" the kernel limit. There are three possible ways limits can be
+set:
+
+ U != 0, K = unlimited:
+ This is the standard memcg limitation mechanism already present before kmem
+ accounting. Kernel memory is completely ignored.
+
+ U != 0, K < U:
+ Kernel memory is a subset of the user memory. This setup is useful in
+ deployments where the total amount of memory per-cgroup is overcommited.
+ Overcommiting kernel memory limits is definitely not recommended, since the
+ box can still run out of non-reclaimable memory.
+ In this case, the admin could set up K so that the sum of all groups is
+ never greater than the total memory, and freely set U at the cost of his
+ QoS.
+ WARNING: In the current implementation, memory reclaim will NOT be
+ triggered for a cgroup when it hits K while staying below U, which makes
+ this setup impractical.
+
+ U != 0, K >= U:
+ Since kmem charges will also be fed to the user counter and reclaim will be
+ triggered for the cgroup for both kinds of memory. This setup gives the
+ admin a unified view of memory, and it is also useful for people who just
+ want to track kernel memory usage.
+
+3. User Interface
+
+3.0. Configuration
+
+a. Enable CONFIG_CGROUPS
+b. Enable CONFIG_MEMCG
+c. Enable CONFIG_MEMCG_SWAP (to use swap extension)
+d. Enable CONFIG_MEMCG_KMEM (to use kmem extension)
+
+3.1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?)
+# mount -t tmpfs none /sys/fs/cgroup
+# mkdir /sys/fs/cgroup/memory
+# mount -t cgroup none /sys/fs/cgroup/memory -o memory
+
+3.2. Make the new group and move bash into it
+# mkdir /sys/fs/cgroup/memory/0
+# echo $$ > /sys/fs/cgroup/memory/0/tasks
+
+Since now we're in the 0 cgroup, we can alter the memory limit:
+# echo 4M > /sys/fs/cgroup/memory/0/memory.limit_in_bytes
+
+NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo,
+mega or gigabytes. (Here, Kilo, Mega, Giga are Kibibytes, Mebibytes, Gibibytes.)
+
+NOTE: We can write "-1" to reset the *.limit_in_bytes(unlimited).
+NOTE: We cannot set limits on the root cgroup any more.
+
+# cat /sys/fs/cgroup/memory/0/memory.limit_in_bytes
+4194304
+
+We can check the usage:
+# cat /sys/fs/cgroup/memory/0/memory.usage_in_bytes
+1216512
+
+A successful write to this file does not guarantee a successful setting of
+this limit to the value written into the file. This can be due to a
+number of factors, such as rounding up to page boundaries or the total
+availability of memory on the system. The user is required to re-read
+this file after a write to guarantee the value committed by the kernel.
+
+# echo 1 > memory.limit_in_bytes
+# cat memory.limit_in_bytes
+4096
+
+The memory.failcnt field gives the number of times that the cgroup limit was
+exceeded.
+
+The memory.stat file gives accounting information. Now, the number of
+caches, RSS and Active pages/Inactive pages are shown.
+
+4. Testing
+
+For testing features and implementation, see memcg_test.txt.
+
+Performance test is also important. To see pure memory controller's overhead,
+testing on tmpfs will give you good numbers of small overheads.
+Example: do kernel make on tmpfs.
+
+Page-fault scalability is also important. At measuring parallel
+page fault test, multi-process test may be better than multi-thread
+test because it has noise of shared objects/status.
+
+But the above two are testing extreme situations.
+Trying usual test under memory controller is always helpful.
+
+4.1 Troubleshooting
+
+Sometimes a user might find that the application under a cgroup is
+terminated by the OOM killer. There are several causes for this:
+
+1. The cgroup limit is too low (just too low to do anything useful)
+2. The user is using anonymous memory and swap is turned off or too low
+
+A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of
+some of the pages cached in the cgroup (page cache pages).
+
+To know what happens, disabling OOM_Kill as per "10. OOM Control" (below) and
+seeing what happens will be helpful.
+
+4.2 Task migration
+
+When a task migrates from one cgroup to another, its charge is not
+carried forward by default. The pages allocated from the original cgroup still
+remain charged to it, the charge is dropped when the page is freed or
+reclaimed.
+
+You can move charges of a task along with task migration.
+See 8. "Move charges at task migration"
+
+4.3 Removing a cgroup
+
+A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
+cgroup might have some charge associated with it, even though all
+tasks have migrated away from it. (because we charge against pages, not
+against tasks.)
+
+We move the stats to root (if use_hierarchy==0) or parent (if
+use_hierarchy==1), and no change on the charge except uncharging
+from the child.
+
+Charges recorded in swap information is not updated at removal of cgroup.
+Recorded information is discarded and a cgroup which uses swap (swapcache)
+will be charged as a new owner of it.
+
+About use_hierarchy, see Section 6.
+
+5. Misc. interfaces.
+
+5.1 force_empty
+ memory.force_empty interface is provided to make cgroup's memory usage empty.
+ When writing anything to this
+
+ # echo 0 > memory.force_empty
+
+ the cgroup will be reclaimed and as many pages reclaimed as possible.
+
+ The typical use case for this interface is before calling rmdir().
+ Because rmdir() moves all pages to parent, some out-of-use page caches can be
+ moved to the parent. If you want to avoid that, force_empty will be useful.
+
+ Also, note that when memory.kmem.limit_in_bytes is set the charges due to
+ kernel pages will still be seen. This is not considered a failure and the
+ write will still return success. In this case, it is expected that
+ memory.kmem.usage_in_bytes == memory.usage_in_bytes.
+
+ About use_hierarchy, see Section 6.
+
+5.2 stat file
+
+memory.stat file includes following statistics
+
+# per-memory cgroup local status
+cache - # of bytes of page cache memory.
+rss - # of bytes of anonymous and swap cache memory (includes
+ transparent hugepages).
+rss_huge - # of bytes of anonymous transparent hugepages.
+mapped_file - # of bytes of mapped file (includes tmpfs/shmem)
+pgpgin - # of charging events to the memory cgroup. The charging
+ event happens each time a page is accounted as either mapped
+ anon page(RSS) or cache page(Page Cache) to the cgroup.
+pgpgout - # of uncharging events to the memory cgroup. The uncharging
+ event happens each time a page is unaccounted from the cgroup.
+swap - # of bytes of swap usage
+writeback - # of bytes of file/anon cache that are queued for syncing to
+ disk.
+inactive_anon - # of bytes of anonymous and swap cache memory on inactive
+ LRU list.
+active_anon - # of bytes of anonymous and swap cache memory on active
+ LRU list.
+inactive_file - # of bytes of file-backed memory on inactive LRU list.
+active_file - # of bytes of file-backed memory on active LRU list.
+unevictable - # of bytes of memory that cannot be reclaimed (mlocked etc).
+
+# status considering hierarchy (see memory.use_hierarchy settings)
+
+hierarchical_memory_limit - # of bytes of memory limit with regard to hierarchy
+ under which the memory cgroup is
+hierarchical_memsw_limit - # of bytes of memory+swap limit with regard to
+ hierarchy under which memory cgroup is.
+
+total_<counter> - # hierarchical version of <counter>, which in
+ addition to the cgroup's own value includes the
+ sum of all hierarchical children's values of
+ <counter>, i.e. total_cache
+
+# The following additional stats are dependent on CONFIG_DEBUG_VM.
+
+recent_rotated_anon - VM internal parameter. (see mm/vmscan.c)
+recent_rotated_file - VM internal parameter. (see mm/vmscan.c)
+recent_scanned_anon - VM internal parameter. (see mm/vmscan.c)
+recent_scanned_file - VM internal parameter. (see mm/vmscan.c)
+
+Memo:
+ recent_rotated means recent frequency of LRU rotation.
+ recent_scanned means recent # of scans to LRU.
+ showing for better debug please see the code for meanings.
+
+Note:
+ Only anonymous and swap cache memory is listed as part of 'rss' stat.
+ This should not be confused with the true 'resident set size' or the
+ amount of physical memory used by the cgroup.
+ 'rss + file_mapped" will give you resident set size of cgroup.
+ (Note: file and shmem may be shared among other cgroups. In that case,
+ file_mapped is accounted only when the memory cgroup is owner of page
+ cache.)
+
+5.3 swappiness
+
+Overrides /proc/sys/vm/swappiness for the particular group. The tunable
+in the root cgroup corresponds to the global swappiness setting.
+
+Please note that unlike during the global reclaim, limit reclaim
+enforces that 0 swappiness really prevents from any swapping even if
+there is a swap storage available. This might lead to memcg OOM killer
+if there are no file pages to reclaim.
+
+5.4 failcnt
+
+A memory cgroup provides memory.failcnt and memory.memsw.failcnt files.
+This failcnt(== failure count) shows the number of times that a usage counter
+hit its limit. When a memory cgroup hits a limit, failcnt increases and
+memory under it will be reclaimed.
+
+You can reset failcnt by writing 0 to failcnt file.
+# echo 0 > .../memory.failcnt
+
+5.5 usage_in_bytes
+
+For efficiency, as other kernel components, memory cgroup uses some optimization
+to avoid unnecessary cacheline false sharing. usage_in_bytes is affected by the
+method and doesn't show 'exact' value of memory (and swap) usage, it's a fuzz
+value for efficient access. (Of course, when necessary, it's synchronized.)
+If you want to know more exact memory usage, you should use RSS+CACHE(+SWAP)
+value in memory.stat(see 5.2).
+
+5.6 numa_stat
+
+This is similar to numa_maps but operates on a per-memcg basis. This is
+useful for providing visibility into the numa locality information within
+an memcg since the pages are allowed to be allocated from any physical
+node. One of the use cases is evaluating application performance by
+combining this information with the application's CPU allocation.
+
+Each memcg's numa_stat file includes "total", "file", "anon" and "unevictable"
+per-node page counts including "hierarchical_<counter>" which sums up all
+hierarchical children's values in addition to the memcg's own value.
+
+The output format of memory.numa_stat is:
+
+total=<total pages> N0=<node 0 pages> N1=<node 1 pages> ...
+file=<total file pages> N0=<node 0 pages> N1=<node 1 pages> ...
+anon=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
+hierarchical_<counter>=<counter pages> N0=<node 0 pages> N1=<node 1 pages> ...
+
+The "total" count is sum of file + anon + unevictable.
+
+6. Hierarchy support
+
+The memory controller supports a deep hierarchy and hierarchical accounting.
+The hierarchy is created by creating the appropriate cgroups in the
+cgroup filesystem. Consider for example, the following cgroup filesystem
+hierarchy
+
+ root
+ / | \
+ / | \
+ a b c
+ | \
+ | \
+ d e
+
+In the diagram above, with hierarchical accounting enabled, all memory
+usage of e, is accounted to its ancestors up until the root (i.e, c and root),
+that has memory.use_hierarchy enabled. If one of the ancestors goes over its
+limit, the reclaim algorithm reclaims from the tasks in the ancestor and the
+children of the ancestor.
+
+6.1 Enabling hierarchical accounting and reclaim
+
+A memory cgroup by default disables the hierarchy feature. Support
+can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup
+
+# echo 1 > memory.use_hierarchy
+
+The feature can be disabled by
+
+# echo 0 > memory.use_hierarchy
+
+NOTE1: Enabling/disabling will fail if either the cgroup already has other
+ cgroups created below it, or if the parent cgroup has use_hierarchy
+ enabled.
+
+NOTE2: When panic_on_oom is set to "2", the whole system will panic in
+ case of an OOM event in any cgroup.
+
+7. Soft limits
+
+Soft limits allow for greater sharing of memory. The idea behind soft limits
+is to allow control groups to use as much of the memory as needed, provided
+
+a. There is no memory contention
+b. They do not exceed their hard limit
+
+When the system detects memory contention or low memory, control groups
+are pushed back to their soft limits. If the soft limit of each control
+group is very high, they are pushed back as much as possible to make
+sure that one control group does not starve the others of memory.
+
+Please note that soft limits is a best-effort feature; it comes with
+no guarantees, but it does its best to make sure that when memory is
+heavily contended for, memory is allocated based on the soft limit
+hints/setup. Currently soft limit based reclaim is set up such that
+it gets invoked from balance_pgdat (kswapd).
+
+7.1 Interface
+
+Soft limits can be setup by using the following commands (in this example we
+assume a soft limit of 256 MiB)
+
+# echo 256M > memory.soft_limit_in_bytes
+
+If we want to change this to 1G, we can at any time use
+
+# echo 1G > memory.soft_limit_in_bytes
+
+NOTE1: Soft limits take effect over a long period of time, since they involve
+ reclaiming memory for balancing between memory cgroups
+NOTE2: It is recommended to set the soft limit always below the hard limit,
+ otherwise the hard limit will take precedence.
+
+8. Move charges at task migration
+
+Users can move charges associated with a task along with task migration, that
+is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
+This feature is not supported in !CONFIG_MMU environments because of lack of
+page tables.
+
+8.1 Interface
+
+This feature is disabled by default. It can be enabled (and disabled again) by
+writing to memory.move_charge_at_immigrate of the destination cgroup.
+
+If you want to enable it:
+
+# echo (some positive value) > memory.move_charge_at_immigrate
+
+Note: Each bits of move_charge_at_immigrate has its own meaning about what type
+ of charges should be moved. See 8.2 for details.
+Note: Charges are moved only when you move mm->owner, in other words,
+ a leader of a thread group.
+Note: If we cannot find enough space for the task in the destination cgroup, we
+ try to make space by reclaiming memory. Task migration may fail if we
+ cannot make enough space.
+Note: It can take several seconds if you move charges much.
+
+And if you want disable it again:
+
+# echo 0 > memory.move_charge_at_immigrate
+
+8.2 Type of charges which can be moved
+
+Each bit in move_charge_at_immigrate has its own meaning about what type of
+charges should be moved. But in any case, it must be noted that an account of
+a page or a swap can be moved only when it is charged to the task's current
+(old) memory cgroup.
+
+ bit | what type of charges would be moved ?
+ -----+------------------------------------------------------------------------
+ 0 | A charge of an anonymous page (or swap of it) used by the target task.
+ | You must enable Swap Extension (see 2.4) to enable move of swap charges.
+ -----+------------------------------------------------------------------------
+ 1 | A charge of file pages (normal file, tmpfs file (e.g. ipc shared memory)
+ | and swaps of tmpfs file) mmapped by the target task. Unlike the case of
+ | anonymous pages, file pages (and swaps) in the range mmapped by the task
+ | will be moved even if the task hasn't done page fault, i.e. they might
+ | not be the task's "RSS", but other task's "RSS" that maps the same file.
+ | And mapcount of the page is ignored (the page can be moved even if
+ | page_mapcount(page) > 1). You must enable Swap Extension (see 2.4) to
+ | enable move of swap charges.
+
+8.3 TODO
+
+- All of moving charge operations are done under cgroup_mutex. It's not good
+ behavior to hold the mutex too long, so we may need some trick.
+
+9. Memory thresholds
+
+Memory cgroup implements memory thresholds using the cgroups notification
+API (see cgroups.txt). It allows to register multiple memory and memsw
+thresholds and gets notifications when it crosses.
+
+To register a threshold, an application must:
+- create an eventfd using eventfd(2);
+- open memory.usage_in_bytes or memory.memsw.usage_in_bytes;
+- write string like "<event_fd> <fd of memory.usage_in_bytes> <threshold>" to
+ cgroup.event_control.
+
+Application will be notified through eventfd when memory usage crosses
+threshold in any direction.
+
+It's applicable for root and non-root cgroup.
+
+10. OOM Control
+
+memory.oom_control file is for OOM notification and other controls.
+
+Memory cgroup implements OOM notifier using the cgroup notification
+API (See cgroups.txt). It allows to register multiple OOM notification
+delivery and gets notification when OOM happens.
+
+To register a notifier, an application must:
+ - create an eventfd using eventfd(2)
+ - open memory.oom_control file
+ - write string like "<event_fd> <fd of memory.oom_control>" to
+ cgroup.event_control
+
+The application will be notified through eventfd when OOM happens.
+OOM notification doesn't work for the root cgroup.
+
+You can disable the OOM-killer by writing "1" to memory.oom_control file, as:
+
+ #echo 1 > memory.oom_control
+
+If OOM-killer is disabled, tasks under cgroup will hang/sleep
+in memory cgroup's OOM-waitqueue when they request accountable memory.
+
+For running them, you have to relax the memory cgroup's OOM status by
+ * enlarge limit or reduce usage.
+To reduce usage,
+ * kill some tasks.
+ * move some tasks to other group with account migration.
+ * remove some files (on tmpfs?)
+
+Then, stopped tasks will work again.
+
+At reading, current status of OOM is shown.
+ oom_kill_disable 0 or 1 (if 1, oom-killer is disabled)
+ under_oom 0 or 1 (if 1, the memory cgroup is under OOM, tasks may
+ be stopped.)
+
+11. Memory Pressure
+
+The pressure level notifications can be used to monitor the memory
+allocation cost; based on the pressure, applications can implement
+different strategies of managing their memory resources. The pressure
+levels are defined as following:
+
+The "low" level means that the system is reclaiming memory for new
+allocations. Monitoring this reclaiming activity might be useful for
+maintaining cache level. Upon notification, the program (typically
+"Activity Manager") might analyze vmstat and act in advance (i.e.
+prematurely shutdown unimportant services).
+
+The "medium" level means that the system is experiencing medium memory
+pressure, the system might be making swap, paging out active file caches,
+etc. Upon this event applications may decide to further analyze
+vmstat/zoneinfo/memcg or internal memory usage statistics and free any
+resources that can be easily reconstructed or re-read from a disk.
+
+The "critical" level means that the system is actively thrashing, it is
+about to out of memory (OOM) or even the in-kernel OOM killer is on its
+way to trigger. Applications should do whatever they can to help the
+system. It might be too late to consult with vmstat or any other
+statistics, so it's advisable to take an immediate action.
+
+The events are propagated upward until the event is handled, i.e. the
+events are not pass-through. Here is what this means: for example you have
+three cgroups: A->B->C. Now you set up an event listener on cgroups A, B
+and C, and suppose group C experiences some pressure. In this situation,
+only group C will receive the notification, i.e. groups A and B will not
+receive it. This is done to avoid excessive "broadcasting" of messages,
+which disturbs the system and which is especially bad if we are low on
+memory or thrashing. So, organize the cgroups wisely, or propagate the
+events manually (or, ask us to implement the pass-through events,
+explaining why would you need them.)
+
+The file memory.pressure_level is only used to setup an eventfd. To
+register a notification, an application must:
+
+- create an eventfd using eventfd(2);
+- open memory.pressure_level;
+- write string like "<event_fd> <fd of memory.pressure_level> <level>"
+ to cgroup.event_control.
+
+Application will be notified through eventfd when memory pressure is at
+the specific level (or higher). Read/write operations to
+memory.pressure_level are no implemented.
+
+Test:
+
+ Here is a small script example that makes a new cgroup, sets up a
+ memory limit, sets up a notification in the cgroup and then makes child
+ cgroup experience a critical pressure:
+
+ # cd /sys/fs/cgroup/memory/
+ # mkdir foo
+ # cd foo
+ # cgroup_event_listener memory.pressure_level low &
+ # echo 8000000 > memory.limit_in_bytes
+ # echo 8000000 > memory.memsw.limit_in_bytes
+ # echo $$ > tasks
+ # dd if=/dev/zero | read x
+
+ (Expect a bunch of notifications, and eventually, the oom-killer will
+ trigger.)
+
+12. TODO
+
+1. Make per-cgroup scanner reclaim not-shared pages first
+2. Teach controller to account for shared-pages
+3. Start reclamation in the background when the limit is
+ not yet hit but the usage is getting closer
+
+Summary
+
+Overall, the memory controller has been a stable controller and has been
+commented and discussed quite extensively in the community.
+
+References
+
+1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/
+2. Singh, Balbir. Memory Controller (RSS Control),
+ http://lwn.net/Articles/222762/
+3. Emelianov, Pavel. Resource controllers based on process cgroups
+ http://lkml.org/lkml/2007/3/6/198
+4. Emelianov, Pavel. RSS controller based on process cgroups (v2)
+ http://lkml.org/lkml/2007/4/9/78
+5. Emelianov, Pavel. RSS controller based on process cgroups (v3)
+ http://lkml.org/lkml/2007/5/30/244
+6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/
+7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control
+ subsystem (v3), http://lwn.net/Articles/235534/
+8. Singh, Balbir. RSS controller v2 test results (lmbench),
+ http://lkml.org/lkml/2007/5/17/232
+9. Singh, Balbir. RSS controller v2 AIM9 results
+ http://lkml.org/lkml/2007/5/18/1
+10. Singh, Balbir. Memory controller v6 test results,
+ http://lkml.org/lkml/2007/8/19/36
+11. Singh, Balbir. Memory controller introduction (v6),
+ http://lkml.org/lkml/2007/8/17/69
+12. Corbet, Jonathan, Controlling memory use in cgroups,
+ http://lwn.net/Articles/243795/
diff --git a/Documentation/cgroups/net_cls.txt b/Documentation/cgroups/net_cls.txt
new file mode 100644
index 000000000..ec182346d
--- /dev/null
+++ b/Documentation/cgroups/net_cls.txt
@@ -0,0 +1,39 @@
+Network classifier cgroup
+-------------------------
+
+The Network classifier cgroup provides an interface to
+tag network packets with a class identifier (classid).
+
+The Traffic Controller (tc) can be used to assign
+different priorities to packets from different cgroups.
+Also, Netfilter (iptables) can use this tag to perform
+actions on such packets.
+
+Creating a net_cls cgroups instance creates a net_cls.classid file.
+This net_cls.classid value is initialized to 0.
+
+You can write hexadecimal values to net_cls.classid; the format for these
+values is 0xAAAABBBB; AAAA is the major handle number and BBBB
+is the minor handle number.
+Reading net_cls.classid yields a decimal result.
+
+Example:
+mkdir /sys/fs/cgroup/net_cls
+mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
+mkdir /sys/fs/cgroup/net_cls/0
+echo 0x100001 > /sys/fs/cgroup/net_cls/0/net_cls.classid
+ - setting a 10:1 handle.
+
+cat /sys/fs/cgroup/net_cls/0/net_cls.classid
+1048577
+
+configuring tc:
+tc qdisc add dev eth0 root handle 10: htb
+
+tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit
+ - creating traffic class 10:1
+
+tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup
+
+configuring iptables, basic example:
+iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP
diff --git a/Documentation/cgroups/net_prio.txt b/Documentation/cgroups/net_prio.txt
new file mode 100644
index 000000000..a82cbd28e
--- /dev/null
+++ b/Documentation/cgroups/net_prio.txt
@@ -0,0 +1,55 @@
+Network priority cgroup
+-------------------------
+
+The Network priority cgroup provides an interface to allow an administrator to
+dynamically set the priority of network traffic generated by various
+applications
+
+Nominally, an application would set the priority of its traffic via the
+SO_PRIORITY socket option. This however, is not always possible because:
+
+1) The application may not have been coded to set this value
+2) The priority of application traffic is often a site-specific administrative
+ decision rather than an application defined one.
+
+This cgroup allows an administrator to assign a process to a group which defines
+the priority of egress traffic on a given interface. Network priority groups can
+be created by first mounting the cgroup filesystem.
+
+# mount -t cgroup -onet_prio none /sys/fs/cgroup/net_prio
+
+With the above step, the initial group acting as the parent accounting group
+becomes visible at '/sys/fs/cgroup/net_prio'. This group includes all tasks in
+the system. '/sys/fs/cgroup/net_prio/tasks' lists the tasks in this cgroup.
+
+Each net_prio cgroup contains two files that are subsystem specific
+
+net_prio.prioidx
+This file is read-only, and is simply informative. It contains a unique integer
+value that the kernel uses as an internal representation of this cgroup.
+
+net_prio.ifpriomap
+This file contains a map of the priorities assigned to traffic originating from
+processes in this group and egressing the system on various interfaces. It
+contains a list of tuples in the form <ifname priority>. Contents of this file
+can be modified by echoing a string into the file using the same tuple format.
+for example:
+
+echo "eth0 5" > /sys/fs/cgroups/net_prio/iscsi/net_prio.ifpriomap
+
+This command would force any traffic originating from processes belonging to the
+iscsi net_prio cgroup and egressing on interface eth0 to have the priority of
+said traffic set to the value 5. The parent accounting group also has a
+writeable 'net_prio.ifpriomap' file that can be used to set a system default
+priority.
+
+Priorities are set immediately prior to queueing a frame to the device
+queueing discipline (qdisc) so priorities will be assigned prior to the hardware
+queue selection being made.
+
+One usage for the net_prio cgroup is with mqprio qdisc allowing application
+traffic to be steered to hardware/driver based traffic classes. These mappings
+can then be managed by administrators or other networking protocols such as
+DCBX.
+
+A new net_prio cgroup inherits the parent's configuration.
diff --git a/Documentation/cgroups/unified-hierarchy.txt b/Documentation/cgroups/unified-hierarchy.txt
new file mode 100644
index 000000000..eb102fb72
--- /dev/null
+++ b/Documentation/cgroups/unified-hierarchy.txt
@@ -0,0 +1,461 @@
+
+Cgroup unified hierarchy
+
+April, 2014 Tejun Heo <tj@kernel.org>
+
+This document describes the changes made by unified hierarchy and
+their rationales. It will eventually be merged into the main cgroup
+documentation.
+
+CONTENTS
+
+1. Background
+2. Basic Operation
+ 2-1. Mounting
+ 2-2. cgroup.subtree_control
+ 2-3. cgroup.controllers
+3. Structural Constraints
+ 3-1. Top-down
+ 3-2. No internal tasks
+4. Other Changes
+ 4-1. [Un]populated Notification
+ 4-2. Other Core Changes
+ 4-3. Per-Controller Changes
+ 4-3-1. blkio
+ 4-3-2. cpuset
+ 4-3-3. memory
+5. Planned Changes
+ 5-1. CAP for resource control
+
+
+1. Background
+
+cgroup allows an arbitrary number of hierarchies and each hierarchy
+can host any number of controllers. While this seems to provide a
+high level of flexibility, it isn't quite useful in practice.
+
+For example, as there is only one instance of each controller, utility
+type controllers such as freezer which can be useful in all
+hierarchies can only be used in one. The issue is exacerbated by the
+fact that controllers can't be moved around once hierarchies are
+populated. Another issue is that all controllers bound to a hierarchy
+are forced to have exactly the same view of the hierarchy. It isn't
+possible to vary the granularity depending on the specific controller.
+
+In practice, these issues heavily limit which controllers can be put
+on the same hierarchy and most configurations resort to putting each
+controller on its own hierarchy. Only closely related ones, such as
+the cpu and cpuacct controllers, make sense to put on the same
+hierarchy. This often means that userland ends up managing multiple
+similar hierarchies repeating the same steps on each hierarchy
+whenever a hierarchy management operation is necessary.
+
+Unfortunately, support for multiple hierarchies comes at a steep cost.
+Internal implementation in cgroup core proper is dazzlingly
+complicated but more importantly the support for multiple hierarchies
+restricts how cgroup is used in general and what controllers can do.
+
+There's no limit on how many hierarchies there may be, which means
+that a task's cgroup membership can't be described in finite length.
+The key may contain any varying number of entries and is unlimited in
+length, which makes it highly awkward to handle and leads to addition
+of controllers which exist only to identify membership, which in turn
+exacerbates the original problem.
+
+Also, as a controller can't have any expectation regarding what shape
+of hierarchies other controllers would be on, each controller has to
+assume that all other controllers are operating on completely
+orthogonal hierarchies. This makes it impossible, or at least very
+cumbersome, for controllers to cooperate with each other.
+
+In most use cases, putting controllers on hierarchies which are
+completely orthogonal to each other isn't necessary. What usually is
+called for is the ability to have differing levels of granularity
+depending on the specific controller. In other words, hierarchy may
+be collapsed from leaf towards root when viewed from specific
+controllers. For example, a given configuration might not care about
+how memory is distributed beyond a certain level while still wanting
+to control how CPU cycles are distributed.
+
+Unified hierarchy is the next version of cgroup interface. It aims to
+address the aforementioned issues by having more structure while
+retaining enough flexibility for most use cases. Various other
+general and controller-specific interface issues are also addressed in
+the process.
+
+
+2. Basic Operation
+
+2-1. Mounting
+
+Currently, unified hierarchy can be mounted with the following mount
+command. Note that this is still under development and scheduled to
+change soon.
+
+ mount -t cgroup -o __DEVEL__sane_behavior cgroup $MOUNT_POINT
+
+All controllers which support the unified hierarchy and are not bound
+to other hierarchies are automatically bound to unified hierarchy and
+show up at the root of it. Controllers which are enabled only in the
+root of unified hierarchy can be bound to other hierarchies. This
+allows mixing unified hierarchy with the traditional multiple
+hierarchies in a fully backward compatible way.
+
+For development purposes, the following boot parameter makes all
+controllers to appear on the unified hierarchy whether supported or
+not.
+
+ cgroup__DEVEL__legacy_files_on_dfl
+
+A controller can be moved across hierarchies only after the controller
+is no longer referenced in its current hierarchy. Because per-cgroup
+controller states are destroyed asynchronously and controllers may
+have lingering references, a controller may not show up immediately on
+the unified hierarchy after the final umount of the previous
+hierarchy. Similarly, a controller should be fully disabled to be
+moved out of the unified hierarchy and it may take some time for the
+disabled controller to become available for other hierarchies;
+furthermore, due to dependencies among controllers, other controllers
+may need to be disabled too.
+
+While useful for development and manual configurations, dynamically
+moving controllers between the unified and other hierarchies is
+strongly discouraged for production use. It is recommended to decide
+the hierarchies and controller associations before starting using the
+controllers.
+
+
+2-2. cgroup.subtree_control
+
+All cgroups on unified hierarchy have a "cgroup.subtree_control" file
+which governs which controllers are enabled on the children of the
+cgroup. Let's assume a hierarchy like the following.
+
+ root - A - B - C
+ \ D
+
+root's "cgroup.subtree_control" file determines which controllers are
+enabled on A. A's on B. B's on C and D. This coincides with the
+fact that controllers on the immediate sub-level are used to
+distribute the resources of the parent. In fact, it's natural to
+assume that resource control knobs of a child belong to its parent.
+Enabling a controller in a "cgroup.subtree_control" file declares that
+distribution of the respective resources of the cgroup will be
+controlled. Note that this means that controller enable states are
+shared among siblings.
+
+When read, the file contains a space-separated list of currently
+enabled controllers. A write to the file should contain a
+space-separated list of controllers with '+' or '-' prefixed (without
+the quotes). Controllers prefixed with '+' are enabled and '-'
+disabled. If a controller is listed multiple times, the last entry
+wins. The specific operations are executed atomically - either all
+succeed or fail.
+
+
+2-3. cgroup.controllers
+
+Read-only "cgroup.controllers" file contains a space-separated list of
+controllers which can be enabled in the cgroup's
+"cgroup.subtree_control" file.
+
+In the root cgroup, this lists controllers which are not bound to
+other hierarchies and the content changes as controllers are bound to
+and unbound from other hierarchies.
+
+In non-root cgroups, the content of this file equals that of the
+parent's "cgroup.subtree_control" file as only controllers enabled
+from the parent can be used in its children.
+
+
+3. Structural Constraints
+
+3-1. Top-down
+
+As it doesn't make sense to nest control of an uncontrolled resource,
+all non-root "cgroup.subtree_control" files can only contain
+controllers which are enabled in the parent's "cgroup.subtree_control"
+file. A controller can be enabled only if the parent has the
+controller enabled and a controller can't be disabled if one or more
+children have it enabled.
+
+
+3-2. No internal tasks
+
+One long-standing issue that cgroup faces is the competition between
+tasks belonging to the parent cgroup and its children cgroups. This
+is inherently nasty as two different types of entities compete and
+there is no agreed-upon obvious way to handle it. Different
+controllers are doing different things.
+
+The cpu controller considers tasks and cgroups as equivalents and maps
+nice levels to cgroup weights. This works for some cases but falls
+flat when children should be allocated specific ratios of CPU cycles
+and the number of internal tasks fluctuates - the ratios constantly
+change as the number of competing entities fluctuates. There also are
+other issues. The mapping from nice level to weight isn't obvious or
+universal, and there are various other knobs which simply aren't
+available for tasks.
+
+The blkio controller implicitly creates a hidden leaf node for each
+cgroup to host the tasks. The hidden leaf has its own copies of all
+the knobs with "leaf_" prefixed. While this allows equivalent control
+over internal tasks, it's with serious drawbacks. It always adds an
+extra layer of nesting which may not be necessary, makes the interface
+messy and significantly complicates the implementation.
+
+The memory controller currently doesn't have a way to control what
+happens between internal tasks and child cgroups and the behavior is
+not clearly defined. There have been attempts to add ad-hoc behaviors
+and knobs to tailor the behavior to specific workloads. Continuing
+this direction will lead to problems which will be extremely difficult
+to resolve in the long term.
+
+Multiple controllers struggle with internal tasks and came up with
+different ways to deal with it; unfortunately, all the approaches in
+use now are severely flawed and, furthermore, the widely different
+behaviors make cgroup as whole highly inconsistent.
+
+It is clear that this is something which needs to be addressed from
+cgroup core proper in a uniform way so that controllers don't need to
+worry about it and cgroup as a whole shows a consistent and logical
+behavior. To achieve that, unified hierarchy enforces the following
+structural constraint:
+
+ Except for the root, only cgroups which don't contain any task may
+ have controllers enabled in their "cgroup.subtree_control" files.
+
+Combined with other properties, this guarantees that, when a
+controller is looking at the part of the hierarchy which has it
+enabled, tasks are always only on the leaves. This rules out
+situations where child cgroups compete against internal tasks of the
+parent.
+
+There are two things to note. Firstly, the root cgroup is exempt from
+the restriction. Root contains tasks and anonymous resource
+consumption which can't be associated with any other cgroup and
+requires special treatment from most controllers. How resource
+consumption in the root cgroup is governed is up to each controller.
+
+Secondly, the restriction doesn't take effect if there is no enabled
+controller in the cgroup's "cgroup.subtree_control" file. This is
+important as otherwise it wouldn't be possible to create children of a
+populated cgroup. To control resource distribution of a cgroup, the
+cgroup must create children and transfer all its tasks to the children
+before enabling controllers in its "cgroup.subtree_control" file.
+
+
+4. Other Changes
+
+4-1. [Un]populated Notification
+
+cgroup users often need a way to determine when a cgroup's
+subhierarchy becomes empty so that it can be cleaned up. cgroup
+currently provides release_agent for it; unfortunately, this mechanism
+is riddled with issues.
+
+- It delivers events by forking and execing a userland binary
+ specified as the release_agent. This is a long deprecated method of
+ notification delivery. It's extremely heavy, slow and cumbersome to
+ integrate with larger infrastructure.
+
+- There is single monitoring point at the root. There's no way to
+ delegate management of a subtree.
+
+- The event isn't recursive. It triggers when a cgroup doesn't have
+ any tasks or child cgroups. Events for internal nodes trigger only
+ after all children are removed. This again makes it impossible to
+ delegate management of a subtree.
+
+- Events are filtered from the kernel side. A "notify_on_release"
+ file is used to subscribe to or suppress release events. This is
+ unnecessarily complicated and probably done this way because event
+ delivery itself was expensive.
+
+Unified hierarchy implements an interface file "cgroup.populated"
+which can be used to monitor whether the cgroup's subhierarchy has
+tasks in it or not. Its value is 0 if there is no task in the cgroup
+and its descendants; otherwise, 1. poll and [id]notify events are
+triggered when the value changes.
+
+This is significantly lighter and simpler and trivially allows
+delegating management of subhierarchy - subhierarchy monitoring can
+block further propagation simply by putting itself or another process
+in the subhierarchy and monitor events that it's interested in from
+there without interfering with monitoring higher in the tree.
+
+In unified hierarchy, the release_agent mechanism is no longer
+supported and the interface files "release_agent" and
+"notify_on_release" do not exist.
+
+
+4-2. Other Core Changes
+
+- None of the mount options is allowed.
+
+- remount is disallowed.
+
+- rename(2) is disallowed.
+
+- The "tasks" file is removed. Everything should at process
+ granularity. Use the "cgroup.procs" file instead.
+
+- The "cgroup.procs" file is not sorted. pids will be unique unless
+ they got recycled in-between reads.
+
+- The "cgroup.clone_children" file is removed.
+
+
+4-3. Per-Controller Changes
+
+4-3-1. blkio
+
+- blk-throttle becomes properly hierarchical.
+
+
+4-3-2. cpuset
+
+- Tasks are kept in empty cpusets after hotplug and take on the masks
+ of the nearest non-empty ancestor, instead of being moved to it.
+
+- A task can be moved into an empty cpuset, and again it takes on the
+ masks of the nearest non-empty ancestor.
+
+
+4-3-3. memory
+
+- use_hierarchy is on by default and the cgroup file for the flag is
+ not created.
+
+- The original lower boundary, the soft limit, is defined as a limit
+ that is per default unset. As a result, the set of cgroups that
+ global reclaim prefers is opt-in, rather than opt-out. The costs
+ for optimizing these mostly negative lookups are so high that the
+ implementation, despite its enormous size, does not even provide the
+ basic desirable behavior. First off, the soft limit has no
+ hierarchical meaning. All configured groups are organized in a
+ global rbtree and treated like equal peers, regardless where they
+ are located in the hierarchy. This makes subtree delegation
+ impossible. Second, the soft limit reclaim pass is so aggressive
+ that it not just introduces high allocation latencies into the
+ system, but also impacts system performance due to overreclaim, to
+ the point where the feature becomes self-defeating.
+
+ The memory.low boundary on the other hand is a top-down allocated
+ reserve. A cgroup enjoys reclaim protection when it and all its
+ ancestors are below their low boundaries, which makes delegation of
+ subtrees possible. Secondly, new cgroups have no reserve per
+ default and in the common case most cgroups are eligible for the
+ preferred reclaim pass. This allows the new low boundary to be
+ efficiently implemented with just a minor addition to the generic
+ reclaim code, without the need for out-of-band data structures and
+ reclaim passes. Because the generic reclaim code considers all
+ cgroups except for the ones running low in the preferred first
+ reclaim pass, overreclaim of individual groups is eliminated as
+ well, resulting in much better overall workload performance.
+
+- The original high boundary, the hard limit, is defined as a strict
+ limit that can not budge, even if the OOM killer has to be called.
+ But this generally goes against the goal of making the most out of
+ the available memory. The memory consumption of workloads varies
+ during runtime, and that requires users to overcommit. But doing
+ that with a strict upper limit requires either a fairly accurate
+ prediction of the working set size or adding slack to the limit.
+ Since working set size estimation is hard and error prone, and
+ getting it wrong results in OOM kills, most users tend to err on the
+ side of a looser limit and end up wasting precious resources.
+
+ The memory.high boundary on the other hand can be set much more
+ conservatively. When hit, it throttles allocations by forcing them
+ into direct reclaim to work off the excess, but it never invokes the
+ OOM killer. As a result, a high boundary that is chosen too
+ aggressively will not terminate the processes, but instead it will
+ lead to gradual performance degradation. The user can monitor this
+ and make corrections until the minimal memory footprint that still
+ gives acceptable performance is found.
+
+ In extreme cases, with many concurrent allocations and a complete
+ breakdown of reclaim progress within the group, the high boundary
+ can be exceeded. But even then it's mostly better to satisfy the
+ allocation from the slack available in other groups or the rest of
+ the system than killing the group. Otherwise, memory.max is there
+ to limit this type of spillover and ultimately contain buggy or even
+ malicious applications.
+
+- The original control file names are unwieldy and inconsistent in
+ many different ways. For example, the upper boundary hit count is
+ exported in the memory.failcnt file, but an OOM event count has to
+ be manually counted by listening to memory.oom_control events, and
+ lower boundary / soft limit events have to be counted by first
+ setting a threshold for that value and then counting those events.
+ Also, usage and limit files encode their units in the filename.
+ That makes the filenames very long, even though this is not
+ information that a user needs to be reminded of every time they type
+ out those names.
+
+ To address these naming issues, as well as to signal clearly that
+ the new interface carries a new configuration model, the naming
+ conventions in it necessarily differ from the old interface.
+
+- The original limit files indicate the state of an unset limit with a
+ Very High Number, and a configured limit can be unset by echoing -1
+ into those files. But that very high number is implementation and
+ architecture dependent and not very descriptive. And while -1 can
+ be understood as an underflow into the highest possible value, -2 or
+ -10M etc. do not work, so it's not consistent.
+
+ memory.low, memory.high, and memory.max will use the string "max" to
+ indicate and set the highest possible value.
+
+5. Planned Changes
+
+5-1. CAP for resource control
+
+Unified hierarchy will require one of the capabilities(7), which is
+yet to be decided, for all resource control related knobs. Process
+organization operations - creation of sub-cgroups and migration of
+processes in sub-hierarchies may be delegated by changing the
+ownership and/or permissions on the cgroup directory and
+"cgroup.procs" interface file; however, all operations which affect
+resource control - writes to a "cgroup.subtree_control" file or any
+controller-specific knobs - will require an explicit CAP privilege.
+
+This, in part, is to prevent the cgroup interface from being
+inadvertently promoted to programmable API used by non-privileged
+binaries. cgroup exposes various aspects of the system in ways which
+aren't properly abstracted for direct consumption by regular programs.
+This is an administration interface much closer to sysctl knobs than
+system calls. Even the basic access model, being filesystem path
+based, isn't suitable for direct consumption. There's no way to
+access "my cgroup" in a race-free way or make multiple operations
+atomic against migration to another cgroup.
+
+Another aspect is that, for better or for worse, the cgroup interface
+goes through far less scrutiny than regular interfaces for
+unprivileged userland. The upside is that cgroup is able to expose
+useful features which may not be suitable for general consumption in a
+reasonable time frame. It provides a relatively short path between
+internal details and userland-visible interface. Of course, this
+shortcut comes with high risk. We go through what we go through for
+general kernel APIs for good reasons. It may end up leaking internal
+details in a way which can exert significant pain by locking the
+kernel into a contract that can't be maintained in a reasonable
+manner.
+
+Also, due to the specific nature, cgroup and its controllers don't
+tend to attract attention from a wide scope of developers. cgroup's
+short history is already fraught with severely mis-designed
+interfaces, unnecessary commitments to and exposing of internal
+details, broken and dangerous implementations of various features.
+
+Keeping cgroup as an administration interface is both advantageous for
+its role and imperative given its nature. Some of the cgroup features
+may make sense for unprivileged access. If deemed justified, those
+must be further abstracted and implemented as a different interface,
+be it a system call or process-private filesystem, and survive through
+the scrutiny that any interface for general consumption is required to
+go through.
+
+Requiring CAP is not a complete solution but should serve as a
+significant deterrent against spraying cgroup usages in non-privileged
+programs.
diff --git a/Documentation/circular-buffers.txt b/Documentation/circular-buffers.txt
new file mode 100644
index 000000000..88951b179
--- /dev/null
+++ b/Documentation/circular-buffers.txt
@@ -0,0 +1,243 @@
+ ================
+ CIRCULAR BUFFERS
+ ================
+
+By: David Howells <dhowells@redhat.com>
+ Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+
+Linux provides a number of features that can be used to implement circular
+buffering. There are two sets of such features:
+
+ (1) Convenience functions for determining information about power-of-2 sized
+ buffers.
+
+ (2) Memory barriers for when the producer and the consumer of objects in the
+ buffer don't want to share a lock.
+
+To use these facilities, as discussed below, there needs to be just one
+producer and just one consumer. It is possible to handle multiple producers by
+serialising them, and to handle multiple consumers by serialising them.
+
+
+Contents:
+
+ (*) What is a circular buffer?
+
+ (*) Measuring power-of-2 buffers.
+
+ (*) Using memory barriers with circular buffers.
+ - The producer.
+ - The consumer.
+
+
+==========================
+WHAT IS A CIRCULAR BUFFER?
+==========================
+
+First of all, what is a circular buffer? A circular buffer is a buffer of
+fixed, finite size into which there are two indices:
+
+ (1) A 'head' index - the point at which the producer inserts items into the
+ buffer.
+
+ (2) A 'tail' index - the point at which the consumer finds the next item in
+ the buffer.
+
+Typically when the tail pointer is equal to the head pointer, the buffer is
+empty; and the buffer is full when the head pointer is one less than the tail
+pointer.
+
+The head index is incremented when items are added, and the tail index when
+items are removed. The tail index should never jump the head index, and both
+indices should be wrapped to 0 when they reach the end of the buffer, thus
+allowing an infinite amount of data to flow through the buffer.
+
+Typically, items will all be of the same unit size, but this isn't strictly
+required to use the techniques below. The indices can be increased by more
+than 1 if multiple items or variable-sized items are to be included in the
+buffer, provided that neither index overtakes the other. The implementer must
+be careful, however, as a region more than one unit in size may wrap the end of
+the buffer and be broken into two segments.
+
+
+============================
+MEASURING POWER-OF-2 BUFFERS
+============================
+
+Calculation of the occupancy or the remaining capacity of an arbitrarily sized
+circular buffer would normally be a slow operation, requiring the use of a
+modulus (divide) instruction. However, if the buffer is of a power-of-2 size,
+then a much quicker bitwise-AND instruction can be used instead.
+
+Linux provides a set of macros for handling power-of-2 circular buffers. These
+can be made use of by:
+
+ #include <linux/circ_buf.h>
+
+The macros are:
+
+ (*) Measure the remaining capacity of a buffer:
+
+ CIRC_SPACE(head_index, tail_index, buffer_size);
+
+ This returns the amount of space left in the buffer[1] into which items
+ can be inserted.
+
+
+ (*) Measure the maximum consecutive immediate space in a buffer:
+
+ CIRC_SPACE_TO_END(head_index, tail_index, buffer_size);
+
+ This returns the amount of consecutive space left in the buffer[1] into
+ which items can be immediately inserted without having to wrap back to the
+ beginning of the buffer.
+
+
+ (*) Measure the occupancy of a buffer:
+
+ CIRC_CNT(head_index, tail_index, buffer_size);
+
+ This returns the number of items currently occupying a buffer[2].
+
+
+ (*) Measure the non-wrapping occupancy of a buffer:
+
+ CIRC_CNT_TO_END(head_index, tail_index, buffer_size);
+
+ This returns the number of consecutive items[2] that can be extracted from
+ the buffer without having to wrap back to the beginning of the buffer.
+
+
+Each of these macros will nominally return a value between 0 and buffer_size-1,
+however:
+
+ [1] CIRC_SPACE*() are intended to be used in the producer. To the producer
+ they will return a lower bound as the producer controls the head index,
+ but the consumer may still be depleting the buffer on another CPU and
+ moving the tail index.
+
+ To the consumer it will show an upper bound as the producer may be busy
+ depleting the space.
+
+ [2] CIRC_CNT*() are intended to be used in the consumer. To the consumer they
+ will return a lower bound as the consumer controls the tail index, but the
+ producer may still be filling the buffer on another CPU and moving the
+ head index.
+
+ To the producer it will show an upper bound as the consumer may be busy
+ emptying the buffer.
+
+ [3] To a third party, the order in which the writes to the indices by the
+ producer and consumer become visible cannot be guaranteed as they are
+ independent and may be made on different CPUs - so the result in such a
+ situation will merely be a guess, and may even be negative.
+
+
+===========================================
+USING MEMORY BARRIERS WITH CIRCULAR BUFFERS
+===========================================
+
+By using memory barriers in conjunction with circular buffers, you can avoid
+the need to:
+
+ (1) use a single lock to govern access to both ends of the buffer, thus
+ allowing the buffer to be filled and emptied at the same time; and
+
+ (2) use atomic counter operations.
+
+There are two sides to this: the producer that fills the buffer, and the
+consumer that empties it. Only one thing should be filling a buffer at any one
+time, and only one thing should be emptying a buffer at any one time, but the
+two sides can operate simultaneously.
+
+
+THE PRODUCER
+------------
+
+The producer will look something like this:
+
+ spin_lock(&producer_lock);
+
+ unsigned long head = buffer->head;
+ /* The spin_unlock() and next spin_lock() provide needed ordering. */
+ unsigned long tail = ACCESS_ONCE(buffer->tail);
+
+ if (CIRC_SPACE(head, tail, buffer->size) >= 1) {
+ /* insert one item into the buffer */
+ struct item *item = buffer[head];
+
+ produce_item(item);
+
+ smp_store_release(buffer->head,
+ (head + 1) & (buffer->size - 1));
+
+ /* wake_up() will make sure that the head is committed before
+ * waking anyone up */
+ wake_up(consumer);
+ }
+
+ spin_unlock(&producer_lock);
+
+This will instruct the CPU that the contents of the new item must be written
+before the head index makes it available to the consumer and then instructs the
+CPU that the revised head index must be written before the consumer is woken.
+
+Note that wake_up() does not guarantee any sort of barrier unless something
+is actually awakened. We therefore cannot rely on it for ordering. However,
+there is always one element of the array left empty. Therefore, the
+producer must produce two elements before it could possibly corrupt the
+element currently being read by the consumer. Therefore, the unlock-lock
+pair between consecutive invocations of the consumer provides the necessary
+ordering between the read of the index indicating that the consumer has
+vacated a given element and the write by the producer to that same element.
+
+
+THE CONSUMER
+------------
+
+The consumer will look something like this:
+
+ spin_lock(&consumer_lock);
+
+ /* Read index before reading contents at that index. */
+ unsigned long head = smp_load_acquire(buffer->head);
+ unsigned long tail = buffer->tail;
+
+ if (CIRC_CNT(head, tail, buffer->size) >= 1) {
+
+ /* extract one item from the buffer */
+ struct item *item = buffer[tail];
+
+ consume_item(item);
+
+ /* Finish reading descriptor before incrementing tail. */
+ smp_store_release(buffer->tail,
+ (tail + 1) & (buffer->size - 1));
+ }
+
+ spin_unlock(&consumer_lock);
+
+This will instruct the CPU to make sure the index is up to date before reading
+the new item, and then it shall make sure the CPU has finished reading the item
+before it writes the new tail pointer, which will erase the item.
+
+Note the use of ACCESS_ONCE() and smp_load_acquire() to read the
+opposition index. This prevents the compiler from discarding and
+reloading its cached value - which some compilers will do across
+smp_read_barrier_depends(). This isn't strictly needed if you can
+be sure that the opposition index will _only_ be used the once.
+The smp_load_acquire() additionally forces the CPU to order against
+subsequent memory references. Similarly, smp_store_release() is used
+in both algorithms to write the thread's index. This documents the
+fact that we are writing to something that can be read concurrently,
+prevents the compiler from tearing the store, and enforces ordering
+against previous accesses.
+
+
+===============
+FURTHER READING
+===============
+
+See also Documentation/memory-barriers.txt for a description of Linux's memory
+barrier facilities.
diff --git a/Documentation/clk.txt b/Documentation/clk.txt
new file mode 100644
index 000000000..0e4f90aa1
--- /dev/null
+++ b/Documentation/clk.txt
@@ -0,0 +1,299 @@
+ The Common Clk Framework
+ Mike Turquette <mturquette@ti.com>
+
+This document endeavours to explain the common clk framework details,
+and how to port a platform over to this framework. It is not yet a
+detailed explanation of the clock api in include/linux/clk.h, but
+perhaps someday it will include that information.
+
+ Part 1 - introduction and interface split
+
+The common clk framework is an interface to control the clock nodes
+available on various devices today. This may come in the form of clock
+gating, rate adjustment, muxing or other operations. This framework is
+enabled with the CONFIG_COMMON_CLK option.
+
+The interface itself is divided into two halves, each shielded from the
+details of its counterpart. First is the common definition of struct
+clk which unifies the framework-level accounting and infrastructure that
+has traditionally been duplicated across a variety of platforms. Second
+is a common implementation of the clk.h api, defined in
+drivers/clk/clk.c. Finally there is struct clk_ops, whose operations
+are invoked by the clk api implementation.
+
+The second half of the interface is comprised of the hardware-specific
+callbacks registered with struct clk_ops and the corresponding
+hardware-specific structures needed to model a particular clock. For
+the remainder of this document any reference to a callback in struct
+clk_ops, such as .enable or .set_rate, implies the hardware-specific
+implementation of that code. Likewise, references to struct clk_foo
+serve as a convenient shorthand for the implementation of the
+hardware-specific bits for the hypothetical "foo" hardware.
+
+Tying the two halves of this interface together is struct clk_hw, which
+is defined in struct clk_foo and pointed to within struct clk. This
+allows for easy navigation between the two discrete halves of the common
+clock interface.
+
+ Part 2 - common data structures and api
+
+Below is the common struct clk definition from
+include/linux/clk-private.h, modified for brevity:
+
+ struct clk {
+ const char *name;
+ const struct clk_ops *ops;
+ struct clk_hw *hw;
+ char **parent_names;
+ struct clk **parents;
+ struct clk *parent;
+ struct hlist_head children;
+ struct hlist_node child_node;
+ ...
+ };
+
+The members above make up the core of the clk tree topology. The clk
+api itself defines several driver-facing functions which operate on
+struct clk. That api is documented in include/linux/clk.h.
+
+Platforms and devices utilizing the common struct clk use the struct
+clk_ops pointer in struct clk to perform the hardware-specific parts of
+the operations defined in clk.h:
+
+ struct clk_ops {
+ int (*prepare)(struct clk_hw *hw);
+ void (*unprepare)(struct clk_hw *hw);
+ int (*enable)(struct clk_hw *hw);
+ void (*disable)(struct clk_hw *hw);
+ int (*is_enabled)(struct clk_hw *hw);
+ unsigned long (*recalc_rate)(struct clk_hw *hw,
+ unsigned long parent_rate);
+ long (*round_rate)(struct clk_hw *hw,
+ unsigned long rate,
+ unsigned long *parent_rate);
+ long (*determine_rate)(struct clk_hw *hw,
+ unsigned long rate,
+ unsigned long min_rate,
+ unsigned long max_rate,
+ unsigned long *best_parent_rate,
+ struct clk_hw **best_parent_clk);
+ int (*set_parent)(struct clk_hw *hw, u8 index);
+ u8 (*get_parent)(struct clk_hw *hw);
+ int (*set_rate)(struct clk_hw *hw,
+ unsigned long rate,
+ unsigned long parent_rate);
+ int (*set_rate_and_parent)(struct clk_hw *hw,
+ unsigned long rate,
+ unsigned long parent_rate,
+ u8 index);
+ unsigned long (*recalc_accuracy)(struct clk_hw *hw,
+ unsigned long parent_accuracy);
+ void (*init)(struct clk_hw *hw);
+ int (*debug_init)(struct clk_hw *hw,
+ struct dentry *dentry);
+ };
+
+ Part 3 - hardware clk implementations
+
+The strength of the common struct clk comes from its .ops and .hw pointers
+which abstract the details of struct clk from the hardware-specific bits, and
+vice versa. To illustrate consider the simple gateable clk implementation in
+drivers/clk/clk-gate.c:
+
+struct clk_gate {
+ struct clk_hw hw;
+ void __iomem *reg;
+ u8 bit_idx;
+ ...
+};
+
+struct clk_gate contains struct clk_hw hw as well as hardware-specific
+knowledge about which register and bit controls this clk's gating.
+Nothing about clock topology or accounting, such as enable_count or
+notifier_count, is needed here. That is all handled by the common
+framework code and struct clk.
+
+Let's walk through enabling this clk from driver code:
+
+ struct clk *clk;
+ clk = clk_get(NULL, "my_gateable_clk");
+
+ clk_prepare(clk);
+ clk_enable(clk);
+
+The call graph for clk_enable is very simple:
+
+clk_enable(clk);
+ clk->ops->enable(clk->hw);
+ [resolves to...]
+ clk_gate_enable(hw);
+ [resolves struct clk gate with to_clk_gate(hw)]
+ clk_gate_set_bit(gate);
+
+And the definition of clk_gate_set_bit:
+
+static void clk_gate_set_bit(struct clk_gate *gate)
+{
+ u32 reg;
+
+ reg = __raw_readl(gate->reg);
+ reg |= BIT(gate->bit_idx);
+ writel(reg, gate->reg);
+}
+
+Note that to_clk_gate is defined as:
+
+#define to_clk_gate(_hw) container_of(_hw, struct clk_gate, clk)
+
+This pattern of abstraction is used for every clock hardware
+representation.
+
+ Part 4 - supporting your own clk hardware
+
+When implementing support for a new type of clock it only necessary to
+include the following header:
+
+#include <linux/clk-provider.h>
+
+include/linux/clk.h is included within that header and clk-private.h
+must never be included from the code which implements the operations for
+a clock. More on that below in Part 5.
+
+To construct a clk hardware structure for your platform you must define
+the following:
+
+struct clk_foo {
+ struct clk_hw hw;
+ ... hardware specific data goes here ...
+};
+
+To take advantage of your data you'll need to support valid operations
+for your clk:
+
+struct clk_ops clk_foo_ops {
+ .enable = &clk_foo_enable;
+ .disable = &clk_foo_disable;
+};
+
+Implement the above functions using container_of:
+
+#define to_clk_foo(_hw) container_of(_hw, struct clk_foo, hw)
+
+int clk_foo_enable(struct clk_hw *hw)
+{
+ struct clk_foo *foo;
+
+ foo = to_clk_foo(hw);
+
+ ... perform magic on foo ...
+
+ return 0;
+};
+
+Below is a matrix detailing which clk_ops are mandatory based upon the
+hardware capabilities of that clock. A cell marked as "y" means
+mandatory, a cell marked as "n" implies that either including that
+callback is invalid or otherwise unnecessary. Empty cells are either
+optional or must be evaluated on a case-by-case basis.
+
+ clock hardware characteristics
+ -----------------------------------------------------------
+ | gate | change rate | single parent | multiplexer | root |
+ |------|-------------|---------------|-------------|------|
+.prepare | | | | | |
+.unprepare | | | | | |
+ | | | | | |
+.enable | y | | | | |
+.disable | y | | | | |
+.is_enabled | y | | | | |
+ | | | | | |
+.recalc_rate | | y | | | |
+.round_rate | | y [1] | | | |
+.determine_rate | | y [1] | | | |
+.set_rate | | y | | | |
+ | | | | | |
+.set_parent | | | n | y | n |
+.get_parent | | | n | y | n |
+ | | | | | |
+.recalc_accuracy| | | | | |
+ | | | | | |
+.init | | | | | |
+ -----------------------------------------------------------
+[1] either one of round_rate or determine_rate is required.
+
+Finally, register your clock at run-time with a hardware-specific
+registration function. This function simply populates struct clk_foo's
+data and then passes the common struct clk parameters to the framework
+with a call to:
+
+clk_register(...)
+
+See the basic clock types in drivers/clk/clk-*.c for examples.
+
+ Part 5 - static initialization of clock data
+
+For platforms with many clocks (often numbering into the hundreds) it
+may be desirable to statically initialize some clock data. This
+presents a problem since the definition of struct clk should be hidden
+from everyone except for the clock core in drivers/clk/clk.c.
+
+To get around this problem struct clk's definition is exposed in
+include/linux/clk-private.h along with some macros for more easily
+initializing instances of the basic clock types. These clocks must
+still be initialized with the common clock framework via a call to
+__clk_init.
+
+clk-private.h must NEVER be included by code which implements struct
+clk_ops callbacks, nor must it be included by any logic which pokes
+around inside of struct clk at run-time. To do so is a layering
+violation.
+
+To better enforce this policy, always follow this simple rule: any
+statically initialized clock data MUST be defined in a separate file
+from the logic that implements its ops. Basically separate the logic
+from the data and all is well.
+
+ Part 6 - Disabling clock gating of unused clocks
+
+Sometimes during development it can be useful to be able to bypass the
+default disabling of unused clocks. For example, if drivers aren't enabling
+clocks properly but rely on them being on from the bootloader, bypassing
+the disabling means that the driver will remain functional while the issues
+are sorted out.
+
+To bypass this disabling, include "clk_ignore_unused" in the bootargs to the
+kernel.
+
+ Part 7 - Locking
+
+The common clock framework uses two global locks, the prepare lock and the
+enable lock.
+
+The enable lock is a spinlock and is held across calls to the .enable,
+.disable and .is_enabled operations. Those operations are thus not allowed to
+sleep, and calls to the clk_enable(), clk_disable() and clk_is_enabled() API
+functions are allowed in atomic context.
+
+The prepare lock is a mutex and is held across calls to all other operations.
+All those operations are allowed to sleep, and calls to the corresponding API
+functions are not allowed in atomic context.
+
+This effectively divides operations in two groups from a locking perspective.
+
+Drivers don't need to manually protect resources shared between the operations
+of one group, regardless of whether those resources are shared by multiple
+clocks or not. However, access to resources that are shared between operations
+of the two groups needs to be protected by the drivers. An example of such a
+resource would be a register that controls both the clock rate and the clock
+enable/disable state.
+
+The clock framework is reentrant, in that a driver is allowed to call clock
+framework functions from within its implementation of clock operations. This
+can for instance cause a .set_rate operation of one clock being called from
+within the .set_rate operation of another clock. This case must be considered
+in the driver implementations, but the code flow is usually controlled by the
+driver in that case.
+
+Note that locking must also be considered when code outside of the common
+clock framework needs to access resources used by the clock operations. This
+is considered out of scope of this document.
diff --git a/Documentation/cma/debugfs.txt b/Documentation/cma/debugfs.txt
new file mode 100644
index 000000000..6cef20a8c
--- /dev/null
+++ b/Documentation/cma/debugfs.txt
@@ -0,0 +1,21 @@
+The CMA debugfs interface is useful to retrieve basic information out of the
+different CMA areas and to test allocation/release in each of the areas.
+
+Each CMA zone represents a directory under <debugfs>/cma/, indexed by the
+kernel's CMA index. So the first CMA zone would be:
+
+ <debugfs>/cma/cma-0
+
+The structure of the files created under that directory is as follows:
+
+ - [RO] base_pfn: The base PFN (Page Frame Number) of the zone.
+ - [RO] count: Amount of memory in the CMA area.
+ - [RO] order_per_bit: Order of pages represented by one bit.
+ - [RO] bitmap: The bitmap of page states in the zone.
+ - [WO] alloc: Allocate N pages from that CMA area. For example:
+
+ echo 5 > <debugfs>/cma/cma-2/alloc
+
+would try to allocate 5 pages from the cma-2 area.
+
+ - [WO] free: Free N pages from that CMA area, similar to the above.
diff --git a/Documentation/coccinelle.txt b/Documentation/coccinelle.txt
new file mode 100644
index 000000000..7f773d51f
--- /dev/null
+++ b/Documentation/coccinelle.txt
@@ -0,0 +1,324 @@
+Copyright 2010 Nicolas Palix <npalix@diku.dk>
+Copyright 2010 Julia Lawall <julia@diku.dk>
+Copyright 2010 Gilles Muller <Gilles.Muller@lip6.fr>
+
+
+ Getting Coccinelle
+~~~~~~~~~~~~~~~~~~~~
+
+The semantic patches included in the kernel use features and options
+which are provided by Coccinelle version 1.0.0-rc11 and above.
+Using earlier versions will fail as the option names used by
+the Coccinelle files and coccicheck have been updated.
+
+Coccinelle is available through the package manager
+of many distributions, e.g. :
+
+ - Debian
+ - Fedora
+ - Ubuntu
+ - OpenSUSE
+ - Arch Linux
+ - NetBSD
+ - FreeBSD
+
+
+You can get the latest version released from the Coccinelle homepage at
+http://coccinelle.lip6.fr/
+
+Information and tips about Coccinelle are also provided on the wiki
+pages at http://cocci.ekstranet.diku.dk/wiki/doku.php
+
+Once you have it, run the following command:
+
+ ./configure
+ make
+
+as a regular user, and install it with
+
+ sudo make install
+
+ Using Coccinelle on the Linux kernel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+A Coccinelle-specific target is defined in the top level
+Makefile. This target is named 'coccicheck' and calls the 'coccicheck'
+front-end in the 'scripts' directory.
+
+Four basic modes are defined: patch, report, context, and org. The mode to
+use is specified by setting the MODE variable with 'MODE=<mode>'.
+
+'patch' proposes a fix, when possible.
+
+'report' generates a list in the following format:
+ file:line:column-column: message
+
+'context' highlights lines of interest and their context in a
+diff-like style.Lines of interest are indicated with '-'.
+
+'org' generates a report in the Org mode format of Emacs.
+
+Note that not all semantic patches implement all modes. For easy use
+of Coccinelle, the default mode is "report".
+
+Two other modes provide some common combinations of these modes.
+
+'chain' tries the previous modes in the order above until one succeeds.
+
+'rep+ctxt' runs successively the report mode and the context mode.
+ It should be used with the C option (described later)
+ which checks the code on a file basis.
+
+Examples:
+ To make a report for every semantic patch, run the following command:
+
+ make coccicheck MODE=report
+
+ To produce patches, run:
+
+ make coccicheck MODE=patch
+
+
+The coccicheck target applies every semantic patch available in the
+sub-directories of 'scripts/coccinelle' to the entire Linux kernel.
+
+For each semantic patch, a commit message is proposed. It gives a
+description of the problem being checked by the semantic patch, and
+includes a reference to Coccinelle.
+
+As any static code analyzer, Coccinelle produces false
+positives. Thus, reports must be carefully checked, and patches
+reviewed.
+
+To enable verbose messages set the V= variable, for example:
+
+ make coccicheck MODE=report V=1
+
+By default, coccicheck tries to run as parallel as possible. To change
+the parallelism, set the J= variable. For example, to run across 4 CPUs:
+
+ make coccicheck MODE=report J=4
+
+
+ Using Coccinelle with a single semantic patch
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The optional make variable COCCI can be used to check a single
+semantic patch. In that case, the variable must be initialized with
+the name of the semantic patch to apply.
+
+For instance:
+
+ make coccicheck COCCI=<my_SP.cocci> MODE=patch
+or
+ make coccicheck COCCI=<my_SP.cocci> MODE=report
+
+
+ Controlling Which Files are Processed by Coccinelle
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+By default the entire kernel source tree is checked.
+
+To apply Coccinelle to a specific directory, M= can be used.
+For example, to check drivers/net/wireless/ one may write:
+
+ make coccicheck M=drivers/net/wireless/
+
+To apply Coccinelle on a file basis, instead of a directory basis, the
+following command may be used:
+
+ make C=1 CHECK="scripts/coccicheck"
+
+To check only newly edited code, use the value 2 for the C flag, i.e.
+
+ make C=2 CHECK="scripts/coccicheck"
+
+In these modes, which works on a file basis, there is no information
+about semantic patches displayed, and no commit message proposed.
+
+This runs every semantic patch in scripts/coccinelle by default. The
+COCCI variable may additionally be used to only apply a single
+semantic patch as shown in the previous section.
+
+The "report" mode is the default. You can select another one with the
+MODE variable explained above.
+
+ Additional flags
+~~~~~~~~~~~~~~~~~~
+
+Additional flags can be passed to spatch through the SPFLAGS
+variable.
+
+ make SPFLAGS=--use-glimpse coccicheck
+ make SPFLAGS=--use-idutils coccicheck
+
+See spatch --help to learn more about spatch options.
+
+Note that the '--use-glimpse' and '--use-idutils' options
+require external tools for indexing the code. None of them is
+thus active by default. However, by indexing the code with
+one of these tools, and according to the cocci file used,
+spatch could proceed the entire code base more quickly.
+
+ Proposing new semantic patches
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+New semantic patches can be proposed and submitted by kernel
+developers. For sake of clarity, they should be organized in the
+sub-directories of 'scripts/coccinelle/'.
+
+
+ Detailed description of the 'report' mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+'report' generates a list in the following format:
+ file:line:column-column: message
+
+Example:
+
+Running
+
+ make coccicheck MODE=report COCCI=scripts/coccinelle/api/err_cast.cocci
+
+will execute the following part of the SmPL script.
+
+<smpl>
+@r depends on !context && !patch && (org || report)@
+expression x;
+position p;
+@@
+
+ ERR_PTR@p(PTR_ERR(x))
+
+@script:python depends on report@
+p << r.p;
+x << r.x;
+@@
+
+msg="ERR_CAST can be used with %s" % (x)
+coccilib.report.print_report(p[0], msg)
+</smpl>
+
+This SmPL excerpt generates entries on the standard output, as
+illustrated below:
+
+/home/user/linux/crypto/ctr.c:188:9-16: ERR_CAST can be used with alg
+/home/user/linux/crypto/authenc.c:619:9-16: ERR_CAST can be used with auth
+/home/user/linux/crypto/xts.c:227:9-16: ERR_CAST can be used with alg
+
+
+ Detailed description of the 'patch' mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When the 'patch' mode is available, it proposes a fix for each problem
+identified.
+
+Example:
+
+Running
+ make coccicheck MODE=patch COCCI=scripts/coccinelle/api/err_cast.cocci
+
+will execute the following part of the SmPL script.
+
+<smpl>
+@ depends on !context && patch && !org && !report @
+expression x;
+@@
+
+- ERR_PTR(PTR_ERR(x))
++ ERR_CAST(x)
+</smpl>
+
+This SmPL excerpt generates patch hunks on the standard output, as
+illustrated below:
+
+diff -u -p a/crypto/ctr.c b/crypto/ctr.c
+--- a/crypto/ctr.c 2010-05-26 10:49:38.000000000 +0200
++++ b/crypto/ctr.c 2010-06-03 23:44:49.000000000 +0200
+@@ -185,7 +185,7 @@ static struct crypto_instance *crypto_ct
+ alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_CIPHER,
+ CRYPTO_ALG_TYPE_MASK);
+ if (IS_ERR(alg))
+- return ERR_PTR(PTR_ERR(alg));
++ return ERR_CAST(alg);
+
+ /* Block size must be >= 4 bytes. */
+ err = -EINVAL;
+
+ Detailed description of the 'context' mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+'context' highlights lines of interest and their context
+in a diff-like style.
+
+NOTE: The diff-like output generated is NOT an applicable patch. The
+ intent of the 'context' mode is to highlight the important lines
+ (annotated with minus, '-') and gives some surrounding context
+ lines around. This output can be used with the diff mode of
+ Emacs to review the code.
+
+Example:
+
+Running
+ make coccicheck MODE=context COCCI=scripts/coccinelle/api/err_cast.cocci
+
+will execute the following part of the SmPL script.
+
+<smpl>
+@ depends on context && !patch && !org && !report@
+expression x;
+@@
+
+* ERR_PTR(PTR_ERR(x))
+</smpl>
+
+This SmPL excerpt generates diff hunks on the standard output, as
+illustrated below:
+
+diff -u -p /home/user/linux/crypto/ctr.c /tmp/nothing
+--- /home/user/linux/crypto/ctr.c 2010-05-26 10:49:38.000000000 +0200
++++ /tmp/nothing
+@@ -185,7 +185,6 @@ static struct crypto_instance *crypto_ct
+ alg = crypto_attr_alg(tb[1], CRYPTO_ALG_TYPE_CIPHER,
+ CRYPTO_ALG_TYPE_MASK);
+ if (IS_ERR(alg))
+- return ERR_PTR(PTR_ERR(alg));
+
+ /* Block size must be >= 4 bytes. */
+ err = -EINVAL;
+
+ Detailed description of the 'org' mode
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+'org' generates a report in the Org mode format of Emacs.
+
+Example:
+
+Running
+ make coccicheck MODE=org COCCI=scripts/coccinelle/api/err_cast.cocci
+
+will execute the following part of the SmPL script.
+
+<smpl>
+@r depends on !context && !patch && (org || report)@
+expression x;
+position p;
+@@
+
+ ERR_PTR@p(PTR_ERR(x))
+
+@script:python depends on org@
+p << r.p;
+x << r.x;
+@@
+
+msg="ERR_CAST can be used with %s" % (x)
+msg_safe=msg.replace("[","@(").replace("]",")")
+coccilib.org.print_todo(p[0], msg_safe)
+</smpl>
+
+This SmPL excerpt generates Org entries on the standard output, as
+illustrated below:
+
+* TODO [[view:/home/user/linux/crypto/ctr.c::face=ovl-face1::linb=188::colb=9::cole=16][ERR_CAST can be used with alg]]
+* TODO [[view:/home/user/linux/crypto/authenc.c::face=ovl-face1::linb=619::colb=9::cole=16][ERR_CAST can be used with auth]]
+* TODO [[view:/home/user/linux/crypto/xts.c::face=ovl-face1::linb=227::colb=9::cole=16][ERR_CAST can be used with alg]]
diff --git a/Documentation/connector/.gitignore b/Documentation/connector/.gitignore
new file mode 100644
index 000000000..d2b9c32ac
--- /dev/null
+++ b/Documentation/connector/.gitignore
@@ -0,0 +1 @@
+ucon
diff --git a/Documentation/connector/Makefile b/Documentation/connector/Makefile
new file mode 100644
index 000000000..d98e4df98
--- /dev/null
+++ b/Documentation/connector/Makefile
@@ -0,0 +1,16 @@
+ifneq ($(CONFIG_CONNECTOR),)
+obj-m += cn_test.o
+endif
+
+# List of programs to build
+hostprogs-y := ucon
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_ucon.o += -I$(objtree)/usr/include
+
+all: modules
+
+modules clean:
+ $(MAKE) -C ../.. SUBDIRS=$(PWD) $@
diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c
new file mode 100644
index 000000000..d12cc944b
--- /dev/null
+++ b/Documentation/connector/cn_test.c
@@ -0,0 +1,201 @@
+/*
+ * cn_test.c
+ *
+ * 2004+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#define pr_fmt(fmt) "cn_test: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+
+#include <linux/connector.h>
+
+static struct cb_id cn_test_id = { CN_NETLINK_USERS + 3, 0x456 };
+static char cn_test_name[] = "cn_test";
+static struct sock *nls;
+static struct timer_list cn_test_timer;
+
+static void cn_test_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp)
+{
+ pr_info("%s: %lu: idx=%x, val=%x, seq=%u, ack=%u, len=%d: %s.\n",
+ __func__, jiffies, msg->id.idx, msg->id.val,
+ msg->seq, msg->ack, msg->len,
+ msg->len ? (char *)msg->data : "");
+}
+
+/*
+ * Do not remove this function even if no one is using it as
+ * this is an example of how to get notifications about new
+ * connector user registration
+ */
+#if 0
+static int cn_test_want_notify(void)
+{
+ struct cn_ctl_msg *ctl;
+ struct cn_notify_req *req;
+ struct cn_msg *msg = NULL;
+ int size, size0;
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ u32 group = 1;
+
+ size0 = sizeof(*msg) + sizeof(*ctl) + 3 * sizeof(*req);
+
+ size = NLMSG_SPACE(size0);
+
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb) {
+ pr_err("failed to allocate new skb with size=%u\n", size);
+ return -ENOMEM;
+ }
+
+ nlh = nlmsg_put(skb, 0, 0x123, NLMSG_DONE, size - sizeof(*nlh), 0);
+ if (!nlh) {
+ kfree_skb(skb);
+ return -EMSGSIZE;
+ }
+
+ msg = nlmsg_data(nlh);
+
+ memset(msg, 0, size0);
+
+ msg->id.idx = -1;
+ msg->id.val = -1;
+ msg->seq = 0x123;
+ msg->ack = 0x345;
+ msg->len = size0 - sizeof(*msg);
+
+ ctl = (struct cn_ctl_msg *)(msg + 1);
+
+ ctl->idx_notify_num = 1;
+ ctl->val_notify_num = 2;
+ ctl->group = group;
+ ctl->len = msg->len - sizeof(*ctl);
+
+ req = (struct cn_notify_req *)(ctl + 1);
+
+ /*
+ * Idx.
+ */
+ req->first = cn_test_id.idx;
+ req->range = 10;
+
+ /*
+ * Val 0.
+ */
+ req++;
+ req->first = cn_test_id.val;
+ req->range = 10;
+
+ /*
+ * Val 1.
+ */
+ req++;
+ req->first = cn_test_id.val + 20;
+ req->range = 10;
+
+ NETLINK_CB(skb).dst_group = ctl->group;
+ //netlink_broadcast(nls, skb, 0, ctl->group, GFP_ATOMIC);
+ netlink_unicast(nls, skb, 0, 0);
+
+ pr_info("request was sent: group=0x%x\n", ctl->group);
+
+ return 0;
+}
+#endif
+
+static u32 cn_test_timer_counter;
+static void cn_test_timer_func(unsigned long __data)
+{
+ struct cn_msg *m;
+ char data[32];
+
+ pr_debug("%s: timer fired with data %lu\n", __func__, __data);
+
+ m = kzalloc(sizeof(*m) + sizeof(data), GFP_ATOMIC);
+ if (m) {
+
+ memcpy(&m->id, &cn_test_id, sizeof(m->id));
+ m->seq = cn_test_timer_counter;
+ m->len = sizeof(data);
+
+ m->len =
+ scnprintf(data, sizeof(data), "counter = %u",
+ cn_test_timer_counter) + 1;
+
+ memcpy(m + 1, data, m->len);
+
+ cn_netlink_send(m, 0, 0, GFP_ATOMIC);
+ kfree(m);
+ }
+
+ cn_test_timer_counter++;
+
+ mod_timer(&cn_test_timer, jiffies + msecs_to_jiffies(1000));
+}
+
+static int cn_test_init(void)
+{
+ int err;
+
+ err = cn_add_callback(&cn_test_id, cn_test_name, cn_test_callback);
+ if (err)
+ goto err_out;
+ cn_test_id.val++;
+ err = cn_add_callback(&cn_test_id, cn_test_name, cn_test_callback);
+ if (err) {
+ cn_del_callback(&cn_test_id);
+ goto err_out;
+ }
+
+ setup_timer(&cn_test_timer, cn_test_timer_func, 0);
+ mod_timer(&cn_test_timer, jiffies + msecs_to_jiffies(1000));
+
+ pr_info("initialized with id={%u.%u}\n",
+ cn_test_id.idx, cn_test_id.val);
+
+ return 0;
+
+ err_out:
+ if (nls && nls->sk_socket)
+ sock_release(nls->sk_socket);
+
+ return err;
+}
+
+static void cn_test_fini(void)
+{
+ del_timer_sync(&cn_test_timer);
+ cn_del_callback(&cn_test_id);
+ cn_test_id.val--;
+ cn_del_callback(&cn_test_id);
+ if (nls && nls->sk_socket)
+ sock_release(nls->sk_socket);
+}
+
+module_init(cn_test_init);
+module_exit(cn_test_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
+MODULE_DESCRIPTION("Connector's test module");
diff --git a/Documentation/connector/connector.txt b/Documentation/connector/connector.txt
new file mode 100644
index 000000000..f6215f951
--- /dev/null
+++ b/Documentation/connector/connector.txt
@@ -0,0 +1,188 @@
+/*****************************************/
+Kernel Connector.
+/*****************************************/
+
+Kernel connector - new netlink based userspace <-> kernel space easy
+to use communication module.
+
+The Connector driver makes it easy to connect various agents using a
+netlink based network. One must register a callback and an identifier.
+When the driver receives a special netlink message with the appropriate
+identifier, the appropriate callback will be called.
+
+From the userspace point of view it's quite straightforward:
+
+ socket();
+ bind();
+ send();
+ recv();
+
+But if kernelspace wants to use the full power of such connections, the
+driver writer must create special sockets, must know about struct sk_buff
+handling, etc... The Connector driver allows any kernelspace agents to use
+netlink based networking for inter-process communication in a significantly
+easier way:
+
+int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
+void cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __group, int gfp_mask);
+void cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __group, int gfp_mask);
+
+struct cb_id
+{
+ __u32 idx;
+ __u32 val;
+};
+
+idx and val are unique identifiers which must be registered in the
+connector.h header for in-kernel usage. void (*callback) (void *) is a
+callback function which will be called when a message with above idx.val
+is received by the connector core. The argument for that function must
+be dereferenced to struct cn_msg *.
+
+struct cn_msg
+{
+ struct cb_id id;
+
+ __u32 seq;
+ __u32 ack;
+
+ __u32 len; /* Length of the following data */
+ __u8 data[0];
+};
+
+/*****************************************/
+Connector interfaces.
+/*****************************************/
+
+int cn_add_callback(struct cb_id *id, char *name, void (*callback) (struct cn_msg *, struct netlink_skb_parms *));
+
+ Registers new callback with connector core.
+
+ struct cb_id *id - unique connector's user identifier.
+ It must be registered in connector.h for legal in-kernel users.
+ char *name - connector's callback symbolic name.
+ void (*callback) (struct cn..) - connector's callback.
+ cn_msg and the sender's credentials
+
+
+void cn_del_callback(struct cb_id *id);
+
+ Unregisters new callback with connector core.
+
+ struct cb_id *id - unique connector's user identifier.
+
+
+int cn_netlink_send_multi(struct cn_msg *msg, u16 len, u32 portid, u32 __groups, int gfp_mask);
+int cn_netlink_send(struct cn_msg *msg, u32 portid, u32 __groups, int gfp_mask);
+
+ Sends message to the specified groups. It can be safely called from
+ softirq context, but may silently fail under strong memory pressure.
+ If there are no listeners for given group -ESRCH can be returned.
+
+ struct cn_msg * - message header(with attached data).
+ u16 len - for *_multi multiple cn_msg messages can be sent
+ u32 port - destination port.
+ If non-zero the message will be sent to the
+ given port, which should be set to the
+ original sender.
+ u32 __group - destination group.
+ If port and __group is zero, then appropriate group will
+ be searched through all registered connector users,
+ and message will be delivered to the group which was
+ created for user with the same ID as in msg.
+ If __group is not zero, then message will be delivered
+ to the specified group.
+ int gfp_mask - GFP mask.
+
+ Note: When registering new callback user, connector core assigns
+ netlink group to the user which is equal to its id.idx.
+
+/*****************************************/
+Protocol description.
+/*****************************************/
+
+The current framework offers a transport layer with fixed headers. The
+recommended protocol which uses such a header is as following:
+
+msg->seq and msg->ack are used to determine message genealogy. When
+someone sends a message, they use a locally unique sequence and random
+acknowledge number. The sequence number may be copied into
+nlmsghdr->nlmsg_seq too.
+
+The sequence number is incremented with each message sent.
+
+If you expect a reply to the message, then the sequence number in the
+received message MUST be the same as in the original message, and the
+acknowledge number MUST be the same + 1.
+
+If we receive a message and its sequence number is not equal to one we
+are expecting, then it is a new message. If we receive a message and
+its sequence number is the same as one we are expecting, but its
+acknowledge is not equal to the sequence number in the original
+message + 1, then it is a new message.
+
+Obviously, the protocol header contains the above id.
+
+The connector allows event notification in the following form: kernel
+driver or userspace process can ask connector to notify it when
+selected ids will be turned on or off (registered or unregistered its
+callback). It is done by sending a special command to the connector
+driver (it also registers itself with id={-1, -1}).
+
+As example of this usage can be found in the cn_test.c module which
+uses the connector to request notification and to send messages.
+
+/*****************************************/
+Reliability.
+/*****************************************/
+
+Netlink itself is not a reliable protocol. That means that messages can
+be lost due to memory pressure or process' receiving queue overflowed,
+so caller is warned that it must be prepared. That is why the struct
+cn_msg [main connector's message header] contains u32 seq and u32 ack
+fields.
+
+/*****************************************/
+Userspace usage.
+/*****************************************/
+
+2.6.14 has a new netlink socket implementation, which by default does not
+allow people to send data to netlink groups other than 1.
+So, if you wish to use a netlink socket (for example using connector)
+with a different group number, the userspace application must subscribe to
+that group first. It can be achieved by the following pseudocode:
+
+s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+
+l_local.nl_family = AF_NETLINK;
+l_local.nl_groups = 12345;
+l_local.nl_pid = 0;
+
+if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
+ perror("bind");
+ close(s);
+ return -1;
+}
+
+{
+ int on = l_local.nl_groups;
+ setsockopt(s, 270, 1, &on, sizeof(on));
+}
+
+Where 270 above is SOL_NETLINK, and 1 is a NETLINK_ADD_MEMBERSHIP socket
+option. To drop a multicast subscription, one should call the above socket
+option with the NETLINK_DROP_MEMBERSHIP parameter which is defined as 0.
+
+2.6.14 netlink code only allows to select a group which is less or equal to
+the maximum group number, which is used at netlink_kernel_create() time.
+In case of connector it is CN_NETLINK_USERS + 0xf, so if you want to use
+group number 12345, you must increment CN_NETLINK_USERS to that number.
+Additional 0xf numbers are allocated to be used by non-in-kernel users.
+
+Due to this limitation, group 0xffffffff does not work now, so one can
+not use add/remove connector's group notifications, but as far as I know,
+only cn_test.c test module used it.
+
+Some work in netlink area is still being done, so things can be changed in
+2.6.15 timeframe, if it will happen, documentation will be updated for that
+kernel.
diff --git a/Documentation/connector/ucon.c b/Documentation/connector/ucon.c
new file mode 100644
index 000000000..8a4da64e0
--- /dev/null
+++ b/Documentation/connector/ucon.c
@@ -0,0 +1,250 @@
+/*
+ * ucon.c
+ *
+ * Copyright (c) 2004+ Evgeniy Polyakov <zbr@ioremap.net>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <asm/types.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+#include <arpa/inet.h>
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+#include <getopt.h>
+
+#include <linux/connector.h>
+
+#define DEBUG
+#define NETLINK_CONNECTOR 11
+
+/* Hopefully your userspace connector.h matches this kernel */
+#define CN_TEST_IDX CN_NETLINK_USERS + 3
+#define CN_TEST_VAL 0x456
+
+#ifdef DEBUG
+#define ulog(f, a...) fprintf(stdout, f, ##a)
+#else
+#define ulog(f, a...) do {} while (0)
+#endif
+
+static int need_exit;
+static __u32 seq;
+
+static int netlink_send(int s, struct cn_msg *msg)
+{
+ struct nlmsghdr *nlh;
+ unsigned int size;
+ int err;
+ char buf[128];
+ struct cn_msg *m;
+
+ size = NLMSG_SPACE(sizeof(struct cn_msg) + msg->len);
+
+ nlh = (struct nlmsghdr *)buf;
+ nlh->nlmsg_seq = seq++;
+ nlh->nlmsg_pid = getpid();
+ nlh->nlmsg_type = NLMSG_DONE;
+ nlh->nlmsg_len = size;
+ nlh->nlmsg_flags = 0;
+
+ m = NLMSG_DATA(nlh);
+#if 0
+ ulog("%s: [%08x.%08x] len=%u, seq=%u, ack=%u.\n",
+ __func__, msg->id.idx, msg->id.val, msg->len, msg->seq, msg->ack);
+#endif
+ memcpy(m, msg, sizeof(*m) + msg->len);
+
+ err = send(s, nlh, size, 0);
+ if (err == -1)
+ ulog("Failed to send: %s [%d].\n",
+ strerror(errno), errno);
+
+ return err;
+}
+
+static void usage(void)
+{
+ printf(
+ "Usage: ucon [options] [output file]\n"
+ "\n"
+ "\t-h\tthis help screen\n"
+ "\t-s\tsend buffers to the test module\n"
+ "\n"
+ "The default behavior of ucon is to subscribe to the test module\n"
+ "and wait for state messages. Any ones received are dumped to the\n"
+ "specified output file (or stdout). The test module is assumed to\n"
+ "have an id of {%u.%u}\n"
+ "\n"
+ "If you get no output, then verify the cn_test module id matches\n"
+ "the expected id above.\n"
+ , CN_TEST_IDX, CN_TEST_VAL
+ );
+}
+
+int main(int argc, char *argv[])
+{
+ int s;
+ char buf[1024];
+ int len;
+ struct nlmsghdr *reply;
+ struct sockaddr_nl l_local;
+ struct cn_msg *data;
+ FILE *out;
+ time_t tm;
+ struct pollfd pfd;
+ bool send_msgs = false;
+
+ while ((s = getopt(argc, argv, "hs")) != -1) {
+ switch (s) {
+ case 's':
+ send_msgs = true;
+ break;
+
+ case 'h':
+ usage();
+ return 0;
+
+ default:
+ /* getopt() outputs an error for us */
+ usage();
+ return 1;
+ }
+ }
+
+ if (argc != optind) {
+ out = fopen(argv[optind], "a+");
+ if (!out) {
+ ulog("Unable to open %s for writing: %s\n",
+ argv[1], strerror(errno));
+ out = stdout;
+ }
+ } else
+ out = stdout;
+
+ memset(buf, 0, sizeof(buf));
+
+ s = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+ if (s == -1) {
+ perror("socket");
+ return -1;
+ }
+
+ l_local.nl_family = AF_NETLINK;
+ l_local.nl_groups = -1; /* bitmask of requested groups */
+ l_local.nl_pid = 0;
+
+ ulog("subscribing to %u.%u\n", CN_TEST_IDX, CN_TEST_VAL);
+
+ if (bind(s, (struct sockaddr *)&l_local, sizeof(struct sockaddr_nl)) == -1) {
+ perror("bind");
+ close(s);
+ return -1;
+ }
+
+#if 0
+ {
+ int on = 0x57; /* Additional group number */
+ setsockopt(s, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &on, sizeof(on));
+ }
+#endif
+ if (send_msgs) {
+ int i, j;
+
+ memset(buf, 0, sizeof(buf));
+
+ data = (struct cn_msg *)buf;
+
+ data->id.idx = CN_TEST_IDX;
+ data->id.val = CN_TEST_VAL;
+ data->seq = seq++;
+ data->ack = 0;
+ data->len = 0;
+
+ for (j=0; j<10; ++j) {
+ for (i=0; i<1000; ++i) {
+ len = netlink_send(s, data);
+ }
+
+ ulog("%d messages have been sent to %08x.%08x.\n", i, data->id.idx, data->id.val);
+ }
+
+ return 0;
+ }
+
+
+ pfd.fd = s;
+
+ while (!need_exit) {
+ pfd.events = POLLIN;
+ pfd.revents = 0;
+ switch (poll(&pfd, 1, -1)) {
+ case 0:
+ need_exit = 1;
+ break;
+ case -1:
+ if (errno != EINTR) {
+ need_exit = 1;
+ break;
+ }
+ continue;
+ }
+ if (need_exit)
+ break;
+
+ memset(buf, 0, sizeof(buf));
+ len = recv(s, buf, sizeof(buf), 0);
+ if (len == -1) {
+ perror("recv buf");
+ close(s);
+ return -1;
+ }
+ reply = (struct nlmsghdr *)buf;
+
+ switch (reply->nlmsg_type) {
+ case NLMSG_ERROR:
+ fprintf(out, "Error message received.\n");
+ fflush(out);
+ break;
+ case NLMSG_DONE:
+ data = (struct cn_msg *)NLMSG_DATA(reply);
+
+ time(&tm);
+ fprintf(out, "%.24s : [%x.%x] [%08u.%08u].\n",
+ ctime(&tm), data->id.idx, data->id.val, data->seq, data->ack);
+ fflush(out);
+ break;
+ default:
+ break;
+ }
+ }
+
+ close(s);
+ return 0;
+}
diff --git a/Documentation/console/console.txt b/Documentation/console/console.txt
new file mode 100644
index 000000000..f93810d59
--- /dev/null
+++ b/Documentation/console/console.txt
@@ -0,0 +1,144 @@
+Console Drivers
+===============
+
+The linux kernel has 2 general types of console drivers. The first type is
+assigned by the kernel to all the virtual consoles during the boot process.
+This type will be called 'system driver', and only one system driver is allowed
+to exist. The system driver is persistent and it can never be unloaded, though
+it may become inactive.
+
+The second type has to be explicitly loaded and unloaded. This will be called
+'modular driver' by this document. Multiple modular drivers can coexist at
+any time with each driver sharing the console with other drivers including
+the system driver. However, modular drivers cannot take over the console
+that is currently occupied by another modular driver. (Exception: Drivers that
+call do_take_over_console() will succeed in the takeover regardless of the type
+of driver occupying the consoles.) They can only take over the console that is
+occupied by the system driver. In the same token, if the modular driver is
+released by the console, the system driver will take over.
+
+Modular drivers, from the programmer's point of view, has to call:
+
+ do_take_over_console() - load and bind driver to console layer
+ give_up_console() - unload driver, it will only work if driver is fully unbond
+
+In newer kernels, the following are also available:
+
+ do_register_con_driver()
+ do_unregister_con_driver()
+
+If sysfs is enabled, the contents of /sys/class/vtconsole can be
+examined. This shows the console backends currently registered by the
+system which are named vtcon<n> where <n> is an integer from 0 to 15. Thus:
+
+ ls /sys/class/vtconsole
+ . .. vtcon0 vtcon1
+
+Each directory in /sys/class/vtconsole has 3 files:
+
+ ls /sys/class/vtconsole/vtcon0
+ . .. bind name uevent
+
+What do these files signify?
+
+ 1. bind - this is a read/write file. It shows the status of the driver if
+ read, or acts to bind or unbind the driver to the virtual consoles
+ when written to. The possible values are:
+
+ 0 - means the driver is not bound and if echo'ed, commands the driver
+ to unbind
+
+ 1 - means the driver is bound and if echo'ed, commands the driver to
+ bind
+
+ 2. name - read-only file. Shows the name of the driver in this format:
+
+ cat /sys/class/vtconsole/vtcon0/name
+ (S) VGA+
+
+ '(S)' stands for a (S)ystem driver, ie, it cannot be directly
+ commanded to bind or unbind
+
+ 'VGA+' is the name of the driver
+
+ cat /sys/class/vtconsole/vtcon1/name
+ (M) frame buffer device
+
+ In this case, '(M)' stands for a (M)odular driver, one that can be
+ directly commanded to bind or unbind.
+
+ 3. uevent - ignore this file
+
+When unbinding, the modular driver is detached first, and then the system
+driver takes over the consoles vacated by the driver. Binding, on the other
+hand, will bind the driver to the consoles that are currently occupied by a
+system driver.
+
+NOTE1: Binding and unbinding must be selected in Kconfig. It's under:
+
+Device Drivers -> Character devices -> Support for binding and unbinding
+console drivers
+
+NOTE2: If any of the virtual consoles are in KD_GRAPHICS mode, then binding or
+unbinding will not succeed. An example of an application that sets the console
+to KD_GRAPHICS is X.
+
+How useful is this feature? This is very useful for console driver
+developers. By unbinding the driver from the console layer, one can unload the
+driver, make changes, recompile, reload and rebind the driver without any need
+for rebooting the kernel. For regular users who may want to switch from
+framebuffer console to VGA console and vice versa, this feature also makes
+this possible. (NOTE NOTE NOTE: Please read fbcon.txt under Documentation/fb
+for more details).
+
+Notes for developers:
+=====================
+
+do_take_over_console() is now broken up into:
+
+ do_register_con_driver()
+ do_bind_con_driver() - private function
+
+give_up_console() is a wrapper to do_unregister_con_driver(), and a driver must
+be fully unbound for this call to succeed. con_is_bound() will check if the
+driver is bound or not.
+
+Guidelines for console driver writers:
+=====================================
+
+In order for binding to and unbinding from the console to properly work,
+console drivers must follow these guidelines:
+
+1. All drivers, except system drivers, must call either do_register_con_driver()
+ or do_take_over_console(). do_register_con_driver() will just add the driver to
+ the console's internal list. It won't take over the
+ console. do_take_over_console(), as it name implies, will also take over (or
+ bind to) the console.
+
+2. All resources allocated during con->con_init() must be released in
+ con->con_deinit().
+
+3. All resources allocated in con->con_startup() must be released when the
+ driver, which was previously bound, becomes unbound. The console layer
+ does not have a complementary call to con->con_startup() so it's up to the
+ driver to check when it's legal to release these resources. Calling
+ con_is_bound() in con->con_deinit() will help. If the call returned
+ false(), then it's safe to release the resources. This balance has to be
+ ensured because con->con_startup() can be called again when a request to
+ rebind the driver to the console arrives.
+
+4. Upon exit of the driver, ensure that the driver is totally unbound. If the
+ condition is satisfied, then the driver must call do_unregister_con_driver()
+ or give_up_console().
+
+5. do_unregister_con_driver() can also be called on conditions which make it
+ impossible for the driver to service console requests. This can happen
+ with the framebuffer console that suddenly lost all of its drivers.
+
+The current crop of console drivers should still work correctly, but binding
+and unbinding them may cause problems. With minimal fixes, these drivers can
+be made to work correctly.
+
+==========================
+Antonino Daplas <adaplas@pol.net>
+
diff --git a/Documentation/cpu-freq/amd-powernow.txt b/Documentation/cpu-freq/amd-powernow.txt
new file mode 100644
index 000000000..254da155f
--- /dev/null
+++ b/Documentation/cpu-freq/amd-powernow.txt
@@ -0,0 +1,38 @@
+
+PowerNow! and Cool'n'Quiet are AMD names for frequency
+management capabilities in AMD processors. As the hardware
+implementation changes in new generations of the processors,
+there is a different cpu-freq driver for each generation.
+
+Note that the driver's will not load on the "wrong" hardware,
+so it is safe to try each driver in turn when in doubt as to
+which is the correct driver.
+
+Note that the functionality to change frequency (and voltage)
+is not available in all processors. The drivers will refuse
+to load on processors without this capability. The capability
+is detected with the cpuid instruction.
+
+The drivers use BIOS supplied tables to obtain frequency and
+voltage information appropriate for a particular platform.
+Frequency transitions will be unavailable if the BIOS does
+not supply these tables.
+
+6th Generation: powernow-k6
+
+7th Generation: powernow-k7: Athlon, Duron, Geode.
+
+8th Generation: powernow-k8: Athlon, Athlon 64, Opteron, Sempron.
+Documentation on this functionality in 8th generation processors
+is available in the "BIOS and Kernel Developer's Guide", publication
+26094, in chapter 9, available for download from www.amd.com.
+
+BIOS supplied data, for powernow-k7 and for powernow-k8, may be
+from either the PSB table or from ACPI objects. The ACPI support
+is only available if the kernel config sets CONFIG_ACPI_PROCESSOR.
+The powernow-k8 driver will attempt to use ACPI if so configured,
+and fall back to PST if that fails.
+The powernow-k7 driver will try to use the PSB support first, and
+fall back to ACPI if the PSB support fails. A module parameter,
+acpi_force, is provided to force ACPI support to be used instead
+of PSB support.
diff --git a/Documentation/cpu-freq/boost.txt b/Documentation/cpu-freq/boost.txt
new file mode 100644
index 000000000..dd62e1334
--- /dev/null
+++ b/Documentation/cpu-freq/boost.txt
@@ -0,0 +1,93 @@
+Processor boosting control
+
+ - information for users -
+
+Quick guide for the impatient:
+--------------------
+/sys/devices/system/cpu/cpufreq/boost
+controls the boost setting for the whole system. You can read and write
+that file with either "0" (boosting disabled) or "1" (boosting allowed).
+Reading or writing 1 does not mean that the system is boosting at this
+very moment, but only that the CPU _may_ raise the frequency at it's
+discretion.
+--------------------
+
+Introduction
+-------------
+Some CPUs support a functionality to raise the operating frequency of
+some cores in a multi-core package if certain conditions apply, mostly
+if the whole chip is not fully utilized and below it's intended thermal
+budget. The decision about boost disable/enable is made either at hardware
+(e.g. x86) or software (e.g ARM).
+On Intel CPUs this is called "Turbo Boost", AMD calls it "Turbo-Core",
+in technical documentation "Core performance boost". In Linux we use
+the term "boost" for convenience.
+
+Rationale for disable switch
+----------------------------
+
+Though the idea is to just give better performance without any user
+intervention, sometimes the need arises to disable this functionality.
+Most systems offer a switch in the (BIOS) firmware to disable the
+functionality at all, but a more fine-grained and dynamic control would
+be desirable:
+1. While running benchmarks, reproducible results are important. Since
+ the boosting functionality depends on the load of the whole package,
+ single thread performance can vary. By explicitly disabling the boost
+ functionality at least for the benchmark's run-time the system will run
+ at a fixed frequency and results are reproducible again.
+2. To examine the impact of the boosting functionality it is helpful
+ to do tests with and without boosting.
+3. Boosting means overclocking the processor, though under controlled
+ conditions. By raising the frequency and the voltage the processor
+ will consume more power than without the boosting, which may be
+ undesirable for instance for mobile users. Disabling boosting may
+ save power here, though this depends on the workload.
+
+
+User controlled switch
+----------------------
+
+To allow the user to toggle the boosting functionality, the cpufreq core
+driver exports a sysfs knob to enable or disable it. There is a file:
+/sys/devices/system/cpu/cpufreq/boost
+which can either read "0" (boosting disabled) or "1" (boosting enabled).
+The file is exported only when cpufreq driver supports boosting.
+Explicitly changing the permissions and writing to that file anyway will
+return EINVAL.
+
+On supported CPUs one can write either a "0" or a "1" into this file.
+This will either disable the boost functionality on all cores in the
+whole system (0) or will allow the software or hardware to boost at will
+(1).
+
+Writing a "1" does not explicitly boost the system, but just allows the
+CPU to boost at their discretion. Some implementations take external
+factors like the chip's temperature into account, so boosting once does
+not necessarily mean that it will occur every time even using the exact
+same software setup.
+
+
+AMD legacy cpb switch
+---------------------
+The AMD powernow-k8 driver used to support a very similar switch to
+disable or enable the "Core Performance Boost" feature of some AMD CPUs.
+This switch was instantiated in each CPU's cpufreq directory
+(/sys/devices/system/cpu[0-9]*/cpufreq) and was called "cpb".
+Though the per CPU existence hints at a more fine grained control, the
+actual implementation only supported a system-global switch semantics,
+which was simply reflected into each CPU's file. Writing a 0 or 1 into it
+would pull the other CPUs to the same state.
+For compatibility reasons this file and its behavior is still supported
+on AMD CPUs, though it is now protected by a config switch
+(X86_ACPI_CPUFREQ_CPB). On Intel CPUs this file will never be created,
+even with the config option set.
+This functionality is considered legacy and will be removed in some future
+kernel version.
+
+More fine grained boosting control
+----------------------------------
+
+Technically it is possible to switch the boosting functionality at least
+on a per package basis, for some CPUs even per core. Currently the driver
+does not support it, but this may be implemented in the future.
diff --git a/Documentation/cpu-freq/core.txt b/Documentation/cpu-freq/core.txt
new file mode 100644
index 000000000..70933eadc
--- /dev/null
+++ b/Documentation/cpu-freq/core.txt
@@ -0,0 +1,123 @@
+ CPU frequency and voltage scaling code in the Linux(TM) kernel
+
+
+ L i n u x C P U F r e q
+
+ C P U F r e q C o r e
+
+
+ Dominik Brodowski <linux@brodo.de>
+ David Kimdon <dwhedon@debian.org>
+
+
+
+ Clock scaling allows you to change the clock speed of the CPUs on the
+ fly. This is a nice method to save battery power, because the lower
+ the clock speed, the less power the CPU consumes.
+
+
+Contents:
+---------
+1. CPUFreq core and interfaces
+2. CPUFreq notifiers
+3. CPUFreq Table Generation with Operating Performance Point (OPP)
+
+1. General Information
+=======================
+
+The CPUFreq core code is located in drivers/cpufreq/cpufreq.c. This
+cpufreq code offers a standardized interface for the CPUFreq
+architecture drivers (those pieces of code that do actual
+frequency transitions), as well as to "notifiers". These are device
+drivers or other part of the kernel that need to be informed of
+policy changes (ex. thermal modules like ACPI) or of all
+frequency changes (ex. timing code) or even need to force certain
+speed limits (like LCD drivers on ARM architecture). Additionally, the
+kernel "constant" loops_per_jiffy is updated on frequency changes
+here.
+
+Reference counting is done by cpufreq_get_cpu and cpufreq_put_cpu,
+which make sure that the cpufreq processor driver is correctly
+registered with the core, and will not be unloaded until
+cpufreq_put_cpu is called.
+
+2. CPUFreq notifiers
+====================
+
+CPUFreq notifiers conform to the standard kernel notifier interface.
+See linux/include/linux/notifier.h for details on notifiers.
+
+There are two different CPUFreq notifiers - policy notifiers and
+transition notifiers.
+
+
+2.1 CPUFreq policy notifiers
+----------------------------
+
+These are notified when a new policy is intended to be set. Each
+CPUFreq policy notifier is called three times for a policy transition:
+
+1.) During CPUFREQ_ADJUST all CPUFreq notifiers may change the limit if
+ they see a need for this - may it be thermal considerations or
+ hardware limitations.
+
+2.) During CPUFREQ_INCOMPATIBLE only changes may be done in order to avoid
+ hardware failure.
+
+3.) And during CPUFREQ_NOTIFY all notifiers are informed of the new policy
+ - if two hardware drivers failed to agree on a new policy before this
+ stage, the incompatible hardware shall be shut down, and the user
+ informed of this.
+
+The phase is specified in the second argument to the notifier.
+
+The third argument, a void *pointer, points to a struct cpufreq_policy
+consisting of five values: cpu, min, max, policy and max_cpu_freq. min
+and max are the lower and upper frequencies (in kHz) of the new
+policy, policy the new policy, cpu the number of the affected CPU; and
+max_cpu_freq the maximum supported CPU frequency. This value is given
+for informational purposes only.
+
+
+2.2 CPUFreq transition notifiers
+--------------------------------
+
+These are notified twice when the CPUfreq driver switches the CPU core
+frequency and this change has any external implications.
+
+The second argument specifies the phase - CPUFREQ_PRECHANGE or
+CPUFREQ_POSTCHANGE.
+
+The third argument is a struct cpufreq_freqs with the following
+values:
+cpu - number of the affected CPU
+old - old frequency
+new - new frequency
+
+3. CPUFreq Table Generation with Operating Performance Point (OPP)
+==================================================================
+For details about OPP, see Documentation/power/opp.txt
+
+dev_pm_opp_init_cpufreq_table - cpufreq framework typically is initialized with
+ cpufreq_frequency_table_cpuinfo which is provided with the list of
+ frequencies that are available for operation. This function provides
+ a ready to use conversion routine to translate the OPP layer's internal
+ information about the available frequencies into a format readily
+ providable to cpufreq.
+
+ WARNING: Do not use this function in interrupt context.
+
+ Example:
+ soc_pm_init()
+ {
+ /* Do things */
+ r = dev_pm_opp_init_cpufreq_table(dev, &freq_table);
+ if (!r)
+ cpufreq_frequency_table_cpuinfo(policy, freq_table);
+ /* Do other things */
+ }
+
+ NOTE: This function is available only if CONFIG_CPU_FREQ is enabled in
+ addition to CONFIG_PM_OPP.
+
+dev_pm_opp_free_cpufreq_table - Free up the table allocated by dev_pm_opp_init_cpufreq_table
diff --git a/Documentation/cpu-freq/cpu-drivers.txt b/Documentation/cpu-freq/cpu-drivers.txt
new file mode 100644
index 000000000..14f4e6336
--- /dev/null
+++ b/Documentation/cpu-freq/cpu-drivers.txt
@@ -0,0 +1,274 @@
+ CPU frequency and voltage scaling code in the Linux(TM) kernel
+
+
+ L i n u x C P U F r e q
+
+ C P U D r i v e r s
+
+ - information for developers -
+
+
+ Dominik Brodowski <linux@brodo.de>
+
+
+
+ Clock scaling allows you to change the clock speed of the CPUs on the
+ fly. This is a nice method to save battery power, because the lower
+ the clock speed, the less power the CPU consumes.
+
+
+Contents:
+---------
+1. What To Do?
+1.1 Initialization
+1.2 Per-CPU Initialization
+1.3 verify
+1.4 target/target_index or setpolicy?
+1.5 target/target_index
+1.6 setpolicy
+1.7 get_intermediate and target_intermediate
+2. Frequency Table Helpers
+
+
+
+1. What To Do?
+==============
+
+So, you just got a brand-new CPU / chipset with datasheets and want to
+add cpufreq support for this CPU / chipset? Great. Here are some hints
+on what is necessary:
+
+
+1.1 Initialization
+------------------
+
+First of all, in an __initcall level 7 (module_init()) or later
+function check whether this kernel runs on the right CPU and the right
+chipset. If so, register a struct cpufreq_driver with the CPUfreq core
+using cpufreq_register_driver()
+
+What shall this struct cpufreq_driver contain?
+
+cpufreq_driver.name - The name of this driver.
+
+cpufreq_driver.init - A pointer to the per-CPU initialization
+ function.
+
+cpufreq_driver.verify - A pointer to a "verification" function.
+
+cpufreq_driver.setpolicy _or_
+cpufreq_driver.target/
+target_index - See below on the differences.
+
+And optionally
+
+cpufreq_driver.exit - A pointer to a per-CPU cleanup
+ function called during CPU_POST_DEAD
+ phase of cpu hotplug process.
+
+cpufreq_driver.stop_cpu - A pointer to a per-CPU stop function
+ called during CPU_DOWN_PREPARE phase of
+ cpu hotplug process.
+
+cpufreq_driver.resume - A pointer to a per-CPU resume function
+ which is called with interrupts disabled
+ and _before_ the pre-suspend frequency
+ and/or policy is restored by a call to
+ ->target/target_index or ->setpolicy.
+
+cpufreq_driver.attr - A pointer to a NULL-terminated list of
+ "struct freq_attr" which allow to
+ export values to sysfs.
+
+cpufreq_driver.get_intermediate
+and target_intermediate Used to switch to stable frequency while
+ changing CPU frequency.
+
+
+1.2 Per-CPU Initialization
+--------------------------
+
+Whenever a new CPU is registered with the device model, or after the
+cpufreq driver registers itself, the per-CPU initialization function
+cpufreq_driver.init is called. It takes a struct cpufreq_policy
+*policy as argument. What to do now?
+
+If necessary, activate the CPUfreq support on your CPU.
+
+Then, the driver must fill in the following values:
+
+policy->cpuinfo.min_freq _and_
+policy->cpuinfo.max_freq - the minimum and maximum frequency
+ (in kHz) which is supported by
+ this CPU
+policy->cpuinfo.transition_latency the time it takes on this CPU to
+ switch between two frequencies in
+ nanoseconds (if appropriate, else
+ specify CPUFREQ_ETERNAL)
+
+policy->cur The current operating frequency of
+ this CPU (if appropriate)
+policy->min,
+policy->max,
+policy->policy and, if necessary,
+policy->governor must contain the "default policy" for
+ this CPU. A few moments later,
+ cpufreq_driver.verify and either
+ cpufreq_driver.setpolicy or
+ cpufreq_driver.target/target_index is called
+ with these values.
+
+For setting some of these values (cpuinfo.min[max]_freq, policy->min[max]), the
+frequency table helpers might be helpful. See the section 2 for more information
+on them.
+
+SMP systems normally have same clock source for a group of cpus. For these the
+.init() would be called only once for the first online cpu. Here the .init()
+routine must initialize policy->cpus with mask of all possible cpus (Online +
+Offline) that share the clock. Then the core would copy this mask onto
+policy->related_cpus and will reset policy->cpus to carry only online cpus.
+
+
+1.3 verify
+------------
+
+When the user decides a new policy (consisting of
+"policy,governor,min,max") shall be set, this policy must be validated
+so that incompatible values can be corrected. For verifying these
+values, a frequency table helper and/or the
+cpufreq_verify_within_limits(struct cpufreq_policy *policy, unsigned
+int min_freq, unsigned int max_freq) function might be helpful. See
+section 2 for details on frequency table helpers.
+
+You need to make sure that at least one valid frequency (or operating
+range) is within policy->min and policy->max. If necessary, increase
+policy->max first, and only if this is no solution, decrease policy->min.
+
+
+1.4 target/target_index or setpolicy?
+----------------------------
+
+Most cpufreq drivers or even most cpu frequency scaling algorithms
+only allow the CPU to be set to one frequency. For these, you use the
+->target/target_index call.
+
+Some cpufreq-capable processors switch the frequency between certain
+limits on their own. These shall use the ->setpolicy call
+
+
+1.5. target/target_index
+-------------
+
+The target_index call has two arguments: struct cpufreq_policy *policy,
+and unsigned int index (into the exposed frequency table).
+
+The CPUfreq driver must set the new frequency when called here. The
+actual frequency must be determined by freq_table[index].frequency.
+
+It should always restore to earlier frequency (i.e. policy->restore_freq) in
+case of errors, even if we switched to intermediate frequency earlier.
+
+Deprecated:
+----------
+The target call has three arguments: struct cpufreq_policy *policy,
+unsigned int target_frequency, unsigned int relation.
+
+The CPUfreq driver must set the new frequency when called here. The
+actual frequency must be determined using the following rules:
+
+- keep close to "target_freq"
+- policy->min <= new_freq <= policy->max (THIS MUST BE VALID!!!)
+- if relation==CPUFREQ_REL_L, try to select a new_freq higher than or equal
+ target_freq. ("L for lowest, but no lower than")
+- if relation==CPUFREQ_REL_H, try to select a new_freq lower than or equal
+ target_freq. ("H for highest, but no higher than")
+
+Here again the frequency table helper might assist you - see section 2
+for details.
+
+
+1.6 setpolicy
+---------------
+
+The setpolicy call only takes a struct cpufreq_policy *policy as
+argument. You need to set the lower limit of the in-processor or
+in-chipset dynamic frequency switching to policy->min, the upper limit
+to policy->max, and -if supported- select a performance-oriented
+setting when policy->policy is CPUFREQ_POLICY_PERFORMANCE, and a
+powersaving-oriented setting when CPUFREQ_POLICY_POWERSAVE. Also check
+the reference implementation in drivers/cpufreq/longrun.c
+
+1.7 get_intermediate and target_intermediate
+--------------------------------------------
+
+Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION unset.
+
+get_intermediate should return a stable intermediate frequency platform wants to
+switch to, and target_intermediate() should set CPU to to that frequency, before
+jumping to the frequency corresponding to 'index'. Core will take care of
+sending notifications and driver doesn't have to handle them in
+target_intermediate() or target_index().
+
+Drivers can return '0' from get_intermediate() in case they don't wish to switch
+to intermediate frequency for some target frequency. In that case core will
+directly call ->target_index().
+
+NOTE: ->target_index() should restore to policy->restore_freq in case of
+failures as core would send notifications for that.
+
+
+2. Frequency Table Helpers
+==========================
+
+As most cpufreq processors only allow for being set to a few specific
+frequencies, a "frequency table" with some functions might assist in
+some work of the processor driver. Such a "frequency table" consists
+of an array of struct cpufreq_frequency_table entries, with any value in
+"driver_data" you want to use, and the corresponding frequency in
+"frequency". At the end of the table, you need to add a
+cpufreq_frequency_table entry with frequency set to CPUFREQ_TABLE_END. And
+if you want to skip one entry in the table, set the frequency to
+CPUFREQ_ENTRY_INVALID. The entries don't need to be in ascending
+order.
+
+By calling cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
+ struct cpufreq_frequency_table *table);
+the cpuinfo.min_freq and cpuinfo.max_freq values are detected, and
+policy->min and policy->max are set to the same values. This is
+helpful for the per-CPU initialization stage.
+
+int cpufreq_frequency_table_verify(struct cpufreq_policy *policy,
+ struct cpufreq_frequency_table *table);
+assures that at least one valid frequency is within policy->min and
+policy->max, and all other criteria are met. This is helpful for the
+->verify call.
+
+int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
+ struct cpufreq_frequency_table *table,
+ unsigned int target_freq,
+ unsigned int relation,
+ unsigned int *index);
+
+is the corresponding frequency table helper for the ->target
+stage. Just pass the values to this function, and the unsigned int
+index returns the number of the frequency table entry which contains
+the frequency the CPU shall be set to.
+
+The following macros can be used as iterators over cpufreq_frequency_table:
+
+cpufreq_for_each_entry(pos, table) - iterates over all entries of frequency
+table.
+
+cpufreq-for_each_valid_entry(pos, table) - iterates over all entries,
+excluding CPUFREQ_ENTRY_INVALID frequencies.
+Use arguments "pos" - a cpufreq_frequency_table * as a loop cursor and
+"table" - the cpufreq_frequency_table * you want to iterate over.
+
+For example:
+
+ struct cpufreq_frequency_table *pos, *driver_freq_table;
+
+ cpufreq_for_each_entry(pos, driver_freq_table) {
+ /* Do something with pos */
+ pos->frequency = ...
+ }
diff --git a/Documentation/cpu-freq/cpufreq-nforce2.txt b/Documentation/cpu-freq/cpufreq-nforce2.txt
new file mode 100644
index 000000000..babce1315
--- /dev/null
+++ b/Documentation/cpu-freq/cpufreq-nforce2.txt
@@ -0,0 +1,19 @@
+
+The cpufreq-nforce2 driver changes the FSB on nVidia nForce2 platforms.
+
+This works better than on other platforms, because the FSB of the CPU
+can be controlled independently from the PCI/AGP clock.
+
+The module has two options:
+
+ fid: multiplier * 10 (for example 8.5 = 85)
+ min_fsb: minimum FSB
+
+If not set, fid is calculated from the current CPU speed and the FSB.
+min_fsb defaults to FSB at boot time - 50 MHz.
+
+IMPORTANT: The available range is limited downwards!
+ Also the minimum available FSB can differ, for systems
+ booting with 200 MHz, 150 should always work.
+
+
diff --git a/Documentation/cpu-freq/cpufreq-stats.txt b/Documentation/cpu-freq/cpufreq-stats.txt
new file mode 100644
index 000000000..fc647492e
--- /dev/null
+++ b/Documentation/cpu-freq/cpufreq-stats.txt
@@ -0,0 +1,128 @@
+
+ CPU frequency and voltage scaling statistics in the Linux(TM) kernel
+
+
+ L i n u x c p u f r e q - s t a t s d r i v e r
+
+ - information for users -
+
+
+ Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+
+Contents
+1. Introduction
+2. Statistics Provided (with example)
+3. Configuring cpufreq-stats
+
+
+1. Introduction
+
+cpufreq-stats is a driver that provides CPU frequency statistics for each CPU.
+These statistics are provided in /sysfs as a bunch of read_only interfaces. This
+interface (when configured) will appear in a separate directory under cpufreq
+in /sysfs (<sysfs root>/devices/system/cpu/cpuX/cpufreq/stats/) for each CPU.
+Various statistics will form read_only files under this directory.
+
+This driver is designed to be independent of any particular cpufreq_driver
+that may be running on your CPU. So, it will work with any cpufreq_driver.
+
+
+2. Statistics Provided (with example)
+
+cpufreq stats provides following statistics (explained in detail below).
+- time_in_state
+- total_trans
+- trans_table
+
+All the statistics will be from the time the stats driver has been inserted
+to the time when a read of a particular statistic is done. Obviously, stats
+driver will not have any information about the frequency transitions before
+the stats driver insertion.
+
+--------------------------------------------------------------------------------
+<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # ls -l
+total 0
+drwxr-xr-x 2 root root 0 May 14 16:06 .
+drwxr-xr-x 3 root root 0 May 14 15:58 ..
+-r--r--r-- 1 root root 4096 May 14 16:06 time_in_state
+-r--r--r-- 1 root root 4096 May 14 16:06 total_trans
+-r--r--r-- 1 root root 4096 May 14 16:06 trans_table
+--------------------------------------------------------------------------------
+
+- time_in_state
+This gives the amount of time spent in each of the frequencies supported by
+this CPU. The cat output will have "<frequency> <time>" pair in each line, which
+will mean this CPU spent <time> usertime units of time at <frequency>. Output
+will have one line for each of the supported frequencies. usertime units here
+is 10mS (similar to other time exported in /proc).
+
+--------------------------------------------------------------------------------
+<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat time_in_state
+3600000 2089
+3400000 136
+3200000 34
+3000000 67
+2800000 172488
+--------------------------------------------------------------------------------
+
+
+- total_trans
+This gives the total number of frequency transitions on this CPU. The cat
+output will have a single count which is the total number of frequency
+transitions.
+
+--------------------------------------------------------------------------------
+<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat total_trans
+20
+--------------------------------------------------------------------------------
+
+- trans_table
+This will give a fine grained information about all the CPU frequency
+transitions. The cat output here is a two dimensional matrix, where an entry
+<i,j> (row i, column j) represents the count of number of transitions from
+Freq_i to Freq_j. Freq_i is in descending order with increasing rows and
+Freq_j is in descending order with increasing columns. The output here also
+contains the actual freq values for each row and column for better readability.
+
+--------------------------------------------------------------------------------
+<mysystem>:/sys/devices/system/cpu/cpu0/cpufreq/stats # cat trans_table
+ From : To
+ : 3600000 3400000 3200000 3000000 2800000
+ 3600000: 0 5 0 0 0
+ 3400000: 4 0 2 0 0
+ 3200000: 0 1 0 2 0
+ 3000000: 0 0 1 0 3
+ 2800000: 0 0 0 2 0
+--------------------------------------------------------------------------------
+
+
+3. Configuring cpufreq-stats
+
+To configure cpufreq-stats in your kernel
+Config Main Menu
+ Power management options (ACPI, APM) --->
+ CPU Frequency scaling --->
+ [*] CPU Frequency scaling
+ <*> CPU frequency translation statistics
+ [*] CPU frequency translation statistics details
+
+
+"CPU Frequency scaling" (CONFIG_CPU_FREQ) should be enabled to configure
+cpufreq-stats.
+
+"CPU frequency translation statistics" (CONFIG_CPU_FREQ_STAT) provides the
+basic statistics which includes time_in_state and total_trans.
+
+"CPU frequency translation statistics details" (CONFIG_CPU_FREQ_STAT_DETAILS)
+provides fine grained cpufreq stats by trans_table. The reason for having a
+separate config option for trans_table is:
+- trans_table goes against the traditional /sysfs rule of one value per
+ interface. It provides a whole bunch of value in a 2 dimensional matrix
+ form.
+
+Once these two options are enabled and your CPU supports cpufrequency, you
+will be able to see the CPU frequency statistics in /sysfs.
+
+
+
+
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
new file mode 100644
index 000000000..77ec21574
--- /dev/null
+++ b/Documentation/cpu-freq/governors.txt
@@ -0,0 +1,269 @@
+ CPU frequency and voltage scaling code in the Linux(TM) kernel
+
+
+ L i n u x C P U F r e q
+
+ C P U F r e q G o v e r n o r s
+
+ - information for users and developers -
+
+
+ Dominik Brodowski <linux@brodo.de>
+ some additions and corrections by Nico Golde <nico@ngolde.de>
+
+
+
+ Clock scaling allows you to change the clock speed of the CPUs on the
+ fly. This is a nice method to save battery power, because the lower
+ the clock speed, the less power the CPU consumes.
+
+
+Contents:
+---------
+1. What is a CPUFreq Governor?
+
+2. Governors In the Linux Kernel
+2.1 Performance
+2.2 Powersave
+2.3 Userspace
+2.4 Ondemand
+2.5 Conservative
+
+3. The Governor Interface in the CPUfreq Core
+
+
+
+1. What Is A CPUFreq Governor?
+==============================
+
+Most cpufreq drivers (in fact, all except one, longrun) or even most
+cpu frequency scaling algorithms only offer the CPU to be set to one
+frequency. In order to offer dynamic frequency scaling, the cpufreq
+core must be able to tell these drivers of a "target frequency". So
+these specific drivers will be transformed to offer a "->target/target_index"
+call instead of the existing "->setpolicy" call. For "longrun", all
+stays the same, though.
+
+How to decide what frequency within the CPUfreq policy should be used?
+That's done using "cpufreq governors". Two are already in this patch
+-- they're the already existing "powersave" and "performance" which
+set the frequency statically to the lowest or highest frequency,
+respectively. At least two more such governors will be ready for
+addition in the near future, but likely many more as there are various
+different theories and models about dynamic frequency scaling
+around. Using such a generic interface as cpufreq offers to scaling
+governors, these can be tested extensively, and the best one can be
+selected for each specific use.
+
+Basically, it's the following flow graph:
+
+CPU can be set to switch independently | CPU can only be set
+ within specific "limits" | to specific frequencies
+
+ "CPUfreq policy"
+ consists of frequency limits (policy->{min,max})
+ and CPUfreq governor to be used
+ / \
+ / \
+ / the cpufreq governor decides
+ / (dynamically or statically)
+ / what target_freq to set within
+ / the limits of policy->{min,max}
+ / \
+ / \
+ Using the ->setpolicy call, Using the ->target/target_index call,
+ the limits and the the frequency closest
+ "policy" is set. to target_freq is set.
+ It is assured that it
+ is within policy->{min,max}
+
+
+2. Governors In the Linux Kernel
+================================
+
+2.1 Performance
+---------------
+
+The CPUfreq governor "performance" sets the CPU statically to the
+highest frequency within the borders of scaling_min_freq and
+scaling_max_freq.
+
+
+2.2 Powersave
+-------------
+
+The CPUfreq governor "powersave" sets the CPU statically to the
+lowest frequency within the borders of scaling_min_freq and
+scaling_max_freq.
+
+
+2.3 Userspace
+-------------
+
+The CPUfreq governor "userspace" allows the user, or any userspace
+program running with UID "root", to set the CPU to a specific frequency
+by making a sysfs file "scaling_setspeed" available in the CPU-device
+directory.
+
+
+2.4 Ondemand
+------------
+
+The CPUfreq governor "ondemand" sets the CPU depending on the
+current usage. To do this the CPU must have the capability to
+switch the frequency very quickly. There are a number of sysfs file
+accessible parameters:
+
+sampling_rate: measured in uS (10^-6 seconds), this is how often you
+want the kernel to look at the CPU usage and to make decisions on
+what to do about the frequency. Typically this is set to values of
+around '10000' or more. It's default value is (cmp. with users-guide.txt):
+transition_latency * 1000
+Be aware that transition latency is in ns and sampling_rate is in us, so you
+get the same sysfs value by default.
+Sampling rate should always get adjusted considering the transition latency
+To set the sampling rate 750 times as high as the transition latency
+in the bash (as said, 1000 is default), do:
+echo `$(($(cat cpuinfo_transition_latency) * 750 / 1000)) \
+ >ondemand/sampling_rate
+
+sampling_rate_min:
+The sampling rate is limited by the HW transition latency:
+transition_latency * 100
+Or by kernel restrictions:
+If CONFIG_NO_HZ_COMMON is set, the limit is 10ms fixed.
+If CONFIG_NO_HZ_COMMON is not set or nohz=off boot parameter is used, the
+limits depend on the CONFIG_HZ option:
+HZ=1000: min=20000us (20ms)
+HZ=250: min=80000us (80ms)
+HZ=100: min=200000us (200ms)
+The highest value of kernel and HW latency restrictions is shown and
+used as the minimum sampling rate.
+
+up_threshold: defines what the average CPU usage between the samplings
+of 'sampling_rate' needs to be for the kernel to make a decision on
+whether it should increase the frequency. For example when it is set
+to its default value of '95' it means that between the checking
+intervals the CPU needs to be on average more than 95% in use to then
+decide that the CPU frequency needs to be increased.
+
+ignore_nice_load: this parameter takes a value of '0' or '1'. When
+set to '0' (its default), all processes are counted towards the
+'cpu utilisation' value. When set to '1', the processes that are
+run with a 'nice' value will not count (and thus be ignored) in the
+overall usage calculation. This is useful if you are running a CPU
+intensive calculation on your laptop that you do not care how long it
+takes to complete as you can 'nice' it and prevent it from taking part
+in the deciding process of whether to increase your CPU frequency.
+
+sampling_down_factor: this parameter controls the rate at which the
+kernel makes a decision on when to decrease the frequency while running
+at top speed. When set to 1 (the default) decisions to reevaluate load
+are made at the same interval regardless of current clock speed. But
+when set to greater than 1 (e.g. 100) it acts as a multiplier for the
+scheduling interval for reevaluating load when the CPU is at its top
+speed due to high load. This improves performance by reducing the overhead
+of load evaluation and helping the CPU stay at its top speed when truly
+busy, rather than shifting back and forth in speed. This tunable has no
+effect on behavior at lower speeds/lower CPU loads.
+
+powersave_bias: this parameter takes a value between 0 to 1000. It
+defines the percentage (times 10) value of the target frequency that
+will be shaved off of the target. For example, when set to 100 -- 10%,
+when ondemand governor would have targeted 1000 MHz, it will target
+1000 MHz - (10% of 1000 MHz) = 900 MHz instead. This is set to 0
+(disabled) by default.
+When AMD frequency sensitivity powersave bias driver --
+drivers/cpufreq/amd_freq_sensitivity.c is loaded, this parameter
+defines the workload frequency sensitivity threshold in which a lower
+frequency is chosen instead of ondemand governor's original target.
+The frequency sensitivity is a hardware reported (on AMD Family 16h
+Processors and above) value between 0 to 100% that tells software how
+the performance of the workload running on a CPU will change when
+frequency changes. A workload with sensitivity of 0% (memory/IO-bound)
+will not perform any better on higher core frequency, whereas a
+workload with sensitivity of 100% (CPU-bound) will perform better
+higher the frequency. When the driver is loaded, this is set to 400
+by default -- for CPUs running workloads with sensitivity value below
+40%, a lower frequency is chosen. Unloading the driver or writing 0
+will disable this feature.
+
+
+2.5 Conservative
+----------------
+
+The CPUfreq governor "conservative", much like the "ondemand"
+governor, sets the CPU depending on the current usage. It differs in
+behaviour in that it gracefully increases and decreases the CPU speed
+rather than jumping to max speed the moment there is any load on the
+CPU. This behaviour more suitable in a battery powered environment.
+The governor is tweaked in the same manner as the "ondemand" governor
+through sysfs with the addition of:
+
+freq_step: this describes what percentage steps the cpu freq should be
+increased and decreased smoothly by. By default the cpu frequency will
+increase in 5% chunks of your maximum cpu frequency. You can change this
+value to anywhere between 0 and 100 where '0' will effectively lock your
+CPU at a speed regardless of its load whilst '100' will, in theory, make
+it behave identically to the "ondemand" governor.
+
+down_threshold: same as the 'up_threshold' found for the "ondemand"
+governor but for the opposite direction. For example when set to its
+default value of '20' it means that if the CPU usage needs to be below
+20% between samples to have the frequency decreased.
+
+sampling_down_factor: similar functionality as in "ondemand" governor.
+But in "conservative", it controls the rate at which the kernel makes
+a decision on when to decrease the frequency while running in any
+speed. Load for frequency increase is still evaluated every
+sampling rate.
+
+3. The Governor Interface in the CPUfreq Core
+=============================================
+
+A new governor must register itself with the CPUfreq core using
+"cpufreq_register_governor". The struct cpufreq_governor, which has to
+be passed to that function, must contain the following values:
+
+governor->name - A unique name for this governor
+governor->governor - The governor callback function
+governor->owner - .THIS_MODULE for the governor module (if
+ appropriate)
+
+The governor->governor callback is called with the current (or to-be-set)
+cpufreq_policy struct for that CPU, and an unsigned int event. The
+following events are currently defined:
+
+CPUFREQ_GOV_START: This governor shall start its duty for the CPU
+ policy->cpu
+CPUFREQ_GOV_STOP: This governor shall end its duty for the CPU
+ policy->cpu
+CPUFREQ_GOV_LIMITS: The limits for CPU policy->cpu have changed to
+ policy->min and policy->max.
+
+If you need other "events" externally of your driver, _only_ use the
+cpufreq_governor_l(unsigned int cpu, unsigned int event) call to the
+CPUfreq core to ensure proper locking.
+
+
+The CPUfreq governor may call the CPU processor driver using one of
+these two functions:
+
+int cpufreq_driver_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation);
+
+int __cpufreq_driver_target(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int relation);
+
+target_freq must be within policy->min and policy->max, of course.
+What's the difference between these two functions? When your governor
+still is in a direct code path of a call to governor->governor, the
+per-CPU cpufreq lock is still held in the cpufreq core, and there's
+no need to lock it again (in fact, this would cause a deadlock). So
+use __cpufreq_driver_target only in these cases. In all other cases
+(for example, when there's a "daemonized" function that wakes up
+every second), use cpufreq_driver_target to lock the cpufreq per-CPU
+lock before the command is passed to the cpufreq processor driver.
+
diff --git a/Documentation/cpu-freq/index.txt b/Documentation/cpu-freq/index.txt
new file mode 100644
index 000000000..dc024ab40
--- /dev/null
+++ b/Documentation/cpu-freq/index.txt
@@ -0,0 +1,54 @@
+ CPU frequency and voltage scaling code in the Linux(TM) kernel
+
+
+ L i n u x C P U F r e q
+
+
+
+
+ Dominik Brodowski <linux@brodo.de>
+
+
+
+ Clock scaling allows you to change the clock speed of the CPUs on the
+ fly. This is a nice method to save battery power, because the lower
+ the clock speed, the less power the CPU consumes.
+
+
+
+Documents in this directory:
+----------------------------
+core.txt - General description of the CPUFreq core and
+ of CPUFreq notifiers
+
+cpu-drivers.txt - How to implement a new cpufreq processor driver
+
+governors.txt - What are cpufreq governors and how to
+ implement them?
+
+index.txt - File index, Mailing list and Links (this document)
+
+user-guide.txt - User Guide to CPUFreq
+
+
+Mailing List
+------------
+There is a CPU frequency changing CVS commit and general list where
+you can report bugs, problems or submit patches. To post a message,
+send an email to linux-pm@vger.kernel.org, to subscribe go to
+http://vger.kernel.org/vger-lists.html#linux-pm and follow the
+instructions there.
+
+Links
+-----
+the FTP archives:
+* ftp://ftp.linux.org.uk/pub/linux/cpufreq/
+
+how to access the CVS repository:
+* http://cvs.arm.linux.org.uk/
+
+the CPUFreq Mailing list:
+* http://vger.kernel.org/vger-lists.html#cpufreq
+
+Clock and voltage scaling for the SA-1100:
+* http://www.lartmaker.nl/projects/scaling
diff --git a/Documentation/cpu-freq/intel-pstate.txt b/Documentation/cpu-freq/intel-pstate.txt
new file mode 100644
index 000000000..655750743
--- /dev/null
+++ b/Documentation/cpu-freq/intel-pstate.txt
@@ -0,0 +1,64 @@
+Intel P-state driver
+--------------------
+
+This driver provides an interface to control the P state selection for
+SandyBridge+ Intel processors. The driver can operate two different
+modes based on the processor model legacy and Hardware P state (HWP)
+mode.
+
+In legacy mode the driver implements a scaling driver with an internal
+governor for Intel Core processors. The driver follows the same model
+as the Transmeta scaling driver (longrun.c) and implements the
+setpolicy() instead of target(). Scaling drivers that implement
+setpolicy() are assumed to implement internal governors by the cpufreq
+core. All the logic for selecting the current P state is contained
+within the driver; no external governor is used by the cpufreq core.
+
+In HWP mode P state selection is implemented in the processor
+itself. The driver provides the interfaces between the cpufreq core and
+the processor to control P state selection based on user preferences
+and reporting frequency to the cpufreq core. In this mode the
+internal governor code is disabled.
+
+In addtion to the interfaces provided by the cpufreq core for
+controlling frequency the driver provides sysfs files for
+controlling P state selection. These files have been added to
+/sys/devices/system/cpu/intel_pstate/
+
+ max_perf_pct: limits the maximum P state that will be requested by
+ the driver stated as a percentage of the available performance. The
+ available (P states) performance may be reduced by the no_turbo
+ setting described below.
+
+ min_perf_pct: limits the minimum P state that will be requested by
+ the driver stated as a percentage of the max (non-turbo)
+ performance level.
+
+ no_turbo: limits the driver to selecting P states below the turbo
+ frequency range.
+
+ turbo_pct: displays the percentage of the total performance that
+ is supported by hardware that is in the turbo range. This number
+ is independent of whether turbo has been disabled or not.
+
+ num_pstates: displays the number of pstates that are supported
+ by hardware. This number is independent of whether turbo has
+ been disabled or not.
+
+For contemporary Intel processors, the frequency is controlled by the
+processor itself and the P-states exposed to software are related to
+performance levels. The idea that frequency can be set to a single
+frequency is fiction for Intel Core processors. Even if the scaling
+driver selects a single P state the actual frequency the processor
+will run at is selected by the processor itself.
+
+For legacy mode debugfs files have also been added to allow tuning of
+the internal governor algorythm. These files are located at
+/sys/kernel/debug/pstate_snb/ These files are NOT present in HWP mode.
+
+ deadband
+ d_gain_pct
+ i_gain_pct
+ p_gain_pct
+ sample_rate_ms
+ setpoint
diff --git a/Documentation/cpu-freq/pcc-cpufreq.txt b/Documentation/cpu-freq/pcc-cpufreq.txt
new file mode 100644
index 000000000..9e3c3b335
--- /dev/null
+++ b/Documentation/cpu-freq/pcc-cpufreq.txt
@@ -0,0 +1,207 @@
+/*
+ * pcc-cpufreq.txt - PCC interface documentation
+ *
+ * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com>
+ * Copyright (C) 2009 Hewlett-Packard Development Company, L.P.
+ * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON
+ * INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+
+ Processor Clocking Control Driver
+ ---------------------------------
+
+Contents:
+---------
+1. Introduction
+1.1 PCC interface
+1.1.1 Get Average Frequency
+1.1.2 Set Desired Frequency
+1.2 Platforms affected
+2. Driver and /sys details
+2.1 scaling_available_frequencies
+2.2 cpuinfo_transition_latency
+2.3 cpuinfo_cur_freq
+2.4 related_cpus
+3. Caveats
+
+1. Introduction:
+----------------
+Processor Clocking Control (PCC) is an interface between the platform
+firmware and OSPM. It is a mechanism for coordinating processor
+performance (ie: frequency) between the platform firmware and the OS.
+
+The PCC driver (pcc-cpufreq) allows OSPM to take advantage of the PCC
+interface.
+
+OS utilizes the PCC interface to inform platform firmware what frequency the
+OS wants for a logical processor. The platform firmware attempts to achieve
+the requested frequency. If the request for the target frequency could not be
+satisfied by platform firmware, then it usually means that power budget
+conditions are in place, and "power capping" is taking place.
+
+1.1 PCC interface:
+------------------
+The complete PCC specification is available here:
+http://www.acpica.org/download/Processor-Clocking-Control-v1p0.pdf
+
+PCC relies on a shared memory region that provides a channel for communication
+between the OS and platform firmware. PCC also implements a "doorbell" that
+is used by the OS to inform the platform firmware that a command has been
+sent.
+
+The ACPI PCCH() method is used to discover the location of the PCC shared
+memory region. The shared memory region header contains the "command" and
+"status" interface. PCCH() also contains details on how to access the platform
+doorbell.
+
+The following commands are supported by the PCC interface:
+* Get Average Frequency
+* Set Desired Frequency
+
+The ACPI PCCP() method is implemented for each logical processor and is
+used to discover the offsets for the input and output buffers in the shared
+memory region.
+
+When PCC mode is enabled, the platform will not expose processor performance
+or throttle states (_PSS, _TSS and related ACPI objects) to OSPM. Therefore,
+the native P-state driver (such as acpi-cpufreq for Intel, powernow-k8 for
+AMD) will not load.
+
+However, OSPM remains in control of policy. The governor (eg: "ondemand")
+computes the required performance for each processor based on server workload.
+The PCC driver fills in the command interface, and the input buffer and
+communicates the request to the platform firmware. The platform firmware is
+responsible for delivering the requested performance.
+
+Each PCC command is "global" in scope and can affect all the logical CPUs in
+the system. Therefore, PCC is capable of performing "group" updates. With PCC
+the OS is capable of getting/setting the frequency of all the logical CPUs in
+the system with a single call to the BIOS.
+
+1.1.1 Get Average Frequency:
+----------------------------
+This command is used by the OSPM to query the running frequency of the
+processor since the last time this command was completed. The output buffer
+indicates the average unhalted frequency of the logical processor expressed as
+a percentage of the nominal (ie: maximum) CPU frequency. The output buffer
+also signifies if the CPU frequency is limited by a power budget condition.
+
+1.1.2 Set Desired Frequency:
+----------------------------
+This command is used by the OSPM to communicate to the platform firmware the
+desired frequency for a logical processor. The output buffer is currently
+ignored by OSPM. The next invocation of "Get Average Frequency" will inform
+OSPM if the desired frequency was achieved or not.
+
+1.2 Platforms affected:
+-----------------------
+The PCC driver will load on any system where the platform firmware:
+* supports the PCC interface, and the associated PCCH() and PCCP() methods
+* assumes responsibility for managing the hardware clocking controls in order
+to deliver the requested processor performance
+
+Currently, certain HP ProLiant platforms implement the PCC interface. On those
+platforms PCC is the "default" choice.
+
+However, it is possible to disable this interface via a BIOS setting. In
+such an instance, as is also the case on platforms where the PCC interface
+is not implemented, the PCC driver will fail to load silently.
+
+2. Driver and /sys details:
+---------------------------
+When the driver loads, it merely prints the lowest and the highest CPU
+frequencies supported by the platform firmware.
+
+The PCC driver loads with a message such as:
+pcc-cpufreq: (v1.00.00) driver loaded with frequency limits: 1600 MHz, 2933
+MHz
+
+This means that the OPSM can request the CPU to run at any frequency in
+between the limits (1600 MHz, and 2933 MHz) specified in the message.
+
+Internally, there is no need for the driver to convert the "target" frequency
+to a corresponding P-state.
+
+The VERSION number for the driver will be of the format v.xy.ab.
+eg: 1.00.02
+ ----- --
+ | |
+ | -- this will increase with bug fixes/enhancements to the driver
+ |-- this is the version of the PCC specification the driver adheres to
+
+
+The following is a brief discussion on some of the fields exported via the
+/sys filesystem and how their values are affected by the PCC driver:
+
+2.1 scaling_available_frequencies:
+----------------------------------
+scaling_available_frequencies is not created in /sys. No intermediate
+frequencies need to be listed because the BIOS will try to achieve any
+frequency, within limits, requested by the governor. A frequency does not have
+to be strictly associated with a P-state.
+
+2.2 cpuinfo_transition_latency:
+-------------------------------
+The cpuinfo_transition_latency field is 0. The PCC specification does
+not include a field to expose this value currently.
+
+2.3 cpuinfo_cur_freq:
+---------------------
+A) Often cpuinfo_cur_freq will show a value different than what is declared
+in the scaling_available_frequencies or scaling_cur_freq, or scaling_max_freq.
+This is due to "turbo boost" available on recent Intel processors. If certain
+conditions are met the BIOS can achieve a slightly higher speed than requested
+by OSPM. An example:
+
+scaling_cur_freq : 2933000
+cpuinfo_cur_freq : 3196000
+
+B) There is a round-off error associated with the cpuinfo_cur_freq value.
+Since the driver obtains the current frequency as a "percentage" (%) of the
+nominal frequency from the BIOS, sometimes, the values displayed by
+scaling_cur_freq and cpuinfo_cur_freq may not match. An example:
+
+scaling_cur_freq : 1600000
+cpuinfo_cur_freq : 1583000
+
+In this example, the nominal frequency is 2933 MHz. The driver obtains the
+current frequency, cpuinfo_cur_freq, as 54% of the nominal frequency:
+
+ 54% of 2933 MHz = 1583 MHz
+
+Nominal frequency is the maximum frequency of the processor, and it usually
+corresponds to the frequency of the P0 P-state.
+
+2.4 related_cpus:
+-----------------
+The related_cpus field is identical to affected_cpus.
+
+affected_cpus : 4
+related_cpus : 4
+
+Currently, the PCC driver does not evaluate _PSD. The platforms that support
+PCC do not implement SW_ALL. So OSPM doesn't need to perform any coordination
+to ensure that the same frequency is requested of all dependent CPUs.
+
+3. Caveats:
+-----------
+The "cpufreq_stats" module in its present form cannot be loaded and
+expected to work with the PCC driver. Since the "cpufreq_stats" module
+provides information wrt each P-state, it is not applicable to the PCC driver.
diff --git a/Documentation/cpu-freq/user-guide.txt b/Documentation/cpu-freq/user-guide.txt
new file mode 100644
index 000000000..ff2f28332
--- /dev/null
+++ b/Documentation/cpu-freq/user-guide.txt
@@ -0,0 +1,224 @@
+ CPU frequency and voltage scaling code in the Linux(TM) kernel
+
+
+ L i n u x C P U F r e q
+
+ U S E R G U I D E
+
+
+ Dominik Brodowski <linux@brodo.de>
+
+
+
+ Clock scaling allows you to change the clock speed of the CPUs on the
+ fly. This is a nice method to save battery power, because the lower
+ the clock speed, the less power the CPU consumes.
+
+
+Contents:
+---------
+1. Supported Architectures and Processors
+1.1 ARM
+1.2 x86
+1.3 sparc64
+1.4 ppc
+1.5 SuperH
+1.6 Blackfin
+
+2. "Policy" / "Governor"?
+2.1 Policy
+2.2 Governor
+
+3. How to change the CPU cpufreq policy and/or speed
+3.1 Preferred interface: sysfs
+
+
+
+1. Supported Architectures and Processors
+=========================================
+
+1.1 ARM
+-------
+
+The following ARM processors are supported by cpufreq:
+
+ARM Integrator
+ARM-SA1100
+ARM-SA1110
+Intel PXA
+
+
+1.2 x86
+-------
+
+The following processors for the x86 architecture are supported by cpufreq:
+
+AMD Elan - SC400, SC410
+AMD mobile K6-2+
+AMD mobile K6-3+
+AMD mobile Duron
+AMD mobile Athlon
+AMD Opteron
+AMD Athlon 64
+Cyrix Media GXm
+Intel mobile PIII and Intel mobile PIII-M on certain chipsets
+Intel Pentium 4, Intel Xeon
+Intel Pentium M (Centrino)
+National Semiconductors Geode GX
+Transmeta Crusoe
+Transmeta Efficeon
+VIA Cyrix 3 / C3
+various processors on some ACPI 2.0-compatible systems [*]
+
+[*] Only if "ACPI Processor Performance States" are available
+to the ACPI<->BIOS interface.
+
+
+1.3 sparc64
+-----------
+
+The following processors for the sparc64 architecture are supported by
+cpufreq:
+
+UltraSPARC-III
+
+
+1.4 ppc
+-------
+
+Several "PowerBook" and "iBook2" notebooks are supported.
+
+
+1.5 SuperH
+----------
+
+All SuperH processors supporting rate rounding through the clock
+framework are supported by cpufreq.
+
+1.6 Blackfin
+------------
+
+The following Blackfin processors are supported by cpufreq:
+
+BF522, BF523, BF524, BF525, BF526, BF527, Rev 0.1 or higher
+BF531, BF532, BF533, Rev 0.3 or higher
+BF534, BF536, BF537, Rev 0.2 or higher
+BF561, Rev 0.3 or higher
+BF542, BF544, BF547, BF548, BF549, Rev 0.1 or higher
+
+
+2. "Policy" / "Governor" ?
+==========================
+
+Some CPU frequency scaling-capable processor switch between various
+frequencies and operating voltages "on the fly" without any kernel or
+user involvement. This guarantees very fast switching to a frequency
+which is high enough to serve the user's needs, but low enough to save
+power.
+
+
+2.1 Policy
+----------
+
+On these systems, all you can do is select the lower and upper
+frequency limit as well as whether you want more aggressive
+power-saving or more instantly available processing power.
+
+
+2.2 Governor
+------------
+
+On all other cpufreq implementations, these boundaries still need to
+be set. Then, a "governor" must be selected. Such a "governor" decides
+what speed the processor shall run within the boundaries. One such
+"governor" is the "userspace" governor. This one allows the user - or
+a yet-to-implement userspace program - to decide what specific speed
+the processor shall run at.
+
+
+3. How to change the CPU cpufreq policy and/or speed
+====================================================
+
+3.1 Preferred Interface: sysfs
+------------------------------
+
+The preferred interface is located in the sysfs filesystem. If you
+mounted it at /sys, the cpufreq interface is located in a subdirectory
+"cpufreq" within the cpu-device directory
+(e.g. /sys/devices/system/cpu/cpu0/cpufreq/ for the first CPU).
+
+cpuinfo_min_freq : this file shows the minimum operating
+ frequency the processor can run at(in kHz)
+cpuinfo_max_freq : this file shows the maximum operating
+ frequency the processor can run at(in kHz)
+cpuinfo_transition_latency The time it takes on this CPU to
+ switch between two frequencies in nano
+ seconds. If unknown or known to be
+ that high that the driver does not
+ work with the ondemand governor, -1
+ (CPUFREQ_ETERNAL) will be returned.
+ Using this information can be useful
+ to choose an appropriate polling
+ frequency for a kernel governor or
+ userspace daemon. Make sure to not
+ switch the frequency too often
+ resulting in performance loss.
+scaling_driver : this file shows what cpufreq driver is
+ used to set the frequency on this CPU
+
+scaling_available_governors : this file shows the CPUfreq governors
+ available in this kernel. You can see the
+ currently activated governor in
+
+scaling_governor, and by "echoing" the name of another
+ governor you can change it. Please note
+ that some governors won't load - they only
+ work on some specific architectures or
+ processors.
+
+cpuinfo_cur_freq : Current frequency of the CPU as obtained from
+ the hardware, in KHz. This is the frequency
+ the CPU actually runs at.
+
+scaling_available_frequencies : List of available frequencies, in KHz.
+
+scaling_min_freq and
+scaling_max_freq show the current "policy limits" (in
+ kHz). By echoing new values into these
+ files, you can change these limits.
+ NOTE: when setting a policy you need to
+ first set scaling_max_freq, then
+ scaling_min_freq.
+
+affected_cpus : List of Online CPUs that require software
+ coordination of frequency.
+
+related_cpus : List of Online + Offline CPUs that need software
+ coordination of frequency.
+
+scaling_driver : Hardware driver for cpufreq.
+
+scaling_cur_freq : Current frequency of the CPU as determined by
+ the governor and cpufreq core, in KHz. This is
+ the frequency the kernel thinks the CPU runs
+ at.
+
+bios_limit : If the BIOS tells the OS to limit a CPU to
+ lower frequencies, the user can read out the
+ maximum available frequency from this file.
+ This typically can happen through (often not
+ intended) BIOS settings, restrictions
+ triggered through a service processor or other
+ BIOS/HW based implementations.
+ This does not cover thermal ACPI limitations
+ which can be detected through the generic
+ thermal driver.
+
+If you have selected the "userspace" governor which allows you to
+set the CPU operating frequency to a specific value, you can read out
+the current frequency in
+
+scaling_setspeed. By "echoing" a new frequency into this
+ you can change the speed of the CPU,
+ but only within the limits of
+ scaling_min_freq and scaling_max_freq.
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
new file mode 100644
index 000000000..f9ad5e048
--- /dev/null
+++ b/Documentation/cpu-hotplug.txt
@@ -0,0 +1,452 @@
+ CPU hotplug Support in Linux(tm) Kernel
+
+ Maintainers:
+ CPU Hotplug Core:
+ Rusty Russell <rusty@rustcorp.com.au>
+ Srivatsa Vaddagiri <vatsa@in.ibm.com>
+ i386:
+ Zwane Mwaikambo <zwanem@gmail.com>
+ ppc64:
+ Nathan Lynch <nathanl@austin.ibm.com>
+ Joel Schopp <jschopp@austin.ibm.com>
+ ia64/x86_64:
+ Ashok Raj <ashok.raj@intel.com>
+ s390:
+ Heiko Carstens <heiko.carstens@de.ibm.com>
+
+Authors: Ashok Raj <ashok.raj@intel.com>
+Lots of feedback: Nathan Lynch <nathanl@austin.ibm.com>,
+ Joel Schopp <jschopp@austin.ibm.com>
+
+Introduction
+
+Modern advances in system architectures have introduced advanced error
+reporting and correction capabilities in processors. CPU architectures permit
+partitioning support, where compute resources of a single CPU could be made
+available to virtual machine environments. There are couple OEMS that
+support NUMA hardware which are hot pluggable as well, where physical
+node insertion and removal require support for CPU hotplug.
+
+Such advances require CPUs available to a kernel to be removed either for
+provisioning reasons, or for RAS purposes to keep an offending CPU off
+system execution path. Hence the need for CPU hotplug support in the
+Linux kernel.
+
+A more novel use of CPU-hotplug support is its use today in suspend
+resume support for SMP. Dual-core and HT support makes even
+a laptop run SMP kernels which didn't support these methods. SMP support
+for suspend/resume is a work in progress.
+
+General Stuff about CPU Hotplug
+--------------------------------
+
+Command Line Switches
+---------------------
+maxcpus=n Restrict boot time cpus to n. Say if you have 4 cpus, using
+ maxcpus=2 will only boot 2. You can choose to bring the
+ other cpus later online, read FAQ's for more info.
+
+additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets
+ cpu_possible_mask = cpu_present_mask + additional_cpus
+
+cede_offline={"off","on"} Use this option to disable/enable putting offlined
+ processors to an extended H_CEDE state on
+ supported pseries platforms.
+ If nothing is specified,
+ cede_offline is set to "on".
+
+(*) Option valid only for following architectures
+- ia64
+
+ia64 uses the number of disabled local apics in ACPI tables MADT to
+determine the number of potentially hot-pluggable cpus. The implementation
+should only rely on this to count the # of cpus, but *MUST* not rely
+on the apicid values in those tables for disabled apics. In the event
+BIOS doesn't mark such hot-pluggable cpus as disabled entries, one could
+use this parameter "additional_cpus=x" to represent those cpus in the
+cpu_possible_mask.
+
+possible_cpus=n [s390,x86_64] use this to set hotpluggable cpus.
+ This option sets possible_cpus bits in
+ cpu_possible_mask. Thus keeping the numbers of bits set
+ constant even if the machine gets rebooted.
+
+CPU maps and such
+-----------------
+[More on cpumaps and primitive to manipulate, please check
+include/linux/cpumask.h that has more descriptive text.]
+
+cpu_possible_mask: Bitmap of possible CPUs that can ever be available in the
+system. This is used to allocate some boot time memory for per_cpu variables
+that aren't designed to grow/shrink as CPUs are made available or removed.
+Once set during boot time discovery phase, the map is static, i.e no bits
+are added or removed anytime. Trimming it accurately for your system needs
+upfront can save some boot time memory. See below for how we use heuristics
+in x86_64 case to keep this under check.
+
+cpu_online_mask: Bitmap of all CPUs currently online. Its set in __cpu_up()
+after a cpu is available for kernel scheduling and ready to receive
+interrupts from devices. Its cleared when a cpu is brought down using
+__cpu_disable(), before which all OS services including interrupts are
+migrated to another target CPU.
+
+cpu_present_mask: Bitmap of CPUs currently present in the system. Not all
+of them may be online. When physical hotplug is processed by the relevant
+subsystem (e.g ACPI) can change and new bit either be added or removed
+from the map depending on the event is hot-add/hot-remove. There are currently
+no locking rules as of now. Typical usage is to init topology during boot,
+at which time hotplug is disabled.
+
+You really dont need to manipulate any of the system cpu maps. They should
+be read-only for most use. When setting up per-cpu resources almost always use
+cpu_possible_mask/for_each_possible_cpu() to iterate.
+
+Never use anything other than cpumask_t to represent bitmap of CPUs.
+
+ #include <linux/cpumask.h>
+
+ for_each_possible_cpu - Iterate over cpu_possible_mask
+ for_each_online_cpu - Iterate over cpu_online_mask
+ for_each_present_cpu - Iterate over cpu_present_mask
+ for_each_cpu(x,mask) - Iterate over some random collection of cpu mask.
+
+ #include <linux/cpu.h>
+ get_online_cpus() and put_online_cpus():
+
+The above calls are used to inhibit cpu hotplug operations. While the
+cpu_hotplug.refcount is non zero, the cpu_online_mask will not change.
+If you merely need to avoid cpus going away, you could also use
+preempt_disable() and preempt_enable() for those sections.
+Just remember the critical section cannot call any
+function that can sleep or schedule this process away. The preempt_disable()
+will work as long as stop_machine_run() is used to take a cpu down.
+
+CPU Hotplug - Frequently Asked Questions.
+
+Q: How to enable my kernel to support CPU hotplug?
+A: When doing make defconfig, Enable CPU hotplug support
+
+ "Processor type and Features" -> Support for Hotpluggable CPUs
+
+Make sure that you have CONFIG_SMP turned on as well.
+
+You would need to enable CONFIG_HOTPLUG_CPU for SMP suspend/resume support
+as well.
+
+Q: What architectures support CPU hotplug?
+A: As of 2.6.14, the following architectures support CPU hotplug.
+
+i386 (Intel), ppc, ppc64, parisc, s390, ia64 and x86_64
+
+Q: How to test if hotplug is supported on the newly built kernel?
+A: You should now notice an entry in sysfs.
+
+Check if sysfs is mounted, using the "mount" command. You should notice
+an entry as shown below in the output.
+
+ ....
+ none on /sys type sysfs (rw)
+ ....
+
+If this is not mounted, do the following.
+
+ #mkdir /sysfs
+ #mount -t sysfs sys /sys
+
+Now you should see entries for all present cpu, the following is an example
+in a 8-way system.
+
+ #pwd
+ #/sys/devices/system/cpu
+ #ls -l
+ total 0
+ drwxr-xr-x 10 root root 0 Sep 19 07:44 .
+ drwxr-xr-x 13 root root 0 Sep 19 07:45 ..
+ drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu0
+ drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu1
+ drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu2
+ drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu3
+ drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu4
+ drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu5
+ drwxr-xr-x 3 root root 0 Sep 19 07:44 cpu6
+ drwxr-xr-x 3 root root 0 Sep 19 07:48 cpu7
+
+Under each directory you would find an "online" file which is the control
+file to logically online/offline a processor.
+
+Q: Does hot-add/hot-remove refer to physical add/remove of cpus?
+A: The usage of hot-add/remove may not be very consistently used in the code.
+CONFIG_HOTPLUG_CPU enables logical online/offline capability in the kernel.
+To support physical addition/removal, one would need some BIOS hooks and
+the platform should have something like an attention button in PCI hotplug.
+CONFIG_ACPI_HOTPLUG_CPU enables ACPI support for physical add/remove of CPUs.
+
+Q: How do i logically offline a CPU?
+A: Do the following.
+
+ #echo 0 > /sys/devices/system/cpu/cpuX/online
+
+Once the logical offline is successful, check
+
+ #cat /proc/interrupts
+
+You should now not see the CPU that you removed. Also online file will report
+the state as 0 when a cpu if offline and 1 when its online.
+
+ #To display the current cpu state.
+ #cat /sys/devices/system/cpu/cpuX/online
+
+Q: Why can't i remove CPU0 on some systems?
+A: Some architectures may have some special dependency on a certain CPU.
+
+For e.g in IA64 platforms we have ability to sent platform interrupts to the
+OS. a.k.a Corrected Platform Error Interrupts (CPEI). In current ACPI
+specifications, we didn't have a way to change the target CPU. Hence if the
+current ACPI version doesn't support such re-direction, we disable that CPU
+by making it not-removable.
+
+In such cases you will also notice that the online file is missing under cpu0.
+
+Q: Is CPU0 removable on X86?
+A: Yes. If kernel is compiled with CONFIG_BOOTPARAM_HOTPLUG_CPU0=y, CPU0 is
+removable by default. Otherwise, CPU0 is also removable by kernel option
+cpu0_hotplug.
+
+But some features depend on CPU0. Two known dependencies are:
+
+1. Resume from hibernate/suspend depends on CPU0. Hibernate/suspend will fail if
+CPU0 is offline and you need to online CPU0 before hibernate/suspend can
+continue.
+2. PIC interrupts also depend on CPU0. CPU0 can't be removed if a PIC interrupt
+is detected.
+
+It's said poweroff/reboot may depend on CPU0 on some machines although I haven't
+seen any poweroff/reboot failure so far after CPU0 is offline on a few tested
+machines.
+
+Please let me know if you know or see any other dependencies of CPU0.
+
+If the dependencies are under your control, you can turn on CPU0 hotplug feature
+either by CONFIG_BOOTPARAM_HOTPLUG_CPU0 or by kernel parameter cpu0_hotplug.
+
+--Fenghua Yu <fenghua.yu@intel.com>
+
+Q: How do i find out if a particular CPU is not removable?
+A: Depending on the implementation, some architectures may show this by the
+absence of the "online" file. This is done if it can be determined ahead of
+time that this CPU cannot be removed.
+
+In some situations, this can be a run time check, i.e if you try to remove the
+last CPU, this will not be permitted. You can find such failures by
+investigating the return value of the "echo" command.
+
+Q: What happens when a CPU is being logically offlined?
+A: The following happen, listed in no particular order :-)
+
+- A notification is sent to in-kernel registered modules by sending an event
+ CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
+ CPU is being offlined while tasks are frozen due to a suspend operation in
+ progress
+- All processes are migrated away from this outgoing CPU to new CPUs.
+ The new CPU is chosen from each process' current cpuset, which may be
+ a subset of all online CPUs.
+- All interrupts targeted to this CPU is migrated to a new CPU
+- timers/bottom half/task lets are also migrated to a new CPU
+- Once all services are migrated, kernel calls an arch specific routine
+ __cpu_disable() to perform arch specific cleanup.
+- Once this is successful, an event for successful cleanup is sent by an event
+ CPU_DEAD (or CPU_DEAD_FROZEN if tasks are frozen due to a suspend while the
+ CPU is being offlined).
+
+ "It is expected that each service cleans up when the CPU_DOWN_PREPARE
+ notifier is called, when CPU_DEAD is called its expected there is nothing
+ running on behalf of this CPU that was offlined"
+
+Q: If i have some kernel code that needs to be aware of CPU arrival and
+ departure, how to i arrange for proper notification?
+A: This is what you would need in your kernel code to receive notifications.
+
+ #include <linux/cpu.h>
+ static int foobar_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+ {
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ foobar_online_action(cpu);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ foobar_dead_action(cpu);
+ break;
+ }
+ return NOTIFY_OK;
+ }
+
+ static struct notifier_block foobar_cpu_notifier =
+ {
+ .notifier_call = foobar_cpu_callback,
+ };
+
+You need to call register_cpu_notifier() from your init function.
+Init functions could be of two types:
+1. early init (init function called when only the boot processor is online).
+2. late init (init function called _after_ all the CPUs are online).
+
+For the first case, you should add the following to your init function
+
+ register_cpu_notifier(&foobar_cpu_notifier);
+
+For the second case, you should add the following to your init function
+
+ register_hotcpu_notifier(&foobar_cpu_notifier);
+
+You can fail PREPARE notifiers if something doesn't work to prepare resources.
+This will stop the activity and send a following CANCELED event back.
+
+CPU_DEAD should not be failed, its just a goodness indication, but bad
+things will happen if a notifier in path sent a BAD notify code.
+
+Q: I don't see my action being called for all CPUs already up and running?
+A: Yes, CPU notifiers are called only when new CPUs are on-lined or offlined.
+ If you need to perform some action for each cpu already in the system, then
+ do this:
+
+ for_each_online_cpu(i) {
+ foobar_cpu_callback(&foobar_cpu_notifier, CPU_UP_PREPARE, i);
+ foobar_cpu_callback(&foobar_cpu_notifier, CPU_ONLINE, i);
+ }
+
+ However, if you want to register a hotplug callback, as well as perform
+ some initialization for CPUs that are already online, then do this:
+
+ Version 1: (Correct)
+ ---------
+
+ cpu_notifier_register_begin();
+
+ for_each_online_cpu(i) {
+ foobar_cpu_callback(&foobar_cpu_notifier,
+ CPU_UP_PREPARE, i);
+ foobar_cpu_callback(&foobar_cpu_notifier,
+ CPU_ONLINE, i);
+ }
+
+ /* Note the use of the double underscored version of the API */
+ __register_cpu_notifier(&foobar_cpu_notifier);
+
+ cpu_notifier_register_done();
+
+ Note that the following code is *NOT* the right way to achieve this,
+ because it is prone to an ABBA deadlock between the cpu_add_remove_lock
+ and the cpu_hotplug.lock.
+
+ Version 2: (Wrong!)
+ ---------
+
+ get_online_cpus();
+
+ for_each_online_cpu(i) {
+ foobar_cpu_callback(&foobar_cpu_notifier,
+ CPU_UP_PREPARE, i);
+ foobar_cpu_callback(&foobar_cpu_notifier,
+ CPU_ONLINE, i);
+ }
+
+ register_cpu_notifier(&foobar_cpu_notifier);
+
+ put_online_cpus();
+
+ So always use the first version shown above when you want to register
+ callbacks as well as initialize the already online CPUs.
+
+
+Q: If i would like to develop cpu hotplug support for a new architecture,
+ what do i need at a minimum?
+A: The following are what is required for CPU hotplug infrastructure to work
+ correctly.
+
+ - Make sure you have an entry in Kconfig to enable CONFIG_HOTPLUG_CPU
+ - __cpu_up() - Arch interface to bring up a CPU
+ - __cpu_disable() - Arch interface to shutdown a CPU, no more interrupts
+ can be handled by the kernel after the routine
+ returns. Including local APIC timers etc are
+ shutdown.
+ - __cpu_die() - This actually supposed to ensure death of the CPU.
+ Actually look at some example code in other arch
+ that implement CPU hotplug. The processor is taken
+ down from the idle() loop for that specific
+ architecture. __cpu_die() typically waits for some
+ per_cpu state to be set, to ensure the processor
+ dead routine is called to be sure positively.
+
+Q: I need to ensure that a particular cpu is not removed when there is some
+ work specific to this cpu is in progress.
+A: There are two ways. If your code can be run in interrupt context, use
+ smp_call_function_single(), otherwise use work_on_cpu(). Note that
+ work_on_cpu() is slow, and can fail due to out of memory:
+
+ int my_func_on_cpu(int cpu)
+ {
+ int err;
+ get_online_cpus();
+ if (!cpu_online(cpu))
+ err = -EINVAL;
+ else
+#if NEEDS_BLOCKING
+ err = work_on_cpu(cpu, __my_func_on_cpu, NULL);
+#else
+ smp_call_function_single(cpu, __my_func_on_cpu, &err,
+ true);
+#endif
+ put_online_cpus();
+ return err;
+ }
+
+Q: How do we determine how many CPUs are available for hotplug.
+A: There is no clear spec defined way from ACPI that can give us that
+ information today. Based on some input from Natalie of Unisys,
+ that the ACPI MADT (Multiple APIC Description Tables) marks those possible
+ CPUs in a system with disabled status.
+
+ Andi implemented some simple heuristics that count the number of disabled
+ CPUs in MADT as hotpluggable CPUS. In the case there are no disabled CPUS
+ we assume 1/2 the number of CPUs currently present can be hotplugged.
+
+ Caveat: ACPI MADT can only provide 256 entries in systems with only ACPI 2.0c
+ or earlier ACPI version supported, because the apicid field in MADT is only
+ 8 bits. From ACPI 3.0, this limitation was removed since the apicid field
+ was extended to 32 bits with x2APIC introduced.
+
+User Space Notification
+
+Hotplug support for devices is common in Linux today. Its being used today to
+support automatic configuration of network, usb and pci devices. A hotplug
+event can be used to invoke an agent script to perform the configuration task.
+
+You can add /etc/hotplug/cpu.agent to handle hotplug notification user space
+scripts.
+
+ #!/bin/bash
+ # $Id: cpu.agent
+ # Kernel hotplug params include:
+ #ACTION=%s [online or offline]
+ #DEVPATH=%s
+ #
+ cd /etc/hotplug
+ . ./hotplug.functions
+
+ case $ACTION in
+ online)
+ echo `date` ":cpu.agent" add cpu >> /tmp/hotplug.txt
+ ;;
+ offline)
+ echo `date` ":cpu.agent" remove cpu >>/tmp/hotplug.txt
+ ;;
+ *)
+ debug_mesg CPU $ACTION event not supported
+ exit 1
+ ;;
+ esac
diff --git a/Documentation/cpu-load.txt b/Documentation/cpu-load.txt
new file mode 100644
index 000000000..287224e57
--- /dev/null
+++ b/Documentation/cpu-load.txt
@@ -0,0 +1,113 @@
+CPU load
+--------
+
+Linux exports various bits of information via `/proc/stat' and
+`/proc/uptime' that userland tools, such as top(1), use to calculate
+the average time system spent in a particular state, for example:
+
+ $ iostat
+ Linux 2.6.18.3-exp (linmac) 02/20/2007
+
+ avg-cpu: %user %nice %system %iowait %steal %idle
+ 10.01 0.00 2.92 5.44 0.00 81.63
+
+ ...
+
+Here the system thinks that over the default sampling period the
+system spent 10.01% of the time doing work in user space, 2.92% in the
+kernel, and was overall 81.63% of the time idle.
+
+In most cases the `/proc/stat' information reflects the reality quite
+closely, however due to the nature of how/when the kernel collects
+this data sometimes it can not be trusted at all.
+
+So how is this information collected? Whenever timer interrupt is
+signalled the kernel looks what kind of task was running at this
+moment and increments the counter that corresponds to this tasks
+kind/state. The problem with this is that the system could have
+switched between various states multiple times between two timer
+interrupts yet the counter is incremented only for the last state.
+
+
+Example
+-------
+
+If we imagine the system with one task that periodically burns cycles
+in the following manner:
+
+ time line between two timer interrupts
+|--------------------------------------|
+ ^ ^
+ |_ something begins working |
+ |_ something goes to sleep
+ (only to be awaken quite soon)
+
+In the above situation the system will be 0% loaded according to the
+`/proc/stat' (since the timer interrupt will always happen when the
+system is executing the idle handler), but in reality the load is
+closer to 99%.
+
+One can imagine many more situations where this behavior of the kernel
+will lead to quite erratic information inside `/proc/stat'.
+
+
+/* gcc -o hog smallhog.c */
+#include <time.h>
+#include <limits.h>
+#include <signal.h>
+#include <sys/time.h>
+#define HIST 10
+
+static volatile sig_atomic_t stop;
+
+static void sighandler (int signr)
+{
+ (void) signr;
+ stop = 1;
+}
+static unsigned long hog (unsigned long niters)
+{
+ stop = 0;
+ while (!stop && --niters);
+ return niters;
+}
+int main (void)
+{
+ int i;
+ struct itimerval it = { .it_interval = { .tv_sec = 0, .tv_usec = 1 },
+ .it_value = { .tv_sec = 0, .tv_usec = 1 } };
+ sigset_t set;
+ unsigned long v[HIST];
+ double tmp = 0.0;
+ unsigned long n;
+ signal (SIGALRM, &sighandler);
+ setitimer (ITIMER_REAL, &it, NULL);
+
+ hog (ULONG_MAX);
+ for (i = 0; i < HIST; ++i) v[i] = ULONG_MAX - hog (ULONG_MAX);
+ for (i = 0; i < HIST; ++i) tmp += v[i];
+ tmp /= HIST;
+ n = tmp - (tmp / 3.0);
+
+ sigemptyset (&set);
+ sigaddset (&set, SIGALRM);
+
+ for (;;) {
+ hog (n);
+ sigwait (&set, &i);
+ }
+ return 0;
+}
+
+
+References
+----------
+
+http://lkml.org/lkml/2007/2/12/6
+Documentation/filesystems/proc.txt (1.8)
+
+
+Thanks
+------
+
+Con Kolivas, Pavel Machek
diff --git a/Documentation/cpuidle/core.txt b/Documentation/cpuidle/core.txt
new file mode 100644
index 000000000..63ecc5dc9
--- /dev/null
+++ b/Documentation/cpuidle/core.txt
@@ -0,0 +1,23 @@
+
+ Supporting multiple CPU idle levels in kernel
+
+ cpuidle
+
+General Information:
+
+Various CPUs today support multiple idle levels that are differentiated
+by varying exit latencies and power consumption during idle.
+cpuidle is a generic in-kernel infrastructure that separates
+idle policy (governor) from idle mechanism (driver) and provides a
+standardized infrastructure to support independent development of
+governors and drivers.
+
+cpuidle resides under drivers/cpuidle.
+
+Boot options:
+"cpuidle_sysfs_switch"
+enables current_governor interface in /sys/devices/system/cpu/cpuidle/,
+which can be used to switch governors at run time. This boot option
+is meant for developer testing only. In normal usage, kernel picks the
+best governor based on governor ratings.
+SEE ALSO: sysfs.txt in this directory.
diff --git a/Documentation/cpuidle/driver.txt b/Documentation/cpuidle/driver.txt
new file mode 100644
index 000000000..1b0d81d92
--- /dev/null
+++ b/Documentation/cpuidle/driver.txt
@@ -0,0 +1,37 @@
+
+
+ Supporting multiple CPU idle levels in kernel
+
+ cpuidle drivers
+
+
+
+
+cpuidle driver hooks into the cpuidle infrastructure and handles the
+architecture/platform dependent part of CPU idle states. Driver
+provides the platform idle state detection capability and also
+has mechanisms in place to support actual entry-exit into CPU idle states.
+
+cpuidle driver initializes the cpuidle_device structure for each CPU device
+and registers with cpuidle using cpuidle_register_device.
+
+If all the idle states are the same, the wrapper function cpuidle_register
+could be used instead.
+
+It can also support the dynamic changes (like battery <-> AC), by using
+cpuidle_pause_and_lock, cpuidle_disable_device and cpuidle_enable_device,
+cpuidle_resume_and_unlock.
+
+Interfaces:
+extern int cpuidle_register(struct cpuidle_driver *drv,
+ const struct cpumask *const coupled_cpus);
+extern int cpuidle_unregister(struct cpuidle_driver *drv);
+extern int cpuidle_register_driver(struct cpuidle_driver *drv);
+extern void cpuidle_unregister_driver(struct cpuidle_driver *drv);
+extern int cpuidle_register_device(struct cpuidle_device *dev);
+extern void cpuidle_unregister_device(struct cpuidle_device *dev);
+
+extern void cpuidle_pause_and_lock(void);
+extern void cpuidle_resume_and_unlock(void);
+extern int cpuidle_enable_device(struct cpuidle_device *dev);
+extern void cpuidle_disable_device(struct cpuidle_device *dev);
diff --git a/Documentation/cpuidle/governor.txt b/Documentation/cpuidle/governor.txt
new file mode 100644
index 000000000..d9020f5e8
--- /dev/null
+++ b/Documentation/cpuidle/governor.txt
@@ -0,0 +1,28 @@
+
+
+
+ Supporting multiple CPU idle levels in kernel
+
+ cpuidle governors
+
+
+
+
+cpuidle governor is policy routine that decides what idle state to enter at
+any given time. cpuidle core uses different callbacks to the governor.
+
+* enable() to enable governor for a particular device
+* disable() to disable governor for a particular device
+* select() to select an idle state to enter
+* reflect() called after returning from the idle state, which can be used
+ by the governor for some record keeping.
+
+More than one governor can be registered at the same time and
+users can switch between drivers using /sysfs interface (when enabled).
+More than one governor part is supported for developers to easily experiment
+with different governors. By default, most optimal governor based on your
+kernel configuration and platform will be selected by cpuidle.
+
+Interfaces:
+extern int cpuidle_register_governor(struct cpuidle_governor *gov);
+struct cpuidle_governor
diff --git a/Documentation/cpuidle/sysfs.txt b/Documentation/cpuidle/sysfs.txt
new file mode 100644
index 000000000..b6f44f490
--- /dev/null
+++ b/Documentation/cpuidle/sysfs.txt
@@ -0,0 +1,92 @@
+
+
+ Supporting multiple CPU idle levels in kernel
+
+ cpuidle sysfs
+
+System global cpuidle related information and tunables are under
+/sys/devices/system/cpu/cpuidle
+
+The current interfaces in this directory has self-explanatory names:
+* current_driver
+* current_governor_ro
+
+With cpuidle_sysfs_switch boot option (meant for developer testing)
+following objects are visible instead.
+* current_driver
+* available_governors
+* current_governor
+In this case users can switch the governor at run time by writing
+to current_governor.
+
+
+Per logical CPU specific cpuidle information are under
+/sys/devices/system/cpu/cpuX/cpuidle
+for each online cpu X
+
+--------------------------------------------------------------------------------
+# ls -lR /sys/devices/system/cpu/cpu0/cpuidle/
+/sys/devices/system/cpu/cpu0/cpuidle/:
+total 0
+drwxr-xr-x 2 root root 0 Feb 8 10:42 state0
+drwxr-xr-x 2 root root 0 Feb 8 10:42 state1
+drwxr-xr-x 2 root root 0 Feb 8 10:42 state2
+drwxr-xr-x 2 root root 0 Feb 8 10:42 state3
+
+/sys/devices/system/cpu/cpu0/cpuidle/state0:
+total 0
+-r--r--r-- 1 root root 4096 Feb 8 10:42 desc
+-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable
+-r--r--r-- 1 root root 4096 Feb 8 10:42 latency
+-r--r--r-- 1 root root 4096 Feb 8 10:42 name
+-r--r--r-- 1 root root 4096 Feb 8 10:42 power
+-r--r--r-- 1 root root 4096 Feb 8 10:42 time
+-r--r--r-- 1 root root 4096 Feb 8 10:42 usage
+
+/sys/devices/system/cpu/cpu0/cpuidle/state1:
+total 0
+-r--r--r-- 1 root root 4096 Feb 8 10:42 desc
+-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable
+-r--r--r-- 1 root root 4096 Feb 8 10:42 latency
+-r--r--r-- 1 root root 4096 Feb 8 10:42 name
+-r--r--r-- 1 root root 4096 Feb 8 10:42 power
+-r--r--r-- 1 root root 4096 Feb 8 10:42 time
+-r--r--r-- 1 root root 4096 Feb 8 10:42 usage
+
+/sys/devices/system/cpu/cpu0/cpuidle/state2:
+total 0
+-r--r--r-- 1 root root 4096 Feb 8 10:42 desc
+-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable
+-r--r--r-- 1 root root 4096 Feb 8 10:42 latency
+-r--r--r-- 1 root root 4096 Feb 8 10:42 name
+-r--r--r-- 1 root root 4096 Feb 8 10:42 power
+-r--r--r-- 1 root root 4096 Feb 8 10:42 time
+-r--r--r-- 1 root root 4096 Feb 8 10:42 usage
+
+/sys/devices/system/cpu/cpu0/cpuidle/state3:
+total 0
+-r--r--r-- 1 root root 4096 Feb 8 10:42 desc
+-rw-r--r-- 1 root root 4096 Feb 8 10:42 disable
+-r--r--r-- 1 root root 4096 Feb 8 10:42 latency
+-r--r--r-- 1 root root 4096 Feb 8 10:42 name
+-r--r--r-- 1 root root 4096 Feb 8 10:42 power
+-r--r--r-- 1 root root 4096 Feb 8 10:42 time
+-r--r--r-- 1 root root 4096 Feb 8 10:42 usage
+--------------------------------------------------------------------------------
+
+
+* desc : Small description about the idle state (string)
+* disable : Option to disable this idle state (bool) -> see note below
+* latency : Latency to exit out of this idle state (in microseconds)
+* name : Name of the idle state (string)
+* power : Power consumed while in this idle state (in milliwatts)
+* time : Total time spent in this idle state (in microseconds)
+* usage : Number of times this state was entered (count)
+
+Note:
+The behavior and the effect of the disable variable depends on the
+implementation of a particular governor. In the ladder governor, for
+example, it is not coherent, i.e. if one is disabling a light state,
+then all deeper states are disabled as well, but the disable variable
+does not reflect it. Likewise, if one enables a deep state but a lighter
+state still is disabled, then this has no effect.
diff --git a/Documentation/cputopology.txt b/Documentation/cputopology.txt
new file mode 100644
index 000000000..0aad6deb2
--- /dev/null
+++ b/Documentation/cputopology.txt
@@ -0,0 +1,111 @@
+
+Export CPU topology info via sysfs. Items (attributes) are similar
+to /proc/cpuinfo.
+
+1) /sys/devices/system/cpu/cpuX/topology/physical_package_id:
+
+ physical package id of cpuX. Typically corresponds to a physical
+ socket number, but the actual value is architecture and platform
+ dependent.
+
+2) /sys/devices/system/cpu/cpuX/topology/core_id:
+
+ the CPU core ID of cpuX. Typically it is the hardware platform's
+ identifier (rather than the kernel's). The actual value is
+ architecture and platform dependent.
+
+3) /sys/devices/system/cpu/cpuX/topology/book_id:
+
+ the book ID of cpuX. Typically it is the hardware platform's
+ identifier (rather than the kernel's). The actual value is
+ architecture and platform dependent.
+
+4) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
+
+ internal kernel map of cpuX's hardware threads within the same
+ core as cpuX
+
+5) /sys/devices/system/cpu/cpuX/topology/core_siblings:
+
+ internal kernel map of cpuX's hardware threads within the same
+ physical_package_id.
+
+6) /sys/devices/system/cpu/cpuX/topology/book_siblings:
+
+ internal kernel map of cpuX's hardware threads within the same
+ book_id.
+
+To implement it in an architecture-neutral way, a new source file,
+drivers/base/topology.c, is to export the 4 or 6 attributes. The two book
+related sysfs files will only be created if CONFIG_SCHED_BOOK is selected.
+
+For an architecture to support this feature, it must define some of
+these macros in include/asm-XXX/topology.h:
+#define topology_physical_package_id(cpu)
+#define topology_core_id(cpu)
+#define topology_book_id(cpu)
+#define topology_thread_cpumask(cpu)
+#define topology_core_cpumask(cpu)
+#define topology_book_cpumask(cpu)
+
+The type of **_id is int.
+The type of siblings is (const) struct cpumask *.
+
+To be consistent on all architectures, include/linux/topology.h
+provides default definitions for any of the above macros that are
+not defined by include/asm-XXX/topology.h:
+1) physical_package_id: -1
+2) core_id: 0
+3) thread_siblings: just the given CPU
+4) core_siblings: just the given CPU
+
+For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
+default definitions for topology_book_id() and topology_book_cpumask().
+
+Additionally, CPU topology information is provided under
+/sys/devices/system/cpu and includes these files. The internal
+source for the output is in brackets ("[]").
+
+ kernel_max: the maximum CPU index allowed by the kernel configuration.
+ [NR_CPUS-1]
+
+ offline: CPUs that are not online because they have been
+ HOTPLUGGED off (see cpu-hotplug.txt) or exceed the limit
+ of CPUs allowed by the kernel configuration (kernel_max
+ above). [~cpu_online_mask + cpus >= NR_CPUS]
+
+ online: CPUs that are online and being scheduled [cpu_online_mask]
+
+ possible: CPUs that have been allocated resources and can be
+ brought online if they are present. [cpu_possible_mask]
+
+ present: CPUs that have been identified as being present in the
+ system. [cpu_present_mask]
+
+The format for the above output is compatible with cpulist_parse()
+[see <linux/cpumask.h>]. Some examples follow.
+
+In this example, there are 64 CPUs in the system but cpus 32-63 exceed
+the kernel max which is limited to 0..31 by the NR_CPUS config option
+being 32. Note also that CPUs 2 and 4-31 are not online but could be
+brought online as they are both present and possible.
+
+ kernel_max: 31
+ offline: 2,4-31,32-63
+ online: 0-1,3
+ possible: 0-31
+ present: 0-31
+
+In this example, the NR_CPUS config option is 128, but the kernel was
+started with possible_cpus=144. There are 4 CPUs in the system and cpu2
+was manually taken offline (and is the only CPU that can be brought
+online.)
+
+ kernel_max: 127
+ offline: 2,4-127,128-143
+ online: 0-1,3
+ possible: 0-127
+ present: 0-3
+
+See cpu-hotplug.txt for the possible_cpus=NUM kernel start parameter
+as well as more information on the various cpumasks.
diff --git a/Documentation/crc32.txt b/Documentation/crc32.txt
new file mode 100644
index 000000000..a08a7dd9d
--- /dev/null
+++ b/Documentation/crc32.txt
@@ -0,0 +1,182 @@
+A brief CRC tutorial.
+
+A CRC is a long-division remainder. You add the CRC to the message,
+and the whole thing (message+CRC) is a multiple of the given
+CRC polynomial. To check the CRC, you can either check that the
+CRC matches the recomputed value, *or* you can check that the
+remainder computed on the message+CRC is 0. This latter approach
+is used by a lot of hardware implementations, and is why so many
+protocols put the end-of-frame flag after the CRC.
+
+It's actually the same long division you learned in school, except that
+- We're working in binary, so the digits are only 0 and 1, and
+- When dividing polynomials, there are no carries. Rather than add and
+ subtract, we just xor. Thus, we tend to get a bit sloppy about
+ the difference between adding and subtracting.
+
+Like all division, the remainder is always smaller than the divisor.
+To produce a 32-bit CRC, the divisor is actually a 33-bit CRC polynomial.
+Since it's 33 bits long, bit 32 is always going to be set, so usually the
+CRC is written in hex with the most significant bit omitted. (If you're
+familiar with the IEEE 754 floating-point format, it's the same idea.)
+
+Note that a CRC is computed over a string of *bits*, so you have
+to decide on the endianness of the bits within each byte. To get
+the best error-detecting properties, this should correspond to the
+order they're actually sent. For example, standard RS-232 serial is
+little-endian; the most significant bit (sometimes used for parity)
+is sent last. And when appending a CRC word to a message, you should
+do it in the right order, matching the endianness.
+
+Just like with ordinary division, you proceed one digit (bit) at a time.
+Each step of the division you take one more digit (bit) of the dividend
+and append it to the current remainder. Then you figure out the
+appropriate multiple of the divisor to subtract to being the remainder
+back into range. In binary, this is easy - it has to be either 0 or 1,
+and to make the XOR cancel, it's just a copy of bit 32 of the remainder.
+
+When computing a CRC, we don't care about the quotient, so we can
+throw the quotient bit away, but subtract the appropriate multiple of
+the polynomial from the remainder and we're back to where we started,
+ready to process the next bit.
+
+A big-endian CRC written this way would be coded like:
+for (i = 0; i < input_bits; i++) {
+ multiple = remainder & 0x80000000 ? CRCPOLY : 0;
+ remainder = (remainder << 1 | next_input_bit()) ^ multiple;
+}
+
+Notice how, to get at bit 32 of the shifted remainder, we look
+at bit 31 of the remainder *before* shifting it.
+
+But also notice how the next_input_bit() bits we're shifting into
+the remainder don't actually affect any decision-making until
+32 bits later. Thus, the first 32 cycles of this are pretty boring.
+Also, to add the CRC to a message, we need a 32-bit-long hole for it at
+the end, so we have to add 32 extra cycles shifting in zeros at the
+end of every message,
+
+These details lead to a standard trick: rearrange merging in the
+next_input_bit() until the moment it's needed. Then the first 32 cycles
+can be precomputed, and merging in the final 32 zero bits to make room
+for the CRC can be skipped entirely. This changes the code to:
+
+for (i = 0; i < input_bits; i++) {
+ remainder ^= next_input_bit() << 31;
+ multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
+ remainder = (remainder << 1) ^ multiple;
+}
+
+With this optimization, the little-endian code is particularly simple:
+for (i = 0; i < input_bits; i++) {
+ remainder ^= next_input_bit();
+ multiple = (remainder & 1) ? CRCPOLY : 0;
+ remainder = (remainder >> 1) ^ multiple;
+}
+
+The most significant coefficient of the remainder polynomial is stored
+in the least significant bit of the binary "remainder" variable.
+The other details of endianness have been hidden in CRCPOLY (which must
+be bit-reversed) and next_input_bit().
+
+As long as next_input_bit is returning the bits in a sensible order, we don't
+*have* to wait until the last possible moment to merge in additional bits.
+We can do it 8 bits at a time rather than 1 bit at a time:
+for (i = 0; i < input_bytes; i++) {
+ remainder ^= next_input_byte() << 24;
+ for (j = 0; j < 8; j++) {
+ multiple = (remainder & 0x80000000) ? CRCPOLY : 0;
+ remainder = (remainder << 1) ^ multiple;
+ }
+}
+
+Or in little-endian:
+for (i = 0; i < input_bytes; i++) {
+ remainder ^= next_input_byte();
+ for (j = 0; j < 8; j++) {
+ multiple = (remainder & 1) ? CRCPOLY : 0;
+ remainder = (remainder >> 1) ^ multiple;
+ }
+}
+
+If the input is a multiple of 32 bits, you can even XOR in a 32-bit
+word at a time and increase the inner loop count to 32.
+
+You can also mix and match the two loop styles, for example doing the
+bulk of a message byte-at-a-time and adding bit-at-a-time processing
+for any fractional bytes at the end.
+
+To reduce the number of conditional branches, software commonly uses
+the byte-at-a-time table method, popularized by Dilip V. Sarwate,
+"Computation of Cyclic Redundancy Checks via Table Look-Up", Comm. ACM
+v.31 no.8 (August 1998) p. 1008-1013.
+
+Here, rather than just shifting one bit of the remainder to decide
+in the correct multiple to subtract, we can shift a byte at a time.
+This produces a 40-bit (rather than a 33-bit) intermediate remainder,
+and the correct multiple of the polynomial to subtract is found using
+a 256-entry lookup table indexed by the high 8 bits.
+
+(The table entries are simply the CRC-32 of the given one-byte messages.)
+
+When space is more constrained, smaller tables can be used, e.g. two
+4-bit shifts followed by a lookup in a 16-entry table.
+
+It is not practical to process much more than 8 bits at a time using this
+technique, because tables larger than 256 entries use too much memory and,
+more importantly, too much of the L1 cache.
+
+To get higher software performance, a "slicing" technique can be used.
+See "High Octane CRC Generation with the Intel Slicing-by-8 Algorithm",
+ftp://download.intel.com/technology/comms/perfnet/download/slicing-by-8.pdf
+
+This does not change the number of table lookups, but does increase
+the parallelism. With the classic Sarwate algorithm, each table lookup
+must be completed before the index of the next can be computed.
+
+A "slicing by 2" technique would shift the remainder 16 bits at a time,
+producing a 48-bit intermediate remainder. Rather than doing a single
+lookup in a 65536-entry table, the two high bytes are looked up in
+two different 256-entry tables. Each contains the remainder required
+to cancel out the corresponding byte. The tables are different because the
+polynomials to cancel are different. One has non-zero coefficients from
+x^32 to x^39, while the other goes from x^40 to x^47.
+
+Since modern processors can handle many parallel memory operations, this
+takes barely longer than a single table look-up and thus performs almost
+twice as fast as the basic Sarwate algorithm.
+
+This can be extended to "slicing by 4" using 4 256-entry tables.
+Each step, 32 bits of data is fetched, XORed with the CRC, and the result
+broken into bytes and looked up in the tables. Because the 32-bit shift
+leaves the low-order bits of the intermediate remainder zero, the
+final CRC is simply the XOR of the 4 table look-ups.
+
+But this still enforces sequential execution: a second group of table
+look-ups cannot begin until the previous groups 4 table look-ups have all
+been completed. Thus, the processor's load/store unit is sometimes idle.
+
+To make maximum use of the processor, "slicing by 8" performs 8 look-ups
+in parallel. Each step, the 32-bit CRC is shifted 64 bits and XORed
+with 64 bits of input data. What is important to note is that 4 of
+those 8 bytes are simply copies of the input data; they do not depend
+on the previous CRC at all. Thus, those 4 table look-ups may commence
+immediately, without waiting for the previous loop iteration.
+
+By always having 4 loads in flight, a modern superscalar processor can
+be kept busy and make full use of its L1 cache.
+
+Two more details about CRC implementation in the real world:
+
+Normally, appending zero bits to a message which is already a multiple
+of a polynomial produces a larger multiple of that polynomial. Thus,
+a basic CRC will not detect appended zero bits (or bytes). To enable
+a CRC to detect this condition, it's common to invert the CRC before
+appending it. This makes the remainder of the message+crc come out not
+as zero, but some fixed non-zero value. (The CRC of the inversion
+pattern, 0xffffffff.)
+
+The same problem applies to zero bits prepended to the message, and a
+similar solution is used. Instead of starting the CRC computation with
+a remainder of 0, an initial remainder of all ones is used. As long as
+you start the same way on decoding, it doesn't make a difference.
diff --git a/Documentation/cris/README b/Documentation/cris/README
new file mode 100644
index 000000000..8dbdb1a44
--- /dev/null
+++ b/Documentation/cris/README
@@ -0,0 +1,195 @@
+Linux on the CRIS architecture
+==============================
+
+This is a port of Linux to Axis Communications ETRAX 100LX,
+ETRAX FS and ARTPEC-3 embedded network CPUs.
+
+For more information about CRIS and ETRAX please see further below.
+
+In order to compile this you need a version of gcc with support for the
+ETRAX chip family. Please see this link for more information on how to
+download the compiler and other tools useful when building and booting
+software for the ETRAX platform:
+
+http://developer.axis.com/wiki/doku.php?id=axis:install-howto-2_20
+
+What is CRIS ?
+--------------
+
+CRIS is an acronym for 'Code Reduced Instruction Set'. It is the CPU
+architecture in Axis Communication AB's range of embedded network CPU's,
+called ETRAX.
+
+The ETRAX 100LX chip
+--------------------
+
+For reference, please see the following link:
+
+http://www.axis.com/products/dev_etrax_100lx/index.htm
+
+The ETRAX 100LX is a 100 MIPS processor with 8kB cache, MMU, and a very broad
+range of built-in interfaces, all with modern scatter/gather DMA.
+
+Memory interfaces:
+
+ * SRAM
+ * NOR-flash/ROM
+ * EDO or page-mode DRAM
+ * SDRAM
+
+I/O interfaces:
+
+ * one 10/100 Mbit/s ethernet controller
+ * four serial-ports (up to 6 Mbit/s)
+ * two synchronous serial-ports for multimedia codec's etc.
+ * USB host controller and USB slave
+ * ATA
+ * SCSI
+ * two parallel-ports
+ * two generic 8-bit ports
+
+ (not all interfaces are available at the same time due to chip pin
+ multiplexing)
+
+ETRAX 100LX is CRISv10 architecture.
+
+
+The ETRAX FS and ARTPEC-3 chips
+-------------------------------
+
+The ETRAX FS is a 200MHz 32-bit RISC processor with on-chip 16kB
+I-cache and 16kB D-cache and with a wide range of device interfaces
+including multiple high speed serial ports and an integrated USB 1.1 PHY.
+
+The ARTPEC-3 is a variant of the ETRAX FS with additional IO-units
+used by the Axis Communications network cameras.
+
+See below link for more information:
+
+http://www.axis.com/products/dev_etrax_fs/index.htm
+
+ETRAX FS and ARTPEC-3 are both CRISv32 architectures.
+
+Bootlog
+-------
+
+Just as an example, this is the debug-output from a boot of Linux 2.4 on
+a board with ETRAX 100LX. The displayed BogoMIPS value is 5 times too small :)
+At the end you see some user-mode programs booting like telnet and ftp daemons.
+
+Linux version 2.4.1 (bjornw@godzilla.axis.se) (gcc version 2.96 20000427 (experimental)) #207 Wed Feb 21 15:48:15 CET 2001
+ROM fs in RAM, size 1376256 bytes
+Setting up paging and the MMU.
+On node 0 totalpages: 2048
+zone(0): 2048 pages.
+zone(1): 0 pages.
+zone(2): 0 pages.
+Linux/CRIS port on ETRAX 100LX (c) 2001 Axis Communications AB
+Kernel command line:
+Calibrating delay loop... 19.91 BogoMIPS
+Memory: 13872k/16384k available (587k kernel code, 2512k reserved, 44k data, 24k init)
+kmem_create: Forcing size word alignment - vm_area_struct
+kmem_create: Forcing size word alignment - filp
+Dentry-cache hash table entries: 2048 (order: 1, 16384 bytes)
+Buffer-cache hash table entries: 2048 (order: 0, 8192 bytes)
+Page-cache hash table entries: 2048 (order: 0, 8192 bytes)
+kmem_create: Forcing size word alignment - kiobuf
+kmem_create: Forcing size word alignment - bdev_cache
+Inode-cache hash table entries: 1024 (order: 0, 8192 bytes)
+kmem_create: Forcing size word alignment - inode_cache
+POSIX conformance testing by UNIFIX
+Linux NET4.0 for Linux 2.4
+Based upon Swansea University Computer Society NET3.039
+Starting kswapd v1.8
+kmem_create: Forcing size word alignment - file lock cache
+kmem_create: Forcing size word alignment - blkdev_requests
+block: queued sectors max/low 9109kB/3036kB, 64 slots per queue
+ETRAX 100LX 10/100MBit ethernet v2.0 (c) 2000 Axis Communications AB
+eth0 initialized
+eth0: changed MAC to 00:40:8C:CD:00:00
+ETRAX 100LX serial-driver $Revision: 1.7 $, (c) 2000 Axis Communications AB
+ttyS0 at 0xb0000060 is a builtin UART with DMA
+ttyS1 at 0xb0000068 is a builtin UART with DMA
+ttyS2 at 0xb0000070 is a builtin UART with DMA
+ttyS3 at 0xb0000078 is a builtin UART with DMA
+Axis flash mapping: 200000 at 50000000
+Axis flash: Found 1 x16 CFI device at 0x0 in 16 bit mode
+ Amd/Fujitsu Extended Query Table v1.0 at 0x0040
+Axis flash: JEDEC Device ID is 0xC4. Assuming broken CFI table.
+Axis flash: Swapping erase regions for broken CFI table.
+number of CFI chips: 1
+ Using default partition table
+I2C driver v2.2, (c) 1999-2001 Axis Communications AB
+ETRAX 100LX GPIO driver v2.1, (c) 2001 Axis Communications AB
+NET4: Linux TCP/IP 1.0 for NET4.0
+IP Protocols: ICMP, UDP, TCP
+kmem_create: Forcing size word alignment - ip_dst_cache
+IP: routing cache hash table of 1024 buckets, 8Kbytes
+TCP: Hash tables configured (established 2048 bind 2048)
+NET4: Unix domain sockets 1.0/SMP for Linux NET4.0.
+VFS: Mounted root (cramfs filesystem) readonly.
+Init starts up...
+Mounted none on /proc ok.
+Setting up eth0 with ip 10.13.9.116 and mac 00:40:8c:18:04:60
+eth0: changed MAC to 00:40:8C:18:04:60
+Setting up lo with ip 127.0.0.1
+Default gateway is 10.13.9.1
+Hostname is bbox1
+Telnetd starting, using port 23.
+ using /bin/sash as shell.
+sftpd[15]: sftpd $Revision: 1.7 $ starting up
+
+
+
+And here is how some /proc entries look:
+
+17# cd /proc
+17# cat cpuinfo
+cpu : CRIS
+cpu revision : 10
+cpu model : ETRAX 100LX
+cache size : 8 kB
+fpu : no
+mmu : yes
+ethernet : 10/100 Mbps
+token ring : no
+scsi : yes
+ata : yes
+usb : yes
+bogomips : 99.84
+
+17# cat meminfo
+ total: used: free: shared: buffers: cached:
+Mem: 7028736 925696 6103040 114688 0 229376
+Swap: 0 0 0
+MemTotal: 6864 kB
+MemFree: 5960 kB
+MemShared: 112 kB
+Buffers: 0 kB
+Cached: 224 kB
+Active: 224 kB
+Inact_dirty: 0 kB
+Inact_clean: 0 kB
+Inact_target: 0 kB
+HighTotal: 0 kB
+HighFree: 0 kB
+LowTotal: 6864 kB
+LowFree: 5960 kB
+SwapTotal: 0 kB
+SwapFree: 0 kB
+17# ls -l /bin
+-rwxr-xr-x 1 342 100 10356 Jan 01 00:00 ifconfig
+-rwxr-xr-x 1 342 100 17548 Jan 01 00:00 init
+-rwxr-xr-x 1 342 100 9488 Jan 01 00:00 route
+-rwxr-xr-x 1 342 100 46036 Jan 01 00:00 sftpd
+-rwxr-xr-x 1 342 100 48104 Jan 01 00:00 sh
+-rwxr-xr-x 1 342 100 16252 Jan 01 00:00 telnetd
+
+
+
+
+
+
+
+
+
diff --git a/Documentation/crypto/api-intro.txt b/Documentation/crypto/api-intro.txt
new file mode 100644
index 000000000..8b4930271
--- /dev/null
+++ b/Documentation/crypto/api-intro.txt
@@ -0,0 +1,248 @@
+
+ Scatterlist Cryptographic API
+
+INTRODUCTION
+
+The Scatterlist Crypto API takes page vectors (scatterlists) as
+arguments, and works directly on pages. In some cases (e.g. ECB
+mode ciphers), this will allow for pages to be encrypted in-place
+with no copying.
+
+One of the initial goals of this design was to readily support IPsec,
+so that processing can be applied to paged skb's without the need
+for linearization.
+
+
+DETAILS
+
+At the lowest level are algorithms, which register dynamically with the
+API.
+
+'Transforms' are user-instantiated objects, which maintain state, handle all
+of the implementation logic (e.g. manipulating page vectors) and provide an
+abstraction to the underlying algorithms. However, at the user
+level they are very simple.
+
+Conceptually, the API layering looks like this:
+
+ [transform api] (user interface)
+ [transform ops] (per-type logic glue e.g. cipher.c, compress.c)
+ [algorithm api] (for registering algorithms)
+
+The idea is to make the user interface and algorithm registration API
+very simple, while hiding the core logic from both. Many good ideas
+from existing APIs such as Cryptoapi and Nettle have been adapted for this.
+
+The API currently supports five main types of transforms: AEAD (Authenticated
+Encryption with Associated Data), Block Ciphers, Ciphers, Compressors and
+Hashes.
+
+Please note that Block Ciphers is somewhat of a misnomer. It is in fact
+meant to support all ciphers including stream ciphers. The difference
+between Block Ciphers and Ciphers is that the latter operates on exactly
+one block while the former can operate on an arbitrary amount of data,
+subject to block size requirements (i.e., non-stream ciphers can only
+process multiples of blocks).
+
+Support for hardware crypto devices via an asynchronous interface is
+under development.
+
+Here's an example of how to use the API:
+
+ #include <linux/crypto.h>
+ #include <linux/err.h>
+ #include <linux/scatterlist.h>
+
+ struct scatterlist sg[2];
+ char result[128];
+ struct crypto_hash *tfm;
+ struct hash_desc desc;
+
+ tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ fail();
+
+ /* ... set up the scatterlists ... */
+
+ desc.tfm = tfm;
+ desc.flags = 0;
+
+ if (crypto_hash_digest(&desc, sg, 2, result))
+ fail();
+
+ crypto_free_hash(tfm);
+
+
+Many real examples are available in the regression test module (tcrypt.c).
+
+
+DEVELOPER NOTES
+
+Transforms may only be allocated in user context, and cryptographic
+methods may only be called from softirq and user contexts. For
+transforms with a setkey method it too should only be called from
+user context.
+
+When using the API for ciphers, performance will be optimal if each
+scatterlist contains data which is a multiple of the cipher's block
+size (typically 8 bytes). This prevents having to do any copying
+across non-aligned page fragment boundaries.
+
+
+ADDING NEW ALGORITHMS
+
+When submitting a new algorithm for inclusion, a mandatory requirement
+is that at least a few test vectors from known sources (preferably
+standards) be included.
+
+Converting existing well known code is preferred, as it is more likely
+to have been reviewed and widely tested. If submitting code from LGPL
+sources, please consider changing the license to GPL (see section 3 of
+the LGPL).
+
+Algorithms submitted must also be generally patent-free (e.g. IDEA
+will not be included in the mainline until around 2011), and be based
+on a recognized standard and/or have been subjected to appropriate
+peer review.
+
+Also check for any RFCs which may relate to the use of specific algorithms,
+as well as general application notes such as RFC2451 ("The ESP CBC-Mode
+Cipher Algorithms").
+
+It's a good idea to avoid using lots of macros and use inlined functions
+instead, as gcc does a good job with inlining, while excessive use of
+macros can cause compilation problems on some platforms.
+
+Also check the TODO list at the web site listed below to see what people
+might already be working on.
+
+
+BUGS
+
+Send bug reports to:
+linux-crypto@vger.kernel.org
+Cc: Herbert Xu <herbert@gondor.apana.org.au>,
+ David S. Miller <davem@redhat.com>
+
+
+FURTHER INFORMATION
+
+For further patches and various updates, including the current TODO
+list, see:
+http://gondor.apana.org.au/~herbert/crypto/
+
+
+AUTHORS
+
+James Morris
+David S. Miller
+Herbert Xu
+
+
+CREDITS
+
+The following people provided invaluable feedback during the development
+of the API:
+
+ Alexey Kuznetzov
+ Rusty Russell
+ Herbert Valerio Riedel
+ Jeff Garzik
+ Michael Richardson
+ Andrew Morton
+ Ingo Oeser
+ Christoph Hellwig
+
+Portions of this API were derived from the following projects:
+
+ Kerneli Cryptoapi (http://www.kerneli.org/)
+ Alexander Kjeldaas
+ Herbert Valerio Riedel
+ Kyle McMartin
+ Jean-Luc Cooke
+ David Bryson
+ Clemens Fruhwirth
+ Tobias Ringstrom
+ Harald Welte
+
+and;
+
+ Nettle (http://www.lysator.liu.se/~nisse/nettle/)
+ Niels Möller
+
+Original developers of the crypto algorithms:
+
+ Dana L. How (DES)
+ Andrew Tridgell and Steve French (MD4)
+ Colin Plumb (MD5)
+ Steve Reid (SHA1)
+ Jean-Luc Cooke (SHA256, SHA384, SHA512)
+ Kazunori Miyazawa / USAGI (HMAC)
+ Matthew Skala (Twofish)
+ Dag Arne Osvik (Serpent)
+ Brian Gladman (AES)
+ Kartikey Mahendra Bhatt (CAST6)
+ Jon Oberheide (ARC4)
+ Jouni Malinen (Michael MIC)
+ NTT(Nippon Telegraph and Telephone Corporation) (Camellia)
+
+SHA1 algorithm contributors:
+ Jean-Francois Dive
+
+DES algorithm contributors:
+ Raimar Falke
+ Gisle Sælensminde
+ Niels Möller
+
+Blowfish algorithm contributors:
+ Herbert Valerio Riedel
+ Kyle McMartin
+
+Twofish algorithm contributors:
+ Werner Koch
+ Marc Mutz
+
+SHA256/384/512 algorithm contributors:
+ Andrew McDonald
+ Kyle McMartin
+ Herbert Valerio Riedel
+
+AES algorithm contributors:
+ Alexander Kjeldaas
+ Herbert Valerio Riedel
+ Kyle McMartin
+ Adam J. Richter
+ Fruhwirth Clemens (i586)
+ Linus Torvalds (i586)
+
+CAST5 algorithm contributors:
+ Kartikey Mahendra Bhatt (original developers unknown, FSF copyright).
+
+TEA/XTEA algorithm contributors:
+ Aaron Grothe
+ Michael Ringe
+
+Khazad algorithm contributors:
+ Aaron Grothe
+
+Whirlpool algorithm contributors:
+ Aaron Grothe
+ Jean-Luc Cooke
+
+Anubis algorithm contributors:
+ Aaron Grothe
+
+Tiger algorithm contributors:
+ Aaron Grothe
+
+VIA PadLock contributors:
+ Michal Ludvig
+
+Camellia algorithm contributors:
+ NTT(Nippon Telegraph and Telephone Corporation) (Camellia)
+
+Generic scatterwalk code by Adam J. Richter <adam@yggdrasil.com>
+
+Please send any credits updates or corrections to:
+Herbert Xu <herbert@gondor.apana.org.au>
+
diff --git a/Documentation/crypto/asymmetric-keys.txt b/Documentation/crypto/asymmetric-keys.txt
new file mode 100644
index 000000000..b7675904a
--- /dev/null
+++ b/Documentation/crypto/asymmetric-keys.txt
@@ -0,0 +1,312 @@
+ =============================================
+ ASYMMETRIC / PUBLIC-KEY CRYPTOGRAPHY KEY TYPE
+ =============================================
+
+Contents:
+
+ - Overview.
+ - Key identification.
+ - Accessing asymmetric keys.
+ - Signature verification.
+ - Asymmetric key subtypes.
+ - Instantiation data parsers.
+
+
+========
+OVERVIEW
+========
+
+The "asymmetric" key type is designed to be a container for the keys used in
+public-key cryptography, without imposing any particular restrictions on the
+form or mechanism of the cryptography or form of the key.
+
+The asymmetric key is given a subtype that defines what sort of data is
+associated with the key and provides operations to describe and destroy it.
+However, no requirement is made that the key data actually be stored in the
+key.
+
+A completely in-kernel key retention and operation subtype can be defined, but
+it would also be possible to provide access to cryptographic hardware (such as
+a TPM) that might be used to both retain the relevant key and perform
+operations using that key. In such a case, the asymmetric key would then
+merely be an interface to the TPM driver.
+
+Also provided is the concept of a data parser. Data parsers are responsible
+for extracting information from the blobs of data passed to the instantiation
+function. The first data parser that recognises the blob gets to set the
+subtype of the key and define the operations that can be done on that key.
+
+A data parser may interpret the data blob as containing the bits representing a
+key, or it may interpret it as a reference to a key held somewhere else in the
+system (for example, a TPM).
+
+
+==================
+KEY IDENTIFICATION
+==================
+
+If a key is added with an empty name, the instantiation data parsers are given
+the opportunity to pre-parse a key and to determine the description the key
+should be given from the content of the key.
+
+This can then be used to refer to the key, either by complete match or by
+partial match. The key type may also use other criteria to refer to a key.
+
+The asymmetric key type's match function can then perform a wider range of
+comparisons than just the straightforward comparison of the description with
+the criterion string:
+
+ (1) If the criterion string is of the form "id:<hexdigits>" then the match
+ function will examine a key's fingerprint to see if the hex digits given
+ after the "id:" match the tail. For instance:
+
+ keyctl search @s asymmetric id:5acc2142
+
+ will match a key with fingerprint:
+
+ 1A00 2040 7601 7889 DE11 882C 3823 04AD 5ACC 2142
+
+ (2) If the criterion string is of the form "<subtype>:<hexdigits>" then the
+ match will match the ID as in (1), but with the added restriction that
+ only keys of the specified subtype (e.g. tpm) will be matched. For
+ instance:
+
+ keyctl search @s asymmetric tpm:5acc2142
+
+Looking in /proc/keys, the last 8 hex digits of the key fingerprint are
+displayed, along with the subtype:
+
+ 1a39e171 I----- 1 perm 3f010000 0 0 asymmetri modsign.0: DSA 5acc2142 []
+
+
+=========================
+ACCESSING ASYMMETRIC KEYS
+=========================
+
+For general access to asymmetric keys from within the kernel, the following
+inclusion is required:
+
+ #include <crypto/public_key.h>
+
+This gives access to functions for dealing with asymmetric / public keys.
+Three enums are defined there for representing public-key cryptography
+algorithms:
+
+ enum pkey_algo
+
+digest algorithms used by those:
+
+ enum pkey_hash_algo
+
+and key identifier representations:
+
+ enum pkey_id_type
+
+Note that the key type representation types are required because key
+identifiers from different standards aren't necessarily compatible. For
+instance, PGP generates key identifiers by hashing the key data plus some
+PGP-specific metadata, whereas X.509 has arbitrary certificate identifiers.
+
+The operations defined upon a key are:
+
+ (1) Signature verification.
+
+Other operations are possible (such as encryption) with the same key data
+required for verification, but not currently supported, and others
+(eg. decryption and signature generation) require extra key data.
+
+
+SIGNATURE VERIFICATION
+----------------------
+
+An operation is provided to perform cryptographic signature verification, using
+an asymmetric key to provide or to provide access to the public key.
+
+ int verify_signature(const struct key *key,
+ const struct public_key_signature *sig);
+
+The caller must have already obtained the key from some source and can then use
+it to check the signature. The caller must have parsed the signature and
+transferred the relevant bits to the structure pointed to by sig.
+
+ struct public_key_signature {
+ u8 *digest;
+ u8 digest_size;
+ enum pkey_hash_algo pkey_hash_algo : 8;
+ u8 nr_mpi;
+ union {
+ MPI mpi[2];
+ ...
+ };
+ };
+
+The algorithm used must be noted in sig->pkey_hash_algo, and all the MPIs that
+make up the actual signature must be stored in sig->mpi[] and the count of MPIs
+placed in sig->nr_mpi.
+
+In addition, the data must have been digested by the caller and the resulting
+hash must be pointed to by sig->digest and the size of the hash be placed in
+sig->digest_size.
+
+The function will return 0 upon success or -EKEYREJECTED if the signature
+doesn't match.
+
+The function may also return -ENOTSUPP if an unsupported public-key algorithm
+or public-key/hash algorithm combination is specified or the key doesn't
+support the operation; -EBADMSG or -ERANGE if some of the parameters have weird
+data; or -ENOMEM if an allocation can't be performed. -EINVAL can be returned
+if the key argument is the wrong type or is incompletely set up.
+
+
+=======================
+ASYMMETRIC KEY SUBTYPES
+=======================
+
+Asymmetric keys have a subtype that defines the set of operations that can be
+performed on that key and that determines what data is attached as the key
+payload. The payload format is entirely at the whim of the subtype.
+
+The subtype is selected by the key data parser and the parser must initialise
+the data required for it. The asymmetric key retains a reference on the
+subtype module.
+
+The subtype definition structure can be found in:
+
+ #include <keys/asymmetric-subtype.h>
+
+and looks like the following:
+
+ struct asymmetric_key_subtype {
+ struct module *owner;
+ const char *name;
+
+ void (*describe)(const struct key *key, struct seq_file *m);
+ void (*destroy)(void *payload);
+ int (*verify_signature)(const struct key *key,
+ const struct public_key_signature *sig);
+ };
+
+Asymmetric keys point to this with their type_data[0] member.
+
+The owner and name fields should be set to the owning module and the name of
+the subtype. Currently, the name is only used for print statements.
+
+There are a number of operations defined by the subtype:
+
+ (1) describe().
+
+ Mandatory. This allows the subtype to display something in /proc/keys
+ against the key. For instance the name of the public key algorithm type
+ could be displayed. The key type will display the tail of the key
+ identity string after this.
+
+ (2) destroy().
+
+ Mandatory. This should free the memory associated with the key. The
+ asymmetric key will look after freeing the fingerprint and releasing the
+ reference on the subtype module.
+
+ (3) verify_signature().
+
+ Optional. These are the entry points for the key usage operations.
+ Currently there is only the one defined. If not set, the caller will be
+ given -ENOTSUPP. The subtype may do anything it likes to implement an
+ operation, including offloading to hardware.
+
+
+==========================
+INSTANTIATION DATA PARSERS
+==========================
+
+The asymmetric key type doesn't generally want to store or to deal with a raw
+blob of data that holds the key data. It would have to parse it and error
+check it each time it wanted to use it. Further, the contents of the blob may
+have various checks that can be performed on it (eg. self-signatures, validity
+dates) and may contain useful data about the key (identifiers, capabilities).
+
+Also, the blob may represent a pointer to some hardware containing the key
+rather than the key itself.
+
+Examples of blob formats for which parsers could be implemented include:
+
+ - OpenPGP packet stream [RFC 4880].
+ - X.509 ASN.1 stream.
+ - Pointer to TPM key.
+ - Pointer to UEFI key.
+
+During key instantiation each parser in the list is tried until one doesn't
+return -EBADMSG.
+
+The parser definition structure can be found in:
+
+ #include <keys/asymmetric-parser.h>
+
+and looks like the following:
+
+ struct asymmetric_key_parser {
+ struct module *owner;
+ const char *name;
+
+ int (*parse)(struct key_preparsed_payload *prep);
+ };
+
+The owner and name fields should be set to the owning module and the name of
+the parser.
+
+There is currently only a single operation defined by the parser, and it is
+mandatory:
+
+ (1) parse().
+
+ This is called to preparse the key from the key creation and update paths.
+ In particular, it is called during the key creation _before_ a key is
+ allocated, and as such, is permitted to provide the key's description in
+ the case that the caller declines to do so.
+
+ The caller passes a pointer to the following struct with all of the fields
+ cleared, except for data, datalen and quotalen [see
+ Documentation/security/keys.txt].
+
+ struct key_preparsed_payload {
+ char *description;
+ void *type_data[2];
+ void *payload;
+ const void *data;
+ size_t datalen;
+ size_t quotalen;
+ };
+
+ The instantiation data is in a blob pointed to by data and is datalen in
+ size. The parse() function is not permitted to change these two values at
+ all, and shouldn't change any of the other values _unless_ they are
+ recognise the blob format and will not return -EBADMSG to indicate it is
+ not theirs.
+
+ If the parser is happy with the blob, it should propose a description for
+ the key and attach it to ->description, ->type_data[0] should be set to
+ point to the subtype to be used, ->payload should be set to point to the
+ initialised data for that subtype, ->type_data[1] should point to a hex
+ fingerprint and quotalen should be updated to indicate how much quota this
+ key should account for.
+
+ When clearing up, the data attached to ->type_data[1] and ->description
+ will be kfree()'d and the data attached to ->payload will be passed to the
+ subtype's ->destroy() method to be disposed of. A module reference for
+ the subtype pointed to by ->type_data[0] will be put.
+
+
+ If the data format is not recognised, -EBADMSG should be returned. If it
+ is recognised, but the key cannot for some reason be set up, some other
+ negative error code should be returned. On success, 0 should be returned.
+
+ The key's fingerprint string may be partially matched upon. For a
+ public-key algorithm such as RSA and DSA this will likely be a printable
+ hex version of the key's fingerprint.
+
+Functions are provided to register and unregister parsers:
+
+ int register_asymmetric_key_parser(struct asymmetric_key_parser *parser);
+ void unregister_asymmetric_key_parser(struct asymmetric_key_parser *subtype);
+
+Parsers may not have the same name. The names are otherwise only used for
+displaying in debugging messages.
diff --git a/Documentation/crypto/async-tx-api.txt b/Documentation/crypto/async-tx-api.txt
new file mode 100644
index 000000000..7bf1be20d
--- /dev/null
+++ b/Documentation/crypto/async-tx-api.txt
@@ -0,0 +1,225 @@
+ Asynchronous Transfers/Transforms API
+
+1 INTRODUCTION
+
+2 GENEALOGY
+
+3 USAGE
+3.1 General format of the API
+3.2 Supported operations
+3.3 Descriptor management
+3.4 When does the operation execute?
+3.5 When does the operation complete?
+3.6 Constraints
+3.7 Example
+
+4 DMAENGINE DRIVER DEVELOPER NOTES
+4.1 Conformance points
+4.2 "My application needs exclusive control of hardware channels"
+
+5 SOURCE
+
+---
+
+1 INTRODUCTION
+
+The async_tx API provides methods for describing a chain of asynchronous
+bulk memory transfers/transforms with support for inter-transactional
+dependencies. It is implemented as a dmaengine client that smooths over
+the details of different hardware offload engine implementations. Code
+that is written to the API can optimize for asynchronous operation and
+the API will fit the chain of operations to the available offload
+resources.
+
+2 GENEALOGY
+
+The API was initially designed to offload the memory copy and
+xor-parity-calculations of the md-raid5 driver using the offload engines
+present in the Intel(R) Xscale series of I/O processors. It also built
+on the 'dmaengine' layer developed for offloading memory copies in the
+network stack using Intel(R) I/OAT engines. The following design
+features surfaced as a result:
+1/ implicit synchronous path: users of the API do not need to know if
+ the platform they are running on has offload capabilities. The
+ operation will be offloaded when an engine is available and carried out
+ in software otherwise.
+2/ cross channel dependency chains: the API allows a chain of dependent
+ operations to be submitted, like xor->copy->xor in the raid5 case. The
+ API automatically handles cases where the transition from one operation
+ to another implies a hardware channel switch.
+3/ dmaengine extensions to support multiple clients and operation types
+ beyond 'memcpy'
+
+3 USAGE
+
+3.1 General format of the API:
+struct dma_async_tx_descriptor *
+async_<operation>(<op specific parameters>, struct async_submit ctl *submit)
+
+3.2 Supported operations:
+memcpy - memory copy between a source and a destination buffer
+memset - fill a destination buffer with a byte value
+xor - xor a series of source buffers and write the result to a
+ destination buffer
+xor_val - xor a series of source buffers and set a flag if the
+ result is zero. The implementation attempts to prevent
+ writes to memory
+pq - generate the p+q (raid6 syndrome) from a series of source buffers
+pq_val - validate that a p and or q buffer are in sync with a given series of
+ sources
+datap - (raid6_datap_recov) recover a raid6 data block and the p block
+ from the given sources
+2data - (raid6_2data_recov) recover 2 raid6 data blocks from the given
+ sources
+
+3.3 Descriptor management:
+The return value is non-NULL and points to a 'descriptor' when the operation
+has been queued to execute asynchronously. Descriptors are recycled
+resources, under control of the offload engine driver, to be reused as
+operations complete. When an application needs to submit a chain of
+operations it must guarantee that the descriptor is not automatically recycled
+before the dependency is submitted. This requires that all descriptors be
+acknowledged by the application before the offload engine driver is allowed to
+recycle (or free) the descriptor. A descriptor can be acked by one of the
+following methods:
+1/ setting the ASYNC_TX_ACK flag if no child operations are to be submitted
+2/ submitting an unacknowledged descriptor as a dependency to another
+ async_tx call will implicitly set the acknowledged state.
+3/ calling async_tx_ack() on the descriptor.
+
+3.4 When does the operation execute?
+Operations do not immediately issue after return from the
+async_<operation> call. Offload engine drivers batch operations to
+improve performance by reducing the number of mmio cycles needed to
+manage the channel. Once a driver-specific threshold is met the driver
+automatically issues pending operations. An application can force this
+event by calling async_tx_issue_pending_all(). This operates on all
+channels since the application has no knowledge of channel to operation
+mapping.
+
+3.5 When does the operation complete?
+There are two methods for an application to learn about the completion
+of an operation.
+1/ Call dma_wait_for_async_tx(). This call causes the CPU to spin while
+ it polls for the completion of the operation. It handles dependency
+ chains and issuing pending operations.
+2/ Specify a completion callback. The callback routine runs in tasklet
+ context if the offload engine driver supports interrupts, or it is
+ called in application context if the operation is carried out
+ synchronously in software. The callback can be set in the call to
+ async_<operation>, or when the application needs to submit a chain of
+ unknown length it can use the async_trigger_callback() routine to set a
+ completion interrupt/callback at the end of the chain.
+
+3.6 Constraints:
+1/ Calls to async_<operation> are not permitted in IRQ context. Other
+ contexts are permitted provided constraint #2 is not violated.
+2/ Completion callback routines cannot submit new operations. This
+ results in recursion in the synchronous case and spin_locks being
+ acquired twice in the asynchronous case.
+
+3.7 Example:
+Perform a xor->copy->xor operation where each operation depends on the
+result from the previous operation:
+
+void callback(void *param)
+{
+ struct completion *cmp = param;
+
+ complete(cmp);
+}
+
+void run_xor_copy_xor(struct page **xor_srcs,
+ int xor_src_cnt,
+ struct page *xor_dest,
+ size_t xor_len,
+ struct page *copy_src,
+ struct page *copy_dest,
+ size_t copy_len)
+{
+ struct dma_async_tx_descriptor *tx;
+ addr_conv_t addr_conv[xor_src_cnt];
+ struct async_submit_ctl submit;
+ addr_conv_t addr_conv[NDISKS];
+ struct completion cmp;
+
+ init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST, NULL, NULL, NULL,
+ addr_conv);
+ tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit)
+
+ submit->depend_tx = tx;
+ tx = async_memcpy(copy_dest, copy_src, 0, 0, copy_len, &submit);
+
+ init_completion(&cmp);
+ init_async_submit(&submit, ASYNC_TX_XOR_DROP_DST | ASYNC_TX_ACK, tx,
+ callback, &cmp, addr_conv);
+ tx = async_xor(xor_dest, xor_srcs, 0, xor_src_cnt, xor_len, &submit);
+
+ async_tx_issue_pending_all();
+
+ wait_for_completion(&cmp);
+}
+
+See include/linux/async_tx.h for more information on the flags. See the
+ops_run_* and ops_complete_* routines in drivers/md/raid5.c for more
+implementation examples.
+
+4 DRIVER DEVELOPMENT NOTES
+
+4.1 Conformance points:
+There are a few conformance points required in dmaengine drivers to
+accommodate assumptions made by applications using the async_tx API:
+1/ Completion callbacks are expected to happen in tasklet context
+2/ dma_async_tx_descriptor fields are never manipulated in IRQ context
+3/ Use async_tx_run_dependencies() in the descriptor clean up path to
+ handle submission of dependent operations
+
+4.2 "My application needs exclusive control of hardware channels"
+Primarily this requirement arises from cases where a DMA engine driver
+is being used to support device-to-memory operations. A channel that is
+performing these operations cannot, for many platform specific reasons,
+be shared. For these cases the dma_request_channel() interface is
+provided.
+
+The interface is:
+struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
+ dma_filter_fn filter_fn,
+ void *filter_param);
+
+Where dma_filter_fn is defined as:
+typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+
+When the optional 'filter_fn' parameter is set to NULL
+dma_request_channel simply returns the first channel that satisfies the
+capability mask. Otherwise, when the mask parameter is insufficient for
+specifying the necessary channel, the filter_fn routine can be used to
+disposition the available channels in the system. The filter_fn routine
+is called once for each free channel in the system. Upon seeing a
+suitable channel filter_fn returns DMA_ACK which flags that channel to
+be the return value from dma_request_channel. A channel allocated via
+this interface is exclusive to the caller, until dma_release_channel()
+is called.
+
+The DMA_PRIVATE capability flag is used to tag dma devices that should
+not be used by the general-purpose allocator. It can be set at
+initialization time if it is known that a channel will always be
+private. Alternatively, it is set when dma_request_channel() finds an
+unused "public" channel.
+
+A couple caveats to note when implementing a driver and consumer:
+1/ Once a channel has been privately allocated it will no longer be
+ considered by the general-purpose allocator even after a call to
+ dma_release_channel().
+2/ Since capabilities are specified at the device level a dma_device
+ with multiple channels will either have all channels public, or all
+ channels private.
+
+5 SOURCE
+
+include/linux/dmaengine.h: core header file for DMA drivers and api users
+drivers/dma/dmaengine.c: offload engine channel management routines
+drivers/dma/: location for offload engine drivers
+include/linux/async_tx.h: core header file for the async_tx api
+crypto/async_tx/async_tx.c: async_tx interface to dmaengine and common code
+crypto/async_tx/async_memcpy.c: copy offload
+crypto/async_tx/async_xor.c: xor and xor zero sum offload
diff --git a/Documentation/crypto/descore-readme.txt b/Documentation/crypto/descore-readme.txt
new file mode 100644
index 000000000..16e9e6350
--- /dev/null
+++ b/Documentation/crypto/descore-readme.txt
@@ -0,0 +1,352 @@
+Below is the original README file from the descore.shar package.
+------------------------------------------------------------------------------
+
+des - fast & portable DES encryption & decryption.
+Copyright (C) 1992 Dana L. How
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU Library General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Author's address: how@isl.stanford.edu
+
+$Id: README,v 1.15 1992/05/20 00:25:32 how E $
+
+
+==>> To compile after untarring/unsharring, just `make' <<==
+
+
+This package was designed with the following goals:
+1. Highest possible encryption/decryption PERFORMANCE.
+2. PORTABILITY to any byte-addressable host with a 32bit unsigned C type
+3. Plug-compatible replacement for KERBEROS's low-level routines.
+
+This second release includes a number of performance enhancements for
+register-starved machines. My discussions with Richard Outerbridge,
+71755.204@compuserve.com, sparked a number of these enhancements.
+
+To more rapidly understand the code in this package, inspect desSmallFips.i
+(created by typing `make') BEFORE you tackle desCode.h. The latter is set
+up in a parameterized fashion so it can easily be modified by speed-daemon
+hackers in pursuit of that last microsecond. You will find it more
+illuminating to inspect one specific implementation,
+and then move on to the common abstract skeleton with this one in mind.
+
+
+performance comparison to other available des code which i could
+compile on a SPARCStation 1 (cc -O4, gcc -O2):
+
+this code (byte-order independent):
+ 30us per encryption (options: 64k tables, no IP/FP)
+ 33us per encryption (options: 64k tables, FIPS standard bit ordering)
+ 45us per encryption (options: 2k tables, no IP/FP)
+ 48us per encryption (options: 2k tables, FIPS standard bit ordering)
+ 275us to set a new key (uses 1k of key tables)
+ this has the quickest encryption/decryption routines i've seen.
+ since i was interested in fast des filters rather than crypt(3)
+ and password cracking, i haven't really bothered yet to speed up
+ the key setting routine. also, i have no interest in re-implementing
+ all the other junk in the mit kerberos des library, so i've just
+ provided my routines with little stub interfaces so they can be
+ used as drop-in replacements with mit's code or any of the mit-
+ compatible packages below. (note that the first two timings above
+ are highly variable because of cache effects).
+
+kerberos des replacement from australia (version 1.95):
+ 53us per encryption (uses 2k of tables)
+ 96us to set a new key (uses 2.25k of key tables)
+ so despite the author's inclusion of some of the performance
+ improvements i had suggested to him, this package's
+ encryption/decryption is still slower on the sparc and 68000.
+ more specifically, 19-40% slower on the 68020 and 11-35% slower
+ on the sparc, depending on the compiler;
+ in full gory detail (ALT_ECB is a libdes variant):
+ compiler machine desCore libdes ALT_ECB slower by
+ gcc 2.1 -O2 Sun 3/110 304 uS 369.5uS 461.8uS 22%
+ cc -O1 Sun 3/110 336 uS 436.6uS 399.3uS 19%
+ cc -O2 Sun 3/110 360 uS 532.4uS 505.1uS 40%
+ cc -O4 Sun 3/110 365 uS 532.3uS 505.3uS 38%
+ gcc 2.1 -O2 Sun 4/50 48 uS 53.4uS 57.5uS 11%
+ cc -O2 Sun 4/50 48 uS 64.6uS 64.7uS 35%
+ cc -O4 Sun 4/50 48 uS 64.7uS 64.9uS 35%
+ (my time measurements are not as accurate as his).
+ the comments in my first release of desCore on version 1.92:
+ 68us per encryption (uses 2k of tables)
+ 96us to set a new key (uses 2.25k of key tables)
+ this is a very nice package which implements the most important
+ of the optimizations which i did in my encryption routines.
+ it's a bit weak on common low-level optimizations which is why
+ it's 39%-106% slower. because he was interested in fast crypt(3) and
+ password-cracking applications, he also used the same ideas to
+ speed up the key-setting routines with impressive results.
+ (at some point i may do the same in my package). he also implements
+ the rest of the mit des library.
+ (code from eay@psych.psy.uq.oz.au via comp.sources.misc)
+
+fast crypt(3) package from denmark:
+ the des routine here is buried inside a loop to do the
+ crypt function and i didn't feel like ripping it out and measuring
+ performance. his code takes 26 sparc instructions to compute one
+ des iteration; above, Quick (64k) takes 21 and Small (2k) takes 37.
+ he claims to use 280k of tables but the iteration calculation seems
+ to use only 128k. his tables and code are machine independent.
+ (code from glad@daimi.aau.dk via alt.sources or comp.sources.misc)
+
+swedish reimplementation of Kerberos des library
+ 108us per encryption (uses 34k worth of tables)
+ 134us to set a new key (uses 32k of key tables to get this speed!)
+ the tables used seem to be machine-independent;
+ he seems to have included a lot of special case code
+ so that, e.g., `long' loads can be used instead of 4 `char' loads
+ when the machine's architecture allows it.
+ (code obtained from chalmers.se:pub/des)
+
+crack 3.3c package from england:
+ as in crypt above, the des routine is buried in a loop. it's
+ also very modified for crypt. his iteration code uses 16k
+ of tables and appears to be slow.
+ (code obtained from aem@aber.ac.uk via alt.sources or comp.sources.misc)
+
+``highly optimized'' and tweaked Kerberos/Athena code (byte-order dependent):
+ 165us per encryption (uses 6k worth of tables)
+ 478us to set a new key (uses <1k of key tables)
+ so despite the comments in this code, it was possible to get
+ faster code AND smaller tables, as well as making the tables
+ machine-independent.
+ (code obtained from prep.ai.mit.edu)
+
+UC Berkeley code (depends on machine-endedness):
+ 226us per encryption
+10848us to set a new key
+ table sizes are unclear, but they don't look very small
+ (code obtained from wuarchive.wustl.edu)
+
+
+motivation and history
+
+a while ago i wanted some des routines and the routines documented on sun's
+man pages either didn't exist or dumped core. i had heard of kerberos,
+and knew that it used des, so i figured i'd use its routines. but once
+i got it and looked at the code, it really set off a lot of pet peeves -
+it was too convoluted, the code had been written without taking
+advantage of the regular structure of operations such as IP, E, and FP
+(i.e. the author didn't sit down and think before coding),
+it was excessively slow, the author had attempted to clarify the code
+by adding MORE statements to make the data movement more `consistent'
+instead of simplifying his implementation and cutting down on all data
+movement (in particular, his use of L1, R1, L2, R2), and it was full of
+idiotic `tweaks' for particular machines which failed to deliver significant
+speedups but which did obfuscate everything. so i took the test data
+from his verification program and rewrote everything else.
+
+a while later i ran across the great crypt(3) package mentioned above.
+the fact that this guy was computing 2 sboxes per table lookup rather
+than one (and using a MUCH larger table in the process) emboldened me to
+do the same - it was a trivial change from which i had been scared away
+by the larger table size. in his case he didn't realize you don't need to keep
+the working data in TWO forms, one for easy use of half the sboxes in
+indexing, the other for easy use of the other half; instead you can keep
+it in the form for the first half and use a simple rotate to get the other
+half. this means i have (almost) half the data manipulation and half
+the table size. in fairness though he might be encoding something particular
+to crypt(3) in his tables - i didn't check.
+
+i'm glad that i implemented it the way i did, because this C version is
+portable (the ifdef's are performance enhancements) and it is faster
+than versions hand-written in assembly for the sparc!
+
+
+porting notes
+
+one thing i did not want to do was write an enormous mess
+which depended on endedness and other machine quirks,
+and which necessarily produced different code and different lookup tables
+for different machines. see the kerberos code for an example
+of what i didn't want to do; all their endedness-specific `optimizations'
+obfuscate the code and in the end were slower than a simpler machine
+independent approach. however, there are always some portability
+considerations of some kind, and i have included some options
+for varying numbers of register variables.
+perhaps some will still regard the result as a mess!
+
+1) i assume everything is byte addressable, although i don't actually
+ depend on the byte order, and that bytes are 8 bits.
+ i assume word pointers can be freely cast to and from char pointers.
+ note that 99% of C programs make these assumptions.
+ i always use unsigned char's if the high bit could be set.
+2) the typedef `word' means a 32 bit unsigned integral type.
+ if `unsigned long' is not 32 bits, change the typedef in desCore.h.
+ i assume sizeof(word) == 4 EVERYWHERE.
+
+the (worst-case) cost of my NOT doing endedness-specific optimizations
+in the data loading and storing code surrounding the key iterations
+is less than 12%. also, there is the added benefit that
+the input and output work areas do not need to be word-aligned.
+
+
+OPTIONAL performance optimizations
+
+1) you should define one of `i386,' `vax,' `mc68000,' or `sparc,'
+ whichever one is closest to the capabilities of your machine.
+ see the start of desCode.h to see exactly what this selection implies.
+ note that if you select the wrong one, the des code will still work;
+ these are just performance tweaks.
+2) for those with functional `asm' keywords: you should change the
+ ROR and ROL macros to use machine rotate instructions if you have them.
+ this will save 2 instructions and a temporary per use,
+ or about 32 to 40 instructions per en/decryption.
+ note that gcc is smart enough to translate the ROL/R macros into
+ machine rotates!
+
+these optimizations are all rather persnickety, yet with them you should
+be able to get performance equal to assembly-coding, except that:
+1) with the lack of a bit rotate operator in C, rotates have to be synthesized
+ from shifts. so access to `asm' will speed things up if your machine
+ has rotates, as explained above in (3) (not necessary if you use gcc).
+2) if your machine has less than 12 32-bit registers i doubt your compiler will
+ generate good code.
+ `i386' tries to configure the code for a 386 by only declaring 3 registers
+ (it appears that gcc can use ebx, esi and edi to hold register variables).
+ however, if you like assembly coding, the 386 does have 7 32-bit registers,
+ and if you use ALL of them, use `scaled by 8' address modes with displacement
+ and other tricks, you can get reasonable routines for DesQuickCore... with
+ about 250 instructions apiece. For DesSmall... it will help to rearrange
+ des_keymap, i.e., now the sbox # is the high part of the index and
+ the 6 bits of data is the low part; it helps to exchange these.
+ since i have no way to conveniently test it i have not provided my
+ shoehorned 386 version. note that with this release of desCore, gcc is able
+ to put everything in registers(!), and generate about 370 instructions apiece
+ for the DesQuickCore... routines!
+
+coding notes
+
+the en/decryption routines each use 6 necessary register variables,
+with 4 being actively used at once during the inner iterations.
+if you don't have 4 register variables get a new machine.
+up to 8 more registers are used to hold constants in some configurations.
+
+i assume that the use of a constant is more expensive than using a register:
+a) additionally, i have tried to put the larger constants in registers.
+ registering priority was by the following:
+ anything more than 12 bits (bad for RISC and CISC)
+ greater than 127 in value (can't use movq or byte immediate on CISC)
+ 9-127 (may not be able to use CISC shift immediate or add/sub quick),
+ 1-8 were never registered, being the cheapest constants.
+b) the compiler may be too stupid to realize table and table+256 should
+ be assigned to different constant registers and instead repetitively
+ do the arithmetic, so i assign these to explicit `m' register variables
+ when possible and helpful.
+
+i assume that indexing is cheaper or equivalent to auto increment/decrement,
+where the index is 7 bits unsigned or smaller.
+this assumption is reversed for 68k and vax.
+
+i assume that addresses can be cheaply formed from two registers,
+or from a register and a small constant.
+for the 68000, the `two registers and small offset' form is used sparingly.
+all index scaling is done explicitly - no hidden shifts by log2(sizeof).
+
+the code is written so that even a dumb compiler
+should never need more than one hidden temporary,
+increasing the chance that everything will fit in the registers.
+KEEP THIS MORE SUBTLE POINT IN MIND IF YOU REWRITE ANYTHING.
+(actually, there are some code fragments now which do require two temps,
+but fixing it would either break the structure of the macros or
+require declaring another temporary).
+
+
+special efficient data format
+
+bits are manipulated in this arrangement most of the time (S7 S5 S3 S1):
+ 003130292827xxxx242322212019xxxx161514131211xxxx080706050403xxxx
+(the x bits are still there, i'm just emphasizing where the S boxes are).
+bits are rotated left 4 when computing S6 S4 S2 S0:
+ 282726252423xxxx201918171615xxxx121110090807xxxx040302010031xxxx
+the rightmost two bits are usually cleared so the lower byte can be used
+as an index into an sbox mapping table. the next two x'd bits are set
+to various values to access different parts of the tables.
+
+
+how to use the routines
+
+datatypes:
+ pointer to 8 byte area of type DesData
+ used to hold keys and input/output blocks to des.
+
+ pointer to 128 byte area of type DesKeys
+ used to hold full 768-bit key.
+ must be long-aligned.
+
+DesQuickInit()
+ call this before using any other routine with `Quick' in its name.
+ it generates the special 64k table these routines need.
+DesQuickDone()
+ frees this table
+
+DesMethod(m, k)
+ m points to a 128byte block, k points to an 8 byte des key
+ which must have odd parity (or -1 is returned) and which must
+ not be a (semi-)weak key (or -2 is returned).
+ normally DesMethod() returns 0.
+ m is filled in from k so that when one of the routines below
+ is called with m, the routine will act like standard des
+ en/decryption with the key k. if you use DesMethod,
+ you supply a standard 56bit key; however, if you fill in
+ m yourself, you will get a 768bit key - but then it won't
+ be standard. it's 768bits not 1024 because the least significant
+ two bits of each byte are not used. note that these two bits
+ will be set to magic constants which speed up the encryption/decryption
+ on some machines. and yes, each byte controls
+ a specific sbox during a specific iteration.
+ you really shouldn't use the 768bit format directly; i should
+ provide a routine that converts 128 6-bit bytes (specified in
+ S-box mapping order or something) into the right format for you.
+ this would entail some byte concatenation and rotation.
+
+Des{Small|Quick}{Fips|Core}{Encrypt|Decrypt}(d, m, s)
+ performs des on the 8 bytes at s into the 8 bytes at d. (d,s: char *).
+ uses m as a 768bit key as explained above.
+ the Encrypt|Decrypt choice is obvious.
+ Fips|Core determines whether a completely standard FIPS initial
+ and final permutation is done; if not, then the data is loaded
+ and stored in a nonstandard bit order (FIPS w/o IP/FP).
+ Fips slows down Quick by 10%, Small by 9%.
+ Small|Quick determines whether you use the normal routine
+ or the crazy quick one which gobbles up 64k more of memory.
+ Small is 50% slower then Quick, but Quick needs 32 times as much
+ memory. Quick is included for programs that do nothing but DES,
+ e.g., encryption filters, etc.
+
+
+Getting it to compile on your machine
+
+there are no machine-dependencies in the code (see porting),
+except perhaps the `now()' macro in desTest.c.
+ALL generated tables are machine independent.
+you should edit the Makefile with the appropriate optimization flags
+for your compiler (MAX optimization).
+
+
+Speeding up kerberos (and/or its des library)
+
+note that i have included a kerberos-compatible interface in desUtil.c
+through the functions des_key_sched() and des_ecb_encrypt().
+to use these with kerberos or kerberos-compatible code put desCore.a
+ahead of the kerberos-compatible library on your linker's command line.
+you should not need to #include desCore.h; just include the header
+file provided with the kerberos library.
+
+Other uses
+
+the macros in desCode.h would be very useful for putting inline des
+functions in more complicated encryption routines.
diff --git a/Documentation/dcdbas.txt b/Documentation/dcdbas.txt
new file mode 100644
index 000000000..e1c52e2dc
--- /dev/null
+++ b/Documentation/dcdbas.txt
@@ -0,0 +1,91 @@
+Overview
+
+The Dell Systems Management Base Driver provides a sysfs interface for
+systems management software such as Dell OpenManage to perform system
+management interrupts and host control actions (system power cycle or
+power off after OS shutdown) on certain Dell systems.
+
+Dell OpenManage requires this driver on the following Dell PowerEdge systems:
+300, 1300, 1400, 400SC, 500SC, 1500SC, 1550, 600SC, 1600SC, 650, 1655MC,
+700, and 750. Other Dell software such as the open source libsmbios project
+is expected to make use of this driver, and it may include the use of this
+driver on other Dell systems.
+
+The Dell libsmbios project aims towards providing access to as much BIOS
+information as possible. See http://linux.dell.com/libsmbios/main/ for
+more information about the libsmbios project.
+
+
+System Management Interrupt
+
+On some Dell systems, systems management software must access certain
+management information via a system management interrupt (SMI). The SMI data
+buffer must reside in 32-bit address space, and the physical address of the
+buffer is required for the SMI. The driver maintains the memory required for
+the SMI and provides a way for the application to generate the SMI.
+The driver creates the following sysfs entries for systems management
+software to perform these system management interrupts:
+
+/sys/devices/platform/dcdbas/smi_data
+/sys/devices/platform/dcdbas/smi_data_buf_phys_addr
+/sys/devices/platform/dcdbas/smi_data_buf_size
+/sys/devices/platform/dcdbas/smi_request
+
+Systems management software must perform the following steps to execute
+a SMI using this driver:
+
+1) Lock smi_data.
+2) Write system management command to smi_data.
+3) Write "1" to smi_request to generate a calling interface SMI or
+ "2" to generate a raw SMI.
+4) Read system management command response from smi_data.
+5) Unlock smi_data.
+
+
+Host Control Action
+
+Dell OpenManage supports a host control feature that allows the administrator
+to perform a power cycle or power off of the system after the OS has finished
+shutting down. On some Dell systems, this host control feature requires that
+a driver perform a SMI after the OS has finished shutting down.
+
+The driver creates the following sysfs entries for systems management software
+to schedule the driver to perform a power cycle or power off host control
+action after the system has finished shutting down:
+
+/sys/devices/platform/dcdbas/host_control_action
+/sys/devices/platform/dcdbas/host_control_smi_type
+/sys/devices/platform/dcdbas/host_control_on_shutdown
+
+Dell OpenManage performs the following steps to execute a power cycle or
+power off host control action using this driver:
+
+1) Write host control action to be performed to host_control_action.
+2) Write type of SMI that driver needs to perform to host_control_smi_type.
+3) Write "1" to host_control_on_shutdown to enable host control action.
+4) Initiate OS shutdown.
+ (Driver will perform host control SMI when it is notified that the OS
+ has finished shutting down.)
+
+
+Host Control SMI Type
+
+The following table shows the value to write to host_control_smi_type to
+perform a power cycle or power off host control action:
+
+PowerEdge System Host Control SMI Type
+---------------- ---------------------
+ 300 HC_SMITYPE_TYPE1
+ 1300 HC_SMITYPE_TYPE1
+ 1400 HC_SMITYPE_TYPE2
+ 500SC HC_SMITYPE_TYPE2
+ 1500SC HC_SMITYPE_TYPE2
+ 1550 HC_SMITYPE_TYPE2
+ 600SC HC_SMITYPE_TYPE2
+ 1600SC HC_SMITYPE_TYPE2
+ 650 HC_SMITYPE_TYPE2
+ 1655MC HC_SMITYPE_TYPE2
+ 700 HC_SMITYPE_TYPE3
+ 750 HC_SMITYPE_TYPE3
+
+
diff --git a/Documentation/debugging-modules.txt b/Documentation/debugging-modules.txt
new file mode 100644
index 000000000..172ad4aec
--- /dev/null
+++ b/Documentation/debugging-modules.txt
@@ -0,0 +1,22 @@
+Debugging Modules after 2.6.3
+-----------------------------
+
+In almost all distributions, the kernel asks for modules which don't
+exist, such as "net-pf-10" or whatever. Changing "modprobe -q" to
+"succeed" in this case is hacky and breaks some setups, and also we
+want to know if it failed for the fallback code for old aliases in
+fs/char_dev.c, for example.
+
+In the past a debugging message which would fill people's logs was
+emitted. This debugging message has been removed. The correct way
+of debugging module problems is something like this:
+
+echo '#! /bin/sh' > /tmp/modprobe
+echo 'echo "$@" >> /tmp/modprobe.log' >> /tmp/modprobe
+echo 'exec /sbin/modprobe "$@"' >> /tmp/modprobe
+chmod a+x /tmp/modprobe
+echo /tmp/modprobe > /proc/sys/kernel/modprobe
+
+Note that the above applies only when the *kernel* is requesting
+that the module be loaded -- it won't have any effect if that module
+is being loaded explicitly using "modprobe" from userspace.
diff --git a/Documentation/debugging-via-ohci1394.txt b/Documentation/debugging-via-ohci1394.txt
new file mode 100644
index 000000000..5c9a567b3
--- /dev/null
+++ b/Documentation/debugging-via-ohci1394.txt
@@ -0,0 +1,184 @@
+
+ Using physical DMA provided by OHCI-1394 FireWire controllers for debugging
+ ---------------------------------------------------------------------------
+
+Introduction
+------------
+
+Basically all FireWire controllers which are in use today are compliant
+to the OHCI-1394 specification which defines the controller to be a PCI
+bus master which uses DMA to offload data transfers from the CPU and has
+a "Physical Response Unit" which executes specific requests by employing
+PCI-Bus master DMA after applying filters defined by the OHCI-1394 driver.
+
+Once properly configured, remote machines can send these requests to
+ask the OHCI-1394 controller to perform read and write requests on
+physical system memory and, for read requests, send the result of
+the physical memory read back to the requester.
+
+With that, it is possible to debug issues by reading interesting memory
+locations such as buffers like the printk buffer or the process table.
+
+Retrieving a full system memory dump is also possible over the FireWire,
+using data transfer rates in the order of 10MB/s or more.
+
+With most FireWire controllers, memory access is limited to the low 4 GB
+of physical address space. This can be a problem on IA64 machines where
+memory is located mostly above that limit, but it is rarely a problem on
+more common hardware such as x86, x86-64 and PowerPC.
+
+At least LSI FW643e and FW643e2 controllers are known to support access to
+physical addresses above 4 GB, but this feature is currently not enabled by
+Linux.
+
+Together with a early initialization of the OHCI-1394 controller for debugging,
+this facility proved most useful for examining long debugs logs in the printk
+buffer on to debug early boot problems in areas like ACPI where the system
+fails to boot and other means for debugging (serial port) are either not
+available (notebooks) or too slow for extensive debug information (like ACPI).
+
+Drivers
+-------
+
+The firewire-ohci driver in drivers/firewire uses filtered physical
+DMA by default, which is more secure but not suitable for remote debugging.
+Pass the remote_dma=1 parameter to the driver to get unfiltered physical DMA.
+
+Because the firewire-ohci driver depends on the PCI enumeration to be
+completed, an initialization routine which runs pretty early has been
+implemented for x86. This routine runs long before console_init() can be
+called, i.e. before the printk buffer appears on the console.
+
+To activate it, enable CONFIG_PROVIDE_OHCI1394_DMA_INIT (Kernel hacking menu:
+Remote debugging over FireWire early on boot) and pass the parameter
+"ohci1394_dma=early" to the recompiled kernel on boot.
+
+Tools
+-----
+
+firescope - Originally developed by Benjamin Herrenschmidt, Andi Kleen ported
+it from PowerPC to x86 and x86_64 and added functionality, firescope can now
+be used to view the printk buffer of a remote machine, even with live update.
+
+Bernhard Kaindl enhanced firescope to support accessing 64-bit machines
+from 32-bit firescope and vice versa:
+- http://v3.sk/~lkundrak/firescope/
+
+and he implemented fast system dump (alpha version - read README.txt):
+- http://halobates.de/firewire/firedump-0.1.tar.bz2
+
+There is also a gdb proxy for firewire which allows to use gdb to access
+data which can be referenced from symbols found by gdb in vmlinux:
+- http://halobates.de/firewire/fireproxy-0.33.tar.bz2
+
+The latest version of this gdb proxy (fireproxy-0.34) can communicate (not
+yet stable) with kgdb over an memory-based communication module (kgdbom).
+
+Getting Started
+---------------
+
+The OHCI-1394 specification regulates that the OHCI-1394 controller must
+disable all physical DMA on each bus reset.
+
+This means that if you want to debug an issue in a system state where
+interrupts are disabled and where no polling of the OHCI-1394 controller
+for bus resets takes place, you have to establish any FireWire cable
+connections and fully initialize all FireWire hardware __before__ the
+system enters such state.
+
+Step-by-step instructions for using firescope with early OHCI initialization:
+
+1) Verify that your hardware is supported:
+
+ Load the firewire-ohci module and check your kernel logs.
+ You should see a line similar to
+
+ firewire_ohci 0000:15:00.1: added OHCI v1.0 device as card 2, 4 IR + 4 IT
+ ... contexts, quirks 0x11
+
+ when loading the driver. If you have no supported controller, many PCI,
+ CardBus and even some Express cards which are fully compliant to OHCI-1394
+ specification are available. If it requires no driver for Windows operating
+ systems, it most likely is. Only specialized shops have cards which are not
+ compliant, they are based on TI PCILynx chips and require drivers for Win-
+ dows operating systems.
+
+ The mentioned kernel log message contains the string "physUB" if the
+ controller implements a writable Physical Upper Bound register. This is
+ required for physical DMA above 4 GB (but not utilized by Linux yet).
+
+2) Establish a working FireWire cable connection:
+
+ Any FireWire cable, as long at it provides electrically and mechanically
+ stable connection and has matching connectors (there are small 4-pin and
+ large 6-pin FireWire ports) will do.
+
+ If an driver is running on both machines you should see a line like
+
+ firewire_core 0000:15:00.1: created device fw1: GUID 00061b0020105917, S400
+
+ on both machines in the kernel log when the cable is plugged in
+ and connects the two machines.
+
+3) Test physical DMA using firescope:
+
+ On the debug host, make sure that /dev/fw* is accessible,
+ then start firescope:
+
+ $ firescope
+ Port 0 (/dev/fw1) opened, 2 nodes detected
+
+ FireScope
+ ---------
+ Target : <unspecified>
+ Gen : 1
+ [Ctrl-T] choose target
+ [Ctrl-H] this menu
+ [Ctrl-Q] quit
+
+ ------> Press Ctrl-T now, the output should be similar to:
+
+ 2 nodes available, local node is: 0
+ 0: ffc0, uuid: 00000000 00000000 [LOCAL]
+ 1: ffc1, uuid: 00279000 ba4bb801
+
+ Besides the [LOCAL] node, it must show another node without error message.
+
+4) Prepare for debugging with early OHCI-1394 initialization:
+
+ 4.1) Kernel compilation and installation on debug target
+
+ Compile the kernel to be debugged with CONFIG_PROVIDE_OHCI1394_DMA_INIT
+ (Kernel hacking: Provide code for enabling DMA over FireWire early on boot)
+ enabled and install it on the machine to be debugged (debug target).
+
+ 4.2) Transfer the System.map of the debugged kernel to the debug host
+
+ Copy the System.map of the kernel be debugged to the debug host (the host
+ which is connected to the debugged machine over the FireWire cable).
+
+5) Retrieving the printk buffer contents:
+
+ With the FireWire cable connected, the OHCI-1394 driver on the debugging
+ host loaded, reboot the debugged machine, booting the kernel which has
+ CONFIG_PROVIDE_OHCI1394_DMA_INIT enabled, with the option ohci1394_dma=early.
+
+ Then, on the debugging host, run firescope, for example by using -A:
+
+ firescope -A System.map-of-debug-target-kernel
+
+ Note: -A automatically attaches to the first non-local node. It only works
+ reliably if only connected two machines are connected using FireWire.
+
+ After having attached to the debug target, press Ctrl-D to view the
+ complete printk buffer or Ctrl-U to enter auto update mode and get an
+ updated live view of recent kernel messages logged on the debug target.
+
+ Call "firescope -h" to get more information on firescope's options.
+
+Notes
+-----
+Documentation and specifications: http://halobates.de/firewire/
+
+FireWire is a trademark of Apple Inc. - for more information please refer to:
+http://en.wikipedia.org/wiki/FireWire
diff --git a/Documentation/dell_rbu.txt b/Documentation/dell_rbu.txt
new file mode 100644
index 000000000..d262e22bd
--- /dev/null
+++ b/Documentation/dell_rbu.txt
@@ -0,0 +1,100 @@
+Purpose:
+Demonstrate the usage of the new open sourced rbu (Remote BIOS Update) driver
+for updating BIOS images on Dell servers and desktops.
+
+Scope:
+This document discusses the functionality of the rbu driver only.
+It does not cover the support needed from applications to enable the BIOS to
+update itself with the image downloaded in to the memory.
+
+Overview:
+This driver works with Dell OpenManage or Dell Update Packages for updating
+the BIOS on Dell servers (starting from servers sold since 1999), desktops
+and notebooks (starting from those sold in 2005).
+Please go to http://support.dell.com register and you can find info on
+OpenManage and Dell Update packages (DUP).
+Libsmbios can also be used to update BIOS on Dell systems go to
+http://linux.dell.com/libsmbios/ for details.
+
+Dell_RBU driver supports BIOS update using the monolithic image and packetized
+image methods. In case of monolithic the driver allocates a contiguous chunk
+of physical pages having the BIOS image. In case of packetized the app
+using the driver breaks the image in to packets of fixed sizes and the driver
+would place each packet in contiguous physical memory. The driver also
+maintains a link list of packets for reading them back.
+If the dell_rbu driver is unloaded all the allocated memory is freed.
+
+The rbu driver needs to have an application (as mentioned above)which will
+inform the BIOS to enable the update in the next system reboot.
+
+The user should not unload the rbu driver after downloading the BIOS image
+or updating.
+
+The driver load creates the following directories under the /sys file system.
+/sys/class/firmware/dell_rbu/loading
+/sys/class/firmware/dell_rbu/data
+/sys/devices/platform/dell_rbu/image_type
+/sys/devices/platform/dell_rbu/data
+/sys/devices/platform/dell_rbu/packet_size
+
+The driver supports two types of update mechanism; monolithic and packetized.
+These update mechanism depends upon the BIOS currently running on the system.
+Most of the Dell systems support a monolithic update where the BIOS image is
+copied to a single contiguous block of physical memory.
+In case of packet mechanism the single memory can be broken in smaller chunks
+of contiguous memory and the BIOS image is scattered in these packets.
+
+By default the driver uses monolithic memory for the update type. This can be
+changed to packets during the driver load time by specifying the load
+parameter image_type=packet. This can also be changed later as below
+echo packet > /sys/devices/platform/dell_rbu/image_type
+
+In packet update mode the packet size has to be given before any packets can
+be downloaded. It is done as below
+echo XXXX > /sys/devices/platform/dell_rbu/packet_size
+In the packet update mechanism, the user needs to create a new file having
+packets of data arranged back to back. It can be done as follows
+The user creates packets header, gets the chunk of the BIOS image and
+places it next to the packetheader; now, the packetheader + BIOS image chunk
+added together should match the specified packet_size. This makes one
+packet, the user needs to create more such packets out of the entire BIOS
+image file and then arrange all these packets back to back in to one single
+file.
+This file is then copied to /sys/class/firmware/dell_rbu/data.
+Once this file gets to the driver, the driver extracts packet_size data from
+the file and spreads it across the physical memory in contiguous packet_sized
+space.
+This method makes sure that all the packets get to the driver in a single operation.
+
+In monolithic update the user simply get the BIOS image (.hdr file) and copies
+to the data file as is without any change to the BIOS image itself.
+
+Do the steps below to download the BIOS image.
+1) echo 1 > /sys/class/firmware/dell_rbu/loading
+2) cp bios_image.hdr /sys/class/firmware/dell_rbu/data
+3) echo 0 > /sys/class/firmware/dell_rbu/loading
+
+The /sys/class/firmware/dell_rbu/ entries will remain till the following is
+done.
+echo -1 > /sys/class/firmware/dell_rbu/loading
+Until this step is completed the driver cannot be unloaded.
+Also echoing either mono, packet or init in to image_type will free up the
+memory allocated by the driver.
+
+If a user by accident executes steps 1 and 3 above without executing step 2;
+it will make the /sys/class/firmware/dell_rbu/ entries disappear.
+The entries can be recreated by doing the following
+echo init > /sys/devices/platform/dell_rbu/image_type
+NOTE: echoing init in image_type does not change it original value.
+
+Also the driver provides /sys/devices/platform/dell_rbu/data readonly file to
+read back the image downloaded.
+
+NOTE:
+This driver requires a patch for firmware_class.c which has the modified
+request_firmware_nowait function.
+Also after updating the BIOS image a user mode application needs to execute
+code which sends the BIOS update request to the BIOS. So on the next reboot
+the BIOS knows about the new image downloaded and it updates itself.
+Also don't unload the rbu driver if the image has to be updated.
+
diff --git a/Documentation/development-process/1.Intro b/Documentation/development-process/1.Intro
new file mode 100644
index 000000000..9b614480a
--- /dev/null
+++ b/Documentation/development-process/1.Intro
@@ -0,0 +1,274 @@
+1: A GUIDE TO THE KERNEL DEVELOPMENT PROCESS
+
+The purpose of this document is to help developers (and their managers)
+work with the development community with a minimum of frustration. It is
+an attempt to document how this community works in a way which is
+accessible to those who are not intimately familiar with Linux kernel
+development (or, indeed, free software development in general). While
+there is some technical material here, this is very much a process-oriented
+discussion which does not require a deep knowledge of kernel programming to
+understand.
+
+
+1.1: EXECUTIVE SUMMARY
+
+The rest of this section covers the scope of the kernel development process
+and the kinds of frustrations that developers and their employers can
+encounter there. There are a great many reasons why kernel code should be
+merged into the official ("mainline") kernel, including automatic
+availability to users, community support in many forms, and the ability to
+influence the direction of kernel development. Code contributed to the
+Linux kernel must be made available under a GPL-compatible license.
+
+Section 2 introduces the development process, the kernel release cycle, and
+the mechanics of the merge window. The various phases in the patch
+development, review, and merging cycle are covered. There is some
+discussion of tools and mailing lists. Developers wanting to get started
+with kernel development are encouraged to track down and fix bugs as an
+initial exercise.
+
+Section 3 covers early-stage project planning, with an emphasis on
+involving the development community as soon as possible.
+
+Section 4 is about the coding process; several pitfalls which have been
+encountered by other developers are discussed. Some requirements for
+patches are covered, and there is an introduction to some of the tools
+which can help to ensure that kernel patches are correct.
+
+Section 5 talks about the process of posting patches for review. To be
+taken seriously by the development community, patches must be properly
+formatted and described, and they must be sent to the right place.
+Following the advice in this section should help to ensure the best
+possible reception for your work.
+
+Section 6 covers what happens after posting patches; the job is far from
+done at that point. Working with reviewers is a crucial part of the
+development process; this section offers a number of tips on how to avoid
+problems at this important stage. Developers are cautioned against
+assuming that the job is done when a patch is merged into the mainline.
+
+Section 7 introduces a couple of "advanced" topics: managing patches with
+git and reviewing patches posted by others.
+
+Section 8 concludes the document with pointers to sources for more
+information on kernel development.
+
+
+1.2: WHAT THIS DOCUMENT IS ABOUT
+
+The Linux kernel, at over 8 million lines of code and well over 1000
+contributors to each release, is one of the largest and most active free
+software projects in existence. Since its humble beginning in 1991, this
+kernel has evolved into a best-of-breed operating system component which
+runs on pocket-sized digital music players, desktop PCs, the largest
+supercomputers in existence, and all types of systems in between. It is a
+robust, efficient, and scalable solution for almost any situation.
+
+With the growth of Linux has come an increase in the number of developers
+(and companies) wishing to participate in its development. Hardware
+vendors want to ensure that Linux supports their products well, making
+those products attractive to Linux users. Embedded systems vendors, who
+use Linux as a component in an integrated product, want Linux to be as
+capable and well-suited to the task at hand as possible. Distributors and
+other software vendors who base their products on Linux have a clear
+interest in the capabilities, performance, and reliability of the Linux
+kernel. And end users, too, will often wish to change Linux to make it
+better suit their needs.
+
+One of the most compelling features of Linux is that it is accessible to
+these developers; anybody with the requisite skills can improve Linux and
+influence the direction of its development. Proprietary products cannot
+offer this kind of openness, which is a characteristic of the free software
+process. But, if anything, the kernel is even more open than most other
+free software projects. A typical three-month kernel development cycle can
+involve over 1000 developers working for more than 100 different companies
+(or for no company at all).
+
+Working with the kernel development community is not especially hard. But,
+that notwithstanding, many potential contributors have experienced
+difficulties when trying to do kernel work. The kernel community has
+evolved its own distinct ways of operating which allow it to function
+smoothly (and produce a high-quality product) in an environment where
+thousands of lines of code are being changed every day. So it is not
+surprising that Linux kernel development process differs greatly from
+proprietary development methods.
+
+The kernel's development process may come across as strange and
+intimidating to new developers, but there are good reasons and solid
+experience behind it. A developer who does not understand the kernel
+community's ways (or, worse, who tries to flout or circumvent them) will
+have a frustrating experience in store. The development community, while
+being helpful to those who are trying to learn, has little time for those
+who will not listen or who do not care about the development process.
+
+It is hoped that those who read this document will be able to avoid that
+frustrating experience. There is a lot of material here, but the effort
+involved in reading it will be repaid in short order. The development
+community is always in need of developers who will help to make the kernel
+better; the following text should help you - or those who work for you -
+join our community.
+
+
+1.3: CREDITS
+
+This document was written by Jonathan Corbet, corbet@lwn.net. It has been
+improved by comments from Johannes Berg, James Berry, Alex Chiang, Roland
+Dreier, Randy Dunlap, Jake Edge, Jiri Kosina, Matt Mackall, Arthur Marsh,
+Amanda McPherson, Andrew Morton, Andrew Price, Tsugikazu Shibata, and
+Jochen Voß.
+
+This work was supported by the Linux Foundation; thanks especially to
+Amanda McPherson, who saw the value of this effort and made it all happen.
+
+
+1.4: THE IMPORTANCE OF GETTING CODE INTO THE MAINLINE
+
+Some companies and developers occasionally wonder why they should bother
+learning how to work with the kernel community and get their code into the
+mainline kernel (the "mainline" being the kernel maintained by Linus
+Torvalds and used as a base by Linux distributors). In the short term,
+contributing code can look like an avoidable expense; it seems easier to
+just keep the code separate and support users directly. The truth of the
+matter is that keeping code separate ("out of tree") is a false economy.
+
+As a way of illustrating the costs of out-of-tree code, here are a few
+relevant aspects of the kernel development process; most of these will be
+discussed in greater detail later in this document. Consider:
+
+- Code which has been merged into the mainline kernel is available to all
+ Linux users. It will automatically be present on all distributions which
+ enable it. There is no need for driver disks, downloads, or the hassles
+ of supporting multiple versions of multiple distributions; it all just
+ works, for the developer and for the user. Incorporation into the
+ mainline solves a large number of distribution and support problems.
+
+- While kernel developers strive to maintain a stable interface to user
+ space, the internal kernel API is in constant flux. The lack of a stable
+ internal interface is a deliberate design decision; it allows fundamental
+ improvements to be made at any time and results in higher-quality code.
+ But one result of that policy is that any out-of-tree code requires
+ constant upkeep if it is to work with new kernels. Maintaining
+ out-of-tree code requires significant amounts of work just to keep that
+ code working.
+
+ Code which is in the mainline, instead, does not require this work as the
+ result of a simple rule requiring any developer who makes an API change
+ to also fix any code that breaks as the result of that change. So code
+ which has been merged into the mainline has significantly lower
+ maintenance costs.
+
+- Beyond that, code which is in the kernel will often be improved by other
+ developers. Surprising results can come from empowering your user
+ community and customers to improve your product.
+
+- Kernel code is subjected to review, both before and after merging into
+ the mainline. No matter how strong the original developer's skills are,
+ this review process invariably finds ways in which the code can be
+ improved. Often review finds severe bugs and security problems. This is
+ especially true for code which has been developed in a closed
+ environment; such code benefits strongly from review by outside
+ developers. Out-of-tree code is lower-quality code.
+
+- Participation in the development process is your way to influence the
+ direction of kernel development. Users who complain from the sidelines
+ are heard, but active developers have a stronger voice - and the ability
+ to implement changes which make the kernel work better for their needs.
+
+- When code is maintained separately, the possibility that a third party
+ will contribute a different implementation of a similar feature always
+ exists. Should that happen, getting your code merged will become much
+ harder - to the point of impossibility. Then you will be faced with the
+ unpleasant alternatives of either (1) maintaining a nonstandard feature
+ out of tree indefinitely, or (2) abandoning your code and migrating your
+ users over to the in-tree version.
+
+- Contribution of code is the fundamental action which makes the whole
+ process work. By contributing your code you can add new functionality to
+ the kernel and provide capabilities and examples which are of use to
+ other kernel developers. If you have developed code for Linux (or are
+ thinking about doing so), you clearly have an interest in the continued
+ success of this platform; contributing code is one of the best ways to
+ help ensure that success.
+
+All of the reasoning above applies to any out-of-tree kernel code,
+including code which is distributed in proprietary, binary-only form.
+There are, however, additional factors which should be taken into account
+before considering any sort of binary-only kernel code distribution. These
+include:
+
+- The legal issues around the distribution of proprietary kernel modules
+ are cloudy at best; quite a few kernel copyright holders believe that
+ most binary-only modules are derived products of the kernel and that, as
+ a result, their distribution is a violation of the GNU General Public
+ license (about which more will be said below). Your author is not a
+ lawyer, and nothing in this document can possibly be considered to be
+ legal advice. The true legal status of closed-source modules can only be
+ determined by the courts. But the uncertainty which haunts those modules
+ is there regardless.
+
+- Binary modules greatly increase the difficulty of debugging kernel
+ problems, to the point that most kernel developers will not even try. So
+ the distribution of binary-only modules will make it harder for your
+ users to get support from the community.
+
+- Support is also harder for distributors of binary-only modules, who must
+ provide a version of the module for every distribution and every kernel
+ version they wish to support. Dozens of builds of a single module can
+ be required to provide reasonably comprehensive coverage, and your users
+ will have to upgrade your module separately every time they upgrade their
+ kernel.
+
+- Everything that was said above about code review applies doubly to
+ closed-source code. Since this code is not available at all, it cannot
+ have been reviewed by the community and will, beyond doubt, have serious
+ problems.
+
+Makers of embedded systems, in particular, may be tempted to disregard much
+of what has been said in this section in the belief that they are shipping
+a self-contained product which uses a frozen kernel version and requires no
+more development after its release. This argument misses the value of
+widespread code review and the value of allowing your users to add
+capabilities to your product. But these products, too, have a limited
+commercial life, after which a new version must be released. At that
+point, vendors whose code is in the mainline and well maintained will be
+much better positioned to get the new product ready for market quickly.
+
+
+1.5: LICENSING
+
+Code is contributed to the Linux kernel under a number of licenses, but all
+code must be compatible with version 2 of the GNU General Public License
+(GPLv2), which is the license covering the kernel distribution as a whole.
+In practice, that means that all code contributions are covered either by
+GPLv2 (with, optionally, language allowing distribution under later
+versions of the GPL) or the three-clause BSD license. Any contributions
+which are not covered by a compatible license will not be accepted into the
+kernel.
+
+Copyright assignments are not required (or requested) for code contributed
+to the kernel. All code merged into the mainline kernel retains its
+original ownership; as a result, the kernel now has thousands of owners.
+
+One implication of this ownership structure is that any attempt to change
+the licensing of the kernel is doomed to almost certain failure. There are
+few practical scenarios where the agreement of all copyright holders could
+be obtained (or their code removed from the kernel). So, in particular,
+there is no prospect of a migration to version 3 of the GPL in the
+foreseeable future.
+
+It is imperative that all code contributed to the kernel be legitimately
+free software. For that reason, code from anonymous (or pseudonymous)
+contributors will not be accepted. All contributors are required to "sign
+off" on their code, stating that the code can be distributed with the
+kernel under the GPL. Code which has not been licensed as free software by
+its owner, or which risks creating copyright-related problems for the
+kernel (such as code which derives from reverse-engineering efforts lacking
+proper safeguards) cannot be contributed.
+
+Questions about copyright-related issues are common on Linux development
+mailing lists. Such questions will normally receive no shortage of
+answers, but one should bear in mind that the people answering those
+questions are not lawyers and cannot provide legal advice. If you have
+legal questions relating to Linux source code, there is no substitute for
+talking with a lawyer who understands this field. Relying on answers
+obtained on technical mailing lists is a risky affair.
diff --git a/Documentation/development-process/2.Process b/Documentation/development-process/2.Process
new file mode 100644
index 000000000..c24e156a6
--- /dev/null
+++ b/Documentation/development-process/2.Process
@@ -0,0 +1,478 @@
+2: HOW THE DEVELOPMENT PROCESS WORKS
+
+Linux kernel development in the early 1990's was a pretty loose affair,
+with relatively small numbers of users and developers involved. With a
+user base in the millions and with some 2,000 developers involved over the
+course of one year, the kernel has since had to evolve a number of
+processes to keep development happening smoothly. A solid understanding of
+how the process works is required in order to be an effective part of it.
+
+
+2.1: THE BIG PICTURE
+
+The kernel developers use a loosely time-based release process, with a new
+major kernel release happening every two or three months. The recent
+release history looks like this:
+
+ 2.6.38 March 14, 2011
+ 2.6.37 January 4, 2011
+ 2.6.36 October 20, 2010
+ 2.6.35 August 1, 2010
+ 2.6.34 May 15, 2010
+ 2.6.33 February 24, 2010
+
+Every 2.6.x release is a major kernel release with new features, internal
+API changes, and more. A typical 2.6 release can contain nearly 10,000
+changesets with changes to several hundred thousand lines of code. 2.6 is
+thus the leading edge of Linux kernel development; the kernel uses a
+rolling development model which is continually integrating major changes.
+
+A relatively straightforward discipline is followed with regard to the
+merging of patches for each release. At the beginning of each development
+cycle, the "merge window" is said to be open. At that time, code which is
+deemed to be sufficiently stable (and which is accepted by the development
+community) is merged into the mainline kernel. The bulk of changes for a
+new development cycle (and all of the major changes) will be merged during
+this time, at a rate approaching 1,000 changes ("patches," or "changesets")
+per day.
+
+(As an aside, it is worth noting that the changes integrated during the
+merge window do not come out of thin air; they have been collected, tested,
+and staged ahead of time. How that process works will be described in
+detail later on).
+
+The merge window lasts for approximately two weeks. At the end of this
+time, Linus Torvalds will declare that the window is closed and release the
+first of the "rc" kernels. For the kernel which is destined to be 2.6.40,
+for example, the release which happens at the end of the merge window will
+be called 2.6.40-rc1. The -rc1 release is the signal that the time to
+merge new features has passed, and that the time to stabilize the next
+kernel has begun.
+
+Over the next six to ten weeks, only patches which fix problems should be
+submitted to the mainline. On occasion a more significant change will be
+allowed, but such occasions are rare; developers who try to merge new
+features outside of the merge window tend to get an unfriendly reception.
+As a general rule, if you miss the merge window for a given feature, the
+best thing to do is to wait for the next development cycle. (An occasional
+exception is made for drivers for previously-unsupported hardware; if they
+touch no in-tree code, they cannot cause regressions and should be safe to
+add at any time).
+
+As fixes make their way into the mainline, the patch rate will slow over
+time. Linus releases new -rc kernels about once a week; a normal series
+will get up to somewhere between -rc6 and -rc9 before the kernel is
+considered to be sufficiently stable and the final 2.6.x release is made.
+At that point the whole process starts over again.
+
+As an example, here is how the 2.6.38 development cycle went (all dates in
+2011):
+
+ January 4 2.6.37 stable release
+ January 18 2.6.38-rc1, merge window closes
+ January 21 2.6.38-rc2
+ February 1 2.6.38-rc3
+ February 7 2.6.38-rc4
+ February 15 2.6.38-rc5
+ February 21 2.6.38-rc6
+ March 1 2.6.38-rc7
+ March 7 2.6.38-rc8
+ March 14 2.6.38 stable release
+
+How do the developers decide when to close the development cycle and create
+the stable release? The most significant metric used is the list of
+regressions from previous releases. No bugs are welcome, but those which
+break systems which worked in the past are considered to be especially
+serious. For this reason, patches which cause regressions are looked upon
+unfavorably and are quite likely to be reverted during the stabilization
+period.
+
+The developers' goal is to fix all known regressions before the stable
+release is made. In the real world, this kind of perfection is hard to
+achieve; there are just too many variables in a project of this size.
+There comes a point where delaying the final release just makes the problem
+worse; the pile of changes waiting for the next merge window will grow
+larger, creating even more regressions the next time around. So most 2.6.x
+kernels go out with a handful of known regressions though, hopefully, none
+of them are serious.
+
+Once a stable release is made, its ongoing maintenance is passed off to the
+"stable team," currently consisting of Greg Kroah-Hartman. The stable team
+will release occasional updates to the stable release using the 2.6.x.y
+numbering scheme. To be considered for an update release, a patch must (1)
+fix a significant bug, and (2) already be merged into the mainline for the
+next development kernel. Kernels will typically receive stable updates for
+a little more than one development cycle past their initial release. So,
+for example, the 2.6.36 kernel's history looked like:
+
+ October 10 2.6.36 stable release
+ November 22 2.6.36.1
+ December 9 2.6.36.2
+ January 7 2.6.36.3
+ February 17 2.6.36.4
+
+2.6.36.4 was the final stable update for the 2.6.36 release.
+
+Some kernels are designated "long term" kernels; they will receive support
+for a longer period. As of this writing, the current long term kernels
+and their maintainers are:
+
+ 2.6.27 Willy Tarreau (Deep-frozen stable kernel)
+ 2.6.32 Greg Kroah-Hartman
+ 2.6.35 Andi Kleen (Embedded flag kernel)
+
+The selection of a kernel for long-term support is purely a matter of a
+maintainer having the need and the time to maintain that release. There
+are no known plans for long-term support for any specific upcoming
+release.
+
+
+2.2: THE LIFECYCLE OF A PATCH
+
+Patches do not go directly from the developer's keyboard into the mainline
+kernel. There is, instead, a somewhat involved (if somewhat informal)
+process designed to ensure that each patch is reviewed for quality and that
+each patch implements a change which is desirable to have in the mainline.
+This process can happen quickly for minor fixes, or, in the case of large
+and controversial changes, go on for years. Much developer frustration
+comes from a lack of understanding of this process or from attempts to
+circumvent it.
+
+In the hopes of reducing that frustration, this document will describe how
+a patch gets into the kernel. What follows below is an introduction which
+describes the process in a somewhat idealized way. A much more detailed
+treatment will come in later sections.
+
+The stages that a patch goes through are, generally:
+
+ - Design. This is where the real requirements for the patch - and the way
+ those requirements will be met - are laid out. Design work is often
+ done without involving the community, but it is better to do this work
+ in the open if at all possible; it can save a lot of time redesigning
+ things later.
+
+ - Early review. Patches are posted to the relevant mailing list, and
+ developers on that list reply with any comments they may have. This
+ process should turn up any major problems with a patch if all goes
+ well.
+
+ - Wider review. When the patch is getting close to ready for mainline
+ inclusion, it should be accepted by a relevant subsystem maintainer -
+ though this acceptance is not a guarantee that the patch will make it
+ all the way to the mainline. The patch will show up in the maintainer's
+ subsystem tree and into the -next trees (described below). When the
+ process works, this step leads to more extensive review of the patch and
+ the discovery of any problems resulting from the integration of this
+ patch with work being done by others.
+
+- Please note that most maintainers also have day jobs, so merging
+ your patch may not be their highest priority. If your patch is
+ getting feedback about changes that are needed, you should either
+ make those changes or justify why they should not be made. If your
+ patch has no review complaints but is not being merged by its
+ appropriate subsystem or driver maintainer, you should be persistent
+ in updating the patch to the current kernel so that it applies cleanly
+ and keep sending it for review and merging.
+
+ - Merging into the mainline. Eventually, a successful patch will be
+ merged into the mainline repository managed by Linus Torvalds. More
+ comments and/or problems may surface at this time; it is important that
+ the developer be responsive to these and fix any issues which arise.
+
+ - Stable release. The number of users potentially affected by the patch
+ is now large, so, once again, new problems may arise.
+
+ - Long-term maintenance. While it is certainly possible for a developer
+ to forget about code after merging it, that sort of behavior tends to
+ leave a poor impression in the development community. Merging code
+ eliminates some of the maintenance burden, in that others will fix
+ problems caused by API changes. But the original developer should
+ continue to take responsibility for the code if it is to remain useful
+ in the longer term.
+
+One of the largest mistakes made by kernel developers (or their employers)
+is to try to cut the process down to a single "merging into the mainline"
+step. This approach invariably leads to frustration for everybody
+involved.
+
+
+2.3: HOW PATCHES GET INTO THE KERNEL
+
+There is exactly one person who can merge patches into the mainline kernel
+repository: Linus Torvalds. But, of the over 9,500 patches which went
+into the 2.6.38 kernel, only 112 (around 1.3%) were directly chosen by Linus
+himself. The kernel project has long since grown to a size where no single
+developer could possibly inspect and select every patch unassisted. The
+way the kernel developers have addressed this growth is through the use of
+a lieutenant system built around a chain of trust.
+
+The kernel code base is logically broken down into a set of subsystems:
+networking, specific architecture support, memory management, video
+devices, etc. Most subsystems have a designated maintainer, a developer
+who has overall responsibility for the code within that subsystem. These
+subsystem maintainers are the gatekeepers (in a loose way) for the portion
+of the kernel they manage; they are the ones who will (usually) accept a
+patch for inclusion into the mainline kernel.
+
+Subsystem maintainers each manage their own version of the kernel source
+tree, usually (but certainly not always) using the git source management
+tool. Tools like git (and related tools like quilt or mercurial) allow
+maintainers to track a list of patches, including authorship information
+and other metadata. At any given time, the maintainer can identify which
+patches in his or her repository are not found in the mainline.
+
+When the merge window opens, top-level maintainers will ask Linus to "pull"
+the patches they have selected for merging from their repositories. If
+Linus agrees, the stream of patches will flow up into his repository,
+becoming part of the mainline kernel. The amount of attention that Linus
+pays to specific patches received in a pull operation varies. It is clear
+that, sometimes, he looks quite closely. But, as a general rule, Linus
+trusts the subsystem maintainers to not send bad patches upstream.
+
+Subsystem maintainers, in turn, can pull patches from other maintainers.
+For example, the networking tree is built from patches which accumulated
+first in trees dedicated to network device drivers, wireless networking,
+etc. This chain of repositories can be arbitrarily long, though it rarely
+exceeds two or three links. Since each maintainer in the chain trusts
+those managing lower-level trees, this process is known as the "chain of
+trust."
+
+Clearly, in a system like this, getting patches into the kernel depends on
+finding the right maintainer. Sending patches directly to Linus is not
+normally the right way to go.
+
+
+2.4: NEXT TREES
+
+The chain of subsystem trees guides the flow of patches into the kernel,
+but it also raises an interesting question: what if somebody wants to look
+at all of the patches which are being prepared for the next merge window?
+Developers will be interested in what other changes are pending to see
+whether there are any conflicts to worry about; a patch which changes a
+core kernel function prototype, for example, will conflict with any other
+patches which use the older form of that function. Reviewers and testers
+want access to the changes in their integrated form before all of those
+changes land in the mainline kernel. One could pull changes from all of
+the interesting subsystem trees, but that would be a big and error-prone
+job.
+
+The answer comes in the form of -next trees, where subsystem trees are
+collected for testing and review. The older of these trees, maintained by
+Andrew Morton, is called "-mm" (for memory management, which is how it got
+started). The -mm tree integrates patches from a long list of subsystem
+trees; it also has some patches aimed at helping with debugging.
+
+Beyond that, -mm contains a significant collection of patches which have
+been selected by Andrew directly. These patches may have been posted on a
+mailing list, or they may apply to a part of the kernel for which there is
+no designated subsystem tree. As a result, -mm operates as a sort of
+subsystem tree of last resort; if there is no other obvious path for a
+patch into the mainline, it is likely to end up in -mm. Miscellaneous
+patches which accumulate in -mm will eventually either be forwarded on to
+an appropriate subsystem tree or be sent directly to Linus. In a typical
+development cycle, approximately 5-10% of the patches going into the
+mainline get there via -mm.
+
+The current -mm patch is available in the "mmotm" (-mm of the moment)
+directory at:
+
+ http://www.ozlabs.org/~akpm/mmotm/
+
+Use of the MMOTM tree is likely to be a frustrating experience, though;
+there is a definite chance that it will not even compile.
+
+The primary tree for next-cycle patch merging is linux-next, maintained by
+Stephen Rothwell. The linux-next tree is, by design, a snapshot of what
+the mainline is expected to look like after the next merge window closes.
+Linux-next trees are announced on the linux-kernel and linux-next mailing
+lists when they are assembled; they can be downloaded from:
+
+ http://www.kernel.org/pub/linux/kernel/next/
+
+Linux-next has become an integral part of the kernel development process;
+all patches merged during a given merge window should really have found
+their way into linux-next some time before the merge window opens.
+
+
+2.4.1: STAGING TREES
+
+The kernel source tree contains the drivers/staging/ directory, where
+many sub-directories for drivers or filesystems that are on their way to
+being added to the kernel tree live. They remain in drivers/staging while
+they still need more work; once complete, they can be moved into the
+kernel proper. This is a way to keep track of drivers that aren't
+up to Linux kernel coding or quality standards, but people may want to use
+them and track development.
+
+Greg Kroah-Hartman currently maintains the staging tree. Drivers that
+still need work are sent to him, with each driver having its own
+subdirectory in drivers/staging/. Along with the driver source files, a
+TODO file should be present in the directory as well. The TODO file lists
+the pending work that the driver needs for acceptance into the kernel
+proper, as well as a list of people that should be Cc'd for any patches to
+the driver. Current rules require that drivers contributed to staging
+must, at a minimum, compile properly.
+
+Staging can be a relatively easy way to get new drivers into the mainline
+where, with luck, they will come to the attention of other developers and
+improve quickly. Entry into staging is not the end of the story, though;
+code in staging which is not seeing regular progress will eventually be
+removed. Distributors also tend to be relatively reluctant to enable
+staging drivers. So staging is, at best, a stop on the way toward becoming
+a proper mainline driver.
+
+
+2.5: TOOLS
+
+As can be seen from the above text, the kernel development process depends
+heavily on the ability to herd collections of patches in various
+directions. The whole thing would not work anywhere near as well as it
+does without suitably powerful tools. Tutorials on how to use these tools
+are well beyond the scope of this document, but there is space for a few
+pointers.
+
+By far the dominant source code management system used by the kernel
+community is git. Git is one of a number of distributed version control
+systems being developed in the free software community. It is well tuned
+for kernel development, in that it performs quite well when dealing with
+large repositories and large numbers of patches. It also has a reputation
+for being difficult to learn and use, though it has gotten better over
+time. Some sort of familiarity with git is almost a requirement for kernel
+developers; even if they do not use it for their own work, they'll need git
+to keep up with what other developers (and the mainline) are doing.
+
+Git is now packaged by almost all Linux distributions. There is a home
+page at:
+
+ http://git-scm.com/
+
+That page has pointers to documentation and tutorials.
+
+Among the kernel developers who do not use git, the most popular choice is
+almost certainly Mercurial:
+
+ http://www.selenic.com/mercurial/
+
+Mercurial shares many features with git, but it provides an interface which
+many find easier to use.
+
+The other tool worth knowing about is Quilt:
+
+ http://savannah.nongnu.org/projects/quilt/
+
+Quilt is a patch management system, rather than a source code management
+system. It does not track history over time; it is, instead, oriented
+toward tracking a specific set of changes against an evolving code base.
+Some major subsystem maintainers use quilt to manage patches intended to go
+upstream. For the management of certain kinds of trees (-mm, for example),
+quilt is the best tool for the job.
+
+
+2.6: MAILING LISTS
+
+A great deal of Linux kernel development work is done by way of mailing
+lists. It is hard to be a fully-functioning member of the community
+without joining at least one list somewhere. But Linux mailing lists also
+represent a potential hazard to developers, who risk getting buried under a
+load of electronic mail, running afoul of the conventions used on the Linux
+lists, or both.
+
+Most kernel mailing lists are run on vger.kernel.org; the master list can
+be found at:
+
+ http://vger.kernel.org/vger-lists.html
+
+There are lists hosted elsewhere, though; a number of them are at
+lists.redhat.com.
+
+The core mailing list for kernel development is, of course, linux-kernel.
+This list is an intimidating place to be; volume can reach 500 messages per
+day, the amount of noise is high, the conversation can be severely
+technical, and participants are not always concerned with showing a high
+degree of politeness. But there is no other place where the kernel
+development community comes together as a whole; developers who avoid this
+list will miss important information.
+
+There are a few hints which can help with linux-kernel survival:
+
+- Have the list delivered to a separate folder, rather than your main
+ mailbox. One must be able to ignore the stream for sustained periods of
+ time.
+
+- Do not try to follow every conversation - nobody else does. It is
+ important to filter on both the topic of interest (though note that
+ long-running conversations can drift away from the original subject
+ without changing the email subject line) and the people who are
+ participating.
+
+- Do not feed the trolls. If somebody is trying to stir up an angry
+ response, ignore them.
+
+- When responding to linux-kernel email (or that on other lists) preserve
+ the Cc: header for all involved. In the absence of a strong reason (such
+ as an explicit request), you should never remove recipients. Always make
+ sure that the person you are responding to is in the Cc: list. This
+ convention also makes it unnecessary to explicitly ask to be copied on
+ replies to your postings.
+
+- Search the list archives (and the net as a whole) before asking
+ questions. Some developers can get impatient with people who clearly
+ have not done their homework.
+
+- Avoid top-posting (the practice of putting your answer above the quoted
+ text you are responding to). It makes your response harder to read and
+ makes a poor impression.
+
+- Ask on the correct mailing list. Linux-kernel may be the general meeting
+ point, but it is not the best place to find developers from all
+ subsystems.
+
+The last point - finding the correct mailing list - is a common place for
+beginning developers to go wrong. Somebody who asks a networking-related
+question on linux-kernel will almost certainly receive a polite suggestion
+to ask on the netdev list instead, as that is the list frequented by most
+networking developers. Other lists exist for the SCSI, video4linux, IDE,
+filesystem, etc. subsystems. The best place to look for mailing lists is
+in the MAINTAINERS file packaged with the kernel source.
+
+
+2.7: GETTING STARTED WITH KERNEL DEVELOPMENT
+
+Questions about how to get started with the kernel development process are
+common - from both individuals and companies. Equally common are missteps
+which make the beginning of the relationship harder than it has to be.
+
+Companies often look to hire well-known developers to get a development
+group started. This can, in fact, be an effective technique. But it also
+tends to be expensive and does not do much to grow the pool of experienced
+kernel developers. It is possible to bring in-house developers up to speed
+on Linux kernel development, given the investment of a bit of time. Taking
+this time can endow an employer with a group of developers who understand
+the kernel and the company both, and who can help to train others as well.
+Over the medium term, this is often the more profitable approach.
+
+Individual developers are often, understandably, at a loss for a place to
+start. Beginning with a large project can be intimidating; one often wants
+to test the waters with something smaller first. This is the point where
+some developers jump into the creation of patches fixing spelling errors or
+minor coding style issues. Unfortunately, such patches create a level of
+noise which is distracting for the development community as a whole, so,
+increasingly, they are looked down upon. New developers wishing to
+introduce themselves to the community will not get the sort of reception
+they wish for by these means.
+
+Andrew Morton gives this advice for aspiring kernel developers
+
+ The #1 project for all kernel beginners should surely be "make sure
+ that the kernel runs perfectly at all times on all machines which
+ you can lay your hands on". Usually the way to do this is to work
+ with others on getting things fixed up (this can require
+ persistence!) but that's fine - it's a part of kernel development.
+
+(http://lwn.net/Articles/283982/).
+
+In the absence of obvious problems to fix, developers are advised to look
+at the current lists of regressions and open bugs in general. There is
+never any shortage of issues in need of fixing; by addressing these issues,
+developers will gain experience with the process while, at the same time,
+building respect with the rest of the development community.
diff --git a/Documentation/development-process/3.Early-stage b/Documentation/development-process/3.Early-stage
new file mode 100644
index 000000000..f87ba7b3f
--- /dev/null
+++ b/Documentation/development-process/3.Early-stage
@@ -0,0 +1,212 @@
+3: EARLY-STAGE PLANNING
+
+When contemplating a Linux kernel development project, it can be tempting
+to jump right in and start coding. As with any significant project,
+though, much of the groundwork for success is best laid before the first
+line of code is written. Some time spent in early planning and
+communication can save far more time later on.
+
+
+3.1: SPECIFYING THE PROBLEM
+
+Like any engineering project, a successful kernel enhancement starts with a
+clear description of the problem to be solved. In some cases, this step is
+easy: when a driver is needed for a specific piece of hardware, for
+example. In others, though, it is tempting to confuse the real problem
+with the proposed solution, and that can lead to difficulties.
+
+Consider an example: some years ago, developers working with Linux audio
+sought a way to run applications without dropouts or other artifacts caused
+by excessive latency in the system. The solution they arrived at was a
+kernel module intended to hook into the Linux Security Module (LSM)
+framework; this module could be configured to give specific applications
+access to the realtime scheduler. This module was implemented and sent to
+the linux-kernel mailing list, where it immediately ran into problems.
+
+To the audio developers, this security module was sufficient to solve their
+immediate problem. To the wider kernel community, though, it was seen as a
+misuse of the LSM framework (which is not intended to confer privileges
+onto processes which they would not otherwise have) and a risk to system
+stability. Their preferred solutions involved realtime scheduling access
+via the rlimit mechanism for the short term, and ongoing latency reduction
+work in the long term.
+
+The audio community, however, could not see past the particular solution
+they had implemented; they were unwilling to accept alternatives. The
+resulting disagreement left those developers feeling disillusioned with the
+entire kernel development process; one of them went back to an audio list
+and posted this:
+
+ There are a number of very good Linux kernel developers, but they
+ tend to get outshouted by a large crowd of arrogant fools. Trying
+ to communicate user requirements to these people is a waste of
+ time. They are much too "intelligent" to listen to lesser mortals.
+
+(http://lwn.net/Articles/131776/).
+
+The reality of the situation was different; the kernel developers were far
+more concerned about system stability, long-term maintenance, and finding
+the right solution to the problem than they were with a specific module.
+The moral of the story is to focus on the problem - not a specific solution
+- and to discuss it with the development community before investing in the
+creation of a body of code.
+
+So, when contemplating a kernel development project, one should obtain
+answers to a short set of questions:
+
+ - What, exactly, is the problem which needs to be solved?
+
+ - Who are the users affected by this problem? Which use cases should the
+ solution address?
+
+ - How does the kernel fall short in addressing that problem now?
+
+Only then does it make sense to start considering possible solutions.
+
+
+3.2: EARLY DISCUSSION
+
+When planning a kernel development project, it makes great sense to hold
+discussions with the community before launching into implementation. Early
+communication can save time and trouble in a number of ways:
+
+ - It may well be that the problem is addressed by the kernel in ways which
+ you have not understood. The Linux kernel is large and has a number of
+ features and capabilities which are not immediately obvious. Not all
+ kernel capabilities are documented as well as one might like, and it is
+ easy to miss things. Your author has seen the posting of a complete
+ driver which duplicated an existing driver that the new author had been
+ unaware of. Code which reinvents existing wheels is not only wasteful;
+ it will also not be accepted into the mainline kernel.
+
+ - There may be elements of the proposed solution which will not be
+ acceptable for mainline merging. It is better to find out about
+ problems like this before writing the code.
+
+ - It's entirely possible that other developers have thought about the
+ problem; they may have ideas for a better solution, and may be willing
+ to help in the creation of that solution.
+
+Years of experience with the kernel development community have taught a
+clear lesson: kernel code which is designed and developed behind closed
+doors invariably has problems which are only revealed when the code is
+released into the community. Sometimes these problems are severe,
+requiring months or years of effort before the code can be brought up to
+the kernel community's standards. Some examples include:
+
+ - The Devicescape network stack was designed and implemented for
+ single-processor systems. It could not be merged into the mainline
+ until it was made suitable for multiprocessor systems. Retrofitting
+ locking and such into code is a difficult task; as a result, the merging
+ of this code (now called mac80211) was delayed for over a year.
+
+ - The Reiser4 filesystem included a number of capabilities which, in the
+ core kernel developers' opinion, should have been implemented in the
+ virtual filesystem layer instead. It also included features which could
+ not easily be implemented without exposing the system to user-caused
+ deadlocks. The late revelation of these problems - and refusal to
+ address some of them - has caused Reiser4 to stay out of the mainline
+ kernel.
+
+ - The AppArmor security module made use of internal virtual filesystem
+ data structures in ways which were considered to be unsafe and
+ unreliable. This concern (among others) kept AppArmor out of the
+ mainline for years.
+
+In each of these cases, a great deal of pain and extra work could have been
+avoided with some early discussion with the kernel developers.
+
+
+3.3: WHO DO YOU TALK TO?
+
+When developers decide to take their plans public, the next question will
+be: where do we start? The answer is to find the right mailing list(s) and
+the right maintainer. For mailing lists, the best approach is to look in
+the MAINTAINERS file for a relevant place to post. If there is a suitable
+subsystem list, posting there is often preferable to posting on
+linux-kernel; you are more likely to reach developers with expertise in the
+relevant subsystem and the environment may be more supportive.
+
+Finding maintainers can be a bit harder. Again, the MAINTAINERS file is
+the place to start. That file tends to not always be up to date, though,
+and not all subsystems are represented there. The person listed in the
+MAINTAINERS file may, in fact, not be the person who is actually acting in
+that role currently. So, when there is doubt about who to contact, a
+useful trick is to use git (and "git log" in particular) to see who is
+currently active within the subsystem of interest. Look at who is writing
+patches, and who, if anybody, is attaching Signed-off-by lines to those
+patches. Those are the people who will be best placed to help with a new
+development project.
+
+The task of finding the right maintainer is sometimes challenging enough
+that the kernel developers have added a script to ease the process:
+
+ .../scripts/get_maintainer.pl
+
+This script will return the current maintainer(s) for a given file or
+directory when given the "-f" option. If passed a patch on the
+command line, it will list the maintainers who should probably receive
+copies of the patch. There are a number of options regulating how hard
+get_maintainer.pl will search for maintainers; please be careful about
+using the more aggressive options as you may end up including developers
+who have no real interest in the code you are modifying.
+
+If all else fails, talking to Andrew Morton can be an effective way to
+track down a maintainer for a specific piece of code.
+
+
+3.4: WHEN TO POST?
+
+If possible, posting your plans during the early stages can only be
+helpful. Describe the problem being solved and any plans that have been
+made on how the implementation will be done. Any information you can
+provide can help the development community provide useful input on the
+project.
+
+One discouraging thing which can happen at this stage is not a hostile
+reaction, but, instead, little or no reaction at all. The sad truth of the
+matter is (1) kernel developers tend to be busy, (2) there is no shortage
+of people with grand plans and little code (or even prospect of code) to
+back them up, and (3) nobody is obligated to review or comment on ideas
+posted by others. Beyond that, high-level designs often hide problems
+which are only reviewed when somebody actually tries to implement those
+designs; for that reason, kernel developers would rather see the code.
+
+If a request-for-comments posting yields little in the way of comments, do
+not assume that it means there is no interest in the project.
+Unfortunately, you also cannot assume that there are no problems with your
+idea. The best thing to do in this situation is to proceed, keeping the
+community informed as you go.
+
+
+3.5: GETTING OFFICIAL BUY-IN
+
+If your work is being done in a corporate environment - as most Linux
+kernel work is - you must, obviously, have permission from suitably
+empowered managers before you can post your company's plans or code to a
+public mailing list. The posting of code which has not been cleared for
+release under a GPL-compatible license can be especially problematic; the
+sooner that a company's management and legal staff can agree on the posting
+of a kernel development project, the better off everybody involved will be.
+
+Some readers may be thinking at this point that their kernel work is
+intended to support a product which does not yet have an officially
+acknowledged existence. Revealing their employer's plans on a public
+mailing list may not be a viable option. In cases like this, it is worth
+considering whether the secrecy is really necessary; there is often no real
+need to keep development plans behind closed doors.
+
+That said, there are also cases where a company legitimately cannot
+disclose its plans early in the development process. Companies with
+experienced kernel developers may choose to proceed in an open-loop manner
+on the assumption that they will be able to avoid serious integration
+problems later. For companies without that sort of in-house expertise, the
+best option is often to hire an outside developer to review the plans under
+a non-disclosure agreement. The Linux Foundation operates an NDA program
+designed to help with this sort of situation; more information can be found
+at:
+
+ http://www.linuxfoundation.org/en/NDA_program
+
+This kind of review is often enough to avoid serious problems later on
+without requiring public disclosure of the project.
diff --git a/Documentation/development-process/4.Coding b/Documentation/development-process/4.Coding
new file mode 100644
index 000000000..e3cb6a566
--- /dev/null
+++ b/Documentation/development-process/4.Coding
@@ -0,0 +1,399 @@
+4: GETTING THE CODE RIGHT
+
+While there is much to be said for a solid and community-oriented design
+process, the proof of any kernel development project is in the resulting
+code. It is the code which will be examined by other developers and merged
+(or not) into the mainline tree. So it is the quality of this code which
+will determine the ultimate success of the project.
+
+This section will examine the coding process. We'll start with a look at a
+number of ways in which kernel developers can go wrong. Then the focus
+will shift toward doing things right and the tools which can help in that
+quest.
+
+
+4.1: PITFALLS
+
+* Coding style
+
+The kernel has long had a standard coding style, described in
+Documentation/CodingStyle. For much of that time, the policies described
+in that file were taken as being, at most, advisory. As a result, there is
+a substantial amount of code in the kernel which does not meet the coding
+style guidelines. The presence of that code leads to two independent
+hazards for kernel developers.
+
+The first of these is to believe that the kernel coding standards do not
+matter and are not enforced. The truth of the matter is that adding new
+code to the kernel is very difficult if that code is not coded according to
+the standard; many developers will request that the code be reformatted
+before they will even review it. A code base as large as the kernel
+requires some uniformity of code to make it possible for developers to
+quickly understand any part of it. So there is no longer room for
+strangely-formatted code.
+
+Occasionally, the kernel's coding style will run into conflict with an
+employer's mandated style. In such cases, the kernel's style will have to
+win before the code can be merged. Putting code into the kernel means
+giving up a degree of control in a number of ways - including control over
+how the code is formatted.
+
+The other trap is to assume that code which is already in the kernel is
+urgently in need of coding style fixes. Developers may start to generate
+reformatting patches as a way of gaining familiarity with the process, or
+as a way of getting their name into the kernel changelogs - or both. But
+pure coding style fixes are seen as noise by the development community;
+they tend to get a chilly reception. So this type of patch is best
+avoided. It is natural to fix the style of a piece of code while working
+on it for other reasons, but coding style changes should not be made for
+their own sake.
+
+The coding style document also should not be read as an absolute law which
+can never be transgressed. If there is a good reason to go against the
+style (a line which becomes far less readable if split to fit within the
+80-column limit, for example), just do it.
+
+
+* Abstraction layers
+
+Computer Science professors teach students to make extensive use of
+abstraction layers in the name of flexibility and information hiding.
+Certainly the kernel makes extensive use of abstraction; no project
+involving several million lines of code could do otherwise and survive.
+But experience has shown that excessive or premature abstraction can be
+just as harmful as premature optimization. Abstraction should be used to
+the level required and no further.
+
+At a simple level, consider a function which has an argument which is
+always passed as zero by all callers. One could retain that argument just
+in case somebody eventually needs to use the extra flexibility that it
+provides. By that time, though, chances are good that the code which
+implements this extra argument has been broken in some subtle way which was
+never noticed - because it has never been used. Or, when the need for
+extra flexibility arises, it does not do so in a way which matches the
+programmer's early expectation. Kernel developers will routinely submit
+patches to remove unused arguments; they should, in general, not be added
+in the first place.
+
+Abstraction layers which hide access to hardware - often to allow the bulk
+of a driver to be used with multiple operating systems - are especially
+frowned upon. Such layers obscure the code and may impose a performance
+penalty; they do not belong in the Linux kernel.
+
+On the other hand, if you find yourself copying significant amounts of code
+from another kernel subsystem, it is time to ask whether it would, in fact,
+make sense to pull out some of that code into a separate library or to
+implement that functionality at a higher level. There is no value in
+replicating the same code throughout the kernel.
+
+
+* #ifdef and preprocessor use in general
+
+The C preprocessor seems to present a powerful temptation to some C
+programmers, who see it as a way to efficiently encode a great deal of
+flexibility into a source file. But the preprocessor is not C, and heavy
+use of it results in code which is much harder for others to read and
+harder for the compiler to check for correctness. Heavy preprocessor use
+is almost always a sign of code which needs some cleanup work.
+
+Conditional compilation with #ifdef is, indeed, a powerful feature, and it
+is used within the kernel. But there is little desire to see code which is
+sprinkled liberally with #ifdef blocks. As a general rule, #ifdef use
+should be confined to header files whenever possible.
+Conditionally-compiled code can be confined to functions which, if the code
+is not to be present, simply become empty. The compiler will then quietly
+optimize out the call to the empty function. The result is far cleaner
+code which is easier to follow.
+
+C preprocessor macros present a number of hazards, including possible
+multiple evaluation of expressions with side effects and no type safety.
+If you are tempted to define a macro, consider creating an inline function
+instead. The code which results will be the same, but inline functions are
+easier to read, do not evaluate their arguments multiple times, and allow
+the compiler to perform type checking on the arguments and return value.
+
+
+* Inline functions
+
+Inline functions present a hazard of their own, though. Programmers can
+become enamored of the perceived efficiency inherent in avoiding a function
+call and fill a source file with inline functions. Those functions,
+however, can actually reduce performance. Since their code is replicated
+at each call site, they end up bloating the size of the compiled kernel.
+That, in turn, creates pressure on the processor's memory caches, which can
+slow execution dramatically. Inline functions, as a rule, should be quite
+small and relatively rare. The cost of a function call, after all, is not
+that high; the creation of large numbers of inline functions is a classic
+example of premature optimization.
+
+In general, kernel programmers ignore cache effects at their peril. The
+classic time/space tradeoff taught in beginning data structures classes
+often does not apply to contemporary hardware. Space *is* time, in that a
+larger program will run slower than one which is more compact.
+
+More recent compilers take an increasingly active role in deciding whether
+a given function should actually be inlined or not. So the liberal
+placement of "inline" keywords may not just be excessive; it could also be
+irrelevant.
+
+
+* Locking
+
+In May, 2006, the "Devicescape" networking stack was, with great
+fanfare, released under the GPL and made available for inclusion in the
+mainline kernel. This donation was welcome news; support for wireless
+networking in Linux was considered substandard at best, and the Devicescape
+stack offered the promise of fixing that situation. Yet, this code did not
+actually make it into the mainline until June, 2007 (2.6.22). What
+happened?
+
+This code showed a number of signs of having been developed behind
+corporate doors. But one large problem in particular was that it was not
+designed to work on multiprocessor systems. Before this networking stack
+(now called mac80211) could be merged, a locking scheme needed to be
+retrofitted onto it.
+
+Once upon a time, Linux kernel code could be developed without thinking
+about the concurrency issues presented by multiprocessor systems. Now,
+however, this document is being written on a dual-core laptop. Even on
+single-processor systems, work being done to improve responsiveness will
+raise the level of concurrency within the kernel. The days when kernel
+code could be written without thinking about locking are long past.
+
+Any resource (data structures, hardware registers, etc.) which could be
+accessed concurrently by more than one thread must be protected by a lock.
+New code should be written with this requirement in mind; retrofitting
+locking after the fact is a rather more difficult task. Kernel developers
+should take the time to understand the available locking primitives well
+enough to pick the right tool for the job. Code which shows a lack of
+attention to concurrency will have a difficult path into the mainline.
+
+
+* Regressions
+
+One final hazard worth mentioning is this: it can be tempting to make a
+change (which may bring big improvements) which causes something to break
+for existing users. This kind of change is called a "regression," and
+regressions have become most unwelcome in the mainline kernel. With few
+exceptions, changes which cause regressions will be backed out if the
+regression cannot be fixed in a timely manner. Far better to avoid the
+regression in the first place.
+
+It is often argued that a regression can be justified if it causes things
+to work for more people than it creates problems for. Why not make a
+change if it brings new functionality to ten systems for each one it
+breaks? The best answer to this question was expressed by Linus in July,
+2007:
+
+ So we don't fix bugs by introducing new problems. That way lies
+ madness, and nobody ever knows if you actually make any real
+ progress at all. Is it two steps forwards, one step back, or one
+ step forward and two steps back?
+
+(http://lwn.net/Articles/243460/).
+
+An especially unwelcome type of regression is any sort of change to the
+user-space ABI. Once an interface has been exported to user space, it must
+be supported indefinitely. This fact makes the creation of user-space
+interfaces particularly challenging: since they cannot be changed in
+incompatible ways, they must be done right the first time. For this
+reason, a great deal of thought, clear documentation, and wide review for
+user-space interfaces is always required.
+
+
+
+4.2: CODE CHECKING TOOLS
+
+For now, at least, the writing of error-free code remains an ideal that few
+of us can reach. What we can hope to do, though, is to catch and fix as
+many of those errors as possible before our code goes into the mainline
+kernel. To that end, the kernel developers have put together an impressive
+array of tools which can catch a wide variety of obscure problems in an
+automated way. Any problem caught by the computer is a problem which will
+not afflict a user later on, so it stands to reason that the automated
+tools should be used whenever possible.
+
+The first step is simply to heed the warnings produced by the compiler.
+Contemporary versions of gcc can detect (and warn about) a large number of
+potential errors. Quite often, these warnings point to real problems.
+Code submitted for review should, as a rule, not produce any compiler
+warnings. When silencing warnings, take care to understand the real cause
+and try to avoid "fixes" which make the warning go away without addressing
+its cause.
+
+Note that not all compiler warnings are enabled by default. Build the
+kernel with "make EXTRA_CFLAGS=-W" to get the full set.
+
+The kernel provides several configuration options which turn on debugging
+features; most of these are found in the "kernel hacking" submenu. Several
+of these options should be turned on for any kernel used for development or
+testing purposes. In particular, you should turn on:
+
+ - ENABLE_WARN_DEPRECATED, ENABLE_MUST_CHECK, and FRAME_WARN to get an
+ extra set of warnings for problems like the use of deprecated interfaces
+ or ignoring an important return value from a function. The output
+ generated by these warnings can be verbose, but one need not worry about
+ warnings from other parts of the kernel.
+
+ - DEBUG_OBJECTS will add code to track the lifetime of various objects
+ created by the kernel and warn when things are done out of order. If
+ you are adding a subsystem which creates (and exports) complex objects
+ of its own, consider adding support for the object debugging
+ infrastructure.
+
+ - DEBUG_SLAB can find a variety of memory allocation and use errors; it
+ should be used on most development kernels.
+
+ - DEBUG_SPINLOCK, DEBUG_ATOMIC_SLEEP, and DEBUG_MUTEXES will find a
+ number of common locking errors.
+
+There are quite a few other debugging options, some of which will be
+discussed below. Some of them have a significant performance impact and
+should not be used all of the time. But some time spent learning the
+available options will likely be paid back many times over in short order.
+
+One of the heavier debugging tools is the locking checker, or "lockdep."
+This tool will track the acquisition and release of every lock (spinlock or
+mutex) in the system, the order in which locks are acquired relative to
+each other, the current interrupt environment, and more. It can then
+ensure that locks are always acquired in the same order, that the same
+interrupt assumptions apply in all situations, and so on. In other words,
+lockdep can find a number of scenarios in which the system could, on rare
+occasion, deadlock. This kind of problem can be painful (for both
+developers and users) in a deployed system; lockdep allows them to be found
+in an automated manner ahead of time. Code with any sort of non-trivial
+locking should be run with lockdep enabled before being submitted for
+inclusion.
+
+As a diligent kernel programmer, you will, beyond doubt, check the return
+status of any operation (such as a memory allocation) which can fail. The
+fact of the matter, though, is that the resulting failure recovery paths
+are, probably, completely untested. Untested code tends to be broken code;
+you could be much more confident of your code if all those error-handling
+paths had been exercised a few times.
+
+The kernel provides a fault injection framework which can do exactly that,
+especially where memory allocations are involved. With fault injection
+enabled, a configurable percentage of memory allocations will be made to
+fail; these failures can be restricted to a specific range of code.
+Running with fault injection enabled allows the programmer to see how the
+code responds when things go badly. See
+Documentation/fault-injection/fault-injection.txt for more information on
+how to use this facility.
+
+Other kinds of errors can be found with the "sparse" static analysis tool.
+With sparse, the programmer can be warned about confusion between
+user-space and kernel-space addresses, mixture of big-endian and
+small-endian quantities, the passing of integer values where a set of bit
+flags is expected, and so on. Sparse must be installed separately (it can
+be found at https://sparse.wiki.kernel.org/index.php/Main_Page if your
+distributor does not package it); it can then be run on the code by adding
+"C=1" to your make command.
+
+The "Coccinelle" tool (http://coccinelle.lip6.fr/) is able to find a wide
+variety of potential coding problems; it can also propose fixes for those
+problems. Quite a few "semantic patches" for the kernel have been packaged
+under the scripts/coccinelle directory; running "make coccicheck" will run
+through those semantic patches and report on any problems found. See
+Documentation/coccinelle.txt for more information.
+
+Other kinds of portability errors are best found by compiling your code for
+other architectures. If you do not happen to have an S/390 system or a
+Blackfin development board handy, you can still perform the compilation
+step. A large set of cross compilers for x86 systems can be found at
+
+ http://www.kernel.org/pub/tools/crosstool/
+
+Some time spent installing and using these compilers will help avoid
+embarrassment later.
+
+
+4.3: DOCUMENTATION
+
+Documentation has often been more the exception than the rule with kernel
+development. Even so, adequate documentation will help to ease the merging
+of new code into the kernel, make life easier for other developers, and
+will be helpful for your users. In many cases, the addition of
+documentation has become essentially mandatory.
+
+The first piece of documentation for any patch is its associated
+changelog. Log entries should describe the problem being solved, the form
+of the solution, the people who worked on the patch, any relevant
+effects on performance, and anything else that might be needed to
+understand the patch. Be sure that the changelog says *why* the patch is
+worth applying; a surprising number of developers fail to provide that
+information.
+
+Any code which adds a new user-space interface - including new sysfs or
+/proc files - should include documentation of that interface which enables
+user-space developers to know what they are working with. See
+Documentation/ABI/README for a description of how this documentation should
+be formatted and what information needs to be provided.
+
+The file Documentation/kernel-parameters.txt describes all of the kernel's
+boot-time parameters. Any patch which adds new parameters should add the
+appropriate entries to this file.
+
+Any new configuration options must be accompanied by help text which
+clearly explains the options and when the user might want to select them.
+
+Internal API information for many subsystems is documented by way of
+specially-formatted comments; these comments can be extracted and formatted
+in a number of ways by the "kernel-doc" script. If you are working within
+a subsystem which has kerneldoc comments, you should maintain them and add
+them, as appropriate, for externally-available functions. Even in areas
+which have not been so documented, there is no harm in adding kerneldoc
+comments for the future; indeed, this can be a useful activity for
+beginning kernel developers. The format of these comments, along with some
+information on how to create kerneldoc templates can be found in the file
+Documentation/kernel-doc-nano-HOWTO.txt.
+
+Anybody who reads through a significant amount of existing kernel code will
+note that, often, comments are most notable by their absence. Once again,
+the expectations for new code are higher than they were in the past;
+merging uncommented code will be harder. That said, there is little desire
+for verbosely-commented code. The code should, itself, be readable, with
+comments explaining the more subtle aspects.
+
+Certain things should always be commented. Uses of memory barriers should
+be accompanied by a line explaining why the barrier is necessary. The
+locking rules for data structures generally need to be explained somewhere.
+Major data structures need comprehensive documentation in general.
+Non-obvious dependencies between separate bits of code should be pointed
+out. Anything which might tempt a code janitor to make an incorrect
+"cleanup" needs a comment saying why it is done the way it is. And so on.
+
+
+4.4: INTERNAL API CHANGES
+
+The binary interface provided by the kernel to user space cannot be broken
+except under the most severe circumstances. The kernel's internal
+programming interfaces, instead, are highly fluid and can be changed when
+the need arises. If you find yourself having to work around a kernel API,
+or simply not using a specific functionality because it does not meet your
+needs, that may be a sign that the API needs to change. As a kernel
+developer, you are empowered to make such changes.
+
+There are, of course, some catches. API changes can be made, but they need
+to be well justified. So any patch making an internal API change should be
+accompanied by a description of what the change is and why it is
+necessary. This kind of change should also be broken out into a separate
+patch, rather than buried within a larger patch.
+
+The other catch is that a developer who changes an internal API is
+generally charged with the task of fixing any code within the kernel tree
+which is broken by the change. For a widely-used function, this duty can
+lead to literally hundreds or thousands of changes - many of which are
+likely to conflict with work being done by other developers. Needless to
+say, this can be a large job, so it is best to be sure that the
+justification is solid. Note that the Coccinelle tool can help with
+wide-ranging API changes.
+
+When making an incompatible API change, one should, whenever possible,
+ensure that code which has not been updated is caught by the compiler.
+This will help you to be sure that you have found all in-tree uses of that
+interface. It will also alert developers of out-of-tree code that there is
+a change that they need to respond to. Supporting out-of-tree code is not
+something that kernel developers need to be worried about, but we also do
+not have to make life harder for out-of-tree developers than it needs to
+be.
diff --git a/Documentation/development-process/5.Posting b/Documentation/development-process/5.Posting
new file mode 100644
index 000000000..8a48c9b62
--- /dev/null
+++ b/Documentation/development-process/5.Posting
@@ -0,0 +1,307 @@
+5: POSTING PATCHES
+
+Sooner or later, the time comes when your work is ready to be presented to
+the community for review and, eventually, inclusion into the mainline
+kernel. Unsurprisingly, the kernel development community has evolved a set
+of conventions and procedures which are used in the posting of patches;
+following them will make life much easier for everybody involved. This
+document will attempt to cover these expectations in reasonable detail;
+more information can also be found in the files SubmittingPatches,
+SubmittingDrivers, and SubmitChecklist in the kernel documentation
+directory.
+
+
+5.1: WHEN TO POST
+
+There is a constant temptation to avoid posting patches before they are
+completely "ready." For simple patches, that is not a problem. If the
+work being done is complex, though, there is a lot to be gained by getting
+feedback from the community before the work is complete. So you should
+consider posting in-progress work, or even making a git tree available so
+that interested developers can catch up with your work at any time.
+
+When posting code which is not yet considered ready for inclusion, it is a
+good idea to say so in the posting itself. Also mention any major work
+which remains to be done and any known problems. Fewer people will look at
+patches which are known to be half-baked, but those who do will come in
+with the idea that they can help you drive the work in the right direction.
+
+
+5.2: BEFORE CREATING PATCHES
+
+There are a number of things which should be done before you consider
+sending patches to the development community. These include:
+
+ - Test the code to the extent that you can. Make use of the kernel's
+ debugging tools, ensure that the kernel will build with all reasonable
+ combinations of configuration options, use cross-compilers to build for
+ different architectures, etc.
+
+ - Make sure your code is compliant with the kernel coding style
+ guidelines.
+
+ - Does your change have performance implications? If so, you should run
+ benchmarks showing what the impact (or benefit) of your change is; a
+ summary of the results should be included with the patch.
+
+ - Be sure that you have the right to post the code. If this work was done
+ for an employer, the employer likely has a right to the work and must be
+ agreeable with its release under the GPL.
+
+As a general rule, putting in some extra thought before posting code almost
+always pays back the effort in short order.
+
+
+5.3: PATCH PREPARATION
+
+The preparation of patches for posting can be a surprising amount of work,
+but, once again, attempting to save time here is not generally advisable
+even in the short term.
+
+Patches must be prepared against a specific version of the kernel. As a
+general rule, a patch should be based on the current mainline as found in
+Linus's git tree. When basing on mainline, start with a well-known release
+point - a stable or -rc release - rather than branching off the mainline at
+an arbitrary spot.
+
+It may become necessary to make versions against -mm, linux-next, or a
+subsystem tree, though, to facilitate wider testing and review. Depending
+on the area of your patch and what is going on elsewhere, basing a patch
+against these other trees can require a significant amount of work
+resolving conflicts and dealing with API changes.
+
+Only the most simple changes should be formatted as a single patch;
+everything else should be made as a logical series of changes. Splitting
+up patches is a bit of an art; some developers spend a long time figuring
+out how to do it in the way that the community expects. There are a few
+rules of thumb, however, which can help considerably:
+
+ - The patch series you post will almost certainly not be the series of
+ changes found in your working revision control system. Instead, the
+ changes you have made need to be considered in their final form, then
+ split apart in ways which make sense. The developers are interested in
+ discrete, self-contained changes, not the path you took to get to those
+ changes.
+
+ - Each logically independent change should be formatted as a separate
+ patch. These changes can be small ("add a field to this structure") or
+ large (adding a significant new driver, for example), but they should be
+ conceptually small and amenable to a one-line description. Each patch
+ should make a specific change which can be reviewed on its own and
+ verified to do what it says it does.
+
+ - As a way of restating the guideline above: do not mix different types of
+ changes in the same patch. If a single patch fixes a critical security
+ bug, rearranges a few structures, and reformats the code, there is a
+ good chance that it will be passed over and the important fix will be
+ lost.
+
+ - Each patch should yield a kernel which builds and runs properly; if your
+ patch series is interrupted in the middle, the result should still be a
+ working kernel. Partial application of a patch series is a common
+ scenario when the "git bisect" tool is used to find regressions; if the
+ result is a broken kernel, you will make life harder for developers and
+ users who are engaging in the noble work of tracking down problems.
+
+ - Do not overdo it, though. One developer once posted a set of edits
+ to a single file as 500 separate patches - an act which did not make him
+ the most popular person on the kernel mailing list. A single patch can
+ be reasonably large as long as it still contains a single *logical*
+ change.
+
+ - It can be tempting to add a whole new infrastructure with a series of
+ patches, but to leave that infrastructure unused until the final patch
+ in the series enables the whole thing. This temptation should be
+ avoided if possible; if that series adds regressions, bisection will
+ finger the last patch as the one which caused the problem, even though
+ the real bug is elsewhere. Whenever possible, a patch which adds new
+ code should make that code active immediately.
+
+Working to create the perfect patch series can be a frustrating process
+which takes quite a bit of time and thought after the "real work" has been
+done. When done properly, though, it is time well spent.
+
+
+5.4: PATCH FORMATTING AND CHANGELOGS
+
+So now you have a perfect series of patches for posting, but the work is
+not done quite yet. Each patch needs to be formatted into a message which
+quickly and clearly communicates its purpose to the rest of the world. To
+that end, each patch will be composed of the following:
+
+ - An optional "From" line naming the author of the patch. This line is
+ only necessary if you are passing on somebody else's patch via email,
+ but it never hurts to add it when in doubt.
+
+ - A one-line description of what the patch does. This message should be
+ enough for a reader who sees it with no other context to figure out the
+ scope of the patch; it is the line that will show up in the "short form"
+ changelogs. This message is usually formatted with the relevant
+ subsystem name first, followed by the purpose of the patch. For
+ example:
+
+ gpio: fix build on CONFIG_GPIO_SYSFS=n
+
+ - A blank line followed by a detailed description of the contents of the
+ patch. This description can be as long as is required; it should say
+ what the patch does and why it should be applied to the kernel.
+
+ - One or more tag lines, with, at a minimum, one Signed-off-by: line from
+ the author of the patch. Tags will be described in more detail below.
+
+The items above, together, form the changelog for the patch. Writing good
+changelogs is a crucial but often-neglected art; it's worth spending
+another moment discussing this issue. When writing a changelog, you should
+bear in mind that a number of different people will be reading your words.
+These include subsystem maintainers and reviewers who need to decide
+whether the patch should be included, distributors and other maintainers
+trying to decide whether a patch should be backported to other kernels, bug
+hunters wondering whether the patch is responsible for a problem they are
+chasing, users who want to know how the kernel has changed, and more. A
+good changelog conveys the needed information to all of these people in the
+most direct and concise way possible.
+
+To that end, the summary line should describe the effects of and motivation
+for the change as well as possible given the one-line constraint. The
+detailed description can then amplify on those topics and provide any
+needed additional information. If the patch fixes a bug, cite the commit
+which introduced the bug if possible (and please provide both the commit ID
+and the title when citing commits). If a problem is associated with
+specific log or compiler output, include that output to help others
+searching for a solution to the same problem. If the change is meant to
+support other changes coming in later patch, say so. If internal APIs are
+changed, detail those changes and how other developers should respond. In
+general, the more you can put yourself into the shoes of everybody who will
+be reading your changelog, the better that changelog (and the kernel as a
+whole) will be.
+
+Needless to say, the changelog should be the text used when committing the
+change to a revision control system. It will be followed by:
+
+ - The patch itself, in the unified ("-u") patch format. Using the "-p"
+ option to diff will associate function names with changes, making the
+ resulting patch easier for others to read.
+
+You should avoid including changes to irrelevant files (those generated by
+the build process, for example, or editor backup files) in the patch. The
+file "dontdiff" in the Documentation directory can help in this regard;
+pass it to diff with the "-X" option.
+
+The tags mentioned above are used to describe how various developers have
+been associated with the development of this patch. They are described in
+detail in the SubmittingPatches document; what follows here is a brief
+summary. Each of these lines has the format:
+
+ tag: Full Name <email address> optional-other-stuff
+
+The tags in common use are:
+
+ - Signed-off-by: this is a developer's certification that he or she has
+ the right to submit the patch for inclusion into the kernel. It is an
+ agreement to the Developer's Certificate of Origin, the full text of
+ which can be found in Documentation/SubmittingPatches. Code without a
+ proper signoff cannot be merged into the mainline.
+
+ - Acked-by: indicates an agreement by another developer (often a
+ maintainer of the relevant code) that the patch is appropriate for
+ inclusion into the kernel.
+
+ - Tested-by: states that the named person has tested the patch and found
+ it to work.
+
+ - Reviewed-by: the named developer has reviewed the patch for correctness;
+ see the reviewer's statement in Documentation/SubmittingPatches for more
+ detail.
+
+ - Reported-by: names a user who reported a problem which is fixed by this
+ patch; this tag is used to give credit to the (often underappreciated)
+ people who test our code and let us know when things do not work
+ correctly.
+
+ - Cc: the named person received a copy of the patch and had the
+ opportunity to comment on it.
+
+Be careful in the addition of tags to your patches: only Cc: is appropriate
+for addition without the explicit permission of the person named.
+
+
+5.5: SENDING THE PATCH
+
+Before you mail your patches, there are a couple of other things you should
+take care of:
+
+ - Are you sure that your mailer will not corrupt the patches? Patches
+ which have had gratuitous white-space changes or line wrapping performed
+ by the mail client will not apply at the other end, and often will not
+ be examined in any detail. If there is any doubt at all, mail the patch
+ to yourself and convince yourself that it shows up intact.
+
+ Documentation/email-clients.txt has some helpful hints on making
+ specific mail clients work for sending patches.
+
+ - Are you sure your patch is free of silly mistakes? You should always
+ run patches through scripts/checkpatch.pl and address the complaints it
+ comes up with. Please bear in mind that checkpatch.pl, while being the
+ embodiment of a fair amount of thought about what kernel patches should
+ look like, is not smarter than you. If fixing a checkpatch.pl complaint
+ would make the code worse, don't do it.
+
+Patches should always be sent as plain text. Please do not send them as
+attachments; that makes it much harder for reviewers to quote sections of
+the patch in their replies. Instead, just put the patch directly into your
+message.
+
+When mailing patches, it is important to send copies to anybody who might
+be interested in it. Unlike some other projects, the kernel encourages
+people to err on the side of sending too many copies; don't assume that the
+relevant people will see your posting on the mailing lists. In particular,
+copies should go to:
+
+ - The maintainer(s) of the affected subsystem(s). As described earlier,
+ the MAINTAINERS file is the first place to look for these people.
+
+ - Other developers who have been working in the same area - especially
+ those who might be working there now. Using git to see who else has
+ modified the files you are working on can be helpful.
+
+ - If you are responding to a bug report or a feature request, copy the
+ original poster as well.
+
+ - Send a copy to the relevant mailing list, or, if nothing else applies,
+ the linux-kernel list.
+
+ - If you are fixing a bug, think about whether the fix should go into the
+ next stable update. If so, stable@vger.kernel.org should get a copy of
+ the patch. Also add a "Cc: stable@vger.kernel.org" to the tags within
+ the patch itself; that will cause the stable team to get a notification
+ when your fix goes into the mainline.
+
+When selecting recipients for a patch, it is good to have an idea of who
+you think will eventually accept the patch and get it merged. While it
+is possible to send patches directly to Linus Torvalds and have him merge
+them, things are not normally done that way. Linus is busy, and there are
+subsystem maintainers who watch over specific parts of the kernel. Usually
+you will be wanting that maintainer to merge your patches. If there is no
+obvious maintainer, Andrew Morton is often the patch target of last resort.
+
+Patches need good subject lines. The canonical format for a patch line is
+something like:
+
+ [PATCH nn/mm] subsys: one-line description of the patch
+
+where "nn" is the ordinal number of the patch, "mm" is the total number of
+patches in the series, and "subsys" is the name of the affected subsystem.
+Clearly, nn/mm can be omitted for a single, standalone patch.
+
+If you have a significant series of patches, it is customary to send an
+introductory description as part zero. This convention is not universally
+followed though; if you use it, remember that information in the
+introduction does not make it into the kernel changelogs. So please ensure
+that the patches, themselves, have complete changelog information.
+
+In general, the second and following parts of a multi-part patch should be
+sent as a reply to the first part so that they all thread together at the
+receiving end. Tools like git and quilt have commands to mail out a set of
+patches with the proper threading. If you have a long series, though, and
+are using git, please stay away from the --chain-reply-to option to avoid
+creating exceptionally deep nesting.
diff --git a/Documentation/development-process/6.Followthrough b/Documentation/development-process/6.Followthrough
new file mode 100644
index 000000000..41d324a94
--- /dev/null
+++ b/Documentation/development-process/6.Followthrough
@@ -0,0 +1,206 @@
+6: FOLLOWTHROUGH
+
+At this point, you have followed the guidelines given so far and, with the
+addition of your own engineering skills, have posted a perfect series of
+patches. One of the biggest mistakes that even experienced kernel
+developers can make is to conclude that their work is now done. In truth,
+posting patches indicates a transition into the next stage of the process,
+with, possibly, quite a bit of work yet to be done.
+
+It is a rare patch which is so good at its first posting that there is no
+room for improvement. The kernel development process recognizes this fact,
+and, as a result, is heavily oriented toward the improvement of posted
+code. You, as the author of that code, will be expected to work with the
+kernel community to ensure that your code is up to the kernel's quality
+standards. A failure to participate in this process is quite likely to
+prevent the inclusion of your patches into the mainline.
+
+
+6.1: WORKING WITH REVIEWERS
+
+A patch of any significance will result in a number of comments from other
+developers as they review the code. Working with reviewers can be, for
+many developers, the most intimidating part of the kernel development
+process. Life can be made much easier, though, if you keep a few things in
+mind:
+
+ - If you have explained your patch well, reviewers will understand its
+ value and why you went to the trouble of writing it. But that value
+ will not keep them from asking a fundamental question: what will it be
+ like to maintain a kernel with this code in it five or ten years later?
+ Many of the changes you may be asked to make - from coding style tweaks
+ to substantial rewrites - come from the understanding that Linux will
+ still be around and under development a decade from now.
+
+ - Code review is hard work, and it is a relatively thankless occupation;
+ people remember who wrote kernel code, but there is little lasting fame
+ for those who reviewed it. So reviewers can get grumpy, especially when
+ they see the same mistakes being made over and over again. If you get a
+ review which seems angry, insulting, or outright offensive, resist the
+ impulse to respond in kind. Code review is about the code, not about
+ the people, and code reviewers are not attacking you personally.
+
+ - Similarly, code reviewers are not trying to promote their employers'
+ agendas at the expense of your own. Kernel developers often expect to
+ be working on the kernel years from now, but they understand that their
+ employer could change. They truly are, almost without exception,
+ working toward the creation of the best kernel they can; they are not
+ trying to create discomfort for their employers' competitors.
+
+What all of this comes down to is that, when reviewers send you comments,
+you need to pay attention to the technical observations that they are
+making. Do not let their form of expression or your own pride keep that
+from happening. When you get review comments on a patch, take the time to
+understand what the reviewer is trying to say. If possible, fix the things
+that the reviewer is asking you to fix. And respond back to the reviewer:
+thank them, and describe how you will answer their questions.
+
+Note that you do not have to agree with every change suggested by
+reviewers. If you believe that the reviewer has misunderstood your code,
+explain what is really going on. If you have a technical objection to a
+suggested change, describe it and justify your solution to the problem. If
+your explanations make sense, the reviewer will accept them. Should your
+explanation not prove persuasive, though, especially if others start to
+agree with the reviewer, take some time to think things over again. It can
+be easy to become blinded by your own solution to a problem to the point
+that you don't realize that something is fundamentally wrong or, perhaps,
+you're not even solving the right problem.
+
+Andrew Morton has suggested that every review comment which does not result
+in a code change should result in an additional code comment instead; that
+can help future reviewers avoid the questions which came up the first time
+around.
+
+One fatal mistake is to ignore review comments in the hope that they will
+go away. They will not go away. If you repost code without having
+responded to the comments you got the time before, you're likely to find
+that your patches go nowhere.
+
+Speaking of reposting code: please bear in mind that reviewers are not
+going to remember all the details of the code you posted the last time
+around. So it is always a good idea to remind reviewers of previously
+raised issues and how you dealt with them; the patch changelog is a good
+place for this kind of information. Reviewers should not have to search
+through list archives to familiarize themselves with what was said last
+time; if you help them get a running start, they will be in a better mood
+when they revisit your code.
+
+What if you've tried to do everything right and things still aren't going
+anywhere? Most technical disagreements can be resolved through discussion,
+but there are times when somebody simply has to make a decision. If you
+honestly believe that this decision is going against you wrongly, you can
+always try appealing to a higher power. As of this writing, that higher
+power tends to be Andrew Morton. Andrew has a great deal of respect in the
+kernel development community; he can often unjam a situation which seems to
+be hopelessly blocked. Appealing to Andrew should not be done lightly,
+though, and not before all other alternatives have been explored. And bear
+in mind, of course, that he may not agree with you either.
+
+
+6.2: WHAT HAPPENS NEXT
+
+If a patch is considered to be a good thing to add to the kernel, and once
+most of the review issues have been resolved, the next step is usually
+entry into a subsystem maintainer's tree. How that works varies from one
+subsystem to the next; each maintainer has his or her own way of doing
+things. In particular, there may be more than one tree - one, perhaps,
+dedicated to patches planned for the next merge window, and another for
+longer-term work.
+
+For patches applying to areas for which there is no obvious subsystem tree
+(memory management patches, for example), the default tree often ends up
+being -mm. Patches which affect multiple subsystems can also end up going
+through the -mm tree.
+
+Inclusion into a subsystem tree can bring a higher level of visibility to a
+patch. Now other developers working with that tree will get the patch by
+default. Subsystem trees typically feed linux-next as well, making their
+contents visible to the development community as a whole. At this point,
+there's a good chance that you will get more comments from a new set of
+reviewers; these comments need to be answered as in the previous round.
+
+What may also happen at this point, depending on the nature of your patch,
+is that conflicts with work being done by others turn up. In the worst
+case, heavy patch conflicts can result in some work being put on the back
+burner so that the remaining patches can be worked into shape and merged.
+Other times, conflict resolution will involve working with the other
+developers and, possibly, moving some patches between trees to ensure that
+everything applies cleanly. This work can be a pain, but count your
+blessings: before the advent of the linux-next tree, these conflicts often
+only turned up during the merge window and had to be addressed in a hurry.
+Now they can be resolved at leisure, before the merge window opens.
+
+Some day, if all goes well, you'll log on and see that your patch has been
+merged into the mainline kernel. Congratulations! Once the celebration is
+complete (and you have added yourself to the MAINTAINERS file), though, it
+is worth remembering an important little fact: the job still is not done.
+Merging into the mainline brings its own challenges.
+
+To begin with, the visibility of your patch has increased yet again. There
+may be a new round of comments from developers who had not been aware of
+the patch before. It may be tempting to ignore them, since there is no
+longer any question of your code being merged. Resist that temptation,
+though; you still need to be responsive to developers who have questions or
+suggestions.
+
+More importantly, though: inclusion into the mainline puts your code into
+the hands of a much larger group of testers. Even if you have contributed
+a driver for hardware which is not yet available, you will be surprised by
+how many people will build your code into their kernels. And, of course,
+where there are testers, there will be bug reports.
+
+The worst sort of bug reports are regressions. If your patch causes a
+regression, you'll find an uncomfortable number of eyes upon you;
+regressions need to be fixed as soon as possible. If you are unwilling or
+unable to fix the regression (and nobody else does it for you), your patch
+will almost certainly be removed during the stabilization period. Beyond
+negating all of the work you have done to get your patch into the mainline,
+having a patch pulled as the result of a failure to fix a regression could
+well make it harder for you to get work merged in the future.
+
+After any regressions have been dealt with, there may be other, ordinary
+bugs to deal with. The stabilization period is your best opportunity to
+fix these bugs and ensure that your code's debut in a mainline kernel
+release is as solid as possible. So, please, answer bug reports, and fix
+the problems if at all possible. That's what the stabilization period is
+for; you can start creating cool new patches once any problems with the old
+ones have been taken care of.
+
+And don't forget that there are other milestones which may also create bug
+reports: the next mainline stable release, when prominent distributors pick
+up a version of the kernel containing your patch, etc. Continuing to
+respond to these reports is a matter of basic pride in your work. If that
+is insufficient motivation, though, it's also worth considering that the
+development community remembers developers who lose interest in their code
+after it's merged. The next time you post a patch, they will be evaluating
+it with the assumption that you will not be around to maintain it
+afterward.
+
+
+6.3: OTHER THINGS THAT CAN HAPPEN
+
+One day, you may open your mail client and see that somebody has mailed you
+a patch to your code. That is one of the advantages of having your code
+out there in the open, after all. If you agree with the patch, you can
+either forward it on to the subsystem maintainer (be sure to include a
+proper From: line so that the attribution is correct, and add a signoff of
+your own), or send an Acked-by: response back and let the original poster
+send it upward.
+
+If you disagree with the patch, send a polite response explaining why. If
+possible, tell the author what changes need to be made to make the patch
+acceptable to you. There is a certain resistance to merging patches which
+are opposed by the author and maintainer of the code, but it only goes so
+far. If you are seen as needlessly blocking good work, those patches will
+eventually flow around you and get into the mainline anyway. In the Linux
+kernel, nobody has absolute veto power over any code. Except maybe Linus.
+
+On very rare occasion, you may see something completely different: another
+developer posts a different solution to your problem. At that point,
+chances are that one of the two patches will not be merged, and "mine was
+here first" is not considered to be a compelling technical argument. If
+somebody else's patch displaces yours and gets into the mainline, there is
+really only one way to respond: be pleased that your problem got solved and
+get on with your work. Having one's work shoved aside in this manner can
+be hurtful and discouraging, but the community will remember your reaction
+long after they have forgotten whose patch actually got merged.
diff --git a/Documentation/development-process/7.AdvancedTopics b/Documentation/development-process/7.AdvancedTopics
new file mode 100644
index 000000000..26dc3fa19
--- /dev/null
+++ b/Documentation/development-process/7.AdvancedTopics
@@ -0,0 +1,173 @@
+7: ADVANCED TOPICS
+
+At this point, hopefully, you have a handle on how the development process
+works. There is still more to learn, however! This section will cover a
+number of topics which can be helpful for developers wanting to become a
+regular part of the Linux kernel development process.
+
+7.1: MANAGING PATCHES WITH GIT
+
+The use of distributed version control for the kernel began in early 2002,
+when Linus first started playing with the proprietary BitKeeper
+application. While BitKeeper was controversial, the approach to software
+version management it embodied most certainly was not. Distributed version
+control enabled an immediate acceleration of the kernel development
+project. In current times, there are several free alternatives to
+BitKeeper. For better or for worse, the kernel project has settled on git
+as its tool of choice.
+
+Managing patches with git can make life much easier for the developer,
+especially as the volume of those patches grows. Git also has its rough
+edges and poses certain hazards; it is a young and powerful tool which is
+still being civilized by its developers. This document will not attempt to
+teach the reader how to use git; that would be sufficient material for a
+long document in its own right. Instead, the focus here will be on how git
+fits into the kernel development process in particular. Developers who
+wish to come up to speed with git will find more information at:
+
+ http://git-scm.com/
+
+ http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
+
+and on various tutorials found on the web.
+
+The first order of business is to read the above sites and get a solid
+understanding of how git works before trying to use it to make patches
+available to others. A git-using developer should be able to obtain a copy
+of the mainline repository, explore the revision history, commit changes to
+the tree, use branches, etc. An understanding of git's tools for the
+rewriting of history (such as rebase) is also useful. Git comes with its
+own terminology and concepts; a new user of git should know about refs,
+remote branches, the index, fast-forward merges, pushes and pulls, detached
+heads, etc. It can all be a little intimidating at the outset, but the
+concepts are not that hard to grasp with a bit of study.
+
+Using git to generate patches for submission by email can be a good
+exercise while coming up to speed.
+
+When you are ready to start putting up git trees for others to look at, you
+will, of course, need a server that can be pulled from. Setting up such a
+server with git-daemon is relatively straightforward if you have a system
+which is accessible to the Internet. Otherwise, free, public hosting sites
+(Github, for example) are starting to appear on the net. Established
+developers can get an account on kernel.org, but those are not easy to come
+by; see http://kernel.org/faq/ for more information.
+
+The normal git workflow involves the use of a lot of branches. Each line
+of development can be separated into a separate "topic branch" and
+maintained independently. Branches in git are cheap, there is no reason to
+not make free use of them. And, in any case, you should not do your
+development in any branch which you intend to ask others to pull from.
+Publicly-available branches should be created with care; merge in patches
+from development branches when they are in complete form and ready to go -
+not before.
+
+Git provides some powerful tools which can allow you to rewrite your
+development history. An inconvenient patch (one which breaks bisection,
+say, or which has some other sort of obvious bug) can be fixed in place or
+made to disappear from the history entirely. A patch series can be
+rewritten as if it had been written on top of today's mainline, even though
+you have been working on it for months. Changes can be transparently
+shifted from one branch to another. And so on. Judicious use of git's
+ability to revise history can help in the creation of clean patch sets with
+fewer problems.
+
+Excessive use of this capability can lead to other problems, though, beyond
+a simple obsession for the creation of the perfect project history.
+Rewriting history will rewrite the changes contained in that history,
+turning a tested (hopefully) kernel tree into an untested one. But, beyond
+that, developers cannot easily collaborate if they do not have a shared
+view of the project history; if you rewrite history which other developers
+have pulled into their repositories, you will make life much more difficult
+for those developers. So a simple rule of thumb applies here: history
+which has been exported to others should generally be seen as immutable
+thereafter.
+
+So, once you push a set of changes to your publicly-available server, those
+changes should not be rewritten. Git will attempt to enforce this rule if
+you try to push changes which do not result in a fast-forward merge
+(i.e. changes which do not share the same history). It is possible to
+override this check, and there may be times when it is necessary to rewrite
+an exported tree. Moving changesets between trees to avoid conflicts in
+linux-next is one example. But such actions should be rare. This is one
+of the reasons why development should be done in private branches (which
+can be rewritten if necessary) and only moved into public branches when
+it's in a reasonably advanced state.
+
+As the mainline (or other tree upon which a set of changes is based)
+advances, it is tempting to merge with that tree to stay on the leading
+edge. For a private branch, rebasing can be an easy way to keep up with
+another tree, but rebasing is not an option once a tree is exported to the
+world. Once that happens, a full merge must be done. Merging occasionally
+makes good sense, but overly frequent merges can clutter the history
+needlessly. Suggested technique in this case is to merge infrequently, and
+generally only at specific release points (such as a mainline -rc
+release). If you are nervous about specific changes, you can always
+perform test merges in a private branch. The git "rerere" tool can be
+useful in such situations; it remembers how merge conflicts were resolved
+so that you don't have to do the same work twice.
+
+One of the biggest recurring complaints about tools like git is this: the
+mass movement of patches from one repository to another makes it easy to
+slip in ill-advised changes which go into the mainline below the review
+radar. Kernel developers tend to get unhappy when they see that kind of
+thing happening; putting up a git tree with unreviewed or off-topic patches
+can affect your ability to get trees pulled in the future. Quoting Linus:
+
+ You can send me patches, but for me to pull a git patch from you, I
+ need to know that you know what you're doing, and I need to be able
+ to trust things *without* then having to go and check every
+ individual change by hand.
+
+(http://lwn.net/Articles/224135/).
+
+To avoid this kind of situation, ensure that all patches within a given
+branch stick closely to the associated topic; a "driver fixes" branch
+should not be making changes to the core memory management code. And, most
+importantly, do not use a git tree to bypass the review process. Post an
+occasional summary of the tree to the relevant list, and, when the time is
+right, request that the tree be included in linux-next.
+
+If and when others start to send patches for inclusion into your tree,
+don't forget to review them. Also ensure that you maintain the correct
+authorship information; the git "am" tool does its best in this regard, but
+you may have to add a "From:" line to the patch if it has been relayed to
+you via a third party.
+
+When requesting a pull, be sure to give all the relevant information: where
+your tree is, what branch to pull, and what changes will result from the
+pull. The git request-pull command can be helpful in this regard; it will
+format the request as other developers expect, and will also check to be
+sure that you have remembered to push those changes to the public server.
+
+
+7.2: REVIEWING PATCHES
+
+Some readers will certainly object to putting this section with "advanced
+topics" on the grounds that even beginning kernel developers should be
+reviewing patches. It is certainly true that there is no better way to
+learn how to program in the kernel environment than by looking at code
+posted by others. In addition, reviewers are forever in short supply; by
+looking at code you can make a significant contribution to the process as a
+whole.
+
+Reviewing code can be an intimidating prospect, especially for a new kernel
+developer who may well feel nervous about questioning code - in public -
+which has been posted by those with more experience. Even code written by
+the most experienced developers can be improved, though. Perhaps the best
+piece of advice for reviewers (all reviewers) is this: phrase review
+comments as questions rather than criticisms. Asking "how does the lock
+get released in this path?" will always work better than stating "the
+locking here is wrong."
+
+Different developers will review code from different points of view. Some
+are mostly concerned with coding style and whether code lines have trailing
+white space. Others will focus primarily on whether the change implemented
+by the patch as a whole is a good thing for the kernel or not. Yet others
+will check for problematic locking, excessive stack usage, possible
+security issues, duplication of code found elsewhere, adequate
+documentation, adverse effects on performance, user-space ABI changes, etc.
+All types of review, if they lead to better code going into the kernel, are
+welcome and worthwhile.
+
+
diff --git a/Documentation/development-process/8.Conclusion b/Documentation/development-process/8.Conclusion
new file mode 100644
index 000000000..caef69022
--- /dev/null
+++ b/Documentation/development-process/8.Conclusion
@@ -0,0 +1,70 @@
+8: FOR MORE INFORMATION
+
+There are numerous sources of information on Linux kernel development and
+related topics. First among those will always be the Documentation
+directory found in the kernel source distribution. The top-level HOWTO
+file is an important starting point; SubmittingPatches and
+SubmittingDrivers are also something which all kernel developers should
+read. Many internal kernel APIs are documented using the kerneldoc
+mechanism; "make htmldocs" or "make pdfdocs" can be used to generate those
+documents in HTML or PDF format (though the version of TeX shipped by some
+distributions runs into internal limits and fails to process the documents
+properly).
+
+Various web sites discuss kernel development at all levels of detail. Your
+author would like to humbly suggest http://lwn.net/ as a source;
+information on many specific kernel topics can be found via the LWN kernel
+index at:
+
+ http://lwn.net/Kernel/Index/
+
+Beyond that, a valuable resource for kernel developers is:
+
+ http://kernelnewbies.org/
+
+And, of course, one should not forget http://kernel.org/, the definitive
+location for kernel release information.
+
+There are a number of books on kernel development:
+
+ Linux Device Drivers, 3rd Edition (Jonathan Corbet, Alessandro
+ Rubini, and Greg Kroah-Hartman). Online at
+ http://lwn.net/Kernel/LDD3/.
+
+ Linux Kernel Development (Robert Love).
+
+ Understanding the Linux Kernel (Daniel Bovet and Marco Cesati).
+
+All of these books suffer from a common fault, though: they tend to be
+somewhat obsolete by the time they hit the shelves, and they have been on
+the shelves for a while now. Still, there is quite a bit of good
+information to be found there.
+
+Documentation for git can be found at:
+
+ http://www.kernel.org/pub/software/scm/git/docs/
+
+ http://www.kernel.org/pub/software/scm/git/docs/user-manual.html
+
+
+9: CONCLUSION
+
+Congratulations to anybody who has made it through this long-winded
+document. Hopefully it has provided a helpful understanding of how the
+Linux kernel is developed and how you can participate in that process.
+
+In the end, it's the participation that matters. Any open source software
+project is no more than the sum of what its contributors put into it. The
+Linux kernel has progressed as quickly and as well as it has because it has
+been helped by an impressively large group of developers, all of whom are
+working to make it better. The kernel is a premier example of what can be
+done when thousands of people work together toward a common goal.
+
+The kernel can always benefit from a larger developer base, though. There
+is always more work to do. But, just as importantly, most other
+participants in the Linux ecosystem can benefit through contributing to the
+kernel. Getting code into the mainline is the key to higher code quality,
+lower maintenance and distribution costs, a higher level of influence over
+the direction of kernel development, and more. It is a situation where
+everybody involved wins. Fire up your editor and come join us; you will be
+more than welcome.
diff --git a/Documentation/device-mapper/cache-policies.txt b/Documentation/device-mapper/cache-policies.txt
new file mode 100644
index 000000000..0d124a971
--- /dev/null
+++ b/Documentation/device-mapper/cache-policies.txt
@@ -0,0 +1,97 @@
+Guidance for writing policies
+=============================
+
+Try to keep transactionality out of it. The core is careful to
+avoid asking about anything that is migrating. This is a pain, but
+makes it easier to write the policies.
+
+Mappings are loaded into the policy at construction time.
+
+Every bio that is mapped by the target is referred to the policy.
+The policy can return a simple HIT or MISS or issue a migration.
+
+Currently there's no way for the policy to issue background work,
+e.g. to start writing back dirty blocks that are going to be evicte
+soon.
+
+Because we map bios, rather than requests it's easy for the policy
+to get fooled by many small bios. For this reason the core target
+issues periodic ticks to the policy. It's suggested that the policy
+doesn't update states (eg, hit counts) for a block more than once
+for each tick. The core ticks by watching bios complete, and so
+trying to see when the io scheduler has let the ios run.
+
+
+Overview of supplied cache replacement policies
+===============================================
+
+multiqueue
+----------
+
+This policy is the default.
+
+The multiqueue policy has three sets of 16 queues: one set for entries
+waiting for the cache and another two for those in the cache (a set for
+clean entries and a set for dirty entries).
+
+Cache entries in the queues are aged based on logical time. Entry into
+the cache is based on variable thresholds and queue selection is based
+on hit count on entry. The policy aims to take different cache miss
+costs into account and to adjust to varying load patterns automatically.
+
+Message and constructor argument pairs are:
+ 'sequential_threshold <#nr_sequential_ios>'
+ 'random_threshold <#nr_random_ios>'
+ 'read_promote_adjustment <value>'
+ 'write_promote_adjustment <value>'
+ 'discard_promote_adjustment <value>'
+
+The sequential threshold indicates the number of contiguous I/Os
+required before a stream is treated as sequential. Once a stream is
+considered sequential it will bypass the cache. The random threshold
+is the number of intervening non-contiguous I/Os that must be seen
+before the stream is treated as random again.
+
+The sequential and random thresholds default to 512 and 4 respectively.
+
+Large, sequential I/Os are probably better left on the origin device
+since spindles tend to have good sequential I/O bandwidth. The
+io_tracker counts contiguous I/Os to try to spot when the I/O is in one
+of these sequential modes. But there are use-cases for wanting to
+promote sequential blocks to the cache (e.g. fast application startup).
+If sequential threshold is set to 0 the sequential I/O detection is
+disabled and sequential I/O will no longer implicitly bypass the cache.
+Setting the random threshold to 0 does _not_ disable the random I/O
+stream detection.
+
+Internally the mq policy determines a promotion threshold. If the hit
+count of a block not in the cache goes above this threshold it gets
+promoted to the cache. The read, write and discard promote adjustment
+tunables allow you to tweak the promotion threshold by adding a small
+value based on the io type. They default to 4, 8 and 1 respectively.
+If you're trying to quickly warm a new cache device you may wish to
+reduce these to encourage promotion. Remember to switch them back to
+their defaults after the cache fills though.
+
+cleaner
+-------
+
+The cleaner writes back all dirty blocks in a cache to decommission it.
+
+Examples
+========
+
+The syntax for a table is:
+ cache <metadata dev> <cache dev> <origin dev> <block size>
+ <#feature_args> [<feature arg>]*
+ <policy> <#policy_args> [<policy arg>]*
+
+The syntax to send a message using the dmsetup command is:
+ dmsetup message <mapped device> 0 sequential_threshold 1024
+ dmsetup message <mapped device> 0 random_threshold 8
+
+Using dmsetup:
+ dmsetup create blah --table "0 268435456 cache /dev/sdb /dev/sdc \
+ /dev/sdd 512 0 mq 4 sequential_threshold 1024 random_threshold 8"
+ creates a 128GB large mapped device named 'blah' with the
+ sequential threshold set to 1024 and the random_threshold set to 8.
diff --git a/Documentation/device-mapper/cache.txt b/Documentation/device-mapper/cache.txt
new file mode 100644
index 000000000..68c0f517c
--- /dev/null
+++ b/Documentation/device-mapper/cache.txt
@@ -0,0 +1,298 @@
+Introduction
+============
+
+dm-cache is a device mapper target written by Joe Thornber, Heinz
+Mauelshagen, and Mike Snitzer.
+
+It aims to improve performance of a block device (eg, a spindle) by
+dynamically migrating some of its data to a faster, smaller device
+(eg, an SSD).
+
+This device-mapper solution allows us to insert this caching at
+different levels of the dm stack, for instance above the data device for
+a thin-provisioning pool. Caching solutions that are integrated more
+closely with the virtual memory system should give better performance.
+
+The target reuses the metadata library used in the thin-provisioning
+library.
+
+The decision as to what data to migrate and when is left to a plug-in
+policy module. Several of these have been written as we experiment,
+and we hope other people will contribute others for specific io
+scenarios (eg. a vm image server).
+
+Glossary
+========
+
+ Migration - Movement of the primary copy of a logical block from one
+ device to the other.
+ Promotion - Migration from slow device to fast device.
+ Demotion - Migration from fast device to slow device.
+
+The origin device always contains a copy of the logical block, which
+may be out of date or kept in sync with the copy on the cache device
+(depending on policy).
+
+Design
+======
+
+Sub-devices
+-----------
+
+The target is constructed by passing three devices to it (along with
+other parameters detailed later):
+
+1. An origin device - the big, slow one.
+
+2. A cache device - the small, fast one.
+
+3. A small metadata device - records which blocks are in the cache,
+ which are dirty, and extra hints for use by the policy object.
+ This information could be put on the cache device, but having it
+ separate allows the volume manager to configure it differently,
+ e.g. as a mirror for extra robustness. This metadata device may only
+ be used by a single cache device.
+
+Fixed block size
+----------------
+
+The origin is divided up into blocks of a fixed size. This block size
+is configurable when you first create the cache. Typically we've been
+using block sizes of 256KB - 1024KB. The block size must be between 64
+(32KB) and 2097152 (1GB) and a multiple of 64 (32KB).
+
+Having a fixed block size simplifies the target a lot. But it is
+something of a compromise. For instance, a small part of a block may be
+getting hit a lot, yet the whole block will be promoted to the cache.
+So large block sizes are bad because they waste cache space. And small
+block sizes are bad because they increase the amount of metadata (both
+in core and on disk).
+
+Cache operating modes
+---------------------
+
+The cache has three operating modes: writeback, writethrough and
+passthrough.
+
+If writeback, the default, is selected then a write to a block that is
+cached will go only to the cache and the block will be marked dirty in
+the metadata.
+
+If writethrough is selected then a write to a cached block will not
+complete until it has hit both the origin and cache devices. Clean
+blocks should remain clean.
+
+If passthrough is selected, useful when the cache contents are not known
+to be coherent with the origin device, then all reads are served from
+the origin device (all reads miss the cache) and all writes are
+forwarded to the origin device; additionally, write hits cause cache
+block invalidates. To enable passthrough mode the cache must be clean.
+Passthrough mode allows a cache device to be activated without having to
+worry about coherency. Coherency that exists is maintained, although
+the cache will gradually cool as writes take place. If the coherency of
+the cache can later be verified, or established through use of the
+"invalidate_cblocks" message, the cache device can be transitioned to
+writethrough or writeback mode while still warm. Otherwise, the cache
+contents can be discarded prior to transitioning to the desired
+operating mode.
+
+A simple cleaner policy is provided, which will clean (write back) all
+dirty blocks in a cache. Useful for decommissioning a cache or when
+shrinking a cache. Shrinking the cache's fast device requires all cache
+blocks, in the area of the cache being removed, to be clean. If the
+area being removed from the cache still contains dirty blocks the resize
+will fail. Care must be taken to never reduce the volume used for the
+cache's fast device until the cache is clean. This is of particular
+importance if writeback mode is used. Writethrough and passthrough
+modes already maintain a clean cache. Future support to partially clean
+the cache, above a specified threshold, will allow for keeping the cache
+warm and in writeback mode during resize.
+
+Migration throttling
+--------------------
+
+Migrating data between the origin and cache device uses bandwidth.
+The user can set a throttle to prevent more than a certain amount of
+migration occurring at any one time. Currently we're not taking any
+account of normal io traffic going to the devices. More work needs
+doing here to avoid migrating during those peak io moments.
+
+For the time being, a message "migration_threshold <#sectors>"
+can be used to set the maximum number of sectors being migrated,
+the default being 204800 sectors (or 100MB).
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second. This
+means the cache behaves like a physical disk that has a volatile write
+cache. If power is lost you may lose some recent writes. The metadata
+should always be consistent in spite of any crash.
+
+The 'dirty' state for a cache block changes far too frequently for us
+to keep updating it on the fly. So we treat it as a hint. In normal
+operation it will be written when the dm device is suspended. If the
+system crashes all cache blocks will be assumed dirty when restarted.
+
+Per-block policy hints
+----------------------
+
+Policy plug-ins can store a chunk of data per cache block. It's up to
+the policy how big this chunk is, but it should be kept small. Like the
+dirty flags this data is lost if there's a crash so a safe fallback
+value should always be possible.
+
+For instance, the 'mq' policy, which is currently the default policy,
+uses this facility to store the hit count of the cache blocks. If
+there's a crash this information will be lost, which means the cache
+may be less efficient until those hit counts are regenerated.
+
+Policy hints affect performance, not correctness.
+
+Policy messaging
+----------------
+
+Policies will have different tunables, specific to each one, so we
+need a generic way of getting and setting these. Device-mapper
+messages are used. Refer to cache-policies.txt.
+
+Discard bitset resolution
+-------------------------
+
+We can avoid copying data during migration if we know the block has
+been discarded. A prime example of this is when mkfs discards the
+whole block device. We store a bitset tracking the discard state of
+blocks. However, we allow this bitset to have a different block size
+from the cache blocks. This is because we need to track the discard
+state for all of the origin device (compare with the dirty bitset
+which is just for the smaller cache device).
+
+Target interface
+================
+
+Constructor
+-----------
+
+ cache <metadata dev> <cache dev> <origin dev> <block size>
+ <#feature args> [<feature arg>]*
+ <policy> <#policy args> [policy args]*
+
+ metadata dev : fast device holding the persistent metadata
+ cache dev : fast device holding cached data blocks
+ origin dev : slow device holding original data blocks
+ block size : cache unit size in sectors
+
+ #feature args : number of feature arguments passed
+ feature args : writethrough or passthrough (The default is writeback.)
+
+ policy : the replacement policy to use
+ #policy args : an even number of arguments corresponding to
+ key/value pairs passed to the policy
+ policy args : key/value pairs passed to the policy
+ E.g. 'sequential_threshold 1024'
+ See cache-policies.txt for details.
+
+Optional feature arguments are:
+ writethrough : write through caching that prohibits cache block
+ content from being different from origin block content.
+ Without this argument, the default behaviour is to write
+ back cache block contents later for performance reasons,
+ so they may differ from the corresponding origin blocks.
+
+ passthrough : a degraded mode useful for various cache coherency
+ situations (e.g., rolling back snapshots of
+ underlying storage). Reads and writes always go to
+ the origin. If a write goes to a cached origin
+ block, then the cache block is invalidated.
+ To enable passthrough mode the cache must be clean.
+
+A policy called 'default' is always registered. This is an alias for
+the policy we currently think is giving best all round performance.
+
+As the default policy could vary between kernels, if you are relying on
+the characteristics of a specific policy, always request it by name.
+
+Status
+------
+
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<cache block size> <#used cache blocks>/<#total cache blocks>
+<#read hits> <#read misses> <#write hits> <#write misses>
+<#demotions> <#promotions> <#dirty> <#features> <features>*
+<#core args> <core args>* <policy name> <#policy args> <policy args>*
+
+metadata block size : Fixed block size for each metadata block in
+ sectors
+#used metadata blocks : Number of metadata blocks used
+#total metadata blocks : Total number of metadata blocks
+cache block size : Configurable block size for the cache device
+ in sectors
+#used cache blocks : Number of blocks resident in the cache
+#total cache blocks : Total number of cache blocks
+#read hits : Number of times a READ bio has been mapped
+ to the cache
+#read misses : Number of times a READ bio has been mapped
+ to the origin
+#write hits : Number of times a WRITE bio has been mapped
+ to the cache
+#write misses : Number of times a WRITE bio has been
+ mapped to the origin
+#demotions : Number of times a block has been removed
+ from the cache
+#promotions : Number of times a block has been moved to
+ the cache
+#dirty : Number of blocks in the cache that differ
+ from the origin
+#feature args : Number of feature args to follow
+feature args : 'writethrough' (optional)
+#core args : Number of core arguments (must be even)
+core args : Key/value pairs for tuning the core
+ e.g. migration_threshold
+policy name : Name of the policy
+#policy args : Number of policy arguments to follow (must be even)
+policy args : Key/value pairs
+ e.g. sequential_threshold
+
+Messages
+--------
+
+Policies will have different tunables, specific to each one, so we
+need a generic way of getting and setting these. Device-mapper
+messages are used. (A sysfs interface would also be possible.)
+
+The message format is:
+
+ <key> <value>
+
+E.g.
+ dmsetup message my_cache 0 sequential_threshold 1024
+
+
+Invalidation is removing an entry from the cache without writing it
+back. Cache blocks can be invalidated via the invalidate_cblocks
+message, which takes an arbitrary number of cblock ranges. Each cblock
+range's end value is "one past the end", meaning 5-10 expresses a range
+of values from 5 to 9. Each cblock must be expressed as a decimal
+value, in the future a variant message that takes cblock ranges
+expressed in hexidecimal may be needed to better support efficient
+invalidation of larger caches. The cache must be in passthrough mode
+when invalidate_cblocks is used.
+
+ invalidate_cblocks [<cblock>|<cblock begin>-<cblock end>]*
+
+E.g.
+ dmsetup message my_cache 0 invalidate_cblocks 2345 3456-4567 5678-6789
+
+Examples
+========
+
+The test suite can be found here:
+
+https://github.com/jthornber/device-mapper-test-suite
+
+dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
+ /dev/mapper/ssd /dev/mapper/origin 512 1 writeback default 0'
+dmsetup create my_cache --table '0 41943040 cache /dev/mapper/metadata \
+ /dev/mapper/ssd /dev/mapper/origin 1024 1 writeback \
+ mq 4 sequential_threshold 1024 random_threshold 8'
diff --git a/Documentation/device-mapper/delay.txt b/Documentation/device-mapper/delay.txt
new file mode 100644
index 000000000..15adc5535
--- /dev/null
+++ b/Documentation/device-mapper/delay.txt
@@ -0,0 +1,26 @@
+dm-delay
+========
+
+Device-Mapper's "delay" target delays reads and/or writes
+and maps them to different devices.
+
+Parameters:
+ <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
+
+With separate write parameters, the first set is only used for reads.
+Delays are specified in milliseconds.
+
+Example scripts
+===============
+[[
+#!/bin/sh
+# Create device delaying rw operation for 500ms
+echo "0 `blockdev --getsize $1` delay $1 0 500" | dmsetup create delayed
+]]
+
+[[
+#!/bin/sh
+# Create device delaying only write operation for 500ms and
+# splitting reads and writes to different devices $1 $2
+echo "0 `blockdev --getsize $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
+]]
diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt
new file mode 100644
index 000000000..692171fe9
--- /dev/null
+++ b/Documentation/device-mapper/dm-crypt.txt
@@ -0,0 +1,96 @@
+dm-crypt
+=========
+
+Device-Mapper's "crypt" target provides transparent encryption of block devices
+using the kernel crypto API.
+
+For a more detailed description of supported parameters see:
+https://gitlab.com/cryptsetup/cryptsetup/wikis/DMCrypt
+
+Parameters: <cipher> <key> <iv_offset> <device path> \
+ <offset> [<#opt_params> <opt_params>]
+
+<cipher>
+ Encryption cipher and an optional IV generation mode.
+ (In format cipher[:keycount]-chainmode-ivmode[:ivopts]).
+ Examples:
+ des
+ aes-cbc-essiv:sha256
+ twofish-ecb
+
+ /proc/crypto contains supported crypto modes
+
+<key>
+ Key used for encryption. It is encoded as a hexadecimal number.
+ You can only use key sizes that are valid for the selected cipher
+ in combination with the selected iv mode.
+ Note that for some iv modes the key string can contain additional
+ keys (for example IV seed) so the key contains more parts concatenated
+ into a single string.
+
+<keycount>
+ Multi-key compatibility mode. You can define <keycount> keys and
+ then sectors are encrypted according to their offsets (sector 0 uses key0;
+ sector 1 uses key1 etc.). <keycount> must be a power of two.
+
+<iv_offset>
+ The IV offset is a sector count that is added to the sector number
+ before creating the IV.
+
+<device path>
+ This is the device that is going to be used as backend and contains the
+ encrypted data. You can specify it as a path like /dev/xxx or a device
+ number <major>:<minor>.
+
+<offset>
+ Starting sector within the device where the encrypted data begins.
+
+<#opt_params>
+ Number of optional parameters. If there are no optional parameters,
+ the optional paramaters section can be skipped or #opt_params can be zero.
+ Otherwise #opt_params is the number of following arguments.
+
+ Example of optional parameters section:
+ 3 allow_discards same_cpu_crypt submit_from_crypt_cpus
+
+allow_discards
+ Block discard requests (a.k.a. TRIM) are passed through the crypt device.
+ The default is to ignore discard requests.
+
+ WARNING: Assess the specific security risks carefully before enabling this
+ option. For example, allowing discards on encrypted devices may lead to
+ the leak of information about the ciphertext device (filesystem type,
+ used space etc.) if the discarded blocks can be located easily on the
+ device later.
+
+same_cpu_crypt
+ Perform encryption using the same cpu that IO was submitted on.
+ The default is to use an unbound workqueue so that encryption work
+ is automatically balanced between available CPUs.
+
+submit_from_crypt_cpus
+ Disable offloading writes to a separate thread after encryption.
+ There are some situations where offloading write bios from the
+ encryption threads to a single thread degrades performance
+ significantly. The default is to offload write bios to the same
+ thread because it benefits CFQ to have writes submitted using the
+ same context.
+
+Example scripts
+===============
+LUKS (Linux Unified Key Setup) is now the preferred way to set up disk
+encryption with dm-crypt using the 'cryptsetup' utility, see
+https://gitlab.com/cryptsetup/cryptsetup
+
+[[
+#!/bin/sh
+# Create a crypt device using dmsetup
+dmsetup create crypt1 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0"
+]]
+
+[[
+#!/bin/sh
+# Create a crypt device using cryptsetup and LUKS header with default cipher
+cryptsetup luksFormat $1
+cryptsetup luksOpen $1 crypt1
+]]
diff --git a/Documentation/device-mapper/dm-flakey.txt b/Documentation/device-mapper/dm-flakey.txt
new file mode 100644
index 000000000..6ff5c2327
--- /dev/null
+++ b/Documentation/device-mapper/dm-flakey.txt
@@ -0,0 +1,53 @@
+dm-flakey
+=========
+
+This target is the same as the linear target except that it exhibits
+unreliable behaviour periodically. It's been found useful in simulating
+failing devices for testing purposes.
+
+Starting from the time the table is loaded, the device is available for
+<up interval> seconds, then exhibits unreliable behaviour for <down
+interval> seconds, and then this cycle repeats.
+
+Also, consider using this in combination with the dm-delay target too,
+which can delay reads and writes and/or send them to different
+underlying devices.
+
+Table parameters
+----------------
+ <dev path> <offset> <up interval> <down interval> \
+ [<num_features> [<feature arguments>]]
+
+Mandatory parameters:
+ <dev path>: Full pathname to the underlying block-device, or a
+ "major:minor" device-number.
+ <offset>: Starting sector within the device.
+ <up interval>: Number of seconds device is available.
+ <down interval>: Number of seconds device returns errors.
+
+Optional feature parameters:
+ If no feature parameters are present, during the periods of
+ unreliability, all I/O returns errors.
+
+ drop_writes:
+ All write I/O is silently ignored.
+ Read I/O is handled correctly.
+
+ corrupt_bio_byte <Nth_byte> <direction> <value> <flags>:
+ During <down interval>, replace <Nth_byte> of the data of
+ each matching bio with <value>.
+
+ <Nth_byte>: The offset of the byte to replace.
+ Counting starts at 1, to replace the first byte.
+ <direction>: Either 'r' to corrupt reads or 'w' to corrupt writes.
+ 'w' is incompatible with drop_writes.
+ <value>: The value (from 0-255) to write.
+ <flags>: Perform the replacement only if bio->bi_rw has all the
+ selected flags set.
+
+Examples:
+ corrupt_bio_byte 32 r 1 0
+ - replaces the 32nd byte of READ bios with the value 1
+
+ corrupt_bio_byte 224 w 0 32
+ - replaces the 224th byte of REQ_META (=32) bios with the value 0
diff --git a/Documentation/device-mapper/dm-io.txt b/Documentation/device-mapper/dm-io.txt
new file mode 100644
index 000000000..3b5d9a52c
--- /dev/null
+++ b/Documentation/device-mapper/dm-io.txt
@@ -0,0 +1,75 @@
+dm-io
+=====
+
+Dm-io provides synchronous and asynchronous I/O services. There are three
+types of I/O services available, and each type has a sync and an async
+version.
+
+The user must set up an io_region structure to describe the desired location
+of the I/O. Each io_region indicates a block-device along with the starting
+sector and size of the region.
+
+ struct io_region {
+ struct block_device *bdev;
+ sector_t sector;
+ sector_t count;
+ };
+
+Dm-io can read from one io_region or write to one or more io_regions. Writes
+to multiple regions are specified by an array of io_region structures.
+
+The first I/O service type takes a list of memory pages as the data buffer for
+the I/O, along with an offset into the first page.
+
+ struct page_list {
+ struct page_list *next;
+ struct page *page;
+ };
+
+ int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
+ struct page_list *pl, unsigned int offset,
+ unsigned long *error_bits);
+ int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
+ struct page_list *pl, unsigned int offset,
+ io_notify_fn fn, void *context);
+
+The second I/O service type takes an array of bio vectors as the data buffer
+for the I/O. This service can be handy if the caller has a pre-assembled bio,
+but wants to direct different portions of the bio to different devices.
+
+ int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where,
+ int rw, struct bio_vec *bvec,
+ unsigned long *error_bits);
+ int dm_io_async_bvec(unsigned int num_regions, struct io_region *where,
+ int rw, struct bio_vec *bvec,
+ io_notify_fn fn, void *context);
+
+The third I/O service type takes a pointer to a vmalloc'd memory buffer as the
+data buffer for the I/O. This service can be handy if the caller needs to do
+I/O to a large region but doesn't want to allocate a large number of individual
+memory pages.
+
+ int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
+ void *data, unsigned long *error_bits);
+ int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
+ void *data, io_notify_fn fn, void *context);
+
+Callers of the asynchronous I/O services must include the name of a completion
+callback routine and a pointer to some context data for the I/O.
+
+ typedef void (*io_notify_fn)(unsigned long error, void *context);
+
+The "error" parameter in this callback, as well as the "*error" parameter in
+all of the synchronous versions, is a bitset (instead of a simple error value).
+In the case of an write-I/O to multiple regions, this bitset allows dm-io to
+indicate success or failure on each individual region.
+
+Before using any of the dm-io services, the user should call dm_io_get()
+and specify the number of pages they expect to perform I/O on concurrently.
+Dm-io will attempt to resize its mempool to make sure enough pages are
+always available in order to avoid unnecessary waiting while performing I/O.
+
+When the user is finished using the dm-io services, they should call
+dm_io_put() and specify the same number of pages that were given on the
+dm_io_get() call.
+
diff --git a/Documentation/device-mapper/dm-log.txt b/Documentation/device-mapper/dm-log.txt
new file mode 100644
index 000000000..c155ac569
--- /dev/null
+++ b/Documentation/device-mapper/dm-log.txt
@@ -0,0 +1,54 @@
+Device-Mapper Logging
+=====================
+The device-mapper logging code is used by some of the device-mapper
+RAID targets to track regions of the disk that are not consistent.
+A region (or portion of the address space) of the disk may be
+inconsistent because a RAID stripe is currently being operated on or
+a machine died while the region was being altered. In the case of
+mirrors, a region would be considered dirty/inconsistent while you
+are writing to it because the writes need to be replicated for all
+the legs of the mirror and may not reach the legs at the same time.
+Once all writes are complete, the region is considered clean again.
+
+There is a generic logging interface that the device-mapper RAID
+implementations use to perform logging operations (see
+dm_dirty_log_type in include/linux/dm-dirty-log.h). Various different
+logging implementations are available and provide different
+capabilities. The list includes:
+
+Type Files
+==== =====
+disk drivers/md/dm-log.c
+core drivers/md/dm-log.c
+userspace drivers/md/dm-log-userspace* include/linux/dm-log-userspace.h
+
+The "disk" log type
+-------------------
+This log implementation commits the log state to disk. This way, the
+logging state survives reboots/crashes.
+
+The "core" log type
+-------------------
+This log implementation keeps the log state in memory. The log state
+will not survive a reboot or crash, but there may be a small boost in
+performance. This method can also be used if no storage device is
+available for storing log state.
+
+The "userspace" log type
+------------------------
+This log type simply provides a way to export the log API to userspace,
+so log implementations can be done there. This is done by forwarding most
+logging requests to userspace, where a daemon receives and processes the
+request.
+
+The structure used for communication between kernel and userspace are
+located in include/linux/dm-log-userspace.h. Due to the frequency,
+diversity, and 2-way communication nature of the exchanges between
+kernel and userspace, 'connector' is used as the interface for
+communication.
+
+There are currently two userspace log implementations that leverage this
+framework - "clustered-disk" and "clustered-core". These implementations
+provide a cluster-coherent log for shared-storage. Device-mapper mirroring
+can be used in a shared-storage environment when the cluster log implementations
+are employed.
diff --git a/Documentation/device-mapper/dm-queue-length.txt b/Documentation/device-mapper/dm-queue-length.txt
new file mode 100644
index 000000000..f4db25621
--- /dev/null
+++ b/Documentation/device-mapper/dm-queue-length.txt
@@ -0,0 +1,39 @@
+dm-queue-length
+===============
+
+dm-queue-length is a path selector module for device-mapper targets,
+which selects a path with the least number of in-flight I/Os.
+The path selector name is 'queue-length'.
+
+Table parameters for each path: [<repeat_count>]
+ <repeat_count>: The number of I/Os to dispatch using the selected
+ path before switching to the next path.
+ If not given, internal default is used. To check
+ the default value, see the activated table.
+
+Status for each path: <status> <fail-count> <in-flight>
+ <status>: 'A' if the path is active, 'F' if the path is failed.
+ <fail-count>: The number of path failures.
+ <in-flight>: The number of in-flight I/Os on the path.
+
+
+Algorithm
+=========
+
+dm-queue-length increments/decrements 'in-flight' when an I/O is
+dispatched/completed respectively.
+dm-queue-length selects a path with the minimum 'in-flight'.
+
+
+Examples
+========
+In case that 2 paths (sda and sdb) are used with repeat_count == 128.
+
+# echo "0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128" \
+ dmsetup create test
+#
+# dmsetup table
+test: 0 10 multipath 0 0 1 1 queue-length 0 2 1 8:0 128 8:16 128
+#
+# dmsetup status
+test: 0 10 multipath 2 0 0 0 1 1 E 0 2 1 8:0 A 0 0 8:16 A 0 0
diff --git a/Documentation/device-mapper/dm-raid.txt b/Documentation/device-mapper/dm-raid.txt
new file mode 100644
index 000000000..ef8ba9fa5
--- /dev/null
+++ b/Documentation/device-mapper/dm-raid.txt
@@ -0,0 +1,226 @@
+dm-raid
+=======
+
+The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
+It allows the MD RAID drivers to be accessed using a device-mapper
+interface.
+
+
+Mapping Table Interface
+-----------------------
+The target is named "raid" and it accepts the following parameters:
+
+ <raid_type> <#raid_params> <raid_params> \
+ <#raid_devs> <metadata_dev0> <dev0> [.. <metadata_devN> <devN>]
+
+<raid_type>:
+ raid1 RAID1 mirroring
+ raid4 RAID4 dedicated parity disk
+ raid5_la RAID5 left asymmetric
+ - rotating parity 0 with data continuation
+ raid5_ra RAID5 right asymmetric
+ - rotating parity N with data continuation
+ raid5_ls RAID5 left symmetric
+ - rotating parity 0 with data restart
+ raid5_rs RAID5 right symmetric
+ - rotating parity N with data restart
+ raid6_zr RAID6 zero restart
+ - rotating parity zero (left-to-right) with data restart
+ raid6_nr RAID6 N restart
+ - rotating parity N (right-to-left) with data restart
+ raid6_nc RAID6 N continue
+ - rotating parity N (right-to-left) with data continuation
+ raid10 Various RAID10 inspired algorithms chosen by additional params
+ - RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
+ - RAID1E: Integrated Adjacent Stripe Mirroring
+ - RAID1E: Integrated Offset Stripe Mirroring
+ - and other similar RAID10 variants
+
+ Reference: Chapter 4 of
+ http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
+
+<#raid_params>: The number of parameters that follow.
+
+<raid_params> consists of
+ Mandatory parameters:
+ <chunk_size>: Chunk size in sectors. This parameter is often known as
+ "stripe size". It is the only mandatory parameter and
+ is placed first.
+
+ followed by optional parameters (in any order):
+ [sync|nosync] Force or prevent RAID initialization.
+
+ [rebuild <idx>] Rebuild drive number 'idx' (first drive is 0).
+
+ [daemon_sleep <ms>]
+ Interval between runs of the bitmap daemon that
+ clear bits. A longer interval means less bitmap I/O but
+ resyncing after a failure is likely to take longer.
+
+ [min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
+ [max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
+ [write_mostly <idx>] Mark drive index 'idx' write-mostly.
+ [max_write_behind <sectors>] See '--write-behind=' (man mdadm)
+ [stripe_cache <sectors>] Stripe cache size (RAID 4/5/6 only)
+ [region_size <sectors>]
+ The region_size multiplied by the number of regions is the
+ logical size of the array. The bitmap records the device
+ synchronisation state for each region.
+
+ [raid10_copies <# copies>]
+ [raid10_format <near|far|offset>]
+ These two options are used to alter the default layout of
+ a RAID10 configuration. The number of copies is can be
+ specified, but the default is 2. There are also three
+ variations to how the copies are laid down - the default
+ is "near". Near copies are what most people think of with
+ respect to mirroring. If these options are left unspecified,
+ or 'raid10_copies 2' and/or 'raid10_format near' are given,
+ then the layouts for 2, 3 and 4 devices are:
+ 2 drives 3 drives 4 drives
+ -------- ---------- --------------
+ A1 A1 A1 A1 A2 A1 A1 A2 A2
+ A2 A2 A2 A3 A3 A3 A3 A4 A4
+ A3 A3 A4 A4 A5 A5 A5 A6 A6
+ A4 A4 A5 A6 A6 A7 A7 A8 A8
+ .. .. .. .. .. .. .. .. ..
+ The 2-device layout is equivalent 2-way RAID1. The 4-device
+ layout is what a traditional RAID10 would look like. The
+ 3-device layout is what might be called a 'RAID1E - Integrated
+ Adjacent Stripe Mirroring'.
+
+ If 'raid10_copies 2' and 'raid10_format far', then the layouts
+ for 2, 3 and 4 devices are:
+ 2 drives 3 drives 4 drives
+ -------- -------------- --------------------
+ A1 A2 A1 A2 A3 A1 A2 A3 A4
+ A3 A4 A4 A5 A6 A5 A6 A7 A8
+ A5 A6 A7 A8 A9 A9 A10 A11 A12
+ .. .. .. .. .. .. .. .. ..
+ A2 A1 A3 A1 A2 A2 A1 A4 A3
+ A4 A3 A6 A4 A5 A6 A5 A8 A7
+ A6 A5 A9 A7 A8 A10 A9 A12 A11
+ .. .. .. .. .. .. .. .. ..
+
+ If 'raid10_copies 2' and 'raid10_format offset', then the
+ layouts for 2, 3 and 4 devices are:
+ 2 drives 3 drives 4 drives
+ -------- ------------ -----------------
+ A1 A2 A1 A2 A3 A1 A2 A3 A4
+ A2 A1 A3 A1 A2 A2 A1 A4 A3
+ A3 A4 A4 A5 A6 A5 A6 A7 A8
+ A4 A3 A6 A4 A5 A6 A5 A8 A7
+ A5 A6 A7 A8 A9 A9 A10 A11 A12
+ A6 A5 A9 A7 A8 A10 A9 A12 A11
+ .. .. .. .. .. .. .. .. ..
+ Here we see layouts closely akin to 'RAID1E - Integrated
+ Offset Stripe Mirroring'.
+
+<#raid_devs>: The number of devices composing the array.
+ Each device consists of two entries. The first is the device
+ containing the metadata (if any); the second is the one containing the
+ data.
+
+ If a drive has failed or is missing at creation time, a '-' can be
+ given for both the metadata and data drives for a given position.
+
+
+Example Tables
+--------------
+# RAID4 - 4 data drives, 1 parity (no metadata devices)
+# No metadata devices specified to hold superblock/bitmap info
+# Chunk size of 1MiB
+# (Lines separated for easy reading)
+
+0 1960893648 raid \
+ raid4 1 2048 \
+ 5 - 8:17 - 8:33 - 8:49 - 8:65 - 8:81
+
+# RAID4 - 4 data drives, 1 parity (with metadata devices)
+# Chunk size of 1MiB, force RAID initialization,
+# min recovery rate at 20 kiB/sec/disk
+
+0 1960893648 raid \
+ raid4 4 2048 sync min_recovery_rate 20 \
+ 5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
+
+
+Status Output
+-------------
+'dmsetup table' displays the table used to construct the mapping.
+The optional parameters are always printed in the order listed
+above with "sync" or "nosync" always output ahead of the other
+arguments, regardless of the order used when originally loading the table.
+Arguments that can be repeated are ordered by value.
+
+
+'dmsetup status' yields information on the state and health of the array.
+The output is as follows (normally a single line, but expanded here for
+clarity):
+1: <s> <l> raid \
+2: <raid_type> <#devices> <health_chars> \
+3: <sync_ratio> <sync_action> <mismatch_cnt>
+
+Line 1 is the standard output produced by device-mapper.
+Line 2 & 3 are produced by the raid target and are best explained by example:
+ 0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
+Here we can see the RAID type is raid4, there are 5 devices - all of
+which are 'A'live, and the array is 2/490221568 complete with its initial
+recovery. Here is a fuller description of the individual fields:
+ <raid_type> Same as the <raid_type> used to create the array.
+ <health_chars> One char for each device, indicating: 'A' = alive and
+ in-sync, 'a' = alive but not in-sync, 'D' = dead/failed.
+ <sync_ratio> The ratio indicating how much of the array has undergone
+ the process described by 'sync_action'. If the
+ 'sync_action' is "check" or "repair", then the process
+ of "resync" or "recover" can be considered complete.
+ <sync_action> One of the following possible states:
+ idle - No synchronization action is being performed.
+ frozen - The current action has been halted.
+ resync - Array is undergoing its initial synchronization
+ or is resynchronizing after an unclean shutdown
+ (possibly aided by a bitmap).
+ recover - A device in the array is being rebuilt or
+ replaced.
+ check - A user-initiated full check of the array is
+ being performed. All blocks are read and
+ checked for consistency. The number of
+ discrepancies found are recorded in
+ <mismatch_cnt>. No changes are made to the
+ array by this action.
+ repair - The same as "check", but discrepancies are
+ corrected.
+ reshape - The array is undergoing a reshape.
+ <mismatch_cnt> The number of discrepancies found between mirror copies
+ in RAID1/10 or wrong parity values found in RAID4/5/6.
+ This value is valid only after a "check" of the array
+ is performed. A healthy array has a 'mismatch_cnt' of 0.
+
+Message Interface
+-----------------
+The dm-raid target will accept certain actions through the 'message' interface.
+('man dmsetup' for more information on the message interface.) These actions
+include:
+ "idle" - Halt the current sync action.
+ "frozen" - Freeze the current sync action.
+ "resync" - Initiate/continue a resync.
+ "recover"- Initiate/continue a recover process.
+ "check" - Initiate a check (i.e. a "scrub") of the array.
+ "repair" - Initiate a repair of the array.
+ "reshape"- Currently unsupported (-EINVAL).
+
+Version History
+---------------
+1.0.0 Initial version. Support for RAID 4/5/6
+1.1.0 Added support for RAID 1
+1.2.0 Handle creation of arrays that contain failed devices.
+1.3.0 Added support for RAID 10
+1.3.1 Allow device replacement/rebuild for RAID 10
+1.3.2 Fix/improve redundancy checking for RAID10
+1.4.0 Non-functional change. Removes arg from mapping function.
+1.4.1 RAID10 fix redundancy validation checks (commit 55ebbb5).
+1.4.2 Add RAID10 "far" and "offset" algorithm support.
+1.5.0 Add message interface to allow manipulation of the sync_action.
+ New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
+1.5.1 Add ability to restore transiently failed devices on resume.
+1.5.2 'mismatch_cnt' is zero unless [last_]sync_action is "check".
diff --git a/Documentation/device-mapper/dm-service-time.txt b/Documentation/device-mapper/dm-service-time.txt
new file mode 100644
index 000000000..fb1d4a0cf
--- /dev/null
+++ b/Documentation/device-mapper/dm-service-time.txt
@@ -0,0 +1,91 @@
+dm-service-time
+===============
+
+dm-service-time is a path selector module for device-mapper targets,
+which selects a path with the shortest estimated service time for
+the incoming I/O.
+
+The service time for each path is estimated by dividing the total size
+of in-flight I/Os on a path with the performance value of the path.
+The performance value is a relative throughput value among all paths
+in a path-group, and it can be specified as a table argument.
+
+The path selector name is 'service-time'.
+
+Table parameters for each path: [<repeat_count> [<relative_throughput>]]
+ <repeat_count>: The number of I/Os to dispatch using the selected
+ path before switching to the next path.
+ If not given, internal default is used. To check
+ the default value, see the activated table.
+ <relative_throughput>: The relative throughput value of the path
+ among all paths in the path-group.
+ The valid range is 0-100.
+ If not given, minimum value '1' is used.
+ If '0' is given, the path isn't selected while
+ other paths having a positive value are available.
+
+Status for each path: <status> <fail-count> <in-flight-size> \
+ <relative_throughput>
+ <status>: 'A' if the path is active, 'F' if the path is failed.
+ <fail-count>: The number of path failures.
+ <in-flight-size>: The size of in-flight I/Os on the path.
+ <relative_throughput>: The relative throughput value of the path
+ among all paths in the path-group.
+
+
+Algorithm
+=========
+
+dm-service-time adds the I/O size to 'in-flight-size' when the I/O is
+dispatched and subtracts when completed.
+Basically, dm-service-time selects a path having minimum service time
+which is calculated by:
+
+ ('in-flight-size' + 'size-of-incoming-io') / 'relative_throughput'
+
+However, some optimizations below are used to reduce the calculation
+as much as possible.
+
+ 1. If the paths have the same 'relative_throughput', skip
+ the division and just compare the 'in-flight-size'.
+
+ 2. If the paths have the same 'in-flight-size', skip the division
+ and just compare the 'relative_throughput'.
+
+ 3. If some paths have non-zero 'relative_throughput' and others
+ have zero 'relative_throughput', ignore those paths with zero
+ 'relative_throughput'.
+
+If such optimizations can't be applied, calculate service time, and
+compare service time.
+If calculated service time is equal, the path having maximum
+'relative_throughput' may be better. So compare 'relative_throughput'
+then.
+
+
+Examples
+========
+In case that 2 paths (sda and sdb) are used with repeat_count == 128
+and sda has an average throughput 1GB/s and sdb has 4GB/s,
+'relative_throughput' value may be '1' for sda and '4' for sdb.
+
+# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4" \
+ dmsetup create test
+#
+# dmsetup table
+test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 1 8:16 128 4
+#
+# dmsetup status
+test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 1 8:16 A 0 0 4
+
+
+Or '2' for sda and '8' for sdb would be also true.
+
+# echo "0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8" \
+ dmsetup create test
+#
+# dmsetup table
+test: 0 10 multipath 0 0 1 1 service-time 0 2 2 8:0 128 2 8:16 128 8
+#
+# dmsetup status
+test: 0 10 multipath 2 0 0 0 1 1 E 0 2 2 8:0 A 0 0 2 8:16 A 0 0 8
diff --git a/Documentation/device-mapper/dm-uevent.txt b/Documentation/device-mapper/dm-uevent.txt
new file mode 100644
index 000000000..07edbd85c
--- /dev/null
+++ b/Documentation/device-mapper/dm-uevent.txt
@@ -0,0 +1,97 @@
+The device-mapper uevent code adds the capability to device-mapper to create
+and send kobject uevents (uevents). Previously device-mapper events were only
+available through the ioctl interface. The advantage of the uevents interface
+is the event contains environment attributes providing increased context for
+the event avoiding the need to query the state of the device-mapper device after
+the event is received.
+
+There are two functions currently for device-mapper events. The first function
+listed creates the event and the second function sends the event(s).
+
+void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti,
+ const char *path, unsigned nr_valid_paths)
+
+void dm_send_uevents(struct list_head *events, struct kobject *kobj)
+
+
+The variables added to the uevent environment are:
+
+Variable Name: DM_TARGET
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description:
+Value: Name of device-mapper target that generated the event.
+
+Variable Name: DM_ACTION
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description:
+Value: Device-mapper specific action that caused the uevent action.
+ PATH_FAILED - A path has failed.
+ PATH_REINSTATED - A path has been reinstated.
+
+Variable Name: DM_SEQNUM
+Uevent Action(s): KOBJ_CHANGE
+Type: unsigned integer
+Description: A sequence number for this specific device-mapper device.
+Value: Valid unsigned integer range.
+
+Variable Name: DM_PATH
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: Major and minor number of the path device pertaining to this
+event.
+Value: Path name in the form of "Major:Minor"
+
+Variable Name: DM_NR_VALID_PATHS
+Uevent Action(s): KOBJ_CHANGE
+Type: unsigned integer
+Description:
+Value: Valid unsigned integer range.
+
+Variable Name: DM_NAME
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: Name of the device-mapper device.
+Value: Name
+
+Variable Name: DM_UUID
+Uevent Action(s): KOBJ_CHANGE
+Type: string
+Description: UUID of the device-mapper device.
+Value: UUID. (Empty string if there isn't one.)
+
+An example of the uevents generated as captured by udevmonitor is shown
+below.
+
+1.) Path failure.
+UEVENT[1192521009.711215] change@/block/dm-3
+ACTION=change
+DEVPATH=/block/dm-3
+SUBSYSTEM=block
+DM_TARGET=multipath
+DM_ACTION=PATH_FAILED
+DM_SEQNUM=1
+DM_PATH=8:32
+DM_NR_VALID_PATHS=0
+DM_NAME=mpath2
+DM_UUID=mpath-35333333000002328
+MINOR=3
+MAJOR=253
+SEQNUM=1130
+
+2.) Path reinstate.
+UEVENT[1192521132.989927] change@/block/dm-3
+ACTION=change
+DEVPATH=/block/dm-3
+SUBSYSTEM=block
+DM_TARGET=multipath
+DM_ACTION=PATH_REINSTATED
+DM_SEQNUM=2
+DM_PATH=8:32
+DM_NR_VALID_PATHS=1
+DM_NAME=mpath2
+DM_UUID=mpath-35333333000002328
+MINOR=3
+MAJOR=253
+SEQNUM=1131
diff --git a/Documentation/device-mapper/era.txt b/Documentation/device-mapper/era.txt
new file mode 100644
index 000000000..3c6d01be3
--- /dev/null
+++ b/Documentation/device-mapper/era.txt
@@ -0,0 +1,108 @@
+Introduction
+============
+
+dm-era is a target that behaves similar to the linear target. In
+addition it keeps track of which blocks were written within a user
+defined period of time called an 'era'. Each era target instance
+maintains the current era as a monotonically increasing 32-bit
+counter.
+
+Use cases include tracking changed blocks for backup software, and
+partially invalidating the contents of a cache to restore cache
+coherency after rolling back a vendor snapshot.
+
+Constructor
+===========
+
+ era <metadata dev> <origin dev> <block size>
+
+ metadata dev : fast device holding the persistent metadata
+ origin dev : device holding data blocks that may change
+ block size : block size of origin data device, granularity that is
+ tracked by the target
+
+Messages
+========
+
+None of the dm messages take any arguments.
+
+checkpoint
+----------
+
+Possibly move to a new era. You shouldn't assume the era has
+incremented. After sending this message, you should check the
+current era via the status line.
+
+take_metadata_snap
+------------------
+
+Create a clone of the metadata, to allow a userland process to read it.
+
+drop_metadata_snap
+------------------
+
+Drop the metadata snapshot.
+
+Status
+======
+
+<metadata block size> <#used metadata blocks>/<#total metadata blocks>
+<current era> <held metadata root | '-'>
+
+metadata block size : Fixed block size for each metadata block in
+ sectors
+#used metadata blocks : Number of metadata blocks used
+#total metadata blocks : Total number of metadata blocks
+current era : The current era
+held metadata root : The location, in blocks, of the metadata root
+ that has been 'held' for userspace read
+ access. '-' indicates there is no held root
+
+Detailed use case
+=================
+
+The scenario of invalidating a cache when rolling back a vendor
+snapshot was the primary use case when developing this target:
+
+Taking a vendor snapshot
+------------------------
+
+- Send a checkpoint message to the era target
+- Make a note of the current era in its status line
+- Take vendor snapshot (the era and snapshot should be forever
+ associated now).
+
+Rolling back to an vendor snapshot
+----------------------------------
+
+- Cache enters passthrough mode (see: dm-cache's docs in cache.txt)
+- Rollback vendor storage
+- Take metadata snapshot
+- Ascertain which blocks have been written since the snapshot was taken
+ by checking each block's era
+- Invalidate those blocks in the caching software
+- Cache returns to writeback/writethrough mode
+
+Memory usage
+============
+
+The target uses a bitset to record writes in the current era. It also
+has a spare bitset ready for switching over to a new era. Other than
+that it uses a few 4k blocks for updating metadata.
+
+ (4 * nr_blocks) bytes + buffers
+
+Resilience
+==========
+
+Metadata is updated on disk before a write to a previously unwritten
+block is performed. As such dm-era should not be effected by a hard
+crash such as power failure.
+
+Userland tools
+==============
+
+Userland tools are found in the increasingly poorly named
+thin-provisioning-tools project:
+
+ https://github.com/jthornber/thin-provisioning-tools
diff --git a/Documentation/device-mapper/kcopyd.txt b/Documentation/device-mapper/kcopyd.txt
new file mode 100644
index 000000000..820382c4c
--- /dev/null
+++ b/Documentation/device-mapper/kcopyd.txt
@@ -0,0 +1,47 @@
+kcopyd
+======
+
+Kcopyd provides the ability to copy a range of sectors from one block-device
+to one or more other block-devices, with an asynchronous completion
+notification. It is used by dm-snapshot and dm-mirror.
+
+Users of kcopyd must first create a client and indicate how many memory pages
+to set aside for their copy jobs. This is done with a call to
+kcopyd_client_create().
+
+ int kcopyd_client_create(unsigned int num_pages,
+ struct kcopyd_client **result);
+
+To start a copy job, the user must set up io_region structures to describe
+the source and destinations of the copy. Each io_region indicates a
+block-device along with the starting sector and size of the region. The source
+of the copy is given as one io_region structure, and the destinations of the
+copy are given as an array of io_region structures.
+
+ struct io_region {
+ struct block_device *bdev;
+ sector_t sector;
+ sector_t count;
+ };
+
+To start the copy, the user calls kcopyd_copy(), passing in the client
+pointer, pointers to the source and destination io_regions, the name of a
+completion callback routine, and a pointer to some context data for the copy.
+
+ int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
+ unsigned int num_dests, struct io_region *dests,
+ unsigned int flags, kcopyd_notify_fn fn, void *context);
+
+ typedef void (*kcopyd_notify_fn)(int read_err, unsigned int write_err,
+ void *context);
+
+When the copy completes, kcopyd will call the user's completion routine,
+passing back the user's context pointer. It will also indicate if a read or
+write error occurred during the copy.
+
+When a user is done with all their copy jobs, they should call
+kcopyd_client_destroy() to delete the kcopyd client, which will release the
+associated memory pages.
+
+ void kcopyd_client_destroy(struct kcopyd_client *kc);
+
diff --git a/Documentation/device-mapper/linear.txt b/Documentation/device-mapper/linear.txt
new file mode 100644
index 000000000..d5307d380
--- /dev/null
+++ b/Documentation/device-mapper/linear.txt
@@ -0,0 +1,61 @@
+dm-linear
+=========
+
+Device-Mapper's "linear" target maps a linear range of the Device-Mapper
+device onto a linear range of another device. This is the basic building
+block of logical volume managers.
+
+Parameters: <dev path> <offset>
+ <dev path>: Full pathname to the underlying block-device, or a
+ "major:minor" device-number.
+ <offset>: Starting sector within the device.
+
+
+Example scripts
+===============
+[[
+#!/bin/sh
+# Create an identity mapping for a device
+echo "0 `blockdev --getsize $1` linear $1 0" | dmsetup create identity
+]]
+
+
+[[
+#!/bin/sh
+# Join 2 devices together
+size1=`blockdev --getsize $1`
+size2=`blockdev --getsize $2`
+echo "0 $size1 linear $1 0
+$size1 $size2 linear $2 0" | dmsetup create joined
+]]
+
+
+[[
+#!/usr/bin/perl -w
+# Split a device into 4M chunks and then join them together in reverse order.
+
+my $name = "reverse";
+my $extent_size = 4 * 1024 * 2;
+my $dev = $ARGV[0];
+my $table = "";
+my $count = 0;
+
+if (!defined($dev)) {
+ die("Please specify a device.\n");
+}
+
+my $dev_size = `blockdev --getsize $dev`;
+my $extents = int($dev_size / $extent_size) -
+ (($dev_size % $extent_size) ? 1 : 0);
+
+while ($extents > 0) {
+ my $this_start = $count * $extent_size;
+ $extents--;
+ $count++;
+ my $this_offset = $extents * $extent_size;
+
+ $table .= "$this_start $extent_size linear $dev $this_offset\n";
+}
+
+`echo \"$table\" | dmsetup create $name`;
+]]
diff --git a/Documentation/device-mapper/log-writes.txt b/Documentation/device-mapper/log-writes.txt
new file mode 100644
index 000000000..c10f30c9b
--- /dev/null
+++ b/Documentation/device-mapper/log-writes.txt
@@ -0,0 +1,140 @@
+dm-log-writes
+=============
+
+This target takes 2 devices, one to pass all IO to normally, and one to log all
+of the write operations to. This is intended for file system developers wishing
+to verify the integrity of metadata or data as the file system is written to.
+There is a log_write_entry written for every WRITE request and the target is
+able to take arbitrary data from userspace to insert into the log. The data
+that is in the WRITE requests is copied into the log to make the replay happen
+exactly as it happened originally.
+
+Log Ordering
+============
+
+We log things in order of completion once we are sure the write is no longer in
+cache. This means that normal WRITE requests are not actually logged until the
+next REQ_FLUSH request. This is to make it easier for userspace to replay the
+log in a way that correlates to what is on disk and not what is in cache, to
+make it easier to detect improper waiting/flushing.
+
+This works by attaching all WRITE requests to a list once the write completes.
+Once we see a REQ_FLUSH request we splice this list onto the request and once
+the FLUSH request completes we log all of the WRITEs and then the FLUSH. Only
+completed WRITEs, at the time the REQ_FLUSH is issued, are added in order to
+simulate the worst case scenario with regard to power failures. Consider the
+following example (W means write, C means complete):
+
+W1,W2,W3,C3,C2,Wflush,C1,Cflush
+
+The log would show the following
+
+W3,W2,flush,W1....
+
+Again this is to simulate what is actually on disk, this allows us to detect
+cases where a power failure at a particular point in time would create an
+inconsistent file system.
+
+Any REQ_FUA requests bypass this flushing mechanism and are logged as soon as
+they complete as those requests will obviously bypass the device cache.
+
+Any REQ_DISCARD requests are treated like WRITE requests. Otherwise we would
+have all the DISCARD requests, and then the WRITE requests and then the FLUSH
+request. Consider the following example:
+
+WRITE block 1, DISCARD block 1, FLUSH
+
+If we logged DISCARD when it completed, the replay would look like this
+
+DISCARD 1, WRITE 1, FLUSH
+
+which isn't quite what happened and wouldn't be caught during the log replay.
+
+Target interface
+================
+
+i) Constructor
+
+ log-writes <dev_path> <log_dev_path>
+
+ dev_path : Device that all of the IO will go to normally.
+ log_dev_path : Device where the log entries are written to.
+
+ii) Status
+
+ <#logged entries> <highest allocated sector>
+
+ #logged entries : Number of logged entries
+ highest allocated sector : Highest allocated sector
+
+iii) Messages
+
+ mark <description>
+
+ You can use a dmsetup message to set an arbitrary mark in a log.
+ For example say you want to fsck a file system after every
+ write, but first you need to replay up to the mkfs to make sure
+ we're fsck'ing something reasonable, you would do something like
+ this:
+
+ mkfs.btrfs -f /dev/mapper/log
+ dmsetup message log 0 mark mkfs
+ <run test>
+
+ This would allow you to replay the log up to the mkfs mark and
+ then replay from that point on doing the fsck check in the
+ interval that you want.
+
+ Every log has a mark at the end labeled "dm-log-writes-end".
+
+Userspace component
+===================
+
+There is a userspace tool that will replay the log for you in various ways.
+It can be found here: https://github.com/josefbacik/log-writes
+
+Example usage
+=============
+
+Say you want to test fsync on your file system. You would do something like
+this:
+
+TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+dmsetup create log --table "$TABLE"
+mkfs.btrfs -f /dev/mapper/log
+dmsetup message log 0 mark mkfs
+
+mount /dev/mapper/log /mnt/btrfs-test
+<some test that does fsync at the end>
+dmsetup message log 0 mark fsync
+md5sum /mnt/btrfs-test/foo
+umount /mnt/btrfs-test
+
+dmsetup remove log
+replay-log --log /dev/sdc --replay /dev/sdb --end-mark fsync
+mount /dev/sdb /mnt/btrfs-test
+md5sum /mnt/btrfs-test/foo
+<verify md5sum's are correct>
+
+Another option is to do a complicated file system operation and verify the file
+system is consistent during the entire operation. You could do this with:
+
+TABLE="0 $(blockdev --getsz /dev/sdb) log-writes /dev/sdb /dev/sdc"
+dmsetup create log --table "$TABLE"
+mkfs.btrfs -f /dev/mapper/log
+dmsetup message log 0 mark mkfs
+
+mount /dev/mapper/log /mnt/btrfs-test
+<fsstress to dirty the fs>
+btrfs filesystem balance /mnt/btrfs-test
+umount /mnt/btrfs-test
+dmsetup remove log
+
+replay-log --log /dev/sdc --replay /dev/sdb --end-mark mkfs
+btrfsck /dev/sdb
+replay-log --log /dev/sdc --replay /dev/sdb --start-mark mkfs \
+ --fsck "btrfsck /dev/sdb" --check fua
+
+And that will replay the log until it sees a FUA request, run the fsck command
+and if the fsck passes it will replay to the next FUA, until it is completed or
+the fsck command exists abnormally.
diff --git a/Documentation/device-mapper/persistent-data.txt b/Documentation/device-mapper/persistent-data.txt
new file mode 100644
index 000000000..a333bcb3a
--- /dev/null
+++ b/Documentation/device-mapper/persistent-data.txt
@@ -0,0 +1,84 @@
+Introduction
+============
+
+The more-sophisticated device-mapper targets require complex metadata
+that is managed in kernel. In late 2010 we were seeing that various
+different targets were rolling their own data structures, for example:
+
+- Mikulas Patocka's multisnap implementation
+- Heinz Mauelshagen's thin provisioning target
+- Another btree-based caching target posted to dm-devel
+- Another multi-snapshot target based on a design of Daniel Phillips
+
+Maintaining these data structures takes a lot of work, so if possible
+we'd like to reduce the number.
+
+The persistent-data library is an attempt to provide a re-usable
+framework for people who want to store metadata in device-mapper
+targets. It's currently used by the thin-provisioning target and an
+upcoming hierarchical storage target.
+
+Overview
+========
+
+The main documentation is in the header files which can all be found
+under drivers/md/persistent-data.
+
+The block manager
+-----------------
+
+dm-block-manager.[hc]
+
+This provides access to the data on disk in fixed sized-blocks. There
+is a read/write locking interface to prevent concurrent accesses, and
+keep data that is being used in the cache.
+
+Clients of persistent-data are unlikely to use this directly.
+
+The transaction manager
+-----------------------
+
+dm-transaction-manager.[hc]
+
+This restricts access to blocks and enforces copy-on-write semantics.
+The only way you can get hold of a writable block through the
+transaction manager is by shadowing an existing block (ie. doing
+copy-on-write) or allocating a fresh one. Shadowing is elided within
+the same transaction so performance is reasonable. The commit method
+ensures that all data is flushed before it writes the superblock.
+On power failure your metadata will be as it was when last committed.
+
+The Space Maps
+--------------
+
+dm-space-map.h
+dm-space-map-metadata.[hc]
+dm-space-map-disk.[hc]
+
+On-disk data structures that keep track of reference counts of blocks.
+Also acts as the allocator of new blocks. Currently two
+implementations: a simpler one for managing blocks on a different
+device (eg. thinly-provisioned data blocks); and one for managing
+the metadata space. The latter is complicated by the need to store
+its own data within the space it's managing.
+
+The data structures
+-------------------
+
+dm-btree.[hc]
+dm-btree-remove.c
+dm-btree-spine.c
+dm-btree-internal.h
+
+Currently there is only one data structure, a hierarchical btree.
+There are plans to add more. For example, something with an
+array-like interface would see a lot of use.
+
+The btree is 'hierarchical' in that you can define it to be composed
+of nested btrees, and take multiple keys. For example, the
+thin-provisioning target uses a btree with two levels of nesting.
+The first maps a device id to a mapping tree, and that in turn maps a
+virtual block to a physical block.
+
+Values stored in the btrees can have arbitrary size. Keys are always
+64bits, although nesting allows you to use multiple keys.
diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt
new file mode 100644
index 000000000..0d5bc46dc
--- /dev/null
+++ b/Documentation/device-mapper/snapshot.txt
@@ -0,0 +1,168 @@
+Device-mapper snapshot support
+==============================
+
+Device-mapper allows you, without massive data copying:
+
+*) To create snapshots of any block device i.e. mountable, saved states of
+the block device which are also writable without interfering with the
+original content;
+*) To create device "forks", i.e. multiple different versions of the
+same data stream.
+*) To merge a snapshot of a block device back into the snapshot's origin
+device.
+
+In the first two cases, dm copies only the chunks of data that get
+changed and uses a separate copy-on-write (COW) block device for
+storage.
+
+For snapshot merge the contents of the COW storage are merged back into
+the origin device.
+
+
+There are three dm targets available:
+snapshot, snapshot-origin, and snapshot-merge.
+
+*) snapshot-origin <origin>
+
+which will normally have one or more snapshots based on it.
+Reads will be mapped directly to the backing device. For each write, the
+original data will be saved in the <COW device> of each snapshot to keep
+its visible content unchanged, at least until the <COW device> fills up.
+
+
+*) snapshot <origin> <COW device> <persistent?> <chunksize>
+
+A snapshot of the <origin> block device is created. Changed chunks of
+<chunksize> sectors will be stored on the <COW device>. Writes will
+only go to the <COW device>. Reads will come from the <COW device> or
+from <origin> for unchanged data. <COW device> will often be
+smaller than the origin and if it fills up the snapshot will become
+useless and be disabled, returning errors. So it is important to monitor
+the amount of free space and expand the <COW device> before it fills up.
+
+<persistent?> is P (Persistent) or N (Not persistent - will not survive
+after reboot).
+The difference is that for transient snapshots less metadata must be
+saved on disk - they can be kept in memory by the kernel.
+
+
+* snapshot-merge <origin> <COW device> <persistent> <chunksize>
+
+takes the same table arguments as the snapshot target except it only
+works with persistent snapshots. This target assumes the role of the
+"snapshot-origin" target and must not be loaded if the "snapshot-origin"
+is still present for <origin>.
+
+Creates a merging snapshot that takes control of the changed chunks
+stored in the <COW device> of an existing snapshot, through a handover
+procedure, and merges these chunks back into the <origin>. Once merging
+has started (in the background) the <origin> may be opened and the merge
+will continue while I/O is flowing to it. Changes to the <origin> are
+deferred until the merging snapshot's corresponding chunk(s) have been
+merged. Once merging has started the snapshot device, associated with
+the "snapshot" target, will return -EIO when accessed.
+
+
+How snapshot is used by LVM2
+============================
+When you create the first LVM2 snapshot of a volume, four dm devices are used:
+
+1) a device containing the original mapping table of the source volume;
+2) a device used as the <COW device>;
+3) a "snapshot" device, combining #1 and #2, which is the visible snapshot
+ volume;
+4) the "original" volume (which uses the device number used by the original
+ source volume), whose table is replaced by a "snapshot-origin" mapping
+ from device #1.
+
+A fixed naming scheme is used, so with the following commands:
+
+lvcreate -L 1G -n base volumeGroup
+lvcreate -L 100M --snapshot -n snap volumeGroup/base
+
+we'll have this situation (with volumes in above order):
+
+# dmsetup table|grep volumeGroup
+
+volumeGroup-base-real: 0 2097152 linear 8:19 384
+volumeGroup-snap-cow: 0 204800 linear 8:19 2097536
+volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16
+volumeGroup-base: 0 2097152 snapshot-origin 254:11
+
+# ls -lL /dev/mapper/volumeGroup-*
+brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
+brw------- 1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow
+brw------- 1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap
+brw------- 1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base
+
+
+How snapshot-merge is used by LVM2
+==================================
+A merging snapshot assumes the role of the "snapshot-origin" while
+merging. As such the "snapshot-origin" is replaced with
+"snapshot-merge". The "-real" device is not changed and the "-cow"
+device is renamed to <origin name>-cow to aid LVM2's cleanup of the
+merging snapshot after it completes. The "snapshot" that hands over its
+COW device to the "snapshot-merge" is deactivated (unless using lvchange
+--refresh); but if it is left active it will simply return I/O errors.
+
+A snapshot will merge into its origin with the following command:
+
+lvconvert --merge volumeGroup/snap
+
+we'll now have this situation:
+
+# dmsetup table|grep volumeGroup
+
+volumeGroup-base-real: 0 2097152 linear 8:19 384
+volumeGroup-base-cow: 0 204800 linear 8:19 2097536
+volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16
+
+# ls -lL /dev/mapper/volumeGroup-*
+brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real
+brw------- 1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow
+brw------- 1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base
+
+
+How to determine when a merging is complete
+===========================================
+The snapshot-merge and snapshot status lines end with:
+ <sectors_allocated>/<total_sectors> <metadata_sectors>
+
+Both <sectors_allocated> and <total_sectors> include both data and metadata.
+During merging, the number of sectors allocated gets smaller and
+smaller. Merging has finished when the number of sectors holding data
+is zero, in other words <sectors_allocated> == <metadata_sectors>.
+
+Here is a practical example (using a hybrid of lvm and dmsetup commands):
+
+# lvs
+ LV VG Attr LSize Origin Snap% Move Log Copy% Convert
+ base volumeGroup owi-a- 4.00g
+ snap volumeGroup swi-a- 1.00g base 18.97
+
+# dmsetup status volumeGroup-snap
+0 8388608 snapshot 397896/2097152 1560
+ ^^^^ metadata sectors
+
+# lvconvert --merge -b volumeGroup/snap
+ Merging of volume snap started.
+
+# lvs volumeGroup/snap
+ LV VG Attr LSize Origin Snap% Move Log Copy% Convert
+ base volumeGroup Owi-a- 4.00g 17.23
+
+# dmsetup status volumeGroup-base
+0 8388608 snapshot-merge 281688/2097152 1104
+
+# dmsetup status volumeGroup-base
+0 8388608 snapshot-merge 180480/2097152 712
+
+# dmsetup status volumeGroup-base
+0 8388608 snapshot-merge 16/2097152 16
+
+Merging has finished.
+
+# lvs
+ LV VG Attr LSize Origin Snap% Move Log Copy% Convert
+ base volumeGroup owi-a- 4.00g
diff --git a/Documentation/device-mapper/statistics.txt b/Documentation/device-mapper/statistics.txt
new file mode 100644
index 000000000..2a1673adc
--- /dev/null
+++ b/Documentation/device-mapper/statistics.txt
@@ -0,0 +1,186 @@
+DM statistics
+=============
+
+Device Mapper supports the collection of I/O statistics on user-defined
+regions of a DM device. If no regions are defined no statistics are
+collected so there isn't any performance impact. Only bio-based DM
+devices are currently supported.
+
+Each user-defined region specifies a starting sector, length and step.
+Individual statistics will be collected for each step-sized area within
+the range specified.
+
+The I/O statistics counters for each step-sized area of a region are
+in the same format as /sys/block/*/stat or /proc/diskstats (see:
+Documentation/iostats.txt). But two extra counters (12 and 13) are
+provided: total time spent reading and writing in milliseconds. All
+these counters may be accessed by sending the @stats_print message to
+the appropriate DM device via dmsetup.
+
+Each region has a corresponding unique identifier, which we call a
+region_id, that is assigned when the region is created. The region_id
+must be supplied when querying statistics about the region, deleting the
+region, etc. Unique region_ids enable multiple userspace programs to
+request and process statistics for the same DM device without stepping
+on each other's data.
+
+The creation of DM statistics will allocate memory via kmalloc or
+fallback to using vmalloc space. At most, 1/4 of the overall system
+memory may be allocated by DM statistics. The admin can see how much
+memory is used by reading
+/sys/module/dm_mod/parameters/stats_current_allocated_bytes
+
+Messages
+========
+
+ @stats_create <range> <step> [<program_id> [<aux_data>]]
+
+ Create a new region and return the region_id.
+
+ <range>
+ "-" - whole device
+ "<start_sector>+<length>" - a range of <length> 512-byte sectors
+ starting with <start_sector>.
+
+ <step>
+ "<area_size>" - the range is subdivided into areas each containing
+ <area_size> sectors.
+ "/<number_of_areas>" - the range is subdivided into the specified
+ number of areas.
+
+ <program_id>
+ An optional parameter. A name that uniquely identifies
+ the userspace owner of the range. This groups ranges together
+ so that userspace programs can identify the ranges they
+ created and ignore those created by others.
+ The kernel returns this string back in the output of
+ @stats_list message, but it doesn't use it for anything else.
+
+ <aux_data>
+ An optional parameter. A word that provides auxiliary data
+ that is useful to the client program that created the range.
+ The kernel returns this string back in the output of
+ @stats_list message, but it doesn't use this value for anything.
+
+ @stats_delete <region_id>
+
+ Delete the region with the specified id.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ @stats_clear <region_id>
+
+ Clear all the counters except the in-flight i/o counters.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ @stats_list [<program_id>]
+
+ List all regions registered with @stats_create.
+
+ <program_id>
+ An optional parameter.
+ If this parameter is specified, only matching regions
+ are returned.
+ If it is not specified, all regions are returned.
+
+ Output format:
+ <region_id>: <start_sector>+<length> <step> <program_id> <aux_data>
+
+ @stats_print <region_id> [<starting_line> <number_of_lines>]
+
+ Print counters for each step-sized area of a region.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ <starting_line>
+ The index of the starting line in the output.
+ If omitted, all lines are returned.
+
+ <number_of_lines>
+ The number of lines to include in the output.
+ If omitted, all lines are returned.
+
+ Output format for each step-sized area of a region:
+
+ <start_sector>+<length> counters
+
+ The first 11 counters have the same meaning as
+ /sys/block/*/stat or /proc/diskstats.
+
+ Please refer to Documentation/iostats.txt for details.
+
+ 1. the number of reads completed
+ 2. the number of reads merged
+ 3. the number of sectors read
+ 4. the number of milliseconds spent reading
+ 5. the number of writes completed
+ 6. the number of writes merged
+ 7. the number of sectors written
+ 8. the number of milliseconds spent writing
+ 9. the number of I/Os currently in progress
+ 10. the number of milliseconds spent doing I/Os
+ 11. the weighted number of milliseconds spent doing I/Os
+
+ Additional counters:
+ 12. the total time spent reading in milliseconds
+ 13. the total time spent writing in milliseconds
+
+ @stats_print_clear <region_id> [<starting_line> <number_of_lines>]
+
+ Atomically print and then clear all the counters except the
+ in-flight i/o counters. Useful when the client consuming the
+ statistics does not want to lose any statistics (those updated
+ between printing and clearing).
+
+ <region_id>
+ region_id returned from @stats_create
+
+ <starting_line>
+ The index of the starting line in the output.
+ If omitted, all lines are printed and then cleared.
+
+ <number_of_lines>
+ The number of lines to process.
+ If omitted, all lines are printed and then cleared.
+
+ @stats_set_aux <region_id> <aux_data>
+
+ Store auxiliary data aux_data for the specified region.
+
+ <region_id>
+ region_id returned from @stats_create
+
+ <aux_data>
+ The string that identifies data which is useful to the client
+ program that created the range. The kernel returns this
+ string back in the output of @stats_list message, but it
+ doesn't use this value for anything.
+
+Examples
+========
+
+Subdivide the DM device 'vol' into 100 pieces and start collecting
+statistics on them:
+
+ dmsetup message vol 0 @stats_create - /100
+
+Set the auxillary data string to "foo bar baz" (the escape for each
+space must also be escaped, otherwise the shell will consume them):
+
+ dmsetup message vol 0 @stats_set_aux 0 foo\\ bar\\ baz
+
+List the statistics:
+
+ dmsetup message vol 0 @stats_list
+
+Print the statistics:
+
+ dmsetup message vol 0 @stats_print 0
+
+Delete the statistics:
+
+ dmsetup message vol 0 @stats_delete 0
diff --git a/Documentation/device-mapper/striped.txt b/Documentation/device-mapper/striped.txt
new file mode 100644
index 000000000..45f3b91ea
--- /dev/null
+++ b/Documentation/device-mapper/striped.txt
@@ -0,0 +1,57 @@
+dm-stripe
+=========
+
+Device-Mapper's "striped" target is used to create a striped (i.e. RAID-0)
+device across one or more underlying devices. Data is written in "chunks",
+with consecutive chunks rotating among the underlying devices. This can
+potentially provide improved I/O throughput by utilizing several physical
+devices in parallel.
+
+Parameters: <num devs> <chunk size> [<dev path> <offset>]+
+ <num devs>: Number of underlying devices.
+ <chunk size>: Size of each chunk of data. Must be at least as
+ large as the system's PAGE_SIZE.
+ <dev path>: Full pathname to the underlying block-device, or a
+ "major:minor" device-number.
+ <offset>: Starting sector within the device.
+
+One or more underlying devices can be specified. The striped device size must
+be a multiple of the chunk size multiplied by the number of underlying devices.
+
+
+Example scripts
+===============
+
+[[
+#!/usr/bin/perl -w
+# Create a striped device across any number of underlying devices. The device
+# will be called "stripe_dev" and have a chunk-size of 128k.
+
+my $chunk_size = 128 * 2;
+my $dev_name = "stripe_dev";
+my $num_devs = @ARGV;
+my @devs = @ARGV;
+my ($min_dev_size, $stripe_dev_size, $i);
+
+if (!$num_devs) {
+ die("Specify at least one device\n");
+}
+
+$min_dev_size = `blockdev --getsize $devs[0]`;
+for ($i = 1; $i < $num_devs; $i++) {
+ my $this_size = `blockdev --getsize $devs[$i]`;
+ $min_dev_size = ($min_dev_size < $this_size) ?
+ $min_dev_size : $this_size;
+}
+
+$stripe_dev_size = $min_dev_size * $num_devs;
+$stripe_dev_size -= $stripe_dev_size % ($chunk_size * $num_devs);
+
+$table = "0 $stripe_dev_size striped $num_devs $chunk_size";
+for ($i = 0; $i < $num_devs; $i++) {
+ $table .= " $devs[$i] 0";
+}
+
+`echo $table | dmsetup create $dev_name`;
+]]
+
diff --git a/Documentation/device-mapper/switch.txt b/Documentation/device-mapper/switch.txt
new file mode 100644
index 000000000..424835e57
--- /dev/null
+++ b/Documentation/device-mapper/switch.txt
@@ -0,0 +1,138 @@
+dm-switch
+=========
+
+The device-mapper switch target creates a device that supports an
+arbitrary mapping of fixed-size regions of I/O across a fixed set of
+paths. The path used for any specific region can be switched
+dynamically by sending the target a message.
+
+It maps I/O to underlying block devices efficiently when there is a large
+number of fixed-sized address regions but there is no simple pattern
+that would allow for a compact representation of the mapping such as
+dm-stripe.
+
+Background
+----------
+
+Dell EqualLogic and some other iSCSI storage arrays use a distributed
+frameless architecture. In this architecture, the storage group
+consists of a number of distinct storage arrays ("members") each having
+independent controllers, disk storage and network adapters. When a LUN
+is created it is spread across multiple members. The details of the
+spreading are hidden from initiators connected to this storage system.
+The storage group exposes a single target discovery portal, no matter
+how many members are being used. When iSCSI sessions are created, each
+session is connected to an eth port on a single member. Data to a LUN
+can be sent on any iSCSI session, and if the blocks being accessed are
+stored on another member the I/O will be forwarded as required. This
+forwarding is invisible to the initiator. The storage layout is also
+dynamic, and the blocks stored on disk may be moved from member to
+member as needed to balance the load.
+
+This architecture simplifies the management and configuration of both
+the storage group and initiators. In a multipathing configuration, it
+is possible to set up multiple iSCSI sessions to use multiple network
+interfaces on both the host and target to take advantage of the
+increased network bandwidth. An initiator could use a simple round
+robin algorithm to send I/O across all paths and let the storage array
+members forward it as necessary, but there is a performance advantage to
+sending data directly to the correct member.
+
+A device-mapper table already lets you map different regions of a
+device onto different targets. However in this architecture the LUN is
+spread with an address region size on the order of 10s of MBs, which
+means the resulting table could have more than a million entries and
+consume far too much memory.
+
+Using this device-mapper switch target we can now build a two-layer
+device hierarchy:
+
+ Upper Tier - Determine which array member the I/O should be sent to.
+ Lower Tier - Load balance amongst paths to a particular member.
+
+The lower tier consists of a single dm multipath device for each member.
+Each of these multipath devices contains the set of paths directly to
+the array member in one priority group, and leverages existing path
+selectors to load balance amongst these paths. We also build a
+non-preferred priority group containing paths to other array members for
+failover reasons.
+
+The upper tier consists of a single dm-switch device. This device uses
+a bitmap to look up the location of the I/O and choose the appropriate
+lower tier device to route the I/O. By using a bitmap we are able to
+use 4 bits for each address range in a 16 member group (which is very
+large for us). This is a much denser representation than the dm table
+b-tree can achieve.
+
+Construction Parameters
+=======================
+
+ <num_paths> <region_size> <num_optional_args> [<optional_args>...]
+ [<dev_path> <offset>]+
+
+<num_paths>
+ The number of paths across which to distribute the I/O.
+
+<region_size>
+ The number of 512-byte sectors in a region. Each region can be redirected
+ to any of the available paths.
+
+<num_optional_args>
+ The number of optional arguments. Currently, no optional arguments
+ are supported and so this must be zero.
+
+<dev_path>
+ The block device that represents a specific path to the device.
+
+<offset>
+ The offset of the start of data on the specific <dev_path> (in units
+ of 512-byte sectors). This number is added to the sector number when
+ forwarding the request to the specific path. Typically it is zero.
+
+Messages
+========
+
+set_region_mappings <index>:<path_nr> [<index>]:<path_nr> [<index>]:<path_nr>...
+
+Modify the region table by specifying which regions are redirected to
+which paths.
+
+<index>
+ The region number (region size was specified in constructor parameters).
+ If index is omitted, the next region (previous index + 1) is used.
+ Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+<path_nr>
+ The path number in the range 0 ... (<num_paths> - 1).
+ Expressed in hexadecimal (WITHOUT any prefix like 0x).
+
+R<n>,<m>
+ This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
+ are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
+ slots.
+
+Status
+======
+
+No status line is reported.
+
+Example
+=======
+
+Assume that you have volumes vg1/switch0 vg1/switch1 vg1/switch2 with
+the same size.
+
+Create a switch device with 64kB region size:
+ dmsetup create switch --table "0 `blockdev --getsize /dev/vg1/switch0`
+ switch 3 128 0 /dev/vg1/switch0 0 /dev/vg1/switch1 0 /dev/vg1/switch2 0"
+
+Set mappings for the first 7 entries to point to devices switch0, switch1,
+switch2, switch0, switch1, switch2, switch1:
+ dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
+
+Set repetitive mapping. This command:
+ dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
+is equivalent to:
+ dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
+ :1 :2 :1 :2 :1 :2 :1 :2 :1 :2
+
diff --git a/Documentation/device-mapper/thin-provisioning.txt b/Documentation/device-mapper/thin-provisioning.txt
new file mode 100644
index 000000000..4f67578b2
--- /dev/null
+++ b/Documentation/device-mapper/thin-provisioning.txt
@@ -0,0 +1,389 @@
+Introduction
+============
+
+This document describes a collection of device-mapper targets that
+between them implement thin-provisioning and snapshots.
+
+The main highlight of this implementation, compared to the previous
+implementation of snapshots, is that it allows many virtual devices to
+be stored on the same data volume. This simplifies administration and
+allows the sharing of data between volumes, thus reducing disk usage.
+
+Another significant feature is support for an arbitrary depth of
+recursive snapshots (snapshots of snapshots of snapshots ...). The
+previous implementation of snapshots did this by chaining together
+lookup tables, and so performance was O(depth). This new
+implementation uses a single data structure to avoid this degradation
+with depth. Fragmentation may still be an issue, however, in some
+scenarios.
+
+Metadata is stored on a separate device from data, giving the
+administrator some freedom, for example to:
+
+- Improve metadata resilience by storing metadata on a mirrored volume
+ but data on a non-mirrored one.
+
+- Improve performance by storing the metadata on SSD.
+
+Status
+======
+
+These targets are very much still in the EXPERIMENTAL state. Please
+do not yet rely on them in production. But do experiment and offer us
+feedback. Different use cases will have different performance
+characteristics, for example due to fragmentation of the data volume.
+
+If you find this software is not performing as expected please mail
+dm-devel@redhat.com with details and we'll try our best to improve
+things for you.
+
+Userspace tools for checking and repairing the metadata are under
+development.
+
+Cookbook
+========
+
+This section describes some quick recipes for using thin provisioning.
+They use the dmsetup program to control the device-mapper driver
+directly. End users will be advised to use a higher-level volume
+manager such as LVM2 once support has been added.
+
+Pool device
+-----------
+
+The pool device ties together the metadata volume and the data volume.
+It maps I/O linearly to the data volume and updates the metadata via
+two mechanisms:
+
+- Function calls from the thin targets
+
+- Device-mapper 'messages' from userspace which control the creation of new
+ virtual devices amongst other things.
+
+Setting up a fresh pool device
+------------------------------
+
+Setting up a pool device requires a valid metadata device, and a
+data device. If you do not have an existing metadata device you can
+make one by zeroing the first 4k to indicate empty metadata.
+
+ dd if=/dev/zero of=$metadata_dev bs=4096 count=1
+
+The amount of metadata you need will vary according to how many blocks
+are shared between thin devices (i.e. through snapshots). If you have
+less sharing than average you'll need a larger-than-average metadata device.
+
+As a guide, we suggest you calculate the number of bytes to use in the
+metadata device as 48 * $data_dev_size / $data_block_size but round it up
+to 2MB if the answer is smaller. If you're creating large numbers of
+snapshots which are recording large amounts of change, you may find you
+need to increase this.
+
+The largest size supported is 16GB: If the device is larger,
+a warning will be issued and the excess space will not be used.
+
+Reloading a pool table
+----------------------
+
+You may reload a pool's table, indeed this is how the pool is resized
+if it runs out of space. (N.B. While specifying a different metadata
+device when reloading is not forbidden at the moment, things will go
+wrong if it does not route I/O to exactly the same on-disk location as
+previously.)
+
+Using an existing pool device
+-----------------------------
+
+ dmsetup create pool \
+ --table "0 20971520 thin-pool $metadata_dev $data_dev \
+ $data_block_size $low_water_mark"
+
+$data_block_size gives the smallest unit of disk space that can be
+allocated at a time expressed in units of 512-byte sectors.
+$data_block_size must be between 128 (64KB) and 2097152 (1GB) and a
+multiple of 128 (64KB). $data_block_size cannot be changed after the
+thin-pool is created. People primarily interested in thin provisioning
+may want to use a value such as 1024 (512KB). People doing lots of
+snapshotting may want a smaller value such as 128 (64KB). If you are
+not zeroing newly-allocated data, a larger $data_block_size in the
+region of 256000 (128MB) is suggested.
+
+$low_water_mark is expressed in blocks of size $data_block_size. If
+free space on the data device drops below this level then a dm event
+will be triggered which a userspace daemon should catch allowing it to
+extend the pool device. Only one such event will be sent.
+Resuming a device with a new table itself triggers an event so the
+userspace daemon can use this to detect a situation where a new table
+already exceeds the threshold.
+
+A low water mark for the metadata device is maintained in the kernel and
+will trigger a dm event if free space on the metadata device drops below
+it.
+
+Updating on-disk metadata
+-------------------------
+
+On-disk metadata is committed every time a FLUSH or FUA bio is written.
+If no such requests are made then commits will occur every second. This
+means the thin-provisioning target behaves like a physical disk that has
+a volatile write cache. If power is lost you may lose some recent
+writes. The metadata should always be consistent in spite of any crash.
+
+If data space is exhausted the pool will either error or queue IO
+according to the configuration (see: error_if_no_space). If metadata
+space is exhausted or a metadata operation fails: the pool will error IO
+until the pool is taken offline and repair is performed to 1) fix any
+potential inconsistencies and 2) clear the flag that imposes repair.
+Once the pool's metadata device is repaired it may be resized, which
+will allow the pool to return to normal operation. Note that if a pool
+is flagged as needing repair, the pool's data and metadata devices
+cannot be resized until repair is performed. It should also be noted
+that when the pool's metadata space is exhausted the current metadata
+transaction is aborted. Given that the pool will cache IO whose
+completion may have already been acknowledged to upper IO layers
+(e.g. filesystem) it is strongly suggested that consistency checks
+(e.g. fsck) be performed on those layers when repair of the pool is
+required.
+
+Thin provisioning
+-----------------
+
+i) Creating a new thinly-provisioned volume.
+
+ To create a new thinly- provisioned volume you must send a message to an
+ active pool device, /dev/mapper/pool in this example.
+
+ dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+ Here '0' is an identifier for the volume, a 24-bit number. It's up
+ to the caller to allocate and manage these identifiers. If the
+ identifier is already in use, the message will fail with -EEXIST.
+
+ii) Using a thinly-provisioned volume.
+
+ Thinly-provisioned volumes are activated using the 'thin' target:
+
+ dmsetup create thin --table "0 2097152 thin /dev/mapper/pool 0"
+
+ The last parameter is the identifier for the thinp device.
+
+Internal snapshots
+------------------
+
+i) Creating an internal snapshot.
+
+ Snapshots are created with another message to the pool.
+
+ N.B. If the origin device that you wish to snapshot is active, you
+ must suspend it before creating the snapshot to avoid corruption.
+ This is NOT enforced at the moment, so please be careful!
+
+ dmsetup suspend /dev/mapper/thin
+ dmsetup message /dev/mapper/pool 0 "create_snap 1 0"
+ dmsetup resume /dev/mapper/thin
+
+ Here '1' is the identifier for the volume, a 24-bit number. '0' is the
+ identifier for the origin device.
+
+ii) Using an internal snapshot.
+
+ Once created, the user doesn't have to worry about any connection
+ between the origin and the snapshot. Indeed the snapshot is no
+ different from any other thinly-provisioned device and can be
+ snapshotted itself via the same method. It's perfectly legal to
+ have only one of them active, and there's no ordering requirement on
+ activating or removing them both. (This differs from conventional
+ device-mapper snapshots.)
+
+ Activate it exactly the same way as any other thinly-provisioned volume:
+
+ dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 1"
+
+External snapshots
+------------------
+
+You can use an external _read only_ device as an origin for a
+thinly-provisioned volume. Any read to an unprovisioned area of the
+thin device will be passed through to the origin. Writes trigger
+the allocation of new blocks as usual.
+
+One use case for this is VM hosts that want to run guests on
+thinly-provisioned volumes but have the base image on another device
+(possibly shared between many VMs).
+
+You must not write to the origin device if you use this technique!
+Of course, you may write to the thin device and take internal snapshots
+of the thin volume.
+
+i) Creating a snapshot of an external device
+
+ This is the same as creating a thin device.
+ You don't mention the origin at this stage.
+
+ dmsetup message /dev/mapper/pool 0 "create_thin 0"
+
+ii) Using a snapshot of an external device.
+
+ Append an extra parameter to the thin target specifying the origin:
+
+ dmsetup create snap --table "0 2097152 thin /dev/mapper/pool 0 /dev/image"
+
+ N.B. All descendants (internal snapshots) of this snapshot require the
+ same extra origin parameter.
+
+Deactivation
+------------
+
+All devices using a pool must be deactivated before the pool itself
+can be.
+
+ dmsetup remove thin
+ dmsetup remove snap
+ dmsetup remove pool
+
+Reference
+=========
+
+'thin-pool' target
+------------------
+
+i) Constructor
+
+ thin-pool <metadata dev> <data dev> <data block size (sectors)> \
+ <low water mark (blocks)> [<number of feature args> [<arg>]*]
+
+ Optional feature arguments:
+
+ skip_block_zeroing: Skip the zeroing of newly-provisioned blocks.
+
+ ignore_discard: Disable discard support.
+
+ no_discard_passdown: Don't pass discards down to the underlying
+ data device, but just remove the mapping.
+
+ read_only: Don't allow any changes to be made to the pool
+ metadata.
+
+ error_if_no_space: Error IOs, instead of queueing, if no space.
+
+ Data block size must be between 64KB (128 sectors) and 1GB
+ (2097152 sectors) inclusive.
+
+
+ii) Status
+
+ <transaction id> <used metadata blocks>/<total metadata blocks>
+ <used data blocks>/<total data blocks> <held metadata root>
+ [no_]discard_passdown ro|rw
+
+ transaction id:
+ A 64-bit number used by userspace to help synchronise with metadata
+ from volume managers.
+
+ used data blocks / total data blocks
+ If the number of free blocks drops below the pool's low water mark a
+ dm event will be sent to userspace. This event is edge-triggered and
+ it will occur only once after each resume so volume manager writers
+ should register for the event and then check the target's status.
+
+ held metadata root:
+ The location, in blocks, of the metadata root that has been
+ 'held' for userspace read access. '-' indicates there is no
+ held root.
+
+ discard_passdown|no_discard_passdown
+ Whether or not discards are actually being passed down to the
+ underlying device. When this is enabled when loading the table,
+ it can get disabled if the underlying device doesn't support it.
+
+ ro|rw
+ If the pool encounters certain types of device failures it will
+ drop into a read-only metadata mode in which no changes to
+ the pool metadata (like allocating new blocks) are permitted.
+
+ In serious cases where even a read-only mode is deemed unsafe
+ no further I/O will be permitted and the status will just
+ contain the string 'Fail'. The userspace recovery tools
+ should then be used.
+
+ error_if_no_space|queue_if_no_space
+ If the pool runs out of data or metadata space, the pool will
+ either queue or error the IO destined to the data device. The
+ default is to queue the IO until more space is added or the
+ 'no_space_timeout' expires. The 'no_space_timeout' dm-thin-pool
+ module parameter can be used to change this timeout -- it
+ defaults to 60 seconds but may be disabled using a value of 0.
+
+iii) Messages
+
+ create_thin <dev id>
+
+ Create a new thinly-provisioned device.
+ <dev id> is an arbitrary unique 24-bit identifier chosen by
+ the caller.
+
+ create_snap <dev id> <origin id>
+
+ Create a new snapshot of another thinly-provisioned device.
+ <dev id> is an arbitrary unique 24-bit identifier chosen by
+ the caller.
+ <origin id> is the identifier of the thinly-provisioned device
+ of which the new device will be a snapshot.
+
+ delete <dev id>
+
+ Deletes a thin device. Irreversible.
+
+ set_transaction_id <current id> <new id>
+
+ Userland volume managers, such as LVM, need a way to
+ synchronise their external metadata with the internal metadata of the
+ pool target. The thin-pool target offers to store an
+ arbitrary 64-bit transaction id and return it on the target's
+ status line. To avoid races you must provide what you think
+ the current transaction id is when you change it with this
+ compare-and-swap message.
+
+ reserve_metadata_snap
+
+ Reserve a copy of the data mapping btree for use by userland.
+ This allows userland to inspect the mappings as they were when
+ this message was executed. Use the pool's status command to
+ get the root block associated with the metadata snapshot.
+
+ release_metadata_snap
+
+ Release a previously reserved copy of the data mapping btree.
+
+'thin' target
+-------------
+
+i) Constructor
+
+ thin <pool dev> <dev id> [<external origin dev>]
+
+ pool dev:
+ the thin-pool device, e.g. /dev/mapper/my_pool or 253:0
+
+ dev id:
+ the internal device identifier of the device to be
+ activated.
+
+ external origin dev:
+ an optional block device outside the pool to be treated as a
+ read-only snapshot origin: reads to unprovisioned areas of the
+ thin target will be mapped to this device.
+
+The pool doesn't store any size against the thin devices. If you
+load a thin target that is smaller than you've been using previously,
+then you'll have no access to blocks mapped beyond the end. If you
+load a target that is bigger than before, then extra blocks will be
+provisioned as and when needed.
+
+ii) Status
+
+ <nr mapped sectors> <highest mapped sector>
+
+ If the pool has encountered device errors and failed, the status
+ will just contain the string 'Fail'. The userspace recovery
+ tools should then be used.
diff --git a/Documentation/device-mapper/verity.txt b/Documentation/device-mapper/verity.txt
new file mode 100644
index 000000000..e15bc1a0f
--- /dev/null
+++ b/Documentation/device-mapper/verity.txt
@@ -0,0 +1,172 @@
+dm-verity
+==========
+
+Device-Mapper's "verity" target provides transparent integrity checking of
+block devices using a cryptographic digest provided by the kernel crypto API.
+This target is read-only.
+
+Construction Parameters
+=======================
+ <version> <dev> <hash_dev>
+ <data_block_size> <hash_block_size>
+ <num_data_blocks> <hash_start_block>
+ <algorithm> <digest> <salt>
+ [<#opt_params> <opt_params>]
+
+<version>
+ This is the type of the on-disk hash format.
+
+ 0 is the original format used in the Chromium OS.
+ The salt is appended when hashing, digests are stored continuously and
+ the rest of the block is padded with zeros.
+
+ 1 is the current format that should be used for new devices.
+ The salt is prepended when hashing and each digest is
+ padded with zeros to the power of two.
+
+<dev>
+ This is the device containing data, the integrity of which needs to be
+ checked. It may be specified as a path, like /dev/sdaX, or a device number,
+ <major>:<minor>.
+
+<hash_dev>
+ This is the device that supplies the hash tree data. It may be
+ specified similarly to the device path and may be the same device. If the
+ same device is used, the hash_start should be outside the configured
+ dm-verity device.
+
+<data_block_size>
+ The block size on a data device in bytes.
+ Each block corresponds to one digest on the hash device.
+
+<hash_block_size>
+ The size of a hash block in bytes.
+
+<num_data_blocks>
+ The number of data blocks on the data device. Additional blocks are
+ inaccessible. You can place hashes to the same partition as data, in this
+ case hashes are placed after <num_data_blocks>.
+
+<hash_start_block>
+ This is the offset, in <hash_block_size>-blocks, from the start of hash_dev
+ to the root block of the hash tree.
+
+<algorithm>
+ The cryptographic hash algorithm used for this device. This should
+ be the name of the algorithm, like "sha1".
+
+<digest>
+ The hexadecimal encoding of the cryptographic hash of the root hash block
+ and the salt. This hash should be trusted as there is no other authenticity
+ beyond this point.
+
+<salt>
+ The hexadecimal encoding of the salt value.
+
+<#opt_params>
+ Number of optional parameters. If there are no optional parameters,
+ the optional paramaters section can be skipped or #opt_params can be zero.
+ Otherwise #opt_params is the number of following arguments.
+
+ Example of optional parameters section:
+ 1 ignore_corruption
+
+ignore_corruption
+ Log corrupted blocks, but allow read operations to proceed normally.
+
+restart_on_corruption
+ Restart the system when a corrupted block is discovered. This option is
+ not compatible with ignore_corruption and requires user space support to
+ avoid restart loops.
+
+Theory of operation
+===================
+
+dm-verity is meant to be set up as part of a verified boot path. This
+may be anything ranging from a boot using tboot or trustedgrub to just
+booting from a known-good device (like a USB drive or CD).
+
+When a dm-verity device is configured, it is expected that the caller
+has been authenticated in some way (cryptographic signatures, etc).
+After instantiation, all hashes will be verified on-demand during
+disk access. If they cannot be verified up to the root node of the
+tree, the root hash, then the I/O will fail. This should detect
+tampering with any data on the device and the hash data.
+
+Cryptographic hashes are used to assert the integrity of the device on a
+per-block basis. This allows for a lightweight hash computation on first read
+into the page cache. Block hashes are stored linearly, aligned to the nearest
+block size.
+
+Hash Tree
+---------
+
+Each node in the tree is a cryptographic hash. If it is a leaf node, the hash
+of some data block on disk is calculated. If it is an intermediary node,
+the hash of a number of child nodes is calculated.
+
+Each entry in the tree is a collection of neighboring nodes that fit in one
+block. The number is determined based on block_size and the size of the
+selected cryptographic digest algorithm. The hashes are linearly-ordered in
+this entry and any unaligned trailing space is ignored but included when
+calculating the parent node.
+
+The tree looks something like:
+
+alg = sha256, num_blocks = 32768, block_size = 4096
+
+ [ root ]
+ / . . . \
+ [entry_0] [entry_1]
+ / . . . \ . . . \
+ [entry_0_0] . . . [entry_0_127] . . . . [entry_1_127]
+ / ... \ / . . . \ / \
+ blk_0 ... blk_127 blk_16256 blk_16383 blk_32640 . . . blk_32767
+
+
+On-disk format
+==============
+
+The verity kernel code does not read the verity metadata on-disk header.
+It only reads the hash blocks which directly follow the header.
+It is expected that a user-space tool will verify the integrity of the
+verity header.
+
+Alternatively, the header can be omitted and the dmsetup parameters can
+be passed via the kernel command-line in a rooted chain of trust where
+the command-line is verified.
+
+Directly following the header (and with sector number padded to the next hash
+block boundary) are the hash blocks which are stored a depth at a time
+(starting from the root), sorted in order of increasing index.
+
+The full specification of kernel parameters and on-disk metadata format
+is available at the cryptsetup project's wiki page
+ https://gitlab.com/cryptsetup/cryptsetup/wikis/DMVerity
+
+Status
+======
+V (for Valid) is returned if every check performed so far was valid.
+If any check failed, C (for Corruption) is returned.
+
+Example
+=======
+Set up a device:
+ # dmsetup create vroot --readonly --table \
+ "0 2097152 verity 1 /dev/sda1 /dev/sda2 4096 4096 262144 1 sha256 "\
+ "4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076 "\
+ "1234000000000000000000000000000000000000000000000000000000000000"
+
+A command line tool veritysetup is available to compute or verify
+the hash tree or activate the kernel device. This is available from
+the cryptsetup upstream repository https://gitlab.com/cryptsetup/cryptsetup/
+(as a libcryptsetup extension).
+
+Create hash on the device:
+ # veritysetup format /dev/sda1 /dev/sda2
+ ...
+ Root hash: 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
+
+Activate the device:
+ # veritysetup create vroot /dev/sda1 /dev/sda2 \
+ 4392712ba01368efdf14b05c76f9e4df0d53664630b5d48632ed17a137f39076
diff --git a/Documentation/device-mapper/zero.txt b/Documentation/device-mapper/zero.txt
new file mode 100644
index 000000000..20fb38e7f
--- /dev/null
+++ b/Documentation/device-mapper/zero.txt
@@ -0,0 +1,37 @@
+dm-zero
+=======
+
+Device-Mapper's "zero" target provides a block-device that always returns
+zero'd data on reads and silently drops writes. This is similar behavior to
+/dev/zero, but as a block-device instead of a character-device.
+
+Dm-zero has no target-specific parameters.
+
+One very interesting use of dm-zero is for creating "sparse" devices in
+conjunction with dm-snapshot. A sparse device reports a device-size larger
+than the amount of actual storage space available for that device. A user can
+write data anywhere within the sparse device and read it back like a normal
+device. Reads to previously unwritten areas will return a zero'd buffer. When
+enough data has been written to fill up the actual storage space, the sparse
+device is deactivated. This can be very useful for testing device and
+filesystem limitations.
+
+To create a sparse device, start by creating a dm-zero device that's the
+desired size of the sparse device. For this example, we'll assume a 10TB
+sparse device.
+
+TEN_TERABYTES=`expr 10 \* 1024 \* 1024 \* 1024 \* 2` # 10 TB in sectors
+echo "0 $TEN_TERABYTES zero" | dmsetup create zero1
+
+Then create a snapshot of the zero device, using any available block-device as
+the COW device. The size of the COW device will determine the amount of real
+space available to the sparse device. For this example, we'll assume /dev/sdb1
+is an available 10GB partition.
+
+echo "0 $TEN_TERABYTES snapshot /dev/mapper/zero1 /dev/sdb1 p 128" | \
+ dmsetup create sparse1
+
+This will create a 10TB sparse device called /dev/mapper/sparse1 that has
+10GB of actual storage space available. If more than 10GB of data is written
+to this device, it will start returning I/O errors.
+
diff --git a/Documentation/devices.txt b/Documentation/devices.txt
new file mode 100644
index 000000000..87b4c5e82
--- /dev/null
+++ b/Documentation/devices.txt
@@ -0,0 +1,3355 @@
+
+ LINUX ALLOCATED DEVICES (2.6+ version)
+
+ Maintained by Alan Cox <device@lanana.org>
+
+ Last revised: 6th April 2009
+
+This list is the Linux Device List, the official registry of allocated
+device numbers and /dev directory nodes for the Linux operating
+system.
+
+The latest version of this list is available from
+http://www.lanana.org/docs/device-list/ or
+ftp://ftp.kernel.org/pub/linux/docs/device-list/. This version may be
+newer than the one distributed with the Linux kernel.
+
+The LaTeX version of this document is no longer maintained.
+
+This document is included by reference into the Filesystem Hierarchy
+Standard (FHS). The FHS is available from http://www.pathname.com/fhs/.
+
+Allocations marked (68k/Amiga) apply to Linux/68k on the Amiga
+platform only. Allocations marked (68k/Atari) apply to Linux/68k on
+the Atari platform only.
+
+The symbol {2.6} means the allocation is obsolete and scheduled for
+removal once kernel version 2.6 (or equivalent) is released. Some of these
+allocations have already been removed.
+
+This document is in the public domain. The author requests, however,
+that semantically altered versions are not distributed without
+permission of the author, assuming the author can be contacted without
+an unreasonable effort.
+
+In particular, please don't sent patches for this list to Linus, at
+least not without contacting me first.
+
+I do not have any information about these devices beyond what appears
+on this list. Any such information requests will be deleted without
+reply.
+
+
+ **** DEVICE DRIVERS AUTHORS PLEASE READ THIS ****
+
+To have a major number allocated, or a minor number in situations
+where that applies (e.g. busmice), please contact me with the
+appropriate device information. Also, if you have additional
+information regarding any of the devices listed below, or if I have
+made a mistake, I would greatly appreciate a note.
+
+I do, however, make a few requests about the nature of your report.
+This is necessary for me to be able to keep this list up to date and
+correct in a timely manner. First of all, *please* send it to the
+correct address... <device@lanana.org>. I receive hundreds of email
+messages a day, so mail sent to other addresses may very well get lost
+in the avalanche. Please put in a descriptive subject, so I can find
+your mail again should I need to. Too many people send me email
+saying just "device number request" in the subject.
+
+Second, please include a description of the device *in the same format
+as this list*. The reason for this is that it is the only way I have
+found to ensure I have all the requisite information to publish your
+device and avoid conflicts.
+
+Third, please don't assume that the distributed version of the list is
+up to date. Due to the number of registrations I have to maintain it
+in "batch mode", so there is likely additional registrations that
+haven't been listed yet.
+
+Fourth, remember that Linux now has extensive support for dynamic allocation
+of device numbering and can use sysfs and udev to handle the naming needs.
+There are still some exceptions in the serial and boot device area. Before
+asking for a device number make sure you actually need one.
+
+Finally, sometimes I have to play "namespace police." Please don't be
+offended. I often get submissions for /dev names that would be bound
+to cause conflicts down the road. I am trying to avoid getting in a
+situation where we would have to suffer an incompatible forward
+change. Therefore, please consult with me *before* you make your
+device names and numbers in any way public, at least to the point
+where it would be at all difficult to get them changed.
+
+Your cooperation is appreciated.
+
+
+ 0 Unnamed devices (e.g. non-device mounts)
+ 0 = reserved as null device number
+ See block major 144, 145, 146 for expansion areas.
+
+ 1 char Memory devices
+ 1 = /dev/mem Physical memory access
+ 2 = /dev/kmem Kernel virtual memory access
+ 3 = /dev/null Null device
+ 4 = /dev/port I/O port access
+ 5 = /dev/zero Null byte source
+ 6 = /dev/core OBSOLETE - replaced by /proc/kcore
+ 7 = /dev/full Returns ENOSPC on write
+ 8 = /dev/random Nondeterministic random number gen.
+ 9 = /dev/urandom Faster, less secure random number gen.
+ 10 = /dev/aio Asynchronous I/O notification interface
+ 11 = /dev/kmsg Writes to this come out as printk's, reads
+ export the buffered printk records.
+ 12 = /dev/oldmem OBSOLETE - replaced by /proc/vmcore
+
+ 1 block RAM disk
+ 0 = /dev/ram0 First RAM disk
+ 1 = /dev/ram1 Second RAM disk
+ ...
+ 250 = /dev/initrd Initial RAM disk
+
+ Older kernels had /dev/ramdisk (1, 1) here.
+ /dev/initrd refers to a RAM disk which was preloaded
+ by the boot loader; newer kernels use /dev/ram0 for
+ the initrd.
+
+ 2 char Pseudo-TTY masters
+ 0 = /dev/ptyp0 First PTY master
+ 1 = /dev/ptyp1 Second PTY master
+ ...
+ 255 = /dev/ptyef 256th PTY master
+
+ Pseudo-tty's are named as follows:
+ * Masters are "pty", slaves are "tty";
+ * the fourth letter is one of pqrstuvwxyzabcde indicating
+ the 1st through 16th series of 16 pseudo-ttys each, and
+ * the fifth letter is one of 0123456789abcdef indicating
+ the position within the series.
+
+ These are the old-style (BSD) PTY devices; Unix98
+ devices are on major 128 and above and use the PTY
+ master multiplex (/dev/ptmx) to acquire a PTY on
+ demand.
+
+ 2 block Floppy disks
+ 0 = /dev/fd0 Controller 0, drive 0, autodetect
+ 1 = /dev/fd1 Controller 0, drive 1, autodetect
+ 2 = /dev/fd2 Controller 0, drive 2, autodetect
+ 3 = /dev/fd3 Controller 0, drive 3, autodetect
+ 128 = /dev/fd4 Controller 1, drive 0, autodetect
+ 129 = /dev/fd5 Controller 1, drive 1, autodetect
+ 130 = /dev/fd6 Controller 1, drive 2, autodetect
+ 131 = /dev/fd7 Controller 1, drive 3, autodetect
+
+ To specify format, add to the autodetect device number:
+ 0 = /dev/fd? Autodetect format
+ 4 = /dev/fd?d360 5.25" 360K in a 360K drive(1)
+ 20 = /dev/fd?h360 5.25" 360K in a 1200K drive(1)
+ 48 = /dev/fd?h410 5.25" 410K in a 1200K drive
+ 64 = /dev/fd?h420 5.25" 420K in a 1200K drive
+ 24 = /dev/fd?h720 5.25" 720K in a 1200K drive
+ 80 = /dev/fd?h880 5.25" 880K in a 1200K drive(1)
+ 8 = /dev/fd?h1200 5.25" 1200K in a 1200K drive(1)
+ 40 = /dev/fd?h1440 5.25" 1440K in a 1200K drive(1)
+ 56 = /dev/fd?h1476 5.25" 1476K in a 1200K drive
+ 72 = /dev/fd?h1494 5.25" 1494K in a 1200K drive
+ 92 = /dev/fd?h1600 5.25" 1600K in a 1200K drive(1)
+
+ 12 = /dev/fd?u360 3.5" 360K Double Density(2)
+ 16 = /dev/fd?u720 3.5" 720K Double Density(1)
+ 120 = /dev/fd?u800 3.5" 800K Double Density(2)
+ 52 = /dev/fd?u820 3.5" 820K Double Density
+ 68 = /dev/fd?u830 3.5" 830K Double Density
+ 84 = /dev/fd?u1040 3.5" 1040K Double Density(1)
+ 88 = /dev/fd?u1120 3.5" 1120K Double Density(1)
+ 28 = /dev/fd?u1440 3.5" 1440K High Density(1)
+ 124 = /dev/fd?u1600 3.5" 1600K High Density(1)
+ 44 = /dev/fd?u1680 3.5" 1680K High Density(3)
+ 60 = /dev/fd?u1722 3.5" 1722K High Density
+ 76 = /dev/fd?u1743 3.5" 1743K High Density
+ 96 = /dev/fd?u1760 3.5" 1760K High Density
+ 116 = /dev/fd?u1840 3.5" 1840K High Density(3)
+ 100 = /dev/fd?u1920 3.5" 1920K High Density(1)
+ 32 = /dev/fd?u2880 3.5" 2880K Extra Density(1)
+ 104 = /dev/fd?u3200 3.5" 3200K Extra Density
+ 108 = /dev/fd?u3520 3.5" 3520K Extra Density
+ 112 = /dev/fd?u3840 3.5" 3840K Extra Density(1)
+
+ 36 = /dev/fd?CompaQ Compaq 2880K drive; obsolete?
+
+ (1) Autodetectable format
+ (2) Autodetectable format in a Double Density (720K) drive only
+ (3) Autodetectable format in a High Density (1440K) drive only
+
+ NOTE: The letter in the device name (d, q, h or u)
+ signifies the type of drive: 5.25" Double Density (d),
+ 5.25" Quad Density (q), 5.25" High Density (h) or 3.5"
+ (any model, u). The use of the capital letters D, H
+ and E for the 3.5" models have been deprecated, since
+ the drive type is insignificant for these devices.
+
+ 3 char Pseudo-TTY slaves
+ 0 = /dev/ttyp0 First PTY slave
+ 1 = /dev/ttyp1 Second PTY slave
+ ...
+ 255 = /dev/ttyef 256th PTY slave
+
+ These are the old-style (BSD) PTY devices; Unix98
+ devices are on major 136 and above.
+
+ 3 block First MFM, RLL and IDE hard disk/CD-ROM interface
+ 0 = /dev/hda Master: whole disk (or CD-ROM)
+ 64 = /dev/hdb Slave: whole disk (or CD-ROM)
+
+ For partitions, add to the whole disk device number:
+ 0 = /dev/hd? Whole disk
+ 1 = /dev/hd?1 First partition
+ 2 = /dev/hd?2 Second partition
+ ...
+ 63 = /dev/hd?63 63rd partition
+
+ For Linux/i386, partitions 1-4 are the primary
+ partitions, and 5 and above are logical partitions.
+ Other versions of Linux use partitioning schemes
+ appropriate to their respective architectures.
+
+ 4 char TTY devices
+ 0 = /dev/tty0 Current virtual console
+
+ 1 = /dev/tty1 First virtual console
+ ...
+ 63 = /dev/tty63 63rd virtual console
+ 64 = /dev/ttyS0 First UART serial port
+ ...
+ 255 = /dev/ttyS191 192nd UART serial port
+
+ UART serial ports refer to 8250/16450/16550 series devices.
+
+ Older versions of the Linux kernel used this major
+ number for BSD PTY devices. As of Linux 2.1.115, this
+ is no longer supported. Use major numbers 2 and 3.
+
+ 4 block Aliases for dynamically allocated major devices to be used
+ when its not possible to create the real device nodes
+ because the root filesystem is mounted read-only.
+
+ 0 = /dev/root
+
+ 5 char Alternate TTY devices
+ 0 = /dev/tty Current TTY device
+ 1 = /dev/console System console
+ 2 = /dev/ptmx PTY master multiplex
+ 3 = /dev/ttyprintk User messages via printk TTY device
+ 64 = /dev/cua0 Callout device for ttyS0
+ ...
+ 255 = /dev/cua191 Callout device for ttyS191
+
+ (5,1) is /dev/console starting with Linux 2.1.71. See
+ the section on terminal devices for more information
+ on /dev/console.
+
+ 6 char Parallel printer devices
+ 0 = /dev/lp0 Parallel printer on parport0
+ 1 = /dev/lp1 Parallel printer on parport1
+ ...
+
+ Current Linux kernels no longer have a fixed mapping
+ between parallel ports and I/O addresses. Instead,
+ they are redirected through the parport multiplex layer.
+
+ 7 char Virtual console capture devices
+ 0 = /dev/vcs Current vc text contents
+ 1 = /dev/vcs1 tty1 text contents
+ ...
+ 63 = /dev/vcs63 tty63 text contents
+ 128 = /dev/vcsa Current vc text/attribute contents
+ 129 = /dev/vcsa1 tty1 text/attribute contents
+ ...
+ 191 = /dev/vcsa63 tty63 text/attribute contents
+
+ NOTE: These devices permit both read and write access.
+
+ 7 block Loopback devices
+ 0 = /dev/loop0 First loop device
+ 1 = /dev/loop1 Second loop device
+ ...
+
+ The loop devices are used to mount filesystems not
+ associated with block devices. The binding to the
+ loop devices is handled by mount(8) or losetup(8).
+
+ 8 block SCSI disk devices (0-15)
+ 0 = /dev/sda First SCSI disk whole disk
+ 16 = /dev/sdb Second SCSI disk whole disk
+ 32 = /dev/sdc Third SCSI disk whole disk
+ ...
+ 240 = /dev/sdp Sixteenth SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 9 char SCSI tape devices
+ 0 = /dev/st0 First SCSI tape, mode 0
+ 1 = /dev/st1 Second SCSI tape, mode 0
+ ...
+ 32 = /dev/st0l First SCSI tape, mode 1
+ 33 = /dev/st1l Second SCSI tape, mode 1
+ ...
+ 64 = /dev/st0m First SCSI tape, mode 2
+ 65 = /dev/st1m Second SCSI tape, mode 2
+ ...
+ 96 = /dev/st0a First SCSI tape, mode 3
+ 97 = /dev/st1a Second SCSI tape, mode 3
+ ...
+ 128 = /dev/nst0 First SCSI tape, mode 0, no rewind
+ 129 = /dev/nst1 Second SCSI tape, mode 0, no rewind
+ ...
+ 160 = /dev/nst0l First SCSI tape, mode 1, no rewind
+ 161 = /dev/nst1l Second SCSI tape, mode 1, no rewind
+ ...
+ 192 = /dev/nst0m First SCSI tape, mode 2, no rewind
+ 193 = /dev/nst1m Second SCSI tape, mode 2, no rewind
+ ...
+ 224 = /dev/nst0a First SCSI tape, mode 3, no rewind
+ 225 = /dev/nst1a Second SCSI tape, mode 3, no rewind
+ ...
+
+ "No rewind" refers to the omission of the default
+ automatic rewind on device close. The MTREW or MTOFFL
+ ioctl()'s can be used to rewind the tape regardless of
+ the device used to access it.
+
+ 9 block Metadisk (RAID) devices
+ 0 = /dev/md0 First metadisk group
+ 1 = /dev/md1 Second metadisk group
+ ...
+
+ The metadisk driver is used to span a
+ filesystem across multiple physical disks.
+
+ 10 char Non-serial mice, misc features
+ 0 = /dev/logibm Logitech bus mouse
+ 1 = /dev/psaux PS/2-style mouse port
+ 2 = /dev/inportbm Microsoft Inport bus mouse
+ 3 = /dev/atibm ATI XL bus mouse
+ 4 = /dev/jbm J-mouse
+ 4 = /dev/amigamouse Amiga mouse (68k/Amiga)
+ 5 = /dev/atarimouse Atari mouse
+ 6 = /dev/sunmouse Sun mouse
+ 7 = /dev/amigamouse1 Second Amiga mouse
+ 8 = /dev/smouse Simple serial mouse driver
+ 9 = /dev/pc110pad IBM PC-110 digitizer pad
+ 10 = /dev/adbmouse Apple Desktop Bus mouse
+ 11 = /dev/vrtpanel Vr41xx embedded touch panel
+ 13 = /dev/vpcmouse Connectix Virtual PC Mouse
+ 14 = /dev/touchscreen/ucb1x00 UCB 1x00 touchscreen
+ 15 = /dev/touchscreen/mk712 MK712 touchscreen
+ 128 = /dev/beep Fancy beep device
+ 129 =
+ 130 = /dev/watchdog Watchdog timer port
+ 131 = /dev/temperature Machine internal temperature
+ 132 = /dev/hwtrap Hardware fault trap
+ 133 = /dev/exttrp External device trap
+ 134 = /dev/apm_bios Advanced Power Management BIOS
+ 135 = /dev/rtc Real Time Clock
+ 137 = /dev/vhci Bluetooth virtual HCI driver
+ 139 = /dev/openprom SPARC OpenBoot PROM
+ 140 = /dev/relay8 Berkshire Products Octal relay card
+ 141 = /dev/relay16 Berkshire Products ISO-16 relay card
+ 142 =
+ 143 = /dev/pciconf PCI configuration space
+ 144 = /dev/nvram Non-volatile configuration RAM
+ 145 = /dev/hfmodem Soundcard shortwave modem control
+ 146 = /dev/graphics Linux/SGI graphics device
+ 147 = /dev/opengl Linux/SGI OpenGL pipe
+ 148 = /dev/gfx Linux/SGI graphics effects device
+ 149 = /dev/input/mouse Linux/SGI Irix emulation mouse
+ 150 = /dev/input/keyboard Linux/SGI Irix emulation keyboard
+ 151 = /dev/led Front panel LEDs
+ 152 = /dev/kpoll Kernel Poll Driver
+ 153 = /dev/mergemem Memory merge device
+ 154 = /dev/pmu Macintosh PowerBook power manager
+ 155 = /dev/isictl MultiTech ISICom serial control
+ 156 = /dev/lcd Front panel LCD display
+ 157 = /dev/ac Applicom Intl Profibus card
+ 158 = /dev/nwbutton Netwinder external button
+ 159 = /dev/nwdebug Netwinder debug interface
+ 160 = /dev/nwflash Netwinder flash memory
+ 161 = /dev/userdma User-space DMA access
+ 162 = /dev/smbus System Management Bus
+ 163 = /dev/lik Logitech Internet Keyboard
+ 164 = /dev/ipmo Intel Intelligent Platform Management
+ 165 = /dev/vmmon VMware virtual machine monitor
+ 166 = /dev/i2o/ctl I2O configuration manager
+ 167 = /dev/specialix_sxctl Specialix serial control
+ 168 = /dev/tcldrv Technology Concepts serial control
+ 169 = /dev/specialix_rioctl Specialix RIO serial control
+ 170 = /dev/thinkpad/thinkpad IBM Thinkpad devices
+ 171 = /dev/srripc QNX4 API IPC manager
+ 172 = /dev/usemaclone Semaphore clone device
+ 173 = /dev/ipmikcs Intelligent Platform Management
+ 174 = /dev/uctrl SPARCbook 3 microcontroller
+ 175 = /dev/agpgart AGP Graphics Address Remapping Table
+ 176 = /dev/gtrsc Gorgy Timing radio clock
+ 177 = /dev/cbm Serial CBM bus
+ 178 = /dev/jsflash JavaStation OS flash SIMM
+ 179 = /dev/xsvc High-speed shared-mem/semaphore service
+ 180 = /dev/vrbuttons Vr41xx button input device
+ 181 = /dev/toshiba Toshiba laptop SMM support
+ 182 = /dev/perfctr Performance-monitoring counters
+ 183 = /dev/hwrng Generic random number generator
+ 184 = /dev/cpu/microcode CPU microcode update interface
+ 186 = /dev/atomicps Atomic shapshot of process state data
+ 187 = /dev/irnet IrNET device
+ 188 = /dev/smbusbios SMBus BIOS
+ 189 = /dev/ussp_ctl User space serial port control
+ 190 = /dev/crash Mission Critical Linux crash dump facility
+ 191 = /dev/pcl181 <information missing>
+ 192 = /dev/nas_xbus NAS xbus LCD/buttons access
+ 193 = /dev/d7s SPARC 7-segment display
+ 194 = /dev/zkshim Zero-Knowledge network shim control
+ 195 = /dev/elographics/e2201 Elographics touchscreen E271-2201
+ 196 = /dev/vfio/vfio VFIO userspace driver interface
+ 197 = /dev/pxa3xx-gcu PXA3xx graphics controller unit driver
+ 198 = /dev/sexec Signed executable interface
+ 199 = /dev/scanners/cuecat :CueCat barcode scanner
+ 200 = /dev/net/tun TAP/TUN network device
+ 201 = /dev/button/gulpb Transmeta GULP-B buttons
+ 202 = /dev/emd/ctl Enhanced Metadisk RAID (EMD) control
+ 203 = /dev/cuse Cuse (character device in user-space)
+ 204 = /dev/video/em8300 EM8300 DVD decoder control
+ 205 = /dev/video/em8300_mv EM8300 DVD decoder video
+ 206 = /dev/video/em8300_ma EM8300 DVD decoder audio
+ 207 = /dev/video/em8300_sp EM8300 DVD decoder subpicture
+ 208 = /dev/compaq/cpqphpc Compaq PCI Hot Plug Controller
+ 209 = /dev/compaq/cpqrid Compaq Remote Insight Driver
+ 210 = /dev/impi/bt IMPI coprocessor block transfer
+ 211 = /dev/impi/smic IMPI coprocessor stream interface
+ 212 = /dev/watchdogs/0 First watchdog device
+ 213 = /dev/watchdogs/1 Second watchdog device
+ 214 = /dev/watchdogs/2 Third watchdog device
+ 215 = /dev/watchdogs/3 Fourth watchdog device
+ 216 = /dev/fujitsu/apanel Fujitsu/Siemens application panel
+ 217 = /dev/ni/natmotn National Instruments Motion
+ 218 = /dev/kchuid Inter-process chuid control
+ 219 = /dev/modems/mwave MWave modem firmware upload
+ 220 = /dev/mptctl Message passing technology (MPT) control
+ 221 = /dev/mvista/hssdsi Montavista PICMG hot swap system driver
+ 222 = /dev/mvista/hasi Montavista PICMG high availability
+ 223 = /dev/input/uinput User level driver support for input
+ 224 = /dev/tpm TCPA TPM driver
+ 225 = /dev/pps Pulse Per Second driver
+ 226 = /dev/systrace Systrace device
+ 227 = /dev/mcelog X86_64 Machine Check Exception driver
+ 228 = /dev/hpet HPET driver
+ 229 = /dev/fuse Fuse (virtual filesystem in user-space)
+ 230 = /dev/midishare MidiShare driver
+ 231 = /dev/snapshot System memory snapshot device
+ 232 = /dev/kvm Kernel-based virtual machine (hardware virtualization extensions)
+ 233 = /dev/kmview View-OS A process with a view
+ 234 = /dev/btrfs-control Btrfs control device
+ 235 = /dev/autofs Autofs control device
+ 236 = /dev/mapper/control Device-Mapper control device
+ 237 = /dev/loop-control Loopback control device
+ 238 = /dev/vhost-net Host kernel accelerator for virtio net
+ 239 = /dev/uhid User-space I/O driver support for HID subsystem
+
+ 240-254 Reserved for local use
+ 255 Reserved for MISC_DYNAMIC_MINOR
+
+ 11 char Raw keyboard device (Linux/SPARC only)
+ 0 = /dev/kbd Raw keyboard device
+
+ 11 char Serial Mux device (Linux/PA-RISC only)
+ 0 = /dev/ttyB0 First mux port
+ 1 = /dev/ttyB1 Second mux port
+ ...
+
+ 11 block SCSI CD-ROM devices
+ 0 = /dev/scd0 First SCSI CD-ROM
+ 1 = /dev/scd1 Second SCSI CD-ROM
+ ...
+
+ The prefix /dev/sr (instead of /dev/scd) has been deprecated.
+
+ 12 char QIC-02 tape
+ 2 = /dev/ntpqic11 QIC-11, no rewind-on-close
+ 3 = /dev/tpqic11 QIC-11, rewind-on-close
+ 4 = /dev/ntpqic24 QIC-24, no rewind-on-close
+ 5 = /dev/tpqic24 QIC-24, rewind-on-close
+ 6 = /dev/ntpqic120 QIC-120, no rewind-on-close
+ 7 = /dev/tpqic120 QIC-120, rewind-on-close
+ 8 = /dev/ntpqic150 QIC-150, no rewind-on-close
+ 9 = /dev/tpqic150 QIC-150, rewind-on-close
+
+ The device names specified are proposed -- if there
+ are "standard" names for these devices, please let me know.
+
+ 12 block
+
+ 13 char Input core
+ 0 = /dev/input/js0 First joystick
+ 1 = /dev/input/js1 Second joystick
+ ...
+ 32 = /dev/input/mouse0 First mouse
+ 33 = /dev/input/mouse1 Second mouse
+ ...
+ 63 = /dev/input/mice Unified mouse
+ 64 = /dev/input/event0 First event queue
+ 65 = /dev/input/event1 Second event queue
+ ...
+
+ Each device type has 5 bits (32 minors).
+
+ 13 block Previously used for the XT disk (/dev/xdN)
+ Deleted in kernel v3.9.
+
+ 14 char Open Sound System (OSS)
+ 0 = /dev/mixer Mixer control
+ 1 = /dev/sequencer Audio sequencer
+ 2 = /dev/midi00 First MIDI port
+ 3 = /dev/dsp Digital audio
+ 4 = /dev/audio Sun-compatible digital audio
+ 6 =
+ 7 = /dev/audioctl SPARC audio control device
+ 8 = /dev/sequencer2 Sequencer -- alternate device
+ 16 = /dev/mixer1 Second soundcard mixer control
+ 17 = /dev/patmgr0 Sequencer patch manager
+ 18 = /dev/midi01 Second MIDI port
+ 19 = /dev/dsp1 Second soundcard digital audio
+ 20 = /dev/audio1 Second soundcard Sun digital audio
+ 33 = /dev/patmgr1 Sequencer patch manager
+ 34 = /dev/midi02 Third MIDI port
+ 50 = /dev/midi03 Fourth MIDI port
+
+ 14 block
+
+ 15 char Joystick
+ 0 = /dev/js0 First analog joystick
+ 1 = /dev/js1 Second analog joystick
+ ...
+ 128 = /dev/djs0 First digital joystick
+ 129 = /dev/djs1 Second digital joystick
+ ...
+ 15 block Sony CDU-31A/CDU-33A CD-ROM
+ 0 = /dev/sonycd Sony CDU-31a CD-ROM
+
+ 16 char Non-SCSI scanners
+ 0 = /dev/gs4500 Genius 4500 handheld scanner
+
+ 16 block GoldStar CD-ROM
+ 0 = /dev/gscd GoldStar CD-ROM
+
+ 17 char OBSOLETE (was Chase serial card)
+ 0 = /dev/ttyH0 First Chase port
+ 1 = /dev/ttyH1 Second Chase port
+ ...
+ 17 block Optics Storage CD-ROM
+ 0 = /dev/optcd Optics Storage CD-ROM
+
+ 18 char OBSOLETE (was Chase serial card - alternate devices)
+ 0 = /dev/cuh0 Callout device for ttyH0
+ 1 = /dev/cuh1 Callout device for ttyH1
+ ...
+ 18 block Sanyo CD-ROM
+ 0 = /dev/sjcd Sanyo CD-ROM
+
+ 19 char Cyclades serial card
+ 0 = /dev/ttyC0 First Cyclades port
+ ...
+ 31 = /dev/ttyC31 32nd Cyclades port
+
+ 19 block "Double" compressed disk
+ 0 = /dev/double0 First compressed disk
+ ...
+ 7 = /dev/double7 Eighth compressed disk
+ 128 = /dev/cdouble0 Mirror of first compressed disk
+ ...
+ 135 = /dev/cdouble7 Mirror of eighth compressed disk
+
+ See the Double documentation for the meaning of the
+ mirror devices.
+
+ 20 char Cyclades serial card - alternate devices
+ 0 = /dev/cub0 Callout device for ttyC0
+ ...
+ 31 = /dev/cub31 Callout device for ttyC31
+
+ 20 block Hitachi CD-ROM (under development)
+ 0 = /dev/hitcd Hitachi CD-ROM
+
+ 21 char Generic SCSI access
+ 0 = /dev/sg0 First generic SCSI device
+ 1 = /dev/sg1 Second generic SCSI device
+ ...
+
+ Most distributions name these /dev/sga, /dev/sgb...;
+ this sets an unnecessary limit of 26 SCSI devices in
+ the system and is counter to standard Linux
+ device-naming practice.
+
+ 21 block Acorn MFM hard drive interface
+ 0 = /dev/mfma First MFM drive whole disk
+ 64 = /dev/mfmb Second MFM drive whole disk
+
+ This device is used on the ARM-based Acorn RiscPC.
+ Partitions are handled the same way as for IDE disks
+ (see major number 3).
+
+ 22 char Digiboard serial card
+ 0 = /dev/ttyD0 First Digiboard port
+ 1 = /dev/ttyD1 Second Digiboard port
+ ...
+ 22 block Second IDE hard disk/CD-ROM interface
+ 0 = /dev/hdc Master: whole disk (or CD-ROM)
+ 64 = /dev/hdd Slave: whole disk (or CD-ROM)
+
+ Partitions are handled the same way as for the first
+ interface (see major number 3).
+
+ 23 char Digiboard serial card - alternate devices
+ 0 = /dev/cud0 Callout device for ttyD0
+ 1 = /dev/cud1 Callout device for ttyD1
+ ...
+ 23 block Mitsumi proprietary CD-ROM
+ 0 = /dev/mcd Mitsumi CD-ROM
+
+ 24 char Stallion serial card
+ 0 = /dev/ttyE0 Stallion port 0 card 0
+ 1 = /dev/ttyE1 Stallion port 1 card 0
+ ...
+ 64 = /dev/ttyE64 Stallion port 0 card 1
+ 65 = /dev/ttyE65 Stallion port 1 card 1
+ ...
+ 128 = /dev/ttyE128 Stallion port 0 card 2
+ 129 = /dev/ttyE129 Stallion port 1 card 2
+ ...
+ 192 = /dev/ttyE192 Stallion port 0 card 3
+ 193 = /dev/ttyE193 Stallion port 1 card 3
+ ...
+ 24 block Sony CDU-535 CD-ROM
+ 0 = /dev/cdu535 Sony CDU-535 CD-ROM
+
+ 25 char Stallion serial card - alternate devices
+ 0 = /dev/cue0 Callout device for ttyE0
+ 1 = /dev/cue1 Callout device for ttyE1
+ ...
+ 64 = /dev/cue64 Callout device for ttyE64
+ 65 = /dev/cue65 Callout device for ttyE65
+ ...
+ 128 = /dev/cue128 Callout device for ttyE128
+ 129 = /dev/cue129 Callout device for ttyE129
+ ...
+ 192 = /dev/cue192 Callout device for ttyE192
+ 193 = /dev/cue193 Callout device for ttyE193
+ ...
+ 25 block First Matsushita (Panasonic/SoundBlaster) CD-ROM
+ 0 = /dev/sbpcd0 Panasonic CD-ROM controller 0 unit 0
+ 1 = /dev/sbpcd1 Panasonic CD-ROM controller 0 unit 1
+ 2 = /dev/sbpcd2 Panasonic CD-ROM controller 0 unit 2
+ 3 = /dev/sbpcd3 Panasonic CD-ROM controller 0 unit 3
+
+ 26 char
+
+ 26 block Second Matsushita (Panasonic/SoundBlaster) CD-ROM
+ 0 = /dev/sbpcd4 Panasonic CD-ROM controller 1 unit 0
+ 1 = /dev/sbpcd5 Panasonic CD-ROM controller 1 unit 1
+ 2 = /dev/sbpcd6 Panasonic CD-ROM controller 1 unit 2
+ 3 = /dev/sbpcd7 Panasonic CD-ROM controller 1 unit 3
+
+ 27 char QIC-117 tape
+ 0 = /dev/qft0 Unit 0, rewind-on-close
+ 1 = /dev/qft1 Unit 1, rewind-on-close
+ 2 = /dev/qft2 Unit 2, rewind-on-close
+ 3 = /dev/qft3 Unit 3, rewind-on-close
+ 4 = /dev/nqft0 Unit 0, no rewind-on-close
+ 5 = /dev/nqft1 Unit 1, no rewind-on-close
+ 6 = /dev/nqft2 Unit 2, no rewind-on-close
+ 7 = /dev/nqft3 Unit 3, no rewind-on-close
+ 16 = /dev/zqft0 Unit 0, rewind-on-close, compression
+ 17 = /dev/zqft1 Unit 1, rewind-on-close, compression
+ 18 = /dev/zqft2 Unit 2, rewind-on-close, compression
+ 19 = /dev/zqft3 Unit 3, rewind-on-close, compression
+ 20 = /dev/nzqft0 Unit 0, no rewind-on-close, compression
+ 21 = /dev/nzqft1 Unit 1, no rewind-on-close, compression
+ 22 = /dev/nzqft2 Unit 2, no rewind-on-close, compression
+ 23 = /dev/nzqft3 Unit 3, no rewind-on-close, compression
+ 32 = /dev/rawqft0 Unit 0, rewind-on-close, no file marks
+ 33 = /dev/rawqft1 Unit 1, rewind-on-close, no file marks
+ 34 = /dev/rawqft2 Unit 2, rewind-on-close, no file marks
+ 35 = /dev/rawqft3 Unit 3, rewind-on-close, no file marks
+ 36 = /dev/nrawqft0 Unit 0, no rewind-on-close, no file marks
+ 37 = /dev/nrawqft1 Unit 1, no rewind-on-close, no file marks
+ 38 = /dev/nrawqft2 Unit 2, no rewind-on-close, no file marks
+ 39 = /dev/nrawqft3 Unit 3, no rewind-on-close, no file marks
+
+ 27 block Third Matsushita (Panasonic/SoundBlaster) CD-ROM
+ 0 = /dev/sbpcd8 Panasonic CD-ROM controller 2 unit 0
+ 1 = /dev/sbpcd9 Panasonic CD-ROM controller 2 unit 1
+ 2 = /dev/sbpcd10 Panasonic CD-ROM controller 2 unit 2
+ 3 = /dev/sbpcd11 Panasonic CD-ROM controller 2 unit 3
+
+ 28 char Stallion serial card - card programming
+ 0 = /dev/staliomem0 First Stallion card I/O memory
+ 1 = /dev/staliomem1 Second Stallion card I/O memory
+ 2 = /dev/staliomem2 Third Stallion card I/O memory
+ 3 = /dev/staliomem3 Fourth Stallion card I/O memory
+
+ 28 char Atari SLM ACSI laser printer (68k/Atari)
+ 0 = /dev/slm0 First SLM laser printer
+ 1 = /dev/slm1 Second SLM laser printer
+ ...
+ 28 block Fourth Matsushita (Panasonic/SoundBlaster) CD-ROM
+ 0 = /dev/sbpcd12 Panasonic CD-ROM controller 3 unit 0
+ 1 = /dev/sbpcd13 Panasonic CD-ROM controller 3 unit 1
+ 2 = /dev/sbpcd14 Panasonic CD-ROM controller 3 unit 2
+ 3 = /dev/sbpcd15 Panasonic CD-ROM controller 3 unit 3
+
+ 28 block ACSI disk (68k/Atari)
+ 0 = /dev/ada First ACSI disk whole disk
+ 16 = /dev/adb Second ACSI disk whole disk
+ 32 = /dev/adc Third ACSI disk whole disk
+ ...
+ 240 = /dev/adp 16th ACSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15, like SCSI.
+
+ 29 char Universal frame buffer
+ 0 = /dev/fb0 First frame buffer
+ 1 = /dev/fb1 Second frame buffer
+ ...
+ 31 = /dev/fb31 32nd frame buffer
+
+ 29 block Aztech/Orchid/Okano/Wearnes CD-ROM
+ 0 = /dev/aztcd Aztech CD-ROM
+
+ 30 char iBCS-2 compatibility devices
+ 0 = /dev/socksys Socket access
+ 1 = /dev/spx SVR3 local X interface
+ 32 = /dev/inet/ip Network access
+ 33 = /dev/inet/icmp
+ 34 = /dev/inet/ggp
+ 35 = /dev/inet/ipip
+ 36 = /dev/inet/tcp
+ 37 = /dev/inet/egp
+ 38 = /dev/inet/pup
+ 39 = /dev/inet/udp
+ 40 = /dev/inet/idp
+ 41 = /dev/inet/rawip
+
+ Additionally, iBCS-2 requires the following links:
+
+ /dev/ip -> /dev/inet/ip
+ /dev/icmp -> /dev/inet/icmp
+ /dev/ggp -> /dev/inet/ggp
+ /dev/ipip -> /dev/inet/ipip
+ /dev/tcp -> /dev/inet/tcp
+ /dev/egp -> /dev/inet/egp
+ /dev/pup -> /dev/inet/pup
+ /dev/udp -> /dev/inet/udp
+ /dev/idp -> /dev/inet/idp
+ /dev/rawip -> /dev/inet/rawip
+ /dev/inet/arp -> /dev/inet/udp
+ /dev/inet/rip -> /dev/inet/udp
+ /dev/nfsd -> /dev/socksys
+ /dev/X0R -> /dev/null (? apparently not required ?)
+
+ 30 block Philips LMS CM-205 CD-ROM
+ 0 = /dev/cm205cd Philips LMS CM-205 CD-ROM
+
+ /dev/lmscd is an older name for this device. This
+ driver does not work with the CM-205MS CD-ROM.
+
+ 31 char MPU-401 MIDI
+ 0 = /dev/mpu401data MPU-401 data port
+ 1 = /dev/mpu401stat MPU-401 status port
+
+ 31 block ROM/flash memory card
+ 0 = /dev/rom0 First ROM card (rw)
+ ...
+ 7 = /dev/rom7 Eighth ROM card (rw)
+ 8 = /dev/rrom0 First ROM card (ro)
+ ...
+ 15 = /dev/rrom7 Eighth ROM card (ro)
+ 16 = /dev/flash0 First flash memory card (rw)
+ ...
+ 23 = /dev/flash7 Eighth flash memory card (rw)
+ 24 = /dev/rflash0 First flash memory card (ro)
+ ...
+ 31 = /dev/rflash7 Eighth flash memory card (ro)
+
+ The read-write (rw) devices support back-caching
+ written data in RAM, as well as writing to flash RAM
+ devices. The read-only devices (ro) support reading
+ only.
+
+ 32 char Specialix serial card
+ 0 = /dev/ttyX0 First Specialix port
+ 1 = /dev/ttyX1 Second Specialix port
+ ...
+ 32 block Philips LMS CM-206 CD-ROM
+ 0 = /dev/cm206cd Philips LMS CM-206 CD-ROM
+
+ 33 char Specialix serial card - alternate devices
+ 0 = /dev/cux0 Callout device for ttyX0
+ 1 = /dev/cux1 Callout device for ttyX1
+ ...
+ 33 block Third IDE hard disk/CD-ROM interface
+ 0 = /dev/hde Master: whole disk (or CD-ROM)
+ 64 = /dev/hdf Slave: whole disk (or CD-ROM)
+
+ Partitions are handled the same way as for the first
+ interface (see major number 3).
+
+ 34 char Z8530 HDLC driver
+ 0 = /dev/scc0 First Z8530, first port
+ 1 = /dev/scc1 First Z8530, second port
+ 2 = /dev/scc2 Second Z8530, first port
+ 3 = /dev/scc3 Second Z8530, second port
+ ...
+
+ In a previous version these devices were named
+ /dev/sc1 for /dev/scc0, /dev/sc2 for /dev/scc1, and so
+ on.
+
+ 34 block Fourth IDE hard disk/CD-ROM interface
+ 0 = /dev/hdg Master: whole disk (or CD-ROM)
+ 64 = /dev/hdh Slave: whole disk (or CD-ROM)
+
+ Partitions are handled the same way as for the first
+ interface (see major number 3).
+
+ 35 char tclmidi MIDI driver
+ 0 = /dev/midi0 First MIDI port, kernel timed
+ 1 = /dev/midi1 Second MIDI port, kernel timed
+ 2 = /dev/midi2 Third MIDI port, kernel timed
+ 3 = /dev/midi3 Fourth MIDI port, kernel timed
+ 64 = /dev/rmidi0 First MIDI port, untimed
+ 65 = /dev/rmidi1 Second MIDI port, untimed
+ 66 = /dev/rmidi2 Third MIDI port, untimed
+ 67 = /dev/rmidi3 Fourth MIDI port, untimed
+ 128 = /dev/smpte0 First MIDI port, SMPTE timed
+ 129 = /dev/smpte1 Second MIDI port, SMPTE timed
+ 130 = /dev/smpte2 Third MIDI port, SMPTE timed
+ 131 = /dev/smpte3 Fourth MIDI port, SMPTE timed
+
+ 35 block Slow memory ramdisk
+ 0 = /dev/slram Slow memory ramdisk
+
+ 36 char Netlink support
+ 0 = /dev/route Routing, device updates, kernel to user
+ 1 = /dev/skip enSKIP security cache control
+ 3 = /dev/fwmonitor Firewall packet copies
+ 16 = /dev/tap0 First Ethertap device
+ ...
+ 31 = /dev/tap15 16th Ethertap device
+
+ 36 block OBSOLETE (was MCA ESDI hard disk)
+
+ 37 char IDE tape
+ 0 = /dev/ht0 First IDE tape
+ 1 = /dev/ht1 Second IDE tape
+ ...
+ 128 = /dev/nht0 First IDE tape, no rewind-on-close
+ 129 = /dev/nht1 Second IDE tape, no rewind-on-close
+ ...
+
+ Currently, only one IDE tape drive is supported.
+
+ 37 block Zorro II ramdisk
+ 0 = /dev/z2ram Zorro II ramdisk
+
+ 38 char Myricom PCI Myrinet board
+ 0 = /dev/mlanai0 First Myrinet board
+ 1 = /dev/mlanai1 Second Myrinet board
+ ...
+
+ This device is used for status query, board control
+ and "user level packet I/O." This board is also
+ accessible as a standard networking "eth" device.
+
+ 38 block OBSOLETE (was Linux/AP+)
+
+ 39 char ML-16P experimental I/O board
+ 0 = /dev/ml16pa-a0 First card, first analog channel
+ 1 = /dev/ml16pa-a1 First card, second analog channel
+ ...
+ 15 = /dev/ml16pa-a15 First card, 16th analog channel
+ 16 = /dev/ml16pa-d First card, digital lines
+ 17 = /dev/ml16pa-c0 First card, first counter/timer
+ 18 = /dev/ml16pa-c1 First card, second counter/timer
+ 19 = /dev/ml16pa-c2 First card, third counter/timer
+ 32 = /dev/ml16pb-a0 Second card, first analog channel
+ 33 = /dev/ml16pb-a1 Second card, second analog channel
+ ...
+ 47 = /dev/ml16pb-a15 Second card, 16th analog channel
+ 48 = /dev/ml16pb-d Second card, digital lines
+ 49 = /dev/ml16pb-c0 Second card, first counter/timer
+ 50 = /dev/ml16pb-c1 Second card, second counter/timer
+ 51 = /dev/ml16pb-c2 Second card, third counter/timer
+ ...
+ 39 block
+
+ 40 char
+
+ 40 block
+
+ 41 char Yet Another Micro Monitor
+ 0 = /dev/yamm Yet Another Micro Monitor
+
+ 41 block
+
+ 42 char Demo/sample use
+
+ 42 block Demo/sample use
+
+ This number is intended for use in sample code, as
+ well as a general "example" device number. It
+ should never be used for a device driver that is being
+ distributed; either obtain an official number or use
+ the local/experimental range. The sudden addition or
+ removal of a driver with this number should not cause
+ ill effects to the system (bugs excepted.)
+
+ IN PARTICULAR, ANY DISTRIBUTION WHICH CONTAINS A
+ DEVICE DRIVER USING MAJOR NUMBER 42 IS NONCOMPLIANT.
+
+ 43 char isdn4linux virtual modem
+ 0 = /dev/ttyI0 First virtual modem
+ ...
+ 63 = /dev/ttyI63 64th virtual modem
+
+ 43 block Network block devices
+ 0 = /dev/nb0 First network block device
+ 1 = /dev/nb1 Second network block device
+ ...
+
+ Network Block Device is somehow similar to loopback
+ devices: If you read from it, it sends packet across
+ network asking server for data. If you write to it, it
+ sends packet telling server to write. It could be used
+ to mounting filesystems over the net, swapping over
+ the net, implementing block device in userland etc.
+
+ 44 char isdn4linux virtual modem - alternate devices
+ 0 = /dev/cui0 Callout device for ttyI0
+ ...
+ 63 = /dev/cui63 Callout device for ttyI63
+
+ 44 block Flash Translation Layer (FTL) filesystems
+ 0 = /dev/ftla FTL on first Memory Technology Device
+ 16 = /dev/ftlb FTL on second Memory Technology Device
+ 32 = /dev/ftlc FTL on third Memory Technology Device
+ ...
+ 240 = /dev/ftlp FTL on 16th Memory Technology Device
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the partition
+ limit is 15 rather than 63 per disk (same as SCSI.)
+
+ 45 char isdn4linux ISDN BRI driver
+ 0 = /dev/isdn0 First virtual B channel raw data
+ ...
+ 63 = /dev/isdn63 64th virtual B channel raw data
+ 64 = /dev/isdnctrl0 First channel control/debug
+ ...
+ 127 = /dev/isdnctrl63 64th channel control/debug
+
+ 128 = /dev/ippp0 First SyncPPP device
+ ...
+ 191 = /dev/ippp63 64th SyncPPP device
+
+ 255 = /dev/isdninfo ISDN monitor interface
+
+ 45 block Parallel port IDE disk devices
+ 0 = /dev/pda First parallel port IDE disk
+ 16 = /dev/pdb Second parallel port IDE disk
+ 32 = /dev/pdc Third parallel port IDE disk
+ 48 = /dev/pdd Fourth parallel port IDE disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the partition
+ limit is 15 rather than 63 per disk.
+
+ 46 char Comtrol Rocketport serial card
+ 0 = /dev/ttyR0 First Rocketport port
+ 1 = /dev/ttyR1 Second Rocketport port
+ ...
+ 46 block Parallel port ATAPI CD-ROM devices
+ 0 = /dev/pcd0 First parallel port ATAPI CD-ROM
+ 1 = /dev/pcd1 Second parallel port ATAPI CD-ROM
+ 2 = /dev/pcd2 Third parallel port ATAPI CD-ROM
+ 3 = /dev/pcd3 Fourth parallel port ATAPI CD-ROM
+
+ 47 char Comtrol Rocketport serial card - alternate devices
+ 0 = /dev/cur0 Callout device for ttyR0
+ 1 = /dev/cur1 Callout device for ttyR1
+ ...
+ 47 block Parallel port ATAPI disk devices
+ 0 = /dev/pf0 First parallel port ATAPI disk
+ 1 = /dev/pf1 Second parallel port ATAPI disk
+ 2 = /dev/pf2 Third parallel port ATAPI disk
+ 3 = /dev/pf3 Fourth parallel port ATAPI disk
+
+ This driver is intended for floppy disks and similar
+ devices and hence does not support partitioning.
+
+ 48 char SDL RISCom serial card
+ 0 = /dev/ttyL0 First RISCom port
+ 1 = /dev/ttyL1 Second RISCom port
+ ...
+ 48 block Mylex DAC960 PCI RAID controller; first controller
+ 0 = /dev/rd/c0d0 First disk, whole disk
+ 8 = /dev/rd/c0d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c0d31 32nd disk, whole disk
+
+ For partitions add:
+ 0 = /dev/rd/c?d? Whole disk
+ 1 = /dev/rd/c?d?p1 First partition
+ ...
+ 7 = /dev/rd/c?d?p7 Seventh partition
+
+ 49 char SDL RISCom serial card - alternate devices
+ 0 = /dev/cul0 Callout device for ttyL0
+ 1 = /dev/cul1 Callout device for ttyL1
+ ...
+ 49 block Mylex DAC960 PCI RAID controller; second controller
+ 0 = /dev/rd/c1d0 First disk, whole disk
+ 8 = /dev/rd/c1d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c1d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+ 50 char Reserved for GLINT
+
+ 50 block Mylex DAC960 PCI RAID controller; third controller
+ 0 = /dev/rd/c2d0 First disk, whole disk
+ 8 = /dev/rd/c2d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c2d31 32nd disk, whole disk
+
+ 51 char Baycom radio modem OR Radio Tech BIM-XXX-RS232 radio modem
+ 0 = /dev/bc0 First Baycom radio modem
+ 1 = /dev/bc1 Second Baycom radio modem
+ ...
+ 51 block Mylex DAC960 PCI RAID controller; fourth controller
+ 0 = /dev/rd/c3d0 First disk, whole disk
+ 8 = /dev/rd/c3d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c3d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+ 52 char Spellcaster DataComm/BRI ISDN card
+ 0 = /dev/dcbri0 First DataComm card
+ 1 = /dev/dcbri1 Second DataComm card
+ 2 = /dev/dcbri2 Third DataComm card
+ 3 = /dev/dcbri3 Fourth DataComm card
+
+ 52 block Mylex DAC960 PCI RAID controller; fifth controller
+ 0 = /dev/rd/c4d0 First disk, whole disk
+ 8 = /dev/rd/c4d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c4d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+ 53 char BDM interface for remote debugging MC683xx microcontrollers
+ 0 = /dev/pd_bdm0 PD BDM interface on lp0
+ 1 = /dev/pd_bdm1 PD BDM interface on lp1
+ 2 = /dev/pd_bdm2 PD BDM interface on lp2
+ 4 = /dev/icd_bdm0 ICD BDM interface on lp0
+ 5 = /dev/icd_bdm1 ICD BDM interface on lp1
+ 6 = /dev/icd_bdm2 ICD BDM interface on lp2
+
+ This device is used for the interfacing to the MC683xx
+ microcontrollers via Background Debug Mode by use of a
+ Parallel Port interface. PD is the Motorola Public
+ Domain Interface and ICD is the commercial interface
+ by P&E.
+
+ 53 block Mylex DAC960 PCI RAID controller; sixth controller
+ 0 = /dev/rd/c5d0 First disk, whole disk
+ 8 = /dev/rd/c5d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c5d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+ 54 char Electrocardiognosis Holter serial card
+ 0 = /dev/holter0 First Holter port
+ 1 = /dev/holter1 Second Holter port
+ 2 = /dev/holter2 Third Holter port
+
+ A custom serial card used by Electrocardiognosis SRL
+ <mseritan@ottonel.pub.ro> to transfer data from Holter
+ 24-hour heart monitoring equipment.
+
+ 54 block Mylex DAC960 PCI RAID controller; seventh controller
+ 0 = /dev/rd/c6d0 First disk, whole disk
+ 8 = /dev/rd/c6d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c6d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+ 55 char DSP56001 digital signal processor
+ 0 = /dev/dsp56k First DSP56001
+
+ 55 block Mylex DAC960 PCI RAID controller; eighth controller
+ 0 = /dev/rd/c7d0 First disk, whole disk
+ 8 = /dev/rd/c7d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c7d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+ 56 char Apple Desktop Bus
+ 0 = /dev/adb ADB bus control
+
+ Additional devices will be added to this number, all
+ starting with /dev/adb.
+
+ 56 block Fifth IDE hard disk/CD-ROM interface
+ 0 = /dev/hdi Master: whole disk (or CD-ROM)
+ 64 = /dev/hdj Slave: whole disk (or CD-ROM)
+
+ Partitions are handled the same way as for the first
+ interface (see major number 3).
+
+ 57 char Hayes ESP serial card
+ 0 = /dev/ttyP0 First ESP port
+ 1 = /dev/ttyP1 Second ESP port
+ ...
+
+ 57 block Sixth IDE hard disk/CD-ROM interface
+ 0 = /dev/hdk Master: whole disk (or CD-ROM)
+ 64 = /dev/hdl Slave: whole disk (or CD-ROM)
+
+ Partitions are handled the same way as for the first
+ interface (see major number 3).
+
+ 58 char Hayes ESP serial card - alternate devices
+ 0 = /dev/cup0 Callout device for ttyP0
+ 1 = /dev/cup1 Callout device for ttyP1
+ ...
+
+ 58 block Reserved for logical volume manager
+
+ 59 char sf firewall package
+ 0 = /dev/firewall Communication with sf kernel module
+
+ 59 block Generic PDA filesystem device
+ 0 = /dev/pda0 First PDA device
+ 1 = /dev/pda1 Second PDA device
+ ...
+
+ The pda devices are used to mount filesystems on
+ remote pda's (basically slow handheld machines with
+ proprietary OS's and limited memory and storage
+ running small fs translation drivers) through serial /
+ IRDA / parallel links.
+
+ NAMING CONFLICT -- PROPOSED REVISED NAME /dev/rpda0 etc
+
+ 60-63 char LOCAL/EXPERIMENTAL USE
+
+ 60-63 block LOCAL/EXPERIMENTAL USE
+ Allocated for local/experimental use. For devices not
+ assigned official numbers, these ranges should be
+ used in order to avoid conflicting with future assignments.
+
+ 64 char ENskip kernel encryption package
+ 0 = /dev/enskip Communication with ENskip kernel module
+
+ 64 block Scramdisk/DriveCrypt encrypted devices
+ 0 = /dev/scramdisk/master Master node for ioctls
+ 1 = /dev/scramdisk/1 First encrypted device
+ 2 = /dev/scramdisk/2 Second encrypted device
+ ...
+ 255 = /dev/scramdisk/255 255th encrypted device
+
+ The filename of the encrypted container and the passwords
+ are sent via ioctls (using the sdmount tool) to the master
+ node which then activates them via one of the
+ /dev/scramdisk/x nodes for loop mounting (all handled
+ through the sdmount tool).
+
+ Requested by: andy@scramdisklinux.org
+
+ 65 char Sundance "plink" Transputer boards (obsolete, unused)
+ 0 = /dev/plink0 First plink device
+ 1 = /dev/plink1 Second plink device
+ 2 = /dev/plink2 Third plink device
+ 3 = /dev/plink3 Fourth plink device
+ 64 = /dev/rplink0 First plink device, raw
+ 65 = /dev/rplink1 Second plink device, raw
+ 66 = /dev/rplink2 Third plink device, raw
+ 67 = /dev/rplink3 Fourth plink device, raw
+ 128 = /dev/plink0d First plink device, debug
+ 129 = /dev/plink1d Second plink device, debug
+ 130 = /dev/plink2d Third plink device, debug
+ 131 = /dev/plink3d Fourth plink device, debug
+ 192 = /dev/rplink0d First plink device, raw, debug
+ 193 = /dev/rplink1d Second plink device, raw, debug
+ 194 = /dev/rplink2d Third plink device, raw, debug
+ 195 = /dev/rplink3d Fourth plink device, raw, debug
+
+ This is a commercial driver; contact James Howes
+ <jth@prosig.demon.co.uk> for information.
+
+ 65 block SCSI disk devices (16-31)
+ 0 = /dev/sdq 17th SCSI disk whole disk
+ 16 = /dev/sdr 18th SCSI disk whole disk
+ 32 = /dev/sds 19th SCSI disk whole disk
+ ...
+ 240 = /dev/sdaf 32nd SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 66 char YARC PowerPC PCI coprocessor card
+ 0 = /dev/yppcpci0 First YARC card
+ 1 = /dev/yppcpci1 Second YARC card
+ ...
+
+ 66 block SCSI disk devices (32-47)
+ 0 = /dev/sdag 33th SCSI disk whole disk
+ 16 = /dev/sdah 34th SCSI disk whole disk
+ 32 = /dev/sdai 35th SCSI disk whole disk
+ ...
+ 240 = /dev/sdav 48nd SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 67 char Coda network file system
+ 0 = /dev/cfs0 Coda cache manager
+
+ See http://www.coda.cs.cmu.edu for information about Coda.
+
+ 67 block SCSI disk devices (48-63)
+ 0 = /dev/sdaw 49th SCSI disk whole disk
+ 16 = /dev/sdax 50th SCSI disk whole disk
+ 32 = /dev/sday 51st SCSI disk whole disk
+ ...
+ 240 = /dev/sdbl 64th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 68 char CAPI 2.0 interface
+ 0 = /dev/capi20 Control device
+ 1 = /dev/capi20.00 First CAPI 2.0 application
+ 2 = /dev/capi20.01 Second CAPI 2.0 application
+ ...
+ 20 = /dev/capi20.19 19th CAPI 2.0 application
+
+ ISDN CAPI 2.0 driver for use with CAPI 2.0
+ applications; currently supports the AVM B1 card.
+
+ 68 block SCSI disk devices (64-79)
+ 0 = /dev/sdbm 65th SCSI disk whole disk
+ 16 = /dev/sdbn 66th SCSI disk whole disk
+ 32 = /dev/sdbo 67th SCSI disk whole disk
+ ...
+ 240 = /dev/sdcb 80th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 69 char MA16 numeric accelerator card
+ 0 = /dev/ma16 Board memory access
+
+ 69 block SCSI disk devices (80-95)
+ 0 = /dev/sdcc 81st SCSI disk whole disk
+ 16 = /dev/sdcd 82nd SCSI disk whole disk
+ 32 = /dev/sdce 83th SCSI disk whole disk
+ ...
+ 240 = /dev/sdcr 96th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 70 char SpellCaster Protocol Services Interface
+ 0 = /dev/apscfg Configuration interface
+ 1 = /dev/apsauth Authentication interface
+ 2 = /dev/apslog Logging interface
+ 3 = /dev/apsdbg Debugging interface
+ 64 = /dev/apsisdn ISDN command interface
+ 65 = /dev/apsasync Async command interface
+ 128 = /dev/apsmon Monitor interface
+
+ 70 block SCSI disk devices (96-111)
+ 0 = /dev/sdcs 97th SCSI disk whole disk
+ 16 = /dev/sdct 98th SCSI disk whole disk
+ 32 = /dev/sdcu 99th SCSI disk whole disk
+ ...
+ 240 = /dev/sddh 112nd SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 71 char Computone IntelliPort II serial card
+ 0 = /dev/ttyF0 IntelliPort II board 0, port 0
+ 1 = /dev/ttyF1 IntelliPort II board 0, port 1
+ ...
+ 63 = /dev/ttyF63 IntelliPort II board 0, port 63
+ 64 = /dev/ttyF64 IntelliPort II board 1, port 0
+ 65 = /dev/ttyF65 IntelliPort II board 1, port 1
+ ...
+ 127 = /dev/ttyF127 IntelliPort II board 1, port 63
+ 128 = /dev/ttyF128 IntelliPort II board 2, port 0
+ 129 = /dev/ttyF129 IntelliPort II board 2, port 1
+ ...
+ 191 = /dev/ttyF191 IntelliPort II board 2, port 63
+ 192 = /dev/ttyF192 IntelliPort II board 3, port 0
+ 193 = /dev/ttyF193 IntelliPort II board 3, port 1
+ ...
+ 255 = /dev/ttyF255 IntelliPort II board 3, port 63
+
+ 71 block SCSI disk devices (112-127)
+ 0 = /dev/sddi 113th SCSI disk whole disk
+ 16 = /dev/sddj 114th SCSI disk whole disk
+ 32 = /dev/sddk 115th SCSI disk whole disk
+ ...
+ 240 = /dev/sddx 128th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 72 char Computone IntelliPort II serial card - alternate devices
+ 0 = /dev/cuf0 Callout device for ttyF0
+ 1 = /dev/cuf1 Callout device for ttyF1
+ ...
+ 63 = /dev/cuf63 Callout device for ttyF63
+ 64 = /dev/cuf64 Callout device for ttyF64
+ 65 = /dev/cuf65 Callout device for ttyF65
+ ...
+ 127 = /dev/cuf127 Callout device for ttyF127
+ 128 = /dev/cuf128 Callout device for ttyF128
+ 129 = /dev/cuf129 Callout device for ttyF129
+ ...
+ 191 = /dev/cuf191 Callout device for ttyF191
+ 192 = /dev/cuf192 Callout device for ttyF192
+ 193 = /dev/cuf193 Callout device for ttyF193
+ ...
+ 255 = /dev/cuf255 Callout device for ttyF255
+
+ 72 block Compaq Intelligent Drive Array, first controller
+ 0 = /dev/ida/c0d0 First logical drive whole disk
+ 16 = /dev/ida/c0d1 Second logical drive whole disk
+ ...
+ 240 = /dev/ida/c0d15 16th logical drive whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+ 73 char Computone IntelliPort II serial card - control devices
+ 0 = /dev/ip2ipl0 Loadware device for board 0
+ 1 = /dev/ip2stat0 Status device for board 0
+ 4 = /dev/ip2ipl1 Loadware device for board 1
+ 5 = /dev/ip2stat1 Status device for board 1
+ 8 = /dev/ip2ipl2 Loadware device for board 2
+ 9 = /dev/ip2stat2 Status device for board 2
+ 12 = /dev/ip2ipl3 Loadware device for board 3
+ 13 = /dev/ip2stat3 Status device for board 3
+
+ 73 block Compaq Intelligent Drive Array, second controller
+ 0 = /dev/ida/c1d0 First logical drive whole disk
+ 16 = /dev/ida/c1d1 Second logical drive whole disk
+ ...
+ 240 = /dev/ida/c1d15 16th logical drive whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+ 74 char SCI bridge
+ 0 = /dev/SCI/0 SCI device 0
+ 1 = /dev/SCI/1 SCI device 1
+ ...
+
+ Currently for Dolphin Interconnect Solutions' PCI-SCI
+ bridge.
+
+ 74 block Compaq Intelligent Drive Array, third controller
+ 0 = /dev/ida/c2d0 First logical drive whole disk
+ 16 = /dev/ida/c2d1 Second logical drive whole disk
+ ...
+ 240 = /dev/ida/c2d15 16th logical drive whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+ 75 char Specialix IO8+ serial card
+ 0 = /dev/ttyW0 First IO8+ port, first card
+ 1 = /dev/ttyW1 Second IO8+ port, first card
+ ...
+ 8 = /dev/ttyW8 First IO8+ port, second card
+ ...
+
+ 75 block Compaq Intelligent Drive Array, fourth controller
+ 0 = /dev/ida/c3d0 First logical drive whole disk
+ 16 = /dev/ida/c3d1 Second logical drive whole disk
+ ...
+ 240 = /dev/ida/c3d15 16th logical drive whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+ 76 char Specialix IO8+ serial card - alternate devices
+ 0 = /dev/cuw0 Callout device for ttyW0
+ 1 = /dev/cuw1 Callout device for ttyW1
+ ...
+ 8 = /dev/cuw8 Callout device for ttyW8
+ ...
+
+ 76 block Compaq Intelligent Drive Array, fifth controller
+ 0 = /dev/ida/c4d0 First logical drive whole disk
+ 16 = /dev/ida/c4d1 Second logical drive whole disk
+ ...
+ 240 = /dev/ida/c4d15 16th logical drive whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+
+ 77 char ComScire Quantum Noise Generator
+ 0 = /dev/qng ComScire Quantum Noise Generator
+
+ 77 block Compaq Intelligent Drive Array, sixth controller
+ 0 = /dev/ida/c5d0 First logical drive whole disk
+ 16 = /dev/ida/c5d1 Second logical drive whole disk
+ ...
+ 240 = /dev/ida/c5d15 16th logical drive whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+ 78 char PAM Software's multimodem boards
+ 0 = /dev/ttyM0 First PAM modem
+ 1 = /dev/ttyM1 Second PAM modem
+ ...
+
+ 78 block Compaq Intelligent Drive Array, seventh controller
+ 0 = /dev/ida/c6d0 First logical drive whole disk
+ 16 = /dev/ida/c6d1 Second logical drive whole disk
+ ...
+ 240 = /dev/ida/c6d15 16th logical drive whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+ 79 char PAM Software's multimodem boards - alternate devices
+ 0 = /dev/cum0 Callout device for ttyM0
+ 1 = /dev/cum1 Callout device for ttyM1
+ ...
+
+ 79 block Compaq Intelligent Drive Array, eighth controller
+ 0 = /dev/ida/c7d0 First logical drive whole disk
+ 16 = /dev/ida/c7d1 Second logical drive whole disk
+ ...
+ 240 = /dev/ida/c715 16th logical drive whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+ 80 char Photometrics AT200 CCD camera
+ 0 = /dev/at200 Photometrics AT200 CCD camera
+
+ 80 block I2O hard disk
+ 0 = /dev/i2o/hda First I2O hard disk, whole disk
+ 16 = /dev/i2o/hdb Second I2O hard disk, whole disk
+ ...
+ 240 = /dev/i2o/hdp 16th I2O hard disk, whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 81 char video4linux
+ 0 = /dev/video0 Video capture/overlay device
+ ...
+ 63 = /dev/video63 Video capture/overlay device
+ 64 = /dev/radio0 Radio device
+ ...
+ 127 = /dev/radio63 Radio device
+ 128 = /dev/swradio0 Software Defined Radio device
+ ...
+ 191 = /dev/swradio63 Software Defined Radio device
+ 224 = /dev/vbi0 Vertical blank interrupt
+ ...
+ 255 = /dev/vbi31 Vertical blank interrupt
+
+ Minor numbers are allocated dynamically unless
+ CONFIG_VIDEO_FIXED_MINOR_RANGES (default n)
+ configuration option is set.
+
+ 81 block I2O hard disk
+ 0 = /dev/i2o/hdq 17th I2O hard disk, whole disk
+ 16 = /dev/i2o/hdr 18th I2O hard disk, whole disk
+ ...
+ 240 = /dev/i2o/hdaf 32nd I2O hard disk, whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 82 char WiNRADiO communications receiver card
+ 0 = /dev/winradio0 First WiNRADiO card
+ 1 = /dev/winradio1 Second WiNRADiO card
+ ...
+
+ The driver and documentation may be obtained from
+ http://www.winradio.com/
+
+ 82 block I2O hard disk
+ 0 = /dev/i2o/hdag 33rd I2O hard disk, whole disk
+ 16 = /dev/i2o/hdah 34th I2O hard disk, whole disk
+ ...
+ 240 = /dev/i2o/hdav 48th I2O hard disk, whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 83 char Matrox mga_vid video driver
+ 0 = /dev/mga_vid0 1st video card
+ 1 = /dev/mga_vid1 2nd video card
+ 2 = /dev/mga_vid2 3rd video card
+ ...
+ 15 = /dev/mga_vid15 16th video card
+
+ 83 block I2O hard disk
+ 0 = /dev/i2o/hdaw 49th I2O hard disk, whole disk
+ 16 = /dev/i2o/hdax 50th I2O hard disk, whole disk
+ ...
+ 240 = /dev/i2o/hdbl 64th I2O hard disk, whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 84 char Ikon 1011[57] Versatec Greensheet Interface
+ 0 = /dev/ihcp0 First Greensheet port
+ 1 = /dev/ihcp1 Second Greensheet port
+
+ 84 block I2O hard disk
+ 0 = /dev/i2o/hdbm 65th I2O hard disk, whole disk
+ 16 = /dev/i2o/hdbn 66th I2O hard disk, whole disk
+ ...
+ 240 = /dev/i2o/hdcb 80th I2O hard disk, whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 85 char Linux/SGI shared memory input queue
+ 0 = /dev/shmiq Master shared input queue
+ 1 = /dev/qcntl0 First device pushed
+ 2 = /dev/qcntl1 Second device pushed
+ ...
+
+ 85 block I2O hard disk
+ 0 = /dev/i2o/hdcc 81st I2O hard disk, whole disk
+ 16 = /dev/i2o/hdcd 82nd I2O hard disk, whole disk
+ ...
+ 240 = /dev/i2o/hdcr 96th I2O hard disk, whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 86 char SCSI media changer
+ 0 = /dev/sch0 First SCSI media changer
+ 1 = /dev/sch1 Second SCSI media changer
+ ...
+
+ 86 block I2O hard disk
+ 0 = /dev/i2o/hdcs 97th I2O hard disk, whole disk
+ 16 = /dev/i2o/hdct 98th I2O hard disk, whole disk
+ ...
+ 240 = /dev/i2o/hddh 112th I2O hard disk, whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 87 char Sony Control-A1 stereo control bus
+ 0 = /dev/controla0 First device on chain
+ 1 = /dev/controla1 Second device on chain
+ ...
+
+ 87 block I2O hard disk
+ 0 = /dev/i2o/hddi 113rd I2O hard disk, whole disk
+ 16 = /dev/i2o/hddj 114th I2O hard disk, whole disk
+ ...
+ 240 = /dev/i2o/hddx 128th I2O hard disk, whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 88 char COMX synchronous serial card
+ 0 = /dev/comx0 COMX channel 0
+ 1 = /dev/comx1 COMX channel 1
+ ...
+
+ 88 block Seventh IDE hard disk/CD-ROM interface
+ 0 = /dev/hdm Master: whole disk (or CD-ROM)
+ 64 = /dev/hdn Slave: whole disk (or CD-ROM)
+
+ Partitions are handled the same way as for the first
+ interface (see major number 3).
+
+ 89 char I2C bus interface
+ 0 = /dev/i2c-0 First I2C adapter
+ 1 = /dev/i2c-1 Second I2C adapter
+ ...
+
+ 89 block Eighth IDE hard disk/CD-ROM interface
+ 0 = /dev/hdo Master: whole disk (or CD-ROM)
+ 64 = /dev/hdp Slave: whole disk (or CD-ROM)
+
+ Partitions are handled the same way as for the first
+ interface (see major number 3).
+
+ 90 char Memory Technology Device (RAM, ROM, Flash)
+ 0 = /dev/mtd0 First MTD (rw)
+ 1 = /dev/mtdr0 First MTD (ro)
+ ...
+ 30 = /dev/mtd15 16th MTD (rw)
+ 31 = /dev/mtdr15 16th MTD (ro)
+
+ 90 block Ninth IDE hard disk/CD-ROM interface
+ 0 = /dev/hdq Master: whole disk (or CD-ROM)
+ 64 = /dev/hdr Slave: whole disk (or CD-ROM)
+
+ Partitions are handled the same way as for the first
+ interface (see major number 3).
+
+ 91 char CAN-Bus devices
+ 0 = /dev/can0 First CAN-Bus controller
+ 1 = /dev/can1 Second CAN-Bus controller
+ ...
+
+ 91 block Tenth IDE hard disk/CD-ROM interface
+ 0 = /dev/hds Master: whole disk (or CD-ROM)
+ 64 = /dev/hdt Slave: whole disk (or CD-ROM)
+
+ Partitions are handled the same way as for the first
+ interface (see major number 3).
+
+ 92 char Reserved for ith Kommunikationstechnik MIC ISDN card
+
+ 92 block PPDD encrypted disk driver
+ 0 = /dev/ppdd0 First encrypted disk
+ 1 = /dev/ppdd1 Second encrypted disk
+ ...
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ 93 char
+
+ 93 block NAND Flash Translation Layer filesystem
+ 0 = /dev/nftla First NFTL layer
+ 16 = /dev/nftlb Second NFTL layer
+ ...
+ 240 = /dev/nftlp 16th NTFL layer
+
+ 94 char
+
+ 94 block IBM S/390 DASD block storage
+ 0 = /dev/dasda First DASD device, major
+ 1 = /dev/dasda1 First DASD device, block 1
+ 2 = /dev/dasda2 First DASD device, block 2
+ 3 = /dev/dasda3 First DASD device, block 3
+ 4 = /dev/dasdb Second DASD device, major
+ 5 = /dev/dasdb1 Second DASD device, block 1
+ 6 = /dev/dasdb2 Second DASD device, block 2
+ 7 = /dev/dasdb3 Second DASD device, block 3
+ ...
+
+ 95 char IP filter
+ 0 = /dev/ipl Filter control device/log file
+ 1 = /dev/ipnat NAT control device/log file
+ 2 = /dev/ipstate State information log file
+ 3 = /dev/ipauth Authentication control device/log file
+ ...
+
+ 96 char Parallel port ATAPI tape devices
+ 0 = /dev/pt0 First parallel port ATAPI tape
+ 1 = /dev/pt1 Second parallel port ATAPI tape
+ ...
+ 128 = /dev/npt0 First p.p. ATAPI tape, no rewind
+ 129 = /dev/npt1 Second p.p. ATAPI tape, no rewind
+ ...
+
+ 96 block Inverse NAND Flash Translation Layer
+ 0 = /dev/inftla First INFTL layer
+ 16 = /dev/inftlb Second INFTL layer
+ ...
+ 240 = /dev/inftlp 16th INTFL layer
+
+ 97 char Parallel port generic ATAPI interface
+ 0 = /dev/pg0 First parallel port ATAPI device
+ 1 = /dev/pg1 Second parallel port ATAPI device
+ 2 = /dev/pg2 Third parallel port ATAPI device
+ 3 = /dev/pg3 Fourth parallel port ATAPI device
+
+ These devices support the same API as the generic SCSI
+ devices.
+
+ 98 char Control and Measurement Device (comedi)
+ 0 = /dev/comedi0 First comedi device
+ 1 = /dev/comedi1 Second comedi device
+ ...
+
+ See http://stm.lbl.gov/comedi.
+
+ 98 block User-mode virtual block device
+ 0 = /dev/ubda First user-mode block device
+ 16 = /dev/udbb Second user-mode block device
+ ...
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+ This device is used by the user-mode virtual kernel port.
+
+ 99 char Raw parallel ports
+ 0 = /dev/parport0 First parallel port
+ 1 = /dev/parport1 Second parallel port
+ ...
+
+ 99 block JavaStation flash disk
+ 0 = /dev/jsfd JavaStation flash disk
+
+100 char Telephony for Linux
+ 0 = /dev/phone0 First telephony device
+ 1 = /dev/phone1 Second telephony device
+ ...
+
+101 char Motorola DSP 56xxx board
+ 0 = /dev/mdspstat Status information
+ 1 = /dev/mdsp1 First DSP board I/O controls
+ ...
+ 16 = /dev/mdsp16 16th DSP board I/O controls
+
+101 block AMI HyperDisk RAID controller
+ 0 = /dev/amiraid/ar0 First array whole disk
+ 16 = /dev/amiraid/ar1 Second array whole disk
+ ...
+ 240 = /dev/amiraid/ar15 16th array whole disk
+
+ For each device, partitions are added as:
+ 0 = /dev/amiraid/ar? Whole disk
+ 1 = /dev/amiraid/ar?p1 First partition
+ 2 = /dev/amiraid/ar?p2 Second partition
+ ...
+ 15 = /dev/amiraid/ar?p15 15th partition
+
+102 char
+
+102 block Compressed block device
+ 0 = /dev/cbd/a First compressed block device, whole device
+ 16 = /dev/cbd/b Second compressed block device, whole device
+ ...
+ 240 = /dev/cbd/p 16th compressed block device, whole device
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+103 char Arla network file system
+ 0 = /dev/nnpfs0 First NNPFS device
+ 1 = /dev/nnpfs1 Second NNPFS device
+
+ Arla is a free clone of the Andrew File System, AFS.
+ The NNPFS device gives user mode filesystem
+ implementations a kernel presence for caching and easy
+ mounting. For more information about the project,
+ write to <arla-drinkers@stacken.kth.se> or see
+ http://www.stacken.kth.se/project/arla/
+
+103 block Audit device
+ 0 = /dev/audit Audit device
+
+104 char Flash BIOS support
+
+104 block Compaq Next Generation Drive Array, first controller
+ 0 = /dev/cciss/c0d0 First logical drive, whole disk
+ 16 = /dev/cciss/c0d1 Second logical drive, whole disk
+ ...
+ 240 = /dev/cciss/c0d15 16th logical drive, whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+105 char Comtrol VS-1000 serial controller
+ 0 = /dev/ttyV0 First VS-1000 port
+ 1 = /dev/ttyV1 Second VS-1000 port
+ ...
+
+105 block Compaq Next Generation Drive Array, second controller
+ 0 = /dev/cciss/c1d0 First logical drive, whole disk
+ 16 = /dev/cciss/c1d1 Second logical drive, whole disk
+ ...
+ 240 = /dev/cciss/c1d15 16th logical drive, whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+106 char Comtrol VS-1000 serial controller - alternate devices
+ 0 = /dev/cuv0 First VS-1000 port
+ 1 = /dev/cuv1 Second VS-1000 port
+ ...
+
+106 block Compaq Next Generation Drive Array, third controller
+ 0 = /dev/cciss/c2d0 First logical drive, whole disk
+ 16 = /dev/cciss/c2d1 Second logical drive, whole disk
+ ...
+ 240 = /dev/cciss/c2d15 16th logical drive, whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+107 char 3Dfx Voodoo Graphics device
+ 0 = /dev/3dfx Primary 3Dfx graphics device
+
+107 block Compaq Next Generation Drive Array, fourth controller
+ 0 = /dev/cciss/c3d0 First logical drive, whole disk
+ 16 = /dev/cciss/c3d1 Second logical drive, whole disk
+ ...
+ 240 = /dev/cciss/c3d15 16th logical drive, whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+108 char Device independent PPP interface
+ 0 = /dev/ppp Device independent PPP interface
+
+108 block Compaq Next Generation Drive Array, fifth controller
+ 0 = /dev/cciss/c4d0 First logical drive, whole disk
+ 16 = /dev/cciss/c4d1 Second logical drive, whole disk
+ ...
+ 240 = /dev/cciss/c4d15 16th logical drive, whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+109 char Reserved for logical volume manager
+
+109 block Compaq Next Generation Drive Array, sixth controller
+ 0 = /dev/cciss/c5d0 First logical drive, whole disk
+ 16 = /dev/cciss/c5d1 Second logical drive, whole disk
+ ...
+ 240 = /dev/cciss/c5d15 16th logical drive, whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+110 char miroMEDIA Surround board
+ 0 = /dev/srnd0 First miroMEDIA Surround board
+ 1 = /dev/srnd1 Second miroMEDIA Surround board
+ ...
+
+110 block Compaq Next Generation Drive Array, seventh controller
+ 0 = /dev/cciss/c6d0 First logical drive, whole disk
+ 16 = /dev/cciss/c6d1 Second logical drive, whole disk
+ ...
+ 240 = /dev/cciss/c6d15 16th logical drive, whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+111 char
+
+111 block Compaq Next Generation Drive Array, eighth controller
+ 0 = /dev/cciss/c7d0 First logical drive, whole disk
+ 16 = /dev/cciss/c7d1 Second logical drive, whole disk
+ ...
+ 240 = /dev/cciss/c7d15 16th logical drive, whole disk
+
+ Partitions are handled the same way as for Mylex
+ DAC960 (see major number 48) except that the limit on
+ partitions is 15.
+
+112 char ISI serial card
+ 0 = /dev/ttyM0 First ISI port
+ 1 = /dev/ttyM1 Second ISI port
+ ...
+
+ There is currently a device-naming conflict between
+ these and PAM multimodems (major 78).
+
+112 block IBM iSeries virtual disk
+ 0 = /dev/iseries/vda First virtual disk, whole disk
+ 8 = /dev/iseries/vdb Second virtual disk, whole disk
+ ...
+ 200 = /dev/iseries/vdz 26th virtual disk, whole disk
+ 208 = /dev/iseries/vdaa 27th virtual disk, whole disk
+ ...
+ 248 = /dev/iseries/vdaf 32nd virtual disk, whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 7.
+
+113 char ISI serial card - alternate devices
+ 0 = /dev/cum0 Callout device for ttyM0
+ 1 = /dev/cum1 Callout device for ttyM1
+ ...
+
+113 block IBM iSeries virtual CD-ROM
+ 0 = /dev/iseries/vcda First virtual CD-ROM
+ 1 = /dev/iseries/vcdb Second virtual CD-ROM
+ ...
+
+114 char Picture Elements ISE board
+ 0 = /dev/ise0 First ISE board
+ 1 = /dev/ise1 Second ISE board
+ ...
+ 128 = /dev/isex0 Control node for first ISE board
+ 129 = /dev/isex1 Control node for second ISE board
+ ...
+
+ The ISE board is an embedded computer, optimized for
+ image processing. The /dev/iseN nodes are the general
+ I/O access to the board, the /dev/isex0 nodes command
+ nodes used to control the board.
+
+114 block IDE BIOS powered software RAID interfaces such as the
+ Promise Fastrak
+
+ 0 = /dev/ataraid/d0
+ 1 = /dev/ataraid/d0p1
+ 2 = /dev/ataraid/d0p2
+ ...
+ 16 = /dev/ataraid/d1
+ 17 = /dev/ataraid/d1p1
+ 18 = /dev/ataraid/d1p2
+ ...
+ 255 = /dev/ataraid/d15p15
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+115 char TI link cable devices (115 was formerly the console driver speaker)
+ 0 = /dev/tipar0 Parallel cable on first parallel port
+ ...
+ 7 = /dev/tipar7 Parallel cable on seventh parallel port
+
+ 8 = /dev/tiser0 Serial cable on first serial port
+ ...
+ 15 = /dev/tiser7 Serial cable on seventh serial port
+
+ 16 = /dev/tiusb0 First USB cable
+ ...
+ 47 = /dev/tiusb31 32nd USB cable
+
+115 block NetWare (NWFS) Devices (0-255)
+
+ The NWFS (NetWare) devices are used to present a
+ collection of NetWare Mirror Groups or NetWare
+ Partitions as a logical storage segment for
+ use in mounting NetWare volumes. A maximum of
+ 256 NetWare volumes can be supported in a single
+ machine.
+
+ http://cgfa.telepac.pt/ftp2/kernel.org/linux/kernel/people/jmerkey/nwfs/
+
+ 0 = /dev/nwfs/v0 First NetWare (NWFS) Logical Volume
+ 1 = /dev/nwfs/v1 Second NetWare (NWFS) Logical Volume
+ 2 = /dev/nwfs/v2 Third NetWare (NWFS) Logical Volume
+ ...
+ 255 = /dev/nwfs/v255 Last NetWare (NWFS) Logical Volume
+
+116 char Advanced Linux Sound Driver (ALSA)
+
+116 block MicroMemory battery backed RAM adapter (NVRAM)
+ Supports 16 boards, 15 partitions each.
+ Requested by neilb at cse.unsw.edu.au.
+
+ 0 = /dev/umem/d0 Whole of first board
+ 1 = /dev/umem/d0p1 First partition of first board
+ 2 = /dev/umem/d0p2 Second partition of first board
+ 15 = /dev/umem/d0p15 15th partition of first board
+
+ 16 = /dev/umem/d1 Whole of second board
+ 17 = /dev/umem/d1p1 First partition of second board
+ ...
+ 255= /dev/umem/d15p15 15th partition of 16th board.
+
+117 char COSA/SRP synchronous serial card
+ 0 = /dev/cosa0c0 1st board, 1st channel
+ 1 = /dev/cosa0c1 1st board, 2nd channel
+ ...
+ 16 = /dev/cosa1c0 2nd board, 1st channel
+ 17 = /dev/cosa1c1 2nd board, 2nd channel
+ ...
+
+117 block Enterprise Volume Management System (EVMS)
+
+ The EVMS driver uses a layered, plug-in model to provide
+ unparalleled flexibility and extensibility in managing
+ storage. This allows for easy expansion or customization
+ of various levels of volume management. Requested by
+ Mark Peloquin (peloquin at us.ibm.com).
+
+ Note: EVMS populates and manages all the devnodes in
+ /dev/evms.
+
+ http://sf.net/projects/evms
+
+ 0 = /dev/evms/block_device EVMS block device
+ 1 = /dev/evms/legacyname1 First EVMS legacy device
+ 2 = /dev/evms/legacyname2 Second EVMS legacy device
+ ...
+ Both ranges can grow (down or up) until they meet.
+ ...
+ 254 = /dev/evms/EVMSname2 Second EVMS native device
+ 255 = /dev/evms/EVMSname1 First EVMS native device
+
+ Note: legacyname(s) are derived from the normal legacy
+ device names. For example, /dev/hda5 would become
+ /dev/evms/hda5.
+
+118 char IBM Cryptographic Accelerator
+ 0 = /dev/ica Virtual interface to all IBM Crypto Accelerators
+ 1 = /dev/ica0 IBMCA Device 0
+ 2 = /dev/ica1 IBMCA Device 1
+ ...
+
+119 char VMware virtual network control
+ 0 = /dev/vnet0 1st virtual network
+ 1 = /dev/vnet1 2nd virtual network
+ ...
+
+120-127 char LOCAL/EXPERIMENTAL USE
+
+120-127 block LOCAL/EXPERIMENTAL USE
+ Allocated for local/experimental use. For devices not
+ assigned official numbers, these ranges should be
+ used in order to avoid conflicting with future assignments.
+
+128-135 char Unix98 PTY masters
+
+ These devices should not have corresponding device
+ nodes; instead they should be accessed through the
+ /dev/ptmx cloning interface.
+
+128 block SCSI disk devices (128-143)
+ 0 = /dev/sddy 129th SCSI disk whole disk
+ 16 = /dev/sddz 130th SCSI disk whole disk
+ 32 = /dev/sdea 131th SCSI disk whole disk
+ ...
+ 240 = /dev/sden 144th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+129 block SCSI disk devices (144-159)
+ 0 = /dev/sdeo 145th SCSI disk whole disk
+ 16 = /dev/sdep 146th SCSI disk whole disk
+ 32 = /dev/sdeq 147th SCSI disk whole disk
+ ...
+ 240 = /dev/sdfd 160th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+130 char (Misc devices)
+
+130 block SCSI disk devices (160-175)
+ 0 = /dev/sdfe 161st SCSI disk whole disk
+ 16 = /dev/sdff 162nd SCSI disk whole disk
+ 32 = /dev/sdfg 163rd SCSI disk whole disk
+ ...
+ 240 = /dev/sdft 176th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+131 block SCSI disk devices (176-191)
+ 0 = /dev/sdfu 177th SCSI disk whole disk
+ 16 = /dev/sdfv 178th SCSI disk whole disk
+ 32 = /dev/sdfw 179th SCSI disk whole disk
+ ...
+ 240 = /dev/sdgj 192nd SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+132 block SCSI disk devices (192-207)
+ 0 = /dev/sdgk 193rd SCSI disk whole disk
+ 16 = /dev/sdgl 194th SCSI disk whole disk
+ 32 = /dev/sdgm 195th SCSI disk whole disk
+ ...
+ 240 = /dev/sdgz 208th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+133 block SCSI disk devices (208-223)
+ 0 = /dev/sdha 209th SCSI disk whole disk
+ 16 = /dev/sdhb 210th SCSI disk whole disk
+ 32 = /dev/sdhc 211th SCSI disk whole disk
+ ...
+ 240 = /dev/sdhp 224th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+134 block SCSI disk devices (224-239)
+ 0 = /dev/sdhq 225th SCSI disk whole disk
+ 16 = /dev/sdhr 226th SCSI disk whole disk
+ 32 = /dev/sdhs 227th SCSI disk whole disk
+ ...
+ 240 = /dev/sdif 240th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+135 block SCSI disk devices (240-255)
+ 0 = /dev/sdig 241st SCSI disk whole disk
+ 16 = /dev/sdih 242nd SCSI disk whole disk
+ 32 = /dev/sdih 243rd SCSI disk whole disk
+ ...
+ 240 = /dev/sdiv 256th SCSI disk whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+136-143 char Unix98 PTY slaves
+ 0 = /dev/pts/0 First Unix98 pseudo-TTY
+ 1 = /dev/pts/1 Second Unix98 pseudo-TTY
+ ...
+
+ These device nodes are automatically generated with
+ the proper permissions and modes by mounting the
+ devpts filesystem onto /dev/pts with the appropriate
+ mount options (distribution dependent, however, on
+ *most* distributions the appropriate options are
+ "mode=0620,gid=<gid of the "tty" group>".)
+
+136 block Mylex DAC960 PCI RAID controller; ninth controller
+ 0 = /dev/rd/c8d0 First disk, whole disk
+ 8 = /dev/rd/c8d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c8d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+137 block Mylex DAC960 PCI RAID controller; tenth controller
+ 0 = /dev/rd/c9d0 First disk, whole disk
+ 8 = /dev/rd/c9d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c9d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+138 block Mylex DAC960 PCI RAID controller; eleventh controller
+ 0 = /dev/rd/c10d0 First disk, whole disk
+ 8 = /dev/rd/c10d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c10d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+139 block Mylex DAC960 PCI RAID controller; twelfth controller
+ 0 = /dev/rd/c11d0 First disk, whole disk
+ 8 = /dev/rd/c11d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c11d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+140 block Mylex DAC960 PCI RAID controller; thirteenth controller
+ 0 = /dev/rd/c12d0 First disk, whole disk
+ 8 = /dev/rd/c12d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c12d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+141 block Mylex DAC960 PCI RAID controller; fourteenth controller
+ 0 = /dev/rd/c13d0 First disk, whole disk
+ 8 = /dev/rd/c13d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c13d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+142 block Mylex DAC960 PCI RAID controller; fifteenth controller
+ 0 = /dev/rd/c14d0 First disk, whole disk
+ 8 = /dev/rd/c14d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c14d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+143 block Mylex DAC960 PCI RAID controller; sixteenth controller
+ 0 = /dev/rd/c15d0 First disk, whole disk
+ 8 = /dev/rd/c15d1 Second disk, whole disk
+ ...
+ 248 = /dev/rd/c15d31 32nd disk, whole disk
+
+ Partitions are handled as for major 48.
+
+144 char Encapsulated PPP
+ 0 = /dev/pppox0 First PPP over Ethernet
+ ...
+ 63 = /dev/pppox63 64th PPP over Ethernet
+
+ This is primarily used for ADSL.
+
+ The SST 5136-DN DeviceNet interface driver has been
+ relocated to major 183 due to an unfortunate conflict.
+
+144 block Expansion Area #1 for more non-device (e.g. NFS) mounts
+ 0 = mounted device 256
+ 255 = mounted device 511
+
+145 char SAM9407-based soundcard
+ 0 = /dev/sam0_mixer
+ 1 = /dev/sam0_sequencer
+ 2 = /dev/sam0_midi00
+ 3 = /dev/sam0_dsp
+ 4 = /dev/sam0_audio
+ 6 = /dev/sam0_sndstat
+ 18 = /dev/sam0_midi01
+ 34 = /dev/sam0_midi02
+ 50 = /dev/sam0_midi03
+ 64 = /dev/sam1_mixer
+ ...
+ 128 = /dev/sam2_mixer
+ ...
+ 192 = /dev/sam3_mixer
+ ...
+
+ Device functions match OSS, but offer a number of
+ addons, which are sam9407 specific. OSS can be
+ operated simultaneously, taking care of the codec.
+
+145 block Expansion Area #2 for more non-device (e.g. NFS) mounts
+ 0 = mounted device 512
+ 255 = mounted device 767
+
+146 char SYSTRAM SCRAMNet mirrored-memory network
+ 0 = /dev/scramnet0 First SCRAMNet device
+ 1 = /dev/scramnet1 Second SCRAMNet device
+ ...
+
+146 block Expansion Area #3 for more non-device (e.g. NFS) mounts
+ 0 = mounted device 768
+ 255 = mounted device 1023
+
+147 char Aureal Semiconductor Vortex Audio device
+ 0 = /dev/aureal0 First Aureal Vortex
+ 1 = /dev/aureal1 Second Aureal Vortex
+ ...
+
+147 block Distributed Replicated Block Device (DRBD)
+ 0 = /dev/drbd0 First DRBD device
+ 1 = /dev/drbd1 Second DRBD device
+ ...
+
+148 char Technology Concepts serial card
+ 0 = /dev/ttyT0 First TCL port
+ 1 = /dev/ttyT1 Second TCL port
+ ...
+
+149 char Technology Concepts serial card - alternate devices
+ 0 = /dev/cut0 Callout device for ttyT0
+ 1 = /dev/cut0 Callout device for ttyT1
+ ...
+
+150 char Real-Time Linux FIFOs
+ 0 = /dev/rtf0 First RTLinux FIFO
+ 1 = /dev/rtf1 Second RTLinux FIFO
+ ...
+
+151 char DPT I2O SmartRaid V controller
+ 0 = /dev/dpti0 First DPT I2O adapter
+ 1 = /dev/dpti1 Second DPT I2O adapter
+ ...
+
+152 char EtherDrive Control Device
+ 0 = /dev/etherd/ctl Connect/Disconnect an EtherDrive
+ 1 = /dev/etherd/err Monitor errors
+ 2 = /dev/etherd/raw Raw AoE packet monitor
+
+152 block EtherDrive Block Devices
+ 0 = /dev/etherd/0 EtherDrive 0
+ ...
+ 255 = /dev/etherd/255 EtherDrive 255
+
+153 char SPI Bus Interface (sometimes referred to as MicroWire)
+ 0 = /dev/spi0 First SPI device on the bus
+ 1 = /dev/spi1 Second SPI device on the bus
+ ...
+ 15 = /dev/spi15 Sixteenth SPI device on the bus
+
+153 block Enhanced Metadisk RAID (EMD) storage units
+ 0 = /dev/emd/0 First unit
+ 1 = /dev/emd/0p1 Partition 1 on First unit
+ 2 = /dev/emd/0p2 Partition 2 on First unit
+ ...
+ 15 = /dev/emd/0p15 Partition 15 on First unit
+
+ 16 = /dev/emd/1 Second unit
+ 32 = /dev/emd/2 Third unit
+ ...
+ 240 = /dev/emd/15 Sixteenth unit
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+154 char Specialix RIO serial card
+ 0 = /dev/ttySR0 First RIO port
+ ...
+ 255 = /dev/ttySR255 256th RIO port
+
+155 char Specialix RIO serial card - alternate devices
+ 0 = /dev/cusr0 Callout device for ttySR0
+ ...
+ 255 = /dev/cusr255 Callout device for ttySR255
+
+156 char Specialix RIO serial card
+ 0 = /dev/ttySR256 257th RIO port
+ ...
+ 255 = /dev/ttySR511 512th RIO port
+
+157 char Specialix RIO serial card - alternate devices
+ 0 = /dev/cusr256 Callout device for ttySR256
+ ...
+ 255 = /dev/cusr511 Callout device for ttySR511
+
+158 char Dialogic GammaLink fax driver
+ 0 = /dev/gfax0 GammaLink channel 0
+ 1 = /dev/gfax1 GammaLink channel 1
+ ...
+
+159 char RESERVED
+
+159 block RESERVED
+
+160 char General Purpose Instrument Bus (GPIB)
+ 0 = /dev/gpib0 First GPIB bus
+ 1 = /dev/gpib1 Second GPIB bus
+ ...
+
+160 block Carmel 8-port SATA Disks on First Controller
+ 0 = /dev/carmel/0 SATA disk 0 whole disk
+ 1 = /dev/carmel/0p1 SATA disk 0 partition 1
+ ...
+ 31 = /dev/carmel/0p31 SATA disk 0 partition 31
+
+ 32 = /dev/carmel/1 SATA disk 1 whole disk
+ 64 = /dev/carmel/2 SATA disk 2 whole disk
+ ...
+ 224 = /dev/carmel/7 SATA disk 7 whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 31.
+
+161 char IrCOMM devices (IrDA serial/parallel emulation)
+ 0 = /dev/ircomm0 First IrCOMM device
+ 1 = /dev/ircomm1 Second IrCOMM device
+ ...
+ 16 = /dev/irlpt0 First IrLPT device
+ 17 = /dev/irlpt1 Second IrLPT device
+ ...
+
+161 block Carmel 8-port SATA Disks on Second Controller
+ 0 = /dev/carmel/8 SATA disk 8 whole disk
+ 1 = /dev/carmel/8p1 SATA disk 8 partition 1
+ ...
+ 31 = /dev/carmel/8p31 SATA disk 8 partition 31
+
+ 32 = /dev/carmel/9 SATA disk 9 whole disk
+ 64 = /dev/carmel/10 SATA disk 10 whole disk
+ ...
+ 224 = /dev/carmel/15 SATA disk 15 whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 31.
+
+162 char Raw block device interface
+ 0 = /dev/rawctl Raw I/O control device
+ 1 = /dev/raw/raw1 First raw I/O device
+ 2 = /dev/raw/raw2 Second raw I/O device
+ ...
+ max minor number of raw device is set by kernel config
+ MAX_RAW_DEVS or raw module parameter 'max_raw_devs'
+
+163 char
+
+164 char Chase Research AT/PCI-Fast serial card
+ 0 = /dev/ttyCH0 AT/PCI-Fast board 0, port 0
+ ...
+ 15 = /dev/ttyCH15 AT/PCI-Fast board 0, port 15
+ 16 = /dev/ttyCH16 AT/PCI-Fast board 1, port 0
+ ...
+ 31 = /dev/ttyCH31 AT/PCI-Fast board 1, port 15
+ 32 = /dev/ttyCH32 AT/PCI-Fast board 2, port 0
+ ...
+ 47 = /dev/ttyCH47 AT/PCI-Fast board 2, port 15
+ 48 = /dev/ttyCH48 AT/PCI-Fast board 3, port 0
+ ...
+ 63 = /dev/ttyCH63 AT/PCI-Fast board 3, port 15
+
+165 char Chase Research AT/PCI-Fast serial card - alternate devices
+ 0 = /dev/cuch0 Callout device for ttyCH0
+ ...
+ 63 = /dev/cuch63 Callout device for ttyCH63
+
+166 char ACM USB modems
+ 0 = /dev/ttyACM0 First ACM modem
+ 1 = /dev/ttyACM1 Second ACM modem
+ ...
+
+167 char ACM USB modems - alternate devices
+ 0 = /dev/cuacm0 Callout device for ttyACM0
+ 1 = /dev/cuacm1 Callout device for ttyACM1
+ ...
+
+168 char Eracom CSA7000 PCI encryption adaptor
+ 0 = /dev/ecsa0 First CSA7000
+ 1 = /dev/ecsa1 Second CSA7000
+ ...
+
+169 char Eracom CSA8000 PCI encryption adaptor
+ 0 = /dev/ecsa8-0 First CSA8000
+ 1 = /dev/ecsa8-1 Second CSA8000
+ ...
+
+170 char AMI MegaRAC remote access controller
+ 0 = /dev/megarac0 First MegaRAC card
+ 1 = /dev/megarac1 Second MegaRAC card
+ ...
+
+171 char Reserved for IEEE 1394 (Firewire)
+
+172 char Moxa Intellio serial card
+ 0 = /dev/ttyMX0 First Moxa port
+ 1 = /dev/ttyMX1 Second Moxa port
+ ...
+ 127 = /dev/ttyMX127 128th Moxa port
+ 128 = /dev/moxactl Moxa control port
+
+173 char Moxa Intellio serial card - alternate devices
+ 0 = /dev/cumx0 Callout device for ttyMX0
+ 1 = /dev/cumx1 Callout device for ttyMX1
+ ...
+ 127 = /dev/cumx127 Callout device for ttyMX127
+
+174 char SmartIO serial card
+ 0 = /dev/ttySI0 First SmartIO port
+ 1 = /dev/ttySI1 Second SmartIO port
+ ...
+
+175 char SmartIO serial card - alternate devices
+ 0 = /dev/cusi0 Callout device for ttySI0
+ 1 = /dev/cusi1 Callout device for ttySI1
+ ...
+
+176 char nCipher nFast PCI crypto accelerator
+ 0 = /dev/nfastpci0 First nFast PCI device
+ 1 = /dev/nfastpci1 First nFast PCI device
+ ...
+
+177 char TI PCILynx memory spaces
+ 0 = /dev/pcilynx/aux0 AUX space of first PCILynx card
+ ...
+ 15 = /dev/pcilynx/aux15 AUX space of 16th PCILynx card
+ 16 = /dev/pcilynx/rom0 ROM space of first PCILynx card
+ ...
+ 31 = /dev/pcilynx/rom15 ROM space of 16th PCILynx card
+ 32 = /dev/pcilynx/ram0 RAM space of first PCILynx card
+ ...
+ 47 = /dev/pcilynx/ram15 RAM space of 16th PCILynx card
+
+178 char Giganet cLAN1xxx virtual interface adapter
+ 0 = /dev/clanvi0 First cLAN adapter
+ 1 = /dev/clanvi1 Second cLAN adapter
+ ...
+
+179 block MMC block devices
+ 0 = /dev/mmcblk0 First SD/MMC card
+ 1 = /dev/mmcblk0p1 First partition on first MMC card
+ 8 = /dev/mmcblk1 Second SD/MMC card
+ ...
+
+ The start of next SD/MMC card can be configured with
+ CONFIG_MMC_BLOCK_MINORS, or overridden at boot/modprobe
+ time using the mmcblk.perdev_minors option. That would
+ bump the offset between each card to be the configured
+ value instead of the default 8.
+
+179 char CCube DVXChip-based PCI products
+ 0 = /dev/dvxirq0 First DVX device
+ 1 = /dev/dvxirq1 Second DVX device
+ ...
+
+180 char USB devices
+ 0 = /dev/usb/lp0 First USB printer
+ ...
+ 15 = /dev/usb/lp15 16th USB printer
+ 48 = /dev/usb/scanner0 First USB scanner
+ ...
+ 63 = /dev/usb/scanner15 16th USB scanner
+ 64 = /dev/usb/rio500 Diamond Rio 500
+ 65 = /dev/usb/usblcd USBLCD Interface (info@usblcd.de)
+ 66 = /dev/usb/cpad0 Synaptics cPad (mouse/LCD)
+ 96 = /dev/usb/hiddev0 1st USB HID device
+ ...
+ 111 = /dev/usb/hiddev15 16th USB HID device
+ 112 = /dev/usb/auer0 1st auerswald ISDN device
+ ...
+ 127 = /dev/usb/auer15 16th auerswald ISDN device
+ 128 = /dev/usb/brlvgr0 First Braille Voyager device
+ ...
+ 131 = /dev/usb/brlvgr3 Fourth Braille Voyager device
+ 132 = /dev/usb/idmouse ID Mouse (fingerprint scanner) device
+ 133 = /dev/usb/sisusbvga1 First SiSUSB VGA device
+ ...
+ 140 = /dev/usb/sisusbvga8 Eighth SISUSB VGA device
+ 144 = /dev/usb/lcd USB LCD device
+ 160 = /dev/usb/legousbtower0 1st USB Legotower device
+ ...
+ 175 = /dev/usb/legousbtower15 16th USB Legotower device
+ 176 = /dev/usb/usbtmc1 First USB TMC device
+ ...
+ 191 = /dev/usb/usbtmc16 16th USB TMC device
+ 192 = /dev/usb/yurex1 First USB Yurex device
+ ...
+ 209 = /dev/usb/yurex16 16th USB Yurex device
+
+180 block USB block devices
+ 0 = /dev/uba First USB block device
+ 8 = /dev/ubb Second USB block device
+ 16 = /dev/ubc Third USB block device
+ ...
+
+181 char Conrad Electronic parallel port radio clocks
+ 0 = /dev/pcfclock0 First Conrad radio clock
+ 1 = /dev/pcfclock1 Second Conrad radio clock
+ ...
+
+182 char Picture Elements THR2 binarizer
+ 0 = /dev/pethr0 First THR2 board
+ 1 = /dev/pethr1 Second THR2 board
+ ...
+
+183 char SST 5136-DN DeviceNet interface
+ 0 = /dev/ss5136dn0 First DeviceNet interface
+ 1 = /dev/ss5136dn1 Second DeviceNet interface
+ ...
+
+ This device used to be assigned to major number 144.
+ It had to be moved due to an unfortunate conflict.
+
+184 char Picture Elements' video simulator/sender
+ 0 = /dev/pevss0 First sender board
+ 1 = /dev/pevss1 Second sender board
+ ...
+
+185 char InterMezzo high availability file system
+ 0 = /dev/intermezzo0 First cache manager
+ 1 = /dev/intermezzo1 Second cache manager
+ ...
+
+ See http://web.archive.org/web/20080115195241/
+ http://inter-mezzo.org/index.html
+
+186 char Object-based storage control device
+ 0 = /dev/obd0 First obd control device
+ 1 = /dev/obd1 Second obd control device
+ ...
+
+ See ftp://ftp.lustre.org/pub/obd for code and information.
+
+187 char DESkey hardware encryption device
+ 0 = /dev/deskey0 First DES key
+ 1 = /dev/deskey1 Second DES key
+ ...
+
+188 char USB serial converters
+ 0 = /dev/ttyUSB0 First USB serial converter
+ 1 = /dev/ttyUSB1 Second USB serial converter
+ ...
+
+189 char USB serial converters - alternate devices
+ 0 = /dev/cuusb0 Callout device for ttyUSB0
+ 1 = /dev/cuusb1 Callout device for ttyUSB1
+ ...
+
+190 char Kansas City tracker/tuner card
+ 0 = /dev/kctt0 First KCT/T card
+ 1 = /dev/kctt1 Second KCT/T card
+ ...
+
+191 char Reserved for PCMCIA
+
+192 char Kernel profiling interface
+ 0 = /dev/profile Profiling control device
+ 1 = /dev/profile0 Profiling device for CPU 0
+ 2 = /dev/profile1 Profiling device for CPU 1
+ ...
+
+193 char Kernel event-tracing interface
+ 0 = /dev/trace Tracing control device
+ 1 = /dev/trace0 Tracing device for CPU 0
+ 2 = /dev/trace1 Tracing device for CPU 1
+ ...
+
+194 char linVideoStreams (LINVS)
+ 0 = /dev/mvideo/status0 Video compression status
+ 1 = /dev/mvideo/stream0 Video stream
+ 2 = /dev/mvideo/frame0 Single compressed frame
+ 3 = /dev/mvideo/rawframe0 Raw uncompressed frame
+ 4 = /dev/mvideo/codec0 Direct codec access
+ 5 = /dev/mvideo/video4linux0 Video4Linux compatibility
+
+ 16 = /dev/mvideo/status1 Second device
+ ...
+ 32 = /dev/mvideo/status2 Third device
+ ...
+ ...
+ 240 = /dev/mvideo/status15 16th device
+ ...
+
+195 char Nvidia graphics devices
+ 0 = /dev/nvidia0 First Nvidia card
+ 1 = /dev/nvidia1 Second Nvidia card
+ ...
+ 255 = /dev/nvidiactl Nvidia card control device
+
+196 char Tormenta T1 card
+ 0 = /dev/tor/0 Master control channel for all cards
+ 1 = /dev/tor/1 First DS0
+ 2 = /dev/tor/2 Second DS0
+ ...
+ 48 = /dev/tor/48 48th DS0
+ 49 = /dev/tor/49 First pseudo-channel
+ 50 = /dev/tor/50 Second pseudo-channel
+ ...
+
+197 char OpenTNF tracing facility
+ 0 = /dev/tnf/t0 Trace 0 data extraction
+ 1 = /dev/tnf/t1 Trace 1 data extraction
+ ...
+ 128 = /dev/tnf/status Tracing facility status
+ 130 = /dev/tnf/trace Tracing device
+
+198 char Total Impact TPMP2 quad coprocessor PCI card
+ 0 = /dev/tpmp2/0 First card
+ 1 = /dev/tpmp2/1 Second card
+ ...
+
+199 char Veritas volume manager (VxVM) volumes
+ 0 = /dev/vx/rdsk/*/* First volume
+ 1 = /dev/vx/rdsk/*/* Second volume
+ ...
+
+199 block Veritas volume manager (VxVM) volumes
+ 0 = /dev/vx/dsk/*/* First volume
+ 1 = /dev/vx/dsk/*/* Second volume
+ ...
+
+ The namespace in these directories is maintained by
+ the user space VxVM software.
+
+200 char Veritas VxVM configuration interface
+ 0 = /dev/vx/config Configuration access node
+ 1 = /dev/vx/trace Volume i/o trace access node
+ 2 = /dev/vx/iod Volume i/o daemon access node
+ 3 = /dev/vx/info Volume information access node
+ 4 = /dev/vx/task Volume tasks access node
+ 5 = /dev/vx/taskmon Volume tasks monitor daemon
+
+201 char Veritas VxVM dynamic multipathing driver
+ 0 = /dev/vx/rdmp/* First multipath device
+ 1 = /dev/vx/rdmp/* Second multipath device
+ ...
+201 block Veritas VxVM dynamic multipathing driver
+ 0 = /dev/vx/dmp/* First multipath device
+ 1 = /dev/vx/dmp/* Second multipath device
+ ...
+
+ The namespace in these directories is maintained by
+ the user space VxVM software.
+
+202 char CPU model-specific registers
+ 0 = /dev/cpu/0/msr MSRs on CPU 0
+ 1 = /dev/cpu/1/msr MSRs on CPU 1
+ ...
+
+202 block Xen Virtual Block Device
+ 0 = /dev/xvda First Xen VBD whole disk
+ 16 = /dev/xvdb Second Xen VBD whole disk
+ 32 = /dev/xvdc Third Xen VBD whole disk
+ ...
+ 240 = /dev/xvdp Sixteenth Xen VBD whole disk
+
+ Partitions are handled in the same way as for IDE
+ disks (see major number 3) except that the limit on
+ partitions is 15.
+
+203 char CPU CPUID information
+ 0 = /dev/cpu/0/cpuid CPUID on CPU 0
+ 1 = /dev/cpu/1/cpuid CPUID on CPU 1
+ ...
+
+204 char Low-density serial ports
+ 0 = /dev/ttyLU0 LinkUp Systems L72xx UART - port 0
+ 1 = /dev/ttyLU1 LinkUp Systems L72xx UART - port 1
+ 2 = /dev/ttyLU2 LinkUp Systems L72xx UART - port 2
+ 3 = /dev/ttyLU3 LinkUp Systems L72xx UART - port 3
+ 4 = /dev/ttyFB0 Intel Footbridge (ARM)
+ 5 = /dev/ttySA0 StrongARM builtin serial port 0
+ 6 = /dev/ttySA1 StrongARM builtin serial port 1
+ 7 = /dev/ttySA2 StrongARM builtin serial port 2
+ 8 = /dev/ttySC0 SCI serial port (SuperH) - port 0
+ 9 = /dev/ttySC1 SCI serial port (SuperH) - port 1
+ 10 = /dev/ttySC2 SCI serial port (SuperH) - port 2
+ 11 = /dev/ttySC3 SCI serial port (SuperH) - port 3
+ 12 = /dev/ttyFW0 Firmware console - port 0
+ 13 = /dev/ttyFW1 Firmware console - port 1
+ 14 = /dev/ttyFW2 Firmware console - port 2
+ 15 = /dev/ttyFW3 Firmware console - port 3
+ 16 = /dev/ttyAM0 ARM "AMBA" serial port 0
+ ...
+ 31 = /dev/ttyAM15 ARM "AMBA" serial port 15
+ 32 = /dev/ttyDB0 DataBooster serial port 0
+ ...
+ 39 = /dev/ttyDB7 DataBooster serial port 7
+ 40 = /dev/ttySG0 SGI Altix console port
+ 41 = /dev/ttySMX0 Motorola i.MX - port 0
+ 42 = /dev/ttySMX1 Motorola i.MX - port 1
+ 43 = /dev/ttySMX2 Motorola i.MX - port 2
+ 44 = /dev/ttyMM0 Marvell MPSC - port 0
+ 45 = /dev/ttyMM1 Marvell MPSC - port 1
+ 46 = /dev/ttyCPM0 PPC CPM (SCC or SMC) - port 0
+ ...
+ 47 = /dev/ttyCPM5 PPC CPM (SCC or SMC) - port 5
+ 50 = /dev/ttyIOC0 Altix serial card
+ ...
+ 81 = /dev/ttyIOC31 Altix serial card
+ 82 = /dev/ttyVR0 NEC VR4100 series SIU
+ 83 = /dev/ttyVR1 NEC VR4100 series DSIU
+ 84 = /dev/ttyIOC84 Altix ioc4 serial card
+ ...
+ 115 = /dev/ttyIOC115 Altix ioc4 serial card
+ 116 = /dev/ttySIOC0 Altix ioc3 serial card
+ ...
+ 147 = /dev/ttySIOC31 Altix ioc3 serial card
+ 148 = /dev/ttyPSC0 PPC PSC - port 0
+ ...
+ 153 = /dev/ttyPSC5 PPC PSC - port 5
+ 154 = /dev/ttyAT0 ATMEL serial port 0
+ ...
+ 169 = /dev/ttyAT15 ATMEL serial port 15
+ 170 = /dev/ttyNX0 Hilscher netX serial port 0
+ ...
+ 185 = /dev/ttyNX15 Hilscher netX serial port 15
+ 186 = /dev/ttyJ0 JTAG1 DCC protocol based serial port emulation
+ 187 = /dev/ttyUL0 Xilinx uartlite - port 0
+ ...
+ 190 = /dev/ttyUL3 Xilinx uartlite - port 3
+ 191 = /dev/xvc0 Xen virtual console - port 0
+ 192 = /dev/ttyPZ0 pmac_zilog - port 0
+ ...
+ 195 = /dev/ttyPZ3 pmac_zilog - port 3
+ 196 = /dev/ttyTX0 TX39/49 serial port 0
+ ...
+ 204 = /dev/ttyTX7 TX39/49 serial port 7
+ 205 = /dev/ttySC0 SC26xx serial port 0
+ 206 = /dev/ttySC1 SC26xx serial port 1
+ 207 = /dev/ttySC2 SC26xx serial port 2
+ 208 = /dev/ttySC3 SC26xx serial port 3
+ 209 = /dev/ttyMAX0 MAX3100 serial port 0
+ 210 = /dev/ttyMAX1 MAX3100 serial port 1
+ 211 = /dev/ttyMAX2 MAX3100 serial port 2
+ 212 = /dev/ttyMAX3 MAX3100 serial port 3
+
+205 char Low-density serial ports (alternate device)
+ 0 = /dev/culu0 Callout device for ttyLU0
+ 1 = /dev/culu1 Callout device for ttyLU1
+ 2 = /dev/culu2 Callout device for ttyLU2
+ 3 = /dev/culu3 Callout device for ttyLU3
+ 4 = /dev/cufb0 Callout device for ttyFB0
+ 5 = /dev/cusa0 Callout device for ttySA0
+ 6 = /dev/cusa1 Callout device for ttySA1
+ 7 = /dev/cusa2 Callout device for ttySA2
+ 8 = /dev/cusc0 Callout device for ttySC0
+ 9 = /dev/cusc1 Callout device for ttySC1
+ 10 = /dev/cusc2 Callout device for ttySC2
+ 11 = /dev/cusc3 Callout device for ttySC3
+ 12 = /dev/cufw0 Callout device for ttyFW0
+ 13 = /dev/cufw1 Callout device for ttyFW1
+ 14 = /dev/cufw2 Callout device for ttyFW2
+ 15 = /dev/cufw3 Callout device for ttyFW3
+ 16 = /dev/cuam0 Callout device for ttyAM0
+ ...
+ 31 = /dev/cuam15 Callout device for ttyAM15
+ 32 = /dev/cudb0 Callout device for ttyDB0
+ ...
+ 39 = /dev/cudb7 Callout device for ttyDB7
+ 40 = /dev/cusg0 Callout device for ttySG0
+ 41 = /dev/ttycusmx0 Callout device for ttySMX0
+ 42 = /dev/ttycusmx1 Callout device for ttySMX1
+ 43 = /dev/ttycusmx2 Callout device for ttySMX2
+ 46 = /dev/cucpm0 Callout device for ttyCPM0
+ ...
+ 49 = /dev/cucpm5 Callout device for ttyCPM5
+ 50 = /dev/cuioc40 Callout device for ttyIOC40
+ ...
+ 81 = /dev/cuioc431 Callout device for ttyIOC431
+ 82 = /dev/cuvr0 Callout device for ttyVR0
+ 83 = /dev/cuvr1 Callout device for ttyVR1
+
+206 char OnStream SC-x0 tape devices
+ 0 = /dev/osst0 First OnStream SCSI tape, mode 0
+ 1 = /dev/osst1 Second OnStream SCSI tape, mode 0
+ ...
+ 32 = /dev/osst0l First OnStream SCSI tape, mode 1
+ 33 = /dev/osst1l Second OnStream SCSI tape, mode 1
+ ...
+ 64 = /dev/osst0m First OnStream SCSI tape, mode 2
+ 65 = /dev/osst1m Second OnStream SCSI tape, mode 2
+ ...
+ 96 = /dev/osst0a First OnStream SCSI tape, mode 3
+ 97 = /dev/osst1a Second OnStream SCSI tape, mode 3
+ ...
+ 128 = /dev/nosst0 No rewind version of /dev/osst0
+ 129 = /dev/nosst1 No rewind version of /dev/osst1
+ ...
+ 160 = /dev/nosst0l No rewind version of /dev/osst0l
+ 161 = /dev/nosst1l No rewind version of /dev/osst1l
+ ...
+ 192 = /dev/nosst0m No rewind version of /dev/osst0m
+ 193 = /dev/nosst1m No rewind version of /dev/osst1m
+ ...
+ 224 = /dev/nosst0a No rewind version of /dev/osst0a
+ 225 = /dev/nosst1a No rewind version of /dev/osst1a
+ ...
+
+ The OnStream SC-x0 SCSI tapes do not support the
+ standard SCSI SASD command set and therefore need
+ their own driver "osst". Note that the IDE, USB (and
+ maybe ParPort) versions may be driven via ide-scsi or
+ usb-storage SCSI emulation and this osst device and
+ driver as well. The ADR-x0 drives are QIC-157
+ compliant and don't need osst.
+
+207 char Compaq ProLiant health feature indicate
+ 0 = /dev/cpqhealth/cpqw Redirector interface
+ 1 = /dev/cpqhealth/crom EISA CROM
+ 2 = /dev/cpqhealth/cdt Data Table
+ 3 = /dev/cpqhealth/cevt Event Log
+ 4 = /dev/cpqhealth/casr Automatic Server Recovery
+ 5 = /dev/cpqhealth/cecc ECC Memory
+ 6 = /dev/cpqhealth/cmca Machine Check Architecture
+ 7 = /dev/cpqhealth/ccsm Deprecated CDT
+ 8 = /dev/cpqhealth/cnmi NMI Handling
+ 9 = /dev/cpqhealth/css Sideshow Management
+ 10 = /dev/cpqhealth/cram CMOS interface
+ 11 = /dev/cpqhealth/cpci PCI IRQ interface
+
+208 char User space serial ports
+ 0 = /dev/ttyU0 First user space serial port
+ 1 = /dev/ttyU1 Second user space serial port
+ ...
+
+209 char User space serial ports (alternate devices)
+ 0 = /dev/cuu0 Callout device for ttyU0
+ 1 = /dev/cuu1 Callout device for ttyU1
+ ...
+
+210 char SBE, Inc. sync/async serial card
+ 0 = /dev/sbei/wxcfg0 Configuration device for board 0
+ 1 = /dev/sbei/dld0 Download device for board 0
+ 2 = /dev/sbei/wan00 WAN device, port 0, board 0
+ 3 = /dev/sbei/wan01 WAN device, port 1, board 0
+ 4 = /dev/sbei/wan02 WAN device, port 2, board 0
+ 5 = /dev/sbei/wan03 WAN device, port 3, board 0
+ 6 = /dev/sbei/wanc00 WAN clone device, port 0, board 0
+ 7 = /dev/sbei/wanc01 WAN clone device, port 1, board 0
+ 8 = /dev/sbei/wanc02 WAN clone device, port 2, board 0
+ 9 = /dev/sbei/wanc03 WAN clone device, port 3, board 0
+ 10 = /dev/sbei/wxcfg1 Configuration device for board 1
+ 11 = /dev/sbei/dld1 Download device for board 1
+ 12 = /dev/sbei/wan10 WAN device, port 0, board 1
+ 13 = /dev/sbei/wan11 WAN device, port 1, board 1
+ 14 = /dev/sbei/wan12 WAN device, port 2, board 1
+ 15 = /dev/sbei/wan13 WAN device, port 3, board 1
+ 16 = /dev/sbei/wanc10 WAN clone device, port 0, board 1
+ 17 = /dev/sbei/wanc11 WAN clone device, port 1, board 1
+ 18 = /dev/sbei/wanc12 WAN clone device, port 2, board 1
+ 19 = /dev/sbei/wanc13 WAN clone device, port 3, board 1
+ ...
+
+ Yes, each board is really spaced 10 (decimal) apart.
+
+211 char Addinum CPCI1500 digital I/O card
+ 0 = /dev/addinum/cpci1500/0 First CPCI1500 card
+ 1 = /dev/addinum/cpci1500/1 Second CPCI1500 card
+ ...
+
+212 char LinuxTV.org DVB driver subsystem
+ 0 = /dev/dvb/adapter0/video0 first video decoder of first card
+ 1 = /dev/dvb/adapter0/audio0 first audio decoder of first card
+ 2 = /dev/dvb/adapter0/sec0 (obsolete/unused)
+ 3 = /dev/dvb/adapter0/frontend0 first frontend device of first card
+ 4 = /dev/dvb/adapter0/demux0 first demux device of first card
+ 5 = /dev/dvb/adapter0/dvr0 first digital video recoder device of first card
+ 6 = /dev/dvb/adapter0/ca0 first common access port of first card
+ 7 = /dev/dvb/adapter0/net0 first network device of first card
+ 8 = /dev/dvb/adapter0/osd0 first on-screen-display device of first card
+ 9 = /dev/dvb/adapter0/video1 second video decoder of first card
+ ...
+ 64 = /dev/dvb/adapter1/video0 first video decoder of second card
+ ...
+ 128 = /dev/dvb/adapter2/video0 first video decoder of third card
+ ...
+ 196 = /dev/dvb/adapter3/video0 first video decoder of fourth card
+
+216 char Bluetooth RFCOMM TTY devices
+ 0 = /dev/rfcomm0 First Bluetooth RFCOMM TTY device
+ 1 = /dev/rfcomm1 Second Bluetooth RFCOMM TTY device
+ ...
+
+217 char Bluetooth RFCOMM TTY devices (alternate devices)
+ 0 = /dev/curf0 Callout device for rfcomm0
+ 1 = /dev/curf1 Callout device for rfcomm1
+ ...
+
+218 char The Logical Company bus Unibus/Qbus adapters
+ 0 = /dev/logicalco/bci/0 First bus adapter
+ 1 = /dev/logicalco/bci/1 First bus adapter
+ ...
+
+219 char The Logical Company DCI-1300 digital I/O card
+ 0 = /dev/logicalco/dci1300/0 First DCI-1300 card
+ 1 = /dev/logicalco/dci1300/1 Second DCI-1300 card
+ ...
+
+220 char Myricom Myrinet "GM" board
+ 0 = /dev/myricom/gm0 First Myrinet GM board
+ 1 = /dev/myricom/gmp0 First board "root access"
+ 2 = /dev/myricom/gm1 Second Myrinet GM board
+ 3 = /dev/myricom/gmp1 Second board "root access"
+ ...
+
+221 char VME bus
+ 0 = /dev/bus/vme/m0 First master image
+ 1 = /dev/bus/vme/m1 Second master image
+ 2 = /dev/bus/vme/m2 Third master image
+ 3 = /dev/bus/vme/m3 Fourth master image
+ 4 = /dev/bus/vme/s0 First slave image
+ 5 = /dev/bus/vme/s1 Second slave image
+ 6 = /dev/bus/vme/s2 Third slave image
+ 7 = /dev/bus/vme/s3 Fourth slave image
+ 8 = /dev/bus/vme/ctl Control
+
+ It is expected that all VME bus drivers will use the
+ same interface. For interface documentation see
+ http://www.vmelinux.org/.
+
+224 char A2232 serial card
+ 0 = /dev/ttyY0 First A2232 port
+ 1 = /dev/ttyY1 Second A2232 port
+ ...
+
+225 char A2232 serial card (alternate devices)
+ 0 = /dev/cuy0 Callout device for ttyY0
+ 1 = /dev/cuy1 Callout device for ttyY1
+ ...
+
+226 char Direct Rendering Infrastructure (DRI)
+ 0 = /dev/dri/card0 First graphics card
+ 1 = /dev/dri/card1 Second graphics card
+ ...
+
+227 char IBM 3270 terminal Unix tty access
+ 1 = /dev/3270/tty1 First 3270 terminal
+ 2 = /dev/3270/tty2 Seconds 3270 terminal
+ ...
+
+228 char IBM 3270 terminal block-mode access
+ 0 = /dev/3270/tub Controlling interface
+ 1 = /dev/3270/tub1 First 3270 terminal
+ 2 = /dev/3270/tub2 Second 3270 terminal
+ ...
+
+229 char IBM iSeries/pSeries virtual console
+ 0 = /dev/hvc0 First console port
+ 1 = /dev/hvc1 Second console port
+ ...
+
+230 char IBM iSeries virtual tape
+ 0 = /dev/iseries/vt0 First virtual tape, mode 0
+ 1 = /dev/iseries/vt1 Second virtual tape, mode 0
+ ...
+ 32 = /dev/iseries/vt0l First virtual tape, mode 1
+ 33 = /dev/iseries/vt1l Second virtual tape, mode 1
+ ...
+ 64 = /dev/iseries/vt0m First virtual tape, mode 2
+ 65 = /dev/iseries/vt1m Second virtual tape, mode 2
+ ...
+ 96 = /dev/iseries/vt0a First virtual tape, mode 3
+ 97 = /dev/iseries/vt1a Second virtual tape, mode 3
+ ...
+ 128 = /dev/iseries/nvt0 First virtual tape, mode 0, no rewind
+ 129 = /dev/iseries/nvt1 Second virtual tape, mode 0, no rewind
+ ...
+ 160 = /dev/iseries/nvt0l First virtual tape, mode 1, no rewind
+ 161 = /dev/iseries/nvt1l Second virtual tape, mode 1, no rewind
+ ...
+ 192 = /dev/iseries/nvt0m First virtual tape, mode 2, no rewind
+ 193 = /dev/iseries/nvt1m Second virtual tape, mode 2, no rewind
+ ...
+ 224 = /dev/iseries/nvt0a First virtual tape, mode 3, no rewind
+ 225 = /dev/iseries/nvt1a Second virtual tape, mode 3, no rewind
+ ...
+
+ "No rewind" refers to the omission of the default
+ automatic rewind on device close. The MTREW or MTOFFL
+ ioctl()'s can be used to rewind the tape regardless of
+ the device used to access it.
+
+231 char InfiniBand
+ 0 = /dev/infiniband/umad0
+ 1 = /dev/infiniband/umad1
+ ...
+ 63 = /dev/infiniband/umad63 63rd InfiniBandMad device
+ 64 = /dev/infiniband/issm0 First InfiniBand IsSM device
+ 65 = /dev/infiniband/issm1 Second InfiniBand IsSM device
+ ...
+ 127 = /dev/infiniband/issm63 63rd InfiniBand IsSM device
+ 128 = /dev/infiniband/uverbs0 First InfiniBand verbs device
+ 129 = /dev/infiniband/uverbs1 Second InfiniBand verbs device
+ ...
+ 159 = /dev/infiniband/uverbs31 31st InfiniBand verbs device
+
+232 char Biometric Devices
+ 0 = /dev/biometric/sensor0/fingerprint first fingerprint sensor on first device
+ 1 = /dev/biometric/sensor0/iris first iris sensor on first device
+ 2 = /dev/biometric/sensor0/retina first retina sensor on first device
+ 3 = /dev/biometric/sensor0/voiceprint first voiceprint sensor on first device
+ 4 = /dev/biometric/sensor0/facial first facial sensor on first device
+ 5 = /dev/biometric/sensor0/hand first hand sensor on first device
+ ...
+ 10 = /dev/biometric/sensor1/fingerprint first fingerprint sensor on second device
+ ...
+ 20 = /dev/biometric/sensor2/fingerprint first fingerprint sensor on third device
+ ...
+
+233 char PathScale InfiniPath interconnect
+ 0 = /dev/ipath Primary device for programs (any unit)
+ 1 = /dev/ipath0 Access specifically to unit 0
+ 2 = /dev/ipath1 Access specifically to unit 1
+ ...
+ 4 = /dev/ipath3 Access specifically to unit 3
+ 129 = /dev/ipath_sma Device used by Subnet Management Agent
+ 130 = /dev/ipath_diag Device used by diagnostics programs
+
+234-239 UNASSIGNED
+
+240-254 char LOCAL/EXPERIMENTAL USE
+
+240-254 block LOCAL/EXPERIMENTAL USE
+ Allocated for local/experimental use. For devices not
+ assigned official numbers, these ranges should be
+ used in order to avoid conflicting with future assignments.
+
+255 char RESERVED
+
+255 block RESERVED
+
+ This major is reserved to assist the expansion to a
+ larger number space. No device nodes with this major
+ should ever be created on the filesystem.
+ (This is probably not true anymore, but I'll leave it
+ for now /Torben)
+
+---LARGE MAJORS!!!!!---
+
+256 char Equinox SST multi-port serial boards
+ 0 = /dev/ttyEQ0 First serial port on first Equinox SST board
+ 127 = /dev/ttyEQ127 Last serial port on first Equinox SST board
+ 128 = /dev/ttyEQ128 First serial port on second Equinox SST board
+ ...
+ 1027 = /dev/ttyEQ1027 Last serial port on eighth Equinox SST board
+
+256 block Resident Flash Disk Flash Translation Layer
+ 0 = /dev/rfda First RFD FTL layer
+ 16 = /dev/rfdb Second RFD FTL layer
+ ...
+ 240 = /dev/rfdp 16th RFD FTL layer
+
+257 char Phoenix Technologies Cryptographic Services Driver
+ 0 = /dev/ptlsec Crypto Services Driver
+
+257 block SSFDC Flash Translation Layer filesystem
+ 0 = /dev/ssfdca First SSFDC layer
+ 8 = /dev/ssfdcb Second SSFDC layer
+ 16 = /dev/ssfdcc Third SSFDC layer
+ 24 = /dev/ssfdcd 4th SSFDC layer
+ 32 = /dev/ssfdce 5th SSFDC layer
+ 40 = /dev/ssfdcf 6th SSFDC layer
+ 48 = /dev/ssfdcg 7th SSFDC layer
+ 56 = /dev/ssfdch 8th SSFDC layer
+
+258 block ROM/Flash read-only translation layer
+ 0 = /dev/blockrom0 First ROM card's translation layer interface
+ 1 = /dev/blockrom1 Second ROM card's translation layer interface
+ ...
+
+259 block Block Extended Major
+ Used dynamically to hold additional partition minor
+ numbers and allow large numbers of partitions per device
+
+259 char FPGA configuration interfaces
+ 0 = /dev/icap0 First Xilinx internal configuration
+ 1 = /dev/icap1 Second Xilinx internal configuration
+
+260 char OSD (Object-based-device) SCSI Device
+ 0 = /dev/osd0 First OSD Device
+ 1 = /dev/osd1 Second OSD Device
+ ...
+ 255 = /dev/osd255 256th OSD Device
+
+ **** ADDITIONAL /dev DIRECTORY ENTRIES
+
+This section details additional entries that should or may exist in
+the /dev directory. It is preferred that symbolic links use the same
+form (absolute or relative) as is indicated here. Links are
+classified as "hard" or "symbolic" depending on the preferred type of
+link; if possible, the indicated type of link should be used.
+
+
+ Compulsory links
+
+These links should exist on all systems:
+
+/dev/fd /proc/self/fd symbolic File descriptors
+/dev/stdin fd/0 symbolic stdin file descriptor
+/dev/stdout fd/1 symbolic stdout file descriptor
+/dev/stderr fd/2 symbolic stderr file descriptor
+/dev/nfsd socksys symbolic Required by iBCS-2
+/dev/X0R null symbolic Required by iBCS-2
+
+Note: /dev/X0R is <letter X>-<digit 0>-<letter R>.
+
+ Recommended links
+
+It is recommended that these links exist on all systems:
+
+/dev/core /proc/kcore symbolic Backward compatibility
+/dev/ramdisk ram0 symbolic Backward compatibility
+/dev/ftape qft0 symbolic Backward compatibility
+/dev/bttv0 video0 symbolic Backward compatibility
+/dev/radio radio0 symbolic Backward compatibility
+/dev/i2o* /dev/i2o/* symbolic Backward compatibility
+/dev/scd? sr? hard Alternate SCSI CD-ROM name
+
+ Locally defined links
+
+The following links may be established locally to conform to the
+configuration of the system. This is merely a tabulation of existing
+practice, and does not constitute a recommendation. However, if they
+exist, they should have the following uses.
+
+/dev/mouse mouse port symbolic Current mouse device
+/dev/tape tape device symbolic Current tape device
+/dev/cdrom CD-ROM device symbolic Current CD-ROM device
+/dev/cdwriter CD-writer symbolic Current CD-writer device
+/dev/scanner scanner symbolic Current scanner device
+/dev/modem modem port symbolic Current dialout device
+/dev/root root device symbolic Current root filesystem
+/dev/swap swap device symbolic Current swap device
+
+/dev/modem should not be used for a modem which supports dialin as
+well as dialout, as it tends to cause lock file problems. If it
+exists, /dev/modem should point to the appropriate primary TTY device
+(the use of the alternate callout devices is deprecated).
+
+For SCSI devices, /dev/tape and /dev/cdrom should point to the
+``cooked'' devices (/dev/st* and /dev/sr*, respectively), whereas
+/dev/cdwriter and /dev/scanner should point to the appropriate generic
+SCSI devices (/dev/sg*).
+
+/dev/mouse may point to a primary serial TTY device, a hardware mouse
+device, or a socket for a mouse driver program (e.g. /dev/gpmdata).
+
+ Sockets and pipes
+
+Non-transient sockets and named pipes may exist in /dev. Common entries are:
+
+/dev/printer socket lpd local socket
+/dev/log socket syslog local socket
+/dev/gpmdata socket gpm mouse multiplexer
+
+ Mount points
+
+The following names are reserved for mounting special filesystems
+under /dev. These special filesystems provide kernel interfaces that
+cannot be provided with standard device nodes.
+
+/dev/pts devpts PTY slave filesystem
+/dev/shm tmpfs POSIX shared memory maintenance access
+
+ **** TERMINAL DEVICES
+
+Terminal, or TTY devices are a special class of character devices. A
+terminal device is any device that could act as a controlling terminal
+for a session; this includes virtual consoles, serial ports, and
+pseudoterminals (PTYs).
+
+All terminal devices share a common set of capabilities known as line
+disciplines; these include the common terminal line discipline as well
+as SLIP and PPP modes.
+
+All terminal devices are named similarly; this section explains the
+naming and use of the various types of TTYs. Note that the naming
+conventions include several historical warts; some of these are
+Linux-specific, some were inherited from other systems, and some
+reflect Linux outgrowing a borrowed convention.
+
+A hash mark (#) in a device name is used here to indicate a decimal
+number without leading zeroes.
+
+ Virtual consoles and the console device
+
+Virtual consoles are full-screen terminal displays on the system video
+monitor. Virtual consoles are named /dev/tty#, with numbering
+starting at /dev/tty1; /dev/tty0 is the current virtual console.
+/dev/tty0 is the device that should be used to access the system video
+card on those architectures for which the frame buffer devices
+(/dev/fb*) are not applicable. Do not use /dev/console
+for this purpose.
+
+The console device, /dev/console, is the device to which system
+messages should be sent, and on which logins should be permitted in
+single-user mode. Starting with Linux 2.1.71, /dev/console is managed
+by the kernel; for previous versions it should be a symbolic link to
+either /dev/tty0, a specific virtual console such as /dev/tty1, or to
+a serial port primary (tty*, not cu*) device, depending on the
+configuration of the system.
+
+ Serial ports
+
+Serial ports are RS-232 serial ports and any device which simulates
+one, either in hardware (such as internal modems) or in software (such
+as the ISDN driver.) Under Linux, each serial ports has two device
+names, the primary or callin device and the alternate or callout one.
+Each kind of device is indicated by a different letter. For any
+letter X, the names of the devices are /dev/ttyX# and /dev/cux#,
+respectively; for historical reasons, /dev/ttyS# and /dev/ttyC#
+correspond to /dev/cua# and /dev/cub#. In the future, it should be
+expected that multiple letters will be used; all letters will be upper
+case for the "tty" device (e.g. /dev/ttyDP#) and lower case for the
+"cu" device (e.g. /dev/cudp#).
+
+The names /dev/ttyQ# and /dev/cuq# are reserved for local use.
+
+The alternate devices provide for kernel-based exclusion and somewhat
+different defaults than the primary devices. Their main purpose is to
+allow the use of serial ports with programs with no inherent or broken
+support for serial ports. Their use is deprecated, and they may be
+removed from a future version of Linux.
+
+Arbitration of serial ports is provided by the use of lock files with
+the names /var/lock/LCK..ttyX#. The contents of the lock file should
+be the PID of the locking process as an ASCII number.
+
+It is common practice to install links such as /dev/modem
+which point to serial ports. In order to ensure proper locking in the
+presence of these links, it is recommended that software chase
+symlinks and lock all possible names; additionally, it is recommended
+that a lock file be installed with the corresponding alternate
+device. In order to avoid deadlocks, it is recommended that the locks
+are acquired in the following order, and released in the reverse:
+
+ 1. The symbolic link name, if any (/var/lock/LCK..modem)
+ 2. The "tty" name (/var/lock/LCK..ttyS2)
+ 3. The alternate device name (/var/lock/LCK..cua2)
+
+In the case of nested symbolic links, the lock files should be
+installed in the order the symlinks are resolved.
+
+Under no circumstances should an application hold a lock while waiting
+for another to be released. In addition, applications which attempt
+to create lock files for the corresponding alternate device names
+should take into account the possibility of being used on a non-serial
+port TTY, for which no alternate device would exist.
+
+ Pseudoterminals (PTYs)
+
+Pseudoterminals, or PTYs, are used to create login sessions or provide
+other capabilities requiring a TTY line discipline (including SLIP or
+PPP capability) to arbitrary data-generation processes. Each PTY has
+a master side, named /dev/pty[p-za-e][0-9a-f], and a slave side, named
+/dev/tty[p-za-e][0-9a-f]. The kernel arbitrates the use of PTYs by
+allowing each master side to be opened only once.
+
+Once the master side has been opened, the corresponding slave device
+can be used in the same manner as any TTY device. The master and
+slave devices are connected by the kernel, generating the equivalent
+of a bidirectional pipe with TTY capabilities.
+
+Recent versions of the Linux kernels and GNU libc contain support for
+the System V/Unix98 naming scheme for PTYs, which assigns a common
+device, /dev/ptmx, to all the masters (opening it will automatically
+give you a previously unassigned PTY) and a subdirectory, /dev/pts,
+for the slaves; the slaves are named with decimal integers (/dev/pts/#
+in our notation). This removes the problem of exhausting the
+namespace and enables the kernel to automatically create the device
+nodes for the slaves on demand using the "devpts" filesystem.
+
diff --git a/Documentation/devicetree/00-INDEX b/Documentation/devicetree/00-INDEX
new file mode 100644
index 000000000..8c4102c6a
--- /dev/null
+++ b/Documentation/devicetree/00-INDEX
@@ -0,0 +1,12 @@
+Documentation for device trees, a data structure by which bootloaders pass
+hardware layout to Linux in a device-independent manner, simplifying hardware
+probing. This subsystem is maintained by Grant Likely
+<grant.likely@secretlab.ca> and has a mailing list at
+https://lists.ozlabs.org/listinfo/devicetree-discuss
+
+00-INDEX
+ - this file
+booting-without-of.txt
+ - Booting Linux without Open Firmware, describes history and format of device trees.
+usage-model.txt
+ - How Linux uses DT and what DT aims to solve. \ No newline at end of file
diff --git a/Documentation/devicetree/bindings/ABI.txt b/Documentation/devicetree/bindings/ABI.txt
new file mode 100644
index 000000000..d25f8d379
--- /dev/null
+++ b/Documentation/devicetree/bindings/ABI.txt
@@ -0,0 +1,39 @@
+
+ Devicetree (DT) ABI
+
+I. Regarding stable bindings/ABI, we quote from the 2013 ARM mini-summit
+ summary document:
+
+ "That still leaves the question of, what does a stable binding look
+ like? Certainly a stable binding means that a newer kernel will not
+ break on an older device tree, but that doesn't mean the binding is
+ frozen for all time. Grant said there are ways to change bindings that
+ don't result in breakage. For instance, if a new property is added,
+ then default to the previous behaviour if it is missing. If a binding
+ truly needs an incompatible change, then change the compatible string
+ at the same time. The driver can bind against both the old and the
+ new. These guidelines aren't new, but they desperately need to be
+ documented."
+
+II. General binding rules
+
+ 1) Maintainers, don't let perfect be the enemy of good. Don't hold up a
+ binding because it isn't perfect.
+
+ 2) Use specific compatible strings so that if we need to add a feature (DMA)
+ in the future, we can create a new compatible string. See I.
+
+ 3) Bindings can be augmented, but the driver shouldn't break when given
+ the old binding. ie. add additional properties, but don't change the
+ meaning of an existing property. For drivers, default to the original
+ behaviour when a newly added property is missing.
+
+ 4) Don't submit bindings for staging or unstable. That will be decided by
+ the devicetree maintainers *after* discussion on the mailinglist.
+
+III. Notes
+
+ 1) This document is intended as a general familiarization with the process as
+ decided at the 2013 Kernel Summit. When in doubt, the current word of the
+ devicetree maintainers overrules this document. In that situation, a patch
+ updating this document would be appreciated.
diff --git a/Documentation/devicetree/bindings/arc/interrupts.txt b/Documentation/devicetree/bindings/arc/interrupts.txt
new file mode 100644
index 000000000..9a5d56243
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/interrupts.txt
@@ -0,0 +1,24 @@
+* ARC700 incore Interrupt Controller
+
+ The core interrupt controller provides 32 prioritised interrupts (2 levels)
+ to ARC700 core.
+
+Properties:
+
+- compatible: "snps,arc700-intc"
+- interrupt-controller: This is an interrupt controller.
+- #interrupt-cells: Must be <1>.
+
+ Single Cell "interrupts" property of a device specifies the IRQ number
+ between 0 to 31
+
+ intc accessed via the special ARC AUX register interface, hence "reg" property
+ is not specified.
+
+Example:
+
+ intc: interrupt-controller {
+ compatible = "snps,arc700-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/arc/pct.txt b/Documentation/devicetree/bindings/arc/pct.txt
new file mode 100644
index 000000000..7b9588444
--- /dev/null
+++ b/Documentation/devicetree/bindings/arc/pct.txt
@@ -0,0 +1,20 @@
+* ARC Performance Counters
+
+The ARC700 can be configured with a pipeline performance monitor for counting
+CPU and cache events like cache misses and hits. Like conventional PCT there
+are 100+ hardware conditions dynamically mapped to upto 32 counters
+
+Note that:
+ * The ARC 700 PCT does not support interrupts; although HW events may be
+ counted, the HW events themselves cannot serve as a trigger for a sample.
+
+Required properties:
+
+- compatible : should contain
+ "snps,arc700-pct"
+
+Example:
+
+pmu {
+ compatible = "snps,arc700-pct";
+};
diff --git a/Documentation/devicetree/bindings/arm/adapteva.txt b/Documentation/devicetree/bindings/arm/adapteva.txt
new file mode 100644
index 000000000..1d8af9e36
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/adapteva.txt
@@ -0,0 +1,7 @@
+Adapteva Platforms Device Tree Bindings
+---------------------------------------
+
+Parallella board
+
+Required root node properties:
+ - compatible = "adapteva,parallella";
diff --git a/Documentation/devicetree/bindings/arm/al,alpine.txt b/Documentation/devicetree/bindings/arm/al,alpine.txt
new file mode 100644
index 000000000..f404a4f9b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/al,alpine.txt
@@ -0,0 +1,88 @@
+Annapurna Labs Alpine Platform Device Tree Bindings
+---------------------------------------------------------------
+
+Boards in the Alpine family shall have the following properties:
+
+* Required root node properties:
+compatible: must contain "al,alpine"
+
+* Example:
+
+/ {
+ model = "Annapurna Labs Alpine Dev Board";
+ compatible = "al,alpine";
+
+ ...
+}
+
+* CPU node:
+
+The Alpine platform includes cortex-a15 cores.
+enable-method: must be "al,alpine-smp" to allow smp [1]
+
+Example:
+
+cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ enable-method = "al,alpine-smp";
+
+ cpu@0 {
+ compatible = "arm,cortex-a15";
+ device_type = "cpu";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ compatible = "arm,cortex-a15";
+ device_type = "cpu";
+ reg = <1>;
+ };
+
+ cpu@2 {
+ compatible = "arm,cortex-a15";
+ device_type = "cpu";
+ reg = <2>;
+ };
+
+ cpu@3 {
+ compatible = "arm,cortex-a15";
+ device_type = "cpu";
+ reg = <3>;
+ };
+};
+
+
+* Alpine CPU resume registers
+
+The CPU resume register are used to define required resume address after
+reset.
+
+Properties:
+- compatible : Should contain "al,alpine-cpu-resume".
+- reg : Offset and length of the register set for the device
+
+Example:
+
+cpu_resume {
+ compatible = "al,alpine-cpu-resume";
+ reg = <0xfbff5ed0 0x30>;
+};
+
+* Alpine System-Fabric Service Registers
+
+The System-Fabric Service Registers allow various operation on CPU and
+system fabric, like powering CPUs off.
+
+Properties:
+- compatible : Should contain "al,alpine-sysfabric-service" and "syscon".
+- reg : Offset and length of the register set for the device
+
+Example:
+
+nb_service {
+ compatible = "al,alpine-sysfabric-service", "syscon";
+ reg = <0xfb070000 0x10000>;
+};
+
+[1] arm/cpu-enable-method/al,alpine-smp
diff --git a/Documentation/devicetree/bindings/arm/altera.txt b/Documentation/devicetree/bindings/arm/altera.txt
new file mode 100644
index 000000000..558735aac
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/altera.txt
@@ -0,0 +1,14 @@
+Altera's SoCFPGA platform device tree bindings
+---------------------------------------------
+
+Boards with Cyclone 5 SoC:
+Required root node properties:
+compatible = "altr,socfpga-cyclone5", "altr,socfpga";
+
+Boards with Arria 5 SoC:
+Required root node properties:
+compatible = "altr,socfpga-arria5", "altr,socfpga";
+
+Boards with Arria 10 SoC:
+Required root node properties:
+compatible = "altr,socfpga-arria10", "altr,socfpga";
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.txt
new file mode 100644
index 000000000..2c28f1d12
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-clk-manager.txt
@@ -0,0 +1,11 @@
+Altera SOCFPGA Clock Manager
+
+Required properties:
+- compatible : "altr,clk-mgr"
+- reg : Should contain base address and length for Clock Manager
+
+Example:
+ clkmgr@ffd04000 {
+ compatible = "altr,clk-mgr";
+ reg = <0xffd04000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt
new file mode 100644
index 000000000..d0ce01da5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-sdram-edac.txt
@@ -0,0 +1,15 @@
+Altera SOCFPGA SDRAM Error Detection & Correction [EDAC]
+The EDAC accesses a range of registers in the SDRAM controller.
+
+Required properties:
+- compatible : should contain "altr,sdram-edac";
+- altr,sdr-syscon : phandle of the sdr module
+- interrupts : Should contain the SDRAM ECC IRQ in the
+ appropriate format for the IRQ controller.
+
+Example:
+ sdramedac {
+ compatible = "altr,sdram-edac";
+ altr,sdr-syscon = <&sdr>;
+ interrupts = <0 39 4>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
new file mode 100644
index 000000000..f4d04a067
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
@@ -0,0 +1,13 @@
+Altera SOCFPGA System Manager
+
+Required properties:
+- compatible : "altr,sys-mgr"
+- reg : Should contain 1 register ranges(address and length)
+- cpu1-start-addr : CPU1 start address in hex.
+
+Example:
+ sysmgr@ffd08000 {
+ compatible = "altr,sys-mgr";
+ reg = <0xffd08000 0x1000>;
+ cpu1-start-addr = <0xffd080c4>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/amlogic.txt b/Documentation/devicetree/bindings/arm/amlogic.txt
new file mode 100644
index 000000000..973884a1b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/amlogic.txt
@@ -0,0 +1,14 @@
+Amlogic MesonX device tree bindings
+-------------------------------------------
+
+Boards with the Amlogic Meson6 SoC shall have the following properties:
+ Required root node property:
+ compatible: "amlogic,meson6"
+
+Boards with the Amlogic Meson8 SoC shall have the following properties:
+ Required root node property:
+ compatible: "amlogic,meson8";
+
+Board compatible values:
+ - "geniatech,atv1200"
+ - "minix,neo-x8"
diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
new file mode 100644
index 000000000..e77412893
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -0,0 +1,96 @@
+* ARM architected timer
+
+ARM cores may have a per-core architected timer, which provides per-cpu timers,
+or a memory mapped architected timer, which provides up to 8 frames with a
+physical and optional virtual timer per frame.
+
+The per-core architected timer is attached to a GIC to deliver its
+per-processor interrupts via PPIs. The memory mapped timer is attached to a GIC
+to deliver its interrupts via SPIs.
+
+** CP15 Timer node properties:
+
+- compatible : Should at least contain one of
+ "arm,armv7-timer"
+ "arm,armv8-timer"
+
+- interrupts : Interrupt list for secure, non-secure, virtual and
+ hypervisor timers, in that order.
+
+- clock-frequency : The frequency of the main counter, in Hz. Should be present
+ only where necessary to work around broken firmware which does not configure
+ CNTFRQ on all CPUs to a uniform correct value. Use of this property is
+ strongly discouraged; fix your firmware unless absolutely impossible.
+
+- always-on : a boolean property. If present, the timer is powered through an
+ always-on power domain, therefore it never loses context.
+
+** Optional properties:
+
+- arm,cpu-registers-not-fw-configured : Firmware does not initialize
+ any of the generic timer CPU registers, which contain their
+ architecturally-defined reset values. Only supported for 32-bit
+ systems which follow the ARMv7 architected reset values.
+
+
+Example:
+
+ timer {
+ compatible = "arm,cortex-a15-timer",
+ "arm,armv7-timer";
+ interrupts = <1 13 0xf08>,
+ <1 14 0xf08>,
+ <1 11 0xf08>,
+ <1 10 0xf08>;
+ clock-frequency = <100000000>;
+ };
+
+** Memory mapped timer node properties:
+
+- compatible : Should at least contain "arm,armv7-timer-mem".
+
+- clock-frequency : The frequency of the main counter, in Hz. Should be present
+ only when firmware has not configured the MMIO CNTFRQ registers.
+
+- reg : The control frame base address.
+
+Note that #address-cells, #size-cells, and ranges shall be present to ensure
+the CPU can address a frame's registers.
+
+A timer node has up to 8 frame sub-nodes, each with the following properties:
+
+- frame-number: 0 to 7.
+
+- interrupts : Interrupt list for physical and virtual timers in that order.
+ The virtual timer interrupt is optional.
+
+- reg : The first and second view base addresses in that order. The second view
+ base address is optional.
+
+- status : "disabled" indicates the frame is not available for use. Optional.
+
+Example:
+
+ timer@f0000000 {
+ compatible = "arm,armv7-timer-mem";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ reg = <0xf0000000 0x1000>;
+ clock-frequency = <50000000>;
+
+ frame@f0001000 {
+ frame-number = <0>
+ interrupts = <0 13 0x8>,
+ <0 14 0x8>;
+ reg = <0xf0001000 0x1000>,
+ <0xf0002000 0x1000>;
+ };
+
+ frame@f0003000 {
+ frame-number = <1>
+ interrupts = <0 15 0x8>;
+ reg = <0xf0003000 0x1000>;
+ status = "disabled";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/arm-boards b/Documentation/devicetree/bindings/arm/arm-boards
new file mode 100644
index 000000000..b78564b2b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/arm-boards
@@ -0,0 +1,159 @@
+ARM Integrator/AP (Application Platform) and Integrator/CP (Compact Platform)
+-----------------------------------------------------------------------------
+ARM's oldest Linux-supported platform with connectors for different core
+tiles of ARMv4, ARMv5 and ARMv6 type.
+
+Required properties (in root node):
+ compatible = "arm,integrator-ap"; /* Application Platform */
+ compatible = "arm,integrator-cp"; /* Compact Platform */
+
+FPGA type interrupt controllers, see the versatile-fpga-irq binding doc.
+
+Required nodes:
+
+- core-module: the root node to the Integrator platforms must have
+ a core-module with regs and the compatible string
+ "arm,core-module-integrator"
+- external-bus-interface: the root node to the Integrator platforms
+ must have an external bus interface with regs and the
+ compatible-string "arm,external-bus-interface"
+
+ Required properties for the core module:
+ - regs: the location and size of the core module registers, one
+ range of 0x200 bytes.
+
+- syscon: the root node of the Integrator platforms must have a
+ system controller node pointing to the control registers,
+ with the compatible string
+ "arm,integrator-ap-syscon"
+ "arm,integrator-cp-syscon"
+ respectively.
+
+ Required properties for the system controller:
+ - regs: the location and size of the system controller registers,
+ one range of 0x100 bytes.
+
+ Required properties for the AP system controller:
+ - interrupts: the AP syscon node must include the logical module
+ interrupts, stated in order of module instance <module 0>,
+ <module 1>, <module 2> ... for the CP system controller this
+ is not required not of any use.
+
+/dts-v1/;
+/include/ "integrator.dtsi"
+
+/ {
+ model = "ARM Integrator/AP";
+ compatible = "arm,integrator-ap";
+
+ core-module@10000000 {
+ compatible = "arm,core-module-integrator";
+ reg = <0x10000000 0x200>;
+ };
+
+ ebi@12000000 {
+ compatible = "arm,external-bus-interface";
+ reg = <0x12000000 0x100>;
+ };
+
+ syscon {
+ compatible = "arm,integrator-ap-syscon";
+ reg = <0x11000000 0x100>;
+ interrupt-parent = <&pic>;
+ /* These are the logic module IRQs */
+ interrupts = <9>, <10>, <11>, <12>;
+ };
+};
+
+
+ARM Versatile Application and Platform Baseboards
+-------------------------------------------------
+ARM's development hardware platform with connectors for customizable
+core tiles. The hardware configuration of the Versatile boards is
+highly customizable.
+
+Required properties (in root node):
+ compatible = "arm,versatile-ab"; /* Application baseboard */
+ compatible = "arm,versatile-pb"; /* Platform baseboard */
+
+Interrupt controllers:
+- VIC required properties:
+ compatible = "arm,versatile-vic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+- SIC required properties:
+ compatible = "arm,versatile-sic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+Required nodes:
+
+- core-module: the root node to the Versatile platforms must have
+ a core-module with regs and the compatible strings
+ "arm,core-module-versatile", "syscon"
+
+ARM RealView Boards
+-------------------
+The RealView boards cover tailored evaluation boards that are used to explore
+the ARM11 and Cortex A-8 and Cortex A-9 processors.
+
+Required properties (in root node):
+ /* RealView Emulation Baseboard */
+ compatible = "arm,realview-eb";
+ /* RealView Platform Baseboard for ARM1176JZF-S */
+ compatible = "arm,realview-pb1176";
+ /* RealView Platform Baseboard for ARM11 MPCore */
+ compatible = "arm,realview-pb11mp";
+ /* RealView Platform Baseboard for Cortex A-8 */
+ compatible = "arm,realview-pba8";
+ /* RealView Platform Baseboard Explore for Cortex A-9 */
+ compatible = "arm,realview-pbx";
+
+Required nodes:
+
+- soc: some node of the RealView platforms must be the SoC
+ node that contain the SoC-specific devices, withe the compatible
+ string set to one of these tuples:
+ "arm,realview-eb-soc", "simple-bus"
+ "arm,realview-pb1176-soc", "simple-bus"
+ "arm,realview-pb11mp-soc", "simple-bus"
+ "arm,realview-pba8-soc", "simple-bus"
+ "arm,realview-pbx-soc", "simple-bus"
+
+- syscon: some subnode of the RealView SoC node must be a
+ system controller node pointing to the control registers,
+ with the compatible string set to one of these tuples:
+ "arm,realview-eb-syscon", "syscon"
+ "arm,realview-pb1176-syscon", "syscon"
+ "arm,realview-pb11mp-syscon", "syscon"
+ "arm,realview-pba8-syscon", "syscon"
+ "arm,realview-pbx-syscon", "syscon"
+
+ Required properties for the system controller:
+ - regs: the location and size of the system controller registers,
+ one range of 0x1000 bytes.
+
+Example:
+
+/dts-v1/;
+#include <dt-bindings/interrupt-controller/irq.h>
+#include "skeleton.dtsi"
+
+/ {
+ model = "ARM RealView PB1176 with device tree";
+ compatible = "arm,realview-pb1176";
+
+ soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "arm,realview-pb1176-soc", "simple-bus";
+ ranges;
+
+ syscon: syscon@10000000 {
+ compatible = "arm,realview-syscon", "syscon";
+ reg = <0x10000000 0x1000>;
+ };
+
+ };
+};
diff --git a/Documentation/devicetree/bindings/arm/armada-370-xp-pmsu.txt b/Documentation/devicetree/bindings/arm/armada-370-xp-pmsu.txt
new file mode 100644
index 000000000..26799ef56
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/armada-370-xp-pmsu.txt
@@ -0,0 +1,21 @@
+Power Management Service Unit(PMSU)
+-----------------------------------
+Available on Marvell SOCs: Armada 370, Armada 38x and Armada XP
+
+Required properties:
+
+- compatible: should be one of:
+ - "marvell,armada-370-pmsu" for Armada 370 or Armada XP
+ - "marvell,armada-380-pmsu" for Armada 38x
+ - "marvell,armada-370-xp-pmsu" was used for Armada 370/XP but is now
+ deprecated and will be removed
+
+- reg: Should contain PMSU registers location and length.
+
+Example:
+
+armada-370-xp-pmsu@22000 {
+ compatible = "marvell,armada-370-pmsu";
+ reg = <0x22000 0x1000>;
+};
+
diff --git a/Documentation/devicetree/bindings/arm/armada-370-xp.txt b/Documentation/devicetree/bindings/arm/armada-370-xp.txt
new file mode 100644
index 000000000..c6ed90ea6
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/armada-370-xp.txt
@@ -0,0 +1,24 @@
+Marvell Armada 370 and Armada XP Platforms Device Tree Bindings
+---------------------------------------------------------------
+
+Boards with a SoC of the Marvell Armada 370 and Armada XP families
+shall have the following property:
+
+Required root node property:
+
+compatible: must contain "marvell,armada-370-xp"
+
+In addition, boards using the Marvell Armada 370 SoC shall have the
+following property:
+
+Required root node property:
+
+compatible: must contain "marvell,armada370"
+
+In addition, boards using the Marvell Armada XP SoC shall have the
+following property:
+
+Required root node property:
+
+compatible: must contain "marvell,armadaxp"
+
diff --git a/Documentation/devicetree/bindings/arm/armada-375.txt b/Documentation/devicetree/bindings/arm/armada-375.txt
new file mode 100644
index 000000000..867d0b80c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/armada-375.txt
@@ -0,0 +1,9 @@
+Marvell Armada 375 Platforms Device Tree Bindings
+-------------------------------------------------
+
+Boards with a SoC of the Marvell Armada 375 family shall have the
+following property:
+
+Required root node property:
+
+compatible: must contain "marvell,armada375"
diff --git a/Documentation/devicetree/bindings/arm/armada-380-mpcore-soc-ctrl.txt b/Documentation/devicetree/bindings/arm/armada-380-mpcore-soc-ctrl.txt
new file mode 100644
index 000000000..878107302
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/armada-380-mpcore-soc-ctrl.txt
@@ -0,0 +1,14 @@
+Marvell Armada 38x CA9 MPcore SoC Controller
+============================================
+
+Required properties:
+
+- compatible: Should be "marvell,armada-380-mpcore-soc-ctrl".
+
+- reg: should be the register base and length as documented in the
+ datasheet for the CA9 MPcore SoC Control registers
+
+mpcore-soc-ctrl@20d20 {
+ compatible = "marvell,armada-380-mpcore-soc-ctrl";
+ reg = <0x20d20 0x6c>;
+};
diff --git a/Documentation/devicetree/bindings/arm/armada-38x.txt b/Documentation/devicetree/bindings/arm/armada-38x.txt
new file mode 100644
index 000000000..202953f18
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/armada-38x.txt
@@ -0,0 +1,27 @@
+Marvell Armada 38x Platforms Device Tree Bindings
+-------------------------------------------------
+
+Boards with a SoC of the Marvell Armada 38x family shall have the
+following property:
+
+Required root node property:
+
+ - compatible: must contain "marvell,armada380"
+
+In addition, boards using the Marvell Armada 385 SoC shall have the
+following property before the previous one:
+
+Required root node property:
+
+compatible: must contain "marvell,armada385"
+
+In addition, boards using the Marvell Armada 388 SoC shall have the
+following property before the previous one:
+
+Required root node property:
+
+compatible: must contain "marvell,armada388"
+
+Example:
+
+compatible = "marvell,a385-rd", "marvell,armada385", "marvell,armada380";
diff --git a/Documentation/devicetree/bindings/arm/armada-39x.txt b/Documentation/devicetree/bindings/arm/armada-39x.txt
new file mode 100644
index 000000000..53d4ff9ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/armada-39x.txt
@@ -0,0 +1,20 @@
+Marvell Armada 39x Platforms Device Tree Bindings
+-------------------------------------------------
+
+Boards with a SoC of the Marvell Armada 39x family shall have the
+following property:
+
+Required root node property:
+
+ - compatible: must contain "marvell,armada390"
+
+In addition, boards using the Marvell Armada 398 SoC shall have the
+following property before the previous one:
+
+Required root node property:
+
+compatible: must contain "marvell,armada398"
+
+Example:
+
+compatible = "marvell,a398-db", "marvell,armada398", "marvell,armada390";
diff --git a/Documentation/devicetree/bindings/arm/armada-cpu-reset.txt b/Documentation/devicetree/bindings/arm/armada-cpu-reset.txt
new file mode 100644
index 000000000..b63a7b6ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/armada-cpu-reset.txt
@@ -0,0 +1,14 @@
+Marvell Armada CPU reset controller
+===================================
+
+Required properties:
+
+- compatible: Should be "marvell,armada-370-cpu-reset".
+
+- reg: should be register base and length as documented in the
+ datasheet for the CPU reset registers
+
+cpurst: cpurst@20800 {
+ compatible = "marvell,armada-370-cpu-reset";
+ reg = <0x20800 0x20>;
+};
diff --git a/Documentation/devicetree/bindings/arm/armadeus.txt b/Documentation/devicetree/bindings/arm/armadeus.txt
new file mode 100644
index 000000000..9821283ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/armadeus.txt
@@ -0,0 +1,6 @@
+Armadeus i.MX Platforms Device Tree Bindings
+-----------------------------------------------
+
+APF51: i.MX51 based module.
+Required root node properties:
+ - compatible = "armadeus,imx51-apf51", "fsl,imx51";
diff --git a/Documentation/devicetree/bindings/arm/atmel-at91.txt b/Documentation/devicetree/bindings/arm/atmel-at91.txt
new file mode 100644
index 000000000..2e99b5b57
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/atmel-at91.txt
@@ -0,0 +1,157 @@
+Atmel AT91 device tree bindings.
+================================
+
+Boards with a SoC of the Atmel AT91 or SMART family shall have the following
+properties:
+
+Required root node properties:
+compatible: must be one of:
+ * "atmel,at91rm9200"
+
+ * "atmel,at91sam9" for SoCs using an ARM926EJ-S core, shall be extended with
+ the specific SoC family or compatible:
+ o "atmel,at91sam9260"
+ o "atmel,at91sam9261"
+ o "atmel,at91sam9263"
+ o "atmel,at91sam9x5" for the 5 series, shall be extended with the specific
+ SoC compatible:
+ - "atmel,at91sam9g15"
+ - "atmel,at91sam9g25"
+ - "atmel,at91sam9g35"
+ - "atmel,at91sam9x25"
+ - "atmel,at91sam9x35"
+ o "atmel,at91sam9g20"
+ o "atmel,at91sam9g45"
+ o "atmel,at91sam9n12"
+ o "atmel,at91sam9rl"
+ o "atmel,at91sam9xe"
+ * "atmel,sama5" for SoCs using a Cortex-A5, shall be extended with the specific
+ SoC family:
+ o "atmel,sama5d3" shall be extended with the specific SoC compatible:
+ - "atmel,sama5d31"
+ - "atmel,sama5d33"
+ - "atmel,sama5d34"
+ - "atmel,sama5d35"
+ - "atmel,sama5d36"
+ o "atmel,sama5d4" shall be extended with the specific SoC compatible:
+ - "atmel,sama5d41"
+ - "atmel,sama5d42"
+ - "atmel,sama5d43"
+ - "atmel,sama5d44"
+
+PIT Timer required properties:
+- compatible: Should be "atmel,at91sam9260-pit"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for the PIT which is the IRQ line
+ shared across all System Controller members.
+
+System Timer (ST) required properties:
+- compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for the ST which is the IRQ line
+ shared across all System Controller members.
+Its subnodes can be:
+- watchdog: compatible should be "atmel,at91rm9200-wdt"
+
+TC/TCLIB Timer required properties:
+- compatible: Should be "atmel,<chip>-tcb".
+ <chip> can be "at91rm9200" or "at91sam9x5"
+- reg: Should contain registers location and length
+- interrupts: Should contain all interrupts for the TC block
+ Note that you can specify several interrupt cells if the TC
+ block has one interrupt per channel.
+- clock-names: tuple listing input clock names.
+ Required elements: "t0_clk"
+ Optional elements: "t1_clk", "t2_clk"
+- clocks: phandles to input clocks.
+
+Examples:
+
+One interrupt per TC block:
+ tcb0: timer@fff7c000 {
+ compatible = "atmel,at91rm9200-tcb";
+ reg = <0xfff7c000 0x100>;
+ interrupts = <18 4>;
+ clocks = <&tcb0_clk>;
+ clock-names = "t0_clk";
+ };
+
+One interrupt per TC channel in a TC block:
+ tcb1: timer@fffdc000 {
+ compatible = "atmel,at91rm9200-tcb";
+ reg = <0xfffdc000 0x100>;
+ interrupts = <26 4 27 4 28 4>;
+ clocks = <&tcb1_clk>;
+ clock-names = "t0_clk";
+ };
+
+RSTC Reset Controller required properties:
+- compatible: Should be "atmel,<chip>-rstc".
+ <chip> can be "at91sam9260" or "at91sam9g45"
+- reg: Should contain registers location and length
+
+Example:
+
+ rstc@fffffd00 {
+ compatible = "atmel,at91sam9260-rstc";
+ reg = <0xfffffd00 0x10>;
+ };
+
+RAMC SDRAM/DDR Controller required properties:
+- compatible: Should be "atmel,at91rm9200-sdramc",
+ "atmel,at91sam9260-sdramc",
+ "atmel,at91sam9g45-ddramc",
+ "atmel,sama5d3-ddramc",
+- reg: Should contain registers location and length
+
+Examples:
+
+ ramc0: ramc@ffffe800 {
+ compatible = "atmel,at91sam9g45-ddramc";
+ reg = <0xffffe800 0x200>;
+ };
+
+SHDWC Shutdown Controller
+
+required properties:
+- compatible: Should be "atmel,<chip>-shdwc".
+ <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
+- reg: Should contain registers location and length
+
+optional properties:
+- atmel,wakeup-mode: String, operation mode of the wakeup mode.
+ Supported values are: "none", "high", "low", "any".
+- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
+
+optional at91sam9260 properties:
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9rl properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9x5 properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+
+Example:
+
+ rstc@fffffd00 {
+ compatible = "atmel,at91sam9260-rstc";
+ reg = <0xfffffd00 0x10>;
+ };
+
+Special Function Registers (SFR)
+
+Special Function Registers (SFR) manage specific aspects of the integrated
+memory, bridge implementations, processor and other functionality not controlled
+elsewhere.
+
+required properties:
+- compatible: Should be "atmel,<chip>-sfr", "syscon".
+ <chip> can be "sama5d3" or "sama5d4".
+- reg: Should contain registers location and length
+
+ sfr@f0038000 {
+ compatible = "atmel,sama5d3-sfr", "syscon";
+ reg = <0xf0038000 0x60>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/atmel-pmc.txt b/Documentation/devicetree/bindings/arm/atmel-pmc.txt
new file mode 100644
index 000000000..795cc7854
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/atmel-pmc.txt
@@ -0,0 +1,14 @@
+* Power Management Controller (PMC)
+
+Required properties:
+- compatible: Should be "atmel,<chip>-pmc".
+ <chip> can be: at91rm9200, at91sam9260, at91sam9g45, at91sam9n12,
+ at91sam9x5, sama5d3
+
+- reg: Should contain PMC registers location and length
+
+Examples:
+ pmc: pmc@fffffc00 {
+ compatible = "atmel,at91rm9200-pmc";
+ reg = <0xfffffc00 0x100>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/axxia.txt b/Documentation/devicetree/bindings/arm/axxia.txt
new file mode 100644
index 000000000..7b4ef9c07
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/axxia.txt
@@ -0,0 +1,12 @@
+Axxia AXM55xx device tree bindings
+
+Boards using the AXM55xx SoC need to have the following properties:
+
+Required root node property:
+
+ - compatible = "lsi,axm5516"
+
+Boards:
+
+ LSI AXM5516 Validation board (Amarillo)
+ compatible = "lsi,axm5516-amarillo", "lsi,axm5516"
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351-cpu-method.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351-cpu-method.txt
new file mode 100644
index 000000000..8240c023e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351-cpu-method.txt
@@ -0,0 +1,36 @@
+Broadcom Kona Family CPU Enable Method
+--------------------------------------
+This binding defines the enable method used for starting secondary
+CPUs in the following Broadcom SoCs:
+ BCM11130, BCM11140, BCM11351, BCM28145, BCM28155, BCM21664
+
+The enable method is specified by defining the following required
+properties in the "cpus" device tree node:
+ - enable-method = "brcm,bcm11351-cpu-method";
+ - secondary-boot-reg = <...>;
+
+The secondary-boot-reg property is a u32 value that specifies the
+physical address of the register used to request the ROM holding pen
+code release a secondary CPU. The value written to the register is
+formed by encoding the target CPU id into the low bits of the
+physical start address it should jump to.
+
+Example:
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ enable-method = "brcm,bcm11351-cpu-method";
+ secondary-boot-reg = <0x3500417c>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <0>;
+ };
+
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a9";
+ reg = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.txt
new file mode 100644
index 000000000..0ff6560e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.txt
@@ -0,0 +1,10 @@
+Broadcom BCM11351 device tree bindings
+-------------------------------------------
+
+Boards with the bcm281xx SoC family (which includes bcm11130, bcm11140,
+bcm11351, bcm28145, bcm28155 SoCs) shall have the following properties:
+
+Required root node property:
+
+compatible = "brcm,bcm11351";
+DEPRECATED: compatible = "bcm,bcm11351";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.txt
new file mode 100644
index 000000000..e0774255e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.txt
@@ -0,0 +1,15 @@
+Broadcom BCM21664 device tree bindings
+--------------------------------------
+
+This document describes the device tree bindings for boards with the BCM21664
+SoC.
+
+Required root node property:
+ - compatible: brcm,bcm21664
+
+Example:
+ / {
+ model = "BCM21664 SoC";
+ compatible = "brcm,bcm21664";
+ [...]
+ }
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt
new file mode 100644
index 000000000..ac683480c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt
@@ -0,0 +1,8 @@
+Broadcom BCM2835 device tree bindings
+-------------------------------------------
+
+Boards with the BCM2835 SoC shall have the following properties:
+
+Required root node property:
+
+compatible = "brcm,bcm2835";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.txt
new file mode 100644
index 000000000..6b0f49f6f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.txt
@@ -0,0 +1,8 @@
+Broadcom BCM4708 device tree bindings
+-------------------------------------------
+
+Boards with the BCM4708 SoC shall have the following properties:
+
+Required root node property:
+
+compatible = "brcm,bcm4708";
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt
new file mode 100644
index 000000000..bd49987a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm63138.txt
@@ -0,0 +1,9 @@
+Broadcom BCM63138 DSL System-on-a-Chip device tree bindings
+-----------------------------------------------------------
+
+Boards compatible with the BCM63138 DSL System-on-a-Chip should have the
+following properties:
+
+Required root node property:
+
+compatible: should be "brcm,bcm63138"
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt
new file mode 100644
index 000000000..430608ec0
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,brcmstb.txt
@@ -0,0 +1,97 @@
+ARM Broadcom STB platforms Device Tree Bindings
+-----------------------------------------------
+Boards with Broadcom Brahma15 ARM-based BCMxxxx (generally BCM7xxx variants)
+SoC shall have the following DT organization:
+
+Required root node properties:
+ - compatible: "brcm,bcm<chip_id>", "brcm,brcmstb"
+
+example:
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ model = "Broadcom STB (bcm7445)";
+ compatible = "brcm,bcm7445", "brcm,brcmstb";
+
+Further, syscon nodes that map platform-specific registers used for general
+system control is required:
+
+ - compatible: "brcm,bcm<chip_id>-sun-top-ctrl", "syscon"
+ - compatible: "brcm,bcm<chip_id>-hif-cpubiuctrl", "syscon"
+ - compatible: "brcm,bcm<chip_id>-hif-continuation", "syscon"
+
+example:
+ rdb {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-bus";
+ ranges = <0 0x00 0xf0000000 0x1000000>;
+
+ sun_top_ctrl: syscon@404000 {
+ compatible = "brcm,bcm7445-sun-top-ctrl", "syscon";
+ reg = <0x404000 0x51c>;
+ };
+
+ hif_cpubiuctrl: syscon@3e2400 {
+ compatible = "brcm,bcm7445-hif-cpubiuctrl", "syscon";
+ reg = <0x3e2400 0x5b4>;
+ };
+
+ hif_continuation: syscon@452000 {
+ compatible = "brcm,bcm7445-hif-continuation", "syscon";
+ reg = <0x452000 0x100>;
+ };
+ };
+
+Lastly, nodes that allow for support of SMP initialization and reboot are
+required:
+
+smpboot
+-------
+Required properties:
+
+ - compatible
+ The string "brcm,brcmstb-smpboot".
+
+ - syscon-cpu
+ A phandle / integer array property which lets the BSP know the location
+ of certain CPU power-on registers.
+
+ The layout of the property is as follows:
+ o a phandle to the "hif_cpubiuctrl" syscon node
+ o offset to the base CPU power zone register
+ o offset to the base CPU reset register
+
+ - syscon-cont
+ A phandle pointing to the syscon node which describes the CPU boot
+ continuation registers.
+ o a phandle to the "hif_continuation" syscon node
+
+example:
+ smpboot {
+ compatible = "brcm,brcmstb-smpboot";
+ syscon-cpu = <&hif_cpubiuctrl 0x88 0x178>;
+ syscon-cont = <&hif_continuation>;
+ };
+
+reboot
+-------
+Required properties
+
+ - compatible
+ The string property "brcm,brcmstb-reboot" for 40nm/28nm chips with
+ the new SYS_CTRL interface, or "brcm,bcm7038-reboot" for 65nm
+ chips with the old SUN_TOP_CTRL interface.
+
+ - syscon
+ A phandle / integer array that points to the syscon node which describes
+ the general system reset registers.
+ o a phandle to "sun_top_ctrl"
+ o offset to the "reset source enable" register
+ o offset to the "software master reset" register
+
+example:
+ reboot {
+ compatible = "brcm,brcmstb-reboot";
+ syscon = <&sun_top_ctrl 0x304 0x308>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.txt
new file mode 100644
index 000000000..4c77169bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.txt
@@ -0,0 +1,31 @@
+Broadcom Cygnus device tree bindings
+------------------------------------
+
+
+Boards with Cygnus SoCs shall have the following properties:
+
+Required root node property:
+
+BCM11300
+compatible = "brcm,bcm11300", "brcm,cygnus";
+
+BCM11320
+compatible = "brcm,bcm11320", "brcm,cygnus";
+
+BCM11350
+compatible = "brcm,bcm11350", "brcm,cygnus";
+
+BCM11360
+compatible = "brcm,bcm11360", "brcm,cygnus";
+
+BCM58300
+compatible = "brcm,bcm58300", "brcm,cygnus";
+
+BCM58302
+compatible = "brcm,bcm58302", "brcm,cygnus";
+
+BCM58303
+compatible = "brcm,bcm58303", "brcm,cygnus";
+
+BCM58305
+compatible = "brcm,bcm58305", "brcm,cygnus";
diff --git a/Documentation/devicetree/bindings/arm/calxeda.txt b/Documentation/devicetree/bindings/arm/calxeda.txt
new file mode 100644
index 000000000..25fcf9679
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/calxeda.txt
@@ -0,0 +1,15 @@
+Calxeda Platforms Device Tree Bindings
+-----------------------------------------------
+
+Boards with Calxeda Cortex-A9 based ECX-1000 (Highbank) SOC shall have the
+following properties.
+
+Required root node properties:
+ - compatible = "calxeda,highbank";
+
+
+Boards with Calxeda Cortex-A15 based ECX-2000 SOC shall have the following
+properties.
+
+Required root node properties:
+ - compatible = "calxeda,ecx-2000";
diff --git a/Documentation/devicetree/bindings/arm/calxeda/combophy.txt b/Documentation/devicetree/bindings/arm/calxeda/combophy.txt
new file mode 100644
index 000000000..6622bdb2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/calxeda/combophy.txt
@@ -0,0 +1,17 @@
+Calxeda Highbank Combination Phys for SATA
+
+Properties:
+- compatible : Should be "calxeda,hb-combophy"
+- #phy-cells: Should be 1.
+- reg : Address and size for Combination Phy registers.
+- phydev: device ID for programming the combophy.
+
+Example:
+
+ combophy5: combo-phy@fff5d000 {
+ compatible = "calxeda,hb-combophy";
+ #phy-cells = <1>;
+ reg = <0xfff5d000 0x1000>;
+ phydev = <31>;
+ };
+
diff --git a/Documentation/devicetree/bindings/arm/calxeda/l2ecc.txt b/Documentation/devicetree/bindings/arm/calxeda/l2ecc.txt
new file mode 100644
index 000000000..94e642a33
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/calxeda/l2ecc.txt
@@ -0,0 +1,15 @@
+Calxeda Highbank L2 cache ECC
+
+Properties:
+- compatible : Should be "calxeda,hb-sregs-l2-ecc"
+- reg : Address and size for ECC error interrupt clear registers.
+- interrupts : Should be single bit error interrupt, then double bit error
+ interrupt.
+
+Example:
+
+ sregs@fff3c200 {
+ compatible = "calxeda,hb-sregs-l2-ecc";
+ reg = <0xfff3c200 0x100>;
+ interrupts = <0 71 4 0 72 4>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/calxeda/mem-ctrlr.txt b/Documentation/devicetree/bindings/arm/calxeda/mem-ctrlr.txt
new file mode 100644
index 000000000..049675944
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/calxeda/mem-ctrlr.txt
@@ -0,0 +1,16 @@
+Calxeda DDR memory controller
+
+Properties:
+- compatible : Should be:
+ - "calxeda,hb-ddr-ctrl" for ECX-1000
+ - "calxeda,ecx-2000-ddr-ctrl" for ECX-2000
+- reg : Address and size for DDR controller registers.
+- interrupts : Interrupt for DDR controller.
+
+Example:
+
+ memory-controller@fff00000 {
+ compatible = "calxeda,hb-ddr-ctrl";
+ reg = <0xfff00000 0x1000>;
+ interrupts = <0 91 4>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/cavium-thunder.txt b/Documentation/devicetree/bindings/arm/cavium-thunder.txt
new file mode 100644
index 000000000..6f63a5866
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cavium-thunder.txt
@@ -0,0 +1,10 @@
+Cavium Thunder platform device tree bindings
+--------------------------------------------
+
+Boards with Cavium's Thunder SoC shall have following properties.
+
+Root Node
+---------
+Required root node properties:
+
+ - compatible = "cavium,thunder-88xx";
diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt
new file mode 100644
index 000000000..3c5c63132
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cci.txt
@@ -0,0 +1,227 @@
+=======================================================
+ARM CCI cache coherent interconnect binding description
+=======================================================
+
+ARM multi-cluster systems maintain intra-cluster coherency through a
+cache coherent interconnect (CCI) that is capable of monitoring bus
+transactions and manage coherency, TLB invalidations and memory barriers.
+
+It allows snooping and distributed virtual memory message broadcast across
+clusters, through memory mapped interface, with a global control register
+space and multiple sets of interface control registers, one per slave
+interface.
+
+Bindings for the CCI node follow the ePAPR standard, available from:
+
+www.power.org/documentation/epapr-version-1-1/
+
+with the addition of the bindings described in this document which are
+specific to ARM.
+
+* CCI interconnect node
+
+ Description: Describes a CCI cache coherent Interconnect component
+
+ Node name must be "cci".
+ Node's parent must be the root node /, and the address space visible
+ through the CCI interconnect is the same as the one seen from the
+ root node (ie from CPUs perspective as per DT standard).
+ Every CCI node has to define the following properties:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: must be set to
+ "arm,cci-400"
+
+ - reg
+ Usage: required
+ Value type: Integer cells. A register entry, expressed as a pair
+ of cells, containing base and size.
+ Definition: A standard property. Specifies base physical
+ address of CCI control registers common to all
+ interfaces.
+
+ - ranges:
+ Usage: required
+ Value type: Integer cells. An array of range entries, expressed
+ as a tuple of cells, containing child address,
+ parent address and the size of the region in the
+ child address space.
+ Definition: A standard property. Follow rules in the ePAPR for
+ hierarchical bus addressing. CCI interfaces
+ addresses refer to the parent node addressing
+ scheme to declare their register bases.
+
+ CCI interconnect node can define the following child nodes:
+
+ - CCI control interface nodes
+
+ Node name must be "slave-if".
+ Parent node must be CCI interconnect node.
+
+ A CCI control interface node must contain the following
+ properties:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: must be set to
+ "arm,cci-400-ctrl-if"
+
+ - interface-type:
+ Usage: required
+ Value type: <string>
+ Definition: must be set to one of {"ace", "ace-lite"}
+ depending on the interface type the node
+ represents.
+
+ - reg:
+ Usage: required
+ Value type: Integer cells. A register entry, expressed
+ as a pair of cells, containing base and
+ size.
+ Definition: the base address and size of the
+ corresponding interface programming
+ registers.
+
+ - CCI PMU node
+
+ Parent node must be CCI interconnect node.
+
+ A CCI pmu node must contain the following properties:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must contain one of:
+ "arm,cci-400-pmu,r0"
+ "arm,cci-400-pmu,r1"
+ "arm,cci-400-pmu" - DEPRECATED, permitted only where OS has
+ secure acces to CCI registers
+ - reg:
+ Usage: required
+ Value type: Integer cells. A register entry, expressed
+ as a pair of cells, containing base and
+ size.
+ Definition: the base address and size of the
+ corresponding interface programming
+ registers.
+
+ - interrupts:
+ Usage: required
+ Value type: Integer cells. Array of interrupt specifier
+ entries, as defined in
+ ../interrupt-controller/interrupts.txt.
+ Definition: list of counter overflow interrupts, one per
+ counter. The interrupts must be specified
+ starting with the cycle counter overflow
+ interrupt, followed by counter0 overflow
+ interrupt, counter1 overflow interrupt,...
+ ,counterN overflow interrupt.
+
+ The CCI PMU has an interrupt signal for each
+ counter. The number of interrupts must be
+ equal to the number of counters.
+
+* CCI interconnect bus masters
+
+ Description: masters in the device tree connected to a CCI port
+ (inclusive of CPUs and their cpu nodes).
+
+ A CCI interconnect bus master node must contain the following
+ properties:
+
+ - cci-control-port:
+ Usage: required
+ Value type: <phandle>
+ Definition: a phandle containing the CCI control interface node
+ the master is connected to.
+
+Example:
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ cci-control-port = <&cci_control1>;
+ reg = <0x0>;
+ };
+
+ CPU1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ cci-control-port = <&cci_control1>;
+ reg = <0x1>;
+ };
+
+ CPU2: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ cci-control-port = <&cci_control2>;
+ reg = <0x100>;
+ };
+
+ CPU3: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ cci-control-port = <&cci_control2>;
+ reg = <0x101>;
+ };
+
+ };
+
+ dma0: dma@3000000 {
+ compatible = "arm,pl330", "arm,primecell";
+ cci-control-port = <&cci_control0>;
+ reg = <0x0 0x3000000 0x0 0x1000>;
+ interrupts = <10>;
+ #dma-cells = <1>;
+ #dma-channels = <8>;
+ #dma-requests = <32>;
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x10000>;
+
+ cci_control0: slave-if@1000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace-lite";
+ reg = <0x1000 0x1000>;
+ };
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+
+ pmu@9000 {
+ compatible = "arm,cci-400-pmu";
+ reg = <0x9000 0x5000>;
+ interrupts = <0 101 4>,
+ <0 102 4>,
+ <0 103 4>,
+ <0 104 4>,
+ <0 105 4>;
+ };
+ };
+
+This CCI node corresponds to a CCI component whose control registers sits
+at address 0x000000002c090000.
+CCI slave interface @0x000000002c091000 is connected to dma controller dma0.
+CCI slave interface @0x000000002c094000 is connected to CPUs {CPU0, CPU1};
+CCI slave interface @0x000000002c095000 is connected to CPUs {CPU2, CPU3};
diff --git a/Documentation/devicetree/bindings/arm/ccn.txt b/Documentation/devicetree/bindings/arm/ccn.txt
new file mode 100644
index 000000000..b100d3847
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ccn.txt
@@ -0,0 +1,21 @@
+* ARM CCN (Cache Coherent Network)
+
+Required properties:
+
+- compatible: (standard compatible string) should be one of:
+ "arm,ccn-504"
+ "arm,ccn-508"
+
+- reg: (standard registers property) physical address and size
+ (16MB) of the configuration registers block
+
+- interrupts: (standard interrupt property) single interrupt
+ generated by the control block
+
+Example:
+
+ ccn@0x2000000000 {
+ compatible = "arm,ccn-504";
+ reg = <0x20 0x00000000 0 0x1000000>;
+ interrupts = <0 181 4>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/coherency-fabric.txt b/Documentation/devicetree/bindings/arm/coherency-fabric.txt
new file mode 100644
index 000000000..8dd46617c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/coherency-fabric.txt
@@ -0,0 +1,43 @@
+Coherency fabric
+----------------
+Available on Marvell SOCs: Armada 370, Armada 375, Armada 38x and Armada XP
+
+Required properties:
+
+- compatible: the possible values are:
+
+ * "marvell,coherency-fabric", to be used for the coherency fabric of
+ the Armada 370 and Armada XP.
+
+ * "marvell,armada-375-coherency-fabric", for the Armada 375 coherency
+ fabric.
+
+ * "marvell,armada-380-coherency-fabric", for the Armada 38x coherency
+ fabric.
+
+- reg: Should contain coherency fabric registers location and
+ length.
+
+ * For "marvell,coherency-fabric", the first pair for the coherency
+ fabric registers, second pair for the per-CPU fabric registers.
+
+ * For "marvell,armada-375-coherency-fabric", only one pair is needed
+ for the per-CPU fabric registers.
+
+ * For "marvell,armada-380-coherency-fabric", only one pair is needed
+ for the per-CPU fabric registers.
+
+Examples:
+
+coherency-fabric@d0020200 {
+ compatible = "marvell,coherency-fabric";
+ reg = <0xd0020200 0xb0>,
+ <0xd0021810 0x1c>;
+
+};
+
+coherency-fabric@21810 {
+ compatible = "marvell,armada-375-coherency-fabric";
+ reg = <0x21810 0x1c>;
+};
+
diff --git a/Documentation/devicetree/bindings/arm/coresight.txt b/Documentation/devicetree/bindings/arm/coresight.txt
new file mode 100644
index 000000000..88602b754
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/coresight.txt
@@ -0,0 +1,199 @@
+* CoreSight Components:
+
+CoreSight components are compliant with the ARM CoreSight architecture
+specification and can be connected in various topologies to suit a particular
+SoCs tracing needs. These trace components can generally be classified as
+sinks, links and sources. Trace data produced by one or more sources flows
+through the intermediate links connecting the source to the currently selected
+sink. Each CoreSight component device should use these properties to describe
+its hardware characteristcs.
+
+* Required properties for all components *except* non-configurable replicators:
+
+ * compatible: These have to be supplemented with "arm,primecell" as
+ drivers are using the AMBA bus interface. Possible values include:
+ - "arm,coresight-etb10", "arm,primecell";
+ - "arm,coresight-tpiu", "arm,primecell";
+ - "arm,coresight-tmc", "arm,primecell";
+ - "arm,coresight-funnel", "arm,primecell";
+ - "arm,coresight-etm3x", "arm,primecell";
+
+ * reg: physical base address and length of the register
+ set(s) of the component.
+
+ * clocks: the clock associated to this component.
+
+ * clock-names: the name of the clock as referenced by the code.
+ Since we are using the AMBA framework, the name should be
+ "apb_pclk".
+
+ * port or ports: The representation of the component's port
+ layout using the generic DT graph presentation found in
+ "bindings/graph.txt".
+
+* Required properties for devices that don't show up on the AMBA bus, such as
+ non-configurable replicators:
+
+ * compatible: Currently supported value is (note the absence of the
+ AMBA markee):
+ - "arm,coresight-replicator"
+
+ * port or ports: same as above.
+
+* Optional properties for ETM/PTMs:
+
+ * arm,cp14: must be present if the system accesses ETM/PTM management
+ registers via co-processor 14.
+
+ * cpu: the cpu phandle this ETM/PTM is affined to. When omitted the
+ source is considered to belong to CPU0.
+
+* Optional property for TMC:
+
+ * arm,buffer-size: size of contiguous buffer space for TMC ETR
+ (embedded trace router)
+
+
+Example:
+
+1. Sinks
+ etb@20010000 {
+ compatible = "arm,coresight-etb10", "arm,primecell";
+ reg = <0 0x20010000 0 0x1000>;
+
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+ port {
+ etb_in_port: endpoint@0 {
+ slave-mode;
+ remote-endpoint = <&replicator_out_port0>;
+ };
+ };
+ };
+
+ tpiu@20030000 {
+ compatible = "arm,coresight-tpiu", "arm,primecell";
+ reg = <0 0x20030000 0 0x1000>;
+
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+ port {
+ tpiu_in_port: endpoint@0 {
+ slave-mode;
+ remote-endpoint = <&replicator_out_port1>;
+ };
+ };
+ };
+
+2. Links
+ replicator {
+ /* non-configurable replicators don't show up on the
+ * AMBA bus. As such no need to add "arm,primecell".
+ */
+ compatible = "arm,coresight-replicator";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* replicator output ports */
+ port@0 {
+ reg = <0>;
+ replicator_out_port0: endpoint {
+ remote-endpoint = <&etb_in_port>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ replicator_out_port1: endpoint {
+ remote-endpoint = <&tpiu_in_port>;
+ };
+ };
+
+ /* replicator input port */
+ port@2 {
+ reg = <0>;
+ replicator_in_port0: endpoint {
+ slave-mode;
+ remote-endpoint = <&funnel_out_port0>;
+ };
+ };
+ };
+ };
+
+ funnel@20040000 {
+ compatible = "arm,coresight-funnel", "arm,primecell";
+ reg = <0 0x20040000 0 0x1000>;
+
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* funnel output port */
+ port@0 {
+ reg = <0>;
+ funnel_out_port0: endpoint {
+ remote-endpoint =
+ <&replicator_in_port0>;
+ };
+ };
+
+ /* funnel input ports */
+ port@1 {
+ reg = <0>;
+ funnel_in_port0: endpoint {
+ slave-mode;
+ remote-endpoint = <&ptm0_out_port>;
+ };
+ };
+
+ port@2 {
+ reg = <1>;
+ funnel_in_port1: endpoint {
+ slave-mode;
+ remote-endpoint = <&ptm1_out_port>;
+ };
+ };
+
+ port@3 {
+ reg = <2>;
+ funnel_in_port2: endpoint {
+ slave-mode;
+ remote-endpoint = <&etm0_out_port>;
+ };
+ };
+
+ };
+ };
+
+3. Sources
+ ptm@2201c000 {
+ compatible = "arm,coresight-etm3x", "arm,primecell";
+ reg = <0 0x2201c000 0 0x1000>;
+
+ cpu = <&cpu0>;
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+ port {
+ ptm0_out_port: endpoint {
+ remote-endpoint = <&funnel_in_port0>;
+ };
+ };
+ };
+
+ ptm@2201d000 {
+ compatible = "arm,coresight-etm3x", "arm,primecell";
+ reg = <0 0x2201d000 0 0x1000>;
+
+ cpu = <&cpu1>;
+ clocks = <&oscclk6a>;
+ clock-names = "apb_pclk";
+ port {
+ ptm1_out_port: endpoint {
+ remote-endpoint = <&funnel_in_port1>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp b/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
new file mode 100644
index 000000000..c2e0cc5e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cpu-enable-method/al,alpine-smp
@@ -0,0 +1,52 @@
+========================================================
+Secondary CPU enable-method "al,alpine-smp" binding
+========================================================
+
+This document describes the "al,alpine-smp" method for
+enabling secondary CPUs. To apply to all CPUs, a single
+"al,alpine-smp" enable method should be defined in the
+"cpus" node.
+
+Enable method name: "al,alpine-smp"
+Compatible machines: "al,alpine"
+Compatible CPUs: "arm,cortex-a15"
+Related properties: (none)
+
+Note:
+This enable method requires valid nodes compatible with
+"al,alpine-cpu-resume" and "al,alpine-nb-service"[1].
+
+Example:
+
+cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ enable-method = "al,alpine-smp";
+
+ cpu@0 {
+ compatible = "arm,cortex-a15";
+ device_type = "cpu";
+ reg = <0>;
+ };
+
+ cpu@1 {
+ compatible = "arm,cortex-a15";
+ device_type = "cpu";
+ reg = <1>;
+ };
+
+ cpu@2 {
+ compatible = "arm,cortex-a15";
+ device_type = "cpu";
+ reg = <2>;
+ };
+
+ cpu@3 {
+ compatible = "arm,cortex-a15";
+ device_type = "cpu";
+ reg = <3>;
+ };
+};
+
+--
+[1] arm/al,alpine.txt
diff --git a/Documentation/devicetree/bindings/arm/cpu-enable-method/marvell,berlin-smp b/Documentation/devicetree/bindings/arm/cpu-enable-method/marvell,berlin-smp
new file mode 100644
index 000000000..cd236b727
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cpu-enable-method/marvell,berlin-smp
@@ -0,0 +1,41 @@
+========================================================
+Secondary CPU enable-method "marvell,berlin-smp" binding
+========================================================
+
+This document describes the "marvell,berlin-smp" method for enabling secondary
+CPUs. To apply to all CPUs, a single "marvell,berlin-smp" enable method should
+be defined in the "cpus" node.
+
+Enable method name: "marvell,berlin-smp"
+Compatible machines: "marvell,berlin2" and "marvell,berlin2q"
+Compatible CPUs: "marvell,pj4b" and "arm,cortex-a9"
+Related properties: (none)
+
+Note:
+This enable method needs valid nodes compatible with "arm,cortex-a9-scu" and
+"marvell,berlin-cpu-ctrl"[1].
+
+Example:
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ enable-method = "marvell,berlin-smp";
+
+ cpu@0 {
+ compatible = "marvell,pj4b";
+ device_type = "cpu";
+ next-level-cache = <&l2>;
+ reg = <0>;
+ };
+
+ cpu@1 {
+ compatible = "marvell,pj4b";
+ device_type = "cpu";
+ next-level-cache = <&l2>;
+ reg = <1>;
+ };
+ };
+
+--
+[1] arm/marvell,berlin.txt
diff --git a/Documentation/devicetree/bindings/arm/cpus.txt b/Documentation/devicetree/bindings/arm/cpus.txt
new file mode 100644
index 000000000..6aa331d11
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cpus.txt
@@ -0,0 +1,437 @@
+=================
+ARM CPUs bindings
+=================
+
+The device tree allows to describe the layout of CPUs in a system through
+the "cpus" node, which in turn contains a number of subnodes (ie "cpu")
+defining properties for every cpu.
+
+Bindings for CPU nodes follow the ePAPR v1.1 standard, available from:
+
+https://www.power.org/documentation/epapr-version-1-1/
+
+with updates for 32-bit and 64-bit ARM systems provided in this document.
+
+================================
+Convention used in this document
+================================
+
+This document follows the conventions described in the ePAPR v1.1, with
+the addition:
+
+- square brackets define bitfields, eg reg[7:0] value of the bitfield in
+ the reg property contained in bits 7 down to 0
+
+=====================================
+cpus and cpu node bindings definition
+=====================================
+
+The ARM architecture, in accordance with the ePAPR, requires the cpus and cpu
+nodes to be present and contain the properties described below.
+
+- cpus node
+
+ Description: Container of cpu nodes
+
+ The node name must be "cpus".
+
+ A cpus node must define the following properties:
+
+ - #address-cells
+ Usage: required
+ Value type: <u32>
+
+ Definition depends on ARM architecture version and
+ configuration:
+
+ # On uniprocessor ARM architectures previous to v7
+ value must be 1, to enable a simple enumeration
+ scheme for processors that do not have a HW CPU
+ identification register.
+ # On 32-bit ARM 11 MPcore, ARM v7 or later systems
+ value must be 1, that corresponds to CPUID/MPIDR
+ registers sizes.
+ # On ARM v8 64-bit systems value should be set to 2,
+ that corresponds to the MPIDR_EL1 register size.
+ If MPIDR_EL1[63:32] value is equal to 0 on all CPUs
+ in the system, #address-cells can be set to 1, since
+ MPIDR_EL1[63:32] bits are not used for CPUs
+ identification.
+ - #size-cells
+ Usage: required
+ Value type: <u32>
+ Definition: must be set to 0
+
+- cpu node
+
+ Description: Describes a CPU in an ARM based system
+
+ PROPERTIES
+
+ - device_type
+ Usage: required
+ Value type: <string>
+ Definition: must be "cpu"
+ - reg
+ Usage and definition depend on ARM architecture version and
+ configuration:
+
+ # On uniprocessor ARM architectures previous to v7
+ this property is required and must be set to 0.
+
+ # On ARM 11 MPcore based systems this property is
+ required and matches the CPUID[11:0] register bits.
+
+ Bits [11:0] in the reg cell must be set to
+ bits [11:0] in CPU ID register.
+
+ All other bits in the reg cell must be set to 0.
+
+ # On 32-bit ARM v7 or later systems this property is
+ required and matches the CPU MPIDR[23:0] register
+ bits.
+
+ Bits [23:0] in the reg cell must be set to
+ bits [23:0] in MPIDR.
+
+ All other bits in the reg cell must be set to 0.
+
+ # On ARM v8 64-bit systems this property is required
+ and matches the MPIDR_EL1 register affinity bits.
+
+ * If cpus node's #address-cells property is set to 2
+
+ The first reg cell bits [7:0] must be set to
+ bits [39:32] of MPIDR_EL1.
+
+ The second reg cell bits [23:0] must be set to
+ bits [23:0] of MPIDR_EL1.
+
+ * If cpus node's #address-cells property is set to 1
+
+ The reg cell bits [23:0] must be set to bits [23:0]
+ of MPIDR_EL1.
+
+ All other bits in the reg cells must be set to 0.
+
+ - compatible:
+ Usage: required
+ Value type: <string>
+ Definition: should be one of:
+ "arm,arm710t"
+ "arm,arm720t"
+ "arm,arm740t"
+ "arm,arm7ej-s"
+ "arm,arm7tdmi"
+ "arm,arm7tdmi-s"
+ "arm,arm9es"
+ "arm,arm9ej-s"
+ "arm,arm920t"
+ "arm,arm922t"
+ "arm,arm925"
+ "arm,arm926e-s"
+ "arm,arm926ej-s"
+ "arm,arm940t"
+ "arm,arm946e-s"
+ "arm,arm966e-s"
+ "arm,arm968e-s"
+ "arm,arm9tdmi"
+ "arm,arm1020e"
+ "arm,arm1020t"
+ "arm,arm1022e"
+ "arm,arm1026ej-s"
+ "arm,arm1136j-s"
+ "arm,arm1136jf-s"
+ "arm,arm1156t2-s"
+ "arm,arm1156t2f-s"
+ "arm,arm1176jzf"
+ "arm,arm1176jz-s"
+ "arm,arm1176jzf-s"
+ "arm,arm11mpcore"
+ "arm,cortex-a5"
+ "arm,cortex-a7"
+ "arm,cortex-a8"
+ "arm,cortex-a9"
+ "arm,cortex-a12"
+ "arm,cortex-a15"
+ "arm,cortex-a17"
+ "arm,cortex-a53"
+ "arm,cortex-a57"
+ "arm,cortex-m0"
+ "arm,cortex-m0+"
+ "arm,cortex-m1"
+ "arm,cortex-m3"
+ "arm,cortex-m4"
+ "arm,cortex-r4"
+ "arm,cortex-r5"
+ "arm,cortex-r7"
+ "brcm,brahma-b15"
+ "cavium,thunder"
+ "faraday,fa526"
+ "intel,sa110"
+ "intel,sa1100"
+ "marvell,feroceon"
+ "marvell,mohawk"
+ "marvell,pj4a"
+ "marvell,pj4b"
+ "marvell,sheeva-v5"
+ "nvidia,tegra132-denver"
+ "qcom,krait"
+ "qcom,scorpion"
+ - enable-method
+ Value type: <stringlist>
+ Usage and definition depend on ARM architecture version.
+ # On ARM v8 64-bit this property is required and must
+ be one of:
+ "psci"
+ "spin-table"
+ # On ARM 32-bit systems this property is optional and
+ can be one of:
+ "allwinner,sun6i-a31"
+ "arm,psci"
+ "brcm,brahma-b15"
+ "marvell,armada-375-smp"
+ "marvell,armada-380-smp"
+ "marvell,armada-390-smp"
+ "marvell,armada-xp-smp"
+ "qcom,gcc-msm8660"
+ "qcom,kpss-acc-v1"
+ "qcom,kpss-acc-v2"
+ "rockchip,rk3066-smp"
+
+ - cpu-release-addr
+ Usage: required for systems that have an "enable-method"
+ property value of "spin-table".
+ Value type: <prop-encoded-array>
+ Definition:
+ # On ARM v8 64-bit systems must be a two cell
+ property identifying a 64-bit zero-initialised
+ memory location.
+
+ - qcom,saw
+ Usage: required for systems that have an "enable-method"
+ property value of "qcom,kpss-acc-v1" or
+ "qcom,kpss-acc-v2"
+ Value type: <phandle>
+ Definition: Specifies the SAW[1] node associated with this CPU.
+
+ - qcom,acc
+ Usage: required for systems that have an "enable-method"
+ property value of "qcom,kpss-acc-v1" or
+ "qcom,kpss-acc-v2"
+ Value type: <phandle>
+ Definition: Specifies the ACC[2] node associated with this CPU.
+
+ - cpu-idle-states
+ Usage: Optional
+ Value type: <prop-encoded-array>
+ Definition:
+ # List of phandles to idle state nodes supported
+ by this cpu [3].
+
+ - rockchip,pmu
+ Usage: optional for systems that have an "enable-method"
+ property value of "rockchip,rk3066-smp"
+ While optional, it is the preferred way to get access to
+ the cpu-core power-domains.
+ Value type: <phandle>
+ Definition: Specifies the syscon node controlling the cpu core
+ power domains.
+
+Example 1 (dual-cluster big.LITTLE system 32-bit):
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x0>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x1>;
+ };
+
+ cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x100>;
+ };
+
+ cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x101>;
+ };
+ };
+
+Example 2 (Cortex-A8 uniprocessor 32-bit system):
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a8";
+ reg = <0x0>;
+ };
+ };
+
+Example 3 (ARM 926EJ-S uniprocessor 32-bit system):
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,arm926ej-s";
+ reg = <0x0>;
+ };
+ };
+
+Example 4 (ARM Cortex-A57 64-bit system):
+
+cpus {
+ #size-cells = <0>;
+ #address-cells = <2>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x0>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x1>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@10000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10000>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@10001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10001>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@10100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@10101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100000000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x0>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100000001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x1>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100000100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100000101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100010000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10000>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100010001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10001>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100010100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ cpu@100010101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+};
+
+--
+[1] arm/msm/qcom,saw2.txt
+[2] arm/msm/qcom,kpss-acc.txt
+[3] ARM Linux kernel documentation - idle states bindings
+ Documentation/devicetree/bindings/arm/idle-states.txt
diff --git a/Documentation/devicetree/bindings/arm/davinci.txt b/Documentation/devicetree/bindings/arm/davinci.txt
new file mode 100644
index 000000000..cfaeda427
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/davinci.txt
@@ -0,0 +1,17 @@
+Texas Instruments DaVinci Platforms Device Tree Bindings
+--------------------------------------------------------
+
+DA850/OMAP-L138/AM18x Evaluation Module (EVM) board
+Required root node properties:
+ - compatible = "ti,da850-evm", "ti,da850";
+
+EnBW AM1808 based CMC board
+Required root node properties:
+ - compatible = "enbw,cmc", "ti,da850;
+
+Generic DaVinci Boards
+----------------------
+
+DA850/OMAP-L138/AM18x generic board
+Required root node properties:
+ - compatible = "ti,da850";
diff --git a/Documentation/devicetree/bindings/arm/davinci/cp-intc.txt b/Documentation/devicetree/bindings/arm/davinci/cp-intc.txt
new file mode 100644
index 000000000..597e8a089
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/davinci/cp-intc.txt
@@ -0,0 +1,27 @@
+* TI Common Platform Interrupt Controller
+
+Common Platform Interrupt Controller (cp_intc) is used on
+OMAP-L1x SoCs and can support several configurable number
+of interrupts.
+
+Main node required properties:
+
+- compatible : should be:
+ "ti,cp-intc"
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The type shall be a <u32> and the value shall be 1.
+
+ The cell contains the interrupt number in the range [0-128].
+- ti,intc-size: Number of interrupts handled by the interrupt controller.
+- reg: physical base address and size of the intc registers map.
+
+Example:
+
+ intc: interrupt-controller@1 {
+ compatible = "ti,cp-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ ti,intc-size = <101>;
+ reg = <0xfffee000 0x2000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/digicolor.txt b/Documentation/devicetree/bindings/arm/digicolor.txt
new file mode 100644
index 000000000..658553f40
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/digicolor.txt
@@ -0,0 +1,6 @@
+Conexant Digicolor Platforms Device Tree Bindings
+
+Each device tree must specify which Conexant Digicolor SoC it uses.
+Must be the following compatible string:
+
+ cnxt,cx92755
diff --git a/Documentation/devicetree/bindings/arm/exynos/power_domain.txt b/Documentation/devicetree/bindings/arm/exynos/power_domain.txt
new file mode 100644
index 000000000..5da38c5ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/exynos/power_domain.txt
@@ -0,0 +1,52 @@
+* Samsung Exynos Power Domains
+
+Exynos processors include support for multiple power domains which are used
+to gate power to one or more peripherals on the processor.
+
+Required Properties:
+- compatible: should be one of the following.
+ * samsung,exynos4210-pd - for exynos4210 type power domain.
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- #power-domain-cells: number of cells in power domain specifier;
+ must be 0.
+
+Optional Properties:
+- clocks: List of clock handles. The parent clocks of the input clocks to the
+ devices in this power domain are set to oscclk before power gating
+ and restored back after powering on a domain. This is required for
+ all domains which are powered on and off and not required for unused
+ domains.
+- clock-names: The following clocks can be specified:
+ - oscclk: Oscillator clock.
+ - pclkN, clkN: Pairs of parent of input clock and input clock to the
+ devices in this power domain. Maximum of 4 pairs (N = 0 to 3)
+ are supported currently.
+ - asbN: Clocks required by asynchronous bridges (ASB) present in
+ the power domain. These clock should be enabled during power
+ domain on/off operations.
+- power-domains: phandle pointing to the parent power domain, for more details
+ see Documentation/devicetree/bindings/power/power_domain.txt
+
+Node of a device using power domains must have a power-domains property
+defined with a phandle to respective power domain.
+
+Example:
+
+ lcd0: power-domain-lcd0 {
+ compatible = "samsung,exynos4210-pd";
+ reg = <0x10023C00 0x10>;
+ #power-domain-cells = <0>;
+ };
+
+ mfc_pd: power-domain@10044060 {
+ compatible = "samsung,exynos4210-pd";
+ reg = <0x10044060 0x20>;
+ clocks = <&clock CLK_FIN_PLL>, <&clock CLK_MOUT_SW_ACLK333>,
+ <&clock CLK_MOUT_USER_ACLK333>;
+ clock-names = "oscclk", "pclk0", "clk0";
+ #power-domain-cells = <0>;
+ };
+
+See Documentation/devicetree/bindings/power/power_domain.txt for description
+of consumer-side bindings.
diff --git a/Documentation/devicetree/bindings/arm/exynos/smp-sysram.txt b/Documentation/devicetree/bindings/arm/exynos/smp-sysram.txt
new file mode 100644
index 000000000..4a0a4f70a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/exynos/smp-sysram.txt
@@ -0,0 +1,38 @@
+Samsung Exynos SYSRAM for SMP bringup:
+------------------------------------
+
+Samsung SMP-capable Exynos SoCs use part of the SYSRAM for the bringup
+of the secondary cores. Once the core gets powered up it executes the
+code that is residing at some specific location of the SYSRAM.
+
+Therefore reserved section sub-nodes have to be added to the mmio-sram
+declaration. These nodes are of two types depending upon secure or
+non-secure execution environment.
+
+Required sub-node properties:
+- compatible : depending upon boot mode, should be
+ "samsung,exynos4210-sysram" : for Secure SYSRAM
+ "samsung,exynos4210-sysram-ns" : for Non-secure SYSRAM
+
+The rest of the properties should follow the generic mmio-sram discription
+found in ../../misc/sysram.txt
+
+Example:
+
+ sysram@02020000 {
+ compatible = "mmio-sram";
+ reg = <0x02020000 0x54000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x02020000 0x54000>;
+
+ smp-sysram@0 {
+ compatible = "samsung,exynos4210-sysram";
+ reg = <0x0 0x1000>;
+ };
+
+ smp-sysram@53000 {
+ compatible = "samsung,exynos4210-sysram-ns";
+ reg = <0x53000 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.txt b/Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.txt
new file mode 100644
index 000000000..780d0392a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/firmware/tlm,trusted-foundations.txt
@@ -0,0 +1,20 @@
+Trusted Foundations
+-------------------
+
+Boards that use the Trusted Foundations secure monitor can signal its
+presence by declaring a node compatible with "tlm,trusted-foundations"
+under the /firmware/ node
+
+Required properties:
+- compatible: "tlm,trusted-foundations"
+- tlm,version-major: major version number of Trusted Foundations firmware
+- tlm,version-minor: minor version number of Trusted Foundations firmware
+
+Example:
+ firmware {
+ trusted-foundations {
+ compatible = "tlm,trusted-foundations";
+ tlm,version-major = <2>;
+ tlm,version-minor = <8>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt
new file mode 100644
index 000000000..44aa3c451
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-cpucfg.txt
@@ -0,0 +1,14 @@
+Freescale Vybrid Miscellaneous System Control - CPU Configuration
+
+The MSCM IP contains multiple sub modules, this binding describes the first
+block of registers which contains CPU configuration information.
+
+Required properties:
+- compatible: "fsl,vf610-mscm-cpucfg", "syscon"
+- reg: the register range of the MSCM CPU configuration registers
+
+Example:
+ mscm_cpucfg: cpucfg@40001000 {
+ compatible = "fsl,vf610-mscm-cpucfg", "syscon";
+ reg = <0x40001000 0x800>;
+ }
diff --git a/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt b/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt
new file mode 100644
index 000000000..669808b2a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/freescale/fsl,vf610-mscm-ir.txt
@@ -0,0 +1,33 @@
+Freescale Vybrid Miscellaneous System Control - Interrupt Router
+
+The MSCM IP contains multiple sub modules, this binding describes the second
+block of registers which control the interrupt router. The interrupt router
+allows to configure the recipient of each peripheral interrupt. Furthermore
+it controls the directed processor interrupts. The module is available in all
+Vybrid SoC's but is only really useful in dual core configurations (VF6xx
+which comes with a Cortex-A5/Cortex-M4 combination).
+
+Required properties:
+- compatible: "fsl,vf610-mscm-ir"
+- reg: the register range of the MSCM Interrupt Router
+- fsl,cpucfg: The handle to the MSCM CPU configuration node, required
+ to get the current CPU ID
+- interrupt-controller: Identifies the node as an interrupt controller
+- #interrupt-cells: Two cells, interrupt number and cells.
+ The hardware interrupt number according to interrupt
+ assignment of the interrupt router is required.
+ Flags get passed only when using GIC as parent. Flags
+ encoding as documented by the GIC bindings.
+- interrupt-parent: Should be the phandle for the interrupt controller of
+ the CPU the device tree is intended to be used on. This
+ is either the node of the GIC or NVIC controller.
+
+Example:
+ mscm_ir: interrupt-controller@40001800 {
+ compatible = "fsl,vf610-mscm-ir";
+ reg = <0x40001800 0x400>;
+ fsl,cpucfg = <&mscm_cpucfg>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&intc>;
+ }
diff --git a/Documentation/devicetree/bindings/arm/fsl.txt b/Documentation/devicetree/bindings/arm/fsl.txt
new file mode 100644
index 000000000..a5462b6b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/fsl.txt
@@ -0,0 +1,134 @@
+Freescale i.MX Platforms Device Tree Bindings
+-----------------------------------------------
+
+i.MX23 Evaluation Kit
+Required root node properties:
+ - compatible = "fsl,imx23-evk", "fsl,imx23";
+
+i.MX25 Product Development Kit
+Required root node properties:
+ - compatible = "fsl,imx25-pdk", "fsl,imx25";
+
+i.MX27 Product Development Kit
+Required root node properties:
+ - compatible = "fsl,imx27-pdk", "fsl,imx27";
+
+i.MX28 Evaluation Kit
+Required root node properties:
+ - compatible = "fsl,imx28-evk", "fsl,imx28";
+
+i.MX51 Babbage Board
+Required root node properties:
+ - compatible = "fsl,imx51-babbage", "fsl,imx51";
+
+i.MX53 Automotive Reference Design Board
+Required root node properties:
+ - compatible = "fsl,imx53-ard", "fsl,imx53";
+
+i.MX53 Evaluation Kit
+Required root node properties:
+ - compatible = "fsl,imx53-evk", "fsl,imx53";
+
+i.MX53 Quick Start Board
+Required root node properties:
+ - compatible = "fsl,imx53-qsb", "fsl,imx53";
+
+i.MX53 Smart Mobile Reference Design Board
+Required root node properties:
+ - compatible = "fsl,imx53-smd", "fsl,imx53";
+
+i.MX6 Quad Armadillo2 Board
+Required root node properties:
+ - compatible = "fsl,imx6q-arm2", "fsl,imx6q";
+
+i.MX6 Quad SABRE Lite Board
+Required root node properties:
+ - compatible = "fsl,imx6q-sabrelite", "fsl,imx6q";
+
+i.MX6 Quad SABRE Smart Device Board
+Required root node properties:
+ - compatible = "fsl,imx6q-sabresd", "fsl,imx6q";
+
+i.MX6 Quad SABRE Automotive Board
+Required root node properties:
+ - compatible = "fsl,imx6q-sabreauto", "fsl,imx6q";
+
+Generic i.MX boards
+-------------------
+
+No iomux setup is done for these boards, so this must have been configured
+by the bootloader for boards to work with the generic bindings.
+
+i.MX27 generic board
+Required root node properties:
+ - compatible = "fsl,imx27";
+
+i.MX51 generic board
+Required root node properties:
+ - compatible = "fsl,imx51";
+
+i.MX53 generic board
+Required root node properties:
+ - compatible = "fsl,imx53";
+
+i.MX6q generic board
+Required root node properties:
+ - compatible = "fsl,imx6q";
+
+Freescale Vybrid Platform Device Tree Bindings
+----------------------------------------------
+
+For the Vybrid SoC familiy all variants with DDR controller are supported,
+which is the VF5xx and VF6xx series. Out of historical reasons, in most
+places the kernel uses vf610 to refer to the whole familiy.
+
+Required root node compatible property (one of them):
+ - compatible = "fsl,vf500";
+ - compatible = "fsl,vf510";
+ - compatible = "fsl,vf600";
+ - compatible = "fsl,vf610";
+
+Freescale LS1021A Platform Device Tree Bindings
+------------------------------------------------
+
+Required root node compatible properties:
+ - compatible = "fsl,ls1021a";
+
+Freescale LS1021A SoC-specific Device Tree Bindings
+-------------------------------------------
+
+Freescale SCFG
+ SCFG is the supplemental configuration unit, that provides SoC specific
+configuration and status registers for the chip. Such as getting PEX port
+status.
+ Required properties:
+ - compatible: should be "fsl,ls1021a-scfg"
+ - reg: should contain base address and length of SCFG memory-mapped registers
+
+Example:
+ scfg: scfg@1570000 {
+ compatible = "fsl,ls1021a-scfg";
+ reg = <0x0 0x1570000 0x0 0x10000>;
+ };
+
+Freescale DCFG
+ DCFG is the device configuration unit, that provides general purpose
+configuration and status for the device. Such as setting the secondary
+core start address and release the secondary core from holdoff and startup.
+ Required properties:
+ - compatible: should be "fsl,ls1021a-dcfg"
+ - reg : should contain base address and length of DCFG memory-mapped registers
+
+Example:
+ dcfg: dcfg@1ee0000 {
+ compatible = "fsl,ls1021a-dcfg";
+ reg = <0x0 0x1ee0000 0x0 0x10000>;
+ };
+
+Freescale LS2085A SoC Device Tree Bindings
+------------------------------------------
+
+LS2085A ARMv8 based Simulator model
+Required root node properties:
+ - compatible = "fsl,ls2085a-simu", "fsl,ls2085a";
+
diff --git a/Documentation/devicetree/bindings/arm/fw-cfg.txt b/Documentation/devicetree/bindings/arm/fw-cfg.txt
new file mode 100644
index 000000000..953fb640d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/fw-cfg.txt
@@ -0,0 +1,72 @@
+* QEMU Firmware Configuration bindings for ARM
+
+QEMU's arm-softmmu and aarch64-softmmu emulation / virtualization targets
+provide the following Firmware Configuration interface on the "virt" machine
+type:
+
+- A write-only, 16-bit wide selector (or control) register,
+- a read-write, 64-bit wide data register.
+
+QEMU exposes the control and data register to ARM guests as memory mapped
+registers; their location is communicated to the guest's UEFI firmware in the
+DTB that QEMU places at the bottom of the guest's DRAM.
+
+The guest writes a selector value (a key) to the selector register, and then
+can read the corresponding data (produced by QEMU) via the data register. If
+the selected entry is writable, the guest can rewrite it through the data
+register.
+
+The selector register takes keys in big endian byte order.
+
+The data register allows accesses with 8, 16, 32 and 64-bit width (only at
+offset 0 of the register). Accesses larger than a byte are interpreted as
+arrays, bundled together only for better performance. The bytes constituting
+such a word, in increasing address order, correspond to the bytes that would
+have been transferred by byte-wide accesses in chronological order.
+
+The interface allows guest firmware to download various parameters and blobs
+that affect how the firmware works and what tables it installs for the guest
+OS. For example, boot order of devices, ACPI tables, SMBIOS tables, kernel and
+initrd images for direct kernel booting, virtual machine UUID, SMP information,
+virtual NUMA topology, and so on.
+
+The authoritative registry of the valid selector values and their meanings is
+the QEMU source code; the structure of the data blobs corresponding to the
+individual key values is also defined in the QEMU source code.
+
+The presence of the registers can be verified by selecting the "signature" blob
+with key 0x0000, and reading four bytes from the data register. The returned
+signature is "QEMU".
+
+The outermost protocol (involving the write / read sequences of the control and
+data registers) is expected to be versioned, and/or described by feature bits.
+The interface revision / feature bitmap can be retrieved with key 0x0001. The
+blob to be read from the data register has size 4, and it is to be interpreted
+as a uint32_t value in little endian byte order. The current value
+(corresponding to the above outer protocol) is zero.
+
+The guest kernel is not expected to use these registers (although it is
+certainly allowed to); the device tree bindings are documented here because
+this is where device tree bindings reside in general.
+
+Required properties:
+
+- compatible: "qemu,fw-cfg-mmio".
+
+- reg: the MMIO region used by the device.
+ * Bytes 0x0 to 0x7 cover the data register.
+ * Bytes 0x8 to 0x9 cover the selector register.
+ * Further registers may be appended to the region in case of future interface
+ revisions / feature bits.
+
+Example:
+
+/ {
+ #size-cells = <0x2>;
+ #address-cells = <0x2>;
+
+ fw-cfg@9020000 {
+ compatible = "qemu,fw-cfg-mmio";
+ reg = <0x0 0x9020000 0x0 0xa>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/arm/gic-v3.txt b/Documentation/devicetree/bindings/arm/gic-v3.txt
new file mode 100644
index 000000000..ddfade40a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/gic-v3.txt
@@ -0,0 +1,118 @@
+* ARM Generic Interrupt Controller, version 3
+
+AArch64 SMP cores are often associated with a GICv3, providing Private
+Peripheral Interrupts (PPI), Shared Peripheral Interrupts (SPI),
+Software Generated Interrupts (SGI), and Locality-specific Peripheral
+Interrupts (LPI).
+
+Main node required properties:
+
+- compatible : should at least contain "arm,gic-v3".
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. Must be a single cell with a value of at least 3.
+
+ The 1st cell is the interrupt type; 0 for SPI interrupts, 1 for PPI
+ interrupts. Other values are reserved for future use.
+
+ The 2nd cell contains the interrupt number for the interrupt type.
+ SPI interrupts are in the range [0-987]. PPI interrupts are in the
+ range [0-15].
+
+ The 3rd cell is the flags, encoded as follows:
+ bits[3:0] trigger type and level flags.
+ 1 = edge triggered
+ 4 = level triggered
+
+ Cells 4 and beyond are reserved for future use. When the 1st cell
+ has a value of 0 or 1, cells 4 and beyond act as padding, and may be
+ ignored. It is recommended that padding cells have a value of 0.
+
+- reg : Specifies base physical address(s) and size of the GIC
+ registers, in the following order:
+ - GIC Distributor interface (GICD)
+ - GIC Redistributors (GICR), one range per redistributor region
+ - GIC CPU interface (GICC)
+ - GIC Hypervisor interface (GICH)
+ - GIC Virtual CPU interface (GICV)
+
+ GICC, GICH and GICV are optional.
+
+- interrupts : Interrupt source of the VGIC maintenance interrupt.
+
+Optional
+
+- redistributor-stride : If using padding pages, specifies the stride
+ of consecutive redistributors. Must be a multiple of 64kB.
+
+- #redistributor-regions: The number of independent contiguous regions
+ occupied by the redistributors. Required if more than one such
+ region is present.
+
+Sub-nodes:
+
+GICv3 has one or more Interrupt Translation Services (ITS) that are
+used to route Message Signalled Interrupts (MSI) to the CPUs.
+
+These nodes must have the following properties:
+- compatible : Should at least contain "arm,gic-v3-its".
+- msi-controller : Boolean property. Identifies the node as an MSI controller
+- reg: Specifies the base physical address and size of the ITS
+ registers.
+
+The main GIC node must contain the appropriate #address-cells,
+#size-cells and ranges properties for the reg property of all ITS
+nodes.
+
+Examples:
+
+ gic: interrupt-controller@2cf00000 {
+ compatible = "arm,gic-v3";
+ #interrupt-cells = <3>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+ interrupt-controller;
+ reg = <0x0 0x2f000000 0 0x10000>, // GICD
+ <0x0 0x2f100000 0 0x200000>, // GICR
+ <0x0 0x2c000000 0 0x2000>, // GICC
+ <0x0 0x2c010000 0 0x2000>, // GICH
+ <0x0 0x2c020000 0 0x2000>; // GICV
+ interrupts = <1 9 4>;
+
+ gic-its@2c200000 {
+ compatible = "arm,gic-v3-its";
+ msi-controller;
+ reg = <0x0 0x2c200000 0 0x200000>;
+ };
+ };
+
+ gic: interrupt-controller@2c010000 {
+ compatible = "arm,gic-v3";
+ #interrupt-cells = <3>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+ interrupt-controller;
+ redistributor-stride = <0x0 0x40000>; // 256kB stride
+ #redistributor-regions = <2>;
+ reg = <0x0 0x2c010000 0 0x10000>, // GICD
+ <0x0 0x2d000000 0 0x800000>, // GICR 1: CPUs 0-31
+ <0x0 0x2e000000 0 0x800000>; // GICR 2: CPUs 32-63
+ <0x0 0x2c040000 0 0x2000>, // GICC
+ <0x0 0x2c060000 0 0x2000>, // GICH
+ <0x0 0x2c080000 0 0x2000>; // GICV
+ interrupts = <1 9 4>;
+
+ gic-its@2c200000 {
+ compatible = "arm,gic-v3-its";
+ msi-controller;
+ reg = <0x0 0x2c200000 0 0x200000>;
+ };
+
+ gic-its@2c400000 {
+ compatible = "arm,gic-v3-its";
+ msi-controller;
+ reg = <0x0 0x2c400000 0 0x200000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/gic.txt b/Documentation/devicetree/bindings/arm/gic.txt
new file mode 100644
index 000000000..2da059a47
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/gic.txt
@@ -0,0 +1,152 @@
+* ARM Generic Interrupt Controller
+
+ARM SMP cores are often associated with a GIC, providing per processor
+interrupts (PPI), shared processor interrupts (SPI) and software
+generated interrupts (SGI).
+
+Primary GIC is attached directly to the CPU and typically has PPIs and SGIs.
+Secondary GICs are cascaded into the upward interrupt controller and do not
+have PPIs or SGIs.
+
+Main node required properties:
+
+- compatible : should be one of:
+ "arm,gic-400"
+ "arm,cortex-a15-gic"
+ "arm,cortex-a9-gic"
+ "arm,cortex-a7-gic"
+ "arm,arm11mp-gic"
+ "brcm,brahma-b15-gic"
+ "arm,arm1176jzf-devchip-gic"
+ "qcom,msm-8660-qgic"
+ "qcom,msm-qgic2"
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The type shall be a <u32> and the value shall be 3.
+
+ The 1st cell is the interrupt type; 0 for SPI interrupts, 1 for PPI
+ interrupts.
+
+ The 2nd cell contains the interrupt number for the interrupt type.
+ SPI interrupts are in the range [0-987]. PPI interrupts are in the
+ range [0-15].
+
+ The 3rd cell is the flags, encoded as follows:
+ bits[3:0] trigger type and level flags.
+ 1 = low-to-high edge triggered
+ 2 = high-to-low edge triggered (invalid for SPIs)
+ 4 = active high level-sensitive
+ 8 = active low level-sensitive (invalid for SPIs).
+ bits[15:8] PPI interrupt cpu mask. Each bit corresponds to each of
+ the 8 possible cpus attached to the GIC. A bit set to '1' indicated
+ the interrupt is wired to that CPU. Only valid for PPI interrupts.
+ Also note that the configurability of PPI interrupts is IMPLEMENTATION
+ DEFINED and as such not guaranteed to be present (most SoC available
+ in 2014 seem to ignore the setting of this flag and use the hardware
+ default value).
+
+- reg : Specifies base physical address(s) and size of the GIC registers. The
+ first region is the GIC distributor register base and size. The 2nd region is
+ the GIC cpu interface register base and size.
+
+Optional
+- interrupts : Interrupt source of the parent interrupt controller on
+ secondary GICs, or VGIC maintenance interrupt on primary GIC (see
+ below).
+
+- cpu-offset : per-cpu offset within the distributor and cpu interface
+ regions, used when the GIC doesn't have banked registers. The offset is
+ cpu-offset * cpu-nr.
+
+Example:
+
+ intc: interrupt-controller@fff11000 {
+ compatible = "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <1>;
+ interrupt-controller;
+ reg = <0xfff11000 0x1000>,
+ <0xfff10100 0x100>;
+ };
+
+
+* GIC virtualization extensions (VGIC)
+
+For ARM cores that support the virtualization extensions, additional
+properties must be described (they only exist if the GIC is the
+primary interrupt controller).
+
+Required properties:
+
+- reg : Additional regions specifying the base physical address and
+ size of the VGIC registers. The first additional region is the GIC
+ virtual interface control register base and size. The 2nd additional
+ region is the GIC virtual cpu interface register base and size.
+
+- interrupts : VGIC maintenance interrupt.
+
+Example:
+
+ interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a15-gic";
+ #interrupt-cells = <3>;
+ interrupt-controller;
+ reg = <0x2c001000 0x1000>,
+ <0x2c002000 0x1000>,
+ <0x2c004000 0x2000>,
+ <0x2c006000 0x2000>;
+ interrupts = <1 9 0xf04>;
+ };
+
+
+* GICv2m extension for MSI/MSI-x support (Optional)
+
+Certain revisions of GIC-400 supports MSI/MSI-x via V2M register frame(s).
+This is enabled by specifying v2m sub-node(s).
+
+Required properties:
+
+- compatible : The value here should contain "arm,gic-v2m-frame".
+
+- msi-controller : Identifies the node as an MSI controller.
+
+- reg : GICv2m MSI interface register base and size
+
+Optional properties:
+
+- arm,msi-base-spi : When the MSI_TYPER register contains an incorrect
+ value, this property should contain the SPI base of
+ the MSI frame, overriding the HW value.
+
+- arm,msi-num-spis : When the MSI_TYPER register contains an incorrect
+ value, this property should contain the number of
+ SPIs assigned to the frame, overriding the HW value.
+
+Example:
+
+ interrupt-controller@e1101000 {
+ compatible = "arm,gic-400";
+ #interrupt-cells = <3>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ interrupt-controller;
+ interrupts = <1 8 0xf04>;
+ ranges = <0 0 0 0xe1100000 0 0x100000>;
+ reg = <0x0 0xe1110000 0 0x01000>,
+ <0x0 0xe112f000 0 0x02000>,
+ <0x0 0xe1140000 0 0x10000>,
+ <0x0 0xe1160000 0 0x10000>;
+ v2m0: v2m@0x8000 {
+ compatible = "arm,gic-v2m-frame";
+ msi-controller;
+ reg = <0x0 0x80000 0 0x1000>;
+ };
+
+ ....
+
+ v2mN: v2m@0x9000 {
+ compatible = "arm,gic-v2m-frame";
+ msi-controller;
+ reg = <0x0 0x90000 0 0x1000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/global_timer.txt b/Documentation/devicetree/bindings/arm/global_timer.txt
new file mode 100644
index 000000000..bdae3a818
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/global_timer.txt
@@ -0,0 +1,27 @@
+
+* ARM Global Timer
+ Cortex-A9 are often associated with a per-core Global timer.
+
+** Timer node required properties:
+
+- compatible : should contain
+ * "arm,cortex-a5-global-timer" for Cortex-A5 global timers.
+ * "arm,cortex-a9-global-timer" for Cortex-A9 global
+ timers or any compatible implementation. Note: driver
+ supports versions r2p0 and above.
+
+- interrupts : One interrupt to each core
+
+- reg : Specify the base address and the size of the GT timer
+ register window.
+
+- clocks : Should be phandle to a clock.
+
+Example:
+
+ timer@2c000600 {
+ compatible = "arm,cortex-a9-global-timer";
+ reg = <0x2c000600 0x20>;
+ interrupts = <1 13 0xf01>;
+ clocks = <&arm_periph_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt
new file mode 100644
index 000000000..35b1bd49c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.txt
@@ -0,0 +1,105 @@
+Hisilicon Platforms Device Tree Bindings
+----------------------------------------------------
+
+Hi4511 Board
+Required root node properties:
+ - compatible = "hisilicon,hi3620-hi4511";
+
+HiP04 D01 Board
+Required root node properties:
+ - compatible = "hisilicon,hip04-d01";
+
+HiP01 ca9x2 Board
+Required root node properties:
+ - compatible = "hisilicon,hip01-ca9x2";
+
+
+Hisilicon system controller
+
+Required properties:
+- compatible : "hisilicon,sysctrl"
+- reg : Register address and size
+
+Optional properties:
+- smp-offset : offset in sysctrl for notifying slave cpu booting
+ cpu 1, reg;
+ cpu 2, reg + 0x4;
+ cpu 3, reg + 0x8;
+ If reg value is not zero, cpun exit wfi and go
+- resume-offset : offset in sysctrl for notifying cpu0 when resume
+- reboot-offset : offset in sysctrl for system reboot
+
+Example:
+
+ /* for Hi3620 */
+ sysctrl: system-controller@fc802000 {
+ compatible = "hisilicon,sysctrl";
+ reg = <0xfc802000 0x1000>;
+ smp-offset = <0x31c>;
+ resume-offset = <0x308>;
+ reboot-offset = <0x4>;
+ };
+
+-----------------------------------------------------------------------
+Hisilicon HiP01 system controller
+
+Required properties:
+- compatible : "hisilicon,hip01-sysctrl"
+- reg : Register address and size
+
+The HiP01 system controller is mostly compatible with hisilicon
+system controller,but it has some specific control registers for
+HIP01 SoC family, such as slave core boot, and also some same
+registers located at different offset.
+
+Example:
+
+ /* for hip01-ca9x2 */
+ sysctrl: system-controller@10000000 {
+ compatible = "hisilicon,hip01-sysctrl", "hisilicon,sysctrl";
+ reg = <0x10000000 0x1000>;
+ reboot-offset = <0x4>;
+ };
+
+-----------------------------------------------------------------------
+Hisilicon CPU controller
+
+Required properties:
+- compatible : "hisilicon,cpuctrl"
+- reg : Register address and size
+
+The clock registers and power registers of secondary cores are defined
+in CPU controller, especially in HIX5HD2 SoC.
+
+-----------------------------------------------------------------------
+PCTRL: Peripheral misc control register
+
+Required Properties:
+- compatible: "hisilicon,pctrl"
+- reg: Address and size of pctrl.
+
+Example:
+
+ /* for Hi3620 */
+ pctrl: pctrl@fca09000 {
+ compatible = "hisilicon,pctrl";
+ reg = <0xfca09000 0x1000>;
+ };
+
+-----------------------------------------------------------------------
+Fabric:
+
+Required Properties:
+- compatible: "hisilicon,hip04-fabric";
+- reg: Address and size of Fabric
+
+-----------------------------------------------------------------------
+Bootwrapper boot method (software protocol on SMP):
+
+Required Properties:
+- compatible: "hisilicon,hip04-bootwrapper";
+- boot-method: Address and size of boot method.
+ [0]: bootwrapper physical address
+ [1]: bootwrapper size
+ [2]: relocation physical address
+ [3]: relocation size
diff --git a/Documentation/devicetree/bindings/arm/idle-states.txt b/Documentation/devicetree/bindings/arm/idle-states.txt
new file mode 100644
index 000000000..a8274eaba
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/idle-states.txt
@@ -0,0 +1,699 @@
+==========================================
+ARM idle states binding description
+==========================================
+
+==========================================
+1 - Introduction
+==========================================
+
+ARM systems contain HW capable of managing power consumption dynamically,
+where cores can be put in different low-power states (ranging from simple
+wfi to power gating) according to OS PM policies. The CPU states representing
+the range of dynamic idle states that a processor can enter at run-time, can be
+specified through device tree bindings representing the parameters required
+to enter/exit specific idle states on a given processor.
+
+According to the Server Base System Architecture document (SBSA, [3]), the
+power states an ARM CPU can be put into are identified by the following list:
+
+- Running
+- Idle_standby
+- Idle_retention
+- Sleep
+- Off
+
+The power states described in the SBSA document define the basic CPU states on
+top of which ARM platforms implement power management schemes that allow an OS
+PM implementation to put the processor in different idle states (which include
+states listed above; "off" state is not an idle state since it does not have
+wake-up capabilities, hence it is not considered in this document).
+
+Idle state parameters (eg entry latency) are platform specific and need to be
+characterized with bindings that provide the required information to OS PM
+code so that it can build the required tables and use them at runtime.
+
+The device tree binding definition for ARM idle states is the subject of this
+document.
+
+===========================================
+2 - idle-states definitions
+===========================================
+
+Idle states are characterized for a specific system through a set of
+timing and energy related properties, that underline the HW behaviour
+triggered upon idle states entry and exit.
+
+The following diagram depicts the CPU execution phases and related timing
+properties required to enter and exit an idle state:
+
+..__[EXEC]__|__[PREP]__|__[ENTRY]__|__[IDLE]__|__[EXIT]__|__[EXEC]__..
+ | | | | |
+
+ |<------ entry ------->|
+ | latency |
+ |<- exit ->|
+ | latency |
+ |<-------- min-residency -------->|
+ |<------- wakeup-latency ------->|
+
+ Diagram 1: CPU idle state execution phases
+
+EXEC: Normal CPU execution.
+
+PREP: Preparation phase before committing the hardware to idle mode
+ like cache flushing. This is abortable on pending wake-up
+ event conditions. The abort latency is assumed to be negligible
+ (i.e. less than the ENTRY + EXIT duration). If aborted, CPU
+ goes back to EXEC. This phase is optional. If not abortable,
+ this should be included in the ENTRY phase instead.
+
+ENTRY: The hardware is committed to idle mode. This period must run
+ to completion up to IDLE before anything else can happen.
+
+IDLE: This is the actual energy-saving idle period. This may last
+ between 0 and infinite time, until a wake-up event occurs.
+
+EXIT: Period during which the CPU is brought back to operational
+ mode (EXEC).
+
+entry-latency: Worst case latency required to enter the idle state. The
+exit-latency may be guaranteed only after entry-latency has passed.
+
+min-residency: Minimum period, including preparation and entry, for a given
+idle state to be worthwhile energywise.
+
+wakeup-latency: Maximum delay between the signaling of a wake-up event and the
+CPU being able to execute normal code again. If not specified, this is assumed
+to be entry-latency + exit-latency.
+
+These timing parameters can be used by an OS in different circumstances.
+
+An idle CPU requires the expected min-residency time to select the most
+appropriate idle state based on the expected expiry time of the next IRQ
+(ie wake-up) that causes the CPU to return to the EXEC phase.
+
+An operating system scheduler may need to compute the shortest wake-up delay
+for CPUs in the system by detecting how long will it take to get a CPU out
+of an idle state, eg:
+
+wakeup-delay = exit-latency + max(entry-latency - (now - entry-timestamp), 0)
+
+In other words, the scheduler can make its scheduling decision by selecting
+(eg waking-up) the CPU with the shortest wake-up latency.
+The wake-up latency must take into account the entry latency if that period
+has not expired. The abortable nature of the PREP period can be ignored
+if it cannot be relied upon (e.g. the PREP deadline may occur much sooner than
+the worst case since it depends on the CPU operating conditions, ie caches
+state).
+
+An OS has to reliably probe the wakeup-latency since some devices can enforce
+latency constraints guarantees to work properly, so the OS has to detect the
+worst case wake-up latency it can incur if a CPU is allowed to enter an
+idle state, and possibly to prevent that to guarantee reliable device
+functioning.
+
+The min-residency time parameter deserves further explanation since it is
+expressed in time units but must factor in energy consumption coefficients.
+
+The energy consumption of a cpu when it enters a power state can be roughly
+characterised by the following graph:
+
+ |
+ |
+ |
+ e |
+ n | /---
+ e | /------
+ r | /------
+ g | /-----
+ y | /------
+ | ----
+ | /|
+ | / |
+ | / |
+ | / |
+ | / |
+ | / |
+ |/ |
+ -----|-------+----------------------------------
+ 0| 1 time(ms)
+
+ Graph 1: Energy vs time example
+
+The graph is split in two parts delimited by time 1ms on the X-axis.
+The graph curve with X-axis values = { x | 0 < x < 1ms } has a steep slope
+and denotes the energy costs incurred whilst entering and leaving the idle
+state.
+The graph curve in the area delimited by X-axis values = {x | x > 1ms } has
+shallower slope and essentially represents the energy consumption of the idle
+state.
+
+min-residency is defined for a given idle state as the minimum expected
+residency time for a state (inclusive of preparation and entry) after
+which choosing that state become the most energy efficient option. A good
+way to visualise this, is by taking the same graph above and comparing some
+states energy consumptions plots.
+
+For sake of simplicity, let's consider a system with two idle states IDLE1,
+and IDLE2:
+
+ |
+ |
+ |
+ | /-- IDLE1
+ e | /---
+ n | /----
+ e | /---
+ r | /-----/--------- IDLE2
+ g | /-------/---------
+ y | ------------ /---|
+ | / /---- |
+ | / /--- |
+ | / /---- |
+ | / /--- |
+ | --- |
+ | / |
+ | / |
+ |/ | time
+ ---/----------------------------+------------------------
+ |IDLE1-energy < IDLE2-energy | IDLE2-energy < IDLE1-energy
+ |
+ IDLE2-min-residency
+
+ Graph 2: idle states min-residency example
+
+In graph 2 above, that takes into account idle states entry/exit energy
+costs, it is clear that if the idle state residency time (ie time till next
+wake-up IRQ) is less than IDLE2-min-residency, IDLE1 is the better idle state
+choice energywise.
+
+This is mainly down to the fact that IDLE1 entry/exit energy costs are lower
+than IDLE2.
+
+However, the lower power consumption (ie shallower energy curve slope) of idle
+state IDLE2 implies that after a suitable time, IDLE2 becomes more energy
+efficient.
+
+The time at which IDLE2 becomes more energy efficient than IDLE1 (and other
+shallower states in a system with multiple idle states) is defined
+IDLE2-min-residency and corresponds to the time when energy consumption of
+IDLE1 and IDLE2 states breaks even.
+
+The definitions provided in this section underpin the idle states
+properties specification that is the subject of the following sections.
+
+===========================================
+3 - idle-states node
+===========================================
+
+ARM processor idle states are defined within the idle-states node, which is
+a direct child of the cpus node [1] and provides a container where the
+processor idle states, defined as device tree nodes, are listed.
+
+- idle-states node
+
+ Usage: Optional - On ARM systems, it is a container of processor idle
+ states nodes. If the system does not provide CPU
+ power management capabilities or the processor just
+ supports idle_standby an idle-states node is not
+ required.
+
+ Description: idle-states node is a container node, where its
+ subnodes describe the CPU idle states.
+
+ Node name must be "idle-states".
+
+ The idle-states node's parent node must be the cpus node.
+
+ The idle-states node's child nodes can be:
+
+ - one or more state nodes
+
+ Any other configuration is considered invalid.
+
+ An idle-states node defines the following properties:
+
+ - entry-method
+ Value type: <stringlist>
+ Usage and definition depend on ARM architecture version.
+ # On ARM v8 64-bit this property is required and must
+ be one of:
+ - "psci" (see bindings in [2])
+ # On ARM 32-bit systems this property is optional
+
+The nodes describing the idle states (state) can only be defined within the
+idle-states node, any other configuration is considered invalid and therefore
+must be ignored.
+
+===========================================
+4 - state node
+===========================================
+
+A state node represents an idle state description and must be defined as
+follows:
+
+- state node
+
+ Description: must be child of the idle-states node
+
+ The state node name shall follow standard device tree naming
+ rules ([5], 2.2.1 "Node names"), in particular state nodes which
+ are siblings within a single common parent must be given a unique name.
+
+ The idle state entered by executing the wfi instruction (idle_standby
+ SBSA,[3][4]) is considered standard on all ARM platforms and therefore
+ must not be listed.
+
+ With the definitions provided above, the following list represents
+ the valid properties for a state node:
+
+ - compatible
+ Usage: Required
+ Value type: <stringlist>
+ Definition: Must be "arm,idle-state".
+
+ - local-timer-stop
+ Usage: See definition
+ Value type: <none>
+ Definition: if present the CPU local timer control logic is
+ lost on state entry, otherwise it is retained.
+
+ - entry-latency-us
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: u32 value representing worst case latency in
+ microseconds required to enter the idle state.
+ The exit-latency-us duration may be guaranteed
+ only after entry-latency-us has passed.
+
+ - exit-latency-us
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: u32 value representing worst case latency
+ in microseconds required to exit the idle state.
+
+ - min-residency-us
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: u32 value representing minimum residency duration
+ in microseconds, inclusive of preparation and
+ entry, for this idle state to be considered
+ worthwhile energy wise (refer to section 2 of
+ this document for a complete description).
+
+ - wakeup-latency-us:
+ Usage: Optional
+ Value type: <prop-encoded-array>
+ Definition: u32 value representing maximum delay between the
+ signaling of a wake-up event and the CPU being
+ able to execute normal code again. If omitted,
+ this is assumed to be equal to:
+
+ entry-latency-us + exit-latency-us
+
+ It is important to supply this value on systems
+ where the duration of PREP phase (see diagram 1,
+ section 2) is non-neglibigle.
+ In such systems entry-latency-us + exit-latency-us
+ will exceed wakeup-latency-us by this duration.
+
+ - status:
+ Usage: Optional
+ Value type: <string>
+ Definition: A standard device tree property [5] that indicates
+ the operational status of an idle-state.
+ If present, it shall be:
+ "okay": to indicate that the idle state is
+ operational.
+ "disabled": to indicate that the idle state has
+ been disabled in firmware so it is not
+ operational.
+ If the property is not present the idle-state must
+ be considered operational.
+
+ - idle-state-name:
+ Usage: Optional
+ Value type: <string>
+ Definition: A string used as a descriptive name for the idle
+ state.
+
+ In addition to the properties listed above, a state node may require
+ additional properties specifics to the entry-method defined in the
+ idle-states node, please refer to the entry-method bindings
+ documentation for properties definitions.
+
+===========================================
+4 - Examples
+===========================================
+
+Example 1 (ARM 64-bit, 16-cpu system, PSCI enable-method):
+
+cpus {
+ #size-cells = <0>;
+ #address-cells = <2>;
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x0>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+ &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x1>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+ &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU2: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x100>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+ &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU3: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x101>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+ &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU4: cpu@10000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10000>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+ &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU5: cpu@10001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10001>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+ &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU6: cpu@10100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10100>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+ &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU7: cpu@10101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10101>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_0_0 &CPU_SLEEP_0_0
+ &CLUSTER_RETENTION_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU8: cpu@100000000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x0>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+ &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+ };
+
+ CPU9: cpu@100000001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x1>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+ &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+ };
+
+ CPU10: cpu@100000100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x100>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+ &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+ };
+
+ CPU11: cpu@100000101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x101>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+ &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+ };
+
+ CPU12: cpu@100010000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x10000>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+ &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+ };
+
+ CPU13: cpu@100010001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x10001>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+ &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+ };
+
+ CPU14: cpu@100010100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x10100>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+ &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+ };
+
+ CPU15: cpu@100010101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a53";
+ reg = <0x1 0x10101>;
+ enable-method = "psci";
+ cpu-idle-states = <&CPU_RETENTION_1_0 &CPU_SLEEP_1_0
+ &CLUSTER_RETENTION_1 &CLUSTER_SLEEP_1>;
+ };
+
+ idle-states {
+ entry-method = "arm,psci";
+
+ CPU_RETENTION_0_0: cpu-retention-0-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x0010000>;
+ entry-latency-us = <20>;
+ exit-latency-us = <40>;
+ min-residency-us = <80>;
+ };
+
+ CLUSTER_RETENTION_0: cluster-retention-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x1010000>;
+ entry-latency-us = <50>;
+ exit-latency-us = <100>;
+ min-residency-us = <250>;
+ wakeup-latency-us = <130>;
+ };
+
+ CPU_SLEEP_0_0: cpu-sleep-0-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x0010000>;
+ entry-latency-us = <250>;
+ exit-latency-us = <500>;
+ min-residency-us = <950>;
+ };
+
+ CLUSTER_SLEEP_0: cluster-sleep-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x1010000>;
+ entry-latency-us = <600>;
+ exit-latency-us = <1100>;
+ min-residency-us = <2700>;
+ wakeup-latency-us = <1500>;
+ };
+
+ CPU_RETENTION_1_0: cpu-retention-1-0 {
+ compatible = "arm,idle-state";
+ arm,psci-suspend-param = <0x0010000>;
+ entry-latency-us = <20>;
+ exit-latency-us = <40>;
+ min-residency-us = <90>;
+ };
+
+ CLUSTER_RETENTION_1: cluster-retention-1 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x1010000>;
+ entry-latency-us = <50>;
+ exit-latency-us = <100>;
+ min-residency-us = <270>;
+ wakeup-latency-us = <100>;
+ };
+
+ CPU_SLEEP_1_0: cpu-sleep-1-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x0010000>;
+ entry-latency-us = <70>;
+ exit-latency-us = <100>;
+ min-residency-us = <300>;
+ wakeup-latency-us = <150>;
+ };
+
+ CLUSTER_SLEEP_1: cluster-sleep-1 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ arm,psci-suspend-param = <0x1010000>;
+ entry-latency-us = <500>;
+ exit-latency-us = <1200>;
+ min-residency-us = <3500>;
+ wakeup-latency-us = <1300>;
+ };
+ };
+
+};
+
+Example 2 (ARM 32-bit, 8-cpu system, two clusters):
+
+cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x0>;
+ cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x1>;
+ cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU2: cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x2>;
+ cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU3: cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x3>;
+ cpu-idle-states = <&CPU_SLEEP_0_0 &CLUSTER_SLEEP_0>;
+ };
+
+ CPU4: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x100>;
+ cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
+ };
+
+ CPU5: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x101>;
+ cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
+ };
+
+ CPU6: cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x102>;
+ cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
+ };
+
+ CPU7: cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x103>;
+ cpu-idle-states = <&CPU_SLEEP_1_0 &CLUSTER_SLEEP_1>;
+ };
+
+ idle-states {
+ CPU_SLEEP_0_0: cpu-sleep-0-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ entry-latency-us = <200>;
+ exit-latency-us = <100>;
+ min-residency-us = <400>;
+ wakeup-latency-us = <250>;
+ };
+
+ CLUSTER_SLEEP_0: cluster-sleep-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ entry-latency-us = <500>;
+ exit-latency-us = <1500>;
+ min-residency-us = <2500>;
+ wakeup-latency-us = <1700>;
+ };
+
+ CPU_SLEEP_1_0: cpu-sleep-1-0 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ entry-latency-us = <300>;
+ exit-latency-us = <500>;
+ min-residency-us = <900>;
+ wakeup-latency-us = <600>;
+ };
+
+ CLUSTER_SLEEP_1: cluster-sleep-1 {
+ compatible = "arm,idle-state";
+ local-timer-stop;
+ entry-latency-us = <800>;
+ exit-latency-us = <2000>;
+ min-residency-us = <6500>;
+ wakeup-latency-us = <2300>;
+ };
+ };
+
+};
+
+===========================================
+5 - References
+===========================================
+
+[1] ARM Linux Kernel documentation - CPUs bindings
+ Documentation/devicetree/bindings/arm/cpus.txt
+
+[2] ARM Linux Kernel documentation - PSCI bindings
+ Documentation/devicetree/bindings/arm/psci.txt
+
+[3] ARM Server Base System Architecture (SBSA)
+ http://infocenter.arm.com/help/index.jsp
+
+[4] ARM Architecture Reference Manuals
+ http://infocenter.arm.com/help/index.jsp
+
+[5] ePAPR standard
+ https://www.power.org/documentation/epapr-version-1-1/
diff --git a/Documentation/devicetree/bindings/arm/insignal-boards.txt b/Documentation/devicetree/bindings/arm/insignal-boards.txt
new file mode 100644
index 000000000..524c3dc5d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/insignal-boards.txt
@@ -0,0 +1,8 @@
+* Insignal's Exynos4210 based Origen evaluation board
+
+Origen low-cost evaluation board is based on Samsung's Exynos4210 SoC.
+
+Required root node properties:
+ - compatible = should be one or more of the following.
+ (a) "samsung,smdkv310" - for Samsung's SMDKV310 eval board.
+ (b) "samsung,exynos4210" - for boards based on Exynos4210 SoC.
diff --git a/Documentation/devicetree/bindings/arm/keystone/keystone.txt b/Documentation/devicetree/bindings/arm/keystone/keystone.txt
new file mode 100644
index 000000000..59d7a46f8
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/keystone/keystone.txt
@@ -0,0 +1,20 @@
+TI Keystone Platforms Device Tree Bindings
+-----------------------------------------------
+
+Boards with Keystone2 based devices (TCI66xxK2H) SOC shall have the
+following properties.
+
+Required properties:
+ - compatible: All TI specific devices present in Keystone SOC should be in
+ the form "ti,keystone-*". Generic devices like gic, arch_timers, ns16550
+ type UART should use the specified compatible for those devices.
+
+Boards:
+- Keystone 2 Hawking/Kepler EVM
+ compatible = "ti,k2hk-evm","ti,keystone"
+
+- Keystone 2 Lamarr EVM
+ compatible = "ti,k2l-evm","ti,keystone"
+
+- Keystone 2 Edison EVM
+ compatible = "ti,k2e-evm","ti,keystone"
diff --git a/Documentation/devicetree/bindings/arm/kirkwood.txt b/Documentation/devicetree/bindings/arm/kirkwood.txt
new file mode 100644
index 000000000..98cce9a65
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/kirkwood.txt
@@ -0,0 +1,27 @@
+Marvell Kirkwood Platforms Device Tree Bindings
+-----------------------------------------------
+
+Boards with a SoC of the Marvell Kirkwood
+shall have the following property:
+
+Required root node property:
+
+compatible: must contain "marvell,kirkwood";
+
+In order to support the kirkwood cpufreq driver, there must be a node
+cpus/cpu@0 with three clocks, "cpu_clk", "ddrclk" and "powersave",
+where the "powersave" clock is a gating clock used to switch the CPU
+between the "cpu_clk" and the "ddrclk".
+
+Example:
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "marvell,sheeva-88SV131";
+ clocks = <&core_clk 1>, <&core_clk 3>, <&gate_clk 11>;
+ clock-names = "cpu_clk", "ddrclk", "powersave";
+ };
diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt
new file mode 100644
index 000000000..0dbabe9a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/l2cc.txt
@@ -0,0 +1,82 @@
+* ARM L2 Cache Controller
+
+ARM cores often have a separate level 2 cache controller. There are various
+implementations of the L2 cache controller with compatible programming models.
+Some of the properties that are just prefixed "cache-*" are taken from section
+3.7.3 of the ePAPR v1.1 specification which can be found at:
+https://www.power.org/wp-content/uploads/2012/06/Power_ePAPR_APPROVED_v1.1.pdf
+
+The ARM L2 cache representation in the device tree should be done as follows:
+
+Required properties:
+
+- compatible : should be one of:
+ "arm,pl310-cache"
+ "arm,l220-cache"
+ "arm,l210-cache"
+ "bcm,bcm11351-a2-pl310-cache": DEPRECATED by "brcm,bcm11351-a2-pl310-cache"
+ "brcm,bcm11351-a2-pl310-cache": For Broadcom bcm11351 chipset where an
+ offset needs to be added to the address before passing down to the L2
+ cache controller
+ "marvell,aurora-system-cache": Marvell Controller designed to be
+ compatible with the ARM one, with system cache mode (meaning
+ maintenance operations on L1 are broadcasted to the L2 and L2
+ performs the same operation).
+ "marvell,aurora-outer-cache": Marvell Controller designed to be
+ compatible with the ARM one with outer cache mode.
+ "marvell,tauros3-cache": Marvell Tauros3 cache controller, compatible
+ with arm,pl310-cache controller.
+- cache-unified : Specifies the cache is a unified cache.
+- cache-level : Should be set to 2 for a level 2 cache.
+- reg : Physical base address and size of cache controller's memory mapped
+ registers.
+
+Optional properties:
+
+- arm,data-latency : Cycles of latency for Data RAM accesses. Specifies 3 cells of
+ read, write and setup latencies. Minimum valid values are 1. Controllers
+ without setup latency control should use a value of 0.
+- arm,tag-latency : Cycles of latency for Tag RAM accesses. Specifies 3 cells of
+ read, write and setup latencies. Controllers without setup latency control
+ should use 0. Controllers without separate read and write Tag RAM latency
+ values should only use the first cell.
+- arm,dirty-latency : Cycles of latency for Dirty RAMs. This is a single cell.
+- arm,filter-ranges : <start length> Starting address and length of window to
+ filter. Addresses in the filter window are directed to the M1 port. Other
+ addresses will go to the M0 port.
+- arm,io-coherent : indicates that the system is operating in an hardware
+ I/O coherent mode. Valid only when the arm,pl310-cache compatible
+ string is used.
+- interrupts : 1 combined interrupt.
+- cache-size : specifies the size in bytes of the cache
+- cache-sets : specifies the number of associativity sets of the cache
+- cache-block-size : specifies the size in bytes of a cache block
+- cache-line-size : specifies the size in bytes of a line in the cache,
+ if this is not specified, the line size is assumed to be equal to the
+ cache block size
+- cache-id-part: cache id part number to be used if it is not present
+ on hardware
+- wt-override: If present then L2 is forced to Write through mode
+- arm,double-linefill : Override double linefill enable setting. Enable if
+ non-zero, disable if zero.
+- arm,double-linefill-incr : Override double linefill on INCR read. Enable
+ if non-zero, disable if zero.
+- arm,double-linefill-wrap : Override double linefill on WRAP read. Enable
+ if non-zero, disable if zero.
+- arm,prefetch-drop : Override prefetch drop enable setting. Enable if non-zero,
+ disable if zero.
+- arm,prefetch-offset : Override prefetch offset value. Valid values are
+ 0-7, 15, 23, and 31.
+
+Example:
+
+L2: cache-controller {
+ compatible = "arm,pl310-cache";
+ reg = <0xfff12000 0x1000>;
+ arm,data-latency = <1 1 1>;
+ arm,tag-latency = <2 2 2>;
+ arm,filter-ranges = <0x80000000 0x8000000>;
+ cache-unified;
+ cache-level = <2>;
+ interrupts = <45>;
+};
diff --git a/Documentation/devicetree/bindings/arm/lpc32xx-mic.txt b/Documentation/devicetree/bindings/arm/lpc32xx-mic.txt
new file mode 100644
index 000000000..539adca19
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/lpc32xx-mic.txt
@@ -0,0 +1,38 @@
+* NXP LPC32xx Main Interrupt Controller
+ (MIC, including SIC1 and SIC2 secondary controllers)
+
+Required properties:
+- compatible: Should be "nxp,lpc3220-mic"
+- interrupt-controller: Identifies the node as an interrupt controller.
+- interrupt-parent: Empty for the interrupt controller itself
+- #interrupt-cells: The number of cells to define the interrupts. Should be 2.
+ The first cell is the IRQ number
+ The second cell is used to specify mode:
+ 1 = low-to-high edge triggered
+ 2 = high-to-low edge triggered
+ 4 = active high level-sensitive
+ 8 = active low level-sensitive
+ Default for internal sources should be set to 4 (active high).
+- reg: Should contain MIC registers location and length
+
+Examples:
+ /*
+ * MIC
+ */
+ mic: interrupt-controller@40008000 {
+ compatible = "nxp,lpc3220-mic";
+ interrupt-controller;
+ interrupt-parent;
+ #interrupt-cells = <2>;
+ reg = <0x40008000 0xC000>;
+ };
+
+ /*
+ * ADC
+ */
+ adc@40048000 {
+ compatible = "nxp,lpc3220-adc";
+ reg = <0x40048000 0x1000>;
+ interrupt-parent = <&mic>;
+ interrupts = <39 4>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/lpc32xx.txt b/Documentation/devicetree/bindings/arm/lpc32xx.txt
new file mode 100644
index 000000000..56ec8ddc4
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/lpc32xx.txt
@@ -0,0 +1,8 @@
+NXP LPC32xx Platforms Device Tree Bindings
+------------------------------------------
+
+Boards with the NXP LPC32xx SoC shall have the following properties:
+
+Required root node property:
+
+compatible: must be "nxp,lpc3220", "nxp,lpc3230", "nxp,lpc3240" or "nxp,lpc3250"
diff --git a/Documentation/devicetree/bindings/arm/marvell,berlin.txt b/Documentation/devicetree/bindings/arm/marvell,berlin.txt
new file mode 100644
index 000000000..a99eb9eb1
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell,berlin.txt
@@ -0,0 +1,152 @@
+Marvell Berlin SoC Family Device Tree Bindings
+---------------------------------------------------------------
+
+Boards with a SoC of the Marvell Berlin family, e.g. Armada 1500
+shall have the following properties:
+
+* Required root node properties:
+compatible: must contain "marvell,berlin"
+
+In addition, the above compatible shall be extended with the specific
+SoC and board used. Currently known SoC compatibles are:
+ "marvell,berlin2" for Marvell Armada 1500 (BG2, 88DE3100),
+ "marvell,berlin2cd" for Marvell Armada 1500-mini (BG2CD, 88DE3005)
+ "marvell,berlin2ct" for Marvell Armada ? (BG2CT, 88DE????)
+ "marvell,berlin2q" for Marvell Armada 1500-pro (BG2Q, 88DE3114)
+ "marvell,berlin3" for Marvell Armada ? (BG3, 88DE????)
+
+* Example:
+
+/ {
+ model = "Sony NSZ-GS7";
+ compatible = "sony,nsz-gs7", "marvell,berlin2", "marvell,berlin";
+
+ ...
+}
+
+* Marvell Berlin CPU control bindings
+
+CPU control register allows various operations on CPUs, like resetting them
+independently.
+
+Required properties:
+- compatible: should be "marvell,berlin-cpu-ctrl"
+- reg: address and length of the register set
+
+Example:
+
+cpu-ctrl@f7dd0000 {
+ compatible = "marvell,berlin-cpu-ctrl";
+ reg = <0xf7dd0000 0x10000>;
+};
+
+* Marvell Berlin2 chip control binding
+
+Marvell Berlin SoCs have a chip control register set providing several
+individual registers dealing with pinmux, padmux, clock, reset, and secondary
+CPU boot address. Unfortunately, the individual registers are spread among the
+chip control registers, so there should be a single DT node only providing the
+different functions which are described below.
+
+Required properties:
+- compatible: shall be one of
+ "marvell,berlin2-chip-ctrl" for BG2
+ "marvell,berlin2cd-chip-ctrl" for BG2CD
+ "marvell,berlin2q-chip-ctrl" for BG2Q
+- reg: address and length of following register sets for
+ BG2/BG2CD: chip control register set
+ BG2Q: chip control register set and cpu pll registers
+
+* Marvell Berlin2 system control binding
+
+Marvell Berlin SoCs have a system control register set providing several
+individual registers dealing with pinmux, padmux, and reset.
+
+Required properties:
+- compatible: should be one of
+ "marvell,berlin2-system-ctrl" for BG2
+ "marvell,berlin2cd-system-ctrl" for BG2CD
+ "marvell,berlin2q-system-ctrl" for BG2Q
+- reg: address and length of the system control register set
+
+* Clock provider binding
+
+As clock related registers are spread among the chip control registers, the
+chip control node also provides the clocks. Marvell Berlin2 (BG2, BG2CD, BG2Q)
+SoCs share the same IP for PLLs and clocks, with some minor differences in
+features and register layout.
+
+Required properties:
+- #clock-cells: shall be set to 1
+- clocks: clock specifiers referencing the core clock input clocks
+- clock-names: array of strings describing the input clock specifiers above.
+ Allowed clock-names for the reference clocks are
+ "refclk" for the SoCs osciallator input on all SoCs,
+ and SoC-specific input clocks for
+ BG2/BG2CD: "video_ext0" for the external video clock input
+
+Clocks provided by core clocks shall be referenced by a clock specifier
+indexing one of the provided clocks. Refer to dt-bindings/clock/berlin<soc>.h
+for the corresponding index mapping.
+
+* Pin controller binding
+
+Pin control registers are part of both register sets, chip control and system
+control. The pins controlled are organized in groups, so no actual pin
+information is needed.
+
+A pin-controller node should contain subnodes representing the pin group
+configurations, one per function. Each subnode has the group name and the muxing
+function used.
+
+Be aware the Marvell Berlin datasheets use the keyword 'mode' for what is called
+a 'function' in the pin-controller subsystem.
+
+Required subnode-properties:
+- groups: a list of strings describing the group names.
+- function: a string describing the function used to mux the groups.
+
+* Reset controller binding
+
+A reset controller is part of the chip control registers set. The chip control
+node also provides the reset. The register set is not at the same offset between
+Berlin SoCs.
+
+Required property:
+- #reset-cells: must be set to 2
+
+Example:
+
+chip: chip-control@ea0000 {
+ compatible = "marvell,berlin2-chip-ctrl";
+ #clock-cells = <1>;
+ #reset-cells = <2>;
+ reg = <0xea0000 0x400>;
+ clocks = <&refclk>, <&externaldev 0>;
+ clock-names = "refclk", "video_ext0";
+
+ spi1_pmux: spi1-pmux {
+ groups = "G0";
+ function = "spi1";
+ };
+};
+
+sysctrl: system-controller@d000 {
+ compatible = "marvell,berlin2-system-ctrl";
+ reg = <0xd000 0x100>;
+
+ uart0_pmux: uart0-pmux {
+ groups = "GSM4";
+ function = "uart0";
+ };
+
+ uart1_pmux: uart1-pmux {
+ groups = "GSM5";
+ function = "uart1";
+ };
+
+ uart2_pmux: uart2-pmux {
+ groups = "GSM3";
+ function = "uart2";
+ };
+};
diff --git a/Documentation/devicetree/bindings/arm/marvell,dove.txt b/Documentation/devicetree/bindings/arm/marvell,dove.txt
new file mode 100644
index 000000000..aaaf64c56
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell,dove.txt
@@ -0,0 +1,22 @@
+Marvell Dove Platforms Device Tree Bindings
+-----------------------------------------------
+
+Boards with a Marvell Dove SoC shall have the following properties:
+
+Required root node property:
+- compatible: must contain "marvell,dove";
+
+* Global Configuration registers
+
+Global Configuration registers of Dove SoC are shared by a syscon node.
+
+Required properties:
+- compatible: must contain "marvell,dove-global-config" and "syscon".
+- reg: base address and size of the Global Configuration registers.
+
+Example:
+
+gconf: global-config@e802c {
+ compatible = "marvell,dove-global-config", "syscon";
+ reg = <0xe802c 0x14>;
+};
diff --git a/Documentation/devicetree/bindings/arm/marvell,kirkwood.txt b/Documentation/devicetree/bindings/arm/marvell,kirkwood.txt
new file mode 100644
index 000000000..4f40ff3fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/marvell,kirkwood.txt
@@ -0,0 +1,98 @@
+Marvell Kirkwood SoC Family Device Tree Bindings
+------------------------------------------------
+
+Boards with a SoC of the Marvell Kirkwook family, eg 88f6281
+
+* Required root node properties:
+compatible: must contain "marvell,kirkwood"
+
+In addition, the above compatible shall be extended with the specific
+SoC. Currently known SoC compatibles are:
+
+"marvell,kirkwood-88f6192"
+"marvell,kirkwood-88f6281"
+"marvell,kirkwood-88f6282"
+"marvell,kirkwood-88f6283"
+"marvell,kirkwood-88f6702"
+"marvell,kirkwood-98DX4122"
+
+And in addition, the compatible shall be extended with the specific
+board. Currently known boards are:
+
+"buffalo,lschlv2"
+"buffalo,lsxhl"
+"buffalo,lsxl"
+"dlink,dns-320"
+"dlink,dns-320-a1"
+"dlink,dns-325"
+"dlink,dns-325-a1"
+"dlink,dns-kirkwood"
+"excito,b3"
+"globalscale,dreamplug-003-ds2001"
+"globalscale,guruplug"
+"globalscale,guruplug-server-plus"
+"globalscale,sheevaplug"
+"globalscale,sheevaplug"
+"globalscale,sheevaplug-esata"
+"globalscale,sheevaplug-esata-rev13"
+"iom,iconnect"
+"iom,iconnect-1.1"
+"iom,ix2-200"
+"keymile,km_kirkwood"
+"lacie,cloudbox"
+"lacie,inetspace_v2"
+"lacie,laplug"
+"lacie,nas2big"
+"lacie,netspace_lite_v2"
+"lacie,netspace_max_v2"
+"lacie,netspace_mini_v2"
+"lacie,netspace_v2"
+"marvell,db-88f6281-bp"
+"marvell,db-88f6282-bp"
+"marvell,mv88f6281gtw-ge"
+"marvell,rd88f6281"
+"marvell,rd88f6281"
+"marvell,rd88f6281-a0"
+"marvell,rd88f6281-a1"
+"mpl,cec4"
+"mpl,cec4-10"
+"netgear,readynas"
+"netgear,readynas"
+"netgear,readynas-duo-v2"
+"netgear,readynas-nv+-v2"
+"plathome,openblocks-a6"
+"plathome,openblocks-a7"
+"raidsonic,ib-nas6210"
+"raidsonic,ib-nas6210-b"
+"raidsonic,ib-nas6220"
+"raidsonic,ib-nas6220-b"
+"raidsonic,ib-nas62x0"
+"seagate,dockstar"
+"seagate,goflexnet"
+"synology,ds109"
+"synology,ds110jv10"
+"synology,ds110jv20"
+"synology,ds110jv30"
+"synology,ds111"
+"synology,ds209"
+"synology,ds210jv10"
+"synology,ds210jv20"
+"synology,ds212"
+"synology,ds212jv10"
+"synology,ds212jv20"
+"synology,ds212pv10"
+"synology,ds409"
+"synology,ds409slim"
+"synology,ds410j"
+"synology,ds411"
+"synology,ds411j"
+"synology,ds411slim"
+"synology,ds413jv10"
+"synology,rs212"
+"synology,rs409"
+"synology,rs411"
+"synology,rs812"
+"usi,topkick"
+"usi,topkick-1281P2"
+"zyxel,nsa310"
+"zyxel,nsa310a"
diff --git a/Documentation/devicetree/bindings/arm/mediatek.txt b/Documentation/devicetree/bindings/arm/mediatek.txt
new file mode 100644
index 000000000..dd7550a29
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek.txt
@@ -0,0 +1,31 @@
+MediaTek mt65xx & mt81xx Platforms Device Tree Bindings
+
+Boards with a MediaTek mt65xx/mt81xx SoC shall have the following property:
+
+Required root node property:
+
+compatible: Must contain one of
+ "mediatek,mt6589"
+ "mediatek,mt6592"
+ "mediatek,mt8127"
+ "mediatek,mt8135"
+ "mediatek,mt8173"
+
+
+Supported boards:
+
+- bq Aquaris5 smart phone:
+ Required root node properties:
+ - compatible = "mundoreader,bq-aquaris5", "mediatek,mt6589";
+- Evaluation board for MT6592:
+ Required root node properties:
+ - compatible = "mediatek,mt6592-evb", "mediatek,mt6592";
+- MTK mt8127 tablet moose EVB:
+ Required root node properties:
+ - compatible = "mediatek,mt8127-moose", "mediatek,mt8127";
+- MTK mt8135 tablet EVB:
+ Required root node properties:
+ - compatible = "mediatek,mt8135-evbp1", "mediatek,mt8135";
+- MTK mt8173 tablet EVB:
+ Required root node properties:
+ - compatible = "mediatek,mt8173-evb", "mediatek,mt8173";
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,sysirq.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,sysirq.txt
new file mode 100644
index 000000000..4f5a5352c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,sysirq.txt
@@ -0,0 +1,30 @@
+Mediatek 65xx/81xx sysirq
+
+Mediatek SOCs sysirq support controllable irq inverter for each GIC SPI
+interrupt.
+
+Required properties:
+- compatible: should be one of:
+ "mediatek,mt8173-sysirq"
+ "mediatek,mt8135-sysirq"
+ "mediatek,mt8127-sysirq"
+ "mediatek,mt6592-sysirq"
+ "mediatek,mt6589-sysirq"
+ "mediatek,mt6582-sysirq"
+ "mediatek,mt6577-sysirq"
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Use the same format as specified by GIC in
+ Documentation/devicetree/bindings/arm/gic.txt
+- interrupt-parent: phandle of irq parent for sysirq. The parent must
+ use the same interrupt-cells format as GIC.
+- reg: Physical base address of the intpol registers and length of memory
+ mapped region.
+
+Example:
+ sysirq: interrupt-controller@10200100 {
+ compatible = "mediatek,mt6589-sysirq", "mediatek,mt6577-sysirq";
+ interrupt-controller;
+ #interrupt-cells = <3>;
+ interrupt-parent = <&gic>;
+ reg = <0 0x10200100 0 0x1c>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/moxart.txt b/Documentation/devicetree/bindings/arm/moxart.txt
new file mode 100644
index 000000000..11087edb0
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/moxart.txt
@@ -0,0 +1,12 @@
+MOXA ART device tree bindings
+
+Boards with the MOXA ART SoC shall have the following properties:
+
+Required root node property:
+
+compatible = "moxa,moxart";
+
+Boards:
+
+- UC-7112-LX: embedded computer
+ compatible = "moxa,moxart-uc-7112-lx", "moxa,moxart"
diff --git a/Documentation/devicetree/bindings/arm/mrvl/feroceon.txt b/Documentation/devicetree/bindings/arm/mrvl/feroceon.txt
new file mode 100644
index 000000000..0d244b999
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mrvl/feroceon.txt
@@ -0,0 +1,16 @@
+* Marvell Feroceon Cache
+
+Required properties:
+- compatible : Should be either "marvell,feroceon-cache" or
+ "marvell,kirkwood-cache".
+
+Optional properties:
+- reg : Address of the L2 cache control register. Mandatory for
+ "marvell,kirkwood-cache", not used by "marvell,feroceon-cache"
+
+
+Example:
+ l2: l2-cache@20128 {
+ compatible = "marvell,kirkwood-cache";
+ reg = <0x20128 0x4>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/mrvl/intc.txt b/Documentation/devicetree/bindings/arm/mrvl/intc.txt
new file mode 100644
index 000000000..8b53273cb
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mrvl/intc.txt
@@ -0,0 +1,60 @@
+* Marvell MMP Interrupt controller
+
+Required properties:
+- compatible : Should be "mrvl,mmp-intc", "mrvl,mmp2-intc" or
+ "mrvl,mmp2-mux-intc"
+- reg : Address and length of the register set of the interrupt controller.
+ If the interrupt controller is intc, address and length means the range
+ of the whold interrupt controller. If the interrupt controller is mux-intc,
+ address and length means one register. Since address of mux-intc is in the
+ range of intc. mux-intc is secondary interrupt controller.
+- reg-names : Name of the register set of the interrupt controller. It's
+ only required in mux-intc interrupt controller.
+- interrupts : Should be the port interrupt shared by mux interrupts. It's
+ only required in mux-intc interrupt controller.
+- interrupt-controller : Identifies the node as an interrupt controller.
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source.
+- mrvl,intc-nr-irqs : Specifies the number of interrupts in the interrupt
+ controller.
+- mrvl,clr-mfp-irq : Specifies the interrupt that needs to clear MFP edge
+ detection first.
+
+Example:
+ intc: interrupt-controller@d4282000 {
+ compatible = "mrvl,mmp2-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0xd4282000 0x1000>;
+ mrvl,intc-nr-irqs = <64>;
+ };
+
+ intcmux4@d4282150 {
+ compatible = "mrvl,mmp2-mux-intc";
+ interrupts = <4>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x150 0x4>, <0x168 0x4>;
+ reg-names = "mux status", "mux mask";
+ mrvl,intc-nr-irqs = <2>;
+ };
+
+* Marvell Orion Interrupt controller
+
+Required properties
+- compatible : Should be "marvell,orion-intc".
+- #interrupt-cells: Specifies the number of cells needed to encode an
+ interrupt source. Supported value is <1>.
+- interrupt-controller : Declare this node to be an interrupt controller.
+- reg : Interrupt mask address. A list of 4 byte ranges, one per controller.
+ One entry in the list represents 32 interrupts.
+
+Example:
+
+ intc: interrupt-controller {
+ compatible = "marvell,orion-intc", "marvell,intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0xfed20204 0x04>,
+ <0xfed20214 0x04>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/mrvl/mrvl.txt b/Documentation/devicetree/bindings/arm/mrvl/mrvl.txt
new file mode 100644
index 000000000..117d741a2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mrvl/mrvl.txt
@@ -0,0 +1,14 @@
+Marvell Platforms Device Tree Bindings
+----------------------------------------------------
+
+PXA168 Aspenite Board
+Required root node properties:
+ - compatible = "mrvl,pxa168-aspenite", "mrvl,pxa168";
+
+PXA910 DKB Board
+Required root node properties:
+ - compatible = "mrvl,pxa910-dkb";
+
+MMP2 Brownstone Board
+Required root node properties:
+ - compatible = "mrvl,mmp2-brownstone";
diff --git a/Documentation/devicetree/bindings/arm/mrvl/tauros2.txt b/Documentation/devicetree/bindings/arm/mrvl/tauros2.txt
new file mode 100644
index 000000000..31af1cbb6
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mrvl/tauros2.txt
@@ -0,0 +1,17 @@
+* Marvell Tauros2 Cache
+
+Required properties:
+- compatible : Should be "marvell,tauros2-cache".
+- marvell,tauros2-cache-features : Specify the features supported for the
+ tauros2 cache.
+ The features including
+ CACHE_TAUROS2_PREFETCH_ON (1 << 0)
+ CACHE_TAUROS2_LINEFILL_BURST8 (1 << 1)
+ The definition can be found at
+ arch/arm/include/asm/hardware/cache-tauros2.h
+
+Example:
+ L2: l2-cache {
+ compatible = "marvell,tauros2-cache";
+ marvell,tauros2-cache-features = <0x3>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/mrvl/timer.txt b/Documentation/devicetree/bindings/arm/mrvl/timer.txt
new file mode 100644
index 000000000..9a6e25146
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mrvl/timer.txt
@@ -0,0 +1,13 @@
+* Marvell MMP Timer controller
+
+Required properties:
+- compatible : Should be "mrvl,mmp-timer".
+- reg : Address and length of the register set of timer controller.
+- interrupts : Should be the interrupt number.
+
+Example:
+ timer0: timer@d4014000 {
+ compatible = "mrvl,mmp-timer";
+ reg = <0xd4014000 0x100>;
+ interrupts = <13>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt b/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt
new file mode 100644
index 000000000..06df04cc8
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/msm/qcom,idle-state.txt
@@ -0,0 +1,84 @@
+QCOM Idle States for cpuidle driver
+
+ARM provides idle-state node to define the cpuidle states, as defined in [1].
+cpuidle-qcom is the cpuidle driver for Qualcomm SoCs and uses these idle
+states. Idle states have different enter/exit latency and residency values.
+The idle states supported by the QCOM SoC are defined as -
+
+ * Standby
+ * Retention
+ * Standalone Power Collapse (Standalone PC or SPC)
+ * Power Collapse (PC)
+
+Standby: Standby does a little more in addition to architectural clock gating.
+When the WFI instruction is executed the ARM core would gate its internal
+clocks. In addition to gating the clocks, QCOM cpus use this instruction as a
+trigger to execute the SPM state machine. The SPM state machine waits for the
+interrupt to trigger the core back in to active. This triggers the cache
+hierarchy to enter standby states, when all cpus are idle. An interrupt brings
+the SPM state machine out of its wait, the next step is to ensure that the
+cache hierarchy is also out of standby, and then the cpu is allowed to resume
+execution. This state is defined as a generic ARM WFI state by the ARM cpuidle
+driver and is not defined in the DT. The SPM state machine should be
+configured to execute this state by default and after executing every other
+state below.
+
+Retention: Retention is a low power state where the core is clock gated and
+the memory and the registers associated with the core are retained. The
+voltage may be reduced to the minimum value needed to keep the processor
+registers active. The SPM should be configured to execute the retention
+sequence and would wait for interrupt, before restoring the cpu to execution
+state. Retention may have a slightly higher latency than Standby.
+
+Standalone PC: A cpu can power down and warmboot if there is a sufficient time
+between the time it enters idle and the next known wake up. SPC mode is used
+to indicate a core entering a power down state without consulting any other
+cpu or the system resources. This helps save power only on that core. The SPM
+sequence for this idle state is programmed to power down the supply to the
+core, wait for the interrupt, restore power to the core, and ensure the
+system state including cache hierarchy is ready before allowing core to
+resume. Applying power and resetting the core causes the core to warmboot
+back into Elevation Level (EL) which trampolines the control back to the
+kernel. Entering a power down state for the cpu, needs to be done by trapping
+into a EL. Failing to do so, would result in a crash enforced by the warm boot
+code in the EL for the SoC. On SoCs with write-back L1 cache, the cache has to
+be flushed in s/w, before powering down the core.
+
+Power Collapse: This state is similar to the SPC mode, but distinguishes
+itself in that the cpu acknowledges and permits the SoC to enter deeper sleep
+modes. In a hierarchical power domain SoC, this means L2 and other caches can
+be flushed, system bus, clocks - lowered, and SoC main XO clock gated and
+voltages reduced, provided all cpus enter this state. Since the span of low
+power modes possible at this state is vast, the exit latency and the residency
+of this low power mode would be considered high even though at a cpu level,
+this essentially is cpu power down. The SPM in this state also may handshake
+with the Resource power manager (RPM) processor in the SoC to indicate a
+complete application processor subsystem shut down.
+
+The idle-state for QCOM SoCs are distinguished by the compatible property of
+the idle-states device node.
+
+The devicetree representation of the idle state should be -
+
+Required properties:
+
+- compatible: Must be one of -
+ "qcom,idle-state-ret",
+ "qcom,idle-state-spc",
+ "qcom,idle-state-pc",
+ and "arm,idle-state".
+
+Other required and optional properties are specified in [1].
+
+Example:
+
+ idle-states {
+ CPU_SPC: spc {
+ compatible = "qcom,idle-state-spc", "arm,idle-state";
+ entry-latency-us = <150>;
+ exit-latency-us = <200>;
+ min-residency-us = <2000>;
+ };
+ };
+
+[1]. Documentation/devicetree/bindings/arm/idle-states.txt
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt b/Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt
new file mode 100644
index 000000000..1333db9ac
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/msm/qcom,kpss-acc.txt
@@ -0,0 +1,30 @@
+Krait Processor Sub-system (KPSS) Application Clock Controller (ACC)
+
+The KPSS ACC provides clock, power domain, and reset control to a Krait CPU.
+There is one ACC register region per CPU within the KPSS remapped region as
+well as an alias register region that remaps accesses to the ACC associated
+with the CPU accessing the region.
+
+PROPERTIES
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: should be one of:
+ "qcom,kpss-acc-v1"
+ "qcom,kpss-acc-v2"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the first element specifies the base address and size of
+ the register region. An optional second element specifies
+ the base address and size of the alias register region.
+
+Example:
+
+ clock-controller@2088000 {
+ compatible = "qcom,kpss-acc-v2";
+ reg = <0x02088000 0x1000>,
+ <0x02008000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt b/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt
new file mode 100644
index 000000000..ae4afc6dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/msm/qcom,saw2.txt
@@ -0,0 +1,57 @@
+SPM AVS Wrapper 2 (SAW2)
+
+The SAW2 is a wrapper around the Subsystem Power Manager (SPM) and the
+Adaptive Voltage Scaling (AVS) hardware. The SPM is a programmable
+power-controller that transitions a piece of hardware (like a processor or
+subsystem) into and out of low power modes via a direct connection to
+the PMIC. It can also be wired up to interact with other processors in the
+system, notifying them when a low power state is entered or exited.
+
+Multiple revisions of the SAW hardware are supported using these Device Nodes.
+SAW2 revisions differ in the register offset and configuration data. Also, the
+same revision of the SAW in different SoCs may have different configuration
+data due the the differences in hardware capabilities. Hence the SoC name, the
+version of the SAW hardware in that SoC and the distinction between cpu (big
+or Little) or cache, may be needed to uniquely identify the SAW register
+configuration and initialization data. The compatible string is used to
+indicate this parameter.
+
+PROPERTIES
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: Must have
+ "qcom,saw2"
+ A more specific value could be one of:
+ "qcom,apq8064-saw2-v1.1-cpu"
+ "qcom,msm8974-saw2-v2.1-cpu"
+ "qcom,apq8084-saw2-v2.1-cpu"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the first element specifies the base address and size of
+ the register region. An optional second element specifies
+ the base address and size of the alias register region.
+
+- regulator:
+ Usage: optional
+ Value type: boolean
+ Definition: Indicates that this SPM device acts as a regulator device
+ device for the core (CPU or Cache) the SPM is attached
+ to.
+
+Example 1:
+
+ power-controller@2099000 {
+ compatible = "qcom,saw2";
+ reg = <0x02099000 0x1000>, <0x02009000 0x1000>;
+ regulator;
+ };
+
+Example 2:
+ saw0: power-controller@f9089000 {
+ compatible = "qcom,apq8084-saw2-v2.1-cpu", "qcom,saw2";
+ reg = <0xf9089000 0x1000>, <0xf9009000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/msm/ssbi.txt b/Documentation/devicetree/bindings/arm/msm/ssbi.txt
new file mode 100644
index 000000000..54fd5ced3
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/msm/ssbi.txt
@@ -0,0 +1,18 @@
+* Qualcomm SSBI
+
+Some Qualcomm MSM devices contain a point-to-point serial bus used to
+communicate with a limited range of devices (mostly power management
+chips).
+
+These require the following properties:
+
+- compatible: "qcom,ssbi"
+
+- qcom,controller-type
+ indicates the SSBI bus variant the controller should use to talk
+ with the slave device. This should be one of "ssbi", "ssbi2", or
+ "pmic-arbiter". The type chosen is determined by the attached
+ slave.
+
+The slave device should be the single child node of the ssbi device
+with a compatible field.
diff --git a/Documentation/devicetree/bindings/arm/msm/timer.txt b/Documentation/devicetree/bindings/arm/msm/timer.txt
new file mode 100644
index 000000000..5e10c3455
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/msm/timer.txt
@@ -0,0 +1,47 @@
+* MSM Timer
+
+Properties:
+
+- compatible : Should at least contain "qcom,msm-timer". More specific
+ properties specify which subsystem the timers are paired with.
+
+ "qcom,kpss-timer" - krait subsystem
+ "qcom,scss-timer" - scorpion subsystem
+
+- interrupts : Interrupts for the debug timer, the first general purpose
+ timer, and optionally a second general purpose timer, and
+ optionally as well, 2 watchdog interrupts, in that order.
+
+- reg : Specifies the base address of the timer registers.
+
+- clocks: Reference to the parent clocks, one per output clock. The parents
+ must appear in the same order as the clock names.
+
+- clock-names: The name of the clocks as free-form strings. They should be in
+ the same order as the clocks.
+
+- clock-frequency : The frequency of the debug timer and the general purpose
+ timer(s) in Hz in that order.
+
+Optional:
+
+- cpu-offset : per-cpu offset used when the timer is accessed without the
+ CPU remapping facilities. The offset is
+ cpu-offset + (0x10000 * cpu-nr).
+
+Example:
+
+ timer@200a000 {
+ compatible = "qcom,scss-timer", "qcom,msm-timer";
+ interrupts = <1 1 0x301>,
+ <1 2 0x301>,
+ <1 3 0x301>,
+ <1 4 0x301>,
+ <1 5 0x301>;
+ reg = <0x0200a000 0x100>;
+ clock-frequency = <19200000>,
+ <32768>;
+ clocks = <&sleep_clk>;
+ clock-names = "sleep";
+ cpu-offset = <0x40000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/mvebu-system-controller.txt b/Documentation/devicetree/bindings/arm/mvebu-system-controller.txt
new file mode 100644
index 000000000..d24ab2ebf
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mvebu-system-controller.txt
@@ -0,0 +1,18 @@
+MVEBU System Controller
+-----------------------
+MVEBU (Marvell SOCs: Armada 370/375/XP, Dove, mv78xx0, Kirkwood, Orion5x)
+
+Required properties:
+
+- compatible: one of:
+ - "marvell,orion-system-controller"
+ - "marvell,armada-370-xp-system-controller"
+ - "marvell,armada-375-system-controller"
+- reg: Should contain system controller registers location and length.
+
+Example:
+
+ system-controller@d0018200 {
+ compatible = "marvell,armada-370-xp-system-controller";
+ reg = <0xd0018200 0x500>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/nspire.txt b/Documentation/devicetree/bindings/arm/nspire.txt
new file mode 100644
index 000000000..4d08518bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/nspire.txt
@@ -0,0 +1,14 @@
+TI-NSPIRE calculators
+
+Required properties:
+- compatible: Compatible property value should contain "ti,nspire".
+ CX models should have "ti,nspire-cx"
+ Touchpad models should have "ti,nspire-tp"
+ Clickpad models should have "ti,nspire-clp"
+
+Example:
+
+/ {
+ model = "TI-NSPIRE CX";
+ compatible = "ti,nspire-cx";
+ ...
diff --git a/Documentation/devicetree/bindings/arm/olimex.txt b/Documentation/devicetree/bindings/arm/olimex.txt
new file mode 100644
index 000000000..007fb5c68
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/olimex.txt
@@ -0,0 +1,6 @@
+Olimex i.MX Platforms Device Tree Bindings
+------------------------------------------
+
+i.MX23 Olinuxino Low Cost Board
+Required root node properties:
+ - compatible = "olimex,imx23-olinuxino", "fsl,imx23";
diff --git a/Documentation/devicetree/bindings/arm/omap/counter.txt b/Documentation/devicetree/bindings/arm/omap/counter.txt
new file mode 100644
index 000000000..5bd8aa091
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/counter.txt
@@ -0,0 +1,15 @@
+OMAP Counter-32K bindings
+
+Required properties:
+- compatible: Must be "ti,omap-counter32k" for OMAP controllers
+- reg: Contains timer register address range (base address and length)
+- ti,hwmods: Name of the hwmod associated to the counter, which is typically
+ "counter_32k"
+
+Example:
+
+counter32k: counter@4a304000 {
+ compatible = "ti,omap-counter32k";
+ reg = <0x4a304000 0x20>;
+ ti,hwmods = "counter_32k";
+};
diff --git a/Documentation/devicetree/bindings/arm/omap/crossbar.txt b/Documentation/devicetree/bindings/arm/omap/crossbar.txt
new file mode 100644
index 000000000..a9b28d74d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/crossbar.txt
@@ -0,0 +1,55 @@
+Some socs have a large number of interrupts requests to service
+the needs of its many peripherals and subsystems. All of the
+interrupt lines from the subsystems are not needed at the same
+time, so they have to be muxed to the irq-controller appropriately.
+In such places a interrupt controllers are preceded by an CROSSBAR
+that provides flexibility in muxing the device requests to the controller
+inputs.
+
+Required properties:
+- compatible : Should be "ti,irq-crossbar"
+- reg: Base address and the size of the crossbar registers.
+- interrupt-controller: indicates that this block is an interrupt controller.
+- interrupt-parent: the interrupt controller this block is connected to.
+- ti,max-irqs: Total number of irqs available at the parent interrupt controller.
+- ti,max-crossbar-sources: Maximum number of crossbar sources that can be routed.
+- ti,reg-size: Size of a individual register in bytes. Every individual
+ register is assumed to be of same size. Valid sizes are 1, 2, 4.
+- ti,irqs-reserved: List of the reserved irq lines that are not muxed using
+ crossbar. These interrupt lines are reserved in the soc,
+ so crossbar bar driver should not consider them as free
+ lines.
+
+Optional properties:
+- ti,irqs-skip: This is similar to "ti,irqs-reserved", but these are for
+ SOC-specific hard-wiring of those irqs which unexpectedly bypasses the
+ crossbar. These irqs have a crossbar register, but still cannot be used.
+
+- ti,irqs-safe-map: integer which maps to a safe configuration to use
+ when the interrupt controller irq is unused (when not provided, default is 0)
+
+Examples:
+ crossbar_mpu: crossbar@4a002a48 {
+ compatible = "ti,irq-crossbar";
+ reg = <0x4a002a48 0x130>;
+ ti,max-irqs = <160>;
+ ti,max-crossbar-sources = <400>;
+ ti,reg-size = <2>;
+ ti,irqs-reserved = <0 1 2 3 5 6 131 132>;
+ ti,irqs-skip = <10 133 139 140>;
+ };
+
+Consumer:
+========
+See Documentation/devicetree/bindings/interrupt-controller/interrupts.txt and
+Documentation/devicetree/bindings/arm/gic.txt for further details.
+
+An interrupt consumer on an SoC using crossbar will use:
+ interrupts = <GIC_SPI request_number interrupt_level>
+
+Example:
+ device_x@0x4a023000 {
+ /* Crossbar 8 used */
+ interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>;
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/arm/omap/ctrl.txt b/Documentation/devicetree/bindings/arm/omap/ctrl.txt
new file mode 100644
index 000000000..3a4e5901c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/ctrl.txt
@@ -0,0 +1,79 @@
+OMAP Control Module bindings
+
+Control Module contains miscellaneous features under it based on SoC type.
+Pincontrol is one common feature, and it has a specialized support
+described in [1]. Typically some clock nodes are also under control module.
+Syscon is used to share register level access to drivers external to
+control module driver itself.
+
+See [2] for documentation about clock/clockdomain nodes.
+
+[1] Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt
+[2] Documentation/devicetree/bindings/clock/ti/*
+
+Required properties:
+- compatible: Must be one of:
+ "ti,am3-scm"
+ "ti,am4-scm"
+ "ti,dm814-scrm"
+ "ti,dm816-scrm"
+ "ti,omap2-scm"
+ "ti,omap3-scm"
+ "ti,omap4-scm-core"
+ "ti,omap4-scm-padconf-core"
+ "ti,omap5-scm-core"
+ "ti,omap5-scm-padconf-core"
+ "ti,dra7-scm-core"
+- reg: Contains Control Module register address range
+ (base address and length)
+
+Optional properties:
+- clocks: clocks for this module
+- clockdomains: clockdomains for this module
+
+Examples:
+
+scm: scm@2000 {
+ compatible = "ti,omap3-scm", "simple-bus";
+ reg = <0x2000 0x2000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x2000 0x2000>;
+
+ omap3_pmx_core: pinmux@30 {
+ compatible = "ti,omap3-padconf",
+ "pinctrl-single";
+ reg = <0x30 0x230>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ pinctrl-single,register-width = <16>;
+ pinctrl-single,function-mask = <0xff1f>;
+ };
+
+ scm_conf: scm_conf@270 {
+ compatible = "syscon";
+ reg = <0x270 0x330>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ scm_clocks: clocks {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
+
+ scm_clockdomains: clockdomains {
+ };
+}
+
+&scm_clocks {
+ mcbsp5_mux_fck: mcbsp5_mux_fck {
+ #clock-cells = <0>;
+ compatible = "ti,composite-mux-clock";
+ clocks = <&core_96m_fck>, <&mcbsp_clks>;
+ ti,bit-shift = <4>;
+ reg = <0x02d8>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/arm/omap/dmm.txt b/Documentation/devicetree/bindings/arm/omap/dmm.txt
new file mode 100644
index 000000000..8bd6d0a23
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/dmm.txt
@@ -0,0 +1,22 @@
+OMAP Dynamic Memory Manager (DMM) bindings
+
+The dynamic memory manager (DMM) is a module located immediately in front of the
+SDRAM controllers (called EMIFs on OMAP). DMM manages various aspects of memory
+accesses such as priority generation amongst initiators, configuration of SDRAM
+interleaving, optimizing transfer of 2D block objects, and provide MMU-like page
+translation for initiators which need contiguous dma bus addresses.
+
+Required properties:
+- compatible: Should contain "ti,omap4-dmm" for OMAP4 family
+ Should contain "ti,omap5-dmm" for OMAP5 and DRA7x family
+- reg: Contains DMM register address range (base address and length)
+- interrupts: Should contain an interrupt-specifier for DMM_IRQ.
+- ti,hwmods: Name of the hwmod associated to DMM, which is typically "dmm"
+
+Example:
+
+dmm@4e000000 {
+ compatible = "ti,omap4-dmm";
+ reg = <0x4e000000 0x800>;
+ ti,hwmods = "dmm";
+};
diff --git a/Documentation/devicetree/bindings/arm/omap/dsp.txt b/Documentation/devicetree/bindings/arm/omap/dsp.txt
new file mode 100644
index 000000000..d3830a32c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/dsp.txt
@@ -0,0 +1,14 @@
+* TI - DSP (Digital Signal Processor)
+
+TI DSP included in OMAP SoC
+
+Required properties:
+- compatible : Should be "ti,omap3-c64" for OMAP3 & 4
+- ti,hwmods: "dsp"
+
+Examples:
+
+dsp {
+ compatible = "ti,omap3-c64";
+ ti,hwmods = "dsp";
+};
diff --git a/Documentation/devicetree/bindings/arm/omap/intc.txt b/Documentation/devicetree/bindings/arm/omap/intc.txt
new file mode 100644
index 000000000..f2583e6ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/intc.txt
@@ -0,0 +1,27 @@
+* OMAP Interrupt Controller
+
+OMAP2/3 are using a TI interrupt controller that can support several
+configurable number of interrupts.
+
+Main node required properties:
+
+- compatible : should be:
+ "ti,omap2-intc"
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The type shall be a <u32> and the value shall be 1.
+
+ The cell contains the interrupt number in the range [0-128].
+- ti,intc-size: Number of interrupts handled by the interrupt controller.
+- reg: physical base address and size of the intc registers map.
+
+Example:
+
+ intc: interrupt-controller@1 {
+ compatible = "ti,omap2-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ ti,intc-size = <96>;
+ reg = <0x48200000 0x1000>;
+ };
+
diff --git a/Documentation/devicetree/bindings/arm/omap/iva.txt b/Documentation/devicetree/bindings/arm/omap/iva.txt
new file mode 100644
index 000000000..6d6295171
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/iva.txt
@@ -0,0 +1,19 @@
+* TI - IVA (Imaging and Video Accelerator) subsystem
+
+The IVA contain various audio, video or imaging HW accelerator
+depending of the version.
+
+Required properties:
+- compatible : Should be:
+ - "ti,ivahd" for OMAP4
+ - "ti,iva2.2" for OMAP3
+ - "ti,iva2.1" for OMAP2430
+ - "ti,iva1" for OMAP2420
+- ti,hwmods: "iva"
+
+Examples:
+
+iva {
+ compatible = "ti,ivahd", "ti,iva";
+ ti,hwmods = "iva";
+};
diff --git a/Documentation/devicetree/bindings/arm/omap/l3-noc.txt b/Documentation/devicetree/bindings/arm/omap/l3-noc.txt
new file mode 100644
index 000000000..161448da9
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/l3-noc.txt
@@ -0,0 +1,23 @@
+* TI - L3 Network On Chip (NoC)
+
+This version is an implementation of the generic NoC IP
+provided by Arteris.
+
+Required properties:
+- compatible : Should be "ti,omap3-l3-smx" for OMAP3 family
+ Should be "ti,omap4-l3-noc" for OMAP4 family
+ Should be "ti,omap5-l3-noc" for OMAP5 family
+ Should be "ti,dra7-l3-noc" for DRA7 family
+ Should be "ti,am4372-l3-noc" for AM43 family
+- reg: Contains L3 register address range for each noc domain.
+- ti,hwmods: "l3_main_1", ... One hwmod for each noc domain.
+
+Examples:
+
+ocp {
+ compatible = "ti,omap4-l3-noc", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ ti,hwmods = "l3_main_1", "l3_main_2", "l3_main_3";
+};
diff --git a/Documentation/devicetree/bindings/arm/omap/l4.txt b/Documentation/devicetree/bindings/arm/omap/l4.txt
new file mode 100644
index 000000000..b4f8a16e7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/l4.txt
@@ -0,0 +1,26 @@
+L4 interconnect bindings
+
+These bindings describe the OMAP SoCs L4 interconnect bus.
+
+Required properties:
+- compatible : Should be "ti,omap2-l4" for OMAP2 family l4 core bus
+ Should be "ti,omap2-l4-wkup" for OMAP2 family l4 wkup bus
+ Should be "ti,omap3-l4-core" for OMAP3 family l4 core bus
+ Should be "ti,omap4-l4-cfg" for OMAP4 family l4 cfg bus
+ Should be "ti,omap4-l4-wkup" for OMAP4 family l4 wkup bus
+ Should be "ti,omap5-l4-cfg" for OMAP5 family l4 cfg bus
+ Should be "ti,omap5-l4-wkup" for OMAP5 family l4 wkup bus
+ Should be "ti,dra7-l4-cfg" for DRA7 family l4 cfg bus
+ Should be "ti,dra7-l4-wkup" for DRA7 family l4 wkup bus
+ Should be "ti,am3-l4-wkup" for AM33xx family l4 wkup bus
+ Should be "ti,am4-l4-wkup" for AM43xx family l4 wkup bus
+- ranges : contains the IO map range for the bus
+
+Examples:
+
+l4: l4@48000000 {
+ compatible "ti,omap2-l4", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x48000000 0x100000>;
+};
diff --git a/Documentation/devicetree/bindings/arm/omap/mpu.txt b/Documentation/devicetree/bindings/arm/omap/mpu.txt
new file mode 100644
index 000000000..763695db2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/mpu.txt
@@ -0,0 +1,38 @@
+* TI - MPU (Main Processor Unit) subsystem
+
+The MPU subsystem contain one or several ARM cores
+depending of the version.
+The MPU contain CPUs, GIC, L2 cache and a local PRCM.
+
+Required properties:
+- compatible : Should be "ti,omap3-mpu" for OMAP3
+ Should be "ti,omap4-mpu" for OMAP4
+ Should be "ti,omap5-mpu" for OMAP5
+- ti,hwmods: "mpu"
+
+Optional properties:
+- sram: Phandle to the ocmcram node
+
+Examples:
+
+- For an OMAP5 SMP system:
+
+mpu {
+ compatible = "ti,omap5-mpu";
+ ti,hwmods = "mpu"
+};
+
+- For an OMAP4 SMP system:
+
+mpu {
+ compatible = "ti,omap4-mpu";
+ ti,hwmods = "mpu";
+};
+
+
+- For an OMAP3 monocore system:
+
+mpu {
+ compatible = "ti,omap3-mpu";
+ ti,hwmods = "mpu";
+};
diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt
new file mode 100644
index 000000000..4f6a82cef
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/omap.txt
@@ -0,0 +1,154 @@
+* Texas Instruments OMAP
+
+OMAP is currently using a static file per SoC family to describe the
+IPs present in the SoC.
+On top of that an omap_device is created to extend the platform_device
+capabilities and to allow binding with one or several hwmods.
+The hwmods will contain all the information to build the device:
+address range, irq lines, dma lines, interconnect, PRCM register,
+clock domain, input clocks.
+For the moment just point to the existing hwmod, the next step will be
+to move data from hwmod to device-tree representation.
+
+
+Required properties:
+- compatible: Every devices present in OMAP SoC should be in the
+ form: "ti,XXX"
+- ti,hwmods: list of hwmod names (ascii strings), that comes from the OMAP
+ HW documentation, attached to a device. Must contain at least
+ one hwmod.
+
+Optional properties:
+- ti,no_idle_on_suspend: When present, it prevents the PM to idle the module
+ during suspend.
+- ti,no-reset-on-init: When present, the module should not be reset at init
+- ti,no-idle-on-init: When present, the module should not be idled at init
+
+Example:
+
+spinlock@1 {
+ compatible = "ti,omap4-spinlock";
+ ti,hwmods = "spinlock";
+};
+
+SoC Type (optional):
+
+- General Purpose devices
+ compatible = "ti,gp"
+- High Security devices
+ compatible = "ti,hs"
+
+SoC Families:
+
+- OMAP2 generic - defaults to OMAP2420
+ compatible = "ti,omap2"
+- OMAP3 generic - defaults to OMAP3430
+ compatible = "ti,omap3"
+- OMAP4 generic - defaults to OMAP4430
+ compatible = "ti,omap4"
+- OMAP5 generic - defaults to OMAP5430
+ compatible = "ti,omap5"
+- DRA7 generic - defaults to DRA742
+ compatible = "ti,dra7"
+- AM43x generic - defaults to AM4372
+ compatible = "ti,am43"
+
+SoCs:
+
+- OMAP2420
+ compatible = "ti,omap2420", "ti,omap2"
+- OMAP2430
+ compatible = "ti,omap2430", "ti,omap2"
+
+- OMAP3430
+ compatible = "ti,omap3430", "ti,omap3"
+- AM3517
+ compatible = "ti,am3517", "ti,omap3"
+- OMAP3630
+ compatible = "ti,omap36xx", "ti,omap3"
+- AM33xx
+ compatible = "ti,am33xx", "ti,omap3"
+
+- OMAP4430
+ compatible = "ti,omap4430", "ti,omap4"
+- OMAP4460
+ compatible = "ti,omap4460", "ti,omap4"
+
+- OMAP5430
+ compatible = "ti,omap5430", "ti,omap5"
+- OMAP5432
+ compatible = "ti,omap5432", "ti,omap5"
+
+- DRA742
+ compatible = "ti,dra742", "ti,dra74", "ti,dra7"
+
+- DRA722
+ compatible = "ti,dra722", "ti,dra72", "ti,dra7"
+
+- AM5728
+ compatible = "ti,am5728", "ti,dra742", "ti,dra74", "ti,dra7"
+
+- AM5726
+ compatible = "ti,am5726", "ti,dra742", "ti,dra74", "ti,dra7"
+
+- AM5718
+ compatible = "ti,am5718", "ti,dra722", "ti,dra72", "ti,dra7"
+
+- AM5716
+ compatible = "ti,am5716", "ti,dra722", "ti,dra72", "ti,dra7"
+
+- AM4372
+ compatible = "ti,am4372", "ti,am43"
+
+Boards:
+
+- OMAP3 BeagleBoard : Low cost community board
+ compatible = "ti,omap3-beagle", "ti,omap3"
+
+- OMAP3 Tobi with Overo : Commercial expansion board with daughter board
+ compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3"
+
+- OMAP4 SDP : Software Development Board
+ compatible = "ti,omap4-sdp", "ti,omap4430"
+
+- OMAP4 PandaBoard : Low cost community board
+ compatible = "ti,omap4-panda", "ti,omap4430"
+
+- OMAP4 DuoVero with Parlor : Commercial expansion board with daughter board
+ compatible = "gumstix,omap4-duovero-parlor", "gumstix,omap4-duovero", "ti,omap4430", "ti,omap4";
+
+- OMAP4 VAR-STK-OM44 : Commercial dev kit with VAR-OM44CustomBoard and VAR-SOM-OM44 w/WLAN
+ compatible = "variscite,var-stk-om44", "variscite,var-som-om44", "ti,omap4460", "ti,omap4";
+
+- OMAP4 VAR-DVK-OM44 : Commercial dev kit with VAR-OM44CustomBoard, VAR-SOM-OM44 w/WLAN and LCD touchscreen
+ compatible = "variscite,var-dvk-om44", "variscite,var-som-om44", "ti,omap4460", "ti,omap4";
+
+- OMAP3 EVM : Software Development Board for OMAP35x, AM/DM37x
+ compatible = "ti,omap3-evm", "ti,omap3"
+
+- AM335X EVM : Software Development Board for AM335x
+ compatible = "ti,am335x-evm", "ti,am33xx", "ti,omap3"
+
+- AM335X Bone : Low cost community board
+ compatible = "ti,am335x-bone", "ti,am33xx", "ti,omap3"
+
+- AM335X OrionLXm : Substation Automation Platform
+ compatible = "novatech,am335x-lxm", "ti,am33xx"
+
+- OMAP5 EVM : Evaluation Module
+ compatible = "ti,omap5-evm", "ti,omap5"
+
+- AM43x EPOS EVM
+ compatible = "ti,am43x-epos-evm", "ti,am4372", "ti,am43"
+
+- AM437x GP EVM
+ compatible = "ti,am437x-gp-evm", "ti,am4372", "ti,am43"
+
+- AM437x SK EVM: AM437x StarterKit Evaluation Module
+ compatible = "ti,am437x-sk-evm", "ti,am4372", "ti,am43"
+
+- DRA742 EVM: Software Development Board for DRA742
+ compatible = "ti,dra7-evm", "ti,dra742", "ti,dra74", "ti,dra7"
+
+- DRA722 EVM: Software Development Board for DRA722
+ compatible = "ti,dra72-evm", "ti,dra722", "ti,dra72", "ti,dra7"
diff --git a/Documentation/devicetree/bindings/arm/omap/prcm.txt b/Documentation/devicetree/bindings/arm/omap/prcm.txt
new file mode 100644
index 000000000..3eb6d7aff
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/prcm.txt
@@ -0,0 +1,63 @@
+OMAP PRCM bindings
+
+Power Reset and Clock Manager lists the device clocks and clockdomains under
+a DT hierarchy. Each TI SoC can have multiple PRCM entities listed for it,
+each describing one module and the clock hierarchy under it. see [1] for
+documentation about the individual clock/clockdomain nodes.
+
+[1] Documentation/devicetree/bindings/clock/ti/*
+
+Required properties:
+- compatible: Must be one of:
+ "ti,am3-prcm"
+ "ti,am4-prcm"
+ "ti,omap2-prcm"
+ "ti,omap3-prm"
+ "ti,omap3-cm"
+ "ti,omap4-cm1"
+ "ti,omap4-prm"
+ "ti,omap4-cm2"
+ "ti,omap4-scrm"
+ "ti,omap5-prm"
+ "ti,omap5-cm-core-aon"
+ "ti,omap5-scrm"
+ "ti,omap5-cm-core"
+ "ti,dra7-prm"
+ "ti,dra7-cm-core-aon"
+ "ti,dra7-cm-core"
+ "ti,dm814-prcm"
+ "ti,dm816-prcm"
+- reg: Contains PRCM module register address range
+ (base address and length)
+- clocks: clocks for this module
+- clockdomains: clockdomains for this module
+
+Example:
+
+cm: cm@48004000 {
+ compatible = "ti,omap3-cm";
+ reg = <0x48004000 0x4000>;
+
+ cm_clocks: clocks {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ cm_clockdomains: clockdomains {
+ };
+}
+
+&cm_clocks {
+ omap2_32k_fck: omap_32k_fck {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <32768>;
+ };
+};
+
+&cm_clockdomains {
+ core_l3_clkdm: core_l3_clkdm {
+ compatible = "ti,clockdomain";
+ clocks = <&sdrc_ick>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/arm/omap/timer.txt b/Documentation/devicetree/bindings/arm/omap/timer.txt
new file mode 100644
index 000000000..d02e27c76
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/omap/timer.txt
@@ -0,0 +1,44 @@
+OMAP Timer bindings
+
+Required properties:
+- compatible: Should be set to one of the below. Please note that
+ OMAP44xx devices have timer instances that are 100%
+ register compatible with OMAP3xxx devices as well as
+ newer timers that are not 100% register compatible.
+ So for OMAP44xx devices timer instances may use
+ different compatible strings.
+
+ ti,omap2420-timer (applicable to OMAP24xx devices)
+ ti,omap3430-timer (applicable to OMAP3xxx/44xx devices)
+ ti,omap4430-timer (applicable to OMAP44xx devices)
+ ti,omap5430-timer (applicable to OMAP543x devices)
+ ti,am335x-timer (applicable to AM335x devices)
+ ti,am335x-timer-1ms (applicable to AM335x devices)
+
+- reg: Contains timer register address range (base address and
+ length).
+- interrupts: Contains the interrupt information for the timer. The
+ format is being dependent on which interrupt controller
+ the OMAP device uses.
+- ti,hwmods: Name of the hwmod associated to the timer, "timer<X>",
+ where <X> is the instance number of the timer from the
+ HW spec.
+
+Optional properties:
+- ti,timer-alwon: Indicates the timer is in an alway-on power domain.
+- ti,timer-dsp: Indicates the timer can interrupt the on-chip DSP in
+ addition to the ARM CPU.
+- ti,timer-pwm: Indicates the timer can generate a PWM output.
+- ti,timer-secure: Indicates the timer is reserved on a secure OMAP device
+ and therefore cannot be used by the kernel.
+
+Example:
+
+timer12: timer@48304000 {
+ compatible = "ti,omap3430-timer";
+ reg = <0x48304000 0x400>;
+ interrupts = <95>;
+ ti,hwmods = "timer12"
+ ti,timer-alwon;
+ ti,timer-secure;
+};
diff --git a/Documentation/devicetree/bindings/arm/picoxcell.txt b/Documentation/devicetree/bindings/arm/picoxcell.txt
new file mode 100644
index 000000000..e75c0ef51
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/picoxcell.txt
@@ -0,0 +1,24 @@
+Picochip picoXcell device tree bindings.
+========================================
+
+Required root node properties:
+ - compatible:
+ - "picochip,pc7302-pc3x3" : PC7302 development board with PC3X3 device.
+ - "picochip,pc7302-pc3x2" : PC7302 development board with PC3X2 device.
+ - "picochip,pc3x3" : picoXcell PC3X3 device based board.
+ - "picochip,pc3x2" : picoXcell PC3X2 device based board.
+
+Timers required properties:
+ - compatible = "picochip,pc3x2-timer"
+ - interrupts : The single IRQ line for the timer.
+ - clock-freq : The frequency in HZ of the timer.
+ - reg : The register bank for the timer.
+
+Note: two timers are required - one for the scheduler clock and one for the
+event tick/NOHZ.
+
+VIC required properties:
+ - compatible = "arm,pl192-vic".
+ - interrupt-controller.
+ - reg : The register bank for the device.
+ - #interrupt-cells : Must be 1.
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
new file mode 100644
index 000000000..3b5f5d108
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -0,0 +1,44 @@
+* ARM Performance Monitor Units
+
+ARM cores often have a PMU for counting cpu and cache events like cache misses
+and hits. The interface to the PMU is part of the ARM ARM. The ARM PMU
+representation in the device tree should be done as under:-
+
+Required properties:
+
+- compatible : should be one of
+ "arm,armv8-pmuv3"
+ "arm,cortex-a17-pmu"
+ "arm,cortex-a15-pmu"
+ "arm,cortex-a12-pmu"
+ "arm,cortex-a9-pmu"
+ "arm,cortex-a8-pmu"
+ "arm,cortex-a7-pmu"
+ "arm,cortex-a5-pmu"
+ "arm,arm11mpcore-pmu"
+ "arm,arm1176-pmu"
+ "arm,arm1136-pmu"
+ "qcom,scorpion-pmu"
+ "qcom,scorpion-mp-pmu"
+ "qcom,krait-pmu"
+- interrupts : 1 combined interrupt or 1 per core. If the interrupt is a per-cpu
+ interrupt (PPI) then 1 interrupt should be specified.
+
+Optional properties:
+
+- interrupt-affinity : Valid only when using SPIs, specifies a list of phandles
+ to CPU nodes corresponding directly to the affinity of
+ the SPIs listed in the interrupts property.
+
+ This property should be present when there is more than
+ a single SPI.
+
+- qcom,no-pc-write : Indicates that this PMU doesn't support the 0xc and 0xd
+ events.
+
+Example:
+
+pmu {
+ compatible = "arm,cortex-a9-pmu";
+ interrupts = <100 101>;
+};
diff --git a/Documentation/devicetree/bindings/arm/primecell.txt b/Documentation/devicetree/bindings/arm/primecell.txt
new file mode 100644
index 000000000..0df6acacf
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/primecell.txt
@@ -0,0 +1,46 @@
+* ARM Primecell Peripherals
+
+ARM, Ltd. Primecell peripherals have a standard id register that can be used to
+identify the peripheral type, vendor, and revision. This value can be used for
+driver matching.
+
+Required properties:
+
+- compatible : should be a specific name for the peripheral and
+ "arm,primecell". The specific name will match the ARM
+ engineering name for the logic block in the form: "arm,pl???"
+
+Optional properties:
+
+- arm,primecell-periphid : Value to override the h/w value with
+- clocks : From common clock binding. First clock is phandle to clock for apb
+ pclk. Additional clocks are optional and specific to those peripherals.
+- clock-names : From common clock binding. Shall be "apb_pclk" for first clock.
+- dmas : From common DMA binding. If present, refers to one or more dma channels.
+- dma-names : From common DMA binding, needs to match the 'dmas' property.
+ Devices with exactly one receive and transmit channel shall name
+ these "rx" and "tx", respectively.
+- pinctrl-<n> : Pinctrl states as described in bindings/pinctrl/pinctrl-bindings.txt
+- pinctrl-names : Names corresponding to the numbered pinctrl states
+- interrupts : one or more interrupt specifiers
+- interrupt-names : names corresponding to the interrupts properties
+
+Example:
+
+serial@fff36000 {
+ compatible = "arm,pl011", "arm,primecell";
+ arm,primecell-periphid = <0x00341011>;
+
+ clocks = <&pclk>;
+ clock-names = "apb_pclk";
+
+ dmas = <&dma-controller 4>, <&dma-controller 5>;
+ dma-names = "rx", "tx";
+
+ pinctrl-0 = <&uart0_default_mux>, <&uart0_default_mode>;
+ pinctrl-1 = <&uart0_sleep_mode>;
+ pinctrl-names = "default","sleep";
+
+ interrupts = <0 11 0x4>;
+};
+
diff --git a/Documentation/devicetree/bindings/arm/psci.txt b/Documentation/devicetree/bindings/arm/psci.txt
new file mode 100644
index 000000000..5aa40ede0
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/psci.txt
@@ -0,0 +1,102 @@
+* Power State Coordination Interface (PSCI)
+
+Firmware implementing the PSCI functions described in ARM document number
+ARM DEN 0022A ("Power State Coordination Interface System Software on ARM
+processors") can be used by Linux to initiate various CPU-centric power
+operations.
+
+Issue A of the specification describes functions for CPU suspend, hotplug
+and migration of secure software.
+
+Functions are invoked by trapping to the privilege level of the PSCI
+firmware (specified as part of the binding below) and passing arguments
+in a manner similar to that specified by AAPCS:
+
+ r0 => 32-bit Function ID / return value
+ {r1 - r3} => Parameters
+
+Note that the immediate field of the trapping instruction must be set
+to #0.
+
+
+Main node required properties:
+
+ - compatible : should contain at least one of:
+
+ * "arm,psci" : for implementations complying to PSCI versions prior to
+ 0.2. For these cases function IDs must be provided.
+
+ * "arm,psci-0.2" : for implementations complying to PSCI 0.2. Function
+ IDs are not required and should be ignored by an OS with PSCI 0.2
+ support, but are permitted to be present for compatibility with
+ existing software when "arm,psci" is later in the compatible list.
+
+ - method : The method of calling the PSCI firmware. Permitted
+ values are:
+
+ "smc" : SMC #0, with the register assignments specified
+ in this binding.
+
+ "hvc" : HVC #0, with the register assignments specified
+ in this binding.
+
+Main node optional properties:
+
+ - cpu_suspend : Function ID for CPU_SUSPEND operation
+
+ - cpu_off : Function ID for CPU_OFF operation
+
+ - cpu_on : Function ID for CPU_ON operation
+
+ - migrate : Function ID for MIGRATE operation
+
+Device tree nodes that require usage of PSCI CPU_SUSPEND function (ie idle
+state nodes, as per bindings in [1]) must specify the following properties:
+
+- arm,psci-suspend-param
+ Usage: Required for state nodes[1] if the corresponding
+ idle-states node entry-method property is set
+ to "psci".
+ Value type: <u32>
+ Definition: power_state parameter to pass to the PSCI
+ suspend call.
+
+Example:
+
+Case 1: PSCI v0.1 only.
+
+ psci {
+ compatible = "arm,psci";
+ method = "smc";
+ cpu_suspend = <0x95c10000>;
+ cpu_off = <0x95c10001>;
+ cpu_on = <0x95c10002>;
+ migrate = <0x95c10003>;
+ };
+
+Case 2: PSCI v0.2 only
+
+ psci {
+ compatible = "arm,psci-0.2";
+ method = "smc";
+ };
+
+Case 3: PSCI v0.2 and PSCI v0.1.
+
+ A DTB may provide IDs for use by kernels without PSCI 0.2 support,
+ enabling firmware and hypervisors to support existing and new kernels.
+ These IDs will be ignored by kernels with PSCI 0.2 support, which will
+ use the standard PSCI 0.2 IDs exclusively.
+
+ psci {
+ compatible = "arm,psci-0.2", "arm,psci";
+ method = "hvc";
+
+ cpu_on = < arbitrary value >;
+ cpu_off = < arbitrary value >;
+
+ ...
+ };
+
+[1] Kernel documentation - ARM idle states bindings
+ Documentation/devicetree/bindings/arm/idle-states.txt
diff --git a/Documentation/devicetree/bindings/arm/rockchip.txt b/Documentation/devicetree/bindings/arm/rockchip.txt
new file mode 100644
index 000000000..60d4a1e0a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rockchip.txt
@@ -0,0 +1,28 @@
+Rockchip platforms device tree bindings
+---------------------------------------
+
+- MarsBoard RK3066 board:
+ Required root node properties:
+ - compatible = "haoyu,marsboard-rk3066", "rockchip,rk3066a";
+
+- bq Curie 2 tablet:
+ Required root node properties:
+ - compatible = "mundoreader,bq-curie2", "rockchip,rk3066a";
+
+- ChipSPARK Rayeager PX2 board:
+ Required root node properties:
+ - compatible = "chipspark,rayeager-px2", "rockchip,rk3066a";
+
+- Radxa Rock board:
+ Required root node properties:
+ - compatible = "radxa,rock", "rockchip,rk3188";
+
+- Firefly Firefly-RK3288 board:
+ Required root node properties:
+ - compatible = "firefly,firefly-rk3288", "rockchip,rk3288";
+ or
+ - compatible = "firefly,firefly-rk3288-beta", "rockchip,rk3288";
+
+- ChipSPARK PopMetal-RK3288 board:
+ Required root node properties:
+ - compatible = "chipspark,popmetal-rk3288", "rockchip,rk3288";
diff --git a/Documentation/devicetree/bindings/arm/rockchip/pmu-sram.txt b/Documentation/devicetree/bindings/arm/rockchip/pmu-sram.txt
new file mode 100644
index 000000000..6b42fda30
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rockchip/pmu-sram.txt
@@ -0,0 +1,16 @@
+Rockchip SRAM for pmu:
+------------------------------
+
+The sram of pmu is used to store the function of resume from maskrom(the 1st
+level loader). This is a common use of the "pmu-sram" because it keeps power
+even in low power states in the system.
+
+Required node properties:
+- compatible : should be "rockchip,rk3288-pmu-sram"
+- reg : physical base address and the size of the registers window
+
+Example:
+ sram@ff720000 {
+ compatible = "rockchip,rk3288-pmu-sram", "mmio-sram";
+ reg = <0xff720000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/rockchip/pmu.txt b/Documentation/devicetree/bindings/arm/rockchip/pmu.txt
new file mode 100644
index 000000000..3ee9b428b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rockchip/pmu.txt
@@ -0,0 +1,16 @@
+Rockchip power-management-unit:
+-------------------------------
+
+The pmu is used to turn off and on different power domains of the SoCs
+This includes the power to the CPU cores.
+
+Required node properties:
+- compatible value : = "rockchip,rk3066-pmu";
+- reg : physical base address and the size of the registers window
+
+Example:
+
+ pmu@20004000 {
+ compatible = "rockchip,rk3066-pmu";
+ reg = <0x20004000 0x100>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/rockchip/smp-sram.txt b/Documentation/devicetree/bindings/arm/rockchip/smp-sram.txt
new file mode 100644
index 000000000..d9416fb8d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rockchip/smp-sram.txt
@@ -0,0 +1,30 @@
+Rockchip SRAM for smp bringup:
+------------------------------
+
+Rockchip's smp-capable SoCs use the first part of the sram for the bringup
+of the cores. Once the core gets powered up it executes the code that is
+residing at the very beginning of the sram.
+
+Therefore a reserved section sub-node has to be added to the mmio-sram
+declaration.
+
+Required sub-node properties:
+- compatible : should be "rockchip,rk3066-smp-sram"
+
+The rest of the properties should follow the generic mmio-sram discription
+found in ../../misc/sram.txt
+
+Example:
+
+ sram: sram@10080000 {
+ compatible = "mmio-sram";
+ reg = <0x10080000 0x10000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ smp-sram@10080000 {
+ compatible = "rockchip,rk3066-smp-sram";
+ reg = <0x10080000 0x50>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
new file mode 100644
index 000000000..3b8fbf3c0
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
@@ -0,0 +1,19 @@
+ARM Dual Cluster System Configuration Block
+-------------------------------------------
+
+The Dual Cluster System Configuration Block (DCSCB) provides basic
+functionality for controlling clocks, resets and configuration pins in
+the Dual Cluster System implemented by the Real-Time System Model (RTSM).
+
+Required properties:
+
+- compatible : should be "arm,rtsm,dcscb"
+
+- reg : physical base address and the size of the registers window
+
+Example:
+
+ dcscb@60000000 {
+ compatible = "arm,rtsm,dcscb";
+ reg = <0x60000000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/samsung-boards.txt b/Documentation/devicetree/bindings/arm/samsung-boards.txt
new file mode 100644
index 000000000..43589d246
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung-boards.txt
@@ -0,0 +1,27 @@
+* Samsung's Exynos SoC based boards
+
+Required root node properties:
+ - compatible = should be one or more of the following.
+ - "samsung,monk" - for Exynos3250-based Samsung Simband board.
+ - "samsung,rinato" - for Exynos3250-based Samsung Gear2 board.
+ - "samsung,smdkv310" - for Exynos4210-based Samsung SMDKV310 eval board.
+ - "samsung,trats" - for Exynos4210-based Tizen Reference board.
+ - "samsung,universal_c210" - for Exynos4210-based Samsung board.
+ - "samsung,smdk4412", - for Exynos4412-based Samsung SMDK4412 eval board.
+ - "samsung,trats2" - for Exynos4412-based Tizen Reference board.
+ - "samsung,smdk5250" - for Exynos5250-based Samsung SMDK5250 eval board.
+ - "samsung,xyref5260" - for Exynos5260-based Samsung board.
+ - "samsung,smdk5410" - for Exynos5410-based Samsung SMDK5410 eval board.
+ - "samsung,smdk5420" - for Exynos5420-based Samsung SMDK5420 eval board.
+ - "samsung,sd5v1" - for Exynos5440-based Samsung board.
+ - "samsung,ssdk5440" - for Exynos5440-based Samsung board.
+
+Optional:
+ - firmware node, specifying presence and type of secure firmware:
+ - compatible: only "samsung,secure-firmware" is currently supported
+ - reg: address of non-secure SYSRAM used for communication with firmware
+
+ firmware@0203F000 {
+ compatible = "samsung,secure-firmware";
+ reg = <0x0203F000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt b/Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt
new file mode 100644
index 000000000..f46ca9a31
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/exynos-adc.txt
@@ -0,0 +1,100 @@
+Samsung Exynos Analog to Digital Converter bindings
+
+The devicetree bindings are for the new ADC driver written for
+Exynos4 and upward SoCs from Samsung.
+
+New driver handles the following
+1. Supports ADC IF found on EXYNOS4412/EXYNOS5250
+ and future SoCs from Samsung
+2. Add ADC driver under iio/adc framework
+3. Also adds the Documentation for device tree bindings
+
+Required properties:
+- compatible: Must be "samsung,exynos-adc-v1"
+ for exynos4412/5250 and s5pv210 controllers.
+ Must be "samsung,exynos-adc-v2" for
+ future controllers.
+ Must be "samsung,exynos3250-adc" for
+ controllers compatible with ADC of Exynos3250.
+ Must be "samsung,exynos7-adc" for
+ the ADC in Exynos7 and compatibles
+ Must be "samsung,s3c2410-adc" for
+ the ADC in s3c2410 and compatibles
+ Must be "samsung,s3c2416-adc" for
+ the ADC in s3c2416 and compatibles
+ Must be "samsung,s3c2440-adc" for
+ the ADC in s3c2440 and compatibles
+ Must be "samsung,s3c2443-adc" for
+ the ADC in s3c2443 and compatibles
+ Must be "samsung,s3c6410-adc" for
+ the ADC in s3c6410 and compatibles
+- reg: List of ADC register address range
+ - The base address and range of ADC register
+ - The base address and range of ADC_PHY register (every
+ SoC except for s3c24xx/s3c64xx ADC)
+- interrupts: Contains the interrupt information for the timer. The
+ format is being dependent on which interrupt controller
+ the Samsung device uses.
+- #io-channel-cells = <1>; As ADC has multiple outputs
+- clocks From common clock bindings: handles to clocks specified
+ in "clock-names" property, in the same order.
+- clock-names From common clock bindings: list of clock input names
+ used by ADC block:
+ - "adc" : ADC bus clock
+ - "sclk" : ADC special clock (only for Exynos3250 and
+ compatible ADC block)
+- vdd-supply VDD input supply.
+
+- samsung,syscon-phandle Contains the PMU system controller node
+ (To access the ADC_PHY register on Exynos5250/5420/5800/3250)
+
+Note: child nodes can be added for auto probing from device tree.
+
+Example: adding device info in dtsi file
+
+adc: adc@12D10000 {
+ compatible = "samsung,exynos-adc-v1";
+ reg = <0x12D10000 0x100>;
+ interrupts = <0 106 0>;
+ #io-channel-cells = <1>;
+ io-channel-ranges;
+
+ clocks = <&clock 303>;
+ clock-names = "adc";
+
+ vdd-supply = <&buck5_reg>;
+ samsung,syscon-phandle = <&pmu_system_controller>;
+};
+
+Example: adding device info in dtsi file for Exynos3250 with additional sclk
+
+adc: adc@126C0000 {
+ compatible = "samsung,exynos3250-adc", "samsung,exynos-adc-v2;
+ reg = <0x126C0000 0x100>;
+ interrupts = <0 137 0>;
+ #io-channel-cells = <1>;
+ io-channel-ranges;
+
+ clocks = <&cmu CLK_TSADC>, <&cmu CLK_SCLK_TSADC>;
+ clock-names = "adc", "sclk";
+
+ vdd-supply = <&buck5_reg>;
+ samsung,syscon-phandle = <&pmu_system_controller>;
+};
+
+Example: Adding child nodes in dts file
+
+adc@12D10000 {
+
+ /* NTC thermistor is a hwmon device */
+ ncp15wb473@0 {
+ compatible = "murata,ncp15wb473";
+ pullup-uv = <1800000>;
+ pullup-ohm = <47000>;
+ pulldown-ohm = <0>;
+ io-channels = <&adc 4>;
+ };
+};
+
+Note: Does not apply to ADC driver under arch/arm/plat-samsung/
+Note: The child node can be added under the adc node or separately.
diff --git a/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt b/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt
new file mode 100644
index 000000000..85c5dfd4a
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/exynos-chipid.txt
@@ -0,0 +1,12 @@
+SAMSUNG Exynos SoCs Chipid driver.
+
+Required properties:
+- compatible : Should at least contain "samsung,exynos4210-chipid".
+
+- reg: offset and length of the register set
+
+Example:
+ chipid@10000000 {
+ compatible = "samsung,exynos4210-chipid";
+ reg = <0x10000000 0x100>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/samsung/interrupt-combiner.txt b/Documentation/devicetree/bindings/arm/samsung/interrupt-combiner.txt
new file mode 100644
index 000000000..9e5f73412
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/interrupt-combiner.txt
@@ -0,0 +1,52 @@
+* Samsung Exynos Interrupt Combiner Controller
+
+Samsung's Exynos4 architecture includes a interrupt combiner controller which
+can combine interrupt sources as a group and provide a single interrupt request
+for the group. The interrupt request from each group are connected to a parent
+interrupt controller, such as GIC in case of Exynos4210.
+
+The interrupt combiner controller consists of multiple combiners. Up to eight
+interrupt sources can be connected to a combiner. The combiner outputs one
+combined interrupt for its eight interrupt sources. The combined interrupt
+is usually connected to a parent interrupt controller.
+
+A single node in the device tree is used to describe the interrupt combiner
+controller module (which includes multiple combiners). A combiner in the
+interrupt controller module shares config/control registers with other
+combiners. For example, a 32-bit interrupt enable/disable config register
+can accommodate up to 4 interrupt combiners (with each combiner supporting
+up to 8 interrupt sources).
+
+Required properties:
+- compatible: should be "samsung,exynos4210-combiner".
+- interrupt-controller: Identifies the node as an interrupt controller.
+- #interrupt-cells: should be <2>. The meaning of the cells are
+ * First Cell: Combiner Group Number.
+ * Second Cell: Interrupt number within the group.
+- reg: Base address and size of interrupt combiner registers.
+- interrupts: The list of interrupts generated by the combiners which are then
+ connected to a parent interrupt controller. The format of the interrupt
+ specifier depends in the interrupt parent controller.
+
+Optional properties:
+- samsung,combiner-nr: The number of interrupt combiners supported. If this
+ property is not specified, the default number of combiners is assumed
+ to be 16.
+- interrupt-parent: pHandle of the parent interrupt controller, if not
+ inherited from the parent node.
+
+
+Example:
+
+ The following is a an example from the Exynos4210 SoC dtsi file.
+
+ combiner:interrupt-controller@10440000 {
+ compatible = "samsung,exynos4210-combiner";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0x10440000 0x1000>;
+ interrupts = <0 0 0>, <0 1 0>, <0 2 0>, <0 3 0>,
+ <0 4 0>, <0 5 0>, <0 6 0>, <0 7 0>,
+ <0 8 0>, <0 9 0>, <0 10 0>, <0 11 0>,
+ <0 12 0>, <0 13 0>, <0 14 0>, <0 15 0>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/samsung/pmu.txt b/Documentation/devicetree/bindings/arm/samsung/pmu.txt
new file mode 100644
index 000000000..2d6356d8d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/pmu.txt
@@ -0,0 +1,69 @@
+SAMSUNG Exynos SoC series PMU Registers
+
+Properties:
+ - compatible : should contain two values. First value must be one from following list:
+ - "samsung,exynos3250-pmu" - for Exynos3250 SoC,
+ - "samsung,exynos4210-pmu" - for Exynos4210 SoC,
+ - "samsung,exynos4212-pmu" - for Exynos4212 SoC,
+ - "samsung,exynos4412-pmu" - for Exynos4412 SoC,
+ - "samsung,exynos5250-pmu" - for Exynos5250 SoC,
+ - "samsung,exynos5260-pmu" - for Exynos5260 SoC.
+ - "samsung,exynos5410-pmu" - for Exynos5410 SoC,
+ - "samsung,exynos5420-pmu" - for Exynos5420 SoC.
+ - "samsung,exynos7-pmu" - for Exynos7 SoC.
+ second value must be always "syscon".
+
+ - reg : offset and length of the register set.
+
+ - #clock-cells : must be <1>, since PMU requires once cell as clock specifier.
+ The single specifier cell is used as index to list of clocks
+ provided by PMU, which is currently:
+ 0 : SoC clock output (CLKOUT pin)
+
+ - clock-names : list of clock names for particular CLKOUT mux inputs in
+ following format:
+ "clkoutN", where N is a decimal number corresponding to
+ CLKOUT mux control bits value for given input, e.g.
+ "clkout0", "clkout7", "clkout15".
+
+ - clocks : list of phandles and specifiers to all input clocks listed in
+ clock-names property.
+
+Optional properties:
+
+Some PMUs are capable of behaving as an interrupt controller (mostly
+to wake up a suspended PMU). In which case, they can have the
+following properties:
+
+- interrupt-controller: indicate that said PMU is an interrupt controller
+
+- #interrupt-cells: must be identical to the that of the parent interrupt
+ controller.
+
+- interrupt-parent: a phandle indicating which interrupt controller
+ this PMU signals interrupts to.
+
+Example :
+pmu_system_controller: system-controller@10040000 {
+ compatible = "samsung,exynos5250-pmu", "syscon";
+ reg = <0x10040000 0x5000>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
+ interrupt-parent = <&gic>;
+ #clock-cells = <1>;
+ clock-names = "clkout0", "clkout1", "clkout2", "clkout3",
+ "clkout4", "clkout8", "clkout9";
+ clocks = <&clock CLK_OUT_DMC>, <&clock CLK_OUT_TOP>,
+ <&clock CLK_OUT_LEFTBUS>, <&clock CLK_OUT_RIGHTBUS>,
+ <&clock CLK_OUT_CPU>, <&clock CLK_XXTI>,
+ <&clock CLK_XUSBXTI>;
+};
+
+Example of clock consumer :
+
+usb3503: usb3503@08 {
+ /* ... */
+ clock-names = "refclk";
+ clocks = <&pmu_system_controller 0>;
+ /* ... */
+};
diff --git a/Documentation/devicetree/bindings/arm/samsung/sysreg.txt b/Documentation/devicetree/bindings/arm/samsung/sysreg.txt
new file mode 100644
index 000000000..4fced6e9d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/samsung/sysreg.txt
@@ -0,0 +1,19 @@
+SAMSUNG S5P/Exynos SoC series System Registers (SYSREG)
+
+Properties:
+ - compatible : should contain two values. First value must be one from following list:
+ - "samsung,exynos4-sysreg" - for Exynos4 based SoCs,
+ - "samsung,exynos5-sysreg" - for Exynos5 based SoCs.
+ second value must be always "syscon".
+ - reg : offset and length of the register set.
+
+Example:
+ syscon@10010000 {
+ compatible = "samsung,exynos4-sysreg", "syscon";
+ reg = <0x10010000 0x400>;
+ };
+
+ syscon@10050000 {
+ compatible = "samsung,exynos5-sysreg", "syscon";
+ reg = <0x10050000 0x5000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/shmobile.txt b/Documentation/devicetree/bindings/arm/shmobile.txt
new file mode 100644
index 000000000..c4f19b2e7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/shmobile.txt
@@ -0,0 +1,63 @@
+Renesas SH-Mobile, R-Mobile, and R-Car Platform Device Tree Bindings
+--------------------------------------------------------------------
+
+SoCs:
+
+ - Emma Mobile EV2
+ compatible = "renesas,emev2"
+ - RZ/A1H (R7S72100)
+ compatible = "renesas,r7s72100"
+ - SH-Mobile AG5 (R8A73A00/SH73A0)
+ compatible = "renesas,sh73a0"
+ - R-Mobile APE6 (R8A73A40)
+ compatible = "renesas,r8a73a4"
+ - R-Mobile A1 (R8A77400)
+ compatible = "renesas,r8a7740"
+ - R-Car M1A (R8A77781)
+ compatible = "renesas,r8a7778"
+ - R-Car H1 (R8A77790)
+ compatible = "renesas,r8a7779"
+ - R-Car H2 (R8A77900)
+ compatible = "renesas,r8a7790"
+ - R-Car M2-W (R8A77910)
+ compatible = "renesas,r8a7791"
+ - R-Car V2H (R8A77920)
+ compatible = "renesas,r8a7792"
+ - R-Car M2-N (R8A77930)
+ compatible = "renesas,r8a7793"
+ - R-Car E2 (R8A77940)
+ compatible = "renesas,r8a7794"
+
+
+Boards:
+
+ - Alt
+ compatible = "renesas,alt", "renesas,r8a7794"
+ - APE6-EVM
+ compatible = "renesas,ape6evm", "renesas,r8a73a4"
+ - Atmark Techno Armadillo-800 EVA
+ compatible = "renesas,armadillo800eva"
+ - BOCK-W
+ compatible = "renesas,bockw", "renesas,r8a7778"
+ - BOCK-W - Reference Device Tree Implementation
+ compatible = "renesas,bockw-reference", "renesas,r8a7778"
+ - Genmai (RTK772100BC00000BR)
+ compatible = "renesas,genmai", "renesas,r7s72100"
+ - Gose
+ compatible = "renesas,gose", "renesas,r8a7793"
+ - Henninger
+ compatible = "renesas,henninger", "renesas,r8a7791"
+ - Koelsch (RTP0RC7791SEB00010S)
+ compatible = "renesas,koelsch", "renesas,r8a7791"
+ - Kyoto Microcomputer Co. KZM-A9-Dual
+ compatible = "renesas,kzm9d", "renesas,emev2"
+ - Kyoto Microcomputer Co. KZM-A9-GT
+ compatible = "renesas,kzm9g", "renesas,sh73a0"
+ - Lager (RTP0RC7790SEB00010S)
+ compatible = "renesas,lager", "renesas,r8a7790"
+ - Marzen
+ compatible = "renesas,marzen", "renesas,r8a7779"
+
+Note: Reference Device Tree Implementations are temporary implementations
+ to ease the migration from platform devices to Device Tree, and are
+ intended to be removed in the future.
diff --git a/Documentation/devicetree/bindings/arm/sirf.txt b/Documentation/devicetree/bindings/arm/sirf.txt
new file mode 100644
index 000000000..7b28ee6fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sirf.txt
@@ -0,0 +1,11 @@
+CSR SiRFprimaII and SiRFmarco device tree bindings.
+========================================
+
+Required root node properties:
+ - compatible:
+ - "sirf,atlas6-cb" : atlas6 "cb" evaluation board
+ - "sirf,atlas6" : atlas6 device based board
+ - "sirf,atlas7-cb" : atlas7 "cb" evaluation board
+ - "sirf,atlas7" : atlas7 device based board
+ - "sirf,prima2-cb" : prima2 "cb" evaluation board
+ - "sirf,prima2" : prima2 device based board
diff --git a/Documentation/devicetree/bindings/arm/spear-misc.txt b/Documentation/devicetree/bindings/arm/spear-misc.txt
new file mode 100644
index 000000000..cf649827f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/spear-misc.txt
@@ -0,0 +1,9 @@
+SPEAr Misc configuration
+===========================
+SPEAr SOCs have some miscellaneous registers which are used to configure
+few properties of different peripheral controllers.
+
+misc node required properties:
+
+- compatible Should be "st,spear1340-misc", "syscon".
+- reg: Address range of misc space upto 8K
diff --git a/Documentation/devicetree/bindings/arm/spear-timer.txt b/Documentation/devicetree/bindings/arm/spear-timer.txt
new file mode 100644
index 000000000..c0017221c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/spear-timer.txt
@@ -0,0 +1,18 @@
+* SPEAr ARM Timer
+
+** Timer node required properties:
+
+- compatible : Should be:
+ "st,spear-timer"
+- reg: Address range of the timer registers
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupt: Should contain the timer interrupt number
+
+Example:
+
+ timer@f0000000 {
+ compatible = "st,spear-timer";
+ reg = <0xf0000000 0x400>;
+ interrupts = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/spear.txt b/Documentation/devicetree/bindings/arm/spear.txt
new file mode 100644
index 000000000..0d42949df
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/spear.txt
@@ -0,0 +1,26 @@
+ST SPEAr Platforms Device Tree Bindings
+---------------------------------------
+
+Boards with the ST SPEAr600 SoC shall have the following properties:
+Required root node property:
+compatible = "st,spear600";
+
+Boards with the ST SPEAr300 SoC shall have the following properties:
+Required root node property:
+compatible = "st,spear300";
+
+Boards with the ST SPEAr310 SoC shall have the following properties:
+Required root node property:
+compatible = "st,spear310";
+
+Boards with the ST SPEAr320 SoC shall have the following properties:
+Required root node property:
+compatible = "st,spear320";
+
+Boards with the ST SPEAr1310 SoC shall have the following properties:
+Required root node property:
+compatible = "st,spear1310";
+
+Boards with the ST SPEAr1340 SoC shall have the following properties:
+Required root node property:
+compatible = "st,spear1340";
diff --git a/Documentation/devicetree/bindings/arm/spear/shirq.txt b/Documentation/devicetree/bindings/arm/spear/shirq.txt
new file mode 100644
index 000000000..715a013ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/spear/shirq.txt
@@ -0,0 +1,48 @@
+* SPEAr Shared IRQ layer (shirq)
+
+SPEAr3xx architecture includes shared/multiplexed irqs for certain set
+of devices. The multiplexor provides a single interrupt to parent
+interrupt controller (VIC) on behalf of a group of devices.
+
+There can be multiple groups available on SPEAr3xx variants but not
+exceeding 4. The number of devices in a group can differ, further they
+may share same set of status/mask registers spanning across different
+bit masks. Also in some cases the group may not have enable or other
+registers. This makes software little complex.
+
+A single node in the device tree is used to describe the shared
+interrupt multiplexor (one node for all groups). A group in the
+interrupt controller shares config/control registers with other groups.
+For example, a 32-bit interrupt enable/disable config register can
+accommodate up to 4 interrupt groups.
+
+Required properties:
+ - compatible: should be, either of
+ - "st,spear300-shirq"
+ - "st,spear310-shirq"
+ - "st,spear320-shirq"
+ - interrupt-controller: Identifies the node as an interrupt controller.
+ - #interrupt-cells: should be <1> which basically contains the offset
+ (starting from 0) of interrupts for all the groups.
+ - reg: Base address and size of shirq registers.
+ - interrupts: The list of interrupts generated by the groups which are
+ then connected to a parent interrupt controller. Each group is
+ associated with one of the interrupts, hence number of interrupts (to
+ parent) is equal to number of groups. The format of the interrupt
+ specifier depends in the interrupt parent controller.
+
+ Optional properties:
+ - interrupt-parent: pHandle of the parent interrupt controller, if not
+ inherited from the parent node.
+
+Example:
+
+The following is an example from the SPEAr320 SoC dtsi file.
+
+shirq: interrupt-controller@0xb3000000 {
+ compatible = "st,spear320-shirq";
+ reg = <0xb3000000 0x1000>;
+ interrupts = <28 29 30 1>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+};
diff --git a/Documentation/devicetree/bindings/arm/sprd.txt b/Documentation/devicetree/bindings/arm/sprd.txt
new file mode 100644
index 000000000..31a629dc7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sprd.txt
@@ -0,0 +1,11 @@
+Spreadtrum SoC Platforms Device Tree Bindings
+----------------------------------------------------
+
+Sharkl64 is a Spreadtrum's SoC Platform which is based
+on ARM 64-bit processor.
+
+SC9836 openphone board with SC9836 SoC based on the
+Sharkl64 Platform shall have the following properties.
+
+Required root node properties:
+ - compatible = "sprd,sc9836-openphone", "sprd,sc9836";
diff --git a/Documentation/devicetree/bindings/arm/ste-nomadik.txt b/Documentation/devicetree/bindings/arm/ste-nomadik.txt
new file mode 100644
index 000000000..2fdff5a80
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ste-nomadik.txt
@@ -0,0 +1,38 @@
+ST-Ericsson Nomadik Device Tree Bindings
+
+For various board the "board" node may contain specific properties
+that pertain to this particular board, such as board-specific GPIOs.
+
+Required root node property: src
+- Nomadik System and reset controller used for basic chip control, clock
+ and reset line control.
+- compatible: must be "stericsson,nomadik,src"
+
+Boards with the Nomadik SoC include:
+
+Nomadik NHK-15 board manufactured by ST Microelectronics:
+
+Required root node property:
+
+compatible="st,nomadik-nhk-15";
+
+S8815 "MiniKit" manufactured by Calao Systems:
+
+Required root node property:
+
+compatible="calaosystems,usb-s8815";
+
+Required node: usb-s8815
+
+Example:
+
+usb-s8815 {
+ ethernet-gpio {
+ gpios = <&gpio3 19 0x1>;
+ interrupts = <19 0x1>;
+ interrupt-parent = <&gpio3>;
+ };
+ mmcsd-gpio {
+ gpios = <&gpio3 16 0x1>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/arm/ste-u300.txt b/Documentation/devicetree/bindings/arm/ste-u300.txt
new file mode 100644
index 000000000..d11d80006
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ste-u300.txt
@@ -0,0 +1,46 @@
+ST-Ericsson U300 Device Tree Bindings
+
+For various board the "board" node may contain specific properties
+that pertain to this particular board, such as board-specific GPIOs
+or board power regulator supplies.
+
+Required root node property:
+
+compatible="stericsson,u300";
+
+Required node: syscon
+This contains the system controller.
+- compatible: must be "stericsson,u300-syscon".
+- reg: the base address and size of the system controller.
+
+Boards with the U300 SoC include:
+
+S365 "Small Board U365":
+
+Required node: s365
+This contains the board-specific information.
+- compatible: must be "stericsson,s365".
+- vana15-supply: the regulator supplying the 1.5V to drive the
+ board.
+- syscon: a pointer to the syscon node so we can access the
+ syscon registers to set the board as self-powered.
+
+Example:
+
+/ {
+ model = "ST-Ericsson U300";
+ compatible = "stericsson,u300";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ s365 {
+ compatible = "stericsson,s365";
+ vana15-supply = <&ab3100_ldo_d_reg>;
+ syscon = <&syscon>;
+ };
+
+ syscon: syscon@c0011000 {
+ compatible = "stericsson,u300-syscon";
+ reg = <0xc0011000 0x1000>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/arm/sti.txt b/Documentation/devicetree/bindings/arm/sti.txt
new file mode 100644
index 000000000..8d27f6b08
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sti.txt
@@ -0,0 +1,23 @@
+ST STi Platforms Device Tree Bindings
+---------------------------------------
+
+Boards with the ST STiH415 SoC shall have the following properties:
+Required root node property:
+compatible = "st,stih415";
+
+Boards with the ST STiH416 SoC shall have the following properties:
+Required root node property:
+compatible = "st,stih416";
+
+Boards with the ST STiH407 SoC shall have the following properties:
+Required root node property:
+compatible = "st,stih407";
+
+Boards with the ST STiH410 SoC shall have the following properties:
+Required root node property:
+compatible = "st,stih410";
+
+Boards with the ST STiH418 SoC shall have the following properties:
+Required root node property:
+compatible = "st,stih418";
+
diff --git a/Documentation/devicetree/bindings/arm/sunxi.txt b/Documentation/devicetree/bindings/arm/sunxi.txt
new file mode 100644
index 000000000..42941fdef
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/sunxi.txt
@@ -0,0 +1,12 @@
+Allwinner sunXi Platforms Device Tree Bindings
+
+Each device tree must specify which Allwinner SoC it uses,
+using one of the following compatible strings:
+
+ allwinner,sun4i-a10
+ allwinner,sun5i-a10s
+ allwinner,sun5i-a13
+ allwinner,sun6i-a31
+ allwinner,sun7i-a20
+ allwinner,sun8i-a23
+ allwinner,sun9i-a80
diff --git a/Documentation/devicetree/bindings/arm/tegra.txt b/Documentation/devicetree/bindings/arm/tegra.txt
new file mode 100644
index 000000000..73278c6d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra.txt
@@ -0,0 +1,41 @@
+NVIDIA Tegra device tree bindings
+-------------------------------------------
+
+SoCs
+-------------------------------------------
+
+Each device tree must specify which Tegra SoC it uses, using one of the
+following compatible values:
+
+ nvidia,tegra20
+ nvidia,tegra30
+
+Boards
+-------------------------------------------
+
+Each device tree must specify which one or more of the following
+board-specific compatible values:
+
+ ad,medcom-wide
+ ad,plutux
+ ad,tamonten
+ ad,tec
+ compal,paz00
+ compulab,trimslice
+ nvidia,beaver
+ nvidia,cardhu
+ nvidia,cardhu-a02
+ nvidia,cardhu-a04
+ nvidia,harmony
+ nvidia,seaboard
+ nvidia,ventana
+ nvidia,whistler
+ toradex,apalis_t30
+ toradex,apalis_t30-eval
+ toradex,colibri_t20-512
+ toradex,iris
+
+Trusted Foundations
+-------------------------------------------
+Tegra supports the Trusted Foundation secure monitor. See the
+"tlm,trusted-foundations" binding's documentation for more details.
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt
new file mode 100644
index 000000000..9a4295b54
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-ahb.txt
@@ -0,0 +1,17 @@
+NVIDIA Tegra AHB
+
+Required properties:
+- compatible : For Tegra20, must contain "nvidia,tegra20-ahb". For
+ Tegra30, must contain "nvidia,tegra30-ahb". Otherwise, must contain
+ '"nvidia,<chip>-ahb", "nvidia,tegra30-ahb"' where <chip> is tegra124,
+ tegra132, or tegra210.
+- reg : Should contain 1 register ranges(address and length). For
+ Tegra20, Tegra30, and Tegra114 chips, the value must be <0x6000c004
+ 0x10c>. For Tegra124, Tegra132 and Tegra210 chips, the value should
+ be be <0x6000c000 0x150>.
+
+Example (for a Tegra20 chip):
+ ahb: ahb@6000c004 {
+ compatible = "nvidia,tegra20-ahb";
+ reg = <0x6000c004 0x10c>; /* AHB Arbitration + Gizmo Controller */
+ };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-emc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-emc.txt
new file mode 100644
index 000000000..4c33b29dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-emc.txt
@@ -0,0 +1,100 @@
+Embedded Memory Controller
+
+Properties:
+- name : Should be emc
+- #address-cells : Should be 1
+- #size-cells : Should be 0
+- compatible : Should contain "nvidia,tegra20-emc".
+- reg : Offset and length of the register set for the device
+- nvidia,use-ram-code : If present, the sub-nodes will be addressed
+ and chosen using the ramcode board selector. If omitted, only one
+ set of tables can be present and said tables will be used
+ irrespective of ram-code configuration.
+
+Child device nodes describe the memory settings for different configurations and clock rates.
+
+Example:
+
+ memory-controller@7000f400 {
+ #address-cells = < 1 >;
+ #size-cells = < 0 >;
+ compatible = "nvidia,tegra20-emc";
+ reg = <0x7000f4000 0x200>;
+ }
+
+
+Embedded Memory Controller ram-code table
+
+If the emc node has the nvidia,use-ram-code property present, then the
+next level of nodes below the emc table are used to specify which settings
+apply for which ram-code settings.
+
+If the emc node lacks the nvidia,use-ram-code property, this level is omitted
+and the tables are stored directly under the emc node (see below).
+
+Properties:
+
+- name : Should be emc-tables
+- nvidia,ram-code : the binary representation of the ram-code board strappings
+ for which this node (and children) are valid.
+
+
+
+Embedded Memory Controller configuration table
+
+This is a table containing the EMC register settings for the various
+operating speeds of the memory controller. They are always located as
+subnodes of the emc controller node.
+
+There are two ways of specifying which tables to use:
+
+* The simplest is if there is just one set of tables in the device tree,
+ and they will always be used (based on which frequency is used).
+ This is the preferred method, especially when firmware can fill in
+ this information based on the specific system information and just
+ pass it on to the kernel.
+
+* The slightly more complex one is when more than one memory configuration
+ might exist on the system. The Tegra20 platform handles this during
+ early boot by selecting one out of possible 4 memory settings based
+ on a 2-pin "ram code" bootstrap setting on the board. The values of
+ these strappings can be read through a register in the SoC, and thus
+ used to select which tables to use.
+
+Properties:
+- name : Should be emc-table
+- compatible : Should contain "nvidia,tegra20-emc-table".
+- reg : either an opaque enumerator to tell different tables apart, or
+ the valid frequency for which the table should be used (in kHz).
+- clock-frequency : the clock frequency for the EMC at which this
+ table should be used (in kHz).
+- nvidia,emc-registers : a 46 word array of EMC registers to be programmed
+ for operation at the 'clock-frequency' setting.
+ The order and contents of the registers are:
+ RC, RFC, RAS, RP, R2W, W2R, R2P, W2P, RD_RCD, WR_RCD, RRD, REXT,
+ WDV, QUSE, QRST, QSAFE, RDV, REFRESH, BURST_REFRESH_NUM, PDEX2WR,
+ PDEX2RD, PCHG2PDEN, ACT2PDEN, AR2PDEN, RW2PDEN, TXSR, TCKE, TFAW,
+ TRPAB, TCLKSTABLE, TCLKSTOP, TREFBW, QUSE_EXTRA, FBIO_CFG6, ODT_WRITE,
+ ODT_READ, FBIO_CFG5, CFG_DIG_DLL, DLL_XFORM_DQS, DLL_XFORM_QUSE,
+ ZCAL_REF_CNT, ZCAL_WAIT_CNT, AUTO_CAL_INTERVAL, CFG_CLKTRIM_0,
+ CFG_CLKTRIM_1, CFG_CLKTRIM_2
+
+ emc-table@166000 {
+ reg = <166000>;
+ compatible = "nvidia,tegra20-emc-table";
+ clock-frequency = < 166000 >;
+ nvidia,emc-registers = < 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 >;
+ };
+
+ emc-table@333000 {
+ reg = <333000>;
+ compatible = "nvidia,tegra20-emc-table";
+ clock-frequency = < 333000 >;
+ nvidia,emc-registers = < 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+ 0 0 0 0 >;
+ };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt
new file mode 100644
index 000000000..ccf0adddc
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-flowctrl.txt
@@ -0,0 +1,12 @@
+NVIDIA Tegra Flow Controller
+
+Required properties:
+- compatible: Should be "nvidia,tegra<chip>-flowctrl"
+- reg: Should contain one register range (address and length)
+
+Example:
+
+ flow-controller@60007000 {
+ compatible = "nvidia,tegra20-flowctrl";
+ reg = <0x60007000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt
new file mode 100644
index 000000000..866d93421
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-mc.txt
@@ -0,0 +1,16 @@
+NVIDIA Tegra20 MC(Memory Controller)
+
+Required properties:
+- compatible : "nvidia,tegra20-mc"
+- reg : Should contain 2 register ranges(address and length); see the
+ example below. Note that the MC registers are interleaved with the
+ GART registers, and hence must be represented as multiple ranges.
+- interrupts : Should contain MC General interrupt.
+
+Example:
+ memory-controller@0x7000f000 {
+ compatible = "nvidia,tegra20-mc";
+ reg = <0x7000f000 0x024
+ 0x7000f03c 0x3c4>;
+ interrupts = <0 77 0x04>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt
new file mode 100644
index 000000000..02c27004d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra20-pmc.txt
@@ -0,0 +1,115 @@
+NVIDIA Tegra Power Management Controller (PMC)
+
+The PMC block interacts with an external Power Management Unit. The PMC
+mostly controls the entry and exit of the system from different sleep
+modes. It provides power-gating controllers for SoC and CPU power-islands.
+
+Required properties:
+- name : Should be pmc
+- compatible : For Tegra20, must contain "nvidia,tegra20-pmc". For Tegra30,
+ must contain "nvidia,tegra30-pmc". For Tegra114, must contain
+ "nvidia,tegra114-pmc". For Tegra124, must contain "nvidia,tegra124-pmc".
+ Otherwise, must contain "nvidia,<chip>-pmc", plus at least one of the
+ above, where <chip> is tegra132.
+- reg : Offset and length of the register set for the device
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ "pclk" (The Tegra clock of that name),
+ "clk32k_in" (The 32KHz clock input to Tegra).
+
+Optional properties:
+- nvidia,invert-interrupt : If present, inverts the PMU interrupt signal.
+ The PMU is an external Power Management Unit, whose interrupt output
+ signal is fed into the PMC. This signal is optionally inverted, and then
+ fed into the ARM GIC. The PMC is not involved in the detection or
+ handling of this interrupt signal, merely its inversion.
+- nvidia,suspend-mode : The suspend mode that the platform should use.
+ Valid values are 0, 1 and 2:
+ 0 (LP0): CPU + Core voltage off and DRAM in self-refresh
+ 1 (LP1): CPU voltage off and DRAM in self-refresh
+ 2 (LP2): CPU voltage off
+- nvidia,core-power-req-active-high : Boolean, core power request active-high
+- nvidia,sys-clock-req-active-high : Boolean, system clock request active-high
+- nvidia,combined-power-req : Boolean, combined power request for CPU & Core
+- nvidia,cpu-pwr-good-en : Boolean, CPU power good signal (from PMIC to PMC)
+ is enabled.
+
+Required properties when nvidia,suspend-mode is specified:
+- nvidia,cpu-pwr-good-time : CPU power good time in uS.
+- nvidia,cpu-pwr-off-time : CPU power off time in uS.
+- nvidia,core-pwr-good-time : <Oscillator-stable-time Power-stable-time>
+ Core power good time in uS.
+- nvidia,core-pwr-off-time : Core power off time in uS.
+
+Required properties when nvidia,suspend-mode=<0>:
+- nvidia,lp0-vec : <start length> Starting address and length of LP0 vector
+ The LP0 vector contains the warm boot code that is executed by AVP when
+ resuming from the LP0 state. The AVP (Audio-Video Processor) is an ARM7
+ processor and always being the first boot processor when chip is power on
+ or resume from deep sleep mode. When the system is resumed from the deep
+ sleep mode, the warm boot code will restore some PLLs, clocks and then
+ bring up CPU0 for resuming the system.
+
+Hardware-triggered thermal reset:
+On Tegra30, Tegra114 and Tegra124, if the 'i2c-thermtrip' subnode exists,
+hardware-triggered thermal reset will be enabled.
+
+Required properties for hardware-triggered thermal reset (inside 'i2c-thermtrip'):
+- nvidia,i2c-controller-id : ID of I2C controller to send poweroff command to. Valid values are
+ described in section 9.2.148 "APBDEV_PMC_SCRATCH53_0" of the
+ Tegra K1 Technical Reference Manual.
+- nvidia,bus-addr : Bus address of the PMU on the I2C bus
+- nvidia,reg-addr : I2C register address to write poweroff command to
+- nvidia,reg-data : Poweroff command to write to PMU
+
+Optional properties for hardware-triggered thermal reset (inside 'i2c-thermtrip'):
+- nvidia,pinmux-id : Pinmux used by the hardware when issuing poweroff command.
+ Defaults to 0. Valid values are described in section 12.5.2
+ "Pinmux Support" of the Tegra4 Technical Reference Manual.
+
+Example:
+
+/ SoC dts including file
+pmc@7000f400 {
+ compatible = "nvidia,tegra20-pmc";
+ reg = <0x7000e400 0x400>;
+ clocks = <&tegra_car 110>, <&clk32k_in>;
+ clock-names = "pclk", "clk32k_in";
+ nvidia,invert-interrupt;
+ nvidia,suspend-mode = <1>;
+ nvidia,cpu-pwr-good-time = <2000>;
+ nvidia,cpu-pwr-off-time = <100>;
+ nvidia,core-pwr-good-time = <3845 3845>;
+ nvidia,core-pwr-off-time = <458>;
+ nvidia,core-power-req-active-high;
+ nvidia,sys-clock-req-active-high;
+ nvidia,lp0-vec = <0xbdffd000 0x2000>;
+};
+
+/ Tegra board dts file
+{
+ ...
+ pmc@7000f400 {
+ i2c-thermtrip {
+ nvidia,i2c-controller-id = <4>;
+ nvidia,bus-addr = <0x40>;
+ nvidia,reg-addr = <0x36>;
+ nvidia,reg-data = <0x2>;
+ };
+ };
+ ...
+ clocks {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ clk32k_in: clock {
+ compatible = "fixed-clock";
+ reg=<0>;
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+ };
+ ...
+};
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt
new file mode 100644
index 000000000..ea670a5d7
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-actmon.txt
@@ -0,0 +1,32 @@
+NVIDIA Tegra Activity Monitor
+
+The activity monitor block collects statistics about the behaviour of other
+components in the system. This information can be used to derive the rate at
+which the external memory needs to be clocked in order to serve all requests
+from the monitored clients.
+
+Required properties:
+- compatible: should be "nvidia,tegra<chip>-actmon"
+- reg: offset and length of the register set for the device
+- interrupts: standard interrupt property
+- clocks: Must contain a phandle and clock specifier pair for each entry in
+clock-names. See ../../clock/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - actmon
+ - emc
+- resets: Must contain an entry for each entry in reset-names. See
+../../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - actmon
+
+Example:
+ actmon@6000c800 {
+ compatible = "nvidia,tegra124-actmon";
+ reg = <0x0 0x6000c800 0x0 0x400>;
+ interrupts = <GIC_SPI 45 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_ACTMON>,
+ <&tegra_car TEGRA124_CLK_EMC>;
+ clock-names = "actmon", "emc";
+ resets = <&tegra_car 119>;
+ reset-names = "actmon";
+ };
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-mc.txt b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-mc.txt
new file mode 100644
index 000000000..bdf1a6124
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra30-mc.txt
@@ -0,0 +1,18 @@
+NVIDIA Tegra30 MC(Memory Controller)
+
+Required properties:
+- compatible : "nvidia,tegra30-mc"
+- reg : Should contain 4 register ranges(address and length); see the
+ example below. Note that the MC registers are interleaved with the
+ SMMU registers, and hence must be represented as multiple ranges.
+- interrupts : Should contain MC General interrupt.
+
+Example:
+ memory-controller {
+ compatible = "nvidia,tegra30-mc";
+ reg = <0x7000f000 0x010
+ 0x7000f03c 0x1b4
+ 0x7000f200 0x028
+ 0x7000f284 0x17c>;
+ interrupts = <0 77 0x04>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/topology.txt b/Documentation/devicetree/bindings/arm/topology.txt
new file mode 100644
index 000000000..1061faf5f
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/topology.txt
@@ -0,0 +1,475 @@
+===========================================
+ARM topology binding description
+===========================================
+
+===========================================
+1 - Introduction
+===========================================
+
+In an ARM system, the hierarchy of CPUs is defined through three entities that
+are used to describe the layout of physical CPUs in the system:
+
+- cluster
+- core
+- thread
+
+The cpu nodes (bindings defined in [1]) represent the devices that
+correspond to physical CPUs and are to be mapped to the hierarchy levels.
+
+The bottom hierarchy level sits at core or thread level depending on whether
+symmetric multi-threading (SMT) is supported or not.
+
+For instance in a system where CPUs support SMT, "cpu" nodes represent all
+threads existing in the system and map to the hierarchy level "thread" above.
+In systems where SMT is not supported "cpu" nodes represent all cores present
+in the system and map to the hierarchy level "core" above.
+
+ARM topology bindings allow one to associate cpu nodes with hierarchical groups
+corresponding to the system hierarchy; syntactically they are defined as device
+tree nodes.
+
+The remainder of this document provides the topology bindings for ARM, based
+on the ePAPR standard, available from:
+
+http://www.power.org/documentation/epapr-version-1-1/
+
+If not stated otherwise, whenever a reference to a cpu node phandle is made its
+value must point to a cpu node compliant with the cpu node bindings as
+documented in [1].
+A topology description containing phandles to cpu nodes that are not compliant
+with bindings standardized in [1] is therefore considered invalid.
+
+===========================================
+2 - cpu-map node
+===========================================
+
+The ARM CPU topology is defined within the cpu-map node, which is a direct
+child of the cpus node and provides a container where the actual topology
+nodes are listed.
+
+- cpu-map node
+
+ Usage: Optional - On ARM SMP systems provide CPUs topology to the OS.
+ ARM uniprocessor systems do not require a topology
+ description and therefore should not define a
+ cpu-map node.
+
+ Description: The cpu-map node is just a container node where its
+ subnodes describe the CPU topology.
+
+ Node name must be "cpu-map".
+
+ The cpu-map node's parent node must be the cpus node.
+
+ The cpu-map node's child nodes can be:
+
+ - one or more cluster nodes
+
+ Any other configuration is considered invalid.
+
+The cpu-map node can only contain three types of child nodes:
+
+- cluster node
+- core node
+- thread node
+
+whose bindings are described in paragraph 3.
+
+The nodes describing the CPU topology (cluster/core/thread) can only
+be defined within the cpu-map node and every core/thread in the system
+must be defined within the topology. Any other configuration is
+invalid and therefore must be ignored.
+
+===========================================
+2.1 - cpu-map child nodes naming convention
+===========================================
+
+cpu-map child nodes must follow a naming convention where the node name
+must be "clusterN", "coreN", "threadN" depending on the node type (ie
+cluster/core/thread) (where N = {0, 1, ...} is the node number; nodes which
+are siblings within a single common parent node must be given a unique and
+sequential N value, starting from 0).
+cpu-map child nodes which do not share a common parent node can have the same
+name (ie same number N as other cpu-map child nodes at different device tree
+levels) since name uniqueness will be guaranteed by the device tree hierarchy.
+
+===========================================
+3 - cluster/core/thread node bindings
+===========================================
+
+Bindings for cluster/cpu/thread nodes are defined as follows:
+
+- cluster node
+
+ Description: must be declared within a cpu-map node, one node
+ per cluster. A system can contain several layers of
+ clustering and cluster nodes can be contained in parent
+ cluster nodes.
+
+ The cluster node name must be "clusterN" as described in 2.1 above.
+ A cluster node can not be a leaf node.
+
+ A cluster node's child nodes must be:
+
+ - one or more cluster nodes; or
+ - one or more core nodes
+
+ Any other configuration is considered invalid.
+
+- core node
+
+ Description: must be declared in a cluster node, one node per core in
+ the cluster. If the system does not support SMT, core
+ nodes are leaf nodes, otherwise they become containers of
+ thread nodes.
+
+ The core node name must be "coreN" as described in 2.1 above.
+
+ A core node must be a leaf node if SMT is not supported.
+
+ Properties for core nodes that are leaf nodes:
+
+ - cpu
+ Usage: required
+ Value type: <phandle>
+ Definition: a phandle to the cpu node that corresponds to the
+ core node.
+
+ If a core node is not a leaf node (CPUs supporting SMT) a core node's
+ child nodes can be:
+
+ - one or more thread nodes
+
+ Any other configuration is considered invalid.
+
+- thread node
+
+ Description: must be declared in a core node, one node per thread
+ in the core if the system supports SMT. Thread nodes are
+ always leaf nodes in the device tree.
+
+ The thread node name must be "threadN" as described in 2.1 above.
+
+ A thread node must be a leaf node.
+
+ A thread node must contain the following property:
+
+ - cpu
+ Usage: required
+ Value type: <phandle>
+ Definition: a phandle to the cpu node that corresponds to
+ the thread node.
+
+===========================================
+4 - Example dts
+===========================================
+
+Example 1 (ARM 64-bit, 16-cpu system, two clusters of clusters):
+
+cpus {
+ #size-cells = <0>;
+ #address-cells = <2>;
+
+ cpu-map {
+ cluster0 {
+ cluster0 {
+ core0 {
+ thread0 {
+ cpu = <&CPU0>;
+ };
+ thread1 {
+ cpu = <&CPU1>;
+ };
+ };
+
+ core1 {
+ thread0 {
+ cpu = <&CPU2>;
+ };
+ thread1 {
+ cpu = <&CPU3>;
+ };
+ };
+ };
+
+ cluster1 {
+ core0 {
+ thread0 {
+ cpu = <&CPU4>;
+ };
+ thread1 {
+ cpu = <&CPU5>;
+ };
+ };
+
+ core1 {
+ thread0 {
+ cpu = <&CPU6>;
+ };
+ thread1 {
+ cpu = <&CPU7>;
+ };
+ };
+ };
+ };
+
+ cluster1 {
+ cluster0 {
+ core0 {
+ thread0 {
+ cpu = <&CPU8>;
+ };
+ thread1 {
+ cpu = <&CPU9>;
+ };
+ };
+ core1 {
+ thread0 {
+ cpu = <&CPU10>;
+ };
+ thread1 {
+ cpu = <&CPU11>;
+ };
+ };
+ };
+
+ cluster1 {
+ core0 {
+ thread0 {
+ cpu = <&CPU12>;
+ };
+ thread1 {
+ cpu = <&CPU13>;
+ };
+ };
+ core1 {
+ thread0 {
+ cpu = <&CPU14>;
+ };
+ thread1 {
+ cpu = <&CPU15>;
+ };
+ };
+ };
+ };
+ };
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x0>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x1>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU2: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU3: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU4: cpu@10000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10000>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU5: cpu@10001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10001>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU6: cpu@10100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU7: cpu@10101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x0 0x10101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU8: cpu@100000000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x0>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU9: cpu@100000001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x1>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU10: cpu@100000100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU11: cpu@100000101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU12: cpu@100010000 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10000>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU13: cpu@100010001 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10001>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU14: cpu@100010100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10100>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+
+ CPU15: cpu@100010101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a57";
+ reg = <0x1 0x10101>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0 0x20000000>;
+ };
+};
+
+Example 2 (ARM 32-bit, dual-cluster, 8-cpu system, no SMT):
+
+cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ cpu-map {
+ cluster0 {
+ core0 {
+ cpu = <&CPU0>;
+ };
+ core1 {
+ cpu = <&CPU1>;
+ };
+ core2 {
+ cpu = <&CPU2>;
+ };
+ core3 {
+ cpu = <&CPU3>;
+ };
+ };
+
+ cluster1 {
+ core0 {
+ cpu = <&CPU4>;
+ };
+ core1 {
+ cpu = <&CPU5>;
+ };
+ core2 {
+ cpu = <&CPU6>;
+ };
+ core3 {
+ cpu = <&CPU7>;
+ };
+ };
+ };
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x0>;
+ };
+
+ CPU1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x1>;
+ };
+
+ CPU2: cpu@2 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x2>;
+ };
+
+ CPU3: cpu@3 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ reg = <0x3>;
+ };
+
+ CPU4: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x100>;
+ };
+
+ CPU5: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x101>;
+ };
+
+ CPU6: cpu@102 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x102>;
+ };
+
+ CPU7: cpu@103 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ reg = <0x103>;
+ };
+};
+
+===============================================================================
+[1] ARM Linux kernel documentation
+ Documentation/devicetree/bindings/arm/cpus.txt
diff --git a/Documentation/devicetree/bindings/arm/twd.txt b/Documentation/devicetree/bindings/arm/twd.txt
new file mode 100644
index 000000000..75b861093
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/twd.txt
@@ -0,0 +1,48 @@
+* ARM Timer Watchdog
+
+ARM 11MP, Cortex-A5 and Cortex-A9 are often associated with a per-core
+Timer-Watchdog (aka TWD), which provides both a per-cpu local timer
+and watchdog.
+
+The TWD is usually attached to a GIC to deliver its two per-processor
+interrupts.
+
+** Timer node required properties:
+
+- compatible : Should be one of:
+ "arm,cortex-a9-twd-timer"
+ "arm,cortex-a5-twd-timer"
+ "arm,arm11mp-twd-timer"
+
+- interrupts : One interrupt to each core
+
+- reg : Specify the base address and the size of the TWD timer
+ register window.
+
+Example:
+
+ twd-timer@2c000600 {
+ compatible = "arm,arm11mp-twd-timer"";
+ reg = <0x2c000600 0x20>;
+ interrupts = <1 13 0xf01>;
+ };
+
+** Watchdog node properties:
+
+- compatible : Should be one of:
+ "arm,cortex-a9-twd-wdt"
+ "arm,cortex-a5-twd-wdt"
+ "arm,arm11mp-twd-wdt"
+
+- interrupts : One interrupt to each core
+
+- reg : Specify the base address and the size of the TWD watchdog
+ register window.
+
+Example:
+
+ twd-watchdog@2c000620 {
+ compatible = "arm,arm11mp-twd-wdt";
+ reg = <0x2c000620 0x20>;
+ interrupts = <1 14 0xf01>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/ux500/power_domain.txt b/Documentation/devicetree/bindings/arm/ux500/power_domain.txt
new file mode 100644
index 000000000..5679d1742
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/ux500/power_domain.txt
@@ -0,0 +1,35 @@
+* ST-Ericsson UX500 PM Domains
+
+UX500 supports multiple PM domains which are used to gate power to one or
+more peripherals on the SOC.
+
+The implementation of PM domains for UX500 are based upon the generic PM domain
+and use the corresponding DT bindings.
+
+==PM domain providers==
+
+Required properties:
+ - compatible: Must be "stericsson,ux500-pm-domains".
+ - #power-domain-cells : Number of cells in a power domain specifier, must be 1.
+
+Example:
+ pm_domains: pm_domains0 {
+ compatible = "stericsson,ux500-pm-domains";
+ #power-domain-cells = <1>;
+ };
+
+==PM domain consumers==
+
+Required properties:
+ - power-domains: A phandle and PM domain specifier. Below are the list of
+ valid specifiers:
+
+ Index Specifier
+ ----- ---------
+ 0 DOMAIN_VAPE
+
+Example:
+ sdi0_per1@80126000 {
+ compatible = "arm,pl18x", "arm,primecell";
+ power-domains = <&pm_domains DOMAIN_VAPE>
+ };
diff --git a/Documentation/devicetree/bindings/arm/versatile-fpga-irq.txt b/Documentation/devicetree/bindings/arm/versatile-fpga-irq.txt
new file mode 100644
index 000000000..c9cf605bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/versatile-fpga-irq.txt
@@ -0,0 +1,36 @@
+* ARM Versatile FPGA interrupt controller
+
+One or more FPGA IRQ controllers can be synthesized in an ARM reference board
+such as the Integrator or Versatile family. The output of these different
+controllers are OR:ed together and fed to the CPU tile's IRQ input. Each
+instance can handle up to 32 interrupts.
+
+Required properties:
+- compatible: "arm,versatile-fpga-irq"
+- interrupt-controller: Identifies the node as an interrupt controller
+- #interrupt-cells: The number of cells to define the interrupts. Must be 1
+ as the FPGA IRQ controller has no configuration options for interrupt
+ sources. The cell is a u32 and defines the interrupt number.
+- reg: The register bank for the FPGA interrupt controller.
+- clear-mask: a u32 number representing the mask written to clear all IRQs
+ on the controller at boot for example.
+- valid-mask: a u32 number representing a bit mask determining which of
+ the interrupts are valid. Unconnected/unused lines are set to 0, and
+ the system till not make it possible for devices to request these
+ interrupts.
+
+Example:
+
+pic: pic@14000000 {
+ compatible = "arm,versatile-fpga-irq";
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ reg = <0x14000000 0x100>;
+ clear-mask = <0xffffffff>;
+ valid-mask = <0x003fffff>;
+};
+
+Optional properties:
+- interrupts: if the FPGA IRQ controller is cascaded, i.e. if its IRQ
+ output is simply connected to the input of another IRQ controller,
+ then the parent IRQ shall be specified in this property.
diff --git a/Documentation/devicetree/bindings/arm/versatile-sysreg.txt b/Documentation/devicetree/bindings/arm/versatile-sysreg.txt
new file mode 100644
index 000000000..a4f15262d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/versatile-sysreg.txt
@@ -0,0 +1,10 @@
+ARM Versatile system registers
+--------------------------------------
+
+This is a system control registers block, providing multiple low level
+platform functions like board detection and identification, software
+interrupt generation, MMC and NOR Flash control etc.
+
+Required node properties:
+- compatible value : = "arm,versatile-sysreg", "syscon"
+- reg : physical base address and the size of the registers window
diff --git a/Documentation/devicetree/bindings/arm/vexpress-scc.txt b/Documentation/devicetree/bindings/arm/vexpress-scc.txt
new file mode 100644
index 000000000..ae5043e42
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vexpress-scc.txt
@@ -0,0 +1,33 @@
+ARM Versatile Express Serial Configuration Controller
+-----------------------------------------------------
+
+Test chips for ARM Versatile Express platform implement SCC (Serial
+Configuration Controller) interface, used to set initial conditions
+for the test chip.
+
+In some cases its registers are also mapped in normal address space
+and can be used to obtain runtime information about the chip internals
+(like silicon temperature sensors) and as interface to other subsystems
+like platform configuration control and power management.
+
+Required properties:
+
+- compatible value: "arm,vexpress-scc,<model>", "arm,vexpress-scc";
+ where <model> is the full tile model name (as used
+ in the tile's Technical Reference Manual),
+ eg. for Coretile Express A15x2 A7x3 (V2P-CA15_A7):
+ compatible = "arm,vexpress-scc,v2p-ca15_a7", "arm,vexpress-scc";
+
+Optional properties:
+
+- reg: when the SCC is memory mapped, physical address and size of the
+ registers window
+- interrupts: when the SCC can generate a system-level interrupt
+
+Example:
+
+ scc@7fff0000 {
+ compatible = "arm,vexpress-scc,v2p-ca15_a7", "arm,vexpress-scc";
+ reg = <0 0x7fff0000 0 0x1000>;
+ interrupts = <0 95 4>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt b/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
new file mode 100644
index 000000000..00318d083
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
@@ -0,0 +1,103 @@
+ARM Versatile Express system registers
+--------------------------------------
+
+This is a system control registers block, providing multiple low level
+platform functions like board detection and identification, software
+interrupt generation, MMC and NOR Flash control etc.
+
+Required node properties:
+- compatible value : = "arm,vexpress,sysreg";
+- reg : physical base address and the size of the registers window
+
+Deprecated properties, replaced by GPIO subnodes (see below):
+- gpio-controller : specifies that the node is a GPIO controller
+- #gpio-cells : size of the GPIO specifier, should be 2:
+ - first cell is the pseudo-GPIO line number:
+ 0 - MMC CARDIN
+ 1 - MMC WPROT
+ 2 - NOR FLASH WPn
+ - second cell can take standard GPIO flags (currently ignored).
+
+Control registers providing pseudo-GPIO lines must be represented
+by subnodes, each of them requiring the following properties:
+- compatible value : one of
+ "arm,vexpress-sysreg,sys_led"
+ "arm,vexpress-sysreg,sys_mci"
+ "arm,vexpress-sysreg,sys_flash"
+- gpio-controller : makes the node a GPIO controller
+- #gpio-cells : size of the GPIO specifier, must be 2:
+ - first cell is the function number:
+ - for sys_led : 0..7 = LED 0..7
+ - for sys_mci : 0 = MMC CARDIN, 1 = MMC WPROT
+ - for sys_flash : 0 = NOR FLASH WPn
+ - second cell can take standard GPIO flags (currently ignored).
+
+Example:
+ v2m_sysreg: sysreg@10000000 {
+ compatible = "arm,vexpress-sysreg";
+ reg = <0x10000000 0x1000>;
+
+ v2m_led_gpios: sys_led@08 {
+ compatible = "arm,vexpress-sysreg,sys_led";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ v2m_mmc_gpios: sys_mci@48 {
+ compatible = "arm,vexpress-sysreg,sys_mci";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ v2m_flash_gpios: sys_flash@4c {
+ compatible = "arm,vexpress-sysreg,sys_flash";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+ };
+
+This block also can also act a bridge to the platform's configuration
+bus via "system control" interface, addressing devices with site number,
+position in the board stack, config controller, function and device
+numbers - see motherboard's TRM for more details. All configuration
+controller accessible via this interface must reference the sysreg
+node via "arm,vexpress,config-bridge" phandle and define appropriate
+topology properties - see main vexpress node documentation for more
+details. Each child of such node describes one function and must
+define the following properties:
+- compatible value : must be one of (corresponding to the TRM):
+ "arm,vexpress-amp"
+ "arm,vexpress-dvimode"
+ "arm,vexpress-energy"
+ "arm,vexpress-muxfpga"
+ "arm,vexpress-osc"
+ "arm,vexpress-power"
+ "arm,vexpress-reboot"
+ "arm,vexpress-reset"
+ "arm,vexpress-scc"
+ "arm,vexpress-shutdown"
+ "arm,vexpress-temp"
+ "arm,vexpress-volt"
+- arm,vexpress-sysreg,func : must contain a set of two cells long groups:
+ - first cell of each group defines the function number
+ (eg. 1 for clock generator, 2 for voltage regulators etc.)
+ - second cell of each group defines device number (eg. osc 0,
+ osc 1 etc.)
+ - some functions (eg. energy meter, with its 64 bit long counter)
+ are using more than one function/device number pair
+
+Example:
+ mcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ compatible = "arm,vexpress-osc";
+ arm,vexpress-sysreg,func = <1 0>;
+ };
+
+ energy@0 {
+ compatible = "arm,vexpress-energy";
+ arm,vexpress-sysreg,func = <13 0>, <13 1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/vexpress.txt b/Documentation/devicetree/bindings/arm/vexpress.txt
new file mode 100644
index 000000000..39844cd0b
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vexpress.txt
@@ -0,0 +1,229 @@
+ARM Versatile Express boards family
+-----------------------------------
+
+ARM's Versatile Express platform consists of a motherboard and one
+or more daughterboards (tiles). The motherboard provides a set of
+peripherals. Processor and RAM "live" on the tiles.
+
+The motherboard and each core tile should be described by a separate
+Device Tree source file, with the tile's description including
+the motherboard file using a /include/ directive. As the motherboard
+can be initialized in one of two different configurations ("memory
+maps"), care must be taken to include the correct one.
+
+
+Root node
+---------
+
+Required properties in the root node:
+- compatible value:
+ compatible = "arm,vexpress,<model>", "arm,vexpress";
+ where <model> is the full tile model name (as used in the tile's
+ Technical Reference Manual), eg.:
+ - for Coretile Express A5x2 (V2P-CA5s):
+ compatible = "arm,vexpress,v2p-ca5s", "arm,vexpress";
+ - for Coretile Express A9x4 (V2P-CA9):
+ compatible = "arm,vexpress,v2p-ca9", "arm,vexpress";
+ If a tile comes in several variants or can be used in more then one
+ configuration, the compatible value should be:
+ compatible = "arm,vexpress,<model>,<variant>", \
+ "arm,vexpress,<model>", "arm,vexpress";
+ eg:
+ - Coretile Express A15x2 (V2P-CA15) with Tech Chip 1:
+ compatible = "arm,vexpress,v2p-ca15,tc1", \
+ "arm,vexpress,v2p-ca15", "arm,vexpress";
+ - LogicTile Express 13MG (V2F-2XV6) running Cortex-A7 (3 cores) SMM:
+ compatible = "arm,vexpress,v2f-2xv6,ca7x3", \
+ "arm,vexpress,v2f-2xv6", "arm,vexpress";
+
+Optional properties in the root node:
+- tile model name (use name from the tile's Technical Reference
+ Manual, eg. "V2P-CA5s")
+ model = "<model>";
+- tile's HBI number (unique ARM's board model ID, visible on the
+ PCB's silkscreen) in hexadecimal transcription:
+ arm,hbi = <0xhbi>
+ eg:
+ - for Coretile Express A5x2 (V2P-CA5s) HBI-0191:
+ arm,hbi = <0x191>;
+ - Coretile Express A9x4 (V2P-CA9) HBI-0225:
+ arm,hbi = <0x225>;
+
+
+CPU nodes
+---------
+
+Top-level standard "cpus" node is required. It must contain a node
+with device_type = "cpu" property for every available core, eg.:
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a5";
+ reg = <0>;
+ };
+ };
+
+
+Configuration infrastructure
+----------------------------
+
+The platform has an elaborated configuration system, consisting of
+microcontrollers residing on the mother- and daughterboards known
+as Motherboard/Daughterboard Configuration Controller (MCC and DCC).
+The controllers are responsible for the platform initialization
+(reset generation, flash programming, FPGA bitfiles loading etc.)
+but also control clock generators, voltage regulators, gather
+environmental data like temperature, power consumption etc. Even
+the video output switch (FPGA) is controlled that way.
+
+The controllers are not mapped into normal memory address space
+and must be accessed through bridges - other devices capable
+of generating transactions on the configuration bus.
+
+The nodes describing configuration controllers must define
+the following properties:
+- compatible value:
+ compatible = "arm,vexpress,config-bus";
+- bridge phandle:
+ arm,vexpress,config-bridge = <phandle>;
+and children describing available functions.
+
+
+Platform topology
+-----------------
+
+As Versatile Express can be configured in number of physically
+different setups, the device tree should describe platform topology.
+Root node and main motherboard node must define the following
+property, describing physical location of the children nodes:
+- site number:
+ arm,vexpress,site = <number>;
+ where 0 means motherboard, 1 or 2 are daugtherboard sites,
+ 0xf means "master" site (site containing main CPU tile)
+- when daughterboards are stacked on one site, their position
+ in the stack be be described with:
+ arm,vexpress,position = <number>;
+- when describing tiles consisting more than one DCC, its number
+ can be described with:
+ arm,vexpress,dcc = <number>;
+
+Any of the numbers above defaults to zero if not defined in
+the node or any of its parent.
+
+
+Motherboard
+-----------
+
+The motherboard description file provides a single "motherboard" node
+using 2 address cells corresponding to the Static Memory Bus used
+between the motherboard and the tile. The first cell defines the Chip
+Select (CS) line number, the second cell address offset within the CS.
+All interrupt lines between the motherboard and the tile are active
+high and are described using single cell.
+
+Optional properties of the "motherboard" node:
+- motherboard's memory map variant:
+ arm,v2m-memory-map = "<name>";
+ where name is one of:
+ - "rs1" - for RS1 map (i.a. peripherals on CS3); this map is also
+ referred to as "ARM Cortex-A Series memory map":
+ arm,v2m-memory-map = "rs1";
+ When this property is missing, the motherboard is using the original
+ memory map (also known as the "Legacy memory map", primarily used
+ with the original CoreTile Express A9x4) with peripherals on CS7.
+
+Motherboard .dtsi files provide a set of labelled peripherals that
+can be used to obtain required phandle in the tile's "aliases" node:
+- UARTs, note that the numbers correspond to the physical connectors
+ on the motherboard's back panel:
+ v2m_serial0, v2m_serial1, v2m_serial2 and v2m_serial3
+- I2C controllers:
+ v2m_i2c_dvi and v2m_i2c_pcie
+- SP804 timers:
+ v2m_timer01 and v2m_timer23
+
+The tile description should define a "smb" node, describing the
+Static Memory Bus between the tile and motherboard. It must define
+the following properties:
+- "simple-bus" compatible value (to ensure creation of the children)
+ compatible = "simple-bus";
+- mapping of the SMB CS/offset addresses into main address space:
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <...>;
+- interrupts mapping:
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ interrupt-map = <...>;
+
+
+Example of a VE tile description (simplified)
+---------------------------------------------
+
+/dts-v1/;
+
+/ {
+ model = "V2P-CA5s";
+ arm,hbi = <0x225>;
+ arm,vexpress,site = <0xf>;
+ compatible = "arm,vexpress-v2p-ca5s", "arm,vexpress";
+ interrupt-parent = <&gic>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ chosen { };
+
+ aliases {
+ serial0 = &v2m_serial0;
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a5";
+ reg = <0>;
+ };
+ };
+
+ gic: interrupt-controller@2c001000 {
+ compatible = "arm,cortex-a9-gic";
+ #interrupt-cells = <3>;
+ #address-cells = <0>;
+ interrupt-controller;
+ reg = <0x2c001000 0x1000>,
+ <0x2c000100 0x100>;
+ };
+
+ dcc {
+ compatible = "arm,vexpress,config-bus";
+ arm,vexpress,config-bridge = <&v2m_sysreg>;
+
+ osc@0 {
+ compatible = "arm,vexpress-osc";
+ };
+ };
+
+ smb {
+ compatible = "simple-bus";
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+ /* CS0 is visible at 0x08000000 */
+ ranges = <0 0 0x08000000 0x04000000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 63>;
+ /* Active high IRQ 0 is connected to GIC's SPI0 */
+ interrupt-map = <0 0 0 &gic 0 0 4>;
+
+ /include/ "vexpress-v2m-rs1.dtsi"
+ };
+};
+
diff --git a/Documentation/devicetree/bindings/arm/vic.txt b/Documentation/devicetree/bindings/arm/vic.txt
new file mode 100644
index 000000000..dd527216c
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vic.txt
@@ -0,0 +1,41 @@
+* ARM Vectored Interrupt Controller
+
+One or more Vectored Interrupt Controllers (VIC's) can be connected in an ARM
+system for interrupt routing. For multiple controllers they can either be
+nested or have the outputs wire-OR'd together.
+
+Required properties:
+
+- compatible : should be one of
+ "arm,pl190-vic"
+ "arm,pl192-vic"
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : The number of cells to define the interrupts. Must be 1 as
+ the VIC has no configuration options for interrupt sources. The cell is a u32
+ and defines the interrupt number.
+- reg : The register bank for the VIC.
+
+Optional properties:
+
+- interrupts : Interrupt source for parent controllers if the VIC is nested.
+- valid-mask : A one cell big bit mask of valid interrupt sources. Each bit
+ represents single interrupt source, starting from source 0 at LSb and ending
+ at source 31 at MSb. A bit that is set means that the source is wired and
+ clear means otherwise. If unspecified, defaults to all valid.
+- valid-wakeup-mask : A one cell big bit mask of interrupt sources that can be
+ configured as wake up source for the system. Order of bits is the same as for
+ valid-mask property. A set bit means that this interrupt source can be
+ configured as a wake up source for the system. If unspecied, defaults to all
+ interrupt sources configurable as wake up sources.
+
+Example:
+
+ vic0: interrupt-controller@60000 {
+ compatible = "arm,pl192-vic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x60000 0x1000>;
+
+ valid-mask = <0xffffff7f>;
+ valid-wakeup-mask = <0x0000ff7f>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/vt8500.txt b/Documentation/devicetree/bindings/arm/vt8500.txt
new file mode 100644
index 000000000..87dc1ddf4
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vt8500.txt
@@ -0,0 +1,22 @@
+VIA/Wondermedia VT8500 Platforms Device Tree Bindings
+---------------------------------------
+
+Boards with the VIA VT8500 SoC shall have the following properties:
+Required root node property:
+compatible = "via,vt8500";
+
+Boards with the Wondermedia WM8505 SoC shall have the following properties:
+Required root node property:
+compatible = "wm,wm8505";
+
+Boards with the Wondermedia WM8650 SoC shall have the following properties:
+Required root node property:
+compatible = "wm,wm8650";
+
+Boards with the Wondermedia WM8750 SoC shall have the following properties:
+Required root node property:
+compatible = "wm,wm8750";
+
+Boards with the Wondermedia WM8850 SoC shall have the following properties:
+Required root node property:
+compatible = "wm,wm8850";
diff --git a/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt
new file mode 100644
index 000000000..0a4ce1051
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-intc.txt
@@ -0,0 +1,16 @@
+VIA/Wondermedia VT8500 Interrupt Controller
+-----------------------------------------------------
+
+Required properties:
+- compatible : "via,vt8500-intc"
+- reg : Should contain 1 register ranges(address and length)
+- #interrupt-cells : should be <1>
+
+Example:
+
+ intc: interrupt-controller@d8140000 {
+ compatible = "via,vt8500-intc";
+ interrupt-controller;
+ reg = <0xd8140000 0x10000>;
+ #interrupt-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt
new file mode 100644
index 000000000..521b9c7de
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-pmc.txt
@@ -0,0 +1,13 @@
+VIA/Wondermedia VT8500 Power Management Controller
+-----------------------------------------------------
+
+Required properties:
+- compatible : "via,vt8500-pmc"
+- reg : Should contain 1 register ranges(address and length)
+
+Example:
+
+ pmc@d8130000 {
+ compatible = "via,vt8500-pmc";
+ reg = <0xd8130000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt
new file mode 100644
index 000000000..901c73f0d
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/vt8500/via,vt8500-timer.txt
@@ -0,0 +1,15 @@
+VIA/Wondermedia VT8500 Timer
+-----------------------------------------------------
+
+Required properties:
+- compatible : "via,vt8500-timer"
+- reg : Should contain 1 register ranges(address and length)
+- interrupts : interrupt for the timer
+
+Example:
+
+ timer@d8130100 {
+ compatible = "via,vt8500-timer";
+ reg = <0xd8130100 0x28>;
+ interrupts = <36>;
+ };
diff --git a/Documentation/devicetree/bindings/arm/xen.txt b/Documentation/devicetree/bindings/arm/xen.txt
new file mode 100644
index 000000000..0f7b9c210
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/xen.txt
@@ -0,0 +1,25 @@
+* Xen hypervisor device tree bindings
+
+Xen ARM virtual platforms shall have a top-level "hypervisor" node with
+the following properties:
+
+- compatible:
+ compatible = "xen,xen-<version>", "xen,xen";
+ where <version> is the version of the Xen ABI of the platform.
+
+- reg: specifies the base physical address and size of a region in
+ memory where the grant table should be mapped to, using an
+ HYPERVISOR_memory_op hypercall. The memory region is large enough to map
+ the whole grant table (it is larger or equal to gnttab_max_grant_frames()).
+
+- interrupts: the interrupt used by Xen to inject event notifications.
+ A GIC node is also required.
+
+
+Example (assuming #address-cells = <2> and #size-cells = <2>):
+
+hypervisor {
+ compatible = "xen,xen-4.3", "xen,xen";
+ reg = <0 0xb0000000 0 0x20000>;
+ interrupts = <1 15 0xf08>;
+};
diff --git a/Documentation/devicetree/bindings/arm/xilinx.txt b/Documentation/devicetree/bindings/arm/xilinx.txt
new file mode 100644
index 000000000..1f7995357
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/xilinx.txt
@@ -0,0 +1,7 @@
+Xilinx Zynq Platforms Device Tree Bindings
+
+Boards with Zynq-7000 SOC based on an ARM Cortex A9 processor
+shall have the following properties.
+
+Required root node properties:
+ - compatible = "xlnx,zynq-7000";
diff --git a/Documentation/devicetree/bindings/ata/ahci-platform.txt b/Documentation/devicetree/bindings/ata/ahci-platform.txt
new file mode 100644
index 000000000..c2340eeeb
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/ahci-platform.txt
@@ -0,0 +1,80 @@
+* AHCI SATA Controller
+
+SATA nodes are defined to describe on-chip Serial ATA controllers.
+Each SATA controller should have its own node.
+
+It is possible, but not required, to represent each port as a sub-node.
+It allows to enable each port independently when dealing with multiple
+PHYs.
+
+Required properties:
+- compatible : compatible string, one of:
+ - "allwinner,sun4i-a10-ahci"
+ - "hisilicon,hisi-ahci"
+ - "ibm,476gtr-ahci"
+ - "marvell,armada-380-ahci"
+ - "snps,dwc-ahci"
+ - "snps,exynos5440-ahci"
+ - "snps,spear-ahci"
+ - "generic-ahci"
+- interrupts : <interrupt mapping for SATA IRQ>
+- reg : <registers mapping>
+
+Please note that when using "generic-ahci" you must also specify a SoC specific
+compatible:
+ compatible = "manufacturer,soc-model-ahci", "generic-ahci";
+
+Optional properties:
+- dma-coherent : Present if dma operations are coherent
+- clocks : a list of phandle + clock specifier pairs
+- target-supply : regulator for SATA target power
+- phys : reference to the SATA PHY node
+- phy-names : must be "sata-phy"
+
+Required properties when using sub-nodes:
+- #address-cells : number of cells to encode an address
+- #size-cells : number of cells representing the size of an address
+
+
+Sub-nodes required properties:
+- reg : the port number
+And at least one of the following properties:
+- phys : reference to the SATA PHY node
+- target-supply : regulator for SATA target power
+
+Examples:
+ sata@ffe08000 {
+ compatible = "snps,spear-ahci";
+ reg = <0xffe08000 0x1000>;
+ interrupts = <115>;
+ };
+
+ ahci: sata@01c18000 {
+ compatible = "allwinner,sun4i-a10-ahci";
+ reg = <0x01c18000 0x1000>;
+ interrupts = <56>;
+ clocks = <&pll6 0>, <&ahb_gates 25>;
+ target-supply = <&reg_ahci_5v>;
+ };
+
+With sub-nodes:
+ sata@f7e90000 {
+ compatible = "marvell,berlin2q-achi", "generic-ahci";
+ reg = <0xe90000 0x1000>;
+ interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&chip CLKID_SATA>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ sata0: sata-port@0 {
+ reg = <0>;
+ phys = <&sata_phy 0>;
+ target-supply = <&reg_sata0>;
+ };
+
+ sata1: sata-port@1 {
+ reg = <1>;
+ phys = <&sata_phy 1>;
+ target-supply = <&reg_sata1>;;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/ata/ahci-st.txt b/Documentation/devicetree/bindings/ata/ahci-st.txt
new file mode 100644
index 000000000..e1d01df8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/ahci-st.txt
@@ -0,0 +1,50 @@
+STMicroelectronics STi SATA controller
+
+This binding describes a SATA device.
+
+Required properties:
+ - compatible : Must be "st,ahci"
+ - reg : Physical base addresses and length of register sets
+ - interrupts : Interrupt associated with the SATA device
+ - interrupt-names : Associated name must be; "hostc"
+ - clocks : The phandle for the clock
+ - clock-names : Associated name must be; "ahci_clk"
+ - phys : The phandle for the PHY port
+ - phy-names : Associated name must be; "ahci_phy"
+
+Optional properties:
+ - resets : The power-down, soft-reset and power-reset lines of SATA IP
+ - reset-names : Associated names must be; "pwr-dwn", "sw-rst" and "pwr-rst"
+
+Example:
+
+ /* Example for stih416 */
+ sata0: sata@fe380000 {
+ compatible = "st,ahci";
+ reg = <0xfe380000 0x1000>;
+ interrupts = <GIC_SPI 157 IRQ_TYPE_NONE>;
+ interrupt-names = "hostc";
+ phys = <&phy_port0 PHY_TYPE_SATA>;
+ phy-names = "ahci_phy";
+ resets = <&powerdown STIH416_SATA0_POWERDOWN>,
+ <&softreset STIH416_SATA0_SOFTRESET>;
+ reset-names = "pwr-dwn", "sw-rst";
+ clocks = <&clk_s_a0_ls CLK_ICN_REG>;
+ clock-names = "ahci_clk";
+ };
+
+ /* Example for stih407 family silicon */
+ sata0: sata@9b20000 {
+ compatible = "st,ahci";
+ reg = <0x9b20000 0x1000>;
+ interrupts = <GIC_SPI 159 IRQ_TYPE_NONE>;
+ interrupt-names = "hostc";
+ phys = <&phy_port0 PHY_TYPE_SATA>;
+ phy-names = "ahci_phy";
+ resets = <&powerdown STIH407_SATA0_POWERDOWN>,
+ <&softreset STIH407_SATA0_SOFTRESET>,
+ <&softreset STIH407_SATA0_PWR_SOFTRESET>;
+ reset-names = "pwr-dwn", "sw-rst", "pwr-rst";
+ clocks = <&clk_s_c0_flexgen CLK_ICN_REG>;
+ clock-names = "ahci_clk";
+ };
diff --git a/Documentation/devicetree/bindings/ata/apm-xgene.txt b/Documentation/devicetree/bindings/ata/apm-xgene.txt
new file mode 100644
index 000000000..a668f0e7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/apm-xgene.txt
@@ -0,0 +1,79 @@
+* APM X-Gene 6.0 Gb/s SATA host controller nodes
+
+SATA host controller nodes are defined to describe on-chip Serial ATA
+controllers. Each SATA controller (pair of ports) have its own node.
+
+Required properties:
+- compatible : Shall contain:
+ * "apm,xgene-ahci"
+- reg : First memory resource shall be the AHCI memory
+ resource.
+ Second memory resource shall be the host controller
+ core memory resource.
+ Third memory resource shall be the host controller
+ diagnostic memory resource.
+ 4th memory resource shall be the host controller
+ AXI memory resource.
+ 5th optional memory resource shall be the host
+ controller MUX memory resource if required.
+- interrupts : Interrupt-specifier for SATA host controller IRQ.
+- clocks : Reference to the clock entry.
+- phys : A list of phandles + phy-specifiers, one for each
+ entry in phy-names.
+- phy-names : Should contain:
+ * "sata-phy" for the SATA 6.0Gbps PHY
+
+Optional properties:
+- dma-coherent : Present if dma operations are coherent
+- status : Shall be "ok" if enabled or "disabled" if disabled.
+ Default is "ok".
+
+Example:
+ sataclk: sataclk {
+ compatible = "fixed-clock";
+ #clock-cells = <1>;
+ clock-frequency = <100000000>;
+ clock-output-names = "sataclk";
+ };
+
+ phy2: phy@1f22a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f22a000 0x0 0x100>;
+ #phy-cells = <1>;
+ };
+
+ phy3: phy@1f23a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f23a000 0x0 0x100>;
+ #phy-cells = <1>;
+ };
+
+ sata2: sata@1a400000 {
+ compatible = "apm,xgene-ahci";
+ reg = <0x0 0x1a400000 0x0 0x1000>,
+ <0x0 0x1f220000 0x0 0x1000>,
+ <0x0 0x1f22d000 0x0 0x1000>,
+ <0x0 0x1f22e000 0x0 0x1000>,
+ <0x0 0x1f227000 0x0 0x1000>;
+ interrupts = <0x0 0x87 0x4>;
+ dma-coherent;
+ status = "ok";
+ clocks = <&sataclk 0>;
+ phys = <&phy2 0>;
+ phy-names = "sata-phy";
+ };
+
+ sata3: sata@1a800000 {
+ compatible = "apm,xgene-ahci-pcie";
+ reg = <0x0 0x1a800000 0x0 0x1000>,
+ <0x0 0x1f230000 0x0 0x1000>,
+ <0x0 0x1f23d000 0x0 0x1000>,
+ <0x0 0x1f23e000 0x0 0x1000>,
+ <0x0 0x1f237000 0x0 0x1000>;
+ interrupts = <0x0 0x88 0x4>;
+ dma-coherent;
+ status = "ok";
+ clocks = <&sataclk 0>;
+ phys = <&phy3 0>;
+ phy-names = "sata-phy";
+ };
diff --git a/Documentation/devicetree/bindings/ata/atmel-at91_cf.txt b/Documentation/devicetree/bindings/ata/atmel-at91_cf.txt
new file mode 100644
index 000000000..c1d22b3ae
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/atmel-at91_cf.txt
@@ -0,0 +1,19 @@
+Atmel AT91RM9200 CompactFlash
+
+Required properties:
+- compatible : "atmel,at91rm9200-cf".
+- reg : should specify localbus address and size used.
+- gpios : specifies the gpio pins to control the CF device. Detect
+ and reset gpio's are mandatory while irq and vcc gpio's are
+ optional and may be set to 0 if not present.
+
+Example:
+compact-flash@50000000 {
+ compatible = "atmel,at91rm9200-cf";
+ reg = <0x50000000 0x30000000>;
+ gpios = <&pioC 13 0 /* irq */
+ &pioC 15 0 /* detect */
+ 0 /* vcc */
+ &pioC 5 0 /* reset */
+ >;
+};
diff --git a/Documentation/devicetree/bindings/ata/cavium-compact-flash.txt b/Documentation/devicetree/bindings/ata/cavium-compact-flash.txt
new file mode 100644
index 000000000..3bacc8e09
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/cavium-compact-flash.txt
@@ -0,0 +1,30 @@
+* Compact Flash
+
+The Cavium Compact Flash device is connected to the Octeon Boot Bus,
+and is thus a child of the Boot Bus device. It can read and write
+industry standard compact flash devices.
+
+Properties:
+- compatible: "cavium,ebt3000-compact-flash";
+
+ Compatibility with many Cavium evaluation boards.
+
+- reg: The base address of the CF chip select banks. Depending on
+ the device configuration, there may be one or two banks.
+
+- cavium,bus-width: The width of the connection to the CF devices. Valid
+ values are 8 and 16.
+
+- cavium,true-ide: Optional, if present the CF connection is in True IDE mode.
+
+- cavium,dma-engine-handle: Optional, a phandle for the DMA Engine connected
+ to this device.
+
+Example:
+ compact-flash@5,0 {
+ compatible = "cavium,ebt3000-compact-flash";
+ reg = <5 0 0x10000>, <6 0 0x10000>;
+ cavium,bus-width = <16>;
+ cavium,true-ide;
+ cavium,dma-engine-handle = <&dma0>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/exynos-sata.txt b/Documentation/devicetree/bindings/ata/exynos-sata.txt
new file mode 100644
index 000000000..cb4844824
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/exynos-sata.txt
@@ -0,0 +1,30 @@
+* Samsung AHCI SATA Controller
+
+SATA nodes are defined to describe on-chip Serial ATA controllers.
+Each SATA controller should have its own node.
+
+Required properties:
+- compatible : compatible list, contains "samsung,exynos5-sata"
+- interrupts : <interrupt mapping for SATA IRQ>
+- reg : <registers mapping>
+- samsung,sata-freq : <frequency in MHz>
+- phys : Must contain exactly one entry as specified
+ in phy-bindings.txt
+- phy-names : Must be "sata-phy"
+
+Optional properties:
+- clocks : Must contain an entry for each entry in clock-names.
+- clock-names : Shall be "sata" for the external SATA bus clock,
+ and "sclk_sata" for the internal controller clock.
+
+Example:
+ sata@122f0000 {
+ compatible = "snps,dwc-ahci";
+ samsung,sata-freq = <66>;
+ reg = <0x122f0000 0x1ff>;
+ interrupts = <0 115 0>;
+ clocks = <&clock 277>, <&clock 143>;
+ clock-names = "sata", "sclk_sata";
+ phys = <&sata_phy>;
+ phy-names = "sata-phy";
+ };
diff --git a/Documentation/devicetree/bindings/ata/fsl-sata.txt b/Documentation/devicetree/bindings/ata/fsl-sata.txt
new file mode 100644
index 000000000..b46bcf46c
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/fsl-sata.txt
@@ -0,0 +1,29 @@
+* Freescale 8xxx/3.0 Gb/s SATA nodes
+
+SATA nodes are defined to describe on-chip Serial ATA controllers.
+Each SATA port should have its own node.
+
+Required properties:
+- compatible : compatible list, contains 2 entries, first is
+ "fsl,CHIP-sata", where CHIP is the processor
+ (mpc8315, mpc8379, etc.) and the second is
+ "fsl,pq-sata"
+- interrupts : <interrupt mapping for SATA IRQ>
+- cell-index : controller index.
+ 1 for controller @ 0x18000
+ 2 for controller @ 0x19000
+ 3 for controller @ 0x1a000
+ 4 for controller @ 0x1b000
+
+Optional properties:
+- interrupt-parent : optional, if needed for interrupt mapping
+- reg : <registers mapping>
+
+Example:
+ sata@18000 {
+ compatible = "fsl,mpc8379-sata", "fsl,pq-sata";
+ reg = <0x18000 0x1000>;
+ cell-index = <1>;
+ interrupts = <2c 8>;
+ interrupt-parent = < &ipic >;
+ };
diff --git a/Documentation/devicetree/bindings/ata/imx-pata.txt b/Documentation/devicetree/bindings/ata/imx-pata.txt
new file mode 100644
index 000000000..e38d73414
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/imx-pata.txt
@@ -0,0 +1,17 @@
+* Freescale i.MX PATA Controller
+
+Required properties:
+- compatible: "fsl,imx27-pata"
+- reg: Address range of the PATA Controller
+- interrupts: The interrupt of the PATA Controller
+- clocks: the clocks for the PATA Controller
+
+Example:
+
+ pata: pata@83fe0000 {
+ compatible = "fsl,imx51-pata", "fsl,imx27-pata";
+ reg = <0x83fe0000 0x4000>;
+ interrupts = <70>;
+ clocks = <&clks 161>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/ata/imx-sata.txt b/Documentation/devicetree/bindings/ata/imx-sata.txt
new file mode 100644
index 000000000..fa511db18
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/imx-sata.txt
@@ -0,0 +1,36 @@
+* Freescale i.MX AHCI SATA Controller
+
+The Freescale i.MX SATA controller mostly conforms to the AHCI interface
+with some special extensions at integration level.
+
+Required properties:
+- compatible : should be one of the following:
+ - "fsl,imx53-ahci" for i.MX53 SATA controller
+ - "fsl,imx6q-ahci" for i.MX6Q SATA controller
+- interrupts : interrupt mapping for SATA IRQ
+- reg : registers mapping
+- clocks : list of clock specifiers, must contain an entry for each
+ required entry in clock-names
+- clock-names : should include "sata", "sata_ref" and "ahb" entries
+
+Optional properties:
+- fsl,transmit-level-mV : transmit voltage level, in millivolts.
+- fsl,transmit-boost-mdB : transmit boost level, in milli-decibels
+- fsl,transmit-atten-16ths : transmit attenuation, in 16ths
+- fsl,receive-eq-mdB : receive equalisation, in milli-decibels
+ Please refer to the technical documentation or the driver source code
+ for the list of legal values for these options.
+- fsl,no-spread-spectrum : disable spread-spectrum clocking on the SATA
+ link.
+
+Examples:
+
+sata@02200000 {
+ compatible = "fsl,imx6q-ahci";
+ reg = <0x02200000 0x4000>;
+ interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks IMX6QDL_CLK_SATA>,
+ <&clks IMX6QDL_CLK_SATA_REF_100M>,
+ <&clks IMX6QDL_CLK_AHB>;
+ clock-names = "sata", "sata_ref", "ahb";
+};
diff --git a/Documentation/devicetree/bindings/ata/marvell.txt b/Documentation/devicetree/bindings/ata/marvell.txt
new file mode 100644
index 000000000..b460edd12
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/marvell.txt
@@ -0,0 +1,22 @@
+* Marvell Orion SATA
+
+Required Properties:
+- compatibility : "marvell,orion-sata" or "marvell,armada-370-sata"
+- reg : Address range of controller
+- interrupts : Interrupt controller is using
+- nr-ports : Number of SATA ports in use.
+
+Optional Properties:
+- phys : List of phandles to sata phys
+- phy-names : Should be "0", "1", etc, one number per phandle
+
+Example:
+
+ sata@80000 {
+ compatible = "marvell,orion-sata";
+ reg = <0x80000 0x5000>;
+ interrupts = <21>;
+ phys = <&sata_phy0>, <&sata_phy1>;
+ phy-names = "0", "1";
+ nr-ports = <2>;
+ }
diff --git a/Documentation/devicetree/bindings/ata/pata-arasan.txt b/Documentation/devicetree/bindings/ata/pata-arasan.txt
new file mode 100644
index 000000000..2aff154be
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/pata-arasan.txt
@@ -0,0 +1,39 @@
+* ARASAN PATA COMPACT FLASH CONTROLLER
+
+Required properties:
+- compatible: "arasan,cf-spear1340"
+- reg: Address range of the CF registers
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupt: Should contain the CF interrupt number
+- clock-frequency: Interface clock rate, in Hz, one of
+ 25000000
+ 33000000
+ 40000000
+ 50000000
+ 66000000
+ 75000000
+ 100000000
+ 125000000
+ 150000000
+ 166000000
+ 200000000
+
+Optional properties:
+- arasan,broken-udma: if present, UDMA mode is unusable
+- arasan,broken-mwdma: if present, MWDMA mode is unusable
+- arasan,broken-pio: if present, PIO mode is unusable
+- dmas: one DMA channel, as described in bindings/dma/dma.txt
+ required unless both UDMA and MWDMA mode are broken
+- dma-names: the corresponding channel name, must be "data"
+
+Example:
+
+ cf@fc000000 {
+ compatible = "arasan,cf-spear1340";
+ reg = <0xfc000000 0x1000>;
+ interrupt-parent = <&vic1>;
+ interrupts = <12>;
+ dmas = <&dma-controller 23>;
+ dma-names = "data";
+ };
diff --git a/Documentation/devicetree/bindings/ata/qcom-sata.txt b/Documentation/devicetree/bindings/ata/qcom-sata.txt
new file mode 100644
index 000000000..094de91cd
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/qcom-sata.txt
@@ -0,0 +1,48 @@
+* Qualcomm AHCI SATA Controller
+
+SATA nodes are defined to describe on-chip Serial ATA controllers.
+Each SATA controller should have its own node.
+
+Required properties:
+- compatible : compatible list, must contain "generic-ahci"
+- interrupts : <interrupt mapping for SATA IRQ>
+- reg : <registers mapping>
+- phys : Must contain exactly one entry as specified
+ in phy-bindings.txt
+- phy-names : Must be "sata-phy"
+
+Required properties for "qcom,ipq806x-ahci" compatible:
+- clocks : Must contain an entry for each entry in clock-names.
+- clock-names : Shall be:
+ "slave_iface" - Fabric port AHB clock for SATA
+ "iface" - AHB clock
+ "core" - core clock
+ "rxoob" - RX out-of-band clock
+ "pmalive" - Power Module Alive clock
+- assigned-clocks : Shall be:
+ SATA_RXOOB_CLK
+ SATA_PMALIVE_CLK
+- assigned-clock-rates : Shall be:
+ 100Mhz (100000000) for SATA_RXOOB_CLK
+ 100Mhz (100000000) for SATA_PMALIVE_CLK
+
+Example:
+ sata@29000000 {
+ compatible = "qcom,ipq806x-ahci", "generic-ahci";
+ reg = <0x29000000 0x180>;
+
+ interrupts = <0 209 0x0>;
+
+ clocks = <&gcc SFAB_SATA_S_H_CLK>,
+ <&gcc SATA_H_CLK>,
+ <&gcc SATA_A_CLK>,
+ <&gcc SATA_RXOOB_CLK>,
+ <&gcc SATA_PMALIVE_CLK>;
+ clock-names = "slave_iface", "iface", "core",
+ "rxoob", "pmalive";
+ assigned-clocks = <&gcc SATA_RXOOB_CLK>, <&gcc SATA_PMALIVE_CLK>;
+ assigned-clock-rates = <100000000>, <100000000>;
+
+ phys = <&sata_phy>;
+ phy-names = "sata-phy";
+ };
diff --git a/Documentation/devicetree/bindings/ata/sata_highbank.txt b/Documentation/devicetree/bindings/ata/sata_highbank.txt
new file mode 100644
index 000000000..aa83407cb
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/sata_highbank.txt
@@ -0,0 +1,44 @@
+* Calxeda AHCI SATA Controller
+
+SATA nodes are defined to describe on-chip Serial ATA controllers.
+The Calxeda SATA controller mostly conforms to the AHCI interface
+with some special extensions to add functionality.
+Each SATA controller should have its own node.
+
+Required properties:
+- compatible : compatible list, contains "calxeda,hb-ahci"
+- interrupts : <interrupt mapping for SATA IRQ>
+- reg : <registers mapping>
+
+Optional properties:
+- dma-coherent : Present if dma operations are coherent
+- calxeda,port-phys : phandle-combophy and lane assignment, which maps each
+ SATA port to a combophy and a lane within that
+ combophy
+- calxeda,sgpio-gpio: phandle-gpio bank, bit offset, and default on or off,
+ which indicates that the driver supports SGPIO
+ indicator lights using the indicated GPIOs
+- calxeda,led-order : a u32 array that map port numbers to offsets within the
+ SGPIO bitstream.
+- calxeda,tx-atten : a u32 array that contains TX attenuation override
+ codes, one per port. The upper 3 bytes are always
+ 0 and thus ignored.
+- calxeda,pre-clocks : a u32 that indicates the number of additional clock
+ cycles to transmit before sending an SGPIO pattern
+- calxeda,post-clocks: a u32 that indicates the number of additional clock
+ cycles to transmit after sending an SGPIO pattern
+
+Example:
+ sata@ffe08000 {
+ compatible = "calxeda,hb-ahci";
+ reg = <0xffe08000 0x1000>;
+ interrupts = <115>;
+ dma-coherent;
+ calxeda,port-phys = <&combophy5 0 &combophy0 0 &combophy0 1
+ &combophy0 2 &combophy0 3>;
+ calxeda,sgpio-gpio =<&gpioh 5 1 &gpioh 6 1 &gpioh 7 1>;
+ calxeda,led-order = <4 0 1 2 3>;
+ calxeda,tx-atten = <0xff 22 0xff 0xff 23>;
+ calxeda,pre-clocks = <10>;
+ calxeda,post-clocks = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/sata_rcar.txt b/Documentation/devicetree/bindings/ata/sata_rcar.txt
new file mode 100644
index 000000000..2493a5a31
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/sata_rcar.txt
@@ -0,0 +1,23 @@
+* Renesas R-Car SATA
+
+Required properties:
+- compatible : should contain one of the following:
+ - "renesas,sata-r8a7779" for R-Car H1
+ ("renesas,rcar-sata" is deprecated)
+ - "renesas,sata-r8a7790-es1" for R-Car H2 ES1
+ - "renesas,sata-r8a7790" for R-Car H2 other than ES1
+ - "renesas,sata-r8a7791" for R-Car M2-W
+ - "renesas,sata-r8a7793" for R-Car M2-N
+- reg : address and length of the SATA registers;
+- interrupts : must consist of one interrupt specifier.
+- clocks : must contain a reference to the functional clock.
+
+Example:
+
+sata0: sata@ee300000 {
+ compatible = "renesas,sata-r8a7791";
+ reg = <0 0xee300000 0 0x2000>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 105 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp8_clks R8A7791_CLK_SATA0>;
+};
diff --git a/Documentation/devicetree/bindings/ata/tegra-sata.txt b/Documentation/devicetree/bindings/ata/tegra-sata.txt
new file mode 100644
index 000000000..66c83c3e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/ata/tegra-sata.txt
@@ -0,0 +1,32 @@
+Tegra124 SoC SATA AHCI controller
+
+Required properties :
+- compatible : For Tegra124, must contain "nvidia,tegra124-ahci". Otherwise,
+ must contain '"nvidia,<chip>-ahci", "nvidia,tegra124-ahci"', where <chip>
+ is tegra132.
+- reg : Should contain 2 entries:
+ - AHCI register set (SATA BAR5)
+ - SATA register set
+- interrupts : Defines the interrupt used by SATA
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - sata
+ - sata-oob
+ - cml1
+ - pll_e
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - sata
+ - sata-oob
+ - sata-cold
+- phys : Must contain an entry for each entry in phy-names.
+ See ../phy/phy-bindings.txt for details.
+- phy-names : Must include the following entries:
+ - sata-phy : XUSB PADCTL SATA PHY
+- hvdd-supply : Defines the SATA HVDD regulator
+- vddio-supply : Defines the SATA VDDIO regulator
+- avdd-supply : Defines the SATA AVDD regulator
+- target-5v-supply : Defines the SATA 5V power regulator
+- target-12v-supply : Defines the SATA 12V power regulator
diff --git a/Documentation/devicetree/bindings/btmrvl.txt b/Documentation/devicetree/bindings/btmrvl.txt
new file mode 100644
index 000000000..58f964bb0
--- /dev/null
+++ b/Documentation/devicetree/bindings/btmrvl.txt
@@ -0,0 +1,29 @@
+btmrvl
+------
+
+Required properties:
+
+ - compatible : must be "btmrvl,cfgdata"
+
+Optional properties:
+
+ - btmrvl,cal-data : Calibration data downloaded to the device during
+ initialization. This is an array of 28 values(u8).
+
+ - btmrvl,gpio-gap : gpio and gap (in msecs) combination to be
+ configured.
+
+Example:
+
+GPIO pin 13 is configured as a wakeup source and GAP is set to 100 msecs
+in below example.
+
+btmrvl {
+ compatible = "btmrvl,cfgdata";
+
+ btmrvl,cal-data = /bits/ 8 <
+ 0x37 0x01 0x1c 0x00 0xff 0xff 0xff 0xff 0x01 0x7f 0x04 0x02
+ 0x00 0x00 0xba 0xce 0xc0 0xc6 0x2d 0x00 0x00 0x00 0x00 0x00
+ 0x00 0x00 0xf0 0x00>;
+ btmrvl,gpio-gap = <0x0d64>;
+};
diff --git a/Documentation/devicetree/bindings/bus/brcm,bus-axi.txt b/Documentation/devicetree/bindings/bus/brcm,bus-axi.txt
new file mode 100644
index 000000000..edd44d802
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/brcm,bus-axi.txt
@@ -0,0 +1,53 @@
+Driver for ARM AXI Bus with Broadcom Plugins (bcma)
+
+Required properties:
+
+- compatible : brcm,bus-axi
+
+- reg : iomem address range of chipcommon core
+
+The cores on the AXI bus are automatically detected by bcma with the
+memory ranges they are using and they get registered afterwards.
+Automatic detection of the IRQ number is not working on
+BCM47xx/BCM53xx ARM SoCs. To assign IRQ numbers to the cores, provide
+them manually through device tree. Use an interrupt-map to specify the
+IRQ used by the devices on the bus. The first address is just an index,
+because we do not have any special register.
+
+The top-level axi bus may contain children representing attached cores
+(devices). This is needed since some hardware details can't be auto
+detected (e.g. IRQ numbers). Also some of the cores may be responsible
+for extra things, e.g. ChipCommon providing access to the GPIO chip.
+
+Example:
+
+ axi@18000000 {
+ compatible = "brcm,bus-axi";
+ reg = <0x18000000 0x1000>;
+ ranges = <0x00000000 0x18000000 0x00100000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0x000fffff 0xffff>;
+ interrupt-map =
+ /* Ethernet Controller 0 */
+ <0x00024000 0 &gic GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>,
+
+ /* Ethernet Controller 1 */
+ <0x00025000 0 &gic GIC_SPI 148 IRQ_TYPE_LEVEL_HIGH>;
+
+ /* PCIe Controller 0 */
+ <0x00012000 0 &gic GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>,
+ <0x00012000 1 &gic GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>,
+ <0x00012000 2 &gic GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>,
+ <0x00012000 3 &gic GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>,
+ <0x00012000 4 &gic GIC_SPI 130 IRQ_TYPE_LEVEL_HIGH>,
+ <0x00012000 5 &gic GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
+
+ chipcommon {
+ reg = <0x00000000 0x1000>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt b/Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt
new file mode 100644
index 000000000..1eceefb20
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/brcm,gisb-arb.txt
@@ -0,0 +1,34 @@
+Broadcom GISB bus Arbiter controller
+
+Required properties:
+
+- compatible:
+ "brcm,gisb-arb" or "brcm,bcm7445-gisb-arb" for 28nm chips
+ "brcm,bcm7435-gisb-arb" for newer 40nm chips
+ "brcm,bcm7400-gisb-arb" for older 40nm chips and all 65nm chips
+ "brcm,bcm7038-gisb-arb" for 130nm chips
+- reg: specifies the base physical address and size of the registers
+- interrupt-parent: specifies the phandle to the parent interrupt controller
+ this arbiter gets interrupt line from
+- interrupts: specifies the two interrupts (timeout and TEA) to be used from
+ the parent interrupt controller
+
+Optional properties:
+
+- brcm,gisb-arb-master-mask: 32-bits wide bitmask used to specify which GISB
+ masters are valid at the system level
+- brcm,gisb-arb-master-names: string list of the litteral name of the GISB
+ masters. Should match the number of bits set in brcm,gisb-master-mask and
+ the order in which they appear
+
+Example:
+
+gisb-arb@f0400000 {
+ compatible = "brcm,gisb-arb";
+ reg = <0xf0400000 0x800>;
+ interrupts = <0>, <2>;
+ interrupt-parent = <&sun_l2_intc>;
+
+ brcm,gisb-arb-master-mask = <0x7>;
+ brcm,gisb-arb-master-names = "bsp_0", "scpu_0", "cpu_0";
+};
diff --git a/Documentation/devicetree/bindings/bus/imx-weim.txt b/Documentation/devicetree/bindings/bus/imx-weim.txt
new file mode 100644
index 000000000..6630d842c
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/imx-weim.txt
@@ -0,0 +1,82 @@
+Device tree bindings for i.MX Wireless External Interface Module (WEIM)
+
+The term "wireless" does not imply that the WEIM is literally an interface
+without wires. It simply means that this module was originally designed for
+wireless and mobile applications that use low-power technology.
+
+The actual devices are instantiated from the child nodes of a WEIM node.
+
+Required properties:
+
+ - compatible: Should contain one of the following:
+ "fsl,imx1-weim"
+ "fsl,imx27-weim"
+ "fsl,imx51-weim"
+ "fsl,imx50-weim"
+ "fsl,imx6q-weim"
+ - reg: A resource specifier for the register space
+ (see the example below)
+ - clocks: the clock, see the example below.
+ - #address-cells: Must be set to 2 to allow memory address translation
+ - #size-cells: Must be set to 1 to allow CS address passing
+ - ranges: Must be set up to reflect the memory layout with four
+ integer values for each chip-select line in use:
+
+ <cs-number> 0 <physical address of mapping> <size>
+
+Optional properties:
+
+ - fsl,weim-cs-gpr: For "fsl,imx50-weim" and "fsl,imx6q-weim" type of
+ devices, it should be the phandle to the system General
+ Purpose Register controller that contains WEIM CS GPR
+ register, e.g. IOMUXC_GPR1 on i.MX6Q. IOMUXC_GPR1[11:0]
+ should be set up as one of the following 4 possible
+ values depending on the CS space configuration.
+
+ IOMUXC_GPR1[11:0] CS0 CS1 CS2 CS3
+ ---------------------------------------------
+ 05 128M 0M 0M 0M
+ 033 64M 64M 0M 0M
+ 0113 64M 32M 32M 0M
+ 01111 32M 32M 32M 32M
+
+ In case that the property is absent, the reset value or
+ what bootloader sets up in IOMUXC_GPR1[11:0] will be
+ used.
+
+Timing property for child nodes. It is mandatory, not optional.
+
+ - fsl,weim-cs-timing: The timing array, contains timing values for the
+ child node. We can get the CS index from the child
+ node's "reg" property. The number of registers depends
+ on the selected chip.
+ For i.MX1, i.MX21 ("fsl,imx1-weim") there are two
+ registers: CSxU, CSxL.
+ For i.MX25, i.MX27, i.MX31 and i.MX35 ("fsl,imx27-weim")
+ there are three registers: CSCRxU, CSCRxL, CSCRxA.
+ For i.MX50, i.MX53 ("fsl,imx50-weim"),
+ i.MX51 ("fsl,imx51-weim") and i.MX6Q ("fsl,imx6q-weim")
+ there are six registers: CSxGCR1, CSxGCR2, CSxRCR1,
+ CSxRCR2, CSxWCR1, CSxWCR2.
+
+Example for an imx6q-sabreauto board, the NOR flash connected to the WEIM:
+
+ weim: weim@021b8000 {
+ compatible = "fsl,imx6q-weim";
+ reg = <0x021b8000 0x4000>;
+ clocks = <&clks 196>;
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x08000000 0x08000000>;
+ fsl,weim-cs-gpr = <&gpr>;
+
+ nor@0,0 {
+ compatible = "cfi-flash";
+ reg = <0 0 0x02000000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ bank-width = <2>;
+ fsl,weim-cs-timing = <0x00620081 0x00000001 0x1c022000
+ 0x0000c000 0x1404a38e 0x00000000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/mvebu-mbus.txt b/Documentation/devicetree/bindings/bus/mvebu-mbus.txt
new file mode 100644
index 000000000..fa6cde41b
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/mvebu-mbus.txt
@@ -0,0 +1,279 @@
+
+* Marvell MBus
+
+Required properties:
+
+- compatible: Should be set to one of the following:
+ marvell,armada370-mbus
+ marvell,armadaxp-mbus
+ marvell,armada375-mbus
+ marvell,armada380-mbus
+ marvell,kirkwood-mbus
+ marvell,dove-mbus
+ marvell,orion5x-88f5281-mbus
+ marvell,orion5x-88f5182-mbus
+ marvell,orion5x-88f5181-mbus
+ marvell,orion5x-88f6183-mbus
+ marvell,mv78xx0-mbus
+
+- address-cells: Must be '2'. The first cell for the MBus ID encoding,
+ the second cell for the address offset within the window.
+
+- size-cells: Must be '1'.
+
+- ranges: Must be set up to provide a proper translation for each child.
+ See the examples below.
+
+- controller: Contains a single phandle referring to the MBus controller
+ node. This allows to specify the node that contains the
+ registers that control the MBus, which is typically contained
+ within the internal register window (see below).
+
+Optional properties:
+
+- pcie-mem-aperture: This optional property contains the aperture for
+ the memory region of the PCIe driver.
+ If it's defined, it must encode the base address and
+ size for the address decoding windows allocated for
+ the PCIe memory region.
+
+- pcie-io-aperture: Just as explained for the above property, this
+ optional property contains the aperture for the
+ I/O region of the PCIe driver.
+
+* Marvell MBus controller
+
+Required properties:
+
+- compatible: Should be set to "marvell,mbus-controller".
+
+- reg: Device's register space.
+ Two or three entries are expected (see the examples below):
+ the first one controls the devices decoding window,
+ the second one controls the SDRAM decoding window and
+ the third controls the MBus bridge (only with the
+ marvell,armada370-mbus and marvell,armadaxp-mbus
+ compatible strings)
+
+Example:
+
+ soc {
+ compatible = "marvell,armada370-mbus", "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ controller = <&mbusc>;
+ pcie-mem-aperture = <0xe0000000 0x8000000>;
+ pcie-io-aperture = <0xe8000000 0x100000>;
+
+ internal-regs {
+ compatible = "simple-bus";
+
+ mbusc: mbus-controller@20000 {
+ compatible = "marvell,mbus-controller";
+ reg = <0x20000 0x100>, <0x20180 0x20>, <0x20250 0x8>;
+ };
+
+ /* more children ...*/
+ };
+ };
+
+** MBus address decoding window specification
+
+The MBus children address space is comprised of two cells: the first one for
+the window ID and the second one for the offset within the window.
+In order to allow to describe valid and non-valid window entries, the
+following encoding is used:
+
+ 0xSIAA0000 0x00oooooo
+
+Where:
+
+ S = 0x0 for a MBus valid window
+ S = 0xf for a non-valid window (see below)
+
+If S = 0x0, then:
+
+ I = 4-bit window target ID
+ AA = windpw attribute
+
+If S = 0xf, then:
+
+ I = don't care
+ AA = 1 for internal register
+
+Following the above encoding, for each ranges entry for a MBus valid window
+(S = 0x0), an address decoding window is allocated. On the other side,
+entries for translation that do not correspond to valid windows (S = 0xf)
+are skipped.
+
+ soc {
+ compatible = "marvell,armada370-mbus", "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ controller = <&mbusc>;
+
+ ranges = <0xf0010000 0 0 0xd0000000 0x100000
+ 0x01e00000 0 0 0xfff00000 0x100000>;
+
+ bootrom {
+ compatible = "marvell,bootrom";
+ reg = <0x01e00000 0 0x100000>;
+ };
+
+ /* other children */
+ ...
+
+ internal-regs {
+ compatible = "simple-bus";
+ ranges = <0 0xf0010000 0 0x100000>;
+
+ mbusc: mbus-controller@20000 {
+ compatible = "marvell,mbus-controller";
+ reg = <0x20000 0x100>, <0x20180 0x20>, <0x20250 0x8>;
+ };
+
+ /* more children ...*/
+ };
+ };
+
+In the shown example, the translation entry in the 'ranges' property is what
+makes the MBus driver create a static decoding window for the corresponding
+given child device. Note that the binding does not require child nodes to be
+present. Of course, child nodes are needed to probe the devices.
+
+Since each window is identified by its target ID and attribute ID there's
+a special macro that can be use to simplify the translation entries:
+
+#define MBUS_ID(target,attributes) (((target) << 24) | ((attributes) << 16))
+
+Using this macro, the above example would be:
+
+ soc {
+ compatible = "marvell,armada370-mbus", "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ controller = <&mbusc>;
+
+ ranges = < MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000
+ MBUS_ID(0x01, 0xe0) 0 0 0xfff00000 0x100000>;
+
+ bootrom {
+ compatible = "marvell,bootrom";
+ reg = <MBUS_ID(0x01, 0xe0) 0 0x100000>;
+ };
+
+ /* other children */
+ ...
+
+ internal-regs {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 MBUS_ID(0xf0, 0x01) 0 0x100000>;
+
+ mbusc: mbus-controller@20000 {
+ compatible = "marvell,mbus-controller";
+ reg = <0x20000 0x100>, <0x20180 0x20>, <0x20250 0x8>;
+ };
+
+ /* other children */
+ ...
+ };
+ };
+
+
+** About the window base address
+
+Remember the MBus controller allows a great deal of flexibility for choosing
+the decoding window base address. When planning the device tree layout it's
+possible to choose any address as the base address, provided of course there's
+a region large enough available, and with the required alignment.
+
+Yet in other words: there's nothing preventing us from setting a base address
+of 0xf0000000, or 0xd0000000 for the NOR device shown above, if such region is
+unused.
+
+** Window allocation policy
+
+The mbus-node ranges property defines a set of mbus windows that are expected
+to be set by the operating system and that are guaranteed to be free of overlaps
+with one another or with the system memory ranges.
+
+Each entry in the property refers to exactly one window. If the operating system
+chooses to use a different set of mbus windows, it must ensure that any address
+translations performed from downstream devices are adapted accordingly.
+
+The operating system may insert additional mbus windows that do not conflict
+with the ones listed in the ranges, e.g. for mapping PCIe devices.
+As a special case, the internal register window must be set up by the boot
+loader at the address listed in the ranges property, since access to that region
+is needed to set up the other windows.
+
+** Example
+
+See the example below, where a more complete device tree is shown:
+
+ soc {
+ compatible = "marvell,armadaxp-mbus", "simple-bus";
+ controller = <&mbusc>;
+
+ ranges = <MBUS_ID(0xf0, 0x01) 0 0 0xd0000000 0x100000 /* internal-regs */
+ MBUS_ID(0x01, 0x1d) 0 0 0xfff00000 0x100000
+ MBUS_ID(0x01, 0x2f) 0 0 0xf0000000 0x8000000>;
+
+ bootrom {
+ compatible = "marvell,bootrom";
+ reg = <MBUS_ID(0x01, 0x1d) 0 0x100000>;
+ };
+
+ devbus-bootcs {
+ status = "okay";
+ ranges = <0 MBUS_ID(0x01, 0x2f) 0 0x8000000>;
+
+ /* NOR */
+ nor {
+ compatible = "cfi-flash";
+ reg = <0 0x8000000>;
+ bank-width = <2>;
+ };
+ };
+
+ pcie-controller {
+ compatible = "marvell,armada-xp-pcie";
+ status = "okay";
+ device_type = "pci";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ ranges =
+ <0x82000000 0 0x40000 MBUS_ID(0xf0, 0x01) 0x40000 0 0x00002000 /* Port 0.0 registers */
+ 0x82000000 0 0x42000 MBUS_ID(0xf0, 0x01) 0x42000 0 0x00002000 /* Port 2.0 registers */
+ 0x82000000 0 0x44000 MBUS_ID(0xf0, 0x01) 0x44000 0 0x00002000 /* Port 0.1 registers */
+ 0x82000000 0 0x48000 MBUS_ID(0xf0, 0x01) 0x48000 0 0x00002000 /* Port 0.2 registers */
+ 0x82000000 0 0x4c000 MBUS_ID(0xf0, 0x01) 0x4c000 0 0x00002000 /* Port 0.3 registers */
+ 0x82000800 0 0xe0000000 MBUS_ID(0x04, 0xe8) 0xe0000000 0 0x08000000 /* Port 0.0 MEM */
+ 0x81000800 0 0 MBUS_ID(0x04, 0xe0) 0xe8000000 0 0x00100000 /* Port 0.0 IO */>;
+
+
+ pcie@1,0 {
+ /* Port 0, Lane 0 */
+ status = "okay";
+ };
+ };
+
+ internal-regs {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 MBUS_ID(0xf0, 0x01) 0 0x100000>;
+
+ mbusc: mbus-controller@20000 {
+ reg = <0x20000 0x100>, <0x20180 0x20>, <0x20250 0x8>;
+ };
+
+ interrupt-controller@20000 {
+ reg = <0x20a00 0x2d0>, <0x21070 0x58>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt b/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt
new file mode 100644
index 000000000..18729f6fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/omap-ocp2scp.txt
@@ -0,0 +1,29 @@
+* OMAP OCP2SCP - ocp interface to scp interface
+
+properties:
+- compatible : Should be "ti,am437x-ocp2scp" for AM437x processor
+ Should be "ti,omap-ocp2scp" for all others
+- reg : Address and length of the register set for the device
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+- ranges : the child address space are mapped 1:1 onto the parent address space
+- ti,hwmods : must be "ocp2scp_usb_phy"
+
+Sub-nodes:
+All the devices connected to ocp2scp are described using sub-node to ocp2scp
+
+ocp2scp@4a0ad000 {
+ compatible = "ti,omap-ocp2scp";
+ reg = <0x4a0ad000 0x1f>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ ti,hwmods = "ocp2scp_usb_phy";
+
+ subnode1 {
+ ...
+ };
+
+ subnode2 {
+ ...
+ };
+};
diff --git a/Documentation/devicetree/bindings/bus/renesas,bsc.txt b/Documentation/devicetree/bindings/bus/renesas,bsc.txt
new file mode 100644
index 000000000..90e947269
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/renesas,bsc.txt
@@ -0,0 +1,46 @@
+Renesas Bus State Controller (BSC)
+==================================
+
+The Renesas Bus State Controller (BSC, sometimes called "LBSC within Bus
+Bridge", or "External Bus Interface") can be found in several Renesas ARM SoCs.
+It provides an external bus for connecting multiple external devices to the
+SoC, driving several chip select lines, for e.g. NOR FLASH, Ethernet and USB.
+
+While the BSC is a fairly simple memory-mapped bus, it may be part of a PM
+domain, and may have a gateable functional clock.
+Before a device connected to the BSC can be accessed, the PM domain
+containing the BSC must be powered on, and the functional clock
+driving the BSC must be enabled.
+
+The bindings for the BSC extend the bindings for "simple-pm-bus".
+
+
+Required properties
+ - compatible: Must contain an SoC-specific value, and "renesas,bsc" and
+ "simple-pm-bus" as fallbacks.
+ SoC-specific values can be:
+ "renesas,bsc-r8a73a4" for R-Mobile APE6 (r8a73a4)
+ "renesas,bsc-sh73a0" for SH-Mobile AG5 (sh73a0)
+ - #address-cells, #size-cells, ranges: Must describe the mapping between
+ parent address and child address spaces.
+ - reg: Must contain the base address and length to access the bus controller.
+
+Optional properties:
+ - interrupts: Must contain a reference to the BSC interrupt, if available.
+ - clocks: Must contain a reference to the functional clock, if available.
+ - power-domains: Must contain a reference to the PM domain, if available.
+
+
+Example:
+
+ bsc: bus@fec10000 {
+ compatible = "renesas,bsc-sh73a0", "renesas,bsc",
+ "simple-pm-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0 0x20000000>;
+ reg = <0xfec10000 0x400>;
+ interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&zb_clk>;
+ power-domains = <&pd_a4s>;
+ };
diff --git a/Documentation/devicetree/bindings/bus/simple-pm-bus.txt b/Documentation/devicetree/bindings/bus/simple-pm-bus.txt
new file mode 100644
index 000000000..d03223751
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/simple-pm-bus.txt
@@ -0,0 +1,44 @@
+Simple Power-Managed Bus
+========================
+
+A Simple Power-Managed Bus is a transparent bus that doesn't need a real
+driver, as it's typically initialized by the boot loader.
+
+However, its bus controller is part of a PM domain, or under the control of a
+functional clock. Hence, the bus controller's PM domain and/or clock must be
+enabled for child devices connected to the bus (either on-SoC or externally)
+to function.
+
+While "simple-pm-bus" follows the "simple-bus" set of properties, as specified
+in ePAPR, it is not an extension of "simple-bus".
+
+
+Required properties:
+ - compatible: Must contain at least "simple-pm-bus".
+ Must not contain "simple-bus".
+ It's recommended to let this be preceded by one or more
+ vendor-specific compatible values.
+ - #address-cells, #size-cells, ranges: Must describe the mapping between
+ parent address and child address spaces.
+
+Optional platform-specific properties for clock or PM domain control (at least
+one of them is required):
+ - clocks: Must contain a reference to the functional clock(s),
+ - power-domains: Must contain a reference to the PM domain.
+Please refer to the binding documentation for the clock and/or PM domain
+providers for more details.
+
+
+Example:
+
+ bsc: bus@fec10000 {
+ compatible = "renesas,bsc-sh73a0", "renesas,bsc",
+ "simple-pm-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0 0x20000000>;
+ reg = <0xfec10000 0x400>;
+ interrupts = <0 39 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&zb_clk>;
+ power-domains = <&pd_a4s>;
+ };
diff --git a/Documentation/devicetree/bindings/bus/ti-gpmc.txt b/Documentation/devicetree/bindings/bus/ti-gpmc.txt
new file mode 100644
index 000000000..704be9306
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/ti-gpmc.txt
@@ -0,0 +1,130 @@
+Device tree bindings for OMAP general purpose memory controllers (GPMC)
+
+The actual devices are instantiated from the child nodes of a GPMC node.
+
+Required properties:
+
+ - compatible: Should be set to one of the following:
+
+ ti,omap2420-gpmc (omap2420)
+ ti,omap2430-gpmc (omap2430)
+ ti,omap3430-gpmc (omap3430 & omap3630)
+ ti,omap4430-gpmc (omap4430 & omap4460 & omap543x)
+ ti,am3352-gpmc (am335x devices)
+
+ - reg: A resource specifier for the register space
+ (see the example below)
+ - ti,hwmods: Should be set to "ti,gpmc" until the DT transition is
+ completed.
+ - #address-cells: Must be set to 2 to allow memory address translation
+ - #size-cells: Must be set to 1 to allow CS address passing
+ - gpmc,num-cs: The maximum number of chip-select lines that controller
+ can support.
+ - gpmc,num-waitpins: The maximum number of wait pins that controller can
+ support.
+ - ranges: Must be set up to reflect the memory layout with four
+ integer values for each chip-select line in use:
+
+ <cs-number> 0 <physical address of mapping> <size>
+
+ Currently, calculated values derived from the contents
+ of the per-CS register GPMC_CONFIG7 (as set up by the
+ bootloader) are used for the physical address decoding.
+ As this will change in the future, filling correct
+ values here is a requirement.
+
+Timing properties for child nodes. All are optional and default to 0.
+
+ - gpmc,sync-clk-ps: Minimum clock period for synchronous mode, in picoseconds
+
+ Chip-select signal timings (in nanoseconds) corresponding to GPMC_CONFIG2:
+ - gpmc,cs-on-ns: Assertion time
+ - gpmc,cs-rd-off-ns: Read deassertion time
+ - gpmc,cs-wr-off-ns: Write deassertion time
+
+ ADV signal timings (in nanoseconds) corresponding to GPMC_CONFIG3:
+ - gpmc,adv-on-ns: Assertion time
+ - gpmc,adv-rd-off-ns: Read deassertion time
+ - gpmc,adv-wr-off-ns: Write deassertion time
+
+ WE signals timings (in nanoseconds) corresponding to GPMC_CONFIG4:
+ - gpmc,we-on-ns Assertion time
+ - gpmc,we-off-ns: Deassertion time
+
+ OE signals timings (in nanoseconds) corresponding to GPMC_CONFIG4:
+ - gpmc,oe-on-ns: Assertion time
+ - gpmc,oe-off-ns: Deassertion time
+
+ Access time and cycle time timings (in nanoseconds) corresponding to
+ GPMC_CONFIG5:
+ - gpmc,page-burst-access-ns: Multiple access word delay
+ - gpmc,access-ns: Start-cycle to first data valid delay
+ - gpmc,rd-cycle-ns: Total read cycle time
+ - gpmc,wr-cycle-ns: Total write cycle time
+ - gpmc,bus-turnaround-ns: Turn-around time between successive accesses
+ - gpmc,cycle2cycle-delay-ns: Delay between chip-select pulses
+ - gpmc,clk-activation-ns: GPMC clock activation time
+ - gpmc,wait-monitoring-ns: Start of wait monitoring with regard to valid
+ data
+
+Boolean timing parameters. If property is present parameter enabled and
+disabled if omitted:
+ - gpmc,adv-extra-delay: ADV signal is delayed by half GPMC clock
+ - gpmc,cs-extra-delay: CS signal is delayed by half GPMC clock
+ - gpmc,cycle2cycle-diffcsen: Add "cycle2cycle-delay" between successive
+ accesses to a different CS
+ - gpmc,cycle2cycle-samecsen: Add "cycle2cycle-delay" between successive
+ accesses to the same CS
+ - gpmc,oe-extra-delay: OE signal is delayed by half GPMC clock
+ - gpmc,we-extra-delay: WE signal is delayed by half GPMC clock
+ - gpmc,time-para-granularity: Multiply all access times by 2
+
+The following are only applicable to OMAP3+ and AM335x:
+ - gpmc,wr-access-ns: In synchronous write mode, for single or
+ burst accesses, defines the number of
+ GPMC_FCLK cycles from start access time
+ to the GPMC_CLK rising edge used by the
+ memory device for the first data capture.
+ - gpmc,wr-data-mux-bus-ns: In address-data multiplex mode, specifies
+ the time when the first data is driven on
+ the address-data bus.
+
+GPMC chip-select settings properties for child nodes. All are optional.
+
+- gpmc,burst-length Page/burst length. Must be 4, 8 or 16.
+- gpmc,burst-wrap Enables wrap bursting
+- gpmc,burst-read Enables read page/burst mode
+- gpmc,burst-write Enables write page/burst mode
+- gpmc,device-width Total width of device(s) connected to a GPMC
+ chip-select in bytes. The GPMC supports 8-bit
+ and 16-bit devices and so this property must be
+ 1 or 2.
+- gpmc,mux-add-data Address and data multiplexing configuration.
+ Valid values are 1 for address-address-data
+ multiplexing mode and 2 for address-data
+ multiplexing mode.
+- gpmc,sync-read Enables synchronous read. Defaults to asynchronous
+ is this is not set.
+- gpmc,sync-write Enables synchronous writes. Defaults to asynchronous
+ is this is not set.
+- gpmc,wait-pin Wait-pin used by client. Must be less than
+ "gpmc,num-waitpins".
+- gpmc,wait-on-read Enables wait monitoring on reads.
+- gpmc,wait-on-write Enables wait monitoring on writes.
+
+Example for an AM33xx board:
+
+ gpmc: gpmc@50000000 {
+ compatible = "ti,am3352-gpmc";
+ ti,hwmods = "gpmc";
+ reg = <0x50000000 0x2000>;
+ interrupts = <100>;
+
+ gpmc,num-cs = <8>;
+ gpmc,num-waitpins = <2>;
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x08000000 0x10000000>; /* CS0 @addr 0x8000000, size 0x10000000 */
+
+ /* child nodes go here */
+ };
diff --git a/Documentation/devicetree/bindings/c6x/clocks.txt b/Documentation/devicetree/bindings/c6x/clocks.txt
new file mode 100644
index 000000000..a04f5fd30
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/clocks.txt
@@ -0,0 +1,40 @@
+C6X PLL Clock Controllers
+-------------------------
+
+This is a first-cut support for the SoC clock controllers. This is still
+under development and will probably change as the common device tree
+clock support is added to the kernel.
+
+Required properties:
+
+- compatible: "ti,c64x+pll"
+ May also have SoC-specific value to support SoC-specific initialization
+ in the driver. One of:
+ "ti,c6455-pll"
+ "ti,c6457-pll"
+ "ti,c6472-pll"
+ "ti,c6474-pll"
+
+- reg: base address and size of register area
+- clock-frequency: input clock frequency in hz
+
+
+Optional properties:
+
+- ti,c64x+pll-bypass-delay: CPU cycles to delay when entering bypass mode
+
+- ti,c64x+pll-reset-delay: CPU cycles to delay after PLL reset
+
+- ti,c64x+pll-lock-delay: CPU cycles to delay after PLL frequency change
+
+Example:
+
+ clock-controller@29a0000 {
+ compatible = "ti,c6472-pll", "ti,c64x+pll";
+ reg = <0x029a0000 0x200>;
+ clock-frequency = <25000000>;
+
+ ti,c64x+pll-bypass-delay = <200>;
+ ti,c64x+pll-reset-delay = <12000>;
+ ti,c64x+pll-lock-delay = <80000>;
+ };
diff --git a/Documentation/devicetree/bindings/c6x/dscr.txt b/Documentation/devicetree/bindings/c6x/dscr.txt
new file mode 100644
index 000000000..92672235d
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/dscr.txt
@@ -0,0 +1,127 @@
+Device State Configuration Registers
+------------------------------------
+
+TI C6X SoCs contain a region of miscellaneous registers which provide various
+function for SoC control or status. Details vary considerably among from SoC
+to SoC with no two being alike.
+
+In general, the Device State Configuration Registers (DSCR) will provide one or
+more configuration registers often protected by a lock register where one or
+more key values must be written to a lock register in order to unlock the
+configuration register for writes. These configuration register may be used to
+enable (and disable in some cases) SoC pin drivers, select peripheral clock
+sources (internal or pin), etc. In some cases, a configuration register is
+write once or the individual bits are write once. In addition to device config,
+the DSCR block may provide registers which are used to reset peripherals,
+provide device ID information, provide ethernet MAC addresses, as well as other
+miscellaneous functions.
+
+For device state control (enable/disable), each device control is assigned an
+id which is used by individual device drivers to control the state as needed.
+
+Required properties:
+
+- compatible: must be "ti,c64x+dscr"
+- reg: register area base and size
+
+Optional properties:
+
+ NOTE: These are optional in that not all SoCs will have all properties. For
+ SoCs which do support a given property, leaving the property out of the
+ device tree will result in reduced functionality or possibly driver
+ failure.
+
+- ti,dscr-devstat
+ offset of the devstat register
+
+- ti,dscr-silicon-rev
+ offset, start bit, and bitsize of silicon revision field
+
+- ti,dscr-rmii-resets
+ offset and bitmask of RMII reset field. May have multiple tuples if more
+ than one ethernet port is available.
+
+- ti,dscr-locked-regs
+ possibly multiple tuples describing registers which are write protected by
+ a lock register. Each tuple consists of the register offset, lock register
+ offsset, and the key value used to unlock the register.
+
+- ti,dscr-kick-regs
+ offset and key values of two "kick" registers used to write protect other
+ registers in DSCR. On SoCs using kick registers, the first key must be
+ written to the first kick register and the second key must be written to
+ the second register before other registers in the area are write-enabled.
+
+- ti,dscr-mac-fuse-regs
+ MAC addresses are contained in two registers. Each element of a MAC address
+ is contained in a single byte. This property has two tuples. Each tuple has
+ a register offset and four cells representing bytes in the register from
+ most significant to least. The value of these four cells is the MAC byte
+ index (1-6) of the byte within the register. A value of 0 means the byte
+ is unused in the MAC address.
+
+- ti,dscr-devstate-ctl-regs
+ This property describes the bitfields used to control the state of devices.
+ Each tuple describes a range of identical bitfields used to control one or
+ more devices (one bitfield per device). The layout of each tuple is:
+
+ start_id num_ids reg enable disable start_bit nbits
+
+ Where:
+ start_id is device id for the first device control in the range
+ num_ids is the number of device controls in the range
+ reg is the offset of the register holding the control bits
+ enable is the value to enable a device
+ disable is the value to disable a device (0xffffffff if cannot disable)
+ start_bit is the bit number of the first bit in the range
+ nbits is the number of bits per device control
+
+- ti,dscr-devstate-stat-regs
+ This property describes the bitfields used to provide device state status
+ for device states controlled by the DSCR. Each tuple describes a range of
+ identical bitfields used to provide status for one or more devices (one
+ bitfield per device). The layout of each tuple is:
+
+ start_id num_ids reg enable disable start_bit nbits
+
+ Where:
+ start_id is device id for the first device status in the range
+ num_ids is the number of devices covered by the range
+ reg is the offset of the register holding the status bits
+ enable is the value indicating device is enabled
+ disable is the value indicating device is disabled
+ start_bit is the bit number of the first bit in the range
+ nbits is the number of bits per device status
+
+- ti,dscr-privperm
+ Offset and default value for register used to set access privilege for
+ some SoC devices.
+
+
+Example:
+
+ device-state-config-regs@2a80000 {
+ compatible = "ti,c64x+dscr";
+ reg = <0x02a80000 0x41000>;
+
+ ti,dscr-devstat = <0>;
+ ti,dscr-silicon-rev = <8 28 0xf>;
+ ti,dscr-rmii-resets = <0x40020 0x00040000>;
+
+ ti,dscr-locked-regs = <0x40008 0x40004 0x0f0a0b00>;
+ ti,dscr-devstate-ctl-regs =
+ <0 12 0x40008 1 0 0 2
+ 12 1 0x40008 3 0 30 2
+ 13 2 0x4002c 1 0xffffffff 0 1>;
+ ti,dscr-devstate-stat-regs =
+ <0 10 0x40014 1 0 0 3
+ 10 2 0x40018 1 0 0 3>;
+
+ ti,dscr-mac-fuse-regs = <0x700 1 2 3 4
+ 0x704 5 6 0 0>;
+
+ ti,dscr-privperm = <0x41c 0xaaaaaaaa>;
+
+ ti,dscr-kick-regs = <0x38 0x83E70B13
+ 0x3c 0x95A4F1E0>;
+ };
diff --git a/Documentation/devicetree/bindings/c6x/emifa.txt b/Documentation/devicetree/bindings/c6x/emifa.txt
new file mode 100644
index 000000000..0ff6e9b9a
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/emifa.txt
@@ -0,0 +1,62 @@
+External Memory Interface
+-------------------------
+
+The emifa node describes a simple external bus controller found on some C6X
+SoCs. This interface provides external busses with a number of chip selects.
+
+Required properties:
+
+- compatible: must be "ti,c64x+emifa", "simple-bus"
+- reg: register area base and size
+- #address-cells: must be 2 (chip-select + offset)
+- #size-cells: must be 1
+- ranges: mapping from EMIFA space to parent space
+
+
+Optional properties:
+
+- ti,dscr-dev-enable: Device ID if EMIF is enabled/disabled from DSCR
+
+- ti,emifa-burst-priority:
+ Number of memory transfers after which the EMIF will elevate the priority
+ of the oldest command in the command FIFO. Setting this field to 255
+ disables this feature, thereby allowing old commands to stay in the FIFO
+ indefinitely.
+
+- ti,emifa-ce-config:
+ Configuration values for each of the supported chip selects.
+
+Example:
+
+ emifa@70000000 {
+ compatible = "ti,c64x+emifa", "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ reg = <0x70000000 0x100>;
+ ranges = <0x2 0x0 0xa0000000 0x00000008
+ 0x3 0x0 0xb0000000 0x00400000
+ 0x4 0x0 0xc0000000 0x10000000
+ 0x5 0x0 0xD0000000 0x10000000>;
+
+ ti,dscr-dev-enable = <13>;
+ ti,emifa-burst-priority = <255>;
+ ti,emifa-ce-config = <0x00240120
+ 0x00240120
+ 0x00240122
+ 0x00240122>;
+
+ flash@3,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash";
+ reg = <0x3 0x0 0x400000>;
+ bank-width = <1>;
+ device-width = <1>;
+ partition@0 {
+ reg = <0x0 0x400000>;
+ label = "NOR";
+ };
+ };
+ };
+
+This shows a flash chip attached to chip select 3.
diff --git a/Documentation/devicetree/bindings/c6x/interrupt.txt b/Documentation/devicetree/bindings/c6x/interrupt.txt
new file mode 100644
index 000000000..42bb796cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/interrupt.txt
@@ -0,0 +1,104 @@
+C6X Interrupt Chips
+-------------------
+
+* C64X+ Core Interrupt Controller
+
+ The core interrupt controller provides 16 prioritized interrupts to the
+ C64X+ core. Priority 0 and 1 are used for reset and NMI respectively.
+ Priority 2 and 3 are reserved. Priority 4-15 are used for interrupt
+ sources coming from outside the core.
+
+ Required properties:
+ --------------------
+ - compatible: Should be "ti,c64x+core-pic";
+ - #interrupt-cells: <1>
+
+ Interrupt Specifier Definition
+ ------------------------------
+ Single cell specifying the core interrupt priority level (4-15) where
+ 4 is highest priority and 15 is lowest priority.
+
+ Example
+ -------
+ core_pic: interrupt-controller@0 {
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ compatible = "ti,c64x+core-pic";
+ };
+
+
+
+* C64x+ Megamodule Interrupt Controller
+
+ The megamodule PIC consists of four interrupt mupliplexers each of which
+ combine up to 32 interrupt inputs into a single interrupt output which
+ may be cascaded into the core interrupt controller. The megamodule PIC
+ has a total of 12 outputs cascading into the core interrupt controller.
+ One for each core interrupt priority level. In addition to the combined
+ interrupt sources, individual megamodule interrupts may be cascaded to
+ the core interrupt controller. When an individual interrupt is cascaded,
+ it is no longer handled through a megamodule interrupt combiner and is
+ considered to have the core interrupt controller as the parent.
+
+ Required properties:
+ --------------------
+ - compatible: "ti,c64x+megamod-pic"
+ - interrupt-controller
+ - #interrupt-cells: <1>
+ - reg: base address and size of register area
+ - interrupt-parent: must be core interrupt controller
+ - interrupts: This should have four cells; one for each interrupt combiner.
+ The cells contain the core priority interrupt to which the
+ corresponding combiner output is wired.
+
+ Optional properties:
+ --------------------
+ - ti,c64x+megamod-pic-mux: Array of 12 cells correspnding to the 12 core
+ priority interrupts. The first cell corresponds to
+ core priority 4 and the last cell corresponds to
+ core priority 15. The value of each cell is the
+ megamodule interrupt source which is MUXed to
+ the core interrupt corresponding to the cell
+ position. Allowed values are 4 - 127. Mapping for
+ interrupts 0 - 3 (combined interrupt sources) are
+ ignored.
+
+ Interrupt Specifier Definition
+ ------------------------------
+ Single cell specifying the megamodule interrupt source (4-127). Note that
+ interrupts mapped directly to the core with "ti,c64x+megamod-pic-mux" will
+ use the core interrupt controller as their parent and the specifier will
+ be the core priority level, not the megamodule interrupt number.
+
+ Examples
+ --------
+ megamod_pic: interrupt-controller@1800000 {
+ compatible = "ti,c64x+megamod-pic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x1800000 0x1000>;
+ interrupt-parent = <&core_pic>;
+ interrupts = < 12 13 14 15 >;
+ };
+
+ This is a minimal example where all individual interrupts go through a
+ combiner. Combiner-0 is mapped to core interrupt 12, combiner-1 is mapped
+ to interrupt 13, etc.
+
+
+ megamod_pic: interrupt-controller@1800000 {
+ compatible = "ti,c64x+megamod-pic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x1800000 0x1000>;
+ interrupt-parent = <&core_pic>;
+ interrupts = < 12 13 14 15 >;
+ ti,c64x+megamod-pic-mux = < 0 0 0 0
+ 32 0 0 0
+ 0 0 0 0 >;
+ };
+
+ This the same as the first example except that megamodule interrupt 32 is
+ mapped directly to core priority interrupt 8. The node using this interrupt
+ must set the core controller as its interrupt parent and use 8 in the
+ interrupt specifier value.
diff --git a/Documentation/devicetree/bindings/c6x/soc.txt b/Documentation/devicetree/bindings/c6x/soc.txt
new file mode 100644
index 000000000..b1e4973b5
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/soc.txt
@@ -0,0 +1,28 @@
+C6X System-on-Chip
+------------------
+
+Required properties:
+
+- compatible: "simple-bus"
+- #address-cells: must be 1
+- #size-cells: must be 1
+- ranges
+
+Optional properties:
+
+- model: specific SoC model
+
+- nodes for IP blocks within SoC
+
+
+Example:
+
+ soc {
+ compatible = "simple-bus";
+ model = "tms320c6455";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/c6x/timer64.txt b/Documentation/devicetree/bindings/c6x/timer64.txt
new file mode 100644
index 000000000..95911fe70
--- /dev/null
+++ b/Documentation/devicetree/bindings/c6x/timer64.txt
@@ -0,0 +1,26 @@
+Timer64
+-------
+
+The timer64 node describes C6X event timers.
+
+Required properties:
+
+- compatible: must be "ti,c64x+timer64"
+- reg: base address and size of register region
+- interrupt-parent: interrupt controller
+- interrupts: interrupt id
+
+Optional properties:
+
+- ti,dscr-dev-enable: Device ID used to enable timer IP through DSCR interface.
+
+- ti,core-mask: on multi-core SoCs, bitmask of cores allowed to use this timer.
+
+Example:
+ timer0: timer@25e0000 {
+ compatible = "ti,c64x+timer64";
+ ti,core-mask = < 0x01 >;
+ reg = <0x25e0000 0x40>;
+ interrupt-parent = <&megamod_pic>;
+ interrupts = < 16 >;
+ };
diff --git a/Documentation/devicetree/bindings/chosen.txt b/Documentation/devicetree/bindings/chosen.txt
new file mode 100644
index 000000000..ed838f453
--- /dev/null
+++ b/Documentation/devicetree/bindings/chosen.txt
@@ -0,0 +1,46 @@
+The chosen node
+---------------
+
+The chosen node does not represent a real device, but serves as a place
+for passing data between firmware and the operating system, like boot
+arguments. Data in the chosen node does not represent the hardware.
+
+
+stdout-path property
+--------------------
+
+Device trees may specify the device to be used for boot console output
+with a stdout-path property under /chosen, as described in ePAPR, e.g.
+
+/ {
+ chosen {
+ stdout-path = "/serial@f00:115200";
+ };
+
+ serial@f00 {
+ compatible = "vendor,some-uart";
+ reg = <0xf00 0x10>;
+ };
+};
+
+If the character ":" is present in the value, this terminates the path.
+The meaning of any characters following the ":" is device-specific, and
+must be specified in the relevant binding documentation.
+
+For UART devices, the preferred binding is a string in the form:
+
+ <baud>{<parity>{<bits>{<flow>}}}
+
+where
+
+ baud - baud rate in decimal
+ parity - 'n' (none), 'o', (odd) or 'e' (even)
+ bits - number of data bits
+ flow - 'r' (rts)
+
+For example: 115200n8r
+
+Implementation note: Linux will look for the property "linux,stdout-path" or
+on PowerPC "stdout" if "stdout-path" is not found. However, the
+"linux,stdout-path" and "stdout" properties are deprecated. New platforms
+should only use the "stdout-path" property.
diff --git a/Documentation/devicetree/bindings/clock/alphascale,acc.txt b/Documentation/devicetree/bindings/clock/alphascale,acc.txt
new file mode 100644
index 000000000..62e67e883
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/alphascale,acc.txt
@@ -0,0 +1,115 @@
+Alphascale Clock Controller
+
+The ACC (Alphascale Clock Controller) is responsible of choising proper
+clock source, setting deviders and clock gates.
+
+Required properties for the ACC node:
+ - compatible: must be "alphascale,asm9260-clock-controller"
+ - reg: must contain the ACC register base and size
+ - #clock-cells : shall be set to 1.
+
+Simple one-cell clock specifier format is used, where the only cell is used
+as an index of the clock inside the provider.
+It is encouraged to use dt-binding for clock index definitions. SoC specific
+dt-binding should be included to the device tree descriptor. For example
+Alphascale ASM9260:
+#include <dt-bindings/clock/alphascale,asm9260.h>
+
+This binding contains two types of clock providers:
+ _AHB_ - AHB gate;
+ _SYS_ - adjustable clock source. Not all peripheral have _SYS_ clock provider.
+All clock specific details can be found in the SoC documentation.
+CLKID_AHB_ROM 0
+CLKID_AHB_RAM 1
+CLKID_AHB_GPIO 2
+CLKID_AHB_MAC 3
+CLKID_AHB_EMI 4
+CLKID_AHB_USB0 5
+CLKID_AHB_USB1 6
+CLKID_AHB_DMA0 7
+CLKID_AHB_DMA1 8
+CLKID_AHB_UART0 9
+CLKID_AHB_UART1 10
+CLKID_AHB_UART2 11
+CLKID_AHB_UART3 12
+CLKID_AHB_UART4 13
+CLKID_AHB_UART5 14
+CLKID_AHB_UART6 15
+CLKID_AHB_UART7 16
+CLKID_AHB_UART8 17
+CLKID_AHB_UART9 18
+CLKID_AHB_I2S0 19
+CLKID_AHB_I2C0 20
+CLKID_AHB_I2C1 21
+CLKID_AHB_SSP0 22
+CLKID_AHB_IOCONFIG 23
+CLKID_AHB_WDT 24
+CLKID_AHB_CAN0 25
+CLKID_AHB_CAN1 26
+CLKID_AHB_MPWM 27
+CLKID_AHB_SPI0 28
+CLKID_AHB_SPI1 29
+CLKID_AHB_QEI 30
+CLKID_AHB_QUADSPI0 31
+CLKID_AHB_CAMIF 32
+CLKID_AHB_LCDIF 33
+CLKID_AHB_TIMER0 34
+CLKID_AHB_TIMER1 35
+CLKID_AHB_TIMER2 36
+CLKID_AHB_TIMER3 37
+CLKID_AHB_IRQ 38
+CLKID_AHB_RTC 39
+CLKID_AHB_NAND 40
+CLKID_AHB_ADC0 41
+CLKID_AHB_LED 42
+CLKID_AHB_DAC0 43
+CLKID_AHB_LCD 44
+CLKID_AHB_I2S1 45
+CLKID_AHB_MAC1 46
+
+CLKID_SYS_CPU 47
+CLKID_SYS_AHB 48
+CLKID_SYS_I2S0M 49
+CLKID_SYS_I2S0S 50
+CLKID_SYS_I2S1M 51
+CLKID_SYS_I2S1S 52
+CLKID_SYS_UART0 53
+CLKID_SYS_UART1 54
+CLKID_SYS_UART2 55
+CLKID_SYS_UART3 56
+CLKID_SYS_UART4 56
+CLKID_SYS_UART5 57
+CLKID_SYS_UART6 58
+CLKID_SYS_UART7 59
+CLKID_SYS_UART8 60
+CLKID_SYS_UART9 61
+CLKID_SYS_SPI0 62
+CLKID_SYS_SPI1 63
+CLKID_SYS_QUADSPI 64
+CLKID_SYS_SSP0 65
+CLKID_SYS_NAND 66
+CLKID_SYS_TRACE 67
+CLKID_SYS_CAMM 68
+CLKID_SYS_WDT 69
+CLKID_SYS_CLKOUT 70
+CLKID_SYS_MAC 71
+CLKID_SYS_LCD 72
+CLKID_SYS_ADCANA 73
+
+Example of clock consumer with _SYS_ and _AHB_ sinks.
+uart4: serial@80010000 {
+ compatible = "alphascale,asm9260-uart";
+ reg = <0x80010000 0x4000>;
+ clocks = <&acc CLKID_SYS_UART4>, <&acc CLKID_AHB_UART4>;
+ interrupts = <19>;
+ status = "disabled";
+};
+
+Clock consumer with only one, _AHB_ sink.
+timer0: timer@80088000 {
+ compatible = "alphascale,asm9260-timer";
+ reg = <0x80088000 0x4000>;
+ clocks = <&acc CLKID_AHB_TIMER0>;
+ interrupts = <29>;
+};
+
diff --git a/Documentation/devicetree/bindings/clock/altr_socfpga.txt b/Documentation/devicetree/bindings/clock/altr_socfpga.txt
new file mode 100644
index 000000000..f72e80e0d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/altr_socfpga.txt
@@ -0,0 +1,30 @@
+Device Tree Clock bindings for Altera's SoCFPGA platform
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be one of the following:
+ "altr,socfpga-pll-clock" - for a PLL clock
+ "altr,socfpga-perip-clock" - The peripheral clock divided from the
+ PLL clock.
+ "altr,socfpga-gate-clk" - Clocks that directly feed peripherals and
+ can get gated.
+
+- reg : shall be the control register offset from CLOCK_MANAGER's base for the clock.
+- clocks : shall be the input parent clock phandle for the clock. This is
+ either an oscillator or a pll output.
+- #clock-cells : from common clock binding, shall be set to 0.
+
+Optional properties:
+- fixed-divider : If clocks have a fixed divider value, use this property.
+- clk-gate : For "socfpga-gate-clk", clk-gate contains the gating register
+ and the bit index.
+- div-reg : For "socfpga-gate-clk" and "socfpga-periph-clock", div-reg contains
+ the divider register, bit shift, and width.
+- clk-phase : For the sdmmc_clk, contains the value of the clock phase that controls
+ the SDMMC CIU clock. The first value is the clk_sample(smpsel), and the second
+ value is the cclk_in_drv(drvsel). The clk-phase is used to enable the correct
+ hold/delay times that is needed for the SD/MMC CIU clock. The values of both
+ can be 0-315 degrees, in 45 degree increments.
diff --git a/Documentation/devicetree/bindings/clock/arm-integrator.txt b/Documentation/devicetree/bindings/clock/arm-integrator.txt
new file mode 100644
index 000000000..11f5f95f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/arm-integrator.txt
@@ -0,0 +1,34 @@
+Clock bindings for ARM Integrator and Versatile Core Module clocks
+
+Auxiliary Oscillator Clock
+
+This is a configurable clock fed from a 24 MHz chrystal,
+used for generating e.g. video clocks. It is located on the
+core module and there is only one of these.
+
+This clock node *must* be a subnode of the core module, since
+it obtains the base address for it's address range from its
+parent node.
+
+
+Required properties:
+- compatible: must be "arm,integrator-cm-auxosc" or "arm,versatile-cm-auxosc"
+- #clock-cells: must be <0>
+
+Optional properties:
+- clocks: parent clock(s)
+
+Example:
+
+core-module@10000000 {
+ xtal24mhz: xtal24mhz@24M {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <24000000>;
+ };
+ auxosc: cm_aux_osc@25M {
+ #clock-cells = <0>;
+ compatible = "arm,integrator-cm-auxosc";
+ clocks = <&xtal24mhz>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/clock/at91-clock.txt b/Documentation/devicetree/bindings/clock/at91-clock.txt
new file mode 100644
index 000000000..5ba645069
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/at91-clock.txt
@@ -0,0 +1,463 @@
+Device Tree Clock bindings for arch-at91
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be one of the following:
+ "atmel,at91sam9x5-sckc":
+ at91 SCKC (Slow Clock Controller)
+ This node contains the slow clock definitions.
+
+ "atmel,at91sam9x5-clk-slow-osc":
+ at91 slow oscillator
+
+ "atmel,at91sam9x5-clk-slow-rc-osc":
+ at91 internal slow RC oscillator
+
+ "atmel,at91rm9200-pmc" or
+ "atmel,at91sam9g45-pmc" or
+ "atmel,at91sam9n12-pmc" or
+ "atmel,at91sam9x5-pmc" or
+ "atmel,sama5d3-pmc":
+ at91 PMC (Power Management Controller)
+ All at91 specific clocks (clocks defined below) must be child
+ node of the PMC node.
+
+ "atmel,at91sam9x5-clk-slow" (under sckc node)
+ or
+ "atmel,at91sam9260-clk-slow" (under pmc node):
+ at91 slow clk
+
+ "atmel,at91rm9200-clk-main-osc"
+ "atmel,at91sam9x5-clk-main-rc-osc"
+ at91 main clk sources
+
+ "atmel,at91sam9x5-clk-main"
+ "atmel,at91rm9200-clk-main":
+ at91 main clock
+
+ "atmel,at91rm9200-clk-master" or
+ "atmel,at91sam9x5-clk-master":
+ at91 master clock
+
+ "atmel,at91sam9x5-clk-peripheral" or
+ "atmel,at91rm9200-clk-peripheral":
+ at91 peripheral clocks
+
+ "atmel,at91rm9200-clk-pll" or
+ "atmel,at91sam9g45-clk-pll" or
+ "atmel,at91sam9g20-clk-pllb" or
+ "atmel,sama5d3-clk-pll":
+ at91 pll clocks
+
+ "atmel,at91sam9x5-clk-plldiv":
+ at91 plla divisor
+
+ "atmel,at91rm9200-clk-programmable" or
+ "atmel,at91sam9g45-clk-programmable" or
+ "atmel,at91sam9x5-clk-programmable":
+ at91 programmable clocks
+
+ "atmel,at91sam9x5-clk-smd":
+ at91 SMD (Soft Modem) clock
+
+ "atmel,at91rm9200-clk-system":
+ at91 system clocks
+
+ "atmel,at91rm9200-clk-usb" or
+ "atmel,at91sam9x5-clk-usb" or
+ "atmel,at91sam9n12-clk-usb":
+ at91 usb clock
+
+ "atmel,at91sam9x5-clk-utmi":
+ at91 utmi clock
+
+ "atmel,sama5d4-clk-h32mx":
+ at91 h32mx clock
+
+Required properties for SCKC node:
+- reg : defines the IO memory reserved for the SCKC.
+- #size-cells : shall be 0 (reg is used to encode clk id).
+- #address-cells : shall be 1 (reg is used to encode clk id).
+
+
+For example:
+ sckc: sckc@fffffe50 {
+ compatible = "atmel,sama5d3-pmc";
+ reg = <0xfffffe50 0x4>
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ /* put at91 slow clocks here */
+ };
+
+
+Required properties for internal slow RC oscillator:
+- #clock-cells : from common clock binding; shall be set to 0.
+- clock-frequency : define the internal RC oscillator frequency.
+
+Optional properties:
+- clock-accuracy : define the internal RC oscillator accuracy.
+
+For example:
+ slow_rc_osc: slow_rc_osc {
+ compatible = "atmel,at91sam9x5-clk-slow-rc-osc";
+ clock-frequency = <32768>;
+ clock-accuracy = <50000000>;
+ };
+
+Required properties for slow oscillator:
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall encode the main osc source clk sources (see atmel datasheet).
+
+Optional properties:
+- atmel,osc-bypass : boolean property. Set this when a clock signal is directly
+ provided on XIN.
+
+For example:
+ slow_osc: slow_osc {
+ compatible = "atmel,at91rm9200-clk-slow-osc";
+ #clock-cells = <0>;
+ clocks = <&slow_xtal>;
+ };
+
+Required properties for slow clock:
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall encode the slow clk sources (see atmel datasheet).
+
+For example:
+ clk32k: slck {
+ compatible = "atmel,at91sam9x5-clk-slow";
+ #clock-cells = <0>;
+ clocks = <&slow_rc_osc &slow_osc>;
+ };
+
+Required properties for PMC node:
+- reg : defines the IO memory reserved for the PMC.
+- #size-cells : shall be 0 (reg is used to encode clk id).
+- #address-cells : shall be 1 (reg is used to encode clk id).
+- interrupts : shall be set to PMC interrupt line.
+- interrupt-controller : tell that the PMC is an interrupt controller.
+- #interrupt-cells : must be set to 1. The first cell encodes the interrupt id,
+ and reflect the bit position in the PMC_ER/DR/SR registers.
+ You can use the dt macros defined in dt-bindings/clock/at91.h.
+ 0 (AT91_PMC_MOSCS) -> main oscillator ready
+ 1 (AT91_PMC_LOCKA) -> PLL A ready
+ 2 (AT91_PMC_LOCKB) -> PLL B ready
+ 3 (AT91_PMC_MCKRDY) -> master clock ready
+ 6 (AT91_PMC_LOCKU) -> UTMI PLL clock ready
+ 8 .. 15 (AT91_PMC_PCKRDY(id)) -> programmable clock ready
+ 16 (AT91_PMC_MOSCSELS) -> main oscillator selected
+ 17 (AT91_PMC_MOSCRCS) -> RC main oscillator stabilized
+ 18 (AT91_PMC_CFDEV) -> clock failure detected
+
+For example:
+ pmc: pmc@fffffc00 {
+ compatible = "atmel,sama5d3-pmc";
+ interrupts = <1 4 7>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ /* put at91 clocks here */
+ };
+
+Required properties for main clock internal RC oscillator:
+- interrupt-parent : must reference the PMC node.
+- interrupts : shall be set to "<0>".
+- clock-frequency : define the internal RC oscillator frequency.
+
+Optional properties:
+- clock-accuracy : define the internal RC oscillator accuracy.
+
+For example:
+ main_rc_osc: main_rc_osc {
+ compatible = "atmel,at91sam9x5-clk-main-rc-osc";
+ interrupt-parent = <&pmc>;
+ interrupts = <0>;
+ clock-frequency = <12000000>;
+ clock-accuracy = <50000000>;
+ };
+
+Required properties for main clock oscillator:
+- interrupt-parent : must reference the PMC node.
+- interrupts : shall be set to "<0>".
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall encode the main osc source clk sources (see atmel datasheet).
+
+Optional properties:
+- atmel,osc-bypass : boolean property. Specified if a clock signal is provided
+ on XIN.
+
+ clock signal is directly provided on XIN pin.
+
+For example:
+ main_osc: main_osc {
+ compatible = "atmel,at91rm9200-clk-main-osc";
+ interrupt-parent = <&pmc>;
+ interrupts = <0>;
+ #clock-cells = <0>;
+ clocks = <&main_xtal>;
+ };
+
+Required properties for main clock:
+- interrupt-parent : must reference the PMC node.
+- interrupts : shall be set to "<0>".
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall encode the main clk sources (see atmel datasheet).
+
+For example:
+ main: mainck {
+ compatible = "atmel,at91sam9x5-clk-main";
+ interrupt-parent = <&pmc>;
+ interrupts = <0>;
+ #clock-cells = <0>;
+ clocks = <&main_rc_osc &main_osc>;
+ };
+
+Required properties for master clock:
+- interrupt-parent : must reference the PMC node.
+- interrupts : shall be set to "<3>".
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall be the master clock sources (see atmel datasheet) phandles.
+ e.g. "<&ck32k>, <&main>, <&plla>, <&pllb>".
+- atmel,clk-output-range : minimum and maximum clock frequency (two u32
+ fields).
+ e.g. output = <0 133000000>; <=> 0 to 133MHz.
+- atmel,clk-divisors : master clock divisors table (four u32 fields).
+ 0 <=> reserved value.
+ e.g. divisors = <1 2 4 6>;
+- atmel,master-clk-have-div3-pres : some SoC use the reserved value 7 in the
+ PRES field as CLOCK_DIV3 (e.g sam9x5).
+
+For example:
+ mck: mck {
+ compatible = "atmel,at91rm9200-clk-master";
+ interrupt-parent = <&pmc>;
+ interrupts = <3>;
+ #clock-cells = <0>;
+ atmel,clk-output-range = <0 133000000>;
+ atmel,clk-divisors = <1 2 4 0>;
+ };
+
+Required properties for peripheral clocks:
+- #size-cells : shall be 0 (reg is used to encode clk id).
+- #address-cells : shall be 1 (reg is used to encode clk id).
+- clocks : shall be the master clock phandle.
+ e.g. clocks = <&mck>;
+- name: device tree node describing a specific peripheral clock.
+ * #clock-cells : from common clock binding; shall be set to 0.
+ * reg: peripheral id. See Atmel's datasheets to get a full
+ list of peripheral ids.
+ * atmel,clk-output-range : minimum and maximum clock frequency
+ (two u32 fields). Only valid on at91sam9x5-clk-peripheral
+ compatible IPs.
+
+For example:
+ periph: periphck {
+ compatible = "atmel,at91sam9x5-clk-peripheral";
+ #size-cells = <0>;
+ #address-cells = <1>;
+ clocks = <&mck>;
+
+ ssc0_clk {
+ #clock-cells = <0>;
+ reg = <2>;
+ atmel,clk-output-range = <0 133000000>;
+ };
+
+ usart0_clk {
+ #clock-cells = <0>;
+ reg = <3>;
+ atmel,clk-output-range = <0 66000000>;
+ };
+ };
+
+
+Required properties for pll clocks:
+- interrupt-parent : must reference the PMC node.
+- interrupts : shall be set to "<1>".
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall be the main clock phandle.
+- reg : pll id.
+ 0 -> PLL A
+ 1 -> PLL B
+- atmel,clk-input-range : minimum and maximum source clock frequency (two u32
+ fields).
+ e.g. input = <1 32000000>; <=> 1 to 32MHz.
+- #atmel,pll-clk-output-range-cells : number of cells reserved for pll output
+ range description. Sould be set to 2, 3
+ or 4.
+ * 1st and 2nd cells represent the frequency range (min-max).
+ * 3rd cell is optional and represents the OUT field value for the given
+ range.
+ * 4th cell is optional and represents the ICPLL field (PLLICPR
+ register)
+- atmel,pll-clk-output-ranges : pll output frequency ranges + optional parameter
+ depending on #atmel,pll-output-range-cells
+ property value.
+
+For example:
+ plla: pllack {
+ compatible = "atmel,at91sam9g45-clk-pll";
+ interrupt-parent = <&pmc>;
+ interrupts = <1>;
+ #clock-cells = <0>;
+ clocks = <&main>;
+ reg = <0>;
+ atmel,clk-input-range = <2000000 32000000>;
+ #atmel,pll-clk-output-range-cells = <4>;
+ atmel,pll-clk-output-ranges = <74500000 800000000 0 0
+ 69500000 750000000 1 0
+ 64500000 700000000 2 0
+ 59500000 650000000 3 0
+ 54500000 600000000 0 1
+ 49500000 550000000 1 1
+ 44500000 500000000 2 1
+ 40000000 450000000 3 1>;
+ };
+
+Required properties for plldiv clocks (plldiv = pll / 2):
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall be the plla clock phandle.
+
+The pll divisor is equal to 2 and cannot be changed.
+
+For example:
+ plladiv: plladivck {
+ compatible = "atmel,at91sam9x5-clk-plldiv";
+ #clock-cells = <0>;
+ clocks = <&plla>;
+ };
+
+Required properties for programmable clocks:
+- interrupt-parent : must reference the PMC node.
+- #size-cells : shall be 0 (reg is used to encode clk id).
+- #address-cells : shall be 1 (reg is used to encode clk id).
+- clocks : shall be the programmable clock source phandles.
+ e.g. clocks = <&clk32k>, <&main>, <&plla>, <&pllb>;
+- name: device tree node describing a specific prog clock.
+ * #clock-cells : from common clock binding; shall be set to 0.
+ * reg : programmable clock id (register offset from PCKx
+ register).
+ * interrupts : shall be set to "<(8 + id)>".
+
+For example:
+ prog: progck {
+ compatible = "atmel,at91sam9g45-clk-programmable";
+ #size-cells = <0>;
+ #address-cells = <1>;
+ interrupt-parent = <&pmc>;
+ clocks = <&clk32k>, <&main>, <&plladiv>, <&utmi>, <&mck>;
+
+ prog0 {
+ #clock-cells = <0>;
+ reg = <0>;
+ interrupts = <8>;
+ };
+
+ prog1 {
+ #clock-cells = <0>;
+ reg = <1>;
+ interrupts = <9>;
+ };
+ };
+
+
+Required properties for smd clock:
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall be the smd clock source phandles.
+ e.g. clocks = <&plladiv>, <&utmi>;
+
+For example:
+ smd: smdck {
+ compatible = "atmel,at91sam9x5-clk-smd";
+ #clock-cells = <0>;
+ clocks = <&plladiv>, <&utmi>;
+ };
+
+Required properties for system clocks:
+- #size-cells : shall be 0 (reg is used to encode clk id).
+- #address-cells : shall be 1 (reg is used to encode clk id).
+- name: device tree node describing a specific system clock.
+ * #clock-cells : from common clock binding; shall be set to 0.
+ * reg: system clock id (bit position in SCER/SCDR/SCSR registers).
+ See Atmel's datasheet to get a full list of system clock ids.
+
+For example:
+ system: systemck {
+ compatible = "atmel,at91rm9200-clk-system";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ddrck {
+ #clock-cells = <0>;
+ reg = <2>;
+ clocks = <&mck>;
+ };
+
+ uhpck {
+ #clock-cells = <0>;
+ reg = <6>;
+ clocks = <&usb>;
+ };
+
+ udpck {
+ #clock-cells = <0>;
+ reg = <7>;
+ clocks = <&usb>;
+ };
+ };
+
+
+Required properties for usb clock:
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall be the smd clock source phandles.
+ e.g. clocks = <&pllb>;
+- atmel,clk-divisors (only available for "atmel,at91rm9200-clk-usb"):
+ usb clock divisor table.
+ e.g. divisors = <1 2 4 0>;
+
+For example:
+ usb: usbck {
+ compatible = "atmel,at91sam9x5-clk-usb";
+ #clock-cells = <0>;
+ clocks = <&plladiv>, <&utmi>;
+ };
+
+ usb: usbck {
+ compatible = "atmel,at91rm9200-clk-usb";
+ #clock-cells = <0>;
+ clocks = <&pllb>;
+ atmel,clk-divisors = <1 2 4 0>;
+ };
+
+
+Required properties for utmi clock:
+- interrupt-parent : must reference the PMC node.
+- interrupts : shall be set to "<AT91_PMC_LOCKU IRQ_TYPE_LEVEL_HIGH>".
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall be the main clock source phandle.
+
+For example:
+ utmi: utmick {
+ compatible = "atmel,at91sam9x5-clk-utmi";
+ interrupt-parent = <&pmc>;
+ interrupts = <AT91_PMC_LOCKU IRQ_TYPE_LEVEL_HIGH>;
+ #clock-cells = <0>;
+ clocks = <&main>;
+ };
+
+Required properties for 32 bits bus Matrix clock (h32mx clock):
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : shall be the master clock source phandle.
+
+For example:
+ h32ck: h32mxck {
+ #clock-cells = <0>;
+ compatible = "atmel,sama5d4-clk-h32mx";
+ clocks = <&mck>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/axi-clkgen.txt b/Documentation/devicetree/bindings/clock/axi-clkgen.txt
new file mode 100644
index 000000000..20e1704e7
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/axi-clkgen.txt
@@ -0,0 +1,22 @@
+Binding for the axi-clkgen clock generator
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "adi,axi-clkgen-1.00.a" or "adi,axi-clkgen-2.00.a".
+- #clock-cells : from common clock binding; Should always be set to 0.
+- reg : Address and length of the axi-clkgen register set.
+- clocks : Phandle and clock specifier for the parent clock.
+
+Optional properties:
+- clock-output-names : From common clock binding.
+
+Example:
+ clock@0xff000000 {
+ compatible = "adi,axi-clkgen";
+ #clock-cells = <0>;
+ reg = <0xff000000 0x1000>;
+ clocks = <&osc 1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/bcm-cygnus-clock.txt b/Documentation/devicetree/bindings/clock/bcm-cygnus-clock.txt
new file mode 100644
index 000000000..00d26edec
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/bcm-cygnus-clock.txt
@@ -0,0 +1,34 @@
+Broadcom Cygnus Clocks
+
+This binding uses the common clock binding:
+Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Currently various "fixed" clocks are declared for peripheral drivers that use
+the common clock framework to reference their core clocks. Proper support of
+these clocks will be added later
+
+Device tree example:
+
+ clocks {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ osc: oscillator {
+ compatible = "fixed-clock";
+ #clock-cells = <1>;
+ clock-frequency = <25000000>;
+ };
+
+ apb_clk: apb_clk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <1000000000>;
+ };
+
+ periph_clk: periph_clk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <500000000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/brcm,kona-ccu.txt b/Documentation/devicetree/bindings/clock/brcm,kona-ccu.txt
new file mode 100644
index 000000000..5286e260f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/brcm,kona-ccu.txt
@@ -0,0 +1,139 @@
+Broadcom Kona Family Clocks
+
+This binding is associated with Broadcom SoCs having "Kona" style
+clock control units (CCUs). A CCU is a clock provider that manages
+a set of clock signals. Each CCU is represented by a node in the
+device tree.
+
+This binding uses the common clock binding:
+ Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible
+ Shall have a value of the form "brcm,<model>-<which>-ccu",
+ where <model> is a Broadcom SoC model number and <which> is
+ the name of a defined CCU. For example:
+ "brcm,bcm11351-root-ccu"
+ The compatible strings used for each supported SoC family
+ are defined below.
+- reg
+ Shall define the base and range of the address space
+ containing clock control registers
+- #clock-cells
+ Shall have value <1>. The permitted clock-specifier values
+ are defined below.
+- clock-output-names
+ Shall be an ordered list of strings defining the names of
+ the clocks provided by the CCU.
+
+Device tree example:
+
+ slave_ccu: slave_ccu {
+ compatible = "brcm,bcm11351-slave-ccu";
+ reg = <0x3e011000 0x0f00>;
+ #clock-cells = <1>;
+ clock-output-names = "uartb",
+ "uartb2",
+ "uartb3",
+ "uartb4";
+ };
+
+ ref_crystal_clk: ref_crystal {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <26000000>;
+ };
+
+ uart@3e002000 {
+ compatible = "brcm,bcm11351-dw-apb-uart", "snps,dw-apb-uart";
+ status = "disabled";
+ reg = <0x3e002000 0x1000>;
+ clocks = <&slave_ccu BCM281XX_SLAVE_CCU_UARTB3>;
+ interrupts = <GIC_SPI 65 IRQ_TYPE_LEVEL_HIGH>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+BCM281XX family
+---------------
+CCU compatible string values for SoCs in the BCM281XX family are:
+ "brcm,bcm11351-root-ccu"
+ "brcm,bcm11351-aon-ccu"
+ "brcm,bcm11351-hub-ccu"
+ "brcm,bcm11351-master-ccu"
+ "brcm,bcm11351-slave-ccu"
+
+The following table defines the set of CCUs and clock specifiers for
+BCM281XX family clocks. When a clock consumer references a clocks,
+its symbolic specifier (rather than its numeric index value) should
+be used. These specifiers are defined in:
+ "include/dt-bindings/clock/bcm281xx.h"
+
+ CCU Clock Type Index Specifier
+ --- ----- ---- ----- ---------
+ root frac_1m peri 0 BCM281XX_ROOT_CCU_FRAC_1M
+
+ aon hub_timer peri 0 BCM281XX_AON_CCU_HUB_TIMER
+ aon pmu_bsc peri 1 BCM281XX_AON_CCU_PMU_BSC
+ aon pmu_bsc_var peri 2 BCM281XX_AON_CCU_PMU_BSC_VAR
+
+ hub tmon_1m peri 0 BCM281XX_HUB_CCU_TMON_1M
+
+ master sdio1 peri 0 BCM281XX_MASTER_CCU_SDIO1
+ master sdio2 peri 1 BCM281XX_MASTER_CCU_SDIO2
+ master sdio3 peri 2 BCM281XX_MASTER_CCU_SDIO3
+ master sdio4 peri 3 BCM281XX_MASTER_CCU_SDIO4
+ master dmac peri 4 BCM281XX_MASTER_CCU_DMAC
+ master usb_ic peri 5 BCM281XX_MASTER_CCU_USB_IC
+ master hsic2_48m peri 6 BCM281XX_MASTER_CCU_HSIC_48M
+ master hsic2_12m peri 7 BCM281XX_MASTER_CCU_HSIC_12M
+
+ slave uartb peri 0 BCM281XX_SLAVE_CCU_UARTB
+ slave uartb2 peri 1 BCM281XX_SLAVE_CCU_UARTB2
+ slave uartb3 peri 2 BCM281XX_SLAVE_CCU_UARTB3
+ slave uartb4 peri 3 BCM281XX_SLAVE_CCU_UARTB4
+ slave ssp0 peri 4 BCM281XX_SLAVE_CCU_SSP0
+ slave ssp2 peri 5 BCM281XX_SLAVE_CCU_SSP2
+ slave bsc1 peri 6 BCM281XX_SLAVE_CCU_BSC1
+ slave bsc2 peri 7 BCM281XX_SLAVE_CCU_BSC2
+ slave bsc3 peri 8 BCM281XX_SLAVE_CCU_BSC3
+ slave pwm peri 9 BCM281XX_SLAVE_CCU_PWM
+
+
+BCM21664 family
+---------------
+CCU compatible string values for SoCs in the BCM21664 family are:
+ "brcm,bcm21664-root-ccu"
+ "brcm,bcm21664-aon-ccu"
+ "brcm,bcm21664-master-ccu"
+ "brcm,bcm21664-slave-ccu"
+
+The following table defines the set of CCUs and clock specifiers for
+BCM21664 family clocks. When a clock consumer references a clocks,
+its symbolic specifier (rather than its numeric index value) should
+be used. These specifiers are defined in:
+ "include/dt-bindings/clock/bcm21664.h"
+
+ CCU Clock Type Index Specifier
+ --- ----- ---- ----- ---------
+ root frac_1m peri 0 BCM21664_ROOT_CCU_FRAC_1M
+
+ aon hub_timer peri 0 BCM21664_AON_CCU_HUB_TIMER
+
+ master sdio1 peri 0 BCM21664_MASTER_CCU_SDIO1
+ master sdio2 peri 1 BCM21664_MASTER_CCU_SDIO2
+ master sdio3 peri 2 BCM21664_MASTER_CCU_SDIO3
+ master sdio4 peri 3 BCM21664_MASTER_CCU_SDIO4
+ master sdio1_sleep peri 4 BCM21664_MASTER_CCU_SDIO1_SLEEP
+ master sdio2_sleep peri 5 BCM21664_MASTER_CCU_SDIO2_SLEEP
+ master sdio3_sleep peri 6 BCM21664_MASTER_CCU_SDIO3_SLEEP
+ master sdio4_sleep peri 7 BCM21664_MASTER_CCU_SDIO4_SLEEP
+
+ slave uartb peri 0 BCM21664_SLAVE_CCU_UARTB
+ slave uartb2 peri 1 BCM21664_SLAVE_CCU_UARTB2
+ slave uartb3 peri 2 BCM21664_SLAVE_CCU_UARTB3
+ slave uartb4 peri 3 BCM21664_SLAVE_CCU_UARTB4
+ slave bsc1 peri 4 BCM21664_SLAVE_CCU_BSC1
+ slave bsc2 peri 5 BCM21664_SLAVE_CCU_BSC2
+ slave bsc3 peri 6 BCM21664_SLAVE_CCU_BSC3
+ slave bsc4 peri 7 BCM21664_SLAVE_CCU_BSC4
diff --git a/Documentation/devicetree/bindings/clock/calxeda.txt b/Documentation/devicetree/bindings/clock/calxeda.txt
new file mode 100644
index 000000000..0a6ac1bdc
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/calxeda.txt
@@ -0,0 +1,17 @@
+Device Tree Clock bindings for Calxeda highbank platform
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be one of the following:
+ "calxeda,hb-pll-clock" - for a PLL clock
+ "calxeda,hb-a9periph-clock" - The A9 peripheral clock divided from the
+ A9 clock.
+ "calxeda,hb-a9bus-clock" - The A9 bus clock divided from the A9 clock.
+ "calxeda,hb-emmc-clock" - Divided clock for MMC/SD controller.
+- reg : shall be the control register offset from SYSREGs base for the clock.
+- clocks : shall be the input parent clock phandle for the clock. This is
+ either an oscillator or a pll output.
+- #clock-cells : from common clock binding; shall be set to 0.
diff --git a/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt b/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt
new file mode 100644
index 000000000..180e88355
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/clk-exynos-audss.txt
@@ -0,0 +1,95 @@
+* Samsung Audio Subsystem Clock Controller
+
+The Samsung Audio Subsystem clock controller generates and supplies clocks
+to Audio Subsystem block available in the S5PV210 and Exynos SoCs. The clock
+binding described here is applicable to all SoCs in Exynos family.
+
+Required Properties:
+
+- compatible: should be one of the following:
+ - "samsung,exynos4210-audss-clock" - controller compatible with all Exynos4 SoCs.
+ - "samsung,exynos5250-audss-clock" - controller compatible with Exynos5250
+ SoCs.
+ - "samsung,exynos5420-audss-clock" - controller compatible with Exynos5420
+ SoCs.
+- reg: physical base address and length of the controller's register set.
+
+- #clock-cells: should be 1.
+
+- clocks:
+ - pll_ref: Fixed rate PLL reference clock, parent of mout_audss. "fin_pll"
+ is used if not specified.
+ - pll_in: Input PLL to the AudioSS block, parent of mout_audss. "fout_epll"
+ is used if not specified.
+ - cdclk: External i2s clock, parent of mout_i2s. "cdclk0" is used if not
+ specified.
+ - sclk_audio: Audio bus clock, parent of mout_i2s. "sclk_audio0" is used if
+ not specified.
+ - sclk_pcm_in: PCM clock, parent of sclk_pcm. "sclk_pcm0" is used if not
+ specified.
+
+- clock-names: Aliases for the above clocks. They should be "pll_ref",
+ "pll_in", "cdclk", "sclk_audio", and "sclk_pcm_in" respectively.
+
+The following is the list of clocks generated by the controller. Each clock is
+assigned an identifier and client nodes use this identifier to specify the
+clock which they consume. Some of the clocks are available only on a particular
+Exynos4 SoC and this is specified where applicable.
+
+Provided clocks:
+
+Clock ID SoC (if specific)
+-----------------------------------------------
+
+mout_audss 0
+mout_i2s 1
+dout_srp 2
+dout_aud_bus 3
+dout_i2s 4
+srp_clk 5
+i2s_bus 6
+sclk_i2s 7
+pcm_bus 8
+sclk_pcm 9
+adma 10 Exynos5420
+
+Example 1: An example of a clock controller node using the default input
+ clock names is listed below.
+
+clock_audss: audss-clock-controller@3810000 {
+ compatible = "samsung,exynos5250-audss-clock";
+ reg = <0x03810000 0x0C>;
+ #clock-cells = <1>;
+};
+
+Example 2: An example of a clock controller node with the input clocks
+ specified.
+
+clock_audss: audss-clock-controller@3810000 {
+ compatible = "samsung,exynos5250-audss-clock";
+ reg = <0x03810000 0x0C>;
+ #clock-cells = <1>;
+ clocks = <&clock 1>, <&clock 7>, <&clock 138>, <&clock 160>,
+ <&ext_i2s_clk>;
+ clock-names = "pll_ref", "pll_in", "sclk_audio", "sclk_pcm_in", "cdclk";
+};
+
+Example 3: I2S controller node that consumes the clock generated by the clock
+ controller. Refer to the standard clock bindings for information
+ about 'clocks' and 'clock-names' property.
+
+i2s0: i2s@03830000 {
+ compatible = "samsung,i2s-v5";
+ reg = <0x03830000 0x100>;
+ dmas = <&pdma0 10
+ &pdma0 9
+ &pdma0 8>;
+ dma-names = "tx", "rx", "tx-sec";
+ clocks = <&clock_audss EXYNOS_I2S_BUS>,
+ <&clock_audss EXYNOS_I2S_BUS>,
+ <&clock_audss EXYNOS_SCLK_I2S>,
+ <&clock_audss EXYNOS_MOUT_AUDSS>,
+ <&clock_audss EXYNOS_MOUT_I2S>;
+ clock-names = "iis", "i2s_opclk0", "i2s_opclk1",
+ "mout_audss", "mout_i2s";
+};
diff --git a/Documentation/devicetree/bindings/clock/clk-palmas-clk32kg-clocks.txt b/Documentation/devicetree/bindings/clock/clk-palmas-clk32kg-clocks.txt
new file mode 100644
index 000000000..4208886d8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/clk-palmas-clk32kg-clocks.txt
@@ -0,0 +1,35 @@
+* Palmas 32KHz clocks *
+
+Palmas device has two clock output pins for 32KHz, KG and KG_AUDIO.
+
+This binding uses the common clock binding ./clock-bindings.txt.
+
+Required properties:
+- compatible : "ti,palmas-clk32kg" for clk32kg clock
+ "ti,palmas-clk32kgaudio" for clk32kgaudio clock
+- #clock-cells : shall be set to 0.
+
+Optional property:
+- ti,external-sleep-control: The external enable input pins controlled the
+ enable/disable of clocks. The external enable input pins ENABLE1,
+ ENABLE2 and NSLEEP. The valid values for the external pins are:
+ PALMAS_EXT_CONTROL_PIN_ENABLE1 for ENABLE1 pin
+ PALMAS_EXT_CONTROL_PIN_ENABLE2 for ENABLE2 pin
+ PALMAS_EXT_CONTROL_PIN_NSLEEP for NSLEEP pin
+ Option 0 or missing this property means the clock is enabled/disabled
+ via register access and these pins do not have any control.
+ The macros of external control pins for DTS is defined at
+ dt-bindings/mfd/palmas.h
+
+Example:
+ #include <dt-bindings/mfd/palmas.h>
+ ...
+ palmas: tps65913@58 {
+ ...
+ clk32kg: palmas_clk32k@0 {
+ compatible = "ti,palmas-clk32kg";
+ #clock-cells = <0>;
+ ti,external-sleep-control = <PALMAS_EXT_CONTROL_PIN_NSLEEP>;
+ };
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt b/Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt
new file mode 100644
index 000000000..4fc869b69
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/clk-s5pv210-audss.txt
@@ -0,0 +1,53 @@
+* Samsung Audio Subsystem Clock Controller
+
+The Samsung Audio Subsystem clock controller generates and supplies clocks
+to Audio Subsystem block available in the S5PV210 and compatible SoCs.
+
+Required Properties:
+
+- compatible: should be "samsung,s5pv210-audss-clock".
+- reg: physical base address and length of the controller's register set.
+
+- #clock-cells: should be 1.
+
+- clocks:
+ - hclk: AHB bus clock of the Audio Subsystem.
+ - xxti: Optional fixed rate PLL reference clock, parent of mout_audss. If
+ not specified (i.e. xusbxti is used for PLL reference), it is fixed to
+ a clock named "xxti".
+ - fout_epll: Input PLL to the AudioSS block, parent of mout_audss.
+ - iiscdclk0: Optional external i2s clock, parent of mout_i2s. If not
+ specified, it is fixed to a clock named "iiscdclk0".
+ - sclk_audio0: Audio bus clock, parent of mout_i2s.
+
+- clock-names: Aliases for the above clocks. They should be "hclk",
+ "xxti", "fout_epll", "iiscdclk0", and "sclk_audio0" respectively.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/s5pv210-audss-clk.h header and can be used in device
+tree sources.
+
+Example: Clock controller node.
+
+ clk_audss: clock-controller@c0900000 {
+ compatible = "samsung,s5pv210-audss-clock";
+ reg = <0xc0900000 0x1000>;
+ #clock-cells = <1>;
+ clock-names = "hclk", "xxti",
+ "fout_epll", "sclk_audio0";
+ clocks = <&clocks DOUT_HCLKP>, <&xxti>,
+ <&clocks FOUT_EPLL>, <&clocks SCLK_AUDIO0>;
+ };
+
+Example: I2S controller node that consumes the clock generated by the clock
+ controller. Refer to the standard clock bindings for information
+ about 'clocks' and 'clock-names' property.
+
+ i2s0: i2s@03830000 {
+ /* ... */
+ clock-names = "iis", "i2s_opclk0",
+ "i2s_opclk1";
+ clocks = <&clk_audss CLK_I2S>, <&clk_audss CLK_I2S>,
+ <&clk_audss CLK_DOUT_AUD_BUS>;
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/clock/clock-bindings.txt b/Documentation/devicetree/bindings/clock/clock-bindings.txt
new file mode 100644
index 000000000..06fc6d541
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/clock-bindings.txt
@@ -0,0 +1,169 @@
+This binding is a work-in-progress, and are based on some experimental
+work by benh[1].
+
+Sources of clock signal can be represented by any node in the device
+tree. Those nodes are designated as clock providers. Clock consumer
+nodes use a phandle and clock specifier pair to connect clock provider
+outputs to clock inputs. Similar to the gpio specifiers, a clock
+specifier is an array of zero, one or more cells identifying the clock
+output on a device. The length of a clock specifier is defined by the
+value of a #clock-cells property in the clock provider node.
+
+[1] http://patchwork.ozlabs.org/patch/31551/
+
+==Clock providers==
+
+Required properties:
+#clock-cells: Number of cells in a clock specifier; Typically 0 for nodes
+ with a single clock output and 1 for nodes with multiple
+ clock outputs.
+
+Optional properties:
+clock-output-names: Recommended to be a list of strings of clock output signal
+ names indexed by the first cell in the clock specifier.
+ However, the meaning of clock-output-names is domain
+ specific to the clock provider, and is only provided to
+ encourage using the same meaning for the majority of clock
+ providers. This format may not work for clock providers
+ using a complex clock specifier format. In those cases it
+ is recommended to omit this property and create a binding
+ specific names property.
+
+ Clock consumer nodes must never directly reference
+ the provider's clock-output-names property.
+
+For example:
+
+ oscillator {
+ #clock-cells = <1>;
+ clock-output-names = "ckil", "ckih";
+ };
+
+- this node defines a device with two clock outputs, the first named
+ "ckil" and the second named "ckih". Consumer nodes always reference
+ clocks by index. The names should reflect the clock output signal
+ names for the device.
+
+clock-indices: If the identifying number for the clocks in the node
+ is not linear from zero, then this allows the mapping of
+ identifiers into the clock-output-names array.
+
+For example, if we have two clocks <&oscillator 1> and <&oscillator 3>:
+
+ oscillator {
+ compatible = "myclocktype";
+ #clock-cells = <1>;
+ clock-indices = <1>, <3>;
+ clock-output-names = "clka", "clkb";
+ }
+
+ This ensures we do not have any empty strings in clock-output-names
+
+
+==Clock consumers==
+
+Required properties:
+clocks: List of phandle and clock specifier pairs, one pair
+ for each clock input to the device. Note: if the
+ clock provider specifies '0' for #clock-cells, then
+ only the phandle portion of the pair will appear.
+
+Optional properties:
+clock-names: List of clock input name strings sorted in the same
+ order as the clocks property. Consumers drivers
+ will use clock-names to match clock input names
+ with clocks specifiers.
+clock-ranges: Empty property indicating that child nodes can inherit named
+ clocks from this node. Useful for bus nodes to provide a
+ clock to their children.
+
+For example:
+
+ device {
+ clocks = <&osc 1>, <&ref 0>;
+ clock-names = "baud", "register";
+ };
+
+
+This represents a device with two clock inputs, named "baud" and "register".
+The baud clock is connected to output 1 of the &osc device, and the register
+clock is connected to output 0 of the &ref.
+
+==Example==
+
+ /* external oscillator */
+ osc: oscillator {
+ compatible = "fixed-clock";
+ #clock-cells = <1>;
+ clock-frequency = <32678>;
+ clock-output-names = "osc";
+ };
+
+ /* phase-locked-loop device, generates a higher frequency clock
+ * from the external oscillator reference */
+ pll: pll@4c000 {
+ compatible = "vendor,some-pll-interface"
+ #clock-cells = <1>;
+ clocks = <&osc 0>;
+ clock-names = "ref";
+ reg = <0x4c000 0x1000>;
+ clock-output-names = "pll", "pll-switched";
+ };
+
+ /* UART, using the low frequency oscillator for the baud clock,
+ * and the high frequency switched PLL output for register
+ * clocking */
+ uart@a000 {
+ compatible = "fsl,imx-uart";
+ reg = <0xa000 0x1000>;
+ interrupts = <33>;
+ clocks = <&osc 0>, <&pll 1>;
+ clock-names = "baud", "register";
+ };
+
+This DT fragment defines three devices: an external oscillator to provide a
+low-frequency reference clock, a PLL device to generate a higher frequency
+clock signal, and a UART.
+
+* The oscillator is fixed-frequency, and provides one clock output, named "osc".
+* The PLL is both a clock provider and a clock consumer. It uses the clock
+ signal generated by the external oscillator, and provides two output signals
+ ("pll" and "pll-switched").
+* The UART has its baud clock connected the external oscillator and its
+ register clock connected to the PLL clock (the "pll-switched" signal)
+
+==Assigned clock parents and rates==
+
+Some platforms may require initial configuration of default parent clocks
+and clock frequencies. Such a configuration can be specified in a device tree
+node through assigned-clocks, assigned-clock-parents and assigned-clock-rates
+properties. The assigned-clock-parents property should contain a list of parent
+clocks in form of phandle and clock specifier pairs, the assigned-clock-parents
+property the list of assigned clock frequency values - corresponding to clocks
+listed in the assigned-clocks property.
+
+To skip setting parent or rate of a clock its corresponding entry should be
+set to 0, or can be omitted if it is not followed by any non-zero entry.
+
+ uart@a000 {
+ compatible = "fsl,imx-uart";
+ reg = <0xa000 0x1000>;
+ ...
+ clocks = <&osc 0>, <&pll 1>;
+ clock-names = "baud", "register";
+
+ assigned-clocks = <&clkcon 0>, <&pll 2>;
+ assigned-clock-parents = <&pll 2>;
+ assigned-clock-rates = <0>, <460800>;
+ };
+
+In this example the <&pll 2> clock is set as parent of clock <&clkcon 0> and
+the <&pll 2> clock is assigned a frequency value of 460800 Hz.
+
+Configuring a clock's parent and rate through the device node that consumes
+the clock can be done only for clocks that have a single user. Specifying
+conflicting parent or rate configuration in multiple consumer nodes for
+a shared clock is forbidden.
+
+Configuration of common clocks, which affect multiple consumer devices can
+be similarly specified in the clock provider node.
diff --git a/Documentation/devicetree/bindings/clock/clps711x-clock.txt b/Documentation/devicetree/bindings/clock/clps711x-clock.txt
new file mode 100644
index 000000000..ce5a7476f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/clps711x-clock.txt
@@ -0,0 +1,19 @@
+* Clock bindings for the Cirrus Logic CLPS711X CPUs
+
+Required properties:
+- compatible : Shall contain "cirrus,clps711x-clk".
+- reg : Address of the internal register set.
+- startup-frequency: Factory set CPU startup frequency in HZ.
+- #clock-cells : Should be <1>.
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/clps711x-clock.h
+for the full list of CLPS711X clock IDs.
+
+Example:
+ clks: clks@80000000 {
+ #clock-cells = <1>;
+ compatible = "cirrus,ep7312-clk", "cirrus,clps711x-clk";
+ reg = <0x80000000 0xc000>;
+ startup-frequency = <73728000>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/efm32-clock.txt b/Documentation/devicetree/bindings/clock/efm32-clock.txt
new file mode 100644
index 000000000..263d293f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/efm32-clock.txt
@@ -0,0 +1,11 @@
+* Clock bindings for Energy Micro efm32 Giant Gecko's Clock Management Unit
+
+Required properties:
+- compatible: Should be "efm32gg,cmu"
+- reg: Base address and length of the register set
+- interrupts: Interrupt used by the CMU
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock ID in
+its "clocks" phandle cell. The header efm32-clk.h contains a list of available
+IDs.
diff --git a/Documentation/devicetree/bindings/clock/emev2-clock.txt b/Documentation/devicetree/bindings/clock/emev2-clock.txt
new file mode 100644
index 000000000..60bbb1a8c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/emev2-clock.txt
@@ -0,0 +1,98 @@
+Device tree Clock bindings for Renesas EMMA Mobile EV2
+
+This binding uses the common clock binding.
+
+* SMU
+System Management Unit described in user's manual R19UH0037EJ1000_SMU.
+This is not a clock provider, but clocks under SMU depend on it.
+
+Required properties:
+- compatible: Should be "renesas,emev2-smu"
+- reg: Address and Size of SMU registers
+
+* SMU_CLKDIV
+Function block with an input mux and a divider, which corresponds to
+"Serial clock generator" in fig."Clock System Overview" of the manual,
+and "xxx frequency division setting register" (XXXCLKDIV) registers.
+This makes internal (neither input nor output) clock that is provided
+to input of xxxGCLK block.
+
+Required properties:
+- compatible: Should be "renesas,emev2-smu-clkdiv"
+- reg: Byte offset from SMU base and Bit position in the register
+- clocks: Parent clocks. Input clocks as described in clock-bindings.txt
+- #clock-cells: Should be <0>
+
+* SMU_GCLK
+Clock gating node shown as "Clock stop processing block" in the
+fig."Clock System Overview" of the manual.
+Registers are "xxx clock gate control register" (XXXGCLKCTRL).
+
+Required properties:
+- compatible: Should be "renesas,emev2-smu-gclk"
+- reg: Byte offset from SMU base and Bit position in the register
+- clocks: Input clock as described in clock-bindings.txt
+- #clock-cells: Should be <0>
+
+Example of provider:
+
+usia_u0_sclkdiv: usia_u0_sclkdiv {
+ compatible = "renesas,emev2-smu-clkdiv";
+ reg = <0x610 0>;
+ clocks = <&pll3_fo>, <&pll4_fo>, <&pll1_fo>, <&osc1_fo>;
+ #clock-cells = <0>;
+};
+
+usia_u0_sclk: usia_u0_sclk {
+ compatible = "renesas,emev2-smu-gclk";
+ reg = <0x4a0 1>;
+ clocks = <&usia_u0_sclkdiv>;
+ #clock-cells = <0>;
+};
+
+Example of consumer:
+
+uart@e1020000 {
+ compatible = "renesas,em-uart";
+ reg = <0xe1020000 0x38>;
+ interrupts = <0 8 0>;
+ clocks = <&usia_u0_sclk>;
+ clock-names = "sclk";
+};
+
+Example of clock-tree description:
+
+ This describes a clock path in the clock tree
+ c32ki -> pll3_fo -> usia_u0_sclkdiv -> usia_u0_sclk
+
+smu@e0110000 {
+ compatible = "renesas,emev2-smu";
+ reg = <0xe0110000 0x10000>;
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ c32ki: c32ki {
+ compatible = "fixed-clock";
+ clock-frequency = <32768>;
+ #clock-cells = <0>;
+ };
+ pll3_fo: pll3_fo {
+ compatible = "fixed-factor-clock";
+ clocks = <&c32ki>;
+ clock-div = <1>;
+ clock-mult = <7000>;
+ #clock-cells = <0>;
+ };
+ usia_u0_sclkdiv: usia_u0_sclkdiv {
+ compatible = "renesas,emev2-smu-clkdiv";
+ reg = <0x610 0>;
+ clocks = <&pll3_fo>;
+ #clock-cells = <0>;
+ };
+ usia_u0_sclk: usia_u0_sclk {
+ compatible = "renesas,emev2-smu-gclk";
+ reg = <0x4a0 1>;
+ clocks = <&usia_u0_sclkdiv>;
+ #clock-cells = <0>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/clock/exynos3250-clock.txt b/Documentation/devicetree/bindings/clock/exynos3250-clock.txt
new file mode 100644
index 000000000..f1738b88c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos3250-clock.txt
@@ -0,0 +1,57 @@
+* Samsung Exynos3250 Clock Controller
+
+The Exynos3250 clock controller generates and supplies clock to various
+controllers within the Exynos3250 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "samsung,exynos3250-cmu" - controller compatible with Exynos3250 SoC.
+ - "samsung,exynos3250-cmu-dmc" - controller compatible with
+ Exynos3250 SoC for Dynamic Memory Controller domain.
+ - "samsung,exynos3250-cmu-isp" - ISP block clock controller compatible
+ with Exynos3250 SOC
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos3250.h header and can be used in device
+tree sources.
+
+Example 1: Examples of clock controller nodes are listed below.
+
+ cmu: clock-controller@10030000 {
+ compatible = "samsung,exynos3250-cmu";
+ reg = <0x10030000 0x20000>;
+ #clock-cells = <1>;
+ };
+
+ cmu_dmc: clock-controller@105C0000 {
+ compatible = "samsung,exynos3250-cmu-dmc";
+ reg = <0x105C0000 0x2000>;
+ #clock-cells = <1>;
+ };
+
+ cmu_isp: clock-controller@10048000 {
+ compatible = "samsung,exynos3250-cmu-isp";
+ reg = <0x10048000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+Example 2: UART controller node that consumes the clock generated by the clock
+ controller. Refer to the standard clock bindings for information
+ about 'clocks' and 'clock-names' property.
+
+ serial@13800000 {
+ compatible = "samsung,exynos4210-uart";
+ reg = <0x13800000 0x100>;
+ interrupts = <0 109 0>;
+ clocks = <&cmu CLK_UART0>, <&cmu CLK_SCLK_UART0>;
+ clock-names = "uart", "clk_uart_baud0";
+ };
diff --git a/Documentation/devicetree/bindings/clock/exynos4-clock.txt b/Documentation/devicetree/bindings/clock/exynos4-clock.txt
new file mode 100644
index 000000000..f5a5b19ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos4-clock.txt
@@ -0,0 +1,43 @@
+* Samsung Exynos4 Clock Controller
+
+The Exynos4 clock controller generates and supplies clock to various controllers
+within the Exynos4 SoC. The clock binding described here is applicable to all
+SoC's in the Exynos4 family.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "samsung,exynos4210-clock" - controller compatible with Exynos4210 SoC.
+ - "samsung,exynos4412-clock" - controller compatible with Exynos4412 SoC.
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos4.h header and can be used in device
+tree sources.
+
+Example 1: An example of a clock controller node is listed below.
+
+ clock: clock-controller@0x10030000 {
+ compatible = "samsung,exynos4210-clock";
+ reg = <0x10030000 0x20000>;
+ #clock-cells = <1>;
+ };
+
+Example 2: UART controller node that consumes the clock generated by the clock
+ controller. Refer to the standard clock bindings for information
+ about 'clocks' and 'clock-names' property.
+
+ serial@13820000 {
+ compatible = "samsung,exynos4210-uart";
+ reg = <0x13820000 0x100>;
+ interrupts = <0 54 0>;
+ clocks = <&clock CLK_UART2>, <&clock CLK_SCLK_UART2>;
+ clock-names = "uart", "clk_uart_baud0";
+ };
diff --git a/Documentation/devicetree/bindings/clock/exynos4415-clock.txt b/Documentation/devicetree/bindings/clock/exynos4415-clock.txt
new file mode 100644
index 000000000..847d98bae
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos4415-clock.txt
@@ -0,0 +1,38 @@
+* Samsung Exynos4415 Clock Controller
+
+The Exynos4415 clock controller generates and supplies clock to various
+consumer devices within the Exynos4415 SoC.
+
+Required properties:
+
+- compatible: should be one of the following:
+ - "samsung,exynos4415-cmu" - for the main system clocks controller
+ (CMU_LEFTBUS, CMU_RIGHTBUS, CMU_TOP, CMU_CPU clock domains).
+ - "samsung,exynos4415-cmu-dmc" - for the Exynos4415 SoC DRAM Memory
+ Controller (DMC) domain clock controller.
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos4415.h header and can be used in device
+tree sources.
+
+Example 1: An example of a clock controller node is listed below.
+
+ cmu: clock-controller@10030000 {
+ compatible = "samsung,exynos4415-cmu";
+ reg = <0x10030000 0x18000>;
+ #clock-cells = <1>;
+ };
+
+ cmu-dmc: clock-controller@105C0000 {
+ compatible = "samsung,exynos4415-cmu-dmc";
+ reg = <0x105C0000 0x3000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/exynos5250-clock.txt b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
new file mode 100644
index 000000000..536eacd10
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos5250-clock.txt
@@ -0,0 +1,41 @@
+* Samsung Exynos5250 Clock Controller
+
+The Exynos5250 clock controller generates and supplies clock to various
+controllers within the Exynos5250 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "samsung,exynos5250-clock" - controller compatible with Exynos5250 SoC.
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos5250.h header and can be used in device
+tree sources.
+
+Example 1: An example of a clock controller node is listed below.
+
+ clock: clock-controller@0x10010000 {
+ compatible = "samsung,exynos5250-clock";
+ reg = <0x10010000 0x30000>;
+ #clock-cells = <1>;
+ };
+
+Example 2: UART controller node that consumes the clock generated by the clock
+ controller. Refer to the standard clock bindings for information
+ about 'clocks' and 'clock-names' property.
+
+ serial@13820000 {
+ compatible = "samsung,exynos4210-uart";
+ reg = <0x13820000 0x100>;
+ interrupts = <0 54 0>;
+ clocks = <&clock CLK_UART2>, <&clock CLK_SCLK_UART2>;
+ clock-names = "uart", "clk_uart_baud0";
+ };
diff --git a/Documentation/devicetree/bindings/clock/exynos5260-clock.txt b/Documentation/devicetree/bindings/clock/exynos5260-clock.txt
new file mode 100644
index 000000000..5496b2fac
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos5260-clock.txt
@@ -0,0 +1,190 @@
+* Samsung Exynos5260 Clock Controller
+
+Exynos5260 has 13 clock controllers which are instantiated
+independently from the device-tree. These clock controllers
+generate and supply clocks to various hardware blocks within
+the SoC.
+
+Each clock is assigned an identifier and client nodes can use
+this identifier to specify the clock which they consume. All
+available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos5260-clk.h header and can be used in
+device tree sources.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It
+is expected that they are defined using standard clock bindings
+with following clock-output-names:
+
+ - "fin_pll" - PLL input clock from XXTI
+ - "xrtcxti" - input clock from XRTCXTI
+ - "ioclk_pcm_extclk" - pcm external operation clock
+ - "ioclk_spdif_extclk" - spdif external operation clock
+ - "ioclk_i2s_cdclk" - i2s0 codec clock
+
+Phy clocks:
+
+There are several clocks which are generated by specific PHYs.
+These clocks are fed into the clock controller and then routed to
+the hardware blocks. These clocks are defined as fixed clocks in the
+driver with following names:
+
+ - "phyclk_dptx_phy_ch3_txd_clk" - dp phy clock for channel 3
+ - "phyclk_dptx_phy_ch2_txd_clk" - dp phy clock for channel 2
+ - "phyclk_dptx_phy_ch1_txd_clk" - dp phy clock for channel 1
+ - "phyclk_dptx_phy_ch0_txd_clk" - dp phy clock for channel 0
+ - "phyclk_hdmi_phy_tmds_clko" - hdmi phy tmds clock
+ - "phyclk_hdmi_phy_pixel_clko" - hdmi phy pixel clock
+ - "phyclk_hdmi_link_o_tmds_clkhi" - hdmi phy for hdmi link
+ - "phyclk_dptx_phy_o_ref_clk_24m" - dp phy reference clock
+ - "phyclk_dptx_phy_clk_div2"
+ - "phyclk_mipi_dphy_4l_m_rxclkesc0"
+ - "phyclk_usbhost20_phy_phyclock" - usb 2.0 phy clock
+ - "phyclk_usbhost20_phy_freeclk"
+ - "phyclk_usbhost20_phy_clk48mohci"
+ - "phyclk_usbdrd30_udrd30_pipe_pclk"
+ - "phyclk_usbdrd30_udrd30_phyclock" - usb 3.0 phy clock
+
+Required Properties for Clock Controller:
+
+ - compatible: should be one of the following.
+ 1) "samsung,exynos5260-clock-top"
+ 2) "samsung,exynos5260-clock-peri"
+ 3) "samsung,exynos5260-clock-egl"
+ 4) "samsung,exynos5260-clock-kfc"
+ 5) "samsung,exynos5260-clock-g2d"
+ 6) "samsung,exynos5260-clock-mif"
+ 7) "samsung,exynos5260-clock-mfc"
+ 8) "samsung,exynos5260-clock-g3d"
+ 9) "samsung,exynos5260-clock-fsys"
+ 10) "samsung,exynos5260-clock-aud"
+ 11) "samsung,exynos5260-clock-isp"
+ 12) "samsung,exynos5260-clock-gscl"
+ 13) "samsung,exynos5260-clock-disp"
+
+ - reg: physical base address of the controller and the length of
+ memory mapped region.
+
+ - #clock-cells: should be 1.
+
+ - clocks: list of clock identifiers which are fed as the input to
+ the given clock controller. Please refer the next section to find
+ the input clocks for a given controller.
+
+ - clock-names: list of names of clocks which are fed as the input
+ to the given clock controller.
+
+Input clocks for top clock controller:
+ - fin_pll
+ - dout_mem_pll
+ - dout_bus_pll
+ - dout_media_pll
+
+Input clocks for peri clock controller:
+ - fin_pll
+ - ioclk_pcm_extclk
+ - ioclk_i2s_cdclk
+ - ioclk_spdif_extclk
+ - phyclk_hdmi_phy_ref_cko
+ - dout_aclk_peri_66
+ - dout_sclk_peri_uart0
+ - dout_sclk_peri_uart1
+ - dout_sclk_peri_uart2
+ - dout_sclk_peri_spi0_b
+ - dout_sclk_peri_spi1_b
+ - dout_sclk_peri_spi2_b
+ - dout_aclk_peri_aud
+ - dout_sclk_peri_spi0_b
+
+Input clocks for egl clock controller:
+ - fin_pll
+ - dout_bus_pll
+
+Input clocks for kfc clock controller:
+ - fin_pll
+ - dout_media_pll
+
+Input clocks for g2d clock controller:
+ - fin_pll
+ - dout_aclk_g2d_333
+
+Input clocks for mif clock controller:
+ - fin_pll
+
+Input clocks for mfc clock controller:
+ - fin_pll
+ - dout_aclk_mfc_333
+
+Input clocks for g3d clock controller:
+ - fin_pll
+
+Input clocks for fsys clock controller:
+ - fin_pll
+ - phyclk_usbhost20_phy_phyclock
+ - phyclk_usbhost20_phy_freeclk
+ - phyclk_usbhost20_phy_clk48mohci
+ - phyclk_usbdrd30_udrd30_pipe_pclk
+ - phyclk_usbdrd30_udrd30_phyclock
+ - dout_aclk_fsys_200
+
+Input clocks for aud clock controller:
+ - fin_pll
+ - fout_aud_pll
+ - ioclk_i2s_cdclk
+ - ioclk_pcm_extclk
+
+Input clocks for isp clock controller:
+ - fin_pll
+ - dout_aclk_isp1_266
+ - dout_aclk_isp1_400
+ - mout_aclk_isp1_266
+
+Input clocks for gscl clock controller:
+ - fin_pll
+ - dout_aclk_gscl_400
+ - dout_aclk_gscl_333
+
+Input clocks for disp clock controller:
+ - fin_pll
+ - phyclk_dptx_phy_ch3_txd_clk
+ - phyclk_dptx_phy_ch2_txd_clk
+ - phyclk_dptx_phy_ch1_txd_clk
+ - phyclk_dptx_phy_ch0_txd_clk
+ - phyclk_hdmi_phy_tmds_clko
+ - phyclk_hdmi_phy_ref_clko
+ - phyclk_hdmi_phy_pixel_clko
+ - phyclk_hdmi_link_o_tmds_clkhi
+ - phyclk_mipi_dphy_4l_m_txbyte_clkhs
+ - phyclk_dptx_phy_o_ref_clk_24m
+ - phyclk_dptx_phy_clk_div2
+ - phyclk_mipi_dphy_4l_m_rxclkesc0
+ - phyclk_hdmi_phy_ref_cko
+ - ioclk_spdif_extclk
+ - dout_aclk_peri_aud
+ - dout_aclk_disp_222
+ - dout_sclk_disp_pixel
+ - dout_aclk_disp_333
+
+Example 1: An example of a clock controller node is listed below.
+
+ clock_mfc: clock-controller@11090000 {
+ compatible = "samsung,exynos5260-clock-mfc";
+ clock = <&fin_pll>, <&clock_top TOP_DOUT_ACLK_MFC_333>;
+ clock-names = "fin_pll", "dout_aclk_mfc_333";
+ reg = <0x11090000 0x10000>;
+ #clock-cells = <1>;
+ };
+
+Example 2: UART controller node that consumes the clock generated by the
+ peri clock controller. Refer to the standard clock bindings for
+ information about 'clocks' and 'clock-names' property.
+
+ serial@12C00000 {
+ compatible = "samsung,exynos4210-uart";
+ reg = <0x12C00000 0x100>;
+ interrupts = <0 146 0>;
+ clocks = <&clock_peri PERI_PCLK_UART0>, <&clock_peri PERI_SCLK_UART0>;
+ clock-names = "uart", "clk_uart_baud0";
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/exynos5410-clock.txt b/Documentation/devicetree/bindings/clock/exynos5410-clock.txt
new file mode 100644
index 000000000..aeab635b0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos5410-clock.txt
@@ -0,0 +1,45 @@
+* Samsung Exynos5410 Clock Controller
+
+The Exynos5410 clock controller generates and supplies clock to various
+controllers within the Exynos5410 SoC.
+
+Required Properties:
+
+- compatible: should be "samsung,exynos5410-clock"
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos5410.h header and can be used in device
+tree sources.
+
+External clock:
+
+There is clock that is generated outside the SoC. It
+is expected that it is defined using standard clock bindings
+with following clock-output-name:
+
+ - "fin_pll" - PLL input clock from XXTI
+
+Example 1: An example of a clock controller node is listed below.
+
+ clock: clock-controller@0x10010000 {
+ compatible = "samsung,exynos5410-clock";
+ reg = <0x10010000 0x30000>;
+ #clock-cells = <1>;
+ };
+
+Example 2: UART controller node that consumes the clock generated by the clock
+ controller. Refer to the standard clock bindings for information
+ about 'clocks' and 'clock-names' property.
+
+ serial@12C20000 {
+ compatible = "samsung,exynos4210-uart";
+ reg = <0x12C00000 0x100>;
+ interrupts = <0 51 0>;
+ clocks = <&clock CLK_UART0>, <&clock CLK_SCLK_UART0>;
+ clock-names = "uart", "clk_uart_baud0";
+ };
diff --git a/Documentation/devicetree/bindings/clock/exynos5420-clock.txt b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
new file mode 100644
index 000000000..d54f42cf0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos5420-clock.txt
@@ -0,0 +1,42 @@
+* Samsung Exynos5420 Clock Controller
+
+The Exynos5420 clock controller generates and supplies clock to various
+controllers within the Exynos5420 SoC and for the Exynos5800 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "samsung,exynos5420-clock" - controller compatible with Exynos5420 SoC.
+ - "samsung,exynos5800-clock" - controller compatible with Exynos5800 SoC.
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos5420.h header and can be used in device
+tree sources.
+
+Example 1: An example of a clock controller node is listed below.
+
+ clock: clock-controller@0x10010000 {
+ compatible = "samsung,exynos5420-clock";
+ reg = <0x10010000 0x30000>;
+ #clock-cells = <1>;
+ };
+
+Example 2: UART controller node that consumes the clock generated by the clock
+ controller. Refer to the standard clock bindings for information
+ about 'clocks' and 'clock-names' property.
+
+ serial@13820000 {
+ compatible = "samsung,exynos4210-uart";
+ reg = <0x13820000 0x100>;
+ interrupts = <0 54 0>;
+ clocks = <&clock CLK_UART2>, <&clock CLK_SCLK_UART2>;
+ clock-names = "uart", "clk_uart_baud0";
+ };
diff --git a/Documentation/devicetree/bindings/clock/exynos5433-clock.txt b/Documentation/devicetree/bindings/clock/exynos5433-clock.txt
new file mode 100644
index 000000000..63379b04e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos5433-clock.txt
@@ -0,0 +1,462 @@
+* Samsung Exynos5433 CMU (Clock Management Units)
+
+The Exynos5433 clock controller generates and supplies clock to various
+controllers within the Exynos5433 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "samsung,exynos5433-cmu-top" - clock controller compatible for CMU_TOP
+ which generates clocks for IMEM/FSYS/G3D/GSCL/HEVC/MSCL/G2D/MFC/PERIC/PERIS
+ domains and bus clocks.
+ - "samsung,exynos5433-cmu-cpif" - clock controller compatible for CMU_CPIF
+ which generates clocks for LLI (Low Latency Interface) IP.
+ - "samsung,exynos5433-cmu-mif" - clock controller compatible for CMU_MIF
+ which generates clocks for DRAM Memory Controller domain.
+ - "samsung,exynos5433-cmu-peric" - clock controller compatible for CMU_PERIC
+ which generates clocks for UART/I2C/SPI/I2S/PCM/SPDIF/PWM/SLIMBUS IPs.
+ - "samsung,exynos5433-cmu-peris" - clock controller compatible for CMU_PERIS
+ which generates clocks for PMU/TMU/MCT/WDT/RTC/SECKEY/TZPC IPs.
+ - "samsung,exynos5433-cmu-fsys" - clock controller compatible for CMU_FSYS
+ which generates clocks for USB/UFS/SDMMC/TSI/PDMA IPs.
+ - "samsung,exynos5433-cmu-g2d" - clock controller compatible for CMU_G2D
+ which generates clocks for G2D/MDMA IPs.
+ - "samsung,exynos5433-cmu-disp" - clock controller compatible for CMU_DISP
+ which generates clocks for Display (DECON/HDMI/DSIM/MIXER) IPs.
+ - "samsung,exynos5433-cmu-aud" - clock controller compatible for CMU_AUD
+ which generates clocks for Cortex-A5/BUS/AUDIO clocks.
+ - "samsung,exynos5433-cmu-bus0", "samsung,exynos5433-cmu-bus1"
+ and "samsung,exynos5433-cmu-bus2" - clock controller compatible for CMU_BUS
+ which generates global data buses clock and global peripheral buses clock.
+ - "samsung,exynos5433-cmu-g3d" - clock controller compatible for CMU_G3D
+ which generates clocks for 3D Graphics Engine IP.
+ - "samsung,exynos5433-cmu-gscl" - clock controller compatible for CMU_GSCL
+ which generates clocks for GSCALER IPs.
+ - "samsung,exynos5433-cmu-apollo"- clock controller compatible for CMU_APOLLO
+ which generates clocks for Cortex-A53 Quad-core processor.
+ - "samsung,exynos5433-cmu-atlas" - clock controller compatible for CMU_ATLAS
+ which generates clocks for Cortex-A57 Quad-core processor, CoreSight and
+ L2 cache controller.
+ - "samsung,exynos5433-cmu-mscl" - clock controller compatible for CMU_MSCL
+ which generates clocks for M2M (Memory to Memory) scaler and JPEG IPs.
+ - "samsung,exynos5433-cmu-mfc" - clock controller compatible for CMU_MFC
+ which generates clocks for MFC(Multi-Format Codec) IP.
+ - "samsung,exynos5433-cmu-hevc" - clock controller compatible for CMU_HEVC
+ which generates clocks for HEVC(High Efficiency Video Codec) decoder IP.
+ - "samsung,exynos5433-cmu-isp" - clock controller compatible for CMU_ISP
+ which generates clocks for FIMC-ISP/DRC/SCLC/DIS/3DNR IPs.
+ - "samsung,exynos5433-cmu-cam0" - clock controller compatible for CMU_CAM0
+ which generates clocks for MIPI_CSIS{0|1}/FIMC_LITE_{A|B|D}/FIMC_3AA{0|1}
+ IPs.
+ - "samsung,exynos5433-cmu-cam1" - clock controller compatible for CMU_CAM1
+ which generates clocks for Cortex-A5/MIPI_CSIS2/FIMC-LITE_C/FIMC-FD IPs.
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+- clocks: list of the clock controller input clock identifiers,
+ from common clock bindings. Please refer the next section
+ to find the input clocks for a given controller.
+
+- clock-names: list of the clock controller input clock names,
+ as described in clock-bindings.txt.
+
+ Input clocks for top clock controller:
+ - oscclk
+ - sclk_mphy_pll
+ - sclk_mfc_pll
+ - sclk_bus_pll
+
+ Input clocks for cpif clock controller:
+ - oscclk
+
+ Input clocks for mif clock controller:
+ - oscclk
+ - sclk_mphy_pll
+
+ Input clocks for fsys clock controller:
+ - oscclk
+ - sclk_ufs_mphy
+ - div_aclk_fsys_200
+ - sclk_pcie_100_fsys
+ - sclk_ufsunipro_fsys
+ - sclk_mmc2_fsys
+ - sclk_mmc1_fsys
+ - sclk_mmc0_fsys
+ - sclk_usbhost30_fsys
+ - sclk_usbdrd30_fsys
+
+ Input clocks for g2d clock controller:
+ - oscclk
+ - aclk_g2d_266
+ - aclk_g2d_400
+
+ Input clocks for disp clock controller:
+ - oscclk
+ - sclk_dsim1_disp
+ - sclk_dsim0_disp
+ - sclk_dsd_disp
+ - sclk_decon_tv_eclk_disp
+ - sclk_decon_vclk_disp
+ - sclk_decon_eclk_disp
+ - sclk_decon_tv_vclk_disp
+ - aclk_disp_333
+
+ Input clocks for bus0 clock controller:
+ - aclk_bus0_400
+
+ Input clocks for bus1 clock controller:
+ - aclk_bus1_400
+
+ Input clocks for bus2 clock controller:
+ - oscclk
+ - aclk_bus2_400
+
+ Input clocks for g3d clock controller:
+ - oscclk
+ - aclk_g3d_400
+
+ Input clocks for gscl clock controller:
+ - oscclk
+ - aclk_gscl_111
+ - aclk_gscl_333
+
+ Input clocks for apollo clock controller:
+ - oscclk
+ - sclk_bus_pll_apollo
+
+ Input clocks for atlas clock controller:
+ - oscclk
+ - sclk_bus_pll_atlas
+
+ Input clocks for mscl clock controller:
+ - oscclk
+ - sclk_jpeg_mscl
+ - aclk_mscl_400
+
+ Input clocks for mfc clock controller:
+ - oscclk
+ - aclk_mfc_400
+
+ Input clocks for hevc clock controller:
+ - oscclk
+ - aclk_hevc_400
+
+ Input clocks for isp clock controller:
+ - oscclk
+ - aclk_isp_dis_400
+ - aclk_isp_400
+
+ Input clocks for cam0 clock controller:
+ - oscclk
+ - aclk_cam0_333
+ - aclk_cam0_400
+ - aclk_cam0_552
+
+ Input clocks for cam1 clock controller:
+ - oscclk
+ - sclk_isp_uart_cam1
+ - sclk_isp_spi1_cam1
+ - sclk_isp_spi0_cam1
+ - aclk_cam1_333
+ - aclk_cam1_400
+ - aclk_cam1_552
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos5433.h header and can be used in device
+tree sources.
+
+Example 1: Examples of 'oscclk' source clock node are listed below.
+
+ xxti: xxti {
+ compatible = "fixed-clock";
+ clock-output-names = "oscclk";
+ #clock-cells = <0>;
+ };
+
+Example 2: Examples of clock controller nodes are listed below.
+
+ cmu_top: clock-controller@10030000 {
+ compatible = "samsung,exynos5433-cmu-top";
+ reg = <0x10030000 0x0c04>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "sclk_mphy_pll",
+ "sclk_mfc_pll",
+ "sclk_bus_pll";
+ clocks = <&xxti>,
+ <&cmu_cpif CLK_SCLK_MPHY_PLL>,
+ <&cmu_mif CLK_SCLK_MFC_PLL>,
+ <&cmu_mif CLK_SCLK_BUS_PLL>;
+ };
+
+ cmu_cpif: clock-controller@10fc0000 {
+ compatible = "samsung,exynos5433-cmu-cpif";
+ reg = <0x10fc0000 0x0c04>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk";
+ clocks = <&xxti>;
+ };
+
+ cmu_mif: clock-controller@105b0000 {
+ compatible = "samsung,exynos5433-cmu-mif";
+ reg = <0x105b0000 0x100c>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "sclk_mphy_pll";
+ clocks = <&xxti>,
+ <&cmu_cpif CLK_SCLK_MPHY_PLL>;
+ };
+
+ cmu_peric: clock-controller@14c80000 {
+ compatible = "samsung,exynos5433-cmu-peric";
+ reg = <0x14c80000 0x0b08>;
+ #clock-cells = <1>;
+ };
+
+ cmu_peris: clock-controller@10040000 {
+ compatible = "samsung,exynos5433-cmu-peris";
+ reg = <0x10040000 0x0b20>;
+ #clock-cells = <1>;
+ };
+
+ cmu_fsys: clock-controller@156e0000 {
+ compatible = "samsung,exynos5433-cmu-fsys";
+ reg = <0x156e0000 0x0b04>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "sclk_ufs_mphy",
+ "div_aclk_fsys_200",
+ "sclk_pcie_100_fsys",
+ "sclk_ufsunipro_fsys",
+ "sclk_mmc2_fsys",
+ "sclk_mmc1_fsys",
+ "sclk_mmc0_fsys",
+ "sclk_usbhost30_fsys",
+ "sclk_usbdrd30_fsys";
+ clocks = <&xxti>,
+ <&cmu_cpif CLK_SCLK_UFS_MPHY>,
+ <&cmu_top CLK_DIV_ACLK_FSYS_200>,
+ <&cmu_top CLK_SCLK_PCIE_100_FSYS>,
+ <&cmu_top CLK_SCLK_UFSUNIPRO_FSYS>,
+ <&cmu_top CLK_SCLK_MMC2_FSYS>,
+ <&cmu_top CLK_SCLK_MMC1_FSYS>,
+ <&cmu_top CLK_SCLK_MMC0_FSYS>,
+ <&cmu_top CLK_SCLK_USBHOST30_FSYS>,
+ <&cmu_top CLK_SCLK_USBDRD30_FSYS>;
+ };
+
+ cmu_g2d: clock-controller@12460000 {
+ compatible = "samsung,exynos5433-cmu-g2d";
+ reg = <0x12460000 0x0b08>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "aclk_g2d_266",
+ "aclk_g2d_400";
+ clocks = <&xxti>,
+ <&cmu_top CLK_ACLK_G2D_266>,
+ <&cmu_top CLK_ACLK_G2D_400>;
+ };
+
+ cmu_disp: clock-controller@13b90000 {
+ compatible = "samsung,exynos5433-cmu-disp";
+ reg = <0x13b90000 0x0c04>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "sclk_dsim1_disp",
+ "sclk_dsim0_disp",
+ "sclk_dsd_disp",
+ "sclk_decon_tv_eclk_disp",
+ "sclk_decon_vclk_disp",
+ "sclk_decon_eclk_disp",
+ "sclk_decon_tv_vclk_disp",
+ "aclk_disp_333";
+ clocks = <&xxti>,
+ <&cmu_mif CLK_SCLK_DSIM1_DISP>,
+ <&cmu_mif CLK_SCLK_DSIM0_DISP>,
+ <&cmu_mif CLK_SCLK_DSD_DISP>,
+ <&cmu_mif CLK_SCLK_DECON_TV_ECLK_DISP>,
+ <&cmu_mif CLK_SCLK_DECON_VCLK_DISP>,
+ <&cmu_mif CLK_SCLK_DECON_ECLK_DISP>,
+ <&cmu_mif CLK_SCLK_DECON_TV_VCLK_DISP>,
+ <&cmu_mif CLK_ACLK_DISP_333>;
+ };
+
+ cmu_aud: clock-controller@114c0000 {
+ compatible = "samsung,exynos5433-cmu-aud";
+ reg = <0x114c0000 0x0b04>;
+ #clock-cells = <1>;
+ };
+
+ cmu_bus0: clock-controller@13600000 {
+ compatible = "samsung,exynos5433-cmu-bus0";
+ reg = <0x13600000 0x0b04>;
+ #clock-cells = <1>;
+
+ clock-names = "aclk_bus0_400";
+ clocks = <&cmu_top CLK_ACLK_BUS0_400>;
+ };
+
+ cmu_bus1: clock-controller@14800000 {
+ compatible = "samsung,exynos5433-cmu-bus1";
+ reg = <0x14800000 0x0b04>;
+ #clock-cells = <1>;
+
+ clock-names = "aclk_bus1_400";
+ clocks = <&cmu_top CLK_ACLK_BUS1_400>;
+ };
+
+ cmu_bus2: clock-controller@13400000 {
+ compatible = "samsung,exynos5433-cmu-bus2";
+ reg = <0x13400000 0x0b04>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk", "aclk_bus2_400";
+ clocks = <&xxti>, <&cmu_mif CLK_ACLK_BUS2_400>;
+ };
+
+ cmu_g3d: clock-controller@14aa0000 {
+ compatible = "samsung,exynos5433-cmu-g3d";
+ reg = <0x14aa0000 0x1000>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk", "aclk_g3d_400";
+ clocks = <&xxti>, <&cmu_top CLK_ACLK_G3D_400>;
+ };
+
+ cmu_gscl: clock-controller@13cf0000 {
+ compatible = "samsung,exynos5433-cmu-gscl";
+ reg = <0x13cf0000 0x0b10>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "aclk_gscl_111",
+ "aclk_gscl_333";
+ clocks = <&xxti>,
+ <&cmu_top CLK_ACLK_GSCL_111>,
+ <&cmu_top CLK_ACLK_GSCL_333>;
+ };
+
+ cmu_apollo: clock-controller@11900000 {
+ compatible = "samsung,exynos5433-cmu-apollo";
+ reg = <0x11900000 0x1088>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk", "sclk_bus_pll_apollo";
+ clocks = <&xxti>, <&cmu_mif CLK_SCLK_BUS_PLL_APOLLO>;
+ };
+
+ cmu_atlas: clock-controller@11800000 {
+ compatible = "samsung,exynos5433-cmu-atlas";
+ reg = <0x11800000 0x1088>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk", "sclk_bus_pll_atlas";
+ clocks = <&xxti>, <&cmu_mif CLK_SCLK_BUS_PLL_ATLAS>;
+ };
+
+ cmu_mscl: clock-controller@105d0000 {
+ compatible = "samsung,exynos5433-cmu-mscl";
+ reg = <0x105d0000 0x0b10>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "sclk_jpeg_mscl",
+ "aclk_mscl_400";
+ clocks = <&xxti>,
+ <&cmu_top CLK_SCLK_JPEG_MSCL>,
+ <&cmu_top CLK_ACLK_MSCL_400>;
+ };
+
+ cmu_mfc: clock-controller@15280000 {
+ compatible = "samsung,exynos5433-cmu-mfc";
+ reg = <0x15280000 0x0b08>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk", "aclk_mfc_400";
+ clocks = <&xxti>, <&cmu_top CLK_ACLK_MFC_400>;
+ };
+
+ cmu_hevc: clock-controller@14f80000 {
+ compatible = "samsung,exynos5433-cmu-hevc";
+ reg = <0x14f80000 0x0b08>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk", "aclk_hevc_400";
+ clocks = <&xxti>, <&cmu_top CLK_ACLK_HEVC_400>;
+ };
+
+ cmu_isp: clock-controller@146d0000 {
+ compatible = "samsung,exynos5433-cmu-isp";
+ reg = <0x146d0000 0x0b0c>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "aclk_isp_dis_400",
+ "aclk_isp_400";
+ clocks = <&xxti>,
+ <&cmu_top CLK_ACLK_ISP_DIS_400>,
+ <&cmu_top CLK_ACLK_ISP_400>;
+ };
+
+ cmu_cam0: clock-controller@120d0000 {
+ compatible = "samsung,exynos5433-cmu-cam0";
+ reg = <0x120d0000 0x0b0c>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "aclk_cam0_333",
+ "aclk_cam0_400",
+ "aclk_cam0_552";
+ clocks = <&xxti>,
+ <&cmu_top CLK_ACLK_CAM0_333>,
+ <&cmu_top CLK_ACLK_CAM0_400>,
+ <&cmu_top CLK_ACLK_CAM0_552>;
+ };
+
+ cmu_cam1: clock-controller@145d0000 {
+ compatible = "samsung,exynos5433-cmu-cam1";
+ reg = <0x145d0000 0x0b08>;
+ #clock-cells = <1>;
+
+ clock-names = "oscclk",
+ "sclk_isp_uart_cam1",
+ "sclk_isp_spi1_cam1",
+ "sclk_isp_spi0_cam1",
+ "aclk_cam1_333",
+ "aclk_cam1_400",
+ "aclk_cam1_552";
+ clocks = <&xxti>,
+ <&cmu_top CLK_SCLK_ISP_UART_CAM1>,
+ <&cmu_top CLK_SCLK_ISP_SPI1_CAM1>,
+ <&cmu_top CLK_SCLK_ISP_SPI0_CAM1>,
+ <&cmu_top CLK_ACLK_CAM1_333>,
+ <&cmu_top CLK_ACLK_CAM1_400>,
+ <&cmu_top CLK_ACLK_CAM1_552>;
+ };
+
+Example 3: UART controller node that consumes the clock generated by the clock
+ controller.
+
+ serial_0: serial@14C10000 {
+ compatible = "samsung,exynos5433-uart";
+ reg = <0x14C10000 0x100>;
+ interrupts = <0 421 0>;
+ clocks = <&cmu_peric CLK_PCLK_UART0>,
+ <&cmu_peric CLK_SCLK_UART0>;
+ clock-names = "uart", "clk_uart_baud0";
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart0_bus>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/clock/exynos5440-clock.txt b/Documentation/devicetree/bindings/clock/exynos5440-clock.txt
new file mode 100644
index 000000000..5f7005f73
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos5440-clock.txt
@@ -0,0 +1,28 @@
+* Samsung Exynos5440 Clock Controller
+
+The Exynos5440 clock controller generates and supplies clock to various
+controllers within the Exynos5440 SoC.
+
+Required Properties:
+
+- compatible: should be "samsung,exynos5440-clock".
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos5440.h header and can be used in device
+tree sources.
+
+Example: An example of a clock controller node is listed below.
+
+ clock: clock-controller@0x10010000 {
+ compatible = "samsung,exynos5440-clock";
+ reg = <0x160000 0x10000>;
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/exynos7-clock.txt b/Documentation/devicetree/bindings/clock/exynos7-clock.txt
new file mode 100644
index 000000000..6bf1e7493
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos7-clock.txt
@@ -0,0 +1,108 @@
+* Samsung Exynos7 Clock Controller
+
+Exynos7 clock controller has various blocks which are instantiated
+independently from the device-tree. These clock controllers
+generate and supply clocks to various hardware blocks within
+the SoC.
+
+Each clock is assigned an identifier and client nodes can use
+this identifier to specify the clock which they consume. All
+available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos7-clk.h header and can be used in
+device tree sources.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It
+is expected that they are defined using standard clock bindings
+with following clock-output-names:
+
+ - "fin_pll" - PLL input clock from XXTI
+
+Required Properties for Clock Controller:
+
+ - compatible: clock controllers will use one of the following
+ compatible strings to indicate the clock controller
+ functionality.
+
+ - "samsung,exynos7-clock-topc"
+ - "samsung,exynos7-clock-top0"
+ - "samsung,exynos7-clock-top1"
+ - "samsung,exynos7-clock-ccore"
+ - "samsung,exynos7-clock-peric0"
+ - "samsung,exynos7-clock-peric1"
+ - "samsung,exynos7-clock-peris"
+ - "samsung,exynos7-clock-fsys0"
+ - "samsung,exynos7-clock-fsys1"
+ - "samsung,exynos7-clock-mscl"
+ - "samsung,exynos7-clock-aud"
+
+ - reg: physical base address of the controller and the length of
+ memory mapped region.
+
+ - #clock-cells: should be 1.
+
+ - clocks: list of clock identifiers which are fed as the input to
+ the given clock controller. Please refer the next section to
+ find the input clocks for a given controller.
+
+- clock-names: list of names of clocks which are fed as the input
+ to the given clock controller.
+
+Input clocks for top0 clock controller:
+ - fin_pll
+ - dout_sclk_bus0_pll
+ - dout_sclk_bus1_pll
+ - dout_sclk_cc_pll
+ - dout_sclk_mfc_pll
+ - dout_sclk_aud_pll
+
+Input clocks for top1 clock controller:
+ - fin_pll
+ - dout_sclk_bus0_pll
+ - dout_sclk_bus1_pll
+ - dout_sclk_cc_pll
+ - dout_sclk_mfc_pll
+
+Input clocks for ccore clock controller:
+ - fin_pll
+ - dout_aclk_ccore_133
+
+Input clocks for peric0 clock controller:
+ - fin_pll
+ - dout_aclk_peric0_66
+ - sclk_uart0
+
+Input clocks for peric1 clock controller:
+ - fin_pll
+ - dout_aclk_peric1_66
+ - sclk_uart1
+ - sclk_uart2
+ - sclk_uart3
+ - sclk_spi0
+ - sclk_spi1
+ - sclk_spi2
+ - sclk_spi3
+ - sclk_spi4
+ - sclk_i2s1
+ - sclk_pcm1
+ - sclk_spdif
+
+Input clocks for peris clock controller:
+ - fin_pll
+ - dout_aclk_peris_66
+
+Input clocks for fsys0 clock controller:
+ - fin_pll
+ - dout_aclk_fsys0_200
+ - dout_sclk_mmc2
+
+Input clocks for fsys1 clock controller:
+ - fin_pll
+ - dout_aclk_fsys1_200
+ - dout_sclk_mmc0
+ - dout_sclk_mmc1
+
+Input clocks for aud clock controller:
+ - fin_pll
+ - fout_aud_pll
diff --git a/Documentation/devicetree/bindings/clock/fixed-clock.txt b/Documentation/devicetree/bindings/clock/fixed-clock.txt
new file mode 100644
index 000000000..0641a663a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fixed-clock.txt
@@ -0,0 +1,23 @@
+Binding for simple fixed-rate clock sources.
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "fixed-clock".
+- #clock-cells : from common clock binding; shall be set to 0.
+- clock-frequency : frequency of clock in Hz. Should be a single cell.
+
+Optional properties:
+- clock-accuracy : accuracy of clock in ppb (parts per billion).
+ Should be a single cell.
+- clock-output-names : From common clock binding.
+
+Example:
+ clock {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <1000000000>;
+ clock-accuracy = <100>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt b/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt
new file mode 100644
index 000000000..1bae8527e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fixed-factor-clock.txt
@@ -0,0 +1,24 @@
+Binding for simple fixed factor rate clock sources.
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "fixed-factor-clock".
+- #clock-cells : from common clock binding; shall be set to 0.
+- clock-div: fixed divider.
+- clock-mult: fixed multiplier.
+- clocks: parent clock.
+
+Optional properties:
+- clock-output-names : From common clock binding.
+
+Example:
+ clock {
+ compatible = "fixed-factor-clock";
+ clocks = <&parentclk>;
+ #clock-cells = <0>;
+ clock-div = <2>;
+ clock-mult = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/fujitsu,mb86s70-crg11.txt b/Documentation/devicetree/bindings/clock/fujitsu,mb86s70-crg11.txt
new file mode 100644
index 000000000..332396265
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/fujitsu,mb86s70-crg11.txt
@@ -0,0 +1,26 @@
+Fujitsu CRG11 clock driver bindings
+-----------------------------------
+
+Required properties :
+- compatible : Shall contain "fujitsu,mb86s70-crg11"
+- #clock-cells : Shall be 3 {cntrlr domain port}
+
+The consumer specifies the desired clock pointing to its phandle.
+
+Example:
+
+ clock: crg11 {
+ compatible = "fujitsu,mb86s70-crg11";
+ #clock-cells = <3>;
+ };
+
+ mhu: mhu0@2b1f0000 {
+ #mbox-cells = <1>;
+ compatible = "arm,mhu";
+ reg = <0 0x2B1F0000 0x1000>;
+ interrupts = <0 36 4>, /* LP Non-Sec */
+ <0 35 4>, /* HP Non-Sec */
+ <0 37 4>; /* Secure */
+ clocks = <&clock 0 2 1>; /* Cntrlr:0 Domain:2 Port:1 */
+ clock-names = "clk";
+ };
diff --git a/Documentation/devicetree/bindings/clock/gpio-gate-clock.txt b/Documentation/devicetree/bindings/clock/gpio-gate-clock.txt
new file mode 100644
index 000000000..d3379ff9b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/gpio-gate-clock.txt
@@ -0,0 +1,21 @@
+Binding for simple gpio gated clock.
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "gpio-gate-clock".
+- #clock-cells : from common clock binding; shall be set to 0.
+- enable-gpios : GPIO reference for enabling and disabling the clock.
+
+Optional properties:
+- clocks: Maximum of one parent clock is supported.
+
+Example:
+ clock {
+ compatible = "gpio-gate-clock";
+ clocks = <&parentclk>;
+ #clock-cells = <0>;
+ enable-gpios = <&gpio 1 GPIO_ACTIVE_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/hi3620-clock.txt b/Documentation/devicetree/bindings/clock/hi3620-clock.txt
new file mode 100644
index 000000000..dad6269f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/hi3620-clock.txt
@@ -0,0 +1,20 @@
+* Hisilicon Hi3620 Clock Controller
+
+The Hi3620 clock controller generates and supplies clock to various
+controllers within the Hi3620 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "hisilicon,hi3620-clock" - controller compatible with Hi3620 SoC.
+ - "hisilicon,hi3620-mmc-clock" - controller specific for Hi3620 mmc.
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes use this identifier
+to specify the clock which they consume.
+
+All these identifier could be found in <dt-bindings/clock/hi3620-clock.h>.
diff --git a/Documentation/devicetree/bindings/clock/hix5hd2-clock.txt b/Documentation/devicetree/bindings/clock/hix5hd2-clock.txt
new file mode 100644
index 000000000..7894a6488
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/hix5hd2-clock.txt
@@ -0,0 +1,31 @@
+* Hisilicon Hix5hd2 Clock Controller
+
+The hix5hd2 clock controller generates and supplies clock to various
+controllers within the hix5hd2 SoC.
+
+Required Properties:
+
+- compatible: should be "hisilicon,hix5hd2-clock"
+- reg: Address and length of the register set
+- #clock-cells: Should be <1>
+
+Each clock is assigned an identifier and client nodes use this identifier
+to specify the clock which they consume.
+
+All these identifier could be found in <dt-bindings/clock/hix5hd2-clock.h>.
+
+Examples:
+ clock: clock@f8a22000 {
+ compatible = "hisilicon,hix5hd2-clock";
+ reg = <0xf8a22000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ uart0: uart@f8b00000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0xf8b00000 0x1000>;
+ interrupts = <0 49 4>;
+ clocks = <&clock HIX5HD2_FIXED_83M>;
+ clock-names = "apb_pclk";
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx1-clock.txt b/Documentation/devicetree/bindings/clock/imx1-clock.txt
new file mode 100644
index 000000000..b7adf4e3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx1-clock.txt
@@ -0,0 +1,26 @@
+* Clock bindings for Freescale i.MX1 CPUs
+
+Required properties:
+- compatible: Should be "fsl,imx1-ccm".
+- reg: Address and length of the register set.
+- #clock-cells: Should be <1>.
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx1-clock.h
+for the full list of i.MX1 clock IDs.
+
+Examples:
+ clks: ccm@0021b000 {
+ #clock-cells = <1>;
+ compatible = "fsl,imx1-ccm";
+ reg = <0x0021b000 0x1000>;
+ };
+
+ pwm: pwm@00208000 {
+ #pwm-cells = <2>;
+ compatible = "fsl,imx1-pwm";
+ reg = <0x00208000 0x1000>;
+ interrupts = <34>;
+ clocks = <&clks IMX1_CLK_DUMMY>, <&clks IMX1_CLK_PER1>;
+ clock-names = "ipg", "per";
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx21-clock.txt b/Documentation/devicetree/bindings/clock/imx21-clock.txt
new file mode 100644
index 000000000..c3b0db437
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx21-clock.txt
@@ -0,0 +1,28 @@
+* Clock bindings for Freescale i.MX21
+
+Required properties:
+- compatible : Should be "fsl,imx21-ccm".
+- reg : Address and length of the register set.
+- interrupts : Should contain CCM interrupt.
+- #clock-cells: Should be <1>.
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx21-clock.h
+for the full list of i.MX21 clock IDs.
+
+Examples:
+ clks: ccm@10027000{
+ compatible = "fsl,imx21-ccm";
+ reg = <0x10027000 0x800>;
+ #clock-cells = <1>;
+ };
+
+ uart1: serial@1000a000 {
+ compatible = "fsl,imx21-uart";
+ reg = <0x1000a000 0x1000>;
+ interrupts = <20>;
+ clocks = <&clks IMX21_CLK_UART1_IPG_GATE>,
+ <&clks IMX21_CLK_PER1>;
+ clock-names = "ipg", "per";
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx23-clock.txt b/Documentation/devicetree/bindings/clock/imx23-clock.txt
new file mode 100644
index 000000000..5083c0b83
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx23-clock.txt
@@ -0,0 +1,71 @@
+* Clock bindings for Freescale i.MX23
+
+Required properties:
+- compatible: Should be "fsl,imx23-clkctrl"
+- reg: Address and length of the register set
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. The following is a full list of i.MX23
+clocks and IDs.
+
+ Clock ID
+ ------------------
+ ref_xtal 0
+ pll 1
+ ref_cpu 2
+ ref_emi 3
+ ref_pix 4
+ ref_io 5
+ saif_sel 6
+ lcdif_sel 7
+ gpmi_sel 8
+ ssp_sel 9
+ emi_sel 10
+ cpu 11
+ etm_sel 12
+ cpu_pll 13
+ cpu_xtal 14
+ hbus 15
+ xbus 16
+ lcdif_div 17
+ ssp_div 18
+ gpmi_div 19
+ emi_pll 20
+ emi_xtal 21
+ etm_div 22
+ saif_div 23
+ clk32k_div 24
+ rtc 25
+ adc 26
+ spdif_div 27
+ clk32k 28
+ dri 29
+ pwm 30
+ filt 31
+ uart 32
+ ssp 33
+ gpmi 34
+ spdif 35
+ emi 36
+ saif 37
+ lcdif 38
+ etm 39
+ usb 40
+ usb_phy 41
+
+Examples:
+
+clks: clkctrl@80040000 {
+ compatible = "fsl,imx23-clkctrl";
+ reg = <0x80040000 0x2000>;
+ #clock-cells = <1>;
+};
+
+auart0: serial@8006c000 {
+ compatible = "fsl,imx23-auart";
+ reg = <0x8006c000 0x2000>;
+ interrupts = <24 25 23>;
+ clocks = <&clks 32>;
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/clock/imx25-clock.txt b/Documentation/devicetree/bindings/clock/imx25-clock.txt
new file mode 100644
index 000000000..ba6b312ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx25-clock.txt
@@ -0,0 +1,161 @@
+* Clock bindings for Freescale i.MX25
+
+Required properties:
+- compatible: Should be "fsl,imx25-ccm"
+- reg: Address and length of the register set
+- interrupts: Should contain CCM interrupt
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. The following is a full list of i.MX25
+clocks and IDs.
+
+ Clock ID
+ ---------------------------
+ dummy 0
+ osc 1
+ mpll 2
+ upll 3
+ mpll_cpu_3_4 4
+ cpu_sel 5
+ cpu 6
+ ahb 7
+ usb_div 8
+ ipg 9
+ per0_sel 10
+ per1_sel 11
+ per2_sel 12
+ per3_sel 13
+ per4_sel 14
+ per5_sel 15
+ per6_sel 16
+ per7_sel 17
+ per8_sel 18
+ per9_sel 19
+ per10_sel 20
+ per11_sel 21
+ per12_sel 22
+ per13_sel 23
+ per14_sel 24
+ per15_sel 25
+ per0 26
+ per1 27
+ per2 28
+ per3 29
+ per4 30
+ per5 31
+ per6 32
+ per7 33
+ per8 34
+ per9 35
+ per10 36
+ per11 37
+ per12 38
+ per13 39
+ per14 40
+ per15 41
+ csi_ipg_per 42
+ epit_ipg_per 43
+ esai_ipg_per 44
+ esdhc1_ipg_per 45
+ esdhc2_ipg_per 46
+ gpt_ipg_per 47
+ i2c_ipg_per 48
+ lcdc_ipg_per 49
+ nfc_ipg_per 50
+ owire_ipg_per 51
+ pwm_ipg_per 52
+ sim1_ipg_per 53
+ sim2_ipg_per 54
+ ssi1_ipg_per 55
+ ssi2_ipg_per 56
+ uart_ipg_per 57
+ ata_ahb 58
+ reserved 59
+ csi_ahb 60
+ emi_ahb 61
+ esai_ahb 62
+ esdhc1_ahb 63
+ esdhc2_ahb 64
+ fec_ahb 65
+ lcdc_ahb 66
+ rtic_ahb 67
+ sdma_ahb 68
+ slcdc_ahb 69
+ usbotg_ahb 70
+ reserved 71
+ reserved 72
+ reserved 73
+ reserved 74
+ can1_ipg 75
+ can2_ipg 76
+ csi_ipg 77
+ cspi1_ipg 78
+ cspi2_ipg 79
+ cspi3_ipg 80
+ dryice_ipg 81
+ ect_ipg 82
+ epit1_ipg 83
+ epit2_ipg 84
+ reserved 85
+ esdhc1_ipg 86
+ esdhc2_ipg 87
+ fec_ipg 88
+ reserved 89
+ reserved 90
+ reserved 91
+ gpt1_ipg 92
+ gpt2_ipg 93
+ gpt3_ipg 94
+ gpt4_ipg 95
+ reserved 96
+ reserved 97
+ reserved 98
+ iim_ipg 99
+ reserved 100
+ reserved 101
+ kpp_ipg 102
+ lcdc_ipg 103
+ reserved 104
+ pwm1_ipg 105
+ pwm2_ipg 106
+ pwm3_ipg 107
+ pwm4_ipg 108
+ rngb_ipg 109
+ reserved 110
+ scc_ipg 111
+ sdma_ipg 112
+ sim1_ipg 113
+ sim2_ipg 114
+ slcdc_ipg 115
+ spba_ipg 116
+ ssi1_ipg 117
+ ssi2_ipg 118
+ tsc_ipg 119
+ uart1_ipg 120
+ uart2_ipg 121
+ uart3_ipg 122
+ uart4_ipg 123
+ uart5_ipg 124
+ reserved 125
+ wdt_ipg 126
+ cko_div 127
+ cko_sel 128
+ cko 129
+
+Examples:
+
+clks: ccm@53f80000 {
+ compatible = "fsl,imx25-ccm";
+ reg = <0x53f80000 0x4000>;
+ interrupts = <31>;
+};
+
+uart1: serial@43f90000 {
+ compatible = "fsl,imx25-uart", "fsl,imx21-uart";
+ reg = <0x43f90000 0x4000>;
+ interrupts = <45>;
+ clocks = <&clks 79>, <&clks 50>;
+ clock-names = "ipg", "per";
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/clock/imx27-clock.txt b/Documentation/devicetree/bindings/clock/imx27-clock.txt
new file mode 100644
index 000000000..cc05de9ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx27-clock.txt
@@ -0,0 +1,28 @@
+* Clock bindings for Freescale i.MX27
+
+Required properties:
+- compatible: Should be "fsl,imx27-ccm"
+- reg: Address and length of the register set
+- interrupts: Should contain CCM interrupt
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx27-clock.h
+for the full list of i.MX27 clock IDs.
+
+Examples:
+ clks: ccm@10027000{
+ compatible = "fsl,imx27-ccm";
+ reg = <0x10027000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ uart1: serial@1000a000 {
+ compatible = "fsl,imx27-uart", "fsl,imx21-uart";
+ reg = <0x1000a000 0x1000>;
+ interrupts = <20>;
+ clocks = <&clks IMX27_CLK_UART1_IPG_GATE>,
+ <&clks IMX27_CLK_PER1_GATE>;
+ clock-names = "ipg", "per";
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/clock/imx28-clock.txt b/Documentation/devicetree/bindings/clock/imx28-clock.txt
new file mode 100644
index 000000000..e6587af62
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx28-clock.txt
@@ -0,0 +1,94 @@
+* Clock bindings for Freescale i.MX28
+
+Required properties:
+- compatible: Should be "fsl,imx28-clkctrl"
+- reg: Address and length of the register set
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. The following is a full list of i.MX28
+clocks and IDs.
+
+ Clock ID
+ ------------------
+ ref_xtal 0
+ pll0 1
+ pll1 2
+ pll2 3
+ ref_cpu 4
+ ref_emi 5
+ ref_io0 6
+ ref_io1 7
+ ref_pix 8
+ ref_hsadc 9
+ ref_gpmi 10
+ saif0_sel 11
+ saif1_sel 12
+ gpmi_sel 13
+ ssp0_sel 14
+ ssp1_sel 15
+ ssp2_sel 16
+ ssp3_sel 17
+ emi_sel 18
+ etm_sel 19
+ lcdif_sel 20
+ cpu 21
+ ptp_sel 22
+ cpu_pll 23
+ cpu_xtal 24
+ hbus 25
+ xbus 26
+ ssp0_div 27
+ ssp1_div 28
+ ssp2_div 29
+ ssp3_div 30
+ gpmi_div 31
+ emi_pll 32
+ emi_xtal 33
+ lcdif_div 34
+ etm_div 35
+ ptp 36
+ saif0_div 37
+ saif1_div 38
+ clk32k_div 39
+ rtc 40
+ lradc 41
+ spdif_div 42
+ clk32k 43
+ pwm 44
+ uart 45
+ ssp0 46
+ ssp1 47
+ ssp2 48
+ ssp3 49
+ gpmi 50
+ spdif 51
+ emi 52
+ saif0 53
+ saif1 54
+ lcdif 55
+ etm 56
+ fec 57
+ can0 58
+ can1 59
+ usb0 60
+ usb1 61
+ usb0_phy 62
+ usb1_phy 63
+ enet_out 64
+
+Examples:
+
+clks: clkctrl@80040000 {
+ compatible = "fsl,imx28-clkctrl";
+ reg = <0x80040000 0x2000>;
+ #clock-cells = <1>;
+};
+
+auart0: serial@8006a000 {
+ compatible = "fsl,imx28-auart", "fsl,imx23-auart";
+ reg = <0x8006a000 0x2000>;
+ interrupts = <112 70 71>;
+ clocks = <&clks 45>;
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/clock/imx31-clock.txt b/Documentation/devicetree/bindings/clock/imx31-clock.txt
new file mode 100644
index 000000000..19df842c6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx31-clock.txt
@@ -0,0 +1,91 @@
+* Clock bindings for Freescale i.MX31
+
+Required properties:
+- compatible: Should be "fsl,imx31-ccm"
+- reg: Address and length of the register set
+- interrupts: Should contain CCM interrupt
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. The following is a full list of i.MX31
+clocks and IDs.
+
+ Clock ID
+ -----------------------
+ dummy 0
+ ckih 1
+ ckil 2
+ mpll 3
+ spll 4
+ upll 5
+ mcu_main 6
+ hsp 7
+ ahb 8
+ nfc 9
+ ipg 10
+ per_div 11
+ per 12
+ csi_sel 13
+ fir_sel 14
+ csi_div 15
+ usb_div_pre 16
+ usb_div_post 17
+ fir_div_pre 18
+ fir_div_post 19
+ sdhc1_gate 20
+ sdhc2_gate 21
+ gpt_gate 22
+ epit1_gate 23
+ epit2_gate 24
+ iim_gate 25
+ ata_gate 26
+ sdma_gate 27
+ cspi3_gate 28
+ rng_gate 29
+ uart1_gate 30
+ uart2_gate 31
+ ssi1_gate 32
+ i2c1_gate 33
+ i2c2_gate 34
+ i2c3_gate 35
+ hantro_gate 36
+ mstick1_gate 37
+ mstick2_gate 38
+ csi_gate 39
+ rtc_gate 40
+ wdog_gate 41
+ pwm_gate 42
+ sim_gate 43
+ ect_gate 44
+ usb_gate 45
+ kpp_gate 46
+ ipu_gate 47
+ uart3_gate 48
+ uart4_gate 49
+ uart5_gate 50
+ owire_gate 51
+ ssi2_gate 52
+ cspi1_gate 53
+ cspi2_gate 54
+ gacc_gate 55
+ emi_gate 56
+ rtic_gate 57
+ firi_gate 58
+
+Examples:
+
+clks: ccm@53f80000{
+ compatible = "fsl,imx31-ccm";
+ reg = <0x53f80000 0x4000>;
+ interrupts = <0 31 0x04 0 53 0x04>;
+ #clock-cells = <1>;
+};
+
+uart1: serial@43f90000 {
+ compatible = "fsl,imx31-uart", "fsl,imx21-uart";
+ reg = <0x43f90000 0x4000>;
+ interrupts = <45>;
+ clocks = <&clks 10>, <&clks 30>;
+ clock-names = "ipg", "per";
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/clock/imx35-clock.txt b/Documentation/devicetree/bindings/clock/imx35-clock.txt
new file mode 100644
index 000000000..a70356452
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx35-clock.txt
@@ -0,0 +1,113 @@
+* Clock bindings for Freescale i.MX35
+
+Required properties:
+- compatible: Should be "fsl,imx35-ccm"
+- reg: Address and length of the register set
+- interrupts: Should contain CCM interrupt
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. The following is a full list of i.MX35
+clocks and IDs.
+
+ Clock ID
+ ---------------------------
+ ckih 0
+ mpll 1
+ ppll 2
+ mpll_075 3
+ arm 4
+ hsp 5
+ hsp_div 6
+ hsp_sel 7
+ ahb 8
+ ipg 9
+ arm_per_div 10
+ ahb_per_div 11
+ ipg_per 12
+ uart_sel 13
+ uart_div 14
+ esdhc_sel 15
+ esdhc1_div 16
+ esdhc2_div 17
+ esdhc3_div 18
+ spdif_sel 19
+ spdif_div_pre 20
+ spdif_div_post 21
+ ssi_sel 22
+ ssi1_div_pre 23
+ ssi1_div_post 24
+ ssi2_div_pre 25
+ ssi2_div_post 26
+ usb_sel 27
+ usb_div 28
+ nfc_div 29
+ asrc_gate 30
+ pata_gate 31
+ audmux_gate 32
+ can1_gate 33
+ can2_gate 34
+ cspi1_gate 35
+ cspi2_gate 36
+ ect_gate 37
+ edio_gate 38
+ emi_gate 39
+ epit1_gate 40
+ epit2_gate 41
+ esai_gate 42
+ esdhc1_gate 43
+ esdhc2_gate 44
+ esdhc3_gate 45
+ fec_gate 46
+ gpio1_gate 47
+ gpio2_gate 48
+ gpio3_gate 49
+ gpt_gate 50
+ i2c1_gate 51
+ i2c2_gate 52
+ i2c3_gate 53
+ iomuxc_gate 54
+ ipu_gate 55
+ kpp_gate 56
+ mlb_gate 57
+ mshc_gate 58
+ owire_gate 59
+ pwm_gate 60
+ rngc_gate 61
+ rtc_gate 62
+ rtic_gate 63
+ scc_gate 64
+ sdma_gate 65
+ spba_gate 66
+ spdif_gate 67
+ ssi1_gate 68
+ ssi2_gate 69
+ uart1_gate 70
+ uart2_gate 71
+ uart3_gate 72
+ usbotg_gate 73
+ wdog_gate 74
+ max_gate 75
+ admux_gate 76
+ csi_gate 77
+ csi_div 78
+ csi_sel 79
+ iim_gate 80
+ gpu2d_gate 81
+
+Examples:
+
+clks: ccm@53f80000 {
+ compatible = "fsl,imx35-ccm";
+ reg = <0x53f80000 0x4000>;
+ interrupts = <31>;
+ #clock-cells = <1>;
+};
+
+esdhc1: esdhc@53fb4000 {
+ compatible = "fsl,imx35-esdhc";
+ reg = <0x53fb4000 0x4000>;
+ interrupts = <7>;
+ clocks = <&clks 9>, <&clks 8>, <&clks 43>;
+ clock-names = "ipg", "ahb", "per";
+};
diff --git a/Documentation/devicetree/bindings/clock/imx5-clock.txt b/Documentation/devicetree/bindings/clock/imx5-clock.txt
new file mode 100644
index 000000000..cadc4d29a
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx5-clock.txt
@@ -0,0 +1,29 @@
+* Clock bindings for Freescale i.MX5
+
+Required properties:
+- compatible: Should be "fsl,<soc>-ccm" , where <soc> can be imx51 or imx53
+- reg: Address and length of the register set
+- interrupts: Should contain CCM interrupt
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx5-clock.h
+for the full list of i.MX5 clock IDs.
+
+Examples (for mx53):
+
+clks: ccm@53fd4000{
+ compatible = "fsl,imx53-ccm";
+ reg = <0x53fd4000 0x4000>;
+ interrupts = <0 71 0x04 0 72 0x04>;
+ #clock-cells = <1>;
+};
+
+can1: can@53fc8000 {
+ compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan";
+ reg = <0x53fc8000 0x4000>;
+ interrupts = <82>;
+ clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>, <&clks IMX5_CLK_CAN1_SERIAL_GATE>;
+ clock-names = "ipg", "per";
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/clock/imx6q-clock.txt b/Documentation/devicetree/bindings/clock/imx6q-clock.txt
new file mode 100644
index 000000000..9252912a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx6q-clock.txt
@@ -0,0 +1,31 @@
+* Clock bindings for Freescale i.MX6 Quad
+
+Required properties:
+- compatible: Should be "fsl,imx6q-ccm"
+- reg: Address and length of the register set
+- interrupts: Should contain CCM interrupt
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx6qdl-clock.h
+for the full list of i.MX6 Quad and DualLite clock IDs.
+
+Examples:
+
+#include <dt-bindings/clock/imx6qdl-clock.h>
+
+clks: ccm@020c4000 {
+ compatible = "fsl,imx6q-ccm";
+ reg = <0x020c4000 0x4000>;
+ interrupts = <0 87 0x04 0 88 0x04>;
+ #clock-cells = <1>;
+};
+
+uart1: serial@02020000 {
+ compatible = "fsl,imx6q-uart", "fsl,imx21-uart";
+ reg = <0x02020000 0x4000>;
+ interrupts = <0 26 0x04>;
+ clocks = <&clks IMX6QDL_CLK_UART_IPG>, <&clks IMX6QDL_CLK_UART_SERIAL>;
+ clock-names = "ipg", "per";
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/clock/imx6sl-clock.txt b/Documentation/devicetree/bindings/clock/imx6sl-clock.txt
new file mode 100644
index 000000000..15e40bdf1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx6sl-clock.txt
@@ -0,0 +1,10 @@
+* Clock bindings for Freescale i.MX6 SoloLite
+
+Required properties:
+- compatible: Should be "fsl,imx6sl-ccm"
+- reg: Address and length of the register set
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx6sl-clock.h
+for the full list of i.MX6 SoloLite clock IDs.
diff --git a/Documentation/devicetree/bindings/clock/imx6sx-clock.txt b/Documentation/devicetree/bindings/clock/imx6sx-clock.txt
new file mode 100644
index 000000000..22362b9b7
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/imx6sx-clock.txt
@@ -0,0 +1,13 @@
+* Clock bindings for Freescale i.MX6 SoloX
+
+Required properties:
+- compatible: Should be "fsl,imx6sx-ccm"
+- reg: Address and length of the register set
+- #clock-cells: Should be <1>
+- clocks: list of clock specifiers, must contain an entry for each required
+ entry in clock-names
+- clock-names: should include entries "ckil", "osc", "ipp_di0" and "ipp_di1"
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/imx6sx-clock.h
+for the full list of i.MX6 SoloX clock IDs.
diff --git a/Documentation/devicetree/bindings/clock/keystone-gate.txt b/Documentation/devicetree/bindings/clock/keystone-gate.txt
new file mode 100644
index 000000000..c5aa18702
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/keystone-gate.txt
@@ -0,0 +1,29 @@
+Status: Unstable - ABI compatibility may be broken in the future
+
+Binding for Keystone gate control driver which uses PSC controller IP.
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "ti,keystone,psc-clock".
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : parent clock phandle
+- reg : psc control and domain address address space
+- reg-names : psc control and domain registers
+- domain-id : psc domain id needed to check the transition state register
+
+Optional properties:
+- clock-output-names : From common clock binding to override the
+ default output clock name
+Example:
+ clkusb: clkusb {
+ #clock-cells = <0>;
+ compatible = "ti,keystone,psc-clock";
+ clocks = <&chipclk16>;
+ clock-output-names = "usb";
+ reg = <0x02350008 0xb00>, <0x02350000 0x400>;
+ reg-names = "control", "domain";
+ domain-id = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/keystone-pll.txt b/Documentation/devicetree/bindings/clock/keystone-pll.txt
new file mode 100644
index 000000000..225990f79
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/keystone-pll.txt
@@ -0,0 +1,84 @@
+Status: Unstable - ABI compatibility may be broken in the future
+
+Binding for keystone PLLs. The main PLL IP typically has a multiplier,
+a divider and a post divider. The additional PLL IPs like ARMPLL, DDRPLL
+and PAPLL are controlled by the memory mapped register where as the Main
+PLL is controlled by a PLL controller registers along with memory mapped
+registers.
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- #clock-cells : from common clock binding; shall be set to 0.
+- compatible : shall be "ti,keystone,main-pll-clock" or "ti,keystone,pll-clock"
+- clocks : parent clock phandle
+- reg - pll control0 and pll multipler registers
+- reg-names : control and multiplier. The multiplier is applicable only for
+ main pll clock
+- fixed-postdiv : fixed post divider value. If absent, use clkod register bits
+ for postdiv
+
+Example:
+ mainpllclk: mainpllclk@2310110 {
+ #clock-cells = <0>;
+ compatible = "ti,keystone,main-pll-clock";
+ clocks = <&refclksys>;
+ reg = <0x02620350 4>, <0x02310110 4>;
+ reg-names = "control", "multiplier";
+ fixed-postdiv = <2>;
+ };
+
+ papllclk: papllclk@2620358 {
+ #clock-cells = <0>;
+ compatible = "ti,keystone,pll-clock";
+ clocks = <&refclkpass>;
+ clock-output-names = "pa-pll-clk";
+ reg = <0x02620358 4>;
+ reg-names = "control";
+ };
+
+Required properties:
+- #clock-cells : from common clock binding; shall be set to 0.
+- compatible : shall be "ti,keystone,pll-mux-clock"
+- clocks : link phandles of parent clocks
+- reg - pll mux register
+- bit-shift : number of bits to shift the bit-mask
+- bit-mask : arbitrary bitmask for programming the mux
+
+Optional properties:
+- clock-output-names : From common clock binding.
+
+Example:
+ mainmuxclk: mainmuxclk@2310108 {
+ #clock-cells = <0>;
+ compatible = "ti,keystone,pll-mux-clock";
+ clocks = <&mainpllclk>, <&refclkmain>;
+ reg = <0x02310108 4>;
+ bit-shift = <23>;
+ bit-mask = <1>;
+ clock-output-names = "mainmuxclk";
+ };
+
+Required properties:
+- #clock-cells : from common clock binding; shall be set to 0.
+- compatible : shall be "ti,keystone,pll-divider-clock"
+- clocks : parent clock phandle
+- reg - pll mux register
+- bit-shift : number of bits to shift the bit-mask
+- bit-mask : arbitrary bitmask for programming the divider
+
+Optional properties:
+- clock-output-names : From common clock binding.
+
+Example:
+ gemtraceclk: gemtraceclk@2310120 {
+ #clock-cells = <0>;
+ compatible = "ti,keystone,pll-divider-clock";
+ clocks = <&mainmuxclk>;
+ reg = <0x02310120 4>;
+ bit-shift = <0>;
+ bit-mask = <8>;
+ clock-output-names = "gemtraceclk";
+ };
diff --git a/Documentation/devicetree/bindings/clock/lsi,axm5516-clks.txt b/Documentation/devicetree/bindings/clock/lsi,axm5516-clks.txt
new file mode 100644
index 000000000..3ce97cfe9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/lsi,axm5516-clks.txt
@@ -0,0 +1,29 @@
+AXM5516 clock driver bindings
+-----------------------------
+
+Required properties :
+- compatible : shall contain "lsi,axm5516-clks"
+- reg : shall contain base register location and length
+- #clock-cells : shall contain 1
+
+The consumer specifies the desired clock by having the clock ID in its "clocks"
+phandle cell. See <dt-bindings/clock/lsi,axxia-clock.h> for the list of
+supported clock IDs.
+
+Example:
+
+ clks: clock-controller@2010020000 {
+ compatible = "lsi,axm5516-clks";
+ #clock-cells = <1>;
+ reg = <0x20 0x10020000 0 0x20000>;
+ };
+
+ serial0: uart@2010080000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x20 0x10080000 0 0x1000>;
+ interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks AXXIA_CLK_PER>;
+ clock-names = "apb_pclk";
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/marvell,mmp2.txt b/Documentation/devicetree/bindings/clock/marvell,mmp2.txt
new file mode 100644
index 000000000..af376a01f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,mmp2.txt
@@ -0,0 +1,21 @@
+* Marvell MMP2 Clock Controller
+
+The MMP2 clock subsystem generates and supplies clock to various
+controllers within the MMP2 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "marvell,mmp2-clock" - controller compatible with MMP2 SoC.
+
+- reg: physical base address of the clock subsystem and length of memory mapped
+ region. There are 3 places in SOC has clock control logic:
+ "mpmu", "apmu", "apbc". So three reg spaces need to be defined.
+
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes use this identifier
+to specify the clock which they consume.
+
+All these identifier could be found in <dt-bindings/clock/marvell-mmp2.h>.
diff --git a/Documentation/devicetree/bindings/clock/marvell,pxa168.txt b/Documentation/devicetree/bindings/clock/marvell,pxa168.txt
new file mode 100644
index 000000000..c62eb1d17
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,pxa168.txt
@@ -0,0 +1,21 @@
+* Marvell PXA168 Clock Controller
+
+The PXA168 clock subsystem generates and supplies clock to various
+controllers within the PXA168 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "marvell,pxa168-clock" - controller compatible with PXA168 SoC.
+
+- reg: physical base address of the clock subsystem and length of memory mapped
+ region. There are 3 places in SOC has clock control logic:
+ "mpmu", "apmu", "apbc". So three reg spaces need to be defined.
+
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes use this identifier
+to specify the clock which they consume.
+
+All these identifier could be found in <dt-bindings/clock/marvell,pxa168.h>.
diff --git a/Documentation/devicetree/bindings/clock/marvell,pxa910.txt b/Documentation/devicetree/bindings/clock/marvell,pxa910.txt
new file mode 100644
index 000000000..d9f41f3c0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,pxa910.txt
@@ -0,0 +1,21 @@
+* Marvell PXA910 Clock Controller
+
+The PXA910 clock subsystem generates and supplies clock to various
+controllers within the PXA910 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "marvell,pxa910-clock" - controller compatible with PXA910 SoC.
+
+- reg: physical base address of the clock subsystem and length of memory mapped
+ region. There are 4 places in SOC has clock control logic:
+ "mpmu", "apmu", "apbc", "apbcp". So four reg spaces need to be defined.
+
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes use this identifier
+to specify the clock which they consume.
+
+All these identifier could be found in <dt-bindings/clock/marvell-pxa910.h>.
diff --git a/Documentation/devicetree/bindings/clock/maxim,max77686.txt b/Documentation/devicetree/bindings/clock/maxim,max77686.txt
new file mode 100644
index 000000000..9c40739a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/maxim,max77686.txt
@@ -0,0 +1,46 @@
+Binding for Maxim MAX77686 32k clock generator block
+
+This is a part of device tree bindings of MAX77686 multi-function device.
+More information can be found in bindings/mfd/max77686.txt file.
+
+The MAX77686 contains three 32.768khz clock outputs that can be controlled
+(gated/ungated) over I2C.
+
+Following properties should be presend in main device node of the MFD chip.
+
+Required properties:
+
+- #clock-cells: from common clock binding; shall be set to 1.
+
+Optional properties:
+- clock-output-names: From common clock binding.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. Following indices are allowed:
+ - 0: 32khz_ap clock,
+ - 1: 32khz_cp clock,
+ - 2: 32khz_pmic clock.
+
+Clocks are defined as preprocessor macros in dt-bindings/clock/maxim,max77686.h
+header and can be used in device tree sources.
+
+Example: Node of the MFD chip
+
+ max77686: max77686@09 {
+ compatible = "maxim,max77686";
+ interrupt-parent = <&wakeup_eint>;
+ interrupts = <26 0>;
+ reg = <0x09>;
+ #clock-cells = <1>;
+
+ /* ... */
+ };
+
+Example: Clock consumer node
+
+ foo@0 {
+ compatible = "bar,foo";
+ /* ... */
+ clock-names = "my-clock";
+ clocks = <&max77686 MAX77686_CLK_PMIC>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/maxim,max77802.txt b/Documentation/devicetree/bindings/clock/maxim,max77802.txt
new file mode 100644
index 000000000..c6dc7835f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/maxim,max77802.txt
@@ -0,0 +1,44 @@
+Binding for Maxim MAX77802 32k clock generator block
+
+This is a part of device tree bindings of MAX77802 multi-function device.
+More information can be found in bindings/mfd/max77802.txt file.
+
+The MAX77802 contains two 32.768khz clock outputs that can be controlled
+(gated/ungated) over I2C.
+
+Following properties should be present in main device node of the MFD chip.
+
+Required properties:
+- #clock-cells: From common clock binding; shall be set to 1.
+
+Optional properties:
+- clock-output-names: From common clock binding.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. Following indices are allowed:
+ - 0: 32khz_ap clock,
+ - 1: 32khz_cp clock.
+
+Clocks are defined as preprocessor macros in dt-bindings/clock/maxim,max77802.h
+header and can be used in device tree sources.
+
+Example: Node of the MFD chip
+
+ max77802: max77802@09 {
+ compatible = "maxim,max77802";
+ interrupt-parent = <&wakeup_eint>;
+ interrupts = <26 0>;
+ reg = <0x09>;
+ #clock-cells = <1>;
+
+ /* ... */
+ };
+
+Example: Clock consumer node
+
+ foo@0 {
+ compatible = "bar,foo";
+ /* ... */
+ clock-names = "my-clock";
+ clocks = <&max77802 MAX77802_CLK_32K_AP>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/moxa,moxart-clock.txt b/Documentation/devicetree/bindings/clock/moxa,moxart-clock.txt
new file mode 100644
index 000000000..fedea8431
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/moxa,moxart-clock.txt
@@ -0,0 +1,48 @@
+Device Tree Clock bindings for arch-moxart
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+MOXA ART SoCs allow to determine PLL output and APB frequencies
+by reading registers holding multiplier and divisor information.
+
+
+PLL:
+
+Required properties:
+- compatible : Must be "moxa,moxart-pll-clock"
+- #clock-cells : Should be 0
+- reg : Should contain registers location and length
+- clocks : Should contain phandle + clock-specifier for the parent clock
+
+Optional properties:
+- clock-output-names : Should contain clock name
+
+
+APB:
+
+Required properties:
+- compatible : Must be "moxa,moxart-apb-clock"
+- #clock-cells : Should be 0
+- reg : Should contain registers location and length
+- clocks : Should contain phandle + clock-specifier for the parent clock
+
+Optional properties:
+- clock-output-names : Should contain clock name
+
+
+For example:
+
+ clk_pll: clk_pll@98100000 {
+ compatible = "moxa,moxart-pll-clock";
+ #clock-cells = <0>;
+ reg = <0x98100000 0x34>;
+ };
+
+ clk_apb: clk_apb@98100000 {
+ compatible = "moxa,moxart-apb-clock";
+ #clock-cells = <0>;
+ reg = <0x98100000 0x34>;
+ clocks = <&clk_pll>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
new file mode 100644
index 000000000..670c2af3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mvebu-core-clock.txt
@@ -0,0 +1,78 @@
+* Core Clock bindings for Marvell MVEBU SoCs
+
+Marvell MVEBU SoCs usually allow to determine core clock frequencies by
+reading the Sample-At-Reset (SAR) register. The core clock consumer should
+specify the desired clock by having the clock ID in its "clocks" phandle cell.
+
+The following is a list of provided IDs and clock names on Armada 370/XP:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU clock)
+ 2 = nbclk (L2 Cache clock)
+ 3 = hclk (DRAM control clock)
+ 4 = dramclk (DDR clock)
+
+The following is a list of provided IDs and clock names on Armada 375:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU clock)
+ 2 = l2clk (L2 Cache clock)
+ 3 = ddrclk (DDR clock)
+
+The following is a list of provided IDs and clock names on Armada 380/385:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU clock)
+ 2 = l2clk (L2 Cache clock)
+ 3 = ddrclk (DDR clock)
+
+The following is a list of provided IDs and clock names on Armada 39x:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU clock)
+ 2 = nbclk (Coherent Fabric clock)
+ 3 = hclk (SDRAM Controller Internal Clock)
+ 4 = dclk (SDRAM Interface Clock)
+ 5 = refclk (Reference Clock)
+
+The following is a list of provided IDs and clock names on Kirkwood and Dove:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU0 clock)
+ 2 = l2clk (L2 Cache clock derived from CPU0 clock)
+ 3 = ddrclk (DDR controller clock derived from CPU0 clock)
+
+The following is a list of provided IDs and clock names on Orion5x:
+ 0 = tclk (Internal Bus clock)
+ 1 = cpuclk (CPU0 clock)
+ 2 = ddrclk (DDR controller clock derived from CPU0 clock)
+
+Required properties:
+- compatible : shall be one of the following:
+ "marvell,armada-370-core-clock" - For Armada 370 SoC core clocks
+ "marvell,armada-375-core-clock" - For Armada 375 SoC core clocks
+ "marvell,armada-380-core-clock" - For Armada 380/385 SoC core clocks
+ "marvell,armada-390-core-clock" - For Armada 39x SoC core clocks
+ "marvell,armada-xp-core-clock" - For Armada XP SoC core clocks
+ "marvell,dove-core-clock" - for Dove SoC core clocks
+ "marvell,kirkwood-core-clock" - for Kirkwood SoC (except mv88f6180)
+ "marvell,mv88f6180-core-clock" - for Kirkwood MV88f6180 SoC
+ "marvell,mv88f5182-core-clock" - for Orion MV88F5182 SoC
+ "marvell,mv88f5281-core-clock" - for Orion MV88F5281 SoC
+ "marvell,mv88f6183-core-clock" - for Orion MV88F6183 SoC
+- reg : shall be the register address of the Sample-At-Reset (SAR) register
+- #clock-cells : from common clock binding; shall be set to 1
+
+Optional properties:
+- clock-output-names : from common clock binding; allows overwrite default clock
+ output names ("tclk", "cpuclk", "l2clk", "ddrclk")
+
+Example:
+
+core_clk: core-clocks@d0214 {
+ compatible = "marvell,dove-core-clock";
+ reg = <0xd0214 0x4>;
+ #clock-cells = <1>;
+};
+
+spi0: spi@10600 {
+ compatible = "marvell,orion-spi";
+ /* ... */
+ /* get tclk from core clock provider */
+ clocks = <&core_clk 0>;
+};
diff --git a/Documentation/devicetree/bindings/clock/mvebu-corediv-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-corediv-clock.txt
new file mode 100644
index 000000000..520562a7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mvebu-corediv-clock.txt
@@ -0,0 +1,22 @@
+* Core Divider Clock bindings for Marvell MVEBU SoCs
+
+The following is a list of provided IDs and clock names on Armada 370/XP:
+ 0 = nand (NAND clock)
+
+Required properties:
+- compatible : must be "marvell,armada-370-corediv-clock",
+ "marvell,armada-375-corediv-clock",
+ "marvell,armada-380-corediv-clock",
+
+- reg : must be the register address of Core Divider control register
+- #clock-cells : from common clock binding; shall be set to 1
+- clocks : must be set to the parent's phandle
+
+Example:
+
+corediv_clk: corediv-clocks@18740 {
+ compatible = "marvell,armada-370-corediv-clock";
+ reg = <0x18740 0xc>;
+ #clock-cells = <1>;
+ clocks = <&pll>;
+};
diff --git a/Documentation/devicetree/bindings/clock/mvebu-cpu-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-cpu-clock.txt
new file mode 100644
index 000000000..99c214660
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mvebu-cpu-clock.txt
@@ -0,0 +1,22 @@
+Device Tree Clock bindings for cpu clock of Marvell EBU platforms
+
+Required properties:
+- compatible : shall be one of the following:
+ "marvell,armada-xp-cpu-clock" - cpu clocks for Armada XP
+- reg : Address and length of the clock complex register set, followed
+ by address and length of the PMU DFS registers
+- #clock-cells : should be set to 1.
+- clocks : shall be the input parent clock phandle for the clock.
+
+cpuclk: clock-complex@d0018700 {
+ #clock-cells = <1>;
+ compatible = "marvell,armada-xp-cpu-clock";
+ reg = <0xd0018700 0xA0>, <0x1c054 0x10>;
+ clocks = <&coreclk 1>;
+}
+
+cpu@0 {
+ compatible = "marvell,sheeva-v7";
+ reg = <0>;
+ clocks = <&cpuclk 0>;
+};
diff --git a/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt b/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt
new file mode 100644
index 000000000..31c7c0c1c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/mvebu-gated-clock.txt
@@ -0,0 +1,191 @@
+* Gated Clock bindings for Marvell EBU SoCs
+
+Marvell Armada 370/375/380/385/39x/XP, Dove and Kirkwood allow some
+peripheral clocks to be gated to save some power. The clock consumer
+should specify the desired clock by having the clock ID in its
+"clocks" phandle cell. The clock ID is directly mapped to the
+corresponding clock gating control bit in HW to ease manual clock
+lookup in datasheet.
+
+The following is a list of provided IDs for Armada 370:
+ID Clock Peripheral
+-----------------------------------
+0 Audio AC97 Cntrl
+1 pex0_en PCIe 0 Clock out
+2 pex1_en PCIe 1 Clock out
+3 ge1 Gigabit Ethernet 1
+4 ge0 Gigabit Ethernet 0
+5 pex0 PCIe Cntrl 0
+9 pex1 PCIe Cntrl 1
+15 sata0 SATA Host 0
+17 sdio SDHCI Host
+25 tdm Time Division Mplx
+28 ddr DDR Cntrl
+30 sata1 SATA Host 0
+
+The following is a list of provided IDs for Armada 375:
+ID Clock Peripheral
+-----------------------------------
+2 mu Management Unit
+3 pp Packet Processor
+4 ptp PTP
+5 pex0 PCIe 0 Clock out
+6 pex1 PCIe 1 Clock out
+8 audio Audio Cntrl
+11 nd_clk Nand Flash Cntrl
+14 sata0_link SATA 0 Link
+15 sata0_core SATA 0 Core
+16 usb3 USB3 Host
+17 sdio SDHCI Host
+18 usb USB Host
+19 gop Gigabit Ethernet MAC
+20 sata1_link SATA 1 Link
+21 sata1_core SATA 1 Core
+22 xor0 XOR DMA 0
+23 xor1 XOR DMA 0
+24 copro Coprocessor
+25 tdm Time Division Mplx
+28 crypto0_enc Cryptographic Unit Port 0 Encryption
+29 crypto0_core Cryptographic Unit Port 0 Core
+30 crypto1_enc Cryptographic Unit Port 1 Encryption
+31 crypto1_core Cryptographic Unit Port 1 Core
+
+The following is a list of provided IDs for Armada 380/385:
+ID Clock Peripheral
+-----------------------------------
+0 audio Audio
+2 ge2 Gigabit Ethernet 2
+3 ge1 Gigabit Ethernet 1
+4 ge0 Gigabit Ethernet 0
+5 pex1 PCIe 1
+6 pex2 PCIe 2
+7 pex3 PCIe 3
+8 pex0 PCIe 0
+9 usb3h0 USB3 Host 0
+10 usb3h1 USB3 Host 1
+11 usb3d USB3 Device
+13 bm Buffer Management
+14 crypto0z Cryptographic 0 Z
+15 sata0 SATA 0
+16 crypto1z Cryptographic 1 Z
+17 sdio SDIO
+18 usb2 USB 2
+21 crypto1 Cryptographic 1
+22 xor0 XOR 0
+23 crypto0 Cryptographic 0
+25 tdm Time Division Multiplexing
+28 xor1 XOR 1
+30 sata1 SATA 1
+
+The following is a list of provided IDs for Armada 39x:
+ID Clock Peripheral
+-----------------------------------
+5 pex1 PCIe 1
+6 pex2 PCIe 2
+7 pex3 PCIe 3
+8 pex0 PCIe 0
+9 usb3h0 USB3 Host 0
+17 sdio SDIO
+22 xor0 XOR 0
+28 xor1 XOR 1
+
+The following is a list of provided IDs for Armada XP:
+ID Clock Peripheral
+-----------------------------------
+0 audio Audio Cntrl
+1 ge3 Gigabit Ethernet 3
+2 ge2 Gigabit Ethernet 2
+3 ge1 Gigabit Ethernet 1
+4 ge0 Gigabit Ethernet 0
+5 pex0 PCIe Cntrl 0
+6 pex1 PCIe Cntrl 1
+7 pex2 PCIe Cntrl 2
+8 pex3 PCIe Cntrl 3
+13 bp
+14 sata0lnk
+15 sata0 SATA Host 0
+16 lcd LCD Cntrl
+17 sdio SDHCI Host
+18 usb0 USB Host 0
+19 usb1 USB Host 1
+20 usb2 USB Host 2
+22 xor0 XOR DMA 0
+23 crypto CESA engine
+25 tdm Time Division Mplx
+28 xor1 XOR DMA 1
+29 sata1lnk
+30 sata1 SATA Host 0
+
+The following is a list of provided IDs for Dove:
+ID Clock Peripheral
+-----------------------------------
+0 usb0 USB Host 0
+1 usb1 USB Host 1
+2 ge Gigabit Ethernet
+3 sata SATA Host
+4 pex0 PCIe Cntrl 0
+5 pex1 PCIe Cntrl 1
+8 sdio0 SDHCI Host 0
+9 sdio1 SDHCI Host 1
+10 nand NAND Cntrl
+11 camera Camera Cntrl
+12 i2s0 I2S Cntrl 0
+13 i2s1 I2S Cntrl 1
+15 crypto CESA engine
+21 ac97 AC97 Cntrl
+22 pdma Peripheral DMA
+23 xor0 XOR DMA 0
+24 xor1 XOR DMA 1
+30 gephy Gigabit Ethernel PHY
+Note: gephy(30) is implemented as a parent clock of ge(2)
+
+The following is a list of provided IDs for Kirkwood:
+ID Clock Peripheral
+-----------------------------------
+0 ge0 Gigabit Ethernet 0
+2 pex0 PCIe Cntrl 0
+3 usb0 USB Host 0
+4 sdio SDIO Cntrl
+5 tsu Transp. Stream Unit
+6 dunit SDRAM Cntrl
+7 runit Runit
+8 xor0 XOR DMA 0
+9 audio I2S Cntrl 0
+14 sata0 SATA Host 0
+15 sata1 SATA Host 1
+16 xor1 XOR DMA 1
+17 crypto CESA engine
+18 pex1 PCIe Cntrl 1
+19 ge1 Gigabit Ethernet 1
+20 tdm Time Division Mplx
+
+Required properties:
+- compatible : shall be one of the following:
+ "marvell,armada-370-gating-clock" - for Armada 370 SoC clock gating
+ "marvell,armada-375-gating-clock" - for Armada 375 SoC clock gating
+ "marvell,armada-380-gating-clock" - for Armada 380/385 SoC clock gating
+ "marvell,armada-390-gating-clock" - for Armada 39x SoC clock gating
+ "marvell,armada-xp-gating-clock" - for Armada XP SoC clock gating
+ "marvell,dove-gating-clock" - for Dove SoC clock gating
+ "marvell,kirkwood-gating-clock" - for Kirkwood SoC clock gating
+- reg : shall be the register address of the Clock Gating Control register
+- #clock-cells : from common clock binding; shall be set to 1
+
+Optional properties:
+- clocks : default parent clock phandle (e.g. tclk)
+
+Example:
+
+gate_clk: clock-gating-control@d0038 {
+ compatible = "marvell,dove-gating-clock";
+ reg = <0xd0038 0x4>;
+ /* default parent clock is tclk */
+ clocks = <&core_clk 0>;
+ #clock-cells = <1>;
+};
+
+sdio0: sdio@92000 {
+ compatible = "marvell,dove-sdhci";
+ /* get clk gate bit 8 (sdio0) */
+ clocks = <&gate_clk 8>;
+};
diff --git a/Documentation/devicetree/bindings/clock/nspire-clock.txt b/Documentation/devicetree/bindings/clock/nspire-clock.txt
new file mode 100644
index 000000000..7c3bc8bb5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nspire-clock.txt
@@ -0,0 +1,24 @@
+TI-NSPIRE Clocks
+
+Required properties:
+- compatible: Valid compatible properties include:
+ "lsi,nspire-cx-ahb-divider" for the AHB divider in the CX model
+ "lsi,nspire-classic-ahb-divider" for the AHB divider in the older model
+ "lsi,nspire-cx-clock" for the base clock in the CX model
+ "lsi,nspire-classic-clock" for the base clock in the older model
+
+- reg: Physical base address of the controller and length of memory mapped
+ region.
+
+Optional:
+- clocks: For the "nspire-*-ahb-divider" compatible clocks, this is the parent
+ clock where it divides the rate from.
+
+Example:
+
+ahb_clk {
+ #clock-cells = <0>;
+ compatible = "lsi,nspire-cx-clock";
+ reg = <0x900B0000 0x4>;
+ clocks = <&base_clk>;
+};
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra114-car.txt b/Documentation/devicetree/bindings/clock/nvidia,tegra114-car.txt
new file mode 100644
index 000000000..9acea9d93
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nvidia,tegra114-car.txt
@@ -0,0 +1,63 @@
+NVIDIA Tegra114 Clock And Reset Controller
+
+This binding uses the common clock binding:
+Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+The CAR (Clock And Reset) Controller on Tegra is the HW module responsible
+for muxing and gating Tegra's clocks, and setting their rates.
+
+Required properties :
+- compatible : Should be "nvidia,tegra114-car"
+- reg : Should contain CAR registers location and length
+- clocks : Should contain phandle and clock specifiers for two clocks:
+ the 32 KHz "32k_in", and the board-specific oscillator "osc".
+- #clock-cells : Should be 1.
+ In clock consumers, this cell represents the clock ID exposed by the
+ CAR. The assignments may be found in header file
+ <dt-bindings/clock/tegra114-car.h>.
+- #reset-cells : Should be 1.
+ In clock consumers, this cell represents the bit number in the CAR's
+ array of CLK_RST_CONTROLLER_RST_DEVICES_* registers.
+
+Example SoC include file:
+
+/ {
+ tegra_car: clock {
+ compatible = "nvidia,tegra114-car";
+ reg = <0x60006000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ usb@c5004000 {
+ clocks = <&tegra_car TEGRA114_CLK_USB2>;
+ };
+};
+
+Example board file:
+
+/ {
+ clocks {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ osc: clock@0 {
+ compatible = "fixed-clock";
+ reg = <0>;
+ #clock-cells = <0>;
+ clock-frequency = <12000000>;
+ };
+
+ clk_32k: clock@1 {
+ compatible = "fixed-clock";
+ reg = <1>;
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+ };
+
+ &tegra_car {
+ clocks = <&clk_32k> <&osc>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra124-car.txt b/Documentation/devicetree/bindings/clock/nvidia,tegra124-car.txt
new file mode 100644
index 000000000..c6620bc96
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nvidia,tegra124-car.txt
@@ -0,0 +1,65 @@
+NVIDIA Tegra124 and Tegra132 Clock And Reset Controller
+
+This binding uses the common clock binding:
+Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+The CAR (Clock And Reset) Controller on Tegra is the HW module responsible
+for muxing and gating Tegra's clocks, and setting their rates.
+
+Required properties :
+- compatible : Should be "nvidia,tegra124-car" or "nvidia,tegra132-car"
+- reg : Should contain CAR registers location and length
+- clocks : Should contain phandle and clock specifiers for two clocks:
+ the 32 KHz "32k_in", and the board-specific oscillator "osc".
+- #clock-cells : Should be 1.
+ In clock consumers, this cell represents the clock ID exposed by the
+ CAR. The assignments may be found in the header files
+ <dt-bindings/clock/tegra124-car-common.h> (which covers IDs common
+ to Tegra124 and Tegra132) and <dt-bindings/clock/tegra124-car.h>
+ (for Tegra124-specific clocks).
+- #reset-cells : Should be 1.
+ In clock consumers, this cell represents the bit number in the CAR's
+ array of CLK_RST_CONTROLLER_RST_DEVICES_* registers.
+
+Example SoC include file:
+
+/ {
+ tegra_car: clock {
+ compatible = "nvidia,tegra124-car";
+ reg = <0x60006000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ usb@c5004000 {
+ clocks = <&tegra_car TEGRA124_CLK_USB2>;
+ };
+};
+
+Example board file:
+
+/ {
+ clocks {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ osc: clock@0 {
+ compatible = "fixed-clock";
+ reg = <0>;
+ #clock-cells = <0>;
+ clock-frequency = <112400000>;
+ };
+
+ clk_32k: clock@1 {
+ compatible = "fixed-clock";
+ reg = <1>;
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+ };
+
+ &tegra_car {
+ clocks = <&clk_32k> <&osc>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.txt b/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.txt
new file mode 100644
index 000000000..6c5901b50
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nvidia,tegra20-car.txt
@@ -0,0 +1,63 @@
+NVIDIA Tegra20 Clock And Reset Controller
+
+This binding uses the common clock binding:
+Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+The CAR (Clock And Reset) Controller on Tegra is the HW module responsible
+for muxing and gating Tegra's clocks, and setting their rates.
+
+Required properties :
+- compatible : Should be "nvidia,tegra20-car"
+- reg : Should contain CAR registers location and length
+- clocks : Should contain phandle and clock specifiers for two clocks:
+ the 32 KHz "32k_in", and the board-specific oscillator "osc".
+- #clock-cells : Should be 1.
+ In clock consumers, this cell represents the clock ID exposed by the
+ CAR. The assignments may be found in header file
+ <dt-bindings/clock/tegra20-car.h>.
+- #reset-cells : Should be 1.
+ In clock consumers, this cell represents the bit number in the CAR's
+ array of CLK_RST_CONTROLLER_RST_DEVICES_* registers.
+
+Example SoC include file:
+
+/ {
+ tegra_car: clock {
+ compatible = "nvidia,tegra20-car";
+ reg = <0x60006000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ usb@c5004000 {
+ clocks = <&tegra_car TEGRA20_CLK_USB2>;
+ };
+};
+
+Example board file:
+
+/ {
+ clocks {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ osc: clock@0 {
+ compatible = "fixed-clock";
+ reg = <0>;
+ #clock-cells = <0>;
+ clock-frequency = <12000000>;
+ };
+
+ clk_32k: clock@1 {
+ compatible = "fixed-clock";
+ reg = <1>;
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+ };
+
+ &tegra_car {
+ clocks = <&clk_32k> <&osc>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/clock/nvidia,tegra30-car.txt b/Documentation/devicetree/bindings/clock/nvidia,tegra30-car.txt
new file mode 100644
index 000000000..63618cde1
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/nvidia,tegra30-car.txt
@@ -0,0 +1,63 @@
+NVIDIA Tegra30 Clock And Reset Controller
+
+This binding uses the common clock binding:
+Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+The CAR (Clock And Reset) Controller on Tegra is the HW module responsible
+for muxing and gating Tegra's clocks, and setting their rates.
+
+Required properties :
+- compatible : Should be "nvidia,tegra30-car"
+- reg : Should contain CAR registers location and length
+- clocks : Should contain phandle and clock specifiers for two clocks:
+ the 32 KHz "32k_in", and the board-specific oscillator "osc".
+- #clock-cells : Should be 1.
+ In clock consumers, this cell represents the clock ID exposed by the
+ CAR. The assignments may be found in header file
+ <dt-bindings/clock/tegra30-car.h>.
+- #reset-cells : Should be 1.
+ In clock consumers, this cell represents the bit number in the CAR's
+ array of CLK_RST_CONTROLLER_RST_DEVICES_* registers.
+
+Example SoC include file:
+
+/ {
+ tegra_car: clock {
+ compatible = "nvidia,tegra30-car";
+ reg = <0x60006000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+ usb@c5004000 {
+ clocks = <&tegra_car TEGRA30_CLK_USB2>;
+ };
+};
+
+Example board file:
+
+/ {
+ clocks {
+ compatible = "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ osc: clock@0 {
+ compatible = "fixed-clock";
+ reg = <0>;
+ #clock-cells = <0>;
+ clock-frequency = <12000000>;
+ };
+
+ clk_32k: clock@1 {
+ compatible = "fixed-clock";
+ reg = <1>;
+ #clock-cells = <0>;
+ clock-frequency = <32768>;
+ };
+ };
+
+ &tegra_car {
+ clocks = <&clk_32k> <&osc>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/clock/pistachio-clock.txt b/Documentation/devicetree/bindings/clock/pistachio-clock.txt
new file mode 100644
index 000000000..868db499e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/pistachio-clock.txt
@@ -0,0 +1,123 @@
+Imagination Technologies Pistachio SoC clock controllers
+========================================================
+
+Pistachio has four clock controllers (core clock, peripheral clock, peripheral
+general control, and top general control) which are instantiated individually
+from the device-tree.
+
+External clocks:
+----------------
+
+There are three external inputs to the clock controllers which should be
+defined with the following clock-output-names:
+- "xtal": External 52Mhz oscillator (required)
+- "audio_clk_in": Alternate audio reference clock (optional)
+- "enet_clk_in": Alternate ethernet PHY clock (optional)
+
+Core clock controller:
+----------------------
+
+The core clock controller generates clocks for the CPU, RPU (WiFi + BT
+co-processor), audio, and several peripherals.
+
+Required properties:
+- compatible: Must be "img,pistachio-clk".
+- reg: Must contain the base address and length of the core clock controller.
+- #clock-cells: Must be 1. The single cell is the clock identifier.
+ See dt-bindings/clock/pistachio-clk.h for the list of valid identifiers.
+- clocks: Must contain an entry for each clock in clock-names.
+- clock-names: Must include "xtal" (see "External clocks") and
+ "audio_clk_in_gate", "enet_clk_in_gate" which are generated by the
+ top-level general control.
+
+Example:
+ clk_core: clock-controller@18144000 {
+ compatible = "img,pistachio-clk";
+ reg = <0x18144000 0x800>;
+ clocks = <&xtal>, <&cr_top EXT_CLK_AUDIO_IN>,
+ <&cr_top EXT_CLK_ENET_IN>;
+ clock-names = "xtal", "audio_clk_in_gate", "enet_clk_in_gate";
+
+ #clock-cells = <1>;
+ };
+
+Peripheral clock controller:
+----------------------------
+
+The peripheral clock controller generates clocks for the DDR, ROM, and other
+peripherals. The peripheral system clock ("periph_sys") generated by the core
+clock controller is the input clock to the peripheral clock controller.
+
+Required properties:
+- compatible: Must be "img,pistachio-periph-clk".
+- reg: Must contain the base address and length of the peripheral clock
+ controller.
+- #clock-cells: Must be 1. The single cell is the clock identifier.
+ See dt-bindings/clock/pistachio-clk.h for the list of valid identifiers.
+- clocks: Must contain an entry for each clock in clock-names.
+- clock-names: Must include "periph_sys", the peripheral system clock generated
+ by the core clock controller.
+
+Example:
+ clk_periph: clock-controller@18144800 {
+ compatible = "img,pistachio-clk-periph";
+ reg = <0x18144800 0x800>;
+ clocks = <&clk_core CLK_PERIPH_SYS>;
+ clock-names = "periph_sys";
+
+ #clock-cells = <1>;
+ };
+
+Peripheral general control:
+---------------------------
+
+The peripheral general control block generates system interface clocks and
+resets for various peripherals. It also contains miscellaneous peripheral
+control registers. The system clock ("sys") generated by the peripheral clock
+controller is the input clock to the system clock controller.
+
+Required properties:
+- compatible: Must include "img,pistachio-periph-cr" and "syscon".
+- reg: Must contain the base address and length of the peripheral general
+ control registers.
+- #clock-cells: Must be 1. The single cell is the clock identifier.
+ See dt-bindings/clock/pistachio-clk.h for the list of valid identifiers.
+- clocks: Must contain an entry for each clock in clock-names.
+- clock-names: Must include "sys", the system clock generated by the peripheral
+ clock controller.
+
+Example:
+ cr_periph: syscon@18144800 {
+ compatible = "img,pistachio-cr-periph", "syscon";
+ reg = <0x18148000 0x1000>;
+ clocks = <&clock_periph PERIPH_CLK_PERIPH_SYS>;
+ clock-names = "sys";
+
+ #clock-cells = <1>;
+ };
+
+Top-level general control:
+--------------------------
+
+The top-level general control block contains miscellaneous control registers and
+gates for the external clocks "audio_clk_in" and "enet_clk_in".
+
+Required properties:
+- compatible: Must include "img,pistachio-cr-top" and "syscon".
+- reg: Must contain the base address and length of the top-level
+ control registers.
+- clocks: Must contain an entry for each clock in clock-names.
+- clock-names: Two optional clocks, "audio_clk_in" and "enet_clk_in" (see
+ "External clocks").
+- #clock-cells: Must be 1. The single cell is the clock identifier.
+ See dt-bindings/clock/pistachio-clk.h for the list of valid identifiers.
+
+Example:
+ cr_top: syscon@18144800 {
+ compatible = "img,pistachio-cr-top", "syscon";
+ reg = <0x18149000 0x200>;
+ clocks = <&audio_refclk>, <&ext_enet_in>;
+ clock-names = "audio_clk_in", "enet_clk_in";
+
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/prima2-clock.txt b/Documentation/devicetree/bindings/clock/prima2-clock.txt
new file mode 100644
index 000000000..5016979c0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/prima2-clock.txt
@@ -0,0 +1,73 @@
+* Clock bindings for CSR SiRFprimaII
+
+Required properties:
+- compatible: Should be "sirf,prima2-clkc"
+- reg: Address and length of the register set
+- interrupts: Should contain clock controller interrupt
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. The following is a full list of prima2
+clocks and IDs.
+
+ Clock ID
+ ---------------------------
+ rtc 0
+ osc 1
+ pll1 2
+ pll2 3
+ pll3 4
+ mem 5
+ sys 6
+ security 7
+ dsp 8
+ gps 9
+ mf 10
+ io 11
+ cpu 12
+ uart0 13
+ uart1 14
+ uart2 15
+ tsc 16
+ i2c0 17
+ i2c1 18
+ spi0 19
+ spi1 20
+ pwmc 21
+ efuse 22
+ pulse 23
+ dmac0 24
+ dmac1 25
+ nand 26
+ audio 27
+ usp0 28
+ usp1 29
+ usp2 30
+ vip 31
+ gfx 32
+ mm 33
+ lcd 34
+ vpp 35
+ mmc01 36
+ mmc23 37
+ mmc45 38
+ usbpll 39
+ usb0 40
+ usb1 41
+
+Examples:
+
+clks: clock-controller@88000000 {
+ compatible = "sirf,prima2-clkc";
+ reg = <0x88000000 0x1000>;
+ interrupts = <3>;
+ #clock-cells = <1>;
+};
+
+i2c0: i2c@b00e0000 {
+ cell-index = <0>;
+ compatible = "sirf,prima2-i2c";
+ reg = <0xb00e0000 0x10000>;
+ interrupts = <24>;
+ clocks = <&clks 17>;
+};
diff --git a/Documentation/devicetree/bindings/clock/pwm-clock.txt b/Documentation/devicetree/bindings/clock/pwm-clock.txt
new file mode 100644
index 000000000..83db876b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/pwm-clock.txt
@@ -0,0 +1,26 @@
+Binding for an external clock signal driven by a PWM pin.
+
+This binding uses the common clock binding[1] and the common PWM binding[2].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/pwm/pwm.txt
+
+Required properties:
+- compatible : shall be "pwm-clock".
+- #clock-cells : from common clock binding; shall be set to 0.
+- pwms : from common PWM binding; this determines the clock frequency
+ via the period given in the PWM specifier.
+
+Optional properties:
+- clock-output-names : From common clock binding.
+- clock-frequency : Exact output frequency, in case the PWM period
+ is not exact but was rounded to nanoseconds.
+
+Example:
+ clock {
+ compatible = "pwm-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+ clock-output-names = "mipi_mclk";
+ pwms = <&pwm2 0 40>; /* 1 / 40 ns = 25 MHz */
+ };
diff --git a/Documentation/devicetree/bindings/clock/pxa-clock.txt b/Documentation/devicetree/bindings/clock/pxa-clock.txt
new file mode 100644
index 000000000..4b4a9024b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/pxa-clock.txt
@@ -0,0 +1,16 @@
+* Clock bindings for Marvell PXA chips
+
+Required properties:
+- compatible: Should be "marvell,pxa-clocks"
+- #clock-cells: Should be <1>
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell (see include/.../pxa-clock.h).
+
+Examples:
+
+pxa2xx_clks: pxa2xx_clks@41300004 {
+ compatible = "marvell,pxa-clocks";
+ #clock-cells = <1>;
+ status = "okay";
+};
diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.txt b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
new file mode 100644
index 000000000..54c23f34f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.txt
@@ -0,0 +1,27 @@
+Qualcomm Global Clock & Reset Controller Binding
+------------------------------------------------
+
+Required properties :
+- compatible : shall contain only one of the following:
+
+ "qcom,gcc-apq8064"
+ "qcom,gcc-apq8084"
+ "qcom,gcc-ipq8064"
+ "qcom,gcc-msm8660"
+ "qcom,gcc-msm8916"
+ "qcom,gcc-msm8960"
+ "qcom,gcc-msm8974"
+ "qcom,gcc-msm8974pro"
+ "qcom,gcc-msm8974pro-ac"
+
+- reg : shall contain base register location and length
+- #clock-cells : shall contain 1
+- #reset-cells : shall contain 1
+
+Example:
+ clock-controller@900000 {
+ compatible = "qcom,gcc-msm8960";
+ reg = <0x900000 0x4000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,lcc.txt b/Documentation/devicetree/bindings/clock/qcom,lcc.txt
new file mode 100644
index 000000000..dd755be63
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,lcc.txt
@@ -0,0 +1,21 @@
+Qualcomm LPASS Clock & Reset Controller Binding
+------------------------------------------------
+
+Required properties :
+- compatible : shall contain only one of the following:
+
+ "qcom,lcc-msm8960"
+ "qcom,lcc-apq8064"
+ "qcom,lcc-ipq8064"
+
+- reg : shall contain base register location and length
+- #clock-cells : shall contain 1
+- #reset-cells : shall contain 1
+
+Example:
+ clock-controller@28000000 {
+ compatible = "qcom,lcc-ipq8064";
+ reg = <0x28000000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qcom,mmcc.txt b/Documentation/devicetree/bindings/clock/qcom,mmcc.txt
new file mode 100644
index 000000000..29ebf84d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qcom,mmcc.txt
@@ -0,0 +1,23 @@
+Qualcomm Multimedia Clock & Reset Controller Binding
+----------------------------------------------------
+
+Required properties :
+- compatible : shall contain only one of the following:
+
+ "qcom,mmcc-apq8064"
+ "qcom,mmcc-apq8084"
+ "qcom,mmcc-msm8660"
+ "qcom,mmcc-msm8960"
+ "qcom,mmcc-msm8974"
+
+- reg : shall contain base register location and length
+- #clock-cells : shall contain 1
+- #reset-cells : shall contain 1
+
+Example:
+ clock-controller@4000000 {
+ compatible = "qcom,mmcc-msm8960";
+ reg = <0x4000000 0x1000>;
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/qoriq-clock.txt b/Documentation/devicetree/bindings/clock/qoriq-clock.txt
new file mode 100644
index 000000000..df4a259a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/qoriq-clock.txt
@@ -0,0 +1,153 @@
+* Clock Block on Freescale QorIQ Platforms
+
+Freescale qoriq chips take primary clocking input from the external
+SYSCLK signal. The SYSCLK input (frequency) is multiplied using
+multiple phase locked loops (PLL) to create a variety of frequencies
+which can then be passed to a variety of internal logic, including
+cores and peripheral IP blocks.
+Please refer to the Reference Manual for details.
+
+All references to "1.0" and "2.0" refer to the QorIQ chassis version to
+which the chip complies.
+
+Chassis Version Example Chips
+--------------- -------------
+1.0 p4080, p5020, p5040
+2.0 t4240, b4860, t1040
+
+1. Clock Block Binding
+
+Required properties:
+- compatible: Should contain a specific clock block compatible string
+ and a single chassis clock compatible string.
+ Clock block strings include, but not limited to, one of the:
+ * "fsl,p2041-clockgen"
+ * "fsl,p3041-clockgen"
+ * "fsl,p4080-clockgen"
+ * "fsl,p5020-clockgen"
+ * "fsl,p5040-clockgen"
+ * "fsl,t4240-clockgen"
+ * "fsl,b4420-clockgen"
+ * "fsl,b4860-clockgen"
+ * "fsl,ls1021a-clockgen"
+ Chassis clock strings include:
+ * "fsl,qoriq-clockgen-1.0": for chassis 1.0 clocks
+ * "fsl,qoriq-clockgen-2.0": for chassis 2.0 clocks
+- reg: Describes the address of the device's resources within the
+ address space defined by its parent bus, and resource zero
+ represents the clock register set
+- clock-frequency: Input system clock frequency
+
+Recommended properties:
+- ranges: Allows valid translation between child's address space and
+ parent's. Must be present if the device has sub-nodes.
+- #address-cells: Specifies the number of cells used to represent
+ physical base addresses. Must be present if the device has
+ sub-nodes and set to 1 if present
+- #size-cells: Specifies the number of cells used to represent
+ the size of an address. Must be present if the device has
+ sub-nodes and set to 1 if present
+
+2. Clock Provider/Consumer Binding
+
+Most of the bindings are from the common clock binding[1].
+ [1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : Should include one of the following:
+ * "fsl,qoriq-core-pll-1.0" for core PLL clocks (v1.0)
+ * "fsl,qoriq-core-pll-2.0" for core PLL clocks (v2.0)
+ * "fsl,qoriq-core-mux-1.0" for core mux clocks (v1.0)
+ * "fsl,qoriq-core-mux-2.0" for core mux clocks (v2.0)
+ * "fsl,qoriq-sysclk-1.0": for input system clock (v1.0).
+ It takes parent's clock-frequency as its clock.
+ * "fsl,qoriq-sysclk-2.0": for input system clock (v2.0).
+ It takes parent's clock-frequency as its clock.
+ * "fsl,qoriq-platform-pll-1.0" for the platform PLL clock (v1.0)
+ * "fsl,qoriq-platform-pll-2.0" for the platform PLL clock (v2.0)
+- #clock-cells: From common clock binding. The number of cells in a
+ clock-specifier. Should be <0> for "fsl,qoriq-sysclk-[1,2].0"
+ clocks, or <1> for "fsl,qoriq-core-pll-[1,2].0" clocks.
+ For "fsl,qoriq-core-pll-[1,2].0" clocks, the single
+ clock-specifier cell may take the following values:
+ * 0 - equal to the PLL frequency
+ * 1 - equal to the PLL frequency divided by 2
+ * 2 - equal to the PLL frequency divided by 4
+
+Recommended properties:
+- clocks: Should be the phandle of input parent clock
+- clock-names: From common clock binding, indicates the clock name
+- clock-output-names: From common clock binding, indicates the names of
+ output clocks
+- reg: Should be the offset and length of clock block base address.
+ The length should be 4.
+
+Example for clock block and clock provider:
+/ {
+ clockgen: global-utilities@e1000 {
+ compatible = "fsl,p5020-clockgen", "fsl,qoriq-clockgen-1.0";
+ ranges = <0x0 0xe1000 0x1000>;
+ clock-frequency = <133333333>;
+ reg = <0xe1000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ sysclk: sysclk {
+ #clock-cells = <0>;
+ compatible = "fsl,qoriq-sysclk-1.0";
+ clock-output-names = "sysclk";
+ };
+
+ pll0: pll0@800 {
+ #clock-cells = <1>;
+ reg = <0x800 0x4>;
+ compatible = "fsl,qoriq-core-pll-1.0";
+ clocks = <&sysclk>;
+ clock-output-names = "pll0", "pll0-div2";
+ };
+
+ pll1: pll1@820 {
+ #clock-cells = <1>;
+ reg = <0x820 0x4>;
+ compatible = "fsl,qoriq-core-pll-1.0";
+ clocks = <&sysclk>;
+ clock-output-names = "pll1", "pll1-div2";
+ };
+
+ mux0: mux0@0 {
+ #clock-cells = <0>;
+ reg = <0x0 0x4>;
+ compatible = "fsl,qoriq-core-mux-1.0";
+ clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
+ clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
+ clock-output-names = "cmux0";
+ };
+
+ mux1: mux1@20 {
+ #clock-cells = <0>;
+ reg = <0x20 0x4>;
+ compatible = "fsl,qoriq-core-mux-1.0";
+ clocks = <&pll0 0>, <&pll0 1>, <&pll1 0>, <&pll1 1>;
+ clock-names = "pll0", "pll0-div2", "pll1", "pll1-div2";
+ clock-output-names = "cmux1";
+ };
+
+ platform-pll: platform-pll@c00 {
+ #clock-cells = <1>;
+ reg = <0xc00 0x4>;
+ compatible = "fsl,qoriq-platform-pll-1.0";
+ clocks = <&sysclk>;
+ clock-output-names = "platform-pll", "platform-pll-div2";
+ };
+ };
+};
+
+Example for clock consumer:
+
+/ {
+ cpu0: PowerPC,e5500@0 {
+ ...
+ clocks = <&mux0>;
+ ...
+ };
+};
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt
new file mode 100644
index 000000000..054f65f93
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt
@@ -0,0 +1,34 @@
+* Renesas CPG DIV6 Clock
+
+The CPG DIV6 clocks are variable factor clocks provided by the Clock Pulse
+Generator (CPG). They clock input is divided by a configurable factor from 1
+to 64.
+
+Required Properties:
+
+ - compatible: Must be one of the following
+ - "renesas,r8a73a4-div6-clock" for R8A73A4 (R-Mobile APE6) DIV6 clocks
+ - "renesas,r8a7740-div6-clock" for R8A7740 (R-Mobile A1) DIV6 clocks
+ - "renesas,r8a7790-div6-clock" for R8A7790 (R-Car H2) DIV6 clocks
+ - "renesas,r8a7791-div6-clock" for R8A7791 (R-Car M2) DIV6 clocks
+ - "renesas,sh73a0-div6-clock" for SH73A0 (SH-Mobile AG5) DIV6 clocks
+ - "renesas,cpg-div6-clock" for generic DIV6 clocks
+ - reg: Base address and length of the memory resource used by the DIV6 clock
+ - clocks: Reference to the parent clock(s); either one, four, or eight
+ clocks must be specified. For clocks with multiple parents, invalid
+ settings must be specified as "<0>".
+ - #clock-cells: Must be 0
+ - clock-output-names: The name of the clock as a free-form string
+
+
+Example
+-------
+
+ sdhi2_clk: sdhi2_clk@e615007c {
+ compatible = "renesas,r8a73a4-div6-clock", "renesas,cpg-div6-clock";
+ reg = <0 0xe615007c 0 4>;
+ clocks = <&pll1_div2_clk>, <&cpg_clocks R8A73A4_CLK_PLL2S>,
+ <0>, <&extal2_clk>;
+ #clock-cells = <0>;
+ clock-output-names = "sdhi2ck";
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt
new file mode 100644
index 000000000..0a80fa70c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt
@@ -0,0 +1,57 @@
+* Renesas CPG Module Stop (MSTP) Clocks
+
+The CPG can gate SoC device clocks. The gates are organized in groups of up to
+32 gates.
+
+This device tree binding describes a single 32 gate clocks group per node.
+Clocks are referenced by user nodes by the MSTP node phandle and the clock
+index in the group, from 0 to 31.
+
+Required Properties:
+
+ - compatible: Must be one of the following
+ - "renesas,r7s72100-mstp-clocks" for R7S72100 (RZ) MSTP gate clocks
+ - "renesas,r8a73a4-mstp-clocks" for R8A73A4 (R-Mobile APE6) MSTP gate clocks
+ - "renesas,r8a7740-mstp-clocks" for R8A7740 (R-Mobile A1) MSTP gate clocks
+ - "renesas,r8a7779-mstp-clocks" for R8A7779 (R-Car H1) MSTP gate clocks
+ - "renesas,r8a7790-mstp-clocks" for R8A7790 (R-Car H2) MSTP gate clocks
+ - "renesas,r8a7791-mstp-clocks" for R8A7791 (R-Car M2) MSTP gate clocks
+ - "renesas,r8a7794-mstp-clocks" for R8A7794 (R-Car E2) MSTP gate clocks
+ - "renesas,sh73a0-mstp-clocks" for SH73A0 (SH-MobileAG5) MSTP gate clocks
+ - "renesas,cpg-mstp-clock" for generic MSTP gate clocks
+ - reg: Base address and length of the I/O mapped registers used by the MSTP
+ clocks. The first register is the clock control register and is mandatory.
+ The second register is the clock status register and is optional when not
+ implemented in hardware.
+ - clocks: Reference to the parent clocks, one per output clock. The parents
+ must appear in the same order as the output clocks.
+ - #clock-cells: Must be 1
+ - clock-output-names: The name of the clocks as free-form strings
+ - clock-indices: Indices of the gate clocks into the group (0 to 31)
+
+The clocks, clock-output-names and clock-indices properties contain one entry
+per gate clock. The MSTP groups are sparsely populated. Unimplemented gate
+clocks must not be declared.
+
+
+Example
+-------
+
+ #include <dt-bindings/clock/r8a7790-clock.h>
+
+ mstp3_clks: mstp3_clks@e615013c {
+ compatible = "renesas,r8a7790-mstp-clocks", "renesas,cpg-mstp-clocks";
+ reg = <0 0xe615013c 0 4>, <0 0xe6150048 0 4>;
+ clocks = <&cp_clk>, <&mmc1_clk>, <&sd3_clk>, <&sd2_clk>,
+ <&cpg_clocks R8A7790_CLK_SD1>, <&cpg_clocks R8A7790_CLK_SD0>,
+ <&mmc0_clk>;
+ #clock-cells = <1>;
+ clock-output-names =
+ "tpu0", "mmcif1", "sdhi3", "sdhi2",
+ "sdhi1", "sdhi0", "mmcif0";
+ clock-indices = <
+ R8A7790_CLK_TPU0 R8A7790_CLK_MMCIF1 R8A7790_CLK_SDHI3
+ R8A7790_CLK_SDHI2 R8A7790_CLK_SDHI1 R8A7790_CLK_SDHI0
+ R8A7790_CLK_MMCIF0
+ >;
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a73a4-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a73a4-cpg-clocks.txt
new file mode 100644
index 000000000..ece92393e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,r8a73a4-cpg-clocks.txt
@@ -0,0 +1,33 @@
+* Renesas R8A73A4 Clock Pulse Generator (CPG)
+
+The CPG generates core clocks for the R8A73A4 SoC. It includes five PLLs
+and several fixed ratio dividers.
+
+Required Properties:
+
+ - compatible: Must be "renesas,r8a73a4-cpg-clocks"
+
+ - reg: Base address and length of the memory resource used by the CPG
+
+ - clocks: Reference to the parent clocks ("extal1" and "extal2")
+
+ - #clock-cells: Must be 1
+
+ - clock-output-names: The names of the clocks. Supported clocks are "main",
+ "pll0", "pll1", "pll2", "pll2s", "pll2h", "z", "z2", "i", "m3", "b",
+ "m1", "m2", "zx", "zs", and "hp".
+
+
+Example
+-------
+
+ cpg_clocks: cpg_clocks@e6150000 {
+ compatible = "renesas,r8a73a4-cpg-clocks";
+ reg = <0 0xe6150000 0 0x10000>;
+ clocks = <&extal1_clk>, <&extal2_clk>;
+ #clock-cells = <1>;
+ clock-output-names = "main", "pll0", "pll1", "pll2",
+ "pll2s", "pll2h", "z", "z2",
+ "i", "m3", "b", "m1", "m2",
+ "zx", "zs", "hp";
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a7740-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a7740-cpg-clocks.txt
new file mode 100644
index 000000000..2c03302f8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,r8a7740-cpg-clocks.txt
@@ -0,0 +1,41 @@
+These bindings should be considered EXPERIMENTAL for now.
+
+* Renesas R8A7740 Clock Pulse Generator (CPG)
+
+The CPG generates core clocks for the R8A7740 SoC. It includes three PLLs
+and several fixed ratio and variable ratio dividers.
+
+Required Properties:
+
+ - compatible: Must be "renesas,r8a7740-cpg-clocks"
+
+ - reg: Base address and length of the memory resource used by the CPG
+
+ - clocks: Reference to the three parent clocks
+ - #clock-cells: Must be 1
+ - clock-output-names: The names of the clocks. Supported clocks are
+ "system", "pllc0", "pllc1", "pllc2", "r", "usb24s", "i", "zg", "b",
+ "m1", "hp", "hpp", "usbp", "s", "zb", "m3", and "cp".
+
+ - renesas,mode: board-specific settings of the MD_CK* bits
+
+
+Example
+-------
+
+cpg_clocks: cpg_clocks@e6150000 {
+ compatible = "renesas,r8a7740-cpg-clocks";
+ reg = <0xe6150000 0x10000>;
+ clocks = <&extal1_clk>, <&extal2_clk>, <&extalr_clk>;
+ #clock-cells = <1>;
+ clock-output-names = "system", "pllc0", "pllc1",
+ "pllc2", "r",
+ "usb24s",
+ "i", "zg", "b", "m1", "hp",
+ "hpp", "usbp", "s", "zb", "m3",
+ "cp";
+};
+
+&cpg_clocks {
+ renesas,mode = <0x05>;
+};
diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt
new file mode 100644
index 000000000..2f3747fdc
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,r8a7778-cpg-clocks.txt
@@ -0,0 +1,25 @@
+* Renesas R8A7778 Clock Pulse Generator (CPG)
+
+The CPG generates core clocks for the R8A7778. It includes two PLLs and
+several fixed ratio dividers
+
+Required Properties:
+
+ - compatible: Must be "renesas,r8a7778-cpg-clocks"
+ - reg: Base address and length of the memory resource used by the CPG
+ - #clock-cells: Must be 1
+ - clock-output-names: The names of the clocks. Supported clocks are
+ "plla", "pllb", "b", "out", "p", "s", and "s1".
+
+
+Example
+-------
+
+ cpg_clocks: cpg_clocks@ffc80000 {
+ compatible = "renesas,r8a7778-cpg-clocks";
+ reg = <0xffc80000 0x80>;
+ #clock-cells = <1>;
+ clocks = <&extal_clk>;
+ clock-output-names = "plla", "pllb", "b",
+ "out", "p", "s", "s1";
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt
new file mode 100644
index 000000000..ed3c8cb12
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,r8a7779-cpg-clocks.txt
@@ -0,0 +1,27 @@
+* Renesas R8A7779 Clock Pulse Generator (CPG)
+
+The CPG generates core clocks for the R8A7779. It includes one PLL and
+several fixed ratio dividers
+
+Required Properties:
+
+ - compatible: Must be "renesas,r8a7779-cpg-clocks"
+ - reg: Base address and length of the memory resource used by the CPG
+
+ - clocks: Reference to the parent clock
+ - #clock-cells: Must be 1
+ - clock-output-names: The names of the clocks. Supported clocks are "plla",
+ "z", "zs", "s", "s1", "p", "b", "out".
+
+
+Example
+-------
+
+ cpg_clocks: cpg_clocks@ffc80000 {
+ compatible = "renesas,r8a7779-cpg-clocks";
+ reg = <0 0xffc80000 0 0x30>;
+ clocks = <&extal_clk>;
+ #clock-cells = <1>;
+ clock-output-names = "plla", "z", "zs", "s", "s1", "p",
+ "b", "out";
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt
new file mode 100644
index 000000000..b02944fba
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,rcar-gen2-cpg-clocks.txt
@@ -0,0 +1,37 @@
+* Renesas R-Car Gen2 Clock Pulse Generator (CPG)
+
+The CPG generates core clocks for the R-Car Gen2 SoCs. It includes three PLLs
+and several fixed ratio dividers.
+
+Required Properties:
+
+ - compatible: Must be one of
+ - "renesas,r8a7790-cpg-clocks" for the r8a7790 CPG
+ - "renesas,r8a7791-cpg-clocks" for the r8a7791 CPG
+ - "renesas,r8a7793-cpg-clocks" for the r8a7793 CPG
+ - "renesas,r8a7794-cpg-clocks" for the r8a7794 CPG
+ - "renesas,rcar-gen2-cpg-clocks" for the generic R-Car Gen2 CPG
+
+ - reg: Base address and length of the memory resource used by the CPG
+
+ - clocks: References to the parent clocks: first to the EXTAL clock, second
+ to the USB_EXTAL clock
+ - #clock-cells: Must be 1
+ - clock-output-names: The names of the clocks. Supported clocks are "main",
+ "pll0", "pll1", "pll3", "lb", "qspi", "sdh", "sd0", "sd1", "z", "rcan", and
+ "adsp"
+
+
+Example
+-------
+
+ cpg_clocks: cpg_clocks@e6150000 {
+ compatible = "renesas,r8a7790-cpg-clocks",
+ "renesas,rcar-gen2-cpg-clocks";
+ reg = <0 0xe6150000 0 0x1000>;
+ clocks = <&extal_clk &usb_extal_clk>;
+ #clock-cells = <1>;
+ clock-output-names = "main", "pll0, "pll1", "pll3",
+ "lb", "qspi", "sdh", "sd0", "sd1", "z",
+ "rcan", "adsp";
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt
new file mode 100644
index 000000000..98a257492
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,rz-cpg-clocks.txt
@@ -0,0 +1,29 @@
+* Renesas RZ Clock Pulse Generator (CPG)
+
+The CPG generates core clocks for the RZ SoCs. It includes the PLL, variable
+CPU and GPU clocks, and several fixed ratio dividers.
+
+Required Properties:
+
+ - compatible: Must be one of
+ - "renesas,r7s72100-cpg-clocks" for the r7s72100 CPG
+ - "renesas,rz-cpg-clocks" for the generic RZ CPG
+ - reg: Base address and length of the memory resource used by the CPG
+ - clocks: References to possible parent clocks. Order must match clock modes
+ in the datasheet. For the r7s72100, this is extal, usb_x1.
+ - #clock-cells: Must be 1
+ - clock-output-names: The names of the clocks. Supported clocks are "pll",
+ "i", and "g"
+
+
+Example
+-------
+
+ cpg_clocks: cpg_clocks@fcfe0000 {
+ #clock-cells = <1>;
+ compatible = "renesas,r7s72100-cpg-clocks",
+ "renesas,rz-cpg-clocks";
+ reg = <0xfcfe0000 0x18>;
+ clocks = <&extal_clk>, <&usb_x1_clk>;
+ clock-output-names = "pll", "i", "g";
+ };
diff --git a/Documentation/devicetree/bindings/clock/renesas,sh73a0-cpg-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,sh73a0-cpg-clocks.txt
new file mode 100644
index 000000000..a8978ec94
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/renesas,sh73a0-cpg-clocks.txt
@@ -0,0 +1,35 @@
+These bindings should be considered EXPERIMENTAL for now.
+
+* Renesas SH73A0 Clock Pulse Generator (CPG)
+
+The CPG generates core clocks for the SH73A0 SoC. It includes four PLLs
+and several fixed ratio dividers.
+
+Required Properties:
+
+ - compatible: Must be "renesas,sh73a0-cpg-clocks"
+
+ - reg: Base address and length of the memory resource used by the CPG
+
+ - clocks: Reference to the parent clocks ("extal1" and "extal2")
+
+ - #clock-cells: Must be 1
+
+ - clock-output-names: The names of the clocks. Supported clocks are "main",
+ "pll0", "pll1", "pll2", "pll3", "dsi0phy", "dsi1phy", "zg", "m3", "b",
+ "m1", "m2", "z", "zx", and "hp".
+
+
+Example
+-------
+
+ cpg_clocks: cpg_clocks@e6150000 {
+ compatible = "renesas,sh73a0-cpg-clocks";
+ reg = <0 0xe6150000 0 0x10000>;
+ clocks = <&extal1_clk>, <&extal2_clk>;
+ #clock-cells = <1>;
+ clock-output-names = "main", "pll0", "pll1", "pll2",
+ "pll3", "dsi0phy", "dsi1phy",
+ "zg", "m3", "b", "m1", "m2",
+ "z", "zx", "hp";
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.txt
new file mode 100644
index 000000000..0c2bf5eba
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3188-cru.txt
@@ -0,0 +1,61 @@
+* Rockchip RK3188/RK3066 Clock and Reset Unit
+
+The RK3188/RK3066 clock controller generates and supplies clock to various
+controllers within the SoC and also implements a reset controller for SoC
+peripherals.
+
+Required Properties:
+
+- compatible: should be "rockchip,rk3188-cru", "rockchip,rk3188a-cru" or
+ "rockchip,rk3066a-cru"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Optional Properties:
+
+- rockchip,grf: phandle to the syscon managing the "general register files"
+ If missing pll rates are not changable, due to the missing pll lock status.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. All available clocks are defined as
+preprocessor macros in the dt-bindings/clock/rk3188-cru.h and
+dt-bindings/clock/rk3066-cru.h headers and can be used in device tree sources.
+Similar macros exist for the reset sources in these files.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with following
+clock-output-names:
+ - "xin24m" - crystal input - required,
+ - "xin32k" - rtc clock - optional,
+ - "xin27m" - 27mhz crystal input on rk3066 - optional,
+ - "ext_hsadc" - external HSADC clock - optional,
+ - "ext_cif0" - external camera clock - optional,
+ - "ext_rmii" - external RMII clock - optional,
+ - "ext_jtag" - externalJTAG clock - optional
+
+Example: Clock controller node:
+
+ cru: cru@20000000 {
+ compatible = "rockchip,rk3188-cru";
+ reg = <0x20000000 0x1000>;
+ rockchip,grf = <&grf>;
+
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+Example: UART controller node that consumes the clock generated by the clock
+ controller:
+
+ uart0: serial@10124000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x10124000 0x400>;
+ interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+ reg-shift = <2>;
+ reg-io-width = <1>;
+ clocks = <&cru SCLK_UART0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt b/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt
new file mode 100644
index 000000000..c9fbb7657
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3288-cru.txt
@@ -0,0 +1,61 @@
+* Rockchip RK3288 Clock and Reset Unit
+
+The RK3288 clock controller generates and supplies clock to various
+controllers within the SoC and also implements a reset controller for SoC
+peripherals.
+
+Required Properties:
+
+- compatible: should be "rockchip,rk3288-cru"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Optional Properties:
+
+- rockchip,grf: phandle to the syscon managing the "general register files"
+ If missing pll rates are not changable, due to the missing pll lock status.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. All available clocks are defined as
+preprocessor macros in the dt-bindings/clock/rk3288-cru.h headers and can be
+used in device tree sources. Similar macros exist for the reset sources in
+these files.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with following
+clock-output-names:
+ - "xin24m" - crystal input - required,
+ - "xin32k" - rtc clock - optional,
+ - "ext_i2s" - external I2S clock - optional,
+ - "ext_hsadc" - external HSADC clock - optional,
+ - "ext_edp_24m" - external display port clock - optional,
+ - "ext_vip" - external VIP clock - optional,
+ - "ext_isp" - external ISP clock - optional,
+ - "ext_jtag" - external JTAG clock - optional
+
+Example: Clock controller node:
+
+ cru: cru@20000000 {
+ compatible = "rockchip,rk3188-cru";
+ reg = <0x20000000 0x1000>;
+ rockchip,grf = <&grf>;
+
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ };
+
+Example: UART controller node that consumes the clock generated by the clock
+ controller:
+
+ uart0: serial@10124000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x10124000 0x400>;
+ interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>;
+ reg-shift = <2>;
+ reg-io-width = <1>;
+ clocks = <&cru SCLK_UART0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/rockchip.txt b/Documentation/devicetree/bindings/clock/rockchip.txt
new file mode 100644
index 000000000..22f6769e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/rockchip.txt
@@ -0,0 +1,77 @@
+Device Tree Clock bindings for arch-rockchip
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+== Gate clocks ==
+
+These bindings are deprecated!
+Please use the soc specific CRU bindings instead.
+
+The gate registers form a continuos block which makes the dt node
+structure a matter of taste, as either all gates can be put into
+one gate clock spanning all registers or they can be divided into
+the 10 individual gates containing 16 clocks each.
+The code supports both approaches.
+
+Required properties:
+- compatible : "rockchip,rk2928-gate-clk"
+- reg : shall be the control register address(es) for the clock.
+- #clock-cells : from common clock binding; shall be set to 1
+- clock-output-names : the corresponding gate names that the clock controls
+- clocks : should contain the parent clock for each individual gate,
+ therefore the number of clocks elements should match the number of
+ clock-output-names
+
+Example using multiple gate clocks:
+
+ clk_gates0: gate-clk@200000d0 {
+ compatible = "rockchip,rk2928-gate-clk";
+ reg = <0x200000d0 0x4>;
+ clocks = <&dummy>, <&dummy>,
+ <&dummy>, <&dummy>,
+ <&dummy>, <&dummy>,
+ <&dummy>, <&dummy>,
+ <&dummy>, <&dummy>,
+ <&dummy>, <&dummy>,
+ <&dummy>, <&dummy>,
+ <&dummy>, <&dummy>;
+
+ clock-output-names =
+ "gate_core_periph", "gate_cpu_gpll",
+ "gate_ddrphy", "gate_aclk_cpu",
+ "gate_hclk_cpu", "gate_pclk_cpu",
+ "gate_atclk_cpu", "gate_i2s0",
+ "gate_i2s0_frac", "gate_i2s1",
+ "gate_i2s1_frac", "gate_i2s2",
+ "gate_i2s2_frac", "gate_spdif",
+ "gate_spdif_frac", "gate_testclk";
+
+ #clock-cells = <1>;
+ };
+
+ clk_gates1: gate-clk@200000d4 {
+ compatible = "rockchip,rk2928-gate-clk";
+ reg = <0x200000d4 0x4>;
+ clocks = <&xin24m>, <&xin24m>,
+ <&xin24m>, <&dummy>,
+ <&dummy>, <&xin24m>,
+ <&xin24m>, <&dummy>,
+ <&xin24m>, <&dummy>,
+ <&xin24m>, <&dummy>,
+ <&xin24m>, <&dummy>,
+ <&xin24m>, <&dummy>;
+
+ clock-output-names =
+ "gate_timer0", "gate_timer1",
+ "gate_timer2", "gate_jtag",
+ "gate_aclk_lcdc1_src", "gate_otgphy0",
+ "gate_otgphy1", "gate_ddr_gpll",
+ "gate_uart0", "gate_frac_uart0",
+ "gate_uart1", "gate_frac_uart1",
+ "gate_uart2", "gate_frac_uart2",
+ "gate_uart3", "gate_frac_uart3";
+
+ #clock-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s3c2410-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s3c2410-clock.txt
new file mode 100644
index 000000000..822505e71
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s3c2410-clock.txt
@@ -0,0 +1,50 @@
+* Samsung S3C2410 Clock Controller
+
+The S3C2410 clock controller generates and supplies clock to various controllers
+within the SoC. The clock binding described here is applicable to the s3c2410,
+s3c2440 and s3c2442 SoCs in the s3c24x family.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "samsung,s3c2410-clock" - controller compatible with S3C2410 SoC.
+ - "samsung,s3c2440-clock" - controller compatible with S3C2440 SoC.
+ - "samsung,s3c2442-clock" - controller compatible with S3C2442 SoC.
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. Some of the clocks are available only
+on a particular SoC.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/s3c2410.h header and can be used in device
+tree sources.
+
+External clocks:
+
+The xti clock used as input for the plls is generated outside the SoC. It is
+expected that is are defined using standard clock bindings with a
+clock-output-names value of "xti".
+
+Example: Clock controller node:
+
+ clocks: clock-controller@4c000000 {
+ compatible = "samsung,s3c2410-clock";
+ reg = <0x4c000000 0x20>;
+ #clock-cells = <1>;
+ };
+
+Example: UART controller node that consumes the clock generated by the clock
+ controller (refer to the standard clock bindings for information about
+ "clocks" and "clock-names" properties):
+
+ serial@50004000 {
+ compatible = "samsung,s3c2440-uart";
+ reg = <0x50004000 0x4000>;
+ interrupts = <1 23 3 4>, <1 23 4 4>;
+ clock-names = "uart", "clk_uart_baud2";
+ clocks = <&clocks PCLK_UART0>, <&clocks PCLK_UART0>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s3c2412-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s3c2412-clock.txt
new file mode 100644
index 000000000..2b430960b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s3c2412-clock.txt
@@ -0,0 +1,50 @@
+* Samsung S3C2412 Clock Controller
+
+The S3C2412 clock controller generates and supplies clock to various controllers
+within the SoC. The clock binding described here is applicable to the s3c2412
+and s3c2413 SoCs in the s3c24x family.
+
+Required Properties:
+
+- compatible: should be "samsung,s3c2412-clock"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. Some of the clocks are available only
+on a particular SoC.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/s3c2412.h header and can be used in device
+tree sources.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with following
+clock-output-names:
+ - "xti" - crystal input - required,
+ - "ext" - external clock source - optional,
+
+Example: Clock controller node:
+
+ clocks: clock-controller@4c000000 {
+ compatible = "samsung,s3c2412-clock";
+ reg = <0x4c000000 0x20>;
+ #clock-cells = <1>;
+ };
+
+Example: UART controller node that consumes the clock generated by the clock
+ controller (refer to the standard clock bindings for information about
+ "clocks" and "clock-names" properties):
+
+ serial@50004000 {
+ compatible = "samsung,s3c2412-uart";
+ reg = <0x50004000 0x4000>;
+ interrupts = <1 23 3 4>, <1 23 4 4>;
+ clock-names = "uart", "clk_uart_baud2", "clk_uart_baud3";
+ clocks = <&clocks PCLK_UART0>, <&clocks PCLK_UART0>,
+ <&clocks SCLK_UART>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s3c2443-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s3c2443-clock.txt
new file mode 100644
index 000000000..e67bb0547
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s3c2443-clock.txt
@@ -0,0 +1,56 @@
+* Samsung S3C2443 Clock Controller
+
+The S3C2443 clock controller generates and supplies clock to various controllers
+within the SoC. The clock binding described here is applicable to all SoCs in
+the s3c24x family starting with the s3c2443.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "samsung,s3c2416-clock" - controller compatible with S3C2416 SoC.
+ - "samsung,s3c2443-clock" - controller compatible with S3C2443 SoC.
+ - "samsung,s3c2450-clock" - controller compatible with S3C2450 SoC.
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. Some of the clocks are available only
+on a particular SoC.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/s3c2443.h header and can be used in device
+tree sources.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with following
+clock-output-names:
+ - "xti" - crystal input - required,
+ - "ext" - external clock source - optional,
+ - "ext_i2s" - external I2S clock - optional,
+ - "ext_uart" - external uart clock - optional,
+
+Example: Clock controller node:
+
+ clocks: clock-controller@4c000000 {
+ compatible = "samsung,s3c2416-clock";
+ reg = <0x4c000000 0x40>;
+ #clock-cells = <1>;
+ };
+
+Example: UART controller node that consumes the clock generated by the clock
+ controller (refer to the standard clock bindings for information about
+ "clocks" and "clock-names" properties):
+
+ serial@50004000 {
+ compatible = "samsung,s3c2440-uart";
+ reg = <0x50004000 0x4000>;
+ interrupts = <1 23 3 4>, <1 23 4 4>;
+ clock-names = "uart", "clk_uart_baud2",
+ "clk_uart_baud3";
+ clocks = <&clocks PCLK_UART0>, <&clocks PCLK_UART0>,
+ <&clocks SCLK_UART>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt
new file mode 100644
index 000000000..fa171dc4b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s3c64xx-clock.txt
@@ -0,0 +1,77 @@
+* Samsung S3C64xx Clock Controller
+
+The S3C64xx clock controller generates and supplies clock to various controllers
+within the SoC. The clock binding described here is applicable to all SoCs in
+the S3C64xx family.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ - "samsung,s3c6400-clock" - controller compatible with S3C6400 SoC.
+ - "samsung,s3c6410-clock" - controller compatible with S3C6410 SoC.
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume. Some of the clocks are available only
+on a particular S3C64xx SoC and this is specified where applicable.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/samsung,s3c64xx-clock.h header and can be used in device
+tree sources.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with following
+clock-output-names:
+ - "fin_pll" - PLL input clock (xtal/extclk) - required,
+ - "xusbxti" - USB xtal - required,
+ - "iiscdclk0" - I2S0 codec clock - optional,
+ - "iiscdclk1" - I2S1 codec clock - optional,
+ - "iiscdclk2" - I2S2 codec clock - optional,
+ - "pcmcdclk0" - PCM0 codec clock - optional,
+ - "pcmcdclk1" - PCM1 codec clock - optional, only S3C6410.
+
+Example: Clock controller node:
+
+ clock: clock-controller@7e00f000 {
+ compatible = "samsung,s3c6410-clock";
+ reg = <0x7e00f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+Example: Required external clocks:
+
+ fin_pll: clock-fin-pll {
+ compatible = "fixed-clock";
+ clock-output-names = "fin_pll";
+ clock-frequency = <12000000>;
+ #clock-cells = <0>;
+ };
+
+ xusbxti: clock-xusbxti {
+ compatible = "fixed-clock";
+ clock-output-names = "xusbxti";
+ clock-frequency = <48000000>;
+ #clock-cells = <0>;
+ };
+
+Example: UART controller node that consumes the clock generated by the clock
+ controller (refer to the standard clock bindings for information about
+ "clocks" and "clock-names" properties):
+
+ uart0: serial@7f005000 {
+ compatible = "samsung,s3c6400-uart";
+ reg = <0x7f005000 0x100>;
+ interrupt-parent = <&vic1>;
+ interrupts = <5>;
+ clock-names = "uart", "clk_uart_baud2",
+ "clk_uart_baud3";
+ clocks = <&clock PCLK_UART0>, <&clocks PCLK_UART0>,
+ <&clock SCLK_UART>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.txt b/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.txt
new file mode 100644
index 000000000..effd9401c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/samsung,s5pv210-clock.txt
@@ -0,0 +1,78 @@
+* Samsung S5P6442/S5PC110/S5PV210 Clock Controller
+
+Samsung S5P6442, S5PC110 and S5PV210 SoCs contain integrated clock
+controller, which generates and supplies clock to various controllers
+within the SoC.
+
+Required Properties:
+
+- compatible: should be one of following:
+ - "samsung,s5pv210-clock" : for clock controller of Samsung
+ S5PC110/S5PV210 SoCs,
+ - "samsung,s5p6442-clock" : for clock controller of Samsung
+ S5P6442 SoC.
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- #clock-cells: should be 1.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/s5pv210.h header and can be used in device tree sources.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It is expected
+that they are defined using standard clock bindings with following
+clock-output-names:
+ - "xxti": external crystal oscillator connected to XXTI and XXTO pins of
+the SoC,
+ - "xusbxti": external crystal oscillator connected to XUSBXTI and XUSBXTO
+pins of the SoC,
+
+A subset of above clocks available on given board shall be specified in
+board device tree, including the system base clock, as selected by XOM[0]
+pin of the SoC. Refer to generic fixed rate clock bindings
+documentation[1] for more information how to specify these clocks.
+
+[1] Documentation/devicetree/bindings/clock/fixed-clock.txt
+
+Example: Clock controller node:
+
+ clock: clock-controller@7e00f000 {
+ compatible = "samsung,s5pv210-clock";
+ reg = <0x7e00f000 0x1000>;
+ #clock-cells = <1>;
+ };
+
+Example: Required external clocks:
+
+ xxti: clock-xxti {
+ compatible = "fixed-clock";
+ clock-output-names = "xxti";
+ clock-frequency = <24000000>;
+ #clock-cells = <0>;
+ };
+
+ xusbxti: clock-xusbxti {
+ compatible = "fixed-clock";
+ clock-output-names = "xusbxti";
+ clock-frequency = <24000000>;
+ #clock-cells = <0>;
+ };
+
+Example: UART controller node that consumes the clock generated by the clock
+ controller (refer to the standard clock bindings for information about
+ "clocks" and "clock-names" properties):
+
+ uart0: serial@e2900000 {
+ compatible = "samsung,s5pv210-uart";
+ reg = <0xe2900000 0x400>;
+ interrupt-parent = <&vic1>;
+ interrupts = <10>;
+ clock-names = "uart", "clk_uart_baud0",
+ "clk_uart_baud1";
+ clocks = <&clocks UART0>, <&clocks UART0>,
+ <&clocks SCLK_UART0>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/clock/silabs,si5351.txt b/Documentation/devicetree/bindings/clock/silabs,si5351.txt
new file mode 100644
index 000000000..28b28309f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/silabs,si5351.txt
@@ -0,0 +1,121 @@
+Binding for Silicon Labs Si5351a/b/c programmable i2c clock generator.
+
+Reference
+[1] Si5351A/B/C Data Sheet
+ http://www.silabs.com/Support%20Documents/TechnicalDocs/Si5351.pdf
+
+The Si5351a/b/c are programmable i2c clock generators with up to 8 output
+clocks. Si5351a also has a reduced pin-count package (MSOP10) where only
+3 output clocks are accessible. The internal structure of the clock
+generators can be found in [1].
+
+==I2C device node==
+
+Required properties:
+- compatible: shall be one of "silabs,si5351{a,a-msop,b,c}".
+- reg: i2c device address, shall be 0x60 or 0x61.
+- #clock-cells: from common clock binding; shall be set to 1.
+- clocks: from common clock binding; list of parent clock
+ handles, shall be xtal reference clock or xtal and clkin for
+ si5351c only. Corresponding clock input names are "xtal" and
+ "clkin" respectively.
+- #address-cells: shall be set to 1.
+- #size-cells: shall be set to 0.
+
+Optional properties:
+- silabs,pll-source: pair of (number, source) for each pll. Allows
+ to overwrite clock source of pll A (number=0) or B (number=1).
+
+==Child nodes==
+
+Each of the clock outputs can be overwritten individually by
+using a child node to the I2C device node. If a child node for a clock
+output is not set, the eeprom configuration is not overwritten.
+
+Required child node properties:
+- reg: number of clock output.
+
+Optional child node properties:
+- silabs,clock-source: source clock of the output divider stage N, shall be
+ 0 = multisynth N
+ 1 = multisynth 0 for output clocks 0-3, else multisynth4
+ 2 = xtal
+ 3 = clkin (si5351c only)
+- silabs,drive-strength: output drive strength in mA, shall be one of {2,4,6,8}.
+- silabs,multisynth-source: source pll A(0) or B(1) of corresponding multisynth
+ divider.
+- silabs,pll-master: boolean, multisynth can change pll frequency.
+- silabs,disable-state : clock output disable state, shall be
+ 0 = clock output is driven LOW when disabled
+ 1 = clock output is driven HIGH when disabled
+ 2 = clock output is FLOATING (HIGH-Z) when disabled
+ 3 = clock output is NEVER disabled
+
+==Example==
+
+/* 25MHz reference crystal */
+ref25: ref25M {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <25000000>;
+};
+
+i2c-master-node {
+
+ /* Si5351a msop10 i2c clock generator */
+ si5351a: clock-generator@60 {
+ compatible = "silabs,si5351a-msop";
+ reg = <0x60>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #clock-cells = <1>;
+
+ /* connect xtal input to 25MHz reference */
+ clocks = <&ref25>;
+ clock-names = "xtal";
+
+ /* connect xtal input as source of pll0 and pll1 */
+ silabs,pll-source = <0 0>, <1 0>;
+
+ /*
+ * overwrite clkout0 configuration with:
+ * - 8mA output drive strength
+ * - pll0 as clock source of multisynth0
+ * - multisynth0 as clock source of output divider
+ * - multisynth0 can change pll0
+ * - set initial clock frequency of 74.25MHz
+ */
+ clkout0 {
+ reg = <0>;
+ silabs,drive-strength = <8>;
+ silabs,multisynth-source = <0>;
+ silabs,clock-source = <0>;
+ silabs,pll-master;
+ clock-frequency = <74250000>;
+ };
+
+ /*
+ * overwrite clkout1 configuration with:
+ * - 4mA output drive strength
+ * - pll1 as clock source of multisynth1
+ * - multisynth1 as clock source of output divider
+ * - multisynth1 can change pll1
+ */
+ clkout1 {
+ reg = <1>;
+ silabs,drive-strength = <4>;
+ silabs,multisynth-source = <1>;
+ silabs,clock-source = <0>;
+ pll-master;
+ };
+
+ /*
+ * overwrite clkout2 configuration with:
+ * - xtal as clock source of output divider
+ */
+ clkout2 {
+ reg = <2>;
+ silabs,clock-source = <2>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/clock/silabs,si570.txt b/Documentation/devicetree/bindings/clock/silabs,si570.txt
new file mode 100644
index 000000000..c09f21e1d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/silabs,si570.txt
@@ -0,0 +1,39 @@
+Binding for Silicon Labs 570, 571, 598 and 599 programmable
+I2C clock generators.
+
+Reference
+This binding uses the common clock binding[1]. Details about the devices can be
+found in the data sheets[2][3].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Si570/571 Data Sheet
+ http://www.silabs.com/Support%20Documents/TechnicalDocs/si570.pdf
+[3] Si598/599 Data Sheet
+ http://www.silabs.com/Support%20Documents/TechnicalDocs/si598-99.pdf
+
+Required properties:
+ - compatible: Shall be one of "silabs,si570", "silabs,si571",
+ "silabs,si598", "silabs,si599"
+ - reg: I2C device address.
+ - #clock-cells: From common clock bindings: Shall be 0.
+ - factory-fout: Factory set default frequency. This frequency is part specific.
+ The correct frequency for the part used has to be provided in
+ order to generate the correct output frequencies. For more
+ details, please refer to the data sheet.
+ - temperature-stability: Temperature stability of the device in PPM. Should be
+ one of: 7, 20, 50 or 100.
+
+Optional properties:
+ - clock-output-names: From common clock bindings. Recommended to be "si570".
+ - clock-frequency: Output frequency to generate. This defines the output
+ frequency set during boot. It can be reprogrammed during
+ runtime through the common clock framework.
+
+Example:
+ si570: clock-generator@5d {
+ #clock-cells = <0>;
+ compatible = "silabs,si570";
+ temperature-stability = <50>;
+ reg = <0x5d>;
+ factory-fout = <156250000>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/st,nomadik.txt b/Documentation/devicetree/bindings/clock/st,nomadik.txt
new file mode 100644
index 000000000..40e0cf1f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st,nomadik.txt
@@ -0,0 +1,104 @@
+ST Microelectronics Nomadik SRC System Reset and Control
+
+This binding uses the common clock binding:
+Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+The Nomadik SRC controller is responsible of controlling chrystals,
+PLLs and clock gates.
+
+Required properties for the SRC node:
+- compatible: must be "stericsson,nomadik-src"
+- reg: must contain the SRC register base and size
+
+Optional properties for the SRC node:
+- disable-sxtalo: if present this will disable the SXTALO
+ i.e. the driver output for the slow 32kHz chrystal, if the
+ board has its own circuitry for providing this oscillator
+- disable-mxtal: if present this will disable the MXTALO,
+ i.e. the driver output for the main (~19.2 MHz) chrystal,
+ if the board has its own circuitry for providing this
+ oscillator
+
+
+PLL nodes: these nodes represent the two PLLs on the system,
+which should both have the main chrystal, represented as a
+fixed frequency clock, as parent.
+
+Required properties for the two PLL nodes:
+- compatible: must be "st,nomadik-pll-clock"
+- clock-cells: must be 0
+- clock-id: must be 1 or 2 for PLL1 and PLL2 respectively
+- clocks: this clock will have main chrystal as parent
+
+
+HCLK nodes: these represent the clock gates on individual
+lines from the HCLK clock tree and the gate for individual
+lines from the PCLK clock tree.
+
+Requires properties for the HCLK nodes:
+- compatible: must be "st,nomadik-hclk-clock"
+- clock-cells: must be 0
+- clock-id: must be the clock ID from 0 to 63 according to
+ this table:
+
+ 0: HCLKDMA0
+ 1: HCLKSMC
+ 2: HCLKSDRAM
+ 3: HCLKDMA1
+ 4: HCLKCLCD
+ 5: PCLKIRDA
+ 6: PCLKSSP
+ 7: PCLKUART0
+ 8: PCLKSDI
+ 9: PCLKI2C0
+ 10: PCLKI2C1
+ 11: PCLKUART1
+ 12: PCLMSP0
+ 13: HCLKUSB
+ 14: HCLKDIF
+ 15: HCLKSAA
+ 16: HCLKSVA
+ 17: PCLKHSI
+ 18: PCLKXTI
+ 19: PCLKUART2
+ 20: PCLKMSP1
+ 21: PCLKMSP2
+ 22: PCLKOWM
+ 23: HCLKHPI
+ 24: PCLKSKE
+ 25: PCLKHSEM
+ 26: HCLK3D
+ 27: HCLKHASH
+ 28: HCLKCRYP
+ 29: PCLKMSHC
+ 30: HCLKUSBM
+ 31: HCLKRNG
+ (32, 33, 34, 35 RESERVED)
+ 36: CLDCLK
+ 37: IRDACLK
+ 38: SSPICLK
+ 39: UART0CLK
+ 40: SDICLK
+ 41: I2C0CLK
+ 42: I2C1CLK
+ 43: UART1CLK
+ 44: MSPCLK0
+ 45: USBCLK
+ 46: DIFCLK
+ 47: IPI2CCLK
+ 48: IPBMCCLK
+ 49: HSICLKRX
+ 50: HSICLKTX
+ 51: UART2CLK
+ 52: MSPCLK1
+ 53: MSPCLK2
+ 54: OWMCLK
+ (55 RESERVED)
+ 56: SKECLK
+ (57 RESERVED)
+ 58: 3DCLK
+ 59: PCLKMSP3
+ 60: MSPCLK3
+ 61: MSHCCLK
+ 62: USBMCLK
+ 63: RNGCCLK
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-divmux.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-divmux.txt
new file mode 100644
index 000000000..624765204
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st/st,clkgen-divmux.txt
@@ -0,0 +1,49 @@
+Binding for a ST divider and multiplexer clock driver.
+
+This binding uses the common clock binding[1].
+Base address is located to the parent node. See clock binding[2]
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/clock/st/st,clkgen.txt
+
+Required properties:
+
+- compatible : shall be:
+ "st,clkgena-divmux-c65-hs", "st,clkgena-divmux"
+ "st,clkgena-divmux-c65-ls", "st,clkgena-divmux"
+ "st,clkgena-divmux-c32-odf0", "st,clkgena-divmux"
+ "st,clkgena-divmux-c32-odf1", "st,clkgena-divmux"
+ "st,clkgena-divmux-c32-odf2", "st,clkgena-divmux"
+ "st,clkgena-divmux-c32-odf3", "st,clkgena-divmux"
+
+- #clock-cells : From common clock binding; shall be set to 1.
+
+- clocks : From common clock binding
+
+- clock-output-names : From common clock binding.
+
+Example:
+
+ clockgen-a@fd345000 {
+ reg = <0xfd345000 0xb50>;
+
+ clk_m_a1_div1: clk-m-a1-div1 {
+ #clock-cells = <1>;
+ compatible = "st,clkgena-divmux-c32-odf1",
+ "st,clkgena-divmux";
+
+ clocks = <&clk_m_a1_osc_prediv>,
+ <&clk_m_a1_pll0 1>, /* PLL0 PHI1 */
+ <&clk_m_a1_pll1 1>; /* PLL1 PHI1 */
+
+ clock-output-names = "clk-m-rx-icn-ts",
+ "clk-m-rx-icn-vdp-0",
+ "", /* unused */
+ "clk-m-prv-t1-bus",
+ "clk-m-icn-reg-12",
+ "clk-m-icn-reg-10",
+ "", /* unused */
+ "clk-m-icn-st231";
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-mux.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-mux.txt
new file mode 100644
index 000000000..f1fa91c68
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st/st,clkgen-mux.txt
@@ -0,0 +1,36 @@
+Binding for a ST multiplexed clock driver.
+
+This binding supports only simple indexed multiplexers, it does not
+support table based parent index to hardware value translations.
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+
+- compatible : shall be:
+ "st,stih416-clkgenc-vcc-hd", "st,clkgen-mux"
+ "st,stih416-clkgenf-vcc-fvdp", "st,clkgen-mux"
+ "st,stih416-clkgenf-vcc-hva", "st,clkgen-mux"
+ "st,stih416-clkgenf-vcc-hd", "st,clkgen-mux"
+ "st,stih416-clkgenf-vcc-sd", "st,clkgen-mux"
+ "st,stih415-clkgen-a9-mux", "st,clkgen-mux"
+ "st,stih416-clkgen-a9-mux", "st,clkgen-mux"
+ "st,stih407-clkgen-a9-mux", "st,clkgen-mux"
+
+- #clock-cells : from common clock binding; shall be set to 0.
+
+- reg : A Base address and length of the register set.
+
+- clocks : from common clock binding
+
+Example:
+
+ clk_m_hva: clk-m-hva@fd690868 {
+ #clock-cells = <0>;
+ compatible = "st,stih416-clkgenf-vcc-hva", "st,clkgen-mux";
+ reg = <0xfd690868 4>;
+
+ clocks = <&clockgen_f 1>, <&clk_m_a1_div0 3>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt
new file mode 100644
index 000000000..efb51cf0c
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st/st,clkgen-pll.txt
@@ -0,0 +1,51 @@
+Binding for a ST pll clock driver.
+
+This binding uses the common clock binding[1].
+Base address is located to the parent node. See clock binding[2]
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/clock/st/st,clkgen.txt
+
+Required properties:
+
+- compatible : shall be:
+ "st,clkgena-prediv-c65", "st,clkgena-prediv"
+ "st,clkgena-prediv-c32", "st,clkgena-prediv"
+
+ "st,clkgena-plls-c65"
+ "st,plls-c32-a1x-0", "st,clkgen-plls-c32"
+ "st,plls-c32-a1x-1", "st,clkgen-plls-c32"
+ "st,stih415-plls-c32-a9", "st,clkgen-plls-c32"
+ "st,stih415-plls-c32-ddr", "st,clkgen-plls-c32"
+ "st,stih416-plls-c32-a9", "st,clkgen-plls-c32"
+ "st,stih416-plls-c32-ddr", "st,clkgen-plls-c32"
+ "st,stih407-plls-c32-a0", "st,clkgen-plls-c32"
+ "st,stih407-plls-c32-a9", "st,clkgen-plls-c32"
+ "st,stih407-plls-c32-c0_0", "st,clkgen-plls-c32"
+ "st,stih407-plls-c32-c0_1", "st,clkgen-plls-c32"
+
+ "st,stih415-gpu-pll-c32", "st,clkgengpu-pll-c32"
+ "st,stih416-gpu-pll-c32", "st,clkgengpu-pll-c32"
+
+- #clock-cells : From common clock binding; shall be set to 1.
+
+- clocks : From common clock binding
+
+- clock-output-names : From common clock binding.
+
+Example:
+
+ clockgen-a@fee62000 {
+ reg = <0xfee62000 0xb48>;
+
+ clk_s_a0_pll: clk-s-a0-pll {
+ #clock-cells = <1>;
+ compatible = "st,clkgena-plls-c65";
+
+ clocks = <&clk_sysin>;
+
+ clock-output-names = "clk-s-a0-pll0-hs",
+ "clk-s-a0-pll0-ls",
+ "clk-s-a0-pll1";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-prediv.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-prediv.txt
new file mode 100644
index 000000000..604766c26
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st/st,clkgen-prediv.txt
@@ -0,0 +1,36 @@
+Binding for a ST pre-divider clock driver.
+
+This binding uses the common clock binding[1].
+Base address is located to the parent node. See clock binding[2]
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/clock/st/st,clkgen.txt
+
+Required properties:
+
+- compatible : shall be:
+ "st,clkgena-prediv-c65", "st,clkgena-prediv"
+ "st,clkgena-prediv-c32", "st,clkgena-prediv"
+
+- #clock-cells : From common clock binding; shall be set to 0.
+
+- clocks : From common clock binding
+
+- clock-output-names : From common clock binding.
+
+Example:
+
+ clockgen-a@fd345000 {
+ reg = <0xfd345000 0xb50>;
+
+ clk_m_a2_osc_prediv: clk-m-a2-osc-prediv {
+ #clock-cells = <0>;
+ compatible = "st,clkgena-prediv-c32",
+ "st,clkgena-prediv";
+
+ clocks = <&clk_sysin>;
+
+ clock-output-names = "clk-m-a2-osc-prediv";
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen-vcc.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen-vcc.txt
new file mode 100644
index 000000000..109b3eddc
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st/st,clkgen-vcc.txt
@@ -0,0 +1,61 @@
+Binding for a type of STMicroelectronics clock crossbar (VCC).
+
+The crossbar can take up to 4 input clocks and control up to 16
+output clocks. Not all inputs or outputs have to be in use in a
+particular instantiation. Each output can be individually enabled,
+select any of the input clocks and apply a divide (by 1,2,4 or 8) to
+that selected clock.
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+
+- compatible : shall be:
+ "st,stih416-clkgenc", "st,vcc"
+ "st,stih416-clkgenf", "st,vcc"
+
+- #clock-cells : from common clock binding; shall be set to 1.
+
+- reg : A Base address and length of the register set.
+
+- clocks : from common clock binding
+
+- clock-output-names : From common clock binding. The block has 16
+ clock outputs but not all of them in a specific instance
+ have to be used in the SoC. If a clock name is left as
+ an empty string then no clock will be created for the
+ output associated with that string index. If fewer than
+ 16 strings are provided then no clocks will be created
+ for the remaining outputs.
+
+Example:
+
+ clockgen_c_vcc: clockgen-c-vcc@0xfe8308ac {
+ #clock-cells = <1>;
+ compatible = "st,stih416-clkgenc", "st,clkgen-vcc";
+ reg = <0xfe8308ac 12>;
+
+ clocks = <&clk_s_vcc_hd>,
+ <&clockgen_c 1>,
+ <&clk_s_tmds_fromphy>,
+ <&clockgen_c 2>;
+
+ clock-output-names = "clk-s-pix-hdmi",
+ "clk-s-pix-dvo",
+ "clk-s-out-dvo",
+ "clk-s-pix-hd",
+ "clk-s-hddac",
+ "clk-s-denc",
+ "clk-s-sddac",
+ "clk-s-pix-main",
+ "clk-s-pix-aux",
+ "clk-s-stfe-frc-0",
+ "clk-s-ref-mcru",
+ "clk-s-slave-mcru",
+ "clk-s-tmds-hdmi",
+ "clk-s-hdmi-reject-pll",
+ "clk-s-thsens";
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/st/st,clkgen.txt b/Documentation/devicetree/bindings/clock/st/st,clkgen.txt
new file mode 100644
index 000000000..78978f1f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st/st,clkgen.txt
@@ -0,0 +1,100 @@
+Binding for a Clockgen hardware block found on
+certain STMicroelectronics consumer electronics SoC devices.
+
+A Clockgen node can contain pll, diviser or multiplexer nodes.
+
+We will find only the base address of the Clockgen, this base
+address is common of all subnode.
+
+ clockgen_node {
+ reg = <>;
+
+ pll_node {
+ ...
+ };
+
+ prediv_node {
+ ...
+ };
+
+ divmux_node {
+ ...
+ };
+
+ quadfs_node {
+ ...
+ };
+
+ mux_node {
+ ...
+ };
+
+ vcc_node {
+ ...
+ };
+
+ flexgen_node {
+ ...
+ };
+ ...
+ };
+
+This binding uses the common clock binding[1].
+Each subnode should use the binding discribe in [2]..[7]
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/clock/st,clkgen-divmux.txt
+[3] Documentation/devicetree/bindings/clock/st,clkgen-mux.txt
+[4] Documentation/devicetree/bindings/clock/st,clkgen-pll.txt
+[5] Documentation/devicetree/bindings/clock/st,clkgen-prediv.txt
+[6] Documentation/devicetree/bindings/clock/st,vcc.txt
+[7] Documentation/devicetree/bindings/clock/st,quadfs.txt
+[8] Documentation/devicetree/bindings/clock/st,flexgen.txt
+
+
+Required properties:
+- reg : A Base address and length of the register set.
+
+Example:
+
+ clockgen-a@fee62000 {
+
+ reg = <0xfee62000 0xb48>;
+
+ clk_s_a0_pll: clk-s-a0-pll {
+ #clock-cells = <1>;
+ compatible = "st,clkgena-plls-c65";
+
+ clocks = <&clk-sysin>;
+
+ clock-output-names = "clk-s-a0-pll0-hs",
+ "clk-s-a0-pll0-ls",
+ "clk-s-a0-pll1";
+ };
+
+ clk_s_a0_osc_prediv: clk-s-a0-osc-prediv {
+ #clock-cells = <0>;
+ compatible = "st,clkgena-prediv-c65",
+ "st,clkgena-prediv";
+
+ clocks = <&clk_sysin>;
+
+ clock-output-names = "clk-s-a0-osc-prediv";
+ };
+
+ clk_s_a0_hs: clk-s-a0-hs {
+ #clock-cells = <1>;
+ compatible = "st,clkgena-divmux-c65-hs",
+ "st,clkgena-divmux";
+
+ clocks = <&clk-s_a0_osc_prediv>,
+ <&clk-s_a0_pll 0>, /* pll0 hs */
+ <&clk-s_a0_pll 2>; /* pll1 */
+
+ clock-output-names = "clk-s-fdma-0",
+ "clk-s-fdma-1",
+ ""; /* clk-s-jit-sense */
+ /* fourth output unused */
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/st/st,flexgen.txt b/Documentation/devicetree/bindings/clock/st/st,flexgen.txt
new file mode 100644
index 000000000..b7ee5c7e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st/st,flexgen.txt
@@ -0,0 +1,119 @@
+Binding for a type of flexgen structure found on certain
+STMicroelectronics consumer electronics SoC devices
+
+This structure includes:
+- a clock cross bar (represented by a mux element)
+- a pre and final dividers (represented by a divider and gate elements)
+
+Flexgen structure is a part of Clockgen[1].
+
+Please find an example below:
+
+ Clockgen block diagram
+ -------------------------------------------------------------------
+ | Flexgen structure |
+ | --------------------------------------------- |
+ | | ------- -------- -------- | |
+clk_sysin | | | | | | | | |
+---|-----------------|-->| | | | | | | |
+ | | | | | | | | | | |
+ | | ------- | | | |Pre | |Final | | |
+ | | |PLL0 | | | | |Dividers| |Dividers| | |
+ | |->| | | | | | x32 | | x32 | | |
+ | | | odf_0|----|-->| | | | | | | |
+ | | | | | | | | | | | | |
+ | | | | | | | | | | | | |
+ | | | | | | | | | | | | |
+ | | | | | | | | | | | | |
+ | | ------- | | | | | | | | |
+ | | | | | | | | | | |
+ | | ------- | | Clock | | | | | | |
+ | | |PLL1 | | | | | | | | | |
+ | |->| | | | Cross | | | | | | |
+ | | | odf_0|----|-->| | | | | | CLK_DIV[31:0]
+ | | | | | | Bar |====>| |====>| |===|=========>
+ | | | | | | | | | | | | |
+ | | | | | | | | | | | | |
+ | | | | | | | | | | | | |
+ | | ------- | | | | | | | | |
+ | | | | | | | | | | |
+ | | ------- | | | | | | | | |
+ | | |QUADFS | | | | | | | | | |
+ | |->| ch0|----|-->| | | | | | | |
+ | | | | | | | | | | | |
+ | | ch1|----|-->| | | | | | | |
+ | | | | | | | | | | | |
+ | | ch2|----|-->| | | DIV | | DIV | | |
+ | | | | | | | 1 to | | 1 to | | |
+ | | ch3|----|-->| | | 1024 | | 64 | | |
+ | ------- | | | | | | | | |
+ | | ------- -------- -------- | |
+ | -------------------------------------------- |
+ | |
+ -------------------------------------------------------------------
+
+This binding uses the common clock binding[2].
+
+[1] Documentation/devicetree/bindings/clock/st/st,clkgen.txt
+[2] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be:
+ "st,flexgen"
+
+- #clock-cells : from common clock binding; shall be set to 1 (multiple clock
+ outputs).
+
+- clocks : must be set to the parent's phandle. it's could be output clocks of
+ a quadsfs or/and a pll or/and clk_sysin (up to 7 clocks)
+
+- clock-output-names : List of strings used to name the clock outputs.
+
+Example:
+
+ clk_s_c0_flexgen: clk-s-c0-flexgen {
+
+ #clock-cells = <1>;
+ compatible = "st,flexgen";
+
+ clocks = <&clk_s_c0_pll0 0>,
+ <&clk_s_c0_pll1 0>,
+ <&clk_s_c0_quadfs 0>,
+ <&clk_s_c0_quadfs 1>,
+ <&clk_s_c0_quadfs 2>,
+ <&clk_s_c0_quadfs 3>,
+ <&clk_sysin>;
+
+ clock-output-names = "clk-icn-gpu",
+ "clk-fdma",
+ "clk-nand",
+ "clk-hva",
+ "clk-proc-stfe",
+ "clk-proc-tp",
+ "clk-rx-icn-dmu",
+ "clk-rx-icn-hva",
+ "clk-icn-cpu",
+ "clk-tx-icn-dmu",
+ "clk-mmc-0",
+ "clk-mmc-1",
+ "clk-jpegdec",
+ "clk-ext2fa9",
+ "clk-ic-bdisp-0",
+ "clk-ic-bdisp-1",
+ "clk-pp-dmu",
+ "clk-vid-dmu",
+ "clk-dss-lpc",
+ "clk-st231-aud-0",
+ "clk-st231-gp-1",
+ "clk-st231-dmu",
+ "clk-icn-lmi",
+ "clk-tx-icn-disp-1",
+ "clk-icn-sbc",
+ "clk-stfe-frc2",
+ "clk-eth-phy",
+ "clk-eth-ref-phyclk",
+ "clk-flash-promip",
+ "clk-main-disp",
+ "clk-aux-disp",
+ "clk-compo-dvp";
+ };
diff --git a/Documentation/devicetree/bindings/clock/st/st,quadfs.txt b/Documentation/devicetree/bindings/clock/st/st,quadfs.txt
new file mode 100644
index 000000000..cedeb9cc8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/st/st,quadfs.txt
@@ -0,0 +1,48 @@
+Binding for a type of quad channel digital frequency synthesizer found on
+certain STMicroelectronics consumer electronics SoC devices.
+
+This version contains a programmable PLL which can generate up to 216, 432
+or 660MHz (from a 30MHz oscillator input) as the input to the digital
+synthesizers.
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be:
+ "st,stih416-quadfs216", "st,quadfs"
+ "st,stih416-quadfs432", "st,quadfs"
+ "st,stih416-quadfs660-E", "st,quadfs"
+ "st,stih416-quadfs660-F", "st,quadfs"
+ "st,stih407-quadfs660-C", "st,quadfs"
+ "st,stih407-quadfs660-D", "st,quadfs"
+
+
+- #clock-cells : from common clock binding; shall be set to 1.
+
+- reg : A Base address and length of the register set.
+
+- clocks : from common clock binding
+
+- clock-output-names : From common clock binding. The block has 4
+ clock outputs but not all of them in a specific instance
+ have to be used in the SoC. If a clock name is left as
+ an empty string then no clock will be created for the
+ output associated with that string index. If fewer than
+ 4 strings are provided then no clocks will be created
+ for the remaining outputs.
+
+Example:
+
+ clockgen_e: clockgen-e@fd3208bc {
+ #clock-cells = <1>;
+ compatible = "st,stih416-quadfs660-E", "st,quadfs";
+ reg = <0xfd3208bc 0xB0>;
+
+ clocks = <&clk_sysin>;
+ clock-output-names = "clk-m-pix-mdtp-0",
+ "clk-m-pix-mdtp-1",
+ "clk-m-pix-mdtp-2",
+ "clk-m-mpelpc";
+ };
diff --git a/Documentation/devicetree/bindings/clock/ste-u300-syscon-clock.txt b/Documentation/devicetree/bindings/clock/ste-u300-syscon-clock.txt
new file mode 100644
index 000000000..7cafcb98e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ste-u300-syscon-clock.txt
@@ -0,0 +1,80 @@
+Clock bindings for ST-Ericsson U300 System Controller Clocks
+
+Bindings for the gated system controller clocks:
+
+Required properties:
+- compatible: must be "stericsson,u300-syscon-clk"
+- #clock-cells: must be <0>
+- clock-type: specifies the type of clock:
+ 0 = slow clock
+ 1 = fast clock
+ 2 = rest/remaining clock
+- clock-id: specifies the clock in the type range
+
+Optional properties:
+- clocks: parent clock(s)
+
+The available clocks per type are as follows:
+
+Type: ID: Clock:
+-------------------
+0 0 Slow peripheral bridge clock
+0 1 UART0 clock
+0 4 GPIO clock
+0 6 RTC clock
+0 7 Application timer clock
+0 8 Access timer clock
+
+1 0 Fast peripheral bridge clock
+1 1 I2C bus 0 clock
+1 2 I2C bus 1 clock
+1 5 MMC interface peripheral (silicon) clock
+1 6 SPI clock
+
+2 3 CPU clock
+2 4 DMA controller clock
+2 5 External Memory Interface (EMIF) clock
+2 6 NAND flask interface clock
+2 8 XGAM graphics engine clock
+2 9 Shared External Memory Interface (SEMI) clock
+2 10 AHB Subsystem Bridge clock
+2 12 Interrupt controller clock
+
+Example:
+
+gpio_clk: gpio_clk@13M {
+ #clock-cells = <0>;
+ compatible = "stericsson,u300-syscon-clk";
+ clock-type = <0>; /* Slow */
+ clock-id = <4>;
+ clocks = <&slow_clk>;
+};
+
+gpio: gpio@c0016000 {
+ compatible = "stericsson,gpio-coh901";
+ (...)
+ clocks = <&gpio_clk>;
+};
+
+
+Bindings for the MMC/SD card clock:
+
+Required properties:
+- compatible: must be "stericsson,u300-syscon-mclk"
+- #clock-cells: must be <0>
+
+Optional properties:
+- clocks: parent clock(s)
+
+mmc_mclk: mmc_mclk {
+ #clock-cells = <0>;
+ compatible = "stericsson,u300-syscon-mclk";
+ clocks = <&mmc_pclk>;
+};
+
+mmcsd: mmcsd@c0001000 {
+ compatible = "arm,pl18x", "arm,primecell";
+ clocks = <&mmc_pclk>, <&mmc_mclk>;
+ clock-names = "apb_pclk", "mclk";
+ (...)
+};
diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt
new file mode 100644
index 000000000..4fa11af3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/sunxi.txt
@@ -0,0 +1,204 @@
+Device Tree Clock bindings for arch-sunxi
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be one of the following:
+ "allwinner,sun4i-a10-osc-clk" - for a gatable oscillator
+ "allwinner,sun4i-a10-pll1-clk" - for the main PLL clock and PLL4
+ "allwinner,sun6i-a31-pll1-clk" - for the main PLL clock on A31
+ "allwinner,sun8i-a23-pll1-clk" - for the main PLL clock on A23
+ "allwinner,sun9i-a80-pll4-clk" - for the peripheral PLLs on A80
+ "allwinner,sun4i-a10-pll5-clk" - for the PLL5 clock
+ "allwinner,sun4i-a10-pll6-clk" - for the PLL6 clock
+ "allwinner,sun6i-a31-pll6-clk" - for the PLL6 clock on A31
+ "allwinner,sun9i-a80-gt-clk" - for the GT bus clock on A80
+ "allwinner,sun4i-a10-cpu-clk" - for the CPU multiplexer clock
+ "allwinner,sun4i-a10-axi-clk" - for the AXI clock
+ "allwinner,sun8i-a23-axi-clk" - for the AXI clock on A23
+ "allwinner,sun4i-a10-axi-gates-clk" - for the AXI gates
+ "allwinner,sun4i-a10-ahb-clk" - for the AHB clock
+ "allwinner,sun5i-a13-ahb-clk" - for the AHB clock on A13
+ "allwinner,sun9i-a80-ahb-clk" - for the AHB bus clocks on A80
+ "allwinner,sun4i-a10-ahb-gates-clk" - for the AHB gates on A10
+ "allwinner,sun5i-a13-ahb-gates-clk" - for the AHB gates on A13
+ "allwinner,sun5i-a10s-ahb-gates-clk" - for the AHB gates on A10s
+ "allwinner,sun7i-a20-ahb-gates-clk" - for the AHB gates on A20
+ "allwinner,sun6i-a31-ar100-clk" - for the AR100 on A31
+ "allwinner,sun6i-a31-ahb1-clk" - for the AHB1 clock on A31
+ "allwinner,sun6i-a31-ahb1-gates-clk" - for the AHB1 gates on A31
+ "allwinner,sun8i-a23-ahb1-gates-clk" - for the AHB1 gates on A23
+ "allwinner,sun9i-a80-ahb0-gates-clk" - for the AHB0 gates on A80
+ "allwinner,sun9i-a80-ahb1-gates-clk" - for the AHB1 gates on A80
+ "allwinner,sun9i-a80-ahb2-gates-clk" - for the AHB2 gates on A80
+ "allwinner,sun4i-a10-apb0-clk" - for the APB0 clock
+ "allwinner,sun6i-a31-apb0-clk" - for the APB0 clock on A31
+ "allwinner,sun8i-a23-apb0-clk" - for the APB0 clock on A23
+ "allwinner,sun9i-a80-apb0-clk" - for the APB0 bus clock on A80
+ "allwinner,sun4i-a10-apb0-gates-clk" - for the APB0 gates on A10
+ "allwinner,sun5i-a13-apb0-gates-clk" - for the APB0 gates on A13
+ "allwinner,sun5i-a10s-apb0-gates-clk" - for the APB0 gates on A10s
+ "allwinner,sun6i-a31-apb0-gates-clk" - for the APB0 gates on A31
+ "allwinner,sun7i-a20-apb0-gates-clk" - for the APB0 gates on A20
+ "allwinner,sun8i-a23-apb0-gates-clk" - for the APB0 gates on A23
+ "allwinner,sun9i-a80-apb0-gates-clk" - for the APB0 gates on A80
+ "allwinner,sun4i-a10-apb1-clk" - for the APB1 clock
+ "allwinner,sun9i-a80-apb1-clk" - for the APB1 bus clock on A80
+ "allwinner,sun4i-a10-apb1-gates-clk" - for the APB1 gates on A10
+ "allwinner,sun5i-a13-apb1-gates-clk" - for the APB1 gates on A13
+ "allwinner,sun5i-a10s-apb1-gates-clk" - for the APB1 gates on A10s
+ "allwinner,sun6i-a31-apb1-gates-clk" - for the APB1 gates on A31
+ "allwinner,sun7i-a20-apb1-gates-clk" - for the APB1 gates on A20
+ "allwinner,sun8i-a23-apb1-gates-clk" - for the APB1 gates on A23
+ "allwinner,sun9i-a80-apb1-gates-clk" - for the APB1 gates on A80
+ "allwinner,sun6i-a31-apb2-gates-clk" - for the APB2 gates on A31
+ "allwinner,sun8i-a23-apb2-gates-clk" - for the APB2 gates on A23
+ "allwinner,sun5i-a13-mbus-clk" - for the MBUS clock on A13
+ "allwinner,sun4i-a10-mmc-clk" - for the MMC clock
+ "allwinner,sun9i-a80-mmc-clk" - for mmc module clocks on A80
+ "allwinner,sun9i-a80-mmc-config-clk" - for mmc gates + resets on A80
+ "allwinner,sun4i-a10-mod0-clk" - for the module 0 family of clocks
+ "allwinner,sun9i-a80-mod0-clk" - for module 0 (storage) clocks on A80
+ "allwinner,sun8i-a23-mbus-clk" - for the MBUS clock on A23
+ "allwinner,sun7i-a20-out-clk" - for the external output clocks
+ "allwinner,sun7i-a20-gmac-clk" - for the GMAC clock module on A20/A31
+ "allwinner,sun4i-a10-usb-clk" - for usb gates + resets on A10 / A20
+ "allwinner,sun5i-a13-usb-clk" - for usb gates + resets on A13
+ "allwinner,sun6i-a31-usb-clk" - for usb gates + resets on A31
+ "allwinner,sun9i-a80-usb-mod-clk" - for usb gates + resets on A80
+ "allwinner,sun9i-a80-usb-phy-clk" - for usb phy gates + resets on A80
+
+Required properties for all clocks:
+- reg : shall be the control register address for the clock.
+- clocks : shall be the input parent clock(s) phandle for the clock. For
+ multiplexed clocks, the list order must match the hardware
+ programming order.
+- #clock-cells : from common clock binding; shall be set to 0 except for
+ the following compatibles where it shall be set to 1:
+ "allwinner,*-gates-clk", "allwinner,sun4i-pll5-clk",
+ "allwinner,sun4i-pll6-clk", "allwinner,sun6i-a31-pll6-clk",
+ "allwinner,*-usb-clk", "allwinner,*-mmc-clk",
+ "allwinner,*-mmc-config-clk"
+- clock-output-names : shall be the corresponding names of the outputs.
+ If the clock module only has one output, the name shall be the
+ module name.
+
+And "allwinner,*-usb-clk" clocks also require:
+- reset-cells : shall be set to 1
+
+The "allwinner,sun9i-a80-mmc-config-clk" clock also requires:
+- #reset-cells : shall be set to 1
+- resets : shall be the reset control phandle for the mmc block.
+
+For "allwinner,sun7i-a20-gmac-clk", the parent clocks shall be fixed rate
+dummy clocks at 25 MHz and 125 MHz, respectively. See example.
+
+Clock consumers should specify the desired clocks they use with a
+"clocks" phandle cell. Consumers that are using a gated clock should
+provide an additional ID in their clock property. This ID is the
+offset of the bit controlling this particular gate in the register.
+For the other clocks with "#clock-cells" = 1, the additional ID shall
+refer to the index of the output.
+
+For "allwinner,sun6i-a31-pll6-clk", there are 2 outputs. The first output
+is the normal PLL6 output, or "pll6". The second output is rate doubled
+PLL6, or "pll6x2".
+
+The "allwinner,*-mmc-clk" clocks have three different outputs: the
+main clock, with the ID 0, and the output and sample clocks, with the
+IDs 1 and 2, respectively.
+
+The "allwinner,sun9i-a80-mmc-config-clk" clock has one clock/reset output
+per mmc controller. The number of outputs is determined by the size of
+the address block, which is related to the overall mmc block.
+
+For example:
+
+osc24M: clk@01c20050 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-osc-clk";
+ reg = <0x01c20050 0x4>;
+ clocks = <&osc24M_fixed>;
+ clock-output-names = "osc24M";
+};
+
+pll1: clk@01c20000 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-pll1-clk";
+ reg = <0x01c20000 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "pll1";
+};
+
+pll5: clk@01c20020 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun4i-pll5-clk";
+ reg = <0x01c20020 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "pll5_ddr", "pll5_other";
+};
+
+pll6: clk@01c20028 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun6i-a31-pll6-clk";
+ reg = <0x01c20028 0x4>;
+ clocks = <&osc24M>;
+ clock-output-names = "pll6", "pll6x2";
+};
+
+cpu: cpu@01c20054 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun4i-a10-cpu-clk";
+ reg = <0x01c20054 0x4>;
+ clocks = <&osc32k>, <&osc24M>, <&pll1>;
+ clock-output-names = "cpu";
+};
+
+mmc0_clk: clk@01c20088 {
+ #clock-cells = <1>;
+ compatible = "allwinner,sun4i-a10-mmc-clk";
+ reg = <0x01c20088 0x4>;
+ clocks = <&osc24M>, <&pll6 1>, <&pll5 1>;
+ clock-output-names = "mmc0", "mmc0_output", "mmc0_sample";
+};
+
+mii_phy_tx_clk: clk@2 {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <25000000>;
+ clock-output-names = "mii_phy_tx";
+};
+
+gmac_int_tx_clk: clk@3 {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <125000000>;
+ clock-output-names = "gmac_int_tx";
+};
+
+gmac_clk: clk@01c20164 {
+ #clock-cells = <0>;
+ compatible = "allwinner,sun7i-a20-gmac-clk";
+ reg = <0x01c20164 0x4>;
+ /*
+ * The first clock must be fixed at 25MHz;
+ * the second clock must be fixed at 125MHz
+ */
+ clocks = <&mii_phy_tx_clk>, <&gmac_int_tx_clk>;
+ clock-output-names = "gmac";
+};
+
+mmc_config_clk: clk@01c13000 {
+ compatible = "allwinner,sun9i-a80-mmc-config-clk";
+ reg = <0x01c13000 0x10>;
+ clocks = <&ahb0_gates 8>;
+ clock-names = "ahb";
+ resets = <&ahb0_resets 8>;
+ reset-names = "ahb";
+ #clock-cells = <1>;
+ #reset-cells = <1>;
+ clock-output-names = "mmc0_config", "mmc1_config",
+ "mmc2_config", "mmc3_config";
+};
diff --git a/Documentation/devicetree/bindings/clock/ti,cdce706.txt b/Documentation/devicetree/bindings/clock/ti,cdce706.txt
new file mode 100644
index 000000000..616836e7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti,cdce706.txt
@@ -0,0 +1,42 @@
+Bindings for Texas Instruments CDCE706 programmable 3-PLL clock
+synthesizer/multiplier/divider.
+
+Reference: http://www.ti.com/lit/ds/symlink/cdce706.pdf
+
+I2C device node required properties:
+- compatible: shall be "ti,cdce706".
+- reg: i2c device address, shall be in range [0x68...0x6b].
+- #clock-cells: from common clock binding; shall be set to 1.
+- clocks: from common clock binding; list of parent clock
+ handles, shall be reference clock(s) connected to CLK_IN0
+ and CLK_IN1 pins.
+- clock-names: shall be clk_in0 and/or clk_in1. Use clk_in0
+ in case of crystal oscillator or differential signal input
+ configuration. Use clk_in0 and clk_in1 in case of independent
+ single-ended LVCMOS inputs configuration.
+
+Example:
+
+ clocks {
+ clk54: clk54 {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <54000000>;
+ };
+ };
+ ...
+ i2c0: i2c-master@0d090000 {
+ ...
+ cdce706: clock-synth@69 {
+ compatible = "ti,cdce706";
+ #clock-cells = <1>;
+ reg = <0x69>;
+ clocks = <&clk54>;
+ clock-names = "clk_in0";
+ };
+ };
+ ...
+ simple-audio-card,codec {
+ ...
+ clocks = <&cdce706 4>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt b/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt
new file mode 100644
index 000000000..3e6a81e99
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti-keystone-pllctrl.txt
@@ -0,0 +1,20 @@
+* Device tree bindings for Texas Instruments keystone pll controller
+
+The main pll controller used to drive theC66x CorePacs, the switch fabric,
+and a majority of the peripheral clocks (all but the ARM CorePacs, DDR3 and
+the NETCP modules) requires a PLL Controller to manage the various clock
+divisions, gating, and synchronization.
+
+Required properties:
+
+- compatible: "ti,keystone-pllctrl", "syscon"
+
+- reg: contains offset/length value for pll controller
+ registers space.
+
+Example:
+
+pllctrl: pll-controller@0x02310000 {
+ compatible = "ti,keystone-pllctrl", "syscon";
+ reg = <0x02310000 0x200>;
+};
diff --git a/Documentation/devicetree/bindings/clock/ti/apll.txt b/Documentation/devicetree/bindings/clock/ti/apll.txt
new file mode 100644
index 000000000..ade4dd4c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/apll.txt
@@ -0,0 +1,45 @@
+Binding for Texas Instruments APLL clock.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1]. It assumes a
+register-mapped APLL with usually two selectable input clocks
+(reference clock and bypass clock), with analog phase locked
+loop logic for multiplying the input clock to a desired output
+clock. This clock also typically supports different operation
+modes (locked, low power stop etc.) APLL mostly behaves like
+a subtype of a DPLL [2], although a simplified one at that.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/clock/ti/dpll.txt
+
+Required properties:
+- compatible : shall be "ti,dra7-apll-clock" or "ti,omap2-apll-clock"
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : link phandles of parent clocks (clk-ref and clk-bypass)
+- reg : address and length of the register set for controlling the APLL.
+ It contains the information of registers in the following order:
+ "control" - contains the control register offset
+ "idlest" - contains the idlest register offset
+ "autoidle" - contains the autoidle register offset (OMAP2 only)
+- ti,clock-frequency : static clock frequency for the clock (OMAP2 only)
+- ti,idlest-shift : bit-shift for the idlest field (OMAP2 only)
+- ti,bit-shift : bit-shift for enable and autoidle fields (OMAP2 only)
+
+Examples:
+ apll_pcie_ck: apll_pcie_ck {
+ #clock-cells = <0>;
+ clocks = <&apll_pcie_in_clk_mux>, <&dpll_pcie_ref_ck>;
+ reg = <0x021c>, <0x0220>;
+ compatible = "ti,dra7-apll-clock";
+ };
+
+ apll96_ck: apll96_ck {
+ #clock-cells = <0>;
+ compatible = "ti,omap2-apll-clock";
+ clocks = <&sys_ck>;
+ ti,bit-shift = <2>;
+ ti,idlest-shift = <8>;
+ ti,clock-frequency = <96000000>;
+ reg = <0x0500>, <0x0530>, <0x0520>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/autoidle.txt b/Documentation/devicetree/bindings/clock/ti/autoidle.txt
new file mode 100644
index 000000000..7c735dde9
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/autoidle.txt
@@ -0,0 +1,39 @@
+Binding for Texas Instruments autoidle clock.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1]. It assumes a register mapped
+clock which can be put to idle automatically by hardware based on the usage
+and a configuration bit setting. Autoidle clock is never an individual
+clock, it is always a derivative of some basic clock like a gate, divider,
+or fixed-factor.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- reg : offset for the register controlling the autoidle
+- ti,autoidle-shift : bit shift of the autoidle enable bit
+- ti,invert-autoidle-bit : autoidle is enabled by setting the bit to 0
+
+Examples:
+ dpll_core_m4_ck: dpll_core_m4_ck {
+ #clock-cells = <0>;
+ compatible = "ti,divider-clock";
+ clocks = <&dpll_core_x2_ck>;
+ ti,max-div = <31>;
+ ti,autoidle-shift = <8>;
+ reg = <0x2d38>;
+ ti,index-starts-at-one;
+ ti,invert-autoidle-bit;
+ };
+
+ dpll_usb_clkdcoldo_ck: dpll_usb_clkdcoldo_ck {
+ #clock-cells = <0>;
+ compatible = "ti,fixed-factor-clock";
+ clocks = <&dpll_usb_ck>;
+ ti,clock-div = <1>;
+ ti,autoidle-shift = <8>;
+ reg = <0x01b4>;
+ ti,clock-mult = <1>;
+ ti,invert-autoidle-bit;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/clockdomain.txt b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
new file mode 100644
index 000000000..cb76b3f2b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/clockdomain.txt
@@ -0,0 +1,24 @@
+Binding for Texas Instruments clockdomain.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1] in consumer role.
+Every clock on TI SoC belongs to one clockdomain, but software
+only needs this information for specific clocks which require
+their parent clockdomain to be controlled when the clock is
+enabled/disabled. This binding doesn't define a new clock
+binding type, it is used to group existing clock nodes under
+hardware hierarchy.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "ti,clockdomain"
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : link phandles of clocks within this domain
+
+Examples:
+ dss_clkdm: dss_clkdm {
+ compatible = "ti,clockdomain";
+ clocks = <&dss1_alwon_fck_3430es2>, <&dss_ick_3430es2>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/composite.txt b/Documentation/devicetree/bindings/clock/ti/composite.txt
new file mode 100644
index 000000000..5f43c4706
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/composite.txt
@@ -0,0 +1,54 @@
+Binding for TI composite clock.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1]. It assumes a
+register-mapped composite clock with multiple different sub-types;
+
+a multiplexer clock with multiple input clock signals or parents, one
+of which can be selected as output, this behaves exactly as [2]
+
+an adjustable clock rate divider, this behaves exactly as [3]
+
+a gating function which can be used to enable and disable the output
+clock, this behaves exactly as [4]
+
+The binding must provide a list of the component clocks that shall be
+merged to this clock. The component clocks shall be of one of the
+"ti,*composite*-clock" types.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/clock/ti/mux.txt
+[3] Documentation/devicetree/bindings/clock/ti/divider.txt
+[4] Documentation/devicetree/bindings/clock/ti/gate.txt
+
+Required properties:
+- compatible : shall be: "ti,composite-clock"
+- clocks : link phandles of component clocks
+- #clock-cells : from common clock binding; shall be set to 0.
+
+Examples:
+
+usb_l4_gate_ick: usb_l4_gate_ick {
+ #clock-cells = <0>;
+ compatible = "ti,composite-interface-clock";
+ clocks = <&l4_ick>;
+ ti,bit-shift = <5>;
+ reg = <0x0a10>;
+};
+
+usb_l4_div_ick: usb_l4_div_ick {
+ #clock-cells = <0>;
+ compatible = "ti,composite-divider-clock";
+ clocks = <&l4_ick>;
+ ti,bit-shift = <4>;
+ ti,max-div = <1>;
+ reg = <0x0a40>;
+ ti,index-starts-at-one;
+};
+
+usb_l4_ick: usb_l4_ick {
+ #clock-cells = <0>;
+ compatible = "ti,composite-clock";
+ clocks = <&usb_l4_gate_ick>, <&usb_l4_div_ick>;
+};
diff --git a/Documentation/devicetree/bindings/clock/ti/divider.txt b/Documentation/devicetree/bindings/clock/ti/divider.txt
new file mode 100644
index 000000000..35a6f5c7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/divider.txt
@@ -0,0 +1,114 @@
+Binding for TI divider clock
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1]. It assumes a
+register-mapped adjustable clock rate divider that does not gate and has
+only one input clock or parent. By default the value programmed into
+the register is one less than the actual divisor value. E.g:
+
+register value actual divisor value
+0 1
+1 2
+2 3
+
+This assumption may be modified by the following optional properties:
+
+ti,index-starts-at-one - valid divisor values start at 1, not the default
+of 0. E.g:
+register value actual divisor value
+1 1
+2 2
+3 3
+
+ti,index-power-of-two - valid divisor values are powers of two. E.g:
+register value actual divisor value
+0 1
+1 2
+2 4
+
+Additionally an array of valid dividers may be supplied like so:
+
+ ti,dividers = <4>, <8>, <0>, <16>;
+
+Which will map the resulting values to a divisor table by their index:
+register value actual divisor value
+0 4
+1 8
+2 <invalid divisor, skipped>
+3 16
+
+Any zero value in this array means the corresponding bit-value is invalid
+and must not be used.
+
+The binding must also provide the register to control the divider and
+unless the divider array is provided, min and max dividers. Optionally
+the number of bits to shift that mask, if necessary. If the shift value
+is missing it is the same as supplying a zero shift.
+
+This binding can also optionally provide support to the hardware autoidle
+feature, see [2].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/clock/ti/autoidle.txt
+
+Required properties:
+- compatible : shall be "ti,divider-clock" or "ti,composite-divider-clock".
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : link to phandle of parent clock
+- reg : offset for register controlling adjustable divider
+
+Optional properties:
+- clock-output-names : from common clock binding.
+- ti,dividers : array of integers defining divisors
+- ti,bit-shift : number of bits to shift the divider value, defaults to 0
+- ti,min-div : min divisor for dividing the input clock rate, only
+ needed if the first divisor is offset from the default value (1)
+- ti,max-div : max divisor for dividing the input clock rate, only needed
+ if ti,dividers is not defined.
+- ti,index-starts-at-one : valid divisor programming starts at 1, not zero,
+ only valid if ti,dividers is not defined.
+- ti,index-power-of-two : valid divisor programming must be a power of two,
+ only valid if ti,dividers is not defined.
+- ti,autoidle-shift : bit shift of the autoidle enable bit for the clock,
+ see [2]
+- ti,invert-autoidle-bit : autoidle is enabled by setting the bit to 0,
+ see [2]
+- ti,set-rate-parent : clk_set_rate is propagated to parent
+
+Examples:
+dpll_usb_m2_ck: dpll_usb_m2_ck@4a008190 {
+ #clock-cells = <0>;
+ compatible = "ti,divider-clock";
+ clocks = <&dpll_usb_ck>;
+ ti,max-div = <127>;
+ reg = <0x190>;
+ ti,index-starts-at-one;
+};
+
+aess_fclk: aess_fclk@4a004528 {
+ #clock-cells = <0>;
+ compatible = "ti,divider-clock";
+ clocks = <&abe_clk>;
+ ti,bit-shift = <24>;
+ reg = <0x528>;
+ ti,max-div = <2>;
+};
+
+dpll_core_m3x2_div_ck: dpll_core_m3x2_div_ck {
+ #clock-cells = <0>;
+ compatible = "ti,composite-divider-clock";
+ clocks = <&dpll_core_x2_ck>;
+ ti,max-div = <31>;
+ reg = <0x0134>;
+ ti,index-starts-at-one;
+};
+
+ssi_ssr_div_fck_3430es2: ssi_ssr_div_fck_3430es2 {
+ #clock-cells = <0>;
+ compatible = "ti,composite-divider-clock";
+ clocks = <&corex2_fck>;
+ ti,bit-shift = <8>;
+ reg = <0x0a40>;
+ ti,dividers = <0>, <1>, <2>, <3>, <4>, <0>, <6>, <0>, <8>;
+};
diff --git a/Documentation/devicetree/bindings/clock/ti/dpll.txt b/Documentation/devicetree/bindings/clock/ti/dpll.txt
new file mode 100644
index 000000000..df57009ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/dpll.txt
@@ -0,0 +1,85 @@
+Binding for Texas Instruments DPLL clock.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1]. It assumes a
+register-mapped DPLL with usually two selectable input clocks
+(reference clock and bypass clock), with digital phase locked
+loop logic for multiplying the input clock to a desired output
+clock. This clock also typically supports different operation
+modes (locked, low power stop etc.) This binding has several
+sub-types, which effectively result in slightly different setup
+for the actual DPLL clock.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be one of:
+ "ti,omap3-dpll-clock",
+ "ti,omap3-dpll-core-clock",
+ "ti,omap3-dpll-per-clock",
+ "ti,omap3-dpll-per-j-type-clock",
+ "ti,omap4-dpll-clock",
+ "ti,omap4-dpll-x2-clock",
+ "ti,omap4-dpll-core-clock",
+ "ti,omap4-dpll-m4xen-clock",
+ "ti,omap4-dpll-j-type-clock",
+ "ti,omap5-mpu-dpll-clock",
+ "ti,am3-dpll-no-gate-clock",
+ "ti,am3-dpll-j-type-clock",
+ "ti,am3-dpll-no-gate-j-type-clock",
+ "ti,am3-dpll-clock",
+ "ti,am3-dpll-core-clock",
+ "ti,am3-dpll-x2-clock",
+ "ti,omap2-dpll-core-clock",
+
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : link phandles of parent clocks, first entry lists reference clock
+ and second entry bypass clock
+- reg : offsets for the register set for controlling the DPLL.
+ Registers are listed in following order:
+ "control" - contains the control register base address
+ "idlest" - contains the idle status register base address
+ "mult-div1" - contains the multiplier / divider register base address
+ "autoidle" - contains the autoidle register base address (optional)
+ ti,am3-* dpll types do not have autoidle register
+ ti,omap2-* dpll type does not support idlest / autoidle registers
+
+Optional properties:
+- DPLL mode setting - defining any one or more of the following overrides
+ default setting.
+ - ti,low-power-stop : DPLL supports low power stop mode, gating output
+ - ti,low-power-bypass : DPLL output matches rate of parent bypass clock
+ - ti,lock : DPLL locks in programmed rate
+
+Examples:
+ dpll_core_ck: dpll_core_ck@44e00490 {
+ #clock-cells = <0>;
+ compatible = "ti,omap4-dpll-core-clock";
+ clocks = <&sys_clkin_ck>, <&sys_clkin_ck>;
+ reg = <0x490>, <0x45c>, <0x488>, <0x468>;
+ };
+
+ dpll2_ck: dpll2_ck@48004004 {
+ #clock-cells = <0>;
+ compatible = "ti,omap3-dpll-clock";
+ clocks = <&sys_ck>, <&dpll2_fck>;
+ ti,low-power-stop;
+ ti,low-power-bypass;
+ ti,lock;
+ reg = <0x4>, <0x24>, <0x34>, <0x40>;
+ };
+
+ dpll_core_ck: dpll_core_ck@44e00490 {
+ #clock-cells = <0>;
+ compatible = "ti,am3-dpll-core-clock";
+ clocks = <&sys_clkin_ck>, <&sys_clkin_ck>;
+ reg = <0x90>, <0x5c>, <0x68>;
+ };
+
+ dpll_ck: dpll_ck {
+ #clock-cells = <0>;
+ compatible = "ti,omap2-dpll-core-clock";
+ clocks = <&sys_ck>, <&sys_ck>;
+ reg = <0x0500>, <0x0540>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt
new file mode 100644
index 000000000..585e8c191
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/dra7-atl.txt
@@ -0,0 +1,96 @@
+Device Tree Clock bindings for ATL (Audio Tracking Logic) of DRA7 SoC.
+
+The ATL IP is used to generate clock to be used to synchronize baseband and
+audio codec. A single ATL IP provides four ATL clock instances sharing the same
+functional clock but can be configured to provide different clocks.
+ATL can maintain a clock averages to some desired frequency based on the bws/aws
+signals - can compensate the drift between the two ws signal.
+
+In order to provide the support for ATL and it's output clocks (which can be used
+internally within the SoC or external components) two sets of bindings is needed:
+
+Clock tree binding:
+This binding uses the common clock binding[1].
+To be able to integrate the ATL clocks with DT clock tree.
+Provides ccf level representation of the ATL clocks to be used by drivers.
+Since the clock instances are part of a single IP this binding is used as a node
+for the DT clock tree, the IP driver is needed to handle the actual configuration
+of the IP.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "ti,dra7-atl-clock"
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : link phandles to functional clock of ATL
+
+Binding for the IP driver:
+This binding is used to configure the IP driver which is going to handle the
+configuration of the IP for the ATL clock instances.
+
+Required properties:
+- compatible : shall be "ti,dra7-atl"
+- reg : base address for the ATL IP
+- ti,provided-clocks : List of phandles to the clocks associated with the ATL
+- clocks : link phandles to functional clock of ATL
+- clock-names : Shall be set to "fck"
+- ti,hwmods : Shall be set to "atl"
+
+Optional properties:
+Configuration of ATL instances:
+- atl{0/1/2/3} {
+ - bws : Baseband word select signal selection
+ - aws : Audio word select signal selection
+};
+
+For valid word select signals, see the dt-bindings/clk/ti-dra7-atl.h include
+file.
+
+Examples:
+/* clock bindings for atl provided clocks */
+atl_clkin0_ck: atl_clkin0_ck {
+ #clock-cells = <0>;
+ compatible = "ti,dra7-atl-clock";
+ clocks = <&atl_gfclk_mux>;
+};
+
+atl_clkin1_ck: atl_clkin1_ck {
+ #clock-cells = <0>;
+ compatible = "ti,dra7-atl-clock";
+ clocks = <&atl_gfclk_mux>;
+};
+
+atl_clkin2_ck: atl_clkin2_ck {
+ #clock-cells = <0>;
+ compatible = "ti,dra7-atl-clock";
+ clocks = <&atl_gfclk_mux>;
+};
+
+atl_clkin3_ck: atl_clkin3_ck {
+ #clock-cells = <0>;
+ compatible = "ti,dra7-atl-clock";
+ clocks = <&atl_gfclk_mux>;
+};
+
+/* binding for the IP */
+atl: atl@4843c000 {
+ compatible = "ti,dra7-atl";
+ reg = <0x4843c000 0x3ff>;
+ ti,hwmods = "atl";
+ ti,provided-clocks = <&atl_clkin0_ck>, <&atl_clkin1_ck>,
+ <&atl_clkin2_ck>, <&atl_clkin3_ck>;
+ clocks = <&atl_gfclk_mux>;
+ clock-names = "fck";
+ status = "disabled";
+};
+
+#include <dt-bindings/clk/ti-dra7-atl.h>
+
+&atl {
+ status = "okay";
+
+ atl2 {
+ bws = <DRA7_ATL_WS_MCASP2_FSX>;
+ aws = <DRA7_ATL_WS_MCASP3_FSX>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/clock/ti/fapll.txt b/Documentation/devicetree/bindings/clock/ti/fapll.txt
new file mode 100644
index 000000000..c19b3f253
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/fapll.txt
@@ -0,0 +1,33 @@
+Binding for Texas Instruments FAPLL clock.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1]. It assumes a
+register-mapped FAPLL with usually two selectable input clocks
+(reference clock and bypass clock), and one or more child
+syntesizers.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "ti,dm816-fapll-clock"
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : link phandles of parent clocks (clk-ref and clk-bypass)
+- reg : address and length of the register set for controlling the FAPLL.
+
+Examples:
+ main_fapll: main_fapll {
+ #clock-cells = <1>;
+ compatible = "ti,dm816-fapll-clock";
+ reg = <0x400 0x40>;
+ clocks = <&sys_clkin_ck &sys_clkin_ck>;
+ clock-indices = <1>, <2>, <3>, <4>, <5>,
+ <6>, <7>;
+ clock-output-names = "main_pll_clk1",
+ "main_pll_clk2",
+ "main_pll_clk3",
+ "main_pll_clk4",
+ "main_pll_clk5",
+ "main_pll_clk6",
+ "main_pll_clk7";
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt
new file mode 100644
index 000000000..662b36d53
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/fixed-factor-clock.txt
@@ -0,0 +1,43 @@
+Binding for TI fixed factor rate clock sources.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1], and also uses the autoidle
+support from TI autoidle clock [2].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/clock/ti/autoidle.txt
+
+Required properties:
+- compatible : shall be "ti,fixed-factor-clock".
+- #clock-cells : from common clock binding; shall be set to 0.
+- ti,clock-div: fixed divider.
+- ti,clock-mult: fixed multiplier.
+- clocks: parent clock.
+
+Optional properties:
+- ti,autoidle-shift: bit shift of the autoidle enable bit for the clock,
+ see [2]
+- reg: offset for the autoidle register of this clock, see [2]
+- ti,invert-autoidle-bit: autoidle is enabled by setting the bit to 0, see [2]
+- ti,set-rate-parent: clk_set_rate is propagated to parent
+
+Example:
+ clock {
+ compatible = "ti,fixed-factor-clock";
+ clocks = <&parentclk>;
+ #clock-cells = <0>;
+ ti,clock-div = <2>;
+ ti,clock-mult = <1>;
+ };
+
+ dpll_usb_clkdcoldo_ck: dpll_usb_clkdcoldo_ck {
+ #clock-cells = <0>;
+ compatible = "ti,fixed-factor-clock";
+ clocks = <&dpll_usb_ck>;
+ ti,clock-div = <1>;
+ ti,autoidle-shift = <8>;
+ reg = <0x01b4>;
+ ti,clock-mult = <1>;
+ ti,invert-autoidle-bit;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/gate.txt b/Documentation/devicetree/bindings/clock/ti/gate.txt
new file mode 100644
index 000000000..03f8fdee6
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/gate.txt
@@ -0,0 +1,106 @@
+Binding for Texas Instruments gate clock.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1]. This clock is
+quite much similar to the basic gate-clock [2], however,
+it supports a number of additional features. If no register
+is provided for this clock, the code assumes that a clockdomain
+will be controlled instead and the corresponding hw-ops for
+that is used.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/clock/gate-clock.txt
+[3] Documentation/devicetree/bindings/clock/ti/clockdomain.txt
+
+Required properties:
+- compatible : shall be one of:
+ "ti,gate-clock" - basic gate clock
+ "ti,wait-gate-clock" - gate clock which waits until clock is active before
+ returning from clk_enable()
+ "ti,dss-gate-clock" - gate clock with DSS specific hardware handling
+ "ti,am35xx-gate-clock" - gate clock with AM35xx specific hardware handling
+ "ti,clkdm-gate-clock" - clockdomain gate clock, which derives its functional
+ clock directly from a clockdomain, see [3] how
+ to map clockdomains properly
+ "ti,hsdiv-gate-clock" - gate clock with OMAP36xx specific hardware handling,
+ required for a hardware errata
+ "ti,composite-gate-clock" - composite gate clock, to be part of composite
+ clock
+ "ti,composite-no-wait-gate-clock" - composite gate clock that does not wait
+ for clock to be active before returning
+ from clk_enable()
+- #clock-cells : from common clock binding; shall be set to 0
+- clocks : link to phandle of parent clock
+- reg : offset for register controlling adjustable gate, not needed for
+ ti,clkdm-gate-clock type
+
+Optional properties:
+- ti,bit-shift : bit shift for programming the clock gate, invalid for
+ ti,clkdm-gate-clock type
+- ti,set-bit-to-disable : inverts default gate programming. Setting the bit
+ gates the clock and clearing the bit ungates the clock.
+
+Examples:
+ mmchs2_fck: mmchs2_fck@48004a00 {
+ #clock-cells = <0>;
+ compatible = "ti,gate-clock";
+ clocks = <&core_96m_fck>;
+ reg = <0x0a00>;
+ ti,bit-shift = <25>;
+ };
+
+ uart4_fck_am35xx: uart4_fck_am35xx {
+ #clock-cells = <0>;
+ compatible = "ti,wait-gate-clock";
+ clocks = <&core_48m_fck>;
+ reg = <0x0a00>;
+ ti,bit-shift = <23>;
+ };
+
+ dss1_alwon_fck_3430es2: dss1_alwon_fck_3430es2@48004e00 {
+ #clock-cells = <0>;
+ compatible = "ti,dss-gate-clock";
+ clocks = <&dpll4_m4x2_ck>;
+ reg = <0x0e00>;
+ ti,bit-shift = <0>;
+ };
+
+ emac_ick: emac_ick@4800259c {
+ #clock-cells = <0>;
+ compatible = "ti,am35xx-gate-clock";
+ clocks = <&ipss_ick>;
+ reg = <0x059c>;
+ ti,bit-shift = <1>;
+ };
+
+ emu_src_ck: emu_src_ck {
+ #clock-cells = <0>;
+ compatible = "ti,clkdm-gate-clock";
+ clocks = <&emu_src_mux_ck>;
+ };
+
+ dpll4_m2x2_ck: dpll4_m2x2_ck@48004d00 {
+ #clock-cells = <0>;
+ compatible = "ti,hsdiv-gate-clock";
+ clocks = <&dpll4_m2x2_mul_ck>;
+ ti,bit-shift = <0x1b>;
+ reg = <0x0d00>;
+ ti,set-bit-to-disable;
+ };
+
+ vlynq_gate_fck: vlynq_gate_fck {
+ #clock-cells = <0>;
+ compatible = "ti,composite-gate-clock";
+ clocks = <&core_ck>;
+ ti,bit-shift = <3>;
+ reg = <0x0200>;
+ };
+
+ sys_clkout2_src_gate: sys_clkout2_src_gate {
+ #clock-cells = <0>;
+ compatible = "ti,composite-no-wait-gate-clock";
+ clocks = <&core_ck>;
+ ti,bit-shift = <15>;
+ reg = <0x0070>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/interface.txt b/Documentation/devicetree/bindings/clock/ti/interface.txt
new file mode 100644
index 000000000..3111a409f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/interface.txt
@@ -0,0 +1,56 @@
+Binding for Texas Instruments interface clock.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1]. This clock is
+quite much similar to the basic gate-clock [2], however,
+it supports a number of additional features, including
+companion clock finding (match corresponding functional gate
+clock) and hardware autoidle enable / disable.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+[2] Documentation/devicetree/bindings/clock/gate-clock.txt
+
+Required properties:
+- compatible : shall be one of:
+ "ti,omap3-interface-clock" - basic OMAP3 interface clock
+ "ti,omap3-no-wait-interface-clock" - interface clock which has no hardware
+ capability for waiting clock to be ready
+ "ti,omap3-hsotgusb-interface-clock" - interface clock with USB specific HW
+ handling
+ "ti,omap3-dss-interface-clock" - interface clock with DSS specific HW handling
+ "ti,omap3-ssi-interface-clock" - interface clock with SSI specific HW handling
+ "ti,am35xx-interface-clock" - interface clock with AM35xx specific HW handling
+ "ti,omap2430-interface-clock" - interface clock with OMAP2430 specific HW
+ handling
+- #clock-cells : from common clock binding; shall be set to 0
+- clocks : link to phandle of parent clock
+- reg : base address for the control register
+
+Optional properties:
+- ti,bit-shift : bit shift for the bit enabling/disabling the clock (default 0)
+
+Examples:
+ aes1_ick: aes1_ick@48004a14 {
+ #clock-cells = <0>;
+ compatible = "ti,omap3-interface-clock";
+ clocks = <&security_l4_ick2>;
+ reg = <0x48004a14 0x4>;
+ ti,bit-shift = <3>;
+ };
+
+ cam_ick: cam_ick@48004f10 {
+ #clock-cells = <0>;
+ compatible = "ti,omap3-no-wait-interface-clock";
+ clocks = <&l4_ick>;
+ reg = <0x48004f10 0x4>;
+ ti,bit-shift = <0>;
+ };
+
+ ssi_ick_3430es2: ssi_ick_3430es2@48004a10 {
+ #clock-cells = <0>;
+ compatible = "ti,omap3-ssi-interface-clock";
+ clocks = <&ssi_l4_ick>;
+ reg = <0x48004a10 0x4>;
+ ti,bit-shift = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/clock/ti/mux.txt b/Documentation/devicetree/bindings/clock/ti/mux.txt
new file mode 100644
index 000000000..2d0d170f8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/ti/mux.txt
@@ -0,0 +1,76 @@
+Binding for TI mux clock.
+
+Binding status: Unstable - ABI compatibility may be broken in the future
+
+This binding uses the common clock binding[1]. It assumes a
+register-mapped multiplexer with multiple input clock signals or
+parents, one of which can be selected as output. This clock does not
+gate or adjust the parent rate via a divider or multiplier.
+
+By default the "clocks" property lists the parents in the same order
+as they are programmed into the regster. E.g:
+
+ clocks = <&foo_clock>, <&bar_clock>, <&baz_clock>;
+
+results in programming the register as follows:
+
+register value selected parent clock
+0 foo_clock
+1 bar_clock
+2 baz_clock
+
+Some clock controller IPs do not allow a value of zero to be programmed
+into the register, instead indexing begins at 1. The optional property
+"index-starts-at-one" modified the scheme as follows:
+
+register value selected clock parent
+1 foo_clock
+2 bar_clock
+3 baz_clock
+
+The binding must provide the register to control the mux. Optionally
+the number of bits to shift the control field in the register can be
+supplied. If the shift value is missing it is the same as supplying
+a zero shift.
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be "ti,mux-clock" or "ti,composite-mux-clock".
+- #clock-cells : from common clock binding; shall be set to 0.
+- clocks : link phandles of parent clocks
+- reg : register offset for register controlling adjustable mux
+
+Optional properties:
+- ti,bit-shift : number of bits to shift the bit-mask, defaults to
+ 0 if not present
+- ti,index-starts-at-one : valid input select programming starts at 1, not
+ zero
+- ti,set-rate-parent : clk_set_rate is propagated to parent clock,
+ not supported by the composite-mux-clock subtype
+
+Examples:
+
+sys_clkin_ck: sys_clkin_ck@4a306110 {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&virt_12000000_ck>, <&virt_13000000_ck>, <&virt_16800000_ck>, <&virt_19200000_ck>, <&virt_26000000_ck>, <&virt_27000000_ck>, <&virt_38400000_ck>;
+ reg = <0x0110>;
+ ti,index-starts-at-one;
+};
+
+abe_dpll_bypass_clk_mux_ck: abe_dpll_bypass_clk_mux_ck@4a306108 {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin_ck>, <&sys_32k_ck>;
+ ti,bit-shift = <24>;
+ reg = <0x0108>;
+};
+
+mcbsp5_mux_fck: mcbsp5_mux_fck {
+ #clock-cells = <0>;
+ compatible = "ti,composite-mux-clock";
+ clocks = <&core_96m_fck>, <&mcbsp_clks>;
+ ti,bit-shift = <4>;
+ reg = <0x02d8>;
+};
diff --git a/Documentation/devicetree/bindings/clock/vf610-clock.txt b/Documentation/devicetree/bindings/clock/vf610-clock.txt
new file mode 100644
index 000000000..63f9f1ac3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/vf610-clock.txt
@@ -0,0 +1,41 @@
+* Clock bindings for Freescale Vybrid VF610 SOC
+
+Required properties:
+- compatible: Should be "fsl,vf610-ccm"
+- reg: Address and length of the register set
+- #clock-cells: Should be <1>
+
+Optional properties:
+- clocks: list of clock identifiers which are external input clocks to the
+ given clock controller. Please refer the next section to find
+ the input clocks for a given controller.
+- clock-names: list of names of clocks which are exteral input clocks to the
+ given clock controller.
+
+Input clocks for top clock controller:
+ - sxosc (external crystal oscillator 32KHz, recommended)
+ - fxosc (external crystal oscillator 24MHz, recommended)
+ - audio_ext
+ - enet_ext
+
+The clock consumer should specify the desired clock by having the clock
+ID in its "clocks" phandle cell. See include/dt-bindings/clock/vf610-clock.h
+for the full list of VF610 clock IDs.
+
+Examples:
+
+clks: ccm@4006b000 {
+ compatible = "fsl,vf610-ccm";
+ reg = <0x4006b000 0x1000>;
+ #clock-cells = <1>;
+ clocks = <&sxosc>, <&fxosc>;
+ clock-names = "sxosc", "fxosc";
+};
+
+uart1: serial@40028000 {
+ compatible = "fsl,vf610-uart";
+ reg = <0x40028000 0x1000>;
+ interrupts = <0 62 0x04>;
+ clocks = <&clks VF610_CLK_UART1>;
+ clock-names = "ipg";
+};
diff --git a/Documentation/devicetree/bindings/clock/vt8500.txt b/Documentation/devicetree/bindings/clock/vt8500.txt
new file mode 100644
index 000000000..91d71cc03
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/vt8500.txt
@@ -0,0 +1,74 @@
+Device Tree Clock bindings for arch-vt8500
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be one of the following:
+ "via,vt8500-pll-clock" - for a VT8500/WM8505 PLL clock
+ "wm,wm8650-pll-clock" - for a WM8650 PLL clock
+ "wm,wm8750-pll-clock" - for a WM8750 PLL clock
+ "wm,wm8850-pll-clock" - for a WM8850 PLL clock
+ "via,vt8500-device-clock" - for a VT/WM device clock
+
+Required properties for PLL clocks:
+- reg : shall be the control register offset from PMC base for the pll clock.
+- clocks : shall be the input parent clock phandle for the clock. This should
+ be the reference clock.
+- #clock-cells : from common clock binding; shall be set to 0.
+
+Required properties for device clocks:
+- clocks : shall be the input parent clock phandle for the clock. This should
+ be a pll output.
+- #clock-cells : from common clock binding; shall be set to 0.
+
+
+Device Clocks
+
+Device clocks are required to have one or both of the following sets of
+properties:
+
+
+Gated device clocks:
+
+Required properties:
+- enable-reg : shall be the register offset from PMC base for the enable
+ register.
+- enable-bit : shall be the bit within enable-reg to enable/disable the clock.
+
+
+Divisor device clocks:
+
+Required property:
+- divisor-reg : shall be the register offset from PMC base for the divisor
+ register.
+Optional property:
+- divisor-mask : shall be the mask for the divisor register. Defaults to 0x1f
+ if not specified.
+
+
+For example:
+
+ref25: ref25M {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <25000000>;
+};
+
+plla: plla {
+ #clock-cells = <0>;
+ compatible = "wm,wm8650-pll-clock";
+ clocks = <&ref25>;
+ reg = <0x200>;
+};
+
+sdhc: sdhc {
+ #clock-cells = <0>;
+ compatible = "via,vt8500-device-clock";
+ clocks = <&pllb>;
+ divisor-reg = <0x328>;
+ divisor-mask = <0x3f>;
+ enable-reg = <0x254>;
+ enable-bit = <18>;
+};
diff --git a/Documentation/devicetree/bindings/clock/xgene.txt b/Documentation/devicetree/bindings/clock/xgene.txt
new file mode 100644
index 000000000..1c4ef773f
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/xgene.txt
@@ -0,0 +1,111 @@
+Device Tree Clock bindings for APM X-Gene
+
+This binding uses the common clock binding[1].
+
+[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Required properties:
+- compatible : shall be one of the following:
+ "apm,xgene-socpll-clock" - for a X-Gene SoC PLL clock
+ "apm,xgene-pcppll-clock" - for a X-Gene PCP PLL clock
+ "apm,xgene-device-clock" - for a X-Gene device clock
+
+Required properties for SoC or PCP PLL clocks:
+- reg : shall be the physical PLL register address for the pll clock.
+- clocks : shall be the input parent clock phandle for the clock. This should
+ be the reference clock.
+- #clock-cells : shall be set to 1.
+- clock-output-names : shall be the name of the PLL referenced by derive
+ clock.
+Optional properties for PLL clocks:
+- clock-names : shall be the name of the PLL. If missing, use the device name.
+
+Required properties for device clocks:
+- reg : shall be a list of address and length pairs describing the CSR
+ reset and/or the divider. Either may be omitted, but at least
+ one must be present.
+ - reg-names : shall be a string list describing the reg resource. This
+ may include "csr-reg" and/or "div-reg". If this property
+ is not present, the reg property is assumed to describe
+ only "csr-reg".
+- clocks : shall be the input parent clock phandle for the clock.
+- #clock-cells : shall be set to 1.
+- clock-output-names : shall be the name of the device referenced.
+Optional properties for device clocks:
+- clock-names : shall be the name of the device clock. If missing, use the
+ device name.
+- csr-offset : Offset to the CSR reset register from the reset address base.
+ Default is 0.
+- csr-mask : CSR reset mask bit. Default is 0xF.
+- enable-offset : Offset to the enable register from the reset address base.
+ Default is 0x8.
+- enable-mask : CSR enable mask bit. Default is 0xF.
+- divider-offset : Offset to the divider CSR register from the divider base.
+ Default is 0x0.
+- divider-width : Width of the divider register. Default is 0.
+- divider-shift : Bit shift of the divider register. Default is 0.
+
+For example:
+
+ pcppll: pcppll@17000100 {
+ compatible = "apm,xgene-pcppll-clock";
+ #clock-cells = <1>;
+ clocks = <&refclk 0>;
+ clock-names = "pcppll";
+ reg = <0x0 0x17000100 0x0 0x1000>;
+ clock-output-names = "pcppll";
+ type = <0>;
+ };
+
+ socpll: socpll@17000120 {
+ compatible = "apm,xgene-socpll-clock";
+ #clock-cells = <1>;
+ clocks = <&refclk 0>;
+ clock-names = "socpll";
+ reg = <0x0 0x17000120 0x0 0x1000>;
+ clock-output-names = "socpll";
+ type = <1>;
+ };
+
+ qmlclk: qmlclk {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ clock-names = "qmlclk";
+ reg = <0x0 0x1703C000 0x0 0x1000>;
+ reg-name = "csr-reg";
+ clock-output-names = "qmlclk";
+ };
+
+ ethclk: ethclk {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ clock-names = "ethclk";
+ reg = <0x0 0x17000000 0x0 0x1000>;
+ reg-names = "div-reg";
+ divider-offset = <0x238>;
+ divider-width = <0x9>;
+ divider-shift = <0x0>;
+ clock-output-names = "ethclk";
+ };
+
+ apbclk: apbclk {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&ahbclk 0>;
+ clock-names = "apbclk";
+ reg = <0x0 0x1F2AC000 0x0 0x1000
+ 0x0 0x1F2AC000 0x0 0x1000>;
+ reg-names = "csr-reg", "div-reg";
+ csr-offset = <0x0>;
+ csr-mask = <0x200>;
+ enable-offset = <0x8>;
+ enable-mask = <0x200>;
+ divider-offset = <0x10>;
+ divider-width = <0x2>;
+ divider-shift = <0x0>;
+ flags = <0x8>;
+ clock-output-names = "apbclk";
+ };
+
diff --git a/Documentation/devicetree/bindings/clock/zynq-7000.txt b/Documentation/devicetree/bindings/clock/zynq-7000.txt
new file mode 100644
index 000000000..d93746cf2
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/zynq-7000.txt
@@ -0,0 +1,110 @@
+Device Tree Clock bindings for the Zynq 7000 EPP
+
+The Zynq EPP has several different clk providers, each with there own bindings.
+The purpose of this document is to document their usage.
+
+See clock_bindings.txt for more information on the generic clock bindings.
+See Chapter 25 of Zynq TRM for more information about Zynq clocks.
+
+== Clock Controller ==
+The clock controller is a logical abstraction of Zynq's clock tree. It reads
+required input clock frequencies from the devicetree and acts as clock provider
+for all clock consumers of PS clocks.
+
+Required properties:
+ - #clock-cells : Must be 1
+ - compatible : "xlnx,ps7-clkc"
+ - reg : SLCR offset and size taken via syscon < 0x100 0x100 >
+ - ps-clk-frequency : Frequency of the oscillator providing ps_clk in HZ
+ (usually 33 MHz oscillators are used for Zynq platforms)
+ - clock-output-names : List of strings used to name the clock outputs. Shall be
+ a list of the outputs given below.
+
+Optional properties:
+ - clocks : as described in the clock bindings
+ - clock-names : as described in the clock bindings
+ - fclk-enable : Bit mask to enable FCLKs statically at boot time.
+ Bit [0..3] correspond to FCLK0..FCLK3. The corresponding
+ FCLK will only be enabled if it is actually running at
+ boot time.
+
+Clock inputs:
+The following strings are optional parameters to the 'clock-names' property in
+order to provide an optional (E)MIO clock source.
+ - swdt_ext_clk
+ - gem0_emio_clk
+ - gem1_emio_clk
+ - mio_clk_XX # with XX = 00..53
+...
+
+Clock outputs:
+ 0: armpll
+ 1: ddrpll
+ 2: iopll
+ 3: cpu_6or4x
+ 4: cpu_3or2x
+ 5: cpu_2x
+ 6: cpu_1x
+ 7: ddr2x
+ 8: ddr3x
+ 9: dci
+ 10: lqspi
+ 11: smc
+ 12: pcap
+ 13: gem0
+ 14: gem1
+ 15: fclk0
+ 16: fclk1
+ 17: fclk2
+ 18: fclk3
+ 19: can0
+ 20: can1
+ 21: sdio0
+ 22: sdio1
+ 23: uart0
+ 24: uart1
+ 25: spi0
+ 26: spi1
+ 27: dma
+ 28: usb0_aper
+ 29: usb1_aper
+ 30: gem0_aper
+ 31: gem1_aper
+ 32: sdio0_aper
+ 33: sdio1_aper
+ 34: spi0_aper
+ 35: spi1_aper
+ 36: can0_aper
+ 37: can1_aper
+ 38: i2c0_aper
+ 39: i2c1_aper
+ 40: uart0_aper
+ 41: uart1_aper
+ 42: gpio_aper
+ 43: lqspi_aper
+ 44: smc_aper
+ 45: swdt
+ 46: dbg_trc
+ 47: dbg_apb
+
+Example:
+ clkc: clkc@100 {
+ #clock-cells = <1>;
+ compatible = "xlnx,ps7-clkc";
+ ps-clk-frequency = <33333333>;
+ reg = <0x100 0x100>;
+ clock-output-names = "armpll", "ddrpll", "iopll", "cpu_6or4x",
+ "cpu_3or2x", "cpu_2x", "cpu_1x", "ddr2x", "ddr3x",
+ "dci", "lqspi", "smc", "pcap", "gem0", "gem1",
+ "fclk0", "fclk1", "fclk2", "fclk3", "can0", "can1",
+ "sdio0", "sdio1", "uart0", "uart1", "spi0", "spi1",
+ "dma", "usb0_aper", "usb1_aper", "gem0_aper",
+ "gem1_aper", "sdio0_aper", "sdio1_aper",
+ "spi0_aper", "spi1_aper", "can0_aper", "can1_aper",
+ "i2c0_aper", "i2c1_aper", "uart0_aper", "uart1_aper",
+ "gpio_aper", "lqspi_aper", "smc_aper", "swdt",
+ "dbg_trc", "dbg_apb";
+ # optional props
+ clocks = <&clkc 16>, <&clk_foo>;
+ clock-names = "gem1_emio_clk", "can_mio_clk_23";
+ };
diff --git a/Documentation/devicetree/bindings/common-properties.txt b/Documentation/devicetree/bindings/common-properties.txt
new file mode 100644
index 000000000..3193979b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/common-properties.txt
@@ -0,0 +1,60 @@
+Common properties
+
+The ePAPR specification does not define any properties related to hardware
+byteswapping, but endianness issues show up frequently in porting Linux to
+different machine types. This document attempts to provide a consistent
+way of handling byteswapping across drivers.
+
+Optional properties:
+ - big-endian: Boolean; force big endian register accesses
+ unconditionally (e.g. ioread32be/iowrite32be). Use this if you
+ know the peripheral always needs to be accessed in BE mode.
+ - little-endian: Boolean; force little endian register accesses
+ unconditionally (e.g. readl/writel). Use this if you know the
+ peripheral always needs to be accessed in LE mode.
+ - native-endian: Boolean; always use register accesses matched to the
+ endianness of the kernel binary (e.g. LE vmlinux -> readl/writel,
+ BE vmlinux -> ioread32be/iowrite32be). In this case no byteswaps
+ will ever be performed. Use this if the hardware "self-adjusts"
+ register endianness based on the CPU's configured endianness.
+
+If a binding supports these properties, then the binding should also
+specify the default behavior if none of these properties are present.
+In such cases, little-endian is the preferred default, but it is not
+a requirement. The of_device_is_big_endian() and of_fdt_is_big_endian()
+helper functions do assume that little-endian is the default, because
+most existing (PCI-based) drivers implicitly default to LE by using
+readl/writel for MMIO accesses.
+
+Examples:
+Scenario 1 : CPU in LE mode & device in LE mode.
+dev: dev@40031000 {
+ compatible = "name";
+ reg = <0x40031000 0x1000>;
+ ...
+ native-endian;
+};
+
+Scenario 2 : CPU in LE mode & device in BE mode.
+dev: dev@40031000 {
+ compatible = "name";
+ reg = <0x40031000 0x1000>;
+ ...
+ big-endian;
+};
+
+Scenario 3 : CPU in BE mode & device in BE mode.
+dev: dev@40031000 {
+ compatible = "name";
+ reg = <0x40031000 0x1000>;
+ ...
+ native-endian;
+};
+
+Scenario 4 : CPU in BE mode & device in LE mode.
+dev: dev@40031000 {
+ compatible = "name";
+ reg = <0x40031000 0x1000>;
+ ...
+ little-endian;
+};
diff --git a/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt b/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt
new file mode 100644
index 000000000..0715695e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/arm_big_little_dt.txt
@@ -0,0 +1,65 @@
+Generic ARM big LITTLE cpufreq driver's DT glue
+-----------------------------------------------
+
+This is DT specific glue layer for generic cpufreq driver for big LITTLE
+systems.
+
+Both required and optional properties listed below must be defined
+under node /cpus/cpu@x. Where x is the first cpu inside a cluster.
+
+FIXME: Cpus should boot in the order specified in DT and all cpus for a cluster
+must be present contiguously. Generic DT driver will check only node 'x' for
+cpu:x.
+
+Required properties:
+- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt
+ for details
+
+Optional properties:
+- clock-latency: Specify the possible maximum transition latency for clock,
+ in unit of nanoseconds.
+
+Examples:
+
+cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "arm,cortex-a15";
+ reg = <0>;
+ next-level-cache = <&L2>;
+ operating-points = <
+ /* kHz uV */
+ 792000 1100000
+ 396000 950000
+ 198000 850000
+ >;
+ clock-latency = <61036>; /* two CLK32 periods */
+ };
+
+ cpu@1 {
+ compatible = "arm,cortex-a15";
+ reg = <1>;
+ next-level-cache = <&L2>;
+ };
+
+ cpu@100 {
+ compatible = "arm,cortex-a7";
+ reg = <100>;
+ next-level-cache = <&L2>;
+ operating-points = <
+ /* kHz uV */
+ 792000 950000
+ 396000 750000
+ 198000 450000
+ >;
+ clock-latency = <61036>; /* two CLK32 periods */
+ };
+
+ cpu@101 {
+ compatible = "arm,cortex-a7";
+ reg = <101>;
+ next-level-cache = <&L2>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt
new file mode 100644
index 000000000..e41c98ffb
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-dt.txt
@@ -0,0 +1,64 @@
+Generic cpufreq driver
+
+It is a generic DT based cpufreq driver for frequency management. It supports
+both uniprocessor (UP) and symmetric multiprocessor (SMP) systems which share
+clock and voltage across all CPUs.
+
+Both required and optional properties listed below must be defined
+under node /cpus/cpu@0.
+
+Required properties:
+- None
+
+Optional properties:
+- operating-points: Refer to Documentation/devicetree/bindings/power/opp.txt for
+ details. OPPs *must* be supplied either via DT, i.e. this property, or
+ populated at runtime.
+- clock-latency: Specify the possible maximum transition latency for clock,
+ in unit of nanoseconds.
+- voltage-tolerance: Specify the CPU voltage tolerance in percentage.
+- #cooling-cells:
+- cooling-min-level:
+- cooling-max-level:
+ Please refer to Documentation/devicetree/bindings/thermal/thermal.txt.
+
+Examples:
+
+cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ compatible = "arm,cortex-a9";
+ reg = <0>;
+ next-level-cache = <&L2>;
+ operating-points = <
+ /* kHz uV */
+ 792000 1100000
+ 396000 950000
+ 198000 850000
+ >;
+ clock-latency = <61036>; /* two CLK32 periods */
+ #cooling-cells = <2>;
+ cooling-min-level = <0>;
+ cooling-max-level = <2>;
+ };
+
+ cpu@1 {
+ compatible = "arm,cortex-a9";
+ reg = <1>;
+ next-level-cache = <&L2>;
+ };
+
+ cpu@2 {
+ compatible = "arm,cortex-a9";
+ reg = <2>;
+ next-level-cache = <&L2>;
+ };
+
+ cpu@3 {
+ compatible = "arm,cortex-a9";
+ reg = <3>;
+ next-level-cache = <&L2>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-exynos5440.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-exynos5440.txt
new file mode 100644
index 000000000..caff1a574
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-exynos5440.txt
@@ -0,0 +1,28 @@
+
+Exynos5440 cpufreq driver
+-------------------
+
+Exynos5440 SoC cpufreq driver for CPU frequency scaling.
+
+Required properties:
+- interrupts: Interrupt to know the completion of cpu frequency change.
+- operating-points: Table of frequencies and voltage CPU could be transitioned into,
+ in the decreasing order. Frequency should be in KHz units and voltage
+ should be in microvolts.
+
+Optional properties:
+- clock-latency: Clock monitor latency in microsecond.
+
+All the required listed above must be defined under node cpufreq.
+
+Example:
+--------
+ cpufreq@160000 {
+ compatible = "samsung,exynos5440-cpufreq";
+ reg = <0x160000 0x1000>;
+ interrupts = <0 57 0>;
+ operating-points = <
+ 1000000 975000
+ 800000 925000>;
+ clock-latency = <100000>;
+ };
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-spear.txt b/Documentation/devicetree/bindings/cpufreq/cpufreq-spear.txt
new file mode 100644
index 000000000..f3d44984d
--- /dev/null
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-spear.txt
@@ -0,0 +1,42 @@
+SPEAr cpufreq driver
+-------------------
+
+SPEAr SoC cpufreq driver for CPU frequency scaling.
+It supports both uniprocessor (UP) and symmetric multiprocessor (SMP) systems
+which share clock across all CPUs.
+
+Required properties:
+- cpufreq_tbl: Table of frequencies CPU could be transitioned into, in the
+ increasing order.
+
+Optional properties:
+- clock-latency: Specify the possible maximum transition latency for clock, in
+ unit of nanoseconds.
+
+Both required and optional properties listed above must be defined under node
+/cpus/cpu@0.
+
+Examples:
+--------
+cpus {
+
+ <...>
+
+ cpu@0 {
+ compatible = "arm,cortex-a9";
+ reg = <0>;
+
+ <...>
+
+ cpufreq_tbl = < 166000
+ 200000
+ 250000
+ 300000
+ 400000
+ 500000
+ 600000 >;
+ };
+
+ <...>
+
+};
diff --git a/Documentation/devicetree/bindings/cris/axis.txt b/Documentation/devicetree/bindings/cris/axis.txt
new file mode 100644
index 000000000..d209ca2a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/cris/axis.txt
@@ -0,0 +1,9 @@
+Axis Communications AB
+ARTPEC series SoC Device Tree Bindings
+
+
+CRISv32 based SoCs are ETRAX FS and ARTPEC-3:
+
+ - compatible = "axis,crisv32";
+
+
diff --git a/Documentation/devicetree/bindings/cris/boards.txt b/Documentation/devicetree/bindings/cris/boards.txt
new file mode 100644
index 000000000..533dd273c
--- /dev/null
+++ b/Documentation/devicetree/bindings/cris/boards.txt
@@ -0,0 +1,8 @@
+Boards based on the CRIS SoCs:
+
+Required root node properties:
+ - compatible = should be one or more of the following:
+ - "axis,dev88" - for Axis devboard 88 with ETRAX FS
+
+Optional:
+
diff --git a/Documentation/devicetree/bindings/cris/interrupts.txt b/Documentation/devicetree/bindings/cris/interrupts.txt
new file mode 100644
index 000000000..e8b123b0a
--- /dev/null
+++ b/Documentation/devicetree/bindings/cris/interrupts.txt
@@ -0,0 +1,23 @@
+* CRISv32 Interrupt Controller
+
+Interrupt controller for the CRISv32 SoCs.
+
+Main node required properties:
+
+- compatible : should be:
+ "axis,crisv32-intc"
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The type shall be a <u32> and the value shall be 1.
+- reg: physical base address and size of the intc registers map.
+
+Example:
+
+ intc: interrupt-controller {
+ compatible = "axis,crisv32-intc";
+ reg = <0xb001c000 0x1000>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+
diff --git a/Documentation/devicetree/bindings/crypto/amd-ccp.txt b/Documentation/devicetree/bindings/crypto/amd-ccp.txt
new file mode 100644
index 000000000..8c61183b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/amd-ccp.txt
@@ -0,0 +1,19 @@
+* AMD Cryptographic Coprocessor driver (ccp)
+
+Required properties:
+- compatible: Should be "amd,ccp-seattle-v1a"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the CCP interrupt
+
+Optional properties:
+- dma-coherent: Present if dma operations are coherent
+
+Example:
+ ccp@e0100000 {
+ compatible = "amd,ccp-seattle-v1a";
+ reg = <0 0xe0100000 0 0x10000>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 3 4>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/atmel-crypto.txt b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
new file mode 100644
index 000000000..f2aab3dc2
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/atmel-crypto.txt
@@ -0,0 +1,68 @@
+* Atmel HW cryptographic accelerators
+
+These are the HW cryptographic accelerators found on some Atmel products.
+
+* Advanced Encryption Standard (AES)
+
+Required properties:
+- compatible : Should be "atmel,at91sam9g46-aes".
+- reg: Should contain AES registers location and length.
+- interrupts: Should contain the IRQ line for the AES.
+- dmas: List of two DMA specifiers as described in
+ atmel-dma.txt and dma.txt files.
+- dma-names: Contains one identifier string for each DMA specifier
+ in the dmas property.
+
+Example:
+aes@f8038000 {
+ compatible = "atmel,at91sam9g46-aes";
+ reg = <0xf8038000 0x100>;
+ interrupts = <43 4 0>;
+ dmas = <&dma1 2 18>,
+ <&dma1 2 19>;
+ dma-names = "tx", "rx";
+
+* Triple Data Encryption Standard (Triple DES)
+
+Required properties:
+- compatible : Should be "atmel,at91sam9g46-tdes".
+- reg: Should contain TDES registers location and length.
+- interrupts: Should contain the IRQ line for the TDES.
+
+Optional properties:
+- dmas: List of two DMA specifiers as described in
+ atmel-dma.txt and dma.txt files.
+- dma-names: Contains one identifier string for each DMA specifier
+ in the dmas property.
+
+Example:
+tdes@f803c000 {
+ compatible = "atmel,at91sam9g46-tdes";
+ reg = <0xf803c000 0x100>;
+ interrupts = <44 4 0>;
+ dmas = <&dma1 2 20>,
+ <&dma1 2 21>;
+ dma-names = "tx", "rx";
+};
+
+* Secure Hash Algorithm (SHA)
+
+Required properties:
+- compatible : Should be "atmel,at91sam9g46-sha".
+- reg: Should contain SHA registers location and length.
+- interrupts: Should contain the IRQ line for the SHA.
+
+Optional properties:
+- dmas: One DMA specifiers as described in
+ atmel-dma.txt and dma.txt files.
+- dma-names: Contains one identifier string for each DMA specifier
+ in the dmas property. Only one "tx" string needed.
+
+Example:
+sha@f8034000 {
+ compatible = "atmel,at91sam9g46-sha";
+ reg = <0xf8034000 0x100>;
+ interrupts = <42 4 0>;
+ dmas = <&dma1 2 17>;
+ dma-names = "tx";
+};
diff --git a/Documentation/devicetree/bindings/crypto/fsl-dcp.txt b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
new file mode 100644
index 000000000..6949e50f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-dcp.txt
@@ -0,0 +1,17 @@
+Freescale DCP (Data Co-Processor) found on i.MX23/i.MX28 .
+
+Required properties:
+- compatible : Should be "fsl,<soc>-dcp"
+- reg : Should contain MXS DCP registers location and length
+- interrupts : Should contain MXS DCP interrupt numbers, VMI IRQ and DCP IRQ
+ must be supplied, optionally Secure IRQ can be present, but
+ is currently not implemented and not used.
+
+Example:
+
+dcp@80028000 {
+ compatible = "fsl,imx28-dcp", "fsl,imx23-dcp";
+ reg = <0x80028000 0x2000>;
+ interrupts = <52 53>;
+ status = "okay";
+};
diff --git a/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt
new file mode 100644
index 000000000..e8a35c71e
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-imx-sahara.txt
@@ -0,0 +1,15 @@
+Freescale SAHARA Cryptographic Accelerator included in some i.MX chips.
+Currently only i.MX27 and i.MX53 are supported.
+
+Required properties:
+- compatible : Should be "fsl,<soc>-sahara"
+- reg : Should contain SAHARA registers location and length
+- interrupts : Should contain SAHARA interrupt number
+
+Example:
+
+sah@10025000 {
+ compatible = "fsl,imx27-sahara";
+ reg = < 0x10025000 0x800>;
+ interrupts = <75>;
+};
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
new file mode 100644
index 000000000..38988ef13
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
@@ -0,0 +1,68 @@
+Freescale SoC SEC Security Engines versions 2.x-3.x
+
+Required properties:
+
+- compatible : Should contain entries for this and backward compatible
+ SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0"
+- reg : Offset and length of the register set for the device
+- interrupts : the SEC's interrupt number
+- fsl,num-channels : An integer representing the number of channels
+ available.
+- fsl,channel-fifo-len : An integer representing the number of
+ descriptor pointers each channel fetch fifo can hold.
+- fsl,exec-units-mask : The bitmask representing what execution units
+ (EUs) are available. It's a single 32-bit cell. EU information
+ should be encoded following the SEC's Descriptor Header Dword
+ EU_SEL0 field documentation, i.e. as follows:
+
+ bit 0 = reserved - should be 0
+ bit 1 = set if SEC has the ARC4 EU (AFEU)
+ bit 2 = set if SEC has the DES/3DES EU (DEU)
+ bit 3 = set if SEC has the message digest EU (MDEU/MDEU-A)
+ bit 4 = set if SEC has the random number generator EU (RNG)
+ bit 5 = set if SEC has the public key EU (PKEU)
+ bit 6 = set if SEC has the AES EU (AESU)
+ bit 7 = set if SEC has the Kasumi EU (KEU)
+ bit 8 = set if SEC has the CRC EU (CRCU)
+ bit 11 = set if SEC has the message digest EU extended alg set (MDEU-B)
+
+remaining bits are reserved for future SEC EUs.
+
+- fsl,descriptor-types-mask : The bitmask representing what descriptors
+ are available. It's a single 32-bit cell. Descriptor type information
+ should be encoded following the SEC's Descriptor Header Dword DESC_TYPE
+ field documentation, i.e. as follows:
+
+ bit 0 = set if SEC supports the aesu_ctr_nonsnoop desc. type
+ bit 1 = set if SEC supports the ipsec_esp descriptor type
+ bit 2 = set if SEC supports the common_nonsnoop desc. type
+ bit 3 = set if SEC supports the 802.11i AES ccmp desc. type
+ bit 4 = set if SEC supports the hmac_snoop_no_afeu desc. type
+ bit 5 = set if SEC supports the srtp descriptor type
+ bit 6 = set if SEC supports the non_hmac_snoop_no_afeu desc.type
+ bit 7 = set if SEC supports the pkeu_assemble descriptor type
+ bit 8 = set if SEC supports the aesu_key_expand_output desc.type
+ bit 9 = set if SEC supports the pkeu_ptmul descriptor type
+ bit 10 = set if SEC supports the common_nonsnoop_afeu desc. type
+ bit 11 = set if SEC supports the pkeu_ptadd_dbl descriptor type
+
+ ..and so on and so forth.
+
+Optional properties:
+
+- interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+
+Example:
+
+ /* MPC8548E */
+ crypto@30000 {
+ compatible = "fsl,sec2.1", "fsl,sec2.0";
+ reg = <0x30000 0x10000>;
+ interrupts = <29 2>;
+ interrupt-parent = <&mpic>;
+ fsl,num-channels = <4>;
+ fsl,channel-fifo-len = <24>;
+ fsl,exec-units-mask = <0xfe>;
+ fsl,descriptor-types-mask = <0x12b0ebf>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec4.txt b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
new file mode 100644
index 000000000..e4022776a
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec4.txt
@@ -0,0 +1,455 @@
+=====================================================================
+SEC 4 Device Tree Binding
+Copyright (C) 2008-2011 Freescale Semiconductor Inc.
+
+ CONTENTS
+ -Overview
+ -SEC 4 Node
+ -Job Ring Node
+ -Run Time Integrity Check (RTIC) Node
+ -Run Time Integrity Check (RTIC) Memory Node
+ -Secure Non-Volatile Storage (SNVS) Node
+ -Secure Non-Volatile Storage (SNVS) Low Power (LP) RTC Node
+ -Full Example
+
+NOTE: the SEC 4 is also known as Freescale's Cryptographic Accelerator
+Accelerator and Assurance Module (CAAM).
+
+=====================================================================
+Overview
+
+DESCRIPTION
+
+SEC 4 h/w can process requests from 2 types of sources.
+1. DPAA Queue Interface (HW interface between Queue Manager & SEC 4).
+2. Job Rings (HW interface between cores & SEC 4 registers).
+
+High Speed Data Path Configuration:
+
+HW interface between QM & SEC 4 and also BM & SEC 4, on DPAA-enabled parts
+such as the P4080. The number of simultaneous dequeues the QI can make is
+equal to the number of Descriptor Controller (DECO) engines in a particular
+SEC version. E.g., the SEC 4.0 in the P4080 has 5 DECOs and can thus
+dequeue from 5 subportals simultaneously.
+
+Job Ring Data Path Configuration:
+
+Each JR is located on a separate 4k page, they may (or may not) be made visible
+in the memory partition devoted to a particular core. The P4080 has 4 JRs, so
+up to 4 JRs can be configured; and all 4 JRs process requests in parallel.
+
+=====================================================================
+SEC 4 Node
+
+Description
+
+ Node defines the base address of the SEC 4 block.
+ This block specifies the address range of all global
+ configuration registers for the SEC 4 block. It
+ also receives interrupts from the Run Time Integrity Check
+ (RTIC) function within the SEC 4 block.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,sec-v4.0"
+
+ - fsl,sec-era
+ Usage: optional
+ Value type: <u32>
+ Definition: A standard property. Define the 'ERA' of the SEC
+ device.
+
+ - #address-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Defines the number of cells
+ for representing physical addresses in child nodes.
+
+ - #size-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Defines the number of cells
+ for representing the size of physical addresses in
+ child nodes.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical
+ address and length of the SEC4 configuration registers.
+ registers
+
+ - ranges
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ range of the SEC 4.0 register space (-SNVS not included). A
+ triplet that includes the child address, parent address, &
+ length.
+
+ - interrupts
+ Usage: required
+ Value type: <prop_encoded-array>
+ Definition: Specifies the interrupts generated by this
+ device. The value of the interrupts property
+ consists of one interrupt specifier. The format
+ of the specifier is defined by the binding document
+ describing the node's interrupt parent.
+
+ - interrupt-parent
+ Usage: (required if interrupt property is defined)
+ Value type: <phandle>
+ Definition: A single <phandle> value that points
+ to the interrupt parent to which the child domain
+ is being mapped.
+
+ Note: All other standard properties (see the ePAPR) are allowed
+ but are optional.
+
+
+EXAMPLE
+ crypto@300000 {
+ compatible = "fsl,sec-v4.0";
+ fsl,sec-era = <2>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x300000 0x10000>;
+ ranges = <0 0x300000 0x10000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <92 2>;
+ };
+
+=====================================================================
+Job Ring (JR) Node
+
+ Child of the crypto node defines data processing interface to SEC 4
+ across the peripheral bus for purposes of processing
+ cryptographic descriptors. The specified address
+ range can be made visible to one (or more) cores.
+ The interrupt defined for this node is controlled within
+ the address range of this node.
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,sec-v4.0-job-ring"
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: Specifies a two JR parameters: an offset from
+ the parent physical address and the length the JR registers.
+
+ - fsl,liodn
+ Usage: optional-but-recommended
+ Value type: <prop-encoded-array>
+ Definition:
+ Specifies the LIODN to be used in conjunction with
+ the ppid-to-liodn table that specifies the PPID to LIODN mapping.
+ Needed if the PAMU is used. Value is a 12 bit value
+ where value is a LIODN ID for this JR. This property is
+ normally set by boot firmware.
+
+ - interrupts
+ Usage: required
+ Value type: <prop_encoded-array>
+ Definition: Specifies the interrupts generated by this
+ device. The value of the interrupts property
+ consists of one interrupt specifier. The format
+ of the specifier is defined by the binding document
+ describing the node's interrupt parent.
+
+ - interrupt-parent
+ Usage: (required if interrupt property is defined)
+ Value type: <phandle>
+ Definition: A single <phandle> value that points
+ to the interrupt parent to which the child domain
+ is being mapped.
+
+EXAMPLE
+ jr@1000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x1000 0x1000>;
+ fsl,liodn = <0x081>;
+ interrupt-parent = <&mpic>;
+ interrupts = <88 2>;
+ };
+
+
+=====================================================================
+Run Time Integrity Check (RTIC) Node
+
+ Child node of the crypto node. Defines a register space that
+ contains up to 5 sets of addresses and their lengths (sizes) that
+ will be checked at run time. After an initial hash result is
+ calculated, these addresses are checked by HW to monitor any
+ change. If any memory is modified, a Security Violation is
+ triggered (see SNVS definition).
+
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,sec-v4.0-rtic".
+
+ - #address-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Defines the number of cells
+ for representing physical addresses in child nodes. Must
+ have a value of 1.
+
+ - #size-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Defines the number of cells
+ for representing the size of physical addresses in
+ child nodes. Must have a value of 1.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies a two parameters:
+ an offset from the parent physical address and the length
+ the SEC4 registers.
+
+ - ranges
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ range of the SEC 4 register space (-SNVS not included). A
+ triplet that includes the child address, parent address, &
+ length.
+
+EXAMPLE
+ rtic@6000 {
+ compatible = "fsl,sec-v4.0-rtic";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x6000 0x100>;
+ ranges = <0x0 0x6100 0xe00>;
+ };
+
+=====================================================================
+Run Time Integrity Check (RTIC) Memory Node
+ A child node that defines individual RTIC memory regions that are used to
+ perform run-time integrity check of memory areas that should not modified.
+ The node defines a register that contains the memory address &
+ length (combined) and a second register that contains the hash result
+ in big endian format.
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,sec-v4.0-rtic-memory".
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies two parameters:
+ an offset from the parent physical address and the length:
+
+ 1. The location of the RTIC memory address & length registers.
+ 2. The location RTIC hash result.
+
+ - fsl,rtic-region
+ Usage: optional-but-recommended
+ Value type: <prop-encoded-array>
+ Definition:
+ Specifies the HW address (36 bit address) for this region
+ followed by the length of the HW partition to be checked;
+ the address is represented as a 64 bit quantity followed
+ by a 32 bit length.
+
+ - fsl,liodn
+ Usage: optional-but-recommended
+ Value type: <prop-encoded-array>
+ Definition:
+ Specifies the LIODN to be used in conjunction with
+ the ppid-to-liodn table that specifies the PPID to LIODN
+ mapping. Needed if the PAMU is used. Value is a 12 bit value
+ where value is a LIODN ID for this RTIC memory region. This
+ property is normally set by boot firmware.
+
+EXAMPLE
+ rtic-a@0 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x00 0x20 0x100 0x80>;
+ fsl,liodn = <0x03c>;
+ fsl,rtic-region = <0x12345678 0x12345678 0x12345678>;
+ };
+
+=====================================================================
+Secure Non-Volatile Storage (SNVS) Node
+
+ Node defines address range and the associated
+ interrupt for the SNVS function. This function
+ monitors security state information & reports
+ security violations.
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,sec-v4.0-mon".
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical
+ address and length of the SEC4 configuration
+ registers.
+
+ - #address-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Defines the number of cells
+ for representing physical addresses in child nodes. Must
+ have a value of 1.
+
+ - #size-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Defines the number of cells
+ for representing the size of physical addresses in
+ child nodes. Must have a value of 1.
+
+ - ranges
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ range of the SNVS register space. A triplet that includes
+ the child address, parent address, & length.
+
+ - interrupts
+ Usage: required
+ Value type: <prop_encoded-array>
+ Definition: Specifies the interrupts generated by this
+ device. The value of the interrupts property
+ consists of one interrupt specifier. The format
+ of the specifier is defined by the binding document
+ describing the node's interrupt parent.
+
+ - interrupt-parent
+ Usage: (required if interrupt property is defined)
+ Value type: <phandle>
+ Definition: A single <phandle> value that points
+ to the interrupt parent to which the child domain
+ is being mapped.
+
+EXAMPLE
+ sec_mon@314000 {
+ compatible = "fsl,sec-v4.0-mon";
+ reg = <0x314000 0x1000>;
+ ranges = <0 0x314000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <93 2>;
+ };
+
+=====================================================================
+Secure Non-Volatile Storage (SNVS) Low Power (LP) RTC Node
+
+ A SNVS child node that defines SNVS LP RTC.
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,sec-v4.0-mon-rtc-lp".
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical
+ address and length of the SNVS LP configuration registers.
+
+EXAMPLE
+ sec_mon_rtc_lp@314000 {
+ compatible = "fsl,sec-v4.0-mon-rtc-lp";
+ reg = <0x34 0x58>;
+ };
+
+=====================================================================
+FULL EXAMPLE
+
+ crypto: crypto@300000 {
+ compatible = "fsl,sec-v4.0";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x300000 0x10000>;
+ ranges = <0 0x300000 0x10000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <92 2>;
+
+ sec_jr0: jr@1000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x1000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <88 2>;
+ };
+
+ sec_jr1: jr@2000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x2000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <89 2>;
+ };
+
+ sec_jr2: jr@3000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x3000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <90 2>;
+ };
+
+ sec_jr3: jr@4000 {
+ compatible = "fsl,sec-v4.0-job-ring";
+ reg = <0x4000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <91 2>;
+ };
+
+ rtic@6000 {
+ compatible = "fsl,sec-v4.0-rtic";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x6000 0x100>;
+ ranges = <0x0 0x6100 0xe00>;
+
+ rtic_a: rtic-a@0 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x00 0x20 0x100 0x80>;
+ };
+
+ rtic_b: rtic-b@20 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x20 0x20 0x200 0x80>;
+ };
+
+ rtic_c: rtic-c@40 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x40 0x20 0x300 0x80>;
+ };
+
+ rtic_d: rtic-d@60 {
+ compatible = "fsl,sec-v4.0-rtic-memory";
+ reg = <0x60 0x20 0x500 0x80>;
+ };
+ };
+ };
+
+ sec_mon: sec_mon@314000 {
+ compatible = "fsl,sec-v4.0-mon";
+ reg = <0x314000 0x1000>;
+ ranges = <0 0x314000 0x1000>;
+ interrupt-parent = <&mpic>;
+ interrupts = <93 2>;
+
+ sec_mon_rtc_lp@34 {
+ compatible = "fsl,sec-v4.0-mon-rtc-lp";
+ reg = <0x34 0x58>;
+ };
+ };
+
+=====================================================================
diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec6.txt b/Documentation/devicetree/bindings/crypto/fsl-sec6.txt
new file mode 100644
index 000000000..baf8a3c1b
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec6.txt
@@ -0,0 +1,157 @@
+SEC 6 is as Freescale's Cryptographic Accelerator and Assurance Module (CAAM).
+Currently Freescale powerpc chip C29X is embedded with SEC 6.
+SEC 6 device tree binding include:
+ -SEC 6 Node
+ -Job Ring Node
+ -Full Example
+
+=====================================================================
+SEC 6 Node
+
+Description
+
+ Node defines the base address of the SEC 6 block.
+ This block specifies the address range of all global
+ configuration registers for the SEC 6 block.
+ For example, In C293, we could see three SEC 6 node.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,sec-v6.0".
+
+ - fsl,sec-era
+ Usage: optional
+ Value type: <u32>
+ Definition: A standard property. Define the 'ERA' of the SEC
+ device.
+
+ - #address-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Defines the number of cells
+ for representing physical addresses in child nodes.
+
+ - #size-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Defines the number of cells
+ for representing the size of physical addresses in
+ child nodes.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical
+ address and length of the SEC 6 configuration registers.
+
+ - ranges
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ range of the SEC 6.0 register space (-SNVS not included). A
+ triplet that includes the child address, parent address, &
+ length.
+
+ Note: All other standard properties (see the ePAPR) are allowed
+ but are optional.
+
+EXAMPLE
+ crypto@a0000 {
+ compatible = "fsl,sec-v6.0";
+ fsl,sec-era = <6>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0xa0000 0x20000>;
+ ranges = <0 0xa0000 0x20000>;
+ };
+
+=====================================================================
+Job Ring (JR) Node
+
+ Child of the crypto node defines data processing interface to SEC 6
+ across the peripheral bus for purposes of processing
+ cryptographic descriptors. The specified address
+ range can be made visible to one (or more) cores.
+ The interrupt defined for this node is controlled within
+ the address range of this node.
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,sec-v6.0-job-ring".
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: Specifies a two JR parameters: an offset from
+ the parent physical address and the length the JR registers.
+
+ - interrupts
+ Usage: required
+ Value type: <prop_encoded-array>
+ Definition: Specifies the interrupts generated by this
+ device. The value of the interrupts property
+ consists of one interrupt specifier. The format
+ of the specifier is defined by the binding document
+ describing the node's interrupt parent.
+
+EXAMPLE
+ jr@1000 {
+ compatible = "fsl,sec-v6.0-job-ring";
+ reg = <0x1000 0x1000>;
+ interrupts = <49 2 0 0>;
+ };
+
+===================================================================
+Full Example
+
+Since some chips may contain more than one SEC, the dtsi contains
+only the node contents, not the node itself. A chip using the SEC
+should include the dtsi inside each SEC node. Example:
+
+In qoriq-sec6.0.dtsi:
+
+ compatible = "fsl,sec-v6.0";
+ fsl,sec-era = <6>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ jr@1000 {
+ compatible = "fsl,sec-v6.0-job-ring",
+ "fsl,sec-v5.2-job-ring",
+ "fsl,sec-v5.0-job-ring",
+ "fsl,sec-v4.4-job-ring",
+ "fsl,sec-v4.0-job-ring";
+ reg = <0x1000 0x1000>;
+ };
+
+ jr@2000 {
+ compatible = "fsl,sec-v6.0-job-ring",
+ "fsl,sec-v5.2-job-ring",
+ "fsl,sec-v5.0-job-ring",
+ "fsl,sec-v4.4-job-ring",
+ "fsl,sec-v4.0-job-ring";
+ reg = <0x2000 0x1000>;
+ };
+
+In the C293 device tree, we add the include of public property:
+
+ crypto@a0000 {
+ /include/ "qoriq-sec6.0.dtsi"
+ }
+
+ crypto@a0000 {
+ reg = <0xa0000 0x20000>;
+ ranges = <0 0xa0000 0x20000>;
+
+ jr@1000 {
+ interrupts = <49 2 0 0>;
+ };
+
+ jr@2000 {
+ interrupts = <50 2 0 0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/crypto/img-hash.txt b/Documentation/devicetree/bindings/crypto/img-hash.txt
new file mode 100644
index 000000000..91a3d757d
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/img-hash.txt
@@ -0,0 +1,27 @@
+Imagination Technologies hardware hash accelerator
+
+The hash accelerator provides hardware hashing acceleration for
+SHA1, SHA224, SHA256 and MD5 hashes
+
+Required properties:
+
+- compatible : "img,hash-accelerator"
+- reg : Offset and length of the register set for the module, and the DMA port
+- interrupts : The designated IRQ line for the hashing module.
+- dmas : DMA specifier as per Documentation/devicetree/bindings/dma/dma.txt
+- dma-names : Should be "tx"
+- clocks : Clock specifiers
+- clock-names : "sys" Used to clock the hash block registers
+ "hash" Used to clock data through the accelerator
+
+Example:
+
+ hash: hash@18149600 {
+ compatible = "img,hash-accelerator";
+ reg = <0x18149600 0x100>, <0x18101100 0x4>;
+ interrupts = <GIC_SHARED 59 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&dma 8 0xffffffff 0>;
+ dma-names = "tx";
+ clocks = <&cr_periph SYS_CLK_HASH>, <&clk_periph PERIPH_CLK_ROM>;
+ clock-names = "sys", "hash";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/mv_cesa.txt b/Documentation/devicetree/bindings/crypto/mv_cesa.txt
new file mode 100644
index 000000000..47229b1a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/mv_cesa.txt
@@ -0,0 +1,20 @@
+Marvell Cryptographic Engines And Security Accelerator
+
+Required properties:
+- compatible : should be "marvell,orion-crypto"
+- reg : base physical address of the engine and length of memory mapped
+ region, followed by base physical address of sram and its memory
+ length
+- reg-names : "regs" , "sram";
+- interrupts : interrupt number
+
+Examples:
+
+ crypto@30000 {
+ compatible = "marvell,orion-crypto";
+ reg = <0x30000 0x10000>,
+ <0x4000000 0x800>;
+ reg-names = "regs" , "sram";
+ interrupts = <22>;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/omap-aes.txt b/Documentation/devicetree/bindings/crypto/omap-aes.txt
new file mode 100644
index 000000000..fd9717653
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/omap-aes.txt
@@ -0,0 +1,31 @@
+OMAP SoC AES crypto Module
+
+Required properties:
+
+- compatible : Should contain entries for this and backward compatible
+ AES versions:
+ - "ti,omap2-aes" for OMAP2.
+ - "ti,omap3-aes" for OMAP3.
+ - "ti,omap4-aes" for OMAP4 and AM33XX.
+ Note that the OMAP2 and 3 versions are compatible (OMAP3 supports
+ more algorithms) but they are incompatible with OMAP4.
+- ti,hwmods: Name of the hwmod associated with the AES module
+- reg : Offset and length of the register set for the module
+- interrupts : the interrupt-specifier for the AES module.
+
+Optional properties:
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+ Documentation/devicetree/bindings/dma/dma.txt
+- dma-names: DMA request names should include "tx" and "rx" if present.
+
+Example:
+ /* AM335x */
+ aes: aes@53500000 {
+ compatible = "ti,omap4-aes";
+ ti,hwmods = "aes";
+ reg = <0x53500000 0xa0>;
+ interrupts = <102>;
+ dmas = <&edma 6>,
+ <&edma 5>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/omap-des.txt b/Documentation/devicetree/bindings/crypto/omap-des.txt
new file mode 100644
index 000000000..e8c63bf2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/omap-des.txt
@@ -0,0 +1,30 @@
+OMAP SoC DES crypto Module
+
+Required properties:
+
+- compatible : Should contain "ti,omap4-des"
+- ti,hwmods: Name of the hwmod associated with the DES module
+- reg : Offset and length of the register set for the module
+- interrupts : the interrupt-specifier for the DES module
+- clocks : A phandle to the functional clock node of the DES module
+ corresponding to each entry in clock-names
+- clock-names : Name of the functional clock, should be "fck"
+
+Optional properties:
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+ Documentation/devicetree/bindings/dma/dma.txt
+ Each entry corresponds to an entry in dma-names
+- dma-names: DMA request names should include "tx" and "rx" if present
+
+Example:
+ /* DRA7xx SoC */
+ des: des@480a5000 {
+ compatible = "ti,omap4-des";
+ ti,hwmods = "des";
+ reg = <0x480a5000 0xa0>;
+ interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&sdma 117>, <&sdma 116>;
+ dma-names = "tx", "rx";
+ clocks = <&l3_iclk_div>;
+ clock-names = "fck";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/omap-sham.txt b/Documentation/devicetree/bindings/crypto/omap-sham.txt
new file mode 100644
index 000000000..ad9115569
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/omap-sham.txt
@@ -0,0 +1,28 @@
+OMAP SoC SHA crypto Module
+
+Required properties:
+
+- compatible : Should contain entries for this and backward compatible
+ SHAM versions:
+ - "ti,omap2-sham" for OMAP2 & OMAP3.
+ - "ti,omap4-sham" for OMAP4 and AM33XX.
+ - "ti,omap5-sham" for OMAP5, DRA7 and AM43XX.
+- ti,hwmods: Name of the hwmod associated with the SHAM module
+- reg : Offset and length of the register set for the module
+- interrupts : the interrupt-specifier for the SHAM module.
+
+Optional properties:
+- dmas: DMA specifiers for the rx dma. See the DMA client binding,
+ Documentation/devicetree/bindings/dma/dma.txt
+- dma-names: DMA request name. Should be "rx" if a dma is present.
+
+Example:
+ /* AM335x */
+ sham: sham@53100000 {
+ compatible = "ti,omap4-sham";
+ ti,hwmods = "sham";
+ reg = <0x53100000 0x200>;
+ interrupts = <109>;
+ dmas = <&edma 36>;
+ dma-names = "rx";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/picochip-spacc.txt b/Documentation/devicetree/bindings/crypto/picochip-spacc.txt
new file mode 100644
index 000000000..d8609ece1
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/picochip-spacc.txt
@@ -0,0 +1,23 @@
+Picochip picoXcell SPAcc (Security Protocol Accelerator) bindings
+
+Picochip picoXcell devices contain crypto offload engines that may be used for
+IPSEC and femtocell layer 2 ciphering.
+
+Required properties:
+ - compatible : "picochip,spacc-ipsec" for the IPSEC offload engine
+ "picochip,spacc-l2" for the femtocell layer 2 ciphering engine.
+ - reg : Offset and length of the register set for this device
+ - interrupt-parent : The interrupt controller that controls the SPAcc
+ interrupt.
+ - interrupts : The interrupt line from the SPAcc.
+ - ref-clock : The input clock that drives the SPAcc.
+
+Example SPAcc node:
+
+spacc@10000 {
+ compatible = "picochip,spacc-ipsec";
+ reg = <0x100000 0x10000>;
+ interrupt-parent = <&vic0>;
+ interrupts = <24>;
+ ref-clock = <&ipsec_clk>, "ref";
+};
diff --git a/Documentation/devicetree/bindings/crypto/qcom-qce.txt b/Documentation/devicetree/bindings/crypto/qcom-qce.txt
new file mode 100644
index 000000000..fdd53b184
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/qcom-qce.txt
@@ -0,0 +1,25 @@
+Qualcomm crypto engine driver
+
+Required properties:
+
+- compatible : should be "qcom,crypto-v5.1"
+- reg : specifies base physical address and size of the registers map
+- clocks : phandle to clock-controller plus clock-specifier pair
+- clock-names : "iface" clocks register interface
+ "bus" clocks data transfer interface
+ "core" clocks rest of the crypto block
+- dmas : DMA specifiers for tx and rx dma channels. For more see
+ Documentation/devicetree/bindings/dma/dma.txt
+- dma-names : DMA request names should be "rx" and "tx"
+
+Example:
+ crypto@fd45a000 {
+ compatible = "qcom,crypto-v5.1";
+ reg = <0xfd45a000 0x6000>;
+ clocks = <&gcc GCC_CE2_AHB_CLK>,
+ <&gcc GCC_CE2_AXI_CLK>,
+ <&gcc GCC_CE2_CLK>;
+ clock-names = "iface", "bus", "core";
+ dmas = <&cryptobam 2>, <&cryptobam 3>;
+ dma-names = "rx", "tx";
+ };
diff --git a/Documentation/devicetree/bindings/crypto/samsung-sss.txt b/Documentation/devicetree/bindings/crypto/samsung-sss.txt
new file mode 100644
index 000000000..a6dafa83c
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/samsung-sss.txt
@@ -0,0 +1,34 @@
+Samsung SoC SSS (Security SubSystem) module
+
+The SSS module in S5PV210 SoC supports the following:
+-- Feeder (FeedCtrl)
+-- Advanced Encryption Standard (AES)
+-- Data Encryption Standard (DES)/3DES
+-- Public Key Accelerator (PKA)
+-- SHA-1/SHA-256/MD5/HMAC (SHA-1/SHA-256/MD5)/PRNG
+-- PRNG: Pseudo Random Number Generator
+
+The SSS module in Exynos4 (Exynos4210) and
+Exynos5 (Exynos5420 and Exynos5250) SoCs
+supports the following also:
+-- ARCFOUR (ARC4)
+-- True Random Number Generator (TRNG)
+-- Secure Key Manager
+
+Required properties:
+
+- compatible : Should contain entries for this and backward compatible
+ SSS versions:
+ - "samsung,s5pv210-secss" for S5PV210 SoC.
+ - "samsung,exynos4210-secss" for Exynos4210, Exynos4212, Exynos4412, Exynos5250,
+ Exynos5260 and Exynos5420 SoCs.
+- reg : Offset and length of the register set for the module
+- interrupts : interrupt specifiers of SSS module interrupts, should contain
+ following entries:
+ - first : feed control interrupt (required for all variants),
+ - second : hash interrupt (required only for samsung,s5pv210-secss).
+
+- clocks : list of clock phandle and specifier pairs for all clocks listed in
+ clock-names property.
+- clock-names : list of device clock input names; should contain one entry
+ "secss".
diff --git a/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt
new file mode 100644
index 000000000..b54bf3a2f
--- /dev/null
+++ b/Documentation/devicetree/bindings/devfreq/event/exynos-ppmu.txt
@@ -0,0 +1,110 @@
+
+* Samsung Exynos PPMU (Platform Performance Monitoring Unit) device
+
+The Samsung Exynos SoC has PPMU (Platform Performance Monitoring Unit) for
+each IP. PPMU provides the primitive values to get performance data. These
+PPMU events provide information of the SoC's behaviors so that you may
+use to analyze system performance, to make behaviors visible and to count
+usages of each IP (DMC, CPU, RIGHTBUS, LEFTBUS, CAM interface, LCD, G3D, MFC).
+The Exynos PPMU driver uses the devfreq-event class to provide event data
+to various devfreq devices. The devfreq devices would use the event data when
+derterming the current state of each IP.
+
+Required properties:
+- compatible: Should be "samsung,exynos-ppmu".
+- reg: physical base address of each PPMU and length of memory mapped region.
+
+Optional properties:
+- clock-names : the name of clock used by the PPMU, "ppmu"
+- clocks : phandles for clock specified in "clock-names" property
+- #clock-cells: should be 1.
+
+Example1 : PPMU nodes in exynos3250.dtsi are listed below.
+
+ ppmu_dmc0: ppmu_dmc0@106a0000 {
+ compatible = "samsung,exynos-ppmu";
+ reg = <0x106a0000 0x2000>;
+ status = "disabled";
+ };
+
+ ppmu_dmc1: ppmu_dmc1@106b0000 {
+ compatible = "samsung,exynos-ppmu";
+ reg = <0x106b0000 0x2000>;
+ status = "disabled";
+ };
+
+ ppmu_cpu: ppmu_cpu@106c0000 {
+ compatible = "samsung,exynos-ppmu";
+ reg = <0x106c0000 0x2000>;
+ status = "disabled";
+ };
+
+ ppmu_rightbus: ppmu_rightbus@112a0000 {
+ compatible = "samsung,exynos-ppmu";
+ reg = <0x112a0000 0x2000>;
+ clocks = <&cmu CLK_PPMURIGHT>;
+ clock-names = "ppmu";
+ status = "disabled";
+ };
+
+ ppmu_leftbus: ppmu_leftbus0@116a0000 {
+ compatible = "samsung,exynos-ppmu";
+ reg = <0x116a0000 0x2000>;
+ clocks = <&cmu CLK_PPMULEFT>;
+ clock-names = "ppmu";
+ status = "disabled";
+ };
+
+Example2 : Events of each PPMU node in exynos3250-rinato.dts are listed below.
+
+ &ppmu_dmc0 {
+ status = "okay";
+
+ events {
+ ppmu_dmc0_3: ppmu-event3-dmc0 {
+ event-name = "ppmu-event3-dmc0";
+ };
+
+ ppmu_dmc0_2: ppmu-event2-dmc0 {
+ event-name = "ppmu-event2-dmc0";
+ };
+
+ ppmu_dmc0_1: ppmu-event1-dmc0 {
+ event-name = "ppmu-event1-dmc0";
+ };
+
+ ppmu_dmc0_0: ppmu-event0-dmc0 {
+ event-name = "ppmu-event0-dmc0";
+ };
+ };
+ };
+
+ &ppmu_dmc1 {
+ status = "okay";
+
+ events {
+ ppmu_dmc1_3: ppmu-event3-dmc1 {
+ event-name = "ppmu-event3-dmc1";
+ };
+ };
+ };
+
+ &ppmu_leftbus {
+ status = "okay";
+
+ events {
+ ppmu_leftbus_3: ppmu-event3-leftbus {
+ event-name = "ppmu-event3-leftbus";
+ };
+ };
+ };
+
+ &ppmu_rightbus {
+ status = "okay";
+
+ events {
+ ppmu_rightbus_3: ppmu-event3-rightbus {
+ event-name = "ppmu-event3-rightbus";
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt
new file mode 100644
index 000000000..d3058768b
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/apm-xgene-dma.txt
@@ -0,0 +1,47 @@
+Applied Micro X-Gene SoC DMA nodes
+
+DMA nodes are defined to describe on-chip DMA interfaces in
+APM X-Gene SoC.
+
+Required properties for DMA interfaces:
+- compatible: Should be "apm,xgene-dma".
+- device_type: set to "dma".
+- reg: Address and length of the register set for the device.
+ It contains the information of registers in the following order:
+ 1st - DMA control and status register address space.
+ 2nd - Descriptor ring control and status register address space.
+ 3rd - Descriptor ring command register address space.
+ 4th - Soc efuse register address space.
+- interrupts: DMA has 5 interrupts sources. 1st interrupt is
+ DMA error reporting interrupt. 2nd, 3rd, 4th and 5th interrupts
+ are completion interrupts for each DMA channels.
+- clocks: Reference to the clock entry.
+
+Optional properties:
+- dma-coherent : Present if dma operations are coherent
+
+Example:
+ dmaclk: dmaclk@1f27c000 {
+ compatible = "apm,xgene-device-clock";
+ #clock-cells = <1>;
+ clocks = <&socplldiv2 0>;
+ reg = <0x0 0x1f27c000 0x0 0x1000>;
+ reg-names = "csr-reg";
+ clock-output-names = "dmaclk";
+ };
+
+ dma: dma@1f270000 {
+ compatible = "apm,xgene-storm-dma";
+ device_type = "dma";
+ reg = <0x0 0x1f270000 0x0 0x10000>,
+ <0x0 0x1f200000 0x0 0x10000>,
+ <0x0 0x1b008000 0x0 0x2000>,
+ <0x0 0x1054a000 0x0 0x100>;
+ interrupts = <0x0 0x82 0x4>,
+ <0x0 0xb8 0x4>,
+ <0x0 0xb9 0x4>,
+ <0x0 0xba 0x4>,
+ <0x0 0xbb 0x4>;
+ dma-coherent;
+ clocks = <&dmaclk 0>;
+ };
diff --git a/Documentation/devicetree/bindings/dma/arm-pl330.txt b/Documentation/devicetree/bindings/dma/arm-pl330.txt
new file mode 100644
index 000000000..267565894
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/arm-pl330.txt
@@ -0,0 +1,44 @@
+* ARM PrimeCell PL330 DMA Controller
+
+The ARM PrimeCell PL330 DMA controller can move blocks of memory contents
+between memory and peripherals or memory to memory.
+
+Required properties:
+ - compatible: should include both "arm,pl330" and "arm,primecell".
+ - reg: physical base address of the controller and length of memory mapped
+ region.
+ - interrupts: interrupt number to the cpu.
+
+Optional properties:
+ - dma-coherent : Present if dma operations are coherent
+ - #dma-cells: must be <1>. used to represent the number of integer
+ cells in the dmas property of client device.
+ - dma-channels: contains the total number of DMA channels supported by the DMAC
+ - dma-requests: contains the total number of DMA requests supported by the DMAC
+
+Example:
+
+ pdma0: pdma@12680000 {
+ compatible = "arm,pl330", "arm,primecell";
+ reg = <0x12680000 0x1000>;
+ interrupts = <99>;
+ #dma-cells = <1>;
+ #dma-channels = <8>;
+ #dma-requests = <32>;
+ };
+
+Client drivers (device nodes requiring dma transfers from dev-to-mem or
+mem-to-dev) should specify the DMA channel numbers and dma channel names
+as shown below.
+
+ [property name] = <[phandle of the dma controller] [dma request id]>;
+ [property name] = <[dma channel name]>
+
+ where 'dma request id' is the dma request number which is connected
+ to the client controller. The 'property name' 'dmas' and 'dma-names'
+ as required by the generic dma device tree binding helpers. The dma
+ names correspond 1:1 with the dma request ids in the dmas property.
+
+ Example: dmas = <&pdma0 12
+ &pdma1 11>;
+ dma-names = "tx", "rx";
diff --git a/Documentation/devicetree/bindings/dma/atmel-dma.txt b/Documentation/devicetree/bindings/dma/atmel-dma.txt
new file mode 100644
index 000000000..f69bcf5a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/atmel-dma.txt
@@ -0,0 +1,42 @@
+* Atmel Direct Memory Access Controller (DMA)
+
+Required properties:
+- compatible: Should be "atmel,<chip>-dma".
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain DMA interrupt.
+- #dma-cells: Must be <2>, used to represent the number of integer cells in
+the dmas property of client devices.
+
+Example:
+
+dma0: dma@ffffec00 {
+ compatible = "atmel,at91sam9g45-dma";
+ reg = <0xffffec00 0x200>;
+ interrupts = <21>;
+ #dma-cells = <2>;
+};
+
+DMA clients connected to the Atmel DMA controller must use the format
+described in the dma.txt file, using a three-cell specifier for each channel:
+a phandle plus two integer cells.
+The three cells in order are:
+
+1. A phandle pointing to the DMA controller.
+2. The memory interface (16 most significant bits), the peripheral interface
+(16 less significant bits).
+3. Parameters for the at91 DMA configuration register which are device
+dependent:
+ - bit 7-0: peripheral identifier for the hardware handshaking interface. The
+ identifier can be different for tx and rx.
+ - bit 11-8: FIFO configuration. 0 for half FIFO, 1 for ALAP, 2 for ASAP.
+
+Example:
+
+i2c0@i2c@f8010000 {
+ compatible = "atmel,at91sam9x5-i2c";
+ reg = <0xf8010000 0x100>;
+ interrupts = <9 4 6>;
+ dmas = <&dma0 1 7>,
+ <&dma0 1 8>;
+ dma-names = "tx", "rx";
+};
diff --git a/Documentation/devicetree/bindings/dma/atmel-xdma.txt b/Documentation/devicetree/bindings/dma/atmel-xdma.txt
new file mode 100644
index 000000000..0eb2b3207
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/atmel-xdma.txt
@@ -0,0 +1,54 @@
+* Atmel Extensible Direct Memory Access Controller (XDMAC)
+
+* XDMA Controller
+Required properties:
+- compatible: Should be "atmel,<chip>-dma".
+ <chip> compatible description:
+ - sama5d4: first SoC adding the XDMAC
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain DMA interrupt.
+- #dma-cells: Must be <1>, used to represent the number of integer cells in
+the dmas property of client devices.
+ - The 1st cell specifies the channel configuration register:
+ - bit 13: SIF, source interface identifier, used to get the memory
+ interface identifier,
+ - bit 14: DIF, destination interface identifier, used to get the peripheral
+ interface identifier,
+ - bit 30-24: PERID, peripheral identifier.
+
+Example:
+
+dma1: dma-controller@f0004000 {
+ compatible = "atmel,sama5d4-dma";
+ reg = <0xf0004000 0x200>;
+ interrupts = <50 4 0>;
+ #dma-cells = <1>;
+};
+
+
+* DMA clients
+DMA clients connected to the Atmel XDMA controller must use the format
+described in the dma.txt file, using a one-cell specifier for each channel.
+The two cells in order are:
+1. A phandle pointing to the DMA controller.
+2. Channel configuration register. Configurable fields are:
+ - bit 13: SIF, source interface identifier, used to get the memory
+ interface identifier,
+ - bit 14: DIF, destination interface identifier, used to get the peripheral
+ interface identifier,
+ - bit 30-24: PERID, peripheral identifier.
+
+Example:
+
+i2c2: i2c@f8024000 {
+ compatible = "atmel,at91sam9x5-i2c";
+ reg = <0xf8024000 0x4000>;
+ interrupts = <34 4 6>;
+ dmas = <&dma1
+ (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+ | AT91_XDMAC_DT_PERID(6))>,
+ <&dma1
+ (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+ | AT91_XDMAC_DT_PERID(7))>;
+ dma-names = "tx", "rx";
+};
diff --git a/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt b/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt
new file mode 100644
index 000000000..1396078d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/brcm,bcm2835-dma.txt
@@ -0,0 +1,57 @@
+* BCM2835 DMA controller
+
+The BCM2835 DMA controller has 16 channels in total.
+Only the lower 13 channels have an associated IRQ.
+Some arbitrary channels are used by the firmware
+(1,3,6,7 in the current firmware version).
+The channels 0,2 and 3 have special functionality
+and should not be used by the driver.
+
+Required properties:
+- compatible: Should be "brcm,bcm2835-dma".
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain the DMA interrupts associated
+ to the DMA channels in ascending order.
+- #dma-cells: Must be <1>, the cell in the dmas property of the
+ client device represents the DREQ number.
+- brcm,dma-channel-mask: Bit mask representing the channels
+ not used by the firmware in ascending order,
+ i.e. first channel corresponds to LSB.
+
+Example:
+
+dma: dma@7e007000 {
+ compatible = "brcm,bcm2835-dma";
+ reg = <0x7e007000 0xf00>;
+ interrupts = <1 16>,
+ <1 17>,
+ <1 18>,
+ <1 19>,
+ <1 20>,
+ <1 21>,
+ <1 22>,
+ <1 23>,
+ <1 24>,
+ <1 25>,
+ <1 26>,
+ <1 27>,
+ <1 28>;
+
+ #dma-cells = <1>;
+ brcm,dma-channel-mask = <0x7f35>;
+};
+
+DMA clients connected to the BCM2835 DMA controller must use the format
+described in the dma.txt file, using a two-cell specifier for each channel.
+
+Example:
+
+bcm2835_i2s: i2s@7e203000 {
+ compatible = "brcm,bcm2835-i2s";
+ reg = < 0x7e203000 0x20>,
+ < 0x7e101098 0x02>;
+
+ dmas = <&dma 2>,
+ <&dma 3>;
+ dma-names = "tx", "rx";
+};
diff --git a/Documentation/devicetree/bindings/dma/dma.txt b/Documentation/devicetree/bindings/dma/dma.txt
new file mode 100644
index 000000000..82104271e
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/dma.txt
@@ -0,0 +1,81 @@
+* Generic DMA Controller and DMA request bindings
+
+Generic binding to provide a way for a driver using DMA Engine to retrieve the
+DMA request or channel information that goes from a hardware device to a DMA
+controller.
+
+
+* DMA controller
+
+Required property:
+- #dma-cells: Must be at least 1. Used to provide DMA controller
+ specific information. See DMA client binding below for
+ more details.
+
+Optional properties:
+- dma-channels: Number of DMA channels supported by the controller.
+- dma-requests: Number of DMA request signals supported by the
+ controller.
+
+Example:
+
+ dma: dma@48000000 {
+ compatible = "ti,omap-sdma";
+ reg = <0x48000000 0x1000>;
+ interrupts = <0 12 0x4
+ 0 13 0x4
+ 0 14 0x4
+ 0 15 0x4>;
+ #dma-cells = <1>;
+ dma-channels = <32>;
+ dma-requests = <127>;
+ };
+
+
+* DMA client
+
+Client drivers should specify the DMA property using a phandle to the controller
+followed by DMA controller specific data.
+
+Required property:
+- dmas: List of one or more DMA specifiers, each consisting of
+ - A phandle pointing to DMA controller node
+ - A number of integer cells, as determined by the
+ #dma-cells property in the node referenced by phandle
+ containing DMA controller specific information. This
+ typically contains a DMA request line number or a
+ channel number, but can contain any data that is
+ required for configuring a channel.
+- dma-names: Contains one identifier string for each DMA specifier in
+ the dmas property. The specific strings that can be used
+ are defined in the binding of the DMA client device.
+ Multiple DMA specifiers can be used to represent
+ alternatives and in this case the dma-names for those
+ DMA specifiers must be identical (see examples).
+
+Examples:
+
+1. A device with one DMA read channel, one DMA write channel:
+
+ i2c1: i2c@1 {
+ ...
+ dmas = <&dma 2 /* read channel */
+ &dma 3>; /* write channel */
+ dma-names = "rx", "tx";
+ ...
+ };
+
+2. A single read-write channel with three alternative DMA controllers:
+
+ dmas = <&dma1 5
+ &dma2 7
+ &dma3 2>;
+ dma-names = "rx-tx", "rx-tx", "rx-tx";
+
+3. A device with three channels, one of which has two alternatives:
+
+ dmas = <&dma1 2 /* read channel */
+ &dma1 3 /* write channel */
+ &dma2 0 /* error read */
+ &dma3 0>; /* alternative error read */
+ dma-names = "rx", "tx", "error", "error";
diff --git a/Documentation/devicetree/bindings/dma/fsl-edma.txt b/Documentation/devicetree/bindings/dma/fsl-edma.txt
new file mode 100644
index 000000000..191d7bd8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/fsl-edma.txt
@@ -0,0 +1,76 @@
+* Freescale enhanced Direct Memory Access(eDMA) Controller
+
+ The eDMA channels have multiplex capability by programmble memory-mapped
+registers. channels are split into two groups, called DMAMUX0 and DMAMUX1,
+specific DMA request source can only be multiplexed by any channel of certain
+group, DMAMUX0 or DMAMUX1, but not both.
+
+* eDMA Controller
+Required properties:
+- compatible :
+ - "fsl,vf610-edma" for eDMA used similar to that on Vybrid vf610 SoC
+- reg : Specifies base physical address(s) and size of the eDMA registers.
+ The 1st region is eDMA control register's address and size.
+ The 2nd and the 3rd regions are programmable channel multiplexing
+ control register's address and size.
+- interrupts : A list of interrupt-specifiers, one for each entry in
+ interrupt-names.
+- interrupt-names : Should contain:
+ "edma-tx" - the transmission interrupt
+ "edma-err" - the error interrupt
+- #dma-cells : Must be <2>.
+ The 1st cell specifies the DMAMUX(0 for DMAMUX0 and 1 for DMAMUX1).
+ Specific request source can only be multiplexed by specific channels
+ group called DMAMUX.
+ The 2nd cell specifies the request source(slot) ID.
+ See the SoC's reference manual for all the supported request sources.
+- dma-channels : Number of channels supported by the controller
+- clock-names : A list of channel group clock names. Should contain:
+ "dmamux0" - clock name of mux0 group
+ "dmamux1" - clock name of mux1 group
+- clocks : A list of phandle and clock-specifier pairs, one for each entry in
+ clock-names.
+
+Optional properties:
+- big-endian: If present registers and hardware scatter/gather descriptors
+ of the eDMA are implemented in big endian mode, otherwise in little
+ mode.
+
+
+Examples:
+
+edma0: dma-controller@40018000 {
+ #dma-cells = <2>;
+ compatible = "fsl,vf610-edma";
+ reg = <0x40018000 0x2000>,
+ <0x40024000 0x1000>,
+ <0x40025000 0x1000>;
+ interrupts = <0 8 IRQ_TYPE_LEVEL_HIGH>,
+ <0 9 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "edma-tx", "edma-err";
+ dma-channels = <32>;
+ clock-names = "dmamux0", "dmamux1";
+ clocks = <&clks VF610_CLK_DMAMUX0>,
+ <&clks VF610_CLK_DMAMUX1>;
+};
+
+
+* DMA clients
+DMA client drivers that uses the DMA function must use the format described
+in the dma.txt file, using a two-cell specifier for each channel: the 1st
+specifies the channel group(DMAMUX) in which this request can be multiplexed,
+and the 2nd specifies the request source.
+
+Examples:
+
+sai2: sai@40031000 {
+ compatible = "fsl,vf610-sai";
+ reg = <0x40031000 0x1000>;
+ interrupts = <0 86 IRQ_TYPE_LEVEL_HIGH>;
+ clock-names = "sai";
+ clocks = <&clks VF610_CLK_SAI2>;
+ dma-names = "tx", "rx";
+ dmas = <&edma0 0 21>,
+ <&edma0 0 20>;
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-dma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-dma.txt
new file mode 100644
index 000000000..7bd8847d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/fsl-imx-dma.txt
@@ -0,0 +1,48 @@
+* Freescale Direct Memory Access (DMA) Controller for i.MX
+
+This document will only describe differences to the generic DMA Controller and
+DMA request bindings as described in dma/dma.txt .
+
+* DMA controller
+
+Required properties:
+- compatible : Should be "fsl,<chip>-dma". chip can be imx1, imx21 or imx27
+- reg : Should contain DMA registers location and length
+- interrupts : First item should be DMA interrupt, second one is optional and
+ should contain DMA Error interrupt
+- #dma-cells : Has to be 1. imx-dma does not support anything else.
+
+Optional properties:
+- #dma-channels : Number of DMA channels supported. Should be 16.
+- #dma-requests : Number of DMA requests supported.
+
+Example:
+
+ dma: dma@10001000 {
+ compatible = "fsl,imx27-dma";
+ reg = <0x10001000 0x1000>;
+ interrupts = <32 33>;
+ #dma-cells = <1>;
+ #dma-channels = <16>;
+ };
+
+
+* DMA client
+
+Clients have to specify the DMA requests with phandles in a list.
+
+Required properties:
+- dmas: List of one or more DMA request specifiers. One DMA request specifier
+ consists of a phandle to the DMA controller followed by the integer
+ specifying the request line.
+- dma-names: List of string identifiers for the DMA requests. For the correct
+ names, have a look at the specific client driver.
+
+Example:
+
+ sdhci1: sdhci@10013000 {
+ ...
+ dmas = <&dma 7>;
+ dma-names = "rx-tx";
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
new file mode 100644
index 000000000..c6e72501a
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/fsl-imx-sdma.txt
@@ -0,0 +1,85 @@
+* Freescale Smart Direct Memory Access (SDMA) Controller for i.MX
+
+Required properties:
+- compatible : Should be one of
+ "fsl,imx25-sdma"
+ "fsl,imx31-sdma", "fsl,imx31-to1-sdma", "fsl,imx31-to2-sdma"
+ "fsl,imx35-sdma", "fsl,imx35-to1-sdma", "fsl,imx35-to2-sdma"
+ "fsl,imx51-sdma"
+ "fsl,imx53-sdma"
+ "fsl,imx6q-sdma"
+ The -to variants should be preferred since they allow to determine the
+ correct ROM script addresses needed for the driver to work without additional
+ firmware.
+- reg : Should contain SDMA registers location and length
+- interrupts : Should contain SDMA interrupt
+- #dma-cells : Must be <3>.
+ The first cell specifies the DMA request/event ID. See details below
+ about the second and third cell.
+- fsl,sdma-ram-script-name : Should contain the full path of SDMA RAM
+ scripts firmware
+
+The second cell of dma phandle specifies the peripheral type of DMA transfer.
+The full ID of peripheral types can be found below.
+
+ ID transfer type
+ ---------------------
+ 0 MCU domain SSI
+ 1 Shared SSI
+ 2 MMC
+ 3 SDHC
+ 4 MCU domain UART
+ 5 Shared UART
+ 6 FIRI
+ 7 MCU domain CSPI
+ 8 Shared CSPI
+ 9 SIM
+ 10 ATA
+ 11 CCM
+ 12 External peripheral
+ 13 Memory Stick Host Controller
+ 14 Shared Memory Stick Host Controller
+ 15 DSP
+ 16 Memory
+ 17 FIFO type Memory
+ 18 SPDIF
+ 19 IPU Memory
+ 20 ASRC
+ 21 ESAI
+ 22 SSI Dual FIFO (needs firmware ver >= 2)
+ 23 Shared ASRC
+ 24 SAI
+
+The third cell specifies the transfer priority as below.
+
+ ID transfer priority
+ -------------------------
+ 0 High
+ 1 Medium
+ 2 Low
+
+Examples:
+
+sdma@83fb0000 {
+ compatible = "fsl,imx51-sdma", "fsl,imx35-sdma";
+ reg = <0x83fb0000 0x4000>;
+ interrupts = <6>;
+ #dma-cells = <3>;
+ fsl,sdma-ram-script-name = "/*(DEBLOBBED)*/";
+};
+
+DMA clients connected to the i.MX SDMA controller must use the format
+described in the dma.txt file.
+
+Examples:
+
+ssi2: ssi@70014000 {
+ compatible = "fsl,imx51-ssi", "fsl,imx21-ssi";
+ reg = <0x70014000 0x4000>;
+ interrupts = <30>;
+ clocks = <&clks 49>;
+ dmas = <&sdma 24 1 0>,
+ <&sdma 25 1 0>;
+ dma-names = "rx", "tx";
+ fsl,fifo-depth = <15>;
+};
diff --git a/Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt b/Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt
new file mode 100644
index 000000000..e30e184f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/fsl-mxs-dma.txt
@@ -0,0 +1,60 @@
+* Freescale MXS DMA
+
+Required properties:
+- compatible : Should be "fsl,<chip>-dma-apbh" or "fsl,<chip>-dma-apbx"
+- reg : Should contain registers location and length
+- interrupts : Should contain the interrupt numbers of DMA channels.
+ If a channel is empty/reserved, 0 should be filled in place.
+- #dma-cells : Must be <1>. The number cell specifies the channel ID.
+- dma-channels : Number of channels supported by the DMA controller
+
+Optional properties:
+- interrupt-names : Name of DMA channel interrupts
+
+Supported chips:
+imx23, imx28.
+
+Examples:
+
+dma_apbh: dma-apbh@80004000 {
+ compatible = "fsl,imx28-dma-apbh";
+ reg = <0x80004000 0x2000>;
+ interrupts = <82 83 84 85
+ 88 88 88 88
+ 88 88 88 88
+ 87 86 0 0>;
+ interrupt-names = "ssp0", "ssp1", "ssp2", "ssp3",
+ "gpmi0", "gmpi1", "gpmi2", "gmpi3",
+ "gpmi4", "gmpi5", "gpmi6", "gmpi7",
+ "hsadc", "lcdif", "empty", "empty";
+ #dma-cells = <1>;
+ dma-channels = <16>;
+};
+
+dma_apbx: dma-apbx@80024000 {
+ compatible = "fsl,imx28-dma-apbx";
+ reg = <0x80024000 0x2000>;
+ interrupts = <78 79 66 0
+ 80 81 68 69
+ 70 71 72 73
+ 74 75 76 77>;
+ interrupt-names = "auart4-rx", "auart4-tx", "spdif-tx", "empty",
+ "saif0", "saif1", "i2c0", "i2c1",
+ "auart0-rx", "auart0-tx", "auart1-rx", "auart1-tx",
+ "auart2-rx", "auart2-tx", "auart3-rx", "auart3-tx";
+ #dma-cells = <1>;
+ dma-channels = <16>;
+};
+
+DMA clients connected to the MXS DMA controller must use the format
+described in the dma.txt file.
+
+Examples:
+
+auart0: serial@8006a000 {
+ compatible = "fsl,imx28-auart", "fsl,imx23-auart";
+ reg = <0x8006a000 0x2000>;
+ interrupts = <112>;
+ dmas = <&dma_apbx 8>, <&dma_apbx 9>;
+ dma-names = "rx", "tx";
+};
diff --git a/Documentation/devicetree/bindings/dma/img-mdc-dma.txt b/Documentation/devicetree/bindings/dma/img-mdc-dma.txt
new file mode 100644
index 000000000..28c1341db
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/img-mdc-dma.txt
@@ -0,0 +1,57 @@
+* IMG Multi-threaded DMA Controller (MDC)
+
+Required properties:
+- compatible: Must be "img,pistachio-mdc-dma".
+- reg: Must contain the base address and length of the MDC registers.
+- interrupts: Must contain all the per-channel DMA interrupts.
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clock/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - sys: MDC system interface clock.
+- img,cr-periph: Must contain a phandle to the peripheral control syscon
+ node which contains the DMA request to channel mapping registers.
+- img,max-burst-multiplier: Must be the maximum supported burst size multiplier.
+ The maximum burst size is this value multiplied by the hardware-reported bus
+ width.
+- #dma-cells: Must be 3:
+ - The first cell is the peripheral's DMA request line.
+ - The second cell is a bitmap specifying to which channels the DMA request
+ line may be mapped (i.e. bit N set indicates channel N is usable).
+ - The third cell is the thread ID to be used by the channel.
+
+Optional properties:
+- dma-channels: Number of supported DMA channels, up to 32. If not specified
+ the number reported by the hardware is used.
+
+Example:
+
+mdc: dma-controller@18143000 {
+ compatible = "img,pistachio-mdc-dma";
+ reg = <0x18143000 0x1000>;
+ interrupts = <GIC_SHARED 27 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 28 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 29 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 30 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 31 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 32 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 33 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 34 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 35 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 36 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 37 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SHARED 38 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&system_clk>;
+ clock-names = "sys";
+
+ img,max-burst-multiplier = <16>;
+ img,cr-periph = <&cr_periph>;
+
+ #dma-cells = <3>;
+};
+
+spi@18100f00 {
+ ...
+ dmas = <&mdc 9 0xffffffff 0>, <&mdc 10 0xffffffff 0>;
+ dma-names = "tx", "rx";
+ ...
+};
diff --git a/Documentation/devicetree/bindings/dma/jz4780-dma.txt b/Documentation/devicetree/bindings/dma/jz4780-dma.txt
new file mode 100644
index 000000000..f25feee62
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/jz4780-dma.txt
@@ -0,0 +1,56 @@
+* Ingenic JZ4780 DMA Controller
+
+Required properties:
+
+- compatible: Should be "ingenic,jz4780-dma"
+- reg: Should contain the DMA controller registers location and length.
+- interrupts: Should contain the interrupt specifier of the DMA controller.
+- interrupt-parent: Should be the phandle of the interrupt controller that
+- clocks: Should contain a clock specifier for the JZ4780 PDMA clock.
+- #dma-cells: Must be <2>. Number of integer cells in the dmas property of
+ DMA clients (see below).
+
+Optional properties:
+
+- ingenic,reserved-channels: Bitmask of channels to reserve for devices that
+ need a specific channel. These channels will only be assigned when explicitly
+ requested by a client. The primary use for this is channels 0 and 1, which
+ can be configured to have special behaviour for NAND/BCH when using
+ programmable firmware.
+
+Example:
+
+dma: dma@13420000 {
+ compatible = "ingenic,jz4780-dma";
+ reg = <0x13420000 0x10000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <10>;
+
+ clocks = <&cgu JZ4780_CLK_PDMA>;
+
+ #dma-cells = <2>;
+
+ ingenic,reserved-channels = <0x3>;
+};
+
+DMA clients must use the format described in dma.txt, giving a phandle to the
+DMA controller plus the following 2 integer cells:
+
+1. Request type: The DMA request type for transfers to/from the device on
+ the allocated channel, as defined in the SoC documentation.
+
+2. Channel: If set to 0xffffffff, any available channel will be allocated for
+ the client. Otherwise, the exact channel specified will be used. The channel
+ should be reserved on the DMA controller using the ingenic,reserved-channels
+ property.
+
+Example:
+
+uart0: serial@10030000 {
+ ...
+ dmas = <&dma 0x14 0xffffffff
+ &dma 0x15 0xffffffff>;
+ dma-names = "tx", "rx";
+ ...
+};
diff --git a/Documentation/devicetree/bindings/dma/k3dma.txt b/Documentation/devicetree/bindings/dma/k3dma.txt
new file mode 100644
index 000000000..23f8d712c
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/k3dma.txt
@@ -0,0 +1,46 @@
+* Hisilicon K3 DMA controller
+
+See dma.txt first
+
+Required properties:
+- compatible: Should be "hisilicon,k3-dma-1.0"
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain one interrupt shared by all channel
+- #dma-cells: see dma.txt, should be 1, para number
+- dma-channels: physical channels supported
+- dma-requests: virtual channels supported, each virtual channel
+ have specific request line
+- clocks: clock required
+
+Example:
+
+Controller:
+ dma0: dma@fcd02000 {
+ compatible = "hisilicon,k3-dma-1.0";
+ reg = <0xfcd02000 0x1000>;
+ #dma-cells = <1>;
+ dma-channels = <16>;
+ dma-requests = <27>;
+ interrupts = <0 12 4>;
+ clocks = <&pclk>;
+ status = "disable";
+ };
+
+Client:
+Use specific request line passing from dmax
+For example, i2c0 read channel request line is 18, while write channel use 19
+
+ i2c0: i2c@fcb08000 {
+ compatible = "snps,designware-i2c";
+ dmas = <&dma0 18 /* read channel */
+ &dma0 19>; /* write channel */
+ dma-names = "rx", "tx";
+ };
+
+ i2c1: i2c@fcb09000 {
+ compatible = "snps,designware-i2c";
+ dmas = <&dma0 20 /* read channel */
+ &dma0 21>; /* write channel */
+ dma-names = "rx", "tx";
+ };
+
diff --git a/Documentation/devicetree/bindings/dma/mmp-dma.txt b/Documentation/devicetree/bindings/dma/mmp-dma.txt
new file mode 100644
index 000000000..7a802f64e
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/mmp-dma.txt
@@ -0,0 +1,77 @@
+* MARVELL MMP DMA controller
+
+Marvell Peripheral DMA Controller
+Used platforms: pxa688, pxa910, pxa3xx, etc
+
+Required properties:
+- compatible: Should be "marvell,pdma-1.0"
+- reg: Should contain DMA registers location and length.
+- interrupts: Either contain all of the per-channel DMA interrupts
+ or one irq for pdma device
+
+Optional properties:
+- #dma-channels: Number of DMA channels supported by the controller (defaults
+ to 32 when not specified)
+
+"marvell,pdma-1.0"
+Used platforms: pxa25x, pxa27x, pxa3xx, pxa93x, pxa168, pxa910, pxa688.
+
+Examples:
+
+/*
+ * Each channel has specific irq
+ * ICU parse out irq channel from ICU register,
+ * while DMA controller may not able to distinguish the irq channel
+ * Using this method, interrupt-parent is required as demuxer
+ * For example, pxa688 icu register 0x128, bit 0~15 is PDMA channel irq,
+ * 18~21 is ADMA irq
+ */
+pdma: dma-controller@d4000000 {
+ compatible = "marvell,pdma-1.0";
+ reg = <0xd4000000 0x10000>;
+ interrupts = <0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15>;
+ interrupt-parent = <&intcmux32>;
+ #dma-channels = <16>;
+ };
+
+/*
+ * One irq for all channels
+ * Dmaengine driver (DMA controller) distinguish irq channel via
+ * parsing internal register
+ */
+pdma: dma-controller@d4000000 {
+ compatible = "marvell,pdma-1.0";
+ reg = <0xd4000000 0x10000>;
+ interrupts = <47>;
+ #dma-channels = <16>;
+ };
+
+
+Marvell Two Channel DMA Controller used specifically for audio
+Used platforms: pxa688, pxa910
+
+Required properties:
+- compatible: Should be "marvell,adma-1.0" or "marvell,pxa910-squ"
+- reg: Should contain DMA registers location and length.
+- interrupts: Either contain all of the per-channel DMA interrupts
+ or one irq for dma device
+
+"marvell,adma-1.0" used on pxa688
+"marvell,pxa910-squ" used on pxa910
+
+Examples:
+
+/* each channel has specific irq */
+adma0: dma-controller@d42a0800 {
+ compatible = "marvell,adma-1.0";
+ reg = <0xd42a0800 0x100>;
+ interrupts = <18 19>;
+ interrupt-parent = <&intcmux32>;
+ };
+
+/* One irq for all channels */
+squ: dma-controller@d42a0800 {
+ compatible = "marvell,pxa910-squ";
+ reg = <0xd42a0800 0x100>;
+ interrupts = <46>;
+ };
diff --git a/Documentation/devicetree/bindings/dma/moxa,moxart-dma.txt b/Documentation/devicetree/bindings/dma/moxa,moxart-dma.txt
new file mode 100644
index 000000000..8a9f35593
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/moxa,moxart-dma.txt
@@ -0,0 +1,45 @@
+MOXA ART DMA Controller
+
+See dma.txt first
+
+Required properties:
+
+- compatible : Must be "moxa,moxart-dma"
+- reg : Should contain registers location and length
+- interrupts : Should contain an interrupt-specifier for the sole
+ interrupt generated by the device
+- #dma-cells : Should be 1, a single cell holding a line request number
+
+Example:
+
+ dma: dma@90500000 {
+ compatible = "moxa,moxart-dma";
+ reg = <0x90500080 0x40>;
+ interrupts = <24 0>;
+ #dma-cells = <1>;
+ };
+
+
+Clients:
+
+DMA clients connected to the MOXA ART DMA controller must use the format
+described in the dma.txt file, using a two-cell specifier for each channel:
+a phandle plus one integer cells.
+The two cells in order are:
+
+1. A phandle pointing to the DMA controller.
+2. Peripheral identifier for the hardware handshaking interface.
+
+Example:
+Use specific request line passing from dma
+For example, MMC request line is 5
+
+ sdhci: sdhci@98e00000 {
+ compatible = "moxa,moxart-sdhci";
+ reg = <0x98e00000 0x5C>;
+ interrupts = <5 0>;
+ clocks = <&clk_apb>;
+ dmas = <&dma 5>,
+ <&dma 5>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/dma/mpc512x-dma.txt b/Documentation/devicetree/bindings/dma/mpc512x-dma.txt
new file mode 100644
index 000000000..a6511df16
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/mpc512x-dma.txt
@@ -0,0 +1,29 @@
+* Freescale MPC512x and MPC8308 DMA Controller
+
+The DMA controller in Freescale MPC512x and MPC8308 SoCs can move
+blocks of memory contents between memory and peripherals or
+from memory to memory.
+
+Refer to "Generic DMA Controller and DMA request bindings" in
+the dma/dma.txt file for a more detailed description of binding.
+
+Required properties:
+- compatible: should be "fsl,mpc5121-dma" or "fsl,mpc8308-dma";
+- reg: should contain the DMA controller registers location and length;
+- interrupt for the DMA controller: syntax of interrupt client node
+ is described in interrupt-controller/interrupts.txt file.
+- #dma-cells: the length of the DMA specifier, must be <1>.
+ Each channel of this DMA controller has a peripheral request line,
+ the assignment is fixed in hardware. This one cell
+ in dmas property of a client device represents the channel number.
+
+Example:
+
+ dma0: dma@14000 {
+ compatible = "fsl,mpc5121-dma";
+ reg = <0x14000 0x1800>;
+ interrupts = <65 0x8>;
+ #dma-cells = <1>;
+ };
+
+DMA clients must use the format described in dma/dma.txt file.
diff --git a/Documentation/devicetree/bindings/dma/mv-xor.txt b/Documentation/devicetree/bindings/dma/mv-xor.txt
new file mode 100644
index 000000000..7c6cb7fce
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/mv-xor.txt
@@ -0,0 +1,40 @@
+* Marvell XOR engines
+
+Required properties:
+- compatible: Should be "marvell,orion-xor"
+- reg: Should contain registers location and length (two sets)
+ the first set is the low registers, the second set the high
+ registers for the XOR engine.
+- clocks: pointer to the reference clock
+
+The DT node must also contains sub-nodes for each XOR channel that the
+XOR engine has. Those sub-nodes have the following required
+properties:
+- interrupts: interrupt of the XOR channel
+
+And the following optional properties:
+- dmacap,memcpy to indicate that the XOR channel is capable of memcpy operations
+- dmacap,memset to indicate that the XOR channel is capable of memset operations
+- dmacap,xor to indicate that the XOR channel is capable of xor operations
+
+Example:
+
+xor@d0060900 {
+ compatible = "marvell,orion-xor";
+ reg = <0xd0060900 0x100
+ 0xd0060b00 0x100>;
+ clocks = <&coreclk 0>;
+ status = "okay";
+
+ xor00 {
+ interrupts = <51>;
+ dmacap,memcpy;
+ dmacap,xor;
+ };
+ xor01 {
+ interrupts = <52>;
+ dmacap,memcpy;
+ dmacap,xor;
+ dmacap,memset;
+ };
+};
diff --git a/Documentation/devicetree/bindings/dma/nbpfaxi.txt b/Documentation/devicetree/bindings/dma/nbpfaxi.txt
new file mode 100644
index 000000000..d5e2522b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/nbpfaxi.txt
@@ -0,0 +1,61 @@
+* Renesas "Type-AXI" NBPFAXI* DMA controllers
+
+* DMA controller
+
+Required properties
+
+- compatible: must be one of
+ "renesas,nbpfaxi64dmac1b4"
+ "renesas,nbpfaxi64dmac1b8"
+ "renesas,nbpfaxi64dmac1b16"
+ "renesas,nbpfaxi64dmac4b4"
+ "renesas,nbpfaxi64dmac4b8"
+ "renesas,nbpfaxi64dmac4b16"
+ "renesas,nbpfaxi64dmac8b4"
+ "renesas,nbpfaxi64dmac8b8"
+ "renesas,nbpfaxi64dmac8b16"
+- #dma-cells: must be 2: the first integer is a terminal number, to which this
+ slave is connected, the second one is flags. Flags is a bitmask
+ with the following bits defined:
+
+#define NBPF_SLAVE_RQ_HIGH 1
+#define NBPF_SLAVE_RQ_LOW 2
+#define NBPF_SLAVE_RQ_LEVEL 4
+
+Optional properties:
+
+You can use dma-channels and dma-requests as described in dma.txt, although they
+won't be used, this information is derived from the compatibility string.
+
+Example:
+
+ dma: dma-controller@48000000 {
+ compatible = "renesas,nbpfaxi64dmac8b4";
+ reg = <0x48000000 0x400>;
+ interrupts = <0 12 0x4
+ 0 13 0x4
+ 0 14 0x4
+ 0 15 0x4
+ 0 16 0x4
+ 0 17 0x4
+ 0 18 0x4
+ 0 19 0x4>;
+ #dma-cells = <2>;
+ dma-channels = <8>;
+ dma-requests = <8>;
+ };
+
+* DMA client
+
+Required properties:
+
+dmas and dma-names are required, as described in dma.txt.
+
+Example:
+
+#include <dt-bindings/dma/nbpfaxi.h>
+
+...
+ dmas = <&dma 0 (NBPF_SLAVE_RQ_HIGH | NBPF_SLAVE_RQ_LEVEL)
+ &dma 1 (NBPF_SLAVE_RQ_HIGH | NBPF_SLAVE_RQ_LEVEL)>;
+ dma-names = "rx", "tx";
diff --git a/Documentation/devicetree/bindings/dma/qcom_adm.txt b/Documentation/devicetree/bindings/dma/qcom_adm.txt
new file mode 100644
index 000000000..9bcab9115
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/qcom_adm.txt
@@ -0,0 +1,62 @@
+QCOM ADM DMA Controller
+
+Required properties:
+- compatible: must contain "qcom,adm" for IPQ/APQ8064 and MSM8960
+- reg: Address range for DMA registers
+- interrupts: Should contain one interrupt shared by all channels
+- #dma-cells: must be <2>. First cell denotes the channel number. Second cell
+ denotes CRCI (client rate control interface) flow control assignment.
+- clocks: Should contain the core clock and interface clock.
+- clock-names: Must contain "core" for the core clock and "iface" for the
+ interface clock.
+- resets: Must contain an entry for each entry in reset names.
+- reset-names: Must include the following entries:
+ - clk
+ - c0
+ - c1
+ - c2
+- qcom,ee: indicates the security domain identifier used in the secure world.
+
+Example:
+ adm_dma: dma@18300000 {
+ compatible = "qcom,adm";
+ reg = <0x18300000 0x100000>;
+ interrupts = <0 170 0>;
+ #dma-cells = <2>;
+
+ clocks = <&gcc ADM0_CLK>, <&gcc ADM0_PBUS_CLK>;
+ clock-names = "core", "iface";
+
+ resets = <&gcc ADM0_RESET>,
+ <&gcc ADM0_C0_RESET>,
+ <&gcc ADM0_C1_RESET>,
+ <&gcc ADM0_C2_RESET>;
+ reset-names = "clk", "c0", "c1", "c2";
+ qcom,ee = <0>;
+ };
+
+DMA clients must use the format descripted in the dma.txt file, using a three
+cell specifier for each channel.
+
+Each dmas request consists of 3 cells:
+ 1. phandle pointing to the DMA controller
+ 2. channel number
+ 3. CRCI assignment, if applicable. If no CRCI flow control is required, use 0.
+ The CRCI is used for flow control. It identifies the peripheral device that
+ is the source/destination for the transferred data.
+
+Example:
+
+ spi4: spi@1a280000 {
+ status = "ok";
+ spi-max-frequency = <50000000>;
+
+ pinctrl-0 = <&spi_pins>;
+ pinctrl-names = "default";
+
+ cs-gpios = <&qcom_pinmux 20 0>;
+
+ dmas = <&adm_dma 6 9>,
+ <&adm_dma 5 10>;
+ dma-names = "rx", "tx";
+ };
diff --git a/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
new file mode 100644
index 000000000..1c9d48ea4
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/qcom_bam_dma.txt
@@ -0,0 +1,44 @@
+QCOM BAM DMA controller
+
+Required properties:
+- compatible: must be one of the following:
+ * "qcom,bam-v1.4.0" for MSM8974, APQ8074 and APQ8084
+ * "qcom,bam-v1.3.0" for APQ8064, IPQ8064 and MSM8960
+ * "qcom,bam-v1.7.0" for MSM8916
+- reg: Address range for DMA registers
+- interrupts: Should contain the one interrupt shared by all channels
+- #dma-cells: must be <1>, the cell in the dmas property of the client device
+ represents the channel number
+- clocks: required clock
+- clock-names: must contain "bam_clk" entry
+- qcom,ee : indicates the active Execution Environment identifier (0-7) used in
+ the secure world.
+
+Example:
+
+ uart-bam: dma@f9984000 = {
+ compatible = "qcom,bam-v1.4.0";
+ reg = <0xf9984000 0x15000>;
+ interrupts = <0 94 0>;
+ clocks = <&gcc GCC_BAM_DMA_AHB_CLK>;
+ clock-names = "bam_clk";
+ #dma-cells = <1>;
+ qcom,ee = <0>;
+ };
+
+DMA clients must use the format described in the dma.txt file, using a two cell
+specifier for each channel.
+
+Example:
+ serial@f991e000 {
+ compatible = "qcom,msm-uart";
+ reg = <0xf991e000 0x1000>
+ <0xf9944000 0x19000>;
+ interrupts = <0 108 0>;
+ clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>,
+ <&gcc GCC_BLSP1_AHB_CLK>;
+ clock-names = "core", "iface";
+
+ dmas = <&uart-bam 0>, <&uart-bam 1>;
+ dma-names = "rx", "tx";
+ };
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
new file mode 100644
index 000000000..09daeef1f
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.txt
@@ -0,0 +1,95 @@
+* Renesas R-Car DMA Controller Device Tree bindings
+
+Renesas R-Car Generation 2 SoCs have multiple multi-channel DMA
+controller instances named DMAC capable of serving multiple clients. Channels
+can be dedicated to specific clients or shared between a large number of
+clients.
+
+Each DMA client is connected to one dedicated port of the DMAC, identified by
+an 8-bit port number called the MID/RID. A DMA controller can thus serve up to
+256 clients in total. When the number of hardware channels is lower than the
+number of clients to be served, channels must be shared between multiple DMA
+clients. The association of DMA clients to DMAC channels is fully dynamic and
+not described in these device tree bindings.
+
+Required Properties:
+
+- compatible: must contain "renesas,rcar-dmac"
+
+- reg: base address and length of the registers block for the DMAC
+
+- interrupts: interrupt specifiers for the DMAC, one for each entry in
+ interrupt-names.
+- interrupt-names: one entry per channel, named "ch%u", where %u is the
+ channel number ranging from zero to the number of channels minus one.
+
+- clock-names: "fck" for the functional clock
+- clocks: a list of phandle + clock-specifier pairs, one for each entry
+ in clock-names.
+- clock-names: must contain "fck" for the functional clock.
+
+- #dma-cells: must be <1>, the cell specifies the MID/RID of the DMAC port
+ connected to the DMA client
+- dma-channels: number of DMA channels
+
+Example: R8A7790 (R-Car H2) SYS-DMACs
+
+ dmac0: dma-controller@e6700000 {
+ compatible = "renesas,rcar-dmac";
+ reg = <0 0xe6700000 0 0x20000>;
+ interrupts = <0 197 IRQ_TYPE_LEVEL_HIGH
+ 0 200 IRQ_TYPE_LEVEL_HIGH
+ 0 201 IRQ_TYPE_LEVEL_HIGH
+ 0 202 IRQ_TYPE_LEVEL_HIGH
+ 0 203 IRQ_TYPE_LEVEL_HIGH
+ 0 204 IRQ_TYPE_LEVEL_HIGH
+ 0 205 IRQ_TYPE_LEVEL_HIGH
+ 0 206 IRQ_TYPE_LEVEL_HIGH
+ 0 207 IRQ_TYPE_LEVEL_HIGH
+ 0 208 IRQ_TYPE_LEVEL_HIGH
+ 0 209 IRQ_TYPE_LEVEL_HIGH
+ 0 210 IRQ_TYPE_LEVEL_HIGH
+ 0 211 IRQ_TYPE_LEVEL_HIGH
+ 0 212 IRQ_TYPE_LEVEL_HIGH
+ 0 213 IRQ_TYPE_LEVEL_HIGH
+ 0 214 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14";
+ clocks = <&mstp2_clks R8A7790_CLK_SYS_DMAC0>;
+ clock-names = "fck";
+ #dma-cells = <1>;
+ dma-channels = <15>;
+ };
+
+ dmac1: dma-controller@e6720000 {
+ compatible = "renesas,rcar-dmac";
+ reg = <0 0xe6720000 0 0x20000>;
+ interrupts = <0 220 IRQ_TYPE_LEVEL_HIGH
+ 0 216 IRQ_TYPE_LEVEL_HIGH
+ 0 217 IRQ_TYPE_LEVEL_HIGH
+ 0 218 IRQ_TYPE_LEVEL_HIGH
+ 0 219 IRQ_TYPE_LEVEL_HIGH
+ 0 308 IRQ_TYPE_LEVEL_HIGH
+ 0 309 IRQ_TYPE_LEVEL_HIGH
+ 0 310 IRQ_TYPE_LEVEL_HIGH
+ 0 311 IRQ_TYPE_LEVEL_HIGH
+ 0 312 IRQ_TYPE_LEVEL_HIGH
+ 0 313 IRQ_TYPE_LEVEL_HIGH
+ 0 314 IRQ_TYPE_LEVEL_HIGH
+ 0 315 IRQ_TYPE_LEVEL_HIGH
+ 0 316 IRQ_TYPE_LEVEL_HIGH
+ 0 317 IRQ_TYPE_LEVEL_HIGH
+ 0 318 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14";
+ clocks = <&mstp2_clks R8A7790_CLK_SYS_DMAC1>;
+ clock-names = "fck";
+ #dma-cells = <1>;
+ dma-channels = <15>;
+ };
diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
new file mode 100644
index 000000000..040f36595
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.txt
@@ -0,0 +1,37 @@
+* Renesas USB DMA Controller Device Tree bindings
+
+Required Properties:
+- compatible: must contain "renesas,usb-dmac"
+- reg: base address and length of the registers block for the DMAC
+- interrupts: interrupt specifiers for the DMAC, one for each entry in
+ interrupt-names.
+- interrupt-names: one entry per channel, named "ch%u", where %u is the
+ channel number ranging from zero to the number of channels minus one.
+- clocks: a list of phandle + clock-specifier pairs.
+- #dma-cells: must be <1>, the cell specifies the channel number of the DMAC
+ port connected to the DMA client.
+- dma-channels: number of DMA channels
+
+Example: R8A7790 (R-Car H2) USB-DMACs
+
+ usb_dmac0: dma-controller@e65a0000 {
+ compatible = "renesas,usb-dmac";
+ reg = <0 0xe65a0000 0 0x100>;
+ interrupts = <0 109 IRQ_TYPE_LEVEL_HIGH
+ 0 109 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "ch0", "ch1";
+ clocks = <&mstp3_clks R8A7790_CLK_USBDMAC0>;
+ #dma-cells = <1>;
+ dma-channels = <2>;
+ };
+
+ usb_dmac1: dma-controller@e65b0000 {
+ compatible = "renesas,usb-dmac";
+ reg = <0 0xe65b0000 0 0x100>;
+ interrupts = <0 110 IRQ_TYPE_LEVEL_HIGH
+ 0 110 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "ch0", "ch1";
+ clocks = <&mstp3_clks R8A7790_CLK_USBDMAC1>;
+ #dma-cells = <1>;
+ dma-channels = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/dma/shdma.txt b/Documentation/devicetree/bindings/dma/shdma.txt
new file mode 100644
index 000000000..2a3f3b894
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/shdma.txt
@@ -0,0 +1,84 @@
+* SHDMA Device Tree bindings
+
+Sh-/r-mobile and r-car systems often have multiple identical DMA controller
+instances, capable of serving any of a common set of DMA slave devices, using
+the same configuration. To describe this topology we require all compatible
+SHDMA DT nodes to be placed under a DMA multiplexer node. All such compatible
+DMAC instances have the same number of channels and use the same DMA
+descriptors. Therefore respective DMA DT bindings can also all be placed in the
+multiplexer node. Even if there is only one such DMAC instance on a system, it
+still has to be placed under such a multiplexer node.
+
+* DMA multiplexer
+
+Required properties:
+- compatible: should be "renesas,shdma-mux"
+- #dma-cells: should be <1>, see "dmas" property below
+
+Optional properties (currently unused):
+- dma-channels: number of DMA channels
+- dma-requests: number of DMA request signals
+
+* DMA controller
+
+Required properties:
+- compatible: should be of the form "renesas,shdma-<soc>", where <soc> should
+ be replaced with the desired SoC model, e.g.
+ "renesas,shdma-r8a73a4" for the system DMAC on r8a73a4 SoC
+
+Example:
+ dmac: dma-multiplexer@0 {
+ compatible = "renesas,shdma-mux";
+ #dma-cells = <1>;
+ dma-channels = <20>;
+ dma-requests = <256>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ dma0: dma-controller@e6700020 {
+ compatible = "renesas,shdma-r8a73a4";
+ reg = <0 0xe6700020 0 0x89e0>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 220 4
+ 0 200 4
+ 0 201 4
+ 0 202 4
+ 0 203 4
+ 0 204 4
+ 0 205 4
+ 0 206 4
+ 0 207 4
+ 0 208 4
+ 0 209 4
+ 0 210 4
+ 0 211 4
+ 0 212 4
+ 0 213 4
+ 0 214 4
+ 0 215 4
+ 0 216 4
+ 0 217 4
+ 0 218 4
+ 0 219 4>;
+ interrupt-names = "error",
+ "ch0", "ch1", "ch2", "ch3",
+ "ch4", "ch5", "ch6", "ch7",
+ "ch8", "ch9", "ch10", "ch11",
+ "ch12", "ch13", "ch14", "ch15",
+ "ch16", "ch17", "ch18", "ch19";
+ };
+ };
+
+* DMA client
+
+Required properties:
+- dmas: a list of <[DMA multiplexer phandle] [MID/RID value]> pairs,
+ where MID/RID values are fixed handles, specified in the SoC
+ manual
+- dma-names: a list of DMA channel names, one per "dmas" entry
+
+Example:
+ dmas = <&dmac 0xd1
+ &dmac 0xd2>;
+ dma-names = "tx", "rx";
diff --git a/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt b/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt
new file mode 100644
index 000000000..ecbc96ad3
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt
@@ -0,0 +1,43 @@
+* CSR SiRFSoC DMA controller
+
+See dma.txt first
+
+Required properties:
+- compatible: Should be "sirf,prima2-dmac" or "sirf,marco-dmac"
+- reg: Should contain DMA registers location and length.
+- interrupts: Should contain one interrupt shared by all channel
+- #dma-cells: must be <1>. used to represent the number of integer
+ cells in the dmas property of client device.
+- clocks: clock required
+
+Example:
+
+Controller:
+dmac0: dma-controller@b00b0000 {
+ compatible = "sirf,prima2-dmac";
+ reg = <0xb00b0000 0x10000>;
+ interrupts = <12>;
+ clocks = <&clks 24>;
+ #dma-cells = <1>;
+};
+
+
+Client:
+Fill the specific dma request line in dmas. In the below example, spi0 read
+channel request line is 9 of the 2nd dma controller, while write channel uses
+4 of the 2nd dma controller; spi1 read channel request line is 12 of the 1st
+dma controller, while write channel uses 13 of the 1st dma controller:
+
+spi0: spi@b00d0000 {
+ compatible = "sirf,prima2-spi";
+ dmas = <&dmac1 9>,
+ <&dmac1 4>;
+ dma-names = "rx", "tx";
+};
+
+spi1: spi@b0170000 {
+ compatible = "sirf,prima2-spi";
+ dmas = <&dmac0 12>,
+ <&dmac0 13>;
+ dma-names = "rx", "tx";
+};
diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
new file mode 100644
index 000000000..c26159816
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
@@ -0,0 +1,63 @@
+* Synopsys Designware DMA Controller
+
+Required properties:
+- compatible: "snps,dma-spear1340"
+- reg: Address range of the DMAC registers
+- interrupt: Should contain the DMAC interrupt number
+- dma-channels: Number of channels supported by hardware
+- dma-requests: Number of DMA request lines supported, up to 16
+- dma-masters: Number of AHB masters supported by the controller
+- #dma-cells: must be <3>
+- chan_allocation_order: order of allocation of channel, 0 (default): ascending,
+ 1: descending
+- chan_priority: priority of channels. 0 (default): increase from chan 0->n, 1:
+ increase from chan n->0
+- block_size: Maximum block size supported by the controller
+- data_width: Maximum data width supported by hardware per AHB master
+ (0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
+
+
+Optional properties:
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- is_private: The device channels should be marked as private and not for by the
+ general purpose DMA channel allocator. False if not passed.
+
+Example:
+
+ dmahost: dma@fc000000 {
+ compatible = "snps,dma-spear1340";
+ reg = <0xfc000000 0x1000>;
+ interrupt-parent = <&vic1>;
+ interrupts = <12>;
+
+ dma-channels = <8>;
+ dma-requests = <16>;
+ dma-masters = <2>;
+ #dma-cells = <3>;
+ chan_allocation_order = <1>;
+ chan_priority = <1>;
+ block_size = <0xfff>;
+ data_width = <3 3>;
+ };
+
+DMA clients connected to the Designware DMA controller must use the format
+described in the dma.txt file, using a four-cell specifier for each channel.
+The four cells in order are:
+
+1. A phandle pointing to the DMA controller
+2. The DMA request line number
+3. Source master for transfers on allocated channel
+4. Destination master for transfers on allocated channel
+
+Example:
+
+ serial@e0000000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0xe0000000 0x1000>;
+ interrupts = <0 35 0x4>;
+ status = "disabled";
+ dmas = <&dmahost 12 0 1>,
+ <&dmahost 13 0 1 0>;
+ dma-names = "rx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/dma/ste-coh901318.txt b/Documentation/devicetree/bindings/dma/ste-coh901318.txt
new file mode 100644
index 000000000..091ad057e
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/ste-coh901318.txt
@@ -0,0 +1,32 @@
+ST-Ericsson COH 901 318 DMA Controller
+
+This is a DMA controller which has begun as a fork of the
+ARM PL08x PrimeCell VHDL code.
+
+Required properties:
+- compatible: should be "stericsson,coh901318"
+- reg: register locations and length
+- interrupts: the single DMA IRQ
+- #dma-cells: must be set to <1>, as the channels on the
+ COH 901 318 are simple and identified by a single number
+- dma-channels: the number of DMA channels handled
+
+Example:
+
+dmac: dma-controller@c00020000 {
+ compatible = "stericsson,coh901318";
+ reg = <0xc0020000 0x1000>;
+ interrupt-parent = <&vica>;
+ interrupts = <2>;
+ #dma-cells = <1>;
+ dma-channels = <40>;
+};
+
+Consumers example:
+
+uart0: serial@c0013000 {
+ compatible = "...";
+ (...)
+ dmas = <&dmac 17 &dmac 18>;
+ dma-names = "tx", "rx";
+};
diff --git a/Documentation/devicetree/bindings/dma/ste-dma40.txt b/Documentation/devicetree/bindings/dma/ste-dma40.txt
new file mode 100644
index 000000000..95800ab37
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/ste-dma40.txt
@@ -0,0 +1,139 @@
+* DMA40 DMA Controller
+
+Required properties:
+- compatible: "stericsson,dma40"
+- reg: Address range of the DMAC registers
+- reg-names: Names of the above areas to use during resource look-up
+- interrupt: Should contain the DMAC interrupt number
+- #dma-cells: must be <3>
+- memcpy-channels: Channels to be used for memcpy
+
+Optional properties:
+- dma-channels: Number of channels supported by hardware - if not present
+ the driver will attempt to obtain the information from H/W
+- disabled-channels: Channels which can not be used
+
+Example:
+
+ dma: dma-controller@801C0000 {
+ compatible = "stericsson,db8500-dma40", "stericsson,dma40";
+ reg = <0x801C0000 0x1000 0x40010000 0x800>;
+ reg-names = "base", "lcpa";
+ interrupt-parent = <&intc>;
+ interrupts = <0 25 0x4>;
+
+ #dma-cells = <2>;
+ memcpy-channels = <56 57 58 59 60>;
+ disabled-channels = <12>;
+ dma-channels = <8>;
+ };
+
+Clients
+Required properties:
+- dmas: Comma separated list of dma channel requests
+- dma-names: Names of the aforementioned requested channels
+
+Each dmas request consists of 4 cells:
+ 1. A phandle pointing to the DMA controller
+ 2. Device signal number, the signal line for single and burst requests
+ connected from the device to the DMA40 engine
+ 3. The DMA request line number (only when 'use fixed channel' is set)
+ 4. A 32bit mask specifying; mode, direction and endianness
+ [NB: This list will grow]
+ 0x00000001: Mode:
+ Logical channel when unset
+ Physical channel when set
+ 0x00000002: Direction:
+ Memory to Device when unset
+ Device to Memory when set
+ 0x00000004: Endianness:
+ Little endian when unset
+ Big endian when set
+ 0x00000008: Use fixed channel:
+ Use automatic channel selection when unset
+ Use DMA request line number when set
+ 0x00000010: Set channel as high priority:
+ Normal priority when unset
+ High priority when set
+
+Existing signal numbers for the DB8500 ASIC. Unless specified, the signals are
+bidirectional, i.e. the same for RX and TX operations:
+
+0: SPI controller 0
+1: SD/MMC controller 0 (unused)
+2: SD/MMC controller 1 (unused)
+3: SD/MMC controller 2 (unused)
+4: I2C port 1
+5: I2C port 3
+6: I2C port 2
+7: I2C port 4
+8: Synchronous Serial Port SSP0
+9: Synchronous Serial Port SSP1
+10: Multi-Channel Display Engine MCDE RX
+11: UART port 2
+12: UART port 1
+13: UART port 0
+14: Multirate Serial Port MSP2
+15: I2C port 0
+16: USB OTG in/out endpoints 7 & 15
+17: USB OTG in/out endpoints 6 & 14
+18: USB OTG in/out endpoints 5 & 13
+19: USB OTG in/out endpoints 4 & 12
+20: SLIMbus or HSI channel 0
+21: SLIMbus or HSI channel 1
+22: SLIMbus or HSI channel 2
+23: SLIMbus or HSI channel 3
+24: Multimedia DSP SXA0
+25: Multimedia DSP SXA1
+26: Multimedia DSP SXA2
+27: Multimedia DSP SXA3
+28: SD/MM controller 2
+29: SD/MM controller 0
+30: MSP port 1 on DB8500 v1, MSP port 3 on DB8500 v2
+31: MSP port 0 or SLIMbus channel 0
+32: SD/MM controller 1
+33: SPI controller 2
+34: i2c3 RX2 TX2
+35: SPI controller 1
+36: USB OTG in/out endpoints 3 & 11
+37: USB OTG in/out endpoints 2 & 10
+38: USB OTG in/out endpoints 1 & 9
+39: USB OTG in/out endpoints 8
+40: SPI controller 3
+41: SD/MM controller 3
+42: SD/MM controller 4
+43: SD/MM controller 5
+44: Multimedia DSP SXA4
+45: Multimedia DSP SXA5
+46: SLIMbus channel 8 or Multimedia DSP SXA6
+47: SLIMbus channel 9 or Multimedia DSP SXA7
+48: Crypto Accelerator 1
+49: Crypto Accelerator 1 TX or Hash Accelerator 1 TX
+50: Hash Accelerator 1 TX
+51: memcpy TX (to be used by the DMA driver for memcpy operations)
+52: SLIMbus or HSI channel 4
+53: SLIMbus or HSI channel 5
+54: SLIMbus or HSI channel 6
+55: SLIMbus or HSI channel 7
+56: memcpy (to be used by the DMA driver for memcpy operations)
+57: memcpy (to be used by the DMA driver for memcpy operations)
+58: memcpy (to be used by the DMA driver for memcpy operations)
+59: memcpy (to be used by the DMA driver for memcpy operations)
+60: memcpy (to be used by the DMA driver for memcpy operations)
+61: Crypto Accelerator 0
+62: Crypto Accelerator 0 TX or Hash Accelerator 0 TX
+63: Hash Accelerator 0 TX
+
+Example:
+
+ uart@80120000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x80120000 0x1000>;
+ interrupts = <0 11 0x4>;
+
+ dmas = <&dma 13 0 0x2>, /* Logical - DevToMem */
+ <&dma 13 0 0x0>; /* Logical - MemToDev */
+ dma-names = "rx", "rx";
+
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/dma/sun6i-dma.txt b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
new file mode 100644
index 000000000..9cdcba24d
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/sun6i-dma.txt
@@ -0,0 +1,45 @@
+Allwinner A31 DMA Controller
+
+This driver follows the generic DMA bindings defined in dma.txt.
+
+Required properties:
+
+- compatible: Must be "allwinner,sun6i-a31-dma" or "allwinner,sun8i-a23-dma"
+- reg: Should contain the registers base address and length
+- interrupts: Should contain a reference to the interrupt used by this device
+- clocks: Should contain a reference to the parent AHB clock
+- resets: Should contain a reference to the reset controller asserting
+ this device in reset
+- #dma-cells : Should be 1, a single cell holding a line request number
+
+Example:
+ dma: dma-controller@01c02000 {
+ compatible = "allwinner,sun6i-a31-dma";
+ reg = <0x01c02000 0x1000>;
+ interrupts = <0 50 4>;
+ clocks = <&ahb1_gates 6>;
+ resets = <&ahb1_rst 6>;
+ #dma-cells = <1>;
+ };
+
+Clients:
+
+DMA clients connected to the A31 DMA controller must use the format
+described in the dma.txt file, using a two-cell specifier for each
+channel: a phandle plus one integer cells.
+The two cells in order are:
+
+1. A phandle pointing to the DMA controller.
+2. The port ID as specified in the datasheet
+
+Example:
+spi2: spi@01c6a000 {
+ compatible = "allwinner,sun6i-a31-spi";
+ reg = <0x01c6a000 0x1000>;
+ interrupts = <0 67 4>;
+ clocks = <&ahb1_gates 22>, <&spi2_clk>;
+ clock-names = "ahb", "mod";
+ dmas = <&dma 25>, <&dma 25>;
+ dma-names = "rx", "tx";
+ resets = <&ahb1_rst 22>;
+};
diff --git a/Documentation/devicetree/bindings/dma/tegra20-apbdma.txt b/Documentation/devicetree/bindings/dma/tegra20-apbdma.txt
new file mode 100644
index 000000000..c6908e7c4
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/tegra20-apbdma.txt
@@ -0,0 +1,44 @@
+* NVIDIA Tegra APB DMA controller
+
+Required properties:
+- compatible: Should be "nvidia,<chip>-apbdma"
+- reg: Should contain DMA registers location and length. This shuld include
+ all of the per-channel registers.
+- interrupts: Should contain all of the per-channel DMA interrupts.
+- clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - dma
+- #dma-cells : Must be <1>. This dictates the length of DMA specifiers in
+ client nodes' dmas properties. The specifier represents the DMA request
+ select value for the peripheral. For more details, consult the Tegra TRM's
+ documentation of the APB DMA channel control register REQ_SEL field.
+
+Examples:
+
+apbdma: dma@6000a000 {
+ compatible = "nvidia,tegra20-apbdma";
+ reg = <0x6000a000 0x1200>;
+ interrupts = < 0 136 0x04
+ 0 137 0x04
+ 0 138 0x04
+ 0 139 0x04
+ 0 140 0x04
+ 0 141 0x04
+ 0 142 0x04
+ 0 143 0x04
+ 0 144 0x04
+ 0 145 0x04
+ 0 146 0x04
+ 0 147 0x04
+ 0 148 0x04
+ 0 149 0x04
+ 0 150 0x04
+ 0 151 0x04 >;
+ clocks = <&tegra_car 34>;
+ resets = <&tegra_car 34>;
+ reset-names = "dma";
+ #dma-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/dma/ti-edma.txt b/Documentation/devicetree/bindings/dma/ti-edma.txt
new file mode 100644
index 000000000..5ba525a10
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/ti-edma.txt
@@ -0,0 +1,35 @@
+TI EDMA
+
+Required properties:
+- compatible : "ti,edma3"
+- #dma-cells: Should be set to <1>
+ Clients should use a single channel number per DMA request.
+- reg: Memory map for accessing module
+- interrupt-parent: Interrupt controller the interrupt is routed through
+- interrupts: Exactly 3 interrupts need to be specified in the order:
+ 1. Transfer completion interrupt.
+ 2. Memory protection interrupt.
+ 3. Error interrupt.
+Optional properties:
+- ti,hwmods: Name of the hwmods associated to the EDMA
+- ti,edma-xbar-event-map: Crossbar event to channel map
+
+Deprecated properties:
+Listed here in case one wants to boot an old kernel with new DTB. These
+properties might need to be added to the new DTS files.
+- ti,edma-regions: Number of regions
+- ti,edma-slots: Number of slots
+- dma-channels: Specify total DMA channels per CC
+
+Example:
+
+edma: edma@49000000 {
+ reg = <0x49000000 0x10000>;
+ interrupt-parent = <&intc>;
+ interrupts = <12 13 14>;
+ compatible = "ti,edma3";
+ ti,hwmods = "tpcc", "tptc0", "tptc1", "tptc2";
+ #dma-cells = <1>;
+ ti,edma-xbar-event-map = /bits/ 16 <1 12
+ 2 13>;
+};
diff --git a/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt b/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
new file mode 100644
index 000000000..2291c4098
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
@@ -0,0 +1,65 @@
+Xilinx AXI DMA engine, it does transfers between memory and AXI4 stream
+target devices. It can be configured to have one channel or two channels.
+If configured as two channels, one is to transmit to the device and another
+is to receive from the device.
+
+Required properties:
+- compatible: Should be "xlnx,axi-dma-1.00.a"
+- #dma-cells: Should be <1>, see "dmas" property below
+- reg: Should contain DMA registers location and length.
+- dma-channel child node: Should have atleast one channel and can have upto
+ two channels per device. This node specifies the properties of each
+ DMA channel (see child node properties below).
+
+Optional properties:
+- xlnx,include-sg: Tells whether configured for Scatter-mode in
+ the hardware.
+
+Required child node properties:
+- compatible: It should be either "xlnx,axi-dma-mm2s-channel" or
+ "xlnx,axi-dma-s2mm-channel".
+- interrupts: Should contain per channel DMA interrupts.
+- xlnx,datawidth: Should contain the stream data width, take values
+ {32,64...1024}.
+
+Option child node properties:
+- xlnx,include-dre: Tells whether hardware is configured for Data
+ Realignment Engine.
+
+Example:
+++++++++
+
+axi_dma_0: axidma@40400000 {
+ compatible = "xlnx,axi-dma-1.00.a";
+ #dma_cells = <1>;
+ reg = < 0x40400000 0x10000 >;
+ dma-channel@40400000 {
+ compatible = "xlnx,axi-dma-mm2s-channel";
+ interrupts = < 0 59 4 >;
+ xlnx,datawidth = <0x40>;
+ } ;
+ dma-channel@40400030 {
+ compatible = "xlnx,axi-dma-s2mm-channel";
+ interrupts = < 0 58 4 >;
+ xlnx,datawidth = <0x40>;
+ } ;
+} ;
+
+
+* DMA client
+
+Required properties:
+- dmas: a list of <[DMA device phandle] [Channel ID]> pairs,
+ where Channel ID is '0' for write/tx and '1' for read/rx
+ channel.
+- dma-names: a list of DMA channel names, one per "dmas" entry
+
+Example:
+++++++++
+
+dmatest_0: dmatest@0 {
+ compatible ="xlnx,axi-dma-test-1.00.a";
+ dmas = <&axi_dma_0 0
+ &axi_dma_0 1>;
+ dma-names = "dma0", "dma1";
+} ;
diff --git a/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
new file mode 100644
index 000000000..e4c4d47f8
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_vdma.txt
@@ -0,0 +1,75 @@
+Xilinx AXI VDMA engine, it does transfers between memory and video devices.
+It can be configured to have one channel or two channels. If configured
+as two channels, one is to transmit to the video device and another is
+to receive from the video device.
+
+Required properties:
+- compatible: Should be "xlnx,axi-vdma-1.00.a"
+- #dma-cells: Should be <1>, see "dmas" property below
+- reg: Should contain VDMA registers location and length.
+- xlnx,num-fstores: Should be the number of framebuffers as configured in h/w.
+- dma-channel child node: Should have at least one channel and can have up to
+ two channels per device. This node specifies the properties of each
+ DMA channel (see child node properties below).
+
+Optional properties:
+- xlnx,include-sg: Tells configured for Scatter-mode in
+ the hardware.
+- xlnx,flush-fsync: Tells which channel to Flush on Frame sync.
+ It takes following values:
+ {1}, flush both channels
+ {2}, flush mm2s channel
+ {3}, flush s2mm channel
+
+Required child node properties:
+- compatible: It should be either "xlnx,axi-vdma-mm2s-channel" or
+ "xlnx,axi-vdma-s2mm-channel".
+- interrupts: Should contain per channel VDMA interrupts.
+- xlnx,datawidth: Should contain the stream data width, take values
+ {32,64...1024}.
+
+Optional child node properties:
+- xlnx,include-dre: Tells hardware is configured for Data
+ Realignment Engine.
+- xlnx,genlock-mode: Tells Genlock synchronization is
+ enabled/disabled in hardware.
+
+Example:
+++++++++
+
+axi_vdma_0: axivdma@40030000 {
+ compatible = "xlnx,axi-vdma-1.00.a";
+ #dma_cells = <1>;
+ reg = < 0x40030000 0x10000 >;
+ xlnx,num-fstores = <0x8>;
+ xlnx,flush-fsync = <0x1>;
+ dma-channel@40030000 {
+ compatible = "xlnx,axi-vdma-mm2s-channel";
+ interrupts = < 0 54 4 >;
+ xlnx,datawidth = <0x40>;
+ } ;
+ dma-channel@40030030 {
+ compatible = "xlnx,axi-vdma-s2mm-channel";
+ interrupts = < 0 53 4 >;
+ xlnx,datawidth = <0x40>;
+ } ;
+} ;
+
+
+* DMA client
+
+Required properties:
+- dmas: a list of <[Video DMA device phandle] [Channel ID]> pairs,
+ where Channel ID is '0' for write/tx and '1' for read/rx
+ channel.
+- dma-names: a list of DMA channel names, one per "dmas" entry
+
+Example:
+++++++++
+
+vdmatest_0: vdmatest@0 {
+ compatible ="xlnx,axi-vdma-test-1.00.a";
+ dmas = <&axi_vdma_0 0
+ &axi_vdma_0 1>;
+ dma-names = "vdma0", "vdma1";
+} ;
diff --git a/Documentation/devicetree/bindings/drm/armada/marvell,dove-lcd.txt b/Documentation/devicetree/bindings/drm/armada/marvell,dove-lcd.txt
new file mode 100644
index 000000000..46525ea3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/armada/marvell,dove-lcd.txt
@@ -0,0 +1,30 @@
+Device Tree bindings for Armada DRM CRTC driver
+
+Required properties:
+ - compatible: value should be "marvell,dove-lcd".
+ - reg: base address and size of the LCD controller
+ - interrupts: single interrupt number for the LCD controller
+ - port: video output port with endpoints, as described by graph.txt
+
+Optional properties:
+
+ - clocks: as described by clock-bindings.txt
+ - clock-names: as described by clock-bindings.txt
+ "axiclk" - axi bus clock for pixel clock
+ "plldivider" - pll divider clock for pixel clock
+ "ext_ref_clk0" - external clock 0 for pixel clock
+ "ext_ref_clk1" - external clock 1 for pixel clock
+
+Note: all clocks are optional but at least one must be specified.
+Further clocks may be added in the future according to requirements of
+different SoCs.
+
+Example:
+
+ lcd0: lcd-controller@820000 {
+ compatible = "marvell,dove-lcd";
+ reg = <0x820000 0x1000>;
+ interrupts = <47>;
+ clocks = <&si5351 0>;
+ clock-names = "ext_ref_clk_1";
+ };
diff --git a/Documentation/devicetree/bindings/drm/atmel/hlcdc-dc.txt b/Documentation/devicetree/bindings/drm/atmel/hlcdc-dc.txt
new file mode 100644
index 000000000..ebc1a914b
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/atmel/hlcdc-dc.txt
@@ -0,0 +1,53 @@
+Device-Tree bindings for Atmel's HLCDC (High LCD Controller) DRM driver
+
+The Atmel HLCDC Display Controller is subdevice of the HLCDC MFD device.
+See ../mfd/atmel-hlcdc.txt for more details.
+
+Required properties:
+ - compatible: value should be "atmel,hlcdc-display-controller"
+ - pinctrl-names: the pin control state names. Should contain "default".
+ - pinctrl-0: should contain the default pinctrl states.
+ - #address-cells: should be set to 1.
+ - #size-cells: should be set to 0.
+
+Required children nodes:
+ Children nodes are encoding available output ports and their connections
+ to external devices using the OF graph reprensentation (see ../graph.txt).
+ At least one port node is required.
+
+Example:
+
+ hlcdc: hlcdc@f0030000 {
+ compatible = "atmel,sama5d3-hlcdc";
+ reg = <0xf0030000 0x2000>;
+ interrupts = <36 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&lcdc_clk>, <&lcdck>, <&clk32k>;
+ clock-names = "periph_clk","sys_clk", "slow_clk";
+ status = "disabled";
+
+ hlcdc-display-controller {
+ compatible = "atmel,hlcdc-display-controller";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_lcd_base &pinctrl_lcd_rgb888>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ hlcdc_panel_output: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&panel_input>;
+ };
+ };
+ };
+
+ hlcdc_pwm: hlcdc-pwm {
+ compatible = "atmel,hlcdc-pwm";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_lcd_pwm>;
+ #pwm-cells = <3>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/drm/bridge/dw_hdmi.txt b/Documentation/devicetree/bindings/drm/bridge/dw_hdmi.txt
new file mode 100644
index 000000000..a905c1413
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/bridge/dw_hdmi.txt
@@ -0,0 +1,50 @@
+DesignWare HDMI bridge bindings
+
+Required properties:
+- compatible: platform specific such as:
+ * "snps,dw-hdmi-tx"
+ * "fsl,imx6q-hdmi"
+ * "fsl,imx6dl-hdmi"
+ * "rockchip,rk3288-dw-hdmi"
+- reg: Physical base address and length of the controller's registers.
+- interrupts: The HDMI interrupt number
+- clocks, clock-names : must have the phandles to the HDMI iahb and isfr clocks,
+ as described in Documentation/devicetree/bindings/clock/clock-bindings.txt,
+ the clocks are soc specific, the clock-names should be "iahb", "isfr"
+-port@[X]: SoC specific port nodes with endpoint definitions as defined
+ in Documentation/devicetree/bindings/media/video-interfaces.txt,
+ please refer to the SoC specific binding document:
+ * Documentation/devicetree/bindings/drm/imx/hdmi.txt
+ * Documentation/devicetree/bindings/video/dw_hdmi-rockchip.txt
+
+Optional properties
+- reg-io-width: the width of the reg:1,4, default set to 1 if not present
+- ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
+- clocks, clock-names: phandle to the HDMI CEC clock, name should be "cec"
+
+Example:
+ hdmi: hdmi@0120000 {
+ compatible = "fsl,imx6q-hdmi";
+ reg = <0x00120000 0x9000>;
+ interrupts = <0 115 0x04>;
+ gpr = <&gpr>;
+ clocks = <&clks 123>, <&clks 124>;
+ clock-names = "iahb", "isfr";
+ ddc-i2c-bus = <&i2c2>;
+
+ port@0 {
+ reg = <0>;
+
+ hdmi_mux_0: endpoint {
+ remote-endpoint = <&ipu1_di0_hdmi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ hdmi_mux_1: endpoint {
+ remote-endpoint = <&ipu1_di1_hdmi>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/drm/i2c/tda998x.txt b/Documentation/devicetree/bindings/drm/i2c/tda998x.txt
new file mode 100644
index 000000000..e9e4bce40
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/i2c/tda998x.txt
@@ -0,0 +1,29 @@
+Device-Tree bindings for the NXP TDA998x HDMI transmitter
+
+Required properties;
+ - compatible: must be "nxp,tda998x"
+
+ - reg: I2C address
+
+Optional properties:
+ - interrupts: interrupt number and trigger type
+ default: polling
+
+ - pinctrl-0: pin control group to be used for
+ screen plug/unplug interrupt.
+
+ - pinctrl-names: must contain a "default" entry.
+
+ - video-ports: 24 bits value which defines how the video controller
+ output is wired to the TDA998x input - default: <0x230145>
+
+Example:
+
+ tda998x: hdmi-encoder {
+ compatible = "nxp,tda998x";
+ reg = <0x70>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <27 2>; /* falling edge */
+ pinctrl-0 = <&pmx_camera>;
+ pinctrl-names = "default";
+ };
diff --git a/Documentation/devicetree/bindings/drm/imx/fsl-imx-drm.txt b/Documentation/devicetree/bindings/drm/imx/fsl-imx-drm.txt
new file mode 100644
index 000000000..e75f0e549
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/imx/fsl-imx-drm.txt
@@ -0,0 +1,83 @@
+Freescale i.MX DRM master device
+================================
+
+The freescale i.MX DRM master device is a virtual device needed to list all
+IPU or other display interface nodes that comprise the graphics subsystem.
+
+Required properties:
+- compatible: Should be "fsl,imx-display-subsystem"
+- ports: Should contain a list of phandles pointing to display interface ports
+ of IPU devices
+
+example:
+
+display-subsystem {
+ compatible = "fsl,display-subsystem";
+ ports = <&ipu_di0>;
+};
+
+
+Freescale i.MX IPUv3
+====================
+
+Required properties:
+- compatible: Should be "fsl,<chip>-ipu"
+- reg: should be register base and length as documented in the
+ datasheet
+- interrupts: Should contain sync interrupt and error interrupt,
+ in this order.
+- resets: phandle pointing to the system reset controller and
+ reset line index, see reset/fsl,imx-src.txt for details
+Optional properties:
+- port@[0-3]: Port nodes with endpoint definitions as defined in
+ Documentation/devicetree/bindings/media/video-interfaces.txt.
+ Ports 0 and 1 should correspond to CSI0 and CSI1,
+ ports 2 and 3 should correspond to DI0 and DI1, respectively.
+
+example:
+
+ipu: ipu@18000000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx53-ipu";
+ reg = <0x18000000 0x080000000>;
+ interrupts = <11 10>;
+ resets = <&src 2>;
+
+ ipu_di0: port@2 {
+ reg = <2>;
+
+ ipu_di0_disp0: endpoint {
+ remote-endpoint = <&display_in>;
+ };
+ };
+};
+
+Parallel display support
+========================
+
+Required properties:
+- compatible: Should be "fsl,imx-parallel-display"
+Optional properties:
+- interface_pix_fmt: How this display is connected to the
+ display interface. Currently supported types: "rgb24", "rgb565", "bgr666"
+ and "lvds666".
+- edid: verbatim EDID data block describing attached display.
+- ddc: phandle describing the i2c bus handling the display data
+ channel
+- port: A port node with endpoint definitions as defined in
+ Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+example:
+
+display@di0 {
+ compatible = "fsl,imx-parallel-display";
+ edid = [edid-data];
+ interface-pix-fmt = "rgb24";
+
+ port {
+ display_in: endpoint {
+ remote-endpoint = <&ipu_di0_disp0>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/drm/imx/hdmi.txt b/Documentation/devicetree/bindings/drm/imx/hdmi.txt
new file mode 100644
index 000000000..1b756cf9a
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/imx/hdmi.txt
@@ -0,0 +1,58 @@
+Device-Tree bindings for HDMI Transmitter
+
+HDMI Transmitter
+================
+
+The HDMI Transmitter is a Synopsys DesignWare HDMI 1.4 TX controller IP
+with accompanying PHY IP.
+
+Required properties:
+ - #address-cells : should be <1>
+ - #size-cells : should be <0>
+ - compatible : should be "fsl,imx6q-hdmi" or "fsl,imx6dl-hdmi".
+ - gpr : should be <&gpr>.
+ The phandle points to the iomuxc-gpr region containing the HDMI
+ multiplexer control register.
+ - clocks, clock-names : phandles to the HDMI iahb and isrf clocks, as described
+ in Documentation/devicetree/bindings/clock/clock-bindings.txt and
+ Documentation/devicetree/bindings/clock/imx6q-clock.txt.
+ - port@[0-4]: Up to four port nodes with endpoint definitions as defined in
+ Documentation/devicetree/bindings/media/video-interfaces.txt,
+ corresponding to the four inputs to the HDMI multiplexer.
+
+Optional properties:
+ - ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
+
+example:
+
+ gpr: iomuxc-gpr@020e0000 {
+ /* ... */
+ };
+
+ hdmi: hdmi@0120000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx6q-hdmi";
+ reg = <0x00120000 0x9000>;
+ interrupts = <0 115 0x04>;
+ gpr = <&gpr>;
+ clocks = <&clks 123>, <&clks 124>;
+ clock-names = "iahb", "isfr";
+ ddc-i2c-bus = <&i2c2>;
+
+ port@0 {
+ reg = <0>;
+
+ hdmi_mux_0: endpoint {
+ remote-endpoint = <&ipu1_di0_hdmi>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ hdmi_mux_1: endpoint {
+ remote-endpoint = <&ipu1_di1_hdmi>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/drm/imx/ldb.txt b/Documentation/devicetree/bindings/drm/imx/ldb.txt
new file mode 100644
index 000000000..9a2136643
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/imx/ldb.txt
@@ -0,0 +1,146 @@
+Device-Tree bindings for LVDS Display Bridge (ldb)
+
+LVDS Display Bridge
+===================
+
+The LVDS Display Bridge device tree node contains up to two lvds-channel
+nodes describing each of the two LVDS encoder channels of the bridge.
+
+Required properties:
+ - #address-cells : should be <1>
+ - #size-cells : should be <0>
+ - compatible : should be "fsl,imx53-ldb" or "fsl,imx6q-ldb".
+ Both LDB versions are similar, but i.MX6 has an additional
+ multiplexer in the front to select any of the four IPU display
+ interfaces as input for each LVDS channel.
+ - gpr : should be <&gpr> on i.MX53 and i.MX6q.
+ The phandle points to the iomuxc-gpr region containing the LVDS
+ control register.
+- clocks, clock-names : phandles to the LDB divider and selector clocks and to
+ the display interface selector clocks, as described in
+ Documentation/devicetree/bindings/clock/clock-bindings.txt
+ The following clocks are expected on i.MX53:
+ "di0_pll" - LDB LVDS channel 0 mux
+ "di1_pll" - LDB LVDS channel 1 mux
+ "di0" - LDB LVDS channel 0 gate
+ "di1" - LDB LVDS channel 1 gate
+ "di0_sel" - IPU1 DI0 mux
+ "di1_sel" - IPU1 DI1 mux
+ On i.MX6q the following additional clocks are needed:
+ "di2_sel" - IPU2 DI0 mux
+ "di3_sel" - IPU2 DI1 mux
+ The needed clock numbers for each are documented in
+ Documentation/devicetree/bindings/clock/imx5-clock.txt, and in
+ Documentation/devicetree/bindings/clock/imx6q-clock.txt.
+
+Optional properties:
+ - pinctrl-names : should be "default" on i.MX53, not used on i.MX6q
+ - pinctrl-0 : a phandle pointing to LVDS pin settings on i.MX53,
+ not used on i.MX6q
+ - fsl,dual-channel : boolean. if it exists, only LVDS channel 0 should
+ be configured - one input will be distributed on both outputs in dual
+ channel mode
+
+LVDS Channel
+============
+
+Each LVDS Channel has to contain either an of graph link to a panel device node
+or a display-timings node that describes the video timings for the connected
+LVDS display as well as the fsl,data-mapping and fsl,data-width properties.
+
+Required properties:
+ - reg : should be <0> or <1>
+ - port: Input and output port nodes with endpoint definitions as defined in
+ Documentation/devicetree/bindings/graph.txt.
+ On i.MX5, the internal two-input-multiplexer is used. Due to hardware
+ limitations, only one input port (port@[0,1]) can be used for each channel
+ (lvds-channel@[0,1], respectively).
+ On i.MX6, there should be four input ports (port@[0-3]) that correspond
+ to the four LVDS multiplexer inputs.
+ A single output port (port@2 on i.MX5, port@4 on i.MX6) must be connected
+ to a panel input port. Optionally, the output port can be left out if
+ display-timings are used instead.
+
+Optional properties (required if display-timings are used):
+ - display-timings : A node that describes the display timings as defined in
+ Documentation/devicetree/bindings/video/display-timing.txt.
+ - fsl,data-mapping : should be "spwg" or "jeida"
+ This describes how the color bits are laid out in the
+ serialized LVDS signal.
+ - fsl,data-width : should be <18> or <24>
+
+example:
+
+gpr: iomuxc-gpr@53fa8000 {
+ /* ... */
+};
+
+ldb: ldb@53fa8008 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx53-ldb";
+ gpr = <&gpr>;
+ clocks = <&clks IMX5_CLK_LDB_DI0_SEL>,
+ <&clks IMX5_CLK_LDB_DI1_SEL>,
+ <&clks IMX5_CLK_IPU_DI0_SEL>,
+ <&clks IMX5_CLK_IPU_DI1_SEL>,
+ <&clks IMX5_CLK_LDB_DI0_GATE>,
+ <&clks IMX5_CLK_LDB_DI1_GATE>;
+ clock-names = "di0_pll", "di1_pll",
+ "di0_sel", "di1_sel",
+ "di0", "di1";
+
+ /* Using an of-graph endpoint link to connect the panel */
+ lvds-channel@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ lvds0_in: endpoint {
+ remote-endpoint = <&ipu_di0_lvds0>;
+ };
+ };
+
+ port@2 {
+ reg = <2>;
+
+ lvds0_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+
+ /* Using display-timings and fsl,data-mapping/width instead */
+ lvds-channel@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>;
+ fsl,data-mapping = "spwg";
+ fsl,data-width = <24>;
+
+ display-timings {
+ /* ... */
+ };
+
+ port@1 {
+ reg = <1>;
+
+ lvds1_in: endpoint {
+ remote-endpoint = <&ipu_di1_lvds1>;
+ };
+ };
+ };
+};
+
+panel: lvds-panel {
+ /* ... */
+
+ port {
+ panel_in: endpoint {
+ remote-endpoint = <&lvds0_out>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/drm/msm/gpu.txt b/Documentation/devicetree/bindings/drm/msm/gpu.txt
new file mode 100644
index 000000000..67d0a58db
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/msm/gpu.txt
@@ -0,0 +1,52 @@
+Qualcomm adreno/snapdragon GPU
+
+Required properties:
+- compatible: "qcom,adreno-3xx"
+- reg: Physical base address and length of the controller's registers.
+- interrupts: The interrupt signal from the gpu.
+- clocks: device clocks
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: the following clocks are required:
+ * "core_clk"
+ * "iface_clk"
+ * "mem_iface_clk"
+- qcom,chipid: gpu chip-id. Note this may become optional for future
+ devices if we can reliably read the chipid from hw
+- qcom,gpu-pwrlevels: list of operating points
+ - compatible: "qcom,gpu-pwrlevels"
+ - for each qcom,gpu-pwrlevel:
+ - qcom,gpu-freq: requested gpu clock speed
+ - NOTE: downstream android driver defines additional parameters to
+ configure memory bandwidth scaling per OPP.
+
+Example:
+
+/ {
+ ...
+
+ gpu: qcom,kgsl-3d0@4300000 {
+ compatible = "qcom,adreno-3xx";
+ reg = <0x04300000 0x20000>;
+ reg-names = "kgsl_3d0_reg_memory";
+ interrupts = <GIC_SPI 80 0>;
+ interrupt-names = "kgsl_3d0_irq";
+ clock-names =
+ "core_clk",
+ "iface_clk",
+ "mem_iface_clk";
+ clocks =
+ <&mmcc GFX3D_CLK>,
+ <&mmcc GFX3D_AHB_CLK>,
+ <&mmcc MMSS_IMEM_AHB_CLK>;
+ qcom,chipid = <0x03020100>;
+ qcom,gpu-pwrlevels {
+ compatible = "qcom,gpu-pwrlevels";
+ qcom,gpu-pwrlevel@0 {
+ qcom,gpu-freq = <450000000>;
+ };
+ qcom,gpu-pwrlevel@1 {
+ qcom,gpu-freq = <27000000>;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/drm/msm/hdmi.txt b/Documentation/devicetree/bindings/drm/msm/hdmi.txt
new file mode 100644
index 000000000..a29a55f3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/msm/hdmi.txt
@@ -0,0 +1,48 @@
+Qualcomm adreno/snapdragon hdmi output
+
+Required properties:
+- compatible: one of the following
+ * "qcom,hdmi-tx-8084"
+ * "qcom,hdmi-tx-8074"
+ * "qcom,hdmi-tx-8660"
+ * "qcom,hdmi-tx-8960"
+- reg: Physical base address and length of the controller's registers
+- reg-names: "core_physical"
+- interrupts: The interrupt signal from the hdmi block.
+- clocks: device clocks
+ See ../clocks/clock-bindings.txt for details.
+- qcom,hdmi-tx-ddc-clk-gpio: ddc clk pin
+- qcom,hdmi-tx-ddc-data-gpio: ddc data pin
+- qcom,hdmi-tx-hpd-gpio: hpd pin
+- core-vdda-supply: phandle to supply regulator
+- hdmi-mux-supply: phandle to mux regulator
+
+Optional properties:
+- qcom,hdmi-tx-mux-en-gpio: hdmi mux enable pin
+- qcom,hdmi-tx-mux-sel-gpio: hdmi mux select pin
+
+Example:
+
+/ {
+ ...
+
+ hdmi: qcom,hdmi-tx-8960@4a00000 {
+ compatible = "qcom,hdmi-tx-8960";
+ reg-names = "core_physical";
+ reg = <0x04a00000 0x1000>;
+ interrupts = <GIC_SPI 79 0>;
+ clock-names =
+ "core_clk",
+ "master_iface_clk",
+ "slave_iface_clk";
+ clocks =
+ <&mmcc HDMI_APP_CLK>,
+ <&mmcc HDMI_M_AHB_CLK>,
+ <&mmcc HDMI_S_AHB_CLK>;
+ qcom,hdmi-tx-ddc-clk = <&msmgpio 70 GPIO_ACTIVE_HIGH>;
+ qcom,hdmi-tx-ddc-data = <&msmgpio 71 GPIO_ACTIVE_HIGH>;
+ qcom,hdmi-tx-hpd = <&msmgpio 72 GPIO_ACTIVE_HIGH>;
+ core-vdda-supply = <&pm8921_hdmi_mvs>;
+ hdmi-mux-supply = <&ext_3p3v>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/drm/msm/mdp.txt b/Documentation/devicetree/bindings/drm/msm/mdp.txt
new file mode 100644
index 000000000..1a0598e52
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/msm/mdp.txt
@@ -0,0 +1,48 @@
+Qualcomm adreno/snapdragon display controller
+
+Required properties:
+- compatible:
+ * "qcom,mdp" - mdp4
+- reg: Physical base address and length of the controller's registers.
+- interrupts: The interrupt signal from the display controller.
+- connectors: array of phandles for output device(s)
+- clocks: device clocks
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: the following clocks are required:
+ * "core_clk"
+ * "iface_clk"
+ * "lut_clk"
+ * "src_clk"
+ * "hdmi_clk"
+ * "mpd_clk"
+
+Optional properties:
+- gpus: phandle for gpu device
+
+Example:
+
+/ {
+ ...
+
+ mdp: qcom,mdp@5100000 {
+ compatible = "qcom,mdp";
+ reg = <0x05100000 0xf0000>;
+ interrupts = <GIC_SPI 75 0>;
+ connectors = <&hdmi>;
+ gpus = <&gpu>;
+ clock-names =
+ "core_clk",
+ "iface_clk",
+ "lut_clk",
+ "src_clk",
+ "hdmi_clk",
+ "mdp_clk";
+ clocks =
+ <&mmcc MDP_SRC>,
+ <&mmcc MDP_AHB_CLK>,
+ <&mmcc MDP_LUT_CLK>,
+ <&mmcc TV_SRC>,
+ <&mmcc HDMI_TV_CLK>,
+ <&mmcc MDP_TV_CLK>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/drm/tilcdc/panel.txt b/Documentation/devicetree/bindings/drm/tilcdc/panel.txt
new file mode 100644
index 000000000..4ab9e2300
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/tilcdc/panel.txt
@@ -0,0 +1,66 @@
+Device-Tree bindings for tilcdc DRM generic panel output driver
+
+Required properties:
+ - compatible: value should be "ti,tilcdc,panel".
+ - panel-info: configuration info to configure LCDC correctly for the panel
+ - ac-bias: AC Bias Pin Frequency
+ - ac-bias-intrpt: AC Bias Pin Transitions per Interrupt
+ - dma-burst-sz: DMA burst size
+ - bpp: Bits per pixel
+ - fdd: FIFO DMA Request Delay
+ - sync-edge: Horizontal and Vertical Sync Edge: 0=rising 1=falling
+ - sync-ctrl: Horizontal and Vertical Sync: Control: 0=ignore
+ - raster-order: Raster Data Order Select: 1=Most-to-least 0=Least-to-most
+ - fifo-th: DMA FIFO threshold
+ - display-timings: typical videomode of lcd panel. Multiple video modes
+ can be listed if the panel supports multiple timings, but the 'native-mode'
+ should be the preferred/default resolution. Refer to
+ Documentation/devicetree/bindings/video/display-timing.txt for display
+ timing binding details.
+
+Optional properties:
+- backlight: phandle of the backlight device attached to the panel
+- enable-gpios: GPIO pin to enable or disable the panel
+
+Recommended properties:
+ - pinctrl-names, pinctrl-0: the pincontrol settings to configure
+ muxing properly for pins that connect to TFP410 device
+
+Example:
+
+ /* Settings for CDTech_S035Q01 / LCD3 cape: */
+ lcd3 {
+ compatible = "ti,tilcdc,panel";
+ pinctrl-names = "default";
+ pinctrl-0 = <&bone_lcd3_cape_lcd_pins>;
+ backlight = <&backlight>;
+ enable-gpios = <&gpio3 19 0>;
+
+ panel-info {
+ ac-bias = <255>;
+ ac-bias-intrpt = <0>;
+ dma-burst-sz = <16>;
+ bpp = <16>;
+ fdd = <0x80>;
+ sync-edge = <0>;
+ sync-ctrl = <1>;
+ raster-order = <0>;
+ fifo-th = <0>;
+ };
+ display-timings {
+ native-mode = <&timing0>;
+ timing0: 320x240 {
+ hactive = <320>;
+ vactive = <240>;
+ hback-porch = <21>;
+ hfront-porch = <58>;
+ hsync-len = <47>;
+ vback-porch = <11>;
+ vfront-porch = <23>;
+ vsync-len = <2>;
+ clock-frequency = <8000000>;
+ hsync-active = <0>;
+ vsync-active = <0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/drm/tilcdc/slave.txt b/Documentation/devicetree/bindings/drm/tilcdc/slave.txt
new file mode 100644
index 000000000..3d2c52460
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/tilcdc/slave.txt
@@ -0,0 +1,18 @@
+Device-Tree bindings for tilcdc DRM encoder slave output driver
+
+Required properties:
+ - compatible: value should be "ti,tilcdc,slave".
+ - i2c: the phandle for the i2c device the encoder slave is connected to
+
+Recommended properties:
+ - pinctrl-names, pinctrl-0: the pincontrol settings to configure
+ muxing properly for pins that connect to TFP410 device
+
+Example:
+
+ hdmi {
+ compatible = "ti,tilcdc,slave";
+ i2c = <&i2c0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&nxp_hdmi_bonelt_pins>;
+ };
diff --git a/Documentation/devicetree/bindings/drm/tilcdc/tfp410.txt b/Documentation/devicetree/bindings/drm/tilcdc/tfp410.txt
new file mode 100644
index 000000000..a58ae7756
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/tilcdc/tfp410.txt
@@ -0,0 +1,21 @@
+Device-Tree bindings for tilcdc DRM TFP410 output driver
+
+Required properties:
+ - compatible: value should be "ti,tilcdc,tfp410".
+ - i2c: the phandle for the i2c device to use for DDC
+
+Recommended properties:
+ - pinctrl-names, pinctrl-0: the pincontrol settings to configure
+ muxing properly for pins that connect to TFP410 device
+ - powerdn-gpio: the powerdown GPIO, pulled low to power down the
+ TFP410 device (for DPMS_OFF)
+
+Example:
+
+ dvicape {
+ compatible = "ti,tilcdc,tfp410";
+ i2c = <&i2c2>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&bone_dvi_cape_dvi_00A1_pins>;
+ powerdn-gpio = <&gpio2 31 0>;
+ };
diff --git a/Documentation/devicetree/bindings/drm/tilcdc/tilcdc.txt b/Documentation/devicetree/bindings/drm/tilcdc/tilcdc.txt
new file mode 100644
index 000000000..fff10da5e
--- /dev/null
+++ b/Documentation/devicetree/bindings/drm/tilcdc/tilcdc.txt
@@ -0,0 +1,29 @@
+Device-Tree bindings for tilcdc DRM driver
+
+Required properties:
+ - compatible: value should be "ti,am33xx-tilcdc".
+ - interrupts: the interrupt number
+ - reg: base address and size of the LCDC device
+
+Recommended properties:
+ - interrupt-parent: the phandle for the interrupt controller that
+ services interrupts for this device.
+ - ti,hwmods: Name of the hwmod associated to the LCDC
+
+Optional properties:
+ - max-bandwidth: The maximum pixels per second that the memory
+ interface / lcd controller combination can sustain
+ - max-width: The maximum horizontal pixel width supported by
+ the lcd controller.
+ - max-pixelclock: The maximum pixel clock that can be supported
+ by the lcd controller in KHz.
+
+Example:
+
+ fb: fb@4830e000 {
+ compatible = "ti,am33xx-tilcdc";
+ reg = <0x4830e000 0x1000>;
+ interrupt-parent = <&intc>;
+ interrupts = <36>;
+ ti,hwmods = "lcdc";
+ };
diff --git a/Documentation/devicetree/bindings/eeprom.txt b/Documentation/devicetree/bindings/eeprom.txt
new file mode 100644
index 000000000..4342c10de
--- /dev/null
+++ b/Documentation/devicetree/bindings/eeprom.txt
@@ -0,0 +1,28 @@
+EEPROMs (I2C)
+
+Required properties:
+
+ - compatible : should be "<manufacturer>,<type>"
+ If there is no specific driver for <manufacturer>, a generic
+ driver based on <type> is selected. Possible types are:
+ 24c00, 24c01, 24c02, 24c04, 24c08, 24c16, 24c32, 24c64,
+ 24c128, 24c256, 24c512, 24c1024, spd
+
+ - reg : the I2C address of the EEPROM
+
+Optional properties:
+
+ - pagesize : the length of the pagesize for writing. Please consult the
+ manual of your device, that value varies a lot. A wrong value
+ may result in data loss! If not specified, a safety value of
+ '1' is used which will be very slow.
+
+ - read-only: this parameterless property disables writes to the eeprom
+
+Example:
+
+eeprom@52 {
+ compatible = "atmel,24c32";
+ reg = <0x52>;
+ pagesize = <32>;
+};
diff --git a/Documentation/devicetree/bindings/extcon/extcon-palmas.txt b/Documentation/devicetree/bindings/extcon/extcon-palmas.txt
new file mode 100644
index 000000000..45414bbcd
--- /dev/null
+++ b/Documentation/devicetree/bindings/extcon/extcon-palmas.txt
@@ -0,0 +1,19 @@
+EXTCON FOR PALMAS/TWL CHIPS
+
+PALMAS USB COMPARATOR
+Required Properties:
+ - compatible: should contain one of:
+ * "ti,palmas-usb-vid".
+ * "ti,twl6035-usb-vid".
+ * "ti,palmas-usb" (DEPRECATED - use "ti,palmas-usb-vid").
+ * "ti,twl6035-usb" (DEPRECATED - use "ti,twl6035-usb-vid").
+
+Optional Properties:
+ - ti,wakeup : To enable the wakeup comparator in probe
+ - ti,enable-id-detection: Perform ID detection.
+ - ti,enable-vbus-detection: Perform VBUS detection.
+
+palmas-usb {
+ compatible = "ti,twl6035-usb", "ti,palmas-usb";
+ ti,wakeup;
+};
diff --git a/Documentation/devicetree/bindings/extcon/extcon-rt8973a.txt b/Documentation/devicetree/bindings/extcon/extcon-rt8973a.txt
new file mode 100644
index 000000000..6dede7d11
--- /dev/null
+++ b/Documentation/devicetree/bindings/extcon/extcon-rt8973a.txt
@@ -0,0 +1,25 @@
+
+* Richtek RT8973A - Micro USB Switch device
+
+The Richtek RT8973A is Micro USB Switch with OVP and I2C interface. The RT8973A
+is a USB port accessory detector and switch that is optimized to protect low
+voltage system from abnormal high input voltage (up to 28V) and supports high
+speed USB operation. Also, RT8973A support 'auto-configuration' mode.
+If auto-configuration mode is enabled, RT8973A would control internal h/w patch
+for USB D-/D+ switching.
+
+Required properties:
+- compatible: Should be "richtek,rt8973a-muic"
+- reg: Specifies the I2C slave address of the MUIC block. It should be 0x14
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+ the interrupts from rt8973a are delivered to.
+- interrupts: Interrupt specifiers for detection interrupt sources.
+
+Example:
+
+ rt8973a@14 {
+ compatible = "richtek,rt8973a-muic";
+ interrupt-parent = <&gpx1>;
+ interrupts = <5 0>;
+ reg = <0x14>;
+ };
diff --git a/Documentation/devicetree/bindings/extcon/extcon-sm5502.txt b/Documentation/devicetree/bindings/extcon/extcon-sm5502.txt
new file mode 100644
index 000000000..4ecda2249
--- /dev/null
+++ b/Documentation/devicetree/bindings/extcon/extcon-sm5502.txt
@@ -0,0 +1,23 @@
+
+* SM5502 MUIC (Micro-USB Interface Controller) device
+
+The Silicon Mitus SM5502 is a MUIC (Micro-USB Interface Controller) device
+which can detect the state of external accessory when external accessory is
+attached or detached and button is pressed or released. It is interfaced to
+the host controller using an I2C interface.
+
+Required properties:
+- compatible: Should be "siliconmitus,sm5502-muic"
+- reg: Specifies the I2C slave address of the MUIC block. It should be 0x25
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+ the interrupts from sm5502 are delivered to.
+- interrupts: Interrupt specifiers for detection interrupt sources.
+
+Example:
+
+ sm5502@25 {
+ compatible = "siliconmitus,sm5502-muic";
+ interrupt-parent = <&gpx1>;
+ interrupts = <5 0>;
+ reg = <0x25>;
+ };
diff --git a/Documentation/devicetree/bindings/extcon/extcon-usb-gpio.txt b/Documentation/devicetree/bindings/extcon/extcon-usb-gpio.txt
new file mode 100644
index 000000000..af0b903de
--- /dev/null
+++ b/Documentation/devicetree/bindings/extcon/extcon-usb-gpio.txt
@@ -0,0 +1,18 @@
+USB GPIO Extcon device
+
+This is a virtual device used to generate USB cable states from the USB ID pin
+connected to a GPIO pin.
+
+Required properties:
+- compatible: Should be "linux,extcon-usb-gpio"
+- id-gpio: gpio for USB ID pin. See gpio binding.
+
+Example: Examples of extcon-usb-gpio node in dra7-evm.dts as listed below:
+ extcon_usb1 {
+ compatible = "linux,extcon-usb-gpio";
+ id-gpio = <&gpio6 1 GPIO_ACTIVE_HIGH>;
+ }
+
+ &omap_dwc3_1 {
+ extcon = <&extcon_usb1>;
+ };
diff --git a/Documentation/devicetree/bindings/fb/mxsfb.txt b/Documentation/devicetree/bindings/fb/mxsfb.txt
new file mode 100644
index 000000000..96ec5179c
--- /dev/null
+++ b/Documentation/devicetree/bindings/fb/mxsfb.txt
@@ -0,0 +1,49 @@
+* Freescale MXS LCD Interface (LCDIF)
+
+Required properties:
+- compatible: Should be "fsl,<chip>-lcdif". Supported chips include
+ imx23 and imx28.
+- reg: Address and length of the register set for lcdif
+- interrupts: Should contain lcdif interrupts
+- display : phandle to display node (see below for details)
+
+* display node
+
+Required properties:
+- bits-per-pixel : <16> for RGB565, <32> for RGB888/666.
+- bus-width : number of data lines. Could be <8>, <16>, <18> or <24>.
+
+Required sub-node:
+- display-timings : Refer to binding doc display-timing.txt for details.
+
+Examples:
+
+lcdif@80030000 {
+ compatible = "fsl,imx28-lcdif";
+ reg = <0x80030000 2000>;
+ interrupts = <38 86>;
+
+ display: display {
+ bits-per-pixel = <32>;
+ bus-width = <24>;
+
+ display-timings {
+ native-mode = <&timing0>;
+ timing0: timing0 {
+ clock-frequency = <33500000>;
+ hactive = <800>;
+ vactive = <480>;
+ hfront-porch = <164>;
+ hback-porch = <89>;
+ hsync-len = <10>;
+ vback-porch = <23>;
+ vfront-porch = <10>;
+ vsync-len = <10>;
+ hsync-active = <0>;
+ vsync-active = <0>;
+ de-active = <1>;
+ pixelclk-active = <0>;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/fb/sm501fb.txt b/Documentation/devicetree/bindings/fb/sm501fb.txt
new file mode 100644
index 000000000..9d9f00980
--- /dev/null
+++ b/Documentation/devicetree/bindings/fb/sm501fb.txt
@@ -0,0 +1,34 @@
+* SM SM501
+
+The SM SM501 is a LCD controller, with proper hardware, it can also
+drive DVI monitors.
+
+Required properties:
+- compatible : should be "smi,sm501".
+- reg : contain two entries:
+ - First entry: System Configuration register
+ - Second entry: IO space (Display Controller register)
+- interrupts : SMI interrupt to the cpu should be described here.
+- interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+
+Optional properties:
+- mode : select a video mode:
+ <xres>x<yres>[-<bpp>][@<refresh>]
+- edid : verbatim EDID data block describing attached display.
+ Data from the detailed timing descriptor will be used to
+ program the display controller.
+- little-endian: available on big endian systems, to
+ set different foreign endian.
+- big-endian: available on little endian systems, to
+ set different foreign endian.
+
+Example for MPC5200:
+ display@1,0 {
+ compatible = "smi,sm501";
+ reg = <1 0x00000000 0x00800000
+ 1 0x03e00000 0x00200000>;
+ interrupts = <1 1 3>;
+ mode = "640x480-32@60";
+ edid = [edid-data];
+ };
diff --git a/Documentation/devicetree/bindings/fpga/altera-socfpga-fpga-mgr.txt b/Documentation/devicetree/bindings/fpga/altera-socfpga-fpga-mgr.txt
new file mode 100644
index 000000000..9b027a615
--- /dev/null
+++ b/Documentation/devicetree/bindings/fpga/altera-socfpga-fpga-mgr.txt
@@ -0,0 +1,17 @@
+Altera SOCFPGA FPGA Manager
+
+Required properties:
+- compatible : should contain "altr,socfpga-fpga-mgr"
+- reg : base address and size for memory mapped io.
+ - The first index is for FPGA manager register access.
+ - The second index is for writing FPGA configuration data.
+- interrupts : interrupt for the FPGA Manager device.
+
+Example:
+
+ hps_0_fpgamgr: fpgamgr@0xff706000 {
+ compatible = "altr,socfpga-fpga-mgr";
+ reg = <0xFF706000 0x1000
+ 0xFFB90000 0x1000>;
+ interrupts = <0 175 4>;
+ };
diff --git a/Documentation/devicetree/bindings/fuse/nvidia,tegra20-fuse.txt b/Documentation/devicetree/bindings/fuse/nvidia,tegra20-fuse.txt
new file mode 100644
index 000000000..23e1d3194
--- /dev/null
+++ b/Documentation/devicetree/bindings/fuse/nvidia,tegra20-fuse.txt
@@ -0,0 +1,40 @@
+NVIDIA Tegra20/Tegra30/Tegr114/Tegra124 fuse block.
+
+Required properties:
+- compatible : For Tegra20, must contain "nvidia,tegra20-efuse". For Tegra30,
+ must contain "nvidia,tegra30-efuse". For Tegra114, must contain
+ "nvidia,tegra114-efuse". For Tegra124, must contain "nvidia,tegra124-efuse".
+ Otherwise, must contain "nvidia,<chip>-efuse", plus one of the above, where
+ <chip> is tegra132.
+ Details:
+ nvidia,tegra20-efuse: Tegra20 requires using APB DMA to read the fuse data
+ due to a hardware bug. Tegra20 also lacks certain information which is
+ available in later generations such as fab code, lot code, wafer id,..
+ nvidia,tegra30-efuse, nvidia,tegra114-efuse and nvidia,tegra124-efuse:
+ The differences between these SoCs are the size of the efuse array,
+ the location of the spare (OEM programmable) bits and the location of
+ the speedo data.
+- reg: Should contain 1 entry: the entry gives the physical address and length
+ of the fuse registers.
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - fuse
+- resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - fuse
+
+Example:
+
+ fuse@7000f800 {
+ compatible = "nvidia,tegra20-efuse";
+ reg = <0x7000F800 0x400>,
+ <0x70000000 0x400>;
+ clocks = <&tegra_car TEGRA20_CLK_FUSE>;
+ clock-names = "fuse";
+ resets = <&tegra_car 39>;
+ reset-names = "fuse";
+ };
+
+
diff --git a/Documentation/devicetree/bindings/gpio/8xxx_gpio.txt b/Documentation/devicetree/bindings/gpio/8xxx_gpio.txt
new file mode 100644
index 000000000..798cfc9d3
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/8xxx_gpio.txt
@@ -0,0 +1,74 @@
+GPIO controllers on MPC8xxx SoCs
+
+This is for the non-QE/CPM/GUTs GPIO controllers as found on
+8349, 8572, 8610 and compatible.
+
+Every GPIO controller node must have #gpio-cells property defined,
+this information will be used to translate gpio-specifiers.
+See bindings/gpio/gpio.txt for details of how to specify GPIO
+information for devices.
+
+The GPIO module usually is connected to the SoC's internal interrupt
+controller, see bindings/interrupt-controller/interrupts.txt (the
+interrupt client nodes section) for details how to specify this GPIO
+module's interrupt.
+
+The GPIO module may serve as another interrupt controller (cascaded to
+the SoC's internal interrupt controller). See the interrupt controller
+nodes section in bindings/interrupt-controller/interrupts.txt for
+details.
+
+Required properties:
+- compatible: "fsl,<chip>-gpio" followed by "fsl,mpc8349-gpio"
+ for 83xx, "fsl,mpc8572-gpio" for 85xx, or
+ "fsl,mpc8610-gpio" for 86xx.
+- #gpio-cells: Should be two. The first cell is the pin number
+ and the second cell is used to specify optional
+ parameters (currently unused).
+- interrupt-parent: Phandle for the interrupt controller that
+ services interrupts for this device.
+- interrupts: Interrupt mapping for GPIO IRQ.
+- gpio-controller: Marks the port as GPIO controller.
+
+Optional properties:
+- interrupt-controller: Empty boolean property which marks the GPIO
+ module as an IRQ controller.
+- #interrupt-cells: Should be two. Defines the number of integer
+ cells required to specify an interrupt within
+ this interrupt controller. The first cell
+ defines the pin number, the second cell
+ defines additional flags (trigger type,
+ trigger polarity). Note that the available
+ set of trigger conditions supported by the
+ GPIO module depends on the actual SoC.
+
+Example of gpio-controller nodes for a MPC8347 SoC:
+
+ gpio1: gpio-controller@c00 {
+ #gpio-cells = <2>;
+ compatible = "fsl,mpc8347-gpio", "fsl,mpc8349-gpio";
+ reg = <0xc00 0x100>;
+ interrupt-parent = <&ipic>;
+ interrupts = <74 0x8>;
+ gpio-controller;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ gpio2: gpio-controller@d00 {
+ #gpio-cells = <2>;
+ compatible = "fsl,mpc8347-gpio", "fsl,mpc8349-gpio";
+ reg = <0xd00 0x100>;
+ interrupt-parent = <&ipic>;
+ interrupts = <75 0x8>;
+ gpio-controller;
+ };
+
+Example of a peripheral using the GPIO module as an IRQ controller:
+
+ funkyfpga@0 {
+ compatible = "funky-fpga";
+ ...
+ interrupt-parent = <&gpio1>;
+ interrupts = <4 3>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/abilis,tb10x-gpio.txt b/Documentation/devicetree/bindings/gpio/abilis,tb10x-gpio.txt
new file mode 100644
index 000000000..00611acee
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/abilis,tb10x-gpio.txt
@@ -0,0 +1,36 @@
+* Abilis TB10x GPIO controller
+
+Required Properties:
+- compatible: Should be "abilis,tb10x-gpio"
+- reg: Address and length of the register set for the device
+- gpio-controller: Marks the device node as a gpio controller.
+- #gpio-cells: Should be <2>. The first cell is the pin number and the
+ second cell is used to specify optional parameters:
+ - bit 0 specifies polarity (0 for normal, 1 for inverted).
+- abilis,ngpio: the number of GPIO pins this driver controls.
+
+Optional Properties:
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: Should be <1>. Interrupts are triggered on both edges.
+- interrupts: Defines the interrupt line connecting this GPIO controller to
+ its parent interrupt controller.
+- interrupt-parent: Defines the parent interrupt controller.
+
+GPIO ranges are specified as described in
+Documentation/devicetree/bindings/gpio/gpio.txt
+
+Example:
+
+ gpioa: gpio@FF140000 {
+ compatible = "abilis,tb10x-gpio";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&tb10x_ictl>;
+ interrupts = <27 2>;
+ reg = <0xFF140000 0x1000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ abilis,ngpio = <3>;
+ gpio-ranges = <&iomux 0 0 0>;
+ gpio-ranges-group-names = "gpioa_pins";
+ };
diff --git a/Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt b/Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
new file mode 100644
index 000000000..4a63bc96b
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/brcm,kona-gpio.txt
@@ -0,0 +1,52 @@
+Broadcom Kona Family GPIO
+=========================
+
+This GPIO driver is used in the following Broadcom SoCs:
+ BCM11130, BCM11140, BCM11351, BCM28145, BCM28155
+
+The Broadcom GPIO Controller IP can be configured prior to synthesis to
+support up to 8 banks of 32 GPIOs where each bank has its own IRQ. The
+GPIO controller only supports edge, not level, triggering of interrupts.
+
+Required properties
+-------------------
+
+- compatible: "brcm,bcm11351-gpio", "brcm,kona-gpio"
+- reg: Physical base address and length of the controller's registers.
+- interrupts: The interrupt outputs from the controller. There is one GPIO
+ interrupt per GPIO bank. The number of interrupts listed depends on the
+ number of GPIO banks on the SoC. The interrupts must be ordered by bank,
+ starting with bank 0. There is always a 1:1 mapping between banks and
+ IRQs.
+- #gpio-cells: Should be <2>. The first cell is the pin number, the second
+ cell is used to specify optional parameters:
+ - bit 0 specifies polarity (0 for normal, 1 for inverted)
+ See also "gpio-specifier" in .../devicetree/bindings/gpio/gpio.txt.
+- #interrupt-cells: Should be <2>. The first cell is the GPIO number. The
+ second cell is used to specify flags. The following subset of flags is
+ supported:
+ - trigger type (bits[1:0]):
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 3 = low-to-high or high-to-low edge triggered
+ Valid values are 1, 2, 3
+ See also .../devicetree/bindings/interrupt-controller/interrupts.txt.
+- gpio-controller: Marks the device node as a GPIO controller.
+- interrupt-controller: Marks the device node as an interrupt controller.
+
+Example:
+ gpio: gpio@35003000 {
+ compatible = "brcm,bcm11351-gpio", "brcm,kona-gpio";
+ reg = <0x35003000 0x800>;
+ interrupts =
+ <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH
+ GIC_SPI 115 IRQ_TYPE_LEVEL_HIGH
+ GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH
+ GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH
+ GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH
+ GIC_SPI 111 IRQ_TYPE_LEVEL_HIGH>;
+ #gpio-cells = <2>;
+ #interrupt-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/cavium-octeon-gpio.txt b/Documentation/devicetree/bindings/gpio/cavium-octeon-gpio.txt
new file mode 100644
index 000000000..9d6dcd3fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/cavium-octeon-gpio.txt
@@ -0,0 +1,49 @@
+* General Purpose Input Output (GPIO) bus.
+
+Properties:
+- compatible: "cavium,octeon-3860-gpio"
+
+ Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
+
+- reg: The base address of the GPIO unit's register bank.
+
+- gpio-controller: This is a GPIO controller.
+
+- #gpio-cells: Must be <2>. The first cell is the GPIO pin.
+
+- interrupt-controller: The GPIO controller is also an interrupt
+ controller, many of its pins may be configured as an interrupt
+ source.
+
+- #interrupt-cells: Must be <2>. The first cell is the GPIO pin
+ connected to the interrupt source. The second cell is the interrupt
+ triggering protocol and may have one of four values:
+ 1 - edge triggered on the rising edge.
+ 2 - edge triggered on the falling edge
+ 4 - level triggered active high.
+ 8 - level triggered active low.
+
+- interrupts: Interrupt routing for each pin.
+
+Example:
+
+ gpio-controller@1070000000800 {
+ #gpio-cells = <2>;
+ compatible = "cavium,octeon-3860-gpio";
+ reg = <0x10700 0x00000800 0x0 0x100>;
+ gpio-controller;
+ /* Interrupts are specified by two parts:
+ * 1) GPIO pin number (0..15)
+ * 2) Triggering (1 - edge rising
+ * 2 - edge falling
+ * 4 - level active high
+ * 8 - level active low)
+ */
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ /* The GPIO pin connect to 16 consecutive CUI bits */
+ interrupts = <0 16>, <0 17>, <0 18>, <0 19>,
+ <0 20>, <0 21>, <0 22>, <0 23>,
+ <0 24>, <0 25>, <0 26>, <0 27>,
+ <0 28>, <0 29>, <0 30>, <0 31>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.txt b/Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.txt
new file mode 100644
index 000000000..94ae9f82d
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/cirrus,clps711x-mctrl-gpio.txt
@@ -0,0 +1,17 @@
+* ARM Cirrus Logic CLPS711X SYSFLG1 MCTRL GPIOs
+
+Required properties:
+- compatible: Should contain "cirrus,clps711x-mctrl-gpio".
+- gpio-controller: Marks the device node as a gpio controller.
+- #gpio-cells: Should be two. The first cell is the pin number and
+ the second cell is used to specify the gpio polarity:
+ 0 = Active high,
+ 1 = Active low.
+
+Example:
+ sysgpio: sysgpio {
+ compatible = "cirrus,ep7312-mctrl-gpio",
+ "cirrus,clps711x-mctrl-gpio";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt
new file mode 100644
index 000000000..dbd22e0df
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/fsl-imx-gpio.txt
@@ -0,0 +1,32 @@
+* Freescale i.MX/MXC GPIO controller
+
+Required properties:
+- compatible : Should be "fsl,<soc>-gpio"
+- reg : Address and length of the register set for the device
+- interrupts : Should be the port interrupt shared by all 32 pins, if
+ one number. If two numbers, the first one is the interrupt shared
+ by low 16 pins and the second one is for high 16 pins.
+- gpio-controller : Marks the device node as a gpio controller.
+- #gpio-cells : Should be two. The first cell is the pin number and
+ the second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells : Should be 2. The first cell is the GPIO number.
+ The second cell bits[3:0] is used to specify trigger type and level flags:
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 4 = active high level-sensitive.
+ 8 = active low level-sensitive.
+
+Example:
+
+gpio0: gpio@73f84000 {
+ compatible = "fsl,imx51-gpio", "fsl,imx35-gpio";
+ reg = <0x73f84000 0x4000>;
+ interrupts = <50 51>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/fujitsu,mb86s70-gpio.txt b/Documentation/devicetree/bindings/gpio/fujitsu,mb86s70-gpio.txt
new file mode 100644
index 000000000..bef353f37
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/fujitsu,mb86s70-gpio.txt
@@ -0,0 +1,20 @@
+Fujitsu MB86S7x GPIO Controller
+-------------------------------
+
+Required properties:
+- compatible: Should be "fujitsu,mb86s70-gpio"
+- reg: Base address and length of register space
+- clocks: Specify the clock
+- gpio-controller: Marks the device node as a gpio controller.
+- #gpio-cells: Should be <2>. The first cell is the pin number and the
+ second cell is used to specify optional parameters:
+ - bit 0 specifies polarity (0 for normal, 1 for inverted).
+
+Examples:
+ gpio0: gpio@31000000 {
+ compatible = "fujitsu,mb86s70-gpio";
+ reg = <0 0x31000000 0x10000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ clocks = <&clk 0 2 1>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-74x164.txt b/Documentation/devicetree/bindings/gpio/gpio-74x164.txt
new file mode 100644
index 000000000..cc2608021
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-74x164.txt
@@ -0,0 +1,22 @@
+* Generic 8-bits shift register GPIO driver
+
+Required properties:
+- compatible : Should be "fairchild,74hc595"
+- reg : chip select number
+- gpio-controller : Marks the device node as a gpio controller.
+- #gpio-cells : Should be two. The first cell is the pin number and
+ the second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+- registers-number: Number of daisy-chained shift registers
+
+Example:
+
+gpio5: gpio5@0 {
+ compatible = "fairchild,74hc595";
+ reg = <0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ registers-number = <4>;
+ spi-max-frequency = <100000>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-74xx-mmio.txt b/Documentation/devicetree/bindings/gpio/gpio-74xx-mmio.txt
new file mode 100644
index 000000000..7bb1a9d60
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-74xx-mmio.txt
@@ -0,0 +1,30 @@
+* 74XX MMIO GPIO driver
+
+Required properties:
+- compatible: Should contain one of the following:
+ "ti,741g125": for 741G125 (1-bit Input),
+ "ti,741g174": for 741G74 (1-bit Output),
+ "ti,742g125": for 742G125 (2-bit Input),
+ "ti,7474" : for 7474 (2-bit Output),
+ "ti,74125" : for 74125 (4-bit Input),
+ "ti,74175" : for 74175 (4-bit Output),
+ "ti,74365" : for 74365 (6-bit Input),
+ "ti,74174" : for 74174 (6-bit Output),
+ "ti,74244" : for 74244 (8-bit Input),
+ "ti,74273" : for 74273 (8-bit Output),
+ "ti,741624" : for 741624 (16-bit Input),
+ "ti,7416374": for 7416374 (16-bit Output).
+- reg: Physical base address and length where IC resides.
+- gpio-controller: Marks the device node as a gpio controller.
+- #gpio-cells: Should be two. The first cell is the pin number and
+ the second cell is used to specify the GPIO polarity:
+ 0 = Active High,
+ 1 = Active Low.
+
+Example:
+ ctrl: gpio@30008004 {
+ compatible = "ti,74174";
+ reg = <0x30008004 0x1>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-adnp.txt b/Documentation/devicetree/bindings/gpio/gpio-adnp.txt
new file mode 100644
index 000000000..af66b2724
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-adnp.txt
@@ -0,0 +1,34 @@
+Avionic Design N-bit GPIO expander bindings
+
+Required properties:
+- compatible: should be "ad,gpio-adnp"
+- reg: The I2C slave address for this device.
+- interrupt-parent: phandle of the parent interrupt controller.
+- interrupts: Interrupt specifier for the controllers interrupt.
+- #gpio-cells: Should be 2. The first cell is the GPIO number and the
+ second cell is used to specify optional parameters:
+ - bit 0: polarity (0: normal, 1: inverted)
+- gpio-controller: Marks the device as a GPIO controller
+- nr-gpios: The number of pins supported by the controller.
+
+The GPIO expander can optionally be used as an interrupt controller, in
+which case it uses the default two cell specifier as described in
+Documentation/devicetree/bindings/interrupt-controller/interrupts.txt.
+
+Example:
+
+ gpioext: gpio-controller@41 {
+ compatible = "ad,gpio-adnp";
+ reg = <0x41>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <160 1>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ nr-gpios = <64>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-altera.txt b/Documentation/devicetree/bindings/gpio/gpio-altera.txt
new file mode 100644
index 000000000..12f50149e
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-altera.txt
@@ -0,0 +1,43 @@
+Altera GPIO controller bindings
+
+Required properties:
+- compatible:
+ - "altr,pio-1.0"
+- reg: Physical base address and length of the controller's registers.
+- #gpio-cells : Should be 2
+ - The first cell is the gpio offset number.
+ - The second cell is reserved and is currently unused.
+- gpio-controller : Marks the device node as a GPIO controller.
+- interrupt-controller: Mark the device node as an interrupt controller
+- #interrupt-cells : Should be 1. The interrupt type is fixed in the hardware.
+ - The first cell is the GPIO offset number within the GPIO controller.
+- interrupts: Specify the interrupt.
+- altr,interrupt-trigger: Specifies the interrupt trigger type the GPIO
+ hardware is synthesized. This field is required if the Altera GPIO controller
+ used has IRQ enabled as the interrupt type is not software controlled,
+ but hardware synthesized. Required if GPIO is used as an interrupt
+ controller. The value is defined in <dt-bindings/interrupt-controller/irq.h>
+ Only the following flags are supported:
+ IRQ_TYPE_EDGE_RISING
+ IRQ_TYPE_EDGE_FALLING
+ IRQ_TYPE_EDGE_BOTH
+ IRQ_TYPE_LEVEL_HIGH
+
+Optional properties:
+- altr,ngpio: Width of the GPIO bank. This defines how many pins the
+ GPIO device has. Ranges between 1-32. Optional and defaults to 32 if not
+ specified.
+
+Example:
+
+gpio_altr: gpio@0xff200000 {
+ compatible = "altr,pio-1.0";
+ reg = <0xff200000 0x10>;
+ interrupts = <0 45 4>;
+ altr,ngpio = <32>;
+ altr,interrupt-trigger = <IRQ_TYPE_EDGE_RISING>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-clps711x.txt b/Documentation/devicetree/bindings/gpio/gpio-clps711x.txt
new file mode 100644
index 000000000..e0d0446a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-clps711x.txt
@@ -0,0 +1,28 @@
+Cirrus Logic CLPS711X GPIO controller
+
+Required properties:
+- compatible: Should be "cirrus,clps711x-gpio"
+- reg: Physical base GPIO controller registers location and length.
+ There should be two registers, first is DATA register, the second
+ is DIRECTION.
+- gpio-controller: Marks the device node as a gpio controller.
+- #gpio-cells: Should be two. The first cell is the pin number and
+ the second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+
+Note: Each GPIO port should have an alias correctly numbered in "aliases"
+node.
+
+Example:
+
+aliases {
+ gpio0 = &porta;
+};
+
+porta: gpio@80000000 {
+ compatible = "cirrus,clps711x-gpio";
+ reg = <0x80000000 0x1>, <0x80000040 0x1>;
+ gpio-controller;
+ #gpio-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-davinci.txt b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
new file mode 100644
index 000000000..5079ba7d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-davinci.txt
@@ -0,0 +1,62 @@
+Davinci/Keystone GPIO controller bindings
+
+Required Properties:
+- compatible: should be "ti,dm6441-gpio", "ti,keystone-gpio"
+
+- reg: Physical base address of the controller and the size of memory mapped
+ registers.
+
+- gpio-controller : Marks the device node as a gpio controller.
+
+- #gpio-cells : Should be two.
+ - first cell is the pin number
+ - second cell is used to specify optional parameters (unused)
+
+- interrupt-parent: phandle of the parent interrupt controller.
+
+- interrupts: Array of GPIO interrupt number. Only banked or unbanked IRQs are
+ supported at a time.
+
+- ti,ngpio: The number of GPIO pins supported.
+
+- ti,davinci-gpio-unbanked: The number of GPIOs that have an individual interrupt
+ line to processor.
+
+The GPIO controller also acts as an interrupt controller. It uses the default
+two cells specifier as described in Documentation/devicetree/bindings/
+interrupt-controller/interrupts.txt.
+
+Example:
+
+gpio: gpio@1e26000 {
+ compatible = "ti,dm6441-gpio";
+ gpio-controller;
+ #gpio-cells = <2>;
+ reg = <0x226000 0x1000>;
+ interrupt-parent = <&intc>;
+ interrupts = <42 IRQ_TYPE_EDGE_BOTH 43 IRQ_TYPE_EDGE_BOTH
+ 44 IRQ_TYPE_EDGE_BOTH 45 IRQ_TYPE_EDGE_BOTH
+ 46 IRQ_TYPE_EDGE_BOTH 47 IRQ_TYPE_EDGE_BOTH
+ 48 IRQ_TYPE_EDGE_BOTH 49 IRQ_TYPE_EDGE_BOTH
+ 50 IRQ_TYPE_EDGE_BOTH>;
+ ti,ngpio = <144>;
+ ti,davinci-gpio-unbanked = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+};
+
+leds {
+ compatible = "gpio-leds";
+
+ led1 {
+ label = "davinci:green:usr1";
+ gpios = <&gpio 10 GPIO_ACTIVE_HIGH>;
+ ...
+ };
+
+ led2 {
+ label = "davinci:red:debug1";
+ gpios = <&gpio 11 GPIO_ACTIVE_HIGH>;
+ ...
+ };
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-dsp-keystone.txt b/Documentation/devicetree/bindings/gpio/gpio-dsp-keystone.txt
new file mode 100644
index 000000000..6c7e6c730
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-dsp-keystone.txt
@@ -0,0 +1,39 @@
+Keystone 2 DSP GPIO controller bindings
+
+HOST OS userland running on ARM can send interrupts to DSP cores using
+the DSP GPIO controller IP. It provides 28 IRQ signals per each DSP core.
+This is one of the component used by the IPC mechanism used on Keystone SOCs.
+
+For example TCI6638K2K SoC has 8 DSP GPIO controllers:
+ - 8 for C66x CorePacx CPUs 0-7
+
+Keystone 2 DSP GPIO controller has specific features:
+- each GPIO can be configured only as output pin;
+- setting GPIO value to 1 causes IRQ generation on target DSP core;
+- reading pin value returns 0 - if IRQ was handled or 1 - IRQ is still
+ pending.
+
+Required Properties:
+- compatible: should be "ti,keystone-dsp-gpio"
+- ti,syscon-dev: phandle/offset pair. The phandle to syscon used to
+ access device state control registers and the offset of device's specific
+ registers within device state control registers range.
+- gpio-controller: Marks the device node as a gpio controller.
+- #gpio-cells: Should be 2.
+
+Please refer to gpio.txt in this directory for details of the common GPIO
+bindings used by client devices.
+
+Example:
+ dspgpio0: keystone_dsp_gpio@02620240 {
+ compatible = "ti,keystone-dsp-gpio";
+ ti,syscon-dev = <&devctrl 0x240>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ dsp0: dsp0 {
+ compatible = "linux,rproc-user";
+ ...
+ kick-gpio = <&dspgpio0 27>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-fan.txt b/Documentation/devicetree/bindings/gpio/gpio-fan.txt
new file mode 100644
index 000000000..439a7430f
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-fan.txt
@@ -0,0 +1,40 @@
+Bindings for fan connected to GPIO lines
+
+Required properties:
+- compatible : "gpio-fan"
+
+Optional properties:
+- gpios: Specifies the pins that map to bits in the control value,
+ ordered MSB-->LSB.
+- gpio-fan,speed-map: A mapping of possible fan RPM speeds and the
+ control value that should be set to achieve them. This array
+ must have the RPM values in ascending order.
+- alarm-gpios: This pin going active indicates something is wrong with
+ the fan, and a udev event will be fired.
+- cooling-cells: If used as a cooling device, must be <2>
+ Also see: Documentation/devicetree/bindings/thermal/thermal.txt
+ min and max states are derived from the speed-map of the fan.
+
+Note: At least one the "gpios" or "alarm-gpios" properties must be set.
+
+Examples:
+
+ gpio_fan {
+ compatible = "gpio-fan";
+ gpios = <&gpio1 14 1
+ &gpio1 13 1>;
+ gpio-fan,speed-map = <0 0
+ 3000 1
+ 6000 2>;
+ alarm-gpios = <&gpio1 15 1>;
+ };
+ gpio_fan_cool: gpio_fan {
+ compatible = "gpio-fan";
+ gpios = <&gpio2 14 1
+ &gpio2 13 1>;
+ gpio-fan,speed-map = <0 0>,
+ <3000 1>,
+ <6000 2>;
+ alarm-gpios = <&gpio2 15 1>;
+ #cooling-cells = <2>; /* min followed by max */
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-grgpio.txt b/Documentation/devicetree/bindings/gpio/gpio-grgpio.txt
new file mode 100644
index 000000000..e46659810
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-grgpio.txt
@@ -0,0 +1,26 @@
+Aeroflex Gaisler GRGPIO General Purpose I/O cores.
+
+The GRGPIO GPIO core is available in the GRLIB VHDL IP core library.
+
+Note: In the ordinary environment for the GRGPIO core, a Leon SPARC system,
+these properties are built from information in the AMBA plug&play.
+
+Required properties:
+
+- name : Should be "GAISLER_GPIO" or "01_01a"
+
+- reg : Address and length of the register set for the device
+
+- interrupts : Interrupt numbers for this device
+
+Optional properties:
+
+- nbits : The number of gpio lines. If not present driver assumes 32 lines.
+
+- irqmap : An array with an index for each gpio line. An index is either a valid
+ index into the interrupts property array, or 0xffffffff that indicates
+ no irq for that line. Driver provides no interrupt support if not
+ present.
+
+For further information look in the documentation for the GLIB IP core library:
+http://www.gaisler.com/products/grlib/grip.pdf
diff --git a/Documentation/devicetree/bindings/gpio/gpio-lp3943.txt b/Documentation/devicetree/bindings/gpio/gpio-lp3943.txt
new file mode 100644
index 000000000..80fcb7d70
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-lp3943.txt
@@ -0,0 +1,37 @@
+TI/National Semiconductor LP3943 GPIO controller
+
+Required properties:
+ - compatible: "ti,lp3943-gpio"
+ - gpio-controller: Marks the device node as a GPIO controller.
+ - #gpio-cells: Should be 2. See gpio.txt in this directory for a
+ description of the cells format.
+
+Example:
+Simple LED controls with LP3943 GPIO controller
+
+&i2c4 {
+ lp3943@60 {
+ compatible = "ti,lp3943";
+ reg = <0x60>;
+
+ gpioex: gpio {
+ compatible = "ti,lp3943-gpio";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+ };
+};
+
+leds {
+ compatible = "gpio-leds";
+ indicator1 {
+ label = "indi1";
+ gpios = <&gpioex 9 GPIO_ACTIVE_LOW>;
+ };
+
+ indicator2 {
+ label = "indi2";
+ gpios = <&gpioex 10 GPIO_ACTIVE_LOW>;
+ default-state = "off";
+ };
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-max732x.txt b/Documentation/devicetree/bindings/gpio/gpio-max732x.txt
new file mode 100644
index 000000000..5fdc843b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-max732x.txt
@@ -0,0 +1,59 @@
+* MAX732x-compatible I/O expanders
+
+Required properties:
+ - compatible: Should be one of the following:
+ - "maxim,max7319": For the Maxim MAX7319
+ - "maxim,max7320": For the Maxim MAX7320
+ - "maxim,max7321": For the Maxim MAX7321
+ - "maxim,max7322": For the Maxim MAX7322
+ - "maxim,max7323": For the Maxim MAX7323
+ - "maxim,max7324": For the Maxim MAX7324
+ - "maxim,max7325": For the Maxim MAX7325
+ - "maxim,max7326": For the Maxim MAX7326
+ - "maxim,max7327": For the Maxim MAX7327
+ - reg: I2C slave address for this device.
+ - gpio-controller: Marks the device node as a GPIO controller.
+ - #gpio-cells: Should be 2.
+ - first cell is the GPIO number
+ - second cell specifies GPIO flags, as defined in <dt-bindings/gpio/gpio.h>.
+ Only the GPIO_ACTIVE_HIGH and GPIO_ACTIVE_LOW flags are supported.
+
+Optional properties:
+
+ The I/O expander can detect input state changes, and thus optionally act as
+ an interrupt controller. When the expander interrupt line is connected all the
+ following properties must be set. For more information please see the
+ interrupt controller device tree bindings documentation available at
+ Documentation/devicetree/bindings/interrupt-controller/interrupts.txt.
+
+ - interrupt-controller: Identifies the node as an interrupt controller.
+ - #interrupt-cells: Number of cells to encode an interrupt source, shall be 2.
+ - first cell is the pin number
+ - second cell is used to specify flags
+ - interrupt-parent: phandle of the parent interrupt controller.
+ - interrupts: Interrupt specifier for the controllers interrupt.
+
+Please refer to gpio.txt in this directory for details of the common GPIO
+bindings used by client devices.
+
+Example 1. MAX7325 with interrupt support enabled (CONFIG_GPIO_MAX732X_IRQ=y):
+
+ expander: max7325@6d {
+ compatible = "maxim,max7325";
+ reg = <0x6d>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
+ };
+
+Example 2. MAX7325 with interrupt support disabled (CONFIG_GPIO_MAX732X_IRQ=n):
+
+ expander: max7325@6d {
+ compatible = "maxim,max7325";
+ reg = <0x6d>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mcp23s08.txt b/Documentation/devicetree/bindings/gpio/gpio-mcp23s08.txt
new file mode 100644
index 000000000..f3332b9a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-mcp23s08.txt
@@ -0,0 +1,85 @@
+Microchip MCP2308/MCP23S08/MCP23017/MCP23S17 driver for
+8-/16-bit I/O expander with serial interface (I2C/SPI)
+
+Required properties:
+- compatible : Should be
+ - "mcp,mcp23s08" (DEPRECATED) for 8 GPIO SPI version
+ - "mcp,mcp23s17" (DEPRECATED) for 16 GPIO SPI version
+ - "mcp,mcp23008" (DEPRECATED) for 8 GPIO I2C version or
+ - "mcp,mcp23017" (DEPRECATED) for 16 GPIO I2C version of the chip
+
+ - "microchip,mcp23s08" for 8 GPIO SPI version
+ - "microchip,mcp23s17" for 16 GPIO SPI version
+ - "microchip,mcp23008" for 8 GPIO I2C version or
+ - "microchip,mcp23017" for 16 GPIO I2C version of the chip
+ NOTE: Do not use the old mcp prefix any more. It is deprecated and will be
+ removed.
+- #gpio-cells : Should be two.
+ - first cell is the pin number
+ - second cell is used to specify flags. Flags are currently unused.
+- gpio-controller : Marks the device node as a GPIO controller.
+- reg : For an address on its bus. I2C uses this a the I2C address of the chip.
+ SPI uses this to specify the chipselect line which the chip is
+ connected to. The driver and the SPI variant of the chip support
+ multiple chips on the same chipselect. Have a look at
+ microchip,spi-present-mask below.
+
+Required device specific properties (only for SPI chips):
+- mcp,spi-present-mask (DEPRECATED)
+- microchip,spi-present-mask : This is a present flag, that makes only sense for SPI
+ chips - as the name suggests. Multiple SPI chips can share the same
+ SPI chipselect. Set a bit in bit0-7 in this mask to 1 if there is a
+ chip connected with the corresponding spi address set. For example if
+ you have a chip with address 3 connected, you have to set bit3 to 1,
+ which is 0x08. mcp23s08 chip variant only supports bits 0-3. It is not
+ possible to mix mcp23s08 and mcp23s17 on the same chipselect. Set at
+ least one bit to 1 for SPI chips.
+ NOTE: Do not use the old mcp prefix any more. It is deprecated and will be
+ removed.
+- spi-max-frequency = The maximum frequency this chip is able to handle
+
+Optional properties:
+- #interrupt-cells : Should be two.
+ - first cell is the pin number
+ - second cell is used to specify flags.
+- interrupt-controller: Marks the device node as a interrupt controller.
+NOTE: The interrupt functionality is only supported for i2c versions of the
+chips. The spi chips can also do the interrupts, but this is not supported by
+the linux driver yet.
+
+Optional device specific properties:
+- microchip,irq-mirror: Sets the mirror flag in the IOCON register. Devices
+ with two interrupt outputs (these are the devices ending with 17 and
+ those that have 16 IOs) have two IO banks: IO 0-7 form bank 1 and
+ IO 8-15 are bank 2. These chips have two different interrupt outputs:
+ One for bank 1 and another for bank 2. If irq-mirror is set, both
+ interrupts are generated regardless of the bank that an input change
+ occurred on. If it is not set, the interrupt are only generated for the
+ bank they belong to.
+ On devices with only one interrupt output this property is useless.
+- microchip,irq-active-high: Sets the INTPOL flag in the IOCON register. This
+ configures the IRQ output polarity as active high.
+
+Example I2C (with interrupt):
+gpiom1: gpio@20 {
+ compatible = "microchip,mcp23017";
+ gpio-controller;
+ #gpio-cells = <2>;
+ reg = <0x20>;
+
+ interrupt-parent = <&gpio1>;
+ interrupts = <17 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-controller;
+ #interrupt-cells=<2>;
+ microchip,irq-mirror;
+};
+
+Example SPI:
+gpiom1: gpio@0 {
+ compatible = "microchip,mcp23s17";
+ gpio-controller;
+ #gpio-cells = <2>;
+ spi-present-mask = <0x01>;
+ reg = <0>;
+ spi-max-frequency = <1000000>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mm-lantiq.txt b/Documentation/devicetree/bindings/gpio/gpio-mm-lantiq.txt
new file mode 100644
index 000000000..f93d51478
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-mm-lantiq.txt
@@ -0,0 +1,38 @@
+Lantiq SoC External Bus memory mapped GPIO controller
+
+By attaching hardware latches to the EBU it is possible to create output
+only gpios. This driver configures a special memory address, which when
+written to outputs 16 bit to the latches.
+
+The node describing the memory mapped GPIOs needs to be a child of the node
+describing the "lantiq,localbus".
+
+Required properties:
+- compatible : Should be "lantiq,gpio-mm-lantiq"
+- reg : Address and length of the register set for the device
+- #gpio-cells : Should be two. The first cell is the pin number and
+ the second cell is used to specify optional parameters (currently
+ unused).
+- gpio-controller : Marks the device node as a gpio controller.
+
+Optional properties:
+- lantiq,shadow : The default value that we shall assume as already set on the
+ shift register cascade.
+
+Example:
+
+localbus@0 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x0 0x3ffffff /* addrsel0 */
+ 1 0 0x4000000 0x4000010>; /* addsel1 */
+ compatible = "lantiq,localbus", "simple-bus";
+
+ gpio_mm0: gpio@4000000 {
+ compatible = "lantiq,gpio-mm";
+ reg = <1 0x0 0x10>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ lantiq,shadow = <0x77f>
+ };
+}
diff --git a/Documentation/devicetree/bindings/gpio/gpio-msm.txt b/Documentation/devicetree/bindings/gpio/gpio-msm.txt
new file mode 100644
index 000000000..ac20e68a0
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-msm.txt
@@ -0,0 +1,26 @@
+MSM GPIO controller bindings
+
+Required properties:
+- compatible:
+ - "qcom,msm-gpio" for MSM controllers
+- #gpio-cells : Should be two.
+ - first cell is the pin number
+ - second cell is used to specify optional parameters (unused)
+- gpio-controller : Marks the device node as a GPIO controller.
+- #interrupt-cells : Should be 2.
+- interrupt-controller: Mark the device node as an interrupt controller
+- interrupts : Specify the TLMM summary interrupt number
+- ngpio : Specify the number of MSM GPIOs
+
+Example:
+
+ msmgpio: gpio@fd510000 {
+ compatible = "qcom,msm-gpio";
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0xfd510000 0x4000>;
+ interrupts = <0 208 0>;
+ ngpio = <150>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
new file mode 100644
index 000000000..a6f3bec1d
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-mvebu.txt
@@ -0,0 +1,53 @@
+* Marvell EBU GPIO controller
+
+Required properties:
+
+- compatible : Should be "marvell,orion-gpio", "marvell,mv78200-gpio"
+ or "marvell,armadaxp-gpio". "marvell,orion-gpio" should be used for
+ Orion, Kirkwood, Dove, Discovery (except MV78200) and Armada
+ 370. "marvell,mv78200-gpio" should be used for the Discovery
+ MV78200. "marvel,armadaxp-gpio" should be used for all Armada XP
+ SoCs (MV78230, MV78260, MV78460).
+
+- reg: Address and length of the register set for the device. Only one
+ entry is expected, except for the "marvell,armadaxp-gpio" variant
+ for which two entries are expected: one for the general registers,
+ one for the per-cpu registers.
+
+- interrupts: The list of interrupts that are used for all the pins
+ managed by this GPIO bank. There can be more than one interrupt
+ (example: 1 interrupt per 8 pins on Armada XP, which means 4
+ interrupts per bank of 32 GPIOs).
+
+- interrupt-controller: identifies the node as an interrupt controller
+
+- #interrupt-cells: specifies the number of cells needed to encode an
+ interrupt source. Should be two.
+ The first cell is the GPIO number.
+ The second cell is used to specify flags:
+ bits[3:0] trigger type and level flags:
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 4 = active high level-sensitive.
+ 8 = active low level-sensitive.
+
+- gpio-controller: marks the device node as a gpio controller
+
+- ngpios: number of GPIOs this controller has
+
+- #gpio-cells: Should be two. The first cell is the pin number. The
+ second cell is reserved for flags, unused at the moment.
+
+Example:
+
+ gpio0: gpio@d0018100 {
+ compatible = "marvell,armadaxp-gpio";
+ reg = <0xd0018100 0x40>,
+ <0xd0018800 0x30>;
+ ngpios = <32>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <16>, <17>, <18>, <19>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-mxs.txt b/Documentation/devicetree/bindings/gpio/gpio-mxs.txt
new file mode 100644
index 000000000..1e677a47b
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-mxs.txt
@@ -0,0 +1,88 @@
+* Freescale MXS GPIO controller
+
+The Freescale MXS GPIO controller is part of MXS PIN controller. The
+GPIOs are organized in port/bank. Each port consists of 32 GPIOs.
+
+As the GPIO controller is embedded in the PIN controller and all the
+GPIO ports share the same IO space with PIN controller, the GPIO node
+will be represented as sub-nodes of MXS pinctrl node.
+
+Required properties for GPIO node:
+- compatible : Should be "fsl,<soc>-gpio". The supported SoCs include
+ imx23 and imx28.
+- interrupts : Should be the port interrupt shared by all 32 pins.
+- gpio-controller : Marks the device node as a gpio controller.
+- #gpio-cells : Should be two. The first cell is the pin number and
+ the second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells : Should be 2. The first cell is the GPIO number.
+ The second cell bits[3:0] is used to specify trigger type and level flags:
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 4 = active high level-sensitive.
+ 8 = active low level-sensitive.
+
+Note: Each GPIO port should have an alias correctly numbered in "aliases"
+node.
+
+Examples:
+
+aliases {
+ gpio0 = &gpio0;
+ gpio1 = &gpio1;
+ gpio2 = &gpio2;
+ gpio3 = &gpio3;
+ gpio4 = &gpio4;
+};
+
+pinctrl@80018000 {
+ compatible = "fsl,imx28-pinctrl", "simple-bus";
+ reg = <0x80018000 2000>;
+
+ gpio0: gpio@0 {
+ compatible = "fsl,imx28-gpio";
+ interrupts = <127>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ gpio1: gpio@1 {
+ compatible = "fsl,imx28-gpio";
+ interrupts = <126>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ gpio2: gpio@2 {
+ compatible = "fsl,imx28-gpio";
+ interrupts = <125>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ gpio3: gpio@3 {
+ compatible = "fsl,imx28-gpio";
+ interrupts = <124>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ gpio4: gpio@4 {
+ compatible = "fsl,imx28-gpio";
+ interrupts = <123>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-nmk.txt b/Documentation/devicetree/bindings/gpio/gpio-nmk.txt
new file mode 100644
index 000000000..8315ac778
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-nmk.txt
@@ -0,0 +1,31 @@
+Nomadik GPIO controller
+
+Required properties:
+- compatible : Should be "st,nomadik-gpio".
+- reg : Physical base address and length of the controller's registers.
+- interrupts : The interrupt outputs from the controller.
+- #gpio-cells : Should be two:
+ The first cell is the pin number.
+ The second cell is used to specify optional parameters:
+ - bits[3:0] trigger type and level flags:
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 4 = active high level-sensitive.
+ 8 = active low level-sensitive.
+- gpio-controller : Marks the device node as a GPIO controller.
+- interrupt-controller : Marks the device node as an interrupt controller.
+- gpio-bank : Specifies which bank a controller owns.
+- st,supports-sleepmode : Specifies whether controller can sleep or not
+
+Example:
+
+ gpio1: gpio@8012e080 {
+ compatible = "st,nomadik-gpio";
+ reg = <0x8012e080 0x80>;
+ interrupts = <0 120 0x4>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupt-controller;
+ st,supports-sleepmode;
+ gpio-bank = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-omap.txt b/Documentation/devicetree/bindings/gpio/gpio-omap.txt
new file mode 100644
index 000000000..8d950522e
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-omap.txt
@@ -0,0 +1,39 @@
+OMAP GPIO controller bindings
+
+Required properties:
+- compatible:
+ - "ti,omap2-gpio" for OMAP2 controllers
+ - "ti,omap3-gpio" for OMAP3 controllers
+ - "ti,omap4-gpio" for OMAP4 controllers
+- gpio-controller : Marks the device node as a GPIO controller.
+- #gpio-cells : Should be two.
+ - first cell is the pin number
+ - second cell is used to specify optional parameters (unused)
+- interrupt-controller: Mark the device node as an interrupt controller.
+- #interrupt-cells : Should be 2.
+ The first cell is the GPIO number.
+ The second cell is used to specify flags:
+ bits[3:0] trigger type and level flags:
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 4 = active high level-sensitive.
+ 8 = active low level-sensitive.
+
+OMAP specific properties:
+- ti,hwmods: Name of the hwmod associated to the GPIO:
+ "gpio<X>", <X> being the 1-based instance number
+ from the HW spec.
+- ti,gpio-always-on: Indicates if a GPIO bank is always powered and
+ so will never lose its logic state.
+
+
+Example:
+
+gpio4: gpio4 {
+ compatible = "ti,omap4-gpio";
+ ti,hwmods = "gpio4";
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-palmas.txt b/Documentation/devicetree/bindings/gpio/gpio-palmas.txt
new file mode 100644
index 000000000..08b5b52a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-palmas.txt
@@ -0,0 +1,27 @@
+Palmas GPIO controller bindings
+
+Required properties:
+- compatible:
+ - "ti,palams-gpio" for palma series of the GPIO controller
+ - "ti,tps80036-gpio" for Palma series device TPS80036.
+ - "ti,tps65913-gpio" for palma series device TPS65913.
+ - "ti,tps65914-gpio" for palma series device TPS65914.
+- #gpio-cells : Should be two.
+ - first cell is the gpio pin number
+ - second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+- gpio-controller : Marks the device node as a GPIO controller.
+
+Note: This gpio node will be sub node of palmas node.
+
+Example:
+ palmas: tps65913@58 {
+ :::::::::::
+ palmas_gpio: palmas_gpio {
+ compatible = "ti,palmas-gpio";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+ :::::::::::
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt
new file mode 100644
index 000000000..b9a42f294
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-pca953x.txt
@@ -0,0 +1,39 @@
+* NXP PCA953x I2C GPIO multiplexer
+
+Required properties:
+ - compatible: Has to contain one of the following:
+ nxp,pca9505
+ nxp,pca9534
+ nxp,pca9535
+ nxp,pca9536
+ nxp,pca9537
+ nxp,pca9538
+ nxp,pca9539
+ nxp,pca9554
+ nxp,pca9555
+ nxp,pca9556
+ nxp,pca9557
+ nxp,pca9574
+ nxp,pca9575
+ nxp,pca9698
+ maxim,max7310
+ maxim,max7312
+ maxim,max7313
+ maxim,max7315
+ ti,pca6107
+ ti,tca6408
+ ti,tca6416
+ ti,tca6424
+ exar,xra1202
+
+Example:
+
+
+ gpio@20 {
+ compatible = "nxp,pca9505";
+ reg = <0x20>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pca9505>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <23 IRQ_TYPE_LEVEL_LOW>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-pcf857x.txt b/Documentation/devicetree/bindings/gpio/gpio-pcf857x.txt
new file mode 100644
index 000000000..ada4e2973
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-pcf857x.txt
@@ -0,0 +1,71 @@
+* PCF857x-compatible I/O expanders
+
+The PCF857x-compatible chips have "quasi-bidirectional" I/O lines that can be
+driven high by a pull-up current source or driven low to ground. This combines
+the direction and output level into a single bit per line, which can't be read
+back. We can't actually know at initialization time whether a line is configured
+(a) as output and driving the signal low/high, or (b) as input and reporting a
+low/high value, without knowing the last value written since the chip came out
+of reset (if any). The only reliable solution for setting up line direction is
+thus to do it explicitly.
+
+Required Properties:
+
+ - compatible: should be one of the following.
+ - "maxim,max7328": For the Maxim MAX7378
+ - "maxim,max7329": For the Maxim MAX7329
+ - "nxp,pca8574": For the NXP PCA8574
+ - "nxp,pca8575": For the NXP PCA8575
+ - "nxp,pca9670": For the NXP PCA9670
+ - "nxp,pca9671": For the NXP PCA9671
+ - "nxp,pca9672": For the NXP PCA9672
+ - "nxp,pca9673": For the NXP PCA9673
+ - "nxp,pca9674": For the NXP PCA9674
+ - "nxp,pca9675": For the NXP PCA9675
+ - "nxp,pcf8574": For the NXP PCF8574
+ - "nxp,pcf8574a": For the NXP PCF8574A
+ - "nxp,pcf8575": For the NXP PCF8575
+ - "ti,tca9554": For the TI TCA9554
+
+ - reg: I2C slave address.
+
+ - gpio-controller: Marks the device node as a gpio controller.
+ - #gpio-cells: Should be 2. The first cell is the GPIO number and the second
+ cell specifies GPIO flags, as defined in <dt-bindings/gpio/gpio.h>. Only the
+ GPIO_ACTIVE_HIGH and GPIO_ACTIVE_LOW flags are supported.
+
+Optional Properties:
+
+ - lines-initial-states: Bitmask that specifies the initial state of each
+ line. When a bit is set to zero, the corresponding line will be initialized to
+ the input (pulled-up) state. When the bit is set to one, the line will be
+ initialized the low-level output state. If the property is not specified
+ all lines will be initialized to the input state.
+
+ The I/O expander can detect input state changes, and thus optionally act as
+ an interrupt controller. When the expander interrupt line is connected all the
+ following properties must be set. For more information please see the
+ interrupt controller device tree bindings documentation available at
+ Documentation/devicetree/bindings/interrupt-controller/interrupts.txt.
+
+ - interrupt-controller: Identifies the node as an interrupt controller.
+ - #interrupt-cells: Number of cells to encode an interrupt source, shall be 2.
+ - interrupt-parent: phandle of the parent interrupt controller.
+ - interrupts: Interrupt specifier for the controllers interrupt.
+
+
+Please refer to gpio.txt in this directory for details of the common GPIO
+bindings used by client devices.
+
+Example: PCF8575 I/O expander node
+
+ pcf8575: gpio@20 {
+ compatible = "nxp,pcf8575";
+ reg = <0x20>;
+ interrupt-parent = <&irqpin2>;
+ interrupts = <3 0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-poweroff.txt b/Documentation/devicetree/bindings/gpio/gpio-poweroff.txt
new file mode 100644
index 000000000..d4eab9227
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-poweroff.txt
@@ -0,0 +1,36 @@
+Driver a GPIO line that can be used to turn the power off.
+
+The driver supports both level triggered and edge triggered power off.
+At driver load time, the driver will request the given gpio line and
+install a pm_power_off handler. If the optional properties 'input' is
+not found, the GPIO line will be driven in the inactive
+state. Otherwise its configured as an input.
+
+When the pm_power_off is called, the gpio is configured as an output,
+and drive active, so triggering a level triggered power off
+condition. This will also cause an inactive->active edge condition, so
+triggering positive edge triggered power off. After a delay of 100ms,
+the GPIO is set to inactive, thus causing an active->inactive edge,
+triggering negative edge triggered power off. After another 100ms
+delay the GPIO is driver active again. If the power is still on and
+the CPU still running after a 3000ms delay, a WARN_ON(1) is emitted.
+
+Required properties:
+- compatible : should be "gpio-poweroff".
+- gpios : The GPIO to set high/low, see "gpios property" in
+ Documentation/devicetree/bindings/gpio/gpio.txt. If the pin should be
+ low to power down the board set it to "Active Low", otherwise set
+ gpio to "Active High".
+
+Optional properties:
+- input : Initially configure the GPIO line as an input. Only reconfigure
+ it to an output when the pm_power_off function is called. If this optional
+ property is not specified, the GPIO is initialized as an output in its
+ inactive state.
+
+Examples:
+
+gpio-poweroff {
+ compatible = "gpio-poweroff";
+ gpios = <&gpio 4 0>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-restart.txt b/Documentation/devicetree/bindings/gpio/gpio-restart.txt
new file mode 100644
index 000000000..af3701bc1
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-restart.txt
@@ -0,0 +1,54 @@
+Drive a GPIO line that can be used to restart the system from a restart
+handler.
+
+This binding supports level and edge triggered reset. At driver load
+time, the driver will request the given gpio line and install a restart
+handler. If the optional properties 'open-source' is not found, the GPIO line
+will be driven in the inactive state. Otherwise its not driven until
+the restart is initiated.
+
+When the system is restarted, the restart handler will be invoked in
+priority order. The gpio is configured as an output, and driven active,
+triggering a level triggered reset condition. This will also cause an
+inactive->active edge condition, triggering positive edge triggered
+reset. After a delay specified by active-delay, the GPIO is set to
+inactive, thus causing an active->inactive edge, triggering negative edge
+triggered reset. After a delay specified by inactive-delay, the GPIO
+is driven active again. After a delay specified by wait-delay, the
+restart handler completes allowing other restart handlers to be attempted.
+
+Required properties:
+- compatible : should be "gpio-restart".
+- gpios : The GPIO to set high/low, see "gpios property" in
+ Documentation/devicetree/bindings/gpio/gpio.txt. If the pin should be
+ low to reset the board set it to "Active Low", otherwise set
+ gpio to "Active High".
+
+Optional properties:
+- open-source : Treat the GPIO as being open source and defer driving
+ it to when the restart is initiated. If this optional property is not
+ specified, the GPIO is initialized as an output in its inactive state.
+- priority : A priority ranging from 0 to 255 (default 128) according to
+ the following guidelines:
+ 0: Restart handler of last resort, with limited restart
+ capabilities
+ 128: Default restart handler; use if no other restart handler is
+ expected to be available, and/or if restart functionality is
+ sufficient to restart the entire system
+ 255: Highest priority restart handler, will preempt all other
+ restart handlers
+- active-delay: Delay (default 100) to wait after driving gpio active [ms]
+- inactive-delay: Delay (default 100) to wait after driving gpio inactive [ms]
+- wait-delay: Delay (default 3000) to wait after completing restart
+ sequence [ms]
+
+Examples:
+
+gpio-restart {
+ compatible = "gpio-restart";
+ gpios = <&gpio 4 0>;
+ priority = <128>;
+ active-delay = <100>;
+ inactive-delay = <100>;
+ wait-delay = <3000>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-samsung.txt b/Documentation/devicetree/bindings/gpio/gpio-samsung.txt
new file mode 100644
index 000000000..5375625e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-samsung.txt
@@ -0,0 +1,41 @@
+Samsung Exynos4 GPIO Controller
+
+Required properties:
+- compatible: Compatible property value should be "samsung,exynos4-gpio>".
+
+- reg: Physical base address of the controller and length of memory mapped
+ region.
+
+- #gpio-cells: Should be 4. The syntax of the gpio specifier used by client nodes
+ should be the following with values derived from the SoC user manual.
+ <[phandle of the gpio controller node]
+ [pin number within the gpio controller]
+ [mux function]
+ [flags and pull up/down]
+ [drive strength]>
+
+ Values for gpio specifier:
+ - Pin number: is a value between 0 to 7.
+ - Flags and Pull Up/Down: 0 - Pull Up/Down Disabled.
+ 1 - Pull Down Enabled.
+ 3 - Pull Up Enabled.
+ Bit 16 (0x00010000) - Input is active low.
+ - Drive Strength: 0 - 1x,
+ 1 - 3x,
+ 2 - 2x,
+ 3 - 4x
+
+- gpio-controller: Specifies that the node is a gpio controller.
+- #address-cells: should be 1.
+- #size-cells: should be 1.
+
+Example:
+
+ gpa0: gpio-controller@11400000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "samsung,exynos4-gpio";
+ reg = <0x11400000 0x20>;
+ #gpio-cells = <4>;
+ gpio-controller;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-stericsson-coh901.txt b/Documentation/devicetree/bindings/gpio/gpio-stericsson-coh901.txt
new file mode 100644
index 000000000..fd665b44d
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-stericsson-coh901.txt
@@ -0,0 +1,7 @@
+ST-Ericsson COH 901 571/3 GPIO controller
+
+Required properties:
+- compatible: Compatible property value should be "stericsson,gpio-coh901"
+- reg: Physical base address of the controller and length of memory mapped
+ region.
+- interrupts: the 0...n interrupts assigned to the different GPIO ports/banks.
diff --git a/Documentation/devicetree/bindings/gpio/gpio-stmpe.txt b/Documentation/devicetree/bindings/gpio/gpio-stmpe.txt
new file mode 100644
index 000000000..a0e4cf885
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-stmpe.txt
@@ -0,0 +1,18 @@
+STMPE gpio
+----------
+
+Required properties:
+ - compatible: "st,stmpe-gpio"
+
+Optional properties:
+ - st,norequest-mask: bitmask specifying which GPIOs should _not_ be requestable
+ due to different usage (e.g. touch, keypad)
+
+Node name must be stmpe_gpio and should be child node of stmpe node to which it
+belongs.
+
+Example:
+ stmpe_gpio {
+ compatible = "st,stmpe-gpio";
+ st,norequest-mask = <0x20>; //gpio 5 can't be used
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-stp-xway.txt b/Documentation/devicetree/bindings/gpio/gpio-stp-xway.txt
new file mode 100644
index 000000000..854de130a
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-stp-xway.txt
@@ -0,0 +1,42 @@
+Lantiq SoC Serial To Parallel (STP) GPIO controller
+
+The Serial To Parallel (STP) is found on MIPS based Lantiq socs. It is a
+peripheral controller used to drive external shift register cascades. At most
+3 groups of 8 bits can be driven. The hardware is able to allow the DSL modem
+to drive the 2 LSBs of the cascade automatically.
+
+
+Required properties:
+- compatible : Should be "lantiq,gpio-stp-xway"
+- reg : Address and length of the register set for the device
+- #gpio-cells : Should be two. The first cell is the pin number and
+ the second cell is used to specify optional parameters (currently
+ unused).
+- gpio-controller : Marks the device node as a gpio controller.
+
+Optional properties:
+- lantiq,shadow : The default value that we shall assume as already set on the
+ shift register cascade.
+- lantiq,groups : Set the 3 bit mask to select which of the 3 groups are enabled
+ in the shift register cascade.
+- lantiq,dsl : The dsl core can control the 2 LSBs of the gpio cascade. This 2 bit
+ property can enable this feature.
+- lantiq,phy1 : The gphy1 core can control 3 bits of the gpio cascade.
+- lantiq,phy2 : The gphy2 core can control 3 bits of the gpio cascade.
+- lantiq,rising : use rising instead of falling edge for the shift register
+
+Example:
+
+gpio1: stp@E100BB0 {
+ compatible = "lantiq,gpio-stp-xway";
+ reg = <0xE100BB0 0x40>;
+ #gpio-cells = <2>;
+ gpio-controller;
+
+ lantiq,shadow = <0xffff>;
+ lantiq,groups = <0x7>;
+ lantiq,dsl = <0x3>;
+ lantiq,phy1 = <0x7>;
+ lantiq,phy2 = <0x7>;
+ /* lantiq,rising; */
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-sx150x.txt b/Documentation/devicetree/bindings/gpio/gpio-sx150x.txt
new file mode 100644
index 000000000..ba2bb84ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-sx150x.txt
@@ -0,0 +1,40 @@
+SEMTECH SX150x GPIO expander bindings
+
+
+Required properties:
+
+- compatible: should be "semtech,sx1506q",
+ "semtech,sx1508q",
+ "semtech,sx1509q".
+
+- reg: The I2C slave address for this device.
+
+- interrupt-parent: phandle of the parent interrupt controller.
+
+- interrupts: Interrupt specifier for the controllers interrupt.
+
+- #gpio-cells: Should be 2. The first cell is the GPIO number and the
+ second cell is used to specify optional parameters:
+ bit 0: polarity (0: normal, 1: inverted)
+
+- gpio-controller: Marks the device as a GPIO controller.
+
+- interrupt-controller: Marks the device as a interrupt controller.
+
+The GPIO expander can optionally be used as an interrupt controller, in
+which case it uses the default two cell specifier as described in
+Documentation/devicetree/bindings/interrupt-controller/interrupts.txt.
+
+Example:
+
+ i2c_gpio_expander@20{
+ #gpio-cells = <2>;
+ #interrupt-cells = <2>;
+ compatible = "semtech,sx1506q";
+ reg = <0x20>;
+ interrupt-parent = <&gpio_1>;
+ interrupts = <16 0>;
+
+ gpio-controller;
+ interrupt-controller;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-twl4030.txt b/Documentation/devicetree/bindings/gpio/gpio-twl4030.txt
new file mode 100644
index 000000000..66788fda1
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-twl4030.txt
@@ -0,0 +1,29 @@
+twl4030 GPIO controller bindings
+
+Required properties:
+- compatible:
+ - "ti,twl4030-gpio" for twl4030 GPIO controller
+- #gpio-cells : Should be two.
+ - first cell is the pin number
+ - second cell is used to specify optional parameters (unused)
+- gpio-controller : Marks the device node as a GPIO controller.
+- #interrupt-cells : Should be 2.
+- interrupt-controller: Mark the device node as an interrupt controller
+ The first cell is the GPIO number.
+ The second cell is not used.
+- ti,use-leds : Enables LEDA and LEDB outputs if set
+- ti,debounce : if n-th bit is set, debounces GPIO-n
+- ti,mmc-cd : if n-th bit is set, GPIO-n controls VMMC(n+1)
+- ti,pullups : if n-th bit is set, set a pullup on GPIO-n
+- ti,pulldowns : if n-th bit is set, set a pulldown on GPIO-n
+
+Example:
+
+twl_gpio: gpio {
+ compatible = "ti,twl4030-gpio";
+ #gpio-cells = <2>;
+ gpio-controller;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ ti,use-leds;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-tz1090-pdc.txt b/Documentation/devicetree/bindings/gpio/gpio-tz1090-pdc.txt
new file mode 100644
index 000000000..1fd98ffa8
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-tz1090-pdc.txt
@@ -0,0 +1,45 @@
+ImgTec TZ1090 PDC GPIO Controller
+
+Required properties:
+- compatible: Compatible property value should be "img,tz1090-pdc-gpio".
+
+- reg: Physical base address of the controller and length of memory mapped
+ region. This starts at and cover the SOC_GPIO_CONTROL registers.
+
+- gpio-controller: Specifies that the node is a gpio controller.
+
+- #gpio-cells: Should be 2. The syntax of the gpio specifier used by client
+ nodes should have the following values.
+ <[phandle of the gpio controller node]
+ [PDC gpio number]
+ [gpio flags]>
+
+ Values for gpio specifier:
+ - GPIO number: a value in the range 0 to 6.
+ - GPIO flags: bit field of flags, as defined in <dt-bindings/gpio/gpio.h>.
+ Only the following flags are supported:
+ GPIO_ACTIVE_HIGH
+ GPIO_ACTIVE_LOW
+
+Optional properties:
+- gpio-ranges: Mapping to pin controller pins (as described in
+ Documentation/devicetree/bindings/gpio/gpio.txt)
+
+- interrupts: Individual syswake interrupts (other GPIOs cannot interrupt)
+
+
+Example:
+
+ pdc_gpios: gpio-controller@02006500 {
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ compatible = "img,tz1090-pdc-gpio";
+ reg = <0x02006500 0x100>;
+
+ interrupt-parent = <&pdc>;
+ interrupts = <8 IRQ_TYPE_NONE>, /* Syswake 0 */
+ <9 IRQ_TYPE_NONE>, /* Syswake 1 */
+ <10 IRQ_TYPE_NONE>; /* Syswake 2 */
+ gpio-ranges = <&pdc_pinctrl 0 0 7>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-tz1090.txt b/Documentation/devicetree/bindings/gpio/gpio-tz1090.txt
new file mode 100644
index 000000000..174cdf309
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-tz1090.txt
@@ -0,0 +1,88 @@
+ImgTec TZ1090 GPIO Controller
+
+Required properties:
+- compatible: Compatible property value should be "img,tz1090-gpio".
+
+- reg: Physical base address of the controller and length of memory mapped
+ region.
+
+- #address-cells: Should be 1 (for bank subnodes)
+
+- #size-cells: Should be 0 (for bank subnodes)
+
+- Each bank of GPIOs should have a subnode to represent it.
+
+ Bank subnode required properties:
+ - reg: Index of bank in the range 0 to 2.
+
+ - gpio-controller: Specifies that the node is a gpio controller.
+
+ - #gpio-cells: Should be 2. The syntax of the gpio specifier used by client
+ nodes should have the following values.
+ <[phandle of the gpio controller node]
+ [gpio number within the gpio bank]
+ [gpio flags]>
+
+ Values for gpio specifier:
+ - GPIO number: a value in the range 0 to 29.
+ - GPIO flags: bit field of flags, as defined in <dt-bindings/gpio/gpio.h>.
+ Only the following flags are supported:
+ GPIO_ACTIVE_HIGH
+ GPIO_ACTIVE_LOW
+
+ Bank subnode optional properties:
+ - gpio-ranges: Mapping to pin controller pins (as described in
+ Documentation/devicetree/bindings/gpio/gpio.txt)
+
+ - interrupts: Interrupt for the entire bank
+
+ - interrupt-controller: Specifies that the node is an interrupt controller
+
+ - #interrupt-cells: Should be 2. The syntax of the interrupt specifier used by
+ client nodes should have the following values.
+ <[phandle of the interurupt controller]
+ [gpio number within the gpio bank]
+ [irq flags]>
+
+ Values for irq specifier:
+ - GPIO number: a value in the range 0 to 29
+ - IRQ flags: value to describe edge and level triggering, as defined in
+ <dt-bindings/interrupt-controller/irq.h>. Only the following flags are
+ supported:
+ IRQ_TYPE_EDGE_RISING
+ IRQ_TYPE_EDGE_FALLING
+ IRQ_TYPE_EDGE_BOTH
+ IRQ_TYPE_LEVEL_HIGH
+ IRQ_TYPE_LEVEL_LOW
+
+
+
+Example:
+
+ gpios: gpio-controller@02005800 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "img,tz1090-gpio";
+ reg = <0x02005800 0x90>;
+
+ /* bank 0 with an interrupt */
+ gpios0: bank@0 {
+ #gpio-cells = <2>;
+ #interrupt-cells = <2>;
+ reg = <0>;
+ interrupts = <13 IRQ_TYPE_LEVEL_HIGH>;
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 0 30>;
+ interrupt-controller;
+ };
+
+ /* bank 2 without interrupt */
+ gpios2: bank@2 {
+ #gpio-cells = <2>;
+ reg = <2>;
+ gpio-controller;
+ gpio-ranges = <&pinctrl 0 60 30>;
+ };
+ };
+
+
diff --git a/Documentation/devicetree/bindings/gpio/gpio-vf610.txt b/Documentation/devicetree/bindings/gpio/gpio-vf610.txt
new file mode 100644
index 000000000..436cc99c6
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-vf610.txt
@@ -0,0 +1,55 @@
+* Freescale VF610 PORT/GPIO module
+
+The Freescale PORT/GPIO modules are two adjacent modules providing GPIO
+functionality. Each pair serves 32 GPIOs. The VF610 has 5 instances of
+each, and each PORT module has its own interrupt.
+
+Required properties for GPIO node:
+- compatible : Should be "fsl,<soc>-gpio", currently "fsl,vf610-gpio"
+- reg : The first reg tuple represents the PORT module, the second tuple
+ the GPIO module.
+- interrupts : Should be the port interrupt shared by all 32 pins.
+- gpio-controller : Marks the device node as a gpio controller.
+- #gpio-cells : Should be two. The first cell is the pin number and
+ the second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells : Should be 2. The first cell is the GPIO number.
+ The second cell bits[3:0] is used to specify trigger type and level flags:
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 4 = active high level-sensitive.
+ 8 = active low level-sensitive.
+
+Note: Each GPIO port should have an alias correctly numbered in "aliases"
+node.
+
+Examples:
+
+aliases {
+ gpio0 = &gpio1;
+ gpio1 = &gpio2;
+};
+
+gpio1: gpio@40049000 {
+ compatible = "fsl,vf610-gpio";
+ reg = <0x40049000 0x1000 0x400ff000 0x40>;
+ interrupts = <0 107 IRQ_TYPE_LEVEL_HIGH>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ gpio-ranges = <&iomuxc 0 0 32>;
+};
+
+gpio2: gpio@4004a000 {
+ compatible = "fsl,vf610-gpio";
+ reg = <0x4004a000 0x1000 0x400ff040 0x40>;
+ interrupts = <0 108 IRQ_TYPE_LEVEL_HIGH>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ gpio-ranges = <&iomuxc 0 32 32>;
+};
diff --git a/Documentation/devicetree/bindings/gpio/gpio-xgene-sb.txt b/Documentation/devicetree/bindings/gpio/gpio-xgene-sb.txt
new file mode 100644
index 000000000..dae130060
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-xgene-sb.txt
@@ -0,0 +1,32 @@
+APM X-Gene Standby GPIO controller bindings
+
+This is a gpio controller in the standby domain.
+
+There are 20 GPIO pins from 0..21. There is no GPIO_DS14 or GPIO_DS15,
+only GPIO_DS8..GPIO_DS13 support interrupts. The IRQ mapping
+is currently 1-to-1 on interrupts 0x28 thru 0x2d.
+
+Required properties:
+- compatible: "apm,xgene-gpio-sb" for the X-Gene Standby GPIO controller
+- reg: Physical base address and size of the controller's registers
+- #gpio-cells: Should be two.
+ - first cell is the pin number
+ - second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+- gpio-controller: Marks the device node as a GPIO controller.
+- interrupts: Shall contain exactly 6 interrupts.
+
+Example:
+ sbgpio: sbgpio@17001000 {
+ compatible = "apm,xgene-gpio-sb";
+ reg = <0x0 0x17001000 0x0 0x400>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupts = <0x0 0x28 0x1>,
+ <0x0 0x29 0x1>,
+ <0x0 0x2a 0x1>,
+ <0x0 0x2b 0x1>,
+ <0x0 0x2c 0x1>,
+ <0x0 0x2d 0x1>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-xgene.txt b/Documentation/devicetree/bindings/gpio/gpio-xgene.txt
new file mode 100644
index 000000000..86dbb05e7
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-xgene.txt
@@ -0,0 +1,22 @@
+APM X-Gene SoC GPIO controller bindings
+
+This is a gpio controller that is part of the flash controller.
+This gpio controller controls a total of 48 gpios.
+
+Required properties:
+- compatible: "apm,xgene-gpio" for X-Gene GPIO controller
+- reg: Physical base address and size of the controller's registers
+- #gpio-cells: Should be two.
+ - first cell is the pin number
+ - second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+- gpio-controller: Marks the device node as a GPIO controller.
+
+Example:
+ gpio0: gpio0@1701c000 {
+ compatible = "apm,xgene-gpio";
+ reg = <0x0 0x1701c000 0x0 0x40>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-xilinx.txt b/Documentation/devicetree/bindings/gpio/gpio-xilinx.txt
new file mode 100644
index 000000000..63bf4becd
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-xilinx.txt
@@ -0,0 +1,48 @@
+Xilinx plb/axi GPIO controller
+
+Dual channel GPIO controller with configurable number of pins
+(from 1 to 32 per channel). Every pin can be configured as
+input/output/tristate. Both channels share the same global IRQ but
+local interrupts can be enabled on channel basis.
+
+Required properties:
+- compatible : Should be "xlnx,xps-gpio-1.00.a"
+- reg : Address and length of the register set for the device
+- #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters (currently unused).
+- gpio-controller : Marks the device node as a GPIO controller.
+
+Optional properties:
+- interrupts : Interrupt mapping for GPIO IRQ.
+- interrupt-parent : Phandle for the interrupt controller that
+ services interrupts for this device.
+- xlnx,all-inputs : if n-th bit is setup, GPIO-n is input
+- xlnx,dout-default : if n-th bit is 1, GPIO-n default value is 1
+- xlnx,gpio-width : gpio width
+- xlnx,tri-default : if n-th bit is 1, GPIO-n is in tristate mode
+- xlnx,is-dual : if 1, controller also uses the second channel
+- xlnx,all-inputs-2 : as above but for the second channel
+- xlnx,dout-default-2 : as above but the second channel
+- xlnx,gpio2-width : as above but for the second channel
+- xlnx,tri-default-2 : as above but for the second channel
+
+
+Example:
+gpio: gpio@40000000 {
+ #gpio-cells = <2>;
+ compatible = "xlnx,xps-gpio-1.00.a";
+ gpio-controller ;
+ interrupt-parent = <&microblaze_0_intc>;
+ interrupts = < 6 2 >;
+ reg = < 0x40000000 0x10000 >;
+ xlnx,all-inputs = <0x0>;
+ xlnx,all-inputs-2 = <0x0>;
+ xlnx,dout-default = <0x0>;
+ xlnx,dout-default-2 = <0x0>;
+ xlnx,gpio-width = <0x2>;
+ xlnx,gpio2-width = <0x2>;
+ xlnx,interrupt-present = <0x1>;
+ xlnx,is-dual = <0x1>;
+ xlnx,tri-default = <0xffffffff>;
+ xlnx,tri-default-2 = <0xffffffff>;
+} ;
diff --git a/Documentation/devicetree/bindings/gpio/gpio-zevio.txt b/Documentation/devicetree/bindings/gpio/gpio-zevio.txt
new file mode 100644
index 000000000..a37bd9ae2
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-zevio.txt
@@ -0,0 +1,16 @@
+Zevio GPIO controller
+
+Required properties:
+- compatible: Should be "lsi,zevio-gpio"
+- reg: Address and length of the register set for the device
+- #gpio-cells: Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters (currently unused).
+- gpio-controller: Marks the device node as a GPIO controller.
+
+Example:
+ gpio: gpio@90000000 {
+ compatible = "lsi,zevio-gpio";
+ reg = <0x90000000 0x1000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio-zynq.txt b/Documentation/devicetree/bindings/gpio/gpio-zynq.txt
new file mode 100644
index 000000000..986371a4b
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-zynq.txt
@@ -0,0 +1,26 @@
+Xilinx Zynq GPIO controller Device Tree Bindings
+-------------------------------------------
+
+Required properties:
+- #gpio-cells : Should be two
+ - First cell is the GPIO line number
+ - Second cell is used to specify optional
+ parameters (unused)
+- compatible : Should be "xlnx,zynq-gpio-1.0"
+- clocks : Clock specifier (see clock bindings for details)
+- gpio-controller : Marks the device node as a GPIO controller.
+- interrupts : Interrupt specifier (see interrupt bindings for
+ details)
+- interrupt-parent : Must be core interrupt controller
+- reg : Address and length of the register set for the device
+
+Example:
+ gpio@e000a000 {
+ #gpio-cells = <2>;
+ compatible = "xlnx,zynq-gpio-1.0";
+ clocks = <&clkc 42>;
+ gpio-controller;
+ interrupt-parent = <&intc>;
+ interrupts = <0 20 4>;
+ reg = <0xe000a000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/gpio.txt b/Documentation/devicetree/bindings/gpio/gpio.txt
new file mode 100644
index 000000000..5788d5cf1
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio.txt
@@ -0,0 +1,242 @@
+Specifying GPIO information for devices
+============================================
+
+1) gpios property
+-----------------
+
+Nodes that makes use of GPIOs should specify them using one or more
+properties, each containing a 'gpio-list':
+
+ gpio-list ::= <single-gpio> [gpio-list]
+ single-gpio ::= <gpio-phandle> <gpio-specifier>
+ gpio-phandle : phandle to gpio controller node
+ gpio-specifier : Array of #gpio-cells specifying specific gpio
+ (controller specific)
+
+GPIO properties should be named "[<name>-]gpios", with <name> being the purpose
+of this GPIO for the device. While a non-existent <name> is considered valid
+for compatibility reasons (resolving to the "gpios" property), it is not allowed
+for new bindings.
+
+GPIO properties can contain one or more GPIO phandles, but only in exceptional
+cases should they contain more than one. If your device uses several GPIOs with
+distinct functions, reference each of them under its own property, giving it a
+meaningful name. The only case where an array of GPIOs is accepted is when
+several GPIOs serve the same function (e.g. a parallel data line).
+
+The exact purpose of each gpios property must be documented in the device tree
+binding of the device.
+
+The following example could be used to describe GPIO pins used as device enable
+and bit-banged data signals:
+
+ gpio1: gpio1 {
+ gpio-controller
+ #gpio-cells = <2>;
+ };
+ gpio2: gpio2 {
+ gpio-controller
+ #gpio-cells = <1>;
+ };
+ [...]
+
+ enable-gpios = <&gpio2 2>;
+ data-gpios = <&gpio1 12 0>,
+ <&gpio1 13 0>,
+ <&gpio1 14 0>,
+ <&gpio1 15 0>;
+
+Note that gpio-specifier length is controller dependent. In the
+above example, &gpio1 uses 2 cells to specify a gpio, while &gpio2
+only uses one.
+
+gpio-specifier may encode: bank, pin position inside the bank,
+whether pin is open-drain and whether pin is logically inverted.
+Exact meaning of each specifier cell is controller specific, and must
+be documented in the device tree binding for the device. Use the macros
+defined in include/dt-bindings/gpio/gpio.h whenever possible:
+
+Example of a node using GPIOs:
+
+ node {
+ enable-gpios = <&qe_pio_e 18 GPIO_ACTIVE_HIGH>;
+ };
+
+GPIO_ACTIVE_HIGH is 0, so in this example gpio-specifier is "18 0" and encodes
+GPIO pin number, and GPIO flags as accepted by the "qe_pio_e" gpio-controller.
+
+1.1) GPIO specifier best practices
+----------------------------------
+
+A gpio-specifier should contain a flag indicating the GPIO polarity; active-
+high or active-low. If it does, the following best practices should be
+followed:
+
+The gpio-specifier's polarity flag should represent the physical level at the
+GPIO controller that achieves (or represents, for inputs) a logically asserted
+value at the device. The exact definition of logically asserted should be
+defined by the binding for the device. If the board inverts the signal between
+the GPIO controller and the device, then the gpio-specifier will represent the
+opposite physical level than the signal at the device's pin.
+
+When the device's signal polarity is configurable, the binding for the
+device must either:
+
+a) Define a single static polarity for the signal, with the expectation that
+any software using that binding would statically program the device to use
+that signal polarity.
+
+The static choice of polarity may be either:
+
+a1) (Preferred) Dictated by a binding-specific DT property.
+
+or:
+
+a2) Defined statically by the DT binding itself.
+
+In particular, the polarity cannot be derived from the gpio-specifier, since
+that would prevent the DT from separately representing the two orthogonal
+concepts of configurable signal polarity in the device, and possible board-
+level signal inversion.
+
+or:
+
+b) Pick a single option for device signal polarity, and document this choice
+in the binding. The gpio-specifier should represent the polarity of the signal
+(at the GPIO controller) assuming that the device is configured for this
+particular signal polarity choice. If software chooses to program the device
+to generate or receive a signal of the opposite polarity, software will be
+responsible for correctly interpreting (inverting) the GPIO signal at the GPIO
+controller.
+
+2) gpio-controller nodes
+------------------------
+
+Every GPIO controller node must contain both an empty "gpio-controller"
+property, and a #gpio-cells integer property, which indicates the number of
+cells in a gpio-specifier.
+
+The GPIO chip may contain GPIO hog definitions. GPIO hogging is a mechanism
+providing automatic GPIO request and configuration as part of the
+gpio-controller's driver probe function.
+
+Each GPIO hog definition is represented as a child node of the GPIO controller.
+Required properties:
+- gpio-hog: A property specifying that this child node represent a GPIO hog.
+- gpios: Store the GPIO information (id, flags, ...). Shall contain the
+ number of cells specified in its parent node (GPIO controller
+ node).
+Only one of the following properties scanned in the order shown below.
+This means that when multiple properties are present they will be searched
+in the order presented below and the first match is taken as the intended
+configuration.
+- input: A property specifying to set the GPIO direction as input.
+- output-low A property specifying to set the GPIO direction as output with
+ the value low.
+- output-high A property specifying to set the GPIO direction as output with
+ the value high.
+
+Optional properties:
+- line-name: The GPIO label name. If not present the node name is used.
+
+Example of two SOC GPIO banks defined as gpio-controller nodes:
+
+ qe_pio_a: gpio-controller@1400 {
+ compatible = "fsl,qe-pario-bank-a", "fsl,qe-pario-bank";
+ reg = <0x1400 0x18>;
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ line_b {
+ gpio-hog;
+ gpios = <6 0>;
+ output-low;
+ line-name = "foo-bar-gpio";
+ };
+ };
+
+ qe_pio_e: gpio-controller@1460 {
+ compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank";
+ reg = <0x1460 0x18>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+2.1) gpio- and pin-controller interaction
+-----------------------------------------
+
+Some or all of the GPIOs provided by a GPIO controller may be routed to pins
+on the package via a pin controller. This allows muxing those pins between
+GPIO and other functions.
+
+It is useful to represent which GPIOs correspond to which pins on which pin
+controllers. The gpio-ranges property described below represents this, and
+contains information structures as follows:
+
+ gpio-range-list ::= <single-gpio-range> [gpio-range-list]
+ single-gpio-range ::= <numeric-gpio-range> | <named-gpio-range>
+ numeric-gpio-range ::=
+ <pinctrl-phandle> <gpio-base> <pinctrl-base> <count>
+ named-gpio-range ::= <pinctrl-phandle> <gpio-base> '<0 0>'
+ pinctrl-phandle : phandle to pin controller node
+ gpio-base : Base GPIO ID in the GPIO controller
+ pinctrl-base : Base pinctrl pin ID in the pin controller
+ count : The number of GPIOs/pins in this range
+
+The "pin controller node" mentioned above must conform to the bindings
+described in ../pinctrl/pinctrl-bindings.txt.
+
+In case named gpio ranges are used (ranges with both <pinctrl-base> and
+<count> set to 0), the property gpio-ranges-group-names contains one string
+for every single-gpio-range in gpio-ranges:
+ gpiorange-names-list ::= <gpiorange-name> [gpiorange-names-list]
+ gpiorange-name : Name of the pingroup associated to the GPIO range in
+ the respective pin controller.
+
+Elements of gpiorange-names-list corresponding to numeric ranges contain
+the empty string. Elements of gpiorange-names-list corresponding to named
+ranges contain the name of a pin group defined in the respective pin
+controller. The number of pins/GPIOs in the range is the number of pins in
+that pin group.
+
+Previous versions of this binding required all pin controller nodes that
+were referenced by any gpio-ranges property to contain a property named
+#gpio-range-cells with value <3>. This requirement is now deprecated.
+However, that property may still exist in older device trees for
+compatibility reasons, and would still be required even in new device
+trees that need to be compatible with older software.
+
+Example 1:
+
+ qe_pio_e: gpio-controller@1460 {
+ #gpio-cells = <2>;
+ compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank";
+ reg = <0x1460 0x18>;
+ gpio-controller;
+ gpio-ranges = <&pinctrl1 0 20 10>, <&pinctrl2 10 50 20>;
+ };
+
+Here, a single GPIO controller has GPIOs 0..9 routed to pin controller
+pinctrl1's pins 20..29, and GPIOs 10..19 routed to pin controller pinctrl2's
+pins 50..59.
+
+Example 2:
+
+ gpio_pio_i: gpio-controller@14B0 {
+ #gpio-cells = <2>;
+ compatible = "fsl,qe-pario-bank-e", "fsl,qe-pario-bank";
+ reg = <0x1480 0x18>;
+ gpio-controller;
+ gpio-ranges = <&pinctrl1 0 20 10>,
+ <&pinctrl2 10 0 0>,
+ <&pinctrl1 15 0 10>,
+ <&pinctrl2 25 0 0>;
+ gpio-ranges-group-names = "",
+ "foo",
+ "",
+ "bar";
+ };
+
+Here, three GPIO ranges are defined wrt. two pin controllers. pinctrl1 GPIO
+ranges are defined using pin numbers whereas the GPIO ranges wrt. pinctrl2
+are named "foo" and "bar".
diff --git a/Documentation/devicetree/bindings/gpio/gpio_atmel.txt b/Documentation/devicetree/bindings/gpio/gpio_atmel.txt
new file mode 100644
index 000000000..85f8c0d08
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio_atmel.txt
@@ -0,0 +1,25 @@
+* Atmel GPIO controller (PIO)
+
+Required properties:
+- compatible: "atmel,<chip>-gpio", where <chip> is at91rm9200 or at91sam9x5.
+- reg: Should contain GPIO controller registers location and length
+- interrupts: Should be the port interrupt shared by all the pins.
+- #gpio-cells: Should be two. The first cell is the pin number and
+ the second cell is used to specify optional parameters (currently
+ unused).
+- gpio-controller: Marks the device node as a GPIO controller.
+
+optional properties:
+- #gpio-lines: Number of gpio if absent 32.
+
+
+Example:
+ pioA: gpio@fffff200 {
+ compatible = "atmel,at91rm9200-gpio";
+ reg = <0xfffff200 0x100>;
+ interrupts = <2 4>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ #gpio-lines = <19>;
+ };
+
diff --git a/Documentation/devicetree/bindings/gpio/gpio_lpc32xx.txt b/Documentation/devicetree/bindings/gpio/gpio_lpc32xx.txt
new file mode 100644
index 000000000..49819367a
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio_lpc32xx.txt
@@ -0,0 +1,43 @@
+NXP LPC32xx SoC GPIO controller
+
+Required properties:
+- compatible: must be "nxp,lpc3220-gpio"
+- reg: Physical base address and length of the controller's registers.
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells: Should be 3:
+ 1) bank:
+ 0: GPIO P0
+ 1: GPIO P1
+ 2: GPIO P2
+ 3: GPIO P3
+ 4: GPI P3
+ 5: GPO P3
+ 2) pin number
+ 3) optional parameters:
+ - bit 0 specifies polarity (0 for normal, 1 for inverted)
+- reg: Index of the GPIO group
+
+Example:
+
+ gpio: gpio@40028000 {
+ compatible = "nxp,lpc3220-gpio";
+ reg = <0x40028000 0x1000>;
+ gpio-controller;
+ #gpio-cells = <3>; /* bank, pin, flags */
+ };
+
+ leds {
+ compatible = "gpio-leds";
+
+ led0 {
+ gpios = <&gpio 5 1 1>; /* GPO_P3 1, active low */
+ linux,default-trigger = "heartbeat";
+ default-state = "off";
+ };
+
+ led1 {
+ gpios = <&gpio 5 14 1>; /* GPO_P3 14, active low */
+ linux,default-trigger = "timer";
+ default-state = "off";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/gpio/moxa,moxart-gpio.txt b/Documentation/devicetree/bindings/gpio/moxa,moxart-gpio.txt
new file mode 100644
index 000000000..f8e8f185a
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/moxa,moxart-gpio.txt
@@ -0,0 +1,19 @@
+MOXA ART GPIO Controller
+
+Required properties:
+
+- #gpio-cells : Should be 2, The first cell is the pin number,
+ the second cell is used to specify polarity:
+ 0 = active high
+ 1 = active low
+- compatible : Must be "moxa,moxart-gpio"
+- reg : Should contain registers location and length
+
+Example:
+
+ gpio: gpio@98700000 {
+ gpio-controller;
+ #gpio-cells = <2>;
+ compatible = "moxa,moxart-gpio";
+ reg = <0x98700000 0xC>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/mrvl-gpio.txt b/Documentation/devicetree/bindings/gpio/mrvl-gpio.txt
new file mode 100644
index 000000000..98d198396
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/mrvl-gpio.txt
@@ -0,0 +1,69 @@
+* Marvell PXA GPIO controller
+
+Required properties:
+- compatible : Should be "intel,pxa25x-gpio", "intel,pxa26x-gpio",
+ "intel,pxa27x-gpio", "intel,pxa3xx-gpio",
+ "marvell,pxa93x-gpio", "marvell,mmp-gpio",
+ "marvell,mmp2-gpio" or marvell,pxa1928-gpio.
+- reg : Address and length of the register set for the device
+- interrupts : Should be the port interrupt shared by all gpio pins.
+ There're three gpio interrupts in arch-pxa, and they're gpio0,
+ gpio1 and gpio_mux. There're only one gpio interrupt in arch-mmp,
+ gpio_mux.
+- interrupt-names : Should be the names of irq resources. Each interrupt
+ uses its own interrupt name, so there should be as many interrupt names
+ as referenced interrupts.
+- interrupt-controller : Identifies the node as an interrupt controller.
+- #interrupt-cells: Specifies the number of cells needed to encode an
+ interrupt source.
+- gpio-controller : Marks the device node as a gpio controller.
+- #gpio-cells : Should be one. It is the pin number.
+
+Example for a MMP platform:
+
+ gpio: gpio@d4019000 {
+ compatible = "marvell,mmp-gpio";
+ reg = <0xd4019000 0x1000>;
+ interrupts = <49>;
+ interrupt-names = "gpio_mux";
+ gpio-controller;
+ #gpio-cells = <1>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+Example for a PXA3xx platform:
+
+ gpio: gpio@40e00000 {
+ compatible = "intel,pxa3xx-gpio";
+ reg = <0x40e00000 0x10000>;
+ interrupt-names = "gpio0", "gpio1", "gpio_mux";
+ interrupts = <8 9 10>;
+ gpio-controller;
+ #gpio-cells = <0x2>;
+ interrupt-controller;
+ #interrupt-cells = <0x2>;
+ };
+
+* Marvell Orion GPIO Controller
+
+Required properties:
+- compatible : Should be "marvell,orion-gpio"
+- reg : Address and length of the register set for controller.
+- gpio-controller : So we know this is a gpio controller.
+- ngpio : How many gpios this controller has.
+- interrupts : Up to 4 Interrupts for the controller.
+
+Optional properties:
+- mask-offset : For SMP Orions, offset for Nth CPU
+
+Example:
+
+ gpio0: gpio@10100 {
+ compatible = "marvell,orion-gpio";
+ #gpio-cells = <2>;
+ gpio-controller;
+ reg = <0x10100 0x40>;
+ ngpio = <32>;
+ interrupts = <35>, <36>, <37>, <38>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/nvidia,tegra20-gpio.txt b/Documentation/devicetree/bindings/gpio/nvidia,tegra20-gpio.txt
new file mode 100644
index 000000000..023c9526e
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/nvidia,tegra20-gpio.txt
@@ -0,0 +1,40 @@
+NVIDIA Tegra GPIO controller
+
+Required properties:
+- compatible : "nvidia,tegra<chip>-gpio"
+- reg : Physical base address and length of the controller's registers.
+- interrupts : The interrupt outputs from the controller. For Tegra20,
+ there should be 7 interrupts specified, and for Tegra30, there should
+ be 8 interrupts specified.
+- #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters:
+ - bit 0 specifies polarity (0 for normal, 1 for inverted)
+- gpio-controller : Marks the device node as a GPIO controller.
+- #interrupt-cells : Should be 2.
+ The first cell is the GPIO number.
+ The second cell is used to specify flags:
+ bits[3:0] trigger type and level flags:
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 4 = active high level-sensitive.
+ 8 = active low level-sensitive.
+ Valid combinations are 1, 2, 3, 4, 8.
+- interrupt-controller : Marks the device node as an interrupt controller.
+
+Example:
+
+gpio: gpio@6000d000 {
+ compatible = "nvidia,tegra20-gpio";
+ reg = < 0x6000d000 0x1000 >;
+ interrupts = < 0 32 0x04
+ 0 33 0x04
+ 0 34 0x04
+ 0 35 0x04
+ 0 55 0x04
+ 0 87 0x04
+ 0 89 0x04 >;
+ #gpio-cells = <2>;
+ gpio-controller;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+};
diff --git a/Documentation/devicetree/bindings/gpio/pl061-gpio.txt b/Documentation/devicetree/bindings/gpio/pl061-gpio.txt
new file mode 100644
index 000000000..89058d375
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/pl061-gpio.txt
@@ -0,0 +1,10 @@
+ARM PL061 GPIO controller
+
+Required properties:
+- compatible : "arm,pl061", "arm,primecell"
+- #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters:
+ - bit 0 specifies polarity (0 for normal, 1 for inverted)
+- gpio-controller : Marks the device node as a GPIO controller.
+- interrupts : Interrupt mapping for GPIO IRQ.
+- gpio-ranges : Interaction with the PINCTRL subsystem.
diff --git a/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt b/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt
new file mode 100644
index 000000000..38fb86f28
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt
@@ -0,0 +1,63 @@
+* Renesas R-Car GPIO Controller
+
+Required Properties:
+
+ - compatible: should contain one of the following.
+ - "renesas,gpio-r8a7778": for R8A7778 (R-Mobile M1) compatible GPIO controller.
+ - "renesas,gpio-r8a7779": for R8A7779 (R-Car H1) compatible GPIO controller.
+ - "renesas,gpio-r8a7790": for R8A7790 (R-Car H2) compatible GPIO controller.
+ - "renesas,gpio-r8a7791": for R8A7791 (R-Car M2-W) compatible GPIO controller.
+ - "renesas,gpio-r8a7793": for R8A7793 (R-Car M2-N) compatible GPIO controller.
+ - "renesas,gpio-r8a7794": for R8A7794 (R-Car E2) compatible GPIO controller.
+ - "renesas,gpio-rcar": for generic R-Car GPIO controller.
+
+ - reg: Base address and length of each memory resource used by the GPIO
+ controller hardware module.
+
+ - interrupt-parent: phandle of the parent interrupt controller.
+ - interrupts: Interrupt specifier for the controllers interrupt.
+
+ - gpio-controller: Marks the device node as a gpio controller.
+ - #gpio-cells: Should be 2. The first cell is the GPIO number and the second
+ cell specifies GPIO flags, as defined in <dt-bindings/gpio/gpio.h>. Only the
+ GPIO_ACTIVE_HIGH and GPIO_ACTIVE_LOW flags are supported.
+ - gpio-ranges: Range of pins managed by the GPIO controller.
+
+Optional properties:
+
+ - clocks: Must contain a reference to the functional clock. The property is
+ mandatory if the hardware implements a controllable functional clock for
+ the GPIO instance.
+
+Please refer to gpio.txt in this directory for details of gpio-ranges property
+and the common GPIO bindings used by client devices.
+
+The GPIO controller also acts as an interrupt controller. It uses the default
+two cells specifier as described in Documentation/devicetree/bindings/
+interrupt-controller/interrupts.txt.
+
+Example: R8A7779 (R-Car H1) GPIO controller nodes
+
+ gpio0: gpio@ffc40000 {
+ compatible = "renesas,gpio-r8a7779", "renesas,gpio-rcar";
+ reg = <0xffc40000 0x2c>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 141 0x4>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pfc 0 0 32>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+ ...
+ gpio6: gpio@ffc46000 {
+ compatible = "renesas,gpio-r8a7779", "renesas,gpio-rcar";
+ reg = <0xffc46000 0x2c>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 147 0x4>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ gpio-ranges = <&pfc 0 192 9>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt b/Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
new file mode 100644
index 000000000..dd5d2c039
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/snps-dwapb-gpio.txt
@@ -0,0 +1,60 @@
+* Synopsys DesignWare APB GPIO controller
+
+Required properties:
+- compatible : Should contain "snps,dw-apb-gpio"
+- reg : Address and length of the register set for the device.
+- #address-cells : should be 1 (for addressing port subnodes).
+- #size-cells : should be 0 (port subnodes).
+
+The GPIO controller has a configurable number of ports, each of which are
+represented as child nodes with the following properties:
+
+Required properties:
+- compatible : "snps,dw-apb-gpio-port"
+- gpio-controller : Marks the device node as a gpio controller.
+- #gpio-cells : Should be two. The first cell is the pin number and
+ the second cell is used to specify the gpio polarity:
+ 0 = active high
+ 1 = active low
+- reg : The integer port index of the port, a single cell.
+
+Optional properties:
+- interrupt-controller : The first port may be configured to be an interrupt
+controller.
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt. Shall be set to 2. The first cell defines the interrupt number,
+ the second encodes the triger flags encoded as described in
+ Documentation/devicetree/bindings/interrupts.txt
+- interrupt-parent : The parent interrupt controller.
+- interrupts : The interrupt to the parent controller raised when GPIOs
+ generate the interrupts.
+- snps,nr-gpios : The number of pins in the port, a single cell.
+
+Example:
+
+gpio: gpio@20000 {
+ compatible = "snps,dw-apb-gpio";
+ reg = <0x20000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ porta: gpio-controller@0 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <8>;
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&vic1>;
+ interrupts = <0>;
+ };
+
+ portb: gpio-controller@1 {
+ compatible = "snps,dw-apb-gpio-port";
+ gpio-controller;
+ #gpio-cells = <2>;
+ snps,nr-gpios = <8>;
+ reg = <1>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/gpio/sodaville.txt b/Documentation/devicetree/bindings/gpio/sodaville.txt
new file mode 100644
index 000000000..563eff22b
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/sodaville.txt
@@ -0,0 +1,48 @@
+GPIO controller on CE4100 / Sodaville SoCs
+==========================================
+
+The bindings for CE4100's GPIO controller match the generic description
+which is covered by the gpio.txt file in this folder.
+
+The only additional property is the intel,muxctl property which holds the
+value which is written into the MUXCNTL register.
+
+There is no compatible property for now because the driver is probed via
+PCI id (vendor 0x8086 device 0x2e67).
+
+The interrupt specifier consists of two cells encoded as follows:
+ - <1st cell>: The interrupt-number that identifies the interrupt source.
+ - <2nd cell>: The level-sense information, encoded as follows:
+ 4 - active high level-sensitive
+ 8 - active low level-sensitive
+
+Example of the GPIO device and one user:
+
+ pcigpio: gpio@b,1 {
+ /* two cells for GPIO and interrupt */
+ #gpio-cells = <2>;
+ #interrupt-cells = <2>;
+ compatible = "pci8086,2e67.2",
+ "pci8086,2e67",
+ "pciclassff0000",
+ "pciclassff00";
+
+ reg = <0x15900 0x0 0x0 0x0 0x0>;
+ /* Interrupt line of the gpio device */
+ interrupts = <15 1>;
+ /* It is an interrupt and GPIO controller itself */
+ interrupt-controller;
+ gpio-controller;
+ intel,muxctl = <0>;
+ };
+
+ testuser@20 {
+ compatible = "example,testuser";
+ /* User the 11th GPIO line as an active high triggered
+ * level interrupt
+ */
+ interrupts = <11 8>;
+ interrupt-parent = <&pcigpio>;
+ /* Use this GPIO also with the gpio functions */
+ gpios = <&pcigpio 11 0>;
+ };
diff --git a/Documentation/devicetree/bindings/gpio/spear_spics.txt b/Documentation/devicetree/bindings/gpio/spear_spics.txt
new file mode 100644
index 000000000..96c37eb15
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/spear_spics.txt
@@ -0,0 +1,50 @@
+=== ST Microelectronics SPEAr SPI CS Driver ===
+
+SPEAr platform provides a provision to control chipselects of ARM PL022 Prime
+Cell spi controller through its system registers, which otherwise remains under
+PL022 control. If chipselect remain under PL022 control then they would be
+released as soon as transfer is over and TxFIFO becomes empty. This is not
+desired by some of the device protocols above spi which expect (multiple)
+transfers without releasing their chipselects.
+
+Chipselects can be controlled by software by turning them as GPIOs. SPEAr
+provides another interface through system registers through which software can
+directly control each PL022 chipselect. Hence, it is natural for SPEAr to export
+the control of this interface as gpio.
+
+Required properties:
+
+ * compatible: should be defined as "st,spear-spics-gpio"
+ * reg: mentioning address range of spics controller
+ * st-spics,peripcfg-reg: peripheral configuration register offset
+ * st-spics,sw-enable-bit: bit offset to enable sw control
+ * st-spics,cs-value-bit: bit offset to drive chipselect low or high
+ * st-spics,cs-enable-mask: chip select number bit mask
+ * st-spics,cs-enable-shift: chip select number program offset
+ * gpio-controller: Marks the device node as gpio controller
+ * #gpio-cells: should be 1 and will mention chip select number
+
+All the above bit offsets are within peripcfg register.
+
+Example:
+-------
+spics: spics@e0700000{
+ compatible = "st,spear-spics-gpio";
+ reg = <0xe0700000 0x1000>;
+ st-spics,peripcfg-reg = <0x3b0>;
+ st-spics,sw-enable-bit = <12>;
+ st-spics,cs-value-bit = <11>;
+ st-spics,cs-enable-mask = <3>;
+ st-spics,cs-enable-shift = <8>;
+ gpio-controller;
+ #gpio-cells = <2>;
+};
+
+
+spi0: spi@e0100000 {
+ status = "okay";
+ num-cs = <3>;
+ cs-gpios = <&gpio1 7 0>, <&spics 0>,
+ <&spics 1>;
+ ...
+}
diff --git a/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt b/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt
new file mode 100644
index 000000000..23bfe8e1f
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/nvidia,gk20a.txt
@@ -0,0 +1,43 @@
+NVIDIA GK20A Graphics Processing Unit
+
+Required properties:
+- compatible: "nvidia,<chip>-<gpu>"
+ Currently recognized values:
+ - nvidia,tegra124-gk20a
+- reg: Physical base address and length of the controller's registers.
+ Must contain two entries:
+ - first entry for bar0
+ - second entry for bar1
+- interrupts: Must contain an entry for each entry in interrupt-names.
+ See ../interrupt-controller/interrupts.txt for details.
+- interrupt-names: Must include the following entries:
+ - stall
+ - nonstall
+- vdd-supply: regulator for supply voltage.
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - gpu
+ - pwr
+- resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - gpu
+
+Example:
+
+ gpu@0,57000000 {
+ compatible = "nvidia,gk20a";
+ reg = <0x0 0x57000000 0x0 0x01000000>,
+ <0x0 0x58000000 0x0 0x01000000>;
+ interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "stall", "nonstall";
+ vdd-supply = <&vdd_gpu>;
+ clocks = <&tegra_car TEGRA124_CLK_GPU>,
+ <&tegra_car TEGRA124_CLK_PLL_P_OUT5>;
+ clock-names = "gpu", "pwr";
+ resets = <&tegra_car 184>;
+ reset-names = "gpu";
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
new file mode 100644
index 000000000..009f4bfa1
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt
@@ -0,0 +1,378 @@
+NVIDIA Tegra host1x
+
+Required properties:
+- compatible: "nvidia,tegra<chip>-host1x"
+- reg: Physical base address and length of the controller's registers.
+- interrupts: The interrupt outputs from the controller.
+- #address-cells: The number of cells used to represent physical base addresses
+ in the host1x address space. Should be 1.
+- #size-cells: The number of cells used to represent the size of an address
+ range in the host1x address space. Should be 1.
+- ranges: The mapping of the host1x address space to the CPU address space.
+- clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - host1x
+
+The host1x top-level node defines a number of children, each representing one
+of the following host1x client modules:
+
+- mpe: video encoder
+
+ Required properties:
+ - compatible: "nvidia,tegra<chip>-mpe"
+ - reg: Physical base address and length of the controller's registers.
+ - interrupts: The interrupt outputs from the controller.
+ - clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - mpe
+
+- vi: video input
+
+ Required properties:
+ - compatible: "nvidia,tegra<chip>-vi"
+ - reg: Physical base address and length of the controller's registers.
+ - interrupts: The interrupt outputs from the controller.
+ - clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - vi
+
+- epp: encoder pre-processor
+
+ Required properties:
+ - compatible: "nvidia,tegra<chip>-epp"
+ - reg: Physical base address and length of the controller's registers.
+ - interrupts: The interrupt outputs from the controller.
+ - clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - epp
+
+- isp: image signal processor
+
+ Required properties:
+ - compatible: "nvidia,tegra<chip>-isp"
+ - reg: Physical base address and length of the controller's registers.
+ - interrupts: The interrupt outputs from the controller.
+ - clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - isp
+
+- gr2d: 2D graphics engine
+
+ Required properties:
+ - compatible: "nvidia,tegra<chip>-gr2d"
+ - reg: Physical base address and length of the controller's registers.
+ - interrupts: The interrupt outputs from the controller.
+ - clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - 2d
+
+- gr3d: 3D graphics engine
+
+ Required properties:
+ - compatible: "nvidia,tegra<chip>-gr3d"
+ - reg: Physical base address and length of the controller's registers.
+ - clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: Must include the following entries:
+ (This property may be omitted if the only clock in the list is "3d")
+ - 3d
+ This MUST be the first entry.
+ - 3d2 (Only required on SoCs with two 3D clocks)
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - 3d
+ - 3d2 (Only required on SoCs with two 3D clocks)
+
+- dc: display controller
+
+ Required properties:
+ - compatible: "nvidia,tegra<chip>-dc"
+ - reg: Physical base address and length of the controller's registers.
+ - interrupts: The interrupt outputs from the controller.
+ - clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: Must include the following entries:
+ - dc
+ This MUST be the first entry.
+ - parent
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - dc
+ - nvidia,head: The number of the display controller head. This is used to
+ setup the various types of output to receive video data from the given
+ head.
+
+ Each display controller node has a child node, named "rgb", that represents
+ the RGB output associated with the controller. It can take the following
+ optional properties:
+ - nvidia,ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
+ - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection
+ - nvidia,edid: supplies a binary EDID blob
+ - nvidia,panel: phandle of a display panel
+
+- hdmi: High Definition Multimedia Interface
+
+ Required properties:
+ - compatible: "nvidia,tegra<chip>-hdmi"
+ - reg: Physical base address and length of the controller's registers.
+ - interrupts: The interrupt outputs from the controller.
+ - hdmi-supply: supply for the +5V HDMI connector pin
+ - vdd-supply: regulator for supply voltage
+ - pll-supply: regulator for PLL
+ - clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: Must include the following entries:
+ - hdmi
+ This MUST be the first entry.
+ - parent
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - hdmi
+
+ Optional properties:
+ - nvidia,ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
+ - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection
+ - nvidia,edid: supplies a binary EDID blob
+ - nvidia,panel: phandle of a display panel
+
+- tvo: TV encoder output
+
+ Required properties:
+ - compatible: "nvidia,tegra<chip>-tvo"
+ - reg: Physical base address and length of the controller's registers.
+ - interrupts: The interrupt outputs from the controller.
+ - clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+
+- dsi: display serial interface
+
+ Required properties:
+ - compatible: "nvidia,tegra<chip>-dsi"
+ - reg: Physical base address and length of the controller's registers.
+ - clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: Must include the following entries:
+ - dsi
+ This MUST be the first entry.
+ - lp
+ - parent
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - dsi
+ - avdd-dsi-supply: phandle of a supply that powers the DSI controller
+ - nvidia,mipi-calibrate: Should contain a phandle and a specifier specifying
+ which pads are used by this DSI output and need to be calibrated. See also
+ ../mipi/nvidia,tegra114-mipi.txt.
+
+ Optional properties:
+ - nvidia,ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
+ - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection
+ - nvidia,edid: supplies a binary EDID blob
+ - nvidia,panel: phandle of a display panel
+ - nvidia,ganged-mode: contains a phandle to a second DSI controller to gang
+ up with in order to support up to 8 data lanes
+
+- sor: serial output resource
+
+ Required properties:
+ - compatible: For Tegra124, must contain "nvidia,tegra124-sor". Otherwise,
+ must contain '"nvidia,<chip>-sor", "nvidia,tegra124-sor"', where <chip>
+ is tegra132.
+ - reg: Physical base address and length of the controller's registers.
+ - interrupts: The interrupt outputs from the controller.
+ - clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: Must include the following entries:
+ - sor: clock input for the SOR hardware
+ - parent: input for the pixel clock
+ - dp: reference clock for the SOR clock
+ - safe: safe reference for the SOR clock during power up
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - sor
+
+ Optional properties:
+ - nvidia,ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
+ - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection
+ - nvidia,edid: supplies a binary EDID blob
+ - nvidia,panel: phandle of a display panel
+
+ Optional properties when driving an eDP output:
+ - nvidia,dpaux: phandle to a DispayPort AUX interface
+
+- dpaux: DisplayPort AUX interface
+ - compatible: For Tegra124, must contain "nvidia,tegra124-dpaux". Otherwise,
+ must contain '"nvidia,<chip>-dpaux", "nvidia,tegra124-dpaux"', where
+ <chip> is tegra132.
+ - reg: Physical base address and length of the controller's registers.
+ - interrupts: The interrupt outputs from the controller.
+ - clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: Must include the following entries:
+ - dpaux: clock input for the DPAUX hardware
+ - parent: reference clock
+ - resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names: Must include the following entries:
+ - dpaux
+ - vdd-supply: phandle of a supply that powers the DisplayPort link
+
+Example:
+
+/ {
+ ...
+
+ host1x {
+ compatible = "nvidia,tegra20-host1x", "simple-bus";
+ reg = <0x50000000 0x00024000>;
+ interrupts = <0 65 0x04 /* mpcore syncpt */
+ 0 67 0x04>; /* mpcore general */
+ clocks = <&tegra_car TEGRA20_CLK_HOST1X>;
+ resets = <&tegra_car 28>;
+ reset-names = "host1x";
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ ranges = <0x54000000 0x54000000 0x04000000>;
+
+ mpe {
+ compatible = "nvidia,tegra20-mpe";
+ reg = <0x54040000 0x00040000>;
+ interrupts = <0 68 0x04>;
+ clocks = <&tegra_car TEGRA20_CLK_MPE>;
+ resets = <&tegra_car 60>;
+ reset-names = "mpe";
+ };
+
+ vi {
+ compatible = "nvidia,tegra20-vi";
+ reg = <0x54080000 0x00040000>;
+ interrupts = <0 69 0x04>;
+ clocks = <&tegra_car TEGRA20_CLK_VI>;
+ resets = <&tegra_car 100>;
+ reset-names = "vi";
+ };
+
+ epp {
+ compatible = "nvidia,tegra20-epp";
+ reg = <0x540c0000 0x00040000>;
+ interrupts = <0 70 0x04>;
+ clocks = <&tegra_car TEGRA20_CLK_EPP>;
+ resets = <&tegra_car 19>;
+ reset-names = "epp";
+ };
+
+ isp {
+ compatible = "nvidia,tegra20-isp";
+ reg = <0x54100000 0x00040000>;
+ interrupts = <0 71 0x04>;
+ clocks = <&tegra_car TEGRA20_CLK_ISP>;
+ resets = <&tegra_car 23>;
+ reset-names = "isp";
+ };
+
+ gr2d {
+ compatible = "nvidia,tegra20-gr2d";
+ reg = <0x54140000 0x00040000>;
+ interrupts = <0 72 0x04>;
+ clocks = <&tegra_car TEGRA20_CLK_GR2D>;
+ resets = <&tegra_car 21>;
+ reset-names = "2d";
+ };
+
+ gr3d {
+ compatible = "nvidia,tegra20-gr3d";
+ reg = <0x54180000 0x00040000>;
+ clocks = <&tegra_car TEGRA20_CLK_GR3D>;
+ resets = <&tegra_car 24>;
+ reset-names = "3d";
+ };
+
+ dc@54200000 {
+ compatible = "nvidia,tegra20-dc";
+ reg = <0x54200000 0x00040000>;
+ interrupts = <0 73 0x04>;
+ clocks = <&tegra_car TEGRA20_CLK_DISP1>,
+ <&tegra_car TEGRA20_CLK_PLL_P>;
+ clock-names = "dc", "parent";
+ resets = <&tegra_car 27>;
+ reset-names = "dc";
+
+ rgb {
+ status = "disabled";
+ };
+ };
+
+ dc@54240000 {
+ compatible = "nvidia,tegra20-dc";
+ reg = <0x54240000 0x00040000>;
+ interrupts = <0 74 0x04>;
+ clocks = <&tegra_car TEGRA20_CLK_DISP2>,
+ <&tegra_car TEGRA20_CLK_PLL_P>;
+ clock-names = "dc", "parent";
+ resets = <&tegra_car 26>;
+ reset-names = "dc";
+
+ rgb {
+ status = "disabled";
+ };
+ };
+
+ hdmi {
+ compatible = "nvidia,tegra20-hdmi";
+ reg = <0x54280000 0x00040000>;
+ interrupts = <0 75 0x04>;
+ clocks = <&tegra_car TEGRA20_CLK_HDMI>,
+ <&tegra_car TEGRA20_CLK_PLL_D_OUT0>;
+ clock-names = "hdmi", "parent";
+ resets = <&tegra_car 51>;
+ reset-names = "hdmi";
+ status = "disabled";
+ };
+
+ tvo {
+ compatible = "nvidia,tegra20-tvo";
+ reg = <0x542c0000 0x00040000>;
+ interrupts = <0 76 0x04>;
+ clocks = <&tegra_car TEGRA20_CLK_TVO>;
+ status = "disabled";
+ };
+
+ dsi {
+ compatible = "nvidia,tegra20-dsi";
+ reg = <0x54300000 0x00040000>;
+ clocks = <&tegra_car TEGRA20_CLK_DSI>,
+ <&tegra_car TEGRA20_CLK_PLL_D_OUT0>;
+ clock-names = "dsi", "parent";
+ resets = <&tegra_car 48>;
+ reset-names = "dsi";
+ status = "disabled";
+ };
+ };
+
+ ...
+};
diff --git a/Documentation/devicetree/bindings/gpu/samsung-g2d.txt b/Documentation/devicetree/bindings/gpu/samsung-g2d.txt
new file mode 100644
index 000000000..c4f358daf
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/samsung-g2d.txt
@@ -0,0 +1,28 @@
+* Samsung 2D Graphics Accelerator
+
+Required properties:
+ - compatible : value should be one among the following:
+ (a) "samsung,s5pv210-g2d" for G2D IP present in S5PV210 & Exynos4210 SoC
+ (b) "samsung,exynos4212-g2d" for G2D IP present in Exynos4x12 SoCs
+ (c) "samsung,exynos5250-g2d" for G2D IP present in Exynos5250 SoC
+
+ - reg : Physical base address of the IP registers and length of memory
+ mapped region.
+
+ - interrupts : G2D interrupt number to the CPU.
+ - clocks : from common clock binding: handle to G2D clocks.
+ - clock-names : names of clocks listed in clocks property, in the same
+ order, depending on SoC type:
+ - for S5PV210 and Exynos4 based SoCs: "fimg2d" and
+ "sclk_fimg2d"
+ - for Exynos5250 SoC: "fimg2d".
+
+Example:
+ g2d@12800000 {
+ compatible = "samsung,s5pv210-g2d";
+ reg = <0x12800000 0x1000>;
+ interrupts = <0 89 0>;
+ clocks = <&clock 177>, <&clock 277>;
+ clock-names = "sclk_fimg2d", "fimg2d";
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/gpu/samsung-rotator.txt b/Documentation/devicetree/bindings/gpu/samsung-rotator.txt
new file mode 100644
index 000000000..82cd1ed0b
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/samsung-rotator.txt
@@ -0,0 +1,27 @@
+* Samsung Image Rotator
+
+Required properties:
+ - compatible : value should be one of the following:
+ (a) "samsung,exynos4210-rotator" for Rotator IP in Exynos4210
+ (b) "samsung,exynos4212-rotator" for Rotator IP in Exynos4212/4412
+ (c) "samsung,exynos5250-rotator" for Rotator IP in Exynos5250
+
+ - reg : Physical base address of the IP registers and length of memory
+ mapped region.
+
+ - interrupts : Interrupt specifier for rotator interrupt, according to format
+ specific to interrupt parent.
+
+ - clocks : Clock specifier for rotator clock, according to generic clock
+ bindings. (See Documentation/devicetree/bindings/clock/exynos*.txt)
+
+ - clock-names : Names of clocks. For exynos rotator, it should be "rotator".
+
+Example:
+ rotator@12810000 {
+ compatible = "samsung,exynos4210-rotator";
+ reg = <0x12810000 0x1000>;
+ interrupts = <0 83 0>;
+ clocks = <&clock 278>;
+ clock-names = "rotator";
+ };
diff --git a/Documentation/devicetree/bindings/gpu/st,stih4xx.txt b/Documentation/devicetree/bindings/gpu/st,stih4xx.txt
new file mode 100644
index 000000000..6b1d75f1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpu/st,stih4xx.txt
@@ -0,0 +1,243 @@
+STMicroelectronics stih4xx platforms
+
+- sti-vtg: video timing generator
+ Required properties:
+ - compatible: "st,vtg"
+ - reg: Physical base address of the IP registers and length of memory mapped region.
+ Optional properties:
+ - interrupts : VTG interrupt number to the CPU.
+ - st,slave: phandle on a slave vtg
+
+- sti-vtac: video timing advanced inter dye communication Rx and TX
+ Required properties:
+ - compatible: "st,vtac-main" or "st,vtac-aux"
+ - reg: Physical base address of the IP registers and length of memory mapped region.
+ - clocks: from common clock binding: handle hardware IP needed clocks, the
+ number of clocks may depend of the SoC type.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: names of the clocks listed in clocks property in the same
+ order.
+
+- sti-display-subsystem: Master device for DRM sub-components
+ This device must be the parent of all the sub-components and is responsible
+ of bind them.
+ Required properties:
+ - compatible: "st,sti-display-subsystem"
+ - ranges: to allow probing of subdevices
+
+- sti-compositor: frame compositor engine
+ must be a child of sti-display-subsystem
+ Required properties:
+ - compatible: "st,stih<chip>-compositor"
+ - reg: Physical base address of the IP registers and length of memory mapped region.
+ - clocks: from common clock binding: handle hardware IP needed clocks, the
+ number of clocks may depend of the SoC type.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: names of the clocks listed in clocks property in the same
+ order.
+ - resets: resets to be used by the device
+ See ../reset/reset.txt for details.
+ - reset-names: names of the resets listed in resets property in the same
+ order.
+ - st,vtg: phandle(s) on vtg device (main and aux) nodes.
+
+- sti-tvout: video out hardware block
+ must be a child of sti-display-subsystem
+ Required properties:
+ - compatible: "st,stih<chip>-tvout"
+ - reg: Physical base address of the IP registers and length of memory mapped region.
+ - reg-names: names of the mapped memory regions listed in regs property in
+ the same order.
+ - resets: resets to be used by the device
+ See ../reset/reset.txt for details.
+ - reset-names: names of the resets listed in resets property in the same
+ order.
+ - ranges: to allow probing of subdevices
+
+- sti-hdmi: hdmi output block
+ must be a child of sti-tvout
+ Required properties:
+ - compatible: "st,stih<chip>-hdmi";
+ - reg: Physical base address of the IP registers and length of memory mapped region.
+ - reg-names: names of the mapped memory regions listed in regs property in
+ the same order.
+ - interrupts : HDMI interrupt number to the CPU.
+ - interrupt-names: name of the interrupts listed in interrupts property in
+ the same order
+ - clocks: from common clock binding: handle hardware IP needed clocks, the
+ number of clocks may depend of the SoC type.
+ - clock-names: names of the clocks listed in clocks property in the same
+ order.
+ - ddc: phandle of an I2C controller used for DDC EDID probing
+
+sti-hda:
+ Required properties:
+ must be a child of sti-tvout
+ - compatible: "st,stih<chip>-hda"
+ - reg: Physical base address of the IP registers and length of memory mapped region.
+ - reg-names: names of the mapped memory regions listed in regs property in
+ the same order.
+ - clocks: from common clock binding: handle hardware IP needed clocks, the
+ number of clocks may depend of the SoC type.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: names of the clocks listed in clocks property in the same
+ order.
+
+sti-dvo:
+ Required properties:
+ must be a child of sti-tvout
+ - compatible: "st,stih<chip>-dvo"
+ - reg: Physical base address of the IP registers and length of memory mapped region.
+ - reg-names: names of the mapped memory regions listed in regs property in
+ the same order.
+ - clocks: from common clock binding: handle hardware IP needed clocks, the
+ number of clocks may depend of the SoC type.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: names of the clocks listed in clocks property in the same
+ order.
+ - pinctrl-0: pin control handle
+ - pinctrl-name: names of the pin control to use
+ - sti,panel: phandle of the panel connected to the DVO output
+
+sti-hqvdp:
+ must be a child of sti-display-subsystem
+ Required properties:
+ - compatible: "st,stih<chip>-hqvdp"
+ - reg: Physical base address of the IP registers and length of memory mapped region.
+ - clocks: from common clock binding: handle hardware IP needed clocks, the
+ number of clocks may depend of the SoC type.
+ See ../clocks/clock-bindings.txt for details.
+ - clock-names: names of the clocks listed in clocks property in the same
+ order.
+ - resets: resets to be used by the device
+ See ../reset/reset.txt for details.
+ - reset-names: names of the resets listed in resets property in the same
+ order.
+ - st,vtg: phandle on vtg main device node.
+
+Example:
+
+/ {
+ ...
+
+ vtg_main_slave: sti-vtg-main-slave@fe85A800 {
+ compatible = "st,vtg";
+ reg = <0xfe85A800 0x300>;
+ interrupts = <GIC_SPI 175 IRQ_TYPE_NONE>;
+ };
+
+ vtg_main: sti-vtg-main-master@fd348000 {
+ compatible = "st,vtg";
+ reg = <0xfd348000 0x400>;
+ st,slave = <&vtg_main_slave>;
+ };
+
+ vtg_aux_slave: sti-vtg-aux-slave@fd348400 {
+ compatible = "st,vtg";
+ reg = <0xfe858200 0x300>;
+ interrupts = <GIC_SPI 176 IRQ_TYPE_NONE>;
+ };
+
+ vtg_aux: sti-vtg-aux-master@fd348400 {
+ compatible = "st,vtg";
+ reg = <0xfd348400 0x400>;
+ st,slave = <&vtg_aux_slave>;
+ };
+
+
+ sti-vtac-rx-main@fee82800 {
+ compatible = "st,vtac-main";
+ reg = <0xfee82800 0x200>;
+ clock-names = "vtac";
+ clocks = <&clk_m_a2_div0 CLK_M_VTAC_MAIN_PHY>;
+ };
+
+ sti-vtac-rx-aux@fee82a00 {
+ compatible = "st,vtac-aux";
+ reg = <0xfee82a00 0x200>;
+ clock-names = "vtac";
+ clocks = <&clk_m_a2_div0 CLK_M_VTAC_AUX_PHY>;
+ };
+
+ sti-vtac-tx-main@fd349000 {
+ compatible = "st,vtac-main";
+ reg = <0xfd349000 0x200>, <0xfd320000 0x10000>;
+ clock-names = "vtac";
+ clocks = <&clk_s_a1_hs CLK_S_VTAC_TX_PHY>;
+ };
+
+ sti-vtac-tx-aux@fd349200 {
+ compatible = "st,vtac-aux";
+ reg = <0xfd349200 0x200>, <0xfd320000 0x10000>;
+ clock-names = "vtac";
+ clocks = <&clk_s_a1_hs CLK_S_VTAC_TX_PHY>;
+ };
+
+ sti-display-subsystem {
+ compatible = "st,sti-display-subsystem";
+ ranges;
+
+ sti-compositor@fd340000 {
+ compatible = "st,stih416-compositor";
+ reg = <0xfd340000 0x1000>;
+ clock-names = "compo_main", "compo_aux",
+ "pix_main", "pix_aux";
+ clocks = <&clk_m_a2_div1 CLK_M_COMPO_MAIN>, <&clk_m_a2_div1 CLK_M_COMPO_AUX>,
+ <&clockgen_c_vcc CLK_S_PIX_MAIN>, <&clockgen_c_vcc CLK_S_PIX_AUX>;
+ reset-names = "compo-main", "compo-aux";
+ resets = <&softreset STIH416_COMPO_M_SOFTRESET>, <&softreset STIH416_COMPO_A_SOFTRESET>;
+ st,vtg = <&vtg_main>, <&vtg_aux>;
+ };
+
+ sti-tvout@fe000000 {
+ compatible = "st,stih416-tvout";
+ reg = <0xfe000000 0x1000>, <0xfe85a000 0x400>, <0xfe830000 0x10000>;
+ reg-names = "tvout-reg", "hda-reg", "syscfg";
+ reset-names = "tvout";
+ resets = <&softreset STIH416_HDTVOUT_SOFTRESET>;
+ ranges;
+
+ sti-hdmi@fe85c000 {
+ compatible = "st,stih416-hdmi";
+ reg = <0xfe85c000 0x1000>, <0xfe830000 0x10000>;
+ reg-names = "hdmi-reg", "syscfg";
+ interrupts = <GIC_SPI 173 IRQ_TYPE_NONE>;
+ interrupt-names = "irq";
+ clock-names = "pix", "tmds", "phy", "audio";
+ clocks = <&clockgen_c_vcc CLK_S_PIX_HDMI>, <&clockgen_c_vcc CLK_S_TMDS_HDMI>, <&clockgen_c_vcc CLK_S_HDMI_REJECT_PLL>, <&clockgen_b1 CLK_S_PCM_0>;
+ };
+
+ sti-hda@fe85a000 {
+ compatible = "st,stih416-hda";
+ reg = <0xfe85a000 0x400>, <0xfe83085c 0x4>;
+ reg-names = "hda-reg", "video-dacs-ctrl";
+ clock-names = "pix", "hddac";
+ clocks = <&clockgen_c_vcc CLK_S_PIX_HD>, <&clockgen_c_vcc CLK_S_HDDAC>;
+ };
+
+ sti-dvo@8d00400 {
+ compatible = "st,stih407-dvo";
+ reg = <0x8d00400 0x200>;
+ reg-names = "dvo-reg";
+ clock-names = "dvo_pix", "dvo",
+ "main_parent", "aux_parent";
+ clocks = <&clk_s_d2_flexgen CLK_PIX_DVO>, <&clk_s_d2_flexgen CLK_DVO>,
+ <&clk_s_d2_quadfs 0>, <&clk_s_d2_quadfs 1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_dvo>;
+ sti,panel = <&panel_dvo>;
+ };
+ };
+
+ sti-hqvdp@9c000000 {
+ compatible = "st,stih407-hqvdp";
+ reg = <0x9C00000 0x100000>;
+ clock-names = "hqvdp", "pix_main";
+ clocks = <&clk_s_c0_flexgen CLK_MAIN_DISP>, <&clk_s_d2_flexgen CLK_PIX_MAIN_DISP>;
+ reset-names = "hqvdp";
+ resets = <&softreset STIH407_HDQVDP_SOFTRESET>;
+ st,vtg = <&vtg_main>;
+ };
+ };
+ ...
+};
diff --git a/Documentation/devicetree/bindings/graph.txt b/Documentation/devicetree/bindings/graph.txt
new file mode 100644
index 000000000..fcb1c6a47
--- /dev/null
+++ b/Documentation/devicetree/bindings/graph.txt
@@ -0,0 +1,129 @@
+Common bindings for device graphs
+
+General concept
+---------------
+
+The hierarchical organisation of the device tree is well suited to describe
+control flow to devices, but there can be more complex connections between
+devices that work together to form a logical compound device, following an
+arbitrarily complex graph.
+There already is a simple directed graph between devices tree nodes using
+phandle properties pointing to other nodes to describe connections that
+can not be inferred from device tree parent-child relationships. The device
+tree graph bindings described herein abstract more complex devices that can
+have multiple specifiable ports, each of which can be linked to one or more
+ports of other devices.
+
+These common bindings do not contain any information about the direction or
+type of the connections, they just map their existence. Specific properties
+may be described by specialized bindings depending on the type of connection.
+
+To see how this binding applies to video pipelines, for example, see
+Documentation/devicetree/bindings/media/video-interfaces.txt.
+Here the ports describe data interfaces, and the links between them are
+the connecting data buses. A single port with multiple connections can
+correspond to multiple devices being connected to the same physical bus.
+
+Organisation of ports and endpoints
+-----------------------------------
+
+Ports are described by child 'port' nodes contained in the device node.
+Each port node contains an 'endpoint' subnode for each remote device port
+connected to this port. If a single port is connected to more than one
+remote device, an 'endpoint' child node must be provided for each link.
+If more than one port is present in a device node or there is more than one
+endpoint at a port, or a port node needs to be associated with a selected
+hardware interface, a common scheme using '#address-cells', '#size-cells'
+and 'reg' properties is used number the nodes.
+
+device {
+ ...
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ endpoint@0 {
+ reg = <0>;
+ ...
+ };
+ endpoint@1 {
+ reg = <1>;
+ ...
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ endpoint { ... };
+ };
+};
+
+All 'port' nodes can be grouped under an optional 'ports' node, which
+allows to specify #address-cells, #size-cells properties for the 'port'
+nodes independently from any other child device nodes a device might
+have.
+
+device {
+ ...
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ ...
+ endpoint@0 { ... };
+ endpoint@1 { ... };
+ };
+
+ port@1 { ... };
+ };
+};
+
+Links between endpoints
+-----------------------
+
+Each endpoint should contain a 'remote-endpoint' phandle property that points
+to the corresponding endpoint in the port of the remote device. In turn, the
+remote endpoint should contain a 'remote-endpoint' property. If it has one,
+it must not point to another than the local endpoint. Two endpoints with their
+'remote-endpoint' phandles pointing at each other form a link between the
+containing ports.
+
+device-1 {
+ port {
+ device_1_output: endpoint {
+ remote-endpoint = <&device_2_input>;
+ };
+ };
+};
+
+device-2 {
+ port {
+ device_2_input: endpoint {
+ remote-endpoint = <&device_1_output>;
+ };
+ };
+};
+
+
+Required properties
+-------------------
+
+If there is more than one 'port' or more than one 'endpoint' node or 'reg'
+property is present in port and/or endpoint nodes the following properties
+are required in a relevant parent node:
+
+ - #address-cells : number of cells required to define port/endpoint
+ identifier, should be 1.
+ - #size-cells : should be zero.
+
+Optional endpoint properties
+----------------------------
+
+- remote-endpoint: phandle to an 'endpoint' subnode of a remote device node.
+
diff --git a/Documentation/devicetree/bindings/hid/hid-over-i2c.txt b/Documentation/devicetree/bindings/hid/hid-over-i2c.txt
new file mode 100644
index 000000000..488edcb26
--- /dev/null
+++ b/Documentation/devicetree/bindings/hid/hid-over-i2c.txt
@@ -0,0 +1,28 @@
+* HID over I2C Device-Tree bindings
+
+HID over I2C provides support for various Human Interface Devices over the
+I2C bus. These devices can be for example touchpads, keyboards, touch screens
+or sensors.
+
+The specification has been written by Microsoft and is currently available here:
+http://msdn.microsoft.com/en-us/library/windows/hardware/hh852380.aspx
+
+If this binding is used, the kernel module i2c-hid will handle the communication
+with the device and the generic hid core layer will handle the protocol.
+
+Required properties:
+- compatible: must be "hid-over-i2c"
+- reg: i2c slave address
+- hid-descr-addr: HID descriptor address
+- interrupt-parent: the phandle for the interrupt controller
+- interrupts: interrupt line
+
+Example:
+
+ i2c-hid-dev@2c {
+ compatible = "hid-over-i2c";
+ reg = <0x2c>;
+ hid-descr-addr = <0x0020>;
+ interrupt-parent = <&gpx3>;
+ interrupts = <3 2>;
+ };
diff --git a/Documentation/devicetree/bindings/hsi/client-devices.txt b/Documentation/devicetree/bindings/hsi/client-devices.txt
new file mode 100644
index 000000000..104c9a3e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/hsi/client-devices.txt
@@ -0,0 +1,44 @@
+Each HSI port is supposed to have one child node, which
+symbols the remote device connected to the HSI port. The
+following properties are standardized for HSI clients:
+
+Required HSI configuration properties:
+
+- hsi-channel-ids: A list of channel ids
+
+- hsi-rx-mode: Receiver Bit transmission mode ("stream" or "frame")
+- hsi-tx-mode: Transmitter Bit transmission mode ("stream" or "frame")
+- hsi-mode: May be used instead hsi-rx-mode and hsi-tx-mode if
+ the transmission mode is the same for receiver and
+ transmitter
+- hsi-speed-kbps: Max bit transmission speed in kbit/s
+- hsi-flow: RX flow type ("synchronized" or "pipeline")
+- hsi-arb-mode: Arbitration mode for TX frame ("round-robin", "priority")
+
+Optional HSI configuration properties:
+
+- hsi-channel-names: A list with one name per channel specified in the
+ hsi-channel-ids property
+
+
+Device Tree node example for an HSI client:
+
+hsi-controller {
+ hsi-port {
+ modem: hsi-client {
+ compatible = "nokia,n900-modem";
+
+ hsi-channel-ids = <0>, <1>, <2>, <3>;
+ hsi-channel-names = "mcsaab-control",
+ "speech-control",
+ "speech-data",
+ "mcsaab-data";
+ hsi-speed-kbps = <55000>;
+ hsi-mode = "frame";
+ hsi-flow = "synchronized";
+ hsi-arb-mode = "round-robin";
+
+ /* more client specific properties */
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/hsi/nokia-modem.txt b/Documentation/devicetree/bindings/hsi/nokia-modem.txt
new file mode 100644
index 000000000..8a9797804
--- /dev/null
+++ b/Documentation/devicetree/bindings/hsi/nokia-modem.txt
@@ -0,0 +1,57 @@
+Nokia modem client bindings
+
+The Nokia modem HSI client follows the common HSI client binding
+and inherits all required properties. The following additional
+properties are needed by the Nokia modem HSI client:
+
+Required properties:
+- compatible: Should be one of
+ "nokia,n900-modem"
+- hsi-channel-names: Should contain the following strings
+ "mcsaab-control"
+ "speech-control"
+ "speech-data"
+ "mcsaab-data"
+- gpios: Should provide a GPIO handler for each GPIO listed in
+ gpio-names
+- gpio-names: Should contain the following strings
+ "cmt_apeslpx"
+ "cmt_rst_rq"
+ "cmt_en"
+ "cmt_rst"
+ "cmt_bsi"
+- interrupts: Should be IRQ handle for modem's reset indication
+
+Example:
+
+&ssi_port {
+ modem: hsi-client {
+ compatible = "nokia,n900-modem";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&modem_pins>;
+
+ hsi-channel-ids = <0>, <1>, <2>, <3>;
+ hsi-channel-names = "mcsaab-control",
+ "speech-control",
+ "speech-data",
+ "mcsaab-data";
+ hsi-speed-kbps = <55000>;
+ hsi-mode = "frame";
+ hsi-flow = "synchronized";
+ hsi-arb-mode = "round-robin";
+
+ interrupts-extended = <&gpio3 8 IRQ_TYPE_EDGE_FALLING>; /* 72 */
+
+ gpios = <&gpio3 6 GPIO_ACTIVE_HIGH>, /* 70 */
+ <&gpio3 9 GPIO_ACTIVE_HIGH>, /* 73 */
+ <&gpio3 10 GPIO_ACTIVE_HIGH>, /* 74 */
+ <&gpio3 11 GPIO_ACTIVE_HIGH>, /* 75 */
+ <&gpio5 29 GPIO_ACTIVE_HIGH>; /* 157 */
+ gpio-names = "cmt_apeslpx",
+ "cmt_rst_rq",
+ "cmt_en",
+ "cmt_rst",
+ "cmt_bsi";
+ };
+};
diff --git a/Documentation/devicetree/bindings/hsi/omap-ssi.txt b/Documentation/devicetree/bindings/hsi/omap-ssi.txt
new file mode 100644
index 000000000..f26625e42
--- /dev/null
+++ b/Documentation/devicetree/bindings/hsi/omap-ssi.txt
@@ -0,0 +1,97 @@
+OMAP SSI controller bindings
+
+OMAP Synchronous Serial Interface (SSI) controller implements a legacy
+variant of MIPI's High Speed Synchronous Serial Interface (HSI).
+
+Required properties:
+- compatible: Should include "ti,omap3-ssi".
+- reg-names: Contains the values "sys" and "gdd" (in this order).
+- reg: Contains a matching register specifier for each entry
+ in reg-names.
+- interrupt-names: Contains the value "gdd_mpu".
+- interrupts: Contains matching interrupt information for each entry
+ in interrupt-names.
+- ranges: Represents the bus address mapping between the main
+ controller node and the child nodes below.
+- clock-names: Must include the following entries:
+ "ssi_ssr_fck": The OMAP clock of that name
+ "ssi_sst_fck": The OMAP clock of that name
+ "ssi_ick": The OMAP clock of that name
+- clocks: Contains a matching clock specifier for each entry in
+ clock-names.
+- #address-cells: Should be set to <1>
+- #size-cells: Should be set to <1>
+
+Each port is represented as a sub-node of the ti,omap3-ssi device.
+
+Required Port sub-node properties:
+- compatible: Should be set to the following value
+ ti,omap3-ssi-port (applicable to OMAP34xx devices)
+- reg-names: Contains the values "tx" and "rx" (in this order).
+- reg: Contains a matching register specifier for each entry
+ in reg-names.
+- interrupt-parent Should be a phandle for the interrupt controller
+- interrupts: Should contain interrupt specifiers for mpu interrupts
+ 0 and 1 (in this order).
+- ti,ssi-cawake-gpio: Defines which GPIO pin is used to signify CAWAKE
+ events for the port. This is an optional board-specific
+ property. If it's missing the port will not be
+ enabled.
+
+Example for Nokia N900:
+
+ssi-controller@48058000 {
+ compatible = "ti,omap3-ssi";
+
+ /* needed until hwmod is updated to use the compatible string */
+ ti,hwmods = "ssi";
+
+ reg = <0x48058000 0x1000>,
+ <0x48059000 0x1000>;
+ reg-names = "sys",
+ "gdd";
+
+ interrupts = <55>;
+ interrupt-names = "gdd_mpu";
+
+ clocks = <&ssi_ssr_fck>,
+ <&ssi_sst_fck>,
+ <&ssi_ick>;
+ clock-names = "ssi_ssr_fck",
+ "ssi_sst_fck",
+ "ssi_ick";
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ ssi-port@4805a000 {
+ compatible = "ti,omap3-ssi-port";
+
+ reg = <0x4805a000 0x800>,
+ <0x4805a800 0x800>;
+ reg-names = "tx",
+ "rx";
+
+ interrupt-parent = <&intc>;
+ interrupts = <67>,
+ <68>;
+
+ ti,ssi-cawake-gpio = <&gpio5 23 GPIO_ACTIVE_HIGH>; /* 151 */
+ }
+
+ ssi-port@4805a000 {
+ compatible = "ti,omap3-ssi-port";
+
+ reg = <0x4805b000 0x800>,
+ <0x4805b800 0x800>;
+ reg-names = "tx",
+ "rx";
+
+ interrupt-parent = <&intc>;
+ interrupts = <69>,
+ <70>;
+
+ status = "disabled"; /* second port is not used on N900 */
+ }
+}
diff --git a/Documentation/devicetree/bindings/hwmon/ads1015.txt b/Documentation/devicetree/bindings/hwmon/ads1015.txt
new file mode 100644
index 000000000..918a507d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ads1015.txt
@@ -0,0 +1,73 @@
+ADS1015 (I2C)
+
+This device is a 12-bit A-D converter with 4 inputs.
+
+The inputs can be used single ended or in certain differential combinations.
+
+For configuration all possible combinations are mapped to 8 channels:
+ 0: Voltage over AIN0 and AIN1.
+ 1: Voltage over AIN0 and AIN3.
+ 2: Voltage over AIN1 and AIN3.
+ 3: Voltage over AIN2 and AIN3.
+ 4: Voltage over AIN0 and GND.
+ 5: Voltage over AIN1 and GND.
+ 6: Voltage over AIN2 and GND.
+ 7: Voltage over AIN3 and GND.
+
+Each channel can be configured individually:
+ - pga is the programmable gain amplifier (values are full scale)
+ 0: +/- 6.144 V
+ 1: +/- 4.096 V
+ 2: +/- 2.048 V (default)
+ 3: +/- 1.024 V
+ 4: +/- 0.512 V
+ 5: +/- 0.256 V
+ - data_rate in samples per second
+ 0: 128
+ 1: 250
+ 2: 490
+ 3: 920
+ 4: 1600 (default)
+ 5: 2400
+ 6: 3300
+
+1) The /ads1015 node
+
+ Required properties:
+
+ - compatible : must be "ti,ads1015"
+ - reg : I2C bus address of the device
+ - #address-cells : must be <1>
+ - #size-cells : must be <0>
+
+ The node contains child nodes for each channel that the platform uses.
+
+ Example ADS1015 node:
+
+ ads1015@49 {
+ compatible = "ti,ads1015";
+ reg = <0x49>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ [ child node definitions... ]
+ }
+
+2) channel nodes
+
+ Required properties:
+
+ - reg : the channel number
+
+ Optional properties:
+
+ - ti,gain : the programmable gain amplifier setting
+ - ti,datarate : the converter data rate
+
+ Example ADS1015 channel node:
+
+ channel@4 {
+ reg = <4>;
+ ti,gain = <3>;
+ ti,datarate = <5>;
+ };
diff --git a/Documentation/devicetree/bindings/hwmon/g762.txt b/Documentation/devicetree/bindings/hwmon/g762.txt
new file mode 100644
index 000000000..25cc6d8ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/g762.txt
@@ -0,0 +1,47 @@
+GMT G762/G763 PWM Fan controller
+
+Required node properties:
+
+ - "compatible": must be either "gmt,g762" or "gmt,g763"
+ - "reg": I2C bus address of the device
+ - "clocks": a fixed clock providing input clock frequency
+ on CLK pin of the chip.
+
+Optional properties:
+
+ - "fan_startv": fan startup voltage. Accepted values are 0, 1, 2 and 3.
+ The higher the more.
+
+ - "pwm_polarity": pwm polarity. Accepted values are 0 (positive duty)
+ and 1 (negative duty).
+
+ - "fan_gear_mode": fan gear mode. Supported values are 0, 1 and 2.
+
+If an optional property is not set in .dts file, then current value is kept
+unmodified (e.g. u-boot installed value).
+
+Additional information on operational parameters for the device is available
+in Documentation/hwmon/g762. A detailed datasheet for the device is available
+at http://natisbad.org/NAS/refs/GMT_EDS-762_763-080710-0.2.pdf.
+
+Example g762 node:
+
+ clocks {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ g762_clk: fixedclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <8192>;
+ }
+ }
+
+ g762: g762@3e {
+ compatible = "gmt,g762";
+ reg = <0x3e>;
+ clocks = <&g762_clk>
+ fan_gear_mode = <0>; /* chip default */
+ fan_startv = <1>; /* chip default */
+ pwm_polarity = <0>; /* chip default */
+ };
diff --git a/Documentation/devicetree/bindings/hwmon/ibmpowernv.txt b/Documentation/devicetree/bindings/hwmon/ibmpowernv.txt
new file mode 100644
index 000000000..f93242be6
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ibmpowernv.txt
@@ -0,0 +1,23 @@
+IBM POWERNV platform sensors
+----------------------------
+
+Required node properties:
+- compatible: must be one of
+ "ibm,opal-sensor-cooling-fan"
+ "ibm,opal-sensor-amb-temp"
+ "ibm,opal-sensor-power-supply"
+ "ibm,opal-sensor-power"
+- sensor-id: an opaque id provided by the firmware to the kernel, identifies a
+ given sensor and its attribute data
+
+Example sensors node:
+
+cooling-fan#8-data {
+ sensor-id = <0x7052107>;
+ compatible = "ibm,opal-sensor-cooling-fan";
+};
+
+amb-temp#1-thrs {
+ sensor-id = <0x5096000>;
+ compatible = "ibm,opal-sensor-amb-temp";
+};
diff --git a/Documentation/devicetree/bindings/hwmon/lm90.txt b/Documentation/devicetree/bindings/hwmon/lm90.txt
new file mode 100644
index 000000000..e8632486b
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/lm90.txt
@@ -0,0 +1,44 @@
+* LM90 series thermometer.
+
+Required node properties:
+- compatible: manufacturer and chip name, one of
+ "adi,adm1032"
+ "adi,adt7461"
+ "adi,adt7461a"
+ "gmt,g781"
+ "national,lm90"
+ "national,lm86"
+ "national,lm89"
+ "national,lm99"
+ "dallas,max6646"
+ "dallas,max6647"
+ "dallas,max6649"
+ "dallas,max6657"
+ "dallas,max6658"
+ "dallas,max6659"
+ "dallas,max6680"
+ "dallas,max6681"
+ "dallas,max6695"
+ "dallas,max6696"
+ "onnn,nct1008"
+ "winbond,w83l771"
+ "nxp,sa56004"
+
+- reg: I2C bus address of the device
+
+- vcc-supply: vcc regulator for the supply voltage.
+
+Optional properties:
+- interrupts: Contains a single interrupt specifier which describes the
+ LM90 "-ALERT" pin output.
+ See interrupt-controller/interrupts.txt for the format.
+
+Example LM90 node:
+
+temp-sensor {
+ compatible = "onnn,nct1008";
+ reg = <0x4c>;
+ vcc-supply = <&palmas_ldo6_reg>;
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(O, 4) IRQ_TYPE_LEVEL_LOW>;
+}
diff --git a/Documentation/devicetree/bindings/hwmon/ltc2978.txt b/Documentation/devicetree/bindings/hwmon/ltc2978.txt
new file mode 100644
index 000000000..ed2f09dc2
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ltc2978.txt
@@ -0,0 +1,39 @@
+ltc2978
+
+Required properties:
+- compatible: should contain one of:
+ * "lltc,ltc2974"
+ * "lltc,ltc2977"
+ * "lltc,ltc2978"
+ * "lltc,ltc3880"
+ * "lltc,ltc3883"
+ * "lltc,ltm4676"
+- reg: I2C slave address
+
+Optional properties:
+- regulators: A node that houses a sub-node for each regulator controlled by
+ the device. Each sub-node is identified using the node's name, with valid
+ values listed below. The content of each sub-node is defined by the
+ standard binding for regulators; see regulator.txt.
+
+Valid names of regulators depend on number of supplies supported per device:
+ * ltc2974 : vout0 - vout3
+ * ltc2977 : vout0 - vout7
+ * ltc2978 : vout0 - vout7
+ * ltc3880 : vout0 - vout1
+ * ltc3883 : vout0
+ * ltm4676 : vout0 - vout1
+
+Example:
+ltc2978@5e {
+ compatible = "lltc,ltc2978";
+ reg = <0x5e>;
+ regulators {
+ vout0 {
+ regulator-name = "FPGA-2.5V";
+ };
+ vout2 {
+ regulator-name = "FPGA-1.5V";
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt b/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt
new file mode 100644
index 000000000..fcca8e744
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/ntc_thermistor.txt
@@ -0,0 +1,41 @@
+NTC Thermistor hwmon sensors
+-------------------------------
+
+Requires node properties:
+- "compatible" value : one of
+ "epcos,b57330v2103"
+ "murata,ncp15wb473"
+ "murata,ncp18wb473"
+ "murata,ncp21wb473"
+ "murata,ncp03wb473"
+ "murata,ncp15wl333"
+
+/* Usage of vendor name "ntc" is deprecated */
+<DEPRECATED> "ntc,ncp15wb473"
+<DEPRECATED> "ntc,ncp18wb473"
+<DEPRECATED> "ntc,ncp21wb473"
+<DEPRECATED> "ntc,ncp03wb473"
+<DEPRECATED> "ntc,ncp15wl333"
+
+- "pullup-uv" Pull up voltage in micro volts
+- "pullup-ohm" Pull up resistor value in ohms
+- "pulldown-ohm" Pull down resistor value in ohms
+- "connected-positive" Always ON, If not specified.
+ Status change is possible.
+- "io-channels" Channel node of ADC to be used for
+ conversion.
+
+Optional node properties:
+- "#thermal-sensor-cells" Used to expose itself to thermal fw.
+
+Read more about iio bindings at
+ Documentation/devicetree/bindings/iio/iio-bindings.txt
+
+Example:
+ ncp15wb473@0 {
+ compatible = "murata,ncp15wb473";
+ pullup-uv = <1800000>;
+ pullup-ohm = <47000>;
+ pulldown-ohm = <0>;
+ io-channels = <&adc 3>;
+ };
diff --git a/Documentation/devicetree/bindings/hwmon/pwm-fan.txt b/Documentation/devicetree/bindings/hwmon/pwm-fan.txt
new file mode 100644
index 000000000..610757ce4
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/pwm-fan.txt
@@ -0,0 +1,12 @@
+Bindings for a fan connected to the PWM lines
+
+Required properties:
+- compatible : "pwm-fan"
+- pwms : the PWM that is used to control the PWM fan
+
+Example:
+ pwm-fan {
+ compatible = "pwm-fan";
+ status = "okay";
+ pwms = <&pwm 0 10000 0>;
+ };
diff --git a/Documentation/devicetree/bindings/hwmon/vexpress.txt b/Documentation/devicetree/bindings/hwmon/vexpress.txt
new file mode 100644
index 000000000..9c27ed694
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwmon/vexpress.txt
@@ -0,0 +1,23 @@
+Versatile Express hwmon sensors
+-------------------------------
+
+Requires node properties:
+- "compatible" value : one of
+ "arm,vexpress-volt"
+ "arm,vexpress-amp"
+ "arm,vexpress-temp"
+ "arm,vexpress-power"
+ "arm,vexpress-energy"
+- "arm,vexpress-sysreg,func" when controlled via vexpress-sysreg
+ (see Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
+ for more details)
+
+Optional node properties:
+- label : string describing the monitored value
+
+Example:
+ energy@0 {
+ compatible = "arm,vexpress-energy";
+ arm,vexpress-sysreg,func = <13 0>;
+ label = "A15 Jcore";
+ };
diff --git a/Documentation/devicetree/bindings/hwrng/atmel-trng.txt b/Documentation/devicetree/bindings/hwrng/atmel-trng.txt
new file mode 100644
index 000000000..4ac5aaa2d
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwrng/atmel-trng.txt
@@ -0,0 +1,16 @@
+Atmel TRNG (True Random Number Generator) block
+
+Required properties:
+- compatible : Should be "atmel,at91sam9g45-trng"
+- reg : Offset and length of the register set of this block
+- interrupts : the interrupt number for the TRNG block
+- clocks: should contain the TRNG clk source
+
+Example:
+
+trng@fffcc000 {
+ compatible = "atmel,at91sam9g45-trng";
+ reg = <0xfffcc000 0x4000>;
+ interrupts = <6 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&trng_clk>;
+};
diff --git a/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt b/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt
new file mode 100644
index 000000000..e25a45666
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwrng/brcm,iproc-rng200.txt
@@ -0,0 +1,12 @@
+HWRNG support for the iproc-rng200 driver
+
+Required properties:
+- compatible : "brcm,iproc-rng200"
+- reg : base address and size of control register block
+
+Example:
+
+rng {
+ compatible = "brcm,iproc-rng200";
+ reg = <0x18032000 0x28>;
+};
diff --git a/Documentation/devicetree/bindings/hwrng/omap_rng.txt b/Documentation/devicetree/bindings/hwrng/omap_rng.txt
new file mode 100644
index 000000000..6a62acd86
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwrng/omap_rng.txt
@@ -0,0 +1,22 @@
+OMAP SoC HWRNG Module
+
+Required properties:
+
+- compatible : Should contain entries for this and backward compatible
+ RNG versions:
+ - "ti,omap2-rng" for OMAP2.
+ - "ti,omap4-rng" for OMAP4, OMAP5 and AM33XX.
+ Note that these two versions are incompatible.
+- ti,hwmods: Name of the hwmod associated with the RNG module
+- reg : Offset and length of the register set for the module
+- interrupts : the interrupt number for the RNG module.
+ Only used for "ti,omap4-rng".
+
+Example:
+/* AM335x */
+rng: rng@48310000 {
+ compatible = "ti,omap4-rng";
+ ti,hwmods = "rng";
+ reg = <0x48310000 0x2000>;
+ interrupts = <111>;
+};
diff --git a/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt b/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt
new file mode 100644
index 000000000..6616d1586
--- /dev/null
+++ b/Documentation/devicetree/bindings/hwrng/timeriomem_rng.txt
@@ -0,0 +1,18 @@
+HWRNG support for the timeriomem_rng driver
+
+Required properties:
+- compatible : "timeriomem_rng"
+- reg : base address to sample from
+- period : wait time in microseconds to use between samples
+
+N.B. currently 'reg' must be four bytes wide and aligned
+
+Example:
+
+hwrng@44 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "timeriomem_rng";
+ reg = <0x44 0x04>;
+ period = <1000000>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt b/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt
new file mode 100644
index 000000000..e9de37567
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/brcm,bcm2835-i2c.txt
@@ -0,0 +1,20 @@
+Broadcom BCM2835 I2C controller
+
+Required properties:
+- compatible : Should be "brcm,bcm2835-i2c".
+- reg: Should contain register location and length.
+- interrupts: Should contain interrupt.
+- clocks : The clock feeding the I2C controller.
+
+Recommended properties:
+- clock-frequency : desired I2C bus clock frequency in Hz.
+
+Example:
+
+i2c@20205000 {
+ compatible = "brcm,bcm2835-i2c";
+ reg = <0x7e205000 0x1000>;
+ interrupts = <2 21>;
+ clocks = <&clk_i2c>;
+ clock-frequency = <100000>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/brcm,iproc-i2c.txt b/Documentation/devicetree/bindings/i2c/brcm,iproc-i2c.txt
new file mode 100644
index 000000000..81f982ccc
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/brcm,iproc-i2c.txt
@@ -0,0 +1,37 @@
+Broadcom iProc I2C controller
+
+Required properties:
+
+- compatible:
+ Must be "brcm,iproc-i2c"
+
+- reg:
+ Define the base and range of the I/O address space that contain the iProc
+ I2C controller registers
+
+- interrupts:
+ Should contain the I2C interrupt
+
+- clock-frequency:
+ This is the I2C bus clock. Need to be either 100000 or 400000
+
+- #address-cells:
+ Always 1 (for I2C addresses)
+
+- #size-cells:
+ Always 0
+
+Example:
+ i2c0: i2c@18008000 {
+ compatible = "brcm,iproc-i2c";
+ reg = <0x18008000 0x100>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <GIC_SPI 85 IRQ_TYPE_NONE>;
+ clock-frequency = <100000>;
+
+ codec: wm8750@1a {
+ compatible = "wlf,wm8750";
+ reg = <0x1a>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/brcm,kona-i2c.txt b/Documentation/devicetree/bindings/i2c/brcm,kona-i2c.txt
new file mode 100644
index 000000000..1b87b741f
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/brcm,kona-i2c.txt
@@ -0,0 +1,35 @@
+Broadcom Kona Family I2C
+=========================
+
+This I2C controller is used in the following Broadcom SoCs:
+
+ BCM11130
+ BCM11140
+ BCM11351
+ BCM28145
+ BCM28155
+
+Required Properties
+-------------------
+- compatible: "brcm,bcm11351-i2c", "brcm,kona-i2c"
+- reg: Physical base address and length of controller registers
+- interrupts: The interrupt number used by the controller
+- clocks: clock specifier for the kona i2c external clock
+- clock-frequency: The I2C bus frequency in Hz
+- #address-cells: Should be <1>
+- #size-cells: Should be <0>
+
+Refer to clocks/clock-bindings.txt for generic clock consumer
+properties.
+
+Example:
+
+i2c@3e016000 {
+ compatible = "brcm,bcm11351-i2c","brcm,kona-i2c";
+ reg = <0x3e016000 0x80>;
+ interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&bsc1_clk>;
+ clock-frequency = <400000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt b/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt
new file mode 100644
index 000000000..bfeabb843
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-arb-gpio-challenge.txt
@@ -0,0 +1,86 @@
+GPIO-based I2C Arbitration Using a Challenge & Response Mechanism
+=================================================================
+This uses GPIO lines and a challenge & response mechanism to arbitrate who is
+the master of an I2C bus in a multimaster situation.
+
+In many cases using GPIOs to arbitrate is not needed and a design can use
+the standard I2C multi-master rules. Using GPIOs is generally useful in
+the case where there is a device on the bus that has errata and/or bugs
+that makes standard multimaster mode not feasible.
+
+Note that this scheme works well enough but has some downsides:
+* It is nonstandard (not using standard I2C multimaster)
+* Having two masters on a bus in general makes it relatively hard to debug
+ problems (hard to tell if i2c issues were caused by one master, another, or
+ some device on the bus).
+
+
+Algorithm:
+
+All masters on the bus have a 'bus claim' line which is an output that the
+others can see. These are all active low with pull-ups enabled. We'll
+describe these lines as:
+
+- OUR_CLAIM: output from us signaling to other hosts that we want the bus
+- THEIR_CLAIMS: output from others signaling that they want the bus
+
+The basic algorithm is to assert your line when you want the bus, then make
+sure that the other side doesn't want it also. A detailed explanation is best
+done with an example.
+
+Let's say we want to claim the bus. We:
+1. Assert OUR_CLAIM.
+2. Waits a little bit for the other sides to notice (slew time, say 10
+ microseconds).
+3. Check THEIR_CLAIMS. If none are asserted then the we have the bus and we are
+ done.
+4. Otherwise, wait for a few milliseconds and see if THEIR_CLAIMS are released.
+5. If not, back off, release the claim and wait for a few more milliseconds.
+6. Go back to 1 (until retry time has expired).
+
+
+Required properties:
+- compatible: i2c-arb-gpio-challenge
+- our-claim-gpio: The GPIO that we use to claim the bus.
+- their-claim-gpios: The GPIOs that the other sides use to claim the bus.
+ Note that some implementations may only support a single other master.
+- Standard I2C mux properties. See mux.txt in this directory.
+- Single I2C child bus node at reg 0. See mux.txt in this directory.
+
+Optional properties:
+- slew-delay-us: microseconds to wait for a GPIO to go high. Default is 10 us.
+- wait-retry-us: we'll attempt another claim after this many microseconds.
+ Default is 3000 us.
+- wait-free-us: we'll give up after this many microseconds. Default is 50000 us.
+
+
+Example:
+ i2c@12CA0000 {
+ compatible = "acme,some-i2c-device";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ i2c-arbitrator {
+ compatible = "i2c-arb-gpio-challenge";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ i2c-parent = <&{/i2c@12CA0000}>;
+
+ our-claim-gpio = <&gpf0 3 1>;
+ their-claim-gpios = <&gpe0 4 1>;
+ slew-delay-us = <10>;
+ wait-retry-us = <3000>;
+ wait-free-us = <50000>;
+
+ i2c@0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ i2c@52 {
+ // Normal I2C device
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-at91.txt b/Documentation/devicetree/bindings/i2c/i2c-at91.txt
new file mode 100644
index 000000000..388f0a275
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-at91.txt
@@ -0,0 +1,34 @@
+I2C for Atmel platforms
+
+Required properties :
+- compatible : Must be "atmel,at91rm9200-i2c", "atmel,at91sam9261-i2c",
+ "atmel,at91sam9260-i2c", "atmel,at91sam9g20-i2c", "atmel,at91sam9g10-i2c"
+ or "atmel,at91sam9x5-i2c"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: interrupt number to the cpu.
+- #address-cells = <1>;
+- #size-cells = <0>;
+- clocks: phandles to input clocks.
+
+Optional properties:
+- clock-frequency: Desired I2C bus frequency in Hz, otherwise defaults to 100000
+- Child nodes conforming to i2c bus binding
+
+Examples :
+
+i2c0: i2c@fff84000 {
+ compatible = "atmel,at91sam9g20-i2c";
+ reg = <0xfff84000 0x100>;
+ interrupts = <12 4 6>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&twi0_clk>;
+ clock-frequency = <400000>;
+
+ 24c512@50 {
+ compatible = "24c512";
+ reg = <0x50>;
+ pagesize = <128>;
+ }
+}
diff --git a/Documentation/devicetree/bindings/i2c/i2c-axxia.txt b/Documentation/devicetree/bindings/i2c/i2c-axxia.txt
new file mode 100644
index 000000000..2296d782b
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-axxia.txt
@@ -0,0 +1,30 @@
+LSI Axxia I2C
+
+Required properties :
+- compatible : Must be "lsi,api2c"
+- reg : Offset and length of the register set for the device
+- interrupts : the interrupt specifier
+- #address-cells : Must be <1>;
+- #size-cells : Must be <0>;
+- clock-names : Must contain "i2c".
+- clocks: Must contain an entry for each name in clock-names. See the common
+ clock bindings.
+
+Optional properties :
+- clock-frequency : Desired I2C bus clock frequency in Hz. If not specified,
+ the default 100 kHz frequency will be used. As only Normal and Fast modes
+ are supported, possible values are 100000 and 400000.
+
+Example :
+
+i2c@02010084000 {
+ compatible = "lsi,api2c";
+ device_type = "i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x20 0x10084000 0x00 0x1000>;
+ interrupts = <0 19 4>;
+ clocks = <&clk_per>;
+ clock-names = "i2c";
+ clock-frequency = <400000>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-cadence.txt b/Documentation/devicetree/bindings/i2c/i2c-cadence.txt
new file mode 100644
index 000000000..7cb0b5608
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-cadence.txt
@@ -0,0 +1,24 @@
+Binding for the Cadence I2C controller
+
+Required properties:
+ - reg: Physical base address and size of the controller's register area.
+ - compatible: Compatibility string. Must be 'cdns,i2c-r1p10'.
+ - clocks: Input clock specifier. Refer to common clock bindings.
+ - interrupts: Interrupt specifier. Refer to interrupt bindings.
+ - #address-cells: Should be 1.
+ - #size-cells: Should be 0.
+
+Optional properties:
+ - clock-frequency: Desired operating frequency, in Hz, of the bus.
+ - clock-names: Input clock name, should be 'pclk'.
+
+Example:
+ i2c@e0004000 {
+ compatible = "cdns,i2c-r1p10";
+ clocks = <&clkc 38>;
+ interrupts = <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>;
+ reg = <0xe0004000 0x1000>;
+ clock-frequency = <400000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt b/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt
new file mode 100644
index 000000000..8ce9cd285
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-cbus-gpio.txt
@@ -0,0 +1,27 @@
+Device tree bindings for i2c-cbus-gpio driver
+
+Required properties:
+ - compatible = "i2c-cbus-gpio";
+ - gpios: clk, dat, sel
+ - #address-cells = <1>;
+ - #size-cells = <0>;
+
+Optional properties:
+ - child nodes conforming to i2c bus binding
+
+Example:
+
+i2c@0 {
+ compatible = "i2c-cbus-gpio";
+ gpios = <&gpio 66 0 /* clk */
+ &gpio 65 0 /* dat */
+ &gpio 64 0 /* sel */
+ >;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ retu-mfd: retu@1 {
+ compatible = "retu-mfd";
+ reg = <0x1>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-cros-ec-tunnel.txt b/Documentation/devicetree/bindings/i2c/i2c-cros-ec-tunnel.txt
new file mode 100644
index 000000000..898f030eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-cros-ec-tunnel.txt
@@ -0,0 +1,39 @@
+I2C bus that tunnels through the ChromeOS EC (cros-ec)
+======================================================
+On some ChromeOS board designs we've got a connection to the EC (embedded
+controller) but no direct connection to some devices on the other side of
+the EC (like a battery and PMIC). To get access to those devices we need
+to tunnel our i2c commands through the EC.
+
+The node for this device should be under a cros-ec node like google,cros-ec-spi
+or google,cros-ec-i2c.
+
+
+Required properties:
+- compatible: google,cros-ec-i2c-tunnel
+- google,remote-bus: The EC bus we'd like to talk to.
+
+Optional child nodes:
+- One node per I2C device connected to the tunnelled I2C bus.
+
+
+Example:
+ cros-ec@0 {
+ compatible = "google,cros-ec-spi";
+
+ ...
+
+ i2c-tunnel {
+ compatible = "google,cros-ec-i2c-tunnel";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ google,remote-bus = <0>;
+
+ battery: sbs-battery@b {
+ compatible = "sbs,sbs-battery";
+ reg = <0xb>;
+ sbs,poll-retry-count = <1>;
+ };
+ };
+ }
diff --git a/Documentation/devicetree/bindings/i2c/i2c-davinci.txt b/Documentation/devicetree/bindings/i2c/i2c-davinci.txt
new file mode 100644
index 000000000..a4e1cbc81
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-davinci.txt
@@ -0,0 +1,31 @@
+* Texas Instruments Davinci I2C
+
+This file provides information, what the device node for the
+davinci i2c interface contain.
+
+Required properties:
+- compatible: "ti,davinci-i2c";
+- reg : Offset and length of the register set for the device
+
+Recommended properties :
+- interrupts : standard interrupt property.
+- clock-frequency : desired I2C bus clock frequency in Hz.
+- ti,has-pfunc: boolean; if defined, it indicates that SoC supports PFUNC
+ registers. PFUNC registers allow to switch I2C pins to function as
+ GPIOs, so they can by toggled manually.
+
+Example (enbw_cmc board):
+ i2c@1c22000 {
+ compatible = "ti,davinci-i2c";
+ reg = <0x22000 0x1000>;
+ clock-frequency = <100000>;
+ interrupts = <15>;
+ interrupt-parent = <&intc>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ dtt@48 {
+ compatible = "national,lm75";
+ reg = <0x48>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-designware.txt b/Documentation/devicetree/bindings/i2c/i2c-designware.txt
new file mode 100644
index 000000000..fee26dc3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-designware.txt
@@ -0,0 +1,45 @@
+* Synopsys DesignWare I2C
+
+Required properties :
+
+ - compatible : should be "snps,designware-i2c"
+ - reg : Offset and length of the register set for the device
+ - interrupts : <IRQ> where IRQ is the interrupt number.
+
+Recommended properties :
+
+ - clock-frequency : desired I2C bus clock frequency in Hz.
+
+Optional properties :
+ - i2c-sda-hold-time-ns : should contain the SDA hold time in nanoseconds.
+ This option is only supported in hardware blocks version 1.11a or newer.
+
+ - i2c-scl-falling-time-ns : should contain the SCL falling time in nanoseconds.
+ This value which is by default 300ns is used to compute the tLOW period.
+
+ - i2c-sda-falling-time-ns : should contain the SDA falling time in nanoseconds.
+ This value which is by default 300ns is used to compute the tHIGH period.
+
+Example :
+
+ i2c@f0000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,designware-i2c";
+ reg = <0xf0000 0x1000>;
+ interrupts = <11>;
+ clock-frequency = <400000>;
+ };
+
+ i2c@1120000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,designware-i2c";
+ reg = <0x1120000 0x1000>;
+ interrupt-parent = <&ictl>;
+ interrupts = <12 1>;
+ clock-frequency = <400000>;
+ i2c-sda-hold-time-ns = <300>;
+ i2c-sda-falling-time-ns = <300>;
+ i2c-scl-falling-time-ns = <300>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-digicolor.txt b/Documentation/devicetree/bindings/i2c/i2c-digicolor.txt
new file mode 100644
index 000000000..457a098d4
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-digicolor.txt
@@ -0,0 +1,25 @@
+Conexant Digicolor I2C controller
+
+Required properties:
+ - compatible: must be "cnxt,cx92755-i2c"
+ - reg: physical address and length of the device registers
+ - interrupts: a single interrupt specifier
+ - clocks: clock for the device
+ - #address-cells: should be <1>
+ - #size-cells: should be <0>
+
+Optional properties:
+- clock-frequency: the desired I2C bus clock frequency in Hz; in
+ absence of this property the default value is used (100 kHz).
+
+Example:
+
+ i2c: i2c@f0000120 {
+ compatible = "cnxt,cx92755-i2c";
+ reg = <0xf0000120 0x10>;
+ interrupts = <28>;
+ clocks = <&main_clk>;
+ clock-frequency = <100000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-efm32.txt b/Documentation/devicetree/bindings/i2c/i2c-efm32.txt
new file mode 100644
index 000000000..50b25c3da
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-efm32.txt
@@ -0,0 +1,34 @@
+* Energymicro efm32 i2c controller
+
+Required properties :
+
+ - reg : Offset and length of the register set for the device
+ - compatible : should be "energymicro,efm32-i2c"
+ - interrupts : the interrupt number
+ - clocks : reference to the module clock
+
+Recommended properties :
+
+ - clock-frequency : maximal I2C bus clock frequency in Hz.
+ - energymicro,location : Decides the location of the USART I/O pins.
+ Allowed range : [0 .. 6]
+
+Example:
+ i2c0: i2c@4000a000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "energymicro,efm32-i2c";
+ reg = <0x4000a000 0x400>;
+ interrupts = <9>;
+ clocks = <&cmu clk_HFPERCLKI2C0>;
+ clock-frequency = <100000>;
+ status = "ok";
+ energymicro,location = <3>;
+
+ eeprom@50 {
+ compatible = "microchip,24c02";
+ reg = <0x50>;
+ pagesize = <16>;
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/i2c/i2c-exynos5.txt b/Documentation/devicetree/bindings/i2c/i2c-exynos5.txt
new file mode 100644
index 000000000..2dbc0b62d
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-exynos5.txt
@@ -0,0 +1,53 @@
+* Samsung's High Speed I2C controller
+
+The Samsung's High Speed I2C controller is used to interface with I2C devices
+at various speeds ranging from 100khz to 3.4Mhz.
+
+Required properties:
+ - compatible: value should be.
+ -> "samsung,exynos5-hsi2c", (DEPRECATED)
+ for i2c compatible with HSI2C available
+ on Exynos5250 and Exynos5420 SoCs.
+ -> "samsung,exynos5250-hsi2c", for i2c compatible with HSI2C available
+ on Exynos5250 and Exynos5420 SoCs.
+ -> "samsung,exynos5260-hsi2c", for i2c compatible with HSI2C available
+ on Exynos5260 SoCs.
+ -> "samsung,exynos7-hsi2c", for i2c compatible with HSI2C available
+ on Exynos7 SoCs.
+
+ - reg: physical base address of the controller and length of memory mapped
+ region.
+ - interrupts: interrupt number to the cpu.
+ - #address-cells: always 1 (for i2c addresses)
+ - #size-cells: always 0
+
+ - Pinctrl:
+ - pinctrl-0: Pin control group to be used for this controller.
+ - pinctrl-names: Should contain only one value - "default".
+
+Optional properties:
+ - clock-frequency: Desired operating frequency in Hz of the bus.
+ -> If not specified, the bus operates in fast-speed mode at
+ at 100khz.
+ -> If specified, the bus operates in high-speed mode only if the
+ clock-frequency is >= 1Mhz.
+
+Example:
+
+hsi2c@12ca0000 {
+ compatible = "samsung,exynos5250-hsi2c";
+ reg = <0x12ca0000 0x100>;
+ interrupts = <56>;
+ clock-frequency = <100000>;
+
+ pinctrl-0 = <&i2c4_bus>;
+ pinctrl-names = "default";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ s2mps11_pmic@66 {
+ compatible = "samsung,s2mps11-pmic";
+ reg = <0x66>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-gpio.txt b/Documentation/devicetree/bindings/i2c/i2c-gpio.txt
new file mode 100644
index 000000000..4f8ec947c
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-gpio.txt
@@ -0,0 +1,32 @@
+Device-Tree bindings for i2c gpio driver
+
+Required properties:
+ - compatible = "i2c-gpio";
+ - gpios: sda and scl gpio
+
+
+Optional properties:
+ - i2c-gpio,sda-open-drain: sda as open drain
+ - i2c-gpio,scl-open-drain: scl as open drain
+ - i2c-gpio,scl-output-only: scl as output only
+ - i2c-gpio,delay-us: delay between GPIO operations (may depend on each platform)
+ - i2c-gpio,timeout-ms: timeout to get data
+
+Example nodes:
+
+i2c@0 {
+ compatible = "i2c-gpio";
+ gpios = <&pioA 23 0 /* sda */
+ &pioA 24 0 /* scl */
+ >;
+ i2c-gpio,sda-open-drain;
+ i2c-gpio,scl-open-drain;
+ i2c-gpio,delay-us = <2>; /* ~100 kHz */
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rv3029c2@56 {
+ compatible = "rv3029c2";
+ reg = <0x56>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-hix5hd2.txt b/Documentation/devicetree/bindings/i2c/i2c-hix5hd2.txt
new file mode 100644
index 000000000..f98b37401
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-hix5hd2.txt
@@ -0,0 +1,24 @@
+I2C for Hisilicon hix5hd2 chipset platform
+
+Required properties:
+- compatible: Must be "hisilicon,hix5hd2-i2c"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: interrupt number to the cpu.
+- #address-cells = <1>;
+- #size-cells = <0>;
+- clocks: phandles to input clocks.
+
+Optional properties:
+- clock-frequency: Desired I2C bus frequency in Hz, otherwise defaults to 100000
+- Child nodes conforming to i2c bus binding
+
+Examples:
+I2C0@f8b10000 {
+ compatible = "hisilicon,hix5hd2-i2c";
+ reg = <0xf8b10000 0x1000>;
+ interrupts = <0 38 4>;
+ clocks = <&clock HIX5HD2_I2C0_RST>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+}
diff --git a/Documentation/devicetree/bindings/i2c/i2c-img-scb.txt b/Documentation/devicetree/bindings/i2c/i2c-img-scb.txt
new file mode 100644
index 000000000..b6461602d
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-img-scb.txt
@@ -0,0 +1,26 @@
+IMG Serial Control Bus (SCB) I2C Controller
+
+Required Properties:
+- compatible: "img,scb-i2c"
+- reg: Physical base address and length of controller registers
+- interrupts: Interrupt number used by the controller
+- clocks : Should contain a clock specifier for each entry in clock-names
+- clock-names : Should contain the following entries:
+ "scb", for the SCB core clock.
+ "sys", for the system clock.
+- clock-frequency: The I2C bus frequency in Hz
+- #address-cells: Should be <1>
+- #size-cells: Should be <0>
+
+Example:
+
+i2c@18100000 {
+ compatible = "img,scb-i2c";
+ reg = <0x18100000 0x200>;
+ interrupts = <GIC_SHARED 2 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&i2c0_clk>, <&system_clk>;
+ clock-names = "scb", "sys";
+ clock-frequency = <400000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx.txt b/Documentation/devicetree/bindings/i2c/i2c-imx.txt
new file mode 100644
index 000000000..ce4311d72
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-imx.txt
@@ -0,0 +1,40 @@
+* Freescale Inter IC (I2C) and High Speed Inter IC (HS-I2C) for i.MX
+
+Required properties:
+- compatible :
+ - "fsl,imx1-i2c" for I2C compatible with the one integrated on i.MX1 SoC
+ - "fsl,imx21-i2c" for I2C compatible with the one integrated on i.MX21 SoC
+ - "fsl,vf610-i2c" for I2C compatible with the one integrated on Vybrid vf610 SoC
+- reg : Should contain I2C/HS-I2C registers location and length
+- interrupts : Should contain I2C/HS-I2C interrupt
+- clocks : Should contain the I2C/HS-I2C clock specifier
+
+Optional properties:
+- clock-frequency : Constains desired I2C/HS-I2C bus clock frequency in Hz.
+ The absence of the propoerty indicates the default frequency 100 kHz.
+- dmas: A list of two dma specifiers, one for each entry in dma-names.
+- dma-names: should contain "tx" and "rx".
+
+Examples:
+
+i2c@83fc4000 { /* I2C2 on i.MX51 */
+ compatible = "fsl,imx51-i2c", "fsl,imx21-i2c";
+ reg = <0x83fc4000 0x4000>;
+ interrupts = <63>;
+};
+
+i2c@70038000 { /* HS-I2C on i.MX51 */
+ compatible = "fsl,imx51-i2c", "fsl,imx21-i2c";
+ reg = <0x70038000 0x4000>;
+ interrupts = <64>;
+ clock-frequency = <400000>;
+};
+
+i2c0: i2c@40066000 { /* i2c0 on vf610 */
+ compatible = "fsl,vf610-i2c";
+ reg = <0x40066000 0x1000>;
+ interrupts =<0 71 0x04>;
+ dmas = <&edma0 0 50>,
+ <&edma0 0 51>;
+ dma-names = "rx","tx";
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt b/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt
new file mode 100644
index 000000000..231e4cc40
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-jz4780.txt
@@ -0,0 +1,35 @@
+* Ingenic JZ4780 I2C Bus controller
+
+Required properties:
+- compatible: should be "ingenic,jz4780-i2c"
+- reg: Should contain the address & size of the I2C controller registers.
+- interrupts: Should specify the interrupt provided by parent.
+- clocks: Should contain a single clock specifier for the JZ4780 I2C clock.
+- clock-frequency: desired I2C bus clock frequency in Hz.
+
+Recommended properties:
+- pinctrl-names: should be "default";
+- pinctrl-0: phandle to pinctrl function
+
+Optional properties:
+- interrupt-parent: Should be the phandle of the interrupt controller that
+ delivers interrupts to the I2C block.
+
+Example
+
+/ {
+ i2c4: i2c4@0x10054000 {
+ compatible = "ingenic,jz4780-i2c";
+ reg = <0x10054000 0x1000>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <56>;
+
+ clocks = <&cgu JZ4780_CLK_SMB4>;
+ clock-frequency = <100000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pins_i2c4_data>;
+
+ };
+};
+
diff --git a/Documentation/devicetree/bindings/i2c/i2c-meson.txt b/Documentation/devicetree/bindings/i2c/i2c-meson.txt
new file mode 100644
index 000000000..682f9a6f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-meson.txt
@@ -0,0 +1,24 @@
+Amlogic Meson I2C controller
+
+Required properties:
+ - compatible: must be "amlogic,meson6-i2c"
+ - reg: physical address and length of the device registers
+ - interrupts: a single interrupt specifier
+ - clocks: clock for the device
+ - #address-cells: should be <1>
+ - #size-cells: should be <0>
+
+Optional properties:
+- clock-frequency: the desired I2C bus clock frequency in Hz; in
+ absence of this property the default value is used (100 kHz).
+
+Examples:
+
+ i2c@c8100500 {
+ compatible = "amlogic,meson6-i2c";
+ reg = <0xc8100500 0x20>;
+ interrupts = <0 92 1>;
+ clocks = <&clk81>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mpc.txt b/Documentation/devicetree/bindings/i2c/i2c-mpc.txt
new file mode 100644
index 000000000..1eacd6b20
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mpc.txt
@@ -0,0 +1,64 @@
+* I2C
+
+Required properties :
+
+ - reg : Offset and length of the register set for the device
+ - compatible : should be "fsl,CHIP-i2c" where CHIP is the name of a
+ compatible processor, e.g. mpc8313, mpc8543, mpc8544, mpc5121,
+ mpc5200 or mpc5200b. For the mpc5121, an additional node
+ "fsl,mpc5121-i2c-ctrl" is required as shown in the example below.
+
+Recommended properties :
+
+ - interrupts : <a b> where a is the interrupt number and b is a
+ field that represents an encoding of the sense and level
+ information for the interrupt. This should be encoded based on
+ the information in section 2) depending on the type of interrupt
+ controller you have.
+ - interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+ - fsl,preserve-clocking : boolean; if defined, the clock settings
+ from the bootloader are preserved (not touched).
+ - clock-frequency : desired I2C bus clock frequency in Hz.
+ - fsl,timeout : I2C bus timeout in microseconds.
+
+Examples :
+
+ /* MPC5121 based board */
+ i2c@1740 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,mpc5121-i2c", "fsl-i2c";
+ reg = <0x1740 0x20>;
+ interrupts = <11 0x8>;
+ interrupt-parent = <&ipic>;
+ clock-frequency = <100000>;
+ };
+
+ i2ccontrol@1760 {
+ compatible = "fsl,mpc5121-i2c-ctrl";
+ reg = <0x1760 0x8>;
+ };
+
+ /* MPC5200B based board */
+ i2c@3d00 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,mpc5200b-i2c","fsl,mpc5200-i2c","fsl-i2c";
+ reg = <0x3d00 0x40>;
+ interrupts = <2 15 0>;
+ interrupt-parent = <&mpc5200_pic>;
+ fsl,preserve-clocking;
+ };
+
+ /* MPC8544 base board */
+ i2c@3100 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,mpc8544-i2c", "fsl-i2c";
+ reg = <0x3100 0x100>;
+ interrupts = <43 2>;
+ interrupt-parent = <&mpic>;
+ clock-frequency = <400000>;
+ fsl,timeout = <10000>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt
new file mode 100644
index 000000000..66709a825
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-gpio.txt
@@ -0,0 +1,81 @@
+GPIO-based I2C Bus Mux
+
+This binding describes an I2C bus multiplexer that uses GPIOs to
+route the I2C signals.
+
+ +-----+ +-----+
+ | dev | | dev |
+ +------------+ +-----+ +-----+
+ | SoC | | |
+ | | /--------+--------+
+ | +------+ | +------+ child bus A, on GPIO value set to 0
+ | | I2C |-|--| Mux |
+ | +------+ | +--+---+ child bus B, on GPIO value set to 1
+ | | | \----------+--------+--------+
+ | +------+ | | | | |
+ | | GPIO |-|-----+ +-----+ +-----+ +-----+
+ | +------+ | | dev | | dev | | dev |
+ +------------+ +-----+ +-----+ +-----+
+
+Required properties:
+- compatible: i2c-mux-gpio
+- i2c-parent: The phandle of the I2C bus that this multiplexer's master-side
+ port is connected to.
+- mux-gpios: list of gpios used to control the muxer
+* Standard I2C mux properties. See mux.txt in this directory.
+* I2C child bus nodes. See mux.txt in this directory.
+
+Optional properties:
+- idle-state: value to set the muxer to when idle. When no value is
+ given, it defaults to the last value used.
+
+For each i2c child node, an I2C child bus will be created. They will
+be numbered based on their order in the device tree.
+
+Whenever an access is made to a device on a child bus, the value set
+in the revelant node's reg property will be output using the list of
+GPIOs, the first in the list holding the least-significant value.
+
+If an idle state is defined, using the idle-state (optional) property,
+whenever an access is not being made to a device on a child bus, the
+GPIOs will be set according to the idle value.
+
+If an idle state is not defined, the most recently used value will be
+left programmed into hardware whenever no access is being made to a
+device on a child bus.
+
+Example:
+ i2cmux {
+ compatible = "i2c-mux-gpio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ mux-gpios = <&gpio1 22 0 &gpio1 23 0>;
+ i2c-parent = <&i2c1>;
+
+ i2c@1 {
+ reg = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ ssd1307: oled@3c {
+ compatible = "solomon,ssd1307fb-i2c";
+ reg = <0x3c>;
+ pwms = <&pwm 4 3000>;
+ reset-gpios = <&gpio2 7 1>;
+ reset-active-low;
+ };
+ };
+
+ i2c@3 {
+ reg = <3>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pca9555: pca9555@20 {
+ compatible = "nxp,pca9555";
+ gpio-controller;
+ #gpio-cells = <2>;
+ reg = <0x20>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt
new file mode 100644
index 000000000..cf53d5fba
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.txt
@@ -0,0 +1,53 @@
+* NXP PCA954x I2C bus switch
+
+Required Properties:
+
+ - compatible: Must contain one of the following.
+ "nxp,pca9540", "nxp,pca9542", "nxp,pca9543", "nxp,pca9544",
+ "nxp,pca9545", "nxp,pca9546", "nxp,pca9547", "nxp,pca9548"
+
+ - reg: The I2C address of the device.
+
+ The following required properties are defined externally:
+
+ - Standard I2C mux properties. See i2c-mux.txt in this directory.
+ - I2C child bus nodes. See i2c-mux.txt in this directory.
+
+Optional Properties:
+
+ - reset-gpios: Reference to the GPIO connected to the reset input.
+ - i2c-mux-idle-disconnect: Boolean; if defined, forces mux to disconnect all
+ children in idle state. This is necessary for example, if there are several
+ multiplexers on the bus and the devices behind them use same I2C addresses.
+
+
+Example:
+
+ i2c-switch@74 {
+ compatible = "nxp,pca9548";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x74>;
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <2>;
+
+ eeprom@54 {
+ compatible = "at,24c08";
+ reg = <0x54>;
+ };
+ };
+
+ i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+
+ rtc@51 {
+ compatible = "nxp,pcf8563";
+ reg = <0x51>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
new file mode 100644
index 000000000..ae8af1694
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
@@ -0,0 +1,93 @@
+Pinctrl-based I2C Bus Mux
+
+This binding describes an I2C bus multiplexer that uses pin multiplexing to
+route the I2C signals, and represents the pin multiplexing configuration
+using the pinctrl device tree bindings.
+
+ +-----+ +-----+
+ | dev | | dev |
+ +------------------------+ +-----+ +-----+
+ | SoC | | |
+ | /----|------+--------+
+ | +---+ +------+ | child bus A, on first set of pins
+ | |I2C|---|Pinmux| |
+ | +---+ +------+ | child bus B, on second set of pins
+ | \----|------+--------+--------+
+ | | | | |
+ +------------------------+ +-----+ +-----+ +-----+
+ | dev | | dev | | dev |
+ +-----+ +-----+ +-----+
+
+Required properties:
+- compatible: i2c-mux-pinctrl
+- i2c-parent: The phandle of the I2C bus that this multiplexer's master-side
+ port is connected to.
+
+Also required are:
+
+* Standard pinctrl properties that specify the pin mux state for each child
+ bus. See ../pinctrl/pinctrl-bindings.txt.
+
+* Standard I2C mux properties. See mux.txt in this directory.
+
+* I2C child bus nodes. See mux.txt in this directory.
+
+For each named state defined in the pinctrl-names property, an I2C child bus
+will be created. I2C child bus numbers are assigned based on the index into
+the pinctrl-names property.
+
+The only exception is that no bus will be created for a state named "idle". If
+such a state is defined, it must be the last entry in pinctrl-names. For
+example:
+
+ pinctrl-names = "ddc", "pta", "idle" -> ddc = bus 0, pta = bus 1
+ pinctrl-names = "ddc", "idle", "pta" -> Invalid ("idle" not last)
+ pinctrl-names = "idle", "ddc", "pta" -> Invalid ("idle" not last)
+
+Whenever an access is made to a device on a child bus, the relevant pinctrl
+state will be programmed into hardware.
+
+If an idle state is defined, whenever an access is not being made to a device
+on a child bus, the idle pinctrl state will be programmed into hardware.
+
+If an idle state is not defined, the most recently used pinctrl state will be
+left programmed into hardware whenever no access is being made of a device on
+a child bus.
+
+Example:
+
+ i2cmux {
+ compatible = "i2c-mux-pinctrl";
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ i2c-parent = <&i2c1>;
+
+ pinctrl-names = "ddc", "pta", "idle";
+ pinctrl-0 = <&state_i2cmux_ddc>;
+ pinctrl-1 = <&state_i2cmux_pta>;
+ pinctrl-2 = <&state_i2cmux_idle>;
+
+ i2c@0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ eeprom {
+ compatible = "eeprom";
+ reg = <0x50>;
+ };
+ };
+
+ i2c@1 {
+ reg = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ eeprom {
+ compatible = "eeprom";
+ reg = <0x50>;
+ };
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux.txt b/Documentation/devicetree/bindings/i2c/i2c-mux.txt
new file mode 100644
index 000000000..af84cce5c
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux.txt
@@ -0,0 +1,60 @@
+Common i2c bus multiplexer/switch properties.
+
+An i2c bus multiplexer/switch will have several child busses that are
+numbered uniquely in a device dependent manner. The nodes for an i2c bus
+multiplexer/switch will have one child node for each child
+bus.
+
+Required properties:
+- #address-cells = <1>;
+- #size-cells = <0>;
+
+Required properties for child nodes:
+- #address-cells = <1>;
+- #size-cells = <0>;
+- reg : The sub-bus number.
+
+Optional properties for child nodes:
+- Other properties specific to the multiplexer/switch hardware.
+- Child nodes conforming to i2c bus binding
+
+
+Example :
+
+ /*
+ An NXP pca9548 8 channel I2C multiplexer at address 0x70
+ with two NXP pca8574 GPIO expanders attached, one each to
+ ports 3 and 4.
+ */
+
+ mux@70 {
+ compatible = "nxp,pca9548";
+ reg = <0x70>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ i2c@3 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <3>;
+
+ gpio1: gpio@38 {
+ compatible = "nxp,pca8574";
+ reg = <0x38>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ };
+ };
+ i2c@4 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <4>;
+
+ gpio2: gpio@38 {
+ compatible = "nxp,pca8574";
+ reg = <0x38>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt b/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt
new file mode 100644
index 000000000..5c3002692
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt
@@ -0,0 +1,44 @@
+
+* Marvell MV64XXX I2C controller
+
+Required properties :
+
+ - reg : Offset and length of the register set for the device
+ - compatible : Should be either:
+ - "allwinner,sun4i-a10-i2c"
+ - "allwinner,sun6i-a31-i2c"
+ - "marvell,mv64xxx-i2c"
+ - "marvell,mv78230-i2c"
+ - "marvell,mv78230-a0-i2c"
+ * Note: Only use "marvell,mv78230-a0-i2c" for a
+ very rare, initial version of the SoC which
+ had broken offload support. Linux
+ auto-detects this and sets it appropriately.
+ - interrupts : The interrupt number
+
+Optional properties :
+
+ - clock-frequency : Desired I2C bus clock frequency in Hz. If not set the
+default frequency is 100kHz
+
+ - resets : phandle to the parent reset controller. Mandatory
+ whenever you're using the "allwinner,sun6i-a31-i2c"
+ compatible.
+
+Examples:
+
+ i2c@11000 {
+ compatible = "marvell,mv64xxx-i2c";
+ reg = <0x11000 0x20>;
+ interrupts = <29>;
+ clock-frequency = <100000>;
+ };
+
+For the Armada XP:
+
+ i2c@11000 {
+ compatible = "marvell,mv78230-i2c", "marvell,mv64xxx-i2c";
+ reg = <0x11000 0x100>;
+ interrupts = <29>;
+ clock-frequency = <100000>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-mxs.txt b/Documentation/devicetree/bindings/i2c/i2c-mxs.txt
new file mode 100644
index 000000000..4e1c8ac01
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mxs.txt
@@ -0,0 +1,25 @@
+* Freescale MXS Inter IC (I2C) Controller
+
+Required properties:
+- compatible: Should be "fsl,<chip>-i2c"
+- reg: Should contain registers location and length
+- interrupts: Should contain ERROR interrupt number
+- clock-frequency: Desired I2C bus clock frequency in Hz.
+ Only 100000Hz and 400000Hz modes are supported.
+- dmas: DMA specifier, consisting of a phandle to DMA controller node
+ and I2C DMA channel ID.
+ Refer to dma.txt and fsl-mxs-dma.txt for details.
+- dma-names: Must be "rx-tx".
+
+Examples:
+
+i2c0: i2c@80058000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx28-i2c";
+ reg = <0x80058000 2000>;
+ interrupts = <111>;
+ clock-frequency = <100000>;
+ dmas = <&dma_apbx 6>;
+ dma-names = "rx-tx";
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-nomadik.txt b/Documentation/devicetree/bindings/i2c/i2c-nomadik.txt
new file mode 100644
index 000000000..72065b0ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-nomadik.txt
@@ -0,0 +1,23 @@
+I2C for Nomadik based systems
+
+Required (non-standard) properties:
+ - Nil
+
+Recommended (non-standard) properties:
+ - clock-frequency : Maximum bus clock frequency for the device
+
+Optional (non-standard) properties:
+ - Nil
+
+Example :
+
+i2c@80004000 {
+ compatible = "stericsson,db8500-i2c", "st,nomadik-i2c";
+ reg = <0x80004000 0x1000>;
+ interrupts = <0 21 0x4>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ v-i2c-supply = <&db8500_vape_reg>;
+
+ clock-frequency = <400000>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-ocores.txt b/Documentation/devicetree/bindings/i2c/i2c-ocores.txt
new file mode 100644
index 000000000..17bef9a34
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-ocores.txt
@@ -0,0 +1,69 @@
+Device tree configuration for i2c-ocores
+
+Required properties:
+- compatible : "opencores,i2c-ocores" or "aeroflexgaisler,i2cmst"
+- reg : bus address start and address range size of device
+- interrupts : interrupt number
+- clocks : handle to the controller clock; see the note below.
+ Mutually exclusive with opencores,ip-clock-frequency
+- opencores,ip-clock-frequency: frequency of the controller clock in Hz;
+ see the note below. Mutually exclusive with clocks
+- #address-cells : should be <1>
+- #size-cells : should be <0>
+
+Optional properties:
+- clock-frequency : frequency of bus clock in Hz; see the note below.
+ Defaults to 100 KHz when the property is not specified
+- reg-shift : device register offsets are shifted by this value
+- reg-io-width : io register width in bytes (1, 2 or 4)
+- regstep : deprecated, use reg-shift above
+
+Note
+clock-frequency property is meant to control the bus frequency for i2c bus
+drivers, but it was incorrectly used to specify i2c controller input clock
+frequency. So the following rules are set to fix this situation:
+- if clock-frequency is present and neither opencores,ip-clock-frequency nor
+ clocks are, then clock-frequency specifies i2c controller clock frequency.
+ This is to keep backwards compatibility with setups using old DTB. i2c bus
+ frequency is fixed at 100 KHz.
+- if clocks is present it specifies i2c controller clock. clock-frequency
+ property specifies i2c bus frequency.
+- if opencores,ip-clock-frequency is present it specifies i2c controller
+ clock frequency. clock-frequency property specifies i2c bus frequency.
+
+Examples:
+
+ i2c0: ocores@a0000000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "opencores,i2c-ocores";
+ reg = <0xa0000000 0x8>;
+ interrupts = <10>;
+ opencores,ip-clock-frequency = <20000000>;
+
+ reg-shift = <0>; /* 8 bit registers */
+ reg-io-width = <1>; /* 8 bit read/write */
+
+ dummy@60 {
+ compatible = "dummy";
+ reg = <0x60>;
+ };
+ };
+or
+ i2c0: ocores@a0000000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "opencores,i2c-ocores";
+ reg = <0xa0000000 0x8>;
+ interrupts = <10>;
+ clocks = <&osc>;
+ clock-frequency = <400000>; /* i2c bus frequency 400 KHz */
+
+ reg-shift = <0>; /* 8 bit registers */
+ reg-io-width = <1>; /* 8 bit read/write */
+
+ dummy@60 {
+ compatible = "dummy";
+ reg = <0x60>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-octeon.txt b/Documentation/devicetree/bindings/i2c/i2c-octeon.txt
new file mode 100644
index 000000000..dced82ebe
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-octeon.txt
@@ -0,0 +1,34 @@
+* Two Wire Serial Interface (TWSI) / I2C
+
+- compatible: "cavium,octeon-3860-twsi"
+
+ Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
+
+- reg: The base address of the TWSI/I2C bus controller register bank.
+
+- #address-cells: Must be <1>.
+
+- #size-cells: Must be <0>. I2C addresses have no size component.
+
+- interrupts: A single interrupt specifier.
+
+- clock-frequency: The I2C bus clock rate in Hz.
+
+Example:
+ twsi0: i2c@1180000001000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "cavium,octeon-3860-twsi";
+ reg = <0x11800 0x00001000 0x0 0x200>;
+ interrupts = <0 45>;
+ clock-frequency = <100000>;
+
+ rtc@68 {
+ compatible = "dallas,ds1337";
+ reg = <0x68>;
+ };
+ tmp@4c {
+ compatible = "ti,tmp421";
+ reg = <0x4c>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-omap.txt b/Documentation/devicetree/bindings/i2c/i2c-omap.txt
new file mode 100644
index 000000000..7e49839d4
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-omap.txt
@@ -0,0 +1,31 @@
+I2C for OMAP platforms
+
+Required properties :
+- compatible : Must be "ti,omap2420-i2c", "ti,omap2430-i2c", "ti,omap3-i2c"
+ or "ti,omap4-i2c"
+- ti,hwmods : Must be "i2c<n>", n being the instance number (1-based)
+- #address-cells = <1>;
+- #size-cells = <0>;
+
+Recommended properties :
+- clock-frequency : Desired I2C bus clock frequency in Hz. Otherwise
+ the default 100 kHz frequency will be used.
+
+Optional properties:
+- Child nodes conforming to i2c bus binding
+
+Note: Current implementation will fetch base address, irq and dma
+from omap hwmod data base during device registration.
+Future plan is to migrate hwmod data base contents into device tree
+blob so that, all the required data will be used from device tree dts
+file.
+
+Examples :
+
+i2c1: i2c@0 {
+ compatible = "ti,omap3-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ ti,hwmods = "i2c1";
+ clock-frequency = <400000>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-opal.txt b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
new file mode 100644
index 000000000..12bc61465
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
@@ -0,0 +1,37 @@
+Device-tree bindings for I2C OPAL driver
+----------------------------------------
+
+Most of the device node and properties layout is specific to the firmware and
+used by the firmware itself for configuring the port. From the linux
+perspective, the properties of use are "ibm,port-name" and "ibm,opal-id".
+
+Required properties:
+
+- reg: Port-id within a given master
+- compatible: must be "ibm,opal-i2c"
+- ibm,opal-id: Refers to a specific bus and used to identify it when calling
+ the relevant OPAL functions.
+- bus-frequency: Operating frequency of the i2c bus (in HZ). Informational for
+ linux, used by the FW though.
+
+Optional properties:
+- ibm,port-name: Firmware provides this name that uniquely identifies the i2c
+ port.
+
+The node contains a number of other properties that are used by the FW itself
+and depend on the specific hardware implementation. The example below depicts
+a P8 on-chip bus.
+
+Example:
+
+i2c-bus@0 {
+ reg = <0x0>;
+ bus-frequency = <0x61a80>;
+ compatible = "ibm,power8-i2c-port", "ibm,opal-i2c";
+ ibm,opal-id = <0x1>;
+ ibm,port-name = "p8_00000000_e1p0";
+ #address-cells = <0x1>;
+ phandle = <0x10000006>;
+ #size-cells = <0x0>;
+ linux,phandle = <0x10000006>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-pnx.txt b/Documentation/devicetree/bindings/i2c/i2c-pnx.txt
new file mode 100644
index 000000000..fe98ada33
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-pnx.txt
@@ -0,0 +1,36 @@
+* NXP PNX I2C Controller
+
+Required properties:
+
+ - reg: Offset and length of the register set for the device
+ - compatible: should be "nxp,pnx-i2c"
+ - interrupts: configure one interrupt line
+ - #address-cells: always 1 (for i2c addresses)
+ - #size-cells: always 0
+ - interrupt-parent: the phandle for the interrupt controller that
+ services interrupts for this device.
+
+Optional properties:
+
+ - clock-frequency: desired I2C bus clock frequency in Hz, Default: 100000 Hz
+
+Examples:
+
+ i2c1: i2c@400a0000 {
+ compatible = "nxp,pnx-i2c";
+ reg = <0x400a0000 0x100>;
+ interrupt-parent = <&mic>;
+ interrupts = <51 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ i2c2: i2c@400a8000 {
+ compatible = "nxp,pnx-i2c";
+ reg = <0x400a8000 0x100>;
+ interrupt-parent = <&mic>;
+ interrupts = <50 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clock-frequency = <100000>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-pxa-pci-ce4100.txt b/Documentation/devicetree/bindings/i2c/i2c-pxa-pci-ce4100.txt
new file mode 100644
index 000000000..569b16248
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-pxa-pci-ce4100.txt
@@ -0,0 +1,93 @@
+CE4100 I2C
+----------
+
+CE4100 has one PCI device which is described as the I2C-Controller. This
+PCI device has three PCI-bars, each bar contains a complete I2C
+controller. So we have a total of three independent I2C-Controllers
+which share only an interrupt line.
+The driver is probed via the PCI-ID and is gathering the information of
+attached devices from the devices tree.
+Grant Likely recommended to use the ranges property to map the PCI-Bar
+number to its physical address and to use this to find the child nodes
+of the specific I2C controller. This were his exact words:
+
+ Here's where the magic happens. Each entry in
+ ranges describes how the parent pci address space
+ (middle group of 3) is translated to the local
+ address space (first group of 2) and the size of
+ each range (last cell). In this particular case,
+ the first cell of the local address is chosen to be
+ 1:1 mapped to the BARs, and the second is the
+ offset from be base of the BAR (which would be
+ non-zero if you had 2 or more devices mapped off
+ the same BAR)
+
+ ranges allows the address mapping to be described
+ in a way that the OS can interpret without
+ requiring custom device driver code.
+
+This is an example which is used on FalconFalls:
+------------------------------------------------
+ i2c-controller@b,2 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ compatible = "pci8086,2e68.2",
+ "pci8086,2e68",
+ "pciclass,ff0000",
+ "pciclass,ff00";
+
+ reg = <0x15a00 0x0 0x0 0x0 0x0>;
+ interrupts = <16 1>;
+
+ /* as described by Grant, the first number in the group of
+ * three is the bar number followed by the 64bit bar address
+ * followed by size of the mapping. The bar address
+ * requires also a valid translation in parents ranges
+ * property.
+ */
+ ranges = <0 0 0x02000000 0 0xdffe0500 0x100
+ 1 0 0x02000000 0 0xdffe0600 0x100
+ 2 0 0x02000000 0 0xdffe0700 0x100>;
+
+ i2c@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+
+ /* The first number in the reg property is the
+ * number of the bar
+ */
+ reg = <0 0 0x100>;
+
+ /* This I2C controller has no devices */
+ };
+
+ i2c@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <1 0 0x100>;
+
+ /* This I2C controller has one gpio controller */
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+
+ i2c@2 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "intel,ce4100-i2c-controller";
+ reg = <2 0 0x100>;
+
+ gpio@26 {
+ #gpio-cells = <2>;
+ compatible = "ti,pcf8575";
+ reg = <0x26>;
+ gpio-controller;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-pxa.txt b/Documentation/devicetree/bindings/i2c/i2c-pxa.txt
new file mode 100644
index 000000000..12b78ac50
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-pxa.txt
@@ -0,0 +1,33 @@
+* Marvell MMP I2C controller
+
+Required properties :
+
+ - reg : Offset and length of the register set for the device
+ - compatible : should be "mrvl,mmp-twsi" where mmp is the name of a
+ compatible processor, e.g. pxa168, pxa910, mmp2, mmp3.
+ For the pxa2xx/pxa3xx, an additional node "mrvl,pxa-i2c" is required
+ as shown in the example below.
+
+Recommended properties :
+
+ - interrupts : the interrupt number
+ - interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device. If the parent is the default
+ interrupt controller in device tree, it could be ignored.
+ - mrvl,i2c-polling : Disable interrupt of i2c controller. Polling
+ status register of i2c controller instead.
+ - mrvl,i2c-fast-mode : Enable fast mode of i2c controller.
+
+Examples:
+ twsi1: i2c@d4011000 {
+ compatible = "mrvl,mmp-twsi";
+ reg = <0xd4011000 0x1000>;
+ interrupts = <7>;
+ mrvl,i2c-fast-mode;
+ };
+
+ twsi2: i2c@d4025000 {
+ compatible = "mrvl,mmp-twsi";
+ reg = <0xd4025000 0x1000>;
+ interrupts = <58>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-rcar.txt b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
new file mode 100644
index 000000000..16b3e07aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-rcar.txt
@@ -0,0 +1,32 @@
+I2C for R-Car platforms
+
+Required properties:
+- compatible: Must be one of
+ "renesas,i2c-rcar"
+ "renesas,i2c-r8a7778"
+ "renesas,i2c-r8a7779"
+ "renesas,i2c-r8a7790"
+ "renesas,i2c-r8a7791"
+ "renesas,i2c-r8a7792"
+ "renesas,i2c-r8a7793"
+ "renesas,i2c-r8a7794"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: interrupt specifier.
+
+Optional properties:
+- clock-frequency: desired I2C bus clock frequency in Hz. The absence of this
+ propoerty indicates the default frequency 100 kHz.
+- clocks: clock specifier.
+
+Examples :
+
+i2c0: i2c@e6508000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "renesas,i2c-r8a7791";
+ reg = <0 0xe6508000 0 0x40>;
+ interrupts = <0 287 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp9_clks R8A7791_CLK_I2C0>;
+ clock-frequency = <400000>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-riic.txt b/Documentation/devicetree/bindings/i2c/i2c-riic.txt
new file mode 100644
index 000000000..0bcc4716c
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-riic.txt
@@ -0,0 +1,29 @@
+Device tree configuration for Renesas RIIC driver
+
+Required properties:
+- compatible : "renesas,riic-<soctype>". "renesas,riic-rz" as fallback
+- reg : address start and address range size of device
+- interrupts : 8 interrupts (TEI, RI, TI, SPI, STI, NAKI, ALI, TMOI)
+- clock-frequency : frequency of bus clock in Hz
+- #address-cells : should be <1>
+- #size-cells : should be <0>
+
+Pinctrl properties might be needed, too. See there.
+
+Example:
+
+ i2c0: i2c@fcfee000 {
+ compatible = "renesas,riic-r7s72100", "renesas,riic-rz";
+ reg = <0xfcfee000 0x44>;
+ interrupts = <0 157 IRQ_TYPE_LEVEL_HIGH>,
+ <0 158 IRQ_TYPE_EDGE_RISING>,
+ <0 159 IRQ_TYPE_EDGE_RISING>,
+ <0 160 IRQ_TYPE_LEVEL_HIGH>,
+ <0 161 IRQ_TYPE_LEVEL_HIGH>,
+ <0 162 IRQ_TYPE_LEVEL_HIGH>,
+ <0 163 IRQ_TYPE_LEVEL_HIGH>,
+ <0 164 IRQ_TYPE_LEVEL_HIGH>;
+ clock-frequency = <100000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt b/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt
new file mode 100644
index 000000000..f0d71bc52
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-rk3x.txt
@@ -0,0 +1,56 @@
+* Rockchip RK3xxx I2C controller
+
+This driver interfaces with the native I2C controller present in Rockchip
+RK3xxx SoCs.
+
+Required properties :
+
+ - reg : Offset and length of the register set for the device
+ - compatible : should be "rockchip,rk3066-i2c", "rockchip,rk3188-i2c" or
+ "rockchip,rk3288-i2c".
+ - interrupts : interrupt number
+ - clocks : parent clock
+
+Required on RK3066, RK3188 :
+
+ - rockchip,grf : the phandle of the syscon node for the general register
+ file (GRF)
+ - on those SoCs an alias with the correct I2C bus ID (bit offset in the GRF)
+ is also required.
+
+Optional properties :
+
+ - clock-frequency : SCL frequency to use (in Hz). If omitted, 100kHz is used.
+ - i2c-scl-rising-time-ns : Number of nanoseconds the SCL signal takes to rise
+ (t(r) in I2C specification). If not specified this is assumed to be
+ the maximum the specification allows(1000 ns for Standard-mode,
+ 300 ns for Fast-mode) which might cause slightly slower communication.
+ - i2c-scl-falling-time-ns : Number of nanoseconds the SCL signal takes to fall
+ (t(f) in the I2C specification). If not specified this is assumed to
+ be the maximum the specification allows (300 ns) which might cause
+ slightly slower communication.
+ - i2c-sda-falling-time-ns : Number of nanoseconds the SDA signal takes to fall
+ (t(f) in the I2C specification). If not specified we'll use the SCL
+ value since they are the same in nearly all cases.
+
+Example:
+
+aliases {
+ i2c0 = &i2c0;
+}
+
+i2c0: i2c@2002d000 {
+ compatible = "rockchip,rk3188-i2c";
+ reg = <0x2002d000 0x1000>;
+ interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rockchip,grf = <&grf>;
+
+ clock-names = "i2c";
+ clocks = <&cru PCLK_I2C0>;
+
+ i2c-scl-rising-time-ns = <800>;
+ i2c-scl-falling-time-ns = <100>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt b/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt
new file mode 100644
index 000000000..89b3250f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-s3c2410.txt
@@ -0,0 +1,60 @@
+* Samsung's I2C controller
+
+The Samsung's I2C controller is used to interface with I2C devices.
+
+Required properties:
+ - compatible: value should be either of the following.
+ (a) "samsung, s3c2410-i2c", for i2c compatible with s3c2410 i2c.
+ (b) "samsung, s3c2440-i2c", for i2c compatible with s3c2440 i2c.
+ (c) "samsung, s3c2440-hdmiphy-i2c", for s3c2440-like i2c used
+ inside HDMIPHY block found on several samsung SoCs
+ (d) "samsung, exynos5440-i2c", for s3c2440-like i2c used
+ on EXYNOS5440 which does not need GPIO configuration.
+ (e) "samsung, exynos5-sata-phy-i2c", for s3c2440-like i2c used as
+ a host to SATA PHY controller on an internal bus.
+ - reg: physical base address of the controller and length of memory mapped
+ region.
+ - interrupts: interrupt number to the cpu.
+ - samsung,i2c-sda-delay: Delay (in ns) applied to data line (SDA) edges.
+
+Required for all cases except "samsung,s3c2440-hdmiphy-i2c":
+ - Samsung GPIO variant (deprecated):
+ - gpios: The order of the gpios should be the following: <SDA, SCL>.
+ The gpio specifier depends on the gpio controller. Required in all
+ cases except for "samsung,s3c2440-hdmiphy-i2c" whose input/output
+ lines are permanently wired to the respective clienta
+ - Pinctrl variant (preferred, if available):
+ - pinctrl-0: Pin control group to be used for this controller.
+ - pinctrl-names: Should contain only one value - "default".
+
+Optional properties:
+ - samsung,i2c-slave-addr: Slave address in multi-master environment. If not
+ specified, default value is 0.
+ - samsung,i2c-max-bus-freq: Desired frequency in Hz of the bus. If not
+ specified, the default value in Hz is 100000.
+ - samsung,sysreg-phandle - handle to syscon used to control the system registers
+
+Example:
+
+ i2c@13870000 {
+ compatible = "samsung,s3c2440-i2c";
+ reg = <0x13870000 0x100>;
+ interrupts = <345>;
+ samsung,i2c-sda-delay = <100>;
+ samsung,i2c-max-bus-freq = <100000>;
+ /* Samsung GPIO variant begins here */
+ gpios = <&gpd1 2 0 /* SDA */
+ &gpd1 3 0 /* SCL */>;
+ /* Samsung GPIO variant ends here */
+ /* Pinctrl variant begins here */
+ pinctrl-0 = <&i2c3_bus>;
+ pinctrl-names = "default";
+ /* Pinctrl variant ends here */
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ wm8994@1a {
+ compatible = "wlf,wm8994";
+ reg = <0x1a>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt b/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
new file mode 100644
index 000000000..2bfc6e7ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt
@@ -0,0 +1,40 @@
+Device tree configuration for Renesas IIC (sh_mobile) driver
+
+Required properties:
+- compatible : "renesas,iic-<soctype>". "renesas,rmobile-iic" as fallback
+ Examples with soctypes are:
+ - "renesas,iic-r8a73a4" (R-Mobile APE6)
+ - "renesas,iic-r8a7740" (R-Mobile A1)
+ - "renesas,iic-r8a7790" (R-Car H2)
+ - "renesas,iic-r8a7791" (R-Car M2-W)
+ - "renesas,iic-r8a7792" (R-Car V2H)
+ - "renesas,iic-r8a7793" (R-Car M2-N)
+ - "renesas,iic-r8a7794" (R-Car E2)
+ - "renesas,iic-sh73a0" (SH-Mobile AG5)
+- reg : address start and address range size of device
+- interrupts : interrupt of device
+- clocks : clock for device
+- #address-cells : should be <1>
+- #size-cells : should be <0>
+
+Optional properties:
+- clock-frequency : frequency of bus clock in Hz. Default 100kHz if unset.
+- dmas : Must contain a list of two references to DMA
+ specifiers, one for transmission, and one for
+ reception.
+- dma-names : Must contain a list of two DMA names, "tx" and "rx".
+
+
+Pinctrl properties might be needed, too. See there.
+
+Example:
+
+ iic0: i2c@e6500000 {
+ compatible = "renesas,iic-r8a7790", "renesas,rmobile-iic";
+ reg = <0 0xe6500000 0 0x425>;
+ interrupts = <0 174 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp3_clks R8A7790_CLK_IIC0>;
+ clock-frequency = <400000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-sirf.txt b/Documentation/devicetree/bindings/i2c/i2c-sirf.txt
new file mode 100644
index 000000000..7baf9e133
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-sirf.txt
@@ -0,0 +1,19 @@
+I2C for SiRFprimaII platforms
+
+Required properties :
+- compatible : Must be "sirf,prima2-i2c"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: interrupt number to the cpu.
+
+Optional properties:
+- clock-frequency : Constains desired I2C/HS-I2C bus clock frequency in Hz.
+ The absence of the propoerty indicates the default frequency 100 kHz.
+
+Examples :
+
+i2c0: i2c@b00e0000 {
+ compatible = "sirf,prima2-i2c";
+ reg = <0xb00e0000 0x10000>;
+ interrupts = <24>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt b/Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt
new file mode 100644
index 000000000..bd81a4826
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt
@@ -0,0 +1,15 @@
+ST Microelectronics DDC I2C
+
+Required properties :
+- compatible : Must be "st,ddci2c"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: interrupt number to the cpu.
+- #address-cells = <1>;
+- #size-cells = <0>;
+
+Optional properties:
+- Child nodes conforming to i2c bus binding
+
+Examples :
+
diff --git a/Documentation/devicetree/bindings/i2c/i2c-st.txt b/Documentation/devicetree/bindings/i2c/i2c-st.txt
new file mode 100644
index 000000000..4c26fda38
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-st.txt
@@ -0,0 +1,41 @@
+ST SSC binding, for I2C mode operation
+
+Required properties :
+- compatible : Must be "st,comms-ssc-i2c" or "st,comms-ssc4-i2c"
+- reg : Offset and length of the register set for the device
+- interrupts : the interrupt specifier
+- clock-names: Must contain "ssc".
+- clocks: Must contain an entry for each name in clock-names. See the common
+ clock bindings.
+- A pinctrl state named "default" must be defined to set pins in mode of
+ operation for I2C transfer.
+
+Optional properties :
+- clock-frequency : Desired I2C bus clock frequency in Hz. If not specified,
+ the default 100 kHz frequency will be used. As only Normal and Fast modes
+ are supported, possible values are 100000 and 400000.
+- st,i2c-min-scl-pulse-width-us : The minimum valid SCL pulse width that is
+ allowed through the deglitch circuit. In units of us.
+- st,i2c-min-sda-pulse-width-us : The minimum valid SDA pulse width that is
+ allowed through the deglitch circuit. In units of us.
+- A pinctrl state named "idle" could be defined to set pins in idle state
+ when I2C instance is not performing a transfer.
+- A pinctrl state named "sleep" could be defined to set pins in sleep state
+ when driver enters in suspend.
+
+
+
+Example :
+
+i2c0: i2c@fed40000 {
+ compatible = "st,comms-ssc4-i2c";
+ reg = <0xfed40000 0x110>;
+ interrupts = <GIC_SPI 187 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk_s_a0_ls CLK_ICN_REG>;
+ clock-names = "ssc";
+ clock-frequency = <400000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_i2c0_default>;
+ st,i2c-min-scl-pulse-width-us = <0>;
+ st,i2c-min-sda-pulse-width-us = <5>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt b/Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt
new file mode 100644
index 000000000..6b765485a
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt
@@ -0,0 +1,41 @@
+
+* Allwinner P2WI (Push/Pull 2 Wire Interface) controller
+
+Required properties :
+
+ - reg : Offset and length of the register set for the device.
+ - compatible : Should one of the following:
+ - "allwinner,sun6i-a31-p2wi"
+ - interrupts : The interrupt line connected to the P2WI peripheral.
+ - clocks : The gate clk connected to the P2WI peripheral.
+ - resets : The reset line connected to the P2WI peripheral.
+
+Optional properties :
+
+ - clock-frequency : Desired P2WI bus clock frequency in Hz. If not set the
+default frequency is 100kHz
+
+A P2WI may contain one child node encoding a P2WI slave device.
+
+Slave device properties:
+ Required properties:
+ - reg : the I2C slave address used during the initialization
+ process to switch from I2C to P2WI mode
+
+Example:
+
+ p2wi@01f03400 {
+ compatible = "allwinner,sun6i-a31-p2wi";
+ reg = <0x01f03400 0x400>;
+ interrupts = <0 39 4>;
+ clocks = <&apb0_gates 3>;
+ clock-frequency = <6000000>;
+ resets = <&apb0_rst 3>;
+
+ axp221: pmic@68 {
+ compatible = "x-powers,axp221";
+ reg = <0x68>;
+
+ /* ... */
+ };
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-versatile.txt b/Documentation/devicetree/bindings/i2c/i2c-versatile.txt
new file mode 100644
index 000000000..361d31c51
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-versatile.txt
@@ -0,0 +1,10 @@
+i2c Controller on ARM Versatile platform:
+
+Required properties:
+- compatible : Must be "arm,versatile-i2c";
+- reg
+- #address-cells = <1>;
+- #size-cells = <0>;
+
+Optional properties:
+- Child nodes conforming to i2c bus binding
diff --git a/Documentation/devicetree/bindings/i2c/i2c-vt8500.txt b/Documentation/devicetree/bindings/i2c/i2c-vt8500.txt
new file mode 100644
index 000000000..94a425eaa
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-vt8500.txt
@@ -0,0 +1,24 @@
+* Wondermedia I2C Controller
+
+Required properties :
+
+ - compatible : should be "wm,wm8505-i2c"
+ - reg : Offset and length of the register set for the device
+ - interrupts : <IRQ> where IRQ is the interrupt number
+ - clocks : phandle to the I2C clock source
+
+Optional properties :
+
+ - clock-frequency : desired I2C bus clock frequency in Hz.
+ Valid values are 100000 and 400000.
+ Default to 100000 if not specified, or invalid value.
+
+Example :
+
+ i2c_0: i2c@d8280000 {
+ compatible = "wm,wm8505-i2c";
+ reg = <0xd8280000 0x1000>;
+ interrupts = <19>;
+ clocks = <&clki2c0>;
+ clock-frequency = <400000>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-xiic.txt b/Documentation/devicetree/bindings/i2c/i2c-xiic.txt
new file mode 100644
index 000000000..ceabbe91a
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-xiic.txt
@@ -0,0 +1,22 @@
+Xilinx IIC controller:
+
+Required properties:
+- compatible : Must be "xlnx,xps-iic-2.00.a"
+- reg : IIC register location and length
+- interrupts : IIC controller unterrupt
+- #address-cells = <1>
+- #size-cells = <0>
+
+Optional properties:
+- Child nodes conforming to i2c bus binding
+
+Example:
+
+ axi_iic_0: i2c@40800000 {
+ compatible = "xlnx,xps-iic-2.00.a";
+ interrupts = < 1 2 >;
+ reg = < 0x40800000 0x10000 >;
+
+ #size-cells = <0>;
+ #address-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt b/Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt
new file mode 100644
index 000000000..f818ef507
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-xlp9xx.txt
@@ -0,0 +1,22 @@
+Device tree configuration for the I2C controller on the XLP9xx/5xx SoC
+
+Required properties:
+- compatible : should be "netlogic,xlp980-i2c"
+- reg : bus address start and address range size of device
+- interrupts : interrupt number
+
+Optional properties:
+- clock-frequency : frequency of bus clock in Hz
+ Defaults to 100 KHz when the property is not specified
+
+Example:
+
+i2c0: i2c@113100 {
+ compatible = "netlogic,xlp980-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0 0x113100 0x100>;
+ clock-frequency = <400000>;
+ interrupts = <30>;
+ interrupt-parent = <&pic>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/ina209.txt b/Documentation/devicetree/bindings/i2c/ina209.txt
new file mode 100644
index 000000000..9dd2bee80
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/ina209.txt
@@ -0,0 +1,18 @@
+ina209 properties
+
+Required properties:
+- compatible: Must be "ti,ina209"
+- reg: I2C address
+
+Optional properties:
+
+- shunt-resistor
+ Shunt resistor value in micro-Ohm
+
+Example:
+
+temp-sensor@4c {
+ compatible = "ti,ina209";
+ reg = <0x4c>;
+ shunt-resistor = <5000>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/ina2xx.txt b/Documentation/devicetree/bindings/i2c/ina2xx.txt
new file mode 100644
index 000000000..a2ad85d7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/ina2xx.txt
@@ -0,0 +1,22 @@
+ina2xx properties
+
+Required properties:
+- compatible: Must be one of the following:
+ - "ti,ina219" for ina219
+ - "ti,ina220" for ina220
+ - "ti,ina226" for ina226
+ - "ti,ina230" for ina230
+- reg: I2C address
+
+Optional properties:
+
+- shunt-resistor
+ Shunt resistor value in micro-Ohm
+
+Example:
+
+ina220@44 {
+ compatible = "ti,ina220";
+ reg = <0x44>;
+ shunt-resistor = <1000>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/max6697.txt b/Documentation/devicetree/bindings/i2c/max6697.txt
new file mode 100644
index 000000000..5f793998e
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/max6697.txt
@@ -0,0 +1,64 @@
+max6697 properties
+
+Required properties:
+- compatible:
+ Should be one of
+ maxim,max6581
+ maxim,max6602
+ maxim,max6622
+ maxim,max6636
+ maxim,max6689
+ maxim,max6693
+ maxim,max6694
+ maxim,max6697
+ maxim,max6698
+ maxim,max6699
+- reg: I2C address
+
+Optional properties:
+
+- smbus-timeout-disable
+ Set to disable SMBus timeout. If not specified, SMBus timeout will be
+ enabled.
+- extended-range-enable
+ Only valid for MAX6581. Set to enable extended temperature range.
+ Extended temperature will be disabled if not specified.
+- beta-compensation-enable
+ Only valid for MAX6693 and MX6694. Set to enable beta compensation on
+ remote temperature channel 1.
+ Beta compensation will be disabled if not specified.
+- alert-mask
+ Alert bit mask. Alert disabled for bits set.
+ Select bit 0 for local temperature, bit 1..7 for remote temperatures.
+ If not specified, alert will be enabled for all channels.
+- over-temperature-mask
+ Over-temperature bit mask. Over-temperature reporting disabled for
+ bits set.
+ Select bit 0 for local temperature, bit 1..7 for remote temperatures.
+ If not specified, over-temperature reporting will be enabled for all
+ channels.
+- resistance-cancellation
+ Boolean for all chips other than MAX6581. Set to enable resistance
+ cancellation on remote temperature channel 1.
+ For MAX6581, resistance cancellation enabled for all channels if
+ specified as boolean, otherwise as per bit mask specified.
+ Only supported for remote temperatures (bit 1..7).
+ If not specified, resistance cancellation will be disabled for all
+ channels.
+- transistor-ideality
+ For MAX6581 only. Two values; first is bit mask, second is ideality
+ select value as per MAX6581 data sheet. Select bit 1..7 for remote
+ channels.
+ Transistor ideality will be initialized to default (1.008) if not
+ specified.
+
+Example:
+
+temp-sensor@1a {
+ compatible = "maxim,max6697";
+ reg = <0x1a>;
+ smbus-timeout-disable;
+ resistance-cancellation;
+ alert-mask = <0x72>;
+ over-temperature-mask = <0x7f>;
+};
diff --git a/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.txt b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.txt
new file mode 100644
index 000000000..656716b72
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/nvidia,tegra20-i2c.txt
@@ -0,0 +1,75 @@
+NVIDIA Tegra20/Tegra30/Tegra114 I2C controller driver.
+
+Required properties:
+- compatible : For Tegra20, must be one of "nvidia,tegra20-i2c-dvc" or
+ "nvidia,tegra20-i2c". For Tegra30, must be "nvidia,tegra30-i2c".
+ For Tegra114, must be "nvidia,tegra114-i2c". Otherwise, must be
+ "nvidia,<chip>-i2c", plus at least one of the above, where <chip> is
+ tegra124, tegra132, or tegra210.
+ Details of compatible are as follows:
+ nvidia,tegra20-i2c-dvc: Tegra20 has specific I2C controller called as DVC I2C
+ controller. This only support master mode of I2C communication. Register
+ interface/offset and interrupts handling are different than generic I2C
+ controller. Driver of DVC I2C controller is only compatible with
+ "nvidia,tegra20-i2c-dvc".
+ nvidia,tegra20-i2c: Tegra20 has 4 generic I2C controller. This can support
+ master and slave mode of I2C communication. The i2c-tegra driver only
+ support master mode of I2C communication. Driver of I2C controller is
+ only compatible with "nvidia,tegra20-i2c".
+ nvidia,tegra30-i2c: Tegra30 has 5 generic I2C controller. This controller is
+ very much similar to Tegra20 I2C controller with additional feature:
+ Continue Transfer Support. This feature helps to implement M_NO_START
+ as per I2C core API transfer flags. Driver of I2C controller is
+ compatible with "nvidia,tegra30-i2c" to enable the continue transfer
+ support. This is also compatible with "nvidia,tegra20-i2c" without
+ continue transfer support.
+ nvidia,tegra114-i2c: Tegra114 has 5 generic I2C controller. This controller is
+ very much similar to Tegra30 I2C controller with some hardware
+ modification:
+ - Tegra30/Tegra20 I2C controller has 2 clock source called div-clk and
+ fast-clk. Tegra114 has only one clock source called as div-clk and
+ hence clock mechanism is changed in I2C controller.
+ - Tegra30/Tegra20 I2C controller has enabled per packet transfer by
+ default and there is no way to disable it. Tegra114 has this
+ interrupt disable by default and SW need to enable explicitly.
+ Due to above changes, Tegra114 I2C driver makes incompatible with
+ previous hardware driver. Hence, tegra114 I2C controller is compatible
+ with "nvidia,tegra114-i2c".
+- reg: Should contain I2C controller registers physical address and length.
+- interrupts: Should contain I2C controller interrupts.
+- address-cells: Address cells for I2C device address.
+- size-cells: Size of the I2C device address.
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ Tegra20/Tegra30:
+ - div-clk
+ - fast-clk
+ Tegra114:
+ - div-clk
+- resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - i2c
+- dmas: Must contain an entry for each entry in clock-names.
+ See ../dma/dma.txt for details.
+- dma-names: Must include the following entries:
+ - rx
+ - tx
+
+Example:
+
+ i2c@7000c000 {
+ compatible = "nvidia,tegra20-i2c";
+ reg = <0x7000c000 0x100>;
+ interrupts = <0 38 0x04>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car 12>, <&tegra_car 124>;
+ clock-names = "div-clk", "fast-clk";
+ resets = <&tegra_car 12>;
+ reset-names = "i2c";
+ dmas = <&apbdma 16>, <&apbdma 16>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.txt b/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.txt
new file mode 100644
index 000000000..dc71754a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/qcom,i2c-qup.txt
@@ -0,0 +1,40 @@
+Qualcomm Universal Peripheral (QUP) I2C controller
+
+Required properties:
+ - compatible: Should be:
+ * "qcom,i2c-qup-v1.1.1" for 8660, 8960 and 8064.
+ * "qcom,i2c-qup-v2.1.1" for 8974 v1.
+ * "qcom,i2c-qup-v2.2.1" for 8974 v2 and later.
+ - reg: Should contain QUP register address and length.
+ - interrupts: Should contain I2C interrupt.
+
+ - clocks: A list of phandles + clock-specifiers, one for each entry in
+ clock-names.
+ - clock-names: Should contain:
+ * "core" for the core clock
+ * "iface" for the AHB clock
+
+ - #address-cells: Should be <1> Address cells for i2c device address
+ - #size-cells: Should be <0> as i2c addresses have no size component
+
+Optional properties:
+ - clock-frequency: Should specify the desired i2c bus clock frequency in Hz,
+ defaults to 100kHz if omitted.
+
+Child nodes should conform to i2c bus binding.
+
+Example:
+
+ i2c@f9924000 {
+ compatible = "qcom,i2c-qup-v2.2.1";
+ reg = <0xf9924000 0x1000>;
+ interrupts = <0 96 0>;
+
+ clocks = <&gcc GCC_BLSP1_QUP2_I2C_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>;
+ clock-names = "core", "iface";
+
+ clock-frequency = <355000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/ti,bq32k.txt b/Documentation/devicetree/bindings/i2c/ti,bq32k.txt
new file mode 100644
index 000000000..e204906b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/ti,bq32k.txt
@@ -0,0 +1,18 @@
+* TI BQ32000 I2C Serial Real-Time Clock
+
+Required properties:
+- compatible: Should contain "ti,bq32000".
+- reg: I2C address for chip
+
+Optional properties:
+- trickle-resistor-ohms : Selected resistor for trickle charger
+ Values usable are 1120 and 20180
+ Should be given if trickle charger should be enabled
+- trickle-diode-disable : Do not use internal trickle charger diode
+ Should be given if internal trickle charger diode should be disabled
+Example:
+ bq32000: rtc@68 {
+ compatible = "ti,bq32000";
+ trickle-resistor-ohms = <1120>;
+ reg = <0x68>;
+ };
diff --git a/Documentation/devicetree/bindings/i2c/trivial-devices.txt b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
new file mode 100644
index 000000000..ad0c4ac91
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/trivial-devices.txt
@@ -0,0 +1,103 @@
+This is a list of trivial i2c devices that have simple device tree
+bindings, consisting only of a compatible field, an address and
+possibly an interrupt line.
+
+If a device needs more specific bindings, such as properties to
+describe some aspect of it, there needs to be a specific binding
+document for it just like any other devices.
+
+
+Compatible Vendor / Chip
+========== =============
+abracon,abb5zes3 AB-RTCMC-32.768kHz-B5ZE-S3: Real Time Clock/Calendar Module with I2C Interface
+ad,ad7414 SMBus/I2C Digital Temperature Sensor in 6-Pin SOT with SMBus Alert and Over Temperature Pin
+ad,adm9240 ADM9240: Complete System Hardware Monitor for uProcessor-Based Systems
+adi,adt7461 +/-1C TDM Extended Temp Range I.C
+adt7461 +/-1C TDM Extended Temp Range I.C
+adi,adt7473 +/-1C TDM Extended Temp Range I.C
+adi,adt7475 +/-1C TDM Extended Temp Range I.C
+adi,adt7476 +/-1C TDM Extended Temp Range I.C
+adi,adt7490 +/-1C TDM Extended Temp Range I.C
+adi,adxl345 Three-Axis Digital Accelerometer
+adi,adxl346 Three-Axis Digital Accelerometer
+adi,adxl34x Three-Axis Digital Accelerometer
+at,24c08 i2c serial eeprom (24cxx)
+atmel,24c00 i2c serial eeprom (24cxx)
+atmel,24c01 i2c serial eeprom (24cxx)
+atmel,24c02 i2c serial eeprom (24cxx)
+atmel,24c04 i2c serial eeprom (24cxx)
+atmel,24c16 i2c serial eeprom (24cxx)
+atmel,24c32 i2c serial eeprom (24cxx)
+atmel,24c64 i2c serial eeprom (24cxx)
+atmel,24c128 i2c serial eeprom (24cxx)
+atmel,24c256 i2c serial eeprom (24cxx)
+atmel,24c512 i2c serial eeprom (24cxx)
+atmel,24c1024 i2c serial eeprom (24cxx)
+atmel,at97sc3204t i2c trusted platform module (TPM)
+capella,cm32181 CM32181: Ambient Light Sensor
+capella,cm3232 CM3232: Ambient Light Sensor
+catalyst,24c32 i2c serial eeprom
+cirrus,cs42l51 Cirrus Logic CS42L51 audio codec
+dallas,ds1307 64 x 8, Serial, I2C Real-Time Clock
+dallas,ds1338 I2C RTC with 56-Byte NV RAM
+dallas,ds1340 I2C RTC with Trickle Charger
+dallas,ds1374 I2C, 32-Bit Binary Counter Watchdog RTC with Trickle Charger and Reset Input/Output
+dallas,ds1631 High-Precision Digital Thermometer
+dallas,ds1682 Total-Elapsed-Time Recorder with Alarm
+dallas,ds1775 Tiny Digital Thermometer and Thermostat
+dallas,ds3232 Extremely Accurate I²C RTC with Integrated Crystal and SRAM
+dallas,ds4510 CPU Supervisor with Nonvolatile Memory and Programmable I/O
+dallas,ds75 Digital Thermometer and Thermostat
+dlg,da9053 DA9053: flexible system level PMIC with multicore support
+dlg,da9063 DA9063: system PMIC for quad-core application processors
+epson,rx8025 High-Stability. I2C-Bus INTERFACE REAL TIME CLOCK MODULE
+epson,rx8581 I2C-BUS INTERFACE REAL TIME CLOCK MODULE
+fsl,mag3110 MAG3110: Xtrinsic High Accuracy, 3D Magnetometer
+fsl,mc13892 MC13892: Power Management Integrated Circuit (PMIC) for i.MX35/51
+fsl,mma8450 MMA8450Q: Xtrinsic Low-power, 3-axis Xtrinsic Accelerometer
+fsl,mma8452 MMA8452Q: 3-axis 12-bit / 8-bit Digital Accelerometer
+fsl,mpr121 MPR121: Proximity Capacitive Touch Sensor Controller
+fsl,sgtl5000 SGTL5000: Ultra Low-Power Audio Codec
+gmt,g751 G751: Digital Temperature Sensor and Thermal Watchdog with Two-Wire Interface
+infineon,slb9635tt Infineon SLB9635 (Soft-) I2C TPM (old protocol, max 100khz)
+infineon,slb9645tt Infineon SLB9645 I2C TPM (new protocol, max 400khz)
+isil,isl12057 Intersil ISL12057 I2C RTC Chip
+isil,isl29028 Intersil ISL29028 Ambient Light and Proximity Sensor
+maxim,ds1050 5 Bit Programmable, Pulse-Width Modulator
+maxim,max1237 Low-Power, 4-/12-Channel, 2-Wire Serial, 12-Bit ADCs
+maxim,max6625 9-Bit/12-Bit Temperature Sensors with I²C-Compatible Serial Interface
+mc,rv3029c2 Real Time Clock Module with I2C-Bus
+national,lm63 Temperature sensor with integrated fan control
+national,lm75 I2C TEMP SENSOR
+national,lm80 Serial Interface ACPI-Compatible Microprocessor System Hardware Monitor
+national,lm85 Temperature sensor with integrated fan control
+national,lm92 ±0.33°C Accurate, 12-Bit + Sign Temperature Sensor and Thermal Window Comparator with Two-Wire Interface
+nuvoton,npct501 i2c trusted platform module (TPM)
+nxp,pca9556 Octal SMBus and I2C registered interface
+nxp,pca9557 8-bit I2C-bus and SMBus I/O port with reset
+nxp,pcf8563 Real-time clock/calendar
+nxp,pcf85063 Tiny Real-Time Clock
+oki,ml86v7667 OKI ML86V7667 video decoder
+ovti,ov5642 OV5642: Color CMOS QSXGA (5-megapixel) Image Sensor with OmniBSI and Embedded TrueFocus
+pericom,pt7c4338 Real-time Clock Module
+plx,pex8648 48-Lane, 12-Port PCI Express Gen 2 (5.0 GT/s) Switch
+ramtron,24c64 i2c serial eeprom (24cxx)
+ricoh,r2025sd I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+ricoh,r2221tl I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+ricoh,rs5c372a I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+ricoh,rs5c372b I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+ricoh,rv5c386 I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+ricoh,rv5c387a I2C bus SERIAL INTERFACE REAL-TIME CLOCK IC
+samsung,24ad0xd1 S524AD0XF1 (128K/256K-bit Serial EEPROM for Low Power)
+sii,s35390a 2-wire CMOS real-time clock
+skyworks,sky81452 Skyworks SKY81452: Six-Channel White LED Driver with Touch Panel Bias Supply
+st-micro,24c256 i2c serial eeprom (24cxx)
+stm,m41t00 Serial Access TIMEKEEPER
+stm,m41t62 Serial real-time clock (RTC) with alarm
+stm,m41t80 M41T80 - SERIAL ACCESS RTC WITH ALARMS
+taos,tsl2550 Ambient Light Sensor with SMBUS/Two Wire Serial Interface
+ti,tsc2003 I2C Touch-Screen Controller
+ti,tmp102 Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface
+ti,tmp103 Low Power Digital Temperature Sensor with SMBUS/Two Wire Serial Interface
+ti,tmp275 Digital Temperature Sensor
+winbond,wpct301 i2c trusted platform module (TPM)
diff --git a/Documentation/devicetree/bindings/iio/accel/bma180.txt b/Documentation/devicetree/bindings/iio/accel/bma180.txt
new file mode 100644
index 000000000..c5933573e
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/accel/bma180.txt
@@ -0,0 +1,24 @@
+* Bosch BMA180 triaxial acceleration sensor
+
+http://omapworld.com/BMA180_111_1002839.pdf
+
+Required properties:
+
+ - compatible : should be "bosch,bma180"
+ - reg : the I2C address of the sensor
+
+Optional properties:
+
+ - interrupt-parent : should be the phandle for the interrupt controller
+
+ - interrupts : interrupt mapping for GPIO IRQ, it should by configured with
+ flags IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_EDGE_RISING
+
+Example:
+
+bma180@40 {
+ compatible = "bosch,bma180";
+ reg = <0x40>;
+ interrupt-parent = <&gpio6>;
+ interrupts = <18 (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_EDGE_RISING)>;
+};
diff --git a/Documentation/devicetree/bindings/iio/adc/at91_adc.txt b/Documentation/devicetree/bindings/iio/adc/at91_adc.txt
new file mode 100644
index 000000000..0f813dec5
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/at91_adc.txt
@@ -0,0 +1,87 @@
+* AT91's Analog to Digital Converter (ADC)
+
+Required properties:
+ - compatible: Should be "atmel,<chip>-adc"
+ <chip> can be "at91sam9260", "at91sam9g45" or "at91sam9x5"
+ - reg: Should contain ADC registers location and length
+ - interrupts: Should contain the IRQ line for the ADC
+ - clock-names: tuple listing input clock names.
+ Required elements: "adc_clk", "adc_op_clk".
+ - clocks: phandles to input clocks.
+ - atmel,adc-channels-used: Bitmask of the channels muxed and enabled for this
+ device
+ - atmel,adc-startup-time: Startup Time of the ADC in microseconds as
+ defined in the datasheet
+ - atmel,adc-vref: Reference voltage in millivolts for the conversions
+ - atmel,adc-res: List of resolutions in bits supported by the ADC. List size
+ must be two at least.
+ - atmel,adc-res-names: Contains one identifier string for each resolution
+ in atmel,adc-res property. "lowres" and "highres"
+ identifiers are required.
+
+Optional properties:
+ - atmel,adc-use-external-triggers: Boolean to enable the external triggers
+ - atmel,adc-use-res: String corresponding to an identifier from
+ atmel,adc-res-names property. If not specified, the highest
+ resolution will be used.
+ - atmel,adc-sleep-mode: Boolean to enable sleep mode when no conversion
+ - atmel,adc-sample-hold-time: Sample and Hold Time in microseconds
+ - atmel,adc-ts-wires: Number of touchscreen wires. Should be 4 or 5. If this
+ value is set, then the adc driver will enable touchscreen
+ support.
+ NOTE: when adc touchscreen is enabled, the adc hardware trigger will be
+ disabled. Since touchscreen will occupy the trigger register.
+ - atmel,adc-ts-pressure-threshold: a pressure threshold for touchscreen. It
+ makes touch detection more precise.
+
+Optional trigger Nodes:
+ - Required properties:
+ * trigger-name: Name of the trigger exposed to the user
+ * trigger-value: Value to put in the Trigger register
+ to activate this trigger
+ - Optional properties:
+ * trigger-external: Is the trigger an external trigger?
+
+Examples:
+adc0: adc@fffb0000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "atmel,at91sam9260-adc";
+ reg = <0xfffb0000 0x100>;
+ interrupts = <20 IRQ_TYPE_LEVEL_HIGH 0>;
+ clocks = <&adc_clk>, <&adc_op_clk>;
+ clock-names = "adc_clk", "adc_op_clk";
+ atmel,adc-channels-used = <0xff>;
+ atmel,adc-startup-time = <40>;
+ atmel,adc-use-external-triggers;
+ atmel,adc-vref = <3300>;
+ atmel,adc-res = <8 10>;
+ atmel,adc-res-names = "lowres", "highres";
+ atmel,adc-use-res = "lowres";
+
+ trigger@0 {
+ reg = <0>;
+ trigger-name = "external-rising";
+ trigger-value = <0x1>;
+ trigger-external;
+ };
+ trigger@1 {
+ reg = <1>;
+ trigger-name = "external-falling";
+ trigger-value = <0x2>;
+ trigger-external;
+ };
+
+ trigger@2 {
+ reg = <2>;
+ trigger-name = "external-any";
+ trigger-value = <0x3>;
+ trigger-external;
+ };
+
+ trigger@3 {
+ reg = <3>;
+ trigger-name = "continuous";
+ trigger-value = <0x6>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/iio/adc/cc10001_adc.txt b/Documentation/devicetree/bindings/iio/adc/cc10001_adc.txt
new file mode 100644
index 000000000..904f76de9
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/cc10001_adc.txt
@@ -0,0 +1,22 @@
+* Cosmic Circuits - Analog to Digital Converter (CC-10001-ADC)
+
+Required properties:
+ - compatible: Should be "cosmic,10001-adc"
+ - reg: Should contain adc registers location and length.
+ - clock-names: Should contain "adc".
+ - clocks: Should contain a clock specifier for each entry in clock-names
+ - vref-supply: The regulator supply ADC reference voltage.
+
+Optional properties:
+ - adc-reserved-channels: Bitmask of reserved channels,
+ i.e. channels that cannot be used by the OS.
+
+Example:
+adc: adc@18101600 {
+ compatible = "cosmic,10001-adc";
+ reg = <0x18101600 0x24>;
+ adc-reserved-channels = <0x2>;
+ clocks = <&adc_clk>;
+ clock-names = "adc";
+ vref-supply = <&reg_1v8>;
+};
diff --git a/Documentation/devicetree/bindings/iio/adc/da9150-gpadc.txt b/Documentation/devicetree/bindings/iio/adc/da9150-gpadc.txt
new file mode 100644
index 000000000..c07228da9
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/da9150-gpadc.txt
@@ -0,0 +1,16 @@
+Dialog Semiconductor DA9150 IIO GPADC bindings
+
+Required properties:
+- compatible: "dlg,da9150-gpadc" for DA9150 IIO GPADC
+- #io-channel-cells: Should be set to <1>
+ (See Documentation/devicetree/bindings/iio/iio-bindings.txt for further info)
+
+For further information on GPADC channels, see device datasheet.
+
+
+Example:
+
+ gpadc: da9150-gpadc {
+ compatible = "dlg,da9150-gpadc";
+ #io-channel-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/iio/adc/max1027-adc.txt b/Documentation/devicetree/bindings/iio/adc/max1027-adc.txt
new file mode 100644
index 000000000..a8770cc6b
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/max1027-adc.txt
@@ -0,0 +1,22 @@
+* Maxim 1027/1029/1031 Analog to Digital Converter (ADC)
+
+Required properties:
+ - compatible: Should be "maxim,max1027" or "maxim,max1029" or "maxim,max1031"
+ - reg: SPI chip select number for the device
+ - interrupt-parent: phandle to the parent interrupt controller
+ see: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+ - interrupts: IRQ line for the ADC
+ see: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+
+Recommended properties:
+- spi-max-frequency: Definition as per
+ Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Example:
+adc@0 {
+ compatible = "maxim,max1027";
+ reg = <0>;
+ interrupt-parent = <&gpio5>;
+ interrupts = <15 IRQ_TYPE_EDGE_RISING>;
+ spi-max-frequency = <1000000>;
+};
diff --git a/Documentation/devicetree/bindings/iio/adc/mcp320x.txt b/Documentation/devicetree/bindings/iio/adc/mcp320x.txt
new file mode 100644
index 000000000..b85184391
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/mcp320x.txt
@@ -0,0 +1,30 @@
+* Microchip Analog to Digital Converter (ADC)
+
+The node for this driver must be a child node of a SPI controller, hence
+all mandatory properties described in
+
+ Documentation/devicetree/bindings/spi/spi-bus.txt
+
+must be specified.
+
+Required properties:
+ - compatible: Must be one of the following, depending on the
+ model:
+ "mcp3001"
+ "mcp3002"
+ "mcp3004"
+ "mcp3008"
+ "mcp3201"
+ "mcp3202"
+ "mcp3204"
+ "mcp3208"
+
+
+Examples:
+spi_controller {
+ mcp3x0x@0 {
+ compatible = "mcp3002";
+ reg = <0>;
+ spi-max-frequency = <1000000>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/iio/adc/mcp3422.txt b/Documentation/devicetree/bindings/iio/adc/mcp3422.txt
new file mode 100644
index 000000000..333139cc0
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/mcp3422.txt
@@ -0,0 +1,17 @@
+* Microchip mcp3422/3/4/6/7/8 chip family (ADC)
+
+Required properties:
+ - compatible: Should be
+ "microchip,mcp3422" or
+ "microchip,mcp3423" or
+ "microchip,mcp3424" or
+ "microchip,mcp3426" or
+ "microchip,mcp3427" or
+ "microchip,mcp3428"
+ - reg: I2C address for the device
+
+Example:
+adc@0 {
+ compatible = "microchip,mcp3424";
+ reg = <0x68>;
+};
diff --git a/Documentation/devicetree/bindings/iio/adc/nuvoton-nau7802.txt b/Documentation/devicetree/bindings/iio/adc/nuvoton-nau7802.txt
new file mode 100644
index 000000000..e9582e6fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/nuvoton-nau7802.txt
@@ -0,0 +1,18 @@
+* Nuvoton NAU7802 Analog to Digital Converter (ADC)
+
+Required properties:
+ - compatible: Should be "nuvoton,nau7802"
+ - reg: Should contain the ADC I2C address
+
+Optional properties:
+ - nuvoton,vldo: Internal reference voltage in millivolts to be
+ configured valid values are between 2400 mV and 4500 mV.
+ - interrupts: IRQ line for the ADC. If not used the driver will use
+ polling.
+
+Example:
+adc2: nau7802@2a {
+ compatible = "nuvoton,nau7802";
+ reg = <0x2a>;
+ nuvoton,vldo = <3000>;
+};
diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.txt b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.txt
new file mode 100644
index 000000000..4e36d6e2f
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.txt
@@ -0,0 +1,46 @@
+Qualcomm's SPMI PMIC current ADC
+
+QPNP PMIC current ADC (IADC) provides interface to clients to read current.
+A 16 bit ADC is used for current measurements. IADC can measure the current
+through an external resistor (channel 1) or internal (built-in) resistor
+(channel 0). When using an external resistor it is to be described by
+qcom,external-resistor-micro-ohms property.
+
+IADC node:
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: Should contain "qcom,spmi-iadc".
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: IADC base address and length in the SPMI PMIC register map
+
+- interrupts:
+ Usage: optional
+ Value type: <prop-encoded-array>
+ Definition: End of ADC conversion.
+
+- qcom,external-resistor-micro-ohms:
+ Usage: optional
+ Value type: <u32>
+ Definition: Sense resister value in micro Ohm.
+ If not defined value of 10000 micro Ohms will be used.
+
+Example:
+ /* IADC node */
+ pmic_iadc: iadc@3600 {
+ compatible = "qcom,spmi-iadc";
+ reg = <0x3600 0x100>;
+ interrupts = <0x0 0x36 0x0 IRQ_TYPE_EDGE_RISING>;
+ qcom,external-resistor-micro-ohms = <10000>;
+ #io-channel-cells = <1>;
+ };
+
+ /* IIO client node */
+ bat {
+ io-channels = <&pmic_iadc 0>;
+ io-channel-names = "iadc";
+ };
diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.txt b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.txt
new file mode 100644
index 000000000..0fb46137f
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.txt
@@ -0,0 +1,129 @@
+Qualcomm's SPMI PMIC voltage ADC
+
+SPMI PMIC voltage ADC (VADC) provides interface to clients to read
+voltage. The VADC is a 15-bit sigma-delta ADC.
+
+VADC node:
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: Should contain "qcom,spmi-vadc".
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: VADC base address and length in the SPMI PMIC register map.
+
+- #address-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: Must be one. Child node 'reg' property should define ADC
+ channel number.
+
+- #size-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: Must be zero.
+
+- #io-channel-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: Must be one. For details about IIO bindings see:
+ Documentation/devicetree/bindings/iio/iio-bindings.txt
+
+- interrupts:
+ Usage: optional
+ Value type: <prop-encoded-array>
+ Definition: End of conversion interrupt.
+
+Channel node properties:
+
+- reg:
+ Usage: required
+ Value type: <u32>
+ Definition: ADC channel number.
+ See include/dt-bindings/iio/qcom,spmi-vadc.h
+
+- qcom,decimation:
+ Usage: optional
+ Value type: <u32>
+ Definition: This parameter is used to decrease ADC sampling rate.
+ Quicker measurements can be made by reducing decimation ratio.
+ Valid values are 512, 1024, 2048, 4096.
+ If property is not found, default value of 512 will be used.
+
+- qcom,pre-scaling:
+ Usage: optional
+ Value type: <u32 array>
+ Definition: Used for scaling the channel input signal before the signal is
+ fed to VADC. The configuration for this node is to know the
+ pre-determined ratio and use it for post scaling. Select one from
+ the following options.
+ <1 1>, <1 3>, <1 4>, <1 6>, <1 20>, <1 8>, <10 81>, <1 10>
+ If property is not found default value depending on chip will be used.
+
+- qcom,ratiometric:
+ Usage: optional
+ Value type: <empty>
+ Definition: Channel calibration type. If this property is specified
+ VADC will use the VDD reference (1.8V) and GND for channel
+ calibration. If property is not found, channel will be
+ calibrated with 0.625V and 1.25V reference channels, also
+ known as absolute calibration.
+
+- qcom,hw-settle-time:
+ Usage: optional
+ Value type: <u32>
+ Definition: Time between AMUX getting configured and the ADC starting
+ conversion. Delay = 100us * (value) for value < 11, and
+ 2ms * (value - 10) otherwise.
+ Valid values are: 0, 100, 200, 300, 400, 500, 600, 700, 800,
+ 900 us and 1, 2, 4, 6, 8, 10 ms
+ If property is not found, channel will use 0us.
+
+- qcom,avg-samples:
+ Usage: optional
+ Value type: <u32>
+ Definition: Number of samples to be used for measurement.
+ Averaging provides the option to obtain a single measurement
+ from the ADC that is an average of multiple samples. The value
+ selected is 2^(value).
+ Valid values are: 1, 2, 4, 8, 16, 32, 64, 128, 256, 512
+ If property is not found, 1 sample will be used.
+
+NOTE:
+
+Following channels, also known as reference point channels, are used for
+result calibration and their channel configuration nodes should be defined:
+VADC_REF_625MV and/or VADC_SPARE1(based on PMIC version) VADC_REF_1250MV,
+VADC_GND_REF and VADC_VDD_VADC.
+
+Example:
+
+ /* VADC node */
+ pmic_vadc: vadc@3100 {
+ compatible = "qcom,spmi-vadc";
+ reg = <0x3100 0x100>;
+ interrupts = <0x0 0x31 0x0 IRQ_TYPE_EDGE_RISING>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #io-channel-cells = <1>;
+ io-channel-ranges;
+
+ /* Channel node */
+ usb_id_nopull {
+ reg = <VADC_LR_MUX10_USB_ID>;
+ qcom,decimation = <512>;
+ qcom,ratiometric;
+ qcom,hw-settle-time = <200>;
+ qcom,avg-samples = <1>;
+ qcom,pre-scaling = <1 3>;
+ };
+ };
+
+ /* IIO client node */
+ usb {
+ io-channels = <&pmic_vadc VADC_LR_MUX10_USB_ID>;
+ io-channel-names = "vadc";
+ };
diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt
new file mode 100644
index 000000000..a9a5fe19f
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.txt
@@ -0,0 +1,24 @@
+Rockchip Successive Approximation Register (SAR) A/D Converter bindings
+
+Required properties:
+- compatible: Should be "rockchip,saradc" or "rockchip,rk3066-tsadc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: The interrupt number to the cpu. The interrupt specifier format
+ depends on the interrupt controller.
+- clocks: Must contain an entry for each entry in clock-names.
+- clock-names: Shall be "saradc" for the converter-clock, and "apb_pclk" for
+ the peripheral clock.
+- vref-supply: The regulator supply ADC reference voltage.
+- #io-channel-cells: Should be 1, see ../iio-bindings.txt
+
+Example:
+ saradc: saradc@2006c000 {
+ compatible = "rockchip,saradc";
+ reg = <0x2006c000 0x100>;
+ interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru SCLK_SARADC>, <&cru PCLK_SARADC>;
+ clock-names = "saradc", "apb_pclk";
+ #io-channel-cells = <1>;
+ vref-supply = <&vcc18>;
+ };
diff --git a/Documentation/devicetree/bindings/iio/adc/ti-adc128s052.txt b/Documentation/devicetree/bindings/iio/adc/ti-adc128s052.txt
new file mode 100644
index 000000000..42ca7deec
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/ti-adc128s052.txt
@@ -0,0 +1,18 @@
+* Texas Instruments' ADC128S052 ADC chip
+
+Required properties:
+ - compatible: Should be "ti,adc128s052"
+ - reg: spi chip select number for the device
+ - vref-supply: The regulator supply for ADC reference voltage
+
+Recommended properties:
+ - spi-max-frequency: Definition as per
+ Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Example:
+adc@0 {
+ compatible = "ti,adc128s052";
+ reg = <0>;
+ vref-supply = <&vdd_supply>;
+ spi-max-frequency = <1000000>;
+};
diff --git a/Documentation/devicetree/bindings/iio/adc/twl4030-madc.txt b/Documentation/devicetree/bindings/iio/adc/twl4030-madc.txt
new file mode 100644
index 000000000..6bdd21404
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/twl4030-madc.txt
@@ -0,0 +1,24 @@
+* TWL4030 Monitoring Analog to Digital Converter (MADC)
+
+The MADC subsystem in the TWL4030 consists of a 10-bit ADC
+combined with a 16-input analog multiplexer.
+
+Required properties:
+ - compatible: Should contain "ti,twl4030-madc".
+ - interrupts: IRQ line for the MADC submodule.
+ - #io-channel-cells: Should be set to <1>.
+
+Optional properties:
+ - ti,system-uses-second-madc-irq: boolean, set if the second madc irq register
+ should be used, which is intended to be used
+ by Co-Processors (e.g. a modem).
+
+Example:
+
+&twl {
+ madc {
+ compatible = "ti,twl4030-madc";
+ interrupts = <3>;
+ #io-channel-cells = <1>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/iio/adc/vf610-adc.txt b/Documentation/devicetree/bindings/iio/adc/vf610-adc.txt
new file mode 100644
index 000000000..1a4a43d5c
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/vf610-adc.txt
@@ -0,0 +1,22 @@
+Freescale vf610 Analog to Digital Converter bindings
+
+The devicetree bindings are for the new ADC driver written for
+vf610/i.MX6slx and upward SoCs from Freescale.
+
+Required properties:
+- compatible: Should contain "fsl,vf610-adc"
+- reg: Offset and length of the register set for the device
+- interrupts: Should contain the interrupt for the device
+- clocks: The clock is needed by the ADC controller, ADC clock source is ipg clock.
+- clock-names: Must contain "adc", matching entry in the clocks property.
+- vref-supply: The regulator supply ADC reference voltage.
+
+Example:
+adc0: adc@4003b000 {
+ compatible = "fsl,vf610-adc";
+ reg = <0x4003b000 0x1000>;
+ interrupts = <0 53 0x04>;
+ clocks = <&clks VF610_CLK_ADC0>;
+ clock-names = "adc";
+ vref-supply = <&reg_vcc_3v3_mcu>;
+};
diff --git a/Documentation/devicetree/bindings/iio/adc/xilinx-xadc.txt b/Documentation/devicetree/bindings/iio/adc/xilinx-xadc.txt
new file mode 100644
index 000000000..d71258e2d
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/xilinx-xadc.txt
@@ -0,0 +1,113 @@
+Xilinx XADC device driver
+
+This binding document describes the bindings for both of them since the
+bindings are very similar. The Xilinx XADC is a ADC that can be found in the
+series 7 FPGAs from Xilinx. The XADC has a DRP interface for communication.
+Currently two different frontends for the DRP interface exist. One that is only
+available on the ZYNQ family as a hardmacro in the SoC portion of the ZYNQ. The
+other one is available on all series 7 platforms and is a softmacro with a AXI
+interface. This binding document describes the bindings for both of them since
+the bindings are very similar.
+
+Required properties:
+ - compatible: Should be one of
+ * "xlnx,zynq-xadc-1.00.a": When using the ZYNQ device
+ configuration interface to interface to the XADC hardmacro.
+ * "xlnx,axi-xadc-1.00.a": When using the axi-xadc pcore to
+ interface to the XADC hardmacro.
+ - reg: Address and length of the register set for the device
+ - interrupts: Interrupt for the XADC control interface.
+ - clocks: When using the ZYNQ this must be the ZYNQ PCAP clock,
+ when using the AXI-XADC pcore this must be the clock that provides the
+ clock to the AXI bus interface of the core.
+
+Optional properties:
+ - interrupt-parent: phandle to the parent interrupt controller
+ - xlnx,external-mux:
+ * "none": No external multiplexer is used, this is the default
+ if the property is omitted.
+ * "single": External multiplexer mode is used with one
+ multiplexer.
+ * "dual": External multiplexer mode is used with two
+ multiplexers for simultaneous sampling.
+ - xlnx,external-mux-channel: Configures which pair of pins is used to
+ sample data in external mux mode.
+ Valid values for single external multiplexer mode are:
+ 0: VP/VN
+ 1: VAUXP[0]/VAUXN[0]
+ 2: VAUXP[1]/VAUXN[1]
+ ...
+ 16: VAUXP[15]/VAUXN[15]
+ Valid values for dual external multiplexer mode are:
+ 1: VAUXP[0]/VAUXN[0] - VAUXP[8]/VAUXN[8]
+ 2: VAUXP[1]/VAUXN[1] - VAUXP[9]/VAUXN[9]
+ ...
+ 8: VAUXP[7]/VAUXN[7] - VAUXP[15]/VAUXN[15]
+
+ This property needs to be present if the device is configured for
+ external multiplexer mode (either single or dual). If the device is
+ not using external multiplexer mode the property is ignored.
+ - xnlx,channels: List of external channels that are connected to the ADC
+ Required properties:
+ * #address-cells: Should be 1.
+ * #size-cells: Should be 0.
+
+ The child nodes of this node represent the external channels which are
+ connected to the ADC. If the property is no present no external
+ channels will be assumed to be connected.
+
+ Each child node represents one channel and has the following
+ properties:
+ Required properties:
+ * reg: Pair of pins the channel is connected to.
+ 0: VP/VN
+ 1: VAUXP[0]/VAUXN[0]
+ 2: VAUXP[1]/VAUXN[1]
+ ...
+ 16: VAUXP[15]/VAUXN[15]
+ Note each channel number should only be used at most
+ once.
+ Optional properties:
+ * xlnx,bipolar: If set the channel is used in bipolar
+ mode.
+
+
+Examples:
+ xadc@f8007100 {
+ compatible = "xlnx,zynq-xadc-1.00.a";
+ reg = <0xf8007100 0x20>;
+ interrupts = <0 7 4>;
+ interrupt-parent = <&gic>;
+ clocks = <&pcap_clk>;
+
+ xlnx,channels {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ channel@0 {
+ reg = <0>;
+ };
+ channel@1 {
+ reg = <1>;
+ };
+ channel@8 {
+ reg = <8>;
+ };
+ };
+ };
+
+ xadc@43200000 {
+ compatible = "xlnx,axi-xadc-1.00.a";
+ reg = <0x43200000 0x1000>;
+ interrupts = <0 53 4>;
+ interrupt-parent = <&gic>;
+ clocks = <&fpga1_clk>;
+
+ xlnx,channels {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ channel@0 {
+ reg = <0>;
+ xlnx,bipolar;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/iio/dac/ad7303.txt b/Documentation/devicetree/bindings/iio/dac/ad7303.txt
new file mode 100644
index 000000000..914610f05
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/dac/ad7303.txt
@@ -0,0 +1,23 @@
+Analog Devices AD7303 DAC device driver
+
+Required properties:
+ - compatible: Must be "adi,ad7303"
+ - reg: SPI chip select number for the device
+ - spi-max-frequency: Max SPI frequency to use (< 30000000)
+ - Vdd-supply: Phandle to the Vdd power supply
+
+Optional properties:
+ - REF-supply: Phandle to the external reference voltage supply. This should
+ only be set if there is an external reference voltage connected to the REF
+ pin. If the property is not set Vdd/2 is used as the reference voltage.
+
+Example:
+
+ ad7303@4 {
+ compatible = "adi,ad7303";
+ reg = <4>;
+ spi-max-frequency = <10000000>;
+ Vdd-supply = <&vdd_supply>;
+ adi,use-external-reference;
+ REF-supply = <&vref_supply>;
+ };
diff --git a/Documentation/devicetree/bindings/iio/dac/max5821.txt b/Documentation/devicetree/bindings/iio/dac/max5821.txt
new file mode 100644
index 000000000..54276ce8c
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/dac/max5821.txt
@@ -0,0 +1,14 @@
+Maxim max5821 DAC device driver
+
+Required properties:
+ - compatible: Must be "maxim,max5821"
+ - reg: Should contain the DAC I2C address
+ - vref-supply: Phandle to the vref power supply
+
+Example:
+
+ max5821@38 {
+ compatible = "maxim,max5821";
+ reg = <0x38>;
+ vref-supply = <&reg_max5821>;
+ };
diff --git a/Documentation/devicetree/bindings/iio/frequency/adf4350.txt b/Documentation/devicetree/bindings/iio/frequency/adf4350.txt
new file mode 100644
index 000000000..f8c181d81
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/frequency/adf4350.txt
@@ -0,0 +1,86 @@
+Analog Devices ADF4350/ADF4351 device driver
+
+Required properties:
+ - compatible: Should be one of
+ * "adi,adf4350": When using the ADF4350 device
+ * "adi,adf4351": When using the ADF4351 device
+ - reg: SPI chip select numbert for the device
+ - spi-max-frequency: Max SPI frequency to use (< 20000000)
+ - clocks: From common clock binding. Clock is phandle to clock for
+ ADF435x Reference Clock (CLKIN).
+
+Optional properties:
+ - gpios: GPIO Lock detect - If set with a valid phandle and GPIO number,
+ pll lock state is tested upon read.
+ - adi,channel-spacing: Channel spacing in Hz (influences MODULUS).
+ - adi,power-up-frequency: If set in Hz the PLL tunes to
+ the desired frequency on probe.
+ - adi,reference-div-factor: If set the driver skips dynamic calculation
+ and uses this default value instead.
+ - adi,reference-doubler-enable: Enables reference doubler.
+ - adi,reference-div2-enable: Enables reference divider.
+ - adi,phase-detector-polarity-positive-enable: Enables positive phase
+ detector polarity. Default = negative.
+ - adi,lock-detect-precision-6ns-enable: Enables 6ns lock detect precision.
+ Default = 10ns.
+ - adi,lock-detect-function-integer-n-enable: Enables lock detect
+ for integer-N mode. Default = factional-N mode.
+ - adi,charge-pump-current: Charge pump current in mA.
+ Default = 2500mA.
+ - adi,muxout-select: On chip multiplexer output selection.
+ Valid values for the multiplexer output are:
+ 0: Three-State Output (default)
+ 1: DVDD
+ 2: DGND
+ 3: R-Counter output
+ 4: N-Divider output
+ 5: Analog lock detect
+ 6: Digital lock detect
+ - adi,low-spur-mode-enable: Enables low spur mode.
+ Default = Low noise mode.
+ - adi,cycle-slip-reduction-enable: Enables cycle slip reduction.
+ - adi,charge-cancellation-enable: Enabled charge pump
+ charge cancellation for integer-N modes.
+ - adi,anti-backlash-3ns-enable: Enables 3ns antibacklash pulse width
+ for integer-N modes.
+ - adi,band-select-clock-mode-high-enable: Enables faster band
+ selection logic.
+ - adi,12bit-clk-divider: Clock divider value used when
+ adi,12bit-clkdiv-mode != 0
+ - adi,clk-divider-mode:
+ Valid values for the clkdiv mode are:
+ 0: Clock divider off (default)
+ 1: Fast lock enable
+ 2: Phase resync enable
+ - adi,aux-output-enable: Enables auxiliary RF output.
+ - adi,aux-output-fundamental-enable: Selects fundamental VCO output on
+ the auxiliary RF output. Default = Output of RF dividers.
+ - adi,mute-till-lock-enable: Enables Mute-Till-Lock-Detect function.
+ - adi,output-power: Output power selection.
+ Valid values for the power mode are:
+ 0: -4dBm (default)
+ 1: -1dBm
+ 2: +2dBm
+ 3: +5dBm
+ - adi,aux-output-power: Auxiliary output power selection.
+ Valid values for the power mode are:
+ 0: -4dBm (default)
+ 1: -1dBm
+ 2: +2dBm
+ 3: +5dBm
+
+
+Example:
+ lo_pll0_rx_adf4351: adf4351-rx-lpc@4 {
+ compatible = "adi,adf4351";
+ reg = <4>;
+ spi-max-frequency = <10000000>;
+ clocks = <&clk0_ad9523 9>;
+ clock-names = "clkin";
+ adi,channel-spacing = <10000>;
+ adi,power-up-frequency = <2400000000>;
+ adi,phase-detector-polarity-positive-enable;
+ adi,charge-pump-current = <2500>;
+ adi,output-power = <3>;
+ adi,mute-till-lock-enable;
+ };
diff --git a/Documentation/devicetree/bindings/iio/humidity/dht11.txt b/Documentation/devicetree/bindings/iio/humidity/dht11.txt
new file mode 100644
index 000000000..ecc24c199
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/humidity/dht11.txt
@@ -0,0 +1,14 @@
+* DHT11 humidity/temperature sensor (and compatibles like DHT22)
+
+Required properties:
+ - compatible: Should be "dht11"
+ - gpios: Should specify the GPIO connected to the sensor's data
+ line, see "gpios property" in
+ Documentation/devicetree/bindings/gpio/gpio.txt.
+
+Example:
+
+humidity_sensor {
+ compatible = "dht11";
+ gpios = <&gpio0 6 0>;
+}
diff --git a/Documentation/devicetree/bindings/iio/iio-bindings.txt b/Documentation/devicetree/bindings/iio/iio-bindings.txt
new file mode 100644
index 000000000..0b447d9ad
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/iio-bindings.txt
@@ -0,0 +1,97 @@
+This binding is derived from clock bindings, and based on suggestions
+from Lars-Peter Clausen [1].
+
+Sources of IIO channels can be represented by any node in the device
+tree. Those nodes are designated as IIO providers. IIO consumer
+nodes use a phandle and IIO specifier pair to connect IIO provider
+outputs to IIO inputs. Similar to the gpio specifiers, an IIO
+specifier is an array of one or more cells identifying the IIO
+output on a device. The length of an IIO specifier is defined by the
+value of a #io-channel-cells property in the IIO provider node.
+
+[1] http://marc.info/?l=linux-iio&m=135902119507483&w=2
+
+==IIO providers==
+
+Required properties:
+#io-channel-cells: Number of cells in an IIO specifier; Typically 0 for nodes
+ with a single IIO output and 1 for nodes with multiple
+ IIO outputs.
+
+Example for a simple configuration with no trigger:
+
+ adc: voltage-sensor@35 {
+ compatible = "maxim,max1139";
+ reg = <0x35>;
+ #io-channel-cells = <1>;
+ };
+
+Example for a configuration with trigger:
+
+ adc@35 {
+ compatible = "some-vendor,some-adc";
+ reg = <0x35>;
+
+ adc1: iio-device@0 {
+ #io-channel-cells = <1>;
+ /* other properties */
+ };
+ adc2: iio-device@1 {
+ #io-channel-cells = <1>;
+ /* other properties */
+ };
+ };
+
+==IIO consumers==
+
+Required properties:
+io-channels: List of phandle and IIO specifier pairs, one pair
+ for each IIO input to the device. Note: if the
+ IIO provider specifies '0' for #io-channel-cells,
+ then only the phandle portion of the pair will appear.
+
+Optional properties:
+io-channel-names:
+ List of IIO input name strings sorted in the same
+ order as the io-channels property. Consumers drivers
+ will use io-channel-names to match IIO input names
+ with IIO specifiers.
+io-channel-ranges:
+ Empty property indicating that child nodes can inherit named
+ IIO channels from this node. Useful for bus nodes to provide
+ and IIO channel to their children.
+
+For example:
+
+ device {
+ io-channels = <&adc 1>, <&ref 0>;
+ io-channel-names = "vcc", "vdd";
+ };
+
+This represents a device with two IIO inputs, named "vcc" and "vdd".
+The vcc channel is connected to output 1 of the &adc device, and the
+vdd channel is connected to output 0 of the &ref device.
+
+==Example==
+
+ adc: max1139@35 {
+ compatible = "maxim,max1139";
+ reg = <0x35>;
+ #io-channel-cells = <1>;
+ };
+
+ ...
+
+ iio_hwmon {
+ compatible = "iio-hwmon";
+ io-channels = <&adc 0>, <&adc 1>, <&adc 2>,
+ <&adc 3>, <&adc 4>, <&adc 5>,
+ <&adc 6>, <&adc 7>, <&adc 8>,
+ <&adc 9>;
+ };
+
+ some_consumer {
+ compatible = "some-consumer";
+ io-channels = <&adc 10>, <&adc 11>;
+ io-channel-names = "adc1", "adc2";
+ };
diff --git a/Documentation/devicetree/bindings/iio/light/apds9300.txt b/Documentation/devicetree/bindings/iio/light/apds9300.txt
new file mode 100644
index 000000000..d6f66c73d
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/apds9300.txt
@@ -0,0 +1,22 @@
+* Avago APDS9300 ambient light sensor
+
+http://www.avagotech.com/docs/AV02-1077EN
+
+Required properties:
+
+ - compatible : should be "avago,apds9300"
+ - reg : the I2C address of the sensor
+
+Optional properties:
+
+ - interrupt-parent : should be the phandle for the interrupt controller
+ - interrupts : interrupt mapping for GPIO IRQ
+
+Example:
+
+apds9300@39 {
+ compatible = "avago,apds9300";
+ reg = <0x39>;
+ interrupt-parent = <&gpio2>;
+ interrupts = <29 8>;
+};
diff --git a/Documentation/devicetree/bindings/iio/light/cm36651.txt b/Documentation/devicetree/bindings/iio/light/cm36651.txt
new file mode 100644
index 000000000..c03e19db4
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/cm36651.txt
@@ -0,0 +1,26 @@
+* Capella CM36651 I2C Proximity and Color Light sensor
+
+Required properties:
+- compatible: must be "capella,cm36651"
+- reg: the I2C address of the device
+- interrupts: interrupt-specifier for the sole interrupt
+ generated by the device
+- vled-supply: regulator for the IR LED. IR_LED is a part
+ of the cm36651 for proximity detection.
+ As covered in ../../regulator/regulator.txt
+
+Example:
+
+ i2c_cm36651: i2c-gpio {
+ /* ... */
+
+ cm36651@18 {
+ compatible = "capella,cm36651";
+ reg = <0x18>;
+ interrupt-parent = <&gpx0>;
+ interrupts = <2 0>;
+ vled-supply = <&ps_als_reg>;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/iio/light/gp2ap020a00f.txt b/Documentation/devicetree/bindings/iio/light/gp2ap020a00f.txt
new file mode 100644
index 000000000..9231c8231
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/gp2ap020a00f.txt
@@ -0,0 +1,21 @@
+* Sharp GP2AP020A00F I2C Proximity/ALS sensor
+
+The proximity detector sensor requires power supply
+for its built-in led. It is also defined by this binding.
+
+Required properties:
+
+ - compatible : should be "sharp,gp2ap020a00f"
+ - reg : the I2C slave address of the light sensor
+ - interrupts : interrupt specifier for the sole interrupt generated
+ by the device
+ - vled-supply : VLED power supply, as covered in ../regulator/regulator.txt
+
+Example:
+
+gp2ap020a00f@39 {
+ compatible = "sharp,gp2ap020a00f";
+ reg = <0x39>;
+ interrupts = <2 0>;
+ vled-supply = <...>;
+};
diff --git a/Documentation/devicetree/bindings/iio/light/tsl2563.txt b/Documentation/devicetree/bindings/iio/light/tsl2563.txt
new file mode 100644
index 000000000..f91e809e7
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/light/tsl2563.txt
@@ -0,0 +1,19 @@
+* AMS TAOS TSL2563 ambient light sensor
+
+Required properties:
+
+ - compatible : should be "amstaos,tsl2563"
+ - reg : the I2C address of the sensor
+
+Optional properties:
+
+ - amstaos,cover-comp-gain : integer used as multiplier for gain
+ compensation (default = 1)
+
+Example:
+
+tsl2563@29 {
+ compatible = "amstaos,tsl2563";
+ reg = <0x29>;
+ amstaos,cover-comp-gain = <16>;
+};
diff --git a/Documentation/devicetree/bindings/iio/magnetometer/ak8975.txt b/Documentation/devicetree/bindings/iio/magnetometer/ak8975.txt
new file mode 100644
index 000000000..011679f1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/magnetometer/ak8975.txt
@@ -0,0 +1,18 @@
+* AsahiKASEI AK8975 magnetometer sensor
+
+Required properties:
+
+ - compatible : should be "asahi-kasei,ak8975"
+ - reg : the I2C address of the magnetometer
+
+Optional properties:
+
+ - gpios : should be device tree identifier of the magnetometer DRDY pin
+
+Example:
+
+ak8975@0c {
+ compatible = "asahi-kasei,ak8975";
+ reg = <0x0c>;
+ gpios = <&gpj0 7 0>;
+};
diff --git a/Documentation/devicetree/bindings/iio/magnetometer/hmc5843.txt b/Documentation/devicetree/bindings/iio/magnetometer/hmc5843.txt
new file mode 100644
index 000000000..8e191eef0
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/magnetometer/hmc5843.txt
@@ -0,0 +1,21 @@
+* Honeywell HMC5843 magnetometer sensor
+
+Required properties:
+
+ - compatible : should be "honeywell,hmc5843"
+ Other models which are supported with driver are:
+ "honeywell,hmc5883"
+ "honeywell,hmc5883l"
+ "honeywell,hmc5983"
+ - reg : the I2C address of the magnetometer - typically 0x1e
+
+Optional properties:
+
+ - gpios : should be device tree identifier of the magnetometer DRDY pin
+
+Example:
+
+hmc5843@1e {
+ compatible = "honeywell,hmc5843"
+ reg = <0x1e>;
+};
diff --git a/Documentation/devicetree/bindings/iio/proximity/as3935.txt b/Documentation/devicetree/bindings/iio/proximity/as3935.txt
new file mode 100644
index 000000000..ae23dd8da
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/proximity/as3935.txt
@@ -0,0 +1,28 @@
+Austrian Microsystems AS3935 Franklin lightning sensor device driver
+
+Required properties:
+ - compatible: must be "ams,as3935"
+ - reg: SPI chip select number for the device
+ - spi-cpha: SPI Mode 1. Refer to spi/spi-bus.txt for generic SPI
+ slave node bindings.
+ - interrupt-parent : should be the phandle for the interrupt controller
+ - interrupts : the sole interrupt generated by the device
+
+ Refer to interrupt-controller/interrupts.txt for generic
+ interrupt client node bindings.
+
+Optional properties:
+ - ams,tuning-capacitor-pf: Calibration tuning capacitor stepping
+ value 0 - 120pF. This will require using the calibration data from
+ the manufacturer.
+
+Example:
+
+as3935@0 {
+ compatible = "ams,as3935";
+ reg = <0>;
+ spi-cpha;
+ interrupt-parent = <&gpio1>;
+ interrupts = <16 1>;
+ ams,tuning-capacitor-pf = <80>;
+};
diff --git a/Documentation/devicetree/bindings/iio/sensorhub.txt b/Documentation/devicetree/bindings/iio/sensorhub.txt
new file mode 100644
index 000000000..8d57571d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/sensorhub.txt
@@ -0,0 +1,25 @@
+Samsung Sensorhub driver
+
+Sensorhub is a MCU which manages several sensors and also plays the role
+of a virtual sensor device.
+
+Required properties:
+- compatible: "samsung,sensorhub-rinato" or "samsung,sensorhub-thermostat"
+- spi-max-frequency: max SPI clock frequency
+- interrupt-parent: interrupt parent
+- interrupts: communication interrupt
+- ap-mcu-gpios: [out] ap to sensorhub line - used during communication
+- mcu-ap-gpios: [in] sensorhub to ap - used during communication
+- mcu-reset-gpios: [out] sensorhub reset
+
+Example:
+
+ shub_spi: shub {
+ compatible = "samsung,sensorhub-rinato";
+ spi-max-frequency = <5000000>;
+ interrupt-parent = <&gpx0>;
+ interrupts = <2 0>;
+ ap-mcu-gpios = <&gpx0 0 0>;
+ mcu-ap-gpios = <&gpx0 4 0>;
+ mcu-reset-gpios = <&gpx0 5 0>;
+ };
diff --git a/Documentation/devicetree/bindings/iio/st-sensors.txt b/Documentation/devicetree/bindings/iio/st-sensors.txt
new file mode 100644
index 000000000..d2aaca974
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/st-sensors.txt
@@ -0,0 +1,55 @@
+STMicroelectronics MEMS sensors
+
+The STMicroelectronics sensor devices are pretty straight-forward I2C or
+SPI devices, all sharing the same device tree descriptions no matter what
+type of sensor it is.
+
+Required properties:
+- compatible: see the list of valid compatible strings below
+- reg: the I2C or SPI address the device will respond to
+
+Optional properties:
+- vdd-supply: an optional regulator that needs to be on to provide VDD
+ power to the sensor.
+- vddio-supply: an optional regulator that needs to be on to provide the
+ VDD IO power to the sensor.
+- st,drdy-int-pin: the pin on the package that will be used to signal
+ "data ready" (valid values: 1 or 2). This property is not configurable
+ on all sensors.
+
+Sensors may also have applicable pin control settings, those use the
+standard bindings from pinctrl/pinctrl-bindings.txt.
+
+Valid compatible strings:
+
+Accelerometers:
+- st,lis3lv02dl-accel
+- st,lsm303dlh-accel
+- st,lsm303dlhc-accel
+- st,lis3dh-accel
+- st,lsm330d-accel
+- st,lsm330dl-accel
+- st,lsm330dlc-accel
+- st,lis331dlh-accel
+- st,lsm303dl-accel
+- st,lsm303dlm-accel
+- st,lsm330-accel
+
+Gyroscopes:
+- st,l3g4200d-gyro
+- st,lsm330d-gyro
+- st,lsm330dl-gyro
+- st,lsm330dlc-gyro
+- st,l3gd20-gyro
+- st,l3g4is-gyro
+- st,lsm330-gyro
+
+Magnetometers:
+- st,lsm303dlhc-magn
+- st,lsm303dlm-magn
+- st,lis3mdl-magn
+
+Pressure sensors:
+- st,lps001wp-press
+- st,lps25h-press
+- st,lps331ap-press
diff --git a/Documentation/devicetree/bindings/input/ads7846.txt b/Documentation/devicetree/bindings/input/ads7846.txt
new file mode 100644
index 000000000..5f7619c22
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/ads7846.txt
@@ -0,0 +1,91 @@
+Device tree bindings for TI's ADS7843, ADS7845, ADS7846, ADS7873, TSC2046
+SPI driven touch screen controllers.
+
+The node for this driver must be a child node of a SPI controller, hence
+all mandatory properties described in
+
+ Documentation/devicetree/bindings/spi/spi-bus.txt
+
+must be specified.
+
+Additional required properties:
+
+ compatible Must be one of the following, depending on the
+ model:
+ "ti,tsc2046"
+ "ti,ads7843"
+ "ti,ads7845"
+ "ti,ads7846"
+ "ti,ads7873"
+
+ interrupt-parent
+ interrupts An interrupt node describing the IRQ line the chip's
+ !PENIRQ pin is connected to.
+ vcc-supply A regulator node for the supply voltage.
+
+
+Optional properties:
+
+ ti,vref-delay-usecs vref supply delay in usecs, 0 for
+ external vref (u16).
+ ti,vref-mv The VREF voltage, in millivolts (u16).
+ ti,keep-vref-on set to keep vref on for differential
+ measurements as well
+ ti,swap-xy swap x and y axis
+ ti,settle-delay-usec Settling time of the analog signals;
+ a function of Vcc and the capacitance
+ on the X/Y drivers. If set to non-zero,
+ two samples are taken with settle_delay
+ us apart, and the second one is used.
+ ~150 uSec with 0.01uF caps (u16).
+ ti,penirq-recheck-delay-usecs If set to non-zero, after samples are
+ taken this delay is applied and penirq
+ is rechecked, to help avoid false
+ events. This value is affected by the
+ material used to build the touch layer
+ (u16).
+ ti,x-plate-ohms Resistance of the X-plate,
+ in Ohms (u16).
+ ti,y-plate-ohms Resistance of the Y-plate,
+ in Ohms (u16).
+ ti,x-min Minimum value on the X axis (u16).
+ ti,y-min Minimum value on the Y axis (u16).
+ ti,x-max Maximum value on the X axis (u16).
+ ti,y-max Minimum value on the Y axis (u16).
+ ti,pressure-min Minimum reported pressure value
+ (threshold) - u16.
+ ti,pressure-max Maximum reported pressure value (u16).
+ ti,debounce-max Max number of additional readings per
+ sample (u16).
+ ti,debounce-tol Tolerance used for filtering (u16).
+ ti,debounce-rep Additional consecutive good readings
+ required after the first two (u16).
+ ti,pendown-gpio-debounce Platform specific debounce time for the
+ pendown-gpio (u32).
+ pendown-gpio GPIO handle describing the pin the !PENIRQ
+ line is connected to.
+ linux,wakeup use any event on touchscreen as wakeup event.
+
+
+Example for a TSC2046 chip connected to an McSPI controller of an OMAP SoC::
+
+ spi_controller {
+ tsc2046@0 {
+ reg = <0>; /* CS0 */
+ compatible = "ti,tsc2046";
+ interrupt-parent = <&gpio1>;
+ interrupts = <8 0>; /* BOOT6 / GPIO 8 */
+ spi-max-frequency = <1000000>;
+ pendown-gpio = <&gpio1 8 0>;
+ vcc-supply = <&reg_vcc3>;
+
+ ti,x-min = /bits/ 16 <0>;
+ ti,x-max = /bits/ 16 <8000>;
+ ti,y-min = /bits/ 16 <0>;
+ ti,y-max = /bits/ 16 <4800>;
+ ti,x-plate-ohms = /bits/ 16 <40>;
+ ti,pressure-max = /bits/ 16 <255>;
+
+ linux,wakeup;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/input/atmel,maxtouch.txt b/Documentation/devicetree/bindings/input/atmel,maxtouch.txt
new file mode 100644
index 000000000..185290651
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/atmel,maxtouch.txt
@@ -0,0 +1,32 @@
+Atmel maXTouch touchscreen/touchpad
+
+Required properties:
+- compatible:
+ atmel,maxtouch
+
+- reg: The I2C address of the device
+
+- interrupts: The sink for the touchpad's IRQ output
+ See ../interrupt-controller/interrupts.txt
+
+Optional properties for main touchpad device:
+
+- linux,gpio-keymap: When enabled, the SPT_GPIOPWN_T19 object sends messages
+ on GPIO bit changes. An array of up to 8 entries can be provided
+ indicating the Linux keycode mapped to each bit of the status byte,
+ starting at the LSB. Linux keycodes are defined in
+ <dt-bindings/input/input.h>.
+
+ Note: the numbering of the GPIOs and the bit they start at varies between
+ maXTouch devices. You must either refer to the documentation, or
+ experiment to determine which bit corresponds to which input. Use
+ KEY_RESERVED for unused padding values.
+
+Example:
+
+ touch@4b {
+ compatible = "atmel,maxtouch";
+ reg = <0x4b>;
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(W, 3) IRQ_TYPE_LEVEL_LOW>;
+ };
diff --git a/Documentation/devicetree/bindings/input/brcm,bcm-keypad.txt b/Documentation/devicetree/bindings/input/brcm,bcm-keypad.txt
new file mode 100644
index 000000000..b77f50bd6
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/brcm,bcm-keypad.txt
@@ -0,0 +1,108 @@
+* Broadcom Keypad Controller device tree bindings
+
+Broadcom Keypad controller is used to interface a SoC with a matrix-type
+keypad device. The keypad controller supports multiple row and column lines.
+A key can be placed at each intersection of a unique row and a unique column.
+The keypad controller can sense a key-press and key-release and report the
+event using a interrupt to the cpu.
+
+This binding is based on the matrix-keymap binding with the following
+changes:
+
+keypad,num-rows and keypad,num-columns are required.
+
+Required SoC Specific Properties:
+- compatible: should be "brcm,bcm-keypad"
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- interrupts: The interrupt number to the cpu.
+
+Board Specific Properties:
+- keypad,num-rows: Number of row lines connected to the keypad
+ controller.
+
+- keypad,num-columns: Number of column lines connected to the
+ keypad controller.
+
+- col-debounce-filter-period: The debounce period for the Column filter.
+
+ KEYPAD_DEBOUNCE_1_ms = 0
+ KEYPAD_DEBOUNCE_2_ms = 1
+ KEYPAD_DEBOUNCE_4_ms = 2
+ KEYPAD_DEBOUNCE_8_ms = 3
+ KEYPAD_DEBOUNCE_16_ms = 4
+ KEYPAD_DEBOUNCE_32_ms = 5
+ KEYPAD_DEBOUNCE_64_ms = 6
+ KEYPAD_DEBOUNCE_128_ms = 7
+
+- status-debounce-filter-period: The debounce period for the Status filter.
+
+ KEYPAD_DEBOUNCE_1_ms = 0
+ KEYPAD_DEBOUNCE_2_ms = 1
+ KEYPAD_DEBOUNCE_4_ms = 2
+ KEYPAD_DEBOUNCE_8_ms = 3
+ KEYPAD_DEBOUNCE_16_ms = 4
+ KEYPAD_DEBOUNCE_32_ms = 5
+ KEYPAD_DEBOUNCE_64_ms = 6
+ KEYPAD_DEBOUNCE_128_ms = 7
+
+- row-output-enabled: An optional property indicating whether the row or
+ column is being used as output. If specified the row is being used
+ as the output. Else defaults to column.
+
+- pull-up-enabled: An optional property indicating the Keypad scan mode.
+ If specified implies the keypad scan pull-up has been enabled.
+
+- autorepeat: Boolean, Enable auto repeat feature of Linux input
+ subsystem (optional).
+
+- linux,keymap: The keymap for keys as described in the binding document
+ devicetree/bindings/input/matrix-keymap.txt.
+
+Example:
+#include "dt-bindings/input/input.h"
+
+/ {
+ keypad: keypad@180ac000 {
+ /* Required SoC specific properties */
+ compatible = "brcm,bcm-keypad";
+
+ /* Required Board specific properties */
+ keypad,num-rows = <5>;
+ keypad,num-columns = <5>;
+ status = "okay";
+
+ linux,keymap = <MATRIX_KEY(0x00, 0x02, KEY_F) /* key_forward */
+ MATRIX_KEY(0x00, 0x03, KEY_HOME) /* key_home */
+ MATRIX_KEY(0x00, 0x04, KEY_M) /* key_message */
+ MATRIX_KEY(0x01, 0x00, KEY_A) /* key_contacts */
+ MATRIX_KEY(0x01, 0x01, KEY_1) /* key_1 */
+ MATRIX_KEY(0x01, 0x02, KEY_2) /* key_2 */
+ MATRIX_KEY(0x01, 0x03, KEY_3) /* key_3 */
+ MATRIX_KEY(0x01, 0x04, KEY_S) /* key_speaker */
+ MATRIX_KEY(0x02, 0x00, KEY_P) /* key_phone */
+ MATRIX_KEY(0x02, 0x01, KEY_4) /* key_4 */
+ MATRIX_KEY(0x02, 0x02, KEY_5) /* key_5 */
+ MATRIX_KEY(0x02, 0x03, KEY_6) /* key_6 */
+ MATRIX_KEY(0x02, 0x04, KEY_VOLUMEUP) /* key_vol_up */
+ MATRIX_KEY(0x03, 0x00, KEY_C) /* key_call_log */
+ MATRIX_KEY(0x03, 0x01, KEY_7) /* key_7 */
+ MATRIX_KEY(0x03, 0x02, KEY_8) /* key_8 */
+ MATRIX_KEY(0x03, 0x03, KEY_9) /* key_9 */
+ MATRIX_KEY(0x03, 0x04, KEY_VOLUMEDOWN) /* key_vol_down */
+ MATRIX_KEY(0x04, 0x00, KEY_H) /* key_headset */
+ MATRIX_KEY(0x04, 0x01, KEY_KPASTERISK) /* key_* */
+ MATRIX_KEY(0x04, 0x02, KEY_0) /* key_0 */
+ MATRIX_KEY(0x04, 0x03, KEY_GRAVE) /* key_# */
+ MATRIX_KEY(0x04, 0x04, KEY_MUTE) /* key_mute */
+ >;
+
+ /* Optional board specific properties */
+ col-debounce-filter-period = <5>;
+ row-output-enabled;
+ pull-up-enabled;
+
+ };
+};
diff --git a/Documentation/devicetree/bindings/input/cap11xx.txt b/Documentation/devicetree/bindings/input/cap11xx.txt
new file mode 100644
index 000000000..7d0a30097
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/cap11xx.txt
@@ -0,0 +1,59 @@
+Device tree bindings for Microchip CAP11xx based capacitive touch sensors
+
+The node for this device must be a child of a I2C controller node, as the
+device communication via I2C only.
+
+Required properties:
+
+ compatible: Must contain one of:
+ "microchip,cap1106"
+ "microchip,cap1126"
+ "microchip,cap1188"
+
+ reg: The I2C slave address of the device.
+
+ interrupts: Property describing the interrupt line the
+ device's ALERT#/CM_IRQ# pin is connected to.
+ The device only has one interrupt source.
+
+Optional properties:
+
+ autorepeat: Enables the Linux input system's autorepeat
+ feature on the input device.
+
+ microchip,sensor-gain: Defines the gain of the sensor circuitry. This
+ effectively controls the sensitivity, as a
+ smaller delta capacitance is required to
+ generate the same delta count values.
+ Valid values are 1, 2, 4, and 8.
+ By default, a gain of 1 is set.
+
+ microchip,irq-active-high: By default the interrupt pin is active low
+ open drain. This property allows using the active
+ high push-pull output.
+
+ linux,keycodes: Specifies an array of numeric keycode values to
+ be used for the channels. If this property is
+ omitted, KEY_A, KEY_B, etc are used as
+ defaults. The array must have exactly six
+ entries.
+
+Example:
+
+i2c_controller {
+ cap1106@28 {
+ compatible = "microchip,cap1106";
+ interrupt-parent = <&gpio1>;
+ interrupts = <0 0>;
+ reg = <0x28>;
+ autorepeat;
+ microchip,sensor-gain = <2>;
+
+ linux,keycodes = <103>, /* KEY_UP */
+ <106>, /* KEY_RIGHT */
+ <108>, /* KEY_DOWN */
+ <105>, /* KEY_LEFT */
+ <109>, /* KEY_PAGEDOWN */
+ <104>; /* KEY_PAGEUP */
+ };
+}
diff --git a/Documentation/devicetree/bindings/input/clps711x-keypad.txt b/Documentation/devicetree/bindings/input/clps711x-keypad.txt
new file mode 100644
index 000000000..e68d2bbc6
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/clps711x-keypad.txt
@@ -0,0 +1,27 @@
+* Cirrus Logic CLPS711X matrix keypad device tree bindings
+
+Required Properties:
+- compatible: Shall contain "cirrus,clps711x-keypad".
+- row-gpios: List of GPIOs used as row lines.
+- poll-interval: Poll interval time in milliseconds.
+- linux,keymap: The definition can be found at
+ bindings/input/matrix-keymap.txt.
+
+Optional Properties:
+- autorepeat: Enable autorepeat feature.
+
+Example:
+ keypad {
+ compatible = "cirrus,ep7312-keypad", "cirrus,clps711x-keypad";
+ autorepeat;
+ poll-interval = <120>;
+ row-gpios = <&porta 0 0>,
+ <&porta 1 0>;
+
+ linux,keymap = <
+ MATRIX_KEY(0, 0, KEY_UP)
+ MATRIX_KEY(0, 1, KEY_DOWN)
+ MATRIX_KEY(1, 0, KEY_LEFT)
+ MATRIX_KEY(1, 1, KEY_RIGHT)
+ >;
+ };
diff --git a/Documentation/devicetree/bindings/input/cros-ec-keyb.txt b/Documentation/devicetree/bindings/input/cros-ec-keyb.txt
new file mode 100644
index 000000000..0f6355ce3
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/cros-ec-keyb.txt
@@ -0,0 +1,72 @@
+ChromeOS EC Keyboard
+
+Google's ChromeOS EC Keyboard is a simple matrix keyboard implemented on
+a separate EC (Embedded Controller) device. It provides a message for reading
+key scans from the EC. These are then converted into keycodes for processing
+by the kernel.
+
+This binding is based on matrix-keymap.txt and extends/modifies it as follows:
+
+Required properties:
+- compatible: "google,cros-ec-keyb"
+
+Optional properties:
+- google,needs-ghost-filter: True to enable a ghost filter for the matrix
+keyboard. This is recommended if the EC does not have its own logic or
+hardware for this.
+
+
+Example:
+
+cros-ec-keyb {
+ compatible = "google,cros-ec-keyb";
+ keypad,num-rows = <8>;
+ keypad,num-columns = <13>;
+ google,needs-ghost-filter;
+ /*
+ * Keymap entries take the form of 0xRRCCKKKK where
+ * RR=Row CC=Column KKKK=Key Code
+ * The values below are for a US keyboard layout and
+ * are taken from the Linux driver. Note that the
+ * 102ND key is not used for US keyboards.
+ */
+ linux,keymap = <
+ /* CAPSLCK F1 B F10 */
+ 0x0001003a 0x0002003b 0x00030030 0x00040044
+ /* N = R_ALT ESC */
+ 0x00060031 0x0008000d 0x000a0064 0x01010001
+ /* F4 G F7 H */
+ 0x0102003e 0x01030022 0x01040041 0x01060023
+ /* ' F9 BKSPACE L_CTRL */
+ 0x01080028 0x01090043 0x010b000e 0x0200001d
+ /* TAB F3 T F6 */
+ 0x0201000f 0x0202003d 0x02030014 0x02040040
+ /* ] Y 102ND [ */
+ 0x0205001b 0x02060015 0x02070056 0x0208001a
+ /* F8 GRAVE F2 5 */
+ 0x02090042 0x03010029 0x0302003c 0x03030006
+ /* F5 6 - \ */
+ 0x0304003f 0x03060007 0x0308000c 0x030b002b
+ /* R_CTRL A D F */
+ 0x04000061 0x0401001e 0x04020020 0x04030021
+ /* S K J ; */
+ 0x0404001f 0x04050025 0x04060024 0x04080027
+ /* L ENTER Z C */
+ 0x04090026 0x040b001c 0x0501002c 0x0502002e
+ /* V X , M */
+ 0x0503002f 0x0504002d 0x05050033 0x05060032
+ /* L_SHIFT / . SPACE */
+ 0x0507002a 0x05080035 0x05090034 0x050B0039
+ /* 1 3 4 2 */
+ 0x06010002 0x06020004 0x06030005 0x06040003
+ /* 8 7 0 9 */
+ 0x06050009 0x06060008 0x0608000b 0x0609000a
+ /* L_ALT DOWN RIGHT Q */
+ 0x060a0038 0x060b006c 0x060c006a 0x07010010
+ /* E R W I */
+ 0x07020012 0x07030013 0x07040011 0x07050017
+ /* U R_SHIFT P O */
+ 0x07060016 0x07070036 0x07080019 0x07090018
+ /* UP LEFT */
+ 0x070b0067 0x070c0069>;
+};
diff --git a/Documentation/devicetree/bindings/input/e3x0-button.txt b/Documentation/devicetree/bindings/input/e3x0-button.txt
new file mode 100644
index 000000000..751665e8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/e3x0-button.txt
@@ -0,0 +1,25 @@
+National Instruments Ettus Research USRP E3x0 button driver
+
+This module is part of the NI Ettus Research USRP E3x0 SDR.
+
+This module provides a simple power button event via two interrupts.
+
+Required properties:
+- compatible: should be one of the following
+ - "ettus,e3x0-button": For devices such as the NI Ettus Research USRP E3x0
+- interrupt-parent:
+ - a phandle to the interrupt controller that it is attached to.
+- interrupts: should be one of the following
+ - <0 30 1>, <0 31 1>: For devices such as the NI Ettus Research USRP E3x0
+- interrupt-names: should be one of the following
+ - "press", "release": For devices such as the NI Ettus Research USRP E3x0
+
+Note: Interrupt numbers might vary depending on the FPGA configuration.
+
+Example:
+ button {
+ compatible = "ettus,e3x0-button";
+ interrupt-parent = <&intc>;
+ interrupts = <0 30 1>, <0 31 1>;
+ interrupt-names = "press", "release";
+ }
diff --git a/Documentation/devicetree/bindings/input/elan_i2c.txt b/Documentation/devicetree/bindings/input/elan_i2c.txt
new file mode 100644
index 000000000..ee3242c4b
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/elan_i2c.txt
@@ -0,0 +1,34 @@
+Elantech I2C Touchpad
+
+Required properties:
+- compatible: must be "elan,ekth3000".
+- reg: I2C address of the chip.
+- interrupt-parent: a phandle for the interrupt controller (see interrupt
+ binding[0]).
+- interrupts: interrupt to which the chip is connected (see interrupt
+ binding[0]).
+
+Optional properties:
+- wakeup-source: touchpad can be used as a wakeup source.
+- pinctrl-names: should be "default" (see pinctrl binding [1]).
+- pinctrl-0: a phandle pointing to the pin settings for the device (see
+ pinctrl binding [1]).
+- vcc-supply: a phandle for the regulator supplying 3.3V power.
+
+[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+[1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+
+Example:
+ &i2c1 {
+ /* ... */
+
+ touchpad@15 {
+ compatible = "elan,ekth3000";
+ reg = <0x15>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <0x0 IRQ_TYPE_EDGE_FALLING>;
+ wakeup-source;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/input/elants_i2c.txt b/Documentation/devicetree/bindings/input/elants_i2c.txt
new file mode 100644
index 000000000..a765232e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/elants_i2c.txt
@@ -0,0 +1,33 @@
+Elantech I2C Touchscreen
+
+Required properties:
+- compatible: must be "elan,ekth3500".
+- reg: I2C address of the chip.
+- interrupt-parent: a phandle for the interrupt controller (see interrupt
+ binding[0]).
+- interrupts: interrupt to which the chip is connected (see interrupt
+ binding[0]).
+
+Optional properties:
+- wakeup-source: touchscreen can be used as a wakeup source.
+- pinctrl-names: should be "default" (see pinctrl binding [1]).
+- pinctrl-0: a phandle pointing to the pin settings for the device (see
+ pinctrl binding [1]).
+
+[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+[1]: Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+
+Example:
+ &i2c1 {
+ /* ... */
+
+ touchscreen@10 {
+ compatible = "elan,ekth3500";
+ reg = <0x10>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <0x0 IRQ_TYPE_EDGE_FALLING>;
+ wakeup-source;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/input/fsl-mma8450.txt b/Documentation/devicetree/bindings/input/fsl-mma8450.txt
new file mode 100644
index 000000000..0b96e5737
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/fsl-mma8450.txt
@@ -0,0 +1,12 @@
+* Freescale MMA8450 3-Axis Accelerometer
+
+Required properties:
+- compatible : "fsl,mma8450".
+- reg: the I2C address of MMA8450
+
+Example:
+
+accelerometer: mma8450@1c {
+ compatible = "fsl,mma8450";
+ reg = <0x1c>;
+};
diff --git a/Documentation/devicetree/bindings/input/gpio-beeper.txt b/Documentation/devicetree/bindings/input/gpio-beeper.txt
new file mode 100644
index 000000000..a5086e37f
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/gpio-beeper.txt
@@ -0,0 +1,13 @@
+* GPIO beeper device tree bindings
+
+Register a beeper connected to GPIO pin.
+
+Required properties:
+- compatible: Should be "gpio-beeper".
+- gpios: From common gpio binding; gpio connection to beeper enable pin.
+
+Example:
+ beeper: beeper {
+ compatible = "gpio-beeper";
+ gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/input/gpio-keys-polled.txt b/Documentation/devicetree/bindings/input/gpio-keys-polled.txt
new file mode 100644
index 000000000..313abefa3
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/gpio-keys-polled.txt
@@ -0,0 +1,38 @@
+Device-Tree bindings for input/gpio_keys_polled.c keyboard driver
+
+Required properties:
+ - compatible = "gpio-keys-polled";
+ - poll-interval: Poll interval time in milliseconds
+
+Optional properties:
+ - autorepeat: Boolean, Enable auto repeat feature of Linux input
+ subsystem.
+
+Each button (key) is represented as a sub-node of "gpio-keys-polled":
+Subnode properties:
+
+ - gpios: OF device-tree gpio specification.
+ - label: Descriptive name of the key.
+ - linux,code: Keycode to emit.
+
+Optional subnode-properties:
+ - linux,input-type: Specify event type this button/key generates.
+ If not specified defaults to <1> == EV_KEY.
+ - debounce-interval: Debouncing interval time in milliseconds.
+ If not specified defaults to 5.
+ - gpio-key,wakeup: Boolean, button can wake-up the system.
+
+Example nodes:
+
+ gpio_keys_polled {
+ compatible = "gpio-keys-polled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ poll-interval = <100>;
+ autorepeat;
+ button@21 {
+ label = "GPIO Key UP";
+ linux,code = <103>;
+ gpios = <&gpio1 0 1>;
+ };
+ ...
diff --git a/Documentation/devicetree/bindings/input/gpio-keys.txt b/Documentation/devicetree/bindings/input/gpio-keys.txt
new file mode 100644
index 000000000..44b705767
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/gpio-keys.txt
@@ -0,0 +1,48 @@
+Device-Tree bindings for input/gpio_keys.c keyboard driver
+
+Required properties:
+ - compatible = "gpio-keys";
+
+Optional properties:
+ - autorepeat: Boolean, Enable auto repeat feature of Linux input
+ subsystem.
+
+Each button (key) is represented as a sub-node of "gpio-keys":
+Subnode properties:
+
+ - gpios: OF device-tree gpio specification.
+ - interrupts: the interrupt line for that input.
+ - label: Descriptive name of the key.
+ - linux,code: Keycode to emit.
+
+Note that either "interrupts" or "gpios" properties can be omitted, but not
+both at the same time. Specifying both properties is allowed.
+
+Optional subnode-properties:
+ - linux,input-type: Specify event type this button/key generates.
+ If not specified defaults to <1> == EV_KEY.
+ - debounce-interval: Debouncing interval time in milliseconds.
+ If not specified defaults to 5.
+ - gpio-key,wakeup: Boolean, button can wake-up the system.
+ - linux,can-disable: Boolean, indicates that button is connected
+ to dedicated (not shared) interrupt which can be disabled to
+ suppress events from the button.
+
+Example nodes:
+
+ gpio_keys {
+ compatible = "gpio-keys";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ autorepeat;
+ button@21 {
+ label = "GPIO Key UP";
+ linux,code = <103>;
+ gpios = <&gpio1 0 1>;
+ };
+ button@22 {
+ label = "GPIO Key DOWN";
+ linux,code = <108>;
+ interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+ };
+ ...
diff --git a/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt
new file mode 100644
index 000000000..ead641c65
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/gpio-matrix-keypad.txt
@@ -0,0 +1,46 @@
+* GPIO driven matrix keypad device tree bindings
+
+GPIO driven matrix keypad is used to interface a SoC with a matrix keypad.
+The matrix keypad supports multiple row and column lines, a key can be
+placed at each intersection of a unique row and a unique column. The matrix
+keypad can sense a key-press and key-release by means of GPIO lines and
+report the event using GPIO interrupts to the cpu.
+
+Required Properties:
+- compatible: Should be "gpio-matrix-keypad"
+- row-gpios: List of gpios used as row lines. The gpio specifier
+ for this property depends on the gpio controller to
+ which these row lines are connected.
+- col-gpios: List of gpios used as column lines. The gpio specifier
+ for this property depends on the gpio controller to
+ which these column lines are connected.
+- linux,keymap: The definition can be found at
+ bindings/input/matrix-keymap.txt
+
+Optional Properties:
+- linux,no-autorepeat: do no enable autorepeat feature.
+- linux,wakeup: use any event on keypad as wakeup event.
+- debounce-delay-ms: debounce interval in milliseconds
+- col-scan-delay-us: delay, measured in microseconds, that is needed
+ before we can scan keypad after activating column gpio
+
+Example:
+ matrix-keypad {
+ compatible = "gpio-matrix-keypad";
+ debounce-delay-ms = <5>;
+ col-scan-delay-us = <2>;
+
+ row-gpios = <&gpio2 25 0
+ &gpio2 26 0
+ &gpio2 27 0>;
+
+ col-gpios = <&gpio2 21 0
+ &gpio2 22 0>;
+
+ linux,keymap = <0x0000008B
+ 0x0100009E
+ 0x02000069
+ 0x0001006A
+ 0x0101001C
+ 0x0201006C>;
+ };
diff --git a/Documentation/devicetree/bindings/input/imx-keypad.txt b/Documentation/devicetree/bindings/input/imx-keypad.txt
new file mode 100644
index 000000000..2ebaf7d26
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/imx-keypad.txt
@@ -0,0 +1,53 @@
+* Freescale i.MX Keypad Port(KPP) device tree bindings
+
+The KPP is designed to interface with a keypad matrix with 2-point contact
+or 3-point contact keys. The KPP is designed to simplify the software task
+of scanning a keypad matrix. The KPP is capable of detecting, debouncing,
+and decoding one or multiple keys pressed simultaneously on a keypad.
+
+Required SoC Specific Properties:
+- compatible: Should be "fsl,<soc>-kpp".
+
+- reg: Physical base address of the KPP and length of memory mapped
+ region.
+
+- interrupts: The KPP interrupt number to the CPU(s).
+
+- clocks: The clock provided by the SoC to the KPP. Some SoCs use dummy
+clock(The clock for the KPP is provided by the SoCs automatically).
+
+Required Board Specific Properties:
+- pinctrl-names: The definition can be found at
+pinctrl/pinctrl-bindings.txt.
+
+- pinctrl-0: The definition can be found at
+pinctrl/pinctrl-bindings.txt.
+
+- linux,keymap: The definition can be found at
+bindings/input/matrix-keymap.txt.
+
+Example:
+kpp: kpp@73f94000 {
+ compatible = "fsl,imx51-kpp", "fsl,imx21-kpp";
+ reg = <0x73f94000 0x4000>;
+ interrupts = <60>;
+ clocks = <&clks 0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_kpp_1>;
+ linux,keymap = <0x00000067 /* KEY_UP */
+ 0x0001006c /* KEY_DOWN */
+ 0x00020072 /* KEY_VOLUMEDOWN */
+ 0x00030066 /* KEY_HOME */
+ 0x0100006a /* KEY_RIGHT */
+ 0x01010069 /* KEY_LEFT */
+ 0x0102001c /* KEY_ENTER */
+ 0x01030073 /* KEY_VOLUMEUP */
+ 0x02000040 /* KEY_F6 */
+ 0x02010042 /* KEY_F8 */
+ 0x02020043 /* KEY_F9 */
+ 0x02030044 /* KEY_F10 */
+ 0x0300003b /* KEY_F1 */
+ 0x0301003c /* KEY_F2 */
+ 0x0302003d /* KEY_F3 */
+ 0x03030074>; /* KEY_POWER */
+};
diff --git a/Documentation/devicetree/bindings/input/input-reset.txt b/Documentation/devicetree/bindings/input/input-reset.txt
new file mode 100644
index 000000000..2bb2626fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/input-reset.txt
@@ -0,0 +1,33 @@
+Input: sysrq reset sequence
+
+A simple binding to represent a set of keys as described in
+include/uapi/linux/input.h. This is to communicate a sequence of keys to the
+sysrq driver. Upon holding the keys for a specified amount of time (if
+specified) the system is sync'ed and reset.
+
+Key sequences are global to the system but all the keys in a set must be coming
+from the same input device.
+
+The /chosen node should contain a 'linux,sysrq-reset-seq' child node to define
+a set of keys.
+
+Required property:
+sysrq-reset-seq: array of Linux keycodes, one keycode per cell.
+
+Optional property:
+timeout-ms: duration keys must be pressed together in milliseconds before
+generating a sysrq. If omitted the system is rebooted immediately when a valid
+sequence has been recognized.
+
+Example:
+
+ chosen {
+ linux,sysrq-reset-seq {
+ keyset = <0x03
+ 0x04
+ 0x0a>;
+ timeout-ms = <3000>;
+ };
+ };
+
+Would represent KEY_2, KEY_3 and KEY_9.
diff --git a/Documentation/devicetree/bindings/input/lpc32xx-key.txt b/Documentation/devicetree/bindings/input/lpc32xx-key.txt
new file mode 100644
index 000000000..bcf62f856
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/lpc32xx-key.txt
@@ -0,0 +1,31 @@
+NXP LPC32xx Key Scan Interface
+
+This binding is based on the matrix-keymap binding with the following
+changes:
+
+Required Properties:
+- compatible: Should be "nxp,lpc3220-key"
+- reg: Physical base address of the controller and length of memory mapped
+ region.
+- interrupts: The interrupt number to the cpu.
+- nxp,debounce-delay-ms: Debounce delay in ms
+- nxp,scan-delay-ms: Repeated scan period in ms
+- linux,keymap: the key-code to be reported when the key is pressed
+ and released, see also
+ Documentation/devicetree/bindings/input/matrix-keymap.txt
+
+Note: keypad,num-rows and keypad,num-columns are required, and must be equal
+since LPC32xx only supports square matrices
+
+Example:
+
+ key@40050000 {
+ compatible = "nxp,lpc3220-key";
+ reg = <0x40050000 0x1000>;
+ interrupts = <54 0>;
+ keypad,num-rows = <1>;
+ keypad,num-columns = <1>;
+ nxp,debounce-delay-ms = <3>;
+ nxp,scan-delay-ms = <34>;
+ linux,keymap = <0x00000002>;
+ };
diff --git a/Documentation/devicetree/bindings/input/matrix-keymap.txt b/Documentation/devicetree/bindings/input/matrix-keymap.txt
new file mode 100644
index 000000000..c54919fad
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/matrix-keymap.txt
@@ -0,0 +1,27 @@
+A simple common binding for matrix-connected key boards. Currently targeted at
+defining the keys in the scope of linux key codes since that is a stable and
+standardized interface at this time.
+
+Required properties:
+- linux,keymap: an array of packed 1-cell entries containing the equivalent
+ of row, column and linux key-code. The 32-bit big endian cell is packed
+ as:
+ row << 24 | column << 16 | key-code
+
+Optional properties:
+Properties for the number of rows and columns are optional because some
+drivers will use fixed values for these.
+- keypad,num-rows: Number of row lines connected to the keypad controller.
+- keypad,num-columns: Number of column lines connected to the keypad
+ controller.
+
+Some users of this binding might choose to specify secondary keymaps for
+cases where there is a modifier key such as a Fn key. Proposed names
+for said properties are "linux,fn-keymap" or with another descriptive
+word for the modifier other from "Fn".
+
+Example:
+ linux,keymap = < 0x00030012
+ 0x0102003a >;
+ keypad,num-rows = <2>;
+ keypad,num-columns = <8>;
diff --git a/Documentation/devicetree/bindings/input/nvidia,tegra20-kbc.txt b/Documentation/devicetree/bindings/input/nvidia,tegra20-kbc.txt
new file mode 100644
index 000000000..0382b8bd6
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/nvidia,tegra20-kbc.txt
@@ -0,0 +1,54 @@
+* Tegra keyboard controller
+The key controller has maximum 24 pins to make matrix keypad. Any pin
+can be configured as row or column. The maximum column pin can be 8
+and maximum row pins can be 16 for Tegra20/Tegra30.
+
+Required properties:
+- compatible: "nvidia,tegra20-kbc"
+- reg: Register base address of KBC.
+- interrupts: Interrupt number for the KBC.
+- nvidia,kbc-row-pins: The KBC pins which are configured as row. This is an
+ array of pin numbers which is used as rows.
+- nvidia,kbc-col-pins: The KBC pins which are configured as column. This is an
+ array of pin numbers which is used as column.
+- linux,keymap: The keymap for keys as described in the binding document
+ devicetree/bindings/input/matrix-keymap.txt.
+- clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - kbc
+
+Optional properties, in addition to those specified by the shared
+matrix-keyboard bindings:
+
+- linux,fn-keymap: a second keymap, same specification as the
+ matrix-keyboard-controller spec but to be used when the KEY_FN modifier
+ key is pressed.
+- nvidia,debounce-delay-ms: delay in milliseconds per row scan for debouncing
+- nvidia,repeat-delay-ms: delay in milliseconds before repeat starts
+- nvidia,ghost-filter: enable ghost filtering for this device
+- nvidia,wakeup-source: configure keyboard as a wakeup source for suspend/resume
+
+Example:
+
+keyboard: keyboard {
+ compatible = "nvidia,tegra20-kbc";
+ reg = <0x7000e200 0x100>;
+ interrupts = <0 85 0x04>;
+ clocks = <&tegra_car 36>;
+ resets = <&tegra_car 36>;
+ reset-names = "kbc";
+ nvidia,ghost-filter;
+ nvidia,debounce-delay-ms = <640>;
+ nvidia,kbc-row-pins = <0 1 2>; /* pin 0, 1, 2 as rows */
+ nvidia,kbc-col-pins = <11 12 13>; /* pin 11, 12, 13 as columns */
+ linux,keymap = <0x00000074
+ 0x00010067
+ 0x00020066
+ 0x01010068
+ 0x02000069
+ 0x02010070
+ 0x02020071>;
+};
diff --git a/Documentation/devicetree/bindings/input/omap-keypad.txt b/Documentation/devicetree/bindings/input/omap-keypad.txt
new file mode 100644
index 000000000..34ed1c60f
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/omap-keypad.txt
@@ -0,0 +1,28 @@
+* TI's Keypad Controller device tree bindings
+
+TI's Keypad controller is used to interface a SoC with a matrix-type
+keypad device. The keypad controller supports multiple row and column lines.
+A key can be placed at each intersection of a unique row and a unique column.
+The keypad controller can sense a key-press and key-release and report the
+event using a interrupt to the cpu.
+
+This binding is based on the matrix-keymap binding with the following
+changes:
+
+keypad,num-rows and keypad,num-columns are required.
+
+Required SoC Specific Properties:
+- compatible: should be one of the following
+ - "ti,omap4-keypad": For controllers compatible with omap4 keypad
+ controller.
+
+Optional Properties specific to linux:
+- linux,keypad-no-autorepeat: do no enable autorepeat feature.
+
+Example:
+ keypad@4ae1c000{
+ compatible = "ti,omap4-keypad";
+ keypad,num-rows = <2>;
+ keypad,num-columns = <8>;
+ linux,keypad-no-autorepeat;
+ };
diff --git a/Documentation/devicetree/bindings/input/ps2keyb-mouse-apbps2.txt b/Documentation/devicetree/bindings/input/ps2keyb-mouse-apbps2.txt
new file mode 100644
index 000000000..3029c5694
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/ps2keyb-mouse-apbps2.txt
@@ -0,0 +1,16 @@
+Aeroflex Gaisler APBPS2 PS/2 Core, supporting Keyboard or Mouse.
+
+The APBPS2 PS/2 core is available in the GRLIB VHDL IP core library.
+
+Note: In the ordinary environment for the APBPS2 core, a LEON SPARC system,
+these properties are built from information in the AMBA plug&play and from
+bootloader settings.
+
+Required properties:
+
+- name : Should be "GAISLER_APBPS2" or "01_060"
+- reg : Address and length of the register set for the device
+- interrupts : Interrupt numbers for this device
+
+For further information look in the documentation for the GLIB IP core library:
+http://www.gaisler.com/products/grlib/grip.pdf
diff --git a/Documentation/devicetree/bindings/input/pwm-beeper.txt b/Documentation/devicetree/bindings/input/pwm-beeper.txt
new file mode 100644
index 000000000..be332ae4f
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/pwm-beeper.txt
@@ -0,0 +1,7 @@
+* PWM beeper device tree bindings
+
+Registers a PWM device as beeper.
+
+Required properties:
+- compatible: should be "pwm-beeper"
+- pwms: phandle to the physical PWM device
diff --git a/Documentation/devicetree/bindings/input/pxa27x-keypad.txt b/Documentation/devicetree/bindings/input/pxa27x-keypad.txt
new file mode 100644
index 000000000..f8674f7e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/pxa27x-keypad.txt
@@ -0,0 +1,60 @@
+* Marvell PXA Keypad controller
+
+Required Properties
+- compatible : should be "marvell,pxa27x-keypad"
+- reg : Address and length of the register set for the device
+- interrupts : The interrupt for the keypad controller
+- marvell,debounce-interval : How long time the key will be
+ recognized when it is pressed. It is a u32 value, and bit[31:16]
+ is debounce interval for direct key and bit[15:0] is debounce
+ interval for matrix key. The value is in binary number of 2ms
+
+Optional Properties For Matrix Keyes
+Please refer to matrix-keymap.txt
+
+Optional Properties for Direct Keyes
+- marvell,direct-key-count : How many direct keyes are used.
+- marvell,direct-key-mask : The mask indicates which keyes
+ are used. If bit[X] of the mask is set, the direct key X
+ is used.
+- marvell,direct-key-low-active : Direct key status register
+ tells the level of pins that connects to the direct keyes.
+ When this property is set, it means that when the pin level
+ is low, the key is pressed(active).
+- marvell,direct-key-map : It is a u16 array. Each item indicates
+ the linux key-code for the direct key.
+
+Optional Properties For Rotary
+- marvell,rotary0 : It is a u32 value. Bit[31:16] is the
+ linux key-code for rotary up. Bit[15:0] is the linux key-code
+ for rotary down. It is for rotary 0.
+- marvell,rotary1 : Same as marvell,rotary0. It is for rotary 1.
+- marvell,rotary-rel-key : When rotary is used for relative axes
+ in the device, the value indicates the key-code for relative
+ axes measurement in the device. It is a u32 value. Bit[31:16]
+ is for rotary 1, and Bit[15:0] is for rotary 0.
+
+Examples:
+ keypad: keypad@d4012000 {
+ keypad,num-rows = <3>;
+ keypad,num-columns = <5>;
+ linux,keymap = <0x0000000e /* KEY_BACKSPACE */
+ 0x0001006b /* KEY_END */
+ 0x00020061 /* KEY_RIGHTCTRL */
+ 0x0003000b /* KEY_0 */
+ 0x00040002 /* KEY_1 */
+ 0x0100008b /* KEY_MENU */
+ 0x01010066 /* KEY_HOME */
+ 0x010200e7 /* KEY_SEND */
+ 0x01030009 /* KEY_8 */
+ 0x0104000a /* KEY_9 */
+ 0x02000160 /* KEY_OK */
+ 0x02010003 /* KEY_2 */
+ 0x02020004 /* KEY_3 */
+ 0x02030005 /* KEY_4 */
+ 0x02040006>; /* KEY_5 */
+ marvell,rotary0 = <0x006c0067>; /* KEY_UP & KEY_DOWN */
+ marvell,direct-key-count = <1>;
+ marvell,direct-key-map = <0x001c>;
+ marvell,debounce-interval = <0x001e001e>;
+ };
diff --git a/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt b/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt
new file mode 100644
index 000000000..07bf55f6e
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt
@@ -0,0 +1,43 @@
+Qualcomm PM8941 PMIC Power Key
+
+PROPERTIES
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be one of:
+ "qcom,pm8941-pwrkey"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: base address of registers for block
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: key change interrupt; The format of the specifier is
+ defined by the binding document describing the node's
+ interrupt parent.
+
+- debounce:
+ Usage: optional
+ Value type: <u32>
+ Definition: time in microseconds that key must be pressed or released
+ for state change interrupt to trigger.
+
+- bias-pull-up:
+ Usage: optional
+ Value type: <empty>
+ Definition: presence of this property indicates that the KPDPWR_N pin
+ should be configured for pull up.
+
+EXAMPLE
+
+ pwrkey@800 {
+ compatible = "qcom,pm8941-pwrkey";
+ reg = <0x800>;
+ interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>;
+ debounce = <15625>;
+ bias-pull-up;
+ };
diff --git a/Documentation/devicetree/bindings/input/qcom,pm8xxx-keypad.txt b/Documentation/devicetree/bindings/input/qcom,pm8xxx-keypad.txt
new file mode 100644
index 000000000..7d8cb9283
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/qcom,pm8xxx-keypad.txt
@@ -0,0 +1,89 @@
+Qualcomm PM8xxx PMIC Keypad
+
+PROPERTIES
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be one of:
+ "qcom,pm8058-keypad"
+ "qcom,pm8921-keypad"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: address of keypad control register
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the first interrupt specifies the key sense interrupt
+ and the second interrupt specifies the key stuck interrupt.
+ The format of the specifier is defined by the binding
+ document describing the node's interrupt parent.
+
+- linux,keymap:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the linux keymap. More information can be found in
+ input/matrix-keymap.txt.
+
+- linux,keypad-no-autorepeat:
+ Usage: optional
+ Value type: <bool>
+ Definition: don't enable autorepeat feature.
+
+- linux,keypad-wakeup:
+ Usage: optional
+ Value type: <bool>
+ Definition: use any event on keypad as wakeup event.
+
+- keypad,num-rows:
+ Usage: required
+ Value type: <u32>
+ Definition: number of rows in the keymap. More information can be found
+ in input/matrix-keymap.txt.
+
+- keypad,num-columns:
+ Usage: required
+ Value type: <u32>
+ Definition: number of columns in the keymap. More information can be
+ found in input/matrix-keymap.txt.
+
+- debounce:
+ Usage: optional
+ Value type: <u32>
+ Definition: time in microseconds that key must be pressed or release
+ for key sense interrupt to trigger.
+
+- scan-delay:
+ Usage: optional
+ Value type: <u32>
+ Definition: time in microseconds to pause between successive scans
+ of the matrix array.
+
+- row-hold:
+ Usage: optional
+ Value type: <u32>
+ Definition: time in nanoseconds to pause between scans of each row in
+ the matrix array.
+
+EXAMPLE
+
+ keypad@148 {
+ compatible = "qcom,pm8921-keypad";
+ reg = <0x148>;
+ interrupt-parent = <&pmicintc>;
+ interrupts = <74 1>, <75 1>;
+ linux,keymap = <
+ MATRIX_KEY(0, 0, KEY_VOLUMEUP)
+ MATRIX_KEY(0, 1, KEY_VOLUMEDOWN)
+ MATRIX_KEY(0, 2, KEY_CAMERA_FOCUS)
+ MATRIX_KEY(0, 3, KEY_CAMERA)
+ >;
+ keypad,num-rows = <1>;
+ keypad,num-columns = <5>;
+ debounce = <15>;
+ scan-delay = <32>;
+ row-hold = <91500>;
+ };
diff --git a/Documentation/devicetree/bindings/input/qcom,pm8xxx-pwrkey.txt b/Documentation/devicetree/bindings/input/qcom,pm8xxx-pwrkey.txt
new file mode 100644
index 000000000..588536cc9
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/qcom,pm8xxx-pwrkey.txt
@@ -0,0 +1,46 @@
+Qualcomm PM8xxx PMIC Power Key
+
+PROPERTIES
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be one of:
+ "qcom,pm8058-pwrkey"
+ "qcom,pm8921-pwrkey"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: address of power key control register
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the first interrupt specifies the key release interrupt
+ and the second interrupt specifies the key press interrupt.
+ The format of the specifier is defined by the binding
+ document describing the node's interrupt parent.
+
+- debounce:
+ Usage: optional
+ Value type: <u32>
+ Definition: time in microseconds that key must be pressed or release
+ for state change interrupt to trigger.
+
+- pull-up:
+ Usage: optional
+ Value type: <empty>
+ Definition: presence of this property indicates that the KPDPWR_N pin
+ should be configured for pull up.
+
+EXAMPLE
+
+ pwrkey@1c {
+ compatible = "qcom,pm8921-pwrkey";
+ reg = <0x1c>;
+ interrupt-parent = <&pmicintc>;
+ interrupts = <50 1>, <51 1>;
+ debounce = <15625>;
+ pull-up;
+ };
diff --git a/Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.txt b/Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.txt
new file mode 100644
index 000000000..4ed467b1e
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/qcom,pm8xxx-vib.txt
@@ -0,0 +1,22 @@
+Qualcomm PM8xxx PMIC Vibrator
+
+PROPERTIES
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be one of:
+ "qcom,pm8058-vib"
+ "qcom,pm8921-vib"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: address of vibration control register
+
+EXAMPLE
+
+ vibrator@4a {
+ compatible = "qcom,pm8058-vib";
+ reg = <0x4a>;
+ };
diff --git a/Documentation/devicetree/bindings/input/regulator-haptic.txt b/Documentation/devicetree/bindings/input/regulator-haptic.txt
new file mode 100644
index 000000000..3ed1c7eb2
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/regulator-haptic.txt
@@ -0,0 +1,21 @@
+* Regulator Haptic Device Tree Bindings
+
+Required Properties:
+ - compatible : Should be "regulator-haptic"
+ - haptic-supply : Power supply to the haptic motor.
+ [*] refer Documentation/devicetree/bindings/regulator/regulator.txt
+
+ - max-microvolt : The maximum voltage value supplied to the haptic motor.
+ [The unit of the voltage is a micro]
+
+ - min-microvolt : The minimum voltage value supplied to the haptic motor.
+ [The unit of the voltage is a micro]
+
+Example:
+
+ haptics {
+ compatible = "regulator-haptic";
+ haptic-supply = <&motor_regulator>;
+ max-microvolt = <2700000>;
+ min-microvolt = <1100000>;
+ };
diff --git a/Documentation/devicetree/bindings/input/rotary-encoder.txt b/Documentation/devicetree/bindings/input/rotary-encoder.txt
new file mode 100644
index 000000000..331549593
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/rotary-encoder.txt
@@ -0,0 +1,36 @@
+Rotary encoder DT bindings
+
+Required properties:
+- gpios: a spec for two GPIOs to be used
+
+Optional properties:
+- linux,axis: the input subsystem axis to map to this rotary encoder.
+ Defaults to 0 (ABS_X / REL_X)
+- rotary-encoder,steps: Number of steps in a full turnaround of the
+ encoder. Only relevant for absolute axis. Defaults to 24 which is a
+ typical value for such devices.
+- rotary-encoder,relative-axis: register a relative axis rather than an
+ absolute one. Relative axis will only generate +1/-1 events on the input
+ device, hence no steps need to be passed.
+- rotary-encoder,rollover: Automatic rollove when the rotary value becomes
+ greater than the specified steps or smaller than 0. For absolute axis only.
+- rotary-encoder,half-period: Makes the driver work on half-period mode.
+
+See Documentation/input/rotary-encoder.txt for more information.
+
+Example:
+
+ rotary@0 {
+ compatible = "rotary-encoder";
+ gpios = <&gpio 19 1>, <&gpio 20 0>; /* GPIO19 is inverted */
+ linux,axis = <0>; /* REL_X */
+ rotary-encoder,relative-axis;
+ };
+
+ rotary@1 {
+ compatible = "rotary-encoder";
+ gpios = <&gpio 21 0>, <&gpio 22 0>;
+ linux,axis = <1>; /* ABS_Y */
+ rotary-encoder,steps = <24>;
+ rotary-encoder,rollover;
+ };
diff --git a/Documentation/devicetree/bindings/input/samsung-keypad.txt b/Documentation/devicetree/bindings/input/samsung-keypad.txt
new file mode 100644
index 000000000..942d071ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/samsung-keypad.txt
@@ -0,0 +1,74 @@
+* Samsung's Keypad Controller device tree bindings
+
+Samsung's Keypad controller is used to interface a SoC with a matrix-type
+keypad device. The keypad controller supports multiple row and column lines.
+A key can be placed at each intersection of a unique row and a unique column.
+The keypad controller can sense a key-press and key-release and report the
+event using a interrupt to the cpu.
+
+Required SoC Specific Properties:
+- compatible: should be one of the following
+ - "samsung,s3c6410-keypad": For controllers compatible with s3c6410 keypad
+ controller.
+ - "samsung,s5pv210-keypad": For controllers compatible with s5pv210 keypad
+ controller.
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- interrupts: The interrupt number to the cpu.
+
+Required Board Specific Properties:
+- samsung,keypad-num-rows: Number of row lines connected to the keypad
+ controller.
+
+- samsung,keypad-num-columns: Number of column lines connected to the
+ keypad controller.
+
+- Keys represented as child nodes: Each key connected to the keypad
+ controller is represented as a child node to the keypad controller
+ device node and should include the following properties.
+ - keypad,row: the row number to which the key is connected.
+ - keypad,column: the column number to which the key is connected.
+ - linux,code: the key-code to be reported when the key is pressed
+ and released.
+
+- pinctrl-0: Should specify pin control groups used for this controller.
+- pinctrl-names: Should contain only one value - "default".
+
+Optional Properties specific to linux:
+- linux,keypad-no-autorepeat: do no enable autorepeat feature.
+- linux,keypad-wakeup: use any event on keypad as wakeup event.
+
+
+Example:
+ keypad@100A0000 {
+ compatible = "samsung,s5pv210-keypad";
+ reg = <0x100A0000 0x100>;
+ interrupts = <173>;
+ samsung,keypad-num-rows = <2>;
+ samsung,keypad-num-columns = <8>;
+ linux,input-no-autorepeat;
+ linux,input-wakeup;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&keypad_rows &keypad_columns>;
+
+ key_1 {
+ keypad,row = <0>;
+ keypad,column = <3>;
+ linux,code = <2>;
+ };
+
+ key_2 {
+ keypad,row = <0>;
+ keypad,column = <4>;
+ linux,code = <3>;
+ };
+
+ key_3 {
+ keypad,row = <0>;
+ keypad,column = <5>;
+ linux,code = <4>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/input/spear-keyboard.txt b/Documentation/devicetree/bindings/input/spear-keyboard.txt
new file mode 100644
index 000000000..4a846d26d
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/spear-keyboard.txt
@@ -0,0 +1,20 @@
+* SPEAr keyboard controller
+
+Required properties:
+- compatible: "st,spear300-kbd"
+
+Optional properties, in addition to those specified by the shared
+matrix-keyboard bindings:
+- autorepeat: bool: enables key autorepeat
+- st,mode: keyboard mode: 0 - 9x9, 1 - 6x6, 2 - 2x2
+
+Example:
+
+kbd@fc400000 {
+ compatible = "st,spear300-kbd";
+ reg = <0xfc400000 0x100>;
+ linux,keymap = < 0x00030012
+ 0x0102003a >;
+ autorepeat;
+ st,mode = <0>;
+};
diff --git a/Documentation/devicetree/bindings/input/st-keyscan.txt b/Documentation/devicetree/bindings/input/st-keyscan.txt
new file mode 100644
index 000000000..51eb428e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/st-keyscan.txt
@@ -0,0 +1,60 @@
+* ST Keyscan controller Device Tree bindings
+
+The ST keyscan controller Device Tree binding is based on the
+matrix-keymap.
+
+Required properties:
+- compatible: "st,sti-keyscan"
+
+- reg: Register base address and size of st-keyscan controller.
+
+- interrupts: Interrupt number for the st-keyscan controller.
+
+- clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+
+- pinctrl: Should specify pin control groups used for this controller.
+ See ../pinctrl/pinctrl-bindings.txt for details.
+
+- linux,keymap: The keymap for keys as described in the binding document
+ devicetree/bindings/input/matrix-keymap.txt.
+
+- keypad,num-rows: Number of row lines connected to the keypad controller.
+
+- keypad,num-columns: Number of column lines connected to the keypad
+ controller.
+
+Optional property:
+- st,debounce_us: Debouncing interval time in microseconds
+
+Example:
+
+keyscan: keyscan@fe4b0000 {
+ compatible = "st,sti-keyscan";
+ reg = <0xfe4b0000 0x2000>;
+ interrupts = <GIC_SPI 212 IRQ_TYPE_NONE>;
+ clocks = <&CLK_SYSIN>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_keyscan>;
+
+ keypad,num-rows = <4>;
+ keypad,num-columns = <4>;
+ st,debounce_us = <5000>;
+
+ linux,keymap = < MATRIX_KEY(0x00, 0x00, KEY_F13)
+ MATRIX_KEY(0x00, 0x01, KEY_F9)
+ MATRIX_KEY(0x00, 0x02, KEY_F5)
+ MATRIX_KEY(0x00, 0x03, KEY_F1)
+ MATRIX_KEY(0x01, 0x00, KEY_F14)
+ MATRIX_KEY(0x01, 0x01, KEY_F10)
+ MATRIX_KEY(0x01, 0x02, KEY_F6)
+ MATRIX_KEY(0x01, 0x03, KEY_F2)
+ MATRIX_KEY(0x02, 0x00, KEY_F15)
+ MATRIX_KEY(0x02, 0x01, KEY_F11)
+ MATRIX_KEY(0x02, 0x02, KEY_F7)
+ MATRIX_KEY(0x02, 0x03, KEY_F3)
+ MATRIX_KEY(0x03, 0x00, KEY_F16)
+ MATRIX_KEY(0x03, 0x01, KEY_F12)
+ MATRIX_KEY(0x03, 0x02, KEY_F8)
+ MATRIX_KEY(0x03, 0x03, KEY_F4) >;
+ };
diff --git a/Documentation/devicetree/bindings/input/stmpe-keypad.txt b/Documentation/devicetree/bindings/input/stmpe-keypad.txt
new file mode 100644
index 000000000..12bb771d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/stmpe-keypad.txt
@@ -0,0 +1,41 @@
+* STMPE Keypad
+
+Required properties:
+ - compatible : "st,stmpe-keypad"
+ - linux,keymap : See ./matrix-keymap.txt
+
+Optional properties:
+ - debounce-interval : Debouncing interval time in milliseconds
+ - st,scan-count : Scanning cycles elapsed before key data is updated
+ - st,no-autorepeat : If specified device will not autorepeat
+ - keypad,num-rows : See ./matrix-keymap.txt
+ - keypad,num-columns : See ./matrix-keymap.txt
+
+Example:
+
+ stmpe_keypad {
+ compatible = "st,stmpe-keypad";
+
+ debounce-interval = <64>;
+ st,scan-count = <8>;
+ st,no-autorepeat;
+
+ linux,keymap = <0x205006b
+ 0x4010074
+ 0x3050072
+ 0x1030004
+ 0x502006a
+ 0x500000a
+ 0x5008b
+ 0x706001c
+ 0x405000b
+ 0x6070003
+ 0x3040067
+ 0x303006c
+ 0x60400e7
+ 0x602009e
+ 0x4020073
+ 0x5050002
+ 0x4030069
+ 0x3020008>;
+ };
diff --git a/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
new file mode 100644
index 000000000..b9c32f6fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/sun4i-lradc-keys.txt
@@ -0,0 +1,62 @@
+Allwinner sun4i low res adc attached tablet keys
+------------------------------------------------
+
+Required properties:
+ - compatible: "allwinner,sun4i-a10-lradc-keys"
+ - reg: mmio address range of the chip
+ - interrupts: interrupt to which the chip is connected
+ - vref-supply: powersupply for the lradc reference voltage
+
+Each key is represented as a sub-node of "allwinner,sun4i-a10-lradc-keys":
+
+Required subnode-properties:
+ - label: Descriptive name of the key.
+ - linux,code: Keycode to emit.
+ - channel: Channel this key is attached to, mut be 0 or 1.
+ - voltage: Voltage in µV at lradc input when this key is pressed.
+
+Example:
+
+#include <dt-bindings/input/input.h>
+
+ lradc: lradc@01c22800 {
+ compatible = "allwinner,sun4i-a10-lradc-keys";
+ reg = <0x01c22800 0x100>;
+ interrupts = <31>;
+ vref-supply = <&reg_vcc3v0>;
+
+ button@191 {
+ label = "Volume Up";
+ linux,code = <KEY_VOLUMEUP>;
+ channel = <0>;
+ voltage = <191274>;
+ };
+
+ button@392 {
+ label = "Volume Down";
+ linux,code = <KEY_VOLUMEDOWN>;
+ channel = <0>;
+ voltage = <392644>;
+ };
+
+ button@601 {
+ label = "Menu";
+ linux,code = <KEY_MENU>;
+ channel = <0>;
+ voltage = <601151>;
+ };
+
+ button@795 {
+ label = "Enter";
+ linux,code = <KEY_ENTER>;
+ channel = <0>;
+ voltage = <795090>;
+ };
+
+ button@987 {
+ label = "Home";
+ linux,code = <KEY_HOMEPAGE>;
+ channel = <0>;
+ voltage = <987387>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/input/tca8418_keypad.txt b/Documentation/devicetree/bindings/input/tca8418_keypad.txt
new file mode 100644
index 000000000..255185009
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/tca8418_keypad.txt
@@ -0,0 +1,10 @@
+This binding is based on the matrix-keymap binding with the following
+changes:
+
+keypad,num-rows and keypad,num-columns are required.
+
+Required properties:
+- compatible: "ti,tca8418"
+- reg: the I2C address
+- interrupts: IRQ line number, should trigger on falling edge
+- linux,keymap: Keys definitions, see keypad-matrix.
diff --git a/Documentation/devicetree/bindings/input/ti,drv260x.txt b/Documentation/devicetree/bindings/input/ti,drv260x.txt
new file mode 100644
index 000000000..ee09c8f44
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/ti,drv260x.txt
@@ -0,0 +1,50 @@
+* Texas Instruments - drv260x Haptics driver family
+
+Required properties:
+ - compatible - One of:
+ "ti,drv2604" - DRV2604
+ "ti,drv2605" - DRV2605
+ "ti,drv2605l" - DRV2605L
+ - reg - I2C slave address
+ - vbat-supply - Required supply regulator
+ - mode - Power up mode of the chip (defined in include/dt-bindings/input/ti-drv260x.h)
+ DRV260X_LRA_MODE - Linear Resonance Actuator mode (Piezoelectric)
+ DRV260X_LRA_NO_CAL_MODE - This is a LRA Mode but there is no calibration
+ sequence during init. And the device is configured for real
+ time playback mode (RTP mode).
+ DRV260X_ERM_MODE - Eccentric Rotating Mass mode (Rotary vibrator)
+ - library-sel - These are ROM based waveforms pre-programmed into the IC.
+ This should be set to set the library to use at power up.
+ (defined in include/dt-bindings/input/ti-drv260x.h)
+ DRV260X_LIB_EMPTY - Do not use a pre-programmed library
+ DRV260X_ERM_LIB_A - Pre-programmed Library
+ DRV260X_ERM_LIB_B - Pre-programmed Library
+ DRV260X_ERM_LIB_C - Pre-programmed Library
+ DRV260X_ERM_LIB_D - Pre-programmed Library
+ DRV260X_ERM_LIB_E - Pre-programmed Library
+ DRV260X_ERM_LIB_F - Pre-programmed Library
+ DRV260X_LIB_LRA - Pre-programmed LRA Library
+
+Optional properties:
+ - enable-gpio - gpio pin to enable/disable the device.
+ - vib-rated-mv - The rated voltage of the actuator in millivolts.
+ If this is not set then the value will be defaulted to
+ 3.2 v.
+ - vib-overdrive-mv - The overdrive voltage of the actuator in millivolts.
+ If this is not set then the value will be defaulted to
+ 3.2 v.
+Example:
+
+haptics: haptics@5a {
+ compatible = "ti,drv2605l";
+ reg = <0x5a>;
+ vbat-supply = <&vbat>;
+ enable-gpio = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+ mode = <DRV260X_LRA_MODE>;
+ library-sel = <DRV260X_LIB_LRA>;
+ vib-rated-mv = <3200>;
+ vib-overdriver-mv = <3200>;
+}
+
+For more product information please see the link below:
+http://www.ti.com/product/drv2605
diff --git a/Documentation/devicetree/bindings/input/ti,drv2667.txt b/Documentation/devicetree/bindings/input/ti,drv2667.txt
new file mode 100644
index 000000000..996382cf9
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/ti,drv2667.txt
@@ -0,0 +1,17 @@
+* Texas Instruments - drv2667 Haptics driver
+
+Required properties:
+ - compatible - "ti,drv2667" - DRV2667
+ - reg - I2C slave address
+ - vbat-supply - Required supply regulator
+
+Example:
+
+haptics: haptics@59 {
+ compatible = "ti,drv2667";
+ reg = <0x59>;
+ vbat-supply = <&vbat>;
+};
+
+For more product information please see the link below:
+http://www.ti.com/product/drv2667
diff --git a/Documentation/devicetree/bindings/input/ti,nspire-keypad.txt b/Documentation/devicetree/bindings/input/ti,nspire-keypad.txt
new file mode 100644
index 000000000..513d94d6e
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/ti,nspire-keypad.txt
@@ -0,0 +1,60 @@
+TI-NSPIRE Keypad
+
+Required properties:
+- compatible: Compatible property value should be "ti,nspire-keypad".
+
+- reg: Physical base address of the peripheral and length of memory mapped
+ region.
+
+- interrupts: The interrupt number for the peripheral.
+
+- scan-interval: How often to scan in us. Based on a APB speed of 33MHz, the
+ maximum and minimum delay time is ~2000us and ~500us respectively
+
+- row-delay: How long to wait before scanning each row.
+
+- clocks: The clock this peripheral is attached to.
+
+- linux,keymap: The keymap to use
+ (see Documentation/devicetree/bindings/input/matrix-keymap.txt)
+
+Optional properties:
+- active-low: Specify that the keypad is active low (i.e. logical low signifies
+ a key press).
+
+Example:
+
+input {
+ compatible = "ti,nspire-keypad";
+ reg = <0x900E0000 0x1000>;
+ interrupts = <16>;
+
+ scan-interval = <1000>;
+ row-delay = <200>;
+
+ clocks = <&apb_pclk>;
+
+ linux,keymap = <
+ 0x0000001c 0x0001001c 0x00040039
+ 0x0005002c 0x00060015 0x0007000b
+ 0x0008000f 0x0100002d 0x01010011
+ 0x0102002f 0x01030004 0x01040016
+ 0x01050014 0x0106001f 0x01070002
+ 0x010a006a 0x02000013 0x02010010
+ 0x02020019 0x02030007 0x02040018
+ 0x02050031 0x02060032 0x02070005
+ 0x02080028 0x0209006c 0x03000026
+ 0x03010025 0x03020024 0x0303000a
+ 0x03040017 0x03050023 0x03060022
+ 0x03070008 0x03080035 0x03090069
+ 0x04000021 0x04010012 0x04020020
+ 0x0404002e 0x04050030 0x0406001e
+ 0x0407000d 0x04080037 0x04090067
+ 0x05010038 0x0502000c 0x0503001b
+ 0x05040034 0x0505001a 0x05060006
+ 0x05080027 0x0509000e 0x050a006f
+ 0x0600002b 0x0602004e 0x06030068
+ 0x06040003 0x0605006d 0x06060009
+ 0x06070001 0x0609000f 0x0708002a
+ 0x0709001d 0x070a0033 >;
+};
diff --git a/Documentation/devicetree/bindings/input/ti,palmas-pwrbutton.txt b/Documentation/devicetree/bindings/input/ti,palmas-pwrbutton.txt
new file mode 100644
index 000000000..a3dde8c30
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/ti,palmas-pwrbutton.txt
@@ -0,0 +1,36 @@
+Texas Instruments Palmas family power button module
+
+This module is part of the Palmas family of PMICs. For more details
+about the whole chip see:
+Documentation/devicetree/bindings/mfd/palmas.txt.
+
+This module provides a simple power button event via an Interrupt.
+
+Required properties:
+- compatible: should be one of the following
+ - "ti,palmas-pwrbutton": For Palmas compatible power on button
+- interrupt-parent: Parent interrupt device, must be handle of palmas node.
+- interrupts: Interrupt number of power button submodule on device.
+
+Optional Properties:
+
+- ti,palmas-long-press-seconds: Duration in seconds which the power
+ button should be kept pressed for Palmas to power off automatically.
+ NOTE: This depends on OTP support and POWERHOLD signal configuration
+ on platform. Valid values are 6, 8, 10 and 12.
+- ti,palmas-pwron-debounce-milli-seconds: Duration in milliseconds
+ which the power button should be kept pressed for Palmas to register
+ a press for debouncing purposes. NOTE: This depends on specific
+ Palmas variation capability. Valid values are 15, 100, 500 and 1000.
+
+Example:
+
+&palmas {
+ palmas_pwr_button: pwrbutton {
+ compatible = "ti,palmas-pwrbutton";
+ interrupt-parent = <&tps659038>;
+ interrupts = <1 IRQ_TYPE_EDGE_FALLING>;
+ ti,palmas-long-press-seconds = <12>;
+ ti,palmas-pwron-debounce-milli-seconds = <15>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/input/touchscreen/auo_pixcir_ts.txt b/Documentation/devicetree/bindings/input/touchscreen/auo_pixcir_ts.txt
new file mode 100644
index 000000000..f40f21c64
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/auo_pixcir_ts.txt
@@ -0,0 +1,30 @@
+* AUO in-cell touchscreen controller using Pixcir sensors
+
+Required properties:
+- compatible: must be "auo,auo_pixcir_ts"
+- reg: I2C address of the chip
+- interrupts: interrupt to which the chip is connected
+- gpios: gpios the chip is connected to
+ first one is the interrupt gpio and second one the reset gpio
+- x-size: horizontal resolution of touchscreen
+- y-size: vertical resolution of touchscreen
+
+Example:
+
+ i2c@00000000 {
+ /* ... */
+
+ auo_pixcir_ts@5c {
+ compatible = "auo,auo_pixcir_ts";
+ reg = <0x5c>;
+ interrupts = <2 0>;
+
+ gpios = <&gpf 2 0 2>, /* INT */
+ <&gpf 5 1 0>; /* RST */
+
+ x-size = <800>;
+ y-size = <600>;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/brcm,iproc-touchscreen.txt b/Documentation/devicetree/bindings/input/touchscreen/brcm,iproc-touchscreen.txt
new file mode 100644
index 000000000..34e3382a0
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/brcm,iproc-touchscreen.txt
@@ -0,0 +1,76 @@
+* Broadcom's IPROC Touchscreen Controller
+
+Required properties:
+- compatible: must be "brcm,iproc-touchscreen"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- clocks: The clock provided by the SOC to driver the tsc
+- clock-name: name for the clock
+- interrupts: The touchscreen controller's interrupt
+
+Optional properties:
+- scanning_period: Time between scans. Each step is 1024 us. Valid 1-256.
+- debounce_timeout: Each step is 512 us. Valid 0-255
+- settling_timeout: The settling duration (in ms) is the amount of time
+ the tsc waits to allow the voltage to settle after
+ turning on the drivers in detection mode.
+ Valid values: 0-11
+ 0 = 0.008 ms
+ 1 = 0.01 ms
+ 2 = 0.02 ms
+ 3 = 0.04 ms
+ 4 = 0.08 ms
+ 5 = 0.16 ms
+ 6 = 0.32 ms
+ 7 = 0.64 ms
+ 8 = 1.28 ms
+ 9 = 2.56 ms
+ 10 = 5.12 ms
+ 11 = 10.24 ms
+- touch_timeout: The continuous number of scan periods in which touch is
+ not detected before the controller returns to idle state.
+ Valid values 0-255.
+- average_data: Number of data samples which are averaged before a final
+ data point is placed into the FIFO
+ Valid values 0-7
+ 0 = 1 sample
+ 1 = 2 samples
+ 2 = 4 samples
+ 3 = 8 samples
+ 4 = 16 samples
+ 5 = 32 samples
+ 6 = 64 samples
+ 7 = 128 samples
+- fifo_threshold: Interrupt is generated whenever the number of fifo
+ entries exceeds this value
+ Valid values 0-31
+- touchscreen-size-x: horizontal resolution of touchscreen (in pixels)
+- touchscreen-size-y: vertical resolution of touchscreen (in pixels)
+- touchscreen-fuzz-x: horizontal noise value of the absolute input
+ device (in pixels)
+- touchscreen-fuzz-y: vertical noise value of the absolute input
+ device (in pixels)
+- touchscreen-inverted-x: X axis is inverted (boolean)
+- touchscreen-inverted-y: Y axis is inverted (boolean)
+
+Example:
+
+ touchscreen: tsc@0x180A6000 {
+ compatible = "brcm,iproc-touchscreen";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x180A6000 0x40>;
+ clocks = <&adc_clk>;
+ clock-names = "tsc_clk";
+ interrupts = <GIC_SPI 164 IRQ_TYPE_LEVEL_HIGH>;
+
+ scanning_period = <5>;
+ debounce_timeout = <40>;
+ settling_timeout = <7>;
+ touch_timeout = <10>;
+ average_data = <5>;
+ fifo_threshold = <1>;
+ /* Touchscreen is rotated 180 degrees. */
+ touchscreen-inverted-x;
+ touchscreen-inverted-y;
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
new file mode 100644
index 000000000..ca5a2c864
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/bu21013.txt
@@ -0,0 +1,28 @@
+* Rohm BU21013 Touch Screen
+
+Required properties:
+ - compatible : "rohm,bu21013_tp"
+ - reg : I2C device address
+
+Optional properties:
+ - touch-gpio : GPIO pin registering a touch event
+ - <supply_name>-supply : Phandle to a regulator supply
+ - rohm,touch-max-x : Maximum outward permitted limit in the X axis
+ - rohm,touch-max-y : Maximum outward permitted limit in the Y axis
+ - rohm,flip-x : Flip touch coordinates on the X axis
+ - rohm,flip-y : Flip touch coordinates on the Y axis
+
+Example:
+
+ i2c@80110000 {
+ bu21013_tp@0x5c {
+ compatible = "rohm,bu21013_tp";
+ reg = <0x5c>;
+ touch-gpio = <&gpio2 20 0x4>;
+ avdd-supply = <&ab8500_ldo_aux1_reg>;
+
+ rohm,touch-max-x = <384>;
+ rohm,touch-max-y = <704>;
+ rohm,flip-y;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt b/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt
new file mode 100644
index 000000000..d11f8d615
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt
@@ -0,0 +1,46 @@
+* ChipOne icn8318 I2C touchscreen controller
+
+Required properties:
+ - compatible : "chipone,icn8318"
+ - reg : I2C slave address of the chip (0x40)
+ - interrupt-parent : a phandle pointing to the interrupt controller
+ serving the interrupt for this chip
+ - interrupts : interrupt specification for the icn8318 interrupt
+ - wake-gpios : GPIO specification for the WAKE input
+ - touchscreen-size-x : horizontal resolution of touchscreen (in pixels)
+ - touchscreen-size-y : vertical resolution of touchscreen (in pixels)
+
+Optional properties:
+ - pinctrl-names : should be "default"
+ - pinctrl-0: : a phandle pointing to the pin settings for the
+ control gpios
+ - touchscreen-fuzz-x : horizontal noise value of the absolute input
+ device (in pixels)
+ - touchscreen-fuzz-y : vertical noise value of the absolute input
+ device (in pixels)
+ - touchscreen-inverted-x : X axis is inverted (boolean)
+ - touchscreen-inverted-y : Y axis is inverted (boolean)
+ - touchscreen-swapped-x-y : X and Y axis are swapped (boolean)
+ Swapping is done after inverting the axis
+
+Example:
+
+i2c@00000000 {
+ /* ... */
+
+ chipone_icn8318@40 {
+ compatible = "chipone,icn8318";
+ reg = <0x40>;
+ interrupt-parent = <&pio>;
+ interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* EINT9 (PG9) */
+ pinctrl-names = "default";
+ pinctrl-0 = <&ts_wake_pin_p66>;
+ wake-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */
+ touchscreen-size-x = <800>;
+ touchscreen-size-y = <480>;
+ touchscreen-inverted-x;
+ touchscreen-swapped-x-y;
+ };
+
+ /* ... */
+};
diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
new file mode 100644
index 000000000..76db96704
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.txt
@@ -0,0 +1,55 @@
+FocalTech EDT-FT5x06 Polytouch driver
+=====================================
+
+There are 3 variants of the chip for various touch panel sizes
+FT5206GE1 2.8" .. 3.8"
+FT5306DE4 4.3" .. 7"
+FT5406EE8 7" .. 8.9"
+
+The software interface is identical for all those chips, so that
+currently there is no need for the driver to distinguish between the
+different chips. Nevertheless distinct compatible strings are used so
+that a distinction can be added if necessary without changing the DT
+bindings.
+
+
+Required properties:
+ - compatible: "edt,edt-ft5206"
+ or: "edt,edt-ft5306"
+ or: "edt,edt-ft5406"
+
+ - reg: I2C slave address of the chip (0x38)
+ - interrupt-parent: a phandle pointing to the interrupt controller
+ serving the interrupt for this chip
+ - interrupts: interrupt specification for the touchdetect
+ interrupt
+
+Optional properties:
+ - reset-gpios: GPIO specification for the RESET input
+ - wake-gpios: GPIO specification for the WAKE input
+
+ - pinctrl-names: should be "default"
+ - pinctrl-0: a phandle pointing to the pin settings for the
+ control gpios
+
+ - threshold: allows setting the "click"-threshold in the range
+ from 20 to 80.
+
+ - gain: allows setting the sensitivity in the range from 0 to
+ 31. Note that lower values indicate higher
+ sensitivity.
+
+ - offset: allows setting the edge compensation in the range from
+ 0 to 31.
+
+Example:
+ polytouch: edt-ft5x06@38 {
+ compatible = "edt,edt-ft5406", "edt,edt-ft5x06";
+ reg = <0x38>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&edt_ft5x06_pins>;
+ interrupt-parent = <&gpio2>;
+ interrupts = <5 0>;
+ reset-gpios = <&gpio2 6 1>;
+ wake-gpios = <&gpio4 9 0>;
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt b/Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt
new file mode 100644
index 000000000..49fa14ed1
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/egalax-ts.txt
@@ -0,0 +1,19 @@
+* EETI eGalax Multiple Touch Controller
+
+Required properties:
+- compatible: must be "eeti,egalax_ts"
+- reg: i2c slave address
+- interrupt-parent: the phandle for the interrupt controller
+- interrupts: touch controller interrupt
+- wakeup-gpios: the gpio pin to be used for waking up the controller
+ and also used as irq pin
+
+Example:
+
+ egalax_ts@04 {
+ compatible = "eeti,egalax_ts";
+ reg = <0x04>;
+ interrupt-parent = <&gpio1>;
+ interrupts = <9 2>;
+ wakeup-gpios = <&gpio1 9 0>;
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.txt b/Documentation/devicetree/bindings/input/touchscreen/goodix.txt
new file mode 100644
index 000000000..8ba98eec7
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.txt
@@ -0,0 +1,29 @@
+Device tree bindings for Goodix GT9xx series touchscreen controller
+
+Required properties:
+
+ - compatible : Should be "goodix,gt911"
+ or "goodix,gt9110"
+ or "goodix,gt912"
+ or "goodix,gt927"
+ or "goodix,gt9271"
+ or "goodix,gt928"
+ or "goodix,gt967"
+ - reg : I2C address of the chip. Should be 0x5d or 0x14
+ - interrupt-parent : Interrupt controller to which the chip is connected
+ - interrupts : Interrupt to which the chip is connected
+
+Example:
+
+ i2c@00000000 {
+ /* ... */
+
+ gt928@5d {
+ compatible = "goodix,gt928";
+ reg = <0x5d>;
+ interrupt-parent = <&gpio>;
+ interrupts = <0 0>;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/lpc32xx-tsc.txt b/Documentation/devicetree/bindings/input/touchscreen/lpc32xx-tsc.txt
new file mode 100644
index 000000000..41cbf4b7a
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/lpc32xx-tsc.txt
@@ -0,0 +1,16 @@
+* NXP LPC32xx SoC Touchscreen Controller (TSC)
+
+Required properties:
+- compatible: must be "nxp,lpc3220-tsc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: The TSC/ADC interrupt
+
+Example:
+
+ tsc@40048000 {
+ compatible = "nxp,lpc3220-tsc";
+ reg = <0x40048000 0x1000>;
+ interrupt-parent = <&mic>;
+ interrupts = <39 0>;
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/mms114.txt b/Documentation/devicetree/bindings/input/touchscreen/mms114.txt
new file mode 100644
index 000000000..89d4c56c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/mms114.txt
@@ -0,0 +1,34 @@
+* MELFAS MMS114 touchscreen controller
+
+Required properties:
+- compatible: must be "melfas,mms114"
+- reg: I2C address of the chip
+- interrupts: interrupt to which the chip is connected
+- x-size: horizontal resolution of touchscreen
+- y-size: vertical resolution of touchscreen
+
+Optional properties:
+- contact-threshold:
+- moving-threshold:
+- x-invert: invert X axis
+- y-invert: invert Y axis
+
+Example:
+
+ i2c@00000000 {
+ /* ... */
+
+ touchscreen@48 {
+ compatible = "melfas,mms114";
+ reg = <0x48>;
+ interrupts = <39 0>;
+ x-size = <720>;
+ y-size = <1280>;
+ contact-threshold = <10>;
+ moving-threshold = <10>;
+ x-invert;
+ y-invert;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt b/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt
new file mode 100644
index 000000000..6e551090f
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt
@@ -0,0 +1,26 @@
+* Pixcir I2C touchscreen controllers
+
+Required properties:
+- compatible: must be "pixcir,pixcir_ts" or "pixcir,pixcir_tangoc"
+- reg: I2C address of the chip
+- interrupts: interrupt to which the chip is connected
+- attb-gpio: GPIO connected to the ATTB line of the chip
+- touchscreen-size-x: horizontal resolution of touchscreen (in pixels)
+- touchscreen-size-y: vertical resolution of touchscreen (in pixels)
+
+Example:
+
+ i2c@00000000 {
+ /* ... */
+
+ pixcir_ts@5c {
+ compatible = "pixcir,pixcir_ts";
+ reg = <0x5c>;
+ interrupts = <2 0>;
+ attb-gpio = <&gpf 2 0 2>;
+ touchscreen-size-x = <800>;
+ touchscreen-size-y = <600>;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/sitronix-st1232.txt b/Documentation/devicetree/bindings/input/touchscreen/sitronix-st1232.txt
new file mode 100644
index 000000000..64ad48b82
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/sitronix-st1232.txt
@@ -0,0 +1,24 @@
+* Sitronix st1232 touchscreen controller
+
+Required properties:
+- compatible: must be "sitronix,st1232"
+- reg: I2C address of the chip
+- interrupts: interrupt to which the chip is connected
+
+Optional properties:
+- gpios: a phandle to the reset GPIO
+
+Example:
+
+ i2c@00000000 {
+ /* ... */
+
+ touchscreen@55 {
+ compatible = "sitronix,st1232";
+ reg = <0x55>;
+ interrupts = <2 0>;
+ gpios = <&gpio1 166 0>;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/stmpe.txt b/Documentation/devicetree/bindings/input/touchscreen/stmpe.txt
new file mode 100644
index 000000000..127baa31a
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/stmpe.txt
@@ -0,0 +1,43 @@
+STMPE Touchscreen
+----------------
+
+Required properties:
+ - compatible: "st,stmpe-ts"
+
+Optional properties:
+- st,sample-time: ADC converstion time in number of clock. (0 -> 36 clocks, 1 ->
+ 44 clocks, 2 -> 56 clocks, 3 -> 64 clocks, 4 -> 80 clocks, 5 -> 96 clocks, 6
+ -> 144 clocks), recommended is 4.
+- st,mod-12b: ADC Bit mode (0 -> 10bit ADC, 1 -> 12bit ADC)
+- st,ref-sel: ADC reference source (0 -> internal reference, 1 -> external
+ reference)
+- st,adc-freq: ADC Clock speed (0 -> 1.625 MHz, 1 -> 3.25 MHz, 2 || 3 -> 6.5 MHz)
+- st,ave-ctrl: Sample average control (0 -> 1 sample, 1 -> 2 samples, 2 -> 4
+ samples, 3 -> 8 samples)
+- st,touch-det-delay: Touch detect interrupt delay (0 -> 10 us, 1 -> 50 us, 2 ->
+ 100 us, 3 -> 500 us, 4-> 1 ms, 5 -> 5 ms, 6 -> 10 ms, 7 -> 50 ms) recommended
+ is 3
+- st,settling: Panel driver settling time (0 -> 10 us, 1 -> 100 us, 2 -> 500 us, 3
+ -> 1 ms, 4 -> 5 ms, 5 -> 10 ms, 6 for 50 ms, 7 -> 100 ms) recommended is 2
+- st,fraction-z: Length of the fractional part in z (fraction-z ([0..7]) = Count of
+ the fractional part) recommended is 7
+- st,i-drive: current limit value of the touchscreen drivers (0 -> 20 mA typical 35
+ mA max, 1 -> 50 mA typical 80 mA max)
+
+Node name must be stmpe_touchscreen and should be child node of stmpe node to
+which it belongs.
+
+Example:
+
+ stmpe_touchscreen {
+ compatible = "st,stmpe-ts";
+ st,sample-time = <4>;
+ st,mod-12b = <1>;
+ st,ref-sel = <0>;
+ st,adc-freq = <1>;
+ st,ave-ctrl = <1>;
+ st,touch-det-delay = <2>;
+ st,settling = <2>;
+ st,fraction-z = <7>;
+ st,i-drive = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/sun4i.txt b/Documentation/devicetree/bindings/input/touchscreen/sun4i.txt
new file mode 100644
index 000000000..89abecd93
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/sun4i.txt
@@ -0,0 +1,38 @@
+sun4i resistive touchscreen controller
+--------------------------------------
+
+Required properties:
+ - compatible: "allwinner,sun4i-a10-ts", "allwinner,sun5i-a13-ts" or
+ "allwinner,sun6i-a31-ts"
+ - reg: mmio address range of the chip
+ - interrupts: interrupt to which the chip is connected
+ - #thermal-sensor-cells: shall be 0
+
+Optional properties:
+ - allwinner,ts-attached : boolean indicating that an actual touchscreen
+ is attached to the controller
+ - allwinner,tp-sensitive-adjust : integer (4 bits)
+ adjust sensitivity of pen down detection
+ between 0 (least sensitive) and 15
+ (defaults to 15)
+ - allwinner,filter-type : integer (2 bits)
+ select median and averaging filter
+ samples used for median / averaging filter
+ 0: 4/2
+ 1: 5/3
+ 2: 8/4
+ 3: 16/8
+ (defaults to 1)
+
+Example:
+
+ rtp: rtp@01c25000 {
+ compatible = "allwinner,sun4i-a10-ts";
+ reg = <0x01c25000 0x100>;
+ interrupts = <29>;
+ allwinner,ts-attached;
+ #thermal-sensor-cells = <0>;
+ /* sensitive/noisy touch panel */
+ allwinner,tp-sensitive-adjust = <0>;
+ allwinner,filter-type = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/sx8654.txt b/Documentation/devicetree/bindings/input/touchscreen/sx8654.txt
new file mode 100644
index 000000000..5aaa6b3aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/sx8654.txt
@@ -0,0 +1,16 @@
+* Semtech SX8654 I2C Touchscreen Controller
+
+Required properties:
+- compatible: must be "semtech,sx8654"
+- reg: i2c slave address
+- interrupt-parent: the phandle for the interrupt controller
+- interrupts: touch controller interrupt
+
+Example:
+
+ sx8654@48 {
+ compatible = "semtech,sx8654";
+ reg = <0x48>;
+ interrupt-parent = <&gpio6>;
+ interrupts = <3 IRQ_TYPE_EDGE_FALLING>;
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/ti-tsc-adc.txt b/Documentation/devicetree/bindings/input/touchscreen/ti-tsc-adc.txt
new file mode 100644
index 000000000..6c4fb3482
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/ti-tsc-adc.txt
@@ -0,0 +1,59 @@
+* TI - TSC ADC (Touschscreen and analog digital converter)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Required properties:
+- child "tsc"
+ ti,wires: Wires refer to application modes i.e. 4/5/8 wire touchscreen
+ support on the platform.
+ ti,x-plate-resistance: X plate resistance
+ ti,coordinate-readouts: The sequencer supports a total of 16
+ programmable steps each step is used to
+ read a single coordinate. A single
+ readout is enough but multiple reads can
+ increase the quality.
+ A value of 5 means, 5 reads for X, 5 for
+ Y and 2 for Z (always). This utilises 12
+ of the 16 software steps available. The
+ remaining 4 can be used by the ADC.
+ ti,wire-config: Different boards could have a different order for
+ connecting wires on touchscreen. We need to provide an
+ 8 bit number where in the 1st four bits represent the
+ analog lines and the next 4 bits represent positive/
+ negative terminal on that input line. Notations to
+ represent the input lines and terminals resoectively
+ is as follows:
+ AIN0 = 0, AIN1 = 1 and so on till AIN7 = 7.
+ XP = 0, XN = 1, YP = 2, YN = 3.
+- child "adc"
+ ti,adc-channels: List of analog inputs available for ADC.
+ AIN0 = 0, AIN1 = 1 and so on till AIN7 = 7.
+
+Optional properties:
+- child "tsc"
+ ti,charge-delay: Length of touch screen charge delay step in terms of
+ ADC clock cycles. Charge delay value should be large
+ in order to avoid false pen-up events. This value
+ effects the overall sampling speed, hence need to be
+ kept as low as possible, while avoiding false pen-up
+ event. Start from a lower value, say 0x400, and
+ increase value until false pen-up events are avoided.
+ The pen-up detection happens immediately after the
+ charge step, so this does in fact function as a
+ hardware knob for adjusting the amount of "settling
+ time".
+
+Example:
+ tscadc: tscadc@44e0d000 {
+ compatible = "ti,am3359-tscadc";
+ tsc {
+ ti,wires = <4>;
+ ti,x-plate-resistance = <200>;
+ ti,coordiante-readouts = <5>;
+ ti,wire-config = <0x00 0x11 0x22 0x33>;
+ ti,charge-delay = <0x400>;
+ };
+
+ adc {
+ ti,adc-channels = <4 5 6 7>;
+ };
+ }
diff --git a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
new file mode 100644
index 000000000..ac23caf51
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
@@ -0,0 +1,29 @@
+General Touchscreen Properties:
+
+Optional properties for Touchscreens:
+ - touchscreen-size-x : horizontal resolution of touchscreen
+ (in pixels)
+ - touchscreen-size-y : vertical resolution of touchscreen
+ (in pixels)
+ - touchscreen-max-pressure : maximum reported pressure (arbitrary range
+ dependent on the controller)
+ - touchscreen-fuzz-x : horizontal noise value of the absolute input
+ device (in pixels)
+ - touchscreen-fuzz-y : vertical noise value of the absolute input
+ device (in pixels)
+ - touchscreen-fuzz-pressure : pressure noise value of the absolute input
+ device (arbitrary range dependent on the
+ controller)
+ - touchscreen-inverted-x : X axis is inverted (boolean)
+ - touchscreen-inverted-y : Y axis is inverted (boolean)
+ - touchscreen-swapped-x-y : X and Y axis are swapped (boolean)
+ Swapping is done after inverting the axis
+
+Deprecated properties for Touchscreens:
+ - x-size : deprecated name for touchscreen-size-x
+ - y-size : deprecated name for touchscreen-size-y
+ - moving-threshold : deprecated name for a combination of
+ touchscreen-fuzz-x and touchscreen-fuzz-y
+ - contact-threshold : deprecated name for touchscreen-fuzz-pressure
+ - x-invert : deprecated name for touchscreen-inverted-x
+ - y-invert : deprecated name for touchscreen-inverted-y
diff --git a/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt b/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt
new file mode 100644
index 000000000..09089a6d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt
@@ -0,0 +1,42 @@
+* Texas Instruments tsc2005 touchscreen controller
+
+Required properties:
+ - compatible : "ti,tsc2005"
+ - reg : SPI device address
+ - spi-max-frequency : Maximal SPI speed
+ - interrupts : IRQ specifier
+ - reset-gpios : GPIO specifier
+ - vio-supply : Regulator specifier
+
+Optional properties:
+ - ti,x-plate-ohms : integer, resistance of the touchscreen's X plates
+ in ohm (defaults to 280)
+ - ti,esd-recovery-timeout-ms : integer, if the touchscreen does not respond after
+ the configured time (in milli seconds), the driver
+ will reset it. This is disabled by default.
+ - properties defined in touchscreen.txt
+
+Example:
+
+&mcspi1 {
+ tsc2005@0 {
+ compatible = "ti,tsc2005";
+ spi-max-frequency = <6000000>;
+ reg = <0>;
+
+ vio-supply = <&vio>;
+
+ reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */
+ interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */
+
+ touchscreen-fuzz-x = <4>;
+ touchscreen-fuzz-y = <7>;
+ touchscreen-fuzz-pressure = <2>;
+ touchscreen-size-x = <4096>;
+ touchscreen-size-y = <4096>;
+ touchscreen-max-pressure = <2048>;
+
+ ti,x-plate-ohms = <280>;
+ ti,esd-recovery-timeout-ms = <8000>;
+ };
+}
diff --git a/Documentation/devicetree/bindings/input/touchscreen/tsc2007.txt b/Documentation/devicetree/bindings/input/touchscreen/tsc2007.txt
new file mode 100644
index 000000000..ec365e172
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/tsc2007.txt
@@ -0,0 +1,41 @@
+* Texas Instruments tsc2007 touchscreen controller
+
+Required properties:
+- compatible: must be "ti,tsc2007".
+- reg: I2C address of the chip.
+- ti,x-plate-ohms: X-plate resistance in ohms.
+
+Optional properties:
+- gpios: the interrupt gpio the chip is connected to (trough the penirq pin).
+ The penirq pin goes to low when the panel is touched.
+ (see GPIO binding[1] for more details).
+- interrupt-parent: the phandle for the gpio controller
+ (see interrupt binding[0]).
+- interrupts: (gpio) interrupt to which the chip is connected
+ (see interrupt binding[0]).
+- ti,max-rt: maximum pressure.
+- ti,fuzzx: specifies the absolute input fuzz x value.
+ If set, it will permit noise in the data up to +- the value given to the fuzz
+ parameter, that is used to filter noise from the event stream.
+- ti,fuzzy: specifies the absolute input fuzz y value.
+- ti,fuzzz: specifies the absolute input fuzz z value.
+- ti,poll-period: how much time to wait (in milliseconds) before reading again the
+ values from the tsc2007.
+
+[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+[1]: Documentation/devicetree/bindings/gpio/gpio.txt
+
+Example:
+ &i2c1 {
+ /* ... */
+ tsc2007@49 {
+ compatible = "ti,tsc2007";
+ reg = <0x49>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <0x0 0x8>;
+ gpios = <&gpio4 0 0>;
+ ti,x-plate-ohms = <180>;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/zforce_ts.txt b/Documentation/devicetree/bindings/input/touchscreen/zforce_ts.txt
new file mode 100644
index 000000000..80c37df94
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/touchscreen/zforce_ts.txt
@@ -0,0 +1,34 @@
+* Neonode infrared touchscreen controller
+
+Required properties:
+- compatible: must be "neonode,zforce"
+- reg: I2C address of the chip
+- interrupts: interrupt to which the chip is connected
+- gpios: gpios the chip is connected to
+ first one is the interrupt gpio and second one the reset gpio
+- x-size: horizontal resolution of touchscreen
+- y-size: vertical resolution of touchscreen
+
+Optional properties:
+- vdd-supply: Regulator controlling the controller supply
+
+Example:
+
+ i2c@00000000 {
+ /* ... */
+
+ zforce_ts@50 {
+ compatible = "neonode,zforce";
+ reg = <0x50>;
+ interrupts = <2 0>;
+ vdd-supply = <&reg_zforce_vdd>;
+
+ gpios = <&gpio5 6 0>, /* INT */
+ <&gpio5 9 0>; /* RST */
+
+ x-size = <800>;
+ y-size = <600>;
+ };
+
+ /* ... */
+ };
diff --git a/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt b/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt
new file mode 100644
index 000000000..e30e0b93f
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/tps65218-pwrbutton.txt
@@ -0,0 +1,17 @@
+Texas Instruments TPS65218 power button
+
+This driver provides a simple power button event via an Interrupt.
+
+Required properties:
+- compatible: should be "ti,tps65218-pwrbutton"
+- interrupts: should be one of the following
+ - <3 IRQ_TYPE_EDGE_BOTH>: For controllers compatible with tps65218
+
+Example:
+
+&tps {
+ power-button {
+ compatible = "ti,tps65218-pwrbutton";
+ interrupts = <3 IRQ_TYPE_EDGE_BOTH>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/input/twl4030-keypad.txt b/Documentation/devicetree/bindings/input/twl4030-keypad.txt
new file mode 100644
index 000000000..e4be2f76a
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/twl4030-keypad.txt
@@ -0,0 +1,27 @@
+* TWL4030's Keypad Controller device tree bindings
+
+TWL4030's Keypad controller is used to interface a SoC with a matrix-type
+keypad device. The keypad controller supports multiple row and column lines.
+A key can be placed at each intersection of a unique row and a unique column.
+The keypad controller can sense a key-press and key-release and report the
+event using a interrupt to the cpu.
+
+This binding is based on the matrix-keymap binding with the following
+changes:
+
+ * keypad,num-rows and keypad,num-columns are required.
+
+Required SoC Specific Properties:
+- compatible: should be one of the following
+ - "ti,twl4030-keypad": For controllers compatible with twl4030 keypad
+ controller.
+- interrupt: should be one of the following
+ - <1>: For controllers compatible with twl4030 keypad controller.
+
+Example:
+ twl_keypad: keypad {
+ compatible = "ti,twl4030-keypad";
+ interrupts = <1>;
+ keypad,num-rows = <8>;
+ keypad,num-columns = <8>;
+ };
diff --git a/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt b/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt
new file mode 100644
index 000000000..c864a46cd
--- /dev/null
+++ b/Documentation/devicetree/bindings/input/twl4030-pwrbutton.txt
@@ -0,0 +1,21 @@
+Texas Instruments TWL family (twl4030) pwrbutton module
+
+This module is part of the TWL4030. For more details about the whole
+chip see Documentation/devicetree/bindings/mfd/twl-familly.txt.
+
+This module provides a simple power button event via an Interrupt.
+
+Required properties:
+- compatible: should be one of the following
+ - "ti,twl4030-pwrbutton": For controllers compatible with twl4030
+- interrupts: should be one of the following
+ - <8>: For controllers compatible with twl4030
+
+Example:
+
+&twl {
+ twl_pwrbutton: pwrbutton {
+ compatible = "ti,twl4030-pwrbutton";
+ interrupts = <8>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/abilis,tb10x-ictl.txt b/Documentation/devicetree/bindings/interrupt-controller/abilis,tb10x-ictl.txt
new file mode 100644
index 000000000..9d52d5afe
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/abilis,tb10x-ictl.txt
@@ -0,0 +1,38 @@
+TB10x Top Level Interrupt Controller
+====================================
+
+The Abilis TB10x SOC contains a custom interrupt controller. It performs
+one-to-one mapping of external interrupt sources to CPU interrupts and
+provides support for reconfigurable trigger modes.
+
+Required properties
+-------------------
+
+- compatible: Should be "abilis,tb10x-ictl"
+- reg: specifies physical base address and size of register range.
+- interrupt-congroller: Identifies the node as an interrupt controller.
+- #interrupt cells: Specifies the number of cells used to encode an interrupt
+ source connected to this controller. The value shall be 2.
+- interrupt-parent: Specifies the parent interrupt controller.
+- interrupts: Specifies the list of interrupt lines which are handled by
+ the interrupt controller in the parent controller's notation. Interrupts
+ are mapped one-to-one to parent interrupts.
+
+Example
+-------
+
+intc: interrupt-controller { /* Parent interrupt controller */
+ interrupt-controller;
+ #interrupt-cells = <1>; /* For example below */
+ /* ... */
+};
+
+tb10x_ictl: pic@2000 { /* TB10x interrupt controller */
+ compatible = "abilis,tb10x-ictl";
+ reg = <0x2000 0x20>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&intc>;
+ interrupts = <5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
+ 20 21 22 23 24 25 26 27 28 29 30 31>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-ic.txt b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-ic.txt
new file mode 100644
index 000000000..b290ca150
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-ic.txt
@@ -0,0 +1,18 @@
+Allwinner Sunxi Interrupt Controller
+
+Required properties:
+
+- compatible : should be "allwinner,sun4i-a10-ic"
+- reg : Specifies base physical address and size of the registers.
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value shall be 1.
+
+Example:
+
+intc: interrupt-controller {
+ compatible = "allwinner,sun4i-a10-ic";
+ reg = <0x01c20400 0x400>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt
new file mode 100644
index 000000000..d1c5cdabc
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun67i-sc-nmi.txt
@@ -0,0 +1,27 @@
+Allwinner Sunxi NMI Controller
+==============================
+
+Required properties:
+
+- compatible : should be "allwinner,sun7i-a20-sc-nmi" or
+ "allwinner,sun6i-a31-sc-nmi"
+- reg : Specifies base physical address and size of the registers.
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value shall be 2. The first cell is the IRQ number, the
+ second cell the trigger type as defined in interrupt.txt in this directory.
+- interrupt-parent: Specifies the parent interrupt controller.
+- interrupts: Specifies the interrupt line (NMI) which is handled by
+ the interrupt controller in the parent controller's notation. This value
+ shall be the NMI.
+
+Example:
+
+sc-nmi-intc@01c00030 {
+ compatible = "allwinner,sun7i-a20-sc-nmi";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0x01c00030 0x0c>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 0 4>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt b/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt
new file mode 100644
index 000000000..f292917fa
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/atmel,aic.txt
@@ -0,0 +1,42 @@
+* Advanced Interrupt Controller (AIC)
+
+Required properties:
+- compatible: Should be "atmel,<chip>-aic"
+ <chip> can be "at91rm9200", "sama5d3" or "sama5d4"
+- interrupt-controller: Identifies the node as an interrupt controller.
+- interrupt-parent: For single AIC system, it is an empty property.
+- #interrupt-cells: The number of cells to define the interrupts. It should be 3.
+ The first cell is the IRQ number (aka "Peripheral IDentifier" on datasheet).
+ The second cell is used to specify flags:
+ bits[3:0] trigger type and level flags:
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 4 = active high level-sensitive.
+ 8 = active low level-sensitive.
+ Valid combinations are 1, 2, 3, 4, 8.
+ Default flag for internal sources should be set to 4 (active high).
+ The third cell is used to specify the irq priority from 0 (lowest) to 7
+ (highest).
+- reg: Should contain AIC registers location and length
+- atmel,external-irqs: u32 array of external irqs.
+
+Examples:
+ /*
+ * AIC
+ */
+ aic: interrupt-controller@fffff000 {
+ compatible = "atmel,at91rm9200-aic";
+ interrupt-controller;
+ interrupt-parent;
+ #interrupt-cells = <3>;
+ reg = <0xfffff000 0x200>;
+ };
+
+ /*
+ * An interrupt generating device that is wired to an AIC.
+ */
+ dma: dma-controller@ffffec00 {
+ compatible = "atmel,at91sam9g45-dma";
+ reg = <0xffffec00 0x200>;
+ interrupts = <21 4 5>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
new file mode 100644
index 000000000..7da578d72
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
@@ -0,0 +1,110 @@
+BCM2835 Top-Level ("ARMCTRL") Interrupt Controller
+
+The BCM2835 contains a custom top-level interrupt controller, which supports
+72 interrupt sources using a 2-level register scheme. The interrupt
+controller, or the HW block containing it, is referred to occasionally
+as "armctrl" in the SoC documentation, hence naming of this binding.
+
+Required properties:
+
+- compatible : should be "brcm,bcm2835-armctrl-ic"
+- reg : Specifies base physical address and size of the registers.
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value shall be 2.
+
+ The 1st cell is the interrupt bank; 0 for interrupts in the "IRQ basic
+ pending" register, or 1/2 respectively for interrupts in the "IRQ pending
+ 1/2" register.
+
+ The 2nd cell contains the interrupt number within the bank. Valid values
+ are 0..7 for bank 0, and 0..31 for bank 1.
+
+The interrupt sources are as follows:
+
+Bank 0:
+0: ARM_TIMER
+1: ARM_MAILBOX
+2: ARM_DOORBELL_0
+3: ARM_DOORBELL_1
+4: VPU0_HALTED
+5: VPU1_HALTED
+6: ILLEGAL_TYPE0
+7: ILLEGAL_TYPE1
+
+Bank 1:
+0: TIMER0
+1: TIMER1
+2: TIMER2
+3: TIMER3
+4: CODEC0
+5: CODEC1
+6: CODEC2
+7: VC_JPEG
+8: ISP
+9: VC_USB
+10: VC_3D
+11: TRANSPOSER
+12: MULTICORESYNC0
+13: MULTICORESYNC1
+14: MULTICORESYNC2
+15: MULTICORESYNC3
+16: DMA0
+17: DMA1
+18: VC_DMA2
+19: VC_DMA3
+20: DMA4
+21: DMA5
+22: DMA6
+23: DMA7
+24: DMA8
+25: DMA9
+26: DMA10
+27: DMA11
+28: DMA12
+29: AUX
+30: ARM
+31: VPUDMA
+
+Bank 2:
+0: HOSTPORT
+1: VIDEOSCALER
+2: CCP2TX
+3: SDC
+4: DSI0
+5: AVE
+6: CAM0
+7: CAM1
+8: HDMI0
+9: HDMI1
+10: PIXELVALVE1
+11: I2CSPISLV
+12: DSI1
+13: PWA0
+14: PWA1
+15: CPR
+16: SMI
+17: GPIO0
+18: GPIO1
+19: GPIO2
+20: GPIO3
+21: VC_I2C
+22: VC_SPI
+23: VC_I2SPCM
+24: VC_SDIO
+25: VC_UART
+26: SLIMBUS
+27: VEC
+28: CPG
+29: RNG
+30: VC_ARASANSDIO
+31: AVSPMON
+
+Example:
+
+intc: interrupt-controller {
+ compatible = "brcm,bcm2835-armctrl-ic";
+ reg = <0x7e00b200 0x200>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm3380-l2-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm3380-l2-intc.txt
new file mode 100644
index 000000000..8f48aad50
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm3380-l2-intc.txt
@@ -0,0 +1,41 @@
+Broadcom BCM3380-style Level 1 / Level 2 interrupt controller
+
+This interrupt controller shows up in various forms on many BCM338x/BCM63xx
+chipsets. It has the following properties:
+
+- outputs a single interrupt signal to its interrupt controller parent
+
+- contains one or more enable/status word pairs, which often appear at
+ different offsets in different blocks
+
+- no atomic set/clear operations
+
+Required properties:
+
+- compatible: should be "brcm,bcm3380-l2-intc"
+- reg: specifies one or more enable/status pairs, in the following format:
+ <enable_reg 0x4 status_reg 0x4>...
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: specifies the number of cells needed to encode an interrupt
+ source, should be 1.
+- interrupt-parent: specifies the phandle to the parent interrupt controller
+ this one is cascaded from
+- interrupts: specifies the interrupt line in the interrupt-parent controller
+ node, valid values depend on the type of parent interrupt controller
+
+Optional properties:
+
+- brcm,irq-can-wake: if present, this means the L2 controller can be used as a
+ wakeup source for system suspend/resume.
+
+Example:
+
+irq0_intc: interrupt-controller@10000020 {
+ compatible = "brcm,bcm3380-l2-intc";
+ reg = <0x10000024 0x4 0x1000002c 0x4>,
+ <0x10000020 0x4 0x10000028 0x4>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <2>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7038-l1-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7038-l1-intc.txt
new file mode 100644
index 000000000..cc217b22d
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7038-l1-intc.txt
@@ -0,0 +1,52 @@
+Broadcom BCM7038-style Level 1 interrupt controller
+
+This block is a first level interrupt controller that is typically connected
+directly to one of the HW INT lines on each CPU. Every BCM7xxx set-top chip
+since BCM7038 has contained this hardware.
+
+Key elements of the hardware design include:
+
+- 64, 96, 128, or 160 incoming level IRQ lines
+
+- Most onchip peripherals are wired directly to an L1 input
+
+- A separate instance of the register set for each CPU, allowing individual
+ peripheral IRQs to be routed to any CPU
+
+- Atomic mask/unmask operations
+
+- No polarity/level/edge settings
+
+- No FIFO or priority encoder logic; software is expected to read all
+ 2-5 status words to determine which IRQs are pending
+
+Required properties:
+
+- compatible: should be "brcm,bcm7038-l1-intc"
+- reg: specifies the base physical address and size of the registers;
+ the number of supported IRQs is inferred from the size argument
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: specifies the number of cells needed to encode an interrupt
+ source, should be 1.
+- interrupt-parent: specifies the phandle to the parent interrupt controller(s)
+ this one is cascaded from
+- interrupts: specifies the interrupt line(s) in the interrupt-parent controller
+ node; valid values depend on the type of parent interrupt controller
+
+If multiple reg ranges and interrupt-parent entries are present on an SMP
+system, the driver will allow IRQ SMP affinity to be set up through the
+/proc/irq/ interface. In the simplest possible configuration, only one
+reg range and one interrupt-parent is needed.
+
+Example:
+
+periph_intc: periph_intc@1041a400 {
+ compatible = "brcm,bcm7038-l1-intc";
+ reg = <0x1041a400 0x30 0x1041a600 0x30>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ interrupt-parent = <&cpu_intc>;
+ interrupts = <2>, <3>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.txt
new file mode 100644
index 000000000..44a9bb15d
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,bcm7120-l2-intc.txt
@@ -0,0 +1,90 @@
+Broadcom BCM7120-style Level 2 interrupt controller
+
+This interrupt controller hardware is a second level interrupt controller that
+is hooked to a parent interrupt controller: e.g: ARM GIC for ARM-based
+platforms. It can be found on BCM7xxx products starting with BCM7120.
+
+Such an interrupt controller has the following hardware design:
+
+- outputs multiple interrupts signals towards its interrupt controller parent
+
+- controls how some of the interrupts will be flowing, whether they will
+ directly output an interrupt signal towards the interrupt controller parent,
+ or if they will output an interrupt signal at this 2nd level interrupt
+ controller, in particular for UARTs
+
+- has one 32-bit enable word and one 32-bit status word
+
+- no atomic set/clear operations
+
+- not all bits within the interrupt controller actually map to an interrupt
+
+The typical hardware layout for this controller is represented below:
+
+2nd level interrupt line Outputs for the parent controller (e.g: ARM GIC)
+
+0 -----[ MUX ] ------------|==========> GIC interrupt 75
+ \-----------\
+ |
+1 -----[ MUX ] --------)---|==========> GIC interrupt 76
+ \------------|
+ |
+2 -----[ MUX ] --------)---|==========> GIC interrupt 77
+ \------------|
+ |
+3 ---------------------|
+4 ---------------------|
+5 ---------------------|
+7 ---------------------|---|===========> GIC interrupt 66
+9 ---------------------|
+10 --------------------|
+11 --------------------/
+
+6 ------------------------\
+ |===========> GIC interrupt 64
+8 ------------------------/
+
+12 ........................ X
+13 ........................ X (not connected)
+..
+31 ........................ X
+
+Required properties:
+
+- compatible: should be "brcm,bcm7120-l2-intc"
+- reg: specifies the base physical address and size of the registers
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: specifies the number of cells needed to encode an interrupt
+ source, should be 1.
+- interrupt-parent: specifies the phandle to the parent interrupt controller
+ this one is cascaded from
+- interrupts: specifies the interrupt line(s) in the interrupt-parent controller
+ node, valid values depend on the type of parent interrupt controller
+- brcm,int-map-mask: 32-bits bit mask describing how many and which interrupts
+ are wired to this 2nd level interrupt controller, and how they match their
+ respective interrupt parents. Should match exactly the number of interrupts
+ specified in the 'interrupts' property.
+
+Optional properties:
+
+- brcm,irq-can-wake: if present, this means the L2 controller can be used as a
+ wakeup source for system suspend/resume.
+
+- brcm,int-fwd-mask: if present, a bit mask to configure the interrupts which
+ have a mux gate, typically UARTs. Setting these bits will make their
+ respective interrupt outputs bypass this 2nd level interrupt controller
+ completely; it is completely transparent for the interrupt controller
+ parent. This should have one 32-bit word per enable/status pair.
+
+Example:
+
+irq0_intc: interrupt-controller@f0406800 {
+ compatible = "brcm,bcm7120-l2-intc";
+ interrupt-parent = <&intc>;
+ #interrupt-cells = <1>;
+ reg = <0xf0406800 0x8>;
+ interrupt-controller;
+ interrupts = <0x0 0x42 0x0>, <0x0 0x40 0x0>;
+ brcm,int-map-mask = <0xeb8>, <0x140>;
+ brcm,int-fwd-mask = <0x7>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.txt
new file mode 100644
index 000000000..448273a30
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.txt
@@ -0,0 +1,29 @@
+Broadcom Generic Level 2 Interrupt Controller
+
+Required properties:
+
+- compatible: should be "brcm,l2-intc"
+- reg: specifies the base physical address and size of the registers
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: specifies the number of cells needed to encode an
+ interrupt source. Should be 1.
+- interrupt-parent: specifies the phandle to the parent interrupt controller
+ this controller is cacaded from
+- interrupts: specifies the interrupt line in the interrupt-parent irq space
+ to be used for cascading
+
+Optional properties:
+
+- brcm,irq-can-wake: If present, this means the L2 controller can be used as a
+ wakeup source for system suspend/resume.
+
+Example:
+
+hif_intr2_intc: interrupt-controller@f0441000 {
+ compatible = "brcm,l2-intc";
+ reg = <0xf0441000 0x30>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&intc>;
+ interrupts = <0x0 0x20 0x0>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-mx.txt b/Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-mx.txt
new file mode 100644
index 000000000..d4de980e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-mx.txt
@@ -0,0 +1,18 @@
+* Xtensa Interrupt Distributor and Programmable Interrupt Controller (MX)
+
+Required properties:
+- compatible: Should be "cdns,xtensa-mx".
+
+Remaining properties have exact same meaning as in Xtensa PIC
+(see cdns,xtensa-pic.txt).
+
+Examples:
+ pic: pic {
+ compatible = "cdns,xtensa-mx";
+ /* one cell: internal irq number,
+ * two cells: second cell == 0: internal irq number
+ * second cell == 1: external irq number
+ */
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-pic.txt b/Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-pic.txt
new file mode 100644
index 000000000..026ef4cfc
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/cdns,xtensa-pic.txt
@@ -0,0 +1,25 @@
+* Xtensa built-in Programmable Interrupt Controller (PIC)
+
+Required properties:
+- compatible: Should be "cdns,xtensa-pic".
+- interrupt-controller: Identifies the node as an interrupt controller.
+- #interrupt-cells: The number of cells to define the interrupts.
+ It may be either 1 or 2.
+ When it's 1, the first cell is the internal IRQ number.
+ When it's 2, the first cell is the IRQ number, and the second cell
+ specifies whether it's internal (0) or external (1).
+ Periferals are usually connected to a fixed external IRQ, but for different
+ core variants it may be mapped to different internal IRQ.
+ IRQ sensitivity and priority are fixed for each core variant and may not be
+ changed at runtime.
+
+Examples:
+ pic: pic {
+ compatible = "cdns,xtensa-pic";
+ /* one cell: internal irq number,
+ * two cells: second cell == 0: internal irq number
+ * second cell == 1: external irq number
+ */
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/cirrus,clps711x-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/cirrus,clps711x-intc.txt
new file mode 100644
index 000000000..759339c34
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/cirrus,clps711x-intc.txt
@@ -0,0 +1,41 @@
+Cirrus Logic CLPS711X Interrupt Controller
+
+Required properties:
+
+- compatible: Should be "cirrus,clps711x-intc".
+- reg: Specifies base physical address of the registers set.
+- interrupt-controller: Identifies the node as an interrupt controller.
+- #interrupt-cells: Specifies the number of cells needed to encode an
+ interrupt source. The value shall be 1.
+
+The interrupt sources are as follows:
+ID Name Description
+---------------------------
+1: BLINT Battery low (FIQ)
+3: MCINT Media changed (FIQ)
+4: CSINT CODEC sound
+5: EINT1 External 1
+6: EINT2 External 2
+7: EINT3 External 3
+8: TC1OI TC1 under flow
+9: TC2OI TC2 under flow
+10: RTCMI RTC compare match
+11: TINT 64Hz tick
+12: UTXINT1 UART1 transmit FIFO half empty
+13: URXINT1 UART1 receive FIFO half full
+14: UMSINT UART1 modem status changed
+15: SSEOTI SSI1 end of transfer
+16: KBDINT Keyboard
+17: SS2RX SSI2 receive FIFO half or greater full
+18: SS2TX SSI2 transmit FIFO less than half empty
+28: UTXINT2 UART2 transmit FIFO half empty
+29: URXINT2 UART2 receive FIFO half full
+32: DAIINT DAI interface (FIQ)
+
+Example:
+ intc: interrupt-controller {
+ compatible = "cirrus,clps711x-intc";
+ reg = <0x80000000 0x4000>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/digicolor-ic.txt b/Documentation/devicetree/bindings/interrupt-controller/digicolor-ic.txt
new file mode 100644
index 000000000..42d41ec84
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/digicolor-ic.txt
@@ -0,0 +1,21 @@
+Conexant Digicolor Interrupt Controller
+
+Required properties:
+
+- compatible : should be "cnxt,cx92755-ic"
+- reg : Specifies base physical address and size of the interrupt controller
+ registers (IC) area
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value shall be 1.
+- syscon: A phandle to the syscon node describing UC registers
+
+Example:
+
+ intc: interrupt-controller@f0000040 {
+ compatible = "cnxt,cx92755-ic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0xf0000040 0x40>;
+ syscon = <&uc_regs>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
new file mode 100644
index 000000000..8a3c40829
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
@@ -0,0 +1,110 @@
+Specifying interrupt information for devices
+============================================
+
+1) Interrupt client nodes
+-------------------------
+
+Nodes that describe devices which generate interrupts must contain an
+"interrupts" property, an "interrupts-extended" property, or both. If both are
+present, the latter should take precedence; the former may be provided simply
+for compatibility with software that does not recognize the latter. These
+properties contain a list of interrupt specifiers, one per output interrupt. The
+format of the interrupt specifier is determined by the interrupt controller to
+which the interrupts are routed; see section 2 below for details.
+
+ Example:
+ interrupt-parent = <&intc1>;
+ interrupts = <5 0>, <6 0>;
+
+The "interrupt-parent" property is used to specify the controller to which
+interrupts are routed and contains a single phandle referring to the interrupt
+controller node. This property is inherited, so it may be specified in an
+interrupt client node or in any of its parent nodes. Interrupts listed in the
+"interrupts" property are always in reference to the node's interrupt parent.
+
+The "interrupts-extended" property is a special form for use when a node needs
+to reference multiple interrupt parents. Each entry in this property contains
+both the parent phandle and the interrupt specifier. "interrupts-extended"
+should only be used when a device has multiple interrupt parents.
+
+ Example:
+ interrupts-extended = <&intc1 5 1>, <&intc2 1 0>;
+
+2) Interrupt controller nodes
+-----------------------------
+
+A device is marked as an interrupt controller with the "interrupt-controller"
+property. This is a empty, boolean property. An additional "#interrupt-cells"
+property defines the number of cells needed to specify a single interrupt.
+
+It is the responsibility of the interrupt controller's binding to define the
+length and format of the interrupt specifier. The following two variants are
+commonly used:
+
+ a) one cell
+ -----------
+ The #interrupt-cells property is set to 1 and the single cell defines the
+ index of the interrupt within the controller.
+
+ Example:
+
+ vic: intc@10140000 {
+ compatible = "arm,versatile-vic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x10140000 0x1000>;
+ };
+
+ sic: intc@10003000 {
+ compatible = "arm,versatile-sic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x10003000 0x1000>;
+ interrupt-parent = <&vic>;
+ interrupts = <31>; /* Cascaded to vic */
+ };
+
+ b) two cells
+ ------------
+ The #interrupt-cells property is set to 2 and the first cell defines the
+ index of the interrupt within the controller, while the second cell is used
+ to specify any of the following flags:
+ - bits[3:0] trigger type and level flags
+ 1 = low-to-high edge triggered
+ 2 = high-to-low edge triggered
+ 4 = active high level-sensitive
+ 8 = active low level-sensitive
+
+ Example:
+
+ i2c@7000c000 {
+ gpioext: gpio-adnp@41 {
+ compatible = "ad,gpio-adnp";
+ reg = <0x41>;
+
+ interrupt-parent = <&gpio>;
+ interrupts = <160 1>;
+
+ gpio-controller;
+ #gpio-cells = <1>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ nr-gpios = <64>;
+ };
+
+ sx8634@2b {
+ compatible = "smtc,sx8634";
+ reg = <0x2b>;
+
+ interrupt-parent = <&gpioext>;
+ interrupts = <3 0x8>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ threshold = <0x40>;
+ sensitivity = <7>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt
new file mode 100644
index 000000000..aee38e7c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/lsi,zevio-intc.txt
@@ -0,0 +1,18 @@
+TI-NSPIRE interrupt controller
+
+Required properties:
+- compatible: Compatible property value should be "lsi,zevio-intc".
+
+- reg: Physical base address of the controller and length of memory mapped
+ region.
+
+- interrupt-controller : Identifies the node as an interrupt controller
+
+Example:
+
+interrupt-controller {
+ compatible = "lsi,zevio-intc";
+ interrupt-controller;
+ reg = <0xDC000000 0x1000>;
+ #interrupt-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-370-xp-mpic.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-370-xp-mpic.txt
new file mode 100644
index 000000000..5fc03134a
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,armada-370-xp-mpic.txt
@@ -0,0 +1,38 @@
+Marvell Armada 370, 375, 38x, XP Interrupt Controller
+-----------------------------------------------------
+
+Required properties:
+- compatible: Should be "marvell,mpic"
+- interrupt-controller: Identifies the node as an interrupt controller.
+- msi-controller: Identifies the node as an PCI Message Signaled
+ Interrupt controller.
+- #interrupt-cells: The number of cells to define the interrupts. Should be 1.
+ The cell is the IRQ number
+
+- reg: Should contain PMIC registers location and length. First pair
+ for the main interrupt registers, second pair for the per-CPU
+ interrupt registers. For this last pair, to be compliant with SMP
+ support, the "virtual" must be use (For the record, these registers
+ automatically map to the interrupt controller registers of the
+ current CPU)
+
+Optional properties:
+
+- interrupts: If defined, then it indicates that this MPIC is
+ connected as a slave to another interrupt controller. This is
+ typically the case on Armada 375 and Armada 38x, where the MPIC is
+ connected as a slave to the Cortex-A9 GIC. The provided interrupt
+ indicate to which GIC interrupt the MPIC output is connected.
+
+Example:
+
+ mpic: interrupt-controller@d0020000 {
+ compatible = "marvell,mpic";
+ #interrupt-cells = <1>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-controller;
+ msi-controller;
+ reg = <0xd0020a00 0x1d0>,
+ <0xd0021070 0x58>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/marvell,orion-intc.txt b/Documentation/devicetree/bindings/interrupt-controller/marvell,orion-intc.txt
new file mode 100644
index 000000000..2c11ac76f
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/marvell,orion-intc.txt
@@ -0,0 +1,48 @@
+Marvell Orion SoC interrupt controllers
+
+* Main interrupt controller
+
+Required properties:
+- compatible: shall be "marvell,orion-intc"
+- reg: base address(es) of interrupt registers starting with CAUSE register
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: number of cells to encode an interrupt source, shall be 1
+
+The interrupt sources map to the corresponding bits in the interrupt
+registers, i.e.
+- 0 maps to bit 0 of first base address,
+- 1 maps to bit 1 of first base address,
+- 32 maps to bit 0 of second base address, and so on.
+
+Example:
+ intc: interrupt-controller {
+ compatible = "marvell,orion-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ /* Dove has 64 first level interrupts */
+ reg = <0x20200 0x10>, <0x20210 0x10>;
+ };
+
+* Bridge interrupt controller
+
+Required properties:
+- compatible: shall be "marvell,orion-bridge-intc"
+- reg: base address of bridge interrupt registers starting with CAUSE register
+- interrupts: bridge interrupt of the main interrupt controller
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: number of cells to encode an interrupt source, shall be 1
+
+Optional properties:
+- marvell,#interrupts: number of interrupts provided by bridge interrupt
+ controller, defaults to 32 if not set
+
+Example:
+ bridge_intc: interrupt-controller {
+ compatible = "marvell,orion-bridge-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x20110 0x8>;
+ interrupts = <0>;
+ /* Dove bridge provides 5 interrupts */
+ marvell,#interrupts = <5>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt b/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt
new file mode 100644
index 000000000..aae4c384e
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/mips-gic.txt
@@ -0,0 +1,60 @@
+MIPS Global Interrupt Controller (GIC)
+
+The MIPS GIC routes external interrupts to individual VPEs and IRQ pins.
+It also supports local (per-processor) interrupts and software-generated
+interrupts which can be used as IPIs. The GIC also includes a free-running
+global timer, per-CPU count/compare timers, and a watchdog.
+
+Required properties:
+- compatible : Should be "mti,gic".
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt specifier. Should be 3.
+ - The first cell is the type of interrupt, local or shared.
+ See <include/dt-bindings/interrupt-controller/mips-gic.h>.
+ - The second cell is the GIC interrupt number.
+ - The third cell encodes the interrupt flags.
+ See <include/dt-bindings/interrupt-controller/irq.h> for a list of valid
+ flags.
+
+Optional properties:
+- reg : Base address and length of the GIC registers. If not present,
+ the base address reported by the hardware GCR_GIC_BASE will be used.
+- mti,reserved-cpu-vectors : Specifies the list of CPU interrupt vectors
+ to which the GIC may not route interrupts. Valid values are 2 - 7.
+ This property is ignored if the CPU is started in EIC mode.
+
+Required properties for timer sub-node:
+- compatible : Should be "mti,gic-timer".
+- interrupts : Interrupt for the GIC local timer.
+
+Optional properties for timer sub-node:
+- clocks : GIC timer operating clock.
+- clock-frequency : Clock frequency at which the GIC timers operate.
+
+Note that one of clocks or clock-frequency must be specified.
+
+Example:
+
+ gic: interrupt-controller@1bdc0000 {
+ compatible = "mti,gic";
+ reg = <0x1bdc0000 0x20000>;
+
+ interrupt-controller;
+ #interrupt-cells = <3>;
+
+ mti,reserved-cpu-vectors = <7>;
+
+ timer {
+ compatible = "mti,gic-timer";
+ interrupts = <GIC_LOCAL 1 IRQ_TYPE_NONE>;
+ clock-frequency = <50000000>;
+ };
+ };
+
+ uart@18101400 {
+ ...
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SHARED 24 IRQ_TYPE_LEVEL_HIGH>;
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra-ictlr.txt b/Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra-ictlr.txt
new file mode 100644
index 000000000..1099fe078
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/nvidia,tegra-ictlr.txt
@@ -0,0 +1,43 @@
+NVIDIA Legacy Interrupt Controller
+
+All Tegra SoCs contain a legacy interrupt controller that routes
+interrupts to the GIC, and also serves as a wakeup source. It is also
+referred to as "ictlr", hence the name of the binding.
+
+The HW block exposes a number of interrupt controllers, each
+implementing a set of 32 interrupts.
+
+Required properties:
+
+- compatible : should be: "nvidia,tegra<chip>-ictlr". The LIC on
+ subsequent SoCs remained backwards-compatible with Tegra30, so on
+ Tegra generations later than Tegra30 the compatible value should
+ include "nvidia,tegra30-ictlr".
+- reg : Specifies base physical address and size of the registers.
+ Each controller must be described separately (Tegra20 has 4 of them,
+ whereas Tegra30 and later have 5"
+- interrupt-controller : Identifies the node as an interrupt controller.
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value must be 3.
+- interrupt-parent : a phandle to the GIC these interrupts are routed
+ to.
+
+Notes:
+
+- Because this HW ultimately routes interrupts to the GIC, the
+ interrupt specifier must be that of the GIC.
+- Only SPIs can use the ictlr as an interrupt parent. SGIs and PPIs
+ are explicitly forbidden.
+
+Example:
+
+ ictlr: interrupt-controller@60004000 {
+ compatible = "nvidia,tegra20-ictlr", "nvidia,tegra-ictlr";
+ reg = <0x60004000 64>,
+ <0x60004100 64>,
+ <0x60004200 64>,
+ <0x60004300 64>;
+ interrupt-controller;
+ #interrupt-cells = <3>;
+ interrupt-parent = <&intc>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/opencores,or1k-pic.txt b/Documentation/devicetree/bindings/interrupt-controller/opencores,or1k-pic.txt
new file mode 100644
index 000000000..55c04faa3
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/opencores,or1k-pic.txt
@@ -0,0 +1,23 @@
+OpenRISC 1000 Programmable Interrupt Controller
+
+Required properties:
+
+- compatible : should be "opencores,or1k-pic-level" for variants with
+ level triggered interrupt lines, "opencores,or1k-pic-edge" for variants with
+ edge triggered interrupt lines or "opencores,or1200-pic" for machines
+ with the non-spec compliant or1200 type implementation.
+
+ "opencores,or1k-pic" is also provided as an alias to "opencores,or1200-pic",
+ but this is only for backwards compatibility.
+
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value shall be 1.
+
+Example:
+
+intc: interrupt-controller {
+ compatible = "opencores,or1k-pic-level";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,intc-irqpin.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,intc-irqpin.txt
new file mode 100644
index 000000000..4f7946ae8
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,intc-irqpin.txt
@@ -0,0 +1,27 @@
+DT bindings for the R-/SH-Mobile irqpin controller
+
+Required properties:
+
+- compatible: has to be "renesas,intc-irqpin-<soctype>", "renesas,intc-irqpin"
+ as fallback.
+ Examples with soctypes are:
+ - "renesas,intc-irqpin-r8a7740" (R-Mobile A1)
+ - "renesas,intc-irqpin-r8a7778" (R-Car M1A)
+ - "renesas,intc-irqpin-r8a7779" (R-Car H1)
+ - "renesas,intc-irqpin-sh73a0" (SH-Mobile AG5)
+
+- reg: Base address and length of each register bank used by the external
+ IRQ pins driven by the interrupt controller hardware module. The base
+ addresses, length and number of required register banks varies with soctype.
+
+- #interrupt-cells: has to be <2>: an interrupt index and flags, as defined in
+ interrupts.txt in this directory
+
+Optional properties:
+
+- any properties, listed in interrupts.txt, and any standard resource allocation
+ properties
+- sense-bitfield-width: width of a single sense bitfield in the SENSE register,
+ if different from the default 4 bits
+- control-parent: disable and enable interrupts on the parent interrupt
+ controller, needed for some broken implementations
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
new file mode 100644
index 000000000..63633bdea
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt
@@ -0,0 +1,34 @@
+DT bindings for the R-Mobile/R-Car interrupt controller
+
+Required properties:
+
+- compatible: has to be "renesas,irqc-<soctype>", "renesas,irqc" as fallback.
+ Examples with soctypes are:
+ - "renesas,irqc-r8a73a4" (R-Mobile APE6)
+ - "renesas,irqc-r8a7790" (R-Car H2)
+ - "renesas,irqc-r8a7791" (R-Car M2-W)
+ - "renesas,irqc-r8a7792" (R-Car V2H)
+ - "renesas,irqc-r8a7793" (R-Car M2-N)
+ - "renesas,irqc-r8a7794" (R-Car E2)
+- #interrupt-cells: has to be <2>: an interrupt index and flags, as defined in
+ interrupts.txt in this directory
+- clocks: Must contain a reference to the functional clock.
+
+Optional properties:
+
+- any properties, listed in interrupts.txt, and any standard resource allocation
+ properties
+
+Example:
+
+ irqc0: interrupt-controller@e61c0000 {
+ compatible = "renesas,irqc-r8a7790", "renesas,irqc";
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ reg = <0 0xe61c0000 0 0x200>;
+ interrupts = <0 0 IRQ_TYPE_LEVEL_HIGH>,
+ <0 1 IRQ_TYPE_LEVEL_HIGH>,
+ <0 2 IRQ_TYPE_LEVEL_HIGH>,
+ <0 3 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp4_clks R8A7790_CLK_IRQC>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/samsung,s3c24xx-irq.txt b/Documentation/devicetree/bindings/interrupt-controller/samsung,s3c24xx-irq.txt
new file mode 100644
index 000000000..c54c5a9a2
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/samsung,s3c24xx-irq.txt
@@ -0,0 +1,53 @@
+Samsung S3C24XX Interrupt Controllers
+
+The S3C24XX SoCs contain a custom set of interrupt controllers providing a
+varying number of interrupt sources. The set consists of a main- and sub-
+controller and on newer SoCs even a second main controller.
+
+Required properties:
+- compatible: Compatible property value should be "samsung,s3c2410-irq"
+ for machines before s3c2416 and "samsung,s3c2416-irq" for s3c2416 and later.
+
+- reg: Physical base address of the controller and length of memory mapped
+ region.
+
+- interrupt-controller : Identifies the node as an interrupt controller
+
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value shall be 4 and interrupt descriptor shall
+ have the following format:
+ <ctrl_num parent_irq ctrl_irq type>
+
+ ctrl_num contains the controller to use:
+ - 0 ... main controller
+ - 1 ... sub controller
+ - 2 ... second main controller on s3c2416 and s3c2450
+ parent_irq contains the parent bit in the main controller and will be
+ ignored in main controllers
+ ctrl_irq contains the interrupt bit of the controller
+ type contains the trigger type to use
+
+Example:
+
+ interrupt-controller@4a000000 {
+ compatible = "samsung,s3c2410-irq";
+ reg = <0x4a000000 0x100>;
+ interrupt-controller;
+ #interrupt-cells=<4>;
+ };
+
+ [...]
+
+ serial@50000000 {
+ compatible = "samsung,s3c2410-uart";
+ reg = <0x50000000 0x4000>;
+ interrupt-parent = <&subintc>;
+ interrupts = <1 28 0 4>, <1 28 1 4>;
+ };
+
+ rtc@57000000 {
+ compatible = "samsung,s3c2410-rtc";
+ reg = <0x57000000 0x100>;
+ interrupt-parent = <&intc>;
+ interrupts = <0 30 0 3>, <0 8 0 3>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/snps,dw-apb-ictl.txt b/Documentation/devicetree/bindings/interrupt-controller/snps,dw-apb-ictl.txt
new file mode 100644
index 000000000..492911744
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/snps,dw-apb-ictl.txt
@@ -0,0 +1,32 @@
+Synopsys DesignWare APB interrupt controller (dw_apb_ictl)
+
+Synopsys DesignWare provides interrupt controller IP for APB known as
+dw_apb_ictl. The IP is used as secondary interrupt controller in some SoCs with
+APB bus, e.g. Marvell Armada 1500.
+
+Required properties:
+- compatible: shall be "snps,dw-apb-ictl"
+- reg: physical base address of the controller and length of memory mapped
+ region starting with ENABLE_LOW register
+- interrupt-controller: identifies the node as an interrupt controller
+- #interrupt-cells: number of cells to encode an interrupt-specifier, shall be 1
+- interrupts: interrupt reference to primary interrupt controller
+- interrupt-parent: (optional) reference specific primary interrupt controller
+
+The interrupt sources map to the corresponding bits in the interrupt
+registers, i.e.
+- 0 maps to bit 0 of low interrupts,
+- 1 maps to bit 1 of low interrupts,
+- 32 maps to bit 0 of high interrupts,
+- 33 maps to bit 1 of high interrupts,
+- (optional) fast interrupts start at 64.
+
+Example:
+ aic: interrupt-controller@3000 {
+ compatible = "snps,dw-apb-ictl";
+ reg = <0x3000 0xc00>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt b/Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt
new file mode 100644
index 000000000..ced601406
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/st,sti-irq-syscfg.txt
@@ -0,0 +1,35 @@
+STMicroelectronics STi System Configuration Controlled IRQs
+-----------------------------------------------------------
+
+On STi based systems; External, CTI (Core Sight), PMU (Performance Management),
+and PL310 L2 Cache IRQs are controlled using System Configuration registers.
+This driver is used to unmask them prior to use.
+
+Required properties:
+- compatible : Should be set to one of:
+ "st,stih415-irq-syscfg"
+ "st,stih416-irq-syscfg"
+ "st,stih407-irq-syscfg"
+ "st,stid127-irq-syscfg"
+- st,syscfg : Phandle to Cortex-A9 IRQ system config registers
+- st,irq-device : Array of IRQs to enable - should be 2 in length
+- st,fiq-device : Array of FIQs to enable - should be 2 in length
+
+Optional properties:
+- st,invert-ext : External IRQs can be inverted at will. This property inverts
+ these IRQs using bitwise logic. A number of defines have been
+ provided for convenience:
+ ST_IRQ_SYSCFG_EXT_1_INV
+ ST_IRQ_SYSCFG_EXT_2_INV
+ ST_IRQ_SYSCFG_EXT_3_INV
+Example:
+
+irq-syscfg {
+ compatible = "st,stih416-irq-syscfg";
+ st,syscfg = <&syscfg_cpu>;
+ st,irq-device = <ST_IRQ_SYSCFG_PMU_0>,
+ <ST_IRQ_SYSCFG_PMU_1>;
+ st,fiq-device = <ST_IRQ_SYSCFG_DISABLED>,
+ <ST_IRQ_SYSCFG_DISABLED>;
+ st,invert-ext = <(ST_IRQ_SYSCFG_EXT_1_INV | ST_IRQ_SYSCFG_EXT_3_INV)>;
+};
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,keystone-irq.txt b/Documentation/devicetree/bindings/interrupt-controller/ti,keystone-irq.txt
new file mode 100644
index 000000000..d9bb106bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/ti,keystone-irq.txt
@@ -0,0 +1,36 @@
+Keystone 2 IRQ controller IP
+
+On Keystone SOCs, DSP cores can send interrupts to ARM
+host using the IRQ controller IP. It provides 28 IRQ signals to ARM.
+The IRQ handler running on HOST OS can identify DSP signal source by
+analyzing SRCCx bits in IPCARx registers. This is one of the component
+used by the IPC mechanism used on Keystone SOCs.
+
+Required Properties:
+- compatible: should be "ti,keystone-irq"
+- ti,syscon-dev : phandle and offset pair. The phandle to syscon used to
+ access device control registers and the offset inside
+ device control registers range.
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode interrupt
+ source should be 1.
+- interrupts: interrupt reference to primary interrupt controller
+
+Please refer to interrupts.txt in this directory for details of the common
+Interrupt Controllers bindings used by client devices.
+
+Example:
+ kirq0: keystone_irq0@026202a0 {
+ compatible = "ti,keystone-irq";
+ ti,syscon-dev = <&devctrl 0x2a0>;
+ interrupts = <GIC_SPI 4 IRQ_TYPE_EDGE_RISING>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
+
+ dsp0: dsp0 {
+ compatible = "linux,rproc-user";
+ ...
+ interrupt-parent = <&kirq0>;
+ interrupts = <10 2>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,omap-intc-irq.txt b/Documentation/devicetree/bindings/interrupt-controller/ti,omap-intc-irq.txt
new file mode 100644
index 000000000..38ce5d037
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/ti,omap-intc-irq.txt
@@ -0,0 +1,28 @@
+Omap2/3 intc controller
+
+On TI omap2 and 3 the intc interrupt controller can provide
+96 or 128 IRQ signals to the ARM host depending on the SoC.
+
+Required Properties:
+- compatible: should be one of
+ "ti,omap2-intc"
+ "ti,omap3-intc"
+ "ti,dm814-intc"
+ "ti,dm816-intc"
+ "ti,am33xx-intc"
+
+- interrupt-controller : Identifies the node as an interrupt controller
+- #interrupt-cells : Specifies the number of cells needed to encode interrupt
+ source, should be 1 for intc
+- interrupts: interrupt reference to primary interrupt controller
+
+Please refer to interrupts.txt in this directory for details of the common
+Interrupt Controllers bindings used by client devices.
+
+Example:
+ intc: interrupt-controller@48200000 {
+ compatible = "ti,omap3-intc";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x48200000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu b/Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu
new file mode 100644
index 000000000..43effa0a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/ti,omap4-wugen-mpu
@@ -0,0 +1,33 @@
+TI OMAP4 Wake-up Generator
+
+All TI OMAP4/5 (and their derivatives) an interrupt controller that
+routes interrupts to the GIC, and also serves as a wakeup source. It
+is also referred to as "WUGEN-MPU", hence the name of the binding.
+
+Reguired properties:
+
+- compatible : should contain at least "ti,omap4-wugen-mpu" or
+ "ti,omap5-wugen-mpu"
+- reg : Specifies base physical address and size of the registers.
+- interrupt-controller : Identifies the node as an interrupt controller.
+- #interrupt-cells : Specifies the number of cells needed to encode an
+ interrupt source. The value must be 3.
+- interrupt-parent : a phandle to the GIC these interrupts are routed
+ to.
+
+Notes:
+
+- Because this HW ultimately routes interrupts to the GIC, the
+ interrupt specifier must be that of the GIC.
+- Only SPIs can use the WUGEN as an interrupt parent. SGIs and PPIs
+ are explicitly forbiden.
+
+Example:
+
+ wakeupgen: interrupt-controller@48281000 {
+ compatible = "ti,omap5-wugen-mpu", "ti,omap4-wugen-mpu";
+ interrupt-controller;
+ #interrupt-cells = <3>;
+ reg = <0x48281000 0x1000>;
+ interrupt-parent = <&gic>;
+ };
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
new file mode 100644
index 000000000..06760503a
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt
@@ -0,0 +1,71 @@
+* ARM System MMU Architecture Implementation
+
+ARM SoCs may contain an implementation of the ARM System Memory
+Management Unit Architecture, which can be used to provide 1 or 2 stages
+of address translation to bus masters external to the CPU.
+
+The SMMU may also raise interrupts in response to various fault
+conditions.
+
+** System MMU required properties:
+
+- compatible : Should be one of:
+
+ "arm,smmu-v1"
+ "arm,smmu-v2"
+ "arm,mmu-400"
+ "arm,mmu-401"
+ "arm,mmu-500"
+
+ depending on the particular implementation and/or the
+ version of the architecture implemented.
+
+- reg : Base address and size of the SMMU.
+
+- #global-interrupts : The number of global interrupts exposed by the
+ device.
+
+- interrupts : Interrupt list, with the first #global-irqs entries
+ corresponding to the global interrupts and any
+ following entries corresponding to context interrupts,
+ specified in order of their indexing by the SMMU.
+
+ For SMMUv2 implementations, there must be exactly one
+ interrupt per context bank. In the case of a single,
+ combined interrupt, it must be listed multiple times.
+
+- mmu-masters : A list of phandles to device nodes representing bus
+ masters for which the SMMU can provide a translation
+ and their corresponding StreamIDs (see example below).
+ Each device node linked from this list must have a
+ "#stream-id-cells" property, indicating the number of
+ StreamIDs associated with it.
+
+** System MMU optional properties:
+
+- calxeda,smmu-secure-config-access : Enable proper handling of buggy
+ implementations that always use secure access to
+ SMMU configuration registers. In this case non-secure
+ aliases of secure registers have to be used during
+ SMMU configuration.
+
+Example:
+
+ smmu {
+ compatible = "arm,smmu-v1";
+ reg = <0xba5e0000 0x10000>;
+ #global-interrupts = <2>;
+ interrupts = <0 32 4>,
+ <0 33 4>,
+ <0 34 4>, /* This is the first context interrupt */
+ <0 35 4>,
+ <0 36 4>,
+ <0 37 4>;
+
+ /*
+ * Two DMA controllers, the first with two StreamIDs (0xd01d
+ * and 0xd01e) and the second with only one (0xd11c).
+ */
+ mmu-masters = <&dma0 0xd01d 0xd01e>,
+ <&dma1 0xd11c>;
+ };
diff --git a/Documentation/devicetree/bindings/iommu/iommu.txt b/Documentation/devicetree/bindings/iommu/iommu.txt
new file mode 100644
index 000000000..5a8b4624d
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/iommu.txt
@@ -0,0 +1,182 @@
+This document describes the generic device tree binding for IOMMUs and their
+master(s).
+
+
+IOMMU device node:
+==================
+
+An IOMMU can provide the following services:
+
+* Remap address space to allow devices to access physical memory ranges that
+ they otherwise wouldn't be capable of accessing.
+
+ Example: 32-bit DMA to 64-bit physical addresses
+
+* Implement scatter-gather at page level granularity so that the device does
+ not have to.
+
+* Provide system protection against "rogue" DMA by forcing all accesses to go
+ through the IOMMU and faulting when encountering accesses to unmapped
+ address regions.
+
+* Provide address space isolation between multiple contexts.
+
+ Example: Virtualization
+
+Device nodes compatible with this binding represent hardware with some of the
+above capabilities.
+
+IOMMUs can be single-master or multiple-master. Single-master IOMMU devices
+typically have a fixed association to the master device, whereas multiple-
+master IOMMU devices can translate accesses from more than one master.
+
+The device tree node of the IOMMU device's parent bus must contain a valid
+"dma-ranges" property that describes how the physical address space of the
+IOMMU maps to memory. An empty "dma-ranges" property means that there is a
+1:1 mapping from IOMMU to memory.
+
+Required properties:
+--------------------
+- #iommu-cells: The number of cells in an IOMMU specifier needed to encode an
+ address.
+
+The meaning of the IOMMU specifier is defined by the device tree binding of
+the specific IOMMU. Below are a few examples of typical use-cases:
+
+- #iommu-cells = <0>: Single master IOMMU devices are not configurable and
+ therefore no additional information needs to be encoded in the specifier.
+ This may also apply to multiple master IOMMU devices that do not allow the
+ association of masters to be configured. Note that an IOMMU can by design
+ be multi-master yet only expose a single master in a given configuration.
+ In such cases the number of cells will usually be 1 as in the next case.
+- #iommu-cells = <1>: Multiple master IOMMU devices may need to be configured
+ in order to enable translation for a given master. In such cases the single
+ address cell corresponds to the master device's ID. In some cases more than
+ one cell can be required to represent a single master ID.
+- #iommu-cells = <4>: Some IOMMU devices allow the DMA window for masters to
+ be configured. The first cell of the address in this may contain the master
+ device's ID for example, while the second cell could contain the start of
+ the DMA window for the given device. The length of the DMA window is given
+ by the third and fourth cells.
+
+Note that these are merely examples and real-world use-cases may use different
+definitions to represent their individual needs. Always refer to the specific
+IOMMU binding for the exact meaning of the cells that make up the specifier.
+
+
+IOMMU master node:
+==================
+
+Devices that access memory through an IOMMU are called masters. A device can
+have multiple master interfaces (to one or more IOMMU devices).
+
+Required properties:
+--------------------
+- iommus: A list of phandle and IOMMU specifier pairs that describe the IOMMU
+ master interfaces of the device. One entry in the list describes one master
+ interface of the device.
+
+When an "iommus" property is specified in a device tree node, the IOMMU will
+be used for address translation. If a "dma-ranges" property exists in the
+device's parent node it will be ignored. An exception to this rule is if the
+referenced IOMMU is disabled, in which case the "dma-ranges" property of the
+parent shall take effect. Note that merely disabling a device tree node does
+not guarantee that the IOMMU is really disabled since the hardware may not
+have a means to turn off translation. But it is invalid in such cases to
+disable the IOMMU's device tree node in the first place because it would
+prevent any driver from properly setting up the translations.
+
+
+Notes:
+======
+
+One possible extension to the above is to use an "iommus" property along with
+a "dma-ranges" property in a bus device node (such as PCI host bridges). This
+can be useful to describe how children on the bus relate to the IOMMU if they
+are not explicitly listed in the device tree (e.g. PCI devices). However, the
+requirements of that use-case haven't been fully determined yet. Implementing
+this is therefore not recommended without further discussion and extension of
+this binding.
+
+
+Examples:
+=========
+
+Single-master IOMMU:
+--------------------
+
+ iommu {
+ #iommu-cells = <0>;
+ };
+
+ master {
+ iommus = <&{/iommu}>;
+ };
+
+Multiple-master IOMMU with fixed associations:
+----------------------------------------------
+
+ /* multiple-master IOMMU */
+ iommu {
+ /*
+ * Masters are statically associated with this IOMMU and share
+ * the same address translations because the IOMMU does not
+ * have sufficient information to distinguish between masters.
+ *
+ * Consequently address translation is always on or off for
+ * all masters at any given point in time.
+ */
+ #iommu-cells = <0>;
+ };
+
+ /* static association with IOMMU */
+ master@1 {
+ reg = <1>;
+ iommus = <&{/iommu}>;
+ };
+
+ /* static association with IOMMU */
+ master@2 {
+ reg = <2>;
+ iommus = <&{/iommu}>;
+ };
+
+Multiple-master IOMMU:
+----------------------
+
+ iommu {
+ /* the specifier represents the ID of the master */
+ #iommu-cells = <1>;
+ };
+
+ master@1 {
+ /* device has master ID 42 in the IOMMU */
+ iommus = <&{/iommu} 42>;
+ };
+
+ master@2 {
+ /* device has master IDs 23 and 24 in the IOMMU */
+ iommus = <&{/iommu} 23>, <&{/iommu} 24>;
+ };
+
+Multiple-master IOMMU with configurable DMA window:
+---------------------------------------------------
+
+ / {
+ iommu {
+ /*
+ * One cell for the master ID and one cell for the
+ * address of the DMA window. The length of the DMA
+ * window is encoded in two cells.
+ *
+ * The DMA window is the range addressable by the
+ * master (i.e. the I/O virtual address space).
+ */
+ #iommu-cells = <4>;
+ };
+
+ master {
+ /* master ID 42, 4 GiB DMA window starting at 0 */
+ iommus = <&{/iommu} 42 0 0x1 0x0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt b/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt
new file mode 100644
index 000000000..099d9362e
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt
@@ -0,0 +1,14 @@
+NVIDIA Tegra 20 GART
+
+Required properties:
+- compatible: "nvidia,tegra20-gart"
+- reg: Two pairs of cells specifying the physical address and size of
+ the memory controller registers and the GART aperture respectively.
+
+Example:
+
+ gart {
+ compatible = "nvidia,tegra20-gart";
+ reg = <0x7000f024 0x00000018 /* controller registers */
+ 0x58000000 0x02000000>; /* GART aperture */
+ };
diff --git a/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt b/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt
new file mode 100644
index 000000000..89fb5434b
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/nvidia,tegra30-smmu.txt
@@ -0,0 +1,21 @@
+NVIDIA Tegra 30 IOMMU H/W, SMMU (System Memory Management Unit)
+
+Required properties:
+- compatible : "nvidia,tegra30-smmu"
+- reg : Should contain 3 register banks(address and length) for each
+ of the SMMU register blocks.
+- interrupts : Should contain MC General interrupt.
+- nvidia,#asids : # of ASIDs
+- dma-window : IOVA start address and length.
+- nvidia,ahb : phandle to the ahb bus connected to SMMU.
+
+Example:
+ smmu {
+ compatible = "nvidia,tegra30-smmu";
+ reg = <0x7000f010 0x02c
+ 0x7000f1f0 0x010
+ 0x7000f228 0x05c>;
+ nvidia,#asids = <4>; /* # of ASIDs */
+ dma-window = <0 0x40000000>; /* IOVA start & length */
+ nvidia,ahb = <&ahb>;
+ };
diff --git a/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
new file mode 100644
index 000000000..cd29083e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/renesas,ipmmu-vmsa.txt
@@ -0,0 +1,41 @@
+* Renesas VMSA-Compatible IOMMU
+
+The IPMMU is an IOMMU implementation compatible with the ARM VMSA page tables.
+It provides address translation for bus masters outside of the CPU, each
+connected to the IPMMU through a port called micro-TLB.
+
+
+Required Properties:
+
+ - compatible: Must contain "renesas,ipmmu-vmsa".
+ - reg: Base address and size of the IPMMU registers.
+ - interrupts: Specifiers for the MMU fault interrupts. For instances that
+ support secure mode two interrupts must be specified, for non-secure and
+ secure mode, in that order. For instances that don't support secure mode a
+ single interrupt must be specified.
+
+ - #iommu-cells: Must be 1.
+
+Each bus master connected to an IPMMU must reference the IPMMU in its device
+node with the following property:
+
+ - iommus: A reference to the IPMMU in two cells. The first cell is a phandle
+ to the IPMMU and the second cell the number of the micro-TLB that the
+ device is connected to.
+
+
+Example: R8A7791 IPMMU-MX and VSP1-D0 bus master
+
+ ipmmu_mx: mmu@fe951000 {
+ compatible = "renasas,ipmmu-vmsa";
+ reg = <0 0xfe951000 0 0x1000>;
+ interrupts = <0 222 IRQ_TYPE_LEVEL_HIGH>,
+ <0 221 IRQ_TYPE_LEVEL_HIGH>;
+ #iommu-cells = <1>;
+ };
+
+ vsp1@fe928000 {
+ ...
+ iommus = <&ipmmu_mx 13>;
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
new file mode 100644
index 000000000..9a55ac373
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/rockchip,iommu.txt
@@ -0,0 +1,26 @@
+Rockchip IOMMU
+==============
+
+A Rockchip DRM iommu translates io virtual addresses to physical addresses for
+its master device. Each slave device is bound to a single master device, and
+shares its clocks, power domain and irq.
+
+Required properties:
+- compatible : Should be "rockchip,iommu"
+- reg : Address space for the configuration registers
+- interrupts : Interrupt specifier for the IOMMU instance
+- interrupt-names : Interrupt name for the IOMMU instance
+- #iommu-cells : Should be <0>. This indicates the iommu is a
+ "single-master" device, and needs no additional information
+ to associate with its master device. See:
+ Documentation/devicetree/bindings/iommu/iommu.txt
+
+Example:
+
+ vopl_mmu: iommu@ff940300 {
+ compatible = "rockchip,iommu";
+ reg = <0xff940300 0x100>;
+ interrupts = <GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "vopl_mmu";
+ #iommu-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/iommu/samsung,sysmmu.txt b/Documentation/devicetree/bindings/iommu/samsung,sysmmu.txt
new file mode 100644
index 000000000..729543c47
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/samsung,sysmmu.txt
@@ -0,0 +1,70 @@
+Samsung Exynos IOMMU H/W, System MMU (System Memory Management Unit)
+
+Samsung's Exynos architecture contains System MMUs that enables scattered
+physical memory chunks visible as a contiguous region to DMA-capable peripheral
+devices like MFC, FIMC, FIMD, GScaler, FIMC-IS and so forth.
+
+System MMU is an IOMMU and supports identical translation table format to
+ARMv7 translation tables with minimum set of page properties including access
+permissions, shareability and security protection. In addition, System MMU has
+another capabilities like L2 TLB or block-fetch buffers to minimize translation
+latency.
+
+System MMUs are in many to one relation with peripheral devices, i.e. single
+peripheral device might have multiple System MMUs (usually one for each bus
+master), but one System MMU can handle transactions from only one peripheral
+device. The relation between a System MMU and the peripheral device needs to be
+defined in device node of the peripheral device.
+
+MFC in all Exynos SoCs and FIMD, M2M Scalers and G2D in Exynos5420 has 2 System
+MMUs.
+* MFC has one System MMU on its left and right bus.
+* FIMD in Exynos5420 has one System MMU for window 0 and 4, the other system MMU
+ for window 1, 2 and 3.
+* M2M Scalers and G2D in Exynos5420 has one System MMU on the read channel and
+ the other System MMU on the write channel.
+The drivers must consider how to handle those System MMUs. One of the idea is
+to implement child devices or sub-devices which are the client devices of the
+System MMU.
+
+Note:
+The current DT binding for the Exynos System MMU is incomplete.
+The following properties can be removed or changed, if found incompatible with
+the "Generic IOMMU Binding" support for attaching devices to the IOMMU.
+
+Required properties:
+- compatible: Should be "samsung,exynos-sysmmu"
+- reg: A tuple of base address and size of System MMU registers.
+- interrupt-parent: The phandle of the interrupt controller of System MMU
+- interrupts: An interrupt specifier for interrupt signal of System MMU,
+ according to the format defined by a particular interrupt
+ controller.
+- clock-names: Should be "sysmmu" if the System MMU is needed to gate its clock.
+ Optional "master" if the clock to the System MMU is gated by
+ another gate clock other than "sysmmu".
+ Exynos4 SoCs, there needs no "master" clock.
+ Exynos5 SoCs, some System MMUs must have "master" clocks.
+- clocks: Required if the System MMU is needed to gate its clock.
+- power-domains: Required if the System MMU is needed to gate its power.
+ Please refer to the following document:
+ Documentation/devicetree/bindings/arm/exynos/power_domain.txt
+
+Examples:
+ gsc_0: gsc@13e00000 {
+ compatible = "samsung,exynos5-gsc";
+ reg = <0x13e00000 0x1000>;
+ interrupts = <0 85 0>;
+ power-domains = <&pd_gsc>;
+ clocks = <&clock CLK_GSCL0>;
+ clock-names = "gscl";
+ };
+
+ sysmmu_gsc0: sysmmu@13E80000 {
+ compatible = "samsung,exynos-sysmmu";
+ reg = <0x13E80000 0x1000>;
+ interrupt-parent = <&combiner>;
+ interrupts = <2 0>;
+ clock-names = "sysmmu", "master";
+ clocks = <&clock CLK_SMMU_GSCL0>, <&clock CLK_GSCL0>;
+ power-domains = <&pd_gsc>;
+ };
diff --git a/Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt b/Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt
new file mode 100644
index 000000000..42531dc38
--- /dev/null
+++ b/Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt
@@ -0,0 +1,26 @@
+OMAP2+ IOMMU
+
+Required properties:
+- compatible : Should be one of,
+ "ti,omap2-iommu" for OMAP2/OMAP3 IOMMU instances
+ "ti,omap4-iommu" for OMAP4/OMAP5 IOMMU instances
+ "ti,dra7-iommu" for DRA7xx IOMMU instances
+- ti,hwmods : Name of the hwmod associated with the IOMMU instance
+- reg : Address space for the configuration registers
+- interrupts : Interrupt specifier for the IOMMU instance
+
+Optional properties:
+- ti,#tlb-entries : Number of entries in the translation look-aside buffer.
+ Should be either 8 or 32 (default: 32)
+- ti,iommu-bus-err-back : Indicates the IOMMU instance supports throwing
+ back a bus error response on MMU faults.
+
+Example:
+ /* OMAP3 ISP MMU */
+ mmu_isp: mmu@480bd400 {
+ compatible = "ti,omap2-iommu";
+ reg = <0x480bd400 0x80>;
+ interrupts = <24>;
+ ti,hwmods = "mmu_isp";
+ ti,#tlb-entries = <8>;
+ };
diff --git a/Documentation/devicetree/bindings/leds/common.txt b/Documentation/devicetree/bindings/leds/common.txt
new file mode 100644
index 000000000..747c53805
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/common.txt
@@ -0,0 +1,55 @@
+Common leds properties.
+
+LED and flash LED devices provide the same basic functionality as current
+regulators, but extended with LED and flash LED specific features like
+blinking patterns, flash timeout, flash faults and external flash strobe mode.
+
+Many LED devices expose more than one current output that can be connected
+to one or more discrete LED component. Since the arrangement of connections
+can influence the way of the LED device initialization, the LED components
+have to be tightly coupled with the LED device binding. They are represented
+by child nodes of the parent LED device binding.
+
+Optional properties for child nodes:
+- led-sources : List of device current outputs the LED is connected to. The
+ outputs are identified by the numbers that must be defined
+ in the LED device binding documentation.
+- label : The label for this LED. If omitted, the label is taken from the node
+ name (excluding the unit address). It has to uniquely identify
+ a device, i.e. no other LED class device can be assigned the same
+ label.
+
+- linux,default-trigger : This parameter, if present, is a
+ string defining the trigger assigned to the LED. Current triggers are:
+ "backlight" - LED will act as a back-light, controlled by the framebuffer
+ system
+ "default-on" - LED will turn on (but for leds-gpio see "default-state"
+ property in Documentation/devicetree/bindings/gpio/led.txt)
+ "heartbeat" - LED "double" flashes at a load average based rate
+ "ide-disk" - LED indicates disk activity
+ "timer" - LED flashes at a fixed, configurable rate
+
+- max-microamp : maximum intensity in microamperes of the LED
+ (torch LED for flash devices)
+- flash-max-microamp : maximum intensity in microamperes of the
+ flash LED; it is mandatory if the LED should
+ support the flash mode
+- flash-timeout-us : timeout in microseconds after which the flash
+ LED is turned off
+
+
+Examples:
+
+system-status {
+ label = "Status";
+ linux,default-trigger = "heartbeat";
+ ...
+};
+
+camera-flash {
+ label = "Flash";
+ led-sources = <0>, <1>;
+ max-microamp = <50000>;
+ flash-max-microamp = <320000>;
+ flash-timeout-us = <500000>;
+};
diff --git a/Documentation/devicetree/bindings/leds/leds-gpio.txt b/Documentation/devicetree/bindings/leds/leds-gpio.txt
new file mode 100644
index 000000000..fea1ebfe2
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-gpio.txt
@@ -0,0 +1,66 @@
+LEDs connected to GPIO lines
+
+Required properties:
+- compatible : should be "gpio-leds".
+
+Each LED is represented as a sub-node of the gpio-leds device. Each
+node's name represents the name of the corresponding LED.
+
+LED sub-node properties:
+- gpios : Should specify the LED's GPIO, see "gpios property" in
+ Documentation/devicetree/bindings/gpio/gpio.txt. Active low LEDs should be
+ indicated using flags in the GPIO specifier.
+- label : (optional)
+ see Documentation/devicetree/bindings/leds/common.txt
+- linux,default-trigger : (optional)
+ see Documentation/devicetree/bindings/leds/common.txt
+- default-state: (optional) The initial state of the LED. Valid
+ values are "on", "off", and "keep". If the LED is already on or off
+ and the default-state property is set the to same value, then no
+ glitch should be produced where the LED momentarily turns off (or
+ on). The "keep" setting will keep the LED at whatever its current
+ state is, without producing a glitch. The default is off if this
+ property is not present.
+- retain-state-suspended: (optional) The suspend state can be retained.Such
+ as charge-led gpio.
+
+Examples:
+
+#include <dt-bindings/gpio/gpio.h>
+
+leds {
+ compatible = "gpio-leds";
+ hdd {
+ label = "IDE Activity";
+ gpios = <&mcu_pio 0 GPIO_ACTIVE_LOW>;
+ linux,default-trigger = "ide-disk";
+ };
+
+ fault {
+ gpios = <&mcu_pio 1 GPIO_ACTIVE_HIGH>;
+ /* Keep LED on if BIOS detected hardware fault */
+ default-state = "keep";
+ };
+};
+
+run-control {
+ compatible = "gpio-leds";
+ red {
+ gpios = <&mpc8572 6 GPIO_ACTIVE_HIGH>;
+ default-state = "off";
+ };
+ green {
+ gpios = <&mpc8572 7 GPIO_ACTIVE_HIGH>;
+ default-state = "on";
+ };
+};
+
+leds {
+ compatible = "gpio-leds";
+
+ charger-led {
+ gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+ linux,default-trigger = "max8903-charger-charging";
+ retain-state-suspended;
+ };
+};
diff --git a/Documentation/devicetree/bindings/leds/leds-lp55xx.txt b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt
new file mode 100644
index 000000000..1b66a413f
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-lp55xx.txt
@@ -0,0 +1,228 @@
+Binding for TI/National Semiconductor LP55xx Led Drivers
+
+Required properties:
+- compatible: one of
+ national,lp5521
+ national,lp5523
+ ti,lp55231
+ ti,lp5562
+ ti,lp8501
+
+- reg: I2C slave address
+- clock-mode: Input clock mode, (0: automode, 1: internal, 2: external)
+
+Each child has own specific current settings
+- led-cur: Current setting at each led channel (mA x10, 0 if led is not connected)
+- max-cur: Maximun current at each led channel.
+
+Optional properties:
+- enable-gpio: GPIO attached to the chip's enable pin
+- label: Used for naming LEDs
+- pwr-sel: LP8501 specific property. Power selection for output channels.
+ 0: D1~9 are connected to VDD
+ 1: D1~6 with VDD, D7~9 with VOUT
+ 2: D1~6 with VOUT, D7~9 with VDD
+ 3: D1~9 are connected to VOUT
+
+Alternatively, each child can have a specific channel name and trigger:
+- chan-name (optional): name of channel
+- linux,default-trigger (optional): see
+ Documentation/devicetree/bindings/leds/common.txt
+
+example 1) LP5521
+3 LED channels, external clock used. Channel names are 'lp5521_pri:channel0',
+'lp5521_pri:channel1' and 'lp5521_pri:channel2', with a heartbeat trigger
+on channel 0.
+
+lp5521@32 {
+ compatible = "national,lp5521";
+ reg = <0x32>;
+ label = "lp5521_pri";
+ clock-mode = /bits/ 8 <2>;
+
+ chan0 {
+ led-cur = /bits/ 8 <0x2f>;
+ max-cur = /bits/ 8 <0x5f>;
+ linux,default-trigger = "heartbeat";
+ };
+
+ chan1 {
+ led-cur = /bits/ 8 <0x2f>;
+ max-cur = /bits/ 8 <0x5f>;
+ };
+
+ chan2 {
+ led-cur = /bits/ 8 <0x2f>;
+ max-cur = /bits/ 8 <0x5f>;
+ };
+};
+
+example 2) LP5523
+9 LED channels with specific name. Internal clock used.
+The I2C slave address is configurable with ASEL1 and ASEL0 pins.
+Available addresses are 32/33/34/35h.
+
+ASEL1 ASEL0 Address
+-------------------------
+ GND GND 32h
+ GND VEN 33h
+ VEN GND 34h
+ VEN VEN 35h
+
+lp5523@32 {
+ compatible = "national,lp5523";
+ reg = <0x32>;
+ clock-mode = /bits/ 8 <1>;
+
+ chan0 {
+ chan-name = "d1";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan1 {
+ chan-name = "d2";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan2 {
+ chan-name = "d3";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan3 {
+ chan-name = "d4";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan4 {
+ chan-name = "d5";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan5 {
+ chan-name = "d6";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan6 {
+ chan-name = "d7";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan7 {
+ chan-name = "d8";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan8 {
+ chan-name = "d9";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+};
+
+example 3) LP5562
+4 channels are defined.
+
+lp5562@30 {
+ compatible = "ti,lp5562";
+ reg = <0x30>;
+ clock-mode = /bits/8 <2>;
+
+ chan0 {
+ chan-name = "R";
+ led-cur = /bits/ 8 <0x20>;
+ max-cur = /bits/ 8 <0x60>;
+ };
+
+ chan1 {
+ chan-name = "G";
+ led-cur = /bits/ 8 <0x20>;
+ max-cur = /bits/ 8 <0x60>;
+ };
+
+ chan2 {
+ chan-name = "B";
+ led-cur = /bits/ 8 <0x20>;
+ max-cur = /bits/ 8 <0x60>;
+ };
+
+ chan3 {
+ chan-name = "W";
+ led-cur = /bits/ 8 <0x20>;
+ max-cur = /bits/ 8 <0x60>;
+ };
+};
+
+example 4) LP8501
+9 channels are defined. The 'pwr-sel' is LP8501 specific property.
+Others are same as LP5523.
+
+lp8501@32 {
+ compatible = "ti,lp8501";
+ reg = <0x32>;
+ clock-mode = /bits/ 8 <2>;
+ pwr-sel = /bits/ 8 <3>; /* D1~9 connected to VOUT */
+
+ chan0 {
+ chan-name = "d1";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan1 {
+ chan-name = "d2";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan2 {
+ chan-name = "d3";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan3 {
+ chan-name = "d4";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan4 {
+ chan-name = "d5";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan5 {
+ chan-name = "d6";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan6 {
+ chan-name = "d7";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan7 {
+ chan-name = "d8";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+
+ chan8 {
+ chan-name = "d9";
+ led-cur = /bits/ 8 <0x14>;
+ max-cur = /bits/ 8 <0x20>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/leds/leds-lp8860.txt b/Documentation/devicetree/bindings/leds/leds-lp8860.txt
new file mode 100644
index 000000000..aad38dd94
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-lp8860.txt
@@ -0,0 +1,29 @@
+* Texas Instruments - lp8860 4-Channel LED Driver
+
+The LP8860-Q1 is an high-efficiency LED
+driver with boost controller. It has 4 high-precision
+current sinks that can be controlled by a PWM input
+signal, a SPI/I2C master, or both.
+
+Required properties:
+ - compatible:
+ "ti,lp8860"
+ - reg - I2C slave address
+ - label - Used for naming LEDs
+
+Optional properties:
+ - enable-gpio - gpio pin to enable/disable the device.
+ - supply - "vled" - LED supply
+
+Example:
+
+leds: leds@6 {
+ compatible = "ti,lp8860";
+ reg = <0x2d>;
+ label = "display_cluster";
+ enable-gpio = <&gpio1 28 GPIO_ACTIVE_HIGH>;
+ vled-supply = <&vbatt>;
+}
+
+For more product information please see the link below:
+http://www.ti.com/product/lp8860-q1
diff --git a/Documentation/devicetree/bindings/leds/leds-ns2.txt b/Documentation/devicetree/bindings/leds/leds-ns2.txt
new file mode 100644
index 000000000..aef3aca34
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-ns2.txt
@@ -0,0 +1,26 @@
+Binding for dual-GPIO LED found on Network Space v2 (and parents).
+
+Required properties:
+- compatible: "lacie,ns2-leds".
+
+Each LED is represented as a sub-node of the ns2-leds device.
+
+Required sub-node properties:
+- cmd-gpio: Command LED GPIO. See OF device-tree GPIO specification.
+- slow-gpio: Slow LED GPIO. See OF device-tree GPIO specification.
+
+Optional sub-node properties:
+- label: Name for this LED. If omitted, the label is taken from the node name.
+- linux,default-trigger: Trigger assigned to the LED.
+
+Example:
+
+ns2-leds {
+ compatible = "lacie,ns2-leds";
+
+ blue-sata {
+ label = "ns2:blue:sata";
+ slow-gpio = <&gpio0 29 0>;
+ cmd-gpio = <&gpio0 30 0>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/leds/leds-pm8941-wled.txt b/Documentation/devicetree/bindings/leds/leds-pm8941-wled.txt
new file mode 100644
index 000000000..a85a964d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-pm8941-wled.txt
@@ -0,0 +1,43 @@
+Binding for Qualcomm PM8941 WLED driver
+
+Required properties:
+- compatible: should be "qcom,pm8941-wled"
+- reg: slave address
+
+Optional properties:
+- label: The label for this led
+ See Documentation/devicetree/bindings/leds/common.txt
+- linux,default-trigger: Default trigger assigned to the LED
+ See Documentation/devicetree/bindings/leds/common.txt
+- qcom,cs-out: bool; enable current sink output
+- qcom,cabc: bool; enable content adaptive backlight control
+- qcom,ext-gen: bool; use externally generated modulator signal to dim
+- qcom,current-limit: mA; per-string current limit; value from 0 to 25
+ default: 20mA
+- qcom,current-boost-limit: mA; boost current limit; one of:
+ 105, 385, 525, 805, 980, 1260, 1400, 1680
+ default: 805mA
+- qcom,switching-freq: kHz; switching frequency; one of:
+ 600, 640, 685, 738, 800, 872, 960, 1066, 1200, 1371,
+ 1600, 1920, 2400, 3200, 4800, 9600,
+ default: 1600kHz
+- qcom,ovp: V; Over-voltage protection limit; one of:
+ 27, 29, 32, 35
+ default: 29V
+- qcom,num-strings: #; number of led strings attached; value from 1 to 3
+ default: 2
+
+Example:
+
+pm8941-wled@d800 {
+ compatible = "qcom,pm8941-wled";
+ reg = <0xd800>;
+ label = "backlight";
+
+ qcom,cs-out;
+ qcom,current-limit = <20>;
+ qcom,current-boost-limit = <805>;
+ qcom,switching-freq = <1600>;
+ qcom,ovp = <29>;
+ qcom,num-strings = <2>;
+};
diff --git a/Documentation/devicetree/bindings/leds/leds-pwm.txt b/Documentation/devicetree/bindings/leds/leds-pwm.txt
new file mode 100644
index 000000000..6c6583c35
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-pwm.txt
@@ -0,0 +1,50 @@
+LED connected to PWM
+
+Required properties:
+- compatible : should be "pwm-leds".
+
+Each LED is represented as a sub-node of the pwm-leds device. Each
+node's name represents the name of the corresponding LED.
+
+LED sub-node properties:
+- pwms : PWM property to point to the PWM device (phandle)/port (id) and to
+ specify the period time to be used: <&phandle id period_ns>;
+- pwm-names : (optional) Name to be used by the PWM subsystem for the PWM device
+ For the pwms and pwm-names property please refer to:
+ Documentation/devicetree/bindings/pwm/pwm.txt
+- max-brightness : Maximum brightness possible for the LED
+- active-low : (optional) For PWMs where the LED is wired to supply
+ rather than ground.
+- label : (optional)
+ see Documentation/devicetree/bindings/leds/common.txt
+- linux,default-trigger : (optional)
+ see Documentation/devicetree/bindings/leds/common.txt
+
+Example:
+
+twl_pwm: pwm {
+ /* provides two PWMs (id 0, 1 for PWM1 and PWM2) */
+ compatible = "ti,twl6030-pwm";
+ #pwm-cells = <2>;
+};
+
+twl_pwmled: pwmled {
+ /* provides one PWM (id 0 for Charing indicator LED) */
+ compatible = "ti,twl6030-pwmled";
+ #pwm-cells = <2>;
+};
+
+pwmleds {
+ compatible = "pwm-leds";
+ kpad {
+ label = "omap4::keypad";
+ pwms = <&twl_pwm 0 7812500>;
+ max-brightness = <127>;
+ };
+
+ charging {
+ label = "omap4:green:chrg";
+ pwms = <&twl_pwmled 0 7812500>;
+ max-brightness = <255>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/leds/pca963x.txt b/Documentation/devicetree/bindings/leds/pca963x.txt
new file mode 100644
index 000000000..dafbe9931
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/pca963x.txt
@@ -0,0 +1,48 @@
+LEDs connected to pca9632, pca9633 or pca9634
+
+Required properties:
+- compatible : should be : "nxp,pca9632", "nxp,pca9633", "nxp,pca9634" or "nxp,pca9635"
+
+Optional properties:
+- nxp,totem-pole : use totem pole (push-pull) instead of open-drain (pca9632 defaults
+ to open-drain, newer chips to totem pole)
+- nxp,hw-blink : use hardware blinking instead of software blinking
+
+Each led is represented as a sub-node of the nxp,pca963x device.
+
+LED sub-node properties:
+- label : (optional) see Documentation/devicetree/bindings/leds/common.txt
+- reg : number of LED line (could be from 0 to 3 in pca9632 or pca9633,
+ 0 to 7 in pca9634, or 0 to 15 in pca9635)
+- linux,default-trigger : (optional)
+ see Documentation/devicetree/bindings/leds/common.txt
+
+Examples:
+
+pca9632: pca9632 {
+ compatible = "nxp,pca9632";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x62>;
+
+ red@0 {
+ label = "red";
+ reg = <0>;
+ linux,default-trigger = "none";
+ };
+ green@1 {
+ label = "green";
+ reg = <1>;
+ linux,default-trigger = "none";
+ };
+ blue@2 {
+ label = "blue";
+ reg = <2>;
+ linux,default-trigger = "none";
+ };
+ unused@3 {
+ label = "unused";
+ reg = <3>;
+ linux,default-trigger = "none";
+ };
+};
diff --git a/Documentation/devicetree/bindings/leds/register-bit-led.txt b/Documentation/devicetree/bindings/leds/register-bit-led.txt
new file mode 100644
index 000000000..379cefdc0
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/register-bit-led.txt
@@ -0,0 +1,99 @@
+Device Tree Bindings for Register Bit LEDs
+
+Register bit leds are used with syscon multifunctional devices
+where single bits in a certain register can turn on/off a
+single LED. The register bit LEDs appear as children to the
+syscon device, with the proper compatible string. For the
+syscon bindings see:
+Documentation/devicetree/bindings/mfd/syscon.txt
+
+Each LED is represented as a sub-node of the syscon device. Each
+node's name represents the name of the corresponding LED.
+
+LED sub-node properties:
+
+Required properties:
+- compatible : must be "register-bit-led"
+- offset : register offset to the register controlling this LED
+- mask : bit mask for the bit controlling this LED in the register
+ typically 0x01, 0x02, 0x04 ...
+
+Optional properties:
+- label : (optional)
+ see Documentation/devicetree/bindings/leds/common.txt
+- linux,default-trigger : (optional)
+ see Documentation/devicetree/bindings/leds/common.txt
+- default-state: (optional) The initial state of the LED. Valid
+ values are "on", "off", and "keep". If the LED is already on or off
+ and the default-state property is set the to same value, then no
+ glitch should be produced where the LED momentarily turns off (or
+ on). The "keep" setting will keep the LED at whatever its current
+ state is, without producing a glitch. The default is off if this
+ property is not present.
+
+Example:
+
+syscon: syscon@10000000 {
+ compatible = "arm,realview-pb1176-syscon", "syscon";
+ reg = <0x10000000 0x1000>;
+
+ led@08.0 {
+ compatible = "register-bit-led";
+ offset = <0x08>;
+ mask = <0x01>;
+ label = "versatile:0";
+ linux,default-trigger = "heartbeat";
+ default-state = "on";
+ };
+ led@08.1 {
+ compatible = "register-bit-led";
+ offset = <0x08>;
+ mask = <0x02>;
+ label = "versatile:1";
+ linux,default-trigger = "mmc0";
+ default-state = "off";
+ };
+ led@08.2 {
+ compatible = "register-bit-led";
+ offset = <0x08>;
+ mask = <0x04>;
+ label = "versatile:2";
+ linux,default-trigger = "cpu0";
+ default-state = "off";
+ };
+ led@08.3 {
+ compatible = "register-bit-led";
+ offset = <0x08>;
+ mask = <0x08>;
+ label = "versatile:3";
+ default-state = "off";
+ };
+ led@08.4 {
+ compatible = "register-bit-led";
+ offset = <0x08>;
+ mask = <0x10>;
+ label = "versatile:4";
+ default-state = "off";
+ };
+ led@08.5 {
+ compatible = "register-bit-led";
+ offset = <0x08>;
+ mask = <0x20>;
+ label = "versatile:5";
+ default-state = "off";
+ };
+ led@08.6 {
+ compatible = "register-bit-led";
+ offset = <0x08>;
+ mask = <0x40>;
+ label = "versatile:6";
+ default-state = "off";
+ };
+ led@08.7 {
+ compatible = "register-bit-led";
+ offset = <0x08>;
+ mask = <0x80>;
+ label = "versatile:7";
+ default-state = "off";
+ };
+};
diff --git a/Documentation/devicetree/bindings/leds/tca6507.txt b/Documentation/devicetree/bindings/leds/tca6507.txt
new file mode 100644
index 000000000..bad910279
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/tca6507.txt
@@ -0,0 +1,49 @@
+LEDs connected to tca6507
+
+Required properties:
+- compatible : should be : "ti,tca6507".
+- #address-cells: must be 1
+- #size-cells: must be 0
+- reg: typically 0x45.
+
+Optional properties:
+- gpio-controller: allows lines to be used as output-only GPIOs.
+- #gpio-cells: if present, must not be 0.
+
+Each led is represented as a sub-node of the ti,tca6507 device.
+
+LED sub-node properties:
+- label : (optional) see Documentation/devicetree/bindings/leds/common.txt
+- reg : number of LED line (could be from 0 to 6)
+- linux,default-trigger : (optional)
+ see Documentation/devicetree/bindings/leds/common.txt
+- compatible: either "led" (the default) or "gpio".
+
+Examples:
+
+tca6507@45 {
+ compatible = "ti,tca6507";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x45>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ led0: red-aux@0 {
+ label = "red:aux";
+ reg = <0x0>;
+ };
+
+ led1: green-aux@1 {
+ label = "green:aux";
+ reg = <0x5>;
+ linux,default-trigger = "default-on";
+ };
+
+ wifi-reset@6 {
+ reg = <0x6>;
+ compatible = "gpio";
+ };
+};
+
diff --git a/Documentation/devicetree/bindings/lpddr2/lpddr2-timings.txt b/Documentation/devicetree/bindings/lpddr2/lpddr2-timings.txt
new file mode 100644
index 000000000..9ceb19e0c
--- /dev/null
+++ b/Documentation/devicetree/bindings/lpddr2/lpddr2-timings.txt
@@ -0,0 +1,52 @@
+* AC timing parameters of LPDDR2(JESD209-2) memories for a given speed-bin
+
+Required properties:
+- compatible : Should be "jedec,lpddr2-timings"
+- min-freq : minimum DDR clock frequency for the speed-bin. Type is <u32>
+- max-freq : maximum DDR clock frequency for the speed-bin. Type is <u32>
+
+Optional properties:
+
+The following properties represent AC timing parameters from the memory
+data-sheet of the device for a given speed-bin. All these properties are
+of type <u32> and the default unit is ps (pico seconds). Parameters with
+a different unit have a suffix indicating the unit such as 'tRAS-max-ns'
+- tRCD
+- tWR
+- tRAS-min
+- tRRD
+- tWTR
+- tXP
+- tRTP
+- tDQSCK-max
+- tFAW
+- tZQCS
+- tZQinit
+- tRPab
+- tZQCL
+- tCKESR
+- tRAS-max-ns
+- tDQSCK-max-derated
+
+Example:
+
+timings_elpida_ECB240ABACN_400mhz: lpddr2-timings@0 {
+ compatible = "jedec,lpddr2-timings";
+ min-freq = <10000000>;
+ max-freq = <400000000>;
+ tRPab = <21000>;
+ tRCD = <18000>;
+ tWR = <15000>;
+ tRAS-min = <42000>;
+ tRRD = <10000>;
+ tWTR = <7500>;
+ tXP = <7500>;
+ tRTP = <7500>;
+ tCKESR = <15000>;
+ tDQSCK-max = <5500>;
+ tFAW = <50000>;
+ tZQCS = <90000>;
+ tZQCL = <360000>;
+ tZQinit = <1000000>;
+ tRAS-max-ns = <70000>;
+};
diff --git a/Documentation/devicetree/bindings/lpddr2/lpddr2.txt b/Documentation/devicetree/bindings/lpddr2/lpddr2.txt
new file mode 100644
index 000000000..58354a075
--- /dev/null
+++ b/Documentation/devicetree/bindings/lpddr2/lpddr2.txt
@@ -0,0 +1,102 @@
+* LPDDR2 SDRAM memories compliant to JEDEC JESD209-2
+
+Required properties:
+- compatible : Should be one of - "jedec,lpddr2-nvm", "jedec,lpddr2-s2",
+ "jedec,lpddr2-s4"
+
+ "ti,jedec-lpddr2-s2" should be listed if the memory part is LPDDR2-S2 type
+
+ "ti,jedec-lpddr2-s4" should be listed if the memory part is LPDDR2-S4 type
+
+ "ti,jedec-lpddr2-nvm" should be listed if the memory part is LPDDR2-NVM type
+
+- density : <u32> representing density in Mb (Mega bits)
+
+- io-width : <u32> representing bus width. Possible values are 8, 16, and 32
+
+Optional properties:
+
+The following optional properties represent the minimum value of some AC
+timing parameters of the DDR device in terms of number of clock cycles.
+These values shall be obtained from the device data-sheet.
+- tRRD-min-tck
+- tWTR-min-tck
+- tXP-min-tck
+- tRTP-min-tck
+- tCKE-min-tck
+- tRPab-min-tck
+- tRCD-min-tck
+- tWR-min-tck
+- tRASmin-min-tck
+- tCKESR-min-tck
+- tFAW-min-tck
+
+Child nodes:
+- The lpddr2 node may have one or more child nodes of type "lpddr2-timings".
+ "lpddr2-timings" provides AC timing parameters of the device for
+ a given speed-bin. The user may provide the timings for as many
+ speed-bins as is required. Please see Documentation/devicetree/
+ bindings/lpddr2/lpddr2-timings.txt for more information on "lpddr2-timings"
+
+Example:
+
+elpida_ECB240ABACN : lpddr2 {
+ compatible = "Elpida,ECB240ABACN","jedec,lpddr2-s4";
+ density = <2048>;
+ io-width = <32>;
+
+ tRPab-min-tck = <3>;
+ tRCD-min-tck = <3>;
+ tWR-min-tck = <3>;
+ tRASmin-min-tck = <3>;
+ tRRD-min-tck = <2>;
+ tWTR-min-tck = <2>;
+ tXP-min-tck = <2>;
+ tRTP-min-tck = <2>;
+ tCKE-min-tck = <3>;
+ tCKESR-min-tck = <3>;
+ tFAW-min-tck = <8>;
+
+ timings_elpida_ECB240ABACN_400mhz: lpddr2-timings@0 {
+ compatible = "jedec,lpddr2-timings";
+ min-freq = <10000000>;
+ max-freq = <400000000>;
+ tRPab = <21000>;
+ tRCD = <18000>;
+ tWR = <15000>;
+ tRAS-min = <42000>;
+ tRRD = <10000>;
+ tWTR = <7500>;
+ tXP = <7500>;
+ tRTP = <7500>;
+ tCKESR = <15000>;
+ tDQSCK-max = <5500>;
+ tFAW = <50000>;
+ tZQCS = <90000>;
+ tZQCL = <360000>;
+ tZQinit = <1000000>;
+ tRAS-max-ns = <70000>;
+ };
+
+ timings_elpida_ECB240ABACN_200mhz: lpddr2-timings@1 {
+ compatible = "jedec,lpddr2-timings";
+ min-freq = <10000000>;
+ max-freq = <200000000>;
+ tRPab = <21000>;
+ tRCD = <18000>;
+ tWR = <15000>;
+ tRAS-min = <42000>;
+ tRRD = <10000>;
+ tWTR = <10000>;
+ tXP = <7500>;
+ tRTP = <7500>;
+ tCKESR = <15000>;
+ tDQSCK-max = <5500>;
+ tFAW = <50000>;
+ tZQCS = <90000>;
+ tZQCL = <360000>;
+ tZQinit = <1000000>;
+ tRAS-max-ns = <70000>;
+ };
+
+}
diff --git a/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt b/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt
new file mode 100644
index 000000000..c2619797c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/altera-mailbox.txt
@@ -0,0 +1,49 @@
+Altera Mailbox Driver
+=====================
+
+Required properties:
+- compatible : "altr,mailbox-1.0".
+- reg : physical base address of the mailbox and length of
+ memory mapped region.
+- #mbox-cells: Common mailbox binding property to identify the number
+ of cells required for the mailbox specifier. Should be 1.
+
+Optional properties:
+- interrupt-parent : interrupt source phandle.
+- interrupts : interrupt number. The interrupt specifier format
+ depends on the interrupt controller parent.
+
+Example:
+ mbox_tx: mailbox@0x100 {
+ compatible = "altr,mailbox-1.0";
+ reg = <0x100 0x8>;
+ interrupt-parent = < &gic_0 >;
+ interrupts = <5>;
+ #mbox-cells = <1>;
+ };
+
+ mbox_rx: mailbox@0x200 {
+ compatible = "altr,mailbox-1.0";
+ reg = <0x200 0x8>;
+ interrupt-parent = < &gic_0 >;
+ interrupts = <6>;
+ #mbox-cells = <1>;
+ };
+
+Mailbox client
+===============
+"mboxes" and the optional "mbox-names" (please see
+Documentation/devicetree/bindings/mailbox/mailbox.txt for details). Each value
+of the mboxes property should contain a phandle to the mailbox controller
+device node and second argument is the channel index. It must be 0 (hardware
+support only one channel).The equivalent "mbox-names" property value can be
+used to give a name to the communication channel to be used by the client user.
+
+Example:
+ mclient0: mclient0@0x400 {
+ compatible = "client-1.0";
+ reg = <0x400 0x10>;
+ mbox-names = "mbox-tx", "mbox-rx";
+ mboxes = <&mbox_tx 0>,
+ <&mbox_rx 0>;
+ };
diff --git a/Documentation/devicetree/bindings/mailbox/arm-mhu.txt b/Documentation/devicetree/bindings/mailbox/arm-mhu.txt
new file mode 100644
index 000000000..4971f03f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/arm-mhu.txt
@@ -0,0 +1,43 @@
+ARM MHU Mailbox Driver
+======================
+
+The ARM's Message-Handling-Unit (MHU) is a mailbox controller that has
+3 independent channels/links to communicate with remote processor(s).
+ MHU links are hardwired on a platform. A link raises interrupt for any
+received data. However, there is no specified way of knowing if the sent
+data has been read by the remote. This driver assumes the sender polls
+STAT register and the remote clears it after having read the data.
+The last channel is specified to be a 'Secure' resource, hence can't be
+used by Linux running NS.
+
+Mailbox Device Node:
+====================
+
+Required properties:
+--------------------
+- compatible: Shall be "arm,mhu" & "arm,primecell"
+- reg: Contains the mailbox register address range (base
+ address and length)
+- #mbox-cells Shall be 1 - the index of the channel needed.
+- interrupts: Contains the interrupt information corresponding to
+ each of the 3 links of MHU.
+
+Example:
+--------
+
+ mhu: mailbox@2b1f0000 {
+ #mbox-cells = <1>;
+ compatible = "arm,mhu", "arm,primecell";
+ reg = <0 0x2b1f0000 0x1000>;
+ interrupts = <0 36 4>, /* LP-NonSecure */
+ <0 35 4>, /* HP-NonSecure */
+ <0 37 4>; /* Secure */
+ clocks = <&clock 0 2 1>;
+ clock-names = "apb_pclk";
+ };
+
+ mhu_client: scb@2e000000 {
+ compatible = "fujitsu,mb86s70-scb-1.0";
+ reg = <0 0x2e000000 0x4000>;
+ mboxes = <&mhu 1>; /* HP-NonSecure */
+ };
diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt
new file mode 100644
index 000000000..1a2cd3d26
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt
@@ -0,0 +1,38 @@
+* Generic Mailbox Controller and client driver bindings
+
+Generic binding to provide a way for Mailbox controller drivers to
+assign appropriate mailbox channel to client drivers.
+
+* Mailbox Controller
+
+Required property:
+- #mbox-cells: Must be at least 1. Number of cells in a mailbox
+ specifier.
+
+Example:
+ mailbox: mailbox {
+ ...
+ #mbox-cells = <1>;
+ };
+
+
+* Mailbox Client
+
+Required property:
+- mboxes: List of phandle and mailbox channel specifiers.
+
+Optional property:
+- mbox-names: List of identifier strings for each mailbox channel
+ required by the client. The use of this property
+ is discouraged in favor of using index in list of
+ 'mboxes' while requesting a mailbox. Instead the
+ platforms may define channel indices, in DT headers,
+ to something legible.
+
+Example:
+ pwr_cntrl: power {
+ ...
+ mbox-names = "pwr-ctrl", "rpc";
+ mboxes = <&mailbox 0
+ &mailbox 1>;
+ };
diff --git a/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt b/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt
new file mode 100644
index 000000000..d1a043339
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/omap-mailbox.txt
@@ -0,0 +1,131 @@
+OMAP2+ Mailbox Driver
+=====================
+
+The OMAP mailbox hardware facilitates communication between different processors
+using a queued mailbox interrupt mechanism. The IP block is external to the
+various processor subsystems and is connected on an interconnect bus. The
+communication is achieved through a set of registers for message storage and
+interrupt configuration registers.
+
+Each mailbox IP block has a certain number of h/w fifo queues and output
+interrupt lines. An output interrupt line is routed to an interrupt controller
+within a processor subsystem, and there can be more than one line going to a
+specific processor's interrupt controller. The interrupt line connections are
+fixed for an instance and are dictated by the IP integration into the SoC
+(excluding the SoCs that have a Interrupt Crossbar IP). Each interrupt line is
+programmable through a set of interrupt configuration registers, and have a rx
+and tx interrupt source per h/w fifo. Communication between different processors
+is achieved through the appropriate programming of the rx and tx interrupt
+sources on the appropriate interrupt lines.
+
+The number of h/w fifo queues and interrupt lines dictate the usable registers.
+All the current OMAP SoCs except for the newest DRA7xx SoC has a single IP
+instance. DRA7xx has multiple instances with different number of h/w fifo queues
+and interrupt lines between different instances. The interrupt lines can also be
+routed to different processor sub-systems on DRA7xx as they are routed through
+the Crossbar, a kind of interrupt router/multiplexer.
+
+Mailbox Device Node:
+====================
+A Mailbox device node is used to represent a Mailbox IP instance within a SoC.
+The sub-mailboxes are represented as child nodes of this parent node.
+
+Required properties:
+--------------------
+- compatible: Should be one of the following,
+ "ti,omap2-mailbox" for OMAP2420, OMAP2430 SoCs
+ "ti,omap3-mailbox" for OMAP3430, OMAP3630 SoCs
+ "ti,omap4-mailbox" for OMAP44xx, OMAP54xx, AM33xx,
+ AM43xx and DRA7xx SoCs
+- reg: Contains the mailbox register address range (base
+ address and length)
+- interrupts: Contains the interrupt information for the mailbox
+ device. The format is dependent on which interrupt
+ controller the OMAP device uses
+- ti,hwmods: Name of the hwmod associated with the mailbox
+- #mbox-cells: Common mailbox binding property to identify the number
+ of cells required for the mailbox specifier. Should be
+ 1
+- ti,mbox-num-users: Number of targets (processor devices) that the mailbox
+ device can interrupt
+- ti,mbox-num-fifos: Number of h/w fifo queues within the mailbox IP block
+
+Child Nodes:
+============
+A child node is used for representing the actual sub-mailbox device that is
+used for the communication between the host processor and a remote processor.
+Each child node should have a unique node name across all the different
+mailbox device nodes.
+
+Required properties:
+--------------------
+- ti,mbox-tx: sub-mailbox descriptor property defining a Tx fifo
+- ti,mbox-rx: sub-mailbox descriptor property defining a Rx fifo
+
+Sub-mailbox Descriptor Data
+---------------------------
+Each of the above ti,mbox-tx and ti,mbox-rx properties should have 3 cells of
+data that represent the following:
+ Cell #1 (fifo_id) - mailbox fifo id used either for transmitting
+ (ti,mbox-tx) or for receiving (ti,mbox-rx)
+ Cell #2 (irq_id) - irq identifier index number to use from the parent's
+ interrupts data. Should be 0 for most of the cases, a
+ positive index value is seen only on mailboxes that have
+ multiple interrupt lines connected to the MPU processor.
+ Cell #3 (usr_id) - mailbox user id for identifying the interrupt line
+ associated with generating a tx/rx fifo interrupt.
+
+Mailbox Users:
+==============
+A device needing to communicate with a target processor device should specify
+them using the common mailbox binding properties, "mboxes" and the optional
+"mbox-names" (please see Documentation/devicetree/bindings/mailbox/mailbox.txt
+for details). Each value of the mboxes property should contain a phandle to the
+mailbox controller device node and an args specifier that will be the phandle to
+the intended sub-mailbox child node to be used for communication. The equivalent
+"mbox-names" property value can be used to give a name to the communication channel
+to be used by the client user.
+
+
+Example:
+--------
+
+/* OMAP4 */
+mailbox: mailbox@4a0f4000 {
+ compatible = "ti,omap4-mailbox";
+ reg = <0x4a0f4000 0x200>;
+ interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>;
+ ti,hwmods = "mailbox";
+ #mbox-cells = <1>;
+ ti,mbox-num-users = <3>;
+ ti,mbox-num-fifos = <8>;
+ mbox_ipu: mbox_ipu {
+ ti,mbox-tx = <0 0 0>;
+ ti,mbox-rx = <1 0 0>;
+ };
+ mbox_dsp: mbox_dsp {
+ ti,mbox-tx = <3 0 0>;
+ ti,mbox-rx = <2 0 0>;
+ };
+};
+
+dsp {
+ ...
+ mboxes = <&mailbox &mbox_dsp>;
+ ...
+};
+
+/* AM33xx */
+mailbox: mailbox@480C8000 {
+ compatible = "ti,omap4-mailbox";
+ reg = <0x480C8000 0x200>;
+ interrupts = <77>;
+ ti,hwmods = "mailbox";
+ #mbox-cells = <1>;
+ ti,mbox-num-users = <4>;
+ ti,mbox-num-fifos = <8>;
+ mbox_wkupm3: wkup_m3 {
+ ti,mbox-tx = <0 0 0>;
+ ti,mbox-rx = <0 0 3>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/marvell.txt b/Documentation/devicetree/bindings/marvell.txt
new file mode 100644
index 000000000..ea2b16ced
--- /dev/null
+++ b/Documentation/devicetree/bindings/marvell.txt
@@ -0,0 +1,516 @@
+Marvell Discovery mv64[345]6x System Controller chips
+===========================================================
+
+The Marvell mv64[345]60 series of system controller chips contain
+many of the peripherals needed to implement a complete computer
+system. In this section, we define device tree nodes to describe
+the system controller chip itself and each of the peripherals
+which it contains. Compatible string values for each node are
+prefixed with the string "marvell,", for Marvell Technology Group Ltd.
+
+1) The /system-controller node
+
+ This node is used to represent the system-controller and must be
+ present when the system uses a system controller chip. The top-level
+ system-controller node contains information that is global to all
+ devices within the system controller chip. The node name begins
+ with "system-controller" followed by the unit address, which is
+ the base address of the memory-mapped register set for the system
+ controller chip.
+
+ Required properties:
+
+ - ranges : Describes the translation of system controller addresses
+ for memory mapped registers.
+ - clock-frequency: Contains the main clock frequency for the system
+ controller chip.
+ - reg : This property defines the address and size of the
+ memory-mapped registers contained within the system controller
+ chip. The address specified in the "reg" property should match
+ the unit address of the system-controller node.
+ - #address-cells : Address representation for system controller
+ devices. This field represents the number of cells needed to
+ represent the address of the memory-mapped registers of devices
+ within the system controller chip.
+ - #size-cells : Size representation for the memory-mapped
+ registers within the system controller chip.
+ - #interrupt-cells : Defines the width of cells used to represent
+ interrupts.
+
+ Optional properties:
+
+ - model : The specific model of the system controller chip. Such
+ as, "mv64360", "mv64460", or "mv64560".
+ - compatible : A string identifying the compatibility identifiers
+ of the system controller chip.
+
+ The system-controller node contains child nodes for each system
+ controller device that the platform uses. Nodes should not be created
+ for devices which exist on the system controller chip but are not used
+
+ Example Marvell Discovery mv64360 system-controller node:
+
+ system-controller@f1000000 { /* Marvell Discovery mv64360 */
+ #address-cells = <1>;
+ #size-cells = <1>;
+ model = "mv64360"; /* Default */
+ compatible = "marvell,mv64360";
+ clock-frequency = <133333333>;
+ reg = <0xf1000000 0x10000>;
+ virtual-reg = <0xf1000000>;
+ ranges = <0x88000000 0x88000000 0x1000000 /* PCI 0 I/O Space */
+ 0x80000000 0x80000000 0x8000000 /* PCI 0 MEM Space */
+ 0xa0000000 0xa0000000 0x4000000 /* User FLASH */
+ 0x00000000 0xf1000000 0x0010000 /* Bridge's regs */
+ 0xf2000000 0xf2000000 0x0040000>;/* Integrated SRAM */
+
+ [ child node definitions... ]
+ }
+
+2) Child nodes of /system-controller
+
+ a) Marvell Discovery MDIO bus
+
+ The MDIO is a bus to which the PHY devices are connected. For each
+ device that exists on this bus, a child node should be created. See
+ the definition of the PHY node below for an example of how to define
+ a PHY.
+
+ Required properties:
+ - #address-cells : Should be <1>
+ - #size-cells : Should be <0>
+ - compatible : Should be "marvell,mv64360-mdio"
+
+ Example:
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "marvell,mv64360-mdio";
+
+ ethernet-phy@0 {
+ ......
+ };
+ };
+
+
+ b) Marvell Discovery ethernet controller
+
+ The Discover ethernet controller is described with two levels
+ of nodes. The first level describes an ethernet silicon block
+ and the second level describes up to 3 ethernet nodes within
+ that block. The reason for the multiple levels is that the
+ registers for the node are interleaved within a single set
+ of registers. The "ethernet-block" level describes the
+ shared register set, and the "ethernet" nodes describe ethernet
+ port-specific properties.
+
+ Ethernet block node
+
+ Required properties:
+ - #address-cells : <1>
+ - #size-cells : <0>
+ - compatible : "marvell,mv64360-eth-block"
+ - reg : Offset and length of the register set for this block
+
+ Optional properties:
+ - clocks : Phandle to the clock control device and gate bit
+
+ Example Discovery Ethernet block node:
+ ethernet-block@2000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "marvell,mv64360-eth-block";
+ reg = <0x2000 0x2000>;
+ ethernet@0 {
+ .......
+ };
+ };
+
+ Ethernet port node
+
+ Required properties:
+ - compatible : Should be "marvell,mv64360-eth".
+ - reg : Should be <0>, <1>, or <2>, according to which registers
+ within the silicon block the device uses.
+ - interrupts : <a> where a is the interrupt number for the port.
+ - interrupt-parent : the phandle for the interrupt controller
+ that services interrupts for this device.
+ - phy : the phandle for the PHY connected to this ethernet
+ controller.
+ - local-mac-address : 6 bytes, MAC address
+
+ Example Discovery Ethernet port node:
+ ethernet@0 {
+ compatible = "marvell,mv64360-eth";
+ reg = <0>;
+ interrupts = <32>;
+ interrupt-parent = <&PIC>;
+ phy = <&PHY0>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ };
+
+
+
+ c) Marvell Discovery PHY nodes
+
+ Required properties:
+ - interrupts : <a> where a is the interrupt number for this phy.
+ - interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+ - reg : The ID number for the phy, usually a small integer
+
+ Example Discovery PHY node:
+ ethernet-phy@1 {
+ compatible = "broadcom,bcm5421";
+ interrupts = <76>; /* GPP 12 */
+ interrupt-parent = <&PIC>;
+ reg = <1>;
+ };
+
+
+ d) Marvell Discovery SDMA nodes
+
+ Represent DMA hardware associated with the MPSC (multiprotocol
+ serial controllers).
+
+ Required properties:
+ - compatible : "marvell,mv64360-sdma"
+ - reg : Offset and length of the register set for this device
+ - interrupts : <a> where a is the interrupt number for the DMA
+ device.
+ - interrupt-parent : the phandle for the interrupt controller
+ that services interrupts for this device.
+
+ Example Discovery SDMA node:
+ sdma@4000 {
+ compatible = "marvell,mv64360-sdma";
+ reg = <0x4000 0xc18>;
+ virtual-reg = <0xf1004000>;
+ interrupts = <36>;
+ interrupt-parent = <&PIC>;
+ };
+
+
+ e) Marvell Discovery BRG nodes
+
+ Represent baud rate generator hardware associated with the MPSC
+ (multiprotocol serial controllers).
+
+ Required properties:
+ - compatible : "marvell,mv64360-brg"
+ - reg : Offset and length of the register set for this device
+ - clock-src : A value from 0 to 15 which selects the clock
+ source for the baud rate generator. This value corresponds
+ to the CLKS value in the BRGx configuration register. See
+ the mv64x60 User's Manual.
+ - clock-frequence : The frequency (in Hz) of the baud rate
+ generator's input clock.
+ - current-speed : The current speed setting (presumably by
+ firmware) of the baud rate generator.
+
+ Example Discovery BRG node:
+ brg@b200 {
+ compatible = "marvell,mv64360-brg";
+ reg = <0xb200 0x8>;
+ clock-src = <8>;
+ clock-frequency = <133333333>;
+ current-speed = <9600>;
+ };
+
+
+ f) Marvell Discovery CUNIT nodes
+
+ Represent the Serial Communications Unit device hardware.
+
+ Required properties:
+ - reg : Offset and length of the register set for this device
+
+ Example Discovery CUNIT node:
+ cunit@f200 {
+ reg = <0xf200 0x200>;
+ };
+
+
+ g) Marvell Discovery MPSCROUTING nodes
+
+ Represent the Discovery's MPSC routing hardware
+
+ Required properties:
+ - reg : Offset and length of the register set for this device
+
+ Example Discovery CUNIT node:
+ mpscrouting@b500 {
+ reg = <0xb400 0xc>;
+ };
+
+
+ h) Marvell Discovery MPSCINTR nodes
+
+ Represent the Discovery's MPSC DMA interrupt hardware registers
+ (SDMA cause and mask registers).
+
+ Required properties:
+ - reg : Offset and length of the register set for this device
+
+ Example Discovery MPSCINTR node:
+ mpsintr@b800 {
+ reg = <0xb800 0x100>;
+ };
+
+
+ i) Marvell Discovery MPSC nodes
+
+ Represent the Discovery's MPSC (Multiprotocol Serial Controller)
+ serial port.
+
+ Required properties:
+ - compatible : "marvell,mv64360-mpsc"
+ - reg : Offset and length of the register set for this device
+ - sdma : the phandle for the SDMA node used by this port
+ - brg : the phandle for the BRG node used by this port
+ - cunit : the phandle for the CUNIT node used by this port
+ - mpscrouting : the phandle for the MPSCROUTING node used by this port
+ - mpscintr : the phandle for the MPSCINTR node used by this port
+ - cell-index : the hardware index of this cell in the MPSC core
+ - max_idle : value needed for MPSC CHR3 (Maximum Frame Length)
+ register
+ - interrupts : <a> where a is the interrupt number for the MPSC.
+ - interrupt-parent : the phandle for the interrupt controller
+ that services interrupts for this device.
+
+ Example Discovery MPSCINTR node:
+ mpsc@8000 {
+ compatible = "marvell,mv64360-mpsc";
+ reg = <0x8000 0x38>;
+ virtual-reg = <0xf1008000>;
+ sdma = <&SDMA0>;
+ brg = <&BRG0>;
+ cunit = <&CUNIT>;
+ mpscrouting = <&MPSCROUTING>;
+ mpscintr = <&MPSCINTR>;
+ cell-index = <0>;
+ max_idle = <40>;
+ interrupts = <40>;
+ interrupt-parent = <&PIC>;
+ };
+
+
+ j) Marvell Discovery Watch Dog Timer nodes
+
+ Represent the Discovery's watchdog timer hardware
+
+ Required properties:
+ - compatible : "marvell,mv64360-wdt"
+ - reg : Offset and length of the register set for this device
+
+ Example Discovery Watch Dog Timer node:
+ wdt@b410 {
+ compatible = "marvell,mv64360-wdt";
+ reg = <0xb410 0x8>;
+ };
+
+
+ k) Marvell Discovery I2C nodes
+
+ Represent the Discovery's I2C hardware
+
+ Required properties:
+ - device_type : "i2c"
+ - compatible : "marvell,mv64360-i2c"
+ - reg : Offset and length of the register set for this device
+ - interrupts : <a> where a is the interrupt number for the I2C.
+ - interrupt-parent : the phandle for the interrupt controller
+ that services interrupts for this device.
+
+ Example Discovery I2C node:
+ compatible = "marvell,mv64360-i2c";
+ reg = <0xc000 0x20>;
+ virtual-reg = <0xf100c000>;
+ interrupts = <37>;
+ interrupt-parent = <&PIC>;
+ };
+
+
+ l) Marvell Discovery PIC (Programmable Interrupt Controller) nodes
+
+ Represent the Discovery's PIC hardware
+
+ Required properties:
+ - #interrupt-cells : <1>
+ - #address-cells : <0>
+ - compatible : "marvell,mv64360-pic"
+ - reg : Offset and length of the register set for this device
+ - interrupt-controller
+
+ Example Discovery PIC node:
+ pic {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ compatible = "marvell,mv64360-pic";
+ reg = <0x0 0x88>;
+ interrupt-controller;
+ };
+
+
+ m) Marvell Discovery MPP (Multipurpose Pins) multiplexing nodes
+
+ Represent the Discovery's MPP hardware
+
+ Required properties:
+ - compatible : "marvell,mv64360-mpp"
+ - reg : Offset and length of the register set for this device
+
+ Example Discovery MPP node:
+ mpp@f000 {
+ compatible = "marvell,mv64360-mpp";
+ reg = <0xf000 0x10>;
+ };
+
+
+ n) Marvell Discovery GPP (General Purpose Pins) nodes
+
+ Represent the Discovery's GPP hardware
+
+ Required properties:
+ - compatible : "marvell,mv64360-gpp"
+ - reg : Offset and length of the register set for this device
+
+ Example Discovery GPP node:
+ gpp@f000 {
+ compatible = "marvell,mv64360-gpp";
+ reg = <0xf100 0x20>;
+ };
+
+
+ o) Marvell Discovery PCI host bridge node
+
+ Represents the Discovery's PCI host bridge device. The properties
+ for this node conform to Rev 2.1 of the PCI Bus Binding to IEEE
+ 1275-1994. A typical value for the compatible property is
+ "marvell,mv64360-pci".
+
+ Example Discovery PCI host bridge node
+ pci@80000000 {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ device_type = "pci";
+ compatible = "marvell,mv64360-pci";
+ reg = <0xcf8 0x8>;
+ ranges = <0x01000000 0x0 0x0
+ 0x88000000 0x0 0x01000000
+ 0x02000000 0x0 0x80000000
+ 0x80000000 0x0 0x08000000>;
+ bus-range = <0 255>;
+ clock-frequency = <66000000>;
+ interrupt-parent = <&PIC>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ /* IDSEL 0x0a */
+ 0x5000 0 0 1 &PIC 80
+ 0x5000 0 0 2 &PIC 81
+ 0x5000 0 0 3 &PIC 91
+ 0x5000 0 0 4 &PIC 93
+
+ /* IDSEL 0x0b */
+ 0x5800 0 0 1 &PIC 91
+ 0x5800 0 0 2 &PIC 93
+ 0x5800 0 0 3 &PIC 80
+ 0x5800 0 0 4 &PIC 81
+
+ /* IDSEL 0x0c */
+ 0x6000 0 0 1 &PIC 91
+ 0x6000 0 0 2 &PIC 93
+ 0x6000 0 0 3 &PIC 80
+ 0x6000 0 0 4 &PIC 81
+
+ /* IDSEL 0x0d */
+ 0x6800 0 0 1 &PIC 93
+ 0x6800 0 0 2 &PIC 80
+ 0x6800 0 0 3 &PIC 81
+ 0x6800 0 0 4 &PIC 91
+ >;
+ };
+
+
+ p) Marvell Discovery CPU Error nodes
+
+ Represent the Discovery's CPU error handler device.
+
+ Required properties:
+ - compatible : "marvell,mv64360-cpu-error"
+ - reg : Offset and length of the register set for this device
+ - interrupts : the interrupt number for this device
+ - interrupt-parent : the phandle for the interrupt controller
+ that services interrupts for this device.
+
+ Example Discovery CPU Error node:
+ cpu-error@0070 {
+ compatible = "marvell,mv64360-cpu-error";
+ reg = <0x70 0x10 0x128 0x28>;
+ interrupts = <3>;
+ interrupt-parent = <&PIC>;
+ };
+
+
+ q) Marvell Discovery SRAM Controller nodes
+
+ Represent the Discovery's SRAM controller device.
+
+ Required properties:
+ - compatible : "marvell,mv64360-sram-ctrl"
+ - reg : Offset and length of the register set for this device
+ - interrupts : the interrupt number for this device
+ - interrupt-parent : the phandle for the interrupt controller
+ that services interrupts for this device.
+
+ Example Discovery SRAM Controller node:
+ sram-ctrl@0380 {
+ compatible = "marvell,mv64360-sram-ctrl";
+ reg = <0x380 0x80>;
+ interrupts = <13>;
+ interrupt-parent = <&PIC>;
+ };
+
+
+ r) Marvell Discovery PCI Error Handler nodes
+
+ Represent the Discovery's PCI error handler device.
+
+ Required properties:
+ - compatible : "marvell,mv64360-pci-error"
+ - reg : Offset and length of the register set for this device
+ - interrupts : the interrupt number for this device
+ - interrupt-parent : the phandle for the interrupt controller
+ that services interrupts for this device.
+
+ Example Discovery PCI Error Handler node:
+ pci-error@1d40 {
+ compatible = "marvell,mv64360-pci-error";
+ reg = <0x1d40 0x40 0xc28 0x4>;
+ interrupts = <12>;
+ interrupt-parent = <&PIC>;
+ };
+
+
+ s) Marvell Discovery Memory Controller nodes
+
+ Represent the Discovery's memory controller device.
+
+ Required properties:
+ - compatible : "marvell,mv64360-mem-ctrl"
+ - reg : Offset and length of the register set for this device
+ - interrupts : the interrupt number for this device
+ - interrupt-parent : the phandle for the interrupt controller
+ that services interrupts for this device.
+
+ Example Discovery Memory Controller node:
+ mem-ctrl@1400 {
+ compatible = "marvell,mv64360-mem-ctrl";
+ reg = <0x1400 0x60>;
+ interrupts = <17>;
+ interrupt-parent = <&PIC>;
+ };
+
+
diff --git a/Documentation/devicetree/bindings/media/atmel-isi.txt b/Documentation/devicetree/bindings/media/atmel-isi.txt
new file mode 100644
index 000000000..251f008f2
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/atmel-isi.txt
@@ -0,0 +1,51 @@
+Atmel Image Sensor Interface (ISI) SoC Camera Subsystem
+----------------------------------------------
+
+Required properties:
+- compatible: must be "atmel,at91sam9g45-isi"
+- reg: physical base address and length of the registers set for the device;
+- interrupts: should contain IRQ line for the ISI;
+- clocks: list of clock specifiers, corresponding to entries in
+ the clock-names property;
+- clock-names: must contain "isi_clk", which is the isi peripherial clock.
+
+ISI supports a single port node with parallel bus. It should contain one
+'port' child node with child 'endpoint' node. Please refer to the bindings
+defined in Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Example:
+ isi: isi@f0034000 {
+ compatible = "atmel,at91sam9g45-isi";
+ reg = <0xf0034000 0x4000>;
+ interrupts = <37 IRQ_TYPE_LEVEL_HIGH 5>;
+
+ clocks = <&isi_clk>;
+ clock-names = "isi_clk";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_isi>;
+
+ port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ isi_0: endpoint {
+ remote-endpoint = <&ov2640_0>;
+ bus-width = <8>;
+ };
+ };
+ };
+
+ i2c1: i2c@f0018000 {
+ ov2640: camera@0x30 {
+ compatible = "ovti,ov2640";
+ reg = <0x30>;
+
+ port {
+ ov2640_0: endpoint {
+ remote-endpoint = <&isi_0>;
+ bus-width = <8>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/coda.txt b/Documentation/devicetree/bindings/media/coda.txt
new file mode 100644
index 000000000..2865d04e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/coda.txt
@@ -0,0 +1,30 @@
+Chips&Media Coda multi-standard codec IP
+========================================
+
+Coda codec IPs are present in i.MX SoCs in various versions,
+called VPU (Video Processing Unit).
+
+Required properties:
+- compatible : should be "fsl,<chip>-src" for i.MX SoCs:
+ (a) "fsl,imx27-vpu" for CodaDx6 present in i.MX27
+ (b) "fsl,imx53-vpu" for CODA7541 present in i.MX53
+ (c) "fsl,imx6q-vpu" for CODA960 present in i.MX6q
+- reg: should be register base and length as documented in the
+ SoC reference manual
+- interrupts : Should contain the VPU interrupt. For CODA960,
+ a second interrupt is needed for the MJPEG unit.
+- clocks : Should contain the ahb and per clocks, in the order
+ determined by the clock-names property.
+- clock-names : Should be "ahb", "per"
+- iram : phandle pointing to the SRAM device node
+
+Example:
+
+vpu: vpu@63ff4000 {
+ compatible = "fsl,imx53-vpu";
+ reg = <0x63ff4000 0x1000>;
+ interrupts = <9>;
+ clocks = <&clks 63>, <&clks 63>;
+ clock-names = "ahb", "per";
+ iram = <&ocram>;
+};
diff --git a/Documentation/devicetree/bindings/media/exynos-fimc-lite.txt b/Documentation/devicetree/bindings/media/exynos-fimc-lite.txt
new file mode 100644
index 000000000..0bf6fb7fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/exynos-fimc-lite.txt
@@ -0,0 +1,16 @@
+Exynos4x12/Exynos5 SoC series camera host interface (FIMC-LITE)
+
+Required properties:
+
+- compatible : should be one of:
+ "samsung,exynos4212-fimc-lite" for Exynos4212/4412 SoCs,
+ "samsung,exynos5250-fimc-lite" for Exynos5250 compatible
+ devices;
+- reg : physical base address and size of the device memory mapped
+ registers;
+- interrupts : should contain FIMC-LITE interrupt;
+- clocks : FIMC LITE gate clock should be specified in this property.
+- clock-names : should contain "flite" entry.
+
+Each FIMC device should have an alias in the aliases node, in the form of
+fimc-lite<n>, where <n> is an integer specifying the IP block instance.
diff --git a/Documentation/devicetree/bindings/media/exynos-jpeg-codec.txt b/Documentation/devicetree/bindings/media/exynos-jpeg-codec.txt
new file mode 100644
index 000000000..4ef45636e
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/exynos-jpeg-codec.txt
@@ -0,0 +1,15 @@
+Samsung S5P/EXYNOS SoC series JPEG codec
+
+Required properties:
+
+- compatible : should be one of:
+ "samsung,s5pv210-jpeg", "samsung,exynos4210-jpeg",
+ "samsung,exynos3250-jpeg", "samsung,exynos5420-jpeg";
+- reg : address and length of the JPEG codec IP register set;
+- interrupts : specifies the JPEG codec IP interrupt;
+- clock-names : should contain:
+ - "jpeg" for the core gate clock,
+ - "sclk" for the special clock (optional).
+- clocks : should contain the clock specifier and clock ID list
+ matching entries in the clock-names property; from
+ the common clock bindings.
diff --git a/Documentation/devicetree/bindings/media/exynos4-fimc-is.txt b/Documentation/devicetree/bindings/media/exynos4-fimc-is.txt
new file mode 100644
index 000000000..55c9ad6f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/exynos4-fimc-is.txt
@@ -0,0 +1,49 @@
+Exynos4x12 SoC series Imaging Subsystem (FIMC-IS)
+
+The FIMC-IS is a subsystem for processing image signal from an image sensor.
+The Exynos4x12 SoC series FIMC-IS V1.5 comprises of a dedicated ARM Cortex-A5
+processor, ISP, DRC and FD IP blocks and peripheral devices such as UART, I2C
+and SPI bus controllers, PWM and ADC.
+
+fimc-is node
+------------
+
+Required properties:
+- compatible : should be "samsung,exynos4212-fimc-is" for Exynos4212 and
+ Exynos4412 SoCs;
+- reg : physical base address and length of the registers set;
+- interrupts : must contain two FIMC-IS interrupts, in order: ISP0, ISP1;
+- clocks : list of clock specifiers, corresponding to entries in
+ clock-names property;
+- clock-names : must contain "ppmuispx", "ppmuispx", "lite0", "lite1"
+ "mpll", "sysreg", "isp", "drc", "fd", "mcuisp", "uart",
+ "ispdiv0", "ispdiv1", "mcuispdiv0", "mcuispdiv1", "aclk200",
+ "div_aclk200", "aclk400mcuisp", "div_aclk400mcuisp" entries,
+ matching entries in the clocks property.
+pmu subnode
+-----------
+
+Required properties:
+ - reg : must contain PMU physical base address and size of the register set.
+
+The following are the FIMC-IS peripheral device nodes and can be specified
+either standalone or as the fimc-is node child nodes.
+
+i2c-isp (ISP I2C bus controller) nodes
+------------------------------------------
+
+Required properties:
+
+- compatible : should be "samsung,exynos4212-i2c-isp" for Exynos4212 and
+ Exynos4412 SoCs;
+- reg : physical base address and length of the registers set;
+- clocks : must contain gate clock specifier for this controller;
+- clock-names : must contain "i2c_isp" entry.
+
+For the above nodes it is required to specify a pinctrl state named "default",
+according to the pinctrl bindings defined in ../pinctrl/pinctrl-bindings.txt.
+
+Device tree nodes of the image sensors' controlled directly by the FIMC-IS
+firmware must be child nodes of their corresponding ISP I2C bus controller node.
+The data link of these image sensors must be specified using the common video
+interfaces bindings, defined in video-interfaces.txt.
diff --git a/Documentation/devicetree/bindings/media/exynos5-gsc.txt b/Documentation/devicetree/bindings/media/exynos5-gsc.txt
new file mode 100644
index 000000000..0604d42f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/exynos5-gsc.txt
@@ -0,0 +1,30 @@
+* Samsung Exynos5 G-Scaler device
+
+G-Scaler is used for scaling and color space conversion on EXYNOS5 SoCs.
+
+Required properties:
+- compatible: should be "samsung,exynos5-gsc"
+- reg: should contain G-Scaler physical address location and length.
+- interrupts: should contain G-Scaler interrupt number
+
+Example:
+
+gsc_0: gsc@0x13e00000 {
+ compatible = "samsung,exynos5-gsc";
+ reg = <0x13e00000 0x1000>;
+ interrupts = <0 85 0>;
+};
+
+Aliases:
+Each G-Scaler node should have a numbered alias in the aliases node,
+in the form of gscN, N = 0...3. G-Scaler driver uses these aliases
+to retrieve the device IDs using "of_alias_get_id()" call.
+
+Example:
+
+aliases {
+ gsc0 =&gsc_0;
+ gsc1 =&gsc_1;
+ gsc2 =&gsc_2;
+ gsc3 =&gsc_3;
+};
diff --git a/Documentation/devicetree/bindings/media/gpio-ir-receiver.txt b/Documentation/devicetree/bindings/media/gpio-ir-receiver.txt
new file mode 100644
index 000000000..56e726ef4
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/gpio-ir-receiver.txt
@@ -0,0 +1,16 @@
+Device-Tree bindings for GPIO IR receiver
+
+Required properties:
+ - compatible: should be "gpio-ir-receiver".
+ - gpios: specifies GPIO used for IR signal reception.
+
+Optional properties:
+ - linux,rc-map-name: Linux specific remote control map name.
+
+Example node:
+
+ ir: ir-receiver {
+ compatible = "gpio-ir-receiver";
+ gpios = <&gpio0 19 1>;
+ linux,rc-map-name = "rc-rc6-mce";
+ };
diff --git a/Documentation/devicetree/bindings/media/hix5hd2-ir.txt b/Documentation/devicetree/bindings/media/hix5hd2-ir.txt
new file mode 100644
index 000000000..fb5e76066
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/hix5hd2-ir.txt
@@ -0,0 +1,25 @@
+Device-Tree bindings for hix5hd2 ir IP
+
+Required properties:
+ - compatible: Should contain "hisilicon,hix5hd2-ir".
+ - reg: Base physical address of the controller and length of memory
+ mapped region.
+ - interrupts: interrupt-specifier for the sole interrupt generated by
+ the device. The interrupt specifier format depends on the interrupt
+ controller parent.
+ - clocks: clock phandle and specifier pair.
+ - hisilicon,power-syscon: phandle of syscon used to control power.
+
+Optional properties:
+ - linux,rc-map-name : Remote control map name.
+
+Example node:
+
+ ir: ir@f8001000 {
+ compatible = "hisilicon,hix5hd2-ir";
+ reg = <0xf8001000 0x1000>;
+ interrupts = <0 47 4>;
+ clocks = <&clock HIX5HD2_FIXED_24M>;
+ hisilicon,power-syscon = <&sysctrl>;
+ linux,rc-map-name = "rc-tivo";
+ };
diff --git a/Documentation/devicetree/bindings/media/i2c/adv7343.txt b/Documentation/devicetree/bindings/media/i2c/adv7343.txt
new file mode 100644
index 000000000..5653bc242
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/adv7343.txt
@@ -0,0 +1,48 @@
+* Analog Devices adv7343 video encoder
+
+The ADV7343 are high speed, digital-to-analog video encoders in a 64-lead LQFP
+package. Six high speed, 3.3 V, 11-bit video DACs provide support for composite
+(CVBS), S-Video (Y-C), and component (YPrPb/RGB) analog outputs in standard
+definition (SD), enhanced definition (ED), or high definition (HD) video
+formats.
+
+Required Properties :
+- compatible: Must be "adi,adv7343"
+
+Optional Properties :
+- adi,power-mode-sleep-mode: on enable the current consumption is reduced to
+ micro ampere level. All DACs and the internal PLL
+ circuit are disabled.
+- adi,power-mode-pll-ctrl: PLL and oversampling control. This control allows
+ internal PLL 1 circuit to be powered down and the
+ oversampling to be switched off.
+- ad,adv7343-power-mode-dac: array configuring the power on/off DAC's 1..6,
+ 0 = OFF and 1 = ON, Default value when this
+ property is not specified is <0 0 0 0 0 0>.
+- ad,adv7343-sd-config-dac-out: array configure SD DAC Output's 1 and 2, 0 = OFF
+ and 1 = ON, Default value when this property is
+ not specified is <0 0>.
+
+Example:
+
+i2c0@1c22000 {
+ ...
+ ...
+
+ adv7343@2a {
+ compatible = "adi,adv7343";
+ reg = <0x2a>;
+
+ port {
+ adv7343_1: endpoint {
+ adi,power-mode-sleep-mode;
+ adi,power-mode-pll-ctrl;
+ /* Use DAC1..3, DAC6 */
+ adi,dac-enable = <1 1 1 0 0 1>;
+ /* Use SD DAC output 1 */
+ adi,sd-dac-enable = <1 0>;
+ };
+ };
+ };
+ ...
+};
diff --git a/Documentation/devicetree/bindings/media/i2c/adv7604.txt b/Documentation/devicetree/bindings/media/i2c/adv7604.txt
new file mode 100644
index 000000000..c27cede3b
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/adv7604.txt
@@ -0,0 +1,70 @@
+* Analog Devices ADV7604/11 video decoder with HDMI receiver
+
+The ADV7604 and ADV7611 are multiformat video decoders with an integrated HDMI
+receiver. The ADV7604 has four multiplexed HDMI inputs and one analog input,
+and the ADV7611 has one HDMI input and no analog input.
+
+These device tree bindings support the ADV7611 only at the moment.
+
+Required Properties:
+
+ - compatible: Must contain one of the following
+ - "adi,adv7611" for the ADV7611
+
+ - reg: I2C slave address
+
+ - hpd-gpios: References to the GPIOs that control the HDMI hot-plug
+ detection pins, one per HDMI input. The active flag indicates the GPIO
+ level that enables hot-plug detection.
+
+The device node must contain one 'port' child node per device input and output
+port, in accordance with the video interface bindings defined in
+Documentation/devicetree/bindings/media/video-interfaces.txt. The port nodes
+are numbered as follows.
+
+ Port ADV7611
+------------------------------------------------------------
+ HDMI 0
+ Digital output 1
+
+The digital output port node must contain at least one endpoint.
+
+Optional Properties:
+
+ - reset-gpios: Reference to the GPIO connected to the device's reset pin.
+
+Optional Endpoint Properties:
+
+ The following three properties are defined in video-interfaces.txt and are
+ valid for source endpoints only.
+
+ - hsync-active: Horizontal synchronization polarity. Defaults to active low.
+ - vsync-active: Vertical synchronization polarity. Defaults to active low.
+ - pclk-sample: Pixel clock polarity. Defaults to output on the falling edge.
+
+ If none of hsync-active, vsync-active and pclk-sample is specified the
+ endpoint will use embedded BT.656 synchronization.
+
+
+Example:
+
+ hdmi_receiver@4c {
+ compatible = "adi,adv7611";
+ reg = <0x4c>;
+
+ reset-gpios = <&ioexp 0 GPIO_ACTIVE_LOW>;
+ hpd-gpios = <&ioexp 2 GPIO_ACTIVE_HIGH>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ };
+ port@1 {
+ reg = <1>;
+ hdmi_in: endpoint {
+ remote-endpoint = <&ccdc_in>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/i2c/mt9m111.txt b/Documentation/devicetree/bindings/media/i2c/mt9m111.txt
new file mode 100644
index 000000000..ed5a334b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/mt9m111.txt
@@ -0,0 +1,28 @@
+Micron 1.3Mp CMOS Digital Image Sensor
+
+The Micron MT9M111 is a CMOS active pixel digital image sensor with an active
+array size of 1280H x 1024V. It is programmable through a simple two-wire serial
+interface.
+
+Required Properties:
+- compatible: value should be "micron,mt9m111"
+
+For further reading on port node refer to
+Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Example:
+
+ i2c_master {
+ mt9m111@5d {
+ compatible = "micron,mt9m111";
+ reg = <0x5d>;
+
+ remote = <&pxa_camera>;
+ port {
+ mt9m111_1: endpoint {
+ bus-width = <8>;
+ remote-endpoint = <&pxa_camera>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/i2c/mt9p031.txt b/Documentation/devicetree/bindings/media/i2c/mt9p031.txt
new file mode 100644
index 000000000..cb60443ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/mt9p031.txt
@@ -0,0 +1,40 @@
+* Aptina 1/2.5-Inch 5Mp CMOS Digital Image Sensor
+
+The Aptina MT9P031 is a 1/2.5-inch CMOS active pixel digital image sensor with
+an active array size of 2592H x 1944V. It is programmable through a simple
+two-wire serial interface.
+
+Required Properties:
+- compatible: value should be either one among the following
+ (a) "aptina,mt9p031" for mt9p031 sensor
+ (b) "aptina,mt9p031m" for mt9p031m sensor
+
+- input-clock-frequency: Input clock frequency.
+
+- pixel-clock-frequency: Pixel clock frequency.
+
+Optional Properties:
+- reset-gpios: Chip reset GPIO
+
+For further reading on port node refer to
+Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Example:
+
+ i2c0@1c22000 {
+ ...
+ ...
+ mt9p031@5d {
+ compatible = "aptina,mt9p031";
+ reg = <0x5d>;
+ reset-gpios = <&gpio3 30 0>;
+
+ port {
+ mt9p031_1: endpoint {
+ input-clock-frequency = <6000000>;
+ pixel-clock-frequency = <96000000>;
+ };
+ };
+ };
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/media/i2c/mt9v032.txt b/Documentation/devicetree/bindings/media/i2c/mt9v032.txt
new file mode 100644
index 000000000..202565313
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/mt9v032.txt
@@ -0,0 +1,39 @@
+* Aptina 1/3-Inch WVGA CMOS Digital Image Sensor
+
+The Aptina MT9V032 is a 1/3-inch CMOS active pixel digital image sensor with
+an active array size of 752H x 480V. It is programmable through a simple
+two-wire serial interface.
+
+Required Properties:
+
+- compatible: value should be either one among the following
+ (a) "aptina,mt9v022" for MT9V022 color sensor
+ (b) "aptina,mt9v022m" for MT9V022 monochrome sensor
+ (c) "aptina,mt9v024" for MT9V024 color sensor
+ (d) "aptina,mt9v024m" for MT9V024 monochrome sensor
+ (e) "aptina,mt9v032" for MT9V032 color sensor
+ (f) "aptina,mt9v032m" for MT9V032 monochrome sensor
+ (g) "aptina,mt9v034" for MT9V034 color sensor
+ (h) "aptina,mt9v034m" for MT9V034 monochrome sensor
+
+Optional Properties:
+
+- link-frequencies: List of allowed link frequencies in Hz. Each frequency is
+ expressed as a 64-bit big-endian integer.
+
+For further reading on port node refer to
+Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Example:
+
+ mt9v032@5c {
+ compatible = "aptina,mt9v032";
+ reg = <0x5c>;
+
+ port {
+ mt9v032_out: endpoint {
+ link-frequencies = /bits/ 64
+ <13000000 26600000 27000000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/i2c/nokia,smia.txt b/Documentation/devicetree/bindings/media/i2c/nokia,smia.txt
new file mode 100644
index 000000000..855e1faf7
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/nokia,smia.txt
@@ -0,0 +1,63 @@
+SMIA/SMIA++ sensor
+
+SMIA (Standard Mobile Imaging Architecture) is an image sensor standard
+defined jointly by Nokia and ST. SMIA++, defined by Nokia, is an extension
+of that. These definitions are valid for both types of sensors.
+
+More detailed documentation can be found in
+Documentation/devicetree/bindings/media/video-interfaces.txt .
+
+
+Mandatory properties
+--------------------
+
+- compatible: "nokia,smia"
+- reg: I2C address (0x10, or an alternative address)
+- vana-supply: Analogue voltage supply (VANA), typically 2,8 volts (sensor
+ dependent).
+- clocks: External clock to the sensor
+- clock-frequency: Frequency of the external clock to the sensor
+- link-frequencies: List of allowed data link frequencies. An array of
+ 64-bit elements.
+
+
+Optional properties
+-------------------
+
+- nokia,nvm-size: The size of the NVM, in bytes. If the size is not given,
+ the NVM contents will not be read.
+- reset-gpios: XSHUTDOWN GPIO
+
+
+Endpoint node mandatory properties
+----------------------------------
+
+- clock-lanes: <0>
+- data-lanes: <1..n>
+- remote-endpoint: A phandle to the bus receiver's endpoint node.
+
+
+Example
+-------
+
+&i2c2 {
+ clock-frequency = <400000>;
+
+ smiapp_1: camera@10 {
+ compatible = "nokia,smia";
+ reg = <0x10>;
+ reset-gpios = <&gpio3 20 0>;
+ vana-supply = <&vaux3>;
+ clocks = <&omap3_isp 0>;
+ clock-frequency = <9600000>;
+ nokia,nvm-size = <512>; /* 8 * 64 */
+ link-frequencies = /bits/ 64 <199200000 210000000 499200000>;
+ port {
+ smiapp_1_1: endpoint {
+ clock-lanes = <0>;
+ data-lanes = <1 2>;
+ remote-endpoint = <&csi2a_ep>;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/media/i2c/ov2640.txt b/Documentation/devicetree/bindings/media/i2c/ov2640.txt
new file mode 100644
index 000000000..c429b5bdc
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/ov2640.txt
@@ -0,0 +1,46 @@
+* Omnivision OV2640 CMOS sensor
+
+The Omnivision OV2640 sensor support multiple resolutions output, such as
+CIF, SVGA, UXGA. It also can support YUV422/420, RGB565/555 or raw RGB
+output format.
+
+Required Properties:
+- compatible: should be "ovti,ov2640"
+- clocks: reference to the xvclk input clock.
+- clock-names: should be "xvclk".
+
+Optional Properties:
+- resetb-gpios: reference to the GPIO connected to the resetb pin, if any.
+- pwdn-gpios: reference to the GPIO connected to the pwdn pin, if any.
+
+The device node must contain one 'port' child node for its digital output
+video port, in accordance with the video interface bindings defined in
+Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Example:
+
+ i2c1: i2c@f0018000 {
+ ov2640: camera@0x30 {
+ compatible = "ovti,ov2640";
+ reg = <0x30>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pck1 &pinctrl_ov2640_pwdn &pinctrl_ov2640_resetb>;
+
+ resetb-gpios = <&pioE 24 GPIO_ACTIVE_LOW>;
+ pwdn-gpios = <&pioE 29 GPIO_ACTIVE_HIGH>;
+
+ clocks = <&pck1>;
+ clock-names = "xvclk";
+
+ assigned-clocks = <&pck1>;
+ assigned-clock-rates = <25000000>;
+
+ port {
+ ov2640_0: endpoint {
+ remote-endpoint = <&isi_0>;
+ bus-width = <8>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/i2c/ov2659.txt b/Documentation/devicetree/bindings/media/i2c/ov2659.txt
new file mode 100644
index 000000000..cabc7d827
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/ov2659.txt
@@ -0,0 +1,38 @@
+* OV2659 1/5-Inch 2Mp SOC Camera
+
+The Omnivision OV2659 is a 1/5-inch SOC camera, with an active array size of
+1632H x 1212V. It is programmable through a SCCB. The OV2659 sensor supports
+multiple resolutions output, such as UXGA, SVGA, 720p. It also can support
+YUV422, RGB565/555 or raw RGB output formats.
+
+Required Properties:
+- compatible: Must be "ovti,ov2659"
+- reg: I2C slave address
+- clocks: reference to the xvclk input clock.
+- clock-names: should be "xvclk".
+- link-frequencies: target pixel clock frequency.
+
+For further reading on port node refer to
+Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Example:
+
+ i2c0@1c22000 {
+ ...
+ ...
+ ov2659@30 {
+ compatible = "ovti,ov2659";
+ reg = <0x30>;
+
+ clocks = <&clk_ov2659 0>;
+ clock-names = "xvclk";
+
+ port {
+ ov2659_0: endpoint {
+ remote-endpoint = <&vpfe_ep>;
+ link-frequencies = /bits/ 64 <70000000>;
+ };
+ };
+ };
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/media/i2c/ths8200.txt b/Documentation/devicetree/bindings/media/i2c/ths8200.txt
new file mode 100644
index 000000000..285f6ae7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/ths8200.txt
@@ -0,0 +1,19 @@
+* Texas Instruments THS8200 video encoder
+
+The ths8200 device is a digital to analog converter used in DVD players, video
+recorders, set-top boxes.
+
+Required Properties :
+- compatible : value must be "ti,ths8200"
+
+Example:
+
+ i2c0@1c22000 {
+ ...
+ ...
+ ths8200@5c {
+ compatible = "ti,ths8200";
+ reg = <0x5c>;
+ };
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/media/i2c/tvp514x.txt b/Documentation/devicetree/bindings/media/i2c/tvp514x.txt
new file mode 100644
index 000000000..46752cc71
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/tvp514x.txt
@@ -0,0 +1,44 @@
+* Texas Instruments TVP514x video decoder
+
+The TVP5146/TVP5146m2/TVP5147/TVP5147m1 device is high quality, single-chip
+digital video decoder that digitizes and decodes all popular baseband analog
+video formats into digital video component. The tvp514x decoder supports analog-
+to-digital (A/D) conversion of component RGB and YPbPr signals as well as A/D
+conversion and decoding of NTSC, PAL and SECAM composite and S-video into
+component YCbCr.
+
+Required Properties :
+- compatible : value should be either one among the following
+ (a) "ti,tvp5146" for tvp5146 decoder.
+ (b) "ti,tvp5146m2" for tvp5146m2 decoder.
+ (c) "ti,tvp5147" for tvp5147 decoder.
+ (d) "ti,tvp5147m1" for tvp5147m1 decoder.
+
+- hsync-active: HSYNC Polarity configuration for endpoint.
+
+- vsync-active: VSYNC Polarity configuration for endpoint.
+
+- pclk-sample: Clock polarity of the endpoint.
+
+For further reading on port node refer to Documentation/devicetree/bindings/
+media/video-interfaces.txt.
+
+Example:
+
+ i2c0@1c22000 {
+ ...
+ ...
+ tvp514x@5c {
+ compatible = "ti,tvp5146";
+ reg = <0x5c>;
+
+ port {
+ tvp514x_1: endpoint {
+ hsync-active = <1>;
+ vsync-active = <1>;
+ pclk-sample = <0>;
+ };
+ };
+ };
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/media/i2c/tvp7002.txt b/Documentation/devicetree/bindings/media/i2c/tvp7002.txt
new file mode 100644
index 000000000..5f28b5d9a
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/i2c/tvp7002.txt
@@ -0,0 +1,53 @@
+* Texas Instruments TV7002 video decoder
+
+The TVP7002 device supports digitizing of video and graphics signal in RGB and
+YPbPr color space.
+
+Required Properties :
+- compatible : Must be "ti,tvp7002"
+
+Optional Properties:
+- hsync-active: HSYNC Polarity configuration for the bus. Default value when
+ this property is not specified is <0>.
+
+- vsync-active: VSYNC Polarity configuration for the bus. Default value when
+ this property is not specified is <0>.
+
+- pclk-sample: Clock polarity of the bus. Default value when this property is
+ not specified is <0>.
+
+- sync-on-green-active: Active state of Sync-on-green signal property of the
+ endpoint.
+ 0 = Normal Operation (Active Low, Default)
+ 1 = Inverted operation
+
+- field-even-active: Active-high Field ID output polarity control of the bus.
+ Under normal operation, the field ID output is set to logic 1 for an odd field
+ (field 1) and set to logic 0 for an even field (field 0).
+ 0 = Normal Operation (Active Low, Default)
+ 1 = FID output polarity inverted
+
+For further reading of port node refer Documentation/devicetree/bindings/media/
+video-interfaces.txt.
+
+Example:
+
+ i2c0@1c22000 {
+ ...
+ ...
+ tvp7002@5c {
+ compatible = "ti,tvp7002";
+ reg = <0x5c>;
+
+ port {
+ tvp7002_1: endpoint {
+ hsync-active = <1>;
+ vsync-active = <1>;
+ pclk-sample = <0>;
+ sync-on-green-active = <1>;
+ field-even-active = <0>;
+ };
+ };
+ };
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/media/img-ir-rev1.txt b/Documentation/devicetree/bindings/media/img-ir-rev1.txt
new file mode 100644
index 000000000..5434ce61b
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/img-ir-rev1.txt
@@ -0,0 +1,34 @@
+* ImgTec Infrared (IR) decoder version 1
+
+This binding is for Imagination Technologies' Infrared decoder block,
+specifically major revision 1.
+
+Required properties:
+- compatible: Should be "img,ir-rev1"
+- reg: Physical base address of the controller and length of
+ memory mapped region.
+- interrupts: The interrupt specifier to the cpu.
+
+Optional properties:
+- clocks: List of clock specifiers as described in standard
+ clock bindings.
+ Up to 3 clocks may be specified in the following order:
+ 1st: Core clock (defaults to 32.768KHz if omitted).
+ 2nd: System side (fast) clock.
+ 3rd: Power modulation clock.
+- clock-names: List of clock names corresponding to the clocks
+ specified in the clocks property.
+ Accepted clock names are:
+ "core": Core clock.
+ "sys": System clock.
+ "mod": Power modulation clock.
+
+Example:
+
+ ir@02006200 {
+ compatible = "img,ir-rev1";
+ reg = <0x02006200 0x100>;
+ interrupts = <29 4>;
+ clocks = <&clk_32khz>;
+ clock-names = "core";
+ };
diff --git a/Documentation/devicetree/bindings/media/meson-ir.txt b/Documentation/devicetree/bindings/media/meson-ir.txt
new file mode 100644
index 000000000..407848e85
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/meson-ir.txt
@@ -0,0 +1,14 @@
+* Amlogic Meson IR remote control receiver
+
+Required properties:
+ - compatible : should be "amlogic,meson6-ir"
+ - reg : physical base address and length of the device registers
+ - interrupts : a single specifier for the interrupt from the device
+
+Example:
+
+ ir-receiver@c8100480 {
+ compatible= "amlogic,meson6-ir";
+ reg = <0xc8100480 0x20>;
+ interrupts = <0 15 1>;
+ };
diff --git a/Documentation/devicetree/bindings/media/pxa-camera.txt b/Documentation/devicetree/bindings/media/pxa-camera.txt
new file mode 100644
index 000000000..11f5b5d51
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/pxa-camera.txt
@@ -0,0 +1,43 @@
+Marvell PXA camera host interface
+
+Required properties:
+ - compatible: Should be "marvell,pxa270-qci"
+ - reg: register base and size
+ - interrupts: the interrupt number
+ - any required generic properties defined in video-interfaces.txt
+
+Optional properties:
+ - clocks: input clock (see clock-bindings.txt)
+ - clock-output-names: should contain the name of the clock driving the
+ sensor master clock MCLK
+ - clock-frequency: host interface is driving MCLK, and MCLK rate is this rate
+
+Example:
+
+ pxa_camera: pxa_camera@50000000 {
+ compatible = "marvell,pxa270-qci";
+ reg = <0x50000000 0x1000>;
+ interrupts = <33>;
+
+ clocks = <&pxa2xx_clks 24>;
+ clock-names = "ciclk";
+ clock-frequency = <50000000>;
+ clock-output-names = "qci_mclk";
+
+ status = "okay";
+
+ port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* Parallel bus endpoint */
+ qci: endpoint@0 {
+ reg = <0>; /* Local endpoint # */
+ remote-endpoint = <&mt9m111_1>;
+ bus-width = <8>; /* Used data lines */
+ hsync-active = <0>; /* Active low */
+ vsync-active = <0>; /* Active low */
+ pclk-sample = <1>; /* Rising */
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/rcar_vin.txt b/Documentation/devicetree/bindings/media/rcar_vin.txt
new file mode 100644
index 000000000..9dafe6b06
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/rcar_vin.txt
@@ -0,0 +1,88 @@
+Renesas RCar Video Input driver (rcar_vin)
+------------------------------------------
+
+The rcar_vin device provides video input capabilities for the Renesas R-Car
+family of devices. The current blocks are always slaves and suppot one input
+channel which can be either RGB, YUYV or BT656.
+
+ - compatible: Must be one of the following
+ - "renesas,vin-r8a7794" for the R8A7794 device
+ - "renesas,vin-r8a7793" for the R8A7793 device
+ - "renesas,vin-r8a7791" for the R8A7791 device
+ - "renesas,vin-r8a7790" for the R8A7790 device
+ - "renesas,vin-r8a7779" for the R8A7779 device
+ - "renesas,vin-r8a7778" for the R8A7778 device
+ - reg: the register base and size for the device registers
+ - interrupts: the interrupt for the device
+ - clocks: Reference to the parent clock
+
+Additionally, an alias named vinX will need to be created to specify
+which video input device this is.
+
+The per-board settings:
+ - port sub-node describing a single endpoint connected to the vin
+ as described in video-interfaces.txt[1]. Only the first one will
+ be considered as each vin interface has one input port.
+
+ These settings are used to work out video input format and widths
+ into the system.
+
+
+Device node example
+-------------------
+
+ aliases {
+ vin0 = &vin0;
+ };
+
+ vin0: vin@0xe6ef0000 {
+ compatible = "renesas,vin-r8a7790";
+ clocks = <&mstp8_clks R8A7790_CLK_VIN0>;
+ reg = <0 0xe6ef0000 0 0x1000>;
+ interrupts = <0 188 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ };
+
+Board setup example (vin1 composite video input)
+------------------------------------------------
+
+&i2c2 {
+ status = "ok";
+ pinctrl-0 = <&i2c2_pins>;
+ pinctrl-names = "default";
+
+ adv7180@20 {
+ compatible = "adi,adv7180";
+ reg = <0x20>;
+ remote = <&vin1>;
+
+ port {
+ adv7180: endpoint {
+ bus-width = <8>;
+ remote-endpoint = <&vin1ep0>;
+ };
+ };
+ };
+};
+
+/* composite video input */
+&vin1 {
+ pinctrl-0 = <&vin1_pins>;
+ pinctrl-names = "default";
+
+ status = "ok";
+
+ port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ vin1ep0: endpoint {
+ remote-endpoint = <&adv7180>;
+ bus-width = <8>;
+ };
+ };
+};
+
+
+
+[1] video-interfaces.txt common video media interface
diff --git a/Documentation/devicetree/bindings/media/renesas,vsp1.txt b/Documentation/devicetree/bindings/media/renesas,vsp1.txt
new file mode 100644
index 000000000..87fe08abf
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/renesas,vsp1.txt
@@ -0,0 +1,43 @@
+* Renesas VSP1 Video Processing Engine
+
+The VSP1 is a video processing engine that supports up-/down-scaling, alpha
+blending, color space conversion and various other image processing features.
+It can be found in the Renesas R-Car second generation SoCs.
+
+Required properties:
+
+ - compatible: Must contain "renesas,vsp1"
+
+ - reg: Base address and length of the registers block for the VSP1.
+ - interrupts: VSP1 interrupt specifier.
+ - clocks: A phandle + clock-specifier pair for the VSP1 functional clock.
+
+ - renesas,#rpf: Number of Read Pixel Formatter (RPF) modules in the VSP1.
+ - renesas,#uds: Number of Up Down Scaler (UDS) modules in the VSP1.
+ - renesas,#wpf: Number of Write Pixel Formatter (WPF) modules in the VSP1.
+
+
+Optional properties:
+
+ - renesas,has-lif: Boolean, indicates that the LCD Interface (LIF) module is
+ available.
+ - renesas,has-lut: Boolean, indicates that the Look Up Table (LUT) module is
+ available.
+ - renesas,has-sru: Boolean, indicates that the Super Resolution Unit (SRU)
+ module is available.
+
+
+Example: R8A7790 (R-Car H2) VSP1-S node
+
+ vsp1@fe928000 {
+ compatible = "renesas,vsp1";
+ reg = <0 0xfe928000 0 0x8000>;
+ interrupts = <0 267 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp1_clks R8A7790_CLK_VSP1_S>;
+
+ renesas,has-lut;
+ renesas,has-sru;
+ renesas,#rpf = <5>;
+ renesas,#uds = <3>;
+ renesas,#wpf = <4>;
+ };
diff --git a/Documentation/devicetree/bindings/media/s5p-mfc.txt b/Documentation/devicetree/bindings/media/s5p-mfc.txt
new file mode 100644
index 000000000..2d5787eac
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/s5p-mfc.txt
@@ -0,0 +1,51 @@
+* Samsung Multi Format Codec (MFC)
+
+Multi Format Codec (MFC) is the IP present in Samsung SoCs which
+supports high resolution decoding and encoding functionalities.
+The MFC device driver is a v4l2 driver which can encode/decode
+video raw/elementary streams and has support for all popular
+video codecs.
+
+Required properties:
+ - compatible : value should be either one among the following
+ (a) "samsung,mfc-v5" for MFC v5 present in Exynos4 SoCs
+ (b) "samsung,mfc-v6" for MFC v6 present in Exynos5 SoCs
+ (c) "samsung,mfc-v7" for MFC v7 present in Exynos5420 SoC
+ (d) "samsung,mfc-v8" for MFC v8 present in Exynos5800 SoC
+
+ - reg : Physical base address of the IP registers and length of memory
+ mapped region.
+
+ - interrupts : MFC interrupt number to the CPU.
+ - clocks : from common clock binding: handle to mfc clock.
+ - clock-names : from common clock binding: must contain "mfc",
+ corresponding to entry in the clocks property.
+
+ - samsung,mfc-r : Base address of the first memory bank used by MFC
+ for DMA contiguous memory allocation and its size.
+
+ - samsung,mfc-l : Base address of the second memory bank used by MFC
+ for DMA contiguous memory allocation and its size.
+
+Optional properties:
+ - power-domains : power-domain property defined with a phandle
+ to respective power domain.
+
+Example:
+SoC specific DT entry:
+
+mfc: codec@13400000 {
+ compatible = "samsung,mfc-v5";
+ reg = <0x13400000 0x10000>;
+ interrupts = <0 94 0>;
+ power-domains = <&pd_mfc>;
+ clocks = <&clock 273>;
+ clock-names = "mfc";
+};
+
+Board specific DT entry:
+
+codec@13400000 {
+ samsung,mfc-r = <0x43000000 0x800000>;
+ samsung,mfc-l = <0x51000000 0x800000>;
+};
diff --git a/Documentation/devicetree/bindings/media/samsung-fimc.txt b/Documentation/devicetree/bindings/media/samsung-fimc.txt
new file mode 100644
index 000000000..922d6f8e7
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/samsung-fimc.txt
@@ -0,0 +1,211 @@
+Samsung S5P/EXYNOS SoC Camera Subsystem (FIMC)
+----------------------------------------------
+
+The S5P/Exynos SoC Camera subsystem comprises of multiple sub-devices
+represented by separate device tree nodes. Currently this includes: FIMC (in
+the S5P SoCs series known as CAMIF), MIPI CSIS, FIMC-LITE and FIMC-IS (ISP).
+
+The sub-subdevices are defined as child nodes of the common 'camera' node which
+also includes common properties of the whole subsystem not really specific to
+any single sub-device, like common camera port pins or the CAMCLK clock outputs
+for external image sensors attached to an SoC.
+
+Common 'camera' node
+--------------------
+
+Required properties:
+
+- compatible: must be "samsung,fimc", "simple-bus"
+- clocks: list of clock specifiers, corresponding to entries in
+ the clock-names property;
+- clock-names : must contain "sclk_cam0", "sclk_cam1", "pxl_async0",
+ "pxl_async1" entries, matching entries in the clocks property.
+
+- #clock-cells: from the common clock bindings (../clock/clock-bindings.txt),
+ must be 1. A clock provider is associated with the 'camera' node and it should
+ be referenced by external sensors that use clocks provided by the SoC on
+ CAM_*_CLKOUT pins. The clock specifier cell stores an index of a clock.
+ The indices are 0, 1 for CAM_A_CLKOUT, CAM_B_CLKOUT clocks respectively.
+
+- clock-output-names: from the common clock bindings, should contain names of
+ clocks registered by the camera subsystem corresponding to CAM_A_CLKOUT,
+ CAM_B_CLKOUT output clocks respectively.
+
+The pinctrl bindings defined in ../pinctrl/pinctrl-bindings.txt must be used
+to define a required pinctrl state named "default" and optional pinctrl states:
+"idle", "active-a", active-b". These optional states can be used to switch the
+camera port pinmux at runtime. The "idle" state should configure both the camera
+ports A and B into high impedance state, especially the CAMCLK clock output
+should be inactive. For the "active-a" state the camera port A must be activated
+and the port B deactivated and for the state "active-b" it should be the other
+way around.
+
+The 'camera' node must include at least one 'fimc' child node.
+
+
+'fimc' device nodes
+-------------------
+
+Required properties:
+
+- compatible: "samsung,s5pv210-fimc" for S5PV210, "samsung,exynos4210-fimc"
+ for Exynos4210 and "samsung,exynos4212-fimc" for Exynos4x12 SoCs;
+- reg: physical base address and length of the registers set for the device;
+- interrupts: should contain FIMC interrupt;
+- clocks: list of clock specifiers, must contain an entry for each required
+ entry in clock-names;
+- clock-names: must contain "fimc", "sclk_fimc" entries.
+- samsung,pix-limits: an array of maximum supported image sizes in pixels, for
+ details refer to Table 2-1 in the S5PV210 SoC User Manual; The meaning of
+ each cell is as follows:
+ 0 - scaler input horizontal size,
+ 1 - input horizontal size for the scaler bypassed,
+ 2 - REAL_WIDTH without input rotation,
+ 3 - REAL_HEIGHT with input rotation,
+- samsung,sysreg: a phandle to the SYSREG node.
+
+Each FIMC device should have an alias in the aliases node, in the form of
+fimc<n>, where <n> is an integer specifying the IP block instance.
+
+Optional properties:
+
+- clock-frequency: maximum FIMC local clock (LCLK) frequency;
+- samsung,min-pix-sizes: an array specyfing minimum image size in pixels at
+ the FIMC input and output DMA, in the first and second cell respectively.
+ Default value when this property is not present is <16 16>;
+- samsung,min-pix-alignment: minimum supported image height alignment (first
+ cell) and the horizontal image offset (second cell). The values are in pixels
+ and default to <2 1> when this property is not present;
+- samsung,mainscaler-ext: a boolean property indicating whether the FIMC IP
+ supports extended image size and has CIEXTEN register;
+- samsung,rotators: a bitmask specifying whether this IP has the input and
+ the output rotator. Bits 4 and 0 correspond to input and output rotator
+ respectively. If a rotator is present its corresponding bit should be set.
+ Default value when this property is not specified is 0x11.
+- samsung,cam-if: a bolean property indicating whether the IP block includes
+ the camera input interface.
+- samsung,isp-wb: this property must be present if the IP block has the ISP
+ writeback input.
+- samsung,lcd-wb: this property must be present if the IP block has the LCD
+ writeback input.
+
+
+'parallel-ports' node
+---------------------
+
+This node should contain child 'port' nodes specifying active parallel video
+input ports. It includes camera A and camera B inputs. 'reg' property in the
+port nodes specifies data input - 0, 1 indicates input A, B respectively.
+
+Optional properties
+
+- samsung,camclk-out (deprecated) : specifies clock output for remote sensor,
+ 0 - CAM_A_CLKOUT, 1 - CAM_B_CLKOUT;
+
+Image sensor nodes
+------------------
+
+The sensor device nodes should be added to their control bus controller (e.g.
+I2C0) nodes and linked to a port node in the csis or the parallel-ports node,
+using the common video interfaces bindings, defined in video-interfaces.txt.
+
+Example:
+
+ aliases {
+ fimc0 = &fimc_0;
+ };
+
+ /* Parallel bus IF sensor */
+ i2c_0: i2c@13860000 {
+ s5k6aa: sensor@3c {
+ compatible = "samsung,s5k6aafx";
+ reg = <0x3c>;
+ vddio-supply = <...>;
+
+ clock-frequency = <24000000>;
+ clocks = <&camera 1>;
+ clock-names = "mclk";
+
+ port {
+ s5k6aa_ep: endpoint {
+ remote-endpoint = <&fimc0_ep>;
+ bus-width = <8>;
+ hsync-active = <0>;
+ vsync-active = <1>;
+ pclk-sample = <1>;
+ };
+ };
+ };
+
+ /* MIPI CSI-2 bus IF sensor */
+ s5c73m3: sensor@0x1a {
+ compatible = "samsung,s5c73m3";
+ reg = <0x1a>;
+ vddio-supply = <...>;
+
+ clock-frequency = <24000000>;
+ clocks = <&camera 0>;
+ clock-names = "mclk";
+
+ port {
+ s5c73m3_1: endpoint {
+ data-lanes = <1 2 3 4>;
+ remote-endpoint = <&csis0_ep>;
+ };
+ };
+ };
+ };
+
+ camera {
+ compatible = "samsung,fimc", "simple-bus";
+ clocks = <&clock 132>, <&clock 133>, <&clock 351>,
+ <&clock 352>;
+ clock-names = "sclk_cam0", "sclk_cam1", "pxl_async0",
+ "pxl_async1";
+ #clock-cells = <1>;
+ clock-output-names = "cam_a_clkout", "cam_b_clkout";
+ pinctrl-names = "default";
+ pinctrl-0 = <&cam_port_a_clk_active>;
+ status = "okay";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* parallel camera ports */
+ parallel-ports {
+ /* camera A input */
+ port@0 {
+ reg = <0>;
+ fimc0_ep: endpoint {
+ remote-endpoint = <&s5k6aa_ep>;
+ bus-width = <8>;
+ hsync-active = <0>;
+ vsync-active = <1>;
+ pclk-sample = <1>;
+ };
+ };
+ };
+
+ fimc_0: fimc@11800000 {
+ compatible = "samsung,exynos4210-fimc";
+ reg = <0x11800000 0x1000>;
+ interrupts = <0 85 0>;
+ status = "okay";
+ };
+
+ csis_0: csis@11880000 {
+ compatible = "samsung,exynos4210-csis";
+ reg = <0x11880000 0x1000>;
+ interrupts = <0 78 0>;
+ /* camera C input */
+ port@3 {
+ reg = <3>;
+ csis0_ep: endpoint {
+ remote-endpoint = <&s5c73m3_ep>;
+ data-lanes = <1 2 3 4>;
+ samsung,csis-hs-settle = <12>;
+ };
+ };
+ };
+ };
+
+The MIPI-CSIS device binding is defined in samsung-mipi-csis.txt.
diff --git a/Documentation/devicetree/bindings/media/samsung-mipi-csis.txt b/Documentation/devicetree/bindings/media/samsung-mipi-csis.txt
new file mode 100644
index 000000000..be45f0b1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/samsung-mipi-csis.txt
@@ -0,0 +1,81 @@
+Samsung S5P/EXYNOS SoC series MIPI CSI-2 receiver (MIPI CSIS)
+-------------------------------------------------------------
+
+Required properties:
+
+- compatible : "samsung,s5pv210-csis" for S5PV210 (S5PC110),
+ "samsung,exynos4210-csis" for Exynos4210 (S5PC210),
+ "samsung,exynos4212-csis" for Exynos4212/Exynos4412,
+ "samsung,exynos5250-csis" for Exynos5250;
+- reg : offset and length of the register set for the device;
+- interrupts : should contain MIPI CSIS interrupt; the format of the
+ interrupt specifier depends on the interrupt controller;
+- bus-width : maximum number of data lanes supported (SoC specific);
+- vddio-supply : MIPI CSIS I/O and PLL voltage supply (e.g. 1.8V);
+- vddcore-supply : MIPI CSIS Core voltage supply (e.g. 1.1V);
+- clocks : list of clock specifiers, corresponding to entries in
+ clock-names property;
+- clock-names : must contain "csis", "sclk_csis" entries, matching entries
+ in the clocks property.
+
+Optional properties:
+
+- clock-frequency : The IP's main (system bus) clock frequency in Hz, default
+ value when this property is not specified is 166 MHz;
+- samsung,csis-wclk : CSI-2 wrapper clock selection. If this property is present
+ external clock from CMU will be used, or the bus clock if
+ if it's not specified.
+
+The device node should contain one 'port' child node with one child 'endpoint'
+node, according to the bindings defined in Documentation/devicetree/bindings/
+media/video-interfaces.txt. The following are properties specific to those nodes.
+
+port node
+---------
+
+- reg : (required) must be 3 for camera C input (CSIS0) or 4 for
+ camera D input (CSIS1);
+
+endpoint node
+-------------
+
+- data-lanes : (required) an array specifying active physical MIPI-CSI2
+ data input lanes and their mapping to logical lanes; the
+ array's content is unused, only its length is meaningful;
+
+- samsung,csis-hs-settle : (optional) differential receiver (HS-RX) settle time;
+
+
+Example:
+
+ reg0: regulator@0 {
+ };
+
+ reg1: regulator@1 {
+ };
+
+/* SoC properties */
+
+ csis_0: csis@11880000 {
+ compatible = "samsung,exynos4210-csis";
+ reg = <0x11880000 0x1000>;
+ interrupts = <0 78 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+/* Board properties */
+
+ csis_0: csis@11880000 {
+ clock-frequency = <166000000>;
+ vddio-supply = <&reg0>;
+ vddcore-supply = <&reg1>;
+ port {
+ reg = <3>; /* 3 - CSIS0, 4 - CSIS1 */
+ csis0_ep: endpoint {
+ remote-endpoint = <...>;
+ data-lanes = <1>, <2>;
+ samsung,csis-hs-settle = <12>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/samsung-s5c73m3.txt b/Documentation/devicetree/bindings/media/samsung-s5c73m3.txt
new file mode 100644
index 000000000..2c85c4538
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/samsung-s5c73m3.txt
@@ -0,0 +1,97 @@
+Samsung S5C73M3 8Mp camera ISP
+------------------------------
+
+The S5C73M3 camera ISP supports MIPI CSI-2 and parallel (ITU-R BT.656) video
+data busses. The I2C bus is the main control bus and additionally the SPI bus
+is used, mostly for transferring the firmware to and from the device. Two
+slave device nodes corresponding to these control bus interfaces are required
+and should be placed under respective bus controller nodes.
+
+I2C slave device node
+---------------------
+
+Required properties:
+
+- compatible : "samsung,s5c73m3";
+- reg : I2C slave address of the sensor;
+- vdd-int-supply : digital power supply (1.2V);
+- vdda-supply : analog power supply (1.2V);
+- vdd-reg-supply : regulator input power supply (2.8V);
+- vddio-host-supply : host I/O power supply (1.8V to 2.8V);
+- vddio-cis-supply : CIS I/O power supply (1.2V to 1.8V);
+- vdd-af-supply : lens power supply (2.8V);
+- xshutdown-gpios : specifier of GPIO connected to the XSHUTDOWN pin;
+- standby-gpios : specifier of GPIO connected to the STANDBY pin;
+- clocks : should contain list of phandle and clock specifier pairs
+ according to common clock bindings for the clocks described
+ in the clock-names property;
+- clock-names : should contain "cis_extclk" entry for the CIS_EXTCLK clock;
+
+Optional properties:
+
+- clock-frequency : the frequency at which the "cis_extclk" clock should be
+ configured to operate, in Hz; if this property is not
+ specified default 24 MHz value will be used.
+
+The common video interfaces bindings (see video-interfaces.txt) should be used
+to specify link from the S5C73M3 to an external image data receiver. The S5C73M3
+device node should contain one 'port' child node with an 'endpoint' subnode for
+this purpose. The data link from a raw image sensor to the S5C73M3 can be
+similarly specified, but it is optional since the S5C73M3 ISP and a raw image
+sensor are usually inseparable and form a hybrid module.
+
+Following properties are valid for the endpoint node(s):
+
+endpoint subnode
+----------------
+
+- data-lanes : (optional) specifies MIPI CSI-2 data lanes as covered in
+ video-interfaces.txt. This sensor doesn't support data lane remapping
+ and physical lane indexes in subsequent elements of the array should
+ be only consecutive ascending values.
+
+SPI device node
+---------------
+
+Required properties:
+
+- compatible : "samsung,s5c73m3";
+
+For more details see description of the SPI busses bindings
+(../spi/spi-bus.txt) and bindings of a specific bus controller.
+
+Example:
+
+i2c@138A000000 {
+ ...
+ s5c73m3@3c {
+ compatible = "samsung,s5c73m3";
+ reg = <0x3c>;
+ vdd-int-supply = <&buck9_reg>;
+ vdda-supply = <&ldo17_reg>;
+ vdd-reg-supply = <&cam_io_reg>;
+ vddio-host-supply = <&ldo18_reg>;
+ vddio-cis-supply = <&ldo9_reg>;
+ vdd-af-supply = <&cam_af_reg>;
+ clock-frequency = <24000000>;
+ clocks = <&clk 0>;
+ clock-names = "cis_extclk";
+ reset-gpios = <&gpf1 3 1>;
+ standby-gpios = <&gpm0 1 1>;
+ port {
+ s5c73m3_ep: endpoint {
+ remote-endpoint = <&csis0_ep>;
+ data-lanes = <1 2 3 4>;
+ };
+ };
+ };
+};
+
+spi@1392000 {
+ ...
+ s5c73m3_spi: s5c73m3@0 {
+ compatible = "samsung,s5c73m3";
+ reg = <0>;
+ ...
+ };
+};
diff --git a/Documentation/devicetree/bindings/media/samsung-s5k5baf.txt b/Documentation/devicetree/bindings/media/samsung-s5k5baf.txt
new file mode 100644
index 000000000..1f51e0439
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/samsung-s5k5baf.txt
@@ -0,0 +1,58 @@
+Samsung S5K5BAF UXGA 1/5" 2M CMOS Image Sensor with embedded SoC ISP
+--------------------------------------------------------------------
+
+Required properties:
+
+- compatible : "samsung,s5k5baf";
+- reg : I2C slave address of the sensor;
+- vdda-supply : analog power supply 2.8V (2.6V to 3.0V);
+- vddreg-supply : regulator input power supply 1.8V (1.7V to 1.9V)
+ or 2.8V (2.6V to 3.0);
+- vddio-supply : I/O power supply 1.8V (1.65V to 1.95V)
+ or 2.8V (2.5V to 3.1V);
+- stbyn-gpios : GPIO connected to STDBYN pin;
+- rstn-gpios : GPIO connected to RSTN pin;
+- clocks : list of phandle and clock specifier pairs
+ according to common clock bindings for the
+ clocks described in clock-names;
+- clock-names : should include "mclk" for the sensor's master clock;
+
+Optional properties:
+
+- clock-frequency : the frequency at which the "mclk" clock should be
+ configured to operate, in Hz; if this property is not
+ specified default 24 MHz value will be used.
+
+The device node should contain one 'port' child node with one child 'endpoint'
+node, according to the bindings defined in Documentation/devicetree/bindings/
+media/video-interfaces.txt. The following are properties specific to those
+nodes.
+
+endpoint node
+-------------
+
+- data-lanes : (optional) specifies MIPI CSI-2 data lanes as covered in
+ video-interfaces.txt. If present it should be <1> - the device
+ supports only one data lane without re-mapping.
+
+Example:
+
+s5k5bafx@2d {
+ compatible = "samsung,s5k5baf";
+ reg = <0x2d>;
+ vdda-supply = <&cam_io_en_reg>;
+ vddreg-supply = <&vt_core_15v_reg>;
+ vddio-supply = <&vtcam_reg>;
+ stbyn-gpios = <&gpl2 0 1>;
+ rstn-gpios = <&gpl2 1 1>;
+ clock-names = "mclk";
+ clocks = <&clock_cam 0>;
+ clock-frequency = <24000000>;
+
+ port {
+ s5k5bafx_ep: endpoint {
+ remote-endpoint = <&csis1_ep>;
+ data-lanes = <1>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/media/samsung-s5k6a3.txt b/Documentation/devicetree/bindings/media/samsung-s5k6a3.txt
new file mode 100644
index 000000000..cce01e82f
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/samsung-s5k6a3.txt
@@ -0,0 +1,33 @@
+Samsung S5K6A3(YX) raw image sensor
+---------------------------------
+
+S5K6A3(YX) is a raw image sensor with MIPI CSI-2 and CCP2 image data interfaces
+and CCI (I2C compatible) control bus.
+
+Required properties:
+
+- compatible : "samsung,s5k6a3";
+- reg : I2C slave address of the sensor;
+- svdda-supply : core voltage supply;
+- svddio-supply : I/O voltage supply;
+- afvdd-supply : AF (actuator) voltage supply;
+- gpios : specifier of a GPIO connected to the RESET pin;
+- clocks : should contain list of phandle and clock specifier pairs
+ according to common clock bindings for the clocks described
+ in the clock-names property;
+- clock-names : should contain "extclk" entry for the sensor's EXTCLK clock;
+
+Optional properties:
+
+- clock-frequency : the frequency at which the "extclk" clock should be
+ configured to operate, in Hz; if this property is not
+ specified default 24 MHz value will be used.
+
+The common video interfaces bindings (see video-interfaces.txt) should be
+used to specify link to the image data receiver. The S5K6A3(YX) device
+node should contain one 'port' child node with an 'endpoint' subnode.
+
+Following properties are valid for the endpoint node:
+
+- data-lanes : (optional) specifies MIPI CSI-2 data lanes as covered in
+ video-interfaces.txt. The sensor supports only one data lane.
diff --git a/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt b/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt
new file mode 100644
index 000000000..1ce4e46bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/sh_mobile_ceu.txt
@@ -0,0 +1,18 @@
+Bindings, specific for the sh_mobile_ceu_camera.c driver:
+ - compatible: Should be "renesas,sh-mobile-ceu"
+ - reg: register base and size
+ - interrupts: the interrupt number
+ - interrupt-parent: the interrupt controller
+ - renesas,max-width: maximum image width, supported on this SoC
+ - renesas,max-height: maximum image height, supported on this SoC
+
+Example:
+
+ceu0: ceu@0xfe910000 {
+ compatible = "renesas,sh-mobile-ceu";
+ reg = <0xfe910000 0xa0>;
+ interrupt-parent = <&intcs>;
+ interrupts = <0x880>;
+ renesas,max-width = <8188>;
+ renesas,max-height = <8188>;
+};
diff --git a/Documentation/devicetree/bindings/media/si4713.txt b/Documentation/devicetree/bindings/media/si4713.txt
new file mode 100644
index 000000000..5ee5552d3
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/si4713.txt
@@ -0,0 +1,30 @@
+* Silicon Labs FM Radio transmitter
+
+The Silicon Labs Si4713 is an FM radio transmitter with receive power scan
+supporting 76-108 MHz. It includes an RDS encoder and has both, a stereo-analog
+and a digital interface, which supports I2S, left-justified and a custom
+DSP-mode format. It is programmable through an I2C interface.
+
+Required Properties:
+- compatible: Should contain "silabs,si4713"
+- reg: the I2C address of the device
+
+Optional Properties:
+- interrupts-extended: Interrupt specifier for the chips interrupt
+- reset-gpios: GPIO specifier for the chips reset line
+- vdd-supply: phandle for Vdd regulator
+- vio-supply: phandle for Vio regulator
+
+Example:
+
+&i2c2 {
+ fmtx: si4713@63 {
+ compatible = "silabs,si4713";
+ reg = <0x63>;
+
+ interrupts-extended = <&gpio2 21 IRQ_TYPE_EDGE_FALLING>; /* 53 */
+ reset-gpios = <&gpio6 3 GPIO_ACTIVE_HIGH>; /* 163 */
+ vio-supply = <&vio>;
+ vdd-supply = <&vaux1>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/media/st-rc.txt b/Documentation/devicetree/bindings/media/st-rc.txt
new file mode 100644
index 000000000..05c432d08
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/st-rc.txt
@@ -0,0 +1,29 @@
+Device-Tree bindings for ST IRB IP
+
+Required properties:
+ - compatible: Should contain "st,comms-irb".
+ - reg: Base physical address of the controller and length of memory
+ mapped region.
+ - interrupts: interrupt-specifier for the sole interrupt generated by
+ the device. The interrupt specifier format depends on the interrupt
+ controller parent.
+ - rx-mode: can be "infrared" or "uhf". This property specifies the L1
+ protocol used for receiving remote control signals. rx-mode should
+ be present iff the rx pins are wired up.
+ - tx-mode: should be "infrared". This property specifies the L1
+ protocol used for transmitting remote control signals. tx-mode should
+ be present iff the tx pins are wired up.
+
+Optional properties:
+ - pinctrl-names, pinctrl-0: the pincontrol settings to configure muxing
+ properly for IRB pins.
+ - clocks : phandle with clock-specifier pair for IRB.
+
+Example node:
+
+ rc: rc@fe518000 {
+ compatible = "st,comms-irb";
+ reg = <0xfe518000 0x234>;
+ interrupts = <0 203 0>;
+ rx-mode = "infrared";
+ };
diff --git a/Documentation/devicetree/bindings/media/sunxi-ir.txt b/Documentation/devicetree/bindings/media/sunxi-ir.txt
new file mode 100644
index 000000000..1811a067c
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/sunxi-ir.txt
@@ -0,0 +1,25 @@
+Device-Tree bindings for SUNXI IR controller found in sunXi SoC family
+
+Required properties:
+- compatible : "allwinner,sun4i-a10-ir" or "allwinner,sun5i-a13-ir"
+- clocks : list of clock specifiers, corresponding to
+ entries in clock-names property;
+- clock-names : should contain "apb" and "ir" entries;
+- interrupts : should contain IR IRQ number;
+- reg : should contain IO map address for IR.
+
+Optional properties:
+- linux,rc-map-name : Remote control map name.
+- resets : phandle + reset specifier pair
+
+Example:
+
+ir0: ir@01c21800 {
+ compatible = "allwinner,sun4i-a10-ir";
+ clocks = <&apb0_gates 6>, <&ir0_clk>;
+ clock-names = "apb", "ir";
+ resets = <&apb0_rst 1>;
+ interrupts = <0 5 1>;
+ reg = <0x01C21800 0x40>;
+ linux,rc-map-name = "rc-rc6-mce";
+};
diff --git a/Documentation/devicetree/bindings/media/ti,omap3isp.txt b/Documentation/devicetree/bindings/media/ti,omap3isp.txt
new file mode 100644
index 000000000..ac23de855
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/ti,omap3isp.txt
@@ -0,0 +1,71 @@
+OMAP 3 ISP Device Tree bindings
+===============================
+
+The DT definitions can be found in include/dt-bindings/media/omap3-isp.h.
+
+Required properties
+===================
+
+compatible : must contain "ti,omap3-isp"
+
+reg : the two registers sets (physical address and length) for the
+ ISP. The first set contains the core ISP registers up to
+ the end of the SBL block. The second set contains the
+ CSI PHYs and receivers registers.
+interrupts : the ISP interrupt specifier
+iommus : phandle and IOMMU specifier for the IOMMU that serves the ISP
+syscon : the phandle and register offset to the Complex I/O or CSI-PHY
+ register
+ti,phy-type : 0 -- OMAP3ISP_PHY_TYPE_COMPLEX_IO (e.g. 3430)
+ 1 -- OMAP3ISP_PHY_TYPE_CSIPHY (e.g. 3630)
+#clock-cells : Must be 1 --- the ISP provides two external clocks,
+ cam_xclka and cam_xclkb, at indices 0 and 1,
+ respectively. Please find more information on common
+ clock bindings in ../clock/clock-bindings.txt.
+
+Port nodes (optional)
+---------------------
+
+More documentation on these bindings is available in
+video-interfaces.txt in the same directory.
+
+reg : The interface:
+ 0 - parallel (CCDC)
+ 1 - CSIPHY1 -- CSI2C / CCP2B on 3630;
+ CSI1 -- CSIb on 3430
+ 2 - CSIPHY2 -- CSI2A / CCP2B on 3630;
+ CSI2 -- CSIa on 3430
+
+Optional properties
+===================
+
+vdd-csiphy1-supply : voltage supply of the CSI-2 PHY 1
+vdd-csiphy2-supply : voltage supply of the CSI-2 PHY 2
+
+Endpoint nodes
+--------------
+
+lane-polarities : lane polarity (required on CSI-2)
+ 0 -- not inverted; 1 -- inverted
+data-lanes : an array of data lanes from 1 to 3. The length can
+ be either 1 or 2. (required on CSI-2)
+clock-lanes : the clock lane (from 1 to 3). (required on CSI-2)
+
+
+Example
+=======
+
+ isp@480bc000 {
+ compatible = "ti,omap3-isp";
+ reg = <0x480bc000 0x12fc
+ 0x480bd800 0x0600>;
+ interrupts = <24>;
+ iommus = <&mmu_isp>;
+ syscon = <&scm_conf 0x2f0>;
+ ti,phy-type = <OMAP3ISP_PHY_TYPE_CSIPHY>;
+ #clock-cells = <1>;
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/ti-am437x-vpfe.txt b/Documentation/devicetree/bindings/media/ti-am437x-vpfe.txt
new file mode 100644
index 000000000..3932e7665
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/ti-am437x-vpfe.txt
@@ -0,0 +1,61 @@
+Texas Instruments AM437x CAMERA (VPFE)
+--------------------------------------
+
+The Video Processing Front End (VPFE) is a key component for image capture
+applications. The capture module provides the system interface and the
+processing capability to connect RAW image-sensor modules and video decoders
+to the AM437x device.
+
+Required properties:
+- compatible: must be "ti,am437x-vpfe"
+- reg: physical base address and length of the registers set for the device;
+- interrupts: should contain IRQ line for the VPFE;
+- ti,am437x-vpfe-interface: can be one of the following,
+ 0 - Raw Bayer Interface.
+ 1 - 8 Bit BT656 Interface.
+ 2 - 10 Bit BT656 Interface.
+ 3 - YCbCr 8 Bit Interface.
+ 4 - YCbCr 16 Bit Interface.
+
+VPFE supports a single port node with parallel bus. It should contain one
+'port' child node with child 'endpoint' node. Please refer to the bindings
+defined in Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Example:
+ vpfe: vpfe@f0034000 {
+ compatible = "ti,am437x-vpfe";
+ reg = <0x48328000 0x2000>;
+ interrupts = <GIC_SPI 50 IRQ_TYPE_LEVEL_HIGH>;
+
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&vpfe_pins_default>;
+ pinctrl-1 = <&vpfe_pins_sleep>;
+
+ port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ vpfe0_ep: endpoint {
+ remote-endpoint = <&ov2659_1>;
+ ti,am437x-vpfe-interface = <0>;
+ bus-width = <8>;
+ hsync-active = <0>;
+ vsync-active = <0>;
+ };
+ };
+ };
+
+ i2c1: i2c@4802a000 {
+
+ ov2659@30 {
+ compatible = "ti,ov2659";
+ reg = <0x30>;
+
+ port {
+ ov2659_1: endpoint {
+ remote-endpoint = <&vpfe0_ep>;
+ bus-width = <8>;
+ mclk-frequency = <12000000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/video-interfaces.txt b/Documentation/devicetree/bindings/media/video-interfaces.txt
new file mode 100644
index 000000000..9cd2a3691
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/video-interfaces.txt
@@ -0,0 +1,239 @@
+Common bindings for video receiver and transmitter interfaces
+
+General concept
+---------------
+
+Video data pipelines usually consist of external devices, e.g. camera sensors,
+controlled over an I2C, SPI or UART bus, and SoC internal IP blocks, including
+video DMA engines and video data processors.
+
+SoC internal blocks are described by DT nodes, placed similarly to other SoC
+blocks. External devices are represented as child nodes of their respective
+bus controller nodes, e.g. I2C.
+
+Data interfaces on all video devices are described by their child 'port' nodes.
+Configuration of a port depends on other devices participating in the data
+transfer and is described by 'endpoint' subnodes.
+
+device {
+ ...
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ ...
+ endpoint@0 { ... };
+ endpoint@1 { ... };
+ };
+ port@1 { ... };
+ };
+};
+
+If a port can be configured to work with more than one remote device on the same
+bus, an 'endpoint' child node must be provided for each of them. If more than
+one port is present in a device node or there is more than one endpoint at a
+port, or port node needs to be associated with a selected hardware interface,
+a common scheme using '#address-cells', '#size-cells' and 'reg' properties is
+used.
+
+All 'port' nodes can be grouped under optional 'ports' node, which allows to
+specify #address-cells, #size-cells properties independently for the 'port'
+and 'endpoint' nodes and any child device nodes a device might have.
+
+Two 'endpoint' nodes are linked with each other through their 'remote-endpoint'
+phandles. An endpoint subnode of a device contains all properties needed for
+configuration of this device for data exchange with other device. In most
+cases properties at the peer 'endpoint' nodes will be identical, however they
+might need to be different when there is any signal modifications on the bus
+between two devices, e.g. there are logic signal inverters on the lines.
+
+It is allowed for multiple endpoints at a port to be active simultaneously,
+where supported by a device. For example, in case where a data interface of
+a device is partitioned into multiple data busses, e.g. 16-bit input port
+divided into two separate ITU-R BT.656 8-bit busses. In such case bus-width
+and data-shift properties can be used to assign physical data lines to each
+endpoint node (logical bus).
+
+
+Required properties
+-------------------
+
+If there is more than one 'port' or more than one 'endpoint' node or 'reg'
+property is present in port and/or endpoint nodes the following properties
+are required in a relevant parent node:
+
+ - #address-cells : number of cells required to define port/endpoint
+ identifier, should be 1.
+ - #size-cells : should be zero.
+
+Optional endpoint properties
+----------------------------
+
+- remote-endpoint: phandle to an 'endpoint' subnode of a remote device node.
+- slave-mode: a boolean property indicating that the link is run in slave mode.
+ The default when this property is not specified is master mode. In the slave
+ mode horizontal and vertical synchronization signals are provided to the
+ slave device (data source) by the master device (data sink). In the master
+ mode the data source device is also the source of the synchronization signals.
+- bus-width: number of data lines actively used, valid for the parallel busses.
+- data-shift: on the parallel data busses, if bus-width is used to specify the
+ number of data lines, data-shift can be used to specify which data lines are
+ used, e.g. "bus-width=<8>; data-shift=<2>;" means, that lines 9:2 are used.
+- hsync-active: active state of the HSYNC signal, 0/1 for LOW/HIGH respectively.
+- vsync-active: active state of the VSYNC signal, 0/1 for LOW/HIGH respectively.
+ Note, that if HSYNC and VSYNC polarities are not specified, embedded
+ synchronization may be required, where supported.
+- data-active: similar to HSYNC and VSYNC, specifies data line polarity.
+- field-even-active: field signal level during the even field data transmission.
+- pclk-sample: sample data on rising (1) or falling (0) edge of the pixel clock
+ signal.
+- sync-on-green-active: active state of Sync-on-green (SoG) signal, 0/1 for
+ LOW/HIGH respectively.
+- data-lanes: an array of physical data lane indexes. Position of an entry
+ determines the logical lane number, while the value of an entry indicates
+ physical lane, e.g. for 2-lane MIPI CSI-2 bus we could have
+ "data-lanes = <1 2>;", assuming the clock lane is on hardware lane 0.
+ This property is valid for serial busses only (e.g. MIPI CSI-2).
+- clock-lanes: an array of physical clock lane indexes. Position of an entry
+ determines the logical lane number, while the value of an entry indicates
+ physical lane, e.g. for a MIPI CSI-2 bus we could have "clock-lanes = <0>;",
+ which places the clock lane on hardware lane 0. This property is valid for
+ serial busses only (e.g. MIPI CSI-2). Note that for the MIPI CSI-2 bus this
+ array contains only one entry.
+- clock-noncontinuous: a boolean property to allow MIPI CSI-2 non-continuous
+ clock mode.
+- link-frequencies: Allowed data bus frequencies. For MIPI CSI-2, for
+ instance, this is the actual frequency of the bus, not bits per clock per
+ lane value. An array of 64-bit unsigned integers.
+- lane-polarities: an array of polarities of the lanes starting from the clock
+ lane and followed by the data lanes in the same order as in data-lanes.
+ Valid values are 0 (normal) and 1 (inverted). The length of the array
+ should be the combined length of data-lanes and clock-lanes properties.
+ If the lane-polarities property is omitted, the value must be interpreted
+ as 0 (normal). This property is valid for serial busses only.
+
+
+Example
+-------
+
+The example snippet below describes two data pipelines. ov772x and imx074 are
+camera sensors with a parallel and serial (MIPI CSI-2) video bus respectively.
+Both sensors are on the I2C control bus corresponding to the i2c0 controller
+node. ov772x sensor is linked directly to the ceu0 video host interface.
+imx074 is linked to ceu0 through the MIPI CSI-2 receiver (csi2). ceu0 has a
+(single) DMA engine writing captured data to memory. ceu0 node has a single
+'port' node which may indicate that at any time only one of the following data
+pipelines can be active: ov772x -> ceu0 or imx074 -> csi2 -> ceu0.
+
+ ceu0: ceu@0xfe910000 {
+ compatible = "renesas,sh-mobile-ceu";
+ reg = <0xfe910000 0xa0>;
+ interrupts = <0x880>;
+
+ mclk: master_clock {
+ compatible = "renesas,ceu-clock";
+ #clock-cells = <1>;
+ clock-frequency = <50000000>; /* Max clock frequency */
+ clock-output-names = "mclk";
+ };
+
+ port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* Parallel bus endpoint */
+ ceu0_1: endpoint@1 {
+ reg = <1>; /* Local endpoint # */
+ remote = <&ov772x_1_1>; /* Remote phandle */
+ bus-width = <8>; /* Used data lines */
+ data-shift = <2>; /* Lines 9:2 are used */
+
+ /* If hsync-active/vsync-active are missing,
+ embedded BT.656 sync is used */
+ hsync-active = <0>; /* Active low */
+ vsync-active = <0>; /* Active low */
+ data-active = <1>; /* Active high */
+ pclk-sample = <1>; /* Rising */
+ };
+
+ /* MIPI CSI-2 bus endpoint */
+ ceu0_0: endpoint@0 {
+ reg = <0>;
+ remote = <&csi2_2>;
+ };
+ };
+ };
+
+ i2c0: i2c@0xfff20000 {
+ ...
+ ov772x_1: camera@0x21 {
+ compatible = "ovti,ov772x";
+ reg = <0x21>;
+ vddio-supply = <&regulator1>;
+ vddcore-supply = <&regulator2>;
+
+ clock-frequency = <20000000>;
+ clocks = <&mclk 0>;
+ clock-names = "xclk";
+
+ port {
+ /* With 1 endpoint per port no need for addresses. */
+ ov772x_1_1: endpoint {
+ bus-width = <8>;
+ remote-endpoint = <&ceu0_1>;
+ hsync-active = <1>;
+ vsync-active = <0>; /* Who came up with an
+ inverter here ?... */
+ data-active = <1>;
+ pclk-sample = <1>;
+ };
+ };
+ };
+
+ imx074: camera@0x1a {
+ compatible = "sony,imx074";
+ reg = <0x1a>;
+ vddio-supply = <&regulator1>;
+ vddcore-supply = <&regulator2>;
+
+ clock-frequency = <30000000>; /* Shared clock with ov772x_1 */
+ clocks = <&mclk 0>;
+ clock-names = "sysclk"; /* Assuming this is the
+ name in the datasheet */
+ port {
+ imx074_1: endpoint {
+ clock-lanes = <0>;
+ data-lanes = <1 2>;
+ remote-endpoint = <&csi2_1>;
+ };
+ };
+ };
+ };
+
+ csi2: csi2@0xffc90000 {
+ compatible = "renesas,sh-mobile-csi2";
+ reg = <0xffc90000 0x1000>;
+ interrupts = <0x17a0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@1 {
+ compatible = "renesas,csi2c"; /* One of CSI2I and CSI2C. */
+ reg = <1>; /* CSI-2 PHY #1 of 2: PHY_S,
+ PHY_M has port address 0,
+ is unused. */
+ csi2_1: endpoint {
+ clock-lanes = <0>;
+ data-lanes = <2 1>;
+ remote-endpoint = <&imx074_1>;
+ };
+ };
+ port@2 {
+ reg = <2>; /* port 2: link to the CEU */
+
+ csi2_2: endpoint {
+ remote-endpoint = <&ceu0_0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/xilinx/video.txt b/Documentation/devicetree/bindings/media/xilinx/video.txt
new file mode 100644
index 000000000..cbd46fa09
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/xilinx/video.txt
@@ -0,0 +1,35 @@
+DT bindings for Xilinx video IP cores
+-------------------------------------
+
+Xilinx video IP cores process video streams by acting as video sinks and/or
+sources. They are connected by links through their input and output ports,
+creating a video pipeline.
+
+Each video IP core is represented by an AMBA bus child node in the device
+tree using bindings documented in this directory. Connections between the IP
+cores are represented as defined in ../video-interfaces.txt.
+
+The whole pipeline is represented by an AMBA bus child node in the device
+tree using bindings documented in ./xlnx,video.txt.
+
+Common properties
+-----------------
+
+The following properties are common to all Xilinx video IP cores.
+
+- xlnx,video-format: This property represents a video format transmitted on an
+ AXI bus between video IP cores, using its VF code as defined in "AXI4-Stream
+ Video IP and System Design Guide" [UG934]. How the format relates to the IP
+ core is decribed in the IP core bindings documentation.
+
+- xlnx,video-width: This property qualifies the video format with the sample
+ width expressed as a number of bits per pixel component. All components must
+ use the same width.
+
+- xlnx,cfa-pattern: When the video format is set to Mono/Sensor, this property
+ describes the sensor's color filter array pattern. Supported values are
+ "bggr", "gbrg", "grbg", "rggb" and "mono". If not specified, the pattern
+ defaults to "mono".
+
+
+[UG934] http://www.xilinx.com/support/documentation/ip_documentation/axi_videoip/v1_0/ug934_axi_videoIP.pdf
diff --git a/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tc.txt b/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tc.txt
new file mode 100644
index 000000000..2aed3b4a6
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tc.txt
@@ -0,0 +1,33 @@
+Xilinx Video Timing Controller (VTC)
+------------------------------------
+
+The Video Timing Controller is a general purpose video timing generator and
+detector.
+
+Required properties:
+
+ - compatible: Must be "xlnx,v-tc-6.1".
+
+ - reg: Physical base address and length of the registers set for the device.
+
+ - clocks: Must contain a clock specifier for the VTC core and timing
+ interfaces clock.
+
+Optional properties:
+
+ - xlnx,detector: The VTC has a timing detector
+ - xlnx,generator: The VTC has a timing generator
+
+ At least one of the xlnx,detector and xlnx,generator properties must be
+ specified.
+
+
+Example:
+
+ vtc: vtc@43c40000 {
+ compatible = "xlnx,v-tc-6.1";
+ reg = <0x43c40000 0x10000>;
+
+ clocks = <&clkc 15>;
+ xlnx,generator;
+ };
diff --git a/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt b/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt
new file mode 100644
index 000000000..9dd86b3db
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt
@@ -0,0 +1,71 @@
+Xilinx Video Test Pattern Generator (TPG)
+-----------------------------------------
+
+Required properties:
+
+- compatible: Must contain at least one of
+
+ "xlnx,v-tpg-5.0" (TPG version 5.0)
+ "xlnx,v-tpg-6.0" (TPG version 6.0)
+
+ TPG versions backward-compatible with previous versions should list all
+ compatible versions in the newer to older order.
+
+- reg: Physical base address and length of the registers set for the device.
+
+- clocks: Reference to the video core clock.
+
+- xlnx,video-format, xlnx,video-width: Video format and width, as defined in
+ video.txt.
+
+- port: Video port, using the DT bindings defined in ../video-interfaces.txt.
+ The TPG has a single output port numbered 0.
+
+Optional properties:
+
+- xlnx,vtc: A phandle referencing the Video Timing Controller that generates
+ video timings for the TPG test patterns.
+
+- timing-gpios: Specifier for a GPIO that controls the timing mux at the TPG
+ input. The GPIO active level corresponds to the selection of VTC-generated
+ video timings.
+
+The xlnx,vtc and timing-gpios properties are mandatory when the TPG is
+synthesized with two ports and forbidden when synthesized with one port.
+
+Example:
+
+ tpg_0: tpg@40050000 {
+ compatible = "xlnx,v-tpg-6.0", "xlnx,v-tpg-5.0";
+ reg = <0x40050000 0x10000>;
+ clocks = <&clkc 15>;
+
+ xlnx,vtc = <&vtc_3>;
+ timing-gpios = <&ps7_gpio_0 55 GPIO_ACTIVE_LOW>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ xlnx,video-format = <XVIP_VF_YUV_422>;
+ xlnx,video-width = <8>;
+
+ tpg_in: endpoint {
+ remote-endpoint = <&adv7611_out>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+
+ xlnx,video-format = <XVIP_VF_YUV_422>;
+ xlnx,video-width = <8>;
+
+ tpg1_out: endpoint {
+ remote-endpoint = <&switch_in0>;
+ };
+ }:
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/xilinx/xlnx,video.txt b/Documentation/devicetree/bindings/media/xilinx/xlnx,video.txt
new file mode 100644
index 000000000..5a0227023
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/xilinx/xlnx,video.txt
@@ -0,0 +1,55 @@
+Xilinx Video IP Pipeline (VIPP)
+-------------------------------
+
+General concept
+---------------
+
+Xilinx video IP pipeline processes video streams through one or more Xilinx
+video IP cores. Each video IP core is represented as documented in video.txt
+and IP core specific documentation, xlnx,v-*.txt, in this directory. The DT
+node of the VIPP represents as a top level node of the pipeline and defines
+mappings between DMAs and the video IP cores.
+
+Required properties:
+
+- compatible: Must be "xlnx,video".
+
+- dmas, dma-names: List of one DMA specifier and identifier string (as defined
+ in Documentation/devicetree/bindings/dma/dma.txt) per port. Each port
+ requires a DMA channel with the identifier string set to "port" followed by
+ the port index.
+
+- ports: Video port, using the DT bindings defined in ../video-interfaces.txt.
+
+Required port properties:
+
+- direction: should be either "input" or "output" depending on the direction
+ of stream.
+
+Example:
+
+ video_cap {
+ compatible = "xlnx,video";
+ dmas = <&vdma_1 1>, <&vdma_3 1>;
+ dma-names = "port0", "port1";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ direction = "input";
+ vcap0_in0: endpoint {
+ remote-endpoint = <&scaler0_out>;
+ };
+ };
+ port@1 {
+ reg = <1>;
+ direction = "input";
+ vcap0_in1: endpoint {
+ remote-endpoint = <&switch_out1>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt b/Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt
new file mode 100644
index 000000000..d5e370450
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/fsl/ifc.txt
@@ -0,0 +1,79 @@
+Integrated Flash Controller
+
+Properties:
+- name : Should be ifc
+- compatible : should contain "fsl,ifc". The version of the integrated
+ flash controller can be found in the IFC_REV register at
+ offset zero.
+
+- #address-cells : Should be either two or three. The first cell is the
+ chipselect number, and the remaining cells are the
+ offset into the chipselect.
+- #size-cells : Either one or two, depending on how large each chipselect
+ can be.
+- reg : Offset and length of the register set for the device
+- interrupts: IFC may have one or two interrupts. If two interrupt
+ specifiers are present, the first is the "common"
+ interrupt (CM_EVTER_STAT), and the second is the NAND
+ interrupt (NAND_EVTER_STAT). If there is only one,
+ that interrupt reports both types of event.
+
+
+- ranges : Each range corresponds to a single chipselect, and covers
+ the entire access window as configured.
+
+Child device nodes describe the devices connected to IFC such as NOR (e.g.
+cfi-flash) and NAND (fsl,ifc-nand). There might be board specific devices
+like FPGAs, CPLDs, etc.
+
+Example:
+
+ ifc@ffe1e000 {
+ compatible = "fsl,ifc", "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ reg = <0x0 0xffe1e000 0 0x2000>;
+ interrupts = <16 2 19 2>;
+
+ /* NOR, NAND Flashes and CPLD on board */
+ ranges = <0x0 0x0 0x0 0xee000000 0x02000000
+ 0x1 0x0 0x0 0xffa00000 0x00010000
+ 0x3 0x0 0x0 0xffb00000 0x00020000>;
+
+ flash@0,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "cfi-flash";
+ reg = <0x0 0x0 0x2000000>;
+ bank-width = <2>;
+ device-width = <1>;
+
+ partition@0 {
+ /* 32MB for user data */
+ reg = <0x0 0x02000000>;
+ label = "NOR Data";
+ };
+ };
+
+ flash@1,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,ifc-nand";
+ reg = <0x1 0x0 0x10000>;
+
+ partition@0 {
+ /* This location must not be altered */
+ /* 1MB for u-boot Bootloader Image */
+ reg = <0x0 0x00100000>;
+ label = "NAND U-Boot Image";
+ read-only;
+ };
+ };
+
+ cpld@3,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,p1010rdb-cpld";
+ reg = <0x3 0x0 0x000001f>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/memory-controllers/ingenic,jz4780-nemc.txt b/Documentation/devicetree/bindings/memory-controllers/ingenic,jz4780-nemc.txt
new file mode 100644
index 000000000..f936b5589
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/ingenic,jz4780-nemc.txt
@@ -0,0 +1,75 @@
+* Ingenic JZ4780 NAND/external memory controller (NEMC)
+
+This file documents the device tree bindings for the NEMC external memory
+controller in Ingenic JZ4780
+
+Required properties:
+- compatible: Should be set to one of:
+ "ingenic,jz4780-nemc" (JZ4780)
+- reg: Should specify the NEMC controller registers location and length.
+- clocks: Clock for the NEMC controller.
+- #address-cells: Must be set to 2.
+- #size-cells: Must be set to 1.
+- ranges: A set of ranges for each bank describing the physical memory layout.
+ Each should specify the following 4 integer values:
+
+ <cs number> 0 <physical address of mapping> <size of mapping>
+
+Each child of the NEMC node describes a device connected to the NEMC.
+
+Required child node properties:
+- reg: Should contain at least one register specifier, given in the following
+ format:
+
+ <cs number> <offset> <size>
+
+ Multiple registers can be specified across multiple banks. This is needed,
+ for example, for packaged NAND devices with multiple dies. Such devices
+ should be grouped into a single node.
+
+Optional child node properties:
+- ingenic,nemc-bus-width: Specifies the bus width in bits. Defaults to 8 bits.
+- ingenic,nemc-tAS: Address setup time in nanoseconds.
+- ingenic,nemc-tAH: Address hold time in nanoseconds.
+- ingenic,nemc-tBP: Burst pitch time in nanoseconds.
+- ingenic,nemc-tAW: Access wait time in nanoseconds.
+- ingenic,nemc-tSTRV: Static memory recovery time in nanoseconds.
+
+If a child node references multiple banks in its "reg" property, the same value
+for all optional parameters will be configured for all banks. If any optional
+parameters are omitted, they will be left unchanged from whatever they are
+configured to when the NEMC device is probed (which may be the reset value as
+given in the hardware reference manual, or a value configured by the boot
+loader).
+
+Example (NEMC node with a NAND child device attached at CS1):
+
+nemc: nemc@13410000 {
+ compatible = "ingenic,jz4780-nemc";
+ reg = <0x13410000 0x10000>;
+
+ #address-cells = <2>;
+ #size-cells = <1>;
+
+ ranges = <1 0 0x1b000000 0x1000000
+ 2 0 0x1a000000 0x1000000
+ 3 0 0x19000000 0x1000000
+ 4 0 0x18000000 0x1000000
+ 5 0 0x17000000 0x1000000
+ 6 0 0x16000000 0x1000000>;
+
+ clocks = <&cgu JZ4780_CLK_NEMC>;
+
+ nand: nand@1 {
+ compatible = "ingenic,jz4780-nand";
+ reg = <1 0 0x1000000>;
+
+ ingenic,nemc-tAS = <10>;
+ ingenic,nemc-tAH = <5>;
+ ingenic,nemc-tBP = <10>;
+ ingenic,nemc-tAW = <15>;
+ ingenic,nemc-tSTRV = <100>;
+
+ ...
+ };
+};
diff --git a/Documentation/devicetree/bindings/memory-controllers/mvebu-devbus.txt b/Documentation/devicetree/bindings/memory-controllers/mvebu-devbus.txt
new file mode 100644
index 000000000..1ee3bc09f
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/mvebu-devbus.txt
@@ -0,0 +1,178 @@
+Device tree bindings for MVEBU Device Bus controllers
+
+The Device Bus controller available in some Marvell's SoC allows to control
+different types of standard memory and I/O devices such as NOR, NAND, and FPGA.
+The actual devices are instantiated from the child nodes of a Device Bus node.
+
+Required properties:
+
+ - compatible: Armada 370/XP SoC are supported using the
+ "marvell,mvebu-devbus" compatible string.
+
+ Orion5x SoC are supported using the
+ "marvell,orion-devbus" compatible string.
+
+ - reg: A resource specifier for the register space.
+ This is the base address of a chip select within
+ the controller's register space.
+ (see the example below)
+
+ - #address-cells: Must be set to 1
+ - #size-cells: Must be set to 1
+ - ranges: Must be set up to reflect the memory layout with four
+ integer values for each chip-select line in use:
+ 0 <physical address of mapping> <size>
+
+Optional properties:
+
+ - devbus,keep-config This property can optionally be used to keep
+ using the timing parameters set by the
+ bootloader. It makes all the timing properties
+ described below unused.
+
+Timing properties for child nodes:
+
+Read parameters:
+
+ - devbus,turn-off-ps: Defines the time during which the controller does not
+ drive the AD bus after the completion of a device read.
+ This prevents contentions on the Device Bus after a read
+ cycle from a slow device.
+ Mandatory, except if devbus,keep-config is used.
+
+ - devbus,bus-width: Defines the bus width, in bits (e.g. <16>).
+ Mandatory, except if devbus,keep-config is used.
+
+ - devbus,badr-skew-ps: Defines the time delay from from A[2:0] toggle,
+ to read data sample. This parameter is useful for
+ synchronous pipelined devices, where the address
+ precedes the read data by one or two cycles.
+ Mandatory, except if devbus,keep-config is used.
+
+ - devbus,acc-first-ps: Defines the time delay from the negation of
+ ALE[0] to the cycle that the first read data is sampled
+ by the controller.
+ Mandatory, except if devbus,keep-config is used.
+
+ - devbus,acc-next-ps: Defines the time delay between the cycle that
+ samples data N and the cycle that samples data N+1
+ (in burst accesses).
+ Mandatory, except if devbus,keep-config is used.
+
+ - devbus,rd-setup-ps: Defines the time delay between DEV_CSn assertion to
+ DEV_OEn assertion. If set to 0 (default),
+ DEV_OEn and DEV_CSn are asserted at the same cycle.
+ This parameter has no affect on <acc-first-ps> parameter
+ (no affect on first data sample). Set <rd-setup-ps>
+ to a value smaller than <acc-first-ps>.
+ Mandatory for "marvell,mvebu-devbus" compatible string,
+ except if devbus,keep-config is used.
+
+ - devbus,rd-hold-ps: Defines the time between the last data sample to the
+ de-assertion of DEV_CSn. If set to 0 (default),
+ DEV_OEn and DEV_CSn are de-asserted at the same cycle
+ (the cycle of the last data sample).
+ This parameter has no affect on DEV_OEn de-assertion.
+ DEV_OEn is always de-asserted the next cycle after
+ last data sampled. Also this parameter has no
+ affect on <turn-off-ps> parameter.
+ Set <rd-hold-ps> to a value smaller than <turn-off-ps>.
+ Mandatory for "marvell,mvebu-devbus" compatible string,
+ except if devbus,keep-config is used.
+
+Write parameters:
+
+ - devbus,ale-wr-ps: Defines the time delay from the ALE[0] negation cycle
+ to the DEV_WEn assertion.
+ Mandatory.
+
+ - devbus,wr-low-ps: Defines the time during which DEV_WEn is active.
+ A[2:0] and Data are kept valid as long as DEV_WEn
+ is active. This parameter defines the setup time of
+ address and data to DEV_WEn rise.
+ Mandatory.
+
+ - devbus,wr-high-ps: Defines the time during which DEV_WEn is kept
+ inactive (high) between data beats of a burst write.
+ DEV_A[2:0] and Data are kept valid (do not toggle) for
+ <wr-high-ps> - <tick> ps.
+ This parameter defines the hold time of address and
+ data after DEV_WEn rise.
+ Mandatory.
+
+ - devbus,sync-enable: Synchronous device enable.
+ 1: True
+ 0: False
+ Mandatory for "marvell,mvebu-devbus" compatible string,
+ except if devbus,keep-config is used.
+
+An example for an Armada XP GP board, with a 16 MiB NOR device as child
+is showed below. Note that the Device Bus driver is in charge of allocating
+the mbus address decoding window for each of its child devices.
+The window is created using the chip select specified in the child
+device node together with the base address and size specified in the ranges
+property. For instance, in the example below the allocated decoding window
+will start at base address 0xf0000000, with a size 0x1000000 (16 MiB)
+for chip select 0 (a.k.a DEV_BOOTCS).
+
+This address window handling is done in this mvebu-devbus only as a temporary
+solution. It will be removed when the support for mbus device tree binding is
+added.
+
+The reg property implicitly specifies the chip select as this:
+
+ 0x10400: DEV_BOOTCS
+ 0x10408: DEV_CS0
+ 0x10410: DEV_CS1
+ 0x10418: DEV_CS2
+ 0x10420: DEV_CS3
+
+Example:
+
+ devbus-bootcs@d0010400 {
+ status = "okay";
+ ranges = <0 0xf0000000 0x1000000>; /* @addr 0xf0000000, size 0x1000000 */
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* Device Bus parameters are required */
+
+ /* Read parameters */
+ devbus,bus-width = <8>;
+ devbus,turn-off-ps = <60000>;
+ devbus,badr-skew-ps = <0>;
+ devbus,acc-first-ps = <124000>;
+ devbus,acc-next-ps = <248000>;
+ devbus,rd-setup-ps = <0>;
+ devbus,rd-hold-ps = <0>;
+
+ /* Write parameters */
+ devbus,sync-enable = <0>;
+ devbus,wr-high-ps = <60000>;
+ devbus,wr-low-ps = <60000>;
+ devbus,ale-wr-ps = <60000>;
+
+ flash@0 {
+ compatible = "cfi-flash";
+
+ /* 16 MiB */
+ reg = <0 0x1000000>;
+ bank-width = <2>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /*
+ * We split the 16 MiB in two partitions,
+ * just as an example.
+ */
+ partition@0 {
+ label = "First";
+ reg = <0 0x800000>;
+ };
+
+ partition@800000 {
+ label = "Second";
+ reg = <0x800000 0x800000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/memory-controllers/mvebu-sdram-controller.txt b/Documentation/devicetree/bindings/memory-controllers/mvebu-sdram-controller.txt
new file mode 100644
index 000000000..89657d1d4
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/mvebu-sdram-controller.txt
@@ -0,0 +1,21 @@
+Device Tree bindings for MVEBU SDRAM controllers
+
+The Marvell EBU SoCs all have a SDRAM controller. The SDRAM controller
+differs from one SoC variant to another, but they also share a number
+of commonalities.
+
+For now, this Device Tree binding documentation only documents the
+Armada XP SDRAM controller.
+
+Required properties:
+
+ - compatible: for Armada XP, "marvell,armada-xp-sdram-controller"
+ - reg: a resource specifier for the register space, which should
+ include all SDRAM controller registers as per the datasheet.
+
+Example:
+
+sdramc@1400 {
+ compatible = "marvell,armada-xp-sdram-controller";
+ reg = <0x1400 0x500>;
+};
diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra-mc.txt b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra-mc.txt
new file mode 100644
index 000000000..f3db93c85
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra-mc.txt
@@ -0,0 +1,36 @@
+NVIDIA Tegra Memory Controller device tree bindings
+===================================================
+
+Required properties:
+- compatible: Should be "nvidia,tegra<chip>-mc"
+- reg: Physical base address and length of the controller's registers.
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - mc: the module's clock input
+- interrupts: The interrupt outputs from the controller.
+- #iommu-cells: Should be 1. The single cell of the IOMMU specifier defines
+ the SWGROUP of the master.
+
+This device implements an IOMMU that complies with the generic IOMMU binding.
+See ../iommu/iommu.txt for details.
+
+Example:
+--------
+
+ mc: memory-controller@0,70019000 {
+ compatible = "nvidia,tegra124-mc";
+ reg = <0x0 0x70019000 0x0 0x1000>;
+ clocks = <&tegra_car TEGRA124_CLK_MC>;
+ clock-names = "mc";
+
+ interrupts = <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+
+ #iommu-cells = <1>;
+ };
+
+ sdhci@0,700b0000 {
+ compatible = "nvidia,tegra124-sdhci";
+ ...
+ iommus = <&mc TEGRA_SWGROUP_SDMMC1A>;
+ };
diff --git a/Documentation/devicetree/bindings/memory-controllers/renesas-memory-controllers.txt b/Documentation/devicetree/bindings/memory-controllers/renesas-memory-controllers.txt
new file mode 100644
index 000000000..c64b7925c
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/renesas-memory-controllers.txt
@@ -0,0 +1,44 @@
+DT bindings for Renesas R-Mobile and SH-Mobile memory controllers
+=================================================================
+
+Renesas R-Mobile and SH-Mobile SoCs contain one or more memory controllers.
+These memory controllers differ from one SoC variant to another, and are called
+by different names ("DDR Bus Controller (DBSC)", "DDR3 Bus State Controller
+(DBSC3)", "SDRAM Bus State Controller (SBSC)").
+
+Currently memory controller device nodes are used only to reference PM
+domains, and prevent these PM domains from being powered down, which would
+crash the system.
+
+As there exist no actual drivers for these controllers yet, these bindings
+should be considered EXPERIMENTAL for now.
+
+Required properties:
+ - compatible: Must be one of the following SoC-specific values:
+ - "renesas,dbsc-r8a73a4" (R-Mobile APE6)
+ - "renesas,dbsc3-r8a7740" (R-Mobile A1)
+ - "renesas,sbsc-sh73a0" (SH-Mobile AG5)
+ - reg: Must contain the base address and length of the memory controller's
+ registers.
+
+Optional properties:
+ - interrupts: Must contain a list of interrupt specifiers for memory
+ controller interrupts, if available.
+ - interrupts-names: Must contain a list of interrupt names corresponding to
+ the interrupts in the interrupts property, if available.
+ Valid interrupt names are:
+ - "sec" (secure interrupt)
+ - "temp" (normal (temperature) interrupt)
+ - power-domains: Must contain a reference to the PM domain that the memory
+ controller belongs to, if available.
+
+Example:
+
+ sbsc1: memory-controller@fe400000 {
+ compatible = "renesas,sbsc-sh73a0";
+ reg = <0xfe400000 0x400>;
+ interrupts = <0 35 IRQ_TYPE_LEVEL_HIGH>,
+ <0 36 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "sec", "temp";
+ power-domains = <&pd_a4bc0>;
+ };
diff --git a/Documentation/devicetree/bindings/memory-controllers/synopsys.txt b/Documentation/devicetree/bindings/memory-controllers/synopsys.txt
new file mode 100644
index 000000000..f9c645414
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/synopsys.txt
@@ -0,0 +1,11 @@
+Binding for Synopsys IntelliDDR Multi Protocol Memory Controller
+
+Required properties:
+ - compatible: Should be 'xlnx,zynq-ddrc-a05'
+ - reg: Base address and size of the controllers memory area
+
+Example:
+ memory-controller@f8006000 {
+ compatible = "xlnx,zynq-ddrc-a05";
+ reg = <0xf8006000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/memory-controllers/ti-aemif.txt b/Documentation/devicetree/bindings/memory-controllers/ti-aemif.txt
new file mode 100644
index 000000000..9592717f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/ti-aemif.txt
@@ -0,0 +1,210 @@
+* Device tree bindings for Texas instruments AEMIF controller
+
+The Async External Memory Interface (EMIF16/AEMIF) controller is intended to
+provide a glue-less interface to a variety of asynchronous memory devices like
+ASRA M, NOR and NAND memory. A total of 256M bytes of any of these memories
+can be accessed at any given time via four chip selects with 64M byte access
+per chip select. Synchronous memories such as DDR1 SD RAM, SDR SDRAM
+and Mobile SDR are not supported.
+
+Documentation:
+Davinci DM646x - http://www.ti.com/lit/ug/sprueq7c/sprueq7c.pdf
+OMAP-L138 (DA850) - http://www.ti.com/lit/ug/spruh77a/spruh77a.pdf
+Kestone - http://www.ti.com/lit/ug/sprugz3a/sprugz3a.pdf
+
+Required properties:
+
+- compatible: "ti,davinci-aemif"
+ "ti,keystone-aemif"
+ "ti,da850-aemif"
+
+- reg: contains offset/length value for AEMIF control registers
+ space.
+
+- #address-cells: Must be 2. The partition number has to be encoded in the
+ first address cell and it may accept values 0..N-1
+ (N - total number of partitions). It's recommended to
+ assign N-1 number for the control partition. The second
+ cell is the offset into the partition.
+
+- #size-cells: Must be set to 1.
+
+- ranges: Contains memory regions. There are two types of
+ ranges/partitions:
+ - CS-specific partition/range. If continuous, must be
+ set up to reflect the memory layout for 4 chipselects,
+ if not then additional range/partition can be added and
+ child device can select the proper one.
+ - control partition which is common for all CS
+ interfaces.
+
+- clocks: the clock feeding the controller clock. Required only
+ if clock tree data present in device tree.
+ See clock-bindings.txt
+
+- clock-names: clock name. It has to be "aemif". Required only if clock
+ tree data present in device tree, in another case don't
+ use it.
+ See clock-bindings.txt
+
+- clock-ranges: Empty property indicating that child nodes can inherit
+ named clocks. Required only if clock tree data present
+ in device tree.
+ See clock-bindings.txt
+
+
+Child chip-select (cs) nodes contain the memory devices nodes connected to
+such as NOR (e.g. cfi-flash) and NAND (ti,davinci-nand, see davinci-nand.txt).
+There might be board specific devices like FPGAs.
+
+Required child cs node properties:
+
+- #address-cells: Must be 2.
+
+- #size-cells: Must be 1.
+
+- ranges: Empty property indicating that child nodes can inherit
+ memory layout.
+
+- clock-ranges: Empty property indicating that child nodes can inherit
+ named clocks. Required only if clock tree data present
+ in device tree.
+
+- ti,cs-chipselect: number of chipselect. Indicates on the aemif driver
+ which chipselect is used for accessing the memory. For
+ compatibles "ti,davinci-aemif" and "ti,keystone-aemif"
+ it can be in range [0-3]. For compatible
+ "ti,da850-aemif" range is [2-5].
+
+Optional child cs node properties:
+
+- ti,cs-bus-width: width of the asynchronous device's data bus
+ 8 or 16 if not preset 8
+
+- ti,cs-select-strobe-mode: enable/disable select strobe mode
+ In select strobe mode chip select behaves as
+ the strobe and is active only during the strobe
+ period. If present then enable.
+
+- ti,cs-extended-wait-mode: enable/disable extended wait mode
+ if set, the controller monitors the EMIFWAIT pin
+ mapped to that chip select to determine if the
+ device wants to extend the strobe period. If
+ present then enable.
+
+- ti,cs-min-turnaround-ns: minimum turn around time, ns
+ Time between the end of one asynchronous memory
+ access and the start of another asynchronous
+ memory access. This delay is not incurred
+ between a read followed by read or a write
+ followed by a write to same chip select.
+
+- ti,cs-read-setup-ns: read setup width, ns
+ Time between the beginning of a memory cycle
+ and the activation of read strobe.
+ Minimum value is 1 (0 treated as 1).
+
+- ti,cs-read-strobe-ns: read strobe width, ns
+ Time between the activation and deactivation of
+ the read strobe.
+ Minimum value is 1 (0 treated as 1).
+
+- ti,cs-read-hold-ns: read hold width, ns
+ Time between the deactivation of the read
+ strobe and the end of the cycle (which may be
+ either an address change or the deactivation of
+ the chip select signal.
+ Minimum value is 1 (0 treated as 1).
+
+- ti,cs-write-setup-ns: write setup width, ns
+ Time between the beginning of a memory cycle
+ and the activation of write strobe.
+ Minimum value is 1 (0 treated as 1).
+
+- ti,cs-write-strobe-ns: write strobe width, ns
+ Time between the activation and deactivation of
+ the write strobe.
+ Minimum value is 1 (0 treated as 1).
+
+- ti,cs-write-hold-ns: write hold width, ns
+ Time between the deactivation of the write
+ strobe and the end of the cycle (which may be
+ either an address change or the deactivation of
+ the chip select signal.
+ Minimum value is 1 (0 treated as 1).
+
+If any of the above parameters are absent, current parameter value will be taken
+from the corresponding HW reg.
+
+Example for aemif, davinci nand and nor flash chip select shown below.
+
+memory-controller@21000A00 {
+ compatible = "ti,davinci-aemif";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ clocks = <&clkaemif 0>;
+ clock-names = "aemif";
+ clock-ranges;
+ reg = <0x21000A00 0x00000100>;
+ ranges = <0 0 0x70000000 0x10000000
+ 1 0 0x21000A00 0x00000100>;
+ /*
+ * Partition0: CS-specific memory range which is
+ * implemented as continuous physical memory region
+ * Partition1: control memory range
+ */
+
+ nand:cs2 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ clock-ranges;
+ ranges;
+
+ ti,cs-chipselect = <2>;
+ /* all timings in nanoseconds */
+ ti,cs-min-turnaround-ns = <0>;
+ ti,cs-read-hold-ns = <7>;
+ ti,cs-read-strobe-ns = <42>;
+ ti,cs-read-setup-ns = <14>;
+ ti,cs-write-hold-ns = <7>;
+ ti,cs-write-strobe-ns = <42>;
+ ti,cs-write-setup-ns = <14>;
+
+ nand@0,0x8000000 {
+ compatible = "ti,davinci-nand";
+ reg = <0 0x8000000 0x4000000
+ 1 0x0000000 0x0000100>;
+ /*
+ * Partition0, offset 0x8000000, size 0x4000000
+ * Partition1, offset 0x0000000, size 0x0000100
+ */
+
+ .. see davinci-nand.txt
+ };
+ };
+
+ nor:cs0 {
+ #address-cells = <2>;
+ #size-cells = <1>;
+ clock-ranges;
+ ranges;
+
+ ti,cs-chipselect = <0>;
+ /* all timings in nanoseconds */
+ ti,cs-min-turnaround-ns = <0>;
+ ti,cs-read-hold-ns = <8>;
+ ti,cs-read-strobe-ns = <40>;
+ ti,cs-read-setup-ns = <14>;
+ ti,cs-write-hold-ns = <7>;
+ ti,cs-write-strobe-ns = <40>;
+ ti,cs-write-setup-ns = <14>;
+ ti,cs-bus-width = <16>;
+
+ flash@0,0x0000000 {
+ compatible = "cfi-flash";
+ reg = <0 0x0000000 0x4000000>;
+
+ ...
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
new file mode 100644
index 000000000..938f8e1ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt
@@ -0,0 +1,55 @@
+* EMIF family of TI SDRAM controllers
+
+EMIF - External Memory Interface - is an SDRAM controller used in
+TI SoCs. EMIF supports, based on the IP revision, one or more of
+DDR2/DDR3/LPDDR2 protocols. This binding describes a given instance
+of the EMIF IP and memory parts attached to it.
+
+Required properties:
+- compatible : Should be of the form "ti,emif-<ip-rev>" where <ip-rev>
+ is the IP revision of the specific EMIF instance.
+
+- phy-type : <u32> indicating the DDR phy type. Following are the
+ allowed values
+ <1> : Attila PHY
+ <2> : Intelli PHY
+
+- device-handle : phandle to a "lpddr2" node representing the memory part
+
+- ti,hwmods : For TI hwmods processing and omap device creation
+ the value shall be "emif<n>" where <n> is the number of the EMIF
+ instance with base 1.
+
+Optional properties:
+- cs1-used : Have this property if CS1 of this EMIF
+ instance has a memory part attached to it. If there is a memory
+ part attached to CS1, it should be the same type as the one on CS0,
+ so there is no need to give the details of this memory part.
+
+- cal-resistor-per-cs : Have this property if the board has one
+ calibration resistor per chip-select.
+
+- hw-caps-read-idle-ctrl: Have this property if the controller
+ supports read idle window programming
+
+- hw-caps-dll-calib-ctrl: Have this property if the controller
+ supports dll calibration control
+
+- hw-caps-ll-interface : Have this property if the controller
+ has a low latency interface and corresponding interrupt events
+
+- hw-caps-temp-alert : Have this property if the controller
+ has capability for generating SDRAM temperature alerts
+
+Example:
+
+emif1: emif@0x4c000000 {
+ compatible = "ti,emif-4d";
+ ti,hwmods = "emif2";
+ phy-type = <1>;
+ device-handle = <&elpida_ECB240ABACN>;
+ cs1-used;
+ hw-caps-read-idle-ctrl;
+ hw-caps-ll-interface;
+ hw-caps-temp-alert;
+};
diff --git a/Documentation/devicetree/bindings/metag/meta-intc.txt b/Documentation/devicetree/bindings/metag/meta-intc.txt
new file mode 100644
index 000000000..80994adab
--- /dev/null
+++ b/Documentation/devicetree/bindings/metag/meta-intc.txt
@@ -0,0 +1,82 @@
+* Meta External Trigger Controller Binding
+
+This binding specifies what properties must be available in the device tree
+representation of a Meta external trigger controller.
+
+Required properties:
+
+ - compatible: Specifies the compatibility list for the interrupt controller.
+ The type shall be <string> and the value shall include "img,meta-intc".
+
+ - num-banks: Specifies the number of interrupt banks (each of which can
+ handle 32 interrupt sources).
+
+ - interrupt-controller: The presence of this property identifies the node
+ as an interrupt controller. No property value shall be defined.
+
+ - #interrupt-cells: Specifies the number of cells needed to encode an
+ interrupt source. The type shall be a <u32> and the value shall be 2.
+
+ - #address-cells: Specifies the number of cells needed to encode an
+ address. The type shall be <u32> and the value shall be 0. As such,
+ 'interrupt-map' nodes do not have to specify a parent unit address.
+
+Optional properties:
+
+ - no-mask: The controller doesn't have any mask registers.
+
+* Interrupt Specifier Definition
+
+ Interrupt specifiers consists of 2 cells encoded as follows:
+
+ - <1st-cell>: The interrupt-number that identifies the interrupt source.
+
+ - <2nd-cell>: The Linux interrupt flags containing level-sense information,
+ encoded as follows:
+ 1 = edge triggered
+ 4 = level-sensitive
+
+* Examples
+
+Example 1:
+
+ /*
+ * Meta external trigger block
+ */
+ intc: intc {
+ // This is an interrupt controller node.
+ interrupt-controller;
+
+ // No address cells so that 'interrupt-map' nodes which
+ // reference this interrupt controller node do not need a parent
+ // address specifier.
+ #address-cells = <0>;
+
+ // Two cells to encode interrupt sources.
+ #interrupt-cells = <2>;
+
+ // Number of interrupt banks
+ num-banks = <2>;
+
+ // No HWMASKEXT is available (specify on Chorus2 and Comet ES1)
+ no-mask;
+
+ // Compatible with Meta hardware trigger block.
+ compatible = "img,meta-intc";
+ };
+
+Example 2:
+
+ /*
+ * An interrupt generating device that is wired to a Meta external
+ * trigger block.
+ */
+ uart1: uart@0x02004c00 {
+ // Interrupt source '5' that is level-sensitive.
+ // Note that there are only two cells as specified in the
+ // interrupt parent's '#interrupt-cells' property.
+ interrupts = <5 4 /* level */>;
+
+ // The interrupt controller that this device is wired to.
+ interrupt-parent = <&intc>;
+ };
diff --git a/Documentation/devicetree/bindings/metag/meta.txt b/Documentation/devicetree/bindings/metag/meta.txt
new file mode 100644
index 000000000..f4457f57a
--- /dev/null
+++ b/Documentation/devicetree/bindings/metag/meta.txt
@@ -0,0 +1,30 @@
+* Meta Processor Binding
+
+This binding specifies what properties must be available in the device tree
+representation of a Meta Processor Core, which is the root node in the tree.
+
+Required properties:
+
+ - compatible: Specifies the compatibility list for the Meta processor.
+ The type shall be <string> and the value shall include "img,meta".
+
+Optional properties:
+
+ - clocks: Clock consumer specifiers as described in
+ Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+ - clock-names: Clock consumer names as described in
+ Documentation/devicetree/bindings/clock/clock-bindings.txt.
+
+Clocks are identified by name. Valid clocks are:
+
+ - "core": The Meta core clock from which the Meta timers are derived.
+
+* Examples
+
+/ {
+ compatible = "toumaz,tz1090", "img,meta";
+
+ clocks = <&meta_core_clk>;
+ clock-names = "core";
+};
diff --git a/Documentation/devicetree/bindings/metag/pdc-intc.txt b/Documentation/devicetree/bindings/metag/pdc-intc.txt
new file mode 100644
index 000000000..a69118550
--- /dev/null
+++ b/Documentation/devicetree/bindings/metag/pdc-intc.txt
@@ -0,0 +1,105 @@
+* ImgTec Powerdown Controller (PDC) Interrupt Controller Binding
+
+This binding specifies what properties must be available in the device tree
+representation of a PDC IRQ controller. This has a number of input interrupt
+lines which can wake the system, and are passed on through output interrupt
+lines.
+
+Required properties:
+
+ - compatible: Specifies the compatibility list for the interrupt controller.
+ The type shall be <string> and the value shall include "img,pdc-intc".
+
+ - reg: Specifies the base PDC physical address(s) and size(s) of the
+ addressable register space. The type shall be <prop-encoded-array>.
+
+ - interrupt-controller: The presence of this property identifies the node
+ as an interrupt controller. No property value shall be defined.
+
+ - #interrupt-cells: Specifies the number of cells needed to encode an
+ interrupt source. The type shall be a <u32> and the value shall be 2.
+
+ - num-perips: Number of waking peripherals.
+
+ - num-syswakes: Number of SysWake inputs.
+
+ - interrupts: List of interrupt specifiers. The first specifier shall be the
+ shared SysWake interrupt, and remaining specifies shall be PDC peripheral
+ interrupts in order.
+
+* Interrupt Specifier Definition
+
+ Interrupt specifiers consists of 2 cells encoded as follows:
+
+ - <1st-cell>: The interrupt-number that identifies the interrupt source.
+ 0-7: Peripheral interrupts
+ 8-15: SysWake interrupts
+
+ - <2nd-cell>: The level-sense information, encoded using the Linux interrupt
+ flags as follows (only 4 valid for peripheral interrupts):
+ 0 = none (decided by software)
+ 1 = low-to-high edge triggered
+ 2 = high-to-low edge triggered
+ 3 = both edge triggered
+ 4 = active-high level-sensitive (required for perip irqs)
+ 8 = active-low level-sensitive
+
+* Examples
+
+Example 1:
+
+ /*
+ * TZ1090 PDC block
+ */
+ pdc: pdc@0x02006000 {
+ // This is an interrupt controller node.
+ interrupt-controller;
+
+ // Three cells to encode interrupt sources.
+ #interrupt-cells = <2>;
+
+ // Offset address of 0x02006000 and size of 0x1000.
+ reg = <0x02006000 0x1000>;
+
+ // Compatible with Meta hardware trigger block.
+ compatible = "img,pdc-intc";
+
+ // Three peripherals are connected.
+ num-perips = <3>;
+
+ // Four SysWakes are connected.
+ num-syswakes = <4>;
+
+ interrupts = <18 4 /* level */>, /* Syswakes */
+ <30 4 /* level */>, /* Peripheral 0 (RTC) */
+ <29 4 /* level */>, /* Peripheral 1 (IR) */
+ <31 4 /* level */>; /* Peripheral 2 (WDT) */
+ };
+
+Example 2:
+
+ /*
+ * An SoC peripheral that is wired through the PDC.
+ */
+ rtc0 {
+ // The interrupt controller that this device is wired to.
+ interrupt-parent = <&pdc>;
+
+ // Interrupt source Peripheral 0
+ interrupts = <0 /* Peripheral 0 (RTC) */
+ 4> /* IRQ_TYPE_LEVEL_HIGH */
+ };
+
+Example 3:
+
+ /*
+ * An interrupt generating device that is wired to a SysWake pin.
+ */
+ touchscreen0 {
+ // The interrupt controller that this device is wired to.
+ interrupt-parent = <&pdc>;
+
+ // Interrupt source SysWake 0 that is active-low level-sensitive
+ interrupts = <8 /* SysWake0 */
+ 8 /* IRQ_TYPE_LEVEL_LOW */>;
+ };
diff --git a/Documentation/devicetree/bindings/mfd/88pm860x.txt b/Documentation/devicetree/bindings/mfd/88pm860x.txt
new file mode 100644
index 000000000..63f3ee337
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/88pm860x.txt
@@ -0,0 +1,85 @@
+* Marvell 88PM860x Power Management IC
+
+Required parent device properties:
+- compatible : "marvell,88pm860x"
+- reg : the I2C slave address for the 88pm860x chip
+- interrupts : IRQ line for the 88pm860x chip
+- interrupt-controller: describes the 88pm860x as an interrupt controller (has its own domain)
+- #interrupt-cells : should be 1.
+ - The cell is the 88pm860x local IRQ number
+
+Optional parent device properties:
+- marvell,88pm860x-irq-read-clr: inicates whether interrupt status is cleared by read
+- marvell,88pm860x-slave-addr: 88pm860x are two chips solution. <reg> stores the I2C address
+ of one chip, and this property stores the I2C address of
+ another chip.
+
+88pm860x consists of a large and varied group of sub-devices:
+
+Device Supply Names Description
+------ ------------ -----------
+88pm860x-onkey : : On key
+88pm860x-rtc : : RTC
+88pm8607 : : Regulators
+88pm860x-backlight : : Backlight
+88pm860x-led : : Led
+88pm860x-touch : : Touchscreen
+
+Example:
+
+ pmic: 88pm860x@34 {
+ compatible = "marvell,88pm860x";
+ reg = <0x34>;
+ interrupts = <4>;
+ interrupt-parent = <&intc>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ marvell,88pm860x-irq-read-clr;
+ marvell,88pm860x-slave-addr = <0x11>;
+
+ regulators {
+ BUCK1 {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+ LDO1 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+ };
+ rtc {
+ marvell,88pm860x-vrtc = <1>;
+ };
+ touch {
+ marvell,88pm860x-gpadc-prebias = <1>;
+ marvell,88pm860x-gpadc-slot-cycle = <1>;
+ marvell,88pm860x-tsi-prebias = <6>;
+ marvell,88pm860x-pen-prebias = <16>;
+ marvell,88pm860x-pen-prechg = <2>;
+ marvell,88pm860x-resistor-X = <300>;
+ };
+ backlights {
+ backlight-0 {
+ marvell,88pm860x-iset = <4>;
+ marvell,88pm860x-pwm = <3>;
+ };
+ backlight-2 {
+ };
+ };
+ leds {
+ led0-red {
+ marvell,88pm860x-iset = <12>;
+ };
+ led0-green {
+ marvell,88pm860x-iset = <12>;
+ };
+ led0-blue {
+ marvell,88pm860x-iset = <12>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/ab8500.txt b/Documentation/devicetree/bindings/mfd/ab8500.txt
new file mode 100644
index 000000000..cd9e90c5d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/ab8500.txt
@@ -0,0 +1,159 @@
+* AB8500 Multi-Functional Device (MFD)
+
+Required parent device properties:
+- compatible : contains "stericsson,ab8500";
+- interrupts : contains the IRQ line for the AB8500
+- interrupt-controller : describes the AB8500 as an Interrupt Controller (has its own domain)
+- #interrupt-cells : should be 2, for 2-cell format
+ - The first cell is the AB8500 local IRQ number
+ - The second cell is used to specify optional parameters
+ - bits[3:0] trigger type and level flags:
+ 1 = low-to-high edge triggered
+ 2 = high-to-low edge triggered
+ 4 = active high level-sensitive
+ 8 = active low level-sensitive
+
+The AB8500 consists of a large and varied group of sub-devices:
+
+Device IRQ Names Supply Names Description
+------ --------- ------------ -----------
+ab8500-bm : : : Battery Manager
+ab8500-btemp : : : Battery Temperature
+ab8500-charger : : : Battery Charger
+ab8500-codec : : : Audio Codec
+ab8500-fg : : vddadc : Fuel Gauge
+ : NCONV_ACCU : : Accumulate N Sample Conversion
+ : BATT_OVV : : Battery Over Voltage
+ : LOW_BAT_F : : LOW threshold battery voltage
+ : CC_INT_CALIB : : Coulomb Counter Internal Calibration
+ : CCEOC : : Coulomb Counter End of Conversion
+ab8500-btemp : : vtvout : Battery Temperature
+ : BAT_CTRL_INDB : : Battery Removal Indicator
+ : BTEMP_LOW : : Btemp < BtempLow, if battery temperature is lower than -10°C
+ : BTEMP_LOW_MEDIUM : : BtempLow < Btemp < BtempMedium,if battery temperature is between -10 and 0°C
+ : BTEMP_MEDIUM_HIGH : : BtempMedium < Btemp < BtempHigh,if battery temperature is between 0°C and“MaxTemp
+ : BTEMP_HIGH : : Btemp > BtempHigh, if battery temperature is higher than “MaxTemp
+ab8500-charger : : vddadc : Charger interface
+ : MAIN_CH_UNPLUG_DET : : main charger unplug detection management (not in 8505)
+ : MAIN_CHARGE_PLUG_DET : : main charger plug detection management (not in 8505)
+ : MAIN_EXT_CH_NOT_OK : : main charger not OK
+ : MAIN_CH_TH_PROT_R : : Die temp is above main charger
+ : MAIN_CH_TH_PROT_F : : Die temp is below main charger
+ : VBUS_DET_F : : VBUS falling detected
+ : VBUS_DET_R : : VBUS rising detected
+ : USB_LINK_STATUS : : USB link status has changed
+ : USB_CH_TH_PROT_R : : Die temp is above usb charger
+ : USB_CH_TH_PROT_F : : Die temp is below usb charger
+ : USB_CHARGER_NOT_OKR : : allowed USB charger not ok detection
+ : VBUS_OVV : : Overvoltage on Vbus ball detected (USB charge is stopped)
+ : CH_WD_EXP : : Charger watchdog detected
+ab8500-gpadc : HW_CONV_END : vddadc : Analogue to Digital Converter
+ SW_CONV_END : :
+ab8500-gpio : : : GPIO Controller
+ab8500-ponkey : ONKEY_DBF : : Power-on Key
+ ONKEY_DBR : :
+ab8500-pwm : : : Pulse Width Modulator
+ab8500-regulator : : : Regulators
+ab8500-rtc : 60S : : Real Time Clock
+ : ALARM : :
+ab8500-sysctrl : : : System Control
+ab8500-usb : ID_WAKEUP_R : vddulpivio18 : Universal Serial Bus
+ : ID_WAKEUP_F : v-ape :
+ : VBUS_DET_F : musb_1v8 :
+ : VBUS_DET_R : :
+ : USB_LINK_STATUS : :
+ : USB_ADP_PROBE_PLUG : :
+ : USB_ADP_PROBE_UNPLUG : :
+
+Required child device properties:
+- compatible : "stericsson,ab8500-[bm|btemp|charger|fg|gpadc|gpio|ponkey|
+ pwm|regulator|rtc|sysctrl|usb]";
+
+Optional child device properties:
+- interrupts : contains the device IRQ(s) using the 2-cell format (see above)
+- interrupt-names : contains names of IRQ resource in the order in which they were
+ supplied in the interrupts property
+- <supply_name>-supply : contains a phandle to the regulator supply node in Device Tree
+
+Non-standard child device properties:
+ - Audio CODEC:
+ - stericsson,amic[1|2]-type-single-ended : Single-ended Analoge Mic (default: differential)
+ - stericsson,amic1a-bias-vamic2 : Analoge Mic wishes to use a non-standard Vamic
+ - stericsson,amic1b-bias-vamic2 : Analoge Mic wishes to use a non-standard Vamic
+ - stericsson,amic2-bias-vamic1 : Analoge Mic wishes to use a non-standard Vamic
+ - stericsson,earpeice-cmv : Earpeice voltage (only: 950 | 1100 | 1270 | 1580)
+
+ab8500 {
+ compatible = "stericsson,ab8500";
+ interrupts = <0 40 0x4>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ ab8500-rtc {
+ compatible = "stericsson,ab8500-rtc";
+ interrupts = <17 0x4
+ 18 0x4>;
+ interrupt-names = "60S", "ALARM";
+ };
+
+ ab8500-gpadc {
+ compatible = "stericsson,ab8500-gpadc";
+ interrupts = <32 0x4
+ 39 0x4>;
+ interrupt-names = "HW_CONV_END", "SW_CONV_END";
+ vddadc-supply = <&ab8500_ldo_tvout_reg>;
+ };
+
+ ab8500-usb {
+ compatible = "stericsson,ab8500-usb";
+ interrupts = < 90 0x4
+ 96 0x4
+ 14 0x4
+ 15 0x4
+ 79 0x4
+ 74 0x4
+ 75 0x4>;
+ interrupt-names = "ID_WAKEUP_R",
+ "ID_WAKEUP_F",
+ "VBUS_DET_F",
+ "VBUS_DET_R",
+ "USB_LINK_STATUS",
+ "USB_ADP_PROBE_PLUG",
+ "USB_ADP_PROBE_UNPLUG";
+ vddulpivio18-supply = <&ab8500_ldo_intcore_reg>;
+ v-ape-supply = <&db8500_vape_reg>;
+ musb_1v8-supply = <&db8500_vsmps2_reg>;
+ };
+
+ ab8500-ponkey {
+ compatible = "stericsson,ab8500-ponkey";
+ interrupts = <6 0x4
+ 7 0x4>;
+ interrupt-names = "ONKEY_DBF", "ONKEY_DBR";
+ };
+
+ ab8500-sysctrl {
+ compatible = "stericsson,ab8500-sysctrl";
+ };
+
+ ab8500-pwm {
+ compatible = "stericsson,ab8500-pwm";
+ };
+
+ codec: ab8500-codec {
+ compatible = "stericsson,ab8500-codec";
+
+ stericsson,earpeice-cmv = <950>; /* Units in mV. */
+ };
+
+ ab8500-regulators {
+ compatible = "stericsson,ab8500-regulator";
+
+ ab8500_ldo_aux1_reg: ab8500_ldo_aux1 {
+ /*
+ * See: Documentation/devicetree/bindings/regulator/regulator.txt
+ * for more information on regulators
+ */
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/arizona.txt b/Documentation/devicetree/bindings/mfd/arizona.txt
new file mode 100644
index 000000000..7665aa959
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/arizona.txt
@@ -0,0 +1,95 @@
+Wolfson Arizona class audio SoCs
+
+These devices are audio SoCs with extensive digital capabilites and a range
+of analogue I/O.
+
+Required properties:
+
+ - compatible : One of the following chip-specific strings:
+ "wlf,wm5102"
+ "wlf,wm5110"
+ "wlf,wm8280"
+ "wlf,wm8997"
+ - reg : I2C slave address when connected using I2C, chip select number when
+ using SPI.
+
+ - interrupts : The interrupt line the /IRQ signal for the device is
+ connected to.
+ - interrupt-controller : Arizona class devices contain interrupt controllers
+ and may provide interrupt services to other devices.
+ - interrupt-parent : The parent interrupt controller.
+ - #interrupt-cells: the number of cells to describe an IRQ, this should be 2.
+ The first cell is the IRQ number.
+ The second cell is the flags, encoded as the trigger masks from
+ Documentation/devicetree/bindings/interrupts.txt
+
+ - gpio-controller : Indicates this device is a GPIO controller.
+ - #gpio-cells : Must be 2. The first cell is the pin number and the
+ second cell is used to specify optional parameters (currently unused).
+
+ - AVDD-supply, DBVDD1-supply, CPVDD-supply : Power supplies for the device,
+ as covered in Documentation/devicetree/bindings/regulator/regulator.txt
+
+ - DBVDD2-supply, DBVDD3-supply : Additional databus power supplies (wm5102,
+ wm5110, wm8280)
+
+ - SPKVDDL-supply, SPKVDDR-supply : Speaker driver power supplies (wm5102,
+ wm5110, wm8280)
+
+ - SPKVDD-supply : Speaker driver power supply (wm8997)
+
+Optional properties:
+
+ - wlf,reset : GPIO specifier for the GPIO controlling /RESET
+ - wlf,ldoena : GPIO specifier for the GPIO controlling LDOENA
+
+ - wlf,gpio-defaults : A list of GPIO configuration register values. Defines
+ for the appropriate values can found in <dt-bindings/mfd/arizona.txt>. If
+ absent, no configuration of these registers is performed. If any entry has
+ a value that is out of range for a 16 bit register then the chip default
+ will be used. If present exactly five values must be specified.
+
+ - wlf,inmode : A list of INn_MODE register values, where n is the number
+ of input signals. Valid values are 0 (Differential), 1 (Single-ended) and
+ 2 (Digital Microphone). If absent, INn_MODE registers set to 0 by default.
+ If present, values must be specified less than or equal to the number of
+ input singals. If values less than the number of input signals, elements
+ that has not been specifed are set to 0 by default.
+
+ - wlf,dmic-ref : DMIC reference voltage source for each input, can be
+ selected from either MICVDD or one of the MICBIAS's, defines
+ (ARIZONA_DMIC_xxxx) are provided in <dt-bindings/mfd/arizona.txt>. If
+ present, the number of values should be less than or equal to the
+ number of inputs, unspecified inputs will use the chip default.
+
+ - DCVDD-supply, MICVDD-supply : Power supplies, only need to be specified if
+ they are being externally supplied. As covered in
+ Documentation/devicetree/bindings/regulator/regulator.txt
+
+Optional subnodes:
+ - ldo1 : Initial data for the LDO1 regulator, as covered in
+ Documentation/devicetree/bindings/regulator/regulator.txt
+ - micvdd : Initial data for the MICVDD regulator, as covered in
+ Documentation/devicetree/bindings/regulator/regulator.txt
+
+Example:
+
+codec: wm5102@1a {
+ compatible = "wlf,wm5102";
+ reg = <0x1a>;
+ interrupts = <347>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&gic>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ wlf,gpio-defaults = <
+ ARIZONA_GP_FN_TXLRCLK
+ ARIZONA_GP_DEFAULT
+ ARIZONA_GP_DEFAULT
+ ARIZONA_GP_DEFAULT
+ ARIZONA_GP_DEFAULT
+ >;
+};
diff --git a/Documentation/devicetree/bindings/mfd/as3711.txt b/Documentation/devicetree/bindings/mfd/as3711.txt
new file mode 100644
index 000000000..d98cf18c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/as3711.txt
@@ -0,0 +1,73 @@
+AS3711 is an I2C PMIC from Austria MicroSystems with multiple DCDC and LDO power
+supplies, a battery charger and an RTC. So far only bindings for the two stepup
+DCDC converters are defined. Other DCDC and LDO supplies are configured, using
+standard regulator properties, they must belong to a sub-node, called
+"regulators" and be called "sd1" to "sd4" and "ldo1" to "ldo8." Stepup converter
+configuration should be placed in a subnode, called "backlight."
+
+Compulsory properties:
+- compatible : must be "ams,as3711"
+- reg : specifies the I2C address
+
+To use the SU1 converter as a backlight source the following two properties must
+be provided:
+- su1-dev : framebuffer phandle
+- su1-max-uA : maximum current
+
+To use the SU2 converter as a backlight source the following two properties must
+be provided:
+- su2-dev : framebuffer phandle
+- su1-max-uA : maximum current
+
+Additionally one of these properties must be provided to select the type of
+feedback used:
+- su2-feedback-voltage : voltage feedback is used
+- su2-feedback-curr1 : CURR1 input used for current feedback
+- su2-feedback-curr2 : CURR2 input used for current feedback
+- su2-feedback-curr3 : CURR3 input used for current feedback
+- su2-feedback-curr-auto: automatic current feedback selection
+
+and one of these to select the over-voltage protection pin
+- su2-fbprot-lx-sd4 : LX_SD4 is used for over-voltage protection
+- su2-fbprot-gpio2 : GPIO2 is used for over-voltage protection
+- su2-fbprot-gpio3 : GPIO3 is used for over-voltage protection
+- su2-fbprot-gpio4 : GPIO4 is used for over-voltage protection
+
+If "su2-feedback-curr-auto" is selected, one or more of the following properties
+have to be specified:
+- su2-auto-curr1 : use CURR1 input for current feedback
+- su2-auto-curr2 : use CURR2 input for current feedback
+- su2-auto-curr3 : use CURR3 input for current feedback
+
+Example:
+
+as3711@40 {
+ compatible = "ams,as3711";
+ reg = <0x40>;
+
+ regulators {
+ sd4 {
+ regulator-name = "1.215V";
+ regulator-min-microvolt = <1215000>;
+ regulator-max-microvolt = <1235000>;
+ };
+ ldo2 {
+ regulator-name = "2.8V CPU";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+ };
+
+ backlight {
+ compatible = "ams,as3711-bl";
+ su2-dev = <&lcdc>;
+ su2-max-uA = <36000>;
+ su2-feedback-curr-auto;
+ su2-fbprot-gpio4;
+ su2-auto-curr1;
+ su2-auto-curr2;
+ su2-auto-curr3;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/as3722.txt b/Documentation/devicetree/bindings/mfd/as3722.txt
new file mode 100644
index 000000000..4f64b2a73
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/as3722.txt
@@ -0,0 +1,213 @@
+* ams AS3722 Power management IC.
+
+Required properties:
+-------------------
+- compatible: Must be "ams,as3722".
+- reg: I2C device address.
+- interrupt-controller: AS3722 has internal interrupt controller which takes the
+ interrupt request from internal sub-blocks like RTC, regulators, GPIOs as well
+ as external input.
+- #interrupt-cells: Should be set to 2 for IRQ number and flags.
+ The first cell is the IRQ number. IRQ numbers for different interrupt source
+ of AS3722 are defined at dt-bindings/mfd/as3722.h
+ The second cell is the flags, encoded as the trigger masks from binding document
+ interrupts.txt, using dt-bindings/irq.
+
+Optional properties:
+--------------------
+- ams,enable-internal-int-pullup: Boolean property, to enable internal pullup on
+ interrupt pin. Missing this will disable internal pullup on INT pin.
+- ams,enable-internal-i2c-pullup: Boolean property, to enable internal pullup on
+ i2c scl/sda pins. Missing this will disable internal pullup on i2c
+ scl/sda lines.
+
+Optional submodule and their properties:
+=======================================
+
+Pinmux and GPIO:
+===============
+Device has 8 GPIO pins which can be configured as GPIO as well as the special IO
+functions.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Following are properties which is needed if GPIO and pinmux functionality
+is required:
+ Required properties:
+ -------------------
+ - gpio-controller: Marks the device node as a GPIO controller.
+ - #gpio-cells: Number of GPIO cells. Refer to binding document
+ gpio/gpio.txt
+
+ Optional properties:
+ --------------------
+ Following properties are require if pin control setting is required
+ at boot.
+ - pinctrl-names: A pinctrl state named "default" be defined, using the
+ bindings in pinctrl/pinctrl-binding.txt.
+ - pinctrl[0...n]: Properties to contain the phandle that refer to
+ different nodes of pin control settings. These nodes represents
+ the pin control setting of state 0 to state n. Each of these
+ nodes contains different subnodes to represents some desired
+ configuration for a list of pins. This configuration can
+ include the mux function to select on those pin(s), and
+ various pin configuration parameters, such as pull-up,
+ open drain.
+
+ Each subnode have following properties:
+ Required properties:
+ - pins: List of pins. Valid values of pins properties are:
+ gpio0, gpio1, gpio2, gpio3, gpio4, gpio5,
+ gpio6, gpio7
+
+ Optional properties:
+ function, bias-disable, bias-pull-up, bias-pull-down,
+ bias-high-impedance, drive-open-drain.
+
+ Valid values for function properties are:
+ gpio, interrupt-out, gpio-in-interrupt,
+ vsup-vbat-low-undebounce-out,
+ vsup-vbat-low-debounce-out,
+ voltage-in-standby, oc-pg-sd0, oc-pg-sd6,
+ powergood-out, pwm-in, pwm-out, clk32k-out,
+ watchdog-in, soft-reset-in
+
+Regulators:
+===========
+Device has multiple DCDC and LDOs. The node "regulators" is require if regulator
+functionality is needed.
+
+Following are properties of regulator subnode.
+
+ Optional properties:
+ -------------------
+ The input supply of regulators are the optional properties on the
+ regulator node. The input supply of these regulators are provided
+ through following properties:
+ vsup-sd2-supply: Input supply for SD2.
+ vsup-sd3-supply: Input supply for SD3.
+ vsup-sd4-supply: Input supply for SD4.
+ vsup-sd5-supply: Input supply for SD5.
+ vin-ldo0-supply: Input supply for LDO0.
+ vin-ldo1-6-supply: Input supply for LDO1 and LDO6.
+ vin-ldo2-5-7-supply: Input supply for LDO2, LDO5 and LDO7.
+ vin-ldo3-4-supply: Input supply for LDO3 and LDO4.
+ vin-ldo9-10-supply: Input supply for LDO9 and LDO10.
+ vin-ldo11-supply: Input supply for LDO11.
+
+ Optional sub nodes for regulators:
+ ---------------------------------
+ The subnodes name is the name of regulator and it must be one of:
+ sd[0-6], ldo[0-7], ldo[9-11]
+
+ Each sub-node should contain the constraints and initialization
+ information for that regulator. See regulator.txt for a description
+ of standard properties for these sub-nodes.
+ Additional optional custom properties are listed below.
+ ams,ext-control: External control of the rail. The option of
+ this properties will tell which external input is
+ controlling this rail. Valid values are 0, 1, 2 ad 3.
+ 0: There is no external control of this rail.
+ 1: Rail is controlled by ENABLE1 input pin.
+ 2: Rail is controlled by ENABLE2 input pin.
+ 3: Rail is controlled by ENABLE3 input pin.
+ Missing this property on DT will be assume as no
+ external control. The external control pin macros
+ are defined @dt-bindings/mfd/as3722.h
+
+ ams,enable-tracking: Enable tracking with SD1, only supported
+ by LDO3.
+
+Power-off:
+=========
+AS3722 supports the system power off by turning off all its rail. This
+is provided through pm_power_off.
+The device node should have the following properties to enable this
+functionality
+ams,system-power-controller: Boolean, to enable the power off functionality
+ through this device.
+
+Example:
+--------
+#include <dt-bindings/mfd/as3722.h>
+...
+ams3722 {
+ compatible = "ams,as3722";
+ reg = <0x48>;
+
+ ams,system-power-controller;
+
+ interrupt-parent = <&intc>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&as3722_default>;
+
+ as3722_default: pinmux {
+ gpio0 {
+ pins = "gpio0";
+ function = "gpio";
+ bias-pull-down;
+ };
+
+ gpio1_2_4_7 {
+ pins = "gpio1", "gpio2", "gpio4", "gpio7";
+ function = "gpio";
+ bias-pull-up;
+ };
+
+ gpio5 {
+ pins = "gpio5";
+ function = "clk32k_out";
+ };
+ }
+
+ regulators {
+ vsup-sd2-supply = <...>;
+ ...
+
+ sd0 {
+ regulator-name = "vdd_cpu";
+ regulator-min-microvolt = <700000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-always-on;
+ ams,ext-control = <2>;
+ };
+
+ sd1 {
+ regulator-name = "vdd_core";
+ regulator-min-microvolt = <700000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-always-on;
+ ams,ext-control = <1>;
+ };
+
+ sd2 {
+ regulator-name = "vddio_ddr";
+ regulator-min-microvolt = <1350000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ };
+
+ sd4 {
+ regulator-name = "avdd-hdmi-pex";
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-always-on;
+ };
+
+ sd5 {
+ regulator-name = "vdd-1v8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ };
+ ....
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/atmel-gpbr.txt b/Documentation/devicetree/bindings/mfd/atmel-gpbr.txt
new file mode 100644
index 000000000..a28569540
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/atmel-gpbr.txt
@@ -0,0 +1,15 @@
+* Device tree bindings for Atmel GPBR (General Purpose Backup Registers)
+
+The GPBR are a set of battery-backed registers.
+
+Required properties:
+- compatible: "atmel,at91sam9260-gpbr", "syscon"
+- reg: contains offset/length value of the GPBR memory
+ region.
+
+Example:
+
+gpbr: gpbr@fffffd50 {
+ compatible = "atmel,at91sam9260-gpbr", "syscon";
+ reg = <0xfffffd50 0x10>;
+};
diff --git a/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt b/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt
new file mode 100644
index 000000000..f64de95a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/atmel-hlcdc.txt
@@ -0,0 +1,51 @@
+Device-Tree bindings for Atmel's HLCDC (High LCD Controller) MFD driver
+
+Required properties:
+ - compatible: value should be one of the following:
+ "atmel,sama5d3-hlcdc"
+ - reg: base address and size of the HLCDC device registers.
+ - clock-names: the name of the 3 clocks requested by the HLCDC device.
+ Should contain "periph_clk", "sys_clk" and "slow_clk".
+ - clocks: should contain the 3 clocks requested by the HLCDC device.
+ - interrupts: should contain the description of the HLCDC interrupt line
+
+The HLCDC IP exposes two subdevices:
+ - a PWM chip: see ../pwm/atmel-hlcdc-pwm.txt
+ - a Display Controller: see ../drm/atmel-hlcdc-dc.txt
+
+Example:
+
+ hlcdc: hlcdc@f0030000 {
+ compatible = "atmel,sama5d3-hlcdc";
+ reg = <0xf0030000 0x2000>;
+ clocks = <&lcdc_clk>, <&lcdck>, <&clk32k>;
+ clock-names = "periph_clk","sys_clk", "slow_clk";
+ interrupts = <36 IRQ_TYPE_LEVEL_HIGH 0>;
+ status = "disabled";
+
+ hlcdc-display-controller {
+ compatible = "atmel,hlcdc-display-controller";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_lcd_base &pinctrl_lcd_rgb888>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>;
+
+ hlcdc_panel_output: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&panel_input>;
+ };
+ };
+ };
+
+ hlcdc_pwm: hlcdc-pwm {
+ compatible = "atmel,hlcdc-pwm";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_lcd_pwm>;
+ #pwm-cells = <3>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/atmel-matrix.txt b/Documentation/devicetree/bindings/mfd/atmel-matrix.txt
new file mode 100644
index 000000000..e3ef50ca0
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/atmel-matrix.txt
@@ -0,0 +1,24 @@
+* Device tree bindings for Atmel Bus Matrix
+
+The Bus Matrix registers are used to configure Atmel SoCs internal bus
+behavior (master/slave priorities, undefined burst length type, ...)
+
+Required properties:
+- compatible: Should be one of the following
+ "atmel,at91sam9260-matrix", "syscon"
+ "atmel,at91sam9261-matrix", "syscon"
+ "atmel,at91sam9263-matrix", "syscon"
+ "atmel,at91sam9rl-matrix", "syscon"
+ "atmel,at91sam9g45-matrix", "syscon"
+ "atmel,at91sam9n12-matrix", "syscon"
+ "atmel,at91sam9x5-matrix", "syscon"
+ "atmel,sama5d3-matrix", "syscon"
+- reg: Contains offset/length value of the Bus Matrix
+ memory region.
+
+Example:
+
+matrix: matrix@ffffec00 {
+ compatible = "atmel,sama5d3-matrix", "syscon";
+ reg = <0xffffec00 0x200>;
+};
diff --git a/Documentation/devicetree/bindings/mfd/atmel-smc.txt b/Documentation/devicetree/bindings/mfd/atmel-smc.txt
new file mode 100644
index 000000000..26eeed373
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/atmel-smc.txt
@@ -0,0 +1,19 @@
+* Device tree bindings for Atmel SMC (Static Memory Controller)
+
+The SMC registers are used to configure Atmel EBI (External Bus Interface)
+to interface with standard memory devices (NAND, NOR, SRAM or specialized
+devices like FPGAs).
+
+Required properties:
+- compatible: Should be one of the following
+ "atmel,at91sam9260-smc", "syscon"
+ "atmel,sama5d3-smc", "syscon"
+- reg: Contains offset/length value of the SMC memory
+ region.
+
+Example:
+
+smc: smc@ffffc000 {
+ compatible = "atmel,sama5d3-smc", "syscon";
+ reg = <0xffffc000 0x1000>;
+};
diff --git a/Documentation/devicetree/bindings/mfd/axp20x.txt b/Documentation/devicetree/bindings/mfd/axp20x.txt
new file mode 100644
index 000000000..98685f291
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/axp20x.txt
@@ -0,0 +1,96 @@
+AXP202/AXP209 device tree bindings
+
+The axp20x family current members :
+axp202 (X-Powers)
+axp209 (X-Powers)
+
+Required properties:
+- compatible: "x-powers,axp202" or "x-powers,axp209"
+- reg: The I2C slave address for the AXP chip
+- interrupt-parent: The parent interrupt controller
+- interrupts: SoC NMI / GPIO interrupt connected to the PMIC's IRQ pin
+- interrupt-controller: axp20x has its own internal IRQs
+- #interrupt-cells: Should be set to 1
+
+Optional properties:
+- x-powers,dcdc-freq: defines the work frequency of DC-DC in KHz
+ (range: 750-1875). Default: 1.5MHz
+- <input>-supply: a phandle to the regulator supply node. May be omitted if
+ inputs are unregulated, such as using the IPSOUT output
+ from the PMIC.
+
+- regulators: A node that houses a sub-node for each regulator. Regulators
+ not used but preferred to be managed by the OS should be
+ listed as well.
+ See Documentation/devicetree/bindings/regulator/regulator.txt
+ for more information on standard regulator bindings.
+
+Optional properties for DCDC regulators:
+- x-powers,dcdc-workmode: 1 for PWM mode, 0 for AUTO (PWM/PFM) mode
+ Default: Current hardware setting
+ The DCDC regulators work in a mixed PWM/PFM mode,
+ using PFM under light loads and switching to PWM
+ for heavier loads. Forcing PWM mode trades efficiency
+ under light loads for lower output noise. This
+ probably makes sense for HiFi audio related
+ applications that aren't battery constrained.
+
+
+AXP202/AXP209 regulators, type, and corresponding input supply names:
+
+Regulator Type Supply Name Notes
+--------- ---- ----------- -----
+DCDC2 : DC-DC buck : vin2-supply
+DCDC3 : DC-DC buck : vin3-supply
+LDO1 : LDO : acin-supply : always on
+LDO2 : LDO : ldo24in-supply : shared supply
+LDO3 : LDO : ldo3in-supply
+LDO4 : LDO : ldo24in-supply : shared supply
+LDO5 : LDO : ldo5in-supply
+
+Example:
+
+axp209: pmic@34 {
+ compatible = "x-powers,axp209";
+ reg = <0x34>;
+ interrupt-parent = <&nmi_intc>;
+ interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ regulators {
+ x-powers,dcdc-freq = <1500>;
+
+ vdd_cpu: dcdc2 {
+ regulator-always-on;
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1450000>;
+ regulator-name = "vdd-cpu";
+ };
+
+ vdd_int_dll: dcdc3 {
+ regulator-always-on;
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-name = "vdd-int-dll";
+ };
+
+ vdd_rtc: ldo1 {
+ regulator-always-on;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-name = "vdd-rtc";
+ };
+
+ avcc: ldo2 {
+ regulator-always-on;
+ regulator-min-microvolt = <2700000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "avcc";
+ };
+
+ ldo3 {
+ /* unused but preferred to be managed by OS */
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/bfticu.txt b/Documentation/devicetree/bindings/mfd/bfticu.txt
new file mode 100644
index 000000000..65c90776c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/bfticu.txt
@@ -0,0 +1,25 @@
+KEYMILE bfticu Chassis Management FPGA
+
+The bfticu is a multifunction device that manages the whole chassis.
+Its main functionality is to collect IRQs from the whole chassis and signals
+them to a single controller.
+
+Required properties:
+- compatible: "keymile,bfticu"
+- interrupt-controller: the bfticu FPGA is an interrupt controller
+- interrupts: the main IRQ line to signal the collected IRQs
+- #interrupt-cells : is 2 and their usage is compliant to the 2 cells variant
+ of Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+- interrupt-parent: the parent IRQ ctrl the main IRQ is connected to
+- reg: access on the parent local bus (chip select, offset in chip select, size)
+
+Example:
+
+ chassis-mgmt@3,0 {
+ compatible = "keymile,bfticu";
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <3 0 0x100>;
+ interrupt-parent = <&mpic>;
+ interrupts = <6 1 0 0>;
+ };
diff --git a/Documentation/devicetree/bindings/mfd/brcm,bcm59056.txt b/Documentation/devicetree/bindings/mfd/brcm,bcm59056.txt
new file mode 100644
index 000000000..be51a15e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/brcm,bcm59056.txt
@@ -0,0 +1,39 @@
+-------------------------------
+BCM590xx Power Management Units
+-------------------------------
+
+Required properties:
+- compatible: "brcm,bcm59056"
+- reg: I2C slave address
+- interrupts: interrupt for the PMU. Generic interrupt client node bindings
+ are described in interrupt-controller/interrupts.txt
+
+------------------
+Voltage Regulators
+------------------
+
+Optional child nodes:
+- regulators: container node for regulators following the generic
+ regulator binding in regulator/regulator.txt
+
+ The valid regulator node names for BCM59056 are:
+ rfldo, camldo1, camldo2, simldo1, simldo2, sdldo, sdxldo,
+ mmcldo1, mmcldo2, audldo, micldo, usbldo, vibldo,
+ csr, iosr1, iosr2, msr, sdsr1, sdsr2, vsr,
+ gpldo1, gpldo2, gpldo3, gpldo4, gpldo5, gpldo6,
+ vbus
+
+Example:
+ pmu: bcm59056@8 {
+ compatible = "brcm,bcm59056";
+ reg = <0x08>;
+ interrupts = <GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>;
+ regulators {
+ rfldo_reg: rfldo {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ...
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/cros-ec.txt b/Documentation/devicetree/bindings/mfd/cros-ec.txt
new file mode 100644
index 000000000..8009c3d87
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/cros-ec.txt
@@ -0,0 +1,65 @@
+ChromeOS Embedded Controller
+
+Google's ChromeOS EC is a Cortex-M device which talks to the AP and
+implements various function such as keyboard and battery charging.
+
+The EC can be connect through various means (I2C, SPI, LPC) and the
+compatible string used depends on the interface. Each connection method has
+its own driver which connects to the top level interface-agnostic EC driver.
+Other Linux driver (such as cros-ec-keyb for the matrix keyboard) connect to
+the top-level driver.
+
+Required properties (I2C):
+- compatible: "google,cros-ec-i2c"
+- reg: I2C slave address
+
+Required properties (SPI):
+- compatible: "google,cros-ec-spi"
+- reg: SPI chip select
+
+Optional properties (SPI):
+- google,cros-ec-spi-msg-delay: Some implementations of the EC require some
+ additional processing time in order to accept new transactions. If the delay
+ between transactions is not long enough the EC may not be able to respond
+ properly to subsequent transactions and cause them to hang. This property
+ specifies the delay, in usecs, introduced between transactions to account
+ for the time required by the EC to get back into a state in which new data
+ can be accepted.
+
+Required properties (LPC):
+- compatible: "google,cros-ec-lpc"
+- reg: List of (IO address, size) pairs defining the interface uses
+
+
+Example for I2C:
+
+i2c@12CA0000 {
+ cros-ec@1e {
+ reg = <0x1e>;
+ compatible = "google,cros-ec-i2c";
+ interrupts = <14 0>;
+ interrupt-parent = <&wakeup_eint>;
+ wakeup-source;
+ };
+
+
+Example for SPI:
+
+spi@131b0000 {
+ ec@0 {
+ compatible = "google,cros-ec-spi";
+ reg = <0x0>;
+ interrupts = <14 0>;
+ interrupt-parent = <&wakeup_eint>;
+ wakeup-source;
+ spi-max-frequency = <5000000>;
+ controller-data {
+ cs-gpio = <&gpf0 3 4 3 0>;
+ samsung,spi-cs;
+ samsung,spi-feedback-delay = <2>;
+ };
+ };
+};
+
+
+Example for LPC is not supplied as it is not yet implemented.
diff --git a/Documentation/devicetree/bindings/mfd/da9052-i2c.txt b/Documentation/devicetree/bindings/mfd/da9052-i2c.txt
new file mode 100644
index 000000000..1857f4a6b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/da9052-i2c.txt
@@ -0,0 +1,60 @@
+* Dialog DA9052/53 Power Management Integrated Circuit (PMIC)
+
+Required properties:
+- compatible : Should be "dlg,da9052", "dlg,da9053-aa",
+ "dlg,da9053-ab", or "dlg,da9053-bb"
+
+Sub-nodes:
+- regulators : Contain the regulator nodes. The DA9052/53 regulators are
+ bound using their names as listed below:
+
+ buck0 : regulator BUCK0
+ buck1 : regulator BUCK1
+ buck2 : regulator BUCK2
+ buck3 : regulator BUCK3
+ ldo4 : regulator LDO4
+ ldo5 : regulator LDO5
+ ldo6 : regulator LDO6
+ ldo7 : regulator LDO7
+ ldo8 : regulator LDO8
+ ldo9 : regulator LDO9
+ ldo10 : regulator LDO10
+ ldo11 : regulator LDO11
+ ldo12 : regulator LDO12
+ ldo13 : regulator LDO13
+
+ The bindings details of individual regulator device can be found in:
+ Documentation/devicetree/bindings/regulator/regulator.txt
+
+Examples:
+
+i2c@63fc8000 { /* I2C1 */
+ status = "okay";
+
+ pmic: dialog@48 {
+ compatible = "dlg,da9053-aa";
+ reg = <0x48>;
+
+ regulators {
+ buck0 {
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <2075000>;
+ };
+
+ buck1 {
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <2075000>;
+ };
+
+ buck2 {
+ regulator-min-microvolt = <925000>;
+ regulator-max-microvolt = <2500000>;
+ };
+
+ buck3 {
+ regulator-min-microvolt = <925000>;
+ regulator-max-microvolt = <2500000>;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/da9055.txt b/Documentation/devicetree/bindings/mfd/da9055.txt
new file mode 100644
index 000000000..6dab34d34
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/da9055.txt
@@ -0,0 +1,72 @@
+* Dialog DA9055 Power Management Integrated Circuit (PMIC)
+
+DA9055 consists of a large and varied group of sub-devices (I2C Only):
+
+Device Supply Names Description
+------ ------------ -----------
+da9055-gpio : : GPIOs
+da9055-regulator : : Regulators
+da9055-onkey : : On key
+da9055-rtc : : RTC
+da9055-hwmon : : ADC
+da9055-watchdog : : Watchdog
+
+The CODEC device in DA9055 has a separate, configurable I2C address and so
+is instantiated separately from the PMIC.
+
+For details on accompanying CODEC I2C device, see the following:
+Documentation/devicetree/bindings/sound/da9055.txt
+
+======
+
+Required properties:
+- compatible : Should be "dlg,da9055-pmic"
+- reg: Specifies the I2C slave address (defaults to 0x5a but can be modified)
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+ the IRQs from da9055 are delivered to.
+- interrupts: IRQ line info for da9055 chip.
+- interrupt-controller: da9055 has internal IRQs (has own IRQ domain).
+- #interrupt-cells: Should be 1, is the local IRQ number for da9055.
+
+Sub-nodes:
+- regulators : Contain the regulator nodes. The DA9055 regulators are
+ bound using their names as listed below:
+
+ buck1 : regulator BUCK1
+ buck2 : regulator BUCK2
+ ldo1 : regulator LDO1
+ ldo2 : regulator LDO2
+ ldo3 : regulator LDO3
+ ldo4 : regulator LDO4
+ ldo5 : regulator LDO5
+ ldo6 : regulator LDO6
+
+ The bindings details of individual regulator device can be found in:
+ Documentation/devicetree/bindings/regulator/regulator.txt
+
+
+Example:
+
+ pmic: da9055-pmic@5a {
+ compatible = "dlg,da9055-pmic";
+ reg = <0x5a>;
+ interrupt-parent = <&intc>;
+ interrupts = <5 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ regulators {
+ buck1: BUCK1 {
+ regulator-min-microvolt = <725000>;
+ regulator-max-microvolt = <2075000>;
+ };
+ buck2: BUCK2 {
+ regulator-min-microvolt = <925000>;
+ regulator-max-microvolt = <2500000>;
+ };
+ ldo1: LDO1 {
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <3300000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/da9063.txt b/Documentation/devicetree/bindings/mfd/da9063.txt
new file mode 100644
index 000000000..42c6fa6f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/da9063.txt
@@ -0,0 +1,93 @@
+* Dialog DA9063 Power Management Integrated Circuit (PMIC)
+
+DA9093 consists of a large and varied group of sub-devices (I2C Only):
+
+Device Supply Names Description
+------ ------------ -----------
+da9063-regulator : : LDOs & BUCKs
+da9063-rtc : : Real-Time Clock
+da9063-watchdog : : Watchdog
+
+======
+
+Required properties:
+
+- compatible : Should be "dlg,da9063"
+- reg : Specifies the I2C slave address (this defaults to 0x58 but it can be
+ modified to match the chip's OTP settings).
+- interrupt-parent : Specifies the reference to the interrupt controller for
+ the DA9063.
+- interrupts : IRQ line information.
+- interrupt-controller
+
+Sub-nodes:
+
+- regulators : This node defines the settings for the LDOs and BUCKs. The
+ DA9063 regulators are bound using their names listed below:
+
+ bcore1 : BUCK CORE1
+ bcore2 : BUCK CORE2
+ bpro : BUCK PRO
+ bmem : BUCK MEM
+ bio : BUCK IO
+ bperi : BUCK PERI
+ ldo1 : LDO_1
+ ldo2 : LDO_2
+ ldo3 : LDO_3
+ ldo4 : LDO_4
+ ldo5 : LDO_5
+ ldo6 : LDO_6
+ ldo7 : LDO_7
+ ldo8 : LDO_8
+ ldo9 : LDO_9
+ ldo10 : LDO_10
+ ldo11 : LDO_11
+
+ The component follows the standard regulator framework and the bindings
+ details of individual regulator device can be found in:
+ Documentation/devicetree/bindings/regulator/regulator.txt
+
+- rtc : This node defines settings for the Real-Time Clock associated with
+ the DA9063. There are currently no entries in this binding, however
+ compatible = "dlg,da9063-rtc" should be added if a node is created.
+
+- watchdog : This node defines settings for the Watchdog timer associated
+ with the DA9063. There are currently no entries in this binding, however
+ compatible = "dlg,da9063-watchdog" should be added if a node is created.
+
+
+Example:
+
+ pmic0: da9063@58 {
+ compatible = "dlg,da9063"
+ reg = <0x58>;
+ interrupt-parent = <&gpio6>;
+ interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-controller;
+
+ rtc {
+ compatible = "dlg,da9063-rtc";
+ };
+
+ wdt {
+ compatible = "dlg,da9063-watchdog";
+ };
+
+ regulators {
+ DA9063_BCORE1: bcore1 {
+ regulator-name = "BCORE1";
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1570000>;
+ regulator-min-microamp = <500000>;
+ regulator-max-microamp = <2000000>;
+ regulator-boot-on;
+ };
+ DA9063_LDO11: ldo11 {
+ regulator-name = "LDO_11";
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <3600000>;
+ regulator-boot-on;
+ };
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/mfd/da9150.txt b/Documentation/devicetree/bindings/mfd/da9150.txt
new file mode 100644
index 000000000..d0588eaa0
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/da9150.txt
@@ -0,0 +1,43 @@
+Dialog Semiconductor DA9150 Combined Charger/Fuel-Gauge MFD bindings
+
+DA9150 consists of a group of sub-devices:
+
+Device Description
+------ -----------
+da9150-gpadc : General Purpose ADC
+da9150-charger : Battery Charger
+
+======
+
+Required properties:
+- compatible : Should be "dlg,da9150"
+- reg: Specifies the I2C slave address
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+ the IRQs from da9150 are delivered to.
+- interrupts: IRQ line info for da9150 chip.
+- interrupt-controller: da9150 has internal IRQs (own IRQ domain).
+ (See Documentation/devicetree/bindings/interrupt-controller/interrupts.txt for
+ further information relating to interrupt properties)
+
+Sub-devices:
+- da9150-gpadc: See Documentation/devicetree/bindings/iio/adc/da9150-gpadc.txt
+- da9150-charger: See Documentation/devicetree/bindings/power/da9150-charger.txt
+
+
+Example:
+
+ charger_fg: da9150@58 {
+ compatible = "dlg,da9150";
+ reg = <0x58>;
+ interrupt-parent = <&gpio6>;
+ interrupts = <11 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-controller;
+
+ gpadc: da9150-gpadc {
+ ...
+ };
+
+ da9150-charger {
+ ...
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/hi6421.txt b/Documentation/devicetree/bindings/mfd/hi6421.txt
new file mode 100644
index 000000000..0d5a4466a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/hi6421.txt
@@ -0,0 +1,38 @@
+* HI6421 Multi-Functional Device (MFD), by HiSilicon Ltd.
+
+Required parent device properties:
+- compatible : contains "hisilicon,hi6421-pmic";
+- reg : register range space of hi6421;
+
+Supported Hi6421 sub-devices include:
+
+Device IRQ Names Supply Names Description
+------ --------- ------------ -----------
+regulators : None : None : Regulators
+
+Required child device properties:
+None.
+
+Example:
+ hi6421 {
+ compatible = "hisilicon,hi6421-pmic";
+ reg = <0xfcc00000 0x0180>; /* 0x60 << 2 */
+
+ regulators {
+ // supply for MLC NAND/ eMMC
+ hi6421_vout0_reg: hi6421_vout0 {
+ regulator-name = "VOUT0";
+ regulator-min-microvolt = <2850000>;
+ regulator-max-microvolt = <2850000>;
+ };
+
+ // supply for 26M Oscillator
+ hi6421_vout1_reg: hi6421_vout1 {
+ regulator-name = "VOUT1";
+ regulator-min-microvolt = <1700000>;
+ regulator-max-microvolt = <2000000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/lp3943.txt b/Documentation/devicetree/bindings/mfd/lp3943.txt
new file mode 100644
index 000000000..e8591d6b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/lp3943.txt
@@ -0,0 +1,33 @@
+TI/National Semiconductor LP3943 MFD driver
+
+Required properties:
+ - compatible: "ti,lp3943"
+ - reg: I2C slave address. From 0x60 to 0x67.
+
+LP3943 consists of two sub-devices, lp3943-gpio and lp3943-pwm.
+
+For the LP3943 GPIO properties please refer to:
+Documentation/devicetree/bindings/gpio/gpio-lp3943.txt
+
+For the LP3943 PWM properties please refer to:
+Documentation/devicetree/bindings/pwm/pwm-lp3943.txt
+
+Example:
+
+lp3943@60 {
+ compatible = "ti,lp3943";
+ reg = <0x60>;
+
+ gpioex: gpio {
+ compatible = "ti,lp3943-gpio";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ pwm3943: pwm {
+ compatible = "ti,lp3943-pwm";
+ #pwm-cells = <2>;
+ ti,pwm0 = <8 9 10>;
+ ti,pwm1 = <15>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/max14577.txt b/Documentation/devicetree/bindings/mfd/max14577.txt
new file mode 100644
index 000000000..236264c10
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/max14577.txt
@@ -0,0 +1,146 @@
+Maxim MAX14577/77836 Multi-Function Device
+
+MAX14577 is a Multi-Function Device with Micro-USB Interface Circuit, Li+
+Battery Charger and SFOUT LDO output for powering USB devices. It is
+interfaced to host controller using I2C.
+
+MAX77836 additionally contains PMIC (with two LDO regulators) and Fuel Gauge.
+
+
+Required properties:
+- compatible : Must be "maxim,max14577" or "maxim,max77836".
+- reg : I2C slave address for the max14577 chip (0x25 for max14577/max77836)
+- interrupts : IRQ line for the chip.
+- interrupt-parent : The parent interrupt controller.
+
+
+Required nodes:
+ - charger :
+ Node for configuring the charger driver.
+ Required properties:
+ - compatible : "maxim,max14577-charger"
+ or "maxim,max77836-charger"
+ - maxim,fast-charge-uamp : Current in uA for Fast Charge;
+ Valid values:
+ - for max14577: 90000 - 950000;
+ - for max77836: 45000 - 475000;
+ - maxim,eoc-uamp : Current in uA for End-Of-Charge mode;
+ Valid values:
+ - for max14577: 50000 - 200000;
+ - for max77836: 5000 - 100000;
+ - maxim,ovp-uvolt : OverVoltage Protection Threshold in uV;
+ In an overvoltage condition, INT asserts and charging
+ stops. Valid values:
+ - 6000000, 6500000, 7000000, 7500000;
+ - maxim,constant-uvolt : Battery Constant Voltage in uV;
+ Valid values:
+ - 4000000 - 4280000 (step by 20000);
+ - 4350000;
+
+
+Optional nodes:
+- max14577-muic/max77836-muic :
+ Node used only by extcon consumers.
+ Required properties:
+ - compatible : "maxim,max14577-muic" or "maxim,max77836-muic"
+
+- regulators :
+ Required properties:
+ - compatible : "maxim,max14577-regulator"
+ or "maxim,max77836-regulator"
+
+ May contain a sub-node per regulator from the list below. Each
+ sub-node should contain the constraints and initialization information
+ for that regulator. See regulator.txt for a description of standard
+ properties for these sub-nodes.
+
+ List of valid regulator names:
+ - for max14577: CHARGER, SAFEOUT.
+ - for max77836: CHARGER, SAFEOUT, LDO1, LDO2.
+
+ The SAFEOUT is a fixed voltage regulator so there is no need to specify
+ voltages for it.
+
+
+Example:
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+max14577@25 {
+ compatible = "maxim,max14577";
+ reg = <0x25>;
+ interrupt-parent = <&gpx1>;
+ interrupts = <5 IRQ_TYPE_NONE>;
+
+ muic: max14577-muic {
+ compatible = "maxim,max14577-muic";
+ };
+
+ regulators {
+ compatible = "maxim,max14577-regulator";
+
+ SAFEOUT {
+ regulator-name = "SAFEOUT";
+ };
+ CHARGER {
+ regulator-name = "CHARGER";
+ regulator-min-microamp = <90000>;
+ regulator-max-microamp = <950000>;
+ regulator-boot-on;
+ };
+ };
+
+ charger {
+ compatible = "maxim,max14577-charger";
+
+ maxim,constant-uvolt = <4350000>;
+ maxim,fast-charge-uamp = <450000>;
+ maxim,eoc-uamp = <50000>;
+ maxim,ovp-uvolt = <6500000>;
+ };
+};
+
+
+max77836@25 {
+ compatible = "maxim,max77836";
+ reg = <0x25>;
+ interrupt-parent = <&gpx1>;
+ interrupts = <5 IRQ_TYPE_NONE>;
+
+ muic: max77836-muic {
+ compatible = "maxim,max77836-muic";
+ };
+
+ regulators {
+ compatible = "maxim,max77836-regulator";
+
+ SAFEOUT {
+ regulator-name = "SAFEOUT";
+ };
+ CHARGER {
+ regulator-name = "CHARGER";
+ regulator-min-microamp = <90000>;
+ regulator-max-microamp = <950000>;
+ regulator-boot-on;
+ };
+ LDO1 {
+ regulator-name = "LDO1";
+ regulator-min-microvolt = <2700000>;
+ regulator-max-microvolt = <2700000>;
+ };
+ LDO2 {
+ regulator-name = "LDO2";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3950000>;
+ };
+ };
+
+ charger {
+ compatible = "maxim,max77836-charger";
+
+ maxim,constant-uvolt = <4350000>;
+ maxim,fast-charge-uamp = <225000>;
+ maxim,eoc-uamp = <7500>;
+ maxim,ovp-uvolt = <6500000>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/max77686.txt b/Documentation/devicetree/bindings/mfd/max77686.txt
new file mode 100644
index 000000000..e39f0bc1f
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/max77686.txt
@@ -0,0 +1,82 @@
+Maxim MAX77686 multi-function device
+
+MAX77686 is a Mulitifunction device with PMIC, RTC and Charger on chip. It is
+interfaced to host controller using i2c interface. PMIC and Charger submodules
+are addressed using same i2c slave address whereas RTC submodule uses
+different i2c slave address,presently for which we are statically creating i2c
+client while probing.This document describes the binding for mfd device and
+PMIC submodule.
+
+Binding for the built-in 32k clock generator block is defined separately
+in bindings/clk/maxim,max77686.txt file.
+
+Required properties:
+- compatible : Must be "maxim,max77686";
+- reg : Specifies the i2c slave address of PMIC block.
+- interrupts : This i2c device has an IRQ line connected to the main SoC.
+- interrupt-parent : The parent interrupt controller.
+
+Optional node:
+- voltage-regulators : The regulators of max77686 have to be instantiated
+ under subnode named "voltage-regulators" using the following format.
+
+ regulator_name {
+ regulator-compatible = LDOn/BUCKn
+ standard regulator constraints....
+ };
+ refer Documentation/devicetree/bindings/regulator/regulator.txt
+
+ The regulator-compatible property of regulator should initialized with string
+to get matched with their hardware counterparts as follow:
+
+ -LDOn : for LDOs, where n can lie in range 1 to 26.
+ example: LDO1, LDO2, LDO26.
+ -BUCKn : for BUCKs, where n can lie in range 1 to 9.
+ example: BUCK1, BUCK5, BUCK9.
+
+ Regulators which can be turned off during system suspend:
+ -LDOn : 2, 6-8, 10-12, 14-16,
+ -BUCKn : 1-4.
+ Use standard regulator bindings for it ('regulator-off-in-suspend').
+
+ LDO20, LDO21, LDO22, BUCK8 and BUCK9 can be configured to GPIO enable
+ control. To turn this feature on this property must be added to the regulator
+ sub-node:
+ - maxim,ena-gpios : one GPIO specifier enable control (the gpio
+ flags are actually ignored and always
+ ACTIVE_HIGH is used)
+
+Example:
+
+ max77686@09 {
+ compatible = "maxim,max77686";
+ interrupt-parent = <&wakeup_eint>;
+ interrupts = <26 0>;
+ reg = <0x09>;
+
+ voltage-regulators {
+ ldo11_reg {
+ regulator-compatible = "LDO11";
+ regulator-name = "vdd_ldo11";
+ regulator-min-microvolt = <1900000>;
+ regulator-max-microvolt = <1900000>;
+ regulator-always-on;
+ };
+
+ buck1_reg {
+ regulator-compatible = "BUCK1";
+ regulator-name = "vdd_mif";
+ regulator-min-microvolt = <950000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ buck9_reg {
+ regulator-compatible = "BUCK9";
+ regulator-name = "CAM_ISP_CORE_1.2V";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1200000>;
+ maxim,ena-gpios = <&gpm0 3 GPIO_ACTIVE_HIGH>;
+ };
+ }
diff --git a/Documentation/devicetree/bindings/mfd/max77693.txt b/Documentation/devicetree/bindings/mfd/max77693.txt
new file mode 100644
index 000000000..38e64405e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/max77693.txt
@@ -0,0 +1,121 @@
+Maxim MAX77693 multi-function device
+
+MAX77693 is a Multifunction device with the following submodules:
+- PMIC,
+- CHARGER,
+- LED,
+- MUIC,
+- HAPTIC
+
+It is interfaced to host controller using i2c.
+This document describes the bindings for the mfd device.
+
+Required properties:
+- compatible : Must be "maxim,max77693".
+- reg : Specifies the i2c slave address of PMIC block.
+- interrupts : This i2c device has an IRQ line connected to the main SoC.
+- interrupt-parent : The parent interrupt controller.
+
+Optional properties:
+- regulators : The regulators of max77693 have to be instantiated under subnod
+ named "regulators" using the following format.
+
+ regulators {
+ regualtor-compatible = ESAFEOUT1/ESAFEOUT2/CHARGER
+ standard regulator constratints[*].
+ };
+
+ [*] refer Documentation/devicetree/bindings/regulator/regulator.txt
+
+- haptic : The MAX77693 haptic device utilises a PWM controlled motor to provide
+ users with tactile feedback. PWM period and duty-cycle are varied in
+ order to provide the approprite level of feedback.
+
+ Required properties:
+ - compatible : Must be "maxim,max77693-hpatic"
+ - haptic-supply : power supply for the haptic motor
+ [*] refer Documentation/devicetree/bindings/regulator/regulator.txt
+ - pwms : phandle to the physical PWM(Pulse Width Modulation) device.
+ PWM properties should be named "pwms". And number of cell is different
+ for each pwm device.
+ To get more informations, please refer to documentaion.
+ [*] refer Documentation/devicetree/bindings/pwm/pwm.txt
+
+- charger : Node configuring the charger driver.
+ If present, required properties:
+ - compatible : Must be "maxim,max77693-charger".
+
+ Optional properties (if not set, defaults will be used):
+ - maxim,constant-microvolt : Battery constant voltage in uV. The charger
+ will operate in fast charge constant current mode till battery voltage
+ reaches this level. Then the charger will switch to fast charge constant
+ voltage mode. Also vsys (system voltage) will be set to this value when
+ DC power is supplied but charger is not enabled.
+ Valid values: 3650000 - 4400000, step by 25000 (rounded down)
+ Default: 4200000
+
+ - maxim,min-system-microvolt : Minimal system voltage in uV.
+ Valid values: 3000000 - 3700000, step by 100000 (rounded down)
+ Default: 3600000
+
+ - maxim,thermal-regulation-celsius : Temperature in Celsius for entering
+ high temperature charging mode. If die temperature exceeds this value
+ the charging current will be reduced by 105 mA/Celsius.
+ Valid values: 70, 85, 100, 115
+ Default: 100
+
+ - maxim,battery-overcurrent-microamp : Overcurrent protection threshold
+ in uA (current from battery to system).
+ Valid values: 2000000 - 3500000, step by 250000 (rounded down)
+ Default: 3500000
+
+ - maxim,charge-input-threshold-microvolt : Threshold voltage in uV for
+ triggering input voltage regulation loop. If input voltage decreases
+ below this value, the input current will be reduced to reach the
+ threshold voltage.
+ Valid values: 4300000, 4700000, 4800000, 4900000
+ Default: 4300000
+
+Example:
+ max77693@66 {
+ compatible = "maxim,max77693";
+ reg = <0x66>;
+ interrupt-parent = <&gpx1>;
+ interrupts = <5 2>;
+
+ regulators {
+ esafeout@1 {
+ regulator-compatible = "ESAFEOUT1";
+ regulator-name = "ESAFEOUT1";
+ regulator-boot-on;
+ };
+ esafeout@2 {
+ regulator-compatible = "ESAFEOUT2";
+ regulator-name = "ESAFEOUT2";
+ };
+ charger@0 {
+ regulator-compatible = "CHARGER";
+ regulator-name = "CHARGER";
+ regulator-min-microamp = <60000>;
+ regulator-max-microamp = <2580000>;
+ regulator-boot-on;
+ };
+ };
+
+ haptic {
+ compatible = "maxim,max77693-haptic";
+ haptic-supply = <&haptic_supply>;
+ pwms = <&pwm 0 40000 0>;
+ pwm-names = "haptic";
+ };
+
+ charger {
+ compatible = "maxim,max77693-charger";
+
+ maxim,constant-microvolt = <4200000>;
+ maxim,min-system-microvolt = <3600000>;
+ maxim,thermal-regulation-celsius = <75>;
+ maxim,battery-overcurrent-microamp = <3000000>;
+ maxim,charge-input-threshold-microvolt = <4300000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/max8925.txt b/Documentation/devicetree/bindings/mfd/max8925.txt
new file mode 100644
index 000000000..4f0dc6638
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/max8925.txt
@@ -0,0 +1,64 @@
+* Maxim max8925 Power Management IC
+
+Required parent device properties:
+- compatible : "maxim,max8925"
+- reg : the I2C slave address for the max8925 chip
+- interrupts : IRQ line for the max8925 chip
+- interrupt-controller: describes the max8925 as an interrupt
+ controller (has its own domain)
+- #interrupt-cells : should be 1.
+ - The cell is the max8925 local IRQ number
+
+Optional parent device properties:
+- maxim,tsc-irq: there are 2 IRQ lines for max8925, one is indicated in
+ interrupts property, the other is indicated here.
+
+max8925 consists of a large and varied group of sub-devices:
+
+Device Supply Names Description
+------ ------------ -----------
+max8925-onkey : : On key
+max8925-rtc : : RTC
+max8925-regulator : : Regulators
+max8925-backlight : : Backlight
+max8925-touch : : Touchscreen
+max8925-power : : Charger
+
+Example:
+
+ pmic: max8925@3c {
+ compatible = "maxim,max8925";
+ reg = <0x3c>;
+ interrupts = <1>;
+ interrupt-parent = <&intcmux4>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ maxim,tsc-irq = <0>;
+
+ regulators {
+ SDV1 {
+ regulator-min-microvolt = <637500>;
+ regulator-max-microvolt = <1425000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ LDO1 {
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <3900000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ };
+ backlight {
+ maxim,max8925-dual-string = <0>;
+ };
+ charger {
+ batt-detect = <0>;
+ topoff-threshold = <1>;
+ fast-charge = <7>;
+ no-temp-support = <0>;
+ no-insert-detect = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/max8998.txt b/Documentation/devicetree/bindings/mfd/max8998.txt
new file mode 100644
index 000000000..23a3650ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/max8998.txt
@@ -0,0 +1,119 @@
+* Maxim MAX8998, National/TI LP3974 multi-function device
+
+The Maxim MAX8998 is a multi-function device which includes voltage/current
+regulators, real time clock, battery charging controller and several
+other sub-blocks. It is interfaced using an I2C interface. Each sub-block
+is addressed by the host system using different i2c slave address.
+
+PMIC sub-block
+--------------
+
+The PMIC sub-block contains a number of voltage and current regulators,
+with controllable parameters and dynamic voltage scaling capability.
+In addition, it includes a real time clock and battery charging controller
+as well. It is accessible at I2C address 0x66.
+
+Required properties:
+- compatible: Should be one of the following:
+ - "maxim,max8998" for Maxim MAX8998
+ - "national,lp3974" or "ti,lp3974" for National/TI LP3974.
+- reg: Specifies the i2c slave address of the pmic block. It should be 0x66.
+
+Optional properties:
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+ the interrupts from MAX8998 are routed to.
+- interrupts: Interrupt specifiers for two interrupt sources.
+ - First interrupt specifier is for main interrupt.
+ - Second interrupt specifier is for power-on/-off interrupt.
+- max8998,pmic-buck1-dvs-gpios: GPIO specifiers for two host gpios used
+ for buck 1 dvs. The format of the gpio specifier depends on the gpio
+ controller.
+- max8998,pmic-buck2-dvs-gpio: GPIO specifier for host gpio used
+ for buck 2 dvs. The format of the gpio specifier depends on the gpio
+ controller.
+- max8998,pmic-buck1-default-dvs-idx: Default voltage setting selected from
+ the possible 4 options selectable by the dvs gpios. The value of this
+ property should be 0, 1, 2 or 3. If not specified or out of range,
+ a default value of 0 is taken.
+- max8998,pmic-buck2-default-dvs-idx: Default voltage setting selected from
+ the possible 2 options selectable by the dvs gpios. The value of this
+ property should be 0 or 1. If not specified or out of range, a default
+ value of 0 is taken.
+- max8998,pmic-buck-voltage-lock: If present, disallows changing of
+ preprogrammed buck dvfs voltages.
+
+Additional properties required if max8998,pmic-buck1-dvs-gpios is defined:
+- max8998,pmic-buck1-dvs-voltage: An array of 4 voltage values in microvolts
+ for buck1 regulator that can be selected using dvs gpio.
+
+Additional properties required if max8998,pmic-buck2-dvs-gpio is defined:
+- max8998,pmic-buck2-dvs-voltage: An array of 2 voltage values in microvolts
+ for buck2 regulator that can be selected using dvs gpio.
+
+Regulators: All the regulators of MAX8998 to be instantiated shall be
+listed in a child node named 'regulators'. Each regulator is represented
+by a child node of the 'regulators' node.
+
+ regulator-name {
+ /* standard regulator bindings here */
+ };
+
+Following regulators of the MAX8998 PMIC block are supported. Note that
+the 'n' in regulator name, as in LDOn or BUCKn, represents the LDO or BUCK
+number as described in MAX8998 datasheet.
+
+ - LDOn
+ - valid values for n are 2 to 17
+ - Example: LDO2, LDO10, LDO17
+ - BUCKn
+ - valid values for n are 1 to 4.
+ - Example: BUCK1, BUCK2, BUCK3, BUCK4
+
+ - ENVICHG: Battery Charging Current Monitor Output. This is a fixed
+ voltage type regulator
+
+ - ESAFEOUT1: (ldo19)
+ - ESAFEOUT2: (ld020)
+
+Standard regulator bindings are used inside regulator subnodes. Check
+ Documentation/devicetree/bindings/regulator/regulator.txt
+for more details.
+
+Example:
+
+ pmic@66 {
+ compatible = "maxim,max8998-pmic";
+ reg = <0x66>;
+ interrupt-parent = <&wakeup_eint>;
+ interrupts = <4 0>, <3 0>;
+
+ /* Buck 1 DVS settings */
+ max8998,pmic-buck1-default-dvs-idx = <0>;
+ max8998,pmic-buck1-dvs-gpios = <&gpx0 0 1 0 0>, /* SET1 */
+ <&gpx0 1 1 0 0>; /* SET2 */
+ max8998,pmic-buck1-dvs-voltage = <1350000>, <1300000>,
+ <1000000>, <950000>;
+
+ /* Buck 2 DVS settings */
+ max8998,pmic-buck2-default-dvs-idx = <0>;
+ max8998,pmic-buck2-dvs-gpio = <&gpx0 0 3 0 0>; /* SET3 */
+ max8998,pmic-buck2-dvs-voltage = <1350000>, <1300000>;
+
+ /* Regulators to instantiate */
+ regulators {
+ ldo2_reg: LDO2 {
+ regulator-name = "VDD_ALIVE_1.1V";
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-always-on;
+ };
+
+ buck1_reg: BUCK1 {
+ regulator-name = "VDD_ARM_1.2V";
+ regulator-min-microvolt = <950000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/mc13xxx.txt b/Documentation/devicetree/bindings/mfd/mc13xxx.txt
new file mode 100644
index 000000000..8aba48821
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/mc13xxx.txt
@@ -0,0 +1,158 @@
+* Freescale MC13783/MC13892 Power Management Integrated Circuit (PMIC)
+
+Required properties:
+- compatible : Should be "fsl,mc13783" or "fsl,mc13892"
+
+Optional properties:
+- fsl,mc13xxx-uses-adc : Indicate the ADC is being used
+- fsl,mc13xxx-uses-codec : Indicate the Audio Codec is being used
+- fsl,mc13xxx-uses-rtc : Indicate the RTC is being used
+- fsl,mc13xxx-uses-touch : Indicate the touchscreen controller is being used
+
+Sub-nodes:
+- codec: Contain the Audio Codec node.
+ - adc-port: Contain PMIC SSI port number used for ADC.
+ - dac-port: Contain PMIC SSI port number used for DAC.
+- leds : Contain the led nodes and initial register values in property
+ "led-control". Number of register depends of used IC, for MC13783 is 6,
+ for MC13892 is 4, for MC34708 is 1. See datasheet for bits definitions of
+ these registers.
+ - #address-cells: Must be 1.
+ - #size-cells: Must be 0.
+ Each led node should contain "reg", which used as LED ID (described below).
+ Optional properties "label" and "linux,default-trigger" is described in
+ Documentation/devicetree/bindings/leds/common.txt.
+- regulators : Contain the regulator nodes. The regulators are bound using
+ their names as listed below with their registers and bits for enabling.
+
+MC13783 LED IDs:
+ 0 : Main display
+ 1 : AUX display
+ 2 : Keypad
+ 3 : Red 1
+ 4 : Green 1
+ 5 : Blue 1
+ 6 : Red 2
+ 7 : Green 2
+ 8 : Blue 2
+ 9 : Red 3
+ 10 : Green 3
+ 11 : Blue 3
+
+MC13892 LED IDs:
+ 0 : Main display
+ 1 : AUX display
+ 2 : Keypad
+ 3 : Red
+ 4 : Green
+ 5 : Blue
+
+MC34708 LED IDs:
+ 0 : Charger Red
+ 1 : Charger Green
+
+MC13783 regulators:
+ sw1a : regulator SW1A (register 24, bit 0)
+ sw1b : regulator SW1B (register 25, bit 0)
+ sw2a : regulator SW2A (register 26, bit 0)
+ sw2b : regulator SW2B (register 27, bit 0)
+ sw3 : regulator SW3 (register 29, bit 20)
+ vaudio : regulator VAUDIO (register 32, bit 0)
+ viohi : regulator VIOHI (register 32, bit 3)
+ violo : regulator VIOLO (register 32, bit 6)
+ vdig : regulator VDIG (register 32, bit 9)
+ vgen : regulator VGEN (register 32, bit 12)
+ vrfdig : regulator VRFDIG (register 32, bit 15)
+ vrfref : regulator VRFREF (register 32, bit 18)
+ vrfcp : regulator VRFCP (register 32, bit 21)
+ vsim : regulator VSIM (register 33, bit 0)
+ vesim : regulator VESIM (register 33, bit 3)
+ vcam : regulator VCAM (register 33, bit 6)
+ vrfbg : regulator VRFBG (register 33, bit 9)
+ vvib : regulator VVIB (register 33, bit 11)
+ vrf1 : regulator VRF1 (register 33, bit 12)
+ vrf2 : regulator VRF2 (register 33, bit 15)
+ vmmc1 : regulator VMMC1 (register 33, bit 18)
+ vmmc2 : regulator VMMC2 (register 33, bit 21)
+ gpo1 : regulator GPO1 (register 34, bit 6)
+ gpo2 : regulator GPO2 (register 34, bit 8)
+ gpo3 : regulator GPO3 (register 34, bit 10)
+ gpo4 : regulator GPO4 (register 34, bit 12)
+ pwgt1spi : regulator PWGT1SPI (register 34, bit 15)
+ pwgt2spi : regulator PWGT2SPI (register 34, bit 16)
+
+MC13892 regulators:
+ vcoincell : regulator VCOINCELL (register 13, bit 23)
+ sw1 : regulator SW1 (register 24, bit 0)
+ sw2 : regulator SW2 (register 25, bit 0)
+ sw3 : regulator SW3 (register 26, bit 0)
+ sw4 : regulator SW4 (register 27, bit 0)
+ swbst : regulator SWBST (register 29, bit 20)
+ vgen1 : regulator VGEN1 (register 32, bit 0)
+ viohi : regulator VIOHI (register 32, bit 3)
+ vdig : regulator VDIG (register 32, bit 9)
+ vgen2 : regulator VGEN2 (register 32, bit 12)
+ vpll : regulator VPLL (register 32, bit 15)
+ vusb2 : regulator VUSB2 (register 32, bit 18)
+ vgen3 : regulator VGEN3 (register 33, bit 0)
+ vcam : regulator VCAM (register 33, bit 6)
+ vvideo : regulator VVIDEO (register 33, bit 12)
+ vaudio : regulator VAUDIO (register 33, bit 15)
+ vsd : regulator VSD (register 33, bit 18)
+ gpo1 : regulator GPO1 (register 34, bit 6)
+ gpo2 : regulator GPO2 (register 34, bit 8)
+ gpo3 : regulator GPO3 (register 34, bit 10)
+ gpo4 : regulator GPO4 (register 34, bit 12)
+ pwgt1spi : regulator PWGT1SPI (register 34, bit 15)
+ pwgt2spi : regulator PWGT2SPI (register 34, bit 16)
+ vusb : regulator VUSB (register 50, bit 3)
+
+ The bindings details of individual regulator device can be found in:
+ Documentation/devicetree/bindings/regulator/regulator.txt
+
+Examples:
+
+ecspi@70010000 { /* ECSPI1 */
+ fsl,spi-num-chipselects = <2>;
+ cs-gpios = <&gpio4 24 0>, /* GPIO4_24 */
+ <&gpio4 25 0>; /* GPIO4_25 */
+ status = "okay";
+
+ pmic: mc13892@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,mc13892";
+ spi-max-frequency = <6000000>;
+ reg = <0>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <8>;
+
+ leds {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ led-control = <0x000 0x000 0x0e0 0x000>;
+
+ sysled {
+ reg = <3>;
+ label = "system:red:live";
+ linux,default-trigger = "heartbeat";
+ };
+ };
+
+ regulators {
+ sw1_reg: mc13892__sw1 {
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <1375000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sw2_reg: mc13892__sw2 {
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <1850000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/mt6397.txt b/Documentation/devicetree/bindings/mfd/mt6397.txt
new file mode 100644
index 000000000..15043e652
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/mt6397.txt
@@ -0,0 +1,70 @@
+MediaTek MT6397 Multifunction Device Driver
+
+MT6397 is a multifunction device with the following sub modules:
+- Regulator
+- RTC
+- Audio codec
+- GPIO
+- Clock
+
+It is interfaced to host controller using SPI interface by a proprietary hardware
+called PMIC wrapper or pwrap. MT6397 MFD is a child device of pwrap.
+See the following for pwarp node definitions:
+Documentation/devicetree/bindings/soc/pwrap.txt
+
+This document describes the binding for MFD device and its sub module.
+
+Required properties:
+compatible: "mediatek,mt6397"
+
+Optional subnodes:
+
+- rtc
+ Required properties:
+ - compatible: "mediatek,mt6397-rtc"
+- regulators
+ Required properties:
+ - compatible: "mediatek,mt6397-regulator"
+ see Documentation/devicetree/bindings/regulator/mt6397-regulator.txt
+- codec
+ Required properties:
+ - compatible: "mediatek,mt6397-codec"
+- clk
+ Required properties:
+ - compatible: "mediatek,mt6397-clk"
+
+Example:
+ pwrap: pwrap@1000f000 {
+ compatible = "mediatek,mt8135-pwrap";
+
+ ...
+
+ pmic {
+ compatible = "mediatek,mt6397";
+
+ codec: mt6397codec {
+ compatible = "mediatek,mt6397-codec";
+ };
+
+ regulators {
+ compatible = "mediatek,mt6397-regulator";
+
+ mt6397_vpca15_reg: buck_vpca15 {
+ regulator-compatible = "buck_vpca15";
+ regulator-name = "vpca15";
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-ramp-delay = <12500>;
+ regulator-always-on;
+ };
+
+ mt6397_vgp4_reg: ldo_vgp4 {
+ regulator-compatible = "ldo_vgp4";
+ regulator-name = "vgp4";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/omap-usb-host.txt b/Documentation/devicetree/bindings/mfd/omap-usb-host.txt
new file mode 100644
index 000000000..4721b2d52
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/omap-usb-host.txt
@@ -0,0 +1,103 @@
+OMAP HS USB Host
+
+Required properties:
+
+- compatible: should be "ti,usbhs-host"
+- reg: should contain one register range i.e. start and length
+- ti,hwmods: must contain "usb_host_hs"
+
+Optional properties:
+
+- num-ports: number of USB ports. Usually this is automatically detected
+ from the IP's revision register but can be overridden by specifying
+ this property. A maximum of 3 ports are supported at the moment.
+
+- portN-mode: String specifying the port mode for port N, where N can be
+ from 1 to 3. If the port mode is not specified, that port is treated
+ as unused. When specified, it must be one of the following.
+ "ehci-phy",
+ "ehci-tll",
+ "ehci-hsic",
+ "ohci-phy-6pin-datse0",
+ "ohci-phy-6pin-dpdm",
+ "ohci-phy-3pin-datse0",
+ "ohci-phy-4pin-dpdm",
+ "ohci-tll-6pin-datse0",
+ "ohci-tll-6pin-dpdm",
+ "ohci-tll-3pin-datse0",
+ "ohci-tll-4pin-dpdm",
+ "ohci-tll-2pin-datse0",
+ "ohci-tll-2pin-dpdm",
+
+- single-ulpi-bypass: Must be present if the controller contains a single
+ ULPI bypass control bit. e.g. OMAP3 silicon <= ES2.1
+
+- clocks: a list of phandles and clock-specifier pairs, one for each entry in
+ clock-names.
+
+- clock-names: should include:
+ For OMAP3
+ * "usbhost_120m_fck" - 120MHz Functional clock.
+
+ For OMAP4+
+ * "refclk_60m_int" - 60MHz internal reference clock for UTMI clock mux
+ * "refclk_60m_ext_p1" - 60MHz external ref. clock for Port 1's UTMI clock mux.
+ * "refclk_60m_ext_p2" - 60MHz external ref. clock for Port 2's UTMI clock mux
+ * "utmi_p1_gfclk" - Port 1 UTMI clock mux.
+ * "utmi_p2_gfclk" - Port 2 UTMI clock mux.
+ * "usb_host_hs_utmi_p1_clk" - Port 1 UTMI clock gate.
+ * "usb_host_hs_utmi_p2_clk" - Port 2 UTMI clock gate.
+ * "usb_host_hs_utmi_p3_clk" - Port 3 UTMI clock gate.
+ * "usb_host_hs_hsic480m_p1_clk" - Port 1 480MHz HSIC clock gate.
+ * "usb_host_hs_hsic480m_p2_clk" - Port 2 480MHz HSIC clock gate.
+ * "usb_host_hs_hsic480m_p3_clk" - Port 3 480MHz HSIC clock gate.
+ * "usb_host_hs_hsic60m_p1_clk" - Port 1 60MHz HSIC clock gate.
+ * "usb_host_hs_hsic60m_p2_clk" - Port 2 60MHz HSIC clock gate.
+ * "usb_host_hs_hsic60m_p3_clk" - Port 3 60MHz HSIC clock gate.
+
+Required properties if child node exists:
+
+- #address-cells: Must be 1
+- #size-cells: Must be 1
+- ranges: must be present
+
+Properties for children:
+
+The OMAP HS USB Host subsystem contains EHCI and OHCI controllers.
+See Documentation/devicetree/bindings/usb/omap-ehci.txt and
+omap3-ohci.txt
+
+Example for OMAP4:
+
+usbhshost: usbhshost@4a064000 {
+ compatible = "ti,usbhs-host";
+ reg = <0x4a064000 0x800>;
+ ti,hwmods = "usb_host_hs";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ usbhsohci: ohci@4a064800 {
+ compatible = "ti,ohci-omap3", "usb-ohci";
+ reg = <0x4a064800 0x400>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 76 0x4>;
+ };
+
+ usbhsehci: ehci@4a064c00 {
+ compatible = "ti,ehci-omap", "usb-ehci";
+ reg = <0x4a064c00 0x400>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 77 0x4>;
+ };
+};
+
+&usbhshost {
+ port1-mode = "ehci-phy";
+ port2-mode = "ehci-tll";
+ port3-mode = "ehci-phy";
+};
+
+&usbhsehci {
+ phys = <&hsusb1_phy 0 &hsusb3_phy>;
+};
diff --git a/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt b/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt
new file mode 100644
index 000000000..c58d70437
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/omap-usb-tll.txt
@@ -0,0 +1,27 @@
+OMAP HS USB Host TLL (Transceiver-Less Interface)
+
+Required properties:
+
+- compatible : should be "ti,usbhs-tll"
+- reg : should contain one register range i.e. start and length
+- interrupts : should contain the TLL module's interrupt
+- ti,hwmod : must contain "usb_tll_hs"
+
+Optional properties:
+
+- clocks: a list of phandles and clock-specifier pairs, one for each entry in
+ clock-names.
+
+- clock-names: should include:
+ * "usb_tll_hs_usb_ch0_clk" - USB TLL channel 0 clock
+ * "usb_tll_hs_usb_ch1_clk" - USB TLL channel 1 clock
+ * "usb_tll_hs_usb_ch2_clk" - USB TLL channel 2 clock
+
+Example:
+
+ usbhstll: usbhstll@4a062000 {
+ compatible = "ti,usbhs-tll";
+ reg = <0x4a062000 0x1000>;
+ interrupts = <78>;
+ ti,hwmods = "usb_tll_hs";
+ };
diff --git a/Documentation/devicetree/bindings/mfd/palmas.txt b/Documentation/devicetree/bindings/mfd/palmas.txt
new file mode 100644
index 000000000..eda898978
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/palmas.txt
@@ -0,0 +1,53 @@
+* palmas device tree bindings
+
+The TI palmas family current members :-
+twl6035 (palmas)
+twl6037 (palmas)
+tps65913 (palmas)
+tps65914 (palmas)
+tps659038
+tps65917
+
+Required properties:
+- compatible : Should be from the list
+ ti,twl6035
+ ti,twl6036
+ ti,twl6037
+ ti,tps65913
+ ti,tps65914
+ ti,tps80036
+ ti,tps659038
+ ti,tps65917
+and also the generic series names
+ ti,palmas
+- interrupt-controller : palmas has its own internal IRQs
+- #interrupt-cells : should be set to 2 for IRQ number and flags
+ The first cell is the IRQ number.
+ The second cell is the flags, encoded as the trigger masks from
+ Documentation/devicetree/bindings/interrupts.txt
+- interrupt-parent : The parent interrupt controller.
+
+Optional properties:
+ ti,mux-padX : set the pad register X (1-2) to the correct muxing for the
+ hardware, if not set will use muxing in OTP.
+
+Example:
+
+palmas {
+ compatible = "ti,twl6035", "ti,palmas";
+ reg = <0x48>
+ interrupt-parent = <&intc>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ ti,mux-pad1 = <0>;
+ ti,mux-pad2 = <0>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pmic {
+ compatible = "ti,twl6035-pmic", "ti,palmas-pmic";
+ ....
+ };
+}
diff --git a/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.txt b/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.txt
new file mode 100644
index 000000000..6ac06c1b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/qcom,spmi-pmic.txt
@@ -0,0 +1,75 @@
+ Qualcomm SPMI PMICs multi-function device bindings
+
+The Qualcomm SPMI series presently includes PM8941, PM8841 and PMA8084
+PMICs. These PMICs use a QPNP scheme through SPMI interface.
+QPNP is effectively a partitioning scheme for dividing the SPMI extended
+register space up into logical pieces, and set of fixed register
+locations/definitions within these regions, with some of these regions
+specifically used for interrupt handling.
+
+The QPNP PMICs are used with the Qualcomm Snapdragon series SoCs, and are
+interfaced to the chip via the SPMI (System Power Management Interface) bus.
+Support for multiple independent functions are implemented by splitting the
+16-bit SPMI slave address space into 256 smaller fixed-size regions, 256 bytes
+each. A function can consume one or more of these fixed-size register regions.
+
+Required properties:
+- compatible: Should contain one of:
+ "qcom,pm8941",
+ "qcom,pm8841",
+ "qcom,pma8084",
+ "qcom,pm8019",
+ "qcom,pm8226",
+ "qcom,pm8110",
+ "qcom,pma8084",
+ "qcom,pmi8962",
+ "qcom,pmd9635",
+ "qcom,pm8994",
+ "qcom,pmi8994",
+ "qcom,pm8916",
+ "qcom,pm8004",
+ "qcom,pm8909",
+ or generalized "qcom,spmi-pmic".
+- reg: Specifies the SPMI USID slave address for this device.
+ For more information see:
+ Documentation/devicetree/bindings/spmi/spmi.txt
+
+Required properties for peripheral child nodes:
+- compatible: Should contain "qcom,xxx", where "xxx" is a peripheral name.
+
+Optional properties for peripheral child nodes:
+- interrupts: Interrupts are specified as a 4-tuple. For more information
+ see:
+ Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.txt
+- interrupt-names: Corresponding interrupt name to the interrupts property
+
+Each child node of SPMI slave id represents a function of the PMIC. In the
+example below the rtc device node represents a peripheral of pm8941
+SID = 0. The regulator device node represents a peripheral of pm8941 SID = 1.
+
+Example:
+
+ spmi {
+ compatible = "qcom,spmi-pmic-arb";
+
+ pm8941@0 {
+ compatible = "qcom,pm8941", "qcom,spmi-pmic";
+ reg = <0x0 SPMI_USID>;
+
+ rtc {
+ compatible = "qcom,rtc";
+ interrupts = <0x0 0x61 0x1 IRQ_TYPE_EDGE_RISING>;
+ interrupt-names = "alarm";
+ };
+ };
+
+ pm8941@1 {
+ compatible = "qcom,pm8941", "qcom,spmi-pmic";
+ reg = <0x1 SPMI_USID>;
+
+ regulator {
+ compatible = "qcom,regulator";
+ regulator-name = "8941_boost";
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/qcom,tcsr.txt b/Documentation/devicetree/bindings/mfd/qcom,tcsr.txt
new file mode 100644
index 000000000..e90519d56
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/qcom,tcsr.txt
@@ -0,0 +1,22 @@
+QCOM Top Control and Status Register
+
+Qualcomm devices have a set of registers that provide various control and status
+functions for their peripherals. This node is intended to allow access to these
+registers via syscon.
+
+Required properties:
+- compatible: Should contain:
+ "qcom,tcsr-ipq8064", "syscon" for IPQ8064
+ "qcom,tcsr-apq8064", "syscon" for APQ8064
+ "qcom,tcsr-msm8660", "syscon" for MSM8660
+ "qcom,tcsr-msm8960", "syscon" for MSM8960
+ "qcom,tcsr-msm8974", "syscon" for MSM8974
+ "qcom,tcsr-apq8084", "syscon" for APQ8084
+ "qcom,tcsr-msm8916", "syscon" for MSM8916
+- reg: Address range for TCSR registers
+
+Example:
+ tcsr: syscon@1a400000 {
+ compatible = "qcom,tcsr-msm8960", "syscon";
+ reg = <0x1a400000 0x100>;
+ };
diff --git a/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.txt b/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.txt
new file mode 100644
index 000000000..f24f33409
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.txt
@@ -0,0 +1,97 @@
+Qualcomm PM8xxx PMIC multi-function devices
+
+The PM8xxx family of Power Management ICs are used to provide regulated
+voltages and other various functionality to Qualcomm SoCs.
+
+= PROPERTIES
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be one of:
+ "qcom,pm8058"
+ "qcom,pm8921"
+
+- #address-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: must be 1
+
+- #size-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: must be 0
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: specifies the interrupt that indicates a subdevice
+ has generated an interrupt (summary interrupt). The
+ format of the specifier is defined by the binding document
+ describing the node's interrupt parent.
+
+- #interrupt-cells:
+ Usage: required
+ Value type : <u32>
+ Definition: must be 2. Specifies the number of cells needed to encode
+ an interrupt source. The 1st cell contains the interrupt
+ number. The 2nd cell is the trigger type and level flags
+ encoded as follows:
+
+ 1 = low-to-high edge triggered
+ 2 = high-to-low edge triggered
+ 4 = active high level-sensitive
+ 8 = active low level-sensitive
+
+- interrupt-controller:
+ Usage: required
+ Value type: <empty>
+ Definition: identifies this node as an interrupt controller
+
+= SUBCOMPONENTS
+
+The PMIC contains multiple independent functions, each described in a subnode.
+The below bindings specify the set of valid subnodes.
+
+== Real-Time Clock
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be one of:
+ "qcom,pm8058-rtc"
+ "qcom,pm8921-rtc"
+ "qcom,pm8941-rtc"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: single entry specifying the base address of the RTC registers
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: single entry specifying the RTC's alarm interrupt
+
+- allow-set-time:
+ Usage: optional
+ Value type: <empty>
+ Definition: indicates that the setting of RTC time is allowed by
+ the host CPU
+
+= EXAMPLE
+
+ pmicintc: pmic@0 {
+ compatible = "qcom,pm8921";
+ interrupts = <104 8>;
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rtc@11d {
+ compatible = "qcom,pm8921-rtc";
+ reg = <0x11d>;
+ interrupts = <0x27 0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/qcom-rpm.txt b/Documentation/devicetree/bindings/mfd/qcom-rpm.txt
new file mode 100644
index 000000000..5e97a9593
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/qcom-rpm.txt
@@ -0,0 +1,264 @@
+Qualcomm Resource Power Manager (RPM)
+
+This driver is used to interface with the Resource Power Manager (RPM) found in
+various Qualcomm platforms. The RPM allows each component in the system to vote
+for state of the system resources, such as clocks, regulators and bus
+frequencies.
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be one of:
+ "qcom,rpm-apq8064"
+ "qcom,rpm-msm8660"
+ "qcom,rpm-msm8960"
+ "qcom,rpm-ipq8064"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: base address and size of the RPM's message ram
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: three entries specifying the RPM's:
+ 1. acknowledgement interrupt
+ 2. error interrupt
+ 3. wakeup interrupt
+
+- interrupt-names:
+ Usage: required
+ Value type: <string-array>
+ Definition: must be the three strings "ack", "err" and "wakeup", in order
+
+- qcom,ipc:
+ Usage: required
+ Value type: <prop-encoded-array>
+
+ Definition: three entries specifying the outgoing ipc bit used for
+ signaling the RPM:
+ - phandle to a syscon node representing the apcs registers
+ - u32 representing offset to the register within the syscon
+ - u32 representing the ipc bit within the register
+
+
+= SUBNODES
+
+The RPM exposes resources to its subnodes. The below bindings specify the set
+of valid subnodes that can operate on these resources.
+
+== Regulators
+
+Regulator nodes are identified by their compatible:
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be one of:
+ "qcom,rpm-pm8058-regulators"
+ "qcom,rpm-pm8901-regulators"
+ "qcom,rpm-pm8921-regulators"
+
+- vdd_l0_l1_lvs-supply:
+- vdd_l2_l11_l12-supply:
+- vdd_l3_l4_l5-supply:
+- vdd_l6_l7-supply:
+- vdd_l8-supply:
+- vdd_l9-supply:
+- vdd_l10-supply:
+- vdd_l13_l16-supply:
+- vdd_l14_l15-supply:
+- vdd_l17_l18-supply:
+- vdd_l19_l20-supply:
+- vdd_l21-supply:
+- vdd_l22-supply:
+- vdd_l23_l24_l25-supply:
+- vdd_ncp-supply:
+- vdd_s0-supply:
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
+- vdd_s4-supply:
+ Usage: optional (pm8058 only)
+ Value type: <phandle>
+ Definition: reference to regulator supplying the input pin, as
+ described in the data sheet
+
+- lvs0_in-supply:
+- lvs1_in-supply:
+- lvs2_in-supply:
+- lvs3_in-supply:
+- mvs_in-supply:
+- vdd_l0-supply:
+- vdd_l1-supply:
+- vdd_l2-supply:
+- vdd_l3-supply:
+- vdd_l4-supply:
+- vdd_l5-supply:
+- vdd_l6-supply:
+- vdd_s0-supply:
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s3-supply:
+- vdd_s4-supply:
+ Usage: optional (pm8901 only)
+ Value type: <phandle>
+ Definition: reference to regulator supplying the input pin, as
+ described in the data sheet
+
+- vdd_l1_l2_l12_l18-supply:
+- vdd_l3_l15_l17-supply:
+- vdd_l4_l14-supply:
+- vdd_l5_l8_l16-supply:
+- vdd_l6_l7-supply:
+- vdd_l9_l11-supply:
+- vdd_l10_l22-supply:
+- vdd_l21_l23_l29-supply:
+- vdd_l24-supply:
+- vdd_l25-supply:
+- vdd_l26-supply:
+- vdd_l27-supply:
+- vdd_l28-supply:
+- vdd_ncp-supply:
+- vdd_s1-supply:
+- vdd_s2-supply:
+- vdd_s4-supply:
+- vdd_s5-supply:
+- vdd_s6-supply:
+- vdd_s7-supply:
+- vdd_s8-supply:
+- vin_5vs-supply:
+- vin_lvs1_3_6-supply:
+- vin_lvs2-supply:
+- vin_lvs4_5_7-supply:
+ Usage: optional (pm8921 only)
+ Value type: <phandle>
+ Definition: reference to regulator supplying the input pin, as
+ described in the data sheet
+
+The regulator node houses sub-nodes for each regulator within the device. Each
+sub-node is identified using the node's name, with valid values listed for each
+of the pmics below.
+
+pm8058:
+ l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15,
+ l16, l17, l18, l19, l20, l21, l22, l23, l24, l25, s0, s1, s2, s3, s4,
+ lvs0, lvs1, ncp
+
+pm8901:
+ l0, l1, l2, l3, l4, l5, l6, s0, s1, s2, s3, s4, lvs0, lvs1, lvs2, lvs3,
+ mvs
+
+pm8921:
+ s1, s2, s3, s4, s7, s8, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11,
+ l12, l14, l15, l16, l17, l18, l21, l22, l23, l24, l25, l26, l27, l28,
+ l29, lvs1, lvs2, lvs3, lvs4, lvs5, lvs6, lvs7, usb-switch, hdmi-switch,
+ ncp
+
+The content of each sub-node is defined by the standard binding for regulators -
+see regulator.txt - with additional custom properties described below:
+
+=== Switch-mode Power Supply regulator custom properties
+
+- bias-pull-down:
+ Usage: optional
+ Value type: <empty>
+ Definition: enable pull down of the regulator when inactive
+
+- qcom,switch-mode-frequency:
+ Usage: required
+ Value type: <u32>
+ Definition: Frequency (Hz) of the switch-mode power supply;
+ must be one of:
+ 19200000, 9600000, 6400000, 4800000, 3840000, 3200000,
+ 2740000, 2400000, 2130000, 1920000, 1750000, 1600000,
+ 1480000, 1370000, 1280000, 1200000
+
+- qcom,force-mode:
+ Usage: optional (default if no other qcom,force-mode is specified)
+ Value type: <u32>
+ Defintion: indicates that the regulator should be forced to a
+ particular mode, valid values are:
+ QCOM_RPM_FORCE_MODE_NONE - do not force any mode
+ QCOM_RPM_FORCE_MODE_LPM - force into low power mode
+ QCOM_RPM_FORCE_MODE_HPM - force into high power mode
+ QCOM_RPM_FORCE_MODE_AUTO - allow regulator to automatically
+ select its own mode based on
+ realtime current draw, only for:
+ pm8921 smps and ftsmps
+
+- qcom,power-mode-hysteretic:
+ Usage: optional
+ Value type: <empty>
+ Definition: select that the power supply should operate in hysteretic
+ mode, instead of the default pwm mode
+
+=== Low-dropout regulator custom properties
+
+- bias-pull-down:
+ Usage: optional
+ Value type: <empty>
+ Definition: enable pull down of the regulator when inactive
+
+- qcom,force-mode:
+ Usage: optional
+ Value type: <u32>
+ Defintion: indicates that the regulator should not be forced to any
+ particular mode, valid values are:
+ QCOM_RPM_FORCE_MODE_NONE - do not force any mode
+ QCOM_RPM_FORCE_MODE_LPM - force into low power mode
+ QCOM_RPM_FORCE_MODE_HPM - force into high power mode
+ QCOM_RPM_FORCE_MODE_BYPASS - set regulator to use bypass
+ mode, i.e. to act as a switch
+ and not regulate, only for:
+ pm8921 pldo, nldo and nldo1200
+
+=== Negative Charge Pump custom properties
+
+- qcom,switch-mode-frequency:
+ Usage: required
+ Value type: <u32>
+ Definition: Frequency (Hz) of the swith mode power supply;
+ must be one of:
+ 19200000, 9600000, 6400000, 4800000, 3840000, 3200000,
+ 2740000, 2400000, 2130000, 1920000, 1750000, 1600000,
+ 1480000, 1370000, 1280000, 1200000
+
+= EXAMPLE
+
+ #include <dt-bindings/mfd/qcom-rpm.h>
+
+ rpm@108000 {
+ compatible = "qcom,rpm-msm8960";
+ reg = <0x108000 0x1000>;
+ qcom,ipc = <&apcs 0x8 2>;
+
+ interrupts = <0 19 0>, <0 21 0>, <0 22 0>;
+ interrupt-names = "ack", "err", "wakeup";
+
+ regulators {
+ compatible = "qcom,rpm-pm8921-regulators";
+ vdd_l1_l2_l12_l18-supply = <&pm8921_s4>;
+
+ s1 {
+ regulator-min-microvolt = <1225000>;
+ regulator-max-microvolt = <1225000>;
+
+ bias-pull-down;
+
+ qcom,switch-mode-frequency = <3200000>;
+ };
+
+ pm8921_s4: s4 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+
+ qcom,switch-mode-frequency = <1600000>;
+ bias-pull-down;
+
+ qcom,force-mode = <QCOM_RPM_FORCE_MODE_AUTO>;
+ };
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/mfd/qriox.txt b/Documentation/devicetree/bindings/mfd/qriox.txt
new file mode 100644
index 000000000..f301e2d4c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/qriox.txt
@@ -0,0 +1,17 @@
+KEYMILE qrio Board Control CPLD
+
+The qrio is a multifunction device that controls the KEYMILE boards based on
+the kmp204x design.
+It is consists of a reset controller, watchdog timer, LEDs, and 2 IRQ capable
+GPIO blocks.
+
+Required properties:
+- compatible: "keymile,qriox"
+- reg: access on the parent local bus (chip select, offset in chip select, size)
+
+Example:
+
+ board-control@1,0 {
+ compatible = "keymile,qriox";
+ reg = <1 0 0x80>;
+ };
diff --git a/Documentation/devicetree/bindings/mfd/rk808.txt b/Documentation/devicetree/bindings/mfd/rk808.txt
new file mode 100644
index 000000000..9e6e2592e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/rk808.txt
@@ -0,0 +1,177 @@
+RK808 Power Management Integrated Circuit
+
+Required properties:
+- compatible: "rockchip,rk808"
+- reg: I2C slave address
+- interrupt-parent: The parent interrupt controller.
+- interrupts: the interrupt outputs of the controller.
+- #clock-cells: from common clock binding; shall be set to 1 (multiple clock
+ outputs). See <dt-bindings/clock/rockchip,rk808.h> for clock IDs.
+
+Optional properties:
+- clock-output-names: From common clock binding to override the
+ default output clock name
+- rockchip,system-power-controller: Telling whether or not this pmic is controlling
+ the system power.
+- vcc1-supply: The input supply for DCDC_REG1
+- vcc2-supply: The input supply for DCDC_REG2
+- vcc3-supply: The input supply for DCDC_REG3
+- vcc4-supply: The input supply for DCDC_REG4
+- vcc6-supply: The input supply for LDO_REG1 and LDO_REG2
+- vcc7-supply: The input supply for LDO_REG3 and LDO_REG7
+- vcc8-supply: The input supply for SWITCH_REG1
+- vcc9-supply: The input supply for LDO_REG4 and LDO_REG5
+- vcc10-supply: The input supply for LDO_REG6
+- vcc11-supply: The input supply for LDO_REG8
+- vcc12-supply: The input supply for SWITCH_REG2
+
+Regulators: All the regulators of RK808 to be instantiated shall be
+listed in a child node named 'regulators'. Each regulator is represented
+by a child node of the 'regulators' node.
+
+ regulator-name {
+ /* standard regulator bindings here */
+ };
+
+Following regulators of the RK808 PMIC block are supported. Note that
+the 'n' in regulator name, as in DCDC_REGn or LDOn, represents the DCDC or LDO
+number as described in RK808 datasheet.
+
+ - DCDC_REGn
+ - valid values for n are 1 to 4.
+ - LDO_REGn
+ - valid values for n are 1 to 8.
+ - SWITCH_REGn
+ - valid values for n are 1 to 2
+
+Standard regulator bindings are used inside regulator subnodes. Check
+ Documentation/devicetree/bindings/regulator/regulator.txt
+for more details
+
+Example:
+ rk808: pmic@1b {
+ compatible = "rockchip,rk808";
+ clock-output-names = "xin32k", "rk808-clkout2";
+ interrupt-parent = <&gpio0>;
+ interrupts = <4 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pmic_int>;
+ reg = <0x1b>;
+ rockchip,system-power-controller;
+ wakeup-source;
+ #clock-cells = <1>;
+
+ vcc8-supply = <&vcc_18>;
+ vcc9-supply = <&vcc_io>;
+ vcc10-supply = <&vcc_io>;
+ vcc12-supply = <&vcc_io>;
+ vddio-supply = <&vccio_pmu>;
+
+ regulators {
+ vdd_cpu: DCDC_REG1 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <750000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-name = "vdd_arm";
+ };
+
+ vdd_gpu: DCDC_REG2 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <850000>;
+ regulator-max-microvolt = <1250000>;
+ regulator-name = "vdd_gpu";
+ };
+
+ vcc_ddr: DCDC_REG3 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-name = "vcc_ddr";
+ };
+
+ vcc_io: DCDC_REG4 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc_io";
+ };
+
+ vccio_pmu: LDO_REG1 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vccio_pmu";
+ };
+
+ vcc_tp: LDO_REG2 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcc_tp";
+ };
+
+ vdd_10: LDO_REG3 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-name = "vdd_10";
+ };
+
+ vcc18_lcd: LDO_REG4 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "vcc18_lcd";
+ };
+
+ vccio_sd: LDO_REG5 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vccio_sd";
+ };
+
+ vdd10_lcd: LDO_REG6 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-name = "vdd10_lcd";
+ };
+
+ vcc_18: LDO_REG7 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-name = "vcc_18";
+ };
+
+ vcca_codec: LDO_REG8 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-name = "vcca_codec";
+ };
+
+ vcc_wl: SWITCH_REG1 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-name = "vcc_wl";
+ };
+
+ vcc_lcd: SWITCH_REG2 {
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-name = "vcc_lcd";
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/rn5t618.txt b/Documentation/devicetree/bindings/mfd/rn5t618.txt
new file mode 100644
index 000000000..937785a3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/rn5t618.txt
@@ -0,0 +1,36 @@
+* Ricoh RN5T618 PMIC
+
+Ricoh RN5T618 is a power management IC which integrates 3 step-down
+DCDC converters, 7 low-dropout regulators, a Li-ion battery charger,
+fuel gauge, ADC, GPIOs and a watchdog timer. It can be controlled
+through a I2C interface.
+
+Required properties:
+ - compatible: should be "ricoh,rn5t618"
+ - reg: the I2C slave address of the device
+
+Sub-nodes:
+ - regulators: the node is required if the regulator functionality is
+ needed. The valid regulator names are: DCDC1, DCDC2, DCDC3, LDO1,
+ LDO2, LDO3, LDO4, LDO5, LDORTC1 and LDORTC2.
+ The common bindings for each individual regulator can be found in:
+ Documentation/devicetree/bindings/regulator/regulator.txt
+
+Example:
+
+ pmic@32 {
+ compatible = "ricoh,rn5t618";
+ reg = <0x32>;
+
+ regulators {
+ DCDC1 {
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ };
+
+ DCDC2 {
+ regulator-min-microvolt = <1175000>;
+ regulator-max-microvolt = <1175000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/s2mpa01.txt b/Documentation/devicetree/bindings/mfd/s2mpa01.txt
new file mode 100644
index 000000000..c13d3d8c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/s2mpa01.txt
@@ -0,0 +1,90 @@
+
+* Samsung S2MPA01 Voltage and Current Regulator
+
+The Samsung S2MPA01 is a multi-function device which includes high
+efficiency buck converters including Dual-Phase buck converter, various LDOs,
+and an RTC. It is interfaced to the host controller using an I2C interface.
+Each sub-block is addressed by the host system using different I2C slave
+addresses.
+
+Required properties:
+- compatible: Should be "samsung,s2mpa01-pmic".
+- reg: Specifies the I2C slave address of the PMIC block. It should be 0x66.
+
+Optional properties:
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+ the interrupts from s2mpa01 are delivered to.
+- interrupts: An interrupt specifier for the sole interrupt generated by the
+ device.
+
+Optional nodes:
+- regulators: The regulators of s2mpa01 that have to be instantiated should be
+ included in a sub-node named 'regulators'. Regulator nodes and constraints
+ included in this sub-node use the standard regulator bindings which are
+ documented elsewhere.
+
+Properties for BUCK regulator nodes:
+- regulator-ramp-delay: ramp delay in uV/us. May be 6250, 12500
+ (default), 25000, or 50000. May be 0 for disabling the ramp delay on
+ BUCK{1,2,3,4}.
+
+ In the absence of the regulator-ramp-delay property, the default ramp
+ delay will be used.
+
+ NOTE: Some BUCKs share the ramp rate setting i.e. same ramp value will be set
+ for a particular group of BUCKs. So provide same regulator-ramp-delay=<value>.
+
+ The following BUCKs share ramp settings:
+ * 1 and 6
+ * 2 and 4
+ * 8, 9, and 10
+
+The following are the names of the regulators that the s2mpa01 PMIC block
+supports. Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number
+as per the datasheet of s2mpa01.
+
+ - LDOn
+ - valid values for n are 1 to 26
+ - Example: LDO1, LD02, LDO26
+ - BUCKn
+ - valid values for n are 1 to 10.
+ - Example: BUCK1, BUCK2, BUCK9
+
+Example:
+
+ s2mpa01_pmic@66 {
+ compatible = "samsung,s2mpa01-pmic";
+ reg = <0x66>;
+
+ regulators {
+ ldo1_reg: LDO1 {
+ regulator-name = "VDD_ALIVE";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ };
+
+ ldo2_reg: LDO2 {
+ regulator-name = "VDDQ_MMC2";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-always-on;
+ };
+
+ buck1_reg: BUCK1 {
+ regulator-name = "vdd_mif";
+ regulator-min-microvolt = <950000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ buck2_reg: BUCK2 {
+ regulator-name = "vdd_arm";
+ regulator-min-microvolt = <950000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-ramp-delay = <50000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/s2mps11.txt b/Documentation/devicetree/bindings/mfd/s2mps11.txt
new file mode 100644
index 000000000..57a045016
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/s2mps11.txt
@@ -0,0 +1,139 @@
+
+* Samsung S2MPS11, S2MPS13, S2MPS14 and S2MPU02 Voltage and Current Regulator
+
+The Samsung S2MPS11 is a multi-function device which includes voltage and
+current regulators, RTC, charger controller and other sub-blocks. It is
+interfaced to the host controller using an I2C interface. Each sub-block is
+addressed by the host system using different I2C slave addresses.
+
+Required properties:
+- compatible: Should be "samsung,s2mps11-pmic" or "samsung,s2mps13-pmic"
+ or "samsung,s2mps14-pmic" or "samsung,s2mpu02-pmic".
+- reg: Specifies the I2C slave address of the pmic block. It should be 0x66.
+
+Optional properties:
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+ the interrupts from s2mps11 are delivered to.
+- interrupts: Interrupt specifiers for interrupt sources.
+
+Optional nodes:
+- clocks: s2mps11, s2mps13 and s5m8767 provide three(AP/CP/BT) buffered 32.768
+ KHz outputs, so to register these as clocks with common clock framework
+ instantiate a sub-node named "clocks". It uses the common clock binding
+ documented in :
+ [Documentation/devicetree/bindings/clock/clock-bindings.txt]
+ The s2mps14 provides two (AP/BT) buffered 32.768 KHz outputs.
+ - #clock-cells: should be 1.
+
+ - The following is the list of clocks generated by the controller. Each clock
+ is assigned an identifier and client nodes use this identifier to specify
+ the clock which they consume.
+ Clock ID Devices
+ ----------------------------------------------------------
+ 32KhzAP 0 S2MPS11, S2MPS13, S2MPS14, S5M8767
+ 32KhzCP 1 S2MPS11, S2MPS13, S5M8767
+ 32KhzBT 2 S2MPS11, S2MPS13, S2MPS14, S5M8767
+
+ - compatible: Should be one of: "samsung,s2mps11-clk", "samsung,s2mps13-clk",
+ "samsung,s2mps14-clk", "samsung,s5m8767-clk"
+
+- regulators: The regulators of s2mps11 that have to be instantiated should be
+included in a sub-node named 'regulators'. Regulator nodes included in this
+sub-node should be of the format as listed below.
+
+ regulator_name {
+ [standard regulator constraints....];
+ };
+
+ regulator-ramp-delay for BUCKs = [6250/12500/25000(default)/50000] uV/us
+
+ BUCK[2/3/4/6] supports disabling ramp delay on hardware, so explicitly
+ regulator-ramp-delay = <0> can be used for them to disable ramp delay.
+ In the absence of the regulator-ramp-delay property, the default ramp
+ delay will be used.
+
+NOTE: Some BUCKs share the ramp rate setting i.e. same ramp value will be set
+for a particular group of BUCKs. So provide same regulator-ramp-delay<value>.
+Grouping of BUCKs sharing ramp rate setting is as follow : BUCK[1, 6],
+BUCK[3, 4], and BUCK[7, 8, 10]
+
+On S2MPS14 the LDO10, LDO11 and LDO12 can be configured to external control
+over GPIO. To turn this feature on this property must be added to the regulator
+sub-node:
+ - samsung,ext-control-gpios: GPIO specifier for one GPIO
+ controlling this regulator (enable/disable);
+Example:
+ LDO12 {
+ regulator-name = "V_EMMC_2.8V";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ samsung,ext-control-gpios = <&gpk0 2 0>;
+ };
+
+
+The regulator constraints inside the regulator nodes use the standard regulator
+bindings which are documented elsewhere.
+
+The following are the names of the regulators that the s2mps11 pmic block
+supports. Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number
+as per the datasheet of s2mps11.
+
+ - LDOn
+ - valid values for n are:
+ - S2MPS11: 1 to 38
+ - S2MPS13: 1 to 40
+ - S2MPS14: 1 to 25
+ - S2MPU02: 1 to 28
+ - Example: LDO1, LDO2, LDO28
+ - BUCKn
+ - valid values for n are:
+ - S2MPS11: 1 to 10
+ - S2MPS13: 1 to 10
+ - S2MPS14: 1 to 5
+ - S2MPU02: 1 to 7
+ - Example: BUCK1, BUCK2, BUCK9
+
+Example:
+
+ s2mps11_pmic@66 {
+ compatible = "samsung,s2mps11-pmic";
+ reg = <0x66>;
+
+ s2m_osc: clocks {
+ compatible = "samsung,s2mps11-clk";
+ #clock-cells = <1>;
+ clock-output-names = "xx", "yy", "zz";
+ };
+
+ regulators {
+ ldo1_reg: LDO1 {
+ regulator-name = "VDD_ABB_3.3V";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo2_reg: LDO2 {
+ regulator-name = "VDD_ALIVE_1.1V";
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-always-on;
+ };
+
+ buck1_reg: BUCK1 {
+ regulator-name = "vdd_mif";
+ regulator-min-microvolt = <950000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ buck2_reg: BUCK2 {
+ regulator-name = "vdd_arm";
+ regulator-min-microvolt = <950000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ regulator-boot-on;
+ regulator-ramp-delay = <50000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/sky81452.txt b/Documentation/devicetree/bindings/mfd/sky81452.txt
new file mode 100644
index 000000000..35181794a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/sky81452.txt
@@ -0,0 +1,35 @@
+SKY81452 bindings
+
+Required properties:
+- compatible : Must be "skyworks,sky81452"
+- reg : I2C slave address
+
+Required child nodes:
+- backlight : container node for backlight following the binding
+ in video/backlight/sky81452-backlight.txt
+- regulator : container node for regulators following the binding
+ in regulator/sky81452-regulator.txt
+
+Example:
+
+ sky81452@2c {
+ compatible = "skyworks,sky81452";
+ reg = <0x2c>;
+
+ backlight {
+ compatible = "skyworks,sky81452-backlight";
+ name = "pwm-backlight";
+ led-sources = <0 1 2 3 6>;
+ skyworks,ignore-pwm;
+ skyworks,phase-shift;
+ skyworks,current-limit = <2300>;
+ };
+
+ regulator {
+ lout {
+ regulator-name = "sky81452-lout";
+ regulator-min-microvolt = <4500000>;
+ regulator-max-microvolt = <8000000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/stmpe.txt b/Documentation/devicetree/bindings/mfd/stmpe.txt
new file mode 100644
index 000000000..3fb68bfef
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/stmpe.txt
@@ -0,0 +1,29 @@
+* ST Microelectronics STMPE Multi-Functional Device
+
+STMPE is an MFD device which may expose the following inbuilt devices: gpio,
+keypad, touchscreen, adc, pwm, rotator.
+
+Required properties:
+ - compatible : "st,stmpe[610|801|811|1601|2401|2403]"
+ - reg : I2C/SPI address of the device
+
+Optional properties:
+ - interrupts : The interrupt outputs from the controller
+ - interrupt-controller : Marks the device node as an interrupt controller
+ - interrupt-parent : Specifies which IRQ controller we're connected to
+ - wakeup-source : Marks the input device as wakable
+ - st,autosleep-timeout : Valid entries (ms); 4, 16, 32, 64, 128, 256, 512 and 1024
+ - irq-gpio : If present, which GPIO to use for event IRQ
+
+Example:
+
+ stmpe1601: stmpe1601@40 {
+ compatible = "st,stmpe1601";
+ reg = <0x40>;
+ interrupts = <26 0x4>;
+ interrupt-parent = <&gpio6>;
+ interrupt-controller;
+
+ wakeup-source;
+ st,autosleep-timeout = <1024>;
+ };
diff --git a/Documentation/devicetree/bindings/mfd/sun6i-prcm.txt b/Documentation/devicetree/bindings/mfd/sun6i-prcm.txt
new file mode 100644
index 000000000..03c5a551d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/sun6i-prcm.txt
@@ -0,0 +1,59 @@
+* Allwinner PRCM (Power/Reset/Clock Management) Multi-Functional Device
+
+PRCM is an MFD device exposing several Power Management related devices
+(like clks and reset controllers).
+
+Required properties:
+ - compatible: "allwinner,sun6i-a31-prcm" or "allwinner,sun8i-a23-prcm"
+ - reg: The PRCM registers range
+
+The prcm node may contain several subdevices definitions:
+ - see Documentation/devicetree/clk/sunxi.txt for clock devices
+ - see Documentation/devicetree/reset/allwinner,sunxi-clock-reset.txt for reset
+ controller devices
+
+
+Example:
+
+ prcm: prcm@01f01400 {
+ compatible = "allwinner,sun6i-a31-prcm";
+ reg = <0x01f01400 0x200>;
+
+ /* Put subdevices here */
+ ar100: ar100_clk {
+ compatible = "allwinner,sun6i-a31-ar100-clk";
+ #clock-cells = <0>;
+ clocks = <&osc32k>, <&osc24M>, <&pll6>, <&pll6>;
+ };
+
+ ahb0: ahb0_clk {
+ compatible = "fixed-factor-clock";
+ #clock-cells = <0>;
+ clock-div = <1>;
+ clock-mult = <1>;
+ clocks = <&ar100_div>;
+ clock-output-names = "ahb0";
+ };
+
+ apb0: apb0_clk {
+ compatible = "allwinner,sun6i-a31-apb0-clk";
+ #clock-cells = <0>;
+ clocks = <&ahb0>;
+ clock-output-names = "apb0";
+ };
+
+ apb0_gates: apb0_gates_clk {
+ compatible = "allwinner,sun6i-a31-apb0-gates-clk";
+ #clock-cells = <1>;
+ clocks = <&apb0>;
+ clock-output-names = "apb0_pio", "apb0_ir",
+ "apb0_timer01", "apb0_p2wi",
+ "apb0_uart", "apb0_1wire",
+ "apb0_i2c";
+ };
+
+ apb0_rst: apb0_rst {
+ compatible = "allwinner,sun6i-a31-clock-reset";
+ #reset-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/syscon.txt b/Documentation/devicetree/bindings/mfd/syscon.txt
new file mode 100644
index 000000000..fe8150bb3
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/syscon.txt
@@ -0,0 +1,20 @@
+* System Controller Registers R/W driver
+
+System controller node represents a register region containing a set
+of miscellaneous registers. The registers are not cohesive enough to
+represent as any specific type of device. The typical use-case is for
+some other node's driver, or platform-specific code, to acquire a
+reference to the syscon node (e.g. by phandle, node path, or search
+using a specific compatible value), interrogate the node (or associated
+OS driver) to determine the location of the registers, and access the
+registers directly.
+
+Required properties:
+- compatible: Should contain "syscon".
+- reg: the register region can be accessed from syscon
+
+Examples:
+gpr: iomuxc-gpr@020e0000 {
+ compatible = "fsl,imx6q-iomuxc-gpr", "syscon";
+ reg = <0x020e0000 0x38>;
+};
diff --git a/Documentation/devicetree/bindings/mfd/tc3589x.txt b/Documentation/devicetree/bindings/mfd/tc3589x.txt
new file mode 100644
index 000000000..6fcedba46
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/tc3589x.txt
@@ -0,0 +1,107 @@
+* Toshiba TC3589x multi-purpose expander
+
+The Toshiba TC3589x series are I2C-based MFD devices which may expose the
+following built-in devices: gpio, keypad, rotator (vibrator), PWM (for
+e.g. LEDs or vibrators) The included models are:
+
+- TC35890
+- TC35892
+- TC35893
+- TC35894
+- TC35895
+- TC35896
+
+Required properties:
+ - compatible : must be "toshiba,tc35890", "toshiba,tc35892", "toshiba,tc35893",
+ "toshiba,tc35894", "toshiba,tc35895" or "toshiba,tc35896"
+ - reg : I2C address of the device
+ - interrupt-parent : specifies which IRQ controller we're connected to
+ - interrupts : the interrupt on the parent the controller is connected to
+ - interrupt-controller : marks the device node as an interrupt controller
+ - #interrupt-cells : should be <1>, the first cell is the IRQ offset on this
+ TC3589x interrupt controller.
+
+Optional nodes:
+
+- GPIO
+ This GPIO module inside the TC3589x has 24 (TC35890, TC35892) or 20
+ (other models) GPIO lines.
+ - compatible : must be "toshiba,tc3589x-gpio"
+ - interrupts : interrupt on the parent, which must be the tc3589x MFD device
+ - interrupt-controller : marks the device node as an interrupt controller
+ - #interrupt-cells : should be <2>, the first cell is the IRQ offset on this
+ TC3589x GPIO interrupt controller, the second cell is the interrupt flags
+ in accordance with <dt-bindings/interrupt-controller/irq.h>. The following
+ flags are valid:
+ - IRQ_TYPE_LEVEL_LOW
+ - IRQ_TYPE_LEVEL_HIGH
+ - IRQ_TYPE_EDGE_RISING
+ - IRQ_TYPE_EDGE_FALLING
+ - IRQ_TYPE_EDGE_BOTH
+ - gpio-controller : marks the device node as a GPIO controller
+ - #gpio-cells : should be <2>, the first cell is the GPIO offset on this
+ GPIO controller, the second cell is the flags.
+
+- Keypad
+ This keypad is the same on all variants, supporting up to 96 different
+ keys. The linux-specific properties are modeled on those already existing
+ in other input drivers.
+ - compatible : must be "toshiba,tc3589x-keypad"
+ - debounce-delay-ms : debounce interval in milliseconds
+ - keypad,num-rows : number of rows in the matrix, see
+ bindings/input/matrix-keymap.txt
+ - keypad,num-columns : number of columns in the matrix, see
+ bindings/input/matrix-keymap.txt
+ - linux,keymap: the definition can be found in
+ bindings/input/matrix-keymap.txt
+ - linux,no-autorepeat: do no enable autorepeat feature.
+ - linux,wakeup: use any event on keypad as wakeup event.
+
+Example:
+
+tc35893@44 {
+ compatible = "toshiba,tc35893";
+ reg = <0x44>;
+ interrupt-parent = <&gpio6>;
+ interrupts = <26 IRQ_TYPE_EDGE_RISING>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ tc3589x_gpio {
+ compatible = "toshiba,tc3589x-gpio";
+ interrupts = <0>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+ tc3589x_keypad {
+ compatible = "toshiba,tc3589x-keypad";
+ interrupts = <6>;
+ debounce-delay-ms = <4>;
+ keypad,num-columns = <8>;
+ keypad,num-rows = <8>;
+ linux,no-autorepeat;
+ linux,wakeup;
+ linux,keymap = <0x0301006b
+ 0x04010066
+ 0x06040072
+ 0x040200d7
+ 0x0303006a
+ 0x0205000e
+ 0x0607008b
+ 0x0500001c
+ 0x0403000b
+ 0x03040034
+ 0x05020067
+ 0x0305006c
+ 0x040500e7
+ 0x0005009e
+ 0x06020073
+ 0x01030039
+ 0x07060069
+ 0x050500d9>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt b/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt
new file mode 100644
index 000000000..20963c76b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/ti-keystone-devctrl.txt
@@ -0,0 +1,19 @@
+* Device tree bindings for Texas Instruments keystone device state control
+
+The Keystone II devices have a set of registers that are used to control
+the status of its peripherals. This node is intended to allow access to
+this functionality.
+
+Required properties:
+
+- compatible: "ti,keystone-devctrl", "syscon"
+
+- reg: contains offset/length value for device state control
+ registers space.
+
+Example:
+
+devctrl: device-state-control@0x02620000 {
+ compatible = "ti,keystone-devctrl", "syscon";
+ reg = <0x02620000 0x1000>;
+};
diff --git a/Documentation/devicetree/bindings/mfd/tps6507x.txt b/Documentation/devicetree/bindings/mfd/tps6507x.txt
new file mode 100755
index 000000000..8fffa3c5e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/tps6507x.txt
@@ -0,0 +1,91 @@
+TPS6507x Power Management Integrated Circuit
+
+Required properties:
+- compatible: "ti,tps6507x"
+- reg: I2C slave address
+- regulators: This is the list of child nodes that specify the regulator
+ initialization data for defined regulators. Not all regulators for the
+ given device need to be present. The definition for each of these nodes
+ is defined using the standard binding for regulators found at
+ Documentation/devicetree/bindings/regulator/regulator.txt.
+ The regulator is matched with the regulator-compatible.
+
+ The valid regulator-compatible values are:
+ tps6507x: vdcdc1, vdcdc2, vdcdc3, vldo1, vldo2
+- xxx-supply: Input voltage supply regulator.
+ These entries are required if regulators are enabled for a device.
+ Missing of these properties can cause the regulator registration
+ fails.
+ If some of input supply is powered through battery or always-on
+ supply then also it is require to have these parameters with proper
+ node handle of always on power supply.
+ tps6507x:
+ vindcdc1_2-supply: VDCDC1 and VDCDC2 input.
+ vindcdc3-supply : VDCDC3 input.
+ vldo1_2-supply : VLDO1 and VLDO2 input.
+
+Regulator Optional properties:
+- defdcdc_default: It's property of DCDC2 and DCDC3 regulators.
+ 0: If defdcdc pin of DCDC2/DCDC3 is pulled to GND.
+ 1: If defdcdc pin of DCDC2/DCDC3 is driven HIGH.
+ If this property is not defined, it defaults to 0 (not enabled).
+
+Example:
+
+ pmu: tps6507x@48 {
+ compatible = "ti,tps6507x";
+ reg = <0x48>;
+
+ vindcdc1_2-supply = <&vbat>;
+ vindcdc3-supply = <...>;
+ vinldo1_2-supply = <...>;
+
+ regulators {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ vdcdc1_reg: regulator@0 {
+ regulator-compatible = "VDCDC1";
+ reg = <0>;
+ regulator-min-microvolt = <3150000>;
+ regulator-max-microvolt = <3450000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+ vdcdc2_reg: regulator@1 {
+ regulator-compatible = "VDCDC2";
+ reg = <1>;
+ regulator-min-microvolt = <1710000>;
+ regulator-max-microvolt = <3450000>;
+ regulator-always-on;
+ regulator-boot-on;
+ defdcdc_default = <1>;
+ };
+ vdcdc3_reg: regulator@2 {
+ regulator-compatible = "VDCDC3";
+ reg = <2>;
+ regulator-min-microvolt = <950000>
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ regulator-boot-on;
+ defdcdc_default = <1>;
+ };
+ ldo1_reg: regulator@3 {
+ regulator-compatible = "LDO1";
+ reg = <3>;
+ regulator-min-microvolt = <1710000>;
+ regulator-max-microvolt = <1890000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+ ldo2_reg: regulator@4 {
+ regulator-compatible = "LDO2";
+ reg = <4>;
+ regulator-min-microvolt = <1140000>;
+ regulator-max-microvolt = <1320000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+ };
+
+ };
diff --git a/Documentation/devicetree/bindings/mfd/tps65910.txt b/Documentation/devicetree/bindings/mfd/tps65910.txt
new file mode 100644
index 000000000..38833e63a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/tps65910.txt
@@ -0,0 +1,201 @@
+TPS65910 Power Management Integrated Circuit
+
+Required properties:
+- compatible: "ti,tps65910" or "ti,tps65911"
+- reg: I2C slave address
+- interrupts: the interrupt outputs of the controller
+- #gpio-cells: number of cells to describe a GPIO, this should be 2.
+ The first cell is the GPIO number.
+ The second cell is used to specify additional options <unused>.
+- gpio-controller: mark the device as a GPIO controller
+- #interrupt-cells: the number of cells to describe an IRQ, this should be 2.
+ The first cell is the IRQ number.
+ The second cell is the flags, encoded as the trigger masks from
+ Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+- regulators: This is the list of child nodes that specify the regulator
+ initialization data for defined regulators. Not all regulators for the given
+ device need to be present. The definition for each of these nodes is defined
+ using the standard binding for regulators found at
+ Documentation/devicetree/bindings/regulator/regulator.txt.
+ The regulator is matched with the regulator-compatible.
+
+ The valid regulator-compatible values are:
+ tps65910: vrtc, vio, vdd1, vdd2, vdd3, vdig1, vdig2, vpll, vdac, vaux1,
+ vaux2, vaux33, vmmc, vbb
+ tps65911: vrtc, vio, vdd1, vdd3, vddctrl, ldo1, ldo2, ldo3, ldo4, ldo5,
+ ldo6, ldo7, ldo8
+
+- xxx-supply: Input voltage supply regulator.
+ These entries are require if regulators are enabled for a device. Missing of these
+ properties can cause the regulator registration fails.
+ If some of input supply is powered through battery or always-on supply then
+ also it is require to have these parameters with proper node handle of always
+ on power supply.
+ tps65910:
+ vcc1-supply: VDD1 input.
+ vcc2-supply: VDD2 input.
+ vcc3-supply: VAUX33 and VMMC input.
+ vcc4-supply: VAUX1 and VAUX2 input.
+ vcc5-supply: VPLL and VDAC input.
+ vcc6-supply: VDIG1 and VDIG2 input.
+ vcc7-supply: VRTC and VBB input.
+ vccio-supply: VIO input.
+ tps65911:
+ vcc1-supply: VDD1 input.
+ vcc2-supply: VDD2 input.
+ vcc3-supply: LDO6, LDO7 and LDO8 input.
+ vcc4-supply: LDO5 input.
+ vcc5-supply: LDO3 and LDO4 input.
+ vcc6-supply: LDO1 and LDO2 input.
+ vcc7-supply: VRTC input.
+ vccio-supply: VIO input.
+
+Optional properties:
+- ti,vmbch-threshold: (tps65911) main battery charged threshold
+ comparator. (see VMBCH_VSEL in TPS65910 datasheet)
+- ti,vmbch2-threshold: (tps65911) main battery discharged threshold
+ comparator. (see VMBCH_VSEL in TPS65910 datasheet)
+- ti,en-ck32k-xtal: enable external 32-kHz crystal oscillator (see CK32K_CTRL
+ in TPS6591X datasheet)
+- ti,en-gpio-sleep: enable sleep control for gpios
+ There should be 9 entries here, one for each gpio.
+- ti,system-power-controller: Telling whether or not this pmic is controlling
+ the system power.
+
+Regulator Optional properties:
+- ti,regulator-ext-sleep-control: enable external sleep
+ control through external inputs [0 (not enabled), 1 (EN1), 2 (EN2) or 4(EN3)]
+ If this property is not defined, it defaults to 0 (not enabled).
+
+Example:
+
+ pmu: tps65910@d2 {
+ compatible = "ti,tps65910";
+ reg = <0xd2>;
+ interrupt-parent = <&intc>;
+ interrupts = < 0 118 0x04 >;
+
+ #gpio-cells = <2>;
+ gpio-controller;
+
+ #interrupt-cells = <2>;
+ interrupt-controller;
+
+ ti,system-power-controller;
+
+ ti,vmbch-threshold = 0;
+ ti,vmbch2-threshold = 0;
+ ti,en-ck32k-xtal;
+ ti,en-gpio-sleep = <0 0 1 0 0 0 0 0 0>;
+
+ vcc1-supply = <&reg_parent>;
+ vcc2-supply = <&some_reg>;
+ vcc3-supply = <...>;
+ vcc4-supply = <...>;
+ vcc5-supply = <...>;
+ vcc6-supply = <...>;
+ vcc7-supply = <...>;
+ vccio-supply = <...>;
+
+ regulators {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ vdd1_reg: regulator@0 {
+ regulator-compatible = "vdd1";
+ reg = <0>;
+ regulator-min-microvolt = < 600000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-always-on;
+ regulator-boot-on;
+ ti,regulator-ext-sleep-control = <0>;
+ };
+ vdd2_reg: regulator@1 {
+ regulator-compatible = "vdd2";
+ reg = <1>;
+ regulator-min-microvolt = < 600000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-always-on;
+ regulator-boot-on;
+ ti,regulator-ext-sleep-control = <4>;
+ };
+ vddctrl_reg: regulator@2 {
+ regulator-compatible = "vddctrl";
+ reg = <2>;
+ regulator-min-microvolt = < 600000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-always-on;
+ regulator-boot-on;
+ ti,regulator-ext-sleep-control = <0>;
+ };
+ vio_reg: regulator@3 {
+ regulator-compatible = "vio";
+ reg = <3>;
+ regulator-min-microvolt = <1500000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ regulator-boot-on;
+ ti,regulator-ext-sleep-control = <1>;
+ };
+ ldo1_reg: regulator@4 {
+ regulator-compatible = "ldo1";
+ reg = <4>;
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3300000>;
+ ti,regulator-ext-sleep-control = <0>;
+ };
+ ldo2_reg: regulator@5 {
+ regulator-compatible = "ldo2";
+ reg = <5>;
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ ti,regulator-ext-sleep-control = <0>;
+ };
+ ldo3_reg: regulator@6 {
+ regulator-compatible = "ldo3";
+ reg = <6>;
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3300000>;
+ ti,regulator-ext-sleep-control = <0>;
+ };
+ ldo4_reg: regulator@7 {
+ regulator-compatible = "ldo4";
+ reg = <7>;
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ ti,regulator-ext-sleep-control = <0>;
+ };
+ ldo5_reg: regulator@8 {
+ regulator-compatible = "ldo5";
+ reg = <8>;
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3300000>;
+ ti,regulator-ext-sleep-control = <0>;
+ };
+ ldo6_reg: regulator@9 {
+ regulator-compatible = "ldo6";
+ reg = <9>;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ ti,regulator-ext-sleep-control = <0>;
+ };
+ ldo7_reg: regulator@10 {
+ regulator-compatible = "ldo7";
+ reg = <10>;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-always-on;
+ regulator-boot-on;
+ ti,regulator-ext-sleep-control = <1>;
+ };
+ ldo8_reg: regulator@11 {
+ regulator-compatible = "ldo8";
+ reg = <11>;
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ ti,regulator-ext-sleep-control = <1>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mfd/twl-familly.txt b/Documentation/devicetree/bindings/mfd/twl-familly.txt
new file mode 100644
index 000000000..a66fcf946
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/twl-familly.txt
@@ -0,0 +1,47 @@
+Texas Instruments TWL family
+
+The TWLs are Integrated Power Management Chips.
+Some version might contain much more analog function like
+USB transceiver or Audio amplifier.
+These chips are connected to an i2c bus.
+
+
+Required properties:
+- compatible : Must be "ti,twl4030";
+ For Integrated power-management/audio CODEC device used in OMAP3
+ based boards
+- compatible : Must be "ti,twl6030";
+ For Integrated power-management used in OMAP4 based boards
+- interrupts : This i2c device has an IRQ line connected to the main SoC
+- interrupt-controller : Since the twl support several interrupts internally,
+ it is considered as an interrupt controller cascaded to the SoC one.
+- #interrupt-cells = <1>;
+- interrupt-parent : The parent interrupt controller.
+
+Optional node:
+- Child nodes contain in the twl. The twl family is made of several variants
+ that support a different number of features.
+ The children nodes will thus depend of the capability of the variant.
+
+
+Example:
+/*
+ * Integrated Power Management Chip
+ * http://www.ti.com/lit/ds/symlink/twl6030.pdf
+ */
+twl@48 {
+ compatible = "ti,twl6030";
+ reg = <0x48>;
+ interrupts = <39>; /* IRQ_SYS_1N cascaded to gic */
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&gic>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ twl_rtc {
+ compatible = "ti,twl_rtc";
+ interrupts = <11>;
+ reg = <0>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/twl4030-audio.txt b/Documentation/devicetree/bindings/mfd/twl4030-audio.txt
new file mode 100644
index 000000000..414d2ae0a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/twl4030-audio.txt
@@ -0,0 +1,46 @@
+Texas Instruments TWL family (twl4030) audio module
+
+The audio module inside the TWL family consist of an audio codec and a vibra
+driver.
+
+Required properties:
+- compatible : must be "ti,twl4030-audio"
+
+Optional properties, nodes:
+
+Audio functionality:
+- codec { }: Need to be present if the audio functionality is used. Within this
+ section the following options can be used:
+- ti,digimic_delay: Delay need after enabling the digimic to reduce artifacts
+ from the start of the recorded sample (in ms)
+-ti,ramp_delay_value: HS ramp delay configuration to reduce pop noise
+-ti,hs_extmute: Use external mute for HS pop reduction
+-ti,hs_extmute_gpio: Use external GPIO to control the external mute
+-ti,offset_cncl_path: Offset cancellation path selection, refer to TRM for the
+ valid values.
+
+Vibra functionality
+- ti,enable-vibra: Need to be set to <1> if the vibra functionality is used. if
+ missing or it is 0, the vibra functionality is disabled.
+
+Example:
+&i2c1 {
+ clock-frequency = <2600000>;
+
+ twl: twl@48 {
+ reg = <0x48>;
+ interrupts = <7>; /* SYS_NIRQ cascaded to intc */
+ interrupt-parent = <&intc>;
+
+ twl_audio: audio {
+ compatible = "ti,twl4030-audio";
+
+ ti,enable-vibra = <1>;
+
+ codec {
+ ti,ramp_delay_value = <3>;
+ };
+
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/twl4030-power.txt b/Documentation/devicetree/bindings/mfd/twl4030-power.txt
new file mode 100644
index 000000000..3d1996331
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/twl4030-power.txt
@@ -0,0 +1,48 @@
+Texas Instruments TWL family (twl4030) reset and power management module
+
+The power management module inside the TWL family provides several facilities
+to control the power resources, including power scripts. For now, the
+binding only supports the complete shutdown of the system after poweroff.
+
+Required properties:
+- compatible : must be one of the following
+ "ti,twl4030-power"
+ "ti,twl4030-power-reset"
+ "ti,twl4030-power-idle"
+ "ti,twl4030-power-idle-osc-off"
+
+The use of ti,twl4030-power-reset is recommended at least on
+3530 that needs a special configuration for warm reset to work.
+
+When using ti,twl4030-power-idle, the TI recommended configuration
+for idle modes is loaded to the tlw4030 PMIC.
+
+When using ti,twl4030-power-idle-osc-off, the TI recommended
+configuration is used with the external oscillator being shut
+down during off-idle. Note that this does not work on all boards
+depending on how the external oscillator is wired.
+
+Optional properties:
+
+- ti,system-power-controller: This indicates that TWL4030 is the
+ power supply master of the system. With this flag, the chip will
+ initiate an ACTIVE-to-OFF or SLEEP-to-OFF transition when the
+ system poweroffs.
+
+- ti,use_poweroff: Deprecated name for ti,system-power-controller
+
+Example:
+&i2c1 {
+ clock-frequency = <2600000>;
+
+ twl: twl@48 {
+ reg = <0x48>;
+ interrupts = <7>; /* SYS_NIRQ cascaded to intc */
+ interrupt-parent = <&intc>;
+
+ twl_power: power {
+ compatible = "ti,twl4030-power";
+ ti,use_poweroff;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/mfd/twl6040.txt b/Documentation/devicetree/bindings/mfd/twl6040.txt
new file mode 100644
index 000000000..a41157b5d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/twl6040.txt
@@ -0,0 +1,67 @@
+Texas Instruments TWL6040 family
+
+The TWL6040s are 8-channel high quality low-power audio codecs providing audio,
+vibra and GPO functionality on OMAP4+ platforms.
+They are connected ot the host processor via i2c for commands, McPDM for audio
+data and commands.
+
+Required properties:
+- compatible : "ti,twl6040" for twl6040, "ti,twl6041" for twl6041
+- reg: must be 0x4b for i2c address
+- interrupts: twl6040 has one interrupt line connecteded to the main SoC
+- interrupt-parent: The parent interrupt controller
+- gpio-controller:
+- #gpio-cells = <1>: twl6040 provides GPO lines.
+- twl6040,audpwron-gpio: Power on GPIO line for the twl6040
+
+- vio-supply: Regulator for the twl6040 VIO supply
+- v2v1-supply: Regulator for the twl6040 V2V1 supply
+
+Optional properties, nodes:
+- enable-active-high: To power on the twl6040 during boot.
+- clocks: phandle to the clk32k clock provider
+- clock-names: Must be "clk32k"
+
+Vibra functionality
+Required properties:
+- vddvibl-supply: Regulator for the left vibra motor
+- vddvibr-supply: Regulator for the right vibra motor
+- vibra { }: Configuration section for vibra parameters containing the following
+ properties:
+- ti,vibldrv-res: Resistance parameter for left driver
+- ti,vibrdrv-res: Resistance parameter for right driver
+- ti,viblmotor-res: Resistance parameter for left motor
+- ti,viblmotor-res: Resistance parameter for right motor
+
+Optional properties within vibra { } section:
+- vddvibl_uV: If the vddvibl default voltage need to be changed
+- vddvibr_uV: If the vddvibr default voltage need to be changed
+
+Example:
+&i2c1 {
+ twl6040: twl@4b {
+ compatible = "ti,twl6040";
+
+ interrupts = <0 119 4>;
+ interrupt-parent = <&gic>;
+ twl6040,audpwron-gpio = <&gpio4 31 0>;
+
+ vio-supply = <&v1v8>;
+ v2v1-supply = <&v2v1>;
+ enable-active-high;
+
+ /* regulators for vibra motor */
+ vddvibl-supply = <&vbat>;
+ vddvibr-supply = <&vbat>;
+
+ vibra {
+ /* Vibra driver, motor resistance parameters */
+ ti,vibldrv-res = <8>;
+ ti,vibrdrv-res = <3>;
+ ti,viblmotor-res = <10>;
+ ti,vibrmotor-res = <10>;
+ };
+ };
+};
+
+/include/ "twl6040.dtsi"
diff --git a/Documentation/devicetree/bindings/mipi/dsi/mipi-dsi-bus.txt b/Documentation/devicetree/bindings/mipi/dsi/mipi-dsi-bus.txt
new file mode 100644
index 000000000..973c27273
--- /dev/null
+++ b/Documentation/devicetree/bindings/mipi/dsi/mipi-dsi-bus.txt
@@ -0,0 +1,98 @@
+MIPI DSI (Display Serial Interface) busses
+==========================================
+
+The MIPI Display Serial Interface specifies a serial bus and a protocol for
+communication between a host and up to four peripherals. This document will
+define the syntax used to represent a DSI bus in a device tree.
+
+This document describes DSI bus-specific properties only or defines existing
+standard properties in the context of the DSI bus.
+
+Each DSI host provides a DSI bus. The DSI host controller's node contains a
+set of properties that characterize the bus. Child nodes describe individual
+peripherals on that bus.
+
+The following assumes that only a single peripheral is connected to a DSI
+host. Experience shows that this is true for the large majority of setups.
+
+DSI host
+--------
+
+In addition to the standard properties and those defined by the parent bus of
+a DSI host, the following properties apply to a node representing a DSI host.
+
+Required properties:
+- #address-cells: The number of cells required to represent an address on the
+ bus. DSI peripherals are addressed using a 2-bit virtual channel number, so
+ a maximum of 4 devices can be addressed on a single bus. Hence the value of
+ this property should be 1.
+- #size-cells: Should be 0. There are cases where it makes sense to use a
+ different value here. See below.
+
+DSI peripheral
+--------------
+
+Peripherals are represented as child nodes of the DSI host's node. Properties
+described here apply to all DSI peripherals, but individual bindings may want
+to define additional, device-specific properties.
+
+Required properties:
+- reg: The virtual channel number of a DSI peripheral. Must be in the range
+ from 0 to 3.
+
+Some DSI peripherals respond to more than a single virtual channel. In that
+case two alternative representations can be chosen:
+- The reg property can take multiple entries, one for each virtual channel
+ that the peripheral responds to.
+- If the virtual channels that a peripheral responds to are consecutive, the
+ #size-cells can be set to 1. The first cell of each entry in the reg
+ property is the number of the first virtual channel and the second cell is
+ the number of consecutive virtual channels.
+
+Example
+-------
+
+ dsi-host {
+ ...
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* peripheral responds to virtual channel 0 */
+ peripheral@0 {
+ compatible = "...";
+ reg = <0>;
+ };
+
+ ...
+ };
+
+ dsi-host {
+ ...
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ /* peripheral responds to virtual channels 0 and 2 */
+ peripheral@0 {
+ compatible = "...";
+ reg = <0, 2>;
+ };
+
+ ...
+ };
+
+ dsi-host {
+ ...
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* peripheral responds to virtual channels 1, 2 and 3 */
+ peripheral@1 {
+ compatible = "...";
+ reg = <1 3>;
+ };
+
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/mipi/nvidia,tegra114-mipi.txt b/Documentation/devicetree/bindings/mipi/nvidia,tegra114-mipi.txt
new file mode 100644
index 000000000..e4a25cedc
--- /dev/null
+++ b/Documentation/devicetree/bindings/mipi/nvidia,tegra114-mipi.txt
@@ -0,0 +1,41 @@
+NVIDIA Tegra MIPI pad calibration controller
+
+Required properties:
+- compatible: "nvidia,tegra<chip>-mipi"
+- reg: Physical base address and length of the controller's registers.
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - mipi-cal
+- #nvidia,mipi-calibrate-cells: Should be 1. The cell is a bitmask of the pads
+ that need to be calibrated for a given device.
+
+User nodes need to contain an nvidia,mipi-calibrate property that has a
+phandle to refer to the calibration controller node and a bitmask of the pads
+that need to be calibrated.
+
+Example:
+
+ mipi: mipi@700e3000 {
+ compatible = "nvidia,tegra114-mipi";
+ reg = <0x700e3000 0x100>;
+ clocks = <&tegra_car TEGRA114_CLK_MIPI_CAL>;
+ clock-names = "mipi-cal";
+ #nvidia,mipi-calibrate-cells = <1>;
+ };
+
+ ...
+
+ host1x@50000000 {
+ ...
+
+ dsi@54300000 {
+ ...
+
+ nvidia,mipi-calibrate = <&mipi 0x060>;
+
+ ...
+ };
+
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/mips/brcm/brcm,bmips.txt b/Documentation/devicetree/bindings/mips/brcm/brcm,bmips.txt
new file mode 100644
index 000000000..8ef71b408
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/brcm/brcm,bmips.txt
@@ -0,0 +1,8 @@
+* Broadcom MIPS (BMIPS) CPUs
+
+Required properties:
+- compatible: "brcm,bmips3300", "brcm,bmips4350", "brcm,bmips4380",
+ "brcm,bmips5000"
+
+- mips-hpt-frequency: This is common to all CPUs in the system so it lives
+ under the "cpus" node.
diff --git a/Documentation/devicetree/bindings/mips/brcm/soc.txt b/Documentation/devicetree/bindings/mips/brcm/soc.txt
new file mode 100644
index 000000000..7bab90cc4
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/brcm/soc.txt
@@ -0,0 +1,12 @@
+* Broadcom cable/DSL/settop platforms
+
+Required properties:
+
+- compatible: "brcm,bcm3384", "brcm,bcm33843"
+ "brcm,bcm3384-viper", "brcm,bcm33843-viper"
+ "brcm,bcm6328", "brcm,bcm6368",
+ "brcm,bcm7125", "brcm,bcm7346", "brcm,bcm7358", "brcm,bcm7360",
+ "brcm,bcm7362", "brcm,bcm7420", "brcm,bcm7425"
+
+The experimental -viper variants are for running Linux on the 3384's
+BMIPS4355 cable modem CPU instead of the BMIPS5000 application processor.
diff --git a/Documentation/devicetree/bindings/mips/cavium/bootbus.txt b/Documentation/devicetree/bindings/mips/cavium/bootbus.txt
new file mode 100644
index 000000000..658147822
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/cavium/bootbus.txt
@@ -0,0 +1,126 @@
+* Boot Bus
+
+The Octeon Boot Bus is a configurable parallel bus with 8 chip
+selects. Each chip select is independently configurable.
+
+Properties:
+- compatible: "cavium,octeon-3860-bootbus"
+
+ Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
+
+- reg: The base address of the Boot Bus' register bank.
+
+- #address-cells: Must be <2>. The first cell is the chip select
+ within the bootbus. The second cell is the offset from the chip select.
+
+- #size-cells: Must be <1>.
+
+- ranges: There must be one one triplet of (child-bus-address,
+ parent-bus-address, length) for each active chip select. If the
+ length element for any triplet is zero, the chip select is disabled,
+ making it inactive.
+
+The configuration parameters for each chip select are stored in child
+nodes.
+
+Configuration Properties:
+- compatible: "cavium,octeon-3860-bootbus-config"
+
+- cavium,cs-index: A single cell indicating the chip select that
+ corresponds to this configuration.
+
+- cavium,t-adr: A cell specifying the ADR timing (in nS).
+
+- cavium,t-ce: A cell specifying the CE timing (in nS).
+
+- cavium,t-oe: A cell specifying the OE timing (in nS).
+
+- cavium,t-we: A cell specifying the WE timing (in nS).
+
+- cavium,t-rd-hld: A cell specifying the RD_HLD timing (in nS).
+
+- cavium,t-wr-hld: A cell specifying the WR_HLD timing (in nS).
+
+- cavium,t-pause: A cell specifying the PAUSE timing (in nS).
+
+- cavium,t-wait: A cell specifying the WAIT timing (in nS).
+
+- cavium,t-page: A cell specifying the PAGE timing (in nS).
+
+- cavium,t-rd-dly: A cell specifying the RD_DLY timing (in nS).
+
+- cavium,pages: A cell specifying the PAGES parameter (0 = 8 bytes, 1
+ = 2 bytes, 2 = 4 bytes, 3 = 8 bytes).
+
+- cavium,wait-mode: Optional. If present, wait mode (WAITM) is selected.
+
+- cavium,page-mode: Optional. If present, page mode (PAGEM) is selected.
+
+- cavium,bus-width: A cell specifying the WIDTH parameter (in bits) of
+ the bus for this chip select.
+
+- cavium,ale-mode: Optional. If present, ALE mode is selected.
+
+- cavium,sam-mode: Optional. If present, SAM mode is selected.
+
+- cavium,or-mode: Optional. If present, OR mode is selected.
+
+Example:
+ bootbus: bootbus@1180000000000 {
+ compatible = "cavium,octeon-3860-bootbus";
+ reg = <0x11800 0x00000000 0x0 0x200>;
+ /* The chip select number and offset */
+ #address-cells = <2>;
+ /* The size of the chip select region */
+ #size-cells = <1>;
+ ranges = <0 0 0x0 0x1f400000 0xc00000>,
+ <1 0 0x10000 0x30000000 0>,
+ <2 0 0x10000 0x40000000 0>,
+ <3 0 0x10000 0x50000000 0>,
+ <4 0 0x0 0x1d020000 0x10000>,
+ <5 0 0x0 0x1d040000 0x10000>,
+ <6 0 0x0 0x1d050000 0x10000>,
+ <7 0 0x10000 0x90000000 0>;
+
+ cavium,cs-config@0 {
+ compatible = "cavium,octeon-3860-bootbus-config";
+ cavium,cs-index = <0>;
+ cavium,t-adr = <20>;
+ cavium,t-ce = <60>;
+ cavium,t-oe = <60>;
+ cavium,t-we = <45>;
+ cavium,t-rd-hld = <35>;
+ cavium,t-wr-hld = <45>;
+ cavium,t-pause = <0>;
+ cavium,t-wait = <0>;
+ cavium,t-page = <35>;
+ cavium,t-rd-dly = <0>;
+
+ cavium,pages = <0>;
+ cavium,bus-width = <8>;
+ };
+ .
+ .
+ .
+ cavium,cs-config@6 {
+ compatible = "cavium,octeon-3860-bootbus-config";
+ cavium,cs-index = <6>;
+ cavium,t-adr = <5>;
+ cavium,t-ce = <300>;
+ cavium,t-oe = <270>;
+ cavium,t-we = <150>;
+ cavium,t-rd-hld = <100>;
+ cavium,t-wr-hld = <70>;
+ cavium,t-pause = <0>;
+ cavium,t-wait = <0>;
+ cavium,t-page = <320>;
+ cavium,t-rd-dly = <0>;
+
+ cavium,pages = <0>;
+ cavium,wait-mode;
+ cavium,bus-width = <16>;
+ };
+ .
+ .
+ .
+ };
diff --git a/Documentation/devicetree/bindings/mips/cavium/cib.txt b/Documentation/devicetree/bindings/mips/cavium/cib.txt
new file mode 100644
index 000000000..f39a1aa28
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/cavium/cib.txt
@@ -0,0 +1,43 @@
+* Cavium Interrupt Bus widget
+
+Properties:
+- compatible: "cavium,octeon-7130-cib"
+
+ Compatibility with cn70XX SoCs.
+
+- interrupt-controller: This is an interrupt controller.
+
+- reg: Two elements consisting of the addresses of the RAW and EN
+ registers of the CIB block
+
+- cavium,max-bits: The index (zero based) of the highest numbered bit
+ in the CIB block.
+
+- interrupt-parent: Always the CIU on the SoC.
+
+- interrupts: The CIU line to which the CIB block is connected.
+
+- #interrupt-cells: Must be <2>. The first cell is the bit within the
+ CIB. The second cell specifies the triggering semantics of the
+ line.
+
+Example:
+
+ interrupt-controller@107000000e000 {
+ compatible = "cavium,octeon-7130-cib";
+ reg = <0x10700 0x0000e000 0x0 0x8>, /* RAW */
+ <0x10700 0x0000e100 0x0 0x8>; /* EN */
+ cavium,max-bits = <23>;
+
+ interrupt-controller;
+ interrupt-parent = <&ciu>;
+ interrupts = <1 24>;
+ /* Interrupts are specified by two parts:
+ * 1) Bit number in the CIB* registers
+ * 2) Triggering (1 - edge rising
+ * 2 - edge falling
+ * 4 - level active high
+ * 8 - level active low)
+ */
+ #interrupt-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/mips/cavium/ciu.txt b/Documentation/devicetree/bindings/mips/cavium/ciu.txt
new file mode 100644
index 000000000..2c2d0746b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/cavium/ciu.txt
@@ -0,0 +1,26 @@
+* Central Interrupt Unit
+
+Properties:
+- compatible: "cavium,octeon-3860-ciu"
+
+ Compatibility with all cn3XXX, cn5XXX and cn63XX SOCs.
+
+- interrupt-controller: This is an interrupt controller.
+
+- reg: The base address of the CIU's register bank.
+
+- #interrupt-cells: Must be <2>. The first cell is the bank within
+ the CIU and may have a value of 0 or 1. The second cell is the bit
+ within the bank and may have a value between 0 and 63.
+
+Example:
+ interrupt-controller@1070000000000 {
+ compatible = "cavium,octeon-3860-ciu";
+ interrupt-controller;
+ /* Interrupts are specified by two parts:
+ * 1) Controller register (0 or 1)
+ * 2) Bit within the register (0..63)
+ */
+ #interrupt-cells = <2>;
+ reg = <0x10700 0x00000000 0x0 0x7000>;
+ };
diff --git a/Documentation/devicetree/bindings/mips/cavium/ciu2.txt b/Documentation/devicetree/bindings/mips/cavium/ciu2.txt
new file mode 100644
index 000000000..0ec7ba8bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/cavium/ciu2.txt
@@ -0,0 +1,27 @@
+* Central Interrupt Unit
+
+Properties:
+- compatible: "cavium,octeon-6880-ciu2"
+
+ Compatibility with 68XX SOCs.
+
+- interrupt-controller: This is an interrupt controller.
+
+- reg: The base address of the CIU's register bank.
+
+- #interrupt-cells: Must be <2>. The first cell is the bank within
+ the CIU and may have a value between 0 and 63. The second cell is
+ the bit within the bank and may also have a value between 0 and 63.
+
+Example:
+ interrupt-controller@1070100000000 {
+ compatible = "cavium,octeon-6880-ciu2";
+ interrupt-controller;
+ /* Interrupts are specified by two parts:
+ * 1) Controller register (0..63)
+ * 2) Bit within the register (0..63)
+ */
+ #address-cells = <0>;
+ #interrupt-cells = <2>;
+ reg = <0x10701 0x00000000 0x0 0x4000000>;
+ };
diff --git a/Documentation/devicetree/bindings/mips/cavium/dma-engine.txt b/Documentation/devicetree/bindings/mips/cavium/dma-engine.txt
new file mode 100644
index 000000000..a5bdff400
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/cavium/dma-engine.txt
@@ -0,0 +1,21 @@
+* DMA Engine.
+
+The Octeon DMA Engine transfers between the Boot Bus and main memory.
+The DMA Engine will be referred to by phandle by any device that is
+connected to it.
+
+Properties:
+- compatible: "cavium,octeon-5750-bootbus-dma"
+
+ Compatibility with all cn52XX, cn56XX and cn6XXX SOCs.
+
+- reg: The base address of the DMA Engine's register bank.
+
+- interrupts: A single interrupt specifier.
+
+Example:
+ dma0: dma-engine@1180000000100 {
+ compatible = "cavium,octeon-5750-bootbus-dma";
+ reg = <0x11800 0x00000100 0x0 0x8>;
+ interrupts = <0 63>;
+ };
diff --git a/Documentation/devicetree/bindings/mips/cavium/uctl.txt b/Documentation/devicetree/bindings/mips/cavium/uctl.txt
new file mode 100644
index 000000000..aa66b9b8d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/cavium/uctl.txt
@@ -0,0 +1,46 @@
+* UCTL USB controller glue
+
+Properties:
+- compatible: "cavium,octeon-6335-uctl"
+
+ Compatibility with all cn6XXX SOCs.
+
+- reg: The base address of the UCTL register bank.
+
+- #address-cells: Must be <2>.
+
+- #size-cells: Must be <2>.
+
+- ranges: Empty to signify direct mapping of the children.
+
+- refclk-frequency: A single cell containing the reference clock
+ frequency in Hz.
+
+- refclk-type: A string describing the reference clock connection
+ either "crystal" or "external".
+
+Example:
+ uctl@118006f000000 {
+ compatible = "cavium,octeon-6335-uctl";
+ reg = <0x11800 0x6f000000 0x0 0x100>;
+ ranges; /* Direct mapping */
+ #address-cells = <2>;
+ #size-cells = <2>;
+ /* 12MHz, 24MHz and 48MHz allowed */
+ refclk-frequency = <24000000>;
+ /* Either "crystal" or "external" */
+ refclk-type = "crystal";
+
+ ehci@16f0000000000 {
+ compatible = "cavium,octeon-6335-ehci","usb-ehci";
+ reg = <0x16f00 0x00000000 0x0 0x100>;
+ interrupts = <0 56>;
+ big-endian-regs;
+ };
+ ohci@16f0000000400 {
+ compatible = "cavium,octeon-6335-ohci","usb-ohci";
+ reg = <0x16f00 0x00000400 0x0 0x100>;
+ interrupts = <0 56>;
+ big-endian-regs;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mips/cpu_irq.txt b/Documentation/devicetree/bindings/mips/cpu_irq.txt
new file mode 100644
index 000000000..fc149f326
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/cpu_irq.txt
@@ -0,0 +1,47 @@
+MIPS CPU interrupt controller
+
+On MIPS the mips_cpu_irq_of_init() helper can be used to initialize the 8 CPU
+IRQs from a devicetree file and create a irq_domain for IRQ controller.
+
+With the irq_domain in place we can describe how the 8 IRQs are wired to the
+platforms internal interrupt controller cascade.
+
+Below is an example of a platform describing the cascade inside the devicetree
+and the code used to load it inside arch_init_irq().
+
+Required properties:
+- compatible : Should be "mti,cpu-interrupt-controller"
+
+Example devicetree:
+ cpu-irq: cpu-irq@0 {
+ #address-cells = <0>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ compatible = "mti,cpu-interrupt-controller";
+ };
+
+ intc: intc@200 {
+ compatible = "ralink,rt2880-intc";
+ reg = <0x200 0x100>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ interrupt-parent = <&cpu-irq>;
+ interrupts = <2>;
+ };
+
+
+Example platform irq.c:
+static struct of_device_id __initdata of_irq_ids[] = {
+ { .compatible = "mti,cpu-interrupt-controller", .data = mips_cpu_irq_of_init },
+ { .compatible = "ralink,rt2880-intc", .data = intc_of_init },
+ {},
+};
+
+void __init arch_init_irq(void)
+{
+ of_irq_init(of_irq_ids);
+}
diff --git a/Documentation/devicetree/bindings/mips/img/pistachio.txt b/Documentation/devicetree/bindings/mips/img/pistachio.txt
new file mode 100644
index 000000000..a736d889c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/img/pistachio.txt
@@ -0,0 +1,42 @@
+Imagination Pistachio SoC
+=========================
+
+Required properties:
+--------------------
+ - compatible: Must include "img,pistachio".
+
+CPU nodes:
+----------
+A "cpus" node is required. Required properties:
+ - #address-cells: Must be 1.
+ - #size-cells: Must be 0.
+A CPU sub-node is also required for at least CPU 0. Since the topology may
+be probed via CPS, it is not necessary to specify secondary CPUs. Required
+propertis:
+ - device_type: Must be "cpu".
+ - compatible: Must be "mti,interaptiv".
+ - reg: CPU number.
+ - clocks: Must include the CPU clock. See ../../clock/clock-bindings.txt for
+ details on clock bindings.
+Example:
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "mti,interaptiv";
+ reg = <0>;
+ clocks = <&clk_core CLK_MIPS>;
+ };
+ };
+
+
+Boot protocol:
+--------------
+In accordance with the MIPS UHI specification[1], the bootloader must pass the
+following arguments to the kernel:
+ - $a0: -2.
+ - $a1: KSEG0 address of the flattened device-tree blob.
+
+[1] http://prplfoundation.org/wiki/MIPS_documentation
diff --git a/Documentation/devicetree/bindings/mips/ralink.txt b/Documentation/devicetree/bindings/mips/ralink.txt
new file mode 100644
index 000000000..b35a8d04f
--- /dev/null
+++ b/Documentation/devicetree/bindings/mips/ralink.txt
@@ -0,0 +1,17 @@
+Ralink MIPS SoC device tree bindings
+
+1. SoCs
+
+Each device tree must specify a compatible value for the Ralink SoC
+it uses in the compatible property of the root node. The compatible
+value must be one of the following values:
+
+ ralink,rt2880-soc
+ ralink,rt3050-soc
+ ralink,rt3052-soc
+ ralink,rt3350-soc
+ ralink,rt3352-soc
+ ralink,rt3883-soc
+ ralink,rt5350-soc
+ ralink,mt7620a-soc
+ ralink,mt7620n-soc
diff --git a/Documentation/devicetree/bindings/misc/allwinner,sunxi-sid.txt b/Documentation/devicetree/bindings/misc/allwinner,sunxi-sid.txt
new file mode 100644
index 000000000..fabdf64a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/allwinner,sunxi-sid.txt
@@ -0,0 +1,17 @@
+Allwinner sunxi-sid
+
+Required properties:
+- compatible: "allwinner,sun4i-a10-sid" or "allwinner,sun7i-a20-sid"
+- reg: Should contain registers location and length
+
+Example for sun4i:
+ sid@01c23800 {
+ compatible = "allwinner,sun4i-a10-sid";
+ reg = <0x01c23800 0x10>
+ };
+
+Example for sun7i:
+ sid@01c23800 {
+ compatible = "allwinner,sun7i-a20-sid";
+ reg = <0x01c23800 0x200>
+ };
diff --git a/Documentation/devicetree/bindings/misc/arm-charlcd.txt b/Documentation/devicetree/bindings/misc/arm-charlcd.txt
new file mode 100644
index 000000000..e28e2aac4
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/arm-charlcd.txt
@@ -0,0 +1,18 @@
+ARM Versatile Character LCD
+-----------------------------------------------------
+This binding defines the character LCD interface found on ARM Versatile AB
+and PB reference platforms.
+
+Required properties:
+- compatible : "arm,versatile-clcd"
+- reg : Location and size of character LCD registers
+
+Optional properties:
+- interrupts - single interrupt for character LCD. The character LCD can
+ operate in polled mode without an interrupt.
+
+Example:
+ lcd@10008000 {
+ compatible = "arm,versatile-lcd";
+ reg = <0x10008000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/misc/at25.txt b/Documentation/devicetree/bindings/misc/at25.txt
new file mode 100644
index 000000000..1d3447165
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/at25.txt
@@ -0,0 +1,35 @@
+EEPROMs (SPI) compatible with Atmel at25.
+
+Required properties:
+- compatible : "atmel,at25".
+- reg : chip select number
+- spi-max-frequency : max spi frequency to use
+- pagesize : size of the eeprom page
+- size : total eeprom size in bytes
+- address-width : number of address bits (one of 8, 16, or 24)
+
+Optional properties:
+- spi-cpha : SPI shifted clock phase, as per spi-bus bindings.
+- spi-cpol : SPI inverse clock polarity, as per spi-bus bindings.
+- read-only : this parameter-less property disables writes to the eeprom
+
+Obsolete legacy properties are can be used in place of "size", "pagesize",
+"address-width", and "read-only":
+- at25,byte-len : total eeprom size in bytes
+- at25,addr-mode : addr-mode flags, as defined in include/linux/spi/eeprom.h
+- at25,page-size : size of the eeprom page
+
+Additional compatible properties are also allowed.
+
+Example:
+ at25@0 {
+ compatible = "atmel,at25", "st,m95256";
+ reg = <0>
+ spi-max-frequency = <5000000>;
+ spi-cpha;
+ spi-cpol;
+
+ pagesize = <64>;
+ size = <32768>;
+ address-width = <16>;
+ };
diff --git a/Documentation/devicetree/bindings/misc/atmel-ssc.txt b/Documentation/devicetree/bindings/misc/atmel-ssc.txt
new file mode 100644
index 000000000..efc98ea1f
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/atmel-ssc.txt
@@ -0,0 +1,49 @@
+* Atmel SSC driver.
+
+Required properties:
+- compatible: "atmel,at91rm9200-ssc" or "atmel,at91sam9g45-ssc"
+ - atmel,at91rm9200-ssc: support pdc transfer
+ - atmel,at91sam9g45-ssc: support dma transfer
+- reg: Should contain SSC registers location and length
+- interrupts: Should contain SSC interrupt
+- clock-names: tuple listing input clock names.
+ Required elements: "pclk"
+- clocks: phandles to input clocks.
+
+
+Required properties for devices compatible with "atmel,at91sam9g45-ssc":
+- dmas: DMA specifier, consisting of a phandle to DMA controller node,
+ the memory interface and SSC DMA channel ID (for tx and rx).
+ See Documentation/devicetree/bindings/dma/atmel-dma.txt for details.
+- dma-names: Must be "tx", "rx".
+
+Optional properties:
+ - atmel,clk-from-rk-pin: bool property.
+ - When SSC works in slave mode, according to the hardware design, the
+ clock can get from TK pin, and also can get from RK pin. So, add
+ this parameter to choose where the clock from.
+ - By default the clock is from TK pin, if the clock from RK pin, this
+ property is needed.
+
+Examples:
+- PDC transfer:
+ssc0: ssc@fffbc000 {
+ compatible = "atmel,at91rm9200-ssc";
+ reg = <0xfffbc000 0x4000>;
+ interrupts = <14 4 5>;
+ clocks = <&ssc0_clk>;
+ clock-names = "pclk";
+};
+
+- DMA transfer:
+ssc0: ssc@f0010000 {
+ compatible = "atmel,at91sam9g45-ssc";
+ reg = <0xf0010000 0x4000>;
+ interrupts = <28 4 5>;
+ dmas = <&dma0 1 13>,
+ <&dma0 1 14>;
+ dma-names = "tx", "rx";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_ssc0_tx &pinctrl_ssc0_rx>;
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/misc/bmp085.txt b/Documentation/devicetree/bindings/misc/bmp085.txt
new file mode 100644
index 000000000..d7a6deb6b
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/bmp085.txt
@@ -0,0 +1,24 @@
+BMP085/BMP18x digital pressure sensors
+
+Required properties:
+- compatible: bosch,bmp085
+
+Optional properties:
+- chip-id: configurable chip id for non-default chip revisions
+- temp-measurement-period: temperature measurement period (milliseconds)
+- default-oversampling: default oversampling value to be used at startup,
+ value range is 0-3 with rising sensitivity.
+- interrupt-parent: should be the phandle for the interrupt controller
+- interrupts: interrupt mapping for IRQ
+
+Example:
+
+pressure@77 {
+ compatible = "bosch,bmp085";
+ reg = <0x77>;
+ chip-id = <10>;
+ temp-measurement-period = <100>;
+ default-oversampling = <2>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <25 IRQ_TYPE_EDGE_RISING>;
+};
diff --git a/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt b/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt
new file mode 100644
index 000000000..6c9f176f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/brcm,kona-smc.txt
@@ -0,0 +1,15 @@
+Broadcom Secure Monitor Bounce buffer
+-----------------------------------------------------
+This binding defines the location of the bounce buffer
+used for non-secure to secure communications.
+
+Required properties:
+- compatible : "brcm,kona-smc"
+- DEPRECATED: compatible : "bcm,kona-smc"
+- reg : Location and size of bounce buffer
+
+Example:
+ smc@0x3404c000 {
+ compatible = "brcm,bcm11351-smc", "brcm,kona-smc";
+ reg = <0x3404c000 0x400>; //1 KiB in SRAM
+ };
diff --git a/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
new file mode 100644
index 000000000..c7a26ca8d
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
@@ -0,0 +1,40 @@
+* Freescale Management Complex
+
+The Freescale Management Complex (fsl-mc) is a hardware resource
+manager that manages specialized hardware objects used in
+network-oriented packet processing applications. After the fsl-mc
+block is enabled, pools of hardware resources are available, such as
+queues, buffer pools, I/O interfaces. These resources are building
+blocks that can be used to create functional hardware objects/devices
+such as network interfaces, crypto accelerator instances, L2 switches,
+etc.
+
+Required properties:
+
+ - compatible
+ Value type: <string>
+ Definition: Must be "fsl,qoriq-mc". A Freescale Management Complex
+ compatible with this binding must have Block Revision
+ Registers BRR1 and BRR2 at offset 0x0BF8 and 0x0BFC in
+ the MC control register region.
+
+ - reg
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies one or two regions
+ defining the MC's registers:
+
+ -the first region is the command portal for the
+ this machine and must always be present
+
+ -the second region is the MC control registers. This
+ region may not be present in some scenarios, such
+ as in the device tree presented to a virtual machine.
+
+Example:
+
+ fsl_mc: fsl-mc@80c000000 {
+ compatible = "fsl,qoriq-mc";
+ reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */
+ <0x00000000 0x08340000 0 0x40000>; /* MC control reg */
+ };
+
diff --git a/Documentation/devicetree/bindings/misc/ifm-csi.txt b/Documentation/devicetree/bindings/misc/ifm-csi.txt
new file mode 100644
index 000000000..5bdfffb0b
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/ifm-csi.txt
@@ -0,0 +1,41 @@
+IFM camera sensor interface on mpc5200 LocalPlus bus
+
+Required properties:
+- compatible: "ifm,o2d-csi"
+- reg: specifies sensor chip select number and associated address range
+- interrupts: external interrupt line number and interrupt sense mode
+ of the interrupt line signaling frame valid events
+- gpios: three gpio-specifiers for "capture", "reset" and "master enable"
+ GPIOs (strictly in this order).
+- ifm,csi-clk-handle: the phandle to a node in the DT describing the sensor
+ clock generator. This node is usually a general purpose timer controller.
+- ifm,csi-addr-bus-width: address bus width (valid values are 16, 24, 25)
+- ifm,csi-data-bus-width: data bus width (valid values are 8 and 16)
+- ifm,csi-wait-cycles: sensor bus wait cycles
+
+Optional properties:
+- ifm,csi-byte-swap: if this property is present, the byte swapping on
+ the bus will be enabled.
+
+Example:
+
+ csi@3,0 {
+ compatible = "ifm,o2d-csi";
+ reg = <3 0 0x00100000>; /* CS 3, 1 MiB range */
+ interrupts = <1 1 2>; /* IRQ1, edge falling */
+
+ ifm,csi-clk-handle = <&timer7>;
+ gpios = <&gpio_simple 23 0 /* image_capture */
+ &gpio_simple 26 0 /* image_reset */
+ &gpio_simple 29 0>; /* image_master_en */
+
+ ifm,csi-addr-bus-width = <24>;
+ ifm,csi-data-bus-width = <8>;
+ ifm,csi-wait-cycles = <0>;
+ };
+
+The base address of the used chip select is specified in the
+ranges property of the parent localbus node, for example:
+
+ ranges = <0 0 0xff000000 0x01000000
+ 3 0 0xe3000000 0x00100000>;
diff --git a/Documentation/devicetree/bindings/misc/lis302.txt b/Documentation/devicetree/bindings/misc/lis302.txt
new file mode 100644
index 000000000..2a19bff96
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/lis302.txt
@@ -0,0 +1,119 @@
+LIS302 accelerometer devicetree bindings
+
+This device is matched via its bus drivers, and has a number of properties
+that apply in on the generic device (independent from the bus).
+
+
+Required properties for the SPI bindings:
+ - compatible: should be set to "st,lis3lv02d_spi"
+ - reg: the chipselect index
+ - spi-max-frequency: maximal bus speed, should be set to 1000000 unless
+ constrained by external circuitry
+ - interrupts: the interrupt generated by the device
+
+Required properties for the I2C bindings:
+ - compatible: should be set to "st,lis3lv02d"
+ - reg: i2c slave address
+ - Vdd-supply: The input supply for Vdd
+ - Vdd_IO-supply: The input supply for Vdd_IO
+
+
+Optional properties for all bus drivers:
+
+ - st,click-single-{x,y,z}: if present, tells the device to issue an
+ interrupt on single click events on the
+ x/y/z axis.
+ - st,click-double-{x,y,z}: if present, tells the device to issue an
+ interrupt on double click events on the
+ x/y/z axis.
+ - st,click-thresh-{x,y,z}: set the x/y/z axis threshold
+ - st,click-click-time-limit: click time limit, from 0 to 127.5msec
+ with step of 0.5 msec
+ - st,click-latency: click latency, from 0 to 255 msec with
+ step of 1 msec.
+ - st,click-window: click window, from 0 to 255 msec with
+ step of 1 msec.
+ - st,irq{1,2}-disable: disable IRQ 1/2
+ - st,irq{1,2}-ff-wu-1: raise IRQ 1/2 on FF_WU_1 condition
+ - st,irq{1,2}-ff-wu-2: raise IRQ 1/2 on FF_WU_2 condition
+ - st,irq{1,2}-data-ready: raise IRQ 1/2 on data ready contition
+ - st,irq{1,2}-click: raise IRQ 1/2 on click condition
+ - st,irq-open-drain: consider IRQ lines open-drain
+ - st,irq-active-low: make IRQ lines active low
+ - st,wu-duration-1: duration register for Free-Fall/Wake-Up
+ interrupt 1
+ - st,wu-duration-2: duration register for Free-Fall/Wake-Up
+ interrupt 2
+ - st,wakeup-{x,y,z}-{lo,hi}: set wakeup condition on x/y/z axis for
+ upper/lower limit
+ - st,wakeup-threshold: set wakeup threshold
+ - st,wakeup2-{x,y,z}-{lo,hi}: set wakeup condition on x/y/z axis for
+ upper/lower limit for second wakeup
+ engine.
+ - st,wakeup2-threshold: set wakeup threshold for second wakeup
+ engine.
+ - st,highpass-cutoff-hz=: 1, 2, 4 or 8 for 1Hz, 2Hz, 4Hz or 8Hz of
+ highpass cut-off frequency
+ - st,hipass{1,2}-disable: disable highpass 1/2.
+ - st,default-rate=: set the default rate
+ - st,axis-{x,y,z}=: set the axis to map to the three coordinates.
+ Negative values can be used for inverted axis.
+ - st,{min,max}-limit-{x,y,z} set the min/max limits for x/y/z axis
+ (used by self-test)
+
+
+Example for a SPI device node:
+
+ lis302@0 {
+ compatible = "st,lis302dl-spi";
+ reg = <0>;
+ spi-max-frequency = <1000000>;
+ interrupt-parent = <&gpio>;
+ interrupts = <104 0>;
+
+ st,click-single-x;
+ st,click-single-y;
+ st,click-single-z;
+ st,click-thresh-x = <10>;
+ st,click-thresh-y = <10>;
+ st,click-thresh-z = <10>;
+ st,irq1-click;
+ st,irq2-click;
+ st,wakeup-x-lo;
+ st,wakeup-x-hi;
+ st,wakeup-y-lo;
+ st,wakeup-y-hi;
+ st,wakeup-z-lo;
+ st,wakeup-z-hi;
+ };
+
+Example for a I2C device node:
+
+ lis331dlh: lis331dlh@18 {
+ compatible = "st,lis331dlh", "st,lis3lv02d";
+ reg = <0x18>;
+ Vdd-supply = <&lis3_reg>;
+ Vdd_IO-supply = <&lis3_reg>;
+
+ st,click-single-x;
+ st,click-single-y;
+ st,click-single-z;
+ st,click-thresh-x = <10>;
+ st,click-thresh-y = <10>;
+ st,click-thresh-z = <10>;
+ st,irq1-click;
+ st,irq2-click;
+ st,wakeup-x-lo;
+ st,wakeup-x-hi;
+ st,wakeup-y-lo;
+ st,wakeup-y-hi;
+ st,wakeup-z-lo;
+ st,wakeup-z-hi;
+ st,min-limit-x = <120>;
+ st,min-limit-y = <120>;
+ st,min-limit-z = <140>;
+ st,max-limit-x = <550>;
+ st,max-limit-y = <550>;
+ st,max-limit-z = <750>;
+ };
+
diff --git a/Documentation/devicetree/bindings/misc/nvidia,tegra20-apbmisc.txt b/Documentation/devicetree/bindings/misc/nvidia,tegra20-apbmisc.txt
new file mode 100644
index 000000000..47b205cc9
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/nvidia,tegra20-apbmisc.txt
@@ -0,0 +1,12 @@
+NVIDIA Tegra20/Tegra30/Tegr114/Tegra124 apbmisc block
+
+Required properties:
+- compatible : For Tegra20, must be "nvidia,tegra20-apbmisc". For Tegra30,
+ must be "nvidia,tegra30-apbmisc". Otherwise, must contain
+ "nvidia,<chip>-apbmisc", plus one of the above, where <chip> is tegra114,
+ tegra124, tegra132.
+- reg: Should contain 2 entries: the first entry gives the physical address
+ and length of the registers which contain revision and debug features.
+ The second entry gives the physical address and length of the
+ registers indicating the strapping options.
+
diff --git a/Documentation/devicetree/bindings/misc/sram.txt b/Documentation/devicetree/bindings/misc/sram.txt
new file mode 100644
index 000000000..36cbe5aea
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/sram.txt
@@ -0,0 +1,51 @@
+Generic on-chip SRAM
+
+Simple IO memory regions to be managed by the genalloc API.
+
+Required properties:
+
+- compatible : mmio-sram
+
+- reg : SRAM iomem address range
+
+Reserving sram areas:
+---------------------
+
+Each child of the sram node specifies a region of reserved memory. Each
+child node should use a 'reg' property to specify a specific range of
+reserved memory.
+
+Following the generic-names recommended practice, node names should
+reflect the purpose of the node. Unit address (@<address>) should be
+appended to the name.
+
+Required properties in the sram node:
+
+- #address-cells, #size-cells : should use the same values as the root node
+- ranges : standard definition, should translate from local addresses
+ within the sram to bus addresses
+
+Required properties in the area nodes:
+
+- reg : iomem address range, relative to the SRAM range
+
+Optional properties in the area nodes:
+
+- compatible : standard definition, should contain a vendor specific string
+ in the form <vendor>,[<device>-]<usage>
+
+Example:
+
+sram: sram@5c000000 {
+ compatible = "mmio-sram";
+ reg = <0x5c000000 0x40000>; /* 256 KiB SRAM at address 0x5c000000 */
+
+ #adress-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x5c000000 0x40000>;
+
+ smp-sram@100 {
+ compatible = "socvendor,smp-sram";
+ reg = <0x100 0x50>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/misc/ti,dac7512.txt b/Documentation/devicetree/bindings/misc/ti,dac7512.txt
new file mode 100644
index 000000000..1db45939d
--- /dev/null
+++ b/Documentation/devicetree/bindings/misc/ti,dac7512.txt
@@ -0,0 +1,20 @@
+TI DAC7512 DEVICETREE BINDINGS
+
+Required properties:
+
+ - "compatible" Must be set to "ti,dac7512"
+
+Property rules described in Documentation/devicetree/bindings/spi/spi-bus.txt
+apply. In particular, "reg" and "spi-max-frequency" properties must be given.
+
+
+Example:
+
+ spi_master {
+ dac7512: dac7512@0 {
+ compatible = "ti,dac7512";
+ reg = <0>; /* CS0 */
+ spi-max-frequency = <1000000>;
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt b/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
new file mode 100644
index 000000000..98ee2abbe
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/arasan,sdhci.txt
@@ -0,0 +1,27 @@
+Device Tree Bindings for the Arasan SDHCI Controller
+
+ The bindings follow the mmc[1], clock[2] and interrupt[3] bindings. Only
+ deviations are documented here.
+
+ [1] Documentation/devicetree/bindings/mmc/mmc.txt
+ [2] Documentation/devicetree/bindings/clock/clock-bindings.txt
+ [3] Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+
+Required Properties:
+ - compatible: Compatibility string. Must be 'arasan,sdhci-8.9a'
+ - reg: From mmc bindings: Register location and length.
+ - clocks: From clock bindings: Handles to clock inputs.
+ - clock-names: From clock bindings: Tuple including "clk_xin" and "clk_ahb"
+ - interrupts: Interrupt specifier
+ - interrupt-parent: Phandle for the interrupt controller that services
+ interrupts for this device.
+
+Example:
+ sdhci@e0100000 {
+ compatible = "arasan,sdhci-8.9a";
+ reg = <0xe0100000 0x1000>;
+ clock-names = "clk_xin", "clk_ahb";
+ clocks = <&clkc 21>, <&clkc 32>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 24 4>;
+ } ;
diff --git a/Documentation/devicetree/bindings/mmc/atmel-hsmci.txt b/Documentation/devicetree/bindings/mmc/atmel-hsmci.txt
new file mode 100644
index 000000000..07ad02075
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/atmel-hsmci.txt
@@ -0,0 +1,73 @@
+* Atmel High Speed MultiMedia Card Interface
+
+This controller on atmel products provides an interface for MMC, SD and SDIO
+types of memory cards.
+
+This file documents differences between the core properties described
+by mmc.txt and the properties used by the atmel-mci driver.
+
+1) MCI node
+
+Required properties:
+- compatible: should be "atmel,hsmci"
+- #address-cells: should be one. The cell is the slot id.
+- #size-cells: should be zero.
+- at least one slot node
+- clock-names: tuple listing input clock names.
+ Required elements: "mci_clk"
+- clocks: phandles to input clocks.
+
+The node contains child nodes for each slot that the platform uses
+
+Example MCI node:
+
+mmc0: mmc@f0008000 {
+ compatible = "atmel,hsmci";
+ reg = <0xf0008000 0x600>;
+ interrupts = <12 4>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clock-names = "mci_clk";
+ clocks = <&mci0_clk>;
+
+ [ child node definitions...]
+};
+
+2) slot nodes
+
+Required properties:
+- reg: should contain the slot id.
+- bus-width: number of data lines connected to the controller
+
+Optional properties:
+- cd-gpios: specify GPIOs for card detection
+- cd-inverted: invert the value of external card detect gpio line
+- wp-gpios: specify GPIOs for write protection
+
+Example slot node:
+
+slot@0 {
+ reg = <0>;
+ bus-width = <4>;
+ cd-gpios = <&pioD 15 0>
+ cd-inverted;
+};
+
+Example full MCI node:
+mmc0: mmc@f0008000 {
+ compatible = "atmel,hsmci";
+ reg = <0xf0008000 0x600>;
+ interrupts = <12 4>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ slot@0 {
+ reg = <0>;
+ bus-width = <4>;
+ cd-gpios = <&pioD 15 0>
+ cd-inverted;
+ };
+ slot@1 {
+ reg = <1>;
+ bus-width = <4>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhci.txt b/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhci.txt
new file mode 100644
index 000000000..59476fbdb
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/brcm,bcm2835-sdhci.txt
@@ -0,0 +1,18 @@
+Broadcom BCM2835 SDHCI controller
+
+This file documents differences between the core properties described
+by mmc.txt and the properties that represent the BCM2835 controller.
+
+Required properties:
+- compatible : Should be "brcm,bcm2835-sdhci".
+- clocks : The clock feeding the SDHCI controller.
+
+Example:
+
+sdhci: sdhci {
+ compatible = "brcm,bcm2835-sdhci";
+ reg = <0x7e300000 0x100>;
+ interrupts = <2 30>;
+ clocks = <&clk_mmc>;
+ bus-width = <4>;
+};
diff --git a/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt b/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt
new file mode 100644
index 000000000..aaba2483b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/brcm,kona-sdhci.txt
@@ -0,0 +1,21 @@
+Broadcom BCM281xx SDHCI
+
+This file documents differences between the core properties in mmc.txt
+and the properties present in the bcm281xx SDHCI
+
+Required properties:
+- compatible : Should be "brcm,kona-sdhci"
+- DEPRECATED: compatible : Should be "bcm,kona-sdhci"
+- clocks: phandle + clock specifier pair of the external clock
+
+Refer to clocks/clock-bindings.txt for generic clock consumer properties.
+
+Example:
+
+sdio2: sdio@0x3f1a0000 {
+ compatible = "brcm,kona-sdhci";
+ reg = <0x3f1a0000 0x10000>;
+ clocks = <&sdio3_clk>;
+ interrupts = <0x0 74 0x4>;
+};
+
diff --git a/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt b/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt
new file mode 100644
index 000000000..72cc9cc95
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/brcm,sdhci-iproc.txt
@@ -0,0 +1,23 @@
+Broadcom IPROC SDHCI controller
+
+This file documents differences between the core properties described
+by mmc.txt and the properties that represent the IPROC SDHCI controller.
+
+Required properties:
+- compatible : Should be "brcm,sdhci-iproc-cygnus".
+- clocks : The clock feeding the SDHCI controller.
+
+Optional properties:
+ - sdhci,auto-cmd12: specifies that controller should use auto CMD12.
+
+Example:
+
+sdhci0: sdhci@0x18041000 {
+ compatible = "brcm,sdhci-iproc-cygnus";
+ reg = <0x18041000 0x100>;
+ interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&lcpll0_clks BCM_CYGNUS_LCPLL0_SDIO_CLK>;
+ bus-width = <4>;
+ sdhci,auto-cmd12;
+ no-1-8-v;
+};
diff --git a/Documentation/devicetree/bindings/mmc/davinci_mmc.txt b/Documentation/devicetree/bindings/mmc/davinci_mmc.txt
new file mode 100644
index 000000000..e5a0140b2
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/davinci_mmc.txt
@@ -0,0 +1,33 @@
+* TI Highspeed MMC host controller for DaVinci
+
+The Highspeed MMC Host Controller on TI DaVinci family
+provides an interface for MMC, SD and SDIO types of memory cards.
+
+This file documents the properties used by the davinci_mmc driver.
+
+Required properties:
+- compatible:
+ Should be "ti,da830-mmc": for da830, da850, dm365
+ Should be "ti,dm355-mmc": for dm355, dm644x
+
+Optional properties:
+- bus-width: Number of data lines, can be <1>, <4>, or <8>, default <1>
+- max-frequency: Maximum operating clock frequency, default 25MHz.
+- dmas: List of DMA specifiers with the controller specific format
+ as described in the generic DMA client binding. A tx and rx
+ specifier is required.
+- dma-names: RX and TX DMA request names. These strings correspond
+ 1:1 with the DMA specifiers listed in dmas.
+
+Example:
+mmc0: mmc@1c40000 {
+ compatible = "ti,da830-mmc",
+ reg = <0x40000 0x1000>;
+ interrupts = <16>;
+ status = "okay";
+ bus-width = <4>;
+ max-frequency = <50000000>;
+ dmas = <&edma 16
+ &edma 17>;
+ dma-names = "rx", "tx";
+};
diff --git a/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
new file mode 100644
index 000000000..aad984427
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
@@ -0,0 +1,93 @@
+* Samsung Exynos specific extensions to the Synopsys Designware Mobile
+ Storage Host Controller
+
+The Synopsys designware mobile storage host controller is used to interface
+a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
+differences between the core Synopsys dw mshc controller properties described
+by synopsys-dw-mshc.txt and the properties used by the Samsung Exynos specific
+extensions to the Synopsys Designware Mobile Storage Host Controller.
+
+Required Properties:
+
+* compatible: should be
+ - "samsung,exynos4210-dw-mshc": for controllers with Samsung Exynos4210
+ specific extensions.
+ - "samsung,exynos4412-dw-mshc": for controllers with Samsung Exynos4412
+ specific extensions.
+ - "samsung,exynos5250-dw-mshc": for controllers with Samsung Exynos5250
+ specific extensions.
+ - "samsung,exynos5420-dw-mshc": for controllers with Samsung Exynos5420
+ specific extensions.
+ - "samsung,exynos7-dw-mshc": for controllers with Samsung Exynos7
+ specific extensions.
+ - "samsung,exynos7-dw-mshc-smu": for controllers with Samsung Exynos7
+ specific extensions having an SMU.
+
+* samsung,dw-mshc-ciu-div: Specifies the divider value for the card interface
+ unit (ciu) clock. This property is applicable only for Exynos5 SoC's and
+ ignored for Exynos4 SoC's. The valid range of divider value is 0 to 7.
+
+* samsung,dw-mshc-sdr-timing: Specifies the value of CIU clock phase shift value
+ in transmit mode and CIU clock phase shift value in receive mode for single
+ data rate mode operation. Refer notes below for the order of the cells and the
+ valid values.
+
+* samsung,dw-mshc-ddr-timing: Specifies the value of CUI clock phase shift value
+ in transmit mode and CIU clock phase shift value in receive mode for double
+ data rate mode operation. Refer notes below for the order of the cells and the
+ valid values.
+* samsung,dw-mshc-hs400-timing: Specifies the value of CIU TX and RX clock phase
+ shift value for hs400 mode operation.
+
+ Notes for the sdr-timing and ddr-timing values:
+
+ The order of the cells should be
+ - First Cell: CIU clock phase shift value for tx mode.
+ - Second Cell: CIU clock phase shift value for rx mode.
+
+ Valid values for SDR and DDR CIU clock timing for Exynos5250:
+ - valid value for tx phase shift and rx phase shift is 0 to 7.
+ - when CIU clock divider value is set to 3, all possible 8 phase shift
+ values can be used.
+ - if CIU clock divider value is 0 (that is divide by 1), both tx and rx
+ phase shift clocks should be 0.
+
+* samsung,read-strobe-delay: RCLK (Data strobe) delay to control HS400 mode
+ (Latency value for delay line in Read path)
+
+Required properties for a slot (Deprecated - Recommend to use one slot per host):
+
+* gpios: specifies a list of gpios used for command, clock and data bus. The
+ first gpio is the command line and the second gpio is the clock line. The
+ rest of the gpios (depending on the bus-width property) are the data lines in
+ no particular order. The format of the gpio specifier depends on the gpio
+ controller.
+(Deprecated - Refer to Documentation/devicetree/binding/pinctrl/samsung-pinctrl.txt)
+
+Example:
+
+ The MSHC controller node can be split into two portions, SoC specific and
+ board specific portions as listed below.
+
+ dwmmc0@12200000 {
+ compatible = "samsung,exynos5250-dw-mshc";
+ reg = <0x12200000 0x1000>;
+ interrupts = <0 75 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ dwmmc0@12200000 {
+ num-slots = <1>;
+ cap-mmc-highspeed;
+ cap-sd-highspeed;
+ broken-cd;
+ fifo-depth = <0x80>;
+ card-detect-delay = <200>;
+ samsung,dw-mshc-ciu-div = <3>;
+ samsung,dw-mshc-sdr-timing = <2 3>;
+ samsung,dw-mshc-ddr-timing = <1 2>;
+ samsung,dw-mshc-hs400-timing = <0 2>;
+ samsung,read-strobe-delay = <90>;
+ bus-width = <8>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/fsl-esdhc.txt b/Documentation/devicetree/bindings/mmc/fsl-esdhc.txt
new file mode 100644
index 000000000..b7943f3f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/fsl-esdhc.txt
@@ -0,0 +1,36 @@
+* Freescale Enhanced Secure Digital Host Controller (eSDHC)
+
+The Enhanced Secure Digital Host Controller provides an interface
+for MMC, SD, and SDIO types of memory cards.
+
+This file documents differences between the core properties described
+by mmc.txt and the properties used by the sdhci-esdhc driver.
+
+Required properties:
+ - interrupt-parent : interrupt source phandle.
+ - clock-frequency : specifies eSDHC base clock frequency.
+
+Optional properties:
+ - sdhci,wp-inverted : specifies that eSDHC controller reports
+ inverted write-protect state; New devices should use the generic
+ "wp-inverted" property.
+ - sdhci,1-bit-only : specifies that a controller can only handle
+ 1-bit data transfers. New devices should use the generic
+ "bus-width = <1>" property.
+ - sdhci,auto-cmd12: specifies that a controller can only handle auto
+ CMD12.
+ - voltage-ranges : two cells are required, first cell specifies minimum
+ slot voltage (mV), second cell specifies maximum slot voltage (mV).
+ Several ranges could be specified.
+
+Example:
+
+sdhci@2e000 {
+ compatible = "fsl,mpc8378-esdhc", "fsl,esdhc";
+ reg = <0x2e000 0x1000>;
+ interrupts = <42 0x8>;
+ interrupt-parent = <&ipic>;
+ /* Filled in by U-Boot */
+ clock-frequency = <0>;
+ voltage-ranges = <3300 3300>;
+};
diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
new file mode 100644
index 000000000..415c5575c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/fsl-imx-esdhc.txt
@@ -0,0 +1,41 @@
+* Freescale Enhanced Secure Digital Host Controller (eSDHC) for i.MX
+
+The Enhanced Secure Digital Host Controller on Freescale i.MX family
+provides an interface for MMC, SD, and SDIO types of memory cards.
+
+This file documents differences between the core properties described
+by mmc.txt and the properties used by the sdhci-esdhc-imx driver.
+
+Required properties:
+- compatible : Should be "fsl,<chip>-esdhc"
+
+Optional properties:
+- fsl,cd-controller : Indicate to use controller internal card detection
+- fsl,wp-controller : Indicate to use controller internal write protection
+- fsl,delay-line : Specify the number of delay cells for override mode.
+ This is used to set the clock delay for DLL(Delay Line) on override mode
+ to select a proper data sampling window in case the clock quality is not good
+ due to signal path is too long on the board. Please refer to eSDHC/uSDHC
+ chapter, DLL (Delay Line) section in RM for details.
+- voltage-ranges : Specify the voltage range in case there are software
+ transparent level shifters on the outputs of the controller. Two cells are
+ required, first cell specifies minimum slot voltage (mV), second cell
+ specifies maximum slot voltage (mV). Several ranges could be specified.
+
+Examples:
+
+esdhc@70004000 {
+ compatible = "fsl,imx51-esdhc";
+ reg = <0x70004000 0x4000>;
+ interrupts = <1>;
+ fsl,cd-controller;
+ fsl,wp-controller;
+};
+
+esdhc@70008000 {
+ compatible = "fsl,imx51-esdhc";
+ reg = <0x70008000 0x4000>;
+ interrupts = <2>;
+ cd-gpios = <&gpio1 6 0>; /* GPIO1_6 */
+ wp-gpios = <&gpio1 5 0>; /* GPIO1_5 */
+};
diff --git a/Documentation/devicetree/bindings/mmc/fsl-imx-mmc.txt b/Documentation/devicetree/bindings/mmc/fsl-imx-mmc.txt
new file mode 100644
index 000000000..db442355c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/fsl-imx-mmc.txt
@@ -0,0 +1,24 @@
+* Freescale Secure Digital Host Controller for i.MX2/3 series
+
+This file documents differences to the properties defined in mmc.txt.
+
+Required properties:
+- compatible : Should be "fsl,<chip>-mmc", chip can be imx21 or imx31
+
+Optional properties:
+- dmas: One DMA phandle with arguments as defined by the devicetree bindings
+ of the used DMA controller.
+- dma-names: Has to be "rx-tx".
+
+Example:
+
+sdhci1: sdhci@10014000 {
+ compatible = "fsl,imx27-mmc", "fsl,imx21-mmc";
+ reg = <0x10014000 0x1000>;
+ interrupts = <11>;
+ dmas = <&dma 7>;
+ dma-names = "rx-tx";
+ bus-width = <4>;
+ cd-gpios = <&gpio3 29>;
+ status = "okay";
+};
diff --git a/Documentation/devicetree/bindings/mmc/img-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/img-dw-mshc.txt
new file mode 100644
index 000000000..85de99fca
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/img-dw-mshc.txt
@@ -0,0 +1,29 @@
+* Imagination specific extensions to the Synopsys Designware Mobile Storage
+ Host Controller
+
+The Synopsys designware mobile storage host controller is used to interface
+a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
+differences between the core Synopsys dw mshc controller properties described
+by synopsys-dw-mshc.txt and the properties used by the Imagination specific
+extensions to the Synopsys Designware Mobile Storage Host Controller.
+
+Required Properties:
+
+* compatible: should be
+ - "img,pistachio-dw-mshc": for Pistachio SoCs
+
+Example:
+
+ mmc@18142000 {
+ compatible = "img,pistachio-dw-mshc";
+ reg = <0x18142000 0x400>;
+ interrupts = <GIC_SHARED 39 IRQ_TYPE_LEVEL_HIGH>;
+
+ clocks = <&system_clk>, <&sdhost_clk>;
+ clock-names = "biu", "ciu";
+
+ fifo-depth = <0x20>;
+ bus-width = <4>;
+ num-slots = <1>;
+ disable-wp;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
new file mode 100644
index 000000000..3b3544931
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
@@ -0,0 +1,44 @@
+* Hisilicon specific extensions to the Synopsys Designware Mobile
+ Storage Host Controller
+
+Read synopsys-dw-mshc.txt for more details
+
+The Synopsys designware mobile storage host controller is used to interface
+a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
+differences between the core Synopsys dw mshc controller properties described
+by synopsys-dw-mshc.txt and the properties used by the Hisilicon specific
+extensions to the Synopsys Designware Mobile Storage Host Controller.
+
+Required Properties:
+
+* compatible: should be one of the following.
+ - "hisilicon,hi4511-dw-mshc": for controllers with hi4511 specific extensions.
+
+Example:
+
+ /* for Hi3620 */
+
+ /* SoC portion */
+ dwmmc_0: dwmmc0@fcd03000 {
+ compatible = "hisilicon,hi4511-dw-mshc";
+ reg = <0xfcd03000 0x1000>;
+ interrupts = <0 16 4>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&mmc_clock HI3620_SD_CIUCLK>, <&clock HI3620_DDRC_PER_CLK>;
+ clock-names = "ciu", "biu";
+ };
+
+ /* Board portion */
+ dwmmc0@fcd03000 {
+ num-slots = <1>;
+ vmmc-supply = <&ldo12>;
+ fifo-depth = <0x100>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&sd_pmx_pins &sd_cfg_func1 &sd_cfg_func2>;
+ bus-width = <4>;
+ disable-wp;
+ cd-gpios = <&gpio10 3 0>;
+ cap-mmc-highspeed;
+ cap-sd-highspeed;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/mmc-card.txt b/Documentation/devicetree/bindings/mmc/mmc-card.txt
new file mode 100644
index 000000000..a70fcd65b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/mmc-card.txt
@@ -0,0 +1,31 @@
+mmc-card / eMMC bindings
+------------------------
+
+This documents describes the devicetree bindings for a mmc-host controller
+child node describing a mmc-card / an eMMC, see "Use of Function subnodes"
+in mmc.txt
+
+Required properties:
+-compatible : Must be "mmc-card"
+-reg : Must be <0>
+
+Optional properties:
+-broken-hpi : Use this to indicate that the mmc-card has a broken hpi
+ implementation, and that hpi should not be used
+
+Example:
+
+&mmc2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&mmc2_pins_a>;
+ vmmc-supply = <&reg_vcc3v3>;
+ bus-width = <8>;
+ non-removable;
+ status = "okay";
+
+ mmccard: mmccard@0 {
+ reg = <0>;
+ compatible = "mmc-card";
+ broken-hpi;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.txt b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.txt
new file mode 100644
index 000000000..0cb827bf9
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.txt
@@ -0,0 +1,25 @@
+* The simple eMMC hardware reset provider
+
+The purpose of this driver is to perform standard eMMC hw reset
+procedure, as descibed by Jedec 4.4 specification. This procedure is
+performed just after MMC core enabled power to the given mmc host (to
+fix possible issues if bootloader has left eMMC card in initialized or
+unknown state), and before performing complete system reboot (also in
+case of emergency reboot call). The latter is needed on boards, which
+doesn't have hardware reset logic connected to emmc card and (limited or
+broken) ROM bootloaders are unable to read second stage from the emmc
+card if the card is left in unknown or already initialized state.
+
+Required properties:
+- compatible : contains "mmc-pwrseq-emmc".
+- reset-gpios : contains a GPIO specifier. The reset GPIO is asserted
+ and then deasserted to perform eMMC card reset. To perform
+ reset procedure as described in Jedec 4.4 specification, the
+ gpio line should be defined as GPIO_ACTIVE_LOW.
+
+Example:
+
+ sdhci0_pwrseq {
+ compatible = "mmc-pwrseq-emmc";
+ reset-gpios = <&gpio1 12 GPIO_ACTIVE_LOW>;
+ }
diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
new file mode 100644
index 000000000..a462c50f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.txt
@@ -0,0 +1,25 @@
+* The simple MMC power sequence provider
+
+The purpose of the simple MMC power sequence provider is to supports a set of
+common properties between various SOC designs. It thus enables us to use the
+same provider for several SOC designs.
+
+Required properties:
+- compatible : contains "mmc-pwrseq-simple".
+
+Optional properties:
+- reset-gpios : contains a list of GPIO specifiers. The reset GPIOs are asserted
+ at initialization and prior we start the power up procedure of the card.
+ They will be de-asserted right after the power has been provided to the
+ card.
+- clocks : Must contain an entry for the entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entry:
+ "ext_clock" (External clock provided to the card).
+
+Example:
+
+ sdhci0_pwrseq {
+ compatible = "mmc-pwrseq-simple";
+ reset-gpios = <&gpio1 12 0>;
+ }
diff --git a/Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt b/Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt
new file mode 100644
index 000000000..0e5e2ec40
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/mmc-spi-slot.txt
@@ -0,0 +1,31 @@
+MMC/SD/SDIO slot directly connected to a SPI bus
+
+This file documents differences between the core properties described
+by mmc.txt and the properties used by the mmc_spi driver.
+
+Required properties:
+- spi-max-frequency : maximum frequency for this device (Hz).
+- voltage-ranges : two cells are required, first cell specifies minimum
+ slot voltage (mV), second cell specifies maximum slot voltage (mV).
+ Several ranges could be specified.
+
+Optional properties:
+- gpios : may specify GPIOs in this order: Card-Detect GPIO,
+ Write-Protect GPIO. Note that this does not follow the
+ binding from mmc.txt, for historical reasons.
+- interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+
+Example:
+
+ mmc-slot@0 {
+ compatible = "fsl,mpc8323rdb-mmc-slot",
+ "mmc-spi-slot";
+ reg = <0>;
+ gpios = <&qe_pio_d 14 1
+ &qe_pio_d 15 0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <50000000>;
+ interrupts = <42>;
+ interrupt-parent = <&PIC>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt
new file mode 100644
index 000000000..438899e88
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/mmc.txt
@@ -0,0 +1,140 @@
+These properties are common to multiple MMC host controllers. Any host
+that requires the respective functionality should implement them using
+these definitions.
+
+Interpreted by the OF core:
+- reg: Registers location and length.
+- interrupts: Interrupts used by the MMC controller.
+
+Card detection:
+If no property below is supplied, host native card detect is used.
+Only one of the properties in this section should be supplied:
+ - broken-cd: There is no card detection available; polling must be used.
+ - cd-gpios: Specify GPIOs for card detection, see gpio binding
+ - non-removable: non-removable slot (like eMMC); assume always present.
+
+Optional properties:
+- bus-width: Number of data lines, can be <1>, <4>, or <8>. The default
+ will be <1> if the property is absent.
+- wp-gpios: Specify GPIOs for write protection, see gpio binding
+- cd-inverted: when present, polarity on the CD line is inverted. See the note
+ below for the case, when a GPIO is used for the CD line
+- wp-inverted: when present, polarity on the WP line is inverted. See the note
+ below for the case, when a GPIO is used for the WP line
+- max-frequency: maximum operating clock frequency
+- no-1-8-v: when present, denotes that 1.8v card voltage is not supported on
+ this system, even if the controller claims it is.
+- cap-sd-highspeed: SD high-speed timing is supported
+- cap-mmc-highspeed: MMC high-speed timing is supported
+- sd-uhs-sdr12: SD UHS SDR12 speed is supported
+- sd-uhs-sdr25: SD UHS SDR25 speed is supported
+- sd-uhs-sdr50: SD UHS SDR50 speed is supported
+- sd-uhs-sdr104: SD UHS SDR104 speed is supported
+- sd-uhs-ddr50: SD UHS DDR50 speed is supported
+- cap-power-off-card: powering off the card is safe
+- cap-sdio-irq: enable SDIO IRQ signalling on this interface
+- full-pwr-cycle: full power cycle of the card is supported
+- mmc-ddr-1_8v: eMMC high-speed DDR mode(1.8V I/O) is supported
+- mmc-ddr-1_2v: eMMC high-speed DDR mode(1.2V I/O) is supported
+- mmc-hs200-1_8v: eMMC HS200 mode(1.8V I/O) is supported
+- mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported
+- mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported
+- mmc-hs400-1_2v: eMMC HS400 mode(1.2V I/O) is supported
+- dsr: Value the card's (optional) Driver Stage Register (DSR) should be
+ programmed with. Valid range: [0 .. 0xffff].
+
+*NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line
+polarity properties, we have to fix the meaning of the "normal" and "inverted"
+line levels. We choose to follow the SDHCI standard, which specifies both those
+lines as "active low." Therefore, using the "cd-inverted" property means, that
+the CD line is active high, i.e. it is high, when a card is inserted. Similar
+logic applies to the "wp-inverted" property.
+
+CD and WP lines can be implemented on the hardware in one of two ways: as GPIOs,
+specified in cd-gpios and wp-gpios properties, or as dedicated pins. Polarity of
+dedicated pins can be specified, using *-inverted properties. GPIO polarity can
+also be specified using the OF_GPIO_ACTIVE_LOW flag. This creates an ambiguity
+in the latter case. We choose to use the XOR logic for GPIO CD and WP lines.
+This means, the two properties are "superimposed," for example leaving the
+OF_GPIO_ACTIVE_LOW flag clear and specifying the respective *-inverted
+property results in a double-inversion and actually means the "normal" line
+polarity is in effect.
+
+Optional SDIO properties:
+- keep-power-in-suspend: Preserves card power during a suspend/resume cycle
+- enable-sdio-wakeup: Enables wake up of host system on SDIO IRQ assertion
+
+
+MMC power sequences:
+--------------------
+
+System on chip designs may specify a specific MMC power sequence. To
+successfully detect an (e)MMC/SD/SDIO card, that power sequence must be
+maintained while initializing the card.
+
+Optional property:
+- mmc-pwrseq: phandle to the MMC power sequence node. See "mmc-pwrseq-*"
+ for documentation of MMC power sequence bindings.
+
+
+Use of Function subnodes
+------------------------
+
+On embedded systems the cards connected to a host may need additional
+properties. These can be specified in subnodes to the host controller node.
+The subnodes are identified by the standard 'reg' property.
+Which information exactly can be specified depends on the bindings for the
+SDIO function driver for the subnode, as specified by the compatible string.
+
+Required host node properties when using function subnodes:
+- #address-cells: should be one. The cell is the slot id.
+- #size-cells: should be zero.
+
+Required function subnode properties:
+- compatible: name of SDIO function following generic names recommended practice
+- reg: Must contain the SDIO function number of the function this subnode
+ describes. A value of 0 denotes the memory SD function, values from
+ 1 to 7 denote the SDIO functions.
+
+
+Examples
+--------
+
+Basic example:
+
+sdhci@ab000000 {
+ compatible = "sdhci";
+ reg = <0xab000000 0x200>;
+ interrupts = <23>;
+ bus-width = <4>;
+ cd-gpios = <&gpio 69 0>;
+ cd-inverted;
+ wp-gpios = <&gpio 70 0>;
+ max-frequency = <50000000>;
+ keep-power-in-suspend;
+ enable-sdio-wakeup;
+ mmc-pwrseq = <&sdhci0_pwrseq>
+}
+
+Example with sdio function subnode:
+
+mmc3: mmc@01c12000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&mmc3_pins_a>;
+ vmmc-supply = <&reg_vmmc3>;
+ bus-width = <4>;
+ non-removable;
+ mmc-pwrseq = <&sdhci0_pwrseq>
+ status = "okay";
+
+ brcmf: bcrmf@1 {
+ reg = <1>;
+ compatible = "brcm,bcm43xx-fmac";
+ interrupt-parent = <&pio>;
+ interrupts = <10 8>; /* PH10 / EINT10 */
+ interrupt-names = "host-wake";
+ };
+};
diff --git a/Documentation/devicetree/bindings/mmc/mmci.txt b/Documentation/devicetree/bindings/mmc/mmci.txt
new file mode 100644
index 000000000..03796cf2d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/mmci.txt
@@ -0,0 +1,61 @@
+* ARM PrimeCell MultiMedia Card Interface (MMCI) PL180/1
+
+The ARM PrimeCell MMCI PL180 and PL181 provides an interface for
+reading and writing to MultiMedia and SD cards alike.
+
+This file documents differences between the core properties described
+by mmc.txt and the properties used by the mmci driver. Using "st" as
+the prefix for a property, indicates support by the ST Micro variant.
+
+Required properties:
+- compatible : contains "arm,pl18x", "arm,primecell".
+- vmmc-supply : phandle to the regulator device tree node, mentioned
+ as the VCC/VDD supply in the eMMC/SD specs.
+
+Optional properties:
+- arm,primecell-periphid : contains the PrimeCell Peripheral ID, it overrides
+ the ID provided by the HW
+- vqmmc-supply : phandle to the regulator device tree node, mentioned
+ as the VCCQ/VDD_IO supply in the eMMC/SD specs.
+- st,sig-dir-dat0 : bus signal direction pin used for DAT[0].
+- st,sig-dir-dat2 : bus signal direction pin used for DAT[2].
+- st,sig-dir-dat31 : bus signal direction pin used for DAT[3] and DAT[1].
+- st,sig-dir-dat74 : bus signal direction pin used for DAT[4] to DAT[7].
+- st,sig-dir-cmd : cmd signal direction pin used for CMD.
+- st,sig-pin-fbclk : feedback clock signal pin used.
+
+Deprecated properties:
+- mmc-cap-mmc-highspeed : indicates whether MMC is high speed capable.
+- mmc-cap-sd-highspeed : indicates whether SD is high speed capable.
+
+Example:
+
+sdi0_per1@80126000 {
+ compatible = "arm,pl18x", "arm,primecell";
+ reg = <0x80126000 0x1000>;
+ interrupts = <0 60 IRQ_TYPE_LEVEL_HIGH>;
+
+ dmas = <&dma 29 0 0x2>, /* Logical - DevToMem */
+ <&dma 29 0 0x0>; /* Logical - MemToDev */
+ dma-names = "rx", "tx";
+
+ clocks = <&prcc_kclk 1 5>, <&prcc_pclk 1 5>;
+ clock-names = "sdi", "apb_pclk";
+
+ max-frequency = <100000000>;
+ bus-width = <4>;
+ cap-sd-highspeed;
+ cap-mmc-highspeed;
+ cd-gpios = <&gpio2 31 0x4>; // 95
+ st,sig-dir-dat0;
+ st,sig-dir-dat2;
+ st,sig-dir-cmd;
+ st,sig-pin-fbclk;
+
+ vmmc-supply = <&ab8500_ldo_aux3_reg>;
+ vqmmc-supply = <&vmmci>;
+
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&sdi0_default_mode>;
+ pinctrl-1 = <&sdi0_sleep_mode>;
+};
diff --git a/Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt b/Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt
new file mode 100644
index 000000000..b63819149
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt
@@ -0,0 +1,30 @@
+MOXA ART MMC Host Controller Interface
+
+ Inherits from mmc binding[1].
+
+ [1] Documentation/devicetree/bindings/mmc/mmc.txt
+
+Required properties:
+
+- compatible : Must be "moxa,moxart-mmc" or "faraday,ftsdc010"
+- reg : Should contain registers location and length
+- interrupts : Should contain the interrupt number
+- clocks : Should contain phandle for the clock feeding the MMC controller
+
+Optional properties:
+
+- dmas : Should contain two DMA channels, line request number must be 5 for
+ both channels
+- dma-names : Must be "tx", "rx"
+
+Example:
+
+ mmc: mmc@98e00000 {
+ compatible = "moxa,moxart-mmc";
+ reg = <0x98e00000 0x5C>;
+ interrupts = <5 0>;
+ clocks = <&clk_apb>;
+ dmas = <&dma 5>,
+ <&dma 5>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/mmc/mxs-mmc.txt b/Documentation/devicetree/bindings/mmc/mxs-mmc.txt
new file mode 100644
index 000000000..515addc20
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/mxs-mmc.txt
@@ -0,0 +1,27 @@
+* Freescale MXS MMC controller
+
+The Freescale MXS Synchronous Serial Ports (SSP) can act as a MMC controller
+to support MMC, SD, and SDIO types of memory cards.
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the mxsmmc driver.
+
+Required properties:
+- compatible: Should be "fsl,<chip>-mmc". The supported chips include
+ imx23 and imx28.
+- interrupts: Should contain ERROR interrupt number
+- dmas: DMA specifier, consisting of a phandle to DMA controller node
+ and SSP DMA channel ID.
+ Refer to dma.txt and fsl-mxs-dma.txt for details.
+- dma-names: Must be "rx-tx".
+
+Examples:
+
+ssp0: ssp@80010000 {
+ compatible = "fsl,imx28-mmc";
+ reg = <0x80010000 2000>;
+ interrupts = <96>;
+ dmas = <&dma_apbh 0>;
+ dma-names = "rx-tx";
+ bus-width = <8>;
+};
diff --git a/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
new file mode 100644
index 000000000..15b8368ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/nvidia,tegra20-sdhci.txt
@@ -0,0 +1,38 @@
+* NVIDIA Tegra Secure Digital Host Controller
+
+This controller on Tegra family SoCs provides an interface for MMC, SD,
+and SDIO types of memory cards.
+
+This file documents differences between the core properties described
+by mmc.txt and the properties used by the sdhci-tegra driver.
+
+Required properties:
+- compatible : For Tegra20, must contain "nvidia,tegra20-sdhci".
+ For Tegra30, must contain "nvidia,tegra30-sdhci". For Tegra114,
+ must contain "nvidia,tegra114-sdhci". For Tegra124, must contain
+ "nvidia,tegra124-sdhci". Otherwise, must contain "nvidia,<chip>-sdhci",
+ plus one of the above, where <chip> is tegra132 or tegra210.
+- clocks : Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - sdhci
+
+Optional properties:
+- power-gpios : Specify GPIOs for power control
+
+Example:
+
+sdhci@c8000200 {
+ compatible = "nvidia,tegra20-sdhci";
+ reg = <0xc8000200 0x200>;
+ interrupts = <47>;
+ clocks = <&tegra_car 14>;
+ resets = <&tegra_car 14>;
+ reset-names = "sdhci";
+ cd-gpios = <&gpio 69 0>; /* gpio PI5 */
+ wp-gpios = <&gpio 57 0>; /* gpio PH1 */
+ power-gpios = <&gpio 155 0>; /* gpio PT3 */
+ bus-width = <8>;
+};
diff --git a/Documentation/devicetree/bindings/mmc/orion-sdio.txt b/Documentation/devicetree/bindings/mmc/orion-sdio.txt
new file mode 100644
index 000000000..84f0ebd67
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/orion-sdio.txt
@@ -0,0 +1,17 @@
+* Marvell orion-sdio controller
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the orion-sdio driver.
+
+- compatible: Should be "marvell,orion-sdio"
+- clocks: reference to the clock of the SDIO interface
+
+Example:
+
+ mvsdio@d00d4000 {
+ compatible = "marvell,orion-sdio";
+ reg = <0xd00d4000 0x200>;
+ interrupts = <54>;
+ clocks = <&gateclk 17>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/mmc/pxa-mmc.txt b/Documentation/devicetree/bindings/mmc/pxa-mmc.txt
new file mode 100644
index 000000000..b7025de7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/pxa-mmc.txt
@@ -0,0 +1,25 @@
+* PXA MMC drivers
+
+Driver bindings for the PXA MCI (MMC/SDIO) interfaces
+
+Required properties:
+- compatible: Should be "marvell,pxa-mmc".
+- vmmc-supply: A regulator for VMMC
+
+Optional properties:
+- marvell,detect-delay-ms: sets the detection delay timeout in ms.
+- marvell,gpio-power: GPIO spec for the card power enable pin
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the pxa-mmc driver.
+
+Examples:
+
+mmc0: mmc@41100000 {
+ compatible = "marvell,pxa-mmc";
+ reg = <0x41100000 0x1000>;
+ interrupts = <23>;
+ cd-gpios = <&gpio 23 0>;
+ wp-gpios = <&gpio 24 0>;
+};
+
diff --git a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
new file mode 100644
index 000000000..299081f94
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
@@ -0,0 +1,32 @@
+* Renesas Multi Media Card Interface (MMCIF) Controller
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the MMCIF device.
+
+
+Required properties:
+
+- compatible: must contain one of the following
+ - "renesas,mmcif-r8a7740" for the MMCIF found in r8a7740 SoCs
+ - "renesas,mmcif-r8a7790" for the MMCIF found in r8a7790 SoCs
+ - "renesas,mmcif-r8a7791" for the MMCIF found in r8a7791 SoCs
+ - "renesas,sh-mmcif" for the generic MMCIF
+
+- clocks: reference to the functional clock
+
+- dmas: reference to the DMA channels, one per channel name listed in the
+ dma-names property.
+- dma-names: must contain "tx" for the transmit DMA channel and "rx" for the
+ receive DMA channel.
+
+
+Example: R8A7790 (R-Car H2) MMCIF0
+
+ mmcif0: mmc@ee200000 {
+ compatible = "renesas,mmcif-r8a7790", "renesas,sh-mmcif";
+ reg = <0 0xee200000 0 0x80>;
+ interrupts = <0 169 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp3_clks R8A7790_CLK_MMCIF0>;
+ dmas = <&dmac0 0xd1>, <&dmac0 0xd2>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
new file mode 100644
index 000000000..c327c2d6f
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/rockchip-dw-mshc.txt
@@ -0,0 +1,25 @@
+* Rockchip specific extensions to the Synopsys Designware Mobile
+ Storage Host Controller
+
+The Synopsys designware mobile storage host controller is used to interface
+a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
+differences between the core Synopsys dw mshc controller properties described
+by synopsys-dw-mshc.txt and the properties used by the Rockchip specific
+extensions to the Synopsys Designware Mobile Storage Host Controller.
+
+Required Properties:
+
+* compatible: should be
+ - "rockchip,rk2928-dw-mshc": for Rockchip RK2928 and following,
+ before RK3288
+ - "rockchip,rk3288-dw-mshc": for Rockchip RK3288
+
+Example:
+
+ rkdwmmc0@12200000 {
+ compatible = "rockchip,rk3288-dw-mshc";
+ reg = <0x12200000 0x1000>;
+ interrupts = <0 75 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt b/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt
new file mode 100644
index 000000000..42e0a9afa
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/samsung-sdhci.txt
@@ -0,0 +1,32 @@
+* Samsung's SDHCI Controller device tree bindings
+
+Samsung's SDHCI controller is used as a connectivity interface with external
+MMC, SD and eMMC storage mediums. This file documents differences between the
+core mmc properties described by mmc.txt and the properties used by the
+Samsung implementation of the SDHCI controller.
+
+Required SoC Specific Properties:
+- compatible: should be one of the following
+ - "samsung,s3c6410-sdhci": For controllers compatible with s3c6410 sdhci
+ controller.
+ - "samsung,exynos4210-sdhci": For controllers compatible with Exynos4 sdhci
+ controller.
+
+Required Board Specific Properties:
+- pinctrl-0: Should specify pin control groups used for this controller.
+- pinctrl-names: Should contain only one value - "default".
+
+Example:
+ sdhci@12530000 {
+ compatible = "samsung,exynos4210-sdhci";
+ reg = <0x12530000 0x100>;
+ interrupts = <0 75 0>;
+ bus-width = <4>;
+ cd-gpios = <&gpk2 2 0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&sd0_clk &sd0_cmd &sd0_bus4>;
+ };
+
+ Note: This example shows both SoC specific and board specific properties
+ in a single device node. The properties can be actually be separated
+ into SoC specific node and board specific node.
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-dove.txt b/Documentation/devicetree/bindings/mmc/sdhci-dove.txt
new file mode 100644
index 000000000..ae9aab9ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-dove.txt
@@ -0,0 +1,14 @@
+* Marvell sdhci-dove controller
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the sdhci-pxav2 and sdhci-pxav3 drivers.
+
+- compatible: Should be "marvell,dove-sdhci".
+
+Example:
+
+sdio0: sdio@92000 {
+ compatible = "marvell,dove-sdhci";
+ reg = <0x92000 0x100>;
+ interrupts = <35>;
+};
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-fujitsu.txt b/Documentation/devicetree/bindings/mmc/sdhci-fujitsu.txt
new file mode 100644
index 000000000..de2c53cff
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-fujitsu.txt
@@ -0,0 +1,30 @@
+* Fujitsu SDHCI controller
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the sdhci_f_sdh30 driver.
+
+Required properties:
+- compatible: "fujitsu,mb86s70-sdhci-3.0"
+- clocks: Must contain an entry for each entry in clock-names. It is a
+ list of phandles and clock-specifier pairs.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Should contain the following two entries:
+ "iface" - clock used for sdhci interface
+ "core" - core clock for sdhci controller
+
+Optional properties:
+- vqmmc-supply: phandle to the regulator device tree node, mentioned
+ as the VCCQ/VDD_IO supply in the eMMC/SD specs.
+
+Example:
+
+ sdhci1: mmc@36600000 {
+ compatible = "fujitsu,mb86s70-sdhci-3.0";
+ reg = <0 0x36600000 0x1000>;
+ interrupts = <0 172 0x4>,
+ <0 173 0x4>;
+ bus-width = <4>;
+ vqmmc-supply = <&vccq_sdhci1>;
+ clocks = <&clock 2 2 0>, <&clock 2 3 0>;
+ clock-names = "iface", "core";
+ };
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
new file mode 100644
index 000000000..485483a63
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
@@ -0,0 +1,55 @@
+* Qualcomm SDHCI controller (sdhci-msm)
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the sdhci-msm driver.
+
+Required properties:
+- compatible: Should contain "qcom,sdhci-msm-v4".
+- reg: Base address and length of the register in the following order:
+ - Host controller register map (required)
+ - SD Core register map (required)
+- interrupts: Should contain an interrupt-specifiers for the interrupts:
+ - Host controller interrupt (required)
+- pinctrl-names: Should contain only one value - "default".
+- pinctrl-0: Should specify pin control groups used for this controller.
+- clocks: A list of phandle + clock-specifier pairs for the clocks listed in clock-names.
+- clock-names: Should contain the following:
+ "iface" - Main peripheral bus clock (PCLK/HCLK - AHB Bus clock) (required)
+ "core" - SDC MMC clock (MCLK) (required)
+ "bus" - SDCC bus voter clock (optional)
+
+Example:
+
+ sdhc_1: sdhci@f9824900 {
+ compatible = "qcom,sdhci-msm-v4";
+ reg = <0xf9824900 0x11c>, <0xf9824000 0x800>;
+ interrupts = <0 123 0>;
+ bus-width = <8>;
+ non-removable;
+
+ vmmc-supply = <&pm8941_l20>;
+ vqmmc-supply = <&pm8941_s3>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdc1_clk &sdc1_cmd &sdc1_data>;
+
+ clocks = <&gcc GCC_SDCC1_APPS_CLK>, <&gcc GCC_SDCC1_AHB_CLK>;
+ clock-names = "core", "iface";
+ };
+
+ sdhc_2: sdhci@f98a4900 {
+ compatible = "qcom,sdhci-msm-v4";
+ reg = <0xf98a4900 0x11c>, <0xf98a4000 0x800>;
+ interrupts = <0 125 0>;
+ bus-width = <4>;
+ cd-gpios = <&msmgpio 62 0x1>;
+
+ vmmc-supply = <&pm8941_l21>;
+ vqmmc-supply = <&pm8941_l13>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdc2_clk &sdc2_cmd &sdc2_data>;
+
+ clocks = <&gcc GCC_SDCC2_APPS_CLK>, <&gcc GCC_SDCC2_AHB_CLK>;
+ clock-names = "core", "iface";
+ };
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-pxa.txt b/Documentation/devicetree/bindings/mmc/sdhci-pxa.txt
new file mode 100644
index 000000000..3d1b449d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-pxa.txt
@@ -0,0 +1,50 @@
+* Marvell sdhci-pxa v2/v3 controller
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the sdhci-pxav2 and sdhci-pxav3 drivers.
+
+Required properties:
+- compatible: Should be "mrvl,pxav2-mmc", "mrvl,pxav3-mmc" or
+ "marvell,armada-380-sdhci".
+- reg:
+ * for "mrvl,pxav2-mmc" and "mrvl,pxav3-mmc", one register area for
+ the SDHCI registers.
+
+ * for "marvell,armada-380-sdhci", three register areas. The first
+ one for the SDHCI registers themselves, the second one for the
+ AXI/Mbus bridge registers of the SDHCI unit, the third one for the
+ SDIO3 Configuration register
+- reg names: should be "sdhci", "mbus", "conf-sdio3". only mandatory
+ for "marvell,armada-380-sdhci"
+- clocks: Array of clocks required for SDHCI; requires at least one for
+ I/O clock.
+- clock-names: Array of names corresponding to clocks property; shall be
+ "io" for I/O clock and "core" for optional core clock.
+
+Optional properties:
+- mrvl,clk-delay-cycles: Specify a number of cycles to delay for tuning.
+
+Example:
+
+sdhci@d4280800 {
+ compatible = "mrvl,pxav3-mmc";
+ reg = <0xd4280800 0x800>;
+ bus-width = <8>;
+ interrupts = <27>;
+ clocks = <&chip CLKID_SDIO1XIN>, <&chip CLKID_SDIO1>;
+ clock-names = "io", "core";
+ non-removable;
+ mrvl,clk-delay-cycles = <31>;
+};
+
+sdhci@d8000 {
+ compatible = "marvell,armada-380-sdhci";
+ reg-names = "sdhci", "mbus", "conf-sdio3";
+ reg = <0xd8000 0x1000>,
+ <0xdc000 0x100>;
+ <0x18454 0x4>;
+ interrupts = <0 25 0x4>;
+ clocks = <&gateclk 17>;
+ clock-names = "io";
+ mrvl,clk-delay-cycles = <0x1F>;
+};
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-sirf.txt b/Documentation/devicetree/bindings/mmc/sdhci-sirf.txt
new file mode 100644
index 000000000..dd6ed464b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-sirf.txt
@@ -0,0 +1,18 @@
+* SiRFprimII/marco/atlas6 SDHCI Controller
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the sdhci-sirf driver.
+
+Required properties:
+- compatible: sirf,prima2-sdhc
+
+Optional properties:
+- cd-gpios: card detect gpio, with zero flags.
+
+Example:
+
+ sd0: sdhci@56000000 {
+ compatible = "sirf,prima2-sdhc";
+ reg = <0xcd000000 0x100000>;
+ cd-gpios = <&gpio 6 0>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-spear.txt b/Documentation/devicetree/bindings/mmc/sdhci-spear.txt
new file mode 100644
index 000000000..fd3643e7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-spear.txt
@@ -0,0 +1,18 @@
+* SPEAr SDHCI Controller
+
+This file documents differences between the core properties in mmc.txt
+and the properties used by the sdhci-spear driver.
+
+Required properties:
+- compatible: "st,spear300-sdhci"
+
+Optional properties:
+- cd-gpios: card detect gpio, with zero flags.
+
+Example:
+
+ sdhci@fc000000 {
+ compatible = "st,spear300-sdhci";
+ reg = <0xfc000000 0x1000>;
+ cd-gpios = <&gpio0 6 0>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
new file mode 100644
index 000000000..18d950df2
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
@@ -0,0 +1,113 @@
+* STMicroelectronics sdhci-st MMC/SD controller
+
+This file documents the differences between the core properties in
+Documentation/devicetree/bindings/mmc/mmc.txt and the properties
+used by the sdhci-st driver.
+
+Required properties:
+- compatible: Must be "st,sdhci" and it can be compatible to "st,sdhci-stih407"
+ to set the internal glue logic used for configuring the MMC
+ subsystem (mmcss) inside the FlashSS (available in STiH407 SoC
+ family).
+
+- clock-names: Should be "mmc".
+ See: Documentation/devicetree/bindings/resource-names.txt
+- clocks: Phandle to the clock.
+ See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+- interrupts: One mmc interrupt should be described here.
+- interrupt-names: Should be "mmcirq".
+
+- pinctrl-names: A pinctrl state names "default" must be defined.
+- pinctrl-0: Phandle referencing pin configuration of the sd/emmc controller.
+ See: Documentation/devicetree/bindings/pinctrl/pinctrl-binding.txt
+
+- reg: This must provide the host controller base address and it can also
+ contain the FlashSS Top register for TX/RX delay used by the driver
+ to configure DLL inside the flashSS, if so reg-names must also be
+ specified.
+
+Optional properties:
+- reg-names: Should be "mmc" and "top-mmc-delay". "top-mmc-delay" is optional
+ for eMMC on stih407 family silicon to configure DLL inside FlashSS.
+
+- non-removable: Non-removable slot. Also used for configuring mmcss in STiH407 SoC
+ family.
+ See: Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- bus-width: Number of data lines.
+ See: Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- max-frequency: Can be 200MHz, 100Mz or 50MHz (default) and used for
+ configuring the CCONFIG3 in the mmcss.
+ See: Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- resets: Phandle and reset specifier pair to softreset line of HC IP.
+ See: Documentation/devicetree/bindings/reset/reset.txt
+
+- vqmmc-supply: Phandle to the regulator dt node, mentioned as the vcc/vdd
+ supply in eMMC/SD specs.
+
+- sd-uhs--sdr50: To enable the SDR50 in the mmcss.
+ See: Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- sd-uhs-sdr104: To enable the SDR104 in the mmcss.
+ See: Documentation/devicetree/bindings/mmc/mmc.txt.
+
+- sd-uhs-ddr50: To enable the DDR50 in the mmcss.
+ See: Documentation/devicetree/bindings/mmc/mmc.txt.
+
+Example:
+
+/* Example stih416e eMMC configuration */
+
+mmc0: sdhci@fe81e000 {
+ compatible = "st,sdhci";
+ status = "disabled";
+ reg = <0xfe81e000 0x1000>;
+ interrupts = <GIC_SPI 127 IRQ_TYPE_NONE>;
+ interrupt-names = "mmcirq";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_mmc0>;
+ clock-names = "mmc";
+ clocks = <&clk_s_a1_ls 1>;
+ bus-width = <8>
+
+/* Example SD stih407 family configuration */
+
+mmc1: sdhci@09080000 {
+ compatible = "st,sdhci-stih407", "st,sdhci";
+ status = "disabled";
+ reg = <0x09080000 0x7ff>;
+ reg-names = "mmc";
+ interrupts = <GIC_SPI 90 IRQ_TYPE_NONE>;
+ interrupt-names = "mmcirq";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sd1>;
+ clock-names = "mmc";
+ clocks = <&clk_s_c0_flexgen CLK_MMC_1>;
+ resets = <&softreset STIH407_MMC1_SOFTRESET>;
+ bus-width = <4>;
+};
+
+/* Example eMMC stih407 family configuration */
+
+mmc0: sdhci@09060000 {
+ compatible = "st,sdhci-stih407", "st,sdhci";
+ status = "disabled";
+ reg = <0x09060000 0x7ff>, <0x9061008 0x20>;
+ reg-names = "mmc", "top-mmc-delay";
+ interrupts = <GIC_SPI 92 IRQ_TYPE_NONE>;
+ interrupt-names = "mmcirq";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_mmc0>;
+ clock-names = "mmc";
+ clocks = <&clk_s_c0_flexgen CLK_MMC_0>;
+ vqmmc-supply = <&vmmc_reg>;
+ max-frequency = <200000000>;
+ bus-width = <8>;
+ non-removable;
+ sd-uhs-sdr50;
+ sd-uhs-sdr104;
+ sd-uhs-ddr50;
+};
diff --git a/Documentation/devicetree/bindings/mmc/socfpga-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/socfpga-dw-mshc.txt
new file mode 100644
index 000000000..4897bea7e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/socfpga-dw-mshc.txt
@@ -0,0 +1,23 @@
+* Altera SOCFPGA specific extensions to the Synopsys Designware Mobile
+ Storage Host Controller
+
+The Synopsys designware mobile storage host controller is used to interface
+a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
+differences between the core Synopsys dw mshc controller properties described
+by synopsys-dw-mshc.txt and the properties used by the Altera SOCFPGA specific
+extensions to the Synopsys Designware Mobile Storage Host Controller.
+
+Required Properties:
+
+* compatible: should be
+ - "altr,socfpga-dw-mshc": for Altera's SOCFPGA platform
+
+Example:
+
+ mmc: dwmmc0@ff704000 {
+ compatible = "altr,socfpga-dw-mshc";
+ reg = <0xff704000 0x1000>;
+ interrupts = <0 129 4>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/sunxi-mmc.txt b/Documentation/devicetree/bindings/mmc/sunxi-mmc.txt
new file mode 100644
index 000000000..4bf41d833
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sunxi-mmc.txt
@@ -0,0 +1,43 @@
+* Allwinner sunxi MMC controller
+
+The highspeed MMC host controller on Allwinner SoCs provides an interface
+for MMC, SD and SDIO types of memory cards.
+
+Supported maximum speeds are the ones of the eMMC standard 4.5 as well
+as the speed of SD standard 3.0.
+Absolute maximum transfer rate is 200MB/s
+
+Required properties:
+ - compatible : "allwinner,sun4i-a10-mmc" or "allwinner,sun5i-a13-mmc"
+ - reg : mmc controller base registers
+ - clocks : a list with 4 phandle + clock specifier pairs
+ - clock-names : must contain "ahb", "mmc", "output" and "sample"
+ - interrupts : mmc controller interrupt
+
+Optional properties:
+ - resets : phandle + reset specifier pair
+ - reset-names : must contain "ahb"
+ - for cd, bus-width and additional generic mmc parameters
+ please refer to mmc.txt within this directory
+
+Examples:
+ - Within .dtsi:
+ mmc0: mmc@01c0f000 {
+ compatible = "allwinner,sun5i-a13-mmc";
+ reg = <0x01c0f000 0x1000>;
+ clocks = <&ahb_gates 8>, <&mmc0_clk>, <&mmc0_output_clk>, <&mmc0_sample_clk>;
+ clock-names = "ahb", "mod", "output", "sample";
+ interrupts = <0 32 4>;
+ status = "disabled";
+ };
+
+ - Within dts:
+ mmc0: mmc@01c0f000 {
+ pinctrl-names = "default", "default";
+ pinctrl-0 = <&mmc0_pins_a>;
+ pinctrl-1 = <&mmc0_cd_pin_reference_design>;
+ bus-width = <4>;
+ cd-gpios = <&pio 7 1 0>; /* PH1 */
+ cd-inverted;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
new file mode 100644
index 000000000..346c6095a
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
@@ -0,0 +1,109 @@
+* Synopsys Designware Mobile Storage Host Controller
+
+The Synopsys designware mobile storage host controller is used to interface
+a SoC with storage medium such as eMMC or SD/MMC cards. This file documents
+differences between the core mmc properties described by mmc.txt and the
+properties used by the Synopsys Designware Mobile Storage Host Controller.
+
+Required Properties:
+
+* compatible: should be
+ - snps,dw-mshc: for controllers compliant with synopsys dw-mshc.
+* #address-cells: should be 1.
+* #size-cells: should be 0.
+
+# Slots: The slot specific information are contained within child-nodes with
+ each child-node representing a supported slot. There should be atleast one
+ child node representing a card slot. The name of the child node representing
+ the slot is recommended to be slot@n where n is the unique number of the slot
+ connnected to the controller. The following are optional properties which
+ can be included in the slot child node.
+
+ * reg: specifies the physical slot number. The valid values of this
+ property is 0 to (num-slots -1), where num-slots is the value
+ specified by the num-slots property.
+
+ * bus-width: as documented in mmc core bindings.
+
+ * wp-gpios: specifies the write protect gpio line. The format of the
+ gpio specifier depends on the gpio controller. If a GPIO is not used
+ for write-protect, this property is optional.
+
+ * disable-wp: If the wp-gpios property isn't present then (by default)
+ we'd assume that the write protect is hooked up directly to the
+ controller's special purpose write protect line (accessible via
+ the WRTPRT register). However, it's possible that we simply don't
+ want write protect. In that case specify 'disable-wp'.
+ NOTE: This property is not required for slots known to always
+ connect to eMMC or SDIO cards.
+
+Optional properties:
+
+* clocks: from common clock binding: handle to biu and ciu clocks for the
+ bus interface unit clock and the card interface unit clock.
+
+* clock-names: from common clock binding: Shall be "biu" and "ciu".
+ If the biu clock is missing we'll simply skip enabling it. If the
+ ciu clock is missing we'll just assume that the clock is running at
+ clock-frequency. It is an error to omit both the ciu clock and the
+ clock-frequency.
+
+* clock-frequency: should be the frequency (in Hz) of the ciu clock. If this
+ is specified and the ciu clock is specified then we'll try to set the ciu
+ clock to this at probe time.
+
+* clock-freq-min-max: Minimum and Maximum clock frequency for card output
+ clock(cclk_out). If it's not specified, max is 200MHZ and min is 400KHz by default.
+
+* num-slots: specifies the number of slots supported by the controller.
+ The number of physical slots actually used could be equal or less than the
+ value specified by num-slots. If this property is not specified, the value
+ of num-slot property is assumed to be 1.
+
+* fifo-depth: The maximum size of the tx/rx fifo's. If this property is not
+ specified, the default value of the fifo size is determined from the
+ controller registers.
+
+* card-detect-delay: Delay in milli-seconds before detecting card after card
+ insert event. The default value is 0.
+
+* supports-highspeed (DEPRECATED): Enables support for high speed cards (up to 50MHz)
+ (use "cap-mmc-highspeed" or "cap-sd-highspeed" instead)
+
+* broken-cd: as documented in mmc core bindings.
+
+* vmmc-supply: The phandle to the regulator to use for vmmc. If this is
+ specified we'll defer probe until we can find this regulator.
+
+Aliases:
+
+- All the MSHC controller nodes should be represented in the aliases node using
+ the following format 'mshc{n}' where n is a unique number for the alias.
+
+Example:
+
+The MSHC controller node can be split into two portions, SoC specific and
+board specific portions as listed below.
+
+ dwmmc0@12200000 {
+ compatible = "snps,dw-mshc";
+ clocks = <&clock 351>, <&clock 132>;
+ clock-names = "biu", "ciu";
+ reg = <0x12200000 0x1000>;
+ interrupts = <0 75 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ dwmmc0@12200000 {
+ clock-frequency = <400000000>;
+ clock-freq-min-max = <400000 200000000>;
+ num-slots = <1>;
+ broken-cd;
+ fifo-depth = <0x80>;
+ card-detect-delay = <200>;
+ vmmc-supply = <&buck8>;
+ bus-width = <8>;
+ cap-mmc-highspeed;
+ cap-sd-highspeed;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
new file mode 100644
index 000000000..76bf087bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
@@ -0,0 +1,112 @@
+* TI Highspeed MMC host controller for OMAP
+
+The Highspeed MMC Host Controller on TI OMAP family
+provides an interface for MMC, SD, and SDIO types of memory cards.
+
+This file documents differences between the core properties described
+by mmc.txt and the properties used by the omap_hsmmc driver.
+
+Required properties:
+- compatible:
+ Should be "ti,omap2-hsmmc", for OMAP2 controllers
+ Should be "ti,omap3-hsmmc", for OMAP3 controllers
+ Should be "ti,omap3-pre-es3-hsmmc" for OMAP3 controllers pre ES3.0
+ Should be "ti,omap4-hsmmc", for OMAP4 controllers
+ Should be "ti,am33xx-hsmmc", for AM335x controllers
+- ti,hwmods: Must be "mmc<n>", n is controller instance starting 1
+
+Optional properties:
+ti,dual-volt: boolean, supports dual voltage cards
+<supply-name>-supply: phandle to the regulator device tree node
+"supply-name" examples are "vmmc", "vmmc_aux" etc
+ti,non-removable: non-removable slot (like eMMC)
+ti,needs-special-reset: Requires a special softreset sequence
+ti,needs-special-hs-handling: HSMMC IP needs special setting for handling High Speed
+dmas: List of DMA specifiers with the controller specific format
+as described in the generic DMA client binding. A tx and rx
+specifier is required.
+dma-names: List of DMA request names. These strings correspond
+1:1 with the DMA specifiers listed in dmas. The string naming is
+to be "rx" and "tx" for RX and TX DMA requests, respectively.
+
+Examples:
+
+[hwmod populated DMA resources]
+
+ mmc1: mmc@0x4809c000 {
+ compatible = "ti,omap4-hsmmc";
+ reg = <0x4809c000 0x400>;
+ ti,hwmods = "mmc1";
+ ti,dual-volt;
+ bus-width = <4>;
+ vmmc-supply = <&vmmc>; /* phandle to regulator node */
+ ti,non-removable;
+ };
+
+[generic DMA request binding]
+
+ mmc1: mmc@0x4809c000 {
+ compatible = "ti,omap4-hsmmc";
+ reg = <0x4809c000 0x400>;
+ ti,hwmods = "mmc1";
+ ti,dual-volt;
+ bus-width = <4>;
+ vmmc-supply = <&vmmc>; /* phandle to regulator node */
+ ti,non-removable;
+ dmas = <&edma 24
+ &edma 25>;
+ dma-names = "tx", "rx";
+ };
+
+[workaround for missing swakeup on am33xx]
+
+This SOC is missing the swakeup line, it will not detect SDIO irq
+while in suspend.
+
+ ------
+ | PRCM |
+ ------
+ ^ |
+ swakeup | | fclk
+ | v
+ ------ ------- -----
+ | card | -- CIRQ --> | hsmmc | -- IRQ --> | CPU |
+ ------ ------- -----
+
+In suspend the fclk is off and the module is disfunctional. Even register reads
+will fail. A small logic in the host will request fclk restore, when an
+external event is detected. Once the clock is restored, the host detects the
+event normally. Since am33xx doesn't have this line it never wakes from
+suspend.
+
+The workaround is to reconfigure the dat1 line as a GPIO upon suspend. To make
+this work, we need to set the named pinctrl states "default" and "idle".
+Prepare idle to remux dat1 as a gpio, and default to remux it back as sdio
+dat1. The MMC driver will then toggle between idle and default state during
+runtime.
+
+In summary:
+1. select matching 'compatible' section, see example below.
+2. specify pinctrl states "default" and "idle", "sleep" is optional.
+3. specify the gpio irq used for detecting sdio irq in suspend
+
+If configuration is incomplete, a warning message is emitted "falling back to
+polling". Also check the "sdio irq mode" in /sys/kernel/debug/mmc0/regs. Mind
+not every application needs SDIO irq, e.g. MMC cards.
+
+ mmc1: mmc@48060100 {
+ compatible = "ti,am33xx-hsmmc";
+ ...
+ pinctrl-names = "default", "idle", "sleep"
+ pinctrl-0 = <&mmc1_pins>;
+ pinctrl-1 = <&mmc1_idle>;
+ pinctrl-2 = <&mmc1_sleep>;
+ ...
+ interrupts-extended = <&intc 64 &gpio2 28 0>;
+ };
+
+ mmc1_idle : pinmux_cirq_pin {
+ pinctrl-single,pins = <
+ 0x0f8 0x3f /* GPIO2_28 */
+ >;
+ };
diff --git a/Documentation/devicetree/bindings/mmc/ti-omap.txt b/Documentation/devicetree/bindings/mmc/ti-omap.txt
new file mode 100644
index 000000000..8de579969
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/ti-omap.txt
@@ -0,0 +1,54 @@
+* TI MMC host controller for OMAP1 and 2420
+
+The MMC Host Controller on TI OMAP1 and 2420 family provides
+an interface for MMC, SD, and SDIO types of memory cards.
+
+This file documents differences between the core properties described
+by mmc.txt and the properties used by the omap mmc driver.
+
+Note that this driver will not work with omap2430 or later omaps,
+please see the omap hsmmc driver for the current omaps.
+
+Required properties:
+- compatible: Must be "ti,omap2420-mmc", for OMAP2420 controllers
+- ti,hwmods: For 2420, must be "msdi<n>", where n is controller
+ instance starting 1
+
+Examples:
+
+ msdi1: mmc@4809c000 {
+ compatible = "ti,omap2420-mmc";
+ ti,hwmods = "msdi1";
+ reg = <0x4809c000 0x80>;
+ interrupts = <83>;
+ dmas = <&sdma 61 &sdma 62>;
+ dma-names = "tx", "rx";
+ };
+
+* TI MMC host controller for OMAP1 and 2420
+
+The MMC Host Controller on TI OMAP1 and 2420 family provides
+an interface for MMC, SD, and SDIO types of memory cards.
+
+This file documents differences between the core properties described
+by mmc.txt and the properties used by the omap mmc driver.
+
+Note that this driver will not work with omap2430 or later omaps,
+please see the omap hsmmc driver for the current omaps.
+
+Required properties:
+- compatible: Must be "ti,omap2420-mmc", for OMAP2420 controllers
+- ti,hwmods: For 2420, must be "msdi<n>", where n is controller
+ instance starting 1
+
+Examples:
+
+ msdi1: mmc@4809c000 {
+ compatible = "ti,omap2420-mmc";
+ ti,hwmods = "msdi1";
+ reg = <0x4809c000 0x80>;
+ interrupts = <83>;
+ dmas = <&sdma 61 &sdma 62>;
+ dma-names = "tx", "rx";
+ };
+
diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
new file mode 100644
index 000000000..400b640fa
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
@@ -0,0 +1,27 @@
+* Toshiba Mobile IO SD/MMC controller
+
+The tmio-mmc driver doesn't probe its devices actively, instead its binding to
+devices is managed by either MFD drivers or by the sh_mobile_sdhi platform
+driver. Those drivers supply the tmio-mmc driver with platform data, that either
+describe hardware capabilities, known to them, or are obtained by them from
+their own platform data or from their DT information. In the latter case all
+compulsory and any optional properties, common to all SD/MMC drivers, as
+described in mmc.txt, can be used. Additionally the following tmio_mmc-specific
+optional bindings can be used.
+
+Required properties:
+- compatible: "renesas,sdhi-shmobile" - a generic sh-mobile SDHI unit
+ "renesas,sdhi-sh7372" - SDHI IP on SH7372 SoC
+ "renesas,sdhi-sh73a0" - SDHI IP on SH73A0 SoC
+ "renesas,sdhi-r8a73a4" - SDHI IP on R8A73A4 SoC
+ "renesas,sdhi-r8a7740" - SDHI IP on R8A7740 SoC
+ "renesas,sdhi-r8a7778" - SDHI IP on R8A7778 SoC
+ "renesas,sdhi-r8a7779" - SDHI IP on R8A7779 SoC
+ "renesas,sdhi-r8a7790" - SDHI IP on R8A7790 SoC
+ "renesas,sdhi-r8a7791" - SDHI IP on R8A7791 SoC
+ "renesas,sdhi-r8a7792" - SDHI IP on R8A7792 SoC
+ "renesas,sdhi-r8a7793" - SDHI IP on R8A7793 SoC
+ "renesas,sdhi-r8a7794" - SDHI IP on R8A7794 SoC
+
+Optional properties:
+- toshiba,mmc-wrprotect-disable: write-protect detection is unavailable
diff --git a/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
new file mode 100644
index 000000000..8babdaa86
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
@@ -0,0 +1,33 @@
+* Renesas usdhi6rol0 SD/SDIO host controller
+
+Required properties:
+
+- compatible: must be
+ "renesas,usdhi6rol0"
+- interrupts: 3 interrupts, named "card detect", "data" and "SDIO" must be
+ specified
+- clocks: a clock binding for the IMCLK input
+
+Optional properties:
+
+- vmmc-supply: a phandle of a regulator, supplying Vcc to the card
+- vqmmc-supply: a phandle of a regulator, supplying VccQ to the card
+
+Additionally any standard mmc bindings from mmc.txt can be used.
+
+Example:
+
+sd0: sd@ab000000 {
+ compatible = "renesas,usdhi6rol0";
+ reg = <0xab000000 0x200>;
+ interrupts = <0 23 0x4
+ 0 24 0x4
+ 0 25 0x4>;
+ interrupt-names = "card detect", "data", "SDIO";
+ bus-width = <4>;
+ max-frequency = <50000000>;
+ cap-power-off-card;
+ clocks = <&imclk>;
+ vmmc-supply = <&vcc_sd0>;
+ vqmmc-supply = <&vccq_sd0>;
+};
diff --git a/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt b/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt
new file mode 100644
index 000000000..d7fb6abb3
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/vt8500-sdmmc.txt
@@ -0,0 +1,23 @@
+* Wondermedia WM8505/WM8650 SD/MMC Host Controller
+
+This file documents differences between the core properties described
+by mmc.txt and the properties used by the wmt-sdmmc driver.
+
+Required properties:
+- compatible: Should be "wm,wm8505-sdhc".
+- interrupts: Two interrupts are required - regular irq and dma irq.
+
+Optional properties:
+- sdon-inverted: SD_ON bit is inverted on the controller
+
+Examples:
+
+sdhc@d800a000 {
+ compatible = "wm,wm8505-sdhc";
+ reg = <0xd800a000 0x1000>;
+ interrupts = <20 21>;
+ clocks = <&sdhc>;
+ bus-width = <4>;
+ sdon-inverted;
+};
+
diff --git a/Documentation/devicetree/bindings/mtd/arm-versatile.txt b/Documentation/devicetree/bindings/mtd/arm-versatile.txt
new file mode 100644
index 000000000..beace4b89
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/arm-versatile.txt
@@ -0,0 +1,8 @@
+Flash device on ARM Versatile board
+
+Required properties:
+- compatible : must be "arm,versatile-flash";
+- bank-width : width in bytes of flash interface.
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
diff --git a/Documentation/devicetree/bindings/mtd/atmel-dataflash.txt b/Documentation/devicetree/bindings/mtd/atmel-dataflash.txt
new file mode 100644
index 000000000..1889a4db5
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/atmel-dataflash.txt
@@ -0,0 +1,17 @@
+* Atmel Data Flash
+
+Required properties:
+- compatible : "atmel,<model>", "atmel,<series>", "atmel,dataflash".
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Example:
+
+flash@1 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "atmel,at45db321d", "atmel,at45", "atmel,dataflash";
+ spi-max-frequency = <25000000>;
+ reg = <1>;
+};
diff --git a/Documentation/devicetree/bindings/mtd/atmel-nand.txt b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
new file mode 100644
index 000000000..7d4c8eb77
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/atmel-nand.txt
@@ -0,0 +1,111 @@
+Atmel NAND flash
+
+Required properties:
+- compatible : should be "atmel,at91rm9200-nand" or "atmel,sama5d4-nand".
+- reg : should specify localbus address and size used for the chip,
+ and hardware ECC controller if available.
+ If the hardware ECC is PMECC, it should contain address and size for
+ PMECC and PMECC Error Location controller.
+ The PMECC lookup table address and size in ROM is optional. If not
+ specified, driver will build it in runtime.
+- atmel,nand-addr-offset : offset for the address latch.
+- atmel,nand-cmd-offset : offset for the command latch.
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+ representing partitions.
+
+- gpios : specifies the gpio pins to control the NAND device. detect is an
+ optional gpio and may be set to 0 if not present.
+
+Optional properties:
+- atmel,nand-has-dma : boolean to support dma transfer for nand read/write.
+- nand-ecc-mode : String, operation mode of the NAND ecc mode, soft by default.
+ Supported values are: "none", "soft", "hw", "hw_syndrome", "hw_oob_first",
+ "soft_bch".
+- atmel,has-pmecc : boolean to enable Programmable Multibit ECC hardware.
+ Only supported by at91sam9x5 or later sam9 product.
+- atmel,pmecc-cap : error correct capability for Programmable Multibit ECC
+ Controller. Supported values are: 2, 4, 8, 12, 24.
+- atmel,pmecc-sector-size : sector size for ECC computation. Supported values
+ are: 512, 1024.
+- atmel,pmecc-lookup-table-offset : includes two offsets of lookup table in ROM
+ for different sector size. First one is for sector size 512, the next is for
+ sector size 1024. If not specified, driver will build the table in runtime.
+- nand-bus-width : 8 or 16 bus width if not present 8
+- nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
+- Nand Flash Controller(NFC) is a slave driver under Atmel nand flash
+ - Required properties:
+ - compatible : "atmel,sama5d3-nfc".
+ - reg : should specify the address and size used for NFC command registers,
+ NFC registers and NFC Sram. NFC Sram address and size can be absent
+ if don't want to use it.
+ - clocks: phandle to the peripheral clock
+ - Optional properties:
+ - atmel,write-by-sram: boolean to enable NFC write by sram.
+
+Examples:
+nand0: nand@40000000,0 {
+ compatible = "atmel,at91rm9200-nand";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x40000000 0x10000000
+ 0xffffe800 0x200
+ >;
+ atmel,nand-addr-offset = <21>; /* ale */
+ atmel,nand-cmd-offset = <22>; /* cle */
+ nand-on-flash-bbt;
+ nand-ecc-mode = "soft";
+ gpios = <&pioC 13 0 /* rdy */
+ &pioC 14 0 /* nce */
+ 0 /* cd */
+ >;
+ partition@0 {
+ ...
+ };
+};
+
+/* for PMECC supported chips */
+nand0: nand@40000000 {
+ compatible = "atmel,at91rm9200-nand";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = < 0x40000000 0x10000000 /* bus addr & size */
+ 0xffffe000 0x00000600 /* PMECC addr & size */
+ 0xffffe600 0x00000200 /* PMECC ERRLOC addr & size */
+ 0x00100000 0x00100000 /* ROM addr & size */
+ >;
+ atmel,nand-addr-offset = <21>; /* ale */
+ atmel,nand-cmd-offset = <22>; /* cle */
+ nand-on-flash-bbt;
+ nand-ecc-mode = "hw";
+ atmel,has-pmecc; /* enable PMECC */
+ atmel,pmecc-cap = <2>;
+ atmel,pmecc-sector-size = <512>;
+ atmel,pmecc-lookup-table-offset = <0x8000 0x10000>;
+ gpios = <&pioD 5 0 /* rdy */
+ &pioD 4 0 /* nce */
+ 0 /* cd */
+ >;
+ partition@0 {
+ ...
+ };
+};
+
+/* for NFC supported chips */
+nand0: nand@40000000 {
+ compatible = "atmel,at91rm9200-nand";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ ...
+ nfc@70000000 {
+ compatible = "atmel,sama5d3-nfc";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ clocks = <&hsmc_clk>
+ reg = <
+ 0x70000000 0x10000000 /* NFC Command Registers */
+ 0xffffc000 0x00000070 /* NFC HSMC regs */
+ 0x00200000 0x00100000 /* NFC SRAM banks */
+ >;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/davinci-nand.txt b/Documentation/devicetree/bindings/mtd/davinci-nand.txt
new file mode 100644
index 000000000..cfb18abe6
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/davinci-nand.txt
@@ -0,0 +1,94 @@
+Device tree bindings for Texas instruments Davinci/Keystone NAND controller
+
+This file provides information, what the device node for the davinci/keystone
+NAND interface contains.
+
+Documentation:
+Davinci DM646x - http://www.ti.com/lit/ug/sprueq7c/sprueq7c.pdf
+Kestone - http://www.ti.com/lit/ug/sprugz3a/sprugz3a.pdf
+
+Required properties:
+
+- compatible: "ti,davinci-nand"
+ "ti,keystone-nand"
+
+- reg: Contains 2 offset/length values:
+ - offset and length for the access window.
+ - offset and length for accessing the AEMIF
+ control registers.
+
+- ti,davinci-chipselect: number of chipselect. Indicates on the
+ davinci_nand driver which chipselect is used
+ for accessing the nand.
+ Can be in the range [0-3].
+
+Recommended properties :
+
+- ti,davinci-mask-ale: mask for ALE. Needed for executing address
+ phase. These offset will be added to the base
+ address for the chip select space the NAND Flash
+ device is connected to.
+ If not set equal to 0x08.
+
+- ti,davinci-mask-cle: mask for CLE. Needed for executing command
+ phase. These offset will be added to the base
+ address for the chip select space the NAND Flash
+ device is connected to.
+ If not set equal to 0x10.
+
+- ti,davinci-mask-chipsel: mask for chipselect address. Needed to mask
+ addresses for given chipselect.
+
+- nand-ecc-mode: operation mode of the NAND ecc mode. ECC mode
+ valid values for davinci driver:
+ - "none"
+ - "soft"
+ - "hw"
+
+- ti,davinci-ecc-bits: used ECC bits, currently supported 1 or 4.
+
+- nand-bus-width: buswidth 8 or 16. If not present 8.
+
+- nand-on-flash-bbt: use flash based bad block table support. OOB
+ identifier is saved in OOB area. If not present
+ false.
+
+Deprecated properties:
+
+- ti,davinci-ecc-mode: operation mode of the NAND ecc mode. ECC mode
+ valid values for davinci driver:
+ - "none"
+ - "soft"
+ - "hw"
+
+- ti,davinci-nand-buswidth: buswidth 8 or 16. If not present 8.
+
+- ti,davinci-nand-use-bbt: use flash based bad block table support. OOB
+ identifier is saved in OOB area. If not present
+ false.
+
+Nand device bindings may contain additional sub-nodes describing partitions of
+the address space. See partition.txt for more detail. The NAND Flash timing
+values must be programmed in the chip select’s node of AEMIF
+memory-controller (see Documentation/devicetree/bindings/memory-controllers/
+davinci-aemif.txt).
+
+Example(da850 EVM ):
+
+nand_cs3@62000000 {
+ compatible = "ti,davinci-nand";
+ reg = <0x62000000 0x807ff
+ 0x68000000 0x8000>;
+ ti,davinci-chipselect = <1>;
+ ti,davinci-mask-ale = <0>;
+ ti,davinci-mask-cle = <0>;
+ ti,davinci-mask-chipsel = <0>;
+ nand-ecc-mode = "hw";
+ ti,davinci-ecc-bits = <4>;
+ nand-on-flash-bbt;
+
+ partition@180000 {
+ label = "ubifs";
+ reg = <0x180000 0x7e80000>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/denali-nand.txt b/Documentation/devicetree/bindings/mtd/denali-nand.txt
new file mode 100644
index 000000000..b04d03a1d
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/denali-nand.txt
@@ -0,0 +1,23 @@
+* Denali NAND controller
+
+Required properties:
+ - compatible : should be "denali,denali-nand-dt"
+ - reg : should contain registers location and length for data and reg.
+ - reg-names: Should contain the reg names "nand_data" and "denali_reg"
+ - interrupts : The interrupt number.
+ - dm-mask : DMA bit mask
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Examples:
+
+nand: nand@ff900000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "denali,denali-nand-dt";
+ reg = <0xff900000 0x100000>, <0xffb80000 0x10000>;
+ reg-names = "nand_data", "denali_reg";
+ interrupts = <0 144 4>;
+ dma-mask = <0xffffffff>;
+};
diff --git a/Documentation/devicetree/bindings/mtd/diskonchip.txt b/Documentation/devicetree/bindings/mtd/diskonchip.txt
new file mode 100644
index 000000000..3e13bfdbe
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/diskonchip.txt
@@ -0,0 +1,15 @@
+M-Systems and Sandisk DiskOnChip devices
+
+M-System DiskOnChip G3
+======================
+The Sandisk (formerly M-Systems) docg3 is a nand device of 64M to 256MB.
+
+Required properties:
+ - compatible: should be "m-systems,diskonchip-g3"
+ - reg: register base and size
+
+Example:
+ docg3: flash@0 {
+ compatible = "m-systems,diskonchip-g3";
+ reg = <0x0 0x2000>;
+ };
diff --git a/Documentation/devicetree/bindings/mtd/elm.txt b/Documentation/devicetree/bindings/mtd/elm.txt
new file mode 100644
index 000000000..8c1528c42
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/elm.txt
@@ -0,0 +1,16 @@
+Error location module
+
+Required properties:
+- compatible: Must be "ti,am33xx-elm"
+- reg: physical base address and size of the registers map.
+- interrupts: Interrupt number for the elm.
+
+Optional properties:
+- ti,hwmods: Name of the hwmod associated to the elm
+
+Example:
+elm: elm@0 {
+ compatible = "ti,am3352-elm";
+ reg = <0x48080000 0x2000>;
+ interrupts = <4>;
+};
diff --git a/Documentation/devicetree/bindings/mtd/flctl-nand.txt b/Documentation/devicetree/bindings/mtd/flctl-nand.txt
new file mode 100644
index 000000000..427f46dc6
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/flctl-nand.txt
@@ -0,0 +1,49 @@
+FLCTL NAND controller
+
+Required properties:
+- compatible : "renesas,shmobile-flctl-sh7372"
+- reg : Address range of the FLCTL
+- interrupts : flste IRQ number
+- nand-bus-width : bus width to NAND chip
+
+Optional properties:
+- dmas: DMA specifier(s)
+- dma-names: name for each DMA specifier. Valid names are
+ "data_tx", "data_rx", "ecc_tx", "ecc_rx"
+
+The DMA fields are not used yet in the driver but are listed here for
+completing the bindings.
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Example:
+
+ flctl@e6a30000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "renesas,shmobile-flctl-sh7372";
+ reg = <0xe6a30000 0x100>;
+ interrupts = <0x0d80>;
+
+ nand-bus-width = <16>;
+
+ dmas = <&dmac 1 /* data_tx */
+ &dmac 2;> /* data_rx */
+ dma-names = "data_tx", "data_rx";
+
+ system@0 {
+ label = "system";
+ reg = <0x0 0x8000000>;
+ };
+
+ userdata@8000000 {
+ label = "userdata";
+ reg = <0x8000000 0x10000000>;
+ };
+
+ cache@18000000 {
+ label = "cache";
+ reg = <0x18000000 0x8000000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
new file mode 100644
index 000000000..4461dc71c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/fsl-quadspi.txt
@@ -0,0 +1,35 @@
+* Freescale Quad Serial Peripheral Interface(QuadSPI)
+
+Required properties:
+ - compatible : Should be "fsl,vf610-qspi" or "fsl,imx6sx-qspi"
+ - reg : the first contains the register location and length,
+ the second contains the memory mapping address and length
+ - reg-names: Should contain the reg names "QuadSPI" and "QuadSPI-memory"
+ - interrupts : Should contain the interrupt for the device
+ - clocks : The clocks needed by the QuadSPI controller
+ - clock-names : the name of the clocks
+
+Optional properties:
+ - fsl,qspi-has-second-chip: The controller has two buses, bus A and bus B.
+ Each bus can be connected with two NOR flashes.
+ Most of the time, each bus only has one NOR flash
+ connected, this is the default case.
+ But if there are two NOR flashes connected to the
+ bus, you should enable this property.
+ (Please check the board's schematic.)
+
+Example:
+
+qspi0: quadspi@40044000 {
+ compatible = "fsl,vf610-qspi";
+ reg = <0x40044000 0x1000>, <0x20000000 0x10000000>;
+ reg-names = "QuadSPI", "QuadSPI-memory";
+ interrupts = <0 24 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks VF610_CLK_QSPI0_EN>,
+ <&clks VF610_CLK_QSPI0>;
+ clock-names = "qspi_en", "qspi";
+
+ flash0: s25fl128s@0 {
+ ....
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt b/Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt
new file mode 100644
index 000000000..fce4894f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/fsl-upm-nand.txt
@@ -0,0 +1,67 @@
+Freescale Localbus UPM programmed to work with NAND flash
+
+Required properties:
+- compatible : "fsl,upm-nand".
+- reg : should specify localbus chip select and size used for the chip.
+- fsl,upm-addr-offset : UPM pattern offset for the address latch.
+- fsl,upm-cmd-offset : UPM pattern offset for the command latch.
+
+Optional properties:
+- fsl,upm-wait-flags : add chip-dependent short delays after running the
+ UPM pattern (0x1), after writing a data byte (0x2) or after
+ writing out a buffer (0x4).
+- fsl,upm-addr-line-cs-offsets : address offsets for multi-chip support.
+ The corresponding address lines are used to select the chip.
+- gpios : may specify optional GPIOs connected to the Ready-Not-Busy pins
+ (R/B#). For multi-chip devices, "n" GPIO definitions are required
+ according to the number of chips.
+- chip-delay : chip dependent delay for transferring data from array to
+ read registers (tR). Required if property "gpios" is not used
+ (R/B# pins not connected).
+
+Each flash chip described may optionally contain additional sub-nodes
+describing partitions of the address space. See partition.txt for more
+detail.
+
+Examples:
+
+upm@1,0 {
+ compatible = "fsl,upm-nand";
+ reg = <1 0 1>;
+ fsl,upm-addr-offset = <16>;
+ fsl,upm-cmd-offset = <8>;
+ gpios = <&qe_pio_e 18 0>;
+
+ flash {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "...";
+
+ partition@0 {
+ ...
+ };
+ };
+};
+
+upm@3,0 {
+ #address-cells = <0>;
+ #size-cells = <0>;
+ compatible = "tqc,tqm8548-upm-nand", "fsl,upm-nand";
+ reg = <3 0x0 0x800>;
+ fsl,upm-addr-offset = <0x10>;
+ fsl,upm-cmd-offset = <0x08>;
+ /* Multi-chip NAND device */
+ fsl,upm-addr-line-cs-offsets = <0x0 0x200>;
+ fsl,upm-wait-flags = <0x5>;
+ chip-delay = <25>; // in micro-seconds
+
+ nand@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ label = "fs";
+ reg = <0x00000000 0x10000000>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/fsmc-nand.txt b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
new file mode 100644
index 000000000..5235cbc55
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/fsmc-nand.txt
@@ -0,0 +1,54 @@
+ST Microelectronics Flexible Static Memory Controller (FSMC)
+NAND Interface
+
+Required properties:
+- compatible : "st,spear600-fsmc-nand", "stericsson,fsmc-nand"
+- reg : Address range of the mtd chip
+- reg-names: Should contain the reg names "fsmc_regs", "nand_data", "nand_addr" and "nand_cmd"
+
+Optional properties:
+- bank-width : Width (in bytes) of the device. If not present, the width
+ defaults to 1 byte
+- nand-skip-bbtscan: Indicates the BBT scanning should be skipped
+- timings: array of 6 bytes for NAND timings. The meanings of these bytes
+ are:
+ byte 0 TCLR : CLE to RE delay in number of AHB clock cycles, only 4 bits
+ are valid. Zero means one clockcycle, 15 means 16 clock
+ cycles.
+ byte 1 TAR : ALE to RE delay, 4 bits are valid. Same format as TCLR.
+ byte 2 THIZ : number of HCLK clock cycles during which the data bus is
+ kept in Hi-Z (tristate) after the start of a write access.
+ Only valid for write transactions. Zero means zero cycles,
+ 255 means 255 cycles.
+ byte 3 THOLD : number of HCLK clock cycles to hold the address (and data
+ when writing) after the command deassertation. Zero means
+ one cycle, 255 means 256 cycles.
+ byte 4 TWAIT : number of HCLK clock cycles to assert the command to the
+ NAND flash in response to SMWAITn. Zero means 1 cycle,
+ 255 means 256 cycles.
+ byte 5 TSET : number of HCLK clock cycles to assert the address before the
+ command is asserted. Zero means one cycle, 255 means 256
+ cycles.
+- bank: default NAND bank to use (0-3 are valid, 0 is the default).
+
+Example:
+
+ fsmc: flash@d1800000 {
+ compatible = "st,spear600-fsmc-nand";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0xd1800000 0x1000 /* FSMC Register */
+ 0xd2000000 0x0010 /* NAND Base DATA */
+ 0xd2020000 0x0010 /* NAND Base ADDR */
+ 0xd2010000 0x0010>; /* NAND Base CMD */
+ reg-names = "fsmc_regs", "nand_data", "nand_addr", "nand_cmd";
+
+ bank-width = <1>;
+ nand-skip-bbtscan;
+ timings = /bits/ 8 <0 0 0 2 3 0>;
+ bank = <1>;
+
+ partition@0 {
+ ...
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
new file mode 100644
index 000000000..af8915b41
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/gpio-control-nand.txt
@@ -0,0 +1,47 @@
+GPIO assisted NAND flash
+
+The GPIO assisted NAND flash uses a memory mapped interface to
+read/write the NAND commands and data and GPIO pins for the control
+signals.
+
+Required properties:
+- compatible : "gpio-control-nand"
+- reg : should specify localbus chip select and size used for the chip. The
+ resource describes the data bus connected to the NAND flash and all accesses
+ are made in native endianness.
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+ representing partitions.
+- gpios : Specifies the GPIO pins to control the NAND device. The order of
+ GPIO references is: RDY, nCE, ALE, CLE, and an optional nWP.
+
+Optional properties:
+- bank-width : Width (in bytes) of the device. If not present, the width
+ defaults to 1 byte.
+- chip-delay : chip dependent delay for transferring data from array to
+ read registers (tR). If not present then a default of 20us is used.
+- gpio-control-nand,io-sync-reg : A 64-bit physical address for a read
+ location used to guard against bus reordering with regards to accesses to
+ the GPIO's and the NAND flash data bus. If present, then after changing
+ GPIO state and before and after command byte writes, this register will be
+ read to ensure that the GPIO accesses have completed.
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Examples:
+
+gpio-nand@1,0 {
+ compatible = "gpio-control-nand";
+ reg = <1 0x0000 0x2>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ gpios = <&banka 1 0>, /* RDY */
+ <&banka 2 0>, /* nCE */
+ <&banka 3 0>, /* ALE */
+ <&banka 4 0>, /* CLE */
+ <0>; /* nWP */
+
+ partition@0 {
+ ...
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nand.txt b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
new file mode 100644
index 000000000..fb733c4e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nand.txt
@@ -0,0 +1,137 @@
+Device tree bindings for GPMC connected NANDs
+
+GPMC connected NAND (found on OMAP boards) are represented as child nodes of
+the GPMC controller with a name of "nand".
+
+All timing relevant properties as well as generic gpmc child properties are
+explained in a separate documents - please refer to
+Documentation/devicetree/bindings/bus/ti-gpmc.txt
+
+For NAND specific properties such as ECC modes or bus width, please refer to
+Documentation/devicetree/bindings/mtd/nand.txt
+
+
+Required properties:
+
+ - reg: The CS line the peripheral is connected to
+
+Optional properties:
+
+ - nand-bus-width: Set this numeric value to 16 if the hardware
+ is wired that way. If not specified, a bus
+ width of 8 is assumed.
+
+ - ti,nand-ecc-opt: A string setting the ECC layout to use. One of:
+ "sw" 1-bit Hamming ecc code via software
+ "hw" <deprecated> use "ham1" instead
+ "hw-romcode" <deprecated> use "ham1" instead
+ "ham1" 1-bit Hamming ecc code
+ "bch4" 4-bit BCH ecc code
+ "bch8" 8-bit BCH ecc code
+ "bch16" 16-bit BCH ECC code
+ Refer below "How to select correct ECC scheme for your device ?"
+
+ - ti,nand-xfer-type: A string setting the data transfer type. One of:
+
+ "prefetch-polled" Prefetch polled mode (default)
+ "polled" Polled mode, without prefetch
+ "prefetch-dma" Prefetch enabled sDMA mode
+ "prefetch-irq" Prefetch enabled irq mode
+
+ - elm_id: <deprecated> use "ti,elm-id" instead
+ - ti,elm-id: Specifies phandle of the ELM devicetree node.
+ ELM is an on-chip hardware engine on TI SoC which is used for
+ locating ECC errors for BCHx algorithms. SoC devices which have
+ ELM hardware engines should specify this device node in .dtsi
+ Using ELM for ECC error correction frees some CPU cycles.
+
+For inline partition table parsing (optional):
+
+ - #address-cells: should be set to 1
+ - #size-cells: should be set to 1
+
+Example for an AM33xx board:
+
+ gpmc: gpmc@50000000 {
+ compatible = "ti,am3352-gpmc";
+ ti,hwmods = "gpmc";
+ reg = <0x50000000 0x1000000>;
+ interrupts = <100>;
+ gpmc,num-cs = <8>;
+ gpmc,num-waitpins = <2>;
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x08000000 0x2000>; /* CS0: NAND */
+ elm_id = <&elm>;
+
+ nand@0,0 {
+ reg = <0 0 0>; /* CS0, offset 0 */
+ nand-bus-width = <16>;
+ ti,nand-ecc-opt = "bch8";
+ ti,nand-xfer-type = "polled";
+
+ gpmc,sync-clk-ps = <0>;
+ gpmc,cs-on-ns = <0>;
+ gpmc,cs-rd-off-ns = <44>;
+ gpmc,cs-wr-off-ns = <44>;
+ gpmc,adv-on-ns = <6>;
+ gpmc,adv-rd-off-ns = <34>;
+ gpmc,adv-wr-off-ns = <44>;
+ gpmc,we-off-ns = <40>;
+ gpmc,oe-off-ns = <54>;
+ gpmc,access-ns = <64>;
+ gpmc,rd-cycle-ns = <82>;
+ gpmc,wr-cycle-ns = <82>;
+ gpmc,wr-access-ns = <40>;
+ gpmc,wr-data-mux-bus-ns = <0>;
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* partitions go here */
+ };
+ };
+
+How to select correct ECC scheme for your device ?
+--------------------------------------------------
+Higher ECC scheme usually means better protection against bit-flips and
+increased system lifetime. However, selection of ECC scheme is dependent
+on various other factors also like;
+
+(1) support of built in hardware engines.
+ Some legacy OMAP SoC do not have ELM harware engine, so those SoC cannot
+ support ecc-schemes with hardware error-correction (BCHx_HW). However
+ such SoC can use ecc-schemes with software library for error-correction
+ (BCHx_HW_DETECTION_SW). The error correction capability with software
+ library remains equivalent to their hardware counter-part, but there is
+ slight CPU penalty when too many bit-flips are detected during reads.
+
+(2) Device parameters like OOBSIZE.
+ Other factor which governs the selection of ecc-scheme is oob-size.
+ Higher ECC schemes require more OOB/Spare area to store ECC syndrome,
+ so the device should have enough free bytes available its OOB/Spare
+ area to accommodate ECC for entire page. In general following expression
+ helps in determining if given device can accommodate ECC syndrome:
+ "2 + (PAGESIZE / 512) * ECC_BYTES" >= OOBSIZE"
+ where
+ OOBSIZE number of bytes in OOB/spare area
+ PAGESIZE number of bytes in main-area of device page
+ ECC_BYTES number of ECC bytes generated to protect
+ 512 bytes of data, which is:
+ '3' for HAM1_xx ecc schemes
+ '7' for BCH4_xx ecc schemes
+ '14' for BCH8_xx ecc schemes
+ '26' for BCH16_xx ecc schemes
+
+ Example(a): For a device with PAGESIZE = 2048 and OOBSIZE = 64 and
+ trying to use BCH16 (ECC_BYTES=26) ecc-scheme.
+ Number of ECC bytes per page = (2 + (2048 / 512) * 26) = 106 B
+ which is greater than capacity of NAND device (OOBSIZE=64)
+ Hence, BCH16 cannot be supported on given device. But it can
+ probably use lower ecc-schemes like BCH8.
+
+ Example(b): For a device with PAGESIZE = 2048 and OOBSIZE = 128 and
+ trying to use BCH16 (ECC_BYTES=26) ecc-scheme.
+ Number of ECC bytes per page = (2 + (2048 / 512) * 26) = 106 B
+ which can be accommodated in the OOB/Spare area of this device
+ (OOBSIZE=128). So this device can use BCH16 ecc-scheme.
diff --git a/Documentation/devicetree/bindings/mtd/gpmc-nor.txt b/Documentation/devicetree/bindings/mtd/gpmc-nor.txt
new file mode 100644
index 000000000..4828c17bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/gpmc-nor.txt
@@ -0,0 +1,98 @@
+Device tree bindings for NOR flash connect to TI GPMC
+
+NOR flash connected to the TI GPMC (found on OMAP boards) are represented as
+child nodes of the GPMC controller with a name of "nor".
+
+All timing relevant properties as well as generic GPMC child properties are
+explained in a separate documents. Please refer to
+Documentation/devicetree/bindings/bus/ti-gpmc.txt
+
+Required properties:
+- bank-width: Width of NOR flash in bytes. GPMC supports 8-bit and
+ 16-bit devices and so must be either 1 or 2 bytes.
+- compatible: Documentation/devicetree/bindings/mtd/mtd-physmap.txt
+- gpmc,cs-on-ns: Chip-select assertion time
+- gpmc,cs-rd-off-ns: Chip-select de-assertion time for reads
+- gpmc,cs-wr-off-ns: Chip-select de-assertion time for writes
+- gpmc,oe-on-ns: Output-enable assertion time
+- gpmc,oe-off-ns: Output-enable de-assertion time
+- gpmc,we-on-ns Write-enable assertion time
+- gpmc,we-off-ns: Write-enable de-assertion time
+- gpmc,access-ns: Start cycle to first data capture (read access)
+- gpmc,rd-cycle-ns: Total read cycle time
+- gpmc,wr-cycle-ns: Total write cycle time
+- linux,mtd-name: Documentation/devicetree/bindings/mtd/mtd-physmap.txt
+- reg: Chip-select, base address (relative to chip-select)
+ and size of NOR flash. Note that base address will be
+ typically 0 as this is the start of the chip-select.
+
+Optional properties:
+- gpmc,XXX Additional GPMC timings and settings parameters. See
+ Documentation/devicetree/bindings/bus/ti-gpmc.txt
+
+Optional properties for partition table parsing:
+- #address-cells: should be set to 1
+- #size-cells: should be set to 1
+
+Example:
+
+gpmc: gpmc@6e000000 {
+ compatible = "ti,omap3430-gpmc", "simple-bus";
+ ti,hwmods = "gpmc";
+ reg = <0x6e000000 0x1000>;
+ interrupts = <20>;
+ gpmc,num-cs = <8>;
+ gpmc,num-waitpins = <4>;
+ #address-cells = <2>;
+ #size-cells = <1>;
+
+ ranges = <0 0 0x10000000 0x08000000>;
+
+ nor@0,0 {
+ compatible = "cfi-flash";
+ linux,mtd-name= "intel,pf48f6000m0y1be";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0 0 0x08000000>;
+ bank-width = <2>;
+
+ gpmc,mux-add-data;
+ gpmc,cs-on-ns = <0>;
+ gpmc,cs-rd-off-ns = <186>;
+ gpmc,cs-wr-off-ns = <186>;
+ gpmc,adv-on-ns = <12>;
+ gpmc,adv-rd-off-ns = <48>;
+ gpmc,adv-wr-off-ns = <48>;
+ gpmc,oe-on-ns = <54>;
+ gpmc,oe-off-ns = <168>;
+ gpmc,we-on-ns = <54>;
+ gpmc,we-off-ns = <168>;
+ gpmc,rd-cycle-ns = <186>;
+ gpmc,wr-cycle-ns = <186>;
+ gpmc,access-ns = <114>;
+ gpmc,page-burst-access-ns = <6>;
+ gpmc,bus-turnaround-ns = <12>;
+ gpmc,cycle2cycle-delay-ns = <18>;
+ gpmc,wr-data-mux-bus-ns = <90>;
+ gpmc,wr-access-ns = <186>;
+ gpmc,cycle2cycle-samecsen;
+ gpmc,cycle2cycle-diffcsen;
+
+ partition@0 {
+ label = "bootloader-nor";
+ reg = <0 0x40000>;
+ };
+ partition@0x40000 {
+ label = "params-nor";
+ reg = <0x40000 0x40000>;
+ };
+ partition@0x80000 {
+ label = "kernel-nor";
+ reg = <0x80000 0x200000>;
+ };
+ partition@0x280000 {
+ label = "filesystem-nor";
+ reg = <0x240000 0x7d80000>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt b/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt
new file mode 100644
index 000000000..5d8fa527c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/gpmc-onenand.txt
@@ -0,0 +1,46 @@
+Device tree bindings for GPMC connected OneNANDs
+
+GPMC connected OneNAND (found on OMAP boards) are represented as child nodes of
+the GPMC controller with a name of "onenand".
+
+All timing relevant properties as well as generic gpmc child properties are
+explained in a separate documents - please refer to
+Documentation/devicetree/bindings/bus/ti-gpmc.txt
+
+Required properties:
+
+ - reg: The CS line the peripheral is connected to
+ - gpmc,device-width Width of the ONENAND device connected to the GPMC
+ in bytes. Must be 1 or 2.
+
+Optional properties:
+
+ - dma-channel: DMA Channel index
+
+For inline partition table parsing (optional):
+
+ - #address-cells: should be set to 1
+ - #size-cells: should be set to 1
+
+Example for an OMAP3430 board:
+
+ gpmc: gpmc@6e000000 {
+ compatible = "ti,omap3430-gpmc";
+ ti,hwmods = "gpmc";
+ reg = <0x6e000000 0x1000000>;
+ interrupts = <20>;
+ gpmc,num-cs = <8>;
+ gpmc,num-waitpins = <4>;
+ #address-cells = <2>;
+ #size-cells = <1>;
+
+ onenand@0 {
+ reg = <0 0 0>; /* CS0, offset 0 */
+ gpmc,device-width = <2>;
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* partitions go here */
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mtd/gpmi-nand.txt b/Documentation/devicetree/bindings/mtd/gpmi-nand.txt
new file mode 100644
index 000000000..d02acaff3
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/gpmi-nand.txt
@@ -0,0 +1,58 @@
+* Freescale General-Purpose Media Interface (GPMI)
+
+The GPMI nand controller provides an interface to control the
+NAND flash chips.
+
+Required properties:
+ - compatible : should be "fsl,<chip>-gpmi-nand"
+ - reg : should contain registers location and length for gpmi and bch.
+ - reg-names: Should contain the reg names "gpmi-nand" and "bch"
+ - interrupts : BCH interrupt number.
+ - interrupt-names : Should be "bch".
+ - dmas: DMA specifier, consisting of a phandle to DMA controller node
+ and GPMI DMA channel ID.
+ Refer to dma.txt and fsl-mxs-dma.txt for details.
+ - dma-names: Must be "rx-tx".
+
+Optional properties:
+ - nand-on-flash-bbt: boolean to enable on flash bbt option if not
+ present false
+ - fsl,use-minimum-ecc: Protect this NAND flash with the minimum ECC
+ strength required. The required ECC strength is
+ automatically discoverable for some flash
+ (e.g., according to the ONFI standard).
+ However, note that if this strength is not
+ discoverable or this property is not enabled,
+ the software may chooses an implementation-defined
+ ECC scheme.
+ - fsl,no-blockmark-swap: Don't swap the bad block marker from the OOB
+ area with the byte in the data area but rely on the
+ flash based BBT for identifying bad blocks.
+ NOTE: this is only valid in conjunction with
+ 'nand-on-flash-bbt'.
+ WARNING: on i.MX28 blockmark swapping cannot be
+ disabled for the BootROM in the FCB. Thus,
+ partitions written from Linux with this feature
+ turned on may not be accessible by the BootROM
+ code.
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Examples:
+
+gpmi-nand@8000c000 {
+ compatible = "fsl,imx28-gpmi-nand";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x8000c000 2000>, <0x8000a000 2000>;
+ reg-names = "gpmi-nand", "bch";
+ interrupts = <41>;
+ interrupt-names = "bch";
+ dmas = <&dma_apbh 4>;
+ dma-names = "rx-tx";
+
+ partition@0 {
+ ...
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/hisi504-nand.txt b/Documentation/devicetree/bindings/mtd/hisi504-nand.txt
new file mode 100644
index 000000000..2e35f0662
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/hisi504-nand.txt
@@ -0,0 +1,47 @@
+Hisilicon Hip04 Soc NAND controller DT binding
+
+Required properties:
+
+- compatible: Should be "hisilicon,504-nfc".
+- reg: The first contains base physical address and size of
+ NAND controller's registers. The second contains base
+ physical address and size of NAND controller's buffer.
+- interrupts: Interrupt number for nfc.
+- nand-bus-width: See nand.txt.
+- nand-ecc-mode: Support none and hw ecc mode.
+- #address-cells: Partition address, should be set 1.
+- #size-cells: Partition size, should be set 1.
+
+Optional properties:
+
+- nand-ecc-strength: Number of bits to correct per ECC step.
+- nand-ecc-step-size: Number of data bytes covered by a single ECC step.
+
+The following ECC strength and step size are currently supported:
+
+ - nand-ecc-strength = <16>, nand-ecc-step-size = <1024>
+
+Flash chip may optionally contain additional sub-nodes describing partitions of
+the address space. See partition.txt for more detail.
+
+Example:
+
+ nand: nand@4020000 {
+ compatible = "hisilicon,504-nfc";
+ reg = <0x4020000 0x10000>, <0x5000000 0x1000>;
+ interrupts = <0 379 4>;
+ nand-bus-width = <8>;
+ nand-ecc-mode = "hw";
+ nand-ecc-strength = <16>;
+ nand-ecc-step-size = <1024>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ label = "nand_text";
+ reg = <0x00000000 0x00400000>;
+ };
+
+ ...
+
+ };
diff --git a/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
new file mode 100644
index 000000000..2bee68103
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/jedec,spi-nor.txt
@@ -0,0 +1,32 @@
+* MTD SPI driver for ST M25Pxx (and similar) serial flash chips
+
+Required properties:
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+ representing partitions.
+- compatible : May include a device-specific string consisting of the
+ manufacturer and name of the chip. Bear in mind the DT binding
+ is not Linux-only, but in case of Linux, see the "m25p_ids"
+ table in drivers/mtd/devices/m25p80.c for the list of supported
+ chips.
+ Must also include "jedec,spi-nor" for any SPI NOR flash that can
+ be identified by the JEDEC READ ID opcode (0x9F).
+- reg : Chip-Select number
+- spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at
+
+Optional properties:
+- m25p,fast-read : Use the "fast read" opcode to read data from the chip instead
+ of the usual "read" opcode. This opcode is not supported by
+ all chips and support for it can not be detected at runtime.
+ Refer to your chips' datasheet to check if this is supported
+ by your chip.
+
+Example:
+
+ flash: m25p80@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "spansion,m25p80", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <40000000>;
+ m25p,fast-read;
+ };
diff --git a/Documentation/devicetree/bindings/mtd/lpc32xx-mlc.txt b/Documentation/devicetree/bindings/mtd/lpc32xx-mlc.txt
new file mode 100644
index 000000000..d0a37252e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/lpc32xx-mlc.txt
@@ -0,0 +1,50 @@
+NXP LPC32xx SoC NAND MLC controller
+
+Required properties:
+- compatible: "nxp,lpc3220-mlc"
+- reg: Address and size of the controller
+- interrupts: The NAND interrupt specification
+- gpios: GPIO specification for NAND write protect
+
+The following required properties are very controller specific. See the LPC32xx
+User Manual 7.5.14 MLC NAND Timing Register (the values here are specified in
+Hz, to make them independent of actual clock speed and to provide for good
+accuracy:)
+- nxp,tcea_delay: TCEA_DELAY
+- nxp,busy_delay: BUSY_DELAY
+- nxp,nand_ta: NAND_TA
+- nxp,rd_high: RD_HIGH
+- nxp,rd_low: RD_LOW
+- nxp,wr_high: WR_HIGH
+- nxp,wr_low: WR_LOW
+
+Optional subnodes:
+- Partitions, see Documentation/devicetree/bindings/mtd/partition.txt
+
+Example:
+
+ mlc: flash@200A8000 {
+ compatible = "nxp,lpc3220-mlc";
+ reg = <0x200A8000 0x11000>;
+ interrupts = <11 0>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ nxp,tcea-delay = <333333333>;
+ nxp,busy-delay = <10000000>;
+ nxp,nand-ta = <18181818>;
+ nxp,rd-high = <31250000>;
+ nxp,rd-low = <45454545>;
+ nxp,wr-high = <40000000>;
+ nxp,wr-low = <83333333>;
+ gpios = <&gpio 5 19 1>; /* GPO_P3 19, active low */
+
+ mtd0@00000000 {
+ label = "boot";
+ reg = <0x00000000 0x00064000>;
+ read-only;
+ };
+
+ ...
+
+ };
diff --git a/Documentation/devicetree/bindings/mtd/lpc32xx-slc.txt b/Documentation/devicetree/bindings/mtd/lpc32xx-slc.txt
new file mode 100644
index 000000000..d94edc0fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/lpc32xx-slc.txt
@@ -0,0 +1,52 @@
+NXP LPC32xx SoC NAND SLC controller
+
+Required properties:
+- compatible: "nxp,lpc3220-slc"
+- reg: Address and size of the controller
+- nand-on-flash-bbt: Use bad block table on flash
+- gpios: GPIO specification for NAND write protect
+
+The following required properties are very controller specific. See the LPC32xx
+User Manual:
+- nxp,wdr-clks: Delay before Ready signal is tested on write (W_RDY)
+- nxp,rdr-clks: Delay before Ready signal is tested on read (R_RDY)
+(The following values are specified in Hz, to make them independent of actual
+clock speed:)
+- nxp,wwidth: Write pulse width (W_WIDTH)
+- nxp,whold: Write hold time (W_HOLD)
+- nxp,wsetup: Write setup time (W_SETUP)
+- nxp,rwidth: Read pulse width (R_WIDTH)
+- nxp,rhold: Read hold time (R_HOLD)
+- nxp,rsetup: Read setup time (R_SETUP)
+
+Optional subnodes:
+- Partitions, see Documentation/devicetree/bindings/mtd/partition.txt
+
+Example:
+
+ slc: flash@20020000 {
+ compatible = "nxp,lpc3220-slc";
+ reg = <0x20020000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ nxp,wdr-clks = <14>;
+ nxp,wwidth = <40000000>;
+ nxp,whold = <100000000>;
+ nxp,wsetup = <100000000>;
+ nxp,rdr-clks = <14>;
+ nxp,rwidth = <40000000>;
+ nxp,rhold = <66666666>;
+ nxp,rsetup = <100000000>;
+ nand-on-flash-bbt;
+ gpios = <&gpio 5 19 1>; /* GPO_P3 19, active low */
+
+ mtd0@00000000 {
+ label = "phy3250-boot";
+ reg = <0x00000000 0x00064000>;
+ read-only;
+ };
+
+ ...
+
+ };
diff --git a/Documentation/devicetree/bindings/mtd/mtd-physmap.txt b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
new file mode 100644
index 000000000..4a0a48bf4
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/mtd-physmap.txt
@@ -0,0 +1,89 @@
+CFI or JEDEC memory-mapped NOR flash, MTD-RAM (NVRAM...)
+
+Flash chips (Memory Technology Devices) are often used for solid state
+file systems on embedded devices.
+
+ - compatible : should contain the specific model of mtd chip(s)
+ used, if known, followed by either "cfi-flash", "jedec-flash",
+ "mtd-ram" or "mtd-rom".
+ - reg : Address range(s) of the mtd chip(s)
+ It's possible to (optionally) define multiple "reg" tuples so that
+ non-identical chips can be described in one node.
+ - bank-width : Width (in bytes) of the bank. Equal to the
+ device width times the number of interleaved chips.
+ - device-width : (optional) Width of a single mtd chip. If
+ omitted, assumed to be equal to 'bank-width'.
+ - #address-cells, #size-cells : Must be present if the device has
+ sub-nodes representing partitions (see below). In this case
+ both #address-cells and #size-cells must be equal to 1.
+ - no-unaligned-direct-access: boolean to disable the default direct
+ mapping of the flash.
+ On some platforms (e.g. MPC5200) a direct 1:1 mapping may cause
+ problems with JFFS2 usage, as the local bus (LPB) doesn't support
+ unaligned accesses as implemented in the JFFS2 code via memcpy().
+ By defining "no-unaligned-direct-access", the flash will not be
+ exposed directly to the MTD users (e.g. JFFS2) any more.
+ - linux,mtd-name: allow to specify the mtd name for retro capability with
+ physmap-flash drivers as boot loader pass the mtd partition via the old
+ device name physmap-flash.
+ - use-advanced-sector-protection: boolean to enable support for the
+ advanced sector protection (Spansion: PPB - Persistent Protection
+ Bits) locking.
+
+For JEDEC compatible devices, the following additional properties
+are defined:
+
+ - vendor-id : Contains the flash chip's vendor id (1 byte).
+ - device-id : Contains the flash chip's device id (1 byte).
+
+For ROM compatible devices (and ROM fallback from cfi-flash), the following
+additional (optional) property is defined:
+
+ - erase-size : The chip's physical erase block size in bytes.
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Example:
+
+ flash@ff000000 {
+ compatible = "amd,am29lv128ml", "cfi-flash";
+ reg = <ff000000 01000000>;
+ bank-width = <4>;
+ device-width = <1>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ fs@0 {
+ label = "fs";
+ reg = <0 f80000>;
+ };
+ firmware@f80000 {
+ label ="firmware";
+ reg = <f80000 80000>;
+ read-only;
+ };
+ };
+
+Here an example with multiple "reg" tuples:
+
+ flash@f0000000,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "intel,PC48F4400P0VB", "cfi-flash";
+ reg = <0 0x00000000 0x02000000
+ 0 0x02000000 0x02000000>;
+ bank-width = <2>;
+ partition@0 {
+ label = "test-part1";
+ reg = <0 0x04000000>;
+ };
+ };
+
+An example using SRAM:
+
+ sram@2,0 {
+ compatible = "samsung,k6f1616u6a", "mtd-ram";
+ reg = <2 0 0x00200000>;
+ bank-width = <2>;
+ };
+
diff --git a/Documentation/devicetree/bindings/mtd/mxc-nand.txt b/Documentation/devicetree/bindings/mtd/mxc-nand.txt
new file mode 100644
index 000000000..b5833d11c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/mxc-nand.txt
@@ -0,0 +1,19 @@
+* Freescale's mxc_nand
+
+Required properties:
+- compatible: "fsl,imxXX-nand"
+- reg: address range of the nfc block
+- interrupts: irq to be used
+- nand-bus-width: see nand.txt
+- nand-ecc-mode: see nand.txt
+- nand-on-flash-bbt: see nand.txt
+
+Example:
+
+ nand@d8000000 {
+ compatible = "fsl,imx27-nand";
+ reg = <0xd8000000 0x1000>;
+ interrupts = <29>;
+ nand-bus-width = <8>;
+ nand-ecc-mode = "hw";
+ };
diff --git a/Documentation/devicetree/bindings/mtd/nand.txt b/Documentation/devicetree/bindings/mtd/nand.txt
new file mode 100644
index 000000000..b53f92e25
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/nand.txt
@@ -0,0 +1,21 @@
+* MTD generic binding
+
+- nand-ecc-mode : String, operation mode of the NAND ecc mode.
+ Supported values are: "none", "soft", "hw", "hw_syndrome", "hw_oob_first",
+ "soft_bch".
+- nand-bus-width : 8 or 16 bus width if not present 8
+- nand-on-flash-bbt: boolean to enable on flash bbt option if not present false
+
+- nand-ecc-strength: integer representing the number of bits to correct
+ per ECC step.
+
+- nand-ecc-step-size: integer representing the number of data bytes
+ that are covered by a single ECC step.
+
+The ECC strength and ECC step size properties define the correction capability
+of a controller. Together, they say a controller can correct "{strength} bit
+errors per {size} bytes".
+
+The interpretation of these parameters is implementation-defined, so not all
+implementations must support all possible combinations. However, implementations
+are encouraged to further specify the value(s) they support.
diff --git a/Documentation/devicetree/bindings/mtd/orion-nand.txt b/Documentation/devicetree/bindings/mtd/orion-nand.txt
new file mode 100644
index 000000000..2d6ab660e
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/orion-nand.txt
@@ -0,0 +1,50 @@
+NAND support for Marvell Orion SoC platforms
+
+Required properties:
+- compatible : "marvell,orion-nand".
+- reg : Base physical address of the NAND and length of memory mapped
+ region
+
+Optional properties:
+- cle : Address line number connected to CLE. Default is 0
+- ale : Address line number connected to ALE. Default is 1
+- bank-width : Width in bytes of the device. Default is 1
+- chip-delay : Chip dependent delay for transferring data from array to read
+ registers in usecs
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Example:
+
+nand@f4000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ cle = <0>;
+ ale = <1>;
+ bank-width = <1>;
+ chip-delay = <25>;
+ compatible = "marvell,orion-nand";
+ reg = <0xf4000000 0x400>;
+
+ partition@0 {
+ label = "u-boot";
+ reg = <0x0000000 0x100000>;
+ read-only;
+ };
+
+ partition@100000 {
+ label = "uImage";
+ reg = <0x0100000 0x200000>;
+ };
+
+ partition@300000 {
+ label = "dtb";
+ reg = <0x0300000 0x100000>;
+ };
+
+ partition@400000 {
+ label = "root";
+ reg = <0x0400000 0x7d00000>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/partition.txt b/Documentation/devicetree/bindings/mtd/partition.txt
new file mode 100644
index 000000000..8e5557da1
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/partition.txt
@@ -0,0 +1,71 @@
+Representing flash partitions in devicetree
+
+Partitions can be represented by sub-nodes of an mtd device. This can be used
+on platforms which have strong conventions about which portions of a flash are
+used for what purposes, but which don't use an on-flash partition table such
+as RedBoot.
+NOTE: if the sub-node has a compatible string, then it is not a partition.
+
+#address-cells & #size-cells must both be present in the mtd device. There are
+two valid values for both:
+<1>: for partitions that require a single 32-bit cell to represent their
+ size/address (aka the value is below 4 GiB)
+<2>: for partitions that require two 32-bit cells to represent their
+ size/address (aka the value is 4 GiB or greater).
+
+Required properties:
+- reg : The partition's offset and size within the mtd bank.
+
+Optional properties:
+- label : The label / name for this partition. If omitted, the label is taken
+ from the node name (excluding the unit address).
+- read-only : This parameter, if present, is a hint to Linux that this
+ partition should only be mounted read-only. This is usually used for flash
+ partitions containing early-boot firmware images or data which should not be
+ clobbered.
+
+Examples:
+
+
+flash@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ label = "u-boot";
+ reg = <0x0000000 0x100000>;
+ read-only;
+ };
+
+ uimage@100000 {
+ reg = <0x0100000 0x200000>;
+ };
+};
+
+flash@1 {
+ #address-cells = <1>;
+ #size-cells = <2>;
+
+ /* a 4 GiB partition */
+ partition@0 {
+ label = "filesystem";
+ reg = <0x00000000 0x1 0x00000000>;
+ };
+};
+
+flash@2 {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ /* an 8 GiB partition */
+ partition@0 {
+ label = "filesystem #1";
+ reg = <0x0 0x00000000 0x2 0x00000000>;
+ };
+
+ /* a 4 GiB partition */
+ partition@200000000 {
+ label = "filesystem #2";
+ reg = <0x2 0x00000000 0x1 0x00000000>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt b/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt
new file mode 100644
index 000000000..4f833e3c4
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/pxa3xx-nand.txt
@@ -0,0 +1,43 @@
+PXA3xx NAND DT bindings
+
+Required properties:
+
+ - compatible: Should be set to one of the following:
+ marvell,pxa3xx-nand
+ marvell,armada370-nand
+ - reg: The register base for the controller
+ - interrupts: The interrupt to map
+ - #address-cells: Set to <1> if the node includes partitions
+
+Optional properties:
+
+ - marvell,nand-enable-arbiter: Set to enable the bus arbiter
+ - marvell,nand-keep-config: Set to keep the NAND controller config as set
+ by the bootloader
+ - num-cs: Number of chipselect lines to use
+ - nand-on-flash-bbt: boolean to enable on flash bbt option if
+ not present false
+ - nand-ecc-strength: number of bits to correct per ECC step
+ - nand-ecc-step-size: number of data bytes covered by a single ECC step
+
+The following ECC strength and step size are currently supported:
+
+ - nand-ecc-strength = <1>, nand-ecc-step-size = <512>
+ - nand-ecc-strength = <4>, nand-ecc-step-size = <512>
+ - nand-ecc-strength = <8>, nand-ecc-step-size = <512>
+
+Example:
+
+ nand0: nand@43100000 {
+ compatible = "marvell,pxa3xx-nand";
+ reg = <0x43100000 90>;
+ interrupts = <45>;
+ #address-cells = <1>;
+
+ marvell,nand-enable-arbiter;
+ marvell,nand-keep-config;
+ num-cs = <1>;
+
+ /* partitions (optional) */
+ };
+
diff --git a/Documentation/devicetree/bindings/mtd/spear_smi.txt b/Documentation/devicetree/bindings/mtd/spear_smi.txt
new file mode 100644
index 000000000..7248aadd8
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/spear_smi.txt
@@ -0,0 +1,31 @@
+* SPEAr SMI
+
+Required properties:
+- compatible : "st,spear600-smi"
+- reg : Address range of the mtd chip
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+ representing partitions.
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the STMMAC interrupts
+- clock-rate : Functional clock rate of SMI in Hz
+
+Optional properties:
+- st,smi-fast-mode : Flash supports read in fast mode
+
+Example:
+
+ smi: flash@fc000000 {
+ compatible = "st,spear600-smi";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0xfc000000 0x1000>;
+ interrupt-parent = <&vic1>;
+ interrupts = <12>;
+ clock-rate = <50000000>; /* 50MHz */
+
+ flash@f8000000 {
+ st,smi-fast-mode;
+ ...
+ };
+ };
diff --git a/Documentation/devicetree/bindings/mtd/st-fsm.txt b/Documentation/devicetree/bindings/mtd/st-fsm.txt
new file mode 100644
index 000000000..c2489391c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/st-fsm.txt
@@ -0,0 +1,26 @@
+* ST-Microelectronics SPI FSM Serial (NOR) Flash Controller
+
+Required properties:
+ - compatible : Should be "st,spi-fsm"
+ - reg : Contains register's location and length.
+ - reg-names : Should contain the reg names "spi-fsm"
+ - interrupts : The interrupt number
+ - pinctrl-0 : Standard Pinctrl phandle (see: pinctrl/pinctrl-bindings.txt)
+
+Optional properties:
+ - st,syscfg : Phandle to boot-device system configuration registers
+ - st,boot-device-reg : Address of the aforementioned boot-device register(s)
+ - st,boot-device-spi : Expected boot-device value if booted via this device
+
+Example:
+ spifsm: spifsm@fe902000{
+ compatible = "st,spi-fsm";
+ reg = <0xfe902000 0x1000>;
+ reg-names = "spi-fsm";
+ pinctrl-0 = <&pinctrl_fsm>;
+ st,syscfg = <&syscfg_rear>;
+ st,boot-device-reg = <0x958>;
+ st,boot-device-spi = <0x1a>;
+ status = "okay";
+ };
+
diff --git a/Documentation/devicetree/bindings/mtd/sunxi-nand.txt b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt
new file mode 100644
index 000000000..086d6f44c
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/sunxi-nand.txt
@@ -0,0 +1,45 @@
+Allwinner NAND Flash Controller (NFC)
+
+Required properties:
+- compatible : "allwinner,sun4i-a10-nand".
+- reg : shall contain registers location and length for data and reg.
+- interrupts : shall define the nand controller interrupt.
+- #address-cells: shall be set to 1. Encode the nand CS.
+- #size-cells : shall be set to 0.
+- clocks : shall reference nand controller clocks.
+- clock-names : nand controller internal clock names. Shall contain :
+ * "ahb" : AHB gating clock
+ * "mod" : nand controller clock
+
+Optional children nodes:
+Children nodes represent the available nand chips.
+
+Optional properties:
+- allwinner,rb : shall contain the native Ready/Busy ids.
+ or
+- rb-gpios : shall contain the gpios used as R/B pins.
+- nand-ecc-mode : one of the supported ECC modes ("hw", "hw_syndrome", "soft",
+ "soft_bch" or "none")
+
+see Documentation/devicetree/bindings/mtd/nand.txt for generic bindings.
+
+
+Examples:
+nfc: nand@01c03000 {
+ compatible = "allwinner,sun4i-a10-nand";
+ reg = <0x01c03000 0x1000>;
+ interrupts = <0 37 1>;
+ clocks = <&ahb_gates 13>, <&nand_clk>;
+ clock-names = "ahb", "mod";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&nand_pins_a &nand_cs0_pins_a &nand_rb0_pins_a>;
+ status = "okay";
+
+ nand@0 {
+ reg = <0>;
+ allwinner,rb = <0>;
+ nand-ecc-mode = "soft_bch";
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt b/Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt
new file mode 100644
index 000000000..10640b17c
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-emac.txt
@@ -0,0 +1,19 @@
+* Allwinner EMAC ethernet controller
+
+Required properties:
+- compatible: should be "allwinner,sun4i-a10-emac" (Deprecated:
+ "allwinner,sun4i-emac")
+- reg: address and length of the register set for the device.
+- interrupts: interrupt for the device
+- phy: see ethernet.txt file in the same directory.
+- clocks: A phandle to the reference clock for this device
+
+Example:
+
+emac: ethernet@01c0b000 {
+ compatible = "allwinner,sun4i-a10-emac";
+ reg = <0x01c0b000 0x1000>;
+ interrupts = <55>;
+ clocks = <&ahb_gates 17>;
+ phy = <&phy0>;
+};
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt b/Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt
new file mode 100644
index 000000000..4ec564137
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-mdio.txt
@@ -0,0 +1,27 @@
+* Allwinner A10 MDIO Ethernet Controller interface
+
+Required properties:
+- compatible: should be "allwinner,sun4i-a10-mdio"
+ (Deprecated: "allwinner,sun4i-mdio").
+- reg: address and length of the register set for the device.
+
+Optional properties:
+- phy-supply: phandle to a regulator if the PHY needs one
+
+Example at the SoC level:
+mdio@01c0b080 {
+ compatible = "allwinner,sun4i-a10-mdio";
+ reg = <0x01c0b080 0x14>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+};
+
+And at the board level:
+
+mdio@01c0b080 {
+ phy-supply = <&reg_emac_3v3>;
+
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt
new file mode 100644
index 000000000..ea4d75238
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.txt
@@ -0,0 +1,27 @@
+* Allwinner GMAC ethernet controller
+
+This device is a platform glue layer for stmmac.
+Please see stmmac.txt for the other unchanged properties.
+
+Required properties:
+ - compatible: Should be "allwinner,sun7i-a20-gmac"
+ - clocks: Should contain the GMAC main clock, and tx clock
+ The tx clock type should be "allwinner,sun7i-a20-gmac-clk"
+ - clock-names: Should contain the clock names "stmmaceth",
+ and "allwinner_gmac_tx"
+
+Optional properties:
+- phy-supply: phandle to a regulator if the PHY needs one
+
+Examples:
+
+ gmac: ethernet@01c50000 {
+ compatible = "allwinner,sun7i-a20-gmac";
+ reg = <0x01c50000 0x10000>,
+ <0x01c20164 0x4>;
+ interrupts = <0 85 1>;
+ interrupt-names = "macirq";
+ clocks = <&ahb_gates 49>, <&gmac_tx>;
+ clock-names = "stmmaceth", "allwinner_gmac_tx";
+ phy-mode = "mii";
+ };
diff --git a/Documentation/devicetree/bindings/net/altera_tse.txt b/Documentation/devicetree/bindings/net/altera_tse.txt
new file mode 100644
index 000000000..a70629799
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/altera_tse.txt
@@ -0,0 +1,114 @@
+* Altera Triple-Speed Ethernet MAC driver (TSE)
+
+Required properties:
+- compatible: Should be "altr,tse-1.0" for legacy SGDMA based TSE, and should
+ be "altr,tse-msgdma-1.0" for the preferred MSGDMA based TSE.
+ ALTR is supported for legacy device trees, but is deprecated.
+ altr should be used for all new designs.
+- reg: Address and length of the register set for the device. It contains
+ the information of registers in the same order as described by reg-names
+- reg-names: Should contain the reg names
+ "control_port": MAC configuration space region
+ "tx_csr": xDMA Tx dispatcher control and status space region
+ "tx_desc": MSGDMA Tx dispatcher descriptor space region
+ "rx_csr" : xDMA Rx dispatcher control and status space region
+ "rx_desc": MSGDMA Rx dispatcher descriptor space region
+ "rx_resp": MSGDMA Rx dispatcher response space region
+ "s1": SGDMA descriptor memory
+- interrupts: Should contain the TSE interrupts and it's mode.
+- interrupt-names: Should contain the interrupt names
+ "rx_irq": xDMA Rx dispatcher interrupt
+ "tx_irq": xDMA Tx dispatcher interrupt
+- rx-fifo-depth: MAC receive FIFO buffer depth in bytes
+- tx-fifo-depth: MAC transmit FIFO buffer depth in bytes
+- phy-mode: See ethernet.txt in the same directory.
+- phy-handle: See ethernet.txt in the same directory.
+- phy-addr: See ethernet.txt in the same directory. A configuration should
+ include phy-handle or phy-addr.
+- altr,has-supplementary-unicast:
+ If present, TSE supports additional unicast addresses.
+ Otherwise additional unicast addresses are not supported.
+- altr,has-hash-multicast-filter:
+ If present, TSE supports a hash based multicast filter.
+ Otherwise, hash-based multicast filtering is not supported.
+
+- mdio device tree subnode: When the TSE has a phy connected to its local
+ mdio, there must be device tree subnode with the following
+ required properties:
+
+ - compatible: Must be "altr,tse-mdio".
+ - #address-cells: Must be <1>.
+ - #size-cells: Must be <0>.
+
+ For each phy on the mdio bus, there must be a node with the following
+ fields:
+
+ - reg: phy id used to communicate to phy.
+ - device_type: Must be "ethernet-phy".
+
+Optional properties:
+- local-mac-address: See ethernet.txt in the same directory.
+- max-frame-size: See ethernet.txt in the same directory.
+
+Example:
+
+ tse_sub_0_eth_tse_0: ethernet@0x1,00000000 {
+ compatible = "altr,tse-msgdma-1.0";
+ reg = <0x00000001 0x00000000 0x00000400>,
+ <0x00000001 0x00000460 0x00000020>,
+ <0x00000001 0x00000480 0x00000020>,
+ <0x00000001 0x000004A0 0x00000008>,
+ <0x00000001 0x00000400 0x00000020>,
+ <0x00000001 0x00000420 0x00000020>;
+ reg-names = "control_port", "rx_csr", "rx_desc", "rx_resp", "tx_csr", "tx_desc";
+ interrupt-parent = <&hps_0_arm_gic_0>;
+ interrupts = <0 41 4>, <0 40 4>;
+ interrupt-names = "rx_irq", "tx_irq";
+ rx-fifo-depth = <2048>;
+ tx-fifo-depth = <2048>;
+ address-bits = <48>;
+ max-frame-size = <1500>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ phy-mode = "gmii";
+ altr,has-supplementary-unicast;
+ altr,has-hash-multicast-filter;
+ phy-handle = <&phy0>;
+ mdio {
+ compatible = "altr,tse-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <0x0>;
+ device_type = "ethernet-phy";
+ };
+
+ phy1: ethernet-phy@1 {
+ reg = <0x1>;
+ device_type = "ethernet-phy";
+ };
+
+ };
+ };
+
+ tse_sub_1_eth_tse_0: ethernet@0x1,00001000 {
+ compatible = "altr,tse-msgdma-1.0";
+ reg = <0x00000001 0x00001000 0x00000400>,
+ <0x00000001 0x00001460 0x00000020>,
+ <0x00000001 0x00001480 0x00000020>,
+ <0x00000001 0x000014A0 0x00000008>,
+ <0x00000001 0x00001400 0x00000020>,
+ <0x00000001 0x00001420 0x00000020>;
+ reg-names = "control_port", "rx_csr", "rx_desc", "rx_resp", "tx_csr", "tx_desc";
+ interrupt-parent = <&hps_0_arm_gic_0>;
+ interrupts = <0 43 4>, <0 42 4>;
+ interrupt-names = "rx_irq", "tx_irq";
+ rx-fifo-depth = <2048>;
+ tx-fifo-depth = <2048>;
+ address-bits = <48>;
+ max-frame-size = <1500>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ phy-mode = "gmii";
+ altr,has-supplementary-unicast;
+ altr,has-hash-multicast-filter;
+ phy-handle = <&phy1>;
+ };
diff --git a/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt
new file mode 100644
index 000000000..8db32384a
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/amd-xgbe-phy.txt
@@ -0,0 +1,48 @@
+* AMD 10GbE PHY driver (amd-xgbe-phy)
+
+Required properties:
+- compatible: Should be "amd,xgbe-phy-seattle-v1a" and
+ "ethernet-phy-ieee802.3-c45"
+- reg: Address and length of the register sets for the device
+ - SerDes Rx/Tx registers
+ - SerDes integration registers (1/2)
+ - SerDes integration registers (2/2)
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the amd-xgbe-phy interrupt.
+
+Optional properties:
+- amd,speed-set: Speed capabilities of the device
+ 0 - 1GbE and 10GbE (default)
+ 1 - 2.5GbE and 10GbE
+
+The following optional properties are represented by an array with each
+value corresponding to a particular speed. The first array value represents
+the setting for the 1GbE speed, the second value for the 2.5GbE speed and
+the third value for the 10GbE speed. All three values are required if the
+property is used.
+- amd,serdes-blwc: Baseline wandering correction enablement
+ 0 - Off
+ 1 - On
+- amd,serdes-cdr-rate: CDR rate speed selection
+- amd,serdes-pq-skew: PQ (data sampling) skew
+- amd,serdes-tx-amp: TX amplitude boost
+- amd,serdes-dfe-tap-config: DFE taps available to run
+- amd,serdes-dfe-tap-enable: DFE taps to enable
+
+Example:
+ xgbe_phy@e1240800 {
+ compatible = "amd,xgbe-phy-seattle-v1a", "ethernet-phy-ieee802.3-c45";
+ reg = <0 0xe1240800 0 0x00400>,
+ <0 0xe1250000 0 0x00060>,
+ <0 0xe1250080 0 0x00004>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 323 4>;
+ amd,speed-set = <0>;
+ amd,serdes-blwc = <1>, <1>, <0>;
+ amd,serdes-cdr-rate = <2>, <2>, <7>;
+ amd,serdes-pq-skew = <10>, <10>, <30>;
+ amd,serdes-tx-amp = <15>, <15>, <10>;
+ amd,serdes-dfe-tap-config = <3>, <3>, <1>;
+ amd,serdes-dfe-tap-enable = <0>, <0>, <127>;
+ };
diff --git a/Documentation/devicetree/bindings/net/amd-xgbe.txt b/Documentation/devicetree/bindings/net/amd-xgbe.txt
new file mode 100644
index 000000000..26efd526d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/amd-xgbe.txt
@@ -0,0 +1,47 @@
+* AMD 10GbE driver (amd-xgbe)
+
+Required properties:
+- compatible: Should be "amd,xgbe-seattle-v1a"
+- reg: Address and length of the register sets for the device
+ - MAC registers
+ - PCS registers
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the amd-xgbe interrupt(s). The first interrupt
+ listed is required and is the general device interrupt. If the optional
+ amd,per-channel-interrupt property is specified, then one additional
+ interrupt for each DMA channel supported by the device should be specified
+- clocks:
+ - DMA clock for the amd-xgbe device (used for calculating the
+ correct Rx interrupt watchdog timer value on a DMA channel
+ for coalescing)
+ - PTP clock for the amd-xgbe device
+- clock-names: Should be the names of the clocks
+ - "dma_clk" for the DMA clock
+ - "ptp_clk" for the PTP clock
+- phy-handle: See ethernet.txt file in the same directory
+- phy-mode: See ethernet.txt file in the same directory
+
+Optional properties:
+- mac-address: mac address to be assigned to the device. Can be overridden
+ by UEFI.
+- dma-coherent: Present if dma operations are coherent
+- amd,per-channel-interrupt: Indicates that Rx and Tx complete will generate
+ a unique interrupt for each DMA channel - this requires an additional
+ interrupt be configured for each DMA channel
+
+Example:
+ xgbe@e0700000 {
+ compatible = "amd,xgbe-seattle-v1a";
+ reg = <0 0xe0700000 0 0x80000>,
+ <0 0xe0780000 0 0x80000>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 325 4>,
+ <0 326 1>, <0 327 1>, <0 328 1>, <0 329 1>;
+ amd,per-channel-interrupt;
+ clocks = <&xgbe_dma_clk>, <&xgbe_ptp_clk>;
+ clock-names = "dma_clk", "ptp_clk";
+ phy-handle = <&phy>;
+ phy-mode = "xgmii";
+ mac-address = [ 02 a1 a2 a3 a4 a5 ];
+ };
diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
new file mode 100644
index 000000000..f55aa280d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt
@@ -0,0 +1,76 @@
+APM X-Gene SoC Ethernet nodes
+
+Ethernet nodes are defined to describe on-chip ethernet interfaces in
+APM X-Gene SoC.
+
+Required properties for all the ethernet interfaces:
+- compatible: Should state binding information from the following list,
+ - "apm,xgene-enet": RGMII based 1G interface
+ - "apm,xgene1-sgenet": SGMII based 1G interface
+ - "apm,xgene1-xgenet": XFI based 10G interface
+- reg: Address and length of the register set for the device. It contains the
+ information of registers in the same order as described by reg-names
+- reg-names: Should contain the register set names
+ - "enet_csr": Ethernet control and status register address space
+ - "ring_csr": Descriptor ring control and status register address space
+ - "ring_cmd": Descriptor ring command register address space
+- interrupts: Two interrupt specifiers can be specified.
+ - First is the Rx interrupt. This irq is mandatory.
+ - Second is the Tx completion interrupt.
+ This is supported only on SGMII based 1GbE and 10GbE interfaces.
+- port-id: Port number (0 or 1)
+- clocks: Reference to the clock entry.
+- local-mac-address: MAC address assigned to this device
+- phy-connection-type: Interface type between ethernet device and PHY device
+
+Required properties for ethernet interfaces that have external PHY:
+- phy-handle: Reference to a PHY node connected to this device
+
+- mdio: Device tree subnode with the following required properties:
+ - compatible: Must be "apm,xgene-mdio".
+ - #address-cells: Must be <1>.
+ - #size-cells: Must be <0>.
+
+ For the phy on the mdio bus, there must be a node with the following fields:
+ - compatible: PHY identifier. Please refer ./phy.txt for the format.
+ - reg: The ID number for the phy.
+
+Optional properties:
+- status: Should be "ok" or "disabled" for enabled/disabled. Default is "ok".
+
+Example:
+ menetclk: menetclk {
+ compatible = "apm,xgene-device-clock";
+ clock-output-names = "menetclk";
+ status = "ok";
+ };
+
+ menet: ethernet@17020000 {
+ compatible = "apm,xgene-enet";
+ status = "disabled";
+ reg = <0x0 0x17020000 0x0 0xd100>,
+ <0x0 0X17030000 0x0 0X400>,
+ <0x0 0X10000000 0x0 0X200>;
+ reg-names = "enet_csr", "ring_csr", "ring_cmd";
+ interrupts = <0x0 0x3c 0x4>;
+ port-id = <0>;
+ clocks = <&menetclk 0>;
+ local-mac-address = [00 01 73 00 00 01];
+ phy-connection-type = "rgmii";
+ phy-handle = <&menetphy>;
+ mdio {
+ compatible = "apm,xgene-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ menetphy: menetphy@3 {
+ compatible = "ethernet-phy-id001c.c915";
+ reg = <0x3>;
+ };
+
+ };
+ };
+
+/* Board-specific peripheral configurations */
+&menet {
+ status = "ok";
+};
diff --git a/Documentation/devicetree/bindings/net/arc_emac.txt b/Documentation/devicetree/bindings/net/arc_emac.txt
new file mode 100644
index 000000000..a1d71eb43
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/arc_emac.txt
@@ -0,0 +1,39 @@
+* Synopsys ARC EMAC 10/100 Ethernet driver (EMAC)
+
+Required properties:
+- compatible: Should be "snps,arc-emac"
+- reg: Address and length of the register set for the device
+- interrupts: Should contain the EMAC interrupts
+- max-speed: see ethernet.txt file in the same directory.
+- phy: see ethernet.txt file in the same directory.
+
+Clock handling:
+The clock frequency is needed to calculate and set polling period of EMAC.
+It must be provided by one of:
+- clock-frequency: CPU frequency.
+- clocks: reference to the clock supplying the EMAC.
+
+Child nodes of the driver are the individual PHY devices connected to the
+MDIO bus. They must have a "reg" property given the PHY address on the MDIO bus.
+
+Examples:
+
+ ethernet@c0fc2000 {
+ compatible = "snps,arc-emac";
+ reg = <0xc0fc2000 0x3c>;
+ interrupts = <6>;
+ mac-address = [ 00 11 22 33 44 55 ];
+
+ clock-frequency = <80000000>;
+ /* or */
+ clocks = <&emac_clock>;
+
+ max-speed = <100>;
+ phy = <&phy0>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
new file mode 100644
index 000000000..30d487597
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt
@@ -0,0 +1,78 @@
+* Broadcom Starfighter 2 integrated swich
+
+Required properties:
+
+- compatible: should be "brcm,bcm7445-switch-v4.0"
+- reg: addresses and length of the register sets for the device, must be 6
+ pairs of register addresses and lengths
+- interrupts: interrupts for the devices, must be two interrupts
+- dsa,mii-bus: phandle to the MDIO bus controller, see dsa/dsa.txt
+- dsa,ethernet: phandle to the CPU network interface controller, see dsa/dsa.txt
+- #size-cells: must be 0
+- #address-cells: must be 2, see dsa/dsa.txt
+
+Subnodes:
+
+The integrated switch subnode should be specified according to the binding
+described in dsa/dsa.txt.
+
+Optional properties:
+
+- reg-names: litteral names for the device base register addresses, when present
+ must be: "core", "reg", "intrl2_0", "intrl2_1", "fcb", "acb"
+
+- interrupt-names: litternal names for the device interrupt lines, when present
+ must be: "switch_0" and "switch_1"
+
+- brcm,num-gphy: specify the maximum number of integrated gigabit PHYs in the
+ switch
+
+- brcm,num-rgmii-ports: specify the maximum number of RGMII interfaces supported
+ by the switch
+
+- brcm,fcb-pause-override: boolean property, if present indicates that the switch
+ supports Failover Control Block pause override capability
+
+- brcm,acb-packets-inflight: boolean property, if present indicates that the switch
+ Admission Control Block supports reporting the number of packets in-flight in a
+ switch queue
+
+Example:
+
+switch_top@f0b00000 {
+ compatible = "simple-bus";
+ #size-cells = <1>;
+ #address-cells = <1>;
+ ranges = <0 0xf0b00000 0x40804>;
+
+ ethernet_switch@0 {
+ compatible = "brcm,bcm7445-switch-v4.0";
+ #size-cells = <0>;
+ #address-cells = <2>;
+ reg = <0x0 0x40000
+ 0x40000 0x110
+ 0x40340 0x30
+ 0x40380 0x30
+ 0x40400 0x34
+ 0x40600 0x208>;
+ interrupts = <0 0x18 0
+ 0 0x19 0>;
+ brcm,num-gphy = <1>;
+ brcm,num-rgmii-ports = <2>;
+ brcm,fcb-pause-override;
+ brcm,acb-packets-inflight;
+
+ ...
+ switch@0 {
+ reg = <0 0>;
+ #size-cells = <0>;
+ #address-cells <1>;
+
+ port@0 {
+ label = "gphy";
+ reg = <0>;
+ };
+ ...
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt b/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
new file mode 100644
index 000000000..451fef26b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/brcm,bcmgenet.txt
@@ -0,0 +1,121 @@
+* Broadcom BCM7xxx Ethernet Controller (GENET)
+
+Required properties:
+- compatible: should contain one of "brcm,genet-v1", "brcm,genet-v2",
+ "brcm,genet-v3", "brcm,genet-v4".
+- reg: address and length of the register set for the device
+- interrupts: must be two cells, the first cell is the general purpose
+ interrupt line, while the second cell is the interrupt for the ring
+ RX and TX queues operating in ring mode
+- phy-mode: see ethernet.txt file in the same directory
+- #address-cells: should be 1
+- #size-cells: should be 1
+
+Optional properties:
+- clocks: When provided, must be two phandles to the functional clocks nodes
+ of the GENET block. The first phandle is the main GENET clock used during
+ normal operation, while the second phandle is the Wake-on-LAN clock.
+- clock-names: When provided, names of the functional clock phandles, first
+ name should be "enet" and second should be "enet-wol".
+
+- phy-handle: See ethernet.txt file in the same directory; used to describe
+ configurations where a PHY (internal or external) is used.
+
+- fixed-link: When the GENET interface is connected to a MoCA hardware block or
+ when operating in a RGMII to RGMII type of connection, or when the MDIO bus is
+ voluntarily disabled, this property should be used to describe the "fixed link".
+ See Documentation/devicetree/bindings/net/fixed-link.txt for information on
+ the property specifics
+
+Required child nodes:
+
+- mdio bus node: this node should always be present regarless of the PHY
+ configuration of the GENET instance
+
+MDIO bus node required properties:
+
+- compatible: should contain one of "brcm,genet-mdio-v1", "brcm,genet-mdio-v2"
+ "brcm,genet-mdio-v3", "brcm,genet-mdio-v4", the version has to match the
+ parent node compatible property (e.g: brcm,genet-v4 pairs with
+ brcm,genet-mdio-v4)
+- reg: address and length relative to the parent node base register address
+- #address-cells: address cell for MDIO bus addressing, should be 1
+- #size-cells: size of the cells for MDIO bus addressing, should be 0
+
+Ethernet PHY node properties:
+
+See Documentation/devicetree/bindings/net/phy.txt for the list of required and
+optional properties.
+
+Internal Gigabit PHY example:
+
+ethernet@f0b60000 {
+ phy-mode = "internal";
+ phy-handle = <&phy1>;
+ mac-address = [ 00 10 18 36 23 1a ];
+ compatible = "brcm,genet-v4";
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ reg = <0xf0b60000 0xfc4c>;
+ interrupts = <0x0 0x14 0x0>, <0x0 0x15 0x0>;
+
+ mdio@e14 {
+ compatible = "brcm,genet-mdio-v4";
+ #address-cells = <0x1>;
+ #size-cells = <0x0>;
+ reg = <0xe14 0x8>;
+
+ phy1: ethernet-phy@1 {
+ max-speed = <1000>;
+ reg = <0x1>;
+ compatible = "brcm,28nm-gphy", "ethernet-phy-ieee802.3-c22";
+ };
+ };
+};
+
+MoCA interface / MAC to MAC example:
+
+ethernet@f0b80000 {
+ phy-mode = "moca";
+ fixed-link = <1 0 1000 0 0>;
+ mac-address = [ 00 10 18 36 24 1a ];
+ compatible = "brcm,genet-v4";
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ reg = <0xf0b80000 0xfc4c>;
+ interrupts = <0x0 0x16 0x0>, <0x0 0x17 0x0>;
+
+ mdio@e14 {
+ compatible = "brcm,genet-mdio-v4";
+ #address-cells = <0x1>;
+ #size-cells = <0x0>;
+ reg = <0xe14 0x8>;
+ };
+};
+
+
+External MDIO-connected Gigabit PHY/switch:
+
+ethernet@f0ba0000 {
+ phy-mode = "rgmii";
+ phy-handle = <&phy0>;
+ mac-address = [ 00 10 18 36 26 1a ];
+ compatible = "brcm,genet-v4";
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ reg = <0xf0ba0000 0xfc4c>;
+ interrupts = <0x0 0x18 0x0>, <0x0 0x19 0x0>;
+
+ mdio@0e14 {
+ compatible = "brcm,genet-mdio-v4";
+ #address-cells = <0x1>;
+ #size-cells = <0x0>;
+ reg = <0xe14 0x8>;
+
+ phy0: ethernet-phy@0 {
+ max-speed = <1000>;
+ reg = <0x0>;
+ compatible = "brcm,bcm53125", "ethernet-phy-ieee802.3-c22";
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/brcm,systemport.txt b/Documentation/devicetree/bindings/net/brcm,systemport.txt
new file mode 100644
index 000000000..877da3414
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/brcm,systemport.txt
@@ -0,0 +1,30 @@
+* Broadcom BCM7xxx Ethernet Systemport Controller (SYSTEMPORT)
+
+Required properties:
+- compatible: should be one of "brcm,systemport-v1.00" or "brcm,systemport"
+- reg: address and length of the register set for the device.
+- interrupts: interrupts for the device, first cell must be for the rx
+ interrupts, and the second cell should be for the transmit queues. An
+ optional third interrupt cell for Wake-on-LAN can be specified
+- local-mac-address: Ethernet MAC address (48 bits) of this adapter
+- phy-mode: Should be a string describing the PHY interface to the
+ Ethernet switch/PHY, see Documentation/devicetree/bindings/net/ethernet.txt
+- fixed-link: see Documentation/devicetree/bindings/net/fixed-link.txt for
+ the property specific details
+
+Optional properties:
+- systemport,num-tier2-arb: number of tier 2 arbiters, an integer
+- systemport,num-tier1-arb: number of tier 1 arbiters, an integer
+- systemport,num-txq: number of HW transmit queues, an integer
+- systemport,num-rxq: number of HW receive queues, an integer
+
+Example:
+ethernet@f04a0000 {
+ compatible = "brcm,systemport-v1.00";
+ reg = <0xf04a0000 0x4650>;
+ local-mac-address = [ 00 11 22 33 44 55 ];
+ fixed-link = <0 1 1000 0 0>;
+ phy-mode = "gmii";
+ interrupts = <0x0 0x16 0x0>,
+ <0x0 0x17 0x0>;
+};
diff --git a/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt
new file mode 100644
index 000000000..ab0bb4247
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/brcm,unimac-mdio.txt
@@ -0,0 +1,39 @@
+* Broadcom UniMAC MDIO bus controller
+
+Required properties:
+- compatible: should one from "brcm,genet-mdio-v1", "brcm,genet-mdio-v2",
+ "brcm,genet-mdio-v3", "brcm,genet-mdio-v4" or "brcm,unimac-mdio"
+- reg: address and length of the regsiter set for the device, first one is the
+ base register, and the second one is optional and for indirect accesses to
+ larger than 16-bits MDIO transactions
+- reg-names: name(s) of the register must be "mdio" and optional "mdio_indir_rw"
+- #size-cells: must be 1
+- #address-cells: must be 0
+
+Optional properties:
+- interrupts: must be one if the interrupt is shared with the Ethernet MAC or
+ Ethernet switch this MDIO block is integrated from, or must be two, if there
+ are two separate interrupts, first one must be "mdio done" and second must be
+ for "mdio error"
+- interrupt-names: must be "mdio_done_error" when there is a share interrupt fed
+ to this hardware block, or must be "mdio_done" for the first interrupt and
+ "mdio_error" for the second when there are separate interrupts
+
+Child nodes of this MDIO bus controller node are standard Ethernet PHY device
+nodes as described in Documentation/devicetree/bindings/net/phy.txt
+
+Example:
+
+mdio@403c0 {
+ compatible = "brcm,unimac-mdio";
+ reg = <0x403c0 0x8 0x40300 0x18>;
+ reg-names = "mdio", "mdio_indir_rw";
+ #size-cells = <1>;
+ #address-cells = <0>;
+
+ ...
+ phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/broadcom-bcm87xx.txt b/Documentation/devicetree/bindings/net/broadcom-bcm87xx.txt
new file mode 100644
index 000000000..7c86d5e28
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/broadcom-bcm87xx.txt
@@ -0,0 +1,29 @@
+The Broadcom BCM87XX devices are a family of 10G Ethernet PHYs. They
+have these bindings in addition to the standard PHY bindings.
+
+Compatible: Should contain "broadcom,bcm8706" or "broadcom,bcm8727" and
+ "ethernet-phy-ieee802.3-c45"
+
+Optional Properties:
+
+- broadcom,c45-reg-init : one of more sets of 4 cells. The first cell
+ is the MDIO Manageable Device (MMD) address, the second a register
+ address within the MMD, the third cell contains a mask to be ANDed
+ with the existing register value, and the fourth cell is ORed with
+ he result to yield the new register value. If the third cell has a
+ value of zero, no read of the existing value is performed.
+
+Example:
+
+ ethernet-phy@5 {
+ reg = <5>;
+ compatible = "broadcom,bcm8706", "ethernet-phy-ieee802.3-c45";
+ interrupt-parent = <&gpio>;
+ interrupts = <12 8>; /* Pin 12, active low */
+ /*
+ * Set PMD Digital Control Register for
+ * GPIO[1] Tx/Rx
+ * GPIO[0] R64 Sync Acquired
+ */
+ broadcom,c45-reg-init = <1 0xc808 0xff8f 0x70>;
+ };
diff --git a/Documentation/devicetree/bindings/net/calxeda-xgmac.txt b/Documentation/devicetree/bindings/net/calxeda-xgmac.txt
new file mode 100644
index 000000000..c8ae996bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/calxeda-xgmac.txt
@@ -0,0 +1,18 @@
+* Calxeda Highbank 10Gb XGMAC Ethernet
+
+Required properties:
+- compatible : Should be "calxeda,hb-xgmac"
+- reg : Address and length of the register set for the device
+- interrupts : Should contain 3 xgmac interrupts. The 1st is main interrupt.
+ The 2nd is pwr mgt interrupt. The 3rd is low power state interrupt.
+
+Optional properties:
+- dma-coherent : Present if dma operations are coherent
+
+Example:
+
+ethernet@fff50000 {
+ compatible = "calxeda,hb-xgmac";
+ reg = <0xfff50000 0x1000>;
+ interrupts = <0 77 4 0 78 4 0 79 4>;
+};
diff --git a/Documentation/devicetree/bindings/net/can/atmel-can.txt b/Documentation/devicetree/bindings/net/can/atmel-can.txt
new file mode 100644
index 000000000..14e52a0d8
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/atmel-can.txt
@@ -0,0 +1,14 @@
+* AT91 CAN *
+
+Required properties:
+ - compatible: Should be "atmel,at91sam9263-can" or "atmel,at91sam9x5-can"
+ - reg: Should contain CAN controller registers location and length
+ - interrupts: Should contain IRQ line for the CAN controller
+
+Example:
+
+ can0: can@f000c000 {
+ compatible = "atmel,at91sam9x5-can";
+ reg = <0xf000c000 0x300>;
+ interrupts = <40 4 5>
+ };
diff --git a/Documentation/devicetree/bindings/net/can/c_can.txt b/Documentation/devicetree/bindings/net/can/c_can.txt
new file mode 100644
index 000000000..5a1d8b0c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/c_can.txt
@@ -0,0 +1,54 @@
+Bosch C_CAN/D_CAN controller Device Tree Bindings
+-------------------------------------------------
+
+Required properties:
+- compatible : Should be "bosch,c_can" for C_CAN controllers and
+ "bosch,d_can" for D_CAN controllers.
+ Can be "ti,dra7-d_can", "ti,am3352-d_can" or
+ "ti,am4372-d_can".
+- reg : physical base address and size of the C_CAN/D_CAN
+ registers map
+- interrupts : property with a value describing the interrupt
+ number
+
+Optional properties:
+- ti,hwmods : Must be "d_can<n>" or "c_can<n>", n being the
+ instance number
+- syscon-raminit : Handle to system control region that contains the
+ RAMINIT register, register offset to the RAMINIT
+ register and the CAN instance number (0 offset).
+
+Note: "ti,hwmods" field is used to fetch the base address and irq
+resources from TI, omap hwmod data base during device registration.
+Future plan is to migrate hwmod data base contents into device tree
+blob so that, all the required data will be used from device tree dts
+file.
+
+Example:
+
+Step1: SoC common .dtsi file
+
+ dcan1: d_can@481d0000 {
+ compatible = "bosch,d_can";
+ reg = <0x481d0000 0x2000>;
+ interrupts = <55>;
+ interrupt-parent = <&intc>;
+ status = "disabled";
+ };
+
+(or)
+
+ dcan1: d_can@481d0000 {
+ compatible = "bosch,d_can";
+ ti,hwmods = "d_can1";
+ reg = <0x481d0000 0x2000>;
+ interrupts = <55>;
+ interrupt-parent = <&intc>;
+ status = "disabled";
+ };
+
+Step 2: board specific .dts file
+
+ &dcan1 {
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/net/can/cc770.txt b/Documentation/devicetree/bindings/net/can/cc770.txt
new file mode 100644
index 000000000..77027bf64
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/cc770.txt
@@ -0,0 +1,53 @@
+Memory mapped Bosch CC770 and Intel AN82527 CAN controller
+
+Note: The CC770 is a CAN controller from Bosch, which is 100%
+compatible with the old AN82527 from Intel, but with "bugs" being fixed.
+
+Required properties:
+
+- compatible : should be "bosch,cc770" for the CC770 and "intc,82527"
+ for the AN82527.
+
+- reg : should specify the chip select, address offset and size required
+ to map the registers of the controller. The size is usually 0x80.
+
+- interrupts : property with a value describing the interrupt source
+ (number and sensitivity) required for the controller.
+
+Optional properties:
+
+- bosch,external-clock-frequency : frequency of the external oscillator
+ clock in Hz. Note that the internal clock frequency used by the
+ controller is half of that value. If not specified, a default
+ value of 16000000 (16 MHz) is used.
+
+- bosch,clock-out-frequency : slock frequency in Hz on the CLKOUT pin.
+ If not specified or if the specified value is 0, the CLKOUT pin
+ will be disabled.
+
+- bosch,slew-rate : slew rate of the CLKOUT signal. If not specified,
+ a resonable value will be calculated.
+
+- bosch,disconnect-rx0-input : see data sheet.
+
+- bosch,disconnect-rx1-input : see data sheet.
+
+- bosch,disconnect-tx1-output : see data sheet.
+
+- bosch,polarity-dominant : see data sheet.
+
+- bosch,divide-memory-clock : see data sheet.
+
+- bosch,iso-low-speed-mux : see data sheet.
+
+For further information, please have a look to the CC770 or AN82527.
+
+Examples:
+
+can@3,100 {
+ compatible = "bosch,cc770";
+ reg = <3 0x100 0x80>;
+ interrupts = <2 0>;
+ interrupt-parent = <&mpic>;
+ bosch,external-clock-frequency = <16000000>;
+};
diff --git a/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
new file mode 100644
index 000000000..56d6cc336
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/fsl-flexcan.txt
@@ -0,0 +1,29 @@
+Flexcan CAN controller on Freescale's ARM and PowerPC system-on-a-chip (SOC).
+
+Required properties:
+
+- compatible : Should be "fsl,<processor>-flexcan"
+
+ An implementation should also claim any of the following compatibles
+ that it is fully backwards compatible with:
+
+ - fsl,p1010-flexcan
+
+- reg : Offset and length of the register set for this device
+- interrupts : Interrupt tuple for this device
+
+Optional properties:
+
+- clock-frequency : The oscillator frequency driving the flexcan device
+
+- xceiver-supply: Regulator that powers the CAN transceiver
+
+Example:
+
+ can@1c000 {
+ compatible = "fsl,p1010-flexcan";
+ reg = <0x1c000 0x1000>;
+ interrupts = <48 0x2>;
+ interrupt-parent = <&mpic>;
+ clock-frequency = <200000000>; // filled in by bootloader
+ };
diff --git a/Documentation/devicetree/bindings/net/can/grcan.txt b/Documentation/devicetree/bindings/net/can/grcan.txt
new file mode 100644
index 000000000..34ef3498f
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/grcan.txt
@@ -0,0 +1,28 @@
+Aeroflex Gaisler GRCAN and GRHCAN CAN controllers.
+
+The GRCAN and CRHCAN CAN controllers are available in the GRLIB VHDL IP core
+library.
+
+Note: These properties are built from the AMBA plug&play in a Leon SPARC system
+(the ordinary environment for GRCAN and GRHCAN). There are no dts files for
+sparc.
+
+Required properties:
+
+- name : Should be "GAISLER_GRCAN", "01_03d", "GAISLER_GRHCAN" or "01_034"
+
+- reg : Address and length of the register set for the device
+
+- freq : Frequency of the external oscillator clock in Hz (the frequency of
+ the amba bus in the ordinary case)
+
+- interrupts : Interrupt number for this device
+
+Optional properties:
+
+- systemid : If not present or if the value of the least significant 16 bits
+ of this 32-bit property is smaller than GRCAN_TXBUG_SAFE_GRLIB_VERSION
+ a bug workaround is activated.
+
+For further information look in the documentation for the GLIB IP core library:
+http://www.gaisler.com/products/grlib/grip.pdf
diff --git a/Documentation/devicetree/bindings/net/can/m_can.txt b/Documentation/devicetree/bindings/net/can/m_can.txt
new file mode 100644
index 000000000..9e331777c
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/m_can.txt
@@ -0,0 +1,67 @@
+Bosch MCAN controller Device Tree Bindings
+-------------------------------------------------
+
+Required properties:
+- compatible : Should be "bosch,m_can" for M_CAN controllers
+- reg : physical base address and size of the M_CAN
+ registers map and Message RAM
+- reg-names : Should be "m_can" and "message_ram"
+- interrupts : Should be the interrupt number of M_CAN interrupt
+ line 0 and line 1, could be same if sharing
+ the same interrupt.
+- interrupt-names : Should contain "int0" and "int1"
+- clocks : Clocks used by controller, should be host clock
+ and CAN clock.
+- clock-names : Should contain "hclk" and "cclk"
+- pinctrl-<n> : Pinctrl states as described in bindings/pinctrl/pinctrl-bindings.txt
+- pinctrl-names : Names corresponding to the numbered pinctrl states
+- bosch,mram-cfg : Message RAM configuration data.
+ Multiple M_CAN instances can share the same Message
+ RAM and each element(e.g Rx FIFO or Tx Buffer and etc)
+ number in Message RAM is also configurable,
+ so this property is telling driver how the shared or
+ private Message RAM are used by this M_CAN controller.
+
+ The format should be as follows:
+ <offset sidf_elems xidf_elems rxf0_elems rxf1_elems
+ rxb_elems txe_elems txb_elems>
+ The 'offset' is an address offset of the Message RAM
+ where the following elements start from. This is
+ usually set to 0x0 if you're using a private Message
+ RAM. The remain cells are used to specify how many
+ elements are used for each FIFO/Buffer.
+
+ M_CAN includes the following elements according to user manual:
+ 11-bit Filter 0-128 elements / 0-128 words
+ 29-bit Filter 0-64 elements / 0-128 words
+ Rx FIFO 0 0-64 elements / 0-1152 words
+ Rx FIFO 1 0-64 elements / 0-1152 words
+ Rx Buffers 0-64 elements / 0-1152 words
+ Tx Event FIFO 0-32 elements / 0-64 words
+ Tx Buffers 0-32 elements / 0-576 words
+
+ Please refer to 2.4.1 Message RAM Configuration in
+ Bosch M_CAN user manual for details.
+
+Example:
+SoC dtsi:
+m_can1: can@020e8000 {
+ compatible = "bosch,m_can";
+ reg = <0x020e8000 0x4000>, <0x02298000 0x4000>;
+ reg-names = "m_can", "message_ram";
+ interrupts = <0 114 0x04>,
+ <0 114 0x04>;
+ interrupt-names = "int0", "int1";
+ clocks = <&clks IMX6SX_CLK_CANFD>,
+ <&clks IMX6SX_CLK_CANFD>;
+ clock-names = "hclk", "cclk";
+ bosch,mram-cfg = <0x0 0 0 32 0 0 0 1>;
+ status = "disabled";
+};
+
+Board dts:
+&m_can1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_m_can1>;
+ status = "enabled";
+};
diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt b/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt
new file mode 100644
index 000000000..ee3723beb
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/microchip,mcp251x.txt
@@ -0,0 +1,25 @@
+* Microchip MCP251X stand-alone CAN controller device tree bindings
+
+Required properties:
+ - compatible: Should be one of the following:
+ - "microchip,mcp2510" for MCP2510.
+ - "microchip,mcp2515" for MCP2515.
+ - reg: SPI chip select.
+ - clocks: The clock feeding the CAN controller.
+ - interrupt-parent: The parent interrupt controller.
+ - interrupts: Should contain IRQ line for the CAN controller.
+
+Optional properties:
+ - vdd-supply: Regulator that powers the CAN controller.
+ - xceiver-supply: Regulator that powers the CAN transceiver.
+
+Example:
+ can0: can@1 {
+ compatible = "microchip,mcp2515";
+ reg = <1>;
+ clocks = <&clk24m>;
+ interrupt-parent = <&gpio4>;
+ interrupts = <13 0x2>;
+ vdd-supply = <&reg5v0>;
+ xceiver-supply = <&reg5v0>;
+ };
diff --git a/Documentation/devicetree/bindings/net/can/mpc5xxx-mscan.txt b/Documentation/devicetree/bindings/net/can/mpc5xxx-mscan.txt
new file mode 100644
index 000000000..2fa4fcd38
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/mpc5xxx-mscan.txt
@@ -0,0 +1,53 @@
+CAN Device Tree Bindings
+------------------------
+
+(c) 2006-2009 Secret Lab Technologies Ltd
+Grant Likely <grant.likely@secretlab.ca>
+
+fsl,mpc5200-mscan nodes
+-----------------------
+In addition to the required compatible-, reg- and interrupt-properties, you can
+also specify which clock source shall be used for the controller:
+
+- fsl,mscan-clock-source : a string describing the clock source. Valid values
+ are: "ip" for ip bus clock
+ "ref" for reference clock (XTAL)
+ "ref" is default in case this property is not
+ present.
+
+fsl,mpc5121-mscan nodes
+-----------------------
+In addition to the required compatible-, reg- and interrupt-properties, you can
+also specify which clock source and divider shall be used for the controller:
+
+- fsl,mscan-clock-source : a string describing the clock source. Valid values
+ are: "ip" for ip bus clock
+ "ref" for reference clock
+ "sys" for system clock
+ If this property is not present, an optimal CAN
+ clock source and frequency based on the system
+ clock will be selected. If this is not possible,
+ the reference clock will be used.
+
+- fsl,mscan-clock-divider: for the reference and system clock, an additional
+ clock divider can be specified. By default, a
+ value of 1 is used.
+
+Note that the MPC5121 Rev. 1 processor is not supported.
+
+Examples:
+ can@1300 {
+ compatible = "fsl,mpc5121-mscan";
+ interrupts = <12 0x8>;
+ interrupt-parent = <&ipic>;
+ reg = <0x1300 0x80>;
+ };
+
+ can@1380 {
+ compatible = "fsl,mpc5121-mscan";
+ interrupts = <13 0x8>;
+ interrupt-parent = <&ipic>;
+ reg = <0x1380 0x80>;
+ fsl,mscan-clock-source = "ref";
+ fsl,mscan-clock-divider = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt
new file mode 100644
index 000000000..002d8440b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/rcar_can.txt
@@ -0,0 +1,43 @@
+Renesas R-Car CAN controller Device Tree Bindings
+-------------------------------------------------
+
+Required properties:
+- compatible: "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
+ "renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
+ "renesas,can-r8a7790" if CAN controller is a part of R8A7790 SoC.
+ "renesas,can-r8a7791" if CAN controller is a part of R8A7791 SoC.
+- reg: physical base address and size of the R-Car CAN register map.
+- interrupts: interrupt specifier for the sole interrupt.
+- clocks: phandles and clock specifiers for 3 CAN clock inputs.
+- clock-names: 3 clock input name strings: "clkp1", "clkp2", "can_clk".
+- pinctrl-0: pin control group to be used for this controller.
+- pinctrl-names: must be "default".
+
+Optional properties:
+- renesas,can-clock-select: R-Car CAN Clock Source Select. Valid values are:
+ <0x0> (default) : Peripheral clock (clkp1)
+ <0x1> : Peripheral clock (clkp2)
+ <0x3> : Externally input clock
+
+Example
+-------
+
+SoC common .dtsi file:
+
+ can0: can@e6e80000 {
+ compatible = "renesas,can-r8a7791";
+ reg = <0 0xe6e80000 0 0x1000>;
+ interrupts = <0 186 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp9_clks R8A7791_CLK_RCAN0>,
+ <&cpg_clocks R8A7791_CLK_RCAN>, <&can_clk>;
+ clock-names = "clkp1", "clkp2", "can_clk";
+ status = "disabled";
+ };
+
+Board specific .dts file:
+
+&can0 {
+ pinctrl-0 = <&can0_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+};
diff --git a/Documentation/devicetree/bindings/net/can/sja1000.txt b/Documentation/devicetree/bindings/net/can/sja1000.txt
new file mode 100644
index 000000000..b4a6d53fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/sja1000.txt
@@ -0,0 +1,57 @@
+Memory mapped SJA1000 CAN controller from NXP (formerly Philips)
+
+Required properties:
+
+- compatible : should be "nxp,sja1000".
+
+- reg : should specify the chip select, address offset and size required
+ to map the registers of the SJA1000. The size is usually 0x80.
+
+- interrupts: property with a value describing the interrupt source
+ (number and sensitivity) required for the SJA1000.
+
+Optional properties:
+
+- reg-io-width : Specify the size (in bytes) of the IO accesses that
+ should be performed on the device. Valid value is 1, 2 or 4.
+ Default to 1 (8 bits).
+
+- nxp,external-clock-frequency : Frequency of the external oscillator
+ clock in Hz. Note that the internal clock frequency used by the
+ SJA1000 is half of that value. If not specified, a default value
+ of 16000000 (16 MHz) is used.
+
+- nxp,tx-output-mode : operation mode of the TX output control logic:
+ <0x0> : bi-phase output mode
+ <0x1> : normal output mode (default)
+ <0x2> : test output mode
+ <0x3> : clock output mode
+
+- nxp,tx-output-config : TX output pin configuration:
+ <0x01> : TX0 invert
+ <0x02> : TX0 pull-down (default)
+ <0x04> : TX0 pull-up
+ <0x06> : TX0 push-pull
+ <0x08> : TX1 invert
+ <0x10> : TX1 pull-down
+ <0x20> : TX1 pull-up
+ <0x30> : TX1 push-pull
+
+- nxp,clock-out-frequency : clock frequency in Hz on the CLKOUT pin.
+ If not specified or if the specified value is 0, the CLKOUT pin
+ will be disabled.
+
+- nxp,no-comparator-bypass : Allows to disable the CAN input comparator.
+
+For further information, please have a look to the SJA1000 data sheet.
+
+Examples:
+
+can@3,100 {
+ compatible = "nxp,sja1000";
+ reg = <3 0x100 0x80>;
+ interrupts = <2 0>;
+ interrupt-parent = <&mpic>;
+ nxp,external-clock-frequency = <16000000>;
+};
+
diff --git a/Documentation/devicetree/bindings/net/can/xilinx_can.txt b/Documentation/devicetree/bindings/net/can/xilinx_can.txt
new file mode 100644
index 000000000..fe38847d8
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/xilinx_can.txt
@@ -0,0 +1,44 @@
+Xilinx Axi CAN/Zynq CANPS controller Device Tree Bindings
+---------------------------------------------------------
+
+Required properties:
+- compatible : Should be "xlnx,zynq-can-1.0" for Zynq CAN
+ controllers and "xlnx,axi-can-1.00.a" for Axi CAN
+ controllers.
+- reg : Physical base address and size of the Axi CAN/Zynq
+ CANPS registers map.
+- interrupts : Property with a value describing the interrupt
+ number.
+- interrupt-parent : Must be core interrupt controller
+- clock-names : List of input clock names - "can_clk", "pclk"
+ (For CANPS), "can_clk" , "s_axi_aclk"(For AXI CAN)
+ (See clock bindings for details).
+- clocks : Clock phandles (see clock bindings for details).
+- tx-fifo-depth : Can Tx fifo depth.
+- rx-fifo-depth : Can Rx fifo depth.
+
+
+Example:
+
+For Zynq CANPS Dts file:
+ zynq_can_0: can@e0008000 {
+ compatible = "xlnx,zynq-can-1.0";
+ clocks = <&clkc 19>, <&clkc 36>;
+ clock-names = "can_clk", "pclk";
+ reg = <0xe0008000 0x1000>;
+ interrupts = <0 28 4>;
+ interrupt-parent = <&intc>;
+ tx-fifo-depth = <0x40>;
+ rx-fifo-depth = <0x40>;
+ };
+For Axi CAN Dts file:
+ axi_can_0: axi-can@40000000 {
+ compatible = "xlnx,axi-can-1.00.a";
+ clocks = <&clkc 0>, <&clkc 1>;
+ clock-names = "can_clk","s_axi_aclk" ;
+ reg = <0x40000000 0x10000>;
+ interrupt-parent = <&intc>;
+ interrupts = <0 59 1>;
+ tx-fifo-depth = <0x40>;
+ rx-fifo-depth = <0x40>;
+ };
diff --git a/Documentation/devicetree/bindings/net/cavium-mdio.txt b/Documentation/devicetree/bindings/net/cavium-mdio.txt
new file mode 100644
index 000000000..04cb7491d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/cavium-mdio.txt
@@ -0,0 +1,27 @@
+* System Management Interface (SMI) / MDIO
+
+Properties:
+- compatible: "cavium,octeon-3860-mdio"
+
+ Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
+
+- reg: The base address of the MDIO bus controller register bank.
+
+- #address-cells: Must be <1>.
+
+- #size-cells: Must be <0>. MDIO addresses have no size component.
+
+Typically an MDIO bus might have several children.
+
+Example:
+ mdio@1180000001800 {
+ compatible = "cavium,octeon-3860-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x11800 0x00001800 0x0 0x40>;
+
+ ethernet-phy@0 {
+ ...
+ reg = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/cavium-mix.txt b/Documentation/devicetree/bindings/net/cavium-mix.txt
new file mode 100644
index 000000000..8d7c30963
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/cavium-mix.txt
@@ -0,0 +1,34 @@
+* MIX Ethernet controller.
+
+Properties:
+- compatible: "cavium,octeon-5750-mix"
+
+ Compatibility with all cn5XXX and cn6XXX SOCs populated with MIX
+ devices.
+
+- reg: The base addresses of four separate register banks. The first
+ bank contains the MIX registers. The second bank the corresponding
+ AGL registers. The third bank are the AGL registers shared by all
+ MIX devices present. The fourth bank is the AGL_PRT_CTL shared by
+ all MIX devices present.
+
+- cell-index: A single cell specifying which portion of the shared
+ register banks corresponds to this MIX device.
+
+- interrupts: Two interrupt specifiers. The first is the MIX
+ interrupt routing and the second the routing for the AGL interrupts.
+
+- phy-handle: Optional, see ethernet.txt file in the same directory.
+
+Example:
+ ethernet@1070000100800 {
+ compatible = "cavium,octeon-5750-mix";
+ reg = <0x10700 0x00100800 0x0 0x100>, /* MIX */
+ <0x11800 0xE0000800 0x0 0x300>, /* AGL */
+ <0x11800 0xE0000400 0x0 0x400>, /* AGL_SHARED */
+ <0x11800 0xE0002008 0x0 0x8>; /* AGL_PRT_CTL */
+ cell-index = <1>;
+ interrupts = <1 18>, < 1 46>;
+ local-mac-address = [ 00 0f b7 10 63 54 ];
+ phy-handle = <&phy1>;
+ };
diff --git a/Documentation/devicetree/bindings/net/cavium-pip.txt b/Documentation/devicetree/bindings/net/cavium-pip.txt
new file mode 100644
index 000000000..7dbd15881
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/cavium-pip.txt
@@ -0,0 +1,93 @@
+* PIP Ethernet nexus.
+
+The PIP Ethernet nexus can control several data packet input/output
+devices. The devices have a two level grouping scheme. There may be
+several interfaces, and each interface may have several ports. These
+ports might be an individual Ethernet PHY.
+
+
+Properties for the PIP nexus:
+- compatible: "cavium,octeon-3860-pip"
+
+ Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
+
+- reg: The base address of the PIP's register bank.
+
+- #address-cells: Must be <1>.
+
+- #size-cells: Must be <0>.
+
+Properties for PIP interfaces which is a child the PIP nexus:
+- compatible: "cavium,octeon-3860-pip-interface"
+
+ Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
+
+- reg: The interface number.
+
+- #address-cells: Must be <1>.
+
+- #size-cells: Must be <0>.
+
+Properties for PIP port which is a child the PIP interface:
+- compatible: "cavium,octeon-3860-pip-port"
+
+ Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
+
+- reg: The port number within the interface group.
+
+- phy-handle: Optional, see ethernet.txt file in the same directory.
+
+Example:
+
+ pip@11800a0000000 {
+ compatible = "cavium,octeon-3860-pip";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x11800 0xa0000000 0x0 0x2000>;
+
+ interface@0 {
+ compatible = "cavium,octeon-3860-pip-interface";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0>; /* interface */
+
+ ethernet@0 {
+ compatible = "cavium,octeon-3860-pip-port";
+ reg = <0x0>; /* Port */
+ local-mac-address = [ 00 0f b7 10 63 60 ];
+ phy-handle = <&phy2>;
+ };
+ ethernet@1 {
+ compatible = "cavium,octeon-3860-pip-port";
+ reg = <0x1>; /* Port */
+ local-mac-address = [ 00 0f b7 10 63 61 ];
+ phy-handle = <&phy3>;
+ };
+ ethernet@2 {
+ compatible = "cavium,octeon-3860-pip-port";
+ reg = <0x2>; /* Port */
+ local-mac-address = [ 00 0f b7 10 63 62 ];
+ phy-handle = <&phy4>;
+ };
+ ethernet@3 {
+ compatible = "cavium,octeon-3860-pip-port";
+ reg = <0x3>; /* Port */
+ local-mac-address = [ 00 0f b7 10 63 63 ];
+ phy-handle = <&phy5>;
+ };
+ };
+
+ interface@1 {
+ compatible = "cavium,octeon-3860-pip-interface";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <1>; /* interface */
+
+ ethernet@0 {
+ compatible = "cavium,octeon-3860-pip-port";
+ reg = <0x0>; /* Port */
+ local-mac-address = [ 00 0f b7 10 63 64 ];
+ phy-handle = <&phy6>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/cdns-emac.txt b/Documentation/devicetree/bindings/net/cdns-emac.txt
new file mode 100644
index 000000000..4451ee973
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/cdns-emac.txt
@@ -0,0 +1,20 @@
+* Cadence EMAC Ethernet controller
+
+Required properties:
+- compatible: Should be "cdns,[<chip>-]{emac}"
+ Use "cdns,at91rm9200-emac" Atmel at91rm9200 SoC.
+ Use "cdns,zynq-gem" Xilinx Zynq-7xxx SoC.
+ Or the generic form: "cdns,emac".
+- reg: Address and length of the register set for the device
+- interrupts: Should contain macb interrupt
+- phy-mode: see ethernet.txt file in the same directory.
+
+Examples:
+
+ macb0: ethernet@fffc4000 {
+ compatible = "cdns,at91rm9200-emac";
+ reg = <0xfffc4000 0x4000>;
+ interrupts = <21>;
+ phy-mode = "rmii";
+ local-mac-address = [3a 0e 03 04 05 06];
+ };
diff --git a/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt b/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt
new file mode 100644
index 000000000..764c0c79b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/cpsw-phy-sel.txt
@@ -0,0 +1,30 @@
+TI CPSW Phy mode Selection Device Tree Bindings
+-----------------------------------------------
+
+Required properties:
+- compatible : Should be "ti,am3352-cpsw-phy-sel" for am335x platform and
+ "ti,dra7xx-cpsw-phy-sel" for dra7xx platform
+ "ti,am43xx-cpsw-phy-sel" for am43xx platform
+- reg : physical base address and size of the cpsw
+ registers map
+- reg-names : names of the register map given in "reg" node
+
+Optional properties:
+-rmii-clock-ext : If present, the driver will configure the RMII
+ interface to external clock usage
+
+Examples:
+
+ phy_sel: cpsw-phy-sel@44e10650 {
+ compatible = "ti,am3352-cpsw-phy-sel";
+ reg= <0x44e10650 0x4>;
+ reg-names = "gmii-sel";
+ };
+
+(or)
+ phy_sel: cpsw-phy-sel@44e10650 {
+ compatible = "ti,am3352-cpsw-phy-sel";
+ reg= <0x44e10650 0x4>;
+ reg-names = "gmii-sel";
+ rmii-clock-ext;
+ };
diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
new file mode 100644
index 000000000..33fe8462e
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -0,0 +1,104 @@
+TI SoC Ethernet Switch Controller Device Tree Bindings
+------------------------------------------------------
+
+Required properties:
+- compatible : Should be "ti,cpsw"
+- reg : physical base address and size of the cpsw
+ registers map
+- interrupts : property with a value describing the interrupt
+ number
+- interrupt-parent : The parent interrupt controller
+- cpdma_channels : Specifies number of channels in CPDMA
+- ale_entries : Specifies No of entries ALE can hold
+- bd_ram_size : Specifies internal descriptor RAM size
+- rx_descs : Specifies number of Rx descriptors
+- mac_control : Specifies Default MAC control register content
+ for the specific platform
+- slaves : Specifies number for slaves
+- active_slave : Specifies the slave to use for time stamping,
+ ethtool and SIOCGMIIPHY
+- cpts_clock_mult : Numerator to convert input clock ticks into nanoseconds
+- cpts_clock_shift : Denominator to convert input clock ticks into nanoseconds
+
+Optional properties:
+- ti,hwmods : Must be "cpgmac0"
+- no_bd_ram : Must be 0 or 1
+- dual_emac : Specifies Switch to act as Dual EMAC
+- syscon : Phandle to the system control device node, which is
+ the control module device of the am33x
+
+Slave Properties:
+Required properties:
+- phy_id : Specifies slave phy id
+- phy-mode : See ethernet.txt file in the same directory
+
+Optional properties:
+- dual_emac_res_vlan : Specifies VID to be used to segregate the ports
+- mac-address : See ethernet.txt file in the same directory
+
+Note: "ti,hwmods" field is used to fetch the base address and irq
+resources from TI, omap hwmod data base during device registration.
+Future plan is to migrate hwmod data base contents into device tree
+blob so that, all the required data will be used from device tree dts
+file.
+
+Examples:
+
+ mac: ethernet@4A100000 {
+ compatible = "ti,cpsw";
+ reg = <0x4A100000 0x1000>;
+ interrupts = <55 0x4>;
+ interrupt-parent = <&intc>;
+ cpdma_channels = <8>;
+ ale_entries = <1024>;
+ bd_ram_size = <0x2000>;
+ no_bd_ram = <0>;
+ rx_descs = <64>;
+ mac_control = <0x20>;
+ slaves = <2>;
+ active_slave = <0>;
+ cpts_clock_mult = <0x80000000>;
+ cpts_clock_shift = <29>;
+ syscon = <&cm>;
+ cpsw_emac0: slave@0 {
+ phy_id = <&davinci_mdio>, <0>;
+ phy-mode = "rgmii-txid";
+ /* Filled in by U-Boot */
+ mac-address = [ 00 00 00 00 00 00 ];
+ };
+ cpsw_emac1: slave@1 {
+ phy_id = <&davinci_mdio>, <1>;
+ phy-mode = "rgmii-txid";
+ /* Filled in by U-Boot */
+ mac-address = [ 00 00 00 00 00 00 ];
+ };
+ };
+
+(or)
+ mac: ethernet@4A100000 {
+ compatible = "ti,cpsw";
+ ti,hwmods = "cpgmac0";
+ cpdma_channels = <8>;
+ ale_entries = <1024>;
+ bd_ram_size = <0x2000>;
+ no_bd_ram = <0>;
+ rx_descs = <64>;
+ mac_control = <0x20>;
+ slaves = <2>;
+ active_slave = <0>;
+ cpts_clock_mult = <0x80000000>;
+ cpts_clock_shift = <29>;
+ syscon = <&cm>;
+ cpsw_emac0: slave@0 {
+ phy_id = <&davinci_mdio>, <0>;
+ phy-mode = "rgmii-txid";
+ /* Filled in by U-Boot */
+ mac-address = [ 00 00 00 00 00 00 ];
+ };
+ cpsw_emac1: slave@1 {
+ phy_id = <&davinci_mdio>, <1>;
+ phy-mode = "rgmii-txid";
+ /* Filled in by U-Boot */
+ mac-address = [ 00 00 00 00 00 00 ];
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/davicom-dm9000.txt b/Documentation/devicetree/bindings/net/davicom-dm9000.txt
new file mode 100644
index 000000000..5224bf05f
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/davicom-dm9000.txt
@@ -0,0 +1,28 @@
+Davicom DM9000 Fast Ethernet controller
+
+Required properties:
+- compatible = "davicom,dm9000";
+- reg : physical addresses and sizes of registers, must contain 2 entries:
+ first entry : address register,
+ second entry : data register.
+- interrupt-parent : interrupt controller to which the device is connected
+- interrupts : interrupt specifier specific to interrupt controller
+
+Optional properties:
+- davicom,no-eeprom : Configuration EEPROM is not available
+- davicom,ext-phy : Use external PHY
+- reset-gpios : phandle of gpio that will be used to reset chip during probe
+- vcc-supply : phandle of regulator that will be used to enable power to chip
+
+Example:
+
+ ethernet@18000000 {
+ compatible = "davicom,dm9000";
+ reg = <0x18000000 0x2 0x18000004 0x2>;
+ interrupt-parent = <&gpn>;
+ interrupts = <7 4>;
+ local-mac-address = [00 00 de ad be ef];
+ davicom,no-eeprom;
+ reset-gpios = <&gpf 12 GPIO_ACTIVE_LOW>;
+ vcc-supply = <&eth0_power>;
+ };
diff --git a/Documentation/devicetree/bindings/net/davinci-mdio.txt b/Documentation/devicetree/bindings/net/davinci-mdio.txt
new file mode 100644
index 000000000..0369e25aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/davinci-mdio.txt
@@ -0,0 +1,33 @@
+TI SoC Davinci/Keystone2 MDIO Controller Device Tree Bindings
+---------------------------------------------------
+
+Required properties:
+- compatible : Should be "ti,davinci_mdio" or "ti,keystone_mdio"
+- reg : physical base address and size of the davinci mdio
+ registers map
+- bus_freq : Mdio Bus frequency
+
+Optional properties:
+- ti,hwmods : Must be "davinci_mdio"
+
+Note: "ti,hwmods" field is used to fetch the base address and irq
+resources from TI, omap hwmod data base during device registration.
+Future plan is to migrate hwmod data base contents into device tree
+blob so that, all the required data will be used from device tree dts
+file.
+
+Examples:
+
+ mdio: davinci_mdio@4A101000 {
+ compatible = "ti,davinci_mdio";
+ reg = <0x4A101000 0x1000>;
+ bus_freq = <1000000>;
+ };
+
+(or)
+
+ mdio: davinci_mdio@4A101000 {
+ compatible = "ti,davinci_mdio";
+ ti,hwmods = "davinci_mdio";
+ bus_freq = <1000000>;
+ };
diff --git a/Documentation/devicetree/bindings/net/davinci_emac.txt b/Documentation/devicetree/bindings/net/davinci_emac.txt
new file mode 100644
index 000000000..24c5cdaba
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/davinci_emac.txt
@@ -0,0 +1,41 @@
+* Texas Instruments Davinci EMAC
+
+This file provides information, what the device node
+for the davinci_emac interface contains.
+
+Required properties:
+- compatible: "ti,davinci-dm6467-emac", "ti,am3517-emac" or
+ "ti,dm816-emac"
+- reg: Offset and length of the register set for the device
+- ti,davinci-ctrl-reg-offset: offset to control register
+- ti,davinci-ctrl-mod-reg-offset: offset to control module register
+- ti,davinci-ctrl-ram-offset: offset to control module ram
+- ti,davinci-ctrl-ram-size: size of control module ram
+- interrupts: interrupt mapping for the davinci emac interrupts sources:
+ 4 sources: <Receive Threshold Interrupt
+ Receive Interrupt
+ Transmit Interrupt
+ Miscellaneous Interrupt>
+
+Optional properties:
+- phy-handle: See ethernet.txt file in the same directory.
+ If absent, davinci_emac driver defaults to 100/FULL.
+- ti,davinci-rmii-en: 1 byte, 1 means use RMII
+- ti,davinci-no-bd-ram: boolean, does EMAC have BD RAM?
+
+Example (enbw_cmc board):
+ eth0: emac@1e20000 {
+ compatible = "ti,davinci-dm6467-emac";
+ reg = <0x220000 0x4000>;
+ ti,davinci-ctrl-reg-offset = <0x3000>;
+ ti,davinci-ctrl-mod-reg-offset = <0x2000>;
+ ti,davinci-ctrl-ram-offset = <0>;
+ ti,davinci-ctrl-ram-size = <0x2000>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ interrupts = <33
+ 34
+ 35
+ 36
+ >;
+ interrupt-parent = <&intc>;
+ };
diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt
new file mode 100644
index 000000000..f0b4cd724
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt
@@ -0,0 +1,117 @@
+Marvell Distributed Switch Architecture Device Tree Bindings
+------------------------------------------------------------
+
+Required properties:
+- compatible : Should be "marvell,dsa"
+- #address-cells : Must be 2, first cell is the address on the MDIO bus
+ and second cell is the address in the switch tree.
+ Second cell is used only when cascading/chaining.
+- #size-cells : Must be 0
+- dsa,ethernet : Should be a phandle to a valid Ethernet device node
+- dsa,mii-bus : Should be a phandle to a valid MDIO bus device node
+
+Optional properties:
+- interrupts : property with a value describing the switch
+ interrupt number (not supported by the driver)
+
+A DSA node can contain multiple switch chips which are therefore child nodes of
+the parent DSA node. The maximum number of allowed child nodes is 4
+(DSA_MAX_SWITCHES).
+Each of these switch child nodes should have the following required properties:
+
+- reg : Contains two fields. The first one describes the
+ address on the MII bus. The second is the switch
+ number that must be unique in cascaded configurations
+- #address-cells : Must be 1
+- #size-cells : Must be 0
+
+A switch child node has the following optional property:
+
+- eeprom-length : Set to the length of an EEPROM connected to the
+ switch. Must be set if the switch can not detect
+ the presence and/or size of a connected EEPROM,
+ otherwise optional.
+
+A switch may have multiple "port" children nodes
+
+Each port children node must have the following mandatory properties:
+- reg : Describes the port address in the switch
+- label : Describes the label associated with this port, special
+ labels are "cpu" to indicate a CPU port and "dsa" to
+ indicate an uplink/downlink port.
+
+Note that a port labelled "dsa" will imply checking for the uplink phandle
+described below.
+
+Optionnal property:
+- link : Should be a phandle to another switch's DSA port.
+ This property is only used when switches are being
+ chained/cascaded together.
+
+- phy-handle : Phandle to a PHY on an external MDIO bus, not the
+ switch internal one. See
+ Documentation/devicetree/bindings/net/ethernet.txt
+ for details.
+
+- phy-mode : String representing the connection to the designated
+ PHY node specified by the 'phy-handle' property. See
+ Documentation/devicetree/bindings/net/ethernet.txt
+ for details.
+
+Optional subnodes:
+- fixed-link : Fixed-link subnode describing a link to a non-MDIO
+ managed entity. See
+ Documentation/devicetree/bindings/net/fixed-link.txt
+ for details.
+
+Example:
+
+ dsa@0 {
+ compatible = "marvell,dsa";
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ interrupts = <10>;
+ dsa,ethernet = <&ethernet0>;
+ dsa,mii-bus = <&mii_bus0>;
+
+ switch@0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <16 0>; /* MDIO address 16, switch 0 in tree */
+
+ port@0 {
+ reg = <0>;
+ label = "lan1";
+ phy-handle = <&phy0>;
+ };
+
+ port@1 {
+ reg = <1>;
+ label = "lan2";
+ };
+
+ port@5 {
+ reg = <5>;
+ label = "cpu";
+ };
+
+ switch0uplink: port@6 {
+ reg = <6>;
+ label = "dsa";
+ link = <&switch1uplink>;
+ };
+ };
+
+ switch@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <17 1>; /* MDIO address 17, switch 1 in tree */
+
+ switch1uplink: port@0 {
+ reg = <0>;
+ label = "dsa";
+ link = <&switch0uplink>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/emac_rockchip.txt b/Documentation/devicetree/bindings/net/emac_rockchip.txt
new file mode 100644
index 000000000..8dc1c79fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/emac_rockchip.txt
@@ -0,0 +1,50 @@
+* ARC EMAC 10/100 Ethernet platform driver for Rockchip Rk3066/RK3188 SoCs
+
+Required properties:
+- compatible: Should be "rockchip,rk3066-emac" or "rockchip,rk3188-emac"
+ according to the target SoC.
+- reg: Address and length of the register set for the device
+- interrupts: Should contain the EMAC interrupts
+- rockchip,grf: phandle to the syscon grf used to control speed and mode
+ for emac.
+- phy: see ethernet.txt file in the same directory.
+- phy-mode: see ethernet.txt file in the same directory.
+
+Optional properties:
+- phy-supply: phandle to a regulator if the PHY needs one
+
+Clock handling:
+- clocks: Must contain an entry for each entry in clock-names.
+- clock-names: Shall be "hclk" for the host clock needed to calculate and set
+ polling period of EMAC and "macref" for the reference clock needed to transfer
+ data to and from the phy.
+
+Child nodes of the driver are the individual PHY devices connected to the
+MDIO bus. They must have a "reg" property given the PHY address on the MDIO bus.
+
+Examples:
+
+ethernet@10204000 {
+ compatible = "rockchip,rk3188-emac";
+ reg = <0xc0fc2000 0x3c>;
+ interrupts = <6>;
+ mac-address = [ 00 11 22 33 44 55 ];
+
+ clocks = <&cru HCLK_EMAC>, <&cru SCLK_MAC>;
+ clock-names = "hclk", "macref";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&emac_xfer>, <&emac_mdio>, <&phy_int>;
+
+ rockchip,grf = <&grf>;
+
+ phy = <&phy0>;
+ phy-mode = "rmii";
+ phy-supply = <&vcc_rmii>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy0: ethernet-phy@0 {
+ reg = <1>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt
new file mode 100644
index 000000000..41b3f3f86
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ethernet.txt
@@ -0,0 +1,31 @@
+The following properties are common to the Ethernet controllers:
+
+- local-mac-address: array of 6 bytes, specifies the MAC address that was
+ assigned to the network device;
+- mac-address: array of 6 bytes, specifies the MAC address that was last used by
+ the boot program; should be used in cases where the MAC address assigned to
+ the device by the boot program is different from the "local-mac-address"
+ property;
+- max-speed: number, specifies maximum speed in Mbit/s supported by the device;
+- max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than
+ the maximum frame size (there's contradiction in ePAPR).
+- phy-mode: string, operation mode of the PHY interface; supported values are
+ "mii", "gmii", "sgmii", "qsgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id",
+ "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii"; this is now a de-facto
+ standard property;
+- phy-connection-type: the same as "phy-mode" property but described in ePAPR;
+- phy-handle: phandle, specifies a reference to a node representing a PHY
+ device; this property is described in ePAPR and so preferred;
+- phy: the same as "phy-handle" property, not recommended for new bindings.
+- phy-device: the same as "phy-handle" property, not recommended for new
+ bindings.
+- rx-fifo-depth: the size of the controller's receive fifo in bytes. This
+ is used for components that can have configurable receive fifo sizes,
+ and is useful for determining certain configuration settings such as
+ flow control thresholds.
+- tx-fifo-depth: the size of the controller's transmit fifo in bytes. This
+ is used for components that can have configurable fifo sizes.
+
+Child nodes of the Ethernet controller are typically the individual PHY devices
+connected via the MDIO bus (sometimes the MDIO bus controller is separate).
+They are described in the phy.txt file in this same directory.
diff --git a/Documentation/devicetree/bindings/net/fixed-link.txt b/Documentation/devicetree/bindings/net/fixed-link.txt
new file mode 100644
index 000000000..82bf7e0f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fixed-link.txt
@@ -0,0 +1,42 @@
+Fixed link Device Tree binding
+------------------------------
+
+Some Ethernet MACs have a "fixed link", and are not connected to a
+normal MDIO-managed PHY device. For those situations, a Device Tree
+binding allows to describe a "fixed link".
+
+Such a fixed link situation is described by creating a 'fixed-link'
+sub-node of the Ethernet MAC device node, with the following
+properties:
+
+* 'speed' (integer, mandatory), to indicate the link speed. Accepted
+ values are 10, 100 and 1000
+* 'full-duplex' (boolean, optional), to indicate that full duplex is
+ used. When absent, half duplex is assumed.
+* 'pause' (boolean, optional), to indicate that pause should be
+ enabled.
+* 'asym-pause' (boolean, optional), to indicate that asym_pause should
+ be enabled.
+
+Old, deprecated 'fixed-link' binding:
+
+* A 'fixed-link' property in the Ethernet MAC node, with 5 cells, of the
+ form <a b c d e> with the following accepted values:
+ - a: emulated PHY ID, choose any but but unique to the all specified
+ fixed-links, from 0 to 31
+ - b: duplex configuration: 0 for half duplex, 1 for full duplex
+ - c: link speed in Mbits/sec, accepted values are: 10, 100 and 1000
+ - d: pause configuration: 0 for no pause, 1 for pause
+ - e: asymmetric pause configuration: 0 for no asymmetric pause, 1 for
+ asymmetric pause
+
+Example:
+
+ethernet@0 {
+ ...
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ ...
+};
diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
new file mode 100644
index 000000000..a9eb611be
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -0,0 +1,62 @@
+* Freescale Fast Ethernet Controller (FEC)
+
+Required properties:
+- compatible : Should be "fsl,<soc>-fec"
+- reg : Address and length of the register set for the device
+- interrupts : Should contain fec interrupt
+- phy-mode : See ethernet.txt file in the same directory
+
+Optional properties:
+- phy-reset-gpios : Should specify the gpio for phy reset
+- phy-reset-duration : Reset duration in milliseconds. Should present
+ only if property "phy-reset-gpios" is available. Missing the property
+ will have the duration be 1 millisecond. Numbers greater than 1000 are
+ invalid and 1 millisecond will be used instead.
+- phy-supply : regulator that powers the Ethernet PHY.
+- phy-handle : phandle to the PHY device connected to this device.
+- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
+ Use instead of phy-handle.
+- fsl,num-tx-queues : The property is valid for enet-avb IP, which supports
+ hw multi queues. Should specify the tx queue number, otherwise set tx queue
+ number to 1.
+- fsl,num-rx-queues : The property is valid for enet-avb IP, which supports
+ hw multi queues. Should specify the rx queue number, otherwise set rx queue
+ number to 1.
+- fsl,magic-packet : If present, indicates that the hardware supports waking
+ up via magic packet.
+
+Optional subnodes:
+- mdio : specifies the mdio bus in the FEC, used as a container for phy nodes
+ according to phy.txt in the same directory
+
+Example:
+
+ethernet@83fec000 {
+ compatible = "fsl,imx51-fec", "fsl,imx27-fec";
+ reg = <0x83fec000 0x4000>;
+ interrupts = <87>;
+ phy-mode = "mii";
+ phy-reset-gpios = <&gpio2 14 0>; /* GPIO2_14 */
+ local-mac-address = [00 04 9F 01 1B B9];
+ phy-supply = <&reg_fec_supply>;
+};
+
+Example with phy specified:
+
+ethernet@83fec000 {
+ compatible = "fsl,imx51-fec", "fsl,imx27-fec";
+ reg = <0x83fec000 0x4000>;
+ interrupts = <87>;
+ phy-mode = "mii";
+ phy-reset-gpios = <&gpio2 14 0>; /* GPIO2_14 */
+ local-mac-address = [00 04 9F 01 1B B9];
+ phy-supply = <&reg_fec_supply>;
+ phy-handle = <&ethphy>;
+ mdio {
+ ethphy: ethernet-phy@6 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <6>;
+ max-speed = <100>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
new file mode 100644
index 000000000..1e97532a0
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/fsl-tsec-phy.txt
@@ -0,0 +1,147 @@
+* MDIO IO device
+
+The MDIO is a bus to which the PHY devices are connected. For each
+device that exists on this bus, a child node should be created. See
+the definition of the PHY node in booting-without-of.txt for an example
+of how to define a PHY.
+
+Required properties:
+ - reg : Offset and length of the register set for the device
+ - compatible : Should define the compatible device type for the
+ mdio. Currently supported strings/devices are:
+ - "fsl,gianfar-tbi"
+ - "fsl,gianfar-mdio"
+ - "fsl,etsec2-tbi"
+ - "fsl,etsec2-mdio"
+ - "fsl,ucc-mdio"
+ - "fsl,fman-mdio"
+ When device_type is "mdio", the following strings are also considered:
+ - "gianfar"
+ - "ucc_geth_phy"
+
+Example:
+
+ mdio@24520 {
+ reg = <24520 20>;
+ compatible = "fsl,gianfar-mdio";
+
+ ethernet-phy@0 {
+ ......
+ };
+ };
+
+* TBI Internal MDIO bus
+
+As of this writing, every tsec is associated with an internal TBI PHY.
+This PHY is accessed through the local MDIO bus. These buses are defined
+similarly to the mdio buses, except they are compatible with "fsl,gianfar-tbi".
+The TBI PHYs underneath them are similar to normal PHYs, but the reg property
+is considered instructive, rather than descriptive. The reg property should
+be chosen so it doesn't interfere with other PHYs on the bus.
+
+* Gianfar-compatible ethernet nodes
+
+Properties:
+
+ - device_type : Should be "network"
+ - model : Model of the device. Can be "TSEC", "eTSEC", or "FEC"
+ - compatible : Should be "gianfar"
+ - reg : Offset and length of the register set for the device
+ - interrupts : For FEC devices, the first interrupt is the device's
+ interrupt. For TSEC and eTSEC devices, the first interrupt is
+ transmit, the second is receive, and the third is error.
+ - phy-handle : See ethernet.txt file in the same directory.
+ - fixed-link : See fixed-link.txt in the same directory.
+ - phy-connection-type : See ethernet.txt file in the same directory.
+ This property is only really needed if the connection is of type
+ "rgmii-id", as all other connection types are detected by hardware.
+ - fsl,magic-packet : If present, indicates that the hardware supports
+ waking up via magic packet.
+ - bd-stash : If present, indicates that the hardware supports stashing
+ buffer descriptors in the L2.
+ - rx-stash-len : Denotes the number of bytes of a received buffer to stash
+ in the L2.
+ - rx-stash-idx : Denotes the index of the first byte from the received
+ buffer to stash in the L2.
+
+Example:
+ ethernet@24000 {
+ device_type = "network";
+ model = "TSEC";
+ compatible = "gianfar";
+ reg = <0x24000 0x1000>;
+ local-mac-address = [ 00 E0 0C 00 73 00 ];
+ interrupts = <29 2 30 2 34 2>;
+ interrupt-parent = <&mpic>;
+ phy-handle = <&phy0>
+ };
+
+* Gianfar PTP clock nodes
+
+General Properties:
+
+ - compatible Should be "fsl,etsec-ptp"
+ - reg Offset and length of the register set for the device
+ - interrupts There should be at least two interrupts. Some devices
+ have as many as four PTP related interrupts.
+
+Clock Properties:
+
+ - fsl,cksel Timer reference clock source.
+ - fsl,tclk-period Timer reference clock period in nanoseconds.
+ - fsl,tmr-prsc Prescaler, divides the output clock.
+ - fsl,tmr-add Frequency compensation value.
+ - fsl,tmr-fiper1 Fixed interval period pulse generator.
+ - fsl,tmr-fiper2 Fixed interval period pulse generator.
+ - fsl,max-adj Maximum frequency adjustment in parts per billion.
+
+ These properties set the operational parameters for the PTP
+ clock. You must choose these carefully for the clock to work right.
+ Here is how to figure good values:
+
+ TimerOsc = selected reference clock MHz
+ tclk_period = desired clock period nanoseconds
+ NominalFreq = 1000 / tclk_period MHz
+ FreqDivRatio = TimerOsc / NominalFreq (must be greater that 1.0)
+ tmr_add = ceil(2^32 / FreqDivRatio)
+ OutputClock = NominalFreq / tmr_prsc MHz
+ PulseWidth = 1 / OutputClock microseconds
+ FiperFreq1 = desired frequency in Hz
+ FiperDiv1 = 1000000 * OutputClock / FiperFreq1
+ tmr_fiper1 = tmr_prsc * tclk_period * FiperDiv1 - tclk_period
+ max_adj = 1000000000 * (FreqDivRatio - 1.0) - 1
+
+ The calculation for tmr_fiper2 is the same as for tmr_fiper1. The
+ driver expects that tmr_fiper1 will be correctly set to produce a 1
+ Pulse Per Second (PPS) signal, since this will be offered to the PPS
+ subsystem to synchronize the Linux clock.
+
+ Reference clock source is determined by the value, which is holded
+ in CKSEL bits in TMR_CTRL register. "fsl,cksel" property keeps the
+ value, which will be directly written in those bits, that is why,
+ according to reference manual, the next clock sources can be used:
+
+ <0> - external high precision timer reference clock (TSEC_TMR_CLK
+ input is used for this purpose);
+ <1> - eTSEC system clock;
+ <2> - eTSEC1 transmit clock;
+ <3> - RTC clock input.
+
+ When this attribute is not used, eTSEC system clock will serve as
+ IEEE 1588 timer reference clock.
+
+Example:
+
+ ptp_clock@24E00 {
+ compatible = "fsl,etsec-ptp";
+ reg = <0x24E00 0xB0>;
+ interrupts = <12 0x8 13 0x8>;
+ interrupt-parent = < &ipic >;
+ fsl,cksel = <1>;
+ fsl,tclk-period = <10>;
+ fsl,tmr-prsc = <100>;
+ fsl,tmr-add = <0x999999A4>;
+ fsl,tmr-fiper1 = <0x3B9AC9F6>;
+ fsl,tmr-fiper2 = <0x00018696>;
+ fsl,max-adj = <659999998>;
+ };
diff --git a/Documentation/devicetree/bindings/net/gpmc-eth.txt b/Documentation/devicetree/bindings/net/gpmc-eth.txt
new file mode 100644
index 000000000..ace4a64b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/gpmc-eth.txt
@@ -0,0 +1,97 @@
+Device tree bindings for Ethernet chip connected to TI GPMC
+
+Besides being used to interface with external memory devices, the
+General-Purpose Memory Controller can be used to connect Pseudo-SRAM devices
+such as ethernet controllers to processors using the TI GPMC as a data bus.
+
+Ethernet controllers connected to TI GPMC are represented as child nodes of
+the GPMC controller with an "ethernet" name.
+
+All timing relevant properties as well as generic GPMC child properties are
+explained in a separate documents. Please refer to
+Documentation/devicetree/bindings/bus/ti-gpmc.txt
+
+For the properties relevant to the ethernet controller connected to the GPMC
+refer to the binding documentation of the device. For example, the documentation
+for the SMSC 911x is Documentation/devicetree/bindings/net/smsc911x.txt
+
+Child nodes need to specify the GPMC bus address width using the "bank-width"
+property but is possible that an ethernet controller also has a property to
+specify the I/O registers address width. Even when the GPMC has a maximum 16-bit
+address width, it supports devices with 32-bit word registers.
+For example with an SMSC LAN911x/912x controller connected to the TI GPMC on an
+OMAP2+ board, "bank-width = <2>;" and "reg-io-width = <4>;".
+
+Required properties:
+- bank-width: Address width of the device in bytes. GPMC supports 8-bit
+ and 16-bit devices and so must be either 1 or 2 bytes.
+- compatible: Compatible string property for the ethernet child device.
+- gpmc,cs-on-ns: Chip-select assertion time
+- gpmc,cs-rd-off-ns: Chip-select de-assertion time for reads
+- gpmc,cs-wr-off-ns: Chip-select de-assertion time for writes
+- gpmc,oe-on-ns: Output-enable assertion time
+- gpmc,oe-off-ns: Output-enable de-assertion time
+- gpmc,we-on-ns: Write-enable assertion time
+- gpmc,we-off-ns: Write-enable de-assertion time
+- gpmc,access-ns: Start cycle to first data capture (read access)
+- gpmc,rd-cycle-ns: Total read cycle time
+- gpmc,wr-cycle-ns: Total write cycle time
+- reg: Chip-select, base address (relative to chip-select)
+ and size of the memory mapped for the device.
+ Note that base address will be typically 0 as this
+ is the start of the chip-select.
+
+Optional properties:
+- gpmc,XXX Additional GPMC timings and settings parameters. See
+ Documentation/devicetree/bindings/bus/ti-gpmc.txt
+
+Example:
+
+gpmc: gpmc@6e000000 {
+ compatible = "ti,omap3430-gpmc";
+ ti,hwmods = "gpmc";
+ reg = <0x6e000000 0x1000>;
+ interrupts = <20>;
+ gpmc,num-cs = <8>;
+ gpmc,num-waitpins = <4>;
+ #address-cells = <2>;
+ #size-cells = <1>;
+
+ ranges = <5 0 0x2c000000 0x1000000>;
+
+ ethernet@5,0 {
+ compatible = "smsc,lan9221", "smsc,lan9115";
+ reg = <5 0 0xff>;
+ bank-width = <2>;
+
+ gpmc,mux-add-data;
+ gpmc,cs-on-ns = <0>;
+ gpmc,cs-rd-off-ns = <186>;
+ gpmc,cs-wr-off-ns = <186>;
+ gpmc,adv-on-ns = <12>;
+ gpmc,adv-rd-off-ns = <48>;
+ gpmc,adv-wr-off-ns = <48>;
+ gpmc,oe-on-ns = <54>;
+ gpmc,oe-off-ns = <168>;
+ gpmc,we-on-ns = <54>;
+ gpmc,we-off-ns = <168>;
+ gpmc,rd-cycle-ns = <186>;
+ gpmc,wr-cycle-ns = <186>;
+ gpmc,access-ns = <114>;
+ gpmc,page-burst-access-ns = <6>;
+ gpmc,bus-turnaround-ns = <12>;
+ gpmc,cycle2cycle-delay-ns = <18>;
+ gpmc,wr-data-mux-bus-ns = <90>;
+ gpmc,wr-access-ns = <186>;
+ gpmc,cycle2cycle-samecsen;
+ gpmc,cycle2cycle-diffcsen;
+
+ interrupt-parent = <&gpio6>;
+ interrupts = <16>;
+ vmmc-supply = <&vddvario>;
+ vmmc_aux-supply = <&vdd33a>;
+ reg-io-width = <4>;
+
+ smsc,save-mac-address;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt b/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
new file mode 100644
index 000000000..988fc694b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/hisilicon-hip04-net.txt
@@ -0,0 +1,88 @@
+Hisilicon hip04 Ethernet Controller
+
+* Ethernet controller node
+
+Required properties:
+- compatible: should be "hisilicon,hip04-mac".
+- reg: address and length of the register set for the device.
+- interrupts: interrupt for the device.
+- port-handle: <phandle port channel>
+ phandle, specifies a reference to the syscon ppe node
+ port, port number connected to the controller
+ channel, recv channel start from channel * number (RX_DESC_NUM)
+- phy-mode: see ethernet.txt [1].
+
+Optional properties:
+- phy-handle: see ethernet.txt [1].
+
+[1] Documentation/devicetree/bindings/net/ethernet.txt
+
+
+* Ethernet ppe node:
+Control rx & tx fifos of all ethernet controllers.
+Have 2048 recv channels shared by all ethernet controllers, only if no overlap.
+Each controller's recv channel start from channel * number (RX_DESC_NUM).
+
+Required properties:
+- compatible: "hisilicon,hip04-ppe", "syscon".
+- reg: address and length of the register set for the device.
+
+
+* MDIO bus node:
+
+Required properties:
+
+- compatible: should be "hisilicon,hip04-mdio".
+- Inherits from MDIO bus node binding [2]
+[2] Documentation/devicetree/bindings/net/phy.txt
+
+Example:
+ mdio {
+ compatible = "hisilicon,hip04-mdio";
+ reg = <0x28f1000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy0: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0>;
+ marvell,reg-init = <18 0x14 0 0x8001>;
+ };
+
+ phy1: ethernet-phy@1 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <1>;
+ marvell,reg-init = <18 0x14 0 0x8001>;
+ };
+ };
+
+ ppe: ppe@28c0000 {
+ compatible = "hisilicon,hip04-ppe", "syscon";
+ reg = <0x28c0000 0x10000>;
+ };
+
+ fe: ethernet@28b0000 {
+ compatible = "hisilicon,hip04-mac";
+ reg = <0x28b0000 0x10000>;
+ interrupts = <0 413 4>;
+ phy-mode = "mii";
+ port-handle = <&ppe 31 0>;
+ };
+
+ ge0: ethernet@2800000 {
+ compatible = "hisilicon,hip04-mac";
+ reg = <0x2800000 0x10000>;
+ interrupts = <0 402 4>;
+ phy-mode = "sgmii";
+ port-handle = <&ppe 0 1>;
+ phy-handle = <&phy0>;
+ };
+
+ ge8: ethernet@2880000 {
+ compatible = "hisilicon,hip04-mac";
+ reg = <0x2880000 0x10000>;
+ interrupts = <0 410 4>;
+ phy-mode = "sgmii";
+ port-handle = <&ppe 8 2>;
+ phy-handle = <&phy1>;
+ };
diff --git a/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt
new file mode 100644
index 000000000..75d398bb1
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/hisilicon-hix5hd2-gmac.txt
@@ -0,0 +1,36 @@
+Hisilicon hix5hd2 gmac controller
+
+Required properties:
+- compatible: should be "hisilicon,hix5hd2-gmac".
+- reg: specifies base physical address(s) and size of the device registers.
+ The first region is the MAC register base and size.
+ The second region is external interface control register.
+- interrupts: should contain the MAC interrupt.
+- #address-cells: must be <1>.
+- #size-cells: must be <0>.
+- phy-mode: see ethernet.txt [1].
+- phy-handle: see ethernet.txt [1].
+- mac-address: see ethernet.txt [1].
+- clocks: clock phandle and specifier pair.
+
+- PHY subnode: inherits from phy binding [2]
+
+[1] Documentation/devicetree/bindings/net/ethernet.txt
+[2] Documentation/devicetree/bindings/net/phy.txt
+
+Example:
+ gmac0: ethernet@f9840000 {
+ compatible = "hisilicon,hix5hd2-gmac";
+ reg = <0xf9840000 0x1000>,<0xf984300c 0x4>;
+ interrupts = <0 71 4>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy-mode = "mii";
+ phy-handle = <&phy2>;
+ mac-address = [00 00 00 00 00 00];
+ clocks = <&clock HIX5HD2_MAC0_CLK>;
+
+ phy2: ethernet-phy@2 {
+ reg = <2>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt b/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt
new file mode 100644
index 000000000..168f1be50
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ieee802154/at86rf230.txt
@@ -0,0 +1,27 @@
+* AT86RF230 IEEE 802.15.4 *
+
+Required properties:
+ - compatible: should be "atmel,at86rf230", "atmel,at86rf231",
+ "atmel,at86rf233" or "atmel,at86rf212"
+ - spi-max-frequency: maximal bus speed, should be set to 7500000 depends
+ sync or async operation mode
+ - reg: the chipselect index
+ - interrupts: the interrupt generated by the device. Non high-level
+ can occur deadlocks while handling isr.
+
+Optional properties:
+ - reset-gpio: GPIO spec for the rstn pin
+ - sleep-gpio: GPIO spec for the slp_tr pin
+ - xtal-trim: u8 value for fine tuning the internal capacitance
+ arrays of xtal pins: 0 = +0 pF, 0xf = +4.5 pF
+
+Example:
+
+ at86rf231@0 {
+ compatible = "atmel,at86rf231";
+ spi-max-frequency = <7500000>;
+ reg = <0>;
+ interrupts = <19 4>;
+ interrupt-parent = <&gpio3>;
+ xtal-trim = /bits/ 8 <0x06>;
+ };
diff --git a/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt b/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
new file mode 100644
index 000000000..fb6d49f18
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ieee802154/cc2520.txt
@@ -0,0 +1,33 @@
+*CC2520 IEEE 802.15.4 Compatible Radio*
+
+Required properties:
+ - compatible: should be "ti,cc2520"
+ - spi-max-frequency: maximal bus speed (8000000), should be set to 4000000 depends
+ sync or async operation mode
+ - reg: the chipselect index
+ - pinctrl-0: pin control group to be used for this controller.
+ - pinctrl-names: must contain a "default" entry.
+ - fifo-gpio: GPIO spec for the FIFO pin
+ - fifop-gpio: GPIO spec for the FIFOP pin
+ - sfd-gpio: GPIO spec for the SFD pin
+ - cca-gpio: GPIO spec for the CCA pin
+ - vreg-gpio: GPIO spec for the VREG pin
+ - reset-gpio: GPIO spec for the RESET pin
+Optional properties:
+ - amplified: include if the CC2520 is connected to a CC2591 amplifier
+
+Example:
+ cc2520@0 {
+ compatible = "ti,cc2520";
+ reg = <0>;
+ spi-max-frequency = <4000000>;
+ amplified;
+ pinctrl-names = "default";
+ pinctrl-0 = <&cc2520_cape_pins>;
+ fifo-gpio = <&gpio1 18 0>;
+ fifop-gpio = <&gpio1 19 0>;
+ sfd-gpio = <&gpio1 13 0>;
+ cca-gpio = <&gpio1 16 0>;
+ vreg-gpio = <&gpio0 31 0>;
+ reset-gpio = <&gpio1 12 0>;
+ };
diff --git a/Documentation/devicetree/bindings/net/keystone-netcp.txt b/Documentation/devicetree/bindings/net/keystone-netcp.txt
new file mode 100644
index 000000000..d0e6fa38f
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/keystone-netcp.txt
@@ -0,0 +1,217 @@
+This document describes the device tree bindings associated with the
+keystone network coprocessor(NetCP) driver support.
+
+The network coprocessor (NetCP) is a hardware accelerator that processes
+Ethernet packets. NetCP has a gigabit Ethernet (GbE) subsytem with a ethernet
+switch sub-module to send and receive packets. NetCP also includes a packet
+accelerator (PA) module to perform packet classification operations such as
+header matching, and packet modification operations such as checksum
+generation. NetCP can also optionally include a Security Accelerator (SA)
+capable of performing IPSec operations on ingress/egress packets.
+
+Keystone II SoC's also have a 10 Gigabit Ethernet Subsystem (XGbE) which
+includes a 3-port Ethernet switch sub-module capable of 10Gb/s and 1Gb/s rates
+per Ethernet port.
+
+Keystone NetCP driver has a plug-in module architecture where each of the NetCP
+sub-modules exist as a loadable kernel module which plug in to the netcp core.
+These sub-modules are represented as "netcp-devices" in the dts bindings. It is
+mandatory to have the ethernet switch sub-module for the ethernet interface to
+be operational. Any other sub-module like the PA is optional.
+
+NetCP Ethernet SubSystem Layout:
+
+-----------------------------
+ NetCP subsystem(10G or 1G)
+-----------------------------
+ |
+ |-> NetCP Devices -> |
+ | |-> GBE/XGBE Switch
+ | |
+ | |-> Packet Accelerator
+ | |
+ | |-> Security Accelerator
+ |
+ |
+ |
+ |-> NetCP Interfaces -> |
+ |-> Ethernet Port 0
+ |
+ |-> Ethernet Port 1
+ |
+ |-> Ethernet Port 2
+ |
+ |-> Ethernet Port 3
+
+
+NetCP subsystem properties:
+Required properties:
+- compatible: Should be "ti,netcp-1.0"
+- clocks: phandle to the reference clocks for the subsystem.
+- dma-id: Navigator packet dma instance id.
+- ranges: address range of NetCP (includes, Ethernet SS, PA and SA)
+
+Optional properties:
+- reg: register location and the size for the following register
+ regions in the specified order.
+ - Efuse MAC address register
+- dma-coherent: Present if dma operations are coherent
+- big-endian: Keystone devices can be operated in a mode where the DSP is in
+ the big endian mode. In such cases enable this option. This
+ option should also be enabled if the ARM is operated in
+ big endian mode with the DSP in little endian.
+
+NetCP device properties: Device specification for NetCP sub-modules.
+1Gb/10Gb (gbe/xgbe) ethernet switch sub-module specifications.
+Required properties:
+- label: Must be "netcp-gbe" for 1Gb & "netcp-xgbe" for 10Gb.
+- compatible: Must be one of below:-
+ "ti,netcp-gbe" for 1GbE on NetCP 1.4
+ "ti,netcp-gbe-5" for 1GbE N NetCP 1.5 (N=5)
+ "ti,netcp-gbe-9" for 1GbE N NetCP 1.5 (N=9)
+ "ti,netcp-gbe-2" for 1GbE N NetCP 1.5 (N=2)
+ "ti,netcp-xgbe" for 10 GbE
+
+- reg: register location and the size for the following register
+ regions in the specified order.
+ - switch subsystem registers
+ - sgmii port3/4 module registers (only for NetCP 1.4)
+ - switch module registers
+ - serdes registers (only for 10G)
+
+ NetCP 1.4 ethss, here is the order
+ index #0 - switch subsystem registers
+ index #1 - sgmii port3/4 module registers
+ index #2 - switch module registers
+
+ NetCP 1.5 ethss 9 port, 5 port and 2 port
+ index #0 - switch subsystem registers
+ index #1 - switch module registers
+ index #2 - serdes registers
+
+- tx-channel: the navigator packet dma channel name for tx.
+- tx-queue: the navigator queue number associated with the tx dma channel.
+- interfaces: specification for each of the switch port to be registered as a
+ network interface in the stack.
+-- slave-port: Switch port number, 0 based numbering.
+-- link-interface: type of link interface, supported options are
+ - mac<->mac auto negotiate mode: 0
+ - mac<->phy mode: 1
+ - mac<->mac forced mode: 2
+ - mac<->fiber mode: 3
+ - mac<->phy mode with no mdio: 4
+ - 10Gb mac<->phy mode : 10
+ - 10Gb mac<->mac forced mode : 11
+----phy-handle: phandle to PHY device
+
+Optional properties:
+- enable-ale: NetCP driver keeps the address learning feature in the ethernet
+ switch module disabled. This attribute is to enable the address
+ learning.
+- secondary-slave-ports: specification for each of the switch port not be
+ registered as a network interface. NetCP driver
+ will only initialize these ports and attach PHY
+ driver to them if needed.
+
+NetCP interface properties: Interface specification for NetCP sub-modules.
+Required properties:
+- rx-channel: the navigator packet dma channel name for rx.
+- rx-queue: the navigator queue number associated with rx dma channel.
+- rx-pool: specifies the number of descriptors to be used & the region-id
+ for creating the rx descriptor pool.
+- tx-pool: specifies the number of descriptors to be used & the region-id
+ for creating the tx descriptor pool.
+- rx-queue-depth: number of descriptors in each of the free descriptor
+ queue (FDQ) for the pktdma Rx flow. There can be at
+ present a maximum of 4 queues per Rx flow.
+- rx-buffer-size: the buffer size for each of the Rx flow FDQ.
+- tx-completion-queue: the navigator queue number where the descriptors are
+ recycled after Tx DMA completion.
+
+Optional properties:
+- efuse-mac: If this is 1, then the MAC address for the interface is
+ obtained from the device efuse mac address register
+- local-mac-address: the driver is designed to use the of_get_mac_address api
+ only if efuse-mac is 0. When efuse-mac is 0, the MAC
+ address is obtained from local-mac-address. If this
+ attribute is not present, then the driver will use a
+ random MAC address.
+- "netcp-device label": phandle to the device specification for each of NetCP
+ sub-module attached to this interface.
+
+Example binding:
+
+netcp: netcp@2000000 {
+ reg = <0x2620110 0x8>;
+ reg-names = "efuse";
+ compatible = "ti,netcp-1.0";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x2000000 0xfffff>;
+ clocks = <&papllclk>, <&clkcpgmac>, <&chipclk12>;
+ dma-coherent;
+ /* big-endian; */
+ dma-id = <0>;
+
+ netcp-devices {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ gbe@90000 {
+ label = "netcp-gbe";
+ reg = <0x90000 0x300>, <0x90400 0x400>, <0x90800 0x700>;
+ /* enable-ale; */
+ tx-queue = <648>;
+ tx-channel = <8>;
+
+ interfaces {
+ gbe0: interface-0 {
+ slave-port = <0>;
+ link-interface = <4>;
+ };
+ gbe1: interface-1 {
+ slave-port = <1>;
+ link-interface = <4>;
+ };
+ };
+
+ secondary-slave-ports {
+ port-2 {
+ slave-port = <2>;
+ link-interface = <2>;
+ };
+ port-3 {
+ slave-port = <3>;
+ link-interface = <2>;
+ };
+ };
+ };
+ };
+
+ netcp-interfaces {
+ interface-0 {
+ rx-channel = <22>;
+ rx-pool = <1024 12>;
+ tx-pool = <1024 12>;
+ rx-queue-depth = <128 128 0 0>;
+ rx-buffer-size = <1518 4096 0 0>;
+ rx-queue = <8704>;
+ tx-completion-queue = <8706>;
+ efuse-mac = <1>;
+ netcp-gbe = <&gbe0>;
+
+ };
+ interface-1 {
+ rx-channel = <23>;
+ rx-pool = <1024 12>;
+ tx-pool = <1024 12>;
+ rx-queue-depth = <128 128 0 0>;
+ rx-buffer-size = <1518 4096 0 0>;
+ rx-queue = <8705>;
+ tx-completion-queue = <8707>;
+ efuse-mac = <0>;
+ local-mac-address = [02 18 31 7e 3e 6f];
+ netcp-gbe = <&gbe1>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/lpc-eth.txt b/Documentation/devicetree/bindings/net/lpc-eth.txt
new file mode 100644
index 000000000..b92e92780
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/lpc-eth.txt
@@ -0,0 +1,23 @@
+* NXP LPC32xx SoC Ethernet Controller
+
+Required properties:
+- compatible: Should be "nxp,lpc-eth"
+- reg: Address and length of the register set for the device
+- interrupts: Should contain ethernet controller interrupt
+
+Optional properties:
+- phy-mode: See ethernet.txt file in the same directory. If the property is
+ absent, "rmii" is assumed.
+- use-iram: Use LPC32xx internal SRAM (IRAM) for DMA buffering
+
+Example:
+
+ mac: ethernet@31060000 {
+ compatible = "nxp,lpc-eth";
+ reg = <0x31060000 0x1000>;
+ interrupt-parent = <&mic>;
+ interrupts = <29 0>;
+
+ phy-mode = "rmii";
+ use-iram;
+ };
diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt
new file mode 100644
index 000000000..ba19d671e
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/macb.txt
@@ -0,0 +1,30 @@
+* Cadence MACB/GEM Ethernet controller
+
+Required properties:
+- compatible: Should be "cdns,[<chip>-]{macb|gem}"
+ Use "cdns,at91sam9260-macb" for Atmel at91sam9 SoCs or the 10/100Mbit IP
+ available on sama5d3 SoCs.
+ Use "cdns,at32ap7000-macb" for other 10/100 usage or use the generic form: "cdns,macb".
+ Use "cdns,pc302-gem" for Picochip picoXcell pc302 and later devices based on
+ the Cadence GEM, or the generic form: "cdns,gem".
+ Use "cdns,sama5d3-gem" for the Gigabit IP available on Atmel sama5d3 SoCs.
+ Use "cdns,sama5d4-gem" for the Gigabit IP available on Atmel sama5d4 SoCs.
+- reg: Address and length of the register set for the device
+- interrupts: Should contain macb interrupt
+- phy-mode: See ethernet.txt file in the same directory.
+- clock-names: Tuple listing input clock names.
+ Required elements: 'pclk', 'hclk'
+ Optional elements: 'tx_clk'
+- clocks: Phandles to input clocks.
+
+Examples:
+
+ macb0: ethernet@fffc4000 {
+ compatible = "cdns,at32ap7000-macb";
+ reg = <0xfffc4000 0x4000>;
+ interrupts = <21>;
+ phy-mode = "rmii";
+ local-mac-address = [3a 0e 03 04 05 06];
+ clock-names = "pclk", "hclk", "tx_clk";
+ clocks = <&clkc 30>, <&clkc 30>, <&clkc 13>;
+ };
diff --git a/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
new file mode 100644
index 000000000..f5a8ca29a
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/marvell-armada-370-neta.txt
@@ -0,0 +1,21 @@
+* Marvell Armada 370 / Armada XP Ethernet Controller (NETA)
+
+Required properties:
+- compatible: "marvell,armada-370-neta" or "marvell,armada-xp-neta".
+- reg: address and length of the register set for the device.
+- interrupts: interrupt for the device
+- phy: See ethernet.txt file in the same directory.
+- phy-mode: See ethernet.txt file in the same directory
+- clocks: a pointer to the reference clock for this device.
+
+Example:
+
+ethernet@d0070000 {
+ compatible = "marvell,armada-370-neta";
+ reg = <0xd0070000 0x2500>;
+ interrupts = <8>;
+ clocks = <&gate_clk 4>;
+ status = "okay";
+ phy = <&phy0>;
+ phy-mode = "rgmii-id";
+};
diff --git a/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt b/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt
new file mode 100644
index 000000000..9417e54c2
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/marvell-orion-mdio.txt
@@ -0,0 +1,39 @@
+* Marvell MDIO Ethernet Controller interface
+
+The Ethernet controllers of the Marvel Kirkwood, Dove, Orion5x,
+MV78xx0, Armada 370 and Armada XP have an identical unit that provides
+an interface with the MDIO bus. This driver handles this MDIO
+interface.
+
+Required properties:
+- compatible: "marvell,orion-mdio"
+- reg: address and length of the SMI register
+
+Optional properties:
+- interrupts: interrupt line number for the SMI error/done interrupt
+- clocks: Phandle to the clock control device and gate bit
+
+The child nodes of the MDIO driver are the individual PHY devices
+connected to this MDIO bus. They must have a "reg" property given the
+PHY address on the MDIO bus.
+
+Example at the SoC level:
+
+mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "marvell,orion-mdio";
+ reg = <0xd0072004 0x4>;
+};
+
+And at the board level:
+
+mdio {
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+
+ phy1: ethernet-phy@1 {
+ reg = <1>;
+ };
+}
diff --git a/Documentation/devicetree/bindings/net/marvell-orion-net.txt b/Documentation/devicetree/bindings/net/marvell-orion-net.txt
new file mode 100644
index 000000000..bce52b2ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/marvell-orion-net.txt
@@ -0,0 +1,82 @@
+Marvell Orion/Discovery ethernet controller
+=============================================
+
+The Marvell Discovery ethernet controller can be found on Marvell Orion SoCs
+(Kirkwood, Dove, Orion5x, and Discovery Innovation) and as part of Marvell
+Discovery system controller chips (mv64[345]60).
+
+The Discovery ethernet controller is described with two levels of nodes. The
+first level describes the ethernet controller itself and the second level
+describes up to 3 ethernet port nodes within that controller. The reason for
+the multiple levels is that the port registers are interleaved within a single
+set of controller registers. Each port node describes port-specific properties.
+
+Note: The above separation is only true for Discovery system controllers.
+For Orion SoCs we stick to the separation, although there each controller has
+only one port associated. Multiple ports are implemented as multiple single-port
+controllers. As Kirkwood has some issues with proper initialization after reset,
+an extra compatible string is added for it.
+
+* Ethernet controller node
+
+Required controller properties:
+ - #address-cells: shall be 1.
+ - #size-cells: shall be 0.
+ - compatible: shall be one of "marvell,orion-eth", "marvell,kirkwood-eth".
+ - reg: address and length of the controller registers.
+
+Optional controller properties:
+ - clocks: phandle reference to the controller clock.
+ - marvell,tx-checksum-limit: max tx packet size for hardware checksum.
+
+* Ethernet port node
+
+Required port properties:
+ - compatible: shall be one of "marvell,orion-eth-port",
+ "marvell,kirkwood-eth-port".
+ - reg: port number relative to ethernet controller, shall be 0, 1, or 2.
+ - interrupts: port interrupt.
+ - local-mac-address: See ethernet.txt file in the same directory.
+
+Optional port properties:
+ - marvell,tx-queue-size: size of the transmit ring buffer.
+ - marvell,tx-sram-addr: address of transmit descriptor buffer located in SRAM.
+ - marvell,tx-sram-size: size of transmit descriptor buffer located in SRAM.
+ - marvell,rx-queue-size: size of the receive ring buffer.
+ - marvell,rx-sram-addr: address of receive descriptor buffer located in SRAM.
+ - marvell,rx-sram-size: size of receive descriptor buffer located in SRAM.
+
+and
+
+ - phy-handle: See ethernet.txt file in the same directory.
+
+or
+
+ - speed: port speed if no PHY connected.
+ - duplex: port mode if no PHY connected.
+
+* Node example:
+
+mdio-bus {
+ ...
+ ethphy: ethernet-phy@8 {
+ ...
+ };
+};
+
+eth: ethernet-controller@72000 {
+ compatible = "marvell,orion-eth";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x72000 0x2000>;
+ clocks = <&gate_clk 2>;
+ marvell,tx-checksum-limit = <1600>;
+
+ ethernet@0 {
+ compatible = "marvell,orion-eth-port";
+ reg = <0>;
+ interrupts = <29>;
+ phy-handle = <&ethphy>;
+ local-mac-address = [00 00 00 00 00 00];
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt
new file mode 100644
index 000000000..aa4f4230b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt
@@ -0,0 +1,61 @@
+* Marvell Armada 375 Ethernet Controller (PPv2)
+
+Required properties:
+
+- compatible: should be "marvell,armada-375-pp2"
+- reg: addresses and length of the register sets for the device.
+ Must contain the following register sets:
+ - common controller registers
+ - LMS registers
+ In addition, at least one port register set is required.
+- clocks: a pointer to the reference clocks for this device, consequently:
+ - main controller clock
+ - GOP clock
+- clock-names: names of used clocks, must be "pp_clk" and "gop_clk".
+
+The ethernet ports are represented by subnodes. At least one port is
+required.
+
+Required properties (port):
+
+- interrupts: interrupt for the port
+- port-id: should be '0' or '1' for ethernet ports, and '2' for the
+ loopback port
+- phy-mode: See ethernet.txt file in the same directory
+
+Optional properties (port):
+
+- marvell,loopback: port is loopback mode
+- phy: a phandle to a phy node defining the PHY address (as the reg
+ property, a single integer). Note: if this property isn't present,
+ then fixed link is assumed, and the 'fixed-link' property is
+ mandatory.
+
+Example:
+
+ethernet@f0000 {
+ compatible = "marvell,armada-375-pp2";
+ reg = <0xf0000 0xa000>,
+ <0xc0000 0x3060>,
+ <0xc4000 0x100>,
+ <0xc5000 0x100>;
+ clocks = <&gateclk 3>, <&gateclk 19>;
+ clock-names = "pp_clk", "gop_clk";
+ status = "okay";
+
+ eth0: eth0@c4000 {
+ interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+ port-id = <0>;
+ status = "okay";
+ phy = <&phy0>;
+ phy-mode = "gmii";
+ };
+
+ eth1: eth1@c5000 {
+ interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>;
+ port-id = <1>;
+ status = "okay";
+ phy = <&phy3>;
+ phy-mode = "gmii";
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/marvell-pxa168.txt b/Documentation/devicetree/bindings/net/marvell-pxa168.txt
new file mode 100644
index 000000000..845a148a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/marvell-pxa168.txt
@@ -0,0 +1,36 @@
+* Marvell PXA168 Ethernet Controller
+
+Required properties:
+- compatible: should be "marvell,pxa168-eth".
+- reg: address and length of the register set for the device.
+- interrupts: interrupt for the device.
+- clocks: pointer to the clock for the device.
+
+Optional properties:
+- port-id: Ethernet port number. Should be '0','1' or '2'.
+- #address-cells: must be 1 when using sub-nodes.
+- #size-cells: must be 0 when using sub-nodes.
+- phy-handle: see ethernet.txt file in the same directory.
+- local-mac-address: see ethernet.txt file in the same directory.
+
+Sub-nodes:
+Each PHY can be represented as a sub-node. This is not mandatory.
+
+Sub-nodes required properties:
+- reg: the MDIO address of the PHY.
+
+Example:
+
+ eth0: ethernet@f7b90000 {
+ compatible = "marvell,pxa168-eth";
+ reg = <0xf7b90000 0x10000>;
+ clocks = <&chip CLKID_GETH0>;
+ interrupts = <GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ phy-handle = <&ethphy0>;
+
+ ethphy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/mdio-gpio.txt b/Documentation/devicetree/bindings/net/mdio-gpio.txt
new file mode 100644
index 000000000..8dbcf8295
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mdio-gpio.txt
@@ -0,0 +1,26 @@
+MDIO on GPIOs
+
+Currently defined compatibles:
+- virtual,gpio-mdio
+
+MDC and MDIO lines connected to GPIO controllers are listed in the
+gpios property as described in section VIII.1 in the following order:
+
+MDC, MDIO.
+
+Note: Each gpio-mdio bus should have an alias correctly numbered in "aliases"
+node.
+
+Example:
+
+aliases {
+ mdio-gpio0 = &mdio0;
+};
+
+mdio0: mdio {
+ compatible = "virtual,mdio-gpio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ gpios = <&qe_pio_a 11
+ &qe_pio_c 6>;
+};
diff --git a/Documentation/devicetree/bindings/net/mdio-mux-gpio.txt b/Documentation/devicetree/bindings/net/mdio-mux-gpio.txt
new file mode 100644
index 000000000..79384113c
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mdio-mux-gpio.txt
@@ -0,0 +1,127 @@
+Properties for an MDIO bus multiplexer/switch controlled by GPIO pins.
+
+This is a special case of a MDIO bus multiplexer. One or more GPIO
+lines are used to control which child bus is connected.
+
+Required properties in addition to the generic multiplexer properties:
+
+- compatible : mdio-mux-gpio.
+- gpios : GPIO specifiers for each GPIO line. One or more must be specified.
+
+
+Example :
+
+ /* The parent MDIO bus. */
+ smi1: mdio@1180000001900 {
+ compatible = "cavium,octeon-3860-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x11800 0x00001900 0x0 0x40>;
+ };
+
+ /*
+ An NXP sn74cbtlv3253 dual 1-of-4 switch controlled by a
+ pair of GPIO lines. Child busses 2 and 3 populated with 4
+ PHYs each.
+ */
+ mdio-mux {
+ compatible = "mdio-mux-gpio";
+ gpios = <&gpio1 3 0>, <&gpio1 4 0>;
+ mdio-parent-bus = <&smi1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ mdio@2 {
+ reg = <2>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy11: ethernet-phy@1 {
+ reg = <1>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <10 8>; /* Pin 10, active low */
+ };
+ phy12: ethernet-phy@2 {
+ reg = <2>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <10 8>; /* Pin 10, active low */
+ };
+ phy13: ethernet-phy@3 {
+ reg = <3>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <10 8>; /* Pin 10, active low */
+ };
+ phy14: ethernet-phy@4 {
+ reg = <4>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <10 8>; /* Pin 10, active low */
+ };
+ };
+
+ mdio@3 {
+ reg = <3>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy21: ethernet-phy@1 {
+ reg = <1>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <12 8>; /* Pin 12, active low */
+ };
+ phy22: ethernet-phy@2 {
+ reg = <2>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <12 8>; /* Pin 12, active low */
+ };
+ phy23: ethernet-phy@3 {
+ reg = <3>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <12 8>; /* Pin 12, active low */
+ };
+ phy24: ethernet-phy@4 {
+ reg = <4>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <12 8>; /* Pin 12, active low */
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt b/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt
new file mode 100644
index 000000000..8516929c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mdio-mux-mmioreg.txt
@@ -0,0 +1,75 @@
+Properties for an MDIO bus multiplexer controlled by a memory-mapped device
+
+This is a special case of a MDIO bus multiplexer. A memory-mapped device,
+like an FPGA, is used to control which child bus is connected. The mdio-mux
+node must be a child of the memory-mapped device. The driver currently only
+supports devices with eight-bit registers.
+
+Required properties in addition to the generic multiplexer properties:
+
+- compatible : string, must contain "mdio-mux-mmioreg"
+
+- reg : integer, contains the offset of the register that controls the bus
+ multiplexer. The size field in the 'reg' property is the size of
+ register, and must therefore be 1.
+
+- mux-mask : integer, contains an eight-bit mask that specifies which
+ bits in the register control the actual bus multiplexer. The
+ 'reg' property of each child mdio-mux node must be constrained by
+ this mask.
+
+Example:
+
+The FPGA node defines a memory-mapped FPGA with a register space of 0x30 bytes.
+For the "EMI2" MDIO bus, register 9 (BRDCFG1) controls the mux on that bus.
+A bitmask of 0x6 means that bits 1 and 2 (bit 0 is lsb) are the bits on
+BRDCFG1 that control the actual mux.
+
+ /* The FPGA node */
+ fpga: board-control@3,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,p5020ds-fpga", "fsl,fpga-ngpixis";
+ reg = <3 0 0x30>;
+ ranges = <0 3 0 0x30>;
+
+ mdio-mux-emi2 {
+ compatible = "mdio-mux-mmioreg", "mdio-mux";
+ mdio-parent-bus = <&xmdio0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <9 1>; // BRDCFG1
+ mux-mask = <0x6>; // EMI2
+
+ emi2_slot1: mdio@0 { // Slot 1 XAUI (FM2)
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy_xgmii_slot1: ethernet-phy@0 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <4>;
+ };
+ };
+
+ emi2_slot2: mdio@2 { // Slot 2 XAUI (FM1)
+ reg = <2>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy_xgmii_slot2: ethernet-phy@4 {
+ compatible = "ethernet-phy-ieee802.3-c45";
+ reg = <0>;
+ };
+ };
+ };
+ };
+
+ /* The parent MDIO bus. */
+ xmdio0: mdio@f1000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,fman-xmdio";
+ reg = <0xf1000 0x1000>;
+ interrupts = <100 1 0 0>;
+ };
diff --git a/Documentation/devicetree/bindings/net/mdio-mux.txt b/Documentation/devicetree/bindings/net/mdio-mux.txt
new file mode 100644
index 000000000..f65606f8d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mdio-mux.txt
@@ -0,0 +1,136 @@
+Common MDIO bus multiplexer/switch properties.
+
+An MDIO bus multiplexer/switch will have several child busses that are
+numbered uniquely in a device dependent manner. The nodes for an MDIO
+bus multiplexer/switch will have one child node for each child bus.
+
+Required properties:
+- mdio-parent-bus : phandle to the parent MDIO bus.
+- #address-cells = <1>;
+- #size-cells = <0>;
+
+Optional properties:
+- Other properties specific to the multiplexer/switch hardware.
+
+Required properties for child nodes:
+- #address-cells = <1>;
+- #size-cells = <0>;
+- reg : The sub-bus number.
+
+
+Example :
+
+ /* The parent MDIO bus. */
+ smi1: mdio@1180000001900 {
+ compatible = "cavium,octeon-3860-mdio";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x11800 0x00001900 0x0 0x40>;
+ };
+
+ /*
+ An NXP sn74cbtlv3253 dual 1-of-4 switch controlled by a
+ pair of GPIO lines. Child busses 2 and 3 populated with 4
+ PHYs each.
+ */
+ mdio-mux {
+ compatible = "mdio-mux-gpio";
+ gpios = <&gpio1 3 0>, <&gpio1 4 0>;
+ mdio-parent-bus = <&smi1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ mdio@2 {
+ reg = <2>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy11: ethernet-phy@1 {
+ reg = <1>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <10 8>; /* Pin 10, active low */
+ };
+ phy12: ethernet-phy@2 {
+ reg = <2>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <10 8>; /* Pin 10, active low */
+ };
+ phy13: ethernet-phy@3 {
+ reg = <3>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <10 8>; /* Pin 10, active low */
+ };
+ phy14: ethernet-phy@4 {
+ reg = <4>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <10 8>; /* Pin 10, active low */
+ };
+ };
+
+ mdio@3 {
+ reg = <3>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy21: ethernet-phy@1 {
+ reg = <1>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <12 8>; /* Pin 12, active low */
+ };
+ phy22: ethernet-phy@2 {
+ reg = <2>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <12 8>; /* Pin 12, active low */
+ };
+ phy23: ethernet-phy@3 {
+ reg = <3>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <12 8>; /* Pin 12, active low */
+ };
+ phy24: ethernet-phy@4 {
+ reg = <4>;
+ compatible = "marvell,88e1149r";
+ marvell,reg-init = <3 0x10 0 0x5777>,
+ <3 0x11 0 0x00aa>,
+ <3 0x12 0 0x4105>,
+ <3 0x13 0 0x0a60>;
+ interrupt-parent = <&gpio>;
+ interrupts = <12 8>; /* Pin 12, active low */
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/meson-dwmac.txt b/Documentation/devicetree/bindings/net/meson-dwmac.txt
new file mode 100644
index 000000000..ec633d74a
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/meson-dwmac.txt
@@ -0,0 +1,25 @@
+* Amlogic Meson DWMAC Ethernet controller
+
+The device inherits all the properties of the dwmac/stmmac devices
+described in the file net/stmmac.txt with the following changes.
+
+Required properties:
+
+- compatible: should be "amlogic,meson6-dwmac" along with "snps,dwmac"
+ and any applicable more detailed version number
+ described in net/stmmac.txt
+
+- reg: should contain a register range for the dwmac controller and
+ another one for the Amlogic specific configuration
+
+Example:
+
+ ethmac: ethernet@c9410000 {
+ compatible = "amlogic,meson6-dwmac", "snps,dwmac";
+ reg = <0xc9410000 0x10000
+ 0xc1108108 0x4>;
+ interrupts = <0 8 1>;
+ interrupt-names = "macirq";
+ clocks = <&clk81>;
+ clock-names = "stmmaceth";
+ }
diff --git a/Documentation/devicetree/bindings/net/micrel-ks8851.txt b/Documentation/devicetree/bindings/net/micrel-ks8851.txt
new file mode 100644
index 000000000..bbdf9a735
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/micrel-ks8851.txt
@@ -0,0 +1,18 @@
+Micrel KS8851 Ethernet mac (MLL)
+
+Required properties:
+- compatible = "micrel,ks8851-mll" of parallel interface
+- reg : 2 physical address and size of registers for data and command
+- interrupts : interrupt connection
+
+Micrel KS8851 Ethernet mac (SPI)
+
+Required properties:
+- compatible = "micrel,ks8851" or the deprecated "ks8851"
+- reg : chip select number
+- interrupts : interrupt connection
+
+Optional properties:
+- vdd-supply: analog 3.3V supply for Ethernet mac
+- vdd-io-supply: digital 1.8V IO supply for Ethernet mac
+- reset-gpios: reset_n input pin
diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
new file mode 100644
index 000000000..692076fda
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt
@@ -0,0 +1,83 @@
+Micrel KSZ9021/KSZ9031 Gigabit Ethernet PHY
+
+Some boards require special tuning values, particularly when it comes to
+clock delays. You can specify clock delay values by adding
+micrel-specific properties to an Ethernet OF device node.
+
+Note that these settings are applied after any phy-specific fixup from
+phy_fixup_list (see phy_init_hw() from drivers/net/phy/phy_device.c),
+and therefore may overwrite them.
+
+KSZ9021:
+
+ All skew control options are specified in picoseconds. The minimum
+ value is 0, the maximum value is 3000, and it is incremented by 200ps
+ steps.
+
+ Optional properties:
+
+ - rxc-skew-ps : Skew control of RXC pad
+ - rxdv-skew-ps : Skew control of RX CTL pad
+ - txc-skew-ps : Skew control of TXC pad
+ - txen-skew-ps : Skew control of TX CTL pad
+ - rxd0-skew-ps : Skew control of RX data 0 pad
+ - rxd1-skew-ps : Skew control of RX data 1 pad
+ - rxd2-skew-ps : Skew control of RX data 2 pad
+ - rxd3-skew-ps : Skew control of RX data 3 pad
+ - txd0-skew-ps : Skew control of TX data 0 pad
+ - txd1-skew-ps : Skew control of TX data 1 pad
+ - txd2-skew-ps : Skew control of TX data 2 pad
+ - txd3-skew-ps : Skew control of TX data 3 pad
+
+KSZ9031:
+
+ All skew control options are specified in picoseconds. The minimum
+ value is 0, and the maximum is property-dependent. The increment
+ step is 60ps.
+
+ Optional properties:
+
+ Maximum value of 1860:
+
+ - rxc-skew-ps : Skew control of RX clock pad
+ - txc-skew-ps : Skew control of TX clock pad
+
+ Maximum value of 900:
+
+ - rxdv-skew-ps : Skew control of RX CTL pad
+ - txen-skew-ps : Skew control of TX CTL pad
+ - rxd0-skew-ps : Skew control of RX data 0 pad
+ - rxd1-skew-ps : Skew control of RX data 1 pad
+ - rxd2-skew-ps : Skew control of RX data 2 pad
+ - rxd3-skew-ps : Skew control of RX data 3 pad
+ - txd0-skew-ps : Skew control of TX data 0 pad
+ - txd1-skew-ps : Skew control of TX data 1 pad
+ - txd2-skew-ps : Skew control of TX data 2 pad
+ - txd3-skew-ps : Skew control of TX data 3 pad
+
+Examples:
+
+ /* Attach to an Ethernet device with autodetected PHY */
+ &enet {
+ rxc-skew-ps = <3000>;
+ rxdv-skew-ps = <0>;
+ txc-skew-ps = <3000>;
+ txen-skew-ps = <0>;
+ status = "okay";
+ };
+
+ /* Attach to an explicitly-specified PHY */
+ mdio {
+ phy0: ethernet-phy@0 {
+ rxc-skew-ps = <3000>;
+ rxdv-skew-ps = <0>;
+ txc-skew-ps = <3000>;
+ txen-skew-ps = <0>;
+ reg = <0>;
+ };
+ };
+ ethernet@70000 {
+ status = "okay";
+ phy = <&phy0>;
+ phy-mode = "rgmii-id";
+ };
diff --git a/Documentation/devicetree/bindings/net/micrel.txt b/Documentation/devicetree/bindings/net/micrel.txt
new file mode 100644
index 000000000..87496a8c6
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/micrel.txt
@@ -0,0 +1,37 @@
+Micrel PHY properties.
+
+These properties cover the base properties Micrel PHYs.
+
+Optional properties:
+
+ - micrel,led-mode : LED mode value to set for PHYs with configurable LEDs.
+
+ Configure the LED mode with single value. The list of PHYs and the
+ bits that are currently supported:
+
+ KSZ8001: register 0x1e, bits 15..14
+ KSZ8041: register 0x1e, bits 15..14
+ KSZ8021: register 0x1f, bits 5..4
+ KSZ8031: register 0x1f, bits 5..4
+ KSZ8051: register 0x1f, bits 5..4
+ KSZ8081: register 0x1f, bits 5..4
+ KSZ8091: register 0x1f, bits 5..4
+
+ See the respective PHY datasheet for the mode values.
+
+ - micrel,rmii-reference-clock-select-25-mhz: RMII Reference Clock Select
+ bit selects 25 MHz mode
+
+ Setting the RMII Reference Clock Select bit enables 25 MHz rather
+ than 50 MHz clock mode.
+
+ Note that this option in only needed for certain PHY revisions with a
+ non-standard, inverted function of this configuration bit.
+ Specifically, a clock reference ("rmii-ref" below) is always needed to
+ actually select a mode.
+
+ - clocks, clock-names: contains clocks according to the common clock bindings.
+
+ supported clocks:
+ - KSZ8021, KSZ8031, KSZ8081, KSZ8091: "rmii-ref": The RMII reference
+ input clock. Used to determine the XI input clock.
diff --git a/Documentation/devicetree/bindings/net/moxa,moxart-mac.txt b/Documentation/devicetree/bindings/net/moxa,moxart-mac.txt
new file mode 100644
index 000000000..583418b2c
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/moxa,moxart-mac.txt
@@ -0,0 +1,21 @@
+MOXA ART Ethernet Controller
+
+Required properties:
+
+- compatible : Must be "moxa,moxart-mac"
+- reg : Should contain register location and length
+- interrupts : Should contain the mac interrupt number
+
+Example:
+
+ mac0: mac@90900000 {
+ compatible = "moxa,moxart-mac";
+ reg = <0x90900000 0x100>;
+ interrupts = <25 0>;
+ };
+
+ mac1: mac@92000000 {
+ compatible = "moxa,moxart-mac";
+ reg = <0x92000000 0x100>;
+ interrupts = <27 0>;
+ };
diff --git a/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt b/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt
new file mode 100644
index 000000000..5b6cd9b3f
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nfc/nxp-nci.txt
@@ -0,0 +1,35 @@
+* NXP Semiconductors NXP NCI NFC Controllers
+
+Required properties:
+- compatible: Should be "nxp,nxp-nci-i2c".
+- clock-frequency: I²C work frequency.
+- reg: address on the bus
+- interrupt-parent: phandle for the interrupt gpio controller
+- interrupts: GPIO interrupt to which the chip is connected
+- enable-gpios: Output GPIO pin used for enabling/disabling the chip
+- firmware-gpios: Output GPIO pin used to enter firmware download mode
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pintctrl-0: Specifies the pin control groups used for this controller.
+
+Example (for ARM-based BeagleBone with NPC100 NFC controller on I2C2):
+
+&i2c2 {
+
+ status = "okay";
+
+ npc100: npc100@29 {
+
+ compatible = "nxp,nxp-nci-i2c";
+
+ reg = <0x29>;
+ clock-frequency = <100000>;
+
+ interrupt-parent = <&gpio1>;
+ interrupts = <29 GPIO_ACTIVE_HIGH>;
+
+ enable-gpios = <&gpio0 30 GPIO_ACTIVE_HIGH>;
+ firmware-gpios = <&gpio0 31 GPIO_ACTIVE_HIGH>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/nfc/pn544.txt b/Documentation/devicetree/bindings/net/nfc/pn544.txt
new file mode 100644
index 000000000..dab69f361
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nfc/pn544.txt
@@ -0,0 +1,35 @@
+* NXP Semiconductors PN544 NFC Controller
+
+Required properties:
+- compatible: Should be "nxp,pn544-i2c".
+- clock-frequency: IC work frequency.
+- reg: address on the bus
+- interrupt-parent: phandle for the interrupt gpio controller
+- interrupts: GPIO interrupt to which the chip is connected
+- enable-gpios: Output GPIO pin used for enabling/disabling the PN544
+- firmware-gpios: Output GPIO pin used to enter firmware download mode
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pintctrl-0: Specifies the pin control groups used for this controller.
+
+Example (for ARM-based BeagleBone with PN544 on I2C2):
+
+&i2c2 {
+
+ status = "okay";
+
+ pn544: pn544@28 {
+
+ compatible = "nxp,pn544-i2c";
+
+ reg = <0x28>;
+ clock-frequency = <400000>;
+
+ interrupt-parent = <&gpio1>;
+ interrupts = <17 GPIO_ACTIVE_HIGH>;
+
+ enable-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
+ firmware-gpios = <&gpio3 19 GPIO_ACTIVE_HIGH>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/nfc/st21nfca.txt b/Documentation/devicetree/bindings/net/nfc/st21nfca.txt
new file mode 100644
index 000000000..7bb2e213d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nfc/st21nfca.txt
@@ -0,0 +1,40 @@
+* STMicroelectronics SAS. ST21NFCA NFC Controller
+
+Required properties:
+- compatible: Should be "st,st21nfca-i2c".
+- clock-frequency: I²C work frequency.
+- reg: address on the bus
+- interrupt-parent: phandle for the interrupt gpio controller
+- interrupts: GPIO interrupt to which the chip is connected
+- enable-gpios: Output GPIO pin used for enabling/disabling the ST21NFCA
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pintctrl-0: Specifies the pin control groups used for this controller.
+- ese-present: Specifies that an ese is physically connected to the nfc
+controller.
+- uicc-present: Specifies that the uicc swp signal can be physically
+connected to the nfc controller.
+
+Example (for ARM-based BeagleBoard xM with ST21NFCA on I2C2):
+
+&i2c2 {
+
+ status = "okay";
+
+ st21nfca: st21nfca@1 {
+
+ compatible = "st,st21nfca-i2c";
+
+ reg = <0x01>;
+ clock-frequency = <400000>;
+
+ interrupt-parent = <&gpio5>;
+ interrupts = <2 IRQ_TYPE_LEVEL_LOW>;
+
+ enable-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>;
+
+ ese-present;
+ uicc-present;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt b/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt
new file mode 100644
index 000000000..bb237072d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nfc/st21nfcb.txt
@@ -0,0 +1,33 @@
+* STMicroelectronics SAS. ST21NFCB NFC Controller
+
+Required properties:
+- compatible: Should be "st,st21nfcb-i2c".
+- clock-frequency: I²C work frequency.
+- reg: address on the bus
+- interrupt-parent: phandle for the interrupt gpio controller
+- interrupts: GPIO interrupt to which the chip is connected
+- reset-gpios: Output GPIO pin used to reset the ST21NFCB
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pintctrl-0: Specifies the pin control groups used for this controller.
+
+Example (for ARM-based BeagleBoard xM with ST21NFCB on I2C2):
+
+&i2c2 {
+
+ status = "okay";
+
+ st21nfcb: st21nfcb@8 {
+
+ compatible = "st,st21nfcb-i2c";
+
+ reg = <0x08>;
+ clock-frequency = <400000>;
+
+ interrupt-parent = <&gpio5>;
+ interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
+
+ reset-gpios = <&gpio5 29 GPIO_ACTIVE_HIGH>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/nfc/trf7970a.txt b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
new file mode 100644
index 000000000..7c89ca290
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nfc/trf7970a.txt
@@ -0,0 +1,44 @@
+* Texas Instruments TRF7970A RFID/NFC/15693 Transceiver
+
+Required properties:
+- compatible: Should be "ti,trf7970a".
+- spi-max-frequency: Maximum SPI frequency (<= 2000000).
+- interrupt-parent: phandle of parent interrupt handler.
+- interrupts: A single interrupt specifier.
+- ti,enable-gpios: Two GPIO entries used for 'EN' and 'EN2' pins on the
+ TRF7970A.
+- vin-supply: Regulator for supply voltage to VIN pin
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pintctrl-0: Specifies the pin control groups used for this controller.
+- autosuspend-delay: Specify autosuspend delay in milliseconds.
+- vin-voltage-override: Specify voltage of VIN pin in microvolts.
+- irq-status-read-quirk: Specify that the trf7970a being used has the
+ "IRQ Status Read" erratum.
+- en2-rf-quirk: Specify that the trf7970a being used has the "EN2 RF"
+ erratum.
+
+Example (for ARM-based BeagleBone with TRF7970A on SPI1):
+
+&spi1 {
+ status = "okay";
+
+ nfc@0 {
+ compatible = "ti,trf7970a";
+ reg = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&trf7970a_default>;
+ spi-max-frequency = <2000000>;
+ interrupt-parent = <&gpio2>;
+ interrupts = <14 0>;
+ ti,enable-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>,
+ <&gpio2 5 GPIO_ACTIVE_LOW>;
+ vin-supply = <&ldo3_reg>;
+ vin-voltage-override = <5000000>;
+ autosuspend-delay = <30000>;
+ irq-status-read-quirk;
+ en2-rf-quirk;
+ status = "okay";
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/opencores-ethoc.txt b/Documentation/devicetree/bindings/net/opencores-ethoc.txt
new file mode 100644
index 000000000..2dc127c30
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/opencores-ethoc.txt
@@ -0,0 +1,22 @@
+* OpenCores MAC 10/100 Mbps
+
+Required properties:
+- compatible: Should be "opencores,ethoc".
+- reg: two memory regions (address and length),
+ first region is for the device registers and descriptor rings,
+ second is for the device packet memory.
+- interrupts: interrupt for the device.
+
+Optional properties:
+- clocks: phandle to refer to the clk used as per
+ Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Examples:
+
+ enet0: ethoc@fd030000 {
+ compatible = "opencores,ethoc";
+ reg = <0xfd030000 0x4000 0xfd800000 0x4000>;
+ interrupts = <1>;
+ local-mac-address = [00 50 c2 13 6f 00];
+ clocks = <&osc>;
+ };
diff --git a/Documentation/devicetree/bindings/net/phy.txt b/Documentation/devicetree/bindings/net/phy.txt
new file mode 100644
index 000000000..40831fbaf
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/phy.txt
@@ -0,0 +1,40 @@
+PHY nodes
+
+Required properties:
+
+ - interrupts : <a b> where a is the interrupt number and b is a
+ field that represents an encoding of the sense and level
+ information for the interrupt. This should be encoded based on
+ the information in section 2) depending on the type of interrupt
+ controller you have.
+ - interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+ - reg : The ID number for the phy, usually a small integer
+
+Optional Properties:
+
+- compatible: Compatible list, may contain
+ "ethernet-phy-ieee802.3-c22" or "ethernet-phy-ieee802.3-c45" for
+ PHYs that implement IEEE802.3 clause 22 or IEEE802.3 clause 45
+ specifications. If neither of these are specified, the default is to
+ assume clause 22. The compatible list may also contain other
+ elements.
+
+ If the phy's identifier is known then the list may contain an entry
+ of the form: "ethernet-phy-idAAAA.BBBB" where
+ AAAA - The value of the 16 bit Phy Identifier 1 register as
+ 4 hex digits. This is the chip vendor OUI bits 3:18
+ BBBB - The value of the 16 bit Phy Identifier 2 register as
+ 4 hex digits. This is the chip vendor OUI bits 19:24,
+ followed by 10 bits of a vendor specific ID.
+
+- max-speed: Maximum PHY supported speed (10, 100, 1000...)
+
+Example:
+
+ethernet-phy@0 {
+ compatible = "ethernet-phy-id0141.0e90", "ethernet-phy-ieee802.3-c22";
+ interrupt-parent = <40000>;
+ interrupts = <35 1>;
+ reg = <0>;
+};
diff --git a/Documentation/devicetree/bindings/net/qca-qca7000-spi.txt b/Documentation/devicetree/bindings/net/qca-qca7000-spi.txt
new file mode 100644
index 000000000..c74989c0d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qca-qca7000-spi.txt
@@ -0,0 +1,47 @@
+* Qualcomm QCA7000 (Ethernet over SPI protocol)
+
+Note: The QCA7000 is useable as a SPI device. In this case it must be defined
+as a child of a SPI master in the device tree.
+
+Required properties:
+- compatible : Should be "qca,qca7000"
+- reg : Should specify the SPI chip select
+- interrupts : The first cell should specify the index of the source interrupt
+ and the second cell should specify the trigger type as rising edge
+- spi-cpha : Must be set
+- spi-cpol: Must be set
+
+Optional properties:
+- interrupt-parent : Specify the pHandle of the source interrupt
+- spi-max-frequency : Maximum frequency of the SPI bus the chip can operate at.
+ Numbers smaller than 1000000 or greater than 16000000 are invalid. Missing
+ the property will set the SPI frequency to 8000000 Hertz.
+- local-mac-address: 6 bytes, MAC address
+- qca,legacy-mode : Set the SPI data transfer of the QCA7000 to legacy mode.
+ In this mode the SPI master must toggle the chip select between each data
+ word. In burst mode these gaps aren't necessary, which is faster.
+ This setting depends on how the QCA7000 is setup via GPIO pin strapping.
+ If the property is missing the driver defaults to burst mode.
+
+Example:
+
+/* Freescale i.MX28 SPI master*/
+ssp2: spi@80014000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx28-spi";
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi2_pins_a>;
+ status = "okay";
+
+ qca7000: ethernet@0 {
+ compatible = "qca,qca7000";
+ reg = <0x0>;
+ interrupt-parent = <&gpio3>; /* GPIO Bank 3 */
+ interrupts = <25 0x1>; /* Index: 25, rising edge */
+ spi-cpha; /* SPI mode: CPHA=1 */
+ spi-cpol; /* SPI mode: CPOL=1 */
+ spi-max-frequency = <8000000>; /* freq: 8 MHz */
+ local-mac-address = [ A0 B0 C0 D0 E0 F0 ];
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
new file mode 100644
index 000000000..21fd199e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt
@@ -0,0 +1,68 @@
+Rockchip SoC RK3288 10/100/1000 Ethernet driver(GMAC)
+
+The device node has following properties.
+
+Required properties:
+ - compatible: Can be "rockchip,rk3288-gmac".
+ - reg: addresses and length of the register sets for the device.
+ - interrupts: Should contain the GMAC interrupts.
+ - interrupt-names: Should contain the interrupt names "macirq".
+ - rockchip,grf: phandle to the syscon grf used to control speed and mode.
+ - clocks: <&cru SCLK_MAC>: clock selector for main clock, from PLL or PHY.
+ <&cru SCLK_MAC_PLL>: PLL clock for SCLK_MAC
+ <&cru SCLK_MAC_RX>: clock gate for RX
+ <&cru SCLK_MAC_TX>: clock gate for TX
+ <&cru SCLK_MACREF>: clock gate for RMII referce clock
+ <&cru SCLK_MACREF_OUT> clock gate for RMII reference clock output
+ <&cru ACLK_GMAC>: AXI clock gate for GMAC
+ <&cru PCLK_GMAC>: APB clock gate for GMAC
+ - clock-names: One name for each entry in the clocks property.
+ - phy-mode: See ethernet.txt file in the same directory.
+ - pinctrl-names: Names corresponding to the numbered pinctrl states.
+ - pinctrl-0: pin-control mode. can be <&rgmii_pins> or <&rmii_pins>.
+ - clock_in_out: For RGMII, it must be "input", means main clock(125MHz)
+ is not sourced from SoC's PLL, but input from PHY; For RMII, "input" means
+ PHY provides the reference clock(50MHz), "output" means GMAC provides the
+ reference clock.
+ - snps,reset-gpio gpio number for phy reset.
+ - snps,reset-active-low boolean flag to indicate if phy reset is active low.
+ - assigned-clocks: main clock, should be <&cru SCLK_MAC>;
+ - assigned-clock-parents = parent of main clock.
+ can be <&ext_gmac> or <&cru SCLK_MAC_PLL>.
+
+Optional properties:
+ - tx_delay: Delay value for TXD timing. Range value is 0~0x7F, 0x30 as default.
+ - rx_delay: Delay value for RXD timing. Range value is 0~0x7F, 0x10 as default.
+ - phy-supply: phandle to a regulator if the PHY needs one
+
+Example:
+
+gmac: ethernet@ff290000 {
+ compatible = "rockchip,rk3288-gmac";
+ reg = <0xff290000 0x10000>;
+ interrupts = <GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "macirq";
+ rockchip,grf = <&grf>;
+ clocks = <&cru SCLK_MAC>,
+ <&cru SCLK_MAC_RX>, <&cru SCLK_MAC_TX>,
+ <&cru SCLK_MACREF>, <&cru SCLK_MACREF_OUT>,
+ <&cru ACLK_GMAC>, <&cru PCLK_GMAC>;
+ clock-names = "stmmaceth",
+ "mac_clk_rx", "mac_clk_tx",
+ "clk_mac_ref", "clk_mac_refout",
+ "aclk_mac", "pclk_mac";
+ phy-mode = "rgmii";
+ pinctrl-names = "default";
+ pinctrl-0 = <&rgmii_pins /*&rmii_pins*/>;
+
+ clock_in_out = "input";
+ snps,reset-gpio = <&gpio4 7 0>;
+ snps,reset-active-low;
+
+ assigned-clocks = <&cru SCLK_MAC>;
+ assigned-clock-parents = <&ext_gmac>;
+ tx_delay = <0x30>;
+ rx_delay = <0x10>;
+
+ status = "ok";
+};
diff --git a/Documentation/devicetree/bindings/net/samsung-sxgbe.txt b/Documentation/devicetree/bindings/net/samsung-sxgbe.txt
new file mode 100644
index 000000000..888c25019
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/samsung-sxgbe.txt
@@ -0,0 +1,52 @@
+* Samsung 10G Ethernet driver (SXGBE)
+
+Required properties:
+- compatible: Should be "samsung,sxgbe-v2.0a"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the SXGBE interrupts
+ These interrupts are ordered by fixed and follows variable
+ trasmit DMA interrupts, receive DMA interrupts and lpi interrupt.
+ index 0 - this is fixed common interrupt of SXGBE and it is always
+ available.
+ index 1 to 25 - 8 variable trasmit interrupts, variable 16 receive interrupts
+ and 1 optional lpi interrupt.
+- phy-mode: String, operation mode of the PHY interface.
+ Supported values are: "sgmii", "xgmii".
+- samsung,pbl: Integer, Programmable Burst Length.
+ Supported values are 1, 2, 4, 8, 16, or 32.
+- samsung,burst-map: Integer, Program the possible bursts supported by sxgbe
+ This is an integer and represents allowable DMA bursts when fixed burst.
+ Allowable range is 0x01-0x3F. When this field is set fixed burst is enabled.
+ When fixed length is needed for burst mode, it can be set within allowable
+ range.
+
+Optional properties:
+- mac-address: 6 bytes, mac address
+- max-frame-size: Maximum Transfer Unit (IEEE defined MTU), rather
+ than the maximum frame size.
+
+Example:
+
+ aliases {
+ ethernet0 = <&sxgbe0>;
+ };
+
+ sxgbe0: ethernet@1a040000 {
+ compatible = "samsung,sxgbe-v2.0a";
+ reg = <0 0x1a040000 0 0x10000>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 209 4>, <0 185 4>, <0 186 4>, <0 187 4>,
+ <0 188 4>, <0 189 4>, <0 190 4>, <0 191 4>,
+ <0 192 4>, <0 193 4>, <0 194 4>, <0 195 4>,
+ <0 196 4>, <0 197 4>, <0 198 4>, <0 199 4>,
+ <0 200 4>, <0 201 4>, <0 202 4>, <0 203 4>,
+ <0 204 4>, <0 205 4>, <0 206 4>, <0 207 4>,
+ <0 208 4>, <0 210 4>;
+ samsung,pbl = <0x08>
+ samsung,burst-map = <0x20>
+ mac-address = [ 00 11 22 33 44 55 ]; /* Filled in by U-Boot */
+ max-frame-size = <9000>;
+ phy-mode = "xgmii";
+ };
diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt
new file mode 100644
index 000000000..2f6ec85fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/sh_eth.txt
@@ -0,0 +1,57 @@
+* Renesas Electronics SH EtherMAC
+
+This file provides information on what the device node for the SH EtherMAC
+interface contains.
+
+Required properties:
+- compatible: "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC.
+ "renesas,ether-r8a7778" if the device is a part of R8A7778 SoC.
+ "renesas,ether-r8a7779" if the device is a part of R8A7779 SoC.
+ "renesas,ether-r8a7790" if the device is a part of R8A7790 SoC.
+ "renesas,ether-r8a7791" if the device is a part of R8A7791 SoC.
+ "renesas,ether-r8a7793" if the device is a part of R8A7793 SoC.
+ "renesas,ether-r8a7794" if the device is a part of R8A7794 SoC.
+ "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC.
+- reg: offset and length of (1) the E-DMAC/feLic register block (required),
+ (2) the TSU register block (optional).
+- interrupts: interrupt specifier for the sole interrupt.
+- phy-mode: see ethernet.txt file in the same directory.
+- phy-handle: see ethernet.txt file in the same directory.
+- #address-cells: number of address cells for the MDIO bus, must be equal to 1.
+- #size-cells: number of size cells on the MDIO bus, must be equal to 0.
+- clocks: clock phandle and specifier pair.
+- pinctrl-0: phandle, referring to a default pin configuration node.
+
+Optional properties:
+- interrupt-parent: the phandle for the interrupt controller that services
+ interrupts for this device.
+- pinctrl-names: pin configuration state name ("default").
+- renesas,no-ether-link: boolean, specify when a board does not provide a proper
+ Ether LINK signal.
+- renesas,ether-link-active-low: boolean, specify when the Ether LINK signal is
+ active-low instead of normal active-high.
+
+Example (Lager board):
+
+ ethernet@ee700000 {
+ compatible = "renesas,ether-r8a7790";
+ reg = <0 0xee700000 0 0x400>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 162 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp8_clks R8A7790_CLK_ETHER>;
+ phy-mode = "rmii";
+ phy-handle = <&phy1>;
+ pinctrl-0 = <&ether_pins>;
+ pinctrl-names = "default";
+ renesas,ether-link-active-low;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ phy1: ethernet-phy@1 {
+ reg = <1>;
+ interrupt-parent = <&irqc0>;
+ interrupts = <0 IRQ_TYPE_LEVEL_LOW>;
+ pinctrl-0 = <&phy1_pins>;
+ pinctrl-names = "default";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/net/smsc-lan91c111.txt b/Documentation/devicetree/bindings/net/smsc-lan91c111.txt
new file mode 100644
index 000000000..e77e16759
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/smsc-lan91c111.txt
@@ -0,0 +1,15 @@
+SMSC LAN91c111 Ethernet mac
+
+Required properties:
+- compatible = "smsc,lan91c111";
+- reg : physical address and size of registers
+- interrupts : interrupt connection
+
+Optional properties:
+- phy-device : see ethernet.txt file in the same directory
+- reg-io-width : Mask of sizes (in bytes) of the IO accesses that
+ are supported on the device. Valid value for SMSC LAN91c111 are
+ 1, 2 or 4. If it's omitted or invalid, the size would be 2 meaning
+ 16-bit access only.
+- power-gpios: GPIO to control the PWRDWN pin
+- reset-gpios: GPIO to control the RESET pin
diff --git a/Documentation/devicetree/bindings/net/smsc911x.txt b/Documentation/devicetree/bindings/net/smsc911x.txt
new file mode 100644
index 000000000..3fed3c124
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/smsc911x.txt
@@ -0,0 +1,35 @@
+* Smart Mixed-Signal Connectivity (SMSC) LAN911x/912x Controller
+
+Required properties:
+- compatible : Should be "smsc,lan<model>", "smsc,lan9115"
+- reg : Address and length of the io space for SMSC LAN
+- interrupts : Should contain SMSC LAN interrupt line
+- interrupt-parent : Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- phy-mode : See ethernet.txt file in the same directory
+
+Optional properties:
+- reg-shift : Specify the quantity to shift the register offsets by
+- reg-io-width : Specify the size (in bytes) of the IO accesses that
+ should be performed on the device. Valid value for SMSC LAN is
+ 2 or 4. If it's omitted or invalid, the size would be 2.
+- smsc,irq-active-high : Indicates the IRQ polarity is active-high
+- smsc,irq-push-pull : Indicates the IRQ type is push-pull
+- smsc,force-internal-phy : Forces SMSC LAN controller to use
+ internal PHY
+- smsc,force-external-phy : Forces SMSC LAN controller to use
+ external PHY
+- smsc,save-mac-address : Indicates that mac address needs to be saved
+ before resetting the controller
+
+Examples:
+
+lan9220@f4000000 {
+ compatible = "smsc,lan9220", "smsc,lan9115";
+ reg = <0xf4000000 0x2000000>;
+ phy-mode = "mii";
+ interrupt-parent = <&gpio1>;
+ interrupts = <31>;
+ reg-io-width = <4>;
+ smsc,irq-push-pull;
+};
diff --git a/Documentation/devicetree/bindings/net/socfpga-dwmac.txt b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
new file mode 100644
index 000000000..3a9d67951
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/socfpga-dwmac.txt
@@ -0,0 +1,31 @@
+Altera SOCFPGA SoC DWMAC controller
+
+This is a variant of the dwmac/stmmac driver an inherits all descriptions
+present in Documentation/devicetree/bindings/net/stmmac.txt.
+
+The device node has additional properties:
+
+Required properties:
+ - compatible : Should contain "altr,socfpga-stmmac" along with
+ "snps,dwmac" and any applicable more detailed
+ designware version numbers documented in stmmac.txt
+ - altr,sysmgr-syscon : Should be the phandle to the system manager node that
+ encompasses the glue register, the register offset, and the register shift.
+
+Optional properties:
+altr,emac-splitter: Should be the phandle to the emac splitter soft IP node if
+ DWMAC controller is connected emac splitter.
+
+Example:
+
+gmac0: ethernet@ff700000 {
+ compatible = "altr,socfpga-stmmac", "snps,dwmac-3.70a", "snps,dwmac";
+ altr,sysmgr-syscon = <&sysmgr 0x60 0>;
+ status = "disabled";
+ reg = <0xff700000 0x2000>;
+ interrupts = <0 115 4>;
+ interrupt-names = "macirq";
+ mac-address = [00 00 00 00 00 00];/* Filled in by U-Boot */
+ clocks = <&emac_0_clk>;
+ clock-names = "stmmaceth";
+};
diff --git a/Documentation/devicetree/bindings/net/sti-dwmac.txt b/Documentation/devicetree/bindings/net/sti-dwmac.txt
new file mode 100644
index 000000000..d05c1e1fd
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/sti-dwmac.txt
@@ -0,0 +1,61 @@
+STMicroelectronics SoC DWMAC glue layer controller
+
+This file documents differences between the core properties in
+Documentation/devicetree/bindings/net/stmmac.txt
+and what is needed on STi platforms to program the stmmac glue logic.
+
+The device node has following properties.
+
+Required properties:
+ - compatible : Can be "st,stih415-dwmac", "st,stih416-dwmac",
+ "st,stih407-dwmac", "st,stid127-dwmac".
+ - st,syscon : Should be phandle/offset pair. The phandle to the syscon node which
+ encompases the glue register, and the offset of the control register.
+ - st,gmac_en: this is to enable the gmac into a dedicated sysctl control
+ register available on STiH407 SoC.
+ - pinctrl-0: pin-control for all the MII mode supported.
+
+Optional properties:
+ - resets : phandle pointing to the system reset controller with correct
+ reset line index for ethernet reset.
+ - st,ext-phyclk: valid only for RMII where PHY can generate 50MHz clock or
+ MAC can generate it.
+ - st,tx-retime-src: This specifies which clk is wired up to the mac for
+ retimeing tx lines. This is totally board dependent and can take one of the
+ posssible values from "txclk", "clk_125" or "clkgen".
+ If not passed, the internal clock will be used by default.
+ - sti-ethclk: this is the phy clock.
+ - sti-clkconf: this is an extra sysconfig register, available in new SoCs,
+ to program the clk retiming.
+ - st,gmac_en: to enable the GMAC, this only is present in some SoCs; e.g.
+ STiH407.
+
+Example:
+
+ethernet0: dwmac@9630000 {
+ device_type = "network";
+ status = "disabled";
+ compatible = "st,stih407-dwmac", "snps,dwmac", "snps,dwmac-3.710";
+ reg = <0x9630000 0x8000>;
+ reg-names = "stmmaceth";
+
+ st,syscon = <&syscfg_sbc_reg 0x80>;
+ st,gmac_en;
+ resets = <&softreset STIH407_ETH1_SOFTRESET>;
+ reset-names = "stmmaceth";
+
+ interrupts = <GIC_SPI 98 IRQ_TYPE_NONE>,
+ <GIC_SPI 99 IRQ_TYPE_NONE>,
+ <GIC_SPI 100 IRQ_TYPE_NONE>;
+ interrupt-names = "macirq", "eth_wake_irq", "eth_lpi";
+
+ snps,pbl = <32>;
+ snps,mixed-burst;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rgmii1>;
+
+ clock-names = "stmmaceth", "sti-ethclk";
+ clocks = <&CLK_S_C0_FLEXGEN CLK_EXT2F_A9>,
+ <&CLK_S_C0_FLEXGEN CLK_ETH_PHY>;
+};
diff --git a/Documentation/devicetree/bindings/net/stmmac.txt b/Documentation/devicetree/bindings/net/stmmac.txt
new file mode 100644
index 000000000..f34fc3c81
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/stmmac.txt
@@ -0,0 +1,68 @@
+* STMicroelectronics 10/100/1000 Ethernet driver (GMAC)
+
+Required properties:
+- compatible: Should be "snps,dwmac-<ip_version>" "snps,dwmac"
+ For backwards compatibility: "st,spear600-gmac" is also supported.
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the STMMAC interrupts
+- interrupt-names: Should contain the interrupt names "macirq"
+ "eth_wake_irq" if this interrupt is supported in the "interrupts"
+ property
+- phy-mode: See ethernet.txt file in the same directory.
+- snps,reset-gpio gpio number for phy reset.
+- snps,reset-active-low boolean flag to indicate if phy reset is active low.
+- snps,reset-delays-us is triplet of delays
+ The 1st cell is reset pre-delay in micro seconds.
+ The 2nd cell is reset pulse in micro seconds.
+ The 3rd cell is reset post-delay in micro seconds.
+- snps,pbl Programmable Burst Length
+- snps,fixed-burst Program the DMA to use the fixed burst mode
+- snps,mixed-burst Program the DMA to use the mixed burst mode
+- snps,force_thresh_dma_mode Force DMA to use the threshold mode for
+ both tx and rx
+- snps,force_sf_dma_mode Force DMA to use the Store and Forward
+ mode for both tx and rx. This flag is
+ ignored if force_thresh_dma_mode is set.
+- snps,multicast-filter-bins: Number of multicast filter hash bins
+ supported by this device instance
+- snps,perfect-filter-entries: Number of perfect filter entries supported
+ by this device instance
+
+Optional properties:
+- resets: Should contain a phandle to the STMMAC reset signal, if any
+- reset-names: Should contain the reset signal name "stmmaceth", if a
+ reset phandle is given
+- max-frame-size: See ethernet.txt file in the same directory
+- clocks: If present, the first clock should be the GMAC main clock and
+ the second clock should be peripheral's register interface clock. Further
+ clocks may be specified in derived bindings.
+- clock-names: One name for each entry in the clocks property, the
+ first one should be "stmmaceth" and the second one should be "pclk".
+- clk_ptp_ref: this is the PTP reference clock; in case of the PTP is
+ available this clock is used for programming the Timestamp Addend Register.
+ If not passed then the system clock will be used and this is fine on some
+ platforms.
+- snps,burst_len: The AXI burst lenth value of the AXI BUS MODE register.
+- tx-fifo-depth: See ethernet.txt file in the same directory
+- rx-fifo-depth: See ethernet.txt file in the same directory
+
+Examples:
+
+ gmac0: ethernet@e0800000 {
+ compatible = "st,spear600-gmac";
+ reg = <0xe0800000 0x8000>;
+ interrupt-parent = <&vic1>;
+ interrupts = <24 23>;
+ interrupt-names = "macirq", "eth_wake_irq";
+ mac-address = [000000000000]; /* Filled in by U-Boot */
+ max-frame-size = <3800>;
+ phy-mode = "gmii";
+ snps,multicast-filter-bins = <256>;
+ snps,perfect-filter-entries = <128>;
+ rx-fifo-depth = <16384>;
+ tx-fifo-depth = <16384>;
+ clocks = <&clock>;
+ clock-names = "stmmaceth";
+ };
diff --git a/Documentation/devicetree/bindings/net/via-rhine.txt b/Documentation/devicetree/bindings/net/via-rhine.txt
new file mode 100644
index 000000000..334eca2bf
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/via-rhine.txt
@@ -0,0 +1,17 @@
+* VIA Rhine 10/100 Network Controller
+
+Required properties:
+- compatible : Should be "via,vt8500-rhine" for integrated
+ Rhine controllers found in VIA VT8500, WonderMedia WM8950
+ and similar. These are listed as 1106:3106 rev. 0x84 on the
+ virtual PCI bus under vendor-provided kernels
+- reg : Address and length of the io space
+- interrupts : Should contain the controller interrupt line
+
+Examples:
+
+ethernet@d8004000 {
+ compatible = "via,vt8500-rhine";
+ reg = <0xd8004000 0x100>;
+ interrupts = <10>;
+};
diff --git a/Documentation/devicetree/bindings/net/via-velocity.txt b/Documentation/devicetree/bindings/net/via-velocity.txt
new file mode 100644
index 000000000..b3db469b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/via-velocity.txt
@@ -0,0 +1,20 @@
+* VIA Velocity 10/100/1000 Network Controller
+
+Required properties:
+- compatible : Should be "via,velocity-vt6110"
+- reg : Address and length of the io space
+- interrupts : Should contain the controller interrupt line
+
+Optional properties:
+- no-eeprom : PCI network cards use an external EEPROM to store data. Embedded
+ devices quite often set this data in uboot and do not provide an eeprom.
+ Specify this option if you have no external eeprom.
+
+Examples:
+
+eth0@d8004000 {
+ compatible = "via,velocity-vt6110";
+ reg = <0xd8004000 0x400>;
+ interrupts = <10>;
+ no-eeprom;
+};
diff --git a/Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt b/Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt
new file mode 100644
index 000000000..5dbf169cd
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/brcm,bcm43xx-fmac.txt
@@ -0,0 +1,41 @@
+Broadcom BCM43xx Fullmac wireless SDIO devices
+
+This node provides properties for controlling the Broadcom wireless device. The
+node is expected to be specified as a child node to the SDIO controller that
+connects the device to the system.
+
+Required properties:
+
+ - compatible : Should be "brcm,bcm4329-fmac".
+
+Optional properties:
+ - brcm,drive-strength : drive strength used for SDIO pins on device in mA
+ (default = 6).
+ - interrupt-parent : the phandle for the interrupt controller to which the
+ device interrupts are connected.
+ - interrupts : specifies attributes for the out-of-band interrupt (host-wake).
+ When not specified the device will use in-band SDIO interrupts.
+ - interrupt-names : name of the out-of-band interrupt, which must be set
+ to "host-wake".
+
+Example:
+
+mmc3: mmc@01c12000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&mmc3_pins_a>;
+ vmmc-supply = <&reg_vmmc3>;
+ bus-width = <4>;
+ non-removable;
+ status = "okay";
+
+ brcmf: bcrmf@1 {
+ reg = <1>;
+ compatible = "brcm,bcm4329-fmac";
+ interrupt-parent = <&pio>;
+ interrupts = <10 8>; /* PH10 / EINT10 */
+ interrupt-names = "host-wake";
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
new file mode 100644
index 000000000..edefc26c6
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
@@ -0,0 +1,30 @@
+* Qualcomm Atheros ath10k wireless devices
+
+For ath10k devices the calibration data can be provided through Device
+Tree. The node is a child node of the PCI controller.
+
+Required properties:
+-compatible : Should be "qcom,ath10k"
+
+Optional properties:
+- qcom,ath10k-calibration-data : calibration data as an array, the
+ length can vary between hw versions
+
+
+Example:
+
+pci {
+ pcie@0 {
+ reg = <0 0 0 0 0>;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ device_type = "pci";
+
+ ath10k@0,0 {
+ reg = <0 0 0 0 0>;
+ device_type = "pci";
+ qcom,ath10k-calibration-data = [ 01 02 03 ... ];
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt b/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt
new file mode 100644
index 000000000..189ae5cad
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/ti,wl1251.txt
@@ -0,0 +1,39 @@
+* Texas Instruments wl1251 wireless lan controller
+
+The wl1251 chip can be connected via SPI or via SDIO. This
+document describes the binding for the SPI connected chip.
+
+Required properties:
+- compatible : Should be "ti,wl1251"
+- reg : Chip select address of device
+- spi-max-frequency : Maximum SPI clocking speed of device in Hz
+- interrupts : Should contain interrupt line
+- interrupt-parent : Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- vio-supply : phandle to regulator providing VIO
+- ti,power-gpio : GPIO connected to chip's PMEN pin
+
+Optional properties:
+- ti,wl1251-has-eeprom : boolean, the wl1251 has an eeprom connected, which
+ provides configuration data (calibration, MAC, ...)
+- Please consult Documentation/devicetree/bindings/spi/spi-bus.txt
+ for optional SPI connection related properties,
+
+Examples:
+
+&spi1 {
+ wl1251@0 {
+ compatible = "ti,wl1251";
+
+ reg = <0>;
+ spi-max-frequency = <48000000>;
+ spi-cpol;
+ spi-cpha;
+
+ interrupt-parent = <&gpio2>;
+ interrupts = <10 IRQ_TYPE_NONE>; /* gpio line 42 */
+
+ vio-supply = <&vio>;
+ ti,power-gpio = <&gpio3 23 GPIO_ACTIVE_HIGH>; /* 87 */
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt b/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt
new file mode 100644
index 000000000..2a3d90de1
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/ti,wlcore.txt
@@ -0,0 +1,47 @@
+TI Wilink 6/7/8 (wl12xx/wl18xx) SDIO devices
+
+This node provides properties for controlling the wilink wireless device. The
+node is expected to be specified as a child node to the SDIO controller that
+connects the device to the system.
+
+Required properties:
+ - compatible: should be one of the following:
+ * "ti,wl1271"
+ * "ti,wl1273"
+ * "ti,wl1281"
+ * "ti,wl1283"
+ * "ti,wl1801"
+ * "ti,wl1805"
+ * "ti,wl1807"
+ * "ti,wl1831"
+ * "ti,wl1835"
+ * "ti,wl1837"
+ - interrupts : specifies attributes for the out-of-band interrupt.
+
+Optional properties:
+ - interrupt-parent : the phandle for the interrupt controller to which the
+ device interrupts are connected.
+ - ref-clock-frequency : ref clock frequency in Hz
+ - tcxo-clock-frequency : tcxo clock frequency in Hz
+
+Note: the *-clock-frequency properties assume internal clocks. In case of external
+clock, new bindings (for parsing the clock nodes) have to be added.
+
+Example:
+
+&mmc3 {
+ status = "okay";
+ vmmc-supply = <&wlan_en_reg>;
+ bus-width = <4>;
+ cap-power-off-card;
+ keep-power-in-suspend;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ wlcore: wlcore@2 {
+ compatible = "ti,wl1835";
+ reg = <2>;
+ interrupt-parent = <&gpio0>;
+ interrupts = <19 IRQ_TYPE_LEVEL_HIGH>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/nios2/nios2.txt b/Documentation/devicetree/bindings/nios2/nios2.txt
new file mode 100644
index 000000000..d6d0a94cb
--- /dev/null
+++ b/Documentation/devicetree/bindings/nios2/nios2.txt
@@ -0,0 +1,62 @@
+* Nios II Processor Binding
+
+This binding specifies what properties available in the device tree
+representation of a Nios II Processor Core.
+
+Users can use sopc2dts tool for generating device tree sources (dts) from a
+Qsys system. See more detail in: http://www.alterawiki.com/wiki/Sopc2dts
+
+Required properties:
+
+- compatible: Compatible property value should be "altr,nios2-1.0".
+- reg: Contains CPU index.
+- interrupt-controller: Specifies that the node is an interrupt controller
+- #interrupt-cells: Specifies the number of cells needed to encode an
+ interrupt source, should be 1.
+- clock-frequency: Contains the clock frequency for CPU, in Hz.
+- dcache-line-size: Contains data cache line size.
+- icache-line-size: Contains instruction line size.
+- dcache-size: Contains data cache size.
+- icache-size: Contains instruction cache size.
+- altr,pid-num-bits: Specifies the number of bits to use to represent the process
+ identifier (PID).
+- altr,tlb-num-ways: Specifies the number of set-associativity ways in the TLB.
+- altr,tlb-num-entries: Specifies the number of entries in the TLB.
+- altr,tlb-ptr-sz: Specifies size of TLB pointer.
+- altr,has-mul: Specifies CPU hardware multipy support, should be 1.
+- altr,has-mmu: Specifies CPU support MMU support, should be 1.
+- altr,has-initda: Specifies CPU support initda instruction, should be 1.
+- altr,reset-addr: Specifies CPU reset address
+- altr,fast-tlb-miss-addr: Specifies CPU fast TLB miss exception address
+- altr,exception-addr: Specifies CPU exception address
+
+Optional properties:
+- altr,has-div: Specifies CPU hardware divide support
+- altr,implementation: Nios II core implementation, this should be "fast";
+
+Example:
+
+cpu@0x0 {
+ device_type = "cpu";
+ compatible = "altr,nios2-1.0";
+ reg = <0>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ clock-frequency = <125000000>;
+ dcache-line-size = <32>;
+ icache-line-size = <32>;
+ dcache-size = <32768>;
+ icache-size = <32768>;
+ altr,implementation = "fast";
+ altr,pid-num-bits = <8>;
+ altr,tlb-num-ways = <16>;
+ altr,tlb-num-entries = <128>;
+ altr,tlb-ptr-sz = <7>;
+ altr,has-div = <1>;
+ altr,has-mul = <1>;
+ altr,reset-addr = <0xc2800000>;
+ altr,fast-tlb-miss-addr = <0xc7fff400>;
+ altr,exception-addr = <0xd0000020>;
+ altr,has-initda = <1>;
+ altr,has-mmu = <1>;
+};
diff --git a/Documentation/devicetree/bindings/nios2/timer.txt b/Documentation/devicetree/bindings/nios2/timer.txt
new file mode 100644
index 000000000..904a5846d
--- /dev/null
+++ b/Documentation/devicetree/bindings/nios2/timer.txt
@@ -0,0 +1,19 @@
+Altera Timer
+
+Required properties:
+
+- compatible : should be "altr,timer-1.0"
+- reg : Specifies base physical address and size of the registers.
+- interrupt-parent: phandle of the interrupt controller
+- interrupts : Should contain the timer interrupt number
+- clock-frequency : The frequency of the clock that drives the counter, in Hz.
+
+Example:
+
+timer {
+ compatible = "altr,timer-1.0";
+ reg = <0x00400000 0x00000020>;
+ interrupt-parent = <&cpu>;
+ interrupts = <11>;
+ clock-frequency = <125000000>;
+};
diff --git a/Documentation/devicetree/bindings/nvec/nvidia,nvec.txt b/Documentation/devicetree/bindings/nvec/nvidia,nvec.txt
new file mode 100644
index 000000000..5ae601e7f
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvec/nvidia,nvec.txt
@@ -0,0 +1,21 @@
+NVIDIA compliant embedded controller
+
+Required properties:
+- compatible : should be "nvidia,nvec".
+- reg : the iomem of the i2c slave controller
+- interrupts : the interrupt line of the i2c slave controller
+- clock-frequency : the frequency of the i2c bus
+- gpios : the gpio used for ec request
+- slave-addr: the i2c address of the slave controller
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ Tegra20/Tegra30:
+ - div-clk
+ - fast-clk
+ Tegra114:
+ - div-clk
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - i2c
diff --git a/Documentation/devicetree/bindings/open-pic.txt b/Documentation/devicetree/bindings/open-pic.txt
new file mode 100644
index 000000000..909a902df
--- /dev/null
+++ b/Documentation/devicetree/bindings/open-pic.txt
@@ -0,0 +1,98 @@
+* Open PIC Binding
+
+This binding specifies what properties must be available in the device tree
+representation of an Open PIC compliant interrupt controller. This binding is
+based on the binding defined for Open PIC in [1] and is a superset of that
+binding.
+
+Required properties:
+
+ NOTE: Many of these descriptions were paraphrased here from [1] to aid
+ readability.
+
+ - compatible: Specifies the compatibility list for the PIC. The type
+ shall be <string> and the value shall include "open-pic".
+
+ - reg: Specifies the base physical address(s) and size(s) of this
+ PIC's addressable register space. The type shall be <prop-encoded-array>.
+
+ - interrupt-controller: The presence of this property identifies the node
+ as an Open PIC. No property value shall be defined.
+
+ - #interrupt-cells: Specifies the number of cells needed to encode an
+ interrupt source. The type shall be a <u32> and the value shall be 2.
+
+ - #address-cells: Specifies the number of cells needed to encode an
+ address. The type shall be <u32> and the value shall be 0. As such,
+ 'interrupt-map' nodes do not have to specify a parent unit address.
+
+Optional properties:
+
+ - pic-no-reset: The presence of this property indicates that the PIC
+ shall not be reset during runtime initialization. No property value shall
+ be defined. The presence of this property also mandates that any
+ initialization related to interrupt sources shall be limited to sources
+ explicitly referenced in the device tree.
+
+* Interrupt Specifier Definition
+
+ Interrupt specifiers consists of 2 cells encoded as
+ follows:
+
+ - <1st-cell>: The interrupt-number that identifies the interrupt source.
+
+ - <2nd-cell>: The level-sense information, encoded as follows:
+ 0 = low-to-high edge triggered
+ 1 = active low level-sensitive
+ 2 = active high level-sensitive
+ 3 = high-to-low edge triggered
+
+* Examples
+
+Example 1:
+
+ /*
+ * An Open PIC interrupt controller
+ */
+ mpic: pic@40000 {
+ // This is an interrupt controller node.
+ interrupt-controller;
+
+ // No address cells so that 'interrupt-map' nodes which reference
+ // this Open PIC node do not need a parent address specifier.
+ #address-cells = <0>;
+
+ // Two cells to encode interrupt sources.
+ #interrupt-cells = <2>;
+
+ // Offset address of 0x40000 and size of 0x40000.
+ reg = <0x40000 0x40000>;
+
+ // Compatible with Open PIC.
+ compatible = "open-pic";
+
+ // The PIC shall not be reset.
+ pic-no-reset;
+ };
+
+Example 2:
+
+ /*
+ * An interrupt generating device that is wired to an Open PIC.
+ */
+ serial0: serial@4500 {
+ // Interrupt source '42' that is active high level-sensitive.
+ // Note that there are only two cells as specified in the interrupt
+ // parent's '#interrupt-cells' property.
+ interrupts = <42 2>;
+
+ // The interrupt controller that this device is wired to.
+ interrupt-parent = <&mpic>;
+ };
+
+* References
+
+[1] Power.org (TM) Standard for Embedded Power Architecture (TM) Platform
+ Requirements (ePAPR), Version 1.0, July 2008.
+ (http://www.power.org/resources/downloads/Power_ePAPR_APPROVED_v1.0.pdf)
+
diff --git a/Documentation/devicetree/bindings/panel/ampire,am800480r3tmqwa1h.txt b/Documentation/devicetree/bindings/panel/ampire,am800480r3tmqwa1h.txt
new file mode 100644
index 000000000..83e2cae1c
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/ampire,am800480r3tmqwa1h.txt
@@ -0,0 +1,7 @@
+Ampire AM-800480R3TMQW-A1H 7.0" WVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "ampire,am800480r3tmqwa1h"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/auo,b101aw03.txt b/Documentation/devicetree/bindings/panel/auo,b101aw03.txt
new file mode 100644
index 000000000..72e088a4f
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/auo,b101aw03.txt
@@ -0,0 +1,7 @@
+AU Optronics Corporation 10.1" WSVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "auo,b101aw03"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/auo,b101ean01.txt b/Documentation/devicetree/bindings/panel/auo,b101ean01.txt
new file mode 100644
index 000000000..3590b0741
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/auo,b101ean01.txt
@@ -0,0 +1,7 @@
+AU Optronics Corporation 10.1" WSVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "auo,b101ean01"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/auo,b101xtn01.txt b/Documentation/devicetree/bindings/panel/auo,b101xtn01.txt
new file mode 100644
index 000000000..889d511d6
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/auo,b101xtn01.txt
@@ -0,0 +1,7 @@
+AU Optronics Corporation 10.1" WXGA TFT LCD panel
+
+Required properties:
+- compatible: should be "auo,b101xtn01"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/auo,b116xw03.txt b/Documentation/devicetree/bindings/panel/auo,b116xw03.txt
new file mode 100644
index 000000000..690d0a568
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/auo,b116xw03.txt
@@ -0,0 +1,7 @@
+AU Optronics Corporation 11.6" HD (1366x768) color TFT-LCD panel
+
+Required properties:
+- compatible: should be "auo,b116xw03"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/auo,b133htn01.txt b/Documentation/devicetree/bindings/panel/auo,b133htn01.txt
new file mode 100644
index 000000000..302226b5b
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/auo,b133htn01.txt
@@ -0,0 +1,7 @@
+AU Optronics Corporation 13.3" FHD (1920x1080) color TFT-LCD panel
+
+Required properties:
+- compatible: should be "auo,b133htn01"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/auo,b133xtn01.txt b/Documentation/devicetree/bindings/panel/auo,b133xtn01.txt
new file mode 100644
index 000000000..7443b7c76
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/auo,b133xtn01.txt
@@ -0,0 +1,7 @@
+AU Optronics Corporation 13.3" WXGA (1366x768) TFT LCD panel
+
+Required properties:
+- compatible: should be "auo,b133xtn01"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/avic,tm070ddh03.txt b/Documentation/devicetree/bindings/panel/avic,tm070ddh03.txt
new file mode 100644
index 000000000..b6f2f3e8f
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/avic,tm070ddh03.txt
@@ -0,0 +1,7 @@
+Shanghai AVIC Optoelectronics 7" 1024x600 color TFT-LCD panel
+
+Required properties:
+- compatible: should be "avic,tm070ddh03"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/chunghwa,claa101wa01a.txt b/Documentation/devicetree/bindings/panel/chunghwa,claa101wa01a.txt
new file mode 100644
index 000000000..f24614e4d
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/chunghwa,claa101wa01a.txt
@@ -0,0 +1,7 @@
+Chunghwa Picture Tubes Ltd. 10.1" WXGA TFT LCD panel
+
+Required properties:
+- compatible: should be "chunghwa,claa101wa01a"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/chunghwa,claa101wb03.txt b/Documentation/devicetree/bindings/panel/chunghwa,claa101wb03.txt
new file mode 100644
index 000000000..0ab2c05a4
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/chunghwa,claa101wb03.txt
@@ -0,0 +1,7 @@
+Chunghwa Picture Tubes Ltd. 10.1" WXGA TFT LCD panel
+
+Required properties:
+- compatible: should be "chunghwa,claa101wb03"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/edt,et057090dhu.txt b/Documentation/devicetree/bindings/panel/edt,et057090dhu.txt
new file mode 100644
index 000000000..4903d7b1d
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/edt,et057090dhu.txt
@@ -0,0 +1,7 @@
+Emerging Display Technology Corp. 5.7" VGA TFT LCD panel
+
+Required properties:
+- compatible: should be "edt,et057090dhu"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/edt,et070080dh6.txt b/Documentation/devicetree/bindings/panel/edt,et070080dh6.txt
new file mode 100644
index 000000000..20cb38e83
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/edt,et070080dh6.txt
@@ -0,0 +1,10 @@
+Emerging Display Technology Corp. ET070080DH6 7.0" WVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "edt,et070080dh6"
+
+This panel is the same as ETM0700G0DH6 except for the touchscreen.
+ET070080DH6 is the model with resistive touch.
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/edt,etm0700g0dh6.txt b/Documentation/devicetree/bindings/panel/edt,etm0700g0dh6.txt
new file mode 100644
index 000000000..ee4b18053
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/edt,etm0700g0dh6.txt
@@ -0,0 +1,10 @@
+Emerging Display Technology Corp. ETM0700G0DH6 7.0" WVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "edt,etm0700g0dh6"
+
+This panel is the same as ET070080DH6 except for the touchscreen.
+ETM0700G0DH6 is the model with capacitive multitouch.
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/foxlink,fl500wvr00-a0t.txt b/Documentation/devicetree/bindings/panel/foxlink,fl500wvr00-a0t.txt
new file mode 100644
index 000000000..b47f9d87b
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/foxlink,fl500wvr00-a0t.txt
@@ -0,0 +1,7 @@
+Foxlink Group 5" WVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "foxlink,fl500wvr00-a0t"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/giantplus,gpg482739qs5.txt b/Documentation/devicetree/bindings/panel/giantplus,gpg482739qs5.txt
new file mode 100644
index 000000000..24b0b6244
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/giantplus,gpg482739qs5.txt
@@ -0,0 +1,7 @@
+GiantPlus GPG48273QS5 4.3" (480x272) WQVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "giantplus,gpg48273qs5"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/hannstar,hsd070pww1.txt b/Documentation/devicetree/bindings/panel/hannstar,hsd070pww1.txt
new file mode 100644
index 000000000..7da1d5c03
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/hannstar,hsd070pww1.txt
@@ -0,0 +1,7 @@
+HannStar Display Corp. HSD070PWW1 7.0" WXGA TFT LCD panel
+
+Required properties:
+- compatible: should be "hannstar,hsd070pww1"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/hit,tx23d38vm0caa.txt b/Documentation/devicetree/bindings/panel/hit,tx23d38vm0caa.txt
new file mode 100644
index 000000000..04caaae19
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/hit,tx23d38vm0caa.txt
@@ -0,0 +1,7 @@
+Hitachi Ltd. Corporation 9" WVGA (800x480) TFT LCD panel
+
+Required properties:
+- compatible: should be "hit,tx23d38vm0caa"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/innolux,at043tn24.txt b/Documentation/devicetree/bindings/panel/innolux,at043tn24.txt
new file mode 100644
index 000000000..4104226b6
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/innolux,at043tn24.txt
@@ -0,0 +1,7 @@
+Innolux AT043TN24 4.3" WQVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "innolux,at043tn24"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/innolux,g121i1-l01.txt b/Documentation/devicetree/bindings/panel/innolux,g121i1-l01.txt
new file mode 100644
index 000000000..2743b07cd
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/innolux,g121i1-l01.txt
@@ -0,0 +1,7 @@
+Innolux Corporation 12.1" WXGA (1280x800) TFT LCD panel
+
+Required properties:
+- compatible: should be "innolux,g121i1-l01"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/innolux,n116bge.txt b/Documentation/devicetree/bindings/panel/innolux,n116bge.txt
new file mode 100644
index 000000000..081bb939e
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/innolux,n116bge.txt
@@ -0,0 +1,7 @@
+Innolux Corporation 11.6" WXGA (1366x768) TFT LCD panel
+
+Required properties:
+- compatible: should be "innolux,n116bge"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/innolux,n156bge-l21.txt b/Documentation/devicetree/bindings/panel/innolux,n156bge-l21.txt
new file mode 100644
index 000000000..7825844aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/innolux,n156bge-l21.txt
@@ -0,0 +1,7 @@
+InnoLux 15.6" WXGA TFT LCD panel
+
+Required properties:
+- compatible: should be "innolux,n156bge-l21"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/innolux,zj070na-01p.txt b/Documentation/devicetree/bindings/panel/innolux,zj070na-01p.txt
new file mode 100644
index 000000000..824f87f15
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/innolux,zj070na-01p.txt
@@ -0,0 +1,7 @@
+Innolux Corporation 7.0" WSVGA (1024x600) TFT LCD panel
+
+Required properties:
+- compatible: should be "innolux,zj070na-01p"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/lg,ld070wx3-sl01.txt b/Documentation/devicetree/bindings/panel/lg,ld070wx3-sl01.txt
new file mode 100644
index 000000000..5e649cb9a
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/lg,ld070wx3-sl01.txt
@@ -0,0 +1,7 @@
+LG Corporation 7" WXGA TFT LCD panel
+
+Required properties:
+- compatible: should be "lg,ld070wx3-sl01"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/lg,lh500wx1-sd03.txt b/Documentation/devicetree/bindings/panel/lg,lh500wx1-sd03.txt
new file mode 100644
index 000000000..a04fd2b2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/lg,lh500wx1-sd03.txt
@@ -0,0 +1,7 @@
+LG Corporation 5" HD TFT LCD panel
+
+Required properties:
+- compatible: should be "lg,lh500wx1-sd03"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/lg,lp129qe.txt b/Documentation/devicetree/bindings/panel/lg,lp129qe.txt
new file mode 100644
index 000000000..9f262e0c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/lg,lp129qe.txt
@@ -0,0 +1,7 @@
+LG 12.9" (2560x1700 pixels) TFT LCD panel
+
+Required properties:
+- compatible: should be "lg,lp129qe"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/ortustech,com43h4m85ulc.txt b/Documentation/devicetree/bindings/panel/ortustech,com43h4m85ulc.txt
new file mode 100644
index 000000000..de19e9398
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/ortustech,com43h4m85ulc.txt
@@ -0,0 +1,7 @@
+OrtusTech COM43H4M85ULC Blanview 3.7" TFT-LCD panel
+
+Required properties:
+- compatible: should be "ortustech,com43h4m85ulc"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/panasonic,vvx10f004b00.txt b/Documentation/devicetree/bindings/panel/panasonic,vvx10f004b00.txt
new file mode 100644
index 000000000..d328b0341
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/panasonic,vvx10f004b00.txt
@@ -0,0 +1,7 @@
+Panasonic Corporation 10.1" WUXGA TFT LCD panel
+
+Required properties:
+- compatible: should be "panasonic,vvx10f004b00"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/samsung,ld9040.txt b/Documentation/devicetree/bindings/panel/samsung,ld9040.txt
new file mode 100644
index 000000000..07c36c3f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/samsung,ld9040.txt
@@ -0,0 +1,66 @@
+Samsung LD9040 AMOLED LCD parallel RGB panel with SPI control bus
+
+Required properties:
+ - compatible: "samsung,ld9040"
+ - reg: address of the panel on SPI bus
+ - vdd3-supply: core voltage supply
+ - vci-supply: voltage supply for analog circuits
+ - reset-gpios: a GPIO spec for the reset pin
+ - display-timings: timings for the connected panel according to [1]
+
+The panel must obey rules for SPI slave device specified in document [2].
+
+Optional properties:
+ - power-on-delay: delay after turning regulators on [ms]
+ - reset-delay: delay after reset sequence [ms]
+ - panel-width-mm: physical panel width [mm]
+ - panel-height-mm: physical panel height [mm]
+
+The device node can contain one 'port' child node with one child
+'endpoint' node, according to the bindings defined in [3]. This
+node should describe panel's video bus.
+
+[1]: Documentation/devicetree/bindings/video/display-timing.txt
+[2]: Documentation/devicetree/bindings/spi/spi-bus.txt
+[3]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+
+ lcd@0 {
+ compatible = "samsung,ld9040";
+ reg = <0>;
+ vdd3-supply = <&ldo7_reg>;
+ vci-supply = <&ldo17_reg>;
+ reset-gpios = <&gpy4 5 0>;
+ spi-max-frequency = <1200000>;
+ spi-cpol;
+ spi-cpha;
+ power-on-delay = <10>;
+ reset-delay = <10>;
+ panel-width-mm = <90>;
+ panel-height-mm = <154>;
+
+ display-timings {
+ timing {
+ clock-frequency = <23492370>;
+ hactive = <480>;
+ vactive = <800>;
+ hback-porch = <16>;
+ hfront-porch = <16>;
+ vback-porch = <2>;
+ vfront-porch = <28>;
+ hsync-len = <2>;
+ vsync-len = <1>;
+ hsync-active = <0>;
+ vsync-active = <0>;
+ de-active = <0>;
+ pixelclk-active = <0>;
+ };
+ };
+
+ port {
+ lcd_ep: endpoint {
+ remote-endpoint = <&fimd_dpi_ep>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/panel/samsung,ltn101nt05.txt b/Documentation/devicetree/bindings/panel/samsung,ltn101nt05.txt
new file mode 100644
index 000000000..ef522c6bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/samsung,ltn101nt05.txt
@@ -0,0 +1,7 @@
+Samsung Electronics 10.1" WSVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "samsung,ltn101nt05"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/samsung,ltn140at29-301.txt b/Documentation/devicetree/bindings/panel/samsung,ltn140at29-301.txt
new file mode 100644
index 000000000..e7f969d89
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/samsung,ltn140at29-301.txt
@@ -0,0 +1,7 @@
+Samsung Electronics 14" WXGA (1366x768) TFT LCD panel
+
+Required properties:
+- compatible: should be "samsung,ltn140at29-301"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/samsung,s6e8aa0.txt b/Documentation/devicetree/bindings/panel/samsung,s6e8aa0.txt
new file mode 100644
index 000000000..e7ee988e3
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/samsung,s6e8aa0.txt
@@ -0,0 +1,56 @@
+Samsung S6E8AA0 AMOLED LCD 5.3 inch panel
+
+Required properties:
+ - compatible: "samsung,s6e8aa0"
+ - reg: the virtual channel number of a DSI peripheral
+ - vdd3-supply: core voltage supply
+ - vci-supply: voltage supply for analog circuits
+ - reset-gpios: a GPIO spec for the reset pin
+ - display-timings: timings for the connected panel as described by [1]
+
+Optional properties:
+ - power-on-delay: delay after turning regulators on [ms]
+ - reset-delay: delay after reset sequence [ms]
+ - init-delay: delay after initialization sequence [ms]
+ - panel-width-mm: physical panel width [mm]
+ - panel-height-mm: physical panel height [mm]
+ - flip-horizontal: boolean to flip image horizontally
+ - flip-vertical: boolean to flip image vertically
+
+The device node can contain one 'port' child node with one child
+'endpoint' node, according to the bindings defined in [2]. This
+node should describe panel's video bus.
+
+[1]: Documentation/devicetree/bindings/video/display-timing.txt
+[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+
+ panel {
+ compatible = "samsung,s6e8aa0";
+ reg = <0>;
+ vdd3-supply = <&vcclcd_reg>;
+ vci-supply = <&vlcd_reg>;
+ reset-gpios = <&gpy4 5 0>;
+ power-on-delay= <50>;
+ reset-delay = <100>;
+ init-delay = <100>;
+ panel-width-mm = <58>;
+ panel-height-mm = <103>;
+ flip-horizontal;
+ flip-vertical;
+
+ display-timings {
+ timing0: timing-0 {
+ clock-frequency = <57153600>;
+ hactive = <720>;
+ vactive = <1280>;
+ hfront-porch = <5>;
+ hback-porch = <5>;
+ hsync-len = <5>;
+ vfront-porch = <13>;
+ vback-porch = <1>;
+ vsync-len = <2>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/panel/sharp,lq101r1sx01.txt b/Documentation/devicetree/bindings/panel/sharp,lq101r1sx01.txt
new file mode 100644
index 000000000..f522bb8e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/sharp,lq101r1sx01.txt
@@ -0,0 +1,49 @@
+Sharp Microelectronics 10.1" WQXGA TFT LCD panel
+
+This panel requires a dual-channel DSI host to operate. It supports two modes:
+- left-right: each channel drives the left or right half of the screen
+- even-odd: each channel drives the even or odd lines of the screen
+
+Each of the DSI channels controls a separate DSI peripheral. The peripheral
+driven by the first link (DSI-LINK1), left or even, is considered the primary
+peripheral and controls the device. The 'link2' property contains a phandle
+to the peripheral driven by the second link (DSI-LINK2, right or odd).
+
+Note that in video mode the DSI-LINK1 interface always provides the left/even
+pixels and DSI-LINK2 always provides the right/odd pixels. In command mode it
+is possible to program either link to drive the left/even or right/odd pixels
+but for the sake of consistency this binding assumes that the same assignment
+is chosen as for video mode.
+
+Required properties:
+- compatible: should be "sharp,lq101r1sx01"
+- reg: DSI virtual channel of the peripheral
+
+Required properties (for DSI-LINK1 only):
+- link2: phandle to the DSI peripheral on the secondary link. Note that the
+ presence of this property marks the containing node as DSI-LINK1.
+- power-supply: phandle of the regulator that provides the supply voltage
+
+Optional properties (for DSI-LINK1 only):
+- backlight: phandle of the backlight device attached to the panel
+
+Example:
+
+ dsi@54300000 {
+ panel: panel@0 {
+ compatible = "sharp,lq101r1sx01";
+ reg = <0>;
+
+ link2 = <&secondary>;
+
+ power-supply = <...>;
+ backlight = <...>;
+ };
+ };
+
+ dsi@54400000 {
+ secondary: panel@0 {
+ compatible = "sharp,lq101r1sx01";
+ reg = <0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/panel/shelly,sca07010-bfn-lnn.txt b/Documentation/devicetree/bindings/panel/shelly,sca07010-bfn-lnn.txt
new file mode 100644
index 000000000..fc1ea9e26
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/shelly,sca07010-bfn-lnn.txt
@@ -0,0 +1,7 @@
+Shelly SCA07010-BFN-LNN 7.0" WVGA TFT LCD panel
+
+Required properties:
+- compatible: should be "shelly,sca07010-bfn-lnn"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/panel/simple-panel.txt b/Documentation/devicetree/bindings/panel/simple-panel.txt
new file mode 100644
index 000000000..1341bbf4a
--- /dev/null
+++ b/Documentation/devicetree/bindings/panel/simple-panel.txt
@@ -0,0 +1,21 @@
+Simple display panel
+
+Required properties:
+- power-supply: regulator to provide the supply voltage
+
+Optional properties:
+- ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
+- enable-gpios: GPIO pin to enable or disable the panel
+- backlight: phandle of the backlight device attached to the panel
+
+Example:
+
+ panel: panel {
+ compatible = "cptt,claa101wb01";
+ ddc-i2c-bus = <&panelddc>;
+
+ power-supply = <&vdd_pnl_reg>;
+ enable-gpios = <&gpio 90 0>;
+
+ backlight = <&backlight>;
+ };
diff --git a/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt b/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt
new file mode 100644
index 000000000..35a465362
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/83xx-512x-pci.txt
@@ -0,0 +1,40 @@
+* Freescale 83xx and 512x PCI bridges
+
+Freescale 83xx and 512x SOCs include the same pci bridge core.
+
+83xx/512x specific notes:
+- reg: should contain two address length tuples
+ The first is for the internal pci bridge registers
+ The second is for the pci config space access registers
+
+Example (MPC8313ERDB)
+ pci0: pci@e0008500 {
+ cell-index = <1>;
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+ interrupt-map = <
+ /* IDSEL 0x0E -mini PCI */
+ 0x7000 0x0 0x0 0x1 &ipic 18 0x8
+ 0x7000 0x0 0x0 0x2 &ipic 18 0x8
+ 0x7000 0x0 0x0 0x3 &ipic 18 0x8
+ 0x7000 0x0 0x0 0x4 &ipic 18 0x8
+
+ /* IDSEL 0x0F - PCI slot */
+ 0x7800 0x0 0x0 0x1 &ipic 17 0x8
+ 0x7800 0x0 0x0 0x2 &ipic 18 0x8
+ 0x7800 0x0 0x0 0x3 &ipic 17 0x8
+ 0x7800 0x0 0x0 0x4 &ipic 18 0x8>;
+ interrupt-parent = <&ipic>;
+ interrupts = <66 0x8>;
+ bus-range = <0x0 0x0>;
+ ranges = <0x02000000 0x0 0x90000000 0x90000000 0x0 0x10000000
+ 0x42000000 0x0 0x80000000 0x80000000 0x0 0x10000000
+ 0x01000000 0x0 0x00000000 0xe2000000 0x0 0x00100000>;
+ clock-frequency = <66666666>;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = <0xe0008500 0x100 /* internal registers */
+ 0xe0008300 0x8>; /* config space access registers */
+ compatible = "fsl,mpc8349-pci";
+ device_type = "pci";
+ };
diff --git a/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
new file mode 100644
index 000000000..f7ce50e38
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/brcm,iproc-pcie.txt
@@ -0,0 +1,63 @@
+* Broadcom iProc PCIe controller with the platform bus interface
+
+Required properties:
+- compatible: Must be "brcm,iproc-pcie"
+- reg: base address and length of the PCIe controller I/O register space
+- #interrupt-cells: set to <1>
+- interrupt-map-mask and interrupt-map, standard PCI properties to define the
+ mapping of the PCIe interface to interrupt numbers
+- linux,pci-domain: PCI domain ID. Should be unique for each host controller
+- bus-range: PCI bus numbers covered
+- #address-cells: set to <3>
+- #size-cells: set to <2>
+- device_type: set to "pci"
+- ranges: ranges for the PCI memory and I/O regions
+
+Optional properties:
+- phys: phandle of the PCIe PHY device
+- phy-names: must be "pcie-phy"
+
+Example:
+ pcie0: pcie@18012000 {
+ compatible = "brcm,iproc-pcie";
+ reg = <0x18012000 0x1000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &gic GIC_SPI 100 IRQ_TYPE_NONE>;
+
+ linux,pci-domain = <0>;
+
+ bus-range = <0x00 0xff>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ ranges = <0x81000000 0 0 0x28000000 0 0x00010000
+ 0x82000000 0 0x20000000 0x20000000 0 0x04000000>;
+
+ phys = <&phy 0 5>;
+ phy-names = "pcie-phy";
+ };
+
+ pcie1: pcie@18013000 {
+ compatible = "brcm,iproc-pcie";
+ reg = <0x18013000 0x1000>;
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &gic GIC_SPI 106 IRQ_TYPE_NONE>;
+
+ linux,pci-domain = <1>;
+
+ bus-range = <0x00 0xff>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ ranges = <0x81000000 0 0 0x48000000 0 0x00010000
+ 0x82000000 0 0x40000000 0x40000000 0 0x04000000>;
+
+ phys = <&phy 1 6>;
+ phy-names = "pcie-phy";
+ };
diff --git a/Documentation/devicetree/bindings/pci/designware-pcie.txt b/Documentation/devicetree/bindings/pci/designware-pcie.txt
new file mode 100644
index 000000000..9f4faa8e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/designware-pcie.txt
@@ -0,0 +1,28 @@
+* Synopsys Designware PCIe interface
+
+Required properties:
+- compatible: should contain "snps,dw-pcie" to identify the core.
+- reg: Should contain the configuration address space.
+- reg-names: Must be "config" for the PCIe configuration space.
+ (The old way of getting the configuration address space from "ranges"
+ is deprecated and should be avoided.)
+- #address-cells: set to <3>
+- #size-cells: set to <2>
+- device_type: set to "pci"
+- ranges: ranges for the PCI memory and I/O regions
+- #interrupt-cells: set to <1>
+- interrupt-map-mask and interrupt-map: standard PCI properties
+ to define the mapping of the PCIe interface to interrupt
+ numbers.
+- num-lanes: number of lanes to use
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - "pcie"
+ - "pcie_bus"
+
+Optional properties:
+- reset-gpio: gpio pin number of power good signal
+- bus-range: PCI bus numbers covered (it is recommended for new devicetrees to
+ specify this property, to keep backwards compatibility a range of 0x00-0xff
+ is assumed if not present)
diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
new file mode 100644
index 000000000..6fbba53a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
@@ -0,0 +1,40 @@
+* Freescale i.MX6 PCIe interface
+
+This PCIe host controller is based on the Synopsis Designware PCIe IP
+and thus inherits all the common properties defined in designware-pcie.txt.
+
+Required properties:
+- compatible: "fsl,imx6q-pcie"
+- reg: base addresse and length of the pcie controller
+- interrupts: A list of interrupt outputs of the controller. Must contain an
+ entry for each entry in the interrupt-names property.
+- interrupt-names: Must include the following entries:
+ - "msi": The interrupt that is asserted when an MSI is received
+- clock-names: Must include the following additional entries:
+ - "pcie_phy"
+
+Example:
+
+ pcie@0x01000000 {
+ compatible = "fsl,imx6q-pcie", "snps,dw-pcie";
+ reg = <0x01ffc000 0x04000>,
+ <0x01f00000 0x80000>;
+ reg-names = "dbi", "config";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ ranges = <0x00000800 0 0x01f00000 0x01f00000 0 0x00080000
+ 0x81000000 0 0 0x01f80000 0 0x00010000
+ 0x82000000 0 0x01000000 0x01000000 0 0x00f00000>;
+ num-lanes = <1>;
+ interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "msi";
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0x7>;
+ interrupt-map = <0 0 0 1 &intc GIC_SPI 123 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 2 &intc GIC_SPI 122 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 3 &intc GIC_SPI 121 IRQ_TYPE_LEVEL_HIGH>,
+ <0 0 0 4 &intc GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks 144>, <&clks 206>, <&clks 189>;
+ clock-names = "pcie", "pcie_bus", "pcie_phy";
+ };
diff --git a/Documentation/devicetree/bindings/pci/fsl,pci.txt b/Documentation/devicetree/bindings/pci/fsl,pci.txt
new file mode 100644
index 000000000..d8ac4a768
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/fsl,pci.txt
@@ -0,0 +1,27 @@
+* Bus Enumeration by Freescale PCI-X Agent
+
+Typically any Freescale PCI-X bridge hardware strapped into Agent mode
+is prevented from enumerating the bus. The PrPMC form-factor requires
+all mezzanines to be PCI-X Agents, but one per system may still
+enumerate the bus.
+
+The property defined below will allow a PCI-X bridge to be used for bus
+enumeration despite being strapped into Agent mode.
+
+Required properties:
+- fsl,pci-agent-force-enum : There is no value associated with this
+ property. The property itself is treated as a boolean.
+
+Example:
+
+ /* PCI-X bridge known to be PrPMC Monarch */
+ pci0: pci@ef008000 {
+ fsl,pci-agent-force-enum;
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ compatible = "fsl,mpc8540-pcix", "fsl,mpc8540-pci";
+ device_type = "pci";
+ ...
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/pci/host-generic-pci.txt b/Documentation/devicetree/bindings/pci/host-generic-pci.txt
new file mode 100644
index 000000000..cf3e205e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/host-generic-pci.txt
@@ -0,0 +1,100 @@
+* Generic PCI host controller
+
+Firmware-initialised PCI host controllers and PCI emulations, such as the
+virtio-pci implementations found in kvmtool and other para-virtualised
+systems, do not require driver support for complexities such as regulator
+and clock management. In fact, the controller may not even require the
+configuration of a control interface by the operating system, instead
+presenting a set of fixed windows describing a subset of IO, Memory and
+Configuration Spaces.
+
+Such a controller can be described purely in terms of the standardized device
+tree bindings communicated in pci.txt:
+
+
+Properties of the host controller node:
+
+- compatible : Must be "pci-host-cam-generic" or "pci-host-ecam-generic"
+ depending on the layout of configuration space (CAM vs
+ ECAM respectively).
+
+- device_type : Must be "pci".
+
+- ranges : As described in IEEE Std 1275-1994, but must provide
+ at least a definition of non-prefetchable memory. One
+ or both of prefetchable Memory and IO Space may also
+ be provided.
+
+- bus-range : Optional property (also described in IEEE Std 1275-1994)
+ to indicate the range of bus numbers for this controller.
+ If absent, defaults to <0 255> (i.e. all buses).
+
+- #address-cells : Must be 3.
+
+- #size-cells : Must be 2.
+
+- reg : The Configuration Space base address and size, as accessed
+ from the parent bus.
+
+
+Properties of the /chosen node:
+
+- linux,pci-probe-only
+ : Optional property which takes a single-cell argument.
+ If '0', then Linux will assign devices in its usual manner,
+ otherwise it will not try to assign devices and instead use
+ them as they are configured already.
+
+Configuration Space is assumed to be memory-mapped (as opposed to being
+accessed via an ioport) and laid out with a direct correspondence to the
+geography of a PCI bus address by concatenating the various components to
+form an offset.
+
+For CAM, this 24-bit offset is:
+
+ cfg_offset(bus, device, function, register) =
+ bus << 16 | device << 11 | function << 8 | register
+
+Whilst ECAM extends this by 4 bits to accommodate 4k of function space:
+
+ cfg_offset(bus, device, function, register) =
+ bus << 20 | device << 15 | function << 12 | register
+
+Interrupt mapping is exactly as described in `Open Firmware Recommended
+Practice: Interrupt Mapping' and requires the following properties:
+
+- #interrupt-cells : Must be 1
+
+- interrupt-map : <see aforementioned specification>
+
+- interrupt-map-mask : <see aforementioned specification>
+
+
+Example:
+
+pci {
+ compatible = "pci-host-cam-generic"
+ device_type = "pci";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ bus-range = <0x0 0x1>;
+
+ // CPU_PHYSICAL(2) SIZE(2)
+ reg = <0x0 0x40000000 0x0 0x1000000>;
+
+ // BUS_ADDRESS(3) CPU_PHYSICAL(2) SIZE(2)
+ ranges = <0x01000000 0x0 0x01000000 0x0 0x01000000 0x0 0x00010000>,
+ <0x02000000 0x0 0x41000000 0x0 0x41000000 0x0 0x3f000000>;
+
+
+ #interrupt-cells = <0x1>;
+
+ // PCI_DEVICE(3) INT#(1) CONTROLLER(PHANDLE) CONTROLLER_DATA(3)
+ interrupt-map = < 0x0 0x0 0x0 0x1 &gic 0x0 0x4 0x1
+ 0x800 0x0 0x0 0x1 &gic 0x0 0x5 0x1
+ 0x1000 0x0 0x0 0x1 &gic 0x0 0x6 0x1
+ 0x1800 0x0 0x0 0x1 &gic 0x0 0x7 0x1>;
+
+ // PCI_DEVICE(3) INT#(1)
+ interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
+}
diff --git a/Documentation/devicetree/bindings/pci/layerscape-pci.txt b/Documentation/devicetree/bindings/pci/layerscape-pci.txt
new file mode 100644
index 000000000..6286f049b
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/layerscape-pci.txt
@@ -0,0 +1,42 @@
+Freescale Layerscape PCIe controller
+
+This PCIe host controller is based on the Synopsis Designware PCIe IP
+and thus inherits all the common properties defined in designware-pcie.txt.
+
+Required properties:
+- compatible: should contain the platform identifier such as "fsl,ls1021a-pcie"
+- reg: base addresses and lengths of the PCIe controller
+- interrupts: A list of interrupt outputs of the controller. Must contain an
+ entry for each entry in the interrupt-names property.
+- interrupt-names: Must include the following entries:
+ "intr": The interrupt that is asserted for controller interrupts
+- fsl,pcie-scfg: Must include two entries.
+ The first entry must be a link to the SCFG device node
+ The second entry must be '0' or '1' based on physical PCIe controller index.
+ This is used to get SCFG PEXN registers
+
+Example:
+
+ pcie@3400000 {
+ compatible = "fsl,ls1021a-pcie", "snps,dw-pcie";
+ reg = <0x00 0x03400000 0x0 0x00010000 /* controller registers */
+ 0x40 0x00000000 0x0 0x00002000>; /* configuration space */
+ reg-names = "regs", "config";
+ interrupts = <GIC_SPI 177 IRQ_TYPE_LEVEL_HIGH>; /* controller interrupt */
+ interrupt-names = "intr";
+ fsl,pcie-scfg = <&scfg 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ num-lanes = <4>;
+ bus-range = <0x0 0xff>;
+ ranges = <0x81000000 0x0 0x00000000 0x40 0x00010000 0x0 0x00010000 /* downstream I/O */
+ 0xc2000000 0x0 0x20000000 0x40 0x20000000 0x0 0x20000000 /* prefetchable memory */
+ 0x82000000 0x0 0x40000000 0x40 0x40000000 0x0 0x40000000>; /* non-prefetchable memory */
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 7>;
+ interrupt-map = <0000 0 0 1 &gic GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>,
+ <0000 0 0 2 &gic GIC_SPI 188 IRQ_TYPE_LEVEL_HIGH>,
+ <0000 0 0 3 &gic GIC_SPI 190 IRQ_TYPE_LEVEL_HIGH>,
+ <0000 0 0 4 &gic GIC_SPI 192 IRQ_TYPE_LEVEL_HIGH>;
+ };
diff --git a/Documentation/devicetree/bindings/pci/mvebu-pci.txt b/Documentation/devicetree/bindings/pci/mvebu-pci.txt
new file mode 100644
index 000000000..08c716b2c
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/mvebu-pci.txt
@@ -0,0 +1,304 @@
+* Marvell EBU PCIe interfaces
+
+Mandatory properties:
+
+- compatible: one of the following values:
+ marvell,armada-370-pcie
+ marvell,armada-xp-pcie
+ marvell,dove-pcie
+ marvell,kirkwood-pcie
+- #address-cells, set to <3>
+- #size-cells, set to <2>
+- #interrupt-cells, set to <1>
+- bus-range: PCI bus numbers covered
+- device_type, set to "pci"
+- ranges: ranges describing the MMIO registers to control the PCIe
+ interfaces, and ranges describing the MBus windows needed to access
+ the memory and I/O regions of each PCIe interface.
+- msi-parent: Link to the hardware entity that serves as the Message
+ Signaled Interrupt controller for this PCI controller.
+
+The ranges describing the MMIO registers have the following layout:
+
+ 0x82000000 0 r MBUS_ID(0xf0, 0x01) r 0 s
+
+where:
+
+ * r is a 32-bits value that gives the offset of the MMIO
+ registers of this PCIe interface, from the base of the internal
+ registers.
+
+ * s is a 32-bits value that give the size of this MMIO
+ registers area. This range entry translates the '0x82000000 0 r' PCI
+ address into the 'MBUS_ID(0xf0, 0x01) r' CPU address, which is part
+ of the internal register window (as identified by MBUS_ID(0xf0,
+ 0x01)).
+
+The ranges describing the MBus windows have the following layout:
+
+ 0x8t000000 s 0 MBUS_ID(w, a) 0 1 0
+
+where:
+
+ * t is the type of the MBus window (as defined by the standard PCI DT
+ bindings), 1 for I/O and 2 for memory.
+
+ * s is the PCI slot that corresponds to this PCIe interface
+
+ * w is the 'target ID' value for the MBus window
+
+ * a the 'attribute' value for the MBus window.
+
+Since the location and size of the different MBus windows is not fixed in
+hardware, and only determined in runtime, those ranges cover the full first
+4 GB of the physical address space, and do not translate into a valid CPU
+address.
+
+In addition, the device tree node must have sub-nodes describing each
+PCIe interface, having the following mandatory properties:
+
+- reg: used only for interrupt mapping, so only the first four bytes
+ are used to refer to the correct bus number and device number.
+- assigned-addresses: reference to the MMIO registers used to control
+ this PCIe interface.
+- clocks: the clock associated to this PCIe interface
+- marvell,pcie-port: the physical PCIe port number
+- status: either "disabled" or "okay"
+- device_type, set to "pci"
+- #address-cells, set to <3>
+- #size-cells, set to <2>
+- #interrupt-cells, set to <1>
+- ranges, translating the MBus windows ranges of the parent node into
+ standard PCI addresses.
+- interrupt-map-mask and interrupt-map, standard PCI properties to
+ define the mapping of the PCIe interface to interrupt numbers.
+
+and the following optional properties:
+- marvell,pcie-lane: the physical PCIe lane number, for ports having
+ multiple lanes. If this property is not found, we assume that the
+ value is 0.
+- reset-gpios: optional gpio to PERST#
+- reset-delay-us: delay in us to wait after reset de-assertion
+
+Example:
+
+pcie-controller {
+ compatible = "marvell,armada-xp-pcie";
+ status = "disabled";
+ device_type = "pci";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ bus-range = <0x00 0xff>;
+ msi-parent = <&mpic>;
+
+ ranges =
+ <0x82000000 0 0x40000 MBUS_ID(0xf0, 0x01) 0x40000 0 0x00002000 /* Port 0.0 registers */
+ 0x82000000 0 0x42000 MBUS_ID(0xf0, 0x01) 0x42000 0 0x00002000 /* Port 2.0 registers */
+ 0x82000000 0 0x44000 MBUS_ID(0xf0, 0x01) 0x44000 0 0x00002000 /* Port 0.1 registers */
+ 0x82000000 0 0x48000 MBUS_ID(0xf0, 0x01) 0x48000 0 0x00002000 /* Port 0.2 registers */
+ 0x82000000 0 0x4c000 MBUS_ID(0xf0, 0x01) 0x4c000 0 0x00002000 /* Port 0.3 registers */
+ 0x82000000 0 0x80000 MBUS_ID(0xf0, 0x01) 0x80000 0 0x00002000 /* Port 1.0 registers */
+ 0x82000000 0 0x82000 MBUS_ID(0xf0, 0x01) 0x82000 0 0x00002000 /* Port 3.0 registers */
+ 0x82000000 0 0x84000 MBUS_ID(0xf0, 0x01) 0x84000 0 0x00002000 /* Port 1.1 registers */
+ 0x82000000 0 0x88000 MBUS_ID(0xf0, 0x01) 0x88000 0 0x00002000 /* Port 1.2 registers */
+ 0x82000000 0 0x8c000 MBUS_ID(0xf0, 0x01) 0x8c000 0 0x00002000 /* Port 1.3 registers */
+ 0x82000000 0x1 0 MBUS_ID(0x04, 0xe8) 0 1 0 /* Port 0.0 MEM */
+ 0x81000000 0x1 0 MBUS_ID(0x04, 0xe0) 0 1 0 /* Port 0.0 IO */
+ 0x82000000 0x2 0 MBUS_ID(0x04, 0xd8) 0 1 0 /* Port 0.1 MEM */
+ 0x81000000 0x2 0 MBUS_ID(0x04, 0xd0) 0 1 0 /* Port 0.1 IO */
+ 0x82000000 0x3 0 MBUS_ID(0x04, 0xb8) 0 1 0 /* Port 0.2 MEM */
+ 0x81000000 0x3 0 MBUS_ID(0x04, 0xb0) 0 1 0 /* Port 0.2 IO */
+ 0x82000000 0x4 0 MBUS_ID(0x04, 0x78) 0 1 0 /* Port 0.3 MEM */
+ 0x81000000 0x4 0 MBUS_ID(0x04, 0x70) 0 1 0 /* Port 0.3 IO */
+
+ 0x82000000 0x5 0 MBUS_ID(0x08, 0xe8) 0 1 0 /* Port 1.0 MEM */
+ 0x81000000 0x5 0 MBUS_ID(0x08, 0xe0) 0 1 0 /* Port 1.0 IO */
+ 0x82000000 0x6 0 MBUS_ID(0x08, 0xd8) 0 1 0 /* Port 1.1 MEM */
+ 0x81000000 0x6 0 MBUS_ID(0x08, 0xd0) 0 1 0 /* Port 1.1 IO */
+ 0x82000000 0x7 0 MBUS_ID(0x08, 0xb8) 0 1 0 /* Port 1.2 MEM */
+ 0x81000000 0x7 0 MBUS_ID(0x08, 0xb0) 0 1 0 /* Port 1.2 IO */
+ 0x82000000 0x8 0 MBUS_ID(0x08, 0x78) 0 1 0 /* Port 1.3 MEM */
+ 0x81000000 0x8 0 MBUS_ID(0x08, 0x70) 0 1 0 /* Port 1.3 IO */
+
+ 0x82000000 0x9 0 MBUS_ID(0x04, 0xf8) 0 1 0 /* Port 2.0 MEM */
+ 0x81000000 0x9 0 MBUS_ID(0x04, 0xf0) 0 1 0 /* Port 2.0 IO */
+
+ 0x82000000 0xa 0 MBUS_ID(0x08, 0xf8) 0 1 0 /* Port 3.0 MEM */
+ 0x81000000 0xa 0 MBUS_ID(0x08, 0xf0) 0 1 0 /* Port 3.0 IO */>;
+
+ pcie@1,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82000800 0 0x40000 0 0x2000>;
+ reg = <0x0800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ ranges = <0x82000000 0 0 0x82000000 0x1 0 1 0
+ 0x81000000 0 0 0x81000000 0x1 0 1 0>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &mpic 58>;
+ marvell,pcie-port = <0>;
+ marvell,pcie-lane = <0>;
+ /* low-active PERST# reset on GPIO 25 */
+ reset-gpios = <&gpio0 25 1>;
+ /* wait 20ms for device settle after reset deassertion */
+ reset-delay-us = <20000>;
+ clocks = <&gateclk 5>;
+ status = "disabled";
+ };
+
+ pcie@2,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82001000 0 0x44000 0 0x2000>;
+ reg = <0x1000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ ranges = <0x82000000 0 0 0x82000000 0x2 0 1 0
+ 0x81000000 0 0 0x81000000 0x2 0 1 0>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &mpic 59>;
+ marvell,pcie-port = <0>;
+ marvell,pcie-lane = <1>;
+ clocks = <&gateclk 6>;
+ status = "disabled";
+ };
+
+ pcie@3,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82001800 0 0x48000 0 0x2000>;
+ reg = <0x1800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ ranges = <0x82000000 0 0 0x82000000 0x3 0 1 0
+ 0x81000000 0 0 0x81000000 0x3 0 1 0>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &mpic 60>;
+ marvell,pcie-port = <0>;
+ marvell,pcie-lane = <2>;
+ clocks = <&gateclk 7>;
+ status = "disabled";
+ };
+
+ pcie@4,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82002000 0 0x4c000 0 0x2000>;
+ reg = <0x2000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ ranges = <0x82000000 0 0 0x82000000 0x4 0 1 0
+ 0x81000000 0 0 0x81000000 0x4 0 1 0>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &mpic 61>;
+ marvell,pcie-port = <0>;
+ marvell,pcie-lane = <3>;
+ clocks = <&gateclk 8>;
+ status = "disabled";
+ };
+
+ pcie@5,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82002800 0 0x80000 0 0x2000>;
+ reg = <0x2800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ ranges = <0x82000000 0 0 0x82000000 0x5 0 1 0
+ 0x81000000 0 0 0x81000000 0x5 0 1 0>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &mpic 62>;
+ marvell,pcie-port = <1>;
+ marvell,pcie-lane = <0>;
+ clocks = <&gateclk 9>;
+ status = "disabled";
+ };
+
+ pcie@6,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82003000 0 0x84000 0 0x2000>;
+ reg = <0x3000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ ranges = <0x82000000 0 0 0x82000000 0x6 0 1 0
+ 0x81000000 0 0 0x81000000 0x6 0 1 0>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &mpic 63>;
+ marvell,pcie-port = <1>;
+ marvell,pcie-lane = <1>;
+ clocks = <&gateclk 10>;
+ status = "disabled";
+ };
+
+ pcie@7,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82003800 0 0x88000 0 0x2000>;
+ reg = <0x3800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ ranges = <0x82000000 0 0 0x82000000 0x7 0 1 0
+ 0x81000000 0 0 0x81000000 0x7 0 1 0>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &mpic 64>;
+ marvell,pcie-port = <1>;
+ marvell,pcie-lane = <2>;
+ clocks = <&gateclk 11>;
+ status = "disabled";
+ };
+
+ pcie@8,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82004000 0 0x8c000 0 0x2000>;
+ reg = <0x4000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ ranges = <0x82000000 0 0 0x82000000 0x8 0 1 0
+ 0x81000000 0 0 0x81000000 0x8 0 1 0>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &mpic 65>;
+ marvell,pcie-port = <1>;
+ marvell,pcie-lane = <3>;
+ clocks = <&gateclk 12>;
+ status = "disabled";
+ };
+
+ pcie@9,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82004800 0 0x42000 0 0x2000>;
+ reg = <0x4800 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ ranges = <0x82000000 0 0 0x82000000 0x9 0 1 0
+ 0x81000000 0 0 0x81000000 0x9 0 1 0>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &mpic 99>;
+ marvell,pcie-port = <2>;
+ marvell,pcie-lane = <0>;
+ clocks = <&gateclk 26>;
+ status = "disabled";
+ };
+
+ pcie@10,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82005000 0 0x82000 0 0x2000>;
+ reg = <0x5000 0 0 0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ ranges = <0x82000000 0 0 0x82000000 0xa 0 1 0
+ 0x81000000 0 0 0x81000000 0xa 0 1 0>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &mpic 103>;
+ marvell,pcie-port = <3>;
+ marvell,pcie-lane = <0>;
+ clocks = <&gateclk 27>;
+ status = "disabled";
+ };
+};
diff --git a/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
new file mode 100644
index 000000000..75321ae23
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
@@ -0,0 +1,224 @@
+NVIDIA Tegra PCIe controller
+
+Required properties:
+- compatible: For Tegra20, must contain "nvidia,tegra20-pcie". For Tegra30,
+ "nvidia,tegra30-pcie". For Tegra124, must contain "nvidia,tegra124-pcie".
+ Otherwise, must contain "nvidia,<chip>-pcie", plus one of the above, where
+ <chip> is tegra132 or tegra210.
+- device_type: Must be "pci"
+- reg: A list of physical base address and length for each set of controller
+ registers. Must contain an entry for each entry in the reg-names property.
+- reg-names: Must include the following entries:
+ "pads": PADS registers
+ "afi": AFI registers
+ "cs": configuration space region
+- interrupts: A list of interrupt outputs of the controller. Must contain an
+ entry for each entry in the interrupt-names property.
+- interrupt-names: Must include the following entries:
+ "intr": The Tegra interrupt that is asserted for controller interrupts
+ "msi": The Tegra interrupt that is asserted when an MSI is received
+- bus-range: Range of bus numbers associated with this controller
+- #address-cells: Address representation for root ports (must be 3)
+ - cell 0 specifies the bus and device numbers of the root port:
+ [23:16]: bus number
+ [15:11]: device number
+ - cell 1 denotes the upper 32 address bits and should be 0
+ - cell 2 contains the lower 32 address bits and is used to translate to the
+ CPU address space
+- #size-cells: Size representation for root ports (must be 2)
+- ranges: Describes the translation of addresses for root ports and standard
+ PCI regions. The entries must be 6 cells each, where the first three cells
+ correspond to the address as described for the #address-cells property
+ above, the fourth cell is the physical CPU address to translate to and the
+ fifth and six cells are as described for the #size-cells property above.
+ - The first two entries are expected to translate the addresses for the root
+ port registers, which are referenced by the assigned-addresses property of
+ the root port nodes (see below).
+ - The remaining entries setup the mapping for the standard I/O, memory and
+ prefetchable PCI regions. The first cell determines the type of region
+ that is setup:
+ - 0x81000000: I/O memory region
+ - 0x82000000: non-prefetchable memory region
+ - 0xc2000000: prefetchable memory region
+ Please refer to the standard PCI bus binding document for a more detailed
+ explanation.
+- #interrupt-cells: Size representation for interrupts (must be 1)
+- interrupt-map-mask and interrupt-map: Standard PCI IRQ mapping properties
+ Please refer to the standard PCI bus binding document for a more detailed
+ explanation.
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - pex
+ - afi
+ - pll_e
+ - cml (not required for Tegra20)
+- resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - pex
+ - afi
+ - pcie_x
+
+Required properties on Tegra124 and later:
+- phys: Must contain an entry for each entry in phy-names.
+- phy-names: Must include the following entries:
+ - pcie
+
+Power supplies for Tegra20:
+- avdd-pex-supply: Power supply for analog PCIe logic. Must supply 1.05 V.
+- vdd-pex-supply: Power supply for digital PCIe I/O. Must supply 1.05 V.
+- avdd-pex-pll-supply: Power supply for dedicated (internal) PCIe PLL. Must
+ supply 1.05 V.
+- avdd-plle-supply: Power supply for PLLE, which is shared with SATA. Must
+ supply 1.05 V.
+- vddio-pex-clk-supply: Power supply for PCIe clock. Must supply 3.3 V.
+
+Power supplies for Tegra30:
+- Required:
+ - avdd-pex-pll-supply: Power supply for dedicated (internal) PCIe PLL. Must
+ supply 1.05 V.
+ - avdd-plle-supply: Power supply for PLLE, which is shared with SATA. Must
+ supply 1.05 V.
+ - vddio-pex-ctl-supply: Power supply for PCIe control I/O partition. Must
+ supply 1.8 V.
+ - hvdd-pex-supply: High-voltage supply for PCIe I/O and PCIe output clocks.
+ Must supply 3.3 V.
+- Optional:
+ - If lanes 0 to 3 are used:
+ - avdd-pexa-supply: Power supply for analog PCIe logic. Must supply 1.05 V.
+ - vdd-pexa-supply: Power supply for digital PCIe I/O. Must supply 1.05 V.
+ - If lanes 4 or 5 are used:
+ - avdd-pexb-supply: Power supply for analog PCIe logic. Must supply 1.05 V.
+ - vdd-pexb-supply: Power supply for digital PCIe I/O. Must supply 1.05 V.
+
+Power supplies for Tegra124:
+- Required:
+ - avddio-pex-supply: Power supply for analog PCIe logic. Must supply 1.05 V.
+ - dvddio-pex-supply: Power supply for digital PCIe I/O. Must supply 1.05 V.
+ - avdd-pex-pll-supply: Power supply for dedicated (internal) PCIe PLL. Must
+ supply 1.05 V.
+ - hvdd-pex-supply: High-voltage supply for PCIe I/O and PCIe output clocks.
+ Must supply 3.3 V.
+ - hvdd-pex-pll-e-supply: High-voltage supply for PLLE (shared with USB3).
+ Must supply 3.3 V.
+ - vddio-pex-ctl-supply: Power supply for PCIe control I/O partition. Must
+ supply 2.8-3.3 V.
+ - avdd-pll-erefe-supply: Power supply for PLLE (shared with USB3). Must
+ supply 1.05 V.
+
+Root ports are defined as subnodes of the PCIe controller node.
+
+Required properties:
+- device_type: Must be "pci"
+- assigned-addresses: Address and size of the port configuration registers
+- reg: PCI bus address of the root port
+- #address-cells: Must be 3
+- #size-cells: Must be 2
+- ranges: Sub-ranges distributed from the PCIe controller node. An empty
+ property is sufficient.
+- nvidia,num-lanes: Number of lanes to use for this port. Valid combinations
+ are:
+ - Root port 0 uses 4 lanes, root port 1 is unused.
+ - Both root ports use 2 lanes.
+
+Example:
+
+SoC DTSI:
+
+ pcie-controller {
+ compatible = "nvidia,tegra20-pcie";
+ device_type = "pci";
+ reg = <0x80003000 0x00000800 /* PADS registers */
+ 0x80003800 0x00000200 /* AFI registers */
+ 0x90000000 0x10000000>; /* configuration space */
+ reg-names = "pads", "afi", "cs";
+ interrupts = <0 98 0x04 /* controller interrupt */
+ 0 99 0x04>; /* MSI interrupt */
+ interrupt-names = "intr", "msi";
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &intc GIC_SPI 98 IRQ_TYPE_LEVEL_HIGH>;
+
+ bus-range = <0x00 0xff>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ ranges = <0x82000000 0 0x80000000 0x80000000 0 0x00001000 /* port 0 registers */
+ 0x82000000 0 0x80001000 0x80001000 0 0x00001000 /* port 1 registers */
+ 0x81000000 0 0 0x82000000 0 0x00010000 /* downstream I/O */
+ 0x82000000 0 0xa0000000 0xa0000000 0 0x10000000 /* non-prefetchable memory */
+ 0xc2000000 0 0xb0000000 0xb0000000 0 0x10000000>; /* prefetchable memory */
+
+ clocks = <&tegra_car 70>, <&tegra_car 72>, <&tegra_car 118>;
+ clock-names = "pex", "afi", "pll_e";
+ resets = <&tegra_car 70>, <&tegra_car 72>, <&tegra_car 74>;
+ reset-names = "pex", "afi", "pcie_x";
+ status = "disabled";
+
+ pci@1,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82000800 0 0x80000000 0 0x1000>;
+ reg = <0x000800 0 0 0 0>;
+ status = "disabled";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ ranges;
+
+ nvidia,num-lanes = <2>;
+ };
+
+ pci@2,0 {
+ device_type = "pci";
+ assigned-addresses = <0x82001000 0 0x80001000 0 0x1000>;
+ reg = <0x001000 0 0 0 0>;
+ status = "disabled";
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ ranges;
+
+ nvidia,num-lanes = <2>;
+ };
+ };
+
+
+Board DTS:
+
+ pcie-controller {
+ status = "okay";
+
+ vdd-supply = <&pci_vdd_reg>;
+ pex-clk-supply = <&pci_clk_reg>;
+
+ /* root port 00:01.0 */
+ pci@1,0 {
+ status = "okay";
+
+ /* bridge 01:00.0 (optional) */
+ pci@0,0 {
+ reg = <0x010000 0 0 0 0>;
+
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ device_type = "pci";
+
+ /* endpoint 02:00.0 */
+ pci@0,0 {
+ reg = <0x020000 0 0 0 0>;
+ };
+ };
+ };
+ };
+
+Note that devices on the PCI bus are dynamically discovered using PCI's bus
+enumeration and therefore don't need corresponding device nodes in DT. However
+if a device on the PCI bus provides a non-probeable bus such as I2C or SPI,
+device nodes need to be added in order to allow the bus' children to be
+instantiated at the proper location in the operating system's device tree (as
+illustrated by the optional nodes in the example above).
diff --git a/Documentation/devicetree/bindings/pci/pci-keystone.txt b/Documentation/devicetree/bindings/pci/pci-keystone.txt
new file mode 100644
index 000000000..54eae2938
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/pci-keystone.txt
@@ -0,0 +1,63 @@
+TI Keystone PCIe interface
+
+Keystone PCI host Controller is based on Designware PCI h/w version 3.65.
+It shares common functions with PCIe Designware core driver and inherit
+common properties defined in
+Documentation/devicetree/bindings/pci/designware-pci.txt
+
+Please refer to Documentation/devicetree/bindings/pci/designware-pci.txt
+for the details of Designware DT bindings. Additional properties are
+described here as well as properties that are not applicable.
+
+Required Properties:-
+
+compatibility: "ti,keystone-pcie"
+reg: index 1 is the base address and length of DW application registers.
+ index 2 is the base address and length of PCI device ID register.
+
+pcie_msi_intc : Interrupt controller device node for MSI IRQ chip
+ interrupt-cells: should be set to 1
+ interrupt-parent: Parent interrupt controller phandle
+ interrupts: GIC interrupt lines connected to PCI MSI interrupt lines
+
+ Example:
+ pcie_msi_intc: msi-interrupt-controller {
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 30 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 31 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 32 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 33 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 34 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 35 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 36 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 37 IRQ_TYPE_EDGE_RISING>;
+ };
+
+pcie_intc: Interrupt controller device node for Legacy IRQ chip
+ interrupt-cells: should be set to 1
+ interrupt-parent: Parent interrupt controller phandle
+ interrupts: GIC interrupt lines connected to PCI Legacy interrupt lines
+
+ Example:
+ pcie_intc: legacy-interrupt-controller {
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ interrupt-parent = <&gic>;
+ interrupts = <GIC_SPI 26 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 27 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 28 IRQ_TYPE_EDGE_RISING>,
+ <GIC_SPI 29 IRQ_TYPE_EDGE_RISING>;
+ };
+
+Optional properties:-
+ phys: phandle to Generic Keystone SerDes phy for PCI
+ phy-names: name of the Generic Keystine SerDes phy for PCI
+ - If boot loader already does PCI link establishment, then phys and
+ phy-names shouldn't be present.
+
+Designware DT Properties not applicable for Keystone PCI
+
+1. pcie_bus clock-names not used. Instead, a phandle to phys is used.
+
diff --git a/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt b/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt
new file mode 100644
index 000000000..d8ef5bf50
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/pci-rcar-gen2.txt
@@ -0,0 +1,66 @@
+Renesas AHB to PCI bridge
+-------------------------
+
+This is the bridge used internally to connect the USB controllers to the
+AHB. There is one bridge instance per USB port connected to the internal
+OHCI and EHCI controllers.
+
+Required properties:
+- compatible: "renesas,pci-r8a7790" for the R8A7790 SoC;
+ "renesas,pci-r8a7791" for the R8A7791 SoC.
+- reg: A list of physical regions to access the device: the first is
+ the operational registers for the OHCI/EHCI controllers and the
+ second is for the bridge configuration and control registers.
+- interrupts: interrupt for the device.
+- clocks: The reference to the device clock.
+- bus-range: The PCI bus number range; as this is a single bus, the range
+ should be specified as the same value twice.
+- #address-cells: must be 3.
+- #size-cells: must be 2.
+- #interrupt-cells: must be 1.
+- interrupt-map: standard property used to define the mapping of the PCI
+ interrupts to the GIC interrupts.
+- interrupt-map-mask: standard property that helps to define the interrupt
+ mapping.
+
+Example SoC configuration:
+
+ pci0: pci@ee090000 {
+ compatible = "renesas,pci-r8a7790";
+ clocks = <&mstp7_clks R8A7790_CLK_EHCI>;
+ reg = <0x0 0xee090000 0x0 0xc00>,
+ <0x0 0xee080000 0x0 0x1100>;
+ interrupts = <0 108 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+
+ bus-range = <0 0>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0xff00 0 0 0x7>;
+ interrupt-map = <0x0000 0 0 1 &gic 0 108 IRQ_TYPE_LEVEL_HIGH
+ 0x0800 0 0 1 &gic 0 108 IRQ_TYPE_LEVEL_HIGH
+ 0x1000 0 0 2 &gic 0 108 IRQ_TYPE_LEVEL_HIGH>;
+
+ pci@0,1 {
+ reg = <0x800 0 0 0 0>;
+ device_type = "pci";
+ phys = <&usbphy 0 0>;
+ phy-names = "usb";
+ };
+
+ pci@0,2 {
+ reg = <0x1000 0 0 0 0>;
+ device_type = "pci";
+ phys = <&usbphy 0 0>;
+ phy-names = "usb";
+ };
+ };
+
+Example board setup:
+
+&pci0 {
+ status = "okay";
+ pinctrl-0 = <&usb0_pins>;
+ pinctrl-names = "default";
+};
diff --git a/Documentation/devicetree/bindings/pci/pci.txt b/Documentation/devicetree/bindings/pci/pci.txt
new file mode 100644
index 000000000..f8fbe9af7
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/pci.txt
@@ -0,0 +1,20 @@
+PCI bus bridges have standardized Device Tree bindings:
+
+PCI Bus Binding to: IEEE Std 1275-1994
+http://www.openfirmware.org/ofwg/bindings/pci/pci2_1.pdf
+
+And for the interrupt mapping part:
+
+Open Firmware Recommended Practice: Interrupt Mapping
+http://www.openfirmware.org/1275/practice/imap/imap0_9d.pdf
+
+Additionally to the properties specified in the above standards a host bridge
+driver implementation may support the following properties:
+
+- linux,pci-domain:
+ If present this property assigns a fixed PCI domain number to a host bridge,
+ otherwise an unstable (across boots) unique number will be assigned.
+ It is required to either not set this property at all or set it for all
+ host bridges in the system, otherwise potentially conflicting domain numbers
+ may be assigned to root buses behind different host bridges. The domain
+ number for each host bridge in the system must be unique.
diff --git a/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt b/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt
new file mode 100644
index 000000000..8e0a1eb0a
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt
@@ -0,0 +1,190 @@
+* Mediatek/Ralink RT3883 PCI controller
+
+1) Main node
+
+ Required properties:
+
+ - compatible: must be "ralink,rt3883-pci"
+
+ - reg: specifies the physical base address of the controller and
+ the length of the memory mapped region.
+
+ - #address-cells: specifies the number of cells needed to encode an
+ address. The value must be 1.
+
+ - #size-cells: specifies the number of cells used to represent the size
+ of an address. The value must be 1.
+
+ - ranges: specifies the translation between child address space and parent
+ address space
+
+ Optional properties:
+
+ - status: indicates the operational status of the device.
+ Value must be either "disabled" or "okay".
+
+2) Child nodes
+
+ The main node must have two child nodes which describes the built-in
+ interrupt controller and the PCI host bridge.
+
+ a) Interrupt controller:
+
+ Required properties:
+
+ - interrupt-controller: identifies the node as an interrupt controller
+
+ - #address-cells: specifies the number of cells needed to encode an
+ address. The value must be 0. As such, 'interrupt-map' nodes do not
+ have to specify a parent unit address.
+
+ - #interrupt-cells: specifies the number of cells needed to encode an
+ interrupt source. The value must be 1.
+
+ - interrupt-parent: the phandle for the interrupt controller that
+ services interrupts for this device.
+
+ - interrupts: specifies the interrupt source of the parent interrupt
+ controller. The format of the interrupt specifier depends on the
+ parent interrupt controller.
+
+ b) PCI host bridge:
+
+ Required properties:
+
+ - #address-cells: specifies the number of cells needed to encode an
+ address. The value must be 0.
+
+ - #size-cells: specifies the number of cells used to represent the size
+ of an address. The value must be 2.
+
+ - #interrupt-cells: specifies the number of cells needed to encode an
+ interrupt source. The value must be 1.
+
+ - device_type: must be "pci"
+
+ - bus-range: PCI bus numbers covered
+
+ - ranges: specifies the ranges for the PCI memory and I/O regions
+
+ - interrupt-map-mask,
+ - interrupt-map: standard PCI properties to define the mapping of the
+ PCI interface to interrupt numbers.
+
+ The PCI host bridge node migh have additional sub-nodes representing
+ the onboard PCI devices/PCI slots. Each such sub-node must have the
+ following mandatory properties:
+
+ - reg: used only for interrupt mapping, so only the first four bytes
+ are used to refer to the correct bus number and device number.
+
+ - device_type: must be "pci"
+
+ If a given sub-node represents a PCI bridge it must have following
+ mandatory properties as well:
+
+ - #address-cells: must be set to <3>
+
+ - #size-cells: must set to <2>
+
+ - #interrupt-cells: must be set to <1>
+
+ - interrupt-map-mask,
+ - interrupt-map: standard PCI properties to define the mapping of the
+ PCI interface to interrupt numbers.
+
+ Besides the required properties the sub-nodes may have these optional
+ properties:
+
+ - status: indicates the operational status of the sub-node.
+ Value must be either "disabled" or "okay".
+
+3) Example:
+
+ a) SoC specific dtsi file:
+
+ pci@10140000 {
+ compatible = "ralink,rt3883-pci";
+ reg = <0x10140000 0x20000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges; /* direct mapping */
+
+ status = "disabled";
+
+ pciintc: interrupt-controller {
+ interrupt-controller;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+
+ interrupt-parent = <&cpuintc>;
+ interrupts = <4>;
+ };
+
+ host-bridge {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+
+ device_type = "pci";
+
+ bus-range = <0 255>;
+ ranges = <
+ 0x02000000 0 0x00000000 0x20000000 0 0x10000000 /* pci memory */
+ 0x01000000 0 0x00000000 0x10160000 0 0x00010000 /* io space */
+ >;
+
+ interrupt-map-mask = <0xf800 0 0 7>;
+ interrupt-map = <
+ /* IDSEL 17 */
+ 0x8800 0 0 1 &pciintc 18
+ 0x8800 0 0 2 &pciintc 18
+ 0x8800 0 0 3 &pciintc 18
+ 0x8800 0 0 4 &pciintc 18
+ /* IDSEL 18 */
+ 0x9000 0 0 1 &pciintc 19
+ 0x9000 0 0 2 &pciintc 19
+ 0x9000 0 0 3 &pciintc 19
+ 0x9000 0 0 4 &pciintc 19
+ >;
+
+ pci-bridge@1 {
+ reg = <0x0800 0 0 0 0>;
+ device_type = "pci";
+ #interrupt-cells = <1>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+
+ interrupt-map-mask = <0x0 0 0 0>;
+ interrupt-map = <0x0 0 0 0 &pciintc 20>;
+
+ status = "disabled";
+ };
+
+ pci-slot@17 {
+ reg = <0x8800 0 0 0 0>;
+ device_type = "pci";
+
+ status = "disabled";
+ };
+
+ pci-slot@18 {
+ reg = <0x9000 0 0 0 0>;
+ device_type = "pci";
+
+ status = "disabled";
+ };
+ };
+ };
+
+ b) Board specific dts file:
+
+ pci@10140000 {
+ status = "okay";
+
+ host-bridge {
+ pci-bridge@1 {
+ status = "okay";
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pci/rcar-pci.txt b/Documentation/devicetree/bindings/pci/rcar-pci.txt
new file mode 100644
index 000000000..29d3b989d
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/rcar-pci.txt
@@ -0,0 +1,47 @@
+* Renesas RCar PCIe interface
+
+Required properties:
+- compatible: should contain one of the following
+ "renesas,pcie-r8a7779", "renesas,pcie-r8a7790", "renesas,pcie-r8a7791"
+- reg: base address and length of the pcie controller registers.
+- #address-cells: set to <3>
+- #size-cells: set to <2>
+- bus-range: PCI bus numbers covered
+- device_type: set to "pci"
+- ranges: ranges for the PCI memory and I/O regions.
+- dma-ranges: ranges for the inbound memory regions.
+- interrupts: two interrupt sources for MSI interrupts, followed by interrupt
+ source for hardware related interrupts (e.g. link speed change).
+- #interrupt-cells: set to <1>
+- interrupt-map-mask and interrupt-map: standard PCI properties
+ to define the mapping of the PCIe interface to interrupt
+ numbers.
+- clocks: from common clock binding: clock specifiers for the PCIe controller
+ and PCIe bus clocks.
+- clock-names: from common clock binding: should be "pcie" and "pcie_bus".
+
+Example:
+
+SoC specific DT Entry:
+
+ pcie: pcie@fe000000 {
+ compatible = "renesas,pcie-r8a7791";
+ reg = <0 0xfe000000 0 0x80000>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ bus-range = <0x00 0xff>;
+ device_type = "pci";
+ ranges = <0x01000000 0 0x00000000 0 0xfe100000 0 0x00100000
+ 0x02000000 0 0xfe200000 0 0xfe200000 0 0x00200000
+ 0x02000000 0 0x30000000 0 0x30000000 0 0x08000000
+ 0x42000000 0 0x38000000 0 0x38000000 0 0x08000000>;
+ dma-ranges = <0x42000000 0 0x40000000 0 0x40000000 0 0x40000000
+ 0x42000000 2 0x00000000 2 0x00000000 0 0x40000000>;
+ interrupts = <0 116 4>, <0 117 4>, <0 118 4>;
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &gic 0 116 4>;
+ clocks = <&mstp3_clks R8A7791_CLK_PCIE>, <&pcie_bus_clk>;
+ clock-names = "pcie", "pcie_bus";
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt b/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt
new file mode 100644
index 000000000..4f9d23d2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/samsung,exynos5440-pcie.txt
@@ -0,0 +1,65 @@
+* Samsung Exynos 5440 PCIe interface
+
+This PCIe host controller is based on the Synopsis Designware PCIe IP
+and thus inherits all the common properties defined in designware-pcie.txt.
+
+Required properties:
+- compatible: "samsung,exynos5440-pcie"
+- reg: base addresses and lengths of the pcie controller,
+ the phy controller, additional register for the phy controller.
+- interrupts: A list of interrupt outputs for level interrupt,
+ pulse interrupt, special interrupt.
+
+Example:
+
+SoC specific DT Entry:
+
+ pcie@290000 {
+ compatible = "samsung,exynos5440-pcie", "snps,dw-pcie";
+ reg = <0x290000 0x1000
+ 0x270000 0x1000
+ 0x271000 0x40>;
+ interrupts = <0 20 0>, <0 21 0>, <0 22 0>;
+ clocks = <&clock 28>, <&clock 27>;
+ clock-names = "pcie", "pcie_bus";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ ranges = <0x00000800 0 0x40000000 0x40000000 0 0x00001000 /* configuration space */
+ 0x81000000 0 0 0x40001000 0 0x00010000 /* downstream I/O */
+ 0x82000000 0 0x40011000 0x40011000 0 0x1ffef000>; /* non-prefetchable memory */
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &gic GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+ num-lanes = <4>;
+ };
+
+ pcie@2a0000 {
+ compatible = "samsung,exynos5440-pcie", "snps,dw-pcie";
+ reg = <0x2a0000 0x1000
+ 0x272000 0x1000
+ 0x271040 0x40>;
+ interrupts = <0 23 0>, <0 24 0>, <0 25 0>;
+ clocks = <&clock 29>, <&clock 27>;
+ clock-names = "pcie", "pcie_bus";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ ranges = <0x00000800 0 0x60000000 0x60000000 0 0x00001000 /* configuration space */
+ 0x81000000 0 0 0x60001000 0 0x00010000 /* downstream I/O */
+ 0x82000000 0 0x60011000 0x60011000 0 0x1ffef000>; /* non-prefetchable memory */
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0>;
+ interrupt-map = <0 0 0 0 &gic GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>;
+ num-lanes = <4>;
+ };
+
+Board specific DT Entry:
+
+ pcie@290000 {
+ reset-gpio = <&pin_ctrl 5 0>;
+ };
+
+ pcie@2a0000 {
+ reset-gpio = <&pin_ctrl 22 0>;
+ };
diff --git a/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt b/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt
new file mode 100644
index 000000000..49ea76da7
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/spear13xx-pcie.txt
@@ -0,0 +1,14 @@
+SPEAr13XX PCIe DT detail:
+================================
+
+SPEAr13XX uses synopsis designware PCIe controller and ST MiPHY as phy
+controller.
+
+Required properties:
+- compatible : should be "st,spear1340-pcie", "snps,dw-pcie".
+- phys : phandle to phy node associated with pcie controller
+- phy-names : must be "pcie-phy"
+- All other definitions as per generic PCI bindings
+
+ Optional properties:
+- st,pcie-is-gen1 indicates that forced gen1 initialization is needed.
diff --git a/Documentation/devicetree/bindings/pci/ti-pci.txt b/Documentation/devicetree/bindings/pci/ti-pci.txt
new file mode 100644
index 000000000..3d217911b
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/ti-pci.txt
@@ -0,0 +1,59 @@
+TI PCI Controllers
+
+PCIe Designware Controller
+ - compatible: Should be "ti,dra7-pcie""
+ - reg : Two register ranges as listed in the reg-names property
+ - reg-names : The first entry must be "ti-conf" for the TI specific registers
+ The second entry must be "rc-dbics" for the designware pcie
+ registers
+ The third entry must be "config" for the PCIe configuration space
+ - phys : list of PHY specifiers (used by generic PHY framework)
+ - phy-names : must be "pcie-phy0", "pcie-phy1", "pcie-phyN".. based on the
+ number of PHYs as specified in *phys* property.
+ - ti,hwmods : Name of the hwmod associated to the pcie, "pcie<X>",
+ where <X> is the instance number of the pcie from the HW spec.
+ - interrupts : Two interrupt entries must be specified. The first one is for
+ main interrupt line and the second for MSI interrupt line.
+ - #address-cells,
+ #size-cells,
+ #interrupt-cells,
+ device_type,
+ ranges,
+ num-lanes,
+ interrupt-map-mask,
+ interrupt-map : as specified in ../designware-pcie.txt
+
+Example:
+axi {
+ compatible = "simple-bus";
+ #size-cells = <1>;
+ #address-cells = <1>;
+ ranges = <0x51000000 0x51000000 0x3000
+ 0x0 0x20000000 0x10000000>;
+ pcie@51000000 {
+ compatible = "ti,dra7-pcie";
+ reg = <0x51000000 0x2000>, <0x51002000 0x14c>, <0x1000 0x2000>;
+ reg-names = "rc_dbics", "ti_conf", "config";
+ interrupts = <0 232 0x4>, <0 233 0x4>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ device_type = "pci";
+ ranges = <0x81000000 0 0 0x03000 0 0x00010000
+ 0x82000000 0 0x20013000 0x13000 0 0xffed000>;
+ #interrupt-cells = <1>;
+ num-lanes = <1>;
+ ti,hwmods = "pcie1";
+ phys = <&pcie1_phy>;
+ phy-names = "pcie-phy0";
+ interrupt-map-mask = <0 0 0 7>;
+ interrupt-map = <0 0 0 1 &pcie_intc 1>,
+ <0 0 0 2 &pcie_intc 2>,
+ <0 0 0 3 &pcie_intc 3>,
+ <0 0 0 4 &pcie_intc 4>;
+ pcie_intc: interrupt-controller {
+ interrupt-controller;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/pci/v3-v360epc-pci.txt b/Documentation/devicetree/bindings/pci/v3-v360epc-pci.txt
new file mode 100644
index 000000000..30b364e50
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/v3-v360epc-pci.txt
@@ -0,0 +1,15 @@
+V3 Semiconductor V360 EPC PCI bridge
+
+This bridge is found in the ARM Integrator/AP (Application Platform)
+
+Integrator-specific notes:
+
+- syscon: should contain a link to the syscon device node (since
+ on the Integrator, some registers in the syscon are required to
+ operate the V3).
+
+V360 EPC specific notes:
+
+- reg: should contain the base address of the V3 adapter.
+- interrupts: should contain a reference to the V3 error interrupt
+ as routed on the system.
diff --git a/Documentation/devicetree/bindings/pci/versatile.txt b/Documentation/devicetree/bindings/pci/versatile.txt
new file mode 100644
index 000000000..ebd1e7d04
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/versatile.txt
@@ -0,0 +1,59 @@
+* ARM Versatile Platform Baseboard PCI interface
+
+PCI host controller found on the ARM Versatile PB board's FPGA.
+
+Required properties:
+- compatible: should contain "arm,versatile-pci" to identify the Versatile PCI
+ controller.
+- reg: base addresses and lengths of the pci controller. There must be 3
+ entries:
+ - Versatile-specific registers
+ - Self Config space
+ - Config space
+- #address-cells: set to <3>
+- #size-cells: set to <2>
+- device_type: set to "pci"
+- bus-range: set to <0 0xff>
+- ranges: ranges for the PCI memory and I/O regions
+- #interrupt-cells: set to <1>
+- interrupt-map-mask and interrupt-map: standard PCI properties to define
+ the mapping of the PCI interface to interrupt numbers.
+
+Example:
+
+pci-controller@10001000 {
+ compatible = "arm,versatile-pci";
+ device_type = "pci";
+ reg = <0x10001000 0x1000
+ 0x41000000 0x10000
+ 0x42000000 0x100000>;
+ bus-range = <0 0xff>;
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+
+ ranges = <0x01000000 0 0x00000000 0x43000000 0 0x00010000 /* downstream I/O */
+ 0x02000000 0 0x50000000 0x50000000 0 0x10000000 /* non-prefetchable memory */
+ 0x42000000 0 0x60000000 0x60000000 0 0x10000000>; /* prefetchable memory */
+
+ interrupt-map-mask = <0x1800 0 0 7>;
+ interrupt-map = <0x1800 0 0 1 &sic 28
+ 0x1800 0 0 2 &sic 29
+ 0x1800 0 0 3 &sic 30
+ 0x1800 0 0 4 &sic 27
+
+ 0x1000 0 0 1 &sic 27
+ 0x1000 0 0 2 &sic 28
+ 0x1000 0 0 3 &sic 29
+ 0x1000 0 0 4 &sic 30
+
+ 0x0800 0 0 1 &sic 30
+ 0x0800 0 0 2 &sic 27
+ 0x0800 0 0 3 &sic 28
+ 0x0800 0 0 4 &sic 29
+
+ 0x0000 0 0 1 &sic 29
+ 0x0000 0 0 2 &sic 30
+ 0x0000 0 0 3 &sic 27
+ 0x0000 0 0 4 &sic 28>;
+};
diff --git a/Documentation/devicetree/bindings/pci/xgene-pci.txt b/Documentation/devicetree/bindings/pci/xgene-pci.txt
new file mode 100644
index 000000000..1070b068c
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/xgene-pci.txt
@@ -0,0 +1,57 @@
+* AppliedMicro X-Gene PCIe interface
+
+Required properties:
+- device_type: set to "pci"
+- compatible: should contain "apm,xgene-pcie" to identify the core.
+- reg: A list of physical base address and length for each set of controller
+ registers. Must contain an entry for each entry in the reg-names
+ property.
+- reg-names: Must include the following entries:
+ "csr": controller configuration registers.
+ "cfg": pcie configuration space registers.
+- #address-cells: set to <3>
+- #size-cells: set to <2>
+- ranges: ranges for the outbound memory, I/O regions.
+- dma-ranges: ranges for the inbound memory regions.
+- #interrupt-cells: set to <1>
+- interrupt-map-mask and interrupt-map: standard PCI properties
+ to define the mapping of the PCIe interface to interrupt
+ numbers.
+- clocks: from common clock binding: handle to pci clock.
+
+Optional properties:
+- status: Either "ok" or "disabled".
+- dma-coherent: Present if dma operations are coherent
+
+Example:
+
+SoC specific DT Entry:
+
+ pcie0: pcie@1f2b0000 {
+ status = "disabled";
+ device_type = "pci";
+ compatible = "apm,xgene-storm-pcie", "apm,xgene-pcie";
+ #interrupt-cells = <1>;
+ #size-cells = <2>;
+ #address-cells = <3>;
+ reg = < 0x00 0x1f2b0000 0x0 0x00010000 /* Controller registers */
+ 0xe0 0xd0000000 0x0 0x00040000>; /* PCI config space */
+ reg-names = "csr", "cfg";
+ ranges = <0x01000000 0x00 0x00000000 0xe0 0x10000000 0x00 0x00010000 /* io */
+ 0x02000000 0x00 0x80000000 0xe1 0x80000000 0x00 0x80000000>; /* mem */
+ dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
+ 0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+ interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xc2 0x1
+ 0x0 0x0 0x0 0x2 &gic 0x0 0xc3 0x1
+ 0x0 0x0 0x0 0x3 &gic 0x0 0xc4 0x1
+ 0x0 0x0 0x0 0x4 &gic 0x0 0xc5 0x1>;
+ dma-coherent;
+ clocks = <&pcie0clk 0>;
+ };
+
+
+Board specific DT Entry:
+ &pcie0 {
+ status = "ok";
+ };
diff --git a/Documentation/devicetree/bindings/pci/xilinx-pcie.txt b/Documentation/devicetree/bindings/pci/xilinx-pcie.txt
new file mode 100644
index 000000000..3e2c88d97
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/xilinx-pcie.txt
@@ -0,0 +1,62 @@
+* Xilinx AXI PCIe Root Port Bridge DT description
+
+Required properties:
+- #address-cells: Address representation for root ports, set to <3>
+- #size-cells: Size representation for root ports, set to <2>
+- #interrupt-cells: specifies the number of cells needed to encode an
+ interrupt source. The value must be 1.
+- compatible: Should contain "xlnx,axi-pcie-host-1.00.a"
+- reg: Should contain AXI PCIe registers location and length
+- device_type: must be "pci"
+- interrupts: Should contain AXI PCIe interrupt
+- interrupt-map-mask,
+ interrupt-map: standard PCI properties to define the mapping of the
+ PCI interface to interrupt numbers.
+- ranges: ranges for the PCI memory regions (I/O space region is not
+ supported by hardware)
+ Please refer to the standard PCI bus binding document for a more
+ detailed explanation
+
+Optional properties:
+- bus-range: PCI bus numbers covered
+
+Interrupt controller child node
++++++++++++++++++++++++++++++++
+Required properties:
+- interrupt-controller: identifies the node as an interrupt controller
+- #address-cells: specifies the number of cells needed to encode an
+ address. The value must be 0.
+- #interrupt-cells: specifies the number of cells needed to encode an
+ interrupt source. The value must be 1.
+
+NOTE:
+The core provides a single interrupt for both INTx/MSI messages. So,
+created a interrupt controller node to support 'interrupt-map' DT
+functionality. The driver will create an IRQ domain for this map, decode
+the four INTx interrupts in ISR and route them to this domain.
+
+
+Example:
+++++++++
+
+ pci_express: axi-pcie@50000000 {
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ compatible = "xlnx,axi-pcie-host-1.00.a";
+ reg = < 0x50000000 0x10000000 >;
+ device_type = "pci";
+ interrupts = < 0 52 4 >;
+ interrupt-map-mask = <0 0 0 7>;
+ interrupt-map = <0 0 0 1 &pcie_intc 1>,
+ <0 0 0 2 &pcie_intc 2>,
+ <0 0 0 3 &pcie_intc 3>,
+ <0 0 0 4 &pcie_intc 4>;
+ ranges = < 0x02000000 0 0x60000000 0x60000000 0 0x10000000 >;
+
+ pcie_intc: interrupt-controller {
+ interrupt-controller;
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ }
+ };
diff --git a/Documentation/devicetree/bindings/phy/apm-xgene-phy.txt b/Documentation/devicetree/bindings/phy/apm-xgene-phy.txt
new file mode 100644
index 000000000..5f3a65a9d
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/apm-xgene-phy.txt
@@ -0,0 +1,79 @@
+* APM X-Gene 15Gbps Multi-purpose PHY nodes
+
+PHY nodes are defined to describe on-chip 15Gbps Multi-purpose PHY. Each
+PHY (pair of lanes) has its own node.
+
+Required properties:
+- compatible : Shall be "apm,xgene-phy".
+- reg : PHY memory resource is the SDS PHY access resource.
+- #phy-cells : Shall be 1 as it expects one argument for setting
+ the mode of the PHY. Possible values are 0 (SATA),
+ 1 (SGMII), 2 (PCIe), 3 (USB), and 4 (XFI).
+
+Optional properties:
+- status : Shall be "ok" if enabled or "disabled" if disabled.
+ Default is "ok".
+- clocks : Reference to the clock entry.
+- apm,tx-eye-tuning : Manual control to fine tune the capture of the serial
+ bit lines from the automatic calibrated position.
+ Two set of 3-tuple setting for each (up to 3)
+ supported link speed on the host. Range from 0 to
+ 127 in unit of one bit period. Default is 10.
+- apm,tx-eye-direction : Eye tuning manual control direction. 0 means sample
+ data earlier than the nominal sampling point. 1 means
+ sample data later than the nominal sampling point.
+ Two set of 3-tuple setting for each (up to 3)
+ supported link speed on the host. Default is 0.
+- apm,tx-boost-gain : Frequency boost AC (LSB 3-bit) and DC (2-bit)
+ gain control. Two set of 3-tuple setting for each
+ (up to 3) supported link speed on the host. Range is
+ between 0 to 31 in unit of dB. Default is 3.
+- apm,tx-amplitude : Amplitude control. Two set of 3-tuple setting for
+ each (up to 3) supported link speed on the host.
+ Range is between 0 to 199500 in unit of uV.
+ Default is 199500 uV.
+- apm,tx-pre-cursor1 : 1st pre-cursor emphasis taps control. Two set of
+ 3-tuple setting for each (up to 3) supported link
+ speed on the host. Range is 0 to 273000 in unit of
+ uV. Default is 0.
+- apm,tx-pre-cursor2 : 2st pre-cursor emphasis taps control. Two set of
+ 3-tuple setting for each (up to 3) supported link
+ speed on the host. Range is 0 to 127400 in unit uV.
+ Default is 0x0.
+- apm,tx-post-cursor : Post-cursor emphasis taps control. Two set of
+ 3-tuple setting for Gen1, Gen2, and Gen3. Range is
+ between 0 to 0x1f in unit of 18.2mV. Default is 0xf.
+- apm,tx-speed : Tx operating speed. One set of 3-tuple for each
+ supported link speed on the host.
+ 0 = 1-2Gbps
+ 1 = 2-4Gbps (1st tuple default)
+ 2 = 4-8Gbps
+ 3 = 8-15Gbps (2nd tuple default)
+ 4 = 2.5-4Gbps
+ 5 = 4-5Gbps
+ 6 = 5-6Gbps
+ 7 = 6-16Gbps (3rd tuple default)
+
+NOTE: PHY override parameters are board specific setting.
+
+Example:
+ phy1: phy@1f21a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f21a000 0x0 0x100>;
+ #phy-cells = <1>;
+ status = "disabled";
+ };
+
+ phy2: phy@1f22a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f22a000 0x0 0x100>;
+ #phy-cells = <1>;
+ status = "ok";
+ };
+
+ phy3: phy@1f23a000 {
+ compatible = "apm,xgene-phy";
+ reg = <0x0 0x1f23a000 0x0 0x100>;
+ #phy-cells = <1>;
+ status = "ok";
+ };
diff --git a/Documentation/devicetree/bindings/phy/berlin-sata-phy.txt b/Documentation/devicetree/bindings/phy/berlin-sata-phy.txt
new file mode 100644
index 000000000..c0155f842
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/berlin-sata-phy.txt
@@ -0,0 +1,36 @@
+Berlin SATA PHY
+---------------
+
+Required properties:
+- compatible: should be one of
+ "marvell,berlin2-sata-phy"
+ "marvell,berlin2q-sata-phy"
+- address-cells: should be 1
+- size-cells: should be 0
+- phy-cells: from the generic PHY bindings, must be 1
+- reg: address and length of the register
+- clocks: reference to the clock entry
+
+Sub-nodes:
+Each PHY should be represented as a sub-node.
+
+Sub-nodes required properties:
+- reg: the PHY number
+
+Example:
+ sata_phy: phy@f7e900a0 {
+ compatible = "marvell,berlin2q-sata-phy";
+ reg = <0xf7e900a0 0x200>;
+ clocks = <&chip CLKID_SATA>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #phy-cells = <1>;
+
+ sata-phy@0 {
+ reg = <0>;
+ };
+
+ sata-phy@1 {
+ reg = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/phy/berlin-usb-phy.txt b/Documentation/devicetree/bindings/phy/berlin-usb-phy.txt
new file mode 100644
index 000000000..be33780f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/berlin-usb-phy.txt
@@ -0,0 +1,16 @@
+* Marvell Berlin USB PHY
+
+Required properties:
+- compatible: "marvell,berlin2-usb-phy" or "marvell,berlin2cd-usb-phy"
+- reg: base address and length of the registers
+- #phys-cells: should be 0
+- resets: reference to the reset controller
+
+Example:
+
+ usb-phy@f774000 {
+ compatible = "marvell,berlin2-usb-phy";
+ reg = <0xf774000 0x128>;
+ #phy-cells = <0>;
+ resets = <&chip 0x104 14>;
+ };
diff --git a/Documentation/devicetree/bindings/phy/brcm,kona-usb2-phy.txt b/Documentation/devicetree/bindings/phy/brcm,kona-usb2-phy.txt
new file mode 100644
index 000000000..3dc8b3d2f
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/brcm,kona-usb2-phy.txt
@@ -0,0 +1,15 @@
+BROADCOM KONA USB2 PHY
+
+Required properties:
+ - compatible: brcm,kona-usb2-phy
+ - reg: offset and length of the PHY registers
+ - #phy-cells: must be 0
+Refer to phy/phy-bindings.txt for the generic PHY binding properties
+
+Example:
+
+ usbphy: usb-phy@3f130000 {
+ compatible = "brcm,kona-usb2-phy";
+ reg = <0x3f130000 0x28>;
+ #phy-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/phy/dm816x-phy.txt b/Documentation/devicetree/bindings/phy/dm816x-phy.txt
new file mode 100644
index 000000000..2fe3d11d0
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/dm816x-phy.txt
@@ -0,0 +1,24 @@
+Device tree binding documentation for am816x USB PHY
+=========================
+
+Required properties:
+- compatible : should be "ti,dm816x-usb-phy"
+- reg : offset and length of the PHY register set.
+- reg-names : name for the phy registers
+- clocks : phandle to the clock
+- clock-names : name of the clock
+- syscon: phandle for the syscon node to access misc registers
+- #phy-cells : from the generic PHY bindings, must be 1
+- syscon: phandle for the syscon node to access misc registers
+
+Example:
+
+usb_phy0: usb-phy@20 {
+ compatible = "ti,dm8168-usb-phy";
+ reg = <0x20 0x8>;
+ reg-names = "phy";
+ clocks = <&main_fapll 6>;
+ clock-names = "refclk";
+ #phy-cells = <0>;
+ syscon = <&scm_conf>;
+};
diff --git a/Documentation/devicetree/bindings/phy/hix5hd2-phy.txt b/Documentation/devicetree/bindings/phy/hix5hd2-phy.txt
new file mode 100644
index 000000000..296168b74
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/hix5hd2-phy.txt
@@ -0,0 +1,22 @@
+Hisilicon hix5hd2 SATA PHY
+-----------------------
+
+Required properties:
+- compatible: should be "hisilicon,hix5hd2-sata-phy"
+- reg: offset and length of the PHY registers
+- #phy-cells: must be 0
+Refer to phy/phy-bindings.txt for the generic PHY binding properties
+
+Optional Properties:
+- hisilicon,peripheral-syscon: phandle of syscon used to control peripheral.
+- hisilicon,power-reg: offset and bit number within peripheral-syscon,
+ register of controlling sata power supply.
+
+Example:
+ sata_phy: phy@f9900000 {
+ compatible = "hisilicon,hix5hd2-sata-phy";
+ reg = <0xf9900000 0x10000>;
+ #phy-cells = <0>;
+ hisilicon,peripheral-syscon = <&peripheral_ctrl>;
+ hisilicon,power-reg = <0x8 10>;
+ };
diff --git a/Documentation/devicetree/bindings/phy/phy-bindings.txt b/Documentation/devicetree/bindings/phy/phy-bindings.txt
new file mode 100644
index 000000000..1293c3217
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-bindings.txt
@@ -0,0 +1,70 @@
+This document explains only the device tree data binding. For general
+information about PHY subsystem refer to Documentation/phy.txt
+
+PHY device node
+===============
+
+Required Properties:
+#phy-cells: Number of cells in a PHY specifier; The meaning of all those
+ cells is defined by the binding for the phy node. The PHY
+ provider can use the values in cells to find the appropriate
+ PHY.
+
+Optional Properties:
+phy-supply: Phandle to a regulator that provides power to the PHY. This
+ regulator will be managed during the PHY power on/off sequence.
+
+For example:
+
+phys: phy {
+ compatible = "xxx";
+ reg = <...>;
+ .
+ .
+ #phy-cells = <1>;
+ .
+ .
+};
+
+That node describes an IP block (PHY provider) that implements 2 different PHYs.
+In order to differentiate between these 2 PHYs, an additional specifier should be
+given while trying to get a reference to it.
+
+PHY user node
+=============
+
+Required Properties:
+phys : the phandle for the PHY device (used by the PHY subsystem)
+phy-names : the names of the PHY corresponding to the PHYs present in the
+ *phys* phandle
+
+Example 1:
+usb1: usb_otg_ss@xxx {
+ compatible = "xxx";
+ reg = <xxx>;
+ .
+ .
+ phys = <&usb2_phy>, <&usb3_phy>;
+ phy-names = "usb2phy", "usb3phy";
+ .
+ .
+};
+
+This node represents a controller that uses two PHYs, one for usb2 and one for
+usb3.
+
+Example 2:
+usb2: usb_otg_ss@xxx {
+ compatible = "xxx";
+ reg = <xxx>;
+ .
+ .
+ phys = <&phys 1>;
+ phy-names = "usbphy";
+ .
+ .
+};
+
+This node represents a controller that uses one of the PHYs of the PHY provider
+device defined previously. Note that the phy handle has an additional specifier
+"1" to differentiate between the two PHYs.
diff --git a/Documentation/devicetree/bindings/phy/phy-miphy28lp.txt b/Documentation/devicetree/bindings/phy/phy-miphy28lp.txt
new file mode 100644
index 000000000..89caa885d
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-miphy28lp.txt
@@ -0,0 +1,117 @@
+STMicroelectronics STi MIPHY28LP PHY binding
+============================================
+
+This binding describes a miphy device that is used to control PHY hardware
+for SATA, PCIe or USB3.
+
+Required properties (controller (parent) node):
+- compatible : Should be "st,miphy28lp-phy".
+- st,syscfg : Should be a phandle of the system configuration register group
+ which contain the SATA, PCIe or USB3 mode setting bits.
+
+Required nodes : A sub-node is required for each channel the controller
+ provides. Address range information including the usual
+ 'reg' and 'reg-names' properties are used inside these
+ nodes to describe the controller's topology. These nodes
+ are translated by the driver's .xlate() function.
+
+Required properties (port (child) node):
+- #phy-cells : Should be 1 (See second example)
+ Cell after port phandle is device type from:
+ - PHY_TYPE_SATA
+ - PHY_TYPE_PCI
+ - PHY_TYPE_USB3
+- reg : Address and length of the register set for the device.
+- reg-names : The names of the register addresses corresponding to the registers
+ filled in "reg". It can also contain the offset of the system configuration
+ registers used as glue-logic to setup the device for SATA/PCIe or USB3
+ devices.
+- st,syscfg : Offset of the parent configuration register.
+- resets : phandle to the parent reset controller.
+- reset-names : Associated name must be "miphy-sw-rst".
+
+Optional properties (port (child) node):
+- st,osc-rdy : to check the MIPHY0_OSC_RDY status in the glue-logic. This
+ is not available in all the MiPHY. For example, for STiH407, only the
+ MiPHY0 has this bit.
+- st,osc-force-ext : to select the external oscillator. This can change from
+ different MiPHY inside the same SoC.
+- st,sata_gen : to select which SATA_SPDMODE has to be set in the SATA system config
+ register.
+- st,px_rx_pol_inv : to invert polarity of RXn/RXp (respectively negative line and positive
+ line).
+- st,scc-on : enable ssc to reduce effects of EMI (only for sata or PCIe).
+- st,tx-impedance-comp : to compensate tx impedance avoiding out of range values.
+
+example:
+
+ miphy28lp_phy: miphy28lp@9b22000 {
+ compatible = "st,miphy28lp-phy";
+ st,syscfg = <&syscfg_core>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ phy_port0: port@9b22000 {
+ reg = <0x9b22000 0xff>,
+ <0x9b09000 0xff>,
+ <0x9b04000 0xff>;
+ reg-names = "sata-up",
+ "pcie-up",
+ "pipew";
+
+ st,syscfg = <0x114 0x818 0xe0 0xec>;
+ #phy-cells = <1>;
+ st,osc-rdy;
+ reset-names = "miphy-sw-rst";
+ resets = <&softreset STIH407_MIPHY0_SOFTRESET>;
+ };
+
+ phy_port1: port@9b2a000 {
+ reg = <0x9b2a000 0xff>,
+ <0x9b19000 0xff>,
+ <0x9b14000 0xff>;
+ reg-names = "sata-up",
+ "pcie-up",
+ "pipew";
+
+ st,syscfg = <0x118 0x81c 0xe4 0xf0>;
+
+ #phy-cells = <1>;
+ st,osc-force-ext;
+ reset-names = "miphy-sw-rst";
+ resets = <&softreset STIH407_MIPHY1_SOFTRESET>;
+ };
+
+ phy_port2: port@8f95000 {
+ reg = <0x8f95000 0xff>,
+ <0x8f90000 0xff>;
+ reg-names = "pipew",
+ "usb3-up";
+
+ st,syscfg = <0x11c 0x820>;
+
+ #phy-cells = <1>;
+ reset-names = "miphy-sw-rst";
+ resets = <&softreset STIH407_MIPHY2_SOFTRESET>;
+ };
+ };
+
+
+Specifying phy control of devices
+=================================
+
+Device nodes should specify the configuration required in their "phys"
+property, containing a phandle to the miphy device node and an index
+specifying which configuration to use, as described in phy-bindings.txt.
+
+example:
+ sata0: sata@9b20000 {
+ ...
+ phys = <&phy_port0 PHY_TYPE_SATA>;
+ ...
+ };
+
+Macro definitions for the supported miphy configuration can be found in:
+
+include/dt-bindings/phy/phy.h
diff --git a/Documentation/devicetree/bindings/phy/phy-miphy365x.txt b/Documentation/devicetree/bindings/phy/phy-miphy365x.txt
new file mode 100644
index 000000000..8772900e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-miphy365x.txt
@@ -0,0 +1,77 @@
+STMicroelectronics STi MIPHY365x PHY binding
+============================================
+
+This binding describes a miphy device that is used to control PHY hardware
+for SATA and PCIe.
+
+Required properties (controller (parent) node):
+- compatible : Should be "st,miphy365x-phy"
+- st,syscfg : Phandle / integer array property. Phandle of sysconfig group
+ containing the miphy registers and integer array should contain
+ an entry for each port sub-node, specifying the control
+ register offset inside the sysconfig group.
+
+Required nodes : A sub-node is required for each channel the controller
+ provides. Address range information including the usual
+ 'reg' and 'reg-names' properties are used inside these
+ nodes to describe the controller's topology. These nodes
+ are translated by the driver's .xlate() function.
+
+Required properties (port (child) node):
+- #phy-cells : Should be 1 (See second example)
+ Cell after port phandle is device type from:
+ - PHY_TYPE_SATA
+ - PHY_TYPE_PCI
+- reg : Address and length of register sets for each device in
+ "reg-names"
+- reg-names : The names of the register addresses corresponding to the
+ registers filled in "reg":
+ - sata: For SATA devices
+ - pcie: For PCIe devices
+
+Optional properties (port (child) node):
+- st,sata-gen : Generation of locally attached SATA IP. Expected values
+ are {1,2,3). If not supplied generation 1 hardware will
+ be expected
+- st,pcie-tx-pol-inv : Bool property to invert the polarity PCIe Tx (Txn/Txp)
+- st,sata-tx-pol-inv : Bool property to invert the polarity SATA Tx (Txn/Txp)
+
+Example:
+
+ miphy365x_phy: miphy365x@fe382000 {
+ compatible = "st,miphy365x-phy";
+ st,syscfg = <&syscfg_rear 0x824 0x828>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ phy_port0: port@fe382000 {
+ reg = <0xfe382000 0x100>, <0xfe394000 0x100>;
+ reg-names = "sata", "pcie";
+ #phy-cells = <1>;
+ st,sata-gen = <3>;
+ };
+
+ phy_port1: port@fe38a000 {
+ reg = <0xfe38a000 0x100>, <0xfe804000 0x100>;;
+ reg-names = "sata", "pcie", "syscfg";
+ #phy-cells = <1>;
+ st,pcie-tx-pol-inv;
+ };
+ };
+
+Specifying phy control of devices
+=================================
+
+Device nodes should specify the configuration required in their "phys"
+property, containing a phandle to the phy port node and a device type.
+
+Example:
+
+#include <dt-bindings/phy/phy.h>
+
+ sata0: sata@fe380000 {
+ ...
+ phys = <&phy_port0 PHY_TYPE_SATA>;
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/phy/phy-mvebu.txt b/Documentation/devicetree/bindings/phy/phy-mvebu.txt
new file mode 100644
index 000000000..f95b6260a
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-mvebu.txt
@@ -0,0 +1,43 @@
+* Marvell MVEBU SATA PHY
+
+Power control for the SATA phy found on Marvell MVEBU SoCs.
+
+This document extends the binding described in phy-bindings.txt
+
+Required properties :
+
+ - reg : Offset and length of the register set for the SATA device
+ - compatible : Should be "marvell,mvebu-sata-phy"
+ - clocks : phandle of clock and specifier that supplies the device
+ - clock-names : Should be "sata"
+
+Example:
+ sata-phy@84000 {
+ compatible = "marvell,mvebu-sata-phy";
+ reg = <0x84000 0x0334>;
+ clocks = <&gate_clk 15>;
+ clock-names = "sata";
+ #phy-cells = <0>;
+ status = "ok";
+ };
+
+Armada 375 USB cluster
+----------------------
+
+Armada 375 comes with an USB2 host and device controller and an USB3
+controller. The USB cluster control register allows to manage common
+features of both USB controllers.
+
+Required properties:
+
+- compatible: "marvell,armada-375-usb-cluster"
+- reg: Should contain usb cluster register location and length.
+- #phy-cells : from the generic phy bindings, must be 1. Possible
+values are 1 (USB2), 2 (USB3).
+
+Example:
+ usbcluster: usb-cluster@18400 {
+ compatible = "marvell,armada-375-usb-cluster";
+ reg = <0x18400 0x4>;
+ #phy-cells = <1>
+ };
diff --git a/Documentation/devicetree/bindings/phy/phy-stih407-usb.txt b/Documentation/devicetree/bindings/phy/phy-stih407-usb.txt
new file mode 100644
index 000000000..de6a706ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-stih407-usb.txt
@@ -0,0 +1,24 @@
+ST STiH407 USB PHY controller
+
+This file documents the dt bindings for the usb picoPHY driver which is the PHY for both USB2 and USB3
+host controllers (when controlling usb2/1.1 devices) available on STiH407 SoC family from STMicroelectronics.
+
+Required properties:
+- compatible : should be "st,stih407-usb2-phy"
+- st,syscfg : phandle of sysconfig bank plus integer array containing phyparam and phyctrl register offsets
+- resets : list of phandle and reset specifier pairs. There should be two entries, one
+ for the whole phy and one for the port
+- reset-names : list of reset signal names. Should be "global" and "port"
+See: Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
+See: Documentation/devicetree/bindings/reset/reset.txt
+
+Example:
+
+usb2_picophy0: usbpicophy@f8 {
+ compatible = "st,stih407-usb2-phy";
+ #phy-cells = <0>;
+ st,syscfg = <&syscfg_core 0x100 0xf4>;
+ resets = <&softreset STIH407_PICOPHY_SOFTRESET>,
+ <&picophyreset STIH407_PICOPHY0_RESET>;
+ reset-names = "global", "port";
+};
diff --git a/Documentation/devicetree/bindings/phy/phy-stih41x-usb.txt b/Documentation/devicetree/bindings/phy/phy-stih41x-usb.txt
new file mode 100644
index 000000000..00944a05e
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/phy-stih41x-usb.txt
@@ -0,0 +1,24 @@
+STMicroelectronics STiH41x USB PHY binding
+------------------------------------------
+
+This file contains documentation for the usb phy found in STiH415/6 SoCs from
+STMicroelectronics.
+
+Required properties:
+- compatible : should be "st,stih416-usb-phy" or "st,stih415-usb-phy"
+- st,syscfg : should be a phandle of the syscfg node
+- clock-names : must contain "osc_phy"
+- clocks : must contain an entry for each name in clock-names.
+See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+- #phy-cells : must be 0 for this phy
+See: Documentation/devicetree/bindings/phy/phy-bindings.txt
+
+Example:
+
+usb2_phy: usb2phy@0 {
+ compatible = "st,stih416-usb-phy";
+ #phy-cell = <0>;
+ st,syscfg = <&syscfg_rear>;
+ clocks = <&clk_sysin>;
+ clock-names = "osc_phy";
+};
diff --git a/Documentation/devicetree/bindings/phy/qcom-apq8064-sata-phy.txt b/Documentation/devicetree/bindings/phy/qcom-apq8064-sata-phy.txt
new file mode 100644
index 000000000..952f6c96b
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/qcom-apq8064-sata-phy.txt
@@ -0,0 +1,24 @@
+Qualcomm APQ8064 SATA PHY Controller
+------------------------------------
+
+SATA PHY nodes are defined to describe on-chip SATA Physical layer controllers.
+Each SATA PHY controller should have its own node.
+
+Required properties:
+- compatible: compatible list, contains "qcom,apq8064-sata-phy".
+- reg: offset and length of the SATA PHY register set;
+- #phy-cells: must be zero
+- clocks: a list of phandles and clock-specifier pairs, one for each entry in
+ clock-names.
+- clock-names: must be "cfg" for phy config clock.
+
+Example:
+ sata_phy: sata-phy@1b400000 {
+ compatible = "qcom,apq8064-sata-phy";
+ reg = <0x1b400000 0x200>;
+
+ clocks = <&gcc SATA_PHY_CFG_CLK>;
+ clock-names = "cfg";
+
+ #phy-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/phy/qcom-dwc3-usb-phy.txt b/Documentation/devicetree/bindings/phy/qcom-dwc3-usb-phy.txt
new file mode 100644
index 000000000..86f2dbe07
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/qcom-dwc3-usb-phy.txt
@@ -0,0 +1,39 @@
+Qualcomm DWC3 HS AND SS PHY CONTROLLER
+--------------------------------------
+
+DWC3 PHY nodes are defined to describe on-chip Synopsis Physical layer
+controllers. Each DWC3 PHY controller should have its own node.
+
+Required properties:
+- compatible: should contain one of the following:
+ - "qcom,dwc3-hs-usb-phy" for High Speed Synopsis PHY controller
+ - "qcom,dwc3-ss-usb-phy" for Super Speed Synopsis PHY controller
+- reg: offset and length of the DWC3 PHY controller register set
+- #phy-cells: must be zero
+- clocks: a list of phandles and clock-specifier pairs, one for each entry in
+ clock-names.
+- clock-names: Should contain "ref" for the PHY reference clock
+
+Optional clocks:
+ "xo" External reference clock
+
+Example:
+ phy@100f8800 {
+ compatible = "qcom,dwc3-hs-usb-phy";
+ reg = <0x100f8800 0x30>;
+ clocks = <&gcc USB30_0_UTMI_CLK>;
+ clock-names = "ref";
+ #phy-cells = <0>;
+
+ status = "ok";
+ };
+
+ phy@100f8830 {
+ compatible = "qcom,dwc3-ss-usb-phy";
+ reg = <0x100f8830 0x30>;
+ clocks = <&gcc USB30_0_MASTER_CLK>;
+ clock-names = "ref";
+ #phy-cells = <0>;
+
+ status = "ok";
+ };
diff --git a/Documentation/devicetree/bindings/phy/qcom-ipq806x-sata-phy.txt b/Documentation/devicetree/bindings/phy/qcom-ipq806x-sata-phy.txt
new file mode 100644
index 000000000..76bfbd056
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/qcom-ipq806x-sata-phy.txt
@@ -0,0 +1,23 @@
+Qualcomm IPQ806x SATA PHY Controller
+------------------------------------
+
+SATA PHY nodes are defined to describe on-chip SATA Physical layer controllers.
+Each SATA PHY controller should have its own node.
+
+Required properties:
+- compatible: compatible list, contains "qcom,ipq806x-sata-phy"
+- reg: offset and length of the SATA PHY register set;
+- #phy-cells: must be zero
+- clocks: must be exactly one entry
+- clock-names: must be "cfg"
+
+Example:
+ sata_phy: sata-phy@1b400000 {
+ compatible = "qcom,ipq806x-sata-phy";
+ reg = <0x1b400000 0x200>;
+
+ clocks = <&gcc SATA_PHY_CFG_CLK>;
+ clock-names = "cfg";
+
+ #phy-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt b/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt
new file mode 100644
index 000000000..00fc52a03
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/rcar-gen2-phy.txt
@@ -0,0 +1,51 @@
+* Renesas R-Car generation 2 USB PHY
+
+This file provides information on what the device node for the R-Car generation
+2 USB PHY contains.
+
+Required properties:
+- compatible: "renesas,usb-phy-r8a7790" if the device is a part of R8A7790 SoC.
+ "renesas,usb-phy-r8a7791" if the device is a part of R8A7791 SoC.
+- reg: offset and length of the register block.
+- #address-cells: number of address cells for the USB channel subnodes, must
+ be <1>.
+- #size-cells: number of size cells for the USB channel subnodes, must be <0>.
+- clocks: clock phandle and specifier pair.
+- clock-names: string, clock input name, must be "usbhs".
+
+The USB PHY device tree node should have the subnodes corresponding to the USB
+channels. These subnodes must contain the following properties:
+- reg: the USB controller selector; see the table below for the values.
+- #phy-cells: see phy-bindings.txt in the same directory, must be <1>.
+
+The phandle's argument in the PHY specifier is the USB controller selector for
+the USB channel; see the selector meanings below:
+
++-----------+---------------+---------------+
+|\ Selector | | |
++ --------- + 0 | 1 |
+| Channel \| | |
++-----------+---------------+---------------+
+| 0 | PCI EHCI/OHCI | HS-USB |
+| 2 | PCI EHCI/OHCI | xHCI |
++-----------+---------------+---------------+
+
+Example (Lager board):
+
+ usb-phy@e6590100 {
+ compatible = "renesas,usb-phy-r8a7790";
+ reg = <0 0xe6590100 0 0x100>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&mstp7_clks R8A7790_CLK_HSUSB>;
+ clock-names = "usbhs";
+
+ usb-channel@0 {
+ reg = <0>;
+ #phy-cells = <1>;
+ };
+ usb-channel@2 {
+ reg = <2>;
+ #phy-cells = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt
new file mode 100644
index 000000000..826454ac4
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/rockchip-usb-phy.txt
@@ -0,0 +1,37 @@
+ROCKCHIP USB2 PHY
+
+Required properties:
+ - compatible: rockchip,rk3288-usb-phy
+ - rockchip,grf : phandle to the syscon managing the "general
+ register files"
+ - #address-cells: should be 1
+ - #size-cells: should be 0
+
+Sub-nodes:
+Each PHY should be represented as a sub-node.
+
+Sub-nodes
+required properties:
+- #phy-cells: should be 0
+- reg: PHY configure reg address offset in GRF
+ "0x320" - for PHY attach to OTG controller
+ "0x334" - for PHY attach to HOST0 controller
+ "0x348" - for PHY attach to HOST1 controller
+
+Optional Properties:
+- clocks : phandle + clock specifier for the phy clocks
+- clock-names: string, clock name, must be "phyclk"
+
+Example:
+
+usbphy: phy {
+ compatible = "rockchip,rk3288-usb-phy";
+ rockchip,grf = <&grf>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ usbphy0: usb-phy0 {
+ #phy-cells = <0>;
+ reg = <0x320>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/phy/samsung-phy.txt b/Documentation/devicetree/bindings/phy/samsung-phy.txt
new file mode 100644
index 000000000..60c6f2a63
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/samsung-phy.txt
@@ -0,0 +1,176 @@
+Samsung S5P/EXYNOS SoC series MIPI CSIS/DSIM DPHY
+-------------------------------------------------
+
+Required properties:
+- compatible : should be "samsung,s5pv210-mipi-video-phy";
+- #phy-cells : from the generic phy bindings, must be 1;
+- syscon - phandle to the PMU system controller;
+
+For "samsung,s5pv210-mipi-video-phy" compatible PHYs the second cell in
+the PHY specifier identifies the PHY and its meaning is as follows:
+ 0 - MIPI CSIS 0,
+ 1 - MIPI DSIM 0,
+ 2 - MIPI CSIS 1,
+ 3 - MIPI DSIM 1.
+
+Samsung EXYNOS SoC series Display Port PHY
+-------------------------------------------------
+
+Required properties:
+- compatible : should be one of the following supported values:
+ - "samsung,exynos5250-dp-video-phy"
+ - "samsung,exynos5420-dp-video-phy"
+- samsung,pmu-syscon: phandle for PMU system controller interface, used to
+ control pmu registers for power isolation.
+- #phy-cells : from the generic PHY bindings, must be 0;
+
+Samsung S5P/EXYNOS SoC series USB PHY
+-------------------------------------------------
+
+Required properties:
+- compatible : should be one of the listed compatibles:
+ - "samsung,exynos3250-usb2-phy"
+ - "samsung,exynos4210-usb2-phy"
+ - "samsung,exynos4x12-usb2-phy"
+ - "samsung,exynos5250-usb2-phy"
+ - "samsung,s5pv210-usb2-phy"
+- reg : a list of registers used by phy driver
+ - first and obligatory is the location of phy modules registers
+- samsung,sysreg-phandle - handle to syscon used to control the system registers
+- samsung,pmureg-phandle - handle to syscon used to control PMU registers
+- #phy-cells : from the generic phy bindings, must be 1;
+- clocks and clock-names:
+ - the "phy" clock is required by the phy module, used as a gate
+ - the "ref" clock is used to get the rate of the clock provided to the
+ PHY module
+
+The first phandle argument in the PHY specifier identifies the PHY, its
+meaning is compatible dependent. For the currently supported SoCs (Exynos 4210
+and Exynos 4212) it is as follows:
+ 0 - USB device ("device"),
+ 1 - USB host ("host"),
+ 2 - HSIC0 ("hsic0"),
+ 3 - HSIC1 ("hsic1"),
+Exynos3250 has only USB device phy available as phy 0.
+
+Exynos 4210 and Exynos 4212 use mode switching and require that mode switch
+register is supplied.
+
+Example:
+
+For Exynos 4412 (compatible with Exynos 4212):
+
+usbphy: phy@125b0000 {
+ compatible = "samsung,exynos4x12-usb2-phy";
+ reg = <0x125b0000 0x100>;
+ clocks = <&clock 305>, <&clock 2>;
+ clock-names = "phy", "ref";
+ status = "okay";
+ #phy-cells = <1>;
+ samsung,sysreg-phandle = <&sys_reg>;
+ samsung,pmureg-phandle = <&pmu_reg>;
+};
+
+Then the PHY can be used in other nodes such as:
+
+phy-consumer@12340000 {
+ phys = <&usbphy 2>;
+ phy-names = "phy";
+};
+
+Refer to DT bindings documentation of particular PHY consumer devices for more
+information about required PHYs and the way of specification.
+
+Samsung SATA PHY Controller
+---------------------------
+
+SATA PHY nodes are defined to describe on-chip SATA Physical layer controllers.
+Each SATA PHY controller should have its own node.
+
+Required properties:
+- compatible : compatible list, contains "samsung,exynos5250-sata-phy"
+- reg : offset and length of the SATA PHY register set;
+- #phy-cells : must be zero
+- clocks : must be exactly one entry
+- clock-names : must be "sata_phyctrl"
+- samsung,exynos-sataphy-i2c-phandle : a phandle to the I2C device, no arguments
+- samsung,syscon-phandle : a phandle to the PMU system controller, no arguments
+
+Example:
+ sata_phy: sata-phy@12170000 {
+ compatible = "samsung,exynos5250-sata-phy";
+ reg = <0x12170000 0x1ff>;
+ clocks = <&clock 287>;
+ clock-names = "sata_phyctrl";
+ #phy-cells = <0>;
+ samsung,exynos-sataphy-i2c-phandle = <&sata_phy_i2c>;
+ samsung,syscon-phandle = <&pmu_syscon>;
+ };
+
+Device-Tree bindings for sataphy i2c client driver
+--------------------------------------------------
+
+Required properties:
+compatible: Should be "samsung,exynos-sataphy-i2c"
+- reg: I2C address of the sataphy i2c device.
+
+Example:
+
+ sata_phy_i2c:sata-phy@38 {
+ compatible = "samsung,exynos-sataphy-i2c";
+ reg = <0x38>;
+ };
+
+Samsung Exynos5 SoC series USB DRD PHY controller
+--------------------------------------------------
+
+Required properties:
+- compatible : Should be set to one of the following supported values:
+ - "samsung,exynos5250-usbdrd-phy" - for exynos5250 SoC,
+ - "samsung,exynos5420-usbdrd-phy" - for exynos5420 SoC.
+ - "samsung,exynos5433-usbdrd-phy" - for exynos5433 SoC.
+ - "samsung,exynos7-usbdrd-phy" - for exynos7 SoC.
+- reg : Register offset and length of USB DRD PHY register set;
+- clocks: Clock IDs array as required by the controller
+- clock-names: names of clocks correseponding to IDs in the clock property;
+ Required clocks:
+ - phy: main PHY clock (same as USB DRD controller i.e. DWC3 IP clock),
+ used for register access.
+ - ref: PHY's reference clock (usually crystal clock), used for
+ PHY operations, associated by phy name. It is used to
+ determine bit values for clock settings register.
+ For Exynos5420 this is given as 'sclk_usbphy30' in CMU.
+ - optional clocks: Exynos5433 & Exynos7 SoC has now following additional
+ gate clocks available:
+ - phy_pipe: for PIPE3 phy
+ - phy_utmi: for UTMI+ phy
+ - itp: for ITP generation
+- samsung,pmu-syscon: phandle for PMU system controller interface, used to
+ control pmu registers for power isolation.
+- #phy-cells : from the generic PHY bindings, must be 1;
+
+For "samsung,exynos5250-usbdrd-phy" and "samsung,exynos5420-usbdrd-phy"
+compatible PHYs, the second cell in the PHY specifier identifies the
+PHY id, which is interpreted as follows:
+ 0 - UTMI+ type phy,
+ 1 - PIPE3 type phy,
+
+Example:
+ usbdrd_phy: usbphy@12100000 {
+ compatible = "samsung,exynos5250-usbdrd-phy";
+ reg = <0x12100000 0x100>;
+ clocks = <&clock 286>, <&clock 1>;
+ clock-names = "phy", "ref";
+ samsung,pmu-syscon = <&pmu_system_controller>;
+ #phy-cells = <1>;
+ };
+
+- aliases: For SoCs like Exynos5420 having multiple USB 3.0 DRD PHY controllers,
+ 'usbdrd_phy' nodes should have numbered alias in the aliases node,
+ in the form of usbdrdphyN, N = 0, 1... (depending on number of
+ controllers).
+Example:
+ aliases {
+ usbdrdphy0 = &usb3_phy0;
+ usbdrdphy1 = &usb3_phy1;
+ };
diff --git a/Documentation/devicetree/bindings/phy/st-spear-miphy.txt b/Documentation/devicetree/bindings/phy/st-spear-miphy.txt
new file mode 100644
index 000000000..2a6bfdcc0
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/st-spear-miphy.txt
@@ -0,0 +1,15 @@
+ST SPEAr miphy DT details
+=========================
+
+ST Microelectronics SPEAr miphy is a phy controller supporting PCIe and SATA.
+
+Required properties:
+- compatible : should be "st,spear1310-miphy" or "st,spear1340-miphy"
+- reg : offset and length of the PHY register set.
+- misc: phandle for the syscon node to access misc registers
+- #phy-cells : from the generic PHY bindings, must be 1.
+ - cell[1]: 0 if phy used for SATA, 1 for PCIe.
+
+Optional properties:
+- phy-id: Instance id of the phy. Only required when there are multiple phys
+ present on a implementation.
diff --git a/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt
new file mode 100644
index 000000000..16528b9eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/sun4i-usb-phy.txt
@@ -0,0 +1,37 @@
+Allwinner sun4i USB PHY
+-----------------------
+
+Required properties:
+- compatible : should be one of
+ * allwinner,sun4i-a10-usb-phy
+ * allwinner,sun5i-a13-usb-phy
+ * allwinner,sun6i-a31-usb-phy
+ * allwinner,sun7i-a20-usb-phy
+- reg : a list of offset + length pairs
+- reg-names :
+ * "phy_ctrl"
+ * "pmu1"
+ * "pmu2" for sun4i, sun6i or sun7i
+- #phy-cells : from the generic phy bindings, must be 1
+- clocks : phandle + clock specifier for the phy clocks
+- clock-names :
+ * "usb_phy" for sun4i, sun5i or sun7i
+ * "usb0_phy", "usb1_phy" and "usb2_phy" for sun6i
+- resets : a list of phandle + reset specifier pairs
+- reset-names :
+ * "usb0_reset"
+ * "usb1_reset"
+ * "usb2_reset" for sun4i, sun6i or sun7i
+
+Example:
+ usbphy: phy@0x01c13400 {
+ #phy-cells = <1>;
+ compatible = "allwinner,sun4i-a10-usb-phy";
+ /* phy base regs, phy1 pmu reg, phy2 pmu reg */
+ reg = <0x01c13400 0x10 0x01c14800 0x4 0x01c1c800 0x4>;
+ reg-names = "phy_ctrl", "pmu1", "pmu2";
+ clocks = <&usb_clk 8>;
+ clock-names = "usb_phy";
+ resets = <&usb_clk 1>, <&usb_clk 2>;
+ reset-names = "usb1_reset", "usb2_reset";
+ };
diff --git a/Documentation/devicetree/bindings/phy/sun9i-usb-phy.txt b/Documentation/devicetree/bindings/phy/sun9i-usb-phy.txt
new file mode 100644
index 000000000..1cca85c70
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/sun9i-usb-phy.txt
@@ -0,0 +1,38 @@
+Allwinner sun9i USB PHY
+-----------------------
+
+Required properties:
+- compatible : should be one of
+ * allwinner,sun9i-a80-usb-phy
+- reg : a list of offset + length pairs
+- #phy-cells : from the generic phy bindings, must be 0
+- phy_type : "hsic" for HSIC usage;
+ other values or absence of this property indicates normal USB
+- clocks : phandle + clock specifier for the phy clocks
+- clock-names : depending on the "phy_type" property,
+ * "phy" for normal USB
+ * "hsic_480M", "hsic_12M" for HSIC
+- resets : a list of phandle + reset specifier pairs
+- reset-names : depending on the "phy_type" property,
+ * "phy" for normal USB
+ * "hsic" for HSIC
+
+Optional Properties:
+- phy-supply : from the generic phy bindings, a phandle to a regulator that
+ provides power to VBUS.
+
+It is recommended to list all clocks and resets available.
+The driver will only use those matching the phy_type.
+
+Example:
+ usbphy1: phy@00a01800 {
+ compatible = "allwinner,sun9i-a80-usb-phy";
+ reg = <0x00a01800 0x4>;
+ clocks = <&usb_phy_clk 2>, <&usb_phy_clk 10>,
+ <&usb_phy_clk 3>;
+ clock-names = "hsic_480M", "hsic_12M", "phy";
+ resets = <&usb_phy_clk 18>, <&usb_phy_clk 19>;
+ reset-names = "hsic", "phy";
+ status = "disabled";
+ #phy-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/phy/ti-phy.txt b/Documentation/devicetree/bindings/phy/ti-phy.txt
new file mode 100644
index 000000000..305e3df3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/ti-phy.txt
@@ -0,0 +1,102 @@
+TI PHY: DT DOCUMENTATION FOR PHYs in TI PLATFORMs
+
+OMAP CONTROL PHY
+
+Required properties:
+ - compatible: Should be one of
+ "ti,control-phy-otghs" - if it has otghs_control mailbox register as on OMAP4.
+ "ti,control-phy-usb2" - if it has Power down bit in control_dev_conf register
+ e.g. USB2_PHY on OMAP5.
+ "ti,control-phy-pipe3" - if it has DPLL and individual Rx & Tx power control
+ e.g. USB3 PHY and SATA PHY on OMAP5.
+ "ti,control-phy-pcie" - for pcie to support external clock for pcie and to
+ set PCS delay value.
+ e.g. PCIE PHY in DRA7x
+ "ti,control-phy-usb2-dra7" - if it has power down register like USB2 PHY on
+ DRA7 platform.
+ "ti,control-phy-usb2-am437" - if it has power down register like USB2 PHY on
+ AM437 platform.
+ - reg : register ranges as listed in the reg-names property
+ - reg-names: "otghs_control" for control-phy-otghs
+ "power", "pcie_pcs" and "control_sma" for control-phy-pcie
+ "power" for all other types
+
+omap_control_usb: omap-control-usb@4a002300 {
+ compatible = "ti,control-phy-otghs";
+ reg = <0x4a00233c 0x4>;
+ reg-names = "otghs_control";
+};
+
+OMAP USB2 PHY
+
+Required properties:
+ - compatible: Should be "ti,omap-usb2"
+ - reg : Address and length of the register set for the device.
+ - #phy-cells: determine the number of cells that should be given in the
+ phandle while referencing this phy.
+ - clocks: a list of phandles and clock-specifier pairs, one for each entry in
+ clock-names.
+ - clock-names: should include:
+ * "wkupclk" - wakeup clock.
+ * "refclk" - reference clock (optional).
+
+Optional properties:
+ - ctrl-module : phandle of the control module used by PHY driver to power on
+ the PHY.
+
+This is usually a subnode of ocp2scp to which it is connected.
+
+usb2phy@4a0ad080 {
+ compatible = "ti,omap-usb2";
+ reg = <0x4a0ad080 0x58>;
+ ctrl-module = <&omap_control_usb>;
+ #phy-cells = <0>;
+ clocks = <&usb_phy_cm_clk32k>, <&usb_otg_ss_refclk960m>;
+ clock-names = "wkupclk", "refclk";
+};
+
+TI PIPE3 PHY
+
+Required properties:
+ - compatible: Should be "ti,phy-usb3", "ti,phy-pipe3-sata" or
+ "ti,phy-pipe3-pcie. "ti,omap-usb3" is deprecated.
+ - reg : Address and length of the register set for the device.
+ - reg-names: The names of the register addresses corresponding to the registers
+ filled in "reg".
+ - #phy-cells: determine the number of cells that should be given in the
+ phandle while referencing this phy.
+ - clocks: a list of phandles and clock-specifier pairs, one for each entry in
+ clock-names.
+ - clock-names: should include:
+ * "wkupclk" - wakeup clock.
+ * "sysclk" - system clock.
+ * "refclk" - reference clock.
+ * "dpll_ref" - external dpll ref clk
+ * "dpll_ref_m2" - external dpll ref clk
+ * "phy-div" - divider for apll
+ * "div-clk" - apll clock
+
+Optional properties:
+ - ctrl-module : phandle of the control module used by PHY driver to power on
+ the PHY.
+ - id: If there are multiple instance of the same type, in order to
+ differentiate between each instance "id" can be used (e.g., multi-lane PCIe
+ PHY). If "id" is not provided, it is set to default value of '1'.
+
+This is usually a subnode of ocp2scp to which it is connected.
+
+usb3phy@4a084400 {
+ compatible = "ti,phy-usb3";
+ reg = <0x4a084400 0x80>,
+ <0x4a084800 0x64>,
+ <0x4a084c00 0x40>;
+ reg-names = "phy_rx", "phy_tx", "pll_ctrl";
+ ctrl-module = <&omap_control_usb>;
+ #phy-cells = <0>;
+ clocks = <&usb_phy_cm_clk32k>,
+ <&sys_clkin>,
+ <&usb_otg_ss_refclk960m>;
+ clock-names = "wkupclk",
+ "sysclk",
+ "refclk";
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/abilis,tb10x-iomux.txt b/Documentation/devicetree/bindings/pinctrl/abilis,tb10x-iomux.txt
new file mode 100644
index 000000000..2c1186622
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/abilis,tb10x-iomux.txt
@@ -0,0 +1,80 @@
+Abilis Systems TB10x pin controller
+===================================
+
+Required properties
+-------------------
+
+- compatible: should be "abilis,tb10x-iomux";
+- reg: should contain the physical address and size of the pin controller's
+ register range.
+
+
+Function definitions
+--------------------
+
+Functions are defined (and referenced) by sub-nodes of the pin controller.
+Every sub-node defines exactly one function (implying a set of pins).
+Every function is associated to one named pin group inside the pin controller
+driver and these names are used to associate pin group predefinitions to pin
+controller sub-nodes.
+
+Required function definition subnode properties:
+ - abilis,function: should be set to the name of the function's pin group.
+
+The following pin groups are available:
+ - GPIO ports: gpioa, gpiob, gpioc, gpiod, gpioe, gpiof, gpiog,
+ gpioh, gpioi, gpioj, gpiok, gpiol, gpiom, gpion
+ - Serial TS input ports: mis0, mis1, mis2, mis3, mis4, mis5, mis6, mis7
+ - Parallel TS input ports: mip1, mip3, mip5, mip7
+ - Serial TS output ports: mos0, mos1, mos2, mos3
+ - Parallel TS output port: mop
+ - CI+ port: ciplus
+ - CableCard (Mcard) port: mcard
+ - Smart card ports: stc0, stc1
+ - UART ports: uart0, uart1
+ - SPI ports: spi1, spi3
+ - JTAG: jtag
+
+All other ports of the chip are not multiplexed and thus not managed by this
+driver.
+
+
+GPIO ranges definition
+----------------------
+
+The named pin groups of GPIO ports can be used to define GPIO ranges as
+explained in Documentation/devicetree/bindings/gpio/gpio.txt.
+
+
+Example
+-------
+
+iomux: iomux@FF10601c {
+ compatible = "abilis,tb10x-iomux";
+ reg = <0xFF10601c 0x4>;
+ pctl_gpio_a: pctl-gpio-a {
+ abilis,function = "gpioa";
+ };
+ pctl_uart0: pctl-uart0 {
+ abilis,function = "uart0";
+ };
+};
+uart@FF100000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0xFF100000 0x100>;
+ clock-frequency = <166666666>;
+ interrupts = <25 1>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pctl_uart0>;
+};
+gpioa: gpio@FF140000 {
+ compatible = "abilis,tb10x-gpio";
+ reg = <0xFF140000 0x1000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpio = <3>;
+ gpio-ranges = <&iomux 0 0>;
+ gpio-ranges-group-names = "gpioa";
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
new file mode 100644
index 000000000..fdd8046e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/allwinner,sunxi-pinctrl.txt
@@ -0,0 +1,68 @@
+* Allwinner A1X Pin Controller
+
+The pins controlled by sunXi pin controller are organized in banks,
+each bank has 32 pins. Each pin has 7 multiplexing functions, with
+the first two functions being GPIO in and out. The configuration on
+the pins includes drive strength and pull-up.
+
+Required properties:
+- compatible: Should be one of the followings (depending on you SoC):
+ "allwinner,sun4i-a10-pinctrl"
+ "allwinner,sun5i-a10s-pinctrl"
+ "allwinner,sun5i-a13-pinctrl"
+ "allwinner,sun6i-a31-pinctrl"
+ "allwinner,sun6i-a31s-pinctrl"
+ "allwinner,sun6i-a31-r-pinctrl"
+ "allwinner,sun7i-a20-pinctrl"
+ "allwinner,sun8i-a23-pinctrl"
+ "allwinner,sun8i-a23-r-pinctrl"
+- reg: Should contain the register physical address and length for the
+ pin controller.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices.
+
+A pinctrl node should contain at least one subnodes representing the
+pinctrl groups available on the machine. Each subnode will list the
+pins it needs, and how they should be configured, with regard to muxer
+configuration, drive strength and pullups. If one of these options is
+not set, its actual value will be unspecified.
+
+Required subnode-properties:
+
+- allwinner,pins: List of strings containing the pin name.
+- allwinner,function: Function to mux the pins listed above to.
+
+Optional subnode-properties:
+- allwinner,drive: Integer. Represents the current sent to the pin
+ 0: 10 mA
+ 1: 20 mA
+ 2: 30 mA
+ 3: 40 mA
+- allwinner,pull: Integer.
+ 0: No resistor
+ 1: Pull-up resistor
+ 2: Pull-down resistor
+
+Examples:
+
+pinctrl@01c20800 {
+ compatible = "allwinner,sun5i-a13-pinctrl";
+ reg = <0x01c20800 0x400>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ uart1_pins_a: uart1@0 {
+ allwinner,pins = "PE10", "PE11";
+ allwinner,function = "uart1";
+ allwinner,drive = <0>;
+ allwinner,pull = <0>;
+ };
+
+ uart1_pins_b: uart1@1 {
+ allwinner,pins = "PG3", "PG4";
+ allwinner,function = "uart1";
+ allwinner,drive = <0>;
+ allwinner,pull = <0>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt
new file mode 100644
index 000000000..b7a93e80a
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/atmel,at91-pinctrl.txt
@@ -0,0 +1,150 @@
+* Atmel AT91 Pinmux Controller
+
+The AT91 Pinmux Controller, enables the IC
+to share one PAD to several functional blocks. The sharing is done by
+multiplexing the PAD input/output signals. For each PAD there are up to
+8 muxing options (called periph modes). Since different modules require
+different PAD settings (like pull up, keeper, etc) the contoller controls
+also the PAD settings parameters.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Atmel AT91 pin configuration node is a node of a group of pins which can be
+used for a specific device or function. This node represents both mux and config
+of the pins in that group. The 'pins' selects the function mode(also named pin
+mode) this pin can work on and the 'config' configures various pad settings
+such as pull-up, multi drive, etc.
+
+Required properties for iomux controller:
+- compatible: "atmel,at91rm9200-pinctrl" or "atmel,at91sam9x5-pinctrl"
+ or "atmel,sama5d3-pinctrl"
+- atmel,mux-mask: array of mask (periph per bank) to describe if a pin can be
+ configured in this periph mode. All the periph and bank need to be describe.
+
+How to create such array:
+
+Each column will represent the possible peripheral of the pinctrl
+Each line will represent a pio bank
+
+Take an example on the 9260
+Peripheral: 2 ( A and B)
+Bank: 3 (A, B and C)
+=>
+
+ /* A B */
+ 0xffffffff 0xffc00c3b /* pioA */
+ 0xffffffff 0x7fff3ccf /* pioB */
+ 0xffffffff 0x007fffff /* pioC */
+
+For each peripheral/bank we will descibe in a u32 if a pin can be
+configured in it by putting 1 to the pin bit (1 << pin)
+
+Let's take the pioA on peripheral B
+From the datasheet Table 10-2.
+Peripheral B
+PA0 MCDB0
+PA1 MCCDB
+PA2
+PA3 MCDB3
+PA4 MCDB2
+PA5 MCDB1
+PA6
+PA7
+PA8
+PA9
+PA10 ETX2
+PA11 ETX3
+PA12
+PA13
+PA14
+PA15
+PA16
+PA17
+PA18
+PA19
+PA20
+PA21
+PA22 ETXER
+PA23 ETX2
+PA24 ETX3
+PA25 ERX2
+PA26 ERX3
+PA27 ERXCK
+PA28 ECRS
+PA29 ECOL
+PA30 RXD4
+PA31 TXD4
+
+=> 0xffc00c3b
+
+Required properties for pin configuration node:
+- atmel,pins: 4 integers array, represents a group of pins mux and config
+ setting. The format is atmel,pins = <PIN_BANK PIN_BANK_NUM PERIPH CONFIG>.
+ The PERIPH 0 means gpio, PERIPH 1 is periph A, PERIPH 2 is periph B...
+ PIN_BANK 0 is pioA, PIN_BANK 1 is pioB...
+
+Bits used for CONFIG:
+PULL_UP (1 << 0): indicate this pin needs a pull up.
+MULTIDRIVE (1 << 1): indicate this pin needs to be configured as multi-drive.
+ Multi-drive is equivalent to open-drain type output.
+DEGLITCH (1 << 2): indicate this pin needs deglitch.
+PULL_DOWN (1 << 3): indicate this pin needs a pull down.
+DIS_SCHMIT (1 << 4): indicate this pin needs to the disable schmitt trigger.
+DRIVE_STRENGTH (3 << 5): indicate the drive strength of the pin using the
+ following values:
+ 00 - No change (reset state value kept)
+ 01 - Low
+ 10 - Medium
+ 11 - High
+DEBOUNCE (1 << 16): indicate this pin needs debounce.
+DEBOUNCE_VAL (0x3fff << 17): debounce value.
+
+NOTE:
+Some requirements for using atmel,at91rm9200-pinctrl binding:
+1. We have pin function node defined under at91 controller node to represent
+ what pinmux functions this SoC supports.
+2. The driver can use the function node's name and pin configuration node's
+ name describe the pin function and group hierarchy.
+ For example, Linux at91 pinctrl driver takes the function node's name
+ as the function name and pin configuration node's name as group name to
+ create the map table.
+3. Each pin configuration node should have a phandle, devices can set pins
+ configurations by referring to the phandle of that pin configuration node.
+4. The gpio controller must be describe in the pinctrl simple-bus.
+
+Examples:
+
+pinctrl@fffff400 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ compatible = "atmel,at91rm9200-pinctrl", "simple-bus";
+ reg = <0xfffff400 0x600>;
+
+ atmel,mux-mask = <
+ /* A B */
+ 0xffffffff 0xffc00c3b /* pioA */
+ 0xffffffff 0x7fff3ccf /* pioB */
+ 0xffffffff 0x007fffff /* pioC */
+ >;
+
+ /* shared pinctrl settings */
+ dbgu {
+ pinctrl_dbgu: dbgu-0 {
+ atmel,pins =
+ <1 14 0x1 0x0 /* PB14 periph A */
+ 1 15 0x1 0x1>; /* PB15 periph A with pullup */
+ };
+ };
+};
+
+dbgu: serial@fffff200 {
+ compatible = "atmel,at91sam9260-usart";
+ reg = <0xfffff200 0x200>;
+ interrupts = <1 4 7>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_dbgu>;
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt
new file mode 100644
index 000000000..4eaae3282
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm11351-pinctrl.txt
@@ -0,0 +1,461 @@
+Broadcom BCM281xx Pin Controller
+
+This is a pin controller for the Broadcom BCM281xx SoC family, which includes
+BCM11130, BCM11140, BCM11351, BCM28145, and BCM28155 SoCs.
+
+=== Pin Controller Node ===
+
+Required Properties:
+
+- compatible: Must be "brcm,bcm11351-pinctrl"
+- reg: Base address of the PAD Controller register block and the size
+ of the block.
+
+For example, the following is the bare minimum node:
+
+ pinctrl@35004800 {
+ compatible = "brcm,bcm11351-pinctrl";
+ reg = <0x35004800 0x430>;
+ };
+
+As a pin controller device, in addition to the required properties, this node
+should also contain the pin configuration nodes that client devices reference,
+if any.
+
+=== Pin Configuration Node ===
+
+Each pin configuration node is a sub-node of the pin controller node and is a
+container of an arbitrary number of subnodes, called pin group nodes in this
+document.
+
+Please refer to the pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the definition of a
+"pin configuration node".
+
+=== Pin Group Node ===
+
+A pin group node specifies the desired pin mux and/or pin configuration for an
+arbitrary number of pins. The name of the pin group node is optional and not
+used.
+
+A pin group node only affects the properties specified in the node, and has no
+effect on any properties that are omitted.
+
+The pin group node accepts a subset of the generic pin config properties. For
+details generic pin config properties, please refer to pinctrl-bindings.txt
+and <include/linux/pinctrl/pinconfig-generic.h>.
+
+Each pin controlled by this pin controller belong to one of three types:
+Standard, I2C, and HDMI. Each type accepts a different set of pin config
+properties. A list of pins and their types is provided below.
+
+Required Properties (applicable to all pins):
+
+- pins: Multiple strings. Specifies the name(s) of one or more pins to
+ be configured by this node.
+
+Optional Properties (for standard pins):
+
+- function: String. Specifies the pin mux selection. Values
+ must be one of: "alt1", "alt2", "alt3", "alt4"
+- input-schmitt-enable: No arguments. Enable schmitt-trigger mode.
+- input-schmitt-disable: No arguments. Disable schmitt-trigger mode.
+- bias-pull-up: No arguments. Pull up on pin.
+- bias-pull-down: No arguments. Pull down on pin.
+- bias-disable: No arguments. Disable pin bias.
+- slew-rate: Integer. Meaning depends on configured pin mux:
+ *_SCL or *_SDA:
+ 0: Standard(100kbps)& Fast(400kbps) mode
+ 1: Highspeed (3.4Mbps) mode
+ IC_DM or IC_DP:
+ 0: normal slew rate
+ 1: fast slew rate
+ Otherwise:
+ 0: fast slew rate
+ 1: normal slew rate
+- input-enable: No arguments. Enable input (does not affect
+ output.)
+- input-disable: No arguments. Disable input (does not affect
+ output.)
+- drive-strength: Integer. Drive strength in mA. Valid values are
+ 2, 4, 6, 8, 10, 12, 14, 16 mA.
+
+Optional Properties (for I2C pins):
+
+- function: String. Specifies the pin mux selection. Values
+ must be one of: "alt1", "alt2", "alt3", "alt4"
+- bias-pull-up: Integer. Pull up strength in Ohm. There are 3
+ pull-up resisitors (1.2k, 1.8k, 2.7k) available
+ in parallel for I2C pins, so the valid values
+ are: 568, 720, 831, 1080, 1200, 1800, 2700 Ohm.
+- bias-disable: No arguments. Disable pin bias.
+- slew-rate: Integer. Meaning depends on configured pin mux:
+ *_SCL or *_SDA:
+ 0: Standard(100kbps)& Fast(400kbps) mode
+ 1: Highspeed (3.4Mbps) mode
+ IC_DM or IC_DP:
+ 0: normal slew rate
+ 1: fast slew rate
+ Otherwise:
+ 0: fast slew rate
+ 1: normal slew rate
+- input-enable: No arguments. Enable input (does not affect
+ output.)
+- input-disable: No arguments. Disable input (does not affect
+ output.)
+
+Optional Properties (for HDMI pins):
+
+- function: String. Specifies the pin mux selection. Values
+ must be one of: "alt1", "alt2", "alt3", "alt4"
+- slew-rate: Integer. Controls slew rate.
+ 0: Standard(100kbps)& Fast(400kbps) mode
+ 1: Highspeed (3.4Mbps) mode
+- input-enable: No arguments. Enable input (does not affect
+ output.)
+- input-disable: No arguments. Disable input (does not affect
+ output.)
+
+Example:
+// pin controller node
+pinctrl@35004800 {
+ compatible = "brcm,bcm11351-pinctrl";
+ reg = <0x35004800 0x430>;
+
+ // pin configuration node
+ dev_a_default: dev_a_active {
+ //group node defining 1 standard pin
+ grp_1 {
+ pins = "std_pin1";
+ function = "alt1";
+ input-schmitt-enable;
+ bias-disable;
+ slew-rate = <1>;
+ drive-strength = <4>;
+ };
+
+ // group node defining 2 I2C pins
+ grp_2 {
+ pins = "i2c_pin1", "i2c_pin2";
+ function = "alt2";
+ bias-pull-up = <720>;
+ input-enable;
+ };
+
+ // group node defining 2 HDMI pins
+ grp_3 {
+ pins = "hdmi_pin1", "hdmi_pin2";
+ function = "alt3";
+ slew-rate = <1>;
+ };
+
+ // other pin group nodes
+ ...
+ };
+
+ // other pin configuration nodes
+ ...
+};
+
+In the example above, "dev_a_active" is a pin configuration node with a number
+of sub-nodes. In the pin group node "grp_1", one pin, "std_pin1", is defined in
+the "pins" property. Thus, the remaining properties in the "grp_1" node applies
+only to this pin, including the following settings:
+ - setting pinmux to "alt1"
+ - enabling schmitt-trigger (hystersis) mode
+ - disabling pin bias
+ - setting the slew-rate to 1
+ - setting the drive strength to 4 mA
+Note that neither "input-enable" nor "input-disable" was specified - the pinctrl
+subsystem will therefore leave this property unchanged from whatever state it
+was in before applying these changes.
+
+The "pins" property in the pin group node "grp_2" specifies two pins -
+"i2c_pin1" and "i2c_pin2"; the remaining properties in this pin group node,
+therefore, applies to both of these pins. The properties include:
+ - setting pinmux to "alt2"
+ - setting pull-up resistance to 720 Ohm (ie. enabling 1.2k and 1.8k resistors
+ in parallel)
+ - enabling both pins' input
+"slew-rate" is not specified in this pin group node, so the slew-rate for these
+pins are left as-is.
+
+Finally, "grp_3" defines two HDMI pins. The following properties are applied to
+both pins:
+ - setting pinmux to "alt3"
+ - setting slew-rate to 1; for HDMI pins, this corresponds to the 3.4 Mbps
+ Highspeed mode
+The input is neither enabled or disabled, and is left untouched.
+
+=== Pin Names and Type ===
+
+The following are valid pin names and their pin types:
+
+ "adcsync", Standard
+ "bat_rm", Standard
+ "bsc1_scl", I2C
+ "bsc1_sda", I2C
+ "bsc2_scl", I2C
+ "bsc2_sda", I2C
+ "classgpwr", Standard
+ "clk_cx8", Standard
+ "clkout_0", Standard
+ "clkout_1", Standard
+ "clkout_2", Standard
+ "clkout_3", Standard
+ "clkreq_in_0", Standard
+ "clkreq_in_1", Standard
+ "cws_sys_req1", Standard
+ "cws_sys_req2", Standard
+ "cws_sys_req3", Standard
+ "digmic1_clk", Standard
+ "digmic1_dq", Standard
+ "digmic2_clk", Standard
+ "digmic2_dq", Standard
+ "gpen13", Standard
+ "gpen14", Standard
+ "gpen15", Standard
+ "gpio00", Standard
+ "gpio01", Standard
+ "gpio02", Standard
+ "gpio03", Standard
+ "gpio04", Standard
+ "gpio05", Standard
+ "gpio06", Standard
+ "gpio07", Standard
+ "gpio08", Standard
+ "gpio09", Standard
+ "gpio10", Standard
+ "gpio11", Standard
+ "gpio12", Standard
+ "gpio13", Standard
+ "gpio14", Standard
+ "gps_pablank", Standard
+ "gps_tmark", Standard
+ "hdmi_scl", HDMI
+ "hdmi_sda", HDMI
+ "ic_dm", Standard
+ "ic_dp", Standard
+ "kp_col_ip_0", Standard
+ "kp_col_ip_1", Standard
+ "kp_col_ip_2", Standard
+ "kp_col_ip_3", Standard
+ "kp_row_op_0", Standard
+ "kp_row_op_1", Standard
+ "kp_row_op_2", Standard
+ "kp_row_op_3", Standard
+ "lcd_b_0", Standard
+ "lcd_b_1", Standard
+ "lcd_b_2", Standard
+ "lcd_b_3", Standard
+ "lcd_b_4", Standard
+ "lcd_b_5", Standard
+ "lcd_b_6", Standard
+ "lcd_b_7", Standard
+ "lcd_g_0", Standard
+ "lcd_g_1", Standard
+ "lcd_g_2", Standard
+ "lcd_g_3", Standard
+ "lcd_g_4", Standard
+ "lcd_g_5", Standard
+ "lcd_g_6", Standard
+ "lcd_g_7", Standard
+ "lcd_hsync", Standard
+ "lcd_oe", Standard
+ "lcd_pclk", Standard
+ "lcd_r_0", Standard
+ "lcd_r_1", Standard
+ "lcd_r_2", Standard
+ "lcd_r_3", Standard
+ "lcd_r_4", Standard
+ "lcd_r_5", Standard
+ "lcd_r_6", Standard
+ "lcd_r_7", Standard
+ "lcd_vsync", Standard
+ "mdmgpio0", Standard
+ "mdmgpio1", Standard
+ "mdmgpio2", Standard
+ "mdmgpio3", Standard
+ "mdmgpio4", Standard
+ "mdmgpio5", Standard
+ "mdmgpio6", Standard
+ "mdmgpio7", Standard
+ "mdmgpio8", Standard
+ "mphi_data_0", Standard
+ "mphi_data_1", Standard
+ "mphi_data_2", Standard
+ "mphi_data_3", Standard
+ "mphi_data_4", Standard
+ "mphi_data_5", Standard
+ "mphi_data_6", Standard
+ "mphi_data_7", Standard
+ "mphi_data_8", Standard
+ "mphi_data_9", Standard
+ "mphi_data_10", Standard
+ "mphi_data_11", Standard
+ "mphi_data_12", Standard
+ "mphi_data_13", Standard
+ "mphi_data_14", Standard
+ "mphi_data_15", Standard
+ "mphi_ha0", Standard
+ "mphi_hat0", Standard
+ "mphi_hat1", Standard
+ "mphi_hce0_n", Standard
+ "mphi_hce1_n", Standard
+ "mphi_hrd_n", Standard
+ "mphi_hwr_n", Standard
+ "mphi_run0", Standard
+ "mphi_run1", Standard
+ "mtx_scan_clk", Standard
+ "mtx_scan_data", Standard
+ "nand_ad_0", Standard
+ "nand_ad_1", Standard
+ "nand_ad_2", Standard
+ "nand_ad_3", Standard
+ "nand_ad_4", Standard
+ "nand_ad_5", Standard
+ "nand_ad_6", Standard
+ "nand_ad_7", Standard
+ "nand_ale", Standard
+ "nand_cen_0", Standard
+ "nand_cen_1", Standard
+ "nand_cle", Standard
+ "nand_oen", Standard
+ "nand_rdy_0", Standard
+ "nand_rdy_1", Standard
+ "nand_wen", Standard
+ "nand_wp", Standard
+ "pc1", Standard
+ "pc2", Standard
+ "pmu_int", Standard
+ "pmu_scl", I2C
+ "pmu_sda", I2C
+ "rfst2g_mtsloten3g", Standard
+ "rgmii_0_rx_ctl", Standard
+ "rgmii_0_rxc", Standard
+ "rgmii_0_rxd_0", Standard
+ "rgmii_0_rxd_1", Standard
+ "rgmii_0_rxd_2", Standard
+ "rgmii_0_rxd_3", Standard
+ "rgmii_0_tx_ctl", Standard
+ "rgmii_0_txc", Standard
+ "rgmii_0_txd_0", Standard
+ "rgmii_0_txd_1", Standard
+ "rgmii_0_txd_2", Standard
+ "rgmii_0_txd_3", Standard
+ "rgmii_1_rx_ctl", Standard
+ "rgmii_1_rxc", Standard
+ "rgmii_1_rxd_0", Standard
+ "rgmii_1_rxd_1", Standard
+ "rgmii_1_rxd_2", Standard
+ "rgmii_1_rxd_3", Standard
+ "rgmii_1_tx_ctl", Standard
+ "rgmii_1_txc", Standard
+ "rgmii_1_txd_0", Standard
+ "rgmii_1_txd_1", Standard
+ "rgmii_1_txd_2", Standard
+ "rgmii_1_txd_3", Standard
+ "rgmii_gpio_0", Standard
+ "rgmii_gpio_1", Standard
+ "rgmii_gpio_2", Standard
+ "rgmii_gpio_3", Standard
+ "rtxdata2g_txdata3g1", Standard
+ "rtxen2g_txdata3g2", Standard
+ "rxdata3g0", Standard
+ "rxdata3g1", Standard
+ "rxdata3g2", Standard
+ "sdio1_clk", Standard
+ "sdio1_cmd", Standard
+ "sdio1_data_0", Standard
+ "sdio1_data_1", Standard
+ "sdio1_data_2", Standard
+ "sdio1_data_3", Standard
+ "sdio4_clk", Standard
+ "sdio4_cmd", Standard
+ "sdio4_data_0", Standard
+ "sdio4_data_1", Standard
+ "sdio4_data_2", Standard
+ "sdio4_data_3", Standard
+ "sim_clk", Standard
+ "sim_data", Standard
+ "sim_det", Standard
+ "sim_resetn", Standard
+ "sim2_clk", Standard
+ "sim2_data", Standard
+ "sim2_det", Standard
+ "sim2_resetn", Standard
+ "sri_c", Standard
+ "sri_d", Standard
+ "sri_e", Standard
+ "ssp_extclk", Standard
+ "ssp0_clk", Standard
+ "ssp0_fs", Standard
+ "ssp0_rxd", Standard
+ "ssp0_txd", Standard
+ "ssp2_clk", Standard
+ "ssp2_fs_0", Standard
+ "ssp2_fs_1", Standard
+ "ssp2_fs_2", Standard
+ "ssp2_fs_3", Standard
+ "ssp2_rxd_0", Standard
+ "ssp2_rxd_1", Standard
+ "ssp2_txd_0", Standard
+ "ssp2_txd_1", Standard
+ "ssp3_clk", Standard
+ "ssp3_fs", Standard
+ "ssp3_rxd", Standard
+ "ssp3_txd", Standard
+ "ssp4_clk", Standard
+ "ssp4_fs", Standard
+ "ssp4_rxd", Standard
+ "ssp4_txd", Standard
+ "ssp5_clk", Standard
+ "ssp5_fs", Standard
+ "ssp5_rxd", Standard
+ "ssp5_txd", Standard
+ "ssp6_clk", Standard
+ "ssp6_fs", Standard
+ "ssp6_rxd", Standard
+ "ssp6_txd", Standard
+ "stat_1", Standard
+ "stat_2", Standard
+ "sysclken", Standard
+ "traceclk", Standard
+ "tracedt00", Standard
+ "tracedt01", Standard
+ "tracedt02", Standard
+ "tracedt03", Standard
+ "tracedt04", Standard
+ "tracedt05", Standard
+ "tracedt06", Standard
+ "tracedt07", Standard
+ "tracedt08", Standard
+ "tracedt09", Standard
+ "tracedt10", Standard
+ "tracedt11", Standard
+ "tracedt12", Standard
+ "tracedt13", Standard
+ "tracedt14", Standard
+ "tracedt15", Standard
+ "txdata3g0", Standard
+ "txpwrind", Standard
+ "uartb1_ucts", Standard
+ "uartb1_urts", Standard
+ "uartb1_urxd", Standard
+ "uartb1_utxd", Standard
+ "uartb2_urxd", Standard
+ "uartb2_utxd", Standard
+ "uartb3_ucts", Standard
+ "uartb3_urts", Standard
+ "uartb3_urxd", Standard
+ "uartb3_utxd", Standard
+ "uartb4_ucts", Standard
+ "uartb4_urts", Standard
+ "uartb4_urxd", Standard
+ "uartb4_utxd", Standard
+ "vc_cam1_scl", I2C
+ "vc_cam1_sda", I2C
+ "vc_cam2_scl", I2C
+ "vc_cam2_sda", I2C
+ "vc_cam3_scl", I2C
+ "vc_cam3_sda", I2C
diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt b/Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt
new file mode 100644
index 000000000..2569866c6
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/brcm,bcm2835-gpio.txt
@@ -0,0 +1,74 @@
+Broadcom BCM2835 GPIO (and pinmux) controller
+
+The BCM2835 GPIO module is a combined GPIO controller, (GPIO) interrupt
+controller, and pinmux/control device.
+
+Required properties:
+- compatible: "brcm,bcm2835-gpio"
+- reg: Should contain the physical address of the GPIO module's registers.
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters:
+ - bit 0 specifies polarity (0 for normal, 1 for inverted)
+- interrupts : The interrupt outputs from the controller. One interrupt per
+ individual bank followed by the "all banks" interrupt.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells : Should be 2.
+ The first cell is the GPIO number.
+ The second cell is used to specify flags:
+ bits[3:0] trigger type and level flags:
+ 1 = low-to-high edge triggered.
+ 2 = high-to-low edge triggered.
+ 4 = active high level-sensitive.
+ 8 = active low level-sensitive.
+ Valid combinations are 1, 2, 3, 4, 8.
+
+Please refer to ../gpio/gpio.txt for a general description of GPIO bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Each pin configuration node lists the pin(s) to which it applies, and one or
+more of the mux function to select on those pin(s), and pull-up/down
+configuration. Each subnode only affects those parameters that are explicitly
+listed. In other words, a subnode that lists only a mux function implies no
+information about any pull configuration. Similarly, a subnode that lists only
+a pul parameter implies no information about the mux function.
+
+Required subnode-properties:
+- brcm,pins: An array of cells. Each cell contains the ID of a pin. Valid IDs
+ are the integer GPIO IDs; 0==GPIO0, 1==GPIO1, ... 53==GPIO53.
+
+Optional subnode-properties:
+- brcm,function: Integer, containing the function to mux to the pin(s):
+ 0: GPIO in
+ 1: GPIO out
+ 2: alt5
+ 3: alt4
+ 4: alt0
+ 5: alt1
+ 6: alt2
+ 7: alt3
+- brcm,pull: Integer, representing the pull-down/up to apply to the pin(s):
+ 0: none
+ 1: down
+ 2: up
+
+Each of brcm,function and brcm,pull may contain either a single value which
+will be applied to all pins in brcm,pins, or 1 value for each entry in
+brcm,pins.
+
+Example:
+
+ gpio: gpio {
+ compatible = "brcm,bcm2835-gpio";
+ reg = <0x2200000 0xb4>;
+ interrupts = <2 17>, <2 19>, <2 18>, <2 20>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-gpio.txt b/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-gpio.txt
new file mode 100644
index 000000000..6540ca56b
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-gpio.txt
@@ -0,0 +1,98 @@
+Broadcom Cygnus GPIO/PINCONF Controller
+
+Required properties:
+
+- compatible:
+ Must be "brcm,cygnus-ccm-gpio", "brcm,cygnus-asiu-gpio", or
+ "brcm,cygnus-crmu-gpio"
+
+- reg:
+ Define the base and range of the I/O address space that contains the Cygnus
+GPIO/PINCONF controller registers
+
+- #gpio-cells:
+ Must be two. The first cell is the GPIO pin number (within the
+controller's pin space) and the second cell is used for the following:
+ bit[0]: polarity (0 for active high and 1 for active low)
+
+- gpio-controller:
+ Specifies that the node is a GPIO controller
+
+Optional properties:
+
+- interrupts:
+ Interrupt ID
+
+- interrupt-controller:
+ Specifies that the node is an interrupt controller
+
+- pinmux:
+ Specifies the phandle to the IOMUX device, where pins can be individually
+muxed to GPIO
+
+Supported generic PINCONF properties in child nodes:
+
+- pins:
+ The list of pins (within the controller's own pin space) that properties
+in the node apply to. Pin names are "gpio-<pin>"
+
+- bias-disable:
+ Disable pin bias
+
+- bias-pull-up:
+ Enable internal pull up resistor
+
+- bias-pull-down:
+ Enable internal pull down resistor
+
+- drive-strength:
+ Valid drive strength values include 2, 4, 6, 8, 10, 12, 14, 16 (mA)
+
+Example:
+ gpio_ccm: gpio@1800a000 {
+ compatible = "brcm,cygnus-ccm-gpio";
+ reg = <0x1800a000 0x50>,
+ <0x0301d164 0x20>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupts = <GIC_SPI 84 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-controller;
+
+ touch_pins: touch_pins {
+ pwr: pwr {
+ pins = "gpio-0";
+ drive-strength = <16>;
+ };
+
+ event: event {
+ pins = "gpio-1";
+ bias-pull-up;
+ };
+ };
+ };
+
+ gpio_asiu: gpio@180a5000 {
+ compatible = "brcm,cygnus-asiu-gpio";
+ reg = <0x180a5000 0x668>;
+ #gpio-cells = <2>;
+ gpio-controller;
+ interrupts = <GIC_SPI 174 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-controller;
+ };
+
+ /*
+ * Touchscreen that uses the CCM GPIO 0 and 1
+ */
+ tsc {
+ ...
+ ...
+ gpio-pwr = <&gpio_ccm 0 0>;
+ gpio-event = <&gpio_ccm 1 0>;
+ };
+
+ /* Bluetooth that uses the ASIU GPIO 5, with polarity inverted */
+ bluetooth {
+ ...
+ ...
+ bcm,rfkill-bank-sel = <&gpio_asiu 5 1>
+ }
diff --git a/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt
new file mode 100644
index 000000000..3600d5c6c
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/brcm,cygnus-pinmux.txt
@@ -0,0 +1,132 @@
+Broadcom Cygnus IOMUX Controller
+
+The Cygnus IOMUX controller supports group based mux configuration. In
+addition, certain pins can be muxed to GPIO function individually.
+
+Required properties:
+
+- compatible:
+ Must be "brcm,cygnus-pinmux"
+
+- reg:
+ Define the base and range of the I/O address space that contains the Cygnus
+IOMUX registers
+
+Properties in subnodes:
+
+- function:
+ The mux function to select
+
+- groups:
+ The list of groups to select with a given function
+
+For more details, refer to
+Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+
+For example:
+
+ pinmux: pinmux@0x0301d0c8 {
+ compatible = "brcm,cygnus-pinmux";
+ reg = <0x0301d0c8 0x1b0>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2s0_default>;
+
+ i2s0_default: i2s0_default {
+ mux {
+ function = "i2s0";
+ groups = "i2s0_0_grp", "i2s0_1_grp";
+ };
+ };
+ };
+
+List of supported functions and groups in Cygnus:
+
+"i2s0": "i2s0_0_grp", "i2s0_1_grp"
+
+"i2s1": "i2s1_0_grp", "i2s1_1_grp"
+
+"i2s2": "i2s2_0_grp", "i2s2_1_grp", "i2s2_2_grp", "i2s2_3_grp", "i2s2_4_grp"
+
+"spdif": "spdif_grp"
+
+"pwm0": "pwm0_grp"
+
+"pwm1": "pwm1_grp"
+
+"pwm2": "pwm2_grp"
+
+"pwm3": "pwm3_grp"
+
+"pwm4": "pwm4_grp"
+
+"pwm5": "pwm5_grp"
+
+"key": "key0_grp", "key1_grp", "key2_grp", "key3_grp", "key4_grp", "key5_grp",
+"key6_grp", "key7_grp", "key8_grp", "key9_grp", "key10_grp", "key11_grp",
+"key12_grp", "key13_grp", "key14_grp", "key15_grp"
+
+"audio_dte": "audio_dte0_grp", "audio_dte1_grp", "audio_dte2_grp", "audio_dte3_grp"
+
+"smart_card0": "smart_card0_grp", "smart_card0_fcb_grp"
+
+"smart_card1": "smart_card1_grp", "smart_card1_fcb_grp"
+
+"spi0": "spi0_grp"
+
+"spi1": "spi1_grp"
+
+"spi2": "spi2_grp"
+
+"spi3": "spi3_grp"
+
+"spi4": "spi4_0_grp", "spi4_1_grp"
+
+"spi5": "spi5_grp"
+
+"sw_led0": "sw_led0_0_grp", "sw_led0_1_grp"
+
+"sw_led1": "sw_led1_grp"
+
+"sw_led2": "sw_led2_0_grp", "sw_led2_1_grp"
+
+"d1w": "d1w_grp"
+
+"lcd": "lcd_grp"
+
+"sram": "sram_0_grp", "sram_1_grp"
+
+"uart0": "uart0_grp"
+
+"uart1": "uart1_grp", "uart1_dte_grp"
+
+"uart2": "uart2_grp"
+
+"uart3": "uart3_grp"
+
+"uart4": "uart4_grp"
+
+"qspi": "qspi_0_grp", "qspi_1_grp"
+
+"nand": "nand_grp"
+
+"sdio0": "sdio0_grp", "sdio0_cd_grp", "sdio0_mmc_grp"
+
+"sdio1": "sdio1_data_0_grp", "sdio1_data_1_grp", "sdio1_cd_grp",
+"sdio1_led_grp", "sdio1_mmc_grp"
+
+"can0": "can0_grp"
+
+"can1": "can1_grp"
+
+"cam": "cam_led_grp", "cam_0_grp", "cam_1_grp"
+
+"bsc1": "bsc1_grp"
+
+"pcie_clkreq": "pcie_clkreq_grp"
+
+"usb0_oc": "usb0_oc_grp"
+
+"usb1_oc": "usb1_oc_grp"
+
+"usb2_oc": "usb2_oc_grp"
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx-pinctrl.txt
new file mode 100644
index 000000000..9fde25f14
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx-pinctrl.txt
@@ -0,0 +1,94 @@
+* Freescale IOMUX Controller (IOMUXC) for i.MX
+
+The IOMUX Controller (IOMUXC), together with the IOMUX, enables the IC
+to share one PAD to several functional blocks. The sharing is done by
+multiplexing the PAD input/output signals. For each PAD there are up to
+8 muxing options (called ALT modes). Since different modules require
+different PAD settings (like pull up, keeper, etc) the IOMUXC controls
+also the PAD settings parameters.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Freescale IMX pin configuration node is a node of a group of pins which can be
+used for a specific device or function. This node represents both mux and config
+of the pins in that group. The 'mux' selects the function mode(also named mux
+mode) this pin can work on and the 'config' configures various pad settings
+such as pull-up, open drain, drive strength, etc.
+
+Required properties for iomux controller:
+- compatible: "fsl,<soc>-iomuxc"
+ Please refer to each fsl,<soc>-pinctrl.txt binding doc for supported SoCs.
+
+Required properties for pin configuration node:
+- fsl,pins: each entry consists of 6 integers and represents the mux and config
+ setting for one pin. The first 5 integers <mux_reg conf_reg input_reg mux_val
+ input_val> are specified using a PIN_FUNC_ID macro, which can be found in
+ imx*-pinfunc.h under device tree source folder. The last integer CONFIG is
+ the pad setting value like pull-up on this pin. And that's why fsl,pins entry
+ looks like <PIN_FUNC_ID CONFIG> in the example below.
+
+Bits used for CONFIG:
+NO_PAD_CTL(1 << 31): indicate this pin does not need config.
+
+SION(1 << 30): Software Input On Field.
+Force the selected mux mode input path no matter of MUX_MODE functionality.
+By default the input path is determined by functionality of the selected
+mux mode (regular).
+
+Other bits are used for PAD setting.
+Please refer to each fsl,<soc>-pinctrl,txt binding doc for SoC specific part
+of bits definitions.
+
+NOTE:
+Some requirements for using fsl,imx-pinctrl binding:
+1. We have pin function node defined under iomux controller node to represent
+ what pinmux functions this SoC supports.
+2. The pin configuration node intends to work on a specific function should
+ to be defined under that specific function node.
+ The function node's name should represent well about what function
+ this group of pins in this pin configuration node are working on.
+3. The driver can use the function node's name and pin configuration node's
+ name describe the pin function and group hierarchy.
+ For example, Linux IMX pinctrl driver takes the function node's name
+ as the function name and pin configuration node's name as group name to
+ create the map table.
+4. Each pin configuration node should have a phandle, devices can set pins
+ configurations by referring to the phandle of that pin configuration node.
+
+Examples:
+usdhc@0219c000 { /* uSDHC4 */
+ non-removable;
+ vmmc-supply = <&reg_3p3v>;
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usdhc4_1>;
+};
+
+iomuxc@020e0000 {
+ compatible = "fsl,imx6q-iomuxc";
+ reg = <0x020e0000 0x4000>;
+
+ /* shared pinctrl settings */
+ usdhc4 {
+ pinctrl_usdhc4_1: usdhc4grp-1 {
+ fsl,pins = <
+ MX6QDL_PAD_SD4_CMD__SD4_CMD 0x17059
+ MX6QDL_PAD_SD4_CLK__SD4_CLK 0x10059
+ MX6QDL_PAD_SD4_DAT0__SD4_DATA0 0x17059
+ MX6QDL_PAD_SD4_DAT1__SD4_DATA1 0x17059
+ MX6QDL_PAD_SD4_DAT2__SD4_DATA2 0x17059
+ MX6QDL_PAD_SD4_DAT3__SD4_DATA3 0x17059
+ MX6QDL_PAD_SD4_DAT4__SD4_DATA4 0x17059
+ MX6QDL_PAD_SD4_DAT5__SD4_DATA5 0x17059
+ MX6QDL_PAD_SD4_DAT6__SD4_DATA6 0x17059
+ MX6QDL_PAD_SD4_DAT7__SD4_DATA7 0x17059
+ >;
+ };
+ ....
+};
+Refer to the IOMUXC controller chapter in imx6q datasheet,
+0x17059 means enable hysteresis, 47KOhm Pull Up, 50Mhz speed,
+80Ohm driver strength and Fast Slew Rate.
+User should refer to each SoC spec to set the correct value.
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx25-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx25-pinctrl.txt
new file mode 100644
index 000000000..fd653bde1
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx25-pinctrl.txt
@@ -0,0 +1,23 @@
+* Freescale IMX25 IOMUX Controller
+
+Please refer to fsl,imx-pinctrl.txt in this directory for common binding part
+and usage.
+
+CONFIG bits definition:
+PAD_CTL_HYS (1 << 8)
+PAD_CTL_PKE (1 << 7)
+PAD_CTL_PUE (1 << 6)
+PAD_CTL_PUS_100K_DOWN (0 << 4)
+PAD_CTL_PUS_47K_UP (1 << 4)
+PAD_CTL_PUS_100K_UP (2 << 4)
+PAD_CTL_PUS_22K_UP (3 << 4)
+PAD_CTL_ODE_CMOS (0 << 3)
+PAD_CTL_ODE_OPENDRAIN (1 << 3)
+PAD_CTL_DSE_NOMINAL (0 << 1)
+PAD_CTL_DSE_HIGH (1 << 1)
+PAD_CTL_DSE_MAX (2 << 1)
+PAD_CTL_SRE_FAST (1 << 0)
+PAD_CTL_SRE_SLOW (0 << 0)
+
+Refer to imx25-pinfunc.h in device tree source folder for all available
+imx25 PIN_FUNC_ID.
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx27-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx27-pinctrl.txt
new file mode 100644
index 000000000..d1706ea82
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx27-pinctrl.txt
@@ -0,0 +1,121 @@
+* Freescale IMX27 IOMUX Controller
+
+Required properties:
+- compatible: "fsl,imx27-iomuxc"
+
+The iomuxc driver node should define subnodes containing of pinctrl configuration subnodes.
+
+Required properties for pin configuration node:
+- fsl,pins: three integers array, represents a group of pins mux and config
+ setting. The format is fsl,pins = <PIN MUX_ID CONFIG>.
+
+ PIN is an integer between 0 and 0xbf. imx27 has 6 ports with 32 configurable
+ configurable pins each. PIN is PORT * 32 + PORT_PIN, PORT_PIN is the pin
+ number on the specific port (between 0 and 31).
+
+ MUX_ID is
+ function + (direction << 2) + (gpio_oconf << 4) + (gpio_iconfa << 8) + (gpio_iconfb << 10)
+
+ function value is used to select the pin function.
+ Possible values:
+ 0 - Primary function
+ 1 - Alternate function
+ 2 - GPIO
+ Registers: GIUS (GPIO In Use), GPR (General Purpose Register)
+
+ direction defines the data direction of the pin.
+ Possible values:
+ 0 - Input
+ 1 - Output
+ Register: DDIR
+
+ gpio_oconf configures the gpio submodule output signal. This does not
+ have any effect unless GPIO function is selected. A/B/C_IN are output
+ signals of function blocks A,B and C. Specific function blocks are
+ described in the reference manual.
+ Possible values:
+ 0 - A_IN
+ 1 - B_IN
+ 2 - C_IN
+ 3 - Data Register
+ Registers: OCR1, OCR2
+
+ gpio_iconfa/b configures the gpio submodule input to functionblocks A and
+ B. GPIO function should be selected if this is configured.
+ Possible values:
+ 0 - GPIO_IN
+ 1 - Interrupt Status Register
+ 2 - Pulldown
+ 3 - Pullup
+ Registers ICONFA1, ICONFA2, ICONFB1 and ICONFB2
+
+ CONFIG can be 0 or 1, meaning Pullup disable/enable.
+
+
+The iomux controller has gpio child nodes which are embedded in the iomux
+control registers. They have to be defined as child nodes of the iomux device
+node. If gpio subnodes are defined "#address-cells", "#size-cells" and "ranges"
+properties for the iomux device node are required.
+
+Example:
+
+iomuxc: iomuxc@10015000 {
+ compatible = "fsl,imx27-iomuxc";
+ reg = <0x10015000 0x600>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ gpio1: gpio@10015000 {
+ ...
+ };
+
+ ...
+
+ uart {
+ pinctrl_uart1: uart-1 {
+ fsl,pins = <
+ 0x8c 0x004 0x0 /* UART1_TXD__UART1_TXD */
+ 0x8d 0x000 0x0 /* UART1_RXD__UART1_RXD */
+ 0x8e 0x004 0x0 /* UART1_CTS__UART1_CTS */
+ 0x8f 0x000 0x0 /* UART1_RTS__UART1_RTS */
+ >;
+ };
+
+ ...
+ };
+};
+
+
+For convenience there are macros defined in imx27-pinfunc.h which provide PIN
+and MUX_ID. They are structured as MX27_PAD_<Pad name>__<Signal name>. The names
+are defined in the i.MX27 reference manual.
+
+The above example using macros:
+
+iomuxc: iomuxc@10015000 {
+ compatible = "fsl,imx27-iomuxc";
+ reg = <0x10015000 0x600>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ gpio1: gpio@10015000 {
+ ...
+ };
+
+ ...
+
+ uart {
+ pinctrl_uart1: uart-1 {
+ fsl,pins = <
+ MX27_PAD_UART1_TXD__UART1_TXD 0x0
+ MX27_PAD_UART1_RXD__UART1_RXD 0x0
+ MX27_PAD_UART1_CTS__UART1_CTS 0x0
+ MX27_PAD_UART1_RTS__UART1_RTS 0x0
+ >;
+ };
+
+ ...
+ };
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx35-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx35-pinctrl.txt
new file mode 100644
index 000000000..c083dfd25
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx35-pinctrl.txt
@@ -0,0 +1,33 @@
+* Freescale IMX35 IOMUX Controller
+
+Please refer to fsl,imx-pinctrl.txt in this directory for common binding part
+and usage.
+
+Required properties:
+- compatible: "fsl,imx35-iomuxc"
+- fsl,pins: two integers array, represents a group of pins mux and config
+ setting. The format is fsl,pins = <PIN_FUNC_ID CONFIG>, PIN_FUNC_ID is a
+ pin working on a specific function, CONFIG is the pad setting value like
+ pull-up for this pin. Please refer to imx35 datasheet for the valid pad
+ config settings.
+
+CONFIG bits definition:
+PAD_CTL_DRIVE_VOLAGAGE_18 (1 << 13)
+PAD_CTL_DRIVE_VOLAGAGE_33 (0 << 13)
+PAD_CTL_HYS (1 << 8)
+PAD_CTL_PKE (1 << 7)
+PAD_CTL_PUE (1 << 6)
+PAD_CTL_PUS_100K_DOWN (0 << 4)
+PAD_CTL_PUS_47K_UP (1 << 4)
+PAD_CTL_PUS_100K_UP (2 << 4)
+PAD_CTL_PUS_22K_UP (3 << 4)
+PAD_CTL_ODE_CMOS (0 << 3)
+PAD_CTL_ODE_OPENDRAIN (1 << 3)
+PAD_CTL_DSE_NOMINAL (0 << 1)
+PAD_CTL_DSE_HIGH (1 << 1)
+PAD_CTL_DSE_MAX (2 << 1)
+PAD_CTL_SRE_FAST (1 << 0)
+PAD_CTL_SRE_SLOW (0 << 0)
+
+Refer to imx35-pinfunc.h in device tree source folder for all available
+imx35 PIN_FUNC_ID.
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx51-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx51-pinctrl.txt
new file mode 100644
index 000000000..4d1408fcc
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx51-pinctrl.txt
@@ -0,0 +1,32 @@
+* Freescale IMX51 IOMUX Controller
+
+Please refer to fsl,imx-pinctrl.txt in this directory for common binding part
+and usage.
+
+Required properties:
+- compatible: "fsl,imx51-iomuxc"
+- fsl,pins: two integers array, represents a group of pins mux and config
+ setting. The format is fsl,pins = <PIN_FUNC_ID CONFIG>, PIN_FUNC_ID is a
+ pin working on a specific function, CONFIG is the pad setting value like
+ pull-up for this pin. Please refer to imx51 datasheet for the valid pad
+ config settings.
+
+CONFIG bits definition:
+PAD_CTL_HVE (1 << 13)
+PAD_CTL_HYS (1 << 8)
+PAD_CTL_PKE (1 << 7)
+PAD_CTL_PUE (1 << 6)
+PAD_CTL_PUS_100K_DOWN (0 << 4)
+PAD_CTL_PUS_47K_UP (1 << 4)
+PAD_CTL_PUS_100K_UP (2 << 4)
+PAD_CTL_PUS_22K_UP (3 << 4)
+PAD_CTL_ODE (1 << 3)
+PAD_CTL_DSE_LOW (0 << 1)
+PAD_CTL_DSE_MED (1 << 1)
+PAD_CTL_DSE_HIGH (2 << 1)
+PAD_CTL_DSE_MAX (3 << 1)
+PAD_CTL_SRE_FAST (1 << 0)
+PAD_CTL_SRE_SLOW (0 << 0)
+
+Refer to imx51-pinfunc.h in device tree source folder for all available
+imx51 PIN_FUNC_ID.
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx53-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx53-pinctrl.txt
new file mode 100644
index 000000000..25dcb77cf
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx53-pinctrl.txt
@@ -0,0 +1,32 @@
+* Freescale IMX53 IOMUX Controller
+
+Please refer to fsl,imx-pinctrl.txt in this directory for common binding part
+and usage.
+
+Required properties:
+- compatible: "fsl,imx53-iomuxc"
+- fsl,pins: two integers array, represents a group of pins mux and config
+ setting. The format is fsl,pins = <PIN_FUNC_ID CONFIG>, PIN_FUNC_ID is a
+ pin working on a specific function, CONFIG is the pad setting value like
+ pull-up for this pin. Please refer to imx53 datasheet for the valid pad
+ config settings.
+
+CONFIG bits definition:
+PAD_CTL_HVE (1 << 13)
+PAD_CTL_HYS (1 << 8)
+PAD_CTL_PKE (1 << 7)
+PAD_CTL_PUE (1 << 6)
+PAD_CTL_PUS_100K_DOWN (0 << 4)
+PAD_CTL_PUS_47K_UP (1 << 4)
+PAD_CTL_PUS_100K_UP (2 << 4)
+PAD_CTL_PUS_22K_UP (3 << 4)
+PAD_CTL_ODE (1 << 3)
+PAD_CTL_DSE_LOW (0 << 1)
+PAD_CTL_DSE_MED (1 << 1)
+PAD_CTL_DSE_HIGH (2 << 1)
+PAD_CTL_DSE_MAX (3 << 1)
+PAD_CTL_SRE_FAST (1 << 0)
+PAD_CTL_SRE_SLOW (0 << 0)
+
+Refer to imx53-pinfunc.h in device tree source folder for all available
+imx53 PIN_FUNC_ID.
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx6dl-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx6dl-pinctrl.txt
new file mode 100644
index 000000000..0ac5bee87
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx6dl-pinctrl.txt
@@ -0,0 +1,38 @@
+* Freescale IMX6 DualLite/Solo IOMUX Controller
+
+Please refer to fsl,imx-pinctrl.txt in this directory for common binding part
+and usage.
+
+Required properties:
+- compatible: "fsl,imx6dl-iomuxc"
+- fsl,pins: two integers array, represents a group of pins mux and config
+ setting. The format is fsl,pins = <PIN_FUNC_ID CONFIG>, PIN_FUNC_ID is a
+ pin working on a specific function, CONFIG is the pad setting value like
+ pull-up for this pin. Please refer to imx6dl datasheet for the valid pad
+ config settings.
+
+CONFIG bits definition:
+PAD_CTL_HYS (1 << 16)
+PAD_CTL_PUS_100K_DOWN (0 << 14)
+PAD_CTL_PUS_47K_UP (1 << 14)
+PAD_CTL_PUS_100K_UP (2 << 14)
+PAD_CTL_PUS_22K_UP (3 << 14)
+PAD_CTL_PUE (1 << 13)
+PAD_CTL_PKE (1 << 12)
+PAD_CTL_ODE (1 << 11)
+PAD_CTL_SPEED_LOW (1 << 6)
+PAD_CTL_SPEED_MED (2 << 6)
+PAD_CTL_SPEED_HIGH (3 << 6)
+PAD_CTL_DSE_DISABLE (0 << 3)
+PAD_CTL_DSE_240ohm (1 << 3)
+PAD_CTL_DSE_120ohm (2 << 3)
+PAD_CTL_DSE_80ohm (3 << 3)
+PAD_CTL_DSE_60ohm (4 << 3)
+PAD_CTL_DSE_48ohm (5 << 3)
+PAD_CTL_DSE_40ohm (6 << 3)
+PAD_CTL_DSE_34ohm (7 << 3)
+PAD_CTL_SRE_FAST (1 << 0)
+PAD_CTL_SRE_SLOW (0 << 0)
+
+Refer to imx6dl-pinfunc.h in device tree source folder for all available
+imx6dl PIN_FUNC_ID.
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt
new file mode 100644
index 000000000..546610cf2
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx6q-pinctrl.txt
@@ -0,0 +1,38 @@
+* Freescale IMX6Q IOMUX Controller
+
+Please refer to fsl,imx-pinctrl.txt in this directory for common binding part
+and usage.
+
+Required properties:
+- compatible: "fsl,imx6q-iomuxc"
+- fsl,pins: two integers array, represents a group of pins mux and config
+ setting. The format is fsl,pins = <PIN_FUNC_ID CONFIG>, PIN_FUNC_ID is a
+ pin working on a specific function, CONFIG is the pad setting value like
+ pull-up for this pin. Please refer to imx6q datasheet for the valid pad
+ config settings.
+
+CONFIG bits definition:
+PAD_CTL_HYS (1 << 16)
+PAD_CTL_PUS_100K_DOWN (0 << 14)
+PAD_CTL_PUS_47K_UP (1 << 14)
+PAD_CTL_PUS_100K_UP (2 << 14)
+PAD_CTL_PUS_22K_UP (3 << 14)
+PAD_CTL_PUE (1 << 13)
+PAD_CTL_PKE (1 << 12)
+PAD_CTL_ODE (1 << 11)
+PAD_CTL_SPEED_LOW (1 << 6)
+PAD_CTL_SPEED_MED (2 << 6)
+PAD_CTL_SPEED_HIGH (3 << 6)
+PAD_CTL_DSE_DISABLE (0 << 3)
+PAD_CTL_DSE_240ohm (1 << 3)
+PAD_CTL_DSE_120ohm (2 << 3)
+PAD_CTL_DSE_80ohm (3 << 3)
+PAD_CTL_DSE_60ohm (4 << 3)
+PAD_CTL_DSE_48ohm (5 << 3)
+PAD_CTL_DSE_40ohm (6 << 3)
+PAD_CTL_DSE_34ohm (7 << 3)
+PAD_CTL_SRE_FAST (1 << 0)
+PAD_CTL_SRE_SLOW (0 << 0)
+
+Refer to imx6q-pinfunc.h in device tree source folder for all available
+imx6q PIN_FUNC_ID.
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx6sl-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx6sl-pinctrl.txt
new file mode 100644
index 000000000..e5f6d1f06
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx6sl-pinctrl.txt
@@ -0,0 +1,39 @@
+* Freescale IMX6 SoloLite IOMUX Controller
+
+Please refer to fsl,imx-pinctrl.txt in this directory for common binding part
+and usage.
+
+Required properties:
+- compatible: "fsl,imx6sl-iomuxc"
+- fsl,pins: two integers array, represents a group of pins mux and config
+ setting. The format is fsl,pins = <PIN_FUNC_ID CONFIG>, PIN_FUNC_ID is a
+ pin working on a specific function, CONFIG is the pad setting value like
+ pull-up for this pin. Please refer to imx6sl datasheet for the valid pad
+ config settings.
+
+CONFIG bits definition:
+PAD_CTL_LVE (1 << 22)
+PAD_CTL_HYS (1 << 16)
+PAD_CTL_PUS_100K_DOWN (0 << 14)
+PAD_CTL_PUS_47K_UP (1 << 14)
+PAD_CTL_PUS_100K_UP (2 << 14)
+PAD_CTL_PUS_22K_UP (3 << 14)
+PAD_CTL_PUE (1 << 13)
+PAD_CTL_PKE (1 << 12)
+PAD_CTL_ODE (1 << 11)
+PAD_CTL_SPEED_LOW (1 << 6)
+PAD_CTL_SPEED_MED (2 << 6)
+PAD_CTL_SPEED_HIGH (3 << 6)
+PAD_CTL_DSE_DISABLE (0 << 3)
+PAD_CTL_DSE_240ohm (1 << 3)
+PAD_CTL_DSE_120ohm (2 << 3)
+PAD_CTL_DSE_80ohm (3 << 3)
+PAD_CTL_DSE_60ohm (4 << 3)
+PAD_CTL_DSE_48ohm (5 << 3)
+PAD_CTL_DSE_40ohm (6 << 3)
+PAD_CTL_DSE_34ohm (7 << 3)
+PAD_CTL_SRE_FAST (1 << 0)
+PAD_CTL_SRE_SLOW (0 << 0)
+
+Refer to imx6sl-pinfunc.h in device tree source folder for all available
+imx6sl PIN_FUNC_ID.
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,imx6sx-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,imx6sx-pinctrl.txt
new file mode 100644
index 000000000..b1b595220
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,imx6sx-pinctrl.txt
@@ -0,0 +1,36 @@
+* Freescale i.MX6 SoloX IOMUX Controller
+
+Please refer to fsl,imx-pinctrl.txt in this directory for common binding part
+and usage.
+
+Required properties:
+- compatible: "fsl,imx6sx-iomuxc"
+- fsl,pins: each entry consists of 6 integers and represents the mux and config
+ setting for one pin. The first 5 integers <mux_reg conf_reg input_reg mux_val
+ input_val> are specified using a PIN_FUNC_ID macro, which can be found in
+ imx6sx-pinfunc.h under device tree source folder. The last integer CONFIG is
+ the pad setting value like pull-up on this pin. Please refer to i.MX6 SoloX
+ Reference Manual for detailed CONFIG settings.
+
+CONFIG bits definition:
+PAD_CTL_HYS (1 << 16)
+PAD_CTL_PUS_100K_DOWN (0 << 14)
+PAD_CTL_PUS_47K_UP (1 << 14)
+PAD_CTL_PUS_100K_UP (2 << 14)
+PAD_CTL_PUS_22K_UP (3 << 14)
+PAD_CTL_PUE (1 << 13)
+PAD_CTL_PKE (1 << 12)
+PAD_CTL_ODE (1 << 11)
+PAD_CTL_SPEED_LOW (0 << 6)
+PAD_CTL_SPEED_MED (1 << 6)
+PAD_CTL_SPEED_HIGH (3 << 6)
+PAD_CTL_DSE_DISABLE (0 << 3)
+PAD_CTL_DSE_260ohm (1 << 3)
+PAD_CTL_DSE_130ohm (2 << 3)
+PAD_CTL_DSE_87ohm (3 << 3)
+PAD_CTL_DSE_65ohm (4 << 3)
+PAD_CTL_DSE_52ohm (5 << 3)
+PAD_CTL_DSE_43ohm (6 << 3)
+PAD_CTL_DSE_37ohm (7 << 3)
+PAD_CTL_SRE_FAST (1 << 0)
+PAD_CTL_SRE_SLOW (0 << 0)
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,mxs-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,mxs-pinctrl.txt
new file mode 100644
index 000000000..1e70a8aff
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,mxs-pinctrl.txt
@@ -0,0 +1,127 @@
+* Freescale MXS Pin Controller
+
+The pins controlled by mxs pin controller are organized in banks, each bank
+has 32 pins. Each pin has 4 multiplexing functions, and generally, the 4th
+function is GPIO. The configuration on the pins includes drive strength,
+voltage and pull-up.
+
+Required properties:
+- compatible: "fsl,imx23-pinctrl" or "fsl,imx28-pinctrl"
+- reg: Should contain the register physical address and length for the
+ pin controller.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices.
+
+The node of mxs pin controller acts as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for
+a group of pins, and only affects those parameters that are explicitly listed.
+In other words, a subnode that describes a drive strength parameter implies no
+information about pull-up. For this reason, even seemingly boolean values are
+actually tristates in this binding: unspecified, off, or on. Unspecified is
+represented as an absent property, and off/on are represented as integer
+values 0 and 1.
+
+Those subnodes under mxs pin controller node will fall into two categories.
+One is to set up a group of pins for a function, both mux selection and pin
+configurations, and it's called group node in the binding document. The other
+one is to adjust the pin configuration for some particular pins that need a
+different configuration than what is defined in group node. The binding
+document calls this type of node config node.
+
+On mxs, there is no hardware pin group. The pin group in this binding only
+means a group of pins put together for particular peripheral to work in
+particular function, like SSP0 functioning as mmc0-8bit. That said, the
+group node should include all the pins needed for one function rather than
+having these pins defined in several group nodes. It also means each of
+"pinctrl-*" phandle in client device node should only have one group node
+pointed in there, while the phandle can have multiple config node referenced
+there to adjust configurations for some pins in the group.
+
+Required subnode-properties:
+- fsl,pinmux-ids: An integer array. Each integer in the array specify a pin
+ with given mux function, with bank, pin and mux packed as below.
+
+ [15..12] : bank number
+ [11..4] : pin number
+ [3..0] : mux selection
+
+ This integer with mux selection packed is used as an entity by both group
+ and config nodes to identify a pin. The mux selection in the integer takes
+ effects only on group node, and will get ignored by driver with config node,
+ since config node is only meant to set up pin configurations.
+
+ Valid values for these integers are listed below.
+
+- reg: Should be the index of the group nodes for same function. This property
+ is required only for group nodes, and should not be present in any config
+ nodes.
+
+Optional subnode-properties:
+- fsl,drive-strength: Integer.
+ 0: MXS_DRIVE_4mA
+ 1: MXS_DRIVE_8mA
+ 2: MXS_DRIVE_12mA
+ 3: MXS_DRIVE_16mA
+- fsl,voltage: Integer.
+ 0: MXS_VOLTAGE_LOW - 1.8 V
+ 1: MXS_VOLTAGE_HIGH - 3.3 V
+- fsl,pull-up: Integer.
+ 0: MXS_PULL_DISABLE - Disable the internal pull-up
+ 1: MXS_PULL_ENABLE - Enable the internal pull-up
+
+Note that when enabling the pull-up, the internal pad keeper gets disabled.
+Also, some pins doesn't have a pull up, in that case, setting the fsl,pull-up
+will only disable the internal pad keeper.
+
+Examples:
+
+pinctrl@80018000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx28-pinctrl";
+ reg = <0x80018000 2000>;
+
+ mmc0_8bit_pins_a: mmc0-8bit@0 {
+ reg = <0>;
+ fsl,pinmux-ids = <
+ MX28_PAD_SSP0_DATA0__SSP0_D0
+ MX28_PAD_SSP0_DATA1__SSP0_D1
+ MX28_PAD_SSP0_DATA2__SSP0_D2
+ MX28_PAD_SSP0_DATA3__SSP0_D3
+ MX28_PAD_SSP0_DATA4__SSP0_D4
+ MX28_PAD_SSP0_DATA5__SSP0_D5
+ MX28_PAD_SSP0_DATA6__SSP0_D6
+ MX28_PAD_SSP0_DATA7__SSP0_D7
+ MX28_PAD_SSP0_CMD__SSP0_CMD
+ MX28_PAD_SSP0_DETECT__SSP0_CARD_DETECT
+ MX28_PAD_SSP0_SCK__SSP0_SCK
+ >;
+ fsl,drive-strength = <MXS_DRIVE_4mA>;
+ fsl,voltage = <MXS_VOLTAGE_HIGH>;
+ fsl,pull-up = <MXS_PULL_ENABLE>;
+ };
+
+ mmc_cd_cfg: mmc-cd-cfg {
+ fsl,pinmux-ids = <MX28_PAD_SSP0_DETECT__SSP0_CARD_DETECT>;
+ fsl,pull-up = <MXS_PULL_DISABLE>;
+ };
+
+ mmc_sck_cfg: mmc-sck-cfg {
+ fsl,pinmux-ids = <MX28_PAD_SSP0_SCK__SSP0_SCK>;
+ fsl,drive-strength = <MXS_DRIVE_12mA>;
+ fsl,pull-up = <MXS_PULL_DISABLE>;
+ };
+};
+
+In this example, group node mmc0-8bit defines a group of pins for mxs SSP0
+to function as a 8-bit mmc device, with 8mA, 3.3V and pull-up configurations
+applied on all these pins. And config nodes mmc-cd-cfg and mmc-sck-cfg are
+adjusting the configuration for pins card-detection and clock from what group
+node mmc0-8bit defines. Only the configuration properties to be adjusted need
+to be listed in the config nodes.
+
+Valid values for i.MX28/i.MX23 pinmux-id are defined in
+arch/arm/boot/dts/imx28-pinfunc.h and arch/arm/boot/dts/imx23-pinfunc.h.
+The definitions for the padconfig properties can be found in
+arch/arm/boot/dts/mxs-pinfunc.h.
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,vf610-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/fsl,vf610-pinctrl.txt
new file mode 100644
index 000000000..ddcdeb697
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,vf610-pinctrl.txt
@@ -0,0 +1,41 @@
+Freescale Vybrid VF610 IOMUX Controller
+
+Please refer to fsl,imx-pinctrl.txt in this directory for common binding part
+and usage.
+
+Required properties:
+- compatible: "fsl,vf610-iomuxc"
+- fsl,pins: two integers array, represents a group of pins mux and config
+ setting. The format is fsl,pins = <PIN_FUNC_ID CONFIG>, PIN_FUNC_ID is
+ a pin working on a specific function, CONFIG is the pad setting value
+ such as pull-up, speed, ode for this pin. Please refer to Vybrid VF610
+ datasheet for the valid pad config settings.
+
+CONFIG bits definition:
+PAD_CTL_SPEED_LOW (1 << 12)
+PAD_CTL_SPEED_MED (2 << 12)
+PAD_CTL_SPEED_HIGH (3 << 12)
+PAD_CTL_SRE_FAST (1 << 11)
+PAD_CTL_SRE_SLOW (0 << 11)
+PAD_CTL_ODE (1 << 10)
+PAD_CTL_HYS (1 << 9)
+PAD_CTL_DSE_DISABLE (0 << 6)
+PAD_CTL_DSE_150ohm (1 << 6)
+PAD_CTL_DSE_75ohm (2 << 6)
+PAD_CTL_DSE_50ohm (3 << 6)
+PAD_CTL_DSE_37ohm (4 << 6)
+PAD_CTL_DSE_30ohm (5 << 6)
+PAD_CTL_DSE_25ohm (6 << 6)
+PAD_CTL_DSE_20ohm (7 << 6)
+PAD_CTL_PUS_100K_DOWN (0 << 4)
+PAD_CTL_PUS_47K_UP (1 << 4)
+PAD_CTL_PUS_100K_UP (2 << 4)
+PAD_CTL_PUS_22K_UP (3 << 4)
+PAD_CTL_PKE (1 << 3)
+PAD_CTL_PUE (1 << 2)
+PAD_CTL_OBE_ENABLE (1 << 1)
+PAD_CTL_IBE_ENABLE (1 << 0)
+PAD_CTL_OBE_IBE_ENABLE (3 << 0)
+
+Please refer to vf610-pinfunc.h in device tree source folder
+for all available PIN_FUNC_ID for Vybrid VF610.
diff --git a/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt
new file mode 100644
index 000000000..51b943cc9
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pdc-pinctrl.txt
@@ -0,0 +1,127 @@
+ImgTec TZ1090 PDC pin controller
+
+Required properties:
+- compatible: "img,tz1090-pdc-pinctrl"
+- reg: Should contain the register physical address and length of the
+ SOC_GPIO_CONTROL registers in the PDC register region.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+TZ1090-PDC's pin configuration nodes act as a container for an arbitrary number
+of subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function. For this reason, even seemingly boolean
+values are actually tristates in this binding: unspecified, off, or on.
+Unspecified is represented as an absent property, and off/on are represented as
+integer values 0 and 1.
+
+Required subnode-properties:
+- tz1090,pins : An array of strings. Each string contains the name of a pin or
+ group. Valid values for these names are listed below.
+
+Optional subnode-properties:
+- tz1090,function: A string containing the name of the function to mux to the
+ pin or group. Valid values for function names are listed below, including
+ which pingroups can be muxed to them.
+- supported generic pinconfig properties (for further details see
+ Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt):
+ - bias-disable
+ - bias-high-impedance
+ - bias-bus-hold
+ - bias-pull-up
+ - bias-pull-down
+ - input-schmitt-enable
+ - input-schmitt-disable
+ - drive-strength: Integer, control drive strength of pins in mA.
+ 2: 2mA
+ 4: 4mA
+ 8: 8mA
+ 12: 12mA
+ - low-power-enable: Flag, power-on-start weak pull-down for invalid power.
+ - low-power-disable: Flag, power-on-start weak pull-down disabled.
+
+Note that many of these properties are only valid for certain specific pins
+or groups. See the TZ1090 TRM for complete details regarding which groups
+support which functionality. The Linux pinctrl driver may also be a useful
+reference.
+
+Valid values for pin and group names are:
+
+ pins:
+
+ These all support bias-high-impediance, bias-pull-up, bias-pull-down, and
+ bias-bus-hold (which can also be provided to any of the groups below to set
+ it for all gpio pins in that group).
+
+ gpio0, gpio1, sys_wake0, sys_wake1, sys_wake2, ir_data, ext_power.
+
+ mux groups:
+
+ These all support function.
+
+ gpio0
+ pins: gpio0.
+ function: ir_mod_stable_out.
+ gpio1
+ pins: gpio1.
+ function: ir_mod_power_out.
+
+ drive groups:
+
+ These support input-schmitt-enable, input-schmitt-disable,
+ drive-strength, low-power-enable, and low-power-disable.
+
+ pdc
+ pins: gpio0, gpio1, sys_wake0, sys_wake1, sys_wake2, ir_data,
+ ext_power.
+
+Example:
+
+ pinctrl_pdc: pinctrl@02006500 {
+ #gpio-range-cells = <3>;
+ compatible = "img,tz1090-pdc-pinctrl";
+ reg = <0x02006500 0x100>;
+ };
+
+Example board file extracts:
+
+ &pinctrl_pdc {
+ pinctrl-names = "default";
+ pinctrl-0 = <&syswake_default>;
+
+ syswake_default: syswakes {
+ syswake_cfg {
+ tz1090,pins = "sys_wake0",
+ "sys_wake1",
+ "sys_wake2";
+ pull-up;
+ };
+ };
+ irmod_default: irmod {
+ gpio0_cfg {
+ tz1090,pins = "gpio0";
+ tz1090,function = "ir_mod_stable_out";
+ };
+ gpio1_cfg {
+ tz1090,pins = "gpio1";
+ tz1090,function = "ir_mod_power_out";
+ };
+ };
+ };
+
+ ir: ir@02006200 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&irmod_default>;
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt
new file mode 100644
index 000000000..509faa87a
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/img,tz1090-pinctrl.txt
@@ -0,0 +1,227 @@
+ImgTec TZ1090 pin controller
+
+Required properties:
+- compatible: "img,tz1090-pinctrl"
+- reg: Should contain the register physical address and length of the pad
+ configuration registers (CR_PADS_* and CR_IF_CTL0).
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+TZ1090's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function. For this reason, even seemingly boolean
+values are actually tristates in this binding: unspecified, off, or on.
+Unspecified is represented as an absent property, and off/on are represented as
+integer values 0 and 1.
+
+Required subnode-properties:
+- tz1090,pins : An array of strings. Each string contains the name of a pin or
+ group. Valid values for these names are listed below.
+
+Optional subnode-properties:
+- tz1090,function: A string containing the name of the function to mux to the
+ pin or group. Valid values for function names are listed below, including
+ which pingroups can be muxed to them.
+- supported generic pinconfig properties (for further details see
+ Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt):
+ - bias-disable
+ - bias-high-impedance
+ - bias-bus-hold
+ - bias-pull-up
+ - bias-pull-down
+ - input-schmitt-enable
+ - input-schmitt-disable
+ - drive-strength: Integer, control drive strength of pins in mA.
+ 2: 2mA
+ 4: 4mA
+ 8: 8mA
+ 12: 12mA
+
+
+Note that many of these properties are only valid for certain specific pins
+or groups. See the TZ1090 TRM for complete details regarding which groups
+support which functionality. The Linux pinctrl driver may also be a useful
+reference.
+
+Valid values for pin and group names are:
+
+ gpio pins:
+
+ These all support bias-high-impediance, bias-pull-up, bias-pull-down, and
+ bias-bus-hold (which can also be provided to any of the groups below to set
+ it for all pins in that group).
+
+ They also all support the some form of muxing. Any pins which are contained
+ in one of the mux groups (see below) can be muxed only to the functions
+ supported by the mux group. All other pins can be muxed to the "perip"
+ function which enables them with their intended peripheral.
+
+ Different pins in the same mux group cannot be muxed to different functions,
+ however it is possible to mux only a subset of the pins in a mux group to a
+ particular function and leave the remaining pins unmuxed. This is useful if
+ the board connects certain pins in a group to other devices to be controlled
+ by GPIO, and you don't want the usual peripheral to have any control of the
+ pin.
+
+ ant_sel0, ant_sel1, gain0, gain1, gain2, gain3, gain4, gain5, gain6, gain7,
+ i2s_bclk_out, i2s_din, i2s_dout0, i2s_dout1, i2s_dout2, i2s_lrclk_out,
+ i2s_mclk, pa_on, pdm_a, pdm_b, pdm_c, pdm_d, pll_on, rx_hp, rx_on,
+ scb0_sclk, scb0_sdat, scb1_sclk, scb1_sdat, scb2_sclk, scb2_sdat, sdh_cd,
+ sdh_clk_in, sdh_wp, sdio_clk, sdio_cmd, sdio_d0, sdio_d1, sdio_d2, sdio_d3,
+ spi0_cs0, spi0_cs1, spi0_cs2, spi0_din, spi0_dout, spi0_mclk, spi1_cs0,
+ spi1_cs1, spi1_cs2, spi1_din, spi1_dout, spi1_mclk, tft_blank_ls, tft_blue0,
+ tft_blue1, tft_blue2, tft_blue3, tft_blue4, tft_blue5, tft_blue6, tft_blue7,
+ tft_green0, tft_green1, tft_green2, tft_green3, tft_green4, tft_green5,
+ tft_green6, tft_green7, tft_hsync_nr, tft_panelclk, tft_pwrsave, tft_red0,
+ tft_red1, tft_red2, tft_red3, tft_red4, tft_red5, tft_red6, tft_red7,
+ tft_vd12acb, tft_vdden_gd, tft_vsync_ns, tx_on, uart0_cts, uart0_rts,
+ uart0_rxd, uart0_txd, uart1_rxd, uart1_txd.
+
+ bias-high-impediance: supported.
+ bias-pull-up: supported.
+ bias-pull-down: supported.
+ bias-bus-hold: supported.
+ function: perip or those supported by pin's mux group.
+
+ other pins:
+
+ These other pins are part of various pin groups below, but can't be
+ controlled as GPIOs. They do however support bias-high-impediance,
+ bias-pull-up, bias-pull-down, and bias-bus-hold (which can also be provided
+ to any of the groups below to set it for all pins in that group).
+
+ clk_out0, clk_out1, tck, tdi, tdo, tms, trst.
+
+ bias-high-impediance: supported.
+ bias-pull-up: supported.
+ bias-pull-down: supported.
+ bias-bus-hold: supported.
+
+ mux groups:
+
+ These all support function, and some support drive configs.
+
+ afe
+ pins: tx_on, rx_on, pll_on, pa_on, rx_hp, ant_sel0,
+ ant_sel1, gain0, gain1, gain2, gain3, gain4,
+ gain5, gain6, gain7.
+ function: afe, ts_out_0.
+ input-schmitt-enable: supported.
+ input-schmitt-disable: supported.
+ drive-strength: supported.
+ pdm_d
+ pins: pdm_d.
+ function: pdm_dac, usb_vbus.
+ sdh
+ pins: sdh_cd, sdh_wp, sdh_clk_in.
+ function: sdh, sdio.
+ sdio
+ pins: sdio_clk, sdio_cmd, sdio_d0, sdio_d1, sdio_d2,
+ sdio_d3.
+ function: sdio, sdh.
+ spi1_cs2
+ pins: spi1_cs2.
+ function: spi1_cs2, usb_vbus.
+ tft
+ pins: tft_red0, tft_red1, tft_red2, tft_red3,
+ tft_red4, tft_red5, tft_red6, tft_red7,
+ tft_green0, tft_green1, tft_green2, tft_green3,
+ tft_green4, tft_green5, tft_green6, tft_green7,
+ tft_blue0, tft_blue1, tft_blue2, tft_blue3,
+ tft_blue4, tft_blue5, tft_blue6, tft_blue7,
+ tft_vdden_gd, tft_panelclk, tft_blank_ls,
+ tft_vsync_ns, tft_hsync_nr, tft_vd12acb,
+ tft_pwrsave.
+ function: tft, ext_dac, not_iqadc_stb, iqdac_stb, ts_out_1,
+ lcd_trace, phy_ringosc.
+ input-schmitt-enable: supported.
+ input-schmitt-disable: supported.
+ drive-strength: supported.
+
+ drive groups:
+
+ These all support input-schmitt-enable, input-schmitt-disable,
+ and drive-strength.
+
+ jtag
+ pins: tck, trst, tdi, tdo, tms.
+ scb1
+ pins: scb1_sdat, scb1_sclk.
+ scb2
+ pins: scb2_sdat, scb2_sclk.
+ spi0
+ pins: spi0_mclk, spi0_cs0, spi0_cs1, spi0_cs2, spi0_dout, spi0_din.
+ spi1
+ pins: spi1_mclk, spi1_cs0, spi1_cs1, spi1_cs2, spi1_dout, spi1_din.
+ uart
+ pins: uart0_txd, uart0_rxd, uart0_rts, uart0_cts,
+ uart1_txd, uart1_rxd.
+ drive_i2s
+ pins: clk_out1, i2s_din, i2s_dout0, i2s_dout1, i2s_dout2,
+ i2s_lrclk_out, i2s_bclk_out, i2s_mclk.
+ drive_pdm
+ pins: clk_out0, pdm_b, pdm_a.
+ drive_scb0
+ pins: scb0_sclk, scb0_sdat, pdm_d, pdm_c.
+ drive_sdio
+ pins: sdio_clk, sdio_cmd, sdio_d0, sdio_d1, sdio_d2, sdio_d3,
+ sdh_wp, sdh_cd, sdh_clk_in.
+
+ convenience groups:
+
+ These are just convenient groupings of pins and don't support any drive
+ configs.
+
+ uart0
+ pins: uart0_cts, uart0_rts, uart0_rxd, uart0_txd.
+ uart1
+ pins: uart1_rxd, uart1_txd.
+ scb0
+ pins: scb0_sclk, scb0_sdat.
+ i2s
+ pins: i2s_bclk_out, i2s_din, i2s_dout0, i2s_dout1, i2s_dout2,
+ i2s_lrclk_out, i2s_mclk.
+
+Example:
+
+ pinctrl: pinctrl@02005800 {
+ #gpio-range-cells = <3>;
+ compatible = "img,tz1090-pinctrl";
+ reg = <0x02005800 0xe4>;
+ };
+
+Example board file extract:
+
+ &pinctrl {
+ uart0_default: uart0 {
+ uart0_cfg {
+ tz1090,pins = "uart0_rxd",
+ "uart0_txd";
+ tz1090,function = "perip";
+ };
+ };
+ tft_default: tft {
+ tft_cfg {
+ tz1090,pins = "tft";
+ tz1090,function = "tft";
+ };
+ };
+ };
+
+ uart@02004b00 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart0_default>;
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt b/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt
new file mode 100644
index 000000000..ac4da9fe0
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/lantiq,falcon-pinumx.txt
@@ -0,0 +1,83 @@
+Lantiq FALCON pinmux controller
+
+Required properties:
+- compatible: "lantiq,pinctrl-falcon"
+- reg: Should contain the physical address and length of the gpio/pinmux
+ register range
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Lantiq's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those group(s), and two pin configuration parameters:
+pull-up and open-drain
+
+The name of each subnode is not important as long as it is unique; all subnodes
+should be enumerated and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+We support 2 types of nodes.
+
+Definition of mux function groups:
+
+Required subnode-properties:
+- lantiq,groups : An array of strings. Each string contains the name of a group.
+ Valid values for these names are listed below.
+- lantiq,function: A string containing the name of the function to mux to the
+ group. Valid values for function names are listed below.
+
+Valid values for group and function names:
+
+ mux groups:
+ por, ntr, ntr8k, hrst, mdio, bootled, asc0, spi, spi cs0, spi cs1, i2c,
+ jtag, slic, pcm, asc1
+
+ functions:
+ rst, ntr, mdio, led, asc, spi, i2c, jtag, slic, pcm
+
+
+Definition of pin configurations:
+
+Required subnode-properties:
+- lantiq,pins : An array of strings. Each string contains the name of a pin.
+ Valid values for these names are listed below.
+
+Optional subnode-properties:
+- lantiq,pull: Integer, representing the pull-down/up to apply to the pin.
+ 0: none, 1: down
+- lantiq,drive-current: Boolean, enables drive-current
+- lantiq,slew-rate: Boolean, enables slew-rate
+
+Example:
+ pinmux0 {
+ compatible = "lantiq,pinctrl-falcon";
+ pinctrl-names = "default";
+ pinctrl-0 = <&state_default>;
+
+ state_default: pinmux {
+ asc0 {
+ lantiq,groups = "asc0";
+ lantiq,function = "asc";
+ };
+ ntr {
+ lantiq,groups = "ntr8k";
+ lantiq,function = "ntr";
+ };
+ i2c {
+ lantiq,groups = "i2c";
+ lantiq,function = "i2c";
+ };
+ hrst {
+ lantiq,groups = "hrst";
+ lantiq,function = "rst";
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt b/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt
new file mode 100644
index 000000000..e89b46775
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/lantiq,xway-pinumx.txt
@@ -0,0 +1,97 @@
+Lantiq XWAY pinmux controller
+
+Required properties:
+- compatible: "lantiq,pinctrl-xway" or "lantiq,pinctrl-xr9"
+- reg: Should contain the physical address and length of the gpio/pinmux
+ register range
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Lantiq's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those group(s), and two pin configuration parameters:
+pull-up and open-drain
+
+The name of each subnode is not important as long as it is unique; all subnodes
+should be enumerated and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+We support 2 types of nodes.
+
+Definition of mux function groups:
+
+Required subnode-properties:
+- lantiq,groups : An array of strings. Each string contains the name of a group.
+ Valid values for these names are listed below.
+- lantiq,function: A string containing the name of the function to mux to the
+ group. Valid values for function names are listed below.
+
+Valid values for group and function names:
+
+ mux groups:
+ exin0, exin1, exin2, jtag, ebu a23, ebu a24, ebu a25, ebu clk, ebu cs1,
+ ebu wait, nand ale, nand cs1, nand cle, spi, spi_cs1, spi_cs2, spi_cs3,
+ spi_cs4, spi_cs5, spi_cs6, asc0, asc0 cts rts, stp, nmi , gpt1, gpt2,
+ gpt3, clkout0, clkout1, clkout2, clkout3, gnt1, gnt2, gnt3, req1, req2,
+ req3
+
+ additional mux groups (XR9 only):
+ mdio, nand rdy, nand rd, exin3, exin4, gnt4, req4
+
+ functions:
+ spi, asc, cgu, jtag, exin, stp, gpt, nmi, pci, ebu, mdio
+
+
+
+Definition of pin configurations:
+
+Required subnode-properties:
+- lantiq,pins : An array of strings. Each string contains the name of a pin.
+ Valid values for these names are listed below.
+
+Optional subnode-properties:
+- lantiq,pull: Integer, representing the pull-down/up to apply to the pin.
+ 0: none, 1: down, 2: up.
+- lantiq,open-drain: Boolean, enables open-drain on the defined pin.
+
+Valid values for XWAY pin names:
+ Pinconf pins can be referenced via the names io0-io31.
+
+Valid values for XR9 pin names:
+ Pinconf pins can be referenced via the names io0-io55.
+
+Example:
+ gpio: pinmux@E100B10 {
+ compatible = "lantiq,pinctrl-xway";
+ pinctrl-names = "default";
+ pinctrl-0 = <&state_default>;
+
+ #gpio-cells = <2>;
+ gpio-controller;
+ reg = <0xE100B10 0xA0>;
+
+ state_default: pinmux {
+ stp {
+ lantiq,groups = "stp";
+ lantiq,function = "stp";
+ };
+ pci {
+ lantiq,groups = "gnt1";
+ lantiq,function = "pci";
+ };
+ conf_out {
+ lantiq,pins = "io4", "io5", "io6"; /* stp */
+ lantiq,open-drain;
+ lantiq,pull = <0>;
+ };
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt
new file mode 100644
index 000000000..e357b0208
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-370-pinctrl.txt
@@ -0,0 +1,96 @@
+* Marvell Armada 370 SoC pinctrl driver for mpp
+
+Please refer to marvell,mvebu-pinctrl.txt in this directory for common binding
+part and usage.
+
+Required properties:
+- compatible: "marvell,88f6710-pinctrl"
+- reg: register specifier of MPP registers
+
+Available mpp pins/groups and functions:
+Note: brackets (x) are not part of the mpp name for marvell,function and given
+only for more detailed description in this document.
+
+name pins functions
+================================================================================
+mpp0 0 gpio, uart0(rxd)
+mpp1 1 gpo, uart0(txd)
+mpp2 2 gpio, i2c0(sck), uart0(txd)
+mpp3 3 gpio, i2c0(sda), uart0(rxd)
+mpp4 4 gpio, cpu_pd(vdd)
+mpp5 5 gpo, ge0(txclko), uart1(txd), spi1(clk), audio(mclk)
+mpp6 6 gpio, ge0(txd0), sata0(prsnt), tdm(rst), audio(sdo)
+mpp7 7 gpo, ge0(txd1), tdm(tdx), audio(lrclk)
+mpp8 8 gpio, ge0(txd2), uart0(rts), tdm(drx), audio(bclk)
+mpp9 9 gpo, ge0(txd3), uart1(txd), sd0(clk), audio(spdifo)
+mpp10 10 gpio, ge0(txctl), uart0(cts), tdm(fsync), audio(sdi)
+mpp11 11 gpio, ge0(rxd0), uart1(rxd), sd0(cmd), spi0(cs1),
+ sata1(prsnt), spi1(cs1)
+mpp12 12 gpio, ge0(rxd1), i2c1(sda), sd0(d0), spi1(cs0),
+ audio(spdifi)
+mpp13 13 gpio, ge0(rxd2), i2c1(sck), sd0(d1), tdm(pclk),
+ audio(rmclk)
+mpp14 14 gpio, ge0(rxd3), pcie(clkreq0), sd0(d2), spi1(mosi),
+ spi0(cs2)
+mpp15 15 gpio, ge0(rxctl), pcie(clkreq1), sd0(d3), spi1(miso),
+ spi0(cs3)
+mpp16 16 gpio, ge0(rxclk), uart1(rxd), tdm(int), audio(extclk)
+mpp17 17 gpo, ge(mdc)
+mpp18 18 gpio, ge(mdio)
+mpp19 19 gpio, ge0(txclk), ge1(txclkout), tdm(pclk)
+mpp20 20 gpo, ge0(txd4), ge1(txd0)
+mpp21 21 gpo, ge0(txd5), ge1(txd1), uart1(txd)
+mpp22 22 gpo, ge0(txd6), ge1(txd2), uart0(rts)
+mpp23 23 gpo, ge0(txd7), ge1(txd3), spi1(mosi)
+mpp24 24 gpio, ge0(col), ge1(txctl), spi1(cs0)
+mpp25 25 gpio, ge0(rxerr), ge1(rxd0), uart1(rxd)
+mpp26 26 gpio, ge0(crs), ge1(rxd1), spi1(miso)
+mpp27 27 gpio, ge0(rxd4), ge1(rxd2), uart0(cts)
+mpp28 28 gpio, ge0(rxd5), ge1(rxd3)
+mpp29 29 gpio, ge0(rxd6), ge1(rxctl), i2c1(sda)
+mpp30 30 gpio, ge0(rxd7), ge1(rxclk), i2c1(sck)
+mpp31 31 gpio, tclk, ge0(txerr)
+mpp32 32 gpio, spi0(cs0)
+mpp33 33 gpio, dev(bootcs), spi0(cs0)
+mpp34 34 gpo, dev(wen0), spi0(mosi)
+mpp35 35 gpo, dev(oen), spi0(sck)
+mpp36 36 gpo, dev(a1), spi0(miso)
+mpp37 37 gpo, dev(a0), sata0(prsnt)
+mpp38 38 gpio, dev(ready), uart1(cts), uart0(cts)
+mpp39 39 gpo, dev(ad0), audio(spdifo)
+mpp40 40 gpio, dev(ad1), uart1(rts), uart0(rts)
+mpp41 41 gpio, dev(ad2), uart1(rxd)
+mpp42 42 gpo, dev(ad3), uart1(txd)
+mpp43 43 gpo, dev(ad4), audio(bclk)
+mpp44 44 gpo, dev(ad5), audio(mclk)
+mpp45 45 gpo, dev(ad6), audio(lrclk)
+mpp46 46 gpo, dev(ad7), audio(sdo)
+mpp47 47 gpo, dev(ad8), sd0(clk), audio(spdifo)
+mpp48 48 gpio, dev(ad9), uart0(rts), sd0(cmd), sata1(prsnt),
+ spi0(cs1)
+mpp49 49 gpio, dev(ad10), pcie(clkreq1), sd0(d0), spi1(cs0),
+ audio(spdifi)
+mpp50 50 gpio, dev(ad11), uart0(cts), sd0(d1), spi1(miso),
+ audio(rmclk)
+mpp51 51 gpio, dev(ad12), i2c1(sda), sd0(d2), spi1(mosi)
+mpp52 52 gpio, dev(ad13), i2c1(sck), sd0(d3), spi1(sck)
+mpp53 53 gpio, dev(ad14), sd0(clk), tdm(pclk), spi0(cs2),
+ pcie(clkreq1)
+mpp54 54 gpo, dev(ad15), tdm(dtx)
+mpp55 55 gpio, dev(cs1), uart1(txd), tdm(rst), sata1(prsnt),
+ sata0(prsnt)
+mpp56 56 gpio, dev(cs2), uart1(cts), uart0(cts), spi0(cs3),
+ pcie(clkreq0), spi1(cs1)
+mpp57 57 gpio, dev(cs3), uart1(rxd), tdm(fsync), sata0(prsnt),
+ audio(sdo)
+mpp58 58 gpio, dev(cs0), uart1(rts), tdm(int), audio(extclk),
+ uart0(rts)
+mpp59 59 gpo, dev(ale0), uart1(rts), uart0(rts), audio(bclk)
+mpp60 60 gpio, dev(ale1), uart1(rxd), sata0(prsnt), pcie(rst-out),
+ audio(sdi)
+mpp61 61 gpo, dev(wen1), uart1(txd), audio(rclk)
+mpp62 62 gpio, dev(a2), uart1(cts), tdm(drx), pcie(clkreq0),
+ audio(mclk), uart0(cts)
+mpp63 63 gpo, spi0(sck), tclk
+mpp64 64 gpio, spi0(miso), spi0(cs1)
+mpp65 65 gpio, spi0(mosi), spi0(cs2)
diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt
new file mode 100644
index 000000000..bedbe42c8
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-375-pinctrl.txt
@@ -0,0 +1,82 @@
+* Marvell Armada 375 SoC pinctrl driver for mpp
+
+Please refer to marvell,mvebu-pinctrl.txt in this directory for common binding
+part and usage.
+
+Required properties:
+- compatible: "marvell,88f6720-pinctrl"
+- reg: register specifier of MPP registers
+
+Available mpp pins/groups and functions:
+Note: brackets (x) are not part of the mpp name for marvell,function and given
+only for more detailed description in this document.
+
+name pins functions
+================================================================================
+mpp0 0 gpio, dev(ad2), spi0(cs1), spi1(cs1)
+mpp1 1 gpio, dev(ad3), spi0(mosi), spi1(mosi)
+mpp2 2 gpio, dev(ad4), ptp(eventreq), led(c0), audio(sdi)
+mpp3 3 gpio, dev(ad5), ptp(triggen), led(p3), audio(mclk)
+mpp4 4 gpio, dev(ad6), spi0(miso), spi1(miso)
+mpp5 5 gpio, dev(ad7), spi0(cs2), spi1(cs2)
+mpp6 6 gpio, dev(ad0), led(p1), audio(rclk)
+mpp7 7 gpio, dev(ad1), ptp(clk), led(p2), audio(extclk)
+mpp8 8 gpio, dev (bootcs), spi0(cs0), spi1(cs0)
+mpp9 9 gpio, spi0(sck), spi1(sck), nand(we)
+mpp10 10 gpio, dram(vttctrl), led(c1), nand(re)
+mpp11 11 gpio, dev(a0), led(c2), audio(sdo)
+mpp12 12 gpio, dev(a1), audio(bclk)
+mpp13 13 gpio, dev(readyn), pcie0(rstoutn), pcie1(rstoutn)
+mpp14 14 gpio, i2c0(sda), uart1(txd)
+mpp15 15 gpio, i2c0(sck), uart1(rxd)
+mpp16 16 gpio, uart0(txd)
+mpp17 17 gpio, uart0(rxd)
+mpp18 18 gpio, tdm(intn)
+mpp19 19 gpio, tdm(rstn)
+mpp20 20 gpio, tdm(pclk)
+mpp21 21 gpio, tdm(fsync)
+mpp22 22 gpio, tdm(drx)
+mpp23 23 gpio, tdm(dtx)
+mpp24 24 gpio, led(p0), ge1(rxd0), sd(cmd), uart0(rts)
+mpp25 25 gpio, led(p2), ge1(rxd1), sd(d0), uart0(cts)
+mpp26 26 gpio, pcie0(clkreq), ge1(rxd2), sd(d2), uart1(rts)
+mpp27 27 gpio, pcie1(clkreq), ge1(rxd3), sd(d1), uart1(cts)
+mpp28 28 gpio, led(p3), ge1(txctl), sd(clk)
+mpp29 29 gpio, pcie1(clkreq), ge1(rxclk), sd(d3)
+mpp30 30 gpio, ge1(txd0), spi1(cs0)
+mpp31 31 gpio, ge1(txd1), spi1(mosi)
+mpp32 32 gpio, ge1(txd2), spi1(sck), ptp(triggen)
+mpp33 33 gpio, ge1(txd3), spi1(miso)
+mpp34 34 gpio, ge1(txclkout), spi1(sck)
+mpp35 35 gpio, ge1(rxctl), spi1(cs1), spi0(cs2)
+mpp36 36 gpio, pcie0(clkreq)
+mpp37 37 gpio, pcie0(clkreq), tdm(intn), ge(mdc)
+mpp38 38 gpio, pcie1(clkreq), ge(mdio)
+mpp39 39 gpio, ref(clkout)
+mpp40 40 gpio, uart1(txd)
+mpp41 41 gpio, uart1(rxd)
+mpp42 42 gpio, spi1(cs2), led(c0)
+mpp43 43 gpio, sata0(prsnt), dram(vttctrl)
+mpp44 44 gpio, sata0(prsnt)
+mpp45 45 gpio, spi0(cs2), pcie0(rstoutn)
+mpp46 46 gpio, led(p0), ge0(txd0), ge1(txd0)
+mpp47 47 gpio, led(p1), ge0(txd1), ge1(txd1)
+mpp48 48 gpio, led(p2), ge0(txd2), ge1(txd2)
+mpp49 49 gpio, led(p3), ge0(txd3), ge1(txd3)
+mpp50 50 gpio, led(c0), ge0(rxd0), ge1(rxd0)
+mpp51 51 gpio, led(c1), ge0(rxd1), ge1(rxd1)
+mpp52 52 gpio, led(c2), ge0(rxd2), ge1(rxd2)
+mpp53 53 gpio, pcie1(rstoutn), ge0(rxd3), ge1(rxd3)
+mpp54 54 gpio, pcie0(rstoutn), ge0(rxctl), ge1(rxctl)
+mpp55 55 gpio, ge0(rxclk), ge1(rxclk)
+mpp56 56 gpio, ge0(txclkout), ge1(txclkout)
+mpp57 57 gpio, ge0(txctl), ge1(txctl)
+mpp58 58 gpio, led(c0)
+mpp59 59 gpio, led(c1)
+mpp60 60 gpio, uart1(txd), led(c2)
+mpp61 61 gpio, i2c1(sda), uart1(rxd), spi1(cs2), led(p0)
+mpp62 62 gpio, i2c1(sck), led(p1)
+mpp63 63 gpio, ptp(triggen), led(p2)
+mpp64 64 gpio, dram(vttctrl), led(p3)
+mpp65 65 gpio, sata1(prsnt)
+mpp66 66 gpio, ptp(eventreq), spi1(cs3)
diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt
new file mode 100644
index 000000000..4ac138aaa
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-38x-pinctrl.txt
@@ -0,0 +1,80 @@
+* Marvell Armada 380/385 SoC pinctrl driver for mpp
+
+Please refer to marvell,mvebu-pinctrl.txt in this directory for common binding
+part and usage.
+
+Required properties:
+- compatible: "marvell,88f6810-pinctrl", "marvell,88f6820-pinctrl" or
+ "marvell,88f6828-pinctrl" depending on the specific variant of the
+ SoC being used.
+- reg: register specifier of MPP registers
+
+Available mpp pins/groups and functions:
+Note: brackets (x) are not part of the mpp name for marvell,function and given
+only for more detailed description in this document.
+
+name pins functions
+================================================================================
+mpp0 0 gpio, ua0(rxd)
+mpp1 1 gpio, ua0(txd)
+mpp2 2 gpio, i2c0(sck)
+mpp3 3 gpio, i2c0(sda)
+mpp4 4 gpio, ge(mdc), ua1(txd), ua0(rts)
+mpp5 5 gpio, ge(mdio), ua1(rxd), ua0(cts)
+mpp6 6 gpio, ge0(txclkout), ge0(crs), dev(cs3)
+mpp7 7 gpio, ge0(txd0), dev(ad9)
+mpp8 8 gpio, ge0(txd1), dev(ad10)
+mpp9 9 gpio, ge0(txd2), dev(ad11)
+mpp10 10 gpio, ge0(txd3), dev(ad12)
+mpp11 11 gpio, ge0(txctl), dev(ad13)
+mpp12 12 gpio, ge0(rxd0), pcie0(rstout), spi0(cs1), dev(ad14), pcie3(clkreq)
+mpp13 13 gpio, ge0(rxd1), pcie0(clkreq), pcie1(clkreq) [1], spi0(cs2), dev(ad15), pcie2(clkreq)
+mpp14 14 gpio, ge0(rxd2), ptp(clk), m(vtt_ctrl), spi0(cs3), dev(wen1), pcie3(clkreq)
+mpp15 15 gpio, ge0(rxd3), ge(mdc slave), pcie0(rstout), spi0(mosi)
+mpp16 16 gpio, ge0(rxctl), ge(mdio slave), m(decc_err), spi0(miso), pcie0(clkreq), pcie1(clkreq) [1]
+mpp17 17 gpio, ge0(rxclk), ptp(clk), ua1(rxd), spi0(sck), sata1(prsnt)
+mpp18 18 gpio, ge0(rxerr), ptp(trig_gen), ua1(txd), spi0(cs0)
+mpp19 19 gpio, ge0(col), ptp(event_req), ge0(txerr), sata1(prsnt), ua0(cts)
+mpp20 20 gpio, ge0(txclk), ptp(clk), sata0(prsnt), ua0(rts)
+mpp21 21 gpio, spi0(cs1), ge1(rxd0), sata0(prsnt), sd0(cmd), dev(bootcs)
+mpp22 22 gpio, spi0(mosi), dev(ad0)
+mpp23 23 gpio, spi0(sck), dev(ad2)
+mpp24 24 gpio, spi0(miso), ua0(cts), ua1(rxd), sd0(d4), dev(ready)
+mpp25 25 gpio, spi0(cs0), ua0(rts), ua1(txd), sd0(d5), dev(cs0)
+mpp26 26 gpio, spi0(cs2), i2c1(sck), sd0(d6), dev(cs1)
+mpp27 27 gpio, spi0(cs3), ge1(txclkout), i2c1(sda), sd0(d7), dev(cs2)
+mpp28 28 gpio, ge1(txd0), sd0(clk), dev(ad5)
+mpp29 29 gpio, ge1(txd1), dev(ale0)
+mpp30 30 gpio, ge1(txd2), dev(oen)
+mpp31 31 gpio, ge1(txd3), dev(ale1)
+mpp32 32 gpio, ge1(txctl), dev(wen0)
+mpp33 33 gpio, m(decc_err), dev(ad3)
+mpp34 34 gpio, dev(ad1)
+mpp35 35 gpio, ref(clk_out1), dev(a1)
+mpp36 36 gpio, ptp(trig_gen), dev(a0)
+mpp37 37 gpio, ptp(clk), ge1(rxclk), sd0(d3), dev(ad8)
+mpp38 38 gpio, ptp(event_req), ge1(rxd1), ref(clk_out0), sd0(d0), dev(ad4)
+mpp39 39 gpio, i2c1(sck), ge1(rxd2), ua0(cts), sd0(d1), dev(a2)
+mpp40 40 gpio, i2c1(sda), ge1(rxd3), ua0(rts), sd0(d2), dev(ad6)
+mpp41 41 gpio, ua1(rxd), ge1(rxctl), ua0(cts), spi1(cs3), dev(burst/last)
+mpp42 42 gpio, ua1(txd), ua0(rts), dev(ad7)
+mpp43 43 gpio, pcie0(clkreq), m(vtt_ctrl), m(decc_err), spi1(cs2), dev(clkout)
+mpp44 44 gpio, sata0(prsnt), sata1(prsnt), sata2(prsnt) [2], sata3(prsnt) [3]
+mpp45 45 gpio, ref(clk_out0), pcie0(rstout)
+mpp46 46 gpio, ref(clk_out1), pcie0(rstout)
+mpp47 47 gpio, sata0(prsnt), sata1(prsnt), sata2(prsnt) [2], sata3(prsnt) [2]
+mpp48 48 gpio, sata0(prsnt), m(vtt_ctrl), tdm2c(pclk), audio(mclk), sd0(d4), pcie0(clkreq)
+mpp49 49 gpio, sata2(prsnt) [2], sata3(prsnt) [2], tdm2c(fsync), audio(lrclk), sd0(d5), pcie1(clkreq)
+mpp50 50 gpio, pcie0(rstout), tdm2c(drx), audio(extclk), sd0(cmd)
+mpp51 51 gpio, tdm2c(dtx), audio(sdo), m(decc_err)
+mpp52 52 gpio, pcie0(rstout), tdm2c(intn), audio(sdi), sd0(d6)
+mpp53 53 gpio, sata1(prsnt), sata0(prsnt), tdm2c(rstn), audio(bclk), sd0(d7)
+mpp54 54 gpio, sata0(prsnt), sata1(prsnt), pcie0(rstout), ge0(txerr), sd0(d3)
+mpp55 55 gpio, ua1(cts), ge(mdio), pcie1(clkreq) [1], spi1(cs1), sd0(d0)
+mpp56 56 gpio, ua1(rts), ge(mdc), m(decc_err), spi1(mosi)
+mpp57 57 gpio, spi1(sck), sd0(clk)
+mpp58 58 gpio, pcie1(clkreq) [1], i2c1(sck), pcie2(clkreq), spi1(miso), sd0(d1)
+mpp59 59 gpio, pcie0(rstout), i2c1(sda), spi1(cs0), sd0(d2)
+
+[1]: only available on 88F6820 and 88F6828
+[2]: only available on 88F6828
diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-39x-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-39x-pinctrl.txt
new file mode 100644
index 000000000..5b1a9dc00
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-39x-pinctrl.txt
@@ -0,0 +1,78 @@
+* Marvell Armada 39x SoC pinctrl driver for mpp
+
+Please refer to marvell,mvebu-pinctrl.txt in this directory for common binding
+part and usage.
+
+Required properties:
+- compatible: "marvell,88f6920-pinctrl", "marvell,88f6928-pinctrl"
+ depending on the specific variant of the SoC being used.
+- reg: register specifier of MPP registers
+
+Available mpp pins/groups and functions:
+Note: brackets (x) are not part of the mpp name for marvell,function and given
+only for more detailed description in this document.
+
+name pins functions
+================================================================================
+mpp0 0 gpio, ua0(rxd)
+mpp1 1 gpio, ua0(txd)
+mpp2 2 gpio, i2c0(sck)
+mpp3 3 gpio, i2c0(sda)
+mpp4 4 gpio, ua1(txd), ua0(rts), smi(mdc)
+mpp5 5 gpio, ua1(rxd), ua0(cts), smi(mdio)
+mpp6 6 gpio, dev(cs3), xsmi(mdio)
+mpp7 7 gpio, dev(ad9), xsmi(mdc)
+mpp8 8 gpio, dev(ad10), ptp(trig)
+mpp9 9 gpio, dev(ad11), ptp(clk)
+mpp10 10 gpio, dev(ad12), ptp(event)
+mpp11 11 gpio, dev(ad13), led(clk)
+mpp12 12 gpio, pcie0(rstout), dev(ad14), led(stb)
+mpp13 13 gpio, dev(ad15), led(data)
+mpp14 14 gpio, m(vtt), dev(wen1), ua1(txd)
+mpp15 15 gpio, pcie0(rstout), spi0(mosi), i2c1(sck)
+mpp16 16 gpio, m(decc), spi0(miso), i2c1(sda)
+mpp17 17 gpio, ua1(rxd), spi0(sck), smi(mdio)
+mpp18 18 gpio, ua1(txd), spi0(cs0), i2c2(sck)
+mpp19 19 gpio, sata1(present) [1], ua0(cts), ua1(rxd), i2c2(sda)
+mpp20 20 gpio, sata0(present) [1], ua0(rts), ua1(txd), smi(mdc)
+mpp21 21 gpio, spi0(cs1), sata0(present) [1], sd(cmd), dev(bootcs), ge(rxd0)
+mpp22 22 gpio, spi0(mosi), dev(ad0)
+mpp23 23 gpio, spi0(sck), dev(ad2)
+mpp24 24 gpio, spi0(miso), ua0(cts), ua1(rxd), sd(d4), dev(readyn)
+mpp25 25 gpio, spi0(cs0), ua0(rts), ua1(txd), sd(d5), dev(cs0)
+mpp26 26 gpio, spi0(cs2), i2c1(sck), sd(d6), dev(cs1)
+mpp27 27 gpio, spi0(cs3), i2c1(sda), sd(d7), dev(cs2), ge(txclkout)
+mpp28 28 gpio, sd(clk), dev(ad5), ge(txd0)
+mpp29 29 gpio, dev(ale0), ge(txd1)
+mpp30 30 gpio, dev(oen), ge(txd2)
+mpp31 31 gpio, dev(ale1), ge(txd3)
+mpp32 32 gpio, dev(wen0), ge(txctl)
+mpp33 33 gpio, m(decc), dev(ad3)
+mpp34 34 gpio, dev(ad1)
+mpp35 35 gpio, ref(clk), dev(a1)
+mpp36 36 gpio, dev(a0)
+mpp37 37 gpio, sd(d3), dev(ad8), ge(rxclk)
+mpp38 38 gpio, ref(clk), sd(d0), dev(ad4), ge(rxd1)
+mpp39 39 gpio, i2c1(sck), ua0(cts), sd(d1), dev(a2), ge(rxd2)
+mpp40 40 gpio, i2c1(sda), ua0(rts), sd(d2), dev(ad6), ge(rxd3)
+mpp41 41 gpio, ua1(rxd), ua0(cts), spi1(cs3), dev(burstn), nd(rbn0), ge(rxctl)
+mpp42 42 gpio, ua1(txd), ua0(rts), dev(ad7)
+mpp43 43 gpio, pcie0(clkreq), m(vtt), m(decc), spi1(cs2), dev(clkout), nd(rbn1)
+mpp44 44 gpio, sata0(present) [1], sata1(present) [1], led(clk)
+mpp45 45 gpio, ref(clk), pcie0(rstout), ua1(rxd)
+mpp46 46 gpio, ref(clk), pcie0(rstout), ua1(txd), led(stb)
+mpp47 47 gpio, sata0(present) [1], sata1(present) [1], led(data)
+mpp48 48 gpio, sata0(present) [1], m(vtt), tdm(pclk) [1], audio(mclk) [1], sd(d4), pcie0(clkreq), ua1(txd)
+mpp49 49 gpio, tdm(fsync) [1], audio(lrclk) [1], sd(d5), ua2(rxd)
+mpp50 50 gpio, pcie0(rstout), tdm(drx) [1], audio(extclk) [1], sd(cmd), ua2(rxd)
+mpp51 51 gpio, tdm(dtx) [1], audio(sdo) [1], m(decc), ua2(txd)
+mpp52 52 gpio, pcie0(rstout), tdm(intn) [1], audio(sdi) [1], sd(d6), i2c3(sck)
+mpp53 53 gpio, sata1(present) [1], sata0(present) [1], tdm(rstn) [1], audio(bclk) [1], sd(d7), i2c3(sda)
+mpp54 54 gpio, sata0(present) [1], sata1(present) [1], pcie0(rstout), sd(d3), ua3(txd)
+mpp55 55 gpio, ua1(cts), spi1(cs1), sd(d0), ua1(rxd), ua3(rxd)
+mpp56 56 gpio, ua1(rts), m(decc), spi1(mosi), ua1(txd)
+mpp57 57 gpio, spi1(sck), sd(clk), ua1(txd)
+mpp58 58 gpio, i2c1(sck), pcie2(clkreq), spi1(miso), sd(d1), ua1(rxd)
+mpp59 59 gpio, pcie0(rstout), i2c1(sda), spi1(cs0), sd(d2)
+
+[1]: only available on 88F6928
diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt
new file mode 100644
index 000000000..96e7744ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,armada-xp-pinctrl.txt
@@ -0,0 +1,95 @@
+* Marvell Armada XP SoC pinctrl driver for mpp
+
+Please refer to marvell,mvebu-pinctrl.txt in this directory for common binding
+part and usage.
+
+Required properties:
+- compatible: "marvell,mv78230-pinctrl", "marvell,mv78260-pinctrl",
+ "marvell,mv78460-pinctrl"
+- reg: register specifier of MPP registers
+
+This driver supports all Armada XP variants, i.e. mv78230, mv78260, and mv78460.
+
+Available mpp pins/groups and functions:
+Note: brackets (x) are not part of the mpp name for marvell,function and given
+only for more detailed description in this document.
+
+* Marvell Armada XP (all variants)
+
+name pins functions
+================================================================================
+mpp0 0 gpio, ge0(txclko), lcd(d0)
+mpp1 1 gpio, ge0(txd0), lcd(d1)
+mpp2 2 gpio, ge0(txd1), lcd(d2)
+mpp3 3 gpio, ge0(txd2), lcd(d3)
+mpp4 4 gpio, ge0(txd3), lcd(d4)
+mpp5 5 gpio, ge0(txctl), lcd(d5)
+mpp6 6 gpio, ge0(rxd0), lcd(d6)
+mpp7 7 gpio, ge0(rxd1), lcd(d7)
+mpp8 8 gpio, ge0(rxd2), lcd(d8)
+mpp9 9 gpio, ge0(rxd3), lcd(d9)
+mpp10 10 gpio, ge0(rxctl), lcd(d10)
+mpp11 11 gpio, ge0(rxclk), lcd(d11)
+mpp12 12 gpio, ge0(txd4), ge1(txd0), lcd(d12)
+mpp13 13 gpio, ge0(txd5), ge1(txd1), lcd(d13)
+mpp14 14 gpio, ge0(txd6), ge1(txd2), lcd(d15)
+mpp15 15 gpio, ge0(txd7), ge1(txd3), lcd(d16)
+mpp16 16 gpio, ge0(txd7), ge1(txd3), lcd(d16)
+mpp17 17 gpio, ge0(col), ge1(txctl), lcd(d17)
+mpp18 18 gpio, ge0(rxerr), ge1(rxd0), lcd(d18), ptp(trig)
+mpp19 19 gpio, ge0(crs), ge1(rxd1), lcd(d19), ptp(evreq)
+mpp20 20 gpio, ge0(rxd4), ge1(rxd2), lcd(d20), ptp(clk)
+mpp21 21 gpio, ge0(rxd5), ge1(rxd3), lcd(d21), mem(bat)
+mpp22 22 gpio, ge0(rxd6), ge1(rxctl), lcd(d22), sata0(prsnt)
+mpp23 23 gpio, ge0(rxd7), ge1(rxclk), lcd(d23), sata1(prsnt)
+mpp24 24 gpio, lcd(hsync), sata1(prsnt), tdm(rst)
+mpp25 25 gpio, lcd(vsync), sata0(prsnt), tdm(pclk)
+mpp26 26 gpio, lcd(clk), tdm(fsync)
+mpp27 27 gpio, lcd(e), tdm(dtx), ptp(trig)
+mpp28 28 gpio, lcd(pwm), tdm(drx), ptp(evreq)
+mpp29 29 gpio, lcd(ref-clk), tdm(int0), ptp(clk)
+mpp30 30 gpio, tdm(int1), sd0(clk)
+mpp31 31 gpio, tdm(int2), sd0(cmd)
+mpp32 32 gpio, tdm(int3), sd0(d0)
+mpp33 33 gpio, tdm(int4), sd0(d1), mem(bat)
+mpp34 34 gpio, tdm(int5), sd0(d2), sata0(prsnt)
+mpp35 35 gpio, tdm(int6), sd0(d3), sata1(prsnt)
+mpp36 36 gpio, spi(mosi)
+mpp37 37 gpio, spi(miso)
+mpp38 38 gpio, spi(sck)
+mpp39 39 gpio, spi(cs0)
+mpp40 40 gpio, spi(cs1), uart2(cts), lcd(vga-hsync), pcie(clkreq0)
+mpp41 41 gpio, spi(cs2), uart2(rts), lcd(vga-vsync), sata1(prsnt),
+ pcie(clkreq1)
+mpp42 42 gpio, uart2(rxd), uart0(cts), tdm(int7), tdm-1(timer)
+mpp43 43 gpio, uart2(txd), uart0(rts), spi(cs3), pcie(rstout)
+mpp44 44 gpio, uart2(cts), uart3(rxd), spi(cs4), pcie(clkreq2),
+ mem(bat)
+mpp45 45 gpio, uart2(rts), uart3(txd), spi(cs5), sata1(prsnt)
+mpp46 46 gpio, uart3(rts), uart1(rts), spi(cs6), sata0(prsnt)
+mpp47 47 gpio, uart3(cts), uart1(cts), spi(cs7), pcie(clkreq3),
+ ref(clkout)
+mpp48 48 gpio, dev(clkout), dev(burst/last)
+
+* Marvell Armada XP (mv78260 and mv78460 only)
+
+name pins functions
+================================================================================
+mpp49 49 gpio, dev(we3)
+mpp50 50 gpio, dev(we2)
+mpp51 51 gpio, dev(ad16)
+mpp52 52 gpio, dev(ad17)
+mpp53 53 gpio, dev(ad18)
+mpp54 54 gpio, dev(ad19)
+mpp55 55 gpio, dev(ad20)
+mpp56 56 gpio, dev(ad21)
+mpp57 57 gpio, dev(ad22)
+mpp58 58 gpio, dev(ad23)
+mpp59 59 gpio, dev(ad24)
+mpp60 60 gpio, dev(ad25)
+mpp61 61 gpio, dev(ad26)
+mpp62 62 gpio, dev(ad27)
+mpp63 63 gpio, dev(ad28)
+mpp64 64 gpio, dev(ad29)
+mpp65 65 gpio, dev(ad30)
+mpp66 66 gpio, dev(ad31)
diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,dove-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,dove-pinctrl.txt
new file mode 100644
index 000000000..cf52477cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,dove-pinctrl.txt
@@ -0,0 +1,90 @@
+* Marvell Dove SoC pinctrl driver for mpp
+
+Please refer to marvell,mvebu-pinctrl.txt in this directory for common binding
+part and usage.
+
+Required properties:
+- compatible: "marvell,dove-pinctrl"
+- clocks: (optional) phandle of pdma clock
+- reg: register specifiers of MPP, MPP4, and PMU MPP registers
+
+Available mpp pins/groups and functions:
+Note: brackets (x) are not part of the mpp name for marvell,function and given
+only for more detailed description in this document.
+Note: pmu* also allows for Power Management functions listed below
+
+name pins functions
+================================================================================
+mpp0 0 gpio, pmu, uart2(rts), sdio0(cd), lcd0(pwm), pmu*
+mpp1 1 gpio, pmu, uart2(cts), sdio0(wp), lcd1(pwm), pmu*
+mpp2 2 gpio, pmu, uart2(txd), sdio0(buspwr), sata(prsnt),
+ uart1(rts), pmu*
+mpp3 3 gpio, pmu, uart2(rxd), sdio0(ledctrl), sata(act),
+ uart1(cts), lcd-spi(cs1), pmu*
+mpp4 4 gpio, pmu, uart3(rts), sdio1(cd), spi1(miso), pmu*
+mpp5 5 gpio, pmu, uart3(cts), sdio1(wp), spi1(cs), pmu*
+mpp6 6 gpio, pmu, uart3(txd), sdio1(buspwr), spi1(mosi), pmu*
+mpp7 7 gpio, pmu, uart3(rxd), sdio1(ledctrl), spi1(sck), pmu*
+mpp8 8 gpio, pmu, watchdog(rstout), pmu*
+mpp9 9 gpio, pmu, pex1(clkreq), pmu*
+mpp10 10 gpio, pmu, ssp(sclk), pmu*
+mpp11 11 gpio, pmu, sata(prsnt), sata-1(act), sdio0(ledctrl),
+ sdio1(ledctrl), pex0(clkreq), pmu*
+mpp12 12 gpio, pmu, uart2(rts), audio0(extclk), sdio1(cd),
+ sata(act), pmu*
+mpp13 13 gpio, pmu, uart2(cts), audio1(extclk), sdio1(wp),
+ ssp(extclk), pmu*
+mpp14 14 gpio, pmu, uart2(txd), sdio1(buspwr), ssp(rxd), pmu*
+mpp15 15 gpio, pmu, uart2(rxd), sdio1(ledctrl), ssp(sfrm), pmu*
+mpp16 16 gpio, uart3(rts), sdio0(cd), ac97(sdi1), lcd-spi(cs1)
+mpp17 17 gpio, uart3(cts), sdio0(wp), ac97(sdi2), twsi(sda),
+ ac97-1(sysclko)
+mpp18 18 gpio, uart3(txd), sdio0(buspwr), ac97(sdi3), lcd0(pwm)
+mpp19 19 gpio, uart3(rxd), sdio0(ledctrl), twsi(sck)
+mpp20 20 gpio, sdio0(cd), sdio1(cd), spi1(miso), lcd-spi(miso),
+ ac97(sysclko)
+mpp21 21 gpio, sdio0(wp), sdio1(wp), spi1(cs), lcd-spi(cs0),
+ uart1(cts), ssp(sfrm)
+mpp22 22 gpio, sdio0(buspwr), sdio1(buspwr), spi1(mosi),
+ lcd-spi(mosi), uart1(cts), ssp(txd)
+mpp23 23 gpio, sdio0(ledctrl), sdio1(ledctrl), spi1(sck),
+ lcd-spi(sck), ssp(sclk)
+mpp_camera 24-39 gpio, camera
+mpp_sdio0 40-45 gpio, sdio0
+mpp_sdio1 46-51 gpio, sdio1
+mpp_audio1 52-57 gpio, i2s1/spdifo, i2s1, spdifo, twsi, ssp/spdifo, ssp,
+ ssp/twsi
+mpp_spi0 58-61 gpio, spi0
+mpp_uart1 62-63 gpio, uart1
+mpp_nand 64-71 gpo, nand
+audio0 - i2s, ac97
+twsi - none, opt1, opt2, opt3
+
+Power Management functions (pmu*):
+pmu-nc Pin not driven by any PM function
+pmu-low Pin driven low (0)
+pmu-high Pin driven high (1)
+pmic(sdi) Pin is used for PMIC SDI
+cpu-pwr-down Pin is used for CPU_PWRDWN
+standby-pwr-down Pin is used for STBY_PWRDWN
+core-pwr-good Pin is used for CORE_PWR_GOOD (Pins 0-7 only)
+cpu-pwr-good Pin is used for CPU_PWR_GOOD (Pins 8-15 only)
+bat-fault Pin is used for BATTERY_FAULT
+ext0-wakeup Pin is used for EXT0_WU
+ext1-wakeup Pin is used for EXT0_WU
+ext2-wakeup Pin is used for EXT0_WU
+pmu-blink Pin is used for blink function
+
+Notes:
+* group "mpp_audio1" allows the following functions and gpio pins:
+ - gpio : gpio on pins 52-57
+ - i2s1/spdifo : audio1 i2s on pins 52-55 and spdifo on 57, no gpios
+ - i2s1 : audio1 i2s on pins 52-55, gpio on pins 56,57
+ - spdifo : spdifo on pin 57, gpio on pins 52-55
+ - twsi : twsi on pins 56,57, gpio on pins 52-55
+ - ssp/spdifo : ssp on pins 52-55, spdifo on pin 57, no gpios
+ - ssp : ssp on pins 52-55, gpio on pins 56,57
+ - ssp/twsi : ssp on pins 52-55, twsi on pins 56,57, no gpios
+* group "audio0" internally muxes i2s0 or ac97 controller to the dedicated
+ audio0 pins.
+* group "twsi" internally muxes twsi controller to the dedicated or option pins.
diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,kirkwood-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,kirkwood-pinctrl.txt
new file mode 100644
index 000000000..730444a9a
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,kirkwood-pinctrl.txt
@@ -0,0 +1,319 @@
+* Marvell Kirkwood SoC pinctrl driver for mpp
+
+Please refer to marvell,mvebu-pinctrl.txt in this directory for common binding
+part and usage.
+
+Required properties:
+- compatible: "marvell,88f6180-pinctrl",
+ "marvell,88f6190-pinctrl", "marvell,88f6192-pinctrl",
+ "marvell,88f6281-pinctrl", "marvell,88f6282-pinctrl"
+ "marvell,98dx4122-pinctrl"
+- reg: register specifier of MPP registers
+
+This driver supports all kirkwood variants, i.e. 88f6180, 88f619x, and 88f628x.
+It also support the 88f6281-based variant in the 98dx412x Bobcat SoCs.
+
+Available mpp pins/groups and functions:
+Note: brackets (x) are not part of the mpp name for marvell,function and given
+only for more detailed description in this document.
+
+* Marvell Kirkwood 88f6180
+
+name pins functions
+================================================================================
+mpp0 0 gpio, nand(io2), spi(cs)
+mpp1 1 gpo, nand(io3), spi(mosi)
+mpp2 2 gpo, nand(io4), spi(sck)
+mpp3 3 gpo, nand(io5), spi(miso)
+mpp4 4 gpio, nand(io6), uart0(rxd), ptp(clk)
+mpp5 5 gpo, nand(io7), uart0(txd), ptp(trig)
+mpp6 6 sysrst(out), spi(mosi), ptp(trig)
+mpp7 7 gpo, pex(rsto), spi(cs), ptp(trig)
+mpp8 8 gpio, twsi0(sda), uart0(rts), uart1(rts), ptp(clk),
+ mii(col)
+mpp9 9 gpio, twsi(sck), uart0(cts), uart1(cts), ptp(evreq),
+ mii(crs)
+mpp10 10 gpo, spi(sck), uart0(txd), ptp(trig)
+mpp11 11 gpio, spi(miso), uart0(rxd), ptp(clk), ptp-1(evreq),
+ ptp-2(trig)
+mpp12 12 gpo, sdio(clk)
+mpp13 13 gpio, sdio(cmd), uart1(txd)
+mpp14 14 gpio, sdio(d0), uart1(rxd), mii(col)
+mpp15 15 gpio, sdio(d1), uart0(rts), uart1(txd)
+mpp16 16 gpio, sdio(d2), uart0(cts), uart1(rxd), mii(crs)
+mpp17 17 gpio, sdio(d3)
+mpp18 18 gpo, nand(io0)
+mpp19 19 gpo, nand(io1)
+mpp20 20 gpio, mii(rxerr)
+mpp21 21 gpio, audio(spdifi)
+mpp22 22 gpio, audio(spdifo)
+mpp23 23 gpio, audio(rmclk)
+mpp24 24 gpio, audio(bclk)
+mpp25 25 gpio, audio(sdo)
+mpp26 26 gpio, audio(lrclk)
+mpp27 27 gpio, audio(mclk)
+mpp28 28 gpio, audio(sdi)
+mpp29 29 gpio, audio(extclk)
+
+* Marvell Kirkwood 88f6190
+
+name pins functions
+================================================================================
+mpp0 0 gpio, nand(io2), spi(cs)
+mpp1 1 gpo, nand(io3), spi(mosi)
+mpp2 2 gpo, nand(io4), spi(sck)
+mpp3 3 gpo, nand(io5), spi(miso)
+mpp4 4 gpio, nand(io6), uart0(rxd), ptp(clk)
+mpp5 5 gpo, nand(io7), uart0(txd), ptp(trig), sata0(act)
+mpp6 6 sysrst(out), spi(mosi), ptp(trig)
+mpp7 7 gpo, pex(rsto), spi(cs), ptp(trig)
+mpp8 8 gpio, twsi0(sda), uart0(rts), uart1(rts), ptp(clk),
+ mii(col), mii-1(rxerr)
+mpp9 9 gpio, twsi(sck), uart0(cts), uart1(cts), ptp(evreq),
+ mii(crs), sata0(prsnt)
+mpp10 10 gpo, spi(sck), uart0(txd), ptp(trig)
+mpp11 11 gpio, spi(miso), uart0(rxd), ptp(clk), ptp-1(evreq),
+ ptp-2(trig), sata0(act)
+mpp12 12 gpo, sdio(clk)
+mpp13 13 gpio, sdio(cmd), uart1(txd)
+mpp14 14 gpio, sdio(d0), uart1(rxd), mii(col)
+mpp15 15 gpio, sdio(d1), uart0(rts), uart1(txd), sata0(act)
+mpp16 16 gpio, sdio(d2), uart0(cts), uart1(rxd), mii(crs)
+mpp17 17 gpio, sdio(d3), sata0(prsnt)
+mpp18 18 gpo, nand(io0)
+mpp19 19 gpo, nand(io1)
+mpp20 20 gpio, ge1(txd0)
+mpp21 21 gpio, ge1(txd1), sata0(act)
+mpp22 22 gpio, ge1(txd2)
+mpp23 23 gpio, ge1(txd3), sata0(prsnt)
+mpp24 24 gpio, ge1(rxd0)
+mpp25 25 gpio, ge1(rxd1)
+mpp26 26 gpio, ge1(rxd2)
+mpp27 27 gpio, ge1(rxd3)
+mpp28 28 gpio, ge1(col)
+mpp29 29 gpio, ge1(txclk)
+mpp30 30 gpio, ge1(rxclk)
+mpp31 31 gpio, ge1(rxclk)
+mpp32 32 gpio, ge1(txclko)
+mpp33 33 gpo, ge1(txclk)
+mpp34 34 gpio, ge1(txen)
+mpp35 35 gpio, ge1(rxerr), sata0(act), mii(rxerr)
+
+* Marvell Kirkwood 88f6192
+
+name pins functions
+================================================================================
+mpp0 0 gpio, nand(io2), spi(cs)
+mpp1 1 gpo, nand(io3), spi(mosi)
+mpp2 2 gpo, nand(io4), spi(sck)
+mpp3 3 gpo, nand(io5), spi(miso)
+mpp4 4 gpio, nand(io6), uart0(rxd), ptp(clk), sata1(act)
+mpp5 5 gpo, nand(io7), uart0(txd), ptp(trig), sata0(act)
+mpp6 6 sysrst(out), spi(mosi), ptp(trig)
+mpp7 7 gpo, pex(rsto), spi(cs), ptp(trig)
+mpp8 8 gpio, twsi0(sda), uart0(rts), uart1(rts), ptp(clk),
+ mii(col), mii-1(rxerr), sata1(prsnt)
+mpp9 9 gpio, twsi(sck), uart0(cts), uart1(cts), ptp(evreq),
+ mii(crs), sata0(prsnt)
+mpp10 10 gpo, spi(sck), uart0(txd), ptp(trig), sata1(act)
+mpp11 11 gpio, spi(miso), uart0(rxd), ptp(clk), ptp-1(evreq),
+ ptp-2(trig), sata0(act)
+mpp12 12 gpo, sdio(clk)
+mpp13 13 gpio, sdio(cmd), uart1(txd)
+mpp14 14 gpio, sdio(d0), uart1(rxd), mii(col), sata1(prsnt)
+mpp15 15 gpio, sdio(d1), uart0(rts), uart1(txd), sata0(act)
+mpp16 16 gpio, sdio(d2), uart0(cts), uart1(rxd), mii(crs),
+ sata1(act)
+mpp17 17 gpio, sdio(d3), sata0(prsnt)
+mpp18 18 gpo, nand(io0)
+mpp19 19 gpo, nand(io1)
+mpp20 20 gpio, ge1(txd0), ts(mp0), tdm(tx0ql), audio(spdifi),
+ sata1(act)
+mpp21 21 gpio, ge1(txd1), sata0(act), ts(mp1), tdm(rx0ql),
+ audio(spdifo)
+mpp22 22 gpio, ge1(txd2), ts(mp2), tdm(tx2ql), audio(rmclk),
+ sata1(prsnt)
+mpp23 23 gpio, ge1(txd3), sata0(prsnt), ts(mp3), tdm(rx2ql),
+ audio(bclk)
+mpp24 24 gpio, ge1(rxd0), ts(mp4), tdm(spi-cs0), audio(sdo)
+mpp25 25 gpio, ge1(rxd1), ts(mp5), tdm(spi-sck), audio(lrclk)
+mpp26 26 gpio, ge1(rxd2), ts(mp6), tdm(spi-miso), audio(mclk)
+mpp27 27 gpio, ge1(rxd3), ts(mp7), tdm(spi-mosi), audio(sdi)
+mpp28 28 gpio, ge1(col), ts(mp8), tdm(int), audio(extclk)
+mpp29 29 gpio, ge1(txclk), ts(mp9), tdm(rst)
+mpp30 30 gpio, ge1(rxclk), ts(mp10), tdm(pclk)
+mpp31 31 gpio, ge1(rxclk), ts(mp11), tdm(fs)
+mpp32 32 gpio, ge1(txclko), ts(mp12), tdm(drx)
+mpp33 33 gpo, ge1(txclk), tdm(drx)
+mpp34 34 gpio, ge1(txen), tdm(spi-cs1)
+mpp35 35 gpio, ge1(rxerr), sata0(act), mii(rxerr), tdm(tx0ql)
+
+* Marvell Kirkwood 88f6281
+
+name pins functions
+================================================================================
+mpp0 0 gpio, nand(io2), spi(cs)
+mpp1 1 gpo, nand(io3), spi(mosi)
+mpp2 2 gpo, nand(io4), spi(sck)
+mpp3 3 gpo, nand(io5), spi(miso)
+mpp4 4 gpio, nand(io6), uart0(rxd), ptp(clk), sata1(act)
+mpp5 5 gpo, nand(io7), uart0(txd), ptp(trig), sata0(act)
+mpp6 6 sysrst(out), spi(mosi), ptp(trig)
+mpp7 7 gpo, pex(rsto), spi(cs), ptp(trig)
+mpp8 8 gpio, twsi0(sda), uart0(rts), uart1(rts), ptp(clk),
+ mii(col), mii-1(rxerr), sata1(prsnt)
+mpp9 9 gpio, twsi(sck), uart0(cts), uart1(cts), ptp(evreq),
+ mii(crs), sata0(prsnt)
+mpp10 10 gpo, spi(sck), uart0(txd), ptp(trig), sata1(act)
+mpp11 11 gpio, spi(miso), uart0(rxd), ptp(clk), ptp-1(evreq),
+ ptp-2(trig), sata0(act)
+mpp12 12 gpio, sdio(clk)
+mpp13 13 gpio, sdio(cmd), uart1(txd)
+mpp14 14 gpio, sdio(d0), uart1(rxd), mii(col), sata1(prsnt)
+mpp15 15 gpio, sdio(d1), uart0(rts), uart1(txd), sata0(act)
+mpp16 16 gpio, sdio(d2), uart0(cts), uart1(rxd), mii(crs),
+ sata1(act)
+mpp17 17 gpio, sdio(d3), sata0(prsnt)
+mpp18 18 gpo, nand(io0)
+mpp19 19 gpo, nand(io1)
+mpp20 20 gpio, ge1(txd0), ts(mp0), tdm(tx0ql), audio(spdifi),
+ sata1(act)
+mpp21 21 gpio, ge1(txd1), sata0(act), ts(mp1), tdm(rx0ql),
+ audio(spdifo)
+mpp22 22 gpio, ge1(txd2), ts(mp2), tdm(tx2ql), audio(rmclk),
+ sata1(prsnt)
+mpp23 23 gpio, ge1(txd3), sata0(prsnt), ts(mp3), tdm(rx2ql),
+ audio(bclk)
+mpp24 24 gpio, ge1(rxd0), ts(mp4), tdm(spi-cs0), audio(sdo)
+mpp25 25 gpio, ge1(rxd1), ts(mp5), tdm(spi-sck), audio(lrclk)
+mpp26 26 gpio, ge1(rxd2), ts(mp6), tdm(spi-miso), audio(mclk)
+mpp27 27 gpio, ge1(rxd3), ts(mp7), tdm(spi-mosi), audio(sdi)
+mpp28 28 gpio, ge1(col), ts(mp8), tdm(int), audio(extclk)
+mpp29 29 gpio, ge1(txclk), ts(mp9), tdm(rst)
+mpp30 30 gpio, ge1(rxclk), ts(mp10), tdm(pclk)
+mpp31 31 gpio, ge1(rxclk), ts(mp11), tdm(fs)
+mpp32 32 gpio, ge1(txclko), ts(mp12), tdm(drx)
+mpp33 33 gpo, ge1(txclk), tdm(drx)
+mpp34 34 gpio, ge1(txen), tdm(spi-cs1), sata1(act)
+mpp35 35 gpio, ge1(rxerr), sata0(act), mii(rxerr), tdm(tx0ql)
+mpp36 36 gpio, ts(mp0), tdm(spi-cs1), audio(spdifi)
+mpp37 37 gpio, ts(mp1), tdm(tx2ql), audio(spdifo)
+mpp38 38 gpio, ts(mp2), tdm(rx2ql), audio(rmclk)
+mpp39 39 gpio, ts(mp3), tdm(spi-cs0), audio(bclk)
+mpp40 40 gpio, ts(mp4), tdm(spi-sck), audio(sdo)
+mpp41 41 gpio, ts(mp5), tdm(spi-miso), audio(lrclk)
+mpp42 42 gpio, ts(mp6), tdm(spi-mosi), audio(mclk)
+mpp43 43 gpio, ts(mp7), tdm(int), audio(sdi)
+mpp44 44 gpio, ts(mp8), tdm(rst), audio(extclk)
+mpp45 45 gpio, ts(mp9), tdm(pclk)
+mpp46 46 gpio, ts(mp10), tdm(fs)
+mpp47 47 gpio, ts(mp11), tdm(drx)
+mpp48 48 gpio, ts(mp12), tdm(dtx)
+mpp49 49 gpio, ts(mp9), tdm(rx0ql), ptp(clk)
+
+* Marvell Kirkwood 88f6282
+
+name pins functions
+================================================================================
+mpp0 0 gpio, nand(io2), spi(cs)
+mpp1 1 gpo, nand(io3), spi(mosi)
+mpp2 2 gpo, nand(io4), spi(sck)
+mpp3 3 gpo, nand(io5), spi(miso)
+mpp4 4 gpio, nand(io6), uart0(rxd), sata1(act), lcd(hsync)
+mpp5 5 gpo, nand(io7), uart0(txd), sata0(act), lcd(vsync)
+mpp6 6 sysrst(out), spi(mosi)
+mpp7 7 gpo, spi(cs), lcd(pwm)
+mpp8 8 gpio, twsi0(sda), uart0(rts), uart1(rts), mii(col),
+ mii-1(rxerr), sata1(prsnt)
+mpp9 9 gpio, twsi(sck), uart0(cts), uart1(cts), mii(crs),
+ sata0(prsnt)
+mpp10 10 gpo, spi(sck), uart0(txd), sata1(act)
+mpp11 11 gpio, spi(miso), uart0(rxd), sata0(act)
+mpp12 12 gpo, sdio(clk), audio(spdifo), spi(mosi), twsi(sda)
+mpp13 13 gpio, sdio(cmd), uart1(txd), audio(rmclk), lcd(pwm)
+mpp14 14 gpio, sdio(d0), uart1(rxd), mii(col), sata1(prsnt),
+ audio(spdifi), audio-1(sdi)
+mpp15 15 gpio, sdio(d1), uart0(rts), uart1(txd), sata0(act),
+ spi(cs)
+mpp16 16 gpio, sdio(d2), uart0(cts), uart1(rxd), mii(crs),
+ sata1(act), lcd(extclk)
+mpp17 17 gpio, sdio(d3), sata0(prsnt), sata1(act), twsi1(sck)
+mpp18 18 gpo, nand(io0), pex(clkreq)
+mpp19 19 gpo, nand(io1)
+mpp20 20 gpio, ge1(txd0), ts(mp0), tdm(tx0ql), audio(spdifi),
+ sata1(act), lcd(d0)
+mpp21 21 gpio, ge1(txd1), sata0(act), ts(mp1), tdm(rx0ql),
+ audio(spdifo), lcd(d1)
+mpp22 22 gpio, ge1(txd2), ts(mp2), tdm(tx2ql), audio(rmclk),
+ sata1(prsnt), lcd(d2)
+mpp23 23 gpio, ge1(txd3), sata0(prsnt), ts(mp3), tdm(rx2ql),
+ audio(bclk), lcd(d3)
+mpp24 24 gpio, ge1(rxd0), ts(mp4), tdm(spi-cs0), audio(sdo),
+ lcd(d4)
+mpp25 25 gpio, ge1(rxd1), ts(mp5), tdm(spi-sck), audio(lrclk),
+ lcd(d5)
+mpp26 26 gpio, ge1(rxd2), ts(mp6), tdm(spi-miso), audio(mclk),
+ lcd(d6)
+mpp27 27 gpio, ge1(rxd3), ts(mp7), tdm(spi-mosi), audio(sdi),
+ lcd(d7)
+mpp28 28 gpio, ge1(col), ts(mp8), tdm(int), audio(extclk),
+ lcd(d8)
+mpp29 29 gpio, ge1(txclk), ts(mp9), tdm(rst), lcd(d9)
+mpp30 30 gpio, ge1(rxclk), ts(mp10), tdm(pclk), lcd(d10)
+mpp31 31 gpio, ge1(rxclk), ts(mp11), tdm(fs), lcd(d11)
+mpp32 32 gpio, ge1(txclko), ts(mp12), tdm(drx), lcd(d12)
+mpp33 33 gpo, ge1(txclk), tdm(drx), lcd(d13)
+mpp34 34 gpio, ge1(txen), tdm(spi-cs1), sata1(act), lcd(d14)
+mpp35 35 gpio, ge1(rxerr), sata0(act), mii(rxerr), tdm(tx0ql),
+ lcd(d15)
+mpp36 36 gpio, ts(mp0), tdm(spi-cs1), audio(spdifi), twsi1(sda)
+mpp37 37 gpio, ts(mp1), tdm(tx2ql), audio(spdifo), twsi1(sck)
+mpp38 38 gpio, ts(mp2), tdm(rx2ql), audio(rmclk), lcd(d18)
+mpp39 39 gpio, ts(mp3), tdm(spi-cs0), audio(bclk), lcd(d19)
+mpp40 40 gpio, ts(mp4), tdm(spi-sck), audio(sdo), lcd(d20)
+mpp41 41 gpio, ts(mp5), tdm(spi-miso), audio(lrclk), lcd(d21)
+mpp42 42 gpio, ts(mp6), tdm(spi-mosi), audio(mclk), lcd(d22)
+mpp43 43 gpio, ts(mp7), tdm(int), audio(sdi), lcd(d23)
+mpp44 44 gpio, ts(mp8), tdm(rst), audio(extclk), lcd(clk)
+mpp45 45 gpio, ts(mp9), tdm(pclk), lcd(e)
+mpp46 46 gpio, ts(mp10), tdm(fs), lcd(hsync)
+mpp47 47 gpio, ts(mp11), tdm(drx), lcd(vsync)
+mpp48 48 gpio, ts(mp12), tdm(dtx), lcd(d16)
+mpp49 49 gpo, tdm(rx0ql), pex(clkreq), lcd(d17)
+
+* Marvell Bobcat 98dx4122
+
+name pins functions
+================================================================================
+mpp0 0 gpio, nand(io2), spi(cs)
+mpp1 1 gpo, nand(io3), spi(mosi)
+mpp2 2 gpo, nand(io4), spi(sck)
+mpp3 3 gpo, nand(io5), spi(miso)
+mpp4 4 gpio, nand(io6), uart0(rxd)
+mpp5 5 gpo, nand(io7), uart0(txd)
+mpp6 6 sysrst(out), spi(mosi)
+mpp7 7 gpo, pex(rsto), spi(cs)
+mpp8 8 gpio, twsi0(sda), uart0(rts), uart1(rts)
+mpp9 9 gpio, twsi(sck), uart0(cts), uart1(cts)
+mpp10 10 gpo, spi(sck), uart0(txd)
+mpp11 11 gpio, spi(miso), uart0(rxd)
+mpp13 13 gpio, uart1(txd)
+mpp14 14 gpio, uart1(rxd)
+mpp15 15 gpio, uart0(rts)
+mpp16 16 gpio, uart0(cts)
+mpp18 18 gpo, nand(io0)
+mpp19 19 gpo, nand(io1)
+mpp34 34 gpio
+mpp35 35 gpio
+mpp36 36 gpio
+mpp37 37 gpio
+mpp38 38 gpio
+mpp39 39 gpio
+mpp40 40 gpio
+mpp41 41 gpio
+mpp42 42 gpio
+mpp43 43 gpio
+mpp44 44 gpio
+mpp45 45 gpio
+mpp49 49 gpio
+
diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,mvebu-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,mvebu-pinctrl.txt
new file mode 100644
index 000000000..0c09f4eb2
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,mvebu-pinctrl.txt
@@ -0,0 +1,46 @@
+* Marvell SoC pinctrl core driver for mpp
+
+The pinctrl driver enables Marvell SoCs to configure the multi-purpose pins
+(mpp) to a specific function. For each SoC family there is a SoC specific
+driver using this core driver.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+A Marvell SoC pin configuration node is a node of a group of pins which can
+be used for a specific device or function. Each node requires one or more
+mpp pins or group of pins and a mpp function common to all pins.
+
+Required properties for pinctrl driver:
+- compatible: "marvell,<soc>-pinctrl"
+ Please refer to each marvell,<soc>-pinctrl.txt binding doc for supported SoCs.
+
+Required properties for pin configuration node:
+- marvell,pins: string array of mpp pins or group of pins to be muxed.
+- marvell,function: string representing a function to mux to for all
+ marvell,pins given in this pin configuration node. The function has to be
+ common for all marvell,pins. Please refer to marvell,<soc>-pinctrl.txt for
+ valid pin/pin group names and available function names for each SoC.
+
+Examples:
+
+uart1: serial@12100 {
+ compatible = "ns16550a";
+ reg = <0x12100 0x100>;
+ reg-shift = <2>;
+ interrupts = <7>;
+
+ pinctrl-0 = <&pmx_uart1_sw>;
+ pinctrl-names = "default";
+};
+
+pinctrl: pinctrl@d0200 {
+ compatible = "marvell,dove-pinctrl";
+ reg = <0xd0200 0x14>, <0xd0440 0x04>, <0xd802c 0x08>;
+
+ pmx_uart1_sw: pmx-uart1-sw {
+ marvell,pins = "mpp_uart1";
+ marvell,function = "uart1";
+ };
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/marvell,orion-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/marvell,orion-pinctrl.txt
new file mode 100644
index 000000000..27570a3a1
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/marvell,orion-pinctrl.txt
@@ -0,0 +1,91 @@
+* Marvell Orion SoC pinctrl driver for mpp
+
+Please refer to marvell,mvebu-pinctrl.txt in this directory for common binding
+part and usage.
+
+Required properties:
+- compatible: "marvell,88f5181l-pinctrl", "marvell,88f5182-pinctrl",
+ "marvell,88f5281-pinctrl"
+
+- reg: two register areas, the first one describing the first two
+ contiguous MPP registers, and the second one describing the single
+ final MPP register, separated from the previous one.
+
+Available mpp pins/groups and functions:
+Note: brackets (x) are not part of the mpp name for marvell,function and given
+only for more detailed description in this document.
+
+* Marvell Orion 88f5181l
+
+name pins functions
+================================================================================
+mpp0 0 pcie(rstout), pci(req2), gpio
+mpp1 1 gpio, pci(gnt2)
+mpp2 2 gpio, pci(req3), pci-1(pme)
+mpp3 3 gpio, pci(gnt3)
+mpp4 4 gpio, pci(req4)
+mpp5 5 gpio, pci(gnt4)
+mpp6 6 gpio, pci(req5), pci-1(clk)
+mpp7 7 gpio, pci(gnt5), pci-1(clk)
+mpp8 8 gpio, ge(col)
+mpp9 9 gpio, ge(rxerr)
+mpp10 10 gpio, ge(crs)
+mpp11 11 gpio, ge(txerr)
+mpp12 12 gpio, ge(txd4)
+mpp13 13 gpio, ge(txd5)
+mpp14 14 gpio, ge(txd6)
+mpp15 15 gpio, ge(txd7)
+mpp16 16 ge(rxd4)
+mpp17 17 ge(rxd5)
+mpp18 18 ge(rxd6)
+mpp19 19 ge(rxd7)
+
+* Marvell Orion 88f5182
+
+name pins functions
+================================================================================
+mpp0 0 pcie(rstout), pci(req2), gpio
+mpp1 1 gpio, pci(gnt2)
+mpp2 2 gpio, pci(req3), pci-1(pme)
+mpp3 3 gpio, pci(gnt3)
+mpp4 4 gpio, pci(req4), bootnand(re), sata0(prsnt)
+mpp5 5 gpio, pci(gnt4), bootnand(we), sata1(prsnt)
+mpp6 6 gpio, pci(req5), nand(re0), sata0(act)
+mpp7 7 gpio, pci(gnt5), nand(we0), sata1(act)
+mpp8 8 gpio, ge(col)
+mpp9 9 gpio, ge(rxerr)
+mpp10 10 gpio, ge(crs)
+mpp11 11 gpio, ge(txerr)
+mpp12 12 gpio, ge(txd4), nand(re1), sata0(ledprsnt)
+mpp13 13 gpio, ge(txd5), nand(we1), sata1(ledprsnt)
+mpp14 14 gpio, ge(txd6), nand(re2), sata0(ledact)
+mpp15 15 gpio, ge(txd7), nand(we2), sata1(ledact)
+mpp16 16 uart1(rxd), ge(rxd4), gpio
+mpp17 17 uart1(txd), ge(rxd5), gpio
+mpp18 18 uart1(cts), ge(rxd6), gpio
+mpp19 19 uart1(rts), ge(rxd7), gpio
+
+* Marvell Orion 88f5281
+
+name pins functions
+================================================================================
+mpp0 0 pcie(rstout), pci(req2), gpio
+mpp1 1 gpio, pci(gnt2)
+mpp2 2 gpio, pci(req3), pci(pme)
+mpp3 3 gpio, pci(gnt3)
+mpp4 4 gpio, pci(req4), bootnand(re)
+mpp5 5 gpio, pci(gnt4), bootnand(we)
+mpp6 6 gpio, pci(req5), nand(re0)
+mpp7 7 gpio, pci(gnt5), nand(we0)
+mpp8 8 gpio, ge(col)
+mpp9 9 gpio, ge(rxerr)
+mpp10 10 gpio, ge(crs)
+mpp11 11 gpio, ge(txerr)
+mpp12 12 gpio, ge(txd4), nand(re1)
+mpp13 13 gpio, ge(txd5), nand(we1)
+mpp14 14 gpio, ge(txd6), nand(re2)
+mpp15 15 gpio, ge(txd7), nand(we2)
+mpp16 16 uart1(rxd), ge(rxd4)
+mpp17 17 uart1(txd), ge(rxd5)
+mpp18 18 uart1(cts), ge(rxd6)
+mpp19 19 uart1(rts), ge(rxd7)
diff --git a/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
new file mode 100644
index 000000000..3f6a524cc
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/meson,pinctrl.txt
@@ -0,0 +1,96 @@
+== Amlogic Meson pinmux controller ==
+
+Required properties for the root node:
+ - compatible: "amlogic,meson8-pinctrl" or "amlogic,meson8b-pinctrl"
+ - reg: address and size of registers controlling irq functionality
+
+=== GPIO sub-nodes ===
+
+The 2 power domains of the controller (regular and always-on) are
+represented as sub-nodes and each of them acts as a GPIO controller.
+
+Required properties for sub-nodes are:
+ - reg: should contain address and size for mux, pull-enable, pull and
+ gpio register sets
+ - reg-names: an array of strings describing the "reg" entries. Must
+ contain "mux", "pull" and "gpio". "pull-enable" is optional and
+ when it is missing the "pull" registers are used instead
+ - gpio-controller: identifies the node as a gpio controller
+ - #gpio-cells: must be 2
+
+Valid sub-node names are:
+ - "banks" for the regular domain
+ - "ao-bank" for the always-on domain
+
+=== Other sub-nodes ===
+
+Child nodes without the "gpio-controller" represent some desired
+configuration for a pin or a group. Those nodes can be pinmux nodes or
+configuration nodes.
+
+Required properties for pinmux nodes are:
+ - groups: a list of pinmux groups. The list of all available groups
+ depends on the SoC and can be found in driver sources.
+ - function: the name of a function to activate for the specified set
+ of groups. The list of all available functions depends on the SoC
+ and can be found in driver sources.
+
+Required properties for configuration nodes:
+ - pins: a list of pin names
+
+Configuration nodes support the generic properties "bias-disable",
+"bias-pull-up" and "bias-pull-down", described in file
+pinctrl-bindings.txt
+
+=== Example ===
+
+ pinctrl: pinctrl@c1109880 {
+ compatible = "amlogic,meson8-pinctrl";
+ reg = <0xc1109880 0x10>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ gpio: banks@c11080b0 {
+ reg = <0xc11080b0 0x28>,
+ <0xc11080e8 0x18>,
+ <0xc1108120 0x18>,
+ <0xc1108030 0x30>;
+ reg-names = "mux", "pull", "pull-enable", "gpio";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ gpio_ao: ao-bank@c1108030 {
+ reg = <0xc8100014 0x4>,
+ <0xc810002c 0x4>,
+ <0xc8100024 0x8>;
+ reg-names = "mux", "pull", "gpio";
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ nand {
+ mux {
+ groups = "nand_io", "nand_io_ce0", "nand_io_ce1",
+ "nand_io_rb0", "nand_ale", "nand_cle",
+ "nand_wen_clk", "nand_ren_clk", "nand_dqs",
+ "nand_ce2", "nand_ce3";
+ function = "nand";
+ };
+ };
+
+ uart_ao_a {
+ mux {
+ groups = "uart_tx_ao_a", "uart_rx_ao_a",
+ "uart_cts_ao_a", "uart_rts_ao_a";
+ function = "uart_ao";
+ };
+
+ conf {
+ pins = "GPIOAO_0", "GPIOAO_1",
+ "GPIOAO_2", "GPIOAO_3";
+ bias-disable;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra114-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra114-pinmux.txt
new file mode 100644
index 000000000..fb70856c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra114-pinmux.txt
@@ -0,0 +1,131 @@
+NVIDIA Tegra114 pinmux controller
+
+The Tegra114 pinctrl binding is very similar to the Tegra20 and Tegra30
+pinctrl binding, as described in nvidia,tegra20-pinmux.txt and
+nvidia,tegra30-pinmux.txt. In fact, this document assumes that binding as
+a baseline, and only documents the differences between the two bindings.
+
+Required properties:
+- compatible: "nvidia,tegra114-pinmux"
+- reg: Should contain the register physical address and length for each of
+ the pad control and mux registers. The first bank of address must be the
+ driver strength pad control register address and second bank address must
+ be pinmux register address.
+
+Tegra114 adds the following optional properties for pin configuration subnodes:
+- nvidia,enable-input: Integer. Enable the pin's input path. 0: no, 1: yes.
+- nvidia,open-drain: Integer. Enable open drain mode. 0: no, 1: yes.
+- nvidia,lock: Integer. Lock the pin configuration against further changes
+ until reset. 0: no, 1: yes.
+- nvidia,io-reset: Integer. Reset the IO path. 0: no, 1: yes.
+- nvidia,rcv-sel: Integer. Select VIL/VIH receivers. 0: normal, 1: high.
+- nvidia,drive-type: Integer. Valid range 0...3.
+
+As with Tegra20 and Terga30, see the Tegra TRM for complete details regarding
+which groups support which functionality.
+
+Valid values for pin and group names are:
+
+ per-pin mux groups:
+
+ These all support nvidia,function, nvidia,tristate, nvidia,pull,
+ nvidia,enable-input, nvidia,lock. Some support nvidia,open-drain,
+ nvidia,io-reset and nvidia,rcv-sel.
+
+ ulpi_data0_po1, ulpi_data1_po2, ulpi_data2_po3, ulpi_data3_po4,
+ ulpi_data4_po5, ulpi_data5_po6, ulpi_data6_po7, ulpi_data7_po0,
+ ulpi_clk_py0, ulpi_dir_py1, ulpi_nxt_py2, ulpi_stp_py3, dap3_fs_pp0,
+ dap3_din_pp1, dap3_dout_pp2, dap3_sclk_pp3, pv0, pv1, sdmmc1_clk_pz0,
+ sdmmc1_cmd_pz1, sdmmc1_dat3_py4, sdmmc1_dat2_py5, sdmmc1_dat1_py6,
+ sdmmc1_dat0_py7, clk2_out_pw5, clk2_req_pcc5, hdmi_int_pn7, ddc_scl_pv4,
+ ddc_sda_pv5, uart2_rxd_pc3, uart2_txd_pc2, uart2_rts_n_pj6,
+ uart2_cts_n_pj5, uart3_txd_pw6, uart3_rxd_pw7, uart3_cts_n_pa1,
+ uart3_rts_n_pc0, pu0, pu1, pu2, pu3, pu4, pu5, pu6, gen1_i2c_sda_pc5,
+ gen1_i2c_scl_pc4, dap4_fs_pp4, dap4_din_pp5, dap4_dout_pp6, dap4_sclk_pp7,
+ clk3_out_pee0, clk3_req_pee1, gmi_wp_n_pc7, gmi_iordy_pi5, gmi_wait_pi7,
+ gmi_adv_n_pk0, gmi_clk_pk1, gmi_cs0_n_pj0, gmi_cs1_n_pj2, gmi_cs2_n_pk3,
+ gmi_cs3_n_pk4, gmi_cs4_n_pk2, gmi_cs6_n_pi3, gmi_cs7_n_pi6, gmi_ad0_pg0,
+ gmi_ad1_pg1, gmi_ad2_pg2, gmi_ad3_pg3, gmi_ad4_pg4, gmi_ad5_pg5,
+ gmi_ad6_pg6, gmi_ad7_pg7, gmi_ad8_ph0, gmi_ad9_ph1, gmi_ad10_ph2,
+ gmi_ad11_ph3, gmi_ad12_ph4, gmi_ad13_ph5, gmi_ad14_ph6, gmi_ad15_ph7,
+ gmi_a16_pj7, gmi_a17_pb0, gmi_a18_pb1, gmi_a19_pk7, gmi_wr_n_pi0,
+ gmi_oe_n_pi1, gmi_dqs_p_pj3, gmi_rst_n_pi4, gen2_i2c_scl_pt5,
+ gen2_i2c_sda_pt6, sdmmc4_clk_pcc4, sdmmc4_cmd_pt7, sdmmc4_dat0_paa0,
+ sdmmc4_dat1_paa1, sdmmc4_dat2_paa2, sdmmc4_dat3_paa3, sdmmc4_dat4_paa4,
+ sdmmc4_dat5_paa5, sdmmc4_dat6_paa6, sdmmc4_dat7_paa7, cam_mclk_pcc0,
+ pcc1, pbb0, cam_i2c_scl_pbb1, cam_i2c_sda_pbb2, pbb3, pbb4, pbb5, pbb6,
+ pbb7, pcc2, pwr_i2c_scl_pz6, pwr_i2c_sda_pz7, kb_row0_pr0, kb_row1_pr1,
+ kb_row2_pr2, kb_row3_pr3, kb_row4_pr4, kb_row5_pr5, kb_row6_pr6,
+ kb_row7_pr7, kb_row8_ps0, kb_row9_ps1, kb_row10_ps2, kb_col0_pq0,
+ kb_col1_pq1, kb_col2_pq2, kb_col3_pq3, kb_col4_pq4, kb_col5_pq5,
+ kb_col6_pq6, kb_col7_pq7, clk_32k_out_pa0, sys_clk_req_pz5, core_pwr_req,
+ cpu_pwr_req, pwr_int_n, owr, dap1_fs_pn0, dap1_din_pn1, dap1_dout_pn2,
+ dap1_sclk_pn3, clk1_req_pee2, clk1_out_pw4, spdif_in_pk6, spdif_out_pk5,
+ dap2_fs_pa2, dap2_din_pa4, dap2_dout_pa5, dap2_sclk_pa3, dvfs_pwm_px0,
+ gpio_x1_aud_px1, gpio_x3_aud_px3, dvfs_clk_px2, gpio_x4_aud_px4,
+ gpio_x5_aud_px5, gpio_x6_aud_px6, gpio_x7_aud_px7, sdmmc3_clk_pa6,
+ sdmmc3_cmd_pa7, sdmmc3_dat0_pb7, sdmmc3_dat1_pb6, sdmmc3_dat2_pb5,
+ sdmmc3_dat3_pb4, hdmi_cec_pee3, sdmmc1_wp_n_pv3, sdmmc3_cd_n_pv2,
+ gpio_w2_aud_pw2, gpio_w3_aud_pw3, usb_vbus_en0_pn4, usb_vbus_en1_pn5,
+ sdmmc3_clk_lb_in_pee5, sdmmc3_clk_lb_out_pee4, reset_out_n.
+
+ drive groups:
+
+ These all support nvidia,pull-down-strength, nvidia,pull-up-strength,
+ nvidia,slew-rate-rising, nvidia,slew-rate-falling. Most but not all
+ support nvidia,high-speed-mode, nvidia,schmitt, nvidia,low-power-mode
+ and nvidia,drive-type.
+
+ ao1, ao2, at1, at2, at3, at4, at5, cdev1, cdev2, dap1, dap2, dap3, dap4,
+ dbg, sdio3, spi, uaa, uab, uart2, uart3, sdio1, ddc, gma, gme, gmf, gmg,
+ gmh, owr, uda.
+
+Valid values for nvidia,functions are:
+
+ blink, cec, cldvfs, clk12, cpu, dap, dap1, dap2, dev3, displaya,
+ displaya_alt, displayb, dtv, emc_dll, extperiph1, extperiph2,
+ extperiph3, gmi, gmi_alt, hda, hsi, i2c1, i2c2, i2c3, i2c4, i2cpwr,
+ i2s0, i2s1, i2s2, i2s3, i2s4, irda, kbc, nand, nand_alt, owr, pmi,
+ pwm0, pwm1, pwm2, pwm3, pwron, reset_out_n, rsvd1, rsvd2, rsvd3,
+ rsvd4, sdmmc1, sdmmc2, sdmmc3, sdmmc4, soc, spdif, spi1, spi2, spi3,
+ spi4, spi5, spi6, sysclk, trace, uarta, uartb, uartc, uartd, ulpi,
+ usb, vgp1, vgp2, vgp3, vgp4, vgp5, vgp6, vi, vi_alt1, vi_alt3
+
+Example:
+
+ pinmux: pinmux {
+ compatible = "nvidia,tegra114-pinmux";
+ reg = <0x70000868 0x148 /* Pad control registers */
+ 0x70003000 0x40c>; /* PinMux registers */
+ };
+
+Example board file extract:
+
+ pinctrl {
+ sdmmc4_default: pinmux {
+ sdmmc4_clk_pcc4 {
+ nvidia,pins = "sdmmc4_clk_pcc4",
+ nvidia,function = "sdmmc4";
+ nvidia,pull = <0>;
+ nvidia,tristate = <0>;
+ };
+ sdmmc4_dat0_paa0 {
+ nvidia,pins = "sdmmc4_dat0_paa0",
+ "sdmmc4_dat1_paa1",
+ "sdmmc4_dat2_paa2",
+ "sdmmc4_dat3_paa3",
+ "sdmmc4_dat4_paa4",
+ "sdmmc4_dat5_paa5",
+ "sdmmc4_dat6_paa6",
+ "sdmmc4_dat7_paa7";
+ nvidia,function = "sdmmc4";
+ nvidia,pull = <2>;
+ nvidia,tristate = <0>;
+ };
+ };
+ };
+
+ sdhci@78000400 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdmmc4_default>;
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-pinmux.txt
new file mode 100644
index 000000000..ecb5c0d25
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-pinmux.txt
@@ -0,0 +1,153 @@
+NVIDIA Tegra124 pinmux controller
+
+The Tegra124 pinctrl binding is very similar to the Tegra20 and Tegra30
+pinctrl binding, as described in nvidia,tegra20-pinmux.txt and
+nvidia,tegra30-pinmux.txt. In fact, this document assumes that binding as
+a baseline, and only documents the differences between the two bindings.
+
+Required properties:
+- compatible: For Tegra124, must contain "nvidia,tegra124-pinmux". For
+ Tegra132, must contain '"nvidia,tegra132-pinmux", "nvidia-tegra124-pinmux"'.
+- reg: Should contain a list of base address and size pairs for:
+ -- first entry - the drive strength and pad control registers.
+ -- second entry - the pinmux registers
+ -- third entry - the MIPI_PAD_CTRL register
+
+Tegra124 adds the following optional properties for pin configuration subnodes.
+The macros for options are defined in the
+ include/dt-binding/pinctrl/pinctrl-tegra.h.
+- nvidia,enable-input: Integer. Enable the pin's input path.
+ enable :TEGRA_PIN_ENABLE0 and
+ disable or output only: TEGRA_PIN_DISABLE.
+- nvidia,open-drain: Integer.
+ enable: TEGRA_PIN_ENABLE.
+ disable: TEGRA_PIN_DISABLE.
+- nvidia,lock: Integer. Lock the pin configuration against further changes
+ until reset.
+ enable: TEGRA_PIN_ENABLE.
+ disable: TEGRA_PIN_DISABLE.
+- nvidia,io-reset: Integer. Reset the IO path.
+ enable: TEGRA_PIN_ENABLE.
+ disable: TEGRA_PIN_DISABLE.
+- nvidia,rcv-sel: Integer. Select VIL/VIH receivers.
+ normal: TEGRA_PIN_DISABLE
+ high: TEGRA_PIN_ENABLE
+
+Please refer the Tegra TRM for complete details regarding which groups
+support which functionality.
+
+Valid values for pin and group names are:
+
+ per-pin mux groups:
+
+ These all support nvidia,function, nvidia,tristate, nvidia,pull,
+ nvidia,enable-input. Some support nvidia,lock nvidia,open-drain,
+ nvidia,io-reset and nvidia,rcv-sel.
+
+ ulpi_data0_po1, ulpi_data1_po2, ulpi_data2_po3, ulpi_data3_po4,
+ ulpi_data4_po5, ulpi_data5_po6, ulpi_data6_po7, ulpi_data7_po0,
+ ulpi_clk_py0, ulpi_dir_py1, ulpi_nxt_py2, ulpi_stp_py3, dap3_fs_pp0,
+ dap3_din_pp1, dap3_dout_pp2, dap3_sclk_pp3, pv0, pv1, sdmmc1_clk_pz0,
+ sdmmc1_cmd_pz1, sdmmc1_dat3_py4, sdmmc1_dat2_py5, sdmmc1_dat1_py6,
+ sdmmc1_dat0_py7, clk2_out_pw5, clk2_req_pcc5, hdmi_int_pn7, ddc_scl_pv4,
+ ddc_sda_pv5, uart2_rxd_pc3, uart2_txd_pc2, uart2_rts_n_pj6,
+ uart2_cts_n_pj5, uart3_txd_pw6, uart3_rxd_pw7, uart3_cts_n_pa1,
+ uart3_rts_n_pc0, pu0, pu1, pu2, pu3, pu4, pu5, pu6, gen1_i2c_scl_pc4,
+ gen1_i2c_sda_pc5, dap4_fs_pp4, dap4_din_pp5, dap4_dout_pp6,
+ dap4_sclk_pp7, clk3_out_pee0, clk3_req_pee1, pc7, pi5, pi7, pk0, pk1,
+ pj0, pj2, pk3, pk4, pk2, pi3, pi6, pg0, pg1, pg2, pg3, pg4, pg5, pg6,
+ pg7, ph0, ph1, ph2, ph3, ph4, ph5, ph6, ph7, pj7, pb0, pb1, pk7, pi0,
+ pi1, pi2, pi4, gen2_i2c_scl_pt5, gen2_i2c_sda_pt6, sdmmc4_clk_pcc4,
+ sdmmc4_cmd_pt7, sdmmc4_dat0_paa0, sdmmc4_dat1_paa1, sdmmc4_dat2_paa2,
+ sdmmc4_dat3_paa3, sdmmc4_dat4_paa4, sdmmc4_dat5_paa5, sdmmc4_dat6_paa6,
+ sdmmc4_dat7_paa7, cam_mclk_pcc0, pcc1, pbb0, cam_i2c_scl_pbb1,
+ cam_i2c_sda_pbb2, pbb3, pbb4, pbb5, pbb6, pbb7, pcc2, jtag_rtck,
+ pwr_i2c_scl_pz6, pwr_i2c_sda_pz7, kb_row0_pr0, kb_row1_pr1, kb_row2_pr2,
+ kb_row3_pr3, kb_row4_pr4, kb_row5_pr5, kb_row6_pr6, kb_row7_pr7,
+ kb_row8_ps0, kb_row9_ps1, kb_row10_ps2, kb_row11_ps3, kb_row12_ps4,
+ kb_row13_ps5, kb_row14_ps6, kb_row15_ps7, kb_col0_pq0, kb_col1_pq1,
+ kb_col2_pq2, kb_col3_pq3, kb_col4_pq4, kb_col5_pq5, kb_col6_pq6,
+ kb_col7_pq7, clk_32k_out_pa0, core_pwr_req, cpu_pwr_req, pwr_int_n,
+ clk_32k_in, owr, dap1_fs_pn0, dap1_din_pn1, dap1_dout_pn2,
+ dap1_sclk_pn3, dap_mclk1_req_pee2, dap_mclk1_pw4, spdif_in_pk6,
+ spdif_out_pk5, dap2_fs_pa2, dap2_din_pa4, dap2_dout_pa5, dap2_sclk_pa3,
+ dvfs_pwm_px0, gpio_x1_aud_px1, gpio_x3_aud_px3, dvfs_clk_px2,
+ gpio_x4_aud_px4, gpio_x5_aud_px5, gpio_x6_aud_px6, gpio_x7_aud_px7,
+ sdmmc3_clk_pa6, sdmmc3_cmd_pa7, sdmmc3_dat0_pb7, sdmmc3_dat1_pb6,
+ sdmmc3_dat2_pb5, sdmmc3_dat3_pb4, pex_l0_rst_n_pdd1,
+ pex_l0_clkreq_n_pdd2, pex_wake_n_pdd3, pex_l1_rst_n_pdd5,
+ pex_l1_clkreq_n_pdd6, hdmi_cec_pee3, sdmmc1_wp_n_pv3,
+ sdmmc3_cd_n_pv2, gpio_w2_aud_pw2, gpio_w3_aud_pw3, usb_vbus_en0_pn4,
+ usb_vbus_en1_pn5, sdmmc3_clk_lb_out_pee4, sdmmc3_clk_lb_in_pee5,
+ gmi_clk_lb, reset_out_n, kb_row16_pt0, kb_row17_pt1, usb_vbus_en2_pff1,
+ pff2, dp_hpd_pff0,
+
+ drive groups:
+
+ These all support nvidia,pull-down-strength, nvidia,pull-up-strength,
+ nvidia,slew-rate-rising, nvidia,slew-rate-falling. Most but not all
+ support nvidia,high-speed-mode, nvidia,schmitt, nvidia,low-power-mode
+ and nvidia,drive-type.
+
+ ao1, ao2, at1, at2, at3, at4, at5, cdev1, cdev2, dap1, dap2, dap3, dap4,
+ dbg, sdio3, spi, uaa, uab, uart2, uart3, sdio1, ddc, gma, gme, gmf, gmg,
+ gmh, owr, uda, gpv, dev3, cec, usb_vbus_en, ao3, ao0, hv0, sdio4, ao4.
+
+ MIPI pad control groups:
+
+ These support only the nvidia,function property.
+
+ dsi_b
+
+Valid values for nvidia,functions are:
+
+ blink, cec, cldvfs, clk12, cpu, dap, dap1, dap2, dev3, displaya,
+ displaya_alt, displayb, dtv, extperiph1, extperiph2, extperiph3,
+ gmi, gmi_alt, hda, hsi, i2c1, i2c2, i2c3, i2c4, i2cpwr, i2s0,
+ i2s1, i2s2, i2s3, i2s4, irda, kbc, owr, pmi, pwm0, pwm1, pwm2, pwm3,
+ pwron, reset_out_n, rsvd1, rsvd2, rsvd3, rsvd4, sdmmc1, sdmmc2, sdmmc3,
+ sdmmc4, soc, spdif, spi1, spi2, spi3, spi4, spi5, spi6, trace, uarta,
+ uartb, uartc, uartd, ulpi, usb, vgp1, vgp2, vgp3, vgp4, vgp5, vgp6,
+ vi, vi_alt1, vi_alt3, vimclk2, vimclk2_alt, sata, ccla, pe0, pe, pe1,
+ dp, rtck, sys, clk tmds, csi, dsi_b
+
+Example:
+
+ pinmux: pinmux {
+ compatible = "nvidia,tegra124-pinmux";
+ reg = <0x0 0x70000868 0x0 0x164>, /* Pad control registers */
+ <0x0 0x70003000 0x0 0x434>, /* Mux registers */
+ <0x0 0x70000820 0x0 0x8>; /* MIPI pad control */
+ };
+
+Example pinmux entries:
+
+ pinctrl {
+ sdmmc4_default: pinmux {
+ sdmmc4_clk_pcc4 {
+ nvidia,pins = "sdmmc4_clk_pcc4",
+ nvidia,function = "sdmmc4";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ };
+
+ sdmmc4_dat0_paa0 {
+ nvidia,pins = "sdmmc4_dat0_paa0",
+ "sdmmc4_dat1_paa1",
+ "sdmmc4_dat2_paa2",
+ "sdmmc4_dat3_paa3",
+ "sdmmc4_dat4_paa4",
+ "sdmmc4_dat5_paa5",
+ "sdmmc4_dat6_paa6",
+ "sdmmc4_dat7_paa7";
+ nvidia,function = "sdmmc4";
+ nvidia,pull = <TEGRA_PIN_PULL_UP>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ };
+ };
+ };
+
+ sdhci@78000400 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdmmc4_default>;
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt
new file mode 100644
index 000000000..30676ded8
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-xusb-padctl.txt
@@ -0,0 +1,129 @@
+Device tree binding for NVIDIA Tegra XUSB pad controller
+========================================================
+
+The Tegra XUSB pad controller manages a set of lanes, each of which can be
+assigned to one out of a set of different pads. Some of these pads have an
+associated PHY that must be powered up before the pad can be used.
+
+This document defines the device-specific binding for the XUSB pad controller.
+
+Refer to pinctrl-bindings.txt in this directory for generic information about
+pin controller device tree bindings and ../phy/phy-bindings.txt for details on
+how to describe and reference PHYs in device trees.
+
+Required properties:
+--------------------
+- compatible: For Tegra124, must contain "nvidia,tegra124-xusb-padctl".
+ Otherwise, must contain '"nvidia,<chip>-xusb-padctl",
+ "nvidia-tegra124-xusb-padctl"', where <chip> is tegra132 or tegra210.
+- reg: Physical base address and length of the controller's registers.
+- resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - padctl
+- #phy-cells: Should be 1. The specifier is the index of the PHY to reference.
+ See <dt-bindings/pinctrl/pinctrl-tegra-xusb.h> for the list of valid values.
+
+Lane muxing:
+------------
+
+Child nodes contain the pinmux configurations following the conventions from
+the pinctrl-bindings.txt document. Typically a single, static configuration is
+given and applied at boot time.
+
+Each subnode describes groups of lanes along with parameters and pads that
+they should be assigned to. The name of these subnodes is not important. All
+subnodes should be parsed solely based on their content.
+
+Each subnode only applies the parameters that are explicitly listed. In other
+words, if a subnode that lists a function but no pin configuration parameters
+implies no information about any pin configuration parameters. Similarly, a
+subnode that describes only an IDDQ parameter implies no information about
+what function the pins are assigned to. For this reason even seemingly boolean
+values are actually tristates in this binding: unspecified, off or on.
+Unspecified is represented as an absent property, and off/on are represented
+as integer values 0 and 1.
+
+Required properties:
+- nvidia,lanes: An array of strings. Each string is the name of a lane.
+
+Optional properties:
+- nvidia,function: A string that is the name of the function (pad) that the
+ pin or group should be assigned to. Valid values for function names are
+ listed below.
+- nvidia,iddq: Enables IDDQ mode of the lane. (0: no, 1: yes)
+
+Note that not all of these properties are valid for all lanes. Lanes can be
+divided into three groups:
+
+ - otg-0, otg-1, otg-2:
+
+ Valid functions for this group are: "snps", "xusb", "uart", "rsvd".
+
+ The nvidia,iddq property does not apply to this group.
+
+ - ulpi-0, hsic-0, hsic-1:
+
+ Valid functions for this group are: "snps", "xusb".
+
+ The nvidia,iddq property does not apply to this group.
+
+ - pcie-0, pcie-1, pcie-2, pcie-3, pcie-4, sata-0:
+
+ Valid functions for this group are: "pcie", "usb3", "sata", "rsvd".
+
+
+Example:
+========
+
+SoC file extract:
+-----------------
+
+ padctl@0,7009f000 {
+ compatible = "nvidia,tegra124-xusb-padctl";
+ reg = <0x0 0x7009f000 0x0 0x1000>;
+ resets = <&tegra_car 142>;
+ reset-names = "padctl";
+
+ #phy-cells = <1>;
+ };
+
+Board file extract:
+-------------------
+
+ pcie-controller@0,01003000 {
+ ...
+
+ phys = <&padctl 0>;
+ phy-names = "pcie";
+
+ ...
+ };
+
+ ...
+
+ padctl: padctl@0,7009f000 {
+ pinctrl-0 = <&padctl_default>;
+ pinctrl-names = "default";
+
+ padctl_default: pinmux {
+ usb3 {
+ nvidia,lanes = "pcie-0", "pcie-1";
+ nvidia,function = "usb3";
+ nvidia,iddq = <0>;
+ };
+
+ pcie {
+ nvidia,lanes = "pcie-2", "pcie-3",
+ "pcie-4";
+ nvidia,function = "pcie";
+ nvidia,iddq = <0>;
+ };
+
+ sata {
+ nvidia,lanes = "sata-0";
+ nvidia,function = "sata";
+ nvidia,iddq = <0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt
new file mode 100644
index 000000000..3c8ce28ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra20-pinmux.txt
@@ -0,0 +1,143 @@
+NVIDIA Tegra20 pinmux controller
+
+Required properties:
+- compatible: "nvidia,tegra20-pinmux"
+- reg: Should contain the register physical address and length for each of
+ the tri-state, mux, pull-up/down, and pad control register sets.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Tegra's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, tristate, drive strength, etc.
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function or tristate parameter. For this
+reason, even seemingly boolean values are actually tristates in this binding:
+unspecified, off, or on. Unspecified is represented as an absent property,
+and off/on are represented as integer values 0 and 1.
+
+Required subnode-properties:
+- nvidia,pins : An array of strings. Each string contains the name of a pin or
+ group. Valid values for these names are listed below.
+
+Optional subnode-properties:
+- nvidia,function: A string containing the name of the function to mux to the
+ pin or group. Valid values for function names are listed below. See the Tegra
+ TRM to determine which are valid for each pin or group.
+- nvidia,pull: Integer, representing the pull-down/up to apply to the pin.
+ 0: none, 1: down, 2: up.
+- nvidia,tristate: Integer.
+ 0: drive, 1: tristate.
+- nvidia,high-speed-mode: Integer. Enable high speed mode the pins.
+ 0: no, 1: yes.
+- nvidia,schmitt: Integer. Enables Schmitt Trigger on the input.
+ 0: no, 1: yes.
+- nvidia,low-power-mode: Integer. Valid values 0-3. 0 is least power, 3 is
+ most power. Controls the drive power or current. See "Low Power Mode"
+ or "LPMD1" and "LPMD0" in the Tegra TRM.
+- nvidia,pull-down-strength: Integer. Controls drive strength. 0 is weakest.
+ The range of valid values depends on the pingroup. See "CAL_DRVDN" in the
+ Tegra TRM.
+- nvidia,pull-up-strength: Integer. Controls drive strength. 0 is weakest.
+ The range of valid values depends on the pingroup. See "CAL_DRVUP" in the
+ Tegra TRM.
+- nvidia,slew-rate-rising: Integer. Controls rising signal slew rate. 0 is
+ fastest. The range of valid values depends on the pingroup. See
+ "DRVDN_SLWR" in the Tegra TRM.
+- nvidia,slew-rate-falling: Integer. Controls falling signal slew rate. 0 is
+ fastest. The range of valid values depends on the pingroup. See
+ "DRVUP_SLWF" in the Tegra TRM.
+
+Note that many of these properties are only valid for certain specific pins
+or groups. See the Tegra TRM and various pinmux spreadsheets for complete
+details regarding which groups support which functionality. The Linux pinctrl
+driver may also be a useful reference, since it consolidates, disambiguates,
+and corrects data from all those sources.
+
+Valid values for pin and group names are:
+
+ mux groups:
+
+ These all support nvidia,function, nvidia,tristate, and many support
+ nvidia,pull.
+
+ ata, atb, atc, atd, ate, cdev1, cdev2, crtp, csus, dap1, dap2, dap3, dap4,
+ ddc, dta, dtb, dtc, dtd, dte, dtf, gma, gmb, gmc, gmd, gme, gpu, gpu7,
+ gpv, hdint, i2cp, irrx, irtx, kbca, kbcb, kbcc, kbcd, kbce, kbcf, lcsn,
+ ld0, ld1, ld2, ld3, ld4, ld5, ld6, ld7, ld8, ld9, ld10, ld11, ld12, ld13,
+ ld14, ld15, ld16, ld17, ldc, ldi, lhp0, lhp1, lhp2, lhs, lm0, lm1, lpp,
+ lpw0, lpw1, lpw2, lsc0, lsc1, lsck, lsda, lsdi, lspi, lvp0, lvp1, lvs,
+ owc, pmc, pta, rm, sdb, sdc, sdd, sdio1, slxa, slxc, slxd, slxk, spdi,
+ spdo, spia, spib, spic, spid, spie, spif, spig, spih, uaa, uab, uac, uad,
+ uca, ucb, uda.
+
+ tristate groups:
+
+ These only support nvidia,pull.
+
+ ck32, ddrc, pmca, pmcb, pmcc, pmcd, pmce, xm2c, xm2d, ls, lc, ld17_0,
+ ld19_18, ld21_20, ld23_22.
+
+ drive groups:
+
+ With some exceptions, these support nvidia,high-speed-mode,
+ nvidia,schmitt, nvidia,low-power-mode, nvidia,pull-down-strength,
+ nvidia,pull-up-strength, nvidia,slew-rate-rising, nvidia,slew-rate-falling.
+
+ drive_ao1, drive_ao2, drive_at1, drive_at2, drive_cdev1, drive_cdev2,
+ drive_csus, drive_dap1, drive_dap2, drive_dap3, drive_dap4, drive_dbg,
+ drive_lcd1, drive_lcd2, drive_sdmmc2, drive_sdmmc3, drive_spi, drive_uaa,
+ drive_uab, drive_uart2, drive_uart3, drive_vi1, drive_vi2, drive_xm2a,
+ drive_xm2c, drive_xm2d, drive_xm2clk, drive_sdio1, drive_crt, drive_ddc,
+ drive_gma, drive_gmb, drive_gmc, drive_gmd, drive_gme, drive_owr,
+ drive_uda.
+
+Valid values for nvidia,functions are:
+
+ ahb_clk, apb_clk, audio_sync, crt, dap1, dap2, dap3, dap4, dap5,
+ displaya, displayb, emc_test0_dll, emc_test1_dll, gmi, gmi_int,
+ hdmi, i2cp, i2c1, i2c2, i2c3, ide, irda, kbc, mio, mipi_hs, nand,
+ osc, owr, pcie, plla_out, pllc_out1, pllm_out1, pllp_out2, pllp_out3,
+ pllp_out4, pwm, pwr_intr, pwr_on, rsvd1, rsvd2, rsvd3, rsvd4, rtck,
+ sdio1, sdio2, sdio3, sdio4, sflash, spdif, spi1, spi2, spi2_alt,
+ spi3, spi4, trace, twc, uarta, uartb, uartc, uartd, uarte, ulpi,
+ vi, vi_sensor_clk, xio
+
+Example:
+
+ pinctrl@70000000 {
+ compatible = "nvidia,tegra20-pinmux";
+ reg = < 0x70000014 0x10 /* Tri-state registers */
+ 0x70000080 0x20 /* Mux registers */
+ 0x700000a0 0x14 /* Pull-up/down registers */
+ 0x70000868 0xa8 >; /* Pad control registers */
+ };
+
+Example board file extract:
+
+ pinctrl@70000000 {
+ sdio4_default: sdio4_default {
+ atb {
+ nvidia,pins = "atb", "gma", "gme";
+ nvidia,function = "sdio4";
+ nvidia,pull = <0>;
+ nvidia,tristate = <0>;
+ };
+ };
+ };
+
+ sdhci@c8000600 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdio4_default>;
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra210-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra210-pinmux.txt
new file mode 100644
index 000000000..a62d82d5f
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra210-pinmux.txt
@@ -0,0 +1,166 @@
+NVIDIA Tegra210 pinmux controller
+
+Required properties:
+- compatible: "nvidia,tegra210-pinmux"
+- reg: Should contain a list of base address and size pairs for:
+ - first entry: The APB_MISC_GP_*_PADCTRL registers (pad control)
+ - second entry: The PINMUX_AUX_* registers (pinmux)
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Tegra's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, tristate, drive strength, etc.
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function or tristate parameter. For this
+reason, even seemingly boolean values are actually tristates in this binding:
+unspecified, off, or on. Unspecified is represented as an absent property,
+and off/on are represented as integer values 0 and 1.
+
+See the TRM to determine which properties and values apply to each pin/group.
+Macro values for property values are defined in
+include/dt-binding/pinctrl/pinctrl-tegra.h.
+
+Required subnode-properties:
+- nvidia,pins : An array of strings. Each string contains the name of a pin or
+ group. Valid values for these names are listed below.
+
+Optional subnode-properties:
+- nvidia,function: A string containing the name of the function to mux to the
+ pin or group.
+- nvidia,pull: Integer, representing the pull-down/up to apply to the pin.
+ 0: none, 1: down, 2: up.
+- nvidia,tristate: Integer.
+ 0: drive, 1: tristate.
+- nvidia,enable-input: Integer. Enable the pin's input path.
+ enable :TEGRA_PIN_ENABLE0 and
+ disable or output only: TEGRA_PIN_DISABLE.
+- nvidia,open-drain: Integer.
+ enable: TEGRA_PIN_ENABLE.
+ disable: TEGRA_PIN_DISABLE.
+- nvidia,lock: Integer. Lock the pin configuration against further changes
+ until reset.
+ enable: TEGRA_PIN_ENABLE.
+ disable: TEGRA_PIN_DISABLE.
+- nvidia,io-hv: Integer. Select high-voltage receivers.
+ normal: TEGRA_PIN_DISABLE
+ high: TEGRA_PIN_ENABLE
+- nvidia,high-speed-mode: Integer. Enable high speed mode the pins.
+ normal: TEGRA_PIN_DISABLE
+ high: TEGRA_PIN_ENABLE
+- nvidia,schmitt: Integer. Enables Schmitt Trigger on the input.
+ normal: TEGRA_PIN_DISABLE
+ high: TEGRA_PIN_ENABLE
+- nvidia,drive-type: Integer. Valid range 0...3.
+- nvidia,pull-down-strength: Integer. Controls drive strength. 0 is weakest.
+ The range of valid values depends on the pingroup. See "CAL_DRVDN" in the
+ Tegra TRM.
+- nvidia,pull-up-strength: Integer. Controls drive strength. 0 is weakest.
+ The range of valid values depends on the pingroup. See "CAL_DRVUP" in the
+ Tegra TRM.
+- nvidia,slew-rate-rising: Integer. Controls rising signal slew rate. 0 is
+ fastest. The range of valid values depends on the pingroup. See
+ "DRVDN_SLWR" in the Tegra TRM.
+- nvidia,slew-rate-falling: Integer. Controls falling signal slew rate. 0 is
+ fastest. The range of valid values depends on the pingroup. See
+ "DRVUP_SLWF" in the Tegra TRM.
+
+Valid values for pin and group names (nvidia,pin) are:
+
+ Mux groups:
+
+ These correspond to Tegra PINMUX_AUX_* (pinmux) registers. Any property
+ that exists in those registers may be set for the following pin names.
+
+ In Tegra210, many pins also have a dedicated APB_MISC_GP_*_PADCTRL
+ register. Where that is true, and property that exists in that register
+ may also be set on the following pin names.
+
+ als_prox_int_px3, ap_ready_pv5, ap_wake_bt_ph3, ap_wake_nfc_ph7,
+ aud_mclk_pbb0, batt_bcl, bt_rst_ph4, bt_wake_ap_ph5, button_home_py1,
+ button_power_on_px5, button_slide_sw_py0, button_vol_down_px7,
+ button_vol_up_px6, cam1_mclk_ps0, cam1_pwdn_ps7, cam1_strobe_pt1,
+ cam2_mclk_ps1, cam2_pwdn_pt0, cam_af_en_ps5, cam_flash_en_ps6,
+ cam_i2c_scl_ps2, cam_i2c_sda_ps3, cam_rst_ps4cam_rst_ps4, clk_32k_in,
+ clk_32k_out_py5, clk_req, core_pwr_req, cpu_pwr_req, dap1_din_pb1,
+ dap1_dout_pb2, dap1_fs_pb0, dap1_sclk_pb3, dap2_din_paa2, dap2_dout_paa3,
+ dap2_fs_paa0, dap2_sclk_paa1, dap4_din_pj5, dap4_dout_pj6, dap4_fs_pj4,
+ dap4_sclk_pj7, dmic1_clk_pe0, dmic1_dat_pe1, dmic2_clk_pe2, dmic2_dat_pe3,
+ dmic3_clk_pe4, dmic3_dat_pe5, dp_hpd0_pcc6, dvfs_clk_pbb2, dvfs_pwm_pbb1,
+ gen1_i2c_scl_pj1, gen1_i2c_sda_pj0, gen2_i2c_scl_pj2, gen2_i2c_sda_pj3,
+ gen3_i2c_scl_pf0, gen3_i2c_sda_pf1, gpio_x1_aud_pbb3, gpio_x3_aud_pbb4,
+ gps_en_pi2, gps_rst_pi3, hdmi_cec_pcc0, hdmi_int_dp_hpd_pcc1, jtag_rtck,
+ lcd_bl_en_pv1, lcd_bl_pwm_pv0, lcd_gpio1_pv3, lcd_gpio2_pv4, lcd_rst_pv2,
+ lcd_te_py2, modem_wake_ap_px0, motion_int_px2, nfc_en_pi0, nfc_int_pi1,
+ pa6, pcc7, pe6, pe7, pex_l0_clkreq_n_pa1, pex_l0_rst_n_pa0,
+ pex_l1_clkreq_n_pa4, pex_l1_rst_n_pa3, pex_wake_n_pa2, ph6, pk0, pk1, pk2,
+ pk3, pk4, pk5, pk6, pk7, pl0, pl1, pwr_i2c_scl_py3, pwr_i2c_sda_py4,
+ pwr_int_n, pz0, pz1, pz2, pz3, pz4, pz5, qspi_cs_n_pee1, qspi_io0_pee2,
+ qspi_io1_pee3, qspi_io2_pee4, qspi_io3_pee5, qspi_sck_pee0,
+ sata_led_active_pa5, sdmmc1_clk_pm0, sdmmc1_cmd_pm1, sdmmc1_dat0_pm5,
+ sdmmc1_dat1_pm4, sdmmc1_dat2_pm3, sdmmc1_dat3_pm2, sdmmc3_clk_pp0,
+ sdmmc3_cmd_pp1, sdmmc3_dat0_pp5, sdmmc3_dat1_pp4, sdmmc3_dat2_pp3,
+ sdmmc3_dat3_pp2, shutdown, spdif_in_pcc3, spdif_out_pcc2, spi1_cs0_pc3,
+ spi1_cs1_pc4, spi1_miso_pc1, spi1_mosi_pc0, spi1_sck_pc2, spi2_cs0_pb7,
+ spi2_cs1_pdd0, spi2_miso_pb5, spi2_mosi_pb4, spi2_sck_pb6, spi4_cs0_pc6,
+ spi4_miso_pd0, spi4_mosi_pc7, spi4_sck_pc5, temp_alert_px4, touch_clk_pv7,
+ touch_int_px1, touch_rst_pv6, uart1_cts_pu3, uart1_rts_pu2, uart1_rx_pu1,
+ uart1_tx_pu0, uart2_cts_pg3, uart2_rts_pg2, uart2_rx_pg1, uart2_tx_pg0,
+ uart3_cts_pd4, uart3_rts_pd3, uart3_rx_pd2, uart3_tx_pd1, uart4_cts_pi7,
+ uart4_rts_pi6, uart4_rx_pi5, uart4_tx_pi4, usb_vbus_en0_pcc4,
+ usb_vbus_en1_pcc5, wifi_en_ph0, wifi_rst_ph1, wifi_wake_ap_ph2
+
+ Drive groups:
+
+ These correspond to the Tegra APB_MISC_GP_*_PADCTRL (pad control)
+ registers. Note that where one of these registers controls a single pin
+ for which a PINMUX_AUX_* exists, see the list above for the pin name to
+ use when configuring the pinmux.
+
+ pa6, pcc7, pe6, pe7, ph6, pk0, pk1, pk2, pk3, pk4, pk5, pk6, pk7, pl0, pl1,
+ pz0, pz1, pz2, pz3, pz4, pz5, sdmmc1, sdmmc2, sdmmc3, sdmmc4
+
+Valid values for nvidia,functions are:
+
+ aud, bcl, blink, ccla, cec, cldvfs, clk, core, cpu, displaya, displayb,
+ dmic1, dmic2, dmic3, dp, dtv, extperiph3, i2c1, i2c2, i2c3, i2cpmu, i2cvi,
+ i2s1, i2s2, i2s3, i2s4a, i2s4b, i2s5a, i2s5b, iqc0, iqc1, jtag, pe, pe0,
+ pe1, pmi, pwm0, pwm1, pwm2, pwm3, qspi, rsvd0, rsvd1, rsvd2, rsvd3, sata,
+ sdmmc1, sdmmc3, shutdown, soc, sor0, sor1, spdif, spi1, spi2, spi3, spi4,
+ sys, touch, uart, uarta, uartb, uartc, uartd, usb, vgp1, vgp2, vgp3, vgp4,
+ vgp5, vgp6, vimclk, vimclk2
+
+Example:
+
+ pinmux: pinmux@70000800 {
+ compatible = "nvidia,tegra210-pinmux";
+ reg = <0x0 0x700008d4 0x0 0x2a8>, /* Pad control registers */
+ <0x0 0x70003000 0x0 0x1000>; /* Mux registers */
+
+ pinctrl-names = "boot";
+ pinctrl-0 = <&state_boot>;
+
+ state_boot: pinmux {
+ gen1_i2c_scl_pj1 {
+ nvidia,pins = "gen1_i2c_scl_pj1",
+ nvidia,function = "i2c1";
+ nvidia,pull = <TEGRA_PIN_PULL_NONE>;
+ nvidia,tristate = <TEGRA_PIN_DISABLE>;
+ nvidia,enable-input = <TEGRA_PIN_ENABLE>;
+ nvidia,open-drain = <TEGRA_PIN_ENABLE>;
+ nvidia,io-hv = <TEGRA_PIN_ENABLE>;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.txt b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.txt
new file mode 100644
index 000000000..0e6354c11
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra30-pinmux.txt
@@ -0,0 +1,144 @@
+NVIDIA Tegra30 pinmux controller
+
+The Tegra30 pinctrl binding is very similar to the Tegra20 pinctrl binding,
+as described in nvidia,tegra20-pinmux.txt. In fact, this document assumes
+that binding as a baseline, and only documents the differences between the
+two bindings.
+
+Required properties:
+- compatible: "nvidia,tegra30-pinmux"
+- reg: Should contain the register physical address and length for each of
+ the pad control and mux registers.
+
+Tegra30 adds the following optional properties for pin configuration subnodes:
+- nvidia,enable-input: Integer. Enable the pin's input path. 0: no, 1: yes.
+- nvidia,open-drain: Integer. Enable open drain mode. 0: no, 1: yes.
+- nvidia,lock: Integer. Lock the pin configuration against further changes
+ until reset. 0: no, 1: yes.
+- nvidia,io-reset: Integer. Reset the IO path. 0: no, 1: yes.
+
+As with Tegra20, see the Tegra TRM for complete details regarding which groups
+support which functionality.
+
+Valid values for pin and group names are:
+
+ per-pin mux groups:
+
+ These all support nvidia,function, nvidia,tristate, nvidia,pull,
+ nvidia,enable-input, nvidia,lock. Some support nvidia,open-drain,
+ nvidia,io-reset.
+
+ clk_32k_out_pa0, uart3_cts_n_pa1, dap2_fs_pa2, dap2_sclk_pa3,
+ dap2_din_pa4, dap2_dout_pa5, sdmmc3_clk_pa6, sdmmc3_cmd_pa7, gmi_a17_pb0,
+ gmi_a18_pb1, lcd_pwr0_pb2, lcd_pclk_pb3, sdmmc3_dat3_pb4, sdmmc3_dat2_pb5,
+ sdmmc3_dat1_pb6, sdmmc3_dat0_pb7, uart3_rts_n_pc0, lcd_pwr1_pc1,
+ uart2_txd_pc2, uart2_rxd_pc3, gen1_i2c_scl_pc4, gen1_i2c_sda_pc5,
+ lcd_pwr2_pc6, gmi_wp_n_pc7, sdmmc3_dat5_pd0, sdmmc3_dat4_pd1, lcd_dc1_pd2,
+ sdmmc3_dat6_pd3, sdmmc3_dat7_pd4, vi_d1_pd5, vi_vsync_pd6, vi_hsync_pd7,
+ lcd_d0_pe0, lcd_d1_pe1, lcd_d2_pe2, lcd_d3_pe3, lcd_d4_pe4, lcd_d5_pe5,
+ lcd_d6_pe6, lcd_d7_pe7, lcd_d8_pf0, lcd_d9_pf1, lcd_d10_pf2, lcd_d11_pf3,
+ lcd_d12_pf4, lcd_d13_pf5, lcd_d14_pf6, lcd_d15_pf7, gmi_ad0_pg0,
+ gmi_ad1_pg1, gmi_ad2_pg2, gmi_ad3_pg3, gmi_ad4_pg4, gmi_ad5_pg5,
+ gmi_ad6_pg6, gmi_ad7_pg7, gmi_ad8_ph0, gmi_ad9_ph1, gmi_ad10_ph2,
+ gmi_ad11_ph3, gmi_ad12_ph4, gmi_ad13_ph5, gmi_ad14_ph6, gmi_ad15_ph7,
+ gmi_wr_n_pi0, gmi_oe_n_pi1, gmi_dqs_pi2, gmi_cs6_n_pi3, gmi_rst_n_pi4,
+ gmi_iordy_pi5, gmi_cs7_n_pi6, gmi_wait_pi7, gmi_cs0_n_pj0, lcd_de_pj1,
+ gmi_cs1_n_pj2, lcd_hsync_pj3, lcd_vsync_pj4, uart2_cts_n_pj5,
+ uart2_rts_n_pj6, gmi_a16_pj7, gmi_adv_n_pk0, gmi_clk_pk1, gmi_cs4_n_pk2,
+ gmi_cs2_n_pk3, gmi_cs3_n_pk4, spdif_out_pk5, spdif_in_pk6, gmi_a19_pk7,
+ vi_d2_pl0, vi_d3_pl1, vi_d4_pl2, vi_d5_pl3, vi_d6_pl4, vi_d7_pl5,
+ vi_d8_pl6, vi_d9_pl7, lcd_d16_pm0, lcd_d17_pm1, lcd_d18_pm2, lcd_d19_pm3,
+ lcd_d20_pm4, lcd_d21_pm5, lcd_d22_pm6, lcd_d23_pm7, dap1_fs_pn0,
+ dap1_din_pn1, dap1_dout_pn2, dap1_sclk_pn3, lcd_cs0_n_pn4, lcd_sdout_pn5,
+ lcd_dc0_pn6, hdmi_int_pn7, ulpi_data7_po0, ulpi_data0_po1, ulpi_data1_po2,
+ ulpi_data2_po3, ulpi_data3_po4, ulpi_data4_po5, ulpi_data5_po6,
+ ulpi_data6_po7, dap3_fs_pp0, dap3_din_pp1, dap3_dout_pp2, dap3_sclk_pp3,
+ dap4_fs_pp4, dap4_din_pp5, dap4_dout_pp6, dap4_sclk_pp7, kb_col0_pq0,
+ kb_col1_pq1, kb_col2_pq2, kb_col3_pq3, kb_col4_pq4, kb_col5_pq5,
+ kb_col6_pq6, kb_col7_pq7, kb_row0_pr0, kb_row1_pr1, kb_row2_pr2,
+ kb_row3_pr3, kb_row4_pr4, kb_row5_pr5, kb_row6_pr6, kb_row7_pr7,
+ kb_row8_ps0, kb_row9_ps1, kb_row10_ps2, kb_row11_ps3, kb_row12_ps4,
+ kb_row13_ps5, kb_row14_ps6, kb_row15_ps7, vi_pclk_pt0, vi_mclk_pt1,
+ vi_d10_pt2, vi_d11_pt3, vi_d0_pt4, gen2_i2c_scl_pt5, gen2_i2c_sda_pt6,
+ sdmmc4_cmd_pt7, pu0, pu1, pu2, pu3, pu4, pu5, pu6, jtag_rtck_pu7, pv0,
+ pv1, pv2, pv3, ddc_scl_pv4, ddc_sda_pv5, crt_hsync_pv6, crt_vsync_pv7,
+ lcd_cs1_n_pw0, lcd_m1_pw1, spi2_cs1_n_pw2, spi2_cs2_n_pw3, clk1_out_pw4,
+ clk2_out_pw5, uart3_txd_pw6, uart3_rxd_pw7, spi2_mosi_px0, spi2_miso_px1,
+ spi2_sck_px2, spi2_cs0_n_px3, spi1_mosi_px4, spi1_sck_px5, spi1_cs0_n_px6,
+ spi1_miso_px7, ulpi_clk_py0, ulpi_dir_py1, ulpi_nxt_py2, ulpi_stp_py3,
+ sdmmc1_dat3_py4, sdmmc1_dat2_py5, sdmmc1_dat1_py6, sdmmc1_dat0_py7,
+ sdmmc1_clk_pz0, sdmmc1_cmd_pz1, lcd_sdin_pz2, lcd_wr_n_pz3, lcd_sck_pz4,
+ sys_clk_req_pz5, pwr_i2c_scl_pz6, pwr_i2c_sda_pz7, sdmmc4_dat0_paa0,
+ sdmmc4_dat1_paa1, sdmmc4_dat2_paa2, sdmmc4_dat3_paa3, sdmmc4_dat4_paa4,
+ sdmmc4_dat5_paa5, sdmmc4_dat6_paa6, sdmmc4_dat7_paa7, pbb0,
+ cam_i2c_scl_pbb1, cam_i2c_sda_pbb2, pbb3, pbb4, pbb5, pbb6, pbb7,
+ cam_mclk_pcc0, pcc1, pcc2, sdmmc4_rst_n_pcc3, sdmmc4_clk_pcc4,
+ clk2_req_pcc5, pex_l2_rst_n_pcc6, pex_l2_clkreq_n_pcc7,
+ pex_l0_prsnt_n_pdd0, pex_l0_rst_n_pdd1, pex_l0_clkreq_n_pdd2,
+ pex_wake_n_pdd3, pex_l1_prsnt_n_pdd4, pex_l1_rst_n_pdd5,
+ pex_l1_clkreq_n_pdd6, pex_l2_prsnt_n_pdd7, clk3_out_pee0, clk3_req_pee1,
+ clk1_req_pee2, hdmi_cec_pee3, clk_32k_in, core_pwr_req, cpu_pwr_req, owr,
+ pwr_int_n.
+
+ drive groups:
+
+ These all support nvidia,pull-down-strength, nvidia,pull-up-strength,
+ nvidia,slew-rate-rising, nvidia,slew-rate-falling. Most but not all
+ support nvidia,high-speed-mode, nvidia,schmitt, nvidia,low-power-mode.
+
+ ao1, ao2, at1, at2, at3, at4, at5, cdev1, cdev2, cec, crt, csus, dap1,
+ dap2, dap3, dap4, dbg, ddc, dev3, gma, gmb, gmc, gmd, gme, gmf, gmg,
+ gmh, gpv, lcd1, lcd2, owr, sdio1, sdio2, sdio3, spi, uaa, uab, uart2,
+ uart3, uda, vi1.
+
+Valid values for nvidia,functions are:
+
+ blink, cec, clk_12m_out, clk_32k_in, core_pwr_req, cpu_pwr_req, crt,
+ dap, ddr, dev3, displaya, displayb, dtv, extperiph1, extperiph2,
+ extperiph3, gmi, gmi_alt, hda, hdcp, hdmi, hsi, i2c1, i2c2, i2c3,
+ i2c4, i2cpwr, i2s0, i2s1, i2s2, i2s3, i2s4, invalid, kbc, mio, nand,
+ nand_alt, owr, pcie, pwm0, pwm1, pwm2, pwm3, pwr_int_n, rsvd1, rsvd2,
+ rsvd3, rsvd4, rtck, sata, sdmmc1, sdmmc2, sdmmc3, sdmmc4, spdif, spi1,
+ spi2, spi2_alt, spi3, spi4, spi5, spi6, sysclk, test, trace, uarta,
+ uartb, uartc, uartd, uarte, ulpi, vgp1, vgp2, vgp3, vgp4, vgp5, vgp6,
+ vi, vi_alt1, vi_alt2, vi_alt3
+
+Example:
+
+ pinctrl@70000000 {
+ compatible = "nvidia,tegra30-pinmux";
+ reg = < 0x70000868 0xd0 /* Pad control registers */
+ 0x70003000 0x3e0 >; /* Mux registers */
+ };
+
+Example board file extract:
+
+ pinctrl@70000000 {
+ sdmmc4_default: pinmux {
+ sdmmc4_clk_pcc4 {
+ nvidia,pins = "sdmmc4_clk_pcc4",
+ "sdmmc4_rst_n_pcc3";
+ nvidia,function = "sdmmc4";
+ nvidia,pull = <0>;
+ nvidia,tristate = <0>;
+ };
+ sdmmc4_dat0_paa0 {
+ nvidia,pins = "sdmmc4_dat0_paa0",
+ "sdmmc4_dat1_paa1",
+ "sdmmc4_dat2_paa2",
+ "sdmmc4_dat3_paa3",
+ "sdmmc4_dat4_paa4",
+ "sdmmc4_dat5_paa5",
+ "sdmmc4_dat6_paa6",
+ "sdmmc4_dat7_paa7";
+ nvidia,function = "sdmmc4";
+ nvidia,pull = <2>;
+ nvidia,tristate = <0>;
+ };
+ };
+ };
+
+ sdhci@78000400 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&sdmmc4_default>;
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
new file mode 100644
index 000000000..b73c96d24
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
@@ -0,0 +1,236 @@
+== Introduction ==
+
+Hardware modules that control pin multiplexing or configuration parameters
+such as pull-up/down, tri-state, drive-strength etc are designated as pin
+controllers. Each pin controller must be represented as a node in device tree,
+just like any other hardware module.
+
+Hardware modules whose signals are affected by pin configuration are
+designated client devices. Again, each client device must be represented as a
+node in device tree, just like any other hardware module.
+
+For a client device to operate correctly, certain pin controllers must
+set up certain specific pin configurations. Some client devices need a
+single static pin configuration, e.g. set up during initialization. Others
+need to reconfigure pins at run-time, for example to tri-state pins when the
+device is inactive. Hence, each client device can define a set of named
+states. The number and names of those states is defined by the client device's
+own binding.
+
+The common pinctrl bindings defined in this file provide an infrastructure
+for client device device tree nodes to map those state names to the pin
+configuration used by those states.
+
+Note that pin controllers themselves may also be client devices of themselves.
+For example, a pin controller may set up its own "active" state when the
+driver loads. This would allow representing a board's static pin configuration
+in a single place, rather than splitting it across multiple client device
+nodes. The decision to do this or not somewhat rests with the author of
+individual board device tree files, and any requirements imposed by the
+bindings for the individual client devices in use by that board, i.e. whether
+they require certain specific named states for dynamic pin configuration.
+
+== Pinctrl client devices ==
+
+For each client device individually, every pin state is assigned an integer
+ID. These numbers start at 0, and are contiguous. For each state ID, a unique
+property exists to define the pin configuration. Each state may also be
+assigned a name. When names are used, another property exists to map from
+those names to the integer IDs.
+
+Each client device's own binding determines the set of states that must be
+defined in its device tree node, and whether to define the set of state
+IDs that must be provided, or whether to define the set of state names that
+must be provided.
+
+Required properties:
+pinctrl-0: List of phandles, each pointing at a pin configuration
+ node. These referenced pin configuration nodes must be child
+ nodes of the pin controller that they configure. Multiple
+ entries may exist in this list so that multiple pin
+ controllers may be configured, or so that a state may be built
+ from multiple nodes for a single pin controller, each
+ contributing part of the overall configuration. See the next
+ section of this document for details of the format of these
+ pin configuration nodes.
+
+ In some cases, it may be useful to define a state, but for it
+ to be empty. This may be required when a common IP block is
+ used in an SoC either without a pin controller, or where the
+ pin controller does not affect the HW module in question. If
+ the binding for that IP block requires certain pin states to
+ exist, they must still be defined, but may be left empty.
+
+Optional properties:
+pinctrl-1: List of phandles, each pointing at a pin configuration
+ node within a pin controller.
+...
+pinctrl-n: List of phandles, each pointing at a pin configuration
+ node within a pin controller.
+pinctrl-names: The list of names to assign states. List entry 0 defines the
+ name for integer state ID 0, list entry 1 for state ID 1, and
+ so on.
+
+For example:
+
+ /* For a client device requiring named states */
+ device {
+ pinctrl-names = "active", "idle";
+ pinctrl-0 = <&state_0_node_a>;
+ pinctrl-1 = <&state_1_node_a &state_1_node_b>;
+ };
+
+ /* For the same device if using state IDs */
+ device {
+ pinctrl-0 = <&state_0_node_a>;
+ pinctrl-1 = <&state_1_node_a &state_1_node_b>;
+ };
+
+ /*
+ * For an IP block whose binding supports pin configuration,
+ * but in use on an SoC that doesn't have any pin control hardware
+ */
+ device {
+ pinctrl-names = "active", "idle";
+ pinctrl-0 = <>;
+ pinctrl-1 = <>;
+ };
+
+== Pin controller devices ==
+
+Pin controller devices should contain the pin configuration nodes that client
+devices reference.
+
+For example:
+
+ pincontroller {
+ ... /* Standard DT properties for the device itself elided */
+
+ state_0_node_a {
+ ...
+ };
+ state_1_node_a {
+ ...
+ };
+ state_1_node_b {
+ ...
+ };
+ }
+
+The contents of each of those pin configuration child nodes is defined
+entirely by the binding for the individual pin controller device. There
+exists no common standard for this content.
+
+The pin configuration nodes need not be direct children of the pin controller
+device; they may be grandchildren, for example. Whether this is legal, and
+whether there is any interaction between the child and intermediate parent
+nodes, is again defined entirely by the binding for the individual pin
+controller device.
+
+== Generic pin multiplexing node content ==
+
+pin multiplexing nodes:
+
+function - the mux function to select
+groups - the list of groups to select with this function
+ (either this or "pins" must be specified)
+pins - the list of pins to select with this function (either
+ this or "groups" must be specified)
+
+Example:
+
+state_0_node_a {
+ uart0 {
+ function = "uart0";
+ groups = "u0rxtx", "u0rtscts";
+ };
+};
+state_1_node_a {
+ spi0 {
+ function = "spi0";
+ groups = "spi0pins";
+ };
+};
+state_2_node_a {
+ function = "i2c0";
+ pins = "mfio29", "mfio30";
+};
+
+== Generic pin configuration node content ==
+
+Many data items that are represented in a pin configuration node are common
+and generic. Pin control bindings should use the properties defined below
+where they are applicable; not all of these properties are relevant or useful
+for all hardware or binding structures. Each individual binding document
+should state which of these generic properties, if any, are used, and the
+structure of the DT nodes that contain these properties.
+
+Supported generic properties are:
+
+pins - the list of pins that properties in the node
+ apply to (either this or "group" has to be
+ specified)
+group - the group to apply the properties to, if the driver
+ supports configuration of whole groups rather than
+ individual pins (either this or "pins" has to be
+ specified)
+bias-disable - disable any pin bias
+bias-high-impedance - high impedance mode ("third-state", "floating")
+bias-bus-hold - latch weakly
+bias-pull-up - pull up the pin
+bias-pull-down - pull down the pin
+bias-pull-pin-default - use pin-default pull state
+drive-push-pull - drive actively high and low
+drive-open-drain - drive with open drain
+drive-open-source - drive with open source
+drive-strength - sink or source at most X mA
+input-enable - enable input on pin (no effect on output)
+input-disable - disable input on pin (no effect on output)
+input-schmitt-enable - enable schmitt-trigger mode
+input-schmitt-disable - disable schmitt-trigger mode
+input-debounce - debounce mode with debound time X
+power-source - select between different power supplies
+low-power-enable - enable low power mode
+low-power-disable - disable low power mode
+output-low - set the pin to output mode with low level
+output-high - set the pin to output mode with high level
+slew-rate - set the slew rate
+
+For example:
+
+state_0_node_a {
+ cts_rxd {
+ pins = "GPIO0_AJ5", "GPIO2_AH4"; /* CTS+RXD */
+ bias-pull-up;
+ };
+};
+state_1_node_a {
+ rts_txd {
+ pins = "GPIO1_AJ3", "GPIO3_AH3"; /* RTS+TXD */
+ output-high;
+ };
+};
+state_2_node_a {
+ foo {
+ group = "foo-group";
+ bias-pull-up;
+ };
+};
+
+Some of the generic properties take arguments. For those that do, the
+arguments are described below.
+
+- pins takes a list of pin names or IDs as a required argument. The specific
+ binding for the hardware defines:
+ - Whether the entries are integers or strings, and their meaning.
+
+- bias-pull-up, -down and -pin-default take as optional argument on hardware
+ supporting it the pull strength in Ohm. bias-disable will disable the pull.
+
+- drive-strength takes as argument the target strength in mA.
+
+- input-debounce takes the debounce time in usec as argument
+ or 0 to disable debouncing
+
+More in-depth documentation on these parameters can be found in
+<include/linux/pinctrl/pinconf-generic.h>
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt
new file mode 100644
index 000000000..5868a0f72
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt
@@ -0,0 +1,145 @@
+* Mediatek MT65XX Pin Controller
+
+The Mediatek's Pin controller is used to control SoC pins.
+
+Required properties:
+- compatible: value should be either of the following.
+ (a) "mediatek,mt8135-pinctrl", compatible with mt8135 pinctrl.
+- mediatek,pctl-regmap: Should be a phandle of the syscfg node.
+- pins-are-numbered: Specify the subnodes are using numbered pinmux to
+ specify pins.
+- gpio-controller : Marks the device node as a gpio controller.
+- #gpio-cells: number of cells in GPIO specifier. Since the generic GPIO
+ binding is used, the amount of cells must be specified as 2. See the below
+ mentioned gpio binding representation for description of particular cells.
+
+ Eg: <&pio 6 0>
+ <[phandle of the gpio controller node]
+ [line number within the gpio controller]
+ [flags]>
+
+ Values for gpio specifier:
+ - Line number: is a value between 0 to 202.
+ - Flags: bit field of flags, as defined in <dt-bindings/gpio/gpio.h>.
+ Only the following flags are supported:
+ 0 - GPIO_ACTIVE_HIGH
+ 1 - GPIO_ACTIVE_LOW
+- reg: physicall address base for EINT registers
+- interrupt-controller: Marks the device node as an interrupt controller
+- #interrupt-cells: Should be two.
+- interrupts : The interrupt outputs from the controller.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices.
+
+Subnode format
+A pinctrl node should contain at least one subnodes representing the
+pinctrl groups available on the machine. Each subnode will list the
+pins it needs, and how they should be configured, with regard to muxer
+configuration, pullups, drive strength, input enable/disable and input schmitt.
+
+ node {
+ pinmux = <PIN_NUMBER_PINMUX>;
+ GENERIC_PINCONFIG;
+ };
+
+Required properties:
+- pinmux: integer array, represents gpio pin number and mux setting.
+ Supported pin number and mux varies for different SoCs, and are defined
+ as macros in boot/dts/<soc>-pinfunc.h directly.
+
+Optional properties:
+- GENERIC_PINCONFIG: is the generic pinconfig options to use, bias-disable,
+ bias-pull-down, bias-pull-up, input-enable, input-disable, output-low, output-high,
+ input-schmitt-enable, input-schmitt-disable and drive-strength are valid.
+
+ Some special pins have extra pull up strength, there are R0 and R1 pull-up
+ resistors available, but for user, it's only need to set R1R0 as 00, 01, 10 or 11.
+ So when config bias-pull-up, it support arguments for those special pins.
+ Some macros have been defined for this usage, such as MTK_PUPD_SET_R1R0_00.
+ See dt-bindings/pinctrl/mt65xx.h.
+
+ When config drive-strength, it can support some arguments, such as
+ MTK_DRIVE_4mA, MTK_DRIVE_6mA, etc. See dt-bindings/pinctrl/mt65xx.h.
+
+Examples:
+
+#include "mt8135-pinfunc.h"
+
+...
+{
+ syscfg_pctl_a: syscfg_pctl_a@10005000 {
+ compatible = "mediatek,mt8135-pctl-a-syscfg", "syscon";
+ reg = <0 0x10005000 0 0x1000>;
+ };
+
+ syscfg_pctl_b: syscfg_pctl_b@1020C020 {
+ compatible = "mediatek,mt8135-pctl-b-syscfg", "syscon";
+ reg = <0 0x1020C020 0 0x1000>;
+ };
+
+ pinctrl@01c20800 {
+ compatible = "mediatek,mt8135-pinctrl";
+ reg = <0 0x1000B000 0 0x1000>;
+ mediatek,pctl-regmap = <&syscfg_pctl_a &syscfg_pctl_b>;
+ pins-are-numbered;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 118 IRQ_TYPE_LEVEL_HIGH>;
+
+ i2c0_pins_a: i2c0@0 {
+ pins1 {
+ pinmux = <MT8135_PIN_100_SDA0__FUNC_SDA0>,
+ <MT8135_PIN_101_SCL0__FUNC_SCL0>;
+ bias-disable;
+ };
+ };
+
+ i2c1_pins_a: i2c1@0 {
+ pins {
+ pinmux = <MT8135_PIN_195_SDA1__FUNC_SDA1>,
+ <MT8135_PIN_196_SCL1__FUNC_SCL1>;
+ bias-pull-up = <55>;
+ };
+ };
+
+ i2c2_pins_a: i2c2@0 {
+ pins1 {
+ pinmux = <MT8135_PIN_193_SDA2__FUNC_SDA2>;
+ bias-pull-down;
+ };
+
+ pins2 {
+ pinmux = <MT8135_PIN_49_WATCHDOG__FUNC_GPIO49>;
+ bias-pull-up;
+ };
+ };
+
+ i2c3_pins_a: i2c3@0 {
+ pins1 {
+ pinmux = <MT8135_PIN_40_DAC_CLK__FUNC_GPIO40>,
+ <MT8135_PIN_41_DAC_WS__FUNC_GPIO41>;
+ bias-pull-up = <55>;
+ };
+
+ pins2 {
+ pinmux = <MT8135_PIN_35_SCL3__FUNC_SCL3>,
+ <MT8135_PIN_36_SDA3__FUNC_SDA3>;
+ output-low;
+ bias-pull-up = <55>;
+ };
+
+ pins3 {
+ pinmux = <MT8135_PIN_57_JTCK__FUNC_GPIO57>,
+ <MT8135_PIN_60_JTDI__FUNC_JTDI>;
+ drive-strength = <32>;
+ };
+ };
+
+ ...
+ }
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-palmas.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-palmas.txt
new file mode 100644
index 000000000..caf297bee
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-palmas.txt
@@ -0,0 +1,96 @@
+Palmas Pincontrol bindings
+
+The pins of Palmas device can be set on different option and provides
+the configuration for Pull UP/DOWN, open drain etc.
+
+Required properties:
+- compatible: It must be one of following:
+ - "ti,palmas-pinctrl" for Palma series of the pincontrol.
+ - "ti,tps65913-pinctrl" for Palma series device TPS65913.
+ - "ti,tps80036-pinctrl" for Palma series device TPS80036.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Palmas's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+list of pins. This configuration can include the mux function to select on
+those pin(s), and various pin configuration parameters, such as pull-up,
+open drain.
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+Optional properties:
+- ti,palmas-enable-dvfs1: Enable DVFS1. Configure pins for DVFS1 mode.
+ Selection primary or secondary function associated to I2C2_SCL_SCE,
+ I2C2_SDA_SDO pin/pad for DVFS1 interface
+- ti,palmas-enable-dvfs2: Enable DVFS2. Configure pins for DVFS2 mode.
+ Selection primary or secondary function associated to GPADC_START
+ and SYSEN2 pin/pad for DVFS2 interface
+
+This binding uses the following generic properties as defined in
+pinctrl-bindings.txt:
+
+Required: pins
+Options: function, bias-disable, bias-pull-up, bias-pull-down,
+ drive-open-drain.
+
+Note that many of these properties are only valid for certain specific pins.
+See the Palmas device datasheet for complete details regarding which pins
+support which functionality.
+
+Valid values for pin names are:
+ gpio0, gpio1, gpio2, gpio3, gpio4, gpio5, gpio6, gpio7, gpio8, gpio9,
+ gpio10, gpio11, gpio12, gpio13, gpio14, gpio15, vac, powergood,
+ nreswarm, pwrdown, gpadc_start, reset_in, nsleep, enable1, enable2,
+ int.
+
+Valid value of function names are:
+ gpio, led, pwm, regen, sysen, clk32kgaudio, id, vbus_det, chrg_det,
+ vac, vacok, powergood, usb_psel, msecure, pwrhold, int, nreswarm,
+ simrsto, simrsti, low_vbat, wireless_chrg1, rcm, pwrdown, gpadc_start,
+ reset_in, nsleep, enable.
+
+There are 4 special functions: opt0, opt1, opt2 and opt3. If any of these
+functions is selected then directly pins register will be written with 0, 1, 2
+or 3 respectively if it is valid for that pins or list of pins.
+
+Example:
+ palmas: tps65913 {
+ ....
+ pinctrl {
+ compatible = "ti,tps65913-pinctrl";
+ ti,palmas-enable-dvfs1;
+ pinctrl-names = "default";
+ pinctrl-0 = <&palmas_pins_state>;
+
+ palmas_pins_state: pinmux {
+ gpio0 {
+ pins = "gpio0";
+ function = "id";
+ bias-pull-up;
+ };
+
+ vac {
+ pins = "vac";
+ function = "vacok";
+ bias-pull-down;
+ };
+
+ gpio5 {
+ pins = "gpio5";
+ function = "opt0";
+ drive-open-drain = <1>;
+ };
+ };
+ };
+ ....
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt
new file mode 100644
index 000000000..66dcaa9ef
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt
@@ -0,0 +1,252 @@
+One-register-per-pin type device tree based pinctrl driver
+
+Required properties:
+- compatible : "pinctrl-single" or "pinconf-single".
+ "pinctrl-single" means that pinconf isn't supported.
+ "pinconf-single" means that generic pinconf is supported.
+
+- reg : offset and length of the register set for the mux registers
+
+- pinctrl-single,register-width : pinmux register access width in bits
+
+- pinctrl-single,function-mask : mask of allowed pinmux function bits
+ in the pinmux register
+
+Optional properties:
+- pinctrl-single,function-off : function off mode for disabled state if
+ available and same for all registers; if not specified, disabling of
+ pin functions is ignored
+
+- pinctrl-single,bit-per-mux : boolean to indicate that one register controls
+ more than one pin, for which "pinctrl-single,function-mask" property specifies
+ position mask of pin.
+
+- pinctrl-single,drive-strength : array of value that are used to configure
+ drive strength in the pinmux register. They're value of drive strength
+ current and drive strength mask.
+
+ /* drive strength current, mask */
+ pinctrl-single,power-source = <0x30 0xf0>;
+
+- pinctrl-single,bias-pullup : array of value that are used to configure the
+ input bias pullup in the pinmux register.
+
+ /* input, enabled pullup bits, disabled pullup bits, mask */
+ pinctrl-single,bias-pullup = <0 1 0 1>;
+
+- pinctrl-single,bias-pulldown : array of value that are used to configure the
+ input bias pulldown in the pinmux register.
+
+ /* input, enabled pulldown bits, disabled pulldown bits, mask */
+ pinctrl-single,bias-pulldown = <2 2 0 2>;
+
+ * Two bits to control input bias pullup and pulldown: User should use
+ pinctrl-single,bias-pullup & pinctrl-single,bias-pulldown. One bit means
+ pullup, and the other one bit means pulldown.
+ * Three bits to control input bias enable, pullup and pulldown. User should
+ use pinctrl-single,bias-pullup & pinctrl-single,bias-pulldown. Input bias
+ enable bit should be included in pullup or pulldown bits.
+ * Although driver could set PIN_CONFIG_BIAS_DISABLE, there's no property as
+ pinctrl-single,bias-disable. Because pinctrl single driver could implement
+ it by calling pulldown, pullup disabled.
+
+- pinctrl-single,input-schmitt : array of value that are used to configure
+ input schmitt in the pinmux register. In some silicons, there're two input
+ schmitt value (rising-edge & falling-edge) in the pinmux register.
+
+ /* input schmitt value, mask */
+ pinctrl-single,input-schmitt = <0x30 0x70>;
+
+- pinctrl-single,input-schmitt-enable : array of value that are used to
+ configure input schmitt enable or disable in the pinmux register.
+
+ /* input, enable bits, disable bits, mask */
+ pinctrl-single,input-schmitt-enable = <0x30 0x40 0 0x70>;
+
+- pinctrl-single,low-power-mode : array of value that are used to configure
+ low power mode of this pin. For some silicons, the low power mode will
+ control the output of the pin when the pad including the pin enter low
+ power mode.
+ /* low power mode value, mask */
+ pinctrl-single,low-power-mode = <0x288 0x388>;
+
+- pinctrl-single,gpio-range : list of value that are used to configure a GPIO
+ range. They're value of subnode phandle, pin base in pinctrl device, pin
+ number in this range, GPIO function value of this GPIO range.
+ The number of parameters is depend on #pinctrl-single,gpio-range-cells
+ property.
+
+ /* pin base, nr pins & gpio function */
+ pinctrl-single,gpio-range = <&range 0 3 0 &range 3 9 1>;
+
+- interrupt-controller : standard interrupt controller binding if using
+ interrupts for wake-up events for example. In this case pinctrl-single
+ is set up as a chained interrupt controller and the wake-up interrupts
+ can be requested by the drivers using request_irq().
+
+- #interrupt-cells : standard interrupt binding if using interrupts
+
+This driver assumes that there is only one register for each pin (unless the
+pinctrl-single,bit-per-mux is set), and uses the common pinctrl bindings as
+specified in the pinctrl-bindings.txt document in this directory.
+
+The pin configuration nodes for pinctrl-single are specified as pinctrl
+register offset and value pairs using pinctrl-single,pins. Only the bits
+specified in pinctrl-single,function-mask are updated. For example, setting
+a pin for a device could be done with:
+
+ pinctrl-single,pins = <0xdc 0x118>;
+
+Where 0xdc is the offset from the pinctrl register base address for the
+device pinctrl register, and 0x118 contains the desired value of the
+pinctrl register. See the device example and static board pins example
+below for more information.
+
+In case when one register changes more than one pin's mux the
+pinctrl-single,bits need to be used which takes three parameters:
+
+ pinctrl-single,bits = <0xdc 0x18 0xff>;
+
+Where 0xdc is the offset from the pinctrl register base address for the
+device pinctrl register, 0x18 is the desired value, and 0xff is the sub mask to
+be used when applying this change to the register.
+
+
+Optional sub-node: In case some pins could be configured as GPIO in the pinmux
+register, those pins could be defined as a GPIO range. This sub-node is required
+by pinctrl-single,gpio-range property.
+
+Required properties in sub-node:
+- #pinctrl-single,gpio-range-cells : the number of parameters after phandle in
+ pinctrl-single,gpio-range property.
+
+ range: gpio-range {
+ #pinctrl-single,gpio-range-cells = <3>;
+ };
+
+
+Example:
+
+/* SoC common file */
+
+/* first controller instance for pins in core domain */
+pmx_core: pinmux@4a100040 {
+ compatible = "pinctrl-single";
+ reg = <0x4a100040 0x0196>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ pinctrl-single,register-width = <16>;
+ pinctrl-single,function-mask = <0xffff>;
+};
+
+/* second controller instance for pins in wkup domain */
+pmx_wkup: pinmux@4a31e040 {
+ compatible = "pinctrl-single";
+ reg = <0x4a31e040 0x0038>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ pinctrl-single,register-width = <16>;
+ pinctrl-single,function-mask = <0xffff>;
+};
+
+control_devconf0: pinmux@48002274 {
+ compatible = "pinctrl-single";
+ reg = <0x48002274 4>; /* Single register */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ pinctrl-single,bit-per-mux;
+ pinctrl-single,register-width = <32>;
+ pinctrl-single,function-mask = <0x5F>;
+};
+
+/* third controller instance for pins in gpio domain */
+pmx_gpio: pinmux@d401e000 {
+ compatible = "pinconf-single";
+ reg = <0xd401e000 0x0330>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ pinctrl-single,register-width = <32>;
+ pinctrl-single,function-mask = <7>;
+
+ /* sparse GPIO range could be supported */
+ pinctrl-single,gpio-range = <&range 0 3 0 &range 3 9 1
+ &range 12 1 0 &range 13 29 1
+ &range 43 1 0 &range 44 49 1
+ &range 94 1 1 &range 96 2 1>;
+
+ range: gpio-range {
+ #pinctrl-single,gpio-range-cells = <3>;
+ };
+};
+
+
+/* board specific .dts file */
+
+&pmx_core {
+
+ /*
+ * map all board specific static pins enabled by the pinctrl driver
+ * itself during the boot (or just set them up in the bootloader)
+ */
+ pinctrl-names = "default";
+ pinctrl-0 = <&board_pins>;
+
+ board_pins: pinmux_board_pins {
+ pinctrl-single,pins = <
+ 0x6c 0xf
+ 0x6e 0xf
+ 0x70 0xf
+ 0x72 0xf
+ >;
+ };
+
+ uart0_pins: pinmux_uart0_pins {
+ pinctrl-single,pins = <
+ 0x208 0 /* UART0_RXD (IOCFG138) */
+ 0x20c 0 /* UART0_TXD (IOCFG139) */
+ >;
+ pinctrl-single,bias-pulldown = <0 2 2>;
+ pinctrl-single,bias-pullup = <0 1 1>;
+ };
+
+ /* map uart2 pins */
+ uart2_pins: pinmux_uart2_pins {
+ pinctrl-single,pins = <
+ 0xd8 0x118
+ 0xda 0
+ 0xdc 0x118
+ 0xde 0
+ >;
+ };
+};
+
+&control_devconf0 {
+ mcbsp1_pins: pinmux_mcbsp1_pins {
+ pinctrl-single,bits = <
+ 0x00 0x18 0x18 /* FSR/CLKR signal from FSX/CLKX pin */
+ >;
+ };
+
+ mcbsp2_clks_pins: pinmux_mcbsp2_clks_pins {
+ pinctrl-single,bits = <
+ 0x00 0x40 0x40 /* McBSP2 CLKS from McBSP_CLKS pin */
+ >;
+ };
+
+};
+
+&uart1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart0_pins>;
+};
+
+&uart2 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart2_pins>;
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt
new file mode 100644
index 000000000..5f55be59d
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-sirf.txt
@@ -0,0 +1,47 @@
+CSR SiRFprimaII pinmux controller
+
+Required properties:
+- compatible : "sirf,prima2-pinctrl"
+- reg : Address range of the pinctrl registers
+- interrupts : Interrupts used by every GPIO group
+- gpio-controller : Indicates this device is a GPIO controller
+- interrupt-controller : Marks the device node as an interrupt controller
+Optional properties:
+- sirf,pullups : if n-th bit of m-th bank is set, set a pullup on GPIO-n of bank m
+- sirf,pulldowns : if n-th bit of m-th bank is set, set a pulldown on GPIO-n of bank m
+
+Please refer to pinctrl-bindings.txt in this directory for details of the common
+pinctrl bindings used by client devices.
+
+SiRFprimaII's pinmux nodes act as a container for an arbitrary number of subnodes.
+Each of these subnodes represents some desired configuration for a group of pins.
+
+Required subnode-properties:
+- sirf,pins : An array of strings. Each string contains the name of a group.
+- sirf,function: A string containing the name of the function to mux to the
+ group.
+
+ Valid values for group and function names can be found from looking at the
+ group and function arrays in driver files:
+ drivers/pinctrl/pinctrl-sirf.c
+
+For example, pinctrl might have subnodes like the following:
+ uart2_pins_a: uart2@0 {
+ uart {
+ sirf,pins = "uart2grp";
+ sirf,function = "uart2";
+ };
+ };
+ uart2_noflow_pins_a: uart2@1 {
+ uart {
+ sirf,pins = "uart2_nostreamctrlgrp";
+ sirf,function = "uart2_nostreamctrl";
+ };
+ };
+
+For a specific board, if it wants to use uart2 without hardware flow control,
+it can add the following to its board-specific .dts file.
+uart2: uart@0xb0070000 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart2_noflow_pins_a>;
+}
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-st.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-st.txt
new file mode 100644
index 000000000..26bcb18f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-st.txt
@@ -0,0 +1,171 @@
+*ST pin controller.
+
+Each multi-function pin is controlled, driven and routed through the
+PIO multiplexing block. Each pin supports GPIO functionality (ALT0)
+and multiple alternate functions(ALT1 - ALTx) that directly connect
+the pin to different hardware blocks.
+
+When a pin is in GPIO mode, Output Enable (OE), Open Drain(OD), and
+Pull Up (PU) are driven by the related PIO block.
+
+ST pinctrl driver controls PIO multiplexing block and also interacts with
+gpio driver to configure a pin.
+
+GPIO bank can have one of the two possible types of interrupt-wirings.
+
+First type is via irqmux, single interrupt is used by multiple gpio banks. This
+reduces number of overall interrupts numbers required. All these banks belong to
+a single pincontroller.
+ _________
+ | |----> [gpio-bank (n) ]
+ | |----> [gpio-bank (n + 1)]
+ [irqN]-- | irq-mux |----> [gpio-bank (n + 2)]
+ | |----> [gpio-bank (... )]
+ |_________|----> [gpio-bank (n + 7)]
+
+Second type has a dedicated interrupt per gpio bank.
+
+ [irqN]----> [gpio-bank (n)]
+
+
+Pin controller node:
+Required properties:
+- compatible : should be "st,<SOC>-<pio-block>-pinctrl"
+ like st,stih415-sbc-pinctrl, st,stih415-front-pinctrl and so on.
+- st,syscfg : Should be a phandle of the syscfg node.
+- st,retime-pin-mask : Should be mask to specify which pins can be retimed.
+ If the property is not present, it is assumed that all the pins in the
+ bank are capable of retiming. Retiming is mainly used to improve the
+ IO timing margins of external synchronous interfaces.
+- ranges : defines mapping between pin controller node (parent) to gpio-bank
+ node (children).
+
+Optional properties:
+- interrupts : Interrupt number of the irqmux. If the interrupt is shared
+ with other gpio banks via irqmux.
+ a irqline and gpio banks.
+- reg : irqmux memory resource. If irqmux is present.
+- reg-names : irqmux resource should be named as "irqmux".
+
+GPIO controller/bank node.
+Required properties:
+- gpio-controller : Indicates this device is a GPIO controller
+- #gpio-cells : Should be one. The first cell is the pin number.
+- st,bank-name : Should be a name string for this bank as specified in
+ datasheet.
+
+Optional properties:
+- interrupts : Interrupt number for this gpio bank. If there is a dedicated
+ interrupt wired up for this gpio bank.
+
+- interrupt-controller : Indicates this device is a interrupt controller. GPIO
+ bank can be an interrupt controller iff one of the interrupt type either via
+irqmux or a dedicated interrupt per bank is specified.
+
+- #interrupt-cells: the value of this property should be 2.
+ - First Cell: represents the external gpio interrupt number local to the
+ gpio interrupt space of the controller.
+ - Second Cell: flags to identify the type of the interrupt
+ - 1 = rising edge triggered
+ - 2 = falling edge triggered
+ - 3 = rising and falling edge triggered
+ - 4 = high level triggered
+ - 8 = low level triggered
+for related macros look in:
+include/dt-bindings/interrupt-controller/irq.h
+
+Example:
+ pin-controller-sbc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "st,stih415-sbc-pinctrl";
+ st,syscfg = <&syscfg_sbc>;
+ reg = <0xfe61f080 0x4>;
+ reg-names = "irqmux";
+ interrupts = <GIC_SPI 180 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "irqmux";
+ ranges = <0 0xfe610000 0x5000>;
+
+ PIO0: gpio@fe610000 {
+ gpio-controller;
+ #gpio-cells = <1>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ reg = <0 0x100>;
+ st,bank-name = "PIO0";
+ };
+ ...
+ pin-functions nodes follow...
+ };
+
+
+Contents of function subnode node:
+----------------------
+Required properties for pin configuration node:
+- st,pins : Child node with list of pins with configuration.
+
+Below is the format of how each pin conf should look like.
+
+<bank offset mux mode rt_type rt_delay rt_clk>
+
+Every PIO is represented with 4-7 parameters depending on retime configuration.
+Each parameter is explained as below.
+
+-bank : Should be bank phandle to which this PIO belongs.
+-offset : Offset in the PIO bank.
+-mux : Should be alternate function number associated this pin.
+ Use same numbers from datasheet.
+-mode :pin configuration is selected from one of the below values.
+ IN
+ IN_PU
+ OUT
+ BIDIR
+ BIDIR_PU
+
+-rt_type Retiming Configuration for the pin.
+ Possible retime configuration are:
+
+ ------- -------------
+ value args
+ ------- -------------
+ NICLK <delay> <clk>
+ ICLK_IO <delay> <clk>
+ BYPASS <delay>
+ DE_IO <delay> <clk>
+ SE_ICLK_IO <delay> <clk>
+ SE_NICLK_IO <delay> <clk>
+
+- delay is retime delay in pico seconds as mentioned in data sheet.
+
+- rt_clk :clk to be use for retime.
+ Possible values are:
+ CLK_A
+ CLK_B
+ CLK_C
+ CLK_D
+
+Example of mmcclk pin which is a bi-direction pull pu with retime config
+as non inverted clock retimed with CLK_B and delay of 0 pico seconds:
+
+pin-controller {
+ ...
+ mmc0 {
+ pinctrl_mmc: mmc {
+ st,pins {
+ mmcclk = <&PIO13 4 ALT4 BIDIR_PU NICLK 0 CLK_B>;
+ ...
+ };
+ };
+ ...
+ };
+};
+
+sdhci0:sdhci@fe810000{
+ ...
+ interrupt-parent = <&PIO3>;
+ #interrupt-cells = <2>;
+ interrupts = <3 IRQ_TYPE_LEVEL_HIGH>; /* Interrupt line via PIO3-3 */
+ interrupt-names = "card-detect";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_mmc>;
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-vt8500.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-vt8500.txt
new file mode 100644
index 000000000..b3aa90f0c
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-vt8500.txt
@@ -0,0 +1,57 @@
+VIA VT8500 and Wondermedia WM8xxx-series pinmux/gpio controller
+
+These SoCs contain a combined Pinmux/GPIO module. Each pin may operate as
+either a GPIO in, GPIO out or as an alternate function (I2C, SPI etc).
+
+Required properties:
+- compatible: "via,vt8500-pinctrl", "wm,wm8505-pinctrl", "wm,wm8650-pinctrl",
+ "wm8750-pinctrl" or "wm,wm8850-pinctrl"
+- reg: Should contain the physical address of the module's registers.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: Should be two.
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters.
+ bit 0 - active low
+
+Please refer to ../gpio/gpio.txt for a general description of GPIO bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Each pin configuration node lists the pin(s) to which it applies, and one or
+more of the mux functions to select on those pin(s), and pull-up/down
+configuration. Each subnode only affects those parameters that are explicitly
+listed. In other words, a subnode that lists only a mux function implies no
+information about any pull configuration. Similarly, a subnode that lists only
+a pull parameter implies no information about the mux function.
+
+Required subnode-properties:
+- wm,pins: An array of cells. Each cell contains the ID of a pin.
+
+Optional subnode-properties:
+- wm,function: Integer, containing the function to mux to the pin(s):
+ 0: GPIO in
+ 1: GPIO out
+ 2: alternate
+
+- wm,pull: Integer, representing the pull-down/up to apply to the pin(s):
+ 0: none
+ 1: down
+ 2: up
+
+Each of wm,function and wm,pull may contain either a single value which
+will be applied to all pins in wm,pins, or one value for each entry in
+wm,pins.
+
+Example:
+
+ pinctrl: pinctrl {
+ compatible = "wm,wm8505-pinctrl";
+ reg = <0xD8110000 0x10000>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt
new file mode 100644
index 000000000..458615596
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl_spear.txt
@@ -0,0 +1,155 @@
+ST Microelectronics, SPEAr pinmux controller
+
+Required properties:
+- compatible : "st,spear300-pinmux"
+ : "st,spear310-pinmux"
+ : "st,spear320-pinmux"
+ : "st,spear1310-pinmux"
+ : "st,spear1340-pinmux"
+- reg : Address range of the pinctrl registers
+- st,pinmux-mode: Mandatory for SPEAr300 and SPEAr320 and invalid for others.
+ - Its values for SPEAr300:
+ - NAND_MODE : <0>
+ - NOR_MODE : <1>
+ - PHOTO_FRAME_MODE : <2>
+ - LEND_IP_PHONE_MODE : <3>
+ - HEND_IP_PHONE_MODE : <4>
+ - LEND_WIFI_PHONE_MODE : <5>
+ - HEND_WIFI_PHONE_MODE : <6>
+ - ATA_PABX_WI2S_MODE : <7>
+ - ATA_PABX_I2S_MODE : <8>
+ - CAML_LCDW_MODE : <9>
+ - CAMU_LCD_MODE : <10>
+ - CAMU_WLCD_MODE : <11>
+ - CAML_LCD_MODE : <12>
+ - Its values for SPEAr320:
+ - AUTO_NET_SMII_MODE : <0>
+ - AUTO_NET_MII_MODE : <1>
+ - AUTO_EXP_MODE : <2>
+ - SMALL_PRINTERS_MODE : <3>
+ - EXTENDED_MODE : <4>
+
+Please refer to pinctrl-bindings.txt in this directory for details of the common
+pinctrl bindings used by client devices.
+
+SPEAr's pinmux nodes act as a container for an arbitrary number of subnodes. Each
+of these subnodes represents muxing for a pin, a group, or a list of pins or
+groups.
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Required subnode-properties:
+- st,pins : An array of strings. Each string contains the name of a pin or
+ group.
+- st,function: A string containing the name of the function to mux to the pin or
+ group. See the SPEAr's TRM to determine which are valid for each pin or group.
+
+ Valid values for group and function names can be found from looking at the
+ group and function arrays in driver files:
+ drivers/pinctrl/spear/pinctrl-spear3*0.c
+
+Valid values for group names are:
+For All SPEAr3xx machines:
+ "firda_grp", "i2c0_grp", "ssp_cs_grp", "ssp0_grp", "mii0_grp",
+ "gpio0_pin0_grp", "gpio0_pin1_grp", "gpio0_pin2_grp", "gpio0_pin3_grp",
+ "gpio0_pin4_grp", "gpio0_pin5_grp", "uart0_ext_grp", "uart0_grp",
+ "timer_0_1_grp", timer_0_1_pins, "timer_2_3_grp"
+
+For SPEAr300 machines:
+ "fsmc_2chips_grp", "fsmc_4chips_grp", "clcd_lcdmode_grp",
+ "clcd_pfmode_grp", "tdm_grp", "i2c_clk_grp_grp", "caml_grp", "camu_grp",
+ "dac_grp", "i2s_grp", "sdhci_4bit_grp", "sdhci_8bit_grp",
+ "gpio1_0_to_3_grp", "gpio1_4_to_7_grp"
+
+For SPEAr310 machines:
+ "emi_cs_0_to_5_grp", "uart1_grp", "uart2_grp", "uart3_grp", "uart4_grp",
+ "uart5_grp", "fsmc_grp", "rs485_0_grp", "rs485_1_grp", "tdm_grp"
+
+For SPEAr320 machines:
+ "clcd_grp", "emi_grp", "fsmc_8bit_grp", "fsmc_16bit_grp", "spp_grp",
+ "sdhci_led_grp", "sdhci_cd_12_grp", "sdhci_cd_51_grp", "i2s_grp",
+ "uart1_grp", "uart1_modem_2_to_7_grp", "uart1_modem_31_to_36_grp",
+ "uart1_modem_34_to_45_grp", "uart1_modem_80_to_85_grp", "uart2_grp",
+ "uart3_8_9_grp", "uart3_15_16_grp", "uart3_41_42_grp",
+ "uart3_52_53_grp", "uart3_73_74_grp", "uart3_94_95_grp",
+ "uart3_98_99_grp", "uart4_6_7_grp", "uart4_13_14_grp",
+ "uart4_39_40_grp", "uart4_71_72_grp", "uart4_92_93_grp",
+ "uart4_100_101_grp", "uart5_4_5_grp", "uart5_37_38_grp",
+ "uart5_69_70_grp", "uart5_90_91_grp", "uart6_2_3_grp",
+ "uart6_88_89_grp", "rs485_grp", "touchscreen_grp", "can0_grp",
+ "can1_grp", "pwm0_1_pin_8_9_grp", "pwm0_1_pin_14_15_grp",
+ "pwm0_1_pin_30_31_grp", "pwm0_1_pin_37_38_grp", "pwm0_1_pin_42_43_grp",
+ "pwm0_1_pin_59_60_grp", "pwm0_1_pin_88_89_grp", "pwm2_pin_7_grp",
+ "pwm2_pin_13_grp", "pwm2_pin_29_grp", "pwm2_pin_34_grp",
+ "pwm2_pin_41_grp", "pwm2_pin_58_grp", "pwm2_pin_87_grp",
+ "pwm3_pin_6_grp", "pwm3_pin_12_grp", "pwm3_pin_28_grp",
+ "pwm3_pin_40_grp", "pwm3_pin_57_grp", "pwm3_pin_86_grp",
+ "ssp1_17_20_grp", "ssp1_36_39_grp", "ssp1_48_51_grp", "ssp1_65_68_grp",
+ "ssp1_94_97_grp", "ssp2_13_16_grp", "ssp2_32_35_grp", "ssp2_44_47_grp",
+ "ssp2_61_64_grp", "ssp2_90_93_grp", "mii2_grp", "smii0_1_grp",
+ "rmii0_1_grp", "i2c1_8_9_grp", "i2c1_98_99_grp", "i2c2_0_1_grp",
+ "i2c2_2_3_grp", "i2c2_19_20_grp", "i2c2_75_76_grp", "i2c2_96_97_grp"
+
+For SPEAr1310 machines:
+ "i2c0_grp", "ssp0_grp", "ssp0_cs0_grp", "ssp0_cs1_2_grp", "i2s0_grp",
+ "i2s1_grp", "clcd_grp", "clcd_high_res_grp", "arm_gpio_grp",
+ "smi_2_chips_grp", "smi_4_chips_grp", "gmii_grp", "rgmii_grp",
+ "smii_0_1_2_grp", "ras_mii_txclk_grp", "nand_8bit_grp",
+ "nand_16bit_grp", "nand_4_chips_grp", "keyboard_6x6_grp",
+ "keyboard_rowcol6_8_grp", "uart0_grp", "uart0_modem_grp",
+ "gpt0_tmr0_grp", "gpt0_tmr1_grp", "gpt1_tmr0_grp", "gpt1_tmr1_grp",
+ "sdhci_grp", "cf_grp", "xd_grp", "touch_xy_grp",
+ "uart1_disable_i2c_grp", "uart1_disable_sd_grp", "uart2_3_grp",
+ "uart4_grp", "uart5_grp", "rs485_0_1_tdm_0_1_grp", "i2c_1_2_grp",
+ "i2c3_dis_smi_clcd_grp", "i2c3_dis_sd_i2s0_grp", "i2c_4_5_dis_smi_grp",
+ "i2c4_dis_sd_grp", "i2c5_dis_sd_grp", "i2c_6_7_dis_kbd_grp",
+ "i2c6_dis_sd_grp", "i2c7_dis_sd_grp", "can0_dis_nor_grp",
+ "can0_dis_sd_grp", "can1_dis_sd_grp", "can1_dis_kbd_grp", "pcie0_grp",
+ "pcie1_grp", "pcie2_grp", "sata0_grp", "sata1_grp", "sata2_grp",
+ "ssp1_dis_kbd_grp", "ssp1_dis_sd_grp", "gpt64_grp"
+
+For SPEAr1340 machines:
+ "pads_as_gpio_grp", "fsmc_8bit_grp", "fsmc_16bit_grp", "fsmc_pnor_grp",
+ "keyboard_row_col_grp", "keyboard_col5_grp", "spdif_in_grp",
+ "spdif_out_grp", "gpt_0_1_grp", "pwm0_grp", "pwm1_grp", "pwm2_grp",
+ "pwm3_grp", "vip_mux_grp", "vip_mux_cam0_grp", "vip_mux_cam1_grp",
+ "vip_mux_cam2_grp", "vip_mux_cam3_grp", "cam0_grp", "cam1_grp",
+ "cam2_grp", "cam3_grp", "smi_grp", "ssp0_grp", "ssp0_cs1_grp",
+ "ssp0_cs2_grp", "ssp0_cs3_grp", "uart0_grp", "uart0_enh_grp",
+ "uart1_grp", "i2s_in_grp", "i2s_out_grp", "gmii_grp", "rgmii_grp",
+ "rmii_grp", "sgmii_grp", "i2c0_grp", "i2c1_grp", "cec0_grp", "cec1_grp",
+ "sdhci_grp", "cf_grp", "xd_grp", "clcd_grp", "arm_trace_grp",
+ "miphy_dbg_grp", "pcie_grp", "sata_grp"
+
+Valid values for function names are:
+For All SPEAr3xx machines:
+ "firda", "i2c0", "ssp_cs", "ssp0", "mii0", "gpio0", "uart0_ext",
+ "uart0", "timer_0_1", "timer_2_3"
+
+For SPEAr300 machines:
+ "fsmc", "clcd", "tdm", "i2c1", "cam", "dac", "i2s", "sdhci", "gpio1"
+
+For SPEAr310 machines:
+ "emi", "uart1", "uart2", "uart3", "uart4", "uart5", "fsmc", "rs485_0",
+ "rs485_1", "tdm"
+
+For SPEAr320 machines:
+ "clcd", "emi", "fsmc", "spp", "sdhci", "i2s", "uart1", "uart1_modem",
+ "uart2", "uart3", "uart4", "uart5", "uart6", "rs485", "touchscreen",
+ "can0", "can1", "pwm0_1", "pwm2", "pwm3", "ssp1", "ssp2", "mii2",
+ "mii0_1", "i2c1", "i2c2"
+
+
+For SPEAr1310 machines:
+ "i2c0", "ssp0", "i2s0", "i2s1", "clcd", "arm_gpio", "smi", "gmii",
+ "rgmii", "smii_0_1_2", "ras_mii_txclk", "nand", "keyboard", "uart0",
+ "gpt0", "gpt1", "sdhci", "cf", "xd", "touchscreen", "uart1", "uart2_3",
+ "uart4", "uart5", "rs485_0_1_tdm_0_1", "i2c_1_2", "i2c3_i2s1",
+ "i2c_4_5", "i2c_6_7", "can0", "can1", "pci", "sata", "ssp1", "gpt64"
+
+For SPEAr1340 machines:
+ "pads_as_gpio", "fsmc", "keyboard", "spdif_in", "spdif_out", "gpt_0_1",
+ "pwm", "vip", "cam0", "cam1", "cam2", "cam3", "smi", "ssp0", "uart0",
+ "uart1", "i2s", "gmac", "i2c0", "i2c1", "cec0", "cec1", "sdhci", "cf",
+ "xd", "clcd", "arm_trace", "miphy_dbg", "pcie", "sata"
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt
new file mode 100644
index 000000000..a7bde6479
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,apq8064-pinctrl.txt
@@ -0,0 +1,88 @@
+Qualcomm APQ8064 TLMM block
+
+Required properties:
+- compatible: "qcom,apq8064-pinctrl"
+- reg: Should be the base address and length of the TLMM block.
+- interrupts: Should be the parent IRQ of the TLMM block.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: Should be two.
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells : Should be two.
+ The first cell is the gpio pin number and the
+ second cell is used for optional parameters.
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Qualcomm's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+
+ pins, function, bias-disable, bias-pull-down, bias-pull,up, drive-strength,
+ output-low, output-high.
+
+Non-empty subnodes must specify the 'pins' property.
+
+Valid values for pins are:
+ gpio0-gpio89
+
+Valid values for function are:
+ cam_mclk, codec_mic_i2s, codec_spkr_i2s, gpio, gsbi1, gsbi2, gsbi3, gsbi4,
+ gsbi4_cam_i2c, gsbi5, gsbi5_spi_cs1, gsbi5_spi_cs2, gsbi5_spi_cs3, gsbi6,
+ gsbi6_spi_cs1, gsbi6_spi_cs2, gsbi6_spi_cs3, gsbi7, gsbi7_spi_cs1,
+ gsbi7_spi_cs2, gsbi7_spi_cs3, gsbi_cam_i2c, hdmi, mi2s, riva_bt, riva_fm,
+ riva_wlan, sdc2, sdc4, slimbus, spkr_i2s, tsif1, tsif2, usb2_hsic, ps_hold
+
+Example:
+
+ msmgpio: pinctrl@800000 {
+ compatible = "qcom,apq8064-pinctrl";
+ reg = <0x800000 0x4000>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <0 16 0x4>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&gsbi5_uart_default>;
+
+ gsbi5_uart_default: gsbi5_uart_default {
+ mux {
+ pins = "gpio51", "gpio52";
+ function = "gsbi5";
+ };
+
+ tx {
+ pins = "gpio51";
+ drive-strength = <4>;
+ bias-disable;
+ };
+
+ rx {
+ pins = "gpio52";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt
new file mode 100644
index 000000000..c4ea61ac5
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,apq8084-pinctrl.txt
@@ -0,0 +1,179 @@
+Qualcomm APQ8084 TLMM block
+
+This binding describes the Top Level Mode Multiplexer block found in the
+MSM8960 platform.
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be "qcom,apq8084-pinctrl"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the base address and size of the TLMM register space.
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: should specify the TLMM summary IRQ.
+
+- interrupt-controller:
+ Usage: required
+ Value type: <none>
+ Definition: identifies this node as an interrupt controller
+
+- #interrupt-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: must be 2. Specifying the pin number and flags, as defined
+ in <dt-bindings/interrupt-controller/irq.h>
+
+- gpio-controller:
+ Usage: required
+ Value type: <none>
+ Definition: identifies this node as a gpio controller
+
+- #gpio-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: must be 2. Specifying the pin number and flags, as defined
+ in <dt-bindings/gpio/gpio.h>
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+The pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+
+PIN CONFIGURATION NODES:
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+
+- pins:
+ Usage: required
+ Value type: <string-array>
+ Definition: List of gpio pins affected by the properties specified in
+ this subnode. Valid pins are:
+ gpio0-gpio146,
+ sdc1_clk,
+ sdc1_cmd,
+ sdc1_data
+ sdc2_clk,
+ sdc2_cmd,
+ sdc2_data
+
+- function:
+ Usage: required
+ Value type: <string>
+ Definition: Specify the alternative function to be configured for the
+ specified pins. Functions are only valid for gpio pins.
+ Valid values are:
+ adsp_ext, audio_ref, blsp_i2c1, blsp_i2c2, blsp_i2c3,
+ blsp_i2c4, blsp_i2c5, blsp_i2c6, blsp_i2c7, blsp_i2c8,
+ blsp_i2c9, blsp_i2c10, blsp_i2c11, blsp_i2c12,
+ blsp_spi1, blsp_spi2, blsp_spi3, blsp_spi4, blsp_spi5,
+ blsp_spi6, blsp_spi7, blsp_spi8, blsp_spi9, blsp_spi10,
+ blsp_spi11, blsp_spi12, blsp_uart1, blsp_uart2, blsp_uart3,
+ blsp_uart4, blsp_uart5, blsp_uart6, blsp_uart7, blsp_uart8,
+ blsp_uart9, blsp_uart10, blsp_uart11, blsp_uart12,
+ blsp_uim1, blsp_uim2, blsp_uim3, blsp_uim4, blsp_uim5,
+ blsp_uim6, blsp_uim7, blsp_uim8, blsp_uim9, blsp_uim10,
+ blsp_uim11, blsp_uim12, cam_mclk0, cam_mclk1, cam_mclk2,
+ cam_mclk3, cci_async, cci_async_in0, cci_i2c0, cci_i2c1,
+ cci_timer0, cci_timer1, cci_timer2, cci_timer3, cci_timer4,
+ edp_hpd, gcc_gp1, gcc_gp2, gcc_gp3, gcc_obt, gcc_vtt,i
+ gp_mn, gp_pdm0, gp_pdm1, gp_pdm2, gp0_clk, gp1_clk, gpio,
+ hdmi_cec, hdmi_ddc, hdmi_dtest, hdmi_hpd, hdmi_rcv, hsic,
+ ldo_en, ldo_update, mdp_vsync, pci_e0, pci_e0_n, pci_e0_rst,
+ pci_e1, pci_e1_rst, pci_e1_rst_n, pci_e1_clkreq_n, pri_mi2s,
+ qua_mi2s, sata_act, sata_devsleep, sata_devsleep_n,
+ sd_write, sdc_emmc_mode, sdc3, sdc4, sec_mi2s, slimbus,
+ spdif_tx, spkr_i2s, spkr_i2s_ws, spss_geni, ter_mi2s, tsif1,
+ tsif2, uim, uim_batt_alarm
+
+- bias-disable:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as no pull.
+
+- bias-pull-down:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as pull down.
+
+- bias-pull-up:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as pull up.
+
+- output-high:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ high.
+ Not valid for sdc pins.
+
+- output-low:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ low.
+ Not valid for sdc pins.
+
+- drive-strength:
+ Usage: optional
+ Value type: <u32>
+ Definition: Selects the drive strength for the specified pins, in mA.
+ Valid values are: 2, 4, 6, 8, 10, 12, 14 and 16
+
+Example:
+
+ tlmm: pinctrl@fd510000 {
+ compatible = "qcom,apq8084-pinctrl";
+ reg = <0xfd510000 0x4000>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <0 208 0>;
+
+ uart2: uart2-default {
+ mux {
+ pins = "gpio4", "gpio5";
+ function = "blsp_uart2";
+ };
+
+ tx {
+ pins = "gpio4";
+ drive-strength = <4>;
+ bias-disable;
+ };
+
+ rx {
+ pins = "gpio5";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt
new file mode 100644
index 000000000..6e88e91fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,ipq8064-pinctrl.txt
@@ -0,0 +1,95 @@
+Qualcomm IPQ8064 TLMM block
+
+Required properties:
+- compatible: "qcom,ipq8064-pinctrl"
+- reg: Should be the base address and length of the TLMM block.
+- interrupts: Should be the parent IRQ of the TLMM block.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: Should be two.
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells : Should be two.
+ The first cell is the gpio pin number and the
+ second cell is used for optional parameters.
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Qualcomm's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+
+ pins, function, bias-disable, bias-pull-down, bias-pull,up, drive-strength,
+ output-low, output-high.
+
+Non-empty subnodes must specify the 'pins' property.
+
+Valid values for qcom,pins are:
+ gpio0-gpio68
+ Supports mux, bias, and drive-strength
+
+ sdc3_clk, sdc3_cmd, sdc3_data
+ Supports bias and drive-strength
+
+
+Valid values for function are:
+ mdio, mi2s, pdm, ssbi, spmi, audio_pcm, gpio, gsbi1, gsbi2, gsbi4, gsbi5,
+ gsbi5_spi_cs1, gsbi5_spi_cs2, gsbi5_spi_cs3, gsbi6, gsbi7, nss_spi, sdc1,
+ spdif, nand, tsif1, tsif2, usb_fs_n, usb_fs, usb2_hsic, rgmii2, sata,
+ pcie1_rst, pcie1_prsnt, pcie1_pwren_n, pcie1_pwren, pcie1_pwrflt,
+ pcie1_clk_req, pcie2_rst, pcie2_prsnt, pcie2_pwren_n, pcie2_pwren,
+ pcie2_pwrflt, pcie2_clk_req, pcie3_rst, pcie3_prsnt, pcie3_pwren_n,
+ pcie3_pwren, pcie3_pwrflt, pcie3_clk_req, ps_hold
+
+Example:
+
+ pinmux: pinctrl@800000 {
+ compatible = "qcom,ipq8064-pinctrl";
+ reg = <0x800000 0x4000>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <0 32 0x4>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&gsbi5_uart_default>;
+
+ gsbi5_uart_default: gsbi5_uart_default {
+ mux {
+ pins = "gpio18", "gpio19";
+ function = "gsbi5";
+ };
+
+ tx {
+ pins = "gpio18";
+ drive-strength = <4>;
+ bias-disable;
+ };
+
+ rx {
+ pins = "gpio19";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.txt
new file mode 100644
index 000000000..498caff60
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8916-pinctrl.txt
@@ -0,0 +1,186 @@
+Qualcomm MSM8916 TLMM block
+
+This binding describes the Top Level Mode Multiplexer block found in the
+MSM8916 platform.
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be "qcom,msm8916-pinctrl"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the base address and size of the TLMM register space.
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: should specify the TLMM summary IRQ.
+
+- interrupt-controller:
+ Usage: required
+ Value type: <none>
+ Definition: identifies this node as an interrupt controller
+
+- #interrupt-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: must be 2. Specifying the pin number and flags, as defined
+ in <dt-bindings/interrupt-controller/irq.h>
+
+- gpio-controller:
+ Usage: required
+ Value type: <none>
+ Definition: identifies this node as a gpio controller
+
+- #gpio-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: must be 2. Specifying the pin number and flags, as defined
+ in <dt-bindings/gpio/gpio.h>
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+The pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+
+PIN CONFIGURATION NODES:
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+
+- pins:
+ Usage: required
+ Value type: <string-array>
+ Definition: List of gpio pins affected by the properties specified in
+ this subnode. Valid pins are:
+ gpio0-gpio121,
+ sdc1_clk,
+ sdc1_cmd,
+ sdc1_data
+ sdc2_clk,
+ sdc2_cmd,
+ sdc2_data,
+ qdsd_cmd,
+ qdsd_data0,
+ qdsd_data1,
+ qdsd_data2,
+ qdsd_data3
+
+- function:
+ Usage: required
+ Value type: <string>
+ Definition: Specify the alternative function to be configured for the
+ specified pins. Functions are only valid for gpio pins.
+ Valid values are:
+ adsp_ext, alsp_int, atest_bbrx0, atest_bbrx1, atest_char, atest_char0,
+ atest_char1, atest_char2, atest_char3, atest_combodac, atest_gpsadc0,
+ atest_gpsadc1, atest_tsens, atest_wlan0, atest_wlan1, backlight_en,
+ bimc_dte0,bimc_dte1, blsp_i2c1, blsp_i2c2, blsp_i2c3, blsp_i2c4,
+ blsp_i2c5, blsp_i2c6, blsp_spi1, blsp_spi1_cs1, blsp_spi1_cs2,
+ blsp_spi1_cs3, blsp_spi2, blsp_spi2_cs1, blsp_spi2_cs2, blsp_spi2_cs3,
+ blsp_spi3, blsp_spi3_cs1, blsp_spi3_cs2, blsp_spi3_cs3, blsp_spi4,
+ blsp_spi5, blsp_spi6, blsp_uart1, blsp_uart2, blsp_uim1, blsp_uim2,
+ cam1_rst, cam1_standby, cam_mclk0, cam_mclk1, cci_async, cci_i2c,
+ cci_timer0, cci_timer1, cci_timer2, cdc_pdm0, codec_mad, dbg_out,
+ display_5v, dmic0_clk, dmic0_data, dsi_rst, ebi0_wrcdc, euro_us,
+ ext_lpass, flash_strobe, gcc_gp1_clk_a, gcc_gp1_clk_b, gcc_gp2_clk_a,
+ gcc_gp2_clk_b, gcc_gp3_clk_a, gcc_gp3_clk_b, gpio, gsm0_tx0, gsm0_tx1,
+ gsm1_tx0, gsm1_tx1, gyro_accl, kpsns0, kpsns1, kpsns2, ldo_en,
+ ldo_update, mag_int, mdp_vsync, modem_tsync, m_voc, nav_pps, nav_tsync,
+ pa_indicator, pbs0, pbs1, pbs2, pri_mi2s, pri_mi2s_ws, prng_rosc,
+ pwr_crypto_enabled_a, pwr_crypto_enabled_b, pwr_modem_enabled_a,
+ pwr_modem_enabled_b, pwr_nav_enabled_a, pwr_nav_enabled_b,
+ qdss_ctitrig_in_a0, qdss_ctitrig_in_a1, qdss_ctitrig_in_b0,
+ qdss_ctitrig_in_b1, qdss_ctitrig_out_a0, qdss_ctitrig_out_a1,
+ qdss_ctitrig_out_b0, qdss_ctitrig_out_b1, qdss_traceclk_a,
+ qdss_traceclk_b, qdss_tracectl_a, qdss_tracectl_b, qdss_tracedata_a,
+ qdss_tracedata_b, reset_n, sd_card, sd_write, sec_mi2s, smb_int,
+ ssbi_wtr0, ssbi_wtr1, uim1, uim2, uim3, uim_batt, wcss_bt, wcss_fm,
+ wcss_wlan, webcam1_rst
+
+- bias-disable:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as no pull.
+
+- bias-pull-down:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as pull down.
+
+- bias-pull-up:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as pull up.
+
+- output-high:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ high.
+ Not valid for sdc pins.
+
+- output-low:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ low.
+ Not valid for sdc pins.
+
+- drive-strength:
+ Usage: optional
+ Value type: <u32>
+ Definition: Selects the drive strength for the specified pins, in mA.
+ Valid values are: 2, 4, 6, 8, 10, 12, 14 and 16
+
+Example:
+
+ tlmm: pinctrl@1000000 {
+ compatible = "qcom,msm8916-pinctrl";
+ reg = <0x1000000 0x300000>;
+ interrupts = <0 208 0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+
+ uart2: uart2-default {
+ mux {
+ pins = "gpio4", "gpio5";
+ function = "blsp_uart2";
+ };
+
+ tx {
+ pins = "gpio4";
+ drive-strength = <4>;
+ bias-disable;
+ };
+
+ rx {
+ pins = "gpio5";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt
new file mode 100644
index 000000000..eb8d8aa41
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8960-pinctrl.txt
@@ -0,0 +1,181 @@
+Qualcomm MSM8960 TLMM block
+
+This binding describes the Top Level Mode Multiplexer block found in the
+MSM8960 platform.
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be "qcom,msm8960-pinctrl"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the base address and size of the TLMM register space.
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: should specify the TLMM summary IRQ.
+
+- interrupt-controller:
+ Usage: required
+ Value type: <none>
+ Definition: identifies this node as an interrupt controller
+
+- #interrupt-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: must be 2. Specifying the pin number and flags, as defined
+ in <dt-bindings/interrupt-controller/irq.h>
+
+- gpio-controller:
+ Usage: required
+ Value type: <none>
+ Definition: identifies this node as a gpio controller
+
+- #gpio-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: must be 2. Specifying the pin number and flags, as defined
+ in <dt-bindings/gpio/gpio.h>
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+The pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+
+PIN CONFIGURATION NODES:
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+
+- pins:
+ Usage: required
+ Value type: <string-array>
+ Definition: List of gpio pins affected by the properties specified in
+ this subnode. Valid pins are:
+ gpio0-gpio151,
+ sdc1_clk,
+ sdc1_cmd,
+ sdc1_data
+ sdc3_clk,
+ sdc3_cmd,
+ sdc3_data
+
+- function:
+ Usage: required
+ Value type: <string>
+ Definition: Specify the alternative function to be configured for the
+ specified pins. Functions are only valid for gpio pins.
+ Valid values are:
+ audio_pcm, bt, cam_mclk0, cam_mclk1, cam_mclk2,
+ codec_mic_i2s, codec_spkr_i2s, ext_gps, fm, gps_blanking,
+ gps_pps_in, gps_pps_out, gp_clk_0a, gp_clk_0b, gp_clk_1a,
+ gp_clk_1b, gp_clk_2a, gp_clk_2b, gp_mn, gp_pdm_0a,
+ gp_pdm_0b, gp_pdm_1a, gp_pdm_1b, gp_pdm_2a, gp_pdm_2b, gpio,
+ gsbi1, gsbi1_spi_cs1_n, gsbi1_spi_cs2a_n, gsbi1_spi_cs2b_n,
+ gsbi1_spi_cs3_n, gsbi2, gsbi2_spi_cs1_n, gsbi2_spi_cs2_n,
+ gsbi2_spi_cs3_n, gsbi3, gsbi4, gsbi4_3d_cam_i2c_l,
+ gsbi4_3d_cam_i2c_r, gsbi5, gsbi5_3d_cam_i2c_l,
+ gsbi5_3d_cam_i2c_r, gsbi6, gsbi7, gsbi8, gsbi9, gsbi10,
+ gsbi11, gsbi11_spi_cs1a_n, gsbi11_spi_cs1b_n,
+ gsbi11_spi_cs2a_n, gsbi11_spi_cs2b_n, gsbi11_spi_cs3_n,
+ gsbi12, hdmi_cec, hdmi_ddc_clock, hdmi_ddc_data,
+ hdmi_hot_plug_detect, hsic, mdp_vsync, mi2s, mic_i2s,
+ pmb_clk, pmb_ext_ctrl, ps_hold, rpm_wdog, sdc2, sdc4, sdc5,
+ slimbus1, slimbus2, spkr_i2s, ssbi1, ssbi2, ssbi_ext_gps,
+ ssbi_pmic2, ssbi_qpa1, ssbi_ts, tsif1, tsif2, ts_eoc,
+ usb_fs1, usb_fs1_oe, usb_fs1_oe_n, usb_fs2, usb_fs2_oe,
+ usb_fs2_oe_n, vfe_camif_timer1_a, vfe_camif_timer1_b,
+ vfe_camif_timer2, vfe_camif_timer3_a, vfe_camif_timer3_b,
+ vfe_camif_timer4_a, vfe_camif_timer4_b, vfe_camif_timer4_c,
+ vfe_camif_timer5_a, vfe_camif_timer5_b, vfe_camif_timer6_a,
+ vfe_camif_timer6_b, vfe_camif_timer6_c, vfe_camif_timer7_a,
+ vfe_camif_timer7_b, vfe_camif_timer7_c, wlan
+
+- bias-disable:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as no pull.
+
+- bias-pull-down:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as pull down.
+
+- bias-pull-up:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configued as pull up.
+
+- output-high:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ high.
+ Not valid for sdc pins.
+
+- output-low:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ low.
+ Not valid for sdc pins.
+
+- drive-strength:
+ Usage: optional
+ Value type: <u32>
+ Definition: Selects the drive strength for the specified pins, in mA.
+ Valid values are: 2, 4, 6, 8, 10, 12, 14 and 16
+
+Example:
+
+ msmgpio: pinctrl@800000 {
+ compatible = "qcom,msm8960-pinctrl";
+ reg = <0x800000 0x4000>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <0 16 0x4>;
+
+ gsbi8_uart: gsbi8-uart {
+ mux {
+ pins = "gpio34", "gpio35";
+ function = "gsbi8";
+ };
+
+ tx {
+ pins = "gpio34";
+ drive-strength = <4>;
+ bias-disable;
+ };
+
+ rx {
+ pins = "gpio35";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt
new file mode 100644
index 000000000..e4d6a9d20
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,msm8974-pinctrl.txt
@@ -0,0 +1,112 @@
+Qualcomm MSM8974 TLMM block
+
+Required properties:
+- compatible: "qcom,msm8974-pinctrl"
+- reg: Should be the base address and length of the TLMM block.
+- interrupts: Should be the parent IRQ of the TLMM block.
+- interrupt-controller: Marks the device node as an interrupt controller.
+- #interrupt-cells: Should be two.
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells : Should be two.
+ The first cell is the gpio pin number and the
+ second cell is used for optional parameters.
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Qualcomm's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, drive strength, etc.
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+ pins, function, bias-disable, bias-pull-down, bias-pull,up, drive-strength.
+
+Non-empty subnodes must specify the 'pins' property.
+Note that not all properties are valid for all pins.
+
+
+Valid values for pins are:
+ gpio0-gpio145
+ Supports mux, bias and drive-strength
+
+ sdc1_clk, sdc1_cmd, sdc1_data, sdc2_clk, sdc2_cmd, sdc2_data
+ Supports bias and drive-strength
+
+Valid values for function are:
+ cci_i2c0, cci_i2c1, uim1, uim2, uim_batt_alarm,
+ blsp_uim1, blsp_uart1, blsp_i2c1, blsp_spi1,
+ blsp_uim2, blsp_uart2, blsp_i2c2, blsp_spi2,
+ blsp_uim3, blsp_uart3, blsp_i2c3, blsp_spi3,
+ blsp_uim4, blsp_uart4, blsp_i2c4, blsp_spi4,
+ blsp_uim5, blsp_uart5, blsp_i2c5, blsp_spi5,
+ blsp_uim6, blsp_uart6, blsp_i2c6, blsp_spi6,
+ blsp_uim7, blsp_uart7, blsp_i2c7, blsp_spi7,
+ blsp_uim8, blsp_uart8, blsp_i2c8, blsp_spi8,
+ blsp_uim9, blsp_uart9, blsp_i2c9, blsp_spi9,
+ blsp_uim10, blsp_uart10, blsp_i2c10, blsp_spi10,
+ blsp_uim11, blsp_uart11, blsp_i2c11, blsp_spi11,
+ blsp_uim12, blsp_uart12, blsp_i2c12, blsp_spi12,
+ blsp_spi1_cs1, blsp_spi2_cs2, blsp_spi_cs3, blsp_spi2_cs1, blsp_spi2_cs2
+ blsp_spi2_cs3, blsp_spi10_cs1, blsp_spi10_cs2, blsp_spi10_cs3,
+ sdc3, sdc4, gcc_gp_clk1, gcc_gp_clk2, gcc_gp_clk3, cci_timer0, cci_timer1,
+ cci_timer2, cci_timer3, cci_async_in0, cci_async_in1, cci_async_in2,
+ cam_mckl0, cam_mclk1, cam_mclk2, cam_mclk3, mdp_vsync, hdmi_cec, hdmi_ddc,
+ hdmi_hpd, edp_hpd, gp_pdm0, gp_pdm1, gp_pdm2, gp_pdm3, gp0_clk, gp1_clk,
+ gp_mn, tsif1, tsif2, hsic, grfc, audio_ref_clk, qua_mi2s, pri_mi2s, spkr_mi2s,
+ ter_mi2s, sec_mi2s, bt, fm, wlan, slimbus, gpio
+
+ (Note that this is not yet the complete list of functions)
+
+
+
+Example:
+
+ msmgpio: pinctrl@fd510000 {
+ compatible = "qcom,msm8974-pinctrl";
+ reg = <0xfd510000 0x4000>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupts = <0 208 0>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart2_default>;
+
+ uart2_default: uart2_default {
+ mux {
+ pins = "gpio4", "gpio5";
+ function = "blsp_uart2";
+ };
+
+ tx {
+ pins = "gpio4";
+ drive-strength = <4>;
+ bias-disable;
+ };
+
+ rx {
+ pins = "gpio5";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt
new file mode 100644
index 000000000..1ae63c0ac
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-gpio.txt
@@ -0,0 +1,217 @@
+Qualcomm PMIC GPIO block
+
+This binding describes the GPIO block(s) found in the 8xxx series of
+PMIC's from Qualcomm.
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: must be one of:
+ "qcom,pm8018-gpio"
+ "qcom,pm8038-gpio"
+ "qcom,pm8058-gpio"
+ "qcom,pm8916-gpio"
+ "qcom,pm8917-gpio"
+ "qcom,pm8921-gpio"
+ "qcom,pm8941-gpio"
+ "qcom,pma8084-gpio"
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: Register base of the GPIO block and length.
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: Must contain an array of encoded interrupt specifiers for
+ each available GPIO
+
+- gpio-controller:
+ Usage: required
+ Value type: <none>
+ Definition: Mark the device node as a GPIO controller
+
+- #gpio-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: Must be 2;
+ the first cell will be used to define gpio number and the
+ second denotes the flags for this gpio
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+The pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin or a list of pins. This configuration can include the
+mux function to select on those pin(s), and various pin configuration
+parameters, as listed below.
+
+
+SUBNODES:
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+
+- pins:
+ Usage: required
+ Value type: <string-array>
+ Definition: List of gpio pins affected by the properties specified in
+ this subnode. Valid pins are:
+ gpio1-gpio6 for pm8018
+ gpio1-gpio12 for pm8038
+ gpio1-gpio40 for pm8058
+ gpio1-gpio4 for pm8916
+ gpio1-gpio38 for pm8917
+ gpio1-gpio44 for pm8921
+ gpio1-gpio36 for pm8941
+ gpio1-gpio22 for pma8084
+
+- function:
+ Usage: required
+ Value type: <string>
+ Definition: Specify the alternative function to be configured for the
+ specified pins. Valid values are:
+ "normal",
+ "paired",
+ "func1",
+ "func2",
+ "dtest1",
+ "dtest2",
+ "dtest3",
+ "dtest4"
+
+- bias-disable:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configured as no pull.
+
+- bias-pull-down:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configured as pull down.
+
+- bias-pull-up:
+ Usage: optional
+ Value type: <empty>
+ Definition: The specified pins should be configured as pull up.
+
+- qcom,pull-up-strength:
+ Usage: optional
+ Value type: <u32>
+ Definition: Specifies the strength to use for pull up, if selected.
+ Valid values are; as defined in
+ <dt-bindings/pinctrl/qcom,pmic-gpio.h>:
+ 1: 30uA (PMIC_GPIO_PULL_UP_30)
+ 2: 1.5uA (PMIC_GPIO_PULL_UP_1P5)
+ 3: 31.5uA (PMIC_GPIO_PULL_UP_31P5)
+ 4: 1.5uA + 30uA boost (PMIC_GPIO_PULL_UP_1P5_30)
+ If this property is ommited 30uA strength will be used if
+ pull up is selected
+
+- bias-high-impedance:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins will put in high-Z mode and disabled.
+
+- input-enable:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are put in input mode.
+
+- output-high:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ high.
+
+- output-low:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ low.
+
+- power-source:
+ Usage: optional
+ Value type: <u32>
+ Definition: Selects the power source for the specified pins. Valid
+ power sources are defined per chip in
+ <dt-bindings/pinctrl/qcom,pmic-gpio.h>
+
+- qcom,drive-strength:
+ Usage: optional
+ Value type: <u32>
+ Definition: Selects the drive strength for the specified pins. Value
+ drive strengths are:
+ 0: no (PMIC_GPIO_STRENGTH_NO)
+ 1: high (PMIC_GPIO_STRENGTH_HIGH) 0.9mA @ 1.8V - 1.9mA @ 2.6V
+ 2: medium (PMIC_GPIO_STRENGTH_MED) 0.6mA @ 1.8V - 1.25mA @ 2.6V
+ 3: low (PMIC_GPIO_STRENGTH_LOW) 0.15mA @ 1.8V - 0.3mA @ 2.6V
+ as defined in <dt-bindings/pinctrl/qcom,pmic-gpio.h>
+
+- drive-push-pull:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in push-pull mode.
+
+- drive-open-drain:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in open-drain mode.
+
+- drive-open-source:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in open-source mode.
+
+Example:
+
+ pm8921_gpio: gpio@150 {
+ compatible = "qcom,pm8921-gpio";
+ reg = <0x150 0x160>;
+ interrupts = <192 1>, <193 1>, <194 1>,
+ <195 1>, <196 1>, <197 1>,
+ <198 1>, <199 1>, <200 1>,
+ <201 1>, <202 1>, <203 1>,
+ <204 1>, <205 1>, <206 1>,
+ <207 1>, <208 1>, <209 1>,
+ <210 1>, <211 1>, <212 1>,
+ <213 1>, <214 1>, <215 1>,
+ <216 1>, <217 1>, <218 1>,
+ <219 1>, <220 1>, <221 1>,
+ <222 1>, <223 1>, <224 1>,
+ <225 1>, <226 1>, <227 1>,
+ <228 1>, <229 1>, <230 1>,
+ <231 1>, <232 1>, <233 1>,
+ <234 1>, <235 1>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ pm8921_gpio_keys: gpio-keys {
+ volume-keys {
+ pins = "gpio20", "gpio21";
+ function = "normal";
+
+ input-enable;
+ bias-pull-up;
+ drive-push-pull;
+ qcom,drive-strength = <PMIC_GPIO_STRENGTH_NO>;
+ power-source = <PM8921_GPIO_S4>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt
new file mode 100644
index 000000000..ed19991aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,pmic-mpp.txt
@@ -0,0 +1,164 @@
+Qualcomm PMIC Multi-Purpose Pin (MPP) block
+
+This binding describes the MPP block(s) found in the 8xxx series
+of PMIC's from Qualcomm.
+
+- compatible:
+ Usage: required
+ Value type: <string>
+ Definition: Should contain one of:
+ "qcom,pm8841-mpp",
+ "qcom,pm8916-mpp",
+ "qcom,pm8941-mpp",
+ "qcom,pma8084-mpp",
+
+- reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: Register base of the MPP block and length.
+
+- interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: Must contain an array of encoded interrupt specifiers for
+ each available MPP
+
+- gpio-controller:
+ Usage: required
+ Value type: <none>
+ Definition: Mark the device node as a GPIO controller
+
+- #gpio-cells:
+ Usage: required
+ Value type: <u32>
+ Definition: Must be 2;
+ the first cell will be used to define MPP number and the
+ second denotes the flags for this MPP
+
+Please refer to ../gpio/gpio.txt and ../interrupt-controller/interrupts.txt for
+a general description of GPIO and interrupt bindings.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+The pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin or a list of pins. This configuration can include the
+mux function to select on those pin(s), and various pin configuration
+parameters, as listed below.
+
+SUBNODES:
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Each subnode only affects those parameters that are explicitly listed. In
+other words, a subnode that lists a mux function but no pin configuration
+parameters implies no information about any pin configuration parameters.
+Similarly, a pin subnode that describes a pullup parameter implies no
+information about e.g. the mux function.
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pin configuration subnode:
+
+- pins:
+ Usage: required
+ Value type: <string-array>
+ Definition: List of MPP pins affected by the properties specified in
+ this subnode. Valid pins are:
+ mpp1-mpp4 for pm8841
+ mpp1-mpp4 for pm8916
+ mpp1-mpp8 for pm8941
+ mpp1-mpp4 for pma8084
+
+- function:
+ Usage: required
+ Value type: <string>
+ Definition: Specify the alternative function to be configured for the
+ specified pins. Valid values are:
+ "normal",
+ "paired",
+ "dtest1",
+ "dtest2",
+ "dtest3",
+ "dtest4"
+
+- bias-disable:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins should be configured as no pull.
+
+- bias-pull-up:
+ Usage: optional
+ Value type: <u32>
+ Definition: The specified pins should be configured as pull up.
+ Valid values are 600, 10000 and 30000 in bidirectional mode
+ only, i.e. when operating in qcom,analog-mode and input and
+ outputs are enabled. The hardware ignores the configuration
+ when operating in other modes.
+
+- bias-high-impedance:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins will put in high-Z mode and disabled.
+
+- input-enable:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are put in input mode, i.e. their input
+ buffer is enabled
+
+- output-high:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ high.
+
+- output-low:
+ Usage: optional
+ Value type: <none>
+ Definition: The specified pins are configured in output mode, driven
+ low.
+
+- power-source:
+ Usage: optional
+ Value type: <u32>
+ Definition: Selects the power source for the specified pins. Valid power
+ sources are defined in <dt-bindings/pinctrl/qcom,pmic-mpp.h>
+
+- qcom,analog-mode:
+ Usage: optional
+ Value type: <none>
+ Definition: Selects Analog mode of operation: combined with input-enable
+ and/or output-high, output-low MPP could operate as
+ Bidirectional Logic, Analog Input, Analog Output.
+
+- qcom,amux-route:
+ Usage: optional
+ Value type: <u32>
+ Definition: Selects the source for analog input. Valid values are
+ defined in <dt-bindings/pinctrl/qcom,pmic-mpp.h>
+ PMIC_MPP_AMUX_ROUTE_CH5, PMIC_MPP_AMUX_ROUTE_CH6...
+
+Example:
+
+ mpps@a000 {
+ compatible = "qcom,pm8841-mpp";
+ reg = <0xa000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupts = <4 0xa0 0 0>, <4 0xa1 0 0>, <4 0xa2 0 0>, <4 0xa3 0 0>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pm8841_default>;
+
+ pm8841_default: default {
+ gpio {
+ pins = "mpp1", "mpp2", "mpp3", "mpp4";
+ function = "normal";
+ input-enable;
+ power-source = <PM8841_MPP_S3>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
new file mode 100644
index 000000000..bfe72ec05
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/renesas,pfc-pinctrl.txt
@@ -0,0 +1,167 @@
+* Renesas Pin Function Controller (GPIO and Pin Mux/Config)
+
+The Pin Function Controller (PFC) is a Pin Mux/Config controller. On SH73A0,
+R8A73A4 and R8A7740 it also acts as a GPIO controller.
+
+
+Pin Control
+-----------
+
+Required Properties:
+
+ - compatible: should be one of the following.
+ - "renesas,pfc-emev2": for EMEV2 (EMMA Mobile EV2) compatible pin-controller.
+ - "renesas,pfc-r8a73a4": for R8A73A4 (R-Mobile APE6) compatible pin-controller.
+ - "renesas,pfc-r8a7740": for R8A7740 (R-Mobile A1) compatible pin-controller.
+ - "renesas,pfc-r8a7778": for R8A7778 (R-Mobile M1) compatible pin-controller.
+ - "renesas,pfc-r8a7779": for R8A7779 (R-Car H1) compatible pin-controller.
+ - "renesas,pfc-r8a7790": for R8A7790 (R-Car H2) compatible pin-controller.
+ - "renesas,pfc-r8a7791": for R8A7791 (R-Car M2) compatible pin-controller.
+ - "renesas,pfc-sh73a0": for SH73A0 (SH-Mobile AG5) compatible pin-controller.
+
+ - reg: Base address and length of each memory resource used by the pin
+ controller hardware module.
+
+Optional properties:
+
+ - #gpio-range-cells: Mandatory when the PFC doesn't handle GPIO, forbidden
+ otherwise. Should be 3.
+
+ - interrupts-extended: Specify the interrupts associated with external
+ IRQ pins. This property is mandatory when the PFC handles GPIOs and
+ forbidden otherwise. When specified, it must contain one interrupt per
+ external IRQ, sorted by external IRQ number.
+
+The PFC node also acts as a container for pin configuration nodes. Please refer
+to pinctrl-bindings.txt in this directory for the definition of the term "pin
+configuration node" and for the common pinctrl bindings used by client devices.
+
+Each pin configuration node represents a desired configuration for a pin, a
+pin group, or a list of pins or pin groups. The configuration can include the
+function to select on those pin(s) and pin configuration parameters (such as
+pull-up and pull-down).
+
+Pin configuration nodes contain pin configuration properties, either directly
+or grouped in child subnodes. Both pin muxing and configuration parameters can
+be grouped in that way and referenced as a single pin configuration node by
+client devices.
+
+A configuration node or subnode must reference at least one pin (through the
+pins or pin groups properties) and contain at least a function or one
+configuration parameter. When the function is present only pin groups can be
+used to reference pins.
+
+All pin configuration nodes and subnodes names are ignored. All of those nodes
+are parsed through phandles and processed purely based on their content.
+
+Pin Configuration Node Properties:
+
+- renesas,pins : An array of strings, each string containing the name of a pin.
+- renesas,groups : An array of strings, each string containing the name of a pin
+ group.
+
+- renesas,function: A string containing the name of the function to mux to the
+ pin group(s) specified by the renesas,groups property
+
+ Valid values for pin, group and function names can be found in the group and
+ function arrays of the PFC data file corresponding to the SoC
+ (drivers/pinctrl/sh-pfc/pfc-*.c)
+
+The pin configuration parameters use the generic pinconf bindings defined in
+pinctrl-bindings.txt in this directory. The supported parameters are
+bias-disable, bias-pull-up and bias-pull-down.
+
+
+GPIO
+----
+
+On SH73A0, R8A73A4 and R8A7740 the PFC node is also a GPIO controller node.
+
+Required Properties:
+
+ - gpio-controller: Marks the device node as a gpio controller.
+
+ - #gpio-cells: Should be 2. The first cell is the GPIO number and the second
+ cell specifies GPIO flags, as defined in <dt-bindings/gpio/gpio.h>. Only the
+ GPIO_ACTIVE_HIGH and GPIO_ACTIVE_LOW flags are supported.
+
+The syntax of the gpio specifier used by client nodes should be the following
+with values derived from the SoC user manual.
+
+ <[phandle of the gpio controller node]
+ [pin number within the gpio controller]
+ [flags]>
+
+On other mach-shmobile platforms GPIO is handled by the gpio-rcar driver.
+Please refer to Documentation/devicetree/bindings/gpio/renesas,gpio-rcar.txt
+for documentation of the GPIO device tree bindings on those platforms.
+
+
+Examples
+--------
+
+Example 1: SH73A0 (SH-Mobile AG5) pin controller node
+
+ pfc: pfc@e6050000 {
+ compatible = "renesas,pfc-sh73a0";
+ reg = <0xe6050000 0x8000>,
+ <0xe605801c 0x1c>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupts-extended =
+ <&irqpin0 0 0>, <&irqpin0 1 0>, <&irqpin0 2 0>, <&irqpin0 3 0>,
+ <&irqpin0 4 0>, <&irqpin0 5 0>, <&irqpin0 6 0>, <&irqpin0 7 0>,
+ <&irqpin1 0 0>, <&irqpin1 1 0>, <&irqpin1 2 0>, <&irqpin1 3 0>,
+ <&irqpin1 4 0>, <&irqpin1 5 0>, <&irqpin1 6 0>, <&irqpin1 7 0>,
+ <&irqpin2 0 0>, <&irqpin2 1 0>, <&irqpin2 2 0>, <&irqpin2 3 0>,
+ <&irqpin2 4 0>, <&irqpin2 5 0>, <&irqpin2 6 0>, <&irqpin2 7 0>,
+ <&irqpin3 0 0>, <&irqpin3 1 0>, <&irqpin3 2 0>, <&irqpin3 3 0>,
+ <&irqpin3 4 0>, <&irqpin3 5 0>, <&irqpin3 6 0>, <&irqpin3 7 0>;
+ };
+
+Example 2: A GPIO LED node that references a GPIO
+
+ #include <dt-bindings/gpio/gpio.h>
+
+ leds {
+ compatible = "gpio-leds";
+ led1 {
+ gpios = <&pfc 20 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+Example 3: KZM-A9-GT (SH-Mobile AG5) default pin state hog and pin control maps
+ for the MMCIF and SCIFA4 devices
+
+ &pfc {
+ pinctrl-0 = <&scifa4_pins>;
+ pinctrl-names = "default";
+
+ mmcif_pins: mmcif {
+ mux {
+ renesas,groups = "mmc0_data8_0", "mmc0_ctrl_0";
+ renesas,function = "mmc0";
+ };
+ cfg {
+ renesas,groups = "mmc0_data8_0";
+ renesas,pins = "PORT279";
+ bias-pull-up;
+ };
+ };
+
+ scifa4_pins: scifa4 {
+ renesas,groups = "scifa4_data", "scifa4_ctrl";
+ renesas,function = "scifa4";
+ };
+ };
+
+Example 4: KZM-A9-GT (SH-Mobile AG5) default pin state for the MMCIF device
+
+ &mmcif {
+ pinctrl-0 = <&mmcif_pins>;
+ pinctrl-names = "default";
+
+ bus-width = <8>;
+ vmmc-supply = <&reg_1p8v>;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
new file mode 100644
index 000000000..388b21324
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/rockchip,pinctrl.txt
@@ -0,0 +1,157 @@
+* Rockchip Pinmux Controller
+
+The Rockchip Pinmux Controller, enables the IC
+to share one PAD to several functional blocks. The sharing is done by
+multiplexing the PAD input/output signals. For each PAD there are several
+muxing options with option 0 being the use as a GPIO.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+The Rockchip pin configuration node is a node of a group of pins which can be
+used for a specific device or function. This node represents both mux and
+config of the pins in that group. The 'pins' selects the function mode(also
+named pin mode) this pin can work on and the 'config' configures various pad
+settings such as pull-up, etc.
+
+The pins are grouped into up to 5 individual pin banks which need to be
+defined as gpio sub-nodes of the pinmux controller.
+
+Required properties for iomux controller:
+ - compatible: one of "rockchip,rk2928-pinctrl", "rockchip,rk3066a-pinctrl"
+ "rockchip,rk3066b-pinctrl", "rockchip,rk3188-pinctrl"
+ "rockchip,rk3288-pinctrl"
+ - rockchip,grf: phandle referencing a syscon providing the
+ "general register files"
+
+Optional properties for iomux controller:
+ - rockchip,pmu: phandle referencing a syscon providing the pmu registers
+ as some SoCs carry parts of the iomux controller registers there.
+ Required for at least rk3188 and rk3288.
+
+Deprecated properties for iomux controller:
+ - reg: first element is the general register space of the iomux controller
+ It should be large enough to contain also separate pull registers.
+ second element is the separate pull register space of the rk3188.
+ Use rockchip,grf and rockchip,pmu described above instead.
+
+Required properties for gpio sub nodes:
+ - compatible: "rockchip,gpio-bank"
+ - reg: register of the gpio bank (different than the iomux registerset)
+ - interrupts: base interrupt of the gpio bank in the interrupt controller
+ - clocks: clock that drives this bank
+ - gpio-controller: identifies the node as a gpio controller and pin bank.
+ - #gpio-cells: number of cells in GPIO specifier. Since the generic GPIO
+ binding is used, the amount of cells must be specified as 2. See generic
+ GPIO binding documentation for description of particular cells.
+ - interrupt-controller: identifies the controller node as interrupt-parent.
+ - #interrupt-cells: the value of this property should be 2 and the interrupt
+ cells should use the standard two-cell scheme described in
+ bindings/interrupt-controller/interrupts.txt
+
+Deprecated properties for gpio sub nodes:
+ - compatible: "rockchip,rk3188-gpio-bank0"
+ - reg: second element: separate pull register for rk3188 bank0, use
+ rockchip,pmu described above instead
+
+Required properties for pin configuration node:
+ - rockchip,pins: 3 integers array, represents a group of pins mux and config
+ setting. The format is rockchip,pins = <PIN_BANK PIN_BANK_IDX MUX &phandle>.
+ The MUX 0 means gpio and MUX 1 to N mean the specific device function.
+ The phandle of a node containing the generic pinconfig options
+ to use, as described in pinctrl-bindings.txt in this directory.
+
+Examples:
+
+#include <dt-bindings/pinctrl/rockchip.h>
+
+...
+
+pinctrl@20008000 {
+ compatible = "rockchip,rk3066a-pinctrl";
+ rockchip,grf = <&grf>;
+
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ gpio0: gpio0@20034000 {
+ compatible = "rockchip,gpio-bank";
+ reg = <0x20034000 0x100>;
+ interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk_gates8 9>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ ...
+
+ pcfg_pull_default: pcfg_pull_default {
+ bias-pull-pin-default
+ };
+
+ uart2 {
+ uart2_xfer: uart2-xfer {
+ rockchip,pins = <RK_GPIO1 8 1 &pcfg_pull_default>,
+ <RK_GPIO1 9 1 &pcfg_pull_default>;
+ };
+ };
+};
+
+uart2: serial@20064000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x20064000 0x400>;
+ interrupts = <GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>;
+ reg-shift = <2>;
+ reg-io-width = <1>;
+ clocks = <&mux_uart2>;
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&uart2_xfer>;
+};
+
+Example for rk3188:
+
+ pinctrl@20008000 {
+ compatible = "rockchip,rk3188-pinctrl";
+ rockchip,grf = <&grf>;
+ rockchip,pmu = <&pmu>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ gpio0: gpio0@0x2000a000 {
+ compatible = "rockchip,rk3188-gpio-bank0";
+ reg = <0x2000a000 0x100>;
+ interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk_gates8 9>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ gpio1: gpio1@0x2003c000 {
+ compatible = "rockchip,gpio-bank";
+ reg = <0x2003c000 0x100>;
+ interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk_gates8 10>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ ...
+
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
new file mode 100644
index 000000000..9d2a99529
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
@@ -0,0 +1,356 @@
+Samsung GPIO and Pin Mux/Config controller
+
+Samsung's ARM based SoC's integrates a GPIO and Pin mux/config hardware
+controller. It controls the input/output settings on the available pads/pins
+and also provides ability to multiplex and configure the output of various
+on-chip controllers onto these pads.
+
+Required Properties:
+- compatible: should be one of the following.
+ - "samsung,s3c2412-pinctrl": for S3C2412-compatible pin-controller,
+ - "samsung,s3c2416-pinctrl": for S3C2416-compatible pin-controller,
+ - "samsung,s3c2440-pinctrl": for S3C2440-compatible pin-controller,
+ - "samsung,s3c2450-pinctrl": for S3C2450-compatible pin-controller,
+ - "samsung,s3c64xx-pinctrl": for S3C64xx-compatible pin-controller,
+ - "samsung,s5pv210-pinctrl": for S5PV210-compatible pin-controller,
+ - "samsung,exynos4210-pinctrl": for Exynos4210 compatible pin-controller.
+ - "samsung,exynos4x12-pinctrl": for Exynos4x12 compatible pin-controller.
+ - "samsung,exynos5250-pinctrl": for Exynos5250 compatible pin-controller.
+ - "samsung,exynos5260-pinctrl": for Exynos5260 compatible pin-controller.
+ - "samsung,exynos5420-pinctrl": for Exynos5420 compatible pin-controller.
+ - "samsung,exynos7-pinctrl": for Exynos7 compatible pin-controller.
+
+- reg: Base address of the pin controller hardware module and length of
+ the address space it occupies.
+
+- Pin banks as child nodes: Pin banks of the controller are represented by child
+ nodes of the controller node. Bank name is taken from name of the node. Each
+ bank node must contain following properties:
+
+ - gpio-controller: identifies the node as a gpio controller and pin bank.
+ - #gpio-cells: number of cells in GPIO specifier. Since the generic GPIO
+ binding is used, the amount of cells must be specified as 2. See the below
+ mentioned gpio binding representation for description of particular cells.
+
+ Eg: <&gpx2 6 0>
+ <[phandle of the gpio controller node]
+ [pin number within the gpio controller]
+ [flags]>
+
+ Values for gpio specifier:
+ - Pin number: is a value between 0 to 7.
+ - Flags: 0 - Active High
+ 1 - Active Low
+
+- Pin mux/config groups as child nodes: The pin mux (selecting pin function
+ mode) and pin config (pull up/down, driver strength) settings are represented
+ as child nodes of the pin-controller node. There should be atleast one
+ child node and there is no limit on the count of these child nodes. It is
+ also possible for a child node to consist of several further child nodes
+ to allow grouping multiple pinctrl groups into one. The format of second
+ level child nodes is exactly the same as for first level ones and is
+ described below.
+
+ The child node should contain a list of pin(s) on which a particular pin
+ function selection or pin configuration (or both) have to applied. This
+ list of pins is specified using the property name "samsung,pins". There
+ should be atleast one pin specfied for this property and there is no upper
+ limit on the count of pins that can be specified. The pins are specified
+ using pin names which are derived from the hardware manual of the SoC. As
+ an example, the pins in GPA0 bank of the pin controller can be represented
+ as "gpa0-0", "gpa0-1", "gpa0-2" and so on. The names should be in lower case.
+ The format of the pin names should be (as per the hardware manual)
+ "[pin bank name]-[pin number within the bank]".
+
+ The pin function selection that should be applied on the pins listed in the
+ child node is specified using the "samsung,pin-function" property. The value
+ of this property that should be applied to each of the pins listed in the
+ "samsung,pins" property should be picked from the hardware manual of the SoC
+ for the specified pin group. This property is optional in the child node if
+ no specific function selection is desired for the pins listed in the child
+ node. The value of this property is used as-is to program the pin-controller
+ function selector register of the pin-bank.
+
+ The child node can also optionally specify one or more of the pin
+ configuration that should be applied on all the pins listed in the
+ "samsung,pins" property of the child node. The following pin configuration
+ properties are supported.
+
+ - samsung,pin-val: Initial value of pin output buffer.
+ - samsung,pin-pud: Pull up/down configuration.
+ - samsung,pin-drv: Drive strength configuration.
+ - samsung,pin-pud-pdn: Pull up/down configuration in power down mode.
+ - samsung,pin-drv-pdn: Drive strength configuration in power down mode.
+
+ The values specified by these config properties should be derived from the
+ hardware manual and these values are programmed as-is into the pin
+ pull up/down and driver strength register of the pin-controller.
+
+ Note: A child should include atleast a pin function selection property or
+ pin configuration property (one or more) or both.
+
+ The client nodes that require a particular pin function selection and/or
+ pin configuration should use the bindings listed in the "pinctrl-bindings.txt"
+ file.
+
+External GPIO and Wakeup Interrupts:
+
+The controller supports two types of external interrupts over gpio. The first
+is the external gpio interrupt and second is the external wakeup interrupts.
+The difference between the two is that the external wakeup interrupts can be
+used as system wakeup events.
+
+A. External GPIO Interrupts: For supporting external gpio interrupts, the
+ following properties should be specified in the pin-controller device node.
+
+ - interrupt-parent: phandle of the interrupt parent to which the external
+ GPIO interrupts are forwarded to.
+ - interrupts: interrupt specifier for the controller. The format and value of
+ the interrupt specifier depends on the interrupt parent for the controller.
+
+ In addition, following properties must be present in node of every bank
+ of pins supporting GPIO interrupts:
+
+ - interrupt-controller: identifies the controller node as interrupt-parent.
+ - #interrupt-cells: the value of this property should be 2.
+ - First Cell: represents the external gpio interrupt number local to the
+ external gpio interrupt space of the controller.
+ - Second Cell: flags to identify the type of the interrupt
+ - 1 = rising edge triggered
+ - 2 = falling edge triggered
+ - 3 = rising and falling edge triggered
+ - 4 = high level triggered
+ - 8 = low level triggered
+
+B. External Wakeup Interrupts: For supporting external wakeup interrupts, a
+ child node representing the external wakeup interrupt controller should be
+ included in the pin-controller device node. This child node should include
+ the following properties.
+
+ - compatible: identifies the type of the external wakeup interrupt controller
+ The possible values are:
+ - samsung,s3c2410-wakeup-eint: represents wakeup interrupt controller
+ found on Samsung S3C24xx SoCs except S3C2412 and S3C2413,
+ - samsung,s3c2412-wakeup-eint: represents wakeup interrupt controller
+ found on Samsung S3C2412 and S3C2413 SoCs,
+ - samsung,s3c64xx-wakeup-eint: represents wakeup interrupt controller
+ found on Samsung S3C64xx SoCs,
+ - samsung,exynos4210-wakeup-eint: represents wakeup interrupt controller
+ found on Samsung Exynos4210 and S5PC110/S5PV210 SoCs.
+ - samsung,exynos7-wakeup-eint: represents wakeup interrupt controller
+ found on Samsung Exynos7 SoC.
+ - interrupt-parent: phandle of the interrupt parent to which the external
+ wakeup interrupts are forwarded to.
+ - interrupts: interrupt used by multiplexed wakeup interrupts.
+
+ In addition, following properties must be present in node of every bank
+ of pins supporting wake-up interrupts:
+
+ - interrupt-controller: identifies the node as interrupt-parent.
+ - #interrupt-cells: the value of this property should be 2
+ - First Cell: represents the external wakeup interrupt number local to
+ the external wakeup interrupt space of the controller.
+ - Second Cell: flags to identify the type of the interrupt
+ - 1 = rising edge triggered
+ - 2 = falling edge triggered
+ - 3 = rising and falling edge triggered
+ - 4 = high level triggered
+ - 8 = low level triggered
+
+ Node of every bank of pins supporting direct wake-up interrupts (without
+ multiplexing) must contain following properties:
+
+ - interrupt-parent: phandle of the interrupt parent to which the external
+ wakeup interrupts are forwarded to.
+ - interrupts: interrupts of the interrupt parent which are used for external
+ wakeup interrupts from pins of the bank, must contain interrupts for all
+ pins of the bank.
+
+Aliases:
+
+All the pin controller nodes should be represented in the aliases node using
+the following format 'pinctrl{n}' where n is a unique number for the alias.
+
+Aliases for controllers compatible with "samsung,exynos7-pinctrl":
+- pinctrl0: pin controller of ALIVE block,
+- pinctrl1: pin controller of BUS0 block,
+- pinctrl2: pin controller of NFC block,
+- pinctrl3: pin controller of TOUCH block,
+- pinctrl4: pin controller of FF block,
+- pinctrl5: pin controller of ESE block,
+- pinctrl6: pin controller of FSYS0 block,
+- pinctrl7: pin controller of FSYS1 block,
+- pinctrl8: pin controller of BUS1 block,
+- pinctrl9: pin controller of AUDIO block,
+
+Example: A pin-controller node with pin banks:
+
+ pinctrl_0: pinctrl@11400000 {
+ compatible = "samsung,exynos4210-pinctrl";
+ reg = <0x11400000 0x1000>;
+ interrupts = <0 47 0>;
+
+ /* ... */
+
+ /* Pin bank without external interrupts */
+ gpy0: gpy0 {
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
+
+ /* ... */
+
+ /* Pin bank with external GPIO or muxed wake-up interrupts */
+ gpj0: gpj0 {
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ /* ... */
+
+ /* Pin bank with external direct wake-up interrupts */
+ gpx0: gpx0 {
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ interrupt-parent = <&gic>;
+ interrupts = <0 16 0>, <0 17 0>, <0 18 0>, <0 19 0>,
+ <0 20 0>, <0 21 0>, <0 22 0>, <0 23 0>;
+ #interrupt-cells = <2>;
+ };
+
+ /* ... */
+ };
+
+Example 1: A pin-controller node with pin groups.
+
+ pinctrl_0: pinctrl@11400000 {
+ compatible = "samsung,exynos4210-pinctrl";
+ reg = <0x11400000 0x1000>;
+ interrupts = <0 47 0>;
+
+ /* ... */
+
+ uart0_data: uart0-data {
+ samsung,pins = "gpa0-0", "gpa0-1";
+ samsung,pin-function = <2>;
+ samsung,pin-pud = <0>;
+ samsung,pin-drv = <0>;
+ };
+
+ uart0_fctl: uart0-fctl {
+ samsung,pins = "gpa0-2", "gpa0-3";
+ samsung,pin-function = <2>;
+ samsung,pin-pud = <0>;
+ samsung,pin-drv = <0>;
+ };
+
+ uart1_data: uart1-data {
+ samsung,pins = "gpa0-4", "gpa0-5";
+ samsung,pin-function = <2>;
+ samsung,pin-pud = <0>;
+ samsung,pin-drv = <0>;
+ };
+
+ uart1_fctl: uart1-fctl {
+ samsung,pins = "gpa0-6", "gpa0-7";
+ samsung,pin-function = <2>;
+ samsung,pin-pud = <0>;
+ samsung,pin-drv = <0>;
+ };
+
+ i2c2_bus: i2c2-bus {
+ samsung,pins = "gpa0-6", "gpa0-7";
+ samsung,pin-function = <3>;
+ samsung,pin-pud = <3>;
+ samsung,pin-drv = <0>;
+ };
+
+ sd4_bus8: sd4-bus-width8 {
+ part-1 {
+ samsung,pins = "gpk0-3", "gpk0-4",
+ "gpk0-5", "gpk0-6";
+ samsung,pin-function = <3>;
+ samsung,pin-pud = <3>;
+ samsung,pin-drv = <3>;
+ };
+ part-2 {
+ samsung,pins = "gpk1-3", "gpk1-4",
+ "gpk1-5", "gpk1-6";
+ samsung,pin-function = <4>;
+ samsung,pin-pud = <4>;
+ samsung,pin-drv = <3>;
+ };
+ };
+ };
+
+Example 2: A pin-controller node with external wakeup interrupt controller node.
+
+ pinctrl_1: pinctrl@11000000 {
+ compatible = "samsung,exynos4210-pinctrl";
+ reg = <0x11000000 0x1000>;
+ interrupts = <0 46 0>
+
+ /* ... */
+
+ wakeup-interrupt-controller {
+ compatible = "samsung,exynos4210-wakeup-eint";
+ interrupt-parent = <&gic>;
+ interrupts = <0 32 0>;
+ };
+ };
+
+Example 3: A uart client node that supports 'default' and 'flow-control' states.
+
+ uart@13800000 {
+ compatible = "samsung,exynos4210-uart";
+ reg = <0x13800000 0x100>;
+ interrupts = <0 52 0>;
+ pinctrl-names = "default", "flow-control;
+ pinctrl-0 = <&uart0_data>;
+ pinctrl-1 = <&uart0_data &uart0_fctl>;
+ };
+
+Example 4: Set up the default pin state for uart controller.
+
+ static int s3c24xx_serial_probe(struct platform_device *pdev) {
+ struct pinctrl *pinctrl;
+
+ /* ... */
+
+ pinctrl = devm_pinctrl_get_select_default(&pdev->dev);
+ }
+
+Example 5: A display port client node that supports 'default' pinctrl state
+ and gpio binding.
+
+ display-port-controller {
+ /* ... */
+
+ samsung,hpd-gpio = <&gpx2 6 0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&dp_hpd>;
+ };
+
+Example 6: Request the gpio for display port controller
+
+ static int exynos_dp_probe(struct platform_device *pdev)
+ {
+ int hpd_gpio, ret;
+ struct device *dev = &pdev->dev;
+ struct device_node *dp_node = dev->of_node;
+
+ /* ... */
+
+ hpd_gpio = of_get_named_gpio(dp_node, "samsung,hpd-gpio", 0);
+
+ /* ... */
+
+ ret = devm_gpio_request_one(&pdev->dev, hpd_gpio, GPIOF_IN,
+ "hpd_gpio");
+ /* ... */
+ }
diff --git a/Documentation/devicetree/bindings/pinctrl/ste,abx500.txt b/Documentation/devicetree/bindings/pinctrl/ste,abx500.txt
new file mode 100644
index 000000000..876974204
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/ste,abx500.txt
@@ -0,0 +1,318 @@
+ST Ericsson abx500 pinmux controller
+
+Required properties:
+- compatible: "stericsson,ab8500-gpio", "stericsson,ab8540-gpio",
+ "stericsson,ab8505-gpio", "stericsson,ab9540-gpio",
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+ST Ericsson's pin configuration nodes use the generic pin multiplexing
+and pin configuration bindings, see pinctrl-bindings.txt
+
+Example board file extract:
+
+&pinctrl_abx500 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&sysclkreq2_default_mode>, <&sysclkreq3_default_mode>, <&gpio3_default_mode>, <&sysclkreq6_default_mode>, <&pwmout1_default_mode>, <&pwmout2_default_mode>, <&pwmout3_default_mode>, <&adi1_default_mode>, <&dmic12_default_mode>, <&dmic34_default_mode>, <&dmic56_default_mode>, <&sysclkreq5_default_mode>, <&batremn_default_mode>, <&service_default_mode>, <&pwrctrl0_default_mode>, <&pwrctrl1_default_mode>, <&pwmextvibra1_default_mode>, <&pwmextvibra2_default_mode>, <&gpio51_default_mode>, <&gpio52_default_mode>, <&gpio53_default_mode>, <&gpio54_default_mode>, <&pdmclkdat_default_mode>;
+
+ sysclkreq2 {
+ sysclkreq2_default_mode: sysclkreq2_default {
+ default_mux {
+ function = "sysclkreq";
+ groups = "sysclkreq2_d_1";
+ };
+ default_cfg {
+ pins = "GPIO1";
+ bias-disable;
+ };
+ };
+ };
+ sysclkreq3 {
+ sysclkreq3_default_mode: sysclkreq3_default {
+ default_mux {
+ function = "sysclkreq";
+ groups = "sysclkreq3_d_1";
+ };
+ default_cfg {
+ pins = "GPIO2";
+ output-low;
+ };
+ };
+ };
+ gpio3 {
+ gpio3_default_mode: gpio3_default {
+ default_mux {
+ function = "gpio";
+ groups = "gpio3_a_1";
+ };
+ default_cfg {
+ pins = "GPIO3";
+ output-low;
+ };
+ };
+ };
+ sysclkreq6 {
+ sysclkreq6_default_mode: sysclkreq6_default {
+ default_mux {
+ function = "sysclkreq";
+ groups = "sysclkreq6_d_1";
+ };
+ default_cfg {
+ pins = "GPIO4";
+ bias-disable;
+ };
+ };
+ };
+ pwmout1 {
+ pwmout1_default_mode: pwmout1_default {
+ default_mux {
+ function = "pwmout";
+ groups = "pwmout1_d_1";
+ };
+ default_cfg {
+ pins = "GPIO14";
+ output-low;
+ };
+ };
+ };
+ pwmout2 {
+ pwmout2_default_mode: pwmout2_default {
+ pwmout2_default_mux {
+ function = "pwmout";
+ groups = "pwmout2_d_1";
+ };
+ pwmout2_default_cfg {
+ pins = "GPIO15";
+ output-low;
+ };
+ };
+ };
+ pwmout3 {
+ pwmout3_default_mode: pwmout3_default {
+ pwmout3_default_mux {
+ function = "pwmout";
+ groups = "pwmout3_d_1";
+ };
+ pwmout3_default_cfg {
+ pins = "GPIO16";
+ output-low;
+ };
+ };
+ };
+ adi1 {
+
+ adi1_default_mode: adi1_default {
+ adi1_default_mux {
+ function = "adi1";
+ groups = "adi1_d_1";
+ };
+ adi1_default_cfg1 {
+ pins = "GPIO17","GPIO19","GPIO20";
+ bias-disable;
+ };
+ adi1_default_cfg2 {
+ pins = "GPIO18";
+ output-low;
+ };
+ };
+ };
+ dmic12 {
+ dmic12_default_mode: dmic12_default {
+ dmic12_default_mux {
+ function = "dmic";
+ groups = "dmic12_d_1";
+ };
+ dmic12_default_cfg1 {
+ pins = "GPIO27";
+ output-low;
+ };
+ dmic12_default_cfg2 {
+ pins = "GPIO28";
+ bias-disable;
+ };
+ };
+ };
+ dmic34 {
+ dmic34_default_mode: dmic34_default {
+ dmic34_default_mux {
+ function = "dmic";
+ groups = "dmic34_d_1";
+ };
+ dmic34_default_cfg1 {
+ pins = "GPIO29";
+ output-low;
+ };
+ dmic34_default_cfg2 {
+ pins = "GPIO30";
+ bias-disable;{
+
+ };
+ };
+ };
+ dmic56 {
+ dmic56_default_mode: dmic56_default {
+ dmic56_default_mux {
+ function = "dmic";
+ groups = "dmic56_d_1";
+ };
+ dmic56_default_cfg1 {
+ pins = "GPIO31";
+ output-low;
+ };
+ dmic56_default_cfg2 {
+ pins = "GPIO32";
+ bias-disable;
+ };
+ };
+ };
+ sysclkreq5 {
+ sysclkreq5_default_mode: sysclkreq5_default {
+ sysclkreq5_default_mux {
+ function = "sysclkreq";
+ groups = "sysclkreq5_d_1";
+ };
+ sysclkreq5_default_cfg {
+ pins = "GPIO42";
+ output-low;
+ };
+ };
+ };
+ batremn {
+ batremn_default_mode: batremn_default {
+ batremn_default_mux {
+ function = "batremn";
+ groups = "batremn_d_1";
+ };
+ batremn_default_cfg {
+ pins = "GPIO43";
+ bias-disable;
+ };
+ };
+ };
+ service {
+ service_default_mode: service_default {
+ service_default_mux {
+ function = "service";
+ groups = "service_d_1";
+ };
+ service_default_cfg {
+ pins = "GPIO44";
+ bias-disable;
+ };
+ };
+ };
+ pwrctrl0 {
+ pwrctrl0_default_mux: pwrctrl0_mux {
+ pwrctrl0_default_mux {
+ function = "pwrctrl";
+ groups = "pwrctrl0_d_1";
+ };
+ };
+ pwrctrl0_default_mode: pwrctrl0_default {
+ pwrctrl0_default_cfg {
+ pins = "GPIO45";
+ bias-disable;
+ };
+ };
+ };
+ pwrctrl1 {
+ pwrctrl1_default_mux: pwrctrl1_mux {
+ pwrctrl1_default_mux {
+ function = "pwrctrl";
+ groups = "pwrctrl1_d_1";
+ };
+ };
+ pwrctrl1_default_mode: pwrctrl1_default {
+ pwrctrl1_default_cfg {
+ pins = "GPIO46";
+ bias-disable;
+ };
+ };
+ };
+ pwmextvibra1 {
+ pwmextvibra1_default_mode: pwmextvibra1_default {
+ pwmextvibra1_default_mux {
+ function = "pwmextvibra";
+ groups = "pwmextvibra1_d_1";
+ };
+ pwmextvibra1_default_cfg {
+ pins = "GPIO47";
+ bias-disable;
+ };
+ };
+ };
+ pwmextvibra2 {
+ pwmextvibra2_default_mode: pwmextvibra2_default {
+ pwmextvibra2_default_mux {
+ function = "pwmextvibra";
+ groups = "pwmextvibra2_d_1";
+ };
+ pwmextvibra1_default_cfg {
+ pins = "GPIO48";
+ bias-disable;
+ };
+ };
+ };
+ gpio51 {
+ gpio51_default_mode: gpio51_default {
+ gpio51_default_mux {
+ function = "gpio";
+ groups = "gpio51_a_1";
+ };
+ gpio51_default_cfg {
+ pins = "GPIO51";
+ output-low;
+ };
+ };
+ };
+ gpio52 {
+ gpio52_default_mode: gpio52_default {
+ gpio52_default_mux {
+ function = "gpio";
+ groups = "gpio52_a_1";
+ };
+ gpio52_default_cfg {
+ pins = "GPIO52";
+ bias-pull-down;
+ };
+ };
+ };
+ gpio53 {
+ gpio53_default_mode: gpio53_default {
+ gpio53_default_mux {
+ function = "gpio";
+ groups = "gpio53_a_1";
+ };
+ gpio53_default_cfg {
+ pins = "GPIO53";
+ bias-pull-down;
+ };
+ };
+ };
+ gpio54 {
+ gpio54_default_mode: gpio54_default {
+ gpio54_default_mux {
+ function = "gpio";
+ groups = "gpio54_a_1";
+ };
+ gpio54_default_cfg {
+ pins = "GPIO54";
+ output-low;
+ };
+ };
+ };
+ pdmclkdat {
+ pdmclkdat_default_mode: pdmclkdat_default {
+ pdmclkdat_default_mux {
+ function = "pdm";
+ groups = "pdmclkdat_d_1";
+ };
+ pdmclkdat_default_cfg {
+ pins = "GPIO55", "GPIO56";
+ bias-disable;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/ste,nomadik.txt b/Documentation/devicetree/bindings/pinctrl/ste,nomadik.txt
new file mode 100644
index 000000000..f63fcb3ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/ste,nomadik.txt
@@ -0,0 +1,145 @@
+ST Ericsson Nomadik pinmux controller
+
+Required properties:
+- compatible: "stericsson,db8500-pinctrl", "stericsson,db8540-pinctrl",
+ "stericsson,stn8815-pinctrl"
+- reg: Should contain the register physical address and length of the PRCMU.
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+ST Ericsson's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as input, output, pull up, pull down...
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content. The subnodes use the generic
+pin multiplexing node layout from the standard pin control bindings
+(see pinctrl-bindings.txt):
+
+Required pin multiplexing subnode properties:
+- function: A string containing the name of the function to mux to the
+ pin or group.
+- groups : An array of strings. Each string contains the name of a pin
+ group that will be combined with the function to form a multiplexing
+ set-up.
+
+Required pin configuration subnode properties:
+- pins: A string array describing the pins affected by the configuration
+ in the node.
+- ste,config: Handle of pin configuration node
+ (e.g. ste,config = <&slpm_in_wkup_pdis>)
+
+- ste,input : <0/1/2>
+ 0: input with no pull
+ 1: input with pull up,
+ 2: input with pull down,
+
+- ste,output: <0/1/2>
+ 0: output low,
+ 1: output high,
+ 2: output (value is not specified).
+
+- ste,sleep: <0/1>
+ 0: sleep mode disable,
+ 1: sleep mode enable.
+
+- ste,sleep-input: <0/1/2/3>
+ 0: sleep input with no pull,
+ 1: sleep input with pull up,
+ 2: sleep input with pull down.
+ 3: sleep input and keep last input configuration (no pull, pull up or pull down).
+
+- ste,sleep-output: <0/1/2>
+ 0: sleep output low,
+ 1: sleep output high,
+ 2: sleep output (value is not specified).
+
+- ste,sleep-gpio: <0/1>
+ 0: disable sleep gpio mode,
+ 1: enable sleep gpio mode.
+
+- ste,sleep-wakeup: <0/1>
+ 0: wake-up detection enabled,
+ 1: wake-up detection disabled.
+
+- ste,sleep-pull-disable: <0/1>
+ 0: GPIO pull-up or pull-down resistor is enabled, when pin is an input,
+ 1: GPIO pull-up and pull-down resistor are disabled.
+
+Example board file extract:
+
+ pinctrl@80157000 {
+ compatible = "stericsson,db8500-pinctrl";
+ reg = <0x80157000 0x2000>;
+
+ pinctrl-names = "default";
+
+ slpm_in_wkup_pdis: slpm_in_wkup_pdis {
+ ste,sleep = <1>;
+ ste,sleep-input = <3>;
+ ste,sleep-wakeup = <1>;
+ ste,sleep-pull-disable = <0>;
+ };
+
+ slpm_out_hi_wkup_pdis: slpm_out_hi_wkup_pdis {
+ ste,sleep = <1>;
+ ste,sleep-output = <1>;
+ ste,sleep-wakeup = <1>;
+ ste,sleep-pull-disable = <0>;
+ };
+
+ slpm_out_wkup_pdis: slpm_out_wkup_pdis {
+ ste,sleep = <1>;
+ ste,sleep-output = <2>;
+ ste,sleep-wakeup = <1>;
+ ste,sleep-pull-disable = <0>;
+ };
+
+ uart0 {
+ uart0_default_mux: uart0_mux {
+ u0_default_mux {
+ function = "u0";
+ pins = "u0_a_1";
+ };
+ };
+ uart0_default_mode: uart0_default {
+ uart0_default_cfg1 {
+ pins = "GPIO0", "GPIO2";
+ ste,input = <1>;
+ };
+
+ uart0_default_cfg2 {
+ pins = "GPIO1", "GPIO3";
+ ste,output = <1>;
+ };
+ };
+ uart0_sleep_mode: uart0_sleep {
+ uart0_sleep_cfg1 {
+ pins = "GPIO0", "GPIO2";
+ ste,config = <&slpm_in_wkup_pdis>;
+ };
+ uart0_sleep_cfg2 {
+ pins = "GPIO1";
+ ste,config = <&slpm_out_hi_wkup_pdis>;
+ };
+ uart0_sleep_cfg3 {
+ pins = "GPIO3";
+ ste,config = <&slpm_out_wkup_pdis>;
+ };
+ };
+ };
+ };
+
+ uart@80120000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x80120000 0x1000>;
+ interrupts = <0 11 0x4>;
+
+ pinctrl-names = "default","sleep";
+ pinctrl-0 = <&uart0_default_mux>, <&uart0_default_mode>;
+ pinctrl-1 = <&uart0_sleep_mode>;
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/ti,omap-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/ti,omap-pinctrl.txt
new file mode 100644
index 000000000..88c80273d
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/ti,omap-pinctrl.txt
@@ -0,0 +1,13 @@
+OMAP Pinctrl definitions
+
+Required properties:
+- compatible : Should be one of:
+ "ti,omap2420-padconf" - OMAP2420 compatible pinctrl
+ "ti,omap2430-padconf" - OMAP2430 compatible pinctrl
+ "ti,omap3-padconf" - OMAP3 compatible pinctrl
+ "ti,omap4-padconf" - OMAP4 compatible pinctrl
+ "ti,omap5-padconf" - OMAP5 compatible pinctrl
+ "ti,dra7-padconf" - DRA7 compatible pinctrl
+ "ti,am437-padconf" - AM437x compatible pinctrl
+
+See Documentation/devicetree/bindings/pinctrl/pinctrl-single.txt for further details.
diff --git a/Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.txt b/Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.txt
new file mode 100644
index 000000000..b7b55a964
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/xlnx,zynq-pinctrl.txt
@@ -0,0 +1,104 @@
+ Binding for Xilinx Zynq Pinctrl
+
+Required properties:
+- compatible: "xlnx,zynq-pinctrl"
+- syscon: phandle to SLCR
+- reg: Offset and length of pinctrl space in SLCR
+
+Please refer to pinctrl-bindings.txt in this directory for details of the
+common pinctrl bindings used by client devices, including the meaning of the
+phrase "pin configuration node".
+
+Zynq's pin configuration nodes act as a container for an arbitrary number of
+subnodes. Each of these subnodes represents some desired configuration for a
+pin, a group, or a list of pins or groups. This configuration can include the
+mux function to select on those pin(s)/group(s), and various pin configuration
+parameters, such as pull-up, slew rate, etc.
+
+Each configuration node can consist of multiple nodes describing the pinmux and
+pinconf options. Those nodes can be pinmux nodes or pinconf nodes.
+
+The name of each subnode is not important; all subnodes should be enumerated
+and processed purely based on their content.
+
+Required properties for pinmux nodes are:
+ - groups: A list of pinmux groups.
+ - function: The name of a pinmux function to activate for the specified set
+ of groups.
+
+Required properties for configuration nodes:
+One of:
+ - pins: a list of pin names
+ - groups: A list of pinmux groups.
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pinmux subnode:
+ groups, function
+
+The following generic properties as defined in pinctrl-bindings.txt are valid
+to specify in a pinconf subnode:
+ groups, pins, bias-disable, bias-high-impedance, bias-pull-up, slew-rate,
+ low-power-disable, low-power-enable
+
+ Valid arguments for 'slew-rate' are '0' and '1' to select between slow and fast
+ respectively.
+
+ Valid values for groups are:
+ ethernet0_0_grp, ethernet1_0_grp, mdio0_0_grp, mdio1_0_grp,
+ qspi0_0_grp, qspi1_0_grp, qspi_fbclk, qspi_cs1_grp, spi0_0_grp,
+ spi0_1_grp - spi0_2_grp, spi1_0_grp - spi1_3_grp, sdio0_0_grp - sdio0_2_grp,
+ sdio1_0_grp - sdio1_3_grp, sdio0_emio_wp, sdio0_emio_cd, sdio1_emio_wp,
+ sdio1_emio_cd, smc0_nor, smc0_nor_cs1_grp, smc0_nor_addr25_grp, smc0_nand,
+ can0_0_grp - can0_10_grp, can1_0_grp - can1_11_grp, uart0_0_grp - uart0_10_grp,
+ uart1_0_grp - uart1_11_grp, i2c0_0_grp - i2c0_10_grp, i2c1_0_grp - i2c1_10_grp,
+ ttc0_0_grp - ttc0_2_grp, ttc1_0_grp - ttc1_2_grp, swdt0_0_grp - swdt0_4_grp,
+ gpio0_0_grp - gpio0_53_grp, usb0_0_grp, usb1_0_grp
+
+ Valid values for pins are:
+ MIO0 - MIO53
+
+ Valid values for function are:
+ ethernet0, ethernet1, mdio0, mdio1, qspi0, qspi1, qspi_fbclk, qspi_cs1,
+ spi0, spi1, sdio0, sdio0_pc, sdio0_cd, sdio0_wp,
+ sdio1, sdio1_pc, sdio1_cd, sdio1_wp,
+ smc0_nor, smc0_nor_cs1, smc0_nor_addr25, smc0_nand, can0, can1, uart0, uart1,
+ i2c0, i2c1, ttc0, ttc1, swdt0, gpio0, usb0, usb1
+
+The following driver-specific properties as defined here are valid to specify in
+a pin configuration subnode:
+ - io-standard: Configure the pin to use the selected IO standard according to
+ this mapping:
+ 1: LVCMOS18
+ 2: LVCMOS25
+ 3: LVCMOS33
+ 4: HSTL
+
+Example:
+ pinctrl0: pinctrl@700 {
+ compatible = "xlnx,pinctrl-zynq";
+ reg = <0x700 0x200>;
+ syscon = <&slcr>;
+
+ pinctrl_uart1_default: uart1-default {
+ mux {
+ groups = "uart1_10_grp";
+ function = "uart1";
+ };
+
+ conf {
+ groups = "uart1_10_grp";
+ slew-rate = <0>;
+ io-standard = <1>;
+ };
+
+ conf-rx {
+ pins = "MIO49";
+ bias-high-impedance;
+ };
+
+ conf-tx {
+ pins = "MIO48";
+ bias-disable;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/power/bq2415x.txt b/Documentation/devicetree/bindings/power/bq2415x.txt
new file mode 100644
index 000000000..d0327f0b5
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/bq2415x.txt
@@ -0,0 +1,47 @@
+Binding for TI bq2415x Li-Ion Charger
+
+Required properties:
+- compatible: Should contain one of the following:
+ * "ti,bq24150"
+ * "ti,bq24150"
+ * "ti,bq24150a"
+ * "ti,bq24151"
+ * "ti,bq24151a"
+ * "ti,bq24152"
+ * "ti,bq24153"
+ * "ti,bq24153a"
+ * "ti,bq24155"
+ * "ti,bq24156"
+ * "ti,bq24156a"
+ * "ti,bq24158"
+- reg: integer, i2c address of the device.
+- ti,current-limit: integer, initial maximum current charger can pull
+ from power supply in mA.
+- ti,weak-battery-voltage: integer, weak battery voltage threshold in mV.
+ The chip will use slow precharge if battery voltage
+ is below this value.
+- ti,battery-regulation-voltage: integer, maximum charging voltage in mV.
+- ti,charge-current: integer, maximum charging current in mA.
+- ti,termination-current: integer, charge will be terminated when current in
+ constant-voltage phase drops below this value (in mA).
+- ti,resistor-sense: integer, value of sensing resistor in milliohm.
+
+Optional properties:
+- ti,usb-charger-detection: phandle to usb charger detection device.
+ (required for auto mode)
+
+Example from Nokia N900:
+
+bq24150a {
+ compatible = "ti,bq24150a";
+ reg = <0x6b>;
+
+ ti,current-limit = <100>;
+ ti,weak-battery-voltage = <3400>;
+ ti,battery-regulation-voltage = <4200>;
+ ti,charge-current = <650>;
+ ti,termination-current = <100>;
+ ti,resistor-sense = <68>;
+
+ ti,usb-charger-detection = <&isp1704>;
+};
diff --git a/Documentation/devicetree/bindings/power/da9150-charger.txt b/Documentation/devicetree/bindings/power/da9150-charger.txt
new file mode 100644
index 000000000..f3906663c
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/da9150-charger.txt
@@ -0,0 +1,26 @@
+Dialog Semiconductor DA9150 Charger Power Supply bindings
+
+Required properties:
+- compatible: "dlg,da9150-charger" for DA9150 Charger Power Supply
+
+Optional properties:
+- io-channels: List of phandle and IIO specifier pairs
+- io-channel-names: List of channel names used by charger
+ ["CHAN_IBUS", "CHAN_VBUS", "CHAN_TJUNC", "CHAN_VBAT"]
+ (See Documentation/devicetree/bindings/iio/iio-bindings.txt for further info)
+
+
+Example:
+
+ da9150-charger {
+ compatible = "dlg,da9150-charger";
+
+ io-channels = <&gpadc 0>,
+ <&gpadc 2>,
+ <&gpadc 8>,
+ <&gpadc 5>;
+ io-channel-names = "CHAN_IBUS",
+ "CHAN_VBUS",
+ "CHAN_TJUNC",
+ "CHAN_VBAT";
+ };
diff --git a/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt b/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt
new file mode 100644
index 000000000..65cc03457
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/fsl,imx-gpc.txt
@@ -0,0 +1,59 @@
+Freescale i.MX General Power Controller
+=======================================
+
+The i.MX6Q General Power Control (GPC) block contains DVFS load tracking
+counters and Power Gating Control (PGC) for the CPU and PU (GPU/VPU) power
+domains.
+
+Required properties:
+- compatible: Should be "fsl,imx6q-gpc" or "fsl,imx6sl-gpc"
+- reg: should be register base and length as documented in the
+ datasheet
+- interrupts: Should contain GPC interrupt request 1
+- pu-supply: Link to the LDO regulator powering the PU power domain
+- clocks: Clock phandles to devices in the PU power domain that need
+ to be enabled during domain power-up for reset propagation.
+- #power-domain-cells: Should be 1, see below:
+
+The gpc node is a power-controller as documented by the generic power domain
+bindings in Documentation/devicetree/bindings/power/power_domain.txt.
+
+Example:
+
+ gpc: gpc@020dc000 {
+ compatible = "fsl,imx6q-gpc";
+ reg = <0x020dc000 0x4000>;
+ interrupts = <0 89 IRQ_TYPE_LEVEL_HIGH>,
+ <0 90 IRQ_TYPE_LEVEL_HIGH>;
+ pu-supply = <&reg_pu>;
+ clocks = <&clks IMX6QDL_CLK_GPU3D_CORE>,
+ <&clks IMX6QDL_CLK_GPU3D_SHADER>,
+ <&clks IMX6QDL_CLK_GPU2D_CORE>,
+ <&clks IMX6QDL_CLK_GPU2D_AXI>,
+ <&clks IMX6QDL_CLK_OPENVG_AXI>,
+ <&clks IMX6QDL_CLK_VPU_AXI>;
+ #power-domain-cells = <1>;
+ };
+
+
+Specifying power domain for IP modules
+======================================
+
+IP cores belonging to a power domain should contain a 'power-domains' property
+that is a phandle pointing to the gpc device node and a DOMAIN_INDEX specifying
+the power domain the device belongs to.
+
+Example of a device that is part of the PU power domain:
+
+ vpu: vpu@02040000 {
+ reg = <0x02040000 0x3c000>;
+ /* ... */
+ power-domains = <&gpc 1>;
+ /* ... */
+ };
+
+The following DOMAIN_INDEX values are valid for i.MX6Q:
+ARM_DOMAIN 0
+PU_DOMAIN 1
+The following additional DOMAIN_INDEX value is valid for i.MX6SL:
+DISPLAY_DOMAIN 2
diff --git a/Documentation/devicetree/bindings/power/isp1704.txt b/Documentation/devicetree/bindings/power/isp1704.txt
new file mode 100644
index 000000000..fa3596907
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/isp1704.txt
@@ -0,0 +1,17 @@
+Binding for NXP ISP1704 USB Charger Detection
+
+Required properties:
+- compatible: Should contain one of the following:
+ * "nxp,isp1704"
+- nxp,enable-gpio: Should contain a phandle + gpio-specifier
+ to the GPIO pin connected to the chip's enable pin.
+- usb-phy: Should contain a phandle to the USB PHY
+ the ISP1704 is connected to.
+
+Example:
+
+isp1704 {
+ compatible = "nxp,isp1704";
+ nxp,enable-gpio = <&gpio3 3 GPIO_ACTIVE_LOW>;
+ usb-phy = <&usb2_phy>;
+};
diff --git a/Documentation/devicetree/bindings/power/ltc2941.txt b/Documentation/devicetree/bindings/power/ltc2941.txt
new file mode 100644
index 000000000..ea42ae12d
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/ltc2941.txt
@@ -0,0 +1,27 @@
+binding for LTC2941 and LTC2943 battery gauges
+
+Both the LTC2941 and LTC2943 measure battery capacity.
+The LTC2943 is compatible with the LTC2941, it adds voltage and
+temperature monitoring, and uses a slightly different conversion
+formula for the charge counter.
+
+Required properties:
+- compatible: Should contain "ltc2941" or "ltc2943" which also indicates the
+ type of I2C chip attached.
+- reg: The 7-bit I2C address.
+- lltc,resistor-sense: The sense resistor value in milli-ohms. Can be a 32-bit
+ negative value when the battery has been connected to the wrong end of the
+ resistor.
+- lltc,prescaler-exponent: The prescaler exponent as explained in the datasheet.
+ This determines the range and accuracy of the gauge. The value is programmed
+ into the chip only if it differs from the current setting. The setting is
+ lost when the battery is disconnected.
+
+Example from the Topic Miami Florida board:
+
+ fuelgauge: ltc2943@64 {
+ compatible = "ltc2943";
+ reg = <0x64>;
+ lltc,resistor-sense = <15>;
+ lltc,prescaler-exponent = <5>; /* 2^(2*5) = 1024 */
+ };
diff --git a/Documentation/devicetree/bindings/power/opp.txt b/Documentation/devicetree/bindings/power/opp.txt
new file mode 100644
index 000000000..74499e503
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/opp.txt
@@ -0,0 +1,25 @@
+* Generic OPP Interface
+
+SoCs have a standard set of tuples consisting of frequency and
+voltage pairs that the device will support per voltage domain. These
+are called Operating Performance Points or OPPs.
+
+Properties:
+- operating-points: An array of 2-tuples items, and each item consists
+ of frequency and voltage like <freq-kHz vol-uV>.
+ freq: clock frequency in kHz
+ vol: voltage in microvolt
+
+Examples:
+
+cpu@0 {
+ compatible = "arm,cortex-a9";
+ reg = <0>;
+ next-level-cache = <&L2>;
+ operating-points = <
+ /* kHz uV */
+ 792000 1100000
+ 396000 950000
+ 198000 850000
+ >;
+};
diff --git a/Documentation/devicetree/bindings/power/power-controller.txt b/Documentation/devicetree/bindings/power/power-controller.txt
new file mode 100644
index 000000000..4f7a3bc9c
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/power-controller.txt
@@ -0,0 +1,18 @@
+* Generic system power control capability
+
+Power-management integrated circuits or miscellaneous hardware components are
+sometimes able to control the system power. The device driver associated with these
+components might need to define this capability, which tells the kernel that
+it can be used to switch off the system. The corresponding device must have the
+standard property "system-power-controller" in its device node. This property
+marks the device as able to control the system power. In order to test if this
+property is found programmatically, use the helper function
+"of_device_is_system_power_controller" from of.h .
+
+Example:
+
+act8846: act8846@5 {
+ compatible = "active-semi,act8846";
+ status = "okay";
+ system-power-controller;
+}
diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
new file mode 100644
index 000000000..0f8ed3710
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -0,0 +1,78 @@
+* Generic PM domains
+
+System on chip designs are often divided into multiple PM domains that can be
+used for power gating of selected IP blocks for power saving by reduced leakage
+current.
+
+This device tree binding can be used to bind PM domain consumer devices with
+their PM domains provided by PM domain providers. A PM domain provider can be
+represented by any node in the device tree and can provide one or more PM
+domains. A consumer node can refer to the provider by a phandle and a set of
+phandle arguments (so called PM domain specifiers) of length specified by the
+#power-domain-cells property in the PM domain provider node.
+
+==PM domain providers==
+
+Required properties:
+ - #power-domain-cells : Number of cells in a PM domain specifier;
+ Typically 0 for nodes representing a single PM domain and 1 for nodes
+ providing multiple PM domains (e.g. power controllers), but can be any value
+ as specified by device tree binding documentation of particular provider.
+
+Optional properties:
+ - power-domains : A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle.
+ Some power domains might be powered from another power domain (or have
+ other hardware specific dependencies). For representing such dependency
+ a standard PM domain consumer binding is used. When provided, all domains
+ created by the given provider should be subdomains of the domain
+ specified by this binding. More details about power domain specifier are
+ available in the next section.
+
+Example:
+
+ power: power-controller@12340000 {
+ compatible = "foo,power-controller";
+ reg = <0x12340000 0x1000>;
+ #power-domain-cells = <1>;
+ };
+
+The node above defines a power controller that is a PM domain provider and
+expects one cell as its phandle argument.
+
+Example 2:
+
+ parent: power-controller@12340000 {
+ compatible = "foo,power-controller";
+ reg = <0x12340000 0x1000>;
+ #power-domain-cells = <1>;
+ };
+
+ child: power-controller@12340000 {
+ compatible = "foo,power-controller";
+ reg = <0x12341000 0x1000>;
+ power-domains = <&parent 0>;
+ #power-domain-cells = <1>;
+ };
+
+The nodes above define two power controllers: 'parent' and 'child'.
+Domains created by the 'child' power controller are subdomains of '0' power
+domain provided by the 'parent' power controller.
+
+==PM domain consumers==
+
+Required properties:
+ - power-domains : A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle.
+
+Example:
+
+ leaky-device@12350000 {
+ compatible = "foo,i-leak-current";
+ reg = <0x12350000 0x1000>;
+ power-domains = <&power 0>;
+ };
+
+The node above defines a typical PM domain consumer device, which is located
+inside a PM domain with index 0 of a power controller represented by a node
+with the label "power".
diff --git a/Documentation/devicetree/bindings/power/renesas,sysc-rmobile.txt b/Documentation/devicetree/bindings/power/renesas,sysc-rmobile.txt
new file mode 100644
index 000000000..beda7d2ef
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/renesas,sysc-rmobile.txt
@@ -0,0 +1,100 @@
+DT bindings for the Renesas R-Mobile System Controller
+
+== System Controller Node ==
+
+The R-Mobile System Controller provides the following functions:
+ - Boot mode management,
+ - Reset generation,
+ - Power management.
+
+Required properties:
+- compatible: Should be "renesas,sysc-<soctype>", "renesas,sysc-rmobile" as
+ fallback.
+ Examples with soctypes are:
+ - "renesas,sysc-r8a73a4" (R-Mobile APE6)
+ - "renesas,sysc-r8a7740" (R-Mobile A1)
+ - "renesas,sysc-sh73a0" (SH-Mobile AG5)
+- reg: Two address start and address range blocks for the device:
+ - The first block refers to the normally accessible registers,
+ - the second block refers to the registers protected by the HPB
+ semaphore.
+
+Optional nodes:
+- pm-domains: This node contains a hierarchy of PM domain nodes, which should
+ match the Power Area Hierarchy in the Power Domain Specifications section of
+ the device's datasheet.
+
+
+== PM Domain Nodes ==
+
+Each of the PM domain nodes represents a PM domain, as documented by the
+generic PM domain bindings in
+Documentation/devicetree/bindings/power/power_domain.txt.
+
+The nodes should be named by the real power area names, and thus their names
+should be unique.
+
+Required properties:
+ - #power-domain-cells: Must be 0.
+
+Optional properties:
+- reg: If the PM domain is not always-on, this property must contain the bit
+ index number for the corresponding power area in the various Power
+ Control and Status Registers. The parent's node must contain the
+ following two properties:
+ - #address-cells: Must be 1,
+ - #size-cells: Must be 0.
+ If the PM domain is always-on, this property must be omitted.
+
+
+Example:
+
+This shows a subset of the r8a7740 PM domain hierarchy, containing the
+C5 "always-on" domain, 2 of its subdomains (A4S and A4SU), and the A3SP domain,
+which is a subdomain of A4S.
+
+ sysc: system-controller@e6180000 {
+ compatible = "renesas,sysc-r8a7740", "renesas,sysc-rmobile";
+ reg = <0xe6180000 0x8000>, <0xe6188000 0x8000>;
+
+ pm-domains {
+ pd_c5: c5 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #power-domain-cells = <0>;
+
+ pd_a4s: a4s@10 {
+ reg = <10>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ #power-domain-cells = <0>;
+
+ pd_a3sp: a3sp@11 {
+ reg = <11>;
+ #power-domain-cells = <0>;
+ };
+ };
+
+ pd_a4su: a4su@20 {
+ reg = <20>;
+ #power-domain-cells = <0>;
+ };
+ };
+ };
+ };
+
+
+== PM Domain Consumers ==
+
+Hardware blocks belonging to a PM domain should contain a "power-domains"
+property that is a phandle pointing to the corresponding PM domain node.
+
+Example:
+
+ tpu: pwm@e6600000 {
+ compatible = "renesas,tpu-r8a7740", "renesas,tpu";
+ reg = <0xe6600000 0x100>;
+ clocks = <&mstp3_clks R8A7740_CLK_TPU0>;
+ power-domains = <&pd_a3sp>;
+ #pwm-cells = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/power/reset/keystone-reset.txt b/Documentation/devicetree/bindings/power/reset/keystone-reset.txt
new file mode 100644
index 000000000..c82f12e2d
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/keystone-reset.txt
@@ -0,0 +1,67 @@
+* Device tree bindings for Texas Instruments keystone reset
+
+This node is intended to allow SoC reset in case of software reset
+of selected watchdogs.
+
+The Keystone SoCs can contain up to 4 watchdog timers to reset
+SoC. Each watchdog timer event input is connected to the Reset Mux
+block. The Reset Mux block can be configured to cause reset or not.
+
+Additionally soft or hard reset can be configured.
+
+Required properties:
+
+- compatible: ti,keystone-reset
+
+- ti,syscon-pll: phandle/offset pair. The phandle to syscon used to
+ access pll controller registers and the offset to use
+ reset control registers.
+
+- ti,syscon-dev: phandle/offset pair. The phandle to syscon used to
+ access device state control registers and the offset
+ in order to use mux block registers for all watchdogs.
+
+Optional properties:
+
+- ti,soft-reset: Boolean option indicating soft reset.
+ By default hard reset is used.
+
+- ti,wdt-list: WDT list that can cause SoC reset. It's not related
+ to WDT driver, it's just needed to enable a SoC related
+ reset that's triggered by one of WDTs. The list is
+ in format: <0>, <2>; It can be in random order and
+ begins from 0 to 3, as keystone can contain up to 4 SoC
+ reset watchdogs and can be in random order.
+
+Example 1:
+Setup keystone reset so that in case software reset or
+WDT0 is triggered it issues hard reset for SoC.
+
+pllctrl: pll-controller@02310000 {
+ compatible = "ti,keystone-pllctrl", "syscon";
+ reg = <0x02310000 0x200>;
+};
+
+devctrl: device-state-control@02620000 {
+ compatible = "ti,keystone-devctrl", "syscon";
+ reg = <0x02620000 0x1000>;
+};
+
+rstctrl: reset-controller {
+ compatible = "ti,keystone-reset";
+ ti,syscon-pll = <&pllctrl 0xe4>;
+ ti,syscon-dev = <&devctrl 0x328>;
+ ti,wdt-list = <0>;
+};
+
+Example 2:
+Setup keystone reset so that in case of software reset or
+WDT0 or WDT2 is triggered it issues soft reset for SoC.
+
+rstctrl: reset-controller {
+ compatible = "ti,keystone-reset";
+ ti,syscon-pll = <&pllctrl 0xe4>;
+ ti,syscon-dev = <&devctrl 0x328>;
+ ti,wdt-list = <0>, <2>;
+ ti,soft-reset;
+};
diff --git a/Documentation/devicetree/bindings/power/reset/ltc2952-poweroff.txt b/Documentation/devicetree/bindings/power/reset/ltc2952-poweroff.txt
new file mode 100644
index 000000000..cd2d7f58a
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/ltc2952-poweroff.txt
@@ -0,0 +1,29 @@
+Binding for the LTC2952 PowerPath controller
+
+This chip is used to externally trigger a system shut down. Once the trigger has
+been sent, the chip's watchdog has to be reset to gracefully shut down.
+A full powerdown can be triggered via the kill signal.
+
+Required properties:
+
+- compatible: Must contain: "lltc,ltc2952"
+- watchdog-gpios: phandle + gpio-specifier for the GPIO connected to the
+ chip's watchdog line
+- kill-gpios: phandle + gpio-specifier for the GPIO connected to the
+ chip's kill line
+
+Optional properties:
+- trigger-gpios: phandle + gpio-specifier for the GPIO connected to the
+ chip's trigger line. If this property is not set, the
+ trigger function is ignored and the chip is kept alive
+ until an explicit kill signal is received
+
+Example:
+
+ltc2952 {
+ compatible = "lltc,ltc2952";
+
+ trigger-gpios = <&gpio0 1 GPIO_ACTIVE_LOW>;
+ watchdog-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+ kill-gpios = <&gpio0 2 GPIO_ACTIVE_LOW>;
+};
diff --git a/Documentation/devicetree/bindings/power/reset/st-reset.txt b/Documentation/devicetree/bindings/power/reset/st-reset.txt
new file mode 100644
index 000000000..809af54f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/st-reset.txt
@@ -0,0 +1,11 @@
+*Device-Tree bindings for ST SW reset functionality
+
+Required properties:
+- compatible: should be "st,<chip>-restart".
+- st,syscfg: should be a phandle of the syscfg node.
+
+Example node:
+ restart {
+ compatible = "st,stih416-restart";
+ st,syscfg = <&syscfg_sbc>;
+ };
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt
new file mode 100644
index 000000000..1e2546f8b
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/syscon-poweroff.txt
@@ -0,0 +1,23 @@
+Generic SYSCON mapped register poweroff driver
+
+This is a generic poweroff driver using syscon to map the poweroff register.
+The poweroff is generally performed with a write to the poweroff register
+defined by the register map pointed by syscon reference plus the offset
+with the mask defined in the poweroff node.
+
+Required properties:
+- compatible: should contain "syscon-poweroff"
+- regmap: this is phandle to the register map node
+- offset: offset in the register map for the poweroff register (in bytes)
+- mask: the poweroff value written to the poweroff register (32 bit access)
+
+Default will be little endian mode, 32 bit access only.
+
+Examples:
+
+ poweroff {
+ compatible = "syscon-poweroff";
+ regmap = <&regmapnode>;
+ offset = <0x0>;
+ mask = <0x7a>;
+ };
diff --git a/Documentation/devicetree/bindings/power/reset/syscon-reboot.txt b/Documentation/devicetree/bindings/power/reset/syscon-reboot.txt
new file mode 100644
index 000000000..11906316b
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/syscon-reboot.txt
@@ -0,0 +1,23 @@
+Generic SYSCON mapped register reset driver
+
+This is a generic reset driver using syscon to map the reset register.
+The reset is generally performed with a write to the reset register
+defined by the register map pointed by syscon reference plus the offset
+with the mask defined in the reboot node.
+
+Required properties:
+- compatible: should contain "syscon-reboot"
+- regmap: this is phandle to the register map node
+- offset: offset in the register map for the reboot register (in bytes)
+- mask: the reset value written to the reboot register (32 bit access)
+
+Default will be little endian mode, 32 bit access only.
+
+Examples:
+
+ reboot {
+ compatible = "syscon-reboot";
+ regmap = <&regmapnode>;
+ offset = <0x0>;
+ mask = <0x1>;
+ };
diff --git a/Documentation/devicetree/bindings/power/rockchip-io-domain.txt b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
new file mode 100644
index 000000000..8b70db103
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/rockchip-io-domain.txt
@@ -0,0 +1,83 @@
+Rockchip SRAM for IO Voltage Domains:
+-------------------------------------
+
+IO domain voltages on some Rockchip SoCs are variable but need to be
+kept in sync between the regulators and the SoC using a special
+register.
+
+A specific example using rk3288:
+- If the regulator hooked up to a pin like SDMMC0_VDD is 3.3V then
+ bit 7 of GRF_IO_VSEL needs to be 0. If the regulator hooked up to
+ that same pin is 1.8V then bit 7 of GRF_IO_VSEL needs to be 1.
+
+Said another way, this driver simply handles keeping bits in the SoC's
+general register file (GRF) in sync with the actual value of a voltage
+hooked up to the pins.
+
+Note that this driver specifically doesn't include:
+- any logic for deciding what voltage we should set regulators to
+- any logic for deciding whether regulators (or internal SoC blocks)
+ should have power or not have power
+
+If there were some other software that had the smarts of making
+decisions about regulators, it would work in conjunction with this
+driver. When that other software adjusted a regulator's voltage then
+this driver would handle telling the SoC about it. A good example is
+vqmmc for SD. In that case the dw_mmc driver simply is told about a
+regulator. It changes the regulator between 3.3V and 1.8V at the
+right time. This driver notices the change and makes sure that the
+SoC is on the same page.
+
+
+Required properties:
+- compatible: should be one of:
+ - "rockchip,rk3188-io-voltage-domain" for rk3188
+ - "rockchip,rk3288-io-voltage-domain" for rk3288
+- rockchip,grf: phandle to the syscon managing the "general register files"
+
+
+You specify supplies using the standard regulator bindings by including
+a phandle the relevant regulator. All specified supplies must be able
+to report their voltage. The IO Voltage Domain for any non-specified
+supplies will be not be touched.
+
+Possible supplies for rk3188:
+- ap0-supply: The supply connected to AP0_VCC.
+- ap1-supply: The supply connected to AP1_VCC.
+- cif-supply: The supply connected to CIF_VCC.
+- flash-supply: The supply connected to FLASH_VCC.
+- lcdc0-supply: The supply connected to LCD0_VCC.
+- lcdc1-supply: The supply connected to LCD1_VCC.
+- vccio0-supply: The supply connected to VCCIO0.
+- vccio1-supply: The supply connected to VCCIO1.
+ Sometimes also labeled VCCIO1 and VCCIO2.
+
+Possible supplies for rk3288:
+- audio-supply: The supply connected to APIO4_VDD.
+- bb-supply: The supply connected to APIO5_VDD.
+- dvp-supply: The supply connected to DVPIO_VDD.
+- flash0-supply: The supply connected to FLASH0_VDD. Typically for eMMC
+- flash1-supply: The supply connected to FLASH1_VDD. Also known as SDIO1.
+- gpio30-supply: The supply connected to APIO1_VDD.
+- gpio1830 The supply connected to APIO2_VDD.
+- lcdc-supply: The supply connected to LCDC_VDD.
+- sdcard-supply: The supply connected to SDMMC0_VDD.
+- wifi-supply: The supply connected to APIO3_VDD. Also known as SDIO0.
+
+
+Example:
+
+ io-domains {
+ compatible = "rockchip,rk3288-io-voltage-domain";
+ rockchip,grf = <&grf>;
+
+ audio-supply = <&vcc18_codec>;
+ bb-supply = <&vcc33_io>;
+ dvp-supply = <&vcc_18>;
+ flash0-supply = <&vcc18_flashio>;
+ gpio1830-supply = <&vcc33_io>;
+ gpio30-supply = <&vcc33_pmuio>;
+ lcdc-supply = <&vcc33_lcd>;
+ sdcard-supply = <&vccio_sd>;
+ wifi-supply = <&vcc18_wl>;
+ };
diff --git a/Documentation/devicetree/bindings/power/rx51-battery.txt b/Documentation/devicetree/bindings/power/rx51-battery.txt
new file mode 100644
index 000000000..90438453d
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/rx51-battery.txt
@@ -0,0 +1,25 @@
+Binding for Nokia N900 battery
+
+The Nokia N900 battery status can be read via the TWL4030's A/D converter.
+
+Required properties:
+- compatible: Should contain one of the following:
+ * "nokia,n900-battery"
+- io-channels: Should contain IIO channel specifiers
+ for each element in io-channel-names.
+- io-channel-names: Should contain the following values:
+ * "temp" - The ADC channel for temperature reading
+ * "bsi" - The ADC channel for battery size identification
+ * "vbat" - The ADC channel to measure the battery voltage
+
+Example from Nokia N900:
+
+battery: n900-battery {
+ compatible = "nokia,n900-battery";
+ io-channels = <&twl4030_madc 0>,
+ <&twl4030_madc 4>,
+ <&twl4030_madc 12>;
+ io-channel-names = "temp",
+ "bsi",
+ "vbat";
+};
diff --git a/Documentation/devicetree/bindings/power/twl-charger.txt b/Documentation/devicetree/bindings/power/twl-charger.txt
new file mode 100644
index 000000000..d5c706216
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/twl-charger.txt
@@ -0,0 +1,20 @@
+TWL BCI (Battery Charger Interface)
+
+Required properties:
+- compatible:
+ - "ti,twl4030-bci"
+- interrupts: two interrupt lines from the TWL SIH (secondary
+ interrupt handler) - interrupts 9 and 2.
+
+Optional properties:
+- ti,bb-uvolt: microvolts for charging the backup battery.
+- ti,bb-uamp: microamps for charging the backup battery.
+
+Examples:
+
+bci {
+ compatible = "ti,twl4030-bci";
+ interrupts = <9>, <2>;
+ ti,bb-uvolt = <3200000>;
+ ti,bb-uamp = <150>;
+};
diff --git a/Documentation/devicetree/bindings/power_supply/ab8500/btemp.txt b/Documentation/devicetree/bindings/power_supply/ab8500/btemp.txt
new file mode 100644
index 000000000..0ba1bcc7f
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/ab8500/btemp.txt
@@ -0,0 +1,16 @@
+=== AB8500 Battery Temperature Monitor Driver ===
+
+The properties below describes the node for btemp driver.
+
+Required Properties:
+- compatible = Shall be: "stericsson,ab8500-btemp"
+- battery = Shall be battery specific information
+
+ Example:
+ ab8500_btemp {
+ compatible = "stericsson,ab8500-btemp";
+ battery = <&ab8500_battery>;
+ };
+
+For information on battery specific node, Ref:
+Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
diff --git a/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt b/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt
new file mode 100644
index 000000000..ef5328371
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/ab8500/chargalg.txt
@@ -0,0 +1,16 @@
+=== AB8500 Charging Algorithm Driver ===
+
+The properties below describes the node for chargalg driver.
+
+Required Properties:
+- compatible = Shall be: "stericsson,ab8500-chargalg"
+- battery = Shall be battery specific information
+
+Example:
+ab8500_chargalg {
+ compatible = "stericsson,ab8500-chargalg";
+ battery = <&ab8500_battery>;
+};
+
+For information on battery specific node, Ref:
+Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
diff --git a/Documentation/devicetree/bindings/power_supply/ab8500/charger.txt b/Documentation/devicetree/bindings/power_supply/ab8500/charger.txt
new file mode 100644
index 000000000..6bdbb08ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/ab8500/charger.txt
@@ -0,0 +1,25 @@
+=== AB8500 Charger Driver ===
+
+Required Properties:
+- compatible = Shall be "stericsson,ab8500-charger"
+- battery = Shall be battery specific information
+ Example:
+ ab8500_charger {
+ compatible = "stericsson,ab8500-charger";
+ battery = <&ab8500_battery>;
+ };
+
+- vddadc-supply: Supply for USB and Main charger
+ Example:
+ ab8500-charger {
+ vddadc-supply = <&ab8500_ldo_tvout_reg>;
+ }
+- autopower_cfg:
+ Boolean value depicting the presence of 'automatic poweron after powerloss'
+ Example:
+ ab8500-charger {
+ autopower_cfg;
+ };
+
+For information on battery specific node, Ref:
+Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
diff --git a/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt b/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
new file mode 100644
index 000000000..ccafcb911
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/ab8500/fg.txt
@@ -0,0 +1,58 @@
+=== AB8500 Fuel Gauge Driver ===
+
+AB8500 is a mixed signal multimedia and power management
+device comprising: power and energy-management-module,
+wall-charger, usb-charger, audio codec, general purpose adc,
+tvout, clock management and sim card interface.
+
+Fuelgauge support is part of energy-management-modules, other
+components of this module are:
+main-charger, usb-combo-charger and battery-temperature-monitoring.
+
+The properties below describes the node for fuelgauge driver.
+
+Required Properties:
+- compatible = This shall be: "stericsson,ab8500-fg"
+- battery = Shall be battery specific information
+ Example:
+ ab8500_fg {
+ compatible = "stericsson,ab8500-fg";
+ battery = <&ab8500_battery>;
+ };
+
+dependent node:
+ ab8500_battery: ab8500_battery {
+ };
+ This node will provide information on 'thermistor interface' and
+ 'battery technology type' used.
+
+Properties of this node are:
+thermistor-on-batctrl:
+ A boolean value indicating thermistor interface to battery
+
+ Note:
+ 'btemp' and 'batctrl' are the pins interfaced for battery temperature
+ measurement, 'btemp' signal is used when NTC(negative temperature
+ coefficient) resister is interfaced external to battery whereas
+ 'batctrl' pin is used when NTC resister is internal to battery.
+
+ Example:
+ ab8500_battery: ab8500_battery {
+ thermistor-on-batctrl;
+ };
+ indicates: NTC resister is internal to battery, 'batctrl' is used
+ for thermal measurement.
+
+ The absence of property 'thermal-on-batctrl' indicates
+ NTC resister is external to battery and 'btemp' signal is used
+ for thermal measurement.
+
+battery-type:
+ This shall be the battery manufacturing technology type,
+ allowed types are:
+ "UNKNOWN" "NiMH" "LION" "LIPO" "LiFe" "NiCd" "LiMn"
+ Example:
+ ab8500_battery: ab8500_battery {
+ stericsson,battery-type = "LIPO";
+ }
+
diff --git a/Documentation/devicetree/bindings/power_supply/axxia-reset.txt b/Documentation/devicetree/bindings/power_supply/axxia-reset.txt
new file mode 100644
index 000000000..47e720d24
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/axxia-reset.txt
@@ -0,0 +1,20 @@
+Axxia Restart Driver
+
+This driver can do reset of the Axxia SoC. It uses the registers in the syscon
+block to initiate a chip reset.
+
+Required Properties:
+ -compatible: "lsi,axm55xx-reset"
+ -syscon: phandle to the syscon node.
+
+Example:
+
+ syscon: syscon@2010030000 {
+ compatible = "lsi,axxia-syscon", "syscon";
+ reg = <0x20 0x10030000 0 0x2000>;
+ };
+
+ reset: reset@2010031000 {
+ compatible = "lsi,axm55xx-reset";
+ syscon = <&syscon>;
+ };
diff --git a/Documentation/devicetree/bindings/power_supply/charger-manager.txt b/Documentation/devicetree/bindings/power_supply/charger-manager.txt
new file mode 100644
index 000000000..ec4fe9de3
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/charger-manager.txt
@@ -0,0 +1,81 @@
+charger-manager bindings
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Required properties :
+ - compatible : "charger-manager"
+ - <>-supply : for regulator consumer
+ - cm-num-chargers : number of chargers
+ - cm-chargers : name of chargers
+ - cm-fuel-gauge : name of battery fuel gauge
+ - subnode <regulator> :
+ - cm-regulator-name : name of charger regulator
+ - subnode <cable> :
+ - cm-cable-name : name of charger cable
+ - cm-cable-extcon : name of extcon dev
+(optional) - cm-cable-min : minimum current of cable
+(optional) - cm-cable-max : maximum current of cable
+
+Optional properties :
+ - cm-name : charger manager's name (default : "battery")
+ - cm-poll-mode : polling mode (enum polling_modes)
+ - cm-poll-interval : polling interval
+ - cm-battery-stat : battery status (enum data_source)
+ - cm-fullbatt-* : data for full battery checking
+ - cm-thermal-zone : name of external thermometer's thermal zone
+ - cm-battery-* : threshold battery temperature for charging
+ -cold : critical cold temperature of battery for charging
+ -cold-in-minus : flag that cold temperature is in minus degrees
+ -hot : critical hot temperature of battery for charging
+ -temp-diff : temperature difference to allow recharging
+ - cm-dis/charging-max = limits of charging duration
+
+Example :
+ charger-manager@0 {
+ compatible = "charger-manager";
+ chg-reg-supply = <&charger_regulator>;
+
+ cm-name = "battery";
+ /* Always polling ON : 30s */
+ cm-poll-mode = <1>;
+ cm-poll-interval = <30000>;
+
+ cm-fullbatt-vchkdrop-ms = <30000>;
+ cm-fullbatt-vchkdrop-volt = <150000>;
+ cm-fullbatt-soc = <100>;
+
+ cm-battery-stat = <3>;
+
+ cm-num-chargers = <3>;
+ cm-chargers = "charger0", "charger1", "charger2";
+
+ cm-fuel-gauge = "fuelgauge0";
+
+ cm-thermal-zone = "thermal_zone.1"
+ /* in deci centigrade */
+ cm-battery-cold = <50>;
+ cm-battery-cold-in-minus;
+ cm-battery-hot = <800>;
+ cm-battery-temp-diff = <100>;
+
+ /* Allow charging for 5hr */
+ cm-charging-max = <18000000>;
+ /* Allow discharging for 2hr */
+ cm-discharging-max = <7200000>;
+
+ regulator@0 {
+ cm-regulator-name = "chg-reg";
+ cable@0 {
+ cm-cable-name = "USB";
+ cm-cable-extcon = "extcon-dev.0";
+ cm-cable-min = <475000>;
+ cm-cable-max = <500000>;
+ };
+ cable@1 {
+ cm-cable-name = "TA";
+ cm-cable-extcon = "extcon-dev.0";
+ cm-cable-min = <650000>;
+ cm-cable-max = <675000>;
+ };
+ };
+
+ };
diff --git a/Documentation/devicetree/bindings/power_supply/gpio-charger.txt b/Documentation/devicetree/bindings/power_supply/gpio-charger.txt
new file mode 100644
index 000000000..adbb5dc5b
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/gpio-charger.txt
@@ -0,0 +1,27 @@
+gpio-charger
+
+Required properties :
+ - compatible : "gpio-charger"
+ - gpios : GPIO indicating the charger presence.
+ See GPIO binding in bindings/gpio/gpio.txt .
+ - charger-type : power supply type, one of
+ unknown
+ battery
+ ups
+ mains
+ usb-sdp (USB standard downstream port)
+ usb-dcp (USB dedicated charging port)
+ usb-cdp (USB charging downstream port)
+ usb-aca (USB accessory charger adapter)
+
+Example:
+
+ usb_charger: charger {
+ compatible = "gpio-charger";
+ charger-type = "usb-sdp";
+ gpios = <&gpf0 2 0 0 0>;
+ }
+
+ battery {
+ power-supplies = <&usb_charger>;
+ };
diff --git a/Documentation/devicetree/bindings/power_supply/imx-snvs-poweroff.txt b/Documentation/devicetree/bindings/power_supply/imx-snvs-poweroff.txt
new file mode 100644
index 000000000..dc7c9bad6
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/imx-snvs-poweroff.txt
@@ -0,0 +1,23 @@
+i.mx6 Poweroff Driver
+
+SNVS_LPCR in SNVS module can power off the whole system by pull
+PMIC_ON_REQ low if PMIC_ON_REQ is connected with external PMIC.
+If you don't want to use PMIC_ON_REQ as power on/off control,
+please set status='disabled' to disable this driver.
+
+Required Properties:
+-compatible: "fsl,sec-v4.0-poweroff"
+-reg: Specifies the physical address of the SNVS_LPCR register
+
+Example:
+ snvs@020cc000 {
+ compatible = "fsl,sec-v4.0-mon", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x020cc000 0x4000>;
+ .....
+ snvs_poweroff: snvs-poweroff@38 {
+ compatible = "fsl,sec-v4.0-poweroff";
+ reg = <0x38 0x4>;
+ };
+ }
diff --git a/Documentation/devicetree/bindings/power_supply/lp8727_charger.txt b/Documentation/devicetree/bindings/power_supply/lp8727_charger.txt
new file mode 100644
index 000000000..2246bc5c8
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/lp8727_charger.txt
@@ -0,0 +1,44 @@
+Binding for TI/National Semiconductor LP8727 Charger
+
+Required properties:
+- compatible: "ti,lp8727"
+- reg: I2C slave address 27h
+
+Optional properties:
+- interrupt-parent: interrupt controller node (see interrupt binding[0])
+- interrupts: interrupt specifier (see interrupt binding[0])
+- debounce-ms: interrupt debounce time. (u32)
+
+AC and USB charging parameters
+- charger-type: "ac" or "usb" (string)
+- eoc-level: value of 'enum lp8727_eoc_level' (u8)
+- charging-current: value of 'enum lp8727_ichg' (u8)
+
+[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+
+Example)
+
+lp8727@27 {
+ compatible = "ti,lp8727";
+ reg = <0x27>;
+
+ /* GPIO 134 is used for LP8728 interrupt pin */
+ interrupt-parent = <&gpio5>; /* base = 128 */
+ interrupts = <6 0x2>; /* offset = 6, falling edge type */
+
+ debounce-ms = <300>;
+
+ /* AC charger: 5% EOC and 500mA charging current */
+ ac {
+ charger-type = "ac";
+ eoc-level = /bits/ 8 <0>;
+ charging-current = /bits/ 8 <4>;
+ };
+
+ /* USB charger: 10% EOC and 400mA charging current */
+ usb {
+ charger-type = "usb";
+ eoc-level = /bits/ 8 <1>;
+ charging-current = /bits/ 8 <2>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/power_supply/max17042_battery.txt b/Documentation/devicetree/bindings/power_supply/max17042_battery.txt
new file mode 100644
index 000000000..5bc9b685c
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/max17042_battery.txt
@@ -0,0 +1,18 @@
+max17042_battery
+~~~~~~~~~~~~~~~~
+
+Required properties :
+ - compatible : "maxim,max17042"
+
+Optional properties :
+ - maxim,rsns-microohm : Resistance of rsns resistor in micro Ohms
+ (datasheet-recommended value is 10000).
+ Defining this property enables current-sense functionality.
+
+Example:
+
+ battery-charger@36 {
+ compatible = "maxim,max17042";
+ reg = <0x36>;
+ maxim,rsns-microohm = <10000>;
+ };
diff --git a/Documentation/devicetree/bindings/power_supply/max8925_batter.txt b/Documentation/devicetree/bindings/power_supply/max8925_batter.txt
new file mode 100644
index 000000000..d7e3e0c0f
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/max8925_batter.txt
@@ -0,0 +1,18 @@
+max8925-battery bindings
+~~~~~~~~~~~~~~~~
+
+Optional properties :
+ - batt-detect: whether support battery detect
+ - topoff-threshold: set charging current in topoff mode
+ - fast-charge: set charging current in fast mode
+ - no-temp-support: whether support temperature protection detect
+ - no-insert-detect: whether support insert detect
+
+Example:
+ charger {
+ batt-detect = <0>;
+ topoff-threshold = <1>;
+ fast-charge = <7>;
+ no-temp-support = <0>;
+ no-insert-detect = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/power_supply/msm-poweroff.txt b/Documentation/devicetree/bindings/power_supply/msm-poweroff.txt
new file mode 100644
index 000000000..ce44ad357
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/msm-poweroff.txt
@@ -0,0 +1,17 @@
+MSM Restart Driver
+
+A power supply hold (ps-hold) bit is set to power the msm chipsets.
+Clearing that bit allows us to restart/poweroff. The difference
+between poweroff and restart is determined by unique power manager IC
+settings.
+
+Required Properties:
+-compatible: "qcom,pshold"
+-reg: Specifies the physical address of the ps-hold register
+
+Example:
+
+ restart@fc4ab000 {
+ compatible = "qcom,pshold";
+ reg = <0xfc4ab000 0x4>;
+ };
diff --git a/Documentation/devicetree/bindings/power_supply/olpc_battery.txt b/Documentation/devicetree/bindings/power_supply/olpc_battery.txt
new file mode 100644
index 000000000..c8901b399
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/olpc_battery.txt
@@ -0,0 +1,5 @@
+OLPC battery
+~~~~~~~~~~~~
+
+Required properties:
+ - compatible : "olpc,xo1-battery"
diff --git a/Documentation/devicetree/bindings/power_supply/power_supply.txt b/Documentation/devicetree/bindings/power_supply/power_supply.txt
new file mode 100644
index 000000000..8391bfa0e
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/power_supply.txt
@@ -0,0 +1,23 @@
+Power Supply Core Support
+
+Optional Properties:
+ - power-supplies : This property is added to a supply in order to list the
+ devices which supply it power, referenced by their phandles.
+
+Example:
+
+ usb-charger: power@e {
+ compatible = "some,usb-charger";
+ ...
+ };
+
+ ac-charger: power@c {
+ compatible = "some,ac-charger";
+ ...
+ };
+
+ battery@b {
+ compatible = "some,battery";
+ ...
+ power-supplies = <&usb-charger>, <&ac-charger>;
+ };
diff --git a/Documentation/devicetree/bindings/power_supply/qnap-poweroff.txt b/Documentation/devicetree/bindings/power_supply/qnap-poweroff.txt
new file mode 100644
index 000000000..af25e77c0
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/qnap-poweroff.txt
@@ -0,0 +1,16 @@
+* QNAP Power Off
+
+QNAP NAS devices have a microcontroller controlling the main power
+supply. This microcontroller is connected to UART1 of the Kirkwood and
+Orion5x SoCs. Sending the character 'A', at 19200 baud, tells the
+microcontroller to turn the power off. This driver adds a handler to
+pm_power_off which is called to turn the power off.
+
+Synology NAS devices use a similar scheme, but a different baud rate,
+9600, and a different character, '1'.
+
+Required Properties:
+- compatible: Should be "qnap,power-off" or "synology,power-off"
+
+- reg: Address and length of the register set for UART1
+- clocks: tclk clock
diff --git a/Documentation/devicetree/bindings/power_supply/restart-poweroff.txt b/Documentation/devicetree/bindings/power_supply/restart-poweroff.txt
new file mode 100644
index 000000000..5776e684a
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/restart-poweroff.txt
@@ -0,0 +1,8 @@
+* Restart Power Off
+
+Buffalo Linkstation LS-XHL and LS-CHLv2, and other devices power off
+by restarting and letting u-boot keep hold of the machine until the
+user presses a button.
+
+Required Properties:
+- compatible: Should be "restart-poweroff"
diff --git a/Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt b/Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt
new file mode 100644
index 000000000..c40e8926f
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/sbs_sbs-battery.txt
@@ -0,0 +1,23 @@
+SBS sbs-battery
+~~~~~~~~~~
+
+Required properties :
+ - compatible : "sbs,sbs-battery"
+
+Optional properties :
+ - sbs,i2c-retry-count : The number of times to retry i2c transactions on i2c
+ IO failure.
+ - sbs,poll-retry-count : The number of times to try looking for new status
+ after an external change notification.
+ - sbs,battery-detect-gpios : The gpio which signals battery detection and
+ a flag specifying its polarity.
+
+Example:
+
+ bq20z75@b {
+ compatible = "sbs,sbs-battery";
+ reg = < 0xb >;
+ sbs,i2c-retry-count = <2>;
+ sbs,poll-retry-count = <10>;
+ sbs,battery-detect-gpios = <&gpio-controller 122 1>;
+ }
diff --git a/Documentation/devicetree/bindings/power_supply/ti,bq24735.txt b/Documentation/devicetree/bindings/power_supply/ti,bq24735.txt
new file mode 100644
index 000000000..4f6a55018
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/ti,bq24735.txt
@@ -0,0 +1,32 @@
+TI BQ24735 Charge Controller
+~~~~~~~~~~
+
+Required properties :
+ - compatible : "ti,bq24735"
+
+Optional properties :
+ - interrupts : Specify the interrupt to be used to trigger when the AC
+ adapter is either plugged in or removed.
+ - ti,ac-detect-gpios : This GPIO is optionally used to read the AC adapter
+ presence. This is a Host GPIO that is configured as an input and
+ connected to the bq24735.
+ - ti,charge-current : Used to control and set the charging current. This value
+ must be between 128mA and 8.128A with a 64mA step resolution. The POR value
+ is 0x0000h. This number is in mA (e.g. 8192), see spec for more information
+ about the ChargeCurrent (0x14h) register.
+ - ti,charge-voltage : Used to control and set the charging voltage. This value
+ must be between 1.024V and 19.2V with a 16mV step resolution. The POR value
+ is 0x0000h. This number is in mV (e.g. 19200), see spec for more information
+ about the ChargeVoltage (0x15h) register.
+ - ti,input-current : Used to control and set the charger input current. This
+ value must be between 128mA and 8.064A with a 128mA step resolution. The
+ POR value is 0x1000h. This number is in mA (e.g. 8064), see the spec for
+ more information about the InputCurrent (0x3fh) register.
+
+Example:
+
+ bq24735@9 {
+ compatible = "ti,bq24735";
+ reg = <0x9>;
+ ti,ac-detect-gpios = <&gpio 72 0x1>;
+ }
diff --git a/Documentation/devicetree/bindings/power_supply/tps65090.txt b/Documentation/devicetree/bindings/power_supply/tps65090.txt
new file mode 100644
index 000000000..8e5e0d391
--- /dev/null
+++ b/Documentation/devicetree/bindings/power_supply/tps65090.txt
@@ -0,0 +1,17 @@
+TPS65090 Frontend PMU with Switchmode Charger
+
+Required Properties:
+-compatible: "ti,tps65090-charger"
+
+Optional Properties:
+-ti,enable-low-current-chrg: Enables charging when a low current is detected
+ while the default logic is to stop charging.
+
+This node is a subnode of the tps65090 PMIC.
+
+Example:
+
+ tps65090-charger {
+ compatible = "ti,tps65090-charger";
+ ti,enable-low-current-chrg;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/4xx/akebono.txt b/Documentation/devicetree/bindings/powerpc/4xx/akebono.txt
new file mode 100644
index 000000000..db939210e
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/4xx/akebono.txt
@@ -0,0 +1,54 @@
+
+IBM Akebono board device tree
+=============================
+
+The IBM Akebono board is a development board for the PPC476GTR SoC.
+
+0) The root node
+
+ Required properties:
+
+ - model : "ibm,akebono".
+ - compatible : "ibm,akebono" , "ibm,476gtr".
+
+1.a) The Secure Digital Host Controller Interface (SDHCI) node
+
+ Represent the Secure Digital Host Controller Interfaces.
+
+ Required properties:
+
+ - compatible : should be "ibm,476gtr-sdhci","generic-sdhci".
+ - reg : should contain the SDHCI registers location and length.
+ - interrupt-parent : a phandle for the interrupt controller.
+ - interrupts : should contain the SDHCI interrupt.
+
+1.b) The Advanced Host Controller Interface (AHCI) SATA node
+
+ Represents the advanced host controller SATA interface.
+
+ Required properties:
+
+ - compatible : should be "ibm,476gtr-ahci".
+ - reg : should contain the AHCI registers location and length.
+ - interrupt-parent : a phandle for the interrupt controller.
+ - interrupts : should contain the AHCI interrupt.
+
+1.c) The FPGA node
+
+ The Akebono board stores some board information such as the revision
+ number in an FPGA which is represented by this node.
+
+ Required properties:
+
+ - compatible : should be "ibm,akebono-fpga".
+ - reg : should contain the FPGA registers location and length.
+
+1.d) The AVR node
+
+ The Akebono board has an Atmel AVR microprocessor attached to the I2C
+ bus as a power controller for the board.
+
+ Required properties:
+
+ - compatible : should be "ibm,akebono-avr".
+ - reg : should contain the I2C bus address for the AVR.
diff --git a/Documentation/devicetree/bindings/powerpc/4xx/cpm.txt b/Documentation/devicetree/bindings/powerpc/4xx/cpm.txt
new file mode 100644
index 000000000..ee459806d
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/4xx/cpm.txt
@@ -0,0 +1,52 @@
+PPC4xx Clock Power Management (CPM) node
+
+Required properties:
+ - compatible : compatible list, currently only "ibm,cpm"
+ - dcr-access-method : "native"
+ - dcr-reg : < DCR register range >
+
+Optional properties:
+ - er-offset : All 4xx SoCs with a CPM controller have
+ one of two different order for the CPM
+ registers. Some have the CPM registers
+ in the following order (ER,FR,SR). The
+ others have them in the following order
+ (SR,ER,FR). For the second case set
+ er-offset = <1>.
+ - unused-units : specifier consist of one cell. For each
+ bit in the cell, the corresponding bit
+ in CPM will be set to turn off unused
+ devices.
+ - idle-doze : specifier consist of one cell. For each
+ bit in the cell, the corresponding bit
+ in CPM will be set to turn off unused
+ devices. This is usually just CPM[CPU].
+ - standby : specifier consist of one cell. For each
+ bit in the cell, the corresponding bit
+ in CPM will be set on standby and
+ restored on resume.
+ - suspend : specifier consist of one cell. For each
+ bit in the cell, the corresponding bit
+ in CPM will be set on suspend (mem) and
+ restored on resume. Note, for standby
+ and suspend the corresponding bits can
+ be different or the same. Usually for
+ standby only class 2 and 3 units are set.
+ However, the interface does not care.
+ If they are the same, the additional
+ power saving will be seeing if support
+ is available to put the DDR in self
+ refresh mode and any additional power
+ saving techniques for the specific SoC.
+
+Example:
+ CPM0: cpm {
+ compatible = "ibm,cpm";
+ dcr-access-method = "native";
+ dcr-reg = <0x160 0x003>;
+ er-offset = <0>;
+ unused-units = <0x00000100>;
+ idle-doze = <0x02000000>;
+ standby = <0xfeff0000>;
+ suspend = <0xfeff791d>;
+};
diff --git a/Documentation/devicetree/bindings/powerpc/4xx/emac.txt b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt
new file mode 100644
index 000000000..712baf6c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/4xx/emac.txt
@@ -0,0 +1,148 @@
+ 4xx/Axon EMAC ethernet nodes
+
+ The EMAC ethernet controller in IBM and AMCC 4xx chips, and also
+ the Axon bridge. To operate this needs to interact with a this
+ special McMAL DMA controller, and sometimes an RGMII or ZMII
+ interface. In addition to the nodes and properties described
+ below, the node for the OPB bus on which the EMAC sits must have a
+ correct clock-frequency property.
+
+ i) The EMAC node itself
+
+ Required properties:
+ - device_type : "network"
+
+ - compatible : compatible list, contains 2 entries, first is
+ "ibm,emac-CHIP" where CHIP is the host ASIC (440gx,
+ 405gp, Axon) and second is either "ibm,emac" or
+ "ibm,emac4". For Axon, thus, we have: "ibm,emac-axon",
+ "ibm,emac4"
+ - interrupts : <interrupt mapping for EMAC IRQ and WOL IRQ>
+ - interrupt-parent : optional, if needed for interrupt mapping
+ - reg : <registers mapping>
+ - local-mac-address : 6 bytes, MAC address
+ - mal-device : phandle of the associated McMAL node
+ - mal-tx-channel : 1 cell, index of the tx channel on McMAL associated
+ with this EMAC
+ - mal-rx-channel : 1 cell, index of the rx channel on McMAL associated
+ with this EMAC
+ - cell-index : 1 cell, hardware index of the EMAC cell on a given
+ ASIC (typically 0x0 and 0x1 for EMAC0 and EMAC1 on
+ each Axon chip)
+ - max-frame-size : 1 cell, maximum frame size supported in bytes
+ - rx-fifo-size : 1 cell, Rx fifo size in bytes for 10 and 100 Mb/sec
+ operations.
+ For Axon, 2048
+ - tx-fifo-size : 1 cell, Tx fifo size in bytes for 10 and 100 Mb/sec
+ operations.
+ For Axon, 2048.
+ - fifo-entry-size : 1 cell, size of a fifo entry (used to calculate
+ thresholds).
+ For Axon, 0x00000010
+ - mal-burst-size : 1 cell, MAL burst size (used to calculate thresholds)
+ in bytes.
+ For Axon, 0x00000100 (I think ...)
+ - phy-mode : string, mode of operations of the PHY interface.
+ Supported values are: "mii", "rmii", "smii", "rgmii",
+ "tbi", "gmii", rtbi", "sgmii".
+ For Axon on CAB, it is "rgmii"
+ - mdio-device : 1 cell, required iff using shared MDIO registers
+ (440EP). phandle of the EMAC to use to drive the
+ MDIO lines for the PHY used by this EMAC.
+ - zmii-device : 1 cell, required iff connected to a ZMII. phandle of
+ the ZMII device node
+ - zmii-channel : 1 cell, required iff connected to a ZMII. Which ZMII
+ channel or 0xffffffff if ZMII is only used for MDIO.
+ - rgmii-device : 1 cell, required iff connected to an RGMII. phandle
+ of the RGMII device node.
+ For Axon: phandle of plb5/plb4/opb/rgmii
+ - rgmii-channel : 1 cell, required iff connected to an RGMII. Which
+ RGMII channel is used by this EMAC.
+ Fox Axon: present, whatever value is appropriate for each
+ EMAC, that is the content of the current (bogus) "phy-port"
+ property.
+
+ Optional properties:
+ - phy-address : 1 cell, optional, MDIO address of the PHY. If absent,
+ a search is performed.
+ - phy-map : 1 cell, optional, bitmap of addresses to probe the PHY
+ for, used if phy-address is absent. bit 0x00000001 is
+ MDIO address 0.
+ For Axon it can be absent, though my current driver
+ doesn't handle phy-address yet so for now, keep
+ 0x00ffffff in it.
+ - rx-fifo-size-gige : 1 cell, Rx fifo size in bytes for 1000 Mb/sec
+ operations (if absent the value is the same as
+ rx-fifo-size). For Axon, either absent or 2048.
+ - tx-fifo-size-gige : 1 cell, Tx fifo size in bytes for 1000 Mb/sec
+ operations (if absent the value is the same as
+ tx-fifo-size). For Axon, either absent or 2048.
+ - tah-device : 1 cell, optional. If connected to a TAH engine for
+ offload, phandle of the TAH device node.
+ - tah-channel : 1 cell, optional. If appropriate, channel used on the
+ TAH engine.
+
+ Example:
+
+ EMAC0: ethernet@40000800 {
+ device_type = "network";
+ compatible = "ibm,emac-440gp", "ibm,emac";
+ interrupt-parent = <&UIC1>;
+ interrupts = <1c 4 1d 4>;
+ reg = <40000800 70>;
+ local-mac-address = [00 04 AC E3 1B 1E];
+ mal-device = <&MAL0>;
+ mal-tx-channel = <0 1>;
+ mal-rx-channel = <0>;
+ cell-index = <0>;
+ max-frame-size = <5dc>;
+ rx-fifo-size = <1000>;
+ tx-fifo-size = <800>;
+ phy-mode = "rmii";
+ phy-map = <00000001>;
+ zmii-device = <&ZMII0>;
+ zmii-channel = <0>;
+ };
+
+ ii) McMAL node
+
+ Required properties:
+ - device_type : "dma-controller"
+ - compatible : compatible list, containing 2 entries, first is
+ "ibm,mcmal-CHIP" where CHIP is the host ASIC (like
+ emac) and the second is either "ibm,mcmal" or
+ "ibm,mcmal2".
+ For Axon, "ibm,mcmal-axon","ibm,mcmal2"
+ - interrupts : <interrupt mapping for the MAL interrupts sources:
+ 5 sources: tx_eob, rx_eob, serr, txde, rxde>.
+ For Axon: This is _different_ from the current
+ firmware. We use the "delayed" interrupts for txeob
+ and rxeob. Thus we end up with mapping those 5 MPIC
+ interrupts, all level positive sensitive: 10, 11, 32,
+ 33, 34 (in decimal)
+ - dcr-reg : < DCR registers range >
+ - dcr-parent : if needed for dcr-reg
+ - num-tx-chans : 1 cell, number of Tx channels
+ - num-rx-chans : 1 cell, number of Rx channels
+
+ iii) ZMII node
+
+ Required properties:
+ - compatible : compatible list, containing 2 entries, first is
+ "ibm,zmii-CHIP" where CHIP is the host ASIC (like
+ EMAC) and the second is "ibm,zmii".
+ For Axon, there is no ZMII node.
+ - reg : <registers mapping>
+
+ iv) RGMII node
+
+ Required properties:
+ - compatible : compatible list, containing 2 entries, first is
+ "ibm,rgmii-CHIP" where CHIP is the host ASIC (like
+ EMAC) and the second is "ibm,rgmii".
+ For Axon, "ibm,rgmii-axon","ibm,rgmii"
+ - reg : <registers mapping>
+ - revision : as provided by the RGMII new version register if
+ available.
+ For Axon: 0x0000012a
+
diff --git a/Documentation/devicetree/bindings/powerpc/4xx/hsta.txt b/Documentation/devicetree/bindings/powerpc/4xx/hsta.txt
new file mode 100644
index 000000000..c737c8338
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/4xx/hsta.txt
@@ -0,0 +1,19 @@
+
+ppc476gtr High Speed Serial Assist (HSTA) node
+==============================================
+
+The 476gtr SoC contains a high speed serial assist module attached
+between the plb4 and plb6 system buses to provide high speed data
+transfer between memory and system peripherals as well as support for
+PCI message signalled interrupts.
+
+Currently only the MSI support is used by Linux using the following
+device tree entries:
+
+Require properties:
+- compatible : "ibm,476gtr-hsta-msi", "ibm,hsta-msi"
+- reg : register mapping for the HSTA MSI space
+- interrupt-parent : parent controller for mapping interrupts
+- interrupts : ordered interrupt mapping for each MSI in the register
+ space. The first interrupt should be associated with a
+ register offset of 0x00, the second to 0x10, etc.
diff --git a/Documentation/devicetree/bindings/powerpc/4xx/ndfc.txt b/Documentation/devicetree/bindings/powerpc/4xx/ndfc.txt
new file mode 100644
index 000000000..869f0b5f1
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/4xx/ndfc.txt
@@ -0,0 +1,39 @@
+AMCC NDFC (NanD Flash Controller)
+
+Required properties:
+- compatible : "ibm,ndfc".
+- reg : should specify chip select and size used for the chip (0x2000).
+
+Optional properties:
+- ccr : NDFC config and control register value (default 0).
+- bank-settings : NDFC bank configuration register value (default 0).
+
+Notes:
+- partition(s) - follows the OF MTD standard for partitions
+
+Example:
+
+ndfc@1,0 {
+ compatible = "ibm,ndfc";
+ reg = <0x00000001 0x00000000 0x00002000>;
+ ccr = <0x00001000>;
+ bank-settings = <0x80002222>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ nand {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ partition@0 {
+ label = "kernel";
+ reg = <0x00000000 0x00200000>;
+ };
+ partition@200000 {
+ label = "root";
+ reg = <0x00200000 0x03E00000>;
+ };
+ };
+};
+
+
diff --git a/Documentation/devicetree/bindings/powerpc/4xx/ppc440spe-adma.txt b/Documentation/devicetree/bindings/powerpc/4xx/ppc440spe-adma.txt
new file mode 100644
index 000000000..515ebcf1b
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/4xx/ppc440spe-adma.txt
@@ -0,0 +1,93 @@
+PPC440SPe DMA/XOR (DMA Controller and XOR Accelerator)
+
+Device nodes needed for operation of the ppc440spe-adma driver
+are specified hereby. These are I2O/DMA, DMA and XOR nodes
+for DMA engines and Memory Queue Module node. The latter is used
+by ADMA driver for configuration of RAID-6 H/W capabilities of
+the PPC440SPe. In addition to the nodes and properties described
+below, the ranges property of PLB node must specify ranges for
+DMA devices.
+
+ i) The I2O node
+
+ Required properties:
+
+ - compatible : "ibm,i2o-440spe";
+ - reg : <registers mapping>
+ - dcr-reg : <DCR registers range>
+
+ Example:
+
+ I2O: i2o@400100000 {
+ compatible = "ibm,i2o-440spe";
+ reg = <0x00000004 0x00100000 0x100>;
+ dcr-reg = <0x060 0x020>;
+ };
+
+
+ ii) The DMA node
+
+ Required properties:
+
+ - compatible : "ibm,dma-440spe";
+ - cell-index : 1 cell, hardware index of the DMA engine
+ (typically 0x0 and 0x1 for DMA0 and DMA1)
+ - reg : <registers mapping>
+ - dcr-reg : <DCR registers range>
+ - interrupts : <interrupt mapping for DMA0/1 interrupts sources:
+ 2 sources: DMAx CS FIFO Needs Service IRQ (on UIC0)
+ and DMA Error IRQ (on UIC1). The latter is common
+ for both DMA engines>.
+ - interrupt-parent : needed for interrupt mapping
+
+ Example:
+
+ DMA0: dma0@400100100 {
+ compatible = "ibm,dma-440spe";
+ cell-index = <0>;
+ reg = <0x00000004 0x00100100 0x100>;
+ dcr-reg = <0x060 0x020>;
+ interrupt-parent = <&DMA0>;
+ interrupts = <0 1>;
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ interrupt-map = <
+ 0 &UIC0 0x14 4
+ 1 &UIC1 0x16 4>;
+ };
+
+
+ iii) XOR Accelerator node
+
+ Required properties:
+
+ - compatible : "amcc,xor-accelerator";
+ - reg : <registers mapping>
+ - interrupts : <interrupt mapping for XOR interrupt source>
+ - interrupt-parent : for interrupt mapping
+
+ Example:
+
+ xor-accel@400200000 {
+ compatible = "amcc,xor-accelerator";
+ reg = <0x00000004 0x00200000 0x400>;
+ interrupt-parent = <&UIC1>;
+ interrupts = <0x1f 4>;
+ };
+
+
+ iv) Memory Queue Module node
+
+ Required properties:
+
+ - compatible : "ibm,mq-440spe";
+ - dcr-reg : <DCR registers range>
+
+ Example:
+
+ MQ0: mq {
+ compatible = "ibm,mq-440spe";
+ dcr-reg = <0x040 0x020>;
+ };
+
diff --git a/Documentation/devicetree/bindings/powerpc/4xx/reboot.txt b/Documentation/devicetree/bindings/powerpc/4xx/reboot.txt
new file mode 100644
index 000000000..5bc635513
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/4xx/reboot.txt
@@ -0,0 +1,18 @@
+Reboot property to control system reboot on PPC4xx systems:
+
+By setting "reset_type" to one of the following values, the default
+software reset mechanism may be overridden. Here the possible values of
+"reset_type":
+
+ 1 - PPC4xx core reset
+ 2 - PPC4xx chip reset
+ 3 - PPC4xx system reset (default)
+
+Example:
+
+ cpu@0 {
+ device_type = "cpu";
+ model = "PowerPC,440SPe";
+ ...
+ reset-type = <2>; /* Use chip-reset */
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/board.txt b/Documentation/devicetree/bindings/powerpc/fsl/board.txt
new file mode 100644
index 000000000..cff38bdbc
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/board.txt
@@ -0,0 +1,102 @@
+Freescale Reference Board Bindings
+
+This document describes device tree bindings for various devices that
+exist on some Freescale reference boards.
+
+* Board Control and Status (BCSR)
+
+Required properties:
+
+ - compatible : Should be "fsl,<board>-bcsr"
+ - reg : Offset and length of the register set for the device
+
+Example:
+
+ bcsr@f8000000 {
+ compatible = "fsl,mpc8360mds-bcsr";
+ reg = <f8000000 8000>;
+ };
+
+* Freescale on-board FPGA
+
+This is the memory-mapped registers for on board FPGA.
+
+Required properities:
+- compatible: should be a board-specific string followed by a string
+ indicating the type of FPGA. Example:
+ "fsl,<board>-fpga", "fsl,fpga-pixis"
+- reg: should contain the address and the length of the FPGA register set.
+- interrupt-parent: should specify phandle for the interrupt controller.
+- interrupts: should specify event (wakeup) IRQ.
+
+Example (P1022DS):
+
+ board-control@3,0 {
+ compatible = "fsl,p1022ds-fpga", "fsl,fpga-ngpixis";
+ reg = <3 0 0x30>;
+ interrupt-parent = <&mpic>;
+ interrupts = <8 8 0 0>;
+ };
+
+* Freescale BCSR GPIO banks
+
+Some BCSR registers act as simple GPIO controllers, each such
+register can be represented by the gpio-controller node.
+
+Required properities:
+- compatible : Should be "fsl,<board>-bcsr-gpio".
+- reg : Should contain the address and the length of the GPIO bank
+ register.
+- #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters (currently unused).
+- gpio-controller : Marks the port as GPIO controller.
+
+Example:
+
+ bcsr@1,0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,mpc8360mds-bcsr";
+ reg = <1 0 0x8000>;
+ ranges = <0 1 0 0x8000>;
+
+ bcsr13: gpio-controller@d {
+ #gpio-cells = <2>;
+ compatible = "fsl,mpc8360mds-bcsr-gpio";
+ reg = <0xd 1>;
+ gpio-controller;
+ };
+ };
+
+* Freescale on-board FPGA connected on I2C bus
+
+Some Freescale boards like BSC9132QDS have on board FPGA connected on
+the i2c bus.
+
+Required properties:
+- compatible: Should be a board-specific string followed by a string
+ indicating the type of FPGA. Example:
+ "fsl,<board>-fpga", "fsl,fpga-qixis-i2c"
+- reg: Should contain the address of the FPGA
+
+Example:
+ fpga: fpga@66 {
+ compatible = "fsl,bsc9132qds-fpga", "fsl,fpga-qixis-i2c";
+ reg = <0x66>;
+ };
+
+* Freescale on-board CPLD
+
+Some Freescale boards like T1040RDB have an on board CPLD connected.
+
+Required properties:
+- compatible: Should be a board-specific string like "fsl,<board>-cpld"
+ Example:
+ "fsl,t1040rdb-cpld", "fsl,t1042rdb-cpld", "fsl,t1042rdb_pi-cpld"
+- reg: should describe CPLD registers
+
+Example:
+ cpld@3,0 {
+ compatible = "fsl,t1040rdb-cpld";
+ reg = <3 0 0x300>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cache_sram.txt b/Documentation/devicetree/bindings/powerpc/fsl/cache_sram.txt
new file mode 100644
index 000000000..781955f52
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cache_sram.txt
@@ -0,0 +1,20 @@
+* Freescale PQ3 and QorIQ based Cache SRAM
+
+Freescale's mpc85xx and some QorIQ platforms provide an
+option of configuring a part of (or full) cache memory
+as SRAM. This cache SRAM representation in the device
+tree should be done as under:-
+
+Required properties:
+
+- compatible : should be "fsl,p2020-cache-sram"
+- fsl,cache-sram-ctlr-handle : points to the L2 controller
+- reg : offset and length of the cache-sram.
+
+Example:
+
+cache-sram@fff00000 {
+ fsl,cache-sram-ctlr-handle = <&L2>;
+ reg = <0 0xfff00000 0 0x10000>;
+ compatible = "fsl,p2020-cache-sram";
+};
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/ccf.txt b/Documentation/devicetree/bindings/powerpc/fsl/ccf.txt
new file mode 100644
index 000000000..454da7e08
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/ccf.txt
@@ -0,0 +1,46 @@
+Freescale CoreNet Coherency Fabric(CCF) Device Tree Binding
+
+DESCRIPTION
+
+The CoreNet coherency fabric is a fabric-oriented, connectivity infrastructure
+that enables the implementation of coherent, multicore systems.
+
+Required properties:
+
+- compatible: <string list>
+ fsl,corenet1-cf - CoreNet coherency fabric version 1.
+ Example chips: T4240, B4860
+
+ fsl,corenet2-cf - CoreNet coherency fabric version 2.
+ Example chips: P5040, P5020, P4080, P3041, P2041
+
+ fsl,corenet-cf - Used to represent the common registers
+ between CCF version 1 and CCF version 2. This compatible
+ is retained for compatibility reasons, as it was already
+ used for both CCF version 1 chips and CCF version 2
+ chips. It should be specified after either
+ "fsl,corenet1-cf" or "fsl,corenet2-cf".
+
+- reg: <prop-encoded-array>
+ A standard property. Represents the CCF registers.
+
+- interrupts: <prop-encoded-array>
+ Interrupt mapping for CCF error interrupt.
+
+- fsl,ccf-num-csdids: <u32>
+ Specifies the number of Coherency Subdomain ID Port Mapping
+ Registers that are supported by the CCF.
+
+- fsl,ccf-num-snoopids: <u32>
+ Specifies the number of Snoop ID Port Mapping Registers that
+ are supported by CCF.
+
+Example:
+
+ corenet-cf@18000 {
+ compatible = "fsl,corenet2-cf", "fsl,corenet-cf";
+ reg = <0x18000 0x1000>;
+ interrupts = <16 2 1 31>;
+ fsl,ccf-num-csdids = <32>;
+ fsl,ccf-num-snoopids = <32>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm.txt
new file mode 100644
index 000000000..160c75248
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm.txt
@@ -0,0 +1,67 @@
+* Freescale Communications Processor Module
+
+NOTE: This is an interim binding, and will likely change slightly,
+as more devices are supported. The QE bindings especially are
+incomplete.
+
+* Root CPM node
+
+Properties:
+- compatible : "fsl,cpm1", "fsl,cpm2", or "fsl,qe".
+- reg : A 48-byte region beginning with CPCR.
+
+Example:
+ cpm@119c0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #interrupt-cells = <2>;
+ compatible = "fsl,mpc8272-cpm", "fsl,cpm2";
+ reg = <119c0 30>;
+ }
+
+* Properties common to multiple CPM/QE devices
+
+- fsl,cpm-command : This value is ORed with the opcode and command flag
+ to specify the device on which a CPM command operates.
+
+- fsl,cpm-brg : Indicates which baud rate generator the device
+ is associated with. If absent, an unused BRG
+ should be dynamically allocated. If zero, the
+ device uses an external clock rather than a BRG.
+
+- reg : Unless otherwise specified, the first resource represents the
+ scc/fcc/ucc registers, and the second represents the device's
+ parameter RAM region (if it has one).
+
+* Multi-User RAM (MURAM)
+
+The multi-user/dual-ported RAM is expressed as a bus under the CPM node.
+
+Ranges must be set up subject to the following restrictions:
+
+- Children's reg nodes must be offsets from the start of all muram, even
+ if the user-data area does not begin at zero.
+- If multiple range entries are used, the difference between the parent
+ address and the child address must be the same in all, so that a single
+ mapping can cover them all while maintaining the ability to determine
+ CPM-side offsets with pointer subtraction. It is recommended that
+ multiple range entries not be used.
+- A child address of zero must be translatable, even if no reg resources
+ contain it.
+
+A child "data" node must exist, compatible with "fsl,cpm-muram-data", to
+indicate the portion of muram that is usable by the OS for arbitrary
+purposes. The data node may have an arbitrary number of reg resources,
+all of which contribute to the allocatable muram pool.
+
+Example, based on mpc8272:
+ muram@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0 10000>;
+
+ data@0 {
+ compatible = "fsl,cpm-muram-data";
+ reg = <0 2000 9800 800>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/brg.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/brg.txt
new file mode 100644
index 000000000..4c7d45eaf
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/brg.txt
@@ -0,0 +1,21 @@
+* Baud Rate Generators
+
+Currently defined compatibles:
+fsl,cpm-brg
+fsl,cpm1-brg
+fsl,cpm2-brg
+
+Properties:
+- reg : There may be an arbitrary number of reg resources; BRG
+ numbers are assigned to these in order.
+- clock-frequency : Specifies the base frequency driving
+ the BRG.
+
+Example:
+ brg@119f0 {
+ compatible = "fsl,mpc8272-brg",
+ "fsl,cpm2-brg",
+ "fsl,cpm-brg";
+ reg = <119f0 10 115f0 10>;
+ clock-frequency = <d#25000000>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/i2c.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/i2c.txt
new file mode 100644
index 000000000..87bc60486
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/i2c.txt
@@ -0,0 +1,41 @@
+* I2C
+
+The I2C controller is expressed as a bus under the CPM node.
+
+Properties:
+- compatible : "fsl,cpm1-i2c", "fsl,cpm2-i2c"
+- reg : On CPM2 devices, the second resource doesn't specify the I2C
+ Parameter RAM itself, but the I2C_BASE field of the CPM2 Parameter RAM
+ (typically 0x8afc 0x2).
+- #address-cells : Should be one. The cell is the i2c device address with
+ the r/w bit set to zero.
+- #size-cells : Should be zero.
+- clock-frequency : Can be used to set the i2c clock frequency. If
+ unspecified, a default frequency of 60kHz is being used.
+The following two properties are deprecated. They are only used by legacy
+i2c drivers to find the bus to probe:
+- linux,i2c-index : Can be used to hard code an i2c bus number. By default,
+ the bus number is dynamically assigned by the i2c core.
+- linux,i2c-class : Can be used to override the i2c class. The class is used
+ by legacy i2c device drivers to find a bus in a specific context like
+ system management, video or sound. By default, I2C_CLASS_HWMON (1) is
+ being used. The definition of the classes can be found in
+ include/i2c/i2c.h
+
+Example, based on mpc823:
+
+ i2c@860 {
+ compatible = "fsl,mpc823-i2c",
+ "fsl,cpm1-i2c";
+ reg = <0x860 0x20 0x3c80 0x30>;
+ interrupts = <16>;
+ interrupt-parent = <&CPM_PIC>;
+ fsl,cpm-command = <0x10>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rtc@68 {
+ compatible = "dallas,ds1307";
+ reg = <0x68>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/pic.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/pic.txt
new file mode 100644
index 000000000..8e3ee1681
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/pic.txt
@@ -0,0 +1,18 @@
+* Interrupt Controllers
+
+Currently defined compatibles:
+- fsl,cpm1-pic
+ - only one interrupt cell
+- fsl,pq1-pic
+- fsl,cpm2-pic
+ - second interrupt cell is level/sense:
+ - 2 is falling edge
+ - 8 is active low
+
+Example:
+ interrupt-controller@10c00 {
+ #interrupt-cells = <2>;
+ interrupt-controller;
+ reg = <10c00 80>;
+ compatible = "mpc8272-pic", "fsl,cpm2-pic";
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/usb.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/usb.txt
new file mode 100644
index 000000000..74bfda4bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/cpm/usb.txt
@@ -0,0 +1,15 @@
+* USB (Universal Serial Bus Controller)
+
+Properties:
+- compatible : "fsl,cpm1-usb", "fsl,cpm2-usb", "fsl,qe-usb"
+
+Example:
+ usb@11bc0 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,cpm2-usb";
+ reg = <11b60 18 8b00 100>;
+ interrupts = <b 8>;
+ interrupt-parent = <&PIC>;
+ fsl,cpm-command = <2e600000>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/gpio.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/gpio.txt
new file mode 100644
index 000000000..349f79fd7
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/gpio.txt
@@ -0,0 +1,38 @@
+Every GPIO controller node must have #gpio-cells property defined,
+this information will be used to translate gpio-specifiers.
+
+On CPM1 devices, all ports are using slightly different register layouts.
+Ports A, C and D are 16bit ports and Ports B and E are 32bit ports.
+
+On CPM2 devices, all ports are 32bit ports and use a common register layout.
+
+Required properties:
+- compatible : "fsl,cpm1-pario-bank-a", "fsl,cpm1-pario-bank-b",
+ "fsl,cpm1-pario-bank-c", "fsl,cpm1-pario-bank-d",
+ "fsl,cpm1-pario-bank-e", "fsl,cpm2-pario-bank"
+- #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters (currently unused).
+- gpio-controller : Marks the port as GPIO controller.
+
+Example of three SOC GPIO banks defined as gpio-controller nodes:
+
+ CPM1_PIO_A: gpio-controller@950 {
+ #gpio-cells = <2>;
+ compatible = "fsl,cpm1-pario-bank-a";
+ reg = <0x950 0x10>;
+ gpio-controller;
+ };
+
+ CPM1_PIO_B: gpio-controller@ab8 {
+ #gpio-cells = <2>;
+ compatible = "fsl,cpm1-pario-bank-b";
+ reg = <0xab8 0x10>;
+ gpio-controller;
+ };
+
+ CPM1_PIO_E: gpio-controller@ac8 {
+ #gpio-cells = <2>;
+ compatible = "fsl,cpm1-pario-bank-e";
+ reg = <0xac8 0x18>;
+ gpio-controller;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt
new file mode 100644
index 000000000..29b28b8f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/network.txt
@@ -0,0 +1,43 @@
+* Network
+
+Currently defined compatibles:
+- fsl,cpm1-scc-enet
+- fsl,cpm2-scc-enet
+- fsl,cpm1-fec-enet
+- fsl,cpm2-fcc-enet (third resource is GFEMR)
+- fsl,qe-enet
+
+Example:
+
+ ethernet@11300 {
+ compatible = "fsl,mpc8272-fcc-enet",
+ "fsl,cpm2-fcc-enet";
+ reg = <11300 20 8400 100 11390 1>;
+ local-mac-address = [ 00 00 00 00 00 00 ];
+ interrupts = <20 8>;
+ interrupt-parent = <&PIC>;
+ phy-handle = <&PHY0>;
+ fsl,cpm-command = <12000300>;
+ };
+
+* MDIO
+
+Currently defined compatibles:
+fsl,pq1-fec-mdio (reg is same as first resource of FEC device)
+fsl,cpm2-mdio-bitbang (reg is port C registers)
+
+Properties for fsl,cpm2-mdio-bitbang:
+fsl,mdio-pin : pin of port C controlling mdio data
+fsl,mdc-pin : pin of port C controlling mdio clock
+
+Example:
+ mdio@10d40 {
+ compatible = "fsl,mpc8272ads-mdio-bitbang",
+ "fsl,mpc8272-mdio-bitbang",
+ "fsl,cpm2-mdio-bitbang";
+ reg = <10d40 14>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ fsl,mdio-pin = <12>;
+ fsl,mdc-pin = <13>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt
new file mode 100644
index 000000000..4f8930263
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe.txt
@@ -0,0 +1,115 @@
+* Freescale QUICC Engine module (QE)
+This represents qe module that is installed on PowerQUICC II Pro.
+
+NOTE: This is an interim binding; it should be updated to fit
+in with the CPM binding later in this document.
+
+Basically, it is a bus of devices, that could act more or less
+as a complete entity (UCC, USB etc ). All of them should be siblings on
+the "root" qe node, using the common properties from there.
+The description below applies to the qe of MPC8360 and
+more nodes and properties would be extended in the future.
+
+i) Root QE device
+
+Required properties:
+- compatible : should be "fsl,qe";
+- model : precise model of the QE, Can be "QE", "CPM", or "CPM2"
+- reg : offset and length of the device registers.
+- bus-frequency : the clock frequency for QUICC Engine.
+- fsl,qe-num-riscs: define how many RISC engines the QE has.
+- fsl,qe-num-snums: define how many serial number(SNUM) the QE can use for the
+ threads.
+
+Optional properties:
+- fsl,firmware-phandle:
+ Usage: required only if there is no fsl,qe-firmware child node
+ Value type: <phandle>
+ Definition: Points to a firmware node (see "QE Firmware Node" below)
+ that contains the firmware that should be uploaded for this QE.
+ The compatible property for the firmware node should say,
+ "fsl,qe-firmware".
+
+Recommended properties
+- brg-frequency : the internal clock source frequency for baud-rate
+ generators in Hz.
+
+Example:
+ qe@e0100000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #interrupt-cells = <2>;
+ compatible = "fsl,qe";
+ ranges = <0 e0100000 00100000>;
+ reg = <e0100000 480>;
+ brg-frequency = <0>;
+ bus-frequency = <179A7B00>;
+ }
+
+* Multi-User RAM (MURAM)
+
+Required properties:
+- compatible : should be "fsl,qe-muram", "fsl,cpm-muram".
+- mode : the could be "host" or "slave".
+- ranges : Should be defined as specified in 1) to describe the
+ translation of MURAM addresses.
+- data-only : sub-node which defines the address area under MURAM
+ bus that can be allocated as data/parameter
+
+Example:
+
+ muram@10000 {
+ compatible = "fsl,qe-muram", "fsl,cpm-muram";
+ ranges = <0 00010000 0000c000>;
+
+ data-only@0{
+ compatible = "fsl,qe-muram-data",
+ "fsl,cpm-muram-data";
+ reg = <0 c000>;
+ };
+ };
+
+* QE Firmware Node
+
+This node defines a firmware binary that is embedded in the device tree, for
+the purpose of passing the firmware from bootloader to the kernel, or from
+the hypervisor to the guest.
+
+The firmware node itself contains the firmware binary contents, a compatible
+property, and any firmware-specific properties. The node should be placed
+inside a QE node that needs it. Doing so eliminates the need for a
+fsl,firmware-phandle property. Other QE nodes that need the same firmware
+should define an fsl,firmware-phandle property that points to the firmware node
+in the first QE node.
+
+The fsl,firmware property can be specified in the DTS (possibly using incbin)
+or can be inserted by the boot loader at boot time.
+
+Required properties:
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: A standard property. Specify a string that indicates what
+ kind of firmware it is. For QE, this should be "fsl,qe-firmware".
+
+ - fsl,firmware
+ Usage: required
+ Value type: <prop-encoded-array>, encoded as an array of bytes
+ Definition: A standard property. This property contains the firmware
+ binary "blob".
+
+Example:
+ qe1@e0080000 {
+ compatible = "fsl,qe";
+ qe_firmware:qe-firmware {
+ compatible = "fsl,qe-firmware";
+ fsl,firmware = [0x70 0xcd 0x00 0x00 0x01 0x46 0x45 ...];
+ };
+ ...
+ };
+
+ qe2@e0090000 {
+ compatible = "fsl,qe";
+ fsl,firmware-phandle = <&qe_firmware>;
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/firmware.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/firmware.txt
new file mode 100644
index 000000000..249db3a15
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/firmware.txt
@@ -0,0 +1,24 @@
+* Uploaded QE firmware
+
+ If a new firmware has been uploaded to the QE (usually by the
+ boot loader), then a 'firmware' child node should be added to the QE
+ node. This node provides information on the uploaded firmware that
+ device drivers may need.
+
+ Required properties:
+ - id: The string name of the firmware. This is taken from the 'id'
+ member of the qe_firmware structure of the uploaded firmware.
+ Device drivers can search this string to determine if the
+ firmware they want is already present.
+ - extended-modes: The Extended Modes bitfield, taken from the
+ firmware binary. It is a 64-bit number represented
+ as an array of two 32-bit numbers.
+ - virtual-traps: The virtual traps, taken from the firmware binary.
+ It is an array of 8 32-bit numbers.
+
+Example:
+ firmware {
+ id = "Soft-UART";
+ extended-modes = <0 0>;
+ virtual-traps = <0 0 0 0 0 0 0 0>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/par_io.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/par_io.txt
new file mode 100644
index 000000000..609842602
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/par_io.txt
@@ -0,0 +1,51 @@
+* Parallel I/O Ports
+
+This node configures Parallel I/O ports for CPUs with QE support.
+The node should reside in the "soc" node of the tree. For each
+device that using parallel I/O ports, a child node should be created.
+See the definition of the Pin configuration nodes below for more
+information.
+
+Required properties:
+- device_type : should be "par_io".
+- reg : offset to the register set and its length.
+- num-ports : number of Parallel I/O ports
+
+Example:
+par_io@1400 {
+ reg = <1400 100>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ device_type = "par_io";
+ num-ports = <7>;
+ ucc_pin@01 {
+ ......
+ };
+
+Note that "par_io" nodes are obsolete, and should not be used for
+the new device trees. Instead, each Par I/O bank should be represented
+via its own gpio-controller node:
+
+Required properties:
+- #gpio-cells : should be "2".
+- compatible : should be "fsl,<chip>-qe-pario-bank",
+ "fsl,mpc8323-qe-pario-bank".
+- reg : offset to the register set and its length.
+- gpio-controller : node to identify gpio controllers.
+
+Example:
+ qe_pio_a: gpio-controller@1400 {
+ #gpio-cells = <2>;
+ compatible = "fsl,mpc8360-qe-pario-bank",
+ "fsl,mpc8323-qe-pario-bank";
+ reg = <0x1400 0x18>;
+ gpio-controller;
+ };
+
+ qe_pio_e: gpio-controller@1460 {
+ #gpio-cells = <2>;
+ compatible = "fsl,mpc8360-qe-pario-bank",
+ "fsl,mpc8323-qe-pario-bank";
+ reg = <0x1460 0x18>;
+ gpio-controller;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/pincfg.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/pincfg.txt
new file mode 100644
index 000000000..ec6ee2e86
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/pincfg.txt
@@ -0,0 +1,57 @@
+* Pin configuration nodes
+
+Required properties:
+- pio-map : array of pin configurations. Each pin is defined by 6
+ integers. The six numbers are respectively: port, pin, dir,
+ open_drain, assignment, has_irq.
+ - port : port number of the pin; 0-6 represent port A-G in UM.
+ - pin : pin number in the port.
+ - dir : direction of the pin, should encode as follows:
+
+ 0 = The pin is disabled
+ 1 = The pin is an output
+ 2 = The pin is an input
+ 3 = The pin is I/O
+
+ - open_drain : indicates the pin is normal or wired-OR:
+
+ 0 = The pin is actively driven as an output
+ 1 = The pin is an open-drain driver. As an output, the pin is
+ driven active-low, otherwise it is three-stated.
+
+ - assignment : function number of the pin according to the Pin Assignment
+ tables in User Manual. Each pin can have up to 4 possible functions in
+ QE and two options for CPM.
+ - has_irq : indicates if the pin is used as source of external
+ interrupts.
+
+Example:
+ ucc_pin@01 {
+ pio-map = <
+ /* port pin dir open_drain assignment has_irq */
+ 0 3 1 0 1 0 /* TxD0 */
+ 0 4 1 0 1 0 /* TxD1 */
+ 0 5 1 0 1 0 /* TxD2 */
+ 0 6 1 0 1 0 /* TxD3 */
+ 1 6 1 0 3 0 /* TxD4 */
+ 1 7 1 0 1 0 /* TxD5 */
+ 1 9 1 0 2 0 /* TxD6 */
+ 1 a 1 0 2 0 /* TxD7 */
+ 0 9 2 0 1 0 /* RxD0 */
+ 0 a 2 0 1 0 /* RxD1 */
+ 0 b 2 0 1 0 /* RxD2 */
+ 0 c 2 0 1 0 /* RxD3 */
+ 0 d 2 0 1 0 /* RxD4 */
+ 1 1 2 0 2 0 /* RxD5 */
+ 1 0 2 0 2 0 /* RxD6 */
+ 1 4 2 0 2 0 /* RxD7 */
+ 0 7 1 0 1 0 /* TX_EN */
+ 0 8 1 0 1 0 /* TX_ER */
+ 0 f 2 0 1 0 /* RX_DV */
+ 0 10 2 0 1 0 /* RX_ER */
+ 0 0 2 0 1 0 /* RX_CLK */
+ 2 9 1 0 3 0 /* GTX_CLK - CLK10 */
+ 2 8 2 0 1 0>; /* GTX125 - CLK9 */
+ };
+
+
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/ucc.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/ucc.txt
new file mode 100644
index 000000000..e47734bee
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/ucc.txt
@@ -0,0 +1,70 @@
+* UCC (Unified Communications Controllers)
+
+Required properties:
+- device_type : should be "network", "hldc", "uart", "transparent"
+ "bisync", "atm", or "serial".
+- compatible : could be "ucc_geth" or "fsl_atm" and so on.
+- cell-index : the ucc number(1-8), corresponding to UCCx in UM.
+- reg : Offset and length of the register set for the device
+- interrupts : <a b> where a is the interrupt number and b is a
+ field that represents an encoding of the sense and level
+ information for the interrupt. This should be encoded based on
+ the information in section 2) depending on the type of interrupt
+ controller you have.
+- interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+- pio-handle : The phandle for the Parallel I/O port configuration.
+- port-number : for UART drivers, the port number to use, between 0 and 3.
+ This usually corresponds to the /dev/ttyQE device, e.g. <0> = /dev/ttyQE0.
+ The port number is added to the minor number of the device. Unlike the
+ CPM UART driver, the port-number is required for the QE UART driver.
+- soft-uart : for UART drivers, if specified this means the QE UART device
+ driver should use "Soft-UART" mode, which is needed on some SOCs that have
+ broken UART hardware. Soft-UART is provided via a microcode upload.
+- rx-clock-name: the UCC receive clock source
+ "none": clock source is disabled
+ "brg1" through "brg16": clock source is BRG1-BRG16, respectively
+ "clk1" through "clk24": clock source is CLK1-CLK24, respectively
+- tx-clock-name: the UCC transmit clock source
+ "none": clock source is disabled
+ "brg1" through "brg16": clock source is BRG1-BRG16, respectively
+ "clk1" through "clk24": clock source is CLK1-CLK24, respectively
+The following two properties are deprecated. rx-clock has been replaced
+with rx-clock-name, and tx-clock has been replaced with tx-clock-name.
+Drivers that currently use the deprecated properties should continue to
+do so, in order to support older device trees, but they should be updated
+to check for the new properties first.
+- rx-clock : represents the UCC receive clock source.
+ 0x00 : clock source is disabled;
+ 0x1~0x10 : clock source is BRG1~BRG16 respectively;
+ 0x11~0x28: clock source is QE_CLK1~QE_CLK24 respectively.
+- tx-clock: represents the UCC transmit clock source;
+ 0x00 : clock source is disabled;
+ 0x1~0x10 : clock source is BRG1~BRG16 respectively;
+ 0x11~0x28: clock source is QE_CLK1~QE_CLK24 respectively.
+
+Required properties for network device_type:
+- mac-address : list of bytes representing the ethernet address.
+- phy-handle : The phandle for the PHY connected to this controller.
+
+Recommended properties:
+- phy-connection-type : a string naming the controller/PHY interface type,
+ i.e., "mii" (default), "rmii", "gmii", "rgmii", "rgmii-id" (Internal
+ Delay), "rgmii-txid" (delay on TX only), "rgmii-rxid" (delay on RX only),
+ "tbi", or "rtbi".
+
+Example:
+ ucc@2000 {
+ device_type = "network";
+ compatible = "ucc_geth";
+ cell-index = <1>;
+ reg = <2000 200>;
+ interrupts = <a0 0>;
+ interrupt-parent = <700>;
+ mac-address = [ 00 04 9f 00 23 23 ];
+ rx-clock = "none";
+ tx-clock = "clk9";
+ phy-handle = <212000>;
+ phy-connection-type = "gmii";
+ pio-handle = <140001>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/usb.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/usb.txt
new file mode 100644
index 000000000..9ccd5f304
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/qe/usb.txt
@@ -0,0 +1,37 @@
+Freescale QUICC Engine USB Controller
+
+Required properties:
+- compatible : should be "fsl,<chip>-qe-usb", "fsl,mpc8323-qe-usb".
+- reg : the first two cells should contain usb registers location and
+ length, the next two two cells should contain PRAM location and
+ length.
+- interrupts : should contain USB interrupt.
+- interrupt-parent : interrupt source phandle.
+- fsl,fullspeed-clock : specifies the full speed USB clock source:
+ "none": clock source is disabled
+ "brg1" through "brg16": clock source is BRG1-BRG16, respectively
+ "clk1" through "clk24": clock source is CLK1-CLK24, respectively
+- fsl,lowspeed-clock : specifies the low speed USB clock source:
+ "none": clock source is disabled
+ "brg1" through "brg16": clock source is BRG1-BRG16, respectively
+ "clk1" through "clk24": clock source is CLK1-CLK24, respectively
+- hub-power-budget : USB power budget for the root hub, in mA.
+- gpios : should specify GPIOs in this order: USBOE, USBTP, USBTN, USBRP,
+ USBRN, SPEED (optional), and POWER (optional).
+
+Example:
+
+usb@6c0 {
+ compatible = "fsl,mpc8360-qe-usb", "fsl,mpc8323-qe-usb";
+ reg = <0x6c0 0x40 0x8b00 0x100>;
+ interrupts = <11>;
+ interrupt-parent = <&qeic>;
+ fsl,fullspeed-clock = "clk21";
+ gpios = <&qe_pio_b 2 0 /* USBOE */
+ &qe_pio_b 3 0 /* USBTP */
+ &qe_pio_b 8 0 /* USBTN */
+ &qe_pio_b 9 0 /* USBRP */
+ &qe_pio_b 11 0 /* USBRN */
+ &qe_pio_e 20 0 /* SPEED */
+ &qe_pio_e 21 0 /* POWER */>;
+};
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/serial.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/serial.txt
new file mode 100644
index 000000000..2ea76d9d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpm_qe/serial.txt
@@ -0,0 +1,32 @@
+* Serial
+
+Currently defined compatibles:
+- fsl,cpm1-smc-uart
+- fsl,cpm2-smc-uart
+- fsl,cpm1-scc-uart
+- fsl,cpm2-scc-uart
+- fsl,qe-uart
+
+Modem control lines connected to GPIO controllers are listed in the gpios
+property as described in booting-without-of.txt, section IX.1 in the following
+order:
+
+CTS, RTS, DCD, DSR, DTR, and RI.
+
+The gpios property is optional and can be left out when control lines are
+not used.
+
+Example:
+
+ serial@11a00 {
+ device_type = "serial";
+ compatible = "fsl,mpc8272-scc-uart",
+ "fsl,cpm2-scc-uart";
+ reg = <11a00 20 8000 100>;
+ interrupts = <28 8>;
+ interrupt-parent = <&PIC>;
+ fsl,cpm-brg = <1>;
+ fsl,cpm-command = <00800000>;
+ gpios = <&gpio_c 15 0
+ &gpio_d 29 0>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/cpus.txt b/Documentation/devicetree/bindings/powerpc/fsl/cpus.txt
new file mode 100644
index 000000000..f8cd2397a
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/cpus.txt
@@ -0,0 +1,33 @@
+===================================================================
+Power Architecture CPU Binding
+Copyright 2013 Freescale Semiconductor Inc.
+
+Power Architecture CPUs in Freescale SOCs are represented in device trees as
+per the definition in ePAPR.
+
+In addition to the ePAPR definitions, the properties defined below may be
+present on CPU nodes.
+
+PROPERTIES
+
+ - fsl,eref-*
+ Usage: optional
+ Value type: <empty>
+ Definition: The EREF (EREF: A Programmer.s Reference Manual for
+ Freescale Power Architecture) defines the architecture for Freescale
+ Power CPUs. The EREF defines some architecture categories not defined
+ by the Power ISA. For these EREF-specific categories, the existence of
+ a property named fsl,eref-[CAT], where [CAT] is the abbreviated category
+ name with all uppercase letters converted to lowercase, indicates that
+ the category is supported by the implementation.
+
+ - fsl,portid-mapping
+ Usage: optional
+ Value type: <u32>
+ Definition: The Coherency Subdomain ID Port Mapping Registers and
+ Snoop ID Port Mapping registers, which are part of the CoreNet
+ Coherency fabric (CCF), provide a CoreNet Coherency Subdomain
+ ID/CoreNet Snoop ID to cpu mapping functions. Certain bits from
+ these registers should be set if the coresponding CPU should be
+ snooped. This property defines a bitmask which selects the bit
+ that should be set if this cpu should be snooped.
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/dcsr.txt b/Documentation/devicetree/bindings/powerpc/fsl/dcsr.txt
new file mode 100644
index 000000000..18a88100a
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/dcsr.txt
@@ -0,0 +1,395 @@
+===================================================================
+Debug Control and Status Register (DCSR) Binding
+Copyright 2011 Freescale Semiconductor Inc.
+
+NOTE: The bindings described in this document are preliminary and subject
+to change. Some of the compatible strings that contain only generic names
+may turn out to be inappropriate, or need additional properties to describe
+the integration of the block with the rest of the chip.
+
+=====================================================================
+Debug Control and Status Register Memory Map
+
+Description
+
+This node defines the base address and range for the
+defined DCSR Memory Map. Child nodes will describe the individual
+debug blocks defined within this memory space.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,dcsr" and "simple-bus".
+ The DCSR space exists in the memory-mapped bus.
+
+ - #address-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Defines the number of cells
+ or representing physical addresses in child nodes.
+
+ - #size-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Defines the number of cells
+ or representing the size of physical addresses in
+ child nodes.
+
+ - ranges
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ range of the DCSR space.
+
+EXAMPLE
+ dcsr: dcsr@f00000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,dcsr", "simple-bus";
+ ranges = <0x00000000 0xf 0x00000000 0x01008000>;
+ };
+
+=====================================================================
+Event Processing Unit
+
+This node represents the region of DCSR space allocated to the EPU
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,dcsr-epu"
+
+ - interrupts
+ Usage: required
+ Value type: <prop_encoded-array>
+ Definition: Specifies the interrupts generated by the EPU.
+ The value of the interrupts property consists of three
+ interrupt specifiers. The format of the specifier is defined
+ by the binding document describing the node's interrupt parent.
+
+ The EPU counters can be configured to assert the performance
+ monitor interrupt signal based on either counter overflow or value
+ match. Which counter asserted the interrupt is captured in an EPU
+ Counter Interrupt Status Register (EPCPUISR).
+
+ The EPU unit can also be configured to assert either or both of
+ two interrupt signals based on debug event sources within the SoC.
+ The interrupt signals are epu_xt_int0 and epu_xt_int1.
+ Which event source asserted the interrupt is captured in an EPU
+ Interrupt Status Register (EPISR0,EPISR1).
+
+ Interrupt numbers are listed in order (perfmon, event0, event1).
+
+ - interrupt-parent
+ Usage: required
+ Value type: <phandle>
+ Definition: A single <phandle> value that points
+ to the interrupt parent to which the child domain
+ is being mapped. Value must be "&mpic"
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ offset and length of the DCSR space registers of the device
+ configuration block.
+
+EXAMPLE
+ dcsr-epu@0 {
+ compatible = "fsl,dcsr-epu";
+ interrupts = <52 2 0 0
+ 84 2 0 0
+ 85 2 0 0>;
+ interrupt-parent = <&mpic>;
+ reg = <0x0 0x1000>;
+ };
+
+=======================================================================
+Nexus Port Controller
+
+This node represents the region of DCSR space allocated to the NPC
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,dcsr-npc"
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ offset and length of the DCSR space registers of the device
+ configuration block.
+ The Nexus Port controller occupies two regions in the DCSR space
+ with distinct functionality.
+
+ The first register range describes the Nexus Port Controller
+ control and status registers.
+
+ The second register range describes the Nexus Port Controller
+ internal trace buffer. The NPC trace buffer is a small memory buffer
+ which stages the nexus trace data for transmission via the Aurora port
+ or to a DDR based trace buffer. In some configurations the NPC trace
+ buffer can be the only trace buffer used.
+
+
+EXAMPLE
+ dcsr-npc {
+ compatible = "fsl,dcsr-npc";
+ reg = <0x1000 0x1000 0x1000000 0x8000>;
+ };
+
+=======================================================================
+Nexus Concentrator
+
+This node represents the region of DCSR space allocated to the NXC
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,dcsr-nxc"
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ offset and length of the DCSR space registers of the device
+ configuration block.
+
+EXAMPLE
+ dcsr-nxc@2000 {
+ compatible = "fsl,dcsr-nxc";
+ reg = <0x2000 0x1000>;
+ };
+=======================================================================
+CoreNet Debug Controller
+
+This node represents the region of DCSR space allocated to
+the CoreNet Debug controller.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,dcsr-corenet"
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ offset and length of the DCSR space registers of the device
+ configuration block.
+ The CoreNet Debug controller occupies two regions in the DCSR space
+ with distinct functionality.
+
+ The first register range describes the CoreNet Debug Controller
+ functionalty to perform transaction and transaction attribute matches.
+
+ The second register range describes the CoreNet Debug Controller
+ functionalty to trigger event notifications and debug traces.
+
+EXAMPLE
+ dcsr-corenet {
+ compatible = "fsl,dcsr-corenet";
+ reg = <0x8000 0x1000 0xB0000 0x1000>;
+ };
+
+=======================================================================
+Data Path Debug controller
+
+This node represents the region of DCSR space allocated to
+the DPAA Debug Controller. This controller controls debug configuration
+for the QMAN and FMAN blocks.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include both an identifier specific to the SoC
+ or Debug IP of the form "fsl,<soc>-dcsr-dpaa" in addition to the
+ generic compatible string "fsl,dcsr-dpaa".
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ offset and length of the DCSR space registers of the device
+ configuration block.
+
+EXAMPLE
+ dcsr-dpaa@9000 {
+ compatible = "fsl,p4080-dcsr-dpaa", "fsl,dcsr-dpaa";
+ reg = <0x9000 0x1000>;
+ };
+
+=======================================================================
+OCeaN Debug controller
+
+This node represents the region of DCSR space allocated to
+the OCN Debug Controller.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include both an identifier specific to the SoC
+ or Debug IP of the form "fsl,<soc>-dcsr-ocn" in addition to the
+ generic compatible string "fsl,dcsr-ocn".
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ offset and length of the DCSR space registers of the device
+ configuration block.
+
+EXAMPLE
+ dcsr-ocn@11000 {
+ compatible = "fsl,p4080-dcsr-ocn", "fsl,dcsr-ocn";
+ reg = <0x11000 0x1000>;
+ };
+
+=======================================================================
+DDR Controller Debug controller
+
+This node represents the region of DCSR space allocated to
+the OCN Debug Controller.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,dcsr-ddr"
+
+ - dev-handle
+ Usage: required
+ Definition: A phandle to associate this debug node with its
+ component controller.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ offset and length of the DCSR space registers of the device
+ configuration block.
+
+EXAMPLE
+ dcsr-ddr@12000 {
+ compatible = "fsl,dcsr-ddr";
+ dev-handle = <&ddr1>;
+ reg = <0x12000 0x1000>;
+ };
+
+=======================================================================
+Nexus Aurora Link Controller
+
+This node represents the region of DCSR space allocated to
+the NAL Controller.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include both an identifier specific to the SoC
+ or Debug IP of the form "fsl,<soc>-dcsr-nal" in addition to the
+ generic compatible string "fsl,dcsr-nal".
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ offset and length of the DCSR space registers of the device
+ configuration block.
+
+EXAMPLE
+ dcsr-nal@18000 {
+ compatible = "fsl,p4080-dcsr-nal", "fsl,dcsr-nal";
+ reg = <0x18000 0x1000>;
+ };
+
+
+=======================================================================
+Run Control and Power Management
+
+This node represents the region of DCSR space allocated to
+the RCPM Debug Controller. This functionlity is limited to the
+control the debug operations of the SoC and cores.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include both an identifier specific to the SoC
+ or Debug IP of the form "fsl,<soc>-dcsr-rcpm" in addition to the
+ generic compatible string "fsl,dcsr-rcpm".
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ offset and length of the DCSR space registers of the device
+ configuration block.
+
+EXAMPLE
+ dcsr-rcpm@22000 {
+ compatible = "fsl,p4080-dcsr-rcpm", "fsl,dcsr-rcpm";
+ reg = <0x22000 0x1000>;
+ };
+
+=======================================================================
+Core Service Bridge Proxy
+
+This node represents the region of DCSR space allocated to
+the Core Service Bridge Proxies.
+There is one Core Service Bridge Proxy device for each CPU in the system.
+This functionlity provides access to the debug operations of the CPU.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include both an identifier specific to the cpu
+ of the form "fsl,dcsr-<cpu>-sb-proxy" in addition to the
+ generic compatible string "fsl,dcsr-cpu-sb-proxy".
+
+ - cpu-handle
+ Usage: required
+ Definition: A phandle to associate this debug node with its cpu.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address
+ offset and length of the DCSR space registers of the device
+ configuration block.
+
+EXAMPLE
+ dcsr-cpu-sb-proxy@40000 {
+ compatible = "fsl,dcsr-e500mc-sb-proxy",
+ "fsl,dcsr-cpu-sb-proxy";
+ cpu-handle = <&cpu0>;
+ reg = <0x40000 0x1000>;
+ };
+ dcsr-cpu-sb-proxy@41000 {
+ compatible = "fsl,dcsr-e500mc-sb-proxy",
+ "fsl,dcsr-cpu-sb-proxy";
+ cpu-handle = <&cpu1>;
+ reg = <0x41000 0x1000>;
+ };
+
+=======================================================================
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/diu.txt b/Documentation/devicetree/bindings/powerpc/fsl/diu.txt
new file mode 100644
index 000000000..b66cb6d31
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/diu.txt
@@ -0,0 +1,34 @@
+* Freescale Display Interface Unit
+
+The Freescale DIU is a LCD controller, with proper hardware, it can also
+drive DVI monitors.
+
+Required properties:
+- compatible : should be "fsl,diu" or "fsl,mpc5121-diu".
+- reg : should contain at least address and length of the DIU register
+ set.
+- interrupts : one DIU interrupt should be described here.
+- interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+
+Optional properties:
+- edid : verbatim EDID data block describing attached display.
+ Data from the detailed timing descriptor will be used to
+ program the display controller.
+
+Example (MPC8610HPCD):
+ display@2c000 {
+ compatible = "fsl,diu";
+ reg = <0x2c000 100>;
+ interrupts = <72 2>;
+ interrupt-parent = <&mpic>;
+ };
+
+Example for MPC5121:
+ display@2100 {
+ compatible = "fsl,mpc5121-diu";
+ reg = <0x2100 0x100>;
+ interrupts = <64 0x8>;
+ interrupt-parent = <&ipic>;
+ edid = [edid-data];
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/dma.txt b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
new file mode 100644
index 000000000..7fc1b010f
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/dma.txt
@@ -0,0 +1,208 @@
+* Freescale DMA Controllers
+
+** Freescale Elo DMA Controller
+ This is a little-endian 4-channel DMA controller, used in Freescale mpc83xx
+ series chips such as mpc8315, mpc8349, mpc8379 etc.
+
+Required properties:
+
+- compatible : must include "fsl,elo-dma"
+- reg : DMA General Status Register, i.e. DGSR which contains
+ status for all the 4 DMA channels
+- ranges : describes the mapping between the address space of the
+ DMA channels and the address space of the DMA controller
+- cell-index : controller index. 0 for controller @ 0x8100
+- interrupts : interrupt specifier for DMA IRQ
+- interrupt-parent : optional, if needed for interrupt mapping
+
+- DMA channel nodes:
+ - compatible : must include "fsl,elo-dma-channel"
+ However, see note below.
+ - reg : DMA channel specific registers
+ - cell-index : DMA channel index starts at 0.
+
+Optional properties:
+ - interrupts : interrupt specifier for DMA channel IRQ
+ (on 83xx this is expected to be identical to
+ the interrupts property of the parent node)
+ - interrupt-parent : optional, if needed for interrupt mapping
+
+Example:
+ dma@82a8 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,mpc8349-dma", "fsl,elo-dma";
+ reg = <0x82a8 4>;
+ ranges = <0 0x8100 0x1a4>;
+ interrupt-parent = <&ipic>;
+ interrupts = <71 8>;
+ cell-index = <0>;
+ dma-channel@0 {
+ compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+ cell-index = <0>;
+ reg = <0 0x80>;
+ interrupt-parent = <&ipic>;
+ interrupts = <71 8>;
+ };
+ dma-channel@80 {
+ compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+ cell-index = <1>;
+ reg = <0x80 0x80>;
+ interrupt-parent = <&ipic>;
+ interrupts = <71 8>;
+ };
+ dma-channel@100 {
+ compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+ cell-index = <2>;
+ reg = <0x100 0x80>;
+ interrupt-parent = <&ipic>;
+ interrupts = <71 8>;
+ };
+ dma-channel@180 {
+ compatible = "fsl,mpc8349-dma-channel", "fsl,elo-dma-channel";
+ cell-index = <3>;
+ reg = <0x180 0x80>;
+ interrupt-parent = <&ipic>;
+ interrupts = <71 8>;
+ };
+ };
+
+** Freescale EloPlus DMA Controller
+ This is a 4-channel DMA controller with extended addresses and chaining,
+ mainly used in Freescale mpc85xx/86xx, Pxxx and BSC series chips, such as
+ mpc8540, mpc8641 p4080, bsc9131 etc.
+
+Required properties:
+
+- compatible : must include "fsl,eloplus-dma"
+- reg : DMA General Status Register, i.e. DGSR which contains
+ status for all the 4 DMA channels
+- cell-index : controller index. 0 for controller @ 0x21000,
+ 1 for controller @ 0xc000
+- ranges : describes the mapping between the address space of the
+ DMA channels and the address space of the DMA controller
+
+- DMA channel nodes:
+ - compatible : must include "fsl,eloplus-dma-channel"
+ However, see note below.
+ - cell-index : DMA channel index starts at 0.
+ - reg : DMA channel specific registers
+ - interrupts : interrupt specifier for DMA channel IRQ
+ - interrupt-parent : optional, if needed for interrupt mapping
+
+Example:
+ dma@21300 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,mpc8540-dma", "fsl,eloplus-dma";
+ reg = <0x21300 4>;
+ ranges = <0 0x21100 0x200>;
+ cell-index = <0>;
+ dma-channel@0 {
+ compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel";
+ reg = <0 0x80>;
+ cell-index = <0>;
+ interrupt-parent = <&mpic>;
+ interrupts = <20 2>;
+ };
+ dma-channel@80 {
+ compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel";
+ reg = <0x80 0x80>;
+ cell-index = <1>;
+ interrupt-parent = <&mpic>;
+ interrupts = <21 2>;
+ };
+ dma-channel@100 {
+ compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel";
+ reg = <0x100 0x80>;
+ cell-index = <2>;
+ interrupt-parent = <&mpic>;
+ interrupts = <22 2>;
+ };
+ dma-channel@180 {
+ compatible = "fsl,mpc8540-dma-channel", "fsl,eloplus-dma-channel";
+ reg = <0x180 0x80>;
+ cell-index = <3>;
+ interrupt-parent = <&mpic>;
+ interrupts = <23 2>;
+ };
+ };
+
+** Freescale Elo3 DMA Controller
+ DMA controller which has same function as EloPlus except that Elo3 has 8
+ channels while EloPlus has only 4, it is used in Freescale Txxx and Bxxx
+ series chips, such as t1040, t4240, b4860.
+
+Required properties:
+
+- compatible : must include "fsl,elo3-dma"
+- reg : contains two entries for DMA General Status Registers,
+ i.e. DGSR0 which includes status for channel 1~4, and
+ DGSR1 for channel 5~8
+- ranges : describes the mapping between the address space of the
+ DMA channels and the address space of the DMA controller
+
+- DMA channel nodes:
+ - compatible : must include "fsl,eloplus-dma-channel"
+ - reg : DMA channel specific registers
+ - interrupts : interrupt specifier for DMA channel IRQ
+ - interrupt-parent : optional, if needed for interrupt mapping
+
+Example:
+dma@100300 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,elo3-dma";
+ reg = <0x100300 0x4>,
+ <0x100600 0x4>;
+ ranges = <0x0 0x100100 0x500>;
+ dma-channel@0 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x0 0x80>;
+ interrupts = <28 2 0 0>;
+ };
+ dma-channel@80 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x80 0x80>;
+ interrupts = <29 2 0 0>;
+ };
+ dma-channel@100 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x100 0x80>;
+ interrupts = <30 2 0 0>;
+ };
+ dma-channel@180 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x180 0x80>;
+ interrupts = <31 2 0 0>;
+ };
+ dma-channel@300 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x300 0x80>;
+ interrupts = <76 2 0 0>;
+ };
+ dma-channel@380 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x380 0x80>;
+ interrupts = <77 2 0 0>;
+ };
+ dma-channel@400 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x400 0x80>;
+ interrupts = <78 2 0 0>;
+ };
+ dma-channel@480 {
+ compatible = "fsl,eloplus-dma-channel";
+ reg = <0x480 0x80>;
+ interrupts = <79 2 0 0>;
+ };
+};
+
+Note on DMA channel compatible properties: The compatible property must say
+"fsl,elo-dma-channel" or "fsl,eloplus-dma-channel" to be used by the Elo DMA
+driver (fsldma). Any DMA channel used by fsldma cannot be used by another
+DMA driver, such as the SSI sound drivers for the MPC8610. Therefore, any DMA
+channel that should be used for another driver should not use
+"fsl,elo-dma-channel" or "fsl,eloplus-dma-channel". For the SSI drivers, for
+example, the compatible property should be "fsl,ssi-dma-channel". See ssi.txt
+for more information.
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/ecm.txt b/Documentation/devicetree/bindings/powerpc/fsl/ecm.txt
new file mode 100644
index 000000000..f514f29c6
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/ecm.txt
@@ -0,0 +1,64 @@
+=====================================================================
+E500 LAW & Coherency Module Device Tree Binding
+Copyright (C) 2009 Freescale Semiconductor Inc.
+=====================================================================
+
+Local Access Window (LAW) Node
+
+The LAW node represents the region of CCSR space where local access
+windows are configured. For ECM based devices this is the first 4k
+of CCSR space that includes CCSRBAR, ALTCBAR, ALTCAR, BPTR, and some
+number of local access windows as specified by fsl,num-laws.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,ecm-law"
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. The value specifies the
+ physical address offset and length of the CCSR space
+ registers.
+
+ - fsl,num-laws
+ Usage: required
+ Value type: <u32>
+ Definition: The value specifies the number of local access
+ windows for this device.
+
+=====================================================================
+
+E500 Coherency Module Node
+
+The E500 LAW node represents the region of CCSR space where ECM config
+and error reporting registers exist, this is the second 4k (0x1000)
+of CCSR space.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,CHIP-ecm", "fsl,ecm" where
+ CHIP is the processor (mpc8572, mpc8544, etc.)
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. The value specifies the
+ physical address offset and length of the CCSR space
+ registers.
+
+ - interrupts
+ Usage: required
+ Value type: <prop-encoded-array>
+
+ - interrupt-parent
+ Usage: required
+ Value type: <phandle>
+
+=====================================================================
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/fman.txt b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt
new file mode 100644
index 000000000..edda55f74
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/fman.txt
@@ -0,0 +1,604 @@
+=============================================================================
+Freescale Frame Manager Device Bindings
+
+CONTENTS
+ - FMan Node
+ - FMan Port Node
+ - FMan MURAM Node
+ - FMan dTSEC/XGEC/mEMAC Node
+ - FMan IEEE 1588 Node
+ - FMan MDIO Node
+ - Example
+
+=============================================================================
+FMan Node
+
+DESCRIPTION
+
+Due to the fact that the FMan is an aggregation of sub-engines (ports, MACs,
+etc.) the FMan node will have child nodes for each of them.
+
+PROPERTIES
+
+- compatible
+ Usage: required
+ Value type: <stringlist>
+ Definition: Must include "fsl,fman"
+ FMan version can be determined via FM_IP_REV_1 register in the
+ FMan block. The offset is 0xc4 from the beginning of the
+ Frame Processing Manager memory map (0xc3000 from the
+ beginning of the FMan node).
+
+- cell-index
+ Usage: required
+ Value type: <u32>
+ Definition: Specifies the index of the FMan unit.
+
+ The cell-index value may be used by the SoC, to identify the
+ FMan unit in the SoC memory map. In the table bellow,
+ there's a description of the cell-index use in each SoC:
+
+ - P1023:
+ register[bit] FMan unit cell-index
+ ============================================================
+ DEVDISR[1] 1 0
+
+ - P2041, P3041, P4080 P5020, P5040:
+ register[bit] FMan unit cell-index
+ ============================================================
+ DCFG_DEVDISR2[6] 1 0
+ DCFG_DEVDISR2[14] 2 1
+ (Second FM available only in P4080 and P5040)
+
+ - B4860, T1040, T2080, T4240:
+ register[bit] FMan unit cell-index
+ ============================================================
+ DCFG_CCSR_DEVDISR2[24] 1 0
+ DCFG_CCSR_DEVDISR2[25] 2 1
+ (Second FM available only in T4240)
+
+ DEVDISR, DCFG_DEVDISR2 and DCFG_CCSR_DEVDISR2 are located in
+ the specific SoC "Device Configuration/Pin Control" Memory
+ Map.
+
+- reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the offset of the
+ following configuration registers:
+ - BMI configuration registers.
+ - QMI configuration registers.
+ - DMA configuration registers.
+ - FPM configuration registers.
+ - FMan controller configuration registers.
+
+- ranges
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property.
+
+- clocks
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: phandle for the fman input clock.
+
+- clock-names
+ usage: required
+ Value type: <stringlist>
+ Definition: "fmanclk" for the fman input clock.
+
+- interrupts
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A pair of IRQs are specified in this property.
+ The first element is associated with the event interrupts and
+ the second element is associated with the error interrupts.
+
+- fsl,qman-channel-range
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: Specifies the range of the available dedicated
+ channels in the FMan. The first cell specifies the beginning
+ of the range and the second cell specifies the number of
+ channels.
+ Further information available at:
+ "Work Queue (WQ) Channel Assignments in the QMan" section
+ in DPAA Reference Manual.
+
+- fsl,qman
+- fsl,bman
+ Usage: required
+ Definition: See soc/fsl/qman.txt and soc/fsl/bman.txt
+
+=============================================================================
+FMan MURAM Node
+
+DESCRIPTION
+
+FMan Internal memory - shared between all the FMan modules.
+It contains data structures that are common and written to or read by
+the modules.
+FMan internal memory is split into the following parts:
+ Packet buffering (Tx/Rx FIFOs)
+ Frames internal context
+
+PROPERTIES
+
+- compatible
+ Usage: required
+ Value type: <stringlist>
+ Definition: Must include "fsl,fman-muram"
+
+- ranges
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property.
+ Specifies the multi-user memory offset and the size within
+ the FMan.
+
+EXAMPLE
+
+muram@0 {
+ compatible = "fsl,fman-muram";
+ ranges = <0 0x000000 0x28000>;
+};
+
+=============================================================================
+FMan Port Node
+
+DESCRIPTION
+
+The Frame Manager (FMan) supports several types of hardware ports:
+ Ethernet receiver (RX)
+ Ethernet transmitter (TX)
+ Offline/Host command (O/H)
+
+PROPERTIES
+
+- compatible
+ Usage: required
+ Value type: <stringlist>
+ Definition: A standard property.
+ Must include one of the following:
+ - "fsl,fman-v2-port-oh" for FManV2 OH ports
+ - "fsl,fman-v2-port-rx" for FManV2 RX ports
+ - "fsl,fman-v2-port-tx" for FManV2 TX ports
+ - "fsl,fman-v3-port-oh" for FManV3 OH ports
+ - "fsl,fman-v3-port-rx" for FManV3 RX ports
+ - "fsl,fman-v3-port-tx" for FManV3 TX ports
+
+- cell-index
+ Usage: required
+ Value type: <u32>
+ Definition: Specifies the hardware port id.
+ Each hardware port on the FMan has its own hardware PortID.
+ Super set of all hardware Port IDs available at FMan Reference
+ Manual under "FMan Hardware Ports in Freescale Devices" table.
+
+ Each hardware port is assigned a 4KB, port-specific page in
+ the FMan hardware port memory region (which is part of the
+ FMan memory map). The first 4 KB in the FMan hardware ports
+ memory region is used for what are called common registers.
+ The subsequent 63 4KB pages are allocated to the hardware
+ ports.
+ The page of a specific port is determined by the cell-index.
+
+- reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: There is one reg region describing the port
+ configuration registers.
+
+EXAMPLE
+
+port@a8000 {
+ cell-index = <0x28>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xa8000 0x1000>;
+};
+
+port@88000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x88000 0x1000>;
+};
+
+port@81000 {
+ cell-index = <0x1>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x81000 0x1000>;
+};
+
+=============================================================================
+FMan dTSEC/XGEC/mEMAC Node
+
+DESCRIPTION
+
+mEMAC/dTSEC/XGEC are the Ethernet network interfaces
+
+PROPERTIES
+
+- compatible
+ Usage: required
+ Value type: <stringlist>
+ Definition: A standard property.
+ Must include one of the following:
+ - "fsl,fman-dtsec" for dTSEC MAC
+ - "fsl,fman-xgec" for XGEC MAC
+ - "fsl,fman-memac for mEMAC MAC
+
+- cell-index
+ Usage: required
+ Value type: <u32>
+ Definition: Specifies the MAC id.
+
+ The cell-index value may be used by the FMan or the SoC, to
+ identify the MAC unit in the FMan (or SoC) memory map.
+ In the tables bellow there's a description of the cell-index
+ use, there are two tables, one describes the use of cell-index
+ by the FMan, the second describes the use by the SoC:
+
+ 1. FMan Registers
+
+ FManV2:
+ register[bit] MAC cell-index
+ ============================================================
+ FM_EPI[16] XGEC 8
+ FM_EPI[16+n] dTSECn n-1
+ FM_NPI[11+n] dTSECn n-1
+ n = 1,..,5
+
+ FManV3:
+ register[bit] MAC cell-index
+ ============================================================
+ FM_EPI[16+n] mEMACn n-1
+ FM_EPI[25] mEMAC10 9
+
+ FM_NPI[11+n] mEMACn n-1
+ FM_NPI[10] mEMAC10 9
+ FM_NPI[11] mEMAC9 8
+ n = 1,..8
+
+ FM_EPI and FM_NPI are located in the FMan memory map.
+
+ 2. SoC registers:
+
+ - P2041, P3041, P4080 P5020, P5040:
+ register[bit] FMan MAC cell
+ Unit index
+ ============================================================
+ DCFG_DEVDISR2[7] 1 XGEC 8
+ DCFG_DEVDISR2[7+n] 1 dTSECn n-1
+ DCFG_DEVDISR2[15] 2 XGEC 8
+ DCFG_DEVDISR2[15+n] 2 dTSECn n-1
+ n = 1,..5
+
+ - T1040, T2080, T4240, B4860:
+ register[bit] FMan MAC cell
+ Unit index
+ ============================================================
+ DCFG_CCSR_DEVDISR2[n-1] 1 mEMACn n-1
+ DCFG_CCSR_DEVDISR2[11+n] 2 mEMACn n-1
+ n = 1,..6,9,10
+
+ EVDISR, DCFG_DEVDISR2 and DCFG_CCSR_DEVDISR2 are located in
+ the specific SoC "Device Configuration/Pin Control" Memory
+ Map.
+
+- reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property.
+
+- fsl,fman-ports
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: An array of two phandles - the first references is
+ the FMan RX port and the second is the TX port used by this
+ MAC.
+
+- ptp-timer
+ Usage required
+ Value type: <phandle>
+ Definition: A phandle for 1EEE1588 timer.
+
+EXAMPLE
+
+fman1_tx28: port@a8000 {
+ cell-index = <0x28>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xa8000 0x1000>;
+};
+
+fman1_rx8: port@88000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x88000 0x1000>;
+};
+
+ptp-timer: ptp_timer@fe000 {
+ compatible = "fsl,fman-ptp-timer";
+ reg = <0xfe000 0x1000>;
+};
+
+ethernet@e0000 {
+ compatible = "fsl,fman-dtsec";
+ cell-index = <0>;
+ reg = <0xe0000 0x1000>;
+ fsl,fman-ports = <&fman1_rx8 &fman1_tx28>;
+ ptp-timer = <&ptp-timer>;
+};
+
+============================================================================
+FMan IEEE 1588 Node
+
+DESCRIPTION
+
+The FMan interface to support IEEE 1588
+
+
+PROPERTIES
+
+- compatible
+ Usage: required
+ Value type: <stringlist>
+ Definition: A standard property.
+ Must include "fsl,fman-ptp-timer".
+
+- reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property.
+
+EXAMPLE
+
+ptp-timer@fe000 {
+ compatible = "fsl,fman-ptp-timer";
+ reg = <0xfe000 0x1000>;
+};
+
+=============================================================================
+FMan MDIO Node
+
+DESCRIPTION
+
+The MDIO is a bus to which the PHY devices are connected.
+
+PROPERTIES
+
+- compatible
+ Usage: required
+ Value type: <stringlist>
+ Definition: A standard property.
+ Must include "fsl,fman-mdio" for 1 Gb/s MDIO from FMan v2.
+ Must include "fsl,fman-xmdio" for 10 Gb/s MDIO from FMan v2.
+ Must include "fsl,fman-memac-mdio" for 1/10 Gb/s MDIO from
+ FMan v3.
+
+- reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property.
+
+- bus-frequency
+ Usage: optional
+ Value type: <u32>
+ Definition: Specifies the external MDIO bus clock speed to
+ be used, if different from the standard 2.5 MHz.
+ This may be due to the standard speed being unsupported (e.g.
+ due to a hardware problem), or to advertise that all relevant
+ components in the system support a faster speed.
+
+- interrupts
+ Usage: required for external MDIO
+ Value type: <prop-encoded-array>
+ Definition: Event interrupt of external MDIO controller.
+
+- fsl,fman-internal-mdio
+ Usage: required for internal MDIO
+ Value type: boolean
+ Definition: Fman has internal MDIO for internal PCS(Physical
+ Coding Sublayer) PHYs and external MDIO for external PHYs.
+ The settings and programming routines for internal/external
+ MDIO are different. Must be included for internal MDIO.
+
+EXAMPLE
+
+Example for FMan v2 external MDIO:
+
+mdio@f1000 {
+ compatible = "fsl,fman-xmdio";
+ reg = <0xf1000 0x1000>;
+ interrupts = <101 2 0 0>;
+};
+
+Example for FMan v3 internal MDIO:
+
+mdio@f1000 {
+ compatible = "fsl,fman-memac-mdio";
+ reg = <0xf1000 0x1000>;
+ fsl,fman-internal-mdio;
+};
+
+=============================================================================
+Example
+
+fman@400000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ cell-index = <1>;
+ compatible = "fsl,fman"
+ ranges = <0 0x400000 0x100000>;
+ reg = <0x400000 0x100000>;
+ clocks = <&fman_clk>;
+ clock-names = "fmanclk";
+ interrupts = <
+ 96 2 0 0
+ 16 2 1 1>;
+ fsl,qman-channel-range = <0x40 0xc>;
+
+ muram@0 {
+ compatible = "fsl,fman-muram";
+ reg = <0x0 0x28000>;
+ };
+
+ port@81000 {
+ cell-index = <1>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x81000 0x1000>;
+ };
+
+ port@82000 {
+ cell-index = <2>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x82000 0x1000>;
+ };
+
+ port@83000 {
+ cell-index = <3>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x83000 0x1000>;
+ };
+
+ port@84000 {
+ cell-index = <4>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x84000 0x1000>;
+ };
+
+ port@85000 {
+ cell-index = <5>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x85000 0x1000>;
+ };
+
+ port@86000 {
+ cell-index = <6>;
+ compatible = "fsl,fman-v2-port-oh";
+ reg = <0x86000 0x1000>;
+ };
+
+ fman1_rx_0x8: port@88000 {
+ cell-index = <0x8>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x88000 0x1000>;
+ };
+
+ fman1_rx_0x9: port@89000 {
+ cell-index = <0x9>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x89000 0x1000>;
+ };
+
+ fman1_rx_0xa: port@8a000 {
+ cell-index = <0xa>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x8a000 0x1000>;
+ };
+
+ fman1_rx_0xb: port@8b000 {
+ cell-index = <0xb>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x8b000 0x1000>;
+ };
+
+ fman1_rx_0xc: port@8c000 {
+ cell-index = <0xc>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x8c000 0x1000>;
+ };
+
+ fman1_rx_0x10: port@90000 {
+ cell-index = <0x10>;
+ compatible = "fsl,fman-v2-port-rx";
+ reg = <0x90000 0x1000>;
+ };
+
+ fman1_tx_0x28: port@a8000 {
+ cell-index = <0x28>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xa8000 0x1000>;
+ };
+
+ fman1_tx_0x29: port@a9000 {
+ cell-index = <0x29>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xa9000 0x1000>;
+ };
+
+ fman1_tx_0x2a: port@aa000 {
+ cell-index = <0x2a>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xaa000 0x1000>;
+ };
+
+ fman1_tx_0x2b: port@ab000 {
+ cell-index = <0x2b>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xab000 0x1000>;
+ };
+
+ fman1_tx_0x2c: port@ac0000 {
+ cell-index = <0x2c>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xac000 0x1000>;
+ };
+
+ fman1_tx_0x30: port@b0000 {
+ cell-index = <0x30>;
+ compatible = "fsl,fman-v2-port-tx";
+ reg = <0xb0000 0x1000>;
+ };
+
+ ethernet@e0000 {
+ compatible = "fsl,fman-dtsec";
+ cell-index = <0>;
+ reg = <0xe0000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x8 &fman1_tx_0x28>;
+ };
+
+ ethernet@e2000 {
+ compatible = "fsl,fman-dtsec";
+ cell-index = <1>;
+ reg = <0xe2000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x9 &fman1_tx_0x29>;
+ };
+
+ ethernet@e4000 {
+ compatible = "fsl,fman-dtsec";
+ cell-index = <2>;
+ reg = <0xe4000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0xa &fman1_tx_0x2a>;
+ };
+
+ ethernet@e6000 {
+ compatible = "fsl,fman-dtsec";
+ cell-index = <3>;
+ reg = <0xe6000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0xb &fman1_tx_0x2b>;
+ };
+
+ ethernet@e8000 {
+ compatible = "fsl,fman-dtsec";
+ cell-index = <4>;
+ reg = <0xf0000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0xc &fman1_tx_0x2c>;
+
+ ethernet@f0000 {
+ cell-index = <8>;
+ compatible = "fsl,fman-xgec";
+ reg = <0xf0000 0x1000>;
+ fsl,fman-ports = <&fman1_rx_0x10 &fman1_tx_0x30>;
+ };
+
+ ptp-timer@fe000 {
+ compatible = "fsl,fman-ptp-timer";
+ reg = <0xfe000 0x1000>;
+ };
+
+ mdio@f1000 {
+ compatible = "fsl,fman-xmdio";
+ reg = <0xf1000 0x1000>;
+ interrupts = <101 2 0 0>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/gtm.txt b/Documentation/devicetree/bindings/powerpc/fsl/gtm.txt
new file mode 100644
index 000000000..9a33efded
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/gtm.txt
@@ -0,0 +1,31 @@
+* Freescale General-purpose Timers Module
+
+Required properties:
+ - compatible : should be
+ "fsl,<chip>-gtm", "fsl,gtm" for SOC GTMs
+ "fsl,<chip>-qe-gtm", "fsl,qe-gtm", "fsl,gtm" for QE GTMs
+ "fsl,<chip>-cpm2-gtm", "fsl,cpm2-gtm", "fsl,gtm" for CPM2 GTMs
+ - reg : should contain gtm registers location and length (0x40).
+ - interrupts : should contain four interrupts.
+ - interrupt-parent : interrupt source phandle.
+ - clock-frequency : specifies the frequency driving the timer.
+
+Example:
+
+timer@500 {
+ compatible = "fsl,mpc8360-gtm", "fsl,gtm";
+ reg = <0x500 0x40>;
+ interrupts = <90 8 78 8 84 8 72 8>;
+ interrupt-parent = <&ipic>;
+ /* filled by u-boot */
+ clock-frequency = <0>;
+};
+
+timer@440 {
+ compatible = "fsl,mpc8360-qe-gtm", "fsl,qe-gtm", "fsl,gtm";
+ reg = <0x440 0x40>;
+ interrupts = <12 13 14 15>;
+ interrupt-parent = <&qeic>;
+ /* filled by u-boot */
+ clock-frequency = <0>;
+};
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/guts.txt b/Documentation/devicetree/bindings/powerpc/fsl/guts.txt
new file mode 100644
index 000000000..7f150b501
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/guts.txt
@@ -0,0 +1,36 @@
+* Global Utilities Block
+
+The global utilities block controls power management, I/O device
+enabling, power-on-reset configuration monitoring, general-purpose
+I/O signal configuration, alternate function selection for multiplexed
+signals, and clock control.
+
+Required properties:
+
+ - compatible : Should define the compatible device type for
+ global-utilities.
+ - reg : Offset and length of the register set for the device.
+
+Recommended properties:
+
+ - fsl,has-rstcr : Indicates that the global utilities register set
+ contains a functioning "reset control register" (i.e. the board
+ is wired to reset upon setting the HRESET_REQ bit in this register).
+
+ - fsl,liodn-bits : Indicates the number of defined bits in the LIODN
+ registers, for those SOCs that have a PAMU device.
+
+Examples:
+ global-utilities@e0000 { /* global utilities block */
+ compatible = "fsl,mpc8548-guts";
+ reg = <e0000 1000>;
+ fsl,has-rstcr;
+ };
+
+ guts: global-utilities@e0000 {
+ compatible = "fsl,qoriq-device-config-1.0";
+ reg = <0xe0000 0xe00>;
+ fsl,has-rstcr;
+ #sleep-cells = <1>;
+ fsl,liodn-bits = <12>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/interlaken-lac.txt b/Documentation/devicetree/bindings/powerpc/fsl/interlaken-lac.txt
new file mode 100644
index 000000000..641bc1398
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/interlaken-lac.txt
@@ -0,0 +1,309 @@
+===============================================================================
+Freescale Interlaken Look-Aside Controller Device Bindings
+Copyright 2012 Freescale Semiconductor Inc.
+
+CONTENTS
+ - Interlaken Look-Aside Controller (LAC) Node
+ - Example LAC Node
+ - Interlaken Look-Aside Controller (LAC) Software Portal Node
+ - Interlaken Look-Aside Controller (LAC) Software Portal Child Nodes
+ - Example LAC SWP Node with Child Nodes
+
+==============================================================================
+Interlaken Look-Aside Controller (LAC) Node
+
+DESCRIPTION
+
+The Interlaken is a narrow, high speed channelized chip-to-chip interface. To
+facilitate interoperability between a data path device and a look-aside
+co-processor, the Interlaken Look-Aside protocol is defined for short
+transaction-related transfers. Although based on the Interlaken protocol,
+Interlaken Look-Aside is not directly compatible with Interlaken and can be
+considered a different operation mode.
+
+The Interlaken LA controller connects internal platform to Interlaken serial
+interface. It accepts LA command through software portals, which are system
+memory mapped 4KB spaces. The LA commands are then translated into the
+Interlaken control words and data words, which are sent on TX side to TCAM
+through SerDes lanes.
+
+There are two 4KiB spaces defined within the LAC global register memory map.
+There is a full register set at 0x0000-0x0FFF (also known as the "hypervisor"
+version), and a subset at 0x1000-0x1FFF. The former is a superset of the
+latter, and includes certain registers that should not be accessible to
+partitioned software. Separate nodes are used for each region, with a phandle
+linking the hypervisor node to the normal operating node.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,interlaken-lac". This represents only
+ those LAC CCSR registers not protected in partitioned
+ software. The version of the device is determined by the LAC
+ IP Block Revision Register (IPBRR0) at offset 0x0BF8.
+
+ Table of correspondences between IPBRR0 values and example
+ chips:
+ Value Device
+ ----------- -------
+ 0x02000100 T4240
+
+ The Hypervisor node has a different compatible. It must include
+ "fsl,interlaken-lac-hv". This node represents the protected
+ LAC register space and is required except inside a partition
+ where access to the hypervisor node is to be denied.
+
+ - fsl,non-hv-node
+ Usage: required in "fsl,interlaken-lac-hv"
+ Value type: <phandle>
+ Definition: Points to the non-protected LAC CCSR mapped register space
+ node.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. The first resource represents the
+ Interlaken LAC configuration registers.
+
+ - interrupts:
+ Usage: required in non-hv node only
+ Value type: <prop-encoded-array>
+ Definition: Interrupt mapping for Interlaken LAC error IRQ.
+
+EXAMPLE
+ lac: lac@229000 {
+ compatible = "fsl,interlaken-lac"
+ reg = <0x229000 0x1000>;
+ interrupts = <16 2 1 18>;
+ };
+
+ lac-hv@228000 {
+ compatible = "fsl,interlaken-lac-hv"
+ reg = <0x228000 0x1000>;
+ fsl,non-hv-node = <&lac>;
+ };
+
+===============================================================================
+Interlaken Look-Aside Controller (LAC) Software Portal Container Node
+
+DESCRIPTION
+The Interlaken Look-Aside Controller (LAC) utilizes Software Portals to accept
+Interlaken Look-Aside (ILA) commands. The Interlaken LAC software portal
+memory map occupies 128KB of memory space. The software portal memory space is
+intended to be cache-enabled. WIMG for each software space is required to be
+0010 if stashing is enabled; otherwise, WIMG can be 0000 or 0010.
+
+PROPERTIES
+
+ - #address-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Must have a value of 1.
+
+ - #size-cells
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Must have a value of 1.
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,interlaken-lac-portals"
+
+ - ranges
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the address and length
+ of the LAC portal memory space.
+
+===============================================================================
+Interlaken Look-Aside Controller (LAC) Software Portals Child Nodes
+
+DESCRIPTION
+There are up to 24 available software portals with each software portal
+requiring 4KB of consecutive memory within the software portal memory mapped
+space.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,interlaken-lac-portal-vX.Y" where X is
+ the Major version (IP_MJ) found in the LAC IP Block Revision
+ Register (IPBRR0), at offset 0x0BF8, and Y is the Minor version
+ (IP_MN).
+
+ Table of correspondences between version values and example chips:
+ Value Device
+ ------ -------
+ 1.0 T4240
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. The first resource represents the
+ Interlaken LAC software portal registers.
+
+ - fsl,liodn
+ Value type: <u32>
+ Definition: The logical I/O device number (LIODN) for this device. The
+ LIODN is a number expressed by this device and used to perform
+ look-ups in the IOMMU (PAMU) address table when performing
+ DMAs. This property is automatically added by u-boot.
+
+===============================================================================
+EXAMPLE
+
+lac-portals {
+ #address-cells = <0x1>;
+ #size-cells = <0x1>;
+ compatible = "fsl,interlaken-lac-portals";
+ ranges = <0x0 0xf 0xf4400000 0x20000>;
+
+ lportal0: lac-portal@0 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x204>;
+ reg = <0x0 0x1000>;
+ };
+
+ lportal1: lac-portal@1000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x205>;
+ reg = <0x1000 0x1000>;
+ };
+
+ lportal2: lac-portal@2000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x206>;
+ reg = <0x2000 0x1000>;
+ };
+
+ lportal3: lac-portal@3000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x207>;
+ reg = <0x3000 0x1000>;
+ };
+
+ lportal4: lac-portal@4000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x208>;
+ reg = <0x4000 0x1000>;
+ };
+
+ lportal5: lac-portal@5000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x209>;
+ reg = <0x5000 0x1000>;
+ };
+
+ lportal6: lac-portal@6000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x20A>;
+ reg = <0x6000 0x1000>;
+ };
+
+ lportal7: lac-portal@7000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x20B>;
+ reg = <0x7000 0x1000>;
+ };
+
+ lportal8: lac-portal@8000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x20C>;
+ reg = <0x8000 0x1000>;
+ };
+
+ lportal9: lac-portal@9000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x20D>;
+ reg = <0x9000 0x1000>;
+ };
+
+ lportal10: lac-portal@A000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x20E>;
+ reg = <0xA000 0x1000>;
+ };
+
+ lportal11: lac-portal@B000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x20F>;
+ reg = <0xB000 0x1000>;
+ };
+
+ lportal12: lac-portal@C000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x210>;
+ reg = <0xC000 0x1000>;
+ };
+
+ lportal13: lac-portal@D000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x211>;
+ reg = <0xD000 0x1000>;
+ };
+
+ lportal14: lac-portal@E000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x212>;
+ reg = <0xE000 0x1000>;
+ };
+
+ lportal15: lac-portal@F000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x213>;
+ reg = <0xF000 0x1000>;
+ };
+
+ lportal16: lac-portal@10000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x214>;
+ reg = <0x10000 0x1000>;
+ };
+
+ lportal17: lac-portal@11000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x215>;
+ reg = <0x11000 0x1000>;
+ };
+
+ lportal8: lac-portal@1200 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x216>;
+ reg = <0x12000 0x1000>;
+ };
+
+ lportal19: lac-portal@13000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x217>;
+ reg = <0x13000 0x1000>;
+ };
+
+ lportal20: lac-portal@14000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x218>;
+ reg = <0x14000 0x1000>;
+ };
+
+ lportal21: lac-portal@15000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x219>;
+ reg = <0x15000 0x1000>;
+ };
+
+ lportal22: lac-portal@16000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x21A>;
+ reg = <0x16000 0x1000>;
+ };
+
+ lportal23: lac-portal@17000 {
+ compatible = "fsl,interlaken-lac-portal-v1.0";
+ fsl,liodn = <0x21B>;
+ reg = <0x17000 0x1000>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt b/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
new file mode 100644
index 000000000..c41b2187e
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/l2cache.txt
@@ -0,0 +1,23 @@
+Freescale L2 Cache Controller
+
+L2 cache is present in Freescale's QorIQ and QorIQ Qonverge platforms.
+The cache bindings explained below are ePAPR compliant
+
+Required Properties:
+
+- compatible : Should include "fsl,chip-l2-cache-controller" and "cache"
+ where chip is the processor (bsc9132, npc8572 etc.)
+- reg : Address and size of L2 cache controller registers
+- cache-size : Size of the entire L2 cache
+- interrupts : Error interrupt of L2 controller
+- cache-line-size : Size of L2 cache lines
+
+Example:
+
+ L2: l2-cache-controller@20000 {
+ compatible = "fsl,bsc9132-l2-cache-controller", "cache";
+ reg = <0x20000 0x1000>;
+ cache-line-size = <32>; // 32 bytes
+ cache-size = <0x40000>; // L2,256K
+ interrupts = <16 2 1 0>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/lbc.txt b/Documentation/devicetree/bindings/powerpc/fsl/lbc.txt
new file mode 100644
index 000000000..1c80fcede
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/lbc.txt
@@ -0,0 +1,43 @@
+* Chipselect/Local Bus
+
+Properties:
+- name : Should be localbus
+- #address-cells : Should be either two or three. The first cell is the
+ chipselect number, and the remaining cells are the
+ offset into the chipselect.
+- #size-cells : Either one or two, depending on how large each chipselect
+ can be.
+- ranges : Each range corresponds to a single chipselect, and cover
+ the entire access window as configured.
+
+Example:
+ localbus@f0010100 {
+ compatible = "fsl,mpc8272-localbus",
+ "fsl,pq2-localbus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ reg = <0xf0010100 0x40>;
+
+ ranges = <0x0 0x0 0xfe000000 0x02000000
+ 0x1 0x0 0xf4500000 0x00008000
+ 0x2 0x0 0xfd810000 0x00010000>;
+
+ flash@0,0 {
+ compatible = "jedec-flash";
+ reg = <0x0 0x0 0x2000000>;
+ bank-width = <4>;
+ device-width = <1>;
+ };
+
+ board-control@1,0 {
+ reg = <0x1 0x0 0x20>;
+ compatible = "fsl,mpc8272ads-bcsr";
+ };
+
+ simple-periph@2,0 {
+ compatible = "fsl,elbc-gpcm-uio";
+ reg = <0x2 0x0 0x10000>;
+ elbc-gpcm-br = <0xfd810800>;
+ elbc-gpcm-or = <0xffff09f7>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mcm.txt b/Documentation/devicetree/bindings/powerpc/fsl/mcm.txt
new file mode 100644
index 000000000..4ceda9b3b
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mcm.txt
@@ -0,0 +1,64 @@
+=====================================================================
+MPX LAW & Coherency Module Device Tree Binding
+Copyright (C) 2009 Freescale Semiconductor Inc.
+=====================================================================
+
+Local Access Window (LAW) Node
+
+The LAW node represents the region of CCSR space where local access
+windows are configured. For MCM based devices this is the first 4k
+of CCSR space that includes CCSRBAR, ALTCBAR, ALTCAR, BPTR, and some
+number of local access windows as specified by fsl,num-laws.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,mcm-law"
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. The value specifies the
+ physical address offset and length of the CCSR space
+ registers.
+
+ - fsl,num-laws
+ Usage: required
+ Value type: <u32>
+ Definition: The value specifies the number of local access
+ windows for this device.
+
+=====================================================================
+
+MPX Coherency Module Node
+
+The MPX LAW node represents the region of CCSR space where MCM config
+and error reporting registers exist, this is the second 4k (0x1000)
+of CCSR space.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,CHIP-mcm", "fsl,mcm" where
+ CHIP is the processor (mpc8641, mpc8610, etc.)
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. The value specifies the
+ physical address offset and length of the CCSR space
+ registers.
+
+ - interrupts
+ Usage: required
+ Value type: <prop-encoded-array>
+
+ - interrupt-parent
+ Usage: required
+ Value type: <phandle>
+
+=====================================================================
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mcu-mpc8349emitx.txt b/Documentation/devicetree/bindings/powerpc/fsl/mcu-mpc8349emitx.txt
new file mode 100644
index 000000000..0f766333b
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mcu-mpc8349emitx.txt
@@ -0,0 +1,17 @@
+Freescale MPC8349E-mITX-compatible Power Management Micro Controller Unit (MCU)
+
+Required properties:
+- compatible : "fsl,<mcu-chip>-<board>", "fsl,mcu-mpc8349emitx".
+- reg : should specify I2C address (0x0a).
+- #gpio-cells : should be 2.
+- gpio-controller : should be present.
+
+Example:
+
+mcu@0a {
+ #gpio-cells = <2>;
+ compatible = "fsl,mc9s08qg8-mpc8349emitx",
+ "fsl,mcu-mpc8349emitx";
+ reg = <0x0a>;
+ gpio-controller;
+};
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mem-ctrlr.txt b/Documentation/devicetree/bindings/powerpc/fsl/mem-ctrlr.txt
new file mode 100644
index 000000000..f87856faf
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mem-ctrlr.txt
@@ -0,0 +1,27 @@
+Freescale DDR memory controller
+
+Properties:
+
+- compatible : Should include "fsl,chip-memory-controller" where
+ chip is the processor (bsc9132, mpc8572 etc.), or
+ "fsl,qoriq-memory-controller".
+- reg : Address and size of DDR controller registers
+- interrupts : Error interrupt of DDR controller
+
+Example 1:
+
+ memory-controller@2000 {
+ compatible = "fsl,bsc9132-memory-controller";
+ reg = <0x2000 0x1000>;
+ interrupts = <16 2 1 8>;
+ };
+
+
+Example 2:
+
+ ddr1: memory-controller@8000 {
+ compatible = "fsl,qoriq-memory-controller-v4.7",
+ "fsl,qoriq-memory-controller";
+ reg = <0x8000 0x1000>;
+ interrupts = <16 2 1 23>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpc5121-psc.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpc5121-psc.txt
new file mode 100644
index 000000000..8832e8798
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpc5121-psc.txt
@@ -0,0 +1,70 @@
+MPC5121 PSC Device Tree Bindings
+
+PSC in UART mode
+----------------
+
+For PSC in UART mode the needed PSC serial devices
+are specified by fsl,mpc5121-psc-uart nodes in the
+fsl,mpc5121-immr SoC node. Additionally the PSC FIFO
+Controller node fsl,mpc5121-psc-fifo is requered there:
+
+fsl,mpc5121-psc-uart nodes
+--------------------------
+
+Required properties :
+ - compatible : Should contain "fsl,mpc5121-psc-uart" and "fsl,mpc5121-psc"
+ - cell-index : Index of the PSC in hardware
+ - reg : Offset and length of the register set for the PSC device
+ - interrupts : <a b> where a is the interrupt number of the
+ PSC FIFO Controller and b is a field that represents an
+ encoding of the sense and level information for the interrupt.
+ - interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+
+Recommended properties :
+ - fsl,rx-fifo-size : the size of the RX fifo slice (a multiple of 4)
+ - fsl,tx-fifo-size : the size of the TX fifo slice (a multiple of 4)
+
+
+fsl,mpc5121-psc-fifo node
+-------------------------
+
+Required properties :
+ - compatible : Should be "fsl,mpc5121-psc-fifo"
+ - reg : Offset and length of the register set for the PSC
+ FIFO Controller
+ - interrupts : <a b> where a is the interrupt number of the
+ PSC FIFO Controller and b is a field that represents an
+ encoding of the sense and level information for the interrupt.
+ - interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+
+
+Example for a board using PSC0 and PSC1 devices in serial mode:
+
+serial@11000 {
+ compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+ cell-index = <0>;
+ reg = <0x11000 0x100>;
+ interrupts = <40 0x8>;
+ interrupt-parent = < &ipic >;
+ fsl,rx-fifo-size = <16>;
+ fsl,tx-fifo-size = <16>;
+};
+
+serial@11100 {
+ compatible = "fsl,mpc5121-psc-uart", "fsl,mpc5121-psc";
+ cell-index = <1>;
+ reg = <0x11100 0x100>;
+ interrupts = <40 0x8>;
+ interrupt-parent = < &ipic >;
+ fsl,rx-fifo-size = <16>;
+ fsl,tx-fifo-size = <16>;
+};
+
+pscfifo@11f00 {
+ compatible = "fsl,mpc5121-psc-fifo";
+ reg = <0x11f00 0x100>;
+ interrupts = <40 0x8>;
+ interrupt-parent = < &ipic >;
+};
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
new file mode 100644
index 000000000..4ccb2cd5d
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpc5200.txt
@@ -0,0 +1,198 @@
+MPC5200 Device Tree Bindings
+----------------------------
+
+(c) 2006-2009 Secret Lab Technologies Ltd
+Grant Likely <grant.likely@secretlab.ca>
+
+Naming conventions
+------------------
+For mpc5200 on-chip devices, the format for each compatible value is
+<chip>-<device>[-<mode>]. The OS should be able to match a device driver
+to the device based solely on the compatible value. If two drivers
+match on the compatible list; the 'most compatible' driver should be
+selected.
+
+The split between the MPC5200 and the MPC5200B leaves a bit of a
+conundrum. How should the compatible property be set up to provide
+maximum compatibility information; but still accurately describe the
+chip? For the MPC5200; the answer is easy. Most of the SoC devices
+originally appeared on the MPC5200. Since they didn't exist anywhere
+else; the 5200 compatible properties will contain only one item;
+"fsl,mpc5200-<device>".
+
+The 5200B is almost the same as the 5200, but not quite. It fixes
+silicon bugs and it adds a small number of enhancements. Most of the
+devices either provide exactly the same interface as on the 5200. A few
+devices have extra functions but still have a backwards compatible mode.
+To express this information as completely as possible, 5200B device trees
+should have two items in the compatible list:
+ compatible = "fsl,mpc5200b-<device>","fsl,mpc5200-<device>";
+
+It is *strongly* recommended that 5200B device trees follow this convention
+(instead of only listing the base mpc5200 item).
+
+ie. ethernet on mpc5200: compatible = "fsl,mpc5200-fec";
+ ethernet on mpc5200b: compatible = "fsl,mpc5200b-fec", "fsl,mpc5200-fec";
+
+Modal devices, like PSCs, also append the configured function to the
+end of the compatible field. ie. A PSC in i2s mode would specify
+"fsl,mpc5200-psc-i2s", not "fsl,mpc5200-i2s". This convention is chosen to
+avoid naming conflicts with non-psc devices providing the same
+function. For example, "fsl,mpc5200-spi" and "fsl,mpc5200-psc-spi" describe
+the mpc5200 simple spi device and a PSC spi mode respectively.
+
+At the time of writing, exact chip may be either 'fsl,mpc5200' or
+'fsl,mpc5200b'.
+
+The soc node
+------------
+This node describes the on chip SOC peripherals. Every mpc5200 based
+board will have this node, and as such there is a common naming
+convention for SOC devices.
+
+Required properties:
+name description
+---- -----------
+ranges Memory range of the internal memory mapped registers.
+ Should be <0 [baseaddr] 0xc000>
+reg Should be <[baseaddr] 0x100>
+compatible mpc5200: "fsl,mpc5200-immr"
+ mpc5200b: "fsl,mpc5200b-immr"
+system-frequency 'fsystem' frequency in Hz; XLB, IPB, USB and PCI
+ clocks are derived from the fsystem clock.
+bus-frequency IPB bus frequency in Hz. Clock rate
+ used by most of the soc devices.
+
+soc child nodes
+---------------
+Any on chip SOC devices available to Linux must appear as soc5200 child nodes.
+
+Note: The tables below show the value for the mpc5200. A mpc5200b device
+tree should use the "fsl,mpc5200b-<device>","fsl,mpc5200-<device>" form.
+
+Required soc5200 child nodes:
+name compatible Description
+---- ---------- -----------
+cdm@<addr> fsl,mpc5200-cdm Clock Distribution
+interrupt-controller@<addr> fsl,mpc5200-pic need an interrupt
+ controller to boot
+bestcomm@<addr> fsl,mpc5200-bestcomm Bestcomm DMA controller
+
+Recommended soc5200 child nodes; populate as needed for your board
+name compatible Description
+---- ---------- -----------
+timer@<addr> fsl,mpc5200-gpt General purpose timers
+gpio@<addr> fsl,mpc5200-gpio MPC5200 simple gpio controller
+gpio@<addr> fsl,mpc5200-gpio-wkup MPC5200 wakeup gpio controller
+rtc@<addr> fsl,mpc5200-rtc Real time clock
+mscan@<addr> fsl,mpc5200-mscan CAN bus controller
+pci@<addr> fsl,mpc5200-pci PCI bridge
+serial@<addr> fsl,mpc5200-psc-uart PSC in serial mode
+i2s@<addr> fsl,mpc5200-psc-i2s PSC in i2s mode
+ac97@<addr> fsl,mpc5200-psc-ac97 PSC in ac97 mode
+spi@<addr> fsl,mpc5200-psc-spi PSC in spi mode
+irda@<addr> fsl,mpc5200-psc-irda PSC in IrDA mode
+spi@<addr> fsl,mpc5200-spi MPC5200 spi device
+ethernet@<addr> fsl,mpc5200-fec MPC5200 ethernet device
+ata@<addr> fsl,mpc5200-ata IDE ATA interface
+i2c@<addr> fsl,mpc5200-i2c I2C controller
+usb@<addr> fsl,mpc5200-ohci,ohci-be USB controller
+xlb@<addr> fsl,mpc5200-xlb XLB arbitrator
+
+fsl,mpc5200-gpt nodes
+---------------------
+On the mpc5200 and 5200b, GPT0 has a watchdog timer function. If the board
+design supports the internal wdt, then the device node for GPT0 should
+include the empty property 'fsl,has-wdt'. Note that this does not activate
+the watchdog. The timer will function as a GPT if the timer api is used, and
+it will function as watchdog if the watchdog device is used. The watchdog
+mode has priority over the gpt mode, i.e. if the watchdog is activated, any
+gpt api call to this timer will fail with -EBUSY.
+
+If you add the property
+ fsl,wdt-on-boot = <n>;
+GPT0 will be marked as in-use watchdog, i.e. blocking every gpt access to it.
+If n>0, the watchdog is started with a timeout of n seconds. If n=0, the
+configuration of the watchdog is not touched. This is useful in two cases:
+- just mark GPT0 as watchdog, blocking gpt accesses, and configure it later;
+- do not touch a configuration assigned by the boot loader which supervises
+ the boot process itself.
+
+The watchdog will respect the CONFIG_WATCHDOG_NOWAYOUT option.
+
+An mpc5200-gpt can be used as a single line GPIO controller. To do so,
+add the following properties to the gpt node:
+ gpio-controller;
+ #gpio-cells = <2>;
+When referencing the GPIO line from another node, the first cell must always
+be zero and the second cell represents the gpio flags and described in the
+gpio device tree binding.
+
+An mpc5200-gpt can be used as a single line edge sensitive interrupt
+controller. To do so, add the following properties to the gpt node:
+ interrupt-controller;
+ #interrupt-cells = <1>;
+When referencing the IRQ line from another node, the cell represents the
+sense mode; 1 for edge rising, 2 for edge falling.
+
+fsl,mpc5200-psc nodes
+---------------------
+The PSCs should include a cell-index which is the index of the PSC in
+hardware. cell-index is used to determine which shared SoC registers to
+use when setting up PSC clocking. cell-index number starts at '0'. ie:
+ PSC1 has 'cell-index = <0>'
+ PSC4 has 'cell-index = <3>'
+
+PSC in i2s mode: The mpc5200 and mpc5200b PSCs are not compatible when in
+i2s mode. An 'mpc5200b-psc-i2s' node cannot include 'mpc5200-psc-i2s' in the
+compatible field.
+
+
+fsl,mpc5200-gpio and fsl,mpc5200-gpio-wkup nodes
+------------------------------------------------
+Each GPIO controller node should have the empty property gpio-controller and
+#gpio-cells set to 2. First cell is the GPIO number which is interpreted
+according to the bit numbers in the GPIO control registers. The second cell
+is for flags which is currently unused.
+
+fsl,mpc5200-fec nodes
+---------------------
+The FEC node can specify one of the following properties to configure
+the MII link:
+- fsl,7-wire-mode - An empty property that specifies the link uses 7-wire
+ mode instead of MII
+- current-speed - Specifies that the MII should be configured for a fixed
+ speed. This property should contain two cells. The
+ first cell specifies the speed in Mbps and the second
+ should be '0' for half duplex and '1' for full duplex
+- phy-handle - Contains a phandle to an Ethernet PHY.
+
+Interrupt controller (fsl,mpc5200-pic) node
+-------------------------------------------
+The mpc5200 pic binding splits hardware IRQ numbers into two levels. The
+split reflects the layout of the PIC hardware itself, which groups
+interrupts into one of three groups; CRIT, MAIN or PERP. Also, the
+Bestcomm dma engine has it's own set of interrupt sources which are
+cascaded off of peripheral interrupt 0, which the driver interprets as a
+fourth group, SDMA.
+
+The interrupts property for device nodes using the mpc5200 pic consists
+of three cells; <L1 L2 level>
+
+ L1 := [CRIT=0, MAIN=1, PERP=2, SDMA=3]
+ L2 := interrupt number; directly mapped from the value in the
+ "ICTL PerStat, MainStat, CritStat Encoded Register"
+ level := [LEVEL_HIGH=0, EDGE_RISING=1, EDGE_FALLING=2, LEVEL_LOW=3]
+
+For external IRQs, use the following interrupt property values (how to
+specify external interrupts is a frequently asked question):
+External interrupts:
+ external irq0: interrupts = <0 0 n>;
+ external irq1: interrupts = <1 1 n>;
+ external irq2: interrupts = <1 2 n>;
+ external irq3: interrupts = <1 3 n>;
+'n' is sense (0: level high, 1: edge rising, 2: edge falling 3: level low)
+
+fsl,mpc5200-mscan nodes
+-----------------------
+See file can.txt in this directory.
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic-msgr.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic-msgr.txt
new file mode 100644
index 000000000..bc8ded641
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic-msgr.txt
@@ -0,0 +1,63 @@
+* FSL MPIC Message Registers
+
+This binding specifies what properties must be available in the device tree
+representation of the message register blocks found in some FSL MPIC
+implementations.
+
+Required properties:
+
+ - compatible: Specifies the compatibility list for the message register
+ block. The type shall be <string-list> and the value shall be of the form
+ "fsl,mpic-v<version>-msgr", where <version> is the version number of
+ the MPIC containing the message registers.
+
+ - reg: Specifies the base physical address(s) and size(s) of the
+ message register block's addressable register space. The type shall be
+ <prop-encoded-array>.
+
+ - interrupts: Specifies a list of interrupt-specifiers which are available
+ for receiving interrupts. Interrupt-specifier consists of two cells: first
+ cell is interrupt-number and second cell is level-sense. The type shall be
+ <prop-encoded-array>.
+
+Optional properties:
+
+ - mpic-msgr-receive-mask: Specifies what registers in the containing block
+ are allowed to receive interrupts. The value is a bit mask where a set
+ bit at bit 'n' indicates that message register 'n' can receive interrupts.
+ Note that "bit 'n'" is numbered from LSB for PPC hardware. The type shall
+ be <u32>. If not present, then all of the message registers in the block
+ are available.
+
+Aliases:
+
+ An alias should be created for every message register block. They are not
+ required, though. However, a particular implementation of this binding
+ may require aliases to be present. Aliases are of the form
+ 'mpic-msgr-block<n>', where <n> is an integer specifying the block's number.
+ Numbers shall start at 0.
+
+Example:
+
+ aliases {
+ mpic-msgr-block0 = &mpic_msgr_block0;
+ mpic-msgr-block1 = &mpic_msgr_block1;
+ };
+
+ mpic_msgr_block0: mpic-msgr-block@41400 {
+ compatible = "fsl,mpic-v3.1-msgr";
+ reg = <0x41400 0x200>;
+ // Message registers 0 and 2 in this block can receive interrupts on
+ // sources 0xb0 and 0xb2, respectively.
+ interrupts = <0xb0 2 0xb2 2>;
+ mpic-msgr-receive-mask = <0x5>;
+ };
+
+ mpic_msgr_block1: mpic-msgr-block@42400 {
+ compatible = "fsl,mpic-v3.1-msgr";
+ reg = <0x42400 0x200>;
+ // Message registers 0 and 2 in this block can receive interrupts on
+ // sources 0xb4 and 0xb6, respectively.
+ interrupts = <0xb4 2 0xb6 2>;
+ mpic-msgr-receive-mask = <0x5>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic-timer.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic-timer.txt
new file mode 100644
index 000000000..df4195814
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic-timer.txt
@@ -0,0 +1,38 @@
+* Freescale MPIC timers
+
+Required properties:
+- compatible: "fsl,mpic-global-timer"
+
+- reg : Contains two regions. The first is the main timer register bank
+ (GTCCRxx, GTBCRxx, GTVPRxx, GTDRxx). The second is the timer control
+ register (TCRx) for the group.
+
+- fsl,available-ranges: use <start count> style section to define which
+ timer interrupts can be used. This property is optional; without this,
+ all timers within the group can be used.
+
+- interrupts: one interrupt per timer in the group, in order, starting
+ with timer zero. If timer-available-ranges is present, only the
+ interrupts that correspond to available timers shall be present.
+
+Example:
+ /* Note that this requires #interrupt-cells to be 4 */
+ timer0: timer@41100 {
+ compatible = "fsl,mpic-global-timer";
+ reg = <0x41100 0x100 0x41300 4>;
+
+ /* Another AMP partition is using timers 0 and 1 */
+ fsl,available-ranges = <2 2>;
+
+ interrupts = <2 0 3 0
+ 3 0 3 0>;
+ };
+
+ timer1: timer@42100 {
+ compatible = "fsl,mpic-global-timer";
+ reg = <0x42100 0x100 0x42300 4>;
+ interrupts = <4 0 3 0
+ 5 0 3 0
+ 6 0 3 0
+ 7 0 3 0>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
new file mode 100644
index 000000000..dc5744636
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
@@ -0,0 +1,231 @@
+=====================================================================
+Freescale MPIC Interrupt Controller Node
+Copyright (C) 2010,2011 Freescale Semiconductor Inc.
+=====================================================================
+
+The Freescale MPIC interrupt controller is found on all PowerQUICC
+and QorIQ processors and is compatible with the Open PIC. The
+notable difference from Open PIC binding is the addition of 2
+additional cells in the interrupt specifier defining interrupt type
+information.
+
+PROPERTIES
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Shall include "fsl,mpic". Freescale MPIC
+ controllers compatible with this binding have Block
+ Revision Registers BRR1 and BRR2 at offset 0x0 and
+ 0x10 in the MPIC.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical
+ offset and length of the device's registers within the
+ CCSR address space.
+
+ - interrupt-controller
+ Usage: required
+ Value type: <empty>
+ Definition: Specifies that this node is an interrupt
+ controller
+
+ - #interrupt-cells
+ Usage: required
+ Value type: <u32>
+ Definition: Shall be 2 or 4. A value of 2 means that interrupt
+ specifiers do not contain the interrupt-type or type-specific
+ information cells.
+
+ - #address-cells
+ Usage: required
+ Value type: <u32>
+ Definition: Shall be 0.
+
+ - pic-no-reset
+ Usage: optional
+ Value type: <empty>
+ Definition: The presence of this property specifies that the
+ MPIC must not be reset by the client program, and that
+ the boot program has initialized all interrupt source
+ configuration registers to a sane state-- masked or
+ directed at other cores. This ensures that the client
+ program will not receive interrupts for sources not belonging
+ to the client. The presence of this property also mandates
+ that any initialization related to interrupt sources shall
+ be limited to sources explicitly referenced in the device tree.
+
+ - big-endian
+ Usage: optional
+ Value type: <empty>
+ If present the MPIC will be assumed to be big-endian. Some
+ device-trees omit this property on MPIC nodes even when the MPIC is
+ in fact big-endian, so certain boards override this property.
+
+ - single-cpu-affinity
+ Usage: optional
+ Value type: <empty>
+ If present the MPIC will be assumed to only be able to route
+ non-IPI interrupts to a single CPU at a time (EG: Freescale MPIC).
+
+ - last-interrupt-source
+ Usage: optional
+ Value type: <u32>
+ Some MPICs do not correctly report the number of hardware sources
+ in the global feature registers. If specified, this field will
+ override the value read from MPIC_GREG_FEATURE_LAST_SRC.
+
+INTERRUPT SPECIFIER DEFINITION
+
+ Interrupt specifiers consists of 4 cells encoded as
+ follows:
+
+ <1st-cell> interrupt-number
+
+ Identifies the interrupt source. The meaning
+ depends on the type of interrupt.
+
+ Note: If the interrupt-type cell is undefined
+ (i.e. #interrupt-cells = 2), this cell
+ should be interpreted the same as for
+ interrupt-type 0-- i.e. an external or
+ normal SoC device interrupt.
+
+ <2nd-cell> level-sense information, encoded as follows:
+ 0 = low-to-high edge triggered
+ 1 = active low level-sensitive
+ 2 = active high level-sensitive
+ 3 = high-to-low edge triggered
+
+ <3rd-cell> interrupt-type
+
+ The following types are supported:
+
+ 0 = external or normal SoC device interrupt
+
+ The interrupt-number cell contains
+ the SoC device interrupt number. The
+ type-specific cell is undefined. The
+ interrupt-number is derived from the
+ MPIC a block of registers referred to as
+ the "Interrupt Source Configuration Registers".
+ Each source has 32-bytes of registers
+ (vector/priority and destination) in this
+ region. So interrupt 0 is at offset 0x0,
+ interrupt 1 is at offset 0x20, and so on.
+
+ 1 = error interrupt
+
+ The interrupt-number cell contains
+ the SoC device interrupt number for
+ the error interrupt. The type-specific
+ cell identifies the specific error
+ interrupt number.
+
+ 2 = MPIC inter-processor interrupt (IPI)
+
+ The interrupt-number cell identifies
+ the MPIC IPI number. The type-specific
+ cell is undefined.
+
+ 3 = MPIC timer interrupt
+
+ The interrupt-number cell identifies
+ the MPIC timer number. The type-specific
+ cell is undefined.
+
+ <4th-cell> type-specific information
+
+ The type-specific cell is encoded as follows:
+
+ - For interrupt-type 1 (error interrupt),
+ the type-specific cell contains the
+ bit number of the error interrupt in the
+ Error Interrupt Summary Register.
+
+EXAMPLE 1
+ /*
+ * mpic interrupt controller with 4 cells per specifier
+ */
+ mpic: pic@40000 {
+ compatible = "fsl,mpic";
+ interrupt-controller;
+ #interrupt-cells = <4>;
+ #address-cells = <0>;
+ reg = <0x40000 0x40000>;
+ };
+
+EXAMPLE 2
+ /*
+ * The MPC8544 I2C controller node has an internal
+ * interrupt number of 27. As per the reference manual
+ * this corresponds to interrupt source configuration
+ * registers at 0x5_0560.
+ *
+ * The interrupt source configuration registers begin
+ * at 0x5_0000.
+ *
+ * To compute the interrupt specifier interrupt number
+ *
+ * 0x560 >> 5 = 43
+ *
+ * The interrupt source configuration registers begin
+ * at 0x5_0000, and so the i2c vector/priority registers
+ * are at 0x5_0560.
+ */
+ i2c@3000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ cell-index = <0>;
+ compatible = "fsl-i2c";
+ reg = <0x3000 0x100>;
+ interrupts = <43 2>;
+ interrupt-parent = <&mpic>;
+ dfsrr;
+ };
+
+
+EXAMPLE 3
+ /*
+ * Definition of a node defining the 4
+ * MPIC IPI interrupts. Note the interrupt
+ * type of 2.
+ */
+ ipi@410a0 {
+ compatible = "fsl,mpic-ipi";
+ reg = <0x40040 0x10>;
+ interrupts = <0 0 2 0
+ 1 0 2 0
+ 2 0 2 0
+ 3 0 2 0>;
+ };
+
+EXAMPLE 4
+ /*
+ * Definition of a node defining the MPIC
+ * global timers. Note the interrupt
+ * type of 3.
+ */
+ timer0: timer@41100 {
+ compatible = "fsl,mpic-global-timer";
+ reg = <0x41100 0x100 0x41300 4>;
+ interrupts = <0 0 3 0
+ 1 0 3 0
+ 2 0 3 0
+ 3 0 3 0>;
+ };
+
+EXAMPLE 5
+ /*
+ * Definition of an error interrupt (interrupt type 1).
+ * SoC interrupt number is 16 and the specific error
+ * interrupt bit in the error interrupt summary register
+ * is 23.
+ */
+ memory-controller@8000 {
+ compatible = "fsl,p4080-memory-controller";
+ reg = <0x8000 0x1000>;
+ interrupts = <16 2 1 23>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt b/Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt
new file mode 100644
index 000000000..82dd5b65c
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/msi-pic.txt
@@ -0,0 +1,116 @@
+* Freescale MSI interrupt controller
+
+Required properties:
+- compatible : compatible list, may contain one or two entries
+ The first is "fsl,CHIP-msi", where CHIP is the processor(mpc8610, mpc8572,
+ etc.) and the second is "fsl,mpic-msi" or "fsl,ipic-msi" or
+ "fsl,mpic-msi-v4.3" depending on the parent type and version. If mpic
+ version is 4.3, the number of MSI registers is increased to 16, MSIIR1 is
+ provided to access these 16 registers, and compatible "fsl,mpic-msi-v4.3"
+ should be used. The first entry is optional; the second entry is
+ required.
+
+- reg : It may contain one or two regions. The first region should contain
+ the address and the length of the shared message interrupt register set.
+ The second region should contain the address of aliased MSIIR or MSIIR1
+ register for platforms that have such an alias, if using MSIIR1, the second
+ region must be added because different MSI group has different MSIIR1 offset.
+
+- interrupts : each one of the interrupts here is one entry per 32 MSIs,
+ and routed to the host interrupt controller. the interrupts should
+ be set as edge sensitive. If msi-available-ranges is present, only
+ the interrupts that correspond to available ranges shall be present.
+
+- interrupt-parent: the phandle for the interrupt controller
+ that services interrupts for this device. for 83xx cpu, the interrupts
+ are routed to IPIC, and for 85xx/86xx cpu the interrupts are routed
+ to MPIC.
+
+Optional properties:
+- msi-available-ranges: use <start count> style section to define which
+ msi interrupt can be used in the 256 msi interrupts. This property is
+ optional, without this, all the MSI interrupts can be used.
+ Each available range must begin and end on a multiple of 32 (i.e.
+ no splitting an individual MSI register or the associated PIC interrupt).
+ MPIC v4.3 does not support this property because the 32 interrupts of an
+ individual register are not continuous when using MSIIR1.
+
+- msi-address-64: 64-bit PCI address of the MSIIR register. The MSIIR register
+ is used for MSI messaging. The address of MSIIR in PCI address space is
+ the MSI message address.
+
+ This property may be used in virtualized environments where the hypervisor
+ has created an alternate mapping for the MSIR block. See below for an
+ explanation.
+
+
+Example:
+ msi@41600 {
+ compatible = "fsl,mpc8610-msi", "fsl,mpic-msi";
+ reg = <0x41600 0x80>;
+ msi-available-ranges = <0 0x100>;
+ interrupts = <
+ 0xe0 0
+ 0xe1 0
+ 0xe2 0
+ 0xe3 0
+ 0xe4 0
+ 0xe5 0
+ 0xe6 0
+ 0xe7 0>;
+ interrupt-parent = <&mpic>;
+ };
+
+ msi@41600 {
+ compatible = "fsl,mpic-msi-v4.3";
+ reg = <0x41600 0x200 0x44148 4>;
+ interrupts = <
+ 0xe0 0 0 0
+ 0xe1 0 0 0
+ 0xe2 0 0 0
+ 0xe3 0 0 0
+ 0xe4 0 0 0
+ 0xe5 0 0 0
+ 0xe6 0 0 0
+ 0xe7 0 0 0
+ 0x100 0 0 0
+ 0x101 0 0 0
+ 0x102 0 0 0
+ 0x103 0 0 0
+ 0x104 0 0 0
+ 0x105 0 0 0
+ 0x106 0 0 0
+ 0x107 0 0 0>;
+ };
+
+The Freescale hypervisor and msi-address-64
+-------------------------------------------
+Normally, PCI devices have access to all of CCSR via an ATMU mapping. The
+Freescale MSI driver calculates the address of MSIIR (in the MSI register
+block) and sets that address as the MSI message address.
+
+In a virtualized environment, the hypervisor may need to create an IOMMU
+mapping for MSIIR. The Freescale ePAPR hypervisor has this requirement
+because of hardware limitations of the Peripheral Access Management Unit
+(PAMU), which is currently the only IOMMU that the hypervisor supports.
+The ATMU is programmed with the guest physical address, and the PAMU
+intercepts transactions and reroutes them to the true physical address.
+
+In the PAMU, each PCI controller is given only one primary window. The
+PAMU restricts DMA operations so that they can only occur within a window.
+Because PCI devices must be able to DMA to memory, the primary window must
+be used to cover all of the guest's memory space.
+
+PAMU primary windows can be divided into 256 subwindows, and each
+subwindow can have its own address mapping ("guest physical" to "true
+physical"). However, each subwindow has to have the same alignment, which
+means they cannot be located at just any address. Because of these
+restrictions, it is usually impossible to create a 4KB subwindow that
+covers MSIIR where it's normally located.
+
+Therefore, the hypervisor has to create a subwindow inside the same
+primary window used for memory, but mapped to the MSIR block (where MSIIR
+lives). The first subwindow after the end of guest memory is used for
+this. The address specified in the msi-address-64 property is the PCI
+address of MSIIR. The hypervisor configures the PAMU to map that address to
+the true physical address of MSIIR.
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/pamu.txt b/Documentation/devicetree/bindings/powerpc/fsl/pamu.txt
new file mode 100644
index 000000000..c2b289988
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/pamu.txt
@@ -0,0 +1,150 @@
+Freescale Peripheral Management Access Unit (PAMU) Device Tree Binding
+
+DESCRIPTION
+
+The PAMU is an I/O MMU that provides device-to-memory access control and
+address translation capabilities.
+
+Required properties:
+
+- compatible : <string>
+ First entry is a version-specific string, such as
+ "fsl,pamu-v1.0". The second is "fsl,pamu".
+- ranges : <prop-encoded-array>
+ A standard property. Utilized to describe the memory mapped
+ I/O space utilized by the controller. The size should
+ be set to the total size of the register space of all
+ physically present PAMU controllers. For example, for
+ PAMU v1.0, on an SOC that has five PAMU devices, the size
+ is 0x5000.
+- interrupts : <prop-encoded-array>
+ Interrupt mappings. The first tuple is the normal PAMU
+ interrupt, used for reporting access violations. The second
+ is for PAMU hardware errors, such as PAMU operation errors
+ and ECC errors.
+- #address-cells: <u32>
+ A standard property.
+- #size-cells : <u32>
+ A standard property.
+
+Optional properties:
+- reg : <prop-encoded-array>
+ A standard property. It represents the CCSR registers of
+ all child PAMUs combined. Include it to provide support
+ for legacy drivers.
+- interrupt-parent : <phandle>
+ Phandle to interrupt controller
+- fsl,portid-mapping : <u32>
+ The Coherency Subdomain ID Port Mapping Registers and
+ Snoop ID Port Mapping registers, which are part of the
+ CoreNet Coherency fabric (CCF), provide a CoreNet
+ Coherency Subdomain ID/CoreNet Snoop ID to pamu mapping
+ functions. Certain bits from these registers should be
+ set if PAMUs should be snooped. This property defines
+ a bitmask which selects the bits that should be set if
+ PAMUs should be snooped.
+
+Child nodes:
+
+Each child node represents one PAMU controller. Each SOC device that is
+connected to a specific PAMU device should have a "fsl,pamu-phandle" property
+that links to the corresponding specific child PAMU controller.
+
+- reg : <prop-encoded-array>
+ A standard property. Specifies the physical address and
+ length (relative to the parent 'ranges' property) of this
+ PAMU controller's configuration registers. The size should
+ be set to the size of this PAMU controllers's register space.
+ For PAMU v1.0, this size is 0x1000.
+- fsl,primary-cache-geometry
+ : <prop-encoded-array>
+ Two cells that specify the geometry of the primary PAMU
+ cache. The first is the number of cache lines, and the
+ second is the number of "ways". For direct-mapped caches,
+ specify a value of 1.
+- fsl,secondary-cache-geometry
+ : <prop-encoded-array>
+ Two cells that specify the geometry of the secondary PAMU
+ cache. The first is the number of cache lines, and the
+ second is the number of "ways". For direct-mapped caches,
+ specify a value of 1.
+
+Device nodes:
+
+Devices that have LIODNs need to specify links to the parent PAMU controller
+(the actual PAMU controller that this device is connected to) and a pointer to
+the LIODN register, if applicable.
+
+- fsl,iommu-parent
+ : <phandle>
+ Phandle to the single, specific PAMU controller node to which
+ this device is connect. The PAMU topology is represented in
+ the device tree to assist code that dynamically determines the
+ best LIODN values to minimize PAMU cache thrashing.
+
+- fsl,liodn-reg : <prop-encoded-array>
+ Two cells that specify the location of the LIODN register
+ for this device. Required for devices that have a single
+ LIODN. The first cell is a phandle to a node that contains
+ the registers where the LIODN is to be set. The second is
+ the offset from the first "reg" resource of the node where
+ the specific LIODN register is located.
+
+
+Example:
+
+ iommu@20000 {
+ compatible = "fsl,pamu-v1.0", "fsl,pamu";
+ reg = <0x20000 0x5000>;
+ ranges = <0 0x20000 0x5000>;
+ fsl,portid-mapping = <0xf80000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupts = <
+ 24 2 0 0
+ 16 2 1 30>;
+
+ pamu0: pamu@0 {
+ reg = <0 0x1000>;
+ fsl,primary-cache-geometry = <32 1>;
+ fsl,secondary-cache-geometry = <128 2>;
+ };
+
+ pamu1: pamu@1000 {
+ reg = <0x1000 0x1000>;
+ fsl,primary-cache-geometry = <32 1>;
+ fsl,secondary-cache-geometry = <128 2>;
+ };
+
+ pamu2: pamu@2000 {
+ reg = <0x2000 0x1000>;
+ fsl,primary-cache-geometry = <32 1>;
+ fsl,secondary-cache-geometry = <128 2>;
+ };
+
+ pamu3: pamu@3000 {
+ reg = <0x3000 0x1000>;
+ fsl,primary-cache-geometry = <32 1>;
+ fsl,secondary-cache-geometry = <128 2>;
+ };
+
+ pamu4: pamu@4000 {
+ reg = <0x4000 0x1000>;
+ fsl,primary-cache-geometry = <32 1>;
+ fsl,secondary-cache-geometry = <128 2>;
+ };
+ };
+
+ guts: global-utilities@e0000 {
+ compatible = "fsl,qoriq-device-config-1.0";
+ reg = <0xe0000 0xe00>;
+ fsl,has-rstcr;
+ #sleep-cells = <1>;
+ fsl,liodn-bits = <12>;
+ };
+
+/include/ "qoriq-dma-0.dtsi"
+ dma@100300 {
+ fsl,iommu-parent = <&pamu0>;
+ fsl,liodn-reg = <&guts 0x584>; /* DMA2LIODNR */
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/pmc.txt b/Documentation/devicetree/bindings/powerpc/fsl/pmc.txt
new file mode 100644
index 000000000..07256b7ff
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/pmc.txt
@@ -0,0 +1,63 @@
+* Power Management Controller
+
+Properties:
+- compatible: "fsl,<chip>-pmc".
+
+ "fsl,mpc8349-pmc" should be listed for any chip whose PMC is
+ compatible. "fsl,mpc8313-pmc" should also be listed for any chip
+ whose PMC is compatible, and implies deep-sleep capability.
+
+ "fsl,mpc8548-pmc" should be listed for any chip whose PMC is
+ compatible. "fsl,mpc8536-pmc" should also be listed for any chip
+ whose PMC is compatible, and implies deep-sleep capability.
+
+ "fsl,mpc8641d-pmc" should be listed for any chip whose PMC is
+ compatible; all statements below that apply to "fsl,mpc8548-pmc" also
+ apply to "fsl,mpc8641d-pmc".
+
+ Compatibility does not include bit assignments in SCCR/PMCDR/DEVDISR; these
+ bit assignments are indicated via the sleep specifier in each device's
+ sleep property.
+
+- reg: For devices compatible with "fsl,mpc8349-pmc", the first resource
+ is the PMC block, and the second resource is the Clock Configuration
+ block.
+
+ For devices compatible with "fsl,mpc8548-pmc", the first resource
+ is a 32-byte block beginning with DEVDISR.
+
+- interrupts: For "fsl,mpc8349-pmc"-compatible devices, the first
+ resource is the PMC block interrupt.
+
+- fsl,mpc8313-wakeup-timer: For "fsl,mpc8313-pmc"-compatible devices,
+ this is a phandle to an "fsl,gtm" node on which timer 4 can be used as
+ a wakeup source from deep sleep.
+
+Sleep specifiers:
+
+ fsl,mpc8349-pmc: Sleep specifiers consist of one cell. For each bit
+ that is set in the cell, the corresponding bit in SCCR will be saved
+ and cleared on suspend, and restored on resume. This sleep controller
+ supports disabling and resuming devices at any time.
+
+ fsl,mpc8536-pmc: Sleep specifiers consist of three cells, the third of
+ which will be ORed into PMCDR upon suspend, and cleared from PMCDR
+ upon resume. The first two cells are as described for fsl,mpc8578-pmc.
+ This sleep controller only supports disabling devices during system
+ sleep, or permanently.
+
+ fsl,mpc8548-pmc: Sleep specifiers consist of one or two cells, the
+ first of which will be ORed into DEVDISR (and the second into
+ DEVDISR2, if present -- this cell should be zero or absent if the
+ hardware does not have DEVDISR2) upon a request for permanent device
+ disabling. This sleep controller does not support configuring devices
+ to disable during system sleep (unless supported by another compatible
+ match), or dynamically.
+
+Example:
+
+ power@b00 {
+ compatible = "fsl,mpc8313-pmc", "fsl,mpc8349-pmc";
+ reg = <0xb00 0x100 0xa00 0x100>;
+ interrupts = <80 8>;
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/raideng.txt b/Documentation/devicetree/bindings/powerpc/fsl/raideng.txt
new file mode 100644
index 000000000..4ad29b9ac
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/raideng.txt
@@ -0,0 +1,81 @@
+* Freescale 85xx RAID Engine nodes
+
+RAID Engine nodes are defined to describe on-chip RAID accelerators. Each RAID
+Engine should have a separate node.
+
+Supported chips:
+P5020, P5040
+
+Required properties:
+
+- compatible: Should contain "fsl,raideng-v1.0" as the value
+ This identifies RAID Engine block. 1 in 1.0 represents
+ major number whereas 0 represents minor number. The
+ version matches the hardware IP version.
+- reg: offset and length of the register set for the device
+- ranges: standard ranges property specifying the translation
+ between child address space and parent address space
+
+Example:
+ /* P5020 */
+ raideng: raideng@320000 {
+ compatible = "fsl,raideng-v1.0";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x320000 0x10000>;
+ ranges = <0 0x320000 0x10000>;
+ };
+
+
+There must be a sub-node for each job queue present in RAID Engine
+This node must be a sub-node of the main RAID Engine node
+
+- compatible: Should contain "fsl,raideng-v1.0-job-queue" as the value
+ This identifies the job queue interface
+- reg: offset and length of the register set for job queue
+- ranges: standard ranges property specifying the translation
+ between child address space and parent address space
+
+Example:
+ /* P5020 */
+ raideng_jq0@1000 {
+ compatible = "fsl,raideng-v1.0-job-queue";
+ reg = <0x1000 0x1000>;
+ ranges = <0x0 0x1000 0x1000>;
+ };
+
+
+There must be a sub-node for each job ring present in RAID Engine
+This node must be a sub-node of job queue node
+
+- compatible: Must contain "fsl,raideng-v1.0-job-ring" as the value
+ This identifies job ring. Should contain either
+ "fsl,raideng-v1.0-hp-ring" or "fsl,raideng-v1.0-lp-ring"
+ depending upon whether ring has high or low priority
+- reg: offset and length of the register set for job ring
+- interrupts: interrupt mapping for job ring IRQ
+
+Optional property:
+
+- fsl,liodn: Specifies the LIODN to be used for Job Ring. This
+ property is normally set by firmware. Value
+ is of 12-bits which is the LIODN number for this JR.
+ This property is used by the IOMMU (PAMU) to distinquish
+ transactions from this JR and than be able to do address
+ translation & protection accordingly.
+
+Example:
+ /* P5020 */
+ raideng_jq0@1000 {
+ compatible = "fsl,raideng-v1.0-job-queue";
+ reg = <0x1000 0x1000>;
+ ranges = <0x0 0x1000 0x1000>;
+
+ raideng_jr0: jr@0 {
+ compatible = "fsl,raideng-v1.0-job-ring", "fsl,raideng-v1.0-hp-ring";
+ reg = <0x0 0x400>;
+ interrupts = <139 2 0 0>;
+ interrupt-parent = <&mpic>;
+ fsl,liodn = <0x41>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt b/Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt
new file mode 100644
index 000000000..b9a8a2bcf
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/srio-rmu.txt
@@ -0,0 +1,163 @@
+Message unit node:
+
+For SRIO controllers that implement the message unit as part of the controller
+this node is required. For devices with RMAN this node should NOT exist. The
+node is composed of three types of sub-nodes ("fsl-srio-msg-unit",
+"fsl-srio-dbell-unit" and "fsl-srio-port-write-unit").
+
+See srio.txt for more details about generic SRIO controller details.
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,srio-rmu-vX.Y", "fsl,srio-rmu".
+
+ The version X.Y should match the general SRIO controller's IP Block
+ revision register's Major(X) and Minor (Y) value.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address and
+ length of the SRIO configuration registers for message units
+ and doorbell units.
+
+ - fsl,liodn
+ Usage: optional-but-recommended (for devices with PAMU)
+ Value type: <prop-encoded-array>
+ Definition: The logical I/O device number for the PAMU (IOMMU) to be
+ correctly configured for SRIO accesses. The property should
+ not exist on devices that do not support PAMU.
+
+ The LIODN value is associated with all RMU transactions
+ (msg-unit, doorbell, port-write).
+
+Sub-Nodes for RMU: The RMU node is composed of multiple sub-nodes that
+correspond to the actual sub-controllers in the RMU. The manual for a given
+SoC will detail which and how many of these sub-controllers are implemented.
+
+Message Unit:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,srio-msg-unit-vX.Y", "fsl,srio-msg-unit".
+
+ The version X.Y should match the general SRIO controller's IP Block
+ revision register's Major(X) and Minor (Y) value.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address and
+ length of the SRIO configuration registers for message units
+ and doorbell units.
+
+ - interrupts
+ Usage: required
+ Value type: <prop_encoded-array>
+ Definition: Specifies the interrupts generated by this device. The
+ value of the interrupts property consists of one interrupt
+ specifier. The format of the specifier is defined by the
+ binding document describing the node's interrupt parent.
+
+ A pair of IRQs are specified in this property. The first
+ element is associated with the transmit (TX) interrupt and the
+ second element is associated with the receive (RX) interrupt.
+
+Doorbell Unit:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include:
+ "fsl,srio-dbell-unit-vX.Y", "fsl,srio-dbell-unit"
+
+ The version X.Y should match the general SRIO controller's IP Block
+ revision register's Major(X) and Minor (Y) value.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address and
+ length of the SRIO configuration registers for message units
+ and doorbell units.
+
+ - interrupts
+ Usage: required
+ Value type: <prop_encoded-array>
+ Definition: Specifies the interrupts generated by this device. The
+ value of the interrupts property consists of one interrupt
+ specifier. The format of the specifier is defined by the
+ binding document describing the node's interrupt parent.
+
+ A pair of IRQs are specified in this property. The first
+ element is associated with the transmit (TX) interrupt and the
+ second element is associated with the receive (RX) interrupt.
+
+Port-Write Unit:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include:
+ "fsl,srio-port-write-unit-vX.Y", "fsl,srio-port-write-unit"
+
+ The version X.Y should match the general SRIO controller's IP Block
+ revision register's Major(X) and Minor (Y) value.
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address and
+ length of the SRIO configuration registers for message units
+ and doorbell units.
+
+ - interrupts
+ Usage: required
+ Value type: <prop_encoded-array>
+ Definition: Specifies the interrupts generated by this device. The
+ value of the interrupts property consists of one interrupt
+ specifier. The format of the specifier is defined by the
+ binding document describing the node's interrupt parent.
+
+ A single IRQ that handles port-write conditions is
+ specified by this property. (Typically shared with error).
+
+ Note: All other standard properties (see the ePAPR) are allowed
+ but are optional.
+
+Example:
+ rmu: rmu@d3000 {
+ compatible = "fsl,srio-rmu";
+ reg = <0xd3000 0x400>;
+ ranges = <0x0 0xd3000 0x400>;
+ fsl,liodn = <0xc8>;
+
+ message-unit@0 {
+ compatible = "fsl,srio-msg-unit";
+ reg = <0x0 0x100>;
+ interrupts = <
+ 60 2 0 0 /* msg1_tx_irq */
+ 61 2 0 0>;/* msg1_rx_irq */
+ };
+ message-unit@100 {
+ compatible = "fsl,srio-msg-unit";
+ reg = <0x100 0x100>;
+ interrupts = <
+ 62 2 0 0 /* msg2_tx_irq */
+ 63 2 0 0>;/* msg2_rx_irq */
+ };
+ doorbell-unit@400 {
+ compatible = "fsl,srio-dbell-unit";
+ reg = <0x400 0x80>;
+ interrupts = <
+ 56 2 0 0 /* bell_outb_irq */
+ 57 2 0 0>;/* bell_inb_irq */
+ };
+ port-write-unit@4e0 {
+ compatible = "fsl,srio-port-write-unit";
+ reg = <0x4e0 0x20>;
+ interrupts = <16 2 1 11>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/fsl/srio.txt b/Documentation/devicetree/bindings/powerpc/fsl/srio.txt
new file mode 100644
index 000000000..07abf0f2f
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/fsl/srio.txt
@@ -0,0 +1,103 @@
+* Freescale Serial RapidIO (SRIO) Controller
+
+RapidIO port node:
+Properties:
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: Must include "fsl,srio" for IP blocks with IP Block
+ Revision Register (SRIO IPBRR1) Major ID equal to 0x01c0.
+
+ Optionally, a compatible string of "fsl,srio-vX.Y" where X is Major
+ version in IP Block Revision Register and Y is Minor version. If this
+ compatible is provided it should be ordered before "fsl,srio".
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies the physical address and
+ length of the SRIO configuration registers. The size should
+ be set to 0x11000.
+
+ - interrupts
+ Usage: required
+ Value type: <prop_encoded-array>
+ Definition: Specifies the interrupts generated by this device. The
+ value of the interrupts property consists of one interrupt
+ specifier. The format of the specifier is defined by the
+ binding document describing the node's interrupt parent.
+
+ A single IRQ that handles error conditions is specified by this
+ property. (Typically shared with port-write).
+
+ - fsl,srio-rmu-handle:
+ Usage: required if rmu node is defined
+ Value type: <phandle>
+ Definition: A single <phandle> value that points to the RMU.
+ (See srio-rmu.txt for more details on RMU node binding)
+
+Port Child Nodes: There should a port child node for each port that exists in
+the controller. The ports are numbered starting at one (1) and should have
+the following properties:
+
+ - cell-index
+ Usage: required
+ Value type: <u32>
+ Definition: A standard property. Matches the port id.
+
+ - ranges
+ Usage: required if local access windows preset
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Utilized to describe the memory mapped
+ IO space utilized by the controller. This corresponds to the
+ setting of the local access windows that are targeted to this
+ SRIO port.
+
+ - fsl,liodn
+ Usage: optional-but-recommended (for devices with PAMU)
+ Value type: <prop-encoded-array>
+ Definition: The logical I/O device number for the PAMU (IOMMU) to be
+ correctly configured for SRIO accesses. The property should
+ not exist on devices that do not support PAMU.
+
+ For HW (ie, the P4080) that only supports a LIODN for both
+ memory and maintenance transactions then a single LIODN is
+ represented in the property for both transactions.
+
+ For HW (ie, the P304x/P5020, etc) that supports an LIODN for
+ memory transactions and a unique LIODN for maintenance
+ transactions then a pair of LIODNs are represented in the
+ property. Within the pair, the first element represents the
+ LIODN associated with memory transactions and the second element
+ represents the LIODN associated with maintenance transactions
+ for the port.
+
+Note: All other standard properties (see ePAPR) are allowed but are optional.
+
+Example:
+
+ rapidio: rapidio@ffe0c0000 {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ reg = <0xf 0xfe0c0000 0 0x11000>;
+ compatible = "fsl,srio";
+ interrupts = <16 2 1 11>; /* err_irq */
+ fsl,srio-rmu-handle = <&rmu>;
+ ranges;
+
+ port1 {
+ cell-index = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ fsl,liodn = <34>;
+ ranges = <0 0 0xc 0x20000000 0 0x10000000>;
+ };
+
+ port2 {
+ cell-index = <2>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ fsl,liodn = <48>;
+ ranges = <0 0 0xc 0x30000000 0 0x10000000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/powerpc/nintendo/gamecube.txt b/Documentation/devicetree/bindings/powerpc/nintendo/gamecube.txt
new file mode 100644
index 000000000..b558585b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/nintendo/gamecube.txt
@@ -0,0 +1,109 @@
+
+Nintendo GameCube device tree
+=============================
+
+1) The "flipper" node
+
+ This node represents the multi-function "Flipper" chip, which packages
+ many of the devices found in the Nintendo GameCube.
+
+ Required properties:
+
+ - compatible : Should be "nintendo,flipper"
+
+1.a) The Video Interface (VI) node
+
+ Represents the interface between the graphics processor and a external
+ video encoder.
+
+ Required properties:
+
+ - compatible : should be "nintendo,flipper-vi"
+ - reg : should contain the VI registers location and length
+ - interrupts : should contain the VI interrupt
+
+1.b) The Processor Interface (PI) node
+
+ Represents the data and control interface between the main processor
+ and graphics and audio processor.
+
+ Required properties:
+
+ - compatible : should be "nintendo,flipper-pi"
+ - reg : should contain the PI registers location and length
+
+1.b.i) The "Flipper" interrupt controller node
+
+ Represents the interrupt controller within the "Flipper" chip.
+ The node for the "Flipper" interrupt controller must be placed under
+ the PI node.
+
+ Required properties:
+
+ - compatible : should be "nintendo,flipper-pic"
+
+1.c) The Digital Signal Procesor (DSP) node
+
+ Represents the digital signal processor interface, designed to offload
+ audio related tasks.
+
+ Required properties:
+
+ - compatible : should be "nintendo,flipper-dsp"
+ - reg : should contain the DSP registers location and length
+ - interrupts : should contain the DSP interrupt
+
+1.c.i) The Auxiliary RAM (ARAM) node
+
+ Represents the non cpu-addressable ram designed mainly to store audio
+ related information.
+ The ARAM node must be placed under the DSP node.
+
+ Required properties:
+
+ - compatible : should be "nintendo,flipper-aram"
+ - reg : should contain the ARAM start (zero-based) and length
+
+1.d) The Disk Interface (DI) node
+
+ Represents the interface used to communicate with mass storage devices.
+
+ Required properties:
+
+ - compatible : should be "nintendo,flipper-di"
+ - reg : should contain the DI registers location and length
+ - interrupts : should contain the DI interrupt
+
+1.e) The Audio Interface (AI) node
+
+ Represents the interface to the external 16-bit stereo digital-to-analog
+ converter.
+
+ Required properties:
+
+ - compatible : should be "nintendo,flipper-ai"
+ - reg : should contain the AI registers location and length
+ - interrupts : should contain the AI interrupt
+
+1.f) The Serial Interface (SI) node
+
+ Represents the interface to the four single bit serial interfaces.
+ The SI is a proprietary serial interface used normally to control gamepads.
+ It's NOT a RS232-type interface.
+
+ Required properties:
+
+ - compatible : should be "nintendo,flipper-si"
+ - reg : should contain the SI registers location and length
+ - interrupts : should contain the SI interrupt
+
+1.g) The External Interface (EXI) node
+
+ Represents the multi-channel SPI-like interface.
+
+ Required properties:
+
+ - compatible : should be "nintendo,flipper-exi"
+ - reg : should contain the EXI registers location and length
+ - interrupts : should contain the EXI interrupt
+
diff --git a/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt b/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt
new file mode 100644
index 000000000..36afa322b
--- /dev/null
+++ b/Documentation/devicetree/bindings/powerpc/nintendo/wii.txt
@@ -0,0 +1,184 @@
+
+Nintendo Wii device tree
+========================
+
+0) The root node
+
+ This node represents the Nintendo Wii video game console.
+
+ Required properties:
+
+ - model : Should be "nintendo,wii"
+ - compatible : Should be "nintendo,wii"
+
+1) The "hollywood" node
+
+ This node represents the multi-function "Hollywood" chip, which packages
+ many of the devices found in the Nintendo Wii.
+
+ Required properties:
+
+ - compatible : Should be "nintendo,hollywood"
+
+1.a) The Video Interface (VI) node
+
+ Represents the interface between the graphics processor and a external
+ video encoder.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-vi","nintendo,flipper-vi"
+ - reg : should contain the VI registers location and length
+ - interrupts : should contain the VI interrupt
+
+1.b) The Processor Interface (PI) node
+
+ Represents the data and control interface between the main processor
+ and graphics and audio processor.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-pi","nintendo,flipper-pi"
+ - reg : should contain the PI registers location and length
+
+1.b.i) The "Flipper" interrupt controller node
+
+ Represents the "Flipper" interrupt controller within the "Hollywood" chip.
+ The node for the "Flipper" interrupt controller must be placed under
+ the PI node.
+
+ Required properties:
+
+ - #interrupt-cells : <1>
+ - compatible : should be "nintendo,flipper-pic"
+ - interrupt-controller
+
+1.c) The Digital Signal Procesor (DSP) node
+
+ Represents the digital signal processor interface, designed to offload
+ audio related tasks.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-dsp","nintendo,flipper-dsp"
+ - reg : should contain the DSP registers location and length
+ - interrupts : should contain the DSP interrupt
+
+1.d) The Serial Interface (SI) node
+
+ Represents the interface to the four single bit serial interfaces.
+ The SI is a proprietary serial interface used normally to control gamepads.
+ It's NOT a RS232-type interface.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-si","nintendo,flipper-si"
+ - reg : should contain the SI registers location and length
+ - interrupts : should contain the SI interrupt
+
+1.e) The Audio Interface (AI) node
+
+ Represents the interface to the external 16-bit stereo digital-to-analog
+ converter.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-ai","nintendo,flipper-ai"
+ - reg : should contain the AI registers location and length
+ - interrupts : should contain the AI interrupt
+
+1.f) The External Interface (EXI) node
+
+ Represents the multi-channel SPI-like interface.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-exi","nintendo,flipper-exi"
+ - reg : should contain the EXI registers location and length
+ - interrupts : should contain the EXI interrupt
+
+1.g) The Open Host Controller Interface (OHCI) nodes
+
+ Represent the USB 1.x Open Host Controller Interfaces.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-usb-ohci","usb-ohci"
+ - reg : should contain the OHCI registers location and length
+ - interrupts : should contain the OHCI interrupt
+
+1.h) The Enhanced Host Controller Interface (EHCI) node
+
+ Represents the USB 2.0 Enhanced Host Controller Interface.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-usb-ehci","usb-ehci"
+ - reg : should contain the EHCI registers location and length
+ - interrupts : should contain the EHCI interrupt
+
+1.i) The Secure Digital Host Controller Interface (SDHCI) nodes
+
+ Represent the Secure Digital Host Controller Interfaces.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-sdhci","sdhci"
+ - reg : should contain the SDHCI registers location and length
+ - interrupts : should contain the SDHCI interrupt
+
+1.j) The Inter-Processor Communication (IPC) node
+
+ Represent the Inter-Processor Communication interface. This interface
+ enables communications between the Broadway and the Starlet processors.
+
+ - compatible : should be "nintendo,hollywood-ipc"
+ - reg : should contain the IPC registers location and length
+ - interrupts : should contain the IPC interrupt
+
+1.k) The "Hollywood" interrupt controller node
+
+ Represents the "Hollywood" interrupt controller within the
+ "Hollywood" chip.
+
+ Required properties:
+
+ - #interrupt-cells : <1>
+ - compatible : should be "nintendo,hollywood-pic"
+ - reg : should contain the controller registers location and length
+ - interrupt-controller
+ - interrupts : should contain the cascade interrupt of the "flipper" pic
+ - interrupt-parent: should contain the phandle of the "flipper" pic
+
+1.l) The General Purpose I/O (GPIO) controller node
+
+ Represents the dual access 32 GPIO controller interface.
+
+ Required properties:
+
+ - #gpio-cells : <2>
+ - compatible : should be "nintendo,hollywood-gpio"
+ - reg : should contain the IPC registers location and length
+ - gpio-controller
+
+1.m) The control node
+
+ Represents the control interface used to setup several miscellaneous
+ settings of the "Hollywood" chip like boot memory mappings, resets,
+ disk interface mode, etc.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-control"
+ - reg : should contain the control registers location and length
+
+1.n) The Disk Interface (DI) node
+
+ Represents the interface used to communicate with mass storage devices.
+
+ Required properties:
+
+ - compatible : should be "nintendo,hollywood-di"
+ - reg : should contain the DI registers location and length
+ - interrupts : should contain the DI interrupt
+
diff --git a/Documentation/devicetree/bindings/pps/pps-gpio.txt b/Documentation/devicetree/bindings/pps/pps-gpio.txt
new file mode 100644
index 000000000..40bf9c356
--- /dev/null
+++ b/Documentation/devicetree/bindings/pps/pps-gpio.txt
@@ -0,0 +1,20 @@
+Device-Tree Bindings for a PPS Signal on GPIO
+
+These properties describe a PPS (pulse-per-second) signal connected to
+a GPIO pin.
+
+Required properties:
+- compatible: should be "pps-gpio"
+- gpios: one PPS GPIO in the format described by ../gpio/gpio.txt
+
+Optional properties:
+- assert-falling-edge: when present, assert is indicated by a falling edge
+ (instead of by a rising edge)
+
+Example:
+ pps {
+ compatible = "pps-gpio";
+ gpios = <&gpio2 6 0>;
+
+ assert-falling-edge;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt
new file mode 100644
index 000000000..cfda0d57d
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/atmel-hlcdc-pwm.txt
@@ -0,0 +1,29 @@
+Device-Tree bindings for Atmel's HLCDC (High-end LCD Controller) PWM driver
+
+The Atmel HLCDC PWM is subdevice of the HLCDC MFD device.
+See ../mfd/atmel-hlcdc.txt for more details.
+
+Required properties:
+ - compatible: value should be one of the following:
+ "atmel,hlcdc-pwm"
+ - pinctr-names: the pin control state names. Should contain "default".
+ - pinctrl-0: should contain the pinctrl states described by pinctrl
+ default.
+ - #pwm-cells: should be set to 3. This PWM chip use the default 3 cells
+ bindings defined in pwm.txt in this directory.
+
+Example:
+
+ hlcdc: hlcdc@f0030000 {
+ compatible = "atmel,sama5d3-hlcdc";
+ reg = <0xf0030000 0x2000>;
+ clocks = <&lcdc_clk>, <&lcdck>, <&clk32k>;
+ clock-names = "periph_clk","sys_clk", "slow_clk";
+
+ hlcdc_pwm: hlcdc-pwm {
+ compatible = "atmel,hlcdc-pwm";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_lcd_pwm>;
+ #pwm-cells = <3>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pwm/atmel-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-pwm.txt
new file mode 100644
index 000000000..02331b904
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/atmel-pwm.txt
@@ -0,0 +1,33 @@
+Atmel PWM controller
+
+Required properties:
+ - compatible: should be one of:
+ - "atmel,at91sam9rl-pwm"
+ - "atmel,sama5d3-pwm"
+ - reg: physical base address and length of the controller's registers
+ - #pwm-cells: Should be 3. See pwm.txt in this directory for a
+ description of the cells format.
+
+Example:
+
+ pwm0: pwm@f8034000 {
+ compatible = "atmel,at91sam9rl-pwm";
+ reg = <0xf8034000 0x400>;
+ #pwm-cells = <3>;
+ };
+
+ pwmleds {
+ compatible = "pwm-leds";
+
+ d1 {
+ label = "d1";
+ pwms = <&pwm0 3 5000 0>
+ max-brightness = <255>;
+ };
+
+ d2 {
+ label = "d2";
+ pwms = <&pwm0 1 5000 1>
+ max-brightness = <255>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt b/Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt
new file mode 100644
index 000000000..8031148bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/atmel-tcb-pwm.txt
@@ -0,0 +1,16 @@
+Atmel TCB PWM controller
+
+Required properties:
+- compatible: should be "atmel,tcb-pwm"
+- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+ the cells format. The only third cell flag supported by this binding is
+ PWM_POLARITY_INVERTED.
+- tc-block: The Timer Counter block to use as a PWM chip.
+
+Example:
+
+pwm {
+ compatible = "atmel,tcb-pwm";
+ #pwm-cells = <3>;
+ tc-block = <1>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt b/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt
new file mode 100644
index 000000000..8eae9fe78
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/brcm,kona-pwm.txt
@@ -0,0 +1,21 @@
+Broadcom Kona PWM controller device tree bindings
+
+This controller has 6 channels.
+
+Required Properties :
+- compatible: should contain "brcm,kona-pwm"
+- reg: physical base address and length of the controller's registers
+- clocks: phandle + clock specifier pair for the external clock
+- #pwm-cells: Should be 3. See pwm.txt in this directory for a
+ description of the cells format.
+
+Refer to clocks/clock-bindings.txt for generic clock consumer properties.
+
+Example:
+
+pwm: pwm@3e01a000 {
+ compatible = "brcm,bcm11351-pwm", "brcm,kona-pwm";
+ reg = <0x3e01a000 0xc4>;
+ clocks = <&pwm_clk>;
+ #pwm-cells = <3>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/cirrus,clps711x-pwm.txt b/Documentation/devicetree/bindings/pwm/cirrus,clps711x-pwm.txt
new file mode 100644
index 000000000..a183db48f
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/cirrus,clps711x-pwm.txt
@@ -0,0 +1,16 @@
+* Cirris Logic CLPS711X PWM controller
+
+Required properties:
+- compatible: Shall contain "cirrus,clps711x-pwm".
+- reg: Physical base address and length of the controller's registers.
+- clocks: phandle + clock specifier pair of the PWM reference clock.
+- #pwm-cells: Should be 1. The cell specifies the index of the channel.
+
+Example:
+ pwm: pwm@80000400 {
+ compatible = "cirrus,ep7312-pwm",
+ "cirrus,clps711x-pwm";
+ reg = <0x80000400 0x4>;
+ clocks = <&clks 8>;
+ #pwm-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/img-pwm.txt b/Documentation/devicetree/bindings/pwm/img-pwm.txt
new file mode 100644
index 000000000..fade5f26f
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/img-pwm.txt
@@ -0,0 +1,24 @@
+*Imagination Technologies PWM DAC driver
+
+Required properties:
+ - compatible: Should be "img,pistachio-pwm"
+ - reg: Should contain physical base address and length of pwm registers.
+ - clocks: Must contain an entry for each entry in clock-names.
+ See ../clock/clock-bindings.txt for details.
+ - clock-names: Must include the following entries.
+ - pwm: PWM operating clock.
+ - sys: PWM system interface clock.
+ - #pwm-cells: Should be 2. See pwm.txt in this directory for the
+ description of the cells format.
+ - img,cr-periph: Must contain a phandle to the peripheral control
+ syscon node which contains PWM control registers.
+
+Example:
+ pwm: pwm@18101300 {
+ compatible = "img,pistachio-pwm";
+ reg = <0x18101300 0x100>;
+ clocks = <&pwm_clk>, <&system_clk>;
+ clock-names = "pwm", "sys";
+ #pwm-cells = <2>;
+ img,cr-periph = <&cr_periph>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/imx-pwm.txt b/Documentation/devicetree/bindings/pwm/imx-pwm.txt
new file mode 100644
index 000000000..e00c2e9f4
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/imx-pwm.txt
@@ -0,0 +1,27 @@
+Freescale i.MX PWM controller
+
+Required properties:
+- compatible : should be "fsl,<soc>-pwm" and one of the following
+ compatible strings:
+ - "fsl,imx1-pwm" for PWM compatible with the one integrated on i.MX1
+ - "fsl,imx27-pwm" for PWM compatible with the one integrated on i.MX27
+- reg: physical base address and length of the controller's registers
+- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+ the cells format.
+- clocks : Clock specifiers for both ipg and per clocks.
+- clock-names : Clock names should include both "ipg" and "per"
+See the clock consumer binding,
+ Documentation/devicetree/bindings/clock/clock-bindings.txt
+- interrupts: The interrupt for the pwm controller
+
+Example:
+
+pwm1: pwm@53fb4000 {
+ #pwm-cells = <2>;
+ compatible = "fsl,imx53-pwm", "fsl,imx27-pwm";
+ reg = <0x53fb4000 0x4000>;
+ clocks = <&clks IMX5_CLK_PWM1_IPG_GATE>,
+ <&clks IMX5_CLK_PWM1_HF_GATE>;
+ clock-names = "ipg", "per";
+ interrupts = <61>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/lpc32xx-pwm.txt b/Documentation/devicetree/bindings/pwm/lpc32xx-pwm.txt
new file mode 100644
index 000000000..cfe1db3bb
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/lpc32xx-pwm.txt
@@ -0,0 +1,12 @@
+LPC32XX PWM controller
+
+Required properties:
+- compatible: should be "nxp,lpc3220-pwm"
+- reg: physical base address and length of the controller's registers
+
+Examples:
+
+pwm@0x4005C000 {
+ compatible = "nxp,lpc3220-pwm";
+ reg = <0x4005C000 0x8>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/mxs-pwm.txt b/Documentation/devicetree/bindings/pwm/mxs-pwm.txt
new file mode 100644
index 000000000..96cdde5f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/mxs-pwm.txt
@@ -0,0 +1,17 @@
+Freescale MXS PWM controller
+
+Required properties:
+- compatible: should be "fsl,imx23-pwm"
+- reg: physical base address and length of the controller's registers
+- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+ the cells format.
+- fsl,pwm-number: the number of PWM devices
+
+Example:
+
+pwm: pwm@80064000 {
+ compatible = "fsl,imx28-pwm", "fsl,imx23-pwm";
+ reg = <0x80064000 0x2000>;
+ #pwm-cells = <2>;
+ fsl,pwm-number = <8>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt
new file mode 100644
index 000000000..c52f03b50
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/nvidia,tegra20-pwm.txt
@@ -0,0 +1,27 @@
+Tegra SoC PWFM controller
+
+Required properties:
+- compatible: For Tegra20, must contain "nvidia,tegra20-pwm". For Tegra30,
+ must contain "nvidia,tegra30-pwm". Otherwise, must contain
+ "nvidia,<chip>-pwm", plus one of the above, where <chip> is tegra114,
+ tegra124, tegra132, or tegra210.
+- reg: physical base address and length of the controller's registers
+- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+ the cells format.
+- clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - pwm
+
+Example:
+
+ pwm: pwm@7000a000 {
+ compatible = "nvidia,tegra20-pwm";
+ reg = <0x7000a000 0x100>;
+ #pwm-cells = <2>;
+ clocks = <&tegra_car 17>;
+ resets = <&tegra_car 17>;
+ reset-names = "pwm";
+ };
diff --git a/Documentation/devicetree/bindings/pwm/nxp,pca9685-pwm.txt b/Documentation/devicetree/bindings/pwm/nxp,pca9685-pwm.txt
new file mode 100644
index 000000000..f84ec9d29
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/nxp,pca9685-pwm.txt
@@ -0,0 +1,27 @@
+NXP PCA9685 16-channel 12-bit PWM LED controller
+================================================
+
+Required properties:
+ - compatible: "nxp,pca9685-pwm"
+ - #pwm-cells: Should be 2. See pwm.txt in this directory for a description of
+ the cells format.
+ The index 16 is the ALLCALL channel, that sets all PWM channels at the same
+ time.
+
+Optional properties:
+ - invert (bool): boolean to enable inverted logic
+ - open-drain (bool): boolean to configure outputs with open-drain structure;
+ if omitted use totem-pole structure
+
+Example:
+
+For LEDs that are directly connected to the PCA, the following setting is
+applicable:
+
+pca: pca@41 {
+ compatible = "nxp,pca9685-pwm";
+ #pwm-cells = <2>;
+ reg = <0x41>;
+ invert;
+ open-drain;
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt b/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt
new file mode 100644
index 000000000..fb6fb31bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-bcm2835.txt
@@ -0,0 +1,30 @@
+BCM2835 PWM controller (Raspberry Pi controller)
+
+Required properties:
+- compatible: should be "brcm,bcm2835-pwm"
+- reg: physical base address and length of the controller's registers
+- clock: This clock defines the base clock frequency of the PWM hardware
+ system, the period and the duty_cycle of the PWM signal is a multiple of
+ the base period.
+- #pwm-cells: Should be 2. See pwm.txt in this directory for a description of
+ the cells format.
+
+Examples:
+
+pwm@2020c000 {
+ compatible = "brcm,bcm2835-pwm";
+ reg = <0x2020c000 0x28>;
+ clocks = <&clk_pwm>;
+ #pwm-cells = <2>;
+};
+
+clocks {
+ ....
+ clk_pwm: pwm {
+ compatible = "fixed-clock";
+ reg = <3>;
+ #clock-cells = <0>;
+ clock-frequency = <9200000>;
+ };
+ ....
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
new file mode 100644
index 000000000..3899d6a55
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-fsl-ftm.txt
@@ -0,0 +1,52 @@
+Freescale FlexTimer Module (FTM) PWM controller
+
+The same FTM PWM device can have a different endianness on different SoCs. The
+device tree provides a property to describing this so that an operating system
+device driver can handle all variants of the device. Refer to the table below
+for the endianness of the FTM PWM block as integrated into the existing SoCs:
+
+ SoC | FTM-PWM endianness
+ --------+-------------------
+ Vybrid | LE
+ LS1 | BE
+ LS2 | LE
+
+Please see ../regmap/regmap.txt for more detail about how to specify endian
+modes in device tree.
+
+
+Required properties:
+- compatible: Should be "fsl,vf610-ftm-pwm".
+- reg: Physical base address and length of the controller's registers
+- #pwm-cells: Should be 3. See pwm.txt in this directory for a description of
+ the cells format.
+- clock-names: Should include the following module clock source entries:
+ "ftm_sys" (module clock, also can be used as counter clock),
+ "ftm_ext" (external counter clock),
+ "ftm_fix" (fixed counter clock),
+ "ftm_cnt_clk_en" (external and fixed counter clock enable/disable).
+- clocks: Must contain a phandle and clock specifier for each entry in
+ clock-names, please see clock/clock-bindings.txt for details of the property
+ values.
+- pinctrl-names: Must contain a "default" entry.
+- pinctrl-NNN: One property must exist for each entry in pinctrl-names.
+ See pinctrl/pinctrl-bindings.txt for details of the property values.
+- big-endian: Boolean property, required if the FTM PWM registers use a big-
+ endian rather than little-endian layout.
+
+Example:
+
+pwm0: pwm@40038000 {
+ compatible = "fsl,vf610-ftm-pwm";
+ reg = <0x40038000 0x1000>;
+ #pwm-cells = <3>;
+ clock-names = "ftm_sys", "ftm_ext",
+ "ftm_fix", "ftm_cnt_clk_en";
+ clocks = <&clks VF610_CLK_FTM0>,
+ <&clks VF610_CLK_FTM0_EXT_SEL>,
+ <&clks VF610_CLK_FTM0_FIX_SEL>,
+ <&clks VF610_CLK_FTM0_EXT_FIX_EN>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm0_1>;
+ big-endian;
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-lp3943.txt b/Documentation/devicetree/bindings/pwm/pwm-lp3943.txt
new file mode 100644
index 000000000..7bd9d3b12
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-lp3943.txt
@@ -0,0 +1,58 @@
+TI/National Semiconductor LP3943 PWM controller
+
+Required properties:
+ - compatible: "ti,lp3943-pwm"
+ - #pwm-cells: Should be 2. See pwm.txt in this directory for a
+ description of the cells format.
+ Note that this hardware limits the period length to the
+ range 6250~1600000.
+ - ti,pwm0 or ti,pwm1: Output pin number(s) for PWM channel 0 or 1.
+ 0 = output 0
+ 1 = output 1
+ .
+ .
+ 15 = output 15
+
+Example:
+PWM 0 is for RGB LED brightness control
+PWM 1 is for brightness control of LP8557 backlight device
+
+&i2c3 {
+ lp3943@60 {
+ compatible = "ti,lp3943";
+ reg = <0x60>;
+
+ /*
+ * PWM 0 : output 8, 9 and 10
+ * PWM 1 : output 15
+ */
+ pwm3943: pwm {
+ compatible = "ti,lp3943-pwm";
+ #pwm-cells = <2>;
+ ti,pwm0 = <8 9 10>;
+ ti,pwm1 = <15>;
+ };
+ };
+
+};
+
+/* LEDs control with PWM 0 of LP3943 */
+pwmleds {
+ compatible = "pwm-leds";
+ rgb {
+ label = "indi::rgb";
+ pwms = <&pwm3943 0 10000>;
+ max-brightness = <255>;
+ };
+};
+
+&i2c4 {
+ /* Backlight control with PWM 1 of LP3943 */
+ backlight@2c {
+ compatible = "ti,lp8557";
+ reg = <0x2c>;
+
+ pwms = <&pwm3943 1 10000>;
+ pwm-names = "lp8557";
+ };
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
new file mode 100644
index 000000000..b8be3d09e
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-rockchip.txt
@@ -0,0 +1,20 @@
+Rockchip PWM controller
+
+Required properties:
+ - compatible: should be "rockchip,<name>-pwm"
+ "rockchip,rk2928-pwm": found on RK29XX,RK3066 and RK3188 SoCs
+ "rockchip,rk3288-pwm": found on RK3288 SoC
+ "rockchip,vop-pwm": found integrated in VOP on RK3288 SoC
+ - reg: physical base address and length of the controller's registers
+ - clocks: phandle and clock specifier of the PWM reference clock
+ - #pwm-cells: must be 2 (rk2928) or 3 (rk3288). See pwm.txt in this directory
+ for a description of the cell format.
+
+Example:
+
+ pwm0: pwm@20030000 {
+ compatible = "rockchip,rk2928-pwm";
+ reg = <0x20030000 0x10>;
+ clocks = <&cru PCLK_PWM01>;
+ #pwm-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/pwm-samsung.txt b/Documentation/devicetree/bindings/pwm/pwm-samsung.txt
new file mode 100644
index 000000000..5538de9c2
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-samsung.txt
@@ -0,0 +1,51 @@
+* Samsung PWM timers
+
+Samsung SoCs contain PWM timer blocks which can be used for system clock source
+and clock event timers, as well as to drive SoC outputs with PWM signal. Each
+PWM timer block provides 5 PWM channels (not all of them can drive physical
+outputs - see SoC and board manual).
+
+Be aware that the clocksource driver supports only uniprocessor systems.
+
+Required properties:
+- compatible : should be one of following:
+ samsung,s3c2410-pwm - for 16-bit timers present on S3C24xx SoCs
+ samsung,s3c6400-pwm - for 32-bit timers present on S3C64xx SoCs
+ samsung,s5p6440-pwm - for 32-bit timers present on S5P64x0 SoCs
+ samsung,s5pc100-pwm - for 32-bit timers present on S5PC100, S5PV210,
+ Exynos4210 rev0 SoCs
+ samsung,exynos4210-pwm - for 32-bit timers present on Exynos4210,
+ Exynos4x12, Exynos5250 and Exynos5420 SoCs
+- reg: base address and size of register area
+- interrupts: list of timer interrupts (one interrupt per timer, starting at
+ timer 0)
+- clock-names: should contain all following required clock names:
+ - "timers" - PWM base clock used to generate PWM signals,
+ and any subset of following optional clock names:
+ - "pwm-tclk0" - first external PWM clock source,
+ - "pwm-tclk1" - second external PWM clock source.
+ Note that not all IP variants allow using all external clock sources.
+ Refer to SoC documentation to learn which clock source configurations
+ are available.
+- clocks: should contain clock specifiers of all clocks, which input names
+ have been specified in clock-names property, in same order.
+- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+ the cells format. The only third cell flag supported by this binding is
+ PWM_POLARITY_INVERTED.
+
+Optional properties:
+- samsung,pwm-outputs: list of PWM channels used as PWM outputs on particular
+ platform - an array of up to 5 elements being indices of PWM channels
+ (from 0 to 4), the order does not matter.
+
+Example:
+ pwm@7f006000 {
+ compatible = "samsung,s3c6400-pwm";
+ reg = <0x7f006000 0x1000>;
+ interrupt-parent = <&vic0>;
+ interrupts = <23>, <24>, <25>, <27>, <28>;
+ clocks = <&clock 67>;
+ clock-names = "timers";
+ samsung,pwm-outputs = <0>, <1>;
+ #pwm-cells = <3>;
+ }
diff --git a/Documentation/devicetree/bindings/pwm/pwm-st.txt b/Documentation/devicetree/bindings/pwm/pwm-st.txt
new file mode 100644
index 000000000..84d2fb807
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-st.txt
@@ -0,0 +1,41 @@
+STMicroelectronics PWM driver bindings
+--------------------------------------
+
+Required parameters:
+- compatible : "st,pwm"
+- #pwm-cells : Number of cells used to specify a PWM. First cell
+ specifies the per-chip index of the PWM to use and the
+ second cell is the period in nanoseconds - fixed to 2
+ for STiH41x.
+- reg : Physical base address and length of the controller's
+ registers.
+- pinctrl-names: Set to "default".
+- pinctrl-0: List of phandles pointing to pin configuration nodes
+ for PWM module.
+ For Pinctrl properties, please refer to [1].
+- clock-names: Set to "pwm".
+- clocks: phandle of the clock used by the PWM module.
+ For Clk properties, please refer to [2].
+
+Optional properties:
+- st,pwm-num-chan: Number of available channels. If not passed, the driver
+ will consider single channel by default.
+
+[1] Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+[2] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Example:
+
+pwm1: pwm@fe510000 {
+ compatible = "st,pwm";
+ reg = <0xfe510000 0x68>;
+ #pwm-cells = <2>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pwm1_chan0_default
+ &pinctrl_pwm1_chan1_default
+ &pinctrl_pwm1_chan2_default
+ &pinctrl_pwm1_chan3_default>;
+ clocks = <&clk_sysin>;
+ clock-names = "pwm";
+ st,pwm-num-chan = <4>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt b/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt
new file mode 100644
index 000000000..ae0273e19
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-sun4i.txt
@@ -0,0 +1,20 @@
+Allwinner sun4i and sun7i SoC PWM controller
+
+Required properties:
+ - compatible: should be one of:
+ - "allwinner,sun4i-a10-pwm"
+ - "allwinner,sun7i-a20-pwm"
+ - reg: physical base address and length of the controller's registers
+ - #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+ the cells format.
+ - clocks: From common clock binding, handle to the parent clock.
+
+Example:
+
+ pwm: pwm@01c20e00 {
+ compatible = "allwinner,sun7i-a20-pwm";
+ reg = <0x01c20e00 0xc>;
+ clocks = <&osc24M>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
new file mode 100644
index 000000000..fb81179dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiecap.txt
@@ -0,0 +1,29 @@
+TI SOC ECAP based APWM controller
+
+Required properties:
+- compatible: Must be "ti,<soc>-ecap".
+ for am33xx - compatible = "ti,am33xx-ecap";
+ for da850 - compatible = "ti,da850-ecap", "ti,am33xx-ecap";
+- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+ the cells format. The PWM channel index ranges from 0 to 4. The only third
+ cell flag supported by this binding is PWM_POLARITY_INVERTED.
+- reg: physical base address and size of the registers map.
+
+Optional properties:
+- ti,hwmods: Name of the hwmod associated to the ECAP:
+ "ecap<x>", <x> being the 0-based instance number from the HW spec
+
+Example:
+
+ecap0: ecap@0 { /* ECAP on am33xx */
+ compatible = "ti,am33xx-ecap";
+ #pwm-cells = <3>;
+ reg = <0x48300100 0x80>;
+ ti,hwmods = "ecap0";
+};
+
+ecap0: ecap@0 { /* ECAP on da850 */
+ compatible = "ti,da850-ecap", "ti,am33xx-ecap";
+ #pwm-cells = <3>;
+ reg = <0x306000 0x80>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
new file mode 100644
index 000000000..9c100b2c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-tiehrpwm.txt
@@ -0,0 +1,29 @@
+TI SOC EHRPWM based PWM controller
+
+Required properties:
+- compatible: Must be "ti,<soc>-ehrpwm".
+ for am33xx - compatible = "ti,am33xx-ehrpwm";
+ for da850 - compatible = "ti,da850-ehrpwm", "ti,am33xx-ehrpwm";
+- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+ the cells format. The only third cell flag supported by this binding is
+ PWM_POLARITY_INVERTED.
+- reg: physical base address and size of the registers map.
+
+Optional properties:
+- ti,hwmods: Name of the hwmod associated to the EHRPWM:
+ "ehrpwm<x>", <x> being the 0-based instance number from the HW spec
+
+Example:
+
+ehrpwm0: ehrpwm@0 { /* EHRPWM on am33xx */
+ compatible = "ti,am33xx-ehrpwm";
+ #pwm-cells = <3>;
+ reg = <0x48300200 0x100>;
+ ti,hwmods = "ehrpwm0";
+};
+
+ehrpwm0: ehrpwm@0 { /* EHRPWM on da850 */
+ compatible = "ti,da850-ehrpwm", "ti,am33xx-ehrpwm";
+ #pwm-cells = <3>;
+ reg = <0x300000 0x2000>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm-tipwmss.txt b/Documentation/devicetree/bindings/pwm/pwm-tipwmss.txt
new file mode 100644
index 000000000..f7eae77f8
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm-tipwmss.txt
@@ -0,0 +1,31 @@
+TI SOC based PWM Subsystem
+
+Required properties:
+- compatible: Must be "ti,am33xx-pwmss";
+- reg: physical base address and size of the registers map.
+- address-cells: Specify the number of u32 entries needed in child nodes.
+ Should set to 1.
+- size-cells: specify number of u32 entries needed to specify child nodes size
+ in reg property. Should set to 1.
+- ranges: describes the address mapping of a memory-mapped bus. Should set to
+ physical address map of child's base address, physical address within
+ parent's address space and length of the address map. For am33xx,
+ 3 set of child register maps present, ECAP register space, EQEP
+ register space, EHRPWM register space.
+
+Also child nodes should also populated under PWMSS DT node.
+
+Example:
+pwmss0: pwmss@48300000 {
+ compatible = "ti,am33xx-pwmss";
+ reg = <0x48300000 0x10>;
+ ti,hwmods = "epwmss0";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ status = "disabled";
+ ranges = <0x48300100 0x48300100 0x80 /* ECAP */
+ 0x48300180 0x48300180 0x80 /* EQEP */
+ 0x48300200 0x48300200 0x80>; /* EHRPWM */
+
+ /* child nodes go here */
+};
diff --git a/Documentation/devicetree/bindings/pwm/pwm.txt b/Documentation/devicetree/bindings/pwm/pwm.txt
new file mode 100644
index 000000000..8556263b8
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pwm.txt
@@ -0,0 +1,69 @@
+Specifying PWM information for devices
+======================================
+
+1) PWM user nodes
+-----------------
+
+PWM users should specify a list of PWM devices that they want to use
+with a property containing a 'pwm-list':
+
+ pwm-list ::= <single-pwm> [pwm-list]
+ single-pwm ::= <pwm-phandle> <pwm-specifier>
+ pwm-phandle : phandle to PWM controller node
+ pwm-specifier : array of #pwm-cells specifying the given PWM
+ (controller specific)
+
+PWM properties should be named "pwms". The exact meaning of each pwms
+property must be documented in the device tree binding for each device.
+An optional property "pwm-names" may contain a list of strings to label
+each of the PWM devices listed in the "pwms" property. If no "pwm-names"
+property is given, the name of the user node will be used as fallback.
+
+Drivers for devices that use more than a single PWM device can use the
+"pwm-names" property to map the name of the PWM device requested by the
+pwm_get() call to an index into the list given by the "pwms" property.
+
+The following example could be used to describe a PWM-based backlight
+device:
+
+ pwm: pwm {
+ #pwm-cells = <2>;
+ };
+
+ [...]
+
+ bl: backlight {
+ pwms = <&pwm 0 5000000>;
+ pwm-names = "backlight";
+ };
+
+Note that in the example above, specifying the "pwm-names" is redundant
+because the name "backlight" would be used as fallback anyway.
+
+pwm-specifier typically encodes the chip-relative PWM number and the PWM
+period in nanoseconds.
+
+Optionally, the pwm-specifier can encode a number of flags (defined in
+<dt-bindings/pwm/pwm.h>) in a third cell:
+- PWM_POLARITY_INVERTED: invert the PWM signal polarity
+
+Example with optional PWM specifier for inverse polarity
+
+ bl: backlight {
+ pwms = <&pwm 0 5000000 PWM_POLARITY_INVERTED>;
+ pwm-names = "backlight";
+ };
+
+2) PWM controller nodes
+-----------------------
+
+PWM controller nodes must specify the number of cells used for the
+specifier using the '#pwm-cells' property.
+
+An example PWM controller might look like this:
+
+ pwm: pwm@7000a000 {
+ compatible = "nvidia,tegra20-pwm";
+ reg = <0x7000a000 0x100>;
+ #pwm-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/pxa-pwm.txt b/Documentation/devicetree/bindings/pwm/pxa-pwm.txt
new file mode 100644
index 000000000..5ae9f1e3c
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/pxa-pwm.txt
@@ -0,0 +1,30 @@
+Marvell PWM controller
+
+Required properties:
+- compatible: should be one or more of:
+ - "marvell,pxa250-pwm"
+ - "marvell,pxa270-pwm"
+ - "marvell,pxa168-pwm"
+ - "marvell,pxa910-pwm"
+- reg: Physical base address and length of the registers used by the PWM channel
+ Note that one device instance must be created for each PWM that is used, so the
+ length covers only the register window for one PWM output, not that of the
+ entire PWM controller. Currently length is 0x10 for all supported devices.
+- #pwm-cells: Should be 1. This cell is used to specify the period in
+ nanoseconds.
+
+Example PWM device node:
+
+pwm0: pwm@40b00000 {
+ compatible = "marvell,pxa250-pwm";
+ reg = <0x40b00000 0x10>;
+ #pwm-cells = <1>;
+};
+
+Example PWM client node:
+
+backlight {
+ compatible = "pwm-backlight";
+ pwms = <&pwm0 5000000>;
+ ...
+}
diff --git a/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
new file mode 100644
index 000000000..b067e84a9
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/renesas,tpu-pwm.txt
@@ -0,0 +1,28 @@
+* Renesas R-Car Timer Pulse Unit PWM Controller
+
+Required Properties:
+
+ - compatible: should be one of the following.
+ - "renesas,tpu-r8a73a4": for R8A77A4 (R-Mobile APE6) compatible PWM controller.
+ - "renesas,tpu-r8a7740": for R8A7740 (R-Mobile A1) compatible PWM controller.
+ - "renesas,tpu-r8a7790": for R8A7790 (R-Car H2) compatible PWM controller.
+ - "renesas,tpu-sh7372": for SH7372 (SH-Mobile AP4) compatible PWM controller.
+ - "renesas,tpu": for generic R-Car TPU PWM controller.
+
+ - reg: Base address and length of each memory resource used by the PWM
+ controller hardware module.
+
+ - #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+ the cells format. The only third cell flag supported by this binding is
+ PWM_POLARITY_INVERTED.
+
+Please refer to pwm.txt in this directory for details of the common PWM bindings
+used by client devices.
+
+Example: R8A7740 (R-Car A1) TPU controller node
+
+ tpu: pwm@e6600000 {
+ compatible = "renesas,tpu-r8a7740", "renesas,tpu";
+ reg = <0xe6600000 0x100>;
+ #pwm-cells = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/spear-pwm.txt b/Documentation/devicetree/bindings/pwm/spear-pwm.txt
new file mode 100644
index 000000000..b486de2c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/spear-pwm.txt
@@ -0,0 +1,17 @@
+== ST SPEAr SoC PWM controller ==
+
+Required properties:
+- compatible: should be one of:
+ - "st,spear320-pwm"
+ - "st,spear1340-pwm"
+- reg: physical base address and length of the controller's registers
+- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+ the cells format.
+
+Example:
+
+ pwm: pwm@a8000000 {
+ compatible ="st,spear320-pwm";
+ reg = <0xa8000000 0x1000>;
+ #pwm-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt b/Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt
new file mode 100644
index 000000000..4e32bee11
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/ti,twl-pwm.txt
@@ -0,0 +1,17 @@
+Texas Instruments TWL series PWM drivers
+
+Supported PWMs:
+On TWL4030 series: PWM1 and PWM2
+On TWL6030 series: PWM0 and PWM1
+
+Required properties:
+- compatible: "ti,twl4030-pwm" or "ti,twl6030-pwm"
+- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+ the cells format.
+
+Example:
+
+twl_pwm: pwm {
+ compatible = "ti,twl6030-pwm";
+ #pwm-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt b/Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt
new file mode 100644
index 000000000..9f4b46090
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/ti,twl-pwmled.txt
@@ -0,0 +1,17 @@
+Texas Instruments TWL series PWM drivers connected to LED terminals
+
+Supported PWMs:
+On TWL4030 series: PWMA and PWMB (connected to LEDA and LEDB terminals)
+On TWL6030 series: LED PWM (mainly used as charging indicator LED)
+
+Required properties:
+- compatible: "ti,twl4030-pwmled" or "ti,twl6030-pwmled"
+- #pwm-cells: should be 2. See pwm.txt in this directory for a description of
+ the cells format.
+
+Example:
+
+twl_pwmled: pwmled {
+ compatible = "ti,twl6030-pwmled";
+ #pwm-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt b/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
new file mode 100644
index 000000000..a76390e6d
--- /dev/null
+++ b/Documentation/devicetree/bindings/pwm/vt8500-pwm.txt
@@ -0,0 +1,18 @@
+VIA/Wondermedia VT8500/WM8xxx series SoC PWM controller
+
+Required properties:
+- compatible: should be "via,vt8500-pwm"
+- reg: physical base address and length of the controller's registers
+- #pwm-cells: should be 3. See pwm.txt in this directory for a description of
+ the cells format. The only third cell flag supported by this binding is
+ PWM_POLARITY_INVERTED.
+- clocks: phandle to the PWM source clock
+
+Example:
+
+pwm1: pwm@d8220000 {
+ #pwm-cells = <3>;
+ compatible = "via,vt8500-pwm";
+ reg = <0xd8220000 0x1000>;
+ clocks = <&clkpwm>;
+};
diff --git a/Documentation/devicetree/bindings/regmap/regmap.txt b/Documentation/devicetree/bindings/regmap/regmap.txt
new file mode 100644
index 000000000..b494f8b8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/regmap/regmap.txt
@@ -0,0 +1,47 @@
+Device-Tree binding for regmap
+
+The endianness mode of CPU & Device scenarios:
+Index Device Endianness properties
+---------------------------------------------------
+1 BE 'big-endian'
+2 LE 'little-endian'
+
+For one device driver, which will run in different scenarios above
+on different SoCs using the devicetree, we need one way to simplify
+this.
+
+Required properties:
+- {big,little}-endian: these are boolean properties, if absent
+ meaning that the CPU and the Device are in the same endianness mode,
+ these properties are for register values and all the buffers only.
+
+Examples:
+Scenario 1 : CPU in LE mode & device in LE mode.
+dev: dev@40031000 {
+ compatible = "name";
+ reg = <0x40031000 0x1000>;
+ ...
+};
+
+Scenario 2 : CPU in LE mode & device in BE mode.
+dev: dev@40031000 {
+ compatible = "name";
+ reg = <0x40031000 0x1000>;
+ ...
+ big-endian;
+};
+
+Scenario 3 : CPU in BE mode & device in BE mode.
+dev: dev@40031000 {
+ compatible = "name";
+ reg = <0x40031000 0x1000>;
+ ...
+};
+
+Scenario 4 : CPU in BE mode & device in LE mode.
+dev: dev@40031000 {
+ compatible = "name";
+ reg = <0x40031000 0x1000>;
+ ...
+ little-endian;
+};
diff --git a/Documentation/devicetree/bindings/regulator/88pm800.txt b/Documentation/devicetree/bindings/regulator/88pm800.txt
new file mode 100644
index 000000000..e8a54c2a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/88pm800.txt
@@ -0,0 +1,38 @@
+Marvell 88PM800 regulator
+
+Required properties:
+- compatible: "marvell,88pm800"
+- reg: I2C slave address
+- regulators: A node that houses a sub-node for each regulator within the
+ device. Each sub-node is identified using the node's name (or the deprecated
+ regulator-compatible property if present), with valid values listed below.
+ The content of each sub-node is defined by the standard binding for
+ regulators; see regulator.txt.
+
+The valid names for regulators are:
+
+ buck1, buck2, buck3, buck4, buck5, ldo1, ldo2, ldo3, ldo4, ldo5, ldo6, ldo7,
+ ldo8, ldo9, ldo10, ldo11, ldo12, ldo13, ldo14, ldo15, ldo16, ldo17, ldo18, ldo19
+
+Example:
+
+ pmic: 88pm800@31 {
+ compatible = "marvell,88pm800";
+ reg = <0x31>;
+
+ regulators {
+ buck1 {
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <3950000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+ ldo1 {
+ regulator-min-microvolt = <600000>;
+ regulator-max-microvolt = <15000000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+...
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/88pm860x.txt b/Documentation/devicetree/bindings/regulator/88pm860x.txt
new file mode 100644
index 000000000..1267b3e1a
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/88pm860x.txt
@@ -0,0 +1,30 @@
+Marvell 88PM860x regulator
+
+Required properties:
+- compatible: "marvell,88pm860x"
+- reg: I2C slave address
+- regulators: A node that houses a sub-node for each regulator within the
+ device. Each sub-node is identified using the regulator-compatible
+ property, with valid values listed below.
+
+Example:
+
+ pmic: 88pm860x@34 {
+ compatible = "marvell,88pm860x";
+ reg = <0x34>;
+
+ regulators {
+ BUCK1 {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+ BUCK3 {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/act8865-regulator.txt b/Documentation/devicetree/bindings/regulator/act8865-regulator.txt
new file mode 100644
index 000000000..e91485d11
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/act8865-regulator.txt
@@ -0,0 +1,92 @@
+ACT88xx regulators
+-------------------
+
+Required properties:
+- compatible: "active-semi,act8846" or "active-semi,act8865" or "active-semi,act8600"
+- reg: I2C slave address
+
+Optional properties:
+- system-power-controller: Telling whether or not this pmic is controlling
+ the system power. See Documentation/devicetree/bindings/power/power-controller.txt .
+
+Optional input supply properties:
+- for act8600:
+ - vp1-supply: The input supply for DCDC_REG1
+ - vp2-supply: The input supply for DCDC_REG2
+ - vp3-supply: The input supply for DCDC_REG3
+ - inl-supply: The input supply for LDO_REG5, LDO_REG6, LDO_REG7 and LDO_REG8
+ SUDCDC_REG4, LDO_REG9 and LDO_REG10 do not have separate supplies.
+- for act8846:
+ - vp1-supply: The input supply for REG1
+ - vp2-supply: The input supply for REG2
+ - vp3-supply: The input supply for REG3
+ - vp4-supply: The input supply for REG4
+ - inl1-supply: The input supply for REG5, REG6 and REG7
+ - inl2-supply: The input supply for REG8 and LDO_REG9
+ - inl3-supply: The input supply for REG10, REG11 and REG12
+- for act8865:
+ - vp1-supply: The input supply for DCDC_REG1
+ - vp2-supply: The input supply for DCDC_REG2
+ - vp3-supply: The input supply for DCDC_REG3
+ - inl45-supply: The input supply for LDO_REG1 and LDO_REG2
+ - inl67-supply: The input supply for LDO_REG3 and LDO_REG4
+
+Any standard regulator properties can be used to configure the single regulator.
+
+The valid names for regulators are:
+ - for act8846:
+ REG1, REG2, REG3, REG4, REG5, REG6, REG7, REG8, REG9, REG10, REG11, REG12
+ - for act8865:
+ DCDC_REG1, DCDC_REG2, DCDC_REG3, LDO_REG1, LDO_REG2, LDO_REG3, LDO_REG4.
+ - for act8600:
+ DCDC_REG1, DCDC_REG2, DCDC_REG3, SUDCDC_REG4, LDO_REG5, LDO_REG6, LDO_REG7,
+ LDO_REG8, LDO_REG9, LDO_REG10,
+
+Example:
+--------
+
+ i2c1: i2c@f0018000 {
+ pmic: act8865@5b {
+ compatible = "active-semi,act8865";
+ reg = <0x5b>;
+ status = "disabled";
+
+ regulators {
+ vcc_1v8_reg: DCDC_REG1 {
+ regulator-name = "VCC_1V8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ };
+
+ vcc_1v2_reg: DCDC_REG2 {
+ regulator-name = "VCC_1V2";
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-suspend-mem-microvolt = <1150000>;
+ regulator-suspend-standby-microvolt = <1150000>;
+ regulator-always-on;
+ };
+
+ vcc_3v3_reg: DCDC_REG3 {
+ regulator-name = "VCC_3V3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vddana_reg: LDO_REG1 {
+ regulator-name = "VDDANA";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vddfuse_reg: LDO_REG2 {
+ regulator-name = "FUSE_2V5";
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <2500000>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/anatop-regulator.txt b/Documentation/devicetree/bindings/regulator/anatop-regulator.txt
new file mode 100644
index 000000000..758eae240
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/anatop-regulator.txt
@@ -0,0 +1,37 @@
+Anatop Voltage regulators
+
+Required properties:
+- compatible: Must be "fsl,anatop-regulator"
+- anatop-reg-offset: Anatop MFD register offset
+- anatop-vol-bit-shift: Bit shift for the register
+- anatop-vol-bit-width: Number of bits used in the register
+- anatop-min-bit-val: Minimum value of this register
+- anatop-min-voltage: Minimum voltage of this regulator
+- anatop-max-voltage: Maximum voltage of this regulator
+
+Optional properties:
+- anatop-delay-reg-offset: Anatop MFD step time register offset
+- anatop-delay-bit-shift: Bit shift for the step time register
+- anatop-delay-bit-width: Number of bits used in the step time register
+
+Any property defined as part of the core regulator
+binding, defined in regulator.txt, can also be used.
+
+Example:
+
+ regulator-vddpu {
+ compatible = "fsl,anatop-regulator";
+ regulator-name = "vddpu";
+ regulator-min-microvolt = <725000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-always-on;
+ anatop-reg-offset = <0x140>;
+ anatop-vol-bit-shift = <9>;
+ anatop-vol-bit-width = <5>;
+ anatop-delay-reg-offset = <0x170>;
+ anatop-delay-bit-shift = <24>;
+ anatop-delay-bit-width = <2>;
+ anatop-min-bit-val = <1>;
+ anatop-min-voltage = <725000>;
+ anatop-max-voltage = <1300000>;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/as3722-regulator.txt b/Documentation/devicetree/bindings/regulator/as3722-regulator.txt
new file mode 100644
index 000000000..caad0c8a2
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/as3722-regulator.txt
@@ -0,0 +1,91 @@
+Regulator of AMS AS3722 PMIC.
+Name of the regulator subnode must be "regulators".
+
+Optional properties:
+--------------------
+The input supply of regulators are the optional properties on the
+regulator node. The AS3722 is having 7 DCDC step-down regulators as
+sd[0-6], 10 LDOs as ldo[0-7], ldo[9-11]. The input supply of these
+regulators are provided through following properties:
+vsup-sd2-supply: Input supply for SD2.
+vsup-sd3-supply: Input supply for SD3.
+vsup-sd4-supply: Input supply for SD4.
+vsup-sd5-supply: Input supply for SD5.
+vin-ldo0-supply: Input supply for LDO0.
+vin-ldo1-6-supply: Input supply for LDO1 and LDO6.
+vin-ldo2-5-7-supply: Input supply for LDO2, LDO5 and LDO7.
+vin-ldo3-4-supply: Input supply for LDO3 and LDO4.
+vin-ldo9-10-supply: Input supply for LDO9 and LDO10.
+vin-ldo11-supply: Input supply for LDO11.
+
+Optional nodes:
+--------------
+- regulators : Must contain a sub-node per regulator from the list below.
+ Each sub-node should contain the constraints and initialization
+ information for that regulator. See regulator.txt for a
+ description of standard properties for these sub-nodes.
+ Additional custom properties are listed below.
+ sd[0-6], ldo[0-7], ldo[9-11].
+
+ Optional sub-node properties:
+ ----------------------------
+ ams,ext-control: External control of the rail. The option of
+ this properties will tell which external input is
+ controlling this rail. Valid values are 0, 1, 2 ad 3.
+ 0: There is no external control of this rail.
+ 1: Rail is controlled by ENABLE1 input pin.
+ 2: Rail is controlled by ENABLE2 input pin.
+ 3: Rail is controlled by ENABLE3 input pin.
+ ams,enable-tracking: Enable tracking with SD1, only supported
+ by LDO3.
+
+Example:
+-------
+ ams3722: ams3722 {
+ compatible = "ams,as3722";
+ reg = <0x40>;
+ ...
+
+ regulators {
+ vsup-sd2-supply = <...>;
+ ...
+
+ sd0 {
+ regulator-name = "vdd_cpu";
+ regulator-min-microvolt = <700000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-always-on;
+ ams,ext-control = <2>;
+ };
+
+ sd1 {
+ regulator-name = "vdd_core";
+ regulator-min-microvolt = <700000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-always-on;
+ ams,ext-control = <1>;
+ };
+
+ sd2 {
+ regulator-name = "vddio_ddr";
+ regulator-min-microvolt = <1350000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ };
+
+ sd4 {
+ regulator-name = "avdd-hdmi-pex";
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-always-on;
+ };
+
+ sd5 {
+ regulator-name = "vdd-1v8";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-always-on;
+ };
+ ....
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/da9210.txt b/Documentation/devicetree/bindings/regulator/da9210.txt
new file mode 100644
index 000000000..3297c53cb
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/da9210.txt
@@ -0,0 +1,21 @@
+* Dialog Semiconductor DA9210 Voltage Regulator
+
+Required properties:
+
+- compatible: must be "dlg,da9210"
+- reg: the i2c slave address of the regulator. It should be 0x68.
+
+Any standard regulator properties can be used to configure the single da9210
+DCDC.
+
+Example:
+
+ da9210@68 {
+ compatible = "dlg,da9210";
+ reg = <0x68>;
+
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/da9211.txt b/Documentation/devicetree/bindings/regulator/da9211.txt
new file mode 100644
index 000000000..eb618907c
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/da9211.txt
@@ -0,0 +1,68 @@
+* Dialog Semiconductor DA9211/DA9213 Voltage Regulator
+
+Required properties:
+- compatible: "dlg,da9211" or "dlg,da9213".
+- reg: I2C slave address, usually 0x68.
+- interrupts: the interrupt outputs of the controller
+- regulators: A node that houses a sub-node for each regulator within the
+ device. Each sub-node is identified using the node's name, with valid
+ values listed below. The content of each sub-node is defined by the
+ standard binding for regulators; see regulator.txt.
+ BUCKA and BUCKB.
+
+Optional properties:
+- enable-gpios: platform gpio for control of BUCKA/BUCKB.
+- Any optional property defined in regulator.txt
+
+Example 1) DA9211
+
+ pmic: da9211@68 {
+ compatible = "dlg,da9211";
+ reg = <0x68>;
+ interrupts = <3 27>;
+
+ regulators {
+ BUCKA {
+ regulator-name = "VBUCKA";
+ regulator-min-microvolt = < 300000>;
+ regulator-max-microvolt = <1570000>;
+ regulator-min-microamp = <2000000>;
+ regulator-max-microamp = <5000000>;
+ enable-gpios = <&gpio 27 0>;
+ };
+ BUCKB {
+ regulator-name = "VBUCKB";
+ regulator-min-microvolt = < 300000>;
+ regulator-max-microvolt = <1570000>;
+ regulator-min-microamp = <2000000>;
+ regulator-max-microamp = <5000000>;
+ enable-gpios = <&gpio 17 0>;
+ };
+ };
+ };
+
+Example 2) DA9213
+ pmic: da9213@68 {
+ compatible = "dlg,da9213";
+ reg = <0x68>;
+ interrupts = <3 27>;
+
+ regulators {
+ BUCKA {
+ regulator-name = "VBUCKA";
+ regulator-min-microvolt = < 300000>;
+ regulator-max-microvolt = <1570000>;
+ regulator-min-microamp = <3000000>;
+ regulator-max-microamp = <6000000>;
+ enable-gpios = <&gpio 27 0>;
+ };
+ BUCKB {
+ regulator-name = "VBUCKB";
+ regulator-min-microvolt = < 300000>;
+ regulator-max-microvolt = <1570000>;
+ regulator-min-microamp = <3000000>;
+ regulator-max-microamp = <6000000>;
+ enable-gpios = <&gpio 17 0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/fan53555.txt b/Documentation/devicetree/bindings/regulator/fan53555.txt
new file mode 100644
index 000000000..54a3f2c80
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/fan53555.txt
@@ -0,0 +1,23 @@
+Binding for Fairchild FAN53555 regulators
+
+Required properties:
+ - compatible: one of "fcs,fan53555", "silergy,syr827", "silergy,syr828"
+ - reg: I2C address
+
+Optional properties:
+ - fcs,suspend-voltage-selector: declare which of the two available
+ voltage selector registers should be used for the suspend
+ voltage. The other one is used for the runtime voltage setting
+ Possible values are either <0> or <1>
+ - vin-supply: regulator supplying the vin pin
+
+Example:
+
+ regulator@40 {
+ compatible = "fcs,fan53555";
+ regulator-name = "fan53555";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1800000>;
+ vin-supply = <&parent_reg>;
+ fcs,suspend-voltage-selector = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/fixed-regulator.txt b/Documentation/devicetree/bindings/regulator/fixed-regulator.txt
new file mode 100644
index 000000000..4fae41d54
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/fixed-regulator.txt
@@ -0,0 +1,34 @@
+Fixed Voltage regulators
+
+Required properties:
+- compatible: Must be "regulator-fixed";
+
+Optional properties:
+- gpio: gpio to use for enable control
+- startup-delay-us: startup time in microseconds
+- enable-active-high: Polarity of GPIO is Active high
+If this property is missing, the default assumed is Active low.
+- gpio-open-drain: GPIO is open drain type.
+ If this property is missing then default assumption is false.
+-vin-supply: Input supply name.
+
+Any property defined as part of the core regulator
+binding, defined in regulator.txt, can also be used.
+However a fixed voltage regulator is expected to have the
+regulator-min-microvolt and regulator-max-microvolt
+to be the same.
+
+Example:
+
+ abc: fixedregulator@0 {
+ compatible = "regulator-fixed";
+ regulator-name = "fixed-supply";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ gpio = <&gpio1 16 0>;
+ startup-delay-us = <70000>;
+ enable-active-high;
+ regulator-boot-on;
+ gpio-open-drain;
+ vin-supply = <&parent_reg>;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/gpio-regulator.txt b/Documentation/devicetree/bindings/regulator/gpio-regulator.txt
new file mode 100644
index 000000000..e5cac1e0c
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/gpio-regulator.txt
@@ -0,0 +1,41 @@
+GPIO controlled regulators
+
+Required properties:
+- compatible : Must be "regulator-gpio".
+- states : Selection of available voltages and GPIO configs.
+ if there are no states, then use a fixed regulator
+
+Optional properties:
+- enable-gpio : GPIO to use to enable/disable the regulator.
+- gpios : GPIO group used to control voltage.
+- gpios-states : gpios pin's initial states array. 0: LOW, 1: HIGH.
+ defualt is LOW if nothing is specified.
+- startup-delay-us : Startup time in microseconds.
+- enable-active-high : Polarity of GPIO is active high (default is low).
+- regulator-type : Specifies what is being regulated, must be either
+ "voltage" or "current", defaults to current.
+
+Any property defined as part of the core regulator binding defined in
+regulator.txt can also be used.
+
+Example:
+
+ mmciv: gpio-regulator {
+ compatible = "regulator-gpio";
+
+ regulator-name = "mmci-gpio-supply";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <2600000>;
+ regulator-boot-on;
+
+ enable-gpio = <&gpio0 23 0x4>;
+ gpios = <&gpio0 24 0x4
+ &gpio0 25 0x4>;
+ states = <1800000 0x3
+ 2200000 0x2
+ 2600000 0x1
+ 2900000 0x0>;
+
+ startup-delay-us = <100000>;
+ enable-active-high;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/isl9305.txt b/Documentation/devicetree/bindings/regulator/isl9305.txt
new file mode 100644
index 000000000..d6e7c9ec9
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/isl9305.txt
@@ -0,0 +1,36 @@
+Intersil ISL9305/ISL9305H voltage regulator
+
+Required properties:
+
+- compatible: "isil,isl9305" or "isil,isl9305h"
+- reg: I2C slave address, usually 0x68.
+- regulators: A node that houses a sub-node for each regulator within the
+ device. Each sub-node is identified using the node's name, with valid
+ values being "dcd1", "dcd2", "ldo1" and "ldo2". The content of each sub-node
+ is defined by the standard binding for regulators; see regulator.txt.
+- VINDCD1-supply: A phandle to a regulator node supplying VINDCD1.
+ VINDCD2-supply: A phandle to a regulator node supplying VINDCD2.
+ VINLDO1-supply: A phandle to a regulator node supplying VINLDO1.
+ VINLDO2-supply: A phandle to a regulator node supplying VINLDO2.
+
+Optional properties:
+- Per-regulator optional properties are defined in regulator.txt
+
+Example
+
+ pmic: isl9305@68 {
+ compatible = "isil,isl9305";
+ reg = <0x68>;
+
+ VINDCD1-supply = <&system_power>;
+ VINDCD2-supply = <&system_power>;
+ VINLDO1-supply = <&system_power>;
+ VINLDO2-supply = <&system_power>;
+
+ regulators {
+ dcd1 {
+ regulator-name = "VDD_DSP";
+ regulator-always-on;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/lp872x.txt b/Documentation/devicetree/bindings/regulator/lp872x.txt
new file mode 100644
index 000000000..78183182d
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/lp872x.txt
@@ -0,0 +1,160 @@
+Binding for TI/National Semiconductor LP872x Driver
+
+Required properties:
+ - compatible: "ti,lp8720" or "ti,lp8725"
+ - reg: I2C slave address. 0x7d = LP8720, 0x7a = LP8725
+
+Optional properties:
+ - ti,general-config: the value of LP872X_GENERAL_CFG register (u8)
+ (LP8720)
+ bit[2]: BUCK output voltage control by external DVS pin or register
+ 1 = external pin, 0 = bit7 of register 08h
+ bit[1]: sleep control by external DVS pin or register
+ 1 = external pin, 0 = bit6 of register 08h
+ bit[0]: time step unit(usec). 1 = 25, 0 = 50
+
+ (LP8725)
+ bit[7:6]: time step unit(usec). 00 = 32, 01 = 64, 10 = 128, 11 = 256
+ bit[4]: BUCK2 enable control. 1 = enable, 0 = disable
+ bit[3]: BUCK2 output voltage register address. 1 = 0Ah, 0 = 0Bh
+ bit[2]: BUCK1 output voltage control by external DVS pin or register
+ 1 = register 08h, 0 = DVS
+ bit[1]: LDO sleep control. 1 = sleep mode, 0 = normal
+ bit[0]: BUCK1 enable control, 1 = enable, 0 = disable
+
+ For more details, please see the datasheet.
+
+ - ti,update-config: define it when LP872X_GENERAL_CFG register should be set
+ - ti,dvs-gpio: GPIO specifier for external DVS pin control of LP872x devices.
+ - ti,dvs-vsel: DVS selector. 0 = SEL_V1, 1 = SEL_V2.
+ - ti,dvs-state: initial DVS pin state. 0 = DVS_LOW, 1 = DVS_HIGH.
+
+ Sub nodes for regulator_init_data
+ LP8720 has maximum 6 nodes. (child name: ldo1 ~ 5 and buck)
+ LP8725 has maximum 9 nodes. (child name: ldo1 ~ 5, lilo1,2 and buck1,2)
+ For more details, please see the following binding document.
+ (Documentation/devicetree/bindings/regulator/regulator.txt)
+
+Datasheet
+ - LP8720: http://www.ti.com/lit/ds/symlink/lp8720.pdf
+ - LP8725: http://www.ti.com/lit/ds/symlink/lp8725.pdf
+
+Example 1) LP8720
+
+lp8720@7d {
+ compatible = "ti,lp8720";
+ reg = <0x7d>;
+
+ /* external DVS pin used, timestep is 25usec */
+ ti,general-config = /bits/ 8 <0x03>;
+ ti,update-config;
+
+ /*
+ * The dvs-gpio depends on the processor environment.
+ * For example, following GPIO specifier means GPIO134 in OMAP4.
+ */
+ ti,dvs-gpio = <&gpio5 6 0>;
+ ti,dvs-vsel = /bits/ 8 <1>; /* SEL_V2 */
+ ti,dvs-state = /bits/ 8 <1>; /* DVS_HIGH */
+
+ vaf: ldo1 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vmmc: ldo2 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vcam_io: ldo3 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ };
+
+ vcam_core: ldo4 {
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <2850000>;
+ regulator-boot-on;
+ };
+
+ vcam: ldo5 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vcc: buck {
+ regulator-name = "VBUCK";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <2300000>;
+ };
+};
+
+Example 2) LP8725
+
+lp8725@7a {
+ compatible = "ti,lp8725";
+ reg = <0x7a>;
+
+ /* Enable BUCK1,2, no DVS, normal LDO mode, timestep is 256usec */
+ ti,general-config = /bits/ 8 <0xdd>;
+ ti,update-config;
+
+ vcam_io: ldo1 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vcam_core: ldo2 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vcam: ldo3 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vcmmb_io: ldo4 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ };
+
+ vcmmb_core: ldo5 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ };
+
+ vaux1: lilo1 {
+ regulator-name = "VAUX1";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vaux2: lilo2 {
+ regulator-name = "VAUX2";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vcc1: buck1 {
+ regulator-name = "VBUCK1";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-min-microamp = <460000>;
+ regulator-max-microamp = <1370000>;
+ regulator-boot-on;
+ };
+
+ vcc2: buck2 {
+ regulator-name = "VBUCK2";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-min-microamp = <460000>;
+ regulator-max-microamp = <1370000>;
+ regulator-boot-on;
+ };
+};
diff --git a/Documentation/devicetree/bindings/regulator/ltc3589.txt b/Documentation/devicetree/bindings/regulator/ltc3589.txt
new file mode 100644
index 000000000..801053036
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/ltc3589.txt
@@ -0,0 +1,99 @@
+Linear Technology LTC3589, LTC3589-1, and LTC3589-2 8-output regulators
+
+Required properties:
+- compatible: "lltc,ltc3589", "lltc,ltc3589-1" or "lltc,ltc3589-2"
+- reg: I2C slave address
+
+Required child node:
+- regulators: Contains eight regulator child nodes sw1, sw2, sw3, bb-out,
+ ldo1, ldo2, ldo3, and ldo4, specifying the initialization data as
+ documented in Documentation/devicetree/bindings/regulator/regulator.txt.
+
+Each regulator is defined using the standard binding for regulators. The
+nodes for sw1, sw2, sw3, bb-out, ldo1, and ldo2 additionally need to specify
+the resistor values of their external feedback voltage dividers:
+
+Required properties (not on ldo3, ldo4):
+- lltc,fb-voltage-divider: An array of two integers containing the resistor
+ values R1 and R2 of the feedback voltage divider in ohms.
+
+Regulators sw1, sw2, sw3, and ldo2 can regulate the feedback reference from
+0.3625 V to 0.75 V in 12.5 mV steps. The output voltage thus ranges between
+0.3625 * (1 + R1/R2) V and 0.75 * (1 + R1/R2) V. Regulators bb-out and ldo1
+have a fixed 0.8 V reference and thus output 0.8 * (1 + R1/R2) V. The ldo3
+regulator is fixed to 1.8 V on LTC3589 and to 2.8 V on LTC3589-1,2. The ldo4
+regulator can output between 1.8 V and 3.3 V on LTC3589 and between 1.2 V
+and 3.2 V on LTC3589-1,2 in four steps. The ldo1 standby regulator can not
+be disabled and thus should have the regulator-always-on property set.
+
+Example:
+
+ ltc3589: pmic@34 {
+ compatible = "lltc,ltc3589-1";
+ reg = <0x34>;
+
+ regulators {
+ sw1_reg: sw1 {
+ regulator-min-microvolt = <591930>;
+ regulator-max-microvolt = <1224671>;
+ lltc,fb-voltage-divider = <100000 158000>;
+ regulator-ramp-delay = <7000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sw2_reg: sw2 {
+ regulator-min-microvolt = <704123>;
+ regulator-max-microvolt = <1456803>;
+ lltc,fb-voltage-divider = <180000 191000>;
+ regulator-ramp-delay = <7000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sw3_reg: sw3 {
+ regulator-min-microvolt = <1341250>;
+ regulator-max-microvolt = <2775000>;
+ lltc,fb-voltage-divider = <270000 100000>;
+ regulator-ramp-delay = <7000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ bb_out_reg: bb-out {
+ regulator-min-microvolt = <3387341>;
+ regulator-max-microvolt = <3387341>;
+ lltc,fb-voltage-divider = <511000 158000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo1_reg: ldo1 {
+ regulator-min-microvolt = <1306329>;
+ regulator-max-microvolt = <1306329>;
+ lltc,fb-voltage-divider = <100000 158000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo2_reg: ldo2 {
+ regulator-min-microvolt = <704123>;
+ regulator-max-microvolt = <1456806>;
+ lltc,fb-voltage-divider = <180000 191000>;
+ regulator-ramp-delay = <7000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo3_reg: ldo3 {
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-boot-on;
+ };
+
+ ldo4_reg: ldo4 {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3200000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/max1586-regulator.txt b/Documentation/devicetree/bindings/regulator/max1586-regulator.txt
new file mode 100644
index 000000000..c050c1744
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max1586-regulator.txt
@@ -0,0 +1,28 @@
+Maxim MAX1586 voltage regulator
+
+Required properties:
+- compatible: must be "maxim,max1586"
+- reg: I2C slave address, usually 0x14
+- v3-gain: integer specifying the V3 gain as per datasheet
+ (1 + R24/R25 + R24/185.5kOhm)
+- any required generic properties defined in regulator.txt
+
+Example:
+
+ i2c_master {
+ max1586@14 {
+ compatible = "maxim,max1586";
+ reg = <0x14>;
+ v3-gain = <1000000>;
+
+ regulators {
+ vcc_core: v3 {
+ regulator-name = "vcc_core";
+ regulator-compatible = "Output_V3";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1705000>;
+ regulator-always-on;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/max77802.txt b/Documentation/devicetree/bindings/regulator/max77802.txt
new file mode 100644
index 000000000..79e547644
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max77802.txt
@@ -0,0 +1,88 @@
+Binding for Maxim MAX77802 regulators
+
+This is a part of device tree bindings of MAX77802 multi-function device.
+More information can be found in bindings/mfd/max77802.txt file.
+
+The MAX77802 PMIC has 10 high-efficiency Buck and 32 Low-dropout (LDO)
+regulators that can be controlled over I2C.
+
+Following properties should be present in main device node of the MFD chip.
+
+Optional node:
+- regulators : The regulators of max77802 have to be instantiated
+ under subnode named "regulators" using the following format.
+
+ regulator-name {
+ standard regulator constraints....
+ };
+ refer Documentation/devicetree/bindings/regulator/regulator.txt
+
+The regulator node name should be initialized with a string to get matched
+with their hardware counterparts as follow. The valid names are:
+
+ -LDOn : for LDOs, where n can lie in ranges 1-15, 17-21, 23-30
+ and 32-35.
+ example: LDO1, LDO2, LDO35.
+ -BUCKn : for BUCKs, where n can lie in range 1 to 10.
+ example: BUCK1, BUCK5, BUCK10.
+
+The max77802 regulator supports two different operating modes: Normal and Low
+Power Mode. Some regulators support the modes to be changed at startup or by
+the consumers during normal operation while others only support to change the
+mode during system suspend. The standard regulator suspend states binding can
+be used to configure the regulator operating mode.
+
+The regulators that support the standard "regulator-initial-mode" property,
+changing their mode during normal operation are: LDOs 1, 3, 20 and 21.
+
+The possible values for "regulator-initial-mode" and "regulator-mode" are:
+ 1: Normal regulator voltage output mode.
+ 3: Low Power which reduces the quiescent current down to only 1uA
+
+The list of valid modes are defined in the dt-bindings/clock/maxim,max77802.h
+header and can be included by device tree source files.
+
+The standard "regulator-mode" property can only be used for regulators that
+support changing their mode to Low Power Mode during suspend. These regulators
+are: BUCKs 2-4 and LDOs 1-35. Also, it only takes effect if the regulator has
+been enabled for the given suspend state using "regulator-on-in-suspend" and
+has not been disabled for that state using "regulator-off-in-suspend".
+
+Example:
+
+ max77802@09 {
+ compatible = "maxim,max77802";
+ interrupt-parent = <&wakeup_eint>;
+ interrupts = <26 0>;
+ reg = <0x09>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ regulators {
+ ldo1_reg: LDO1 {
+ regulator-name = "vdd_1v0";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-always-on;
+ regulator-initial-mode = <MAX77802_OPMODE_LP>;
+ };
+
+ ldo11_reg: LDO11 {
+ regulator-name = "vdd_ldo11";
+ regulator-min-microvolt = <1900000>;
+ regulator-max-microvolt = <1900000>;
+ regulator-always-on;
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ regulator-mode = <MAX77802_OPMODE_LP>;
+ };
+ };
+
+ buck1_reg: BUCK1 {
+ regulator-name = "vdd_mif";
+ regulator-min-microvolt = <950000>;
+ regulator-max-microvolt = <1300000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/max8660.txt b/Documentation/devicetree/bindings/regulator/max8660.txt
new file mode 100644
index 000000000..8ba994d8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8660.txt
@@ -0,0 +1,47 @@
+Maxim MAX8660 voltage regulator
+
+Required properties:
+- compatible: must be one of "maxim,max8660", "maxim,max8661"
+- reg: I2C slave address, usually 0x34
+- any required generic properties defined in regulator.txt
+
+Example:
+
+ i2c_master {
+ max8660@34 {
+ compatible = "maxim,max8660";
+ reg = <0x34>;
+
+ regulators {
+ regulator@0 {
+ regulator-compatible= "V3(DCDC)";
+ regulator-min-microvolt = <725000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ regulator@1 {
+ regulator-compatible= "V4(DCDC)";
+ regulator-min-microvolt = <725000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ regulator@2 {
+ regulator-compatible= "V5(LDO)";
+ regulator-min-microvolt = <1700000>;
+ regulator-max-microvolt = <2000000>;
+ };
+
+ regulator@3 {
+ regulator-compatible= "V6(LDO)";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ regulator@4 {
+ regulator-compatible= "V7(LDO)";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/max8907.txt b/Documentation/devicetree/bindings/regulator/max8907.txt
new file mode 100644
index 000000000..371eccd1c
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8907.txt
@@ -0,0 +1,69 @@
+MAX8907 regulator
+
+Required properties:
+- compatible: "maxim,max8907"
+- reg: I2C slave address
+- interrupts: The interrupt output of the controller
+- mbatt-supply: The input supply for MBATT, BBAT, SDBY, VRTC.
+- in-v1-supply: The input supply for SD1.
+- in-v2-supply: The input supply for SD2.
+- in-v3-supply: The input supply for SD3.
+- in1-supply: The input supply for LDO1.
+...
+- in20-supply: The input supply for LDO20.
+- regulators: A node that houses a sub-node for each regulator within the
+ device. Each sub-node is identified using the node's name (or the deprecated
+ regulator-compatible property if present), with valid values listed below.
+ The content of each sub-node is defined by the standard binding for
+ regulators; see regulator.txt.
+
+Optional properties:
+- maxim,system-power-controller: Boolean property indicating that the PMIC
+ controls the overall system power.
+
+The valid names for regulators are:
+
+ sd1, sd2, sd3, ldo1, ldo2, ldo3, ldo4, ldo5, ldo6, ldo7, ldo8, ldo9, ldo10,
+ ldo11, ldo12, ldo13, ldo14, ldo15, ldo16, ldo17, ldo18, ldo19, ldo20, out5v,
+ out33v, bbat, sdby, vrtc.
+
+Example:
+
+ max8907@3c {
+ compatible = "maxim,max8907";
+ reg = <0x3c>;
+ interrupts = <0 86 0x4>;
+
+ maxim,system-power-controller;
+
+ mbatt-supply = <&some_reg>;
+ in-v1-supply = <&mbatt_reg>;
+ ...
+ in1-supply = <&mbatt_reg>;
+ ...
+
+ regulators {
+ mbatt_reg: mbatt {
+ regulator-name = "vbat_pmu";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ regulator-always-on;
+ };
+
+ sd1 {
+ regulator-name = "nvvdd_sv1,vdd_cpu_pmu";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-always-on;
+ };
+
+ sd2 {
+ regulator-name = "nvvdd_sv2,vdd_core";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-always-on;
+ };
+...
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/max8925-regulator.txt b/Documentation/devicetree/bindings/regulator/max8925-regulator.txt
new file mode 100644
index 000000000..0057695aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8925-regulator.txt
@@ -0,0 +1,40 @@
+Max8925 Voltage regulators
+
+Required nodes:
+-nodes:
+ - SDV1 for SDV SDV1
+ - SDV2 for SDV SDV2
+ - SDV3 for SDV SDV3
+ - LDO1 for LDO LDO1
+ - LDO2 for LDO LDO2
+ - LDO3 for LDO LDO3
+ - LDO4 for LDO LDO4
+ - LDO5 for LDO LDO5
+ - LDO6 for LDO LDO6
+ - LDO7 for LDO LDO7
+ - LDO8 for LDO LDO8
+ - LDO9 for LDO LDO9
+ - LDO10 for LDO LDO10
+ - LDO11 for LDO LDO11
+ - LDO12 for LDO LDO12
+ - LDO13 for LDO LDO13
+ - LDO14 for LDO LDO14
+ - LDO15 for LDO LDO15
+ - LDO16 for LDO LDO16
+ - LDO17 for LDO LDO17
+ - LDO18 for LDO LDO18
+ - LDO19 for LDO LDO19
+ - LDO20 for LDO LDO20
+
+Optional properties:
+- Any optional property defined in bindings/regulator/regulator.txt
+
+Example:
+
+ SDV1 {
+ regulator-min-microvolt = <637500>;
+ regulator-max-microvolt = <1425000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
diff --git a/Documentation/devicetree/bindings/regulator/max8952.txt b/Documentation/devicetree/bindings/regulator/max8952.txt
new file mode 100644
index 000000000..866fcdd0f
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8952.txt
@@ -0,0 +1,52 @@
+Maxim MAX8952 voltage regulator
+
+Required properties:
+- compatible: must be equal to "maxim,max8952"
+- reg: I2C slave address, usually 0x60
+- max8952,dvs-mode-microvolt: array of 4 integer values defining DVS voltages
+ in microvolts. All values must be from range <770000, 1400000>
+- any required generic properties defined in regulator.txt
+
+Optional properties:
+- max8952,vid-gpios: array of two GPIO pins used for DVS voltage selection
+- max8952,en-gpio: GPIO used to control enable status of regulator
+- max8952,default-mode: index of default DVS voltage, from <0, 3> range
+- max8952,sync-freq: sync frequency, must be one of following values:
+ - 0: 26 MHz
+ - 1: 13 MHz
+ - 2: 19.2 MHz
+ Defaults to 26 MHz if not specified.
+- max8952,ramp-speed: voltage ramp speed, must be one of following values:
+ - 0: 32mV/us
+ - 1: 16mV/us
+ - 2: 8mV/us
+ - 3: 4mV/us
+ - 4: 2mV/us
+ - 5: 1mV/us
+ - 6: 0.5mV/us
+ - 7: 0.25mV/us
+ Defaults to 32mV/us if not specified.
+- any available generic properties defined in regulator.txt
+
+Example:
+
+ vdd_arm_reg: pmic@60 {
+ compatible = "maxim,max8952";
+ reg = <0x60>;
+
+ /* max8952-specific properties */
+ max8952,vid-gpios = <&gpx0 3 0>, <&gpx0 4 0>;
+ max8952,en-gpio = <&gpx0 1 0>;
+ max8952,default-mode = <0>;
+ max8952,dvs-mode-microvolt = <1250000>, <1200000>,
+ <1050000>, <950000>;
+ max8952,sync-freq = <0>;
+ max8952,ramp-speed = <0>;
+
+ /* generic regulator properties */
+ regulator-name = "vdd_arm";
+ regulator-min-microvolt = <770000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/max8973-regulator.txt b/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
new file mode 100644
index 000000000..4f15d8a1b
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8973-regulator.txt
@@ -0,0 +1,21 @@
+* Maxim MAX8973 Voltage Regulator
+
+Required properties:
+
+- compatible: must be "maxim,max8973"
+- reg: the i2c slave address of the regulator. It should be 0x1b.
+
+Any standard regulator properties can be used to configure the single max8973
+DCDC.
+
+Example:
+
+ max8973@1b {
+ compatible = "maxim,max8973";
+ reg = <0x1b>;
+
+ regulator-min-microvolt = <935000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/max8997-regulator.txt b/Documentation/devicetree/bindings/regulator/max8997-regulator.txt
new file mode 100644
index 000000000..5c186a7a7
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/max8997-regulator.txt
@@ -0,0 +1,146 @@
+* Maxim MAX8997 Voltage and Current Regulator
+
+The Maxim MAX8997 is a multi-function device which includes voltage and
+current regulators, rtc, charger controller and other sub-blocks. It is
+interfaced to the host controller using a i2c interface. Each sub-block is
+addressed by the host system using different i2c slave address. This document
+describes the bindings for 'pmic' sub-block of max8997.
+
+Required properties:
+- compatible: Should be "maxim,max8997-pmic".
+- reg: Specifies the i2c slave address of the pmic block. It should be 0x66.
+
+- max8997,pmic-buck1-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+ units for buck1 when changing voltage using gpio dvs. Refer to [1] below
+ for additional information.
+
+- max8997,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+ units for buck2 when changing voltage using gpio dvs. Refer to [1] below
+ for additional information.
+
+- max8997,pmic-buck5-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+ units for buck5 when changing voltage using gpio dvs. Refer to [1] below
+ for additional information.
+
+[1] If none of the 'max8997,pmic-buck[1/2/5]-uses-gpio-dvs' optional
+ property is specified, the 'max8997,pmic-buck[1/2/5]-dvs-voltage'
+ property should specify atleast one voltage level (which would be a
+ safe operating voltage).
+
+ If either of the 'max8997,pmic-buck[1/2/5]-uses-gpio-dvs' optional
+ property is specified, then all the eight voltage values for the
+ 'max8997,pmic-buck[1/2/5]-dvs-voltage' should be specified.
+
+Optional properties:
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+ the interrupts from max8997 are delivered to.
+- interrupts: Interrupt specifiers for two interrupt sources.
+ - First interrupt specifier is for 'irq1' interrupt.
+ - Second interrupt specifier is for 'alert' interrupt.
+- max8997,pmic-buck1-uses-gpio-dvs: 'buck1' can be controlled by gpio dvs.
+- max8997,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs.
+- max8997,pmic-buck5-uses-gpio-dvs: 'buck5' can be controlled by gpio dvs.
+
+Additional properties required if either of the optional properties are used:
+- max8997,pmic-ignore-gpiodvs-side-effect: When GPIO-DVS mode is used for
+ multiple bucks, changing the voltage value of one of the bucks may affect
+ that of another buck, which is the side effect of the change (set_voltage).
+ Use this property to ignore such side effects and change the voltage.
+
+- max8997,pmic-buck125-default-dvs-idx: Default voltage setting selected from
+ the possible 8 options selectable by the dvs gpios. The value of this
+ property should be between 0 and 7. If not specified or if out of range, the
+ default value of this property is set to 0.
+
+- max8997,pmic-buck125-dvs-gpios: GPIO specifiers for three host gpio's used
+ for dvs. The format of the gpio specifier depends in the gpio controller.
+
+Regulators: The regulators of max8997 that have to be instantiated should be
+included in a sub-node named 'regulators'. Regulator nodes included in this
+sub-node should be of the format as listed below.
+
+ regulator_name {
+ standard regulator bindings here
+ };
+
+The following are the names of the regulators that the max8997 pmic block
+supports. Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number
+as per the datasheet of max8997.
+
+ - LDOn
+ - valid values for n are 1 to 18 and 21
+ - Example: LDO0, LD01, LDO2, LDO21
+ - BUCKn
+ - valid values for n are 1 to 7.
+ - Example: BUCK1, BUCK2, BUCK3, BUCK7
+
+ - ENVICHG: Battery Charging Current Monitor Output. This is a fixed
+ voltage type regulator
+
+ - ESAFEOUT1: (ldo19)
+ - ESAFEOUT2: (ld020)
+
+ - CHARGER_CV: main battery charger voltage control
+ - CHARGER: main battery charger current control
+ - CHARGER_TOPOFF: end of charge current threshold level
+
+The bindings inside the regulator nodes use the standard regulator bindings
+which are documented elsewhere.
+
+Example:
+
+ max8997_pmic@66 {
+ compatible = "maxim,max8997-pmic";
+ interrupt-parent = <&wakeup_eint>;
+ reg = <0x66>;
+ interrupts = <4 0>, <3 0>;
+
+ max8997,pmic-buck1-uses-gpio-dvs;
+ max8997,pmic-buck2-uses-gpio-dvs;
+ max8997,pmic-buck5-uses-gpio-dvs;
+
+ max8997,pmic-ignore-gpiodvs-side-effect;
+ max8997,pmic-buck125-default-dvs-idx = <0>;
+
+ max8997,pmic-buck125-dvs-gpios = <&gpx0 0 1 0 0>, /* SET1 */
+ <&gpx0 1 1 0 0>, /* SET2 */
+ <&gpx0 2 1 0 0>; /* SET3 */
+
+ max8997,pmic-buck1-dvs-voltage = <1350000>, <1300000>,
+ <1250000>, <1200000>,
+ <1150000>, <1100000>,
+ <1000000>, <950000>;
+
+ max8997,pmic-buck2-dvs-voltage = <1100000>, <1100000>,
+ <1100000>, <1100000>,
+ <1000000>, <1000000>,
+ <1000000>, <1000000>;
+
+ max8997,pmic-buck5-dvs-voltage = <1200000>, <1200000>,
+ <1200000>, <1200000>,
+ <1200000>, <1200000>,
+ <1200000>, <1200000>;
+
+ regulators {
+ ldo1_reg: LDO1 {
+ regulator-name = "VDD_ABB_3.3V";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo2_reg: LDO2 {
+ regulator-name = "VDD_ALIVE_1.1V";
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-always-on;
+ };
+
+ buck1_reg: BUCK1 {
+ regulator-name = "VDD_ARM_1.2V";
+ regulator-min-microvolt = <950000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/mt6397-regulator.txt b/Documentation/devicetree/bindings/regulator/mt6397-regulator.txt
new file mode 100644
index 000000000..a42b1d6e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/mt6397-regulator.txt
@@ -0,0 +1,217 @@
+Mediatek MT6397 Regulator Driver
+
+Required properties:
+- compatible: "mediatek,mt6397-regulator"
+- mt6397regulator: List of regulators provided by this controller. It is named
+ according to its regulator type, buck_<name> and ldo_<name>.
+ The definition for each of these nodes is defined using the standard binding
+ for regulators at Documentation/devicetree/bindings/regulator/regulator.txt.
+
+The valid names for regulators are::
+BUCK:
+ buck_vpca15, buck_vpca7, buck_vsramca15, buck_vsramca7, buck_vcore, buck_vgpu,
+ buck_vdrm, buck_vio18
+LDO:
+ ldo_vtcxo, ldo_va28, ldo_vcama, ldo_vio28, ldo_vusb, ldo_vmc, ldo_vmch,
+ ldo_vemc3v3, ldo_vgp1, ldo_vgp2, ldo_vgp3, ldo_vgp4, ldo_vgp5, ldo_vgp6,
+ ldo_vibr
+
+Example:
+ pmic {
+ compatible = "mediatek,mt6397";
+
+ mt6397regulator: mt6397regulator {
+ compatible = "mediatek,mt6397-regulator";
+
+ mt6397_vpca15_reg: buck_vpca15 {
+ regulator-compatible = "buck_vpca15";
+ regulator-name = "vpca15";
+ regulator-min-microvolt = < 850000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <200>;
+ };
+
+ mt6397_vpca7_reg: buck_vpca7 {
+ regulator-compatible = "buck_vpca7";
+ regulator-name = "vpca7";
+ regulator-min-microvolt = < 850000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <115>;
+ };
+
+ mt6397_vsramca15_reg: buck_vsramca15 {
+ regulator-compatible = "buck_vsramca15";
+ regulator-name = "vsramca15";
+ regulator-min-microvolt = < 850000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <115>;
+
+ };
+
+ mt6397_vsramca7_reg: buck_vsramca7 {
+ regulator-compatible = "buck_vsramca7";
+ regulator-name = "vsramca7";
+ regulator-min-microvolt = < 850000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <115>;
+
+ };
+
+ mt6397_vcore_reg: buck_vcore {
+ regulator-compatible = "buck_vcore";
+ regulator-name = "vcore";
+ regulator-min-microvolt = < 850000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <115>;
+ };
+
+ mt6397_vgpu_reg: buck_vgpu {
+ regulator-compatible = "buck_vgpu";
+ regulator-name = "vgpu";
+ regulator-min-microvolt = < 700000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <115>;
+ };
+
+ mt6397_vdrm_reg: buck_vdrm {
+ regulator-compatible = "buck_vdrm";
+ regulator-name = "vdrm";
+ regulator-min-microvolt = < 800000>;
+ regulator-max-microvolt = <1400000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <500>;
+ };
+
+ mt6397_vio18_reg: buck_vio18 {
+ regulator-compatible = "buck_vio18";
+ regulator-name = "vio18";
+ regulator-min-microvolt = <1500000>;
+ regulator-max-microvolt = <2120000>;
+ regulator-ramp-delay = <12500>;
+ regulator-enable-ramp-delay = <500>;
+ };
+
+ mt6397_vtcxo_reg: ldo_vtcxo {
+ regulator-compatible = "ldo_vtcxo";
+ regulator-name = "vtcxo";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-enable-ramp-delay = <90>;
+ };
+
+ mt6397_va28_reg: ldo_va28 {
+ regulator-compatible = "ldo_va28";
+ regulator-name = "va28";
+ /* fixed output 2.8 V */
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vcama_reg: ldo_vcama {
+ regulator-compatible = "ldo_vcama";
+ regulator-name = "vcama";
+ regulator-min-microvolt = <1500000>;
+ regulator-max-microvolt = <2800000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vio28_reg: ldo_vio28 {
+ regulator-compatible = "ldo_vio28";
+ regulator-name = "vio28";
+ /* fixed output 2.8 V */
+ regulator-enable-ramp-delay = <240>;
+ };
+
+ mt6397_usb_reg: ldo_vusb {
+ regulator-compatible = "ldo_vusb";
+ regulator-name = "vusb";
+ /* fixed output 3.3 V */
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vmc_reg: ldo_vmc {
+ regulator-compatible = "ldo_vmc";
+ regulator-name = "vmc";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vmch_reg: ldo_vmch {
+ regulator-compatible = "ldo_vmch";
+ regulator-name = "vmch";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vemc_3v3_reg: ldo_vemc3v3 {
+ regulator-compatible = "ldo_vemc3v3";
+ regulator-name = "vemc_3v3";
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vgp1_reg: ldo_vgp1 {
+ regulator-compatible = "ldo_vgp1";
+ regulator-name = "vcamd";
+ regulator-min-microvolt = <1220000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <240>;
+ };
+
+ mt6397_vgp2_reg: ldo_vgp2 {
+ egulator-compatible = "ldo_vgp2";
+ regulator-name = "vcamio";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vgp3_reg: ldo_vgp3 {
+ regulator-compatible = "ldo_vgp3";
+ regulator-name = "vcamaf";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vgp4_reg: ldo_vgp4 {
+ regulator-compatible = "ldo_vgp4";
+ regulator-name = "vgp4";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vgp5_reg: ldo_vgp5 {
+ regulator-compatible = "ldo_vgp5";
+ regulator-name = "vgp5";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vgp6_reg: ldo_vgp6 {
+ regulator-compatible = "ldo_vgp6";
+ regulator-name = "vgp6";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+
+ mt6397_vibr_reg: ldo_vibr {
+ regulator-compatible = "ldo_vibr";
+ regulator-name = "vibr";
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-enable-ramp-delay = <218>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/palmas-pmic.txt b/Documentation/devicetree/bindings/regulator/palmas-pmic.txt
new file mode 100644
index 000000000..725393c8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/palmas-pmic.txt
@@ -0,0 +1,82 @@
+* palmas regulator IP block devicetree bindings
+
+Required properties:
+- compatible : Should be from the list
+ ti,twl6035-pmic
+ ti,twl6036-pmic
+ ti,twl6037-pmic
+ ti,tps65913-pmic
+ ti,tps65914-pmic
+ ti,tps65917-pmic
+and also the generic series names
+ ti,palmas-pmic
+- interrupt-parent : The parent interrupt controller which is palmas.
+- interrupts : The interrupt number and the type which can be looked up here:
+ arch/arm/boot/dts/include/dt-bindings/interrupt-controller/irq.h
+- interrupts-name: The names of the individual interrupts.
+
+Optional properties:
+- ti,ldo6-vibrator : ldo6 is in vibrator mode
+
+Optional nodes:
+- regulators : Must contain a sub-node per regulator from the list below.
+ Each sub-node should contain the constraints and initialization
+ information for that regulator. See regulator.txt for a
+ description of standard properties for these sub-nodes.
+ Additional custom properties are listed below.
+
+ For ti,palmas-pmic - smps12, smps123, smps3 depending on OTP,
+ smps45, smps457, smps7 depending on variant, smps6, smps[8-9],
+ smps10_out2, smps10_out1, ldo[1-9], ldoln, ldousb.
+
+ Optional sub-node properties:
+ ti,warm-reset - maintain voltage during warm reset(boolean)
+ ti,roof-floor - This takes as optional argument on platform supporting
+ the rail from desired external control. If there is no argument then
+ it will be assume that it is controlled by NSLEEP pin.
+ The valid value for external pins are:
+ ENABLE1 then 1,
+ ENABLE2 then 2 or
+ NSLEEP then 3.
+ ti,mode-sleep - mode to adopt in pmic sleep 0 - off, 1 - auto,
+ 2 - eco, 3 - forced pwm
+ ti,smps-range - OTP has the wrong range set for the hardware so override
+ 0 - low range, 1 - high range.
+
+- ti,system-power-controller: Telling whether or not this pmic is controlling
+ the system power.
+
+Example:
+
+#include <dt-bindings/interrupt-controller/irq.h>
+
+pmic {
+ compatible = "ti,twl6035-pmic", "ti,palmas-pmic";
+ interrupt-parent = <&palmas>;
+ interrupts = <14 IRQ_TYPE_NONE>;
+ interrupts-name = "short-irq";
+
+ ti,ldo6-vibrator;
+
+ ti,system-power-controller;
+
+ regulators {
+ smps12_reg : smps12 {
+ regulator-name = "smps12";
+ regulator-min-microvolt = < 600000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-always-on;
+ regulator-boot-on;
+ ti,warm-reset;
+ ti,roof-floor = <1>; /* ENABLE1 control */
+ ti,mode-sleep = <0>;
+ ti,smps-range = <1>;
+ };
+
+ ldo1_reg: ldo1 {
+ regulator-name = "ldo1";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/regulator/pbias-regulator.txt b/Documentation/devicetree/bindings/regulator/pbias-regulator.txt
new file mode 100644
index 000000000..32aa26f1e
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/pbias-regulator.txt
@@ -0,0 +1,27 @@
+PBIAS internal regulator for SD card dual voltage i/o pads on OMAP SoCs.
+
+Required properties:
+- compatible:
+ - "ti,pbias-omap" for OMAP2, OMAP3, OMAP4, OMAP5, DRA7.
+- reg: pbias register offset from syscon base and size of pbias register.
+- syscon : phandle of the system control module
+- regulator-name : should be
+ pbias_mmc_omap2430 for OMAP2430, OMAP3 SoCs
+ pbias_sim_omap3 for OMAP3 SoCs
+ pbias_mmc_omap4 for OMAP4 SoCs
+ pbias_mmc_omap5 for OMAP5 and DRA7 SoC
+
+Optional properties:
+- Any optional property defined in bindings/regulator/regulator.txt
+
+Example:
+
+ pbias_regulator: pbias_regulator {
+ compatible = "ti,pbias-omap";
+ reg = <0 0x4>;
+ syscon = <&omap5_padconf_global>;
+ pbias_mmc_reg: pbias_mmc_omap5 {
+ regulator-name = "pbias_mmc_omap5";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3000000>;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/pfuze100.txt b/Documentation/devicetree/bindings/regulator/pfuze100.txt
new file mode 100644
index 000000000..9b40db88f
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/pfuze100.txt
@@ -0,0 +1,299 @@
+PFUZE100 family of regulators
+
+Required properties:
+- compatible: "fsl,pfuze100", "fsl,pfuze200", "fsl,pfuze3000"
+- reg: I2C slave address
+
+Required child node:
+- regulators: This is the list of child nodes that specify the regulator
+ initialization data for defined regulators. Please refer to below doc
+ Documentation/devicetree/bindings/regulator/regulator.txt.
+
+ The valid names for regulators are:
+ --PFUZE100
+ sw1ab,sw1c,sw2,sw3a,sw3b,sw4,swbst,vsnvs,vrefddr,vgen1~vgen6
+ --PFUZE200
+ sw1ab,sw2,sw3a,sw3b,swbst,vsnvs,vrefddr,vgen1~vgen6
+ --PFUZE3000
+ sw1a,sw1b,sw2,sw3,swbst,vsnvs,vrefddr,vldo1,vldo2,vccsd,v33,vldo3,vldo4
+
+Each regulator is defined using the standard binding for regulators.
+
+Example 1: PFUZE100
+
+ pmic: pfuze100@08 {
+ compatible = "fsl,pfuze100";
+ reg = <0x08>;
+
+ regulators {
+ sw1a_reg: sw1ab {
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1875000>;
+ regulator-boot-on;
+ regulator-always-on;
+ regulator-ramp-delay = <6250>;
+ };
+
+ sw1c_reg: sw1c {
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1875000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sw2_reg: sw2 {
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sw3a_reg: sw3a {
+ regulator-min-microvolt = <400000>;
+ regulator-max-microvolt = <1975000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sw3b_reg: sw3b {
+ regulator-min-microvolt = <400000>;
+ regulator-max-microvolt = <1975000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sw4_reg: sw4 {
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ swbst_reg: swbst {
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5150000>;
+ };
+
+ snvs_reg: vsnvs {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ vref_reg: vrefddr {
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ vgen1_reg: vgen1 {
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1550000>;
+ };
+
+ vgen2_reg: vgen2 {
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1550000>;
+ };
+
+ vgen3_reg: vgen3 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vgen4_reg: vgen4 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vgen5_reg: vgen5 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vgen6_reg: vgen6 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+ };
+ };
+
+
+Example 2: PFUZE200
+
+ pmic: pfuze200@08 {
+ compatible = "fsl,pfuze200";
+ reg = <0x08>;
+
+ regulators {
+ sw1a_reg: sw1ab {
+ regulator-min-microvolt = <300000>;
+ regulator-max-microvolt = <1875000>;
+ regulator-boot-on;
+ regulator-always-on;
+ regulator-ramp-delay = <6250>;
+ };
+
+ sw2_reg: sw2 {
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sw3a_reg: sw3a {
+ regulator-min-microvolt = <400000>;
+ regulator-max-microvolt = <1975000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sw3b_reg: sw3b {
+ regulator-min-microvolt = <400000>;
+ regulator-max-microvolt = <1975000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ swbst_reg: swbst {
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5150000>;
+ };
+
+ snvs_reg: vsnvs {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ vref_reg: vrefddr {
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ vgen1_reg: vgen1 {
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1550000>;
+ };
+
+ vgen2_reg: vgen2 {
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1550000>;
+ };
+
+ vgen3_reg: vgen3 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vgen4_reg: vgen4 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vgen5_reg: vgen5 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vgen6_reg: vgen6 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+ };
+ };
+
+Example 3: PFUZE3000
+
+ pmic: pfuze3000@08 {
+ compatible = "fsl,pfuze3000";
+ reg = <0x08>;
+
+ regulators {
+ sw1a_reg: sw1a {
+ regulator-min-microvolt = <700000>;
+ regulator-max-microvolt = <1475000>;
+ regulator-boot-on;
+ regulator-always-on;
+ regulator-ramp-delay = <6250>;
+ };
+ /* use sw1c_reg to align with pfuze100/pfuze200 */
+ sw1c_reg: sw1b {
+ regulator-min-microvolt = <700000>;
+ regulator-max-microvolt = <1475000>;
+ regulator-boot-on;
+ regulator-always-on;
+ regulator-ramp-delay = <6250>;
+ };
+
+ sw2_reg: sw2 {
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sw3a_reg: sw3 {
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <1650000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ swbst_reg: swbst {
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5150000>;
+ };
+
+ snvs_reg: vsnvs {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3000000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ vref_reg: vrefddr {
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ vgen1_reg: vldo1 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vgen2_reg: vldo2 {
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1550000>;
+ };
+
+ vgen3_reg: vccsd {
+ regulator-min-microvolt = <2850000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vgen4_reg: v33 {
+ regulator-min-microvolt = <2850000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ vgen5_reg: vldo3 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+
+ vgen6_reg: vldo4 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-always-on;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/pwm-regulator.txt b/Documentation/devicetree/bindings/regulator/pwm-regulator.txt
new file mode 100644
index 000000000..ce91f61fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/pwm-regulator.txt
@@ -0,0 +1,27 @@
+pwm regulator bindings
+
+Required properties:
+- compatible: Should be "pwm-regulator"
+- pwms: OF device-tree PWM specification (see PWM binding pwm.txt)
+- voltage-table: voltage and duty table, include 2 members in each set of
+ brackets, first one is voltage(unit: uv), the next is duty(unit: percent)
+
+Any property defined as part of the core regulator binding defined in
+regulator.txt can also be used.
+
+Example:
+ pwm_regulator {
+ compatible = "pwm-regulator;
+ pwms = <&pwm1 0 8448 0>;
+
+ voltage-table = <1114000 0>,
+ <1095000 10>,
+ <1076000 20>,
+ <1056000 30>,
+ <1036000 40>,
+ <1016000 50>;
+
+ regulator-min-microvolt = <1016000>;
+ regulator-max-microvolt = <1114000>;
+ regulator-name = "vdd_logic";
+ };
diff --git a/Documentation/devicetree/bindings/regulator/regulator.txt b/Documentation/devicetree/bindings/regulator/regulator.txt
new file mode 100644
index 000000000..abb26b58c
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/regulator.txt
@@ -0,0 +1,92 @@
+Voltage/Current Regulators
+
+Optional properties:
+- regulator-name: A string used as a descriptive name for regulator outputs
+- regulator-min-microvolt: smallest voltage consumers may set
+- regulator-max-microvolt: largest voltage consumers may set
+- regulator-microvolt-offset: Offset applied to voltages to compensate for voltage drops
+- regulator-min-microamp: smallest current consumers may set
+- regulator-max-microamp: largest current consumers may set
+- regulator-always-on: boolean, regulator should never be disabled
+- regulator-boot-on: bootloader/firmware enabled regulator
+- regulator-allow-bypass: allow the regulator to go into bypass mode
+- <name>-supply: phandle to the parent supply/regulator node
+- regulator-ramp-delay: ramp delay for regulator(in uV/uS)
+ For hardware which supports disabling ramp rate, it should be explicitly
+ intialised to zero (regulator-ramp-delay = <0>) for disabling ramp delay.
+- regulator-enable-ramp-delay: The time taken, in microseconds, for the supply
+ rail to reach the target voltage, plus/minus whatever tolerance the board
+ design requires. This property describes the total system ramp time
+ required due to the combination of internal ramping of the regulator itself,
+ and board design issues such as trace capacitance and load on the supply.
+- regulator-state-mem sub-root node for Suspend-to-RAM mode
+ : suspend to memory, the device goes to sleep, but all data stored in memory,
+ only some external interrupt can wake the device.
+- regulator-state-disk sub-root node for Suspend-to-DISK mode
+ : suspend to disk, this state operates similarly to Suspend-to-RAM,
+ but includes a final step of writing memory contents to disk.
+- regulator-state-[mem/disk] node has following common properties:
+ - regulator-on-in-suspend: regulator should be on in suspend state.
+ - regulator-off-in-suspend: regulator should be off in suspend state.
+ - regulator-suspend-microvolt: regulator should be set to this voltage
+ in suspend.
+ - regulator-mode: operating mode in the given suspend state.
+ The set of possible operating modes depends on the capabilities of
+ every hardware so the valid modes are documented on each regulator
+ device tree binding document.
+- regulator-initial-mode: initial operating mode. The set of possible operating
+ modes depends on the capabilities of every hardware so each device binding
+ documentation explains which values the regulator supports.
+
+Deprecated properties:
+- regulator-compatible: If a regulator chip contains multiple
+ regulators, and if the chip's binding contains a child node that
+ describes each regulator, then this property indicates which regulator
+ this child node is intended to configure. If this property is missing,
+ the node's name will be used instead.
+
+Example:
+
+ xyzreg: regulator@0 {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <2500000>;
+ regulator-always-on;
+ vin-supply = <&vin>;
+
+ regulator-state-mem {
+ regulator-on-in-suspend;
+ };
+ };
+
+Regulator Consumers:
+Consumer nodes can reference one or more of its supplies/
+regulators using the below bindings.
+
+- <name>-supply: phandle to the regulator node
+
+These are the same bindings that a regulator in the above
+example used to reference its own supply, in which case
+its just seen as a special case of a regulator being a
+consumer itself.
+
+Example of a consumer device node (mmc) referencing two
+regulators (twl_reg1 and twl_reg2),
+
+ twl_reg1: regulator@0 {
+ ...
+ ...
+ ...
+ };
+
+ twl_reg2: regulator@1 {
+ ...
+ ...
+ ...
+ };
+
+ mmc: mmc@0x0 {
+ ...
+ ...
+ vmmc-supply = <&twl_reg1>;
+ vmmcaux-supply = <&twl_reg2>;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt b/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt
new file mode 100644
index 000000000..20191315e
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/s5m8767-regulator.txt
@@ -0,0 +1,163 @@
+* Samsung S5M8767 Voltage and Current Regulator
+
+The Samsung S5M8767 is a multi-function device which includes voltage and
+current regulators, rtc, charger controller and other sub-blocks. It is
+interfaced to the host controller using a i2c interface. Each sub-block is
+addressed by the host system using different i2c slave address. This document
+describes the bindings for 'pmic' sub-block of s5m8767.
+
+Required properties:
+- compatible: Should be "samsung,s5m8767-pmic".
+- reg: Specifies the i2c slave address of the pmic block. It should be 0x66.
+
+- s5m8767,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+ units for buck2 when changing voltage using gpio dvs. Refer to [1] below
+ for additional information.
+
+- s5m8767,pmic-buck3-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+ units for buck3 when changing voltage using gpio dvs. Refer to [1] below
+ for additional information.
+
+- s5m8767,pmic-buck4-dvs-voltage: A set of 8 voltage values in micro-volt (uV)
+ units for buck4 when changing voltage using gpio dvs. Refer to [1] below
+ for additional information.
+
+- s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used
+ for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines.
+
+[1] If none of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
+ property is specified, the 's5m8767,pmic-buck[2/3/4]-dvs-voltage'
+ property should specify atleast one voltage level (which would be a
+ safe operating voltage).
+
+ If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional
+ property is specified, then all the eight voltage values for the
+ 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified.
+
+Optional properties:
+- interrupt-parent: Specifies the phandle of the interrupt controller to which
+ the interrupts from s5m8767 are delivered to.
+- interrupts: Interrupt specifiers for two interrupt sources.
+ - First interrupt specifier is for 'irq1' interrupt.
+ - Second interrupt specifier is for 'alert' interrupt.
+- s5m8767,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs.
+- s5m8767,pmic-buck3-uses-gpio-dvs: 'buck3' can be controlled by gpio dvs.
+- s5m8767,pmic-buck4-uses-gpio-dvs: 'buck4' can be controlled by gpio dvs.
+
+Additional properties required if either of the optional properties are used:
+
+- s5m8767,pmic-buck234-default-dvs-idx: Default voltage setting selected from
+ the possible 8 options selectable by the dvs gpios. The value of this
+ property should be between 0 and 7. If not specified or if out of range, the
+ default value of this property is set to 0.
+
+- s5m8767,pmic-buck-dvs-gpios: GPIO specifiers for three host gpio's used
+ for dvs. The format of the gpio specifier depends in the gpio controller.
+
+Regulators: The regulators of s5m8767 that have to be instantiated should be
+included in a sub-node named 'regulators'. Regulator nodes included in this
+sub-node should be of the format as listed below.
+
+ regulator_name {
+ ldo1_reg: LDO1 {
+ regulator-name = "VDD_ALIVE_1.0V";
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-always-on;
+ regulator-boot-on;
+ op_mode = <1>; /* Normal Mode */
+ };
+ };
+The above regulator entries are defined in regulator bindings documentation
+except these properties:
+ - op_mode: describes the different operating modes of the LDO's with
+ power mode change in SOC. The different possible values are,
+ 0 - always off mode
+ 1 - on in normal mode
+ 2 - low power mode
+ 3 - suspend mode
+ - s5m8767,pmic-ext-control-gpios: (optional) GPIO specifier for one
+ GPIO controlling this regulator (enable/disable); This is
+ valid only for buck9.
+
+The following are the names of the regulators that the s5m8767 pmic block
+supports. Note: The 'n' in LDOn and BUCKn represents the LDO or BUCK number
+as per the datasheet of s5m8767.
+
+ - LDOn
+ - valid values for n are 1 to 28
+ - Example: LDO1, LDO2, LDO28
+ - BUCKn
+ - valid values for n are 1 to 9.
+ - Example: BUCK1, BUCK2, BUCK9
+
+The bindings inside the regulator nodes use the standard regulator bindings
+which are documented elsewhere.
+
+Example:
+
+ s5m8767_pmic@66 {
+ compatible = "samsung,s5m8767-pmic";
+ reg = <0x66>;
+
+ s5m8767,pmic-buck2-uses-gpio-dvs;
+ s5m8767,pmic-buck3-uses-gpio-dvs;
+ s5m8767,pmic-buck4-uses-gpio-dvs;
+
+ s5m8767,pmic-buck-default-dvs-idx = <0>;
+
+ s5m8767,pmic-buck-dvs-gpios = <&gpx0 0 0>, /* DVS1 */
+ <&gpx0 1 0>, /* DVS2 */
+ <&gpx0 2 0>; /* DVS3 */
+
+ s5m8767,pmic-buck-ds-gpios = <&gpx2 3 0>, /* SET1 */
+ <&gpx2 4 0>, /* SET2 */
+ <&gpx2 5 0>; /* SET3 */
+
+ s5m8767,pmic-buck2-dvs-voltage = <1350000>, <1300000>,
+ <1250000>, <1200000>,
+ <1150000>, <1100000>,
+ <1000000>, <950000>;
+
+ s5m8767,pmic-buck3-dvs-voltage = <1100000>, <1100000>,
+ <1100000>, <1100000>,
+ <1000000>, <1000000>,
+ <1000000>, <1000000>;
+
+ s5m8767,pmic-buck4-dvs-voltage = <1200000>, <1200000>,
+ <1200000>, <1200000>,
+ <1200000>, <1200000>,
+ <1200000>, <1200000>;
+
+ regulators {
+ ldo1_reg: LDO1 {
+ regulator-name = "VDD_ABB_3.3V";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ op_mode = <1>; /* Normal Mode */
+ };
+
+ ldo2_reg: LDO2 {
+ regulator-name = "VDD_ALIVE_1.1V";
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ regulator-always-on;
+ };
+
+ buck1_reg: BUCK1 {
+ regulator-name = "VDD_MIF_1.2V";
+ regulator-min-microvolt = <950000>;
+ regulator-max-microvolt = <1350000>;
+ regulator-always-on;
+ regulator-boot-on;
+ };
+
+ vemmc_reg: BUCK9 {
+ regulator-name = "VMEM_VDD_2.8V";
+ regulator-min-microvolt = <2800000>;
+ regulator-max-microvolt = <2800000>;
+ op_mode = <3>; /* Standby Mode */
+ s5m8767,pmic-ext-control-gpios = <&gpk0 2 0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/sky81452-regulator.txt b/Documentation/devicetree/bindings/regulator/sky81452-regulator.txt
new file mode 100644
index 000000000..f9acbc1f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/sky81452-regulator.txt
@@ -0,0 +1,18 @@
+SKY81452 voltage regulator
+
+Required properties:
+- regulator node named lout.
+- any required generic properties defined in regulator.txt
+
+Optional properties:
+- any available generic properties defined in regulator.txt
+
+Example:
+
+ regulator {
+ lout {
+ regulator-name = "sky81452-lout";
+ regulator-min-microvolt = <4500000>;
+ regulator-max-microvolt = <8000000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
new file mode 100644
index 000000000..c58db75f9
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/ti-abb-regulator.txt
@@ -0,0 +1,132 @@
+Adaptive Body Bias(ABB) SoC internal LDO regulator for Texas Instruments SoCs
+
+Required Properties:
+- compatible: Should be one of:
+ - "ti,abb-v1" for older SoCs like OMAP3
+ - "ti,abb-v2" for newer SoCs like OMAP4, OMAP5
+ - "ti,abb-v3" for a generic definition where setup and control registers are
+ provided (example: DRA7)
+- reg: Address and length of the register set for the device. It contains
+ the information of registers in the same order as described by reg-names
+- reg-names: Should contain the reg names
+ - "base-address" - contains base address of ABB module (ti,abb-v1,ti,abb-v2)
+ - "control-address" - contains control register address of ABB module (ti,abb-v3)
+ - "setup-address" - contains setup register address of ABB module (ti,abb-v3)
+ - "int-address" - contains address of interrupt register for ABB module
+ (also see Optional properties)
+- #address-cell: should be 0
+- #size-cell: should be 0
+- clocks: should point to the clock node used by ABB module
+- ti,settling-time: Settling time in uSecs from SoC documentation for ABB module
+ to settle down(target time for SR2_WTCNT_VALUE).
+- ti,clock-cycles: SoC specific data about count of system ti,clock-cycles used for
+ computing settling time from SoC Documentation for ABB module(clock
+ cycles for SR2_WTCNT_VALUE).
+- ti,tranxdone-status-mask: Mask to the int-register to write-to-clear mask
+ indicating LDO tranxdone (operation complete).
+- ti,abb_info: An array of 6-tuples u32 items providing information about ABB
+ configuration needed per operational voltage of the device.
+ Each item consists of the following in the same order:
+ volt: voltage in uV - Only used to index ABB information.
+ ABB mode: one of the following:
+ 0-bypass
+ 1-Forward Body Bias(FBB)
+ 3-Reverse Body Bias(RBB)
+ efuse: (see Optional properties)
+ RBB enable efuse Mask: (See Optional properties)
+ FBB enable efuse Mask: (See Optional properties)
+ Vset value efuse Mask: (See Optional properties)
+
+ NOTE: If more than 1 entry is present, then regulator is setup to change
+ voltage, allowing for various modes to be selected indexed off
+ the regulator. Further, ABB LDOs are considered always-on by
+ default.
+
+Optional Properties:
+- reg-names: In addition to the required properties, the following are optional
+ - "efuse-address" - Contains efuse base address used to pick up ABB info.
+ - "ldo-address" - Contains address of ABB LDO overide register address.
+ "efuse-address" is required for this.
+- ti,ldovbb-vset-mask - Required if ldo-address is set, mask for LDO override
+ register to provide override vset value.
+- ti,ldovbb-override-mask - Required if ldo-address is set, mask for LDO
+ override register to enable override vset value.
+- ti,abb_opp_sel: Addendum to the description in required properties
+ efuse: Mandatory if 'efuse-address' register is defined. Provides offset
+ from efuse-address to pick up ABB characteristics. Set to 0 if
+ 'efuse-address' is not defined.
+ RBB enable efuse Mask: Optional if 'efuse-address' register is defined.
+ 'ABB mode' is force set to RBB mode if value at "efuse-address"
+ + efuse maps to RBB mask. Set to 0 to ignore this.
+ FBB enable efuse Mask: Optional if 'efuse-address' register is defined.
+ 'ABB mode' is force set to FBB mode if value at "efuse-address"
+ + efuse maps to FBB mask (valid only if RBB mask does not match)
+ Set to 0 to ignore this.
+ Vset value efuse Mask: Mandatory if ldo-address is set. Picks up from
+ efuse the value to set in 'ti,ldovbb-vset-mask' at ldo-address.
+
+Example #1: Simplest configuration (no efuse data, hard coded ABB table):
+abb_x: regulator-abb-x {
+ compatible = "ti,abb-v1";
+ regulator-name = "abb_x";
+ #address-cell = <0>;
+ #size-cells = <0>;
+ reg = <0x483072f0 0x8>, <0x48306818 0x4>;
+ reg-names = "base-address", "int-address";
+ ti,tranxdone-status-mask = <0x4000000>;
+ clocks = <&sysclk>;
+ ti,settling-time = <30>;
+ ti,clock-cycles = <8>;
+ ti,abb_info = <
+ /* uV ABB efuse rbb_m fbb_m vset_m */
+ 1012500 0 0 0 0 0 /* Bypass */
+ 1200000 3 0 0 0 0 /* RBB mandatory */
+ 1320000 1 0 0 0 0 /* FBB mandatory */
+ >;
+};
+
+Example #2: Efuse bits contain ABB mode setting (no LDO override capability)
+abb_y: regulator-abb-y {
+ compatible = "ti,abb-v2";
+ regulator-name = "abb_y";
+ #address-cell = <0>;
+ #size-cells = <0>;
+ reg = <0x4a307bd0 0x8>, <0x4a306014 0x4>, <0x4A002268 0x8>;
+ reg-names = "base-address", "int-address", "efuse-address";
+ ti,tranxdone-status-mask = <0x4000000>;
+ clocks = <&sysclk>;
+ ti,settling-time = <50>;
+ ti,clock-cycles = <16>;
+ ti,abb_info = <
+ /* uV ABB efuse rbb_m fbb_m vset_m */
+ 975000 0 0 0 0 0 /* Bypass */
+ 1012500 0 0 0x40000 0 0 /* RBB optional */
+ 1200000 0 0x4 0 0x40000 0 /* FBB optional */
+ 1320000 1 0 0 0 0 /* FBB mandatory */
+ >;
+};
+
+Example #3: Efuse bits contain ABB mode setting and LDO override capability
+abb_z: regulator-abb-z {
+ compatible = "ti,abb-v2";
+ regulator-name = "abb_z";
+ #address-cell = <0>;
+ #size-cells = <0>;
+ reg = <0x4ae07ce4 0x8>, <0x4ae06010 0x4>,
+ <0x4a002194 0x8>, <0x4ae0C314 0x4>;
+ reg-names = "base-address", "int-address",
+ "efuse-address", "ldo-address";
+ ti,tranxdone-status-mask = <0x8000000>;
+ /* LDOVBBMM_MUX_CTRL */
+ ti,ldovbb-override-mask = <0x400>;
+ /* LDOVBBMM_VSET_OUT */
+ ti,ldovbb-vset-mask = <0x1F>;
+ clocks = <&sysclk>;
+ ti,settling-time = <50>;
+ ti,clock-cycles = <16>;
+ ti,abb_info = <
+ /* uV ABB efuse rbb_m fbb_m vset_m */
+ 975000 0 0 0 0 0 /* Bypass */
+ 1200000 0 0x4 0 0x40000 0x1f00 /* FBB optional, vset */
+ >;
+};
diff --git a/Documentation/devicetree/bindings/regulator/tps51632-regulator.txt b/Documentation/devicetree/bindings/regulator/tps51632-regulator.txt
new file mode 100644
index 000000000..2f7e44a96
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/tps51632-regulator.txt
@@ -0,0 +1,27 @@
+TPS51632 Voltage regulators
+
+Required properties:
+- compatible: Must be "ti,tps51632"
+- reg: I2C slave address
+
+Optional properties:
+- ti,enable-pwm-dvfs: Enable the DVFS voltage control through the PWM interface.
+- ti,dvfs-step-20mV: The 20mV step voltage when PWM DVFS enabled. Missing this
+ will set 10mV step voltage in PWM DVFS mode. In normal mode, the voltage
+ step is 10mV as per datasheet.
+
+Any property defined as part of the core regulator binding, defined in
+regulator.txt, can also be used.
+
+Example:
+
+ tps51632 {
+ compatible = "ti,tps51632";
+ reg = <0x43>;
+ regulator-name = "tps51632-vout";
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-boot-on;
+ ti,enable-pwm-dvfs;
+ ti,dvfs-step-20mV;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/tps62360-regulator.txt b/Documentation/devicetree/bindings/regulator/tps62360-regulator.txt
new file mode 100644
index 000000000..1b20c3dbc
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/tps62360-regulator.txt
@@ -0,0 +1,44 @@
+TPS62360 Voltage regulators
+
+Required properties:
+- compatible: Must be one of the following.
+ "ti,tps62360"
+ "ti,tps62361",
+ "ti,tps62362",
+ "ti,tps62363",
+- reg: I2C slave address
+
+Optional properties:
+- ti,enable-vout-discharge: Enable output discharge. This is boolean value.
+- ti,enable-pull-down: Enable pull down. This is boolean value.
+- ti,vsel0-gpio: GPIO for controlling VSEL0 line.
+ If this property is missing, then assume that there is no GPIO
+ for vsel0 control.
+- ti,vsel1-gpio: Gpio for controlling VSEL1 line.
+ If this property is missing, then assume that there is no GPIO
+ for vsel1 control.
+- ti,vsel0-state-high: Initial state of vsel0 input is high.
+ If this property is missing, then assume the state as low (0).
+- ti,vsel1-state-high: Initial state of vsel1 input is high.
+ If this property is missing, then assume the state as low (0).
+
+Any property defined as part of the core regulator binding, defined in
+regulator.txt, can also be used.
+
+Example:
+
+ abc: tps62360 {
+ compatible = "ti,tps62361";
+ reg = <0x60>;
+ regulator-name = "tps62361-vout";
+ regulator-min-microvolt = <500000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-boot-on
+ ti,vsel0-gpio = <&gpio1 16 0>;
+ ti,vsel1-gpio = <&gpio1 17 0>;
+ ti,vsel0-state-high;
+ ti,vsel1-state-high;
+ ti,enable-pull-down;
+ ti,enable-force-pwm;
+ ti,enable-vout-discharge;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/tps65090.txt b/Documentation/devicetree/bindings/regulator/tps65090.txt
new file mode 100644
index 000000000..ca69f5e30
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/tps65090.txt
@@ -0,0 +1,126 @@
+TPS65090 regulators
+
+Required properties:
+- compatible: "ti,tps65090"
+- reg: I2C slave address
+- interrupts: the interrupt outputs of the controller
+- regulators: A node that houses a sub-node for each regulator within the
+ device. Each sub-node is identified using the node's name, with valid
+ values listed below. The content of each sub-node is defined by the
+ standard binding for regulators; see regulator.txt.
+ dcdc[1-3], fet[1-7] and ldo[1-2] respectively.
+- vsys[1-3]-supply: The input supply for DCDC[1-3] respectively.
+- infet[1-7]-supply: The input supply for FET[1-7] respectively.
+- vsys-l[1-2]-supply: The input supply for LDO[1-2] respectively.
+
+Optional properties:
+- ti,enable-ext-control: This is applicable for DCDC1, DCDC2 and DCDC3.
+ If DCDCs are externally controlled then this property should be there.
+- "dcdc-ext-control-gpios: This is applicable for DCDC1, DCDC2 and DCDC3.
+ If DCDCs are externally controlled and if it is from GPIO then GPIO
+ number should be provided. If it is externally controlled and no GPIO
+ entry then driver will just configure this rails as external control
+ and will not provide any enable/disable APIs.
+- ti,overcurrent-wait: This is applicable to FET registers, which have a
+ poorly defined "overcurrent wait" field. If this property is present it
+ should be between 0 - 3. If this property isn't present we won't touch the
+ "overcurrent wait" field and we'll leave it to the BIOS/EC to deal with.
+
+Each regulator is defined using the standard binding for regulators.
+
+Example:
+
+ tps65090@48 {
+ compatible = "ti,tps65090";
+ reg = <0x48>;
+ interrupts = <0 88 0x4>;
+
+ vsys1-supply = <&some_reg>;
+ vsys2-supply = <&some_reg>;
+ vsys3-supply = <&some_reg>;
+ infet1-supply = <&some_reg>;
+ infet2-supply = <&some_reg>;
+ infet3-supply = <&some_reg>;
+ infet4-supply = <&some_reg>;
+ infet5-supply = <&some_reg>;
+ infet6-supply = <&some_reg>;
+ infet7-supply = <&some_reg>;
+ vsys-l1-supply = <&some_reg>;
+ vsys-l2-supply = <&some_reg>;
+
+ regulators {
+ dcdc1 {
+ regulator-name = "dcdc1";
+ regulator-boot-on;
+ regulator-always-on;
+ ti,enable-ext-control;
+ dcdc-ext-control-gpios = <&gpio 10 0>;
+ };
+
+ dcdc2 {
+ regulator-name = "dcdc2";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ dcdc3 {
+ regulator-name = "dcdc3";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ fet1 {
+ regulator-name = "fet1";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ fet2 {
+ regulator-name = "fet2";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ fet3 {
+ regulator-name = "fet3";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ fet4 {
+ regulator-name = "fet4";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ fet5 {
+ regulator-name = "fet5";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ fet6 {
+ regulator-name = "fet6";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ fet7 {
+ regulator-name = "fet7";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo1 {
+ regulator-name = "ldo1";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo2 {
+ regulator-name = "ldo2";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/tps65217.txt b/Documentation/devicetree/bindings/regulator/tps65217.txt
new file mode 100644
index 000000000..4f05d208c
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/tps65217.txt
@@ -0,0 +1,78 @@
+TPS65217 family of regulators
+
+Required properties:
+- compatible: "ti,tps65217"
+- reg: I2C slave address
+- regulators: list of regulators provided by this controller, must be named
+ after their hardware counterparts: dcdc[1-3] and ldo[1-4]
+- regulators: This is the list of child nodes that specify the regulator
+ initialization data for defined regulators. Not all regulators for the given
+ device need to be present. The definition for each of these nodes is defined
+ using the standard binding for regulators found at
+ Documentation/devicetree/bindings/regulator/regulator.txt.
+
+Optional properties:
+- ti,pmic-shutdown-controller: Telling the PMIC to shutdown on PWR_EN toggle.
+
+ The valid names for regulators are:
+ tps65217: dcdc1, dcdc2, dcdc3, ldo1, ldo2, ldo3 and ldo4
+
+Each regulator is defined using the standard binding for regulators.
+
+Example:
+
+ tps: tps@24 {
+ compatible = "ti,tps65217";
+ ti,pmic-shutdown-controller;
+
+ regulators {
+ dcdc1_reg: dcdc1 {
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ dcdc2_reg: dcdc2 {
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ dcdc3_reg: dcc3 {
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo1_reg: ldo1 {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo2_reg: ldo2 {
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo3_reg: ldo3 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo4_reg: ldo4 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/tps65218.txt b/Documentation/devicetree/bindings/regulator/tps65218.txt
new file mode 100644
index 000000000..fccc1d24a
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/tps65218.txt
@@ -0,0 +1,23 @@
+TPS65218 family of regulators
+
+Required properties:
+For tps65218 regulators/LDOs
+- compatible:
+ - "ti,tps65218-dcdc1" for DCDC1
+ - "ti,tps65218-dcdc2" for DCDC2
+ - "ti,tps65218-dcdc3" for DCDC3
+ - "ti,tps65218-dcdc4" for DCDC4
+ - "ti,tps65218-dcdc5" for DCDC5
+ - "ti,tps65218-dcdc6" for DCDC6
+ - "ti,tps65218-ldo1" for LDO1
+
+Optional properties:
+- Any optional property defined in bindings/regulator/regulator.txt
+
+Example:
+
+ xyz: regulator@0 {
+ compatible = "ti,tps65218-dcdc1";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3000000>;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/tps6586x.txt b/Documentation/devicetree/bindings/regulator/tps6586x.txt
new file mode 100644
index 000000000..8b40cac24
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/tps6586x.txt
@@ -0,0 +1,135 @@
+TPS6586x family of regulators
+
+Required properties:
+- compatible: "ti,tps6586x"
+- reg: I2C slave address
+- interrupts: the interrupt outputs of the controller
+- #gpio-cells: number of cells to describe a GPIO
+- gpio-controller: mark the device as a GPIO controller
+- regulators: A node that houses a sub-node for each regulator within the
+ device. Each sub-node is identified using the node's name (or the deprecated
+ regulator-compatible property if present), with valid values listed below.
+ The content of each sub-node is defined by the standard binding for
+ regulators; see regulator.txt.
+ sys, sm[0-2], ldo[0-9] and ldo_rtc
+- sys-supply: The input supply for SYS.
+- vin-sm0-supply: The input supply for the SM0.
+- vin-sm1-supply: The input supply for the SM1.
+- vin-sm2-supply: The input supply for the SM2.
+- vinldo01-supply: The input supply for the LDO1 and LDO2
+- vinldo23-supply: The input supply for the LDO2 and LDO3
+- vinldo4-supply: The input supply for the LDO4
+- vinldo678-supply: The input supply for the LDO6, LDO7 and LDO8
+- vinldo9-supply: The input supply for the LDO9
+
+Optional properties:
+- ti,system-power-controller: Telling whether or not this pmic is controlling
+ the system power.
+
+Each regulator is defined using the standard binding for regulators.
+
+Note: LDO5 and LDO_RTC is supplied by SYS regulator internally and driver
+ take care of making proper parent child relationship.
+
+Example:
+
+ pmu: tps6586x@34 {
+ compatible = "ti,tps6586x";
+ reg = <0x34>;
+ interrupts = <0 88 0x4>;
+
+ #gpio-cells = <2>;
+ gpio-controller;
+
+ ti,system-power-controller;
+
+ sys-supply = <&some_reg>;
+ vin-sm0-supply = <&some_reg>;
+ vin-sm1-supply = <&some_reg>;
+ vin-sm2-supply = <&some_reg>;
+ vinldo01-supply = <...>;
+ vinldo23-supply = <...>;
+ vinldo4-supply = <...>;
+ vinldo678-supply = <...>;
+ vinldo9-supply = <...>;
+
+ regulators {
+ sys_reg: sys {
+ regulator-name = "vdd_sys";
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sm0_reg: sm0 {
+ regulator-min-microvolt = < 725000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sm1_reg: sm1 {
+ regulator-min-microvolt = < 725000>;
+ regulator-max-microvolt = <1500000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ sm2_reg: sm2 {
+ regulator-min-microvolt = <3000000>;
+ regulator-max-microvolt = <4550000>;
+ regulator-boot-on;
+ regulator-always-on;
+ };
+
+ ldo0_reg: ldo0 {
+ regulator-name = "PCIE CLK";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo1_reg: ldo1 {
+ regulator-min-microvolt = < 725000>;
+ regulator-max-microvolt = <1500000>;
+ };
+
+ ldo2_reg: ldo2 {
+ regulator-min-microvolt = < 725000>;
+ regulator-max-microvolt = <1500000>;
+ };
+
+ ldo3_reg: ldo3 {
+ regulator-min-microvolt = <1250000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo4_reg: ldo4 {
+ regulator-min-microvolt = <1700000>;
+ regulator-max-microvolt = <2475000>;
+ };
+
+ ldo5_reg: ldo5 {
+ regulator-min-microvolt = <1250000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo6_reg: ldo6 {
+ regulator-min-microvolt = <1250000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo7_reg: ldo7 {
+ regulator-min-microvolt = <1250000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo8_reg: ldo8 {
+ regulator-min-microvolt = <1250000>;
+ regulator-max-microvolt = <3300000>;
+ };
+
+ ldo9_reg: ldo9 {
+ regulator-min-microvolt = <1250000>;
+ regulator-max-microvolt = <3300000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/regulator/twl-regulator.txt b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
new file mode 100644
index 000000000..75b0c1669
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/twl-regulator.txt
@@ -0,0 +1,67 @@
+TWL family of regulators
+
+Required properties:
+For twl6030 regulators/LDOs
+- compatible:
+ - "ti,twl6030-vaux1" for VAUX1 LDO
+ - "ti,twl6030-vaux2" for VAUX2 LDO
+ - "ti,twl6030-vaux3" for VAUX3 LDO
+ - "ti,twl6030-vmmc" for VMMC LDO
+ - "ti,twl6030-vpp" for VPP LDO
+ - "ti,twl6030-vusim" for VUSIM LDO
+ - "ti,twl6030-vana" for VANA LDO
+ - "ti,twl6030-vcxio" for VCXIO LDO
+ - "ti,twl6030-vdac" for VDAC LDO
+ - "ti,twl6030-vusb" for VUSB LDO
+ - "ti,twl6030-v1v8" for V1V8 LDO
+ - "ti,twl6030-v2v1" for V2V1 LDO
+ - "ti,twl6030-vdd1" for VDD1 SMPS
+ - "ti,twl6030-vdd2" for VDD2 SMPS
+ - "ti,twl6030-vdd3" for VDD3 SMPS
+For twl6032 regulators/LDOs
+- compatible:
+ - "ti,twl6032-ldo1" for LDO1 LDO
+ - "ti,twl6032-ldo2" for LDO2 LDO
+ - "ti,twl6032-ldo3" for LDO3 LDO
+ - "ti,twl6032-ldo4" for LDO4 LDO
+ - "ti,twl6032-ldo5" for LDO5 LDO
+ - "ti,twl6032-ldo6" for LDO6 LDO
+ - "ti,twl6032-ldo7" for LDO7 LDO
+ - "ti,twl6032-ldoln" for LDOLN LDO
+ - "ti,twl6032-ldousb" for LDOUSB LDO
+ - "ti,twl6032-smps3" for SMPS3 SMPS
+ - "ti,twl6032-smps4" for SMPS4 SMPS
+ - "ti,twl6032-vio" for VIO SMPS
+For twl4030 regulators/LDOs
+- compatible:
+ - "ti,twl4030-vaux1" for VAUX1 LDO
+ - "ti,twl4030-vaux2" for VAUX2 LDO
+ - "ti,twl5030-vaux2" for VAUX2 LDO
+ - "ti,twl4030-vaux3" for VAUX3 LDO
+ - "ti,twl4030-vaux4" for VAUX4 LDO
+ - "ti,twl4030-vmmc1" for VMMC1 LDO
+ - "ti,twl4030-vmmc2" for VMMC2 LDO
+ - "ti,twl4030-vpll1" for VPLL1 LDO
+ - "ti,twl4030-vpll2" for VPLL2 LDO
+ - "ti,twl4030-vsim" for VSIM LDO
+ - "ti,twl4030-vdac" for VDAC LDO
+ - "ti,twl4030-vintana2" for VINTANA2 LDO
+ - "ti,twl4030-vio" for VIO LDO
+ - "ti,twl4030-vdd1" for VDD1 SMPS
+ - "ti,twl4030-vdd2" for VDD2 SMPS
+ - "ti,twl4030-vintana1" for VINTANA1 LDO
+ - "ti,twl4030-vintdig" for VINTDIG LDO
+ - "ti,twl4030-vusb1v5" for VUSB1V5 LDO
+ - "ti,twl4030-vusb1v8" for VUSB1V8 LDO
+ - "ti,twl4030-vusb3v1" for VUSB3V1 LDO
+
+Optional properties:
+- Any optional property defined in bindings/regulator/regulator.txt
+
+Example:
+
+ xyz: regulator@0 {
+ compatible = "ti,twl6030-vaux1";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <3000000>;
+ };
diff --git a/Documentation/devicetree/bindings/regulator/vexpress.txt b/Documentation/devicetree/bindings/regulator/vexpress.txt
new file mode 100644
index 000000000..d775f7248
--- /dev/null
+++ b/Documentation/devicetree/bindings/regulator/vexpress.txt
@@ -0,0 +1,32 @@
+Versatile Express voltage regulators
+------------------------------------
+
+Requires node properties:
+- "compatible" value: "arm,vexpress-volt"
+- "arm,vexpress-sysreg,func" when controlled via vexpress-sysreg
+ (see Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
+ for more details)
+
+Required regulator properties:
+- "regulator-name"
+- "regulator-always-on"
+
+Optional regulator properties:
+- "regulator-min-microvolt"
+- "regulator-max-microvolt"
+
+See Documentation/devicetree/bindings/regulator/regulator.txt
+for more details about the regulator properties.
+
+When no "regulator-[min|max]-microvolt" properties are defined,
+the device is treated as fixed (or rather "read-only") regulator.
+
+Example:
+ volt@0 {
+ compatible = "arm,vexpress-volt";
+ arm,vexpress-sysreg,func = <2 0>;
+ regulator-name = "Cores";
+ regulator-min-microvolt = <800000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-always-on;
+ };
diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
new file mode 100644
index 000000000..3da0ebdba
--- /dev/null
+++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
@@ -0,0 +1,133 @@
+*** Reserved memory regions ***
+
+Reserved memory is specified as a node under the /reserved-memory node.
+The operating system shall exclude reserved memory from normal usage
+one can create child nodes describing particular reserved (excluded from
+normal use) memory regions. Such memory regions are usually designed for
+the special usage by various device drivers.
+
+Parameters for each memory region can be encoded into the device tree
+with the following nodes:
+
+/reserved-memory node
+---------------------
+#address-cells, #size-cells (required) - standard definition
+ - Should use the same values as the root node
+ranges (required) - standard definition
+ - Should be empty
+
+/reserved-memory/ child nodes
+-----------------------------
+Each child of the reserved-memory node specifies one or more regions of
+reserved memory. Each child node may either use a 'reg' property to
+specify a specific range of reserved memory, or a 'size' property with
+optional constraints to request a dynamically allocated block of memory.
+
+Following the generic-names recommended practice, node names should
+reflect the purpose of the node (ie. "framebuffer" or "dma-pool"). Unit
+address (@<address>) should be appended to the name if the node is a
+static allocation.
+
+Properties:
+Requires either a) or b) below.
+a) static allocation
+ reg (required) - standard definition
+b) dynamic allocation
+ size (required) - length based on parent's #size-cells
+ - Size in bytes of memory to reserve.
+ alignment (optional) - length based on parent's #size-cells
+ - Address boundary for alignment of allocation.
+ alloc-ranges (optional) - prop-encoded-array (address, length pairs).
+ - Specifies regions of memory that are
+ acceptable to allocate from.
+
+If both reg and size are present, then the reg property takes precedence
+and size is ignored.
+
+Additional properties:
+compatible (optional) - standard definition
+ - may contain the following strings:
+ - shared-dma-pool: This indicates a region of memory meant to be
+ used as a shared pool of DMA buffers for a set of devices. It can
+ be used by an operating system to instanciate the necessary pool
+ management subsystem if necessary.
+ - vendor specific string in the form <vendor>,[<device>-]<usage>
+no-map (optional) - empty property
+ - Indicates the operating system must not create a virtual mapping
+ of the region as part of its standard mapping of system memory,
+ nor permit speculative access to it under any circumstances other
+ than under the control of the device driver using the region.
+reusable (optional) - empty property
+ - The operating system can use the memory in this region with the
+ limitation that the device driver(s) owning the region need to be
+ able to reclaim it back. Typically that means that the operating
+ system can use that region to store volatile or cached data that
+ can be otherwise regenerated or migrated elsewhere.
+
+Linux implementation note:
+- If a "linux,cma-default" property is present, then Linux will use the
+ region for the default pool of the contiguous memory allocator.
+
+Device node references to reserved memory
+-----------------------------------------
+Regions in the /reserved-memory node may be referenced by other device
+nodes by adding a memory-region property to the device node.
+
+memory-region (optional) - phandle, specifier pairs to children of /reserved-memory
+
+Example
+-------
+This example defines 3 contiguous regions are defined for Linux kernel:
+one default of all device drivers (named linux,cma@72000000 and 64MiB in size),
+one dedicated to the framebuffer device (named framebuffer@78000000, 8MiB), and
+one for multimedia processing (named multimedia-memory@77000000, 64MiB).
+
+/ {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ memory {
+ reg = <0x40000000 0x40000000>;
+ };
+
+ reserved-memory {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ /* global autoconfigured region for contiguous allocations */
+ linux,cma {
+ compatible = "shared-dma-pool";
+ reusable;
+ size = <0x4000000>;
+ alignment = <0x2000>;
+ linux,cma-default;
+ };
+
+ display_reserved: framebuffer@78000000 {
+ reg = <0x78000000 0x800000>;
+ };
+
+ multimedia_reserved: multimedia@77000000 {
+ compatible = "acme,multimedia-memory";
+ reg = <0x77000000 0x4000000>;
+ };
+ };
+
+ /* ... */
+
+ fb0: video@12300000 {
+ memory-region = <&display_reserved>;
+ /* ... */
+ };
+
+ scaler: scaler@12500000 {
+ memory-region = <&multimedia_reserved>;
+ /* ... */
+ };
+
+ codec: codec@12600000 {
+ memory-region = <&multimedia_reserved>;
+ /* ... */
+ };
+};
diff --git a/Documentation/devicetree/bindings/reset/allwinner,sunxi-clock-reset.txt b/Documentation/devicetree/bindings/reset/allwinner,sunxi-clock-reset.txt
new file mode 100644
index 000000000..c8f775714
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/allwinner,sunxi-clock-reset.txt
@@ -0,0 +1,21 @@
+Allwinner sunxi Peripheral Reset Controller
+===========================================
+
+Please also refer to reset.txt in this directory for common reset
+controller binding usage.
+
+Required properties:
+- compatible: Should be one of the following:
+ "allwinner,sun6i-a31-ahb1-reset"
+ "allwinner,sun6i-a31-clock-reset"
+- reg: should be register base and length as documented in the
+ datasheet
+- #reset-cells: 1, see below
+
+example:
+
+ahb1_rst: reset@01c202c0 {
+ #reset-cells = <1>;
+ compatible = "allwinner,sun6i-a31-ahb1-reset";
+ reg = <0x01c202c0 0xc>;
+};
diff --git a/Documentation/devicetree/bindings/reset/brcm,bcm21664-resetmgr.txt b/Documentation/devicetree/bindings/reset/brcm,bcm21664-resetmgr.txt
new file mode 100644
index 000000000..93f31ca1e
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/brcm,bcm21664-resetmgr.txt
@@ -0,0 +1,14 @@
+Broadcom Kona Family Reset Manager
+----------------------------------
+
+The reset manager is used on the Broadcom BCM21664 SoC.
+
+Required properties:
+ - compatible: brcm,bcm21664-resetmgr
+ - reg: memory address & range
+
+Example:
+ brcm,resetmgr@35001f00 {
+ compatible = "brcm,bcm21664-resetmgr";
+ reg = <0x35001f00 0x24>;
+ };
diff --git a/Documentation/devicetree/bindings/reset/fsl,imx-src.txt b/Documentation/devicetree/bindings/reset/fsl,imx-src.txt
new file mode 100644
index 000000000..13301777e
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/fsl,imx-src.txt
@@ -0,0 +1,49 @@
+Freescale i.MX System Reset Controller
+======================================
+
+Please also refer to reset.txt in this directory for common reset
+controller binding usage.
+
+Required properties:
+- compatible: Should be "fsl,<chip>-src"
+- reg: should be register base and length as documented in the
+ datasheet
+- interrupts: Should contain SRC interrupt and CPU WDOG interrupt,
+ in this order.
+- #reset-cells: 1, see below
+
+example:
+
+src: src@020d8000 {
+ compatible = "fsl,imx6q-src";
+ reg = <0x020d8000 0x4000>;
+ interrupts = <0 91 0x04 0 96 0x04>;
+ #reset-cells = <1>;
+};
+
+Specifying reset lines connected to IP modules
+==============================================
+
+The system reset controller can be used to reset the GPU, VPU,
+IPU, and OpenVG IP modules on i.MX5 and i.MX6 ICs. Those device
+nodes should specify the reset line on the SRC in their resets
+property, containing a phandle to the SRC device node and a
+RESET_INDEX specifying which module to reset, as described in
+reset.txt
+
+example:
+
+ ipu1: ipu@02400000 {
+ resets = <&src 2>;
+ };
+ ipu2: ipu@02800000 {
+ resets = <&src 4>;
+ };
+
+The following RESET_INDEX values are valid for i.MX5:
+GPU_RESET 0
+VPU_RESET 1
+IPU1_RESET 2
+OPEN_VG_RESET 3
+The following additional RESET_INDEX value is valid for i.MX6:
+IPU2_RESET 4
diff --git a/Documentation/devicetree/bindings/reset/reset.txt b/Documentation/devicetree/bindings/reset/reset.txt
new file mode 100644
index 000000000..31db6ff84
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/reset.txt
@@ -0,0 +1,75 @@
+= Reset Signal Device Tree Bindings =
+
+This binding is intended to represent the hardware reset signals present
+internally in most IC (SoC, FPGA, ...) designs. Reset signals for whole
+standalone chips are most likely better represented as GPIOs, although there
+are likely to be exceptions to this rule.
+
+Hardware blocks typically receive a reset signal. This signal is generated by
+a reset provider (e.g. power management or clock module) and received by a
+reset consumer (the module being reset, or a module managing when a sub-
+ordinate module is reset). This binding exists to represent the provider and
+consumer, and provide a way to couple the two together.
+
+A reset signal is represented by the phandle of the provider, plus a reset
+specifier - a list of DT cells that represents the reset signal within the
+provider. The length (number of cells) and semantics of the reset specifier
+are dictated by the binding of the reset provider, although common schemes
+are described below.
+
+A word on where to place reset signal consumers in device tree: It is possible
+in hardware for a reset signal to affect multiple logically separate HW blocks
+at once. In this case, it would be unwise to represent this reset signal in
+the DT node of each affected HW block, since if activated, an unrelated block
+may be reset. Instead, reset signals should be represented in the DT node
+where it makes most sense to control it; this may be a bus node if all
+children of the bus are affected by the reset signal, or an individual HW
+block node for dedicated reset signals. The intent of this binding is to give
+appropriate software access to the reset signals in order to manage the HW,
+rather than to slavishly enumerate the reset signal that affects each HW
+block.
+
+= Reset providers =
+
+Required properties:
+#reset-cells: Number of cells in a reset specifier; Typically 0 for nodes
+ with a single reset output and 1 for nodes with multiple
+ reset outputs.
+
+For example:
+
+ rst: reset-controller {
+ #reset-cells = <1>;
+ };
+
+= Reset consumers =
+
+Required properties:
+resets: List of phandle and reset specifier pairs, one pair
+ for each reset signal that affects the device, or that the
+ device manages. Note: if the reset provider specifies '0' for
+ #reset-cells, then only the phandle portion of the pair will
+ appear.
+
+Optional properties:
+reset-names: List of reset signal name strings sorted in the same order as
+ the resets property. Consumers drivers will use reset-names to
+ match reset signal names with reset specifiers.
+
+For example:
+
+ device {
+ resets = <&rst 20>;
+ reset-names = "reset";
+ };
+
+This represents a device with a single reset signal named "reset".
+
+ bus {
+ resets = <&rst 10> <&rst 11> <&rst 12> <&rst 11>;
+ reset-names = "i2s1", "i2s2", "dma", "mixer";
+ };
+
+This represents a bus that controls the reset signal of each of four sub-
+ordinate devices. Consider for example a bus that fails to operate unless no
+child device has reset asserted.
diff --git a/Documentation/devicetree/bindings/reset/sirf,rstc.txt b/Documentation/devicetree/bindings/reset/sirf,rstc.txt
new file mode 100644
index 000000000..0505de742
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/sirf,rstc.txt
@@ -0,0 +1,42 @@
+CSR SiRFSoC Reset Controller
+======================================
+
+Please also refer to reset.txt in this directory for common reset
+controller binding usage.
+
+Required properties:
+- compatible: Should be "sirf,prima2-rstc" or "sirf,marco-rstc"
+- reg: should be register base and length as documented in the
+ datasheet
+- #reset-cells: 1, see below
+
+example:
+
+rstc: reset-controller@88010000 {
+ compatible = "sirf,prima2-rstc";
+ reg = <0x88010000 0x1000>;
+ #reset-cells = <1>;
+};
+
+Specifying reset lines connected to IP modules
+==============================================
+
+The reset controller(rstc) manages various reset sources. This module provides
+reset signals for most blocks in system. Those device nodes should specify the
+reset line on the rstc in their resets property, containing a phandle to the
+rstc device node and a RESET_INDEX specifying which module to reset, as described
+in reset.txt.
+
+For SiRFSoC, RESET_INDEX is just reset_bit defined in SW_RST0 and SW_RST1 registers.
+For modules whose rest_bit is in SW_RST0, its RESET_INDEX is 0~31. For modules whose
+rest_bit is in SW_RST1, its RESET_INDEX is 32~63.
+
+example:
+
+vpp@90020000 {
+ compatible = "sirf,prima2-vpp";
+ reg = <0x90020000 0x10000>;
+ interrupts = <31>;
+ clocks = <&clks 35>;
+ resets = <&rstc 6>;
+};
diff --git a/Documentation/devicetree/bindings/reset/socfpga-reset.txt b/Documentation/devicetree/bindings/reset/socfpga-reset.txt
new file mode 100644
index 000000000..32c1c8bfd
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/socfpga-reset.txt
@@ -0,0 +1,13 @@
+Altera SOCFPGA Reset Manager
+
+Required properties:
+- compatible : "altr,rst-mgr"
+- reg : Should contain 1 register ranges(address and length)
+- #reset-cells: 1
+
+Example:
+ rstmgr@ffd05000 {
+ #reset-cells = <1>;
+ compatible = "altr,rst-mgr";
+ reg = <0xffd05000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt b/Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt
new file mode 100644
index 000000000..54ae9f747
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/st,sti-picophyreset.txt
@@ -0,0 +1,42 @@
+STMicroelectronics STi family Sysconfig Picophy SoftReset Controller
+=============================================================================
+
+This binding describes a reset controller device that is used to enable and
+disable on-chip PicoPHY USB2 phy(s) using "softreset" control bits found in
+the STi family SoC system configuration registers.
+
+The actual action taken when softreset is asserted is hardware dependent.
+However, when asserted it may not be possible to access the hardware's
+registers and after an assert/deassert sequence the hardware's previous state
+may no longer be valid.
+
+Please refer to Documentation/devicetree/bindings/reset/reset.txt
+for common reset controller binding usage.
+
+Required properties:
+- compatible: Should be "st,stih407-picophyreset"
+- #reset-cells: 1, see below
+
+Example:
+
+ picophyreset: picophyreset-controller {
+ compatible = "st,stih407-picophyreset";
+ #reset-cells = <1>;
+ };
+
+Specifying picophyreset control of devices
+=======================================
+
+Device nodes should specify the reset channel required in their "resets"
+property, containing a phandle to the picophyreset device node and an
+index specifying which channel to use, as described in
+Documentation/devicetree/bindings/reset/reset.txt.
+
+Example:
+
+ usb2_picophy0: usbpicophy@0 {
+ resets = <&picophyreset STIH407_PICOPHY0_RESET>;
+ };
+
+Macro definitions for the supported reset channels can be found in:
+include/dt-bindings/reset-controller/stih407-resets.h
diff --git a/Documentation/devicetree/bindings/reset/st,sti-powerdown.txt b/Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
new file mode 100644
index 000000000..5ab26b7e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
@@ -0,0 +1,47 @@
+STMicroelectronics STi family Sysconfig Peripheral Powerdown Reset Controller
+=============================================================================
+
+This binding describes a reset controller device that is used to enable and
+disable on-chip peripheral controllers such as USB and SATA, using
+"powerdown" control bits found in the STi family SoC system configuration
+registers. These have been grouped together into a single reset controller
+device for convenience.
+
+The actual action taken when powerdown is asserted is hardware dependent.
+However, when asserted it may not be possible to access the hardware's
+registers and after an assert/deassert sequence the hardware's previous state
+may no longer be valid.
+
+Please refer to reset.txt in this directory for common reset
+controller binding usage.
+
+Required properties:
+- compatible: Should be "st,<chip>-powerdown"
+ ex: "st,stih415-powerdown", "st,stih416-powerdown"
+- #reset-cells: 1, see below
+
+example:
+
+ powerdown: powerdown-controller {
+ #reset-cells = <1>;
+ compatible = "st,stih415-powerdown";
+ };
+
+
+Specifying powerdown control of devices
+=======================================
+
+Device nodes should specify the reset channel required in their "resets"
+property, containing a phandle to the powerdown device node and an
+index specifying which channel to use, as described in reset.txt
+
+example:
+
+ usb1: usb@fe200000 {
+ resets = <&powerdown STIH41X_USB1_POWERDOWN>;
+ };
+
+Macro definitions for the supported reset channels can be found in:
+
+include/dt-bindings/reset-controller/stih415-resets.h
+include/dt-bindings/reset-controller/stih416-resets.h
diff --git a/Documentation/devicetree/bindings/reset/st,sti-softreset.txt b/Documentation/devicetree/bindings/reset/st,sti-softreset.txt
new file mode 100644
index 000000000..a8d3d3c25
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/st,sti-softreset.txt
@@ -0,0 +1,46 @@
+STMicroelectronics STi family Sysconfig Peripheral SoftReset Controller
+=============================================================================
+
+This binding describes a reset controller device that is used to enable and
+disable on-chip peripheral controllers such as USB and SATA, using
+"softreset" control bits found in the STi family SoC system configuration
+registers.
+
+The actual action taken when softreset is asserted is hardware dependent.
+However, when asserted it may not be possible to access the hardware's
+registers and after an assert/deassert sequence the hardware's previous state
+may no longer be valid.
+
+Please refer to reset.txt in this directory for common reset
+controller binding usage.
+
+Required properties:
+- compatible: Should be "st,<chip>-softreset" example:
+ "st,stih415-softreset" or "st,stih416-softreset";
+- #reset-cells: 1, see below
+
+example:
+
+ softreset: softreset-controller {
+ #reset-cells = <1>;
+ compatible = "st,stih415-softreset";
+ };
+
+
+Specifying softreset control of devices
+=======================================
+
+Device nodes should specify the reset channel required in their "resets"
+property, containing a phandle to the softreset device node and an
+index specifying which channel to use, as described in reset.txt
+
+example:
+
+ ethernet0{
+ resets = <&softreset STIH415_ETH0_SOFTRESET>;
+ };
+
+Macro definitions for the supported reset channels can be found in:
+
+include/dt-bindings/reset-controller/stih415-resets.h
+include/dt-bindings/reset-controller/stih416-resets.h
diff --git a/Documentation/devicetree/bindings/resource-names.txt b/Documentation/devicetree/bindings/resource-names.txt
new file mode 100644
index 000000000..e280fef6f
--- /dev/null
+++ b/Documentation/devicetree/bindings/resource-names.txt
@@ -0,0 +1,54 @@
+Some properties contain an ordered list of 1 or more datum which are
+normally accessed by index. However, some devices will have multiple
+values which are more naturally accessed by name. Device nodes can
+include a supplemental property for assigning names to each of the list
+items. The names property consists of a list of strings in the same
+order as the data in the resource property.
+
+The following supplemental names properties are defined.
+
+Resource Property Supplemental Names Property
+----------------- ---------------------------
+reg reg-names
+clocks clock-names
+interrupts interrupt-names
+
+Usage:
+
+The -names property must be used in conjunction with the normal resource
+property. If not it will be ignored.
+
+Examples:
+
+l4-abe {
+ compatible = "simple-bus";
+ #address-cells = <2>;
+ #size-cells = <1>;
+ ranges = <0 0 0x48000000 0x00001000>, /* MPU path */
+ <1 0 0x49000000 0x00001000>; /* L3 path */
+ mcasp {
+ compatible = "ti,mcasp";
+ reg = <0 0x10 0x10>, <0 0x20 0x10>,
+ <1 0x10 0x10>, <1 0x20 0x10>;
+ reg-names = "mpu", "dat",
+ "dma", "dma_dat";
+ interrupts = <11>, <12>;
+ interrupt-names = "rx", "tx";
+ };
+
+ timer {
+ compatible = "ti,timer";
+ reg = <0 0x40 0x10>, <1 0x40 0x10>;
+ reg-names = "mpu", "dma";
+ };
+};
+
+
+usb {
+ compatible = "ti,usb-host";
+ reg = <0x4a064000 0x800>, <0x4a064800 0x200>,
+ <0x4a064c00 0x200>;
+ reg-names = "config", "ohci", "ehci";
+ interrupts = <14>, <15>;
+ interrupt-names = "ohci", "ehci";
+};
diff --git a/Documentation/devicetree/bindings/rng/apm,rng.txt b/Documentation/devicetree/bindings/rng/apm,rng.txt
new file mode 100644
index 000000000..4dde4b06c
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/apm,rng.txt
@@ -0,0 +1,17 @@
+APM X-Gene SoC random number generator.
+
+Required properties:
+
+- compatible : should be "apm,xgene-rng"
+- reg : specifies base physical address and size of the registers map
+- clocks : phandle to clock-controller plus clock-specifier pair
+- interrupts : specify the fault interrupt for the RNG device
+
+Example:
+
+ rng: rng@10520000 {
+ compatible = "apm,xgene-rng";
+ reg = <0x0 0x10520000 0x0 0x100>;
+ interrupts = <0x0 0x41 0x4>;
+ clocks = <&rngpkaclk 0>;
+ };
diff --git a/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
new file mode 100644
index 000000000..07ccdaa68
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/brcm,bcm2835.txt
@@ -0,0 +1,13 @@
+BCM2835 Random number generator
+
+Required properties:
+
+- compatible : should be "brcm,bcm2835-rng"
+- reg : Specifies base physical address and size of the registers.
+
+Example:
+
+rng {
+ compatible = "brcm,bcm2835-rng";
+ reg = <0x7e104000 0x10>;
+};
diff --git a/Documentation/devicetree/bindings/rng/qcom,prng.txt b/Documentation/devicetree/bindings/rng/qcom,prng.txt
new file mode 100644
index 000000000..8e5853c28
--- /dev/null
+++ b/Documentation/devicetree/bindings/rng/qcom,prng.txt
@@ -0,0 +1,17 @@
+Qualcomm MSM pseudo random number generator.
+
+Required properties:
+
+- compatible : should be "qcom,prng"
+- reg : specifies base physical address and size of the registers map
+- clocks : phandle to clock-controller plus clock-specifier pair
+- clock-names : "core" clocks all registers, FIFO and circuits in PRNG IP block
+
+Example:
+
+ rng@f9bff000 {
+ compatible = "qcom,prng";
+ reg = <0xf9bff000 0x200>;
+ clocks = <&clock GCC_PRNG_AHB_CLK>;
+ clock-names = "core";
+ };
diff --git a/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt b/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt
new file mode 100644
index 000000000..be789685a
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/abracon,abx80x.txt
@@ -0,0 +1,30 @@
+Abracon ABX80X I2C ultra low power RTC/Alarm chip
+
+The Abracon ABX80X family consist of the ab0801, ab0803, ab0804, ab0805, ab1801,
+ab1803, ab1804 and ab1805. The ab0805 is the superset of ab080x and the ab1805
+is the superset of ab180x.
+
+Required properties:
+
+ - "compatible": should one of:
+ "abracon,abx80x"
+ "abracon,ab0801"
+ "abracon,ab0803"
+ "abracon,ab0804"
+ "abracon,ab0805"
+ "abracon,ab1801"
+ "abracon,ab1803"
+ "abracon,ab1804"
+ "abracon,ab1805"
+ Using "abracon,abx80x" will enable chip autodetection.
+ - "reg": I2C bus address of the device
+
+Optional properties:
+
+The abx804 and abx805 have a trickle charger that is able to charge the
+connected battery or supercap. Both the following properties have to be defined
+and valid to enable charging:
+
+ - "abracon,tc-diode": should be "standard" (0.6V) or "schottky" (0.3V)
+ - "abracon,tc-resistor": should be <0>, <3>, <6> or <11>. 0 disables the output
+ resistor, the other values are in ohm.
diff --git a/Documentation/devicetree/bindings/rtc/armada-380-rtc.txt b/Documentation/devicetree/bindings/rtc/armada-380-rtc.txt
new file mode 100644
index 000000000..2eb9d4ee7
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/armada-380-rtc.txt
@@ -0,0 +1,22 @@
+* Real Time Clock of the Armada 38x SoCs
+
+RTC controller for the Armada 38x SoCs
+
+Required properties:
+- compatible : Should be "marvell,armada-380-rtc"
+- reg: a list of base address and size pairs, one for each entry in
+ reg-names
+- reg names: should contain:
+ * "rtc" for the RTC registers
+ * "rtc-soc" for the SoC related registers and among them the one
+ related to the interrupt.
+- interrupts: IRQ line for the RTC.
+
+Example:
+
+rtc@a3800 {
+ compatible = "marvell,armada-380-rtc";
+ reg = <0xa3800 0x20>, <0x184a0 0x0c>;
+ reg-names = "rtc", "rtc-soc";
+ interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt
new file mode 100644
index 000000000..34c150577
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/atmel,at91rm9200-rtc.txt
@@ -0,0 +1,15 @@
+Atmel AT91RM9200 Real Time Clock
+
+Required properties:
+- compatible: should be: "atmel,at91rm9200-rtc" or "atmel,at91sam9x5-rtc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: rtc alarm/event interrupt
+
+Example:
+
+rtc@fffffe00 {
+ compatible = "atmel,at91rm9200-rtc";
+ reg = <0xfffffe00 0x100>;
+ interrupts = <1 4 7>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/atmel,at91sam9-rtc.txt b/Documentation/devicetree/bindings/rtc/atmel,at91sam9-rtc.txt
new file mode 100644
index 000000000..6ae79d184
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/atmel,at91sam9-rtc.txt
@@ -0,0 +1,23 @@
+Atmel AT91SAM9260 Real Time Timer
+
+Required properties:
+- compatible: should be: "atmel,at91sam9260-rtt"
+- reg: should encode the memory region of the RTT controller
+- interrupts: rtt alarm/event interrupt
+- clocks: should contain the 32 KHz slow clk that will drive the RTT block.
+- atmel,rtt-rtc-time-reg: should encode the GPBR register used to store
+ the time base when the RTT is used as an RTC.
+ The first cell should point to the GPBR node and the second one
+ encode the offset within the GPBR block (or in other words, the
+ GPBR register used to store the time base).
+
+
+Example:
+
+rtt@fffffd20 {
+ compatible = "atmel,at91sam9260-rtt";
+ reg = <0xfffffd20 0x10>;
+ interrupts = <1 4 7>;
+ clocks = <&clk32k>;
+ atmel,rtt-rtc-time-reg = <&gpbr 0x0>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/dallas,ds1339.txt b/Documentation/devicetree/bindings/rtc/dallas,ds1339.txt
new file mode 100644
index 000000000..916f57601
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/dallas,ds1339.txt
@@ -0,0 +1,18 @@
+* Dallas DS1339 I2C Serial Real-Time Clock
+
+Required properties:
+- compatible: Should contain "dallas,ds1339".
+- reg: I2C address for chip
+
+Optional properties:
+- trickle-resistor-ohms : Selected resistor for trickle charger
+ Values usable for ds1339 are 250, 2000, 4000
+ Should be given if trickle charger should be enabled
+- trickle-diode-disable : Do not use internal trickle charger diode
+ Should be given if internal trickle charger diode should be disabled
+Example:
+ ds1339: rtc@68 {
+ compatible = "dallas,ds1339";
+ trickle-resistor-ohms = <250>;
+ reg = <0x68>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/digicolor-rtc.txt b/Documentation/devicetree/bindings/rtc/digicolor-rtc.txt
new file mode 100644
index 000000000..d46498601
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/digicolor-rtc.txt
@@ -0,0 +1,17 @@
+Conexant Digicolor Real Time Clock controller
+
+This binding currently supports the CX92755 SoC.
+
+Required properties:
+- compatible: should be "cnxt,cx92755-rtc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: rtc alarm interrupt
+
+Example:
+
+ rtc@f0000c30 {
+ compatible = "cnxt,cx92755-rtc";
+ reg = <0xf0000c30 0x18>;
+ interrupts = <25>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/dw-apb.txt b/Documentation/devicetree/bindings/rtc/dw-apb.txt
new file mode 100644
index 000000000..c703d51ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/dw-apb.txt
@@ -0,0 +1,32 @@
+* Designware APB timer
+
+Required properties:
+- compatible: One of:
+ "snps,dw-apb-timer"
+ "snps,dw-apb-timer-sp" <DEPRECATED>
+ "snps,dw-apb-timer-osc" <DEPRECATED>
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: IRQ line for the timer.
+- either clocks+clock-names or clock-frequency properties
+
+Optional properties:
+- clocks : list of clock specifiers, corresponding to entries in
+ the clock-names property;
+- clock-names : should contain "timer" and "pclk" entries, matching entries
+ in the clocks property.
+- clock-frequency: The frequency in HZ of the timer.
+- clock-freq: For backwards compatibility with picoxcell
+
+If using the clock specifiers, the pclk clock is optional, as not all
+systems may use one.
+
+
+Example:
+ timer@ffe00000 {
+ compatible = "snps,dw-apb-timer";
+ interrupts = <0 170 4>;
+ reg = <0xffe00000 0x1000>;
+ clocks = <&timer_clk>, <&timer_pclk>;
+ clock-names = "timer", "pclk";
+ };
diff --git a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt
new file mode 100644
index 000000000..5c199ee04
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt
@@ -0,0 +1,30 @@
+Haoyu Microelectronics HYM8563 Real Time Clock
+
+The HYM8563 provides basic rtc and alarm functionality
+as well as a clock output of up to 32kHz.
+
+Required properties:
+- compatible: should be: "haoyu,hym8563"
+- reg: i2c address
+- interrupts: rtc alarm/event interrupt
+- #clock-cells: the value should be 0
+
+Optional properties:
+- clock-output-names: From common clock binding
+
+Example:
+
+hym8563: hym8563@51 {
+ compatible = "haoyu,hym8563";
+ reg = <0x51>;
+
+ interrupts = <13 IRQ_TYPE_EDGE_FALLING>;
+
+ #clock-cells = <0>;
+};
+
+device {
+...
+ clocks = <&hym8563>;
+...
+};
diff --git a/Documentation/devicetree/bindings/rtc/imxdi-rtc.txt b/Documentation/devicetree/bindings/rtc/imxdi-rtc.txt
new file mode 100644
index 000000000..c9d80d7da
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/imxdi-rtc.txt
@@ -0,0 +1,17 @@
+* i.MX25 Real Time Clock controller
+
+This binding supports the following chips: i.MX25, i.MX53
+
+Required properties:
+- compatible: should be: "fsl,imx25-rtc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: rtc alarm interrupt
+
+Example:
+
+rtc@80056000 {
+ compatible = "fsl,imx53-rtc", "fsl,imx25-rtc";
+ reg = <0x80056000 2000>;
+ interrupts = <29>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/isil,isl12057.txt b/Documentation/devicetree/bindings/rtc/isil,isl12057.txt
new file mode 100644
index 000000000..501c39cea
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/isil,isl12057.txt
@@ -0,0 +1,78 @@
+Intersil ISL12057 I2C RTC/Alarm chip
+
+ISL12057 is a trivial I2C device (it has simple device tree bindings,
+consisting of a compatible field, an address and possibly an interrupt
+line).
+
+Nonetheless, it also supports an option boolean property
+("isil,irq2-can-wakeup-machine") to handle the specific use-case found
+on at least three in-tree users of the chip (NETGEAR ReadyNAS 102, 104
+and 2120 ARM-based NAS); On those devices, the IRQ#2 pin of the chip
+(associated with the alarm supported by the driver) is not connected
+to the SoC but to a PMIC. It allows the device to be powered up when
+RTC alarm rings. In order to mark the device has a wakeup source and
+get access to the 'wakealarm' sysfs entry, this specific property can
+be set when the IRQ#2 pin of the chip is not connected to the SoC but
+can wake up the device.
+
+Required properties supported by the device:
+
+ - "compatible": must be "isil,isl12057"
+ - "reg": I2C bus address of the device
+
+Optional properties:
+
+ - "isil,irq2-can-wakeup-machine": mark the chip as a wakeup source,
+ independently of the availability of an IRQ line connected to the
+ SoC.
+
+ - "interrupt-parent", "interrupts": for passing the interrupt line
+ of the SoC connected to IRQ#2 of the RTC chip.
+
+
+Example isl12057 node without IRQ#2 pin connected (no alarm support):
+
+ isl12057: isl12057@68 {
+ compatible = "isil,isl12057";
+ reg = <0x68>;
+ };
+
+
+Example isl12057 node with IRQ#2 pin connected to main SoC via MPP6 (note
+that the pinctrl-related properties below are given for completeness and
+may not be required or may be different depending on your system or
+SoC, and the main function of the MPP used as IRQ line, i.e.
+"interrupt-parent" and "interrupts" are usually sufficient):
+
+ pinctrl {
+ ...
+
+ rtc_alarm_pin: rtc_alarm_pin {
+ marvell,pins = "mpp6";
+ marvell,function = "gpio";
+ };
+
+ ...
+
+ };
+
+ ...
+
+ isl12057: isl12057@68 {
+ compatible = "isil,isl12057";
+ reg = <0x68>;
+ pinctrl-0 = <&rtc_alarm_pin>;
+ pinctrl-names = "default";
+ interrupt-parent = <&gpio0>;
+ interrupts = <6 IRQ_TYPE_EDGE_FALLING>;
+ };
+
+
+Example isl12057 node without IRQ#2 pin connected to the SoC but to a
+PMIC, allowing the device to be started based on configured alarm:
+
+ isl12057: isl12057@68 {
+ compatible = "isil,isl12057";
+ reg = <0x68>;
+ isil,irq2-can-wakeup-machine;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt b/Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt
new file mode 100644
index 000000000..a87a1e9bc
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/lpc32xx-rtc.txt
@@ -0,0 +1,15 @@
+* NXP LPC32xx SoC Real Time Clock controller
+
+Required properties:
+- compatible: must be "nxp,lpc3220-rtc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: The RTC interrupt
+
+Example:
+
+ rtc@40024000 {
+ compatible = "nxp,lpc3220-rtc";
+ reg = <0x40024000 0x1000>;
+ interrupts = <52 0>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/maxim,ds1742.txt b/Documentation/devicetree/bindings/rtc/maxim,ds1742.txt
new file mode 100644
index 000000000..d0f937c35
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/maxim,ds1742.txt
@@ -0,0 +1,12 @@
+* Maxim (Dallas) DS1742/DS1743 Real Time Clock
+
+Required properties:
+- compatible: Should contain "maxim,ds1742".
+- reg: Physical base address of the RTC and length of memory
+ mapped region.
+
+Example:
+ rtc: rtc@10000000 {
+ compatible = "maxim,ds1742";
+ reg = <0x10000000 0x800>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt b/Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt
new file mode 100644
index 000000000..c9d3ac147
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt
@@ -0,0 +1,17 @@
+MOXA ART real-time clock
+
+Required properties:
+
+- compatible : Should be "moxa,moxart-rtc"
+- gpio-rtc-sclk : RTC sclk gpio, with zero flags
+- gpio-rtc-data : RTC data gpio, with zero flags
+- gpio-rtc-reset : RTC reset gpio, with zero flags
+
+Example:
+
+ rtc: rtc {
+ compatible = "moxa,moxart-rtc";
+ gpio-rtc-sclk = <&gpio 5 0>;
+ gpio-rtc-data = <&gpio 6 0>;
+ gpio-rtc-reset = <&gpio 7 0>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.txt b/Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.txt
new file mode 100644
index 000000000..b7d98ed3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/nvidia,tegra20-rtc.txt
@@ -0,0 +1,24 @@
+NVIDIA Tegra20 real-time clock
+
+The Tegra RTC maintains seconds and milliseconds counters, and five alarm
+registers. The alarms and other interrupts may wake the system from low-power
+state.
+
+Required properties:
+
+- compatible : For Tegra20, must contain "nvidia,tegra20-rtc". Otherwise,
+ must contain '"nvidia,<chip>-rtc", "nvidia,tegra20-rtc"', where <chip>
+ can be tegra30, tegra114, tegra124, or tegra132.
+- reg : Specifies base physical address and size of the registers.
+- interrupts : A single interrupt specifier.
+- clocks : Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+
+Example:
+
+timer {
+ compatible = "nvidia,tegra20-rtc";
+ reg = <0x7000e000 0x100>;
+ interrupts = <0 2 0x04>;
+ clocks = <&tegra_car 4>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt b/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt
new file mode 100644
index 000000000..5cbc0b145
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/nxp,rtc-2123.txt
@@ -0,0 +1,16 @@
+NXP PCF2123 SPI Real Time Clock
+
+Required properties:
+- compatible: should be: "nxp,rtc-pcf2123"
+- reg: should be the SPI slave chipselect address
+
+Optional properties:
+- spi-cs-high: PCF2123 needs chipselect high
+
+Example:
+
+rtc: nxp,rtc-pcf2123@3 {
+ compatible = "nxp,rtc-pcf2123"
+ reg = <3>
+ spi-cs-high;
+};
diff --git a/Documentation/devicetree/bindings/rtc/olpc-xo1-rtc.txt b/Documentation/devicetree/bindings/rtc/olpc-xo1-rtc.txt
new file mode 100644
index 000000000..a2891ceb6
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/olpc-xo1-rtc.txt
@@ -0,0 +1,5 @@
+OLPC XO-1 RTC
+~~~~~~~~~~~~~
+
+Required properties:
+ - compatible : "olpc,xo1-rtc"
diff --git a/Documentation/devicetree/bindings/rtc/orion-rtc.txt b/Documentation/devicetree/bindings/rtc/orion-rtc.txt
new file mode 100644
index 000000000..3bf63ffa5
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/orion-rtc.txt
@@ -0,0 +1,18 @@
+* Mvebu Real Time Clock
+
+RTC controller for the Kirkwood, the Dove, the Armada 370 and the
+Armada XP SoCs
+
+Required properties:
+- compatible : Should be "marvell,orion-rtc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: IRQ line for the RTC.
+
+Example:
+
+rtc@10300 {
+ compatible = "marvell,orion-rtc";
+ reg = <0xd0010300 0x20>;
+ interrupts = <50>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/pxa-rtc.txt b/Documentation/devicetree/bindings/rtc/pxa-rtc.txt
new file mode 100644
index 000000000..8c6672a1b
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/pxa-rtc.txt
@@ -0,0 +1,14 @@
+* PXA RTC
+
+PXA specific RTC driver.
+
+Required properties:
+- compatible : Should be "marvell,pxa-rtc"
+
+Examples:
+
+rtc@40900000 {
+ compatible = "marvell,pxa-rtc";
+ reg = <0x40900000 0x3c>;
+ interrupts = <30 31>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/rtc-cmos.txt b/Documentation/devicetree/bindings/rtc/rtc-cmos.txt
new file mode 100644
index 000000000..7382989b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-cmos.txt
@@ -0,0 +1,28 @@
+ Motorola mc146818 compatible RTC
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Required properties:
+ - compatible : "motorola,mc146818"
+ - reg : should contain registers location and length.
+
+Optional properties:
+ - interrupts : should contain interrupt.
+ - interrupt-parent : interrupt source phandle.
+ - ctrl-reg : Contains the initial value of the control register also
+ called "Register B".
+ - freq-reg : Contains the initial value of the frequency register also
+ called "Regsiter A".
+
+"Register A" and "B" are usually initialized by the firmware (BIOS for
+instance). If this is not done, it can be performed by the driver.
+
+ISA Example:
+
+ rtc@70 {
+ compatible = "motorola,mc146818";
+ interrupts = <8 3>;
+ interrupt-parent = <&ioapic1>;
+ ctrl-reg = <2>;
+ freq-reg = <0x26>;
+ reg = <1 0x70 2>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/rtc-omap.txt b/Documentation/devicetree/bindings/rtc/rtc-omap.txt
new file mode 100644
index 000000000..4ba4dbd34
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-omap.txt
@@ -0,0 +1,28 @@
+TI Real Time Clock
+
+Required properties:
+- compatible:
+ - "ti,da830-rtc" - for RTC IP used similar to that on DA8xx SoC family.
+ - "ti,am3352-rtc" - for RTC IP used similar to that on AM335x SoC family.
+ This RTC IP has special WAKE-EN Register to enable
+ Wakeup generation for event Alarm. It can also be
+ used to control an external PMIC via the
+ pmic_power_en pin.
+- reg: Address range of rtc register set
+- interrupts: rtc timer, alarm interrupts in order
+- interrupt-parent: phandle for the interrupt controller
+
+Optional properties:
+- system-power-controller: whether the rtc is controlling the system power
+ through pmic_power_en
+
+Example:
+
+rtc@1c23000 {
+ compatible = "ti,da830-rtc";
+ reg = <0x23000 0x1000>;
+ interrupts = <19
+ 19>;
+ interrupt-parent = <&intc>;
+ system-power-controller;
+};
diff --git a/Documentation/devicetree/bindings/rtc/rtc-opal.txt b/Documentation/devicetree/bindings/rtc/rtc-opal.txt
new file mode 100644
index 000000000..af87e5eca
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-opal.txt
@@ -0,0 +1,16 @@
+IBM OPAL real-time clock
+------------------------
+
+Required properties:
+- comapatible: Should be "ibm,opal-rtc"
+
+Optional properties:
+- has-tpo: Decides if the wakeup is supported or not.
+
+Example:
+ rtc {
+ compatible = "ibm,opal-rtc";
+ has-tpo;
+ phandle = <0x10000029>;
+ linux,phandle = <0x10000029>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/rtc-palmas.txt b/Documentation/devicetree/bindings/rtc/rtc-palmas.txt
new file mode 100644
index 000000000..adbccc0a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/rtc-palmas.txt
@@ -0,0 +1,33 @@
+Palmas RTC controller bindings
+
+Required properties:
+- compatible:
+ - "ti,palmas-rtc" for palma series of the RTC controller
+- interrupt-parent: Parent interrupt device, must be handle of palmas node.
+- interrupts: Interrupt number of RTC submodule on device.
+
+Optional properties:
+
+- ti,backup-battery-chargeable: The Palmas series device like TPS65913 or
+ TPS80036 supports the backup battery for powering the RTC when main
+ battery is removed or in very low power state. The backup battery
+ can be chargeable or non-chargeable. This flag will tells whether
+ battery is chargeable or not. If charging battery then driver can
+ enable the charging.
+- ti,backup-battery-charge-high-current: Enable high current charging in
+ backup battery. Device supports the < 100mA and > 100mA charging.
+ The high current will be > 100mA. Absence of this property will
+ charge battery to lower current i.e. < 100mA.
+
+Example:
+ palmas: tps65913@58 {
+ ...
+ palmas_rtc: rtc {
+ compatible = "ti,palmas-rtc";
+ interrupt-parent = <&palmas>;
+ interrupts = <8 0>;
+ ti,backup-battery-chargeable;
+ ti,backup-battery-charge-high-current;
+ };
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/rtc/s3c-rtc.txt b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
new file mode 100644
index 000000000..ab757b84d
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/s3c-rtc.txt
@@ -0,0 +1,23 @@
+* Samsung's S3C Real Time Clock controller
+
+Required properties:
+- compatible: should be one of the following.
+ * "samsung,s3c2410-rtc" - for controllers compatible with s3c2410 rtc.
+ * "samsung,s3c2416-rtc" - for controllers compatible with s3c2416 rtc.
+ * "samsung,s3c2443-rtc" - for controllers compatible with s3c2443 rtc.
+ * "samsung,s3c6410-rtc" - for controllers compatible with s3c6410 rtc.
+ * "samsung,exynos3250-rtc" - for controllers compatible with exynos3250 rtc.
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: Two interrupt numbers to the cpu should be specified. First
+ interrupt number is the rtc alarm interrupt and second interrupt number
+ is the rtc tick interrupt. The number of cells representing a interrupt
+ depends on the parent interrupt controller.
+
+Example:
+
+ rtc@10070000 {
+ compatible = "samsung,s3c6410-rtc";
+ reg = <0x10070000 0x100>;
+ interrupts = <44 0 45 0>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/sa1100-rtc.txt b/Documentation/devicetree/bindings/rtc/sa1100-rtc.txt
new file mode 100644
index 000000000..0cda19ad4
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/sa1100-rtc.txt
@@ -0,0 +1,17 @@
+* Marvell Real Time Clock controller
+
+Required properties:
+- compatible: should be "mrvl,sa1100-rtc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: Should be two. The first interrupt number is the rtc alarm
+ interrupt and the second interrupt number is the rtc hz interrupt.
+- interrupt-names: Assign name of irq resource.
+
+Example:
+ rtc: rtc@d4010000 {
+ compatible = "mrvl,mmp-rtc";
+ reg = <0xd4010000 0x1000>;
+ interrupts = <5>, <6>;
+ interrupt-name = "rtc 1Hz", "rtc alarm";
+ };
diff --git a/Documentation/devicetree/bindings/rtc/snvs-rtc.txt b/Documentation/devicetree/bindings/rtc/snvs-rtc.txt
new file mode 100644
index 000000000..fb61ed77a
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/snvs-rtc.txt
@@ -0,0 +1 @@
+See Documentation/devicetree/bindings/crypto/fsl-sec4.txt for details.
diff --git a/Documentation/devicetree/bindings/rtc/spear-rtc.txt b/Documentation/devicetree/bindings/rtc/spear-rtc.txt
new file mode 100644
index 000000000..ca67ac621
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/spear-rtc.txt
@@ -0,0 +1,17 @@
+* SPEAr RTC
+
+Required properties:
+- compatible : "st,spear600-rtc"
+- reg : Address range of the rtc registers
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupt: Should contain the rtc interrupt number
+
+Example:
+
+ rtc@fc000000 {
+ compatible = "st,spear600-rtc";
+ reg = <0xfc000000 0x1000>;
+ interrupt-parent = <&vic1>;
+ interrupts = <12>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/stmp3xxx-rtc.txt b/Documentation/devicetree/bindings/rtc/stmp3xxx-rtc.txt
new file mode 100644
index 000000000..fa6a94226
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/stmp3xxx-rtc.txt
@@ -0,0 +1,21 @@
+* STMP3xxx/i.MX28 Time Clock controller
+
+Required properties:
+- compatible: should be one of the following.
+ * "fsl,stmp3xxx-rtc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: rtc alarm interrupt
+
+Optional properties:
+- stmp,crystal-freq: override crystal frequency as determined from fuse bits.
+ Only <32000> and <32768> are possible for the hardware. Use <0> for
+ "no crystal".
+
+Example:
+
+rtc@80056000 {
+ compatible = "fsl,imx28-rtc", "fsl,stmp3xxx-rtc";
+ reg = <0x80056000 2000>;
+ interrupts = <29>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/sun6i-rtc.txt b/Documentation/devicetree/bindings/rtc/sun6i-rtc.txt
new file mode 100644
index 000000000..f007e428a
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/sun6i-rtc.txt
@@ -0,0 +1,17 @@
+* sun6i Real Time Clock
+
+RTC controller for the Allwinner A31
+
+Required properties:
+- compatible : Should be "allwinner,sun6i-a31-rtc"
+- reg : physical base address of the controller and length of
+ memory mapped region.
+- interrupts : IRQ lines for the RTC alarm 0 and alarm 1, in that order.
+
+Example:
+
+rtc: rtc@01f00000 {
+ compatible = "allwinner,sun6i-a31-rtc";
+ reg = <0x01f00000 0x54>;
+ interrupts = <0 40 4>, <0 41 4>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/sunxi-rtc.txt b/Documentation/devicetree/bindings/rtc/sunxi-rtc.txt
new file mode 100644
index 000000000..6983aad37
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/sunxi-rtc.txt
@@ -0,0 +1,17 @@
+* sun4i/sun7i Real Time Clock
+
+RTC controller for the Allwinner A10/A20
+
+Required properties:
+- compatible : Should be "allwinner,sun4i-a10-rtc" or "allwinner,sun7i-a20-rtc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: IRQ line for the RTC.
+
+Example:
+
+rtc: rtc@01c20d00 {
+ compatible = "allwinner,sun4i-a10-rtc";
+ reg = <0x01c20d00 0x20>;
+ interrupts = <24>;
+};
diff --git a/Documentation/devicetree/bindings/rtc/twl-rtc.txt b/Documentation/devicetree/bindings/rtc/twl-rtc.txt
new file mode 100644
index 000000000..596e0c97b
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/twl-rtc.txt
@@ -0,0 +1,12 @@
+* TI twl RTC
+
+The TWL family (twl4030/6030) contains a RTC.
+
+Required properties:
+- compatible : Should be twl4030-rtc
+
+Examples:
+
+rtc@0 {
+ compatible = "ti,twl4030-rtc";
+};
diff --git a/Documentation/devicetree/bindings/rtc/via,vt8500-rtc.txt b/Documentation/devicetree/bindings/rtc/via,vt8500-rtc.txt
new file mode 100644
index 000000000..3c0484c49
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/via,vt8500-rtc.txt
@@ -0,0 +1,15 @@
+VIA/Wondermedia VT8500 Realtime Clock Controller
+-----------------------------------------------------
+
+Required properties:
+- compatible : "via,vt8500-rtc"
+- reg : Should contain 1 register ranges(address and length)
+- interrupts : alarm interrupt
+
+Example:
+
+ rtc@d8100000 {
+ compatible = "via,vt8500-rtc";
+ reg = <0xd8100000 0x10000>;
+ interrupts = <48>;
+ };
diff --git a/Documentation/devicetree/bindings/rtc/xgene-rtc.txt b/Documentation/devicetree/bindings/rtc/xgene-rtc.txt
new file mode 100644
index 000000000..fd195c358
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/xgene-rtc.txt
@@ -0,0 +1,28 @@
+* APM X-Gene Real Time Clock
+
+RTC controller for the APM X-Gene Real Time Clock
+
+Required properties:
+- compatible : Should be "apm,xgene-rtc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: IRQ line for the RTC.
+- #clock-cells: Should be 1.
+- clocks: Reference to the clock entry.
+
+Example:
+
+rtcclk: rtcclk {
+ compatible = "fixed-clock";
+ #clock-cells = <1>;
+ clock-frequency = <100000000>;
+ clock-output-names = "rtcclk";
+};
+
+rtc: rtc@10510000 {
+ compatible = "apm,xgene-rtc";
+ reg = <0x0 0x10510000 0x0 0x400>;
+ interrupts = <0x0 0x46 0x4>;
+ #clock-cells = <1>;
+ clocks = <&rtcclk 0>;
+};
diff --git a/Documentation/devicetree/bindings/security/tpm/st33zp24-i2c.txt b/Documentation/devicetree/bindings/security/tpm/st33zp24-i2c.txt
new file mode 100644
index 000000000..3ad115efe
--- /dev/null
+++ b/Documentation/devicetree/bindings/security/tpm/st33zp24-i2c.txt
@@ -0,0 +1,36 @@
+* STMicroelectronics SAS. ST33ZP24 TPM SoC
+
+Required properties:
+- compatible: Should be "st,st33zp24-i2c".
+- clock-frequency: I²C work frequency.
+- reg: address on the bus
+
+Optional ST33ZP24 Properties:
+- interrupt-parent: phandle for the interrupt gpio controller
+- interrupts: GPIO interrupt to which the chip is connected
+- lpcpd-gpios: Output GPIO pin used for ST33ZP24 power management D1/D2 state.
+If set, power must be present when the platform is going into sleep/hibernate mode.
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pintctrl-0: Specifies the pin control groups used for this controller.
+
+Example (for ARM-based BeagleBoard xM with ST33ZP24 on I2C2):
+
+&i2c2 {
+
+ status = "okay";
+
+ st33zp24: st33zp24@13 {
+
+ compatible = "st,st33zp24-i2c";
+
+ reg = <0x13>;
+ clock-frequency = <400000>;
+
+ interrupt-parent = <&gpio5>;
+ interrupts = <7 IRQ_TYPE_LEVEL_HIGH>;
+
+ lpcpd-gpios = <&gpio5 15 GPIO_ACTIVE_HIGH>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/security/tpm/st33zp24-spi.txt b/Documentation/devicetree/bindings/security/tpm/st33zp24-spi.txt
new file mode 100644
index 000000000..158b0165e
--- /dev/null
+++ b/Documentation/devicetree/bindings/security/tpm/st33zp24-spi.txt
@@ -0,0 +1,34 @@
+* STMicroelectronics SAS. ST33ZP24 TPM SoC
+
+Required properties:
+- compatible: Should be "st,st33zp24-spi".
+- spi-max-frequency: Maximum SPI frequency (<= 10000000).
+
+Optional ST33ZP24 Properties:
+- interrupt-parent: phandle for the interrupt gpio controller
+- interrupts: GPIO interrupt to which the chip is connected
+- lpcpd-gpios: Output GPIO pin used for ST33ZP24 power management D1/D2 state.
+If set, power must be present when the platform is going into sleep/hibernate mode.
+
+Optional SoC Specific Properties:
+- pinctrl-names: Contains only one value - "default".
+- pintctrl-0: Specifies the pin control groups used for this controller.
+
+Example (for ARM-based BeagleBoard xM with ST33ZP24 on SPI4):
+
+&mcspi4 {
+
+ status = "okay";
+
+ st33zp24@0 {
+
+ compatible = "st,st33zp24-spi";
+
+ spi-max-frequency = <10000000>;
+
+ interrupt-parent = <&gpio5>;
+ interrupts = <7 IRQ_TYPE_LEVEL_HIGH>;
+
+ lpcpd-gpios = <&gpio5 15 GPIO_ACTIVE_HIGH>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/serial/8250.txt b/Documentation/devicetree/bindings/serial/8250.txt
new file mode 100644
index 000000000..91d5ab0e6
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/8250.txt
@@ -0,0 +1,66 @@
+* UART (Universal Asynchronous Receiver/Transmitter)
+
+Required properties:
+- compatible : one of:
+ - "ns8250"
+ - "ns16450"
+ - "ns16550a"
+ - "ns16550"
+ - "ns16750"
+ - "ns16850"
+ - For Tegra20, must contain "nvidia,tegra20-uart"
+ - For other Tegra, must contain '"nvidia,<chip>-uart",
+ "nvidia,tegra20-uart"' where <chip> is tegra30, tegra114, tegra124,
+ tegra132, or tegra210.
+ - "nxp,lpc3220-uart"
+ - "ralink,rt2880-uart"
+ - "ibm,qpace-nwp-serial"
+ - "altr,16550-FIFO32"
+ - "altr,16550-FIFO64"
+ - "altr,16550-FIFO128"
+ - "fsl,16550-FIFO64"
+ - "fsl,ns16550"
+ - "serial" if the port type is unknown.
+- reg : offset and length of the register set for the device.
+- interrupts : should contain uart interrupt.
+- clock-frequency : the input clock frequency for the UART
+ or
+ clocks phandle to refer to the clk used as per Documentation/devicetree
+ /bindings/clock/clock-bindings.txt
+
+Optional properties:
+- current-speed : the current active speed of the UART.
+- reg-offset : offset to apply to the mapbase from the start of the registers.
+- reg-shift : quantity to shift the register offsets by.
+- reg-io-width : the size (in bytes) of the IO accesses that should be
+ performed on the device. There are some systems that require 32-bit
+ accesses to the UART (e.g. TI davinci).
+- used-by-rtas : set to indicate that the port is in use by the OpenFirmware
+ RTAS and should not be registered.
+- no-loopback-test: set to indicate that the port does not implements loopback
+ test mode
+- fifo-size: the fifo size of the UART.
+- auto-flow-control: one way to enable automatic flow control support. The
+ driver is allowed to detect support for the capability even without this
+ property.
+
+Note:
+* fsl,ns16550:
+ ------------
+ Freescale DUART is very similar to the PC16552D (and to a
+ pair of NS16550A), albeit with some nonstandard behavior such as
+ erratum A-004737 (relating to incorrect BRK handling).
+
+ Represents a single port that is compatible with the DUART found
+ on many Freescale chips (examples include mpc8349, mpc8548,
+ mpc8641d, p4080 and ls2085a).
+
+Example:
+
+ uart@80230000 {
+ compatible = "ns8250";
+ reg = <0x80230000 0x100>;
+ clock-frequency = <3686400>;
+ interrupts = <10>;
+ reg-shift = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/altera_jtaguart.txt b/Documentation/devicetree/bindings/serial/altera_jtaguart.txt
new file mode 100644
index 000000000..55a901051
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/altera_jtaguart.txt
@@ -0,0 +1,5 @@
+Altera JTAG UART
+
+Required properties:
+- compatible : should be "ALTR,juart-1.0" <DEPRECATED>
+- compatible : should be "altr,juart-1.0"
diff --git a/Documentation/devicetree/bindings/serial/altera_uart.txt b/Documentation/devicetree/bindings/serial/altera_uart.txt
new file mode 100644
index 000000000..81bf7ffb1
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/altera_uart.txt
@@ -0,0 +1,8 @@
+Altera UART
+
+Required properties:
+- compatible : should be "ALTR,uart-1.0" <DEPRECATED>
+- compatible : should be "altr,uart-1.0"
+
+Optional properties:
+- clock-frequency : frequency of the clock input to the UART
diff --git a/Documentation/devicetree/bindings/serial/arc-uart.txt b/Documentation/devicetree/bindings/serial/arc-uart.txt
new file mode 100644
index 000000000..5cae2eb68
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/arc-uart.txt
@@ -0,0 +1,26 @@
+* Synopsys ARC UART : Non standard UART used in some of the ARC FPGA boards
+
+Required properties:
+- compatible : "snps,arc-uart"
+- reg : offset and length of the register set for the device.
+- interrupts : device interrupt
+- clock-frequency : the input clock frequency for the UART
+- current-speed : baud rate for UART
+
+e.g.
+
+arcuart0: serial@c0fc1000 {
+ compatible = "snps,arc-uart";
+ reg = <0xc0fc1000 0x100>;
+ interrupts = <5>;
+ clock-frequency = <80000000>;
+ current-speed = <115200>;
+ status = "okay";
+};
+
+Note: Each port should have an alias correctly numbered in "aliases" node.
+
+e.g.
+aliases {
+ serial0 = &arcuart0;
+};
diff --git a/Documentation/devicetree/bindings/serial/atmel-usart.txt b/Documentation/devicetree/bindings/serial/atmel-usart.txt
new file mode 100644
index 000000000..90787aa2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/atmel-usart.txt
@@ -0,0 +1,60 @@
+* Atmel Universal Synchronous Asynchronous Receiver/Transmitter (USART)
+
+Required properties:
+- compatible: Should be "atmel,<chip>-usart" or "atmel,<chip>-dbgu"
+ The compatible <chip> indicated will be the first SoC to support an
+ additional mode or an USART new feature.
+ For the dbgu UART, use "atmel,<chip>-dbgu", "atmel,<chip>-usart"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt
+- clock-names: tuple listing input clock names.
+ Required elements: "usart"
+- clocks: phandles to input clocks.
+
+Optional properties:
+- atmel,use-dma-rx: use of PDC or DMA for receiving data
+- atmel,use-dma-tx: use of PDC or DMA for transmitting data
+- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD line respectively.
+ It will use specified PIO instead of the peripheral function pin for the USART feature.
+ If unsure, don't specify this property.
+- add dma bindings for dma transfer:
+ - dmas: DMA specifier, consisting of a phandle to DMA controller node,
+ memory peripheral interface and USART DMA channel ID, FIFO configuration.
+ Refer to dma.txt and atmel-dma.txt for details.
+ - dma-names: "rx" for RX channel, "tx" for TX channel.
+
+<chip> compatible description:
+- at91rm9200: legacy USART support
+- at91sam9260: generic USART implementation for SAM9 SoCs
+
+Example:
+- use PDC:
+ usart0: serial@fff8c000 {
+ compatible = "atmel,at91sam9260-usart";
+ reg = <0xfff8c000 0x4000>;
+ interrupts = <7>;
+ clocks = <&usart0_clk>;
+ clock-names = "usart";
+ atmel,use-dma-rx;
+ atmel,use-dma-tx;
+ rts-gpios = <&pioD 15 GPIO_ACTIVE_LOW>;
+ cts-gpios = <&pioD 16 GPIO_ACTIVE_LOW>;
+ dtr-gpios = <&pioD 17 GPIO_ACTIVE_LOW>;
+ dsr-gpios = <&pioD 18 GPIO_ACTIVE_LOW>;
+ dcd-gpios = <&pioD 20 GPIO_ACTIVE_LOW>;
+ rng-gpios = <&pioD 19 GPIO_ACTIVE_LOW>;
+ };
+
+- use DMA:
+ usart0: serial@f001c000 {
+ compatible = "atmel,at91sam9260-usart";
+ reg = <0xf001c000 0x100>;
+ interrupts = <12 4 5>;
+ clocks = <&usart0_clk>;
+ clock-names = "usart";
+ atmel,use-dma-rx;
+ atmel,use-dma-tx;
+ dmas = <&dma0 2 0x3>,
+ <&dma0 2 0x204>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/serial/axis,etraxfs-uart.txt b/Documentation/devicetree/bindings/serial/axis,etraxfs-uart.txt
new file mode 100644
index 000000000..ebcbb62c0
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/axis,etraxfs-uart.txt
@@ -0,0 +1,19 @@
+ETRAX FS UART
+
+Required properties:
+- compatible : "axis,etraxfs-uart"
+- reg: offset and length of the register set for the device.
+- interrupts: device interrupt
+
+Optional properties:
+- {dtr,dsr,ri,cd}-gpios: specify a GPIO for DTR/DSR/RI/CD
+ line respectively.
+
+Example:
+
+serial@b00260000 {
+ compatible = "axis,etraxfs-uart";
+ reg = <0xb0026000 0x1000>;
+ interrupts = <68>;
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/serial/brcm,bcm6345-uart.txt b/Documentation/devicetree/bindings/serial/brcm,bcm6345-uart.txt
new file mode 100644
index 000000000..5c52e5eef
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/brcm,bcm6345-uart.txt
@@ -0,0 +1,30 @@
+* BCM63xx UART
+
+Required properties:
+
+- compatible: "brcm,bcm6345-uart"
+
+- reg: The base address of the UART register bank.
+
+- interrupts: A single interrupt specifier.
+
+- clocks: Clock driving the hardware; used to figure out the baud rate
+ divisor.
+
+Example:
+
+ uart0: serial@14e00520 {
+ compatible = "brcm,bcm6345-uart";
+ reg = <0x14e00520 0x18>;
+ interrupt-parent = <&periph_intc>;
+ interrupts = <2>;
+ clocks = <&periph_clk>;
+ };
+
+ clocks {
+ periph_clk: periph_clk@0 {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <54000000>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/serial/cavium-uart.txt b/Documentation/devicetree/bindings/serial/cavium-uart.txt
new file mode 100644
index 000000000..87a6c375c
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/cavium-uart.txt
@@ -0,0 +1,19 @@
+* Universal Asynchronous Receiver/Transmitter (UART)
+
+- compatible: "cavium,octeon-3860-uart"
+
+ Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
+
+- reg: The base address of the UART register bank.
+
+- interrupts: A single interrupt specifier.
+
+- current-speed: Optional, the current bit rate in bits per second.
+
+Example:
+ uart1: serial@1180000000c00 {
+ compatible = "cavium,octeon-3860-uart","ns16550";
+ reg = <0x11800 0x00000c00 0x0 0x400>;
+ current-speed = <115200>;
+ interrupts = <0 35>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/cdns,uart.txt b/Documentation/devicetree/bindings/serial/cdns,uart.txt
new file mode 100644
index 000000000..a3eb154c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/cdns,uart.txt
@@ -0,0 +1,20 @@
+Binding for Cadence UART Controller
+
+Required properties:
+- compatible : should be "cdns,uart-r1p8", or "xlnx,xuartps"
+- reg: Should contain UART controller registers location and length.
+- interrupts: Should contain UART controller interrupts.
+- clocks: Must contain phandles to the UART clocks
+ See ../clocks/clock-bindings.txt for details.
+- clock-names: Tuple to identify input clocks, must contain "uart_clk" and "pclk"
+ See ../clocks/clock-bindings.txt for details.
+
+
+Example:
+ uart@e0000000 {
+ compatible = "cdns,uart-r1p8";
+ clocks = <&clkc 23>, <&clkc 40>;
+ clock-names = "uart_clk", "pclk";
+ reg = <0xE0000000 0x1000>;
+ interrupts = <0 27 4>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/cirrus,clps711x-uart.txt b/Documentation/devicetree/bindings/serial/cirrus,clps711x-uart.txt
new file mode 100644
index 000000000..caaeb2583
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/cirrus,clps711x-uart.txt
@@ -0,0 +1,31 @@
+* Cirrus Logic CLPS711X Universal Asynchronous Receiver/Transmitter (UART)
+
+Required properties:
+- compatible: Should be "cirrus,clps711x-uart".
+- reg: Address and length of the register set for the device.
+- interrupts: Should contain UART TX and RX interrupt.
+- clocks: Should contain UART core clock number.
+- syscon: Phandle to SYSCON node, which contain UART control bits.
+
+Optional properties:
+- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD
+ line respectively.
+
+Note: Each UART port should have an alias correctly numbered
+in "aliases" node.
+
+Example:
+ aliases {
+ serial0 = &uart1;
+ };
+
+ uart1: uart@80000480 {
+ compatible = "cirrus,clps711x-uart";
+ reg = <0x80000480 0x80>;
+ interrupts = <12 13>;
+ clocks = <&clks 11>;
+ syscon = <&syscon1>;
+ cts-gpios = <&sysgpio 0 GPIO_ACTIVE_LOW>;
+ dsr-gpios = <&sysgpio 1 GPIO_ACTIVE_LOW>;
+ dcd-gpios = <&sysgpio 2 GPIO_ACTIVE_LOW>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/digicolor-usart.txt b/Documentation/devicetree/bindings/serial/digicolor-usart.txt
new file mode 100644
index 000000000..2d3ede668
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/digicolor-usart.txt
@@ -0,0 +1,27 @@
+Binding for Conexant Digicolor USART
+
+Note: this binding is only applicable for using the USART peripheral as
+UART. USART also support synchronous serial protocols like SPI and I2S. Use
+the binding that matches the wiring of your system.
+
+Required properties:
+- compatible : should be "cnxt,cx92755-usart".
+- reg: Should contain USART controller registers location and length.
+- interrupts: Should contain a single USART controller interrupt.
+- clocks: Must contain phandles to the USART clock
+ See ../clocks/clock-bindings.txt for details.
+
+Note: Each UART port should have an alias correctly numbered
+in "aliases" node.
+
+Example:
+ aliases {
+ serial0 = &uart0;
+ };
+
+ uart0: uart@f0000740 {
+ compatible = "cnxt,cx92755-usart";
+ reg = <0xf0000740 0x20>;
+ clocks = <&main_clk>;
+ interrupts = <44>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/efm32-uart.txt b/Documentation/devicetree/bindings/serial/efm32-uart.txt
new file mode 100644
index 000000000..8adbab268
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/efm32-uart.txt
@@ -0,0 +1,20 @@
+* Energymicro efm32 UART
+
+Required properties:
+- compatible : Should be "energymicro,efm32-uart"
+- reg : Address and length of the register set
+- interrupts : Should contain uart interrupt
+
+Optional properties:
+- energymicro,location : Decides the location of the USART I/O pins.
+ Allowed range : [0 .. 5]
+ Default: 0
+
+Example:
+
+uart@0x4000c400 {
+ compatible = "energymicro,efm32-uart";
+ reg = <0x4000c400 0x400>;
+ interrupts = <15>;
+ energymicro,location = <0>;
+};
diff --git a/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt b/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
new file mode 100644
index 000000000..35ae1fb35
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/fsl-imx-uart.txt
@@ -0,0 +1,29 @@
+* Freescale i.MX Universal Asynchronous Receiver/Transmitter (UART)
+
+Required properties:
+- compatible : Should be "fsl,<soc>-uart"
+- reg : Address and length of the register set for the device
+- interrupts : Should contain uart interrupt
+
+Optional properties:
+- fsl,uart-has-rtscts : Indicate the uart has rts and cts
+- fsl,irda-mode : Indicate the uart supports irda mode
+- fsl,dte-mode : Indicate the uart works in DTE mode. The uart works
+ is DCE mode by default.
+
+Note: Each uart controller should have an alias correctly numbered
+in "aliases" node.
+
+Example:
+
+aliases {
+ serial0 = &uart1;
+};
+
+uart1: serial@73fbc000 {
+ compatible = "fsl,imx51-uart", "fsl,imx21-uart";
+ reg = <0x73fbc000 0x4000>;
+ interrupts = <31>;
+ fsl,uart-has-rtscts;
+ fsl,dte-mode;
+};
diff --git a/Documentation/devicetree/bindings/serial/fsl-lpuart.txt b/Documentation/devicetree/bindings/serial/fsl-lpuart.txt
new file mode 100644
index 000000000..c95005efb
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/fsl-lpuart.txt
@@ -0,0 +1,31 @@
+* Freescale low power universal asynchronous receiver/transmitter (lpuart)
+
+Required properties:
+- compatible :
+ - "fsl,vf610-lpuart" for lpuart compatible with the one integrated
+ on Vybrid vf610 SoC with 8-bit register organization
+ - "fsl,ls1021a-lpuart" for lpuart compatible with the one integrated
+ on LS1021A SoC with 32-bit big-endian register organization
+- reg : Address and length of the register set for the device
+- interrupts : Should contain uart interrupt
+- clocks : phandle + clock specifier pairs, one for each entry in clock-names
+- clock-names : should contain: "ipg" - the uart clock
+
+Optional properties:
+- dmas: A list of two dma specifiers, one for each entry in dma-names.
+- dma-names: should contain "tx" and "rx".
+
+Note: Optional properties for DMA support. Write them both or both not.
+
+Example:
+
+uart0: serial@40027000 {
+ compatible = "fsl,vf610-lpuart";
+ reg = <0x40027000 0x1000>;
+ interrupts = <0 61 0x00>;
+ clocks = <&clks VF610_CLK_UART0>;
+ clock-names = "ipg";
+ dmas = <&edma0 0 2>,
+ <&edma0 0 3>;
+ dma-names = "rx","tx";
+ };
diff --git a/Documentation/devicetree/bindings/serial/fsl-mxs-auart.txt b/Documentation/devicetree/bindings/serial/fsl-mxs-auart.txt
new file mode 100644
index 000000000..7c408c87e
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/fsl-mxs-auart.txt
@@ -0,0 +1,45 @@
+* Freescale MXS Application UART (AUART)
+
+Required properties:
+- compatible : Should be "fsl,<soc>-auart". The supported SoCs include
+ imx23 and imx28.
+- reg : Address and length of the register set for the device
+- interrupts : Should contain the auart interrupt numbers
+- dmas: DMA specifier, consisting of a phandle to DMA controller node
+ and AUART DMA channel ID.
+ Refer to dma.txt and fsl-mxs-dma.txt for details.
+- dma-names: "rx" for RX channel, "tx" for TX channel.
+
+Optional properties:
+- fsl,uart-has-rtscts : Indicate the UART has RTS and CTS lines
+ for hardware flow control,
+ it also means you enable the DMA support for this UART.
+- {rts,cts,dtr,dsr,rng,dcd}-gpios: specify a GPIO for RTS/CTS/DTR/DSR/RI/DCD
+ line respectively. It will use specified PIO instead of the peripheral
+ function pin for the USART feature.
+ If unsure, don't specify this property.
+
+Example:
+auart0: serial@8006a000 {
+ compatible = "fsl,imx28-auart", "fsl,imx23-auart";
+ reg = <0x8006a000 0x2000>;
+ interrupts = <112>;
+ dmas = <&dma_apbx 8>, <&dma_apbx 9>;
+ dma-names = "rx", "tx";
+ cts-gpios = <&gpio1 15 GPIO_ACTIVE_LOW>;
+ dsr-gpios = <&gpio1 16 GPIO_ACTIVE_LOW>;
+ dcd-gpios = <&gpio1 17 GPIO_ACTIVE_LOW>;
+};
+
+Note: Each auart port should have an alias correctly numbered in "aliases"
+node.
+
+Example:
+
+aliases {
+ serial0 = &auart0;
+ serial1 = &auart1;
+ serial2 = &auart2;
+ serial3 = &auart3;
+ serial4 = &auart4;
+};
diff --git a/Documentation/devicetree/bindings/serial/lantiq_asc.txt b/Documentation/devicetree/bindings/serial/lantiq_asc.txt
new file mode 100644
index 000000000..5b78591aa
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/lantiq_asc.txt
@@ -0,0 +1,16 @@
+Lantiq SoC ASC serial controller
+
+Required properties:
+- compatible : Should be "lantiq,asc"
+- reg : Address and length of the register set for the device
+- interrupts: the 3 (tx rx err) interrupt numbers. The interrupt specifier
+ depends on the interrupt-parent interrupt controller.
+
+Example:
+
+asc1: serial@E100C00 {
+ compatible = "lantiq,asc";
+ reg = <0xE100C00 0x400>;
+ interrupt-parent = <&icu0>;
+ interrupts = <112 113 114>;
+};
diff --git a/Documentation/devicetree/bindings/serial/maxim,max310x.txt b/Documentation/devicetree/bindings/serial/maxim,max310x.txt
new file mode 100644
index 000000000..83a919c24
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/maxim,max310x.txt
@@ -0,0 +1,36 @@
+* Maxim MAX310X advanced Universal Asynchronous Receiver-Transmitter (UART)
+
+Required properties:
+- compatible: Should be one of the following:
+ - "maxim,max3107" for Maxim MAX3107,
+ - "maxim,max3108" for Maxim MAX3108,
+ - "maxim,max3109" for Maxim MAX3109,
+ - "maxim,max14830" for Maxim MAX14830.
+- reg: SPI chip select number.
+- interrupt-parent: The phandle for the interrupt controller that
+ services interrupts for this IC.
+- interrupts: Specifies the interrupt source of the parent interrupt
+ controller. The format of the interrupt specifier depends on the
+ parent interrupt controller.
+- clocks: phandle to the IC source clock.
+- clock-names: Should be "xtal" if clock is an external crystal or
+ "osc" if an external clock source is used.
+
+Optional properties:
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells: Should be two. The first cell is the GPIO number and
+ the second cell is used to specify the GPIO polarity:
+ 0 = active high,
+ 1 = active low.
+
+Example:
+ max14830: max14830@0 {
+ compatible = "maxim,max14830";
+ reg = <0>;
+ clocks = <&clk20m>;
+ clock-names = "osc";
+ interrupt-parent = <&gpio3>;
+ interrupts = <7 IRQ_TYPE_EDGE_FALLING>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/mrvl,pxa-ssp.txt b/Documentation/devicetree/bindings/serial/mrvl,pxa-ssp.txt
new file mode 100644
index 000000000..669b8140d
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/mrvl,pxa-ssp.txt
@@ -0,0 +1,65 @@
+Device tree bindings for Marvell PXA SSP ports
+
+Required properties:
+
+ - compatible: Must be one of
+ mrvl,pxa25x-ssp
+ mvrl,pxa25x-nssp
+ mrvl,pxa27x-ssp
+ mrvl,pxa3xx-ssp
+ mvrl,pxa168-ssp
+ mrvl,pxa910-ssp
+ mrvl,ce4100-ssp
+ mrvl,lpss-ssp
+
+ - reg: The memory base
+ - dmas: Two dma phandles, one for rx, one for tx
+ - dma-names: Must be "rx", "tx"
+
+
+Example for PXA3xx:
+
+ ssp0: ssp@41000000 {
+ compatible = "mrvl,pxa3xx-ssp";
+ reg = <0x41000000 0x40>;
+ ssp-id = <1>;
+ interrupts = <24>;
+ clock-names = "pxa27x-ssp.0";
+ dmas = <&dma 13
+ &dma 14>;
+ dma-names = "rx", "tx";
+ };
+
+ ssp1: ssp@41700000 {
+ compatible = "mrvl,pxa3xx-ssp";
+ reg = <0x41700000 0x40>;
+ ssp-id = <2>;
+ interrupts = <16>;
+ clock-names = "pxa27x-ssp.1";
+ dmas = <&dma 15
+ &dma 16>;
+ dma-names = "rx", "tx";
+ };
+
+ ssp2: ssp@41900000 {
+ compatibl3 = "mrvl,pxa3xx-ssp";
+ reg = <0x41900000 0x40>;
+ ssp-id = <3>;
+ interrupts = <0>;
+ clock-names = "pxa27x-ssp.2";
+ dmas = <&dma 66
+ &dma 67>;
+ dma-names = "rx", "tx";
+ };
+
+ ssp3: ssp@41a00000 {
+ compatible = "mrvl,pxa3xx-ssp";
+ reg = <0x41a00000 0x40>;
+ ssp-id = <4>;
+ interrupts = <13>;
+ clock-names = "pxa27x-ssp.3";
+ dmas = <&dma 2
+ &dma 3>;
+ dma-names = "rx", "tx";
+ };
+
diff --git a/Documentation/devicetree/bindings/serial/mrvl-serial.txt b/Documentation/devicetree/bindings/serial/mrvl-serial.txt
new file mode 100644
index 000000000..d744340de
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/mrvl-serial.txt
@@ -0,0 +1,4 @@
+PXA UART controller
+
+Required properties:
+- compatible : should be "mrvl,mmp-uart" or "mrvl,pxa-uart".
diff --git a/Documentation/devicetree/bindings/serial/mtk-uart.txt b/Documentation/devicetree/bindings/serial/mtk-uart.txt
new file mode 100644
index 000000000..44152261e
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/mtk-uart.txt
@@ -0,0 +1,26 @@
+* Mediatek Universal Asynchronous Receiver/Transmitter (UART)
+
+Required properties:
+- compatible should contain:
+ * "mediatek,mt8135-uart" for MT8135 compatible UARTS
+ * "mediatek,mt8127-uart" for MT8127 compatible UARTS
+ * "mediatek,mt8173-uart" for MT8173 compatible UARTS
+ * "mediatek,mt6589-uart" for MT6589 compatible UARTS
+ * "mediatek,mt6582-uart" for MT6582 compatible UARTS
+ * "mediatek,mt6577-uart" for all compatible UARTS (MT8173, MT6589, MT6582,
+ MT6577)
+
+- reg: The base address of the UART register bank.
+
+- interrupts: A single interrupt specifier.
+
+- clocks: Clock driving the hardware.
+
+Example:
+
+ uart0: serial@11006000 {
+ compatible = "mediatek,mt6589-uart", "mediatek,mt6577-uart";
+ reg = <0x11006000 0x400>;
+ interrupts = <GIC_SPI 51 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&uart_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.txt b/Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.txt
new file mode 100644
index 000000000..845850caf
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/nvidia,tegra20-hsuart.txt
@@ -0,0 +1,37 @@
+NVIDIA Tegra20/Tegra30 high speed (DMA based) UART controller driver.
+
+Required properties:
+- compatible : should be "nvidia,tegra30-hsuart", "nvidia,tegra20-hsuart".
+- reg: Should contain UART controller registers location and length.
+- interrupts: Should contain UART controller interrupts.
+- clocks: Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - serial
+- dmas : Must contain an entry for each entry in clock-names.
+ See ../dma/dma.txt for details.
+- dma-names : Must include the following entries:
+ - rx
+ - tx
+
+Optional properties:
+- nvidia,enable-modem-interrupt: Enable modem interrupts. Should be enable
+ only if all 8 lines of UART controller are pinmuxed.
+
+Example:
+
+serial@70006000 {
+ compatible = "nvidia,tegra30-hsuart", "nvidia,tegra20-hsuart";
+ reg = <0x70006000 0x40>;
+ reg-shift = <2>;
+ interrupts = <0 36 0x04>;
+ nvidia,enable-modem-interrupt;
+ clocks = <&tegra_car 6>;
+ resets = <&tegra_car 6>;
+ reset-names = "serial";
+ dmas = <&apbdma 8>, <&apbdma 8>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt b/Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt
new file mode 100644
index 000000000..246c79566
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/nxp,sc16is7xx.txt
@@ -0,0 +1,33 @@
+* NXP SC16IS7xx advanced Universal Asynchronous Receiver-Transmitter (UART)
+
+Required properties:
+- compatible: Should be one of the following:
+ - "nxp,sc16is740" for NXP SC16IS740,
+ - "nxp,sc16is741" for NXP SC16IS741,
+ - "nxp,sc16is750" for NXP SC16IS750,
+ - "nxp,sc16is752" for NXP SC16IS752,
+ - "nxp,sc16is760" for NXP SC16IS760,
+ - "nxp,sc16is762" for NXP SC16IS762.
+- reg: I2C address of the SC16IS7xx device.
+- interrupt-parent: The phandle for the interrupt controller that
+ services interrupts for this IC.
+- interrupts: Should contain the UART interrupt
+- clocks: Reference to the IC source clock.
+
+Optional properties:
+- gpio-controller: Marks the device node as a GPIO controller.
+- #gpio-cells: Should be two. The first cell is the GPIO number and
+ the second cell is used to specify the GPIO polarity:
+ 0 = active high,
+ 1 = active low.
+
+Example:
+ sc16is750: sc16is750@51 {
+ compatible = "nxp,sc16is750";
+ reg = <0x51>;
+ clocks = <&clk20m>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <7 IRQ_TYPE_EDGE_FALLING>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/nxp-lpc32xx-hsuart.txt b/Documentation/devicetree/bindings/serial/nxp-lpc32xx-hsuart.txt
new file mode 100644
index 000000000..0d439dfc1
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/nxp-lpc32xx-hsuart.txt
@@ -0,0 +1,14 @@
+* NXP LPC32xx SoC High Speed UART
+
+Required properties:
+- compatible: Should be "nxp,lpc3220-hsuart"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt
+
+Example:
+
+ uart1: serial@40014000 {
+ compatible = "nxp,lpc3220-hsuart";
+ reg = <0x40014000 0x1000>;
+ interrupts = <26 0>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/omap_serial.txt b/Documentation/devicetree/bindings/serial/omap_serial.txt
new file mode 100644
index 000000000..54c2a155c
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/omap_serial.txt
@@ -0,0 +1,30 @@
+OMAP UART controller
+
+Required properties:
+- compatible : should be "ti,omap2-uart" for OMAP2 controllers
+- compatible : should be "ti,omap3-uart" for OMAP3 controllers
+- compatible : should be "ti,omap4-uart" for OMAP4 controllers
+- reg : address and length of the register space
+- interrupts or interrupts-extended : Should contain the uart interrupt
+ specifier or both the interrupt
+ controller phandle and interrupt
+ specifier.
+- ti,hwmods : Must be "uart<n>", n being the instance number (1-based)
+
+Optional properties:
+- clock-frequency : frequency of the clock input to the UART
+- dmas : DMA specifier, consisting of a phandle to the DMA controller
+ node and a DMA channel number.
+- dma-names : "rx" for receive channel, "tx" for transmit channel.
+
+Example:
+
+ uart4: serial@49042000 {
+ compatible = "ti,omap3-uart";
+ reg = <0x49042000 0x400>;
+ interrupts = <80>;
+ dmas = <&sdma 81 &sdma 82>;
+ dma-names = "tx", "rx";
+ ti,hwmods = "uart4";
+ clock-frequency = <48000000>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/pl011.txt b/Documentation/devicetree/bindings/serial/pl011.txt
new file mode 100644
index 000000000..ba3ecb8cb
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/pl011.txt
@@ -0,0 +1,51 @@
+* ARM AMBA Primecell PL011 serial UART
+
+Required properties:
+- compatible: must be "arm,primecell", "arm,pl011"
+- reg: exactly one register range with length 0x1000
+- interrupts: exactly one interrupt specifier
+
+Optional properties:
+- pinctrl:
+ When present, must have one state named "default",
+ and may contain a second name named "sleep". The former
+ state sets up pins for ordinary operation whereas
+ the latter state will put the associated pins to sleep
+ when the UART is unused
+- clocks:
+ When present, the first clock listed must correspond to
+ the clock named UARTCLK on the IP block, i.e. the clock
+ to the external serial line, whereas the second clock
+ must correspond to the PCLK clocking the internal logic
+ of the block. Just listing one clock (the first one) is
+ deprecated.
+- clocks-names:
+ When present, the first clock listed must be named
+ "uartclk" and the second clock listed must be named
+ "apb_pclk"
+- dmas:
+ When present, may have one or two dma channels.
+ The first one must be named "rx", the second one
+ must be named "tx".
+- auto-poll:
+ Enables polling when using RX DMA.
+- poll-rate-ms:
+ Rate at which poll occurs when auto-poll is set,
+ default 100ms.
+- poll-timeout-ms:
+ Poll timeout when auto-poll is set, default
+ 3000ms.
+
+See also bindings/arm/primecell.txt
+
+Example:
+
+uart@80120000 {
+ compatible = "arm,pl011", "arm,primecell";
+ reg = <0x80120000 0x1000>;
+ interrupts = <0 11 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&dma 13 0 0x2>, <&dma 13 0 0x0>;
+ dma-names = "rx", "tx";
+ clocks = <&foo_clk>, <&bar_clk>;
+ clock-names = "uartclk", "apb_pclk";
+};
diff --git a/Documentation/devicetree/bindings/serial/qca,ar9330-uart.txt b/Documentation/devicetree/bindings/serial/qca,ar9330-uart.txt
new file mode 100644
index 000000000..c5e032c85
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/qca,ar9330-uart.txt
@@ -0,0 +1,34 @@
+* Qualcomm Atheros AR9330 High-Speed UART
+
+Required properties:
+
+- compatible: Must be "qca,ar9330-uart"
+
+- reg: Specifies the physical base address of the controller and
+ the length of the memory mapped region.
+
+- interrupt-parent: The phandle for the interrupt controller that
+ services interrupts for this device.
+
+- interrupts: Specifies the interrupt source of the parent interrupt
+ controller. The format of the interrupt specifier depends on the
+ parent interrupt controller.
+
+Additional requirements:
+
+ Each UART port must have an alias correctly numbered in "aliases"
+ node.
+
+Example:
+
+ aliases {
+ serial0 = &uart0;
+ };
+
+ uart0: uart@18020000 {
+ compatible = "qca,ar9330-uart";
+ reg = <0x18020000 0x14>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/qcom,msm-uart.txt b/Documentation/devicetree/bindings/serial/qcom,msm-uart.txt
new file mode 100644
index 000000000..ce8c90161
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/qcom,msm-uart.txt
@@ -0,0 +1,25 @@
+* MSM Serial UART
+
+The MSM serial UART hardware is designed for low-speed use cases where a
+dma-engine isn't needed. From a software perspective it's mostly compatible
+with the MSM serial UARTDM except that it only supports reading and writing one
+character at a time.
+
+Required properties:
+- compatible: Should contain "qcom,msm-uart"
+- reg: Should contain UART register location and length.
+- interrupts: Should contain UART interrupt.
+- clocks: Should contain the core clock.
+- clock-names: Should be "core".
+
+Example:
+
+A uart device at 0xa9c00000 with interrupt 11.
+
+serial@a9c00000 {
+ compatible = "qcom,msm-uart";
+ reg = <0xa9c00000 0x1000>;
+ interrupts = <11>;
+ clocks = <&uart_cxc>;
+ clock-names = "core";
+};
diff --git a/Documentation/devicetree/bindings/serial/qcom,msm-uartdm.txt b/Documentation/devicetree/bindings/serial/qcom,msm-uartdm.txt
new file mode 100644
index 000000000..a2114c217
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/qcom,msm-uartdm.txt
@@ -0,0 +1,78 @@
+* MSM Serial UARTDM
+
+The MSM serial UARTDM hardware is designed for high-speed use cases where the
+transmit and/or receive channels can be offloaded to a dma-engine. From a
+software perspective it's mostly compatible with the MSM serial UART except
+that it supports reading and writing multiple characters at a time.
+
+Required properties:
+- compatible: Should contain at least "qcom,msm-uartdm".
+ A more specific property should be specified as follows depending
+ on the version:
+ "qcom,msm-uartdm-v1.1"
+ "qcom,msm-uartdm-v1.2"
+ "qcom,msm-uartdm-v1.3"
+ "qcom,msm-uartdm-v1.4"
+- reg: Should contain UART register locations and lengths. The first
+ register shall specify the main control registers. An optional second
+ register location shall specify the GSBI control region.
+ "qcom,msm-uartdm-v1.3" is the only compatible value that might
+ need the GSBI control region.
+- interrupts: Should contain UART interrupt.
+- clocks: Should contain the core clock and the AHB clock.
+- clock-names: Should be "core" for the core clock and "iface" for the
+ AHB clock.
+
+Optional properties:
+- dmas: Should contain dma specifiers for transmit and receive channels
+- dma-names: Should contain "tx" for transmit and "rx" for receive channels
+
+Note: Aliases may be defined to ensure the correct ordering of the UARTs.
+The alias serialN will result in the UART being assigned port N. If any
+serialN alias exists, then an alias must exist for each enabled UART. The
+serialN aliases should be in a .dts file instead of in a .dtsi file.
+
+Examples:
+
+- A uartdm v1.4 device with dma capabilities.
+
+ serial@f991e000 {
+ compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm";
+ reg = <0xf991e000 0x1000>;
+ interrupts = <0 108 0x0>;
+ clocks = <&blsp1_uart2_apps_cxc>, <&blsp1_ahb_cxc>;
+ clock-names = "core", "iface";
+ dmas = <&dma0 0>, <&dma0 1>;
+ dma-names = "tx", "rx";
+ };
+
+- A uartdm v1.3 device without dma capabilities and part of a GSBI complex.
+
+ serial@19c40000 {
+ compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
+ reg = <0x19c40000 0x1000>,
+ <0x19c00000 0x1000>;
+ interrupts = <0 195 0x0>;
+ clocks = <&gsbi5_uart_cxc>, <&gsbi5_ahb_cxc>;
+ clock-names = "core", "iface";
+ };
+
+- serialN alias.
+
+ aliases {
+ serial0 = &uarta;
+ serial1 = &uartc;
+ serial2 = &uartb;
+ };
+
+ uarta: serial@12490000 {
+ status = "ok";
+ };
+
+ uartb: serial@16340000 {
+ status = "ok";
+ };
+
+ uartc: serial@1a240000 {
+ status = "ok";
+ };
diff --git a/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
new file mode 100644
index 000000000..ae73bb0e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/renesas,sci-serial.txt
@@ -0,0 +1,59 @@
+* Renesas SH-Mobile Serial Communication Interface
+
+Required properties:
+
+ - compatible: Must contain one of the following:
+
+ - "renesas,scif-r7s72100" for R7S72100 (RZ/A1H) SCIF compatible UART.
+ - "renesas,scifa-r8a73a4" for R8A73A4 (R-Mobile APE6) SCIFA compatible UART.
+ - "renesas,scifb-r8a73a4" for R8A73A4 (R-Mobile APE6) SCIFB compatible UART.
+ - "renesas,scifa-r8a7740" for R8A7740 (R-Mobile A1) SCIFA compatible UART.
+ - "renesas,scifb-r8a7740" for R8A7740 (R-Mobile A1) SCIFB compatible UART.
+ - "renesas,scif-r8a7778" for R8A7778 (R-Car M1) SCIF compatible UART.
+ - "renesas,scif-r8a7779" for R8A7779 (R-Car H1) SCIF compatible UART.
+ - "renesas,scif-r8a7790" for R8A7790 (R-Car H2) SCIF compatible UART.
+ - "renesas,scifa-r8a7790" for R8A7790 (R-Car H2) SCIFA compatible UART.
+ - "renesas,scifb-r8a7790" for R8A7790 (R-Car H2) SCIFB compatible UART.
+ - "renesas,hscif-r8a7790" for R8A7790 (R-Car H2) HSCIF compatible UART.
+ - "renesas,scif-r8a7791" for R8A7791 (R-Car M2) SCIF compatible UART.
+ - "renesas,scifa-r8a7791" for R8A7791 (R-Car M2) SCIFA compatible UART.
+ - "renesas,scifb-r8a7791" for R8A7791 (R-Car M2) SCIFB compatible UART.
+ - "renesas,hscif-r8a7791" for R8A7791 (R-Car M2) HSCIF compatible UART.
+ - "renesas,scif-r8a7794" for R8A7794 (R-Car E2) SCIF compatible UART.
+ - "renesas,scifa-r8a7794" for R8A7794 (R-Car E2) SCIFA compatible UART.
+ - "renesas,scifb-r8a7794" for R8A7794 (R-Car E2) SCIFB compatible UART.
+ - "renesas,hscif-r8a7794" for R8A7794 (R-Car E2) HSCIF compatible UART.
+ - "renesas,scifa-sh73a0" for SH73A0 (SH-Mobile AG5) SCIFA compatible UART.
+ - "renesas,scifb-sh73a0" for SH73A0 (SH-Mobile AG5) SCIFB compatible UART.
+ - "renesas,scif" for generic SCIF compatible UART.
+ - "renesas,scifa" for generic SCIFA compatible UART.
+ - "renesas,scifb" for generic SCIFB compatible UART.
+ - "renesas,hscif" for generic HSCIF compatible UART.
+
+ When compatible with the generic version, nodes must list the
+ SoC-specific version corresponding to the platform first followed by the
+ generic version.
+
+ - reg: Base address and length of the I/O registers used by the UART.
+ - interrupts: Must contain an interrupt-specifier for the SCIx interrupt.
+
+ - clocks: Must contain a phandle and clock-specifier pair for each entry
+ in clock-names.
+ - clock-names: Must contain "sci_ick" for the SCIx UART interface clock.
+
+Note: Each enabled SCIx UART should have an alias correctly numbered in the
+"aliases" node.
+
+Example:
+ aliases {
+ serial0 = &scifa0;
+ };
+
+ scifa0: serial@e6c40000 {
+ compatible = "renesas,scifa-r8a7790", "renesas,scifa";
+ reg = <0 0xe6c40000 0 64>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 144 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp2_clks R8A7790_CLK_SCIFA0>;
+ clock-names = "sci_ick";
+ };
diff --git a/Documentation/devicetree/bindings/serial/rs485.txt b/Documentation/devicetree/bindings/serial/rs485.txt
new file mode 100644
index 000000000..32b1fa1f2
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/rs485.txt
@@ -0,0 +1,31 @@
+* RS485 serial communications
+
+The RTS signal is capable of automatically controlling line direction for
+the built-in half-duplex mode.
+The properties described hereafter shall be given to a half-duplex capable
+UART node.
+
+Required properties:
+- rs485-rts-delay: prop-encoded-array <a b> where:
+ * a is the delay between rts signal and beginning of data sent in milliseconds.
+ it corresponds to the delay before sending data.
+ * b is the delay between end of data sent and rts signal in milliseconds
+ it corresponds to the delay after sending data and actual release of the line.
+
+Optional properties:
+- linux,rs485-enabled-at-boot-time: empty property telling to enable the rs485
+ feature at boot time. It can be disabled later with proper ioctl.
+- rs485-rx-during-tx: empty property that enables the receiving of data even
+ whilst sending data.
+
+RS485 example for Atmel USART:
+ usart0: serial@fff8c000 {
+ compatible = "atmel,at91sam9260-usart";
+ reg = <0xfff8c000 0x4000>;
+ interrupts = <7>;
+ atmel,use-dma-rx;
+ atmel,use-dma-tx;
+ linux,rs485-enabled-at-boot-time;
+ rs485-rts-delay = <0 200>; // in milliseconds
+ };
+
diff --git a/Documentation/devicetree/bindings/serial/samsung_uart.txt b/Documentation/devicetree/bindings/serial/samsung_uart.txt
new file mode 100644
index 000000000..e85f37ec3
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/samsung_uart.txt
@@ -0,0 +1,58 @@
+* Samsung's UART Controller
+
+The Samsung's UART controller is used for interfacing SoC with serial
+communicaion devices.
+
+Required properties:
+- compatible: should be one of following:
+ - "samsung,exynos4210-uart" - Exynos4210 SoC,
+ - "samsung,s3c2410-uart" - compatible with ports present on S3C2410 SoC,
+ - "samsung,s3c2412-uart" - compatible with ports present on S3C2412 SoC,
+ - "samsung,s3c2440-uart" - compatible with ports present on S3C2440 SoC,
+ - "samsung,s3c6400-uart" - compatible with ports present on S3C6400 SoC,
+ - "samsung,s5pv210-uart" - compatible with ports present on S5PV210 SoC.
+
+- reg: base physical address of the controller and length of memory mapped
+ region.
+
+- interrupts: a single interrupt signal to SoC interrupt controller,
+ according to interrupt bindings documentation [1].
+
+- clock-names: input names of clocks used by the controller:
+ - "uart" - controller bus clock,
+ - "clk_uart_baudN" - Nth baud base clock input (N = 0, 1, ...),
+ according to SoC User's Manual (only N = 0 is allowedfor SoCs without
+ internal baud clock mux).
+- clocks: phandles and specifiers for all clocks specified in "clock-names"
+ property, in the same order, according to clock bindings documentation [2].
+
+[1] Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+[2] Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Optional properties:
+- samsung,uart-fifosize: The fifo size supported by the UART channel
+
+Note: Each Samsung UART should have an alias correctly numbered in the
+"aliases" node, according to serialN format, where N is the port number
+(non-negative decimal integer) as specified by User's Manual of respective
+SoC.
+
+Example:
+ aliases {
+ serial0 = &uart0;
+ serial1 = &uart1;
+ serial2 = &uart2;
+ };
+
+Example:
+ uart1: serial@7f005400 {
+ compatible = "samsung,s3c6400-uart";
+ reg = <0x7f005400 0x100>;
+ interrupt-parent = <&vic1>;
+ interrupts = <6>;
+ clock-names = "uart", "clk_uart_baud2",
+ "clk_uart_baud3";
+ clocks = <&clocks PCLK_UART1>, <&clocks PCLK_UART1>,
+ <&clocks SCLK_UART>;
+ samsung,uart-fifosize = <16>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/sirf-uart.txt b/Documentation/devicetree/bindings/serial/sirf-uart.txt
new file mode 100644
index 000000000..f0c39261c
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/sirf-uart.txt
@@ -0,0 +1,47 @@
+* CSR SiRFprimaII/atlasVI Universal Synchronous Asynchronous Receiver/Transmitter *
+
+Required properties:
+- compatible : Should be "sirf,prima2-uart", "sirf, prima2-usp-uart",
+ "sirf,atlas7-uart" or "sirf,atlas7-bt-uart" which means
+ uart located in BT module and used for BT.
+- reg : Offset and length of the register set for the device
+- interrupts : Should contain uart interrupt
+- fifosize : Should define hardware rx/tx fifo size
+- clocks : Should contain uart clock number
+
+Optional properties:
+- sirf,uart-has-rtscts: we have hardware flow controller pins in hardware
+- rts-gpios: RTS pin for USP-based UART if sirf,uart-has-rtscts is true
+- cts-gpios: CTS pin for USP-based UART if sirf,uart-has-rtscts is true
+
+Example:
+
+uart0: uart@b0050000 {
+ cell-index = <0>;
+ compatible = "sirf,prima2-uart";
+ reg = <0xb0050000 0x1000>;
+ interrupts = <17>;
+ fifosize = <128>;
+ clocks = <&clks 13>;
+};
+
+On the board-specific dts, we can put rts-gpios and cts-gpios like
+
+usp@b0090000 {
+ compatible = "sirf,prima2-usp-uart";
+ sirf,uart-has-rtscts;
+ rts-gpios = <&gpio 15 0>;
+ cts-gpios = <&gpio 46 0>;
+};
+
+for uart use in BT module,
+uart6: uart@11000000 {
+ cell-index = <6>;
+ compatible = "sirf,atlas7-bt-uart", "sirf,atlas7-uart";
+ reg = <0x11000000 0x1000>;
+ interrupts = <0 100 0>;
+ clocks = <&clks 138>, <&clks 140>, <&clks 141>;
+ clock-names = "uart", "general", "noc";
+ fifosize = <128>;
+ status = "disabled";
+}
diff --git a/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.txt b/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.txt
new file mode 100644
index 000000000..289c40ed7
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/snps-dw-apb-uart.txt
@@ -0,0 +1,73 @@
+* Synopsys DesignWare ABP UART
+
+Required properties:
+- compatible : "snps,dw-apb-uart"
+- reg : offset and length of the register set for the device.
+- interrupts : should contain uart interrupt.
+
+Clock handling:
+The clock rate of the input clock needs to be supplied by one of
+- clock-frequency : the input clock frequency for the UART.
+- clocks : phandle to the input clock
+
+The supplying peripheral clock can also be handled, needing a second property
+- clock-names: tuple listing input clock names.
+ Required elements: "baudclk", "apb_pclk"
+
+Optional properties:
+- resets : phandle to the parent reset controller.
+- reg-shift : quantity to shift the register offsets by. If this property is
+ not present then the register offsets are not shifted.
+- reg-io-width : the size (in bytes) of the IO accesses that should be
+ performed on the device. If this property is not present then single byte
+ accesses are used.
+- dcd-override : Override the DCD modem status signal. This signal will always
+ be reported as active instead of being obtained from the modem status
+ register. Define this if your serial port does not use this pin.
+- dsr-override : Override the DTS modem status signal. This signal will always
+ be reported as active instead of being obtained from the modem status
+ register. Define this if your serial port does not use this pin.
+- cts-override : Override the CTS modem status signal. This signal will always
+ be reported as active instead of being obtained from the modem status
+ register. Define this if your serial port does not use this pin.
+- ri-override : Override the RI modem status signal. This signal will always be
+ reported as inactive instead of being obtained from the modem status register.
+ Define this if your serial port does not use this pin.
+
+Example:
+
+ uart@80230000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x80230000 0x100>;
+ clock-frequency = <3686400>;
+ interrupts = <10>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ dcd-override;
+ dsr-override;
+ cts-override;
+ ri-override;
+ };
+
+Example with one clock:
+
+ uart@80230000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x80230000 0x100>;
+ clocks = <&baudclk>;
+ interrupts = <10>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
+
+Example with two clocks:
+
+ uart@80230000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x80230000 0x100>;
+ clocks = <&baudclk>, <&apb_pclk>;
+ clock-names = "baudclk", "apb_pclk";
+ interrupts = <10>;
+ reg-shift = <2>;
+ reg-io-width = <4>;
+ };
diff --git a/Documentation/devicetree/bindings/serial/sprd-uart.txt b/Documentation/devicetree/bindings/serial/sprd-uart.txt
new file mode 100644
index 000000000..2aff0f22c
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/sprd-uart.txt
@@ -0,0 +1,7 @@
+* Spreadtrum serial UART
+
+Required properties:
+- compatible: must be "sprd,sc9836-uart"
+- reg: offset and length of the register set for the device
+- interrupts: exactly one interrupt specifier
+- clocks: phandles to input clocks.
diff --git a/Documentation/devicetree/bindings/serial/st-asc.txt b/Documentation/devicetree/bindings/serial/st-asc.txt
new file mode 100644
index 000000000..75d877f59
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/st-asc.txt
@@ -0,0 +1,18 @@
+*st-asc(Serial Port)
+
+Required properties:
+- compatible : Should be "st,asc".
+- reg, reg-names, interrupts, interrupt-names : Standard way to define device
+ resources with names. look in
+ Documentation/devicetree/bindings/resource-names.txt
+
+Optional properties:
+- st,hw-flow-ctrl bool flag to enable hardware flow control.
+- st,force-m1 bool flat to force asc to be in Mode-1 recommeded
+ for high bit rates (above 19.2K)
+Example:
+serial@fe440000{
+ compatible = "st,asc";
+ reg = <0xfe440000 0x2c>;
+ interrupts = <0 209 0>;
+};
diff --git a/Documentation/devicetree/bindings/serial/vt8500-uart.txt b/Documentation/devicetree/bindings/serial/vt8500-uart.txt
new file mode 100644
index 000000000..2b64e6107
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/vt8500-uart.txt
@@ -0,0 +1,27 @@
+* VIA VT8500 and WonderMedia WM8xxx UART Controller
+
+Required properties:
+- compatible: should be "via,vt8500-uart" (for VIA/WonderMedia chips up to and
+ including WM8850/WM8950), or "wm,wm8880-uart" (for WM8880 and later)
+
+- reg: base physical address of the controller and length of memory mapped
+ region.
+
+- interrupts: hardware interrupt number
+
+- clocks: shall be the input parent clock phandle for the clock. This should
+ be the 24Mhz reference clock.
+
+Aliases may be defined to ensure the correct ordering of the uarts.
+
+Example:
+ aliases {
+ serial0 = &uart0;
+ };
+
+ uart0: serial@d8200000 {
+ compatible = "via,vt8500-uart";
+ reg = <0xd8200000 0x1040>;
+ interrupts = <32>;
+ clocks = <&clkuart0>;
+ };
diff --git a/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt b/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt
new file mode 100644
index 000000000..362a76925
--- /dev/null
+++ b/Documentation/devicetree/bindings/serio/allwinner,sun4i-ps2.txt
@@ -0,0 +1,23 @@
+* Device tree bindings for Allwinner A10, A20 PS2 host controller
+
+A20 PS2 is dual role controller (PS2 host and PS2 device). These bindings are
+for PS2 A10/A20 host controller. IBM compliant IBM PS2 and AT-compatible keyboard
+and mouse can be connected.
+
+Required properties:
+
+ - reg : Offset and length of the register set for the device.
+ - compatible : Should be as of the following:
+ - "allwinner,sun4i-a10-ps2"
+ - interrupts : The interrupt line connected to the PS2.
+ - clocks : The gate clk connected to the PS2.
+
+
+Example:
+ ps20: ps2@0x01c2a000 {
+ compatible = "allwinner,sun4i-a10-ps2";
+ reg = <0x01c2a000 0x400>;
+ interrupts = <0 62 4>;
+ clocks = <&apb1_gates 6>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/serio/altera_ps2.txt b/Documentation/devicetree/bindings/serio/altera_ps2.txt
new file mode 100644
index 000000000..520199e2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/serio/altera_ps2.txt
@@ -0,0 +1,5 @@
+Altera UP PS/2 controller
+
+Required properties:
+- compatible : should be "ALTR,ps2-1.0". <DEPRECATED>
+- compatible : should be "altr,ps2-1.0".
diff --git a/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt b/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt
new file mode 100644
index 000000000..0e72183f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/serio/olpc,ap-sp.txt
@@ -0,0 +1,13 @@
+OLPC AP-SP serio interface
+
+Required properties:
+- compatible : "olpc,ap-sp"
+- reg : base address and length of SoC's WTM registers
+- interrupts : SP-AP interrupt
+
+Example:
+ ap-sp@d4290000 {
+ compatible = "olpc,ap-sp";
+ reg = <0xd4290000 0x1000>;
+ interrupts = <40>;
+ }
diff --git a/Documentation/devicetree/bindings/serio/snps-arc_ps2.txt b/Documentation/devicetree/bindings/serio/snps-arc_ps2.txt
new file mode 100644
index 000000000..38c2f21e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/serio/snps-arc_ps2.txt
@@ -0,0 +1,16 @@
+* ARC PS/2 driver: PS/2 block used in some ARC FPGA's & nSIM OSCI model
+
+Required properties:
+- compatible : "snps,arc_ps2"
+- reg : offset and length (always 0x14) of registers
+- interrupts : interrupt
+- interrupt-names : name of interrupt, must be "arc_ps2_irq"
+
+Example:
+
+serio@c9000400 {
+ compatible = "snps,arc_ps2";
+ reg = <0xc9000400 0x14>;
+ interrupts = <13>;
+ interrupt-names = "arc_ps2_irq";
+}
diff --git a/Documentation/devicetree/bindings/soc/fsl/bman-portals.txt b/Documentation/devicetree/bindings/soc/fsl/bman-portals.txt
new file mode 100644
index 000000000..2a00e14e1
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/fsl/bman-portals.txt
@@ -0,0 +1,56 @@
+QorIQ DPAA Buffer Manager Portals Device Tree Binding
+
+Copyright (C) 2008 - 2014 Freescale Semiconductor Inc.
+
+CONTENTS
+
+ - BMan Portal
+ - Example
+
+BMan Portal Node
+
+Portals are memory mapped interfaces to BMan that allow low-latency, lock-less
+interaction by software running on processor cores, accelerators and network
+interfaces with the BMan
+
+PROPERTIES
+
+- compatible
+ Usage: Required
+ Value type: <stringlist>
+ Definition: Must include "fsl,bman-portal-<hardware revision>"
+ May include "fsl,<SoC>-bman-portal" or "fsl,bman-portal"
+
+- reg
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: Two regions. The first is the cache-enabled region of
+ the portal. The second is the cache-inhibited region of
+ the portal
+
+- interrupts
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: Standard property
+
+EXAMPLE
+
+The example below shows a (P4080) BMan portals container/bus node with two portals
+
+ bman-portals@ff4000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-bus";
+ ranges = <0 0xf 0xf4000000 0x200000>;
+
+ bman-portal@0 {
+ compatible = "fsl,bman-portal-1.0.0", "fsl,bman-portal";
+ reg = <0x0 0x4000>, <0x100000 0x1000>;
+ interrupts = <105 2 0 0>;
+ };
+ bman-portal@4000 {
+ compatible = "fsl,bman-portal-1.0.0", "fsl,bman-portal";
+ reg = <0x4000 0x4000>, <0x101000 0x1000>;
+ interrupts = <107 2 0 0>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/soc/fsl/bman.txt b/Documentation/devicetree/bindings/soc/fsl/bman.txt
new file mode 100644
index 000000000..47ac83441
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/fsl/bman.txt
@@ -0,0 +1,135 @@
+QorIQ DPAA Buffer Manager Device Tree Bindings
+
+Copyright (C) 2008 - 2014 Freescale Semiconductor Inc.
+
+CONTENTS
+
+ - BMan Node
+ - BMan Private Memory Node
+ - Example
+
+BMan Node
+
+The Buffer Manager is part of the Data-Path Acceleration Architecture (DPAA).
+BMan supports hardware allocation and deallocation of buffers belonging to pools
+originally created by software with configurable depletion thresholds. This
+binding covers the CCSR space programming model
+
+PROPERTIES
+
+- compatible
+ Usage: Required
+ Value type: <stringlist>
+ Definition: Must include "fsl,bman"
+ May include "fsl,<SoC>-bman"
+
+- reg
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: Registers region within the CCSR address space
+
+The BMan revision information is located in the BMAN_IP_REV_1/2 registers which
+are located at offsets 0xbf8 and 0xbfc
+
+- interrupts
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: Standard property. The error interrupt
+
+- fsl,bman-portals
+ Usage: Required
+ Value type: <phandle>
+ Definition: Phandle to this BMan instance's portals
+
+- fsl,liodn
+ Usage: See pamu.txt
+ Value type: <prop-encoded-array>
+ Definition: PAMU property used for static LIODN assignment
+
+- fsl,iommu-parent
+ Usage: See pamu.txt
+ Value type: <phandle>
+ Definition: PAMU property used for dynamic LIODN assignment
+
+ For additional details about the PAMU/LIODN binding(s) see pamu.txt
+
+Devices connected to a BMan instance via Direct Connect Portals (DCP) must link
+to the respective BMan instance
+
+- fsl,bman
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Description: List of phandle and DCP index pairs, to the BMan instance
+ to which this device is connected via the DCP
+
+BMan Private Memory Node
+
+BMan requires a contiguous range of physical memory used for the backing store
+for BMan Free Buffer Proxy Records (FBPR). This memory is reserved/allocated as a
+node under the /reserved-memory node
+
+The BMan FBPR memory node must be named "bman-fbpr"
+
+PROPERTIES
+
+- compatible
+ Usage: required
+ Value type: <stringlist>
+ Definition: Must inclide "fsl,bman-fbpr"
+
+The following constraints are relevant to the FBPR private memory:
+ - The size must be 2^(size + 1), with size = 11..33. That is 4 KiB to
+ 16 GiB
+ - The alignment must be a muliptle of the memory size
+
+The size of the FBPR must be chosen by observing the hardware features configured
+via the Reset Configuration Word (RCW) and that are relevant to a specific board
+(e.g. number of MAC(s) pinned-out, number of offline/host command FMan ports,
+etc.). The size configured in the DT must reflect the hardware capabilities and
+not the specific needs of an application
+
+For additional details about reserved memory regions see reserved-memory.txt
+
+EXAMPLE
+
+The example below shows a BMan FBPR dynamic allocation memory node
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ bman_fbpr: bman-fbpr {
+ compatible = "fsl,bman-fbpr";
+ alloc-ranges = <0 0 0x10 0>;
+ size = <0 0x1000000>;
+ alignment = <0 0x1000000>;
+ };
+ };
+
+The example below shows a (P4080) BMan CCSR-space node
+
+ bportals: bman-portals@ff4000000 {
+ ...
+ };
+
+ crypto@300000 {
+ ...
+ fsl,bman = <&bman, 2>;
+ ...
+ };
+
+ bman: bman@31a000 {
+ compatible = "fsl,bman";
+ reg = <0x31a000 0x1000>;
+ interrupts = <16 2 1 2>;
+ fsl,liodn = <0x17>;
+ fsl,bman-portals = <&bportals>;
+ memory-region = <&bman_fbpr>;
+ };
+
+ fman@400000 {
+ ...
+ fsl,bman = <&bman, 0>;
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/soc/fsl/qman-portals.txt b/Documentation/devicetree/bindings/soc/fsl/qman-portals.txt
new file mode 100644
index 000000000..48c4dae5d
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/fsl/qman-portals.txt
@@ -0,0 +1,154 @@
+QorIQ DPAA Queue Manager Portals Device Tree Binding
+
+Copyright (C) 2008 - 2014 Freescale Semiconductor Inc.
+
+CONTENTS
+
+ - QMan Portal
+ - QMan Pool Channel
+ - Example
+
+QMan Portal Node
+
+Portals are memory mapped interfaces to QMan that allow low-latency, lock-less
+interaction by software running on processor cores, accelerators and network
+interfaces with the QMan
+
+PROPERTIES
+
+- compatible
+ Usage: Required
+ Value type: <stringlist>
+ Definition: Must include "fsl,qman-portal-<hardware revision>"
+ May include "fsl,<SoC>-qman-portal" or "fsl,qman-portal"
+
+- reg
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: Two regions. The first is the cache-enabled region of
+ the portal. The second is the cache-inhibited region of
+ the portal
+
+- interrupts
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: Standard property
+
+- fsl,liodn
+ Usage: See pamu.txt
+ Value type: <prop-encoded-array>
+ Definition: Two LIODN(s). DQRR LIODN (DLIODN) and Frame LIODN
+ (FLIODN)
+
+- fsl,iommu-parent
+ Usage: See pamu.txt
+ Value type: <phandle>
+ Definition: PAMU property used for dynamic LIODN assignment
+
+ For additional details about the PAMU/LIODN binding(s) see pamu.txt
+
+- fsl,qman-channel-id
+ Usage: Required
+ Value type: <u32>
+ Definition: The hardware index of the channel. This can also be
+ determined by dividing any of the channel's 8 work queue
+ IDs by 8
+
+In addition to these properties the qman-portals should have sub-nodes to
+represent the HW devices/portals that are connected to the software portal
+described here
+
+The currently supported sub-nodes are:
+ * fman0
+ * fman1
+ * pme
+ * crypto
+
+These subnodes should have the following properties:
+
+- fsl,liodn
+ Usage: See pamu.txt
+ Value type: <prop-encoded-array>
+ Definition: PAMU property used for static LIODN assignment
+
+- fsl,iommu-parent
+ Usage: See pamu.txt
+ Value type: <phandle>
+ Definition: PAMU property used for dynamic LIODN assignment
+
+- dev-handle
+ Usage: Required
+ Value type: <phandle>
+ Definition: The phandle to the particular hardware device that this
+ portal is connected to.
+
+DPAA QMan Pool Channel Nodes
+
+Pool Channels are defined with the following properties.
+
+PROPERTIES
+
+- compatible
+ Usage: Required
+ Value type: <stringlist>
+ Definition: Must include "fsl,qman-pool-channel"
+ May include "fsl,<SoC>-qman-pool-channel"
+
+- fsl,qman-channel-id
+ Usage: Required
+ Value type: <u32>
+ Definition: The hardware index of the channel. This can also be
+ determined by dividing any of the channel's 8 work queue
+ IDs by 8
+
+EXAMPLE
+
+The example below shows a (P4080) QMan portals container/bus node with two portals
+
+ qman-portals@ff4200000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-bus";
+ ranges = <0 0xf 0xf4200000 0x200000>;
+
+ qman-portal@0 {
+ compatible = "fsl,qman-portal-1.2.0", "fsl,qman-portal";
+ reg = <0 0x4000>, <0x100000 0x1000>;
+ interrupts = <104 2 0 0>;
+ fsl,liodn = <1 2>;
+ fsl,qman-channel-id = <0>;
+
+ fman0 {
+ fsl,liodn = <0x21>;
+ dev-handle = <&fman0>;
+ };
+ fman1 {
+ fsl,liodn = <0xa1>;
+ dev-handle = <&fman1>;
+ };
+ crypto {
+ fsl,liodn = <0x41 0x66>;
+ dev-handle = <&crypto>;
+ };
+ };
+ qman-portal@4000 {
+ compatible = "fsl,qman-portal-1.2.0", "fsl,qman-portal";
+ reg = <0x4000 0x4000>, <0x101000 0x1000>;
+ interrupts = <106 2 0 0>;
+ fsl,liodn = <3 4>;
+ fsl,qman-channel-id = <1>;
+
+ fman0 {
+ fsl,liodn = <0x22>;
+ dev-handle = <&fman0>;
+ };
+ fman1 {
+ fsl,liodn = <0xa2>;
+ dev-handle = <&fman1>;
+ };
+ crypto {
+ fsl,liodn = <0x42 0x67>;
+ dev-handle = <&crypto>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/soc/fsl/qman.txt b/Documentation/devicetree/bindings/soc/fsl/qman.txt
new file mode 100644
index 000000000..556ebb8be
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/fsl/qman.txt
@@ -0,0 +1,175 @@
+QorIQ DPAA Queue Manager Device Tree Binding
+
+Copyright (C) 2008 - 2014 Freescale Semiconductor Inc.
+
+CONTENTS
+
+ - QMan Node
+ - QMan Private Memory Nodes
+ - Example
+
+QMan Node
+
+The Queue Manager is part of the Data-Path Acceleration Architecture (DPAA). QMan
+supports queuing and QoS scheduling of frames to CPUs, network interfaces and
+DPAA logic modules, maintains packet ordering within flows. Besides providing
+flow-level queuing, is also responsible for congestion management functions such
+as RED/WRED, congestion notifications and tail discards. This binding covers the
+CCSR space programming model
+
+PROPERTIES
+
+- compatible
+ Usage: Required
+ Value type: <stringlist>
+ Definition: Must include "fsl,qman"
+ May include "fsl,<SoC>-qman"
+
+- reg
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: Registers region within the CCSR address space
+
+The QMan revision information is located in the QMAN_IP_REV_1/2 registers which
+are located at offsets 0xbf8 and 0xbfc
+
+- interrupts
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Definition: Standard property. The error interrupt
+
+- fsl,qman-portals
+ Usage: Required
+ Value type: <phandle>
+ Definition: Phandle to this QMan instance's portals
+
+- fsl,liodn
+ Usage: See pamu.txt
+ Value type: <prop-encoded-array>
+ Definition: PAMU property used for static LIODN assignment
+
+- fsl,iommu-parent
+ Usage: See pamu.txt
+ Value type: <phandle>
+ Definition: PAMU property used for dynamic LIODN assignment
+
+ For additional details about the PAMU/LIODN binding(s) see pamu.txt
+
+- clocks
+ Usage: See clock-bindings.txt and qoriq-clock.txt
+ Value type: <prop-encoded-array>
+ Definition: Reference input clock. Its frequency is half of the
+ platform clock
+
+Devices connected to a QMan instance via Direct Connect Portals (DCP) must link
+to the respective QMan instance
+
+- fsl,qman
+ Usage: Required
+ Value type: <prop-encoded-array>
+ Description: List of phandle and DCP index pairs, to the QMan instance
+ to which this device is connected via the DCP
+
+QMan Private Memory Nodes
+
+QMan requires two contiguous range of physical memory used for the backing store
+for QMan Frame Queue Descriptor (FQD) and Packed Frame Descriptor Record (PFDR).
+This memory is reserved/allocated as a nodes under the /reserved-memory node
+
+The QMan FQD memory node must be named "qman-fqd"
+
+PROPERTIES
+
+- compatible
+ Usage: required
+ Value type: <stringlist>
+ Definition: Must inclide "fsl,qman-fqd"
+
+The QMan PFDR memory node must be named "qman-pfdr"
+
+PROPERTIES
+
+- compatible
+ Usage: required
+ Value type: <stringlist>
+ Definition: Must inclide "fsl,qman-pfdr"
+
+The following constraints are relevant to the FQD and PFDR private memory:
+ - The size must be 2^(size + 1), with size = 11..29. That is 4 KiB to
+ 1 GiB
+ - The alignment must be a muliptle of the memory size
+
+The size of the FQD and PFDP must be chosen by observing the hardware features
+configured via the Reset Configuration Word (RCW) and that are relevant to a
+specific board (e.g. number of MAC(s) pinned-out, number of offline/host command
+FMan ports, etc.). The size configured in the DT must reflect the hardware
+capabilities and not the specific needs of an application
+
+For additional details about reserved memory regions see reserved-memory.txt
+
+EXAMPLE
+
+The example below shows a QMan FQD and a PFDR dynamic allocation memory nodes
+
+ reserved-memory {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ qman_fqd: qman-fqd {
+ compatible = "fsl,qman-fqd";
+ alloc-ranges = <0 0 0x10 0>;
+ size = <0 0x400000>;
+ alignment = <0 0x400000>;
+ };
+ qman_pfdr: qman-pfdr {
+ compatible = "fsl,qman-pfdr";
+ alloc-ranges = <0 0 0x10 0>;
+ size = <0 0x2000000>;
+ alignment = <0 0x2000000>;
+ };
+ };
+
+The example below shows a (P4080) QMan CCSR-space node
+
+ qportals: qman-portals@ff4200000 {
+ ...
+ };
+
+ clockgen: global-utilities@e1000 {
+ ...
+ sysclk: sysclk {
+ ...
+ };
+ ...
+ platform_pll: platform-pll@c00 {
+ #clock-cells = <1>;
+ reg = <0xc00 0x4>;
+ compatible = "fsl,qoriq-platform-pll-1.0";
+ clocks = <&sysclk>;
+ clock-output-names = "platform-pll", "platform-pll-div2";
+ };
+ ...
+ };
+
+ crypto@300000 {
+ ...
+ fsl,qman = <&qman, 2>;
+ ...
+ };
+
+ qman: qman@318000 {
+ compatible = "fsl,qman";
+ reg = <0x318000 0x1000>;
+ interrupts = <16 2 1 3>
+ fsl,liodn = <0x16>;
+ fsl,qman-portals = <&qportals>;
+ memory-region = <&qman_fqd &qman_pfdr>;
+ clocks = <&platform_pll 1>;
+ };
+
+ fman@400000 {
+ ...
+ fsl,qman = <&qman, 0>;
+ ...
+ };
diff --git a/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt b/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt
new file mode 100644
index 000000000..ddeb5b6a5
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/mediatek/pwrap.txt
@@ -0,0 +1,58 @@
+MediaTek PMIC Wrapper Driver
+
+This document describes the binding for the MediaTek PMIC wrapper.
+
+On MediaTek SoCs the PMIC is connected via SPI. The SPI master interface
+is not directly visible to the CPU, but only through the PMIC wrapper
+inside the SoC. The communication between the SoC and the PMIC can
+optionally be encrypted. Also a non standard Dual IO SPI mode can be
+used to increase speed.
+
+IP Pairing
+
+on MT8135 the pins of some SoC internal peripherals can be on the PMIC.
+The signals of these pins are routed over the SPI bus using the pwrap
+bridge. In the binding description below the properties needed for bridging
+are marked with "IP Pairing". These are optional on SoCs which do not support
+IP Pairing
+
+Required properties in pwrap device node.
+- compatible:
+ "mediatek,mt8135-pwrap" for MT8135 SoCs
+ "mediatek,mt8173-pwrap" for MT8173 SoCs
+- interrupts: IRQ for pwrap in SOC
+- reg-names: Must include the following entries:
+ "pwrap": Main registers base
+ "pwrap-bridge": bridge base (IP Pairing)
+- reg: Must contain an entry for each entry in reg-names.
+- reset-names: Must include the following entries:
+ "pwrap"
+ "pwrap-bridge" (IP Pairing)
+- resets: Must contain an entry for each entry in reset-names.
+- clock-names: Must include the following entries:
+ "spi": SPI bus clock
+ "wrap": Main module clock
+- clocks: Must contain an entry for each entry in clock-names.
+
+Optional properities:
+- pmic: Mediatek PMIC MFD is the child device of pwrap
+ See the following for child node definitions:
+ Documentation/devicetree/bindings/mfd/mt6397.txt
+
+Example:
+ pwrap: pwrap@1000f000 {
+ compatible = "mediatek,mt8135-pwrap";
+ reg = <0 0x1000f000 0 0x1000>,
+ <0 0x11017000 0 0x1000>;
+ reg-names = "pwrap", "pwrap-bridge";
+ interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>;
+ resets = <&infracfg MT8135_INFRA_PMIC_WRAP_RST>,
+ <&pericfg MT8135_PERI_PWRAP_BRIDGE_SW_RST>;
+ reset-names = "pwrap", "pwrap-bridge";
+ clocks = <&clk26m>, <&clk26m>;
+ clock-names = "spi", "wrap";
+
+ pmic {
+ compatible = "mediatek,mt6397";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,gsbi.txt b/Documentation/devicetree/bindings/soc/qcom/qcom,gsbi.txt
new file mode 100644
index 000000000..2f5ede39b
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,gsbi.txt
@@ -0,0 +1,88 @@
+QCOM GSBI (General Serial Bus Interface) Driver
+
+The GSBI controller is modeled as a node with zero or more child nodes, each
+representing a serial sub-node device that is mux'd as part of the GSBI
+configuration settings. The mode setting will govern the input/output mode of
+the 4 GSBI IOs.
+
+Required properties:
+- compatible: Should contain "qcom,gsbi-v1.0.0"
+- cell-index: Should contain the GSBI index
+- reg: Address range for GSBI registers
+- clocks: required clock
+- clock-names: must contain "iface" entry
+- qcom,mode : indicates MUX value for configuration of the serial interface.
+ Please reference dt-bindings/soc/qcom,gsbi.h for valid mux values.
+
+Optional properties:
+- qcom,crci : indicates CRCI MUX value for QUP CRCI ports. Please reference
+ dt-bindings/soc/qcom,gsbi.h for valid CRCI mux values.
+- syscon-tcsr: indicates phandle of TCSR syscon node. Required if child uses
+ dma.
+
+Required properties if child node exists:
+- #address-cells: Must be 1
+- #size-cells: Must be 1
+- ranges: Must be present
+
+Properties for children:
+
+A GSBI controller node can contain 0 or more child nodes representing serial
+devices. These serial devices can be a QCOM UART, I2C controller, spi
+controller, or some combination of aforementioned devices.
+
+See the following for child node definitions:
+Documentation/devicetree/bindings/i2c/qcom,i2c-qup.txt
+Documentation/devicetree/bindings/spi/qcom,spi-qup.txt
+Documentation/devicetree/bindings/serial/qcom,msm-uartdm.txt
+
+Example for APQ8064:
+
+#include <dt-bindings/soc/qcom,gsbi.h>
+
+ gsbi4@16300000 {
+ compatible = "qcom,gsbi-v1.0.0";
+ cell-index = <4>;
+ reg = <0x16300000 0x100>;
+ clocks = <&gcc GSBI4_H_CLK>;
+ clock-names = "iface";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ qcom,mode = <GSBI_PROT_I2C_UART>;
+ qcom,crci = <GSBI_CRCI_QUP>;
+
+ syscon-tcsr = <&tcsr>;
+
+ /* child nodes go under here */
+
+ i2c_qup4: i2c@16380000 {
+ compatible = "qcom,i2c-qup-v1.1.1";
+ reg = <0x16380000 0x1000>;
+ interrupts = <0 153 0>;
+
+ clocks = <&gcc GSBI4_QUP_CLK>, <&gcc GSBI4_H_CLK>;
+ clock-names = "core", "iface";
+
+ clock-frequency = <200000>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ };
+
+ uart4: serial@16340000 {
+ compatible = "qcom,msm-uartdm-v1.3", "qcom,msm-uartdm";
+ reg = <0x16340000 0x1000>,
+ <0x16300000 0x1000>;
+ interrupts = <0 152 0x0>;
+ clocks = <&gcc GSBI4_UART_CLK>, <&gcc GSBI4_H_CLK>;
+ clock-names = "core", "iface";
+ status = "ok";
+ };
+ };
+
+ tcsr: syscon@1a400000 {
+ compatible = "qcom,apq8064-tcsr", "syscon";
+ reg = <0x1a400000 0x100>;
+ };
diff --git a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-dma.txt b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-dma.txt
new file mode 100644
index 000000000..337c4ea5c
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-dma.txt
@@ -0,0 +1,111 @@
+Keystone Navigator DMA Controller
+
+This document explains the device tree bindings for the packet dma
+on keystone devices. The Keystone Navigator DMA driver sets up the dma
+channels and flows for the QMSS(Queue Manager SubSystem) who triggers
+the actual data movements across clients using destination queues. Every
+client modules like NETCP(Network Coprocessor), SRIO(Serial Rapid IO),
+CRYPTO Engines etc has its own instance of dma hardware. QMSS has also
+an internal packet DMA module which is used as an infrastructure DMA
+with zero copy.
+
+Navigator DMA cloud layout:
+ ------------------
+ | Navigator DMAs |
+ ------------------
+ |
+ |-> DMA instance #0
+ |
+ |-> DMA instance #1
+ .
+ .
+ |
+ |-> DMA instance #n
+
+Navigator DMA properties:
+Required properties:
+ - compatible: Should be "ti,keystone-navigator-dma"
+ - clocks: phandle to dma instances clocks. The clock handles can be as
+ many as the dma instances. The order should be maintained as per
+ the dma instances.
+ - ti,navigator-cloud-address: Should contain base address for the multi-core
+ navigator cloud and number of addresses depends on SOC integration
+ configuration.. Navigator cloud global address needs to be programmed
+ into DMA and the DMA uses it as the physical addresses to reach queue
+ managers. Note that these addresses though points to queue managers,
+ they are relevant only from DMA perspective. The QMSS may not choose to
+ use them since it has a different address space view to reach all
+ its components.
+
+DMA instance properties:
+Required properties:
+ - reg: Should contain register location and length of the following dma
+ register regions. Register regions should be specified in the following
+ order.
+ - Global control register region (global).
+ - Tx DMA channel configuration register region (txchan).
+ - Rx DMA channel configuration register region (rxchan).
+ - Tx DMA channel Scheduler configuration register region (txsched).
+ - Rx DMA flow configuration register region (rxflow).
+
+Optional properties:
+ - reg-names: Names for the register regions.
+ - ti,enable-all: Enable all DMA channels vs clients opening specific channels
+ what they need. This property is useful for the userspace fast path
+ case where the linux drivers enables the channels used by userland
+ stack.
+ - ti,loop-back: To loopback Tx streaming I/F to Rx streaming I/F. Used for
+ infrastructure transfers.
+ - ti,rx-retry-timeout: Number of dma cycles to wait before retry on buffer
+ starvation.
+
+Example:
+
+ knav_dmas: knav_dmas@0 {
+ compatible = "ti,keystone-navigator-dma";
+ clocks = <&papllclk>, <&clkxge>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ ti,navigator-cloud-address = <0x23a80000 0x23a90000
+ 0x23aa0000 0x23ab0000>;
+
+ dma_gbe: dma_gbe@0 {
+ reg = <0x2004000 0x100>,
+ <0x2004400 0x120>,
+ <0x2004800 0x300>,
+ <0x2004c00 0x120>,
+ <0x2005000 0x400>;
+ reg-names = "global", "txchan", "rxchan",
+ "txsched", "rxflow";
+ };
+
+ dma_xgbe: dma_xgbe@0 {
+ reg = <0x2fa1000 0x100>,
+ <0x2fa1400 0x200>,
+ <0x2fa1800 0x200>,
+ <0x2fa1c00 0x200>,
+ <0x2fa2000 0x400>;
+ reg-names = "global", "txchan", "rxchan",
+ "txsched", "rxflow";
+ };
+ };
+
+Navigator DMA client:
+Required properties:
+ - ti,navigator-dmas: List of one or more DMA specifiers, each consisting of
+ - A phandle pointing to DMA instance node
+ - A DMA channel number as a phandle arg.
+ - ti,navigator-dma-names: Contains dma channel name for each DMA specifier in
+ the 'ti,navigator-dmas' property.
+
+Example:
+
+ netcp: netcp@2090000 {
+ ..
+ ti,navigator-dmas = <&dma_gbe 22>,
+ <&dma_gbe 23>,
+ <&dma_gbe 8>;
+ ti,navigator-dma-names = "netrx0", "netrx1", "nettx";
+ ..
+ };
diff --git a/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
new file mode 100644
index 000000000..fcf11e036
--- /dev/null
+++ b/Documentation/devicetree/bindings/soc/ti/keystone-navigator-qmss.txt
@@ -0,0 +1,232 @@
+* Texas Instruments Keystone Navigator Queue Management SubSystem driver
+
+The QMSS (Queue Manager Sub System) found on Keystone SOCs is one of
+the main hardware sub system which forms the backbone of the Keystone
+multi-core Navigator. QMSS consist of queue managers, packed-data structure
+processors(PDSP), linking RAM, descriptor pools and infrastructure
+Packet DMA.
+The Queue Manager is a hardware module that is responsible for accelerating
+management of the packet queues. Packets are queued/de-queued by writing or
+reading descriptor address to a particular memory mapped location. The PDSPs
+perform QMSS related functions like accumulation, QoS, or event management.
+Linking RAM registers are used to link the descriptors which are stored in
+descriptor RAM. Descriptor RAM is configurable as internal or external memory.
+The QMSS driver manages the PDSP setups, linking RAM regions,
+queue pool management (allocation, push, pop and notify) and descriptor
+pool management.
+
+
+Required properties:
+- compatible : Must be "ti,keystone-navigator-qmss";
+- clocks : phandle to the reference clock for this device.
+- queue-range : <start number> total range of queue numbers for the device.
+- linkram0 : <address size> for internal link ram, where size is the total
+ link ram entries.
+- linkram1 : <address size> for external link ram, where size is the total
+ external link ram entries. If the address is specified as "0"
+ driver will allocate memory.
+- qmgrs : child node describing the individual queue managers on the
+ SoC. On keystone 1 devices there should be only one node.
+ On keystone 2 devices there can be more than 1 node.
+ -- managed-queues : the actual queues managed by each queue manager
+ instance, specified as <"base queue #" "# of queues">.
+ -- reg : Address and size of the register set for the device.
+ Register regions should be specified in the following
+ order
+ - Queue Peek region.
+ - Queue status RAM.
+ - Queue configuration region.
+ - Descriptor memory setup region.
+ - Queue Management/Queue Proxy region for queue Push.
+ - Queue Management/Queue Proxy region for queue Pop.
+- queue-pools : child node classifying the queue ranges into pools.
+ Queue ranges are grouped into 3 type of pools:
+ - qpend : pool of qpend(interruptible) queues
+ - general-purpose : pool of general queues, primarly used
+ as free descriptor queues or the
+ transmit DMA queues.
+ - accumulator : pool of queues on PDSP accumulator channel
+ Each range can have the following properties:
+ -- qrange : number of queues to use per queue range, specified as
+ <"base queue #" "# of queues">.
+ -- interrupts : Optional property to specify the interrupt mapping
+ for interruptible queues. The driver additionaly sets
+ the interrupt affinity hint based on the cpu mask.
+ -- qalloc-by-id : Optional property to specify that the queues in this
+ range can only be allocated by queue id.
+ -- accumulator : Accumulator channel specification. Any of the PDSPs in
+ QMSS can be loaded with the accumulator firmware. The
+ accumulator firmware’s job is to poll a select number of
+ queues looking for descriptors that have been pushed
+ into them. Descriptors are popped from the queue and
+ placed in a buffer provided by the host. When the list
+ becomes full or a programmed time period expires, the
+ accumulator triggers an interrupt to the host to read
+ the buffer for descriptor information. This firmware
+ comes in 16, 32, and 48 channel builds. Each of these
+ channels can be configured to monitor 32 contiguous
+ queues. Accumulator channel property is specified as:
+ <pdsp-id, channel, entries, pacing mode, latency>
+ pdsp-id : QMSS PDSP running accumulator firmware
+ on which the channel has to be
+ configured
+ channel : Accumulator channel number
+ entries : Size of the accumulator descriptor list
+ pacing mode : Interrupt pacing mode
+ 0 : None, i.e interrupt on list full only
+ 1 : Time delay since last interrupt
+ 2 : Time delay since first new packet
+ 3 : Time delay since last new packet
+ latency : time to delay the interrupt, specified
+ in microseconds.
+ -- multi-queue : Optional property to specify that the channel has to
+ monitor upto 32 queues starting at the base queue #.
+- descriptor-regions : child node describing the memory regions for keystone
+ navigator packet DMA descriptors. The memory for
+ descriptors will be allocated by the driver.
+ -- id : region number in QMSS.
+ -- region-spec : specifies the number of descriptors in the
+ region, specified as
+ <"# of descriptors" "descriptor size">.
+ -- link-index : start index, i.e. index of the first
+ descriptor in the region.
+
+Optional properties:
+- dma-coherent : Present if DMA operations are coherent.
+- pdsps : child node describing the PDSP configuration.
+ -- firmware : firmware to be loaded on the PDSP.
+ -- id : the qmss pdsp that will run the firmware.
+ -- reg : Address and size of the register set for the PDSP.
+ Register regions should be specified in the following
+ order
+ - PDSP internal RAM region.
+ - PDSP control/status region registers.
+ - QMSS interrupt distributor registers.
+ - PDSP command interface region.
+
+Example:
+
+qmss: qmss@2a40000 {
+ compatible = "ti,keystone-qmss";
+ dma-coherent;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ clocks = <&chipclk13>;
+ ranges;
+ queue-range = <0 0x4000>;
+ linkram0 = <0x100000 0x8000>;
+ linkram1 = <0x0 0x10000>;
+
+ qmgrs {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ qmgr0 {
+ managed-queues = <0 0x2000>;
+ reg = <0x2a40000 0x20000>,
+ <0x2a06000 0x400>,
+ <0x2a02000 0x1000>,
+ <0x2a03000 0x1000>,
+ <0x23a80000 0x20000>,
+ <0x2a80000 0x20000>;
+ };
+
+ qmgr1 {
+ managed-queues = <0x2000 0x2000>;
+ reg = <0x2a60000 0x20000>,
+ <0x2a06400 0x400>,
+ <0x2a04000 0x1000>,
+ <0x2a05000 0x1000>,
+ <0x23aa0000 0x20000>,
+ <0x2aa0000 0x20000>;
+ };
+ };
+ queue-pools {
+ qpend {
+ qpend-0 {
+ qrange = <658 8>;
+ interrupts =<0 40 0xf04 0 41 0xf04 0 42 0xf04
+ 0 43 0xf04 0 44 0xf04 0 45 0xf04
+ 0 46 0xf04 0 47 0xf04>;
+ };
+ qpend-1 {
+ qrange = <8704 16>;
+ interrupts = <0 48 0xf04 0 49 0xf04 0 50 0xf04
+ 0 51 0xf04 0 52 0xf04 0 53 0xf04
+ 0 54 0xf04 0 55 0xf04 0 56 0xf04
+ 0 57 0xf04 0 58 0xf04 0 59 0xf04
+ 0 60 0xf04 0 61 0xf04 0 62 0xf04
+ 0 63 0xf04>;
+ qalloc-by-id;
+ };
+ qpend-2 {
+ qrange = <8720 16>;
+ interrupts = <0 64 0xf04 0 65 0xf04 0 66 0xf04
+ 0 59 0xf04 0 68 0xf04 0 69 0xf04
+ 0 70 0xf04 0 71 0xf04 0 72 0xf04
+ 0 73 0xf04 0 74 0xf04 0 75 0xf04
+ 0 76 0xf04 0 77 0xf04 0 78 0xf04
+ 0 79 0xf04>;
+ };
+ };
+ general-purpose {
+ gp-0 {
+ qrange = <4000 64>;
+ };
+ netcp-tx {
+ qrange = <640 9>;
+ qalloc-by-id;
+ };
+ };
+ accumulator {
+ acc-0 {
+ qrange = <128 32>;
+ accumulator = <0 36 16 2 50>;
+ interrupts = <0 215 0xf01>;
+ multi-queue;
+ qalloc-by-id;
+ };
+ acc-1 {
+ qrange = <160 32>;
+ accumulator = <0 37 16 2 50>;
+ interrupts = <0 216 0xf01>;
+ multi-queue;
+ };
+ acc-2 {
+ qrange = <192 32>;
+ accumulator = <0 38 16 2 50>;
+ interrupts = <0 217 0xf01>;
+ multi-queue;
+ };
+ acc-3 {
+ qrange = <224 32>;
+ accumulator = <0 39 16 2 50>;
+ interrupts = <0 218 0xf01>;
+ multi-queue;
+ };
+ };
+ };
+ descriptor-regions {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ region-12 {
+ id = <12>;
+ region-spec = <8192 128>; /* num_desc desc_size */
+ link-index = <0x4000>;
+ };
+ };
+ pdsps {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ pdsp0@0x2a10000 {
+ firmware = "/*(DEBLOBBED)*/";
+ reg = <0x2a10000 0x1000>,
+ <0x2a0f000 0x100>,
+ <0x2a0c000 0x3c8>,
+ <0x2a20000 0x4000>;
+ id = <0>;
+ };
+ };
+}; /* qmss */
diff --git a/Documentation/devicetree/bindings/sound/adi,adau1701.txt b/Documentation/devicetree/bindings/sound/adi,adau1701.txt
new file mode 100644
index 000000000..547a49b56
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/adi,adau1701.txt
@@ -0,0 +1,35 @@
+Analog Devices ADAU1701
+
+Required properties:
+
+ - compatible: Should contain "adi,adau1701"
+ - reg: The i2c address. Value depends on the state of ADDR0
+ and ADDR1, as wired in hardware.
+
+Optional properties:
+
+ - reset-gpio: A GPIO spec to define which pin is connected to the
+ chip's !RESET pin. If specified, the driver will
+ assert a hardware reset at probe time.
+ - adi,pll-mode-gpios: An array of two GPIO specs to describe the GPIOs
+ the ADAU's PLL config pins are connected to.
+ The state of the pins are set according to the
+ configured clock divider on ASoC side before the
+ firmware is loaded.
+ - adi,pin-config: An array of 12 numerical values selecting one of the
+ pin configurations as described in the datasheet,
+ table 53. Note that the value of this property has
+ to be prefixed with '/bits/ 8'.
+
+Examples:
+
+ i2c_bus {
+ adau1701@34 {
+ compatible = "adi,adau1701";
+ reg = <0x34>;
+ reset-gpio = <&gpio 23 0>;
+ adi,pll-mode-gpios = <&gpio 24 0 &gpio 25 0>;
+ adi,pin-config = /bits/ 8 <0x4 0x7 0x5 0x5 0x4 0x4
+ 0x4 0x4 0x4 0x4 0x4 0x4>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt b/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt
new file mode 100644
index 000000000..5875ca459
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/adi,axi-i2s.txt
@@ -0,0 +1,31 @@
+ADI AXI-I2S controller
+
+Required properties:
+ - compatible : Must be "adi,axi-i2s-1.00.a"
+ - reg : Must contain I2S core's registers location and length
+ - clocks : Pairs of phandle and specifier referencing the controller's clocks.
+ The controller expects two clocks, the clock used for the AXI interface and
+ the clock used as the sampling rate reference clock sample.
+ - clock-names : "axi" for the clock to the AXI interface, "ref" for the sample
+ rate reference clock.
+ - dmas: Pairs of phandle and specifier for the DMA channels that are used by
+ the core. The core expects two dma channels, one for transmit and one for
+ receive.
+ - dma-names : "tx" for the transmit channel, "rx" for the receive channel.
+
+For more details on the 'dma', 'dma-names', 'clock' and 'clock-names' properties
+please check:
+ * resource-names.txt
+ * clock/clock-bindings.txt
+ * dma/dma.txt
+
+Example:
+
+ i2s: i2s@0x77600000 {
+ compatible = "adi,axi-i2s-1.00.a";
+ reg = <0x77600000 0x1000>;
+ clocks = <&clk 15>, <&audio_clock>;
+ clock-names = "axi", "ref";
+ dmas = <&ps7_dma 0>, <&ps7_dma 1>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt b/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt
new file mode 100644
index 000000000..4eb799767
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/adi,axi-spdif-tx.txt
@@ -0,0 +1,30 @@
+ADI AXI-SPDIF controller
+
+Required properties:
+ - compatible : Must be "adi,axi-spdif-tx-1.00.a"
+ - reg : Must contain SPDIF core's registers location and length
+ - clocks : Pairs of phandle and specifier referencing the controller's clocks.
+ The controller expects two clocks, the clock used for the AXI interface and
+ the clock used as the sampling rate reference clock sample.
+ - clock-names: "axi" for the clock to the AXI interface, "ref" for the sample
+ rate reference clock.
+ - dmas: Pairs of phandle and specifier for the DMA channel that is used by
+ the core. The core expects one dma channel for transmit.
+ - dma-names : Must be "tx"
+
+For more details on the 'dma', 'dma-names', 'clock' and 'clock-names' properties
+please check:
+ * resource-names.txt
+ * clock/clock-bindings.txt
+ * dma/dma.txt
+
+Example:
+
+ spdif: spdif@0x77400000 {
+ compatible = "adi,axi-spdif-tx-1.00.a";
+ reg = <0x77600000 0x1000>;
+ clocks = <&clk 15>, <&audio_clock>;
+ clock-names = "axi", "ref";
+ dmas = <&ps7_dma 0>;
+ dma-names = "tx";
+ };
diff --git a/Documentation/devicetree/bindings/sound/adi,ssm2602.txt b/Documentation/devicetree/bindings/sound/adi,ssm2602.txt
new file mode 100644
index 000000000..3b3302fe3
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/adi,ssm2602.txt
@@ -0,0 +1,19 @@
+Analog Devices SSM2602, SSM2603 and SSM2604 I2S audio CODEC devices
+
+SSM2602 support both I2C and SPI as the configuration interface,
+the selection is made by the MODE strap-in pin.
+SSM2603 and SSM2604 only support I2C as the configuration interface.
+
+Required properties:
+
+ - compatible : One of "adi,ssm2602", "adi,ssm2603" or "adi,ssm2604"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+ Example:
+
+ ssm2602: ssm2602@1a {
+ compatible = "adi,ssm2602";
+ reg = <0x1a>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/ak4104.txt b/Documentation/devicetree/bindings/sound/ak4104.txt
new file mode 100644
index 000000000..deca5e18f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ak4104.txt
@@ -0,0 +1,25 @@
+AK4104 S/PDIF transmitter
+
+This device supports SPI mode only.
+
+Required properties:
+
+ - compatible : "asahi-kasei,ak4104"
+
+ - reg : The chip select number on the SPI bus
+
+ - vdd-supply : A regulator node, providing 2.7V - 3.6V
+
+Optional properties:
+
+ - reset-gpio : a GPIO spec for the reset pin. If specified, it will be
+ deasserted before communication to the device starts.
+
+Example:
+
+spdif: ak4104@0 {
+ compatible = "asahi-kasei,ak4104";
+ reg = <0>;
+ spi-max-frequency = <5000000>;
+ vdd-supply = <&vdd_3v3_reg>;
+};
diff --git a/Documentation/devicetree/bindings/sound/ak4554.c b/Documentation/devicetree/bindings/sound/ak4554.c
new file mode 100644
index 000000000..934fa0275
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ak4554.c
@@ -0,0 +1,11 @@
+AK4554 ADC/DAC
+
+Required properties:
+
+ - compatible : "asahi-kasei,ak4554"
+
+Example:
+
+ak4554-adc-dac {
+ compatible = "asahi-kasei,ak4554";
+};
diff --git a/Documentation/devicetree/bindings/sound/ak4642.txt b/Documentation/devicetree/bindings/sound/ak4642.txt
new file mode 100644
index 000000000..623d4e70a
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ak4642.txt
@@ -0,0 +1,17 @@
+AK4642 I2C transmitter
+
+This device supports I2C mode only.
+
+Required properties:
+
+ - compatible : "asahi-kasei,ak4642" or "asahi-kasei,ak4643" or "asahi-kasei,ak4648"
+ - reg : The chip select number on the I2C bus
+
+Example:
+
+&i2c {
+ ak4648: ak4648@0x12 {
+ compatible = "asahi-kasei,ak4642";
+ reg = <0x12>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/sound/ak5386.txt b/Documentation/devicetree/bindings/sound/ak5386.txt
new file mode 100644
index 000000000..ec3df3abb
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ak5386.txt
@@ -0,0 +1,23 @@
+AK5386 Single-ended 24-Bit 192kHz delta-sigma ADC
+
+This device has no control interface.
+
+Required properties:
+
+ - compatible : "asahi-kasei,ak5386"
+
+Optional properties:
+
+ - reset-gpio : a GPIO spec for the reset/power down pin.
+ If specified, it will be deasserted at probe time.
+ - va-supply : a regulator spec, providing 5.0V
+ - vd-supply : a regulator spec, providing 3.3V
+
+Example:
+
+spdif: ak5386@0 {
+ compatible = "asahi-kasei,ak5386";
+ reset-gpio = <&gpio0 23>;
+ va-supply = <&vdd_5v0_reg>;
+ vd-supply = <&vdd_3v3_reg>;
+};
diff --git a/Documentation/devicetree/bindings/sound/alc5623.txt b/Documentation/devicetree/bindings/sound/alc5623.txt
new file mode 100644
index 000000000..26c86c98d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/alc5623.txt
@@ -0,0 +1,25 @@
+ALC5621/ALC5622/ALC5623 audio Codec
+
+Required properties:
+
+ - compatible: "realtek,alc5623"
+ - reg: the I2C address of the device.
+
+Optional properties:
+
+ - add-ctrl: Default register value for Reg-40h, Additional Control
+ Register. If absent or has the value of 0, the
+ register is untouched.
+
+ - jack-det-ctrl: Default register value for Reg-5Ah, Jack Detect
+ Control Register. If absent or has value 0, the
+ register is untouched.
+
+Example:
+
+ alc5621: alc5621@1a {
+ compatible = "alc5621";
+ reg = <0x1a>;
+ add-ctrl = <0x3700>;
+ jack-det-ctrl = <0x4810>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/alc5632.txt b/Documentation/devicetree/bindings/sound/alc5632.txt
new file mode 100644
index 000000000..ffd886d11
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/alc5632.txt
@@ -0,0 +1,43 @@
+ALC5632 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+ - compatible : "realtek,alc5632"
+
+ - reg : the I2C address of the device.
+
+ - gpio-controller : Indicates this device is a GPIO controller.
+
+ - #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters (currently unused).
+
+Pins on the device (for linking into audio routes):
+
+ * SPK_OUTP
+ * SPK_OUTN
+ * HP_OUT_L
+ * HP_OUT_R
+ * AUX_OUT_P
+ * AUX_OUT_N
+ * LINE_IN_L
+ * LINE_IN_R
+ * PHONE_P
+ * PHONE_N
+ * MIC1_P
+ * MIC1_N
+ * MIC2_P
+ * MIC2_N
+ * MICBIAS1
+ * DMICDAT
+
+Example:
+
+alc5632: alc5632@1e {
+ compatible = "realtek,alc5632";
+ reg = <0x1a>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+};
diff --git a/Documentation/devicetree/bindings/sound/armada-370db-audio.txt b/Documentation/devicetree/bindings/sound/armada-370db-audio.txt
new file mode 100644
index 000000000..bf984d238
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/armada-370db-audio.txt
@@ -0,0 +1,27 @@
+Device Tree bindings for the Armada 370 DB audio
+================================================
+
+These Device Tree bindings are used to describe the audio complex
+found on the Armada 370 DB platform.
+
+Mandatory properties:
+
+ * compatible: must be "marvell,a370db-audio"
+
+ * marvell,audio-controller: a phandle that points to the audio
+ controller of the Armada 370 SoC.
+
+ * marvell,audio-codec: a set of three phandles that points to:
+
+ 1/ the analog audio codec connected to the Armada 370 SoC
+ 2/ the S/PDIF transceiver
+ 3/ the S/PDIF receiver
+
+Example:
+
+ sound {
+ compatible = "marvell,a370db-audio";
+ marvell,audio-controller = <&audio_controller>;
+ marvell,audio-codec = <&audio_codec &spdif_out &spdif_in>;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/sound/arndale.txt b/Documentation/devicetree/bindings/sound/arndale.txt
new file mode 100644
index 000000000..0e7694638
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/arndale.txt
@@ -0,0 +1,24 @@
+Audio Binding for Arndale boards
+
+Required properties:
+- compatible : Can be the following,
+ "samsung,arndale-rt5631"
+
+- samsung,audio-cpu: The phandle of the Samsung I2S controller
+- samsung,audio-codec: The phandle of the audio codec
+
+Optional:
+- samsung,model: The name of the sound-card
+
+Arndale Boards has many audio daughter cards, one of them is
+rt5631/alc5631. Below example shows audio bindings for rt5631/
+alc5631 based codec.
+
+Example:
+
+sound {
+ compatible = "samsung,arndale-rt5631";
+
+ samsung,audio-cpu = <&i2s0>
+ samsung,audio-codec = <&rt5631>;
+};
diff --git a/Documentation/devicetree/bindings/sound/atmel-at91sam9g20ek-wm8731-audio.txt b/Documentation/devicetree/bindings/sound/atmel-at91sam9g20ek-wm8731-audio.txt
new file mode 100644
index 000000000..9c5a9947b
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/atmel-at91sam9g20ek-wm8731-audio.txt
@@ -0,0 +1,26 @@
+* Atmel at91sam9g20ek wm8731 audio complex
+
+Required properties:
+ - compatible: "atmel,at91sam9g20ek-wm8731-audio"
+ - atmel,model: The user-visible name of this sound complex.
+ - atmel,audio-routing: A list of the connections between audio components.
+ - atmel,ssc-controller: The phandle of the SSC controller
+ - atmel,audio-codec: The phandle of the WM8731 audio codec
+Optional properties:
+ - pinctrl-names, pinctrl-0: Please refer to pinctrl-bindings.txt
+
+Example:
+sound {
+ compatible = "atmel,at91sam9g20ek-wm8731-audio";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pck0_as_mck>;
+
+ atmel,model = "wm8731 @ AT91SAMG20EK";
+
+ atmel,audio-routing =
+ "Ext Spk", "LHPOUT",
+ "Int MIC", "MICIN";
+
+ atmel,ssc-controller = <&ssc0>;
+ atmel,audio-codec = <&wm8731>;
+};
diff --git a/Documentation/devicetree/bindings/sound/atmel-sam9x5-wm8731-audio.txt b/Documentation/devicetree/bindings/sound/atmel-sam9x5-wm8731-audio.txt
new file mode 100644
index 000000000..072085708
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/atmel-sam9x5-wm8731-audio.txt
@@ -0,0 +1,35 @@
+* Atmel at91sam9x5ek wm8731 audio complex
+
+Required properties:
+ - compatible: "atmel,sam9x5-wm8731-audio"
+ - atmel,model: The user-visible name of this sound complex.
+ - atmel,ssc-controller: The phandle of the SSC controller
+ - atmel,audio-codec: The phandle of the WM8731 audio codec
+ - atmel,audio-routing: A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source.
+
+Available audio endpoints for the audio-routing table:
+
+Board connectors:
+ * Headphone Jack
+ * Line In Jack
+
+wm8731 pins:
+cf Documentation/devicetree/bindings/sound/wm8731.txt
+
+Example:
+sound {
+ compatible = "atmel,sam9x5-wm8731-audio";
+
+ atmel,model = "wm8731 @ AT91SAM9X5EK";
+
+ atmel,audio-routing =
+ "Headphone Jack", "RHPOUT",
+ "Headphone Jack", "LHPOUT",
+ "LLINEIN", "Line In Jack",
+ "RLINEIN", "Line In Jack";
+
+ atmel,ssc-controller = <&ssc0>;
+ atmel,audio-codec = <&wm8731>;
+};
diff --git a/Documentation/devicetree/bindings/sound/atmel-wm8904.txt b/Documentation/devicetree/bindings/sound/atmel-wm8904.txt
new file mode 100644
index 000000000..8bbe50c88
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/atmel-wm8904.txt
@@ -0,0 +1,55 @@
+Atmel ASoC driver with wm8904 audio codec complex
+
+Required properties:
+ - compatible: "atmel,asoc-wm8904"
+ - atmel,model: The user-visible name of this sound complex.
+ - atmel,audio-routing: A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source. Valid names for sources and
+ sinks are the WM8904's pins, and the jacks on the board:
+
+ WM8904 pins:
+
+ * IN1L
+ * IN1R
+ * IN2L
+ * IN2R
+ * IN3L
+ * IN3R
+ * HPOUTL
+ * HPOUTR
+ * LINEOUTL
+ * LINEOUTR
+ * MICBIAS
+
+ Board connectors:
+
+ * Headphone Jack
+ * Line In Jack
+ * Mic
+
+ - atmel,ssc-controller: The phandle of the SSC controller
+ - atmel,audio-codec: The phandle of the WM8904 audio codec
+
+Optional properties:
+ - pinctrl-names, pinctrl-0: Please refer to pinctrl-bindings.txt
+
+Example:
+sound {
+ compatible = "atmel,asoc-wm8904";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_pck0_as_mck>;
+
+ atmel,model = "wm8904 @ AT91SAM9N12EK";
+
+ atmel,audio-routing =
+ "Headphone Jack", "HPOUTL",
+ "Headphone Jack", "HPOUTR",
+ "IN2L", "Line In Jack",
+ "IN2R", "Line In Jack",
+ "Mic", "MICBIAS",
+ "IN1L", "Mic";
+
+ atmel,ssc-controller = <&ssc0>;
+ atmel,audio-codec = <&wm8904>;
+};
diff --git a/Documentation/devicetree/bindings/sound/atmel_ac97c.txt b/Documentation/devicetree/bindings/sound/atmel_ac97c.txt
new file mode 100644
index 000000000..b151bd902
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/atmel_ac97c.txt
@@ -0,0 +1,20 @@
+* Atmel AC97 controller
+
+Required properties:
+ - compatible: "atmel,at91sam9263-ac97c"
+ - reg: Address and length of the register set for the device
+ - interrupts: Should contain AC97 interrupt
+ - ac97-gpios: Please refer to soc-ac97link.txt, only ac97-reset is used
+Optional properties:
+ - pinctrl-names, pinctrl-0: Please refer to pinctrl-bindings.txt
+
+Example:
+sound@fffa0000 {
+ compatible = "atmel,at91sam9263-ac97c";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_ac97>;
+ reg = <0xfffa0000 0x4000>;
+ interrupts = <18 IRQ_TYPE_LEVEL_HIGH 5>;
+
+ ac97-gpios = <&pioB 0 0 &pioB 2 0 &pioC 29 GPIO_ACTIVE_LOW>;
+};
diff --git a/Documentation/devicetree/bindings/sound/brcm,bcm2835-i2s.txt b/Documentation/devicetree/bindings/sound/brcm,bcm2835-i2s.txt
new file mode 100644
index 000000000..65783de0a
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/brcm,bcm2835-i2s.txt
@@ -0,0 +1,25 @@
+* Broadcom BCM2835 SoC I2S/PCM module
+
+Required properties:
+- compatible: "brcm,bcm2835-i2s"
+- reg: A list of base address and size entries:
+ * The first entry should cover the PCM registers
+ * The second entry should cover the PCM clock registers
+- dmas: List of DMA controller phandle and DMA request line ordered pairs.
+- dma-names: Identifier string for each DMA request line in the dmas property.
+ These strings correspond 1:1 with the ordered pairs in dmas.
+
+ One of the DMA channels will be responsible for transmission (should be
+ named "tx") and one for reception (should be named "rx").
+
+Example:
+
+bcm2835_i2s: i2s@7e203000 {
+ compatible = "brcm,bcm2835-i2s";
+ reg = <0x7e203000 0x20>,
+ <0x7e101098 0x02>;
+
+ dmas = <&dma 2>,
+ <&dma 3>;
+ dma-names = "tx", "rx";
+};
diff --git a/Documentation/devicetree/bindings/sound/cdns,xtfpga-i2s.txt b/Documentation/devicetree/bindings/sound/cdns,xtfpga-i2s.txt
new file mode 100644
index 000000000..befd125d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cdns,xtfpga-i2s.txt
@@ -0,0 +1,18 @@
+Bindings for I2S controller built into xtfpga Xtensa bitstreams.
+
+Required properties:
+- compatible: shall be "cdns,xtfpga-i2s".
+- reg: memory region (address and length) with device registers.
+- interrupts: interrupt for the device.
+- clocks: phandle to the clk used as master clock. I2S bus clock
+ is derived from it.
+
+Examples:
+
+ i2s0: xtfpga-i2s@0d080000 {
+ #sound-dai-cells = <0>;
+ compatible = "cdns,xtfpga-i2s";
+ reg = <0x0d080000 0x40>;
+ interrupts = <2 1>;
+ clocks = <&cdce706 4>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/cs35l32.txt b/Documentation/devicetree/bindings/sound/cs35l32.txt
new file mode 100644
index 000000000..1417d3f5c
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cs35l32.txt
@@ -0,0 +1,62 @@
+CS35L32 audio CODEC
+
+Required properties:
+
+ - compatible : "cirrus,cs35l32"
+
+ - reg : the I2C address of the device for I2C. Address is determined by the level
+ of the AD0 pin. Level 0 is 0x40 while Level 1 is 0x41.
+
+ - VA-supply, VP-supply : power supplies for the device,
+ as covered in Documentation/devicetree/bindings/regulator/regulator.txt.
+
+Optional properties:
+
+ - reset-gpios : a GPIO spec for the reset pin. If specified, it will be
+ deasserted before communication to the codec starts.
+
+ - cirrus,boost-manager : Boost voltage control.
+ 0 = Automatically managed. Boost-converter output voltage is the higher
+ of the two: Class G or adaptive LED voltage.
+ 1 = Automatically managed irrespective of audio, adapting for low-power
+ dissipation when LEDs are ON, and operating in Fixed-Boost Bypass Mode
+ if LEDs are OFF (VBST = VP).
+ 2 = (Default) Boost voltage fixed in Bypass Mode (VBST = VP).
+ 3 = Boost voltage fixed at 5 V.
+
+ - cirrus,sdout-datacfg : Data configuration for dual CS35L32 applications only.
+ Determines the data packed in a two-CS35L32 configuration.
+ 0 = Left/right channels VMON[11:0], IMON[11:0], VPMON[7:0].
+ 1 = Left/right channels VMON[11:0], IMON[11:0], STATUS.
+ 2 = (Default) left/right channels VMON[15:0], IMON [15:0].
+ 3 = Left/right channels VPMON[7:0], STATUS.
+
+ - cirrus,sdout-share : SDOUT sharing. Determines whether one or two CS35L32
+ devices are on board sharing SDOUT.
+ 0 = (Default) One IC.
+ 1 = Two IC's.
+
+ - cirrus,battery-recovery : Low battery nominal recovery threshold, rising VP.
+ 0 = 3.1V
+ 1 = 3.2V
+ 2 = 3.3V (Default)
+ 3 = 3.4V
+
+ - cirrus,battery-threshold : Low battery nominal threshold, falling VP.
+ 0 = 3.1V
+ 1 = 3.2V
+ 2 = 3.3V
+ 3 = 3.4V (Default)
+ 4 = 3.5V
+ 5 = 3.6V
+
+Example:
+
+codec: codec@40 {
+ compatible = "cirrus,cs35l32";
+ reg = <0x40>;
+ reset-gpios = <&gpio 10 0>;
+ cirrus,boost-manager = <0x03>;
+ cirrus,sdout-datacfg = <0x02>;
+ VA-supply = <&reg_audio>;
+};
diff --git a/Documentation/devicetree/bindings/sound/cs4265.txt b/Documentation/devicetree/bindings/sound/cs4265.txt
new file mode 100644
index 000000000..380fff8e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cs4265.txt
@@ -0,0 +1,29 @@
+CS4265 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+ - compatible : "cirrus,cs4265"
+
+ - reg : the I2C address of the device for I2C. The I2C address depends on
+ the state of the AD0 pin. If AD0 is high, the i2c address is 0x4f.
+ If it is low, the i2c address is 0x4e.
+
+Optional properties:
+
+ - reset-gpios : a GPIO spec for the reset pin. If specified, it will be
+ deasserted before communication to the codec starts.
+
+Examples:
+
+codec_ad0_high: cs4265@4f { /* AD0 Pin is high */
+ compatible = "cirrus,cs4265";
+ reg = <0x4f>;
+};
+
+
+codec_ad0_low: cs4265@4e { /* AD0 Pin is low */
+ compatible = "cirrus,cs4265";
+ reg = <0x4e>;
+};
diff --git a/Documentation/devicetree/bindings/sound/cs4270.txt b/Documentation/devicetree/bindings/sound/cs4270.txt
new file mode 100644
index 000000000..6b222f9b8
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cs4270.txt
@@ -0,0 +1,21 @@
+CS4270 audio CODEC
+
+The driver for this device currently only supports I2C.
+
+Required properties:
+
+ - compatible : "cirrus,cs4270"
+
+ - reg : the I2C address of the device for I2C
+
+Optional properties:
+
+ - reset-gpio : a GPIO spec for the reset pin. If specified, it will be
+ deasserted before communication to the codec starts.
+
+Example:
+
+codec: cs4270@48 {
+ compatible = "cirrus,cs4270";
+ reg = <0x48>;
+};
diff --git a/Documentation/devicetree/bindings/sound/cs4271.txt b/Documentation/devicetree/bindings/sound/cs4271.txt
new file mode 100644
index 000000000..e2cd1d753
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cs4271.txt
@@ -0,0 +1,50 @@
+Cirrus Logic CS4271 DT bindings
+
+This driver supports both the I2C and the SPI bus.
+
+Required properties:
+
+ - compatible: "cirrus,cs4271"
+
+For required properties on SPI, please consult
+Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Required properties on I2C:
+
+ - reg: the i2c address
+
+
+Optional properties:
+
+ - reset-gpio: a GPIO spec to define which pin is connected to the chip's
+ !RESET pin
+ - cirrus,amuteb-eq-bmutec: When given, the Codec's AMUTEB=BMUTEC flag
+ is enabled.
+ - cirrus,enable-soft-reset:
+ The CS4271 requires its LRCLK and MCLK to be stable before its RESET
+ line is de-asserted. That also means that clocks cannot be changed
+ without putting the chip back into hardware reset, which also requires
+ a complete re-initialization of all registers.
+
+ One (undocumented) workaround is to assert and de-assert the PDN bit
+ in the MODE2 register. This workaround can be enabled with this DT
+ property.
+
+ Note that this is not needed in case the clocks are stable
+ throughout the entire runtime of the codec.
+
+Examples:
+
+ codec_i2c: cs4271@10 {
+ compatible = "cirrus,cs4271";
+ reg = <0x10>;
+ reset-gpio = <&gpio 23 0>;
+ };
+
+ codec_spi: cs4271@0 {
+ compatible = "cirrus,cs4271";
+ reg = <0x0>;
+ reset-gpio = <&gpio 23 0>;
+ spi-max-frequency = <6000000>;
+ };
+
diff --git a/Documentation/devicetree/bindings/sound/cs42l52.txt b/Documentation/devicetree/bindings/sound/cs42l52.txt
new file mode 100644
index 000000000..bc03c9312
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cs42l52.txt
@@ -0,0 +1,46 @@
+CS42L52 audio CODEC
+
+Required properties:
+
+ - compatible : "cirrus,cs42l52"
+
+ - reg : the I2C address of the device for I2C
+
+Optional properties:
+
+ - cirrus,reset-gpio : GPIO controller's phandle and the number
+ of the GPIO used to reset the codec.
+
+ - cirrus,chgfreq-divisor : Values used to set the Charge Pump Frequency.
+ Allowable values of 0x00 through 0x0F. These are raw values written to the
+ register, not the actual frequency. The frequency is determined by the following.
+ Frequency = (64xFs)/(N+2)
+ N = chgfreq_val
+ Fs = Sample Rate (variable)
+
+ - cirrus,mica-differential-cfg : boolean, If present, then the MICA input is configured
+ as a differential input. If not present then the MICA input is configured as
+ Single-ended input. Single-ended mode allows for MIC1 or MIC2 muxing for input.
+
+ - cirrus,micb-differential-cfg : boolean, If present, then the MICB input is configured
+ as a differential input. If not present then the MICB input is configured as
+ Single-ended input. Single-ended mode allows for MIC1 or MIC2 muxing for input.
+
+ - cirrus,micbias-lvl: Set the output voltage level on the MICBIAS Pin
+ 0 = 0.5 x VA
+ 1 = 0.6 x VA
+ 2 = 0.7 x VA
+ 3 = 0.8 x VA
+ 4 = 0.83 x VA
+ 5 = 0.91 x VA
+
+Example:
+
+codec: codec@4a {
+ compatible = "cirrus,cs42l52";
+ reg = <0x4a>;
+ reset-gpio = <&gpio 10 0>;
+ cirrus,chgfreq-divisor = <0x05>;
+ cirrus.mica-differential-cfg;
+ cirrus,micbias-lvl = <5>;
+};
diff --git a/Documentation/devicetree/bindings/sound/cs42l56.txt b/Documentation/devicetree/bindings/sound/cs42l56.txt
new file mode 100644
index 000000000..4feb0eb27
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cs42l56.txt
@@ -0,0 +1,63 @@
+CS42L52 audio CODEC
+
+Required properties:
+
+ - compatible : "cirrus,cs42l56"
+
+ - reg : the I2C address of the device for I2C
+
+ - VA-supply, VCP-supply, VLDO-supply : power supplies for the device,
+ as covered in Documentation/devicetree/bindings/regulator/regulator.txt.
+
+Optional properties:
+
+ - cirrus,gpio-nreset : GPIO controller's phandle and the number
+ of the GPIO used to reset the codec.
+
+ - cirrus,chgfreq-divisor : Values used to set the Charge Pump Frequency.
+ Allowable values of 0x00 through 0x0F. These are raw values written to the
+ register, not the actual frequency. The frequency is determined by the following.
+ Frequency = MCLK / 4 * (N+2)
+ N = chgfreq_val
+ MCLK = Where MCLK is the frequency of the mclk signal after the MCLKDIV2 circuit.
+
+ - cirrus,ain1a-ref-cfg, ain1b-ref-cfg : boolean, If present, AIN1A or AIN1B are configured
+ as a pseudo-differential input referenced to AIN1REF/AIN3A.
+
+ - cirrus,ain2a-ref-cfg, ain2b-ref-cfg : boolean, If present, AIN2A or AIN2B are configured
+ as a pseudo-differential input referenced to AIN2REF/AIN3B.
+
+ - cirrus,micbias-lvl: Set the output voltage level on the MICBIAS Pin.
+ 0 = 0.5 x VA
+ 1 = 0.6 x VA
+ 2 = 0.7 x VA
+ 3 = 0.8 x VA
+ 4 = 0.83 x VA
+ 5 = 0.91 x VA
+
+ - cirrus,adaptive-pwr-cfg : Configures how the power to the Headphone and Lineout
+ Amplifiers adapt to the output signal levels.
+ 0 = Adapt to Volume Mode. Voltage level determined by the sum of the relevant volume settings.
+ 1 = Fixed - Headphone and Line Amp supply = + or - VCP/2.
+ 2 = Fixed - Headphone and Line Amp supply = + or - VCP.
+ 3 = Adapted to Signal; Voltage level is dynamically determined by the output signal.
+
+ - cirrus,hpf-left-freq, hpf-right-freq : Sets the corner frequency (-3dB point) for the internal High-Pass
+ Filter.
+ 0 = 1.8Hz
+ 1 = 119Hz
+ 2 = 236Hz
+ 3 = 464Hz
+
+
+Example:
+
+codec: codec@4b {
+ compatible = "cirrus,cs42l56";
+ reg = <0x4b>;
+ gpio-reset = <&gpio 10 0>;
+ cirrus,chgfreq-divisor = <0x05>;
+ cirrus.ain1_ref_cfg;
+ cirrus,micbias-lvl = <5>;
+ VA-supply = <&reg_audio>;
+};
diff --git a/Documentation/devicetree/bindings/sound/cs42l73.txt b/Documentation/devicetree/bindings/sound/cs42l73.txt
new file mode 100644
index 000000000..80ae910db
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cs42l73.txt
@@ -0,0 +1,22 @@
+CS42L73 audio CODEC
+
+Required properties:
+
+ - compatible : "cirrus,cs42l73"
+
+ - reg : the I2C address of the device for I2C
+
+Optional properties:
+
+ - reset_gpio : a GPIO spec for the reset pin.
+ - chgfreq : Charge Pump Frequency values 0x00-0x0F
+
+
+Example:
+
+codec: cs42l73@4a {
+ compatible = "cirrus,cs42l73";
+ reg = <0x4a>;
+ reset_gpio = <&gpio 10 0>;
+ chgfreq = <0x05>;
+}; \ No newline at end of file
diff --git a/Documentation/devicetree/bindings/sound/cs42xx8.txt b/Documentation/devicetree/bindings/sound/cs42xx8.txt
new file mode 100644
index 000000000..f631fbca6
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/cs42xx8.txt
@@ -0,0 +1,28 @@
+CS42448/CS42888 audio CODEC
+
+Required properties:
+
+ - compatible : must contain one of "cirrus,cs42448" and "cirrus,cs42888"
+
+ - reg : the I2C address of the device for I2C
+
+ - clocks : a list of phandles + clock-specifiers, one for each entry in
+ clock-names
+
+ - clock-names : must contain "mclk"
+
+ - VA-supply, VD-supply, VLS-supply, VLC-supply: power supplies for the device,
+ as covered in Documentation/devicetree/bindings/regulator/regulator.txt
+
+Example:
+
+codec: cs42888@48 {
+ compatible = "cirrus,cs42888";
+ reg = <0x48>;
+ clocks = <&codec_mclk 0>;
+ clock-names = "mclk";
+ VA-supply = <&reg_audio>;
+ VD-supply = <&reg_audio>;
+ VLS-supply = <&reg_audio>;
+ VLC-supply = <&reg_audio>;
+};
diff --git a/Documentation/devicetree/bindings/sound/da9055.txt b/Documentation/devicetree/bindings/sound/da9055.txt
new file mode 100644
index 000000000..ed1b7cc6f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/da9055.txt
@@ -0,0 +1,22 @@
+* Dialog DA9055 Audio CODEC
+
+DA9055 provides Audio CODEC support (I2C only).
+
+The Audio CODEC device in DA9055 has it's own I2C address which is configurable,
+so the device is instantiated separately from the PMIC (MFD) device.
+
+For details on accompanying PMIC I2C device, see the following:
+Documentation/devicetree/bindings/mfd/da9055.txt
+
+Required properties:
+
+ - compatible: "dlg,da9055-codec"
+ - reg: Specifies the I2C slave address
+
+
+Example:
+
+ codec: da9055-codec@1a {
+ compatible = "dlg,da9055-codec";
+ reg = <0x1a>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/davinci-evm-audio.txt b/Documentation/devicetree/bindings/sound/davinci-evm-audio.txt
new file mode 100644
index 000000000..963e10051
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/davinci-evm-audio.txt
@@ -0,0 +1,49 @@
+* Texas Instruments SoC audio setups with TLV320AIC3X Codec
+
+Required properties:
+- compatible : "ti,da830-evm-audio" : forDM365/DA8xx/OMAPL1x/AM33xx
+- ti,model : The user-visible name of this sound complex.
+- ti,audio-codec : The phandle of the TLV320AIC3x audio codec
+- ti,mcasp-controller : The phandle of the McASP controller
+- ti,audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source. Valid names for sources and
+ sinks are the codec's pins, and the jacks on the board:
+
+Optional properties:
+- ti,codec-clock-rate : The Codec Clock rate (in Hz) applied to the Codec.
+- clocks : Reference to the master clock
+- clock-names : The clock should be named "mclk"
+- Either codec-clock-rate or the codec-clock reference has to be defined. If
+ the both are defined the driver attempts to set referenced clock to the
+ defined rate and takes the rate from the clock reference.
+
+ Board connectors:
+
+ * Headphone Jack
+ * Line Out
+ * Mic Jack
+ * Line In
+
+
+Example:
+
+sound {
+ compatible = "ti,da830-evm-audio";
+ ti,model = "DA830 EVM";
+ ti,audio-codec = <&tlv320aic3x>;
+ ti,mcasp-controller = <&mcasp1>;
+ ti,codec-clock-rate = <12000000>;
+ ti,audio-routing =
+ "Headphone Jack", "HPLOUT",
+ "Headphone Jack", "HPROUT",
+ "Line Out", "LLOUT",
+ "Line Out", "RLOUT",
+ "MIC3L", "Mic Bias 2V",
+ "MIC3R", "Mic Bias 2V",
+ "Mic Bias 2V", "Mic Jack",
+ "LINE1L", "Line In",
+ "LINE2L", "Line In",
+ "LINE1R", "Line In",
+ "LINE2R", "Line In";
+};
diff --git a/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt b/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt
new file mode 100644
index 000000000..46bc9829c
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/davinci-mcasp-audio.txt
@@ -0,0 +1,60 @@
+Texas Instruments McASP controller
+
+Required properties:
+- compatible :
+ "ti,dm646x-mcasp-audio" : for DM646x platforms
+ "ti,da830-mcasp-audio" : for both DA830 & DA850 platforms
+ "ti,am33xx-mcasp-audio" : for AM33xx platforms (AM33xx, AM43xx, TI81xx)
+ "ti,dra7-mcasp-audio" : for DRA7xx platforms
+
+- reg : Should contain reg specifiers for the entries in the reg-names property.
+- reg-names : Should contain:
+ * "mpu" for the main registers (required). For compatibility with
+ existing software, it is recommended this is the first entry.
+ * "dat" for separate data port register access (optional).
+- op-mode : I2S/DIT ops mode. 0 for I2S mode. 1 for DIT mode used for S/PDIF,
+ IEC60958-1, and AES-3 formats.
+- tdm-slots : Slots for TDM operation. Indicates number of channels transmitted
+ or received over one serializer.
+- serial-dir : A list of serializer configuration. Each entry is a number
+ indication for serializer pin direction.
+ (0 - INACTIVE, 1 - TX, 2 - RX)
+- dmas: two element list of DMA controller phandles and DMA request line
+ ordered pairs.
+- dma-names: identifier string for each DMA request line in the dmas property.
+ These strings correspond 1:1 with the ordered pairs in dmas. The dma
+ identifiers must be "rx" and "tx".
+
+Optional properties:
+
+- ti,hwmods : Must be "mcasp<n>", n is controller instance starting 0
+- tx-num-evt : FIFO levels.
+- rx-num-evt : FIFO levels.
+- sram-size-playback : size of sram to be allocated during playback
+- sram-size-capture : size of sram to be allocated during capture
+- interrupts : Interrupt numbers for McASP
+- interrupt-names : Known interrupt names are "tx" and "rx"
+- pinctrl-0: Should specify pin control group used for this controller.
+- pinctrl-names: Should contain only one value - "default", for more details
+ please refer to pinctrl-bindings.txt
+- fck_parent : Should contain a valid clock name which will be used as parent
+ for the McASP fck
+
+Example:
+
+mcasp0: mcasp0@1d00000 {
+ compatible = "ti,da830-mcasp-audio";
+ reg = <0x100000 0x3000>;
+ reg-names "mpu";
+ interrupts = <82>, <83>;
+ interrupt-names = "tx", "rx";
+ op-mode = <0>; /* MCASP_IIS_MODE */
+ tdm-slots = <2>;
+ serial-dir = <
+ 0 0 0 0 /* 0: INACTIVE, 1: TX, 2: RX */
+ 0 0 0 0
+ 0 0 0 1
+ 2 0 0 0 >;
+ tx-num-evt = <1>;
+ rx-num-evt = <1>;
+};
diff --git a/Documentation/devicetree/bindings/sound/designware-i2s.txt b/Documentation/devicetree/bindings/sound/designware-i2s.txt
new file mode 100644
index 000000000..7bb54247f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/designware-i2s.txt
@@ -0,0 +1,31 @@
+DesignWare I2S controller
+
+Required properties:
+ - compatible : Must be "snps,designware-i2s"
+ - reg : Must contain the I2S core's registers location and length
+ - clocks : Pairs of phandle and specifier referencing the controller's
+ clocks. The controller expects one clock: the clock used as the sampling
+ rate reference clock sample.
+ - clock-names : "i2sclk" for the sample rate reference clock.
+ - dmas: Pairs of phandle and specifier for the DMA channels that are used by
+ the core. The core expects one or two dma channels: one for transmit and
+ one for receive.
+ - dma-names : "tx" for the transmit channel, "rx" for the receive channel.
+
+For more details on the 'dma', 'dma-names', 'clock' and 'clock-names'
+properties please check:
+ * resource-names.txt
+ * clock/clock-bindings.txt
+ * dma/dma.txt
+
+Example:
+
+ soc_i2s: i2s@7ff90000 {
+ compatible = "snps,designware-i2s";
+ reg = <0x0 0x7ff90000 0x0 0x1000>;
+ clocks = <&scpi_i2sclk 0>;
+ clock-names = "i2sclk";
+ #sound-dai-cells = <0>;
+ dmas = <&dma0 5>;
+ dma-names = "tx";
+ };
diff --git a/Documentation/devicetree/bindings/sound/es8328.txt b/Documentation/devicetree/bindings/sound/es8328.txt
new file mode 100644
index 000000000..30ea8a318
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/es8328.txt
@@ -0,0 +1,38 @@
+Everest ES8328 audio CODEC
+
+This device supports both I2C and SPI.
+
+Required properties:
+
+ - compatible : "everest,es8328"
+ - DVDD-supply : Regulator providing digital core supply voltage 1.8 - 3.6V
+ - AVDD-supply : Regulator providing analog supply voltage 3.3V
+ - PVDD-supply : Regulator providing digital IO supply voltage 1.8 - 3.6V
+ - IPVDD-supply : Regulator providing analog output voltage 3.3V
+ - clocks : A 22.5792 or 11.2896 MHz clock
+ - reg : the I2C address of the device for I2C, the chip select number for SPI
+
+Pins on the device (for linking into audio routes):
+
+ * LOUT1
+ * LOUT2
+ * ROUT1
+ * ROUT2
+ * LINPUT1
+ * RINPUT1
+ * LINPUT2
+ * RINPUT2
+ * Mic Bias
+
+
+Example:
+
+codec: es8328@11 {
+ compatible = "everest,es8328";
+ DVDD-supply = <&reg_3p3v>;
+ AVDD-supply = <&reg_3p3v>;
+ PVDD-supply = <&reg_3p3v>;
+ HPVDD-supply = <&reg_3p3v>;
+ clocks = <&clks 169>;
+ reg = <0x11>;
+};
diff --git a/Documentation/devicetree/bindings/sound/eukrea-tlv320.txt b/Documentation/devicetree/bindings/sound/eukrea-tlv320.txt
new file mode 100644
index 000000000..6dfa88c4d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/eukrea-tlv320.txt
@@ -0,0 +1,26 @@
+Audio complex for Eukrea boards with tlv320aic23 codec.
+
+Required properties:
+
+ - compatible : "eukrea,asoc-tlv320"
+
+ - eukrea,model : The user-visible name of this sound complex.
+
+ - ssi-controller : The phandle of the SSI controller.
+
+ - fsl,mux-int-port : The internal port of the i.MX audio muxer (AUDMUX).
+
+ - fsl,mux-ext-port : The external port of the i.MX audio muxer.
+
+Note: The AUDMUX port numbering should start at 1, which is consistent with
+hardware manual.
+
+Example:
+
+ sound {
+ compatible = "eukrea,asoc-tlv320";
+ eukrea,model = "imx51-eukrea-tlv320aic23";
+ ssi-controller = <&ssi2>;
+ fsl,mux-int-port = <2>;
+ fsl,mux-ext-port = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/fsl,asrc.txt b/Documentation/devicetree/bindings/sound/fsl,asrc.txt
new file mode 100644
index 000000000..b93362a57
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/fsl,asrc.txt
@@ -0,0 +1,60 @@
+Freescale Asynchronous Sample Rate Converter (ASRC) Controller
+
+The Asynchronous Sample Rate Converter (ASRC) converts the sampling rate of a
+signal associated with an input clock into a signal associated with a different
+output clock. The driver currently works as a Front End of DPCM with other Back
+Ends Audio controller such as ESAI, SSI and SAI. It has three pairs to support
+three substreams within totally 10 channels.
+
+Required properties:
+
+ - compatible : Contains "fsl,imx35-asrc" or "fsl,imx53-asrc".
+
+ - reg : Offset and length of the register set for the device.
+
+ - interrupts : Contains the spdif interrupt.
+
+ - dmas : Generic dma devicetree binding as described in
+ Documentation/devicetree/bindings/dma/dma.txt.
+
+ - dma-names : Contains "rxa", "rxb", "rxc", "txa", "txb" and "txc".
+
+ - clocks : Contains an entry for each entry in clock-names.
+
+ - clock-names : Contains the following entries
+ "mem" Peripheral access clock to access registers.
+ "ipg" Peripheral clock to driver module.
+ "asrck_<0-f>" Clock sources for input and output clock.
+
+ - big-endian : If this property is absent, the little endian mode
+ will be in use as default. Otherwise, the big endian
+ mode will be in use for all the device registers.
+
+ - fsl,asrc-rate : Defines a mutual sample rate used by DPCM Back Ends.
+
+ - fsl,asrc-width : Defines a mutual sample width used by DPCM Back Ends.
+
+Example:
+
+asrc: asrc@02034000 {
+ compatible = "fsl,imx53-asrc";
+ reg = <0x02034000 0x4000>;
+ interrupts = <0 50 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clks 107>, <&clks 107>, <&clks 0>,
+ <&clks 0>, <&clks 0>, <&clks 0>, <&clks 0>,
+ <&clks 0>, <&clks 0>, <&clks 0>, <&clks 0>,
+ <&clks 0>, <&clks 0>, <&clks 0>, <&clks 0>,
+ <&clks 107>, <&clks 0>, <&clks 0>;
+ clock-names = "mem", "ipg", "asrck0",
+ "asrck_1", "asrck_2", "asrck_3", "asrck_4",
+ "asrck_5", "asrck_6", "asrck_7", "asrck_8",
+ "asrck_9", "asrck_a", "asrck_b", "asrck_c",
+ "asrck_d", "asrck_e", "asrck_f";
+ dmas = <&sdma 17 23 1>, <&sdma 18 23 1>, <&sdma 19 23 1>,
+ <&sdma 20 23 1>, <&sdma 21 23 1>, <&sdma 22 23 1>;
+ dma-names = "rxa", "rxb", "rxc",
+ "txa", "txb", "txc";
+ fsl,asrc-rate = <48000>;
+ fsl,asrc-width = <16>;
+ status = "okay";
+};
diff --git a/Documentation/devicetree/bindings/sound/fsl,esai.txt b/Documentation/devicetree/bindings/sound/fsl,esai.txt
new file mode 100644
index 000000000..d3b6b5f48
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/fsl,esai.txt
@@ -0,0 +1,58 @@
+Freescale Enhanced Serial Audio Interface (ESAI) Controller
+
+The Enhanced Serial Audio Interface (ESAI) provides a full-duplex serial port
+for serial communication with a variety of serial devices, including industry
+standard codecs, Sony/Phillips Digital Interface (S/PDIF) transceivers, and
+other DSPs. It has up to six transmitters and four receivers.
+
+Required properties:
+
+ - compatible : Compatible list, must contain "fsl,imx35-esai" or
+ "fsl,vf610-esai"
+
+ - reg : Offset and length of the register set for the device.
+
+ - interrupts : Contains the spdif interrupt.
+
+ - dmas : Generic dma devicetree binding as described in
+ Documentation/devicetree/bindings/dma/dma.txt.
+
+ - dma-names : Two dmas have to be defined, "tx" and "rx".
+
+ - clocks : Contains an entry for each entry in clock-names.
+
+ - clock-names : Includes the following entries:
+ "core" The core clock used to access registers
+ "extal" The esai baud clock for esai controller used to
+ derive HCK, SCK and FS.
+ "fsys" The system clock derived from ahb clock used to
+ derive HCK, SCK and FS.
+
+ - fsl,fifo-depth : The number of elements in the transmit and receive
+ FIFOs. This number is the maximum allowed value for
+ TFCR[TFWM] or RFCR[RFWM].
+
+ - fsl,esai-synchronous: This is a boolean property. If present, indicating
+ that ESAI would work in the synchronous mode, which
+ means all the settings for Receiving would be
+ duplicated from Transmition related registers.
+
+ - big-endian : If this property is absent, the native endian mode
+ will be in use as default, or the big endian mode
+ will be in use for all the device registers.
+
+Example:
+
+esai: esai@02024000 {
+ compatible = "fsl,imx35-esai";
+ reg = <0x02024000 0x4000>;
+ interrupts = <0 51 0x04>;
+ clocks = <&clks 208>, <&clks 118>, <&clks 208>;
+ clock-names = "core", "extal", "fsys";
+ dmas = <&sdma 23 21 0>, <&sdma 24 21 0>;
+ dma-names = "rx", "tx";
+ fsl,fifo-depth = <128>;
+ fsl,esai-synchronous;
+ big-endian;
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/sound/fsl,spdif.txt b/Documentation/devicetree/bindings/sound/fsl,spdif.txt
new file mode 100644
index 000000000..b5ee32ee3
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/fsl,spdif.txt
@@ -0,0 +1,58 @@
+Freescale Sony/Philips Digital Interface Format (S/PDIF) Controller
+
+The Freescale S/PDIF audio block is a stereo transceiver that allows the
+processor to receive and transmit digital audio via an coaxial cable or
+a fibre cable.
+
+Required properties:
+
+ - compatible : Compatible list, must contain "fsl,imx35-spdif".
+
+ - reg : Offset and length of the register set for the device.
+
+ - interrupts : Contains the spdif interrupt.
+
+ - dmas : Generic dma devicetree binding as described in
+ Documentation/devicetree/bindings/dma/dma.txt.
+
+ - dma-names : Two dmas have to be defined, "tx" and "rx".
+
+ - clocks : Contains an entry for each entry in clock-names.
+
+ - clock-names : Includes the following entries:
+ "core" The core clock of spdif controller.
+ "rxtx<0-7>" Clock source list for tx and rx clock.
+ This clock list should be identical to the source
+ list connecting to the spdif clock mux in "SPDIF
+ Transceiver Clock Diagram" of SoC reference manual.
+ It can also be referred to TxClk_Source bit of
+ register SPDIF_STC.
+
+ - big-endian : If this property is absent, the native endian mode
+ will be in use as default, or the big endian mode
+ will be in use for all the device registers.
+
+Example:
+
+spdif: spdif@02004000 {
+ compatible = "fsl,imx35-spdif";
+ reg = <0x02004000 0x4000>;
+ interrupts = <0 52 0x04>;
+ dmas = <&sdma 14 18 0>,
+ <&sdma 15 18 0>;
+ dma-names = "rx", "tx";
+
+ clocks = <&clks 197>, <&clks 3>,
+ <&clks 197>, <&clks 107>,
+ <&clks 0>, <&clks 118>,
+ <&clks 62>, <&clks 139>,
+ <&clks 0>;
+ clock-names = "core", "rxtx0",
+ "rxtx1", "rxtx2",
+ "rxtx3", "rxtx4",
+ "rxtx5", "rxtx6",
+ "rxtx7";
+
+ big-endian;
+ status = "okay";
+};
diff --git a/Documentation/devicetree/bindings/sound/fsl,ssi.txt b/Documentation/devicetree/bindings/sound/fsl,ssi.txt
new file mode 100644
index 000000000..5b76be45d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/fsl,ssi.txt
@@ -0,0 +1,87 @@
+Freescale Synchronous Serial Interface
+
+The SSI is a serial device that communicates with audio codecs. It can
+be programmed in AC97, I2S, left-justified, or right-justified modes.
+
+Required properties:
+- compatible: Compatible list, should contain one of the following
+ compatibles:
+ fsl,mpc8610-ssi
+ fsl,imx51-ssi
+ fsl,imx35-ssi
+ fsl,imx21-ssi
+- cell-index: The SSI, <0> = SSI1, <1> = SSI2, and so on.
+- reg: Offset and length of the register set for the device.
+- interrupts: <a b> where a is the interrupt number and b is a
+ field that represents an encoding of the sense and
+ level information for the interrupt. This should be
+ encoded based on the information in section 2)
+ depending on the type of interrupt controller you
+ have.
+- interrupt-parent: The phandle for the interrupt controller that
+ services interrupts for this device.
+- fsl,playback-dma: Phandle to a node for the DMA channel to use for
+ playback of audio. This is typically dictated by SOC
+ design. See the notes below.
+- fsl,capture-dma: Phandle to a node for the DMA channel to use for
+ capture (recording) of audio. This is typically dictated
+ by SOC design. See the notes below.
+- fsl,fifo-depth: The number of elements in the transmit and receive FIFOs.
+ This number is the maximum allowed value for SFCSR[TFWM0].
+- fsl,ssi-asynchronous:
+ If specified, the SSI is to be programmed in asynchronous
+ mode. In this mode, pins SRCK, STCK, SRFS, and STFS must
+ all be connected to valid signals. In synchronous mode,
+ SRCK and SRFS are ignored. Asynchronous mode allows
+ playback and capture to use different sample sizes and
+ sample rates. Some drivers may require that SRCK and STCK
+ be connected together, and SRFS and STFS be connected
+ together. This would still allow different sample sizes,
+ but not different sample rates.
+ - clocks: "ipg" - Required clock for the SSI unit
+ "baud" - Required clock for SSI master mode. Otherwise this
+ clock is not used
+
+Required are also ac97 link bindings if ac97 is used. See
+Documentation/devicetree/bindings/sound/soc-ac97link.txt for the necessary
+bindings.
+
+Optional properties:
+- codec-handle: Phandle to a 'codec' node that defines an audio
+ codec connected to this SSI. This node is typically
+ a child of an I2C or other control node.
+- fsl,fiq-stream-filter: Bool property. Disabled DMA and use FIQ instead to
+ filter the codec stream. This is necessary for some boards
+ where an incompatible codec is connected to this SSI, e.g.
+ on pca100 and pcm043.
+- dmas: Generic dma devicetree binding as described in
+ Documentation/devicetree/bindings/dma/dma.txt.
+- dma-names: Two dmas have to be defined, "tx" and "rx", if fsl,imx-fiq
+ is not defined.
+- fsl,mode: The operating mode for the AC97 interface only.
+ "ac97-slave" - AC97 mode, SSI is clock slave
+ "ac97-master" - AC97 mode, SSI is clock master
+
+Child 'codec' node required properties:
+- compatible: Compatible list, contains the name of the codec
+
+Child 'codec' node optional properties:
+- clock-frequency: The frequency of the input clock, which typically comes
+ from an on-board dedicated oscillator.
+
+Notes on fsl,playback-dma and fsl,capture-dma:
+
+On SOCs that have an SSI, specific DMA channels are hard-wired for playback
+and capture. On the MPC8610, for example, SSI1 must use DMA channel 0 for
+playback and DMA channel 1 for capture. SSI2 must use DMA channel 2 for
+playback and DMA channel 3 for capture. The developer can choose which
+DMA controller to use, but the channels themselves are hard-wired. The
+purpose of these two properties is to represent this hardware design.
+
+The device tree nodes for the DMA channels that are referenced by
+"fsl,playback-dma" and "fsl,capture-dma" must be marked as compatible with
+"fsl,ssi-dma-channel". The SOC-specific compatible string (e.g.
+"fsl,mpc8610-dma-channel") can remain. If these nodes are left as
+"fsl,elo-dma-channel" or "fsl,eloplus-dma-channel", then the generic Elo DMA
+drivers (fsldma) will attempt to use them, and it will conflict with the
+sound drivers.
diff --git a/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt b/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
new file mode 100644
index 000000000..a96774c19
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/fsl-asoc-card.txt
@@ -0,0 +1,82 @@
+Freescale Generic ASoC Sound Card with ASRC support
+
+The Freescale Generic ASoC Sound Card can be used, ideally, for all Freescale
+SoCs connecting with external CODECs.
+
+The idea of this generic sound card is a bit like ASoC Simple Card. However,
+for Freescale SoCs (especially those released in recent years), most of them
+have ASRC (Documentation/devicetree/bindings/sound/fsl,asrc.txt) inside. And
+this is a specific feature that might be painstakingly controlled and merged
+into the Simple Card.
+
+So having this generic sound card allows all Freescale SoC users to benefit
+from the simplification of a new card support and the capability of the wide
+sample rates support through ASRC.
+
+Note: The card is initially designed for those sound cards who use I2S and
+ PCM DAI formats. However, it'll be also possible to support those non
+ I2S/PCM type sound cards, such as S/PDIF audio and HDMI audio, as long
+ as the driver has been properly upgraded.
+
+
+The compatible list for this generic sound card currently:
+ "fsl,imx-audio-cs42888"
+
+ "fsl,imx-audio-wm8962"
+ (compatible with Documentation/devicetree/bindings/sound/imx-audio-wm8962.txt)
+
+ "fsl,imx-audio-sgtl5000"
+ (compatible with Documentation/devicetree/bindings/sound/imx-audio-sgtl5000.txt)
+
+Required properties:
+
+ - compatible : Contains one of entries in the compatible list.
+
+ - model : The user-visible name of this sound complex
+
+ - audio-cpu : The phandle of an CPU DAI controller
+
+ - audio-codec : The phandle of an audio codec
+
+ - audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the
+ connection's sink, the second being the connection's
+ source. There're a few pre-designed board connectors:
+ * Line Out Jack
+ * Line In Jack
+ * Headphone Jack
+ * Mic Jack
+ * Ext Spk
+ * AMIC (stands for Analog Microphone Jack)
+ * DMIC (stands for Digital Microphone Jack)
+
+ Note: The "Mic Jack" and "AMIC" are redundant while
+ coexsiting in order to support the old bindings
+ of wm8962 and sgtl5000.
+
+Optional properties:
+
+ - audio-asrc : The phandle of ASRC. It can be absent if there's no
+ need to add ASRC support via DPCM.
+
+Example:
+sound-cs42888 {
+ compatible = "fsl,imx-audio-cs42888";
+ model = "cs42888-audio";
+ audio-cpu = <&esai>;
+ audio-asrc = <&asrc>;
+ audio-codec = <&cs42888>;
+ audio-routing =
+ "Line Out Jack", "AOUT1L",
+ "Line Out Jack", "AOUT1R",
+ "Line Out Jack", "AOUT2L",
+ "Line Out Jack", "AOUT2R",
+ "Line Out Jack", "AOUT3L",
+ "Line Out Jack", "AOUT3R",
+ "Line Out Jack", "AOUT4L",
+ "Line Out Jack", "AOUT4R",
+ "AIN1L", "Line In Jack",
+ "AIN1R", "Line In Jack",
+ "AIN2L", "Line In Jack",
+ "AIN2R", "Line In Jack";
+};
diff --git a/Documentation/devicetree/bindings/sound/fsl-sai.txt b/Documentation/devicetree/bindings/sound/fsl-sai.txt
new file mode 100644
index 000000000..044e5d76e
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/fsl-sai.txt
@@ -0,0 +1,73 @@
+Freescale Synchronous Audio Interface (SAI).
+
+The SAI is based on I2S module that used communicating with audio codecs,
+which provides a synchronous audio interface that supports fullduplex
+serial interfaces with frame synchronization such as I2S, AC97, TDM, and
+codec/DSP interfaces.
+
+Required properties:
+
+ - compatible : Compatible list, contains "fsl,vf610-sai" or
+ "fsl,imx6sx-sai".
+
+ - reg : Offset and length of the register set for the device.
+
+ - clocks : Must contain an entry for each entry in clock-names.
+
+ - clock-names : Must include the "bus" for register access and
+ "mclk1", "mclk2", "mclk3" for bit clock and frame
+ clock providing.
+ - dmas : Generic dma devicetree binding as described in
+ Documentation/devicetree/bindings/dma/dma.txt.
+
+ - dma-names : Two dmas have to be defined, "tx" and "rx".
+
+ - pinctrl-names : Must contain a "default" entry.
+
+ - pinctrl-NNN : One property must exist for each entry in
+ pinctrl-names. See ../pinctrl/pinctrl-bindings.txt
+ for details of the property values.
+
+ - big-endian : Boolean property, required if all the FTM_PWM
+ registers are big-endian rather than little-endian.
+
+ - lsb-first : Configures whether the LSB or the MSB is transmitted
+ first for the fifo data. If this property is absent,
+ the MSB is transmitted first as default, or the LSB
+ is transmitted first.
+
+ - fsl,sai-synchronous-rx: This is a boolean property. If present, indicating
+ that SAI will work in the synchronous mode (sync Tx
+ with Rx) which means both the transimitter and the
+ receiver will send and receive data by following
+ receiver's bit clocks and frame sync clocks.
+
+ - fsl,sai-asynchronous: This is a boolean property. If present, indicating
+ that SAI will work in the asynchronous mode, which
+ means both transimitter and receiver will send and
+ receive data by following their own bit clocks and
+ frame sync clocks separately.
+
+Note:
+- If both fsl,sai-asynchronous and fsl,sai-synchronous-rx are absent, the
+ default synchronous mode (sync Rx with Tx) will be used, which means both
+ transimitter and receiver will send and receive data by following clocks
+ of transimitter.
+- fsl,sai-asynchronous and fsl,sai-synchronous-rx are exclusive.
+
+Example:
+sai2: sai@40031000 {
+ compatible = "fsl,vf610-sai";
+ reg = <0x40031000 0x1000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_sai2_1>;
+ clocks = <&clks VF610_CLK_PLATFORM_BUS>,
+ <&clks VF610_CLK_SAI2>,
+ <&clks 0>, <&clks 0>;
+ clock-names = "bus", "mclk1", "mclk2", "mclk3";
+ dma-names = "tx", "rx";
+ dmas = <&edma0 0 VF610_EDMA_MUXID0_SAI2_TX>,
+ <&edma0 0 VF610_EDMA_MUXID0_SAI2_RX>;
+ big-endian;
+ lsb-first;
+};
diff --git a/Documentation/devicetree/bindings/sound/hdmi.txt b/Documentation/devicetree/bindings/sound/hdmi.txt
new file mode 100644
index 000000000..31af7bca3
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/hdmi.txt
@@ -0,0 +1,17 @@
+Device-Tree bindings for dummy HDMI codec
+
+Required properties:
+ - compatible: should be "linux,hdmi-audio".
+
+CODEC output pins:
+ * TX
+
+CODEC input pins:
+ * RX
+
+Example node:
+
+ hdmi_audio: hdmi_audio@0 {
+ compatible = "linux,hdmi-audio";
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/sound/imx-audio-es8328.txt b/Documentation/devicetree/bindings/sound/imx-audio-es8328.txt
new file mode 100644
index 000000000..07b68ab20
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/imx-audio-es8328.txt
@@ -0,0 +1,60 @@
+Freescale i.MX audio complex with ES8328 codec
+
+Required properties:
+- compatible : "fsl,imx-audio-es8328"
+- model : The user-visible name of this sound complex
+- ssi-controller : The phandle of the i.MX SSI controller
+- jack-gpio : Optional GPIO for headphone jack
+- audio-amp-supply : Power regulator for speaker amps
+- audio-codec : The phandle of the ES8328 audio codec
+- audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the
+ connection's sink, the second being the connection's
+ source. Valid names could be power supplies, ES8328
+ pins, and the jacks on the board:
+
+ Power supplies:
+ * audio-amp
+
+ ES8328 pins:
+ * LOUT1
+ * LOUT2
+ * ROUT1
+ * ROUT2
+ * LINPUT1
+ * LINPUT2
+ * RINPUT1
+ * RINPUT2
+ * Mic PGA
+
+ Board connectors:
+ * Headphone
+ * Speaker
+ * Mic Jack
+- mux-int-port : The internal port of the i.MX audio muxer (AUDMUX)
+- mux-ext-port : The external port of the i.MX audio muxer (AUDMIX)
+
+Note: The AUDMUX port numbering should start at 1, which is consistent with
+hardware manual.
+
+Example:
+
+sound {
+ compatible = "fsl,imx-audio-es8328";
+ model = "imx-audio-es8328";
+ ssi-controller = <&ssi1>;
+ audio-codec = <&codec>;
+ jack-gpio = <&gpio5 15 0>;
+ audio-amp-supply = <&reg_audio_amp>;
+ audio-routing =
+ "Speaker", "LOUT2",
+ "Speaker", "ROUT2",
+ "Speaker", "audio-amp",
+ "Headphone", "ROUT1",
+ "Headphone", "LOUT1",
+ "LINPUT1", "Mic Jack",
+ "RINPUT1", "Mic Jack",
+ "Mic Jack", "Mic Bias";
+ mux-int-port = <1>;
+ mux-ext-port = <3>;
+};
diff --git a/Documentation/devicetree/bindings/sound/imx-audio-sgtl5000.txt b/Documentation/devicetree/bindings/sound/imx-audio-sgtl5000.txt
new file mode 100644
index 000000000..2f89db88f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/imx-audio-sgtl5000.txt
@@ -0,0 +1,56 @@
+Freescale i.MX audio complex with SGTL5000 codec
+
+Required properties:
+
+ - compatible : "fsl,imx-audio-sgtl5000"
+
+ - model : The user-visible name of this sound complex
+
+ - ssi-controller : The phandle of the i.MX SSI controller
+
+ - audio-codec : The phandle of the SGTL5000 audio codec
+
+ - audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the
+ connection's sink, the second being the connection's
+ source. Valid names could be power supplies, SGTL5000
+ pins, and the jacks on the board:
+
+ Power supplies:
+ * Mic Bias
+
+ SGTL5000 pins:
+ * MIC_IN
+ * LINE_IN
+ * HP_OUT
+ * LINE_OUT
+
+ Board connectors:
+ * Mic Jack
+ * Line In Jack
+ * Headphone Jack
+ * Line Out Jack
+ * Ext Spk
+
+ - mux-int-port : The internal port of the i.MX audio muxer (AUDMUX)
+
+ - mux-ext-port : The external port of the i.MX audio muxer
+
+Note: The AUDMUX port numbering should start at 1, which is consistent with
+hardware manual.
+
+Example:
+
+sound {
+ compatible = "fsl,imx51-babbage-sgtl5000",
+ "fsl,imx-audio-sgtl5000";
+ model = "imx51-babbage-sgtl5000";
+ ssi-controller = <&ssi1>;
+ audio-codec = <&sgtl5000>;
+ audio-routing =
+ "MIC_IN", "Mic Jack",
+ "Mic Jack", "Mic Bias",
+ "Headphone Jack", "HP_OUT";
+ mux-int-port = <1>;
+ mux-ext-port = <3>;
+};
diff --git a/Documentation/devicetree/bindings/sound/imx-audio-spdif.txt b/Documentation/devicetree/bindings/sound/imx-audio-spdif.txt
new file mode 100644
index 000000000..da84a442c
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/imx-audio-spdif.txt
@@ -0,0 +1,36 @@
+Freescale i.MX audio complex with S/PDIF transceiver
+
+Required properties:
+
+ - compatible : "fsl,imx-audio-spdif"
+
+ - model : The user-visible name of this sound complex
+
+ - spdif-controller : The phandle of the i.MX S/PDIF controller
+
+
+Optional properties:
+
+ - spdif-out : This is a boolean property. If present, the
+ transmitting function of S/PDIF will be enabled,
+ indicating there's a physical S/PDIF out connector
+ or jack on the board or it's connecting to some
+ other IP block, such as an HDMI encoder or
+ display-controller.
+
+ - spdif-in : This is a boolean property. If present, the receiving
+ function of S/PDIF will be enabled, indicating there
+ is a physical S/PDIF in connector/jack on the board.
+
+* Note: At least one of these two properties should be set in the DT binding.
+
+
+Example:
+
+sound-spdif {
+ compatible = "fsl,imx-audio-spdif";
+ model = "imx-spdif";
+ spdif-controller = <&spdif>;
+ spdif-out;
+ spdif-in;
+};
diff --git a/Documentation/devicetree/bindings/sound/imx-audio-wm8962.txt b/Documentation/devicetree/bindings/sound/imx-audio-wm8962.txt
new file mode 100644
index 000000000..acea71bee
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/imx-audio-wm8962.txt
@@ -0,0 +1,53 @@
+Freescale i.MX audio complex with WM8962 codec
+
+Required properties:
+
+ - compatible : "fsl,imx-audio-wm8962"
+
+ - model : The user-visible name of this sound complex
+
+ - ssi-controller : The phandle of the i.MX SSI controller
+
+ - audio-codec : The phandle of the WM8962 audio codec
+
+ - audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the
+ connection's sink, the second being the connection's
+ source. Valid names could be power supplies, WM8962
+ pins, and the jacks on the board:
+
+ Power supplies:
+ * Mic Bias
+
+ Board connectors:
+ * Mic Jack
+ * Headphone Jack
+ * Ext Spk
+
+ - mux-int-port : The internal port of the i.MX audio muxer (AUDMUX)
+
+ - mux-ext-port : The external port of the i.MX audio muxer
+
+Note: The AUDMUX port numbering should start at 1, which is consistent with
+hardware manual.
+
+Example:
+
+sound {
+ compatible = "fsl,imx6q-sabresd-wm8962",
+ "fsl,imx-audio-wm8962";
+ model = "wm8962-audio";
+ ssi-controller = <&ssi2>;
+ audio-codec = <&codec>;
+ audio-routing =
+ "Headphone Jack", "HPOUTL",
+ "Headphone Jack", "HPOUTR",
+ "Ext Spk", "SPKOUTL",
+ "Ext Spk", "SPKOUTR",
+ "MICBIAS", "AMIC",
+ "IN3R", "MICBIAS",
+ "DMIC", "MICBIAS",
+ "DMICDAT", "DMIC";
+ mux-int-port = <2>;
+ mux-ext-port = <3>;
+};
diff --git a/Documentation/devicetree/bindings/sound/imx-audmux.txt b/Documentation/devicetree/bindings/sound/imx-audmux.txt
new file mode 100644
index 000000000..b30a737e2
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/imx-audmux.txt
@@ -0,0 +1,28 @@
+Freescale Digital Audio Mux (AUDMUX) device
+
+Required properties:
+
+ - compatible : "fsl,imx21-audmux" for AUDMUX version firstly used
+ on i.MX21, or "fsl,imx31-audmux" for the version
+ firstly used on i.MX31.
+
+ - reg : Should contain AUDMUX registers location and length.
+
+An initial configuration can be setup using child nodes.
+
+Required properties of optional child nodes:
+
+ - fsl,audmux-port : Integer of the audmux port that is configured by this
+ child node.
+
+ - fsl,port-config : List of configuration options for the specific port.
+ For imx31-audmux and above, it is a list of tuples
+ <ptcr pdcr>. For imx21-audmux it is a list of pcr
+ values.
+
+Example:
+
+audmux@021d8000 {
+ compatible = "fsl,imx6q-audmux", "fsl,imx31-audmux";
+ reg = <0x021d8000 0x4000>;
+};
diff --git a/Documentation/devicetree/bindings/sound/ingenic,jz4740-i2s.txt b/Documentation/devicetree/bindings/sound/ingenic,jz4740-i2s.txt
new file mode 100644
index 000000000..b623d5000
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ingenic,jz4740-i2s.txt
@@ -0,0 +1,23 @@
+Ingenic JZ4740 I2S controller
+
+Required properties:
+- compatible : "ingenic,jz4740-i2s" or "ingenic,jz4780-i2s"
+- reg : I2S registers location and length
+- clocks : AIC and I2S PLL clock specifiers.
+- clock-names: "aic" and "i2s"
+- dmas: DMA controller phandle and DMA request line for I2S Tx and Rx channels
+- dma-names: Must be "tx" and "rx"
+
+Example:
+
+i2s: i2s@10020000 {
+ compatible = "ingenic,jz4740-i2s";
+ reg = <0x10020000 0x94>;
+
+ clocks = <&cgu JZ4740_CLK_AIC>, <&cgu JZ4740_CLK_I2SPLL>;
+ clock-names = "aic", "i2s";
+
+ dmas = <&dma 2>, <&dma 3>;
+ dma-names = "tx", "rx";
+
+};
diff --git a/Documentation/devicetree/bindings/sound/max98090.txt b/Documentation/devicetree/bindings/sound/max98090.txt
new file mode 100644
index 000000000..aa802a274
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/max98090.txt
@@ -0,0 +1,51 @@
+MAX98090 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+- compatible : "maxim,max98090" or "maxim,max98091".
+
+- reg : The I2C address of the device.
+
+- interrupts : The CODEC's interrupt output.
+
+Optional properties:
+
+- clocks: The phandle of the master clock to the CODEC
+
+- clock-names: Should be "mclk"
+
+- maxim,dmic-freq: Frequency at which to clock DMIC
+
+Pins on the device (for linking into audio routes):
+
+ * MIC1
+ * MIC2
+ * DMICL
+ * DMICR
+ * IN1
+ * IN2
+ * IN3
+ * IN4
+ * IN5
+ * IN6
+ * IN12
+ * IN34
+ * IN56
+ * HPL
+ * HPR
+ * SPKL
+ * SPKR
+ * RCVL
+ * RCVR
+ * MICBIAS
+
+Example:
+
+audio-codec@10 {
+ compatible = "maxim,max98090";
+ reg = <0x10>;
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(H, 4) GPIO_ACTIVE_HIGH>;
+};
diff --git a/Documentation/devicetree/bindings/sound/max98095.txt b/Documentation/devicetree/bindings/sound/max98095.txt
new file mode 100644
index 000000000..318a4c82f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/max98095.txt
@@ -0,0 +1,22 @@
+MAX98095 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+- compatible : "maxim,max98095".
+
+- reg : The I2C address of the device.
+
+Optional properties:
+
+- clocks: The phandle of the master clock to the CODEC
+
+- clock-names: Should be "mclk"
+
+Example:
+
+max98095: codec@11 {
+ compatible = "maxim,max98095";
+ reg = <0x11>;
+};
diff --git a/Documentation/devicetree/bindings/sound/max98357a.txt b/Documentation/devicetree/bindings/sound/max98357a.txt
new file mode 100644
index 000000000..a7a149a23
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/max98357a.txt
@@ -0,0 +1,14 @@
+Maxim MAX98357A audio DAC
+
+This node models the Maxim MAX98357A DAC.
+
+Required properties:
+- compatible : "maxim,max98357a"
+- sdmode-gpios : GPIO specifier for the GPIO -> DAC SDMODE pin
+
+Example:
+
+max98357a {
+ compatible = "maxim,max98357a";
+ sdmode-gpios = <&qcom_pinmux 25 0>;
+};
diff --git a/Documentation/devicetree/bindings/sound/max98925.txt b/Documentation/devicetree/bindings/sound/max98925.txt
new file mode 100644
index 000000000..27be63e2a
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/max98925.txt
@@ -0,0 +1,22 @@
+max98925 audio CODEC
+
+This device supports I2C.
+
+Required properties:
+
+ - compatible : "maxim,max98925"
+
+ - vmon-slot-no : slot number used to send voltage information
+
+ - imon-slot-no : slot number used to send current information
+
+ - reg : the I2C address of the device for I2C
+
+Example:
+
+codec: max98925@1a {
+ compatible = "maxim,max98925";
+ vmon-slot-no = <0>;
+ imon-slot-no = <2>;
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/mrvl,pxa-ssp.txt b/Documentation/devicetree/bindings/sound/mrvl,pxa-ssp.txt
new file mode 100644
index 000000000..74c9ba6c2
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/mrvl,pxa-ssp.txt
@@ -0,0 +1,28 @@
+Marvell PXA SSP CPU DAI bindings
+
+Required properties:
+
+ compatible Must be "mrvl,pxa-ssp-dai"
+ port A phandle reference to a PXA ssp upstream device
+
+Example:
+
+ /* upstream device */
+
+ ssp0: ssp@41000000 {
+ compatible = "mrvl,pxa3xx-ssp";
+ reg = <0x41000000 0x40>;
+ interrupts = <24>;
+ clock-names = "pxa27x-ssp.0";
+ dmas = <&dma 13
+ &dma 14>;
+ dma-names = "rx", "tx";
+ };
+
+ /* DAI as user */
+
+ ssp_dai0: ssp_dai@0 {
+ compatible = "mrvl,pxa-ssp-dai";
+ port = <&ssp0>;
+ };
+
diff --git a/Documentation/devicetree/bindings/sound/mrvl,pxa2xx-pcm.txt b/Documentation/devicetree/bindings/sound/mrvl,pxa2xx-pcm.txt
new file mode 100644
index 000000000..551fbb834
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/mrvl,pxa2xx-pcm.txt
@@ -0,0 +1,15 @@
+DT bindings for ARM PXA2xx PCM platform driver
+
+This is just a dummy driver that registers the PXA ASoC platform driver.
+It does not have any resources assigned.
+
+Required properties:
+
+ - compatible 'mrvl,pxa-pcm-audio'
+
+Example:
+
+ pxa_pcm_audio: snd_soc_pxa_audio {
+ compatible = "mrvl,pxa-pcm-audio";
+ };
+
diff --git a/Documentation/devicetree/bindings/sound/mvebu-audio.txt b/Documentation/devicetree/bindings/sound/mvebu-audio.txt
new file mode 100644
index 000000000..cb8c07c81
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/mvebu-audio.txt
@@ -0,0 +1,34 @@
+* mvebu (Kirkwood, Dove, Armada 370) audio controller
+
+Required properties:
+
+- compatible:
+ "marvell,kirkwood-audio" for Kirkwood platforms
+ "marvell,dove-audio" for Dove platforms
+ "marvell,armada370-audio" for Armada 370 platforms
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- interrupts:
+ with "marvell,kirkwood-audio", the audio interrupt
+ with "marvell,dove-audio", a list of two interrupts, the first for
+ the data flow, and the second for errors.
+
+- clocks: one or two phandles.
+ The first one is mandatory and defines the internal clock.
+ The second one is optional and defines an external clock.
+
+- clock-names: names associated to the clocks:
+ "internal" for the internal clock
+ "extclk" for the external clock
+
+Example:
+
+i2s1: audio-controller@b4000 {
+ compatible = "marvell,dove-audio";
+ reg = <0xb4000 0x2210>;
+ interrupts = <21>, <22>;
+ clocks = <&gate_clk 13>;
+ clock-names = "internal";
+};
diff --git a/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt b/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt
new file mode 100644
index 000000000..601c518ed
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/mxs-audio-sgtl5000.txt
@@ -0,0 +1,17 @@
+* Freescale MXS audio complex with SGTL5000 codec
+
+Required properties:
+- compatible: "fsl,mxs-audio-sgtl5000"
+- model: The user-visible name of this sound complex
+- saif-controllers: The phandle list of the MXS SAIF controller
+- audio-codec: The phandle of the SGTL5000 audio codec
+
+Example:
+
+sound {
+ compatible = "fsl,imx28-evk-sgtl5000",
+ "fsl,mxs-audio-sgtl5000";
+ model = "imx28-evk-sgtl5000";
+ saif-controllers = <&saif0 &saif1>;
+ audio-codec = <&sgtl5000>;
+};
diff --git a/Documentation/devicetree/bindings/sound/mxs-saif.txt b/Documentation/devicetree/bindings/sound/mxs-saif.txt
new file mode 100644
index 000000000..7ba07a118
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/mxs-saif.txt
@@ -0,0 +1,41 @@
+* Freescale MXS Serial Audio Interface (SAIF)
+
+Required properties:
+- compatible: Should be "fsl,<chip>-saif"
+- reg: Should contain registers location and length
+- interrupts: Should contain ERROR interrupt number
+- dmas: DMA specifier, consisting of a phandle to DMA controller node
+ and SAIF DMA channel ID.
+ Refer to dma.txt and fsl-mxs-dma.txt for details.
+- dma-names: Must be "rx-tx".
+
+Optional properties:
+- fsl,saif-master: phandle to the master SAIF. It's only required for
+ the slave SAIF.
+
+Note: Each SAIF controller should have an alias correctly numbered
+in "aliases" node.
+
+Example:
+
+aliases {
+ saif0 = &saif0;
+ saif1 = &saif1;
+};
+
+saif0: saif@80042000 {
+ compatible = "fsl,imx28-saif";
+ reg = <0x80042000 2000>;
+ interrupts = <59>;
+ dmas = <&dma_apbx 4>;
+ dma-names = "rx-tx";
+};
+
+saif1: saif@80046000 {
+ compatible = "fsl,imx28-saif";
+ reg = <0x80046000 2000>;
+ interrupts = <58>;
+ dmas = <&dma_apbx 5>;
+ dma-names = "rx-tx";
+ fsl,saif-master = <&saif0>;
+};
diff --git a/Documentation/devicetree/bindings/sound/nokia,rx51.txt b/Documentation/devicetree/bindings/sound/nokia,rx51.txt
new file mode 100644
index 000000000..72f93d996
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nokia,rx51.txt
@@ -0,0 +1,27 @@
+* Nokia N900 audio setup
+
+Required properties:
+- compatible: Should contain "nokia,n900-audio"
+- nokia,cpu-dai: phandle for the McBSP node
+- nokia,audio-codec: phandles for the main TLV320AIC3X node and the
+ auxiliary TLV320AIC3X node (in this order)
+- nokia,headphone-amplifier: phandle for the TPA6130A2 node
+- tvout-selection-gpios: GPIO for tvout selection
+- jack-detection-gpios: GPIO for jack detection
+- eci-switch-gpios: GPIO for ECI (Enhancement Control Interface) switch
+- speaker-amplifier-gpios: GPIO for speaker amplifier
+
+Example:
+
+sound {
+ compatible = "nokia,n900-audio";
+
+ nokia,cpu-dai = <&mcbsp2>;
+ nokia,audio-codec = <&tlv320aic3x>, <&tlv320aic3x_aux>;
+ nokia,headphone-amplifier = <&tpa6130a2>;
+
+ tvout-selection-gpios = <&gpio2 8 GPIO_ACTIVE_HIGH>; /* 40 */
+ jack-detection-gpios = <&gpio6 17 GPIO_ACTIVE_HIGH>; /* 177 */
+ eci-switch-gpios = <&gpio6 22 GPIO_ACTIVE_HIGH>; /* 182 */
+ speaker-amplifier-gpios = <&twl_gpio 7 GPIO_ACTIVE_HIGH>;
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-alc5632.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-alc5632.txt
new file mode 100644
index 000000000..57f40f934
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-alc5632.txt
@@ -0,0 +1,48 @@
+NVIDIA Tegra audio complex
+
+Required properties:
+- compatible : "nvidia,tegra-audio-alc5632"
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - pll_a
+ - pll_a_out0
+ - mclk (The Tegra cdev1/extern1 clock, which feeds the CODEC's mclk)
+- nvidia,model : The user-visible name of this sound complex.
+- nvidia,audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source. Valid names for sources and
+ sinks are the ALC5632's pins as documented in the binding for the device
+ and:
+
+ * Headset Stereophone
+ * Int Spk
+ * Headset Mic
+ * Digital Mic
+
+- nvidia,i2s-controller : The phandle of the Tegra I2S controller
+- nvidia,audio-codec : The phandle of the ALC5632 audio codec
+
+Example:
+
+sound {
+ compatible = "nvidia,tegra-audio-alc5632-paz00",
+ "nvidia,tegra-audio-alc5632";
+
+ nvidia,model = "Compal PAZ00";
+
+ nvidia,audio-routing =
+ "Int Spk", "SPK_OUTP",
+ "Int Spk", "SPK_OUTN",
+ "Headset Mic","MICBIAS1",
+ "MIC1_N", "Headset Mic",
+ "MIC1_P", "Headset Mic",
+ "Headset Stereophone", "HP_OUT_R",
+ "Headset Stereophone", "HP_OUT_L";
+
+ nvidia,i2s-controller = <&tegra_i2s1>;
+ nvidia,audio-codec = <&alc5632>;
+
+ clocks = <&tegra_car 112>, <&tegra_car 113>, <&tegra_car 93>;
+ clock-names = "pll_a", "pll_a_out0", "mclk";
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max98090.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max98090.txt
new file mode 100644
index 000000000..c3495beba
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-max98090.txt
@@ -0,0 +1,53 @@
+NVIDIA Tegra audio complex, with MAX98090 CODEC
+
+Required properties:
+- compatible : "nvidia,tegra-audio-max98090"
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - pll_a
+ - pll_a_out0
+ - mclk (The Tegra cdev1/extern1 clock, which feeds the CODEC's mclk)
+- nvidia,model : The user-visible name of this sound complex.
+- nvidia,audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source. Valid names for sources and
+ sinks are the MAX98090's pins (as documented in its binding), and the jacks
+ on the board:
+
+ * Headphones
+ * Speakers
+ * Mic Jack
+ * Int Mic
+
+- nvidia,i2s-controller : The phandle of the Tegra I2S controller that's
+ connected to the CODEC.
+- nvidia,audio-codec : The phandle of the MAX98090 audio codec.
+
+Optional properties:
+- nvidia,hp-det-gpios : The GPIO that detect headphones are plugged in
+- nvidia,mic-det-gpios : The GPIO that detect microphones are plugged in
+
+Example:
+
+sound {
+ compatible = "nvidia,tegra-audio-max98090-venice2",
+ "nvidia,tegra-audio-max98090";
+ nvidia,model = "NVIDIA Tegra Venice2";
+
+ nvidia,audio-routing =
+ "Headphones", "HPR",
+ "Headphones", "HPL",
+ "Speakers", "SPKR",
+ "Speakers", "SPKL",
+ "Mic Jack", "MICBIAS",
+ "IN34", "Mic Jack";
+
+ nvidia,i2s-controller = <&tegra_i2s1>;
+ nvidia,audio-codec = <&acodec>;
+
+ clocks = <&tegra_car TEGRA124_CLK_PLL_A>,
+ <&tegra_car TEGRA124_CLK_PLL_A_OUT0>,
+ <&tegra_car TEGRA124_CLK_EXTERN1>;
+ clock-names = "pll_a", "pll_a_out0", "mclk";
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5640.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5640.txt
new file mode 100644
index 000000000..7788808dc
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5640.txt
@@ -0,0 +1,52 @@
+NVIDIA Tegra audio complex, with RT5640 CODEC
+
+Required properties:
+- compatible : "nvidia,tegra-audio-rt5640"
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - pll_a
+ - pll_a_out0
+ - mclk (The Tegra cdev1/extern1 clock, which feeds the CODEC's mclk)
+- nvidia,model : The user-visible name of this sound complex.
+- nvidia,audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source. Valid names for sources and
+ sinks are the RT5640's pins (as documented in its binding), and the jacks
+ on the board:
+
+ * Headphones
+ * Speakers
+ * Mic Jack
+
+- nvidia,i2s-controller : The phandle of the Tegra I2S controller that's
+ connected to the CODEC.
+- nvidia,audio-codec : The phandle of the RT5640 audio codec. This binding
+ assumes that AIF1 on the CODEC is connected to Tegra.
+
+Optional properties:
+- nvidia,hp-det-gpios : The GPIO that detects headphones are plugged in
+
+Example:
+
+sound {
+ compatible = "nvidia,tegra-audio-rt5640-dalmore",
+ "nvidia,tegra-audio-rt5640";
+ nvidia,model = "NVIDIA Tegra Dalmore";
+
+ nvidia,audio-routing =
+ "Headphones", "HPOR",
+ "Headphones", "HPOL",
+ "Speakers", "SPORP",
+ "Speakers", "SPORN",
+ "Speakers", "SPOLP",
+ "Speakers", "SPOLN";
+
+ nvidia,i2s-controller = <&tegra_i2s1>;
+ nvidia,audio-codec = <&rt5640>;
+
+ nvidia,hp-det-gpios = <&gpio 143 0>; /* GPIO PR7 */
+
+ clocks = <&tegra_car 216>, <&tegra_car 217>, <&tegra_car 120>;
+ clock-names = "pll_a", "pll_a_out0", "mclk";
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5677.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5677.txt
new file mode 100644
index 000000000..a4589cda2
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-rt5677.txt
@@ -0,0 +1,67 @@
+NVIDIA Tegra audio complex, with RT5677 CODEC
+
+Required properties:
+- compatible : "nvidia,tegra-audio-rt5677"
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - pll_a
+ - pll_a_out0
+ - mclk (The Tegra cdev1/extern1 clock, which feeds the CODEC's mclk)
+- nvidia,model : The user-visible name of this sound complex.
+- nvidia,audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source. Valid names for sources and
+ sinks are the RT5677's pins (as documented in its binding), and the jacks
+ on the board:
+
+ * Headphone
+ * Speaker
+ * Headset Mic
+ * Internal Mic 1
+ * Internal Mic 2
+
+- nvidia,i2s-controller : The phandle of the Tegra I2S controller that's
+ connected to the CODEC.
+- nvidia,audio-codec : The phandle of the RT5677 audio codec. This binding
+ assumes that AIF1 on the CODEC is connected to Tegra.
+
+Optional properties:
+- nvidia,hp-det-gpios : The GPIO that detects headphones are plugged in
+- nvidia,hp-en-gpios : The GPIO that enables headphone amplifier
+- nvidia,mic-present-gpios: The GPIO that mic jack is plugged in
+- nvidia,dmic-clk-en-gpios : The GPIO that gates DMIC clock signal
+
+Example:
+
+sound {
+ compatible = "nvidia,tegra-audio-rt5677-ryu",
+ "nvidia,tegra-audio-rt5677";
+ nvidia,model = "NVIDIA Tegra Ryu";
+
+ nvidia,audio-routing =
+ "Headphone", "LOUT2",
+ "Headphone", "LOUT1",
+ "Headset Mic", "MICBIAS1",
+ "IN1P", "Headset Mic",
+ "IN1N", "Headset Mic",
+ "DMIC L1", "Internal Mic 1",
+ "DMIC R1", "Internal Mic 1",
+ "DMIC L2", "Internal Mic 2",
+ "DMIC R2", "Internal Mic 2",
+ "Speaker", "PDM1L",
+ "Speaker", "PDM1R";
+
+ nvidia,i2s-controller = <&tegra_i2s1>;
+ nvidia,audio-codec = <&rt5677>;
+
+ nvidia,hp-det-gpios = <&gpio TEGRA_GPIO(R, 7) GPIO_ACTIVE_HIGH>;
+ nvidia,mic-present-gpios = <&gpio TEGRA_GPIO(O, 5) GPIO_ACTIVE_LOW>;
+ nvidia,hp-en-gpios = <&rt5677 1 GPIO_ACTIVE_HIGH>;
+ nvidia,dmic-clk-en-gpios = <&rt5677 2 GPIO_ACTIVE_HIGH>;
+
+ clocks = <&tegra_car TEGRA124_CLK_PLL_A>,
+ <&tegra_car TEGRA124_CLK_PLL_A_OUT0>,
+ <&tegra_car TEGRA124_CLK_EXTERN1>;
+ clock-names = "pll_a", "pll_a_out0", "mclk";
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-trimslice.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-trimslice.txt
new file mode 100644
index 000000000..ef1fe7358
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-trimslice.txt
@@ -0,0 +1,21 @@
+NVIDIA Tegra audio complex for TrimSlice
+
+Required properties:
+- compatible : "nvidia,tegra-audio-trimslice"
+- clocks : Must contain an entry for each entry in clock-names.
+- clock-names : Must include the following entries:
+ "pll_a" (The Tegra clock of that name),
+ "pll_a_out0" (The Tegra clock of that name),
+ "mclk" (The Tegra cdev1/extern1 clock, which feeds the CODEC's mclk)
+- nvidia,i2s-controller : The phandle of the Tegra I2S1 controller
+- nvidia,audio-codec : The phandle of the WM8903 audio codec
+
+Example:
+
+sound {
+ compatible = "nvidia,tegra-audio-trimslice";
+ nvidia,i2s-controller = <&tegra_i2s1>;
+ nvidia,audio-codec = <&codec>;
+ clocks = <&tegra_car 112>, <&tegra_car 113>, <&tegra_car 93>;
+ clock-names = "pll_a", "pll_a_out0", "mclk";
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8753.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8753.txt
new file mode 100644
index 000000000..96f6a57dd
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8753.txt
@@ -0,0 +1,40 @@
+NVIDIA Tegra audio complex
+
+Required properties:
+- compatible : "nvidia,tegra-audio-wm8753"
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - pll_a
+ - pll_a_out0
+ - mclk (The Tegra cdev1/extern1 clock, which feeds the CODEC's mclk)
+- nvidia,model : The user-visible name of this sound complex.
+- nvidia,audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source. Valid names for sources and
+ sinks are the WM8753's pins as documented in the binding for the WM8753,
+ and the jacks on the board:
+
+ * Headphone Jack
+ * Mic Jack
+
+- nvidia,i2s-controller : The phandle of the Tegra I2S1 controller
+- nvidia,audio-codec : The phandle of the WM8753 audio codec
+Example:
+
+sound {
+ compatible = "nvidia,tegra-audio-wm8753-whistler",
+ "nvidia,tegra-audio-wm8753"
+ nvidia,model = "tegra-wm8753-harmony";
+
+ nvidia,audio-routing =
+ "Headphone Jack", "LOUT1",
+ "Headphone Jack", "ROUT1";
+
+ nvidia,i2s-controller = <&i2s1>;
+ nvidia,audio-codec = <&wm8753>;
+
+ clocks = <&tegra_car 112>, <&tegra_car 113>, <&tegra_car 93>;
+ clock-names = "pll_a", "pll_a_out0", "mclk";
+};
+
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8903.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8903.txt
new file mode 100644
index 000000000..b795d2828
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm8903.txt
@@ -0,0 +1,60 @@
+NVIDIA Tegra audio complex
+
+Required properties:
+- compatible : "nvidia,tegra-audio-wm8903"
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - pll_a
+ - pll_a_out0
+ - mclk (The Tegra cdev1/extern1 clock, which feeds the CODEC's mclk)
+- nvidia,model : The user-visible name of this sound complex.
+- nvidia,audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source. Valid names for sources and
+ sinks are the WM8903's pins (documented in the WM8903 binding document),
+ and the jacks on the board:
+
+ * Headphone Jack
+ * Int Spk
+ * Mic Jack
+
+- nvidia,i2s-controller : The phandle of the Tegra I2S1 controller
+- nvidia,audio-codec : The phandle of the WM8903 audio codec
+
+Optional properties:
+- nvidia,spkr-en-gpios : The GPIO that enables the speakers
+- nvidia,hp-mute-gpios : The GPIO that mutes the headphones
+- nvidia,hp-det-gpios : The GPIO that detect headphones are plugged in
+- nvidia,int-mic-en-gpios : The GPIO that enables the internal microphone
+- nvidia,ext-mic-en-gpios : The GPIO that enables the external microphone
+
+Example:
+
+sound {
+ compatible = "nvidia,tegra-audio-wm8903-harmony",
+ "nvidia,tegra-audio-wm8903"
+ nvidia,model = "tegra-wm8903-harmony";
+
+ nvidia,audio-routing =
+ "Headphone Jack", "HPOUTR",
+ "Headphone Jack", "HPOUTL",
+ "Int Spk", "ROP",
+ "Int Spk", "RON",
+ "Int Spk", "LOP",
+ "Int Spk", "LON",
+ "Mic Jack", "MICBIAS",
+ "IN1L", "Mic Jack";
+
+ nvidia,i2s-controller = <&i2s1>;
+ nvidia,audio-codec = <&wm8903>;
+
+ nvidia,spkr-en-gpios = <&codec 2 0>;
+ nvidia,hp-det-gpios = <&gpio 178 0>; /* gpio PW2 */
+ nvidia,int-mic-en-gpios = <&gpio 184 0>; /*gpio PX0 */
+ nvidia,ext-mic-en-gpios = <&gpio 185 0>; /* gpio PX1 */
+
+ clocks = <&tegra_car 112>, <&tegra_car 113>, <&tegra_car 93>;
+ clock-names = "pll_a", "pll_a_out0", "mclk";
+};
+
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm9712.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm9712.txt
new file mode 100644
index 000000000..436f6cd9d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra-audio-wm9712.txt
@@ -0,0 +1,60 @@
+NVIDIA Tegra audio complex
+
+Required properties:
+- compatible : "nvidia,tegra-audio-wm9712"
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - pll_a
+ - pll_a_out0
+ - mclk (The Tegra cdev1/extern1 clock, which feeds the CODEC's mclk)
+- nvidia,model : The user-visible name of this sound complex.
+- nvidia,audio-routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source. Valid names for sources and
+ sinks are the WM9712's pins, and the jacks on the board:
+
+ WM9712 pins:
+
+ * MONOOUT
+ * HPOUTL
+ * HPOUTR
+ * LOUT2
+ * ROUT2
+ * OUT3
+ * LINEINL
+ * LINEINR
+ * PHONE
+ * PCBEEP
+ * MIC1
+ * MIC2
+ * Mic Bias
+
+ Board connectors:
+
+ * Headphone
+ * LineIn
+ * Mic
+
+- nvidia,ac97-controller : The phandle of the Tegra AC97 controller
+
+
+Example:
+
+sound {
+ compatible = "nvidia,tegra-audio-wm9712-colibri_t20",
+ "nvidia,tegra-audio-wm9712";
+ nvidia,model = "Toradex Colibri T20";
+
+ nvidia,audio-routing =
+ "Headphone", "HPOUTL",
+ "Headphone", "HPOUTR",
+ "LineIn", "LINEINL",
+ "LineIn", "LINEINR",
+ "Mic", "MIC1";
+
+ nvidia,ac97-controller = <&ac97>;
+
+ clocks = <&tegra_car 112>, <&tegra_car 113>, <&tegra_car 93>;
+ clock-names = "pll_a", "pll_a_out0", "mclk";
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra20-ac97.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra20-ac97.txt
new file mode 100644
index 000000000..eaf00102d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra20-ac97.txt
@@ -0,0 +1,36 @@
+NVIDIA Tegra 20 AC97 controller
+
+Required properties:
+- compatible : "nvidia,tegra20-ac97"
+- reg : Should contain AC97 controller registers location and length
+- interrupts : Should contain AC97 interrupt
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - ac97
+- dmas : Must contain an entry for each entry in clock-names.
+ See ../dma/dma.txt for details.
+- dma-names : Must include the following entries:
+ - rx
+ - tx
+- clocks : Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- nvidia,codec-reset-gpio : The Tegra GPIO controller's phandle and the number
+ of the GPIO used to reset the external AC97 codec
+- nvidia,codec-sync-gpio : The Tegra GPIO controller's phandle and the number
+ of the GPIO corresponding with the AC97 DAP _FS line
+
+Example:
+
+ac97@70002000 {
+ compatible = "nvidia,tegra20-ac97";
+ reg = <0x70002000 0x200>;
+ interrupts = <0 81 0x04>;
+ nvidia,codec-reset-gpio = <&gpio 170 0>;
+ nvidia,codec-sync-gpio = <&gpio 120 0>;
+ clocks = <&tegra_car 3>;
+ resets = <&tegra_car 3>;
+ reset-names = "ac97";
+ dmas = <&apbdma 12>, <&apbdma 12>;
+ dma-names = "rx", "tx";
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra20-das.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra20-das.txt
new file mode 100644
index 000000000..6de3a7ee4
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra20-das.txt
@@ -0,0 +1,12 @@
+NVIDIA Tegra 20 DAS (Digital Audio Switch) controller
+
+Required properties:
+- compatible : "nvidia,tegra20-das"
+- reg : Should contain DAS registers location and length
+
+Example:
+
+das@70000c00 {
+ compatible = "nvidia,tegra20-das";
+ reg = <0x70000c00 0x80>;
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra20-i2s.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra20-i2s.txt
new file mode 100644
index 000000000..dc30c6bfb
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra20-i2s.txt
@@ -0,0 +1,30 @@
+NVIDIA Tegra 20 I2S controller
+
+Required properties:
+- compatible : "nvidia,tegra20-i2s"
+- reg : Should contain I2S registers location and length
+- interrupts : Should contain I2S interrupt
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - i2s
+- dmas : Must contain an entry for each entry in clock-names.
+ See ../dma/dma.txt for details.
+- dma-names : Must include the following entries:
+ - rx
+ - tx
+- clocks : Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+
+Example:
+
+i2s@70002800 {
+ compatible = "nvidia,tegra20-i2s";
+ reg = <0x70002800 0x200>;
+ interrupts = < 45 >;
+ clocks = <&tegra_car 11>;
+ resets = <&tegra_car 11>;
+ reset-names = "i2s";
+ dmas = <&apbdma 21>, <&apbdma 21>;
+ dma-names = "rx", "tx";
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra30-ahub.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra30-ahub.txt
new file mode 100644
index 000000000..0e9a1895d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra30-ahub.txt
@@ -0,0 +1,88 @@
+NVIDIA Tegra30 AHUB (Audio Hub)
+
+Required properties:
+- compatible : For Tegra30, must contain "nvidia,tegra30-ahub". For Tegra114,
+ must contain "nvidia,tegra114-ahub". For Tegra124, must contain
+ "nvidia,tegra124-ahub". Otherwise, must contain "nvidia,<chip>-ahub",
+ plus at least one of the above, where <chip> is tegra132.
+- reg : Should contain the register physical address and length for each of
+ the AHUB's register blocks.
+ - Tegra30 requires 2 entries, for the APBIF and AHUB/AUDIO register blocks.
+ - Tegra114 requires an additional entry, for the APBIF2 register block.
+- interrupts : Should contain AHUB interrupt
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - d_audio
+ - apbif
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ Tegra30 and later:
+ - d_audio
+ - apbif
+ - i2s0
+ - i2s1
+ - i2s2
+ - i2s3
+ - i2s4
+ - dam0
+ - dam1
+ - dam2
+ - spdif
+ Tegra114 and later additionally require:
+ - amx
+ - adx
+ Tegra124 and later additionally require:
+ - amx1
+ - adx1
+ - afc0
+ - afc1
+ - afc2
+ - afc3
+ - afc4
+ - afc5
+- ranges : The bus address mapping for the configlink register bus.
+ Can be empty since the mapping is 1:1.
+- dmas : Must contain an entry for each entry in clock-names.
+ See ../dma/dma.txt for details.
+- dma-names : Must include the following entries:
+ - rx0 .. rx<n>
+ - tx0 .. tx<n>
+ ... where n is:
+ Tegra30: 3
+ Tegra114, Tegra124: 9
+- #address-cells : For the configlink bus. Should be <1>;
+- #size-cells : For the configlink bus. Should be <1>.
+
+AHUB client modules need to specify the IDs of their CIFs (Client InterFaces).
+For RX CIFs, the numbers indicate the register number within AHUB routing
+register space (APBIF 0..3 RX, I2S 0..5 RX, DAM 0..2 RX 0..1, SPDIF RX 0..1).
+For TX CIFs, the numbers indicate the bit position within the AHUB routing
+registers (APBIF 0..3 TX, I2S 0..5 TX, DAM 0..2 TX, SPDIF TX 0..1).
+
+Example:
+
+ahub@70080000 {
+ compatible = "nvidia,tegra30-ahub";
+ reg = <0x70080000 0x200 0x70080200 0x100>;
+ interrupts = < 0 103 0x04 >;
+ nvidia,dma-request-selector = <&apbdma 1>;
+ clocks = <&tegra_car 106>, <&tegra_car 107>;
+ clock-names = "d_audio", "apbif";
+ resets = <&tegra_car 106>, <&tegra_car 107>, <&tegra_car 30>,
+ <&tegra_car 11>, <&tegra_car 18>, <&tegra_car 101>,
+ <&tegra_car 102>, <&tegra_car 108>, <&tegra_car 109>,
+ <&tegra_car 110>, <&tegra_car 10>;
+ reset-names = "d_audio", "apbif", "i2s0", "i2s1", "i2s2",
+ "i2s3", "i2s4", "dam0", "dam1", "dam2",
+ "spdif";
+ dmas = <&apbdma 1>, <&apbdma 1>;
+ <&apbdma 2>, <&apbdma 2>;
+ <&apbdma 3>, <&apbdma 3>;
+ <&apbdma 4>, <&apbdma 4>;
+ dma-names = "rx0", "tx0", "rx1", "tx1", "rx2", "tx2", "rx3", "tx3";
+ ranges;
+ #address-cells = <1>;
+ #size-cells = <1>;
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.txt
new file mode 100644
index 000000000..13e2ef496
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra30-hda.txt
@@ -0,0 +1,30 @@
+NVIDIA Tegra30 HDA controller
+
+Required properties:
+- compatible : For Tegra30, must contain "nvidia,tegra30-hda". Otherwise,
+ must contain '"nvidia,<chip>-hda", "nvidia,tegra30-hda"', where <chip> is
+ tegra114, tegra124, or tegra132.
+- reg : Should contain the HDA registers location and length.
+- interrupts : The interrupt from the HDA controller.
+- clocks : Must contain an entry for each required entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries: hda, hdacodec_2x, hda2hdmi
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries: hda, hdacodec_2x, hda2hdmi
+
+Example:
+
+hda@0,70030000 {
+ compatible = "nvidia,tegra124-hda", "nvidia,tegra30-hda";
+ reg = <0x0 0x70030000 0x0 0x10000>;
+ interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_HDA>,
+ <&tegra_car TEGRA124_CLK_HDA2HDMI>,
+ <&tegra_car TEGRA124_CLK_HDA2CODEC_2X>;
+ clock-names = "hda", "hda2hdmi", "hda2codec_2x";
+ resets = <&tegra_car 125>, /* hda */
+ <&tegra_car 128>; /* hda2hdmi */
+ <&tegra_car 111>, /* hda2codec_2x */
+ reset-names = "hda", "hda2hdmi", "hda2codec_2x";
+};
diff --git a/Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.txt b/Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.txt
new file mode 100644
index 000000000..38caa936f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/nvidia,tegra30-i2s.txt
@@ -0,0 +1,27 @@
+NVIDIA Tegra30 I2S controller
+
+Required properties:
+- compatible : For Tegra30, must contain "nvidia,tegra30-i2s". For Tegra124,
+ must contain "nvidia,tegra124-i2s". Otherwise, must contain
+ "nvidia,<chip>-i2s" plus at least one of the above, where <chip> is
+ tegra114 or tegra132.
+- reg : Should contain I2S registers location and length
+- clocks : Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - i2s
+- nvidia,ahub-cif-ids : The list of AHUB CIF IDs for this port, rx (playback)
+ first, tx (capture) second. See nvidia,tegra30-ahub.txt for values.
+
+Example:
+
+i2s@70080300 {
+ compatible = "nvidia,tegra30-i2s";
+ reg = <0x70080300 0x100>;
+ nvidia,ahub-cif-ids = <4 4>;
+ clocks = <&tegra_car 11>;
+ resets = <&tegra_car 11>;
+ reset-names = "i2s";
+};
diff --git a/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt b/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt
new file mode 100644
index 000000000..fd40c852d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/omap-abe-twl6040.txt
@@ -0,0 +1,91 @@
+* Texas Instruments OMAP4+ and twl6040 based audio setups
+
+Required properties:
+- compatible: "ti,abe-twl6040"
+- ti,model: Name of the sound card ( for example "SDP4430")
+- ti,mclk-freq: MCLK frequency for HPPLL operation
+- ti,mcpdm: phandle for the McPDM node
+- ti,twl6040: phandle for the twl6040 core node
+- ti,audio-routing: List of connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source.
+
+Optional properties:
+- ti,dmic: phandle for the OMAP dmic node if the machine have it connected
+- ti,jack_detection: Need to be present if the board capable to detect jack
+ insertion, removal.
+
+Available audio endpoints for the audio-routing table:
+
+Board connectors:
+ * Headset Stereophone
+ * Earphone Spk
+ * Ext Spk
+ * Line Out
+ * Vibrator
+ * Headset Mic
+ * Main Handset Mic
+ * Sub Handset Mic
+ * Line In
+ * Digital Mic
+
+twl6040 pins:
+ * HSOL
+ * HSOR
+ * EP
+ * HFL
+ * HFR
+ * AUXL
+ * AUXR
+ * VIBRAL
+ * VIBRAR
+ * HSMIC
+ * MAINMIC
+ * SUBMIC
+ * AFML
+ * AFMR
+
+ * Headset Mic Bias
+ * Main Mic Bias
+ * Digital Mic1 Bias
+ * Digital Mic2 Bias
+
+Digital mic pins:
+ * DMic
+
+Example:
+
+sound {
+ compatible = "ti,abe-twl6040";
+ ti,model = "SDP4430";
+
+ ti,jack-detection;
+ ti,mclk-freq = <38400000>;
+
+ ti,mcpdm = <&mcpdm>;
+ ti,dmic = <&dmic>;
+
+ ti,twl6040 = <&twl6040>;
+
+ /* Audio routing */
+ ti,audio-routing =
+ "Headset Stereophone", "HSOL",
+ "Headset Stereophone", "HSOR",
+ "Earphone Spk", "EP",
+ "Ext Spk", "HFL",
+ "Ext Spk", "HFR",
+ "Line Out", "AUXL",
+ "Line Out", "AUXR",
+ "Vibrator", "VIBRAL",
+ "Vibrator", "VIBRAR",
+ "HSMIC", "Headset Mic",
+ "Headset Mic", "Headset Mic Bias",
+ "MAINMIC", "Main Handset Mic",
+ "Main Handset Mic", "Main Mic Bias",
+ "SUBMIC", "Sub Handset Mic",
+ "Sub Handset Mic", "Main Mic Bias",
+ "AFML", "Line In",
+ "AFMR", "Line In",
+ "DMic", "Digital Mic",
+ "Digital Mic", "Digital Mic1 Bias";
+};
diff --git a/Documentation/devicetree/bindings/sound/omap-dmic.txt b/Documentation/devicetree/bindings/sound/omap-dmic.txt
new file mode 100644
index 000000000..fd8105f18
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/omap-dmic.txt
@@ -0,0 +1,21 @@
+* Texas Instruments OMAP4+ Digital Microphone Module
+
+Required properties:
+- compatible: "ti,omap4-dmic"
+- reg: Register location and size as an array:
+ <MPU access base address, size>,
+ <L3 interconnect address, size>;
+- interrupts: Interrupt number for DMIC
+- interrupt-parent: The parent interrupt controller
+- ti,hwmods: Name of the hwmod associated with OMAP dmic IP
+
+Example:
+
+dmic: dmic@4012e000 {
+ compatible = "ti,omap4-dmic";
+ reg = <0x4012e000 0x7f>, /* MPU private access */
+ <0x4902e000 0x7f>; /* L3 Interconnect */
+ interrupts = <0 114 0x4>;
+ interrupt-parent = <&gic>;
+ ti,hwmods = "dmic";
+};
diff --git a/Documentation/devicetree/bindings/sound/omap-mcbsp.txt b/Documentation/devicetree/bindings/sound/omap-mcbsp.txt
new file mode 100644
index 000000000..17cce4490
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/omap-mcbsp.txt
@@ -0,0 +1,37 @@
+* Texas Instruments OMAP2+ McBSP module
+
+Required properties:
+- compatible: "ti,omap2420-mcbsp" for McBSP on OMAP2420
+ "ti,omap2430-mcbsp" for McBSP on OMAP2430
+ "ti,omap3-mcbsp" for McBSP on OMAP3
+ "ti,omap4-mcbsp" for McBSP on OMAP4 and newer SoC
+- reg: Register location and size, for OMAP4+ as an array:
+ <MPU access base address, size>,
+ <L3 interconnect address, size>;
+- reg-names: Array of strings associated with the address space
+- interrupts: Interrupt numbers for the McBSP port, as an array in case the
+ McBSP IP have more interrupt lines:
+ <OCP compliant irq>,
+ <TX irq>,
+ <RX irq>;
+- interrupt-names: Array of strings associated with the interrupt numbers
+- interrupt-parent: The parent interrupt controller
+- ti,buffer-size: Size of the FIFO on the port (OMAP2430 and newer SoC)
+- ti,hwmods: Name of the hwmod associated to the McBSP port
+
+Example:
+
+mcbsp2: mcbsp@49022000 {
+ compatible = "ti,omap3-mcbsp";
+ reg = <0x49022000 0xff>,
+ <0x49028000 0xff>;
+ reg-names = "mpu", "sidetone";
+ interrupts = <0 17 0x4>, /* OCP compliant interrupt */
+ <0 62 0x4>, /* TX interrupt */
+ <0 63 0x4>, /* RX interrupt */
+ <0 4 0x4>; /* Sidetone */
+ interrupt-names = "common", "tx", "rx", "sidetone";
+ interrupt-parent = <&intc>;
+ ti,buffer-size = <1280>;
+ ti,hwmods = "mcbsp2";
+};
diff --git a/Documentation/devicetree/bindings/sound/omap-mcpdm.txt b/Documentation/devicetree/bindings/sound/omap-mcpdm.txt
new file mode 100644
index 000000000..0741dff04
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/omap-mcpdm.txt
@@ -0,0 +1,21 @@
+* Texas Instruments OMAP4+ McPDM
+
+Required properties:
+- compatible: "ti,omap4-mcpdm"
+- reg: Register location and size as an array:
+ <MPU access base address, size>,
+ <L3 interconnect address, size>;
+- interrupts: Interrupt number for McPDM
+- interrupt-parent: The parent interrupt controller
+- ti,hwmods: Name of the hwmod associated to the McPDM
+
+Example:
+
+mcpdm: mcpdm@40132000 {
+ compatible = "ti,omap4-mcpdm";
+ reg = <0x40132000 0x7f>, /* MPU private access */
+ <0x49032000 0x7f>; /* L3 Interconnect */
+ interrupts = <0 112 0x4>;
+ interrupt-parent = <&gic>;
+ ti,hwmods = "mcpdm";
+};
diff --git a/Documentation/devicetree/bindings/sound/omap-twl4030.txt b/Documentation/devicetree/bindings/sound/omap-twl4030.txt
new file mode 100644
index 000000000..f6a715e4e
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/omap-twl4030.txt
@@ -0,0 +1,62 @@
+* Texas Instruments SoC with twl4030 based audio setups
+
+Required properties:
+- compatible: "ti,omap-twl4030"
+- ti,model: Name of the sound card (for example "omap3beagle")
+- ti,mcbsp: phandle for the McBSP node
+
+Optional properties:
+- ti,codec: phandle for the twl4030 audio node
+- ti,mcbsp-voice: phandle for the McBSP node connected to the voice port of twl
+- ti, jack-det-gpio: Jack detect GPIO
+- ti,audio-routing: List of connections between audio components.
+ Each entry is a pair of strings, the first being the connection's sink,
+ the second being the connection's source.
+ If the routing is not provided all possible connection will be available
+
+Available audio endpoints for the audio-routing table:
+
+Board connectors:
+ * Headset Stereophone
+ * Earpiece Spk
+ * Handsfree Spk
+ * Ext Spk
+ * Main Mic
+ * Sub Mic
+ * Headset Mic
+ * Carkit Mic
+ * Digital0 Mic
+ * Digital1 Mic
+ * Line In
+
+twl4030 pins:
+ * HSOL
+ * HSOR
+ * EARPIECE
+ * HFL
+ * HFR
+ * PREDRIVEL
+ * PREDRIVER
+ * CARKITL
+ * CARKITR
+ * MAINMIC
+ * SUBMIC
+ * HSMIC
+ * DIGIMIC0
+ * DIGIMIC1
+ * CARKITMIC
+ * AUXL
+ * AUXR
+
+ * Headset Mic Bias
+ * Mic Bias 1 /* Used for Main Mic or Digimic0 */
+ * Mic Bias 2 /* Used for Sub Mic or Digimic1 */
+
+Example:
+
+sound {
+ compatible = "ti,omap-twl4030";
+ ti,model = "omap3beagle";
+
+ ti,mcbsp = <&mcbsp2>;
+};
diff --git a/Documentation/devicetree/bindings/sound/pcm1792a.txt b/Documentation/devicetree/bindings/sound/pcm1792a.txt
new file mode 100644
index 000000000..970ba1ed5
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/pcm1792a.txt
@@ -0,0 +1,18 @@
+Texas Instruments pcm1792a DT bindings
+
+This driver supports the SPI bus.
+
+Required properties:
+
+ - compatible: "ti,pcm1792a"
+
+For required properties on SPI, please consult
+Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Examples:
+
+ codec_spi: 1792a@0 {
+ compatible = "ti,pcm1792a";
+ spi-max-frequency = <600000>;
+ };
+
diff --git a/Documentation/devicetree/bindings/sound/pcm512x.txt b/Documentation/devicetree/bindings/sound/pcm512x.txt
new file mode 100644
index 000000000..3aae3b41b
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/pcm512x.txt
@@ -0,0 +1,52 @@
+PCM512x audio CODECs
+
+These devices support both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : One of "ti,pcm5121", "ti,pcm5122", "ti,pcm5141" or
+ "ti,pcm5142"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+ - AVDD-supply, DVDD-supply, and CPVDD-supply : power supplies for the
+ device, as covered in bindings/regulator/regulator.txt
+
+Optional properties:
+
+ - clocks : A clock specifier for the clock connected as SCLK. If this
+ is absent the device will be configured to clock from BCLK. If pll-in
+ and pll-out are specified in addition to a clock, the device is
+ configured to accept clock input on a specified gpio pin.
+
+ - pll-in, pll-out : gpio pins used to connect the pll using <1>
+ through <6>. The device will be configured for clock input on the
+ given pll-in pin and PLL output on the given pll-out pin. An
+ external connection from the pll-out pin to the SCLK pin is assumed.
+
+Examples:
+
+ pcm5122: pcm5122@4c {
+ compatible = "ti,pcm5122";
+ reg = <0x4c>;
+
+ AVDD-supply = <&reg_3v3_analog>;
+ DVDD-supply = <&reg_1v8>;
+ CPVDD-supply = <&reg_3v3>;
+ };
+
+
+ pcm5142: pcm5142@4c {
+ compatible = "ti,pcm5142";
+ reg = <0x4c>;
+
+ AVDD-supply = <&reg_3v3_analog>;
+ DVDD-supply = <&reg_1v8>;
+ CPVDD-supply = <&reg_3v3>;
+
+ clocks = <&sck>;
+ pll-in = <3>;
+ pll-out = <6>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.txt b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.txt
new file mode 100644
index 000000000..e00732dac
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.txt
@@ -0,0 +1,43 @@
+* Qualcomm Technologies LPASS CPU DAI
+
+This node models the Qualcomm Technologies Low-Power Audio SubSystem (LPASS).
+
+Required properties:
+
+- compatible : "qcom,lpass-cpu"
+- clocks : Must contain an entry for each entry in clock-names.
+- clock-names : A list which must include the following entries:
+ * "ahbix-clk"
+ * "mi2s-osr-clk"
+ * "mi2s-bit-clk"
+- interrupts : Must contain an entry for each entry in
+ interrupt-names.
+- interrupt-names : A list which must include the following entries:
+ * "lpass-irq-lpaif"
+- pinctrl-N : One property must exist for each entry in
+ pinctrl-names. See ../pinctrl/pinctrl-bindings.txt
+ for details of the property values.
+- pinctrl-names : Must contain a "default" entry.
+- reg : Must contain an address for each entry in reg-names.
+- reg-names : A list which must include the following entries:
+ * "lpass-lpaif"
+
+Optional properties:
+
+- qcom,adsp : Phandle for the audio DSP node
+
+Example:
+
+lpass@28100000 {
+ compatible = "qcom,lpass-cpu";
+ clocks = <&lcc AHBIX_CLK>, <&lcc MI2S_OSR_CLK>, <&lcc MI2S_BIT_CLK>;
+ clock-names = "ahbix-clk", "mi2s-osr-clk", "mi2s-bit-clk";
+ interrupts = <0 85 1>;
+ interrupt-names = "lpass-irq-lpaif";
+ pinctrl-names = "default", "idle";
+ pinctrl-0 = <&mi2s_default>;
+ pinctrl-1 = <&mi2s_idle>;
+ reg = <0x28100000 0x10000>;
+ reg-names = "lpass-lpaif";
+ qcom,adsp = <&adsp>;
+};
diff --git a/Documentation/devicetree/bindings/sound/renesas,fsi.txt b/Documentation/devicetree/bindings/sound/renesas,fsi.txt
new file mode 100644
index 000000000..0d0ab5110
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/renesas,fsi.txt
@@ -0,0 +1,31 @@
+Renesas FSI
+
+Required properties:
+- compatible : "renesas,fsi2-<soctype>",
+ "renesas,sh_fsi2" or "renesas,sh_fsi" as
+ fallback.
+ Examples with soctypes are:
+ - "renesas,fsi2-r8a7740" (R-Mobile A1)
+ - "renesas,fsi2-sh73a0" (SH-Mobile AG5)
+- reg : Should contain the register physical address and length
+- interrupts : Should contain FSI interrupt
+
+- fsia,spdif-connection : FSI is connected by S/PDIF
+- fsia,stream-mode-support : FSI supports 16bit stream mode.
+- fsia,use-internal-clock : FSI uses internal clock when master mode.
+
+- fsib,spdif-connection : same as fsia
+- fsib,stream-mode-support : same as fsia
+- fsib,use-internal-clock : same as fsia
+
+Example:
+
+sh_fsi2: sh_fsi2@0xec230000 {
+ compatible = "renesas,sh_fsi2";
+ reg = <0xec230000 0x400>;
+ interrupts = <0 146 0x4>;
+
+ fsia,spdif-connection;
+ fsia,stream-mode-support;
+ fsia,use-internal-clock;
+};
diff --git a/Documentation/devicetree/bindings/sound/renesas,rsnd.txt b/Documentation/devicetree/bindings/sound/renesas,rsnd.txt
new file mode 100644
index 000000000..f316ce1f2
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/renesas,rsnd.txt
@@ -0,0 +1,216 @@
+Renesas R-Car sound
+
+Required properties:
+- compatible : "renesas,rcar_sound-<soctype>", fallbacks
+ "renesas,rcar_sound-gen1" if generation1, and
+ "renesas,rcar_sound-gen2" if generation2
+ Examples with soctypes are:
+ - "renesas,rcar_sound-r8a7790" (R-Car H2)
+ - "renesas,rcar_sound-r8a7791" (R-Car M2-W)
+- reg : Should contain the register physical address.
+ required register is
+ SRU/ADG/SSI if generation1
+ SRU/ADG/SSIU/SSI if generation2
+- rcar_sound,ssi : Should contain SSI feature.
+ The number of SSI subnode should be same as HW.
+ see below for detail.
+- rcar_sound,src : Should contain SRC feature.
+ The number of SRC subnode should be same as HW.
+ see below for detail.
+- rcar_sound,dvc : Should contain DVC feature.
+ The number of DVC subnode should be same as HW.
+ see below for detail.
+- rcar_sound,dai : DAI contents.
+ The number of DAI subnode should be same as HW.
+ see below for detail.
+
+SSI subnode properties:
+- interrupts : Should contain SSI interrupt for PIO transfer
+- shared-pin : if shared clock pin
+- pio-transfer : use PIO transfer mode
+- no-busif : BUSIF is not ussed when [mem -> SSI] via DMA case
+- dma : Should contain Audio DMAC entry
+- dma-names : SSI case "rx" (=playback), "tx" (=capture)
+ SSIU case "rxu" (=playback), "txu" (=capture)
+
+SRC subnode properties:
+- dma : Should contain Audio DMAC entry
+- dma-names : "rx" (=playback), "tx" (=capture)
+
+DVC subnode properties:
+- dma : Should contain Audio DMAC entry
+- dma-names : "tx" (=playback/capture)
+
+DAI subnode properties:
+- playback : list of playback modules
+- capture : list of capture modules
+
+Example:
+
+rcar_sound: rcar_sound@ec500000 {
+ #sound-dai-cells = <1>;
+ compatible = "renesas,rcar_sound-r8a7791", "renesas,rcar_sound-gen2";
+ reg = <0 0xec500000 0 0x1000>, /* SCU */
+ <0 0xec5a0000 0 0x100>, /* ADG */
+ <0 0xec540000 0 0x1000>, /* SSIU */
+ <0 0xec541000 0 0x1280>, /* SSI */
+ <0 0xec740000 0 0x200>; /* Audio DMAC peri peri*/
+ reg-names = "scu", "adg", "ssiu", "ssi", "audmapp";
+
+ clocks = <&mstp10_clks R8A7790_CLK_SSI_ALL>,
+ <&mstp10_clks R8A7790_CLK_SSI9>, <&mstp10_clks R8A7790_CLK_SSI8>,
+ <&mstp10_clks R8A7790_CLK_SSI7>, <&mstp10_clks R8A7790_CLK_SSI6>,
+ <&mstp10_clks R8A7790_CLK_SSI5>, <&mstp10_clks R8A7790_CLK_SSI4>,
+ <&mstp10_clks R8A7790_CLK_SSI3>, <&mstp10_clks R8A7790_CLK_SSI2>,
+ <&mstp10_clks R8A7790_CLK_SSI1>, <&mstp10_clks R8A7790_CLK_SSI0>,
+ <&mstp10_clks R8A7790_CLK_SCU_SRC9>, <&mstp10_clks R8A7790_CLK_SCU_SRC8>,
+ <&mstp10_clks R8A7790_CLK_SCU_SRC7>, <&mstp10_clks R8A7790_CLK_SCU_SRC6>,
+ <&mstp10_clks R8A7790_CLK_SCU_SRC5>, <&mstp10_clks R8A7790_CLK_SCU_SRC4>,
+ <&mstp10_clks R8A7790_CLK_SCU_SRC3>, <&mstp10_clks R8A7790_CLK_SCU_SRC2>,
+ <&mstp10_clks R8A7790_CLK_SCU_SRC1>, <&mstp10_clks R8A7790_CLK_SCU_SRC0>,
+ <&mstp10_clks R8A7790_CLK_SCU_DVC0>, <&mstp10_clks R8A7790_CLK_SCU_DVC1>,
+ <&audio_clk_a>, <&audio_clk_b>, <&audio_clk_c>, <&m2_clk>;
+ clock-names = "ssi-all",
+ "ssi.9", "ssi.8", "ssi.7", "ssi.6", "ssi.5",
+ "ssi.4", "ssi.3", "ssi.2", "ssi.1", "ssi.0",
+ "src.9", "src.8", "src.7", "src.6", "src.5",
+ "src.4", "src.3", "src.2", "src.1", "src.0",
+ "dvc.0", "dvc.1",
+ "clk_a", "clk_b", "clk_c", "clk_i";
+
+ rcar_sound,dvc {
+ dvc0: dvc@0 {
+ dmas = <&audma0 0xbc>;
+ dma-names = "tx";
+ };
+ dvc1: dvc@1 {
+ dmas = <&audma0 0xbe>;
+ dma-names = "tx";
+ };
+ };
+
+ rcar_sound,src {
+ src0: src@0 {
+ interrupts = <0 352 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x85>, <&audma1 0x9a>;
+ dma-names = "rx", "tx";
+ };
+ src1: src@1 {
+ interrupts = <0 353 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x87>, <&audma1 0x9c>;
+ dma-names = "rx", "tx";
+ };
+ src2: src@2 {
+ interrupts = <0 354 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x89>, <&audma1 0x9e>;
+ dma-names = "rx", "tx";
+ };
+ src3: src@3 {
+ interrupts = <0 355 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x8b>, <&audma1 0xa0>;
+ dma-names = "rx", "tx";
+ };
+ src4: src@4 {
+ interrupts = <0 356 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x8d>, <&audma1 0xb0>;
+ dma-names = "rx", "tx";
+ };
+ src5: src@5 {
+ interrupts = <0 357 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x8f>, <&audma1 0xb2>;
+ dma-names = "rx", "tx";
+ };
+ src6: src@6 {
+ interrupts = <0 358 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x91>, <&audma1 0xb4>;
+ dma-names = "rx", "tx";
+ };
+ src7: src@7 {
+ interrupts = <0 359 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x93>, <&audma1 0xb6>;
+ dma-names = "rx", "tx";
+ };
+ src8: src@8 {
+ interrupts = <0 360 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x95>, <&audma1 0xb8>;
+ dma-names = "rx", "tx";
+ };
+ src9: src@9 {
+ interrupts = <0 361 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x97>, <&audma1 0xba>;
+ dma-names = "rx", "tx";
+ };
+ };
+
+ rcar_sound,ssi {
+ ssi0: ssi@0 {
+ interrupts = <0 370 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x01>, <&audma1 0x02>, <&audma0 0x15>, <&audma1 0x16>;
+ dma-names = "rx", "tx", "rxu", "txu";
+ };
+ ssi1: ssi@1 {
+ interrupts = <0 371 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x03>, <&audma1 0x04>, <&audma0 0x49>, <&audma1 0x4a>;
+ dma-names = "rx", "tx", "rxu", "txu";
+ };
+ ssi2: ssi@2 {
+ interrupts = <0 372 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x05>, <&audma1 0x06>, <&audma0 0x63>, <&audma1 0x64>;
+ dma-names = "rx", "tx", "rxu", "txu";
+ };
+ ssi3: ssi@3 {
+ interrupts = <0 373 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x07>, <&audma1 0x08>, <&audma0 0x6f>, <&audma1 0x70>;
+ dma-names = "rx", "tx", "rxu", "txu";
+ };
+ ssi4: ssi@4 {
+ interrupts = <0 374 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x09>, <&audma1 0x0a>, <&audma0 0x71>, <&audma1 0x72>;
+ dma-names = "rx", "tx", "rxu", "txu";
+ };
+ ssi5: ssi@5 {
+ interrupts = <0 375 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x0b>, <&audma1 0x0c>, <&audma0 0x73>, <&audma1 0x74>;
+ dma-names = "rx", "tx", "rxu", "txu";
+ };
+ ssi6: ssi@6 {
+ interrupts = <0 376 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x0d>, <&audma1 0x0e>, <&audma0 0x75>, <&audma1 0x76>;
+ dma-names = "rx", "tx", "rxu", "txu";
+ };
+ ssi7: ssi@7 {
+ interrupts = <0 377 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x0f>, <&audma1 0x10>, <&audma0 0x79>, <&audma1 0x7a>;
+ dma-names = "rx", "tx", "rxu", "txu";
+ };
+ ssi8: ssi@8 {
+ interrupts = <0 378 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x11>, <&audma1 0x12>, <&audma0 0x7b>, <&audma1 0x7c>;
+ dma-names = "rx", "tx", "rxu", "txu";
+ };
+ ssi9: ssi@9 {
+ interrupts = <0 379 IRQ_TYPE_LEVEL_HIGH>;
+ dmas = <&audma0 0x13>, <&audma1 0x14>, <&audma0 0x7d>, <&audma1 0x7e>;
+ dma-names = "rx", "tx", "rxu", "txu";
+ };
+ };
+
+ rcar_sound,dai {
+ dai0 {
+ playback = <&ssi5 &src5>;
+ capture = <&ssi6>;
+ };
+ dai1 {
+ playback = <&ssi3>;
+ };
+ dai2 {
+ capture = <&ssi4>;
+ };
+ dai3 {
+ playback = <&ssi7>;
+ };
+ dai4 {
+ capture = <&ssi8>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/sound/renesas,rsrc-card.txt b/Documentation/devicetree/bindings/sound/renesas,rsrc-card.txt
new file mode 100644
index 000000000..c64155027
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/renesas,rsrc-card.txt
@@ -0,0 +1,67 @@
+Renesas Sampling Rate Convert Sound Card:
+
+Renesas Sampling Rate Convert Sound Card specifies audio DAI connections of SoC <-> codec.
+
+Required properties:
+
+- compatible : "renesas,rsrc-card,<board>"
+ Examples with soctypes are:
+ - "renesas,rsrc-card,lager"
+ - "renesas,rsrc-card,koelsch"
+Optional properties:
+
+- card_name : User specified audio sound card name, one string
+ property.
+- cpu : CPU sub-node
+- codec : CODEC sub-node
+
+Optional subnode properties:
+
+- format : CPU/CODEC common audio format.
+ "i2s", "right_j", "left_j" , "dsp_a"
+ "dsp_b", "ac97", "pdm", "msb", "lsb"
+- frame-master : Indicates dai-link frame master.
+ phandle to a cpu or codec subnode.
+- bitclock-master : Indicates dai-link bit clock master.
+ phandle to a cpu or codec subnode.
+- bitclock-inversion : bool property. Add this if the
+ dai-link uses bit clock inversion.
+- frame-inversion : bool property. Add this if the
+ dai-link uses frame clock inversion.
+- convert-rate : platform specified sampling rate convert
+
+Required CPU/CODEC subnodes properties:
+
+- sound-dai : phandle and port of CPU/CODEC
+
+Optional CPU/CODEC subnodes properties:
+
+- clocks / system-clock-frequency : specify subnode's clock if needed.
+ it can be specified via "clocks" if system has
+ clock node (= common clock), or "system-clock-frequency"
+ (if system doens't support common clock)
+ If a clock is specified, it is
+ enabled with clk_prepare_enable()
+ in dai startup() and disabled with
+ clk_disable_unprepare() in dai
+ shutdown().
+
+Example
+
+sound {
+ compatible = "renesas,rsrc-card,lager";
+
+ card-name = "rsnd-ak4643";
+ format = "left_j";
+ bitclock-master = <&sndcodec>;
+ frame-master = <&sndcodec>;
+
+ sndcpu: cpu {
+ sound-dai = <&rcar_sound>;
+ };
+
+ sndcodec: codec {
+ sound-dai = <&ak4643>;
+ system-clock-frequency = <11289600>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/sound/rockchip-i2s.txt b/Documentation/devicetree/bindings/sound/rockchip-i2s.txt
new file mode 100644
index 000000000..9b82c20b3
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/rockchip-i2s.txt
@@ -0,0 +1,37 @@
+* Rockchip I2S controller
+
+The I2S bus (Inter-IC sound bus) is a serial link for digital
+audio data transfer between devices in the system.
+
+Required properties:
+
+- compatible: should be one of the followings
+ - "rockchip,rk3066-i2s": for rk3066
+ - "rockchip,rk3188-i2s", "rockchip,rk3066-i2s": for rk3188
+ - "rockchip,rk3288-i2s", "rockchip,rk3066-i2s": for rk3288
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: should contain the I2S interrupt.
+- #address-cells: should be 1.
+- #size-cells: should be 0.
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+ Documentation/devicetree/bindings/dma/dma.txt
+- dma-names: should include "tx" and "rx".
+- clocks: a list of phandle + clock-specifer pairs, one for each entry in clock-names.
+- clock-names: should contain followings:
+ - "i2s_hclk": clock for I2S BUS
+ - "i2s_clk" : clock for I2S controller
+
+Example for rk3288 I2S controller:
+
+i2s@ff890000 {
+ compatible = "rockchip,rk3288-i2s", "rockchip,rk3066-i2s";
+ reg = <0xff890000 0x10000>;
+ interrupts = <GIC_SPI 85 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ dmas = <&pdma1 0>, <&pdma1 1>;
+ dma-names = "tx", "rx";
+ clock-names = "i2s_hclk", "i2s_clk";
+ clocks = <&cru HCLK_I2S0>, <&cru SCLK_I2S0>;
+};
diff --git a/Documentation/devicetree/bindings/sound/rt5631.txt b/Documentation/devicetree/bindings/sound/rt5631.txt
new file mode 100644
index 000000000..92b986ca3
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/rt5631.txt
@@ -0,0 +1,48 @@
+ALC5631/RT5631 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+ - compatible : "realtek,alc5631" or "realtek,rt5631"
+
+ - reg : the I2C address of the device.
+
+Pins on the device (for linking into audio routes):
+
+ * SPK_OUT_R_P
+ * SPK_OUT_R_N
+ * SPK_OUT_L_P
+ * SPK_OUT_L_N
+ * HP_OUT_L
+ * HP_OUT_R
+ * AUX_OUT2_LP
+ * AUX_OUT2_RN
+ * AUX_OUT1_LP
+ * AUX_OUT1_RN
+ * AUX_IN_L_JD
+ * AUX_IN_R_JD
+ * MONO_IN_P
+ * MONO_IN_N
+ * MIC1_P
+ * MIC1_N
+ * MIC2_P
+ * MIC2_N
+ * MONO_OUT_P
+ * MONO_OUT_N
+ * MICBIAS1
+ * MICBIAS2
+
+Example:
+
+alc5631: alc5631@1a {
+ compatible = "realtek,alc5631";
+ reg = <0x1a>;
+};
+
+or
+
+rt5631: rt5631@1a {
+ compatible = "realtek,rt5631";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/rt5640.txt b/Documentation/devicetree/bindings/sound/rt5640.txt
new file mode 100644
index 000000000..bac4d9ac1
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/rt5640.txt
@@ -0,0 +1,53 @@
+RT5640/RT5639 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+- compatible : One of "realtek,rt5640" or "realtek,rt5639".
+
+- reg : The I2C address of the device.
+
+- interrupts : The CODEC's interrupt output.
+
+Optional properties:
+
+- realtek,in1-differential
+- realtek,in2-differential
+ Boolean. Indicate MIC1/2 input are differential, rather than single-ended.
+
+- realtek,ldo1-en-gpios : The GPIO that controls the CODEC's LDO1_EN pin.
+
+Pins on the device (for linking into audio routes) for RT5639/RT5640:
+
+ * DMIC1
+ * DMIC2
+ * MICBIAS1
+ * IN1P
+ * IN1R
+ * IN2P
+ * IN2R
+ * HPOL
+ * HPOR
+ * LOUTL
+ * LOUTR
+ * SPOLP
+ * SPOLN
+ * SPORP
+ * SPORN
+
+Additional pins on the device for RT5640:
+
+ * MONOP
+ * MONON
+
+Example:
+
+rt5640 {
+ compatible = "realtek,rt5640";
+ reg = <0x1c>;
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(W, 3) GPIO_ACTIVE_HIGH>;
+ realtek,ldo1-en-gpios =
+ <&gpio TEGRA_GPIO(V, 3) GPIO_ACTIVE_HIGH>;
+};
diff --git a/Documentation/devicetree/bindings/sound/rt5677.txt b/Documentation/devicetree/bindings/sound/rt5677.txt
new file mode 100644
index 000000000..740ff771a
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/rt5677.txt
@@ -0,0 +1,76 @@
+RT5677 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+- compatible : "realtek,rt5677".
+
+- reg : The I2C address of the device.
+
+- interrupts : The CODEC's interrupt output.
+
+- gpio-controller : Indicates this device is a GPIO controller.
+
+- #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters (currently unused).
+
+Optional properties:
+
+- realtek,pow-ldo2-gpio : The GPIO that controls the CODEC's POW_LDO2 pin.
+
+- realtek,in1-differential
+- realtek,in2-differential
+- realtek,lout1-differential
+- realtek,lout2-differential
+- realtek,lout3-differential
+ Boolean. Indicate MIC1/2 input and LOUT1/2/3 outputs are differential,
+ rather than single-ended.
+
+- realtek,gpio-config
+ Array of six 8bit elements that configures GPIO.
+ 0 - floating (reset value)
+ 1 - pull down
+ 2 - pull up
+
+- realtek,jd1-gpio
+ Configures GPIO Mic Jack detection 1.
+ Select 0 ~ 3 as OFF, GPIO1, GPIO2 and GPIO3 respectively.
+
+- realtek,jd2-gpio
+- realtek,jd3-gpio
+ Configures GPIO Mic Jack detection 2 and 3.
+ Select 0 ~ 3 as OFF, GPIO4, GPIO5 and GPIO6 respectively.
+
+Pins on the device (for linking into audio routes):
+
+ * IN1P
+ * IN1N
+ * IN2P
+ * IN2N
+ * MICBIAS1
+ * DMIC1
+ * DMIC2
+ * DMIC3
+ * DMIC4
+ * LOUT1
+ * LOUT2
+ * LOUT3
+
+Example:
+
+rt5677 {
+ compatible = "realtek,rt5677";
+ reg = <0x2c>;
+ interrupt-parent = <&gpio>;
+ interrupts = <TEGRA_GPIO(W, 3) GPIO_ACTIVE_HIGH>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ realtek,pow-ldo2-gpio =
+ <&gpio TEGRA_GPIO(V, 3) GPIO_ACTIVE_HIGH>;
+ realtek,in1-differential = "true";
+ realtek,gpio-config = /bits/ 8 <0 0 0 0 0 2>; /* pull up GPIO6 */
+ realtek,jd2-gpio = <3>; /* Enables Jack detection for GPIO6 */
+};
diff --git a/Documentation/devicetree/bindings/sound/samsung,odroidx2-max98090.txt b/Documentation/devicetree/bindings/sound/samsung,odroidx2-max98090.txt
new file mode 100644
index 000000000..9148f7231
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/samsung,odroidx2-max98090.txt
@@ -0,0 +1,35 @@
+Samsung Exynos Odroid X2/U3 audio complex with MAX98090 codec
+
+Required properties:
+ - compatible : "samsung,odroidx2-audio" - for Odroid X2 board,
+ "samsung,odroidu3-audio" - for Odroid U3 board
+ - samsung,model : the user-visible name of this sound complex
+ - samsung,i2s-controller : the phandle of the I2S controller
+ - samsung,audio-codec : the phandle of the MAX98090 audio codec
+ - samsung,audio-routing : a list of the connections between audio
+ components; each entry is a pair of strings, the first being the
+ connection's sink, the second being the connection's source;
+ valid names for sources and sinks are the MAX98090's pins (as
+ documented in its binding), and the jacks on the board
+ For Odroid X2:
+ * Headphone Jack
+ * Mic Jack
+ * DMIC
+
+ For Odroid U3:
+ * Headphone Jack
+ * Speakers
+
+Example:
+
+sound {
+ compatible = "samsung,odroidu3-audio";
+ samsung,i2s-controller = <&i2s0>;
+ samsung,audio-codec = <&max98090>;
+ samsung,model = "Odroid-X2";
+ samsung,audio-routing =
+ "Headphone Jack", "HPL",
+ "Headphone Jack", "HPR",
+ "IN1", "Mic Jack",
+ "Mic Jack", "MICBIAS";
+};
diff --git a/Documentation/devicetree/bindings/sound/samsung,smdk-wm8994.txt b/Documentation/devicetree/bindings/sound/samsung,smdk-wm8994.txt
new file mode 100644
index 000000000..4686646fb
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/samsung,smdk-wm8994.txt
@@ -0,0 +1,14 @@
+Samsung SMDK audio complex
+
+Required properties:
+- compatible : "samsung,smdk-wm8994"
+- samsung,i2s-controller: The phandle of the Samsung I2S0 controller
+- samsung,audio-codec: The phandle of the WM8994 audio codec
+Example:
+
+sound {
+ compatible = "samsung,smdk-wm8994";
+
+ samsung,i2s-controller = <&i2s0>;
+ samsung,audio-codec = <&wm8994>;
+};
diff --git a/Documentation/devicetree/bindings/sound/samsung-i2s.txt b/Documentation/devicetree/bindings/sound/samsung-i2s.txt
new file mode 100644
index 000000000..09e0e1859
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/samsung-i2s.txt
@@ -0,0 +1,82 @@
+* Samsung I2S controller
+
+Required SoC Specific Properties:
+
+- compatible : should be one of the following.
+ - samsung,s3c6410-i2s: for 8/16/24bit stereo I2S.
+ - samsung,s5pv210-i2s: for 8/16/24bit multichannel(5.1) I2S with
+ secondary fifo, s/w reset control and internal mux for root clk src.
+ - samsung,exynos5420-i2s: for 8/16/24bit multichannel(5.1) I2S for
+ playback, sterio channel capture, secondary fifo using internal
+ or external dma, s/w reset control, internal mux for root clk src
+ and 7.1 channel TDM support for playback. TDM (Time division multiplexing)
+ is to allow transfer of multiple channel audio data on single data line.
+ - samsung,exynos7-i2s: with all the available features of exynos5 i2s,
+ exynos7 I2S has 7.1 channel TDM support for capture, secondary fifo
+ with only external dma and more no.of root clk sampling frequencies.
+ - samsung,exynos7-i2s1: I2S1 on previous samsung platforms supports
+ stereo channels. exynos7 i2s1 upgraded to 5.1 multichannel with
+ slightly modified bit offsets.
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- dmas: list of DMA controller phandle and DMA request line ordered pairs.
+- dma-names: identifier string for each DMA request line in the dmas property.
+ These strings correspond 1:1 with the ordered pairs in dmas.
+- clocks: Handle to iis clock and RCLK source clk.
+- clock-names:
+ i2s0 uses some base clks from CMU and some are from audio subsystem internal
+ clock controller. The clock names for i2s0 should be "iis", "i2s_opclk0" and
+ "i2s_opclk1" as shown in the example below.
+ i2s1 and i2s2 uses clocks from CMU. The clock names for i2s1 and i2s2 should
+ be "iis" and "i2s_opclk0".
+ "iis" is the i2s bus clock and i2s_opclk0, i2s_opclk1 are sources of the root
+ clk. i2s0 has internal mux to select the source of root clk and i2s1 and i2s2
+ doesn't have any such mux.
+- #clock-cells: should be 1, this property must be present if the I2S device
+ is a clock provider in terms of the common clock bindings, described in
+ ../clock/clock-bindings.txt.
+- clock-output-names: from the common clock bindings, names of the CDCLK
+ I2S output clocks, suggested values are "i2s_cdclk0", "i2s_cdclk1",
+ "i2s_cdclk3" for the I2S0, I2S1, I2S2 devices recpectively.
+
+There are following clocks available at the I2S device nodes:
+ CLK_I2S_CDCLK - the CDCLK (CODECLKO) gate clock,
+ CLK_I2S_RCLK_PSR - the RCLK prescaler divider clock (corresponding to the
+ IISPSR register),
+ CLK_I2S_RCLK_SRC - the RCLKSRC mux clock (corresponding to RCLKSRC bit in
+ IISMOD register).
+
+Refer to the SoC datasheet for availability of the above clocks.
+The CLK_I2S_RCLK_PSR and CLK_I2S_RCLK_SRC clocks are usually only available
+in the IIS Multi Audio Interface (I2S0).
+Note: Old DTs may not have the #clock-cells, clock-output-names properties
+and then not use the I2S node as a clock supplier.
+
+Optional SoC Specific Properties:
+
+- samsung,idma-addr: Internal DMA register base address of the audio
+ sub system(used in secondary sound source).
+- pinctrl-0: Should specify pin control groups used for this controller.
+- pinctrl-names: Should contain only one value - "default".
+
+
+Example:
+
+i2s0: i2s@03830000 {
+ compatible = "samsung,s5pv210-i2s";
+ reg = <0x03830000 0x100>;
+ dmas = <&pdma0 10
+ &pdma0 9
+ &pdma0 8>;
+ dma-names = "tx", "rx", "tx-sec";
+ clocks = <&clock_audss EXYNOS_I2S_BUS>,
+ <&clock_audss EXYNOS_I2S_BUS>,
+ <&clock_audss EXYNOS_SCLK_I2S>;
+ clock-names = "iis", "i2s_opclk0", "i2s_opclk1";
+ #clock-cells;
+ clock-output-names = "i2s_cdclk0";
+ samsung,idma-addr = <0x03000000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2s0_bus>;
+};
diff --git a/Documentation/devicetree/bindings/sound/sgtl5000.txt b/Documentation/devicetree/bindings/sound/sgtl5000.txt
new file mode 100644
index 000000000..0e5e4eb3e
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/sgtl5000.txt
@@ -0,0 +1,39 @@
+* Freescale SGTL5000 Stereo Codec
+
+Required properties:
+- compatible : "fsl,sgtl5000".
+
+- reg : the I2C address of the device
+
+- clocks : the clock provider of SYS_MCLK
+
+- micbias-resistor-k-ohms : the bias resistor to be used in kOmhs
+ The resistor can take values of 2k, 4k or 8k.
+ If set to 0 it will be off.
+ If this node is not mentioned or if the value is unknown, then
+ micbias resistor is set to 4K.
+
+- micbias-voltage-m-volts : the bias voltage to be used in mVolts
+ The voltage can take values from 1.25V to 3V by 250mV steps
+ If this node is not mentionned or the value is unknown, then
+ the value is set to 1.25V.
+
+- VDDA-supply : the regulator provider of VDDA
+
+- VDDIO-supply: the regulator provider of VDDIO
+
+Optional properties:
+
+- VDDD-supply : the regulator provider of VDDD
+
+Example:
+
+codec: sgtl5000@0a {
+ compatible = "fsl,sgtl5000";
+ reg = <0x0a>;
+ clocks = <&clks 150>;
+ micbias-resistor-k-ohms = <2>;
+ micbias-voltage-m-volts = <2250>;
+ VDDA-supply = <&reg_3p3v>;
+ VDDIO-supply = <&reg_3p3v>;
+};
diff --git a/Documentation/devicetree/bindings/sound/simple-card.txt b/Documentation/devicetree/bindings/sound/simple-card.txt
new file mode 100644
index 000000000..73bf314f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/simple-card.txt
@@ -0,0 +1,160 @@
+Simple-Card:
+
+Simple-Card specifies audio DAI connections of SoC <-> codec.
+
+Required properties:
+
+- compatible : "simple-audio-card"
+
+Optional properties:
+
+- simple-audio-card,name : User specified audio sound card name, one string
+ property.
+- simple-audio-card,widgets : Please refer to widgets.txt.
+- simple-audio-card,routing : A list of the connections between audio components.
+ Each entry is a pair of strings, the first being the
+ connection's sink, the second being the connection's
+ source.
+- simple-audio-card,mclk-fs : Multiplication factor between stream rate and codec
+ mclk.
+- simple-audio-card,hp-det-gpio : Reference to GPIO that signals when
+ headphones are attached.
+- simple-audio-card,mic-det-gpio : Reference to GPIO that signals when
+ a microphone is attached.
+
+Optional subnodes:
+
+- simple-audio-card,dai-link : Container for dai-link level
+ properties and the CPU and CODEC
+ sub-nodes. This container may be
+ omitted when the card has only one
+ DAI link. See the examples and the
+ section bellow.
+
+Dai-link subnode properties and subnodes:
+
+If dai-link subnode is omitted and the subnode properties are directly
+under "sound"-node the subnode property and subnode names have to be
+prefixed with "simple-audio-card,"-prefix.
+
+Required dai-link subnodes:
+
+- cpu : CPU sub-node
+- codec : CODEC sub-node
+
+Optional dai-link subnode properties:
+
+- format : CPU/CODEC common audio format.
+ "i2s", "right_j", "left_j" , "dsp_a"
+ "dsp_b", "ac97", "pdm", "msb", "lsb"
+- frame-master : Indicates dai-link frame master.
+ phandle to a cpu or codec subnode.
+- bitclock-master : Indicates dai-link bit clock master.
+ phandle to a cpu or codec subnode.
+- bitclock-inversion : bool property. Add this if the
+ dai-link uses bit clock inversion.
+- frame-inversion : bool property. Add this if the
+ dai-link uses frame clock inversion.
+
+For backward compatibility the frame-master and bitclock-master
+properties can be used as booleans in codec subnode to indicate if the
+codec is the dai-link frame or bit clock master. In this case there
+should be no dai-link node, the same properties should not be present
+at sound-node level, and the bitclock-inversion and frame-inversion
+properties should also be placed in the codec node if needed.
+
+Required CPU/CODEC subnodes properties:
+
+- sound-dai : phandle and port of CPU/CODEC
+
+Optional CPU/CODEC subnodes properties:
+
+- dai-tdm-slot-num : Please refer to tdm-slot.txt.
+- dai-tdm-slot-width : Please refer to tdm-slot.txt.
+- clocks / system-clock-frequency : specify subnode's clock if needed.
+ it can be specified via "clocks" if system has
+ clock node (= common clock), or "system-clock-frequency"
+ (if system doens't support common clock)
+ If a clock is specified, it is
+ enabled with clk_prepare_enable()
+ in dai startup() and disabled with
+ clk_disable_unprepare() in dai
+ shutdown().
+
+Example 1 - single DAI link:
+
+sound {
+ compatible = "simple-audio-card";
+ simple-audio-card,name = "VF610-Tower-Sound-Card";
+ simple-audio-card,format = "left_j";
+ simple-audio-card,bitclock-master = <&dailink0_master>;
+ simple-audio-card,frame-master = <&dailink0_master>;
+ simple-audio-card,widgets =
+ "Microphone", "Microphone Jack",
+ "Headphone", "Headphone Jack",
+ "Speaker", "External Speaker";
+ simple-audio-card,routing =
+ "MIC_IN", "Microphone Jack",
+ "Headphone Jack", "HP_OUT",
+ "External Speaker", "LINE_OUT";
+
+ simple-audio-card,cpu {
+ sound-dai = <&sh_fsi2 0>;
+ };
+
+ dailink0_master: simple-audio-card,codec {
+ sound-dai = <&ak4648>;
+ clocks = <&osc>;
+ };
+};
+
+&i2c0 {
+ ak4648: ak4648@12 {
+ #sound-dai-cells = <0>;
+ compatible = "asahi-kasei,ak4648";
+ reg = <0x12>;
+ };
+};
+
+sh_fsi2: sh_fsi2@ec230000 {
+ #sound-dai-cells = <1>;
+ compatible = "renesas,sh_fsi2";
+ reg = <0xec230000 0x400>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 146 0x4>;
+};
+
+Example 2 - many DAI links:
+
+sound {
+ compatible = "simple-audio-card";
+ simple-audio-card,name = "Cubox Audio";
+
+ simple-audio-card,dai-link@0 { /* I2S - HDMI */
+ format = "i2s";
+ cpu {
+ sound-dai = <&audio1 0>;
+ };
+ codec {
+ sound-dai = <&tda998x 0>;
+ };
+ };
+
+ simple-audio-card,dai-link@1 { /* S/PDIF - HDMI */
+ cpu {
+ sound-dai = <&audio1 1>;
+ };
+ codec {
+ sound-dai = <&tda998x 1>;
+ };
+ };
+
+ simple-audio-card,dai-link@2 { /* S/PDIF - S/PDIF */
+ cpu {
+ sound-dai = <&audio1 1>;
+ };
+ codec {
+ sound-dai = <&spdif_codec>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/sound/sirf-audio-codec.txt b/Documentation/devicetree/bindings/sound/sirf-audio-codec.txt
new file mode 100644
index 000000000..062f5ec36
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/sirf-audio-codec.txt
@@ -0,0 +1,17 @@
+SiRF internal audio CODEC
+
+Required properties:
+
+ - compatible : "sirf,atlas6-audio-codec" or "sirf,prima2-audio-codec"
+
+ - reg : the register address of the device.
+
+ - clocks: the clock of SiRF internal audio codec
+
+Example:
+
+audiocodec: audiocodec@b0040000 {
+ compatible = "sirf,atlas6-audio-codec";
+ reg = <0xb0040000 0x10000>;
+ clocks = <&clks 27>;
+};
diff --git a/Documentation/devicetree/bindings/sound/sirf-audio-port.txt b/Documentation/devicetree/bindings/sound/sirf-audio-port.txt
new file mode 100644
index 000000000..1f66de3c8
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/sirf-audio-port.txt
@@ -0,0 +1,20 @@
+* SiRF SoC audio port
+
+Required properties:
+- compatible: "sirf,audio-port"
+- reg: Base address and size entries:
+- dmas: List of DMA controller phandle and DMA request line ordered pairs.
+- dma-names: Identifier string for each DMA request line in the dmas property.
+ These strings correspond 1:1 with the ordered pairs in dmas.
+
+ One of the DMA channels will be responsible for transmission (should be
+ named "tx") and one for reception (should be named "rx").
+
+Example:
+
+audioport: audioport@b0040000 {
+ compatible = "sirf,audio-port";
+ reg = <0xb0040000 0x10000>;
+ dmas = <&dmac1 3>, <&dmac1 8>;
+ dma-names = "rx", "tx";
+};
diff --git a/Documentation/devicetree/bindings/sound/sirf-audio.txt b/Documentation/devicetree/bindings/sound/sirf-audio.txt
new file mode 100644
index 000000000..c88882ca3
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/sirf-audio.txt
@@ -0,0 +1,41 @@
+* SiRF atlas6 and prima2 internal audio codec and port based audio setups
+
+Required properties:
+- compatible: "sirf,sirf-audio-card"
+- sirf,audio-platform: phandle for the platform node
+- sirf,audio-codec: phandle for the SiRF internal codec node
+
+Optional properties:
+- hp-pa-gpios: Need to be present if the board need control external
+ headphone amplifier.
+- spk-pa-gpios: Need to be present if the board need control external
+ speaker amplifier.
+- hp-switch-gpios: Need to be present if the board capable to detect jack
+ insertion, removal.
+
+Available audio endpoints for the audio-routing table:
+
+Board connectors:
+ * Headset Stereophone
+ * Ext Spk
+ * Line In
+ * Mic
+
+SiRF internal audio codec pins:
+ * HPOUTL
+ * HPOUTR
+ * SPKOUT
+ * Ext Mic
+ * Mic Bias
+
+Example:
+
+sound {
+ compatible = "sirf,sirf-audio-card";
+ sirf,audio-codec = <&audiocodec>;
+ sirf,audio-platform = <&audioport>;
+ hp-pa-gpios = <&gpio 44 0>;
+ spk-pa-gpios = <&gpio 46 0>;
+ hp-switch-gpios = <&gpio 45 0>;
+};
+
diff --git a/Documentation/devicetree/bindings/sound/sirf-usp.txt b/Documentation/devicetree/bindings/sound/sirf-usp.txt
new file mode 100644
index 000000000..02f85b32d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/sirf-usp.txt
@@ -0,0 +1,27 @@
+* SiRF SoC USP module
+
+Required properties:
+- compatible: "sirf,prima2-usp-pcm"
+- reg: Base address and size entries:
+- dmas: List of DMA controller phandle and DMA request line ordered pairs.
+- dma-names: Identifier string for each DMA request line in the dmas property.
+ These strings correspond 1:1 with the ordered pairs in dmas.
+
+ One of the DMA channels will be responsible for transmission (should be
+ named "tx") and one for reception (should be named "rx").
+
+- clocks: USP controller clock source
+- pinctrl-names: Must contain a "default" entry.
+- pinctrl-NNN: One property must exist for each entry in pinctrl-names.
+
+Example:
+usp0: usp@b0080000 {
+ compatible = "sirf,prima2-usp-pcm";
+ reg = <0xb0080000 0x10000>;
+ clocks = <&clks 28>;
+ dmas = <&dmac1 1>, <&dmac1 2>;
+ dma-names = "rx", "tx";
+ pinctrl-names = "default";
+ pinctrl-0 = <&usp0_only_utfs_pins_a>;
+};
+
diff --git a/Documentation/devicetree/bindings/sound/snow.txt b/Documentation/devicetree/bindings/sound/snow.txt
new file mode 100644
index 000000000..6df74f156
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/snow.txt
@@ -0,0 +1,22 @@
+Audio Binding for Snow boards
+
+Required properties:
+- compatible : Can be one of the following,
+ "google,snow-audio-max98090" or
+ "google,snow-audio-max98091" or
+ "google,snow-audio-max98095"
+- samsung,i2s-controller: The phandle of the Samsung I2S controller
+- samsung,audio-codec: The phandle of the audio codec
+
+Optional:
+- samsung,model: The name of the sound-card
+
+Example:
+
+sound {
+ compatible = "google,snow-audio-max98095";
+
+ samsung,model = "Snow-I2S-MAX98095";
+ samsung,i2s-controller = <&i2s0>;
+ samsung,audio-codec = <&max98095>;
+};
diff --git a/Documentation/devicetree/bindings/sound/soc-ac97link.txt b/Documentation/devicetree/bindings/sound/soc-ac97link.txt
new file mode 100644
index 000000000..80152a87f
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/soc-ac97link.txt
@@ -0,0 +1,28 @@
+AC97 link bindings
+
+These bindings can be included within any other device node.
+
+Required properties:
+ - pinctrl-names: Has to contain following states to setup the correct
+ pinmuxing for the used gpios:
+ "ac97-running": AC97-link is active
+ "ac97-reset": AC97-link reset state
+ "ac97-warm-reset": AC97-link warm reset state
+ - ac97-gpios: List of gpio phandles with args in the order ac97-sync,
+ ac97-sdata, ac97-reset
+
+
+Example:
+
+ssi {
+ ...
+
+ pinctrl-names = "default", "ac97-running", "ac97-reset", "ac97-warm-reset";
+ pinctrl-0 = <&ac97link_running>;
+ pinctrl-1 = <&ac97link_running>;
+ pinctrl-2 = <&ac97link_reset>;
+ pinctrl-3 = <&ac97link_warm_reset>;
+ ac97-gpios = <&gpio3 20 0 &gpio3 22 0 &gpio3 28 0>;
+
+ ...
+};
diff --git a/Documentation/devicetree/bindings/sound/spdif-receiver.txt b/Documentation/devicetree/bindings/sound/spdif-receiver.txt
new file mode 100644
index 000000000..80f807bf8
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/spdif-receiver.txt
@@ -0,0 +1,10 @@
+Device-Tree bindings for dummy spdif receiver
+
+Required properties:
+ - compatible: should be "linux,spdif-dir".
+
+Example node:
+
+ codec: spdif-receiver {
+ compatible = "linux,spdif-dir";
+ };
diff --git a/Documentation/devicetree/bindings/sound/spdif-transmitter.txt b/Documentation/devicetree/bindings/sound/spdif-transmitter.txt
new file mode 100644
index 000000000..55a85841d
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/spdif-transmitter.txt
@@ -0,0 +1,10 @@
+Device-Tree bindings for dummy spdif transmitter
+
+Required properties:
+ - compatible: should be "linux,spdif-dit".
+
+Example node:
+
+ codec: spdif-transmitter {
+ compatible = "linux,spdif-dit";
+ };
diff --git a/Documentation/devicetree/bindings/sound/ssm2518.txt b/Documentation/devicetree/bindings/sound/ssm2518.txt
new file mode 100644
index 000000000..59381a778
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ssm2518.txt
@@ -0,0 +1,20 @@
+SSM2518 audio amplifier
+
+This device supports I2C only.
+
+Required properties:
+ - compatible : Must be "adi,ssm2518"
+ - reg : the I2C address of the device. This will either be 0x34 (ADDR pin low)
+ or 0x35 (ADDR pin high)
+
+Optional properties:
+ - gpios : GPIO connected to the nSD pin. If the property is not present it is
+ assumed that the nSD pin is hardwired to always on.
+
+Example:
+
+ ssm2518: ssm2518@34 {
+ compatible = "adi,ssm2518";
+ reg = <0x34>;
+ gpios = <&gpio 5 0>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/ssm4567.txt b/Documentation/devicetree/bindings/sound/ssm4567.txt
new file mode 100644
index 000000000..ec3d9e700
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ssm4567.txt
@@ -0,0 +1,15 @@
+Analog Devices SSM4567 audio amplifier
+
+This device supports I2C only.
+
+Required properties:
+ - compatible : Must be "adi,ssm4567"
+ - reg : the I2C address of the device. This will either be 0x34 (LR_SEL/ADDR connected to AGND),
+ 0x35 (LR_SEL/ADDR connected to IOVDD) or 0x36 (LR_SEL/ADDR open).
+
+Example:
+
+ ssm4567: ssm4567@34 {
+ compatible = "adi,ssm4567";
+ reg = <0x34>;
+ };
diff --git a/Documentation/devicetree/bindings/sound/st,sta32x.txt b/Documentation/devicetree/bindings/sound/st,sta32x.txt
new file mode 100644
index 000000000..255de3ae5
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/st,sta32x.txt
@@ -0,0 +1,92 @@
+STA32X audio CODEC
+
+The driver for this device only supports I2C.
+
+Required properties:
+
+ - compatible: "st,sta32x"
+ - reg: the I2C address of the device for I2C
+ - reset-gpios: a GPIO spec for the reset pin. If specified, it will be
+ deasserted before communication to the codec starts.
+
+ - power-down-gpios: a GPIO spec for the power down pin. If specified,
+ it will be deasserted before communication to the codec
+ starts.
+
+ - Vdda-supply: regulator spec, providing 3.3V
+ - Vdd3-supply: regulator spec, providing 3.3V
+ - Vcc-supply: regulator spec, providing 5V - 26V
+
+Optional properties:
+
+ - st,output-conf: number, Selects the output configuration:
+ 0: 2-channel (full-bridge) power, 2-channel data-out
+ 1: 2 (half-bridge). 1 (full-bridge) on-board power
+ 2: 2 Channel (Full-Bridge) Power, 1 Channel FFX
+ 3: 1 Channel Mono-Parallel
+ If parameter is missing, mode 0 will be enabled.
+ This property has to be specified as '/bits/ 8' value.
+
+ - st,ch1-output-mapping: Channel 1 output mapping
+ - st,ch2-output-mapping: Channel 2 output mapping
+ - st,ch3-output-mapping: Channel 3 output mapping
+ 0: Channel 1
+ 1: Channel 2
+ 2: Channel 3
+ If parameter is missing, channel 1 is chosen.
+ This properties have to be specified as '/bits/ 8' values.
+
+ - st,thermal-warning-recover:
+ If present, thermal warning recovery is enabled.
+
+ - st,thermal-warning-adjustment:
+ If present, thermal warning adjustment is enabled.
+
+ - st,fault-detect-recovery:
+ If present, then fault recovery will be enabled.
+
+ - st,drop-compensation-ns: number
+ Only required for "st,ffx-power-output-mode" ==
+ "variable-drop-compensation".
+ Specifies the drop compensation in nanoseconds.
+ The value must be in the range of 0..300, and only
+ multiples of 20 are allowed. Default is 140ns.
+
+ - st,max-power-use-mpcc:
+ If present, then MPCC bits are used for MPC coefficients,
+ otherwise standard MPC coefficients are used.
+
+ - st,max-power-corr:
+ If present, power bridge correction for THD reduction near maximum
+ power output is enabled.
+
+ - st,am-reduction-mode:
+ If present, FFX mode runs in AM reduction mode, otherwise normal
+ FFX mode is used.
+
+ - st,odd-pwm-speed-mode:
+ If present, PWM speed mode run on odd speed mode (341.3 kHz) on all
+ channels. If not present, normal PWM spped mode (384 kHz) will be used.
+
+ - st,invalid-input-detect-mute:
+ If present, automatic invalid input detect mute is enabled.
+
+Example:
+
+codec: sta32x@38 {
+ compatible = "st,sta32x";
+ reg = <0x1c>;
+ reset-gpios = <&gpio1 19 0>;
+ power-down-gpios = <&gpio1 16 0>;
+ st,output-conf = /bits/ 8 <0x3>; // set output to 2-channel
+ // (full-bridge) power,
+ // 2-channel data-out
+ st,ch1-output-mapping = /bits/ 8 <0>; // set channel 1 output ch 1
+ st,ch2-output-mapping = /bits/ 8 <0>; // set channel 2 output ch 1
+ st,ch3-output-mapping = /bits/ 8 <0>; // set channel 3 output ch 1
+ st,max-power-correction; // enables power bridge
+ // correction for THD reduction
+ // near maximum power output
+ st,invalid-input-detect-mute; // mute if no valid digital
+ // audio signal is provided.
+};
diff --git a/Documentation/devicetree/bindings/sound/st,sta350.txt b/Documentation/devicetree/bindings/sound/st,sta350.txt
new file mode 100644
index 000000000..307398ef2
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/st,sta350.txt
@@ -0,0 +1,131 @@
+STA350 audio CODEC
+
+The driver for this device only supports I2C.
+
+Required properties:
+
+ - compatible: "st,sta350"
+ - reg: the I2C address of the device for I2C
+ - reset-gpios: a GPIO spec for the reset pin. If specified, it will be
+ deasserted before communication to the codec starts.
+
+ - power-down-gpios: a GPIO spec for the power down pin. If specified,
+ it will be deasserted before communication to the codec
+ starts.
+
+ - vdd-dig-supply: regulator spec, providing 3.3V
+ - vdd-pll-supply: regulator spec, providing 3.3V
+ - vcc-supply: regulator spec, providing 5V - 26V
+
+Optional properties:
+
+ - st,output-conf: number, Selects the output configuration:
+ 0: 2-channel (full-bridge) power, 2-channel data-out
+ 1: 2 (half-bridge). 1 (full-bridge) on-board power
+ 2: 2 Channel (Full-Bridge) Power, 1 Channel FFX
+ 3: 1 Channel Mono-Parallel
+ If parameter is missing, mode 0 will be enabled.
+ This property has to be specified as '/bits/ 8' value.
+
+ - st,ch1-output-mapping: Channel 1 output mapping
+ - st,ch2-output-mapping: Channel 2 output mapping
+ - st,ch3-output-mapping: Channel 3 output mapping
+ 0: Channel 1
+ 1: Channel 2
+ 2: Channel 3
+ If parameter is missing, channel 1 is chosen.
+ This properties have to be specified as '/bits/ 8' values.
+
+ - st,thermal-warning-recover:
+ If present, thermal warning recovery is enabled.
+
+ - st,thermal-warning-adjustment:
+ If present, thermal warning adjustment is enabled.
+
+ - st,fault-detect-recovery:
+ If present, then fault recovery will be enabled.
+
+ - st,ffx-power-output-mode: string
+ The FFX power output mode selects how the FFX output timing is
+ configured. Must be one of these values:
+ - "drop-compensation"
+ - "tapered-compensation"
+ - "full-power-mode"
+ - "variable-drop-compensation" (default)
+
+ - st,drop-compensation-ns: number
+ Only required for "st,ffx-power-output-mode" ==
+ "variable-drop-compensation".
+ Specifies the drop compensation in nanoseconds.
+ The value must be in the range of 0..300, and only
+ multiples of 20 are allowed. Default is 140ns.
+
+ - st,overcurrent-warning-adjustment:
+ If present, overcurrent warning adjustment is enabled.
+
+ - st,max-power-use-mpcc:
+ If present, then MPCC bits are used for MPC coefficients,
+ otherwise standard MPC coefficients are used.
+
+ - st,max-power-corr:
+ If present, power bridge correction for THD reduction near maximum
+ power output is enabled.
+
+ - st,am-reduction-mode:
+ If present, FFX mode runs in AM reduction mode, otherwise normal
+ FFX mode is used.
+
+ - st,odd-pwm-speed-mode:
+ If present, PWM speed mode run on odd speed mode (341.3 kHz) on all
+ channels. If not present, normal PWM spped mode (384 kHz) will be used.
+
+ - st,distortion-compensation:
+ If present, distortion compensation variable uses DCC coefficient.
+ If not present, preset DC coefficient is used.
+
+ - st,invalid-input-detect-mute:
+ If present, automatic invalid input detect mute is enabled.
+
+ - st,activate-mute-output:
+ If present, a mute output will be activated in ase the volume will
+ reach a value lower than -76 dBFS.
+
+ - st,bridge-immediate-off:
+ If present, the bridge will be switched off immediately after the
+ power-down-gpio goes low. Otherwise, the bridge will wait for 13
+ million clock cycles to pass before shutting down.
+
+ - st,noise-shape-dc-cut:
+ If present, the noise-shaping technique on the DC cutoff filter are
+ enabled.
+
+ - st,powerdown-master-volume:
+ If present, the power-down pin and I2C power-down functions will
+ act on the master volume. Otherwise, the functions will act on the
+ mute commands.
+
+ - st,powerdown-delay-divider:
+ If present, the bridge power-down time will be divided by the provided
+ value. If not specified, a divider of 1 will be used. Allowed values
+ are 1, 2, 4, 8, 16, 32, 64 and 128.
+ This property has to be specified as '/bits/ 8' value.
+
+Example:
+
+codec: sta350@38 {
+ compatible = "st,sta350";
+ reg = <0x1c>;
+ reset-gpios = <&gpio1 19 0>;
+ power-down-gpios = <&gpio1 16 0>;
+ st,output-conf = /bits/ 8 <0x3>; // set output to 2-channel
+ // (full-bridge) power,
+ // 2-channel data-out
+ st,ch1-output-mapping = /bits/ 8 <0>; // set channel 1 output ch 1
+ st,ch2-output-mapping = /bits/ 8 <0>; // set channel 2 output ch 1
+ st,ch3-output-mapping = /bits/ 8 <0>; // set channel 3 output ch 1
+ st,max-power-correction; // enables power bridge
+ // correction for THD reduction
+ // near maximum power output
+ st,invalid-input-detect-mute; // mute if no valid digital
+ // audio signal is provided.
+};
diff --git a/Documentation/devicetree/bindings/sound/storm.txt b/Documentation/devicetree/bindings/sound/storm.txt
new file mode 100644
index 000000000..062a4c185
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/storm.txt
@@ -0,0 +1,23 @@
+* Sound complex for Storm boards
+
+Models a soundcard for Storm boards with the Qualcomm Technologies IPQ806x SOC
+connected to a MAX98357A DAC via I2S.
+
+Required properties:
+
+- compatible : "google,storm-audio"
+- cpu : Phandle of the CPU DAI
+- codec : Phandle of the codec DAI
+
+Optional properties:
+
+- qcom,model : The user-visible name of this sound card.
+
+Example:
+
+sound {
+ compatible = "google,storm-audio";
+ qcom,model = "ipq806x-storm";
+ cpu = <&lpass_cpu>;
+ codec = <&max98357a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/tas2552.txt b/Documentation/devicetree/bindings/sound/tas2552.txt
new file mode 100644
index 000000000..55e2a0af5
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tas2552.txt
@@ -0,0 +1,26 @@
+Texas Instruments - tas2552 Codec module
+
+The tas2552 serial control bus communicates through I2C protocols
+
+Required properties:
+ - compatible - One of:
+ "ti,tas2552" - TAS2552
+ - reg - I2C slave address
+ - supply-*: Required supply regulators are:
+ "vbat" battery voltage
+ "iovdd" I/O Voltage
+ "avdd" Analog DAC Voltage
+
+Optional properties:
+ - enable-gpio - gpio pin to enable/disable the device
+
+Example:
+
+tas2552: tas2552@41 {
+ compatible = "ti,tas2552";
+ reg = <0x41>;
+ enable-gpio = <&gpio4 2 GPIO_ACTIVE_HIGH>;
+};
+
+For more product information please see the link below:
+http://www.ti.com/product/TAS2552
diff --git a/Documentation/devicetree/bindings/sound/tdm-slot.txt b/Documentation/devicetree/bindings/sound/tdm-slot.txt
new file mode 100644
index 000000000..6a2c84247
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tdm-slot.txt
@@ -0,0 +1,20 @@
+TDM slot:
+
+This specifies audio DAI's TDM slot.
+
+TDM slot properties:
+dai-tdm-slot-num : Number of slots in use.
+dai-tdm-slot-width : Width in bits for each slot.
+
+For instance:
+ dai-tdm-slot-num = <2>;
+ dai-tdm-slot-width = <8>;
+
+And for each spcified driver, there could be one .of_xlate_tdm_slot_mask()
+to specify a explicit mapping of the channels and the slots. If it's absent
+the default snd_soc_of_xlate_tdm_slot_mask() will be used to generating the
+tx and rx masks.
+
+For snd_soc_of_xlate_tdm_slot_mask(), the tx and rx masks will use a 1 bit
+for an active slot as default, and the default active bits are at the LSB of
+the masks.
diff --git a/Documentation/devicetree/bindings/sound/ti,pcm1681.txt b/Documentation/devicetree/bindings/sound/ti,pcm1681.txt
new file mode 100644
index 000000000..4df17185a
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ti,pcm1681.txt
@@ -0,0 +1,15 @@
+Texas Instruments PCM1681 8-channel PWM Processor
+
+Required properties:
+
+ - compatible: Should contain "ti,pcm1681".
+ - reg: The i2c address. Should contain <0x4c>.
+
+Examples:
+
+ i2c_bus {
+ pcm1681@4c {
+ compatible = "ti,pcm1681";
+ reg = <0x4c>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/sound/ti,tas5086.txt b/Documentation/devicetree/bindings/sound/ti,tas5086.txt
new file mode 100644
index 000000000..234dad296
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ti,tas5086.txt
@@ -0,0 +1,48 @@
+Texas Instruments TAS5086 6-channel PWM Processor
+
+Required properties:
+
+ - compatible: Should contain "ti,tas5086".
+ - reg: The i2c address. Should contain <0x1b>.
+
+Optional properties:
+
+ - reset-gpio: A GPIO spec to define which pin is connected to the
+ chip's !RESET pin. If specified, the driver will
+ assert a hardware reset at probe time.
+
+ - ti,charge-period: This property should contain the time in microseconds
+ that closely matches the external single-ended
+ split-capacitor charge period. The hardware chip
+ waits for this period of time before starting the
+ PWM signals. This helps reduce pops and clicks.
+
+ When not specified, the hardware default of 1300ms
+ is retained.
+
+ - ti,mid-z-channel-X: Boolean properties, X being a number from 1 to 6.
+ If given, channel X will start with the Mid-Z start
+ sequence, otherwise the default Low-Z scheme is used.
+
+ The correct configuration depends on how the power
+ stages connected to the PWM output pins work. Not all
+ power stages are compatible to Mid-Z - please refer
+ to the datasheets for more details.
+
+ Most systems should not set any of these properties.
+
+ - avdd-supply: Power supply for AVDD, providing 3.3V
+ - dvdd-supply: Power supply for DVDD, providing 3.3V
+
+Examples:
+
+ i2c_bus {
+ tas5086@1b {
+ compatible = "ti,tas5086";
+ reg = <0x1b>;
+ reset-gpio = <&gpio 23 0>;
+ ti,charge-period = <156000>;
+ avdd-supply = <&vdd_3v3_reg>;
+ dvdd-supply = <&vdd_3v3_reg>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt b/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
new file mode 100644
index 000000000..eff12be5e
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
@@ -0,0 +1,61 @@
+Texas Instruments - tlv320aic31xx Codec module
+
+The tlv320aic31xx serial control bus communicates through I2C protocols
+
+Required properties:
+
+- compatible - "string" - One of:
+ "ti,tlv320aic310x" - Generic TLV320AIC31xx with mono speaker amp
+ "ti,tlv320aic311x" - Generic TLV320AIC31xx with stereo speaker amp
+ "ti,tlv320aic3100" - TLV320AIC3100 (mono speaker amp, no MiniDSP)
+ "ti,tlv320aic3110" - TLV320AIC3110 (stereo speaker amp, no MiniDSP)
+ "ti,tlv320aic3120" - TLV320AIC3120 (mono speaker amp, MiniDSP)
+ "ti,tlv320aic3111" - TLV320AIC3111 (stereo speaker amp, MiniDSP)
+
+- reg - <int> - I2C slave address
+- HPVDD-supply, SPRVDD-supply, SPLVDD-supply, AVDD-supply, IOVDD-supply,
+ DVDD-supply : power supplies for the device as covered in
+ Documentation/devicetree/bindings/regulator/regulator.txt
+
+
+Optional properties:
+
+- gpio-reset - gpio pin number used for codec reset
+- ai31xx-micbias-vg - MicBias Voltage setting
+ 1 or MICBIAS_2_0V - MICBIAS output is powered to 2.0V
+ 2 or MICBIAS_2_5V - MICBIAS output is powered to 2.5V
+ 3 or MICBIAS_AVDD - MICBIAS output is connected to AVDD
+ If this node is not mentioned or if the value is unknown, then
+ micbias is set to 2.0V.
+
+CODEC output pins:
+ * HPL
+ * HPR
+ * SPL, devices with stereo speaker amp
+ * SPR, devices with stereo speaker amp
+ * SPK, devices with mono speaker amp
+ * MICBIAS
+
+CODEC input pins:
+ * MIC1LP
+ * MIC1RP
+ * MIC1LM
+
+The pins can be used in referring sound node's audio-routing property.
+
+Example:
+#include <dt-bindings/sound/tlv320aic31xx-micbias.h>
+
+tlv320aic31xx: tlv320aic31xx@18 {
+ compatible = "ti,tlv320aic311x";
+ reg = <0x18>;
+
+ ai31xx-micbias-vg = <MICBIAS_OFF>;
+
+ HPVDD-supply = <&regulator>;
+ SPRVDD-supply = <&regulator>;
+ SPLVDD-supply = <&regulator>;
+ AVDD-supply = <&regulator>;
+ IOVDD-supply = <&regulator>;
+ DVDD-supply = <&regulator>;
+};
diff --git a/Documentation/devicetree/bindings/sound/tlv320aic32x4.txt b/Documentation/devicetree/bindings/sound/tlv320aic32x4.txt
new file mode 100644
index 000000000..5e2741af2
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tlv320aic32x4.txt
@@ -0,0 +1,30 @@
+Texas Instruments - tlv320aic32x4 Codec module
+
+The tlv320aic32x4 serial control bus communicates through I2C protocols
+
+Required properties:
+ - compatible: Should be "ti,tlv320aic32x4"
+ - reg: I2C slave address
+ - supply-*: Required supply regulators are:
+ "iov" - digital IO power supply
+ "ldoin" - LDO power supply
+ "dv" - Digital core power supply
+ "av" - Analog core power supply
+ If you supply ldoin, dv and av are optional. Otherwise they are required
+ See regulator/regulator.txt for more information about the detailed binding
+ format.
+
+Optional properties:
+ - reset-gpios: Reset-GPIO phandle with args as described in gpio/gpio.txt
+ - clocks/clock-names: Clock named 'mclk' for the master clock of the codec.
+ See clock/clock-bindings.txt for information about the detailed format.
+
+
+Example:
+
+codec: tlv320aic32x4@18 {
+ compatible = "ti,tlv320aic32x4";
+ reg = <0x18>;
+ clocks = <&clks 201>;
+ clock-names = "mclk";
+};
diff --git a/Documentation/devicetree/bindings/sound/tlv320aic3x.txt b/Documentation/devicetree/bindings/sound/tlv320aic3x.txt
new file mode 100644
index 000000000..47a213c41
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tlv320aic3x.txt
@@ -0,0 +1,67 @@
+Texas Instruments - tlv320aic3x Codec module
+
+The tlv320aic3x serial control bus communicates through I2C protocols
+
+Required properties:
+
+- compatible - "string" - One of:
+ "ti,tlv320aic3x" - Generic TLV320AIC3x device
+ "ti,tlv320aic33" - TLV320AIC33
+ "ti,tlv320aic3007" - TLV320AIC3007
+ "ti,tlv320aic3106" - TLV320AIC3106
+ "ti,tlv320aic3104" - TLV320AIC3104
+
+
+- reg - <int> - I2C slave address
+
+
+Optional properties:
+
+- gpio-reset - gpio pin number used for codec reset
+- ai3x-gpio-func - <array of 2 int> - AIC3X_GPIO1 & AIC3X_GPIO2 Functionality
+ - Not supported on tlv320aic3104
+- ai3x-micbias-vg - MicBias Voltage required.
+ 1 - MICBIAS output is powered to 2.0V,
+ 2 - MICBIAS output is powered to 2.5V,
+ 3 - MICBIAS output is connected to AVDD,
+ If this node is not mentioned or if the value is incorrect, then MicBias
+ is powered down.
+- AVDD-supply, IOVDD-supply, DRVDD-supply, DVDD-supply : power supplies for the
+ device as covered in Documentation/devicetree/bindings/regulator/regulator.txt
+
+CODEC output pins:
+ * LLOUT
+ * RLOUT
+ * MONO_LOUT
+ * HPLOUT
+ * HPROUT
+ * HPLCOM
+ * HPRCOM
+
+CODEC input pins for TLV320AIC3104:
+ * MIC2L
+ * MIC2R
+ * LINE1L
+ * LINE1R
+
+CODEC input pins for other compatible codecs:
+ * MIC3L
+ * MIC3R
+ * LINE1L
+ * LINE2L
+ * LINE1R
+ * LINE2R
+
+The pins can be used in referring sound node's audio-routing property.
+
+Example:
+
+tlv320aic3x: tlv320aic3x@1b {
+ compatible = "ti,tlv320aic3x";
+ reg = <0x1b>;
+
+ AVDD-supply = <&regulator>;
+ IOVDD-supply = <&regulator>;
+ DRVDD-supply = <&regulator>;
+ DVDD-supply = <&regulator>;
+};
diff --git a/Documentation/devicetree/bindings/sound/tpa6130a2.txt b/Documentation/devicetree/bindings/sound/tpa6130a2.txt
new file mode 100644
index 000000000..6dfa740e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/tpa6130a2.txt
@@ -0,0 +1,27 @@
+Texas Instruments - tpa6130a2 Codec module
+
+The tpa6130a2 serial control bus communicates through I2C protocols
+
+Required properties:
+
+- compatible - "string" - One of:
+ "ti,tpa6130a2" - TPA6130A2
+ "ti,tpa6140a2" - TPA6140A2
+
+
+- reg - <int> - I2C slave address
+
+- Vdd-supply - <phandle> - power supply regulator
+
+Optional properties:
+
+- power-gpio - gpio pin to power the device
+
+Example:
+
+tpa6130a2: tpa6130a2@60 {
+ compatible = "ti,tpa6130a2";
+ reg = <0x60>;
+ Vdd-supply = <&vmmc2>;
+ power-gpio = <&gpio4 2 GPIO_ACTIVE_HIGH>;
+};
diff --git a/Documentation/devicetree/bindings/sound/ts3a227e.txt b/Documentation/devicetree/bindings/sound/ts3a227e.txt
new file mode 100644
index 000000000..a836881d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ts3a227e.txt
@@ -0,0 +1,31 @@
+Texas Instruments TS3A227E
+Autonomous Audio Accessory Detection and Configuration Switch
+
+The TS3A227E detect headsets of 3-ring and 4-ring standards and
+switches automatically to route the microphone correctly. It also
+handles key press detection in accordance with the Android audio
+headset specification v1.0.
+
+Required properties:
+
+ - compatible: Should contain "ti,ts3a227e".
+ - reg: The i2c address. Should contain <0x3b>.
+ - interrupt-parent: The parent interrupt controller
+ - interrupts: Interrupt number for /INT pin from the 227e
+
+Optional properies:
+ - ti,micbias: Intended MICBIAS voltage (datasheet section 9.6.7).
+ Select 0/1/2/3/4/5/6/7 to specify MACBIAS voltage
+ 2.1V/2.2V/2.3V/2.4V/2.5V/2.6V/2.7V/2.8V
+ Default value is "1" (2.2V).
+
+Examples:
+
+ i2c {
+ ts3a227e@3b {
+ compatible = "ti,ts3a227e";
+ reg = <0x3b>;
+ interrupt-parent = <&gpio>;
+ interrupts = <3 IRQ_TYPE_LEVEL_LOW>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/sound/ux500-mop500.txt b/Documentation/devicetree/bindings/sound/ux500-mop500.txt
new file mode 100644
index 000000000..48e071c96
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ux500-mop500.txt
@@ -0,0 +1,39 @@
+* MOP500 Audio Machine Driver
+
+This node is responsible for linking together all ux500 Audio Driver components.
+
+Required properties:
+ - compatible : "stericsson,snd-soc-mop500"
+
+Non-standard properties:
+ - stericsson,cpu-dai : Phandle to the CPU-side DAI
+ - stericsson,audio-codec : Phandle to the Audio CODEC
+ - stericsson,card-name : Over-ride default card name
+
+Example:
+
+ sound {
+ compatible = "stericsson,snd-soc-mop500";
+
+ stericsson,cpu-dai = <&msp1 &msp3>;
+ stericsson,audio-codec = <&codec>;
+ };
+
+ msp1: msp@80124000 {
+ compatible = "stericsson,ux500-msp-i2s";
+ reg = <0x80124000 0x1000>;
+ interrupts = <0 62 0x4>;
+ v-ape-supply = <&db8500_vape_reg>;
+ };
+
+ msp3: msp@80125000 {
+ compatible = "stericsson,ux500-msp-i2s";
+ reg = <0x80125000 0x1000>;
+ interrupts = <0 62 0x4>;
+ v-ape-supply = <&db8500_vape_reg>;
+ };
+
+ codec: ab8500-codec {
+ compatible = "stericsson,ab8500-codec";
+ stericsson,earpeice-cmv = <950>; /* Units in mV. */
+ };
diff --git a/Documentation/devicetree/bindings/sound/ux500-msp.txt b/Documentation/devicetree/bindings/sound/ux500-msp.txt
new file mode 100644
index 000000000..99acd9c77
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/ux500-msp.txt
@@ -0,0 +1,43 @@
+* ux500 MSP (CPU-side Digital Audio Interface)
+
+Required properties:
+ - compatible :"stericsson,ux500-msp-i2s"
+ - reg : Physical base address and length of the device's registers.
+
+Optional properties:
+ - interrupts : The interrupt output from the device.
+ - interrupt-parent : The parent interrupt controller.
+ - <name>-supply : Phandle to the regulator <name> supply
+
+Example:
+
+ sound {
+ compatible = "stericsson,snd-soc-mop500";
+
+ stericsson,platform-pcm-dma = <&pcm>;
+ stericsson,cpu-dai = <&msp1 &msp3>;
+ stericsson,audio-codec = <&codec>;
+ };
+
+ pcm: ux500-pcm {
+ compatible = "stericsson,ux500-pcm";
+ };
+
+ msp1: msp@80124000 {
+ compatible = "stericsson,ux500-msp-i2s";
+ reg = <0x80124000 0x1000>;
+ interrupts = <0 62 0x4>;
+ v-ape-supply = <&db8500_vape_reg>;
+ };
+
+ msp3: msp@80125000 {
+ compatible = "stericsson,ux500-msp-i2s";
+ reg = <0x80125000 0x1000>;
+ interrupts = <0 62 0x4>;
+ v-ape-supply = <&db8500_vape_reg>;
+ };
+
+ codec: ab8500-codec {
+ compatible = "stericsson,ab8500-codec";
+ stericsson,earpeice-cmv = <950>; /* Units in mV. */
+ };
diff --git a/Documentation/devicetree/bindings/sound/widgets.txt b/Documentation/devicetree/bindings/sound/widgets.txt
new file mode 100644
index 000000000..b6de5ba3b
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/widgets.txt
@@ -0,0 +1,20 @@
+Widgets:
+
+This mainly specifies audio off-codec DAPM widgets.
+
+Each entry is a pair of strings in DT:
+
+ "template-wname", "user-supplied-wname"
+
+The "template-wname" being the template widget name and currently includes:
+"Microphone", "Line", "Headphone" and "Speaker".
+
+The "user-supplied-wname" being the user specified widget name.
+
+For instance:
+ simple-audio-widgets =
+ "Microphone", "Microphone Jack",
+ "Line", "Line In Jack",
+ "Line", "Line Out Jack",
+ "Headphone", "Headphone Jack",
+ "Speaker", "Speaker External";
diff --git a/Documentation/devicetree/bindings/sound/wm8510.txt b/Documentation/devicetree/bindings/sound/wm8510.txt
new file mode 100644
index 000000000..fa1a32b85
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8510.txt
@@ -0,0 +1,18 @@
+WM8510 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "wlf,wm8510"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+Example:
+
+codec: wm8510@1a {
+ compatible = "wlf,wm8510";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8523.txt b/Documentation/devicetree/bindings/sound/wm8523.txt
new file mode 100644
index 000000000..04746186b
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8523.txt
@@ -0,0 +1,16 @@
+WM8523 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+ - compatible : "wlf,wm8523"
+
+ - reg : the I2C address of the device.
+
+Example:
+
+codec: wm8523@1a {
+ compatible = "wlf,wm8523";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8580.txt b/Documentation/devicetree/bindings/sound/wm8580.txt
new file mode 100644
index 000000000..7d9821f34
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8580.txt
@@ -0,0 +1,16 @@
+WM8580 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+ - compatible : "wlf,wm8580"
+
+ - reg : the I2C address of the device.
+
+Example:
+
+codec: wm8580@1a {
+ compatible = "wlf,wm8580";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8711.txt b/Documentation/devicetree/bindings/sound/wm8711.txt
new file mode 100644
index 000000000..8ed9998cd
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8711.txt
@@ -0,0 +1,18 @@
+WM8711 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "wlf,wm8711"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+Example:
+
+codec: wm8711@1a {
+ compatible = "wlf,wm8711";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8728.txt b/Documentation/devicetree/bindings/sound/wm8728.txt
new file mode 100644
index 000000000..a8b5c3668
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8728.txt
@@ -0,0 +1,18 @@
+WM8728 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "wlf,wm8728"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+Example:
+
+codec: wm8728@1a {
+ compatible = "wlf,wm8728";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8731.txt b/Documentation/devicetree/bindings/sound/wm8731.txt
new file mode 100644
index 000000000..236690e99
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8731.txt
@@ -0,0 +1,27 @@
+WM8731 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "wlf,wm8731"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+Example:
+
+codec: wm8731@1a {
+ compatible = "wlf,wm8731";
+ reg = <0x1a>;
+};
+
+Available audio endpoints for an audio-routing table:
+ * LOUT: Left Channel Line Output
+ * ROUT: Right Channel Line Output
+ * LHPOUT: Left Channel Headphone Output
+ * RHPOUT: Right Channel Headphone Output
+ * LLINEIN: Left Channel Line Input
+ * RLINEIN: Right Channel Line Input
+ * MICIN: Microphone Input
diff --git a/Documentation/devicetree/bindings/sound/wm8737.txt b/Documentation/devicetree/bindings/sound/wm8737.txt
new file mode 100644
index 000000000..4bc2cea3b
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8737.txt
@@ -0,0 +1,18 @@
+WM8737 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "wlf,wm8737"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+Example:
+
+codec: wm8737@1a {
+ compatible = "wlf,wm8737";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8741.txt b/Documentation/devicetree/bindings/sound/wm8741.txt
new file mode 100644
index 000000000..74bda58c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8741.txt
@@ -0,0 +1,18 @@
+WM8741 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "wlf,wm8741"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+Example:
+
+codec: wm8741@1a {
+ compatible = "wlf,wm8741";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8750.txt b/Documentation/devicetree/bindings/sound/wm8750.txt
new file mode 100644
index 000000000..8db239fd5
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8750.txt
@@ -0,0 +1,18 @@
+WM8750 and WM8987 audio CODECs
+
+These devices support both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "wlf,wm8750" or "wlf,wm8987"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+Example:
+
+codec: wm8750@1a {
+ compatible = "wlf,wm8750";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8753.txt b/Documentation/devicetree/bindings/sound/wm8753.txt
new file mode 100644
index 000000000..8eee61282
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8753.txt
@@ -0,0 +1,40 @@
+WM8753 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "wlf,wm8753"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+Pins on the device (for linking into audio routes):
+
+ * LOUT1
+ * LOUT2
+ * ROUT1
+ * ROUT2
+ * MONO1
+ * MONO2
+ * OUT3
+ * OUT4
+ * LINE1
+ * LINE2
+ * RXP
+ * RXN
+ * ACIN
+ * ACOP
+ * MIC1N
+ * MIC1
+ * MIC2N
+ * MIC2
+ * Mic Bias
+
+Example:
+
+codec: wm8753@1a {
+ compatible = "wlf,wm8753";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8770.txt b/Documentation/devicetree/bindings/sound/wm8770.txt
new file mode 100644
index 000000000..866e00ca1
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8770.txt
@@ -0,0 +1,16 @@
+WM8770 audio CODEC
+
+This device supports SPI.
+
+Required properties:
+
+ - compatible : "wlf,wm8770"
+
+ - reg : the chip select number.
+
+Example:
+
+codec: wm8770@1 {
+ compatible = "wlf,wm8770";
+ reg = <1>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8776.txt b/Documentation/devicetree/bindings/sound/wm8776.txt
new file mode 100644
index 000000000..3b9ca49ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8776.txt
@@ -0,0 +1,18 @@
+WM8776 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "wlf,wm8776"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+Example:
+
+codec: wm8776@1a {
+ compatible = "wlf,wm8776";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8804.txt b/Documentation/devicetree/bindings/sound/wm8804.txt
new file mode 100644
index 000000000..6fd124b16
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8804.txt
@@ -0,0 +1,25 @@
+WM8804 audio CODEC
+
+This device supports both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : "wlf,wm8804"
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+ - PVDD-supply, DVDD-supply : Power supplies for the device, as covered
+ in Documentation/devicetree/bindings/regulator/regulator.txt
+
+Optional properties:
+
+ - wlf,reset-gpio: A GPIO specifier for the GPIO controlling the reset pin
+
+Example:
+
+codec: wm8804@1a {
+ compatible = "wlf,wm8804";
+ reg = <0x1a>;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8903.txt b/Documentation/devicetree/bindings/sound/wm8903.txt
new file mode 100644
index 000000000..94ec32c19
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8903.txt
@@ -0,0 +1,69 @@
+WM8903 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+ - compatible : "wlf,wm8903"
+
+ - reg : the I2C address of the device.
+
+ - gpio-controller : Indicates this device is a GPIO controller.
+
+ - #gpio-cells : Should be two. The first cell is the pin number and the
+ second cell is used to specify optional parameters (currently unused).
+
+Optional properties:
+
+ - interrupts : The interrupt line the codec is connected to.
+
+ - micdet-cfg : Default register value for R6 (Mic Bias). If absent, the
+ default is 0.
+
+ - micdet-delay : The debounce delay for microphone detection in mS. If
+ absent, the default is 100.
+
+ - gpio-cfg : A list of GPIO configuration register values. The list must
+ be 5 entries long. If absent, no configuration of these registers is
+ performed. If any entry has the value 0xffffffff, that GPIO's
+ configuration will not be modified.
+
+Pins on the device (for linking into audio routes):
+
+ * IN1L
+ * IN1R
+ * IN2L
+ * IN2R
+ * IN3L
+ * IN3R
+ * DMICDAT
+ * HPOUTL
+ * HPOUTR
+ * LINEOUTL
+ * LINEOUTR
+ * LOP
+ * LON
+ * ROP
+ * RON
+ * MICBIAS
+
+Example:
+
+codec: wm8903@1a {
+ compatible = "wlf,wm8903";
+ reg = <0x1a>;
+ interrupts = < 347 >;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ micdet-cfg = <0>;
+ micdet-delay = <100>;
+ gpio-cfg = <
+ 0x0600 /* DMIC_LR, output */
+ 0x0680 /* DMIC_DAT, input */
+ 0x0000 /* GPIO, output, low */
+ 0x0200 /* Interrupt, output */
+ 0x01a0 /* BCLK, input, active high */
+ >;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8904.txt b/Documentation/devicetree/bindings/sound/wm8904.txt
new file mode 100644
index 000000000..66bf26142
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8904.txt
@@ -0,0 +1,33 @@
+WM8904 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+ - compatible: "wlf,wm8904" or "wlf,wm8912"
+ - reg: the I2C address of the device.
+ - clock-names: "mclk"
+ - clocks: reference to
+ <Documentation/devicetree/bindings/clock/clock-bindings.txt>
+
+Pins on the device (for linking into audio routes):
+
+ * IN1L
+ * IN1R
+ * IN2L
+ * IN2R
+ * IN3L
+ * IN3R
+ * HPOUTL
+ * HPOUTR
+ * LINEOUTL
+ * LINEOUTR
+ * MICBIAS
+
+Examples:
+
+codec: wm8904@1a {
+ compatible = "wlf,wm8904";
+ reg = <0x1a>;
+ clocks = <&pck0>;
+ clock-names = "mclk";
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8960.txt b/Documentation/devicetree/bindings/sound/wm8960.txt
new file mode 100644
index 000000000..2deb8a3da
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8960.txt
@@ -0,0 +1,31 @@
+WM8960 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+ - compatible : "wlf,wm8960"
+
+ - reg : the I2C address of the device.
+
+Optional properties:
+ - wlf,shared-lrclk: This is a boolean property. If present, the LRCM bit of
+ R24 (Additional control 2) gets set, indicating that ADCLRC and DACLRC pins
+ will be disabled only when ADC (Left and Right) and DAC (Left and Right)
+ are disabled.
+ When wm8960 works on synchronize mode and DACLRC pin is used to supply
+ frame clock, it will no frame clock for captrue unless enable DAC to enable
+ DACLRC pin. If shared-lrclk is present, no need to enable DAC for captrue.
+
+ - wlf,capless: This is a boolean property. If present, OUT3 pin will be
+ enabled and disabled together with HP_L and HP_R pins in response to jack
+ detect events.
+
+Example:
+
+codec: wm8960@1a {
+ compatible = "wlf,wm8960";
+ reg = <0x1a>;
+
+ wlf,shared-lrclk;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8962.txt b/Documentation/devicetree/bindings/sound/wm8962.txt
new file mode 100644
index 000000000..7f82b59ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8962.txt
@@ -0,0 +1,39 @@
+WM8962 audio CODEC
+
+This device supports I2C only.
+
+Required properties:
+
+ - compatible : "wlf,wm8962"
+
+ - reg : the I2C address of the device.
+
+Optional properties:
+ - spk-mono: This is a boolean property. If present, the SPK_MONO bit
+ of R51 (Class D Control 2) gets set, indicating that the speaker is
+ in mono mode.
+
+ - mic-cfg : Default register value for R48 (Additional Control 4).
+ If absent, the default should be the register default.
+
+ - gpio-cfg : A list of GPIO configuration register values. The list must
+ be 6 entries long. If absent, no configuration of these registers is
+ performed. And note that only the value within [0x0, 0xffff] is valid.
+ Any other value is regarded as setting the GPIO register by its reset
+ value 0x0.
+
+Example:
+
+codec: wm8962@1a {
+ compatible = "wlf,wm8962";
+ reg = <0x1a>;
+
+ gpio-cfg = <
+ 0x0000 /* 0:Default */
+ 0x0000 /* 1:Default */
+ 0x0013 /* 2:FN_DMICCLK */
+ 0x0000 /* 3:Default */
+ 0x8014 /* 4:FN_DMICCDAT */
+ 0x0000 /* 5:Default */
+ >;
+};
diff --git a/Documentation/devicetree/bindings/sound/wm8994.txt b/Documentation/devicetree/bindings/sound/wm8994.txt
new file mode 100644
index 000000000..e045e90a0
--- /dev/null
+++ b/Documentation/devicetree/bindings/sound/wm8994.txt
@@ -0,0 +1,78 @@
+WM1811/WM8994/WM8958 audio CODEC
+
+These devices support both I2C and SPI (configured with pin strapping
+on the board).
+
+Required properties:
+
+ - compatible : One of "wlf,wm1811", "wlf,wm8994" or "wlf,wm8958".
+
+ - reg : the I2C address of the device for I2C, the chip select
+ number for SPI.
+
+ - gpio-controller : Indicates this device is a GPIO controller.
+ - #gpio-cells : Must be 2. The first cell is the pin number and the
+ second cell is used to specify optional parameters (currently unused).
+
+ - AVDD2-supply, DBVDD1-supply, DBVDD2-supply, DBVDD3-supply, CPVDD-supply,
+ SPKVDD1-supply, SPKVDD2-supply : power supplies for the device, as covered
+ in Documentation/devicetree/bindings/regulator/regulator.txt
+
+Optional properties:
+
+ - interrupts : The interrupt line the IRQ signal for the device is
+ connected to. This is optional, if it is not connected then none
+ of the interrupt related properties should be specified.
+ - interrupt-controller : These devices contain interrupt controllers
+ and may provide interrupt services to other devices if they have an
+ interrupt line connected.
+ - interrupt-parent : The parent interrupt controller.
+ - #interrupt-cells: the number of cells to describe an IRQ, this should be 2.
+ The first cell is the IRQ number.
+ The second cell is the flags, encoded as the trigger masks from
+ Documentation/devicetree/bindings/interrupts.txt
+
+ - clocks : A list of up to two phandle and clock specifier pairs
+ - clock-names : A list of clock names sorted in the same order as clocks.
+ Valid clock names are "MCLK1" and "MCLK2".
+
+ - wlf,gpio-cfg : A list of GPIO configuration register values. If absent,
+ no configuration of these registers is performed. If any value is
+ over 0xffff then the register will be left as default. If present 11
+ values must be supplied.
+
+ - wlf,micbias-cfg : Two MICBIAS register values for WM1811 or
+ WM8958. If absent the register defaults will be used.
+
+ - wlf,ldo1ena : GPIO specifier for control of LDO1ENA input to device.
+ - wlf,ldo2ena : GPIO specifier for control of LDO2ENA input to device.
+
+ - wlf,lineout1-se : If present LINEOUT1 is in single ended mode.
+ - wlf,lineout2-se : If present LINEOUT2 is in single ended mode.
+
+ - wlf,lineout1-feedback : If present LINEOUT1 has common mode feedback
+ connected.
+ - wlf,lineout2-feedback : If present LINEOUT2 has common mode feedback
+ connected.
+
+ - wlf,ldoena-always-driven : If present LDOENA is always driven.
+
+Example:
+
+codec: wm8994@1a {
+ compatible = "wlf,wm8994";
+ reg = <0x1a>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ lineout1-se;
+
+ AVDD2-supply = <&regulator>;
+ CPVDD-supply = <&regulator>;
+ DBVDD1-supply = <&regulator>;
+ DBVDD2-supply = <&regulator>;
+ DBVDD3-supply = <&regulator>;
+ SPKVDD1-supply = <&regulator>;
+ SPKVDD2-supply = <&regulator>;
+};
diff --git a/Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt b/Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt
new file mode 100644
index 000000000..f11f295c8
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/brcm,bcm2835-spi.txt
@@ -0,0 +1,22 @@
+Broadcom BCM2835 SPI0 controller
+
+The BCM2835 contains two forms of SPI master controller, one known simply as
+SPI0, and the other known as the "Universal SPI Master"; part of the
+auxiliary block. This binding applies to the SPI0 controller.
+
+Required properties:
+- compatible: Should be "brcm,bcm2835-spi".
+- reg: Should contain register location and length.
+- interrupts: Should contain interrupt.
+- clocks: The clock feeding the SPI controller.
+
+Example:
+
+spi@20204000 {
+ compatible = "brcm,bcm2835-spi";
+ reg = <0x7e204000 0x1000>;
+ interrupts = <2 22>;
+ clocks = <&clk_spi>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+};
diff --git a/Documentation/devicetree/bindings/spi/efm32-spi.txt b/Documentation/devicetree/bindings/spi/efm32-spi.txt
new file mode 100644
index 000000000..750e29aff
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/efm32-spi.txt
@@ -0,0 +1,41 @@
+* Energy Micro EFM32 SPI
+
+Required properties:
+- #address-cells: see spi-bus.txt
+- #size-cells: see spi-bus.txt
+- compatible: should be "energymicro,efm32-spi"
+- reg: Offset and length of the register set for the controller
+- interrupts: pair specifying rx and tx irq
+- clocks: phandle to the spi clock
+- cs-gpios: see spi-bus.txt
+
+Recommended properties :
+- energymicro,location: Value to write to the ROUTE register's LOCATION
+ bitfield to configure the pinmux for the device, see
+ datasheet for values.
+ If this property is not provided, keeping what is
+ already configured in the hardware, so its either the
+ reset default 0 or whatever the bootloader did.
+
+Example:
+
+spi1: spi@0x4000c400 { /* USART1 */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "energymicro,efm32-spi";
+ reg = <0x4000c400 0x400>;
+ interrupts = <15 16>;
+ clocks = <&cmu 20>;
+ cs-gpios = <&gpio 51 1>; // D3
+ energymicro,location = <1>;
+ status = "ok";
+
+ ks8851@0 {
+ compatible = "ks8851";
+ spi-max-frequency = <6000000>;
+ reg = <0>;
+ interrupt-parent = <&boardfpga>;
+ interrupts = <4>;
+ status = "ok";
+ };
+};
diff --git a/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
new file mode 100644
index 000000000..523341a0e
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/fsl-imx-cspi.txt
@@ -0,0 +1,37 @@
+* Freescale (Enhanced) Configurable Serial Peripheral Interface
+ (CSPI/eCSPI) for i.MX
+
+Required properties:
+- compatible :
+ - "fsl,imx1-cspi" for SPI compatible with the one integrated on i.MX1
+ - "fsl,imx21-cspi" for SPI compatible with the one integrated on i.MX21
+ - "fsl,imx27-cspi" for SPI compatible with the one integrated on i.MX27
+ - "fsl,imx31-cspi" for SPI compatible with the one integrated on i.MX31
+ - "fsl,imx35-cspi" for SPI compatible with the one integrated on i.MX35
+ - "fsl,imx51-ecspi" for SPI compatible with the one integrated on i.MX51
+- reg : Offset and length of the register set for the device
+- interrupts : Should contain CSPI/eCSPI interrupt
+- fsl,spi-num-chipselects : Contains the number of the chipselect
+- cs-gpios : Specifies the gpio pins to be used for chipselects.
+- clocks : Clock specifiers for both ipg and per clocks.
+- clock-names : Clock names should include both "ipg" and "per"
+See the clock consumer binding,
+ Documentation/devicetree/bindings/clock/clock-bindings.txt
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+ Documentation/devicetree/bindings/dma/dma.txt
+- dma-names: DMA request names should include "tx" and "rx" if present.
+
+Example:
+
+ecspi@70010000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx51-ecspi";
+ reg = <0x70010000 0x4000>;
+ interrupts = <36>;
+ fsl,spi-num-chipselects = <2>;
+ cs-gpios = <&gpio3 24 0>, /* GPIO3_24 */
+ <&gpio3 25 0>; /* GPIO3_25 */
+ dmas = <&sdma 3 7 1>, <&sdma 4 7 2>;
+ dma-names = "rx", "tx";
+};
diff --git a/Documentation/devicetree/bindings/spi/fsl-spi.txt b/Documentation/devicetree/bindings/spi/fsl-spi.txt
new file mode 100644
index 000000000..a23313720
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/fsl-spi.txt
@@ -0,0 +1,60 @@
+* SPI (Serial Peripheral Interface)
+
+Required properties:
+- cell-index : QE SPI subblock index.
+ 0: QE subblock SPI1
+ 1: QE subblock SPI2
+- compatible : should be "fsl,spi" or "aeroflexgaisler,spictrl".
+- mode : the SPI operation mode, it can be "cpu" or "cpu-qe".
+- reg : Offset and length of the register set for the device
+- interrupts : <a b> where a is the interrupt number and b is a
+ field that represents an encoding of the sense and level
+ information for the interrupt. This should be encoded based on
+ the information in section 2) depending on the type of interrupt
+ controller you have.
+- interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+- clock-frequency : input clock frequency to non FSL_SOC cores
+
+Optional properties:
+- gpios : specifies the gpio pins to be used for chipselects.
+ The gpios will be referred to as reg = <index> in the SPI child nodes.
+ If unspecified, a single SPI device without a chip select can be used.
+
+Example:
+ spi@4c0 {
+ cell-index = <0>;
+ compatible = "fsl,spi";
+ reg = <4c0 40>;
+ interrupts = <82 0>;
+ interrupt-parent = <700>;
+ mode = "cpu";
+ gpios = <&gpio 18 1 // device reg=<0>
+ &gpio 19 1>; // device reg=<1>
+ };
+
+
+* eSPI (Enhanced Serial Peripheral Interface)
+
+Required properties:
+- compatible : should be "fsl,mpc8536-espi".
+- reg : Offset and length of the register set for the device.
+- interrupts : should contain eSPI interrupt, the device has one interrupt.
+- fsl,espi-num-chipselects : the number of the chipselect signals.
+
+Optional properties:
+- fsl,csbef: chip select assertion time in bits before frame starts
+- fsl,csaft: chip select negation time in bits after frame ends
+
+Example:
+ spi@110000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,mpc8536-espi";
+ reg = <0x110000 0x1000>;
+ interrupts = <53 0x2>;
+ interrupt-parent = <&mpic>;
+ fsl,espi-num-chipselects = <4>;
+ fsl,csbef = <1>;
+ fsl,csaft = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/spi/mxs-spi.txt b/Documentation/devicetree/bindings/spi/mxs-spi.txt
new file mode 100644
index 000000000..3499b7329
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/mxs-spi.txt
@@ -0,0 +1,26 @@
+* Freescale MX233/MX28 SSP/SPI
+
+Required properties:
+- compatible: Should be "fsl,<soc>-spi", where soc is "imx23" or "imx28"
+- reg: Offset and length of the register set for the device
+- interrupts: Should contain SSP ERROR interrupt
+- dmas: DMA specifier, consisting of a phandle to DMA controller node
+ and SSP DMA channel ID.
+ Refer to dma.txt and fsl-mxs-dma.txt for details.
+- dma-names: Must be "rx-tx".
+
+Optional properties:
+- clock-frequency : Input clock frequency to the SPI block in Hz.
+ Default is 160000000 Hz.
+
+Example:
+
+ssp0: ssp@80010000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,imx28-spi";
+ reg = <0x80010000 0x2000>;
+ interrupts = <96>;
+ dmas = <&dma_apbh 0>;
+ dma-names = "rx-tx";
+};
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt
new file mode 100644
index 000000000..b785976fe
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra114-spi.txt
@@ -0,0 +1,42 @@
+NVIDIA Tegra114 SPI controller.
+
+Required properties:
+- compatible : For Tegra114, must contain "nvidia,tegra114-spi".
+ Otherwise, must contain '"nvidia,<chip>-spi", "nvidia,tegra114-spi"' where
+ <chip> is tegra124, tegra132, or tegra210.
+- reg: Should contain SPI registers location and length.
+- interrupts: Should contain SPI interrupts.
+- clock-names : Must include the following entries:
+ - spi
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - spi
+- dmas : Must contain an entry for each entry in clock-names.
+ See ../dma/dma.txt for details.
+- dma-names : Must include the following entries:
+ - rx
+ - tx
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+
+Recommended properties:
+- spi-max-frequency: Definition as per
+ Documentation/devicetree/bindings/spi/spi-bus.txt
+Example:
+
+spi@7000d600 {
+ compatible = "nvidia,tegra114-spi";
+ reg = <0x7000d600 0x200>;
+ interrupts = <0 82 0x04>;
+ spi-max-frequency = <25000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car 44>;
+ clock-names = "spi";
+ resets = <&tegra_car 44>;
+ reset-names = "spi";
+ dmas = <&apbdma 16>, <&apbdma 16>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt
new file mode 100644
index 000000000..bdf08e6de
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-sflash.txt
@@ -0,0 +1,38 @@
+NVIDIA Tegra20 SFLASH controller.
+
+Required properties:
+- compatible : should be "nvidia,tegra20-sflash".
+- reg: Should contain SFLASH registers location and length.
+- interrupts: Should contain SFLASH interrupts.
+- clocks : Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - spi
+- dmas : Must contain an entry for each entry in clock-names.
+ See ../dma/dma.txt for details.
+- dma-names : Must include the following entries:
+ - rx
+ - tx
+
+Recommended properties:
+- spi-max-frequency: Definition as per
+ Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Example:
+
+spi@7000c380 {
+ compatible = "nvidia,tegra20-sflash";
+ reg = <0x7000c380 0x80>;
+ interrupts = <0 39 0x04>;
+ spi-max-frequency = <25000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car 43>;
+ resets = <&tegra_car 43>;
+ reset-names = "spi";
+ dmas = <&apbdma 11>, <&apbdma 11>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
new file mode 100644
index 000000000..5db9144a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/nvidia,tegra20-slink.txt
@@ -0,0 +1,38 @@
+NVIDIA Tegra20/Tegra30 SLINK controller.
+
+Required properties:
+- compatible : should be "nvidia,tegra20-slink", "nvidia,tegra30-slink".
+- reg: Should contain SLINK registers location and length.
+- interrupts: Should contain SLINK interrupts.
+- clocks : Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - spi
+- dmas : Must contain an entry for each entry in clock-names.
+ See ../dma/dma.txt for details.
+- dma-names : Must include the following entries:
+ - rx
+ - tx
+
+Recommended properties:
+- spi-max-frequency: Definition as per
+ Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Example:
+
+spi@7000d600 {
+ compatible = "nvidia,tegra20-slink";
+ reg = <0x7000d600 0x200>;
+ interrupts = <0 82 0x04>;
+ spi-max-frequency = <25000000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&tegra_car 44>;
+ resets = <&tegra_car 44>;
+ reset-names = "spi";
+ dmas = <&apbdma 16>, <&apbdma 16>;
+ dma-names = "rx", "tx";
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/spi/omap-spi.txt b/Documentation/devicetree/bindings/spi/omap-spi.txt
new file mode 100644
index 000000000..2ba5f9c02
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/omap-spi.txt
@@ -0,0 +1,47 @@
+OMAP2+ McSPI device
+
+Required properties:
+- compatible :
+ - "ti,omap2-mcspi" for OMAP2 & OMAP3.
+ - "ti,omap4-mcspi" for OMAP4+.
+- ti,spi-num-cs : Number of chipselect supported by the instance.
+- ti,hwmods: Name of the hwmod associated to the McSPI
+- ti,pindir-d0-out-d1-in: Select the D0 pin as output and D1 as
+ input. The default is D0 as input and
+ D1 as output.
+
+Optional properties:
+- dmas: List of DMA specifiers with the controller specific format
+ as described in the generic DMA client binding. A tx and rx
+ specifier is required for each chip select.
+- dma-names: List of DMA request names. These strings correspond
+ 1:1 with the DMA specifiers listed in dmas. The string naming
+ is to be "rxN" and "txN" for RX and TX requests,
+ respectively, where N equals the chip select number.
+
+Examples:
+
+[hwmod populated DMA resources]
+
+mcspi1: mcspi@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "ti,omap4-mcspi";
+ ti,hwmods = "mcspi1";
+ ti,spi-num-cs = <4>;
+};
+
+[generic DMA request binding]
+
+mcspi1: mcspi@1 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "ti,omap4-mcspi";
+ ti,hwmods = "mcspi1";
+ ti,spi-num-cs = <2>;
+ dmas = <&edma 42
+ &edma 43
+ &edma 44
+ &edma 45>;
+ dma-names = "tx0", "rx0", "tx1", "rx1";
+};
diff --git a/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt b/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt
new file mode 100644
index 000000000..5c090771c
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/qcom,spi-qup.txt
@@ -0,0 +1,103 @@
+Qualcomm Universal Peripheral (QUP) Serial Peripheral Interface (SPI)
+
+The QUP core is an AHB slave that provides a common data path (an output FIFO
+and an input FIFO) for serial peripheral interface (SPI) mini-core.
+
+SPI in master mode supports up to 50MHz, up to four chip selects, programmable
+data path from 4 bits to 32 bits and numerous protocol variants.
+
+Required properties:
+- compatible: Should contain:
+ "qcom,spi-qup-v1.1.1" for 8660, 8960 and 8064.
+ "qcom,spi-qup-v2.1.1" for 8974 and later
+ "qcom,spi-qup-v2.2.1" for 8974 v2 and later.
+
+- reg: Should contain base register location and length
+- interrupts: Interrupt number used by this controller
+
+- clocks: Should contain the core clock and the AHB clock.
+- clock-names: Should be "core" for the core clock and "iface" for the
+ AHB clock.
+
+- #address-cells: Number of cells required to define a chip select
+ address on the SPI bus. Should be set to 1.
+- #size-cells: Should be zero.
+
+Optional properties:
+- spi-max-frequency: Specifies maximum SPI clock frequency,
+ Units - Hz. Definition as per
+ Documentation/devicetree/bindings/spi/spi-bus.txt
+- num-cs: total number of chipselects
+- cs-gpios: should specify GPIOs used for chipselects.
+ The gpios will be referred to as reg = <index> in the SPI child
+ nodes. If unspecified, a single SPI device without a chip
+ select can be used.
+
+- dmas: Two DMA channel specifiers following the convention outlined
+ in bindings/dma/dma.txt
+- dma-names: Names for the dma channels, if present. There must be at
+ least one channel named "tx" for transmit and named "rx" for
+ receive.
+
+SPI slave nodes must be children of the SPI master node and can contain
+properties described in Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Example:
+
+ spi_8: spi@f9964000 { /* BLSP2 QUP2 */
+
+ compatible = "qcom,spi-qup-v2";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0xf9964000 0x1000>;
+ interrupts = <0 102 0>;
+ spi-max-frequency = <19200000>;
+
+ clocks = <&gcc GCC_BLSP2_QUP2_SPI_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
+ clock-names = "core", "iface";
+
+ dmas = <&blsp1_bam 13>, <&blsp1_bam 12>;
+ dma-names = "rx", "tx";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi8_default>;
+
+ device@0 {
+ compatible = "arm,pl022-dummy";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0>; /* Chip select 0 */
+ spi-max-frequency = <19200000>;
+ spi-cpol;
+ };
+
+ device@1 {
+ compatible = "arm,pl022-dummy";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <1>; /* Chip select 1 */
+ spi-max-frequency = <9600000>;
+ spi-cpha;
+ };
+
+ device@2 {
+ compatible = "arm,pl022-dummy";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <2>; /* Chip select 2 */
+ spi-max-frequency = <19200000>;
+ spi-cpol;
+ spi-cpha;
+ };
+
+ device@3 {
+ compatible = "arm,pl022-dummy";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <3>; /* Chip select 3 */
+ spi-max-frequency = <19200000>;
+ spi-cpol;
+ spi-cpha;
+ spi-cs-high;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/spi/sh-hspi.txt b/Documentation/devicetree/bindings/spi/sh-hspi.txt
new file mode 100644
index 000000000..319bad4af
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/sh-hspi.txt
@@ -0,0 +1,29 @@
+Renesas HSPI.
+
+Required properties:
+- compatible : "renesas,hspi-<soctype>", "renesas,hspi" as fallback.
+ Examples with soctypes are:
+ - "renesas,hspi-r8a7778" (R-Car M1)
+ - "renesas,hspi-r8a7779" (R-Car H1)
+- reg : Offset and length of the register set for the device
+- interrupt-parent : The phandle for the interrupt controller that
+ services interrupts for this device
+- interrupts : Interrupt specifier
+- #address-cells : Must be <1>
+- #size-cells : Must be <0>
+
+Pinctrl properties might be needed, too. See
+Documentation/devicetree/bindings/pinctrl/renesas,*.
+
+Example:
+
+ hspi0: spi@fffc7000 {
+ compatible = "renesas,hspi-r8a7778", "renesas,hspi";
+ reg = <0xfffc7000 0x18>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 63 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
diff --git a/Documentation/devicetree/bindings/spi/sh-msiof.txt b/Documentation/devicetree/bindings/spi/sh-msiof.txt
new file mode 100644
index 000000000..4c388bb2f
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/sh-msiof.txt
@@ -0,0 +1,71 @@
+Renesas MSIOF spi controller
+
+Required properties:
+- compatible : "renesas,msiof-<soctype>" for SoCs,
+ "renesas,sh-msiof" for SuperH, or
+ "renesas,sh-mobile-msiof" for SH Mobile series.
+ Examples with soctypes are:
+ "renesas,msiof-r8a7790" (R-Car H2)
+ "renesas,msiof-r8a7791" (R-Car M2-W)
+ "renesas,msiof-r8a7792" (R-Car V2H)
+ "renesas,msiof-r8a7793" (R-Car M2-N)
+ "renesas,msiof-r8a7794" (R-Car E2)
+- reg : A list of offsets and lengths of the register sets for
+ the device.
+ If only one register set is present, it is to be used
+ by both the CPU and the DMA engine.
+ If two register sets are present, the first is to be
+ used by the CPU, and the second is to be used by the
+ DMA engine.
+- interrupt-parent : The phandle for the interrupt controller that
+ services interrupts for this device
+- interrupts : Interrupt specifier
+- #address-cells : Must be <1>
+- #size-cells : Must be <0>
+
+Optional properties:
+- clocks : Must contain a reference to the functional clock.
+- num-cs : Total number of chip-selects (default is 1)
+- dmas : Must contain a list of two references to DMA
+ specifiers, one for transmission, and one for
+ reception.
+- dma-names : Must contain a list of two DMA names, "tx" and "rx".
+- renesas,dtdl : delay sync signal (setup) in transmit mode.
+ Must contain one of the following values:
+ 0 (no bit delay)
+ 50 (0.5-clock-cycle delay)
+ 100 (1-clock-cycle delay)
+ 150 (1.5-clock-cycle delay)
+ 200 (2-clock-cycle delay)
+
+- renesas,syncdl : delay sync signal (hold) in transmit mode.
+ Must contain one of the following values:
+ 0 (no bit delay)
+ 50 (0.5-clock-cycle delay)
+ 100 (1-clock-cycle delay)
+ 150 (1.5-clock-cycle delay)
+ 200 (2-clock-cycle delay)
+ 300 (3-clock-cycle delay)
+
+Optional properties, deprecated for soctype-specific bindings:
+- renesas,tx-fifo-size : Overrides the default tx fifo size given in words
+ (default is 64)
+- renesas,rx-fifo-size : Overrides the default rx fifo size given in words
+ (default is 64, or 256 on R-Car Gen2)
+
+Pinctrl properties might be needed, too. See
+Documentation/devicetree/bindings/pinctrl/renesas,*.
+
+Example:
+
+ msiof0: spi@e6e20000 {
+ compatible = "renesas,msiof-r8a7791";
+ reg = <0 0xe6e20000 0 0x0064>, <0 0xe7e20000 0 0x0064>;
+ interrupts = <0 156 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp0_clks R8A7791_CLK_MSIOF0>;
+ dmas = <&dmac0 0x51>, <&dmac0 0x52>;
+ dma-names = "tx", "rx";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt
new file mode 100644
index 000000000..bd99193e8
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.txt
@@ -0,0 +1,28 @@
+Synopsys DesignWare AMBA 2.0 Synchronous Serial Interface.
+
+Required properties:
+- compatible : "snps,dw-apb-ssi"
+- reg : The register base for the controller.
+- interrupts : One interrupt, used by the controller.
+- #address-cells : <1>, as required by generic SPI binding.
+- #size-cells : <0>, also as required by generic SPI binding.
+
+Optional properties:
+- cs-gpios : Specifies the gpio pis to be used for chipselects.
+- num-cs : The number of chipselects. If omitted, this will default to 4.
+
+Child nodes as per the generic SPI binding.
+
+Example:
+
+ spi@fff00000 {
+ compatible = "snps,dw-apb-ssi";
+ reg = <0xfff00000 0x1000>;
+ interrupts = <0 154 4>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ num-cs = <2>;
+ cs-gpios = <&gpio0 13 0>,
+ <&gpio0 14 0>;
+ };
+
diff --git a/Documentation/devicetree/bindings/spi/spi-bus.txt b/Documentation/devicetree/bindings/spi/spi-bus.txt
new file mode 100644
index 000000000..bbaa857dd
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-bus.txt
@@ -0,0 +1,94 @@
+SPI (Serial Peripheral Interface) busses
+
+SPI busses can be described with a node for the SPI master device
+and a set of child nodes for each SPI slave on the bus. For this
+discussion, it is assumed that the system's SPI controller is in
+SPI master mode. This binding does not describe SPI controllers
+in slave mode.
+
+The SPI master node requires the following properties:
+- #address-cells - number of cells required to define a chip select
+ address on the SPI bus.
+- #size-cells - should be zero.
+- compatible - name of SPI bus controller following generic names
+ recommended practice.
+- cs-gpios - (optional) gpios chip select.
+No other properties are required in the SPI bus node. It is assumed
+that a driver for an SPI bus device will understand that it is an SPI bus.
+However, the binding does not attempt to define the specific method for
+assigning chip select numbers. Since SPI chip select configuration is
+flexible and non-standardized, it is left out of this binding with the
+assumption that board specific platform code will be used to manage
+chip selects. Individual drivers can define additional properties to
+support describing the chip select layout.
+
+Optional property:
+- num-cs : total number of chipselects
+
+If cs-gpios is used the number of chip select will automatically increased
+with max(cs-gpios > hw cs)
+
+So if for example the controller has 2 CS lines, and the cs-gpios
+property looks like this:
+
+cs-gpios = <&gpio1 0 0> <0> <&gpio1 1 0> <&gpio1 2 0>;
+
+Then it should be configured so that num_chipselect = 4 with the
+following mapping:
+
+cs0 : &gpio1 0 0
+cs1 : native
+cs2 : &gpio1 1 0
+cs3 : &gpio1 2 0
+
+SPI slave nodes must be children of the SPI master node and can
+contain the following properties.
+- reg - (required) chip select address of device.
+- compatible - (required) name of SPI device following generic names
+ recommended practice
+- spi-max-frequency - (required) Maximum SPI clocking speed of device in Hz
+- spi-cpol - (optional) Empty property indicating device requires
+ inverse clock polarity (CPOL) mode
+- spi-cpha - (optional) Empty property indicating device requires
+ shifted clock phase (CPHA) mode
+- spi-cs-high - (optional) Empty property indicating device requires
+ chip select active high
+- spi-3wire - (optional) Empty property indicating device requires
+ 3-wire mode.
+- spi-lsb-first - (optional) Empty property indicating device requires
+ LSB first mode.
+- spi-tx-bus-width - (optional) The bus width(number of data wires) that
+ used for MOSI. Defaults to 1 if not present.
+- spi-rx-bus-width - (optional) The bus width(number of data wires) that
+ used for MISO. Defaults to 1 if not present.
+
+Some SPI controllers and devices support Dual and Quad SPI transfer mode.
+It allows data in the SPI system to be transferred in 2 wires(DUAL) or 4 wires(QUAD).
+Now the value that spi-tx-bus-width and spi-rx-bus-width can receive is
+only 1(SINGLE), 2(DUAL) and 4(QUAD).
+Dual/Quad mode is not allowed when 3-wire mode is used.
+
+If a gpio chipselect is used for the SPI slave the gpio number will be passed
+via the SPI master node cs-gpios property.
+
+SPI example for an MPC5200 SPI bus:
+ spi@f00 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,mpc5200b-spi","fsl,mpc5200-spi";
+ reg = <0xf00 0x20>;
+ interrupts = <2 13 0 2 14 0>;
+ interrupt-parent = <&mpc5200_pic>;
+
+ ethernet-switch@0 {
+ compatible = "micrel,ks8995m";
+ spi-max-frequency = <1000000>;
+ reg = <0>;
+ };
+
+ codec@1 {
+ compatible = "ti,tlv320aic26";
+ spi-max-frequency = <100000>;
+ reg = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/spi/spi-cadence.txt b/Documentation/devicetree/bindings/spi/spi-cadence.txt
new file mode 100644
index 000000000..94f09141a
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-cadence.txt
@@ -0,0 +1,31 @@
+Cadence SPI controller Device Tree Bindings
+-------------------------------------------
+
+Required properties:
+- compatible : Should be "cdns,spi-r1p6" or "xlnx,zynq-spi-r1p6".
+- reg : Physical base address and size of SPI registers map.
+- interrupts : Property with a value describing the interrupt
+ number.
+- interrupt-parent : Must be core interrupt controller
+- clock-names : List of input clock names - "ref_clk", "pclk"
+ (See clock bindings for details).
+- clocks : Clock phandles (see clock bindings for details).
+
+Optional properties:
+- num-cs : Number of chip selects used.
+ If a decoder is used, this will be the number of
+ chip selects after the decoder.
+- is-decoded-cs : Flag to indicate whether decoder is used or not.
+
+Example:
+
+ spi@e0007000 {
+ compatible = "xlnx,zynq-spi-r1p6";
+ clock-names = "ref_clk", "pclk";
+ clocks = <&clkc 26>, <&clkc 35>;
+ interrupt-parent = <&intc>;
+ interrupts = <0 49 4>;
+ num-cs = <4>;
+ is-decoded-cs = <0>;
+ reg = <0xe0007000 0x1000>;
+ } ;
diff --git a/Documentation/devicetree/bindings/spi/spi-davinci.txt b/Documentation/devicetree/bindings/spi/spi-davinci.txt
new file mode 100644
index 000000000..12ecfe9e3
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-davinci.txt
@@ -0,0 +1,88 @@
+Davinci SPI controller device bindings
+
+Links on DM:
+Keystone 2 - http://www.ti.com/lit/ug/sprugp2a/sprugp2a.pdf
+dm644x - http://www.ti.com/lit/ug/sprue32a/sprue32a.pdf
+OMAP-L138/da830 - http://www.ti.com/lit/ug/spruh77a/spruh77a.pdf
+
+Required properties:
+- #address-cells: number of cells required to define a chip select
+ address on the SPI bus. Should be set to 1.
+- #size-cells: should be zero.
+- compatible:
+ - "ti,dm6441-spi" for SPI used similar to that on DM644x SoC family
+ - "ti,da830-spi" for SPI used similar to that on DA8xx SoC family
+- reg: Offset and length of SPI controller register space
+- num-cs: Number of chip selects. This includes internal as well as
+ GPIO chip selects.
+- ti,davinci-spi-intr-line: interrupt line used to connect the SPI
+ IP to the interrupt controller within the SoC. Possible values
+ are 0 and 1. Manual says one of the two possible interrupt
+ lines can be tied to the interrupt controller. Set this
+ based on a specifc SoC configuration.
+- interrupts: interrupt number mapped to CPU.
+- clocks: spi clk phandle
+
+Optional:
+- cs-gpios: gpio chip selects
+ For example to have 3 internal CS and 2 GPIO CS, user could define
+ cs-gpios = <0>, <0>, <0>, <&gpio1 30 0>, <&gpio1 31 0>;
+ where first three are internal CS and last two are GPIO CS.
+
+Optional properties for slave devices:
+SPI slave nodes can contain the following properties.
+Not all SPI Peripherals from Texas Instruments support this.
+Please check SPI peripheral documentation for a device before using these.
+
+- ti,spi-wdelay : delay between transmission of words
+ (SPIFMTn.WDELAY, SPIDAT1.WDEL) must be specified in number of SPI module
+ clock periods.
+
+ delay = WDELAY * SPI_module_clock_period + 2 * SPI_module_clock_period
+
+Below is timing diagram which shows functional meaning of
+"ti,spi-wdelay" parameter.
+
+ +-+ +-+ +-+ +-+ +-+ +-+ +-+ +-+
+SPI_CLK | | | | | | | | | | | | | | | |
+ +----------+ +-+ +-+ +-+ +-+ +---------------------------+ +-+ +-+ +-
+
+SPI_SOMI/SIMO+-----------------+ +-----------
+ +----------+ word1 +---------------------------+word2
+ +-----------------+ +-----------
+ WDELAY
+ <-------------------------->
+
+Example of a NOR flash slave device (n25q032) connected to DaVinci
+SPI controller device over the SPI bus.
+
+spi0:spi@20BF0000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "ti,dm6446-spi";
+ reg = <0x20BF0000 0x1000>;
+ num-cs = <4>;
+ ti,davinci-spi-intr-line = <0>;
+ interrupts = <338>;
+ clocks = <&clkspi>;
+
+ flash: n25q032@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "st,m25p32";
+ spi-max-frequency = <25000000>;
+ reg = <0>;
+ ti,spi-wdelay = <8>;
+
+ partition@0 {
+ label = "u-boot-spl";
+ reg = <0x0 0x80000>;
+ read-only;
+ };
+
+ partition@1 {
+ label = "test";
+ reg = <0x80000 0x380000>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/spi/spi-dw.txt b/Documentation/devicetree/bindings/spi/spi-dw.txt
new file mode 100644
index 000000000..7b63ed601
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-dw.txt
@@ -0,0 +1,24 @@
+Synopsys DesignWare SPI master
+
+Required properties:
+- compatible: should be "snps,designware-spi"
+- #address-cells: see spi-bus.txt
+- #size-cells: see spi-bus.txt
+- reg: address and length of the spi master registers
+- interrupts: should contain one interrupt
+- clocks: spi clock phandle
+- num-cs: see spi-bus.txt
+
+Optional properties:
+- cs-gpios: see spi-bus.txt
+
+Example:
+
+spi: spi@4020a000 {
+ compatible = "snps,designware-spi";
+ interrupts = <11 1>;
+ reg = <0x4020a000 0x1000>;
+ clocks = <&pclk>;
+ num-cs = <2>;
+ cs-gpios = <&banka 0 0>;
+};
diff --git a/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
new file mode 100644
index 000000000..70af78a91
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-fsl-dspi.txt
@@ -0,0 +1,57 @@
+ARM Freescale DSPI controller
+
+Required properties:
+- compatible : "fsl,vf610-dspi"
+- reg : Offset and length of the register set for the device
+- interrupts : Should contain SPI controller interrupt
+- clocks: from common clock binding: handle to dspi clock.
+- clock-names: from common clock binding: Shall be "dspi".
+- pinctrl-0: pin control group to be used for this controller.
+- pinctrl-names: must contain a "default" entry.
+- spi-num-chipselects : the number of the chipselect signals.
+- bus-num : the slave chip chipselect signal number.
+
+Optional property:
+- big-endian: If present the dspi device's registers are implemented
+ in big endian mode, otherwise in native mode(same with CPU), for more
+ detail please see: Documentation/devicetree/bindings/regmap/regmap.txt.
+
+Optional SPI slave node properties:
+- fsl,spi-cs-sck-delay: a delay in nanoseconds between activating chip
+ select and the start of clock signal, at the start of a transfer.
+- fsl,spi-sck-cs-delay: a delay in nanoseconds between stopping the clock
+ signal and deactivating chip select, at the end of a transfer.
+
+Example:
+
+dspi0@4002c000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "fsl,vf610-dspi";
+ reg = <0x4002c000 0x1000>;
+ interrupts = <0 67 0x04>;
+ clocks = <&clks VF610_CLK_DSPI0>;
+ clock-names = "dspi";
+ spi-num-chipselects = <5>;
+ bus-num = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_dspi0_1>;
+ big-endian;
+ status = "okay";
+
+ sflash: at26df081a@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "atmel,at26df081a";
+ spi-max-frequency = <16000000>;
+ spi-cpol;
+ spi-cpha;
+ reg = <0>;
+ linux,modalias = "m25p80";
+ modal = "at26df081a";
+ fsl,spi-cs-sck-delay = <100>;
+ fsl,spi-sck-cs-delay = <50>;
+ };
+};
+
+
diff --git a/Documentation/devicetree/bindings/spi/spi-gpio.txt b/Documentation/devicetree/bindings/spi/spi-gpio.txt
new file mode 100644
index 000000000..a95603bcf
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-gpio.txt
@@ -0,0 +1,31 @@
+SPI-GPIO devicetree bindings
+
+Required properties:
+
+ - compatible: should be set to "spi-gpio"
+ - #address-cells: should be set to <0x1>
+ - ranges
+ - gpio-sck: GPIO spec for the SCK line to use
+ - gpio-miso: GPIO spec for the MISO line to use
+ - gpio-mosi: GPIO spec for the MOSI line to use
+ - cs-gpios: GPIOs to use for chipselect lines.
+ Not needed if num-chipselects = <0>.
+ - num-chipselects: Number of chipselect lines. Should be <0> if a single device
+ with no chip select is connected.
+
+Example:
+
+ spi {
+ compatible = "spi-gpio";
+ #address-cells = <0x1>;
+ ranges;
+
+ gpio-sck = <&gpio 95 0>;
+ gpio-miso = <&gpio 98 0>;
+ gpio-mosi = <&gpio 97 0>;
+ cs-gpios = <&gpio 125 0>;
+ num-chipselects = <1>;
+
+ /* clients */
+ };
+
diff --git a/Documentation/devicetree/bindings/spi/spi-img-spfi.txt b/Documentation/devicetree/bindings/spi/spi-img-spfi.txt
new file mode 100644
index 000000000..e02fbf18c
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-img-spfi.txt
@@ -0,0 +1,38 @@
+IMG Synchronous Peripheral Flash Interface (SPFI) controller
+
+Required properties:
+- compatible: Must be "img,spfi".
+- reg: Must contain the base address and length of the SPFI registers.
+- interrupts: Must contain the SPFI interrupt.
+- clocks: Must contain an entry for each entry in clock-names.
+ See ../clock/clock-bindings.txt for details.
+- clock-names: Must include the following entries:
+ - spfi: SPI operating clock
+ - sys: SPI system interface clock
+- dmas: Must contain an entry for each entry in dma-names.
+ See ../dma/dma.txt for details.
+- dma-names: Must include the following entries:
+ - rx
+ - tx
+- cs-gpios: Must specify the GPIOs used for chipselect lines.
+- #address-cells: Must be 1.
+- #size-cells: Must be 0.
+
+Optional properties:
+- img,supports-quad-mode: Should be set if the interface supports quad mode
+ SPI transfers.
+
+Example:
+
+spi@18100f00 {
+ compatible = "img,spfi";
+ reg = <0x18100f00 0x100>;
+ interrupts = <GIC_SHARED 22 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&spi_clk>, <&system_clk>;
+ clock-names = "spfi", "sys";
+ dmas = <&mdc 9 0xffffffff 0>, <&mdc 10 0xffffffff 0>;
+ dma-names = "rx", "tx";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+};
diff --git a/Documentation/devicetree/bindings/spi/spi-meson.txt b/Documentation/devicetree/bindings/spi/spi-meson.txt
new file mode 100644
index 000000000..bb52a86f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-meson.txt
@@ -0,0 +1,22 @@
+Amlogic Meson SPI controllers
+
+* SPIFC (SPI Flash Controller)
+
+The Meson SPIFC is a controller optimized for communication with SPI
+NOR memories, without DMA support and a 64-byte unified transmit /
+receive buffer.
+
+Required properties:
+ - compatible: should be "amlogic,meson6-spifc"
+ - reg: physical base address and length of the controller registers
+ - clocks: phandle of the input clock for the baud rate generator
+ - #address-cells: should be 1
+ - #size-cells: should be 0
+
+ spi@c1108c80 {
+ compatible = "amlogic,meson6-spifc";
+ reg = <0xc1108c80 0x80>;
+ clocks = <&clk81>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/spi/spi-octeon.txt b/Documentation/devicetree/bindings/spi/spi-octeon.txt
new file mode 100644
index 000000000..431add192
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-octeon.txt
@@ -0,0 +1,33 @@
+Cavium, Inc. OCTEON SOC SPI master controller.
+
+Required properties:
+- compatible : "cavium,octeon-3010-spi"
+- reg : The register base for the controller.
+- interrupts : One interrupt, used by the controller.
+- #address-cells : <1>, as required by generic SPI binding.
+- #size-cells : <0>, also as required by generic SPI binding.
+
+Child nodes as per the generic SPI binding.
+
+Example:
+
+ spi@1070000001000 {
+ compatible = "cavium,octeon-3010-spi";
+ reg = <0x10700 0x00001000 0x0 0x100>;
+ interrupts = <0 58>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ eeprom@0 {
+ compatible = "st,m95256", "atmel,at25";
+ reg = <0>;
+ spi-max-frequency = <5000000>;
+ spi-cpha;
+ spi-cpol;
+
+ pagesize = <64>;
+ size = <32768>;
+ address-width = <16>;
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/spi/spi-orion.txt b/Documentation/devicetree/bindings/spi/spi-orion.txt
new file mode 100644
index 000000000..50c3a3de6
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-orion.txt
@@ -0,0 +1,19 @@
+Marvell Orion SPI device
+
+Required properties:
+- compatible : should be "marvell,orion-spi" or "marvell,armada-370-spi".
+- reg : offset and length of the register set for the device
+- cell-index : Which of multiple SPI controllers is this.
+Optional properties:
+- interrupts : Is currently not used.
+
+Example:
+ spi@10600 {
+ compatible = "marvell,orion-spi";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ cell-index = <0>;
+ reg = <0x10600 0x28>;
+ interrupts = <23>;
+ status = "disabled";
+ };
diff --git a/Documentation/devicetree/bindings/spi/spi-rockchip.txt b/Documentation/devicetree/bindings/spi/spi-rockchip.txt
new file mode 100644
index 000000000..0c491bda4
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-rockchip.txt
@@ -0,0 +1,45 @@
+* Rockchip SPI Controller
+
+The Rockchip SPI controller is used to interface with various devices such as flash
+and display controllers using the SPI communication interface.
+
+Required Properties:
+
+- compatible: should be one of the following.
+ "rockchip,rk3066-spi" for rk3066.
+ "rockchip,rk3188-spi", "rockchip,rk3066-spi" for rk3188.
+ "rockchip,rk3288-spi", "rockchip,rk3066-spi" for rk3288.
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: The interrupt number to the cpu. The interrupt specifier format
+ depends on the interrupt controller.
+- clocks: Must contain an entry for each entry in clock-names.
+- clock-names: Shall be "spiclk" for the transfer-clock, and "apb_pclk" for
+ the peripheral clock.
+- #address-cells: should be 1.
+- #size-cells: should be 0.
+
+Optional Properties:
+
+- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
+ Documentation/devicetree/bindings/dma/dma.txt
+- dma-names: DMA request names should include "tx" and "rx" if present.
+- rx-sample-delay-ns: nanoseconds to delay after the SCLK edge before sampling
+ Rx data (may need to be fine tuned for high capacitance lines).
+ No delay (0) by default.
+
+
+Example:
+
+ spi0: spi@ff110000 {
+ compatible = "rockchip,rk3066-spi";
+ reg = <0xff110000 0x1000>;
+ dmas = <&pdma1 11>, <&pdma1 12>;
+ dma-names = "tx", "rx";
+ rx-sample-delay-ns = <10>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru SCLK_SPI0>, <&cru PCLK_SPI0>;
+ clock-names = "spiclk", "apb_pclk";
+ };
diff --git a/Documentation/devicetree/bindings/spi/spi-rspi.txt b/Documentation/devicetree/bindings/spi/spi-rspi.txt
new file mode 100644
index 000000000..8f4169f63
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-rspi.txt
@@ -0,0 +1,69 @@
+Device tree configuration for Renesas RSPI/QSPI driver
+
+Required properties:
+- compatible : For Renesas Serial Peripheral Interface on legacy SH:
+ "renesas,rspi-<soctype>", "renesas,rspi" as fallback.
+ For Renesas Serial Peripheral Interface on RZ/A1H:
+ "renesas,rspi-<soctype>", "renesas,rspi-rz" as fallback.
+ For Quad Serial Peripheral Interface on R-Car Gen2:
+ "renesas,qspi-<soctype>", "renesas,qspi" as fallback.
+ Examples with soctypes are:
+ - "renesas,rspi-sh7757" (SH)
+ - "renesas,rspi-r7s72100" (RZ/A1H)
+ - "renesas,qspi-r8a7790" (R-Car H2)
+ - "renesas,qspi-r8a7791" (R-Car M2-W)
+ - "renesas,qspi-r8a7792" (R-Car V2H)
+ - "renesas,qspi-r8a7793" (R-Car M2-N)
+ - "renesas,qspi-r8a7794" (R-Car E2)
+- reg : Address start and address range size of the device
+- interrupts : A list of interrupt-specifiers, one for each entry in
+ interrupt-names.
+ If interrupt-names is not present, an interrupt specifier
+ for a single muxed interrupt.
+- interrupt-names : A list of interrupt names. Should contain (if present):
+ - "error" for SPEI,
+ - "rx" for SPRI,
+ - "tx" to SPTI,
+ - "mux" for a single muxed interrupt.
+- interrupt-parent : The phandle for the interrupt controller that
+ services interrupts for this device.
+- num-cs : Number of chip selects. Some RSPI cores have more than 1.
+- #address-cells : Must be <1>
+- #size-cells : Must be <0>
+
+Optional properties:
+- clocks : Must contain a reference to the functional clock.
+- dmas : Must contain a list of two references to DMA specifiers,
+ one for transmission, and one for reception.
+- dma-names : Must contain a list of two DMA names, "tx" and "rx".
+
+Pinctrl properties might be needed, too. See
+Documentation/devicetree/bindings/pinctrl/renesas,*.
+
+Examples:
+
+ spi0: spi@e800c800 {
+ compatible = "renesas,rspi-r7s72100", "renesas,rspi-rz";
+ reg = <0xe800c800 0x24>;
+ interrupts = <0 238 IRQ_TYPE_LEVEL_HIGH>,
+ <0 239 IRQ_TYPE_LEVEL_HIGH>,
+ <0 240 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "error", "rx", "tx";
+ interrupt-parent = <&gic>;
+ num-cs = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ spi: spi@e6b10000 {
+ compatible = "renesas,qspi-r8a7791", "renesas,qspi";
+ reg = <0 0xe6b10000 0 0x2c>;
+ interrupt-parent = <&gic>;
+ interrupts = <0 184 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp9_clks R8A7791_CLK_QSPI_MOD>;
+ num-cs = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ dmas = <&dmac0 0x17>, <&dmac0 0x18>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/spi/spi-samsung.txt b/Documentation/devicetree/bindings/spi/spi-samsung.txt
new file mode 100644
index 000000000..6dbdeb3c3
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-samsung.txt
@@ -0,0 +1,109 @@
+* Samsung SPI Controller
+
+The Samsung SPI controller is used to interface with various devices such as flash
+and display controllers using the SPI communication interface.
+
+Required SoC Specific Properties:
+
+- compatible: should be one of the following.
+ - samsung,s3c2443-spi: for s3c2443, s3c2416 and s3c2450 platforms
+ - samsung,s3c6410-spi: for s3c6410 platforms
+ - samsung,s5pv210-spi: for s5pv210 and s5pc110 platforms
+ - samsung,exynos7-spi: for exynos7 platforms
+
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+- interrupts: The interrupt number to the cpu. The interrupt specifier format
+ depends on the interrupt controller.
+
+- dmas : Two or more DMA channel specifiers following the convention outlined
+ in bindings/dma/dma.txt
+
+- dma-names: Names for the dma channels. There must be at least one channel
+ named "tx" for transmit and named "rx" for receive.
+
+Required Board Specific Properties:
+
+- #address-cells: should be 1.
+- #size-cells: should be 0.
+
+Optional Board Specific Properties:
+
+- samsung,spi-src-clk: If the spi controller includes a internal clock mux to
+ select the clock source for the spi bus clock, this property can be used to
+ indicate the clock to be used for driving the spi bus clock. If not specified,
+ the clock number 0 is used as default.
+
+- num-cs: Specifies the number of chip select lines supported. If
+ not specified, the default number of chip select lines is set to 1.
+
+- cs-gpios: should specify GPIOs used for chipselects (see spi-bus.txt)
+
+SPI Controller specific data in SPI slave nodes:
+
+- The spi slave nodes should provide the following information which is required
+ by the spi controller.
+
+ - samsung,spi-feedback-delay: The sampling phase shift to be applied on the
+ miso line (to account for any lag in the miso line). The following are the
+ valid values.
+
+ - 0: No phase shift.
+ - 1: 90 degree phase shift sampling.
+ - 2: 180 degree phase shift sampling.
+ - 3: 270 degree phase shift sampling.
+
+Aliases:
+
+- All the SPI controller nodes should be represented in the aliases node using
+ the following format 'spi{n}' where n is a unique number for the alias.
+
+
+Example:
+
+- SoC Specific Portion:
+
+ spi_0: spi@12d20000 {
+ compatible = "samsung,exynos4210-spi";
+ reg = <0x12d20000 0x100>;
+ interrupts = <0 66 0>;
+ dmas = <&pdma0 5
+ &pdma0 4>;
+ dma-names = "tx", "rx";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+- Board Specific Portion:
+
+ spi_0: spi@12d20000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&spi0_bus>;
+ cs-gpios = <&gpa2 5 0>;
+
+ w25q80bw@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "w25x80";
+ reg = <0>;
+ spi-max-frequency = <10000>;
+
+ controller-data {
+ samsung,spi-feedback-delay = <0>;
+ };
+
+ partition@0 {
+ label = "U-Boot";
+ reg = <0x0 0x40000>;
+ read-only;
+ };
+
+ partition@40000 {
+ label = "Kernel";
+ reg = <0x40000 0xc0000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/spi/spi-sc18is602.txt b/Documentation/devicetree/bindings/spi/spi-sc18is602.txt
new file mode 100644
index 000000000..02f903327
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-sc18is602.txt
@@ -0,0 +1,23 @@
+NXP SC18IS602/SCIS603
+
+Required properties:
+ - compatible : Should be one of
+ "nxp,sc18is602"
+ "nxp,sc18is602b"
+ "nxp,sc18is603"
+ - reg: I2C bus address
+
+Optional properties:
+ - clock-frequency : external oscillator clock frequency. If not
+ specified, the SC18IS602 default frequency (7372000) will be used.
+
+The clock-frequency property is relevant and needed only if the chip has an
+external oscillator (SC18IS603).
+
+Example:
+
+ sc18is603@28 {
+ compatible = "nxp,sc18is603";
+ reg = <0x28>;
+ clock-frequency = <14744000>;
+ }
diff --git a/Documentation/devicetree/bindings/spi/spi-sirf.txt b/Documentation/devicetree/bindings/spi/spi-sirf.txt
new file mode 100644
index 000000000..4c7adb8f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-sirf.txt
@@ -0,0 +1,41 @@
+* CSR SiRFprimaII Serial Peripheral Interface
+
+Required properties:
+- compatible : Should be "sirf,prima2-spi"
+- reg : Offset and length of the register set for the device
+- interrupts : Should contain SPI interrupt
+- resets: phandle to the reset controller asserting this device in
+ reset
+ See ../reset/reset.txt for details.
+- dmas : Must contain an entry for each entry in clock-names.
+ See ../dma/dma.txt for details.
+- dma-names : Must include the following entries:
+ - rx
+ - tx
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+
+- #address-cells: Number of cells required to define a chip select
+ address on the SPI bus. Should be set to 1.
+- #size-cells: Should be zero.
+
+Optional properties:
+- spi-max-frequency: Specifies maximum SPI clock frequency,
+ Units - Hz. Definition as per
+ Documentation/devicetree/bindings/spi/spi-bus.txt
+- cs-gpios: should specify GPIOs used for chipselects.
+
+Example:
+
+spi0: spi@b00d0000 {
+ compatible = "sirf,prima2-spi";
+ reg = <0xb00d0000 0x10000>;
+ interrupts = <15>;
+ dmas = <&dmac1 9>,
+ <&dmac1 4>;
+ dma-names = "rx", "tx";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&clks 19>;
+ resets = <&rstc 26>;
+};
diff --git a/Documentation/devicetree/bindings/spi/spi-st-ssc.txt b/Documentation/devicetree/bindings/spi/spi-st-ssc.txt
new file mode 100644
index 000000000..fe54959ec
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-st-ssc.txt
@@ -0,0 +1,40 @@
+STMicroelectronics SSC (SPI) Controller
+---------------------------------------
+
+Required properties:
+- compatible : "st,comms-ssc4-spi"
+- reg : Offset and length of the device's register set
+- interrupts : The interrupt specifier
+- clock-names : Must contain "ssc"
+- clocks : Must contain an entry for each name in clock-names
+ See ../clk/*
+- pinctrl-names : Uses "default", can use "sleep" if provided
+ See ../pinctrl/pinctrl-binding.txt
+
+Optional properties:
+- cs-gpios : List of GPIO chip selects
+ See ../spi/spi-bus.txt
+
+Child nodes represent devices on the SPI bus
+ See ../spi/spi-bus.txt
+
+Example:
+ spi@9840000 {
+ compatible = "st,comms-ssc4-spi";
+ reg = <0x9840000 0x110>;
+ interrupts = <GIC_SPI 112 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk_s_c0_flexgen CLK_EXT2F_A9>;
+ clock-names = "ssc";
+ pinctrl-0 = <&pinctrl_spi0_default>;
+ pinctrl-names = "default";
+ cs-gpios = <&pio17 5 0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ st95hf@0{
+ compatible = "st,st95hf";
+ reg = <0>;
+ spi-max-frequency = <1000000>;
+ interrupts = <2 IRQ_TYPE_EDGE_FALLING>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/spi/spi-sun4i.txt b/Documentation/devicetree/bindings/spi/spi-sun4i.txt
new file mode 100644
index 000000000..de827f5a3
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-sun4i.txt
@@ -0,0 +1,24 @@
+Allwinner A10 SPI controller
+
+Required properties:
+- compatible: Should be "allwinner,sun4-a10-spi".
+- reg: Should contain register location and length.
+- interrupts: Should contain interrupt.
+- clocks: phandle to the clocks feeding the SPI controller. Two are
+ needed:
+ - "ahb": the gated AHB parent clock
+ - "mod": the parent module clock
+- clock-names: Must contain the clock names described just above
+
+Example:
+
+spi1: spi@01c06000 {
+ compatible = "allwinner,sun4i-a10-spi";
+ reg = <0x01c06000 0x1000>;
+ interrupts = <11>;
+ clocks = <&ahb_gates 21>, <&spi1_clk>;
+ clock-names = "ahb", "mod";
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+};
diff --git a/Documentation/devicetree/bindings/spi/spi-sun6i.txt b/Documentation/devicetree/bindings/spi/spi-sun6i.txt
new file mode 100644
index 000000000..21de73db6
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-sun6i.txt
@@ -0,0 +1,24 @@
+Allwinner A31 SPI controller
+
+Required properties:
+- compatible: Should be "allwinner,sun6i-a31-spi".
+- reg: Should contain register location and length.
+- interrupts: Should contain interrupt.
+- clocks: phandle to the clocks feeding the SPI controller. Two are
+ needed:
+ - "ahb": the gated AHB parent clock
+ - "mod": the parent module clock
+- clock-names: Must contain the clock names described just above
+- resets: phandle to the reset controller asserting this device in
+ reset
+
+Example:
+
+spi1: spi@01c69000 {
+ compatible = "allwinner,sun6i-a31-spi";
+ reg = <0x01c69000 0x1000>;
+ interrupts = <0 66 4>;
+ clocks = <&ahb1_gates 21>, <&spi1_clk>;
+ clock-names = "ahb", "mod";
+ resets = <&ahb1_rst 21>;
+};
diff --git a/Documentation/devicetree/bindings/spi/spi-xtensa-xtfpga.txt b/Documentation/devicetree/bindings/spi/spi-xtensa-xtfpga.txt
new file mode 100644
index 000000000..b6ebe2bc7
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi-xtensa-xtfpga.txt
@@ -0,0 +1,9 @@
+Cadence Xtensa XTFPGA platform SPI controller.
+
+This simple SPI master controller is built into xtfpga bitstreams and is used
+to control daughterboard audio codec.
+
+Required properties:
+- compatible: should be "cdns,xtfpga-spi".
+- reg: physical base address of the controller and length of memory mapped
+ region.
diff --git a/Documentation/devicetree/bindings/spi/spi_altera.txt b/Documentation/devicetree/bindings/spi/spi_altera.txt
new file mode 100644
index 000000000..31319dcf3
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi_altera.txt
@@ -0,0 +1,5 @@
+Altera SPI
+
+Required properties:
+- compatible : should be "ALTR,spi-1.0". <DEPRECATED>
+- compatible : should be "altr,spi-1.0".
diff --git a/Documentation/devicetree/bindings/spi/spi_atmel.txt b/Documentation/devicetree/bindings/spi/spi_atmel.txt
new file mode 100644
index 000000000..4f8184d06
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi_atmel.txt
@@ -0,0 +1,31 @@
+Atmel SPI device
+
+Required properties:
+- compatible : should be "atmel,at91rm9200-spi".
+- reg: Address and length of the register set for the device
+- interrupts: Should contain spi interrupt
+- cs-gpios: chipselects
+- clock-names: tuple listing input clock names.
+ Required elements: "spi_clk"
+- clocks: phandles to input clocks.
+
+Example:
+
+spi1: spi@fffcc000 {
+ compatible = "atmel,at91rm9200-spi";
+ reg = <0xfffcc000 0x4000>;
+ interrupts = <13 4 5>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ clocks = <&spi1_clk>;
+ clock-names = "spi_clk";
+ cs-gpios = <&pioB 3 0>;
+ status = "okay";
+
+ mmc-slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ gpios = <&pioC 4 0>; /* CD */
+ spi-max-frequency = <25000000>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/spi/spi_oc_tiny.txt b/Documentation/devicetree/bindings/spi/spi_oc_tiny.txt
new file mode 100644
index 000000000..d95c0b367
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi_oc_tiny.txt
@@ -0,0 +1,12 @@
+OpenCores tiny SPI
+
+Required properties:
+- compatible : should be "opencores,tiny-spi-rtlsvn2".
+- gpios : should specify GPIOs used for chipselect.
+Optional properties:
+- clock-frequency : input clock frequency to the core.
+- baud-width: width, in bits, of the programmable divider used to scale
+ the input clock to SCLK.
+
+The clock-frequency and baud-width properties are needed only if the divider
+is programmable. They are not needed if the divider is fixed.
diff --git a/Documentation/devicetree/bindings/spi/spi_pl022.txt b/Documentation/devicetree/bindings/spi/spi_pl022.txt
new file mode 100644
index 000000000..4d1673ca8
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/spi_pl022.txt
@@ -0,0 +1,70 @@
+ARM PL022 SPI controller
+
+Required properties:
+- compatible : "arm,pl022", "arm,primecell"
+- reg : Offset and length of the register set for the device
+- interrupts : Should contain SPI controller interrupt
+- num-cs : total number of chipselects
+
+Optional properties:
+- cs-gpios : should specify GPIOs used for chipselects.
+ The gpios will be referred to as reg = <index> in the SPI child nodes.
+ If unspecified, a single SPI device without a chip select can be used.
+- pl022,autosuspend-delay : delay in ms following transfer completion before
+ the runtime power management system suspends the
+ device. A setting of 0 indicates no delay and the
+ device will be suspended immediately
+- pl022,rt : indicates the controller should run the message pump with realtime
+ priority to minimise the transfer latency on the bus (boolean)
+- dmas : Two or more DMA channel specifiers following the convention outlined
+ in bindings/dma/dma.txt
+- dma-names: Names for the dma channels, if present. There must be at
+ least one channel named "tx" for transmit and named "rx" for
+ receive.
+
+
+SPI slave nodes must be children of the SPI master node and can
+contain the following properties.
+
+- pl022,interface : interface type:
+ 0: SPI
+ 1: Texas Instruments Synchronous Serial Frame Format
+ 2: Microwire (Half Duplex)
+- pl022,com-mode : polling, interrupt or dma
+- pl022,rx-level-trig : Rx FIFO watermark level
+- pl022,tx-level-trig : Tx FIFO watermark level
+- pl022,ctrl-len : Microwire interface: Control length
+- pl022,wait-state : Microwire interface: Wait state
+- pl022,duplex : Microwire interface: Full/Half duplex
+
+
+Example:
+
+ spi@e0100000 {
+ compatible = "arm,pl022", "arm,primecell";
+ reg = <0xe0100000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <0 31 0x4>;
+ dmas = <&dma-controller 23 1>,
+ <&dma-controller 24 0>;
+ dma-names = "rx", "tx";
+
+ m25p80@1 {
+ compatible = "st,m25p80";
+ reg = <1>;
+ spi-max-frequency = <12000000>;
+ spi-cpol;
+ spi-cpha;
+ pl022,hierarchy = <0>;
+ pl022,interface = <0>;
+ pl022,slave-tx-disable;
+ pl022,com-mode = <0x2>;
+ pl022,rx-level-trig = <0>;
+ pl022,tx-level-trig = <0>;
+ pl022,ctrl-len = <0x11>;
+ pl022,wait-state = <0>;
+ pl022,duplex = <0>;
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/spi/ti_qspi.txt b/Documentation/devicetree/bindings/spi/ti_qspi.txt
new file mode 100644
index 000000000..601a36053
--- /dev/null
+++ b/Documentation/devicetree/bindings/spi/ti_qspi.txt
@@ -0,0 +1,28 @@
+TI QSPI controller.
+
+Required properties:
+- compatible : should be "ti,dra7xxx-qspi" or "ti,am4372-qspi".
+- reg: Should contain QSPI registers location and length.
+- reg-names: Should contain the resource reg names.
+ - qspi_base: Qspi configuration register Address space
+ - qspi_mmap: Memory mapped Address space
+ - (optional) qspi_ctrlmod: Control module Address space
+- interrupts: should contain the qspi interrupt number.
+- #address-cells, #size-cells : Must be present if the device has sub-nodes
+- ti,hwmods: Name of the hwmod associated to the QSPI
+
+Recommended properties:
+- spi-max-frequency: Definition as per
+ Documentation/devicetree/bindings/spi/spi-bus.txt
+
+Example:
+
+qspi: qspi@4b300000 {
+ compatible = "ti,dra7xxx-qspi";
+ reg = <0x47900000 0x100>, <0x30000000 0x3ffffff>;
+ reg-names = "qspi_base", "qspi_mmap";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ spi-max-frequency = <25000000>;
+ ti,hwmods = "qspi";
+};
diff --git a/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.txt b/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.txt
new file mode 100644
index 000000000..e16b9b5af
--- /dev/null
+++ b/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.txt
@@ -0,0 +1,65 @@
+Qualcomm SPMI Controller (PMIC Arbiter)
+
+The SPMI PMIC Arbiter is found on Snapdragon chipsets. It is an SPMI
+controller with wrapping arbitration logic to allow for multiple on-chip
+devices to control a single SPMI master.
+
+The PMIC Arbiter can also act as an interrupt controller, providing interrupts
+to slave devices.
+
+See spmi.txt for the generic SPMI controller binding requirements for child
+nodes.
+
+See Documentation/devicetree/bindings/interrupt-controller/interrupts.txt for
+generic interrupt controller binding documentation.
+
+Required properties:
+- compatible : should be "qcom,spmi-pmic-arb".
+- reg-names : must contain:
+ "core" - core registers
+ "intr" - interrupt controller registers
+ "cnfg" - configuration registers
+ Registers used only for V2 PMIC Arbiter:
+ "chnls" - tx-channel per virtual slave registers.
+ "obsrvr" - rx-channel (called observer) per virtual slave registers.
+
+- reg : address + size pairs describing the PMIC arb register sets; order must
+ correspond with the order of entries in reg-names
+- #address-cells : must be set to 2
+- #size-cells : must be set to 0
+- qcom,ee : indicates the active Execution Environment identifier (0-5)
+- qcom,channel : which of the PMIC Arb provided channels to use for accesses (0-5)
+- interrupts : interrupt list for the PMIC Arb controller, must contain a
+ single interrupt entry for the peripheral interrupt
+- interrupt-names : corresponding interrupt names for the interrupts
+ listed in the 'interrupts' property, must contain:
+ "periph_irq" - summary interrupt for PMIC peripherals
+- interrupt-controller : boolean indicator that the PMIC arbiter is an interrupt controller
+- #interrupt-cells : must be set to 4. Interrupts are specified as a 4-tuple:
+ cell 1: slave ID for the requested interrupt (0-15)
+ cell 2: peripheral ID for requested interrupt (0-255)
+ cell 3: the requested peripheral interrupt (0-7)
+ cell 4: interrupt flags indicating level-sense information, as defined in
+ dt-bindings/interrupt-controller/irq.h
+
+Example:
+
+ spmi {
+ compatible = "qcom,spmi-pmic-arb";
+ reg-names = "core", "intr", "cnfg";
+ reg = <0xfc4cf000 0x1000>,
+ <0xfc4cb000 0x1000>,
+ <0xfc4ca000 0x1000>;
+
+ interrupt-names = "periph_irq";
+ interrupts = <0 190 0>;
+
+ qcom,ee = <0>;
+ qcom,channel = <0>;
+
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ interrupt-controller;
+ #interrupt-cells = <4>;
+ };
diff --git a/Documentation/devicetree/bindings/spmi/spmi.txt b/Documentation/devicetree/bindings/spmi/spmi.txt
new file mode 100644
index 000000000..4bb10d161
--- /dev/null
+++ b/Documentation/devicetree/bindings/spmi/spmi.txt
@@ -0,0 +1,41 @@
+System Power Management Interface (SPMI) Controller
+
+This document defines a generic set of bindings for use by SPMI controllers. A
+controller is modelled in device tree as a node with zero or more child nodes,
+each representing a unique slave on the bus.
+
+Required properties:
+- #address-cells : must be set to 2
+- #size-cells : must be set to 0
+
+Child nodes:
+
+An SPMI controller node can contain zero or more child nodes representing slave
+devices on the bus. Child 'reg' properties are specified as an address, type
+pair. The address must be in the range 0-15 (4 bits). The type must be one of
+SPMI_USID (0) or SPMI_GSID (1) for Unique Slave ID or Group Slave ID respectively.
+These are the identifiers "statically assigned by the system integrator", as
+per the SPMI spec.
+
+Each child node must have one and only one 'reg' entry of type SPMI_USID.
+
+#include <dt-bindings/spmi/spmi.h>
+
+ spmi@.. {
+ compatible = "...";
+ reg = <...>;
+
+ #address-cells = <2>;
+ #size-cells = <0>;
+
+ child@0 {
+ compatible = "...";
+ reg = <0 SPMI_USID>;
+ };
+
+ child@7 {
+ compatible = "...";
+ reg = <7 SPMI_USID
+ 3 SPMI_GSID>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/staging/iio/adc/lpc32xx-adc.txt b/Documentation/devicetree/bindings/staging/iio/adc/lpc32xx-adc.txt
new file mode 100644
index 000000000..b3629d3a9
--- /dev/null
+++ b/Documentation/devicetree/bindings/staging/iio/adc/lpc32xx-adc.txt
@@ -0,0 +1,16 @@
+* NXP LPC32xx SoC ADC controller
+
+Required properties:
+- compatible: must be "nxp,lpc3220-adc"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: The ADC interrupt
+
+Example:
+
+ adc@40048000 {
+ compatible = "nxp,lpc3220-adc";
+ reg = <0x40048000 0x1000>;
+ interrupt-parent = <&mic>;
+ interrupts = <39 0>;
+ };
diff --git a/Documentation/devicetree/bindings/staging/iio/adc/mxs-lradc.txt b/Documentation/devicetree/bindings/staging/iio/adc/mxs-lradc.txt
new file mode 100644
index 000000000..307537787
--- /dev/null
+++ b/Documentation/devicetree/bindings/staging/iio/adc/mxs-lradc.txt
@@ -0,0 +1,47 @@
+* Freescale i.MX28 LRADC device driver
+
+Required properties:
+- compatible: Should be "fsl,imx23-lradc" for i.MX23 SoC and "fsl,imx28-lradc"
+ for i.MX28 SoC
+- reg: Address and length of the register set for the device
+- interrupts: Should contain the LRADC interrupts
+
+Optional properties:
+- fsl,lradc-touchscreen-wires: Number of wires used to connect the touchscreen
+ to LRADC. Valid value is either 4 or 5. If this
+ property is not present, then the touchscreen is
+ disabled. 5 wires is valid for i.MX28 SoC only.
+- fsl,ave-ctrl: number of samples per direction to calculate an average value.
+ Allowed value is 1 ... 32, default is 4
+- fsl,ave-delay: delay between consecutive samples. Allowed value is
+ 2 ... 2048. It is used if 'fsl,ave-ctrl' > 1, counts at
+ 2 kHz and its default is 2 (= 1 ms)
+- fsl,settling: delay between plate switch to next sample. Allowed value is
+ 1 ... 2047. It counts at 2 kHz and its default is
+ 10 (= 5 ms)
+
+Example for i.MX23 SoC:
+
+ lradc@80050000 {
+ compatible = "fsl,imx23-lradc";
+ reg = <0x80050000 0x2000>;
+ interrupts = <36 37 38 39 40 41 42 43 44>;
+ status = "okay";
+ fsl,lradc-touchscreen-wires = <4>;
+ fsl,ave-ctrl = <4>;
+ fsl,ave-delay = <2>;
+ fsl,settling = <10>;
+ };
+
+Example for i.MX28 SoC:
+
+ lradc@80050000 {
+ compatible = "fsl,imx28-lradc";
+ reg = <0x80050000 0x2000>;
+ interrupts = <10 14 15 16 17 18 19 20 21 22 23 24 25>;
+ status = "okay";
+ fsl,lradc-touchscreen-wires = <5>;
+ fsl,ave-ctrl = <4>;
+ fsl,ave-delay = <2>;
+ fsl,settling = <10>;
+ };
diff --git a/Documentation/devicetree/bindings/staging/iio/adc/spear-adc.txt b/Documentation/devicetree/bindings/staging/iio/adc/spear-adc.txt
new file mode 100644
index 000000000..02ea23a63
--- /dev/null
+++ b/Documentation/devicetree/bindings/staging/iio/adc/spear-adc.txt
@@ -0,0 +1,26 @@
+* ST SPEAr ADC device driver
+
+Required properties:
+- compatible: Should be "st,spear600-adc"
+- reg: Address and length of the register set for the device
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the ADC interrupt
+- sampling-frequency: Default sampling frequency
+
+Optional properties:
+- vref-external: External voltage reference in milli-volts. If omitted
+ the internal voltage reference will be used.
+- average-samples: Number of samples to generate an average value. If
+ omitted, single data conversion will be used.
+
+Examples:
+
+ adc: adc@d8200000 {
+ compatible = "st,spear600-adc";
+ reg = <0xd8200000 0x1000>;
+ interrupt-parent = <&vic1>;
+ interrupts = <6>;
+ sampling-frequency = <5000000>;
+ vref-external = <2500>; /* 2.5V VRef */
+ };
diff --git a/Documentation/devicetree/bindings/submitting-patches.txt b/Documentation/devicetree/bindings/submitting-patches.txt
new file mode 100644
index 000000000..7d44eae7a
--- /dev/null
+++ b/Documentation/devicetree/bindings/submitting-patches.txt
@@ -0,0 +1,67 @@
+
+ Submitting devicetree (DT) binding patches
+
+I. For patch submitters
+
+ 0) Normal patch submission rules from Documentation/SubmittingPatches
+ applies.
+
+ 1) The Documentation/ portion of the patch should be a separate patch.
+
+ 2) Submit the entire series to the devicetree mailinglist at
+
+ devicetree@vger.kernel.org
+
+ and Cc: the DT maintainers. Use scripts/get_maintainer.pl to identify
+ all of the DT maintainers.
+
+ 3) The Documentation/ portion of the patch should come in the series before
+ the code implementing the binding.
+
+ 4) Any compatible strings used in a chip or board DTS file must be
+ previously documented in the corresponding DT binding text file
+ in Documentation/devicetree/bindings. This rule applies even if
+ the Linux device driver does not yet match on the compatible
+ string. [ checkpatch will emit warnings if this step is not
+ followed as of commit bff5da4335256513497cc8c79f9a9d1665e09864
+ ("checkpatch: add DT compatible string documentation checks"). ]
+
+ 5) The wildcard "<chip>" may be used in compatible strings, as in
+ the following example:
+
+ - compatible: Must contain '"nvidia,<chip>-pcie",
+ "nvidia,tegra20-pcie"' where <chip> is tegra30, tegra132, ...
+
+ As in the above example, the known values of "<chip>" should be
+ documented if it is used.
+
+ 6) If a documented compatible string is not yet matched by the
+ driver, the documentation should also include a compatible
+ string that is matched by the driver (as in the "nvidia,tegra20-pcie"
+ example above).
+
+
+II. For kernel maintainers
+
+ 1) If you aren't comfortable reviewing a given binding, reply to it and ask
+ the devicetree maintainers for guidance. This will help them prioritize
+ which ones to review and which ones are ok to let go.
+
+ 2) For driver (not subsystem) bindings: If you are comfortable with the
+ binding, and it hasn't received an Acked-by from the devicetree
+ maintainers after a few weeks, go ahead and take it.
+
+ Subsystem bindings (anything affecting more than a single device)
+ then getting a devicetree maintainer to review it is required.
+
+ 3) For a series going though multiple trees, the binding patch should be
+ kept with the driver using the binding.
+
+III. Notes
+
+ 0) Please see ...bindings/ABI.txt for details regarding devicetree ABI.
+
+ 1) This document is intended as a general familiarization with the process as
+ decided at the 2013 Kernel Summit. When in doubt, the current word of the
+ devicetree maintainers overrules this document. In that situation, a patch
+ updating this document would be appreciated.
diff --git a/Documentation/devicetree/bindings/thermal/armada-thermal.txt b/Documentation/devicetree/bindings/thermal/armada-thermal.txt
new file mode 100644
index 000000000..4698e0edc
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/armada-thermal.txt
@@ -0,0 +1,24 @@
+* Marvell Armada 370/375/380/XP thermal management
+
+Required properties:
+
+- compatible: Should be set to one of the following:
+ marvell,armada370-thermal
+ marvell,armada375-thermal
+ marvell,armada380-thermal
+ marvell,armadaxp-thermal
+
+- reg: Device's register space.
+ Two entries are expected, see the examples below.
+ The first one is required for the sensor register;
+ the second one is required for the control register
+ to be used for sensor initialization (a.k.a. calibration).
+
+Example:
+
+ thermal@d0018300 {
+ compatible = "marvell,armada370-thermal";
+ reg = <0xd0018300 0x4
+ 0xd0018304 0x4>;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/thermal/db8500-thermal.txt b/Documentation/devicetree/bindings/thermal/db8500-thermal.txt
new file mode 100644
index 000000000..2e1c06fad
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/db8500-thermal.txt
@@ -0,0 +1,44 @@
+* ST-Ericsson DB8500 Thermal
+
+** Thermal node properties:
+
+- compatible : "stericsson,db8500-thermal";
+- reg : address range of the thermal sensor registers;
+- interrupts : interrupts generated from PRCMU;
+- interrupt-names : "IRQ_HOTMON_LOW" and "IRQ_HOTMON_HIGH";
+- num-trips : number of total trip points, this is required, set it 0 if none,
+ if greater than 0, the following properties must be defined;
+- tripN-temp : temperature of trip point N, should be in ascending order;
+- tripN-type : type of trip point N, should be one of "active" "passive" "hot"
+ "critical";
+- tripN-cdev-num : number of the cooling devices which can be bound to trip
+ point N, this is required if trip point N is defined, set it 0 if none,
+ otherwise the following cooling device names must be defined;
+- tripN-cdev-nameM : name of the No. M cooling device of trip point N;
+
+Usually the num-trips and tripN-*** are separated in board related dts files.
+
+Example:
+thermal@801573c0 {
+ compatible = "stericsson,db8500-thermal";
+ reg = <0x801573c0 0x40>;
+ interrupts = <21 0x4>, <22 0x4>;
+ interrupt-names = "IRQ_HOTMON_LOW", "IRQ_HOTMON_HIGH";
+
+ num-trips = <3>;
+
+ trip0-temp = <75000>;
+ trip0-type = "active";
+ trip0-cdev-num = <1>;
+ trip0-cdev-name0 = "thermal-cpufreq-0";
+
+ trip1-temp = <80000>;
+ trip1-type = "active";
+ trip1-cdev-num = <2>;
+ trip1-cdev-name0 = "thermal-cpufreq-0";
+ trip1-cdev-name1 = "thermal-fan";
+
+ trip2-temp = <85000>;
+ trip2-type = "critical";
+ trip2-cdev-num = <0>;
+}
diff --git a/Documentation/devicetree/bindings/thermal/dove-thermal.txt b/Documentation/devicetree/bindings/thermal/dove-thermal.txt
new file mode 100644
index 000000000..6f474677d
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/dove-thermal.txt
@@ -0,0 +1,18 @@
+* Dove Thermal
+
+This driver is for Dove SoCs which contain a thermal sensor.
+
+Required properties:
+- compatible : "marvell,dove-thermal"
+- reg : Address range of the thermal registers
+
+The reg properties should contain two ranges. The first is for the
+three Thermal Manager registers, while the second range contains the
+Thermal Diode Control Registers.
+
+Example:
+
+ thermal@10078 {
+ compatible = "marvell,dove-thermal";
+ reg = <0xd001c 0x0c>, <0xd005c 0x08>;
+ };
diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
new file mode 100644
index 000000000..695150a41
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
@@ -0,0 +1,121 @@
+* Exynos Thermal Management Unit (TMU)
+
+** Required properties:
+
+- compatible : One of the following:
+ "samsung,exynos3250-tmu"
+ "samsung,exynos4412-tmu"
+ "samsung,exynos4210-tmu"
+ "samsung,exynos5250-tmu"
+ "samsung,exynos5260-tmu"
+ "samsung,exynos5420-tmu" for TMU channel 0, 1 on Exynos5420
+ "samsung,exynos5420-tmu-ext-triminfo" for TMU channels 2, 3 and 4
+ Exynos5420 (Must pass triminfo base and triminfo clock)
+ "samsung,exynos5440-tmu"
+ "samsung,exynos7-tmu"
+- interrupt-parent : The phandle for the interrupt controller
+- reg : Address range of the thermal registers. For soc's which has multiple
+ instances of TMU and some registers are shared across all TMU's like
+ interrupt related then 2 set of register has to supplied. First set
+ belongs to register set of TMU instance and second set belongs to
+ registers shared with the TMU instance.
+
+ NOTE: On Exynos5420, the TRIMINFO register is misplaced for TMU
+ channels 2, 3 and 4
+ Use "samsung,exynos5420-tmu-ext-triminfo" in cases, there is a misplaced
+ register, also provide clock to access that base.
+
+ TRIMINFO at 0x1006c000 contains data for TMU channel 3
+ TRIMINFO at 0x100a0000 contains data for TMU channel 4
+ TRIMINFO at 0x10068000 contains data for TMU channel 2
+
+- interrupts : Should contain interrupt for thermal system
+- clocks : The main clocks for TMU device
+ -- 1. operational clock for TMU channel
+ -- 2. optional clock to access the shared registers of TMU channel
+ -- 3. optional special clock for functional operation
+- clock-names : Thermal system clock name
+ -- "tmu_apbif" operational clock for current TMU channel
+ -- "tmu_triminfo_apbif" clock to access the shared triminfo register
+ for current TMU channel
+ -- "tmu_sclk" clock for functional operation of the current TMU
+ channel
+- vtmu-supply: This entry is optional and provides the regulator node supplying
+ voltage to TMU. If needed this entry can be placed inside
+ board/platform specific dts file.
+Following properties are mandatory (depending on SoC):
+- samsung,tmu_gain: Gain value for internal TMU operation.
+- samsung,tmu_reference_voltage: Value of TMU IP block's reference voltage
+- samsung,tmu_noise_cancel_mode: Mode for noise cancellation
+- samsung,tmu_efuse_value: Default level of temperature - it is needed when
+ in factory fusing produced wrong value
+- samsung,tmu_min_efuse_value: Minimum temperature fused value
+- samsung,tmu_max_efuse_value: Maximum temperature fused value
+- samsung,tmu_first_point_trim: First point trimming value
+- samsung,tmu_second_point_trim: Second point trimming value
+- samsung,tmu_default_temp_offset: Default temperature offset
+- samsung,tmu_cal_type: Callibration type
+
+Example 1):
+
+ tmu@100C0000 {
+ compatible = "samsung,exynos4412-tmu";
+ interrupt-parent = <&combiner>;
+ reg = <0x100C0000 0x100>;
+ interrupts = <2 4>;
+ clocks = <&clock 383>;
+ clock-names = "tmu_apbif";
+ status = "disabled";
+ vtmu-supply = <&tmu_regulator_node>;
+ #include "exynos4412-tmu-sensor-conf.dtsi"
+ };
+
+Example 2):
+
+ tmuctrl_0: tmuctrl@160118 {
+ compatible = "samsung,exynos5440-tmu";
+ reg = <0x160118 0x230>, <0x160368 0x10>;
+ interrupts = <0 58 0>;
+ clocks = <&clock 21>;
+ clock-names = "tmu_apbif";
+ #include "exynos5440-tmu-sensor-conf.dtsi"
+ };
+
+Example 3): (In case of Exynos5420 "with misplaced TRIMINFO register")
+ tmu_cpu2: tmu@10068000 {
+ compatible = "samsung,exynos5420-tmu-ext-triminfo";
+ reg = <0x10068000 0x100>, <0x1006c000 0x4>;
+ interrupts = <0 184 0>;
+ clocks = <&clock 318>, <&clock 318>;
+ clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+ #include "exynos4412-tmu-sensor-conf.dtsi"
+ };
+
+ tmu_cpu3: tmu@1006c000 {
+ compatible = "samsung,exynos5420-tmu-ext-triminfo";
+ reg = <0x1006c000 0x100>, <0x100a0000 0x4>;
+ interrupts = <0 185 0>;
+ clocks = <&clock 318>, <&clock 319>;
+ clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+ #include "exynos4412-tmu-sensor-conf.dtsi"
+ };
+
+ tmu_gpu: tmu@100a0000 {
+ compatible = "samsung,exynos5420-tmu-ext-triminfo";
+ reg = <0x100a0000 0x100>, <0x10068000 0x4>;
+ interrupts = <0 215 0>;
+ clocks = <&clock 319>, <&clock 318>;
+ clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+ #include "exynos4412-tmu-sensor-conf.dtsi"
+ };
+
+Note: For multi-instance tmu each instance should have an alias correctly
+numbered in "aliases" node.
+
+Example:
+
+aliases {
+ tmuctrl0 = &tmuctrl_0;
+ tmuctrl1 = &tmuctrl_1;
+ tmuctrl2 = &tmuctrl_2;
+};
diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.txt b/Documentation/devicetree/bindings/thermal/imx-thermal.txt
new file mode 100644
index 000000000..3c67bd50a
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/imx-thermal.txt
@@ -0,0 +1,24 @@
+* Temperature Monitor (TEMPMON) on Freescale i.MX SoCs
+
+Required properties:
+- compatible : "fsl,imx6q-tempmon" for i.MX6Q, "fsl,imx6sx-tempmon" for i.MX6SX.
+ i.MX6SX has two more IRQs than i.MX6Q, one is IRQ_LOW and the other is IRQ_PANIC,
+ when temperature is below than low threshold, IRQ_LOW will be triggered, when temperature
+ is higher than panic threshold, system will auto reboot by SRC module.
+- fsl,tempmon : phandle pointer to system controller that contains TEMPMON
+ control registers, e.g. ANATOP on imx6q.
+- fsl,tempmon-data : phandle pointer to fuse controller that contains TEMPMON
+ calibration data, e.g. OCOTP on imx6q. The details about calibration data
+ can be found in SoC Reference Manual.
+
+Optional properties:
+- clocks : thermal sensor's clock source.
+
+Example:
+
+tempmon {
+ compatible = "fsl,imx6q-tempmon";
+ fsl,tempmon = <&anatop>;
+ fsl,tempmon-data = <&ocotp>;
+ clocks = <&clks 172>;
+};
diff --git a/Documentation/devicetree/bindings/thermal/kirkwood-thermal.txt b/Documentation/devicetree/bindings/thermal/kirkwood-thermal.txt
new file mode 100644
index 000000000..8c0f5eb86
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/kirkwood-thermal.txt
@@ -0,0 +1,15 @@
+* Kirkwood Thermal
+
+This version is for Kirkwood 88F8262 & 88F6283 SoCs. Other kirkwoods
+don't contain a thermal sensor.
+
+Required properties:
+- compatible : "marvell,kirkwood-thermal"
+- reg : Address range of the thermal registers
+
+Example:
+
+ thermal@10078 {
+ compatible = "marvell,kirkwood-thermal";
+ reg = <0x10078 0x4>;
+ };
diff --git a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
new file mode 100644
index 000000000..332e625f6
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt
@@ -0,0 +1,38 @@
+* Renesas R-Car Thermal
+
+Required properties:
+- compatible : "renesas,thermal-<soctype>", "renesas,rcar-thermal"
+ as fallback.
+ Examples with soctypes are:
+ - "renesas,thermal-r8a73a4" (R-Mobile APE6)
+ - "renesas,thermal-r8a7779" (R-Car H1)
+ - "renesas,thermal-r8a7790" (R-Car H2)
+ - "renesas,thermal-r8a7791" (R-Car M2-W)
+ - "renesas,thermal-r8a7792" (R-Car V2H)
+ - "renesas,thermal-r8a7793" (R-Car M2-N)
+ - "renesas,thermal-r8a7794" (R-Car E2)
+- reg : Address range of the thermal registers.
+ The 1st reg will be recognized as common register
+ if it has "interrupts".
+
+Option properties:
+
+- interrupts : use interrupt
+
+Example (non interrupt support):
+
+thermal@ffc48000 {
+ compatible = "renesas,thermal-r8a7779", "renesas,rcar-thermal";
+ reg = <0xffc48000 0x38>;
+};
+
+Example (interrupt support):
+
+thermal@e61f0000 {
+ compatible = "renesas,thermal-r8a73a4", "renesas,rcar-thermal";
+ reg = <0xe61f0000 0x14
+ 0xe61f0100 0x38
+ 0xe61f0200 0x38
+ 0xe61f0300 0x38>;
+ interrupts = <0 69 IRQ_TYPE_LEVEL_HIGH>;
+};
diff --git a/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
new file mode 100644
index 000000000..ef802de49
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/rockchip-thermal.txt
@@ -0,0 +1,68 @@
+* Temperature Sensor ADC (TSADC) on rockchip SoCs
+
+Required properties:
+- compatible : "rockchip,rk3288-tsadc"
+- reg : physical base address of the controller and length of memory mapped
+ region.
+- interrupts : The interrupt number to the cpu. The interrupt specifier format
+ depends on the interrupt controller.
+- clocks : Must contain an entry for each entry in clock-names.
+- clock-names : Shall be "tsadc" for the converter-clock, and "apb_pclk" for
+ the peripheral clock.
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the name "tsadc-apb".
+- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description.
+- rockchip,hw-tshut-temp : The hardware-controlled shutdown temperature value.
+- rockchip,hw-tshut-mode : The hardware-controlled shutdown mode 0:CRU 1:GPIO.
+- rockchip,hw-tshut-polarity : The hardware-controlled active polarity 0:LOW
+ 1:HIGH.
+
+Exiample:
+tsadc: tsadc@ff280000 {
+ compatible = "rockchip,rk3288-tsadc";
+ reg = <0xff280000 0x100>;
+ interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru SCLK_TSADC>, <&cru PCLK_TSADC>;
+ clock-names = "tsadc", "apb_pclk";
+ resets = <&cru SRST_TSADC>;
+ reset-names = "tsadc-apb";
+ pinctrl-names = "default";
+ pinctrl-0 = <&otp_out>;
+ #thermal-sensor-cells = <1>;
+ rockchip,hw-tshut-temp = <95000>;
+ rockchip,hw-tshut-mode = <0>;
+ rockchip,hw-tshut-polarity = <0>;
+};
+
+Example: referring to thermal sensors:
+thermal-zones {
+ cpu_thermal: cpu_thermal {
+ polling-delay-passive = <1000>; /* milliseconds */
+ polling-delay = <5000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&tsadc 1>;
+
+ trips {
+ cpu_alert0: cpu_alert {
+ temperature = <70000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ cpu_crit: cpu_crit {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu_alert0>;
+ cooling-device =
+ <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/thermal/spear-thermal.txt b/Documentation/devicetree/bindings/thermal/spear-thermal.txt
new file mode 100644
index 000000000..93e3b67c1
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/spear-thermal.txt
@@ -0,0 +1,14 @@
+* SPEAr Thermal
+
+Required properties:
+- compatible : "st,thermal-spear1340"
+- reg : Address range of the thermal registers
+- st,thermal-flags: flags used to enable thermal sensor
+
+Example:
+
+ thermal@fc000000 {
+ compatible = "st,thermal-spear1340";
+ reg = <0xfc000000 0x1000>;
+ st,thermal-flags = <0x7000>;
+ };
diff --git a/Documentation/devicetree/bindings/thermal/st-thermal.txt b/Documentation/devicetree/bindings/thermal/st-thermal.txt
new file mode 100644
index 000000000..3b9251b4a
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/st-thermal.txt
@@ -0,0 +1,42 @@
+Binding for Thermal Sensor driver for STMicroelectronics STi series of SoCs.
+
+Required parameters:
+-------------------
+
+compatible : st,<SoC>-<module>-thermal; should be one of:
+ "st,stih415-sas-thermal",
+ "st,stih415-mpe-thermal",
+ "st,stih416-sas-thermal"
+ "st,stih416-mpe-thermal"
+ "st,stid127-thermal" or
+ "st,stih407-thermal"
+ according to the SoC type (stih415, stih416, stid127, stih407)
+ and module type (sas or mpe). On stid127 & stih407 there is only
+ one die/module, so there is no module type in the compatible
+ string.
+clock-names : Should be "thermal".
+ See: Documentation/devicetree/bindings/resource-names.txt
+clocks : Phandle of the clock used by the thermal sensor.
+ See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+Optional parameters:
+-------------------
+
+reg : For non-sysconf based sensors, this should be the physical base
+ address and length of the sensor's registers.
+interrupts : Standard way to define interrupt number.
+ Interrupt is mandatory to be defined when compatible is
+ "stih416-mpe-thermal".
+ NB: For thermal sensor's for which no interrupt has been
+ defined, a polling delay of 1000ms will be used to read the
+ temperature from device.
+
+Example:
+
+ temp1@fdfe8000 {
+ compatible = "st,stih416-mpe-thermal";
+ reg = <0xfdfe8000 0x10>;
+ clock-names = "thermal";
+ clocks = <&clk_m_mpethsens>;
+ interrupts = <GIC_SPI 23 IRQ_TYPE_NONE>;
+ };
diff --git a/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt b/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt
new file mode 100644
index 000000000..6b68cd150
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/tegra-soctherm.txt
@@ -0,0 +1,55 @@
+Tegra124 SOCTHERM thermal management system
+
+The SOCTHERM IP block contains thermal sensors, support for polled
+or interrupt-based thermal monitoring, CPU and GPU throttling based
+on temperature trip points, and handling external overcurrent
+notifications. It is also used to manage emergency shutdown in an
+overheating situation.
+
+Required properties :
+- compatible : For Tegra124, must contain "nvidia,tegra124-soctherm".
+ For Tegra132, must contain "nvidia,tegra132-soctherm".
+ For Tegra210, must contain "nvidia,tegra210-soctherm".
+- reg : Should contain 1 entry:
+ - SOCTHERM register set
+- interrupts : Defines the interrupt used by SOCTHERM
+- clocks : Must contain an entry for each entry in clock-names.
+ See ../clocks/clock-bindings.txt for details.
+- clock-names : Must include the following entries:
+ - tsensor
+ - soctherm
+- resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names : Must include the following entries:
+ - soctherm
+- #thermal-sensor-cells : Should be 1. See ./thermal.txt for a description
+ of this property. See <dt-bindings/thermal/tegra124-soctherm.h> for a
+ list of valid values when referring to thermal sensors.
+
+
+Example :
+
+ soctherm@0,700e2000 {
+ compatible = "nvidia,tegra124-soctherm";
+ reg = <0x0 0x700e2000 0x0 0x1000>;
+ interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&tegra_car TEGRA124_CLK_TSENSOR>,
+ <&tegra_car TEGRA124_CLK_SOC_THERM>;
+ clock-names = "tsensor", "soctherm";
+ resets = <&tegra_car 78>;
+ reset-names = "soctherm";
+
+ #thermal-sensor-cells = <1>;
+ };
+
+Example: referring to thermal sensors :
+
+ thermal-zones {
+ cpu {
+ polling-delay-passive = <1000>;
+ polling-delay = <1000>;
+
+ thermal-sensors =
+ <&soctherm TEGRA124_SOCTHERM_SENSOR_CPU>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt
new file mode 100644
index 000000000..29fe0bfae
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/thermal.txt
@@ -0,0 +1,595 @@
+* Thermal Framework Device Tree descriptor
+
+This file describes a generic binding to provide a way of
+defining hardware thermal structure using device tree.
+A thermal structure includes thermal zones and their components,
+such as trip points, polling intervals, sensors and cooling devices
+binding descriptors.
+
+The target of device tree thermal descriptors is to describe only
+the hardware thermal aspects. The thermal device tree bindings are
+not about how the system must control or which algorithm or policy
+must be taken in place.
+
+There are five types of nodes involved to describe thermal bindings:
+- thermal sensors: devices which may be used to take temperature
+ measurements.
+- cooling devices: devices which may be used to dissipate heat.
+- trip points: describe key temperatures at which cooling is recommended. The
+ set of points should be chosen based on hardware limits.
+- cooling maps: used to describe links between trip points and cooling devices;
+- thermal zones: used to describe thermal data within the hardware;
+
+The following is a description of each of these node types.
+
+* Thermal sensor devices
+
+Thermal sensor devices are nodes providing temperature sensing capabilities on
+thermal zones. Typical devices are I2C ADC converters and bandgaps. These are
+nodes providing temperature data to thermal zones. Thermal sensor devices may
+control one or more internal sensors.
+
+Required property:
+- #thermal-sensor-cells: Used to provide sensor device specific information
+ Type: unsigned while referring to it. Typically 0 on thermal sensor
+ Size: one cell nodes with only one sensor, and at least 1 on nodes
+ with several internal sensors, in order
+ to identify uniquely the sensor instances within
+ the IC. See thermal zone binding for more details
+ on how consumers refer to sensor devices.
+
+* Cooling device nodes
+
+Cooling devices are nodes providing control on power dissipation. There
+are essentially two ways to provide control on power dissipation. First
+is by means of regulating device performance, which is known as passive
+cooling. A typical passive cooling is a CPU that has dynamic voltage and
+frequency scaling (DVFS), and uses lower frequencies as cooling states.
+Second is by means of activating devices in order to remove
+the dissipated heat, which is known as active cooling, e.g. regulating
+fan speeds. In both cases, cooling devices shall have a way to determine
+the state of cooling in which the device is.
+
+Any cooling device has a range of cooling states (i.e. different levels
+of heat dissipation). For example a fan's cooling states correspond to
+the different fan speeds possible. Cooling states are referred to by
+single unsigned integers, where larger numbers mean greater heat
+dissipation. The precise set of cooling states associated with a device
+(as referred to be the cooling-min-state and cooling-max-state
+properties) should be defined in a particular device's binding.
+For more examples of cooling devices, refer to the example sections below.
+
+Required properties:
+- cooling-min-state: An integer indicating the smallest
+ Type: unsigned cooling state accepted. Typically 0.
+ Size: one cell
+
+- cooling-max-state: An integer indicating the largest
+ Type: unsigned cooling state accepted.
+ Size: one cell
+
+- #cooling-cells: Used to provide cooling device specific information
+ Type: unsigned while referring to it. Must be at least 2, in order
+ Size: one cell to specify minimum and maximum cooling state used
+ in the reference. The first cell is the minimum
+ cooling state requested and the second cell is
+ the maximum cooling state requested in the reference.
+ See Cooling device maps section below for more details
+ on how consumers refer to cooling devices.
+
+* Trip points
+
+The trip node is a node to describe a point in the temperature domain
+in which the system takes an action. This node describes just the point,
+not the action.
+
+Required properties:
+- temperature: An integer indicating the trip temperature level,
+ Type: signed in millicelsius.
+ Size: one cell
+
+- hysteresis: A low hysteresis value on temperature property (above).
+ Type: unsigned This is a relative value, in millicelsius.
+ Size: one cell
+
+- type: a string containing the trip type. Expected values are:
+ "active": A trip point to enable active cooling
+ "passive": A trip point to enable passive cooling
+ "hot": A trip point to notify emergency
+ "critical": Hardware not reliable.
+ Type: string
+
+* Cooling device maps
+
+The cooling device maps node is a node to describe how cooling devices
+get assigned to trip points of the zone. The cooling devices are expected
+to be loaded in the target system.
+
+Required properties:
+- cooling-device: A phandle of a cooling device with its specifier,
+ Type: phandle + referring to which cooling device is used in this
+ cooling specifier binding. In the cooling specifier, the first cell
+ is the minimum cooling state and the second cell
+ is the maximum cooling state used in this map.
+- trip: A phandle of a trip point node within the same thermal
+ Type: phandle of zone.
+ trip point node
+
+Optional property:
+- contribution: The cooling contribution to the thermal zone of the
+ Type: unsigned referred cooling device at the referred trip point.
+ Size: one cell The contribution is a ratio of the sum
+ of all cooling contributions within a thermal zone.
+
+Note: Using the THERMAL_NO_LIMIT (-1UL) constant in the cooling-device phandle
+limit specifier means:
+(i) - minimum state allowed for minimum cooling state used in the reference.
+(ii) - maximum state allowed for maximum cooling state used in the reference.
+Refer to include/dt-bindings/thermal/thermal.h for definition of this constant.
+
+* Thermal zone nodes
+
+The thermal zone node is the node containing all the required info
+for describing a thermal zone, including its cooling device bindings. The
+thermal zone node must contain, apart from its own properties, one sub-node
+containing trip nodes and one sub-node containing all the zone cooling maps.
+
+Required properties:
+- polling-delay: The maximum number of milliseconds to wait between polls
+ Type: unsigned when checking this thermal zone.
+ Size: one cell
+
+- polling-delay-passive: The maximum number of milliseconds to wait
+ Type: unsigned between polls when performing passive cooling.
+ Size: one cell
+
+- thermal-sensors: A list of thermal sensor phandles and sensor specifier
+ Type: list of used while monitoring the thermal zone.
+ phandles + sensor
+ specifier
+
+- trips: A sub-node which is a container of only trip point nodes
+ Type: sub-node required to describe the thermal zone.
+
+- cooling-maps: A sub-node which is a container of only cooling device
+ Type: sub-node map nodes, used to describe the relation between trips
+ and cooling devices.
+
+Optional property:
+- coefficients: An array of integers (one signed cell) containing
+ Type: array coefficients to compose a linear relation between
+ Elem size: one cell the sensors listed in the thermal-sensors property.
+ Elem type: signed Coefficients defaults to 1, in case this property
+ is not specified. A simple linear polynomial is used:
+ Z = c0 * x0 + c1 + x1 + ... + c(n-1) * x(n-1) + cn.
+
+ The coefficients are ordered and they match with sensors
+ by means of sensor ID. Additional coefficients are
+ interpreted as constant offset.
+
+Note: The delay properties are bound to the maximum dT/dt (temperature
+derivative over time) in two situations for a thermal zone:
+(i) - when passive cooling is activated (polling-delay-passive); and
+(ii) - when the zone just needs to be monitored (polling-delay) or
+when active cooling is activated.
+
+The maximum dT/dt is highly bound to hardware power consumption and dissipation
+capability. The delays should be chosen to account for said max dT/dt,
+such that a device does not cross several trip boundaries unexpectedly
+between polls. Choosing the right polling delays shall avoid having the
+device in temperature ranges that may damage the silicon structures and
+reduce silicon lifetime.
+
+* The thermal-zones node
+
+The "thermal-zones" node is a container for all thermal zone nodes. It shall
+contain only sub-nodes describing thermal zones as in the section
+"Thermal zone nodes". The "thermal-zones" node appears under "/".
+
+* Examples
+
+Below are several examples on how to use thermal data descriptors
+using device tree bindings:
+
+(a) - CPU thermal zone
+
+The CPU thermal zone example below describes how to setup one thermal zone
+using one single sensor as temperature source and many cooling devices and
+power dissipation control sources.
+
+#include <dt-bindings/thermal/thermal.h>
+
+cpus {
+ /*
+ * Here is an example of describing a cooling device for a DVFS
+ * capable CPU. The CPU node describes its four OPPs.
+ * The cooling states possible are 0..3, and they are
+ * used as OPP indexes. The minimum cooling state is 0, which means
+ * all four OPPs can be available to the system. The maximum
+ * cooling state is 3, which means only the lowest OPPs (198MHz@0.85V)
+ * can be available in the system.
+ */
+ cpu0: cpu@0 {
+ ...
+ operating-points = <
+ /* kHz uV */
+ 970000 1200000
+ 792000 1100000
+ 396000 950000
+ 198000 850000
+ >;
+ cooling-min-state = <0>;
+ cooling-max-state = <3>;
+ #cooling-cells = <2>; /* min followed by max */
+ };
+ ...
+};
+
+&i2c1 {
+ ...
+ /*
+ * A simple fan controller which supports 10 speeds of operation
+ * (represented as 0-9).
+ */
+ fan0: fan@0x48 {
+ ...
+ cooling-min-state = <0>;
+ cooling-max-state = <9>;
+ #cooling-cells = <2>; /* min followed by max */
+ };
+};
+
+ocp {
+ ...
+ /*
+ * A simple IC with a single bandgap temperature sensor.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+thermal-zones {
+ cpu_thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ thermal-sensors = <&bandgap0>;
+
+ trips {
+ cpu_alert0: cpu-alert0 {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "active";
+ };
+ cpu_alert1: cpu-alert1 {
+ temperature = <100000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ cpu_crit: cpu-crit {
+ temperature = <125000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu_alert0>;
+ cooling-device = <&fan0 THERMAL_NO_LIMIT 4>;
+ };
+ map1 {
+ trip = <&cpu_alert1>;
+ cooling-device = <&fan0 5 THERMAL_NO_LIMIT>;
+ };
+ map2 {
+ trip = <&cpu_alert1>;
+ cooling-device =
+ <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
+ };
+ };
+ };
+};
+
+In the example above, the ADC sensor (bandgap0) at address 0x0000ED00 is
+used to monitor the zone 'cpu-thermal' using its sole sensor. A fan
+device (fan0) is controlled via I2C bus 1, at address 0x48, and has ten
+different cooling states 0-9. It is used to remove the heat out of
+the thermal zone 'cpu-thermal' using its cooling states
+from its minimum to 4, when it reaches trip point 'cpu_alert0'
+at 90C, as an example of active cooling. The same cooling device is used at
+'cpu_alert1', but from 5 to its maximum state. The cpu@0 device is also
+linked to the same thermal zone, 'cpu-thermal', as a passive cooling device,
+using all its cooling states at trip point 'cpu_alert1',
+which is a trip point at 100C. On the thermal zone 'cpu-thermal', at the
+temperature of 125C, represented by the trip point 'cpu_crit', the silicon
+is not reliable anymore.
+
+(b) - IC with several internal sensors
+
+The example below describes how to deploy several thermal zones based off a
+single sensor IC, assuming it has several internal sensors. This is a common
+case on SoC designs with several internal IPs that may need different thermal
+requirements, and thus may have their own sensor to monitor or detect internal
+hotspots in their silicon.
+
+#include <dt-bindings/thermal/thermal.h>
+
+ocp {
+ ...
+ /*
+ * A simple IC with several bandgap temperature sensors.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <1>;
+ };
+};
+
+thermal-zones {
+ cpu_thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 0>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+ cpu_alert: cpu-alert {
+ temperature = <100000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ cpu_crit: cpu-crit {
+ temperature = <125000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+
+ gpu_thermal: gpu-thermal {
+ polling-delay-passive = <120>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 1>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+ gpu_alert: gpu-alert {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ gpu_crit: gpu-crit {
+ temperature = <105000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+
+ dsp_thermal: dsp-thermal {
+ polling-delay-passive = <50>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 2>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+ dsp_alert: dsp-alert {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ dsp_crit: gpu-crit {
+ temperature = <135000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+};
+
+In the example above, there is one bandgap IC which has the capability to
+monitor three sensors. The hardware has been designed so that sensors are
+placed on different places in the DIE to monitor different temperature
+hotspots: one for CPU thermal zone, one for GPU thermal zone and the
+other to monitor a DSP thermal zone.
+
+Thus, there is a need to assign each sensor provided by the bandgap IC
+to different thermal zones. This is achieved by means of using the
+#thermal-sensor-cells property and using the first cell of the sensor
+specifier as sensor ID. In the example, then, <bandgap 0> is used to
+monitor CPU thermal zone, <bandgap 1> is used to monitor GPU thermal
+zone and <bandgap 2> is used to monitor DSP thermal zone. Each zone
+may be uncorrelated, having its own dT/dt requirements, trips
+and cooling maps.
+
+
+(c) - Several sensors within one single thermal zone
+
+The example below illustrates how to use more than one sensor within
+one thermal zone.
+
+#include <dt-bindings/thermal/thermal.h>
+
+&i2c1 {
+ ...
+ /*
+ * A simple IC with a single temperature sensor.
+ */
+ adc: sensor@0x49 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+ocp {
+ ...
+ /*
+ * A simple IC with a single bandgap temperature sensor.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+thermal-zones {
+ cpu_thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ thermal-sensors = <&bandgap0>, /* cpu */
+ <&adc>; /* pcb north */
+
+ /* hotspot = 100 * bandgap - 120 * adc + 484 */
+ coefficients = <100 -120 484>;
+
+ trips {
+ ...
+ };
+
+ cooling-maps {
+ ...
+ };
+ };
+};
+
+In some cases, there is a need to use more than one sensor to extrapolate
+a thermal hotspot in the silicon. The above example illustrates this situation.
+For instance, it may be the case that a sensor external to CPU IP may be placed
+close to CPU hotspot and together with internal CPU sensor, it is used
+to determine the hotspot. Assuming this is the case for the above example,
+the hypothetical extrapolation rule would be:
+ hotspot = 100 * bandgap - 120 * adc + 484
+
+In other context, the same idea can be used to add fixed offset. For instance,
+consider the hotspot extrapolation rule below:
+ hotspot = 1 * adc + 6000
+
+In the above equation, the hotspot is always 6C higher than what is read
+from the ADC sensor. The binding would be then:
+ thermal-sensors = <&adc>;
+
+ /* hotspot = 1 * adc + 6000 */
+ coefficients = <1 6000>;
+
+(d) - Board thermal
+
+The board thermal example below illustrates how to setup one thermal zone
+with many sensors and many cooling devices.
+
+#include <dt-bindings/thermal/thermal.h>
+
+&i2c1 {
+ ...
+ /*
+ * An IC with several temperature sensor.
+ */
+ adc_dummy: sensor@0x50 {
+ ...
+ #thermal-sensor-cells = <1>; /* sensor internal ID */
+ };
+};
+
+thermal-zones {
+ batt-thermal {
+ polling-delay-passive = <500>; /* milliseconds */
+ polling-delay = <2500>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&adc_dummy 4>;
+
+ trips {
+ ...
+ };
+
+ cooling-maps {
+ ...
+ };
+ };
+
+ board_thermal: board-thermal {
+ polling-delay-passive = <1000>; /* milliseconds */
+ polling-delay = <2500>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&adc_dummy 0>, /* pcb top edge */
+ <&adc_dummy 1>, /* lcd */
+ <&adc_dummy 2>; /* back cover */
+ /*
+ * An array of coefficients describing the sensor
+ * linear relation. E.g.:
+ * z = c1*x1 + c2*x2 + c3*x3
+ */
+ coefficients = <1200 -345 890>;
+
+ trips {
+ /* Trips are based on resulting linear equation */
+ cpu_trip: cpu-trip {
+ temperature = <60000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ gpu_trip: gpu-trip {
+ temperature = <55000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ }
+ lcd_trip: lcp-trip {
+ temperature = <53000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ crit_trip: crit-trip {
+ temperature = <68000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu_trip>;
+ cooling-device = <&cpu0 0 2>;
+ contribution = <55>;
+ };
+ map1 {
+ trip = <&gpu_trip>;
+ cooling-device = <&gpu0 0 2>;
+ contribution = <20>;
+ };
+ map2 {
+ trip = <&lcd_trip>;
+ cooling-device = <&lcd0 5 10>;
+ contribution = <15>;
+ };
+ };
+ };
+};
+
+The above example is a mix of previous examples, a sensor IP with several internal
+sensors used to monitor different zones, one of them is composed by several sensors and
+with different cooling devices.
diff --git a/Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt b/Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt
new file mode 100644
index 000000000..0c9222d27
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/ti_soc_thermal.txt
@@ -0,0 +1,74 @@
+* Texas Instrument OMAP SCM bandgap bindings
+
+In the System Control Module, OMAP supplies a voltage reference
+and a temperature sensor feature that are gathered in the band
+gap voltage and temperature sensor (VBGAPTS) module. The band
+gap provides current and voltage reference for its internal
+circuits and other analog IP blocks. The analog-to-digital
+converter (ADC) produces an output value that is proportional
+to the silicon temperature.
+
+Required properties:
+- compatible : Should be:
+ - "ti,omap4430-bandgap" : for OMAP4430 bandgap
+ - "ti,omap4460-bandgap" : for OMAP4460 bandgap
+ - "ti,omap4470-bandgap" : for OMAP4470 bandgap
+ - "ti,omap5430-bandgap" : for OMAP5430 bandgap
+- interrupts : this entry should indicate which interrupt line
+the talert signal is routed to;
+Specific:
+- gpios : this entry should be used to inform which GPIO
+line the tshut signal is routed to. The informed GPIO will
+be treated as an IRQ;
+- regs : this entry must also be specified and it is specific
+to each bandgap version, because the mapping may change from
+soc to soc, apart of depending on available features.
+
+Example:
+OMAP4430:
+bandgap {
+ reg = <0x4a002260 0x4 0x4a00232C 0x4>;
+ compatible = "ti,omap4430-bandgap";
+};
+
+OMAP4460:
+bandgap {
+ reg = <0x4a002260 0x4
+ 0x4a00232C 0x4
+ 0x4a002378 0x18>;
+ compatible = "ti,omap4460-bandgap";
+ interrupts = <0 126 4>; /* talert */
+ gpios = <&gpio3 22 0>; /* tshut */
+};
+
+OMAP4470:
+bandgap {
+ reg = <0x4a002260 0x4
+ 0x4a00232C 0x4
+ 0x4a002378 0x18>;
+ compatible = "ti,omap4470-bandgap";
+ interrupts = <0 126 4>; /* talert */
+ gpios = <&gpio3 22 0>; /* tshut */
+};
+
+OMAP5430:
+bandgap {
+ reg = <0x4a0021e0 0xc
+ 0x4a00232c 0xc
+ 0x4a002380 0x2c
+ 0x4a0023C0 0x3c>;
+ compatible = "ti,omap5430-bandgap";
+ interrupts = <0 126 4>; /* talert */
+};
+
+DRA752:
+bandgap {
+ reg = <0x4a0021e0 0xc
+ 0x4a00232c 0xc
+ 0x4a002380 0x2c
+ 0x4a0023C0 0x3c
+ 0x4a002564 0x8
+ 0x4a002574 0x50>;
+ compatible = "ti,dra752-bandgap";
+ interrupts = <0 126 4>; /* talert */
+};
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt b/Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt
new file mode 100644
index 000000000..5c2e23574
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun4i-timer.txt
@@ -0,0 +1,17 @@
+Allwinner A1X SoCs Timer Controller
+
+Required properties:
+
+- compatible : should be "allwinner,sun4i-a10-timer"
+- reg : Specifies base physical address and size of the registers.
+- interrupts : The interrupt of the first timer
+- clocks: phandle to the source clock (usually a 24 MHz fixed clock)
+
+Example:
+
+timer {
+ compatible = "allwinner,sun4i-a10-timer";
+ reg = <0x01c20c00 0x400>;
+ interrupts = <22>;
+ clocks = <&osc>;
+};
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt
new file mode 100644
index 000000000..27cfc7d7c
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.txt
@@ -0,0 +1,26 @@
+Allwinner SoCs High Speed Timer Controller
+
+Required properties:
+
+- compatible : should be "allwinner,sun5i-a13-hstimer" or
+ "allwinner,sun7i-a20-hstimer"
+- reg : Specifies base physical address and size of the registers.
+- interrupts : The interrupts of these timers (2 for the sun5i IP, 4 for the sun7i
+ one)
+- clocks: phandle to the source clock (usually the AHB clock)
+
+Optionnal properties:
+- resets: phandle to a reset controller asserting the timer
+
+Example:
+
+timer@01c60000 {
+ compatible = "allwinner,sun7i-a20-hstimer";
+ reg = <0x01c60000 0x1000>;
+ interrupts = <0 51 1>,
+ <0 52 1>,
+ <0 53 1>,
+ <0 54 1>;
+ clocks = <&ahb1_gates 19>;
+ resets = <&ahb1rst 19>;
+};
diff --git a/Documentation/devicetree/bindings/timer/amlogic,meson6-timer.txt b/Documentation/devicetree/bindings/timer/amlogic,meson6-timer.txt
new file mode 100644
index 000000000..a092053f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/amlogic,meson6-timer.txt
@@ -0,0 +1,15 @@
+Amlogic Meson6 SoCs Timer Controller
+
+Required properties:
+
+- compatible : should be "amlogic,meson6-timer"
+- reg : Specifies base physical address and size of the registers.
+- interrupts : The interrupt of the first timer
+
+Example:
+
+timer@c1109940 {
+ compatible = "amlogic,meson6-timer";
+ reg = <0xc1109940 0x14>;
+ interrupts = <0 10 1>;
+};
diff --git a/Documentation/devicetree/bindings/timer/arm,sp804.txt b/Documentation/devicetree/bindings/timer/arm,sp804.txt
new file mode 100644
index 000000000..5cd8eee74
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/arm,sp804.txt
@@ -0,0 +1,29 @@
+ARM sp804 Dual Timers
+---------------------------------------
+
+Required properties:
+- compatible: Should be "arm,sp804" & "arm,primecell"
+- interrupts: Should contain the list of Dual Timer interrupts. This is the
+ interrupt for timer 1 and timer 2. In the case of a single entry, it is
+ the combined interrupt or if "arm,sp804-has-irq" is present that
+ specifies which timer interrupt is connected.
+- reg: Should contain location and length for dual timer register.
+- clocks: clocks driving the dual timer hardware. This list should be 1 or 3
+ clocks. With 3 clocks, the order is timer0 clock, timer1 clock,
+ apb_pclk. A single clock can also be specified if the same clock is
+ used for all clock inputs.
+
+Optional properties:
+- arm,sp804-has-irq = <#>: In the case of only 1 timer irq line connected, this
+ specifies if the irq connection is for timer 1 or timer 2. A value of 1
+ or 2 should be used.
+
+Example:
+
+ timer0: timer@fc800000 {
+ compatible = "arm,sp804", "arm,primecell";
+ reg = <0xfc800000 0x1000>;
+ interrupts = <0 0 4>, <0 1 4>;
+ clocks = <&timclk1 &timclk2 &pclk>;
+ clock-names = "timer1", "timer2", "apb_pclk";
+ };
diff --git a/Documentation/devicetree/bindings/timer/brcm,bcm2835-system-timer.txt b/Documentation/devicetree/bindings/timer/brcm,bcm2835-system-timer.txt
new file mode 100644
index 000000000..844bd5fbd
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/brcm,bcm2835-system-timer.txt
@@ -0,0 +1,22 @@
+BCM2835 System Timer
+
+The System Timer peripheral provides four 32-bit timer channels and a
+single 64-bit free running counter. Each channel has an output compare
+register, which is compared against the 32 least significant bits of the
+free running counter values, and generates an interrupt.
+
+Required properties:
+
+- compatible : should be "brcm,bcm2835-system-timer"
+- reg : Specifies base physical address and size of the registers.
+- interrupts : A list of 4 interrupt sinks; one per timer channel.
+- clock-frequency : The frequency of the clock that drives the counter, in Hz.
+
+Example:
+
+timer {
+ compatible = "brcm,bcm2835-system-timer";
+ reg = <0x7e003000 0x1000>;
+ interrupts = <1 0>, <1 1>, <1 2>, <1 3>;
+ clock-frequency = <1000000>;
+};
diff --git a/Documentation/devicetree/bindings/timer/brcm,kona-timer.txt b/Documentation/devicetree/bindings/timer/brcm,kona-timer.txt
new file mode 100644
index 000000000..39adf54b4
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/brcm,kona-timer.txt
@@ -0,0 +1,25 @@
+Broadcom Kona Family timer
+-----------------------------------------------------
+This timer is used in the following Broadcom SoCs:
+ BCM11130, BCM11140, BCM11351, BCM28145, BCM28155
+
+Required properties:
+- compatible : "brcm,kona-timer"
+- DEPRECATED: compatible : "bcm,kona-timer"
+- reg : Register range for the timer
+- interrupts : interrupt for the timer
+- clocks: phandle + clock specifier pair of the external clock
+- clock-frequency: frequency that the clock operates
+
+Only one of clocks or clock-frequency should be specified.
+
+Refer to clocks/clock-bindings.txt for generic clock consumer properties.
+
+Example:
+ timer@35006000 {
+ compatible = "brcm,kona-timer";
+ reg = <0x35006000 0x1000>;
+ interrupts = <0x0 7 0x4>;
+ clocks = <&hub_timer_clk>;
+ };
+
diff --git a/Documentation/devicetree/bindings/timer/cadence,ttc-timer.txt b/Documentation/devicetree/bindings/timer/cadence,ttc-timer.txt
new file mode 100644
index 000000000..993695c65
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/cadence,ttc-timer.txt
@@ -0,0 +1,17 @@
+Cadence TTC - Triple Timer Counter
+
+Required properties:
+- compatible : Should be "cdns,ttc".
+- reg : Specifies base physical address and size of the registers.
+- interrupts : A list of 3 interrupts; one per timer channel.
+- clocks: phandle to the source clock
+
+Example:
+
+ttc0: ttc0@f8001000 {
+ interrupt-parent = <&intc>;
+ interrupts = < 0 10 4 0 11 4 0 12 4 >;
+ compatible = "cdns,ttc";
+ reg = <0xF8001000 0x1000>;
+ clocks = <&cpu_clk 3>;
+};
diff --git a/Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.txt b/Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.txt
new file mode 100644
index 000000000..cd55b5254
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/cirrus,clps711x-timer.txt
@@ -0,0 +1,29 @@
+* Cirrus Logic CLPS711X Timer Counter
+
+Required properties:
+- compatible: Shall contain "cirrus,clps711x-timer".
+- reg : Address and length of the register set.
+- interrupts: The interrupt number of the timer.
+- clocks : phandle of timer reference clock.
+
+Note: Each timer should have an alias correctly numbered in "aliases" node.
+
+Example:
+ aliases {
+ timer0 = &timer1;
+ timer1 = &timer2;
+ };
+
+ timer1: timer@80000300 {
+ compatible = "cirrus,ep7312-timer", "cirrus,clps711x-timer";
+ reg = <0x80000300 0x4>;
+ interrupts = <8>;
+ clocks = <&clks 5>;
+ };
+
+ timer2: timer@80000340 {
+ compatible = "cirrus,ep7312-timer", "cirrus,clps711x-timer";
+ reg = <0x80000340 0x4>;
+ interrupts = <9>;
+ clocks = <&clks 6>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/digicolor-timer.txt b/Documentation/devicetree/bindings/timer/digicolor-timer.txt
new file mode 100644
index 000000000..d1b659bbc
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/digicolor-timer.txt
@@ -0,0 +1,18 @@
+Conexant Digicolor SoCs Timer Controller
+
+Required properties:
+
+- compatible : should be "cnxt,cx92755-timer"
+- reg : Specifies base physical address and size of the "Agent Communication"
+ timer registers
+- interrupts : Contains 8 interrupts, one for each timer
+- clocks: phandle to the main clock
+
+Example:
+
+ timer@f0000fc0 {
+ compatible = "cnxt,cx92755-timer";
+ reg = <0xf0000fc0 0x40>;
+ interrupts = <19>, <31>, <34>, <35>, <52>, <53>, <54>, <55>;
+ clocks = <&main_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/energymicro,efm32-timer.txt b/Documentation/devicetree/bindings/timer/energymicro,efm32-timer.txt
new file mode 100644
index 000000000..e502c11b2
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/energymicro,efm32-timer.txt
@@ -0,0 +1,23 @@
+* EFM32 timer hardware
+
+The efm32 Giant Gecko SoCs come with four 16 bit timers. Two counters can be
+connected to form a 32 bit counter. Each timer has three Compare/Capture
+channels and can be used as PWM or Quadrature Decoder. Available clock sources
+are the cpu's HFPERCLK (with a 10-bit prescaler) or an external pin.
+
+Required properties:
+- compatible : Should be "energymicro,efm32-timer"
+- reg : Address and length of the register set
+- clocks : Should contain a reference to the HFPERCLK
+
+Optional properties:
+- interrupts : Reference to the timer interrupt
+
+Example:
+
+timer@40010c00 {
+ compatible = "energymicro,efm32-timer";
+ reg = <0x40010c00 0x400>;
+ interrupts = <14>;
+ clocks = <&cmu clk_HFPERCLKTIMER3>;
+};
diff --git a/Documentation/devicetree/bindings/timer/fsl,ftm-timer.txt b/Documentation/devicetree/bindings/timer/fsl,ftm-timer.txt
new file mode 100644
index 000000000..aa8c40230
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/fsl,ftm-timer.txt
@@ -0,0 +1,31 @@
+Freescale FlexTimer Module (FTM) Timer
+
+Required properties:
+
+- compatible : should be "fsl,ftm-timer"
+- reg : Specifies base physical address and size of the register sets for the
+ clock event device and clock source device.
+- interrupts : Should be the clock event device interrupt.
+- clocks : The clocks provided by the SoC to drive the timer, must contain an
+ entry for each entry in clock-names.
+- clock-names : Must include the following entries:
+ o "ftm-evt"
+ o "ftm-src"
+ o "ftm-evt-counter-en"
+ o "ftm-src-counter-en"
+- big-endian: One boolean property, the big endian mode will be in use if it is
+ present, or the little endian mode will be in use for all the device registers.
+
+Example:
+ftm: ftm@400b8000 {
+ compatible = "fsl,ftm-timer";
+ reg = <0x400b8000 0x1000 0x400b9000 0x1000>;
+ interrupts = <0 44 IRQ_TYPE_LEVEL_HIGH>;
+ clock-names = "ftm-evt", "ftm-src",
+ "ftm-evt-counter-en", "ftm-src-counter-en";
+ clocks = <&clks VF610_CLK_FTM2>,
+ <&clks VF610_CLK_FTM3>,
+ <&clks VF610_CLK_FTM2_EXT_FIX_EN>,
+ <&clks VF610_CLK_FTM3_EXT_FIX_EN>;
+ big-endian;
+};
diff --git a/Documentation/devicetree/bindings/timer/fsl,imxgpt.txt b/Documentation/devicetree/bindings/timer/fsl,imxgpt.txt
new file mode 100644
index 000000000..9809b11f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/fsl,imxgpt.txt
@@ -0,0 +1,18 @@
+Freescale i.MX General Purpose Timer (GPT)
+
+Required properties:
+
+- compatible : should be "fsl,<soc>-gpt"
+- reg : Specifies base physical address and size of the registers.
+- interrupts : A list of 4 interrupts; one per timer channel.
+- clocks : The clocks provided by the SoC to drive the timer.
+
+Example:
+
+gpt1: timer@10003000 {
+ compatible = "fsl,imx27-gpt", "fsl,imx1-gpt";
+ reg = <0x10003000 0x1000>;
+ interrupts = <26>;
+ clocks = <&clks 46>, <&clks 61>;
+ clock-names = "ipg", "per";
+};
diff --git a/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt b/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt
new file mode 100644
index 000000000..b2d07ad90
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/lsi,zevio-timer.txt
@@ -0,0 +1,33 @@
+TI-NSPIRE timer
+
+Required properties:
+
+- compatible : should be "lsi,zevio-timer".
+- reg : The physical base address and size of the timer (always first).
+- clocks: phandle to the source clock.
+
+Optional properties:
+
+- interrupts : The interrupt number of the first timer.
+- reg : The interrupt acknowledgement registers
+ (always after timer base address)
+
+If any of the optional properties are not given, the timer is added as a
+clock-source only.
+
+Example:
+
+timer {
+ compatible = "lsi,zevio-timer";
+ reg = <0x900D0000 0x1000>, <0x900A0020 0x8>;
+ interrupts = <19>;
+ clocks = <&timer_clk>;
+};
+
+Example (no clock-events):
+
+timer {
+ compatible = "lsi,zevio-timer";
+ reg = <0x900D0000 0x1000>;
+ clocks = <&timer_clk>;
+};
diff --git a/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt b/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt
new file mode 100644
index 000000000..e9c78ce88
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt
@@ -0,0 +1,44 @@
+Marvell Armada 370 and Armada XP Timers
+---------------------------------------
+
+Required properties:
+- compatible: Should be one of the following
+ "marvell,armada-370-timer",
+ "marvell,armada-375-timer",
+ "marvell,armada-xp-timer".
+- interrupts: Should contain the list of Global Timer interrupts and
+ then local timer interrupts
+- reg: Should contain location and length for timers register. First
+ pair for the Global Timer registers, second pair for the
+ local/private timers.
+
+Clocks required for compatible = "marvell,armada-370-timer":
+- clocks : Must contain a single entry describing the clock input
+
+Clocks required for compatibles = "marvell,armada-xp-timer",
+ "marvell,armada-375-timer":
+- clocks : Must contain an entry for each entry in clock-names.
+- clock-names : Must include the following entries:
+ "nbclk" (L2/coherency fabric clock),
+ "fixed" (Reference 25 MHz fixed-clock).
+
+Examples:
+
+- Armada 370:
+
+ timer {
+ compatible = "marvell,armada-370-timer";
+ reg = <0x20300 0x30>, <0x21040 0x30>;
+ interrupts = <37>, <38>, <39>, <40>, <5>, <6>;
+ clocks = <&coreclk 2>;
+ };
+
+- Armada XP:
+
+ timer {
+ compatible = "marvell,armada-xp-timer";
+ reg = <0x20300 0x30>, <0x21040 0x30>;
+ interrupts = <37>, <38>, <39>, <40>, <5>, <6>;
+ clocks = <&coreclk 2>, <&refclk>;
+ clock-names = "nbclk", "fixed";
+ };
diff --git a/Documentation/devicetree/bindings/timer/marvell,orion-timer.txt b/Documentation/devicetree/bindings/timer/marvell,orion-timer.txt
new file mode 100644
index 000000000..62bb8260c
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/marvell,orion-timer.txt
@@ -0,0 +1,17 @@
+Marvell Orion SoC timer
+
+Required properties:
+- compatible: shall be "marvell,orion-timer"
+- reg: base address of the timer register starting with TIMERS CONTROL register
+- interrupt-parent: phandle of the bridge interrupt controller
+- interrupts: should contain the interrupts for Timer0 and Timer1
+- clocks: phandle of timer reference clock (tclk)
+
+Example:
+ timer: timer {
+ compatible = "marvell,orion-timer";
+ reg = <0x20300 0x20>;
+ interrupt-parent = <&bridge_intc>;
+ interrupts = <1>, <2>;
+ clocks = <&core_clk 0>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
new file mode 100644
index 000000000..7c4408ff4
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/mediatek,mtk-timer.txt
@@ -0,0 +1,17 @@
+Mediatek MT6577, MT6572 and MT6589 Timers
+---------------------------------------
+
+Required properties:
+- compatible: Should be "mediatek,mt6577-timer"
+- reg: Should contain location and length for timers register.
+- clocks: Clocks driving the timer hardware. This list should include two
+ clocks. The order is system clock and as second clock the RTC clock.
+
+Examples:
+
+ timer@10008000 {
+ compatible = "mediatek,mt6577-timer";
+ reg = <0x10008000 0x80>;
+ interrupts = <GIC_SPI 113 IRQ_TYPE_LEVEL_LOW>;
+ clocks = <&system_clk>, <&rtc_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt b/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt
new file mode 100644
index 000000000..da2d510ca
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/moxa,moxart-timer.txt
@@ -0,0 +1,17 @@
+MOXA ART timer
+
+Required properties:
+
+- compatible : Must be "moxa,moxart-timer"
+- reg : Should contain registers location and length
+- interrupts : Should contain the timer interrupt number
+- clocks : Should contain phandle for the clock that drives the counter
+
+Example:
+
+ timer: timer@98400000 {
+ compatible = "moxa,moxart-timer";
+ reg = <0x98400000 0x42>;
+ interrupts = <19 1>;
+ clocks = <&coreclk>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt
new file mode 100644
index 000000000..4a864bd10
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nvidia,tegra20-timer.txt
@@ -0,0 +1,24 @@
+NVIDIA Tegra20 timer
+
+The Tegra20 timer provides four 29-bit timer channels and a single 32-bit free
+running counter. The first two channels may also trigger a watchdog reset.
+
+Required properties:
+
+- compatible : should be "nvidia,tegra20-timer".
+- reg : Specifies base physical address and size of the registers.
+- interrupts : A list of 4 interrupts; one per timer channel.
+- clocks : Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+
+Example:
+
+timer {
+ compatible = "nvidia,tegra20-timer";
+ reg = <0x60005000 0x60>;
+ interrupts = <0 0 0x04
+ 0 1 0x04
+ 0 41 0x04
+ 0 42 0x04>;
+ clocks = <&tegra_car 132>;
+};
diff --git a/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt b/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt
new file mode 100644
index 000000000..1761f53ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/nvidia,tegra30-timer.txt
@@ -0,0 +1,28 @@
+NVIDIA Tegra30 timer
+
+The Tegra30 timer provides ten 29-bit timer channels, a single 32-bit free
+running counter, and 5 watchdog modules. The first two channels may also
+trigger a legacy watchdog reset.
+
+Required properties:
+
+- compatible : For Tegra30, must contain "nvidia,tegra30-timer". Otherwise,
+ must contain '"nvidia,<chip>-timer", "nvidia,tegra30-timer"' where
+ <chip> is tegra124 or tegra132.
+- reg : Specifies base physical address and size of the registers.
+- interrupts : A list of 6 interrupts; one per each of timer channels 1
+ through 5, and one for the shared interrupt for the remaining channels.
+- clocks : Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+
+timer {
+ compatible = "nvidia,tegra30-timer", "nvidia,tegra20-timer";
+ reg = <0x60005000 0x400>;
+ interrupts = <0 0 0x04
+ 0 1 0x04
+ 0 41 0x04
+ 0 42 0x04
+ 0 121 0x04
+ 0 122 0x04>;
+ clocks = <&tegra_car 214>;
+};
diff --git a/Documentation/devicetree/bindings/timer/renesas,cmt.txt b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
new file mode 100644
index 000000000..1a05c1b24
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/renesas,cmt.txt
@@ -0,0 +1,79 @@
+* Renesas R-Car Compare Match Timer (CMT)
+
+The CMT is a multi-channel 16/32/48-bit timer/counter with configurable clock
+inputs and programmable compare match.
+
+Channels share hardware resources but their counter and compare match value
+are independent. A particular CMT instance can implement only a subset of the
+channels supported by the CMT model. Channel indices represent the hardware
+position of the channel in the CMT and don't match the channel numbers in the
+datasheets.
+
+Required Properties:
+
+ - compatible: must contain one or more of the following:
+ - "renesas,cmt-32-r8a7740" for the r8a7740 32-bit CMT
+ (CMT0)
+ - "renesas,cmt-32-sh7372" for the sh7372 32-bit CMT
+ (CMT0)
+ - "renesas,cmt-32-sh73a0" for the sh73a0 32-bit CMT
+ (CMT0)
+ - "renesas,cmt-32" for all 32-bit CMT without fast clock support
+ (CMT0 on sh7372, sh73a0 and r8a7740)
+ This is a fallback for the above renesas,cmt-32-* entries.
+
+ - "renesas,cmt-32-fast-r8a7740" for the r8a7740 32-bit CMT with fast
+ clock support (CMT[234])
+ - "renesas,cmt-32-fast-sh7372" for the sh7372 32-bit CMT with fast
+ clock support (CMT[234])
+ - "renesas,cmt-32-fast-sh73a0" for the sh73A0 32-bit CMT with fast
+ clock support (CMT[234])
+ - "renesas,cmt-32-fast" for all 32-bit CMT with fast clock support
+ (CMT[234] on sh7372, sh73a0 and r8a7740)
+ This is a fallback for the above renesas,cmt-32-fast-* entries.
+
+ - "renesas,cmt-48-sh7372" for the sh7372 48-bit CMT
+ (CMT1)
+ - "renesas,cmt-48-sh73a0" for the sh73A0 48-bit CMT
+ (CMT1)
+ - "renesas,cmt-48-r8a7740" for the r8a7740 48-bit CMT
+ (CMT1)
+ - "renesas,cmt-48" for all non-second generation 48-bit CMT
+ (CMT1 on sh7372, sh73a0 and r8a7740)
+ This is a fallback for the above renesas,cmt-48-* entries.
+
+ - "renesas,cmt-48-r8a73a4" for the r8a73a4 48-bit CMT
+ (CMT[01])
+ - "renesas,cmt-48-r8a7790" for the r8a7790 48-bit CMT
+ (CMT[01])
+ - "renesas,cmt-48-r8a7791" for the r8a7791 48-bit CMT
+ (CMT[01])
+ - "renesas,cmt-48-gen2" for all second generation 48-bit CMT
+ (CMT[01] on r8a73a4, r8a7790 and r8a7791)
+ This is a fallback for the renesas,cmt-48-r8a73a4,
+ renesas,cmt-48-r8a7790 and renesas,cmt-48-r8a7791 entries.
+
+ - reg: base address and length of the registers block for the timer module.
+ - interrupts: interrupt-specifier for the timer, one per channel.
+ - clocks: a list of phandle + clock-specifier pairs, one for each entry
+ in clock-names.
+ - clock-names: must contain "fck" for the functional clock.
+
+ - renesas,channels-mask: bitmask of the available channels.
+
+
+Example: R8A7790 (R-Car H2) CMT0 node
+
+ CMT0 on R8A7790 implements hardware channels 5 and 6 only and names
+ them channels 0 and 1 in the documentation.
+
+ cmt0: timer@ffca0000 {
+ compatible = "renesas,cmt-48-r8a7790", "renesas,cmt-48-gen2";
+ reg = <0 0xffca0000 0 0x1004>;
+ interrupts = <0 142 IRQ_TYPE_LEVEL_HIGH>,
+ <0 142 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp1_clks R8A7790_CLK_CMT0>;
+ clock-names = "fck";
+
+ renesas,channels-mask = <0x60>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/renesas,mtu2.txt b/Documentation/devicetree/bindings/timer/renesas,mtu2.txt
new file mode 100644
index 000000000..ba0a34d97
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/renesas,mtu2.txt
@@ -0,0 +1,42 @@
+* Renesas Multi-Function Timer Pulse Unit 2 (MTU2)
+
+The MTU2 is a multi-purpose, multi-channel timer/counter with configurable
+clock inputs and programmable compare match.
+
+Channels share hardware resources but their counter and compare match value
+are independent. The MTU2 hardware supports five channels indexed from 0 to 4.
+
+Required Properties:
+
+ - compatible: must be one or more of the following:
+ - "renesas,mtu2-r7s72100" for the r7s72100 MTU2
+ - "renesas,mtu2" for any MTU2
+ This is a fallback for the above renesas,mtu2-* entries
+
+ - reg: base address and length of the registers block for the timer module.
+
+ - interrupts: interrupt specifiers for the timer, one for each entry in
+ interrupt-names.
+ - interrupt-names: must contain one entry named "tgi?a" for each enabled
+ channel, where "?" is the channel index expressed as one digit from "0" to
+ "4".
+
+ - clocks: a list of phandle + clock-specifier pairs, one for each entry
+ in clock-names.
+ - clock-names: must contain "fck" for the functional clock.
+
+
+Example: R7S72100 (RZ/A1H) MTU2 node
+
+ mtu2: timer@fcff0000 {
+ compatible = "renesas,mtu2-r7s72100", "renesas,mtu2";
+ reg = <0xfcff0000 0x400>;
+ interrupts = <0 139 IRQ_TYPE_LEVEL_HIGH>,
+ <0 146 IRQ_TYPE_LEVEL_HIGH>,
+ <0 150 IRQ_TYPE_LEVEL_HIGH>,
+ <0 154 IRQ_TYPE_LEVEL_HIGH>,
+ <0 159 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "tgi0a", "tgi1a", "tgi2a", "tgi3a", "tgi4a";
+ clocks = <&mstp3_clks R7S72100_CLK_MTU2>;
+ clock-names = "fck";
+ };
diff --git a/Documentation/devicetree/bindings/timer/renesas,tmu.txt b/Documentation/devicetree/bindings/timer/renesas,tmu.txt
new file mode 100644
index 000000000..cd5f20bf2
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/renesas,tmu.txt
@@ -0,0 +1,44 @@
+* Renesas R-Mobile/R-Car Timer Unit (TMU)
+
+The TMU is a 32-bit timer/counter with configurable clock inputs and
+programmable compare match.
+
+Channels share hardware resources but their counter and compare match value
+are independent. The TMU hardware supports up to three channels.
+
+Required Properties:
+
+ - compatible: must contain one or more of the following:
+ - "renesas,tmu-r8a7740" for the r8a7740 TMU
+ - "renesas,tmu-r8a7778" for the r8a7778 TMU
+ - "renesas,tmu-r8a7779" for the r8a7779 TMU
+ - "renesas,tmu" for any TMU.
+ This is a fallback for the above renesas,tmu-* entries
+
+ - reg: base address and length of the registers block for the timer module.
+
+ - interrupts: interrupt-specifier for the timer, one per channel.
+
+ - clocks: a list of phandle + clock-specifier pairs, one for each entry
+ in clock-names.
+ - clock-names: must contain "fck" for the functional clock.
+
+Optional Properties:
+
+ - #renesas,channels: number of channels implemented by the timer, must be 2
+ or 3 (if not specified the value defaults to 3).
+
+
+Example: R8A7779 (R-Car H1) TMU0 node
+
+ tmu0: timer@ffd80000 {
+ compatible = "renesas,tmu-r8a7779", "renesas,tmu";
+ reg = <0xffd80000 0x30>;
+ interrupts = <0 32 IRQ_TYPE_LEVEL_HIGH>,
+ <0 33 IRQ_TYPE_LEVEL_HIGH>,
+ <0 34 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp0_clks R8A7779_CLK_TMU0>;
+ clock-names = "fck";
+
+ #renesas,channels = <3>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/rockchip,rk3288-timer.txt b/Documentation/devicetree/bindings/timer/rockchip,rk3288-timer.txt
new file mode 100644
index 000000000..87f0b0042
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/rockchip,rk3288-timer.txt
@@ -0,0 +1,18 @@
+Rockchip rk3288 timer
+
+Required properties:
+- compatible: shall be "rockchip,rk3288-timer"
+- reg: base address of the timer register starting with TIMERS CONTROL register
+- interrupts: should contain the interrupts for Timer0
+- clocks : must contain an entry for each entry in clock-names
+- clock-names : must include the following entries:
+ "timer", "pclk"
+
+Example:
+ timer: timer@ff810000 {
+ compatible = "rockchip,rk3288-timer";
+ reg = <0xff810000 0x20>;
+ interrupts = <GIC_SPI 72 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&xin24m>, <&cru PCLK_TIMER>;
+ clock-names = "timer", "pclk";
+ };
diff --git a/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.txt b/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.txt
new file mode 100644
index 000000000..167d5dab9
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/samsung,exynos4210-mct.txt
@@ -0,0 +1,88 @@
+Samsung's Multi Core Timer (MCT)
+
+The Samsung's Multi Core Timer (MCT) module includes two main blocks, the
+global timer and CPU local timers. The global timer is a 64-bit free running
+up-counter and can generate 4 interrupts when the counter reaches one of the
+four preset counter values. The CPU local timers are 32-bit free running
+down-counters and generate an interrupt when the counter expires. There is
+one CPU local timer instantiated in MCT for every CPU in the system.
+
+Required properties:
+
+- compatible: should be "samsung,exynos4210-mct".
+ (a) "samsung,exynos4210-mct", for mct compatible with Exynos4210 mct.
+ (b) "samsung,exynos4412-mct", for mct compatible with Exynos4412 mct.
+
+- reg: base address of the mct controller and length of the address space
+ it occupies.
+
+- interrupts: the list of interrupts generated by the controller. The following
+ should be the order of the interrupts specified. The local timer interrupts
+ should be specified after the four global timer interrupts have been
+ specified.
+
+ 0: Global Timer Interrupt 0
+ 1: Global Timer Interrupt 1
+ 2: Global Timer Interrupt 2
+ 3: Global Timer Interrupt 3
+ 4: Local Timer Interrupt 0
+ 5: Local Timer Interrupt 1
+ 6: ..
+ 7: ..
+ i: Local Timer Interrupt n
+
+ For MCT block that uses a per-processor interrupt for local timers, such
+ as ones compatible with "samsung,exynos4412-mct", only one local timer
+ interrupt might be specified, meaning that all local timers use the same
+ per processor interrupt.
+
+Example 1: In this example, the IP contains two local timers, using separate
+ interrupts, so two local timer interrupts have been specified,
+ in addition to four global timer interrupts.
+
+ mct@10050000 {
+ compatible = "samsung,exynos4210-mct";
+ reg = <0x10050000 0x800>;
+ interrupts = <0 57 0>, <0 69 0>, <0 70 0>, <0 71 0>,
+ <0 42 0>, <0 48 0>;
+ };
+
+Example 2: In this example, the timer interrupts are connected to two separate
+ interrupt controllers. Hence, an interrupt-map is created to map
+ the interrupts to the respective interrupt controllers.
+
+ mct@101C0000 {
+ compatible = "samsung,exynos4210-mct";
+ reg = <0x101C0000 0x800>;
+ interrupt-parent = <&mct_map>;
+ interrupts = <0>, <1>, <2>, <3>, <4>, <5>;
+
+ mct_map: mct-map {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ interrupt-map = <0 &gic 0 57 0>,
+ <1 &gic 0 69 0>,
+ <2 &combiner 12 6>,
+ <3 &combiner 12 7>,
+ <4 &gic 0 42 0>,
+ <5 &gic 0 48 0>;
+ };
+ };
+
+Example 3: In this example, the IP contains four local timers, but using
+ a per-processor interrupt to handle them. Either all the local
+ timer interrupts can be specified, with the same interrupt specifier
+ value or just the first one.
+
+ mct@10050000 {
+ compatible = "samsung,exynos4412-mct";
+ reg = <0x10050000 0x800>;
+
+ /* Both ways are possible in this case. Either: */
+ interrupts = <0 57 0>, <0 69 0>, <0 70 0>, <0 71 0>,
+ <0 42 0>;
+ /* or: */
+ interrupts = <0 57 0>, <0 69 0>, <0 70 0>, <0 71 0>,
+ <0 42 0>, <0 42 0>, <0 42 0>, <0 42 0>;
+ };
diff --git a/Documentation/devicetree/bindings/timer/stericsson-u300-apptimer.txt b/Documentation/devicetree/bindings/timer/stericsson-u300-apptimer.txt
new file mode 100644
index 000000000..9499bc8ee
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/stericsson-u300-apptimer.txt
@@ -0,0 +1,18 @@
+ST-Ericsson U300 apptimer
+
+Required properties:
+
+- compatible : should be "stericsson,u300-apptimer"
+- reg : Specifies base physical address and size of the registers.
+- interrupts : A list of 4 interrupts; one for each subtimer. These
+ are, in order: OS (operating system), DD (device driver) both
+ adopted for EPOC/Symbian with two specific IRQs for these tasks,
+ then GP1 and GP2, which are general-purpose timers.
+
+Example:
+
+timer {
+ compatible = "stericsson,u300-apptimer";
+ reg = <0xc0014000 0x1000>;
+ interrupts = <24 25 26 27>;
+};
diff --git a/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt b/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt
new file mode 100644
index 000000000..5fbe36125
--- /dev/null
+++ b/Documentation/devicetree/bindings/timer/ti,keystone-timer.txt
@@ -0,0 +1,29 @@
+* Device tree bindings for Texas instruments Keystone timer
+
+This document provides bindings for the 64-bit timer in the KeyStone
+architecture devices. The timer can be configured as a general-purpose 64-bit
+timer, dual general-purpose 32-bit timers. When configured as dual 32-bit
+timers, each half can operate in conjunction (chain mode) or independently
+(unchained mode) of each other.
+
+It is global timer is a free running up-counter and can generate interrupt
+when the counter reaches preset counter values.
+
+Documentation:
+http://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf
+
+Required properties:
+
+- compatible : should be "ti,keystone-timer".
+- reg : specifies base physical address and count of the registers.
+- interrupts : interrupt generated by the timer.
+- clocks : the clock feeding the timer clock.
+
+Example:
+
+timer@22f0000 {
+ compatible = "ti,keystone-timer";
+ reg = <0x022f0000 0x80>;
+ interrupts = <GIC_SPI 110 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&clktimer15>;
+};
diff --git a/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
new file mode 100644
index 000000000..53579197e
--- /dev/null
+++ b/Documentation/devicetree/bindings/ufs/ufshcd-pltfrm.txt
@@ -0,0 +1,57 @@
+* Universal Flash Storage (UFS) Host Controller
+
+UFSHC nodes are defined to describe on-chip UFS host controllers.
+Each UFS controller instance should have its own node.
+
+Required properties:
+- compatible : compatible list, contains "jedec,ufs-1.1"
+- interrupts : <interrupt mapping for UFS host controller IRQ>
+- reg : <registers mapping>
+
+Optional properties:
+- vdd-hba-supply : phandle to UFS host controller supply regulator node
+- vcc-supply : phandle to VCC supply regulator node
+- vccq-supply : phandle to VCCQ supply regulator node
+- vccq2-supply : phandle to VCCQ2 supply regulator node
+- vcc-supply-1p8 : For embedded UFS devices, valid VCC range is 1.7-1.95V
+ or 2.7-3.6V. This boolean property when set, specifies
+ to use low voltage range of 1.7-1.95V. Note for external
+ UFS cards this property is invalid and valid VCC range is
+ always 2.7-3.6V.
+- vcc-max-microamp : specifies max. load that can be drawn from vcc supply
+- vccq-max-microamp : specifies max. load that can be drawn from vccq supply
+- vccq2-max-microamp : specifies max. load that can be drawn from vccq2 supply
+- <name>-fixed-regulator : boolean property specifying that <name>-supply is a fixed regulator
+
+- clocks : List of phandle and clock specifier pairs
+- clock-names : List of clock input name strings sorted in the same
+ order as the clocks property.
+- freq-table-hz : Array of <min max> operating frequencies stored in the same
+ order as the clocks property. If this property is not
+ defined or a value in the array is "0" then it is assumed
+ that the frequency is set by the parent clock or a
+ fixed rate clock source.
+
+Note: If above properties are not defined it can be assumed that the supply
+regulators or clocks are always on.
+
+Example:
+ ufshc@0xfc598000 {
+ compatible = "jedec,ufs-1.1";
+ reg = <0xfc598000 0x800>;
+ interrupts = <0 28 0>;
+
+ vdd-hba-supply = <&xxx_reg0>;
+ vdd-hba-fixed-regulator;
+ vcc-supply = <&xxx_reg1>;
+ vcc-supply-1p8;
+ vccq-supply = <&xxx_reg2>;
+ vccq2-supply = <&xxx_reg3>;
+ vcc-max-microamp = 500000;
+ vccq-max-microamp = 200000;
+ vccq2-max-microamp = 200000;
+
+ clocks = <&core 0>, <&ref 0>, <&iface 0>;
+ clock-names = "core_clk", "ref_clk", "iface_clk";
+ freq-table-hz = <100000000 200000000>, <0 0>, <0 0>;
+ };
diff --git a/Documentation/devicetree/bindings/unittest.txt b/Documentation/devicetree/bindings/unittest.txt
new file mode 100644
index 000000000..3bf58c20f
--- /dev/null
+++ b/Documentation/devicetree/bindings/unittest.txt
@@ -0,0 +1,71 @@
+1) OF unittest platform device
+
+** unittest
+
+Required properties:
+- compatible: must be "unittest"
+
+All other properties are optional.
+
+Example:
+ unittest {
+ compatible = "unittest";
+ status = "okay";
+ };
+
+2) OF unittest i2c adapter platform device
+
+** platform device unittest adapter
+
+Required properties:
+- compatible: must be unittest-i2c-bus
+
+Children nodes contain unittest i2c devices.
+
+Example:
+ unittest-i2c-bus {
+ compatible = "unittest-i2c-bus";
+ status = "okay";
+ };
+
+3) OF unittest i2c device
+
+** I2C unittest device
+
+Required properties:
+- compatible: must be unittest-i2c-dev
+
+All other properties are optional
+
+Example:
+ unittest-i2c-dev {
+ compatible = "unittest-i2c-dev";
+ status = "okay";
+ };
+
+4) OF unittest i2c mux device
+
+** I2C unittest mux
+
+Required properties:
+- compatible: must be unittest-i2c-mux
+
+Children nodes contain unittest i2c bus nodes per channel.
+
+Example:
+ unittest-i2c-mux {
+ compatible = "unittest-i2c-mux";
+ status = "okay";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ channel-0 {
+ reg = <0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ i2c-dev {
+ reg = <8>;
+ compatible = "unittest-i2c-dev";
+ status = "okay";
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/usb/am33xx-usb.txt b/Documentation/devicetree/bindings/usb/am33xx-usb.txt
new file mode 100644
index 000000000..20c2ff2ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/am33xx-usb.txt
@@ -0,0 +1,197 @@
+ AM33xx MUSB
+~~~~~~~~~~~~~~~
+- compatible: ti,am33xx-usb
+- reg: offset and length of the usbss register sets
+- ti,hwmods : must be "usb_otg_hs"
+
+The glue layer contains multiple child nodes. It is required the have
+at least a control module node, USB node and a PHY node. The second USB
+node and its PHY node is optional. The DMA node is also optional.
+
+Reset module
+~~~~~~~~~~~~
+- compatible: ti,am335x-usb-ctrl-module
+- reg: offset and length of the "USB control registers" in the "Control
+ Module" block. A second offset and length for the USB wake up control
+ in the same memory block.
+- reg-names: "phy_ctrl" for the "USB control registers" and "wakeup" for
+ the USB wake up control register.
+
+USB PHY
+~~~~~~~
+compatible: ti,am335x-usb-phy
+reg: offset and length of the "USB PHY" register space
+ti,ctrl_mod: reference to the "reset module" node
+reg-names: phy
+The PHY should have a "phy" alias numbered properly in the alias
+node.
+
+USB
+~~~
+- compatible: ti,musb-am33xx
+- reg: offset and length of "USB Controller Registers", and offset and
+ length of "USB Core" register space.
+- reg-names: control for the ""USB Controller Registers" and "mc" for
+ "USB Core" register space
+- interrupts: USB interrupt number
+- interrupt-names: mc
+- dr_mode: Should be one of "host", "peripheral" or "otg".
+- mentor,multipoint: Should be "1" indicating the musb controller supports
+ multipoint. This is a MUSB configuration-specific setting.
+- mentor,num-eps: Specifies the number of endpoints. This is also a
+ MUSB configuration-specific setting. Should be set to "16"
+- mentor,ram-bits: Specifies the ram address size. Should be set to "12"
+- mentor,power: Should be "500". This signifies the controller can supply up to
+ 500mA when operating in host mode.
+- phys: reference to the USB phy
+- dmas: specifies the dma channels
+- dma-names: specifies the names of the channels. Use "rxN" for receive
+ and "txN" for transmit endpoints. N specifies the endpoint number.
+
+The controller should have an "usb" alias numbered properly in the alias
+node.
+
+DMA
+~~~
+- compatible: ti,am3359-cppi41
+- reg: offset and length of the following register spaces: USBSS, USB
+ CPPI DMA Controller, USB CPPI DMA Scheduler, USB Queue Manager
+- reg-names: glue, controller, scheduler, queuemgr
+- #dma-cells: should be set to 2. The first number represents the
+ endpoint number (0 … 14 for endpoints 1 … 15 on instance 0 and 15 … 29
+ for endpoints 1 … 15 on instance 1). The second number is 0 for RX and
+ 1 for TX transfers.
+- #dma-channels: should be set to 30 representing the 15 endpoints for
+ each USB instance.
+
+Example:
+~~~~~~~~
+The following example contains all the nodes as used on am335x-evm:
+
+aliases {
+ usb0 = &usb0;
+ usb1 = &usb1;
+ phy0 = &usb0_phy;
+ phy1 = &usb1_phy;
+};
+
+usb: usb@47400000 {
+ compatible = "ti,am33xx-usb";
+ reg = <0x47400000 0x1000>;
+ ranges;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ti,hwmods = "usb_otg_hs";
+
+ ctrl_mod: control@44e10000 {
+ compatible = "ti,am335x-usb-ctrl-module";
+ reg = <0x44e10620 0x10
+ 0x44e10648 0x4>;
+ reg-names = "phy_ctrl", "wakeup";
+ };
+
+ usb0_phy: usb-phy@47401300 {
+ compatible = "ti,am335x-usb-phy";
+ reg = <0x47401300 0x100>;
+ reg-names = "phy";
+ ti,ctrl_mod = <&ctrl_mod>;
+ };
+
+ usb0: usb@47401000 {
+ compatible = "ti,musb-am33xx";
+ reg = <0x47401400 0x400
+ 0x47401000 0x200>;
+ reg-names = "mc", "control";
+
+ interrupts = <18>;
+ interrupt-names = "mc";
+ dr_mode = "otg"
+ mentor,multipoint = <1>;
+ mentor,num-eps = <16>;
+ mentor,ram-bits = <12>;
+ mentor,power = <500>;
+ phys = <&usb0_phy>;
+
+ dmas = <&cppi41dma 0 0 &cppi41dma 1 0
+ &cppi41dma 2 0 &cppi41dma 3 0
+ &cppi41dma 4 0 &cppi41dma 5 0
+ &cppi41dma 6 0 &cppi41dma 7 0
+ &cppi41dma 8 0 &cppi41dma 9 0
+ &cppi41dma 10 0 &cppi41dma 11 0
+ &cppi41dma 12 0 &cppi41dma 13 0
+ &cppi41dma 14 0 &cppi41dma 0 1
+ &cppi41dma 1 1 &cppi41dma 2 1
+ &cppi41dma 3 1 &cppi41dma 4 1
+ &cppi41dma 5 1 &cppi41dma 6 1
+ &cppi41dma 7 1 &cppi41dma 8 1
+ &cppi41dma 9 1 &cppi41dma 10 1
+ &cppi41dma 11 1 &cppi41dma 12 1
+ &cppi41dma 13 1 &cppi41dma 14 1>;
+ dma-names =
+ "rx1", "rx2", "rx3", "rx4", "rx5", "rx6", "rx7",
+ "rx8", "rx9", "rx10", "rx11", "rx12", "rx13",
+ "rx14", "rx15",
+ "tx1", "tx2", "tx3", "tx4", "tx5", "tx6", "tx7",
+ "tx8", "tx9", "tx10", "tx11", "tx12", "tx13",
+ "tx14", "tx15";
+ };
+
+ usb1_phy: usb-phy@47401b00 {
+ compatible = "ti,am335x-usb-phy";
+ reg = <0x47401b00 0x100>;
+ reg-names = "phy";
+ ti,ctrl_mod = <&ctrl_mod>;
+ };
+
+ usb1: usb@47401800 {
+ compatible = "ti,musb-am33xx";
+ reg = <0x47401c00 0x400
+ 0x47401800 0x200>;
+ reg-names = "mc", "control";
+ interrupts = <19>;
+ interrupt-names = "mc";
+ dr_mode = "host"
+ mentor,multipoint = <1>;
+ mentor,num-eps = <16>;
+ mentor,ram-bits = <12>;
+ mentor,power = <500>;
+ phys = <&usb1_phy>;
+
+ dmas = <&cppi41dma 15 0 &cppi41dma 16 0
+ &cppi41dma 17 0 &cppi41dma 18 0
+ &cppi41dma 19 0 &cppi41dma 20 0
+ &cppi41dma 21 0 &cppi41dma 22 0
+ &cppi41dma 23 0 &cppi41dma 24 0
+ &cppi41dma 25 0 &cppi41dma 26 0
+ &cppi41dma 27 0 &cppi41dma 28 0
+ &cppi41dma 29 0 &cppi41dma 15 1
+ &cppi41dma 16 1 &cppi41dma 17 1
+ &cppi41dma 18 1 &cppi41dma 19 1
+ &cppi41dma 20 1 &cppi41dma 21 1
+ &cppi41dma 22 1 &cppi41dma 23 1
+ &cppi41dma 24 1 &cppi41dma 25 1
+ &cppi41dma 26 1 &cppi41dma 27 1
+ &cppi41dma 28 1 &cppi41dma 29 1>;
+ dma-names =
+ "rx1", "rx2", "rx3", "rx4", "rx5", "rx6", "rx7",
+ "rx8", "rx9", "rx10", "rx11", "rx12", "rx13",
+ "rx14", "rx15",
+ "tx1", "tx2", "tx3", "tx4", "tx5", "tx6", "tx7",
+ "tx8", "tx9", "tx10", "tx11", "tx12", "tx13",
+ "tx14", "tx15";
+ };
+
+ cppi41dma: dma-controller@07402000 {
+ compatible = "ti,am3359-cppi41";
+ reg = <0x47400000 0x1000
+ 0x47402000 0x1000
+ 0x47403000 0x1000
+ 0x47404000 0x4000>;
+ reg-names = "glue", "controller", "scheduler", "queuemgr";
+ interrupts = <17>;
+ interrupt-names = "glue";
+ #dma-cells = <2>;
+ #dma-channels = <30>;
+ #dma-requests = <256>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/usb/atmel-usb.txt b/Documentation/devicetree/bindings/usb/atmel-usb.txt
new file mode 100644
index 000000000..de773a00e
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/atmel-usb.txt
@@ -0,0 +1,142 @@
+Atmel SOC USB controllers
+
+OHCI
+
+Required properties:
+ - compatible: Should be "atmel,at91rm9200-ohci" for USB controllers
+ used in host mode.
+ - num-ports: Number of ports.
+ - atmel,vbus-gpio: If present, specifies a gpio that needs to be
+ activated for the bus to be powered.
+ - atmel,oc-gpio: If present, specifies a gpio that needs to be
+ activated for the overcurrent detection.
+
+usb0: ohci@00500000 {
+ compatible = "atmel,at91rm9200-ohci", "usb-ohci";
+ reg = <0x00500000 0x100000>;
+ interrupts = <20 4>;
+ num-ports = <2>;
+};
+
+EHCI
+
+Required properties:
+ - compatible: Should be "atmel,at91sam9g45-ehci" for USB controllers
+ used in host mode.
+
+usb1: ehci@00800000 {
+ compatible = "atmel,at91sam9g45-ehci", "usb-ehci";
+ reg = <0x00800000 0x100000>;
+ interrupts = <22 4>;
+};
+
+AT91 USB device controller
+
+Required properties:
+ - compatible: Should be one of the following
+ "atmel,at91rm9200-udc"
+ "atmel,at91sam9260-udc"
+ "atmel,at91sam9261-udc"
+ "atmel,at91sam9263-udc"
+ - reg: Address and length of the register set for the device
+ - interrupts: Should contain macb interrupt
+ - clocks: Should reference the peripheral and the AHB clocks
+ - clock-names: Should contains two strings
+ "pclk" for the peripheral clock
+ "hclk" for the AHB clock
+
+Optional properties:
+ - atmel,vbus-gpio: If present, specifies a gpio that needs to be
+ activated for the bus to be powered.
+
+usb1: gadget@fffa4000 {
+ compatible = "atmel,at91rm9200-udc";
+ reg = <0xfffa4000 0x4000>;
+ interrupts = <10 4>;
+ atmel,vbus-gpio = <&pioC 5 0>;
+};
+
+Atmel High-Speed USB device controller
+
+Required properties:
+ - compatible: Should be one of the following
+ "atmel,at91sam9rl-udc"
+ "atmel,at91sam9g45-udc"
+ "atmel,sama5d3-udc"
+ - reg: Address and length of the register set for the device
+ - interrupts: Should contain usba interrupt
+ - ep childnode: To specify the number of endpoints and their properties.
+
+Optional properties:
+ - atmel,vbus-gpio: If present, specifies a gpio that allows to detect whether
+ vbus is present (USB is connected).
+
+Required child node properties:
+ - name: Name of the endpoint.
+ - reg: Num of the endpoint.
+ - atmel,fifo-size: Size of the fifo.
+ - atmel,nb-banks: Number of banks.
+ - atmel,can-dma: Boolean to specify if the endpoint support DMA.
+ - atmel,can-isoc: Boolean to specify if the endpoint support ISOC.
+
+usb2: gadget@fff78000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "atmel,at91sam9rl-udc";
+ reg = <0x00600000 0x80000
+ 0xfff78000 0x400>;
+ interrupts = <27 4 0>;
+ atmel,vbus-gpio = <&pioB 19 0>;
+
+ ep0 {
+ reg = <0>;
+ atmel,fifo-size = <64>;
+ atmel,nb-banks = <1>;
+ };
+
+ ep1 {
+ reg = <1>;
+ atmel,fifo-size = <1024>;
+ atmel,nb-banks = <2>;
+ atmel,can-dma;
+ atmel,can-isoc;
+ };
+
+ ep2 {
+ reg = <2>;
+ atmel,fifo-size = <1024>;
+ atmel,nb-banks = <2>;
+ atmel,can-dma;
+ atmel,can-isoc;
+ };
+
+ ep3 {
+ reg = <3>;
+ atmel,fifo-size = <1024>;
+ atmel,nb-banks = <3>;
+ atmel,can-dma;
+ };
+
+ ep4 {
+ reg = <4>;
+ atmel,fifo-size = <1024>;
+ atmel,nb-banks = <3>;
+ atmel,can-dma;
+ };
+
+ ep5 {
+ reg = <5>;
+ atmel,fifo-size = <1024>;
+ atmel,nb-banks = <3>;
+ atmel,can-dma;
+ atmel,can-isoc;
+ };
+
+ ep6 {
+ reg = <6>;
+ atmel,fifo-size = <1024>;
+ atmel,nb-banks = <3>;
+ atmel,can-dma;
+ atmel,can-isoc;
+ };
+};
diff --git a/Documentation/devicetree/bindings/usb/brcm,bcm3384-usb.txt b/Documentation/devicetree/bindings/usb/brcm,bcm3384-usb.txt
new file mode 100644
index 000000000..452c45c7b
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/brcm,bcm3384-usb.txt
@@ -0,0 +1,11 @@
+* Broadcom USB controllers
+
+Required properties:
+- compatible: "brcm,bcm3384-ohci", "brcm,bcm3384-ehci"
+
+ These currently use the generic-ohci and generic-ehci drivers. On some
+ systems, special handling may be needed in the following cases:
+
+ - Restoring state after systemwide power save modes
+ - Sharing PHYs with the USBD (UDC) hardware
+ - Figuring out which controllers are disabled on ASIC bondout variants
diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-imx.txt b/Documentation/devicetree/bindings/usb/ci-hdrc-imx.txt
new file mode 100644
index 000000000..38a548001
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ci-hdrc-imx.txt
@@ -0,0 +1,35 @@
+* Freescale i.MX ci13xxx usb controllers
+
+Required properties:
+- compatible: Should be "fsl,imx27-usb"
+- reg: Should contain registers location and length
+- interrupts: Should contain controller interrupt
+- fsl,usbphy: phandle of usb phy that connects to the port
+
+Recommended properies:
+- phy_type: the type of the phy connected to the core. Should be one
+ of "utmi", "utmi_wide", "ulpi", "serial" or "hsic". Without this
+ property the PORTSC register won't be touched
+- dr_mode: One of "host", "peripheral" or "otg". Defaults to "otg"
+
+Optional properties:
+- fsl,usbmisc: phandler of non-core register device, with one argument
+ that indicate usb controller index
+- vbus-supply: regulator for vbus
+- disable-over-current: disable over current detect
+- external-vbus-divider: enables off-chip resistor divider for Vbus
+- maximum-speed: limit the maximum connection speed to "full-speed".
+- tpl-support: TPL (Targeted Peripheral List) feature for targeted hosts
+
+Examples:
+usb@02184000 { /* USB OTG */
+ compatible = "fsl,imx6q-usb", "fsl,imx27-usb";
+ reg = <0x02184000 0x200>;
+ interrupts = <0 43 0x04>;
+ fsl,usbphy = <&usbphy1>;
+ fsl,usbmisc = <&usbmisc 0>;
+ disable-over-current;
+ external-vbus-divider;
+ maximum-speed = "full-speed";
+ tpl-support;
+};
diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-qcom.txt b/Documentation/devicetree/bindings/usb/ci-hdrc-qcom.txt
new file mode 100644
index 000000000..f2899b550
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ci-hdrc-qcom.txt
@@ -0,0 +1,17 @@
+Qualcomm CI13xxx (Chipidea) USB controllers
+
+Required properties:
+- compatible: should contain "qcom,ci-hdrc"
+- reg: offset and length of the register set in the memory map
+- interrupts: interrupt-specifier for the controller interrupt.
+- usb-phy: phandle for the PHY device
+- dr_mode: Should be "peripheral"
+
+Examples:
+ gadget@f9a55000 {
+ compatible = "qcom,ci-hdrc";
+ reg = <0xf9a55000 0x400>;
+ dr_mode = "peripheral";
+ interrupts = <0 134 0>;
+ usb-phy = <&usbphy0>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
new file mode 100644
index 000000000..27f8b1e5e
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ci-hdrc-usb2.txt
@@ -0,0 +1,24 @@
+* USB2 ChipIdea USB controller for ci13xxx
+
+Required properties:
+- compatible: should be "chipidea,usb2"
+- reg: base address and length of the registers
+- interrupts: interrupt for the USB controller
+
+Optional properties:
+- clocks: reference to the USB clock
+- phys: reference to the USB PHY
+- phy-names: should be "usb-phy"
+- vbus-supply: reference to the VBUS regulator
+
+Example:
+
+ usb@f7ed0000 {
+ compatible = "chipidea,usb2";
+ reg = <0xf7ed0000 0x10000>;
+ interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&chip CLKID_USB0>;
+ phys = <&usb_phy0>;
+ phy-names = "usb-phy";
+ vbus-supply = <&reg_usb0_vbus>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/ci-hdrc-zevio.txt b/Documentation/devicetree/bindings/usb/ci-hdrc-zevio.txt
new file mode 100644
index 000000000..abbcb2aea
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ci-hdrc-zevio.txt
@@ -0,0 +1,17 @@
+* LSI Zevio USB OTG Controller
+
+Required properties:
+- compatible: Should be "lsi,zevio-usb"
+- reg: Should contain registers location and length
+- interrupts: Should contain controller interrupt
+
+Optional properties:
+- vbus-supply: regulator for vbus
+
+Examples:
+ usb0: usb@b0000000 {
+ reg = <0xb0000000 0x1000>;
+ compatible = "lsi,zevio-usb";
+ interrupts = <8>;
+ vbus-supply = <&vbus_reg>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/dwc2.txt b/Documentation/devicetree/bindings/usb/dwc2.txt
new file mode 100644
index 000000000..fd132cbee
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/dwc2.txt
@@ -0,0 +1,38 @@
+Platform DesignWare HS OTG USB 2.0 controller
+-----------------------------------------------------
+
+Required properties:
+- compatible : One of:
+ - brcm,bcm2835-usb: The DWC2 USB controller instance in the BCM2835 SoC.
+ - rockchip,rk3066-usb: The DWC2 USB controller instance in the rk3066 Soc;
+ - "rockchip,rk3188-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3188 Soc;
+ - "rockchip,rk3288-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3288 Soc;
+ - snps,dwc2: A generic DWC2 USB controller with default parameters.
+- reg : Should contain 1 register range (address and length)
+- interrupts : Should contain 1 interrupt
+- clocks: clock provider specifier
+- clock-names: shall be "otg"
+Refer to clk/clock-bindings.txt for generic clock consumer properties
+
+Optional properties:
+- phys: phy provider specifier
+- phy-names: shall be "usb2-phy"
+Refer to phy/phy-bindings.txt for generic phy consumer properties
+- dr_mode: shall be one of "host", "peripheral" and "otg"
+ Refer to usb/generic.txt
+- g-use-dma: enable dma usage in gadget driver.
+- g-rx-fifo-size: size of rx fifo size in gadget mode.
+- g-np-tx-fifo-size: size of non-periodic tx fifo size in gadget mode.
+- g-tx-fifo-size: size of periodic tx fifo per endpoint (except ep0) in gadget mode.
+
+Example:
+
+ usb@101c0000 {
+ compatible = "ralink,rt3050-usb, snps,dwc2";
+ reg = <0x101c0000 40000>;
+ interrupts = <18>;
+ clocks = <&usb_otg_ahb_clk>;
+ clock-names = "otg";
+ phys = <&usbphy>;
+ phy-names = "usb2-phy";
+ };
diff --git a/Documentation/devicetree/bindings/usb/dwc3-st.txt b/Documentation/devicetree/bindings/usb/dwc3-st.txt
new file mode 100644
index 000000000..f9d70252b
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/dwc3-st.txt
@@ -0,0 +1,68 @@
+ST DWC3 glue logic
+
+This file documents the parameters for the dwc3-st driver.
+This driver controls the glue logic used to configure the dwc3 core on
+STiH407 based platforms.
+
+Required properties:
+ - compatible : must be "st,stih407-dwc3"
+ - reg : glue logic base address and USB syscfg ctrl register offset
+ - reg-names : should be "reg-glue" and "syscfg-reg"
+ - st,syscon : should be phandle to system configuration node which
+ encompasses the glue registers
+ - resets : list of phandle and reset specifier pairs. There should be two entries, one
+ for the powerdown and softreset lines of the usb3 IP
+ - reset-names : list of reset signal names. Names should be "powerdown" and "softreset"
+See: Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
+See: Documentation/devicetree/bindings/reset/reset.txt
+
+ - #address-cells, #size-cells : should be '1' if the device has sub-nodes
+ with 'reg' property
+
+ - pinctl-names : A pinctrl state named "default" must be defined
+See: Documentation/devicetree/bindings/pinctrl/pinctrl-binding.txt
+
+ - pinctrl-0 : Pin control group
+See: Documentation/devicetree/bindings/pinctrl/pinctrl-binding.txt
+
+ - ranges : allows valid 1:1 translation between child's address space and
+ parent's address space
+
+Sub-nodes:
+The dwc3 core should be added as subnode to ST DWC3 glue as shown in the
+example below. The DT binding details of dwc3 can be found in:
+Documentation/devicetree/bindings/usb/dwc3.txt
+
+NB: The dr_mode property described in [1] is NOT optional for this driver, as the default value
+is "otg", which isn't supported by this SoC. Valid dr_mode values for dwc3-st are either "host"
+or "device".
+
+[1] Documentation/devicetree/bindings/usb/generic.txt
+
+Example:
+
+st_dwc3: dwc3@8f94000 {
+ status = "disabled";
+ compatible = "st,stih407-dwc3";
+ reg = <0x08f94000 0x1000>, <0x110 0x4>;
+ reg-names = "reg-glue", "syscfg-reg";
+ st,syscfg = <&syscfg_core>;
+ resets = <&powerdown STIH407_USB3_POWERDOWN>,
+ <&softreset STIH407_MIPHY2_SOFTRESET>;
+ reset-names = "powerdown",
+ "softreset";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb3>;
+ ranges;
+
+ dwc3: dwc3@9900000 {
+ compatible = "snps,dwc3";
+ reg = <0x09900000 0x100000>;
+ interrupts = <GIC_SPI 155 IRQ_TYPE_NONE>;
+ dr_mode = "host";
+ phys-names = "usb2-phy", "usb3-phy";
+ phys = <&usb2_picophy2>, <&phy_port2 MIPHY_TYPE_USB>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt
new file mode 100644
index 000000000..5cc364309
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/dwc3.txt
@@ -0,0 +1,50 @@
+synopsys DWC3 CORE
+
+DWC3- USB3 CONTROLLER
+
+Required properties:
+ - compatible: must be "snps,dwc3"
+ - reg : Address and length of the register set for the device
+ - interrupts: Interrupts used by the dwc3 controller.
+
+Optional properties:
+ - usb-phy : array of phandle for the PHY device. The first element
+ in the array is expected to be a handle to the USB2/HS PHY and
+ the second element is expected to be a handle to the USB3/SS PHY
+ - phys: from the *Generic PHY* bindings
+ - phy-names: from the *Generic PHY* bindings
+ - tx-fifo-resize: determines if the FIFO *has* to be reallocated.
+ - snps,usb3_lpm_capable: determines if platform is USB3 LPM capable
+ - snps,disable_scramble_quirk: true when SW should disable data scrambling.
+ Only really useful for FPGA builds.
+ - snps,has-lpm-erratum: true when DWC3 was configured with LPM Erratum enabled
+ - snps,lpm-nyet-threshold: LPM NYET threshold
+ - snps,u2exit_lfps_quirk: set if we want to enable u2exit lfps quirk
+ - snps,u2ss_inp3_quirk: set if we enable P3 OK for U2/SS Inactive quirk
+ - snps,req_p1p2p3_quirk: when set, the core will always request for
+ P1/P2/P3 transition sequence.
+ - snps,del_p1p2p3_quirk: when set core will delay P1/P2/P3 until a certain
+ amount of 8B10B errors occur.
+ - snps,del_phy_power_chg_quirk: when set core will delay PHY power change
+ from P0 to P1/P2/P3.
+ - snps,lfps_filter_quirk: when set core will filter LFPS reception.
+ - snps,rx_detect_poll_quirk: when set core will disable a 400us delay to start
+ Polling LFPS after RX.Detect.
+ - snps,tx_de_emphasis_quirk: when set core will set Tx de-emphasis value.
+ - snps,tx_de_emphasis: the value driven to the PHY is controlled by the
+ LTSSM during USB3 Compliance mode.
+ - snps,dis_u3_susphy_quirk: when set core will disable USB3 suspend phy.
+ - snps,dis_u2_susphy_quirk: when set core will disable USB2 suspend phy.
+ - snps,is-utmi-l1-suspend: true when DWC3 asserts output signal
+ utmi_l1_suspend_n, false when asserts utmi_sleep_n
+ - snps,hird-threshold: HIRD threshold
+
+This is usually a subnode to DWC3 glue to which it is connected.
+
+dwc3@4a030000 {
+ compatible = "snps,dwc3";
+ reg = <0x4a030000 0xcfff>;
+ interrupts = <0 92 4>
+ usb-phy = <&usb2_phy>, <&usb3,phy>;
+ tx-fifo-resize;
+};
diff --git a/Documentation/devicetree/bindings/usb/ehci-omap.txt b/Documentation/devicetree/bindings/usb/ehci-omap.txt
new file mode 100644
index 000000000..3dc231c83
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ehci-omap.txt
@@ -0,0 +1,32 @@
+OMAP HS USB EHCI controller
+
+This device is usually the child of the omap-usb-host
+Documentation/devicetree/bindings/mfd/omap-usb-host.txt
+
+Required properties:
+
+- compatible: should be "ti,ehci-omap"
+- reg: should contain one register range i.e. start and length
+- interrupts: description of the interrupt line
+
+Optional properties:
+
+- phys: list of phandles to PHY nodes.
+ This property is required if at least one of the ports are in
+ PHY mode i.e. OMAP_EHCI_PORT_MODE_PHY
+
+To specify the port mode, see
+Documentation/devicetree/bindings/mfd/omap-usb-host.txt
+
+Example for OMAP4:
+
+usbhsehci: ehci@4a064c00 {
+ compatible = "ti,ehci-omap";
+ reg = <0x4a064c00 0x400>;
+ interrupts = <0 77 0x4>;
+};
+
+&usbhsehci {
+ phys = <&hsusb1_phy 0 &hsusb3_phy>;
+};
+
diff --git a/Documentation/devicetree/bindings/usb/ehci-orion.txt b/Documentation/devicetree/bindings/usb/ehci-orion.txt
new file mode 100644
index 000000000..17c3bc858
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ehci-orion.txt
@@ -0,0 +1,20 @@
+* EHCI controller, Orion Marvell variants
+
+Required properties:
+- compatible: must be "marvell,orion-ehci"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: The EHCI interrupt
+
+Optional properties:
+- clocks: reference to the clock
+- phys: reference to the USB PHY
+- phy-names: name of the USB PHY, should be "usb"
+
+Example:
+
+ ehci@50000 {
+ compatible = "marvell,orion-ehci";
+ reg = <0x50000 0x1000>;
+ interrupts = <19>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/ehci-st.txt b/Documentation/devicetree/bindings/usb/ehci-st.txt
new file mode 100644
index 000000000..fb45fa577
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ehci-st.txt
@@ -0,0 +1,39 @@
+ST USB EHCI controller
+
+Required properties:
+ - compatible : must be "st,st-ehci-300x"
+ - reg : physical base addresses of the controller and length of memory mapped
+ region
+ - interrupts : one EHCI interrupt should be described here
+ - pinctrl-names : a pinctrl state named "default" must be defined
+ - pinctrl-0 : phandle referencing pin configuration of the USB controller
+See: Documentation/devicetree/bindings/pinctrl/pinctrl-binding.txt
+ - clocks : phandle list of usb clocks
+ - clock-names : should be "ic" for interconnect clock and "clk48"
+See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+ - phys : phandle for the PHY device
+ - phy-names : should be "usb"
+ - resets : phandle + reset specifier pairs to the powerdown and softreset lines
+ of the USB IP
+ - reset-names : should be "power" and "softreset"
+See: Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
+See: Documentation/devicetree/bindings/reset/reset.txt
+
+Example:
+
+ ehci1: usb@0xfe203e00 {
+ compatible = "st,st-ehci-300x";
+ reg = <0xfe203e00 0x100>;
+ interrupts = <GIC_SPI 148 IRQ_TYPE_NONE>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_usb1>;
+ clocks = <&clk_s_a1_ls 0>;
+ phys = <&usb2_phy>;
+ phy-names = "usb";
+ status = "okay";
+
+ resets = <&powerdown STIH416_USB1_POWERDOWN>,
+ <&softreset STIH416_USB1_SOFTRESET>;
+ reset-names = "power", "softreset";
+ };
diff --git a/Documentation/devicetree/bindings/usb/exynos-usb.txt b/Documentation/devicetree/bindings/usb/exynos-usb.txt
new file mode 100644
index 000000000..9b4dbe3b2
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/exynos-usb.txt
@@ -0,0 +1,117 @@
+Samsung Exynos SoC USB controller
+
+The USB devices interface with USB controllers on Exynos SOCs.
+The device node has following properties.
+
+EHCI
+Required properties:
+ - compatible: should be "samsung,exynos4210-ehci" for USB 2.0
+ EHCI controller in host mode.
+ - reg: physical base address of the controller and length of memory mapped
+ region.
+ - interrupts: interrupt number to the cpu.
+ - clocks: from common clock binding: handle to usb clock.
+ - clock-names: from common clock binding: Shall be "usbhost".
+ - port: if in the SoC there are EHCI phys, they should be listed here.
+ One phy per port. Each port should have following entries:
+ - reg: port number on EHCI controller, e.g
+ On Exynos5250, port 0 is USB2.0 otg phy
+ port 1 is HSIC phy0
+ port 2 is HSIC phy1
+ - phys: from the *Generic PHY* bindings; specifying phy used by port.
+
+Optional properties:
+ - samsung,vbus-gpio: if present, specifies the GPIO that
+ needs to be pulled up for the bus to be powered.
+
+Example:
+
+ usb@12110000 {
+ compatible = "samsung,exynos4210-ehci";
+ reg = <0x12110000 0x100>;
+ interrupts = <0 71 0>;
+ samsung,vbus-gpio = <&gpx2 6 1 3 3>;
+
+ clocks = <&clock 285>;
+ clock-names = "usbhost";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ phys = <&usb2phy 1>;
+ status = "disabled";
+ };
+ };
+
+OHCI
+Required properties:
+ - compatible: should be "samsung,exynos4210-ohci" for USB 2.0
+ OHCI companion controller in host mode.
+ - reg: physical base address of the controller and length of memory mapped
+ region.
+ - interrupts: interrupt number to the cpu.
+ - clocks: from common clock binding: handle to usb clock.
+ - clock-names: from common clock binding: Shall be "usbhost".
+ - port: if in the SoC there are OHCI phys, they should be listed here.
+ One phy per port. Each port should have following entries:
+ - reg: port number on OHCI controller, e.g
+ On Exynos5250, port 0 is USB2.0 otg phy
+ port 1 is HSIC phy0
+ port 2 is HSIC phy1
+ - phys: from the *Generic PHY* bindings, specifying phy used by port.
+
+Example:
+ usb@12120000 {
+ compatible = "samsung,exynos4210-ohci";
+ reg = <0x12120000 0x100>;
+ interrupts = <0 71 0>;
+
+ clocks = <&clock 285>;
+ clock-names = "usbhost";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+ port@0 {
+ reg = <0>;
+ phys = <&usb2phy 1>;
+ status = "disabled";
+ };
+
+ };
+
+DWC3
+Required properties:
+ - compatible: should be one of the following -
+ "samsung,exynos5250-dwusb3": for USB 3.0 DWC3 controller on
+ Exynos5250/5420.
+ "samsung,exynos7-dwusb3": for USB 3.0 DWC3 controller on Exynos7.
+ - #address-cells, #size-cells : should be '1' if the device has sub-nodes
+ with 'reg' property.
+ - ranges: allows valid 1:1 translation between child's address space and
+ parent's address space
+ - clocks: Clock IDs array as required by the controller.
+ - clock-names: names of clocks correseponding to IDs in the clock property
+
+Sub-nodes:
+The dwc3 core should be added as subnode to Exynos dwc3 glue.
+- dwc3 :
+ The binding details of dwc3 can be found in:
+ Documentation/devicetree/bindings/usb/dwc3.txt
+
+Example:
+ usb@12000000 {
+ compatible = "samsung,exynos5250-dwusb3";
+ clocks = <&clock 286>;
+ clock-names = "usbdrd30";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ dwc3 {
+ compatible = "synopsys,dwc3";
+ reg = <0x12000000 0x10000>;
+ interrupts = <0 72 0>;
+ usb-phy = <&usb2_phy &usb3_phy>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/usb/fsl-usb.txt b/Documentation/devicetree/bindings/usb/fsl-usb.txt
new file mode 100644
index 000000000..4779c029b
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/fsl-usb.txt
@@ -0,0 +1,83 @@
+Freescale SOC USB controllers
+
+The device node for a USB controller that is part of a Freescale
+SOC is as described in the document "Open Firmware Recommended
+Practice : Universal Serial Bus" with the following modifications
+and additions :
+
+Required properties :
+ - compatible : Should be "fsl-usb2-mph" for multi port host USB
+ controllers, or "fsl-usb2-dr" for dual role USB controllers
+ or "fsl,mpc5121-usb2-dr" for dual role USB controllers of MPC5121.
+ Wherever applicable, the IP version of the USB controller should
+ also be mentioned (for eg. fsl-usb2-dr-v2.2 for bsc9132).
+ - phy_type : For multi port host USB controllers, should be one of
+ "ulpi", or "serial". For dual role USB controllers, should be
+ one of "ulpi", "utmi", "utmi_wide", or "serial".
+ - reg : Offset and length of the register set for the device
+ - port0 : boolean; if defined, indicates port0 is connected for
+ fsl-usb2-mph compatible controllers. Either this property or
+ "port1" (or both) must be defined for "fsl-usb2-mph" compatible
+ controllers.
+ - port1 : boolean; if defined, indicates port1 is connected for
+ fsl-usb2-mph compatible controllers. Either this property or
+ "port0" (or both) must be defined for "fsl-usb2-mph" compatible
+ controllers.
+ - dr_mode : indicates the working mode for "fsl-usb2-dr" compatible
+ controllers. Can be "host", "peripheral", or "otg". Default to
+ "host" if not defined for backward compatibility.
+
+Recommended properties :
+ - interrupts : <a b> where a is the interrupt number and b is a
+ field that represents an encoding of the sense and level
+ information for the interrupt. This should be encoded based on
+ the information in section 2) depending on the type of interrupt
+ controller you have.
+ - interrupt-parent : the phandle for the interrupt controller that
+ services interrupts for this device.
+
+Optional properties :
+ - fsl,invert-drvvbus : boolean; for MPC5121 USB0 only. Indicates the
+ port power polarity of internal PHY signal DRVVBUS is inverted.
+ - fsl,invert-pwr-fault : boolean; for MPC5121 USB0 only. Indicates
+ the PWR_FAULT signal polarity is inverted.
+
+Example multi port host USB controller device node :
+ usb@22000 {
+ compatible = "fsl-usb2-mph";
+ reg = <22000 1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = <700>;
+ interrupts = <27 1>;
+ phy_type = "ulpi";
+ port0;
+ port1;
+ };
+
+Example dual role USB controller device node :
+ usb@23000 {
+ compatible = "fsl-usb2-dr";
+ reg = <23000 1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = <700>;
+ interrupts = <26 1>;
+ dr_mode = "otg";
+ phy = "ulpi";
+ };
+
+Example dual role USB controller device node for MPC5121ADS:
+
+ usb@4000 {
+ compatible = "fsl,mpc5121-usb2-dr";
+ reg = <0x4000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupt-parent = < &ipic >;
+ interrupts = <44 0x8>;
+ dr_mode = "otg";
+ phy_type = "utmi_wide";
+ fsl,invert-drvvbus;
+ fsl,invert-pwr-fault;
+ };
diff --git a/Documentation/devicetree/bindings/usb/generic.txt b/Documentation/devicetree/bindings/usb/generic.txt
new file mode 100644
index 000000000..477d5bb5e
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/generic.txt
@@ -0,0 +1,24 @@
+Generic USB Properties
+
+Optional properties:
+ - maximum-speed: tells USB controllers we want to work up to a certain
+ speed. Valid arguments are "super-speed", "high-speed",
+ "full-speed" and "low-speed". In case this isn't passed
+ via DT, USB controllers should default to their maximum
+ HW capability.
+ - dr_mode: tells Dual-Role USB controllers that we want to work on a
+ particular mode. Valid arguments are "host",
+ "peripheral" and "otg". In case this attribute isn't
+ passed via DT, USB DRD controllers should default to
+ OTG.
+
+This is an attribute to a USB controller such as:
+
+dwc3@4a030000 {
+ compatible = "synopsys,dwc3";
+ reg = <0x4a030000 0xcfff>;
+ interrupts = <0 92 4>
+ usb-phy = <&usb2_phy>, <&usb3,phy>;
+ maximum-speed = "super-speed";
+ dr_mode = "otg";
+};
diff --git a/Documentation/devicetree/bindings/usb/gr-udc.txt b/Documentation/devicetree/bindings/usb/gr-udc.txt
new file mode 100644
index 000000000..e9445224f
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/gr-udc.txt
@@ -0,0 +1,34 @@
+USB Peripheral Controller driver for Aeroflex Gaisler GRUSBDC.
+
+The GRUSBDC USB Device Controller core is available in the GRLIB VHDL
+IP core library.
+
+Note: In the ordinary environment for the core, a Leon SPARC system,
+these properties are built from information in the AMBA plug&play.
+
+Required properties:
+
+- name : Should be "GAISLER_USBDC" or "01_021"
+
+- reg : Address and length of the register set for the device
+
+- interrupts : Interrupt numbers for this device. Either one interrupt number
+ for all interrupts, or one for status related interrupts, one for IN
+ endpoint related interrupts and one for OUT endpoint related interrupts.
+
+Optional properties:
+
+- epobufsizes : Array of buffer sizes for OUT endpoints when they differ
+ from the default size of 1024. The array is indexed by the OUT endpoint
+ number. If the property is present it typically contains one entry for
+ each OUT endpoint of the core. Fewer entries overrides the default sizes
+ only for as many endpoints as the array contains.
+
+- epibufsizes : Array of buffer sizes for IN endpoints when they differ
+ from the default size of 1024. The array is indexed by the IN endpoint
+ number. If the property is present it typically contains one entry for
+ each IN endpoint of the core. Fewer entries overrides the default sizes
+ only for as many endpoints as the array contains.
+
+For further information look in the documentation for the GLIB IP core library:
+http://www.gaisler.com/products/grlib/grip.pdf
diff --git a/Documentation/devicetree/bindings/usb/isp1301.txt b/Documentation/devicetree/bindings/usb/isp1301.txt
new file mode 100644
index 000000000..5405d99d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/isp1301.txt
@@ -0,0 +1,25 @@
+* NXP ISP1301 USB transceiver
+
+Required properties:
+- compatible: must be "nxp,isp1301"
+- reg: I2C address of the ISP1301 device
+
+Optional properties of devices using ISP1301:
+- transceiver: phandle of isp1301 - this helps the ISP1301 driver to find the
+ ISP1301 instance associated with the respective USB driver
+
+Example:
+
+ isp1301: usb-transceiver@2c {
+ compatible = "nxp,isp1301";
+ reg = <0x2c>;
+ };
+
+ usbd@31020000 {
+ compatible = "nxp,lpc3220-udc";
+ reg = <0x31020000 0x300>;
+ interrupt-parent = <&mic>;
+ interrupts = <0x3d 0>, <0x3e 0>, <0x3c 0>, <0x3a 0>;
+ transceiver = <&isp1301>;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/usb/keystone-phy.txt b/Documentation/devicetree/bindings/usb/keystone-phy.txt
new file mode 100644
index 000000000..f37b3a863
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/keystone-phy.txt
@@ -0,0 +1,20 @@
+TI Keystone USB PHY
+
+Required properties:
+ - compatible: should be "ti,keystone-usbphy".
+ - #address-cells, #size-cells : should be '1' if the device has sub-nodes
+ with 'reg' property.
+ - reg : Address and length of the usb phy control register set.
+
+The main purpose of this PHY driver is to enable the USB PHY reference clock
+gate on the Keystone SOC for both the USB2 and USB3 PHY. Otherwise it is just
+an NOP PHY driver. Hence this node is referenced as both the usb2 and usb3
+phy node in the USB Glue layer driver node.
+
+usb_phy: usb_phy@2620738 {
+ compatible = "ti,keystone-usbphy";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x2620738 32>;
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/usb/keystone-usb.txt b/Documentation/devicetree/bindings/usb/keystone-usb.txt
new file mode 100644
index 000000000..60527d335
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/keystone-usb.txt
@@ -0,0 +1,42 @@
+TI Keystone Soc USB Controller
+
+DWC3 GLUE
+
+Required properties:
+ - compatible: should be "ti,keystone-dwc3".
+ - #address-cells, #size-cells : should be '1' if the device has sub-nodes
+ with 'reg' property.
+ - reg : Address and length of the register set for the USB subsystem on
+ the SOC.
+ - interrupts : The irq number of this device that is used to interrupt the
+ MPU.
+ - ranges: allows valid 1:1 translation between child's address space and
+ parent's address space.
+ - clocks: Clock IDs array as required by the controller.
+ - clock-names: names of clocks correseponding to IDs in the clock property.
+
+Sub-nodes:
+The dwc3 core should be added as subnode to Keystone DWC3 glue.
+- dwc3 :
+ The binding details of dwc3 can be found in:
+ Documentation/devicetree/bindings/usb/dwc3.txt
+
+Example:
+ usb: usb@2680000 {
+ compatible = "ti,keystone-dwc3";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x2680000 0x10000>;
+ clocks = <&clkusb>;
+ clock-names = "usb";
+ interrupts = <GIC_SPI 393 IRQ_TYPE_EDGE_RISING>;
+ ranges;
+ status = "disabled";
+
+ dwc3@2690000 {
+ compatible = "synopsys,dwc3";
+ reg = <0x2690000 0x70000>;
+ interrupts = <GIC_SPI 393 IRQ_TYPE_EDGE_RISING>;
+ usb-phy = <&usb_phy>, <&usb_phy>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/usb/lpc32xx-udc.txt b/Documentation/devicetree/bindings/usb/lpc32xx-udc.txt
new file mode 100644
index 000000000..29f12a533
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/lpc32xx-udc.txt
@@ -0,0 +1,28 @@
+* NXP LPC32xx SoC USB Device Controller (UDC)
+
+Required properties:
+- compatible: Must be "nxp,lpc3220-udc"
+- reg: Physical base address of the controller and length of memory mapped
+ region.
+- interrupts: The USB interrupts:
+ * USB Device Low Priority Interrupt
+ * USB Device High Priority Interrupt
+ * USB Device DMA Interrupt
+ * External USB Transceiver Interrupt (OTG ATX)
+- transceiver: phandle of the associated ISP1301 device - this is necessary for
+ the UDC controller for connecting to the USB physical layer
+
+Example:
+
+ isp1301: usb-transceiver@2c {
+ compatible = "nxp,isp1301";
+ reg = <0x2c>;
+ };
+
+ usbd@31020000 {
+ compatible = "nxp,lpc3220-udc";
+ reg = <0x31020000 0x300>;
+ interrupt-parent = <&mic>;
+ interrupts = <0x3d 0>, <0x3e 0>, <0x3c 0>, <0x3a 0>;
+ transceiver = <&isp1301>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/msm-hsusb.txt b/Documentation/devicetree/bindings/usb/msm-hsusb.txt
new file mode 100644
index 000000000..2826f2af5
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/msm-hsusb.txt
@@ -0,0 +1,95 @@
+MSM SoC HSUSB controllers
+
+EHCI
+
+Required properties:
+- compatible: Should contain "qcom,ehci-host"
+- regs: offset and length of the register set in the memory map
+- usb-phy: phandle for the PHY device
+
+Example EHCI controller device node:
+
+ ehci: ehci@f9a55000 {
+ compatible = "qcom,ehci-host";
+ reg = <0xf9a55000 0x400>;
+ usb-phy = <&usb_otg>;
+ };
+
+USB PHY with optional OTG:
+
+Required properties:
+- compatible: Should contain:
+ "qcom,usb-otg-ci" for chipsets with ChipIdea 45nm PHY
+ "qcom,usb-otg-snps" for chipsets with Synopsys 28nm PHY
+
+- regs: Offset and length of the register set in the memory map
+- interrupts: interrupt-specifier for the OTG interrupt.
+
+- clocks: A list of phandle + clock-specifier pairs for the
+ clocks listed in clock-names
+- clock-names: Should contain the following:
+ "phy" USB PHY reference clock
+ "core" Protocol engine clock
+ "iface" Interface bus clock
+ "alt_core" Protocol engine clock for targets with asynchronous
+ reset methodology. (optional)
+
+- vdccx-supply: phandle to the regulator for the vdd supply for
+ digital circuit operation.
+- v1p8-supply: phandle to the regulator for the 1.8V supply
+- v3p3-supply: phandle to the regulator for the 3.3V supply
+
+- resets: A list of phandle + reset-specifier pairs for the
+ resets listed in reset-names
+- reset-names: Should contain the following:
+ "phy" USB PHY controller reset
+ "link" USB LINK controller reset
+
+- qcom,otg-control: OTG control (VBUS and ID notifications) can be one of
+ 1 - PHY control
+ 2 - PMIC control
+
+Optional properties:
+- dr_mode: One of "host", "peripheral" or "otg". Defaults to "otg"
+
+- qcom,phy-init-sequence: PHY configuration sequence values. This is related to Device
+ Mode Eye Diagram test. Start address at which these values will be
+ written is ULPI_EXT_VENDOR_SPECIFIC. Value of -1 is reserved as
+ "do not overwrite default value at this address".
+ For example: qcom,phy-init-sequence = < -1 0x63 >;
+ Will update only value at address ULPI_EXT_VENDOR_SPECIFIC + 1.
+
+- qcom,phy-num: Select number of pyco-phy to use, can be one of
+ 0 - PHY one, default
+ 1 - Second PHY
+ Some platforms may have configuration to allow USB
+ controller work with any of the two HSPHYs present.
+
+- qcom,vdd-levels: This property must be a list of three integer values
+ (no, min, max) where each value represents either a voltage
+ in microvolts or a value corresponding to voltage corner.
+
+Example HSUSB OTG controller device node:
+
+ usb@f9a55000 {
+ compatible = "qcom,usb-otg-snps";
+ reg = <0xf9a55000 0x400>;
+ interrupts = <0 134 0>;
+ dr_mode = "peripheral";
+
+ clocks = <&gcc GCC_XO_CLK>, <&gcc GCC_USB_HS_SYSTEM_CLK>,
+ <&gcc GCC_USB_HS_AHB_CLK>;
+
+ clock-names = "phy", "core", "iface";
+
+ vddcx-supply = <&pm8841_s2_corner>;
+ v1p8-supply = <&pm8941_l6>;
+ v3p3-supply = <&pm8941_l24>;
+
+ resets = <&gcc GCC_USB2A_PHY_BCR>, <&gcc GCC_USB_HS_BCR>;
+ reset-names = "phy", "link";
+
+ qcom,otg-control = <1>;
+ qcom,phy-init-sequence = < -1 0x63 >;
+ qcom,vdd-levels = <1 5 7>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/mxs-phy.txt b/Documentation/devicetree/bindings/usb/mxs-phy.txt
new file mode 100644
index 000000000..379b84a56
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/mxs-phy.txt
@@ -0,0 +1,21 @@
+* Freescale MXS USB Phy Device
+
+Required properties:
+- compatible: should contain:
+ * "fsl,imx23-usbphy" for imx23 and imx28
+ * "fsl,imx6q-usbphy" for imx6dq and imx6dl
+ * "fsl,imx6sl-usbphy" for imx6sl
+ * "fsl,vf610-usbphy" for Vybrid vf610
+ * "fsl,imx6sx-usbphy" for imx6sx
+ "fsl,imx23-usbphy" is still a fallback for other strings
+- reg: Should contain registers location and length
+- interrupts: Should contain phy interrupt
+- fsl,anatop: phandle for anatop register, it is only for imx6 SoC series
+
+Example:
+usbphy1: usbphy@020c9000 {
+ compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy";
+ reg = <0x020c9000 0x1000>;
+ interrupts = <0 44 0x04>;
+ fsl,anatop = <&anatop>;
+};
diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra20-ehci.txt b/Documentation/devicetree/bindings/usb/nvidia,tegra20-ehci.txt
new file mode 100644
index 000000000..f60785f73
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/nvidia,tegra20-ehci.txt
@@ -0,0 +1,23 @@
+Tegra SOC USB controllers
+
+The device node for a USB controller that is part of a Tegra
+SOC is as described in the document "Open Firmware Recommended
+Practice : Universal Serial Bus" with the following modifications
+and additions :
+
+Required properties :
+ - compatible : For Tegra20, must contain "nvidia,tegra20-ehci".
+ For Tegra30, must contain "nvidia,tegra30-ehci". Otherwise, must contain
+ "nvidia,<chip>-ehci" plus at least one of the above, where <chip> is
+ tegra114, tegra124, tegra132, or tegra210.
+ - nvidia,phy : phandle of the PHY that the controller is connected to.
+ - clocks : Must contain one entry, for the module clock.
+ See ../clocks/clock-bindings.txt for details.
+ - resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names : Must include the following entries:
+ - usb
+
+Optional properties:
+ - nvidia,needs-double-reset : boolean is to be set for some of the Tegra20
+ USB ports, which need reset twice due to hardware issues.
diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra20-usb-phy.txt b/Documentation/devicetree/bindings/usb/nvidia,tegra20-usb-phy.txt
new file mode 100644
index 000000000..a9aa79fb9
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/nvidia,tegra20-usb-phy.txt
@@ -0,0 +1,72 @@
+Tegra SOC USB PHY
+
+The device node for Tegra SOC USB PHY:
+
+Required properties :
+ - compatible : For Tegra20, must contain "nvidia,tegra20-usb-phy".
+ For Tegra30, must contain "nvidia,tegra30-usb-phy". Otherwise, must contain
+ "nvidia,<chip>-usb-phy" plus at least one of the above, where <chip> is
+ tegra114, tegra124, tegra132, or tegra210.
+ - reg : Defines the following set of registers, in the order listed:
+ - The PHY's own register set.
+ Always present.
+ - The register set of the PHY containing the UTMI pad control registers.
+ Present if-and-only-if phy_type == utmi.
+ - phy_type : Should be one of "utmi", "ulpi" or "hsic".
+ - clocks : Defines the clocks listed in the clock-names property.
+ - clock-names : The following clock names must be present:
+ - reg: The clock needed to access the PHY's own registers. This is the
+ associated EHCI controller's clock. Always present.
+ - pll_u: PLL_U. Always present.
+ - timer: The timeout clock (clk_m). Present if phy_type == utmi.
+ - utmi-pads: The clock needed to access the UTMI pad control registers.
+ Present if phy_type == utmi.
+ - ulpi-link: The clock Tegra provides to the ULPI PHY (cdev2).
+ Present if phy_type == ulpi, and ULPI link mode is in use.
+ - resets : Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+ - reset-names : Must include the following entries:
+ - usb: The PHY's own reset signal.
+ - utmi-pads: The reset of the PHY containing the chip-wide UTMI pad control
+ registers. Required even if phy_type == ulpi.
+
+Required properties for phy_type == ulpi:
+ - nvidia,phy-reset-gpio : The GPIO used to reset the PHY.
+
+Required PHY timing params for utmi phy, for all chips:
+ - nvidia,hssync-start-delay : Number of 480 Mhz clock cycles to wait before
+ start of sync launches RxActive
+ - nvidia,elastic-limit : Variable FIFO Depth of elastic input store
+ - nvidia,idle-wait-delay : Number of 480 Mhz clock cycles of idle to wait
+ before declare IDLE.
+ - nvidia,term-range-adj : Range adjusment on terminations
+ - Either one of the following for HS driver output control:
+ - nvidia,xcvr-setup : integer, uses the provided value.
+ - nvidia,xcvr-setup-use-fuses : boolean, indicates that the value is read
+ from the on-chip fuses
+ If both are provided, nvidia,xcvr-setup-use-fuses takes precedence.
+ - nvidia,xcvr-lsfslew : LS falling slew rate control.
+ - nvidia,xcvr-lsrslew : LS rising slew rate control.
+
+Required PHY timing params for utmi phy, only on Tegra30 and above:
+ - nvidia,xcvr-hsslew : HS slew rate control.
+ - nvidia,hssquelch-level : HS squelch detector level.
+ - nvidia,hsdiscon-level : HS disconnect detector level.
+
+Optional properties:
+ - nvidia,has-legacy-mode : boolean indicates whether this controller can
+ operate in legacy mode (as APX 2500 / 2600). In legacy mode some
+ registers are accessed through the APB_MISC base address instead of
+ the USB controller.
+ - nvidia,is-wired : boolean. Indicates whether we can do certain kind of power
+ optimizations for the devices that are always connected. e.g. modem.
+ - dr_mode : dual role mode. Indicates the working mode for the PHY. Can be
+ "host", "peripheral", or "otg". Defaults to "host" if not defined.
+ host means this is a host controller
+ peripheral means it is device controller
+ otg means it can operate as either ("on the go")
+ - nvidia,has-utmi-pad-registers : boolean indicates whether this controller
+ contains the UTMI pad control registers common to all USB controllers.
+
+VBUS control (required for dr_mode == otg, optional for dr_mode == host):
+ - vbus-supply: regulator for VBUS
diff --git a/Documentation/devicetree/bindings/usb/ohci-nxp.txt b/Documentation/devicetree/bindings/usb/ohci-nxp.txt
new file mode 100644
index 000000000..71e28c101
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ohci-nxp.txt
@@ -0,0 +1,24 @@
+* OHCI controller, NXP ohci-nxp variant
+
+Required properties:
+- compatible: must be "nxp,ohci-nxp"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: The OHCI interrupt
+- transceiver: phandle of the associated ISP1301 device - this is necessary for
+ the UDC controller for connecting to the USB physical layer
+
+Example (LPC32xx):
+
+ isp1301: usb-transceiver@2c {
+ compatible = "nxp,isp1301";
+ reg = <0x2c>;
+ };
+
+ ohci@31020000 {
+ compatible = "nxp,ohci-nxp";
+ reg = <0x31020000 0x300>;
+ interrupt-parent = <&mic>;
+ interrupts = <0x3b 0>;
+ transceiver = <&isp1301>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/ohci-omap3.txt b/Documentation/devicetree/bindings/usb/ohci-omap3.txt
new file mode 100644
index 000000000..ce8c47cff
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ohci-omap3.txt
@@ -0,0 +1,15 @@
+OMAP HS USB OHCI controller (OMAP3 and later)
+
+Required properties:
+
+- compatible: should be "ti,ohci-omap3"
+- reg: should contain one register range i.e. start and length
+- interrupts: description of the interrupt line
+
+Example for OMAP4:
+
+usbhsohci: ohci@4a064800 {
+ compatible = "ti,ohci-omap3";
+ reg = <0x4a064800 0x400>;
+ interrupts = <0 76 0x4>;
+};
diff --git a/Documentation/devicetree/bindings/usb/ohci-st.txt b/Documentation/devicetree/bindings/usb/ohci-st.txt
new file mode 100644
index 000000000..6d8393748
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ohci-st.txt
@@ -0,0 +1,37 @@
+ST USB OHCI controller
+
+Required properties:
+
+ - compatible : must be "st,st-ohci-300x"
+ - reg : physical base addresses of the controller and length of memory mapped
+ region
+ - interrupts : one OHCI controller interrupt should be described here
+ - clocks : phandle list of usb clocks
+ - clock-names : should be "ic" for interconnect clock and "clk48"
+See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+
+ - phys : phandle for the PHY device
+ - phy-names : should be "usb"
+
+ - resets : phandle to the powerdown and reset controller for the USB IP
+ - reset-names : should be "power" and "softreset".
+See: Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
+See: Documentation/devicetree/bindings/reset/reset.txt
+
+Example:
+
+ ohci0: usb@0xfe1ffc00 {
+ compatible = "st,st-ohci-300x";
+ reg = <0xfe1ffc00 0x100>;
+ interrupts = <GIC_SPI 149 IRQ_TYPE_NONE>;
+ clocks = <&clk_s_a1_ls 0>,
+ <&clockgen_b0 0>;
+ clock-names = "ic", "clk48";
+ phys = <&usb2_phy>;
+ phy-names = "usb";
+ status = "okay";
+
+ resets = <&powerdown STIH416_USB0_POWERDOWN>,
+ <&softreset STIH416_USB0_SOFTRESET>;
+ reset-names = "power", "softreset";
+ };
diff --git a/Documentation/devicetree/bindings/usb/omap-usb.txt b/Documentation/devicetree/bindings/usb/omap-usb.txt
new file mode 100644
index 000000000..38d9bb850
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/omap-usb.txt
@@ -0,0 +1,80 @@
+OMAP GLUE AND OTHER OMAP SPECIFIC COMPONENTS
+
+OMAP MUSB GLUE
+ - compatible : Should be "ti,omap4-musb" or "ti,omap3-musb"
+ - ti,hwmods : must be "usb_otg_hs"
+ - multipoint : Should be "1" indicating the musb controller supports
+ multipoint. This is a MUSB configuration-specific setting.
+ - num-eps : Specifies the number of endpoints. This is also a
+ MUSB configuration-specific setting. Should be set to "16"
+ - ram-bits : Specifies the ram address size. Should be set to "12"
+ - interface-type : This is a board specific setting to describe the type of
+ interface between the controller and the phy. It should be "0" or "1"
+ specifying ULPI and UTMI respectively.
+ - mode : Should be "3" to represent OTG. "1" signifies HOST and "2"
+ represents PERIPHERAL.
+ - power : Should be "50". This signifies the controller can supply up to
+ 100mA when operating in host mode.
+ - usb-phy : the phandle for the PHY device
+ - phys : the phandle for the PHY device (used by generic PHY framework)
+ - phy-names : the names of the PHY corresponding to the PHYs present in the
+ *phy* phandle.
+
+Optional properties:
+ - ctrl-module : phandle of the control module this glue uses to write to
+ mailbox
+
+SOC specific device node entry
+usb_otg_hs: usb_otg_hs@4a0ab000 {
+ compatible = "ti,omap4-musb";
+ ti,hwmods = "usb_otg_hs";
+ multipoint = <1>;
+ num-eps = <16>;
+ ram-bits = <12>;
+ ctrl-module = <&omap_control_usb>;
+ phys = <&usb2_phy>;
+ phy-names = "usb2-phy";
+};
+
+Board specific device node entry
+&usb_otg_hs {
+ interface-type = <1>;
+ mode = <3>;
+ power = <50>;
+};
+
+OMAP DWC3 GLUE
+ - compatible : Should be
+ * "ti,dwc3" for OMAP5 and DRA7
+ * "ti,am437x-dwc3" for AM437x
+ - ti,hwmods : Should be "usb_otg_ss"
+ - reg : Address and length of the register set for the device.
+ - interrupts : The irq number of this device that is used to interrupt the
+ MPU
+ - #address-cells, #size-cells : Must be present if the device has sub-nodes
+ - utmi-mode : controls the source of UTMI/PIPE status for VBUS and OTG ID.
+ It should be set to "1" for HW mode and "2" for SW mode.
+ - ranges: the child address space are mapped 1:1 onto the parent address space
+
+Optional Properties:
+ - extcon : phandle for the extcon device omap dwc3 uses to detect
+ connect/disconnect events.
+ - vbus-supply : phandle to the regulator device tree node if needed.
+
+Sub-nodes:
+The dwc3 core should be added as subnode to omap dwc3 glue.
+- dwc3 :
+ The binding details of dwc3 can be found in:
+ Documentation/devicetree/bindings/usb/dwc3.txt
+
+omap_dwc3 {
+ compatible = "ti,dwc3";
+ ti,hwmods = "usb_otg_ss";
+ reg = <0x4a020000 0x1ff>;
+ interrupts = <0 93 4>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ utmi-mode = <2>;
+ ranges;
+};
+
diff --git a/Documentation/devicetree/bindings/usb/pxa-usb.txt b/Documentation/devicetree/bindings/usb/pxa-usb.txt
new file mode 100644
index 000000000..9c331799b
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/pxa-usb.txt
@@ -0,0 +1,53 @@
+PXA USB controllers
+
+OHCI
+
+Required properties:
+ - compatible: Should be "marvell,pxa-ohci" for USB controllers
+ used in host mode.
+
+Optional properties:
+ - "marvell,enable-port1", "marvell,enable-port2", "marvell,enable-port3"
+ If present, enables the appropriate USB port of the controller.
+ - "marvell,port-mode" selects the mode of the ports:
+ 1 = PMM_NPS_MODE
+ 2 = PMM_GLOBAL_MODE
+ 3 = PMM_PERPORT_MODE
+ - "marvell,power-sense-low" - power sense pin is low-active.
+ - "marvell,power-control-low" - power control pin is low-active.
+ - "marvell,no-oc-protection" - disable over-current protection.
+ - "marvell,oc-mode-perport" - enable per-port over-current protection.
+ - "marvell,power_on_delay" Power On to Power Good time - in ms.
+
+Example:
+
+ usb0: ohci@4c000000 {
+ compatible = "marvell,pxa-ohci", "usb-ohci";
+ reg = <0x4c000000 0x100000>;
+ interrupts = <18>;
+ marvell,enable-port1;
+ marvell,port-mode = <2>; /* PMM_GLOBAL_MODE */
+ };
+
+UDC
+
+Required properties:
+ - compatible: Should be "marvell,pxa270-udc" for USB controllers
+ used in device mode.
+ - reg: usb device MMIO address space
+ - interrupts: single interrupt generated by the UDC IP
+ - clocks: input clock of the UDC IP (see clock-bindings.txt)
+
+Optional properties:
+ - gpios:
+ - gpio activated to control the USB D+ pullup (see gpio.txt)
+
+Example:
+
+ pxa27x_udc: udc@40600000 {
+ compatible = "marvell,pxa270-udc";
+ reg = <0x40600000 0x10000>;
+ interrupts = <11>;
+ clocks = <&pxa2xx_clks 11>;
+ gpios = <&gpio 22 GPIO_ACTIVE_LOW>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/qcom,dwc3.txt b/Documentation/devicetree/bindings/usb/qcom,dwc3.txt
new file mode 100644
index 000000000..ca164e71d
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/qcom,dwc3.txt
@@ -0,0 +1,66 @@
+Qualcomm SuperSpeed DWC3 USB SoC controller
+
+Required properties:
+- compatible: should contain "qcom,dwc3"
+- clocks: A list of phandle + clock-specifier pairs for the
+ clocks listed in clock-names
+- clock-names: Should contain the following:
+ "core" Master/Core clock, have to be >= 125 MHz for SS
+ operation and >= 60MHz for HS operation
+
+Optional clocks:
+ "iface" System bus AXI clock. Not present on all platforms
+ "sleep" Sleep clock, used when USB3 core goes into low
+ power mode (U3).
+
+Required child node:
+A child node must exist to represent the core DWC3 IP block. The name of
+the node is not important. The content of the node is defined in dwc3.txt.
+
+Phy documentation is provided in the following places:
+Documentation/devicetree/bindings/phy/qcom,dwc3-usb-phy.txt
+
+Example device nodes:
+
+ hs_phy: phy@100f8800 {
+ compatible = "qcom,dwc3-hs-usb-phy";
+ reg = <0x100f8800 0x30>;
+ clocks = <&gcc USB30_0_UTMI_CLK>;
+ clock-names = "ref";
+ #phy-cells = <0>;
+
+ status = "ok";
+ };
+
+ ss_phy: phy@100f8830 {
+ compatible = "qcom,dwc3-ss-usb-phy";
+ reg = <0x100f8830 0x30>;
+ clocks = <&gcc USB30_0_MASTER_CLK>;
+ clock-names = "ref";
+ #phy-cells = <0>;
+
+ status = "ok";
+ };
+
+ usb3_0: usb30@0 {
+ compatible = "qcom,dwc3";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ clocks = <&gcc USB30_0_MASTER_CLK>;
+ clock-names = "core";
+
+ ranges;
+
+ status = "ok";
+
+ dwc3@10000000 {
+ compatible = "snps,dwc3";
+ reg = <0x10000000 0xcd00>;
+ interrupts = <0 205 0x4>;
+ phys = <&hs_phy>, <&ss_phy>;
+ phy-names = "usb2-phy", "usb3-phy";
+ tx-fifo-resize;
+ dr_mode = "host";
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/usb/renesas_usbhs.txt b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
new file mode 100644
index 000000000..ddbe304be
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/renesas_usbhs.txt
@@ -0,0 +1,27 @@
+Renesas Electronics USBHS driver
+
+Required properties:
+ - compatible: Must contain one of the following:
+ - "renesas,usbhs-r8a7790"
+ - "renesas,usbhs-r8a7791"
+ - reg: Base address and length of the register for the USBHS
+ - interrupts: Interrupt specifier for the USBHS
+ - clocks: A list of phandle + clock specifier pairs
+
+Optional properties:
+ - renesas,buswait: Integer to use BUSWAIT register
+ - renesas,enable-gpio: A gpio specifier to check GPIO determining if USB
+ function should be enabled
+ - phys: phandle + phy specifier pair
+ - phy-names: must be "usb"
+ - dmas: Must contain a list of references to DMA specifiers.
+ - dma-names : named "ch%d", where %d is the channel number ranging from zero
+ to the number of channels (DnFIFOs) minus one.
+
+Example:
+ usbhs: usb@e6590000 {
+ compatible = "renesas,usbhs-r8a7790";
+ reg = <0 0xe6590000 0 0x100>;
+ interrupts = <0 107 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp7_clks R8A7790_CLK_HSUSB>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/samsung-hsotg.txt b/Documentation/devicetree/bindings/usb/samsung-hsotg.txt
new file mode 100644
index 000000000..b83d428a2
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/samsung-hsotg.txt
@@ -0,0 +1,40 @@
+Samsung High Speed USB OTG controller
+-----------------------------
+
+The Samsung HSOTG IP can be found on Samsung SoCs, from S3C6400 onwards.
+It gives functionality of OTG-compliant USB 2.0 host and device with
+support for USB 2.0 high-speed (480Mbps) and full-speed (12 Mbps)
+operation.
+
+Currently only device mode is supported.
+
+Binding details
+-----
+
+Required properties:
+- compatible: "samsung,s3c6400-hsotg" should be used for all currently
+ supported SoC,
+- interrupt-parent: phandle for the interrupt controller to which the
+ interrupt signal of the HSOTG block is routed,
+- interrupts: specifier of interrupt signal of interrupt controller,
+ according to bindings of interrupt controller,
+- clocks: contains an array of clock specifiers:
+ - first entry: OTG clock
+- clock-names: contains array of clock names:
+ - first entry: must be "otg"
+- vusb_d-supply: phandle to voltage regulator of digital section,
+- vusb_a-supply: phandle to voltage regulator of analog section.
+
+Example
+-----
+
+ hsotg@12480000 {
+ compatible = "samsung,s3c6400-hsotg";
+ reg = <0x12480000 0x20000>;
+ interrupts = <0 71 0>;
+ clocks = <&clock 305>;
+ clock-names = "otg";
+ vusb_d-supply = <&vusb_reg>;
+ vusb_a-supply = <&vusbdac_reg>;
+ };
+
diff --git a/Documentation/devicetree/bindings/usb/samsung-usbphy.txt b/Documentation/devicetree/bindings/usb/samsung-usbphy.txt
new file mode 100644
index 000000000..33fd3543f
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/samsung-usbphy.txt
@@ -0,0 +1,117 @@
+SAMSUNG USB-PHY controllers
+
+** Samsung's usb 2.0 phy transceiver
+
+The Samsung's usb 2.0 phy transceiver is used for controlling
+usb 2.0 phy for s3c-hsotg as well as ehci-s5p and ohci-exynos
+usb controllers across Samsung SOCs.
+TODO: Adding the PHY binding with controller(s) according to the under
+development generic PHY driver.
+
+Required properties:
+
+Exynos4210:
+- compatible : should be "samsung,exynos4210-usb2phy"
+- reg : base physical address of the phy registers and length of memory mapped
+ region.
+- clocks: Clock IDs array as required by the controller.
+- clock-names: names of clock correseponding IDs clock property as requested
+ by the controller driver.
+
+Exynos5250:
+- compatible : should be "samsung,exynos5250-usb2phy"
+- reg : base physical address of the phy registers and length of memory mapped
+ region.
+
+Optional properties:
+- #address-cells: should be '1' when usbphy node has a child node with 'reg'
+ property.
+- #size-cells: should be '1' when usbphy node has a child node with 'reg'
+ property.
+- ranges: allows valid translation between child's address space and parent's
+ address space.
+
+- The child node 'usbphy-sys' to the node 'usbphy' is for the system controller
+ interface for usb-phy. It should provide the following information required by
+ usb-phy controller to control phy.
+ - reg : base physical address of PHY_CONTROL registers.
+ The size of this register is the total sum of size of all PHY_CONTROL
+ registers that the SoC has. For example, the size will be
+ '0x4' in case we have only one PHY_CONTROL register (e.g.
+ OTHERS register in S3C64XX or USB_PHY_CONTROL register in S5PV210)
+ and, '0x8' in case we have two PHY_CONTROL registers (e.g.
+ USBDEVICE_PHY_CONTROL and USBHOST_PHY_CONTROL registers in exynos4x).
+ and so on.
+
+Example:
+ - Exynos4210
+
+ usbphy@125B0000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "samsung,exynos4210-usb2phy";
+ reg = <0x125B0000 0x100>;
+ ranges;
+
+ clocks = <&clock 2>, <&clock 305>;
+ clock-names = "xusbxti", "otg";
+
+ usbphy-sys {
+ /* USB device and host PHY_CONTROL registers */
+ reg = <0x10020704 0x8>;
+ };
+ };
+
+
+** Samsung's usb 3.0 phy transceiver
+
+Starting exynso5250, Samsung's SoC have usb 3.0 phy transceiver
+which is used for controlling usb 3.0 phy for dwc3-exynos usb 3.0
+controllers across Samsung SOCs.
+
+Required properties:
+
+Exynos5250:
+- compatible : should be "samsung,exynos5250-usb3phy"
+- reg : base physical address of the phy registers and length of memory mapped
+ region.
+- clocks: Clock IDs array as required by the controller.
+- clock-names: names of clocks correseponding to IDs in the clock property
+ as requested by the controller driver.
+
+Optional properties:
+- #address-cells: should be '1' when usbphy node has a child node with 'reg'
+ property.
+- #size-cells: should be '1' when usbphy node has a child node with 'reg'
+ property.
+- ranges: allows valid translation between child's address space and parent's
+ address space.
+
+- The child node 'usbphy-sys' to the node 'usbphy' is for the system controller
+ interface for usb-phy. It should provide the following information required by
+ usb-phy controller to control phy.
+ - reg : base physical address of PHY_CONTROL registers.
+ The size of this register is the total sum of size of all PHY_CONTROL
+ registers that the SoC has. For example, the size will be
+ '0x4' in case we have only one PHY_CONTROL register (e.g.
+ OTHERS register in S3C64XX or USB_PHY_CONTROL register in S5PV210)
+ and, '0x8' in case we have two PHY_CONTROL registers (e.g.
+ USBDEVICE_PHY_CONTROL and USBHOST_PHY_CONTROL registers in exynos4x).
+ and so on.
+
+Example:
+ usbphy@12100000 {
+ compatible = "samsung,exynos5250-usb3phy";
+ reg = <0x12100000 0x100>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ clocks = <&clock 1>, <&clock 286>;
+ clock-names = "ext_xtal", "usbdrd30";
+
+ usbphy-sys {
+ /* USB device and host PHY_CONTROL registers */
+ reg = <0x10040704 0x8>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/usb/spear-usb.txt b/Documentation/devicetree/bindings/usb/spear-usb.txt
new file mode 100644
index 000000000..f8a464a25
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/spear-usb.txt
@@ -0,0 +1,39 @@
+ST SPEAr SoC USB controllers:
+-----------------------------
+
+EHCI:
+-----
+
+Required properties:
+- compatible: "st,spear600-ehci"
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the EHCI interrupt
+
+Example:
+
+ ehci@e1800000 {
+ compatible = "st,spear600-ehci", "usb-ehci";
+ reg = <0xe1800000 0x1000>;
+ interrupt-parent = <&vic1>;
+ interrupts = <27>;
+ };
+
+
+OHCI:
+-----
+
+Required properties:
+- compatible: "st,spear600-ohci"
+- interrupt-parent: Should be the phandle for the interrupt controller
+ that services interrupts for this device
+- interrupts: Should contain the OHCI interrupt
+
+Example:
+
+ ohci@e1900000 {
+ compatible = "st,spear600-ohci", "usb-ohci";
+ reg = <0xe1800000 0x1000>;
+ interrupt-parent = <&vic1>;
+ interrupts = <26>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/twlxxxx-usb.txt b/Documentation/devicetree/bindings/usb/twlxxxx-usb.txt
new file mode 100644
index 000000000..0aee0ad3f
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/twlxxxx-usb.txt
@@ -0,0 +1,40 @@
+USB COMPARATOR OF TWL CHIPS
+
+TWL6030 USB COMPARATOR
+ - compatible : Should be "ti,twl6030-usb"
+ - interrupts : Two interrupt numbers to the cpu should be specified. First
+ interrupt number is the otg interrupt number that raises ID interrupts when
+ the controller has to act as host and the second interrupt number is the
+ usb interrupt number that raises VBUS interrupts when the controller has to
+ act as device
+ - usb-supply : phandle to the regulator device tree node. It should be vusb
+ if it is twl6030 or ldousb if it is twl6032 subclass.
+
+twl6030-usb {
+ compatible = "ti,twl6030-usb";
+ interrupts = < 4 10 >;
+};
+
+Board specific device node entry
+&twl6030-usb {
+ usb-supply = <&vusb>;
+};
+
+TWL4030 USB PHY AND COMPARATOR
+ - compatible : Should be "ti,twl4030-usb"
+ - interrupts : The interrupt numbers to the cpu should be specified. First
+ interrupt number is the otg interrupt number that raises ID interrupts
+ and VBUS interrupts. The second interrupt number is optional.
+ - <supply-name>-supply : phandle to the regulator device tree node.
+ <supply-name> should be vusb1v5, vusb1v8 and vusb3v1
+ - usb_mode : The mode used by the phy to connect to the controller. "1"
+ specifies "ULPI" mode and "2" specifies "CEA2011_3PIN" mode.
+
+twl4030-usb {
+ compatible = "ti,twl4030-usb";
+ interrupts = < 10 4 >;
+ usb1v5-supply = <&vusb1v5>;
+ usb1v8-supply = <&vusb1v8>;
+ usb3v1-supply = <&vusb3v1>;
+ usb_mode = <1>;
+};
diff --git a/Documentation/devicetree/bindings/usb/udc-xilinx.txt b/Documentation/devicetree/bindings/usb/udc-xilinx.txt
new file mode 100644
index 000000000..47b4e397a
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/udc-xilinx.txt
@@ -0,0 +1,18 @@
+Xilinx USB2 device controller
+
+Required properties:
+- compatible : Should be "xlnx,usb2-device-4.00.a"
+- reg : Physical base address and size of the USB2
+ device registers map.
+- interrupts : Should contain single irq line of USB2 device
+ controller
+- xlnx,has-builtin-dma : if DMA is included
+
+Example:
+ axi-usb2-device@42e00000 {
+ compatible = "xlnx,usb2-device-4.00.a";
+ interrupts = <0x0 0x39 0x1>;
+ reg = <0x42e00000 0x10000>;
+ xlnx,has-builtin-dma;
+ };
+
diff --git a/Documentation/devicetree/bindings/usb/usb-ehci.txt b/Documentation/devicetree/bindings/usb/usb-ehci.txt
new file mode 100644
index 000000000..0b04fdff9
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/usb-ehci.txt
@@ -0,0 +1,38 @@
+USB EHCI controllers
+
+Required properties:
+ - compatible : should be "generic-ehci".
+ - reg : should contain at least address and length of the standard EHCI
+ register set for the device. Optional platform-dependent registers
+ (debug-port or other) can be also specified here, but only after
+ definition of standard EHCI registers.
+ - interrupts : one EHCI interrupt should be described here.
+
+Optional properties:
+ - big-endian-regs : boolean, set this for hcds with big-endian registers
+ - big-endian-desc : boolean, set this for hcds with big-endian descriptors
+ - big-endian : boolean, for hcds with big-endian-regs + big-endian-desc
+ - needs-reset-on-resume : boolean, set this to force EHCI reset after resume
+ - clocks : a list of phandle + clock specifier pairs
+ - phys : phandle + phy specifier pair
+ - phy-names : "usb"
+ - resets : phandle + reset specifier pair
+
+Example (Sequoia 440EPx):
+ ehci@e0000300 {
+ compatible = "ibm,usb-ehci-440epx", "usb-ehci";
+ interrupt-parent = <&UIC0>;
+ interrupts = <1a 4>;
+ reg = <0 e0000300 90 0 e0000390 70>;
+ big-endian;
+ };
+
+Example (Allwinner sun4i A10 SoC):
+ ehci0: usb@01c14000 {
+ compatible = "allwinner,sun4i-a10-ehci", "generic-ehci";
+ reg = <0x01c14000 0x100>;
+ interrupts = <39>;
+ clocks = <&ahb_gates 1>;
+ phys = <&usbphy 1>;
+ phy-names = "usb";
+ };
diff --git a/Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt b/Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt
new file mode 100644
index 000000000..5be01c859
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/usb-nop-xceiv.txt
@@ -0,0 +1,41 @@
+USB NOP PHY
+
+Required properties:
+- compatible: should be usb-nop-xceiv
+
+Optional properties:
+- clocks: phandle to the PHY clock. Use as per Documentation/devicetree
+ /bindings/clock/clock-bindings.txt
+ This property is required if clock-frequency is specified.
+
+- clock-names: Should be "main_clk"
+
+- clock-frequency: the clock frequency (in Hz) that the PHY clock must
+ be configured to.
+
+- vcc-supply: phandle to the regulator that provides power to the PHY.
+
+- reset-gpios: Should specify the GPIO for reset.
+
+- vbus-detect-gpio: should specify the GPIO detecting a VBus insertion
+ (see Documentation/devicetree/bindings/gpio/gpio.txt)
+- vbus-regulator : should specifiy the regulator supplying current drawn from
+ the VBus line (see Documentation/devicetree/bindings/regulator/regulator.txt).
+
+Example:
+
+ hsusb1_phy {
+ compatible = "usb-nop-xceiv";
+ clock-frequency = <19200000>;
+ clocks = <&osc 0>;
+ clock-names = "main_clk";
+ vcc-supply = <&hsusb1_vcc_regulator>;
+ reset-gpios = <&gpio1 7 GPIO_ACTIVE_LOW>;
+ vbus-detect-gpio = <&gpio2 13 GPIO_ACTIVE_HIGH>;
+ vbus-regulator = <&vbus_regulator>;
+ };
+
+hsusb1_phy is a NOP USB PHY device that gets its clock from an oscillator
+and expects that clock to be configured to 19.2MHz by the NOP PHY driver.
+hsusb1_vcc_regulator provides power to the PHY and GPIO 7 controls RESET.
+GPIO 13 detects VBus insertion, and accordingly notifies the vbus-regulator.
diff --git a/Documentation/devicetree/bindings/usb/usb-ohci.txt b/Documentation/devicetree/bindings/usb/usb-ohci.txt
new file mode 100644
index 000000000..19233b736
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/usb-ohci.txt
@@ -0,0 +1,28 @@
+USB OHCI controllers
+
+Required properties:
+- compatible : "generic-ohci"
+- reg : ohci controller register range (address and length)
+- interrupts : ohci controller interrupt
+
+Optional properties:
+- big-endian-regs : boolean, set this for hcds with big-endian registers
+- big-endian-desc : boolean, set this for hcds with big-endian descriptors
+- big-endian : boolean, for hcds with big-endian-regs + big-endian-desc
+- no-big-frame-no : boolean, set if frame_no lives in bits [15:0] of HCCA
+- num-ports : u32, to override the detected port count
+- clocks : a list of phandle + clock specifier pairs
+- phys : phandle + phy specifier pair
+- phy-names : "usb"
+- resets : phandle + reset specifier pair
+
+Example:
+
+ ohci0: usb@01c14400 {
+ compatible = "allwinner,sun4i-a10-ohci", "generic-ohci";
+ reg = <0x01c14400 0x100>;
+ interrupts = <64>;
+ clocks = <&usb_clk 6>, <&ahb_gates 2>;
+ phys = <&usbphy 1>;
+ phy-names = "usb";
+ };
diff --git a/Documentation/devicetree/bindings/usb/usb-uhci.txt b/Documentation/devicetree/bindings/usb/usb-uhci.txt
new file mode 100644
index 000000000..298133416
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/usb-uhci.txt
@@ -0,0 +1,15 @@
+Generic Platform UHCI Controller
+-----------------------------------------------------
+
+Required properties:
+- compatible : "generic-uhci" (deprecated: "platform-uhci")
+- reg : Should contain 1 register ranges(address and length)
+- interrupts : UHCI controller interrupt
+
+Example:
+
+ uhci@d8007b00 {
+ compatible = "generic-uhci";
+ reg = <0xd8007b00 0x200>;
+ interrupts = <43>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/usb-xhci.txt b/Documentation/devicetree/bindings/usb/usb-xhci.txt
new file mode 100644
index 000000000..86f67f088
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/usb-xhci.txt
@@ -0,0 +1,21 @@
+USB xHCI controllers
+
+Required properties:
+ - compatible: should be one of "generic-xhci",
+ "marvell,armada-375-xhci", "marvell,armada-380-xhci",
+ "renesas,xhci-r8a7790", "renesas,xhci-r8a7791" (deprecated:
+ "xhci-platform").
+ - reg: should contain address and length of the standard XHCI
+ register set for the device.
+ - interrupts: one XHCI interrupt should be described here.
+
+Optional properties:
+ - clocks: reference to a clock
+ - usb3-lpm-capable: determines if platform is USB3 LPM capable
+
+Example:
+ usb@f0931000 {
+ compatible = "generic-xhci";
+ reg = <0xf0931000 0x8c8>;
+ interrupts = <0x0 0x4e 0x0>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/usb3503.txt b/Documentation/devicetree/bindings/usb/usb3503.txt
new file mode 100644
index 000000000..52493b148
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/usb3503.txt
@@ -0,0 +1,36 @@
+SMSC USB3503 High-Speed Hub Controller
+
+Required properties:
+- compatible: Should be "smsc,usb3503" or "smsc,usb3503a".
+
+Optional properties:
+- reg: Specifies the i2c slave address, it is required and should be 0x08
+ if I2C is used.
+- connect-gpios: Should specify GPIO for connect.
+- disabled-ports: Should specify the ports unused.
+ '1' or '2' or '3' are available for this property to describe the port
+ number. 1~3 property values are possible to be described.
+ Do not describe this property if all ports have to be enabled.
+- intn-gpios: Should specify GPIO for interrupt.
+- reset-gpios: Should specify GPIO for reset.
+- initial-mode: Should specify initial mode.
+ (1 for HUB mode, 2 for STANDBY mode)
+- refclk: Clock used for driving REFCLK signal (optional, if not provided
+ the driver assumes that clock signal is always available, its
+ rate is specified by REF_SEL pins and a value from the primary
+ reference clock frequencies table is used)
+- refclk-frequency: Frequency of the REFCLK signal as defined by REF_SEL
+ pins (optional, if not provided, driver will not set rate of the
+ REFCLK signal and assume that a value from the primary reference
+ clock frequencies table is used)
+
+Examples:
+ usb3503@08 {
+ compatible = "smsc,usb3503";
+ reg = <0x08>;
+ connect-gpios = <&gpx3 0 1>;
+ disabled-ports = <2 3>;
+ intn-gpios = <&gpx3 4 1>;
+ reset-gpios = <&gpx3 5 1>;
+ initial-mode = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/usb/usbmisc-imx.txt b/Documentation/devicetree/bindings/usb/usbmisc-imx.txt
new file mode 100644
index 000000000..3539d4e7d
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/usbmisc-imx.txt
@@ -0,0 +1,16 @@
+* Freescale i.MX non-core registers
+
+Required properties:
+- #index-cells: Cells used to descibe usb controller index. Should be <1>
+- compatible: Should be one of below:
+ "fsl,imx6q-usbmisc" for imx6q
+ "fsl,vf610-usbmisc" for Vybrid vf610
+ "fsl,imx6sx-usbmisc" for imx6sx
+- reg: Should contain registers location and length
+
+Examples:
+usbmisc@02184800 {
+ #index-cells = <1>;
+ compatible = "fsl,imx6q-usbmisc";
+ reg = <0x02184800 0x200>;
+};
diff --git a/Documentation/devicetree/bindings/usb/ux500-usb.txt b/Documentation/devicetree/bindings/usb/ux500-usb.txt
new file mode 100644
index 000000000..439a41c79
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/ux500-usb.txt
@@ -0,0 +1,50 @@
+Ux500 MUSB
+
+Required properties:
+ - compatible : Should be "stericsson,db8500-musb"
+ - reg : Offset and length of registers
+ - interrupts : Interrupt; mode, number and trigger
+ - dr_mode : Dual-role; either host mode "host", peripheral mode "peripheral"
+ or both "otg"
+
+Optional properties:
+ - dmas : A list of dma channels;
+ dma-controller, event-line, fixed-channel, flags
+ - dma-names : An ordered list of channel names affiliated to the above
+
+Example:
+
+usb_per5@a03e0000 {
+ compatible = "stericsson,db8500-musb";
+ reg = <0xa03e0000 0x10000>;
+ interrupts = <0 23 0x4>;
+ interrupt-names = "mc";
+
+ dr_mode = "otg";
+
+ dmas = <&dma 38 0 0x2>, /* Logical - DevToMem */
+ <&dma 38 0 0x0>, /* Logical - MemToDev */
+ <&dma 37 0 0x2>, /* Logical - DevToMem */
+ <&dma 37 0 0x0>, /* Logical - MemToDev */
+ <&dma 36 0 0x2>, /* Logical - DevToMem */
+ <&dma 36 0 0x0>, /* Logical - MemToDev */
+ <&dma 19 0 0x2>, /* Logical - DevToMem */
+ <&dma 19 0 0x0>, /* Logical - MemToDev */
+ <&dma 18 0 0x2>, /* Logical - DevToMem */
+ <&dma 18 0 0x0>, /* Logical - MemToDev */
+ <&dma 17 0 0x2>, /* Logical - DevToMem */
+ <&dma 17 0 0x0>, /* Logical - MemToDev */
+ <&dma 16 0 0x2>, /* Logical - DevToMem */
+ <&dma 16 0 0x0>, /* Logical - MemToDev */
+ <&dma 39 0 0x2>, /* Logical - DevToMem */
+ <&dma 39 0 0x0>; /* Logical - MemToDev */
+
+ dma-names = "iep_1_9", "oep_1_9",
+ "iep_2_10", "oep_2_10",
+ "iep_3_11", "oep_3_11",
+ "iep_4_12", "oep_4_12",
+ "iep_5_13", "oep_5_13",
+ "iep_6_14", "oep_6_14",
+ "iep_7_15", "oep_7_15",
+ "iep_8", "oep_8";
+};
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
new file mode 100644
index 000000000..80339192c
--- /dev/null
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -0,0 +1,213 @@
+Device tree binding vendor prefix registry. Keep list in alphabetical order.
+
+This isn't an exhaustive list, but you should add new prefixes to it before
+using them to avoid name-space collisions.
+
+abilis Abilis Systems
+abcn Abracon Corporation
+active-semi Active-Semi International Inc
+ad Avionic Design GmbH
+adapteva Adapteva, Inc.
+adh AD Holdings Plc.
+adi Analog Devices, Inc.
+aeroflexgaisler Aeroflex Gaisler AB
+al Annapurna Labs
+allwinner Allwinner Technology Co., Ltd.
+alphascale AlphaScale Integrated Circuits Systems, Inc.
+altr Altera Corp.
+amcc Applied Micro Circuits Corporation (APM, formally AMCC)
+amd Advanced Micro Devices (AMD), Inc.
+amlogic Amlogic, Inc.
+ampire Ampire Co., Ltd.
+ams AMS AG
+amstaos AMS-Taos Inc.
+apm Applied Micro Circuits Corporation (APM)
+aptina Aptina Imaging
+arasan Arasan Chip Systems
+arm ARM Ltd.
+armadeus ARMadeus Systems SARL
+artesyn Artesyn Embedded Technologies Inc.
+asahi-kasei Asahi Kasei Corp.
+atmel Atmel Corporation
+auo AU Optronics Corporation
+avago Avago Technologies
+avic Shanghai AVIC Optoelectronics Co., Ltd.
+axis Axis Communications AB
+bosch Bosch Sensortec GmbH
+brcm Broadcom Corporation
+buffalo Buffalo, Inc.
+calxeda Calxeda
+capella Capella Microsystems, Inc
+cavium Cavium, Inc.
+cdns Cadence Design Systems Inc.
+chipidea Chipidea, Inc
+chipone ChipOne
+chipspark ChipSPARK
+chrp Common Hardware Reference Platform
+chunghwa Chunghwa Picture Tubes Ltd.
+cirrus Cirrus Logic, Inc.
+cloudengines Cloud Engines, Inc.
+cnm Chips&Media, Inc.
+cnxt Conexant Systems, Inc.
+cortina Cortina Systems, Inc.
+cosmic Cosmic Circuits
+crystalfontz Crystalfontz America, Inc.
+dallas Maxim Integrated Products (formerly Dallas Semiconductor)
+davicom DAVICOM Semiconductor, Inc.
+denx Denx Software Engineering
+digi Digi International Inc.
+digilent Diglent, Inc.
+dlg Dialog Semiconductor
+dlink D-Link Corporation
+dmo Data Modul AG
+ebv EBV Elektronik
+edt Emerging Display Technologies
+elan Elan Microelectronic Corp.
+emmicro EM Microelectronic
+energymicro Silicon Laboratories (formerly Energy Micro AS)
+epcos EPCOS AG
+epfl Ecole Polytechnique Fédérale de Lausanne
+epson Seiko Epson Corp.
+est ESTeem Wireless Modems
+ettus NI Ettus Research
+eukrea Eukréa Electromatique
+everest Everest Semiconductor Co. Ltd.
+everspin Everspin Technologies, Inc.
+excito Excito
+fcs Fairchild Semiconductor
+firefly Firefly
+fsl Freescale Semiconductor
+GEFanuc GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+gef GE Fanuc Intelligent Platforms Embedded Systems, Inc.
+geniatech Geniatech, Inc.
+giantplus Giantplus Technology Co., Ltd.
+globalscale Globalscale Technologies, Inc.
+gmt Global Mixed-mode Technology, Inc.
+goodix Shenzhen Huiding Technology Co., Ltd.
+google Google, Inc.
+grinn Grinn
+gumstix Gumstix, Inc.
+gw Gateworks Corporation
+hannstar HannStar Display Corporation
+haoyu Haoyu Microelectronic Co. Ltd.
+himax Himax Technologies, Inc.
+hisilicon Hisilicon Limited.
+hit Hitachi Ltd.
+honeywell Honeywell
+hp Hewlett Packard
+i2se I2SE GmbH
+ibm International Business Machines (IBM)
+idt Integrated Device Technologies, Inc.
+iom Iomega Corporation
+img Imagination Technologies Ltd.
+innolux Innolux Corporation
+intel Intel Corporation
+intercontrol Inter Control Group
+isee ISEE 2007 S.L.
+isil Intersil
+karo Ka-Ro electronics GmbH
+keymile Keymile GmbH
+lacie LaCie
+lantiq Lantiq Semiconductor
+lenovo Lenovo Group Ltd.
+lg LG Corporation
+linux Linux-specific binding
+lsi LSI Corp. (LSI Logic)
+lltc Linear Technology Corporation
+marvell Marvell Technology Group Ltd.
+maxim Maxim Integrated Products
+mediatek MediaTek Inc.
+merrii Merrii Technology Co., Ltd.
+micrel Micrel Inc.
+microchip Microchip Technology Inc.
+micron Micron Technology Inc.
+minix MINIX Technology Ltd.
+mitsubishi Mitsubishi Electric Corporation
+mosaixtech Mosaix Technologies, Inc.
+moxa Moxa
+mpl MPL AG
+mti Imagination Technologies Ltd. (formerly MIPS Technologies Inc.)
+mundoreader Mundo Reader S.L.
+murata Murata Manufacturing Co., Ltd.
+mxicy Macronix International Co., Ltd.
+national National Semiconductor
+neonode Neonode Inc.
+netgear NETGEAR
+netlogic Broadcom Corporation (formerly NetLogic Microsystems)
+newhaven Newhaven Display International
+nintendo Nintendo
+nokia Nokia
+nvidia NVIDIA
+nxp NXP Semiconductors
+onnn ON Semiconductor Corp.
+opencores OpenCores.org
+ortustech Ortus Technology Co., Ltd.
+ovti OmniVision Technologies
+panasonic Panasonic Corporation
+parade Parade Technologies Inc.
+pericom Pericom Technology Inc.
+phytec PHYTEC Messtechnik GmbH
+picochip Picochip Ltd
+plathome Plat'Home Co., Ltd.
+pixcir PIXCIR MICROELECTRONICS Co., Ltd
+powervr PowerVR (deprecated, use img)
+qca Qualcomm Atheros, Inc.
+qcom Qualcomm Technologies, Inc
+qemu QEMU, a generic and open source machine emulator and virtualizer
+qnap QNAP Systems, Inc.
+radxa Radxa
+raidsonic RaidSonic Technology GmbH
+ralink Mediatek/Ralink Technology Corp.
+ramtron Ramtron International
+realtek Realtek Semiconductor Corp.
+renesas Renesas Electronics Corporation
+ricoh Ricoh Co. Ltd.
+rockchip Fuzhou Rockchip Electronics Co., Ltd
+samsung Samsung Semiconductor
+sandisk Sandisk Corporation
+sbs Smart Battery System
+schindler Schindler
+seagate Seagate Technology PLC
+semtech Semtech Corporation
+sil Silicon Image
+silabs Silicon Laboratories
+siliconmitus Silicon Mitus, Inc.
+simtek
+sii Seiko Instruments, Inc.
+silergy Silergy Corp.
+sirf SiRF Technology, Inc.
+sitronix Sitronix Technology Corporation
+skyworks Skyworks Solutions, Inc.
+smsc Standard Microsystems Corporation
+snps Synopsys, Inc.
+solidrun SolidRun
+sony Sony Corporation
+spansion Spansion Inc.
+sprd Spreadtrum Communications Inc.
+st STMicroelectronics
+ste ST-Ericsson
+stericsson ST-Ericsson
+synology Synology, Inc.
+tbs TBS Technologies
+thine THine Electronics, Inc.
+ti Texas Instruments
+tlm Trusted Logic Mobility
+toradex Toradex AG
+toshiba Toshiba Corporation
+toumaz Toumaz
+truly Truly Semiconductors Limited
+usi Universal Scientific Industrial Co., Ltd.
+v3 V3 Semiconductor
+variscite Variscite Ltd.
+via VIA Technologies, Inc.
+virtio Virtual I/O Device Specification, developed by the OASIS consortium
+voipac Voipac Technologies s.r.o.
+winbond Winbond Electronics corp.
+wlf Wolfson Microelectronics
+wm Wondermedia Technologies, Inc.
+x-powers X-Powers
+xes Extreme Engineering Solutions (X-ES)
+xillybus Xillybus Ltd.
+xlnx Xilinx
+zyxel ZyXEL Communications Corp.
+zarlink Zarlink Semiconductor
diff --git a/Documentation/devicetree/bindings/video/adi,adv7123.txt b/Documentation/devicetree/bindings/video/adi,adv7123.txt
new file mode 100644
index 000000000..a6b2b2b8f
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/adi,adv7123.txt
@@ -0,0 +1,50 @@
+Analog Device ADV7123 Video DAC
+-------------------------------
+
+The ADV7123 is a digital-to-analog converter that outputs VGA signals from a
+parallel video input.
+
+Required properties:
+
+- compatible: Should be "adi,adv7123"
+
+Optional properties:
+
+- psave-gpios: Power save control GPIO
+
+Required nodes:
+
+The ADV7123 has two video ports. Their connections are modeled using the OF
+graph bindings specified in Documentation/devicetree/bindings/graph.txt.
+
+- Video port 0 for DPI input
+- Video port 1 for VGA output
+
+
+Example
+-------
+
+ adv7123: encoder@0 {
+ compatible = "adi,adv7123";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ adv7123_in: endpoint@0 {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ adv7123_out: endpoint@0 {
+ remote-endpoint = <&vga_connector_in>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/adi,adv7511.txt b/Documentation/devicetree/bindings/video/adi,adv7511.txt
new file mode 100644
index 000000000..96c25ee01
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/adi,adv7511.txt
@@ -0,0 +1,88 @@
+Analog Device ADV7511(W)/13 HDMI Encoders
+-----------------------------------------
+
+The ADV7511, ADV7511W and ADV7513 are HDMI audio and video transmitters
+compatible with HDMI 1.4 and DVI 1.0. They support color space conversion,
+S/PDIF, CEC and HDCP.
+
+Required properties:
+
+- compatible: Should be one of "adi,adv7511", "adi,adv7511w" or "adi,adv7513"
+- reg: I2C slave address
+
+The ADV7511 supports a large number of input data formats that differ by their
+color depth, color format, clock mode, bit justification and random
+arrangement of components on the data bus. The combination of the following
+properties describe the input and map directly to the video input tables of the
+ADV7511 datasheet that document all the supported combinations.
+
+- adi,input-depth: Number of bits per color component at the input (8, 10 or
+ 12).
+- adi,input-colorspace: The input color space, one of "rgb", "yuv422" or
+ "yuv444".
+- adi,input-clock: The input clock type, one of "1x" (one clock cycle per
+ pixel), "2x" (two clock cycles per pixel), "ddr" (one clock cycle per pixel,
+ data driven on both edges).
+
+The following input format properties are required except in "rgb 1x" and
+"yuv444 1x" modes, in which case they must not be specified.
+
+- adi,input-style: The input components arrangement variant (1, 2 or 3), as
+ listed in the input format tables in the datasheet.
+- adi,input-justification: The input bit justification ("left", "evenly",
+ "right").
+
+Optional properties:
+
+- interrupts: Specifier for the ADV7511 interrupt
+- pd-gpios: Specifier for the GPIO connected to the power down signal
+
+- adi,clock-delay: Video data clock delay relative to the pixel clock, in ps
+ (-1200 ps .. 1600 ps). Defaults to no delay.
+- adi,embedded-sync: The input uses synchronization signals embedded in the
+ data stream (similar to BT.656). Defaults to separate H/V synchronization
+ signals.
+
+Required nodes:
+
+The ADV7511 has two video ports. Their connections are modelled using the OF
+graph bindings specified in Documentation/devicetree/bindings/graph.txt.
+
+- Video port 0 for the RGB or YUV input
+- Video port 1 for the HDMI output
+
+
+Example
+-------
+
+ adv7511w: hdmi@39 {
+ compatible = "adi,adv7511w";
+ reg = <39>;
+ interrupt-parent = <&gpio3>;
+ interrupts = <29 IRQ_TYPE_EDGE_FALLING>;
+
+ adi,input-depth = <8>;
+ adi,input-colorspace = "rgb";
+ adi,input-clock = "1x";
+ adi,input-style = <1>;
+ adi,input-justification = "evenly";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ adv7511w_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ adv7511_out: endpoint {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/analog-tv-connector.txt b/Documentation/devicetree/bindings/video/analog-tv-connector.txt
new file mode 100644
index 000000000..0c0970c21
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/analog-tv-connector.txt
@@ -0,0 +1,25 @@
+Analog TV Connector
+===================
+
+Required properties:
+- compatible: "composite-video-connector" or "svideo-connector"
+
+Optional properties:
+- label: a symbolic name for the connector
+
+Required nodes:
+- Video port for TV input
+
+Example
+-------
+
+tv: connector {
+ compatible = "composite-video-connector";
+ label = "tv";
+
+ port {
+ tv_connector_in: endpoint {
+ remote-endpoint = <&venc_out>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/arm,pl11x.txt b/Documentation/devicetree/bindings/video/arm,pl11x.txt
new file mode 100644
index 000000000..3e3039a8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/arm,pl11x.txt
@@ -0,0 +1,109 @@
+* ARM PrimeCell Color LCD Controller PL110/PL111
+
+See also Documentation/devicetree/bindings/arm/primecell.txt
+
+Required properties:
+
+- compatible: must be one of:
+ "arm,pl110", "arm,primecell"
+ "arm,pl111", "arm,primecell"
+
+- reg: base address and size of the control registers block
+
+- interrupt-names: either the single entry "combined" representing a
+ combined interrupt output (CLCDINTR), or the four entries
+ "mbe", "vcomp", "lnbu", "fuf" representing the individual
+ CLCDMBEINTR, CLCDVCOMPINTR, CLCDLNBUINTR, CLCDFUFINTR interrupts
+
+- interrupts: contains an interrupt specifier for each entry in
+ interrupt-names
+
+- clock-names: should contain "clcdclk" and "apb_pclk"
+
+- clocks: contains phandle and clock specifier pairs for the entries
+ in the clock-names property. See
+ Documentation/devicetree/binding/clock/clock-bindings.txt
+
+Optional properties:
+
+- memory-region: phandle to a node describing memory (see
+ Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt)
+ to be used for the framebuffer; if not present, the framebuffer
+ may be located anywhere in the memory
+
+- max-memory-bandwidth: maximum bandwidth in bytes per second that the
+ cell's memory interface can handle; if not present, the memory
+ interface is fast enough to handle all possible video modes
+
+Required sub-nodes:
+
+- port: describes LCD panel signals, following the common binding
+ for video transmitter interfaces; see
+ Documentation/devicetree/bindings/media/video-interfaces.txt;
+ when it is a TFT panel, the port's endpoint must define the
+ following property:
+
+ - arm,pl11x,tft-r0g0b0-pads: an array of three 32-bit values,
+ defining the way CLD pads are wired up; first value
+ contains index of the "CLD" external pin (pad) used
+ as R0 (first bit of the red component), second value
+ index of the pad used as G0, third value index of the
+ pad used as B0, see also "LCD panel signal multiplexing
+ details" paragraphs in the PL110/PL111 Technical
+ Reference Manuals; this implicitly defines available
+ color modes, for example:
+ - PL111 TFT 4:4:4 panel:
+ arm,pl11x,tft-r0g0b0-pads = <4 15 20>;
+ - PL110 TFT (1:)5:5:5 panel:
+ arm,pl11x,tft-r0g0b0-pads = <1 7 13>;
+ - PL111 TFT (1:)5:5:5 panel:
+ arm,pl11x,tft-r0g0b0-pads = <3 11 19>;
+ - PL111 TFT 5:6:5 panel:
+ arm,pl11x,tft-r0g0b0-pads = <3 10 19>;
+ - PL110 and PL111 TFT 8:8:8 panel:
+ arm,pl11x,tft-r0g0b0-pads = <0 8 16>;
+ - PL110 and PL111 TFT 8:8:8 panel, R & B components swapped:
+ arm,pl11x,tft-r0g0b0-pads = <16 8 0>;
+
+
+Example:
+
+ clcd@10020000 {
+ compatible = "arm,pl111", "arm,primecell";
+ reg = <0x10020000 0x1000>;
+ interrupt-names = "combined";
+ interrupts = <0 44 4>;
+ clocks = <&oscclk1>, <&oscclk2>;
+ clock-names = "clcdclk", "apb_pclk";
+ max-memory-bandwidth = <94371840>; /* Bps, 1024x768@60 16bpp */
+
+ port {
+ clcd_pads: endpoint {
+ remote-endpoint = <&clcd_panel>;
+ arm,pl11x,tft-r0g0b0-pads = <0 8 16>;
+ };
+ };
+
+ };
+
+ panel {
+ compatible = "panel-dpi";
+
+ port {
+ clcd_panel: endpoint {
+ remote-endpoint = <&clcd_pads>;
+ };
+ };
+
+ panel-timing {
+ clock-frequency = <25175000>;
+ hactive = <640>;
+ hback-porch = <40>;
+ hfront-porch = <24>;
+ hsync-len = <96>;
+ vactive = <480>;
+ vback-porch = <32>;
+ vfront-porch = <11>;
+ vsync-len = <2>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/atmel,lcdc.txt b/Documentation/devicetree/bindings/video/atmel,lcdc.txt
new file mode 100644
index 000000000..ecb8da063
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/atmel,lcdc.txt
@@ -0,0 +1,89 @@
+Atmel LCDC Framebuffer
+-----------------------------------------------------
+
+Required properties:
+- compatible :
+ "atmel,at91sam9261-lcdc" ,
+ "atmel,at91sam9263-lcdc" ,
+ "atmel,at91sam9g10-lcdc" ,
+ "atmel,at91sam9g45-lcdc" ,
+ "atmel,at91sam9g45es-lcdc" ,
+ "atmel,at91sam9rl-lcdc" ,
+ "atmel,at32ap-lcdc"
+- reg : Should contain 1 register ranges(address and length).
+ Can contain an additional register range(address and length)
+ for fixed framebuffer memory. Useful for dedicated memories.
+- interrupts : framebuffer controller interrupt
+- display: a phandle pointing to the display node
+
+Required nodes:
+- display: a display node is required to initialize the lcd panel
+ This should be in the board dts.
+- default-mode: a videomode within the display with timing parameters
+ as specified below.
+
+Optional properties:
+- lcd-supply: Regulator for LCD supply voltage.
+
+Example:
+
+ fb0: fb@0x00500000 {
+ compatible = "atmel,at91sam9g45-lcdc";
+ reg = <0x00500000 0x1000>;
+ interrupts = <23 3 0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_fb>;
+ display = <&display0>;
+ status = "okay";
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ };
+
+Example for fixed framebuffer memory:
+
+ fb0: fb@0x00500000 {
+ compatible = "atmel,at91sam9263-lcdc";
+ reg = <0x00700000 0x1000 0x70000000 0x200000>;
+ [...]
+ };
+
+Atmel LCDC Display
+-----------------------------------------------------
+Required properties (as per of_videomode_helper):
+
+ - atmel,dmacon: dma controller configuration
+ - atmel,lcdcon2: lcd controller configuration
+ - atmel,guard-time: lcd guard time (Delay in frame periods)
+ - bits-per-pixel: lcd panel bit-depth.
+
+Optional properties (as per of_videomode_helper):
+ - atmel,lcdcon-backlight: enable backlight
+ - atmel,lcdcon-backlight-inverted: invert backlight PWM polarity
+ - atmel,lcd-wiring-mode: lcd wiring mode "RGB" or "BRG"
+ - atmel,power-control-gpio: gpio to power on or off the LCD (as many as needed)
+
+Example:
+ display0: display {
+ bits-per-pixel = <32>;
+ atmel,lcdcon-backlight;
+ atmel,dmacon = <0x1>;
+ atmel,lcdcon2 = <0x80008002>;
+ atmel,guard-time = <9>;
+ atmel,lcd-wiring-mode = <1>;
+
+ display-timings {
+ native-mode = <&timing0>;
+ timing0: timing0 {
+ clock-frequency = <9000000>;
+ hactive = <480>;
+ vactive = <272>;
+ hback-porch = <1>;
+ hfront-porch = <1>;
+ vback-porch = <40>;
+ vfront-porch = <1>;
+ hsync-len = <45>;
+ vsync-len = <1>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/backlight/88pm860x.txt b/Documentation/devicetree/bindings/video/backlight/88pm860x.txt
new file mode 100644
index 000000000..261df2799
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/backlight/88pm860x.txt
@@ -0,0 +1,15 @@
+88pm860x-backlight bindings
+
+Optional properties:
+ - marvell,88pm860x-iset: Current supplies on backlight device.
+ - marvell,88pm860x-pwm: PWM frequency on backlight device.
+
+Example:
+
+ backlights {
+ backlight-0 {
+ marvell,88pm860x-iset = <4>;
+ marvell,88pm860x-pwm = <3>;
+ };
+ backlight-2 {
+ };
diff --git a/Documentation/devicetree/bindings/video/backlight/gpio-backlight.txt b/Documentation/devicetree/bindings/video/backlight/gpio-backlight.txt
new file mode 100644
index 000000000..321be6640
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/backlight/gpio-backlight.txt
@@ -0,0 +1,16 @@
+gpio-backlight bindings
+
+Required properties:
+ - compatible: "gpio-backlight"
+ - gpios: describes the gpio that is used for enabling/disabling the backlight.
+ refer to bindings/gpio/gpio.txt for more details.
+
+Optional properties:
+ - default-on: enable the backlight at boot.
+
+Example:
+ backlight {
+ compatible = "gpio-backlight";
+ gpios = <&gpio3 4 GPIO_ACTIVE_HIGH>;
+ default-on;
+ };
diff --git a/Documentation/devicetree/bindings/video/backlight/lp855x.txt b/Documentation/devicetree/bindings/video/backlight/lp855x.txt
new file mode 100644
index 000000000..0a3ecbc3a
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/backlight/lp855x.txt
@@ -0,0 +1,70 @@
+lp855x bindings
+
+Required properties:
+ - compatible: "ti,lp8550", "ti,lp8551", "ti,lp8552", "ti,lp8553",
+ "ti,lp8555", "ti,lp8556", "ti,lp8557"
+ - reg: I2C slave address (u8)
+ - dev-ctrl: Value of DEVICE CONTROL register (u8). It depends on the device.
+
+Optional properties:
+ - bl-name: Backlight device name (string)
+ - init-brt: Initial value of backlight brightness (u8)
+ - pwm-period: PWM period value. Set only PWM input mode used (u32)
+ - rom-addr: Register address of ROM area to be updated (u8)
+ - rom-val: Register value to be updated (u8)
+ - power-supply: Regulator which controls the 3V rail
+
+Example:
+
+ /* LP8555 */
+ backlight@2c {
+ compatible = "ti,lp8555";
+ reg = <0x2c>;
+
+ dev-ctrl = /bits/ 8 <0x00>;
+ pwm-period = <10000>;
+
+ /* 4V OV, 4 output LED0 string enabled */
+ rom_14h {
+ rom-addr = /bits/ 8 <0x14>;
+ rom-val = /bits/ 8 <0xcf>;
+ };
+
+ /* Heavy smoothing, 24ms ramp time step */
+ rom_15h {
+ rom-addr = /bits/ 8 <0x15>;
+ rom-val = /bits/ 8 <0xc7>;
+ };
+
+ /* 4 output LED1 string enabled */
+ rom_19h {
+ rom-addr = /bits/ 8 <0x19>;
+ rom-val = /bits/ 8 <0x0f>;
+ };
+ };
+
+ /* LP8556 */
+ backlight@2c {
+ compatible = "ti,lp8556";
+ reg = <0x2c>;
+
+ bl-name = "lcd-bl";
+ dev-ctrl = /bits/ 8 <0x85>;
+ init-brt = /bits/ 8 <0x10>;
+ };
+
+ /* LP8557 */
+ backlight@2c {
+ compatible = "ti,lp8557";
+ reg = <0x2c>;
+ power-supply = <&backlight_vdd>;
+
+ dev-ctrl = /bits/ 8 <0x41>;
+ init-brt = /bits/ 8 <0x0a>;
+
+ /* 4V OV, 4 output LED string enabled */
+ rom_14h {
+ rom-addr = /bits/ 8 <0x14>;
+ rom-val = /bits/ 8 <0xcf>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/backlight/max8925-backlight.txt b/Documentation/devicetree/bindings/video/backlight/max8925-backlight.txt
new file mode 100644
index 000000000..b4cffdaa4
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/backlight/max8925-backlight.txt
@@ -0,0 +1,10 @@
+88pm860x-backlight bindings
+
+Optional properties:
+ - maxim,max8925-dual-string: whether support dual string
+
+Example:
+
+ backlights {
+ maxim,max8925-dual-string = <0>;
+ };
diff --git a/Documentation/devicetree/bindings/video/backlight/pwm-backlight.txt b/Documentation/devicetree/bindings/video/backlight/pwm-backlight.txt
new file mode 100644
index 000000000..764db86d4
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/backlight/pwm-backlight.txt
@@ -0,0 +1,35 @@
+pwm-backlight bindings
+
+Required properties:
+ - compatible: "pwm-backlight"
+ - pwms: OF device-tree PWM specification (see PWM binding[0])
+ - brightness-levels: Array of distinct brightness levels. Typically these
+ are in the range from 0 to 255, but any range starting at 0 will do.
+ The actual brightness level (PWM duty cycle) will be interpolated
+ from these values. 0 means a 0% duty cycle (darkest/off), while the
+ last value in the array represents a 100% duty cycle (brightest).
+ - default-brightness-level: the default brightness level (index into the
+ array defined by the "brightness-levels" property)
+ - power-supply: regulator for supply voltage
+
+Optional properties:
+ - pwm-names: a list of names for the PWM devices specified in the
+ "pwms" property (see PWM binding[0])
+ - enable-gpios: contains a single GPIO specifier for the GPIO which enables
+ and disables the backlight (see GPIO binding[1])
+
+[0]: Documentation/devicetree/bindings/pwm/pwm.txt
+[1]: Documentation/devicetree/bindings/gpio/gpio.txt
+
+Example:
+
+ backlight {
+ compatible = "pwm-backlight";
+ pwms = <&pwm 0 5000000>;
+
+ brightness-levels = <0 4 8 16 32 64 128 255>;
+ default-brightness-level = <6>;
+
+ power-supply = <&vdd_bl_reg>;
+ enable-gpios = <&gpio 58 0>;
+ };
diff --git a/Documentation/devicetree/bindings/video/backlight/sky81452-backlight.txt b/Documentation/devicetree/bindings/video/backlight/sky81452-backlight.txt
new file mode 100644
index 000000000..8bf2940f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/backlight/sky81452-backlight.txt
@@ -0,0 +1,29 @@
+SKY81452-backlight bindings
+
+Required properties:
+- compatible : Must be "skyworks,sky81452-backlight"
+
+Optional properties:
+- name : Name of backlight device. Default is 'lcd-backlight'.
+- gpios : GPIO to use to EN pin.
+ See Documentation/devicetree/bindings/gpio/gpio.txt
+- led-sources : List of enabled channels from 0 to 5.
+ See Documentation/devicetree/bindings/leds/common.txt
+- skyworks,ignore-pwm : Ignore both PWM input
+- skyworks,dpwm-mode : Enable DPWM dimming mode, otherwise Analog dimming.
+- skyworks,phase-shift : Enable phase shift mode
+- skyworks,short-detection-threshold-volt
+ : It should be one of 4, 5, 6 and 7V.
+- skyworks,current-limit-mA
+ : It should be 2300mA or 2750mA.
+
+Example:
+
+ backlight {
+ compatible = "skyworks,sky81452-backlight";
+ name = "pwm-backlight";
+ led-sources = <0 1 2 5>;
+ skyworks,ignore-pwm;
+ skyworks,phase-shift;
+ skyworks,current-limit-mA = <2300>;
+ };
diff --git a/Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt b/Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt
new file mode 100644
index 000000000..5fb9279ac
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/backlight/tps65217-backlight.txt
@@ -0,0 +1,27 @@
+TPS65217 family of regulators
+
+The TPS65217 chip contains a boost converter and current sinks which can be
+used to drive LEDs for use as backlights.
+
+Required properties:
+- compatible: "ti,tps65217"
+- reg: I2C slave address
+- backlight: node for specifying WLED1 and WLED2 lines in TPS65217
+- isel: selection bit, valid values: 1 for ISEL1 (low-level) and 2 for ISEL2 (high-level)
+- fdim: PWM dimming frequency, valid values: 100, 200, 500, 1000
+- default-brightness: valid values: 0-100
+
+Each regulator is defined using the standard binding for regulators.
+
+Example:
+
+ tps: tps@24 {
+ reg = <0x24>;
+ compatible = "ti,tps65217";
+ backlight {
+ isel = <1>; /* 1 - ISET1, 2 ISET2 */
+ fdim = <100>; /* TPS65217_BL_FDIM_100HZ */
+ default-brightness = <50>;
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/video/bridge/ps8622.txt b/Documentation/devicetree/bindings/video/bridge/ps8622.txt
new file mode 100644
index 000000000..c989c3807
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/bridge/ps8622.txt
@@ -0,0 +1,31 @@
+ps8622-bridge bindings
+
+Required properties:
+ - compatible: "parade,ps8622" or "parade,ps8625"
+ - reg: first i2c address of the bridge
+ - sleep-gpios: OF device-tree gpio specification for PD_ pin.
+ - reset-gpios: OF device-tree gpio specification for RST_ pin.
+
+Optional properties:
+ - lane-count: number of DP lanes to use
+ - use-external-pwm: backlight will be controlled by an external PWM
+ - video interfaces: Device node can contain video interface port
+ nodes for panel according to [1].
+
+[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+ lvds-bridge@48 {
+ compatible = "parade,ps8622";
+ reg = <0x48>;
+ sleep-gpios = <&gpc3 6 1 0 0>;
+ reset-gpios = <&gpc3 1 1 0 0>;
+ lane-count = <1>;
+ ports {
+ port@0 {
+ bridge_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/bridge/ptn3460.txt b/Documentation/devicetree/bindings/video/bridge/ptn3460.txt
new file mode 100644
index 000000000..361971ba1
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/bridge/ptn3460.txt
@@ -0,0 +1,39 @@
+ptn3460 bridge bindings
+
+Required properties:
+ - compatible: "nxp,ptn3460"
+ - reg: i2c address of the bridge
+ - powerdown-gpio: OF device-tree gpio specification for PD_N pin.
+ - reset-gpio: OF device-tree gpio specification for RST_N pin.
+ - edid-emulation: The EDID emulation entry to use
+ +-------+------------+------------------+
+ | Value | Resolution | Description |
+ | 0 | 1024x768 | NXP Generic |
+ | 1 | 1920x1080 | NXP Generic |
+ | 2 | 1920x1080 | NXP Generic |
+ | 3 | 1600x900 | Samsung LTM200KT |
+ | 4 | 1920x1080 | Samsung LTM230HT |
+ | 5 | 1366x768 | NXP Generic |
+ | 6 | 1600x900 | ChiMei M215HGE |
+ +-------+------------+------------------+
+
+ - video interfaces: Device node can contain video interface port
+ nodes for panel according to [1].
+
+[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+ lvds-bridge@20 {
+ compatible = "nxp,ptn3460";
+ reg = <0x20>;
+ powerdown-gpio = <&gpy2 5 1 0 0>;
+ reset-gpio = <&gpx1 5 1 0 0>;
+ edid-emulation = <5>;
+ ports {
+ port@0 {
+ bridge_out: endpoint {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/cirrus,clps711x-fb.txt b/Documentation/devicetree/bindings/video/cirrus,clps711x-fb.txt
new file mode 100644
index 000000000..6fc3c6ade
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/cirrus,clps711x-fb.txt
@@ -0,0 +1,47 @@
+* Currus Logic CLPS711X Framebuffer
+
+Required properties:
+- compatible: Shall contain "cirrus,clps711x-fb".
+- reg : Physical base address and length of the controller's registers +
+ location and size of the framebuffer memory.
+- clocks : phandle + clock specifier pair of the FB reference clock.
+- display : phandle to a display node as described in
+ Documentation/devicetree/bindings/video/display-timing.txt.
+ Additionally, the display node has to define properties:
+ - bits-per-pixel: Bits per pixel.
+ - ac-prescale : LCD AC bias frequency. This frequency is the required
+ AC bias frequency for a given manufacturer's LCD plate.
+ - cmap-invert : Invert the color levels (Optional).
+
+Optional properties:
+- lcd-supply: Regulator for LCD supply voltage.
+
+Example:
+ fb: fb@800002c0 {
+ compatible = "cirrus,ep7312-fb", "cirrus,clps711x-fb";
+ reg = <0x800002c0 0xd44>, <0x60000000 0xc000>;
+ clocks = <&clks 2>;
+ lcd-supply = <&reg5v0>;
+ display = <&display>;
+ };
+
+ display: display {
+ model = "320x240x4";
+ native-mode = <&timing0>;
+ bits-per-pixel = <4>;
+ ac-prescale = <17>;
+
+ display-timings {
+ timing0: 320x240 {
+ hactive = <320>;
+ hback-porch = <0>;
+ hfront-porch = <0>;
+ hsync-len = <0>;
+ vactive = <240>;
+ vback-porch = <0>;
+ vfront-porch = <0>;
+ vsync-len = <0>;
+ clock-frequency = <6500000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/display-timing.txt b/Documentation/devicetree/bindings/video/display-timing.txt
new file mode 100644
index 000000000..e1d4a0b59
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/display-timing.txt
@@ -0,0 +1,110 @@
+display-timing bindings
+=======================
+
+display-timings node
+--------------------
+
+required properties:
+ - none
+
+optional properties:
+ - native-mode: The native mode for the display, in case multiple modes are
+ provided. When omitted, assume the first node is the native.
+
+timing subnode
+--------------
+
+required properties:
+ - hactive, vactive: display resolution
+ - hfront-porch, hback-porch, hsync-len: horizontal display timing parameters
+ in pixels
+ vfront-porch, vback-porch, vsync-len: vertical display timing parameters in
+ lines
+ - clock-frequency: display clock in Hz
+
+optional properties:
+ - hsync-active: hsync pulse is active low/high/ignored
+ - vsync-active: vsync pulse is active low/high/ignored
+ - de-active: data-enable pulse is active low/high/ignored
+ - pixelclk-active: with
+ - active high = drive pixel data on rising edge/
+ sample data on falling edge
+ - active low = drive pixel data on falling edge/
+ sample data on rising edge
+ - ignored = ignored
+ - interlaced (bool): boolean to enable interlaced mode
+ - doublescan (bool): boolean to enable doublescan mode
+ - doubleclk (bool): boolean to enable doubleclock mode
+
+All the optional properties that are not bool follow the following logic:
+ <1>: high active
+ <0>: low active
+ omitted: not used on hardware
+
+There are different ways of describing the capabilities of a display. The
+devicetree representation corresponds to the one commonly found in datasheets
+for displays. If a display supports multiple signal timings, the native-mode
+can be specified.
+
+The parameters are defined as:
+
+ +----------+-------------------------------------+----------+-------+
+ | | ↑ | | |
+ | | |vback_porch | | |
+ | | ↓ | | |
+ +----------#######################################----------+-------+
+ | # ↑ # | |
+ | # | # | |
+ | hback # | # hfront | hsync |
+ | porch # | hactive # porch | len |
+ |<-------->#<-------+--------------------------->#<-------->|<----->|
+ | # | # | |
+ | # |vactive # | |
+ | # | # | |
+ | # ↓ # | |
+ +----------#######################################----------+-------+
+ | | ↑ | | |
+ | | |vfront_porch | | |
+ | | ↓ | | |
+ +----------+-------------------------------------+----------+-------+
+ | | ↑ | | |
+ | | |vsync_len | | |
+ | | ↓ | | |
+ +----------+-------------------------------------+----------+-------+
+
+Example:
+
+ display-timings {
+ native-mode = <&timing0>;
+ timing0: 1080p24 {
+ /* 1920x1080p24 */
+ clock-frequency = <52000000>;
+ hactive = <1920>;
+ vactive = <1080>;
+ hfront-porch = <25>;
+ hback-porch = <25>;
+ hsync-len = <25>;
+ vback-porch = <2>;
+ vfront-porch = <2>;
+ vsync-len = <2>;
+ hsync-active = <1>;
+ };
+ };
+
+Every required property also supports the use of ranges, so the commonly used
+datasheet description with minimum, typical and maximum values can be used.
+
+Example:
+
+ timing1: timing {
+ /* 1920x1080p24 */
+ clock-frequency = <148500000>;
+ hactive = <1920>;
+ vactive = <1080>;
+ hsync-len = <0 44 60>;
+ hfront-porch = <80 88 95>;
+ hback-porch = <100 148 160>;
+ vfront-porch = <0 4 6>;
+ vback-porch = <0 36 50>;
+ vsync-len = <0 5 6>;
+ };
diff --git a/Documentation/devicetree/bindings/video/dvi-connector.txt b/Documentation/devicetree/bindings/video/dvi-connector.txt
new file mode 100644
index 000000000..fc53f7c60
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/dvi-connector.txt
@@ -0,0 +1,35 @@
+DVI Connector
+==============
+
+Required properties:
+- compatible: "dvi-connector"
+
+Optional properties:
+- label: a symbolic name for the connector
+- ddc-i2c-bus: phandle to the i2c bus that is connected to DVI DDC
+- analog: the connector has DVI analog pins
+- digital: the connector has DVI digital pins
+- dual-link: the connector has pins for DVI dual-link
+
+Required nodes:
+- Video port for DVI input
+
+Note: One (or both) of 'analog' or 'digital' must be set.
+
+Example
+-------
+
+dvi0: connector@0 {
+ compatible = "dvi-connector";
+ label = "dvi";
+
+ digital;
+
+ ddc-i2c-bus = <&i2c3>;
+
+ port {
+ dvi_connector_in: endpoint {
+ remote-endpoint = <&tfp410_out>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/dw_hdmi-rockchip.txt b/Documentation/devicetree/bindings/video/dw_hdmi-rockchip.txt
new file mode 100644
index 000000000..668091f27
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/dw_hdmi-rockchip.txt
@@ -0,0 +1,46 @@
+Rockchip specific extensions to the Synopsys Designware HDMI
+================================
+
+Required properties:
+- compatible: "rockchip,rk3288-dw-hdmi";
+- reg: Physical base address and length of the controller's registers.
+- clocks: phandle to hdmi iahb and isfr clocks.
+- clock-names: should be "iahb" "isfr"
+- rockchip,grf: this soc should set GRF regs to mux vopl/vopb.
+- interrupts: HDMI interrupt number
+- ports: contain a port node with endpoint definitions as defined in
+ Documentation/devicetree/bindings/media/video-interfaces.txt. For
+ vopb,set the reg = <0> and set the reg = <1> for vopl.
+- reg-io-width: the width of the reg:1,4, the value should be 4 on
+ rk3288 platform
+
+Optional properties
+- ddc-i2c-bus: phandle of an I2C controller used for DDC EDID probing
+- clocks, clock-names: phandle to the HDMI CEC clock, name should be "cec"
+
+Example:
+hdmi: hdmi@ff980000 {
+ compatible = "rockchip,rk3288-dw-hdmi";
+ reg = <0xff980000 0x20000>;
+ reg-io-width = <4>;
+ ddc-i2c-bus = <&i2c5>;
+ rockchip,grf = <&grf>;
+ interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>;
+ clock-names = "iahb", "isfr";
+ status = "disabled";
+ ports {
+ hdmi_in: port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ hdmi_in_vopb: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&vopb_out_hdmi>;
+ };
+ hdmi_in_vopl: endpoint@1 {
+ reg = <1>;
+ remote-endpoint = <&vopl_out_hdmi>;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/exynos7-decon.txt b/Documentation/devicetree/bindings/video/exynos7-decon.txt
new file mode 100644
index 000000000..f5f9c8d4a
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/exynos7-decon.txt
@@ -0,0 +1,68 @@
+Device-Tree bindings for Samsung Exynos7 SoC display controller (DECON)
+
+DECON (Display and Enhancement Controller) is the Display Controller for the
+Exynos7 series of SoCs which transfers the image data from a video memory
+buffer to an external LCD interface.
+
+Required properties:
+- compatible: value should be "samsung,exynos7-decon";
+
+- reg: physical base address and length of the DECON registers set.
+
+- interrupt-parent: should be the phandle of the decon controller's
+ parent interrupt controller.
+
+- interrupts: should contain a list of all DECON IP block interrupts in the
+ order: FIFO Level, VSYNC, LCD_SYSTEM. The interrupt specifier
+ format depends on the interrupt controller used.
+
+- interrupt-names: should contain the interrupt names: "fifo", "vsync",
+ "lcd_sys", in the same order as they were listed in the interrupts
+ property.
+
+- pinctrl-0: pin control group to be used for this controller.
+
+- pinctrl-names: must contain a "default" entry.
+
+- clocks: must include clock specifiers corresponding to entries in the
+ clock-names property.
+
+- clock-names: list of clock names sorted in the same order as the clocks
+ property. Must contain "pclk_decon0", "aclk_decon0",
+ "decon0_eclk", "decon0_vclk".
+- i80-if-timings: timing configuration for lcd i80 interface support.
+
+Optional Properties:
+- samsung,power-domain: a phandle to DECON power domain node.
+- display-timings: timing settings for DECON, as described in document [1].
+ Can be used in case timings cannot be provided otherwise
+ or to override timings provided by the panel.
+
+[1]: Documentation/devicetree/bindings/video/display-timing.txt
+
+Example:
+
+SoC specific DT entry:
+
+ decon@13930000 {
+ compatible = "samsung,exynos7-decon";
+ interrupt-parent = <&combiner>;
+ reg = <0x13930000 0x1000>;
+ interrupt-names = "lcd_sys", "vsync", "fifo";
+ interrupts = <0 188 0>, <0 189 0>, <0 190 0>;
+ clocks = <&clock_disp PCLK_DECON_INT>,
+ <&clock_disp ACLK_DECON_INT>,
+ <&clock_disp SCLK_DECON_INT_ECLK>,
+ <&clock_disp SCLK_DECON_INT_EXTCLKPLL>;
+ clock-names = "pclk_decon0", "aclk_decon0", "decon0_eclk",
+ "decon0_vclk";
+ status = "disabled";
+ };
+
+Board specific DT entry:
+
+ decon@13930000 {
+ pinctrl-0 = <&lcd_clk &pwm1_out>;
+ pinctrl-names = "default";
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/video/exynos_dp.txt b/Documentation/devicetree/bindings/video/exynos_dp.txt
new file mode 100644
index 000000000..7a3a9cdb8
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/exynos_dp.txt
@@ -0,0 +1,120 @@
+The Exynos display port interface should be configured based on
+the type of panel connected to it.
+
+We use two nodes:
+ -dp-controller node
+ -dptx-phy node(defined inside dp-controller node)
+
+For the DP-PHY initialization, we use the dptx-phy node.
+Required properties for dptx-phy: deprecated, use phys and phy-names
+ -reg: deprecated
+ Base address of DP PHY register.
+ -samsung,enable-mask: deprecated
+ The bit-mask used to enable/disable DP PHY.
+
+For the Panel initialization, we read data from dp-controller node.
+Required properties for dp-controller:
+ -compatible:
+ should be "samsung,exynos5-dp".
+ -reg:
+ physical base address of the controller and length
+ of memory mapped region.
+ -interrupts:
+ interrupt combiner values.
+ -clocks:
+ from common clock binding: handle to dp clock.
+ -clock-names:
+ from common clock binding: Shall be "dp".
+ -interrupt-parent:
+ phandle to Interrupt combiner node.
+ -phys:
+ from general PHY binding: the phandle for the PHY device.
+ -phy-names:
+ from general PHY binding: Should be "dp".
+ -samsung,color-space:
+ input video data format.
+ COLOR_RGB = 0, COLOR_YCBCR422 = 1, COLOR_YCBCR444 = 2
+ -samsung,dynamic-range:
+ dynamic range for input video data.
+ VESA = 0, CEA = 1
+ -samsung,ycbcr-coeff:
+ YCbCr co-efficients for input video.
+ COLOR_YCBCR601 = 0, COLOR_YCBCR709 = 1
+ -samsung,color-depth:
+ number of bits per colour component.
+ COLOR_6 = 0, COLOR_8 = 1, COLOR_10 = 2, COLOR_12 = 3
+ -samsung,link-rate:
+ link rate supported by the panel.
+ LINK_RATE_1_62GBPS = 0x6, LINK_RATE_2_70GBPS = 0x0A
+ -samsung,lane-count:
+ number of lanes supported by the panel.
+ LANE_COUNT1 = 1, LANE_COUNT2 = 2, LANE_COUNT4 = 4
+ - display-timings: timings for the connected panel as described by
+ Documentation/devicetree/bindings/video/display-timing.txt
+
+Optional properties for dp-controller:
+ -interlaced:
+ interlace scan mode.
+ Progressive if defined, Interlaced if not defined
+ -vsync-active-high:
+ VSYNC polarity configuration.
+ High if defined, Low if not defined
+ -hsync-active-high:
+ HSYNC polarity configuration.
+ High if defined, Low if not defined
+ -samsung,hpd-gpio:
+ Hotplug detect GPIO.
+ Indicates which GPIO should be used for hotplug
+ detection
+ -video interfaces: Device node can contain video interface port
+ nodes according to [1].
+
+[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+
+SOC specific portion:
+ dp-controller {
+ compatible = "samsung,exynos5-dp";
+ reg = <0x145b0000 0x10000>;
+ interrupts = <10 3>;
+ interrupt-parent = <&combiner>;
+ clocks = <&clock 342>;
+ clock-names = "dp";
+
+ phys = <&dp_phy>;
+ phy-names = "dp";
+ };
+
+Board Specific portion:
+ dp-controller {
+ samsung,color-space = <0>;
+ samsung,dynamic-range = <0>;
+ samsung,ycbcr-coeff = <0>;
+ samsung,color-depth = <1>;
+ samsung,link-rate = <0x0a>;
+ samsung,lane-count = <4>;
+
+ display-timings {
+ native-mode = <&lcd_timing>;
+ lcd_timing: 1366x768 {
+ clock-frequency = <70589280>;
+ hactive = <1366>;
+ vactive = <768>;
+ hfront-porch = <40>;
+ hback-porch = <40>;
+ hsync-len = <32>;
+ vback-porch = <10>;
+ vfront-porch = <12>;
+ vsync-len = <6>;
+ };
+ };
+
+ ports {
+ port@0 {
+ dp_out: endpoint {
+ remote-endpoint = <&bridge_in>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/exynos_dsim.txt b/Documentation/devicetree/bindings/video/exynos_dsim.txt
new file mode 100644
index 000000000..802aa7ef6
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/exynos_dsim.txt
@@ -0,0 +1,84 @@
+Exynos MIPI DSI Master
+
+Required properties:
+ - compatible: value should be one of the following
+ "samsung,exynos3250-mipi-dsi" /* for Exynos3250/3472 SoCs */
+ "samsung,exynos4210-mipi-dsi" /* for Exynos4 SoCs */
+ "samsung,exynos4415-mipi-dsi" /* for Exynos4415 SoC */
+ "samsung,exynos5410-mipi-dsi" /* for Exynos5410/5420/5440 SoCs */
+ - reg: physical base address and length of the registers set for the device
+ - interrupts: should contain DSI interrupt
+ - clocks: list of clock specifiers, must contain an entry for each required
+ entry in clock-names
+ - clock-names: should include "bus_clk"and "pll_clk" entries
+ - phys: list of phy specifiers, must contain an entry for each required
+ entry in phy-names
+ - phy-names: should include "dsim" entry
+ - vddcore-supply: MIPI DSIM Core voltage supply (e.g. 1.1V)
+ - vddio-supply: MIPI DSIM I/O and PLL voltage supply (e.g. 1.8V)
+ - samsung,pll-clock-frequency: specifies frequency of the "pll_clk" clock
+ - #address-cells, #size-cells: should be set respectively to <1> and <0>
+ according to DSI host bindings (see MIPI DSI bindings [1])
+
+Optional properties:
+ - power-domains: a phandle to DSIM power domain node
+
+Child nodes:
+ Should contain DSI peripheral nodes (see MIPI DSI bindings [1]).
+
+Video interfaces:
+ Device node can contain video interface port nodes according to [2].
+ The following are properties specific to those nodes:
+
+ port node:
+ - reg: (required) can be 0 for input RGB/I80 port or 1 for DSI port;
+
+ endpoint node of DSI port (reg = 1):
+ - samsung,burst-clock-frequency: specifies DSI frequency in high-speed burst
+ mode
+ - samsung,esc-clock-frequency: specifies DSI frequency in escape mode
+
+[1]: Documentation/devicetree/bindings/mipi/dsi/mipi-dsi-bus.txt
+[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+
+ dsi@11C80000 {
+ compatible = "samsung,exynos4210-mipi-dsi";
+ reg = <0x11C80000 0x10000>;
+ interrupts = <0 79 0>;
+ clocks = <&clock 286>, <&clock 143>;
+ clock-names = "bus_clk", "pll_clk";
+ phys = <&mipi_phy 1>;
+ phy-names = "dsim";
+ vddcore-supply = <&vusb_reg>;
+ vddio-supply = <&vmipi_reg>;
+ power-domains = <&pd_lcd0>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ samsung,pll-clock-frequency = <24000000>;
+
+ panel@1 {
+ reg = <0>;
+ ...
+ port {
+ panel_ep: endpoint {
+ remote-endpoint = <&dsi_ep>;
+ };
+ };
+ };
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@1 {
+ dsi_ep: endpoint {
+ reg = <0>;
+ samsung,burst-clock-frequency = <500000000>;
+ samsung,esc-clock-frequency = <20000000>;
+ remote-endpoint = <&panel_ep>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/exynos_hdmi.txt b/Documentation/devicetree/bindings/video/exynos_hdmi.txt
new file mode 100644
index 000000000..1fd8cf9cb
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/exynos_hdmi.txt
@@ -0,0 +1,43 @@
+Device-Tree bindings for drm hdmi driver
+
+Required properties:
+- compatible: value should be one among the following:
+ 1) "samsung,exynos5-hdmi" <DEPRECATED>
+ 2) "samsung,exynos4210-hdmi"
+ 3) "samsung,exynos4212-hdmi"
+ 4) "samsung,exynos5420-hdmi"
+- reg: physical base address of the hdmi and length of memory mapped
+ region.
+- interrupts: interrupt number to the cpu.
+- hpd-gpio: following information about the hotplug gpio pin.
+ a) phandle of the gpio controller node.
+ b) pin number within the gpio controller.
+ c) optional flags and pull up/down.
+- clocks: list of clock IDs from SoC clock driver.
+ a) hdmi: Gate of HDMI IP bus clock.
+ b) sclk_hdmi: Gate of HDMI special clock.
+ c) sclk_pixel: Pixel special clock, one of the two possible inputs of
+ HDMI clock mux.
+ d) sclk_hdmiphy: HDMI PHY clock output, one of two possible inputs of
+ HDMI clock mux.
+ e) mout_hdmi: It is required by the driver to switch between the 2
+ parents i.e. sclk_pixel and sclk_hdmiphy. If hdmiphy is stable
+ after configuration, parent is set to sclk_hdmiphy else
+ sclk_pixel.
+- clock-names: aliases as per driver requirements for above clock IDs:
+ "hdmi", "sclk_hdmi", "sclk_pixel", "sclk_hdmiphy" and "mout_hdmi".
+- ddc: phandle to the hdmi ddc node
+- phy: phandle to the hdmi phy node
+- samsung,syscon-phandle: phandle for system controller node for PMU.
+
+Example:
+
+ hdmi {
+ compatible = "samsung,exynos4212-hdmi";
+ reg = <0x14530000 0x100000>;
+ interrupts = <0 95 0>;
+ hpd-gpio = <&gpx3 7 1>;
+ ddc = <&hdmi_ddc_node>;
+ phy = <&hdmi_phy_node>;
+ samsung,syscon-phandle = <&pmu_system_controller>;
+ };
diff --git a/Documentation/devicetree/bindings/video/exynos_hdmiddc.txt b/Documentation/devicetree/bindings/video/exynos_hdmiddc.txt
new file mode 100644
index 000000000..41eee9715
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/exynos_hdmiddc.txt
@@ -0,0 +1,15 @@
+Device-Tree bindings for hdmiddc driver
+
+Required properties:
+- compatible: value should be one of the following
+ 1) "samsung,exynos5-hdmiddc" <DEPRECATED>
+ 2) "samsung,exynos4210-hdmiddc"
+
+- reg: I2C address of the hdmiddc device.
+
+Example:
+
+ hdmiddc {
+ compatible = "samsung,exynos4210-hdmiddc";
+ reg = <0x50>;
+ };
diff --git a/Documentation/devicetree/bindings/video/exynos_hdmiphy.txt b/Documentation/devicetree/bindings/video/exynos_hdmiphy.txt
new file mode 100644
index 000000000..162f641f7
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/exynos_hdmiphy.txt
@@ -0,0 +1,15 @@
+Device-Tree bindings for hdmiphy driver
+
+Required properties:
+- compatible: value should be one of the following:
+ 1) "samsung,exynos5-hdmiphy" <DEPRECATED>
+ 2) "samsung,exynos4210-hdmiphy".
+ 3) "samsung,exynos4212-hdmiphy".
+- reg: I2C address of the hdmiphy device.
+
+Example:
+
+ hdmiphy {
+ compatible = "samsung,exynos4210-hdmiphy";
+ reg = <0x38>;
+ };
diff --git a/Documentation/devicetree/bindings/video/exynos_mixer.txt b/Documentation/devicetree/bindings/video/exynos_mixer.txt
new file mode 100644
index 000000000..3e38128f8
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/exynos_mixer.txt
@@ -0,0 +1,26 @@
+Device-Tree bindings for mixer driver
+
+Required properties:
+- compatible: value should be one of the following:
+ 1) "samsung,exynos5-mixer" <DEPRECATED>
+ 2) "samsung,exynos4210-mixer"
+ 3) "samsung,exynos4212-mixer"
+ 4) "samsung,exynos5250-mixer"
+ 5) "samsung,exynos5420-mixer"
+
+- reg: physical base address of the mixer and length of memory mapped
+ region.
+- interrupts: interrupt number to the cpu.
+- clocks: list of clock IDs from SoC clock driver.
+ a) mixer: Gate of Mixer IP bus clock.
+ b) sclk_hdmi: HDMI Special clock, one of the two possible inputs of
+ mixer mux.
+ c) hdmi: Gate of HDMI IP bus clock, needed together with sclk_hdmi.
+
+Example:
+
+ mixer {
+ compatible = "samsung,exynos5250-mixer";
+ reg = <0x14450000 0x10000>;
+ interrupts = <0 94 0>;
+ };
diff --git a/Documentation/devicetree/bindings/video/fsl,imx-fb.txt b/Documentation/devicetree/bindings/video/fsl,imx-fb.txt
new file mode 100644
index 000000000..8c8c2f4e4
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/fsl,imx-fb.txt
@@ -0,0 +1,55 @@
+Freescale imx21 Framebuffer
+
+This framebuffer driver supports devices imx1, imx21, imx25, and imx27.
+
+Required properties:
+- compatible : "fsl,<chip>-fb", chip should be imx1 or imx21
+- reg : Should contain 1 register ranges(address and length)
+- interrupts : One interrupt of the fb dev
+
+Required nodes:
+- display: Phandle to a display node as described in
+ Documentation/devicetree/bindings/video/display-timing.txt
+ Additional, the display node has to define properties:
+ - bits-per-pixel: Bits per pixel
+ - fsl,pcr: LCDC PCR value
+
+Optional properties:
+- lcd-supply: Regulator for LCD supply voltage.
+- fsl,dmacr: DMA Control Register value. This is optional. By default, the
+ register is not modified as recommended by the datasheet.
+- fsl,lpccr: Contrast Control Register value. This property provides the
+ default value for the contrast control register.
+ If that property is omitted, the register is zeroed.
+- fsl,lscr1: LCDC Sharp Configuration Register value.
+
+Example:
+
+ imxfb: fb@10021000 {
+ compatible = "fsl,imx21-fb";
+ interrupts = <61>;
+ reg = <0x10021000 0x1000>;
+ display = <&display0>;
+ };
+
+ ...
+
+ display0: display0 {
+ model = "Primeview-PD050VL1";
+ native-mode = <&timing_disp0>;
+ bits-per-pixel = <16>;
+ fsl,pcr = <0xf0c88080>; /* non-standard but required */
+ display-timings {
+ timing_disp0: 640x480 {
+ hactive = <640>;
+ vactive = <480>;
+ hback-porch = <112>;
+ hfront-porch = <36>;
+ hsync-len = <32>;
+ vback-porch = <33>;
+ vfront-porch = <33>;
+ vsync-len = <2>;
+ clock-frequency = <25000000>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/hdmi-connector.txt b/Documentation/devicetree/bindings/video/hdmi-connector.txt
new file mode 100644
index 000000000..acd5668b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/hdmi-connector.txt
@@ -0,0 +1,29 @@
+HDMI Connector
+==============
+
+Required properties:
+- compatible: "hdmi-connector"
+- type: the HDMI connector type: "a", "b", "c", "d" or "e"
+
+Optional properties:
+- label: a symbolic name for the connector
+- hpd-gpios: HPD GPIO number
+
+Required nodes:
+- Video port for HDMI input
+
+Example
+-------
+
+hdmi0: connector@1 {
+ compatible = "hdmi-connector";
+ label = "hdmi";
+
+ type = "a";
+
+ port {
+ hdmi_connector_in: endpoint {
+ remote-endpoint = <&tpd12s015_out>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/lgphilips,lb035q02.txt b/Documentation/devicetree/bindings/video/lgphilips,lb035q02.txt
new file mode 100644
index 000000000..1a1e653e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/lgphilips,lb035q02.txt
@@ -0,0 +1,33 @@
+LG.Philips LB035Q02 Panel
+=========================
+
+Required properties:
+- compatible: "lgphilips,lb035q02"
+- enable-gpios: panel enable gpio
+
+Optional properties:
+- label: a symbolic name for the panel
+
+Required nodes:
+- Video port for DPI input
+
+Example
+-------
+
+lcd-panel: panel@0 {
+ compatible = "lgphilips,lb035q02";
+ reg = <0>;
+ spi-max-frequency = <100000>;
+ spi-cpol;
+ spi-cpha;
+
+ label = "lcd";
+
+ enable-gpios = <&gpio7 7 0>;
+
+ port {
+ lcd_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/panel-dpi.txt b/Documentation/devicetree/bindings/video/panel-dpi.txt
new file mode 100644
index 000000000..a40180b05
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/panel-dpi.txt
@@ -0,0 +1,45 @@
+Generic MIPI DPI Panel
+======================
+
+Required properties:
+- compatible: "panel-dpi"
+
+Optional properties:
+- label: a symbolic name for the panel
+- enable-gpios: panel enable gpio
+
+Required nodes:
+- "panel-timing" containing video timings
+ (Documentation/devicetree/bindings/video/display-timing.txt)
+- Video port for DPI input
+
+Example
+-------
+
+lcd0: display@0 {
+ compatible = "samsung,lte430wq-f0c", "panel-dpi";
+ label = "lcd";
+
+ port {
+ lcd_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ panel-timing {
+ clock-frequency = <9200000>;
+ hactive = <480>;
+ vactive = <272>;
+ hfront-porch = <8>;
+ hback-porch = <4>;
+ hsync-len = <41>;
+ vback-porch = <2>;
+ vfront-porch = <4>;
+ vsync-len = <10>;
+
+ hsync-active = <0>;
+ vsync-active = <0>;
+ de-active = <1>;
+ pixelclk-active = <1>;
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/panel-dsi-cm.txt b/Documentation/devicetree/bindings/video/panel-dsi-cm.txt
new file mode 100644
index 000000000..dce48eb9d
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/panel-dsi-cm.txt
@@ -0,0 +1,29 @@
+Generic MIPI DSI Command Mode Panel
+===================================
+
+Required properties:
+- compatible: "panel-dsi-cm"
+
+Optional properties:
+- label: a symbolic name for the panel
+- reset-gpios: panel reset gpio
+- te-gpios: panel TE gpio
+
+Required nodes:
+- Video port for DSI input
+
+Example
+-------
+
+lcd0: display {
+ compatible = "tpo,taal", "panel-dsi-cm";
+ label = "lcd0";
+
+ reset-gpios = <&gpio4 6 GPIO_ACTIVE_HIGH>;
+
+ port {
+ lcd0_in: endpoint {
+ remote-endpoint = <&dsi1_out_ep>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/renesas,du.txt b/Documentation/devicetree/bindings/video/renesas,du.txt
new file mode 100644
index 000000000..c90232392
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/renesas,du.txt
@@ -0,0 +1,88 @@
+* Renesas R-Car Display Unit (DU)
+
+Required Properties:
+
+ - compatible: must be one of the following.
+ - "renesas,du-r8a7779" for R8A7779 (R-Car H1) compatible DU
+ - "renesas,du-r8a7790" for R8A7790 (R-Car H2) compatible DU
+ - "renesas,du-r8a7791" for R8A7791 (R-Car M2) compatible DU
+
+ - reg: A list of base address and length of each memory resource, one for
+ each entry in the reg-names property.
+ - reg-names: Name of the memory resources. The DU requires one memory
+ resource for the DU core (named "du") and one memory resource for each
+ LVDS encoder (named "lvds.x" with "x" being the LVDS controller numerical
+ index).
+
+ - interrupt-parent: phandle of the parent interrupt controller.
+ - interrupts: Interrupt specifiers for the DU interrupts.
+
+ - clocks: A list of phandles + clock-specifier pairs, one for each entry in
+ the clock-names property.
+ - clock-names: Name of the clocks. This property is model-dependent.
+ - R8A7779 uses a single functional clock. The clock doesn't need to be
+ named.
+ - R8A7790 and R8A7791 use one functional clock per channel and one clock
+ per LVDS encoder. The functional clocks must be named "du.x" with "x"
+ being the channel numerical index. The LVDS clocks must be named
+ "lvds.x" with "x" being the LVDS encoder numerical index.
+ - In addition to the functional and encoder clocks, all DU versions also
+ support externally supplied pixel clocks. Those clocks are optional.
+ When supplied they must be named "dclkin.x" with "x" being the input
+ clock numerical index.
+
+Required nodes:
+
+The connections to the DU output video ports are modeled using the OF graph
+bindings specified in Documentation/devicetree/bindings/graph.txt.
+
+The following table lists for each supported model the port number
+corresponding to each DU output.
+
+ Port 0 Port1 Port2
+-----------------------------------------------------------------------------
+ R8A7779 (H1) DPAD 0 DPAD 1 -
+ R8A7790 (H2) DPAD LVDS 0 LVDS 1
+ R8A7791 (M2) DPAD LVDS 0 -
+
+
+Example: R8A7790 (R-Car H2) DU
+
+ du: du@feb00000 {
+ compatible = "renesas,du-r8a7790";
+ reg = <0 0xfeb00000 0 0x70000>,
+ <0 0xfeb90000 0 0x1c>,
+ <0 0xfeb94000 0 0x1c>;
+ reg-names = "du", "lvds.0", "lvds.1";
+ interrupt-parent = <&gic>;
+ interrupts = <0 256 IRQ_TYPE_LEVEL_HIGH>,
+ <0 268 IRQ_TYPE_LEVEL_HIGH>,
+ <0 269 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&mstp7_clks R8A7790_CLK_DU0>,
+ <&mstp7_clks R8A7790_CLK_DU1>,
+ <&mstp7_clks R8A7790_CLK_DU2>,
+ <&mstp7_clks R8A7790_CLK_LVDS0>,
+ <&mstp7_clks R8A7790_CLK_LVDS1>;
+ clock-names = "du.0", "du.1", "du.2", "lvds.0", "lvds.1";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ du_out_rgb: endpoint {
+ };
+ };
+ port@1 {
+ reg = <1>;
+ du_out_lvds0: endpoint {
+ };
+ };
+ port@2 {
+ reg = <2>;
+ du_out_lvds1: endpoint {
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/rockchip-drm.txt b/Documentation/devicetree/bindings/video/rockchip-drm.txt
new file mode 100644
index 000000000..7fff58249
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/rockchip-drm.txt
@@ -0,0 +1,19 @@
+Rockchip DRM master device
+================================
+
+The Rockchip DRM master device is a virtual device needed to list all
+vop devices or other display interface nodes that comprise the
+graphics subsystem.
+
+Required properties:
+- compatible: Should be "rockchip,display-subsystem"
+- ports: Should contain a list of phandles pointing to display interface port
+ of vop devices. vop definitions as defined in
+ Documentation/devicetree/bindings/video/rockchip-vop.txt
+
+example:
+
+display-subsystem {
+ compatible = "rockchip,display-subsystem";
+ ports = <&vopl_out>, <&vopb_out>;
+};
diff --git a/Documentation/devicetree/bindings/video/rockchip-vop.txt b/Documentation/devicetree/bindings/video/rockchip-vop.txt
new file mode 100644
index 000000000..d15351f23
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/rockchip-vop.txt
@@ -0,0 +1,58 @@
+device-tree bindings for rockchip soc display controller (vop)
+
+VOP (Visual Output Processor) is the Display Controller for the Rockchip
+series of SoCs which transfers the image data from a video memory
+buffer to an external LCD interface.
+
+Required properties:
+- compatible: value should be one of the following
+ "rockchip,rk3288-vop";
+
+- interrupts: should contain a list of all VOP IP block interrupts in the
+ order: VSYNC, LCD_SYSTEM. The interrupt specifier
+ format depends on the interrupt controller used.
+
+- clocks: must include clock specifiers corresponding to entries in the
+ clock-names property.
+
+- clock-names: Must contain
+ aclk_vop: for ddr buffer transfer.
+ hclk_vop: for ahb bus to R/W the phy regs.
+ dclk_vop: pixel clock.
+
+- resets: Must contain an entry for each entry in reset-names.
+ See ../reset/reset.txt for details.
+- reset-names: Must include the following entries:
+ - axi
+ - ahb
+ - dclk
+
+- iommus: required a iommu node
+
+- port: A port node with endpoint definitions as defined in
+ Documentation/devicetree/bindings/media/video-interfaces.txt.
+
+Example:
+SoC specific DT entry:
+ vopb: vopb@ff930000 {
+ compatible = "rockchip,rk3288-vop";
+ reg = <0xff930000 0x19c>;
+ interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&cru ACLK_VOP0>, <&cru DCLK_VOP0>, <&cru HCLK_VOP0>;
+ clock-names = "aclk_vop", "dclk_vop", "hclk_vop";
+ resets = <&cru SRST_LCDC1_AXI>, <&cru SRST_LCDC1_AHB>, <&cru SRST_LCDC1_DCLK>;
+ reset-names = "axi", "ahb", "dclk";
+ iommus = <&vopb_mmu>;
+ vopb_out: port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ vopb_out_edp: endpoint@0 {
+ reg = <0>;
+ remote-endpoint=<&edp_in_vopb>;
+ };
+ vopb_out_hdmi: endpoint@1 {
+ reg = <1>;
+ remote-endpoint=<&hdmi_in_vopb>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/samsung-fimd.txt b/Documentation/devicetree/bindings/video/samsung-fimd.txt
new file mode 100644
index 000000000..a8bbbde03
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/samsung-fimd.txt
@@ -0,0 +1,110 @@
+Device-Tree bindings for Samsung SoC display controller (FIMD)
+
+FIMD (Fully Interactive Mobile Display) is the Display Controller for the
+Samsung series of SoCs which transfers the image data from a video memory
+buffer to an external LCD interface.
+
+Required properties:
+- compatible: value should be one of the following
+ "samsung,s3c2443-fimd"; /* for S3C24XX SoCs */
+ "samsung,s3c6400-fimd"; /* for S3C64XX SoCs */
+ "samsung,s5pv210-fimd"; /* for S5PV210 SoC */
+ "samsung,exynos3250-fimd"; /* for Exynos3250/3472 SoCs */
+ "samsung,exynos4210-fimd"; /* for Exynos4 SoCs */
+ "samsung,exynos4415-fimd"; /* for Exynos4415 SoC */
+ "samsung,exynos5250-fimd"; /* for Exynos5 SoCs */
+
+- reg: physical base address and length of the FIMD registers set.
+
+- interrupt-parent: should be the phandle of the fimd controller's
+ parent interrupt controller.
+
+- interrupts: should contain a list of all FIMD IP block interrupts in the
+ order: FIFO Level, VSYNC, LCD_SYSTEM. The interrupt specifier
+ format depends on the interrupt controller used.
+
+- interrupt-names: should contain the interrupt names: "fifo", "vsync",
+ "lcd_sys", in the same order as they were listed in the interrupts
+ property.
+
+- pinctrl-0: pin control group to be used for this controller.
+
+- pinctrl-names: must contain a "default" entry.
+
+- clocks: must include clock specifiers corresponding to entries in the
+ clock-names property.
+
+- clock-names: list of clock names sorted in the same order as the clocks
+ property. Must contain "sclk_fimd" and "fimd".
+
+Optional Properties:
+- power-domains: a phandle to FIMD power domain node.
+- samsung,invert-vden: video enable signal is inverted
+- samsung,invert-vclk: video clock signal is inverted
+- display-timings: timing settings for FIMD, as described in document [1].
+ Can be used in case timings cannot be provided otherwise
+ or to override timings provided by the panel.
+- samsung,sysreg: handle to syscon used to control the system registers
+- i80-if-timings: timing configuration for lcd i80 interface support.
+ - cs-setup: clock cycles for the active period of address signal is enabled
+ until chip select is enabled.
+ If not specified, the default value(0) will be used.
+ - wr-setup: clock cycles for the active period of CS signal is enabled until
+ write signal is enabled.
+ If not specified, the default value(0) will be used.
+ - wr-active: clock cycles for the active period of CS is enabled.
+ If not specified, the default value(1) will be used.
+ - wr-hold: clock cycles for the active period of CS is disabled until write
+ signal is disabled.
+ If not specified, the default value(0) will be used.
+
+ The parameters are defined as:
+
+ VCLK(internal) __|??????|_____|??????|_____|??????|_____|??????|_____|??
+ : : : : :
+ Address Output --:<XXXXXXXXXXX:XXXXXXXXXXXX:XXXXXXXXXXXX:XXXXXXXXXXXX:XX
+ | cs-setup+1 | : : :
+ |<---------->| : : :
+ Chip Select ???????????????|____________:____________:____________|??
+ | wr-setup+1 | | wr-hold+1 |
+ |<---------->| |<---------->|
+ Write Enable ????????????????????????????|____________|???????????????
+ | wr-active+1|
+ |<---------->|
+ Video Data ----------------------------<XXXXXXXXXXXXXXXXXXXXXXXXX>--
+
+The device node can contain 'port' child nodes according to the bindings defined
+in [2]. The following are properties specific to those nodes:
+- reg: (required) port index, can be:
+ 0 - for CAMIF0 input,
+ 1 - for CAMIF1 input,
+ 2 - for CAMIF2 input,
+ 3 - for parallel output,
+ 4 - for write-back interface
+
+[1]: Documentation/devicetree/bindings/video/display-timing.txt
+[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+
+SoC specific DT entry:
+
+ fimd@11c00000 {
+ compatible = "samsung,exynos4210-fimd";
+ interrupt-parent = <&combiner>;
+ reg = <0x11c00000 0x20000>;
+ interrupt-names = "fifo", "vsync", "lcd_sys";
+ interrupts = <11 0>, <11 1>, <11 2>;
+ clocks = <&clock 140>, <&clock 283>;
+ clock-names = "sclk_fimd", "fimd";
+ power-domains = <&pd_lcd0>;
+ status = "disabled";
+ };
+
+Board specific DT entry:
+
+ fimd@11c00000 {
+ pinctrl-0 = <&lcd_clk &lcd_data24 &pwm1_out>;
+ pinctrl-names = "default";
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/video/sharp,ls037v7dw01.txt b/Documentation/devicetree/bindings/video/sharp,ls037v7dw01.txt
new file mode 100644
index 000000000..0cc8981e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/sharp,ls037v7dw01.txt
@@ -0,0 +1,43 @@
+SHARP LS037V7DW01 TFT-LCD panel
+===================================
+
+Required properties:
+- compatible: "sharp,ls037v7dw01"
+
+Optional properties:
+- label: a symbolic name for the panel
+- enable-gpios: a GPIO spec for the optional enable pin.
+ This pin is the INI pin as specified in the LS037V7DW01.pdf file.
+- reset-gpios: a GPIO spec for the optional reset pin.
+ This pin is the RESB pin as specified in the LS037V7DW01.pdf file.
+- mode-gpios: a GPIO
+ ordered MO, LR, and UD as specified in the LS037V7DW01.pdf file.
+
+Required nodes:
+- Video port for DPI input
+
+This panel can have zero to five GPIOs to configure to change configuration
+between QVGA and VGA mode and the scan direction. As these pins can be also
+configured with external pulls, all the GPIOs are considered optional with holes
+in the array.
+
+Example
+-------
+
+Example when connected to a omap2+ based device:
+
+lcd0: display {
+ compatible = "sharp,ls037v7dw01";
+ power-supply = <&lcd_3v3>;
+ enable-gpios = <&gpio5 24 GPIO_ACTIVE_HIGH>; /* gpio152, lcd INI */
+ reset-gpios = <&gpio5 27 GPIO_ACTIVE_HIGH>; /* gpio155, lcd RESB */
+ mode-gpios = <&gpio5 26 GPIO_ACTIVE_HIGH /* gpio154, lcd MO */
+ &gpio1 2 GPIO_ACTIVE_HIGH /* gpio2, lcd LR */
+ &gpio1 3 GPIO_ACTIVE_HIGH>; /* gpio3, lcd UD */
+
+ port {
+ lcd_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/simple-framebuffer-sunxi.txt b/Documentation/devicetree/bindings/video/simple-framebuffer-sunxi.txt
new file mode 100644
index 000000000..c46ba641a
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/simple-framebuffer-sunxi.txt
@@ -0,0 +1,33 @@
+Sunxi specific Simple Framebuffer bindings
+
+This binding documents sunxi specific extensions to the simple-framebuffer
+bindings. The sunxi simplefb u-boot code relies on the devicetree containing
+pre-populated simplefb nodes.
+
+These extensions are intended so that u-boot can select the right node based
+on which pipeline is being used. As such they are solely intended for
+firmware / bootloader use, and the OS should ignore them.
+
+Required properties:
+- compatible: "allwinner,simple-framebuffer"
+- allwinner,pipeline, one of:
+ "de_be0-lcd0"
+ "de_be1-lcd1"
+ "de_be0-lcd0-hdmi"
+ "de_be1-lcd1-hdmi"
+
+Example:
+
+chosen {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ framebuffer@0 {
+ compatible = "allwinner,simple-framebuffer", "simple-framebuffer";
+ allwinner,pipeline = "de_be0-lcd0-hdmi";
+ clocks = <&pll5 1>, <&ahb_gates 36>, <&ahb_gates 43>,
+ <&ahb_gates 44>;
+ status = "disabled";
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/simple-framebuffer.txt b/Documentation/devicetree/bindings/video/simple-framebuffer.txt
new file mode 100644
index 000000000..4474ef6e0
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/simple-framebuffer.txt
@@ -0,0 +1,86 @@
+Simple Framebuffer
+
+A simple frame-buffer describes a frame-buffer setup by firmware or
+the bootloader, with the assumption that the display hardware has already
+been set up to scan out from the memory pointed to by the reg property.
+
+Since simplefb nodes represent runtime information they must be sub-nodes of
+the chosen node (*). Simplefb nodes must be named "framebuffer@<address>".
+
+If the devicetree contains nodes for the display hardware used by a simplefb,
+then the simplefb node must contain a property called "display", which
+contains a phandle pointing to the primary display hw node, so that the OS
+knows which simplefb to disable when handing over control to a driver for the
+real hardware. The bindings for the hw nodes must specify which node is
+considered the primary node.
+
+It is advised to add display# aliases to help the OS determine how to number
+things. If display# aliases are used, then if the simplefb node contains a
+"display" property then the /aliases/display# path must point to the display
+hw node the "display" property points to, otherwise it must point directly
+to the simplefb node.
+
+If a simplefb node represents the preferred console for user interaction,
+then the chosen node's stdout-path property should point to it, or to the
+primary display hw node, as with display# aliases. If display aliases are
+used then it should be set to the alias instead.
+
+It is advised that devicetree files contain pre-filled, disabled framebuffer
+nodes, so that the firmware only needs to update the mode information and
+enable them. This way if e.g. later on support for more display clocks get
+added, the simplefb nodes will already contain this info and the firmware
+does not need to be updated.
+
+If pre-filled framebuffer nodes are used, the firmware may need extra
+information to find the right node. In that case an extra platform specific
+compatible and platform specific properties should be used and documented,
+see e.g. simple-framebuffer-sunxi.txt .
+
+Required properties:
+- compatible: "simple-framebuffer"
+- reg: Should contain the location and size of the framebuffer memory.
+- width: The width of the framebuffer in pixels.
+- height: The height of the framebuffer in pixels.
+- stride: The number of bytes in each line of the framebuffer.
+- format: The format of the framebuffer surface. Valid values are:
+ - r5g6b5 (16-bit pixels, d[15:11]=r, d[10:5]=g, d[4:0]=b).
+ - a8b8g8r8 (32-bit pixels, d[31:24]=a, d[23:16]=b, d[15:8]=g, d[7:0]=r).
+
+Optional properties:
+- clocks : List of clocks used by the framebuffer. Clocks listed here
+ are expected to already be configured correctly. The OS must
+ ensure these clocks are not modified or disabled while the
+ simple framebuffer remains active.
+- display : phandle pointing to the primary display hardware node
+
+Example:
+
+aliases {
+ display0 = &lcdc0;
+}
+
+chosen {
+ framebuffer0: framebuffer@1d385000 {
+ compatible = "simple-framebuffer";
+ reg = <0x1d385000 (1600 * 1200 * 2)>;
+ width = <1600>;
+ height = <1200>;
+ stride = <(1600 * 2)>;
+ format = "r5g6b5";
+ clocks = <&ahb_gates 36>, <&ahb_gates 43>, <&ahb_gates 44>;
+ display = <&lcdc0>;
+ };
+ stdout-path = "display0";
+};
+
+soc@01c00000 {
+ lcdc0: lcdc@1c0c000 {
+ compatible = "allwinner,sun4i-a10-lcdc";
+ ...
+ };
+};
+
+
+*) Older devicetree files may have a compatible = "simple-framebuffer" node
+in a different place, operating systems must first enumerate any compatible
+nodes found under chosen and then check for other compatible nodes.
diff --git a/Documentation/devicetree/bindings/video/sony,acx565akm.txt b/Documentation/devicetree/bindings/video/sony,acx565akm.txt
new file mode 100644
index 000000000..e12333280
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/sony,acx565akm.txt
@@ -0,0 +1,30 @@
+Sony ACX565AKM SDI Panel
+========================
+
+Required properties:
+- compatible: "sony,acx565akm"
+
+Optional properties:
+- label: a symbolic name for the panel
+- reset-gpios: panel reset gpio
+
+Required nodes:
+- Video port for SDI input
+
+Example
+-------
+
+acx565akm@2 {
+ compatible = "sony,acx565akm";
+ spi-max-frequency = <6000000>;
+ reg = <2>;
+
+ label = "lcd";
+ reset-gpios = <&gpio3 26 GPIO_ACTIVE_HIGH>; /* 90 */
+
+ port {
+ lcd_in: endpoint {
+ remote-endpoint = <&sdi_out>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/ssd1289fb.txt b/Documentation/devicetree/bindings/video/ssd1289fb.txt
new file mode 100644
index 000000000..4fcd5e68c
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ssd1289fb.txt
@@ -0,0 +1,13 @@
+* Solomon SSD1289 Framebuffer Driver
+
+Required properties:
+ - compatible: Should be "solomon,ssd1289fb". The only supported bus for
+ now is lbc.
+ - reg: Should contain address of the controller on the LBC bus. The detail
+ was described in Documentation/devicetree/bindings/powerpc/fsl/lbc.txt
+
+Examples:
+display@2,0 {
+ compatible = "solomon,ssd1289fb";
+ reg = <0x2 0x0000 0x0004>;
+};
diff --git a/Documentation/devicetree/bindings/video/ssd1307fb.txt b/Documentation/devicetree/bindings/video/ssd1307fb.txt
new file mode 100644
index 000000000..7a125427f
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ssd1307fb.txt
@@ -0,0 +1,28 @@
+* Solomon SSD1307 Framebuffer Driver
+
+Required properties:
+ - compatible: Should be "solomon,<chip>fb-<bus>". The only supported bus for
+ now is i2c, and the supported chips are ssd1306 and ssd1307.
+ - reg: Should contain address of the controller on the I2C bus. Most likely
+ 0x3c or 0x3d
+ - pwm: Should contain the pwm to use according to the OF device tree PWM
+ specification [0]. Only required for the ssd1307.
+ - reset-gpios: Should contain the GPIO used to reset the OLED display
+ - solomon,height: Height in pixel of the screen driven by the controller
+ - solomon,width: Width in pixel of the screen driven by the controller
+ - solomon,page-offset: Offset of pages (band of 8 pixels) that the screen is
+ mapped to.
+
+Optional properties:
+ - reset-active-low: Is the reset gpio is active on physical low?
+
+[0]: Documentation/devicetree/bindings/pwm/pwm.txt
+
+Examples:
+ssd1307: oled@3c {
+ compatible = "solomon,ssd1307fb-i2c";
+ reg = <0x3c>;
+ pwms = <&pwm 4 3000>;
+ reset-gpios = <&gpio2 7>;
+ reset-active-low;
+};
diff --git a/Documentation/devicetree/bindings/video/thine,thc63lvdm83d b/Documentation/devicetree/bindings/video/thine,thc63lvdm83d
new file mode 100644
index 000000000..527e236e9
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/thine,thc63lvdm83d
@@ -0,0 +1,50 @@
+THine Electronics THC63LVDM83D LVDS serializer
+----------------------------------------------
+
+The THC63LVDM83D is an LVDS serializer designed to support pixel data
+transmission between a host and a flat panel.
+
+Required properties:
+
+- compatible: Should be "thine,thc63lvdm83d"
+
+Optional properties:
+
+- pwdn-gpios: Power down control GPIO
+
+Required nodes:
+
+The THC63LVDM83D has two video ports. Their connections are modeled using the
+OFgraph bindings specified in Documentation/devicetree/bindings/graph.txt.
+
+- Video port 0 for CMOS/TTL input
+- Video port 1 for LVDS output
+
+
+Example
+-------
+
+ lvds_enc: encoder@0 {
+ compatible = "thine,thc63lvdm83d";
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ lvds_enc_in: endpoint@0 {
+ remote-endpoint = <&rgb_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ lvds_enc_out: endpoint@0 {
+ remote-endpoint = <&panel_in>;
+ };
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/video/ti,dra7-dss.txt b/Documentation/devicetree/bindings/video/ti,dra7-dss.txt
new file mode 100644
index 000000000..f33a05137
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ti,dra7-dss.txt
@@ -0,0 +1,69 @@
+Texas Instruments DRA7x Display Subsystem
+=========================================
+
+See Documentation/devicetree/bindings/video/ti,omap-dss.txt for generic
+description about OMAP Display Subsystem bindings.
+
+DSS Core
+--------
+
+Required properties:
+- compatible: "ti,dra7-dss"
+- reg: address and length of the register spaces for 'dss'
+- ti,hwmods: "dss_core"
+- clocks: handle to fclk
+- clock-names: "fck"
+- syscon: phandle to control module core syscon node
+
+Optional properties:
+
+Some DRA7xx SoCs have one dedicated video PLL, some have two. These properties
+can be used to describe the video PLLs:
+
+- reg: address and length of the register spaces for 'pll1_clkctrl',
+ 'pll1', 'pll2_clkctrl', 'pll2'
+- clocks: handle to video1 pll clock and video2 pll clock
+- clock-names: "video1_clk" and "video2_clk"
+
+Required nodes:
+- DISPC
+
+Optional nodes:
+- DSS Submodules: HDMI
+- Video port for DPI output
+
+DPI Endpoint required properties:
+- data-lines: number of lines used
+
+
+DISPC
+-----
+
+Required properties:
+- compatible: "ti,dra7-dispc"
+- reg: address and length of the register space
+- ti,hwmods: "dss_dispc"
+- interrupts: the DISPC interrupt
+- clocks: handle to fclk
+- clock-names: "fck"
+
+HDMI
+----
+
+Required properties:
+- compatible: "ti,dra7-hdmi"
+- reg: addresses and lengths of the register spaces for 'wp', 'pll', 'phy',
+ 'core'
+- reg-names: "wp", "pll", "phy", "core"
+- interrupts: the HDMI interrupt line
+- ti,hwmods: "dss_hdmi"
+- vdda-supply: vdda power supply
+- clocks: handles to fclk and pll clock
+- clock-names: "fck", "sys_clk"
+
+Optional nodes:
+- Video port for HDMI output
+
+HDMI Endpoint optional properties:
+- lanes: list of 8 pin numbers for the HDMI lanes: CLK+, CLK-, D0+, D0-,
+ D1+, D1-, D2+, D2-. (default: 0,1,2,3,4,5,6,7)
diff --git a/Documentation/devicetree/bindings/video/ti,omap-dss.txt b/Documentation/devicetree/bindings/video/ti,omap-dss.txt
new file mode 100644
index 000000000..e1ef29569
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ti,omap-dss.txt
@@ -0,0 +1,211 @@
+Texas Instruments OMAP Display Subsystem
+========================================
+
+Generic Description
+-------------------
+
+This document is a generic description of the OMAP Display Subsystem bindings.
+Binding details for each OMAP SoC version are described in respective binding
+documentation.
+
+The OMAP Display Subsystem (DSS) hardware consists of DSS Core, DISPC module and
+a number of encoder modules. All DSS versions contain DSS Core and DISPC, but
+the encoder modules vary.
+
+The DSS Core is the parent of the other DSS modules, and manages clock routing,
+integration to the SoC, etc.
+
+DISPC is the display controller, which reads pixels from the memory and outputs
+a RGB pixel stream to encoders.
+
+The encoder modules encode the received RGB pixel stream to a video output like
+HDMI, MIPI DPI, etc.
+
+Video Ports
+-----------
+
+The DSS Core and the encoders have video port outputs. The structure of the
+video ports is described in Documentation/devicetree/bindings/graph.txt,
+and the properties for the ports and endpoints for each encoder are
+described in the SoC's DSS binding documentation.
+
+The video ports are used to describe the connections to external hardware, like
+panels or external encoders.
+
+Aliases
+-------
+
+The board dts file may define aliases for displays to assign "displayX" style
+name for each display. If no aliases are defined, a semi-random number is used
+for the display.
+
+Example
+-------
+
+A shortened example of the DSS description for OMAP4, with non-relevant parts
+removed, defined in omap4.dtsi:
+
+dss: dss@58000000 {
+ compatible = "ti,omap4-dss";
+ reg = <0x58000000 0x80>;
+ status = "disabled";
+ ti,hwmods = "dss_core";
+ clocks = <&dss_dss_clk>;
+ clock-names = "fck";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ dispc@58001000 {
+ compatible = "ti,omap4-dispc";
+ reg = <0x58001000 0x1000>;
+ interrupts = <GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>;
+ ti,hwmods = "dss_dispc";
+ clocks = <&dss_dss_clk>;
+ clock-names = "fck";
+ };
+
+ hdmi: encoder@58006000 {
+ compatible = "ti,omap4-hdmi";
+ reg = <0x58006000 0x200>,
+ <0x58006200 0x100>,
+ <0x58006300 0x100>,
+ <0x58006400 0x1000>;
+ reg-names = "wp", "pll", "phy", "core";
+ interrupts = <GIC_SPI 101 IRQ_TYPE_LEVEL_HIGH>;
+ status = "disabled";
+ ti,hwmods = "dss_hdmi";
+ clocks = <&dss_48mhz_clk>, <&dss_sys_clk>;
+ clock-names = "fck", "sys_clk";
+ };
+};
+
+A shortened example of the board description for OMAP4 Panda board, defined in
+omap4-panda.dts.
+
+The Panda board has a DVI and a HDMI connector, and the board contains a TFP410
+chip (MIPI DPI to DVI encoder) and a TPD12S015 chip (HDMI ESD protection & level
+shifter). The video pipelines for the connectors are formed as follows:
+
+DSS Core --(MIPI DPI)--> TFP410 --(DVI)--> DVI Connector
+OMAP HDMI --(HDMI)--> TPD12S015 --(HDMI)--> HDMI Connector
+
+/ {
+ aliases {
+ display0 = &dvi0;
+ display1 = &hdmi0;
+ };
+
+ tfp410: encoder@0 {
+ compatible = "ti,tfp410";
+ gpios = <&gpio1 0 GPIO_ACTIVE_LOW>; /* 0, power-down */
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&tfp410_pins>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ tfp410_in: endpoint@0 {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ tfp410_out: endpoint@0 {
+ remote-endpoint = <&dvi_connector_in>;
+ };
+ };
+ };
+ };
+
+ dvi0: connector@0 {
+ compatible = "dvi-connector";
+ label = "dvi";
+
+ i2c-bus = <&i2c3>;
+
+ port {
+ dvi_connector_in: endpoint {
+ remote-endpoint = <&tfp410_out>;
+ };
+ };
+ };
+
+ tpd12s015: encoder@1 {
+ compatible = "ti,tpd12s015";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&tpd12s015_pins>;
+
+ gpios = <&gpio2 28 GPIO_ACTIVE_HIGH>, /* 60, CT CP HPD */
+ <&gpio2 9 GPIO_ACTIVE_HIGH>, /* 41, LS OE */
+ <&gpio2 31 GPIO_ACTIVE_HIGH>; /* 63, HPD */
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ tpd12s015_in: endpoint@0 {
+ remote-endpoint = <&hdmi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ tpd12s015_out: endpoint@0 {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+ };
+ };
+
+ hdmi0: connector@1 {
+ compatible = "hdmi-connector";
+ label = "hdmi";
+
+ port {
+ hdmi_connector_in: endpoint {
+ remote-endpoint = <&tpd12s015_out>;
+ };
+ };
+ };
+};
+
+&dss {
+ status = "ok";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&dss_dpi_pins>;
+
+ port {
+ dpi_out: endpoint {
+ remote-endpoint = <&tfp410_in>;
+ data-lines = <24>;
+ };
+ };
+};
+
+&hdmi {
+ status = "ok";
+ vdda-supply = <&vdac>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&dss_hdmi_pins>;
+
+ port {
+ hdmi_out: endpoint {
+ remote-endpoint = <&tpd12s015_in>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/ti,omap2-dss.txt b/Documentation/devicetree/bindings/video/ti,omap2-dss.txt
new file mode 100644
index 000000000..fa8bb2ed1
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ti,omap2-dss.txt
@@ -0,0 +1,54 @@
+Texas Instruments OMAP2 Display Subsystem
+=========================================
+
+See Documentation/devicetree/bindings/video/ti,omap-dss.txt for generic
+description about OMAP Display Subsystem bindings.
+
+DSS Core
+--------
+
+Required properties:
+- compatible: "ti,omap2-dss"
+- reg: address and length of the register space
+- ti,hwmods: "dss_core"
+
+Optional nodes:
+- Video port for DPI output
+
+DPI Endpoint required properties:
+- data-lines: number of lines used
+
+
+DISPC
+-----
+
+Required properties:
+- compatible: "ti,omap2-dispc"
+- reg: address and length of the register space
+- ti,hwmods: "dss_dispc"
+- interrupts: the DISPC interrupt
+
+
+RFBI
+----
+
+Required properties:
+- compatible: "ti,omap2-rfbi"
+- reg: address and length of the register space
+- ti,hwmods: "dss_rfbi"
+
+
+VENC
+----
+
+Required properties:
+- compatible: "ti,omap2-venc"
+- reg: address and length of the register space
+- ti,hwmods: "dss_venc"
+- vdda-supply: power supply for DAC
+
+VENC Endpoint required properties:
+
+Required properties:
+- ti,invert-polarity: invert the polarity of the video signal
+- ti,channels: 1 for composite, 2 for s-video
diff --git a/Documentation/devicetree/bindings/video/ti,omap3-dss.txt b/Documentation/devicetree/bindings/video/ti,omap3-dss.txt
new file mode 100644
index 000000000..0023fa4b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ti,omap3-dss.txt
@@ -0,0 +1,83 @@
+Texas Instruments OMAP3 Display Subsystem
+=========================================
+
+See Documentation/devicetree/bindings/video/ti,omap-dss.txt for generic
+description about OMAP Display Subsystem bindings.
+
+DSS Core
+--------
+
+Required properties:
+- compatible: "ti,omap3-dss"
+- reg: address and length of the register space
+- ti,hwmods: "dss_core"
+- clocks: handle to fclk
+- clock-names: "fck"
+
+Optional nodes:
+- Video ports:
+ - Port 0: DPI output
+ - Port 1: SDI output
+
+DPI Endpoint required properties:
+- data-lines: number of lines used
+
+SDI Endpoint required properties:
+- datapairs: number of datapairs used
+
+
+DISPC
+-----
+
+Required properties:
+- compatible: "ti,omap3-dispc"
+- reg: address and length of the register space
+- ti,hwmods: "dss_dispc"
+- interrupts: the DISPC interrupt
+- clocks: handle to fclk
+- clock-names: "fck"
+
+
+RFBI
+----
+
+Required properties:
+- compatible: "ti,omap3-rfbi"
+- reg: address and length of the register space
+- ti,hwmods: "dss_rfbi"
+- clocks: handles to fclk and iclk
+- clock-names: "fck", "ick"
+
+
+VENC
+----
+
+Required properties:
+- compatible: "ti,omap3-venc"
+- reg: address and length of the register space
+- ti,hwmods: "dss_venc"
+- vdda-supply: power supply for DAC
+- clocks: handle to fclk
+- clock-names: "fck"
+
+VENC Endpoint required properties:
+- ti,invert-polarity: invert the polarity of the video signal
+- ti,channels: 1 for composite, 2 for s-video
+
+
+DSI
+---
+
+Required properties:
+- compatible: "ti,omap3-dsi"
+- reg: addresses and lengths of the register spaces for 'proto', 'phy' and 'pll'
+- reg-names: "proto", "phy", "pll"
+- interrupts: the DSI interrupt line
+- ti,hwmods: "dss_dsi1"
+- vdd-supply: power supply for DSI
+- clocks: handles to fclk and pll clock
+- clock-names: "fck", "sys_clk"
+
+DSI Endpoint required properties:
+- lanes: list of pin numbers for the DSI lanes: CLK+, CLK-, DATA0+, DATA0-,
+ DATA1+, DATA1-, ...
diff --git a/Documentation/devicetree/bindings/video/ti,omap4-dss.txt b/Documentation/devicetree/bindings/video/ti,omap4-dss.txt
new file mode 100644
index 000000000..b8c29fbd1
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ti,omap4-dss.txt
@@ -0,0 +1,115 @@
+Texas Instruments OMAP4 Display Subsystem
+=========================================
+
+See Documentation/devicetree/bindings/video/ti,omap-dss.txt for generic
+description about OMAP Display Subsystem bindings.
+
+DSS Core
+--------
+
+Required properties:
+- compatible: "ti,omap4-dss"
+- reg: address and length of the register space
+- ti,hwmods: "dss_core"
+- clocks: handle to fclk
+- clock-names: "fck"
+
+Required nodes:
+- DISPC
+
+Optional nodes:
+- DSS Submodules: RFBI, VENC, DSI, HDMI
+- Video port for DPI output
+
+DPI Endpoint required properties:
+- data-lines: number of lines used
+
+
+DISPC
+-----
+
+Required properties:
+- compatible: "ti,omap4-dispc"
+- reg: address and length of the register space
+- ti,hwmods: "dss_dispc"
+- interrupts: the DISPC interrupt
+- clocks: handle to fclk
+- clock-names: "fck"
+
+
+RFBI
+----
+
+Required properties:
+- compatible: "ti,omap4-rfbi"
+- reg: address and length of the register space
+- ti,hwmods: "dss_rfbi"
+- clocks: handles to fclk and iclk
+- clock-names: "fck", "ick"
+
+Optional nodes:
+- Video port for RFBI output
+- RFBI controlled peripherals
+
+
+VENC
+----
+
+Required properties:
+- compatible: "ti,omap4-venc"
+- reg: address and length of the register space
+- ti,hwmods: "dss_venc"
+- vdda-supply: power supply for DAC
+- clocks: handle to fclk
+- clock-names: "fck"
+
+Optional nodes:
+- Video port for VENC output
+
+VENC Endpoint required properties:
+- ti,invert-polarity: invert the polarity of the video signal
+- ti,channels: 1 for composite, 2 for s-video
+
+
+DSI
+---
+
+Required properties:
+- compatible: "ti,omap4-dsi"
+- reg: addresses and lengths of the register spaces for 'proto', 'phy' and 'pll'
+- reg-names: "proto", "phy", "pll"
+- interrupts: the DSI interrupt line
+- ti,hwmods: "dss_dsi1" or "dss_dsi2"
+- vdd-supply: power supply for DSI
+- clocks: handles to fclk and pll clock
+- clock-names: "fck", "sys_clk"
+
+Optional nodes:
+- Video port for DSI output
+- DSI controlled peripherals
+
+DSI Endpoint required properties:
+- lanes: list of pin numbers for the DSI lanes: CLK+, CLK-, DATA0+, DATA0-,
+ DATA1+, DATA1-, ...
+
+
+HDMI
+----
+
+Required properties:
+- compatible: "ti,omap4-hdmi"
+- reg: addresses and lengths of the register spaces for 'wp', 'pll', 'phy',
+ 'core'
+- reg-names: "wp", "pll", "phy", "core"
+- interrupts: the HDMI interrupt line
+- ti,hwmods: "dss_hdmi"
+- vdda-supply: vdda power supply
+- clocks: handles to fclk and pll clock
+- clock-names: "fck", "sys_clk"
+
+Optional nodes:
+- Video port for HDMI output
+
+HDMI Endpoint optional properties:
+- lanes: list of 8 pin numbers for the HDMI lanes: CLK+, CLK-, D0+, D0-,
+ D1+, D1-, D2+, D2-. (default: 0,1,2,3,4,5,6,7)
diff --git a/Documentation/devicetree/bindings/video/ti,omap5-dss.txt b/Documentation/devicetree/bindings/video/ti,omap5-dss.txt
new file mode 100644
index 000000000..38ffc8fcd
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ti,omap5-dss.txt
@@ -0,0 +1,96 @@
+Texas Instruments OMAP5 Display Subsystem
+=========================================
+
+See Documentation/devicetree/bindings/video/ti,omap-dss.txt for generic
+description about OMAP Display Subsystem bindings.
+
+DSS Core
+--------
+
+Required properties:
+- compatible: "ti,omap5-dss"
+- reg: address and length of the register space
+- ti,hwmods: "dss_core"
+- clocks: handle to fclk
+- clock-names: "fck"
+
+Required nodes:
+- DISPC
+
+Optional nodes:
+- DSS Submodules: RFBI, DSI, HDMI
+- Video port for DPI output
+
+DPI Endpoint required properties:
+- data-lines: number of lines used
+
+
+DISPC
+-----
+
+Required properties:
+- compatible: "ti,omap5-dispc"
+- reg: address and length of the register space
+- ti,hwmods: "dss_dispc"
+- interrupts: the DISPC interrupt
+- clocks: handle to fclk
+- clock-names: "fck"
+
+
+RFBI
+----
+
+Required properties:
+- compatible: "ti,omap5-rfbi"
+- reg: address and length of the register space
+- ti,hwmods: "dss_rfbi"
+- clocks: handles to fclk and iclk
+- clock-names: "fck", "ick"
+
+Optional nodes:
+- Video port for RFBI output
+- RFBI controlled peripherals
+
+
+DSI
+---
+
+Required properties:
+- compatible: "ti,omap5-dsi"
+- reg: addresses and lengths of the register spaces for 'proto', 'phy' and 'pll'
+- reg-names: "proto", "phy", "pll"
+- interrupts: the DSI interrupt line
+- ti,hwmods: "dss_dsi1" or "dss_dsi2"
+- vdd-supply: power supply for DSI
+- clocks: handles to fclk and pll clock
+- clock-names: "fck", "sys_clk"
+
+Optional nodes:
+- Video port for DSI output
+- DSI controlled peripherals
+
+DSI Endpoint required properties:
+- lanes: list of pin numbers for the DSI lanes: CLK+, CLK-, DATA0+, DATA0-,
+ DATA1+, DATA1-, ...
+
+
+HDMI
+----
+
+Required properties:
+- compatible: "ti,omap5-hdmi"
+- reg: addresses and lengths of the register spaces for 'wp', 'pll', 'phy',
+ 'core'
+- reg-names: "wp", "pll", "phy", "core"
+- interrupts: the HDMI interrupt line
+- ti,hwmods: "dss_hdmi"
+- vdda-supply: vdda power supply
+- clocks: handles to fclk and pll clock
+- clock-names: "fck", "sys_clk"
+
+Optional nodes:
+- Video port for HDMI output
+
+HDMI Endpoint optional properties:
+- lanes: list of 8 pin numbers for the HDMI lanes: CLK+, CLK-, D0+, D0-,
+ D1+, D1-, D2+, D2-. (default: 0,1,2,3,4,5,6,7)
diff --git a/Documentation/devicetree/bindings/video/ti,opa362.txt b/Documentation/devicetree/bindings/video/ti,opa362.txt
new file mode 100644
index 000000000..f96083c0b
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ti,opa362.txt
@@ -0,0 +1,38 @@
+OPA362 analog video amplifier
+
+Required properties:
+- compatible: "ti,opa362"
+- enable-gpios: enable/disable output gpio
+
+Required node:
+- Video port 0 for opa362 input
+- Video port 1 for opa362 output
+
+Example:
+
+tv_amp: opa362 {
+ compatible = "ti,opa362";
+ enable-gpios = <&gpio1 23 0>; /* GPIO to enable video out amplifier */
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+ opa_in: endpoint@0 {
+ remote-endpoint = <&venc_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+ opa_out: endpoint@0 {
+ remote-endpoint = <&tv_connector_in>;
+ };
+ };
+ };
+};
+
+
+
diff --git a/Documentation/devicetree/bindings/video/ti,tfp410.txt b/Documentation/devicetree/bindings/video/ti,tfp410.txt
new file mode 100644
index 000000000..2cbe32a3d
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ti,tfp410.txt
@@ -0,0 +1,41 @@
+TFP410 DPI to DVI encoder
+=========================
+
+Required properties:
+- compatible: "ti,tfp410"
+
+Optional properties:
+- powerdown-gpios: power-down gpio
+
+Required nodes:
+- Video port 0 for DPI input
+- Video port 1 for DVI output
+
+Example
+-------
+
+tfp410: encoder@0 {
+ compatible = "ti,tfp410";
+ powerdown-gpios = <&twl_gpio 2 GPIO_ACTIVE_LOW>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ tfp410_in: endpoint@0 {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ tfp410_out: endpoint@0 {
+ remote-endpoint = <&dvi_connector_in>;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/ti,tpd12s015.txt b/Documentation/devicetree/bindings/video/ti,tpd12s015.txt
new file mode 100644
index 000000000..26e6d32e3
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/ti,tpd12s015.txt
@@ -0,0 +1,44 @@
+TPD12S015 HDMI level shifter and ESD protection chip
+====================================================
+
+Required properties:
+- compatible: "ti,tpd12s015"
+
+Optional properties:
+- gpios: CT CP HPD, LS OE and HPD gpios
+
+Required nodes:
+- Video port 0 for HDMI input
+- Video port 1 for HDMI output
+
+Example
+-------
+
+tpd12s015: encoder@1 {
+ compatible = "ti,tpd12s015";
+
+ gpios = <&gpio2 28 GPIO_ACTIVE_HIGH>, /* 60, CT CP HPD */
+ <&gpio2 9 GPIO_ACTIVE_HIGH>, /* 41, LS OE */
+ <&gpio2 31 GPIO_ACTIVE_HIGH>; /* 63, HPD */
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ port@0 {
+ reg = <0>;
+
+ tpd12s015_in: endpoint@0 {
+ remote-endpoint = <&hdmi_out>;
+ };
+ };
+
+ port@1 {
+ reg = <1>;
+
+ tpd12s015_out: endpoint@0 {
+ remote-endpoint = <&hdmi_connector_in>;
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/toppoly,td028ttec1.txt b/Documentation/devicetree/bindings/video/toppoly,td028ttec1.txt
new file mode 100644
index 000000000..7175dc374
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/toppoly,td028ttec1.txt
@@ -0,0 +1,30 @@
+Toppoly TD028TTEC1 Panel
+========================
+
+Required properties:
+- compatible: "toppoly,td028ttec1"
+
+Optional properties:
+- label: a symbolic name for the panel
+
+Required nodes:
+- Video port for DPI input
+
+Example
+-------
+
+lcd-panel: td028ttec1@0 {
+ compatible = "toppoly,td028ttec1";
+ reg = <0>;
+ spi-max-frequency = <100000>;
+ spi-cpol;
+ spi-cpha;
+
+ label = "lcd";
+ port {
+ lcd_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+};
+
diff --git a/Documentation/devicetree/bindings/video/tpo,td043mtea1.txt b/Documentation/devicetree/bindings/video/tpo,td043mtea1.txt
new file mode 100644
index 000000000..ec6d62975
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/tpo,td043mtea1.txt
@@ -0,0 +1,33 @@
+TPO TD043MTEA1 Panel
+====================
+
+Required properties:
+- compatible: "tpo,td043mtea1"
+- reset-gpios: panel reset gpio
+
+Optional properties:
+- label: a symbolic name for the panel
+
+Required nodes:
+- Video port for DPI input
+
+Example
+-------
+
+lcd-panel: panel@0 {
+ compatible = "tpo,td043mtea1";
+ reg = <0>;
+ spi-max-frequency = <100000>;
+ spi-cpol;
+ spi-cpha;
+
+ label = "lcd";
+
+ reset-gpios = <&gpio7 7 0>;
+
+ port {
+ lcd_in: endpoint {
+ remote-endpoint = <&dpi_out>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/vga-connector.txt b/Documentation/devicetree/bindings/video/vga-connector.txt
new file mode 100644
index 000000000..c727f298e
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/vga-connector.txt
@@ -0,0 +1,36 @@
+VGA Connector
+=============
+
+Required properties:
+
+- compatible: "vga-connector"
+
+Optional properties:
+
+- label: a symbolic name for the connector corresponding to a hardware label
+- ddc-i2c-bus: phandle to the I2C bus that is connected to VGA DDC
+
+Required nodes:
+
+The VGA connector internal connections are modeled using the OF graph bindings
+specified in Documentation/devicetree/bindings/graph.txt.
+
+The VGA connector has a single port that must be connected to a video source
+port.
+
+
+Example
+-------
+
+vga0: connector@0 {
+ compatible = "vga-connector";
+ label = "vga";
+
+ ddc-i2c-bus = <&i2c3>;
+
+ port {
+ vga_connector_in: endpoint {
+ remote-endpoint = <&adv7123_out>;
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/video/via,vt8500-fb.txt b/Documentation/devicetree/bindings/video/via,vt8500-fb.txt
new file mode 100644
index 000000000..2871e218a
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/via,vt8500-fb.txt
@@ -0,0 +1,36 @@
+VIA VT8500 Framebuffer
+-----------------------------------------------------
+
+Required properties:
+- compatible : "via,vt8500-fb"
+- reg : Should contain 1 register ranges(address and length)
+- interrupts : framebuffer controller interrupt
+- bits-per-pixel : bit depth of framebuffer (16 or 32)
+
+Required subnodes:
+- display-timings: see display-timing.txt for information
+
+Example:
+
+ fb@d8050800 {
+ compatible = "via,vt8500-fb";
+ reg = <0xd800e400 0x400>;
+ interrupts = <12>;
+ bits-per-pixel = <16>;
+
+ display-timings {
+ native-mode = <&timing0>;
+ timing0: 800x480 {
+ clock-frequency = <0>; /* unused but required */
+ hactive = <800>;
+ vactive = <480>;
+ hfront-porch = <40>;
+ hback-porch = <88>;
+ hsync-len = <0>;
+ vback-porch = <32>;
+ vfront-porch = <11>;
+ vsync-len = <1>;
+ };
+ };
+ };
+
diff --git a/Documentation/devicetree/bindings/video/wm,prizm-ge-rops.txt b/Documentation/devicetree/bindings/video/wm,prizm-ge-rops.txt
new file mode 100644
index 000000000..a850fa011
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/wm,prizm-ge-rops.txt
@@ -0,0 +1,13 @@
+VIA/Wondermedia Graphics Engine Controller
+-----------------------------------------------------
+
+Required properties:
+- compatible : "wm,prizm-ge-rops"
+- reg : Should contain 1 register ranges(address and length)
+
+Example:
+
+ ge_rops@d8050400 {
+ compatible = "wm,prizm-ge-rops";
+ reg = <0xd8050400 0x100>;
+ };
diff --git a/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt b/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt
new file mode 100644
index 000000000..0bcadb284
--- /dev/null
+++ b/Documentation/devicetree/bindings/video/wm,wm8505-fb.txt
@@ -0,0 +1,33 @@
+Wondermedia WM8505 Framebuffer
+-----------------------------------------------------
+
+Required properties:
+- compatible : "wm,wm8505-fb"
+- reg : Should contain 1 register ranges(address and length)
+- bits-per-pixel : bit depth of framebuffer (16 or 32)
+
+Required subnodes:
+- display-timings: see display-timing.txt for information
+
+Example:
+
+ fb@d8051700 {
+ compatible = "wm,wm8505-fb";
+ reg = <0xd8051700 0x200>;
+ bits-per-pixel = <16>;
+
+ display-timings {
+ native-mode = <&timing0>;
+ timing0: 800x480 {
+ clock-frequency = <0>; /* unused but required */
+ hactive = <800>;
+ vactive = <480>;
+ hfront-porch = <40>;
+ hback-porch = <88>;
+ hsync-len = <0>;
+ vback-porch = <32>;
+ vfront-porch = <11>;
+ vsync-len = <1>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/virtio/mmio.txt b/Documentation/devicetree/bindings/virtio/mmio.txt
new file mode 100644
index 000000000..5069c1b8e
--- /dev/null
+++ b/Documentation/devicetree/bindings/virtio/mmio.txt
@@ -0,0 +1,17 @@
+* virtio memory mapped device
+
+See http://ozlabs.org/~rusty/virtio-spec/ for more details.
+
+Required properties:
+
+- compatible: "virtio,mmio" compatibility string
+- reg: control registers base address and size including configuration space
+- interrupts: interrupt generated by the device
+
+Example:
+
+ virtio_block@3000 {
+ compatible = "virtio,mmio";
+ reg = <0x3000 0x100>;
+ interrupts = <41>;
+ }
diff --git a/Documentation/devicetree/bindings/w1/fsl-imx-owire.txt b/Documentation/devicetree/bindings/w1/fsl-imx-owire.txt
new file mode 100644
index 000000000..ecf42c076
--- /dev/null
+++ b/Documentation/devicetree/bindings/w1/fsl-imx-owire.txt
@@ -0,0 +1,19 @@
+* Freescale i.MX One wire bus master controller
+
+Required properties:
+- compatible : should be "fsl,imx21-owire"
+- reg : Address and length of the register set for the device
+
+Optional properties:
+- clocks : phandle of clock that supplies the module (required if platform
+ clock bindings use device tree)
+
+Example:
+
+- From imx53.dtsi:
+owire: owire@63fa4000 {
+ compatible = "fsl,imx53-owire", "fsl,imx21-owire";
+ reg = <0x63fa4000 0x4000>;
+ clocks = <&clks 159>;
+ status = "disabled";
+};
diff --git a/Documentation/devicetree/bindings/w1/omap-hdq.txt b/Documentation/devicetree/bindings/w1/omap-hdq.txt
new file mode 100644
index 000000000..fef794741
--- /dev/null
+++ b/Documentation/devicetree/bindings/w1/omap-hdq.txt
@@ -0,0 +1,17 @@
+* OMAP HDQ One wire bus master controller
+
+Required properties:
+- compatible : should be "ti,omap3-1w"
+- reg : Address and length of the register set for the device
+- interrupts : interrupt line.
+- ti,hwmods : "hdq1w"
+
+Example:
+
+- From omap3.dtsi
+ hdqw1w: 1w@480b2000 {
+ compatible = "ti,omap3-1w";
+ reg = <0x480b2000 0x1000>;
+ interrupts = <58>;
+ ti,hwmods = "hdq1w";
+ };
diff --git a/Documentation/devicetree/bindings/w1/w1-gpio.txt b/Documentation/devicetree/bindings/w1/w1-gpio.txt
new file mode 100644
index 000000000..6e09c35d9
--- /dev/null
+++ b/Documentation/devicetree/bindings/w1/w1-gpio.txt
@@ -0,0 +1,22 @@
+w1-gpio devicetree bindings
+
+Required properties:
+
+ - compatible: "w1-gpio"
+ - gpios: one or two GPIO specs:
+ - the first one is used as data I/O pin
+ - the second one is optional. If specified, it is used as
+ enable pin for an external pin pullup.
+
+Optional properties:
+
+ - linux,open-drain: if specified, the data pin is considered in
+ open-drain mode.
+
+Examples:
+
+ onewire@0 {
+ compatible = "w1-gpio";
+ gpios = <&gpio 126 0>, <&gpio 105 0>;
+ };
+
diff --git a/Documentation/devicetree/bindings/watchdog/atmel-at91rm9200-wdt.txt b/Documentation/devicetree/bindings/watchdog/atmel-at91rm9200-wdt.txt
new file mode 100644
index 000000000..d4d86cf8f
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/atmel-at91rm9200-wdt.txt
@@ -0,0 +1,9 @@
+Atmel AT91RM9200 System Timer Watchdog
+
+Required properties:
+- compatible: must be "atmel,at91sam9260-wdt".
+
+Example:
+ watchdog@fffffd00 {
+ compatible = "atmel,at91rm9200-wdt";
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt b/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt
new file mode 100644
index 000000000..a4d869744
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/atmel-wdt.txt
@@ -0,0 +1,50 @@
+* Atmel Watchdog Timers
+
+** at91sam9-wdt
+
+Required properties:
+- compatible: must be "atmel,at91sam9260-wdt".
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+Optional properties:
+- timeout-sec: contains the watchdog timeout in seconds.
+- interrupts : Should contain WDT interrupt.
+- atmel,max-heartbeat-sec : Should contain the maximum heartbeat value in
+ seconds. This value should be less or equal to 16. It is used to
+ compute the WDV field.
+- atmel,min-heartbeat-sec : Should contain the minimum heartbeat value in
+ seconds. This value must be smaller than the max-heartbeat-sec value.
+ It is used to compute the WDD field.
+- atmel,watchdog-type : Should be "hardware" or "software". Hardware watchdog
+ use the at91 watchdog reset. Software watchdog use the watchdog
+ interrupt to trigger a software reset.
+- atmel,reset-type : Should be "proc" or "all".
+ "all" : assert peripherals and processor reset signals
+ "proc" : assert the processor reset signal
+ This is valid only when using "hardware" watchdog.
+- atmel,disable : Should be present if you want to disable the watchdog.
+- atmel,idle-halt : Should be present if you want to stop the watchdog when
+ entering idle state.
+ CAUTION: This property should be used with care, it actually makes the
+ watchdog not counting when the CPU is in idle state, therefore the
+ watchdog reset time depends on mean CPU usage and will not reset at all
+ if the CPU stop working while it is in idle state, which is probably
+ not what you want.
+- atmel,dbg-halt : Should be present if you want to stop the watchdog when
+ entering debug state.
+
+Example:
+ watchdog@fffffd40 {
+ compatible = "atmel,at91sam9260-wdt";
+ reg = <0xfffffd40 0x10>;
+ interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+ timeout-sec = <15>;
+ atmel,watchdog-type = "hardware";
+ atmel,reset-type = "all";
+ atmel,dbg-halt;
+ atmel,idle-halt;
+ atmel,max-heartbeat-sec = <16>;
+ atmel,min-heartbeat-sec = <0>;
+ status = "okay";
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/brcm,bcm2835-pm-wdog.txt b/Documentation/devicetree/bindings/watchdog/brcm,bcm2835-pm-wdog.txt
new file mode 100644
index 000000000..f801d71de
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/brcm,bcm2835-pm-wdog.txt
@@ -0,0 +1,18 @@
+BCM2835 Watchdog timer
+
+Required properties:
+
+- compatible : should be "brcm,bcm2835-pm-wdt"
+- reg : Specifies base physical address and size of the registers.
+
+Optional properties:
+
+- timeout-sec : Contains the watchdog timeout in seconds
+
+Example:
+
+watchdog {
+ compatible = "brcm,bcm2835-pm-wdt";
+ reg = <0x7e100000 0x28>;
+ timeout-sec = <10>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/brcm,kona-wdt.txt b/Documentation/devicetree/bindings/watchdog/brcm,kona-wdt.txt
new file mode 100644
index 000000000..2b86a00e3
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/brcm,kona-wdt.txt
@@ -0,0 +1,15 @@
+Broadcom Kona Family Watchdog Timer
+-----------------------------------
+
+This watchdog timer is used in the following Broadcom SoCs:
+ BCM11130, BCM11140, BCM11351, BCM28145, BCM28155
+
+Required properties:
+ - compatible = "brcm,bcm11351-wdt", "brcm,kona-wdt";
+ - reg: memory address & range
+
+Example:
+ watchdog@35002f40 {
+ compatible = "brcm,bcm11351-wdt", "brcm,kona-wdt";
+ reg = <0x35002f40 0x6c>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt b/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt
new file mode 100644
index 000000000..c3a36ee45
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/cadence-wdt.txt
@@ -0,0 +1,24 @@
+Zynq Watchdog Device Tree Bindings
+-------------------------------------------
+
+Required properties:
+- compatible : Should be "cdns,wdt-r1p2".
+- clocks : This is pclk (APB clock).
+- interrupts : This is wd_irq - watchdog timeout interrupt.
+- interrupt-parent : Must be core interrupt controller.
+
+Optional properties
+- reset-on-timeout : If this property exists, then a reset is done
+ when watchdog times out.
+- timeout-sec : Watchdog timeout value (in seconds).
+
+Example:
+ watchdog@f8005000 {
+ compatible = "cdns,wdt-r1p2";
+ clocks = <&clkc 45>;
+ interrupt-parent = <&intc>;
+ interrupts = <0 9 1>;
+ reg = <0xf8005000 0x1000>;
+ reset-on-timeout;
+ timeout-sec = <10>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt b/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt
new file mode 100644
index 000000000..e60b9a13b
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/davinci-wdt.txt
@@ -0,0 +1,24 @@
+Texas Instruments DaVinci/Keystone Watchdog Timer (WDT) Controller
+
+Required properties:
+- compatible : Should be "ti,davinci-wdt", "ti,keystone-wdt"
+- reg : Should contain WDT registers location and length
+
+Optional properties:
+- timeout-sec : Contains the watchdog timeout in seconds
+- clocks : the clock feeding the watchdog timer.
+ Needed if platform uses clocks.
+ See clock-bindings.txt
+
+Documentation:
+Davinci DM646x - http://www.ti.com/lit/ug/spruer5b/spruer5b.pdf
+Keystone - http://www.ti.com/lit/ug/sprugv5a/sprugv5a.pdf
+
+Examples:
+
+wdt: wdt@2320000 {
+ compatible = "ti,davinci-wdt";
+ reg = <0x02320000 0x80>;
+ timeout-sec = <30>;
+ clocks = <&clkwdtimer0>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/dw_wdt.txt b/Documentation/devicetree/bindings/watchdog/dw_wdt.txt
new file mode 100644
index 000000000..08e16f684
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/dw_wdt.txt
@@ -0,0 +1,21 @@
+Synopsys Designware Watchdog Timer
+
+Required Properties:
+
+- compatible : Should contain "snps,dw-wdt"
+- reg : Base address and size of the watchdog timer registers.
+- clocks : phandle + clock-specifier for the clock that drives the
+ watchdog timer.
+
+Optional Properties:
+
+- interrupts : The interrupt used for the watchdog timeout warning.
+
+Example:
+
+ watchdog0: wd@ffd02000 {
+ compatible = "snps,dw-wdt";
+ reg = <0xffd02000 0x1000>;
+ interrupts = <0 171 4>;
+ clocks = <&per_base_clk>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
new file mode 100644
index 000000000..8dab6fd02
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt
@@ -0,0 +1,20 @@
+* Freescale i.MX Watchdog Timer (WDT) Controller
+
+Required properties:
+- compatible : Should be "fsl,<soc>-wdt"
+- reg : Should contain WDT registers location and length
+- interrupts : Should contain WDT interrupt
+
+Optional property:
+- big-endian: If present the watchdog device's registers are implemented
+ in big endian mode, otherwise in native mode(same with CPU), for more
+ detail please see: Documentation/devicetree/bindings/regmap/regmap.txt.
+
+Examples:
+
+wdt@73f98000 {
+ compatible = "fsl,imx51-wdt", "fsl,imx21-wdt";
+ reg = <0x73f98000 0x4000>;
+ interrupts = <58>;
+ big-endian;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/gpio-wdt.txt b/Documentation/devicetree/bindings/watchdog/gpio-wdt.txt
new file mode 100644
index 000000000..198794963
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/gpio-wdt.txt
@@ -0,0 +1,28 @@
+* GPIO-controlled Watchdog
+
+Required Properties:
+- compatible: Should contain "linux,wdt-gpio".
+- gpios: From common gpio binding; gpio connection to WDT reset pin.
+- hw_algo: The algorithm used by the driver. Should be one of the
+ following values:
+ - toggle: Either a high-to-low or a low-to-high transition clears
+ the WDT counter. The watchdog timer is disabled when GPIO is
+ left floating or connected to a three-state buffer.
+ - level: Low or high level starts counting WDT timeout,
+ the opposite level disables the WDT. Active level is determined
+ by the GPIO flags.
+- hw_margin_ms: Maximum time to reset watchdog circuit (milliseconds).
+
+Optional Properties:
+- always-running: If the watchdog timer cannot be disabled, add this flag to
+ have the driver keep toggling the signal without a client. It will only cease
+ to toggle the signal when the device is open and the timeout elapsed.
+
+Example:
+ watchdog: watchdog {
+ /* ADM706 */
+ compatible = "linux,wdt-gpio";
+ gpios = <&gpio3 9 GPIO_ACTIVE_LOW>;
+ hw_algo = "toggle";
+ hw_margin_ms = <1600>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/imgpdc-wdt.txt b/Documentation/devicetree/bindings/watchdog/imgpdc-wdt.txt
new file mode 100644
index 000000000..b2fa11fd4
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/imgpdc-wdt.txt
@@ -0,0 +1,19 @@
+*ImgTec PowerDown Controller (PDC) Watchdog Timer (WDT)
+
+Required properties:
+- compatible : Should be "img,pdc-wdt"
+- reg : Should contain WDT registers location and length
+- clocks: Must contain an entry for each entry in clock-names.
+- clock-names: Should contain "wdt" and "sys"; the watchdog counter
+ clock and register interface clock respectively.
+- interrupts : Should contain WDT interrupt
+
+Examples:
+
+watchdog@18102100 {
+ compatible = "img,pdc-wdt";
+ reg = <0x18102100 0x100>;
+ clocks = <&pdc_wdt_clk>, <&sys_clk>;
+ clock-names = "wdt", "sys";
+ interrupts = <0 52 IRQ_TYPE_LEVEL_HIGH>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt b/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt
new file mode 100644
index 000000000..e27763ef0
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/ingenic,jz4740-wdt.txt
@@ -0,0 +1,12 @@
+Ingenic Watchdog Timer (WDT) Controller for JZ4740
+
+Required properties:
+compatible: "ingenic,jz4740-watchdog"
+reg: Register address and length for watchdog registers
+
+Example:
+
+watchdog: jz4740-watchdog@0x10002000 {
+ compatible = "ingenic,jz4740-watchdog";
+ reg = <0x10002000 0x100>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/marvel.txt b/Documentation/devicetree/bindings/watchdog/marvel.txt
new file mode 100644
index 000000000..858ed9221
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/marvel.txt
@@ -0,0 +1,46 @@
+* Marvell Orion Watchdog Time
+
+Required Properties:
+
+- Compatibility : "marvell,orion-wdt"
+ "marvell,armada-370-wdt"
+ "marvell,armada-xp-wdt"
+ "marvell,armada-375-wdt"
+ "marvell,armada-380-wdt"
+
+- reg : Should contain two entries: first one with the
+ timer control address, second one with the
+ rstout enable address.
+
+For "marvell,armada-375-wdt" and "marvell,armada-380-wdt":
+
+- reg : A third entry is mandatory and should contain the
+ shared mask/unmask RSTOUT address.
+
+Clocks required for compatibles = "marvell,orion-wdt",
+ "marvell,armada-370-wdt":
+- clocks : Must contain a single entry describing the clock input
+
+Clocks required for compatibles = "marvell,armada-xp-wdt"
+ "marvell,armada-375-wdt"
+ "marvell,armada-380-wdt":
+- clocks : Must contain an entry for each entry in clock-names.
+- clock-names : Must include the following entries:
+ "nbclk" (L2/coherency fabric clock),
+ "fixed" (Reference 25 MHz fixed-clock).
+
+Optional properties:
+
+- interrupts : Contains the IRQ for watchdog expiration
+- timeout-sec : Contains the watchdog timeout in seconds
+
+Example:
+
+ wdt@20300 {
+ compatible = "marvell,orion-wdt";
+ reg = <0x20300 0x28>, <0x20108 0x4>;
+ interrupts = <3>;
+ timeout-sec = <10>;
+ status = "okay";
+ clocks = <&gate_clk 7>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/men-a021-wdt.txt b/Documentation/devicetree/bindings/watchdog/men-a021-wdt.txt
new file mode 100644
index 000000000..370dee322
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/men-a021-wdt.txt
@@ -0,0 +1,25 @@
+Bindings for MEN A21 Watchdog device connected to GPIO lines
+
+Required properties:
+- compatible: "men,a021-wdt"
+- gpios: Specifies the pins that control the Watchdog, order:
+ 1: Watchdog enable
+ 2: Watchdog fast-mode
+ 3: Watchdog trigger
+ 4: Watchdog reset cause bit 0
+ 5: Watchdog reset cause bit 1
+ 6: Watchdog reset cause bit 2
+
+Optional properties:
+- None
+
+Example:
+ watchdog {
+ compatible ="men,a021-wdt";
+ gpios = <&gpio3 9 1 /* WD_EN */
+ &gpio3 10 1 /* WD_FAST */
+ &gpio3 11 1 /* WD_TRIG */
+ &gpio3 6 1 /* RST_CAUSE[0] */
+ &gpio3 7 1 /* RST_CAUSE[1] */
+ &gpio3 8 1>; /* RST_CAUSE[2] */
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt b/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt
new file mode 100644
index 000000000..9200fc2d5
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/meson6-wdt.txt
@@ -0,0 +1,13 @@
+Meson SoCs Watchdog timer
+
+Required properties:
+
+- compatible : should be "amlogic,meson6-wdt"
+- reg : Specifies base physical address and size of the registers.
+
+Example:
+
+wdt: watchdog@c1109900 {
+ compatible = "amlogic,meson6-wdt";
+ reg = <0xc1109900 0x8>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/moxa,moxart-watchdog.txt b/Documentation/devicetree/bindings/watchdog/moxa,moxart-watchdog.txt
new file mode 100644
index 000000000..1169857d1
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/moxa,moxart-watchdog.txt
@@ -0,0 +1,15 @@
+MOXA ART Watchdog timer
+
+Required properties:
+
+- compatible : Must be "moxa,moxart-watchdog"
+- reg : Should contain registers location and length
+- clocks : Should contain phandle for the clock that drives the counter
+
+Example:
+
+ watchdog: watchdog@98500000 {
+ compatible = "moxa,moxart-watchdog";
+ reg = <0x98500000 0x10>;
+ clocks = <&coreclk>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
new file mode 100644
index 000000000..af9eb5b8a
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
@@ -0,0 +1,13 @@
+Mediatek SoCs Watchdog timer
+
+Required properties:
+
+- compatible : should be "mediatek,mt6589-wdt"
+- reg : Specifies base physical address and size of the registers.
+
+Example:
+
+wdt: watchdog@010000000 {
+ compatible = "mediatek,mt6589-wdt";
+ reg = <0x10000000 0x18>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/of-xilinx-wdt.txt b/Documentation/devicetree/bindings/watchdog/of-xilinx-wdt.txt
new file mode 100644
index 000000000..6d63782a7
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/of-xilinx-wdt.txt
@@ -0,0 +1,23 @@
+Xilinx AXI/PLB soft-core watchdog Device Tree Bindings
+---------------------------------------------------------
+
+Required properties:
+- compatible : Should be "xlnx,xps-timebase-wdt-1.00.a" or
+ "xlnx,xps-timebase-wdt-1.01.a".
+- reg : Physical base address and size
+
+Optional properties:
+- clock-frequency : Frequency of clock in Hz
+- xlnx,wdt-enable-once : 0 - Watchdog can be restarted
+ 1 - Watchdog can be enabled just once
+- xlnx,wdt-interval : Watchdog timeout interval in 2^<val> clock cycles,
+ <val> is integer from 8 to 31.
+
+Example:
+axi-timebase-wdt@40100000 {
+ clock-frequency = <50000000>;
+ compatible = "xlnx,xps-timebase-wdt-1.00.a";
+ reg = <0x40100000 0x10000>;
+ xlnx,wdt-enable-once = <0x0>;
+ xlnx,wdt-interval = <0x1b>;
+} ;
diff --git a/Documentation/devicetree/bindings/watchdog/omap-wdt.txt b/Documentation/devicetree/bindings/watchdog/omap-wdt.txt
new file mode 100644
index 000000000..c22797067
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/omap-wdt.txt
@@ -0,0 +1,14 @@
+TI Watchdog Timer (WDT) Controller for OMAP
+
+Required properties:
+compatible:
+- "ti,omap3-wdt" for OMAP3
+- "ti,omap4-wdt" for OMAP4
+- ti,hwmods: Name of the hwmod associated to the WDT
+
+Examples:
+
+wdt2: wdt@4a314000 {
+ compatible = "ti,omap4-wdt", "ti,omap3-wdt";
+ ti,hwmods = "wd_timer2";
+};
diff --git a/Documentation/devicetree/bindings/watchdog/pnx4008-wdt.txt b/Documentation/devicetree/bindings/watchdog/pnx4008-wdt.txt
new file mode 100644
index 000000000..556d06c17
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/pnx4008-wdt.txt
@@ -0,0 +1,17 @@
+* NXP PNX watchdog timer
+
+Required properties:
+- compatible: must be "nxp,pnx4008-wdt"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+Optional properties:
+- timeout-sec: contains the watchdog timeout in seconds.
+
+Example:
+
+ watchdog@4003C000 {
+ compatible = "nxp,pnx4008-wdt";
+ reg = <0x4003C000 0x1000>;
+ timeout-sec = <10>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/qca-ar7130-wdt.txt b/Documentation/devicetree/bindings/watchdog/qca-ar7130-wdt.txt
new file mode 100644
index 000000000..7a89e5f85
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/qca-ar7130-wdt.txt
@@ -0,0 +1,13 @@
+* Qualcomm Atheros AR7130 Watchdog Timer (WDT) Controller
+
+Required properties:
+- compatible: must be "qca,ar7130-wdt"
+- reg: physical base address of the controller and length of memory mapped
+ region.
+
+Example:
+
+wdt@18060008 {
+ compatible = "qca,ar9330-wdt", "qca,ar7130-wdt";
+ reg = <0x18060008 0x8>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt
new file mode 100644
index 000000000..4726924d0
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.txt
@@ -0,0 +1,24 @@
+Qualcomm Krait Processor Sub-system (KPSS) Watchdog
+---------------------------------------------------
+
+Required properties :
+- compatible : shall contain only one of the following:
+
+ "qcom,kpss-wdt-msm8960"
+ "qcom,kpss-wdt-apq8064"
+ "qcom,kpss-wdt-ipq8064"
+
+- reg : shall contain base register location and length
+- clocks : shall contain the input clock
+
+Optional properties :
+- timeout-sec : shall contain the default watchdog timeout in seconds,
+ if unset, the default timeout is 30 seconds
+
+Example:
+ watchdog@208a038 {
+ compatible = "qcom,kpss-wdt-ipq8064";
+ reg = <0x0208a038 0x40>;
+ clocks = <&sleep_clk>;
+ timeout-sec = <10>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/rt2880-wdt.txt b/Documentation/devicetree/bindings/watchdog/rt2880-wdt.txt
new file mode 100644
index 000000000..d7bab3db9
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/rt2880-wdt.txt
@@ -0,0 +1,19 @@
+Ralink Watchdog Timers
+
+Required properties:
+- compatible: must be "ralink,rt2880-wdt"
+- reg: physical base address of the controller and length of the register range
+
+Optional properties:
+- interrupt-parent: phandle to the INTC device node
+- interrupts: Specify the INTC interrupt number
+
+Example:
+
+ watchdog@120 {
+ compatible = "ralink,rt2880-wdt";
+ reg = <0x120 0x10>;
+
+ interrupt-parent = <&intc>;
+ interrupts = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
new file mode 100644
index 000000000..8f3d96af8
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt
@@ -0,0 +1,34 @@
+* Samsung's Watchdog Timer Controller
+
+The Samsung's Watchdog controller is used for resuming system operation
+after a preset amount of time during which the WDT reset event has not
+occurred.
+
+Required properties:
+- compatible : should be one among the following
+ (a) "samsung,s3c2410-wdt" for Exynos4 and previous SoCs
+ (b) "samsung,exynos5250-wdt" for Exynos5250
+ (c) "samsung,exynos5420-wdt" for Exynos5420
+ (c) "samsung,exynos7-wdt" for Exynos7
+
+- reg : base physical address of the controller and length of memory mapped
+ region.
+- interrupts : interrupt number to the cpu.
+- samsung,syscon-phandle : reference to syscon node (This property required only
+ in case of compatible being "samsung,exynos5250-wdt" or "samsung,exynos5420-wdt".
+ In case of Exynos5250 and 5420 this property points to syscon node holding the PMU
+ base address)
+
+Optional properties:
+- timeout-sec : contains the watchdog timeout in seconds.
+
+Example:
+
+watchdog@101D0000 {
+ compatible = "samsung,exynos5250-wdt";
+ reg = <0x101D0000 0x100>;
+ interrupts = <0 42 0>;
+ clocks = <&clock 336>;
+ clock-names = "watchdog";
+ samsung,syscon-phandle = <&pmu_syscon>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/sirfsoc_wdt.txt b/Documentation/devicetree/bindings/watchdog/sirfsoc_wdt.txt
new file mode 100644
index 000000000..9cbc76c89
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/sirfsoc_wdt.txt
@@ -0,0 +1,14 @@
+SiRFSoC Timer and Watchdog Timer(WDT) Controller
+
+Required properties:
+- compatible: "sirf,prima2-tick"
+- reg: Address range of tick timer/WDT register set
+- interrupts: interrupt number to the cpu
+
+Example:
+
+timer@b0020000 {
+ compatible = "sirf,prima2-tick";
+ reg = <0xb0020000 0x1000>;
+ interrupts = <0>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/stericsson-coh901327.txt b/Documentation/devicetree/bindings/watchdog/stericsson-coh901327.txt
new file mode 100644
index 000000000..8ffb88e39
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/stericsson-coh901327.txt
@@ -0,0 +1,19 @@
+ST-Ericsson COH 901 327 Watchdog timer
+
+Required properties:
+- compatible: must be "stericsson,coh901327".
+- reg: physical base address of the controller and length of memory mapped
+ region.
+- interrupts: the interrupt used for the watchdog timeout warning.
+
+Optional properties:
+- timeout-sec: contains the watchdog timeout in seconds.
+
+Example:
+
+watchdog: watchdog@c0012000 {
+ compatible = "stericsson,coh901327";
+ reg = <0xc0012000 0x1000>;
+ interrupts = <3>;
+ timeout-sec = <60>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
new file mode 100644
index 000000000..b8f75c514
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/sunxi-wdt.txt
@@ -0,0 +1,14 @@
+Allwinner SoCs Watchdog timer
+
+Required properties:
+
+- compatible : should be either "allwinner,sun4i-a10-wdt" or
+ "allwinner,sun6i-a31-wdt"
+- reg : Specifies base physical address and size of the registers.
+
+Example:
+
+wdt: watchdog@01c20c90 {
+ compatible = "allwinner,sun4i-a10-wdt";
+ reg = <0x01c20c90 0x10>;
+};
diff --git a/Documentation/devicetree/bindings/watchdog/twl4030-wdt.txt b/Documentation/devicetree/bindings/watchdog/twl4030-wdt.txt
new file mode 100644
index 000000000..80a37193c
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/twl4030-wdt.txt
@@ -0,0 +1,10 @@
+Device tree bindings for twl4030-wdt driver (TWL4030 watchdog)
+
+Required properties:
+ compatible = "ti,twl4030-wdt";
+
+Example:
+
+watchdog {
+ compatible = "ti,twl4030-wdt";
+};
diff --git a/Documentation/devicetree/bindings/x86/ce4100.txt b/Documentation/devicetree/bindings/x86/ce4100.txt
new file mode 100644
index 000000000..b49ae593a
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/ce4100.txt
@@ -0,0 +1,38 @@
+CE4100 Device Tree Bindings
+---------------------------
+
+The CE4100 SoC uses for in core peripherals the following compatible
+format: <vendor>,<chip>-<device>.
+Many of the "generic" devices like HPET or IO APIC have the ce4100
+name in their compatible property because they first appeared in this
+SoC.
+
+The CPU node
+------------
+ cpu@0 {
+ device_type = "cpu";
+ compatible = "intel,ce4100";
+ reg = <0>;
+ lapic = <&lapic0>;
+ };
+
+The reg property describes the CPU number. The lapic property points to
+the local APIC timer.
+
+The SoC node
+------------
+
+This node describes the in-core peripherals. Required property:
+ compatible = "intel,ce4100-cp";
+
+The PCI node
+------------
+This node describes the PCI bus on the SoC. Its property should be
+ compatible = "intel,ce4100-pci", "pci";
+
+If the OS is using the IO-APIC for interrupt routing then the reported
+interrupt numbers for devices is no longer true. In order to obtain the
+correct interrupt number, the child node which represents the device has
+to contain the interrupt property. Besides the interrupt property it has
+to contain at least the reg property containing the PCI bus address and
+compatible property according to "PCI Bus Binding Revision 2.1".
diff --git a/Documentation/devicetree/bindings/x86/interrupt.txt b/Documentation/devicetree/bindings/x86/interrupt.txt
new file mode 100644
index 000000000..7d19f494f
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/interrupt.txt
@@ -0,0 +1,26 @@
+Interrupt chips
+---------------
+
+* Intel I/O Advanced Programmable Interrupt Controller (IO APIC)
+
+ Required properties:
+ --------------------
+ compatible = "intel,ce4100-ioapic";
+ #interrupt-cells = <2>;
+
+ Device's interrupt property:
+
+ interrupts = <P S>;
+
+ The first number (P) represents the interrupt pin which is wired to the
+ IO APIC. The second number (S) represents the sense of interrupt which
+ should be configured and can be one of:
+ 0 - Edge Rising
+ 1 - Level Low
+ 2 - Level High
+ 3 - Edge Falling
+
+* Local APIC
+ Required property:
+
+ compatible = "intel,ce4100-lapic";
diff --git a/Documentation/devicetree/bindings/x86/timer.txt b/Documentation/devicetree/bindings/x86/timer.txt
new file mode 100644
index 000000000..c688af58e
--- /dev/null
+++ b/Documentation/devicetree/bindings/x86/timer.txt
@@ -0,0 +1,6 @@
+Timers
+------
+
+* High Precision Event Timer (HPET)
+ Required property:
+ compatible = "intel,ce4100-hpet";
diff --git a/Documentation/devicetree/bindings/xilinx.txt b/Documentation/devicetree/bindings/xilinx.txt
new file mode 100644
index 000000000..299d09235
--- /dev/null
+++ b/Documentation/devicetree/bindings/xilinx.txt
@@ -0,0 +1,306 @@
+ d) Xilinx IP cores
+
+ The Xilinx EDK toolchain ships with a set of IP cores (devices) for use
+ in Xilinx Spartan and Virtex FPGAs. The devices cover the whole range
+ of standard device types (network, serial, etc.) and miscellaneous
+ devices (gpio, LCD, spi, etc). Also, since these devices are
+ implemented within the fpga fabric every instance of the device can be
+ synthesised with different options that change the behaviour.
+
+ Each IP-core has a set of parameters which the FPGA designer can use to
+ control how the core is synthesized. Historically, the EDK tool would
+ extract the device parameters relevant to device drivers and copy them
+ into an 'xparameters.h' in the form of #define symbols. This tells the
+ device drivers how the IP cores are configured, but it requires the kernel
+ to be recompiled every time the FPGA bitstream is resynthesized.
+
+ The new approach is to export the parameters into the device tree and
+ generate a new device tree each time the FPGA bitstream changes. The
+ parameters which used to be exported as #defines will now become
+ properties of the device node. In general, device nodes for IP-cores
+ will take the following form:
+
+ (name): (generic-name)@(base-address) {
+ compatible = "xlnx,(ip-core-name)-(HW_VER)"
+ [, (list of compatible devices), ...];
+ reg = <(baseaddr) (size)>;
+ interrupt-parent = <&interrupt-controller-phandle>;
+ interrupts = < ... >;
+ xlnx,(parameter1) = "(string-value)";
+ xlnx,(parameter2) = <(int-value)>;
+ };
+
+ (generic-name): an open firmware-style name that describes the
+ generic class of device. Preferably, this is one word, such
+ as 'serial' or 'ethernet'.
+ (ip-core-name): the name of the ip block (given after the BEGIN
+ directive in system.mhs). Should be in lowercase
+ and all underscores '_' converted to dashes '-'.
+ (name): is derived from the "PARAMETER INSTANCE" value.
+ (parameter#): C_* parameters from system.mhs. The C_ prefix is
+ dropped from the parameter name, the name is converted
+ to lowercase and all underscore '_' characters are
+ converted to dashes '-'.
+ (baseaddr): the baseaddr parameter value (often named C_BASEADDR).
+ (HW_VER): from the HW_VER parameter.
+ (size): the address range size (often C_HIGHADDR - C_BASEADDR + 1).
+
+ Typically, the compatible list will include the exact IP core version
+ followed by an older IP core version which implements the same
+ interface or any other device with the same interface.
+
+ 'reg', 'interrupt-parent' and 'interrupts' are all optional properties.
+
+ For example, the following block from system.mhs:
+
+ BEGIN opb_uartlite
+ PARAMETER INSTANCE = opb_uartlite_0
+ PARAMETER HW_VER = 1.00.b
+ PARAMETER C_BAUDRATE = 115200
+ PARAMETER C_DATA_BITS = 8
+ PARAMETER C_ODD_PARITY = 0
+ PARAMETER C_USE_PARITY = 0
+ PARAMETER C_CLK_FREQ = 50000000
+ PARAMETER C_BASEADDR = 0xEC100000
+ PARAMETER C_HIGHADDR = 0xEC10FFFF
+ BUS_INTERFACE SOPB = opb_7
+ PORT OPB_Clk = CLK_50MHz
+ PORT Interrupt = opb_uartlite_0_Interrupt
+ PORT RX = opb_uartlite_0_RX
+ PORT TX = opb_uartlite_0_TX
+ PORT OPB_Rst = sys_bus_reset_0
+ END
+
+ becomes the following device tree node:
+
+ opb_uartlite_0: serial@ec100000 {
+ device_type = "serial";
+ compatible = "xlnx,opb-uartlite-1.00.b";
+ reg = <ec100000 10000>;
+ interrupt-parent = <&opb_intc_0>;
+ interrupts = <1 0>; // got this from the opb_intc parameters
+ current-speed = <d#115200>; // standard serial device prop
+ clock-frequency = <d#50000000>; // standard serial device prop
+ xlnx,data-bits = <8>;
+ xlnx,odd-parity = <0>;
+ xlnx,use-parity = <0>;
+ };
+
+ Some IP cores actually implement 2 or more logical devices. In
+ this case, the device should still describe the whole IP core with
+ a single node and add a child node for each logical device. The
+ ranges property can be used to translate from parent IP-core to the
+ registers of each device. In addition, the parent node should be
+ compatible with the bus type 'xlnx,compound', and should contain
+ #address-cells and #size-cells, as with any other bus. (Note: this
+ makes the assumption that both logical devices have the same bus
+ binding. If this is not true, then separate nodes should be used
+ for each logical device). The 'cell-index' property can be used to
+ enumerate logical devices within an IP core. For example, the
+ following is the system.mhs entry for the dual ps2 controller found
+ on the ml403 reference design.
+
+ BEGIN opb_ps2_dual_ref
+ PARAMETER INSTANCE = opb_ps2_dual_ref_0
+ PARAMETER HW_VER = 1.00.a
+ PARAMETER C_BASEADDR = 0xA9000000
+ PARAMETER C_HIGHADDR = 0xA9001FFF
+ BUS_INTERFACE SOPB = opb_v20_0
+ PORT Sys_Intr1 = ps2_1_intr
+ PORT Sys_Intr2 = ps2_2_intr
+ PORT Clkin1 = ps2_clk_rx_1
+ PORT Clkin2 = ps2_clk_rx_2
+ PORT Clkpd1 = ps2_clk_tx_1
+ PORT Clkpd2 = ps2_clk_tx_2
+ PORT Rx1 = ps2_d_rx_1
+ PORT Rx2 = ps2_d_rx_2
+ PORT Txpd1 = ps2_d_tx_1
+ PORT Txpd2 = ps2_d_tx_2
+ END
+
+ It would result in the following device tree nodes:
+
+ opb_ps2_dual_ref_0: opb-ps2-dual-ref@a9000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "xlnx,compound";
+ ranges = <0 a9000000 2000>;
+ // If this device had extra parameters, then they would
+ // go here.
+ ps2@0 {
+ compatible = "xlnx,opb-ps2-dual-ref-1.00.a";
+ reg = <0 40>;
+ interrupt-parent = <&opb_intc_0>;
+ interrupts = <3 0>;
+ cell-index = <0>;
+ };
+ ps2@1000 {
+ compatible = "xlnx,opb-ps2-dual-ref-1.00.a";
+ reg = <1000 40>;
+ interrupt-parent = <&opb_intc_0>;
+ interrupts = <3 0>;
+ cell-index = <0>;
+ };
+ };
+
+ Also, the system.mhs file defines bus attachments from the processor
+ to the devices. The device tree structure should reflect the bus
+ attachments. Again an example; this system.mhs fragment:
+
+ BEGIN ppc405_virtex4
+ PARAMETER INSTANCE = ppc405_0
+ PARAMETER HW_VER = 1.01.a
+ BUS_INTERFACE DPLB = plb_v34_0
+ BUS_INTERFACE IPLB = plb_v34_0
+ END
+
+ BEGIN opb_intc
+ PARAMETER INSTANCE = opb_intc_0
+ PARAMETER HW_VER = 1.00.c
+ PARAMETER C_BASEADDR = 0xD1000FC0
+ PARAMETER C_HIGHADDR = 0xD1000FDF
+ BUS_INTERFACE SOPB = opb_v20_0
+ END
+
+ BEGIN opb_uart16550
+ PARAMETER INSTANCE = opb_uart16550_0
+ PARAMETER HW_VER = 1.00.d
+ PARAMETER C_BASEADDR = 0xa0000000
+ PARAMETER C_HIGHADDR = 0xa0001FFF
+ BUS_INTERFACE SOPB = opb_v20_0
+ END
+
+ BEGIN plb_v34
+ PARAMETER INSTANCE = plb_v34_0
+ PARAMETER HW_VER = 1.02.a
+ END
+
+ BEGIN plb_bram_if_cntlr
+ PARAMETER INSTANCE = plb_bram_if_cntlr_0
+ PARAMETER HW_VER = 1.00.b
+ PARAMETER C_BASEADDR = 0xFFFF0000
+ PARAMETER C_HIGHADDR = 0xFFFFFFFF
+ BUS_INTERFACE SPLB = plb_v34_0
+ END
+
+ BEGIN plb2opb_bridge
+ PARAMETER INSTANCE = plb2opb_bridge_0
+ PARAMETER HW_VER = 1.01.a
+ PARAMETER C_RNG0_BASEADDR = 0x20000000
+ PARAMETER C_RNG0_HIGHADDR = 0x3FFFFFFF
+ PARAMETER C_RNG1_BASEADDR = 0x60000000
+ PARAMETER C_RNG1_HIGHADDR = 0x7FFFFFFF
+ PARAMETER C_RNG2_BASEADDR = 0x80000000
+ PARAMETER C_RNG2_HIGHADDR = 0xBFFFFFFF
+ PARAMETER C_RNG3_BASEADDR = 0xC0000000
+ PARAMETER C_RNG3_HIGHADDR = 0xDFFFFFFF
+ BUS_INTERFACE SPLB = plb_v34_0
+ BUS_INTERFACE MOPB = opb_v20_0
+ END
+
+ Gives this device tree (some properties removed for clarity):
+
+ plb@0 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "xlnx,plb-v34-1.02.a";
+ device_type = "ibm,plb";
+ ranges; // 1:1 translation
+
+ plb_bram_if_cntrl_0: bram@ffff0000 {
+ reg = <ffff0000 10000>;
+ }
+
+ opb@20000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <20000000 20000000 20000000
+ 60000000 60000000 20000000
+ 80000000 80000000 40000000
+ c0000000 c0000000 20000000>;
+
+ opb_uart16550_0: serial@a0000000 {
+ reg = <a00000000 2000>;
+ };
+
+ opb_intc_0: interrupt-controller@d1000fc0 {
+ reg = <d1000fc0 20>;
+ };
+ };
+ };
+
+ That covers the general approach to binding xilinx IP cores into the
+ device tree. The following are bindings for specific devices:
+
+ i) Xilinx ML300 Framebuffer
+
+ Simple framebuffer device from the ML300 reference design (also on the
+ ML403 reference design as well as others).
+
+ Optional properties:
+ - resolution = <xres yres> : pixel resolution of framebuffer. Some
+ implementations use a different resolution.
+ Default is <d#640 d#480>
+ - virt-resolution = <xvirt yvirt> : Size of framebuffer in memory.
+ Default is <d#1024 d#480>.
+ - rotate-display (empty) : rotate display 180 degrees.
+
+ ii) Xilinx SystemACE
+
+ The Xilinx SystemACE device is used to program FPGAs from an FPGA
+ bitstream stored on a CF card. It can also be used as a generic CF
+ interface device.
+
+ Optional properties:
+ - 8-bit (empty) : Set this property for SystemACE in 8 bit mode
+
+ iii) Xilinx EMAC and Xilinx TEMAC
+
+ Xilinx Ethernet devices. In addition to general xilinx properties
+ listed above, nodes for these devices should include a phy-handle
+ property, and may include other common network device properties
+ like local-mac-address.
+
+ iv) Xilinx Uartlite
+
+ Xilinx uartlite devices are simple fixed speed serial ports.
+
+ Required properties:
+ - current-speed : Baud rate of uartlite
+
+ v) Xilinx hwicap
+
+ Xilinx hwicap devices provide access to the configuration logic
+ of the FPGA through the Internal Configuration Access Port
+ (ICAP). The ICAP enables partial reconfiguration of the FPGA,
+ readback of the configuration information, and some control over
+ 'warm boots' of the FPGA fabric.
+
+ Required properties:
+ - xlnx,family : The family of the FPGA, necessary since the
+ capabilities of the underlying ICAP hardware
+ differ between different families. May be
+ 'virtex2p', 'virtex4', or 'virtex5'.
+
+ vi) Xilinx Uart 16550
+
+ Xilinx UART 16550 devices are very similar to the NS16550 but with
+ different register spacing and an offset from the base address.
+
+ Required properties:
+ - clock-frequency : Frequency of the clock input
+ - reg-offset : A value of 3 is required
+ - reg-shift : A value of 2 is required
+
+ vii) Xilinx USB Host controller
+
+ The Xilinx USB host controller is EHCI compatible but with a different
+ base address for the EHCI registers, and it is always a big-endian
+ USB Host controller. The hardware can be configured as high speed only,
+ or high speed/full speed hybrid.
+
+ Required properties:
+ - xlnx,support-usb-fs: A value 0 means the core is built as high speed
+ only. A value 1 means the core also supports
+ full speed devices.
+
diff --git a/Documentation/devicetree/bindings/xillybus/xillybus.txt b/Documentation/devicetree/bindings/xillybus/xillybus.txt
new file mode 100644
index 000000000..9e316dc2e
--- /dev/null
+++ b/Documentation/devicetree/bindings/xillybus/xillybus.txt
@@ -0,0 +1,20 @@
+* Xillybus driver for generic FPGA interface
+
+Required properties:
+- compatible: Should be "xillybus,xillybus-1.00.a"
+- reg: Address and length of the register set for the device
+- interrupts: Contains one interrupt node, typically consisting of three cells.
+- interrupt-parent: the phandle for the interrupt controller that
+ services interrupts for this device.
+
+Optional properties:
+- dma-coherent: Present if DMA operations are coherent
+
+Example:
+
+ xillybus@ff200400 {
+ compatible = "xillybus,xillybus-1.00.a";
+ reg = < 0xff200400 0x00000080 >;
+ interrupts = < 0 40 1 >;
+ interrupt-parent = <&intc>;
+ } ;
diff --git a/Documentation/devicetree/booting-without-of.txt b/Documentation/devicetree/booting-without-of.txt
new file mode 100644
index 000000000..e49e42326
--- /dev/null
+++ b/Documentation/devicetree/booting-without-of.txt
@@ -0,0 +1,1529 @@
+ Booting the Linux/ppc kernel without Open Firmware
+ --------------------------------------------------
+
+(c) 2005 Benjamin Herrenschmidt <benh at kernel.crashing.org>,
+ IBM Corp.
+(c) 2005 Becky Bruce <becky.bruce at freescale.com>,
+ Freescale Semiconductor, FSL SOC and 32-bit additions
+(c) 2006 MontaVista Software, Inc.
+ Flash chip node definition
+
+Table of Contents
+=================
+
+ I - Introduction
+ 1) Entry point for arch/arm
+ 2) Entry point for arch/powerpc
+ 3) Entry point for arch/x86
+ 4) Entry point for arch/mips/bmips
+
+ II - The DT block format
+ 1) Header
+ 2) Device tree generalities
+ 3) Device tree "structure" block
+ 4) Device tree "strings" block
+
+ III - Required content of the device tree
+ 1) Note about cells and address representation
+ 2) Note about "compatible" properties
+ 3) Note about "name" properties
+ 4) Note about node and property names and character set
+ 5) Required nodes and properties
+ a) The root node
+ b) The /cpus node
+ c) The /cpus/* nodes
+ d) the /memory node(s)
+ e) The /chosen node
+ f) the /soc<SOCname> node
+
+ IV - "dtc", the device tree compiler
+
+ V - Recommendations for a bootloader
+
+ VI - System-on-a-chip devices and nodes
+ 1) Defining child nodes of an SOC
+ 2) Representing devices without a current OF specification
+
+ VII - Specifying interrupt information for devices
+ 1) interrupts property
+ 2) interrupt-parent property
+ 3) OpenPIC Interrupt Controllers
+ 4) ISA Interrupt Controllers
+
+ VIII - Specifying device power management information (sleep property)
+
+ IX - Specifying dma bus information
+
+ Appendix A - Sample SOC node for MPC8540
+
+
+Revision Information
+====================
+
+ May 18, 2005: Rev 0.1 - Initial draft, no chapter III yet.
+
+ May 19, 2005: Rev 0.2 - Add chapter III and bits & pieces here or
+ clarifies the fact that a lot of things are
+ optional, the kernel only requires a very
+ small device tree, though it is encouraged
+ to provide an as complete one as possible.
+
+ May 24, 2005: Rev 0.3 - Precise that DT block has to be in RAM
+ - Misc fixes
+ - Define version 3 and new format version 16
+ for the DT block (version 16 needs kernel
+ patches, will be fwd separately).
+ String block now has a size, and full path
+ is replaced by unit name for more
+ compactness.
+ linux,phandle is made optional, only nodes
+ that are referenced by other nodes need it.
+ "name" property is now automatically
+ deduced from the unit name
+
+ June 1, 2005: Rev 0.4 - Correct confusion between OF_DT_END and
+ OF_DT_END_NODE in structure definition.
+ - Change version 16 format to always align
+ property data to 4 bytes. Since tokens are
+ already aligned, that means no specific
+ required alignment between property size
+ and property data. The old style variable
+ alignment would make it impossible to do
+ "simple" insertion of properties using
+ memmove (thanks Milton for
+ noticing). Updated kernel patch as well
+ - Correct a few more alignment constraints
+ - Add a chapter about the device-tree
+ compiler and the textural representation of
+ the tree that can be "compiled" by dtc.
+
+ November 21, 2005: Rev 0.5
+ - Additions/generalizations for 32-bit
+ - Changed to reflect the new arch/powerpc
+ structure
+ - Added chapter VI
+
+
+ ToDo:
+ - Add some definitions of interrupt tree (simple/complex)
+ - Add some definitions for PCI host bridges
+ - Add some common address format examples
+ - Add definitions for standard properties and "compatible"
+ names for cells that are not already defined by the existing
+ OF spec.
+ - Compare FSL SOC use of PCI to standard and make sure no new
+ node definition required.
+ - Add more information about node definitions for SOC devices
+ that currently have no standard, like the FSL CPM.
+
+
+I - Introduction
+================
+
+During the development of the Linux/ppc64 kernel, and more
+specifically, the addition of new platform types outside of the old
+IBM pSeries/iSeries pair, it was decided to enforce some strict rules
+regarding the kernel entry and bootloader <-> kernel interfaces, in
+order to avoid the degeneration that had become the ppc32 kernel entry
+point and the way a new platform should be added to the kernel. The
+legacy iSeries platform breaks those rules as it predates this scheme,
+but no new board support will be accepted in the main tree that
+doesn't follow them properly. In addition, since the advent of the
+arch/powerpc merged architecture for ppc32 and ppc64, new 32-bit
+platforms and 32-bit platforms which move into arch/powerpc will be
+required to use these rules as well.
+
+The main requirement that will be defined in more detail below is
+the presence of a device-tree whose format is defined after Open
+Firmware specification. However, in order to make life easier
+to embedded board vendors, the kernel doesn't require the device-tree
+to represent every device in the system and only requires some nodes
+and properties to be present. This will be described in detail in
+section III, but, for example, the kernel does not require you to
+create a node for every PCI device in the system. It is a requirement
+to have a node for PCI host bridges in order to provide interrupt
+routing information and memory/IO ranges, among others. It is also
+recommended to define nodes for on chip devices and other buses that
+don't specifically fit in an existing OF specification. This creates a
+great flexibility in the way the kernel can then probe those and match
+drivers to device, without having to hard code all sorts of tables. It
+also makes it more flexible for board vendors to do minor hardware
+upgrades without significantly impacting the kernel code or cluttering
+it with special cases.
+
+
+1) Entry point for arch/arm
+---------------------------
+
+ There is one single entry point to the kernel, at the start
+ of the kernel image. That entry point supports two calling
+ conventions. A summary of the interface is described here. A full
+ description of the boot requirements is documented in
+ Documentation/arm/Booting
+
+ a) ATAGS interface. Minimal information is passed from firmware
+ to the kernel with a tagged list of predefined parameters.
+
+ r0 : 0
+
+ r1 : Machine type number
+
+ r2 : Physical address of tagged list in system RAM
+
+ b) Entry with a flattened device-tree block. Firmware loads the
+ physical address of the flattened device tree block (dtb) into r2,
+ r1 is not used, but it is considered good practice to use a valid
+ machine number as described in Documentation/arm/Booting.
+
+ r0 : 0
+
+ r1 : Valid machine type number. When using a device tree,
+ a single machine type number will often be assigned to
+ represent a class or family of SoCs.
+
+ r2 : physical pointer to the device-tree block
+ (defined in chapter II) in RAM. Device tree can be located
+ anywhere in system RAM, but it should be aligned on a 64 bit
+ boundary.
+
+ The kernel will differentiate between ATAGS and device tree booting by
+ reading the memory pointed to by r2 and looking for either the flattened
+ device tree block magic value (0xd00dfeed) or the ATAG_CORE value at
+ offset 0x4 from r2 (0x54410001).
+
+2) Entry point for arch/powerpc
+-------------------------------
+
+ There is one single entry point to the kernel, at the start
+ of the kernel image. That entry point supports two calling
+ conventions:
+
+ a) Boot from Open Firmware. If your firmware is compatible
+ with Open Firmware (IEEE 1275) or provides an OF compatible
+ client interface API (support for "interpret" callback of
+ forth words isn't required), you can enter the kernel with:
+
+ r5 : OF callback pointer as defined by IEEE 1275
+ bindings to powerpc. Only the 32-bit client interface
+ is currently supported
+
+ r3, r4 : address & length of an initrd if any or 0
+
+ The MMU is either on or off; the kernel will run the
+ trampoline located in arch/powerpc/kernel/prom_init.c to
+ extract the device-tree and other information from open
+ firmware and build a flattened device-tree as described
+ in b). prom_init() will then re-enter the kernel using
+ the second method. This trampoline code runs in the
+ context of the firmware, which is supposed to handle all
+ exceptions during that time.
+
+ b) Direct entry with a flattened device-tree block. This entry
+ point is called by a) after the OF trampoline and can also be
+ called directly by a bootloader that does not support the Open
+ Firmware client interface. It is also used by "kexec" to
+ implement "hot" booting of a new kernel from a previous
+ running one. This method is what I will describe in more
+ details in this document, as method a) is simply standard Open
+ Firmware, and thus should be implemented according to the
+ various standard documents defining it and its binding to the
+ PowerPC platform. The entry point definition then becomes:
+
+ r3 : physical pointer to the device-tree block
+ (defined in chapter II) in RAM
+
+ r4 : physical pointer to the kernel itself. This is
+ used by the assembly code to properly disable the MMU
+ in case you are entering the kernel with MMU enabled
+ and a non-1:1 mapping.
+
+ r5 : NULL (as to differentiate with method a)
+
+ Note about SMP entry: Either your firmware puts your other
+ CPUs in some sleep loop or spin loop in ROM where you can get
+ them out via a soft reset or some other means, in which case
+ you don't need to care, or you'll have to enter the kernel
+ with all CPUs. The way to do that with method b) will be
+ described in a later revision of this document.
+
+ Board supports (platforms) are not exclusive config options. An
+ arbitrary set of board supports can be built in a single kernel
+ image. The kernel will "know" what set of functions to use for a
+ given platform based on the content of the device-tree. Thus, you
+ should:
+
+ a) add your platform support as a _boolean_ option in
+ arch/powerpc/Kconfig, following the example of PPC_PSERIES,
+ PPC_PMAC and PPC_MAPLE. The later is probably a good
+ example of a board support to start from.
+
+ b) create your main platform file as
+ "arch/powerpc/platforms/myplatform/myboard_setup.c" and add it
+ to the Makefile under the condition of your CONFIG_
+ option. This file will define a structure of type "ppc_md"
+ containing the various callbacks that the generic code will
+ use to get to your platform specific code
+
+ A kernel image may support multiple platforms, but only if the
+ platforms feature the same core architecture. A single kernel build
+ cannot support both configurations with Book E and configurations
+ with classic Powerpc architectures.
+
+3) Entry point for arch/x86
+-------------------------------
+
+ There is one single 32bit entry point to the kernel at code32_start,
+ the decompressor (the real mode entry point goes to the same 32bit
+ entry point once it switched into protected mode). That entry point
+ supports one calling convention which is documented in
+ Documentation/x86/boot.txt
+ The physical pointer to the device-tree block (defined in chapter II)
+ is passed via setup_data which requires at least boot protocol 2.09.
+ The type filed is defined as
+
+ #define SETUP_DTB 2
+
+ This device-tree is used as an extension to the "boot page". As such it
+ does not parse / consider data which is already covered by the boot
+ page. This includes memory size, reserved ranges, command line arguments
+ or initrd address. It simply holds information which can not be retrieved
+ otherwise like interrupt routing or a list of devices behind an I2C bus.
+
+4) Entry point for arch/mips/bmips
+----------------------------------
+
+ Some bootloaders only support a single entry point, at the start of the
+ kernel image. Other bootloaders will jump to the ELF start address.
+ Both schemes are supported; CONFIG_BOOT_RAW=y and CONFIG_NO_EXCEPT_FILL=y,
+ so the first instruction immediately jumps to kernel_entry().
+
+ Similar to the arch/arm case (b), a DT-aware bootloader is expected to
+ set up the following registers:
+
+ a0 : 0
+
+ a1 : 0xffffffff
+
+ a2 : Physical pointer to the device tree block (defined in chapter
+ II) in RAM. The device tree can be located anywhere in the first
+ 512MB of the physical address space (0x00000000 - 0x1fffffff),
+ aligned on a 64 bit boundary.
+
+ Legacy bootloaders do not use this convention, and they do not pass in a
+ DT block. In this case, Linux will look for a builtin DTB, selected via
+ CONFIG_DT_*.
+
+ This convention is defined for 32-bit systems only, as there are not
+ currently any 64-bit BMIPS implementations.
+
+II - The DT block format
+========================
+
+
+This chapter defines the actual format of the flattened device-tree
+passed to the kernel. The actual content of it and kernel requirements
+are described later. You can find example of code manipulating that
+format in various places, including arch/powerpc/kernel/prom_init.c
+which will generate a flattened device-tree from the Open Firmware
+representation, or the fs2dt utility which is part of the kexec tools
+which will generate one from a filesystem representation. It is
+expected that a bootloader like uboot provides a bit more support,
+that will be discussed later as well.
+
+Note: The block has to be in main memory. It has to be accessible in
+both real mode and virtual mode with no mapping other than main
+memory. If you are writing a simple flash bootloader, it should copy
+the block to RAM before passing it to the kernel.
+
+
+1) Header
+---------
+
+ The kernel is passed the physical address pointing to an area of memory
+ that is roughly described in include/linux/of_fdt.h by the structure
+ boot_param_header:
+
+struct boot_param_header {
+ u32 magic; /* magic word OF_DT_HEADER */
+ u32 totalsize; /* total size of DT block */
+ u32 off_dt_struct; /* offset to structure */
+ u32 off_dt_strings; /* offset to strings */
+ u32 off_mem_rsvmap; /* offset to memory reserve map
+ */
+ u32 version; /* format version */
+ u32 last_comp_version; /* last compatible version */
+
+ /* version 2 fields below */
+ u32 boot_cpuid_phys; /* Which physical CPU id we're
+ booting on */
+ /* version 3 fields below */
+ u32 size_dt_strings; /* size of the strings block */
+
+ /* version 17 fields below */
+ u32 size_dt_struct; /* size of the DT structure block */
+};
+
+ Along with the constants:
+
+/* Definitions used by the flattened device tree */
+#define OF_DT_HEADER 0xd00dfeed /* 4: version,
+ 4: total size */
+#define OF_DT_BEGIN_NODE 0x1 /* Start node: full name
+ */
+#define OF_DT_END_NODE 0x2 /* End node */
+#define OF_DT_PROP 0x3 /* Property: name off,
+ size, content */
+#define OF_DT_END 0x9
+
+ All values in this header are in big endian format, the various
+ fields in this header are defined more precisely below. All
+ "offset" values are in bytes from the start of the header; that is
+ from the physical base address of the device tree block.
+
+ - magic
+
+ This is a magic value that "marks" the beginning of the
+ device-tree block header. It contains the value 0xd00dfeed and is
+ defined by the constant OF_DT_HEADER
+
+ - totalsize
+
+ This is the total size of the DT block including the header. The
+ "DT" block should enclose all data structures defined in this
+ chapter (who are pointed to by offsets in this header). That is,
+ the device-tree structure, strings, and the memory reserve map.
+
+ - off_dt_struct
+
+ This is an offset from the beginning of the header to the start
+ of the "structure" part the device tree. (see 2) device tree)
+
+ - off_dt_strings
+
+ This is an offset from the beginning of the header to the start
+ of the "strings" part of the device-tree
+
+ - off_mem_rsvmap
+
+ This is an offset from the beginning of the header to the start
+ of the reserved memory map. This map is a list of pairs of 64-
+ bit integers. Each pair is a physical address and a size. The
+ list is terminated by an entry of size 0. This map provides the
+ kernel with a list of physical memory areas that are "reserved"
+ and thus not to be used for memory allocations, especially during
+ early initialization. The kernel needs to allocate memory during
+ boot for things like un-flattening the device-tree, allocating an
+ MMU hash table, etc... Those allocations must be done in such a
+ way to avoid overriding critical things like, on Open Firmware
+ capable machines, the RTAS instance, or on some pSeries, the TCE
+ tables used for the iommu. Typically, the reserve map should
+ contain _at least_ this DT block itself (header,total_size). If
+ you are passing an initrd to the kernel, you should reserve it as
+ well. You do not need to reserve the kernel image itself. The map
+ should be 64-bit aligned.
+
+ - version
+
+ This is the version of this structure. Version 1 stops
+ here. Version 2 adds an additional field boot_cpuid_phys.
+ Version 3 adds the size of the strings block, allowing the kernel
+ to reallocate it easily at boot and free up the unused flattened
+ structure after expansion. Version 16 introduces a new more
+ "compact" format for the tree itself that is however not backward
+ compatible. Version 17 adds an additional field, size_dt_struct,
+ allowing it to be reallocated or moved more easily (this is
+ particularly useful for bootloaders which need to make
+ adjustments to a device tree based on probed information). You
+ should always generate a structure of the highest version defined
+ at the time of your implementation. Currently that is version 17,
+ unless you explicitly aim at being backward compatible.
+
+ - last_comp_version
+
+ Last compatible version. This indicates down to what version of
+ the DT block you are backward compatible. For example, version 2
+ is backward compatible with version 1 (that is, a kernel build
+ for version 1 will be able to boot with a version 2 format). You
+ should put a 1 in this field if you generate a device tree of
+ version 1 to 3, or 16 if you generate a tree of version 16 or 17
+ using the new unit name format.
+
+ - boot_cpuid_phys
+
+ This field only exist on version 2 headers. It indicate which
+ physical CPU ID is calling the kernel entry point. This is used,
+ among others, by kexec. If you are on an SMP system, this value
+ should match the content of the "reg" property of the CPU node in
+ the device-tree corresponding to the CPU calling the kernel entry
+ point (see further chapters for more information on the required
+ device-tree contents)
+
+ - size_dt_strings
+
+ This field only exists on version 3 and later headers. It
+ gives the size of the "strings" section of the device tree (which
+ starts at the offset given by off_dt_strings).
+
+ - size_dt_struct
+
+ This field only exists on version 17 and later headers. It gives
+ the size of the "structure" section of the device tree (which
+ starts at the offset given by off_dt_struct).
+
+ So the typical layout of a DT block (though the various parts don't
+ need to be in that order) looks like this (addresses go from top to
+ bottom):
+
+
+ ------------------------------
+ base -> | struct boot_param_header |
+ ------------------------------
+ | (alignment gap) (*) |
+ ------------------------------
+ | memory reserve map |
+ ------------------------------
+ | (alignment gap) |
+ ------------------------------
+ | |
+ | device-tree structure |
+ | |
+ ------------------------------
+ | (alignment gap) |
+ ------------------------------
+ | |
+ | device-tree strings |
+ | |
+ -----> ------------------------------
+ |
+ |
+ --- (base + totalsize)
+
+ (*) The alignment gaps are not necessarily present; their presence
+ and size are dependent on the various alignment requirements of
+ the individual data blocks.
+
+
+2) Device tree generalities
+---------------------------
+
+This device-tree itself is separated in two different blocks, a
+structure block and a strings block. Both need to be aligned to a 4
+byte boundary.
+
+First, let's quickly describe the device-tree concept before detailing
+the storage format. This chapter does _not_ describe the detail of the
+required types of nodes & properties for the kernel, this is done
+later in chapter III.
+
+The device-tree layout is strongly inherited from the definition of
+the Open Firmware IEEE 1275 device-tree. It's basically a tree of
+nodes, each node having two or more named properties. A property can
+have a value or not.
+
+It is a tree, so each node has one and only one parent except for the
+root node who has no parent.
+
+A node has 2 names. The actual node name is generally contained in a
+property of type "name" in the node property list whose value is a
+zero terminated string and is mandatory for version 1 to 3 of the
+format definition (as it is in Open Firmware). Version 16 makes it
+optional as it can generate it from the unit name defined below.
+
+There is also a "unit name" that is used to differentiate nodes with
+the same name at the same level, it is usually made of the node
+names, the "@" sign, and a "unit address", which definition is
+specific to the bus type the node sits on.
+
+The unit name doesn't exist as a property per-se but is included in
+the device-tree structure. It is typically used to represent "path" in
+the device-tree. More details about the actual format of these will be
+below.
+
+The kernel generic code does not make any formal use of the
+unit address (though some board support code may do) so the only real
+requirement here for the unit address is to ensure uniqueness of
+the node unit name at a given level of the tree. Nodes with no notion
+of address and no possible sibling of the same name (like /memory or
+/cpus) may omit the unit address in the context of this specification,
+or use the "@0" default unit address. The unit name is used to define
+a node "full path", which is the concatenation of all parent node
+unit names separated with "/".
+
+The root node doesn't have a defined name, and isn't required to have
+a name property either if you are using version 3 or earlier of the
+format. It also has no unit address (no @ symbol followed by a unit
+address). The root node unit name is thus an empty string. The full
+path to the root node is "/".
+
+Every node which actually represents an actual device (that is, a node
+which isn't only a virtual "container" for more nodes, like "/cpus"
+is) is also required to have a "compatible" property indicating the
+specific hardware and an optional list of devices it is fully
+backwards compatible with.
+
+Finally, every node that can be referenced from a property in another
+node is required to have either a "phandle" or a "linux,phandle"
+property. Real Open Firmware implementations provide a unique
+"phandle" value for every node that the "prom_init()" trampoline code
+turns into "linux,phandle" properties. However, this is made optional
+if the flattened device tree is used directly. An example of a node
+referencing another node via "phandle" is when laying out the
+interrupt tree which will be described in a further version of this
+document.
+
+The "phandle" property is a 32-bit value that uniquely
+identifies a node. You are free to use whatever values or system of
+values, internal pointers, or whatever to generate these, the only
+requirement is that every node for which you provide that property has
+a unique value for it.
+
+Here is an example of a simple device-tree. In this example, an "o"
+designates a node followed by the node unit name. Properties are
+presented with their name followed by their content. "content"
+represents an ASCII string (zero terminated) value, while <content>
+represents a 32-bit value, specified in decimal or hexadecimal (the
+latter prefixed 0x). The various nodes in this example will be
+discussed in a later chapter. At this point, it is only meant to give
+you a idea of what a device-tree looks like. I have purposefully kept
+the "name" and "linux,phandle" properties which aren't necessary in
+order to give you a better idea of what the tree looks like in
+practice.
+
+ / o device-tree
+ |- name = "device-tree"
+ |- model = "MyBoardName"
+ |- compatible = "MyBoardFamilyName"
+ |- #address-cells = <2>
+ |- #size-cells = <2>
+ |- linux,phandle = <0>
+ |
+ o cpus
+ | | - name = "cpus"
+ | | - linux,phandle = <1>
+ | | - #address-cells = <1>
+ | | - #size-cells = <0>
+ | |
+ | o PowerPC,970@0
+ | |- name = "PowerPC,970"
+ | |- device_type = "cpu"
+ | |- reg = <0>
+ | |- clock-frequency = <0x5f5e1000>
+ | |- 64-bit
+ | |- linux,phandle = <2>
+ |
+ o memory@0
+ | |- name = "memory"
+ | |- device_type = "memory"
+ | |- reg = <0x00000000 0x00000000 0x00000000 0x20000000>
+ | |- linux,phandle = <3>
+ |
+ o chosen
+ |- name = "chosen"
+ |- bootargs = "root=/dev/sda2"
+ |- linux,phandle = <4>
+
+This tree is almost a minimal tree. It pretty much contains the
+minimal set of required nodes and properties to boot a linux kernel;
+that is, some basic model information at the root, the CPUs, and the
+physical memory layout. It also includes misc information passed
+through /chosen, like in this example, the platform type (mandatory)
+and the kernel command line arguments (optional).
+
+The /cpus/PowerPC,970@0/64-bit property is an example of a
+property without a value. All other properties have a value. The
+significance of the #address-cells and #size-cells properties will be
+explained in chapter IV which defines precisely the required nodes and
+properties and their content.
+
+
+3) Device tree "structure" block
+
+The structure of the device tree is a linearized tree structure. The
+"OF_DT_BEGIN_NODE" token starts a new node, and the "OF_DT_END_NODE"
+ends that node definition. Child nodes are simply defined before
+"OF_DT_END_NODE" (that is nodes within the node). A 'token' is a 32
+bit value. The tree has to be "finished" with a OF_DT_END token
+
+Here's the basic structure of a single node:
+
+ * token OF_DT_BEGIN_NODE (that is 0x00000001)
+ * for version 1 to 3, this is the node full path as a zero
+ terminated string, starting with "/". For version 16 and later,
+ this is the node unit name only (or an empty string for the
+ root node)
+ * [align gap to next 4 bytes boundary]
+ * for each property:
+ * token OF_DT_PROP (that is 0x00000003)
+ * 32-bit value of property value size in bytes (or 0 if no
+ value)
+ * 32-bit value of offset in string block of property name
+ * property value data if any
+ * [align gap to next 4 bytes boundary]
+ * [child nodes if any]
+ * token OF_DT_END_NODE (that is 0x00000002)
+
+So the node content can be summarized as a start token, a full path,
+a list of properties, a list of child nodes, and an end token. Every
+child node is a full node structure itself as defined above.
+
+NOTE: The above definition requires that all property definitions for
+a particular node MUST precede any subnode definitions for that node.
+Although the structure would not be ambiguous if properties and
+subnodes were intermingled, the kernel parser requires that the
+properties come first (up until at least 2.6.22). Any tools
+manipulating a flattened tree must take care to preserve this
+constraint.
+
+4) Device tree "strings" block
+
+In order to save space, property names, which are generally redundant,
+are stored separately in the "strings" block. This block is simply the
+whole bunch of zero terminated strings for all property names
+concatenated together. The device-tree property definitions in the
+structure block will contain offset values from the beginning of the
+strings block.
+
+
+III - Required content of the device tree
+=========================================
+
+WARNING: All "linux,*" properties defined in this document apply only
+to a flattened device-tree. If your platform uses a real
+implementation of Open Firmware or an implementation compatible with
+the Open Firmware client interface, those properties will be created
+by the trampoline code in the kernel's prom_init() file. For example,
+that's where you'll have to add code to detect your board model and
+set the platform number. However, when using the flattened device-tree
+entry point, there is no prom_init() pass, and thus you have to
+provide those properties yourself.
+
+
+1) Note about cells and address representation
+----------------------------------------------
+
+The general rule is documented in the various Open Firmware
+documentations. If you choose to describe a bus with the device-tree
+and there exist an OF bus binding, then you should follow the
+specification. However, the kernel does not require every single
+device or bus to be described by the device tree.
+
+In general, the format of an address for a device is defined by the
+parent bus type, based on the #address-cells and #size-cells
+properties. Note that the parent's parent definitions of #address-cells
+and #size-cells are not inherited so every node with children must specify
+them. The kernel requires the root node to have those properties defining
+addresses format for devices directly mapped on the processor bus.
+
+Those 2 properties define 'cells' for representing an address and a
+size. A "cell" is a 32-bit number. For example, if both contain 2
+like the example tree given above, then an address and a size are both
+composed of 2 cells, and each is a 64-bit number (cells are
+concatenated and expected to be in big endian format). Another example
+is the way Apple firmware defines them, with 2 cells for an address
+and one cell for a size. Most 32-bit implementations should define
+#address-cells and #size-cells to 1, which represents a 32-bit value.
+Some 32-bit processors allow for physical addresses greater than 32
+bits; these processors should define #address-cells as 2.
+
+"reg" properties are always a tuple of the type "address size" where
+the number of cells of address and size is specified by the bus
+#address-cells and #size-cells. When a bus supports various address
+spaces and other flags relative to a given address allocation (like
+prefetchable, etc...) those flags are usually added to the top level
+bits of the physical address. For example, a PCI physical address is
+made of 3 cells, the bottom two containing the actual address itself
+while the top cell contains address space indication, flags, and pci
+bus & device numbers.
+
+For buses that support dynamic allocation, it's the accepted practice
+to then not provide the address in "reg" (keep it 0) though while
+providing a flag indicating the address is dynamically allocated, and
+then, to provide a separate "assigned-addresses" property that
+contains the fully allocated addresses. See the PCI OF bindings for
+details.
+
+In general, a simple bus with no address space bits and no dynamic
+allocation is preferred if it reflects your hardware, as the existing
+kernel address parsing functions will work out of the box. If you
+define a bus type with a more complex address format, including things
+like address space bits, you'll have to add a bus translator to the
+prom_parse.c file of the recent kernels for your bus type.
+
+The "reg" property only defines addresses and sizes (if #size-cells is
+non-0) within a given bus. In order to translate addresses upward
+(that is into parent bus addresses, and possibly into CPU physical
+addresses), all buses must contain a "ranges" property. If the
+"ranges" property is missing at a given level, it's assumed that
+translation isn't possible, i.e., the registers are not visible on the
+parent bus. The format of the "ranges" property for a bus is a list
+of:
+
+ bus address, parent bus address, size
+
+"bus address" is in the format of the bus this bus node is defining,
+that is, for a PCI bridge, it would be a PCI address. Thus, (bus
+address, size) defines a range of addresses for child devices. "parent
+bus address" is in the format of the parent bus of this bus. For
+example, for a PCI host controller, that would be a CPU address. For a
+PCI<->ISA bridge, that would be a PCI address. It defines the base
+address in the parent bus where the beginning of that range is mapped.
+
+For new 64-bit board support, I recommend either the 2/2 format or
+Apple's 2/1 format which is slightly more compact since sizes usually
+fit in a single 32-bit word. New 32-bit board support should use a
+1/1 format, unless the processor supports physical addresses greater
+than 32-bits, in which case a 2/1 format is recommended.
+
+Alternatively, the "ranges" property may be empty, indicating that the
+registers are visible on the parent bus using an identity mapping
+translation. In other words, the parent bus address space is the same
+as the child bus address space.
+
+2) Note about "compatible" properties
+-------------------------------------
+
+These properties are optional, but recommended in devices and the root
+node. The format of a "compatible" property is a list of concatenated
+zero terminated strings. They allow a device to express its
+compatibility with a family of similar devices, in some cases,
+allowing a single driver to match against several devices regardless
+of their actual names.
+
+3) Note about "name" properties
+-------------------------------
+
+While earlier users of Open Firmware like OldWorld macintoshes tended
+to use the actual device name for the "name" property, it's nowadays
+considered a good practice to use a name that is closer to the device
+class (often equal to device_type). For example, nowadays, Ethernet
+controllers are named "ethernet", an additional "model" property
+defining precisely the chip type/model, and "compatible" property
+defining the family in case a single driver can driver more than one
+of these chips. However, the kernel doesn't generally put any
+restriction on the "name" property; it is simply considered good
+practice to follow the standard and its evolutions as closely as
+possible.
+
+Note also that the new format version 16 makes the "name" property
+optional. If it's absent for a node, then the node's unit name is then
+used to reconstruct the name. That is, the part of the unit name
+before the "@" sign is used (or the entire unit name if no "@" sign
+is present).
+
+4) Note about node and property names and character set
+-------------------------------------------------------
+
+While Open Firmware provides more flexible usage of 8859-1, this
+specification enforces more strict rules. Nodes and properties should
+be comprised only of ASCII characters 'a' to 'z', '0' to
+'9', ',', '.', '_', '+', '#', '?', and '-'. Node names additionally
+allow uppercase characters 'A' to 'Z' (property names should be
+lowercase. The fact that vendors like Apple don't respect this rule is
+irrelevant here). Additionally, node and property names should always
+begin with a character in the range 'a' to 'z' (or 'A' to 'Z' for node
+names).
+
+The maximum number of characters for both nodes and property names
+is 31. In the case of node names, this is only the leftmost part of
+a unit name (the pure "name" property), it doesn't include the unit
+address which can extend beyond that limit.
+
+
+5) Required nodes and properties
+--------------------------------
+ These are all that are currently required. However, it is strongly
+ recommended that you expose PCI host bridges as documented in the
+ PCI binding to Open Firmware, and your interrupt tree as documented
+ in OF interrupt tree specification.
+
+ a) The root node
+
+ The root node requires some properties to be present:
+
+ - model : this is your board name/model
+ - #address-cells : address representation for "root" devices
+ - #size-cells: the size representation for "root" devices
+ - compatible : the board "family" generally finds its way here,
+ for example, if you have 2 board models with a similar layout,
+ that typically get driven by the same platform code in the
+ kernel, you would specify the exact board model in the
+ compatible property followed by an entry that represents the SoC
+ model.
+
+ The root node is also generally where you add additional properties
+ specific to your board like the serial number if any, that sort of
+ thing. It is recommended that if you add any "custom" property whose
+ name may clash with standard defined ones, you prefix them with your
+ vendor name and a comma.
+
+ b) The /cpus node
+
+ This node is the parent of all individual CPU nodes. It doesn't
+ have any specific requirements, though it's generally good practice
+ to have at least:
+
+ #address-cells = <00000001>
+ #size-cells = <00000000>
+
+ This defines that the "address" for a CPU is a single cell, and has
+ no meaningful size. This is not necessary but the kernel will assume
+ that format when reading the "reg" properties of a CPU node, see
+ below
+
+ c) The /cpus/* nodes
+
+ So under /cpus, you are supposed to create a node for every CPU on
+ the machine. There is no specific restriction on the name of the
+ CPU, though it's common to call it <architecture>,<core>. For
+ example, Apple uses PowerPC,G5 while IBM uses PowerPC,970FX.
+ However, the Generic Names convention suggests that it would be
+ better to simply use 'cpu' for each cpu node and use the compatible
+ property to identify the specific cpu core.
+
+ Required properties:
+
+ - device_type : has to be "cpu"
+ - reg : This is the physical CPU number, it's a single 32-bit cell
+ and is also used as-is as the unit number for constructing the
+ unit name in the full path. For example, with 2 CPUs, you would
+ have the full path:
+ /cpus/PowerPC,970FX@0
+ /cpus/PowerPC,970FX@1
+ (unit addresses do not require leading zeroes)
+ - d-cache-block-size : one cell, L1 data cache block size in bytes (*)
+ - i-cache-block-size : one cell, L1 instruction cache block size in
+ bytes
+ - d-cache-size : one cell, size of L1 data cache in bytes
+ - i-cache-size : one cell, size of L1 instruction cache in bytes
+
+(*) The cache "block" size is the size on which the cache management
+instructions operate. Historically, this document used the cache
+"line" size here which is incorrect. The kernel will prefer the cache
+block size and will fallback to cache line size for backward
+compatibility.
+
+ Recommended properties:
+
+ - timebase-frequency : a cell indicating the frequency of the
+ timebase in Hz. This is not directly used by the generic code,
+ but you are welcome to copy/paste the pSeries code for setting
+ the kernel timebase/decrementer calibration based on this
+ value.
+ - clock-frequency : a cell indicating the CPU core clock frequency
+ in Hz. A new property will be defined for 64-bit values, but if
+ your frequency is < 4Ghz, one cell is enough. Here as well as
+ for the above, the common code doesn't use that property, but
+ you are welcome to re-use the pSeries or Maple one. A future
+ kernel version might provide a common function for this.
+ - d-cache-line-size : one cell, L1 data cache line size in bytes
+ if different from the block size
+ - i-cache-line-size : one cell, L1 instruction cache line size in
+ bytes if different from the block size
+
+ You are welcome to add any property you find relevant to your board,
+ like some information about the mechanism used to soft-reset the
+ CPUs. For example, Apple puts the GPIO number for CPU soft reset
+ lines in there as a "soft-reset" property since they start secondary
+ CPUs by soft-resetting them.
+
+
+ d) the /memory node(s)
+
+ To define the physical memory layout of your board, you should
+ create one or more memory node(s). You can either create a single
+ node with all memory ranges in its reg property, or you can create
+ several nodes, as you wish. The unit address (@ part) used for the
+ full path is the address of the first range of memory defined by a
+ given node. If you use a single memory node, this will typically be
+ @0.
+
+ Required properties:
+
+ - device_type : has to be "memory"
+ - reg : This property contains all the physical memory ranges of
+ your board. It's a list of addresses/sizes concatenated
+ together, with the number of cells of each defined by the
+ #address-cells and #size-cells of the root node. For example,
+ with both of these properties being 2 like in the example given
+ earlier, a 970 based machine with 6Gb of RAM could typically
+ have a "reg" property here that looks like:
+
+ 00000000 00000000 00000000 80000000
+ 00000001 00000000 00000001 00000000
+
+ That is a range starting at 0 of 0x80000000 bytes and a range
+ starting at 0x100000000 and of 0x100000000 bytes. You can see
+ that there is no memory covering the IO hole between 2Gb and
+ 4Gb. Some vendors prefer splitting those ranges into smaller
+ segments, but the kernel doesn't care.
+
+ e) The /chosen node
+
+ This node is a bit "special". Normally, that's where Open Firmware
+ puts some variable environment information, like the arguments, or
+ the default input/output devices.
+
+ This specification makes a few of these mandatory, but also defines
+ some linux-specific properties that would be normally constructed by
+ the prom_init() trampoline when booting with an OF client interface,
+ but that you have to provide yourself when using the flattened format.
+
+ Recommended properties:
+
+ - bootargs : This zero-terminated string is passed as the kernel
+ command line
+ - linux,stdout-path : This is the full path to your standard
+ console device if any. Typically, if you have serial devices on
+ your board, you may want to put the full path to the one set as
+ the default console in the firmware here, for the kernel to pick
+ it up as its own default console.
+
+ Note that u-boot creates and fills in the chosen node for platforms
+ that use it.
+
+ (Note: a practice that is now obsolete was to include a property
+ under /chosen called interrupt-controller which had a phandle value
+ that pointed to the main interrupt controller)
+
+ f) the /soc<SOCname> node
+
+ This node is used to represent a system-on-a-chip (SoC) and must be
+ present if the processor is a SoC. The top-level soc node contains
+ information that is global to all devices on the SoC. The node name
+ should contain a unit address for the SoC, which is the base address
+ of the memory-mapped register set for the SoC. The name of an SoC
+ node should start with "soc", and the remainder of the name should
+ represent the part number for the soc. For example, the MPC8540's
+ soc node would be called "soc8540".
+
+ Required properties:
+
+ - ranges : Should be defined as specified in 1) to describe the
+ translation of SoC addresses for memory mapped SoC registers.
+ - bus-frequency: Contains the bus frequency for the SoC node.
+ Typically, the value of this field is filled in by the boot
+ loader.
+ - compatible : Exact model of the SoC
+
+
+ Recommended properties:
+
+ - reg : This property defines the address and size of the
+ memory-mapped registers that are used for the SOC node itself.
+ It does not include the child device registers - these will be
+ defined inside each child node. The address specified in the
+ "reg" property should match the unit address of the SOC node.
+ - #address-cells : Address representation for "soc" devices. The
+ format of this field may vary depending on whether or not the
+ device registers are memory mapped. For memory mapped
+ registers, this field represents the number of cells needed to
+ represent the address of the registers. For SOCs that do not
+ use MMIO, a special address format should be defined that
+ contains enough cells to represent the required information.
+ See 1) above for more details on defining #address-cells.
+ - #size-cells : Size representation for "soc" devices
+ - #interrupt-cells : Defines the width of cells used to represent
+ interrupts. Typically this value is <2>, which includes a
+ 32-bit number that represents the interrupt number, and a
+ 32-bit number that represents the interrupt sense and level.
+ This field is only needed if the SOC contains an interrupt
+ controller.
+
+ The SOC node may contain child nodes for each SOC device that the
+ platform uses. Nodes should not be created for devices which exist
+ on the SOC but are not used by a particular platform. See chapter VI
+ for more information on how to specify devices that are part of a SOC.
+
+ Example SOC node for the MPC8540:
+
+ soc8540@e0000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ #interrupt-cells = <2>;
+ device_type = "soc";
+ ranges = <0x00000000 0xe0000000 0x00100000>
+ reg = <0xe0000000 0x00003000>;
+ bus-frequency = <0>;
+ }
+
+
+
+IV - "dtc", the device tree compiler
+====================================
+
+
+dtc source code can be found at
+<http://git.jdl.com/gitweb/?p=dtc.git>
+
+WARNING: This version is still in early development stage; the
+resulting device-tree "blobs" have not yet been validated with the
+kernel. The current generated block lacks a useful reserve map (it will
+be fixed to generate an empty one, it's up to the bootloader to fill
+it up) among others. The error handling needs work, bugs are lurking,
+etc...
+
+dtc basically takes a device-tree in a given format and outputs a
+device-tree in another format. The currently supported formats are:
+
+ Input formats:
+ -------------
+
+ - "dtb": "blob" format, that is a flattened device-tree block
+ with
+ header all in a binary blob.
+ - "dts": "source" format. This is a text file containing a
+ "source" for a device-tree. The format is defined later in this
+ chapter.
+ - "fs" format. This is a representation equivalent to the
+ output of /proc/device-tree, that is nodes are directories and
+ properties are files
+
+ Output formats:
+ ---------------
+
+ - "dtb": "blob" format
+ - "dts": "source" format
+ - "asm": assembly language file. This is a file that can be
+ sourced by gas to generate a device-tree "blob". That file can
+ then simply be added to your Makefile. Additionally, the
+ assembly file exports some symbols that can be used.
+
+
+The syntax of the dtc tool is
+
+ dtc [-I <input-format>] [-O <output-format>]
+ [-o output-filename] [-V output_version] input_filename
+
+
+The "output_version" defines what version of the "blob" format will be
+generated. Supported versions are 1,2,3 and 16. The default is
+currently version 3 but that may change in the future to version 16.
+
+Additionally, dtc performs various sanity checks on the tree, like the
+uniqueness of linux, phandle properties, validity of strings, etc...
+
+The format of the .dts "source" file is "C" like, supports C and C++
+style comments.
+
+/ {
+}
+
+The above is the "device-tree" definition. It's the only statement
+supported currently at the toplevel.
+
+/ {
+ property1 = "string_value"; /* define a property containing a 0
+ * terminated string
+ */
+
+ property2 = <0x1234abcd>; /* define a property containing a
+ * numerical 32-bit value (hexadecimal)
+ */
+
+ property3 = <0x12345678 0x12345678 0xdeadbeef>;
+ /* define a property containing 3
+ * numerical 32-bit values (cells) in
+ * hexadecimal
+ */
+ property4 = [0x0a 0x0b 0x0c 0x0d 0xde 0xea 0xad 0xbe 0xef];
+ /* define a property whose content is
+ * an arbitrary array of bytes
+ */
+
+ childnode@address { /* define a child node named "childnode"
+ * whose unit name is "childnode at
+ * address"
+ */
+
+ childprop = "hello\n"; /* define a property "childprop" of
+ * childnode (in this case, a string)
+ */
+ };
+};
+
+Nodes can contain other nodes etc... thus defining the hierarchical
+structure of the tree.
+
+Strings support common escape sequences from C: "\n", "\t", "\r",
+"\(octal value)", "\x(hex value)".
+
+It is also suggested that you pipe your source file through cpp (gcc
+preprocessor) so you can use #include's, #define for constants, etc...
+
+Finally, various options are planned but not yet implemented, like
+automatic generation of phandles, labels (exported to the asm file so
+you can point to a property content and change it easily from whatever
+you link the device-tree with), label or path instead of numeric value
+in some cells to "point" to a node (replaced by a phandle at compile
+time), export of reserve map address to the asm file, ability to
+specify reserve map content at compile time, etc...
+
+We may provide a .h include file with common definitions of that
+proves useful for some properties (like building PCI properties or
+interrupt maps) though it may be better to add a notion of struct
+definitions to the compiler...
+
+
+V - Recommendations for a bootloader
+====================================
+
+
+Here are some various ideas/recommendations that have been proposed
+while all this has been defined and implemented.
+
+ - The bootloader may want to be able to use the device-tree itself
+ and may want to manipulate it (to add/edit some properties,
+ like physical memory size or kernel arguments). At this point, 2
+ choices can be made. Either the bootloader works directly on the
+ flattened format, or the bootloader has its own internal tree
+ representation with pointers (similar to the kernel one) and
+ re-flattens the tree when booting the kernel. The former is a bit
+ more difficult to edit/modify, the later requires probably a bit
+ more code to handle the tree structure. Note that the structure
+ format has been designed so it's relatively easy to "insert"
+ properties or nodes or delete them by just memmoving things
+ around. It contains no internal offsets or pointers for this
+ purpose.
+
+ - An example of code for iterating nodes & retrieving properties
+ directly from the flattened tree format can be found in the kernel
+ file drivers/of/fdt.c. Look at the of_scan_flat_dt() function,
+ its usage in early_init_devtree(), and the corresponding various
+ early_init_dt_scan_*() callbacks. That code can be re-used in a
+ GPL bootloader, and as the author of that code, I would be happy
+ to discuss possible free licensing to any vendor who wishes to
+ integrate all or part of this code into a non-GPL bootloader.
+ (reference needed; who is 'I' here? ---gcl Jan 31, 2011)
+
+
+
+VI - System-on-a-chip devices and nodes
+=======================================
+
+Many companies are now starting to develop system-on-a-chip
+processors, where the processor core (CPU) and many peripheral devices
+exist on a single piece of silicon. For these SOCs, an SOC node
+should be used that defines child nodes for the devices that make
+up the SOC. While platforms are not required to use this model in
+order to boot the kernel, it is highly encouraged that all SOC
+implementations define as complete a flat-device-tree as possible to
+describe the devices on the SOC. This will allow for the
+genericization of much of the kernel code.
+
+
+1) Defining child nodes of an SOC
+---------------------------------
+
+Each device that is part of an SOC may have its own node entry inside
+the SOC node. For each device that is included in the SOC, the unit
+address property represents the address offset for this device's
+memory-mapped registers in the parent's address space. The parent's
+address space is defined by the "ranges" property in the top-level soc
+node. The "reg" property for each node that exists directly under the
+SOC node should contain the address mapping from the child address space
+to the parent SOC address space and the size of the device's
+memory-mapped register file.
+
+For many devices that may exist inside an SOC, there are predefined
+specifications for the format of the device tree node. All SOC child
+nodes should follow these specifications, except where noted in this
+document.
+
+See appendix A for an example partial SOC node definition for the
+MPC8540.
+
+
+2) Representing devices without a current OF specification
+----------------------------------------------------------
+
+Currently, there are many devices on SoCs that do not have a standard
+representation defined as part of the Open Firmware specifications,
+mainly because the boards that contain these SoCs are not currently
+booted using Open Firmware. Binding documentation for new devices
+should be added to the Documentation/devicetree/bindings directory.
+That directory will expand as device tree support is added to more and
+more SoCs.
+
+
+VII - Specifying interrupt information for devices
+===================================================
+
+The device tree represents the buses and devices of a hardware
+system in a form similar to the physical bus topology of the
+hardware.
+
+In addition, a logical 'interrupt tree' exists which represents the
+hierarchy and routing of interrupts in the hardware.
+
+The interrupt tree model is fully described in the
+document "Open Firmware Recommended Practice: Interrupt
+Mapping Version 0.9". The document is available at:
+<http://www.openfirmware.org/ofwg/practice/>
+
+1) interrupts property
+----------------------
+
+Devices that generate interrupts to a single interrupt controller
+should use the conventional OF representation described in the
+OF interrupt mapping documentation.
+
+Each device which generates interrupts must have an 'interrupt'
+property. The interrupt property value is an arbitrary number of
+of 'interrupt specifier' values which describe the interrupt or
+interrupts for the device.
+
+The encoding of an interrupt specifier is determined by the
+interrupt domain in which the device is located in the
+interrupt tree. The root of an interrupt domain specifies in
+its #interrupt-cells property the number of 32-bit cells
+required to encode an interrupt specifier. See the OF interrupt
+mapping documentation for a detailed description of domains.
+
+For example, the binding for the OpenPIC interrupt controller
+specifies an #interrupt-cells value of 2 to encode the interrupt
+number and level/sense information. All interrupt children in an
+OpenPIC interrupt domain use 2 cells per interrupt in their interrupts
+property.
+
+The PCI bus binding specifies a #interrupt-cell value of 1 to encode
+which interrupt pin (INTA,INTB,INTC,INTD) is used.
+
+2) interrupt-parent property
+----------------------------
+
+The interrupt-parent property is specified to define an explicit
+link between a device node and its interrupt parent in
+the interrupt tree. The value of interrupt-parent is the
+phandle of the parent node.
+
+If the interrupt-parent property is not defined for a node, its
+interrupt parent is assumed to be an ancestor in the node's
+_device tree_ hierarchy.
+
+3) OpenPIC Interrupt Controllers
+--------------------------------
+
+OpenPIC interrupt controllers require 2 cells to encode
+interrupt information. The first cell defines the interrupt
+number. The second cell defines the sense and level
+information.
+
+Sense and level information should be encoded as follows:
+
+ 0 = low to high edge sensitive type enabled
+ 1 = active low level sensitive type enabled
+ 2 = active high level sensitive type enabled
+ 3 = high to low edge sensitive type enabled
+
+4) ISA Interrupt Controllers
+----------------------------
+
+ISA PIC interrupt controllers require 2 cells to encode
+interrupt information. The first cell defines the interrupt
+number. The second cell defines the sense and level
+information.
+
+ISA PIC interrupt controllers should adhere to the ISA PIC
+encodings listed below:
+
+ 0 = active low level sensitive type enabled
+ 1 = active high level sensitive type enabled
+ 2 = high to low edge sensitive type enabled
+ 3 = low to high edge sensitive type enabled
+
+VIII - Specifying Device Power Management Information (sleep property)
+===================================================================
+
+Devices on SOCs often have mechanisms for placing devices into low-power
+states that are decoupled from the devices' own register blocks. Sometimes,
+this information is more complicated than a cell-index property can
+reasonably describe. Thus, each device controlled in such a manner
+may contain a "sleep" property which describes these connections.
+
+The sleep property consists of one or more sleep resources, each of
+which consists of a phandle to a sleep controller, followed by a
+controller-specific sleep specifier of zero or more cells.
+
+The semantics of what type of low power modes are possible are defined
+by the sleep controller. Some examples of the types of low power modes
+that may be supported are:
+
+ - Dynamic: The device may be disabled or enabled at any time.
+ - System Suspend: The device may request to be disabled or remain
+ awake during system suspend, but will not be disabled until then.
+ - Permanent: The device is disabled permanently (until the next hard
+ reset).
+
+Some devices may share a clock domain with each other, such that they should
+only be suspended when none of the devices are in use. Where reasonable,
+such nodes should be placed on a virtual bus, where the bus has the sleep
+property. If the clock domain is shared among devices that cannot be
+reasonably grouped in this manner, then create a virtual sleep controller
+(similar to an interrupt nexus, except that defining a standardized
+sleep-map should wait until its necessity is demonstrated).
+
+IX - Specifying dma bus information
+
+Some devices may have DMA memory range shifted relatively to the beginning of
+RAM, or even placed outside of kernel RAM. For example, the Keystone 2 SoC
+worked in LPAE mode with 4G memory has:
+- RAM range: [0x8 0000 0000, 0x8 FFFF FFFF]
+- DMA range: [ 0x8000 0000, 0xFFFF FFFF]
+and DMA range is aliased into first 2G of RAM in HW.
+
+In such cases, DMA addresses translation should be performed between CPU phys
+and DMA addresses. The "dma-ranges" property is intended to be used
+for describing the configuration of such system in DT.
+
+In addition, each DMA master device on the DMA bus may or may not support
+coherent DMA operations. The "dma-coherent" property is intended to be used
+for identifying devices supported coherent DMA operations in DT.
+
+* DMA Bus master
+Optional property:
+- dma-ranges: <prop-encoded-array> encoded as arbitrary number of triplets of
+ (child-bus-address, parent-bus-address, length). Each triplet specified
+ describes a contiguous DMA address range.
+ The dma-ranges property is used to describe the direct memory access (DMA)
+ structure of a memory-mapped bus whose device tree parent can be accessed
+ from DMA operations originating from the bus. It provides a means of
+ defining a mapping or translation between the physical address space of
+ the bus and the physical address space of the parent of the bus.
+ (for more information see ePAPR specification)
+
+* DMA Bus child
+Optional property:
+- dma-ranges: <empty> value. if present - It means that DMA addresses
+ translation has to be enabled for this device.
+- dma-coherent: Present if dma operations are coherent
+
+Example:
+soc {
+ compatible = "ti,keystone","simple-bus";
+ ranges = <0x0 0x0 0x0 0xc0000000>;
+ dma-ranges = <0x80000000 0x8 0x00000000 0x80000000>;
+
+ [...]
+
+ usb: usb@2680000 {
+ compatible = "ti,keystone-dwc3";
+
+ [...]
+ dma-coherent;
+ };
+};
+
+Appendix A - Sample SOC node for MPC8540
+========================================
+
+ soc@e0000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,mpc8540-ccsr", "simple-bus";
+ device_type = "soc";
+ ranges = <0x00000000 0xe0000000 0x00100000>
+ bus-frequency = <0>;
+ interrupt-parent = <&pic>;
+
+ ethernet@24000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ device_type = "network";
+ model = "TSEC";
+ compatible = "gianfar", "simple-bus";
+ reg = <0x24000 0x1000>;
+ local-mac-address = [ 0x00 0xE0 0x0C 0x00 0x73 0x00 ];
+ interrupts = <0x29 2 0x30 2 0x34 2>;
+ phy-handle = <&phy0>;
+ sleep = <&pmc 0x00000080>;
+ ranges;
+
+ mdio@24520 {
+ reg = <0x24520 0x20>;
+ compatible = "fsl,gianfar-mdio";
+
+ phy0: ethernet-phy@0 {
+ interrupts = <5 1>;
+ reg = <0>;
+ };
+
+ phy1: ethernet-phy@1 {
+ interrupts = <5 1>;
+ reg = <1>;
+ };
+
+ phy3: ethernet-phy@3 {
+ interrupts = <7 1>;
+ reg = <3>;
+ };
+ };
+ };
+
+ ethernet@25000 {
+ device_type = "network";
+ model = "TSEC";
+ compatible = "gianfar";
+ reg = <0x25000 0x1000>;
+ local-mac-address = [ 0x00 0xE0 0x0C 0x00 0x73 0x01 ];
+ interrupts = <0x13 2 0x14 2 0x18 2>;
+ phy-handle = <&phy1>;
+ sleep = <&pmc 0x00000040>;
+ };
+
+ ethernet@26000 {
+ device_type = "network";
+ model = "FEC";
+ compatible = "gianfar";
+ reg = <0x26000 0x1000>;
+ local-mac-address = [ 0x00 0xE0 0x0C 0x00 0x73 0x02 ];
+ interrupts = <0x41 2>;
+ phy-handle = <&phy3>;
+ sleep = <&pmc 0x00000020>;
+ };
+
+ serial@4500 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "fsl,mpc8540-duart", "simple-bus";
+ sleep = <&pmc 0x00000002>;
+ ranges;
+
+ serial@4500 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x4500 0x100>;
+ clock-frequency = <0>;
+ interrupts = <0x42 2>;
+ };
+
+ serial@4600 {
+ device_type = "serial";
+ compatible = "ns16550";
+ reg = <0x4600 0x100>;
+ clock-frequency = <0>;
+ interrupts = <0x42 2>;
+ };
+ };
+
+ pic: pic@40000 {
+ interrupt-controller;
+ #address-cells = <0>;
+ #interrupt-cells = <2>;
+ reg = <0x40000 0x40000>;
+ compatible = "chrp,open-pic";
+ device_type = "open-pic";
+ };
+
+ i2c@3000 {
+ interrupts = <0x43 2>;
+ reg = <0x3000 0x100>;
+ compatible = "fsl-i2c";
+ dfsrr;
+ sleep = <&pmc 0x00000004>;
+ };
+
+ pmc: power@e0070 {
+ compatible = "fsl,mpc8540-pmc", "fsl,mpc8548-pmc";
+ reg = <0xe0070 0x20>;
+ };
+ };
diff --git a/Documentation/devicetree/changesets.txt b/Documentation/devicetree/changesets.txt
new file mode 100644
index 000000000..935ba5acc
--- /dev/null
+++ b/Documentation/devicetree/changesets.txt
@@ -0,0 +1,40 @@
+A DT changeset is a method which allows one to apply changes
+in the live tree in such a way that either the full set of changes
+will be applied, or none of them will be. If an error occurs partway
+through applying the changeset, then the tree will be rolled back to the
+previous state. A changeset can also be removed after it has been
+applied.
+
+When a changeset is applied, all of the changes get applied to the tree
+at once before emitting OF_RECONFIG notifiers. This is so that the
+receiver sees a complete and consistent state of the tree when it
+receives the notifier.
+
+The sequence of a changeset is as follows.
+
+1. of_changeset_init() - initializes a changeset
+
+2. A number of DT tree change calls, of_changeset_attach_node(),
+of_changeset_detach_node(), of_changeset_add_property(),
+of_changeset_remove_property, of_changeset_update_property() to prepare
+a set of changes. No changes to the active tree are made at this point.
+All the change operations are recorded in the of_changeset 'entries'
+list.
+
+3. mutex_lock(of_mutex) - starts a changeset; The global of_mutex
+ensures there can only be one editor at a time.
+
+4. of_changeset_apply() - Apply the changes to the tree. Either the
+entire changeset will get applied, or if there is an error the tree will
+be restored to the previous state
+
+5. mutex_unlock(of_mutex) - All operations complete, release the mutex
+
+If a successfully applied changeset needs to be removed, it can be done
+with the following sequence.
+
+1. mutex_lock(of_mutex)
+
+2. of_changeset_revert()
+
+3. mutex_unlock(of_mutex)
diff --git a/Documentation/devicetree/dynamic-resolution-notes.txt b/Documentation/devicetree/dynamic-resolution-notes.txt
new file mode 100644
index 000000000..083d23262
--- /dev/null
+++ b/Documentation/devicetree/dynamic-resolution-notes.txt
@@ -0,0 +1,25 @@
+Device Tree Dynamic Resolver Notes
+----------------------------------
+
+This document describes the implementation of the in-kernel
+Device Tree resolver, residing in drivers/of/resolver.c and is a
+companion document to Documentation/devicetree/dt-object-internal.txt[1]
+
+How the resolver works
+----------------------
+
+The resolver is given as an input an arbitrary tree compiled with the
+proper dtc option and having a /plugin/ tag. This generates the
+appropriate __fixups__ & __local_fixups__ nodes as described in [1].
+
+In sequence the resolver works by the following steps:
+
+1. Get the maximum device tree phandle value from the live tree + 1.
+2. Adjust all the local phandles of the tree to resolve by that amount.
+3. Using the __local__fixups__ node information adjust all local references
+ by the same amount.
+4. For each property in the __fixups__ node locate the node it references
+ in the live tree. This is the label used to tag the node.
+5. Retrieve the phandle of the target of the fixup.
+6. For each fixup in the property locate the node:property:offset location
+ and replace it with the phandle value.
diff --git a/Documentation/devicetree/of_unittest.txt b/Documentation/devicetree/of_unittest.txt
new file mode 100644
index 000000000..3e4e7d48a
--- /dev/null
+++ b/Documentation/devicetree/of_unittest.txt
@@ -0,0 +1,197 @@
+Open Firmware Device Tree Unittest
+----------------------------------
+
+Author: Gaurav Minocha <gaurav.minocha.os@gmail.com>
+
+1. Introduction
+
+This document explains how the test data required for executing OF unittest
+is attached to the live tree dynamically, independent of the machine's
+architecture.
+
+It is recommended to read the following documents before moving ahead.
+
+[1] Documentation/devicetree/usage-model.txt
+[2] http://www.devicetree.org/Device_Tree_Usage
+
+OF Selftest has been designed to test the interface (include/linux/of.h)
+provided to device driver developers to fetch the device information..etc.
+from the unflattened device tree data structure. This interface is used by
+most of the device drivers in various use cases.
+
+
+2. Test-data
+
+The Device Tree Source file (drivers/of/unittest-data/testcases.dts) contains
+the test data required for executing the unit tests automated in
+drivers/of/unittest.c. Currently, following Device Tree Source Include files
+(.dtsi) are included in testcases.dts:
+
+drivers/of/unittest-data/tests-interrupts.dtsi
+drivers/of/unittest-data/tests-platform.dtsi
+drivers/of/unittest-data/tests-phandle.dtsi
+drivers/of/unittest-data/tests-match.dtsi
+
+When the kernel is build with OF_SELFTEST enabled, then the following make rule
+
+$(obj)/%.dtb: $(src)/%.dts FORCE
+ $(call if_changed_dep, dtc)
+
+is used to compile the DT source file (testcases.dts) into a binary blob
+(testcases.dtb), also referred as flattened DT.
+
+After that, using the following rule the binary blob above is wrapped as an
+assembly file (testcases.dtb.S).
+
+$(obj)/%.dtb.S: $(obj)/%.dtb
+ $(call cmd, dt_S_dtb)
+
+The assembly file is compiled into an object file (testcases.dtb.o), and is
+linked into the kernel image.
+
+
+2.1. Adding the test data
+
+Un-flattened device tree structure:
+
+Un-flattened device tree consists of connected device_node(s) in form of a tree
+structure described below.
+
+// following struct members are used to construct the tree
+struct device_node {
+ ...
+ struct device_node *parent;
+ struct device_node *child;
+ struct device_node *sibling;
+ ...
+ };
+
+Figure 1, describes a generic structure of machine's un-flattened device tree
+considering only child and sibling pointers. There exists another pointer,
+*parent, that is used to traverse the tree in the reverse direction. So, at
+a particular level the child node and all the sibling nodes will have a parent
+pointer pointing to a common node (e.g. child1, sibling2, sibling3, sibling4's
+parent points to root node)
+
+root ('/')
+ |
+child1 -> sibling2 -> sibling3 -> sibling4 -> null
+ | | | |
+ | | | null
+ | | |
+ | | child31 -> sibling32 -> null
+ | | | |
+ | | null null
+ | |
+ | child21 -> sibling22 -> sibling23 -> null
+ | | | |
+ | null null null
+ |
+child11 -> sibling12 -> sibling13 -> sibling14 -> null
+ | | | |
+ | | | null
+ | | |
+ null null child131 -> null
+ |
+ null
+
+Figure 1: Generic structure of un-flattened device tree
+
+
+Before executing OF unittest, it is required to attach the test data to
+machine's device tree (if present). So, when selftest_data_add() is called,
+at first it reads the flattened device tree data linked into the kernel image
+via the following kernel symbols:
+
+__dtb_testcases_begin - address marking the start of test data blob
+__dtb_testcases_end - address marking the end of test data blob
+
+Secondly, it calls of_fdt_unflatten_tree() to unflatten the flattened
+blob. And finally, if the machine's device tree (i.e live tree) is present,
+then it attaches the unflattened test data tree to the live tree, else it
+attaches itself as a live device tree.
+
+attach_node_and_children() uses of_attach_node() to attach the nodes into the
+live tree as explained below. To explain the same, the test data tree described
+ in Figure 2 is attached to the live tree described in Figure 1.
+
+root ('/')
+ |
+ testcase-data
+ |
+ test-child0 -> test-sibling1 -> test-sibling2 -> test-sibling3 -> null
+ | | | |
+ test-child01 null null null
+
+
+Figure 2: Example test data tree to be attached to live tree.
+
+According to the scenario above, the live tree is already present so it isn't
+required to attach the root('/') node. All other nodes are attached by calling
+of_attach_node() on each node.
+
+In the function of_attach_node(), the new node is attached as the child of the
+given parent in live tree. But, if parent already has a child then the new node
+replaces the current child and turns it into its sibling. So, when the testcase
+data node is attached to the live tree above (Figure 1), the final structure is
+ as shown in Figure 3.
+
+root ('/')
+ |
+testcase-data -> child1 -> sibling2 -> sibling3 -> sibling4 -> null
+ | | | | |
+ (...) | | | null
+ | | child31 -> sibling32 -> null
+ | | | |
+ | | null null
+ | |
+ | child21 -> sibling22 -> sibling23 -> null
+ | | | |
+ | null null null
+ |
+ child11 -> sibling12 -> sibling13 -> sibling14 -> null
+ | | | |
+ null null | null
+ |
+ child131 -> null
+ |
+ null
+-----------------------------------------------------------------------
+
+root ('/')
+ |
+testcase-data -> child1 -> sibling2 -> sibling3 -> sibling4 -> null
+ | | | | |
+ | (...) (...) (...) null
+ |
+test-sibling3 -> test-sibling2 -> test-sibling1 -> test-child0 -> null
+ | | | |
+ null null null test-child01
+
+
+Figure 3: Live device tree structure after attaching the testcase-data.
+
+
+Astute readers would have noticed that test-child0 node becomes the last
+sibling compared to the earlier structure (Figure 2). After attaching first
+test-child0 the test-sibling1 is attached that pushes the child node
+(i.e. test-child0) to become a sibling and makes itself a child node,
+ as mentioned above.
+
+If a duplicate node is found (i.e. if a node with same full_name property is
+already present in the live tree), then the node isn't attached rather its
+properties are updated to the live tree's node by calling the function
+update_node_properties().
+
+
+2.2. Removing the test data
+
+Once the test case execution is complete, selftest_data_remove is called in
+order to remove the device nodes attached initially (first the leaf nodes are
+detached and then moving up the parent nodes are removed, and eventually the
+whole tree). selftest_data_remove() calls detach_node_and_children() that uses
+of_detach_node() to detach the nodes from the live device tree.
+
+To detach a node, of_detach_node() either updates the child pointer of given
+node's parent to its sibling or attaches the previous sibling to the given
+node's sibling, as appropriate. That is it :)
diff --git a/Documentation/devicetree/overlay-notes.txt b/Documentation/devicetree/overlay-notes.txt
new file mode 100644
index 000000000..d418a6ce9
--- /dev/null
+++ b/Documentation/devicetree/overlay-notes.txt
@@ -0,0 +1,133 @@
+Device Tree Overlay Notes
+-------------------------
+
+This document describes the implementation of the in-kernel
+device tree overlay functionality residing in drivers/of/overlay.c and is a
+companion document to Documentation/devicetree/dt-object-internal.txt[1] &
+Documentation/devicetree/dynamic-resolution-notes.txt[2]
+
+How overlays work
+-----------------
+
+A Device Tree's overlay purpose is to modify the kernel's live tree, and
+have the modification affecting the state of the kernel in a way that
+is reflecting the changes.
+Since the kernel mainly deals with devices, any new device node that result
+in an active device should have it created while if the device node is either
+disabled or removed all together, the affected device should be deregistered.
+
+Lets take an example where we have a foo board with the following base tree
+which is taken from [1].
+
+---- foo.dts -----------------------------------------------------------------
+ /* FOO platform */
+ / {
+ compatible = "corp,foo";
+
+ /* shared resources */
+ res: res {
+ };
+
+ /* On chip peripherals */
+ ocp: ocp {
+ /* peripherals that are always instantiated */
+ peripheral1 { ... };
+ }
+ };
+---- foo.dts -----------------------------------------------------------------
+
+The overlay bar.dts, when loaded (and resolved as described in [2]) should
+
+---- bar.dts -----------------------------------------------------------------
+/plugin/; /* allow undefined label references and record them */
+/ {
+ .... /* various properties for loader use; i.e. part id etc. */
+ fragment@0 {
+ target = <&ocp>;
+ __overlay__ {
+ /* bar peripheral */
+ bar {
+ compatible = "corp,bar";
+ ... /* various properties and child nodes */
+ }
+ };
+ };
+};
+---- bar.dts -----------------------------------------------------------------
+
+result in foo+bar.dts
+
+---- foo+bar.dts -------------------------------------------------------------
+ /* FOO platform + bar peripheral */
+ / {
+ compatible = "corp,foo";
+
+ /* shared resources */
+ res: res {
+ };
+
+ /* On chip peripherals */
+ ocp: ocp {
+ /* peripherals that are always instantiated */
+ peripheral1 { ... };
+
+ /* bar peripheral */
+ bar {
+ compatible = "corp,bar";
+ ... /* various properties and child nodes */
+ }
+ }
+ };
+---- foo+bar.dts -------------------------------------------------------------
+
+As a result of the overlay, a new device node (bar) has been created
+so a bar platform device will be registered and if a matching device driver
+is loaded the device will be created as expected.
+
+Overlay in-kernel API
+--------------------------------
+
+The API is quite easy to use.
+
+1. Call of_overlay_create() to create and apply an overlay. The return value
+is a cookie identifying this overlay.
+
+2. Call of_overlay_destroy() to remove and cleanup the overlay previously
+created via the call to of_overlay_create(). Removal of an overlay that
+is stacked by another will not be permitted.
+
+Finally, if you need to remove all overlays in one-go, just call
+of_overlay_destroy_all() which will remove every single one in the correct
+order.
+
+Overlay DTS Format
+------------------
+
+The DTS of an overlay should have the following format:
+
+{
+ /* ignored properties by the overlay */
+
+ fragment@0 { /* first child node */
+
+ target=<phandle>; /* phandle target of the overlay */
+ or
+ target-path="/path"; /* target path of the overlay */
+
+ __overlay__ {
+ property-a; /* add property-a to the target */
+ node-a { /* add to an existing, or create a node-a */
+ ...
+ };
+ };
+ }
+ fragment@1 { /* second child node */
+ ...
+ };
+ /* more fragments follow */
+}
+
+Using the non-phandle based target method allows one to use a base DT which does
+not contain a __symbols__ node, i.e. it was not compiled with the -@ option.
+The __symbols__ node is only required for the target=<phandle> method, since it
+contains the information required to map from a phandle to a tree location.
diff --git a/Documentation/devicetree/todo.txt b/Documentation/devicetree/todo.txt
new file mode 100644
index 000000000..b5139d1de
--- /dev/null
+++ b/Documentation/devicetree/todo.txt
@@ -0,0 +1,10 @@
+Todo list for devicetree:
+
+=== General structure ===
+- Switch from custom lists to (h)list_head for nodes and properties structure
+
+=== CONFIG_OF_DYNAMIC ===
+- Switch to RCU for tree updates and get rid of global spinlock
+- Document node lifecycle for CONFIG_OF_DYNAMIC
+- Always set ->full_name at of_attach_node() time
+- pseries: Get rid of open-coded tree modification from arch/powerpc/platforms/pseries/dlpar.c
diff --git a/Documentation/devicetree/usage-model.txt b/Documentation/devicetree/usage-model.txt
new file mode 100644
index 000000000..2b6b3d3f0
--- /dev/null
+++ b/Documentation/devicetree/usage-model.txt
@@ -0,0 +1,415 @@
+Linux and the Device Tree
+-------------------------
+The Linux usage model for device tree data
+
+Author: Grant Likely <grant.likely@secretlab.ca>
+
+This article describes how Linux uses the device tree. An overview of
+the device tree data format can be found on the device tree usage page
+at devicetree.org[1].
+
+[1] http://devicetree.org/Device_Tree_Usage
+
+The "Open Firmware Device Tree", or simply Device Tree (DT), is a data
+structure and language for describing hardware. More specifically, it
+is a description of hardware that is readable by an operating system
+so that the operating system doesn't need to hard code details of the
+machine.
+
+Structurally, the DT is a tree, or acyclic graph with named nodes, and
+nodes may have an arbitrary number of named properties encapsulating
+arbitrary data. A mechanism also exists to create arbitrary
+links from one node to another outside of the natural tree structure.
+
+Conceptually, a common set of usage conventions, called 'bindings',
+is defined for how data should appear in the tree to describe typical
+hardware characteristics including data busses, interrupt lines, GPIO
+connections, and peripheral devices.
+
+As much as possible, hardware is described using existing bindings to
+maximize use of existing support code, but since property and node
+names are simply text strings, it is easy to extend existing bindings
+or create new ones by defining new nodes and properties. Be wary,
+however, of creating a new binding without first doing some homework
+about what already exists. There are currently two different,
+incompatible, bindings for i2c busses that came about because the new
+binding was created without first investigating how i2c devices were
+already being enumerated in existing systems.
+
+1. History
+----------
+The DT was originally created by Open Firmware as part of the
+communication method for passing data from Open Firmware to a client
+program (like to an operating system). An operating system used the
+Device Tree to discover the topology of the hardware at runtime, and
+thereby support a majority of available hardware without hard coded
+information (assuming drivers were available for all devices).
+
+Since Open Firmware is commonly used on PowerPC and SPARC platforms,
+the Linux support for those architectures has for a long time used the
+Device Tree.
+
+In 2005, when PowerPC Linux began a major cleanup and to merge 32-bit
+and 64-bit support, the decision was made to require DT support on all
+powerpc platforms, regardless of whether or not they used Open
+Firmware. To do this, a DT representation called the Flattened Device
+Tree (FDT) was created which could be passed to the kernel as a binary
+blob without requiring a real Open Firmware implementation. U-Boot,
+kexec, and other bootloaders were modified to support both passing a
+Device Tree Binary (dtb) and to modify a dtb at boot time. DT was
+also added to the PowerPC boot wrapper (arch/powerpc/boot/*) so that
+a dtb could be wrapped up with the kernel image to support booting
+existing non-DT aware firmware.
+
+Some time later, FDT infrastructure was generalized to be usable by
+all architectures. At the time of this writing, 6 mainlined
+architectures (arm, microblaze, mips, powerpc, sparc, and x86) and 1
+out of mainline (nios) have some level of DT support.
+
+2. Data Model
+-------------
+If you haven't already read the Device Tree Usage[1] page,
+then go read it now. It's okay, I'll wait....
+
+2.1 High Level View
+-------------------
+The most important thing to understand is that the DT is simply a data
+structure that describes the hardware. There is nothing magical about
+it, and it doesn't magically make all hardware configuration problems
+go away. What it does do is provide a language for decoupling the
+hardware configuration from the board and device driver support in the
+Linux kernel (or any other operating system for that matter). Using
+it allows board and device support to become data driven; to make
+setup decisions based on data passed into the kernel instead of on
+per-machine hard coded selections.
+
+Ideally, data driven platform setup should result in less code
+duplication and make it easier to support a wide range of hardware
+with a single kernel image.
+
+Linux uses DT data for three major purposes:
+1) platform identification,
+2) runtime configuration, and
+3) device population.
+
+2.2 Platform Identification
+---------------------------
+First and foremost, the kernel will use data in the DT to identify the
+specific machine. In a perfect world, the specific platform shouldn't
+matter to the kernel because all platform details would be described
+perfectly by the device tree in a consistent and reliable manner.
+Hardware is not perfect though, and so the kernel must identify the
+machine during early boot so that it has the opportunity to run
+machine-specific fixups.
+
+In the majority of cases, the machine identity is irrelevant, and the
+kernel will instead select setup code based on the machine's core
+CPU or SoC. On ARM for example, setup_arch() in
+arch/arm/kernel/setup.c will call setup_machine_fdt() in
+arch/arm/kernel/devtree.c which searches through the machine_desc
+table and selects the machine_desc which best matches the device tree
+data. It determines the best match by looking at the 'compatible'
+property in the root device tree node, and comparing it with the
+dt_compat list in struct machine_desc (which is defined in
+arch/arm/include/asm/mach/arch.h if you're curious).
+
+The 'compatible' property contains a sorted list of strings starting
+with the exact name of the machine, followed by an optional list of
+boards it is compatible with sorted from most compatible to least. For
+example, the root compatible properties for the TI BeagleBoard and its
+successor, the BeagleBoard xM board might look like, respectively:
+
+ compatible = "ti,omap3-beagleboard", "ti,omap3450", "ti,omap3";
+ compatible = "ti,omap3-beagleboard-xm", "ti,omap3450", "ti,omap3";
+
+Where "ti,omap3-beagleboard-xm" specifies the exact model, it also
+claims that it compatible with the OMAP 3450 SoC, and the omap3 family
+of SoCs in general. You'll notice that the list is sorted from most
+specific (exact board) to least specific (SoC family).
+
+Astute readers might point out that the Beagle xM could also claim
+compatibility with the original Beagle board. However, one should be
+cautioned about doing so at the board level since there is typically a
+high level of change from one board to another, even within the same
+product line, and it is hard to nail down exactly what is meant when one
+board claims to be compatible with another. For the top level, it is
+better to err on the side of caution and not claim one board is
+compatible with another. The notable exception would be when one
+board is a carrier for another, such as a CPU module attached to a
+carrier board.
+
+One more note on compatible values. Any string used in a compatible
+property must be documented as to what it indicates. Add
+documentation for compatible strings in Documentation/devicetree/bindings.
+
+Again on ARM, for each machine_desc, the kernel looks to see if
+any of the dt_compat list entries appear in the compatible property.
+If one does, then that machine_desc is a candidate for driving the
+machine. After searching the entire table of machine_descs,
+setup_machine_fdt() returns the 'most compatible' machine_desc based
+on which entry in the compatible property each machine_desc matches
+against. If no matching machine_desc is found, then it returns NULL.
+
+The reasoning behind this scheme is the observation that in the majority
+of cases, a single machine_desc can support a large number of boards
+if they all use the same SoC, or same family of SoCs. However,
+invariably there will be some exceptions where a specific board will
+require special setup code that is not useful in the generic case.
+Special cases could be handled by explicitly checking for the
+troublesome board(s) in generic setup code, but doing so very quickly
+becomes ugly and/or unmaintainable if it is more than just a couple of
+cases.
+
+Instead, the compatible list allows a generic machine_desc to provide
+support for a wide common set of boards by specifying "less
+compatible" values in the dt_compat list. In the example above,
+generic board support can claim compatibility with "ti,omap3" or
+"ti,omap3450". If a bug was discovered on the original beagleboard
+that required special workaround code during early boot, then a new
+machine_desc could be added which implements the workarounds and only
+matches on "ti,omap3-beagleboard".
+
+PowerPC uses a slightly different scheme where it calls the .probe()
+hook from each machine_desc, and the first one returning TRUE is used.
+However, this approach does not take into account the priority of the
+compatible list, and probably should be avoided for new architecture
+support.
+
+2.3 Runtime configuration
+-------------------------
+In most cases, a DT will be the sole method of communicating data from
+firmware to the kernel, so also gets used to pass in runtime and
+configuration data like the kernel parameters string and the location
+of an initrd image.
+
+Most of this data is contained in the /chosen node, and when booting
+Linux it will look something like this:
+
+ chosen {
+ bootargs = "console=ttyS0,115200 loglevel=8";
+ initrd-start = <0xc8000000>;
+ initrd-end = <0xc8200000>;
+ };
+
+The bootargs property contains the kernel arguments, and the initrd-*
+properties define the address and size of an initrd blob. Note that
+initrd-end is the first address after the initrd image, so this doesn't
+match the usual semantic of struct resource. The chosen node may also
+optionally contain an arbitrary number of additional properties for
+platform-specific configuration data.
+
+During early boot, the architecture setup code calls of_scan_flat_dt()
+several times with different helper callbacks to parse device tree
+data before paging is setup. The of_scan_flat_dt() code scans through
+the device tree and uses the helpers to extract information required
+during early boot. Typically the early_init_dt_scan_chosen() helper
+is used to parse the chosen node including kernel parameters,
+early_init_dt_scan_root() to initialize the DT address space model,
+and early_init_dt_scan_memory() to determine the size and
+location of usable RAM.
+
+On ARM, the function setup_machine_fdt() is responsible for early
+scanning of the device tree after selecting the correct machine_desc
+that supports the board.
+
+2.4 Device population
+---------------------
+After the board has been identified, and after the early configuration data
+has been parsed, then kernel initialization can proceed in the normal
+way. At some point in this process, unflatten_device_tree() is called
+to convert the data into a more efficient runtime representation.
+This is also when machine-specific setup hooks will get called, like
+the machine_desc .init_early(), .init_irq() and .init_machine() hooks
+on ARM. The remainder of this section uses examples from the ARM
+implementation, but all architectures will do pretty much the same
+thing when using a DT.
+
+As can be guessed by the names, .init_early() is used for any machine-
+specific setup that needs to be executed early in the boot process,
+and .init_irq() is used to set up interrupt handling. Using a DT
+doesn't materially change the behaviour of either of these functions.
+If a DT is provided, then both .init_early() and .init_irq() are able
+to call any of the DT query functions (of_* in include/linux/of*.h) to
+get additional data about the platform.
+
+The most interesting hook in the DT context is .init_machine() which
+is primarily responsible for populating the Linux device model with
+data about the platform. Historically this has been implemented on
+embedded platforms by defining a set of static clock structures,
+platform_devices, and other data in the board support .c file, and
+registering it en-masse in .init_machine(). When DT is used, then
+instead of hard coding static devices for each platform, the list of
+devices can be obtained by parsing the DT, and allocating device
+structures dynamically.
+
+The simplest case is when .init_machine() is only responsible for
+registering a block of platform_devices. A platform_device is a concept
+used by Linux for memory or I/O mapped devices which cannot be detected
+by hardware, and for 'composite' or 'virtual' devices (more on those
+later). While there is no 'platform device' terminology for the DT,
+platform devices roughly correspond to device nodes at the root of the
+tree and children of simple memory mapped bus nodes.
+
+About now is a good time to lay out an example. Here is part of the
+device tree for the NVIDIA Tegra board.
+
+/{
+ compatible = "nvidia,harmony", "nvidia,tegra20";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ interrupt-parent = <&intc>;
+
+ chosen { };
+ aliases { };
+
+ memory {
+ device_type = "memory";
+ reg = <0x00000000 0x40000000>;
+ };
+
+ soc {
+ compatible = "nvidia,tegra20-soc", "simple-bus";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ intc: interrupt-controller@50041000 {
+ compatible = "nvidia,tegra20-gic";
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ reg = <0x50041000 0x1000>, < 0x50040100 0x0100 >;
+ };
+
+ serial@70006300 {
+ compatible = "nvidia,tegra20-uart";
+ reg = <0x70006300 0x100>;
+ interrupts = <122>;
+ };
+
+ i2s1: i2s@70002800 {
+ compatible = "nvidia,tegra20-i2s";
+ reg = <0x70002800 0x100>;
+ interrupts = <77>;
+ codec = <&wm8903>;
+ };
+
+ i2c@7000c000 {
+ compatible = "nvidia,tegra20-i2c";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ reg = <0x7000c000 0x100>;
+ interrupts = <70>;
+
+ wm8903: codec@1a {
+ compatible = "wlf,wm8903";
+ reg = <0x1a>;
+ interrupts = <347>;
+ };
+ };
+ };
+
+ sound {
+ compatible = "nvidia,harmony-sound";
+ i2s-controller = <&i2s1>;
+ i2s-codec = <&wm8903>;
+ };
+};
+
+At .init_machine() time, Tegra board support code will need to look at
+this DT and decide which nodes to create platform_devices for.
+However, looking at the tree, it is not immediately obvious what kind
+of device each node represents, or even if a node represents a device
+at all. The /chosen, /aliases, and /memory nodes are informational
+nodes that don't describe devices (although arguably memory could be
+considered a device). The children of the /soc node are memory mapped
+devices, but the codec@1a is an i2c device, and the sound node
+represents not a device, but rather how other devices are connected
+together to create the audio subsystem. I know what each device is
+because I'm familiar with the board design, but how does the kernel
+know what to do with each node?
+
+The trick is that the kernel starts at the root of the tree and looks
+for nodes that have a 'compatible' property. First, it is generally
+assumed that any node with a 'compatible' property represents a device
+of some kind, and second, it can be assumed that any node at the root
+of the tree is either directly attached to the processor bus, or is a
+miscellaneous system device that cannot be described any other way.
+For each of these nodes, Linux allocates and registers a
+platform_device, which in turn may get bound to a platform_driver.
+
+Why is using a platform_device for these nodes a safe assumption?
+Well, for the way that Linux models devices, just about all bus_types
+assume that its devices are children of a bus controller. For
+example, each i2c_client is a child of an i2c_master. Each spi_device
+is a child of an SPI bus. Similarly for USB, PCI, MDIO, etc. The
+same hierarchy is also found in the DT, where I2C device nodes only
+ever appear as children of an I2C bus node. Ditto for SPI, MDIO, USB,
+etc. The only devices which do not require a specific type of parent
+device are platform_devices (and amba_devices, but more on that
+later), which will happily live at the base of the Linux /sys/devices
+tree. Therefore, if a DT node is at the root of the tree, then it
+really probably is best registered as a platform_device.
+
+Linux board support code calls of_platform_populate(NULL, NULL, NULL, NULL)
+to kick off discovery of devices at the root of the tree. The
+parameters are all NULL because when starting from the root of the
+tree, there is no need to provide a starting node (the first NULL), a
+parent struct device (the last NULL), and we're not using a match
+table (yet). For a board that only needs to register devices,
+.init_machine() can be completely empty except for the
+of_platform_populate() call.
+
+In the Tegra example, this accounts for the /soc and /sound nodes, but
+what about the children of the SoC node? Shouldn't they be registered
+as platform devices too? For Linux DT support, the generic behaviour
+is for child devices to be registered by the parent's device driver at
+driver .probe() time. So, an i2c bus device driver will register a
+i2c_client for each child node, an SPI bus driver will register
+its spi_device children, and similarly for other bus_types.
+According to that model, a driver could be written that binds to the
+SoC node and simply registers platform_devices for each of its
+children. The board support code would allocate and register an SoC
+device, a (theoretical) SoC device driver could bind to the SoC device,
+and register platform_devices for /soc/interrupt-controller, /soc/serial,
+/soc/i2s, and /soc/i2c in its .probe() hook. Easy, right?
+
+Actually, it turns out that registering children of some
+platform_devices as more platform_devices is a common pattern, and the
+device tree support code reflects that and makes the above example
+simpler. The second argument to of_platform_populate() is an
+of_device_id table, and any node that matches an entry in that table
+will also get its child nodes registered. In the Tegra case, the code
+can look something like this:
+
+static void __init harmony_init_machine(void)
+{
+ /* ... */
+ of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
+}
+
+"simple-bus" is defined in the ePAPR 1.0 specification as a property
+meaning a simple memory mapped bus, so the of_platform_populate() code
+could be written to just assume simple-bus compatible nodes will
+always be traversed. However, we pass it in as an argument so that
+board support code can always override the default behaviour.
+
+[Need to add discussion of adding i2c/spi/etc child devices]
+
+Appendix A: AMBA devices
+------------------------
+
+ARM Primecells are a certain kind of device attached to the ARM AMBA
+bus which include some support for hardware detection and power
+management. In Linux, struct amba_device and the amba_bus_type is
+used to represent Primecell devices. However, the fiddly bit is that
+not all devices on an AMBA bus are Primecells, and for Linux it is
+typical for both amba_device and platform_device instances to be
+siblings of the same bus segment.
+
+When using the DT, this creates problems for of_platform_populate()
+because it must decide whether to register each node as either a
+platform_device or an amba_device. This unfortunately complicates the
+device creation model a little bit, but the solution turns out not to
+be too invasive. If a node is compatible with "arm,amba-primecell", then
+of_platform_populate() will register it as an amba_device instead of a
+platform_device.
diff --git a/Documentation/digsig.txt b/Documentation/digsig.txt
new file mode 100644
index 000000000..3f6828890
--- /dev/null
+++ b/Documentation/digsig.txt
@@ -0,0 +1,96 @@
+Digital Signature Verification API
+
+CONTENTS
+
+1. Introduction
+2. API
+3. User-space utilities
+
+
+1. Introduction
+
+Digital signature verification API provides a method to verify digital signature.
+Currently digital signatures are used by the IMA/EVM integrity protection subsystem.
+
+Digital signature verification is implemented using cut-down kernel port of
+GnuPG multi-precision integers (MPI) library. The kernel port provides
+memory allocation errors handling, has been refactored according to kernel
+coding style, and checkpatch.pl reported errors and warnings have been fixed.
+
+Public key and signature consist of header and MPIs.
+
+struct pubkey_hdr {
+ uint8_t version; /* key format version */
+ time_t timestamp; /* key made, always 0 for now */
+ uint8_t algo;
+ uint8_t nmpi;
+ char mpi[0];
+} __packed;
+
+struct signature_hdr {
+ uint8_t version; /* signature format version */
+ time_t timestamp; /* signature made */
+ uint8_t algo;
+ uint8_t hash;
+ uint8_t keyid[8];
+ uint8_t nmpi;
+ char mpi[0];
+} __packed;
+
+keyid equals to SHA1[12-19] over the total key content.
+Signature header is used as an input to generate a signature.
+Such approach insures that key or signature header could not be changed.
+It protects timestamp from been changed and can be used for rollback
+protection.
+
+2. API
+
+API currently includes only 1 function:
+
+ digsig_verify() - digital signature verification with public key
+
+
+/**
+ * digsig_verify() - digital signature verification with public key
+ * @keyring: keyring to search key in
+ * @sig: digital signature
+ * @sigen: length of the signature
+ * @data: data
+ * @datalen: length of the data
+ * @return: 0 on success, -EINVAL otherwise
+ *
+ * Verifies data integrity against digital signature.
+ * Currently only RSA is supported.
+ * Normally hash of the content is used as a data for this function.
+ *
+ */
+int digsig_verify(struct key *keyring, const char *sig, int siglen,
+ const char *data, int datalen);
+
+3. User-space utilities
+
+The signing and key management utilities evm-utils provide functionality
+to generate signatures, to load keys into the kernel keyring.
+Keys can be in PEM or converted to the kernel format.
+When the key is added to the kernel keyring, the keyid defines the name
+of the key: 5D2B05FC633EE3E8 in the example bellow.
+
+Here is example output of the keyctl utility.
+
+$ keyctl show
+Session Keyring
+ -3 --alswrv 0 0 keyring: _ses
+603976250 --alswrv 0 -1 \_ keyring: _uid.0
+817777377 --alswrv 0 0 \_ user: kmk
+891974900 --alswrv 0 0 \_ encrypted: evm-key
+170323636 --alswrv 0 0 \_ keyring: _module
+548221616 --alswrv 0 0 \_ keyring: _ima
+128198054 --alswrv 0 0 \_ keyring: _evm
+
+$ keyctl list 128198054
+1 key in keyring:
+620789745: --alswrv 0 0 user: 5D2B05FC633EE3E8
+
+
+Dmitry Kasatkin
+06.10.2011
diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
new file mode 100644
index 000000000..480c8de3c
--- /dev/null
+++ b/Documentation/dma-buf-sharing.txt
@@ -0,0 +1,465 @@
+ DMA Buffer Sharing API Guide
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Sumit Semwal
+ <sumit dot semwal at linaro dot org>
+ <sumit dot semwal at ti dot com>
+
+This document serves as a guide to device-driver writers on what is the dma-buf
+buffer sharing API, how to use it for exporting and using shared buffers.
+
+Any device driver which wishes to be a part of DMA buffer sharing, can do so as
+either the 'exporter' of buffers, or the 'user' of buffers.
+
+Say a driver A wants to use buffers created by driver B, then we call B as the
+exporter, and A as buffer-user.
+
+The exporter
+- implements and manages operations[1] for the buffer
+- allows other users to share the buffer by using dma_buf sharing APIs,
+- manages the details of buffer allocation,
+- decides about the actual backing storage where this allocation happens,
+- takes care of any migration of scatterlist - for all (shared) users of this
+ buffer,
+
+The buffer-user
+- is one of (many) sharing users of the buffer.
+- doesn't need to worry about how the buffer is allocated, or where.
+- needs a mechanism to get access to the scatterlist that makes up this buffer
+ in memory, mapped into its own address space, so it can access the same area
+ of memory.
+
+dma-buf operations for device dma only
+--------------------------------------
+
+The dma_buf buffer sharing API usage contains the following steps:
+
+1. Exporter announces that it wishes to export a buffer
+2. Userspace gets the file descriptor associated with the exported buffer, and
+ passes it around to potential buffer-users based on use case
+3. Each buffer-user 'connects' itself to the buffer
+4. When needed, buffer-user requests access to the buffer from exporter
+5. When finished with its use, the buffer-user notifies end-of-DMA to exporter
+6. when buffer-user is done using this buffer completely, it 'disconnects'
+ itself from the buffer.
+
+
+1. Exporter's announcement of buffer export
+
+ The buffer exporter announces its wish to export a buffer. In this, it
+ connects its own private buffer data, provides implementation for operations
+ that can be performed on the exported dma_buf, and flags for the file
+ associated with this buffer. All these fields are filled in struct
+ dma_buf_export_info, defined via the DEFINE_DMA_BUF_EXPORT_INFO macro.
+
+ Interface:
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info)
+ struct dma_buf *dma_buf_export(struct dma_buf_export_info *exp_info)
+
+ If this succeeds, dma_buf_export allocates a dma_buf structure, and
+ returns a pointer to the same. It also associates an anonymous file with this
+ buffer, so it can be exported. On failure to allocate the dma_buf object,
+ it returns NULL.
+
+ 'exp_name' in struct dma_buf_export_info is the name of exporter - to
+ facilitate information while debugging. It is set to KBUILD_MODNAME by
+ default, so exporters don't have to provide a specific name, if they don't
+ wish to.
+
+ DEFINE_DMA_BUF_EXPORT_INFO macro defines the struct dma_buf_export_info,
+ zeroes it out and pre-populates exp_name in it.
+
+
+2. Userspace gets a handle to pass around to potential buffer-users
+
+ Userspace entity requests for a file-descriptor (fd) which is a handle to the
+ anonymous file associated with the buffer. It can then share the fd with other
+ drivers and/or processes.
+
+ Interface:
+ int dma_buf_fd(struct dma_buf *dmabuf, int flags)
+
+ This API installs an fd for the anonymous file associated with this buffer;
+ returns either 'fd', or error.
+
+3. Each buffer-user 'connects' itself to the buffer
+
+ Each buffer-user now gets a reference to the buffer, using the fd passed to
+ it.
+
+ Interface:
+ struct dma_buf *dma_buf_get(int fd)
+
+ This API will return a reference to the dma_buf, and increment refcount for
+ it.
+
+ After this, the buffer-user needs to attach its device with the buffer, which
+ helps the exporter to know of device buffer constraints.
+
+ Interface:
+ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
+ struct device *dev)
+
+ This API returns reference to an attachment structure, which is then used
+ for scatterlist operations. It will optionally call the 'attach' dma_buf
+ operation, if provided by the exporter.
+
+ The dma-buf sharing framework does the bookkeeping bits related to managing
+ the list of all attachments to a buffer.
+
+Until this stage, the buffer-exporter has the option to choose not to actually
+allocate the backing storage for this buffer, but wait for the first buffer-user
+to request use of buffer for allocation.
+
+
+4. When needed, buffer-user requests access to the buffer
+
+ Whenever a buffer-user wants to use the buffer for any DMA, it asks for
+ access to the buffer using dma_buf_map_attachment API. At least one attach to
+ the buffer must have happened before map_dma_buf can be called.
+
+ Interface:
+ struct sg_table * dma_buf_map_attachment(struct dma_buf_attachment *,
+ enum dma_data_direction);
+
+ This is a wrapper to dma_buf->ops->map_dma_buf operation, which hides the
+ "dma_buf->ops->" indirection from the users of this interface.
+
+ In struct dma_buf_ops, map_dma_buf is defined as
+ struct sg_table * (*map_dma_buf)(struct dma_buf_attachment *,
+ enum dma_data_direction);
+
+ It is one of the buffer operations that must be implemented by the exporter.
+ It should return the sg_table containing scatterlist for this buffer, mapped
+ into caller's address space.
+
+ If this is being called for the first time, the exporter can now choose to
+ scan through the list of attachments for this buffer, collate the requirements
+ of the attached devices, and choose an appropriate backing storage for the
+ buffer.
+
+ Based on enum dma_data_direction, it might be possible to have multiple users
+ accessing at the same time (for reading, maybe), or any other kind of sharing
+ that the exporter might wish to make available to buffer-users.
+
+ map_dma_buf() operation can return -EINTR if it is interrupted by a signal.
+
+
+5. When finished, the buffer-user notifies end-of-DMA to exporter
+
+ Once the DMA for the current buffer-user is over, it signals 'end-of-DMA' to
+ the exporter using the dma_buf_unmap_attachment API.
+
+ Interface:
+ void dma_buf_unmap_attachment(struct dma_buf_attachment *,
+ struct sg_table *);
+
+ This is a wrapper to dma_buf->ops->unmap_dma_buf() operation, which hides the
+ "dma_buf->ops->" indirection from the users of this interface.
+
+ In struct dma_buf_ops, unmap_dma_buf is defined as
+ void (*unmap_dma_buf)(struct dma_buf_attachment *,
+ struct sg_table *,
+ enum dma_data_direction);
+
+ unmap_dma_buf signifies the end-of-DMA for the attachment provided. Like
+ map_dma_buf, this API also must be implemented by the exporter.
+
+
+6. when buffer-user is done using this buffer, it 'disconnects' itself from the
+ buffer.
+
+ After the buffer-user has no more interest in using this buffer, it should
+ disconnect itself from the buffer:
+
+ - it first detaches itself from the buffer.
+
+ Interface:
+ void dma_buf_detach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *dmabuf_attach);
+
+ This API removes the attachment from the list in dmabuf, and optionally calls
+ dma_buf->ops->detach(), if provided by exporter, for any housekeeping bits.
+
+ - Then, the buffer-user returns the buffer reference to exporter.
+
+ Interface:
+ void dma_buf_put(struct dma_buf *dmabuf);
+
+ This API then reduces the refcount for this buffer.
+
+ If, as a result of this call, the refcount becomes 0, the 'release' file
+ operation related to this fd is called. It calls the dmabuf->ops->release()
+ operation in turn, and frees the memory allocated for dmabuf when exported.
+
+NOTES:
+- Importance of attach-detach and {map,unmap}_dma_buf operation pairs
+ The attach-detach calls allow the exporter to figure out backing-storage
+ constraints for the currently-interested devices. This allows preferential
+ allocation, and/or migration of pages across different types of storage
+ available, if possible.
+
+ Bracketing of DMA access with {map,unmap}_dma_buf operations is essential
+ to allow just-in-time backing of storage, and migration mid-way through a
+ use-case.
+
+- Migration of backing storage if needed
+ If after
+ - at least one map_dma_buf has happened,
+ - and the backing storage has been allocated for this buffer,
+ another new buffer-user intends to attach itself to this buffer, it might
+ be allowed, if possible for the exporter.
+
+ In case it is allowed by the exporter:
+ if the new buffer-user has stricter 'backing-storage constraints', and the
+ exporter can handle these constraints, the exporter can just stall on the
+ map_dma_buf until all outstanding access is completed (as signalled by
+ unmap_dma_buf).
+ Once all users have finished accessing and have unmapped this buffer, the
+ exporter could potentially move the buffer to the stricter backing-storage,
+ and then allow further {map,unmap}_dma_buf operations from any buffer-user
+ from the migrated backing-storage.
+
+ If the exporter cannot fulfill the backing-storage constraints of the new
+ buffer-user device as requested, dma_buf_attach() would return an error to
+ denote non-compatibility of the new buffer-sharing request with the current
+ buffer.
+
+ If the exporter chooses not to allow an attach() operation once a
+ map_dma_buf() API has been called, it simply returns an error.
+
+Kernel cpu access to a dma-buf buffer object
+--------------------------------------------
+
+The motivation to allow cpu access from the kernel to a dma-buf object from the
+importers side are:
+- fallback operations, e.g. if the devices is connected to a usb bus and the
+ kernel needs to shuffle the data around first before sending it away.
+- full transparency for existing users on the importer side, i.e. userspace
+ should not notice the difference between a normal object from that subsystem
+ and an imported one backed by a dma-buf. This is really important for drm
+ opengl drivers that expect to still use all the existing upload/download
+ paths.
+
+Access to a dma_buf from the kernel context involves three steps:
+
+1. Prepare access, which invalidate any necessary caches and make the object
+ available for cpu access.
+2. Access the object page-by-page with the dma_buf map apis
+3. Finish access, which will flush any necessary cpu caches and free reserved
+ resources.
+
+1. Prepare access
+
+ Before an importer can access a dma_buf object with the cpu from the kernel
+ context, it needs to notify the exporter of the access that is about to
+ happen.
+
+ Interface:
+ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
+ size_t start, size_t len,
+ enum dma_data_direction direction)
+
+ This allows the exporter to ensure that the memory is actually available for
+ cpu access - the exporter might need to allocate or swap-in and pin the
+ backing storage. The exporter also needs to ensure that cpu access is
+ coherent for the given range and access direction. The range and access
+ direction can be used by the exporter to optimize the cache flushing, i.e.
+ access outside of the range or with a different direction (read instead of
+ write) might return stale or even bogus data (e.g. when the exporter needs to
+ copy the data to temporary storage).
+
+ This step might fail, e.g. in oom conditions.
+
+2. Accessing the buffer
+
+ To support dma_buf objects residing in highmem cpu access is page-based using
+ an api similar to kmap. Accessing a dma_buf is done in aligned chunks of
+ PAGE_SIZE size. Before accessing a chunk it needs to be mapped, which returns
+ a pointer in kernel virtual address space. Afterwards the chunk needs to be
+ unmapped again. There is no limit on how often a given chunk can be mapped
+ and unmapped, i.e. the importer does not need to call begin_cpu_access again
+ before mapping the same chunk again.
+
+ Interfaces:
+ void *dma_buf_kmap(struct dma_buf *, unsigned long);
+ void dma_buf_kunmap(struct dma_buf *, unsigned long, void *);
+
+ There are also atomic variants of these interfaces. Like for kmap they
+ facilitate non-blocking fast-paths. Neither the importer nor the exporter (in
+ the callback) is allowed to block when using these.
+
+ Interfaces:
+ void *dma_buf_kmap_atomic(struct dma_buf *, unsigned long);
+ void dma_buf_kunmap_atomic(struct dma_buf *, unsigned long, void *);
+
+ For importers all the restrictions of using kmap apply, like the limited
+ supply of kmap_atomic slots. Hence an importer shall only hold onto at most 2
+ atomic dma_buf kmaps at the same time (in any given process context).
+
+ dma_buf kmap calls outside of the range specified in begin_cpu_access are
+ undefined. If the range is not PAGE_SIZE aligned, kmap needs to succeed on
+ the partial chunks at the beginning and end but may return stale or bogus
+ data outside of the range (in these partial chunks).
+
+ Note that these calls need to always succeed. The exporter needs to complete
+ any preparations that might fail in begin_cpu_access.
+
+ For some cases the overhead of kmap can be too high, a vmap interface
+ is introduced. This interface should be used very carefully, as vmalloc
+ space is a limited resources on many architectures.
+
+ Interfaces:
+ void *dma_buf_vmap(struct dma_buf *dmabuf)
+ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
+
+ The vmap call can fail if there is no vmap support in the exporter, or if it
+ runs out of vmalloc space. Fallback to kmap should be implemented. Note that
+ the dma-buf layer keeps a reference count for all vmap access and calls down
+ into the exporter's vmap function only when no vmapping exists, and only
+ unmaps it once. Protection against concurrent vmap/vunmap calls is provided
+ by taking the dma_buf->lock mutex.
+
+3. Finish access
+
+ When the importer is done accessing the range specified in begin_cpu_access,
+ it needs to announce this to the exporter (to facilitate cache flushing and
+ unpinning of any pinned resources). The result of any dma_buf kmap calls
+ after end_cpu_access is undefined.
+
+ Interface:
+ void dma_buf_end_cpu_access(struct dma_buf *dma_buf,
+ size_t start, size_t len,
+ enum dma_data_direction dir);
+
+
+Direct Userspace Access/mmap Support
+------------------------------------
+
+Being able to mmap an export dma-buf buffer object has 2 main use-cases:
+- CPU fallback processing in a pipeline and
+- supporting existing mmap interfaces in importers.
+
+1. CPU fallback processing in a pipeline
+
+ In many processing pipelines it is sometimes required that the cpu can access
+ the data in a dma-buf (e.g. for thumbnail creation, snapshots, ...). To avoid
+ the need to handle this specially in userspace frameworks for buffer sharing
+ it's ideal if the dma_buf fd itself can be used to access the backing storage
+ from userspace using mmap.
+
+ Furthermore Android's ION framework already supports this (and is otherwise
+ rather similar to dma-buf from a userspace consumer side with using fds as
+ handles, too). So it's beneficial to support this in a similar fashion on
+ dma-buf to have a good transition path for existing Android userspace.
+
+ No special interfaces, userspace simply calls mmap on the dma-buf fd.
+
+2. Supporting existing mmap interfaces in importers
+
+ Similar to the motivation for kernel cpu access it is again important that
+ the userspace code of a given importing subsystem can use the same interfaces
+ with a imported dma-buf buffer object as with a native buffer object. This is
+ especially important for drm where the userspace part of contemporary OpenGL,
+ X, and other drivers is huge, and reworking them to use a different way to
+ mmap a buffer rather invasive.
+
+ The assumption in the current dma-buf interfaces is that redirecting the
+ initial mmap is all that's needed. A survey of some of the existing
+ subsystems shows that no driver seems to do any nefarious thing like syncing
+ up with outstanding asynchronous processing on the device or allocating
+ special resources at fault time. So hopefully this is good enough, since
+ adding interfaces to intercept pagefaults and allow pte shootdowns would
+ increase the complexity quite a bit.
+
+ Interface:
+ int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
+ unsigned long);
+
+ If the importing subsystem simply provides a special-purpose mmap call to set
+ up a mapping in userspace, calling do_mmap with dma_buf->file will equally
+ achieve that for a dma-buf object.
+
+3. Implementation notes for exporters
+
+ Because dma-buf buffers have invariant size over their lifetime, the dma-buf
+ core checks whether a vma is too large and rejects such mappings. The
+ exporter hence does not need to duplicate this check.
+
+ Because existing importing subsystems might presume coherent mappings for
+ userspace, the exporter needs to set up a coherent mapping. If that's not
+ possible, it needs to fake coherency by manually shooting down ptes when
+ leaving the cpu domain and flushing caches at fault time. Note that all the
+ dma_buf files share the same anon inode, hence the exporter needs to replace
+ the dma_buf file stored in vma->vm_file with it's own if pte shootdown is
+ required. This is because the kernel uses the underlying inode's address_space
+ for vma tracking (and hence pte tracking at shootdown time with
+ unmap_mapping_range).
+
+ If the above shootdown dance turns out to be too expensive in certain
+ scenarios, we can extend dma-buf with a more explicit cache tracking scheme
+ for userspace mappings. But the current assumption is that using mmap is
+ always a slower path, so some inefficiencies should be acceptable.
+
+ Exporters that shoot down mappings (for any reasons) shall not do any
+ synchronization at fault time with outstanding device operations.
+ Synchronization is an orthogonal issue to sharing the backing storage of a
+ buffer and hence should not be handled by dma-buf itself. This is explicitly
+ mentioned here because many people seem to want something like this, but if
+ different exporters handle this differently, buffer sharing can fail in
+ interesting ways depending upong the exporter (if userspace starts depending
+ upon this implicit synchronization).
+
+Other Interfaces Exposed to Userspace on the dma-buf FD
+------------------------------------------------------
+
+- Since kernel 3.12 the dma-buf FD supports the llseek system call, but only
+ with offset=0 and whence=SEEK_END|SEEK_SET. SEEK_SET is supported to allow
+ the usual size discover pattern size = SEEK_END(0); SEEK_SET(0). Every other
+ llseek operation will report -EINVAL.
+
+ If llseek on dma-buf FDs isn't support the kernel will report -ESPIPE for all
+ cases. Userspace can use this to detect support for discovering the dma-buf
+ size using llseek.
+
+Miscellaneous notes
+-------------------
+
+- Any exporters or users of the dma-buf buffer sharing framework must have
+ a 'select DMA_SHARED_BUFFER' in their respective Kconfigs.
+
+- In order to avoid fd leaks on exec, the FD_CLOEXEC flag must be set
+ on the file descriptor. This is not just a resource leak, but a
+ potential security hole. It could give the newly exec'd application
+ access to buffers, via the leaked fd, to which it should otherwise
+ not be permitted access.
+
+ The problem with doing this via a separate fcntl() call, versus doing it
+ atomically when the fd is created, is that this is inherently racy in a
+ multi-threaded app[3]. The issue is made worse when it is library code
+ opening/creating the file descriptor, as the application may not even be
+ aware of the fd's.
+
+ To avoid this problem, userspace must have a way to request O_CLOEXEC
+ flag be set when the dma-buf fd is created. So any API provided by
+ the exporting driver to create a dmabuf fd must provide a way to let
+ userspace control setting of O_CLOEXEC flag passed in to dma_buf_fd().
+
+- If an exporter needs to manually flush caches and hence needs to fake
+ coherency for mmap support, it needs to be able to zap all the ptes pointing
+ at the backing storage. Now linux mm needs a struct address_space associated
+ with the struct file stored in vma->vm_file to do that with the function
+ unmap_mapping_range. But the dma_buf framework only backs every dma_buf fd
+ with the anon_file struct file, i.e. all dma_bufs share the same file.
+
+ Hence exporters need to setup their own file (and address_space) association
+ by setting vma->vm_file and adjusting vma->vm_pgoff in the dma_buf mmap
+ callback. In the specific case of a gem driver the exporter could use the
+ shmem file already provided by gem (and set vm_pgoff = 0). Exporters can then
+ zap ptes by unmapping the corresponding range of the struct address_space
+ associated with their own file.
+
+References:
+[1] struct dma_buf_ops in include/linux/dma-buf.h
+[2] All interfaces mentioned above defined in include/linux/dma-buf.h
+[3] https://lwn.net/Articles/236486/
diff --git a/Documentation/dmaengine/00-INDEX b/Documentation/dmaengine/00-INDEX
new file mode 100644
index 000000000..07de6573d
--- /dev/null
+++ b/Documentation/dmaengine/00-INDEX
@@ -0,0 +1,8 @@
+00-INDEX
+ - this file.
+client.txt
+ -the DMA Engine API Guide.
+dmatest.txt
+ - how to compile, configure and use the dmatest system.
+provider.txt
+ - the DMA controller API. \ No newline at end of file
diff --git a/Documentation/dmaengine/client.txt b/Documentation/dmaengine/client.txt
new file mode 100644
index 000000000..11fb87ff6
--- /dev/null
+++ b/Documentation/dmaengine/client.txt
@@ -0,0 +1,199 @@
+ DMA Engine API Guide
+ ====================
+
+ Vinod Koul <vinod dot koul at intel.com>
+
+NOTE: For DMA Engine usage in async_tx please see:
+ Documentation/crypto/async-tx-api.txt
+
+
+Below is a guide to device driver writers on how to use the Slave-DMA API of the
+DMA Engine. This is applicable only for slave DMA usage only.
+
+The slave DMA usage consists of following steps:
+1. Allocate a DMA slave channel
+2. Set slave and controller specific parameters
+3. Get a descriptor for transaction
+4. Submit the transaction
+5. Issue pending requests and wait for callback notification
+
+1. Allocate a DMA slave channel
+
+ Channel allocation is slightly different in the slave DMA context,
+ client drivers typically need a channel from a particular DMA
+ controller only and even in some cases a specific channel is desired.
+ To request a channel dma_request_channel() API is used.
+
+ Interface:
+ struct dma_chan *dma_request_channel(dma_cap_mask_t mask,
+ dma_filter_fn filter_fn,
+ void *filter_param);
+ where dma_filter_fn is defined as:
+ typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param);
+
+ The 'filter_fn' parameter is optional, but highly recommended for
+ slave and cyclic channels as they typically need to obtain a specific
+ DMA channel.
+
+ When the optional 'filter_fn' parameter is NULL, dma_request_channel()
+ simply returns the first channel that satisfies the capability mask.
+
+ Otherwise, the 'filter_fn' routine will be called once for each free
+ channel which has a capability in 'mask'. 'filter_fn' is expected to
+ return 'true' when the desired DMA channel is found.
+
+ A channel allocated via this interface is exclusive to the caller,
+ until dma_release_channel() is called.
+
+2. Set slave and controller specific parameters
+
+ Next step is always to pass some specific information to the DMA
+ driver. Most of the generic information which a slave DMA can use
+ is in struct dma_slave_config. This allows the clients to specify
+ DMA direction, DMA addresses, bus widths, DMA burst lengths etc
+ for the peripheral.
+
+ If some DMA controllers have more parameters to be sent then they
+ should try to embed struct dma_slave_config in their controller
+ specific structure. That gives flexibility to client to pass more
+ parameters, if required.
+
+ Interface:
+ int dmaengine_slave_config(struct dma_chan *chan,
+ struct dma_slave_config *config)
+
+ Please see the dma_slave_config structure definition in dmaengine.h
+ for a detailed explanation of the struct members. Please note
+ that the 'direction' member will be going away as it duplicates the
+ direction given in the prepare call.
+
+3. Get a descriptor for transaction
+
+ For slave usage the various modes of slave transfers supported by the
+ DMA-engine are:
+
+ slave_sg - DMA a list of scatter gather buffers from/to a peripheral
+ dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the
+ operation is explicitly stopped.
+ interleaved_dma - This is common to Slave as well as M2M clients. For slave
+ address of devices' fifo could be already known to the driver.
+ Various types of operations could be expressed by setting
+ appropriate values to the 'dma_interleaved_template' members.
+
+ A non-NULL return of this transfer API represents a "descriptor" for
+ the given transaction.
+
+ Interface:
+ struct dma_async_tx_descriptor *dmaengine_prep_slave_sg(
+ struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_data_direction direction,
+ unsigned long flags);
+
+ struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic(
+ struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
+ size_t period_len, enum dma_data_direction direction);
+
+ struct dma_async_tx_descriptor *dmaengine_prep_interleaved_dma(
+ struct dma_chan *chan, struct dma_interleaved_template *xt,
+ unsigned long flags);
+
+ The peripheral driver is expected to have mapped the scatterlist for
+ the DMA operation prior to calling dmaengine_prep_slave_sg(), and must
+ keep the scatterlist mapped until the DMA operation has completed.
+ The scatterlist must be mapped using the DMA struct device.
+ If a mapping needs to be synchronized later, dma_sync_*_for_*() must be
+ called using the DMA struct device, too.
+ So, normal setup should look like this:
+
+ nr_sg = dma_map_sg(chan->device->dev, sgl, sg_len);
+ if (nr_sg == 0)
+ /* error */
+
+ desc = dmaengine_prep_slave_sg(chan, sgl, nr_sg, direction, flags);
+
+ Once a descriptor has been obtained, the callback information can be
+ added and the descriptor must then be submitted. Some DMA engine
+ drivers may hold a spinlock between a successful preparation and
+ submission so it is important that these two operations are closely
+ paired.
+
+ Note:
+ Although the async_tx API specifies that completion callback
+ routines cannot submit any new operations, this is not the
+ case for slave/cyclic DMA.
+
+ For slave DMA, the subsequent transaction may not be available
+ for submission prior to callback function being invoked, so
+ slave DMA callbacks are permitted to prepare and submit a new
+ transaction.
+
+ For cyclic DMA, a callback function may wish to terminate the
+ DMA via dmaengine_terminate_all().
+
+ Therefore, it is important that DMA engine drivers drop any
+ locks before calling the callback function which may cause a
+ deadlock.
+
+ Note that callbacks will always be invoked from the DMA
+ engines tasklet, never from interrupt context.
+
+4. Submit the transaction
+
+ Once the descriptor has been prepared and the callback information
+ added, it must be placed on the DMA engine drivers pending queue.
+
+ Interface:
+ dma_cookie_t dmaengine_submit(struct dma_async_tx_descriptor *desc)
+
+ This returns a cookie can be used to check the progress of DMA engine
+ activity via other DMA engine calls not covered in this document.
+
+ dmaengine_submit() will not start the DMA operation, it merely adds
+ it to the pending queue. For this, see step 5, dma_async_issue_pending.
+
+5. Issue pending DMA requests and wait for callback notification
+
+ The transactions in the pending queue can be activated by calling the
+ issue_pending API. If channel is idle then the first transaction in
+ queue is started and subsequent ones queued up.
+
+ On completion of each DMA operation, the next in queue is started and
+ a tasklet triggered. The tasklet will then call the client driver
+ completion callback routine for notification, if set.
+
+ Interface:
+ void dma_async_issue_pending(struct dma_chan *chan);
+
+Further APIs:
+
+1. int dmaengine_terminate_all(struct dma_chan *chan)
+
+ This causes all activity for the DMA channel to be stopped, and may
+ discard data in the DMA FIFO which hasn't been fully transferred.
+ No callback functions will be called for any incomplete transfers.
+
+2. int dmaengine_pause(struct dma_chan *chan)
+
+ This pauses activity on the DMA channel without data loss.
+
+3. int dmaengine_resume(struct dma_chan *chan)
+
+ Resume a previously paused DMA channel. It is invalid to resume a
+ channel which is not currently paused.
+
+4. enum dma_status dma_async_is_tx_complete(struct dma_chan *chan,
+ dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used)
+
+ This can be used to check the status of the channel. Please see
+ the documentation in include/linux/dmaengine.h for a more complete
+ description of this API.
+
+ This can be used in conjunction with dma_async_is_complete() and
+ the cookie returned from dmaengine_submit() to check for
+ completion of a specific DMA transaction.
+
+ Note:
+ Not all DMA engine drivers can return reliable information for
+ a running DMA channel. It is recommended that DMA engine users
+ pause or stop (via dmaengine_terminate_all()) the channel before
+ using this API.
diff --git a/Documentation/dmaengine/dmatest.txt b/Documentation/dmaengine/dmatest.txt
new file mode 100644
index 000000000..dd77a81bd
--- /dev/null
+++ b/Documentation/dmaengine/dmatest.txt
@@ -0,0 +1,92 @@
+ DMA Test Guide
+ ==============
+
+ Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+This small document introduces how to test DMA drivers using dmatest module.
+
+ Part 1 - How to build the test module
+
+The menuconfig contains an option that could be found by following path:
+ Device Drivers -> DMA Engine support -> DMA Test client
+
+In the configuration file the option called CONFIG_DMATEST. The dmatest could
+be built as module or inside kernel. Let's consider those cases.
+
+ Part 2 - When dmatest is built as a module...
+
+Example of usage:
+ % modprobe dmatest channel=dma0chan0 timeout=2000 iterations=1 run=1
+
+...or:
+ % modprobe dmatest
+ % echo dma0chan0 > /sys/module/dmatest/parameters/channel
+ % echo 2000 > /sys/module/dmatest/parameters/timeout
+ % echo 1 > /sys/module/dmatest/parameters/iterations
+ % echo 1 > /sys/module/dmatest/parameters/run
+
+...or on the kernel command line:
+
+ dmatest.channel=dma0chan0 dmatest.timeout=2000 dmatest.iterations=1 dmatest.run=1
+
+Hint: available channel list could be extracted by running the following
+command:
+ % ls -1 /sys/class/dma/
+
+Once started a message like "dmatest: Started 1 threads using dma0chan0" is
+emitted. After that only test failure messages are reported until the test
+stops.
+
+Note that running a new test will not stop any in progress test.
+
+The following command returns the state of the test.
+ % cat /sys/module/dmatest/parameters/run
+
+To wait for test completion userpace can poll 'run' until it is false, or use
+the wait parameter. Specifying 'wait=1' when loading the module causes module
+initialization to pause until a test run has completed, while reading
+/sys/module/dmatest/parameters/wait waits for any running test to complete
+before returning. For example, the following scripts wait for 42 tests
+to complete before exiting. Note that if 'iterations' is set to 'infinite' then
+waiting is disabled.
+
+Example:
+ % modprobe dmatest run=1 iterations=42 wait=1
+ % modprobe -r dmatest
+...or:
+ % modprobe dmatest run=1 iterations=42
+ % cat /sys/module/dmatest/parameters/wait
+ % modprobe -r dmatest
+
+ Part 3 - When built-in in the kernel...
+
+The module parameters that is supplied to the kernel command line will be used
+for the first performed test. After user gets a control, the test could be
+re-run with the same or different parameters. For the details see the above
+section "Part 2 - When dmatest is built as a module..."
+
+In both cases the module parameters are used as the actual values for the test
+case. You always could check them at run-time by running
+ % grep -H . /sys/module/dmatest/parameters/*
+
+ Part 4 - Gathering the test results
+
+Test results are printed to the kernel log buffer with the format:
+
+"dmatest: result <channel>: <test id>: '<error msg>' with src_off=<val> dst_off=<val> len=<val> (<err code>)"
+
+Example of output:
+ % dmesg | tail -n 1
+ dmatest: result dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0)
+
+The message format is unified across the different types of errors. A number in
+the parens represents additional information, e.g. error code, error counter,
+or status. A test thread also emits a summary line at completion listing the
+number of tests executed, number that failed, and a result code.
+
+Example:
+ % dmesg | tail -n 1
+ dmatest: dma0chan0-copy0: summary 1 test, 0 failures 1000 iops 100000 KB/s (0)
+
+The details of a data miscompare error are also emitted, but do not follow the
+above format.
diff --git a/Documentation/dmaengine/provider.txt b/Documentation/dmaengine/provider.txt
new file mode 100644
index 000000000..05d228019
--- /dev/null
+++ b/Documentation/dmaengine/provider.txt
@@ -0,0 +1,379 @@
+DMAengine controller documentation
+==================================
+
+Hardware Introduction
++++++++++++++++++++++
+
+Most of the Slave DMA controllers have the same general principles of
+operations.
+
+They have a given number of channels to use for the DMA transfers, and
+a given number of requests lines.
+
+Requests and channels are pretty much orthogonal. Channels can be used
+to serve several to any requests. To simplify, channels are the
+entities that will be doing the copy, and requests what endpoints are
+involved.
+
+The request lines actually correspond to physical lines going from the
+DMA-eligible devices to the controller itself. Whenever the device
+will want to start a transfer, it will assert a DMA request (DRQ) by
+asserting that request line.
+
+A very simple DMA controller would only take into account a single
+parameter: the transfer size. At each clock cycle, it would transfer a
+byte of data from one buffer to another, until the transfer size has
+been reached.
+
+That wouldn't work well in the real world, since slave devices might
+require a specific number of bits to be transferred in a single
+cycle. For example, we may want to transfer as much data as the
+physical bus allows to maximize performances when doing a simple
+memory copy operation, but our audio device could have a narrower FIFO
+that requires data to be written exactly 16 or 24 bits at a time. This
+is why most if not all of the DMA controllers can adjust this, using a
+parameter called the transfer width.
+
+Moreover, some DMA controllers, whenever the RAM is used as a source
+or destination, can group the reads or writes in memory into a buffer,
+so instead of having a lot of small memory accesses, which is not
+really efficient, you'll get several bigger transfers. This is done
+using a parameter called the burst size, that defines how many single
+reads/writes it's allowed to do without the controller splitting the
+transfer into smaller sub-transfers.
+
+Our theoretical DMA controller would then only be able to do transfers
+that involve a single contiguous block of data. However, some of the
+transfers we usually have are not, and want to copy data from
+non-contiguous buffers to a contiguous buffer, which is called
+scatter-gather.
+
+DMAEngine, at least for mem2dev transfers, require support for
+scatter-gather. So we're left with two cases here: either we have a
+quite simple DMA controller that doesn't support it, and we'll have to
+implement it in software, or we have a more advanced DMA controller,
+that implements in hardware scatter-gather.
+
+The latter are usually programmed using a collection of chunks to
+transfer, and whenever the transfer is started, the controller will go
+over that collection, doing whatever we programmed there.
+
+This collection is usually either a table or a linked list. You will
+then push either the address of the table and its number of elements,
+or the first item of the list to one channel of the DMA controller,
+and whenever a DRQ will be asserted, it will go through the collection
+to know where to fetch the data from.
+
+Either way, the format of this collection is completely dependent on
+your hardware. Each DMA controller will require a different structure,
+but all of them will require, for every chunk, at least the source and
+destination addresses, whether it should increment these addresses or
+not and the three parameters we saw earlier: the burst size, the
+transfer width and the transfer size.
+
+The one last thing is that usually, slave devices won't issue DRQ by
+default, and you have to enable this in your slave device driver first
+whenever you're willing to use DMA.
+
+These were just the general memory-to-memory (also called mem2mem) or
+memory-to-device (mem2dev) kind of transfers. Most devices often
+support other kind of transfers or memory operations that dmaengine
+support and will be detailed later in this document.
+
+DMA Support in Linux
+++++++++++++++++++++
+
+Historically, DMA controller drivers have been implemented using the
+async TX API, to offload operations such as memory copy, XOR,
+cryptography, etc., basically any memory to memory operation.
+
+Over time, the need for memory to device transfers arose, and
+dmaengine was extended. Nowadays, the async TX API is written as a
+layer on top of dmaengine, and acts as a client. Still, dmaengine
+accommodates that API in some cases, and made some design choices to
+ensure that it stayed compatible.
+
+For more information on the Async TX API, please look the relevant
+documentation file in Documentation/crypto/async-tx-api.txt.
+
+DMAEngine Registration
+++++++++++++++++++++++
+
+struct dma_device Initialization
+--------------------------------
+
+Just like any other kernel framework, the whole DMAEngine registration
+relies on the driver filling a structure and registering against the
+framework. In our case, that structure is dma_device.
+
+The first thing you need to do in your driver is to allocate this
+structure. Any of the usual memory allocators will do, but you'll also
+need to initialize a few fields in there:
+
+ * channels: should be initialized as a list using the
+ INIT_LIST_HEAD macro for example
+
+ * src_addr_widths:
+ - should contain a bitmask of the supported source transfer width
+
+ * dst_addr_widths:
+ - should contain a bitmask of the supported destination transfer
+ width
+
+ * directions:
+ - should contain a bitmask of the supported slave directions
+ (i.e. excluding mem2mem transfers)
+
+ * residue_granularity:
+ - Granularity of the transfer residue reported to dma_set_residue.
+ - This can be either:
+ + Descriptor
+ -> Your device doesn't support any kind of residue
+ reporting. The framework will only know that a particular
+ transaction descriptor is done.
+ + Segment
+ -> Your device is able to report which chunks have been
+ transferred
+ + Burst
+ -> Your device is able to report which burst have been
+ transferred
+
+ * dev: should hold the pointer to the struct device associated
+ to your current driver instance.
+
+Supported transaction types
+---------------------------
+
+The next thing you need is to set which transaction types your device
+(and driver) supports.
+
+Our dma_device structure has a field called cap_mask that holds the
+various types of transaction supported, and you need to modify this
+mask using the dma_cap_set function, with various flags depending on
+transaction types you support as an argument.
+
+All those capabilities are defined in the dma_transaction_type enum,
+in include/linux/dmaengine.h
+
+Currently, the types available are:
+ * DMA_MEMCPY
+ - The device is able to do memory to memory copies
+
+ * DMA_XOR
+ - The device is able to perform XOR operations on memory areas
+ - Used to accelerate XOR intensive tasks, such as RAID5
+
+ * DMA_XOR_VAL
+ - The device is able to perform parity check using the XOR
+ algorithm against a memory buffer.
+
+ * DMA_PQ
+ - The device is able to perform RAID6 P+Q computations, P being a
+ simple XOR, and Q being a Reed-Solomon algorithm.
+
+ * DMA_PQ_VAL
+ - The device is able to perform parity check using RAID6 P+Q
+ algorithm against a memory buffer.
+
+ * DMA_INTERRUPT
+ - The device is able to trigger a dummy transfer that will
+ generate periodic interrupts
+ - Used by the client drivers to register a callback that will be
+ called on a regular basis through the DMA controller interrupt
+
+ * DMA_SG
+ - The device supports memory to memory scatter-gather
+ transfers.
+ - Even though a plain memcpy can look like a particular case of a
+ scatter-gather transfer, with a single chunk to transfer, it's a
+ distinct transaction type in the mem2mem transfers case
+
+ * DMA_PRIVATE
+ - The devices only supports slave transfers, and as such isn't
+ available for async transfers.
+
+ * DMA_ASYNC_TX
+ - Must not be set by the device, and will be set by the framework
+ if needed
+ - /* TODO: What is it about? */
+
+ * DMA_SLAVE
+ - The device can handle device to memory transfers, including
+ scatter-gather transfers.
+ - While in the mem2mem case we were having two distinct types to
+ deal with a single chunk to copy or a collection of them, here,
+ we just have a single transaction type that is supposed to
+ handle both.
+ - If you want to transfer a single contiguous memory buffer,
+ simply build a scatter list with only one item.
+
+ * DMA_CYCLIC
+ - The device can handle cyclic transfers.
+ - A cyclic transfer is a transfer where the chunk collection will
+ loop over itself, with the last item pointing to the first.
+ - It's usually used for audio transfers, where you want to operate
+ on a single ring buffer that you will fill with your audio data.
+
+ * DMA_INTERLEAVE
+ - The device supports interleaved transfer.
+ - These transfers can transfer data from a non-contiguous buffer
+ to a non-contiguous buffer, opposed to DMA_SLAVE that can
+ transfer data from a non-contiguous data set to a continuous
+ destination buffer.
+ - It's usually used for 2d content transfers, in which case you
+ want to transfer a portion of uncompressed data directly to the
+ display to print it
+
+These various types will also affect how the source and destination
+addresses change over time.
+
+Addresses pointing to RAM are typically incremented (or decremented)
+after each transfer. In case of a ring buffer, they may loop
+(DMA_CYCLIC). Addresses pointing to a device's register (e.g. a FIFO)
+are typically fixed.
+
+Device operations
+-----------------
+
+Our dma_device structure also requires a few function pointers in
+order to implement the actual logic, now that we described what
+operations we were able to perform.
+
+The functions that we have to fill in there, and hence have to
+implement, obviously depend on the transaction types you reported as
+supported.
+
+ * device_alloc_chan_resources
+ * device_free_chan_resources
+ - These functions will be called whenever a driver will call
+ dma_request_channel or dma_release_channel for the first/last
+ time on the channel associated to that driver.
+ - They are in charge of allocating/freeing all the needed
+ resources in order for that channel to be useful for your
+ driver.
+ - These functions can sleep.
+
+ * device_prep_dma_*
+ - These functions are matching the capabilities you registered
+ previously.
+ - These functions all take the buffer or the scatterlist relevant
+ for the transfer being prepared, and should create a hardware
+ descriptor or a list of hardware descriptors from it
+ - These functions can be called from an interrupt context
+ - Any allocation you might do should be using the GFP_NOWAIT
+ flag, in order not to potentially sleep, but without depleting
+ the emergency pool either.
+ - Drivers should try to pre-allocate any memory they might need
+ during the transfer setup at probe time to avoid putting to
+ much pressure on the nowait allocator.
+
+ - It should return a unique instance of the
+ dma_async_tx_descriptor structure, that further represents this
+ particular transfer.
+
+ - This structure can be initialized using the function
+ dma_async_tx_descriptor_init.
+ - You'll also need to set two fields in this structure:
+ + flags:
+ TODO: Can it be modified by the driver itself, or
+ should it be always the flags passed in the arguments
+
+ + tx_submit: A pointer to a function you have to implement,
+ that is supposed to push the current
+ transaction descriptor to a pending queue, waiting
+ for issue_pending to be called.
+
+ * device_issue_pending
+ - Takes the first transaction descriptor in the pending queue,
+ and starts the transfer. Whenever that transfer is done, it
+ should move to the next transaction in the list.
+ - This function can be called in an interrupt context
+
+ * device_tx_status
+ - Should report the bytes left to go over on the given channel
+ - Should only care about the transaction descriptor passed as
+ argument, not the currently active one on a given channel
+ - The tx_state argument might be NULL
+ - Should use dma_set_residue to report it
+ - In the case of a cyclic transfer, it should only take into
+ account the current period.
+ - This function can be called in an interrupt context.
+
+ * device_config
+ - Reconfigures the channel with the configuration given as
+ argument
+ - This command should NOT perform synchronously, or on any
+ currently queued transfers, but only on subsequent ones
+ - In this case, the function will receive a dma_slave_config
+ structure pointer as an argument, that will detail which
+ configuration to use.
+ - Even though that structure contains a direction field, this
+ field is deprecated in favor of the direction argument given to
+ the prep_* functions
+ - This call is mandatory for slave operations only. This should NOT be
+ set or expected to be set for memcpy operations.
+ If a driver support both, it should use this call for slave
+ operations only and not for memcpy ones.
+
+ * device_pause
+ - Pauses a transfer on the channel
+ - This command should operate synchronously on the channel,
+ pausing right away the work of the given channel
+
+ * device_resume
+ - Resumes a transfer on the channel
+ - This command should operate synchronously on the channel,
+ pausing right away the work of the given channel
+
+ * device_terminate_all
+ - Aborts all the pending and ongoing transfers on the channel
+ - This command should operate synchronously on the channel,
+ terminating right away all the channels
+
+Misc notes (stuff that should be documented, but don't really know
+where to put them)
+------------------------------------------------------------------
+ * dma_run_dependencies
+ - Should be called at the end of an async TX transfer, and can be
+ ignored in the slave transfers case.
+ - Makes sure that dependent operations are run before marking it
+ as complete.
+
+ * dma_cookie_t
+ - it's a DMA transaction ID that will increment over time.
+ - Not really relevant any more since the introduction of virt-dma
+ that abstracts it away.
+
+ * DMA_CTRL_ACK
+ - Undocumented feature
+ - No one really has an idea of what it's about, besides being
+ related to reusing the DMA transaction descriptors or having
+ additional transactions added to it in the async-tx API
+ - Useless in the case of the slave API
+
+General Design Notes
+--------------------
+
+Most of the DMAEngine drivers you'll see are based on a similar design
+that handles the end of transfer interrupts in the handler, but defer
+most work to a tasklet, including the start of a new transfer whenever
+the previous transfer ended.
+
+This is a rather inefficient design though, because the inter-transfer
+latency will be not only the interrupt latency, but also the
+scheduling latency of the tasklet, which will leave the channel idle
+in between, which will slow down the global transfer rate.
+
+You should avoid this kind of practice, and instead of electing a new
+transfer in your tasklet, move that part to the interrupt handler in
+order to have a shorter idle window (that we can't really avoid
+anyway).
+
+Glossary
+--------
+
+Burst: A number of consecutive read or write operations
+ that can be queued to buffers before being flushed to
+ memory.
+Chunk: A contiguous collection of bursts
+Transfer: A collection of chunks (be it contiguous or not)
diff --git a/Documentation/dontdiff b/Documentation/dontdiff
new file mode 100644
index 000000000..43eb13d87
--- /dev/null
+++ b/Documentation/dontdiff
@@ -0,0 +1,260 @@
+*.a
+*.aux
+*.bc
+*.bin
+*.bz2
+*.cis
+*.cpio
+*.csp
+*.dsp
+*.dvi
+*.elf
+*.eps
+*.fw
+*.gcno
+*.gcov
+*.gen.S
+*.gif
+*.grep
+*.grp
+*.gz
+*.html
+*.i
+*.jpeg
+*.ko
+*.ll
+*.log
+*.lst
+*.lzma
+*.lzo
+*.mo
+*.moc
+*.mod.c
+*.o
+*.o.*
+*.order
+*.orig
+*.out
+*.patch
+*.pdf
+*.plist
+*.png
+*.pot
+*.ps
+*.rej
+*.s
+*.sgml
+*.so
+*.so.dbg
+*.symtypes
+*.tab.c
+*.tab.h
+*.tex
+*.ver
+*.xml
+*.xz
+*_MODULES
+*_vga16.c
+*~
+\#*#
+*.9
+.*
+.*.d
+.mm
+53c700_d.h
+CVS
+ChangeSet
+GPATH
+GRTAGS
+GSYMS
+GTAGS
+Image
+Module.markers
+Module.symvers
+PENDING
+SCCS
+System.map*
+TAGS
+aconf
+af_names.h
+aic7*reg.h*
+aic7*reg_print.c*
+aic7*seq.h*
+aicasm
+aicdb.h*
+altivec*.c
+asm-offsets.h
+asm_offsets.h
+autoconf.h*
+av_permissions.h
+bbootsect
+bin2c
+binkernel.spec
+bootsect
+bounds.h
+bsetup
+btfixupprep
+build
+bvmlinux
+bzImage*
+capability_names.h
+capflags.c
+classlist.h*
+comp*.log
+compile.h*
+conf
+config
+config-*
+config_data.h*
+config.mak
+config.mak.autogen
+conmakehash
+consolemap_deftbl.c*
+cpustr.h
+crc32table.h*
+cscope.*
+defkeymap.c
+devlist.h*
+dnotify_test
+docproc
+dslm
+elf2ecoff
+elfconfig.h*
+evergreen_reg_safe.h
+fixdep
+flask.h
+fore200e_mkfirm
+fore200e_pca_fw.c*
+gconf
+gconf.glade.h
+gen-devlist
+gen_crc32table
+gen_init_cpio
+generated
+genheaders
+genksyms
+*_gray256.c
+hpet_example
+hugepage-mmap
+hugepage-shm
+ihex2fw
+ikconfig.h*
+inat-tables.c
+initramfs_list
+int16.c
+int1.c
+int2.c
+int32.c
+int4.c
+int8.c
+kallsyms
+kconfig
+keywords.c
+ksym.c*
+ksym.h*
+kxgettext
+lex.c
+lex.*.c
+linux
+logo_*.c
+logo_*_clut224.c
+logo_*_mono.c
+lxdialog
+mach-types
+mach-types.h
+machtypes.h
+map
+map_hugetlb
+media
+mconf
+miboot*
+mk_elfconfig
+mkboot
+mkbugboot
+mkcpustr
+mkdep
+mkprep
+mkregtable
+mktables
+mktree
+modpost
+modules.builtin
+modules.order
+modversions.h*
+nconf
+ncscope.*
+offset.h
+oui.c*
+page-types
+parse.c
+parse.h
+patches*
+/*(DEBLOBBED)*/
+/*(DEBLOBBED)*/
+perf.data
+perf.data.old
+perf-archive
+piggyback
+piggy.gzip
+piggy.S
+pnmtologo
+ppc_defs.h*
+pss_boot.h
+qconf
+r100_reg_safe.h
+r200_reg_safe.h
+r300_reg_safe.h
+r420_reg_safe.h
+r600_reg_safe.h
+recordmcount
+relocs
+rlim_names.h
+rn50_reg_safe.h
+rs600_reg_safe.h
+rv515_reg_safe.h
+series
+setup
+setup.bin
+setup.elf
+sImage
+sm_tbl*
+split-include
+syscalltab.h
+tables.c
+tags
+test_get_len
+tftpboot.img
+timeconst.h
+times.h*
+trix_boot.h
+utsrelease.h*
+vdso-syms.lds
+vdso.lds
+vdso32-int80-syms.lds
+vdso32-syms.lds
+vdso32-syscall-syms.lds
+vdso32-sysenter-syms.lds
+vdso32.lds
+vdso32.so.dbg
+vdso64.lds
+vdso64.so.dbg
+version.h*
+vmImage
+vmlinux
+vmlinux-*
+vmlinux.aout
+vmlinux.bin.all
+vmlinux.lds
+vmlinuz
+voffset.h
+vsyscall.lds
+vsyscall_32.lds
+wanxlfw.inc
+uImage
+unifdef
+wakeup.bin
+wakeup.elf
+wakeup.lds
+zImage*
+zconf.hash.c
+zoffset.h
diff --git a/Documentation/driver-model/binding.txt b/Documentation/driver-model/binding.txt
new file mode 100644
index 000000000..abfc8e290
--- /dev/null
+++ b/Documentation/driver-model/binding.txt
@@ -0,0 +1,98 @@
+
+Driver Binding
+
+Driver binding is the process of associating a device with a device
+driver that can control it. Bus drivers have typically handled this
+because there have been bus-specific structures to represent the
+devices and the drivers. With generic device and device driver
+structures, most of the binding can take place using common code.
+
+
+Bus
+~~~
+
+The bus type structure contains a list of all devices that are on that bus
+type in the system. When device_register is called for a device, it is
+inserted into the end of this list. The bus object also contains a
+list of all drivers of that bus type. When driver_register is called
+for a driver, it is inserted at the end of this list. These are the
+two events which trigger driver binding.
+
+
+device_register
+~~~~~~~~~~~~~~~
+
+When a new device is added, the bus's list of drivers is iterated over
+to find one that supports it. In order to determine that, the device
+ID of the device must match one of the device IDs that the driver
+supports. The format and semantics for comparing IDs is bus-specific.
+Instead of trying to derive a complex state machine and matching
+algorithm, it is up to the bus driver to provide a callback to compare
+a device against the IDs of a driver. The bus returns 1 if a match was
+found; 0 otherwise.
+
+int match(struct device * dev, struct device_driver * drv);
+
+If a match is found, the device's driver field is set to the driver
+and the driver's probe callback is called. This gives the driver a
+chance to verify that it really does support the hardware, and that
+it's in a working state.
+
+Device Class
+~~~~~~~~~~~~
+
+Upon the successful completion of probe, the device is registered with
+the class to which it belongs. Device drivers belong to one and only one
+class, and that is set in the driver's devclass field.
+devclass_add_device is called to enumerate the device within the class
+and actually register it with the class, which happens with the
+class's register_dev callback.
+
+
+Driver
+~~~~~~
+
+When a driver is attached to a device, the device is inserted into the
+driver's list of devices.
+
+
+sysfs
+~~~~~
+
+A symlink is created in the bus's 'devices' directory that points to
+the device's directory in the physical hierarchy.
+
+A symlink is created in the driver's 'devices' directory that points
+to the device's directory in the physical hierarchy.
+
+A directory for the device is created in the class's directory. A
+symlink is created in that directory that points to the device's
+physical location in the sysfs tree.
+
+A symlink can be created (though this isn't done yet) in the device's
+physical directory to either its class directory, or the class's
+top-level directory. One can also be created to point to its driver's
+directory also.
+
+
+driver_register
+~~~~~~~~~~~~~~~
+
+The process is almost identical for when a new driver is added.
+The bus's list of devices is iterated over to find a match. Devices
+that already have a driver are skipped. All the devices are iterated
+over, to bind as many devices as possible to the driver.
+
+
+Removal
+~~~~~~~
+
+When a device is removed, the reference count for it will eventually
+go to 0. When it does, the remove callback of the driver is called. It
+is removed from the driver's list of devices and the reference count
+of the driver is decremented. All symlinks between the two are removed.
+
+When a driver is removed, the list of devices that it supports is
+iterated over, and the driver's remove callback is called for each
+one. The device is removed from that list and the symlinks removed.
+
diff --git a/Documentation/driver-model/bus.txt b/Documentation/driver-model/bus.txt
new file mode 100644
index 000000000..b577a45b9
--- /dev/null
+++ b/Documentation/driver-model/bus.txt
@@ -0,0 +1,143 @@
+
+Bus Types
+
+Definition
+~~~~~~~~~~
+See the kerneldoc for the struct bus_type.
+
+int bus_register(struct bus_type * bus);
+
+
+Declaration
+~~~~~~~~~~~
+
+Each bus type in the kernel (PCI, USB, etc) should declare one static
+object of this type. They must initialize the name field, and may
+optionally initialize the match callback.
+
+struct bus_type pci_bus_type = {
+ .name = "pci",
+ .match = pci_bus_match,
+};
+
+The structure should be exported to drivers in a header file:
+
+extern struct bus_type pci_bus_type;
+
+
+Registration
+~~~~~~~~~~~~
+
+When a bus driver is initialized, it calls bus_register. This
+initializes the rest of the fields in the bus object and inserts it
+into a global list of bus types. Once the bus object is registered,
+the fields in it are usable by the bus driver.
+
+
+Callbacks
+~~~~~~~~~
+
+match(): Attaching Drivers to Devices
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The format of device ID structures and the semantics for comparing
+them are inherently bus-specific. Drivers typically declare an array
+of device IDs of devices they support that reside in a bus-specific
+driver structure.
+
+The purpose of the match callback is to give the bus an opportunity to
+determine if a particular driver supports a particular device by
+comparing the device IDs the driver supports with the device ID of a
+particular device, without sacrificing bus-specific functionality or
+type-safety.
+
+When a driver is registered with the bus, the bus's list of devices is
+iterated over, and the match callback is called for each device that
+does not have a driver associated with it.
+
+
+
+Device and Driver Lists
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The lists of devices and drivers are intended to replace the local
+lists that many buses keep. They are lists of struct devices and
+struct device_drivers, respectively. Bus drivers are free to use the
+lists as they please, but conversion to the bus-specific type may be
+necessary.
+
+The LDM core provides helper functions for iterating over each list.
+
+int bus_for_each_dev(struct bus_type * bus, struct device * start, void * data,
+ int (*fn)(struct device *, void *));
+
+int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
+ void * data, int (*fn)(struct device_driver *, void *));
+
+These helpers iterate over the respective list, and call the callback
+for each device or driver in the list. All list accesses are
+synchronized by taking the bus's lock (read currently). The reference
+count on each object in the list is incremented before the callback is
+called; it is decremented after the next object has been obtained. The
+lock is not held when calling the callback.
+
+
+sysfs
+~~~~~~~~
+There is a top-level directory named 'bus'.
+
+Each bus gets a directory in the bus directory, along with two default
+directories:
+
+ /sys/bus/pci/
+ |-- devices
+ `-- drivers
+
+Drivers registered with the bus get a directory in the bus's drivers
+directory:
+
+ /sys/bus/pci/
+ |-- devices
+ `-- drivers
+ |-- Intel ICH
+ |-- Intel ICH Joystick
+ |-- agpgart
+ `-- e100
+
+Each device that is discovered on a bus of that type gets a symlink in
+the bus's devices directory to the device's directory in the physical
+hierarchy:
+
+ /sys/bus/pci/
+ |-- devices
+ | |-- 00:00.0 -> ../../../root/pci0/00:00.0
+ | |-- 00:01.0 -> ../../../root/pci0/00:01.0
+ | `-- 00:02.0 -> ../../../root/pci0/00:02.0
+ `-- drivers
+
+
+Exporting Attributes
+~~~~~~~~~~~~~~~~~~~~
+struct bus_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct bus_type *, char * buf);
+ ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
+};
+
+Bus drivers can export attributes using the BUS_ATTR macro that works
+similarly to the DEVICE_ATTR macro for devices. For example, a definition
+like this:
+
+static BUS_ATTR(debug,0644,show_debug,store_debug);
+
+is equivalent to declaring:
+
+static bus_attribute bus_attr_debug;
+
+This can then be used to add and remove the attribute from the bus's
+sysfs directory using:
+
+int bus_create_file(struct bus_type *, struct bus_attribute *);
+void bus_remove_file(struct bus_type *, struct bus_attribute *);
+
+
diff --git a/Documentation/driver-model/class.txt b/Documentation/driver-model/class.txt
new file mode 100644
index 000000000..1fefc480a
--- /dev/null
+++ b/Documentation/driver-model/class.txt
@@ -0,0 +1,147 @@
+
+Device Classes
+
+
+Introduction
+~~~~~~~~~~~~
+A device class describes a type of device, like an audio or network
+device. The following device classes have been identified:
+
+<Insert List of Device Classes Here>
+
+
+Each device class defines a set of semantics and a programming interface
+that devices of that class adhere to. Device drivers are the
+implementation of that programming interface for a particular device on
+a particular bus.
+
+Device classes are agnostic with respect to what bus a device resides
+on.
+
+
+Programming Interface
+~~~~~~~~~~~~~~~~~~~~~
+The device class structure looks like:
+
+
+typedef int (*devclass_add)(struct device *);
+typedef void (*devclass_remove)(struct device *);
+
+See the kerneldoc for the struct class.
+
+A typical device class definition would look like:
+
+struct device_class input_devclass = {
+ .name = "input",
+ .add_device = input_add_device,
+ .remove_device = input_remove_device,
+};
+
+Each device class structure should be exported in a header file so it
+can be used by drivers, extensions and interfaces.
+
+Device classes are registered and unregistered with the core using:
+
+int devclass_register(struct device_class * cls);
+void devclass_unregister(struct device_class * cls);
+
+
+Devices
+~~~~~~~
+As devices are bound to drivers, they are added to the device class
+that the driver belongs to. Before the driver model core, this would
+typically happen during the driver's probe() callback, once the device
+has been initialized. It now happens after the probe() callback
+finishes from the core.
+
+The device is enumerated in the class. Each time a device is added to
+the class, the class's devnum field is incremented and assigned to the
+device. The field is never decremented, so if the device is removed
+from the class and re-added, it will receive a different enumerated
+value.
+
+The class is allowed to create a class-specific structure for the
+device and store it in the device's class_data pointer.
+
+There is no list of devices in the device class. Each driver has a
+list of devices that it supports. The device class has a list of
+drivers of that particular class. To access all of the devices in the
+class, iterate over the device lists of each driver in the class.
+
+
+Device Drivers
+~~~~~~~~~~~~~~
+Device drivers are added to device classes when they are registered
+with the core. A driver specifies the class it belongs to by setting
+the struct device_driver::devclass field.
+
+
+sysfs directory structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+There is a top-level sysfs directory named 'class'.
+
+Each class gets a directory in the class directory, along with two
+default subdirectories:
+
+ class/
+ `-- input
+ |-- devices
+ `-- drivers
+
+
+Drivers registered with the class get a symlink in the drivers/ directory
+that points to the driver's directory (under its bus directory):
+
+ class/
+ `-- input
+ |-- devices
+ `-- drivers
+ `-- usb:usb_mouse -> ../../../bus/drivers/usb_mouse/
+
+
+Each device gets a symlink in the devices/ directory that points to the
+device's directory in the physical hierarchy:
+
+ class/
+ `-- input
+ |-- devices
+ | `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/
+ `-- drivers
+
+
+Exporting Attributes
+~~~~~~~~~~~~~~~~~~~~
+struct devclass_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device_class *, char * buf, size_t count, loff_t off);
+ ssize_t (*store)(struct device_class *, const char * buf, size_t count, loff_t off);
+};
+
+Class drivers can export attributes using the DEVCLASS_ATTR macro that works
+similarly to the DEVICE_ATTR macro for devices. For example, a definition
+like this:
+
+static DEVCLASS_ATTR(debug,0644,show_debug,store_debug);
+
+is equivalent to declaring:
+
+static devclass_attribute devclass_attr_debug;
+
+The bus driver can add and remove the attribute from the class's
+sysfs directory using:
+
+int devclass_create_file(struct device_class *, struct devclass_attribute *);
+void devclass_remove_file(struct device_class *, struct devclass_attribute *);
+
+In the example above, the file will be named 'debug' in placed in the
+class's directory in sysfs.
+
+
+Interfaces
+~~~~~~~~~~
+There may exist multiple mechanisms for accessing the same device of a
+particular class type. Device interfaces describe these mechanisms.
+
+When a device is added to a device class, the core attempts to add it
+to every interface that is registered with the device class.
+
diff --git a/Documentation/driver-model/design-patterns.txt b/Documentation/driver-model/design-patterns.txt
new file mode 100644
index 000000000..ba7b2df64
--- /dev/null
+++ b/Documentation/driver-model/design-patterns.txt
@@ -0,0 +1,116 @@
+
+Device Driver Design Patterns
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This document describes a few common design patterns found in device drivers.
+It is likely that subsystem maintainers will ask driver developers to
+conform to these design patterns.
+
+1. State Container
+2. container_of()
+
+
+1. State Container
+~~~~~~~~~~~~~~~~~~
+
+While the kernel contains a few device drivers that assume that they will
+only be probed() once on a certain system (singletons), it is custom to assume
+that the device the driver binds to will appear in several instances. This
+means that the probe() function and all callbacks need to be reentrant.
+
+The most common way to achieve this is to use the state container design
+pattern. It usually has this form:
+
+struct foo {
+ spinlock_t lock; /* Example member */
+ (...)
+};
+
+static int foo_probe(...)
+{
+ struct foo *foo;
+
+ foo = devm_kzalloc(dev, sizeof(*foo), GFP_KERNEL);
+ if (!foo)
+ return -ENOMEM;
+ spin_lock_init(&foo->lock);
+ (...)
+}
+
+This will create an instance of struct foo in memory every time probe() is
+called. This is our state container for this instance of the device driver.
+Of course it is then necessary to always pass this instance of the
+state around to all functions that need access to the state and its members.
+
+For example, if the driver is registering an interrupt handler, you would
+pass around a pointer to struct foo like this:
+
+static irqreturn_t foo_handler(int irq, void *arg)
+{
+ struct foo *foo = arg;
+ (...)
+}
+
+static int foo_probe(...)
+{
+ struct foo *foo;
+
+ (...)
+ ret = request_irq(irq, foo_handler, 0, "foo", foo);
+}
+
+This way you always get a pointer back to the correct instance of foo in
+your interrupt handler.
+
+
+2. container_of()
+~~~~~~~~~~~~~~~~~
+
+Continuing on the above example we add an offloaded work:
+
+struct foo {
+ spinlock_t lock;
+ struct workqueue_struct *wq;
+ struct work_struct offload;
+ (...)
+};
+
+static void foo_work(struct work_struct *work)
+{
+ struct foo *foo = container_of(work, struct foo, offload);
+
+ (...)
+}
+
+static irqreturn_t foo_handler(int irq, void *arg)
+{
+ struct foo *foo = arg;
+
+ queue_work(foo->wq, &foo->offload);
+ (...)
+}
+
+static int foo_probe(...)
+{
+ struct foo *foo;
+
+ foo->wq = create_singlethread_workqueue("foo-wq");
+ INIT_WORK(&foo->offload, foo_work);
+ (...)
+}
+
+The design pattern is the same for an hrtimer or something similar that will
+return a single argument which is a pointer to a struct member in the
+callback.
+
+container_of() is a macro defined in <linux/kernel.h>
+
+What container_of() does is to obtain a pointer to the containing struct from
+a pointer to a member by a simple subtraction using the offsetof() macro from
+standard C, which allows something similar to object oriented behaviours.
+Notice that the contained member must not be a pointer, but an actual member
+for this to work.
+
+We can see here that we avoid having global pointers to our struct foo *
+instance this way, while still keeping the number of parameters passed to the
+work function to a single pointer.
diff --git a/Documentation/driver-model/device.txt b/Documentation/driver-model/device.txt
new file mode 100644
index 000000000..1e70220d2
--- /dev/null
+++ b/Documentation/driver-model/device.txt
@@ -0,0 +1,106 @@
+
+The Basic Device Structure
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+See the kerneldoc for the struct device.
+
+
+Programming Interface
+~~~~~~~~~~~~~~~~~~~~~
+The bus driver that discovers the device uses this to register the
+device with the core:
+
+int device_register(struct device * dev);
+
+The bus should initialize the following fields:
+
+ - parent
+ - name
+ - bus_id
+ - bus
+
+A device is removed from the core when its reference count goes to
+0. The reference count can be adjusted using:
+
+struct device * get_device(struct device * dev);
+void put_device(struct device * dev);
+
+get_device() will return a pointer to the struct device passed to it
+if the reference is not already 0 (if it's in the process of being
+removed already).
+
+A driver can access the lock in the device structure using:
+
+void lock_device(struct device * dev);
+void unlock_device(struct device * dev);
+
+
+Attributes
+~~~~~~~~~~
+struct device_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+};
+
+Attributes of devices can be exported by a device driver through sysfs.
+
+Please see Documentation/filesystems/sysfs.txt for more information
+on how sysfs works.
+
+As explained in Documentation/kobject.txt, device attributes must be be
+created before the KOBJ_ADD uevent is generated. The only way to realize
+that is by defining an attribute group.
+
+Attributes are declared using a macro called DEVICE_ATTR:
+
+#define DEVICE_ATTR(name,mode,show,store)
+
+Example:
+
+static DEVICE_ATTR(type, 0444, show_type, NULL);
+static DEVICE_ATTR(power, 0644, show_power, store_power);
+
+This declares two structures of type struct device_attribute with respective
+names 'dev_attr_type' and 'dev_attr_power'. These two attributes can be
+organized as follows into a group:
+
+static struct attribute *dev_attrs[] = {
+ &dev_attr_type.attr,
+ &dev_attr_power.attr,
+ NULL,
+};
+
+static struct attribute_group dev_attr_group = {
+ .attrs = dev_attrs,
+};
+
+static const struct attribute_group *dev_attr_groups[] = {
+ &dev_attr_group,
+ NULL,
+};
+
+This array of groups can then be associated with a device by setting the
+group pointer in struct device before device_register() is invoked:
+
+ dev->groups = dev_attr_groups;
+ device_register(dev);
+
+The device_register() function will use the 'groups' pointer to create the
+device attributes and the device_unregister() function will use this pointer
+to remove the device attributes.
+
+Word of warning: While the kernel allows device_create_file() and
+device_remove_file() to be called on a device at any time, userspace has
+strict expectations on when attributes get created. When a new device is
+registered in the kernel, a uevent is generated to notify userspace (like
+udev) that a new device is available. If attributes are added after the
+device is registered, then userspace won't get notified and userspace will
+not know about the new attributes.
+
+This is important for device driver that need to publish additional
+attributes for a device at driver probe time. If the device driver simply
+calls device_create_file() on the device structure passed to it, then
+userspace will never be notified of the new attributes.
diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt
new file mode 100644
index 000000000..831a5363f
--- /dev/null
+++ b/Documentation/driver-model/devres.txt
@@ -0,0 +1,341 @@
+Devres - Managed Device Resource
+================================
+
+Tejun Heo <teheo@suse.de>
+
+First draft 10 January 2007
+
+
+1. Intro : Huh? Devres?
+2. Devres : Devres in a nutshell
+3. Devres Group : Group devres'es and release them together
+4. Details : Life time rules, calling context, ...
+5. Overhead : How much do we have to pay for this?
+6. List of managed interfaces : Currently implemented managed interfaces
+
+
+ 1. Intro
+ --------
+
+devres came up while trying to convert libata to use iomap. Each
+iomapped address should be kept and unmapped on driver detach. For
+example, a plain SFF ATA controller (that is, good old PCI IDE) in
+native mode makes use of 5 PCI BARs and all of them should be
+maintained.
+
+As with many other device drivers, libata low level drivers have
+sufficient bugs in ->remove and ->probe failure path. Well, yes,
+that's probably because libata low level driver developers are lazy
+bunch, but aren't all low level driver developers? After spending a
+day fiddling with braindamaged hardware with no document or
+braindamaged document, if it's finally working, well, it's working.
+
+For one reason or another, low level drivers don't receive as much
+attention or testing as core code, and bugs on driver detach or
+initialization failure don't happen often enough to be noticeable.
+Init failure path is worse because it's much less travelled while
+needs to handle multiple entry points.
+
+So, many low level drivers end up leaking resources on driver detach
+and having half broken failure path implementation in ->probe() which
+would leak resources or even cause oops when failure occurs. iomap
+adds more to this mix. So do msi and msix.
+
+
+ 2. Devres
+ ---------
+
+devres is basically linked list of arbitrarily sized memory areas
+associated with a struct device. Each devres entry is associated with
+a release function. A devres can be released in several ways. No
+matter what, all devres entries are released on driver detach. On
+release, the associated release function is invoked and then the
+devres entry is freed.
+
+Managed interface is created for resources commonly used by device
+drivers using devres. For example, coherent DMA memory is acquired
+using dma_alloc_coherent(). The managed version is called
+dmam_alloc_coherent(). It is identical to dma_alloc_coherent() except
+for the DMA memory allocated using it is managed and will be
+automatically released on driver detach. Implementation looks like
+the following.
+
+ struct dma_devres {
+ size_t size;
+ void *vaddr;
+ dma_addr_t dma_handle;
+ };
+
+ static void dmam_coherent_release(struct device *dev, void *res)
+ {
+ struct dma_devres *this = res;
+
+ dma_free_coherent(dev, this->size, this->vaddr, this->dma_handle);
+ }
+
+ dmam_alloc_coherent(dev, size, dma_handle, gfp)
+ {
+ struct dma_devres *dr;
+ void *vaddr;
+
+ dr = devres_alloc(dmam_coherent_release, sizeof(*dr), gfp);
+ ...
+
+ /* alloc DMA memory as usual */
+ vaddr = dma_alloc_coherent(...);
+ ...
+
+ /* record size, vaddr, dma_handle in dr */
+ dr->vaddr = vaddr;
+ ...
+
+ devres_add(dev, dr);
+
+ return vaddr;
+ }
+
+If a driver uses dmam_alloc_coherent(), the area is guaranteed to be
+freed whether initialization fails half-way or the device gets
+detached. If most resources are acquired using managed interface, a
+driver can have much simpler init and exit code. Init path basically
+looks like the following.
+
+ my_init_one()
+ {
+ struct mydev *d;
+
+ d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->ring = dmam_alloc_coherent(...);
+ if (!d->ring)
+ return -ENOMEM;
+
+ if (check something)
+ return -EINVAL;
+ ...
+
+ return register_to_upper_layer(d);
+ }
+
+And exit path,
+
+ my_remove_one()
+ {
+ unregister_from_upper_layer(d);
+ shutdown_my_hardware();
+ }
+
+As shown above, low level drivers can be simplified a lot by using
+devres. Complexity is shifted from less maintained low level drivers
+to better maintained higher layer. Also, as init failure path is
+shared with exit path, both can get more testing.
+
+
+ 3. Devres group
+ ---------------
+
+Devres entries can be grouped using devres group. When a group is
+released, all contained normal devres entries and properly nested
+groups are released. One usage is to rollback series of acquired
+resources on failure. For example,
+
+ if (!devres_open_group(dev, NULL, GFP_KERNEL))
+ return -ENOMEM;
+
+ acquire A;
+ if (failed)
+ goto err;
+
+ acquire B;
+ if (failed)
+ goto err;
+ ...
+
+ devres_remove_group(dev, NULL);
+ return 0;
+
+ err:
+ devres_release_group(dev, NULL);
+ return err_code;
+
+As resource acquisition failure usually means probe failure, constructs
+like above are usually useful in midlayer driver (e.g. libata core
+layer) where interface function shouldn't have side effect on failure.
+For LLDs, just returning error code suffices in most cases.
+
+Each group is identified by void *id. It can either be explicitly
+specified by @id argument to devres_open_group() or automatically
+created by passing NULL as @id as in the above example. In both
+cases, devres_open_group() returns the group's id. The returned id
+can be passed to other devres functions to select the target group.
+If NULL is given to those functions, the latest open group is
+selected.
+
+For example, you can do something like the following.
+
+ int my_midlayer_create_something()
+ {
+ if (!devres_open_group(dev, my_midlayer_create_something, GFP_KERNEL))
+ return -ENOMEM;
+
+ ...
+
+ devres_close_group(dev, my_midlayer_create_something);
+ return 0;
+ }
+
+ void my_midlayer_destroy_something()
+ {
+ devres_release_group(dev, my_midlayer_create_something);
+ }
+
+
+ 4. Details
+ ----------
+
+Lifetime of a devres entry begins on devres allocation and finishes
+when it is released or destroyed (removed and freed) - no reference
+counting.
+
+devres core guarantees atomicity to all basic devres operations and
+has support for single-instance devres types (atomic
+lookup-and-add-if-not-found). Other than that, synchronizing
+concurrent accesses to allocated devres data is caller's
+responsibility. This is usually non-issue because bus ops and
+resource allocations already do the job.
+
+For an example of single-instance devres type, read pcim_iomap_table()
+in lib/devres.c.
+
+All devres interface functions can be called without context if the
+right gfp mask is given.
+
+
+ 5. Overhead
+ -----------
+
+Each devres bookkeeping info is allocated together with requested data
+area. With debug option turned off, bookkeeping info occupies 16
+bytes on 32bit machines and 24 bytes on 64bit (three pointers rounded
+up to ull alignment). If singly linked list is used, it can be
+reduced to two pointers (8 bytes on 32bit, 16 bytes on 64bit).
+
+Each devres group occupies 8 pointers. It can be reduced to 6 if
+singly linked list is used.
+
+Memory space overhead on ahci controller with two ports is between 300
+and 400 bytes on 32bit machine after naive conversion (we can
+certainly invest a bit more effort into libata core layer).
+
+
+ 6. List of managed interfaces
+ -----------------------------
+
+CLOCK
+ devm_clk_get()
+ devm_clk_put()
+
+DMA
+ dmam_alloc_coherent()
+ dmam_alloc_noncoherent()
+ dmam_declare_coherent_memory()
+ dmam_free_coherent()
+ dmam_free_noncoherent()
+ dmam_pool_create()
+ dmam_pool_destroy()
+
+GPIO
+ devm_gpiod_get()
+ devm_gpiod_get_index()
+ devm_gpiod_get_index_optional()
+ devm_gpiod_get_optional()
+ devm_gpiod_put()
+
+IIO
+ devm_iio_device_alloc()
+ devm_iio_device_free()
+ devm_iio_device_register()
+ devm_iio_device_unregister()
+ devm_iio_kfifo_allocate()
+ devm_iio_kfifo_free()
+ devm_iio_trigger_alloc()
+ devm_iio_trigger_free()
+
+IO region
+ devm_release_mem_region()
+ devm_release_region()
+ devm_release_resource()
+ devm_request_mem_region()
+ devm_request_region()
+ devm_request_resource()
+
+IOMAP
+ devm_ioport_map()
+ devm_ioport_unmap()
+ devm_ioremap()
+ devm_ioremap_nocache()
+ devm_ioremap_wc()
+ devm_ioremap_resource() : checks resource, requests memory region, ioremaps
+ devm_iounmap()
+ pcim_iomap()
+ pcim_iomap_regions() : do request_region() and iomap() on multiple BARs
+ pcim_iomap_table() : array of mapped addresses indexed by BAR
+ pcim_iounmap()
+
+IRQ
+ devm_free_irq()
+ devm_request_any_context_irq()
+ devm_request_irq()
+ devm_request_threaded_irq()
+
+LED
+ devm_led_classdev_register()
+ devm_led_classdev_unregister()
+
+MDIO
+ devm_mdiobus_alloc()
+ devm_mdiobus_alloc_size()
+ devm_mdiobus_free()
+
+MEM
+ devm_free_pages()
+ devm_get_free_pages()
+ devm_kasprintf()
+ devm_kcalloc()
+ devm_kfree()
+ devm_kmalloc()
+ devm_kmalloc_array()
+ devm_kmemdup()
+ devm_kstrdup()
+ devm_kvasprintf()
+ devm_kzalloc()
+
+PCI
+ pcim_enable_device() : after success, all PCI ops become managed
+ pcim_pin_device() : keep PCI device enabled after release
+
+PHY
+ devm_usb_get_phy()
+ devm_usb_put_phy()
+
+PINCTRL
+ devm_pinctrl_get()
+ devm_pinctrl_put()
+
+PWM
+ devm_pwm_get()
+ devm_pwm_put()
+
+REGULATOR
+ devm_regulator_bulk_get()
+ devm_regulator_get()
+ devm_regulator_put()
+ devm_regulator_register()
+
+SLAVE DMA ENGINE
+ devm_acpi_dma_controller_register()
+
+SPI
+ devm_spi_register_master()
diff --git a/Documentation/driver-model/driver.txt b/Documentation/driver-model/driver.txt
new file mode 100644
index 000000000..442113582
--- /dev/null
+++ b/Documentation/driver-model/driver.txt
@@ -0,0 +1,214 @@
+
+Device Drivers
+
+See the kerneldoc for the struct device_driver.
+
+
+Allocation
+~~~~~~~~~~
+
+Device drivers are statically allocated structures. Though there may
+be multiple devices in a system that a driver supports, struct
+device_driver represents the driver as a whole (not a particular
+device instance).
+
+Initialization
+~~~~~~~~~~~~~~
+
+The driver must initialize at least the name and bus fields. It should
+also initialize the devclass field (when it arrives), so it may obtain
+the proper linkage internally. It should also initialize as many of
+the callbacks as possible, though each is optional.
+
+Declaration
+~~~~~~~~~~~
+
+As stated above, struct device_driver objects are statically
+allocated. Below is an example declaration of the eepro100
+driver. This declaration is hypothetical only; it relies on the driver
+being converted completely to the new model.
+
+static struct device_driver eepro100_driver = {
+ .name = "eepro100",
+ .bus = &pci_bus_type,
+
+ .probe = eepro100_probe,
+ .remove = eepro100_remove,
+ .suspend = eepro100_suspend,
+ .resume = eepro100_resume,
+};
+
+Most drivers will not be able to be converted completely to the new
+model because the bus they belong to has a bus-specific structure with
+bus-specific fields that cannot be generalized.
+
+The most common example of this are device ID structures. A driver
+typically defines an array of device IDs that it supports. The format
+of these structures and the semantics for comparing device IDs are
+completely bus-specific. Defining them as bus-specific entities would
+sacrifice type-safety, so we keep bus-specific structures around.
+
+Bus-specific drivers should include a generic struct device_driver in
+the definition of the bus-specific driver. Like this:
+
+struct pci_driver {
+ const struct pci_device_id *id_table;
+ struct device_driver driver;
+};
+
+A definition that included bus-specific fields would look like
+(using the eepro100 driver again):
+
+static struct pci_driver eepro100_driver = {
+ .id_table = eepro100_pci_tbl,
+ .driver = {
+ .name = "eepro100",
+ .bus = &pci_bus_type,
+ .probe = eepro100_probe,
+ .remove = eepro100_remove,
+ .suspend = eepro100_suspend,
+ .resume = eepro100_resume,
+ },
+};
+
+Some may find the syntax of embedded struct initialization awkward or
+even a bit ugly. So far, it's the best way we've found to do what we want...
+
+Registration
+~~~~~~~~~~~~
+
+int driver_register(struct device_driver * drv);
+
+The driver registers the structure on startup. For drivers that have
+no bus-specific fields (i.e. don't have a bus-specific driver
+structure), they would use driver_register and pass a pointer to their
+struct device_driver object.
+
+Most drivers, however, will have a bus-specific structure and will
+need to register with the bus using something like pci_driver_register.
+
+It is important that drivers register their driver structure as early as
+possible. Registration with the core initializes several fields in the
+struct device_driver object, including the reference count and the
+lock. These fields are assumed to be valid at all times and may be
+used by the device model core or the bus driver.
+
+
+Transition Bus Drivers
+~~~~~~~~~~~~~~~~~~~~~~
+
+By defining wrapper functions, the transition to the new model can be
+made easier. Drivers can ignore the generic structure altogether and
+let the bus wrapper fill in the fields. For the callbacks, the bus can
+define generic callbacks that forward the call to the bus-specific
+callbacks of the drivers.
+
+This solution is intended to be only temporary. In order to get class
+information in the driver, the drivers must be modified anyway. Since
+converting drivers to the new model should reduce some infrastructural
+complexity and code size, it is recommended that they are converted as
+class information is added.
+
+Access
+~~~~~~
+
+Once the object has been registered, it may access the common fields of
+the object, like the lock and the list of devices.
+
+int driver_for_each_dev(struct device_driver * drv, void * data,
+ int (*callback)(struct device * dev, void * data));
+
+The devices field is a list of all the devices that have been bound to
+the driver. The LDM core provides a helper function to operate on all
+the devices a driver controls. This helper locks the driver on each
+node access, and does proper reference counting on each device as it
+accesses it.
+
+
+sysfs
+~~~~~
+
+When a driver is registered, a sysfs directory is created in its
+bus's directory. In this directory, the driver can export an interface
+to userspace to control operation of the driver on a global basis;
+e.g. toggling debugging output in the driver.
+
+A future feature of this directory will be a 'devices' directory. This
+directory will contain symlinks to the directories of devices it
+supports.
+
+
+
+Callbacks
+~~~~~~~~~
+
+ int (*probe) (struct device * dev);
+
+The probe() entry is called in task context, with the bus's rwsem locked
+and the driver partially bound to the device. Drivers commonly use
+container_of() to convert "dev" to a bus-specific type, both in probe()
+and other routines. That type often provides device resource data, such
+as pci_dev.resource[] or platform_device.resources, which is used in
+addition to dev->platform_data to initialize the driver.
+
+This callback holds the driver-specific logic to bind the driver to a
+given device. That includes verifying that the device is present, that
+it's a version the driver can handle, that driver data structures can
+be allocated and initialized, and that any hardware can be initialized.
+Drivers often store a pointer to their state with dev_set_drvdata().
+When the driver has successfully bound itself to that device, then probe()
+returns zero and the driver model code will finish its part of binding
+the driver to that device.
+
+A driver's probe() may return a negative errno value to indicate that
+the driver did not bind to this device, in which case it should have
+released all resources it allocated.
+
+ int (*remove) (struct device * dev);
+
+remove is called to unbind a driver from a device. This may be
+called if a device is physically removed from the system, if the
+driver module is being unloaded, during a reboot sequence, or
+in other cases.
+
+It is up to the driver to determine if the device is present or
+not. It should free any resources allocated specifically for the
+device; i.e. anything in the device's driver_data field.
+
+If the device is still present, it should quiesce the device and place
+it into a supported low-power state.
+
+ int (*suspend) (struct device * dev, pm_message_t state);
+
+suspend is called to put the device in a low power state.
+
+ int (*resume) (struct device * dev);
+
+Resume is used to bring a device back from a low power state.
+
+
+Attributes
+~~~~~~~~~~
+struct driver_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device_driver *driver, char *buf);
+ ssize_t (*store)(struct device_driver *, const char * buf, size_t count);
+};
+
+Device drivers can export attributes via their sysfs directories.
+Drivers can declare attributes using a DRIVER_ATTR macro that works
+identically to the DEVICE_ATTR macro.
+
+Example:
+
+DRIVER_ATTR(debug,0644,show_debug,store_debug);
+
+This is equivalent to declaring:
+
+struct driver_attribute driver_attr_debug;
+
+This can then be used to add and remove the attribute from the
+driver's directory using:
+
+int driver_create_file(struct device_driver *, const struct driver_attribute *);
+void driver_remove_file(struct device_driver *, const struct driver_attribute *);
diff --git a/Documentation/driver-model/overview.txt b/Documentation/driver-model/overview.txt
new file mode 100644
index 000000000..6a8f9a807
--- /dev/null
+++ b/Documentation/driver-model/overview.txt
@@ -0,0 +1,123 @@
+The Linux Kernel Device Model
+
+Patrick Mochel <mochel@digitalimplant.org>
+
+Drafted 26 August 2002
+Updated 31 January 2006
+
+
+Overview
+~~~~~~~~
+
+The Linux Kernel Driver Model is a unification of all the disparate driver
+models that were previously used in the kernel. It is intended to augment the
+bus-specific drivers for bridges and devices by consolidating a set of data
+and operations into globally accessible data structures.
+
+Traditional driver models implemented some sort of tree-like structure
+(sometimes just a list) for the devices they control. There wasn't any
+uniformity across the different bus types.
+
+The current driver model provides a common, uniform data model for describing
+a bus and the devices that can appear under the bus. The unified bus
+model includes a set of common attributes which all busses carry, and a set
+of common callbacks, such as device discovery during bus probing, bus
+shutdown, bus power management, etc.
+
+The common device and bridge interface reflects the goals of the modern
+computer: namely the ability to do seamless device "plug and play", power
+management, and hot plug. In particular, the model dictated by Intel and
+Microsoft (namely ACPI) ensures that almost every device on almost any bus
+on an x86-compatible system can work within this paradigm. Of course,
+not every bus is able to support all such operations, although most
+buses support most of those operations.
+
+
+Downstream Access
+~~~~~~~~~~~~~~~~~
+
+Common data fields have been moved out of individual bus layers into a common
+data structure. These fields must still be accessed by the bus layers,
+and sometimes by the device-specific drivers.
+
+Other bus layers are encouraged to do what has been done for the PCI layer.
+struct pci_dev now looks like this:
+
+struct pci_dev {
+ ...
+
+ struct device dev; /* Generic device interface */
+ ...
+};
+
+Note first that the struct device dev within the struct pci_dev is
+statically allocated. This means only one allocation on device discovery.
+
+Note also that that struct device dev is not necessarily defined at the
+front of the pci_dev structure. This is to make people think about what
+they're doing when switching between the bus driver and the global driver,
+and to discourage meaningless and incorrect casts between the two.
+
+The PCI bus layer freely accesses the fields of struct device. It knows about
+the structure of struct pci_dev, and it should know the structure of struct
+device. Individual PCI device drivers that have been converted to the current
+driver model generally do not and should not touch the fields of struct device,
+unless there is a compelling reason to do so.
+
+The above abstraction prevents unnecessary pain during transitional phases.
+If it were not done this way, then when a field was renamed or removed, every
+downstream driver would break. On the other hand, if only the bus layer
+(and not the device layer) accesses the struct device, it is only the bus
+layer that needs to change.
+
+
+User Interface
+~~~~~~~~~~~~~~
+
+By virtue of having a complete hierarchical view of all the devices in the
+system, exporting a complete hierarchical view to userspace becomes relatively
+easy. This has been accomplished by implementing a special purpose virtual
+file system named sysfs.
+
+Almost all mainstream Linux distros mount this filesystem automatically; you
+can see some variation of the following in the output of the "mount" command:
+
+$ mount
+...
+none on /sys type sysfs (rw,noexec,nosuid,nodev)
+...
+$
+
+The auto-mounting of sysfs is typically accomplished by an entry similar to
+the following in the /etc/fstab file:
+
+none /sys sysfs defaults 0 0
+
+or something similar in the /lib/init/fstab file on Debian-based systems:
+
+none /sys sysfs nodev,noexec,nosuid 0 0
+
+If sysfs is not automatically mounted, you can always do it manually with:
+
+# mount -t sysfs sysfs /sys
+
+Whenever a device is inserted into the tree, a directory is created for it.
+This directory may be populated at each layer of discovery - the global layer,
+the bus layer, or the device layer.
+
+The global layer currently creates two files - 'name' and 'power'. The
+former only reports the name of the device. The latter reports the
+current power state of the device. It will also be used to set the current
+power state.
+
+The bus layer may also create files for the devices it finds while probing the
+bus. For example, the PCI layer currently creates 'irq' and 'resource' files
+for each PCI device.
+
+A device-specific driver may also export files in its directory to expose
+device-specific data or tunable interfaces.
+
+More information about the sysfs directory layout can be found in
+the other documents in this directory and in the file
+Documentation/filesystems/sysfs.txt.
+
diff --git a/Documentation/driver-model/platform.txt b/Documentation/driver-model/platform.txt
new file mode 100644
index 000000000..07795ec51
--- /dev/null
+++ b/Documentation/driver-model/platform.txt
@@ -0,0 +1,230 @@
+Platform Devices and Drivers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+See <linux/platform_device.h> for the driver model interface to the
+platform bus: platform_device, and platform_driver. This pseudo-bus
+is used to connect devices on busses with minimal infrastructure,
+like those used to integrate peripherals on many system-on-chip
+processors, or some "legacy" PC interconnects; as opposed to large
+formally specified ones like PCI or USB.
+
+
+Platform devices
+~~~~~~~~~~~~~~~~
+Platform devices are devices that typically appear as autonomous
+entities in the system. This includes legacy port-based devices and
+host bridges to peripheral buses, and most controllers integrated
+into system-on-chip platforms. What they usually have in common
+is direct addressing from a CPU bus. Rarely, a platform_device will
+be connected through a segment of some other kind of bus; but its
+registers will still be directly addressable.
+
+Platform devices are given a name, used in driver binding, and a
+list of resources such as addresses and IRQs.
+
+struct platform_device {
+ const char *name;
+ u32 id;
+ struct device dev;
+ u32 num_resources;
+ struct resource *resource;
+};
+
+
+Platform drivers
+~~~~~~~~~~~~~~~~
+Platform drivers follow the standard driver model convention, where
+discovery/enumeration is handled outside the drivers, and drivers
+provide probe() and remove() methods. They support power management
+and shutdown notifications using the standard conventions.
+
+struct platform_driver {
+ int (*probe)(struct platform_device *);
+ int (*remove)(struct platform_device *);
+ void (*shutdown)(struct platform_device *);
+ int (*suspend)(struct platform_device *, pm_message_t state);
+ int (*suspend_late)(struct platform_device *, pm_message_t state);
+ int (*resume_early)(struct platform_device *);
+ int (*resume)(struct platform_device *);
+ struct device_driver driver;
+};
+
+Note that probe() should in general verify that the specified device hardware
+actually exists; sometimes platform setup code can't be sure. The probing
+can use device resources, including clocks, and device platform_data.
+
+Platform drivers register themselves the normal way:
+
+ int platform_driver_register(struct platform_driver *drv);
+
+Or, in common situations where the device is known not to be hot-pluggable,
+the probe() routine can live in an init section to reduce the driver's
+runtime memory footprint:
+
+ int platform_driver_probe(struct platform_driver *drv,
+ int (*probe)(struct platform_device *))
+
+
+Device Enumeration
+~~~~~~~~~~~~~~~~~~
+As a rule, platform specific (and often board-specific) setup code will
+register platform devices:
+
+ int platform_device_register(struct platform_device *pdev);
+
+ int platform_add_devices(struct platform_device **pdevs, int ndev);
+
+The general rule is to register only those devices that actually exist,
+but in some cases extra devices might be registered. For example, a kernel
+might be configured to work with an external network adapter that might not
+be populated on all boards, or likewise to work with an integrated controller
+that some boards might not hook up to any peripherals.
+
+In some cases, boot firmware will export tables describing the devices
+that are populated on a given board. Without such tables, often the
+only way for system setup code to set up the correct devices is to build
+a kernel for a specific target board. Such board-specific kernels are
+common with embedded and custom systems development.
+
+In many cases, the memory and IRQ resources associated with the platform
+device are not enough to let the device's driver work. Board setup code
+will often provide additional information using the device's platform_data
+field to hold additional information.
+
+Embedded systems frequently need one or more clocks for platform devices,
+which are normally kept off until they're actively needed (to save power).
+System setup also associates those clocks with the device, so that that
+calls to clk_get(&pdev->dev, clock_name) return them as needed.
+
+
+Legacy Drivers: Device Probing
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Some drivers are not fully converted to the driver model, because they take
+on a non-driver role: the driver registers its platform device, rather than
+leaving that for system infrastructure. Such drivers can't be hotplugged
+or coldplugged, since those mechanisms require device creation to be in a
+different system component than the driver.
+
+The only "good" reason for this is to handle older system designs which, like
+original IBM PCs, rely on error-prone "probe-the-hardware" models for hardware
+configuration. Newer systems have largely abandoned that model, in favor of
+bus-level support for dynamic configuration (PCI, USB), or device tables
+provided by the boot firmware (e.g. PNPACPI on x86). There are too many
+conflicting options about what might be where, and even educated guesses by
+an operating system will be wrong often enough to make trouble.
+
+This style of driver is discouraged. If you're updating such a driver,
+please try to move the device enumeration to a more appropriate location,
+outside the driver. This will usually be cleanup, since such drivers
+tend to already have "normal" modes, such as ones using device nodes that
+were created by PNP or by platform device setup.
+
+None the less, there are some APIs to support such legacy drivers. Avoid
+using these calls except with such hotplug-deficient drivers.
+
+ struct platform_device *platform_device_alloc(
+ const char *name, int id);
+
+You can use platform_device_alloc() to dynamically allocate a device, which
+you will then initialize with resources and platform_device_register().
+A better solution is usually:
+
+ struct platform_device *platform_device_register_simple(
+ const char *name, int id,
+ struct resource *res, unsigned int nres);
+
+You can use platform_device_register_simple() as a one-step call to allocate
+and register a device.
+
+
+Device Naming and Driver Binding
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The platform_device.dev.bus_id is the canonical name for the devices.
+It's built from two components:
+
+ * platform_device.name ... which is also used to for driver matching.
+
+ * platform_device.id ... the device instance number, or else "-1"
+ to indicate there's only one.
+
+These are concatenated, so name/id "serial"/0 indicates bus_id "serial.0", and
+"serial/3" indicates bus_id "serial.3"; both would use the platform_driver
+named "serial". While "my_rtc"/-1 would be bus_id "my_rtc" (no instance id)
+and use the platform_driver called "my_rtc".
+
+Driver binding is performed automatically by the driver core, invoking
+driver probe() after finding a match between device and driver. If the
+probe() succeeds, the driver and device are bound as usual. There are
+three different ways to find such a match:
+
+ - Whenever a device is registered, the drivers for that bus are
+ checked for matches. Platform devices should be registered very
+ early during system boot.
+
+ - When a driver is registered using platform_driver_register(), all
+ unbound devices on that bus are checked for matches. Drivers
+ usually register later during booting, or by module loading.
+
+ - Registering a driver using platform_driver_probe() works just like
+ using platform_driver_register(), except that the driver won't
+ be probed later if another device registers. (Which is OK, since
+ this interface is only for use with non-hotpluggable devices.)
+
+
+Early Platform Devices and Drivers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The early platform interfaces provide platform data to platform device
+drivers early on during the system boot. The code is built on top of the
+early_param() command line parsing and can be executed very early on.
+
+Example: "earlyprintk" class early serial console in 6 steps
+
+1. Registering early platform device data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code registers platform device data using the function
+early_platform_add_devices(). In the case of early serial console this
+should be hardware configuration for the serial port. Devices registered
+at this point will later on be matched against early platform drivers.
+
+2. Parsing kernel command line
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code calls parse_early_param() to parse the kernel
+command line. This will execute all matching early_param() callbacks.
+User specified early platform devices will be registered at this point.
+For the early serial console case the user can specify port on the
+kernel command line as "earlyprintk=serial.0" where "earlyprintk" is
+the class string, "serial" is the name of the platform driver and
+0 is the platform device id. If the id is -1 then the dot and the
+id can be omitted.
+
+3. Installing early platform drivers belonging to a certain class
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code may optionally force registration of all early
+platform drivers belonging to a certain class using the function
+early_platform_driver_register_all(). User specified devices from
+step 2 have priority over these. This step is omitted by the serial
+driver example since the early serial driver code should be disabled
+unless the user has specified port on the kernel command line.
+
+4. Early platform driver registration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Compiled-in platform drivers making use of early_platform_init() are
+automatically registered during step 2 or 3. The serial driver example
+should use early_platform_init("earlyprintk", &platform_driver).
+
+5. Probing of early platform drivers belonging to a certain class
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The architecture code calls early_platform_driver_probe() to match
+registered early platform devices associated with a certain class with
+registered early platform drivers. Matched devices will get probed().
+This step can be executed at any point during the early boot. As soon
+as possible may be good for the serial port case.
+
+6. Inside the early platform driver probe()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The driver code needs to take special care during early boot, especially
+when it comes to memory allocation and interrupt registration. The code
+in the probe() function can use is_early_platform_device() to check if
+it is called at early platform device or at the regular platform device
+time. The early serial driver performs register_console() at this point.
+
+For further information, see <linux/platform_device.h>.
diff --git a/Documentation/driver-model/porting.txt b/Documentation/driver-model/porting.txt
new file mode 100644
index 000000000..92d86f727
--- /dev/null
+++ b/Documentation/driver-model/porting.txt
@@ -0,0 +1,445 @@
+
+Porting Drivers to the New Driver Model
+
+Patrick Mochel
+
+7 January 2003
+
+
+Overview
+
+Please refer to Documentation/driver-model/*.txt for definitions of
+various driver types and concepts.
+
+Most of the work of porting devices drivers to the new model happens
+at the bus driver layer. This was intentional, to minimize the
+negative effect on kernel drivers, and to allow a gradual transition
+of bus drivers.
+
+In a nutshell, the driver model consists of a set of objects that can
+be embedded in larger, bus-specific objects. Fields in these generic
+objects can replace fields in the bus-specific objects.
+
+The generic objects must be registered with the driver model core. By
+doing so, they will exported via the sysfs filesystem. sysfs can be
+mounted by doing
+
+ # mount -t sysfs sysfs /sys
+
+
+
+The Process
+
+Step 0: Read include/linux/device.h for object and function definitions.
+
+Step 1: Registering the bus driver.
+
+
+- Define a struct bus_type for the bus driver.
+
+struct bus_type pci_bus_type = {
+ .name = "pci",
+};
+
+
+- Register the bus type.
+ This should be done in the initialization function for the bus type,
+ which is usually the module_init(), or equivalent, function.
+
+static int __init pci_driver_init(void)
+{
+ return bus_register(&pci_bus_type);
+}
+
+subsys_initcall(pci_driver_init);
+
+
+ The bus type may be unregistered (if the bus driver may be compiled
+ as a module) by doing:
+
+ bus_unregister(&pci_bus_type);
+
+
+- Export the bus type for others to use.
+
+ Other code may wish to reference the bus type, so declare it in a
+ shared header file and export the symbol.
+
+From include/linux/pci.h:
+
+extern struct bus_type pci_bus_type;
+
+
+From file the above code appears in:
+
+EXPORT_SYMBOL(pci_bus_type);
+
+
+
+- This will cause the bus to show up in /sys/bus/pci/ with two
+ subdirectories: 'devices' and 'drivers'.
+
+# tree -d /sys/bus/pci/
+/sys/bus/pci/
+|-- devices
+`-- drivers
+
+
+
+Step 2: Registering Devices.
+
+struct device represents a single device. It mainly contains metadata
+describing the relationship the device has to other entities.
+
+
+- Embed a struct device in the bus-specific device type.
+
+
+struct pci_dev {
+ ...
+ struct device dev; /* Generic device interface */
+ ...
+};
+
+ It is recommended that the generic device not be the first item in
+ the struct to discourage programmers from doing mindless casts
+ between the object types. Instead macros, or inline functions,
+ should be created to convert from the generic object type.
+
+
+#define to_pci_dev(n) container_of(n, struct pci_dev, dev)
+
+or
+
+static inline struct pci_dev * to_pci_dev(struct kobject * kobj)
+{
+ return container_of(n, struct pci_dev, dev);
+}
+
+ This allows the compiler to verify type-safety of the operations
+ that are performed (which is Good).
+
+
+- Initialize the device on registration.
+
+ When devices are discovered or registered with the bus type, the
+ bus driver should initialize the generic device. The most important
+ things to initialize are the bus_id, parent, and bus fields.
+
+ The bus_id is an ASCII string that contains the device's address on
+ the bus. The format of this string is bus-specific. This is
+ necessary for representing devices in sysfs.
+
+ parent is the physical parent of the device. It is important that
+ the bus driver sets this field correctly.
+
+ The driver model maintains an ordered list of devices that it uses
+ for power management. This list must be in order to guarantee that
+ devices are shutdown before their physical parents, and vice versa.
+ The order of this list is determined by the parent of registered
+ devices.
+
+ Also, the location of the device's sysfs directory depends on a
+ device's parent. sysfs exports a directory structure that mirrors
+ the device hierarchy. Accurately setting the parent guarantees that
+ sysfs will accurately represent the hierarchy.
+
+ The device's bus field is a pointer to the bus type the device
+ belongs to. This should be set to the bus_type that was declared
+ and initialized before.
+
+ Optionally, the bus driver may set the device's name and release
+ fields.
+
+ The name field is an ASCII string describing the device, like
+
+ "ATI Technologies Inc Radeon QD"
+
+ The release field is a callback that the driver model core calls
+ when the device has been removed, and all references to it have
+ been released. More on this in a moment.
+
+
+- Register the device.
+
+ Once the generic device has been initialized, it can be registered
+ with the driver model core by doing:
+
+ device_register(&dev->dev);
+
+ It can later be unregistered by doing:
+
+ device_unregister(&dev->dev);
+
+ This should happen on buses that support hotpluggable devices.
+ If a bus driver unregisters a device, it should not immediately free
+ it. It should instead wait for the driver model core to call the
+ device's release method, then free the bus-specific object.
+ (There may be other code that is currently referencing the device
+ structure, and it would be rude to free the device while that is
+ happening).
+
+
+ When the device is registered, a directory in sysfs is created.
+ The PCI tree in sysfs looks like:
+
+/sys/devices/pci0/
+|-- 00:00.0
+|-- 00:01.0
+| `-- 01:00.0
+|-- 00:02.0
+| `-- 02:1f.0
+| `-- 03:00.0
+|-- 00:1e.0
+| `-- 04:04.0
+|-- 00:1f.0
+|-- 00:1f.1
+| |-- ide0
+| | |-- 0.0
+| | `-- 0.1
+| `-- ide1
+| `-- 1.0
+|-- 00:1f.2
+|-- 00:1f.3
+`-- 00:1f.5
+
+ Also, symlinks are created in the bus's 'devices' directory
+ that point to the device's directory in the physical hierarchy.
+
+/sys/bus/pci/devices/
+|-- 00:00.0 -> ../../../devices/pci0/00:00.0
+|-- 00:01.0 -> ../../../devices/pci0/00:01.0
+|-- 00:02.0 -> ../../../devices/pci0/00:02.0
+|-- 00:1e.0 -> ../../../devices/pci0/00:1e.0
+|-- 00:1f.0 -> ../../../devices/pci0/00:1f.0
+|-- 00:1f.1 -> ../../../devices/pci0/00:1f.1
+|-- 00:1f.2 -> ../../../devices/pci0/00:1f.2
+|-- 00:1f.3 -> ../../../devices/pci0/00:1f.3
+|-- 00:1f.5 -> ../../../devices/pci0/00:1f.5
+|-- 01:00.0 -> ../../../devices/pci0/00:01.0/01:00.0
+|-- 02:1f.0 -> ../../../devices/pci0/00:02.0/02:1f.0
+|-- 03:00.0 -> ../../../devices/pci0/00:02.0/02:1f.0/03:00.0
+`-- 04:04.0 -> ../../../devices/pci0/00:1e.0/04:04.0
+
+
+
+Step 3: Registering Drivers.
+
+struct device_driver is a simple driver structure that contains a set
+of operations that the driver model core may call.
+
+
+- Embed a struct device_driver in the bus-specific driver.
+
+ Just like with devices, do something like:
+
+struct pci_driver {
+ ...
+ struct device_driver driver;
+};
+
+
+- Initialize the generic driver structure.
+
+ When the driver registers with the bus (e.g. doing pci_register_driver()),
+ initialize the necessary fields of the driver: the name and bus
+ fields.
+
+
+- Register the driver.
+
+ After the generic driver has been initialized, call
+
+ driver_register(&drv->driver);
+
+ to register the driver with the core.
+
+ When the driver is unregistered from the bus, unregister it from the
+ core by doing:
+
+ driver_unregister(&drv->driver);
+
+ Note that this will block until all references to the driver have
+ gone away. Normally, there will not be any.
+
+
+- Sysfs representation.
+
+ Drivers are exported via sysfs in their bus's 'driver's directory.
+ For example:
+
+/sys/bus/pci/drivers/
+|-- 3c59x
+|-- Ensoniq AudioPCI
+|-- agpgart-amdk7
+|-- e100
+`-- serial
+
+
+Step 4: Define Generic Methods for Drivers.
+
+struct device_driver defines a set of operations that the driver model
+core calls. Most of these operations are probably similar to
+operations the bus already defines for drivers, but taking different
+parameters.
+
+It would be difficult and tedious to force every driver on a bus to
+simultaneously convert their drivers to generic format. Instead, the
+bus driver should define single instances of the generic methods that
+forward call to the bus-specific drivers. For instance:
+
+
+static int pci_device_remove(struct device * dev)
+{
+ struct pci_dev * pci_dev = to_pci_dev(dev);
+ struct pci_driver * drv = pci_dev->driver;
+
+ if (drv) {
+ if (drv->remove)
+ drv->remove(pci_dev);
+ pci_dev->driver = NULL;
+ }
+ return 0;
+}
+
+
+The generic driver should be initialized with these methods before it
+is registered.
+
+ /* initialize common driver fields */
+ drv->driver.name = drv->name;
+ drv->driver.bus = &pci_bus_type;
+ drv->driver.probe = pci_device_probe;
+ drv->driver.resume = pci_device_resume;
+ drv->driver.suspend = pci_device_suspend;
+ drv->driver.remove = pci_device_remove;
+
+ /* register with core */
+ driver_register(&drv->driver);
+
+
+Ideally, the bus should only initialize the fields if they are not
+already set. This allows the drivers to implement their own generic
+methods.
+
+
+Step 5: Support generic driver binding.
+
+The model assumes that a device or driver can be dynamically
+registered with the bus at any time. When registration happens,
+devices must be bound to a driver, or drivers must be bound to all
+devices that it supports.
+
+A driver typically contains a list of device IDs that it supports. The
+bus driver compares these IDs to the IDs of devices registered with it.
+The format of the device IDs, and the semantics for comparing them are
+bus-specific, so the generic model does attempt to generalize them.
+
+Instead, a bus may supply a method in struct bus_type that does the
+comparison:
+
+ int (*match)(struct device * dev, struct device_driver * drv);
+
+match should return '1' if the driver supports the device, and '0'
+otherwise.
+
+When a device is registered, the bus's list of drivers is iterated
+over. bus->match() is called for each one until a match is found.
+
+When a driver is registered, the bus's list of devices is iterated
+over. bus->match() is called for each device that is not already
+claimed by a driver.
+
+When a device is successfully bound to a driver, device->driver is
+set, the device is added to a per-driver list of devices, and a
+symlink is created in the driver's sysfs directory that points to the
+device's physical directory:
+
+/sys/bus/pci/drivers/
+|-- 3c59x
+| `-- 00:0b.0 -> ../../../../devices/pci0/00:0b.0
+|-- Ensoniq AudioPCI
+|-- agpgart-amdk7
+| `-- 00:00.0 -> ../../../../devices/pci0/00:00.0
+|-- e100
+| `-- 00:0c.0 -> ../../../../devices/pci0/00:0c.0
+`-- serial
+
+
+This driver binding should replace the existing driver binding
+mechanism the bus currently uses.
+
+
+Step 6: Supply a hotplug callback.
+
+Whenever a device is registered with the driver model core, the
+userspace program /sbin/hotplug is called to notify userspace.
+Users can define actions to perform when a device is inserted or
+removed.
+
+The driver model core passes several arguments to userspace via
+environment variables, including
+
+- ACTION: set to 'add' or 'remove'
+- DEVPATH: set to the device's physical path in sysfs.
+
+A bus driver may also supply additional parameters for userspace to
+consume. To do this, a bus must implement the 'hotplug' method in
+struct bus_type:
+
+ int (*hotplug) (struct device *dev, char **envp,
+ int num_envp, char *buffer, int buffer_size);
+
+This is called immediately before /sbin/hotplug is executed.
+
+
+Step 7: Cleaning up the bus driver.
+
+The generic bus, device, and driver structures provide several fields
+that can replace those defined privately to the bus driver.
+
+- Device list.
+
+struct bus_type contains a list of all devices registered with the bus
+type. This includes all devices on all instances of that bus type.
+An internal list that the bus uses may be removed, in favor of using
+this one.
+
+The core provides an iterator to access these devices.
+
+int bus_for_each_dev(struct bus_type * bus, struct device * start,
+ void * data, int (*fn)(struct device *, void *));
+
+
+- Driver list.
+
+struct bus_type also contains a list of all drivers registered with
+it. An internal list of drivers that the bus driver maintains may
+be removed in favor of using the generic one.
+
+The drivers may be iterated over, like devices:
+
+int bus_for_each_drv(struct bus_type * bus, struct device_driver * start,
+ void * data, int (*fn)(struct device_driver *, void *));
+
+
+Please see drivers/base/bus.c for more information.
+
+
+- rwsem
+
+struct bus_type contains an rwsem that protects all core accesses to
+the device and driver lists. This can be used by the bus driver
+internally, and should be used when accessing the device or driver
+lists the bus maintains.
+
+
+- Device and driver fields.
+
+Some of the fields in struct device and struct device_driver duplicate
+fields in the bus-specific representations of these objects. Feel free
+to remove the bus-specific ones and favor the generic ones. Note
+though, that this will likely mean fixing up all the drivers that
+reference the bus-specific fields (though those should all be 1-line
+changes).
+
diff --git a/Documentation/dvb/README.dvb-usb b/Documentation/dvb/README.dvb-usb
new file mode 100644
index 000000000..8eb92264e
--- /dev/null
+++ b/Documentation/dvb/README.dvb-usb
@@ -0,0 +1,232 @@
+Documentation for dvb-usb-framework module and its devices
+
+Idea behind the dvb-usb-framework
+=================================
+
+In March 2005 I got the new Twinhan USB2.0 DVB-T device. They provided specs and a firmware.
+
+Quite keen I wanted to put the driver (with some quirks of course) into dibusb.
+After reading some specs and doing some USB snooping, it realized, that the
+dibusb-driver would be a complete mess afterwards. So I decided to do it in a
+different way: With the help of a dvb-usb-framework.
+
+The framework provides generic functions (mostly kernel API calls), such as:
+
+- Transport Stream URB handling in conjunction with dvb-demux-feed-control
+ (bulk and isoc are supported)
+- registering the device for the DVB-API
+- registering an I2C-adapter if applicable
+- remote-control/input-device handling
+- firmware requesting and loading (currently just for the Cypress USB
+ controllers)
+- other functions/methods which can be shared by several drivers (such as
+ functions for bulk-control-commands)
+- TODO: a I2C-chunker. It creates device-specific chunks of register-accesses
+ depending on length of a register and the number of values that can be
+ multi-written and multi-read.
+
+The source code of the particular DVB USB devices does just the communication
+with the device via the bus. The connection between the DVB-API-functionality
+is done via callbacks, assigned in a static device-description (struct
+dvb_usb_device) each device-driver has to have.
+
+For an example have a look in drivers/media/usb/dvb-usb/vp7045*.
+
+Objective is to migrate all the usb-devices (dibusb, cinergyT2, maybe the
+ttusb; flexcop-usb already benefits from the generic flexcop-device) to use
+the dvb-usb-lib.
+
+TODO: dynamic enabling and disabling of the pid-filter in regard to number of
+feeds requested.
+
+Supported devices
+========================
+
+See the LinuxTV DVB Wiki at www.linuxtv.org for a complete list of
+cards/drivers/firmwares:
+
+http://www.linuxtv.org/wiki/index.php/DVB_USB
+
+0. History & News:
+ 2005-06-30 - added support for WideView WT-220U (Thanks to Steve Chang)
+ 2005-05-30 - added basic isochronous support to the dvb-usb-framework
+ added support for Conexant Hybrid reference design and Nebula DigiTV USB
+ 2005-04-17 - all dibusb devices ported to make use of the dvb-usb-framework
+ 2005-04-02 - re-enabled and improved remote control code.
+ 2005-03-31 - ported the Yakumo/Hama/Typhoon DVB-T USB2.0 device to dvb-usb.
+ 2005-03-30 - first commit of the dvb-usb-module based on the dibusb-source. First device is a new driver for the
+ TwinhanDTV Alpha / MagicBox II USB2.0-only DVB-T device.
+
+ (change from dvb-dibusb to dvb-usb)
+ 2005-03-28 - added support for the AVerMedia AverTV DVB-T USB2.0 device (Thanks to Glen Harris and Jiun-Kuei Jung, AVerMedia)
+ 2005-03-14 - added support for the Typhoon/Yakumo/HAMA DVB-T mobile USB2.0
+ 2005-02-11 - added support for the KWorld/ADSTech Instant DVB-T USB2.0. Thanks a lot to Joachim von Caron
+ 2005-02-02 - added support for the Hauppauge Win-TV Nova-T USB2
+ 2005-01-31 - distorted streaming is gone for USB1.1 devices
+ 2005-01-13 - moved the mirrored pid_filter_table back to dvb-dibusb
+ - first almost working version for HanfTek UMT-010
+ - found out, that Yakumo/HAMA/Typhoon are predecessors of the HanfTek UMT-010
+ 2005-01-10 - refactoring completed, now everything is very delightful
+ - tuner quirks for some weird devices (Artec T1 AN2235 device has sometimes a
+ Panasonic Tuner assembled). Tunerprobing implemented. Thanks a lot to Gunnar Wittich.
+ 2004-12-29 - after several days of struggling around bug of no returning URBs fixed.
+ 2004-12-26 - refactored the dibusb-driver, splitted into separate files
+ - i2c-probing enabled
+ 2004-12-06 - possibility for demod i2c-address probing
+ - new usb IDs (Compro, Artec)
+ 2004-11-23 - merged changes from DiB3000MC_ver2.1
+ - revised the debugging
+ - possibility to deliver the complete TS for USB2.0
+ 2004-11-21 - first working version of the dib3000mc/p frontend driver.
+ 2004-11-12 - added additional remote control keys. Thanks to Uwe Hanke.
+ 2004-11-07 - added remote control support. Thanks to David Matthews.
+ 2004-11-05 - added support for a new devices (Grandtec/Avermedia/Artec)
+ - merged my changes (for dib3000mb/dibusb) to the FE_REFACTORING, because it became HEAD
+ - moved transfer control (pid filter, fifo control) from usb driver to frontend, it seems
+ better settled there (added xfer_ops-struct)
+ - created a common files for frontends (mc/p/mb)
+ 2004-09-28 - added support for a new device (Unknown, vendor ID is Hyper-Paltek)
+ 2004-09-20 - added support for a new device (Compro DVB-U2000), thanks
+ to Amaury Demol for reporting
+ - changed usb TS transfer method (several urbs, stopping transfer
+ before setting a new pid)
+ 2004-09-13 - added support for a new device (Artec T1 USB TVBOX), thanks
+ to Christian Motschke for reporting
+ 2004-09-05 - released the dibusb device and dib3000mb-frontend driver
+
+ (old news for vp7041.c)
+ 2004-07-15 - found out, by accident, that the device has a TUA6010XS for
+ PLL
+ 2004-07-12 - figured out, that the driver should also work with the
+ CTS Portable (Chinese Television System)
+ 2004-07-08 - firmware-extraction-2.422-problem solved, driver is now working
+ properly with firmware extracted from 2.422
+ - #if for 2.6.4 (dvb), compile issue
+ - changed firmware handling, see vp7041.txt sec 1.1
+ 2004-07-02 - some tuner modifications, v0.1, cleanups, first public
+ 2004-06-28 - now using the dvb_dmx_swfilter_packets, everything
+ runs fine now
+ 2004-06-27 - able to watch and switching channels (pre-alpha)
+ - no section filtering yet
+ 2004-06-06 - first TS received, but kernel oops :/
+ 2004-05-14 - firmware loader is working
+ 2004-05-11 - start writing the driver
+
+1. How to use?
+1.1. Firmware
+
+Most of the USB drivers need to download a firmware to the device before start
+working.
+
+Have a look at the Wikipage for the DVB-USB-drivers to find out, which firmware
+you need for your device:
+
+http://www.linuxtv.org/wiki/index.php/DVB_USB
+
+1.2. Compiling
+
+Since the driver is in the linux kernel, activating the driver in
+your favorite config-environment should sufficient. I recommend
+to compile the driver as module. Hotplug does the rest.
+
+If you use dvb-kernel enter the build-2.6 directory run 'make' and 'insmod.sh
+load' afterwards.
+
+1.3. Loading the drivers
+
+Hotplug is able to load the driver, when it is needed (because you plugged
+in the device).
+
+If you want to enable debug output, you have to load the driver manually and
+from within the dvb-kernel cvs repository.
+
+first have a look, which debug level are available:
+
+modinfo dvb-usb
+modinfo dvb-usb-vp7045
+etc.
+
+modprobe dvb-usb debug=<level>
+modprobe dvb-usb-vp7045 debug=<level>
+etc.
+
+should do the trick.
+
+When the driver is loaded successfully, the firmware file was in
+the right place and the device is connected, the "Power"-LED should be
+turned on.
+
+At this point you should be able to start a dvb-capable application. I'm use
+(t|s)zap, mplayer and dvbscan to test the basics. VDR-xine provides the
+long-term test scenario.
+
+2. Known problems and bugs
+
+- Don't remove the USB device while running an DVB application, your system
+ will go crazy or die most likely.
+
+2.1. Adding support for devices
+
+TODO
+
+2.2. USB1.1 Bandwidth limitation
+
+A lot of the currently supported devices are USB1.1 and thus they have a
+maximum bandwidth of about 5-6 MBit/s when connected to a USB2.0 hub.
+This is not enough for receiving the complete transport stream of a
+DVB-T channel (which is about 16 MBit/s). Normally this is not a
+problem, if you only want to watch TV (this does not apply for HDTV),
+but watching a channel while recording another channel on the same
+frequency simply does not work very well. This applies to all USB1.1
+DVB-T devices, not just the dvb-usb-devices)
+
+The bug, where the TS is distorted by a heavy usage of the device is gone
+definitely. All dvb-usb-devices I was using (Twinhan, Kworld, DiBcom) are
+working like charm now with VDR. Sometimes I even was able to record a channel
+and watch another one.
+
+2.3. Comments
+
+Patches, comments and suggestions are very very welcome.
+
+3. Acknowledgements
+ Amaury Demol (ademol@dibcom.fr) and Francois Kanounnikoff from DiBcom for
+ providing specs, code and help, on which the dvb-dibusb, dib3000mb and
+ dib3000mc are based.
+
+ David Matthews for identifying a new device type (Artec T1 with AN2235)
+ and for extending dibusb with remote control event handling. Thank you.
+
+ Alex Woods for frequently answering question about usb and dvb
+ stuff, a big thank you.
+
+ Bernd Wagner for helping with huge bug reports and discussions.
+
+ Gunnar Wittich and Joachim von Caron for their trust for providing
+ root-shells on their machines to implement support for new devices.
+
+ Allan Third and Michael Hutchinson for their help to write the Nebula
+ digitv-driver.
+
+ Glen Harris for bringing up, that there is a new dibusb-device and Jiun-Kuei
+ Jung from AVerMedia who kindly provided a special firmware to get the device
+ up and running in Linux.
+
+ Jennifer Chen, Jeff and Jack from Twinhan for kindly supporting by
+ writing the vp7045-driver.
+
+ Steve Chang from WideView for providing information for new devices and
+ firmware files.
+
+ Michael Paxton for submitting remote control keymaps.
+
+ Some guys on the linux-dvb mailing list for encouraging me.
+
+ Peter Schildmann >peter.schildmann-nospam-at-web.de< for his
+ user-level firmware loader, which saves a lot of time
+ (when writing the vp7041 driver)
+
+ Ulf Hermenau for helping me out with traditional chinese.
+
+ André Smoktun and Christian Frömmel for supporting me with
+ hardware and listening to my problems very patiently.
diff --git a/Documentation/dvb/avermedia.txt b/Documentation/dvb/avermedia.txt
new file mode 100644
index 000000000..289e88f85
--- /dev/null
+++ b/Documentation/dvb/avermedia.txt
@@ -0,0 +1,299 @@
+HOWTO: Get An Avermedia DVB-T working under Linux
+ ______________________________________________
+
+ Table of Contents
+ Assumptions and Introduction
+ The Avermedia DVB-T
+ Getting the card going
+ Receiving DVB-T in Australia
+ Known Limitations
+ Further Update
+
+Assumptions and Introduction
+
+ It is assumed that the reader understands the basic structure
+ of the Linux Kernel DVB drivers and the general principles of
+ Digital TV.
+
+ One significant difference between Digital TV and Analogue TV
+ that the unwary (like myself) should consider is that,
+ although the component structure of budget DVB-T cards are
+ substantially similar to Analogue TV cards, they function in
+ substantially different ways.
+
+ The purpose of an Analogue TV is to receive and display an
+ Analogue Television signal. An Analogue TV signal (otherwise
+ known as composite video) is an analogue encoding of a
+ sequence of image frames (25 per second) rasterised using an
+ interlacing technique. Interlacing takes two fields to
+ represent one frame. Computers today are at their best when
+ dealing with digital signals, not analogue signals and a
+ composite video signal is about as far removed from a digital
+ data stream as you can get. Therefore, an Analogue TV card for
+ a PC has the following purpose:
+
+ * Tune the receiver to receive a broadcast signal
+ * demodulate the broadcast signal
+ * demultiplex the analogue video signal and analogue audio
+ signal (note some countries employ a digital audio signal
+ embedded within the modulated composite analogue signal -
+ NICAM.)
+ * digitize the analogue video signal and make the resulting
+ datastream available to the data bus.
+
+ The digital datastream from an Analogue TV card is generated
+ by circuitry on the card and is often presented uncompressed.
+ For a PAL TV signal encoded at a resolution of 768x576 24-bit
+ color pixels over 25 frames per second - a fair amount of data
+ is generated and must be processed by the PC before it can be
+ displayed on the video monitor screen. Some Analogue TV cards
+ for PCs have onboard MPEG2 encoders which permit the raw
+ digital data stream to be presented to the PC in an encoded
+ and compressed form - similar to the form that is used in
+ Digital TV.
+
+ The purpose of a simple budget digital TV card (DVB-T,C or S)
+ is to simply:
+
+ * Tune the received to receive a broadcast signal.
+ * Extract the encoded digital datastream from the broadcast
+ signal.
+ * Make the encoded digital datastream (MPEG2) available to
+ the data bus.
+
+ The significant difference between the two is that the tuner
+ on the analogue TV card spits out an Analogue signal, whereas
+ the tuner on the digital TV card spits out a compressed
+ encoded digital datastream. As the signal is already
+ digitised, it is trivial to pass this datastream to the PC
+ databus with minimal additional processing and then extract
+ the digital video and audio datastreams passing them to the
+ appropriate software or hardware for decoding and viewing.
+ _________________________________________________________
+
+The Avermedia DVB-T
+
+ The Avermedia DVB-T is a budget PCI DVB card. It has 3 inputs:
+
+ * RF Tuner Input
+ * Composite Video Input (RCA Jack)
+ * SVIDEO Input (Mini-DIN)
+
+ The RF Tuner Input is the input to the tuner module of the
+ card. The Tuner is otherwise known as the "Frontend" . The
+ Frontend of the Avermedia DVB-T is a Microtune 7202D. A timely
+ post to the linux-dvb mailing list ascertained that the
+ Microtune 7202D is supported by the sp887x driver which is
+ found in the dvb-hw CVS module.
+
+ The DVB-T card is based around the BT878 chip which is a very
+ common multimedia bridge and often found on Analogue TV cards.
+ There is no on-board MPEG2 decoder, which means that all MPEG2
+ decoding must be done in software, or if you have one, on an
+ MPEG2 hardware decoding card or chipset.
+ _________________________________________________________
+
+Getting the card going
+
+ In order to fire up the card, it is necessary to load a number
+ of modules from the DVB driver set. Prior to this it will have
+ been necessary to download these drivers from the linuxtv CVS
+ server and compile them successfully.
+
+ Depending on the card's feature set, the Device Driver API for
+ DVB under Linux will expose some of the following device files
+ in the /dev tree:
+
+ * /dev/dvb/adapter0/audio0
+ * /dev/dvb/adapter0/ca0
+ * /dev/dvb/adapter0/demux0
+ * /dev/dvb/adapter0/dvr0
+ * /dev/dvb/adapter0/frontend0
+ * /dev/dvb/adapter0/net0
+ * /dev/dvb/adapter0/osd0
+ * /dev/dvb/adapter0/video0
+
+ The primary device nodes that we are interested in (at this
+ stage) for the Avermedia DVB-T are:
+
+ * /dev/dvb/adapter0/dvr0
+ * /dev/dvb/adapter0/frontend0
+
+ The dvr0 device node is used to read the MPEG2 Data Stream and
+ the frontend0 node is used to tune the frontend tuner module.
+
+ At this stage, it has not been able to ascertain the
+ functionality of the remaining device nodes in respect of the
+ Avermedia DVBT. However, full functionality in respect of
+ tuning, receiving and supplying the MPEG2 data stream is
+ possible with the currently available versions of the driver.
+ It may be possible that additional functionality is available
+ from the card (i.e. viewing the additional analogue inputs
+ that the card presents), but this has not been tested yet. If
+ I get around to this, I'll update the document with whatever I
+ find.
+
+ To power up the card, load the following modules in the
+ following order:
+
+ * modprobe bttv (normally loaded automatically)
+ * modprobe dvb-bt8xx (or place dvb-bt8xx in /etc/modules)
+
+ Insertion of these modules into the running kernel will
+ activate the appropriate DVB device nodes. It is then possible
+ to start accessing the card with utilities such as scan, tzap,
+ dvbstream etc.
+
+ The frontend module sp887x.o, requires an external firmware.
+ /*(DEBLOBBED)*/
+
+Receiving DVB-T in Australia
+
+ I have no experience of DVB-T in other countries other than
+ Australia, so I will attempt to explain how it works here in
+ Melbourne and how this affects the configuration of the DVB-T
+ card.
+
+ The Digital Broadcasting Australia website has a Reception
+ locatortool which provides information on transponder channels
+ and frequencies. My local transmitter happens to be Mount
+ Dandenong.
+
+ The frequencies broadcast by Mount Dandenong are:
+
+ Table 1. Transponder Frequencies Mount Dandenong, Vic, Aus.
+ Broadcaster Channel Frequency
+ ABC VHF 12 226.5 MHz
+ TEN VHF 11 219.5 MHz
+ NINE VHF 8 191.625 MHz
+ SEVEN VHF 6 177.5 MHz
+ SBS UHF 29 536.5 MHz
+
+ The Scan utility has a set of compiled-in defaults for various
+ countries and regions, but if they do not suit, or if you have
+ a pre-compiled scan binary, you can specify a data file on the
+ command line which contains the transponder frequencies. Here
+ is a sample file for the above channel transponders:
+# Data file for DVB scan program
+#
+# C Frequency SymbolRate FEC QAM
+# S Frequency Polarisation SymbolRate FEC
+# T Frequency Bandwidth FEC FEC2 QAM Mode Guard Hier
+T 226500000 7MHz 2/3 NONE QAM64 8k 1/8 NONE
+T 191625000 7MHz 2/3 NONE QAM64 8k 1/8 NONE
+T 219500000 7MHz 2/3 NONE QAM64 8k 1/8 NONE
+T 177500000 7MHz 2/3 NONE QAM64 8k 1/8 NONE
+T 536500000 7MHz 2/3 NONE QAM64 8k 1/8 NONE
+
+ The defaults for the transponder frequency and other
+ modulation parameters were obtained from www.dba.org.au.
+
+ When Scan runs, it will output channels.conf information for
+ any channel's transponders which the card's frontend can lock
+ onto. (i.e. any whose signal is strong enough at your
+ antenna).
+
+ Here's my channels.conf file for anyone who's interested:
+ABC HDTV:226500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_3_4:QAM_64
+:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:2307:0:560
+ABC TV Melbourne:226500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_3_
+4:QAM_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:65
+0:561
+ABC TV 2:226500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_3_4:QAM_64
+:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:562
+ABC TV 3:226500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_3_4:QAM_64
+:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:563
+ABC TV 4:226500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_3_4:QAM_64
+:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:564
+ABC DiG Radio:226500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_3_4:Q
+AM_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:0:2311:56
+6
+TEN Digital:219500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:QAM
+_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:158
+5
+TEN Digital 1:219500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:Q
+AM_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:1
+586
+TEN Digital 2:219500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:Q
+AM_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:1
+587
+TEN Digital 3:219500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:Q
+AM_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:1
+588
+TEN Digital:219500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:QAM
+_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:158
+9
+TEN Digital 4:219500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:Q
+AM_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:1
+590
+TEN Digital:219500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:QAM
+_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:159
+1
+TEN HD:219500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:QAM_64:T
+RANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:514:0:1592
+TEN Digital:219500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:QAM
+_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:650:159
+3
+Nine Digital:191625000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:QA
+M_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:513:660:10
+72
+Nine Digital HD:191625000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2
+:QAM_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:512:0:1
+073
+Nine Guide:191625000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_3_4:FEC_1_2:QAM_
+64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_16:HIERARCHY_NONE:514:670:1074
+7 Digital:177500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:QAM_6
+4:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:769:770:1328
+7 Digital 1:177500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:QAM
+_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:769:770:1329
+7 Digital 2:177500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:QAM
+_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:769:770:1330
+7 Digital 3:177500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:QAM
+_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:769:770:1331
+7 HD Digital:177500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:QA
+M_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:833:834:133
+2
+7 Program Guide:177500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3
+:QAM_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:865:866:
+1334
+SBS HD:536500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:QAM_64:T
+RANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:102:103:784
+SBS DIGITAL 1:536500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:Q
+AM_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:161:81:785
+SBS DIGITAL 2:536500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:Q
+AM_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:162:83:786
+SBS EPG:536500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:QAM_64:
+TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:163:85:787
+SBS RADIO 1:536500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:QAM
+_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:0:201:798
+SBS RADIO 2:536500000:INVERSION_OFF:BANDWIDTH_7_MHZ:FEC_2_3:FEC_2_3:QAM
+_64:TRANSMISSION_MODE_8K:GUARD_INTERVAL_1_8:HIERARCHY_NONE:0:202:799
+ _________________________________________________________
+
+Known Limitations
+
+ At present I can say with confidence that the frontend tunes
+ via /dev/dvb/adapter{x}/frontend0 and supplies an MPEG2 stream
+ via /dev/dvb/adapter{x}/dvr0. I have not tested the
+ functionality of any other part of the card yet. I will do so
+ over time and update this document.
+
+ There are some limitations in the i2c layer due to a returned
+ error message inconsistency. Although this generates errors in
+ dmesg and the system logs, it does not appear to affect the
+ ability of the frontend to function correctly.
+ _________________________________________________________
+
+Further Update
+
+ dvbstream and VideoLAN Client on windows works a treat with
+ DVB, in fact this is currently serving as my main way of
+ viewing DVB-T at the moment. Additionally, VLC is happily
+ decoding HDTV signals, although the PC is dropping the odd
+ frame here and there - I assume due to processing capability -
+ as all the decoding is being done under windows in software.
+
+ Many thanks to Nigel Pearson for the updates to this document
+ since the recent revision of the driver.
+
+ February 14th 2006
diff --git a/Documentation/dvb/bt8xx.txt b/Documentation/dvb/bt8xx.txt
new file mode 100644
index 000000000..b7b1d1b1d
--- /dev/null
+++ b/Documentation/dvb/bt8xx.txt
@@ -0,0 +1,98 @@
+How to get the bt8xx cards working
+==================================
+
+1) General information
+======================
+
+This class of cards has a bt878a as the PCI interface, and require the bttv driver
+for accessing the i2c bus and the gpio pins of the bt8xx chipset.
+Please see Documentation/dvb/cards.txt => o Cards based on the Conexant Bt8xx PCI bridge:
+
+Compiling kernel please enable:
+a.)"Device drivers" => "Multimedia devices" => "Video For Linux" => "Enable Video for Linux API 1 (DEPRECATED)"
+b.)"Device drivers" => "Multimedia devices" => "Video For Linux" => "Video Capture Adapters" => "BT848 Video For Linux"
+c.)"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices" => "DVB for Linux" "DVB Core Support" "Bt8xx based PCI Cards"
+
+Please use the following options with care as deselection of drivers which are in fact necessary
+may result in DVB devices that cannot be tuned due to lack of driver support:
+You can save RAM by deselecting every frontend module that your DVB card does not need.
+
+First please remove the static dependency of DVB card drivers on all frontend modules for all possible card variants by enabling:
+d.) "Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
+ => "DVB for Linux" "DVB Core Support" "Load and attach frontend modules as needed"
+
+If you know the frontend driver that your card needs please enable:
+e.)"Device drivers" => "Multimedia devices" => "Digital Video Broadcasting Devices"
+ => "DVB for Linux" "DVB Core Support" "Customise DVB Frontends" => "Customise the frontend modules to build"
+ Then please select your card-specific frontend module.
+
+2) Loading Modules
+==================
+
+Regular case: If the bttv driver detects a bt8xx-based DVB card, all frontend and backend modules will be loaded automatically.
+Exceptions are:
+- Old TwinHan DST cards or clones with or without CA slot and not containing an Eeprom.
+People running udev please see Documentation/dvb/udev.txt.
+
+In the following cases overriding the PCI type detection for dvb-bt8xx might be necessary:
+
+2a) Running TwinHan and Clones
+------------------------------
+
+ $ modprobe bttv card=113
+ $ modprobe dst
+
+Useful parameters for verbosity level and debugging the dst module:
+
+verbose=0: messages are disabled
+ 1: only error messages are displayed
+ 2: notifications are displayed
+ 3: other useful messages are displayed
+ 4: debug setting
+dst_addons=0: card is a free to air (FTA) card only
+ 0x20: card has a conditional access slot for scrambled channels
+
+The autodetected values are determined by the cards' "response string".
+In your logs see f. ex.: dst_get_device_id: Recognize [DSTMCI].
+For bug reports please send in a complete log with verbose=4 activated.
+Please also see Documentation/dvb/ci.txt.
+
+2b) Running multiple cards
+--------------------------
+
+Examples of card ID's:
+
+Pinnacle PCTV Sat: 94
+Nebula Electronics Digi TV: 104
+pcHDTV HD-2000 TV: 112
+Twinhan DST and clones: 113
+Avermedia AverTV DVB-T 771: 123
+Avermedia AverTV DVB-T 761: 124
+DViCO FusionHDTV DVB-T Lite: 128
+DViCO FusionHDTV 5 Lite: 135
+
+Notice: The order of the card ID should be uprising:
+Example:
+ $ modprobe bttv card=113 card=135
+
+For a full list of card ID's please see Documentation/video4linux/CARDLIST.bttv.
+In case of further problems please subscribe and send questions to the mailing list: linux-dvb@linuxtv.org.
+
+2c) Probing the cards with broken PCI subsystem ID
+--------------------------------------------------
+There are some TwinHan cards that the EEPROM has become corrupted for some
+reason. The cards do not have correct PCI subsystem ID. But we can force
+probing the cards with broken PCI subsystem ID
+
+ $ echo 109e 0878 $subvendor $subdevice > \
+ /sys/bus/pci/drivers/bt878/new_id
+
+109e: PCI_VENDOR_ID_BROOKTREE
+0878: PCI_DEVICE_ID_BROOKTREE_878
+
+Authors: Richard Walker,
+ Jamie Honan,
+ Michael Hunold,
+ Manu Abraham,
+ Uwe Bugla,
+ Michael Krufky
diff --git a/Documentation/dvb/cards.txt b/Documentation/dvb/cards.txt
new file mode 100644
index 000000000..97709e9a3
--- /dev/null
+++ b/Documentation/dvb/cards.txt
@@ -0,0 +1,123 @@
+Hardware supported by the linuxtv.org DVB drivers
+=================================================
+
+ Generally, the DVB hardware manufacturers frequently change the
+ frontends (i.e. tuner / demodulator units) used, usually without
+ changing the product name, revision number or specs. Some cards
+ are also available in versions with different frontends for
+ DVB-S/DVB-C/DVB-T. Thus the frontend drivers are listed separately.
+
+ Note 1: There is no guarantee that every frontend driver works
+ out of the box with every card, because of different wiring.
+
+ Note 2: The demodulator chips can be used with a variety of
+ tuner/PLL chips, and not all combinations are supported. Often
+ the demodulator and tuner/PLL chip are inside a metal box for
+ shielding, and the whole metal box has its own part number.
+
+
+o Frontends drivers:
+ - dvb_dummy_fe: for testing...
+ DVB-S:
+ - ves1x93 : Alps BSRV2 (ves1893 demodulator) and dbox2 (ves1993)
+ - cx24110 : Conexant HM1221/HM1811 (cx24110 or cx24106 demod, cx24108 PLL)
+ - grundig_29504-491 : Grundig 29504-491 (Philips TDA8083 demodulator), tsa5522 PLL
+ - mt312 : Zarlink mt312 or Mitel vp310 demodulator, sl1935 or tsa5059 PLLi, Technisat Sky2Pc with bios Rev. 2.3
+ - stv0299 : Alps BSRU6 (tsa5059 PLL), LG TDQB-S00x (tsa5059 PLL),
+ LG TDQF-S001F (sl1935 PLL), Philips SU1278 (tua6100 PLL),
+ Philips SU1278SH (tsa5059 PLL), Samsung TBMU24112IMB, Technisat Sky2Pc with bios Rev. 2.6
+ DVB-C:
+ - ves1820 : various (ves1820 demodulator, sp5659c or spXXXX PLL)
+ - at76c651 : Atmel AT76c651(B) with DAT7021 PLL
+ DVB-T:
+ - alps_tdlb7 : Alps TDLB7 (sp8870 demodulator, sp5659 PLL)
+ - alps_tdmb7 : Alps TDMB7 (cx22700 demodulator)
+ - grundig_29504-401 : Grundig 29504-401 (LSI L64781 demodulator), tsa5060 PLL
+ - tda1004x : Philips tda10045h (td1344 or tdm1316l PLL)
+ - nxt6000 : Alps TDME7 (MITEL SP5659 PLL), Alps TDED4 (TI ALP510 PLL),
+ Comtech DVBT-6k07 (SP5730 PLL)
+ (NxtWave Communications NXT6000 demodulator)
+ - sp887x : Microtune 7202D
+ - dib3000mb : DiBcom 3000-MB demodulator
+ DVB-S/C/T:
+ - dst : TwinHan DST Frontend
+ ATSC:
+ - nxt200x : Nxtwave NXT2002 & NXT2004
+ - or51211 : or51211 based (pcHDTV HD2000 card)
+ - or51132 : or51132 based (pcHDTV HD3000 card)
+ - bcm3510 : Broadcom BCM3510
+ - lgdt330x : LG Electronics DT3302 & DT3303
+
+
+o Cards based on the Phillips saa7146 multimedia PCI bridge chip:
+ - TI AV7110 based cards (i.e. with hardware MPEG decoder):
+ - Siemens/Technotrend/Hauppauge PCI DVB card revision 1.1, 1.3, 1.5, 1.6, 2.1
+ (aka Hauppauge Nexus)
+ - "budget" cards (i.e. without hardware MPEG decoder):
+ - Technotrend Budget / Hauppauge WinTV-Nova PCI Cards
+ - SATELCO Multimedia PCI
+ - KNC1 DVB-S, Typhoon DVB-S, Terratec Cinergy 1200 DVB-S (no CI support)
+ - Typhoon DVB-S budget
+ - Fujitsu-Siemens Activy DVB-S budget card
+
+o Cards based on the B2C2 Inc. FlexCopII/IIb/III:
+ - Technisat SkyStar2 PCI DVB card revision 2.3, 2.6B, 2.6C
+
+o Cards based on the Conexant Bt8xx PCI bridge:
+ - Pinnacle PCTV Sat DVB
+ - Nebula Electronics DigiTV
+ - TwinHan DST
+ - Avermedia DVB-T
+ - ChainTech digitop DST-1000 DVB-S
+ - pcHDTV HD-2000 TV
+ - DViCO FusionHDTV DVB-T Lite
+ - DViCO FusionHDTV5 Lite
+
+o Technotrend / Hauppauge DVB USB devices:
+ - Nova USB
+ - DEC 2000-T, 3000-S, 2540-T
+
+o DiBcom DVB-T USB based devices:
+ - Twinhan VisionPlus VisionDTV USB-Ter DVB-T Device
+ - HAMA DVB-T USB device
+ - CTS Portable (Chinese Television System)
+ - KWorld V-Stream XPERT DTV DVB-T USB
+ - JetWay DTV DVB-T USB
+ - ADSTech Instant TV DVB-T USB
+ - Ultima Electronic/Artec T1 USB TVBOX (AN2135 and AN2235)
+ - Compro Videomate DVB-U2000 - DVB-T USB
+ - Grandtec USB DVB-T
+ - Avermedia AverTV DVBT USB
+ - DiBcom USB DVB-T reference device (non-public)
+ - Yakumo DVB-T mobile USB2.0
+ - DiBcom USB2.0 DVB-T reference device (non-public)
+
+o Experimental support for the analog module of the Siemens DVB-C PCI card
+
+o Cards based on the Conexant cx2388x PCI bridge:
+ - ADS Tech Instant TV DVB-T PCI
+ - ATI HDTV Wonder
+ - digitalnow DNTV Live! DVB-T
+ - DViCO FusionHDTV DVB-T1
+ - DViCO FusionHDTV DVB-T Plus
+ - DViCO FusionHDTV3 Gold-Q
+ - DViCO FusionHDTV3 Gold-T
+ - DViCO FusionHDTV5 Gold
+ - Hauppauge Nova-T DVB-T
+ - KWorld/VStream XPert DVB-T
+ - pcHDTV HD3000 HDTV
+ - TerraTec Cinergy 1400 DVB-T
+ - WinFast DTV1000-T
+
+o Cards based on the Phillips saa7134 PCI bridge:
+ - Medion 7134
+ - Pinnacle PCTV 300i DVB-T + PAL
+ - LifeView FlyDVB-T DUO
+ - Typhoon DVB-T Duo Digital/Analog Cardbus
+ - Philips TOUGH DVB-T reference design
+ - Philips EUROPA V3 reference design
+ - Compro Videomate DVB-T300
+ - Compro Videomate DVB-T200
+ - AVerMedia AVerTVHD MCE A180
+ - KWorld PC150-U ATSC Hybrid
+
diff --git a/Documentation/dvb/ci.txt b/Documentation/dvb/ci.txt
new file mode 100644
index 000000000..6c3bda50f
--- /dev/null
+++ b/Documentation/dvb/ci.txt
@@ -0,0 +1,212 @@
+* For the user
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+NOTE: This document describes the usage of the high level CI API as
+in accordance to the Linux DVB API. This is a not a documentation for the,
+existing low level CI API.
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To utilize the High Level CI capabilities,
+
+(1*) This point is valid only for the Twinhan/clones
+ For the Twinhan/Twinhan clones, the dst_ca module handles the CI
+ hardware handling.This module is loaded automatically if a CI
+ (Common Interface, that holds the CAM (Conditional Access Module)
+ is detected.
+
+(2) one requires a userspace application, ca_zap. This small userland
+ application is in charge of sending the descrambling related information
+ to the CAM.
+
+This application requires the following to function properly as of now.
+
+ (a) Tune to a valid channel, with szap.
+ eg: $ szap -c channels.conf -r "TMC" -x
+
+ (b) a channels.conf containing a valid PMT PID
+ eg: TMC:11996:h:0:27500:278:512:650:321
+
+ here 278 is a valid PMT PID. the rest of the values are the
+ same ones that szap uses.
+
+ (c) after running a szap, you have to run ca_zap, for the
+ descrambler to function,
+ eg: $ ca_zap channels.conf "TMC"
+
+ (d) Hopefully enjoy your favourite subscribed channel as you do with
+ a FTA card.
+
+(3) Currently ca_zap, and dst_test, both are meant for demonstration
+ purposes only, they can become full fledged applications if necessary.
+
+
+* Cards that fall in this category
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+At present the cards that fall in this category are the Twinhan and its
+clones, these cards are available as VVMER, Tomato, Hercules, Orange and
+so on.
+
+* CI modules that are supported
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The CI module support is largely dependent upon the firmware on the cards
+Some cards do support almost all of the available CI modules. There is
+nothing much that can be done in order to make additional CI modules
+working with these cards.
+
+Modules that have been tested by this driver at present are
+
+(1) Irdeto 1 and 2 from SCM
+(2) Viaccess from SCM
+(3) Dragoncam
+
+* The High level CI API
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+* For the programmer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+With the High Level CI approach any new card with almost any random
+architecture can be implemented with this style, the definitions
+inside the switch statement can be easily adapted for any card, thereby
+eliminating the need for any additional ioctls.
+
+The disadvantage is that the driver/hardware has to manage the rest. For
+the application programmer it would be as simple as sending/receiving an
+array to/from the CI ioctls as defined in the Linux DVB API. No changes
+have been made in the API to accommodate this feature.
+
+
+* Why the need for another CI interface ?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+This is one of the most commonly asked question. Well a nice question.
+Strictly speaking this is not a new interface.
+
+The CI interface is defined in the DVB API in ca.h as
+
+typedef struct ca_slot_info {
+ int num; /* slot number */
+
+ int type; /* CA interface this slot supports */
+#define CA_CI 1 /* CI high level interface */
+#define CA_CI_LINK 2 /* CI link layer level interface */
+#define CA_CI_PHYS 4 /* CI physical layer level interface */
+#define CA_DESCR 8 /* built-in descrambler */
+#define CA_SC 128 /* simple smart card interface */
+
+ unsigned int flags;
+#define CA_CI_MODULE_PRESENT 1 /* module (or card) inserted */
+#define CA_CI_MODULE_READY 2
+} ca_slot_info_t;
+
+
+
+This CI interface follows the CI high level interface, which is not
+implemented by most applications. Hence this area is revisited.
+
+This CI interface is quite different in the case that it tries to
+accommodate all other CI based devices, that fall into the other categories.
+
+This means that this CI interface handles the EN50221 style tags in the
+Application layer only and no session management is taken care of by the
+application. The driver/hardware will take care of all that.
+
+This interface is purely an EN50221 interface exchanging APDU's. This
+means that no session management, link layer or a transport layer do
+exist in this case in the application to driver communication. It is
+as simple as that. The driver/hardware has to take care of that.
+
+
+With this High Level CI interface, the interface can be defined with the
+regular ioctls.
+
+All these ioctls are also valid for the High level CI interface
+
+#define CA_RESET _IO('o', 128)
+#define CA_GET_CAP _IOR('o', 129, ca_caps_t)
+#define CA_GET_SLOT_INFO _IOR('o', 130, ca_slot_info_t)
+#define CA_GET_DESCR_INFO _IOR('o', 131, ca_descr_info_t)
+#define CA_GET_MSG _IOR('o', 132, ca_msg_t)
+#define CA_SEND_MSG _IOW('o', 133, ca_msg_t)
+#define CA_SET_DESCR _IOW('o', 134, ca_descr_t)
+#define CA_SET_PID _IOW('o', 135, ca_pid_t)
+
+
+On querying the device, the device yields information thus
+
+CA_GET_SLOT_INFO
+----------------------------
+Command = [info]
+APP: Number=[1]
+APP: Type=[1]
+APP: flags=[1]
+APP: CI High level interface
+APP: CA/CI Module Present
+
+CA_GET_CAP
+----------------------------
+Command = [caps]
+APP: Slots=[1]
+APP: Type=[1]
+APP: Descrambler keys=[16]
+APP: Type=[1]
+
+CA_SEND_MSG
+----------------------------
+Descriptors(Program Level)=[ 09 06 06 04 05 50 ff f1]
+Found CA descriptor @ program level
+
+(20) ES type=[2] ES pid=[201] ES length =[0 (0x0)]
+(25) ES type=[4] ES pid=[301] ES length =[0 (0x0)]
+ca_message length is 25 (0x19) bytes
+EN50221 CA MSG=[ 9f 80 32 19 03 01 2d d1 f0 08 01 09 06 06 04 05 50 ff f1 02 e0 c9 00 00 04 e1 2d 00 00]
+
+
+Not all ioctl's are implemented in the driver from the API, the other
+features of the hardware that cannot be implemented by the API are achieved
+using the CA_GET_MSG and CA_SEND_MSG ioctls. An EN50221 style wrapper is
+used to exchange the data to maintain compatibility with other hardware.
+
+
+/* a message to/from a CI-CAM */
+typedef struct ca_msg {
+ unsigned int index;
+ unsigned int type;
+ unsigned int length;
+ unsigned char msg[256];
+} ca_msg_t;
+
+
+The flow of data can be described thus,
+
+
+
+
+
+ App (User)
+ -----
+ parse
+ |
+ |
+ v
+ en50221 APDU (package)
+ --------------------------------------
+ | | | High Level CI driver
+ | | |
+ | v |
+ | en50221 APDU (unpackage) |
+ | | |
+ | | |
+ | v |
+ | sanity checks |
+ | | |
+ | | |
+ | v |
+ | do (H/W dep) |
+ --------------------------------------
+ | Hardware
+ |
+ v
+
+
+
+
+The High Level CI interface uses the EN50221 DVB standard, following a
+standard ensures futureproofness.
diff --git a/Documentation/dvb/contributors.txt b/Documentation/dvb/contributors.txt
new file mode 100644
index 000000000..731a00972
--- /dev/null
+++ b/Documentation/dvb/contributors.txt
@@ -0,0 +1,96 @@
+Thanks go to the following people for patches and contributions:
+
+Michael Hunold <m.hunold@gmx.de>
+ for the initial saa7146 driver and its recent overhaul
+
+Christian Theiss
+ for his work on the initial Linux DVB driver
+
+Marcus Metzler <mocm@metzlerbros.de>
+Ralph Metzler <rjkm@metzlerbros.de>
+ for their continuing work on the DVB driver
+
+Michael Holzt <kju@debian.org>
+ for his contributions to the dvb-net driver
+
+Diego Picciani <d.picciani@novacomp.it>
+ for CyberLogin for Linux which allows logging onto EON
+ (in case you are wondering where CyberLogin is, EON changed its login
+ procedure and CyberLogin is no longer used.)
+
+Martin Schaller <martin@smurf.franken.de>
+ for patching the cable card decoder driver
+
+Klaus Schmidinger <Klaus.Schmidinger@cadsoft.de>
+ for various fixes regarding tuning, OSD and CI stuff and his work on VDR
+
+Steve Brown <sbrown@cortland.com>
+ for his AFC kernel thread
+
+Christoph Martin <martin@uni-mainz.de>
+ for his LIRC infrared handler
+
+Andreas Oberritter <obi@linuxtv.org>
+Dennis Noermann <dennis.noermann@noernet.de>
+Felix Domke <tmbinc@elitedvb.net>
+Florian Schirmer <jolt@tuxbox.org>
+Ronny Strutz <3des@elitedvb.de>
+Wolfram Joost <dbox2@frokaschwei.de>
+...and all the other dbox2 people
+ for many bugfixes in the generic DVB Core, frontend drivers and
+ their work on the dbox2 port of the DVB driver
+
+Oliver Endriss <o.endriss@gmx.de>
+ for many bugfixes
+
+Andrew de Quincey <adq_dvb@lidskialf.net>
+ for the tda1004x frontend driver, and various bugfixes
+
+Peter Schildmann <peter.schildmann@web.de>
+ for the driver for the Technisat SkyStar2 PCI DVB card
+
+Vadim Catana <skystar@moldova.cc>
+Roberto Ragusa <r.ragusa@libero.it>
+Augusto Cardoso <augusto@carhil.net>
+ for all the work for the FlexCopII chipset by B2C2,Inc.
+
+Davor Emard <emard@softhome.net>
+ for his work on the budget drivers, the demux code,
+ the module unloading problems, ...
+
+Hans-Frieder Vogt <hfvogt@arcor.de>
+ for his work on calculating and checking the crc's for the
+ TechnoTrend/Hauppauge DEC driver firmware
+
+Michael Dreher <michael@5dot1.de>
+Andreas 'randy' Weinberger
+ for the support of the Fujitsu-Siemens Activy budget DVB-S
+
+Kenneth Aafløy <ke-aa@frisurf.no>
+ for adding support for Typhoon DVB-S budget card
+
+Ernst Peinlich <e.peinlich@inode.at>
+ for tuning/DiSEqC support for the DEC 3000-s
+
+Peter Beutner <p.beutner@gmx.net>
+ for the IR code for the ttusb-dec driver
+
+Wilson Michaels <wilsonmichaels@earthlink.net>
+ for the lgdt330x frontend driver, and various bugfixes
+
+Michael Krufky <mkrufky@linuxtv.org>
+ for maintaining v4l/dvb inter-tree dependencies
+
+Taylor Jacob <rtjacob@earthlink.net>
+ for the nxt2002 frontend driver
+
+Jean-Francois Thibert <jeanfrancois@sagetv.com>
+ for the nxt2004 frontend driver
+
+Kirk Lapray <kirk.lapray@gmail.com>
+ for the or51211 and or51132 frontend drivers, and
+ for merging the nxt2002 and nxt2004 modules into a
+ single nxt200x frontend driver.
+
+(If you think you should be in this list, but you are not, drop a
+ line to the DVB mailing list)
diff --git a/Documentation/dvb/faq.txt b/Documentation/dvb/faq.txt
new file mode 100644
index 000000000..97b1373f2
--- /dev/null
+++ b/Documentation/dvb/faq.txt
@@ -0,0 +1,159 @@
+Some very frequently asked questions about linuxtv-dvb
+
+1. The signal seems to die a few seconds after tuning.
+
+ It's not a bug, it's a feature. Because the frontends have
+ significant power requirements (and hence get very hot), they
+ are powered down if they are unused (i.e. if the frontend device
+ is closed). The dvb-core.o module parameter "dvb_shutdown_timeout"
+ allow you to change the timeout (default 5 seconds). Setting the
+ timeout to 0 disables the timeout feature.
+
+2. How can I watch TV?
+
+ The driver distribution includes some simple utilities which
+ are mainly intended for testing and to demonstrate how the
+ DVB API works.
+
+ Depending on whether you have a DVB-S, DVB-C or DVB-T card, use
+ apps/szap/szap, czap or tzap. You must supply a channel list
+ in ~/.[sct]zap/channels.conf. If you are lucky you can just copy
+ one of the supplied channel lists, or you can create a new one
+ by running apps/scan/scan. If you run scan on an unknown network
+ you might have to supply some start data in apps/scan/initial.h.
+
+ If you have a card with a built-in hardware MPEG-decoder the
+ drivers create a video4linux device (/dev/v4l/video0) which
+ you can use to watch TV with any v4l application. xawtv is known
+ to work. Note that you cannot change channels with xawtv, you
+ have to zap using [sct]zap. If you want a nice application for
+ TV watching and record/playback, have a look at VDR.
+
+ If your card does not have a hardware MPEG decoder you need
+ a software MPEG decoder. Mplayer or xine are known to work.
+ Newsflash: MythTV also has DVB support now.
+ Note: Only very recent versions of Mplayer and xine can decode.
+ MPEG2 transport streams (TS) directly. Then, run
+ '[sct]zap channelname -r' in one xterm, and keep it running,
+ and start 'mplayer - < /dev/dvb/adapter0/dvr0' or
+ 'xine stdin://mpeg2 < /dev/dvb/adapter0/dvr0' in a second xterm.
+ That's all far from perfect, but it seems no one has written
+ a nice DVB application which includes a builtin software MPEG
+ decoder yet.
+
+ Newsflash: Newest xine directly supports DVB. Just copy your
+ channels.conf to ~/.xine and start 'xine dvb://', or select
+ the DVB button in the xine GUI. Channel switching works using the
+ numpad pgup/pgdown (NP9 / NP3) keys to scroll through the channel osd
+ menu and pressing numpad-enter to switch to the selected channel.
+
+ Note: Older versions of xine and mplayer understand MPEG program
+ streams (PS) only, and can be used in conjunction with the
+ ts2ps tool from the Metzler Brother's dvb-mpegtools package.
+
+3. Which other DVB applications exist?
+
+ http://www.cadsoft.de/people/kls/vdr/
+ Klaus Schmidinger's Video Disk Recorder
+
+ http://www.metzlerbros.org/dvb/
+ Metzler Bros. DVB development; alternate drivers and
+ DVB utilities, include dvb-mpegtools and tuxzap.
+
+ http://sourceforge.net/projects/dvbtools/
+ Dave Chapman's dvbtools package, including
+ dvbstream and dvbtune
+
+ http://www.linuxdvb.tv/
+ Henning Holtschneider's site with many interesting
+ links and docs
+
+ http://www.dbox2.info/
+ LinuxDVB on the dBox2
+
+ http://www.tuxbox.org/
+ http://cvs.tuxbox.org/
+ the TuxBox CVS many interesting DVB applications and the dBox2
+ DVB source
+
+ http://www.linuxtv.org/downloads/
+ DVB Swiss Army Knife library and utilities
+
+ http://www.nenie.org/misc/mpsys/
+ MPSYS: a MPEG2 system library and tools
+
+ http://mplayerhq.hu/
+ mplayer
+
+ http://xine.sourceforge.net/
+ http://xinehq.de/
+ xine
+
+ http://www.mythtv.org/
+ MythTV - analog TV PVR, but now with DVB support, too
+ (with software MPEG decode)
+
+ http://dvbsnoop.sourceforge.net/
+ DVB sniffer program to monitor, analyze, debug, dump
+ or view dvb/mpeg/dsm-cc/mhp stream information (TS,
+ PES, SECTION)
+
+4. Can't get a signal tuned correctly
+
+ If you are using a Technotrend/Hauppauge DVB-C card *without* analog
+ module, you might have to use module parameter adac=-1 (dvb-ttpci.o).
+
+5. The dvb_net device doesn't give me any packets at all
+
+ Run tcpdump on the dvb0_0 interface. This sets the interface
+ into promiscuous mode so it accepts any packets from the PID
+ you have configured with the dvbnet utility. Check if there
+ are any packets with the IP addr and MAC addr you have
+ configured with ifconfig.
+
+ If tcpdump doesn't give you any output, check the statistics
+ which ifconfig outputs. (Note: If the MAC address is wrong,
+ dvb_net won't get any input; thus you have to run tcpdump
+ before checking the statistics.) If there are no packets at
+ all then maybe the PID is wrong. If there are error packets,
+ then either the PID is wrong or the stream does not conform to
+ the MPE standard (EN 301 192, http://www.etsi.org/). You can
+ use e.g. dvbsnoop for debugging.
+
+6. The dvb_net device doesn't give me any multicast packets
+
+ Check your routes if they include the multicast address range.
+ Additionally make sure that "source validation by reversed path
+ lookup" is disabled:
+ $ "echo 0 > /proc/sys/net/ipv4/conf/dvb0/rp_filter"
+
+7. What the hell are all those modules that need to be loaded?
+
+ For a dvb-ttpci av7110 based full-featured card the following
+ modules are loaded:
+
+ - videodev: Video4Linux core module. This is the base module that
+ gives you access to the "analog" tv picture of the av7110 mpeg2
+ decoder.
+
+ - v4l2-common: common functions for Video4Linux-2 drivers
+
+ - v4l1-compat: backward compatibility layer for Video4Linux-1 legacy
+ applications
+
+ - dvb-core: DVB core module. This provides you with the
+ /dev/dvb/adapter entries
+
+ - saa7146: SAA7146 core driver. This is need to access any SAA7146
+ based card in your system.
+
+ - saa7146_vv: SAA7146 video and vbi functions. These are only needed
+ for full-featured cards.
+
+ - videobuf-dma-sg: capture helper module for the saa7146_vv driver. This
+ one is responsible to handle capture buffers.
+
+ - dvb-ttpci: The main driver for AV7110 based, full-featured
+ DVB-S/C/T cards
+
+eof
diff --git a/Documentation/dvb/opera-firmware.txt b/Documentation/dvb/opera-firmware.txt
new file mode 100644
index 000000000..506702edb
--- /dev/null
+++ b/Documentation/dvb/opera-firmware.txt
@@ -0,0 +1,8 @@
+/*(DEBLOBBED)*/
+
+After that the driver can load the firmware
+(if you have enabled firmware loading
+in kernel config and have hotplug running).
+
+
+Marco Gittler <g.marco@freenet.de>
diff --git a/Documentation/dvb/readme.txt b/Documentation/dvb/readme.txt
new file mode 100644
index 000000000..0b0380c91
--- /dev/null
+++ b/Documentation/dvb/readme.txt
@@ -0,0 +1,62 @@
+Linux Digital Video Broadcast (DVB) subsystem
+=============================================
+
+The main development site and CVS repository for these
+drivers is http://linuxtv.org/.
+
+The developer mailing list linux-dvb is also hosted there,
+see http://linuxtv.org/lists.php. Please check
+the archive http://linuxtv.org/pipermail/linux-dvb/
+and the Wiki http://linuxtv.org/wiki/
+before asking newbie questions on the list.
+
+API documentation, utilities and test/example programs
+are available as part of the old driver package for Linux 2.4
+(linuxtv-dvb-1.0.x.tar.gz), or from CVS (module DVB).
+We plan to split this into separate packages, but it's not
+been done yet.
+
+http://linuxtv.org/downloads/
+
+What's inside this directory:
+
+"avermedia.txt"
+contains detailed information about the
+Avermedia DVB-T cards. See also "bt8xx.txt".
+
+"bt8xx.txt"
+contains detailed information about the
+various bt8xx based "budget" DVB cards.
+
+"cards.txt"
+contains a list of supported hardware.
+
+"ci.txt"
+contains detailed information about the
+CI module as part from TwinHan cards and Clones.
+
+"contributors.txt"
+is the who-is-who of DVB development.
+
+"faq.txt"
+contains frequently asked questions and their answers.
+
+"get_dvb_firmware"
+script to download and extract firmware for those devices
+that require it.
+
+"ttusb-dec.txt"
+contains detailed information about the
+TT DEC2000/DEC3000 USB DVB hardware.
+
+"udev.txt"
+how to get DVB and udev up and running.
+
+"README.dvb-usb"
+contains detailed information about the DVB USB cards.
+
+"README.flexcop"
+contains detailed information about the
+Technisat- and Flexcop B2C2 drivers.
+
+Good luck and have fun!
diff --git a/Documentation/dvb/technisat.txt b/Documentation/dvb/technisat.txt
new file mode 100644
index 000000000..f0cc4f2d8
--- /dev/null
+++ b/Documentation/dvb/technisat.txt
@@ -0,0 +1,78 @@
+How to set up the Technisat/B2C2 Flexcop devices
+================================================
+
+1) Find out what device you have
+================================
+
+Important Notice: The driver does NOT support Technisat USB 2 devices!
+
+First start your linux box with a shipped kernel:
+lspci -vvv for a PCI device (lsusb -vvv for an USB device) will show you for example:
+02:0b.0 Network controller: Techsan Electronics Co Ltd B2C2 FlexCopII DVB chip /
+ Technisat SkyStar2 DVB card (rev 02)
+
+dmesg | grep frontend may show you for example:
+DVB: registering frontend 0 (Conexant CX24123/CX24109)...
+
+2) Kernel compilation:
+======================
+
+If the Flexcop / Technisat is the only DVB / TV / Radio device in your box
+ get rid of unnecessary modules and check this one:
+"Multimedia support" => "Customise analog and hybrid tuner modules to build"
+In this directory uncheck every driver which is activated there
+ (except "Simple tuner support" for ATSC 3rd generation only -> see case 9 please).
+
+Then please activate:
+2a) Main module part:
+"Multimedia support" => "DVB/ATSC adapters"
+ => "Technisat/B2C2 FlexcopII(b) and FlexCopIII adapters"
+
+a.) => "Technisat/B2C2 Air/Sky/Cable2PC PCI" (PCI card) or
+b.) => "Technisat/B2C2 Air/Sky/Cable2PC USB" (USB 1.1 adapter)
+ and for troubleshooting purposes:
+c.) => "Enable debug for the B2C2 FlexCop drivers"
+
+2b) Frontend / Tuner / Demodulator module part:
+"Multimedia support" => "DVB/ATSC adapters"
+ => "Customise the frontend modules to build" "Customise DVB frontends" =>
+
+1.) SkyStar DVB-S Revision 2.3:
+a.) => "Zarlink VP310/MT312/ZL10313 based"
+b.) => "Generic I2C PLL based tuners"
+
+2.) SkyStar DVB-S Revision 2.6:
+a.) => "ST STV0299 based"
+b.) => "Generic I2C PLL based tuners"
+
+3.) SkyStar DVB-S Revision 2.7:
+a.) => "Samsung S5H1420 based"
+b.) => "Integrant ITD1000 Zero IF tuner for DVB-S/DSS"
+c.) => "ISL6421 SEC controller"
+
+4.) SkyStar DVB-S Revision 2.8:
+a.) => "Conexant CX24123 based"
+b.) => "Conexant CX24113/CX24128 tuner for DVB-S/DSS"
+c.) => "ISL6421 SEC controller"
+
+5.) AirStar DVB-T card:
+a.) => "Zarlink MT352 based"
+b.) => "Generic I2C PLL based tuners"
+
+6.) CableStar DVB-C card:
+a.) => "ST STV0297 based"
+b.) => "Generic I2C PLL based tuners"
+
+7.) AirStar ATSC card 1st generation:
+a.) => "Broadcom BCM3510"
+
+8.) AirStar ATSC card 2nd generation:
+a.) => "NxtWave Communications NXT2002/NXT2004 based"
+b.) => "Generic I2C PLL based tuners"
+
+9.) AirStar ATSC card 3rd generation:
+a.) => "LG Electronics LGDT3302/LGDT3303 based"
+b.) "Multimedia support" => "Customise analog and hybrid tuner modules to build"
+ => "Simple tuner support"
+
+Author: Uwe Bugla <uwe.bugla@gmx.de> August 2009
diff --git a/Documentation/dvb/ttusb-dec.txt b/Documentation/dvb/ttusb-dec.txt
new file mode 100644
index 000000000..cbe42bebd
--- /dev/null
+++ b/Documentation/dvb/ttusb-dec.txt
@@ -0,0 +1,40 @@
+TechnoTrend/Hauppauge DEC USB Driver
+====================================
+
+Driver Status
+-------------
+
+Supported:
+ DEC2000-t
+ DEC2450-t
+ DEC3000-s
+ Linux Kernels 2.4 and 2.6
+ Video Streaming
+ Audio Streaming
+ Section Filters
+ Channel Zapping
+ Hotplug firmware loader under 2.6 kernels
+
+To Do:
+ Tuner status information
+ DVB network interface
+ Streaming video PC->DEC
+ Conax support for 2450-t
+
+/*(DEBLOBBED)*/
+
+
+Compilation Notes for 2.4 kernels
+---------------------------------
+For 2.4 kernels the firmware for the DECs is compiled into the driver itself.
+
+Copy the three files downloaded above into the build-2.4 directory.
+
+
+Hotplug Firmware Loading for 2.6 kernels
+----------------------------------------
+For 2.6 kernels the firmware is loaded at the point that the driver module is
+loaded. See linux/Documentation/dvb/firmware.txt for more information.
+
+Copy the three files downloaded above into the /usr/lib/hotplug/firmware or
+/lib/firmware directory (depending on configuration of firmware hotplug).
diff --git a/Documentation/dvb/udev.txt b/Documentation/dvb/udev.txt
new file mode 100644
index 000000000..412305b7c
--- /dev/null
+++ b/Documentation/dvb/udev.txt
@@ -0,0 +1,46 @@
+The DVB subsystem currently registers to the sysfs subsystem using the
+"class_simple" interface.
+
+This means that only the basic information like module loading parameters
+are presented through sysfs. Other things that might be interesting are
+currently *not* available.
+
+Nevertheless it's now possible to add proper udev rules so that the
+DVB device nodes are created automatically.
+
+We assume that you have udev already up and running and that have been
+creating the DVB device nodes manually up to now due to the missing sysfs
+support.
+
+0. Don't forget to disable your current method of creating the
+device nodes manually.
+
+1. Unfortunately, you'll need a helper script to transform the kernel
+sysfs device name into the well known dvb adapter / device naming scheme.
+The script should be called "dvb.sh" and should be placed into a script
+dir where udev can execute it, most likely /etc/udev/scripts/
+
+So, create a new file /etc/udev/scripts/dvb.sh and add the following:
+------------------------------schnipp------------------------------------------------
+#!/bin/sh
+/bin/echo $1 | /bin/sed -e 's,dvb\([0-9]\)\.\([^0-9]*\)\([0-9]\),dvb/adapter\1/\2\3,'
+------------------------------schnipp------------------------------------------------
+
+Don't forget to make the script executable with "chmod".
+
+1. You need to create a proper udev rule that will create the device nodes
+like you know them. All real distributions out there scan the /etc/udev/rules.d
+directory for rule files. The main udev configuration file /etc/udev/udev.conf
+will tell you the directory where the rules are, most likely it's /etc/udev/rules.d/
+
+Create a new rule file in that directory called "dvb.rule" and add the following line:
+------------------------------schnipp------------------------------------------------
+KERNEL="dvb*", PROGRAM="/etc/udev/scripts/dvb.sh %k", NAME="%c"
+------------------------------schnipp------------------------------------------------
+
+If you want more control over the device nodes (for example a special group membership)
+have a look at "man udev".
+
+For every device that registers to the sysfs subsystem with a "dvb" prefix,
+the helper script /etc/udev/scripts/dvb.sh is invoked, which will then
+create the proper device node in your /dev/ directory.
diff --git a/Documentation/dynamic-debug-howto.txt b/Documentation/dynamic-debug-howto.txt
new file mode 100644
index 000000000..9417871b8
--- /dev/null
+++ b/Documentation/dynamic-debug-howto.txt
@@ -0,0 +1,340 @@
+
+Introduction
+============
+
+This document describes how to use the dynamic debug (dyndbg) feature.
+
+Dynamic debug is designed to allow you to dynamically enable/disable
+kernel code to obtain additional kernel information. Currently, if
+CONFIG_DYNAMIC_DEBUG is set, then all pr_debug()/dev_dbg() and
+print_hex_dump_debug()/print_hex_dump_bytes() calls can be dynamically
+enabled per-callsite.
+
+If CONFIG_DYNAMIC_DEBUG is not set, print_hex_dump_debug() is just
+shortcut for print_hex_dump(KERN_DEBUG).
+
+For print_hex_dump_debug()/print_hex_dump_bytes(), format string is
+its 'prefix_str' argument, if it is constant string; or "hexdump"
+in case 'prefix_str' is build dynamically.
+
+Dynamic debug has even more useful features:
+
+ * Simple query language allows turning on and off debugging
+ statements by matching any combination of 0 or 1 of:
+
+ - source filename
+ - function name
+ - line number (including ranges of line numbers)
+ - module name
+ - format string
+
+ * Provides a debugfs control file: <debugfs>/dynamic_debug/control
+ which can be read to display the complete list of known debug
+ statements, to help guide you
+
+Controlling dynamic debug Behaviour
+===================================
+
+The behaviour of pr_debug()/dev_dbg()s are controlled via writing to a
+control file in the 'debugfs' filesystem. Thus, you must first mount
+the debugfs filesystem, in order to make use of this feature.
+Subsequently, we refer to the control file as:
+<debugfs>/dynamic_debug/control. For example, if you want to enable
+printing from source file 'svcsock.c', line 1603 you simply do:
+
+nullarbor:~ # echo 'file svcsock.c line 1603 +p' >
+ <debugfs>/dynamic_debug/control
+
+If you make a mistake with the syntax, the write will fail thus:
+
+nullarbor:~ # echo 'file svcsock.c wtf 1 +p' >
+ <debugfs>/dynamic_debug/control
+-bash: echo: write error: Invalid argument
+
+Viewing Dynamic Debug Behaviour
+===========================
+
+You can view the currently configured behaviour of all the debug
+statements via:
+
+nullarbor:~ # cat <debugfs>/dynamic_debug/control
+# filename:lineno [module]function flags format
+/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:323 [svcxprt_rdma]svc_rdma_cleanup =_ "SVCRDMA Module Removed, deregister RPC RDMA transport\012"
+/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:341 [svcxprt_rdma]svc_rdma_init =_ "\011max_inline : %d\012"
+/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:340 [svcxprt_rdma]svc_rdma_init =_ "\011sq_depth : %d\012"
+/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svc_rdma.c:338 [svcxprt_rdma]svc_rdma_init =_ "\011max_requests : %d\012"
+...
+
+
+You can also apply standard Unix text manipulation filters to this
+data, e.g.
+
+nullarbor:~ # grep -i rdma <debugfs>/dynamic_debug/control | wc -l
+62
+
+nullarbor:~ # grep -i tcp <debugfs>/dynamic_debug/control | wc -l
+42
+
+The third column shows the currently enabled flags for each debug
+statement callsite (see below for definitions of the flags). The
+default value, with no flags enabled, is "=_". So you can view all
+the debug statement callsites with any non-default flags:
+
+nullarbor:~ # awk '$3 != "=_"' <debugfs>/dynamic_debug/control
+# filename:lineno [module]function flags format
+/usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c:1603 [sunrpc]svc_send p "svc_process: st_sendto returned %d\012"
+
+
+Command Language Reference
+==========================
+
+At the lexical level, a command comprises a sequence of words separated
+by spaces or tabs. So these are all equivalent:
+
+nullarbor:~ # echo -c 'file svcsock.c line 1603 +p' >
+ <debugfs>/dynamic_debug/control
+nullarbor:~ # echo -c ' file svcsock.c line 1603 +p ' >
+ <debugfs>/dynamic_debug/control
+nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
+ <debugfs>/dynamic_debug/control
+
+Command submissions are bounded by a write() system call.
+Multiple commands can be written together, separated by ';' or '\n'.
+
+ ~# echo "func pnpacpi_get_resources +p; func pnp_assign_mem +p" \
+ > <debugfs>/dynamic_debug/control
+
+If your query set is big, you can batch them too:
+
+ ~# cat query-batch-file > <debugfs>/dynamic_debug/control
+
+A another way is to use wildcard. The match rule support '*' (matches
+zero or more characters) and '?' (matches exactly one character).For
+example, you can match all usb drivers:
+
+ ~# echo "file drivers/usb/* +p" > <debugfs>/dynamic_debug/control
+
+At the syntactical level, a command comprises a sequence of match
+specifications, followed by a flags change specification.
+
+command ::= match-spec* flags-spec
+
+The match-spec's are used to choose a subset of the known pr_debug()
+callsites to which to apply the flags-spec. Think of them as a query
+with implicit ANDs between each pair. Note that an empty list of
+match-specs will select all debug statement callsites.
+
+A match specification comprises a keyword, which controls the
+attribute of the callsite to be compared, and a value to compare
+against. Possible keywords are:
+
+match-spec ::= 'func' string |
+ 'file' string |
+ 'module' string |
+ 'format' string |
+ 'line' line-range
+
+line-range ::= lineno |
+ '-'lineno |
+ lineno'-' |
+ lineno'-'lineno
+// Note: line-range cannot contain space, e.g.
+// "1-30" is valid range but "1 - 30" is not.
+
+lineno ::= unsigned-int
+
+The meanings of each keyword are:
+
+func
+ The given string is compared against the function name
+ of each callsite. Example:
+
+ func svc_tcp_accept
+
+file
+ The given string is compared against either the full pathname, the
+ src-root relative pathname, or the basename of the source file of
+ each callsite. Examples:
+
+ file svcsock.c
+ file kernel/freezer.c
+ file /usr/src/packages/BUILD/sgi-enhancednfs-1.4/default/net/sunrpc/svcsock.c
+
+module
+ The given string is compared against the module name
+ of each callsite. The module name is the string as
+ seen in "lsmod", i.e. without the directory or the .ko
+ suffix and with '-' changed to '_'. Examples:
+
+ module sunrpc
+ module nfsd
+
+format
+ The given string is searched for in the dynamic debug format
+ string. Note that the string does not need to match the
+ entire format, only some part. Whitespace and other
+ special characters can be escaped using C octal character
+ escape \ooo notation, e.g. the space character is \040.
+ Alternatively, the string can be enclosed in double quote
+ characters (") or single quote characters (').
+ Examples:
+
+ format svcrdma: // many of the NFS/RDMA server pr_debugs
+ format readahead // some pr_debugs in the readahead cache
+ format nfsd:\040SETATTR // one way to match a format with whitespace
+ format "nfsd: SETATTR" // a neater way to match a format with whitespace
+ format 'nfsd: SETATTR' // yet another way to match a format with whitespace
+
+line
+ The given line number or range of line numbers is compared
+ against the line number of each pr_debug() callsite. A single
+ line number matches the callsite line number exactly. A
+ range of line numbers matches any callsite between the first
+ and last line number inclusive. An empty first number means
+ the first line in the file, an empty line number means the
+ last number in the file. Examples:
+
+ line 1603 // exactly line 1603
+ line 1600-1605 // the six lines from line 1600 to line 1605
+ line -1605 // the 1605 lines from line 1 to line 1605
+ line 1600- // all lines from line 1600 to the end of the file
+
+The flags specification comprises a change operation followed
+by one or more flag characters. The change operation is one
+of the characters:
+
+ - remove the given flags
+ + add the given flags
+ = set the flags to the given flags
+
+The flags are:
+
+ p enables the pr_debug() callsite.
+ f Include the function name in the printed message
+ l Include line number in the printed message
+ m Include module name in the printed message
+ t Include thread ID in messages not generated from interrupt context
+ _ No flags are set. (Or'd with others on input)
+
+For print_hex_dump_debug() and print_hex_dump_bytes(), only 'p' flag
+have meaning, other flags ignored.
+
+For display, the flags are preceded by '='
+(mnemonic: what the flags are currently equal to).
+
+Note the regexp ^[-+=][flmpt_]+$ matches a flags specification.
+To clear all flags at once, use "=_" or "-flmpt".
+
+
+Debug messages during Boot Process
+==================================
+
+To activate debug messages for core code and built-in modules during
+the boot process, even before userspace and debugfs exists, use
+dyndbg="QUERY", module.dyndbg="QUERY", or ddebug_query="QUERY"
+(ddebug_query is obsoleted by dyndbg, and deprecated). QUERY follows
+the syntax described above, but must not exceed 1023 characters. Your
+bootloader may impose lower limits.
+
+These dyndbg params are processed just after the ddebug tables are
+processed, as part of the arch_initcall. Thus you can enable debug
+messages in all code run after this arch_initcall via this boot
+parameter.
+
+On an x86 system for example ACPI enablement is a subsys_initcall and
+ dyndbg="file ec.c +p"
+will show early Embedded Controller transactions during ACPI setup if
+your machine (typically a laptop) has an Embedded Controller.
+PCI (or other devices) initialization also is a hot candidate for using
+this boot parameter for debugging purposes.
+
+If foo module is not built-in, foo.dyndbg will still be processed at
+boot time, without effect, but will be reprocessed when module is
+loaded later. dyndbg_query= and bare dyndbg= are only processed at
+boot.
+
+
+Debug Messages at Module Initialization Time
+============================================
+
+When "modprobe foo" is called, modprobe scans /proc/cmdline for
+foo.params, strips "foo.", and passes them to the kernel along with
+params given in modprobe args or /etc/modprob.d/*.conf files,
+in the following order:
+
+1. # parameters given via /etc/modprobe.d/*.conf
+ options foo dyndbg=+pt
+ options foo dyndbg # defaults to +p
+
+2. # foo.dyndbg as given in boot args, "foo." is stripped and passed
+ foo.dyndbg=" func bar +p; func buz +mp"
+
+3. # args to modprobe
+ modprobe foo dyndbg==pmf # override previous settings
+
+These dyndbg queries are applied in order, with last having final say.
+This allows boot args to override or modify those from /etc/modprobe.d
+(sensible, since 1 is system wide, 2 is kernel or boot specific), and
+modprobe args to override both.
+
+In the foo.dyndbg="QUERY" form, the query must exclude "module foo".
+"foo" is extracted from the param-name, and applied to each query in
+"QUERY", and only 1 match-spec of each type is allowed.
+
+The dyndbg option is a "fake" module parameter, which means:
+
+- modules do not need to define it explicitly
+- every module gets it tacitly, whether they use pr_debug or not
+- it doesn't appear in /sys/module/$module/parameters/
+ To see it, grep the control file, or inspect /proc/cmdline.
+
+For CONFIG_DYNAMIC_DEBUG kernels, any settings given at boot-time (or
+enabled by -DDEBUG flag during compilation) can be disabled later via
+the sysfs interface if the debug messages are no longer needed:
+
+ echo "module module_name -p" > <debugfs>/dynamic_debug/control
+
+Examples
+========
+
+// enable the message at line 1603 of file svcsock.c
+nullarbor:~ # echo -n 'file svcsock.c line 1603 +p' >
+ <debugfs>/dynamic_debug/control
+
+// enable all the messages in file svcsock.c
+nullarbor:~ # echo -n 'file svcsock.c +p' >
+ <debugfs>/dynamic_debug/control
+
+// enable all the messages in the NFS server module
+nullarbor:~ # echo -n 'module nfsd +p' >
+ <debugfs>/dynamic_debug/control
+
+// enable all 12 messages in the function svc_process()
+nullarbor:~ # echo -n 'func svc_process +p' >
+ <debugfs>/dynamic_debug/control
+
+// disable all 12 messages in the function svc_process()
+nullarbor:~ # echo -n 'func svc_process -p' >
+ <debugfs>/dynamic_debug/control
+
+// enable messages for NFS calls READ, READLINK, READDIR and READDIR+.
+nullarbor:~ # echo -n 'format "nfsd: READ" +p' >
+ <debugfs>/dynamic_debug/control
+
+// enable messages in files of which the paths include string "usb"
+nullarbor:~ # echo -n '*usb* +p' > <debugfs>/dynamic_debug/control
+
+// enable all messages
+nullarbor:~ # echo -n '+p' > <debugfs>/dynamic_debug/control
+
+// add module, function to all enabled messages
+nullarbor:~ # echo -n '+mf' > <debugfs>/dynamic_debug/control
+
+// boot-args example, with newlines and comments for readability
+Kernel command line: ...
+ // see whats going on in dyndbg=value processing
+ dynamic_debug.verbose=1
+ // enable pr_debugs in 2 builtins, #cmt is stripped
+ dyndbg="module params +p #cmt ; module sys +p"
+ // enable pr_debugs in 2 functions in a module loaded later
+ pc87360.dyndbg="func pc87360_init_device +p; func pc87360_find +p"
diff --git a/Documentation/early-userspace/README b/Documentation/early-userspace/README
new file mode 100644
index 000000000..93e63a9af
--- /dev/null
+++ b/Documentation/early-userspace/README
@@ -0,0 +1,151 @@
+Early userspace support
+=======================
+
+Last update: 2004-12-20 tlh
+
+
+"Early userspace" is a set of libraries and programs that provide
+various pieces of functionality that are important enough to be
+available while a Linux kernel is coming up, but that don't need to be
+run inside the kernel itself.
+
+It consists of several major infrastructure components:
+
+- gen_init_cpio, a program that builds a cpio-format archive
+ containing a root filesystem image. This archive is compressed, and
+ the compressed image is linked into the kernel image.
+- initramfs, a chunk of code that unpacks the compressed cpio image
+ midway through the kernel boot process.
+- klibc, a userspace C library, currently packaged separately, that is
+ optimized for correctness and small size.
+
+The cpio file format used by initramfs is the "newc" (aka "cpio -H newc")
+format, and is documented in the file "buffer-format.txt". There are
+two ways to add an early userspace image: specify an existing cpio
+archive to be used as the image or have the kernel build process build
+the image from specifications.
+
+CPIO ARCHIVE method
+
+You can create a cpio archive that contains the early userspace image.
+Your cpio archive should be specified in CONFIG_INITRAMFS_SOURCE and it
+will be used directly. Only a single cpio file may be specified in
+CONFIG_INITRAMFS_SOURCE and directory and file names are not allowed in
+combination with a cpio archive.
+
+IMAGE BUILDING method
+
+The kernel build process can also build an early userspace image from
+source parts rather than supplying a cpio archive. This method provides
+a way to create images with root-owned files even though the image was
+built by an unprivileged user.
+
+The image is specified as one or more sources in
+CONFIG_INITRAMFS_SOURCE. Sources can be either directories or files -
+cpio archives are *not* allowed when building from sources.
+
+A source directory will have it and all of its contents packaged. The
+specified directory name will be mapped to '/'. When packaging a
+directory, limited user and group ID translation can be performed.
+INITRAMFS_ROOT_UID can be set to a user ID that needs to be mapped to
+user root (0). INITRAMFS_ROOT_GID can be set to a group ID that needs
+to be mapped to group root (0).
+
+A source file must be directives in the format required by the
+usr/gen_init_cpio utility (run 'usr/gen_init_cpio --help' to get the
+file format). The directives in the file will be passed directly to
+usr/gen_init_cpio.
+
+When a combination of directories and files are specified then the
+initramfs image will be an aggregate of all of them. In this way a user
+can create a 'root-image' directory and install all files into it.
+Because device-special files cannot be created by a unprivileged user,
+special files can be listed in a 'root-files' file. Both 'root-image'
+and 'root-files' can be listed in CONFIG_INITRAMFS_SOURCE and a complete
+early userspace image can be built by an unprivileged user.
+
+As a technical note, when directories and files are specified, the
+entire CONFIG_INITRAMFS_SOURCE is passed to
+scripts/gen_initramfs_list.sh. This means that CONFIG_INITRAMFS_SOURCE
+can really be interpreted as any legal argument to
+gen_initramfs_list.sh. If a directory is specified as an argument then
+the contents are scanned, uid/gid translation is performed, and
+usr/gen_init_cpio file directives are output. If a directory is
+specified as an argument to scripts/gen_initramfs_list.sh then the
+contents of the file are simply copied to the output. All of the output
+directives from directory scanning and file contents copying are
+processed by usr/gen_init_cpio.
+
+See also 'scripts/gen_initramfs_list.sh -h'.
+
+Where's this all leading?
+=========================
+
+The klibc distribution contains some of the necessary software to make
+early userspace useful. The klibc distribution is currently
+maintained separately from the kernel.
+
+You can obtain somewhat infrequent snapshots of klibc from
+ftp://ftp.kernel.org/pub/linux/libs/klibc/
+
+For active users, you are better off using the klibc git
+repository, at http://git.kernel.org/?p=libs/klibc/klibc.git
+
+The standalone klibc distribution currently provides three components,
+in addition to the klibc library:
+
+- ipconfig, a program that configures network interfaces. It can
+ configure them statically, or use DHCP to obtain information
+ dynamically (aka "IP autoconfiguration").
+- nfsmount, a program that can mount an NFS filesystem.
+- kinit, the "glue" that uses ipconfig and nfsmount to replace the old
+ support for IP autoconfig, mount a filesystem over NFS, and continue
+ system boot using that filesystem as root.
+
+kinit is built as a single statically linked binary to save space.
+
+Eventually, several more chunks of kernel functionality will hopefully
+move to early userspace:
+
+- Almost all of init/do_mounts* (the beginning of this is already in
+ place)
+- ACPI table parsing
+- Insert unwieldy subsystem that doesn't really need to be in kernel
+ space here
+
+If kinit doesn't meet your current needs and you've got bytes to burn,
+the klibc distribution includes a small Bourne-compatible shell (ash)
+and a number of other utilities, so you can replace kinit and build
+custom initramfs images that meet your needs exactly.
+
+For questions and help, you can sign up for the early userspace
+mailing list at http://www.zytor.com/mailman/listinfo/klibc
+
+How does it work?
+=================
+
+The kernel has currently 3 ways to mount the root filesystem:
+
+a) all required device and filesystem drivers compiled into the kernel, no
+ initrd. init/main.c:init() will call prepare_namespace() to mount the
+ final root filesystem, based on the root= option and optional init= to run
+ some other init binary than listed at the end of init/main.c:init().
+
+b) some device and filesystem drivers built as modules and stored in an
+ initrd. The initrd must contain a binary '/linuxrc' which is supposed to
+ load these driver modules. It is also possible to mount the final root
+ filesystem via linuxrc and use the pivot_root syscall. The initrd is
+ mounted and executed via prepare_namespace().
+
+c) using initramfs. The call to prepare_namespace() must be skipped.
+ This means that a binary must do all the work. Said binary can be stored
+ into initramfs either via modifying usr/gen_init_cpio.c or via the new
+ initrd format, an cpio archive. It must be called "/init". This binary
+ is responsible to do all the things prepare_namespace() would do.
+
+ To maintain backwards compatibility, the /init binary will only run if it
+ comes via an initramfs cpio archive. If this is not the case,
+ init/main.c:init() will run prepare_namespace() to mount the final root
+ and exec one of the predefined init binaries.
+
+Bryan O'Sullivan <bos@serpentine.com>
diff --git a/Documentation/early-userspace/buffer-format.txt b/Documentation/early-userspace/buffer-format.txt
new file mode 100644
index 000000000..e1fd7f9da
--- /dev/null
+++ b/Documentation/early-userspace/buffer-format.txt
@@ -0,0 +1,112 @@
+ initramfs buffer format
+ -----------------------
+
+ Al Viro, H. Peter Anvin
+ Last revision: 2002-01-13
+
+Starting with kernel 2.5.x, the old "initial ramdisk" protocol is
+getting {replaced/complemented} with the new "initial ramfs"
+(initramfs) protocol. The initramfs contents is passed using the same
+memory buffer protocol used by the initrd protocol, but the contents
+is different. The initramfs buffer contains an archive which is
+expanded into a ramfs filesystem; this document details the format of
+the initramfs buffer format.
+
+The initramfs buffer format is based around the "newc" or "crc" CPIO
+formats, and can be created with the cpio(1) utility. The cpio
+archive can be compressed using gzip(1). One valid version of an
+initramfs buffer is thus a single .cpio.gz file.
+
+The full format of the initramfs buffer is defined by the following
+grammar, where:
+ * is used to indicate "0 or more occurrences of"
+ (|) indicates alternatives
+ + indicates concatenation
+ GZIP() indicates the gzip(1) of the operand
+ ALGN(n) means padding with null bytes to an n-byte boundary
+
+ initramfs := ("\0" | cpio_archive | cpio_gzip_archive)*
+
+ cpio_gzip_archive := GZIP(cpio_archive)
+
+ cpio_archive := cpio_file* + (<nothing> | cpio_trailer)
+
+ cpio_file := ALGN(4) + cpio_header + filename + "\0" + ALGN(4) + data
+
+ cpio_trailer := ALGN(4) + cpio_header + "TRAILER!!!\0" + ALGN(4)
+
+
+In human terms, the initramfs buffer contains a collection of
+compressed and/or uncompressed cpio archives (in the "newc" or "crc"
+formats); arbitrary amounts zero bytes (for padding) can be added
+between members.
+
+The cpio "TRAILER!!!" entry (cpio end-of-archive) is optional, but is
+not ignored; see "handling of hard links" below.
+
+The structure of the cpio_header is as follows (all fields contain
+hexadecimal ASCII numbers fully padded with '0' on the left to the
+full width of the field, for example, the integer 4780 is represented
+by the ASCII string "000012ac"):
+
+Field name Field size Meaning
+c_magic 6 bytes The string "070701" or "070702"
+c_ino 8 bytes File inode number
+c_mode 8 bytes File mode and permissions
+c_uid 8 bytes File uid
+c_gid 8 bytes File gid
+c_nlink 8 bytes Number of links
+c_mtime 8 bytes Modification time
+c_filesize 8 bytes Size of data field
+c_maj 8 bytes Major part of file device number
+c_min 8 bytes Minor part of file device number
+c_rmaj 8 bytes Major part of device node reference
+c_rmin 8 bytes Minor part of device node reference
+c_namesize 8 bytes Length of filename, including final \0
+c_chksum 8 bytes Checksum of data field if c_magic is 070702;
+ otherwise zero
+
+The c_mode field matches the contents of st_mode returned by stat(2)
+on Linux, and encodes the file type and file permissions.
+
+The c_filesize should be zero for any file which is not a regular file
+or symlink.
+
+The c_chksum field contains a simple 32-bit unsigned sum of all the
+bytes in the data field. cpio(1) refers to this as "crc", which is
+clearly incorrect (a cyclic redundancy check is a different and
+significantly stronger integrity check), however, this is the
+algorithm used.
+
+If the filename is "TRAILER!!!" this is actually an end-of-archive
+marker; the c_filesize for an end-of-archive marker must be zero.
+
+
+*** Handling of hard links
+
+When a nondirectory with c_nlink > 1 is seen, the (c_maj,c_min,c_ino)
+tuple is looked up in a tuple buffer. If not found, it is entered in
+the tuple buffer and the entry is created as usual; if found, a hard
+link rather than a second copy of the file is created. It is not
+necessary (but permitted) to include a second copy of the file
+contents; if the file contents is not included, the c_filesize field
+should be set to zero to indicate no data section follows. If data is
+present, the previous instance of the file is overwritten; this allows
+the data-carrying instance of a file to occur anywhere in the sequence
+(GNU cpio is reported to attach the data to the last instance of a
+file only.)
+
+c_filesize must not be zero for a symlink.
+
+When a "TRAILER!!!" end-of-archive marker is seen, the tuple buffer is
+reset. This permits archives which are generated independently to be
+concatenated.
+
+To combine file data from different sources (without having to
+regenerate the (c_maj,c_min,c_ino) fields), therefore, either one of
+the following techniques can be used:
+
+a) Separate the different file data sources with a "TRAILER!!!"
+ end-of-archive marker, or
+
+b) Make sure c_nlink == 1 for all nondirectory entries.
diff --git a/Documentation/edac.txt b/Documentation/edac.txt
new file mode 100644
index 000000000..73fff13e8
--- /dev/null
+++ b/Documentation/edac.txt
@@ -0,0 +1,775 @@
+
+
+EDAC - Error Detection And Correction
+
+Written by Doug Thompson <dougthompson@xmission.com>
+7 Dec 2005
+17 Jul 2007 Updated
+
+(c) Mauro Carvalho Chehab
+05 Aug 2009 Nehalem interface
+
+EDAC is maintained and written by:
+
+ Doug Thompson, Dave Jiang, Dave Peterson et al,
+ original author: Thayne Harbaugh,
+
+Contact:
+ website: bluesmoke.sourceforge.net
+ mailing list: bluesmoke-devel@lists.sourceforge.net
+
+"bluesmoke" was the name for this device driver when it was "out-of-tree"
+and maintained at sourceforge.net. When it was pushed into 2.6.16 for the
+first time, it was renamed to 'EDAC'.
+
+The bluesmoke project at sourceforge.net is now utilized as a 'staging area'
+for EDAC development, before it is sent upstream to kernel.org
+
+At the bluesmoke/EDAC project site is a series of quilt patches against
+recent kernels, stored in a SVN repository. For easier downloading, there
+is also a tarball snapshot available.
+
+============================================================================
+EDAC PURPOSE
+
+The 'edac' kernel module goal is to detect and report errors that occur
+within the computer system running under linux.
+
+MEMORY
+
+In the initial release, memory Correctable Errors (CE) and Uncorrectable
+Errors (UE) are the primary errors being harvested. These types of errors
+are harvested by the 'edac_mc' class of device.
+
+Detecting CE events, then harvesting those events and reporting them,
+CAN be a predictor of future UE events. With CE events, the system can
+continue to operate, but with less safety. Preventive maintenance and
+proactive part replacement of memory DIMMs exhibiting CEs can reduce
+the likelihood of the dreaded UE events and system 'panics'.
+
+NON-MEMORY
+
+A new feature for EDAC, the edac_device class of device, was added in
+the 2.6.23 version of the kernel.
+
+This new device type allows for non-memory type of ECC hardware detectors
+to have their states harvested and presented to userspace via the sysfs
+interface.
+
+Some architectures have ECC detectors for L1, L2 and L3 caches, along with DMA
+engines, fabric switches, main data path switches, interconnections,
+and various other hardware data paths. If the hardware reports it, then
+a edac_device device probably can be constructed to harvest and present
+that to userspace.
+
+
+PCI BUS SCANNING
+
+In addition, PCI Bus Parity and SERR Errors are scanned for on PCI devices
+in order to determine if errors are occurring on data transfers.
+
+The presence of PCI Parity errors must be examined with a grain of salt.
+There are several add-in adapters that do NOT follow the PCI specification
+with regards to Parity generation and reporting. The specification says
+the vendor should tie the parity status bits to 0 if they do not intend
+to generate parity. Some vendors do not do this, and thus the parity bit
+can "float" giving false positives.
+
+In the kernel there is a PCI device attribute located in sysfs that is
+checked by the EDAC PCI scanning code. If that attribute is set,
+PCI parity/error scanning is skipped for that device. The attribute
+is:
+
+ broken_parity_status
+
+as is located in /sys/devices/pci<XXX>/0000:XX:YY.Z directories for
+PCI devices.
+
+FUTURE HARDWARE SCANNING
+
+EDAC will have future error detectors that will be integrated with
+EDAC or added to it, in the following list:
+
+ MCE Machine Check Exception
+ MCA Machine Check Architecture
+ NMI NMI notification of ECC errors
+ MSRs Machine Specific Register error cases
+ and other mechanisms.
+
+These errors are usually bus errors, ECC errors, thermal throttling
+and the like.
+
+
+============================================================================
+EDAC VERSIONING
+
+EDAC is composed of a "core" module (edac_core.ko) and several Memory
+Controller (MC) driver modules. On a given system, the CORE
+is loaded and one MC driver will be loaded. Both the CORE and
+the MC driver (or edac_device driver) have individual versions that reflect
+current release level of their respective modules.
+
+Thus, to "report" on what version a system is running, one must report both
+the CORE's and the MC driver's versions.
+
+
+LOADING
+
+If 'edac' was statically linked with the kernel then no loading is
+necessary. If 'edac' was built as modules then simply modprobe the
+'edac' pieces that you need. You should be able to modprobe
+hardware-specific modules and have the dependencies load the necessary core
+modules.
+
+Example:
+
+$> modprobe amd76x_edac
+
+loads both the amd76x_edac.ko memory controller module and the edac_mc.ko
+core module.
+
+
+============================================================================
+EDAC sysfs INTERFACE
+
+EDAC presents a 'sysfs' interface for control, reporting and attribute
+reporting purposes.
+
+EDAC lives in the /sys/devices/system/edac directory.
+
+Within this directory there currently reside 2 'edac' components:
+
+ mc memory controller(s) system
+ pci PCI control and status system
+
+
+============================================================================
+Memory Controller (mc) Model
+
+First a background on the memory controller's model abstracted in EDAC.
+Each 'mc' device controls a set of DIMM memory modules. These modules are
+laid out in a Chip-Select Row (csrowX) and Channel table (chX). There can
+be multiple csrows and multiple channels.
+
+Memory controllers allow for several csrows, with 8 csrows being a typical value.
+Yet, the actual number of csrows depends on the electrical "loading"
+of a given motherboard, memory controller and DIMM characteristics.
+
+Dual channels allows for 128 bit data transfers to the CPU from memory.
+Some newer chipsets allow for more than 2 channels, like Fully Buffered DIMMs
+(FB-DIMMs). The following example will assume 2 channels:
+
+
+ Channel 0 Channel 1
+ ===================================
+ csrow0 | DIMM_A0 | DIMM_B0 |
+ csrow1 | DIMM_A0 | DIMM_B0 |
+ ===================================
+
+ ===================================
+ csrow2 | DIMM_A1 | DIMM_B1 |
+ csrow3 | DIMM_A1 | DIMM_B1 |
+ ===================================
+
+In the above example table there are 4 physical slots on the motherboard
+for memory DIMMs:
+
+ DIMM_A0
+ DIMM_B0
+ DIMM_A1
+ DIMM_B1
+
+Labels for these slots are usually silk screened on the motherboard. Slots
+labeled 'A' are channel 0 in this example. Slots labeled 'B'
+are channel 1. Notice that there are two csrows possible on a
+physical DIMM. These csrows are allocated their csrow assignment
+based on the slot into which the memory DIMM is placed. Thus, when 1 DIMM
+is placed in each Channel, the csrows cross both DIMMs.
+
+Memory DIMMs come single or dual "ranked". A rank is a populated csrow.
+Thus, 2 single ranked DIMMs, placed in slots DIMM_A0 and DIMM_B0 above
+will have 1 csrow, csrow0. csrow1 will be empty. On the other hand,
+when 2 dual ranked DIMMs are similarly placed, then both csrow0 and
+csrow1 will be populated. The pattern repeats itself for csrow2 and
+csrow3.
+
+The representation of the above is reflected in the directory tree
+in EDAC's sysfs interface. Starting in directory
+/sys/devices/system/edac/mc each memory controller will be represented
+by its own 'mcX' directory, where 'X' is the index of the MC.
+
+
+ ..../edac/mc/
+ |
+ |->mc0
+ |->mc1
+ |->mc2
+ ....
+
+Under each 'mcX' directory each 'csrowX' is again represented by a
+'csrowX', where 'X' is the csrow index:
+
+
+ .../mc/mc0/
+ |
+ |->csrow0
+ |->csrow2
+ |->csrow3
+ ....
+
+Notice that there is no csrow1, which indicates that csrow0 is
+composed of a single ranked DIMMs. This should also apply in both
+Channels, in order to have dual-channel mode be operational. Since
+both csrow2 and csrow3 are populated, this indicates a dual ranked
+set of DIMMs for channels 0 and 1.
+
+
+Within each of the 'mcX' and 'csrowX' directories are several
+EDAC control and attribute files.
+
+============================================================================
+'mcX' DIRECTORIES
+
+
+In 'mcX' directories are EDAC control and attribute files for
+this 'X' instance of the memory controllers.
+
+For a description of the sysfs API, please see:
+ Documentation/ABI/testing/sysfs/devices-edac
+
+
+============================================================================
+'csrowX' DIRECTORIES
+
+When CONFIG_EDAC_LEGACY_SYSFS is enabled, the sysfs will contain the
+csrowX directories. As this API doesn't work properly for Rambus, FB-DIMMs
+and modern Intel Memory Controllers, this is being deprecated in favor
+of dimmX directories.
+
+In the 'csrowX' directories are EDAC control and attribute files for
+this 'X' instance of csrow:
+
+
+Total Uncorrectable Errors count attribute file:
+
+ 'ue_count'
+
+ This attribute file displays the total count of uncorrectable
+ errors that have occurred on this csrow. If panic_on_ue is set
+ this counter will not have a chance to increment, since EDAC
+ will panic the system.
+
+
+Total Correctable Errors count attribute file:
+
+ 'ce_count'
+
+ This attribute file displays the total count of correctable
+ errors that have occurred on this csrow. This
+ count is very important to examine. CEs provide early
+ indications that a DIMM is beginning to fail. This count
+ field should be monitored for non-zero values and report
+ such information to the system administrator.
+
+
+Total memory managed by this csrow attribute file:
+
+ 'size_mb'
+
+ This attribute file displays, in count of megabytes, of memory
+ that this csrow contains.
+
+
+Memory Type attribute file:
+
+ 'mem_type'
+
+ This attribute file will display what type of memory is currently
+ on this csrow. Normally, either buffered or unbuffered memory.
+ Examples:
+ Registered-DDR
+ Unbuffered-DDR
+
+
+EDAC Mode of operation attribute file:
+
+ 'edac_mode'
+
+ This attribute file will display what type of Error detection
+ and correction is being utilized.
+
+
+Device type attribute file:
+
+ 'dev_type'
+
+ This attribute file will display what type of DRAM device is
+ being utilized on this DIMM.
+ Examples:
+ x1
+ x2
+ x4
+ x8
+
+
+Channel 0 CE Count attribute file:
+
+ 'ch0_ce_count'
+
+ This attribute file will display the count of CEs on this
+ DIMM located in channel 0.
+
+
+Channel 0 UE Count attribute file:
+
+ 'ch0_ue_count'
+
+ This attribute file will display the count of UEs on this
+ DIMM located in channel 0.
+
+
+Channel 0 DIMM Label control file:
+
+ 'ch0_dimm_label'
+
+ This control file allows this DIMM to have a label assigned
+ to it. With this label in the module, when errors occur
+ the output can provide the DIMM label in the system log.
+ This becomes vital for panic events to isolate the
+ cause of the UE event.
+
+ DIMM Labels must be assigned after booting, with information
+ that correctly identifies the physical slot with its
+ silk screen label. This information is currently very
+ motherboard specific and determination of this information
+ must occur in userland at this time.
+
+
+Channel 1 CE Count attribute file:
+
+ 'ch1_ce_count'
+
+ This attribute file will display the count of CEs on this
+ DIMM located in channel 1.
+
+
+Channel 1 UE Count attribute file:
+
+ 'ch1_ue_count'
+
+ This attribute file will display the count of UEs on this
+ DIMM located in channel 0.
+
+
+Channel 1 DIMM Label control file:
+
+ 'ch1_dimm_label'
+
+ This control file allows this DIMM to have a label assigned
+ to it. With this label in the module, when errors occur
+ the output can provide the DIMM label in the system log.
+ This becomes vital for panic events to isolate the
+ cause of the UE event.
+
+ DIMM Labels must be assigned after booting, with information
+ that correctly identifies the physical slot with its
+ silk screen label. This information is currently very
+ motherboard specific and determination of this information
+ must occur in userland at this time.
+
+============================================================================
+SYSTEM LOGGING
+
+If logging for UEs and CEs are enabled then system logs will have
+error notices indicating errors that have been detected:
+
+EDAC MC0: CE page 0x283, offset 0xce0, grain 8, syndrome 0x6ec3, row 0,
+channel 1 "DIMM_B1": amd76x_edac
+
+EDAC MC0: CE page 0x1e5, offset 0xfb0, grain 8, syndrome 0xb741, row 0,
+channel 1 "DIMM_B1": amd76x_edac
+
+
+The structure of the message is:
+ the memory controller (MC0)
+ Error type (CE)
+ memory page (0x283)
+ offset in the page (0xce0)
+ the byte granularity (grain 8)
+ or resolution of the error
+ the error syndrome (0xb741)
+ memory row (row 0)
+ memory channel (channel 1)
+ DIMM label, if set prior (DIMM B1
+ and then an optional, driver-specific message that may
+ have additional information.
+
+Both UEs and CEs with no info will lack all but memory controller,
+error type, a notice of "no info" and then an optional,
+driver-specific error message.
+
+
+============================================================================
+PCI Bus Parity Detection
+
+
+On Header Type 00 devices the primary status is looked at
+for any parity error regardless of whether Parity is enabled on the
+device. (The spec indicates parity is generated in some cases).
+On Header Type 01 bridges, the secondary status register is also
+looked at to see if parity occurred on the bus on the other side of
+the bridge.
+
+
+SYSFS CONFIGURATION
+
+Under /sys/devices/system/edac/pci are control and attribute files as follows:
+
+
+Enable/Disable PCI Parity checking control file:
+
+ 'check_pci_parity'
+
+
+ This control file enables or disables the PCI Bus Parity scanning
+ operation. Writing a 1 to this file enables the scanning. Writing
+ a 0 to this file disables the scanning.
+
+ Enable:
+ echo "1" >/sys/devices/system/edac/pci/check_pci_parity
+
+ Disable:
+ echo "0" >/sys/devices/system/edac/pci/check_pci_parity
+
+
+Parity Count:
+
+ 'pci_parity_count'
+
+ This attribute file will display the number of parity errors that
+ have been detected.
+
+
+============================================================================
+MODULE PARAMETERS
+
+Panic on UE control file:
+
+ 'edac_mc_panic_on_ue'
+
+ An uncorrectable error will cause a machine panic. This is usually
+ desirable. It is a bad idea to continue when an uncorrectable error
+ occurs - it is indeterminate what was uncorrected and the operating
+ system context might be so mangled that continuing will lead to further
+ corruption. If the kernel has MCE configured, then EDAC will never
+ notice the UE.
+
+ LOAD TIME: module/kernel parameter: edac_mc_panic_on_ue=[0|1]
+
+ RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_panic_on_ue
+
+
+Log UE control file:
+
+ 'edac_mc_log_ue'
+
+ Generate kernel messages describing uncorrectable errors. These errors
+ are reported through the system message log system. UE statistics
+ will be accumulated even when UE logging is disabled.
+
+ LOAD TIME: module/kernel parameter: edac_mc_log_ue=[0|1]
+
+ RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ue
+
+
+Log CE control file:
+
+ 'edac_mc_log_ce'
+
+ Generate kernel messages describing correctable errors. These
+ errors are reported through the system message log system.
+ CE statistics will be accumulated even when CE logging is disabled.
+
+ LOAD TIME: module/kernel parameter: edac_mc_log_ce=[0|1]
+
+ RUN TIME: echo "1" > /sys/module/edac_core/parameters/edac_mc_log_ce
+
+
+Polling period control file:
+
+ 'edac_mc_poll_msec'
+
+ The time period, in milliseconds, for polling for error information.
+ Too small a value wastes resources. Too large a value might delay
+ necessary handling of errors and might loose valuable information for
+ locating the error. 1000 milliseconds (once each second) is the current
+ default. Systems which require all the bandwidth they can get, may
+ increase this.
+
+ LOAD TIME: module/kernel parameter: edac_mc_poll_msec=[0|1]
+
+ RUN TIME: echo "1000" > /sys/module/edac_core/parameters/edac_mc_poll_msec
+
+
+Panic on PCI PARITY Error:
+
+ 'panic_on_pci_parity'
+
+
+ This control files enables or disables panicking when a parity
+ error has been detected.
+
+
+ module/kernel parameter: edac_panic_on_pci_pe=[0|1]
+
+ Enable:
+ echo "1" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
+
+ Disable:
+ echo "0" > /sys/module/edac_core/parameters/edac_panic_on_pci_pe
+
+
+
+=======================================================================
+
+
+EDAC_DEVICE type of device
+
+In the header file, edac_core.h, there is a series of edac_device structures
+and APIs for the EDAC_DEVICE.
+
+User space access to an edac_device is through the sysfs interface.
+
+At the location /sys/devices/system/edac (sysfs) new edac_device devices will
+appear.
+
+There is a three level tree beneath the above 'edac' directory. For example,
+the 'test_device_edac' device (found at the bluesmoke.sourceforget.net website)
+installs itself as:
+
+ /sys/devices/systm/edac/test-instance
+
+in this directory are various controls, a symlink and one or more 'instance'
+directories.
+
+The standard default controls are:
+
+ log_ce boolean to log CE events
+ log_ue boolean to log UE events
+ panic_on_ue boolean to 'panic' the system if an UE is encountered
+ (default off, can be set true via startup script)
+ poll_msec time period between POLL cycles for events
+
+The test_device_edac device adds at least one of its own custom control:
+
+ test_bits which in the current test driver does nothing but
+ show how it is installed. A ported driver can
+ add one or more such controls and/or attributes
+ for specific uses.
+ One out-of-tree driver uses controls here to allow
+ for ERROR INJECTION operations to hardware
+ injection registers
+
+The symlink points to the 'struct dev' that is registered for this edac_device.
+
+INSTANCES
+
+One or more instance directories are present. For the 'test_device_edac' case:
+
+ test-instance0
+
+
+In this directory there are two default counter attributes, which are totals of
+counter in deeper subdirectories.
+
+ ce_count total of CE events of subdirectories
+ ue_count total of UE events of subdirectories
+
+BLOCKS
+
+At the lowest directory level is the 'block' directory. There can be 0, 1
+or more blocks specified in each instance.
+
+ test-block0
+
+
+In this directory the default attributes are:
+
+ ce_count which is counter of CE events for this 'block'
+ of hardware being monitored
+ ue_count which is counter of UE events for this 'block'
+ of hardware being monitored
+
+
+The 'test_device_edac' device adds 4 attributes and 1 control:
+
+ test-block-bits-0 for every POLL cycle this counter
+ is incremented
+ test-block-bits-1 every 10 cycles, this counter is bumped once,
+ and test-block-bits-0 is set to 0
+ test-block-bits-2 every 100 cycles, this counter is bumped once,
+ and test-block-bits-1 is set to 0
+ test-block-bits-3 every 1000 cycles, this counter is bumped once,
+ and test-block-bits-2 is set to 0
+
+
+ reset-counters writing ANY thing to this control will
+ reset all the above counters.
+
+
+Use of the 'test_device_edac' driver should any others to create their own
+unique drivers for their hardware systems.
+
+The 'test_device_edac' sample driver is located at the
+bluesmoke.sourceforge.net project site for EDAC.
+
+=======================================================================
+NEHALEM USAGE OF EDAC APIs
+
+This chapter documents some EXPERIMENTAL mappings for EDAC API to handle
+Nehalem EDAC driver. They will likely be changed on future versions
+of the driver.
+
+Due to the way Nehalem exports Memory Controller data, some adjustments
+were done at i7core_edac driver. This chapter will cover those differences
+
+1) On Nehalem, there are one Memory Controller per Quick Patch Interconnect
+ (QPI). At the driver, the term "socket" means one QPI. This is
+ associated with a physical CPU socket.
+
+ Each MC have 3 physical read channels, 3 physical write channels and
+ 3 logic channels. The driver currently sees it as just 3 channels.
+ Each channel can have up to 3 DIMMs.
+
+ The minimum known unity is DIMMs. There are no information about csrows.
+ As EDAC API maps the minimum unity is csrows, the driver sequencially
+ maps channel/dimm into different csrows.
+
+ For example, supposing the following layout:
+ Ch0 phy rd0, wr0 (0x063f4031): 2 ranks, UDIMMs
+ dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
+ dimm 1 1024 Mb offset: 4, bank: 8, rank: 1, row: 0x4000, col: 0x400
+ Ch1 phy rd1, wr1 (0x063f4031): 2 ranks, UDIMMs
+ dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
+ Ch2 phy rd3, wr3 (0x063f4031): 2 ranks, UDIMMs
+ dimm 0 1024 Mb offset: 0, bank: 8, rank: 1, row: 0x4000, col: 0x400
+ The driver will map it as:
+ csrow0: channel 0, dimm0
+ csrow1: channel 0, dimm1
+ csrow2: channel 1, dimm0
+ csrow3: channel 2, dimm0
+
+exports one
+ DIMM per csrow.
+
+ Each QPI is exported as a different memory controller.
+
+2) Nehalem MC has the hability to generate errors. The driver implements this
+ functionality via some error injection nodes:
+
+ For injecting a memory error, there are some sysfs nodes, under
+ /sys/devices/system/edac/mc/mc?/:
+
+ inject_addrmatch/*:
+ Controls the error injection mask register. It is possible to specify
+ several characteristics of the address to match an error code:
+ dimm = the affected dimm. Numbers are relative to a channel;
+ rank = the memory rank;
+ channel = the channel that will generate an error;
+ bank = the affected bank;
+ page = the page address;
+ column (or col) = the address column.
+ each of the above values can be set to "any" to match any valid value.
+
+ At driver init, all values are set to any.
+
+ For example, to generate an error at rank 1 of dimm 2, for any channel,
+ any bank, any page, any column:
+ echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
+ echo 1 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank
+
+ To return to the default behaviour of matching any, you can do:
+ echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/dimm
+ echo any >/sys/devices/system/edac/mc/mc0/inject_addrmatch/rank
+
+ inject_eccmask:
+ specifies what bits will have troubles,
+
+ inject_section:
+ specifies what ECC cache section will get the error:
+ 3 for both
+ 2 for the highest
+ 1 for the lowest
+
+ inject_type:
+ specifies the type of error, being a combination of the following bits:
+ bit 0 - repeat
+ bit 1 - ecc
+ bit 2 - parity
+
+ inject_enable starts the error generation when something different
+ than 0 is written.
+
+ All inject vars can be read. root permission is needed for write.
+
+ Datasheet states that the error will only be generated after a write on an
+ address that matches inject_addrmatch. It seems, however, that reading will
+ also produce an error.
+
+ For example, the following code will generate an error for any write access
+ at socket 0, on any DIMM/address on channel 2:
+
+ echo 2 >/sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
+ echo 2 >/sys/devices/system/edac/mc/mc0/inject_type
+ echo 64 >/sys/devices/system/edac/mc/mc0/inject_eccmask
+ echo 3 >/sys/devices/system/edac/mc/mc0/inject_section
+ echo 1 >/sys/devices/system/edac/mc/mc0/inject_enable
+ dd if=/dev/mem of=/dev/null seek=16k bs=4k count=1 >& /dev/null
+
+ For socket 1, it is needed to replace "mc0" by "mc1" at the above
+ commands.
+
+ The generated error message will look like:
+
+ EDAC MC0: UE row 0, channel-a= 0 channel-b= 0 labels "-": NON_FATAL (addr = 0x0075b980, socket=0, Dimm=0, Channel=2, syndrome=0x00000040, count=1, Err=8c0000400001009f:4000080482 (read error: read ECC error))
+
+3) Nehalem specific Corrected Error memory counters
+
+ Nehalem have some registers to count memory errors. The driver uses those
+ registers to report Corrected Errors on devices with Registered Dimms.
+
+ However, those counters don't work with Unregistered Dimms. As the chipset
+ offers some counters that also work with UDIMMS (but with a worse level of
+ granularity than the default ones), the driver exposes those registers for
+ UDIMM memories.
+
+ They can be read by looking at the contents of all_channel_counts/
+
+ $ for i in /sys/devices/system/edac/mc/mc0/all_channel_counts/*; do echo $i; cat $i; done
+ /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm0
+ 0
+ /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm1
+ 0
+ /sys/devices/system/edac/mc/mc0/all_channel_counts/udimm2
+ 0
+
+ What happens here is that errors on different csrows, but at the same
+ dimm number will increment the same counter.
+ So, in this memory mapping:
+ csrow0: channel 0, dimm0
+ csrow1: channel 0, dimm1
+ csrow2: channel 1, dimm0
+ csrow3: channel 2, dimm0
+ The hardware will increment udimm0 for an error at the first dimm at either
+ csrow0, csrow2 or csrow3;
+ The hardware will increment udimm1 for an error at the second dimm at either
+ csrow0, csrow2 or csrow3;
+ The hardware will increment udimm2 for an error at the third dimm at either
+ csrow0, csrow2 or csrow3;
+
+4) Standard error counters
+
+ The standard error counters are generated when an mcelog error is received
+ by the driver. Since, with udimm, this is counted by software, it is
+ possible that some errors could be lost. With rdimm's, they displays the
+ contents of the registers
diff --git a/Documentation/efi-stub.txt b/Documentation/efi-stub.txt
new file mode 100644
index 000000000..7747024d3
--- /dev/null
+++ b/Documentation/efi-stub.txt
@@ -0,0 +1,84 @@
+ The EFI Boot Stub
+ ---------------------------
+
+On the x86 and ARM platforms, a kernel zImage/bzImage can masquerade
+as a PE/COFF image, thereby convincing EFI firmware loaders to load
+it as an EFI executable. The code that modifies the bzImage header,
+along with the EFI-specific entry point that the firmware loader
+jumps to are collectively known as the "EFI boot stub", and live in
+arch/x86/boot/header.S and arch/x86/boot/compressed/eboot.c,
+respectively. For ARM the EFI stub is implemented in
+arch/arm/boot/compressed/efi-header.S and
+arch/arm/boot/compressed/efi-stub.c. EFI stub code that is shared
+between architectures is in drivers/firmware/efi/efi-stub-helper.c.
+
+For arm64, there is no compressed kernel support, so the Image itself
+masquerades as a PE/COFF image and the EFI stub is linked into the
+kernel. The arm64 EFI stub lives in arch/arm64/kernel/efi-entry.S
+and arch/arm64/kernel/efi-stub.c.
+
+By using the EFI boot stub it's possible to boot a Linux kernel
+without the use of a conventional EFI boot loader, such as grub or
+elilo. Since the EFI boot stub performs the jobs of a boot loader, in
+a certain sense it *IS* the boot loader.
+
+The EFI boot stub is enabled with the CONFIG_EFI_STUB kernel option.
+
+
+**** How to install bzImage.efi
+
+The bzImage located in arch/x86/boot/bzImage must be copied to the EFI
+System Partition (ESP) and renamed with the extension ".efi". Without
+the extension the EFI firmware loader will refuse to execute it. It's
+not possible to execute bzImage.efi from the usual Linux file systems
+because EFI firmware doesn't have support for them. For ARM the
+arch/arm/boot/zImage should be copied to the system partition, and it
+may not need to be renamed. Similarly for arm64, arch/arm64/boot/Image
+should be copied but not necessarily renamed.
+
+
+**** Passing kernel parameters from the EFI shell
+
+Arguments to the kernel can be passed after bzImage.efi, e.g.
+
+ fs0:> bzImage.efi console=ttyS0 root=/dev/sda4
+
+
+**** The "initrd=" option
+
+Like most boot loaders, the EFI stub allows the user to specify
+multiple initrd files using the "initrd=" option. This is the only EFI
+stub-specific command line parameter, everything else is passed to the
+kernel when it boots.
+
+The path to the initrd file must be an absolute path from the
+beginning of the ESP, relative path names do not work. Also, the path
+is an EFI-style path and directory elements must be separated with
+backslashes (\). For example, given the following directory layout,
+
+fs0:>
+ Kernels\
+ bzImage.efi
+ initrd-large.img
+
+ Ramdisks\
+ initrd-small.img
+ initrd-medium.img
+
+to boot with the initrd-large.img file if the current working
+directory is fs0:\Kernels, the following command must be used,
+
+ fs0:\Kernels> bzImage.efi initrd=\Kernels\initrd-large.img
+
+Notice how bzImage.efi can be specified with a relative path. That's
+because the image we're executing is interpreted by the EFI shell,
+which understands relative paths, whereas the rest of the command line
+is passed to bzImage.efi.
+
+
+**** The "dtb=" option
+
+For the ARM and arm64 architectures, we also need to be able to provide a
+device tree to the kernel. This is done with the "dtb=" command line option,
+and is processed in the same manner as the "initrd=" option that is
+described above.
diff --git a/Documentation/eisa.txt b/Documentation/eisa.txt
new file mode 100644
index 000000000..a55e49109
--- /dev/null
+++ b/Documentation/eisa.txt
@@ -0,0 +1,203 @@
+EISA bus support (Marc Zyngier <maz@wild-wind.fr.eu.org>)
+
+This document groups random notes about porting EISA drivers to the
+new EISA/sysfs API.
+
+Starting from version 2.5.59, the EISA bus is almost given the same
+status as other much more mainstream busses such as PCI or USB. This
+has been possible through sysfs, which defines a nice enough set of
+abstractions to manage busses, devices and drivers.
+
+Although the new API is quite simple to use, converting existing
+drivers to the new infrastructure is not an easy task (mostly because
+detection code is generally also used to probe ISA cards). Moreover,
+most EISA drivers are among the oldest Linux drivers so, as you can
+imagine, some dust has settled here over the years.
+
+The EISA infrastructure is made up of three parts :
+
+ - The bus code implements most of the generic code. It is shared
+ among all the architectures that the EISA code runs on. It
+ implements bus probing (detecting EISA cards available on the bus),
+ allocates I/O resources, allows fancy naming through sysfs, and
+ offers interfaces for driver to register.
+
+ - The bus root driver implements the glue between the bus hardware
+ and the generic bus code. It is responsible for discovering the
+ device implementing the bus, and setting it up to be latter probed
+ by the bus code. This can go from something as simple as reserving
+ an I/O region on x86, to the rather more complex, like the hppa
+ EISA code. This is the part to implement in order to have EISA
+ running on an "new" platform.
+
+ - The driver offers the bus a list of devices that it manages, and
+ implements the necessary callbacks to probe and release devices
+ whenever told to.
+
+Every function/structure below lives in <linux/eisa.h>, which depends
+heavily on <linux/device.h>.
+
+** Bus root driver :
+
+int eisa_root_register (struct eisa_root_device *root);
+
+The eisa_root_register function is used to declare a device as the
+root of an EISA bus. The eisa_root_device structure holds a reference
+to this device, as well as some parameters for probing purposes.
+
+struct eisa_root_device {
+ struct device *dev; /* Pointer to bridge device */
+ struct resource *res;
+ unsigned long bus_base_addr;
+ int slots; /* Max slot number */
+ int force_probe; /* Probe even when no slot 0 */
+ u64 dma_mask; /* from bridge device */
+ int bus_nr; /* Set by eisa_root_register */
+ struct resource eisa_root_res; /* ditto */
+};
+
+node : used for eisa_root_register internal purpose
+dev : pointer to the root device
+res : root device I/O resource
+bus_base_addr : slot 0 address on this bus
+slots : max slot number to probe
+force_probe : Probe even when slot 0 is empty (no EISA mainboard)
+dma_mask : Default DMA mask. Usually the bridge device dma_mask.
+bus_nr : unique bus id, set by eisa_root_register
+
+** Driver :
+
+int eisa_driver_register (struct eisa_driver *edrv);
+void eisa_driver_unregister (struct eisa_driver *edrv);
+
+Clear enough ?
+
+struct eisa_device_id {
+ char sig[EISA_SIG_LEN];
+ unsigned long driver_data;
+};
+
+struct eisa_driver {
+ const struct eisa_device_id *id_table;
+ struct device_driver driver;
+};
+
+id_table : an array of NULL terminated EISA id strings,
+ followed by an empty string. Each string can
+ optionally be paired with a driver-dependent value
+ (driver_data).
+
+driver : a generic driver, such as described in
+ Documentation/driver-model/driver.txt. Only .name,
+ .probe and .remove members are mandatory.
+
+An example is the 3c59x driver :
+
+static struct eisa_device_id vortex_eisa_ids[] = {
+ { "TCM5920", EISA_3C592_OFFSET },
+ { "TCM5970", EISA_3C597_OFFSET },
+ { "" }
+};
+
+static struct eisa_driver vortex_eisa_driver = {
+ .id_table = vortex_eisa_ids,
+ .driver = {
+ .name = "3c59x",
+ .probe = vortex_eisa_probe,
+ .remove = vortex_eisa_remove
+ }
+};
+
+** Device :
+
+The sysfs framework calls .probe and .remove functions upon device
+discovery and removal (note that the .remove function is only called
+when driver is built as a module).
+
+Both functions are passed a pointer to a 'struct device', which is
+encapsulated in a 'struct eisa_device' described as follows :
+
+struct eisa_device {
+ struct eisa_device_id id;
+ int slot;
+ int state;
+ unsigned long base_addr;
+ struct resource res[EISA_MAX_RESOURCES];
+ u64 dma_mask;
+ struct device dev; /* generic device */
+};
+
+id : EISA id, as read from device. id.driver_data is set from the
+ matching driver EISA id.
+slot : slot number which the device was detected on
+state : set of flags indicating the state of the device. Current
+ flags are EISA_CONFIG_ENABLED and EISA_CONFIG_FORCED.
+res : set of four 256 bytes I/O regions allocated to this device
+dma_mask: DMA mask set from the parent device.
+dev : generic device (see Documentation/driver-model/device.txt)
+
+You can get the 'struct eisa_device' from 'struct device' using the
+'to_eisa_device' macro.
+
+** Misc stuff :
+
+void eisa_set_drvdata (struct eisa_device *edev, void *data);
+
+Stores data into the device's driver_data area.
+
+void *eisa_get_drvdata (struct eisa_device *edev):
+
+Gets the pointer previously stored into the device's driver_data area.
+
+int eisa_get_region_index (void *addr);
+
+Returns the region number (0 <= x < EISA_MAX_RESOURCES) of a given
+address.
+
+** Kernel parameters :
+
+eisa_bus.enable_dev :
+
+A comma-separated list of slots to be enabled, even if the firmware
+set the card as disabled. The driver must be able to properly
+initialize the device in such conditions.
+
+eisa_bus.disable_dev :
+
+A comma-separated list of slots to be enabled, even if the firmware
+set the card as enabled. The driver won't be called to handle this
+device.
+
+virtual_root.force_probe :
+
+Force the probing code to probe EISA slots even when it cannot find an
+EISA compliant mainboard (nothing appears on slot 0). Defaults to 0
+(don't force), and set to 1 (force probing) when either
+CONFIG_ALPHA_JENSEN or CONFIG_EISA_VLB_PRIMING are set.
+
+** Random notes :
+
+Converting an EISA driver to the new API mostly involves *deleting*
+code (since probing is now in the core EISA code). Unfortunately, most
+drivers share their probing routine between ISA, and EISA. Special
+care must be taken when ripping out the EISA code, so other busses
+won't suffer from these surgical strikes...
+
+You *must not* expect any EISA device to be detected when returning
+from eisa_driver_register, since the chances are that the bus has not
+yet been probed. In fact, that's what happens most of the time (the
+bus root driver usually kicks in rather late in the boot process).
+Unfortunately, most drivers are doing the probing by themselves, and
+expect to have explored the whole machine when they exit their probe
+routine.
+
+For example, switching your favorite EISA SCSI card to the "hotplug"
+model is "the right thing"(tm).
+
+** Thanks :
+
+I'd like to thank the following people for their help :
+- Xavier Benigni for lending me a wonderful Alpha Jensen,
+- James Bottomley, Jeff Garzik for getting this stuff into the kernel,
+- Andries Brouwer for contributing numerous EISA ids,
+- Catrin Jones for coping with far too many machines at home.
diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt
new file mode 100644
index 000000000..c7d49b885
--- /dev/null
+++ b/Documentation/email-clients.txt
@@ -0,0 +1,263 @@
+Email clients info for Linux
+======================================================================
+
+Git
+----------------------------------------------------------------------
+These days most developers use `git send-email` instead of regular
+email clients. The man page for this is quite good. On the receiving
+end, maintainers use `git am` to apply the patches.
+
+If you are new to git then send your first patch to yourself. Save it
+as raw text including all the headers. Run `git am raw_email.txt` and
+then review the changelog with `git log`. When that works then send
+the patch to the appropriate mailing list(s).
+
+General Preferences
+----------------------------------------------------------------------
+Patches for the Linux kernel are submitted via email, preferably as
+inline text in the body of the email. Some maintainers accept
+attachments, but then the attachments should have content-type
+"text/plain". However, attachments are generally frowned upon because
+it makes quoting portions of the patch more difficult in the patch
+review process.
+
+Email clients that are used for Linux kernel patches should send the
+patch text untouched. For example, they should not modify or delete tabs
+or spaces, even at the beginning or end of lines.
+
+Don't send patches with "format=flowed". This can cause unexpected
+and unwanted line breaks.
+
+Don't let your email client do automatic word wrapping for you.
+This can also corrupt your patch.
+
+Email clients should not modify the character set encoding of the text.
+Emailed patches should be in ASCII or UTF-8 encoding only.
+If you configure your email client to send emails with UTF-8 encoding,
+you avoid some possible charset problems.
+
+Email clients should generate and maintain References: or In-Reply-To:
+headers so that mail threading is not broken.
+
+Copy-and-paste (or cut-and-paste) usually does not work for patches
+because tabs are converted to spaces. Using xclipboard, xclip, and/or
+xcutsel may work, but it's best to test this for yourself or just avoid
+copy-and-paste.
+
+Don't use PGP/GPG signatures in mail that contains patches.
+This breaks many scripts that read and apply the patches.
+(This should be fixable.)
+
+It's a good idea to send a patch to yourself, save the received message,
+and successfully apply it with 'patch' before sending patches to Linux
+mailing lists.
+
+
+Some email client (MUA) hints
+----------------------------------------------------------------------
+Here are some specific MUA configuration hints for editing and sending
+patches for the Linux kernel. These are not meant to be complete
+software package configuration summaries.
+
+Legend:
+TUI = text-based user interface
+GUI = graphical user interface
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Alpine (TUI)
+
+Config options:
+In the "Sending Preferences" section:
+
+- "Do Not Send Flowed Text" must be enabled
+- "Strip Whitespace Before Sending" must be disabled
+
+When composing the message, the cursor should be placed where the patch
+should appear, and then pressing CTRL-R let you specify the patch file
+to insert into the message.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Claws Mail (GUI)
+
+Works. Some people use this successfully for patches.
+
+To insert a patch use Message->Insert File (CTRL+i) or an external editor.
+
+If the inserted patch has to be edited in the Claws composition window
+"Auto wrapping" in Configuration->Preferences->Compose->Wrapping should be
+disabled.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Evolution (GUI)
+
+Some people use this successfully for patches.
+
+When composing mail select: Preformat
+ from Format->Heading->Preformatted (Ctrl-7)
+ or the toolbar
+
+Then use:
+ Insert->Text File... (Alt-n x)
+to insert the patch.
+
+You can also "diff -Nru old.c new.c | xclip", select Preformat, then
+paste with the middle button.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Kmail (GUI)
+
+Some people use Kmail successfully for patches.
+
+The default setting of not composing in HTML is appropriate; do not
+enable it.
+
+When composing an email, under options, uncheck "word wrap". The only
+disadvantage is any text you type in the email will not be word-wrapped
+so you will have to manually word wrap text before the patch. The easiest
+way around this is to compose your email with word wrap enabled, then save
+it as a draft. Once you pull it up again from your drafts it is now hard
+word-wrapped and you can uncheck "word wrap" without losing the existing
+wrapping.
+
+At the bottom of your email, put the commonly-used patch delimiter before
+inserting your patch: three hyphens (---).
+
+Then from the "Message" menu item, select insert file and choose your patch.
+As an added bonus you can customise the message creation toolbar menu
+and put the "insert file" icon there.
+
+Make the composer window wide enough so that no lines wrap. As of
+KMail 1.13.5 (KDE 4.5.4), KMail will apply word wrapping when sending
+the email if the lines wrap in the composer window. Having word wrapping
+disabled in the Options menu isn't enough. Thus, if your patch has very
+long lines, you must make the composer window very wide before sending
+the email. See: https://bugs.kde.org/show_bug.cgi?id=174034
+
+You can safely GPG sign attachments, but inlined text is preferred for
+patches so do not GPG sign them. Signing patches that have been inserted
+as inlined text will make them tricky to extract from their 7-bit encoding.
+
+If you absolutely must send patches as attachments instead of inlining
+them as text, right click on the attachment and select properties, and
+highlight "Suggest automatic display" to make the attachment inlined to
+make it more viewable.
+
+When saving patches that are sent as inlined text, select the email that
+contains the patch from the message list pane, right click and select
+"save as". You can use the whole email unmodified as a patch if it was
+properly composed. There is no option currently to save the email when you
+are actually viewing it in its own window -- there has been a request filed
+at kmail's bugzilla and hopefully this will be addressed. Emails are saved
+as read-write for user only so you will have to chmod them to make them
+group and world readable if you copy them elsewhere.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Lotus Notes (GUI)
+
+Run away from it.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Mutt (TUI)
+
+Plenty of Linux developers use mutt, so it must work pretty well.
+
+Mutt doesn't come with an editor, so whatever editor you use should be
+used in a way that there are no automatic linebreaks. Most editors have
+an "insert file" option that inserts the contents of a file unaltered.
+
+To use 'vim' with mutt:
+ set editor="vi"
+
+ If using xclip, type the command
+ :set paste
+ before middle button or shift-insert or use
+ :r filename
+
+if you want to include the patch inline.
+(a)ttach works fine without "set paste".
+
+Config options:
+It should work with default settings.
+However, it's a good idea to set the "send_charset" to:
+ set send_charset="us-ascii:utf-8"
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Pine (TUI)
+
+Pine has had some whitespace truncation issues in the past, but these
+should all be fixed now.
+
+Use alpine (pine's successor) if you can.
+
+Config options:
+- quell-flowed-text is needed for recent versions
+- the "no-strip-whitespace-before-send" option is needed
+
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Sylpheed (GUI)
+
+- Works well for inlining text (or using attachments).
+- Allows use of an external editor.
+- Is slow on large folders.
+- Won't do TLS SMTP auth over a non-SSL connection.
+- Has a helpful ruler bar in the compose window.
+- Adding addresses to address book doesn't understand the display name
+ properly.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Thunderbird (GUI)
+
+Thunderbird is an Outlook clone that likes to mangle text, but there are ways
+to coerce it into behaving.
+
+- Allow use of an external editor:
+ The easiest thing to do with Thunderbird and patches is to use an
+ "external editor" extension and then just use your favorite $EDITOR
+ for reading/merging patches into the body text. To do this, download
+ and install the extension, then add a button for it using
+ View->Toolbars->Customize... and finally just click on it when in the
+ Compose dialog.
+
+ Please note that "external editor" requires that your editor must not
+ fork, or in other words, the editor must not return before closing.
+ You may have to pass additional flags or change the settings of your
+ editor. Most notably if you are using gvim then you must pass the -f
+ option to gvim by putting "/usr/bin/gvim -f" (if the binary is in
+ /usr/bin) to the text editor field in "external editor" settings. If you
+ are using some other editor then please read its manual to find out how
+ to do this.
+
+To beat some sense out of the internal editor, do this:
+
+- Edit your Thunderbird config settings so that it won't use format=flowed.
+ Go to "edit->preferences->advanced->config editor" to bring up the
+ thunderbird's registry editor.
+
+- Set "mailnews.send_plaintext_flowed" to "false"
+
+- Set "mailnews.wraplength" from "72" to "0"
+
+- "View" > "Message Body As" > "Plain Text"
+
+- "View" > "Character Encoding" > "Unicode (UTF-8)"
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+TkRat (GUI)
+
+Works. Use "Insert file..." or external editor.
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Gmail (Web GUI)
+
+Does not work for sending patches.
+
+Gmail web client converts tabs to spaces automatically.
+
+At the same time it wraps lines every 78 chars with CRLF style line breaks
+although tab2space problem can be solved with external editor.
+
+Another problem is that Gmail will base64-encode any message that has a
+non-ASCII character. That includes things like European names.
+
+ ###
diff --git a/Documentation/extcon/porting-android-switch-class b/Documentation/extcon/porting-android-switch-class
new file mode 100644
index 000000000..49c81caef
--- /dev/null
+++ b/Documentation/extcon/porting-android-switch-class
@@ -0,0 +1,123 @@
+
+ Staging/Android Switch Class Porting Guide
+ (linux/drivers/staging/android/switch)
+ (c) Copyright 2012 Samsung Electronics
+
+AUTHORS
+MyungJoo Ham <myungjoo.ham@samsung.com>
+
+/*****************************************************************
+ * CHAPTER 1. *
+ * PORTING SWITCH CLASS DEVICE DRIVERS *
+ *****************************************************************/
+
+****** STEP 1. Basic Functionality
+ No extcon extended feature, but switch features only.
+
+- struct switch_dev (fed to switch_dev_register/unregister)
+ @name: no change
+ @dev: no change
+ @index: drop (not used in switch device driver side anyway)
+ @state: no change
+ If you have used @state with magic numbers, keep it
+ at this step.
+ @print_name: no change but type change (switch_dev->extcon_dev)
+ @print_state: no change but type change (switch_dev->extcon_dev)
+
+- switch_dev_register(sdev, dev)
+ => extcon_dev_register(edev)
+ : type change (sdev->edev)
+ : remove second param('dev'). if edev has parent device, should store
+ 'dev' to 'edev.dev.parent' before registering extcon device
+- switch_dev_unregister(sdev)
+ => extcon_dev_unregister(edev)
+ : no change but type change (sdev->edev)
+- switch_get_state(sdev)
+ => extcon_get_state(edev)
+ : no change but type change (sdev->edev) and (return: int->u32)
+- switch_set_state(sdev, state)
+ => extcon_set_state(edev, state)
+ : no change but type change (sdev->edev) and (state: int->u32)
+
+With this changes, the ex-switch extcon class device works as it once
+worked as switch class device. However, it will now have additional
+interfaces (both ABI and in-kernel API) and different ABI locations.
+However, if CONFIG_ANDROID is enabled without CONFIG_ANDROID_SWITCH,
+/sys/class/switch/* will be symbolically linked to /sys/class/extcon/
+so that they are still compatible with legacy userspace processes.
+
+****** STEP 2. Multistate (no more magic numbers in state value)
+ Extcon's extended features for switch device drivers with
+ complex features usually required magic numbers in state
+ value of switch_dev. With extcon, such magic numbers that
+ support multiple cables are no more required or supported.
+
+ 1. Define cable names at edev->supported_cable.
+ 2. (Recommended) remove print_state callback.
+ 3. Use extcon_get_cable_state_(edev, index) or
+ extcon_get_cable_state(edev, cable_name) instead of
+ extcon_get_state(edev) if you intend to get a state of a specific
+ cable. Same for set_state. This way, you can remove the usage of
+ magic numbers in state value.
+ 4. Use extcon_update_state() if you are updating specific bits of
+ the state value.
+
+Example: a switch device driver w/ magic numbers for two cables.
+ "0x00": no cables connected.
+ "0x01": cable 1 connected
+ "0x02": cable 2 connected
+ "0x03": cable 1 and 2 connected
+ 1. edev->supported_cable = {"1", "2", NULL};
+ 2. edev->print_state = NULL;
+ 3. extcon_get_cable_state_(edev, 0) shows cable 1's state.
+ extcon_get_cable_state(edev, "1") shows cable 1's state.
+ extcon_set_cable_state_(edev, 1) sets cable 2's state.
+ extcon_set_cable_state(edev, "2") sets cable 2's state
+ 4. extcon_update_state(edev, 0x01, 0) sets the least bit's 0.
+
+****** STEP 3. Notify other device drivers
+
+ You can notify others of the cable attach/detach events with
+notifier chains.
+
+ At the side of other device drivers (the extcon device itself
+does not need to get notified of its own events), there are two
+methods to register notifier_block for cable events:
+(a) for a specific cable or (b) for every cable.
+
+ (a) extcon_register_interest(obj, extcon_name, cable_name, nb)
+ Example: want to get news of "MAX8997_MUIC"'s "USB" cable
+
+ obj = kzalloc(sizeof(struct extcon_specific_cable_nb),
+ GFP_KERNEL);
+ nb->notifier_call = the_callback_to_handle_usb;
+
+ extcon_register_intereset(obj, "MAX8997_MUIC", "USB", nb);
+
+ (b) extcon_register_notifier(edev, nb)
+ Call nb for any changes in edev.
+
+ Please note that in order to properly behave with method (a),
+the extcon device driver should support multistate feature (STEP 2).
+
+****** STEP 4. Inter-cable relation (mutually exclusive)
+
+ You can provide inter-cable mutually exclusiveness information
+for an extcon device. When cables A and B are declared to be mutually
+exclusive, the two cables cannot be in ATTACHED state simulteneously.
+
+
+/*****************************************************************
+ * CHAPTER 2. *
+ * PORTING USERSPACE w/ SWITCH CLASS DEVICE SUPPORT *
+ *****************************************************************/
+
+****** ABI Location
+
+ If "CONFIG_ANDROID" is enabled, /sys/class/switch/* are created
+as symbolic links to /sys/class/extcon/*.
+
+ The two files of switch class, name and state, are provided with
+extcon, too. When the multistate support (STEP 2 of CHAPTER 1.) is
+not enabled or print_state callback is supplied, the output of
+state ABI is same with switch class.
diff --git a/Documentation/fault-injection/fault-injection.txt b/Documentation/fault-injection/fault-injection.txt
new file mode 100644
index 000000000..4cf1a2a6b
--- /dev/null
+++ b/Documentation/fault-injection/fault-injection.txt
@@ -0,0 +1,269 @@
+Fault injection capabilities infrastructure
+===========================================
+
+See also drivers/md/faulty.c and "every_nth" module option for scsi_debug.
+
+
+Available fault injection capabilities
+--------------------------------------
+
+o failslab
+
+ injects slab allocation failures. (kmalloc(), kmem_cache_alloc(), ...)
+
+o fail_page_alloc
+
+ injects page allocation failures. (alloc_pages(), get_free_pages(), ...)
+
+o fail_make_request
+
+ injects disk IO errors on devices permitted by setting
+ /sys/block/<device>/make-it-fail or
+ /sys/block/<device>/<partition>/make-it-fail. (generic_make_request())
+
+o fail_mmc_request
+
+ injects MMC data errors on devices permitted by setting
+ debugfs entries under /sys/kernel/debug/mmc0/fail_mmc_request
+
+Configure fault-injection capabilities behavior
+-----------------------------------------------
+
+o debugfs entries
+
+fault-inject-debugfs kernel module provides some debugfs entries for runtime
+configuration of fault-injection capabilities.
+
+- /sys/kernel/debug/fail*/probability:
+
+ likelihood of failure injection, in percent.
+ Format: <percent>
+
+ Note that one-failure-per-hundred is a very high error rate
+ for some testcases. Consider setting probability=100 and configure
+ /sys/kernel/debug/fail*/interval for such testcases.
+
+- /sys/kernel/debug/fail*/interval:
+
+ specifies the interval between failures, for calls to
+ should_fail() that pass all the other tests.
+
+ Note that if you enable this, by setting interval>1, you will
+ probably want to set probability=100.
+
+- /sys/kernel/debug/fail*/times:
+
+ specifies how many times failures may happen at most.
+ A value of -1 means "no limit".
+
+- /sys/kernel/debug/fail*/space:
+
+ specifies an initial resource "budget", decremented by "size"
+ on each call to should_fail(,size). Failure injection is
+ suppressed until "space" reaches zero.
+
+- /sys/kernel/debug/fail*/verbose
+
+ Format: { 0 | 1 | 2 }
+ specifies the verbosity of the messages when failure is
+ injected. '0' means no messages; '1' will print only a single
+ log line per failure; '2' will print a call trace too -- useful
+ to debug the problems revealed by fault injection.
+
+- /sys/kernel/debug/fail*/task-filter:
+
+ Format: { 'Y' | 'N' }
+ A value of 'N' disables filtering by process (default).
+ Any positive value limits failures to only processes indicated by
+ /proc/<pid>/make-it-fail==1.
+
+- /sys/kernel/debug/fail*/require-start:
+- /sys/kernel/debug/fail*/require-end:
+- /sys/kernel/debug/fail*/reject-start:
+- /sys/kernel/debug/fail*/reject-end:
+
+ specifies the range of virtual addresses tested during
+ stacktrace walking. Failure is injected only if some caller
+ in the walked stacktrace lies within the required range, and
+ none lies within the rejected range.
+ Default required range is [0,ULONG_MAX) (whole of virtual address space).
+ Default rejected range is [0,0).
+
+- /sys/kernel/debug/fail*/stacktrace-depth:
+
+ specifies the maximum stacktrace depth walked during search
+ for a caller within [require-start,require-end) OR
+ [reject-start,reject-end).
+
+- /sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem:
+
+ Format: { 'Y' | 'N' }
+ default is 'N', setting it to 'Y' won't inject failures into
+ highmem/user allocations.
+
+- /sys/kernel/debug/failslab/ignore-gfp-wait:
+- /sys/kernel/debug/fail_page_alloc/ignore-gfp-wait:
+
+ Format: { 'Y' | 'N' }
+ default is 'N', setting it to 'Y' will inject failures
+ only into non-sleep allocations (GFP_ATOMIC allocations).
+
+- /sys/kernel/debug/fail_page_alloc/min-order:
+
+ specifies the minimum page allocation order to be injected
+ failures.
+
+o Boot option
+
+In order to inject faults while debugfs is not available (early boot time),
+use the boot option:
+
+ failslab=
+ fail_page_alloc=
+ fail_make_request=
+ mmc_core.fail_request=<interval>,<probability>,<space>,<times>
+
+How to add new fault injection capability
+-----------------------------------------
+
+o #include <linux/fault-inject.h>
+
+o define the fault attributes
+
+ DECLARE_FAULT_INJECTION(name);
+
+ Please see the definition of struct fault_attr in fault-inject.h
+ for details.
+
+o provide a way to configure fault attributes
+
+- boot option
+
+ If you need to enable the fault injection capability from boot time, you can
+ provide boot option to configure it. There is a helper function for it:
+
+ setup_fault_attr(attr, str);
+
+- debugfs entries
+
+ failslab, fail_page_alloc, and fail_make_request use this way.
+ Helper functions:
+
+ fault_create_debugfs_attr(name, parent, attr);
+
+- module parameters
+
+ If the scope of the fault injection capability is limited to a
+ single kernel module, it is better to provide module parameters to
+ configure the fault attributes.
+
+o add a hook to insert failures
+
+ Upon should_fail() returning true, client code should inject a failure.
+
+ should_fail(attr, size);
+
+Application Examples
+--------------------
+
+o Inject slab allocation failures into module init/exit code
+
+#!/bin/bash
+
+FAILTYPE=failslab
+echo Y > /sys/kernel/debug/$FAILTYPE/task-filter
+echo 10 > /sys/kernel/debug/$FAILTYPE/probability
+echo 100 > /sys/kernel/debug/$FAILTYPE/interval
+echo -1 > /sys/kernel/debug/$FAILTYPE/times
+echo 0 > /sys/kernel/debug/$FAILTYPE/space
+echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
+echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
+
+faulty_system()
+{
+ bash -c "echo 1 > /proc/self/make-it-fail && exec $*"
+}
+
+if [ $# -eq 0 ]
+then
+ echo "Usage: $0 modulename [ modulename ... ]"
+ exit 1
+fi
+
+for m in $*
+do
+ echo inserting $m...
+ faulty_system modprobe $m
+
+ echo removing $m...
+ faulty_system modprobe -r $m
+done
+
+------------------------------------------------------------------------------
+
+o Inject page allocation failures only for a specific module
+
+#!/bin/bash
+
+FAILTYPE=fail_page_alloc
+module=$1
+
+if [ -z $module ]
+then
+ echo "Usage: $0 <modulename>"
+ exit 1
+fi
+
+modprobe $module
+
+if [ ! -d /sys/module/$module/sections ]
+then
+ echo Module $module is not loaded
+ exit 1
+fi
+
+cat /sys/module/$module/sections/.text > /sys/kernel/debug/$FAILTYPE/require-start
+cat /sys/module/$module/sections/.data > /sys/kernel/debug/$FAILTYPE/require-end
+
+echo N > /sys/kernel/debug/$FAILTYPE/task-filter
+echo 10 > /sys/kernel/debug/$FAILTYPE/probability
+echo 100 > /sys/kernel/debug/$FAILTYPE/interval
+echo -1 > /sys/kernel/debug/$FAILTYPE/times
+echo 0 > /sys/kernel/debug/$FAILTYPE/space
+echo 2 > /sys/kernel/debug/$FAILTYPE/verbose
+echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-wait
+echo 1 > /sys/kernel/debug/$FAILTYPE/ignore-gfp-highmem
+echo 10 > /sys/kernel/debug/$FAILTYPE/stacktrace-depth
+
+trap "echo 0 > /sys/kernel/debug/$FAILTYPE/probability" SIGINT SIGTERM EXIT
+
+echo "Injecting errors into the module $module... (interrupt to stop)"
+sleep 1000000
+
+Tool to run command with failslab or fail_page_alloc
+----------------------------------------------------
+In order to make it easier to accomplish the tasks mentioned above, we can use
+tools/testing/fault-injection/failcmd.sh. Please run a command
+"./tools/testing/fault-injection/failcmd.sh --help" for more information and
+see the following examples.
+
+Examples:
+
+Run a command "make -C tools/testing/selftests/ run_tests" with injecting slab
+allocation failure.
+
+ # ./tools/testing/fault-injection/failcmd.sh \
+ -- make -C tools/testing/selftests/ run_tests
+
+Same as above except to specify 100 times failures at most instead of one time
+at most by default.
+
+ # ./tools/testing/fault-injection/failcmd.sh --times=100 \
+ -- make -C tools/testing/selftests/ run_tests
+
+Same as above except to inject page allocation failure instead of slab
+allocation failure.
+
+ # env FAILCMD_TYPE=fail_page_alloc \
+ ./tools/testing/fault-injection/failcmd.sh --times=100 \
+ -- make -C tools/testing/selftests/ run_tests
diff --git a/Documentation/fault-injection/notifier-error-inject.txt b/Documentation/fault-injection/notifier-error-inject.txt
new file mode 100644
index 000000000..09adabef5
--- /dev/null
+++ b/Documentation/fault-injection/notifier-error-inject.txt
@@ -0,0 +1,99 @@
+Notifier error injection
+========================
+
+Notifier error injection provides the ability to inject artificial errors to
+specified notifier chain callbacks. It is useful to test the error handling of
+notifier call chain failures which is rarely executed. There are kernel
+modules that can be used to test the following notifiers.
+
+ * CPU notifier
+ * PM notifier
+ * Memory hotplug notifier
+ * powerpc pSeries reconfig notifier
+
+CPU notifier error injection module
+-----------------------------------
+This feature can be used to test the error handling of the CPU notifiers by
+injecting artificial errors to CPU notifier chain callbacks.
+
+If the notifier call chain should be failed with some events notified, write
+the error code to debugfs interface
+/sys/kernel/debug/notifier-error-inject/cpu/actions/<notifier event>/error
+
+Possible CPU notifier events to be failed are:
+
+ * CPU_UP_PREPARE
+ * CPU_UP_PREPARE_FROZEN
+ * CPU_DOWN_PREPARE
+ * CPU_DOWN_PREPARE_FROZEN
+
+Example1: Inject CPU offline error (-1 == -EPERM)
+
+ # cd /sys/kernel/debug/notifier-error-inject/cpu
+ # echo -1 > actions/CPU_DOWN_PREPARE/error
+ # echo 0 > /sys/devices/system/cpu/cpu1/online
+ bash: echo: write error: Operation not permitted
+
+Example2: inject CPU online error (-2 == -ENOENT)
+
+ # echo -2 > actions/CPU_UP_PREPARE/error
+ # echo 1 > /sys/devices/system/cpu/cpu1/online
+ bash: echo: write error: No such file or directory
+
+PM notifier error injection module
+----------------------------------
+This feature is controlled through debugfs interface
+/sys/kernel/debug/notifier-error-inject/pm/actions/<notifier event>/error
+
+Possible PM notifier events to be failed are:
+
+ * PM_HIBERNATION_PREPARE
+ * PM_SUSPEND_PREPARE
+ * PM_RESTORE_PREPARE
+
+Example: Inject PM suspend error (-12 = -ENOMEM)
+
+ # cd /sys/kernel/debug/notifier-error-inject/pm/
+ # echo -12 > actions/PM_SUSPEND_PREPARE/error
+ # echo mem > /sys/power/state
+ bash: echo: write error: Cannot allocate memory
+
+Memory hotplug notifier error injection module
+----------------------------------------------
+This feature is controlled through debugfs interface
+/sys/kernel/debug/notifier-error-inject/memory/actions/<notifier event>/error
+
+Possible memory notifier events to be failed are:
+
+ * MEM_GOING_ONLINE
+ * MEM_GOING_OFFLINE
+
+Example: Inject memory hotplug offline error (-12 == -ENOMEM)
+
+ # cd /sys/kernel/debug/notifier-error-inject/memory
+ # echo -12 > actions/MEM_GOING_OFFLINE/error
+ # echo offline > /sys/devices/system/memory/memoryXXX/state
+ bash: echo: write error: Cannot allocate memory
+
+powerpc pSeries reconfig notifier error injection module
+--------------------------------------------------------
+This feature is controlled through debugfs interface
+/sys/kernel/debug/notifier-error-inject/pSeries-reconfig/actions/<notifier event>/error
+
+Possible pSeries reconfig notifier events to be failed are:
+
+ * PSERIES_RECONFIG_ADD
+ * PSERIES_RECONFIG_REMOVE
+ * PSERIES_DRCONF_MEM_ADD
+ * PSERIES_DRCONF_MEM_REMOVE
+
+For more usage examples
+-----------------------
+There are tools/testing/selftests using the notifier error injection features
+for CPU and memory notifiers.
+
+ * tools/testing/selftests/cpu-hotplug/on-off-test.sh
+ * tools/testing/selftests/memory-hotplug/on-off-test.sh
+
+These scripts first do simple online and offline tests and then do fault
+injection tests if notifier error injection module is available.
diff --git a/Documentation/fault-injection/provoke-crashes.txt b/Documentation/fault-injection/provoke-crashes.txt
new file mode 100644
index 000000000..7a9d3d815
--- /dev/null
+++ b/Documentation/fault-injection/provoke-crashes.txt
@@ -0,0 +1,38 @@
+The lkdtm module provides an interface to crash or injure the kernel at
+predefined crashpoints to evaluate the reliability of crash dumps obtained
+using different dumping solutions. The module uses KPROBEs to instrument
+crashing points, but can also crash the kernel directly without KRPOBE
+support.
+
+
+You can provide the way either through module arguments when inserting
+the module, or through a debugfs interface.
+
+Usage: insmod lkdtm.ko [recur_count={>0}] cpoint_name=<> cpoint_type=<>
+ [cpoint_count={>0}]
+
+ recur_count : Recursion level for the stack overflow test. Default is 10.
+
+ cpoint_name : Crash point where the kernel is to be crashed. It can be
+ one of INT_HARDWARE_ENTRY, INT_HW_IRQ_EN, INT_TASKLET_ENTRY,
+ FS_DEVRW, MEM_SWAPOUT, TIMERADD, SCSI_DISPATCH_CMD,
+ IDE_CORE_CP, DIRECT
+
+ cpoint_type : Indicates the action to be taken on hitting the crash point.
+ It can be one of PANIC, BUG, EXCEPTION, LOOP, OVERFLOW,
+ CORRUPT_STACK, UNALIGNED_LOAD_STORE_WRITE, OVERWRITE_ALLOCATION,
+ WRITE_AFTER_FREE,
+
+ cpoint_count : Indicates the number of times the crash point is to be hit
+ to trigger an action. The default is 10.
+
+You can also induce failures by mounting debugfs and writing the type to
+<mountpoint>/provoke-crash/<crashpoint>. E.g.,
+
+ mount -t debugfs debugfs /mnt
+ echo EXCEPTION > /mnt/provoke-crash/INT_HARDWARE_ENTRY
+
+
+A special file is `DIRECT' which will induce the crash directly without
+KPROBE instrumentation. This mode is the only one available when the module
+is built on a kernel without KPROBEs support.
diff --git a/Documentation/fb/00-INDEX b/Documentation/fb/00-INDEX
new file mode 100644
index 000000000..fe85e7c59
--- /dev/null
+++ b/Documentation/fb/00-INDEX
@@ -0,0 +1,75 @@
+Index of files in Documentation/fb. If you think something about frame
+buffer devices needs an entry here, needs correction or you've written one
+please mail me.
+ Geert Uytterhoeven <geert@linux-m68k.org>
+
+00-INDEX
+ - this file.
+api.txt
+ - The frame buffer API between applications and buffer devices.
+arkfb.txt
+ - info on the fbdev driver for ARK Logic chips.
+aty128fb.txt
+ - info on the ATI Rage128 frame buffer driver.
+cirrusfb.txt
+ - info on the driver for Cirrus Logic chipsets.
+cmap_xfbdev.txt
+ - an introduction to fbdev's cmap structures.
+deferred_io.txt
+ - an introduction to deferred IO.
+efifb.txt
+ - info on the EFI platform driver for Intel based Apple computers.
+ep93xx-fb.txt
+ - info on the driver for EP93xx LCD controller.
+fbcon.txt
+ - intro to and usage guide for the framebuffer console (fbcon).
+framebuffer.txt
+ - introduction to frame buffer devices.
+gxfb.txt
+ - info on the framebuffer driver for AMD Geode GX2 based processors.
+intel810.txt
+ - documentation for the Intel 810/815 framebuffer driver.
+intelfb.txt
+ - docs for Intel 830M/845G/852GM/855GM/865G/915G/945G fb driver.
+internals.txt
+ - quick overview of frame buffer device internals.
+lxfb.txt
+ - info on the framebuffer driver for AMD Geode LX based processors.
+matroxfb.txt
+ - info on the Matrox framebuffer driver for Alpha, Intel and PPC.
+metronomefb.txt
+ - info on the driver for the Metronome display controller.
+modedb.txt
+ - info on the video mode database.
+pvr2fb.txt
+ - info on the PowerVR 2 frame buffer driver.
+pxafb.txt
+ - info on the driver for the PXA25x LCD controller.
+s3fb.txt
+ - info on the fbdev driver for S3 Trio/Virge chips.
+sa1100fb.txt
+ - information about the driver for the SA-1100 LCD controller.
+sh7760fb.txt
+ - info on the SH7760/SH7763 integrated LCDC Framebuffer driver.
+sisfb.txt
+ - info on the framebuffer device driver for various SiS chips.
+sm501.txt
+ - info on the framebuffer device driver for sm501 videoframebuffer.
+sstfb.txt
+ - info on the frame buffer driver for 3dfx' Voodoo Graphics boards.
+tgafb.txt
+ - info on the TGA (DECChip 21030) frame buffer driver.
+tridentfb.txt
+ info on the framebuffer driver for some Trident chip based cards.
+udlfb.txt
+ - Driver for DisplayLink USB 2.0 chips.
+uvesafb.txt
+ - info on the userspace VESA (VBE2+ compliant) frame buffer device.
+vesafb.txt
+ - info on the VESA frame buffer device.
+viafb.modes
+ - list of modes for VIA Integration Graphic Chip.
+viafb.txt
+ - info on the VIA Integration Graphic Chip console framebuffer driver.
+vt8623fb.txt
+ - info on the fb driver for the graphics core in VIA VT8623 chipsets.
diff --git a/Documentation/fb/api.txt b/Documentation/fb/api.txt
new file mode 100644
index 000000000..d4ff7de85
--- /dev/null
+++ b/Documentation/fb/api.txt
@@ -0,0 +1,306 @@
+ The Frame Buffer Device API
+ ---------------------------
+
+Last revised: June 21, 2011
+
+
+0. Introduction
+---------------
+
+This document describes the frame buffer API used by applications to interact
+with frame buffer devices. In-kernel APIs between device drivers and the frame
+buffer core are not described.
+
+Due to a lack of documentation in the original frame buffer API, drivers
+behaviours differ in subtle (and not so subtle) ways. This document describes
+the recommended API implementation, but applications should be prepared to
+deal with different behaviours.
+
+
+1. Capabilities
+---------------
+
+Device and driver capabilities are reported in the fixed screen information
+capabilities field.
+
+struct fb_fix_screeninfo {
+ ...
+ __u16 capabilities; /* see FB_CAP_* */
+ ...
+};
+
+Application should use those capabilities to find out what features they can
+expect from the device and driver.
+
+- FB_CAP_FOURCC
+
+The driver supports the four character code (FOURCC) based format setting API.
+When supported, formats are configured using a FOURCC instead of manually
+specifying color components layout.
+
+
+2. Types and visuals
+--------------------
+
+Pixels are stored in memory in hardware-dependent formats. Applications need
+to be aware of the pixel storage format in order to write image data to the
+frame buffer memory in the format expected by the hardware.
+
+Formats are described by frame buffer types and visuals. Some visuals require
+additional information, which are stored in the variable screen information
+bits_per_pixel, grayscale, red, green, blue and transp fields.
+
+Visuals describe how color information is encoded and assembled to create
+macropixels. Types describe how macropixels are stored in memory. The following
+types and visuals are supported.
+
+- FB_TYPE_PACKED_PIXELS
+
+Macropixels are stored contiguously in a single plane. If the number of bits
+per macropixel is not a multiple of 8, whether macropixels are padded to the
+next multiple of 8 bits or packed together into bytes depends on the visual.
+
+Padding at end of lines may be present and is then reported through the fixed
+screen information line_length field.
+
+- FB_TYPE_PLANES
+
+Macropixels are split across multiple planes. The number of planes is equal to
+the number of bits per macropixel, with plane i'th storing i'th bit from all
+macropixels.
+
+Planes are located contiguously in memory.
+
+- FB_TYPE_INTERLEAVED_PLANES
+
+Macropixels are split across multiple planes. The number of planes is equal to
+the number of bits per macropixel, with plane i'th storing i'th bit from all
+macropixels.
+
+Planes are interleaved in memory. The interleave factor, defined as the
+distance in bytes between the beginning of two consecutive interleaved blocks
+belonging to different planes, is stored in the fixed screen information
+type_aux field.
+
+- FB_TYPE_FOURCC
+
+Macropixels are stored in memory as described by the format FOURCC identifier
+stored in the variable screen information grayscale field.
+
+- FB_VISUAL_MONO01
+
+Pixels are black or white and stored on a number of bits (typically one)
+specified by the variable screen information bpp field.
+
+Black pixels are represented by all bits set to 1 and white pixels by all bits
+set to 0. When the number of bits per pixel is smaller than 8, several pixels
+are packed together in a byte.
+
+FB_VISUAL_MONO01 is currently used with FB_TYPE_PACKED_PIXELS only.
+
+- FB_VISUAL_MONO10
+
+Pixels are black or white and stored on a number of bits (typically one)
+specified by the variable screen information bpp field.
+
+Black pixels are represented by all bits set to 0 and white pixels by all bits
+set to 1. When the number of bits per pixel is smaller than 8, several pixels
+are packed together in a byte.
+
+FB_VISUAL_MONO01 is currently used with FB_TYPE_PACKED_PIXELS only.
+
+- FB_VISUAL_TRUECOLOR
+
+Pixels are broken into red, green and blue components, and each component
+indexes a read-only lookup table for the corresponding value. Lookup tables
+are device-dependent, and provide linear or non-linear ramps.
+
+Each component is stored in a macropixel according to the variable screen
+information red, green, blue and transp fields.
+
+- FB_VISUAL_PSEUDOCOLOR and FB_VISUAL_STATIC_PSEUDOCOLOR
+
+Pixel values are encoded as indices into a colormap that stores red, green and
+blue components. The colormap is read-only for FB_VISUAL_STATIC_PSEUDOCOLOR
+and read-write for FB_VISUAL_PSEUDOCOLOR.
+
+Each pixel value is stored in the number of bits reported by the variable
+screen information bits_per_pixel field.
+
+- FB_VISUAL_DIRECTCOLOR
+
+Pixels are broken into red, green and blue components, and each component
+indexes a programmable lookup table for the corresponding value.
+
+Each component is stored in a macropixel according to the variable screen
+information red, green, blue and transp fields.
+
+- FB_VISUAL_FOURCC
+
+Pixels are encoded and interpreted as described by the format FOURCC
+identifier stored in the variable screen information grayscale field.
+
+
+3. Screen information
+---------------------
+
+Screen information are queried by applications using the FBIOGET_FSCREENINFO
+and FBIOGET_VSCREENINFO ioctls. Those ioctls take a pointer to a
+fb_fix_screeninfo and fb_var_screeninfo structure respectively.
+
+struct fb_fix_screeninfo stores device independent unchangeable information
+about the frame buffer device and the current format. Those information can't
+be directly modified by applications, but can be changed by the driver when an
+application modifies the format.
+
+struct fb_fix_screeninfo {
+ char id[16]; /* identification string eg "TT Builtin" */
+ unsigned long smem_start; /* Start of frame buffer mem */
+ /* (physical address) */
+ __u32 smem_len; /* Length of frame buffer mem */
+ __u32 type; /* see FB_TYPE_* */
+ __u32 type_aux; /* Interleave for interleaved Planes */
+ __u32 visual; /* see FB_VISUAL_* */
+ __u16 xpanstep; /* zero if no hardware panning */
+ __u16 ypanstep; /* zero if no hardware panning */
+ __u16 ywrapstep; /* zero if no hardware ywrap */
+ __u32 line_length; /* length of a line in bytes */
+ unsigned long mmio_start; /* Start of Memory Mapped I/O */
+ /* (physical address) */
+ __u32 mmio_len; /* Length of Memory Mapped I/O */
+ __u32 accel; /* Indicate to driver which */
+ /* specific chip/card we have */
+ __u16 capabilities; /* see FB_CAP_* */
+ __u16 reserved[2]; /* Reserved for future compatibility */
+};
+
+struct fb_var_screeninfo stores device independent changeable information
+about a frame buffer device, its current format and video mode, as well as
+other miscellaneous parameters.
+
+struct fb_var_screeninfo {
+ __u32 xres; /* visible resolution */
+ __u32 yres;
+ __u32 xres_virtual; /* virtual resolution */
+ __u32 yres_virtual;
+ __u32 xoffset; /* offset from virtual to visible */
+ __u32 yoffset; /* resolution */
+
+ __u32 bits_per_pixel; /* guess what */
+ __u32 grayscale; /* 0 = color, 1 = grayscale, */
+ /* >1 = FOURCC */
+ struct fb_bitfield red; /* bitfield in fb mem if true color, */
+ struct fb_bitfield green; /* else only length is significant */
+ struct fb_bitfield blue;
+ struct fb_bitfield transp; /* transparency */
+
+ __u32 nonstd; /* != 0 Non standard pixel format */
+
+ __u32 activate; /* see FB_ACTIVATE_* */
+
+ __u32 height; /* height of picture in mm */
+ __u32 width; /* width of picture in mm */
+
+ __u32 accel_flags; /* (OBSOLETE) see fb_info.flags */
+
+ /* Timing: All values in pixclocks, except pixclock (of course) */
+ __u32 pixclock; /* pixel clock in ps (pico seconds) */
+ __u32 left_margin; /* time from sync to picture */
+ __u32 right_margin; /* time from picture to sync */
+ __u32 upper_margin; /* time from sync to picture */
+ __u32 lower_margin;
+ __u32 hsync_len; /* length of horizontal sync */
+ __u32 vsync_len; /* length of vertical sync */
+ __u32 sync; /* see FB_SYNC_* */
+ __u32 vmode; /* see FB_VMODE_* */
+ __u32 rotate; /* angle we rotate counter clockwise */
+ __u32 colorspace; /* colorspace for FOURCC-based modes */
+ __u32 reserved[4]; /* Reserved for future compatibility */
+};
+
+To modify variable information, applications call the FBIOPUT_VSCREENINFO
+ioctl with a pointer to a fb_var_screeninfo structure. If the call is
+successful, the driver will update the fixed screen information accordingly.
+
+Instead of filling the complete fb_var_screeninfo structure manually,
+applications should call the FBIOGET_VSCREENINFO ioctl and modify only the
+fields they care about.
+
+
+4. Format configuration
+-----------------------
+
+Frame buffer devices offer two ways to configure the frame buffer format: the
+legacy API and the FOURCC-based API.
+
+
+The legacy API has been the only frame buffer format configuration API for a
+long time and is thus widely used by application. It is the recommended API
+for applications when using RGB and grayscale formats, as well as legacy
+non-standard formats.
+
+To select a format, applications set the fb_var_screeninfo bits_per_pixel field
+to the desired frame buffer depth. Values up to 8 will usually map to
+monochrome, grayscale or pseudocolor visuals, although this is not required.
+
+- For grayscale formats, applications set the grayscale field to one. The red,
+ blue, green and transp fields must be set to 0 by applications and ignored by
+ drivers. Drivers must fill the red, blue and green offsets to 0 and lengths
+ to the bits_per_pixel value.
+
+- For pseudocolor formats, applications set the grayscale field to zero. The
+ red, blue, green and transp fields must be set to 0 by applications and
+ ignored by drivers. Drivers must fill the red, blue and green offsets to 0
+ and lengths to the bits_per_pixel value.
+
+- For truecolor and directcolor formats, applications set the grayscale field
+ to zero, and the red, blue, green and transp fields to describe the layout of
+ color components in memory.
+
+struct fb_bitfield {
+ __u32 offset; /* beginning of bitfield */
+ __u32 length; /* length of bitfield */
+ __u32 msb_right; /* != 0 : Most significant bit is */
+ /* right */
+};
+
+ Pixel values are bits_per_pixel wide and are split in non-overlapping red,
+ green, blue and alpha (transparency) components. Location and size of each
+ component in the pixel value are described by the fb_bitfield offset and
+ length fields. Offset are computed from the right.
+
+ Pixels are always stored in an integer number of bytes. If the number of
+ bits per pixel is not a multiple of 8, pixel values are padded to the next
+ multiple of 8 bits.
+
+Upon successful format configuration, drivers update the fb_fix_screeninfo
+type, visual and line_length fields depending on the selected format.
+
+
+The FOURCC-based API replaces format descriptions by four character codes
+(FOURCC). FOURCCs are abstract identifiers that uniquely define a format
+without explicitly describing it. This is the only API that supports YUV
+formats. Drivers are also encouraged to implement the FOURCC-based API for RGB
+and grayscale formats.
+
+Drivers that support the FOURCC-based API report this capability by setting
+the FB_CAP_FOURCC bit in the fb_fix_screeninfo capabilities field.
+
+FOURCC definitions are located in the linux/videodev2.h header. However, and
+despite starting with the V4L2_PIX_FMT_prefix, they are not restricted to V4L2
+and don't require usage of the V4L2 subsystem. FOURCC documentation is
+available in Documentation/DocBook/v4l/pixfmt.xml.
+
+To select a format, applications set the grayscale field to the desired FOURCC.
+For YUV formats, they should also select the appropriate colorspace by setting
+the colorspace field to one of the colorspaces listed in linux/videodev2.h and
+documented in Documentation/DocBook/v4l/colorspaces.xml.
+
+The red, green, blue and transp fields are not used with the FOURCC-based API.
+For forward compatibility reasons applications must zero those fields, and
+drivers must ignore them. Values other than 0 may get a meaning in future
+extensions.
+
+Upon successful format configuration, drivers update the fb_fix_screeninfo
+type, visual and line_length fields depending on the selected format. The type
+and visual fields are set to FB_TYPE_FOURCC and FB_VISUAL_FOURCC respectively.
diff --git a/Documentation/fb/arkfb.txt b/Documentation/fb/arkfb.txt
new file mode 100644
index 000000000..e8487a9d6
--- /dev/null
+++ b/Documentation/fb/arkfb.txt
@@ -0,0 +1,68 @@
+
+ arkfb - fbdev driver for ARK Logic chips
+ ========================================
+
+
+Supported Hardware
+==================
+
+ ARK 2000PV chip
+ ICS 5342 ramdac
+
+ - only BIOS initialized VGA devices supported
+ - probably not working on big endian
+
+
+Supported Features
+==================
+
+ * 4 bpp pseudocolor modes (with 18bit palette, two variants)
+ * 8 bpp pseudocolor mode (with 18bit palette)
+ * 16 bpp truecolor modes (RGB 555 and RGB 565)
+ * 24 bpp truecolor mode (RGB 888)
+ * 32 bpp truecolor mode (RGB 888)
+ * text mode (activated by bpp = 0)
+ * doublescan mode variant (not available in text mode)
+ * panning in both directions
+ * suspend/resume support
+
+Text mode is supported even in higher resolutions, but there is limitation to
+lower pixclocks (i got maximum about 70 MHz, it is dependent on specific
+hardware). This limitation is not enforced by driver. Text mode supports 8bit
+wide fonts only (hardware limitation) and 16bit tall fonts (driver
+limitation). Unfortunately character attributes (like color) in text mode are
+broken for unknown reason, so its usefulness is limited.
+
+There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
+packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
+with interleaved planes (1 byte interleave), MSB first. Both modes support
+8bit wide fonts only (driver limitation).
+
+Suspend/resume works on systems that initialize video card during resume and
+if device is active (for example used by fbcon).
+
+
+Missing Features
+================
+(alias TODO list)
+
+ * secondary (not initialized by BIOS) device support
+ * big endian support
+ * DPMS support
+ * MMIO support
+ * interlaced mode variant
+ * support for fontwidths != 8 in 4 bpp modes
+ * support for fontheight != 16 in text mode
+ * hardware cursor
+ * vsync synchronization
+ * feature connector support
+ * acceleration support (8514-like 2D)
+
+
+Known bugs
+==========
+
+ * character attributes (and cursor) in text mode are broken
+
+--
+Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/fb/aty128fb.txt b/Documentation/fb/aty128fb.txt
new file mode 100644
index 000000000..b605204fc
--- /dev/null
+++ b/Documentation/fb/aty128fb.txt
@@ -0,0 +1,72 @@
+[This file is cloned from VesaFB/matroxfb]
+
+What is aty128fb?
+=================
+
+This is a driver for a graphic framebuffer for ATI Rage128 based devices
+on Intel and PPC boxes.
+
+Advantages:
+
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+ without using tiny, unreadable fonts.
+ * You can run XF68_FBDev on top of /dev/fb0
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * graphic mode is slower than text mode... but you should not notice
+ if you use same resolution as you used in textmode.
+ * still experimental.
+
+
+How to use it?
+==============
+
+Switching modes is done using the video=aty128fb:<resolution>... modedb
+boot parameter or using `fbset' program.
+
+See Documentation/fb/modedb.txt for more information on modedb
+resolutions.
+
+You should compile in both vgacon (to boot if you remove your Rage128 from
+box) and aty128fb (for graphics mode). You should not compile-in vesafb
+unless you have primary display on non-Rage128 VBE2.0 device (see
+Documentation/fb/vesafb.txt for details).
+
+
+X11
+===
+
+XF68_FBDev should generally work fine, but it is non-accelerated. As of
+this document, 8 and 32bpp works fine. There have been palette issues
+when switching from X to console and back to X. You will have to restart
+X to fix this.
+
+
+Configuration
+=============
+
+You can pass kernel command line options to vesafb with
+`video=aty128fb:option1,option2:value2,option3' (multiple options should
+be separated by comma, values are separated from options by `:').
+Accepted options:
+
+noaccel - do not use acceleration engine. It is default.
+accel - use acceleration engine. Not finished.
+vmode:x - chooses PowerMacintosh video mode <x>. Deprecated.
+cmode:x - chooses PowerMacintosh colour mode <x>. Deprecated.
+<XxX@X> - selects startup videomode. See modedb.txt for detailed
+ explanation. Default is 640x480x8bpp.
+
+
+Limitations
+===========
+
+There are known and unknown bugs, features and misfeatures.
+Currently there are following known bugs:
+ + This driver is still experimental and is not finished. Too many
+ bugs/errata to list here.
+
+--
+Brad Douglas <brad@neruo.com>
diff --git a/Documentation/fb/cirrusfb.txt b/Documentation/fb/cirrusfb.txt
new file mode 100644
index 000000000..f75950d33
--- /dev/null
+++ b/Documentation/fb/cirrusfb.txt
@@ -0,0 +1,97 @@
+
+ Framebuffer driver for Cirrus Logic chipsets
+ Copyright 1999 Jeff Garzik <jgarzik@pobox.com>
+
+
+
+{ just a little something to get people going; contributors welcome! }
+
+
+
+Chip families supported:
+ SD64
+ Piccolo
+ Picasso
+ Spectrum
+ Alpine (GD-543x/4x)
+ Picasso4 (GD-5446)
+ GD-5480
+ Laguna (GD-546x)
+
+Bus's supported:
+ PCI
+ Zorro
+
+Architectures supported:
+ i386
+ Alpha
+ PPC (Motorola Powerstack)
+ m68k (Amiga)
+
+
+
+Default video modes
+-------------------
+At the moment, there are two kernel command line arguments supported:
+
+mode:640x480
+mode:800x600
+ or
+mode:1024x768
+
+Full support for startup video modes (modedb) will be integrated soon.
+
+Version 1.9.9.1
+---------------
+* Fix memory detection for 512kB case
+* 800x600 mode
+* Fixed timings
+* Hint for AXP: Use -accel false -vyres -1 when changing resolution
+
+
+Version 1.9.4.4
+---------------
+* Preliminary Laguna support
+* Overhaul color register routines.
+* Associated with the above, console colors are now obtained from a LUT
+ called 'palette' instead of from the VGA registers. This code was
+ modelled after that in atyfb and matroxfb.
+* Code cleanup, add comments.
+* Overhaul SR07 handling.
+* Bug fixes.
+
+
+Version 1.9.4.3
+---------------
+* Correctly set default startup video mode.
+* Do not override ram size setting. Define
+ CLGEN_USE_HARDCODED_RAM_SETTINGS if you _do_ want to override the RAM
+ setting.
+* Compile fixes related to new 2.3.x IORESOURCE_IO[PORT] symbol changes.
+* Use new 2.3.x resource allocation.
+* Some code cleanup.
+
+
+Version 1.9.4.2
+---------------
+* Casting fixes.
+* Assertions no longer cause an oops on purpose.
+* Bug fixes.
+
+
+Version 1.9.4.1
+---------------
+* Add compatibility support. Now requires a 2.1.x, 2.2.x or 2.3.x kernel.
+
+
+Version 1.9.4
+-------------
+* Several enhancements, smaller memory footprint, a few bugfixes.
+* Requires kernel 2.3.14-pre1 or later.
+
+
+Version 1.9.3
+-------------
+* Bundled with kernel 2.3.14-pre1 or later.
+
+
diff --git a/Documentation/fb/cmap_xfbdev.txt b/Documentation/fb/cmap_xfbdev.txt
new file mode 100644
index 000000000..55e1f0a3d
--- /dev/null
+++ b/Documentation/fb/cmap_xfbdev.txt
@@ -0,0 +1,53 @@
+Understanding fbdev's cmap
+--------------------------
+
+These notes explain how X's dix layer uses fbdev's cmap structures.
+
+*. example of relevant structures in fbdev as used for a 3-bit grayscale cmap
+struct fb_var_screeninfo {
+ .bits_per_pixel = 8,
+ .grayscale = 1,
+ .red = { 4, 3, 0 },
+ .green = { 0, 0, 0 },
+ .blue = { 0, 0, 0 },
+}
+struct fb_fix_screeninfo {
+ .visual = FB_VISUAL_STATIC_PSEUDOCOLOR,
+}
+for (i = 0; i < 8; i++)
+ info->cmap.red[i] = (((2*i)+1)*(0xFFFF))/16;
+memcpy(info->cmap.green, info->cmap.red, sizeof(u16)*8);
+memcpy(info->cmap.blue, info->cmap.red, sizeof(u16)*8);
+
+*. X11 apps do something like the following when trying to use grayscale.
+for (i=0; i < 8; i++) {
+ char colorspec[64];
+ memset(colorspec,0,64);
+ sprintf(colorspec, "rgb:%x/%x/%x", i*36,i*36,i*36);
+ if (!XParseColor(outputDisplay, testColormap, colorspec, &wantedColor))
+ printf("Can't get color %s\n",colorspec);
+ XAllocColor(outputDisplay, testColormap, &wantedColor);
+ grays[i] = wantedColor;
+}
+There's also named equivalents like gray1..x provided you have an rgb.txt.
+
+Somewhere in X's callchain, this results in a call to X code that handles the
+colormap. For example, Xfbdev hits the following:
+
+xc-011010/programs/Xserver/dix/colormap.c:
+
+FindBestPixel(pentFirst, size, prgb, channel)
+
+dr = (long) pent->co.local.red - prgb->red;
+dg = (long) pent->co.local.green - prgb->green;
+db = (long) pent->co.local.blue - prgb->blue;
+sq = dr * dr;
+UnsignedToBigNum (sq, &sum);
+BigNumAdd (&sum, &temp, &sum);
+
+co.local.red are entries that were brought in through FBIOGETCMAP which come
+directly from the info->cmap.red that was listed above. The prgb is the rgb
+that the app wants to match to. The above code is doing what looks like a least
+squares matching function. That's why the cmap entries can't be set to the left
+hand side boundaries of a color range.
+
diff --git a/Documentation/fb/deferred_io.txt b/Documentation/fb/deferred_io.txt
new file mode 100644
index 000000000..748328370
--- /dev/null
+++ b/Documentation/fb/deferred_io.txt
@@ -0,0 +1,75 @@
+Deferred IO
+-----------
+
+Deferred IO is a way to delay and repurpose IO. It uses host memory as a
+buffer and the MMU pagefault as a pretrigger for when to perform the device
+IO. The following example may be a useful explanation of how one such setup
+works:
+
+- userspace app like Xfbdev mmaps framebuffer
+- deferred IO and driver sets up fault and page_mkwrite handlers
+- userspace app tries to write to mmaped vaddress
+- we get pagefault and reach fault handler
+- fault handler finds and returns physical page
+- we get page_mkwrite where we add this page to a list
+- schedule a workqueue task to be run after a delay
+- app continues writing to that page with no additional cost. this is
+ the key benefit.
+- the workqueue task comes in and mkcleans the pages on the list, then
+ completes the work associated with updating the framebuffer. this is
+ the real work talking to the device.
+- app tries to write to the address (that has now been mkcleaned)
+- get pagefault and the above sequence occurs again
+
+As can be seen from above, one benefit is roughly to allow bursty framebuffer
+writes to occur at minimum cost. Then after some time when hopefully things
+have gone quiet, we go and really update the framebuffer which would be
+a relatively more expensive operation.
+
+For some types of nonvolatile high latency displays, the desired image is
+the final image rather than the intermediate stages which is why it's okay
+to not update for each write that is occurring.
+
+It may be the case that this is useful in other scenarios as well. Paul Mundt
+has mentioned a case where it is beneficial to use the page count to decide
+whether to coalesce and issue SG DMA or to do memory bursts.
+
+Another one may be if one has a device framebuffer that is in an usual format,
+say diagonally shifting RGB, this may then be a mechanism for you to allow
+apps to pretend to have a normal framebuffer but reswizzle for the device
+framebuffer at vsync time based on the touched pagelist.
+
+How to use it: (for applications)
+---------------------------------
+No changes needed. mmap the framebuffer like normal and just use it.
+
+How to use it: (for fbdev drivers)
+----------------------------------
+The following example may be helpful.
+
+1. Setup your structure. Eg:
+
+static struct fb_deferred_io hecubafb_defio = {
+ .delay = HZ,
+ .deferred_io = hecubafb_dpy_deferred_io,
+};
+
+The delay is the minimum delay between when the page_mkwrite trigger occurs
+and when the deferred_io callback is called. The deferred_io callback is
+explained below.
+
+2. Setup your deferred IO callback. Eg:
+static void hecubafb_dpy_deferred_io(struct fb_info *info,
+ struct list_head *pagelist)
+
+The deferred_io callback is where you would perform all your IO to the display
+device. You receive the pagelist which is the list of pages that were written
+to during the delay. You must not modify this list. This callback is called
+from a workqueue.
+
+3. Call init
+ info->fbdefio = &hecubafb_defio;
+ fb_deferred_io_init(info);
+
+4. Call cleanup
+ fb_deferred_io_cleanup(info);
diff --git a/Documentation/fb/efifb.txt b/Documentation/fb/efifb.txt
new file mode 100644
index 000000000..a59916c29
--- /dev/null
+++ b/Documentation/fb/efifb.txt
@@ -0,0 +1,31 @@
+
+What is efifb?
+===============
+
+This is a generic EFI platform driver for Intel based Apple computers.
+efifb is only for EFI booted Intel Macs.
+
+Supported Hardware
+==================
+
+iMac 17"/20"
+Macbook
+Macbook Pro 15"/17"
+MacMini
+
+How to use it?
+==============
+
+efifb does not have any kind of autodetection of your machine.
+You have to add the following kernel parameters in your elilo.conf:
+ Macbook :
+ video=efifb:macbook
+ MacMini :
+ video=efifb:mini
+ Macbook Pro 15", iMac 17" :
+ video=efifb:i17
+ Macbook Pro 17", iMac 20" :
+ video=efifb:i20
+
+--
+Edgar Hucek <gimli@dark-green.com>
diff --git a/Documentation/fb/ep93xx-fb.txt b/Documentation/fb/ep93xx-fb.txt
new file mode 100644
index 000000000..5af1bd9ef
--- /dev/null
+++ b/Documentation/fb/ep93xx-fb.txt
@@ -0,0 +1,135 @@
+================================
+Driver for EP93xx LCD controller
+================================
+
+The EP93xx LCD controller can drive both standard desktop monitors and
+embedded LCD displays. If you have a standard desktop monitor then you
+can use the standard Linux video mode database. In your board file:
+
+ static struct ep93xxfb_mach_info some_board_fb_info = {
+ .num_modes = EP93XXFB_USE_MODEDB,
+ .bpp = 16,
+ };
+
+If you have an embedded LCD display then you need to define a video
+mode for it as follows:
+
+ static struct fb_videomode some_board_video_modes[] = {
+ {
+ .name = "some_lcd_name",
+ /* Pixel clock, porches, etc */
+ },
+ };
+
+Note that the pixel clock value is in pico-seconds. You can use the
+KHZ2PICOS macro to convert the pixel clock value. Most other values
+are in pixel clocks. See Documentation/fb/framebuffer.txt for further
+details.
+
+The ep93xxfb_mach_info structure for your board should look like the
+following:
+
+ static struct ep93xxfb_mach_info some_board_fb_info = {
+ .num_modes = ARRAY_SIZE(some_board_video_modes),
+ .modes = some_board_video_modes,
+ .default_mode = &some_board_video_modes[0],
+ .bpp = 16,
+ };
+
+The framebuffer device can be registered by adding the following to
+your board initialisation function:
+
+ ep93xx_register_fb(&some_board_fb_info);
+
+=====================
+Video Attribute Flags
+=====================
+
+The ep93xxfb_mach_info structure has a flags field which can be used
+to configure the controller. The video attributes flags are fully
+documented in section 7 of the EP93xx users' guide. The following
+flags are available:
+
+EP93XXFB_PCLK_FALLING Clock data on the falling edge of the
+ pixel clock. The default is to clock
+ data on the rising edge.
+
+EP93XXFB_SYNC_BLANK_HIGH Blank signal is active high. By
+ default the blank signal is active low.
+
+EP93XXFB_SYNC_HORIZ_HIGH Horizontal sync is active high. By
+ default the horizontal sync is active low.
+
+EP93XXFB_SYNC_VERT_HIGH Vertical sync is active high. By
+ default the vertical sync is active high.
+
+The physical address of the framebuffer can be controlled using the
+following flags:
+
+EP93XXFB_USE_SDCSN0 Use SDCSn[0] for the framebuffer. This
+ is the default setting.
+
+EP93XXFB_USE_SDCSN1 Use SDCSn[1] for the framebuffer.
+
+EP93XXFB_USE_SDCSN2 Use SDCSn[2] for the framebuffer.
+
+EP93XXFB_USE_SDCSN3 Use SDCSn[3] for the framebuffer.
+
+==================
+Platform callbacks
+==================
+
+The EP93xx framebuffer driver supports three optional platform
+callbacks: setup, teardown and blank. The setup and teardown functions
+are called when the framebuffer driver is installed and removed
+respectively. The blank function is called whenever the display is
+blanked or unblanked.
+
+The setup and teardown devices pass the platform_device structure as
+an argument. The fb_info and ep93xxfb_mach_info structures can be
+obtained as follows:
+
+ static int some_board_fb_setup(struct platform_device *pdev)
+ {
+ struct ep93xxfb_mach_info *mach_info = pdev->dev.platform_data;
+ struct fb_info *fb_info = platform_get_drvdata(pdev);
+
+ /* Board specific framebuffer setup */
+ }
+
+======================
+Setting the video mode
+======================
+
+The video mode is set using the following syntax:
+
+ video=XRESxYRES[-BPP][@REFRESH]
+
+If the EP93xx video driver is built-in then the video mode is set on
+the Linux kernel command line, for example:
+
+ video=ep93xx-fb:800x600-16@60
+
+If the EP93xx video driver is built as a module then the video mode is
+set when the module is installed:
+
+ modprobe ep93xx-fb video=320x240
+
+==============
+Screenpage bug
+==============
+
+At least on the EP9315 there is a silicon bug which causes bit 27 of
+the VIDSCRNPAGE (framebuffer physical offset) to be tied low. There is
+an unofficial errata for this bug at:
+ http://marc.info/?l=linux-arm-kernel&m=110061245502000&w=2
+
+By default the EP93xx framebuffer driver checks if the allocated physical
+address has bit 27 set. If it does, then the memory is freed and an
+error is returned. The check can be disabled by adding the following
+option when loading the driver:
+
+ ep93xx-fb.check_screenpage_bug=0
+
+In some cases it may be possible to reconfigure your SDRAM layout to
+avoid this bug. See section 13 of the EP93xx users' guide for details.
diff --git a/Documentation/fb/fbcon.txt b/Documentation/fb/fbcon.txt
new file mode 100644
index 000000000..4a9739abc
--- /dev/null
+++ b/Documentation/fb/fbcon.txt
@@ -0,0 +1,324 @@
+The Framebuffer Console
+=======================
+
+ The framebuffer console (fbcon), as its name implies, is a text
+console running on top of the framebuffer device. It has the functionality of
+any standard text console driver, such as the VGA console, with the added
+features that can be attributed to the graphical nature of the framebuffer.
+
+ In the x86 architecture, the framebuffer console is optional, and
+some even treat it as a toy. For other architectures, it is the only available
+display device, text or graphical.
+
+ What are the features of fbcon? The framebuffer console supports
+high resolutions, varying font types, display rotation, primitive multihead,
+etc. Theoretically, multi-colored fonts, blending, aliasing, and any feature
+made available by the underlying graphics card are also possible.
+
+A. Configuration
+
+ The framebuffer console can be enabled by using your favorite kernel
+configuration tool. It is under Device Drivers->Graphics Support->Support for
+framebuffer devices->Framebuffer Console Support. Select 'y' to compile
+support statically, or 'm' for module support. The module will be fbcon.
+
+ In order for fbcon to activate, at least one framebuffer driver is
+required, so choose from any of the numerous drivers available. For x86
+systems, they almost universally have VGA cards, so vga16fb and vesafb will
+always be available. However, using a chipset-specific driver will give you
+more speed and features, such as the ability to change the video mode
+dynamically.
+
+ To display the penguin logo, choose any logo available in Logo
+Configuration->Boot up logo.
+
+ Also, you will need to select at least one compiled-in fonts, but if
+you don't do anything, the kernel configuration tool will select one for you,
+usually an 8x16 font.
+
+GOTCHA: A common bug report is enabling the framebuffer without enabling the
+framebuffer console. Depending on the driver, you may get a blanked or
+garbled display, but the system still boots to completion. If you are
+fortunate to have a driver that does not alter the graphics chip, then you
+will still get a VGA console.
+
+B. Loading
+
+Possible scenarios:
+
+1. Driver and fbcon are compiled statically
+
+ Usually, fbcon will automatically take over your console. The notable
+ exception is vesafb. It needs to be explicitly activated with the
+ vga= boot option parameter.
+
+2. Driver is compiled statically, fbcon is compiled as a module
+
+ Depending on the driver, you either get a standard console, or a
+ garbled display, as mentioned above. To get a framebuffer console,
+ do a 'modprobe fbcon'.
+
+3. Driver is compiled as a module, fbcon is compiled statically
+
+ You get your standard console. Once the driver is loaded with
+ 'modprobe xxxfb', fbcon automatically takes over the console with
+ the possible exception of using the fbcon=map:n option. See below.
+
+4. Driver and fbcon are compiled as a module.
+
+ You can load them in any order. Once both are loaded, fbcon will take
+ over the console.
+
+C. Boot options
+
+ The framebuffer console has several, largely unknown, boot options
+ that can change its behavior.
+
+1. fbcon=font:<name>
+
+ Select the initial font to use. The value 'name' can be any of the
+ compiled-in fonts: VGA8x16, 7x14, 10x18, VGA8x8, MINI4x6, RomanLarge,
+ SUN8x16, SUN12x22, ProFont6x11, Acorn8x8, PEARL8x8.
+
+ Note, not all drivers can handle font with widths not divisible by 8,
+ such as vga16fb.
+
+2. fbcon=scrollback:<value>[k]
+
+ The scrollback buffer is memory that is used to preserve display
+ contents that has already scrolled past your view. This is accessed
+ by using the Shift-PageUp key combination. The value 'value' is any
+ integer. It defaults to 32KB. The 'k' suffix is optional, and will
+ multiply the 'value' by 1024.
+
+3. fbcon=map:<0123>
+
+ This is an interesting option. It tells which driver gets mapped to
+ which console. The value '0123' is a sequence that gets repeated until
+ the total length is 64 which is the number of consoles available. In
+ the above example, it is expanded to 012301230123... and the mapping
+ will be:
+
+ tty | 1 2 3 4 5 6 7 8 9 ...
+ fb | 0 1 2 3 0 1 2 3 0 ...
+
+ ('cat /proc/fb' should tell you what the fb numbers are)
+
+ One side effect that may be useful is using a map value that exceeds
+ the number of loaded fb drivers. For example, if only one driver is
+ available, fb0, adding fbcon=map:1 tells fbcon not to take over the
+ console.
+
+ Later on, when you want to map the console the to the framebuffer
+ device, you can use the con2fbmap utility.
+
+4. fbcon=vc:<n1>-<n2>
+
+ This option tells fbcon to take over only a range of consoles as
+ specified by the values 'n1' and 'n2'. The rest of the consoles
+ outside the given range will still be controlled by the standard
+ console driver.
+
+ NOTE: For x86 machines, the standard console is the VGA console which
+ is typically located on the same video card. Thus, the consoles that
+ are controlled by the VGA console will be garbled.
+
+4. fbcon=rotate:<n>
+
+ This option changes the orientation angle of the console display. The
+ value 'n' accepts the following:
+
+ 0 - normal orientation (0 degree)
+ 1 - clockwise orientation (90 degrees)
+ 2 - upside down orientation (180 degrees)
+ 3 - counterclockwise orientation (270 degrees)
+
+ The angle can be changed anytime afterwards by 'echoing' the same
+ numbers to any one of the 2 attributes found in
+ /sys/class/graphics/fbcon
+
+ rotate - rotate the display of the active console
+ rotate_all - rotate the display of all consoles
+
+ Console rotation will only become available if Console Rotation
+ Support is compiled in your kernel.
+
+ NOTE: This is purely console rotation. Any other applications that
+ use the framebuffer will remain at their 'normal'orientation.
+ Actually, the underlying fb driver is totally ignorant of console
+ rotation.
+
+C. Attaching, Detaching and Unloading
+
+Before going on how to attach, detach and unload the framebuffer console, an
+illustration of the dependencies may help.
+
+The console layer, as with most subsystems, needs a driver that interfaces with
+the hardware. Thus, in a VGA console:
+
+console ---> VGA driver ---> hardware.
+
+Assuming the VGA driver can be unloaded, one must first unbind the VGA driver
+from the console layer before unloading the driver. The VGA driver cannot be
+unloaded if it is still bound to the console layer. (See
+Documentation/console/console.txt for more information).
+
+This is more complicated in the case of the framebuffer console (fbcon),
+because fbcon is an intermediate layer between the console and the drivers:
+
+console ---> fbcon ---> fbdev drivers ---> hardware
+
+The fbdev drivers cannot be unloaded if it's bound to fbcon, and fbcon cannot
+be unloaded if it's bound to the console layer.
+
+So to unload the fbdev drivers, one must first unbind fbcon from the console,
+then unbind the fbdev drivers from fbcon. Fortunately, unbinding fbcon from
+the console layer will automatically unbind framebuffer drivers from
+fbcon. Thus, there is no need to explicitly unbind the fbdev drivers from
+fbcon.
+
+So, how do we unbind fbcon from the console? Part of the answer is in
+Documentation/console/console.txt. To summarize:
+
+Echo a value to the bind file that represents the framebuffer console
+driver. So assuming vtcon1 represents fbcon, then:
+
+echo 1 > sys/class/vtconsole/vtcon1/bind - attach framebuffer console to
+ console layer
+echo 0 > sys/class/vtconsole/vtcon1/bind - detach framebuffer console from
+ console layer
+
+If fbcon is detached from the console layer, your boot console driver (which is
+usually VGA text mode) will take over. A few drivers (rivafb and i810fb) will
+restore VGA text mode for you. With the rest, before detaching fbcon, you
+must take a few additional steps to make sure that your VGA text mode is
+restored properly. The following is one of the several methods that you can do:
+
+1. Download or install vbetool. This utility is included with most
+ distributions nowadays, and is usually part of the suspend/resume tool.
+
+2. In your kernel configuration, ensure that CONFIG_FRAMEBUFFER_CONSOLE is set
+ to 'y' or 'm'. Enable one or more of your favorite framebuffer drivers.
+
+3. Boot into text mode and as root run:
+
+ vbetool vbestate save > <vga state file>
+
+ The above command saves the register contents of your graphics
+ hardware to <vga state file>. You need to do this step only once as
+ the state file can be reused.
+
+4. If fbcon is compiled as a module, load fbcon by doing:
+
+ modprobe fbcon
+
+5. Now to detach fbcon:
+
+ vbetool vbestate restore < <vga state file> && \
+ echo 0 > /sys/class/vtconsole/vtcon1/bind
+
+6. That's it, you're back to VGA mode. And if you compiled fbcon as a module,
+ you can unload it by 'rmmod fbcon'
+
+7. To reattach fbcon:
+
+ echo 1 > /sys/class/vtconsole/vtcon1/bind
+
+8. Once fbcon is unbound, all drivers registered to the system will also
+become unbound. This means that fbcon and individual framebuffer drivers
+can be unloaded or reloaded at will. Reloading the drivers or fbcon will
+automatically bind the console, fbcon and the drivers together. Unloading
+all the drivers without unloading fbcon will make it impossible for the
+console to bind fbcon.
+
+Notes for vesafb users:
+=======================
+
+Unfortunately, if your bootline includes a vga=xxx parameter that sets the
+hardware in graphics mode, such as when loading vesafb, vgacon will not load.
+Instead, vgacon will replace the default boot console with dummycon, and you
+won't get any display after detaching fbcon. Your machine is still alive, so
+you can reattach vesafb. However, to reattach vesafb, you need to do one of
+the following:
+
+Variation 1:
+
+ a. Before detaching fbcon, do
+
+ vbetool vbemode save > <vesa state file> # do once for each vesafb mode,
+ # the file can be reused
+
+ b. Detach fbcon as in step 5.
+
+ c. Attach fbcon
+
+ vbetool vbestate restore < <vesa state file> && \
+ echo 1 > /sys/class/vtconsole/vtcon1/bind
+
+Variation 2:
+
+ a. Before detaching fbcon, do:
+ echo <ID> > /sys/class/tty/console/bind
+
+
+ vbetool vbemode get
+
+ b. Take note of the mode number
+
+ b. Detach fbcon as in step 5.
+
+ c. Attach fbcon:
+
+ vbetool vbemode set <mode number> && \
+ echo 1 > /sys/class/vtconsole/vtcon1/bind
+
+Samples:
+========
+
+Here are 2 sample bash scripts that you can use to bind or unbind the
+framebuffer console driver if you are in an X86 box:
+
+---------------------------------------------------------------------------
+#!/bin/bash
+# Unbind fbcon
+
+# Change this to where your actual vgastate file is located
+# Or Use VGASTATE=$1 to indicate the state file at runtime
+VGASTATE=/tmp/vgastate
+
+# path to vbetool
+VBETOOL=/usr/local/bin
+
+
+for (( i = 0; i < 16; i++))
+do
+ if test -x /sys/class/vtconsole/vtcon$i; then
+ if [ `cat /sys/class/vtconsole/vtcon$i/name | grep -c "frame buffer"` \
+ = 1 ]; then
+ if test -x $VBETOOL/vbetool; then
+ echo Unbinding vtcon$i
+ $VBETOOL/vbetool vbestate restore < $VGASTATE
+ echo 0 > /sys/class/vtconsole/vtcon$i/bind
+ fi
+ fi
+ fi
+done
+
+---------------------------------------------------------------------------
+#!/bin/bash
+# Bind fbcon
+
+for (( i = 0; i < 16; i++))
+do
+ if test -x /sys/class/vtconsole/vtcon$i; then
+ if [ `cat /sys/class/vtconsole/vtcon$i/name | grep -c "frame buffer"` \
+ = 1 ]; then
+ echo Unbinding vtcon$i
+ echo 1 > /sys/class/vtconsole/vtcon$i/bind
+ fi
+ fi
+done
+---------------------------------------------------------------------------
+
+--
+Antonino Daplas <adaplas@pol.net>
diff --git a/Documentation/fb/framebuffer.txt b/Documentation/fb/framebuffer.txt
new file mode 100644
index 000000000..58c5ae2e9
--- /dev/null
+++ b/Documentation/fb/framebuffer.txt
@@ -0,0 +1,343 @@
+ The Frame Buffer Device
+ -----------------------
+
+Maintained by Geert Uytterhoeven <geert@linux-m68k.org>
+Last revised: May 10, 2001
+
+
+0. Introduction
+---------------
+
+The frame buffer device provides an abstraction for the graphics hardware. It
+represents the frame buffer of some video hardware and allows application
+software to access the graphics hardware through a well-defined interface, so
+the software doesn't need to know anything about the low-level (hardware
+register) stuff.
+
+The device is accessed through special device nodes, usually located in the
+/dev directory, i.e. /dev/fb*.
+
+
+1. User's View of /dev/fb*
+--------------------------
+
+From the user's point of view, the frame buffer device looks just like any
+other device in /dev. It's a character device using major 29; the minor
+specifies the frame buffer number.
+
+By convention, the following device nodes are used (numbers indicate the device
+minor numbers):
+
+ 0 = /dev/fb0 First frame buffer
+ 1 = /dev/fb1 Second frame buffer
+ ...
+ 31 = /dev/fb31 32nd frame buffer
+
+For backwards compatibility, you may want to create the following symbolic
+links:
+
+ /dev/fb0current -> fb0
+ /dev/fb1current -> fb1
+
+and so on...
+
+The frame buffer devices are also `normal' memory devices, this means, you can
+read and write their contents. You can, for example, make a screen snapshot by
+
+ cp /dev/fb0 myfile
+
+There also can be more than one frame buffer at a time, e.g. if you have a
+graphics card in addition to the built-in hardware. The corresponding frame
+buffer devices (/dev/fb0 and /dev/fb1 etc.) work independently.
+
+Application software that uses the frame buffer device (e.g. the X server) will
+use /dev/fb0 by default (older software uses /dev/fb0current). You can specify
+an alternative frame buffer device by setting the environment variable
+$FRAMEBUFFER to the path name of a frame buffer device, e.g. (for sh/bash
+users):
+
+ export FRAMEBUFFER=/dev/fb1
+
+or (for csh users):
+
+ setenv FRAMEBUFFER /dev/fb1
+
+After this the X server will use the second frame buffer.
+
+
+2. Programmer's View of /dev/fb*
+--------------------------------
+
+As you already know, a frame buffer device is a memory device like /dev/mem and
+it has the same features. You can read it, write it, seek to some location in
+it and mmap() it (the main usage). The difference is just that the memory that
+appears in the special file is not the whole memory, but the frame buffer of
+some video hardware.
+
+/dev/fb* also allows several ioctls on it, by which lots of information about
+the hardware can be queried and set. The color map handling works via ioctls,
+too. Look into <linux/fb.h> for more information on what ioctls exist and on
+which data structures they work. Here's just a brief overview:
+
+ - You can request unchangeable information about the hardware, like name,
+ organization of the screen memory (planes, packed pixels, ...) and address
+ and length of the screen memory.
+
+ - You can request and change variable information about the hardware, like
+ visible and virtual geometry, depth, color map format, timing, and so on.
+ If you try to change that information, the driver maybe will round up some
+ values to meet the hardware's capabilities (or return EINVAL if that isn't
+ possible).
+
+ - You can get and set parts of the color map. Communication is done with 16
+ bits per color part (red, green, blue, transparency) to support all
+ existing hardware. The driver does all the computations needed to apply
+ it to the hardware (round it down to less bits, maybe throw away
+ transparency).
+
+All this hardware abstraction makes the implementation of application programs
+easier and more portable. E.g. the X server works completely on /dev/fb* and
+thus doesn't need to know, for example, how the color registers of the concrete
+hardware are organized. XF68_FBDev is a general X server for bitmapped,
+unaccelerated video hardware. The only thing that has to be built into
+application programs is the screen organization (bitplanes or chunky pixels
+etc.), because it works on the frame buffer image data directly.
+
+For the future it is planned that frame buffer drivers for graphics cards and
+the like can be implemented as kernel modules that are loaded at runtime. Such
+a driver just has to call register_framebuffer() and supply some functions.
+Writing and distributing such drivers independently from the kernel will save
+much trouble...
+
+
+3. Frame Buffer Resolution Maintenance
+--------------------------------------
+
+Frame buffer resolutions are maintained using the utility `fbset'. It can
+change the video mode properties of a frame buffer device. Its main usage is
+to change the current video mode, e.g. during boot up in one of your /etc/rc.*
+or /etc/init.d/* files.
+
+Fbset uses a video mode database stored in a configuration file, so you can
+easily add your own modes and refer to them with a simple identifier.
+
+
+4. The X Server
+---------------
+
+The X server (XF68_FBDev) is the most notable application program for the frame
+buffer device. Starting with XFree86 release 3.2, the X server is part of
+XFree86 and has 2 modes:
+
+ - If the `Display' subsection for the `fbdev' driver in the /etc/XF86Config
+ file contains a
+
+ Modes "default"
+
+ line, the X server will use the scheme discussed above, i.e. it will start
+ up in the resolution determined by /dev/fb0 (or $FRAMEBUFFER, if set). You
+ still have to specify the color depth (using the Depth keyword) and virtual
+ resolution (using the Virtual keyword) though. This is the default for the
+ configuration file supplied with XFree86. It's the most simple
+ configuration, but it has some limitations.
+
+ - Therefore it's also possible to specify resolutions in the /etc/XF86Config
+ file. This allows for on-the-fly resolution switching while retaining the
+ same virtual desktop size. The frame buffer device that's used is still
+ /dev/fb0current (or $FRAMEBUFFER), but the available resolutions are
+ defined by /etc/XF86Config now. The disadvantage is that you have to
+ specify the timings in a different format (but `fbset -x' may help).
+
+To tune a video mode, you can use fbset or xvidtune. Note that xvidtune doesn't
+work 100% with XF68_FBDev: the reported clock values are always incorrect.
+
+
+5. Video Mode Timings
+---------------------
+
+A monitor draws an image on the screen by using an electron beam (3 electron
+beams for color models, 1 electron beam for monochrome monitors). The front of
+the screen is covered by a pattern of colored phosphors (pixels). If a phosphor
+is hit by an electron, it emits a photon and thus becomes visible.
+
+The electron beam draws horizontal lines (scanlines) from left to right, and
+from the top to the bottom of the screen. By modifying the intensity of the
+electron beam, pixels with various colors and intensities can be shown.
+
+After each scanline the electron beam has to move back to the left side of the
+screen and to the next line: this is called the horizontal retrace. After the
+whole screen (frame) was painted, the beam moves back to the upper left corner:
+this is called the vertical retrace. During both the horizontal and vertical
+retrace, the electron beam is turned off (blanked).
+
+The speed at which the electron beam paints the pixels is determined by the
+dotclock in the graphics board. For a dotclock of e.g. 28.37516 MHz (millions
+of cycles per second), each pixel is 35242 ps (picoseconds) long:
+
+ 1/(28.37516E6 Hz) = 35.242E-9 s
+
+If the screen resolution is 640x480, it will take
+
+ 640*35.242E-9 s = 22.555E-6 s
+
+to paint the 640 (xres) pixels on one scanline. But the horizontal retrace
+also takes time (e.g. 272 `pixels'), so a full scanline takes
+
+ (640+272)*35.242E-9 s = 32.141E-6 s
+
+We'll say that the horizontal scanrate is about 31 kHz:
+
+ 1/(32.141E-6 s) = 31.113E3 Hz
+
+A full screen counts 480 (yres) lines, but we have to consider the vertical
+retrace too (e.g. 49 `lines'). So a full screen will take
+
+ (480+49)*32.141E-6 s = 17.002E-3 s
+
+The vertical scanrate is about 59 Hz:
+
+ 1/(17.002E-3 s) = 58.815 Hz
+
+This means the screen data is refreshed about 59 times per second. To have a
+stable picture without visible flicker, VESA recommends a vertical scanrate of
+at least 72 Hz. But the perceived flicker is very human dependent: some people
+can use 50 Hz without any trouble, while I'll notice if it's less than 80 Hz.
+
+Since the monitor doesn't know when a new scanline starts, the graphics board
+will supply a synchronization pulse (horizontal sync or hsync) for each
+scanline. Similarly it supplies a synchronization pulse (vertical sync or
+vsync) for each new frame. The position of the image on the screen is
+influenced by the moments at which the synchronization pulses occur.
+
+The following picture summarizes all timings. The horizontal retrace time is
+the sum of the left margin, the right margin and the hsync length, while the
+vertical retrace time is the sum of the upper margin, the lower margin and the
+vsync length.
+
+ +----------+---------------------------------------------+----------+-------+
+ | | ↑ | | |
+ | | |upper_margin | | |
+ | | ↓ | | |
+ +----------###############################################----------+-------+
+ | # ↑ # | |
+ | # | # | |
+ | # | # | |
+ | # | # | |
+ | left # | # right | hsync |
+ | margin # | xres # margin | len |
+ |<-------->#<---------------+--------------------------->#<-------->|<----->|
+ | # | # | |
+ | # | # | |
+ | # | # | |
+ | # |yres # | |
+ | # | # | |
+ | # | # | |
+ | # | # | |
+ | # | # | |
+ | # | # | |
+ | # | # | |
+ | # | # | |
+ | # | # | |
+ | # ↓ # | |
+ +----------###############################################----------+-------+
+ | | ↑ | | |
+ | | |lower_margin | | |
+ | | ↓ | | |
+ +----------+---------------------------------------------+----------+-------+
+ | | ↑ | | |
+ | | |vsync_len | | |
+ | | ↓ | | |
+ +----------+---------------------------------------------+----------+-------+
+
+The frame buffer device expects all horizontal timings in number of dotclocks
+(in picoseconds, 1E-12 s), and vertical timings in number of scanlines.
+
+
+6. Converting XFree86 timing values info frame buffer device timings
+--------------------------------------------------------------------
+
+An XFree86 mode line consists of the following fields:
+ "800x600" 50 800 856 976 1040 600 637 643 666
+ < name > DCF HR SH1 SH2 HFL VR SV1 SV2 VFL
+
+The frame buffer device uses the following fields:
+
+ - pixclock: pixel clock in ps (pico seconds)
+ - left_margin: time from sync to picture
+ - right_margin: time from picture to sync
+ - upper_margin: time from sync to picture
+ - lower_margin: time from picture to sync
+ - hsync_len: length of horizontal sync
+ - vsync_len: length of vertical sync
+
+1) Pixelclock:
+ xfree: in MHz
+ fb: in picoseconds (ps)
+
+ pixclock = 1000000 / DCF
+
+2) horizontal timings:
+ left_margin = HFL - SH2
+ right_margin = SH1 - HR
+ hsync_len = SH2 - SH1
+
+3) vertical timings:
+ upper_margin = VFL - SV2
+ lower_margin = SV1 - VR
+ vsync_len = SV2 - SV1
+
+Good examples for VESA timings can be found in the XFree86 source tree,
+under "xc/programs/Xserver/hw/xfree86/doc/modeDB.txt".
+
+
+7. References
+-------------
+
+For more specific information about the frame buffer device and its
+applications, please refer to the Linux-fbdev website:
+
+ http://linux-fbdev.sourceforge.net/
+
+and to the following documentation:
+
+ - The manual pages for fbset: fbset(8), fb.modes(5)
+ - The manual pages for XFree86: XF68_FBDev(1), XF86Config(4/5)
+ - The mighty kernel sources:
+ o linux/drivers/video/
+ o linux/include/linux/fb.h
+ o linux/include/video/
+
+
+
+8. Mailing list
+---------------
+
+There is a frame buffer device related mailing list at kernel.org:
+linux-fbdev@vger.kernel.org.
+
+Point your web browser to http://sourceforge.net/projects/linux-fbdev/ for
+subscription information and archive browsing.
+
+
+9. Downloading
+--------------
+
+All necessary files can be found at
+
+ ftp://ftp.uni-erlangen.de/pub/Linux/LOCAL/680x0/
+
+and on its mirrors.
+
+The latest version of fbset can be found at
+
+ http://www.linux-fbdev.org/
+
+
+10. Credits
+----------
+
+This readme was written by Geert Uytterhoeven, partly based on the original
+`X-framebuffer.README' by Roman Hodek and Martin Schaller. Section 6 was
+provided by Frank Neumann.
+
+The frame buffer device abstraction was designed by Martin Schaller.
diff --git a/Documentation/fb/gxfb.txt b/Documentation/fb/gxfb.txt
new file mode 100644
index 000000000..2f640903b
--- /dev/null
+++ b/Documentation/fb/gxfb.txt
@@ -0,0 +1,52 @@
+[This file is cloned from VesaFB/aty128fb]
+
+What is gxfb?
+=================
+
+This is a graphics framebuffer driver for AMD Geode GX2 based processors.
+
+Advantages:
+
+ * No need to use AMD's VSA code (or other VESA emulation layer) in the
+ BIOS.
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+ without using tiny, unreadable fonts.
+ * You can run XF68_FBDev on top of /dev/fb0
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * graphic mode is slower than text mode...
+
+
+How to use it?
+==============
+
+Switching modes is done using gxfb.mode_option=<resolution>... boot
+parameter or using `fbset' program.
+
+See Documentation/fb/modedb.txt for more information on modedb
+resolutions.
+
+
+X11
+===
+
+XF68_FBDev should generally work fine, but it is non-accelerated.
+
+
+Configuration
+=============
+
+You can pass kernel command line options to gxfb with gxfb.<option>.
+For example, gxfb.mode_option=800x600@75.
+Accepted options:
+
+mode_option - specify the video mode. Of the form
+ <x>x<y>[-<bpp>][@<refresh>]
+vram - size of video ram (normally auto-detected)
+vt_switch - enable vt switching during suspend/resume. The vt
+ switch is slow, but harmless.
+
+--
+Andres Salomon <dilinger@debian.org>
diff --git a/Documentation/fb/intel810.txt b/Documentation/fb/intel810.txt
new file mode 100644
index 000000000..a8e9f5bca
--- /dev/null
+++ b/Documentation/fb/intel810.txt
@@ -0,0 +1,278 @@
+Intel 810/815 Framebuffer driver
+ Tony Daplas <adaplas@pol.net>
+ http://i810fb.sourceforge.net
+
+ March 17, 2002
+
+ First Released: July 2001
+ Last Update: September 12, 2005
+================================================================
+
+A. Introduction
+
+ This is a framebuffer driver for various Intel 810/815 compatible
+ graphics devices. These include:
+
+ Intel 810
+ Intel 810E
+ Intel 810-DC100
+ Intel 815 Internal graphics only, 100Mhz FSB
+ Intel 815 Internal graphics only
+ Intel 815 Internal graphics and AGP
+
+B. Features
+
+ - Choice of using Discrete Video Timings, VESA Generalized Timing
+ Formula, or a framebuffer specific database to set the video mode
+
+ - Supports a variable range of horizontal and vertical resolution and
+ vertical refresh rates if the VESA Generalized Timing Formula is
+ enabled.
+
+ - Supports color depths of 8, 16, 24 and 32 bits per pixel
+
+ - Supports pseudocolor, directcolor, or truecolor visuals
+
+ - Full and optimized hardware acceleration at 8, 16 and 24 bpp
+
+ - Robust video state save and restore
+
+ - MTRR support
+
+ - Utilizes user-entered monitor specifications to automatically
+ calculate required video mode parameters.
+
+ - Can concurrently run with xfree86 running with native i810 drivers
+
+ - Hardware Cursor Support
+
+ - Supports EDID probing either by DDC/I2C or through the BIOS
+
+C. List of available options
+
+ a. "video=i810fb"
+ enables the i810 driver
+
+ Recommendation: required
+
+ b. "xres:<value>"
+ select horizontal resolution in pixels. (This parameter will be
+ ignored if 'mode_option' is specified. See 'o' below).
+
+ Recommendation: user preference
+ (default = 640)
+
+ c. "yres:<value>"
+ select vertical resolution in scanlines. If Discrete Video Timings
+ is enabled, this will be ignored and computed as 3*xres/4. (This
+ parameter will be ignored if 'mode_option' is specified. See 'o'
+ below)
+
+ Recommendation: user preference
+ (default = 480)
+
+ d. "vyres:<value>"
+ select virtual vertical resolution in scanlines. If (0) or none
+ is specified, this will be computed against maximum available memory.
+
+ Recommendation: do not set
+ (default = 480)
+
+ e. "vram:<value>"
+ select amount of system RAM in MB to allocate for the video memory
+
+ Recommendation: 1 - 4 MB.
+ (default = 4)
+
+ f. "bpp:<value>"
+ select desired pixel depth
+
+ Recommendation: 8
+ (default = 8)
+
+ g. "hsync1/hsync2:<value>"
+ select the minimum and maximum Horizontal Sync Frequency of the
+ monitor in kHz. If using a fixed frequency monitor, hsync1 must
+ be equal to hsync2. If EDID probing is successful, these will be
+ ignored and values will be taken from the EDID block.
+
+ Recommendation: check monitor manual for correct values
+ (default = 29/30)
+
+ h. "vsync1/vsync2:<value>"
+ select the minimum and maximum Vertical Sync Frequency of the monitor
+ in Hz. You can also use this option to lock your monitor's refresh
+ rate. If EDID probing is successful, these will be ignored and values
+ will be taken from the EDID block.
+
+ Recommendation: check monitor manual for correct values
+ (default = 60/60)
+
+ IMPORTANT: If you need to clamp your timings, try to give some
+ leeway for computational errors (over/underflows). Example: if
+ using vsync1/vsync2 = 60/60, make sure hsync1/hsync2 has at least
+ a 1 unit difference, and vice versa.
+
+ i. "voffset:<value>"
+ select at what offset in MB of the logical memory to allocate the
+ framebuffer memory. The intent is to avoid the memory blocks
+ used by standard graphics applications (XFree86). The default
+ offset (16 MB for a 64 MB aperture, 8 MB for a 32 MB aperture) will
+ avoid XFree86's usage and allows up to 7 MB/15 MB of framebuffer
+ memory. Depending on your usage, adjust the value up or down
+ (0 for maximum usage, 31/63 MB for the least amount). Note, an
+ arbitrary setting may conflict with XFree86.
+
+ Recommendation: do not set
+ (default = 8 or 16 MB)
+
+ j. "accel"
+ enable text acceleration. This can be enabled/reenabled anytime
+ by using 'fbset -accel true/false'.
+
+ Recommendation: enable
+ (default = not set)
+
+ k. "mtrr"
+ enable MTRR. This allows data transfers to the framebuffer memory
+ to occur in bursts which can significantly increase performance.
+ Not very helpful with the i810/i815 because of 'shared memory'.
+
+ Recommendation: do not set
+ (default = not set)
+
+ l. "extvga"
+ if specified, secondary/external VGA output will always be enabled.
+ Useful if the BIOS turns off the VGA port when no monitor is attached.
+ The external VGA monitor can then be attached without rebooting.
+
+ Recommendation: do not set
+ (default = not set)
+
+ m. "sync"
+ Forces the hardware engine to do a "sync" or wait for the hardware
+ to finish before starting another instruction. This will produce a
+ more stable setup, but will be slower.
+
+ Recommendation: do not set
+ (default = not set)
+
+ n. "dcolor"
+ Use directcolor visual instead of truecolor for pixel depths greater
+ than 8 bpp. Useful for color tuning, such as gamma control.
+
+ Recommendation: do not set
+ (default = not set)
+
+ o. <xres>x<yres>[-<bpp>][@<refresh>]
+ The driver will now accept specification of boot mode option. If this
+ is specified, the options 'xres' and 'yres' will be ignored. See
+ Documentation/fb/modedb.txt for usage.
+
+D. Kernel booting
+
+Separate each option/option-pair by commas (,) and the option from its value
+with a colon (:) as in the following:
+
+video=i810fb:option1,option2:value2
+
+Sample Usage
+------------
+
+In /etc/lilo.conf, add the line:
+
+append="video=i810fb:vram:2,xres:1024,yres:768,bpp:8,hsync1:30,hsync2:55, \
+ vsync1:50,vsync2:85,accel,mtrr"
+
+This will initialize the framebuffer to 1024x768 at 8bpp. The framebuffer
+will use 2 MB of System RAM. MTRR support will be enabled. The refresh rate
+will be computed based on the hsync1/hsync2 and vsync1/vsync2 values.
+
+IMPORTANT:
+You must include hsync1, hsync2, vsync1 and vsync2 to enable video modes
+better than 640x480 at 60Hz. HOWEVER, if your chipset/display combination
+supports I2C and has an EDID block, you can safely exclude hsync1, hsync2,
+vsync1 and vsync2 parameters. These parameters will be taken from the EDID
+block.
+
+E. Module options
+
+The module parameters are essentially similar to the kernel
+parameters. The main difference is that you need to include a Boolean value
+(1 for TRUE, and 0 for FALSE) for those options which don't need a value.
+
+Example, to enable MTRR, include "mtrr=1".
+
+Sample Usage
+------------
+
+Using the same setup as described above, load the module like this:
+
+ modprobe i810fb vram=2 xres=1024 bpp=8 hsync1=30 hsync2=55 vsync1=50 \
+ vsync2=85 accel=1 mtrr=1
+
+Or just add the following to a configuration file in /etc/modprobe.d/
+
+ options i810fb vram=2 xres=1024 bpp=16 hsync1=30 hsync2=55 vsync1=50 \
+ vsync2=85 accel=1 mtrr=1
+
+and just do a
+
+ modprobe i810fb
+
+
+F. Setup
+
+ a. Do your usual method of configuring the kernel.
+
+ make menuconfig/xconfig/config
+
+ b. Under "Code maturity level options" enable "Prompt for development
+ and/or incomplete code/drivers".
+
+ c. Enable agpgart support for the Intel 810/815 on-board graphics.
+ This is required. The option is under "Character Devices".
+
+ d. Under "Graphics Support", select "Intel 810/815" either statically
+ or as a module. Choose "use VESA Generalized Timing Formula" if
+ you need to maximize the capability of your display. To be on the
+ safe side, you can leave this unselected.
+
+ e. If you want support for DDC/I2C probing (Plug and Play Displays),
+ set 'Enable DDC Support' to 'y'. To make this option appear, set
+ 'use VESA Generalized Timing Formula' to 'y'.
+
+ f. If you want a framebuffer console, enable it under "Console
+ Drivers".
+
+ g. Compile your kernel.
+
+ h. Load the driver as described in sections D and E.
+
+ i. Try the DirectFB (http://www.directfb.org) + the i810 gfxdriver
+ patch to see the chipset in action (or inaction :-).
+
+G. Acknowledgment:
+
+ 1. Geert Uytterhoeven - his excellent howto and the virtual
+ framebuffer driver code made this possible.
+
+ 2. Jeff Hartmann for his agpgart code.
+
+ 3. The X developers. Insights were provided just by reading the
+ XFree86 source code.
+
+ 4. Intel(c). For this value-oriented chipset driver and for
+ providing documentation.
+
+ 5. Matt Sottek. His inputs and ideas helped in making some
+ optimizations possible.
+
+H. Home Page:
+
+ A more complete, and probably updated information is provided at
+ http://i810fb.sourceforge.net.
+
+###########################
+Tony
+
diff --git a/Documentation/fb/intelfb.txt b/Documentation/fb/intelfb.txt
new file mode 100644
index 000000000..feac4e4d6
--- /dev/null
+++ b/Documentation/fb/intelfb.txt
@@ -0,0 +1,149 @@
+Intel 830M/845G/852GM/855GM/865G/915G/945G Framebuffer driver
+================================================================
+
+A. Introduction
+ This is a framebuffer driver for various Intel 8xx/9xx compatible
+graphics devices. These would include:
+
+ Intel 830M
+ Intel 845G
+ Intel 852GM
+ Intel 855GM
+ Intel 865G
+ Intel 915G
+ Intel 915GM
+ Intel 945G
+ Intel 945GM
+ Intel 945GME
+ Intel 965G
+ Intel 965GM
+
+B. List of available options
+
+ a. "video=intelfb"
+ enables the intelfb driver
+
+ Recommendation: required
+
+ b. "mode=<xres>x<yres>[-<bpp>][@<refresh>]"
+ select mode
+
+ Recommendation: user preference
+ (default = 1024x768-32@70)
+
+ c. "vram=<value>"
+ select amount of system RAM in MB to allocate for the video memory
+ if not enough RAM was already allocated by the BIOS.
+
+ Recommendation: 1 - 4 MB.
+ (default = 4 MB)
+
+ d. "voffset=<value>"
+ select at what offset in MB of the logical memory to allocate the
+ framebuffer memory. The intent is to avoid the memory blocks
+ used by standard graphics applications (XFree86). Depending on your
+ usage, adjust the value up or down, (0 for maximum usage, 63/127 MB
+ for the least amount). Note, an arbitrary setting may conflict
+ with XFree86.
+
+ Recommendation: do not set
+ (default = 48 MB)
+
+ e. "accel"
+ enable text acceleration. This can be enabled/reenabled anytime
+ by using 'fbset -accel true/false'.
+
+ Recommendation: enable
+ (default = set)
+
+ f. "hwcursor"
+ enable cursor acceleration.
+
+ Recommendation: enable
+ (default = set)
+
+ g. "mtrr"
+ enable MTRR. This allows data transfers to the framebuffer memory
+ to occur in bursts which can significantly increase performance.
+ Not very helpful with the intel chips because of 'shared memory'.
+
+ Recommendation: set
+ (default = set)
+
+ h. "fixed"
+ disable mode switching.
+
+ Recommendation: do not set
+ (default = not set)
+
+ The binary parameters can be unset with a "no" prefix, example "noaccel".
+ The default parameter (not named) is the mode.
+
+C. Kernel booting
+
+Separate each option/option-pair by commas (,) and the option from its value
+with an equals sign (=) as in the following:
+
+video=intelfb:option1,option2=value2
+
+Sample Usage
+------------
+
+In /etc/lilo.conf, add the line:
+
+append="video=intelfb:mode=800x600-32@75,accel,hwcursor,vram=8"
+
+This will initialize the framebuffer to 800x600 at 32bpp and 75Hz. The
+framebuffer will use 8 MB of System RAM. hw acceleration of text and cursor
+will be enabled.
+
+Remarks
+-------
+
+If setting this parameter doesn't work (you stay in a 80x25 text-mode),
+you might need to set the "vga=<mode>" parameter too - see vesafb.txt
+in this directory.
+
+
+D. Module options
+
+ The module parameters are essentially similar to the kernel
+parameters. The main difference is that you need to include a Boolean value
+(1 for TRUE, and 0 for FALSE) for those options which don't need a value.
+
+Example, to enable MTRR, include "mtrr=1".
+
+Sample Usage
+------------
+
+Using the same setup as described above, load the module like this:
+
+ modprobe intelfb mode=800x600-32@75 vram=8 accel=1 hwcursor=1
+
+Or just add the following to a configuration file in /etc/modprobe.d/
+
+ options intelfb mode=800x600-32@75 vram=8 accel=1 hwcursor=1
+
+and just do a
+
+ modprobe intelfb
+
+
+E. Acknowledgment:
+
+ 1. Geert Uytterhoeven - his excellent howto and the virtual
+ framebuffer driver code made this possible.
+
+ 2. Jeff Hartmann for his agpgart code.
+
+ 3. David Dawes for his original kernel 2.4 code.
+
+ 4. The X developers. Insights were provided just by reading the
+ XFree86 source code.
+
+ 5. Antonino A. Daplas for his inspiring i810fb driver.
+
+ 6. Andrew Morton for his kernel patches maintenance.
+
+###########################
+Sylvain
diff --git a/Documentation/fb/internals.txt b/Documentation/fb/internals.txt
new file mode 100644
index 000000000..9b2a2b2f3
--- /dev/null
+++ b/Documentation/fb/internals.txt
@@ -0,0 +1,82 @@
+
+This is a first start for some documentation about frame buffer device
+internals.
+
+Geert Uytterhoeven <geert@linux-m68k.org>, 21 July 1998
+James Simmons <jsimmons@user.sf.net>, Nov 26 2002
+
+--------------------------------------------------------------------------------
+
+ *** STRUCTURES USED BY THE FRAME BUFFER DEVICE API ***
+
+The following structures play a role in the game of frame buffer devices. They
+are defined in <linux/fb.h>.
+
+1. Outside the kernel (user space)
+
+ - struct fb_fix_screeninfo
+
+ Device independent unchangeable information about a frame buffer device and
+ a specific video mode. This can be obtained using the FBIOGET_FSCREENINFO
+ ioctl.
+
+ - struct fb_var_screeninfo
+
+ Device independent changeable information about a frame buffer device and a
+ specific video mode. This can be obtained using the FBIOGET_VSCREENINFO
+ ioctl, and updated with the FBIOPUT_VSCREENINFO ioctl. If you want to pan
+ the screen only, you can use the FBIOPAN_DISPLAY ioctl.
+
+ - struct fb_cmap
+
+ Device independent colormap information. You can get and set the colormap
+ using the FBIOGETCMAP and FBIOPUTCMAP ioctls.
+
+
+2. Inside the kernel
+
+ - struct fb_info
+
+ Generic information, API and low level information about a specific frame
+ buffer device instance (slot number, board address, ...).
+
+ - struct `par'
+
+ Device dependent information that uniquely defines the video mode for this
+ particular piece of hardware.
+
+
+--------------------------------------------------------------------------------
+
+ *** VISUALS USED BY THE FRAME BUFFER DEVICE API ***
+
+
+Monochrome (FB_VISUAL_MONO01 and FB_VISUAL_MONO10)
+-------------------------------------------------
+Each pixel is either black or white.
+
+
+Pseudo color (FB_VISUAL_PSEUDOCOLOR and FB_VISUAL_STATIC_PSEUDOCOLOR)
+---------------------------------------------------------------------
+The whole pixel value is fed through a programmable lookup table that has one
+color (including red, green, and blue intensities) for each possible pixel
+value, and that color is displayed.
+
+
+True color (FB_VISUAL_TRUECOLOR)
+--------------------------------
+The pixel value is broken up into red, green, and blue fields.
+
+
+Direct color (FB_VISUAL_DIRECTCOLOR)
+------------------------------------
+The pixel value is broken up into red, green, and blue fields, each of which
+are looked up in separate red, green, and blue lookup tables.
+
+
+Grayscale displays
+------------------
+Grayscale and static grayscale are special variants of pseudo color and static
+pseudo color, where the red, green and blue components are always equal to
+each other.
+
diff --git a/Documentation/fb/lxfb.txt b/Documentation/fb/lxfb.txt
new file mode 100644
index 000000000..38b3ca6f6
--- /dev/null
+++ b/Documentation/fb/lxfb.txt
@@ -0,0 +1,52 @@
+[This file is cloned from VesaFB/aty128fb]
+
+What is lxfb?
+=================
+
+This is a graphics framebuffer driver for AMD Geode LX based processors.
+
+Advantages:
+
+ * No need to use AMD's VSA code (or other VESA emulation layer) in the
+ BIOS.
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+ without using tiny, unreadable fonts.
+ * You can run XF68_FBDev on top of /dev/fb0
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * graphic mode is slower than text mode...
+
+
+How to use it?
+==============
+
+Switching modes is done using lxfb.mode_option=<resolution>... boot
+parameter or using `fbset' program.
+
+See Documentation/fb/modedb.txt for more information on modedb
+resolutions.
+
+
+X11
+===
+
+XF68_FBDev should generally work fine, but it is non-accelerated.
+
+
+Configuration
+=============
+
+You can pass kernel command line options to lxfb with lxfb.<option>.
+For example, lxfb.mode_option=800x600@75.
+Accepted options:
+
+mode_option - specify the video mode. Of the form
+ <x>x<y>[-<bpp>][@<refresh>]
+vram - size of video ram (normally auto-detected)
+vt_switch - enable vt switching during suspend/resume. The vt
+ switch is slow, but harmless.
+
+--
+Andres Salomon <dilinger@debian.org>
diff --git a/Documentation/fb/matroxfb.txt b/Documentation/fb/matroxfb.txt
new file mode 100644
index 000000000..b95f5bb52
--- /dev/null
+++ b/Documentation/fb/matroxfb.txt
@@ -0,0 +1,413 @@
+[This file is cloned from VesaFB. Thanks go to Gerd Knorr]
+
+What is matroxfb?
+=================
+
+This is a driver for a graphic framebuffer for Matrox devices on
+Alpha, Intel and PPC boxes.
+
+Advantages:
+
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+ without using tiny, unreadable fonts.
+ * You can run XF{68,86}_FBDev or XFree86 fbdev driver on top of /dev/fb0
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * graphic mode is slower than text mode... but you should not notice
+ if you use same resolution as you used in textmode.
+
+
+How to use it?
+==============
+
+Switching modes is done using the video=matroxfb:vesa:... boot parameter
+or using `fbset' program.
+
+If you want, for example, enable a resolution of 1280x1024x24bpp you should
+pass to the kernel this command line: "video=matroxfb:vesa:0x1BB".
+
+You should compile in both vgacon (to boot if you remove you Matrox from
+box) and matroxfb (for graphics mode). You should not compile-in vesafb
+unless you have primary display on non-Matrox VBE2.0 device (see
+Documentation/fb/vesafb.txt for details).
+
+Currently supported video modes are (through vesa:... interface, PowerMac
+has [as addon] compatibility code):
+
+
+[Graphic modes]
+
+bpp | 640x400 640x480 768x576 800x600 960x720
+----+--------------------------------------------
+ 4 | 0x12 0x102
+ 8 | 0x100 0x101 0x180 0x103 0x188
+ 15 | 0x110 0x181 0x113 0x189
+ 16 | 0x111 0x182 0x114 0x18A
+ 24 | 0x1B2 0x184 0x1B5 0x18C
+ 32 | 0x112 0x183 0x115 0x18B
+
+
+[Graphic modes (continued)]
+
+bpp | 1024x768 1152x864 1280x1024 1408x1056 1600x1200
+----+------------------------------------------------
+ 4 | 0x104 0x106
+ 8 | 0x105 0x190 0x107 0x198 0x11C
+ 15 | 0x116 0x191 0x119 0x199 0x11D
+ 16 | 0x117 0x192 0x11A 0x19A 0x11E
+ 24 | 0x1B8 0x194 0x1BB 0x19C 0x1BF
+ 32 | 0x118 0x193 0x11B 0x19B
+
+
+[Text modes]
+
+text | 640x400 640x480 1056x344 1056x400 1056x480
+-----+------------------------------------------------
+ 8x8 | 0x1C0 0x108 0x10A 0x10B 0x10C
+8x16 | 2, 3, 7 0x109
+
+You can enter these number either hexadecimal (leading `0x') or decimal
+(0x100 = 256). You can also use value + 512 to achieve compatibility
+with your old number passed to vesafb.
+
+Non-listed number can be achieved by more complicated command-line, for
+example 1600x1200x32bpp can be specified by `video=matroxfb:vesa:0x11C,depth:32'.
+
+
+X11
+===
+
+XF{68,86}_FBDev should work just fine, but it is non-accelerated. On non-intel
+architectures there are some glitches for 24bpp videomodes. 8, 16 and 32bpp
+works fine.
+
+Running another (accelerated) X-Server like XF86_SVGA works too. But (at least)
+XFree servers have big troubles in multihead configurations (even on first
+head, not even talking about second). Running XFree86 4.x accelerated mga
+driver is possible, but you must not enable DRI - if you do, resolution and
+color depth of your X desktop must match resolution and color depths of your
+virtual consoles, otherwise X will corrupt accelerator settings.
+
+
+SVGALib
+=======
+
+Driver contains SVGALib compatibility code. It is turned on by choosing textual
+mode for console. You can do it at boot time by using videomode
+2,3,7,0x108-0x10C or 0x1C0. At runtime, `fbset -depth 0' does this work.
+Unfortunately, after SVGALib application exits, screen contents is corrupted.
+Switching to another console and back fixes it. I hope that it is SVGALib's
+problem and not mine, but I'm not sure.
+
+
+Configuration
+=============
+
+You can pass kernel command line options to matroxfb with
+`video=matroxfb:option1,option2:value2,option3' (multiple options should be
+separated by comma, values are separated from options by `:').
+Accepted options:
+
+mem:X - size of memory (X can be in megabytes, kilobytes or bytes)
+ You can only decrease value determined by driver because of
+ it always probe for memory. Default is to use whole detected
+ memory usable for on-screen display (i.e. max. 8 MB).
+disabled - do not load driver; you can use also `off', but `disabled'
+ is here too.
+enabled - load driver, if you have `video=matroxfb:disabled' in LILO
+ configuration, you can override it by this (you cannot override
+ `off'). It is default.
+noaccel - do not use acceleration engine. It does not work on Alphas.
+accel - use acceleration engine. It is default.
+nopan - create initial consoles with vyres = yres, thus disabling virtual
+ scrolling.
+pan - create initial consoles as tall as possible (vyres = memory/vxres).
+ It is default.
+nopciretry - disable PCI retries. It is needed for some broken chipsets,
+ it is autodetected for intel's 82437. In this case device does
+ not comply to PCI 2.1 specs (it will not guarantee that every
+ transaction terminate with success or retry in 32 PCLK).
+pciretry - enable PCI retries. It is default, except for intel's 82437.
+novga - disables VGA I/O ports. It is default if BIOS did not enable device.
+ You should not use this option, some boards then do not restart
+ without power off.
+vga - preserve state of VGA I/O ports. It is default. Driver does not
+ enable VGA I/O if BIOS did not it (it is not safe to enable it in
+ most cases).
+nobios - disables BIOS ROM. It is default if BIOS did not enable BIOS itself.
+ You should not use this option, some boards then do not restart
+ without power off.
+bios - preserve state of BIOS ROM. It is default. Driver does not enable
+ BIOS if BIOS was not enabled before.
+noinit - tells driver, that devices were already initialized. You should use
+ it if you have G100 and/or if driver cannot detect memory, you see
+ strange pattern on screen and so on. Devices not enabled by BIOS
+ are still initialized. It is default.
+init - driver initializes every device it knows about.
+memtype - specifies memory type, implies 'init'. This is valid only for G200
+ and G400 and has following meaning:
+ G200: 0 -> 2x128Kx32 chips, 2MB onboard, probably sgram
+ 1 -> 2x128Kx32 chips, 4MB onboard, probably sgram
+ 2 -> 2x256Kx32 chips, 4MB onboard, probably sgram
+ 3 -> 2x256Kx32 chips, 8MB onboard, probably sgram
+ 4 -> 2x512Kx16 chips, 8/16MB onboard, probably sdram only
+ 5 -> same as above
+ 6 -> 4x128Kx32 chips, 4MB onboard, probably sgram
+ 7 -> 4x128Kx32 chips, 8MB onboard, probably sgram
+ G400: 0 -> 2x512Kx16 SDRAM, 16/32MB
+ 2x512Kx32 SGRAM, 16/32MB
+ 1 -> 2x256Kx32 SGRAM, 8/16MB
+ 2 -> 4x128Kx32 SGRAM, 8/16MB
+ 3 -> 4x512Kx32 SDRAM, 32MB
+ 4 -> 4x256Kx32 SGRAM, 16/32MB
+ 5 -> 2x1Mx32 SDRAM, 32MB
+ 6 -> reserved
+ 7 -> reserved
+ You should use sdram or sgram parameter in addition to memtype
+ parameter.
+nomtrr - disables write combining on frame buffer. This slows down driver but
+ there is reported minor incompatibility between GUS DMA and XFree
+ under high loads if write combining is enabled (sound dropouts).
+mtrr - enables write combining on frame buffer. It speeds up video accesses
+ much. It is default. You must have MTRR support enabled in kernel
+ and your CPU must have MTRR (f.e. Pentium II have them).
+sgram - tells to driver that you have Gxx0 with SGRAM memory. It has no
+ effect without `init'.
+sdram - tells to driver that you have Gxx0 with SDRAM memory.
+ It is a default.
+inv24 - change timings parameters for 24bpp modes on Millennium and
+ Millennium II. Specify this if you see strange color shadows around
+ characters.
+noinv24 - use standard timings. It is the default.
+inverse - invert colors on screen (for LCD displays)
+noinverse - show true colors on screen. It is default.
+dev:X - bind driver to device X. Driver numbers device from 0 up to N,
+ where device 0 is first `known' device found, 1 second and so on.
+ lspci lists devices in this order.
+ Default is `every' known device.
+nohwcursor - disables hardware cursor (use software cursor instead).
+hwcursor - enables hardware cursor. It is default. If you are using
+ non-accelerated mode (`noaccel' or `fbset -accel false'), software
+ cursor is used (except for text mode).
+noblink - disables cursor blinking. Cursor in text mode always blinks (hw
+ limitation).
+blink - enables cursor blinking. It is default.
+nofastfont - disables fastfont feature. It is default.
+fastfont:X - enables fastfont feature. X specifies size of memory reserved for
+ font data, it must be >= (fontwidth*fontheight*chars_in_font)/8.
+ It is faster on Gx00 series, but slower on older cards.
+grayscale - enable grayscale summing. It works in PSEUDOCOLOR modes (text,
+ 4bpp, 8bpp). In DIRECTCOLOR modes it is limited to characters
+ displayed through putc/putcs. Direct accesses to framebuffer
+ can paint colors.
+nograyscale - disable grayscale summing. It is default.
+cross4MB - enables that pixel line can cross 4MB boundary. It is default for
+ non-Millennium.
+nocross4MB - pixel line must not cross 4MB boundary. It is default for
+ Millennium I or II, because of these devices have hardware
+ limitations which do not allow this. But this option is
+ incompatible with some (if not all yet released) versions of
+ XF86_FBDev.
+dfp - enables digital flat panel interface. This option is incompatible with
+ secondary (TV) output - if DFP is active, TV output must be
+ inactive and vice versa. DFP always uses same timing as primary
+ (monitor) output.
+dfp:X - use settings X for digital flat panel interface. X is number from
+ 0 to 0xFF, and meaning of each individual bit is described in
+ G400 manual, in description of DAC register 0x1F. For normal operation
+ you should set all bits to zero, except lowest bit. This lowest bit
+ selects who is source of display clocks, whether G400, or panel.
+ Default value is now read back from hardware - so you should specify
+ this value only if you are also using `init' parameter.
+outputs:XYZ - set mapping between CRTC and outputs. Each letter can have value
+ of 0 (for no CRTC), 1 (CRTC1) or 2 (CRTC2), and first letter corresponds
+ to primary analog output, second letter to the secondary analog output
+ and third letter to the DVI output. Default setting is 100 for
+ cards below G400 or G400 without DFP, 101 for G400 with DFP, and
+ 111 for G450 and G550. You can set mapping only on first card,
+ use matroxset for setting up other devices.
+vesa:X - selects startup videomode. X is number from 0 to 0x1FF, see table
+ above for detailed explanation. Default is 640x480x8bpp if driver
+ has 8bpp support. Otherwise first available of 640x350x4bpp,
+ 640x480x15bpp, 640x480x24bpp, 640x480x32bpp or 80x25 text
+ (80x25 text is always available).
+
+If you are not satisfied with videomode selected by `vesa' option, you
+can modify it with these options:
+
+xres:X - horizontal resolution, in pixels. Default is derived from `vesa'
+ option.
+yres:X - vertical resolution, in pixel lines. Default is derived from `vesa'
+ option.
+upper:X - top boundary: lines between end of VSYNC pulse and start of first
+ pixel line of picture. Default is derived from `vesa' option.
+lower:X - bottom boundary: lines between end of picture and start of VSYNC
+ pulse. Default is derived from `vesa' option.
+vslen:X - length of VSYNC pulse, in lines. Default is derived from `vesa'
+ option.
+left:X - left boundary: pixels between end of HSYNC pulse and first pixel.
+ Default is derived from `vesa' option.
+right:X - right boundary: pixels between end of picture and start of HSYNC
+ pulse. Default is derived from `vesa' option.
+hslen:X - length of HSYNC pulse, in pixels. Default is derived from `vesa'
+ option.
+pixclock:X - dotclocks, in ps (picoseconds). Default is derived from `vesa'
+ option and from `fh' and `fv' options.
+sync:X - sync. pulse - bit 0 inverts HSYNC polarity, bit 1 VSYNC polarity.
+ If bit 3 (value 0x08) is set, composite sync instead of HSYNC is
+ generated. If bit 5 (value 0x20) is set, sync on green is turned on.
+ Do not forget that if you want sync on green, you also probably
+ want composite sync.
+ Default depends on `vesa'.
+depth:X - Bits per pixel: 0=text, 4,8,15,16,24 or 32. Default depends on
+ `vesa'.
+
+If you know capabilities of your monitor, you can specify some (or all) of
+`maxclk', `fh' and `fv'. In this case, `pixclock' is computed so that
+pixclock <= maxclk, real_fh <= fh and real_fv <= fv.
+
+maxclk:X - maximum dotclock. X can be specified in MHz, kHz or Hz. Default is
+ `don't care'.
+fh:X - maximum horizontal synchronization frequency. X can be specified
+ in kHz or Hz. Default is `don't care'.
+fv:X - maximum vertical frequency. X must be specified in Hz. Default is
+ 70 for modes derived from `vesa' with yres <= 400, 60Hz for
+ yres > 400.
+
+
+Limitations
+===========
+
+There are known and unknown bugs, features and misfeatures.
+Currently there are following known bugs:
+ + SVGALib does not restore screen on exit
+ + generic fbcon-cfbX procedures do not work on Alphas. Due to this,
+ `noaccel' (and cfb4 accel) driver does not work on Alpha. So everyone
+ with access to /dev/fb* on Alpha can hang machine (you should restrict
+ access to /dev/fb* - everyone with access to this device can destroy
+ your monitor, believe me...).
+ + 24bpp does not support correctly XF-FBDev on big-endian architectures.
+ + interlaced text mode is not supported; it looks like hardware limitation,
+ but I'm not sure.
+ + Gxx0 SGRAM/SDRAM is not autodetected.
+ + If you are using more than one framebuffer device, you must boot kernel
+ with 'video=scrollback:0'.
+ + maybe more...
+And following misfeatures:
+ + SVGALib does not restore screen on exit.
+ + pixclock for text modes is limited by hardware to
+ 83 MHz on G200
+ 66 MHz on Millennium I
+ 60 MHz on Millennium II
+ Because I have no access to other devices, I do not know specific
+ frequencies for them. So driver does not check this and allows you to
+ set frequency higher that this. It causes sparks, black holes and other
+ pretty effects on screen. Device was not destroyed during tests. :-)
+ + my Millennium G200 oscillator has frequency range from 35 MHz to 380 MHz
+ (and it works with 8bpp on about 320 MHz dotclocks (and changed mclk)).
+ But Matrox says on product sheet that VCO limit is 50-250 MHz, so I believe
+ them (maybe that chip overheats, but it has a very big cooler (G100 has
+ none), so it should work).
+ + special mixed video/graphics videomodes of Mystique and Gx00 - 2G8V16 and
+ G16V16 are not supported
+ + color keying is not supported
+ + feature connector of Mystique and Gx00 is set to VGA mode (it is disabled
+ by BIOS)
+ + DDC (monitor detection) is supported through dualhead driver
+ + some check for input values are not so strict how it should be (you can
+ specify vslen=4000 and so on).
+ + maybe more...
+And following features:
+ + 4bpp is available only on Millennium I and Millennium II. It is hardware
+ limitation.
+ + selection between 1:5:5:5 and 5:6:5 16bpp videomode is done by -rgba
+ option of fbset: "fbset -depth 16 -rgba 5,5,5" selects 1:5:5:5, anything
+ else selects 5:6:5 mode.
+ + text mode uses 6 bit VGA palette instead of 8 bit (one of 262144 colors
+ instead of one of 16M colors). It is due to hardware limitation of
+ Millennium I/II and SVGALib compatibility.
+
+
+Benchmarks
+==========
+It is time to redraw whole screen 1000 times in 1024x768, 60Hz. It is
+time for draw 6144000 characters on screen through /dev/vcsa
+(for 32bpp it is about 3GB of data (exactly 3000 MB); for 8x16 font in
+16 seconds, i.e. 187 MBps).
+Times were obtained from one older version of driver, now they are about 3%
+faster, it is kernel-space only time on P-II/350 MHz, Millennium I in 33 MHz
+PCI slot, G200 in AGP 2x slot. I did not test vgacon.
+
+NOACCEL
+ 8x16 12x22
+ Millennium I G200 Millennium I G200
+8bpp 16.42 9.54 12.33 9.13
+16bpp 21.00 15.70 19.11 15.02
+24bpp 36.66 36.66 35.00 35.00
+32bpp 35.00 30.00 33.85 28.66
+
+ACCEL, nofastfont
+ 8x16 12x22 6x11
+ Millennium I G200 Millennium I G200 Millennium I G200
+8bpp 7.79 7.24 13.55 7.78 30.00 21.01
+16bpp 9.13 7.78 16.16 7.78 30.00 21.01
+24bpp 14.17 10.72 18.69 10.24 34.99 21.01
+32bpp 16.15 16.16 18.73 13.09 34.99 21.01
+
+ACCEL, fastfont
+ 8x16 12x22 6x11
+ Millennium I G200 Millennium I G200 Millennium I G200
+8bpp 8.41 6.01 6.54 4.37 16.00 10.51
+16bpp 9.54 9.12 8.76 6.17 17.52 14.01
+24bpp 15.00 12.36 11.67 10.00 22.01 18.32
+32bpp 16.18 18.29* 12.71 12.74 24.44 21.00
+
+TEXT
+ 8x16
+ Millennium I G200
+TEXT 3.29 1.50
+
+* Yes, it is slower than Millennium I.
+
+
+Dualhead G400
+=============
+Driver supports dualhead G400 with some limitations:
+ + secondary head shares videomemory with primary head. It is not problem
+ if you have 32MB of videoram, but if you have only 16MB, you may have
+ to think twice before choosing videomode (for example twice 1880x1440x32bpp
+ is not possible).
+ + due to hardware limitation, secondary head can use only 16 and 32bpp
+ videomodes.
+ + secondary head is not accelerated. There were bad problems with accelerated
+ XFree when secondary head used to use acceleration.
+ + secondary head always powerups in 640x480@60-32 videomode. You have to use
+ fbset to change this mode.
+ + secondary head always powerups in monitor mode. You have to use fbmatroxset
+ to change it to TV mode. Also, you must select at least 525 lines for
+ NTSC output and 625 lines for PAL output.
+ + kernel is not fully multihead ready. So some things are impossible to do.
+ + if you compiled it as module, you must insert i2c-matroxfb, matroxfb_maven
+ and matroxfb_crtc2 into kernel.
+
+
+Dualhead G450
+=============
+Driver supports dualhead G450 with some limitations:
+ + secondary head shares videomemory with primary head. It is not problem
+ if you have 32MB of videoram, but if you have only 16MB, you may have
+ to think twice before choosing videomode.
+ + due to hardware limitation, secondary head can use only 16 and 32bpp
+ videomodes.
+ + secondary head is not accelerated.
+ + secondary head always powerups in 640x480@60-32 videomode. You have to use
+ fbset to change this mode.
+ + TV output is not supported
+ + kernel is not fully multihead ready, so some things are impossible to do.
+ + if you compiled it as module, you must insert matroxfb_g450 and matroxfb_crtc2
+ into kernel.
+
+--
+Petr Vandrovec <vandrove@vc.cvut.cz>
diff --git a/Documentation/fb/metronomefb.txt b/Documentation/fb/metronomefb.txt
new file mode 100644
index 000000000..237ca4125
--- /dev/null
+++ b/Documentation/fb/metronomefb.txt
@@ -0,0 +1,36 @@
+ Metronomefb
+ -----------
+Maintained by Jaya Kumar <jayakumar.lkml.gmail.com>
+Last revised: Mar 10, 2008
+
+Metronomefb is a driver for the Metronome display controller. The controller
+is from E-Ink Corporation. It is intended to be used to drive the E-Ink
+Vizplex display media. E-Ink hosts some details of this controller and the
+display media here http://www.e-ink.com/products/matrix/metronome.html .
+
+Metronome is interfaced to the host CPU through the AMLCD interface. The
+host CPU generates the control information and the image in a framebuffer
+which is then delivered to the AMLCD interface by a host specific method.
+The display and error status are each pulled through individual GPIOs.
+
+Metronomefb is platform independent and depends on a board specific driver
+to do all physical IO work. Currently, an example is implemented for the
+PXA board used in the AM-200 EPD devkit. This example is am200epd.c
+
+Metronomefb requires waveform information which is delivered via the AMLCD
+interface to the metronome controller. The waveform information is expected to
+be delivered from userspace via the firmware class interface. The waveform file
+can be compressed as long as your udev or hotplug script is aware of the need
+to uncompress it before delivering it. metronomefb will ask for metronome.wbf
+which would typically go into /lib/firmware/metronome.wbf depending on your
+udev/hotplug setup. I have only tested with a single waveform file which was
+originally labeled 23P01201_60_WT0107_MTC. I do not know what it stands for.
+Caution should be exercised when manipulating the waveform as there may be
+a possibility that it could have some permanent effects on the display media.
+I neither have access to nor know exactly what the waveform does in terms of
+the physical media.
+
+Metronomefb uses the deferred IO interface so that it can provide a memory
+mappable frame buffer. It has been tested with tinyx (Xfbdev). It is known
+to work at this time with xeyes, xclock, xloadimage, xpdf.
+
diff --git a/Documentation/fb/modedb.txt b/Documentation/fb/modedb.txt
new file mode 100644
index 000000000..16aa08453
--- /dev/null
+++ b/Documentation/fb/modedb.txt
@@ -0,0 +1,151 @@
+
+
+ modedb default video mode support
+
+
+Currently all frame buffer device drivers have their own video mode databases,
+which is a mess and a waste of resources. The main idea of modedb is to have
+
+ - one routine to probe for video modes, which can be used by all frame buffer
+ devices
+ - one generic video mode database with a fair amount of standard videomodes
+ (taken from XFree86)
+ - the possibility to supply your own mode database for graphics hardware that
+ needs non-standard modes, like amifb and Mac frame buffer drivers (which
+ use macmodes.c)
+
+When a frame buffer device receives a video= option it doesn't know, it should
+consider that to be a video mode option. If no frame buffer device is specified
+in a video= option, fbmem considers that to be a global video mode option.
+
+Valid mode specifiers (mode_option argument):
+
+ <xres>x<yres>[M][R][-<bpp>][@<refresh>][i][m][eDd]
+ <name>[-<bpp>][@<refresh>]
+
+with <xres>, <yres>, <bpp> and <refresh> decimal numbers and <name> a string.
+Things between square brackets are optional.
+
+If 'M' is specified in the mode_option argument (after <yres> and before
+<bpp> and <refresh>, if specified) the timings will be calculated using
+VESA(TM) Coordinated Video Timings instead of looking up the mode from a table.
+If 'R' is specified, do a 'reduced blanking' calculation for digital displays.
+If 'i' is specified, calculate for an interlaced mode. And if 'm' is
+specified, add margins to the calculation (1.8% of xres rounded down to 8
+pixels and 1.8% of yres).
+
+ Sample usage: 1024x768M@60m - CVT timing with margins
+
+DRM drivers also add options to enable or disable outputs:
+
+'e' will force the display to be enabled, i.e. it will override the detection
+if a display is connected. 'D' will force the display to be enabled and use
+digital output. This is useful for outputs that have both analog and digital
+signals (e.g. HDMI and DVI-I). For other outputs it behaves like 'e'. If 'd'
+is specified the output is disabled.
+
+You can additionally specify which output the options matches to.
+To force the VGA output to be enabled and drive a specific mode say:
+ video=VGA-1:1280x1024@60me
+
+Specifying the option multiple times for different ports is possible, e.g.:
+ video=LVDS-1:d video=HDMI-1:D
+
+***** oOo ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo *****
+
+What is the VESA(TM) Coordinated Video Timings (CVT)?
+
+From the VESA(TM) Website:
+
+ "The purpose of CVT is to provide a method for generating a consistent
+ and coordinated set of standard formats, display refresh rates, and
+ timing specifications for computer display products, both those
+ employing CRTs, and those using other display technologies. The
+ intention of CVT is to give both source and display manufacturers a
+ common set of tools to enable new timings to be developed in a
+ consistent manner that ensures greater compatibility."
+
+This is the third standard approved by VESA(TM) concerning video timings. The
+first was the Discrete Video Timings (DVT) which is a collection of
+pre-defined modes approved by VESA(TM). The second is the Generalized Timing
+Formula (GTF) which is an algorithm to calculate the timings, given the
+pixelclock, the horizontal sync frequency, or the vertical refresh rate.
+
+The GTF is limited by the fact that it is designed mainly for CRT displays.
+It artificially increases the pixelclock because of its high blanking
+requirement. This is inappropriate for digital display interface with its high
+data rate which requires that it conserves the pixelclock as much as possible.
+Also, GTF does not take into account the aspect ratio of the display.
+
+The CVT addresses these limitations. If used with CRT's, the formula used
+is a derivation of GTF with a few modifications. If used with digital
+displays, the "reduced blanking" calculation can be used.
+
+From the framebuffer subsystem perspective, new formats need not be added
+to the global mode database whenever a new mode is released by display
+manufacturers. Specifying for CVT will work for most, if not all, relatively
+new CRT displays and probably with most flatpanels, if 'reduced blanking'
+calculation is specified. (The CVT compatibility of the display can be
+determined from its EDID. The version 1.3 of the EDID has extra 128-byte
+blocks where additional timing information is placed. As of this time, there
+is no support yet in the layer to parse this additional blocks.)
+
+CVT also introduced a new naming convention (should be seen from dmesg output):
+
+ <pix>M<a>[-R]
+
+ where: pix = total amount of pixels in MB (xres x yres)
+ M = always present
+ a = aspect ratio (3 - 4:3; 4 - 5:4; 9 - 15:9, 16:9; A - 16:10)
+ -R = reduced blanking
+
+ example: .48M3-R - 800x600 with reduced blanking
+
+Note: VESA(TM) has restrictions on what is a standard CVT timing:
+
+ - aspect ratio can only be one of the above values
+ - acceptable refresh rates are 50, 60, 70 or 85 Hz only
+ - if reduced blanking, the refresh rate must be at 60Hz
+
+If one of the above are not satisfied, the kernel will print a warning but the
+timings will still be calculated.
+
+***** oOo ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo ***** oOo *****
+
+To find a suitable video mode, you just call
+
+int __init fb_find_mode(struct fb_var_screeninfo *var,
+ struct fb_info *info, const char *mode_option,
+ const struct fb_videomode *db, unsigned int dbsize,
+ const struct fb_videomode *default_mode,
+ unsigned int default_bpp)
+
+with db/dbsize your non-standard video mode database, or NULL to use the
+standard video mode database.
+
+fb_find_mode() first tries the specified video mode (or any mode that matches,
+e.g. there can be multiple 640x480 modes, each of them is tried). If that
+fails, the default mode is tried. If that fails, it walks over all modes.
+
+To specify a video mode at bootup, use the following boot options:
+ video=<driver>:<xres>x<yres>[-<bpp>][@refresh]
+
+where <driver> is a name from the table below. Valid default modes can be
+found in linux/drivers/video/modedb.c. Check your driver's documentation.
+There may be more modes.
+
+ Drivers that support modedb boot options
+ Boot Name Cards Supported
+
+ amifb - Amiga chipset frame buffer
+ aty128fb - ATI Rage128 / Pro frame buffer
+ atyfb - ATI Mach64 frame buffer
+ pm2fb - Permedia 2/2V frame buffer
+ pm3fb - Permedia 3 frame buffer
+ sstfb - Voodoo 1/2 (SST1) chipset frame buffer
+ tdfxfb - 3D Fx frame buffer
+ tridentfb - Trident (Cyber)blade chipset frame buffer
+ vt8623fb - VIA 8623 frame buffer
+
+BTW, only a few fb drivers use this at the moment. Others are to follow
+(feel free to send patches). The DRM drivers also support this.
diff --git a/Documentation/fb/pvr2fb.txt b/Documentation/fb/pvr2fb.txt
new file mode 100644
index 000000000..36bdeff58
--- /dev/null
+++ b/Documentation/fb/pvr2fb.txt
@@ -0,0 +1,65 @@
+$Id: pvr2fb.txt,v 1.1 2001/05/24 05:09:16 mrbrown Exp $
+
+What is pvr2fb?
+===============
+
+This is a driver for PowerVR 2 based graphics frame buffers, such as the
+one found in the Dreamcast.
+
+Advantages:
+
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+ without using tiny, unreadable fonts (NOT on the Dreamcast)
+ * You can run XF86_FBDev on top of /dev/fb0
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * Driver is largely untested on non-Dreamcast systems.
+
+Configuration
+=============
+
+You can pass kernel command line options to pvr2fb with
+`video=pvr2fb:option1,option2:value2,option3' (multiple options should be
+separated by comma, values are separated from options by `:').
+Accepted options:
+
+font:X - default font to use. All fonts are supported, including the
+ SUN12x22 font which is very nice at high resolutions.
+
+
+mode:X - default video mode with format [xres]x[yres]-<bpp>@<refresh rate>
+ The following video modes are supported:
+ 640x640-16@60, 640x480-24@60, 640x480-32@60. The Dreamcast
+ defaults to 640x480-16@60. At the time of writing the
+ 24bpp and 32bpp modes function poorly. Work to fix that is
+ ongoing
+
+ Note: the 640x240 mode is currently broken, and should not be
+ used for any reason. It is only mentioned here as a reference.
+
+inverse - invert colors on screen (for LCD displays)
+
+nomtrr - disables write combining on frame buffer. This slows down driver
+ but there is reported minor incompatibility between GUS DMA and
+ XFree under high loads if write combining is enabled (sound
+ dropouts). MTRR is enabled by default on systems that have it
+ configured and that support it.
+
+cable:X - cable type. This can be any of the following: vga, rgb, and
+ composite. If none is specified, we guess.
+
+output:X - output type. This can be any of the following: pal, ntsc, and
+ vga. If none is specified, we guess.
+
+X11
+===
+
+XF86_FBDev has been shown to work on the Dreamcast in the past - though not yet
+on any 2.6 series kernel.
+
+--
+Paul Mundt <lethal@linuxdc.org>
+Updated by Adrian McMenamin <adrian@mcmen.demon.co.uk>
+
diff --git a/Documentation/fb/pxafb.txt b/Documentation/fb/pxafb.txt
new file mode 100644
index 000000000..d143a0a74
--- /dev/null
+++ b/Documentation/fb/pxafb.txt
@@ -0,0 +1,142 @@
+Driver for PXA25x LCD controller
+================================
+
+The driver supports the following options, either via
+options=<OPTIONS> when modular or video=pxafb:<OPTIONS> when built in.
+
+For example:
+ modprobe pxafb options=vmem:2M,mode:640x480-8,passive
+or on the kernel command line
+ video=pxafb:vmem:2M,mode:640x480-8,passive
+
+vmem: VIDEO_MEM_SIZE
+ Amount of video memory to allocate (can be suffixed with K or M
+ for kilobytes or megabytes)
+
+mode:XRESxYRES[-BPP]
+ XRES == LCCR1_PPL + 1
+ YRES == LLCR2_LPP + 1
+ The resolution of the display in pixels
+ BPP == The bit depth. Valid values are 1, 2, 4, 8 and 16.
+
+pixclock:PIXCLOCK
+ Pixel clock in picoseconds
+
+left:LEFT == LCCR1_BLW + 1
+right:RIGHT == LCCR1_ELW + 1
+hsynclen:HSYNC == LCCR1_HSW + 1
+upper:UPPER == LCCR2_BFW
+lower:LOWER == LCCR2_EFR
+vsynclen:VSYNC == LCCR2_VSW + 1
+ Display margins and sync times
+
+color | mono => LCCR0_CMS
+ umm...
+
+active | passive => LCCR0_PAS
+ Active (TFT) or Passive (STN) display
+
+single | dual => LCCR0_SDS
+ Single or dual panel passive display
+
+4pix | 8pix => LCCR0_DPD
+ 4 or 8 pixel monochrome single panel data
+
+hsync:HSYNC
+vsync:VSYNC
+ Horizontal and vertical sync. 0 => active low, 1 => active
+ high.
+
+dpc:DPC
+ Double pixel clock. 1=>true, 0=>false
+
+outputen:POLARITY
+ Output Enable Polarity. 0 => active low, 1 => active high
+
+pixclockpol:POLARITY
+ pixel clock polarity
+ 0 => falling edge, 1 => rising edge
+
+
+Overlay Support for PXA27x and later LCD controllers
+====================================================
+
+ PXA27x and later processors support overlay1 and overlay2 on-top of the
+ base framebuffer (although under-neath the base is also possible). They
+ support palette and no-palette RGB formats, as well as YUV formats (only
+ available on overlay2). These overlays have dedicated DMA channels and
+ behave in a similar way as a framebuffer.
+
+ However, there are some differences between these overlay framebuffers
+ and normal framebuffers, as listed below:
+
+ 1. overlay can start at a 32-bit word aligned position within the base
+ framebuffer, which means they have a start (x, y). This information
+ is encoded into var->nonstd (no, var->xoffset and var->yoffset are
+ not for such purpose).
+
+ 2. overlay framebuffer is allocated dynamically according to specified
+ 'struct fb_var_screeninfo', the amount is decided by:
+
+ var->xres_virtual * var->yres_virtual * bpp
+
+ bpp = 16 -- for RGB565 or RGBT555
+ = 24 -- for YUV444 packed
+ = 24 -- for YUV444 planar
+ = 16 -- for YUV422 planar (1 pixel = 1 Y + 1/2 Cb + 1/2 Cr)
+ = 12 -- for YUV420 planar (1 pixel = 1 Y + 1/4 Cb + 1/4 Cr)
+
+ NOTE:
+
+ a. overlay does not support panning in x-direction, thus
+ var->xres_virtual will always be equal to var->xres
+
+ b. line length of overlay(s) must be on a 32-bit word boundary,
+ for YUV planar modes, it is a requirement for the component
+ with minimum bits per pixel, e.g. for YUV420, Cr component
+ for one pixel is actually 2-bits, it means the line length
+ should be a multiple of 16-pixels
+
+ c. starting horizontal position (XPOS) should start on a 32-bit
+ word boundary, otherwise the fb_check_var() will just fail.
+
+ d. the rectangle of the overlay should be within the base plane,
+ otherwise fail
+
+ Applications should follow the sequence below to operate an overlay
+ framebuffer:
+
+ a. open("/dev/fb[1-2]", ...)
+ b. ioctl(fd, FBIOGET_VSCREENINFO, ...)
+ c. modify 'var' with desired parameters:
+ 1) var->xres and var->yres
+ 2) larger var->yres_virtual if more memory is required,
+ usually for double-buffering
+ 3) var->nonstd for starting (x, y) and color format
+ 4) var->{red, green, blue, transp} if RGB mode is to be used
+ d. ioctl(fd, FBIOPUT_VSCREENINFO, ...)
+ e. ioctl(fd, FBIOGET_FSCREENINFO, ...)
+ f. mmap
+ g. ...
+
+ 3. for YUV planar formats, these are actually not supported within the
+ framebuffer framework, application has to take care of the offsets
+ and lengths of each component within the framebuffer.
+
+ 4. var->nonstd is used to pass starting (x, y) position and color format,
+ the detailed bit fields are shown below:
+
+ 31 23 20 10 0
+ +-----------------+---+----------+----------+
+ | ... unused ... |FOR| XPOS | YPOS |
+ +-----------------+---+----------+----------+
+
+ FOR - color format, as defined by OVERLAY_FORMAT_* in pxafb.h
+ 0 - RGB
+ 1 - YUV444 PACKED
+ 2 - YUV444 PLANAR
+ 3 - YUV422 PLANAR
+ 4 - YUR420 PLANAR
+
+ XPOS - starting horizontal position
+ YPOS - starting vertical position
diff --git a/Documentation/fb/s3fb.txt b/Documentation/fb/s3fb.txt
new file mode 100644
index 000000000..2c97770bd
--- /dev/null
+++ b/Documentation/fb/s3fb.txt
@@ -0,0 +1,82 @@
+
+ s3fb - fbdev driver for S3 Trio/Virge chips
+ ===========================================
+
+
+Supported Hardware
+==================
+
+ S3 Trio32
+ S3 Trio64 (and variants V+, UV+, V2/DX, V2/GX)
+ S3 Virge (and variants VX, DX, GX and GX2+)
+ S3 Plato/PX (completely untested)
+ S3 Aurora64V+ (completely untested)
+
+ - only PCI bus supported
+ - only BIOS initialized VGA devices supported
+ - probably not working on big endian
+
+I tested s3fb on Trio64 (plain, V+ and V2/DX) and Virge (plain, VX, DX),
+all on i386.
+
+
+Supported Features
+==================
+
+ * 4 bpp pseudocolor modes (with 18bit palette, two variants)
+ * 8 bpp pseudocolor mode (with 18bit palette)
+ * 16 bpp truecolor modes (RGB 555 and RGB 565)
+ * 24 bpp truecolor mode (RGB 888) on (only on Virge VX)
+ * 32 bpp truecolor mode (RGB 888) on (not on Virge VX)
+ * text mode (activated by bpp = 0)
+ * interlaced mode variant (not available in text mode)
+ * doublescan mode variant (not available in text mode)
+ * panning in both directions
+ * suspend/resume support
+ * DPMS support
+
+Text mode is supported even in higher resolutions, but there is limitation to
+lower pixclocks (maximum usually between 50-60 MHz, depending on specific
+hardware, i get best results from plain S3 Trio32 card - about 75 MHz). This
+limitation is not enforced by driver. Text mode supports 8bit wide fonts only
+(hardware limitation) and 16bit tall fonts (driver limitation). Text mode
+support is broken on S3 Trio64 V2/DX.
+
+There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
+packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
+with interleaved planes (1 byte interleave), MSB first. Both modes support
+8bit wide fonts only (driver limitation).
+
+Suspend/resume works on systems that initialize video card during resume and
+if device is active (for example used by fbcon).
+
+
+Missing Features
+================
+(alias TODO list)
+
+ * secondary (not initialized by BIOS) device support
+ * big endian support
+ * Zorro bus support
+ * MMIO support
+ * 24 bpp mode support on more cards
+ * support for fontwidths != 8 in 4 bpp modes
+ * support for fontheight != 16 in text mode
+ * composite and external sync (is anyone able to test this?)
+ * hardware cursor
+ * video overlay support
+ * vsync synchronization
+ * feature connector support
+ * acceleration support (8514-like 2D, Virge 3D, busmaster transfers)
+ * better values for some magic registers (performance issues)
+
+
+Known bugs
+==========
+
+ * cursor disable in text mode doesn't work
+ * text mode broken on S3 Trio64 V2/DX
+
+
+--
+Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/fb/sa1100fb.txt b/Documentation/fb/sa1100fb.txt
new file mode 100644
index 000000000..f1b422046
--- /dev/null
+++ b/Documentation/fb/sa1100fb.txt
@@ -0,0 +1,39 @@
+[This file is cloned from VesaFB/matroxfb]
+
+What is sa1100fb?
+=================
+
+This is a driver for a graphic framebuffer for the SA-1100 LCD
+controller.
+
+Configuration
+==============
+
+For most common passive displays, giving the option
+
+video=sa1100fb:bpp:<value>,lccr0:<value>,lccr1:<value>,lccr2:<value>,lccr3:<value>
+
+on the kernel command line should be enough to configure the
+controller. The bits per pixel (bpp) value should be 4, 8, 12, or
+16. LCCR values are display-specific and should be computed as
+documented in the SA-1100 Developer's Manual, Section 11.7. Dual-panel
+displays are supported as long as the SDS bit is set in LCCR0; GPIO<9:2>
+are used for the lower panel.
+
+For active displays or displays requiring additional configuration
+(controlling backlights, powering on the LCD, etc.), the command line
+options may not be enough to configure the display. Adding sections to
+sa1100fb_init_fbinfo(), sa1100fb_activate_var(),
+sa1100fb_disable_lcd_controller(), and sa1100fb_enable_lcd_controller()
+will probably be necessary.
+
+Accepted options:
+
+bpp:<value> Configure for <value> bits per pixel
+lccr0:<value> Configure LCD control register 0 (11.7.3)
+lccr1:<value> Configure LCD control register 1 (11.7.4)
+lccr2:<value> Configure LCD control register 2 (11.7.5)
+lccr3:<value> Configure LCD control register 3 (11.7.6)
+
+--
+Mark Huang <mhuang@livetoy.com>
diff --git a/Documentation/fb/sh7760fb.txt b/Documentation/fb/sh7760fb.txt
new file mode 100644
index 000000000..b994c3b10
--- /dev/null
+++ b/Documentation/fb/sh7760fb.txt
@@ -0,0 +1,131 @@
+SH7760/SH7763 integrated LCDC Framebuffer driver
+================================================
+
+0. Overview
+-----------
+The SH7760/SH7763 have an integrated LCD Display controller (LCDC) which
+supports (in theory) resolutions ranging from 1x1 to 1024x1024,
+with color depths ranging from 1 to 16 bits, on STN, DSTN and TFT Panels.
+
+Caveats:
+* Framebuffer memory must be a large chunk allocated at the top
+ of Area3 (HW requirement). Because of this requirement you should NOT
+ make the driver a module since at runtime it may become impossible to
+ get a large enough contiguous chunk of memory.
+
+* The driver does not support changing resolution while loaded
+ (displays aren't hotpluggable anyway)
+
+* Heavy flickering may be observed
+ a) if you're using 15/16bit color modes at >= 640x480 px resolutions,
+ b) during PCMCIA (or any other slow bus) activity.
+
+* Rotation works only 90degress clockwise, and only if horizontal
+ resolution is <= 320 pixels.
+
+files: drivers/video/sh7760fb.c
+ include/asm-sh/sh7760fb.h
+ Documentation/fb/sh7760fb.txt
+
+1. Platform setup
+-----------------
+SH7760:
+ Video data is fetched via the DMABRG DMA engine, so you have to
+ configure the SH DMAC for DMABRG mode (write 0x94808080 to the
+ DMARSRA register somewhere at boot).
+
+ PFC registers PCCR and PCDR must be set to peripheral mode.
+ (write zeros to both).
+
+The driver does NOT do the above for you since board setup is, well, job
+of the board setup code.
+
+2. Panel definitions
+--------------------
+The LCDC must explicitly be told about the type of LCD panel
+attached. Data must be wrapped in a "struct sh7760fb_platdata" and
+passed to the driver as platform_data.
+
+Suggest you take a closer look at the SH7760 Manual, Section 30.
+(http://documentation.renesas.com/eng/products/mpumcu/e602291_sh7760.pdf)
+
+The following code illustrates what needs to be done to
+get the framebuffer working on a 640x480 TFT:
+
+====================== cut here ======================================
+
+#include <linux/fb.h>
+#include <asm/sh7760fb.h>
+
+/*
+ * NEC NL6440bc26-01 640x480 TFT
+ * dotclock 25175 kHz
+ * Xres 640 Yres 480
+ * Htotal 800 Vtotal 525
+ * HsynStart 656 VsynStart 490
+ * HsynLenn 30 VsynLenn 2
+ *
+ * The linux framebuffer layer does not use the syncstart/synclen
+ * values but right/left/upper/lower margin values. The comments
+ * for the x_margin explain how to calculate those from given
+ * panel sync timings.
+ */
+static struct fb_videomode nl6448bc26 = {
+ .name = "NL6448BC26",
+ .refresh = 60,
+ .xres = 640,
+ .yres = 480,
+ .pixclock = 39683, /* in picoseconds! */
+ .hsync_len = 30,
+ .vsync_len = 2,
+ .left_margin = 114, /* HTOT - (HSYNSLEN + HSYNSTART) */
+ .right_margin = 16, /* HSYNSTART - XRES */
+ .upper_margin = 33, /* VTOT - (VSYNLEN + VSYNSTART) */
+ .lower_margin = 10, /* VSYNSTART - YRES */
+ .sync = FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT,
+ .vmode = FB_VMODE_NONINTERLACED,
+ .flag = 0,
+};
+
+static struct sh7760fb_platdata sh7760fb_nl6448 = {
+ .def_mode = &nl6448bc26,
+ .ldmtr = LDMTR_TFT_COLOR_16, /* 16bit TFT panel */
+ .lddfr = LDDFR_8BPP, /* we want 8bit output */
+ .ldpmmr = 0x0070,
+ .ldpspr = 0x0500,
+ .ldaclnr = 0,
+ .ldickr = LDICKR_CLKSRC(LCDC_CLKSRC_EXTERNAL) |
+ LDICKR_CLKDIV(1),
+ .rotate = 0,
+ .novsync = 1,
+ .blank = NULL,
+};
+
+/* SH7760:
+ * 0xFE300800: 256 * 4byte xRGB palette ram
+ * 0xFE300C00: 42 bytes ctrl registers
+ */
+static struct resource sh7760_lcdc_res[] = {
+ [0] = {
+ .start = 0xFE300800,
+ .end = 0xFE300CFF,
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = 65,
+ .end = 65,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct platform_device sh7760_lcdc_dev = {
+ .dev = {
+ .platform_data = &sh7760fb_nl6448,
+ },
+ .name = "sh7760-lcdc",
+ .id = -1,
+ .resource = sh7760_lcdc_res,
+ .num_resources = ARRAY_SIZE(sh7760_lcdc_res),
+};
+
+====================== cut here ======================================
diff --git a/Documentation/fb/sisfb.txt b/Documentation/fb/sisfb.txt
new file mode 100644
index 000000000..2e68e503e
--- /dev/null
+++ b/Documentation/fb/sisfb.txt
@@ -0,0 +1,158 @@
+
+What is sisfb?
+==============
+
+sisfb is a framebuffer device driver for SiS (Silicon Integrated Systems)
+graphics chips. Supported are:
+
+- SiS 300 series: SiS 300/305, 540, 630(S), 730(S)
+- SiS 315 series: SiS 315/H/PRO, 55x, (M)65x, 740, (M)661(F/M)X, (M)741(GX)
+- SiS 330 series: SiS 330 ("Xabre"), (M)760
+
+
+Why do I need a framebuffer driver?
+===================================
+
+sisfb is eg. useful if you want a high-resolution text console. Besides that,
+sisfb is required to run DirectFB (which comes with an additional, dedicated
+driver for the 315 series).
+
+On the 300 series, sisfb on kernels older than 2.6.3 furthermore plays an
+important role in connection with DRM/DRI: Sisfb manages the memory heap
+used by DRM/DRI for 3D texture and other data. This memory management is
+required for using DRI/DRM.
+
+Kernels >= around 2.6.3 do not need sisfb any longer for DRI/DRM memory
+management. The SiS DRM driver has been updated and features a memory manager
+of its own (which will be used if sisfb is not compiled). So unless you want
+a graphical console, you don't need sisfb on kernels >=2.6.3.
+
+Sidenote: Since this seems to be a commonly made mistake: sisfb and vesafb
+cannot be active at the same time! Do only select one of them in your kernel
+configuration.
+
+
+How are parameters passed to sisfb?
+===================================
+
+Well, it depends: If compiled statically into the kernel, use lilo's append
+statement to add the parameters to the kernel command line. Please see lilo's
+(or GRUB's) documentation for more information. If sisfb is a kernel module,
+parameters are given with the modprobe (or insmod) command.
+
+Example for sisfb as part of the static kernel: Add the following line to your
+lilo.conf:
+
+ append="video=sisfb:mode:1024x768x16,mem:12288,rate:75"
+
+Example for sisfb as a module: Start sisfb by typing
+
+ modprobe sisfb mode=1024x768x16 rate=75 mem=12288
+
+A common mistake is that folks use a wrong parameter format when using the
+driver compiled into the kernel. Please note: If compiled into the kernel,
+the parameter format is video=sisfb:mode:none or video=sisfb:mode:1024x768x16
+(or whatever mode you want to use, alternatively using any other format
+described above or the vesa keyword instead of mode). If compiled as a module,
+the parameter format reads mode=none or mode=1024x768x16 (or whatever mode you
+want to use). Using a "=" for a ":" (and vice versa) is a huge difference!
+Additionally: If you give more than one argument to the in-kernel sisfb, the
+arguments are separated with ",". For example:
+
+ video=sisfb:mode:1024x768x16,rate:75,mem:12288
+
+
+How do I use it?
+================
+
+Preface statement: This file only covers very little of the driver's
+capabilities and features. Please refer to the author's and maintainer's
+website at http://www.winischhofer.net/linuxsisvga.shtml for more
+information. Additionally, "modinfo sisfb" gives an overview over all
+supported options including some explanation.
+
+The desired display mode can be specified using the keyword "mode" with
+a parameter in one of the following formats:
+ - XxYxDepth or
+ - XxY-Depth or
+ - XxY-Depth@Rate or
+ - XxY
+ - or simply use the VESA mode number in hexadecimal or decimal.
+
+For example: 1024x768x16, 1024x768-16@75, 1280x1024-16. If no depth is
+specified, it defaults to 8. If no rate is given, it defaults to 60Hz. Depth 32
+means 24bit color depth (but 32 bit framebuffer depth, which is not relevant
+to the user).
+
+Additionally, sisfb understands the keyword "vesa" followed by a VESA mode
+number in decimal or hexadecimal. For example: vesa=791 or vesa=0x117. Please
+use either "mode" or "vesa" but not both.
+
+Linux 2.4 only: If no mode is given, sisfb defaults to "no mode" (mode=none) if
+compiled as a module; if sisfb is statically compiled into the kernel, it
+defaults to 800x600x8 unless CRT2 type is LCD, in which case the LCD's native
+resolution is used. If you want to switch to a different mode, use the fbset
+shell command.
+
+Linux 2.6 only: If no mode is given, sisfb defaults to 800x600x8 unless CRT2
+type is LCD, in which case it defaults to the LCD's native resolution. If
+you want to switch to another mode, use the stty shell command.
+
+You should compile in both vgacon (to boot if you remove you SiS card from
+your system) and sisfb (for graphics mode). Under Linux 2.6, also "Framebuffer
+console support" (fbcon) is needed for a graphical console.
+
+You should *not* compile-in vesafb. And please do not use the "vga=" keyword
+in lilo's or grub's configuration file; mode selection is done using the
+"mode" or "vesa" keywords as a parameter. See above and below.
+
+
+X11
+===
+
+If using XFree86 or X.org, it is recommended that you don't use the "fbdev"
+driver but the dedicated "sis" X driver. The "sis" X driver and sisfb are
+developed by the same person (Thomas Winischhofer) and cooperate well with
+each other.
+
+
+SVGALib
+=======
+
+SVGALib, if directly accessing the hardware, never restores the screen
+correctly, especially on laptops or if the output devices are LCD or TV.
+Therefore, use the chipset "FBDEV" in SVGALib configuration. This will make
+SVGALib use the framebuffer device for mode switches and restoration.
+
+
+Configuration
+=============
+
+(Some) accepted options:
+
+off - Disable sisfb. This option is only understood if sisfb is
+ in-kernel, not a module.
+mem:X - size of memory for the console, rest will be used for DRI/DRM. X
+ is in kilobytes. On 300 series, the default is 4096, 8192 or
+ 16384 (each in kilobyte) depending on how much video ram the card
+ has. On 315/330 series, the default is the maximum available ram
+ (since DRI/DRM is not supported for these chipsets).
+noaccel - do not use 2D acceleration engine. (Default: use acceleration)
+noypan - disable y-panning and scroll by redrawing the entire screen.
+ This is much slower than y-panning. (Default: use y-panning)
+vesa:X - selects startup videomode. X is number from 0 to 0x1FF and
+ represents the VESA mode number (can be given in decimal or
+ hexadecimal form, the latter prefixed with "0x").
+mode:X - selects startup videomode. Please see above for the format of
+ "X".
+
+Boolean options such as "noaccel" or "noypan" are to be given without a
+parameter if sisfb is in-kernel (for example "video=sisfb:noypan). If
+sisfb is a module, these are to be set to 1 (for example "modprobe sisfb
+noypan=1").
+
+--
+Thomas Winischhofer <thomas@winischhofer.net>
+May 27, 2004
+
+
diff --git a/Documentation/fb/sm501.txt b/Documentation/fb/sm501.txt
new file mode 100644
index 000000000..187f3b3cc
--- /dev/null
+++ b/Documentation/fb/sm501.txt
@@ -0,0 +1,10 @@
+Configuration:
+
+You can pass the following kernel command line options to sm501 videoframebuffer:
+
+ sm501fb.bpp= SM501 Display driver:
+ Specify bits-per-pixel if not specified by 'mode'
+
+ sm501fb.mode= SM501 Display driver:
+ Specify resolution as
+ "<xres>x<yres>[-<bpp>][@<refresh>]"
diff --git a/Documentation/fb/sstfb.txt b/Documentation/fb/sstfb.txt
new file mode 100644
index 000000000..13db1075e
--- /dev/null
+++ b/Documentation/fb/sstfb.txt
@@ -0,0 +1,174 @@
+
+Introduction
+
+ This is a frame buffer device driver for 3dfx' Voodoo Graphics
+ (aka voodoo 1, aka sst1) and Voodoo² (aka Voodoo 2, aka CVG) based
+ video boards. It's highly experimental code, but is guaranteed to work
+ on my computer, with my "Maxi Gamer 3D" and "Maxi Gamer 3d²" boards,
+ and with me "between chair and keyboard". Some people tested other
+ combinations and it seems that it works.
+ The main page is located at <http://sstfb.sourceforge.net>, and if
+ you want the latest version, check out the CVS, as the driver is a work
+ in progress, I feel uncomfortable with releasing tarballs of something
+ not completely working...Don't worry, it's still more than usable
+ (I eat my own dog food)
+
+ Please read the Bug section, and report any success or failure to me
+ (Ghozlane Toumi <gtoumi@laposte.net>).
+ BTW, If you have only one monitor , and you don't feel like playing
+ with the vga passthrou cable, I can only suggest borrowing a screen
+ somewhere...
+
+
+Installation
+
+ This driver (should) work on ix86, with "late" 2.2.x kernel (tested
+ with x = 19) and "recent" 2.4.x kernel, as a module or compiled in.
+ It has been included in mainstream kernel since the infamous 2.4.10.
+ You can apply the patches found in sstfb/kernel/*-2.{2|4}.x.patch,
+ and copy sstfb.c to linux/drivers/video/, or apply a single patch,
+ sstfb/patch-2.{2|4}.x-sstfb-yymmdd to your linux source tree.
+
+ Then configure your kernel as usual: choose "m" or "y" to 3Dfx Voodoo
+ Graphics in section "console". Compile, install, have fun... and please
+ drop me a report :)
+
+
+Module Usage
+
+ Warnings.
+ # You should read completely this section before issuing any command.
+ # If you have only one monitor to play with, once you insmod the
+ module, the 3dfx takes control of the output, so you'll have to
+ plug the monitor to the "normal" video board in order to issue
+ the commands, or you can blindly use sst_dbg_vgapass
+ in the tools directory (See Tools). The latest solution is pass the
+ parameter vgapass=1 when insmodding the driver. (See Kernel/Modules
+ Options)
+
+ Module insertion:
+ # insmod sstfb.o
+ you should see some strange output from the board:
+ a big blue square, a green and a red small squares and a vertical
+ white rectangle. why? the function's name is self-explanatory:
+ "sstfb_test()"...
+ (if you don't have a second monitor, you'll have to plug your monitor
+ directly to the 2D videocard to see what you're typing)
+ # con2fb /dev/fbx /dev/ttyx
+ bind a tty to the new frame buffer. if you already have a frame
+ buffer driver, the voodoo fb will likely be /dev/fb1. if not,
+ the device will be /dev/fb0. You can check this by doing a
+ cat /proc/fb. You can find a copy of con2fb in tools/ directory.
+ if you don't have another fb device, this step is superfluous,
+ as the console subsystem automagicaly binds ttys to the fb.
+ # switch to the virtual console you just mapped. "tadaaa" ...
+
+ Module removal:
+ # con2fb /dev/fbx /dev/ttyx
+ bind the tty to the old frame buffer so the module can be removed.
+ (how does it work with vgacon ? short answer : it doesn't work)
+ # rmmod sstfb
+
+
+Kernel/Modules Options
+
+ You can pass some options to the sstfb module, and via the kernel
+ command line when the driver is compiled in:
+ for module : insmod sstfb.o option1=value1 option2=value2 ...
+ in kernel : video=sstfb:option1,option2:value2,option3 ...
+
+ sstfb supports the following options :
+
+Module Kernel Description
+
+vgapass=0 vganopass Enable or disable VGA passthrou cable.
+vgapass=1 vgapass When enabled, the monitor will get the signal
+ from the VGA board and not from the voodoo.
+ Default: nopass
+
+mem=x mem:x Force frame buffer memory in MiB
+ allowed values: 0, 1, 2, 4.
+ Default: 0 (= autodetect)
+
+inverse=1 inverse Supposed to enable inverse console.
+ doesn't work yet...
+
+clipping=1 clipping Enable or disable clipping.
+clipping=0 noclipping With clipping enabled, all offscreen
+ reads and writes are discarded.
+ Default: enable clipping.
+
+gfxclk=x gfxclk:x Force graphic clock frequency (in MHz).
+ Be careful with this option, it may be
+ DANGEROUS.
+ Default: auto
+ 50Mhz for Voodoo 1,
+ 75MHz for Voodoo 2.
+
+slowpci=1 fastpci Enable or disable fast PCI read/writes.
+slowpci=1 slowpci Default : fastpci
+
+dev=x dev:x Attach the driver to device number x.
+ 0 is the first compatible board (in
+ lspci order)
+
+Tools
+
+ These tools are mostly for debugging purposes, but you can
+ find some of these interesting :
+ - con2fb , maps a tty to a fbramebuffer .
+ con2fb /dev/fb1 /dev/tty5
+ - sst_dbg_vgapass , changes vga passthrou. You have to recompile the
+ driver with SST_DEBUG and SST_DEBUG_IOCTL set to 1
+ sst_dbg_vgapass /dev/fb1 1 (enables vga cable)
+ sst_dbg_vgapass /dev/fb1 0 (disables vga cable)
+ - glide_reset , resets the voodoo using glide
+ use this after rmmoding sstfb, if the module refuses to
+ reinsert .
+
+Bugs
+
+ - DO NOT use glide while the sstfb module is in, you'll most likely
+ hang your computer.
+ - If you see some artefacts (pixels not cleaning and stuff like that),
+ try turning off clipping (clipping=0), and/or using slowpci
+ - the driver don't detect the 4Mb frame buffer voodoos, it seems that
+ the 2 last Mbs wrap around. looking into that .
+ - The driver is 16 bpp only, 24/32 won't work.
+ - The driver is not your_favorite_toy-safe. this includes SMP...
+ [Actually from inspection it seems to be safe - Alan]
+ - When using XFree86 FBdev (X over fbdev) you may see strange color
+ patterns at the border of your windows (the pixels lose the lowest
+ byte -> basically the blue component and some of the green). I'm unable
+ to reproduce this with XFree86-3.3, but one of the testers has this
+ problem with XFree86-4. Apparently recent Xfree86-4.x solve this
+ problem.
+ - I didn't really test changing the palette, so you may find some weird
+ things when playing with that.
+ - Sometimes the driver will not recognise the DAC, and the
+ initialisation will fail. This is specifically true for
+ voodoo 2 boards, but it should be solved in recent versions. Please
+ contact me.
+ - The 24/32 is not likely to work anytime soon, knowing that the
+ hardware does ... unusual things in 24/32 bpp.
+ - When used with another video board, current limitations of the linux
+ console subsystem can cause some troubles, specifically, you should
+ disable software scrollback, as it can oops badly ...
+
+Todo
+
+ - Get rid of the previous paragraph.
+ - Buy more coffee.
+ - test/port to other arch.
+ - try to add panning using tweeks with front and back buffer .
+ - try to implement accel on voodoo2, this board can actually do a
+ lot in 2D even if it was sold as a 3D only board ...
+
+ghoz.
+
+--
+Ghozlane Toumi <gtoumi@laposte.net>
+
+
+$Date: 2002/05/09 20:11:45 $
+http://sstfb.sourceforge.net/README
diff --git a/Documentation/fb/tgafb.txt b/Documentation/fb/tgafb.txt
new file mode 100644
index 000000000..250083ada
--- /dev/null
+++ b/Documentation/fb/tgafb.txt
@@ -0,0 +1,69 @@
+$Id: tgafb.txt,v 1.1.2.2 2000/04/04 06:50:18 mato Exp $
+
+What is tgafb?
+===============
+
+This is a driver for DECChip 21030 based graphics framebuffers, a.k.a. TGA
+cards, which are usually found in older Digital Alpha systems. The
+following models are supported:
+
+ZLxP-E1 (8bpp, 2 MB VRAM)
+ZLxP-E2 (32bpp, 8 MB VRAM)
+ZLxP-E3 (32bpp, 16 MB VRAM, Zbuffer)
+
+This version is an almost complete rewrite of the code written by Geert
+Uytterhoeven, which was based on the original TGA console code written by
+Jay Estabrook.
+
+Major new features since Linux 2.0.x:
+
+ * Support for multiple resolutions
+ * Support for fixed-frequency and other oddball monitors
+ (by allowing the video mode to be set at boot time)
+
+User-visible changes since Linux 2.2.x:
+
+ * Sync-on-green is now handled properly
+ * More useful information is printed on bootup
+ (this helps if people run into problems)
+
+This driver does not (yet) support the TGA2 family of framebuffers, so the
+PowerStorm 3D30/4D20 (also known as PBXGB) cards are not supported. These
+can however be used with the standard VGA Text Console driver.
+
+
+Configuration
+=============
+
+You can pass kernel command line options to tgafb with
+`video=tgafb:option1,option2:value2,option3' (multiple options should be
+separated by comma, values are separated from options by `:').
+Accepted options:
+
+font:X - default font to use. All fonts are supported, including the
+ SUN12x22 font which is very nice at high resolutions.
+
+mode:X - default video mode. The following video modes are supported:
+ 640x480-60, 800x600-56, 640x480-72, 800x600-60, 800x600-72,
+ 1024x768-60, 1152x864-60, 1024x768-70, 1024x768-76,
+ 1152x864-70, 1280x1024-61, 1024x768-85, 1280x1024-70,
+ 1152x864-84, 1280x1024-76, 1280x1024-85
+
+
+Known Issues
+============
+
+The XFree86 FBDev server has been reported not to work, since tgafb doesn't do
+mmap(). Running the standard XF86_TGA server from XFree86 3.3.x works fine for
+me, however this server does not do acceleration, which make certain operations
+quite slow. Support for acceleration is being progressively integrated in
+XFree86 4.x.
+
+When running tgafb in resolutions higher than 640x480, on switching VCs from
+tgafb to XF86_TGA 3.3.x, the entire screen is not re-drawn and must be manually
+refreshed. This is an X server problem, not a tgafb problem, and is fixed in
+XFree86 4.0.
+
+Enjoy!
+
+Martin Lucina <mato@kotelna.sk>
diff --git a/Documentation/fb/tridentfb.txt b/Documentation/fb/tridentfb.txt
new file mode 100644
index 000000000..45d9de5b1
--- /dev/null
+++ b/Documentation/fb/tridentfb.txt
@@ -0,0 +1,70 @@
+Tridentfb is a framebuffer driver for some Trident chip based cards.
+
+The following list of chips is thought to be supported although not all are
+tested:
+
+those from the TGUI series 9440/96XX and with Cyber in their names
+those from the Image series and with Cyber in their names
+those with Blade in their names (Blade3D,CyberBlade...)
+the newer CyberBladeXP family
+
+All families are accelerated. Only PCI/AGP based cards are supported,
+none of the older Tridents.
+The driver supports 8, 16 and 32 bits per pixel depths.
+The TGUI family requires a line length to be power of 2 if acceleration
+is enabled. This means that range of possible resolutions and bpp is
+limited comparing to the range if acceleration is disabled (see list
+of parameters below).
+
+Known bugs:
+1. The driver randomly locks up on 3DImage975 chip with acceleration
+ enabled. The same happens in X11 (Xorg).
+2. The ramdac speeds require some more fine tuning. It is possible to
+ switch resolution which the chip does not support at some depths for
+ older chips.
+
+How to use it?
+==============
+
+When booting you can pass the video parameter.
+video=tridentfb
+
+The parameters for tridentfb are concatenated with a ':' as in this example.
+
+video=tridentfb:800x600-16@75,noaccel
+
+The second level parameters that tridentfb understands are:
+
+noaccel - turns off acceleration (when it doesn't work for your card)
+
+fp - use flat panel related stuff
+crt - assume monitor is present instead of fp
+
+center - for flat panels and resolutions smaller than native size center the
+ image, otherwise use
+stretch
+
+memsize - integer value in KB, use if your card's memory size is misdetected.
+ look at the driver output to see what it says when initializing.
+
+memdiff - integer value in KB, should be nonzero if your card reports
+ more memory than it actually has. For instance mine is 192K less than
+ detection says in all three BIOS selectable situations 2M, 4M, 8M.
+ Only use if your video memory is taken from main memory hence of
+ configurable size. Otherwise use memsize.
+ If in some modes which barely fit the memory you see garbage
+ at the bottom this might help by not letting change to that mode
+ anymore.
+
+nativex - the width in pixels of the flat panel.If you know it (usually 1024
+ 800 or 1280) and it is not what the driver seems to detect use it.
+
+bpp - bits per pixel (8,16 or 32)
+mode - a mode name like 800x600-8@75 as described in
+ Documentation/fb/modedb.txt
+
+Using insane values for the above parameters will probably result in driver
+misbehaviour so take care(for instance memsize=12345678 or memdiff=23784 or
+nativex=93)
+
+Contact: jani@astechnix.ro
diff --git a/Documentation/fb/udlfb.txt b/Documentation/fb/udlfb.txt
new file mode 100644
index 000000000..57d2f2908
--- /dev/null
+++ b/Documentation/fb/udlfb.txt
@@ -0,0 +1,159 @@
+
+What is udlfb?
+===============
+
+This is a driver for DisplayLink USB 2.0 era graphics chips.
+
+DisplayLink chips provide simple hline/blit operations with some compression,
+pairing that with a hardware framebuffer (16MB) on the other end of the
+USB wire. That hardware framebuffer is able to drive the VGA, DVI, or HDMI
+monitor with no CPU involvement until a pixel has to change.
+
+The CPU or other local resource does all the rendering; optinally compares the
+result with a local shadow of the remote hardware framebuffer to identify
+the minimal set of pixels that have changed; and compresses and sends those
+pixels line-by-line via USB bulk transfers.
+
+Because of the efficiency of bulk transfers and a protocol on top that
+does not require any acks - the effect is very low latency that
+can support surprisingly high resolutions with good performance for
+non-gaming and non-video applications.
+
+Mode setting, EDID read, etc are other bulk or control transfers. Mode
+setting is very flexible - able to set nearly arbitrary modes from any timing.
+
+Advantages of USB graphics in general:
+
+ * Ability to add a nearly arbitrary number of displays to any USB 2.0
+ capable system. On Linux, number of displays is limited by fbdev interface
+ (FB_MAX is currently 32). Of course, all USB devices on the same
+ host controller share the same 480Mbs USB 2.0 interface.
+
+Advantages of supporting DisplayLink chips with kernel framebuffer interface:
+
+ * The actual hardware functionality of DisplayLink chips matches nearly
+ one-to-one with the fbdev interface, making the driver quite small and
+ tight relative to the functionality it provides.
+ * X servers and other applications can use the standard fbdev interface
+ from user mode to talk to the device, without needing to know anything
+ about USB or DisplayLink's protocol at all. A "displaylink" X driver
+ and a slightly modified "fbdev" X driver are among those that already do.
+
+Disadvantages:
+
+ * Fbdev's mmap interface assumes a real hardware framebuffer is mapped.
+ In the case of USB graphics, it is just an allocated (virtual) buffer.
+ Writes need to be detected and encoded into USB bulk transfers by the CPU.
+ Accurate damage/changed area notifications work around this problem.
+ In the future, hopefully fbdev will be enhanced with an small standard
+ interface to allow mmap clients to report damage, for the benefit
+ of virtual or remote framebuffers.
+ * Fbdev does not arbitrate client ownership of the framebuffer well.
+ * Fbcon assumes the first framebuffer it finds should be consumed for console.
+ * It's not clear what the future of fbdev is, given the rise of KMS/DRM.
+
+How to use it?
+==============
+
+Udlfb, when loaded as a module, will match against all USB 2.0 generation
+DisplayLink chips (Alex and Ollie family). It will then attempt to read the EDID
+of the monitor, and set the best common mode between the DisplayLink device
+and the monitor's capabilities.
+
+If the DisplayLink device is successful, it will paint a "green screen" which
+means that from a hardware and fbdev software perspective, everything is good.
+
+At that point, a /dev/fb? interface will be present for user-mode applications
+to open and begin writing to the framebuffer of the DisplayLink device using
+standard fbdev calls. Note that if mmap() is used, by default the user mode
+application must send down damage notifcations to trigger repaints of the
+changed regions. Alternatively, udlfb can be recompiled with experimental
+defio support enabled, to support a page-fault based detection mechanism
+that can work without explicit notifcation.
+
+The most common client of udlfb is xf86-video-displaylink or a modified
+xf86-video-fbdev X server. These servers have no real DisplayLink specific
+code. They write to the standard framebuffer interface and rely on udlfb
+to do its thing. The one extra feature they have is the ability to report
+rectangles from the X DAMAGE protocol extension down to udlfb via udlfb's
+damage interface (which will hopefully be standardized for all virtual
+framebuffers that need damage info). These damage notifications allow
+udlfb to efficiently process the changed pixels.
+
+Module Options
+==============
+
+Special configuration for udlfb is usually unnecessary. There are a few
+options, however.
+
+From the command line, pass options to modprobe
+modprobe udlfb fb_defio=0 console=1 shadow=1
+
+Or modify options on the fly at /sys/module/udlfb/parameters directory via
+sudo nano fb_defio
+change the parameter in place, and save the file.
+
+Unplug/replug USB device to apply with new settings
+
+Or for permanent option, create file like /etc/modprobe.d/udlfb.conf with text
+options udlfb fb_defio=0 console=1 shadow=1
+
+Accepted boolean options:
+
+fb_defio Make use of the fb_defio (CONFIG_FB_DEFERRED_IO) kernel
+ module to track changed areas of the framebuffer by page faults.
+ Standard fbdev applications that use mmap but that do not
+ report damage, should be able to work with this enabled.
+ Disable when running with X server that supports reporting
+ changed regions via ioctl, as this method is simpler,
+ more stable, and higher performance.
+ default: fb_defio=1
+
+console Allow fbcon to attach to udlfb provided framebuffers.
+ Can be disabled if fbcon and other clients
+ (e.g. X with --shared-vt) are in conflict.
+ default: console=1
+
+shadow Allocate a 2nd framebuffer to shadow what's currently across
+ the USB bus in device memory. If any pixels are unchanged,
+ do not transmit. Spends host memory to save USB transfers.
+ Enabled by default. Only disable on very low memory systems.
+ default: shadow=1
+
+Sysfs Attributes
+================
+
+Udlfb creates several files in /sys/class/graphics/fb?
+Where ? is the sequential framebuffer id of the particular DisplayLink device
+
+edid If a valid EDID blob is written to this file (typically
+ by a udev rule), then udlfb will use this EDID as a
+ backup in case reading the actual EDID of the monitor
+ attached to the DisplayLink device fails. This is
+ especially useful for fixed panels, etc. that cannot
+ communicate their capabilities via EDID. Reading
+ this file returns the current EDID of the attached
+ monitor (or last backup value written). This is
+ useful to get the EDID of the attached monitor,
+ which can be passed to utilities like parse-edid.
+
+metrics_bytes_rendered 32-bit count of pixel bytes rendered
+
+metrics_bytes_identical 32-bit count of how many of those bytes were found to be
+ unchanged, based on a shadow framebuffer check
+
+metrics_bytes_sent 32-bit count of how many bytes were transferred over
+ USB to communicate the resulting changed pixels to the
+ hardware. Includes compression and protocol overhead
+
+metrics_cpu_kcycles_used 32-bit count of CPU cycles used in processing the
+ above pixels (in thousands of cycles).
+
+metrics_reset Write-only. Any write to this file resets all metrics
+ above to zero. Note that the 32-bit counters above
+ roll over very quickly. To get reliable results, design
+ performance tests to start and finish in a very short
+ period of time (one minute or less is safe).
+
+--
+Bernie Thompson <bernie@plugable.com>
diff --git a/Documentation/fb/uvesafb.txt b/Documentation/fb/uvesafb.txt
new file mode 100644
index 000000000..f6362d887
--- /dev/null
+++ b/Documentation/fb/uvesafb.txt
@@ -0,0 +1,183 @@
+
+uvesafb - A Generic Driver for VBE2+ compliant video cards
+==========================================================
+
+1. Requirements
+---------------
+
+uvesafb should work with any video card that has a Video BIOS compliant
+with the VBE 2.0 standard.
+
+Unlike other drivers, uvesafb makes use of a userspace helper called
+v86d. v86d is used to run the x86 Video BIOS code in a simulated and
+controlled environment. This allows uvesafb to function on arches other
+than x86. Check the v86d documentation for a list of currently supported
+arches.
+
+v86d source code can be downloaded from the following website:
+ http://dev.gentoo.org/~spock/projects/uvesafb
+
+Please refer to the v86d documentation for detailed configuration and
+installation instructions.
+
+Note that the v86d userspace helper has to be available at all times in
+order for uvesafb to work properly. If you want to use uvesafb during
+early boot, you will have to include v86d into an initramfs image, and
+either compile it into the kernel or use it as an initrd.
+
+2. Caveats and limitations
+--------------------------
+
+uvesafb is a _generic_ driver which supports a wide variety of video
+cards, but which is ultimately limited by the Video BIOS interface.
+The most important limitations are:
+
+- Lack of any type of acceleration.
+- A strict and limited set of supported video modes. Often the native
+ or most optimal resolution/refresh rate for your setup will not work
+ with uvesafb, simply because the Video BIOS doesn't support the
+ video mode you want to use. This can be especially painful with
+ widescreen panels, where native video modes don't have the 4:3 aspect
+ ratio, which is what most BIOS-es are limited to.
+- Adjusting the refresh rate is only possible with a VBE 3.0 compliant
+ Video BIOS. Note that many nVidia Video BIOS-es claim to be VBE 3.0
+ compliant, while they simply ignore any refresh rate settings.
+
+3. Configuration
+----------------
+
+uvesafb can be compiled either as a module, or directly into the kernel.
+In both cases it supports the same set of configuration options, which
+are either given on the kernel command line or as module parameters, e.g.:
+
+ video=uvesafb:1024x768-32,mtrr:3,ywrap (compiled into the kernel)
+
+ # modprobe uvesafb mode_option=1024x768-32 mtrr=3 scroll=ywrap (module)
+
+Accepted options:
+
+ypan Enable display panning using the VESA protected mode
+ interface. The visible screen is just a window of the
+ video memory, console scrolling is done by changing the
+ start of the window. This option is available on x86
+ only and is the default option on that architecture.
+
+ywrap Same as ypan, but assumes your gfx board can wrap-around
+ the video memory (i.e. starts reading from top if it
+ reaches the end of video memory). Faster than ypan.
+ Available on x86 only.
+
+redraw Scroll by redrawing the affected part of the screen, this
+ is the default on non-x86.
+
+(If you're using uvesafb as a module, the above three options are
+ used a parameter of the scroll option, e.g. scroll=ypan.)
+
+vgapal Use the standard VGA registers for palette changes.
+
+pmipal Use the protected mode interface for palette changes.
+ This is the default if the protected mode interface is
+ available. Available on x86 only.
+
+mtrr:n Setup memory type range registers for the framebuffer
+ where n:
+ 0 - disabled (equivalent to nomtrr)
+ 3 - write-combining (default)
+
+ Values other than 0 and 3 will result in a warning and will be
+ treated just like 3.
+
+nomtrr Do not use memory type range registers.
+
+vremap:n
+ Remap 'n' MiB of video RAM. If 0 or not specified, remap memory
+ according to video mode.
+
+vtotal:n
+ If the video BIOS of your card incorrectly determines the total
+ amount of video RAM, use this option to override the BIOS (in MiB).
+
+<mode> The mode you want to set, in the standard modedb format. Refer to
+ modedb.txt for a detailed description. When uvesafb is compiled as
+ a module, the mode string should be provided as a value of the
+ 'mode_option' option.
+
+vbemode:x
+ Force the use of VBE mode x. The mode will only be set if it's
+ found in the VBE-provided list of supported modes.
+ NOTE: The mode number 'x' should be specified in VESA mode number
+ notation, not the Linux kernel one (eg. 257 instead of 769).
+ HINT: If you use this option because normal <mode> parameter does
+ not work for you and you use a X server, you'll probably want to
+ set the 'nocrtc' option to ensure that the video mode is properly
+ restored after console <-> X switches.
+
+nocrtc Do not use CRTC timings while setting the video mode. This option
+ has any effect only if the Video BIOS is VBE 3.0 compliant. Use it
+ if you have problems with modes set the standard way. Note that
+ using this option implies that any refresh rate adjustments will
+ be ignored and the refresh rate will stay at your BIOS default (60 Hz).
+
+noedid Do not try to fetch and use EDID-provided modes.
+
+noblank Disable hardware blanking.
+
+v86d:path
+ Set path to the v86d executable. This option is only available as
+ a module parameter, and not as a part of the video= string. If you
+ need to use it and have uvesafb built into the kernel, use
+ uvesafb.v86d="path".
+
+Additionally, the following parameters may be provided. They all override the
+EDID-provided values and BIOS defaults. Refer to your monitor's specs to get
+the correct values for maxhf, maxvf and maxclk for your hardware.
+
+maxhf:n Maximum horizontal frequency (in kHz).
+maxvf:n Maximum vertical frequency (in Hz).
+maxclk:n Maximum pixel clock (in MHz).
+
+4. The sysfs interface
+----------------------
+
+uvesafb provides several sysfs nodes for configurable parameters and
+additional information.
+
+Driver attributes:
+
+/sys/bus/platform/drivers/uvesafb
+ - v86d (default: /sbin/v86d)
+ Path to the v86d executable. v86d is started by uvesafb
+ if an instance of the daemon isn't already running.
+
+Device attributes:
+
+/sys/bus/platform/drivers/uvesafb/uvesafb.0
+ - nocrtc
+ Use the default refresh rate (60 Hz) if set to 1.
+
+ - oem_product_name
+ - oem_product_rev
+ - oem_string
+ - oem_vendor
+ Information about the card and its maker.
+
+ - vbe_modes
+ A list of video modes supported by the Video BIOS along with their
+ VBE mode numbers in hex.
+
+ - vbe_version
+ A BCD value indicating the implemented VBE standard.
+
+5. Miscellaneous
+----------------
+
+Uvesafb will set a video mode with the default refresh rate and timings
+from the Video BIOS if you set pixclock to 0 in fb_var_screeninfo.
+
+
+--
+ Michal Januszewski <spock@gentoo.org>
+ Last updated: 2009-03-30
+
+ Documentation of the uvesafb options is loosely based on vesafb.txt.
+
diff --git a/Documentation/fb/vesafb.txt b/Documentation/fb/vesafb.txt
new file mode 100644
index 000000000..950d5a658
--- /dev/null
+++ b/Documentation/fb/vesafb.txt
@@ -0,0 +1,181 @@
+
+What is vesafb?
+===============
+
+This is a generic driver for a graphic framebuffer on intel boxes.
+
+The idea is simple: Turn on graphics mode at boot time with the help
+of the BIOS, and use this as framebuffer device /dev/fb0, like the m68k
+(and other) ports do.
+
+This means we decide at boot time whenever we want to run in text or
+graphics mode. Switching mode later on (in protected mode) is
+impossible; BIOS calls work in real mode only. VESA BIOS Extensions
+Version 2.0 are required, because we need a linear frame buffer.
+
+Advantages:
+
+ * It provides a nice large console (128 cols + 48 lines with 1024x768)
+ without using tiny, unreadable fonts.
+ * You can run XF68_FBDev on top of /dev/fb0 (=> non-accelerated X11
+ support for every VBE 2.0 compliant graphics board).
+ * Most important: boot logo :-)
+
+Disadvantages:
+
+ * graphic mode is slower than text mode...
+
+
+How to use it?
+==============
+
+Switching modes is done using the vga=... boot parameter. Read
+Documentation/svga.txt for details.
+
+You should compile in both vgacon (for text mode) and vesafb (for
+graphics mode). Which of them takes over the console depends on
+whenever the specified mode is text or graphics.
+
+The graphic modes are NOT in the list which you get if you boot with
+vga=ask and hit return. The mode you wish to use is derived from the
+VESA mode number. Here are those VESA mode numbers:
+
+ | 640x480 800x600 1024x768 1280x1024
+----+-------------------------------------
+256 | 0x101 0x103 0x105 0x107
+32k | 0x110 0x113 0x116 0x119
+64k | 0x111 0x114 0x117 0x11A
+16M | 0x112 0x115 0x118 0x11B
+
+The video mode number of the Linux kernel is the VESA mode number plus
+0x200.
+
+ Linux_kernel_mode_number = VESA_mode_number + 0x200
+
+So the table for the Kernel mode numbers are:
+
+ | 640x480 800x600 1024x768 1280x1024
+----+-------------------------------------
+256 | 0x301 0x303 0x305 0x307
+32k | 0x310 0x313 0x316 0x319
+64k | 0x311 0x314 0x317 0x31A
+16M | 0x312 0x315 0x318 0x31B
+
+To enable one of those modes you have to specify "vga=ask" in the
+lilo.conf file and rerun LILO. Then you can type in the desired
+mode at the "vga=ask" prompt. For example if you like to use
+1024x768x256 colors you have to say "305" at this prompt.
+
+If this does not work, this might be because your BIOS does not support
+linear framebuffers or because it does not support this mode at all.
+Even if your board does, it might be the BIOS which does not. VESA BIOS
+Extensions v2.0 are required, 1.2 is NOT sufficient. You will get a
+"bad mode number" message if something goes wrong.
+
+1. Note: LILO cannot handle hex, for booting directly with
+ "vga=mode-number" you have to transform the numbers to decimal.
+2. Note: Some newer versions of LILO appear to work with those hex values,
+ if you set the 0x in front of the numbers.
+
+X11
+===
+
+XF68_FBDev should work just fine, but it is non-accelerated. Running
+another (accelerated) X-Server like XF86_SVGA might or might not work.
+It depends on X-Server and graphics board.
+
+The X-Server must restore the video mode correctly, else you end up
+with a broken console (and vesafb cannot do anything about this).
+
+
+Refresh rates
+=============
+
+There is no way to change the vesafb video mode and/or timings after
+booting linux. If you are not happy with the 60 Hz refresh rate, you
+have these options:
+
+ * configure and load the DOS-Tools for the graphics board (if
+ available) and boot linux with loadlin.
+ * use a native driver (matroxfb/atyfb) instead if vesafb. If none
+ is available, write a new one!
+ * VBE 3.0 might work too. I have neither a gfx board with VBE 3.0
+ support nor the specs, so I have not checked this yet.
+
+
+Configuration
+=============
+
+The VESA BIOS provides protected mode interface for changing
+some parameters. vesafb can use it for palette changes and
+to pan the display. It is turned off by default because it
+seems not to work with some BIOS versions, but there are options
+to turn it on.
+
+You can pass options to vesafb using "video=vesafb:option" on
+the kernel command line. Multiple options should be separated
+by comma, like this: "video=vesafb:ypan,invers"
+
+Accepted options:
+
+invers no comment...
+
+ypan enable display panning using the VESA protected mode
+ interface. The visible screen is just a window of the
+ video memory, console scrolling is done by changing the
+ start of the window.
+ pro: * scrolling (fullscreen) is fast, because there is
+ no need to copy around data.
+ * You'll get scrollback (the Shift-PgUp thing),
+ the video memory can be used as scrollback buffer
+ kontra: * scrolling only parts of the screen causes some
+ ugly flicker effects (boot logo flickers for
+ example).
+
+ywrap Same as ypan, but assumes your gfx board can wrap-around
+ the video memory (i.e. starts reading from top if it
+ reaches the end of video memory). Faster than ypan.
+
+redraw scroll by redrawing the affected part of the screen, this
+ is the safe (and slow) default.
+
+
+vgapal Use the standard vga registers for palette changes.
+ This is the default.
+pmipal Use the protected mode interface for palette changes.
+
+mtrr:n setup memory type range registers for the vesafb framebuffer
+ where n:
+ 0 - disabled (equivalent to nomtrr) (default)
+ 1 - uncachable
+ 2 - write-back
+ 3 - write-combining
+ 4 - write-through
+
+ If you see the following in dmesg, choose the type that matches the
+ old one. In this example, use "mtrr:2".
+...
+mtrr: type mismatch for e0000000,8000000 old: write-back new: write-combining
+...
+
+nomtrr disable mtrr
+
+vremap:n
+ remap 'n' MiB of video RAM. If 0 or not specified, remap memory
+ according to video mode. (2.5.66 patch/idea by Antonino Daplas
+ reversed to give override possibility (allocate more fb memory
+ than the kernel would) to 2.4 by tmb@iki.fi)
+
+vtotal:n
+ if the video BIOS of your card incorrectly determines the total
+ amount of video RAM, use this option to override the BIOS (in MiB).
+
+Have fun!
+
+ Gerd
+
+--
+Gerd Knorr <kraxel@goldbach.in-berlin.de>
+
+Minor (mostly typo) changes
+by Nico Schmoigl <schmoigl@rumms.uni-mannheim.de>
diff --git a/Documentation/fb/viafb.modes b/Documentation/fb/viafb.modes
new file mode 100644
index 000000000..2a547da2e
--- /dev/null
+++ b/Documentation/fb/viafb.modes
@@ -0,0 +1,870 @@
+#
+#
+# These data are based on the CRTC parameters in
+#
+# VIA Integration Graphics Chip
+# (C) 2004 VIA Technologies Inc.
+#
+
+#
+# 640x480, 60 Hz, Non-Interlaced (25.175 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 640 480
+# Scan Frequency 31.469 kHz 59.94 Hz
+# Sync Width 3.813 us 0.064 ms
+# 12 chars 2 lines
+# Front Porch 0.636 us 0.318 ms
+# 2 chars 10 lines
+# Back Porch 1.907 us 1.048 ms
+# 6 chars 33 lines
+# Active Time 25.422 us 15.253 ms
+# 80 chars 480 lines
+# Blank Time 6.356 us 1.430 ms
+# 20 chars 45 lines
+# Polarity negative negative
+#
+
+mode "640x480-60"
+# D: 25.175 MHz, H: 31.469 kHz, V: 59.94 Hz
+ geometry 640 480 640 480 32
+ timings 39722 48 16 33 10 96 2 endmode mode "480x640-60"
+# D: 24.823 MHz, H: 39.780 kHz, V: 60.00 Hz
+ geometry 480 640 480 640 32 timings 39722 72 24 19 1 48 3 endmode
+#
+# 640x480, 75 Hz, Non-Interlaced (31.50 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 640 480
+# Scan Frequency 37.500 kHz 75.00 Hz
+# Sync Width 2.032 us 0.080 ms
+# 8 chars 3 lines
+# Front Porch 0.508 us 0.027 ms
+# 2 chars 1 lines
+# Back Porch 3.810 us 0.427 ms
+# 15 chars 16 lines
+# Active Time 20.317 us 12.800 ms
+# 80 chars 480 lines
+# Blank Time 6.349 us 0.533 ms
+# 25 chars 20 lines
+# Polarity negative negative
+#
+ mode "640x480-75"
+# D: 31.50 MHz, H: 37.500 kHz, V: 75.00 Hz
+ geometry 640 480 640 480 32 timings 31747 120 16 16 1 64 3 endmode
+#
+# 640x480, 85 Hz, Non-Interlaced (36.000 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 640 480
+# Scan Frequency 43.269 kHz 85.00 Hz
+# Sync Width 1.556 us 0.069 ms
+# 7 chars 3 lines
+# Front Porch 1.556 us 0.023 ms
+# 7 chars 1 lines
+# Back Porch 2.222 us 0.578 ms
+# 10 chars 25 lines
+# Active Time 17.778 us 11.093 ms
+# 80 chars 480 lines
+# Blank Time 5.333 us 0.670 ms
+# 24 chars 29 lines
+# Polarity negative negative
+#
+ mode "640x480-85"
+# D: 36.000 MHz, H: 43.269 kHz, V: 85.00 Hz
+ geometry 640 480 640 480 32 timings 27777 80 56 25 1 56 3 endmode
+#
+# 640x480, 100 Hz, Non-Interlaced (43.163 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 640 480
+# Scan Frequency 50.900 kHz 100.00 Hz
+# Sync Width 1.483 us 0.058 ms
+# 8 chars 3 lines
+# Front Porch 0.927 us 0.019 ms
+# 5 chars 1 lines
+# Back Porch 2.409 us 0.475 ms
+# 13 chars 25 lines
+# Active Time 14.827 us 9.430 ms
+# 80 chars 480 lines
+# Blank Time 4.819 us 0.570 ms
+# 26 chars 29 lines
+# Polarity positive positive
+#
+ mode "640x480-100"
+# D: 43.163 MHz, H: 50.900 kHz, V: 100.00 Hz
+ geometry 640 480 640 480 32 timings 23168 104 40 25 1 64 3 endmode
+#
+# 640x480, 120 Hz, Non-Interlaced (52.406 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 640 480
+# Scan Frequency 61.800 kHz 120.00 Hz
+# Sync Width 1.221 us 0.048 ms
+# 8 chars 3 lines
+# Front Porch 0.763 us 0.016 ms
+# 5 chars 1 lines
+# Back Porch 1.984 us 0.496 ms
+# 13 chars 31 lines
+# Active Time 12.212 us 7.767 ms
+# 80 chars 480 lines
+# Blank Time 3.969 us 0.566 ms
+# 26 chars 35 lines
+# Polarity positive positive
+#
+ mode "640x480-120"
+# D: 52.406 MHz, H: 61.800 kHz, V: 120.00 Hz
+ geometry 640 480 640 480 32 timings 19081 104 40 31 1 64 3 endmode
+#
+# 720x480, 60 Hz, Non-Interlaced (26.880 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 720 480
+# Scan Frequency 30.000 kHz 60.241 Hz
+# Sync Width 2.679 us 0.099 ms
+# 9 chars 3 lines
+# Front Porch 0.595 us 0.033 ms
+# 2 chars 1 lines
+# Back Porch 3.274 us 0.462 ms
+# 11 chars 14 lines
+# Active Time 26.786 us 16.000 ms
+# 90 chars 480 lines
+# Blank Time 6.548 us 0.600 ms
+# 22 chars 18 lines
+# Polarity positive positive
+#
+ mode "720x480-60"
+# D: 26.880 MHz, H: 30.000 kHz, V: 60.24 Hz
+ geometry 720 480 720 480 32 timings 37202 88 16 14 1 72 3 endmode
+#
+# 800x480, 60 Hz, Non-Interlaced (29.581 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 800 480
+# Scan Frequency 29.892 kHz 60.00 Hz
+# Sync Width 2.704 us 100.604 us
+# 10 chars 3 lines
+# Front Porch 0.541 us 33.535 us
+# 2 chars 1 lines
+# Back Porch 3.245 us 435.949 us
+# 12 chars 13 lines
+# Active Time 27.044 us 16.097 ms
+# 100 chars 480 lines
+# Blank Time 6.491 us 0.570 ms
+# 24 chars 17 lines
+# Polarity positive positive
+#
+ mode "800x480-60"
+# D: 29.500 MHz, H: 29.738 kHz, V: 60.00 Hz
+ geometry 800 480 800 480 32 timings 33805 96 24 10 3 72 7 endmode
+#
+# 720x576, 60 Hz, Non-Interlaced (32.668 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 720 576
+# Scan Frequency 35.820 kHz 60.00 Hz
+# Sync Width 2.204 us 0.083 ms
+# 9 chars 3 lines
+# Front Porch 0.735 us 0.027 ms
+# 3 chars 1 lines
+# Back Porch 2.939 us 0.459 ms
+# 12 chars 17 lines
+# Active Time 22.040 us 16.080 ms
+# 90 chars 476 lines
+# Blank Time 5.877 us 0.586 ms
+# 24 chars 21 lines
+# Polarity positive positive
+#
+ mode "720x576-60"
+# D: 32.668 MHz, H: 35.820 kHz, V: 60.00 Hz
+ geometry 720 576 720 576 32 timings 30611 96 24 17 1 72 3 endmode
+#
+# 800x600, 60 Hz, Non-Interlaced (40.00 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 800 600
+# Scan Frequency 37.879 kHz 60.32 Hz
+# Sync Width 3.200 us 0.106 ms
+# 16 chars 4 lines
+# Front Porch 1.000 us 0.026 ms
+# 5 chars 1 lines
+# Back Porch 2.200 us 0.607 ms
+# 11 chars 23 lines
+# Active Time 20.000 us 15.840 ms
+# 100 chars 600 lines
+# Blank Time 6.400 us 0.739 ms
+# 32 chars 28 lines
+# Polarity positive positive
+#
+ mode "800x600-60"
+# D: 40.00 MHz, H: 37.879 kHz, V: 60.32 Hz
+ geometry 800 600 800 600 32
+ timings 25000 88 40 23 1 128 4 hsync high vsync high endmode
+#
+# 800x600, 75 Hz, Non-Interlaced (49.50 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 800 600
+# Scan Frequency 46.875 kHz 75.00 Hz
+# Sync Width 1.616 us 0.064 ms
+# 10 chars 3 lines
+# Front Porch 0.323 us 0.021 ms
+# 2 chars 1 lines
+# Back Porch 3.232 us 0.448 ms
+# 20 chars 21 lines
+# Active Time 16.162 us 12.800 ms
+# 100 chars 600 lines
+# Blank Time 5.172 us 0.533 ms
+# 32 chars 25 lines
+# Polarity positive positive
+#
+ mode "800x600-75"
+# D: 49.50 MHz, H: 46.875 kHz, V: 75.00 Hz
+ geometry 800 600 800 600 32
+ timings 20203 160 16 21 1 80 3 hsync high vsync high endmode
+#
+# 800x600, 85 Hz, Non-Interlaced (56.25 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 800 600
+# Scan Frequency 53.674 kHz 85.061 Hz
+# Sync Width 1.138 us 0.056 ms
+# 8 chars 3 lines
+# Front Porch 0.569 us 0.019 ms
+# 4 chars 1 lines
+# Back Porch 2.702 us 0.503 ms
+# 19 chars 27 lines
+# Active Time 14.222 us 11.179 ms
+# 100 chars 600 lines
+# Blank Time 4.409 us 0.578 ms
+# 31 chars 31 lines
+# Polarity positive positive
+#
+ mode "800x600-85"
+# D: 56.25 MHz, H: 53.674 kHz, V: 85.061 Hz
+ geometry 800 600 800 600 32
+ timings 17777 152 32 27 1 64 3 hsync high vsync high endmode
+#
+# 800x600, 100 Hz, Non-Interlaced (67.50 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 800 600
+# Scan Frequency 62.500 kHz 100.00 Hz
+# Sync Width 0.948 us 0.064 ms
+# 8 chars 4 lines
+# Front Porch 0.000 us 0.112 ms
+# 0 chars 7 lines
+# Back Porch 3.200 us 0.224 ms
+# 27 chars 14 lines
+# Active Time 11.852 us 9.600 ms
+# 100 chars 600 lines
+# Blank Time 4.148 us 0.400 ms
+# 35 chars 25 lines
+# Polarity positive positive
+#
+ mode "800x600-100"
+# D: 67.50 MHz, H: 62.500 kHz, V: 100.00 Hz
+ geometry 800 600 800 600 32
+ timings 14667 216 0 14 7 64 4 hsync high vsync high endmode
+#
+# 800x600, 120 Hz, Non-Interlaced (83.950 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 800 600
+# Scan Frequency 77.160 kHz 120.00 Hz
+# Sync Width 1.048 us 0.039 ms
+# 11 chars 3 lines
+# Front Porch 0.667 us 0.013 ms
+# 7 chars 1 lines
+# Back Porch 1.715 us 0.507 ms
+# 18 chars 39 lines
+# Active Time 9.529 us 7.776 ms
+# 100 chars 600 lines
+# Blank Time 3.431 us 0.557 ms
+# 36 chars 43 lines
+# Polarity positive positive
+#
+ mode "800x600-120"
+# D: 83.950 MHz, H: 77.160 kHz, V: 120.00 Hz
+ geometry 800 600 800 600 32
+ timings 11912 144 56 39 1 88 3 hsync high vsync high endmode
+#
+# 848x480, 60 Hz, Non-Interlaced (31.490 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 848 480
+# Scan Frequency 29.820 kHz 60.00 Hz
+# Sync Width 2.795 us 0.099 ms
+# 11 chars 3 lines
+# Front Porch 0.508 us 0.033 ms
+# 2 chars 1 lines
+# Back Porch 3.303 us 0.429 ms
+# 13 chars 13 lines
+# Active Time 26.929 us 16.097 ms
+# 106 chars 480 lines
+# Blank Time 6.605 us 0.570 ms
+# 26 chars 17 lines
+# Polarity positive positive
+#
+ mode "848x480-60"
+# D: 31.500 MHz, H: 29.830 kHz, V: 60.00 Hz
+ geometry 848 480 848 480 32
+ timings 31746 104 24 12 3 80 5 hsync high vsync high endmode
+#
+# 856x480, 60 Hz, Non-Interlaced (31.728 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 856 480
+# Scan Frequency 29.820 kHz 60.00 Hz
+# Sync Width 2.774 us 0.099 ms
+# 11 chars 3 lines
+# Front Porch 0.504 us 0.033 ms
+# 2 chars 1 lines
+# Back Porch 3.728 us 0.429 ms
+# 13 chars 13 lines
+# Active Time 26.979 us 16.097 ms
+# 107 chars 480 lines
+# Blank Time 6.556 us 0.570 ms
+# 26 chars 17 lines
+# Polarity positive positive
+#
+ mode "856x480-60"
+# D: 31.728 MHz, H: 29.820 kHz, V: 60.00 Hz
+ geometry 856 480 856 480 32
+ timings 31518 104 16 13 1 88 3
+ hsync high vsync high endmode mode "960x600-60"
+# D: 45.250 MHz, H: 37.212 kHz, V: 60.00 Hz
+ geometry 960 600 960 600 32 timings 22099 128 32 15 3 96 6 endmode
+#
+# 1000x600, 60 Hz, Non-Interlaced (48.068 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1000 600
+# Scan Frequency 37.320 kHz 60.00 Hz
+# Sync Width 2.164 us 0.080 ms
+# 13 chars 3 lines
+# Front Porch 0.832 us 0.027 ms
+# 5 chars 1 lines
+# Back Porch 2.996 us 0.483 ms
+# 18 chars 18 lines
+# Active Time 20.804 us 16.077 ms
+# 125 chars 600 lines
+# Blank Time 5.991 us 0.589 ms
+# 36 chars 22 lines
+# Polarity negative positive
+#
+ mode "1000x600-60"
+# D: 48.068 MHz, H: 37.320 kHz, V: 60.00 Hz
+ geometry 1000 600 1000 600 32
+ timings 20834 144 40 18 1 104 3 endmode mode "1024x576-60"
+# D: 46.996 MHz, H: 35.820 kHz, V: 60.00 Hz
+ geometry 1024 576 1024 576 32
+ timings 21278 144 40 17 1 104 3 endmode mode "1024x600-60"
+# D: 48.964 MHz, H: 37.320 kHz, V: 60.00 Hz
+ geometry 1024 600 1024 600 32
+ timings 20461 144 40 18 1 104 3 endmode mode "1088x612-60"
+# D: 52.952 MHz, H: 38.040 kHz, V: 60.00 Hz
+ geometry 1088 612 1088 612 32 timings 18877 152 48 16 3 104 5 endmode
+#
+# 1024x512, 60 Hz, Non-Interlaced (41.291 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1024 512
+# Scan Frequency 31.860 kHz 60.00 Hz
+# Sync Width 2.519 us 0.094 ms
+# 13 chars 3 lines
+# Front Porch 0.775 us 0.031 ms
+# 4 chars 1 lines
+# Back Porch 3.294 us 0.465 ms
+# 17 chars 15 lines
+# Active Time 24.800 us 16.070 ms
+# 128 chars 512 lines
+# Blank Time 6.587 us 0.596 ms
+# 34 chars 19 lines
+# Polarity positive positive
+#
+ mode "1024x512-60"
+# D: 41.291 MHz, H: 31.860 kHz, V: 60.00 Hz
+ geometry 1024 512 1024 512 32
+ timings 24218 126 32 15 1 104 3 hsync high vsync high endmode
+#
+# 1024x600, 60 Hz, Non-Interlaced (48.875 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1024 768
+# Scan Frequency 37.252 kHz 60.00 Hz
+# Sync Width 2.128 us 80.532us
+# 13 chars 3 lines
+# Front Porch 0.818 us 26.844 us
+# 5 chars 1 lines
+# Back Porch 2.946 us 483.192 us
+# 18 chars 18 lines
+# Active Time 20.951 us 16.697 ms
+# 128 chars 622 lines
+# Blank Time 5.893 us 0.591 ms
+# 36 chars 22 lines
+# Polarity negative positive
+#
+#mode "1024x600-60"
+# # D: 48.875 MHz, H: 37.252 kHz, V: 60.00 Hz
+# geometry 1024 600 1024 600 32
+# timings 20460 144 40 18 1 104 3
+# endmode
+#
+# 1024x768, 60 Hz, Non-Interlaced (65.00 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1024 768
+# Scan Frequency 48.363 kHz 60.00 Hz
+# Sync Width 2.092 us 0.124 ms
+# 17 chars 6 lines
+# Front Porch 0.369 us 0.062 ms
+# 3 chars 3 lines
+# Back Porch 2.462 us 0.601 ms
+# 20 chars 29 lines
+# Active Time 15.754 us 15.880 ms
+# 128 chars 768 lines
+# Blank Time 4.923 us 0.786 ms
+# 40 chars 38 lines
+# Polarity negative negative
+#
+ mode "1024x768-60"
+# D: 65.00 MHz, H: 48.363 kHz, V: 60.00 Hz
+ geometry 1024 768 1024 768 32 timings 15385 160 24 29 3 136 6 endmode
+#
+# 1024x768, 75 Hz, Non-Interlaced (78.75 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1024 768
+# Scan Frequency 60.023 kHz 75.03 Hz
+# Sync Width 1.219 us 0.050 ms
+# 12 chars 3 lines
+# Front Porch 0.203 us 0.017 ms
+# 2 chars 1 lines
+# Back Porch 2.235 us 0.466 ms
+# 22 chars 28 lines
+# Active Time 13.003 us 12.795 ms
+# 128 chars 768 lines
+# Blank Time 3.657 us 0.533 ms
+# 36 chars 32 lines
+# Polarity positive positive
+#
+ mode "1024x768-75"
+# D: 78.75 MHz, H: 60.023 kHz, V: 75.03 Hz
+ geometry 1024 768 1024 768 32
+ timings 12699 176 16 28 1 96 3 hsync high vsync high endmode
+#
+# 1024x768, 85 Hz, Non-Interlaced (94.50 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1024 768
+# Scan Frequency 68.677 kHz 85.00 Hz
+# Sync Width 1.016 us 0.044 ms
+# 12 chars 3 lines
+# Front Porch 0.508 us 0.015 ms
+# 6 chars 1 lines
+# Back Porch 2.201 us 0.524 ms
+# 26 chars 36 lines
+# Active Time 10.836 us 11.183 ms
+# 128 chars 768 lines
+# Blank Time 3.725 us 0.582 ms
+# 44 chars 40 lines
+# Polarity positive positive
+#
+ mode "1024x768-85"
+# D: 94.50 MHz, H: 68.677 kHz, V: 85.00 Hz
+ geometry 1024 768 1024 768 32
+ timings 10582 208 48 36 1 96 3 hsync high vsync high endmode
+#
+# 1024x768, 100 Hz, Non-Interlaced (110.0 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1024 768
+# Scan Frequency 79.023 kHz 99.78 Hz
+# Sync Width 0.800 us 0.101 ms
+# 11 chars 8 lines
+# Front Porch 0.000 us 0.000 ms
+# 0 chars 0 lines
+# Back Porch 2.545 us 0.202 ms
+# 35 chars 16 lines
+# Active Time 9.309 us 9.719 ms
+# 128 chars 768 lines
+# Blank Time 3.345 us 0.304 ms
+# 46 chars 24 lines
+# Polarity negative negative
+#
+ mode "1024x768-100"
+# D: 113.3 MHz, H: 79.023 kHz, V: 99.78 Hz
+ geometry 1024 768 1024 768 32
+ timings 8825 280 0 16 0 88 8 endmode mode "1152x720-60"
+# D: 66.750 MHz, H: 44.859 kHz, V: 60.00 Hz
+ geometry 1152 720 1152 720 32 timings 14981 168 56 19 3 112 6 endmode
+#
+# 1152x864, 75 Hz, Non-Interlaced (110.0 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1152 864
+# Scan Frequency 75.137 kHz 74.99 Hz
+# Sync Width 1.309 us 0.106 ms
+# 18 chars 8 lines
+# Front Porch 0.245 us 0.599 ms
+# 3 chars 45 lines
+# Back Porch 1.282 us 1.132 ms
+# 18 chars 85 lines
+# Active Time 10.473 us 11.499 ms
+# 144 chars 864 lines
+# Blank Time 2.836 us 1.837 ms
+# 39 chars 138 lines
+# Polarity positive positive
+#
+ mode "1152x864-75"
+# D: 110.0 MHz, H: 75.137 kHz, V: 74.99 Hz
+ geometry 1152 864 1152 864 32
+ timings 9259 144 24 85 45 144 8
+ hsync high vsync high endmode mode "1200x720-60"
+# D: 70.184 MHz, H: 44.760 kHz, V: 60.00 Hz
+ geometry 1200 720 1200 720 32
+ timings 14253 184 28 22 1 128 3 endmode mode "1280x600-60"
+# D: 61.503 MHz, H: 37.320 kHz, V: 60.00 Hz
+ geometry 1280 600 1280 600 32
+ timings 16260 184 28 18 1 128 3 endmode mode "1280x720-50"
+# D: 60.466 MHz, H: 37.050 kHz, V: 50.00 Hz
+ geometry 1280 720 1280 720 32
+ timings 16538 176 48 17 1 128 3 endmode mode "1280x768-50"
+# D: 65.178 MHz, H: 39.550 kHz, V: 50.00 Hz
+ geometry 1280 768 1280 768 32 timings 15342 184 28 19 1 128 3 endmode
+#
+# 1280x768, 60 Hz, Non-Interlaced (80.136 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1280 768
+# Scan Frequency 47.700 kHz 60.00 Hz
+# Sync Width 1.697 us 0.063 ms
+# 17 chars 3 lines
+# Front Porch 0.799 us 0.021 ms
+# 8 chars 1 lines
+# Back Porch 2.496 us 0.483 ms
+# 25 chars 23 lines
+# Active Time 15.973 us 16.101 ms
+# 160 chars 768 lines
+# Blank Time 4.992 us 0.566 ms
+# 50 chars 27 lines
+# Polarity positive positive
+#
+ mode "1280x768-60"
+# D: 80.13 MHz, H: 47.700 kHz, V: 60.00 Hz
+ geometry 1280 768 1280 768 32
+ timings 12480 200 48 23 1 126 3 hsync high vsync high endmode
+#
+# 1280x800, 60 Hz, Non-Interlaced (83.375 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1280 800
+# Scan Frequency 49.628 kHz 60.00 Hz
+# Sync Width 1.631 us 60.450 us
+# 17 chars 3 lines
+# Front Porch 0.768 us 20.15 us
+# 8 chars 1 lines
+# Back Porch 2.399 us 0.483 ms
+# 25 chars 24 lines
+# Active Time 15.352 us 16.120 ms
+# 160 chars 800 lines
+# Blank Time 4.798 us 0.564 ms
+# 50 chars 28 lines
+# Polarity negative positive
+#
+ mode "1280x800-60"
+# D: 83.500 MHz, H: 49.702 kHz, V: 60.00 Hz
+ geometry 1280 800 1280 800 32 timings 11994 200 72 22 3 128 6 endmode
+#
+# 1280x960, 60 Hz, Non-Interlaced (108.00 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1280 960
+# Scan Frequency 60.000 kHz 60.00 Hz
+# Sync Width 1.037 us 0.050 ms
+# 14 chars 3 lines
+# Front Porch 0.889 us 0.017 ms
+# 12 chars 1 lines
+# Back Porch 2.889 us 0.600 ms
+# 39 chars 36 lines
+# Active Time 11.852 us 16.000 ms
+# 160 chars 960 lines
+# Blank Time 4.815 us 0.667 ms
+# 65 chars 40 lines
+# Polarity positive positive
+#
+ mode "1280x960-60"
+# D: 108.00 MHz, H: 60.000 kHz, V: 60.00 Hz
+ geometry 1280 960 1280 960 32
+ timings 9259 312 96 36 1 112 3 hsync high vsync high endmode
+#
+# 1280x1024, 60 Hz, Non-Interlaced (108.00 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1280 1024
+# Scan Frequency 63.981 kHz 60.02 Hz
+# Sync Width 1.037 us 0.047 ms
+# 14 chars 3 lines
+# Front Porch 0.444 us 0.015 ms
+# 6 chars 1 lines
+# Back Porch 2.297 us 0.594 ms
+# 31 chars 38 lines
+# Active Time 11.852 us 16.005 ms
+# 160 chars 1024 lines
+# Blank Time 3.778 us 0.656 ms
+# 51 chars 42 lines
+# Polarity positive positive
+#
+ mode "1280x1024-60"
+# D: 108.00 MHz, H: 63.981 kHz, V: 60.02 Hz
+ geometry 1280 1024 1280 1024 32
+ timings 9260 248 48 38 1 112 3 hsync high vsync high endmode
+#
+# 1280x1024, 75 Hz, Non-Interlaced (135.00 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1280 1024
+# Scan Frequency 79.976 kHz 75.02 Hz
+# Sync Width 1.067 us 0.038 ms
+# 18 chars 3 lines
+# Front Porch 0.119 us 0.012 ms
+# 2 chars 1 lines
+# Back Porch 1.837 us 0.475 ms
+# 31 chars 38 lines
+# Active Time 9.481 us 12.804 ms
+# 160 chars 1024 lines
+# Blank Time 3.022 us 0.525 ms
+# 51 chars 42 lines
+# Polarity positive positive
+#
+ mode "1280x1024-75"
+# D: 135.00 MHz, H: 79.976 kHz, V: 75.02 Hz
+ geometry 1280 1024 1280 1024 32
+ timings 7408 248 16 38 1 144 3 hsync high vsync high endmode
+#
+# 1280x1024, 85 Hz, Non-Interlaced (157.50 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1280 1024
+# Scan Frequency 91.146 kHz 85.02 Hz
+# Sync Width 1.016 us 0.033 ms
+# 20 chars 3 lines
+# Front Porch 0.406 us 0.011 ms
+# 8 chars 1 lines
+# Back Porch 1.422 us 0.483 ms
+# 28 chars 44 lines
+# Active Time 8.127 us 11.235 ms
+# 160 chars 1024 lines
+# Blank Time 2.844 us 0.527 ms
+# 56 chars 48 lines
+# Polarity positive positive
+#
+ mode "1280x1024-85"
+# D: 157.50 MHz, H: 91.146 kHz, V: 85.02 Hz
+ geometry 1280 1024 1280 1024 32
+ timings 6349 224 64 44 1 160 3
+ hsync high vsync high endmode mode "1440x900-60"
+# D: 106.500 MHz, H: 55.935 kHz, V: 60.00 Hz
+ geometry 1440 900 1440 900 32
+ timings 9390 232 80 25 3 152 6
+ hsync high vsync high endmode mode "1440x900-75"
+# D: 136.750 MHz, H: 70.635 kHz, V: 75.00 Hz
+ geometry 1440 900 1440 900 32
+ timings 7315 248 96 33 3 152 6 hsync high vsync high endmode
+#
+# 1440x1050, 60 Hz, Non-Interlaced (125.10 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1440 1050
+# Scan Frequency 65.220 kHz 60.00 Hz
+# Sync Width 1.204 us 0.046 ms
+# 19 chars 3 lines
+# Front Porch 0.760 us 0.015 ms
+# 12 chars 1 lines
+# Back Porch 1.964 us 0.495 ms
+# 31 chars 33 lines
+# Active Time 11.405 us 16.099 ms
+# 180 chars 1050 lines
+# Blank Time 3.928 us 0.567 ms
+# 62 chars 37 lines
+# Polarity positive positive
+#
+ mode "1440x1050-60"
+# D: 125.10 MHz, H: 65.220 kHz, V: 60.00 Hz
+ geometry 1440 1050 1440 1050 32
+ timings 7993 248 96 33 1 152 3
+ hsync high vsync high endmode mode "1600x900-60"
+# D: 118.250 MHz, H: 55.990 kHz, V: 60.00 Hz
+ geometry 1600 900 1600 900 32
+ timings 8415 256 88 26 3 168 5 endmode mode "1600x1024-60"
+# D: 136.358 MHz, H: 63.600 kHz, V: 60.00 Hz
+ geometry 1600 1024 1600 1024 32 timings 7315 272 104 32 1 168 3 endmode
+#
+# 1600x1200, 60 Hz, Non-Interlaced (156.00 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1600 1200
+# Scan Frequency 76.200 kHz 60.00 Hz
+# Sync Width 1.026 us 0.105 ms
+# 20 chars 8 lines
+# Front Porch 0.205 us 0.131 ms
+# 4 chars 10 lines
+# Back Porch 1.636 us 0.682 ms
+# 32 chars 52 lines
+# Active Time 10.256 us 15.748 ms
+# 200 chars 1200 lines
+# Blank Time 2.872 us 0.866 ms
+# 56 chars 66 lines
+# Polarity negative negative
+#
+ mode "1600x1200-60"
+# D: 156.00 MHz, H: 76.200 kHz, V: 60.00 Hz
+ geometry 1600 1200 1600 1200 32 timings 6172 256 32 52 10 160 8 endmode
+#
+# 1600x1200, 75 Hz, Non-Interlaced (202.50 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1600 1200
+# Scan Frequency 93.750 kHz 75.00 Hz
+# Sync Width 0.948 us 0.032 ms
+# 24 chars 3 lines
+# Front Porch 0.316 us 0.011 ms
+# 8 chars 1 lines
+# Back Porch 1.501 us 0.491 ms
+# 38 chars 46 lines
+# Active Time 7.901 us 12.800 ms
+# 200 chars 1200 lines
+# Blank Time 2.765 us 0.533 ms
+# 70 chars 50 lines
+# Polarity positive positive
+#
+ mode "1600x1200-75"
+# D: 202.50 MHz, H: 93.750 kHz, V: 75.00 Hz
+ geometry 1600 1200 1600 1200 32
+ timings 4938 304 64 46 1 192 3
+ hsync high vsync high endmode mode "1680x1050-60"
+# D: 146.250 MHz, H: 65.290 kHz, V: 59.954 Hz
+ geometry 1680 1050 1680 1050 32
+ timings 6814 280 104 30 3 176 6
+ hsync high vsync high endmode mode "1680x1050-75"
+# D: 187.000 MHz, H: 82.306 kHz, V: 74.892 Hz
+ geometry 1680 1050 1680 1050 32
+ timings 5348 296 120 40 3 176 6
+ hsync high vsync high endmode mode "1792x1344-60"
+# D: 202.975 MHz, H: 83.460 kHz, V: 60.00 Hz
+ geometry 1792 1344 1792 1344 32
+ timings 4902 320 128 43 1 192 3
+ hsync high vsync high endmode mode "1856x1392-60"
+# D: 218.571 MHz, H: 86.460 kHz, V: 60.00 Hz
+ geometry 1856 1392 1856 1392 32
+ timings 4577 336 136 45 1 200 3
+ hsync high vsync high endmode mode "1920x1200-60"
+# D: 193.250 MHz, H: 74.556 kHz, V: 60.00 Hz
+ geometry 1920 1200 1920 1200 32
+ timings 5173 336 136 36 3 200 6
+ hsync high vsync high endmode mode "1920x1440-60"
+# D: 234.000 MHz, H:90.000 kHz, V: 60.00 Hz
+ geometry 1920 1440 1920 1440 32
+ timings 4274 344 128 56 1 208 3
+ hsync high vsync high endmode mode "1920x1440-75"
+# D: 297.000 MHz, H:112.500 kHz, V: 75.00 Hz
+ geometry 1920 1440 1920 1440 32
+ timings 3367 352 144 56 1 224 3
+ hsync high vsync high endmode mode "2048x1536-60"
+# D: 267.250 MHz, H: 95.446 kHz, V: 60.00 Hz
+ geometry 2048 1536 2048 1536 32
+ timings 3742 376 152 49 3 224 4 hsync high vsync high endmode
+#
+# 1280x720, 60 Hz, Non-Interlaced (74.481 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1280 720
+# Scan Frequency 44.760 kHz 60.00 Hz
+# Sync Width 1.826 us 67.024 ms
+# 17 chars 3 lines
+# Front Porch 0.752 us 22.341 ms
+# 7 chars 1 lines
+# Back Porch 2.578 us 491.510 ms
+# 24 chars 22 lines
+# Active Time 17.186 us 16.086 ms
+# 160 chars 720 lines
+# Blank Time 5.156 us 0.581 ms
+# 48 chars 26 lines
+# Polarity negative negative
+#
+ mode "1280x720-60"
+# D: 74.481 MHz, H: 44.760 kHz, V: 60.00 Hz
+ geometry 1280 720 1280 720 32 timings 13426 192 64 22 1 136 3 endmode
+#
+# 1920x1080, 60 Hz, Non-Interlaced (172.798 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1920 1080
+# Scan Frequency 67.080 kHz 60.00 Hz
+# Sync Width 1.204 us 44.723 ms
+# 26 chars 3 lines
+# Front Porch 0.694 us 14.908 ms
+# 15 chars 1 lines
+# Back Porch 1.898 us 506.857 ms
+# 41 chars 34 lines
+# Active Time 11.111 us 16.100 ms
+# 240 chars 1080 lines
+# Blank Time 3.796 us 0.566 ms
+# 82 chars 38 lines
+# Polarity negative negative
+#
+ mode "1920x1080-60"
+# D: 74.481 MHz, H: 67.080 kHz, V: 60.00 Hz
+ geometry 1920 1080 1920 1080 32 timings 5787 328 120 34 1 208 3 endmode
+#
+# 1400x1050, 60 Hz, Non-Interlaced (122.61 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1400 1050
+# Scan Frequency 65.218 kHz 59.99 Hz
+# Sync Width 1.037 us 0.047 ms
+# 19 chars 3 lines
+# Front Porch 0.444 us 0.015 ms
+# 11 chars 1 lines
+# Back Porch 1.185 us 0.188 ms
+# 30 chars 33 lines
+# Active Time 12.963 us 16.411 ms
+# 175 chars 1050 lines
+# Blank Time 2.667 us 0.250 ms
+# 60 chars 37 lines
+# Polarity negative positive
+#
+ mode "1400x1050-60"
+# D: 122.750 MHz, H: 65.317 kHz, V: 59.99 Hz
+ geometry 1400 1050 1408 1050 32
+ timings 8214 232 88 32 3 144 4 endmode mode "1400x1050-75"
+# D: 156.000 MHz, H: 82.278 kHz, V: 74.867 Hz
+ geometry 1400 1050 1408 1050 32 timings 6410 248 104 42 3 144 4 endmode
+#
+# 1366x768, 60 Hz, Non-Interlaced (85.86 MHz dotclock)
+#
+# Horizontal Vertical
+# Resolution 1366 768
+# Scan Frequency 47.700 kHz 60.00 Hz
+# Sync Width 1.677 us 0.063 ms
+# 18 chars 3 lines
+# Front Porch 0.839 us 0.021 ms
+# 9 chars 1 lines
+# Back Porch 2.516 us 0.482 ms
+# 27 chars 23 lines
+# Active Time 15.933 us 16.101 ms
+# 171 chars 768 lines
+# Blank Time 5.031 us 0.566 ms
+# 54 chars 27 lines
+# Polarity negative positive
+#
+ mode "1360x768-60"
+# D: 84.750 MHz, H: 47.720 kHz, V: 60.00 Hz
+ geometry 1360 768 1360 768 32
+ timings 11799 208 72 22 3 136 5 endmode mode "1366x768-60"
+# D: 85.86 MHz, H: 47.700 kHz, V: 60.00 Hz
+ geometry 1366 768 1366 768 32
+ timings 11647 216 72 23 1 144 3 endmode mode "1366x768-50"
+# D: 69,924 MHz, H: 39.550 kHz, V: 50.00 Hz
+ geometry 1366 768 1366 768 32 timings 14301 200 56 19 1 144 3 endmode
diff --git a/Documentation/fb/viafb.txt b/Documentation/fb/viafb.txt
new file mode 100644
index 000000000..1cb2462a7
--- /dev/null
+++ b/Documentation/fb/viafb.txt
@@ -0,0 +1,252 @@
+
+ VIA Integration Graphic Chip Console Framebuffer Driver
+
+[Platform]
+-----------------------
+ The console framebuffer driver is for graphics chips of
+ VIA UniChrome Family(CLE266, PM800 / CN400 / CN300,
+ P4M800CE / P4M800Pro / CN700 / VN800,
+ CX700 / VX700, K8M890, P4M890,
+ CN896 / P4M900, VX800, VX855)
+
+[Driver features]
+------------------------
+ Device: CRT, LCD, DVI
+
+ Support viafb_mode:
+ CRT:
+ 640x480(60, 75, 85, 100, 120 Hz), 720x480(60 Hz),
+ 720x576(60 Hz), 800x600(60, 75, 85, 100, 120 Hz),
+ 848x480(60 Hz), 856x480(60 Hz), 1024x512(60 Hz),
+ 1024x768(60, 75, 85, 100 Hz), 1152x864(75 Hz),
+ 1280x768(60 Hz), 1280x960(60 Hz), 1280x1024(60, 75, 85 Hz),
+ 1440x1050(60 Hz), 1600x1200(60, 75 Hz), 1280x720(60 Hz),
+ 1920x1080(60 Hz), 1400x1050(60 Hz), 800x480(60 Hz)
+
+ color depth: 8 bpp, 16 bpp, 32 bpp supports.
+
+ Support 2D hardware accelerator.
+
+[Using the viafb module]
+-- -- --------------------
+ Start viafb with default settings:
+ #modprobe viafb
+
+ Start viafb with user options:
+ #modprobe viafb viafb_mode=800x600 viafb_bpp=16 viafb_refresh=60
+ viafb_active_dev=CRT+DVI viafb_dvi_port=DVP1
+ viafb_mode1=1024x768 viafb_bpp=16 viafb_refresh1=60
+ viafb_SAMM_ON=1
+
+ viafb_mode:
+ 640x480 (default)
+ 720x480
+ 800x600
+ 1024x768
+ ......
+
+ viafb_bpp:
+ 8, 16, 32 (default:32)
+
+ viafb_refresh:
+ 60, 75, 85, 100, 120 (default:60)
+
+ viafb_lcd_dsp_method:
+ 0 : expansion (default)
+ 1 : centering
+
+ viafb_lcd_mode:
+ 0 : LCD panel with LSB data format input (default)
+ 1 : LCD panel with MSB data format input
+
+ viafb_lcd_panel_id:
+ 0 : Resolution: 640x480, Channel: single, Dithering: Enable
+ 1 : Resolution: 800x600, Channel: single, Dithering: Enable
+ 2 : Resolution: 1024x768, Channel: single, Dithering: Enable (default)
+ 3 : Resolution: 1280x768, Channel: single, Dithering: Enable
+ 4 : Resolution: 1280x1024, Channel: dual, Dithering: Enable
+ 5 : Resolution: 1400x1050, Channel: dual, Dithering: Enable
+ 6 : Resolution: 1600x1200, Channel: dual, Dithering: Enable
+
+ 8 : Resolution: 800x480, Channel: single, Dithering: Enable
+ 9 : Resolution: 1024x768, Channel: dual, Dithering: Enable
+ 10: Resolution: 1024x768, Channel: single, Dithering: Disable
+ 11: Resolution: 1024x768, Channel: dual, Dithering: Disable
+ 12: Resolution: 1280x768, Channel: single, Dithering: Disable
+ 13: Resolution: 1280x1024, Channel: dual, Dithering: Disable
+ 14: Resolution: 1400x1050, Channel: dual, Dithering: Disable
+ 15: Resolution: 1600x1200, Channel: dual, Dithering: Disable
+ 16: Resolution: 1366x768, Channel: single, Dithering: Disable
+ 17: Resolution: 1024x600, Channel: single, Dithering: Enable
+ 18: Resolution: 1280x768, Channel: dual, Dithering: Enable
+ 19: Resolution: 1280x800, Channel: single, Dithering: Enable
+
+ viafb_accel:
+ 0 : No 2D Hardware Acceleration
+ 1 : 2D Hardware Acceleration (default)
+
+ viafb_SAMM_ON:
+ 0 : viafb_SAMM_ON disable (default)
+ 1 : viafb_SAMM_ON enable
+
+ viafb_mode1: (secondary display device)
+ 640x480 (default)
+ 720x480
+ 800x600
+ 1024x768
+ ... ...
+
+ viafb_bpp1: (secondary display device)
+ 8, 16, 32 (default:32)
+
+ viafb_refresh1: (secondary display device)
+ 60, 75, 85, 100, 120 (default:60)
+
+ viafb_active_dev:
+ This option is used to specify active devices.(CRT, DVI, CRT+LCD...)
+ DVI stands for DVI or HDMI, E.g., If you want to enable HDMI,
+ set viafb_active_dev=DVI. In SAMM case, the previous of
+ viafb_active_dev is primary device, and the following is
+ secondary device.
+
+ For example:
+ To enable one device, such as DVI only, we can use:
+ modprobe viafb viafb_active_dev=DVI
+ To enable two devices, such as CRT+DVI:
+ modprobe viafb viafb_active_dev=CRT+DVI;
+
+ For DuoView case, we can use:
+ modprobe viafb viafb_active_dev=CRT+DVI
+ OR
+ modprobe viafb viafb_active_dev=DVI+CRT...
+
+ For SAMM case:
+ If CRT is primary and DVI is secondary, we should use:
+ modprobe viafb viafb_active_dev=CRT+DVI viafb_SAMM_ON=1...
+ If DVI is primary and CRT is secondary, we should use:
+ modprobe viafb viafb_active_dev=DVI+CRT viafb_SAMM_ON=1...
+
+ viafb_display_hardware_layout:
+ This option is used to specify display hardware layout for CX700 chip.
+ 1 : LCD only
+ 2 : DVI only
+ 3 : LCD+DVI (default)
+ 4 : LCD1+LCD2 (internal + internal)
+ 16: LCD1+ExternalLCD2 (internal + external)
+
+ viafb_second_size:
+ This option is used to set second device memory size(MB) in SAMM case.
+ The minimal size is 16.
+
+ viafb_platform_epia_dvi:
+ This option is used to enable DVI on EPIA - M
+ 0 : No DVI on EPIA - M (default)
+ 1 : DVI on EPIA - M
+
+ viafb_bus_width:
+ When using 24 - Bit Bus Width Digital Interface,
+ this option should be set.
+ 12: 12-Bit LVDS or 12-Bit TMDS (default)
+ 24: 24-Bit LVDS or 24-Bit TMDS
+
+ viafb_device_lcd_dualedge:
+ When using Dual Edge Panel, this option should be set.
+ 0 : No Dual Edge Panel (default)
+ 1 : Dual Edge Panel
+
+ viafb_lcd_port:
+ This option is used to specify LCD output port,
+ available values are "DVP0" "DVP1" "DFP_HIGHLOW" "DFP_HIGH" "DFP_LOW".
+ for external LCD + external DVI on CX700(External LCD is on DVP0),
+ we should use:
+ modprobe viafb viafb_lcd_port=DVP0...
+
+Notes:
+ 1. CRT may not display properly for DuoView CRT & DVI display at
+ the "640x480" PAL mode with DVI overscan enabled.
+ 2. SAMM stands for single adapter multi monitors. It is different from
+ multi-head since SAMM support multi monitor at driver layers, thus fbcon
+ layer doesn't even know about it; SAMM's second screen doesn't have a
+ device node file, thus a user mode application can't access it directly.
+ When SAMM is enabled, viafb_mode and viafb_mode1, viafb_bpp and
+ viafb_bpp1, viafb_refresh and viafb_refresh1 can be different.
+ 3. When console is depending on viafbinfo1, dynamically change resolution
+ and bpp, need to call VIAFB specified ioctl interface VIAFB_SET_DEVICE
+ instead of calling common ioctl function FBIOPUT_VSCREENINFO since
+ viafb doesn't support multi-head well, or it will cause screen crush.
+
+
+[Configure viafb with "fbset" tool]
+-----------------------------------
+ "fbset" is an inbox utility of Linux.
+ 1. Inquire current viafb information, type,
+ # fbset -i
+
+ 2. Set various resolutions and viafb_refresh rates,
+ # fbset <resolution-vertical_sync>
+
+ example,
+ # fbset "1024x768-75"
+ or
+ # fbset -g 1024 768 1024 768 32
+ Check the file "/etc/fb.modes" to find display modes available.
+
+ 3. Set the color depth,
+ # fbset -depth <value>
+
+ example,
+ # fbset -depth 16
+
+
+[Configure viafb via /proc]
+---------------------------
+ The following files exist in /proc/viafb
+
+ supported_output_devices
+
+ This read-only file contains a full ',' separated list containing all
+ output devices that could be available on your platform. It is likely
+ that not all of those have a connector on your hardware but it should
+ provide a good starting point to figure out which of those names match
+ a real connector.
+ Example:
+ # cat /proc/viafb/supported_output_devices
+
+ iga1/output_devices
+ iga2/output_devices
+
+ These two files are readable and writable. iga1 and iga2 are the two
+ independent units that produce the screen image. Those images can be
+ forwarded to one or more output devices. Reading those files is a way
+ to query which output devices are currently used by an iga.
+ Example:
+ # cat /proc/viafb/iga1/output_devices
+ If there are no output devices printed the output of this iga is lost.
+ This can happen for example if only one (the other) iga is used.
+ Writing to these files allows adjusting the output devices during
+ runtime. One can add new devices, remove existing ones or switch
+ between igas. Essentially you can write a ',' separated list of device
+ names (or a single one) in the same format as the output to those
+ files. You can add a '+' or '-' as a prefix allowing simple addition
+ and removal of devices. So a prefix '+' adds the devices from your list
+ to the already existing ones, '-' removes the listed devices from the
+ existing ones and if no prefix is given it replaces all existing ones
+ with the listed ones. If you remove devices they are expected to turn
+ off. If you add devices that are already part of the other iga they are
+ removed there and added to the new one.
+ Examples:
+ Add CRT as output device to iga1
+ # echo +CRT > /proc/viafb/iga1/output_devices
+
+ Remove (turn off) DVP1 and LVDS1 as output devices of iga2
+ # echo -DVP1,LVDS1 > /proc/viafb/iga2/output_devices
+
+ Replace all iga1 output devices by CRT
+ # echo CRT > /proc/viafb/iga1/output_devices
+
+
+[Bootup with viafb]:
+--------------------
+ Add the following line to your grub.conf:
+ append = "video=viafb:viafb_mode=1024x768,viafb_bpp=32,viafb_refresh=85"
+
diff --git a/Documentation/fb/vt8623fb.txt b/Documentation/fb/vt8623fb.txt
new file mode 100644
index 000000000..f654576c5
--- /dev/null
+++ b/Documentation/fb/vt8623fb.txt
@@ -0,0 +1,64 @@
+
+ vt8623fb - fbdev driver for graphics core in VIA VT8623 chipset
+ ===============================================================
+
+
+Supported Hardware
+==================
+
+ VIA VT8623 [CLE266] chipset and its graphics core
+ (known as CastleRock or Unichrome)
+
+I tested vt8623fb on VIA EPIA ML-6000
+
+
+Supported Features
+==================
+
+ * 4 bpp pseudocolor modes (with 18bit palette, two variants)
+ * 8 bpp pseudocolor mode (with 18bit palette)
+ * 16 bpp truecolor mode (RGB 565)
+ * 32 bpp truecolor mode (RGB 888)
+ * text mode (activated by bpp = 0)
+ * doublescan mode variant (not available in text mode)
+ * panning in both directions
+ * suspend/resume support
+ * DPMS support
+
+Text mode is supported even in higher resolutions, but there is limitation to
+lower pixclocks (maximum about 100 MHz). This limitation is not enforced by
+driver. Text mode supports 8bit wide fonts only (hardware limitation) and
+16bit tall fonts (driver limitation).
+
+There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
+packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
+with interleaved planes (1 byte interleave), MSB first. Both modes support
+8bit wide fonts only (driver limitation).
+
+Suspend/resume works on systems that initialize video card during resume and
+if device is active (for example used by fbcon).
+
+
+Missing Features
+================
+(alias TODO list)
+
+ * secondary (not initialized by BIOS) device support
+ * MMIO support
+ * interlaced mode variant
+ * support for fontwidths != 8 in 4 bpp modes
+ * support for fontheight != 16 in text mode
+ * hardware cursor
+ * video overlay support
+ * vsync synchronization
+ * acceleration support (8514-like 2D, busmaster transfers)
+
+
+Known bugs
+==========
+
+ * cursor disable in text mode doesn't work
+
+
+--
+Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/filesystems/.gitignore b/Documentation/filesystems/.gitignore
new file mode 100644
index 000000000..31d6e426b
--- /dev/null
+++ b/Documentation/filesystems/.gitignore
@@ -0,0 +1 @@
+dnotify_test
diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
new file mode 100644
index 000000000..9922939e7
--- /dev/null
+++ b/Documentation/filesystems/00-INDEX
@@ -0,0 +1,159 @@
+00-INDEX
+ - this file (info on some of the filesystems supported by linux).
+Locking
+ - info on locking rules as they pertain to Linux VFS.
+Makefile
+ - Makefile for building the filsystems-part of DocBook.
+9p.txt
+ - 9p (v9fs) is an implementation of the Plan 9 remote fs protocol.
+adfs.txt
+ - info and mount options for the Acorn Advanced Disc Filing System.
+afs.txt
+ - info and examples for the distributed AFS (Andrew File System) fs.
+affs.txt
+ - info and mount options for the Amiga Fast File System.
+autofs4-mount-control.txt
+ - info on device control operations for autofs4 module.
+automount-support.txt
+ - information about filesystem automount support.
+befs.txt
+ - information about the BeOS filesystem for Linux.
+bfs.txt
+ - info for the SCO UnixWare Boot Filesystem (BFS).
+btrfs.txt
+ - info for the BTRFS filesystem.
+caching/
+ - directory containing filesystem cache documentation.
+ceph.txt
+ - info for the Ceph Distributed File System.
+cifs/
+ - directory containing CIFS filesystem documentation and example code.
+coda.txt
+ - description of the CODA filesystem.
+configfs/
+ - directory containing configfs documentation and example code.
+cramfs.txt
+ - info on the cram filesystem for small storage (ROMs etc).
+dax.txt
+ - info on avoiding the page cache for files stored on CPU-addressable
+ storage devices.
+debugfs.txt
+ - info on the debugfs filesystem.
+devpts.txt
+ - info on the devpts filesystem.
+directory-locking
+ - info about the locking scheme used for directory operations.
+dlmfs.txt
+ - info on the userspace interface to the OCFS2 DLM.
+dnotify.txt
+ - info about directory notification in Linux.
+dnotify_test.c
+ - example program for dnotify.
+ecryptfs.txt
+ - docs on eCryptfs: stacked cryptographic filesystem for Linux.
+efivarfs.txt
+ - info for the efivarfs filesystem.
+exofs.txt
+ - info, usage, mount options, design about EXOFS.
+ext2.txt
+ - info, mount options and specifications for the Ext2 filesystem.
+ext3.txt
+ - info, mount options and specifications for the Ext3 filesystem.
+ext4.txt
+ - info, mount options and specifications for the Ext4 filesystem.
+f2fs.txt
+ - info and mount options for the F2FS filesystem.
+fiemap.txt
+ - info on fiemap ioctl.
+files.txt
+ - info on file management in the Linux kernel.
+fuse.txt
+ - info on the Filesystem in User SpacE including mount options.
+gfs2-glocks.txt
+ - info on the Global File System 2 - Glock internal locking rules.
+gfs2-uevents.txt
+ - info on the Global File System 2 - uevents.
+gfs2.txt
+ - info on the Global File System 2.
+hfs.txt
+ - info on the Macintosh HFS Filesystem for Linux.
+hfsplus.txt
+ - info on the Macintosh HFSPlus Filesystem for Linux.
+hpfs.txt
+ - info and mount options for the OS/2 HPFS.
+inotify.txt
+ - info on the powerful yet simple file change notification system.
+isofs.txt
+ - info and mount options for the ISO 9660 (CDROM) filesystem.
+jfs.txt
+ - info and mount options for the JFS filesystem.
+locks.txt
+ - info on file locking implementations, flock() vs. fcntl(), etc.
+logfs.txt
+ - info on the LogFS flash filesystem.
+mandatory-locking.txt
+ - info on the Linux implementation of Sys V mandatory file locking.
+ncpfs.txt
+ - info on Novell Netware(tm) filesystem using NCP protocol.
+nfs/
+ - nfs-related documentation.
+nilfs2.txt
+ - info and mount options for the NILFS2 filesystem.
+ntfs.txt
+ - info and mount options for the NTFS filesystem (Windows NT).
+ocfs2.txt
+ - info and mount options for the OCFS2 clustered filesystem.
+omfs.txt
+ - info on the Optimized MPEG FileSystem.
+path-lookup.txt
+ - info on path walking and name lookup locking.
+pohmelfs/
+ - directory containing pohmelfs filesystem documentation.
+porting
+ - various information on filesystem porting.
+proc.txt
+ - info on Linux's /proc filesystem.
+qnx6.txt
+ - info on the QNX6 filesystem.
+quota.txt
+ - info on Quota subsystem.
+ramfs-rootfs-initramfs.txt
+ - info on the 'in memory' filesystems ramfs, rootfs and initramfs.
+relay.txt
+ - info on relay, for efficient streaming from kernel to user space.
+romfs.txt
+ - description of the ROMFS filesystem.
+seq_file.txt
+ - how to use the seq_file API.
+sharedsubtree.txt
+ - a description of shared subtrees for namespaces.
+spufs.txt
+ - info and mount options for the SPU filesystem used on Cell.
+squashfs.txt
+ - info on the squashfs filesystem.
+sysfs-pci.txt
+ - info on accessing PCI device resources through sysfs.
+sysfs-tagging.txt
+ - info on sysfs tagging to avoid duplicates.
+sysfs.txt
+ - info on sysfs, a ram-based filesystem for exporting kernel objects.
+sysv-fs.txt
+ - info on the SystemV/V7/Xenix/Coherent filesystem.
+tmpfs.txt
+ - info on tmpfs, a filesystem that holds all files in virtual memory.
+ubifs.txt
+ - info on the Unsorted Block Images FileSystem.
+udf.txt
+ - info and mount options for the UDF filesystem.
+ufs.txt
+ - info on the ufs filesystem.
+vfat.txt
+ - info on using the VFAT filesystem used in Windows NT and Windows 95.
+vfs.txt
+ - overview of the Virtual File System.
+xfs-delayed-logging-design.txt
+ - info on the XFS Delayed Logging Design.
+xfs-self-describing-metadata.txt
+ - info on XFS Self Describing Metadata.
+xfs.txt
+ - info and mount options for the XFS filesystem.
diff --git a/Documentation/filesystems/9p.txt b/Documentation/filesystems/9p.txt
new file mode 100644
index 000000000..fec7144e8
--- /dev/null
+++ b/Documentation/filesystems/9p.txt
@@ -0,0 +1,161 @@
+ v9fs: Plan 9 Resource Sharing for Linux
+ =======================================
+
+ABOUT
+=====
+
+v9fs is a Unix implementation of the Plan 9 9p remote filesystem protocol.
+
+This software was originally developed by Ron Minnich <rminnich@sandia.gov>
+and Maya Gokhale. Additional development by Greg Watson
+<gwatson@lanl.gov> and most recently Eric Van Hensbergen
+<ericvh@gmail.com>, Latchesar Ionkov <lucho@ionkov.net> and Russ Cox
+<rsc@swtch.com>.
+
+The best detailed explanation of the Linux implementation and applications of
+the 9p client is available in the form of a USENIX paper:
+ http://www.usenix.org/events/usenix05/tech/freenix/hensbergen.html
+
+Other applications are described in the following papers:
+ * XCPU & Clustering
+ http://xcpu.org/papers/xcpu-talk.pdf
+ * KVMFS: control file system for KVM
+ http://xcpu.org/papers/kvmfs.pdf
+ * CellFS: A New Programming Model for the Cell BE
+ http://xcpu.org/papers/cellfs-talk.pdf
+ * PROSE I/O: Using 9p to enable Application Partitions
+ http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf
+ * VirtFS: A Virtualization Aware File System pass-through
+ http://goo.gl/3WPDg
+
+USAGE
+=====
+
+For remote file server:
+
+ mount -t 9p 10.10.1.2 /mnt/9
+
+For Plan 9 From User Space applications (http://swtch.com/plan9)
+
+ mount -t 9p `namespace`/acme /mnt/9 -o trans=unix,uname=$USER
+
+For server running on QEMU host with virtio transport:
+
+ mount -t 9p -o trans=virtio <mount_tag> /mnt/9
+
+where mount_tag is the tag associated by the server to each of the exported
+mount points. Each 9P export is seen by the client as a virtio device with an
+associated "mount_tag" property. Available mount tags can be
+seen by reading /sys/bus/virtio/drivers/9pnet_virtio/virtio<n>/mount_tag files.
+
+OPTIONS
+=======
+
+ trans=name select an alternative transport. Valid options are
+ currently:
+ unix - specifying a named pipe mount point
+ tcp - specifying a normal TCP/IP connection
+ fd - used passed file descriptors for connection
+ (see rfdno and wfdno)
+ virtio - connect to the next virtio channel available
+ (from QEMU with trans_virtio module)
+ rdma - connect to a specified RDMA channel
+
+ uname=name user name to attempt mount as on the remote server. The
+ server may override or ignore this value. Certain user
+ names may require authentication.
+
+ aname=name aname specifies the file tree to access when the server is
+ offering several exported file systems.
+
+ cache=mode specifies a caching policy. By default, no caches are used.
+ none = default no cache policy, metadata and data
+ alike are synchronous.
+ loose = no attempts are made at consistency,
+ intended for exclusive, read-only mounts
+ fscache = use FS-Cache for a persistent, read-only
+ cache backend.
+ mmap = minimal cache that is only used for read-write
+ mmap. Northing else is cached, like cache=none
+
+ debug=n specifies debug level. The debug level is a bitmask.
+ 0x01 = display verbose error messages
+ 0x02 = developer debug (DEBUG_CURRENT)
+ 0x04 = display 9p trace
+ 0x08 = display VFS trace
+ 0x10 = display Marshalling debug
+ 0x20 = display RPC debug
+ 0x40 = display transport debug
+ 0x80 = display allocation debug
+ 0x100 = display protocol message debug
+ 0x200 = display Fid debug
+ 0x400 = display packet debug
+ 0x800 = display fscache tracing debug
+
+ rfdno=n the file descriptor for reading with trans=fd
+
+ wfdno=n the file descriptor for writing with trans=fd
+
+ msize=n the number of bytes to use for 9p packet payload
+
+ port=n port to connect to on the remote server
+
+ noextend force legacy mode (no 9p2000.u or 9p2000.L semantics)
+
+ version=name Select 9P protocol version. Valid options are:
+ 9p2000 - Legacy mode (same as noextend)
+ 9p2000.u - Use 9P2000.u protocol
+ 9p2000.L - Use 9P2000.L protocol
+
+ dfltuid attempt to mount as a particular uid
+
+ dfltgid attempt to mount with a particular gid
+
+ afid security channel - used by Plan 9 authentication protocols
+
+ nodevmap do not map special files - represent them as normal files.
+ This can be used to share devices/named pipes/sockets between
+ hosts. This functionality will be expanded in later versions.
+
+ access there are four access modes.
+ user = if a user tries to access a file on v9fs
+ filesystem for the first time, v9fs sends an
+ attach command (Tattach) for that user.
+ This is the default mode.
+ <uid> = allows only user with uid=<uid> to access
+ the files on the mounted filesystem
+ any = v9fs does single attach and performs all
+ operations as one user
+ client = ACL based access check on the 9p client
+ side for access validation
+
+ cachetag cache tag to use the specified persistent cache.
+ cache tags for existing cache sessions can be listed at
+ /sys/fs/9p/caches. (applies only to cache=fscache)
+
+RESOURCES
+=========
+
+Protocol specifications are maintained on github:
+http://ericvh.github.com/9p-rfc/
+
+9p client and server implementations are listed on
+http://9p.cat-v.org/implementations
+
+A 9p2000.L server is being developed by LLNL and can be found
+at http://code.google.com/p/diod/
+
+There are user and developer mailing lists available through the v9fs project
+on sourceforge (http://sourceforge.net/projects/v9fs).
+
+News and other information is maintained on a Wiki.
+(http://sf.net/apps/mediawiki/v9fs/index.php).
+
+Bug reports are best issued via the mailing list.
+
+For more information on the Plan 9 Operating System check out
+http://plan9.bell-labs.com/plan9
+
+For information on Plan 9 from User Space (Plan 9 applications and libraries
+ported to Linux/BSD/OSX/etc) check out http://swtch.com/plan9
+
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
new file mode 100644
index 000000000..0a926e2ba
--- /dev/null
+++ b/Documentation/filesystems/Locking
@@ -0,0 +1,578 @@
+ The text below describes the locking rules for VFS-related methods.
+It is (believed to be) up-to-date. *Please*, if you change anything in
+prototypes or locking protocols - update this file. And update the relevant
+instances in the tree, don't leave that to maintainers of filesystems/devices/
+etc. At the very least, put the list of dubious cases in the end of this file.
+Don't turn it into log - maintainers of out-of-the-tree code are supposed to
+be able to use diff(1).
+ Thing currently missing here: socket operations. Alexey?
+
+--------------------------- dentry_operations --------------------------
+prototypes:
+ int (*d_revalidate)(struct dentry *, unsigned int);
+ int (*d_weak_revalidate)(struct dentry *, unsigned int);
+ int (*d_hash)(const struct dentry *, struct qstr *);
+ int (*d_compare)(const struct dentry *, const struct dentry *,
+ unsigned int, const char *, const struct qstr *);
+ int (*d_delete)(struct dentry *);
+ void (*d_release)(struct dentry *);
+ void (*d_iput)(struct dentry *, struct inode *);
+ char *(*d_dname)((struct dentry *dentry, char *buffer, int buflen);
+ struct vfsmount *(*d_automount)(struct path *path);
+ int (*d_manage)(struct dentry *, bool);
+
+locking rules:
+ rename_lock ->d_lock may block rcu-walk
+d_revalidate: no no yes (ref-walk) maybe
+d_weak_revalidate:no no yes no
+d_hash no no no maybe
+d_compare: yes no no maybe
+d_delete: no yes no no
+d_release: no no yes no
+d_prune: no yes no no
+d_iput: no no yes no
+d_dname: no no no no
+d_automount: no no yes no
+d_manage: no no yes (ref-walk) maybe
+
+--------------------------- inode_operations ---------------------------
+prototypes:
+ int (*create) (struct inode *,struct dentry *,umode_t, bool);
+ struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
+ int (*link) (struct dentry *,struct inode *,struct dentry *);
+ int (*unlink) (struct inode *,struct dentry *);
+ int (*symlink) (struct inode *,struct dentry *,const char *);
+ int (*mkdir) (struct inode *,struct dentry *,umode_t);
+ int (*rmdir) (struct inode *,struct dentry *);
+ int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
+ int (*rename) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
+ int (*rename2) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *, unsigned int);
+ int (*readlink) (struct dentry *, char __user *,int);
+ void * (*follow_link) (struct dentry *, struct nameidata *);
+ void (*put_link) (struct dentry *, struct nameidata *, void *);
+ void (*truncate) (struct inode *);
+ int (*permission) (struct inode *, int, unsigned int);
+ int (*get_acl)(struct inode *, int);
+ int (*setattr) (struct dentry *, struct iattr *);
+ int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *);
+ int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
+ ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
+ ssize_t (*listxattr) (struct dentry *, char *, size_t);
+ int (*removexattr) (struct dentry *, const char *);
+ int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
+ void (*update_time)(struct inode *, struct timespec *, int);
+ int (*atomic_open)(struct inode *, struct dentry *,
+ struct file *, unsigned open_flag,
+ umode_t create_mode, int *opened);
+ int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+ int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
+
+locking rules:
+ all may block
+ i_mutex(inode)
+lookup: yes
+create: yes
+link: yes (both)
+mknod: yes
+symlink: yes
+mkdir: yes
+unlink: yes (both)
+rmdir: yes (both) (see below)
+rename: yes (all) (see below)
+rename2: yes (all) (see below)
+readlink: no
+follow_link: no
+put_link: no
+setattr: yes
+permission: no (may not block if called in rcu-walk mode)
+get_acl: no
+getattr: no
+setxattr: yes
+getxattr: no
+listxattr: no
+removexattr: yes
+fiemap: no
+update_time: no
+atomic_open: yes
+tmpfile: no
+dentry_open: no
+
+ Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
+victim.
+ cross-directory ->rename() and rename2() has (per-superblock)
+->s_vfs_rename_sem.
+
+See Documentation/filesystems/directory-locking for more detailed discussion
+of the locking scheme for directory operations.
+
+--------------------------- super_operations ---------------------------
+prototypes:
+ struct inode *(*alloc_inode)(struct super_block *sb);
+ void (*destroy_inode)(struct inode *);
+ void (*dirty_inode) (struct inode *, int flags);
+ int (*write_inode) (struct inode *, struct writeback_control *wbc);
+ int (*drop_inode) (struct inode *);
+ void (*evict_inode) (struct inode *);
+ void (*put_super) (struct super_block *);
+ int (*sync_fs)(struct super_block *sb, int wait);
+ int (*freeze_fs) (struct super_block *);
+ int (*unfreeze_fs) (struct super_block *);
+ int (*statfs) (struct dentry *, struct kstatfs *);
+ int (*remount_fs) (struct super_block *, int *, char *);
+ void (*umount_begin) (struct super_block *);
+ int (*show_options)(struct seq_file *, struct dentry *);
+ ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
+ ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+ int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
+
+locking rules:
+ All may block [not true, see below]
+ s_umount
+alloc_inode:
+destroy_inode:
+dirty_inode:
+write_inode:
+drop_inode: !!!inode->i_lock!!!
+evict_inode:
+put_super: write
+sync_fs: read
+freeze_fs: write
+unfreeze_fs: write
+statfs: maybe(read) (see below)
+remount_fs: write
+umount_begin: no
+show_options: no (namespace_sem)
+quota_read: no (see below)
+quota_write: no (see below)
+bdev_try_to_free_page: no (see below)
+
+->statfs() has s_umount (shared) when called by ustat(2) (native or
+compat), but that's an accident of bad API; s_umount is used to pin
+the superblock down when we only have dev_t given us by userland to
+identify the superblock. Everything else (statfs(), fstatfs(), etc.)
+doesn't hold it when calling ->statfs() - superblock is pinned down
+by resolving the pathname passed to syscall.
+->quota_read() and ->quota_write() functions are both guaranteed to
+be the only ones operating on the quota file by the quota code (via
+dqio_sem) (unless an admin really wants to screw up something and
+writes to quota files with quotas on). For other details about locking
+see also dquot_operations section.
+->bdev_try_to_free_page is called from the ->releasepage handler of
+the block device inode. See there for more details.
+
+--------------------------- file_system_type ---------------------------
+prototypes:
+ struct dentry *(*mount) (struct file_system_type *, int,
+ const char *, void *);
+ void (*kill_sb) (struct super_block *);
+locking rules:
+ may block
+mount yes
+kill_sb yes
+
+->mount() returns ERR_PTR or the root dentry; its superblock should be locked
+on return.
+->kill_sb() takes a write-locked superblock, does all shutdown work on it,
+unlocks and drops the reference.
+
+--------------------------- address_space_operations --------------------------
+prototypes:
+ int (*writepage)(struct page *page, struct writeback_control *wbc);
+ int (*readpage)(struct file *, struct page *);
+ int (*sync_page)(struct page *);
+ int (*writepages)(struct address_space *, struct writeback_control *);
+ int (*set_page_dirty)(struct page *page);
+ int (*readpages)(struct file *filp, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages);
+ int (*write_begin)(struct file *, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata);
+ int (*write_end)(struct file *, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata);
+ sector_t (*bmap)(struct address_space *, sector_t);
+ void (*invalidatepage) (struct page *, unsigned int, unsigned int);
+ int (*releasepage) (struct page *, int);
+ void (*freepage)(struct page *);
+ int (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
+ int (*migratepage)(struct address_space *, struct page *, struct page *);
+ int (*launder_page)(struct page *);
+ int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long);
+ int (*error_remove_page)(struct address_space *, struct page *);
+ int (*swap_activate)(struct file *);
+ int (*swap_deactivate)(struct file *);
+
+locking rules:
+ All except set_page_dirty and freepage may block
+
+ PageLocked(page) i_mutex
+writepage: yes, unlocks (see below)
+readpage: yes, unlocks
+sync_page: maybe
+writepages:
+set_page_dirty no
+readpages:
+write_begin: locks the page yes
+write_end: yes, unlocks yes
+bmap:
+invalidatepage: yes
+releasepage: yes
+freepage: yes
+direct_IO:
+migratepage: yes (both)
+launder_page: yes
+is_partially_uptodate: yes
+error_remove_page: yes
+swap_activate: no
+swap_deactivate: no
+
+ ->write_begin(), ->write_end(), ->sync_page() and ->readpage()
+may be called from the request handler (/dev/loop).
+
+ ->readpage() unlocks the page, either synchronously or via I/O
+completion.
+
+ ->readpages() populates the pagecache with the passed pages and starts
+I/O against them. They come unlocked upon I/O completion.
+
+ ->writepage() is used for two purposes: for "memory cleansing" and for
+"sync". These are quite different operations and the behaviour may differ
+depending upon the mode.
+
+If writepage is called for sync (wbc->sync_mode != WBC_SYNC_NONE) then
+it *must* start I/O against the page, even if that would involve
+blocking on in-progress I/O.
+
+If writepage is called for memory cleansing (sync_mode ==
+WBC_SYNC_NONE) then its role is to get as much writeout underway as
+possible. So writepage should try to avoid blocking against
+currently-in-progress I/O.
+
+If the filesystem is not called for "sync" and it determines that it
+would need to block against in-progress I/O to be able to start new I/O
+against the page the filesystem should redirty the page with
+redirty_page_for_writepage(), then unlock the page and return zero.
+This may also be done to avoid internal deadlocks, but rarely.
+
+If the filesystem is called for sync then it must wait on any
+in-progress I/O and then start new I/O.
+
+The filesystem should unlock the page synchronously, before returning to the
+caller, unless ->writepage() returns special WRITEPAGE_ACTIVATE
+value. WRITEPAGE_ACTIVATE means that page cannot really be written out
+currently, and VM should stop calling ->writepage() on this page for some
+time. VM does this by moving page to the head of the active list, hence the
+name.
+
+Unless the filesystem is going to redirty_page_for_writepage(), unlock the page
+and return zero, writepage *must* run set_page_writeback() against the page,
+followed by unlocking it. Once set_page_writeback() has been run against the
+page, write I/O can be submitted and the write I/O completion handler must run
+end_page_writeback() once the I/O is complete. If no I/O is submitted, the
+filesystem must run end_page_writeback() against the page before returning from
+writepage.
+
+That is: after 2.5.12, pages which are under writeout are *not* locked. Note,
+if the filesystem needs the page to be locked during writeout, that is ok, too,
+the page is allowed to be unlocked at any point in time between the calls to
+set_page_writeback() and end_page_writeback().
+
+Note, failure to run either redirty_page_for_writepage() or the combination of
+set_page_writeback()/end_page_writeback() on a page submitted to writepage
+will leave the page itself marked clean but it will be tagged as dirty in the
+radix tree. This incoherency can lead to all sorts of hard-to-debug problems
+in the filesystem like having dirty inodes at umount and losing written data.
+
+ ->sync_page() locking rules are not well-defined - usually it is called
+with lock on page, but that is not guaranteed. Considering the currently
+existing instances of this method ->sync_page() itself doesn't look
+well-defined...
+
+ ->writepages() is used for periodic writeback and for syscall-initiated
+sync operations. The address_space should start I/O against at least
+*nr_to_write pages. *nr_to_write must be decremented for each page which is
+written. The address_space implementation may write more (or less) pages
+than *nr_to_write asks for, but it should try to be reasonably close. If
+nr_to_write is NULL, all dirty pages must be written.
+
+writepages should _only_ write pages which are present on
+mapping->io_pages.
+
+ ->set_page_dirty() is called from various places in the kernel
+when the target page is marked as needing writeback. It may be called
+under spinlock (it cannot block) and is sometimes called with the page
+not locked.
+
+ ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some
+filesystems and by the swapper. The latter will eventually go away. Please,
+keep it that way and don't breed new callers.
+
+ ->invalidatepage() is called when the filesystem must attempt to drop
+some or all of the buffers from the page when it is being truncated. It
+returns zero on success. If ->invalidatepage is zero, the kernel uses
+block_invalidatepage() instead.
+
+ ->releasepage() is called when the kernel is about to try to drop the
+buffers from the page in preparation for freeing it. It returns zero to
+indicate that the buffers are (or may be) freeable. If ->releasepage is zero,
+the kernel assumes that the fs has no private interest in the buffers.
+
+ ->freepage() is called when the kernel is done dropping the page
+from the page cache.
+
+ ->launder_page() may be called prior to releasing a page if
+it is still found to be dirty. It returns zero if the page was successfully
+cleaned, or an error value if not. Note that in order to prevent the page
+getting mapped back in and redirtied, it needs to be kept locked
+across the entire operation.
+
+ ->swap_activate will be called with a non-zero argument on
+files backing (non block device backed) swapfiles. A return value
+of zero indicates success, in which case this file can be used for
+backing swapspace. The swapspace operations will be proxied to the
+address space operations.
+
+ ->swap_deactivate() will be called in the sys_swapoff()
+path after ->swap_activate() returned success.
+
+----------------------- file_lock_operations ------------------------------
+prototypes:
+ void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
+ void (*fl_release_private)(struct file_lock *);
+
+
+locking rules:
+ inode->i_lock may block
+fl_copy_lock: yes no
+fl_release_private: maybe maybe[1]
+
+[1]: ->fl_release_private for flock or POSIX locks is currently allowed
+to block. Leases however can still be freed while the i_lock is held and
+so fl_release_private called on a lease should not block.
+
+----------------------- lock_manager_operations ---------------------------
+prototypes:
+ int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
+ unsigned long (*lm_owner_key)(struct file_lock *);
+ void (*lm_notify)(struct file_lock *); /* unblock callback */
+ int (*lm_grant)(struct file_lock *, struct file_lock *, int);
+ void (*lm_break)(struct file_lock *); /* break_lease callback */
+ int (*lm_change)(struct file_lock **, int);
+
+locking rules:
+
+ inode->i_lock blocked_lock_lock may block
+lm_compare_owner: yes[1] maybe no
+lm_owner_key yes[1] yes no
+lm_notify: yes yes no
+lm_grant: no no no
+lm_break: yes no no
+lm_change yes no no
+
+[1]: ->lm_compare_owner and ->lm_owner_key are generally called with
+*an* inode->i_lock held. It may not be the i_lock of the inode
+associated with either file_lock argument! This is the case with deadlock
+detection, since the code has to chase down the owners of locks that may
+be entirely unrelated to the one on which the lock is being acquired.
+For deadlock detection however, the blocked_lock_lock is also held. The
+fact that these locks are held ensures that the file_locks do not
+disappear out from under you while doing the comparison or generating an
+owner key.
+
+--------------------------- buffer_head -----------------------------------
+prototypes:
+ void (*b_end_io)(struct buffer_head *bh, int uptodate);
+
+locking rules:
+ called from interrupts. In other words, extreme care is needed here.
+bh is locked, but that's all warranties we have here. Currently only RAID1,
+highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices
+call this method upon the IO completion.
+
+--------------------------- block_device_operations -----------------------
+prototypes:
+ int (*open) (struct block_device *, fmode_t);
+ int (*release) (struct gendisk *, fmode_t);
+ int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
+ int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
+ int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *);
+ int (*media_changed) (struct gendisk *);
+ void (*unlock_native_capacity) (struct gendisk *);
+ int (*revalidate_disk) (struct gendisk *);
+ int (*getgeo)(struct block_device *, struct hd_geometry *);
+ void (*swap_slot_free_notify) (struct block_device *, unsigned long);
+
+locking rules:
+ bd_mutex
+open: yes
+release: yes
+ioctl: no
+compat_ioctl: no
+direct_access: no
+media_changed: no
+unlock_native_capacity: no
+revalidate_disk: no
+getgeo: no
+swap_slot_free_notify: no (see below)
+
+media_changed, unlock_native_capacity and revalidate_disk are called only from
+check_disk_change().
+
+swap_slot_free_notify is called with swap_lock and sometimes the page lock
+held.
+
+
+--------------------------- file_operations -------------------------------
+prototypes:
+ loff_t (*llseek) (struct file *, loff_t, int);
+ ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
+ ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
+ ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+ ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+ int (*iterate) (struct file *, struct dir_context *);
+ unsigned int (*poll) (struct file *, struct poll_table_struct *);
+ long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
+ long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
+ int (*mmap) (struct file *, struct vm_area_struct *);
+ int (*open) (struct inode *, struct file *);
+ int (*flush) (struct file *);
+ int (*release) (struct inode *, struct file *);
+ int (*fsync) (struct file *, loff_t start, loff_t end, int datasync);
+ int (*aio_fsync) (struct kiocb *, int datasync);
+ int (*fasync) (int, struct file *, int);
+ int (*lock) (struct file *, int, struct file_lock *);
+ ssize_t (*readv) (struct file *, const struct iovec *, unsigned long,
+ loff_t *);
+ ssize_t (*writev) (struct file *, const struct iovec *, unsigned long,
+ loff_t *);
+ ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t,
+ void __user *);
+ ssize_t (*sendpage) (struct file *, struct page *, int, size_t,
+ loff_t *, int);
+ unsigned long (*get_unmapped_area)(struct file *, unsigned long,
+ unsigned long, unsigned long, unsigned long);
+ int (*check_flags)(int);
+ int (*flock) (struct file *, int, struct file_lock *);
+ ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *,
+ size_t, unsigned int);
+ ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
+ size_t, unsigned int);
+ int (*setlease)(struct file *, long, struct file_lock **, void **);
+ long (*fallocate)(struct file *, int, loff_t, loff_t);
+};
+
+locking rules:
+ All may block.
+
+->llseek() locking has moved from llseek to the individual llseek
+implementations. If your fs is not using generic_file_llseek, you
+need to acquire and release the appropriate locks in your ->llseek().
+For many filesystems, it is probably safe to acquire the inode
+mutex or just to use i_size_read() instead.
+Note: this does not protect the file->f_pos against concurrent modifications
+since this is something the userspace has to take care about.
+
+->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
+Most instances call fasync_helper(), which does that maintenance, so it's
+not normally something one needs to worry about. Return values > 0 will be
+mapped to zero in the VFS layer.
+
+->readdir() and ->ioctl() on directories must be changed. Ideally we would
+move ->readdir() to inode_operations and use a separate method for directory
+->ioctl() or kill the latter completely. One of the problems is that for
+anything that resembles union-mount we won't have a struct file for all
+components. And there are other reasons why the current interface is a mess...
+
+->read on directories probably must go away - we should just enforce -EISDIR
+in sys_read() and friends.
+
+->setlease operations should call generic_setlease() before or after setting
+the lease within the individual filesystem to record the result of the
+operation
+
+--------------------------- dquot_operations -------------------------------
+prototypes:
+ int (*write_dquot) (struct dquot *);
+ int (*acquire_dquot) (struct dquot *);
+ int (*release_dquot) (struct dquot *);
+ int (*mark_dirty) (struct dquot *);
+ int (*write_info) (struct super_block *, int);
+
+These operations are intended to be more or less wrapping functions that ensure
+a proper locking wrt the filesystem and call the generic quota operations.
+
+What filesystem should expect from the generic quota functions:
+
+ FS recursion Held locks when called
+write_dquot: yes dqonoff_sem or dqptr_sem
+acquire_dquot: yes dqonoff_sem or dqptr_sem
+release_dquot: yes dqonoff_sem or dqptr_sem
+mark_dirty: no -
+write_info: yes dqonoff_sem
+
+FS recursion means calling ->quota_read() and ->quota_write() from superblock
+operations.
+
+More details about quota locking can be found in fs/dquot.c.
+
+--------------------------- vm_operations_struct -----------------------------
+prototypes:
+ void (*open)(struct vm_area_struct*);
+ void (*close)(struct vm_area_struct*);
+ int (*fault)(struct vm_area_struct*, struct vm_fault *);
+ int (*page_mkwrite)(struct vm_area_struct *, struct vm_fault *);
+ int (*pfn_mkwrite)(struct vm_area_struct *, struct vm_fault *);
+ int (*access)(struct vm_area_struct *, unsigned long, void*, int, int);
+
+locking rules:
+ mmap_sem PageLocked(page)
+open: yes
+close: yes
+fault: yes can return with page locked
+map_pages: yes
+page_mkwrite: yes can return with page locked
+pfn_mkwrite: yes
+access: yes
+
+ ->fault() is called when a previously not present pte is about
+to be faulted in. The filesystem must find and return the page associated
+with the passed in "pgoff" in the vm_fault structure. If it is possible that
+the page may be truncated and/or invalidated, then the filesystem must lock
+the page, then ensure it is not already truncated (the page lock will block
+subsequent truncate), and then return with VM_FAULT_LOCKED, and the page
+locked. The VM will unlock the page.
+
+ ->map_pages() is called when VM asks to map easy accessible pages.
+Filesystem should find and map pages associated with offsets from "pgoff"
+till "max_pgoff". ->map_pages() is called with page table locked and must
+not block. If it's not possible to reach a page without blocking,
+filesystem should skip it. Filesystem should use do_set_pte() to setup
+page table entry. Pointer to entry associated with offset "pgoff" is
+passed in "pte" field in vm_fault structure. Pointers to entries for other
+offsets should be calculated relative to "pte".
+
+ ->page_mkwrite() is called when a previously read-only pte is
+about to become writeable. The filesystem again must ensure that there are
+no truncate/invalidate races, and then return with the page locked. If
+the page has been truncated, the filesystem should not look up a new page
+like the ->fault() handler, but simply return with VM_FAULT_NOPAGE, which
+will cause the VM to retry the fault.
+
+ ->pfn_mkwrite() is the same as page_mkwrite but when the pte is
+VM_PFNMAP or VM_MIXEDMAP with a page-less entry. Expected return is
+VM_FAULT_NOPAGE. Or one of the VM_FAULT_ERROR types. The default behavior
+after this call is to make the pte read-write, unless pfn_mkwrite returns
+an error.
+
+ ->access() is called when get_user_pages() fails in
+access_process_vm(), typically used to debug a process through
+/proc/pid/mem or ptrace. This function is needed only for
+VM_IO | VM_PFNMAP VMAs.
+
+================================================================================
+ Dubious stuff
+
+(if you break something or notice that it is broken and do not fix it yourself
+- at least put it here)
diff --git a/Documentation/filesystems/Makefile b/Documentation/filesystems/Makefile
new file mode 100644
index 000000000..13483d192
--- /dev/null
+++ b/Documentation/filesystems/Makefile
@@ -0,0 +1,7 @@
+subdir-y := configfs
+
+# List of programs to build
+hostprogs-y := dnotify_test
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
diff --git a/Documentation/filesystems/adfs.txt b/Documentation/filesystems/adfs.txt
new file mode 100644
index 000000000..594976635
--- /dev/null
+++ b/Documentation/filesystems/adfs.txt
@@ -0,0 +1,75 @@
+Mount options for ADFS
+----------------------
+
+ uid=nnn All files in the partition will be owned by
+ user id nnn. Default 0 (root).
+ gid=nnn All files in the partition will be in group
+ nnn. Default 0 (root).
+ ownmask=nnn The permission mask for ADFS 'owner' permissions
+ will be nnn. Default 0700.
+ othmask=nnn The permission mask for ADFS 'other' permissions
+ will be nnn. Default 0077.
+ ftsuffix=n When ftsuffix=0, no file type suffix will be applied.
+ When ftsuffix=1, a hexadecimal suffix corresponding to
+ the RISC OS file type will be added. Default 0.
+
+Mapping of ADFS permissions to Linux permissions
+------------------------------------------------
+
+ ADFS permissions consist of the following:
+
+ Owner read
+ Owner write
+ Other read
+ Other write
+
+ (In older versions, an 'execute' permission did exist, but this
+ does not hold the same meaning as the Linux 'execute' permission
+ and is now obsolete).
+
+ The mapping is performed as follows:
+
+ Owner read -> -r--r--r--
+ Owner write -> --w--w---w
+ Owner read and filetype UnixExec -> ---x--x--x
+ These are then masked by ownmask, eg 700 -> -rwx------
+ Possible owner mode permissions -> -rwx------
+
+ Other read -> -r--r--r--
+ Other write -> --w--w--w-
+ Other read and filetype UnixExec -> ---x--x--x
+ These are then masked by othmask, eg 077 -> ----rwxrwx
+ Possible other mode permissions -> ----rwxrwx
+
+ Hence, with the default masks, if a file is owner read/write, and
+ not a UnixExec filetype, then the permissions will be:
+
+ -rw-------
+
+ However, if the masks were ownmask=0770,othmask=0007, then this would
+ be modified to:
+ -rw-rw----
+
+ There is no restriction on what you can do with these masks. You may
+ wish that either read bits give read access to the file for all, but
+ keep the default write protection (ownmask=0755,othmask=0577):
+
+ -rw-r--r--
+
+ You can therefore tailor the permission translation to whatever you
+ desire the permissions should be under Linux.
+
+RISC OS file type suffix
+------------------------
+
+ RISC OS file types are stored in bits 19..8 of the file load address.
+
+ To enable non-RISC OS systems to be used to store files without losing
+ file type information, a file naming convention was devised (initially
+ for use with NFS) such that a hexadecimal suffix of the form ,xyz
+ denoted the file type: e.g. BasicFile,ffb is a BASIC (0xffb) file. This
+ naming convention is now also used by RISC OS emulators such as RPCEmu.
+
+ Mounting an ADFS disc with option ftsuffix=1 will cause appropriate file
+ type suffixes to be appended to file names read from a directory. If the
+ ftsuffix option is zero or omitted, no file type suffixes will be added.
diff --git a/Documentation/filesystems/affs.txt b/Documentation/filesystems/affs.txt
new file mode 100644
index 000000000..71b63c2b9
--- /dev/null
+++ b/Documentation/filesystems/affs.txt
@@ -0,0 +1,222 @@
+Overview of Amiga Filesystems
+=============================
+
+Not all varieties of the Amiga filesystems are supported for reading and
+writing. The Amiga currently knows six different filesystems:
+
+DOS\0 The old or original filesystem, not really suited for
+ hard disks and normally not used on them, either.
+ Supported read/write.
+
+DOS\1 The original Fast File System. Supported read/write.
+
+DOS\2 The old "international" filesystem. International means that
+ a bug has been fixed so that accented ("international") letters
+ in file names are case-insensitive, as they ought to be.
+ Supported read/write.
+
+DOS\3 The "international" Fast File System. Supported read/write.
+
+DOS\4 The original filesystem with directory cache. The directory
+ cache speeds up directory accesses on floppies considerably,
+ but slows down file creation/deletion. Doesn't make much
+ sense on hard disks. Supported read only.
+
+DOS\5 The Fast File System with directory cache. Supported read only.
+
+All of the above filesystems allow block sizes from 512 to 32K bytes.
+Supported block sizes are: 512, 1024, 2048 and 4096 bytes. Larger blocks
+speed up almost everything at the expense of wasted disk space. The speed
+gain above 4K seems not really worth the price, so you don't lose too
+much here, either.
+
+The muFS (multi user File System) equivalents of the above file systems
+are supported, too.
+
+Mount options for the AFFS
+==========================
+
+protect If this option is set, the protection bits cannot be altered.
+
+setuid[=uid] This sets the owner of all files and directories in the file
+ system to uid or the uid of the current user, respectively.
+
+setgid[=gid] Same as above, but for gid.
+
+mode=mode Sets the mode flags to the given (octal) value, regardless
+ of the original permissions. Directories will get an x
+ permission if the corresponding r bit is set.
+ This is useful since most of the plain AmigaOS files
+ will map to 600.
+
+nofilenametruncate
+ The file system will return an error when filename exceeds
+ standard maximum filename length (30 characters).
+
+reserved=num Sets the number of reserved blocks at the start of the
+ partition to num. You should never need this option.
+ Default is 2.
+
+root=block Sets the block number of the root block. This should never
+ be necessary.
+
+bs=blksize Sets the blocksize to blksize. Valid block sizes are 512,
+ 1024, 2048 and 4096. Like the root option, this should
+ never be necessary, as the affs can figure it out itself.
+
+quiet The file system will not return an error for disallowed
+ mode changes.
+
+verbose The volume name, file system type and block size will
+ be written to the syslog when the filesystem is mounted.
+
+mufs The filesystem is really a muFS, also it doesn't
+ identify itself as one. This option is necessary if
+ the filesystem wasn't formatted as muFS, but is used
+ as one.
+
+prefix=path Path will be prefixed to every absolute path name of
+ symbolic links on an AFFS partition. Default = "/".
+ (See below.)
+
+volume=name When symbolic links with an absolute path are created
+ on an AFFS partition, name will be prepended as the
+ volume name. Default = "" (empty string).
+ (See below.)
+
+Handling of the Users/Groups and protection flags
+=================================================
+
+Amiga -> Linux:
+
+The Amiga protection flags RWEDRWEDHSPARWED are handled as follows:
+
+ - R maps to r for user, group and others. On directories, R implies x.
+
+ - If both W and D are allowed, w will be set.
+
+ - E maps to x.
+
+ - H and P are always retained and ignored under Linux.
+
+ - A is always reset when a file is written to.
+
+User id and group id will be used unless set[gu]id are given as mount
+options. Since most of the Amiga file systems are single user systems
+they will be owned by root. The root directory (the mount point) of the
+Amiga filesystem will be owned by the user who actually mounts the
+filesystem (the root directory doesn't have uid/gid fields).
+
+Linux -> Amiga:
+
+The Linux rwxrwxrwx file mode is handled as follows:
+
+ - r permission will set R for user, group and others.
+
+ - w permission will set W and D for user, group and others.
+
+ - x permission of the user will set E for plain files.
+
+ - All other flags (suid, sgid, ...) are ignored and will
+ not be retained.
+
+Newly created files and directories will get the user and group ID
+of the current user and a mode according to the umask.
+
+Symbolic links
+==============
+
+Although the Amiga and Linux file systems resemble each other, there
+are some, not always subtle, differences. One of them becomes apparent
+with symbolic links. While Linux has a file system with exactly one
+root directory, the Amiga has a separate root directory for each
+file system (for example, partition, floppy disk, ...). With the Amiga,
+these entities are called "volumes". They have symbolic names which
+can be used to access them. Thus, symbolic links can point to a
+different volume. AFFS turns the volume name into a directory name
+and prepends the prefix path (see prefix option) to it.
+
+Example:
+You mount all your Amiga partitions under /amiga/<volume> (where
+<volume> is the name of the volume), and you give the option
+"prefix=/amiga/" when mounting all your AFFS partitions. (They
+might be "User", "WB" and "Graphics", the mount points /amiga/User,
+/amiga/WB and /amiga/Graphics). A symbolic link referring to
+"User:sc/include/dos/dos.h" will be followed to
+"/amiga/User/sc/include/dos/dos.h".
+
+Examples
+========
+
+Command line:
+ mount Archive/Amiga/Workbench3.1.adf /mnt -t affs -o loop,verbose
+ mount /dev/sda3 /Amiga -t affs
+
+/etc/fstab entry:
+ /dev/sdb5 /amiga/Workbench affs noauto,user,exec,verbose 0 0
+
+IMPORTANT NOTE
+==============
+
+If you boot Windows 95 (don't know about 3.x, 98 and NT) while you
+have an Amiga harddisk connected to your PC, it will overwrite
+the bytes 0x00dc..0x00df of block 0 with garbage, thus invalidating
+the Rigid Disk Block. Sheer luck has it that this is an unused
+area of the RDB, so only the checksum doesn't match anymore.
+Linux will ignore this garbage and recognize the RDB anyway, but
+before you connect that drive to your Amiga again, you must
+restore or repair your RDB. So please do make a backup copy of it
+before booting Windows!
+
+If the damage is already done, the following should fix the RDB
+(where <disk> is the device name).
+DO AT YOUR OWN RISK:
+
+ dd if=/dev/<disk> of=rdb.tmp count=1
+ cp rdb.tmp rdb.fixed
+ dd if=/dev/zero of=rdb.fixed bs=1 seek=220 count=4
+ dd if=rdb.fixed of=/dev/<disk>
+
+Bugs, Restrictions, Caveats
+===========================
+
+Quite a few things may not work as advertised. Not everything is
+tested, though several hundred MB have been read and written using
+this fs. For a most up-to-date list of bugs please consult
+fs/affs/Changes.
+
+By default, filenames are truncated to 30 characters without warning.
+'nofilenametruncate' mount option can change that behavior.
+
+Case is ignored by the affs in filename matching, but Linux shells
+do care about the case. Example (with /wb being an affs mounted fs):
+ rm /wb/WRONGCASE
+will remove /mnt/wrongcase, but
+ rm /wb/WR*
+will not since the names are matched by the shell.
+
+The block allocation is designed for hard disk partitions. If more
+than 1 process writes to a (small) diskette, the blocks are allocated
+in an ugly way (but the real AFFS doesn't do much better). This
+is also true when space gets tight.
+
+You cannot execute programs on an OFS (Old File System), since the
+program files cannot be memory mapped due to the 488 byte blocks.
+For the same reason you cannot mount an image on such a filesystem
+via the loopback device.
+
+The bitmap valid flag in the root block may not be accurate when the
+system crashes while an affs partition is mounted. There's currently
+no way to fix a garbled filesystem without an Amiga (disk validator)
+or manually (who would do this?). Maybe later.
+
+If you mount affs partitions on system startup, you may want to tell
+fsck that the fs should not be checked (place a '0' in the sixth field
+of /etc/fstab).
+
+It's not possible to read floppy disks with a normal PC or workstation
+due to an incompatibility with the Amiga floppy controller.
+
+If you are interested in an Amiga Emulator for Linux, look at
+
+http://web.archive.org/web/*/http://www.freiburg.linux.de/~uae/
diff --git a/Documentation/filesystems/afs.txt b/Documentation/filesystems/afs.txt
new file mode 100644
index 000000000..ffef91c4e
--- /dev/null
+++ b/Documentation/filesystems/afs.txt
@@ -0,0 +1,247 @@
+ ====================
+ kAFS: AFS FILESYSTEM
+ ====================
+
+Contents:
+
+ - Overview.
+ - Usage.
+ - Mountpoints.
+ - Proc filesystem.
+ - The cell database.
+ - Security.
+ - Examples.
+
+
+========
+OVERVIEW
+========
+
+This filesystem provides a fairly simple secure AFS filesystem driver. It is
+under development and does not yet provide the full feature set. The features
+it does support include:
+
+ (*) Security (currently only AFS kaserver and KerberosIV tickets).
+
+ (*) File reading and writing.
+
+ (*) Automounting.
+
+ (*) Local caching (via fscache).
+
+It does not yet support the following AFS features:
+
+ (*) pioctl() system call.
+
+
+===========
+COMPILATION
+===========
+
+The filesystem should be enabled by turning on the kernel configuration
+options:
+
+ CONFIG_AF_RXRPC - The RxRPC protocol transport
+ CONFIG_RXKAD - The RxRPC Kerberos security handler
+ CONFIG_AFS - The AFS filesystem
+
+Additionally, the following can be turned on to aid debugging:
+
+ CONFIG_AF_RXRPC_DEBUG - Permit AF_RXRPC debugging to be enabled
+ CONFIG_AFS_DEBUG - Permit AFS debugging to be enabled
+
+They permit the debugging messages to be turned on dynamically by manipulating
+the masks in the following files:
+
+ /sys/module/af_rxrpc/parameters/debug
+ /sys/module/kafs/parameters/debug
+
+
+=====
+USAGE
+=====
+
+When inserting the driver modules the root cell must be specified along with a
+list of volume location server IP addresses:
+
+ modprobe af_rxrpc
+ modprobe rxkad
+ modprobe kafs rootcell=cambridge.redhat.com:172.16.18.73:172.16.18.91
+
+The first module is the AF_RXRPC network protocol driver. This provides the
+RxRPC remote operation protocol and may also be accessed from userspace. See:
+
+ Documentation/networking/rxrpc.txt
+
+The second module is the kerberos RxRPC security driver, and the third module
+is the actual filesystem driver for the AFS filesystem.
+
+Once the module has been loaded, more modules can be added by the following
+procedure:
+
+ echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells
+
+Where the parameters to the "add" command are the name of a cell and a list of
+volume location servers within that cell, with the latter separated by colons.
+
+Filesystems can be mounted anywhere by commands similar to the following:
+
+ mount -t afs "%cambridge.redhat.com:root.afs." /afs
+ mount -t afs "#cambridge.redhat.com:root.cell." /afs/cambridge
+ mount -t afs "#root.afs." /afs
+ mount -t afs "#root.cell." /afs/cambridge
+
+Where the initial character is either a hash or a percent symbol depending on
+whether you definitely want a R/W volume (hash) or whether you'd prefer a R/O
+volume, but are willing to use a R/W volume instead (percent).
+
+The name of the volume can be suffixes with ".backup" or ".readonly" to
+specify connection to only volumes of those types.
+
+The name of the cell is optional, and if not given during a mount, then the
+named volume will be looked up in the cell specified during modprobe.
+
+Additional cells can be added through /proc (see later section).
+
+
+===========
+MOUNTPOINTS
+===========
+
+AFS has a concept of mountpoints. In AFS terms, these are specially formatted
+symbolic links (of the same form as the "device name" passed to mount). kAFS
+presents these to the user as directories that have a follow-link capability
+(ie: symbolic link semantics). If anyone attempts to access them, they will
+automatically cause the target volume to be mounted (if possible) on that site.
+
+Automatically mounted filesystems will be automatically unmounted approximately
+twenty minutes after they were last used. Alternatively they can be unmounted
+directly with the umount() system call.
+
+Manually unmounting an AFS volume will cause any idle submounts upon it to be
+culled first. If all are culled, then the requested volume will also be
+unmounted, otherwise error EBUSY will be returned.
+
+This can be used by the administrator to attempt to unmount the whole AFS tree
+mounted on /afs in one go by doing:
+
+ umount /afs
+
+
+===============
+PROC FILESYSTEM
+===============
+
+The AFS modules creates a "/proc/fs/afs/" directory and populates it:
+
+ (*) A "cells" file that lists cells currently known to the afs module and
+ their usage counts:
+
+ [root@andromeda ~]# cat /proc/fs/afs/cells
+ USE NAME
+ 3 cambridge.redhat.com
+
+ (*) A directory per cell that contains files that list volume location
+ servers, volumes, and active servers known within that cell.
+
+ [root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/servers
+ USE ADDR STATE
+ 4 172.16.18.91 0
+ [root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/vlservers
+ ADDRESS
+ 172.16.18.91
+ [root@andromeda ~]# cat /proc/fs/afs/cambridge.redhat.com/volumes
+ USE STT VLID[0] VLID[1] VLID[2] NAME
+ 1 Val 20000000 20000001 20000002 root.afs
+
+
+=================
+THE CELL DATABASE
+=================
+
+The filesystem maintains an internal database of all the cells it knows and the
+IP addresses of the volume location servers for those cells. The cell to which
+the system belongs is added to the database when modprobe is performed by the
+"rootcell=" argument or, if compiled in, using a "kafs.rootcell=" argument on
+the kernel command line.
+
+Further cells can be added by commands similar to the following:
+
+ echo add CELLNAME VLADDR[:VLADDR][:VLADDR]... >/proc/fs/afs/cells
+ echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 >/proc/fs/afs/cells
+
+No other cell database operations are available at this time.
+
+
+========
+SECURITY
+========
+
+Secure operations are initiated by acquiring a key using the klog program. A
+very primitive klog program is available at:
+
+ http://people.redhat.com/~dhowells/rxrpc/klog.c
+
+This should be compiled by:
+
+ make klog LDLIBS="-lcrypto -lcrypt -lkrb4 -lkeyutils"
+
+And then run as:
+
+ ./klog
+
+Assuming it's successful, this adds a key of type RxRPC, named for the service
+and cell, eg: "afs@<cellname>". This can be viewed with the keyctl program or
+by cat'ing /proc/keys:
+
+ [root@andromeda ~]# keyctl show
+ Session Keyring
+ -3 --alswrv 0 0 keyring: _ses.3268
+ 2 --alswrv 0 0 \_ keyring: _uid.0
+ 111416553 --als--v 0 0 \_ rxrpc: afs@CAMBRIDGE.REDHAT.COM
+
+Currently the username, realm, password and proposed ticket lifetime are
+compiled in to the program.
+
+It is not required to acquire a key before using AFS facilities, but if one is
+not acquired then all operations will be governed by the anonymous user parts
+of the ACLs.
+
+If a key is acquired, then all AFS operations, including mounts and automounts,
+made by a possessor of that key will be secured with that key.
+
+If a file is opened with a particular key and then the file descriptor is
+passed to a process that doesn't have that key (perhaps over an AF_UNIX
+socket), then the operations on the file will be made with key that was used to
+open the file.
+
+
+========
+EXAMPLES
+========
+
+Here's what I use to test this. Some of the names and IP addresses are local
+to my internal DNS. My "root.afs" partition has a mount point within it for
+some public volumes volumes.
+
+insmod /tmp/rxrpc.o
+insmod /tmp/rxkad.o
+insmod /tmp/kafs.o rootcell=cambridge.redhat.com:172.16.18.91
+
+mount -t afs \%root.afs. /afs
+mount -t afs \%cambridge.redhat.com:root.cell. /afs/cambridge.redhat.com/
+
+echo add grand.central.org 18.9.48.14:128.2.203.61:130.237.48.87 > /proc/fs/afs/cells
+mount -t afs "#grand.central.org:root.cell." /afs/grand.central.org/
+mount -t afs "#grand.central.org:root.archive." /afs/grand.central.org/archive
+mount -t afs "#grand.central.org:root.contrib." /afs/grand.central.org/contrib
+mount -t afs "#grand.central.org:root.doc." /afs/grand.central.org/doc
+mount -t afs "#grand.central.org:root.project." /afs/grand.central.org/project
+mount -t afs "#grand.central.org:root.service." /afs/grand.central.org/service
+mount -t afs "#grand.central.org:root.software." /afs/grand.central.org/software
+mount -t afs "#grand.central.org:root.user." /afs/grand.central.org/user
+
+umount /afs
+rmmod kafs
+rmmod rxkad
+rmmod rxrpc
diff --git a/Documentation/filesystems/aufs/README b/Documentation/filesystems/aufs/README
new file mode 100644
index 000000000..27faa06fa
--- /dev/null
+++ b/Documentation/filesystems/aufs/README
@@ -0,0 +1,383 @@
+
+Aufs4 -- advanced multi layered unification filesystem version 4.x
+http://aufs.sf.net
+Junjiro R. Okajima
+
+
+0. Introduction
+----------------------------------------
+In the early days, aufs was entirely re-designed and re-implemented
+Unionfs Version 1.x series. Adding many original ideas, approaches,
+improvements and implementations, it becomes totally different from
+Unionfs while keeping the basic features.
+Recently, Unionfs Version 2.x series begin taking some of the same
+approaches to aufs1's.
+Unionfs is being developed by Professor Erez Zadok at Stony Brook
+University and his team.
+
+Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
+If you want older kernel version support, try aufs2-2.6.git or
+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
+
+Note: it becomes clear that "Aufs was rejected. Let's give it up."
+ According to Christoph Hellwig, linux rejects all union-type
+ filesystems but UnionMount.
+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
+
+PS. Al Viro seems have a plan to merge aufs as well as overlayfs and
+ UnionMount, and he pointed out an issue around a directory mutex
+ lock and aufs addressed it. But it is still unsure whether aufs will
+ be merged (or any other union solution).
+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
+
+
+1. Features
+----------------------------------------
+- unite several directories into a single virtual filesystem. The member
+ directory is called as a branch.
+- you can specify the permission flags to the branch, which are 'readonly',
+ 'readwrite' and 'whiteout-able.'
+- by upper writable branch, internal copyup and whiteout, files/dirs on
+ readonly branch are modifiable logically.
+- dynamic branch manipulation, add, del.
+- etc...
+
+Also there are many enhancements in aufs, such as:
+- test only the highest one for the directory permission (dirperm1)
+- copyup on open (coo=)
+- 'move' policy for copy-up between two writable branches, after
+ checking free space.
+- xattr, acl
+- readdir(3) in userspace.
+- keep inode number by external inode number table
+- keep the timestamps of file/dir in internal copyup operation
+- seekable directory, supporting NFS readdir.
+- whiteout is hardlinked in order to reduce the consumption of inodes
+ on branch
+- do not copyup, nor create a whiteout when it is unnecessary
+- revert a single systemcall when an error occurs in aufs
+- remount interface instead of ioctl
+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
+- loopback mounted filesystem as a branch
+- kernel thread for removing the dir who has a plenty of whiteouts
+- support copyup sparse file (a file which has a 'hole' in it)
+- default permission flags for branches
+- selectable permission flags for ro branch, whether whiteout can
+ exist or not
+- export via NFS.
+- support <sysfs>/fs/aufs and <debugfs>/aufs.
+- support multiple writable branches, some policies to select one
+ among multiple writable branches.
+- a new semantics for link(2) and rename(2) to support multiple
+ writable branches.
+- no glibc changes are required.
+- pseudo hardlink (hardlink over branches)
+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
+ including NFS or remote filesystem branch.
+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
+- and more...
+
+Currently these features are dropped temporary from aufs4.
+See design/08plan.txt in detail.
+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
+ (robr)
+- statistics of aufs thread (/sys/fs/aufs/stat)
+
+Features or just an idea in the future (see also design/*.txt),
+- reorder the branch index without del/re-add.
+- permanent xino files for NFSD
+- an option for refreshing the opened files after add/del branches
+- light version, without branch manipulation. (unnecessary?)
+- copyup in userspace
+- inotify in userspace
+- readv/writev
+
+
+2. Download
+----------------------------------------
+There are three GIT trees for aufs4, aufs4-linux.git,
+aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
+"aufs-util.git."
+While the aufs-util is always necessary, you need either of aufs4-linux
+or aufs4-standalone.
+
+The aufs4-linux tree includes the whole linux mainline GIT tree,
+git://git.kernel.org/.../torvalds/linux.git.
+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
+build aufs4 as an external kernel module.
+Several extra patches are not included in this tree. Only
+aufs4-standalone tree contains them. They are describe in the later
+section "Configuration and Compilation."
+
+On the other hand, the aufs4-standalone tree has only aufs source files
+and necessary patches, and you can select CONFIG_AUFS_FS=m.
+But you need to apply all aufs patches manually.
+
+You will find GIT branches whose name is in form of "aufs4.x" where "x"
+represents the linux kernel version, "linux-4.x". For instance,
+"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
+"aufs4.x-rcN" branch.
+
+o aufs4-linux tree
+$ git clone --reference /your/linux/git/tree \
+ git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
+- if you don't have linux GIT tree, then remove "--reference ..."
+$ cd aufs4-linux.git
+$ git checkout origin/aufs4.0
+
+Or You may want to directly git-pull aufs into your linux GIT tree, and
+leave the patch-work to GIT.
+$ cd /your/linux/git/tree
+$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
+$ git fetch aufs4
+$ git checkout -b my4.0 v4.0
+$ (add your local change...)
+$ git pull aufs4 aufs4.0
+- now you have v4.0 + your_changes + aufs4.0 in you my4.0 branch.
+- you may need to solve some conflicts between your_changes and
+ aufs4.0. in this case, git-rerere is recommended so that you can
+ solve the similar conflicts automatically when you upgrade to 4.1 or
+ later in the future.
+
+o aufs4-standalone tree
+$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
+$ cd aufs4-standalone.git
+$ git checkout origin/aufs4.0
+
+o aufs-util tree
+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
+- note that the public aufs-util.git is on SourceForge instead of
+ GitHUB.
+$ cd aufs-util.git
+$ git checkout origin/aufs4.0
+
+Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
+The minor version number, 'x' in '4.x', of aufs may not always
+follow the minor version number of the kernel.
+Because changes in the kernel that cause the use of a new
+minor version number do not always require changes to aufs-util.
+
+Since aufs-util has its own minor version number, you may not be
+able to find a GIT branch in aufs-util for your kernel's
+exact minor version number.
+In this case, you should git-checkout the branch for the
+nearest lower number.
+
+For (an unreleased) example:
+If you are using "linux-4.10" and the "aufs4.10" branch
+does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
+or something numerically smaller is the branch for your kernel.
+
+Also you can view all branches by
+ $ git branch -a
+
+
+3. Configuration and Compilation
+----------------------------------------
+Make sure you have git-checkout'ed the correct branch.
+
+For aufs4-linux tree,
+- enable CONFIG_AUFS_FS.
+- set other aufs configurations if necessary.
+
+For aufs4-standalone tree,
+There are several ways to build.
+
+1.
+- apply ./aufs4-kbuild.patch to your kernel source files.
+- apply ./aufs4-base.patch too.
+- apply ./aufs4-mmap.patch too.
+- apply ./aufs4-standalone.patch too, if you have a plan to set
+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
+- enable CONFIG_AUFS_FS, you can select either
+ =m or =y.
+- and build your kernel as usual.
+- install the built kernel.
+ Note: Since linux-3.9, every filesystem module requires an alias
+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
+ modules.aliases file if you set CONFIG_AUFS_FS=m.
+- install the header files too by "make headers_install" to the
+ directory where you specify. By default, it is $PWD/usr.
+ "make help" shows a brief note for headers_install.
+- and reboot your system.
+
+2.
+- module only (CONFIG_AUFS_FS=m).
+- apply ./aufs4-base.patch to your kernel source files.
+- apply ./aufs4-mmap.patch too.
+- apply ./aufs4-standalone.patch too.
+- build your kernel, don't forget "make headers_install", and reboot.
+- edit ./config.mk and set other aufs configurations if necessary.
+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
+ every aufs configurations.
+- build the module by simple "make".
+ Note: Since linux-3.9, every filesystem module requires an alias
+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
+ modules.aliases file.
+- you can specify ${KDIR} make variable which points to your kernel
+ source tree.
+- install the files
+ + run "make install" to install the aufs module, or copy the built
+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
+ + run "make install_headers" (instead of headers_install) to install
+ the modified aufs header file (you can specify DESTDIR which is
+ available in aufs standalone version's Makefile only), or copy
+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
+ you like manually. By default, the target directory is $PWD/usr.
+- no need to apply aufs4-kbuild.patch, nor copying source files to your
+ kernel source tree.
+
+Note: The header file aufs_type.h is necessary to build aufs-util
+ as well as "make headers_install" in the kernel source tree.
+ headers_install is subject to be forgotten, but it is essentially
+ necessary, not only for building aufs-util.
+ You may not meet problems without headers_install in some older
+ version though.
+
+And then,
+- read README in aufs-util, build and install it
+- note that your distribution may contain an obsoleted version of
+ aufs_type.h in /usr/include/linux or something. When you build aufs
+ utilities, make sure that your compiler refers the correct aufs header
+ file which is built by "make headers_install."
+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
+ then run "make install_ulib" too. And refer to the aufs manual in
+ detail.
+
+There several other patches in aufs4-standalone.git. They are all
+optional. When you meet some problems, they will help you.
+- aufs4-loopback.patch
+ Supports a nested loopback mount in a branch-fs. This patch is
+ unnecessary until aufs produces a message like "you may want to try
+ another patch for loopback file".
+- vfs-ino.patch
+ Modifies a system global kernel internal function get_next_ino() in
+ order to stop assigning 0 for an inode-number. Not directly related to
+ aufs, but recommended generally.
+- tmpfs-idr.patch
+ Keeps the tmpfs inode number as the lowest value. Effective to reduce
+ the size of aufs XINO files for tmpfs branch. Also it prevents the
+ duplication of inode number, which is important for backup tools and
+ other utilities. When you find aufs XINO files for tmpfs branch
+ growing too much, try this patch.
+
+
+4. Usage
+----------------------------------------
+At first, make sure aufs-util are installed, and please read the aufs
+manual, aufs.5 in aufs-util.git tree.
+$ man -l aufs.5
+
+And then,
+$ mkdir /tmp/rw /tmp/aufs
+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
+
+Here is another example. The result is equivalent.
+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
+ Or
+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
+# mount -o remount,append:${HOME} /tmp/aufs
+
+Then, you can see whole tree of your home dir through /tmp/aufs. If
+you modify a file under /tmp/aufs, the one on your home directory is
+not affected, instead the same named file will be newly created under
+/tmp/rw. And all of your modification to a file will be applied to
+the one under /tmp/rw. This is called the file based Copy on Write
+(COW) method.
+Aufs mount options are described in aufs.5.
+If you run chroot or something and make your aufs as a root directory,
+then you need to customize the shutdown script. See the aufs manual in
+detail.
+
+Additionally, there are some sample usages of aufs which are a
+diskless system with network booting, and LiveCD over NFS.
+See sample dir in CVS tree on SourceForge.
+
+
+5. Contact
+----------------------------------------
+When you have any problems or strange behaviour in aufs, please let me
+know with:
+- /proc/mounts (instead of the output of mount(8))
+- /sys/module/aufs/*
+- /sys/fs/aufs/* (if you have them)
+- /debug/aufs/* (if you have them)
+- linux kernel version
+ if your kernel is not plain, for example modified by distributor,
+ the url where i can download its source is necessary too.
+- aufs version which was printed at loading the module or booting the
+ system, instead of the date you downloaded.
+- configuration (define/undefine CONFIG_AUFS_xxx)
+- kernel configuration or /proc/config.gz (if you have it)
+- behaviour which you think to be incorrect
+- actual operation, reproducible one is better
+- mailto: aufs-users at lists.sourceforge.net
+
+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
+and Feature Requests) on SourceForge. Please join and write to
+aufs-users ML.
+
+
+6. Acknowledgements
+----------------------------------------
+Thanks to everyone who have tried and are using aufs, whoever
+have reported a bug or any feedback.
+
+Especially donators:
+Tomas Matejicek(slax.org) made a donation (much more than once).
+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
+ scripts) is making "doubling" donations.
+ Unfortunately I cannot list all of the donators, but I really
+ appreciate.
+ It ends Aug 2010, but the ordinary donation URL is still available.
+ <http://sourceforge.net/donate/index.php?group_id=167503>
+Dai Itasaka made a donation (2007/8).
+Chuck Smith made a donation (2008/4, 10 and 12).
+Henk Schoneveld made a donation (2008/9).
+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
+Francois Dupoux made a donation (2008/11).
+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
+ aufs2 GIT tree (2009/2).
+William Grant made a donation (2009/3).
+Patrick Lane made a donation (2009/4).
+The Mail Archive (mail-archive.com) made donations (2009/5).
+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
+Pavel Pronskiy made a donation (2011/2).
+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
+11).
+Sam Liddicott made a donation (2011/9).
+Era Scarecrow made a donation (2013/4).
+Bor Ratajc made a donation (2013/4).
+Alessandro Gorreta made a donation (2013/4).
+POIRETTE Marc made a donation (2013/4).
+Alessandro Gorreta made a donation (2013/4).
+lauri kasvandik made a donation (2013/5).
+"pemasu from Finland" made a donation (2013/7).
+The Parted Magic Project made a donation (2013/9 and 11).
+Pavel Barta made a donation (2013/10).
+Nikolay Pertsev made a donation (2014/5).
+James B made a donation (2014/7).
+Stefano Di Biase made a donation (2014/8).
+Daniel Epellei made a donation (2015/1).
+
+Thank you very much.
+Donations are always, including future donations, very important and
+helpful for me to keep on developing aufs.
+
+
+7.
+----------------------------------------
+If you are an experienced user, no explanation is needed. Aufs is
+just a linux filesystem.
+
+
+Enjoy!
+
+# Local variables: ;
+# mode: text;
+# End: ;
diff --git a/Documentation/filesystems/aufs/design/01intro.txt b/Documentation/filesystems/aufs/design/01intro.txt
new file mode 100644
index 000000000..a0194fe21
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/01intro.txt
@@ -0,0 +1,170 @@
+
+# Copyright (C) 2005-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Introduction
+----------------------------------------
+
+aufs [ei ju: ef es] | [a u f s]
+1. abbrev. for "advanced multi-layered unification filesystem".
+2. abbrev. for "another unionfs".
+3. abbrev. for "auf das" in German which means "on the" in English.
+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
+ But "Filesystem aufs Filesystem" is hard to understand.
+
+AUFS is a filesystem with features:
+- multi layered stackable unification filesystem, the member directory
+ is called as a branch.
+- branch permission and attribute, 'readonly', 'real-readonly',
+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
+ combination.
+- internal "file copy-on-write".
+- logical deletion, whiteout.
+- dynamic branch manipulation, adding, deleting and changing permission.
+- allow bypassing aufs, user's direct branch access.
+- external inode number translation table and bitmap which maintains the
+ persistent aufs inode number.
+- seekable directory, including NFS readdir.
+- file mapping, mmap and sharing pages.
+- pseudo-link, hardlink over branches.
+- loopback mounted filesystem as a branch.
+- several policies to select one among multiple writable branches.
+- revert a single systemcall when an error occurs in aufs.
+- and more...
+
+
+Multi Layered Stackable Unification Filesystem
+----------------------------------------------------------------------
+Most people already knows what it is.
+It is a filesystem which unifies several directories and provides a
+merged single directory. When users access a file, the access will be
+passed/re-directed/converted (sorry, I am not sure which English word is
+correct) to the real file on the member filesystem. The member
+filesystem is called 'lower filesystem' or 'branch' and has a mode
+'readonly' and 'readwrite.' And the deletion for a file on the lower
+readonly branch is handled by creating 'whiteout' on the upper writable
+branch.
+
+On LKML, there have been discussions about UnionMount (Jan Blunck,
+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
+different approaches to implement the merged-view.
+The former tries putting it into VFS, and the latter implements as a
+separate filesystem.
+(If I misunderstand about these implementations, please let me know and
+I shall correct it. Because it is a long time ago when I read their
+source files last time).
+
+UnionMount's approach will be able to small, but may be hard to share
+branches between several UnionMount since the whiteout in it is
+implemented in the inode on branch filesystem and always
+shared. According to Bharata's post, readdir does not seems to be
+finished yet.
+There are several missing features known in this implementations such as
+- for users, the inode number may change silently. eg. copy-up.
+- link(2) may break by copy-up.
+- read(2) may get an obsoleted filedata (fstat(2) too).
+- fcntl(F_SETLK) may be broken by copy-up.
+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
+ open(O_RDWR).
+
+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
+merged into mainline. This is another implementation of UnionMount as a
+separated filesystem. All the limitations and known problems which
+UnionMount are equally inherited to "overlay" filesystem.
+
+Unionfs has a longer history. When I started implementing a stackable
+filesystem (Aug 2005), it already existed. It has virtual super_block,
+inode, dentry and file objects and they have an array pointing lower
+same kind objects. After contributing many patches for Unionfs, I
+re-started my project AUFS (Jun 2006).
+
+In AUFS, the structure of filesystem resembles to Unionfs, but I
+implemented my own ideas, approaches and enhancements and it became
+totally different one.
+
+Comparing DM snapshot and fs based implementation
+- the number of bytes to be copied between devices is much smaller.
+- the type of filesystem must be one and only.
+- the fs must be writable, no readonly fs, even for the lower original
+ device. so the compression fs will not be usable. but if we use
+ loopback mount, we may address this issue.
+ for instance,
+ mount /cdrom/squashfs.img /sq
+ losetup /sq/ext2.img
+ losetup /somewhere/cow
+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
+- it will be difficult (or needs more operations) to extract the
+ difference between the original device and COW.
+- DM snapshot-merge may help a lot when users try merging. in the
+ fs-layer union, users will use rsync(1).
+
+You may want to read my old paper "Filesystems in LiveCD"
+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
+
+
+Several characters/aspects/persona of aufs
+----------------------------------------------------------------------
+
+Aufs has several characters, aspects or persona.
+1. a filesystem, callee of VFS helper
+2. sub-VFS, caller of VFS helper for branches
+3. a virtual filesystem which maintains persistent inode number
+4. reader/writer of files on branches such like an application
+
+1. Callee of VFS Helper
+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
+unlink(2) from an application reaches sys_unlink() kernel function and
+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
+calls filesystem specific unlink operation. Actually aufs implements the
+unlink operation but it behaves like a redirector.
+
+2. Caller of VFS Helper for Branches
+aufs_unlink() passes the unlink request to the branch filesystem as if
+it were called from VFS. So the called unlink operation of the branch
+filesystem acts as usual. As a caller of VFS helper, aufs should handle
+every necessary pre/post operation for the branch filesystem.
+- acquire the lock for the parent dir on a branch
+- lookup in a branch
+- revalidate dentry on a branch
+- mnt_want_write() for a branch
+- vfs_unlink() for a branch
+- mnt_drop_write() for a branch
+- release the lock on a branch
+
+3. Persistent Inode Number
+One of the most important issue for a filesystem is to maintain inode
+numbers. This is particularly important to support exporting a
+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
+backend block device for its own. But some storage is necessary to
+keep and maintain the inode numbers. It may be a large space and may not
+suit to keep in memory. Aufs rents some space from its first writable
+branch filesystem (by default) and creates file(s) on it. These files
+are created by aufs internally and removed soon (currently) keeping
+opened.
+Note: Because these files are removed, they are totally gone after
+ unmounting aufs. It means the inode numbers are not persistent
+ across unmount or reboot. I have a plan to make them really
+ persistent which will be important for aufs on NFS server.
+
+4. Read/Write Files Internally (copy-on-write)
+Because a branch can be readonly, when you write a file on it, aufs will
+"copy-up" it to the upper writable branch internally. And then write the
+originally requested thing to the file. Generally kernel doesn't
+open/read/write file actively. In aufs, even a single write may cause a
+internal "file copy". This behaviour is very similar to cp(1) command.
+
+Some people may think it is better to pass such work to user space
+helper, instead of doing in kernel space. Actually I am still thinking
+about it. But currently I have implemented it in kernel space.
diff --git a/Documentation/filesystems/aufs/design/02struct.txt b/Documentation/filesystems/aufs/design/02struct.txt
new file mode 100644
index 000000000..b53a9778b
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/02struct.txt
@@ -0,0 +1,258 @@
+
+# Copyright (C) 2005-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Basic Aufs Internal Structure
+
+Superblock/Inode/Dentry/File Objects
+----------------------------------------------------------------------
+As like an ordinary filesystem, aufs has its own
+superblock/inode/dentry/file objects. All these objects have a
+dynamically allocated array and store the same kind of pointers to the
+lower filesystem, branch.
+For example, when you build a union with one readwrite branch and one
+readonly, mounted /au, /rw and /ro respectively.
+- /au = /rw + /ro
+- /ro/fileA exists but /rw/fileA
+
+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
+pointers are stored in a aufs dentry. The array in aufs dentry will be,
+- [0] = NULL (because /rw/fileA doesn't exist)
+- [1] = /ro/fileA
+
+This style of an array is essentially same to the aufs
+superblock/inode/dentry/file objects.
+
+Because aufs supports manipulating branches, ie. add/delete/change
+branches dynamically, these objects has its own generation. When
+branches are changed, the generation in aufs superblock is
+incremented. And a generation in other object are compared when it is
+accessed. When a generation in other objects are obsoleted, aufs
+refreshes the internal array.
+
+
+Superblock
+----------------------------------------------------------------------
+Additionally aufs superblock has some data for policies to select one
+among multiple writable branches, XIB files, pseudo-links and kobject.
+See below in detail.
+About the policies which supports copy-down a directory, see
+wbr_policy.txt too.
+
+
+Branch and XINO(External Inode Number Translation Table)
+----------------------------------------------------------------------
+Every branch has its own xino (external inode number translation table)
+file. The xino file is created and unlinked by aufs internally. When two
+members of a union exist on the same filesystem, they share the single
+xino file.
+The struct of a xino file is simple, just a sequence of aufs inode
+numbers which is indexed by the lower inode number.
+In the above sample, assume the inode number of /ro/fileA is i111 and
+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
+
+When the inode numbers are not contiguous, the xino file will be sparse
+which has a hole in it and doesn't consume as much disk space as it
+might appear. If your branch filesystem consumes disk space for such
+holes, then you should specify 'xino=' option at mounting aufs.
+
+Aufs has a mount option to free the disk blocks for such holes in XINO
+files on tmpfs or ramdisk. But it is not so effective actually. If you
+meet a problem of disk shortage due to XINO files, then you should try
+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
+The patch localizes the assignment inumbers per tmpfs-mount and avoid
+the holes in XINO files.
+
+Also a writable branch has three kinds of "whiteout bases". All these
+are existed when the branch is joined to aufs, and their names are
+whiteout-ed doubly, so that users will never see their names in aufs
+hierarchy.
+1. a regular file which will be hardlinked to all whiteouts.
+2. a directory to store a pseudo-link.
+3. a directory to store an "orphan"-ed file temporary.
+
+1. Whiteout Base
+ When you remove a file on a readonly branch, aufs handles it as a
+ logical deletion and creates a whiteout on the upper writable branch
+ as a hardlink of this file in order not to consume inode on the
+ writable branch.
+2. Pseudo-link Dir
+ See below, Pseudo-link.
+3. Step-Parent Dir
+ When "fileC" exists on the lower readonly branch only and it is
+ opened and removed with its parent dir, and then user writes
+ something into it, then aufs copies-up fileC to this
+ directory. Because there is no other dir to store fileC. After
+ creating a file under this dir, the file is unlinked.
+
+Because aufs supports manipulating branches, ie. add/delete/change
+dynamically, a branch has its own id. When the branch order changes,
+aufs finds the new index by searching the branch id.
+
+
+Pseudo-link
+----------------------------------------------------------------------
+Assume "fileA" exists on the lower readonly branch only and it is
+hardlinked to "fileB" on the branch. When you write something to fileA,
+aufs copies-up it to the upper writable branch. Additionally aufs
+creates a hardlink under the Pseudo-link Directory of the writable
+branch. The inode of a pseudo-link is kept in aufs super_block as a
+simple list. If fileB is read after unlinking fileA, aufs returns
+filedata from the pseudo-link instead of the lower readonly
+branch. Because the pseudo-link is based upon the inode, to keep the
+inode number by xino (see above) is essentially necessary.
+
+All the hardlinks under the Pseudo-link Directory of the writable branch
+should be restored in a proper location later. Aufs provides a utility
+to do this. The userspace helpers executed at remounting and unmounting
+aufs by default.
+During this utility is running, it puts aufs into the pseudo-link
+maintenance mode. In this mode, only the process which began the
+maintenance mode (and its child processes) is allowed to operate in
+aufs. Some other processes which are not related to the pseudo-link will
+be allowed to run too, but the rest have to return an error or wait
+until the maintenance mode ends. If a process already acquires an inode
+mutex (in VFS), it has to return an error.
+
+
+XIB(external inode number bitmap)
+----------------------------------------------------------------------
+Addition to the xino file per a branch, aufs has an external inode number
+bitmap in a superblock object. It is also an internal file such like a
+xino file.
+It is a simple bitmap to mark whether the aufs inode number is in-use or
+not.
+To reduce the file I/O, aufs prepares a single memory page to cache xib.
+
+As well as XINO files, aufs has a feature to truncate/refresh XIB to
+reduce the number of consumed disk blocks for these files.
+
+
+Virtual or Vertical Dir, and Readdir in Userspace
+----------------------------------------------------------------------
+In order to support multiple layers (branches), aufs readdir operation
+constructs a virtual dir block on memory. For readdir, aufs calls
+vfs_readdir() internally for each dir on branches, merges their entries
+with eliminating the whiteout-ed ones, and sets it to file (dir)
+object. So the file object has its entry list until it is closed. The
+entry list will be updated when the file position is zero and becomes
+obsoleted. This decision is made in aufs automatically.
+
+The dynamically allocated memory block for the name of entries has a
+unit of 512 bytes (by default) and stores the names contiguously (no
+padding). Another block for each entry is handled by kmem_cache too.
+During building dir blocks, aufs creates hash list and judging whether
+the entry is whiteouted by its upper branch or already listed.
+The merged result is cached in the corresponding inode object and
+maintained by a customizable life-time option.
+
+Some people may call it can be a security hole or invite DoS attack
+since the opened and once readdir-ed dir (file object) holds its entry
+list and becomes a pressure for system memory. But I'd say it is similar
+to files under /proc or /sys. The virtual files in them also holds a
+memory page (generally) while they are opened. When an idea to reduce
+memory for them is introduced, it will be applied to aufs too.
+For those who really hate this situation, I've developed readdir(3)
+library which operates this merging in userspace. You just need to set
+LD_PRELOAD environment variable, and aufs will not consume no memory in
+kernel space for readdir(3).
+
+
+Workqueue
+----------------------------------------------------------------------
+Aufs sometimes requires privilege access to a branch. For instance,
+in copy-up/down operation. When a user process is going to make changes
+to a file which exists in the lower readonly branch only, and the mode
+of one of ancestor directories may not be writable by a user
+process. Here aufs copy-up the file with its ancestors and they may
+require privilege to set its owner/group/mode/etc.
+This is a typical case of a application character of aufs (see
+Introduction).
+
+Aufs uses workqueue synchronously for this case. It creates its own
+workqueue. The workqueue is a kernel thread and has privilege. Aufs
+passes the request to call mkdir or write (for example), and wait for
+its completion. This approach solves a problem of a signal handler
+simply.
+If aufs didn't adopt the workqueue and changed the privilege of the
+process, then the process may receive the unexpected SIGXFSZ or other
+signals.
+
+Also aufs uses the system global workqueue ("events" kernel thread) too
+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
+whiteout base and etc. This is unrelated to a privilege.
+Most of aufs operation tries acquiring a rw_semaphore for aufs
+superblock at the beginning, at the same time waits for the completion
+of all queued asynchronous tasks.
+
+
+Whiteout
+----------------------------------------------------------------------
+The whiteout in aufs is very similar to Unionfs's. That is represented
+by its filename. UnionMount takes an approach of a file mode, but I am
+afraid several utilities (find(1) or something) will have to support it.
+
+Basically the whiteout represents "logical deletion" which stops aufs to
+lookup further, but also it represents "dir is opaque" which also stop
+further lookup.
+
+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
+In order to make several functions in a single systemcall to be
+revertible, aufs adopts an approach to rename a directory to a temporary
+unique whiteouted name.
+For example, in rename(2) dir where the target dir already existed, aufs
+renames the target dir to a temporary unique whiteouted name before the
+actual rename on a branch, and then handles other actions (make it opaque,
+update the attributes, etc). If an error happens in these actions, aufs
+simply renames the whiteouted name back and returns an error. If all are
+succeeded, aufs registers a function to remove the whiteouted unique
+temporary name completely and asynchronously to the system global
+workqueue.
+
+
+Copy-up
+----------------------------------------------------------------------
+It is a well-known feature or concept.
+When user modifies a file on a readonly branch, aufs operate "copy-up"
+internally and makes change to the new file on the upper writable branch.
+When the trigger systemcall does not update the timestamps of the parent
+dir, aufs reverts it after copy-up.
+
+
+Move-down (aufs3.9 and later)
+----------------------------------------------------------------------
+"Copy-up" is one of the essential feature in aufs. It copies a file from
+the lower readonly branch to the upper writable branch when a user
+changes something about the file.
+"Move-down" is an opposite action of copy-up. Basically this action is
+ran manually instead of automatically and internally.
+For desgin and implementation, aufs has to consider these issues.
+- whiteout for the file may exist on the lower branch.
+- ancestor directories may not exist on the lower branch.
+- diropq for the ancestor directories may exist on the upper branch.
+- free space on the lower branch will reduce.
+- another access to the file may happen during moving-down, including
+ UDBA (see "Revalidate Dentry and UDBA").
+- the file should not be hard-linked nor pseudo-linked. they should be
+ handled by auplink utility later.
+
+Sometimes users want to move-down a file from the upper writable branch
+to the lower readonly or writable branch. For instance,
+- the free space of the upper writable branch is going to run out.
+- create a new intermediate branch between the upper and lower branch.
+- etc.
+
+For this purpose, use "aumvdown" command in aufs-util.git.
diff --git a/Documentation/filesystems/aufs/design/03atomic_open.txt b/Documentation/filesystems/aufs/design/03atomic_open.txt
new file mode 100644
index 000000000..974b524f7
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/03atomic_open.txt
@@ -0,0 +1,85 @@
+
+# Copyright (C) 2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Support for a branch who has its ->atomic_open()
+----------------------------------------------------------------------
+The filesystems who implement its ->atomic_open() are not majority. For
+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
+sure whether all filesystems who have ->atomic_open() behave like this,
+but NFSv4 surely returns the error.
+
+In order to support ->atomic_open() for aufs, there are a few
+approaches.
+
+A. Introduce aufs_atomic_open()
+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for
+ branch fs.
+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
+ an aufs user Pip Cet's approach
+ - calls aufs_create(), VFS finish_open() and notify_change().
+ - pass fake-mode to finish_open(), and then correct the mode by
+ notify_change().
+C. Extend aufs_open() to call branch fs's ->atomic_open()
+ - no aufs_atomic_open().
+ - aufs_lookup() registers the TID to an aufs internal object.
+ - aufs_create() does nothing when the matching TID is registered, but
+ registers the mode.
+ - aufs_open() calls branch fs's ->atomic_open() when the matching
+ TID is registered.
+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
+ credential
+ - no aufs_atomic_open().
+ - aufs_create() registers the TID to an internal object. this info
+ represents "this process created this file just now."
+ - when aufs gets EACCES from branch fs's ->open(), then confirm the
+ registered TID and re-try open() with superuser's credential.
+
+Pros and cons for each approach.
+
+A.
+ - straightforward but highly depends upon VFS internal.
+ - the atomic behavaiour is kept.
+ - some of parameters such as nameidata are hard to reproduce for
+ branch fs.
+ - large overhead.
+B.
+ - easy to implement.
+ - the atomic behavaiour is lost.
+C.
+ - the atomic behavaiour is kept.
+ - dirty and tricky.
+ - VFS checks whether the file is created correctly after calling
+ ->create(), which means this approach doesn't work.
+D.
+ - easy to implement.
+ - the atomic behavaiour is lost.
+ - to open a file with superuser's credential and give it to a user
+ process is a bad idea, since the file object keeps the credential
+ in it. It may affect LSM or something. This approach doesn't work
+ either.
+
+The approach A is ideal, but it hard to implement. So here is a
+variation of A, which is to be implemented.
+
+A-1. Introduce aufs_atomic_open()
+ - calls branch fs ->atomic_open() if exists. otherwise calls
+ vfs_create() and finish_open().
+ - the demerit is that the several checks after branch fs
+ ->atomic_open() are lost. in the ordinary case, the checks are
+ done by VFS:do_last(), lookup_open() and atomic_open(). some can
+ be implemented in aufs, but not all I am afraid.
diff --git a/Documentation/filesystems/aufs/design/03lookup.txt b/Documentation/filesystems/aufs/design/03lookup.txt
new file mode 100644
index 000000000..3515c9228
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/03lookup.txt
@@ -0,0 +1,113 @@
+
+# Copyright (C) 2005-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Lookup in a Branch
+----------------------------------------------------------------------
+Since aufs has a character of sub-VFS (see Introduction), it operates
+lookup for branches as VFS does. It may be a heavy work. But almost all
+lookup operation in aufs is the simplest case, ie. lookup only an entry
+directly connected to its parent. Digging down the directory hierarchy
+is unnecessary. VFS has a function lookup_one_len() for that use, and
+aufs calls it.
+
+When a branch is a remote filesystem, aufs basically relies upon its
+->d_revalidate(), also aufs forces the hardest revalidate tests for
+them.
+For d_revalidate, aufs implements three levels of revalidate tests. See
+"Revalidate Dentry and UDBA" in detail.
+
+
+Test Only the Highest One for the Directory Permission (dirperm1 option)
+----------------------------------------------------------------------
+Let's try case study.
+- aufs has two branches, upper readwrite and lower readonly.
+ /au = /rw + /ro
+- "dirA" exists under /ro, but /rw. and its mode is 0700.
+- user invoked "chmod a+rx /au/dirA"
+- the internal copy-up is activated and "/rw/dirA" is created and its
+ permission bits are set to world readable.
+- then "/au/dirA" becomes world readable?
+
+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
+or it may be a natively readonly filesystem. If aufs respects the lower
+branch, it should not respond readdir request from other users. But user
+allowed it by chmod. Should really aufs rejects showing the entries
+under /ro/dirA?
+
+To be honest, I don't have a good solution for this case. So aufs
+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
+users.
+When dirperm1 is specified, aufs checks only the highest one for the
+directory permission, and shows the entries. Otherwise, as usual, checks
+every dir existing on all branches and rejects the request.
+
+As a side effect, dirperm1 option improves the performance of aufs
+because the number of permission check is reduced when the number of
+branch is many.
+
+
+Revalidate Dentry and UDBA (User's Direct Branch Access)
+----------------------------------------------------------------------
+Generally VFS helpers re-validate a dentry as a part of lookup.
+0. digging down the directory hierarchy.
+1. lock the parent dir by its i_mutex.
+2. lookup the final (child) entry.
+3. revalidate it.
+4. call the actual operation (create, unlink, etc.)
+5. unlock the parent dir
+
+If the filesystem implements its ->d_revalidate() (step 3), then it is
+called. Actually aufs implements it and checks the dentry on a branch is
+still valid.
+But it is not enough. Because aufs has to release the lock for the
+parent dir on a branch at the end of ->lookup() (step 2) and
+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
+held by VFS.
+If the file on a branch is changed directly, eg. bypassing aufs, after
+aufs released the lock, then the subsequent operation may cause
+something unpleasant result.
+
+This situation is a result of VFS architecture, ->lookup() and
+->d_revalidate() is separated. But I never say it is wrong. It is a good
+design from VFS's point of view. It is just not suitable for sub-VFS
+character in aufs.
+
+Aufs supports such case by three level of revalidation which is
+selectable by user.
+1. Simple Revalidate
+ Addition to the native flow in VFS's, confirm the child-parent
+ relationship on the branch just after locking the parent dir on the
+ branch in the "actual operation" (step 4). When this validation
+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
+ checks the validation of the dentry on branches.
+2. Monitor Changes Internally by Inotify/Fsnotify
+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
+ the dentry on the branch, and returns EBUSY if it finds different
+ dentry.
+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
+ during it is in cache. When the event is notified, aufs registers a
+ function to kernel 'events' thread by schedule_work(). And the
+ function sets some special status to the cached aufs dentry and inode
+ private data. If they are not cached, then aufs has nothing to
+ do. When the same file is accessed through aufs (step 0-3) later,
+ aufs will detect the status and refresh all necessary data.
+ In this mode, aufs has to ignore the event which is fired by aufs
+ itself.
+3. No Extra Validation
+ This is the simplest test and doesn't add any additional revalidation
+ test, and skip the revalidation in step 4. It is useful and improves
+ aufs performance when system surely hide the aufs branches from user,
+ by over-mounting something (or another method).
diff --git a/Documentation/filesystems/aufs/design/04branch.txt b/Documentation/filesystems/aufs/design/04branch.txt
new file mode 100644
index 000000000..940216e0d
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/04branch.txt
@@ -0,0 +1,74 @@
+
+# Copyright (C) 2005-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Branch Manipulation
+
+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
+and changing its permission/attribute, there are a lot of works to do.
+
+
+Add a Branch
+----------------------------------------------------------------------
+o Confirm the adding dir exists outside of aufs, including loopback
+ mount, and its various attributes.
+o Initialize the xino file and whiteout bases if necessary.
+ See struct.txt.
+
+o Check the owner/group/mode of the directory
+ When the owner/group/mode of the adding directory differs from the
+ existing branch, aufs issues a warning because it may impose a
+ security risk.
+ For example, when a upper writable branch has a world writable empty
+ top directory, a malicious user can create any files on the writable
+ branch directly, like copy-up and modify manually. If something like
+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper
+ writable branch, and the writable branch is world-writable, then a
+ malicious guy may create /etc/passwd on the writable branch directly
+ and the infected file will be valid in aufs.
+ I am afraid it can be a security issue, but aufs can do nothing except
+ producing a warning.
+
+
+Delete a Branch
+----------------------------------------------------------------------
+o Confirm the deleting branch is not busy
+ To be general, there is one merit to adopt "remount" interface to
+ manipulate branches. It is to discard caches. At deleting a branch,
+ aufs checks the still cached (and connected) dentries and inodes. If
+ there are any, then they are all in-use. An inode without its
+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
+
+ For the cached one, aufs checks whether the same named entry exists on
+ other branches.
+ If the cached one is a directory, because aufs provides a merged view
+ to users, as long as one dir is left on any branch aufs can show the
+ dir to users. In this case, the branch can be removed from aufs.
+ Otherwise aufs rejects deleting the branch.
+
+ If any file on the deleting branch is opened by aufs, then aufs
+ rejects deleting.
+
+
+Modify the Permission of a Branch
+----------------------------------------------------------------------
+o Re-initialize or remove the xino file and whiteout bases if necessary.
+ See struct.txt.
+
+o rw --> ro: Confirm the modifying branch is not busy
+ Aufs rejects the request if any of these conditions are true.
+ - a file on the branch is mmap-ed.
+ - a regular file on the branch is opened for write and there is no
+ same named entry on the upper branch.
diff --git a/Documentation/filesystems/aufs/design/05wbr_policy.txt b/Documentation/filesystems/aufs/design/05wbr_policy.txt
new file mode 100644
index 000000000..aeb108734
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/05wbr_policy.txt
@@ -0,0 +1,64 @@
+
+# Copyright (C) 2005-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Policies to Select One among Multiple Writable Branches
+----------------------------------------------------------------------
+When the number of writable branch is more than one, aufs has to decide
+the target branch for file creation or copy-up. By default, the highest
+writable branch which has the parent (or ancestor) dir of the target
+file is chosen (top-down-parent policy).
+By user's request, aufs implements some other policies to select the
+writable branch, for file creation several policies, round-robin,
+most-free-space, and other policies. For copy-up, top-down-parent,
+bottom-up-parent, bottom-up and others.
+
+As expected, the round-robin policy selects the branch in circular. When
+you have two writable branches and creates 10 new files, 5 files will be
+created for each branch. mkdir(2) systemcall is an exception. When you
+create 10 new directories, all will be created on the same branch.
+And the most-free-space policy selects the one which has most free
+space among the writable branches. The amount of free space will be
+checked by aufs internally, and users can specify its time interval.
+
+The policies for copy-up is more simple,
+top-down-parent is equivalent to the same named on in create policy,
+bottom-up-parent selects the writable branch where the parent dir
+exists and the nearest upper one from the copyup-source,
+bottom-up selects the nearest upper writable branch from the
+copyup-source, regardless the existence of the parent dir.
+
+There are some rules or exceptions to apply these policies.
+- If there is a readonly branch above the policy-selected branch and
+ the parent dir is marked as opaque (a variation of whiteout), or the
+ target (creating) file is whiteout-ed on the upper readonly branch,
+ then the result of the policy is ignored and the target file will be
+ created on the nearest upper writable branch than the readonly branch.
+- If there is a writable branch above the policy-selected branch and
+ the parent dir is marked as opaque or the target file is whiteouted
+ on the branch, then the result of the policy is ignored and the target
+ file will be created on the highest one among the upper writable
+ branches who has diropq or whiteout. In case of whiteout, aufs removes
+ it as usual.
+- link(2) and rename(2) systemcalls are exceptions in every policy.
+ They try selecting the branch where the source exists as possible
+ since copyup a large file will take long time. If it can't be,
+ ie. the branch where the source exists is readonly, then they will
+ follow the copyup policy.
+- There is an exception for rename(2) when the target exists.
+ If the rename target exists, aufs compares the index of the branches
+ where the source and the target exists and selects the higher
+ one. If the selected branch is readonly, then aufs follows the
+ copyup policy.
diff --git a/Documentation/filesystems/aufs/design/06fhsm.txt b/Documentation/filesystems/aufs/design/06fhsm.txt
new file mode 100644
index 000000000..5928ed219
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/06fhsm.txt
@@ -0,0 +1,120 @@
+
+# Copyright (C) 2011-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+File-based Hierarchical Storage Management (FHSM)
+----------------------------------------------------------------------
+Hierarchical Storage Management (or HSM) is a well-known feature in the
+storage world. Aufs provides this feature as file-based with multiple
+writable branches, based upon the principle of "Colder, the Lower".
+Here the word "colder" means that the less used files, and "lower" means
+that the position in the order of the stacked branches vertically.
+These multiple writable branches are prioritized, ie. the topmost one
+should be the fastest drive and be used heavily.
+
+o Characters in aufs FHSM story
+- aufs itself and a new branch attribute.
+- a new ioctl interface to move-down and to establish a connection with
+ the daemon ("move-down" is a converse of "copy-up").
+- userspace tool and daemon.
+
+The userspace daemon establishes a connection with aufs and waits for
+the notification. The notified information is very similar to struct
+statfs containing the number of consumed blocks and inodes.
+When the consumed blocks/inodes of a branch exceeds the user-specified
+upper watermark, the daemon activates its move-down process until the
+consumed blocks/inodes reaches the user-specified lower watermark.
+
+The actual move-down is done by aufs based upon the request from
+user-space since we need to maintain the inode number and the internal
+pointer arrays in aufs.
+
+Currently aufs FHSM handles the regular files only. Additionally they
+must not be hard-linked nor pseudo-linked.
+
+
+o Cowork of aufs and the user-space daemon
+ During the userspace daemon established the connection, aufs sends a
+ small notification to it whenever aufs writes something into the
+ writable branch. But it may cost high since aufs issues statfs(2)
+ internally. So user can specify a new option to cache the
+ info. Actually the notification is controlled by these factors.
+ + the specified cache time.
+ + classified as "force" by aufs internally.
+ Until the specified time expires, aufs doesn't send the info
+ except the forced cases. When aufs decide forcing, the info is always
+ notified to userspace.
+ For example, the number of free inodes is generally large enough and
+ the shortage of it happens rarely. So aufs doesn't force the
+ notification when creating a new file, directory and others. This is
+ the typical case which aufs doesn't force.
+ When aufs writes the actual filedata and the files consumes any of new
+ blocks, the aufs forces notifying.
+
+
+o Interfaces in aufs
+- New branch attribute.
+ + fhsm
+ Specifies that the branch is managed by FHSM feature. In other word,
+ participant in the FHSM.
+ When nofhsm is set to the branch, it will not be the source/target
+ branch of the move-down operation. This attribute is set
+ independently from coo and moo attributes, and if you want full
+ FHSM, you should specify them as well.
+- New mount option.
+ + fhsm_sec
+ Specifies a second to suppress many less important info to be
+ notified.
+- New ioctl.
+ + AUFS_CTL_FHSM_FD
+ create a new file descriptor which userspace can read the notification
+ (a subset of struct statfs) from aufs.
+- Module parameter 'brs'
+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not
+ be set.
+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
+ When there are two or more branches with fhsm attributes,
+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
+ terminates it. As a result of remounting and branch-manipulation, the
+ number of branches with fhsm attribute can be one. In this case,
+ /sbin/mount.aufs will terminate the user-space daemon.
+
+
+Finally the operation is done as these steps in kernel-space.
+- make sure that,
+ + no one else is using the file.
+ + the file is not hard-linked.
+ + the file is not pseudo-linked.
+ + the file is a regular file.
+ + the parent dir is not opaqued.
+- find the target writable branch.
+- make sure the file is not whiteout-ed by the upper (than the target)
+ branch.
+- make the parent dir on the target branch.
+- mutex lock the inode on the branch.
+- unlink the whiteout on the target branch (if exists).
+- lookup and create the whiteout-ed temporary name on the target branch.
+- copy the file as the whiteout-ed temporary name on the target branch.
+- rename the whiteout-ed temporary name to the original name.
+- unlink the file on the source branch.
+- maintain the internal pointer array and the external inode number
+ table (XINO).
+- maintain the timestamps and other attributes of the parent dir and the
+ file.
+
+And of course, in every step, an error may happen. So the operation
+should restore the original file state after an error happens.
diff --git a/Documentation/filesystems/aufs/design/06mmap.txt b/Documentation/filesystems/aufs/design/06mmap.txt
new file mode 100644
index 000000000..a42364eee
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/06mmap.txt
@@ -0,0 +1,72 @@
+
+# Copyright (C) 2005-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+mmap(2) -- File Memory Mapping
+----------------------------------------------------------------------
+In aufs, the file-mapped pages are handled by a branch fs directly, no
+interaction with aufs. It means aufs_mmap() calls the branch fs's
+->mmap().
+This approach is simple and good, but there is one problem.
+Under /proc, several entries show the mmapped files by its path (with
+device and inode number), and the printed path will be the path on the
+branch fs's instead of virtual aufs's.
+This is not a problem in most cases, but some utilities lsof(1) (and its
+user) may expect the path on aufs.
+
+To address this issue, aufs adds a new member called vm_prfile in struct
+vm_area_struct (and struct vm_region). The original vm_file points to
+the file on the branch fs in order to handle everything correctly as
+usual. The new vm_prfile points to a virtual file in aufs, and the
+show-functions in procfs refers to vm_prfile if it is set.
+Also we need to maintain several other places where touching vm_file
+such like
+- fork()/clone() copies vma and the reference count of vm_file is
+ incremented.
+- merging vma maintains the ref count too.
+
+This is not a good approach. It just fakes the printed path. But it
+leaves all behaviour around f_mapping unchanged. This is surely an
+advantage.
+Actually aufs had adopted another complicated approach which calls
+generic_file_mmap() and handles struct vm_operations_struct. In this
+approach, aufs met a hard problem and I could not solve it without
+switching the approach.
+
+There may be one more another approach which is
+- bind-mount the branch-root onto the aufs-root internally
+- grab the new vfsmount (ie. struct mount)
+- lazy-umount the branch-root internally
+- in open(2) the aufs-file, open the branch-file with the hidden
+ vfsmount (instead of the original branch's vfsmount)
+- ideally this "bind-mount and lazy-umount" should be done atomically,
+ but it may be possible from userspace by the mount helper.
+
+Adding the internal hidden vfsmount and using it in opening a file, the
+file path under /proc will be printed correctly. This approach looks
+smarter, but is not possible I am afraid.
+- aufs-root may be bind-mount later. when it happens, another hidden
+ vfsmount will be required.
+- it is hard to get the chance to bind-mount and lazy-umount
+ + in kernel-space, FS can have vfsmount in open(2) via
+ file->f_path, and aufs can know its vfsmount. But several locks are
+ already acquired, and if aufs tries to bind-mount and lazy-umount
+ here, then it may cause a deadlock.
+ + in user-space, bind-mount doesn't invoke the mount helper.
+- since /proc shows dev and ino, aufs has to give vma these info. it
+ means a new member vm_prinode will be necessary. this is essentially
+ equivalent to vm_prfile described above.
+
+I have to give up this "looks-smater" approach.
diff --git a/Documentation/filesystems/aufs/design/06xattr.txt b/Documentation/filesystems/aufs/design/06xattr.txt
new file mode 100644
index 000000000..8aad929b8
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/06xattr.txt
@@ -0,0 +1,96 @@
+
+# Copyright (C) 2014-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Listing XATTR/EA and getting the value
+----------------------------------------------------------------------
+For the inode standard attributes (owner, group, timestamps, etc.), aufs
+shows the values from the topmost existing file. This behaviour is good
+for the non-dir entries since the bahaviour exactly matches the shown
+information. But for the directories, aufs considers all the same named
+entries on the lower branches. Which means, if one of the lower entry
+rejects readdir call, then aufs returns an error even if the topmost
+entry allows it. This behaviour is necessary to respect the branch fs's
+security, but can make users confused since the user-visible standard
+attributes don't match the behaviour.
+To address this issue, aufs has a mount option called dirperm1 which
+checks the permission for the topmost entry only, and ignores the lower
+entry's permission.
+
+A similar issue can happen around XATTR.
+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
+always set. Otherwise these very unpleasant situation would happen.
+- listxattr(2) may return the duplicated entries.
+- users may not be able to remove or reset the XATTR forever,
+
+
+XATTR/EA support in the internal (copy,move)-(up,down)
+----------------------------------------------------------------------
+Generally the extended attributes of inode are categorized as these.
+- "security" for LSM and capability.
+- "system" for posix ACL, 'acl' mount option is required for the branch
+ fs generally.
+- "trusted" for userspace, CAP_SYS_ADMIN is required.
+- "user" for userspace, 'user_xattr' mount option is required for the
+ branch fs generally.
+
+Moreover there are some other categories. Aufs handles these rather
+unpopular categories as the ordinary ones, ie. there is no special
+condition nor exception.
+
+In copy-up, the support for XATTR on the dst branch may differ from the
+src branch. In this case, the copy-up operation will get an error and
+the original user operation which triggered the copy-up will fail. It
+can happen that even all copy-up will fail.
+When both of src and dst branches support XATTR and if an error occurs
+during copying XATTR, then the copy-up should fail obviously. That is a
+good reason and aufs should return an error to userspace. But when only
+the src branch support that XATTR, aufs should not return an error.
+For example, the src branch supports ACL but the dst branch doesn't
+because the dst branch may natively un-support it or temporary
+un-support it due to "noacl" mount option. Of course, the dst branch fs
+may NOT return an error even if the XATTR is not supported. It is
+totally up to the branch fs.
+
+Anyway when the aufs internal copy-up gets an error from the dst branch
+fs, then aufs tries removing the just copied entry and returns the error
+to the userspace. The worst case of this situation will be all copy-up
+will fail.
+
+For the copy-up operation, there two basic approaches.
+- copy the specified XATTR only (by category above), and return the
+ error unconditionally if it happens.
+- copy all XATTR, and ignore the error on the specified category only.
+
+In order to support XATTR and to implement the correct behaviour, aufs
+chooses the latter approach and introduces some new branch attributes,
+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
+They correspond to the XATTR namespaces (see above). Additionally, to be
+convenient, "icex" is also provided which means all "icex*" attributes
+are set (here the word "icex" stands for "ignore copy-error on XATTR").
+
+The meaning of these attributes is to ignore the error from setting
+XATTR on that branch.
+Note that aufs tries copying all XATTR unconditionally, and ignores the
+error from the dst branch according to the specified attributes.
+
+Some XATTR may have its default value. The default value may come from
+the parent dir or the environment. If the default value is set at the
+file creating-time, it will be overwritten by copy-up.
+Some contradiction may happen I am afraid.
+Do we need another attribute to stop copying XATTR? I am unsure. For
+now, aufs implements the branch attributes to ignore the error.
diff --git a/Documentation/filesystems/aufs/design/07export.txt b/Documentation/filesystems/aufs/design/07export.txt
new file mode 100644
index 000000000..b25dd950c
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/07export.txt
@@ -0,0 +1,58 @@
+
+# Copyright (C) 2005-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Export Aufs via NFS
+----------------------------------------------------------------------
+Here is an approach.
+- like xino/xib, add a new file 'xigen' which stores aufs inode
+ generation.
+- iget_locked(): initialize aufs inode generation for a new inode, and
+ store it in xigen file.
+- destroy_inode(): increment aufs inode generation and store it in xigen
+ file. it is necessary even if it is not unlinked, because any data of
+ inode may be changed by UDBA.
+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
+ build file handle by
+ + branch id (4 bytes)
+ + superblock generation (4 bytes)
+ + inode number (4 or 8 bytes)
+ + parent dir inode number (4 or 8 bytes)
+ + inode generation (4 bytes))
+ + return value of exportfs_encode_fh() for the parent on a branch (4
+ bytes)
+ + file handle for a branch (by exportfs_encode_fh())
+- fh_to_dentry():
+ + find the index of a branch from its id in handle, and check it is
+ still exist in aufs.
+ + 1st level: get the inode number from handle and search it in cache.
+ + 2nd level: if not found in cache, get the parent inode number from
+ the handle and search it in cache. and then open the found parent
+ dir, find the matching inode number by vfs_readdir() and get its
+ name, and call lookup_one_len() for the target dentry.
+ + 3rd level: if the parent dir is not cached, call
+ exportfs_decode_fh() for a branch and get the parent on a branch,
+ build a pathname of it, convert it a pathname in aufs, call
+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
+ the 2nd level.
+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
+ for every branch, but not itself. to get this, (currently) aufs
+ searches in current->nsproxy->mnt_ns list. it may not be a good
+ idea, but I didn't get other approach.
+ + test the generation of the gotten inode.
+- every inode operation: they may get EBUSY due to UDBA. in this case,
+ convert it into ESTALE for NFSD.
+- readdir(): call lockdep_on/off() because filldir in NFSD calls
+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
diff --git a/Documentation/filesystems/aufs/design/08shwh.txt b/Documentation/filesystems/aufs/design/08shwh.txt
new file mode 100644
index 000000000..a97a7987b
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/08shwh.txt
@@ -0,0 +1,52 @@
+
+# Copyright (C) 2005-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Show Whiteout Mode (shwh)
+----------------------------------------------------------------------
+Generally aufs hides the name of whiteouts. But in some cases, to show
+them is very useful for users. For instance, creating a new middle layer
+(branch) by merging existing layers.
+
+(borrowing aufs1 HOW-TO from a user, Michael Towers)
+When you have three branches,
+- Bottom: 'system', squashfs (underlying base system), read-only
+- Middle: 'mods', squashfs, read-only
+- Top: 'overlay', ram (tmpfs), read-write
+
+The top layer is loaded at boot time and saved at shutdown, to preserve
+the changes made to the system during the session.
+When larger changes have been made, or smaller changes have accumulated,
+the size of the saved top layer data grows. At this point, it would be
+nice to be able to merge the two overlay branches ('mods' and 'overlay')
+and rewrite the 'mods' squashfs, clearing the top layer and thus
+restoring save and load speed.
+
+This merging is simplified by the use of another aufs mount, of just the
+two overlay branches using the 'shwh' option.
+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
+ aufs /livesys/merge_union
+
+A merged view of these two branches is then available at
+/livesys/merge_union, and the new feature is that the whiteouts are
+visible!
+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
+writing to all branches. Also the default mode for all branches is 'ro'.
+It is now possible to save the combined contents of the two overlay
+branches to a new squashfs, e.g.:
+# mksquashfs /livesys/merge_union /path/to/newmods.squash
+
+This new squashfs archive can be stored on the boot device and the
+initramfs will use it to replace the old one at the next boot.
diff --git a/Documentation/filesystems/aufs/design/10dynop.txt b/Documentation/filesystems/aufs/design/10dynop.txt
new file mode 100644
index 000000000..04c35f5ac
--- /dev/null
+++ b/Documentation/filesystems/aufs/design/10dynop.txt
@@ -0,0 +1,47 @@
+
+# Copyright (C) 2010-2015 Junjiro R. Okajima
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Dynamically customizable FS operations
+----------------------------------------------------------------------
+Generally FS operations (struct inode_operations, struct
+address_space_operations, struct file_operations, etc.) are defined as
+"static const", but it never means that FS have only one set of
+operation. Some FS have multiple sets of them. For instance, ext2 has
+three sets, one for XIP, for NOBH, and for normal.
+Since aufs overrides and redirects these operations, sometimes aufs has
+to change its behaviour according to the branch FS type. More importantly
+VFS acts differently if a function (member in the struct) is set or
+not. It means aufs should have several sets of operations and select one
+among them according to the branch FS definition.
+
+In order to solve this problem and not to affect the behaviour of VFS,
+aufs defines these operations dynamically. For instance, aufs defines
+dummy direct_IO function for struct address_space_operations, but it may
+not be set to the address_space_operations actually. When the branch FS
+doesn't have it, aufs doesn't set it to its address_space_operations
+while the function definition itself is still alive. So the behaviour
+itself will not change, and it will return an error when direct_IO is
+not set.
+
+The lifetime of these dynamically generated operation object is
+maintained by aufs branch object. When the branch is removed from aufs,
+the reference counter of the object is decremented. When it reaches
+zero, the dynamically generated operation object will be freed.
+
+This approach is designed to support AIO (io_submit), Direct I/O and
+XIP (DAX) mainly.
+Currently this approach is applied to address_space_operations for
+regular files only.
diff --git a/Documentation/filesystems/autofs4-mount-control.txt b/Documentation/filesystems/autofs4-mount-control.txt
new file mode 100644
index 000000000..aff22113a
--- /dev/null
+++ b/Documentation/filesystems/autofs4-mount-control.txt
@@ -0,0 +1,393 @@
+
+Miscellaneous Device control operations for the autofs4 kernel module
+====================================================================
+
+The problem
+===========
+
+There is a problem with active restarts in autofs (that is to say
+restarting autofs when there are busy mounts).
+
+During normal operation autofs uses a file descriptor opened on the
+directory that is being managed in order to be able to issue control
+operations. Using a file descriptor gives ioctl operations access to
+autofs specific information stored in the super block. The operations
+are things such as setting an autofs mount catatonic, setting the
+expire timeout and requesting expire checks. As is explained below,
+certain types of autofs triggered mounts can end up covering an autofs
+mount itself which prevents us being able to use open(2) to obtain a
+file descriptor for these operations if we don't already have one open.
+
+Currently autofs uses "umount -l" (lazy umount) to clear active mounts
+at restart. While using lazy umount works for most cases, anything that
+needs to walk back up the mount tree to construct a path, such as
+getcwd(2) and the proc file system /proc/<pid>/cwd, no longer works
+because the point from which the path is constructed has been detached
+from the mount tree.
+
+The actual problem with autofs is that it can't reconnect to existing
+mounts. Immediately one thinks of just adding the ability to remount
+autofs file systems would solve it, but alas, that can't work. This is
+because autofs direct mounts and the implementation of "on demand mount
+and expire" of nested mount trees have the file system mounted directly
+on top of the mount trigger directory dentry.
+
+For example, there are two types of automount maps, direct (in the kernel
+module source you will see a third type called an offset, which is just
+a direct mount in disguise) and indirect.
+
+Here is a master map with direct and indirect map entries:
+
+/- /etc/auto.direct
+/test /etc/auto.indirect
+
+and the corresponding map files:
+
+/etc/auto.direct:
+
+/automount/dparse/g6 budgie:/autofs/export1
+/automount/dparse/g1 shark:/autofs/export1
+and so on.
+
+/etc/auto.indirect:
+
+g1 shark:/autofs/export1
+g6 budgie:/autofs/export1
+and so on.
+
+For the above indirect map an autofs file system is mounted on /test and
+mounts are triggered for each sub-directory key by the inode lookup
+operation. So we see a mount of shark:/autofs/export1 on /test/g1, for
+example.
+
+The way that direct mounts are handled is by making an autofs mount on
+each full path, such as /automount/dparse/g1, and using it as a mount
+trigger. So when we walk on the path we mount shark:/autofs/export1 "on
+top of this mount point". Since these are always directories we can
+use the follow_link inode operation to trigger the mount.
+
+But, each entry in direct and indirect maps can have offsets (making
+them multi-mount map entries).
+
+For example, an indirect mount map entry could also be:
+
+g1 \
+ / shark:/autofs/export5/testing/test \
+ /s1 shark:/autofs/export/testing/test/s1 \
+ /s2 shark:/autofs/export5/testing/test/s2 \
+ /s1/ss1 shark:/autofs/export1 \
+ /s2/ss2 shark:/autofs/export2
+
+and a similarly a direct mount map entry could also be:
+
+/automount/dparse/g1 \
+ / shark:/autofs/export5/testing/test \
+ /s1 shark:/autofs/export/testing/test/s1 \
+ /s2 shark:/autofs/export5/testing/test/s2 \
+ /s1/ss1 shark:/autofs/export2 \
+ /s2/ss2 shark:/autofs/export2
+
+One of the issues with version 4 of autofs was that, when mounting an
+entry with a large number of offsets, possibly with nesting, we needed
+to mount and umount all of the offsets as a single unit. Not really a
+problem, except for people with a large number of offsets in map entries.
+This mechanism is used for the well known "hosts" map and we have seen
+cases (in 2.4) where the available number of mounts are exhausted or
+where the number of privileged ports available is exhausted.
+
+In version 5 we mount only as we go down the tree of offsets and
+similarly for expiring them which resolves the above problem. There is
+somewhat more detail to the implementation but it isn't needed for the
+sake of the problem explanation. The one important detail is that these
+offsets are implemented using the same mechanism as the direct mounts
+above and so the mount points can be covered by a mount.
+
+The current autofs implementation uses an ioctl file descriptor opened
+on the mount point for control operations. The references held by the
+descriptor are accounted for in checks made to determine if a mount is
+in use and is also used to access autofs file system information held
+in the mount super block. So the use of a file handle needs to be
+retained.
+
+
+The Solution
+============
+
+To be able to restart autofs leaving existing direct, indirect and
+offset mounts in place we need to be able to obtain a file handle
+for these potentially covered autofs mount points. Rather than just
+implement an isolated operation it was decided to re-implement the
+existing ioctl interface and add new operations to provide this
+functionality.
+
+In addition, to be able to reconstruct a mount tree that has busy mounts,
+the uid and gid of the last user that triggered the mount needs to be
+available because these can be used as macro substitution variables in
+autofs maps. They are recorded at mount request time and an operation
+has been added to retrieve them.
+
+Since we're re-implementing the control interface, a couple of other
+problems with the existing interface have been addressed. First, when
+a mount or expire operation completes a status is returned to the
+kernel by either a "send ready" or a "send fail" operation. The
+"send fail" operation of the ioctl interface could only ever send
+ENOENT so the re-implementation allows user space to send an actual
+status. Another expensive operation in user space, for those using
+very large maps, is discovering if a mount is present. Usually this
+involves scanning /proc/mounts and since it needs to be done quite
+often it can introduce significant overhead when there are many entries
+in the mount table. An operation to lookup the mount status of a mount
+point dentry (covered or not) has also been added.
+
+Current kernel development policy recommends avoiding the use of the
+ioctl mechanism in favor of systems such as Netlink. An implementation
+using this system was attempted to evaluate its suitability and it was
+found to be inadequate, in this case. The Generic Netlink system was
+used for this as raw Netlink would lead to a significant increase in
+complexity. There's no question that the Generic Netlink system is an
+elegant solution for common case ioctl functions but it's not a complete
+replacement probably because its primary purpose in life is to be a
+message bus implementation rather than specifically an ioctl replacement.
+While it would be possible to work around this there is one concern
+that lead to the decision to not use it. This is that the autofs
+expire in the daemon has become far to complex because umount
+candidates are enumerated, almost for no other reason than to "count"
+the number of times to call the expire ioctl. This involves scanning
+the mount table which has proved to be a big overhead for users with
+large maps. The best way to improve this is try and get back to the
+way the expire was done long ago. That is, when an expire request is
+issued for a mount (file handle) we should continually call back to
+the daemon until we can't umount any more mounts, then return the
+appropriate status to the daemon. At the moment we just expire one
+mount at a time. A Generic Netlink implementation would exclude this
+possibility for future development due to the requirements of the
+message bus architecture.
+
+
+autofs4 Miscellaneous Device mount control interface
+====================================================
+
+The control interface is opening a device node, typically /dev/autofs.
+
+All the ioctls use a common structure to pass the needed parameter
+information and return operation results:
+
+struct autofs_dev_ioctl {
+ __u32 ver_major;
+ __u32 ver_minor;
+ __u32 size; /* total size of data passed in
+ * including this struct */
+ __s32 ioctlfd; /* automount command fd */
+
+ __u32 arg1; /* Command parameters */
+ __u32 arg2;
+
+ char path[0];
+};
+
+The ioctlfd field is a mount point file descriptor of an autofs mount
+point. It is returned by the open call and is used by all calls except
+the check for whether a given path is a mount point, where it may
+optionally be used to check a specific mount corresponding to a given
+mount point file descriptor, and when requesting the uid and gid of the
+last successful mount on a directory within the autofs file system.
+
+The fields arg1 and arg2 are used to communicate parameters and results of
+calls made as described below.
+
+The path field is used to pass a path where it is needed and the size field
+is used account for the increased structure length when translating the
+structure sent from user space.
+
+This structure can be initialized before setting specific fields by using
+the void function call init_autofs_dev_ioctl(struct autofs_dev_ioctl *).
+
+All of the ioctls perform a copy of this structure from user space to
+kernel space and return -EINVAL if the size parameter is smaller than
+the structure size itself, -ENOMEM if the kernel memory allocation fails
+or -EFAULT if the copy itself fails. Other checks include a version check
+of the compiled in user space version against the module version and a
+mismatch results in a -EINVAL return. If the size field is greater than
+the structure size then a path is assumed to be present and is checked to
+ensure it begins with a "/" and is NULL terminated, otherwise -EINVAL is
+returned. Following these checks, for all ioctl commands except
+AUTOFS_DEV_IOCTL_VERSION_CMD, AUTOFS_DEV_IOCTL_OPENMOUNT_CMD and
+AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD the ioctlfd is validated and if it is
+not a valid descriptor or doesn't correspond to an autofs mount point
+an error of -EBADF, -ENOTTY or -EINVAL (not an autofs descriptor) is
+returned.
+
+
+The ioctls
+==========
+
+An example of an implementation which uses this interface can be seen
+in autofs version 5.0.4 and later in file lib/dev-ioctl-lib.c of the
+distribution tar available for download from kernel.org in directory
+/pub/linux/daemons/autofs/v5.
+
+The device node ioctl operations implemented by this interface are:
+
+
+AUTOFS_DEV_IOCTL_VERSION
+------------------------
+
+Get the major and minor version of the autofs4 device ioctl kernel module
+implementation. It requires an initialized struct autofs_dev_ioctl as an
+input parameter and sets the version information in the passed in structure.
+It returns 0 on success or the error -EINVAL if a version mismatch is
+detected.
+
+
+AUTOFS_DEV_IOCTL_PROTOVER_CMD and AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD
+------------------------------------------------------------------
+
+Get the major and minor version of the autofs4 protocol version understood
+by loaded module. This call requires an initialized struct autofs_dev_ioctl
+with the ioctlfd field set to a valid autofs mount point descriptor
+and sets the requested version number in structure field arg1. These
+commands return 0 on success or one of the negative error codes if
+validation fails.
+
+
+AUTOFS_DEV_IOCTL_OPENMOUNT and AUTOFS_DEV_IOCTL_CLOSEMOUNT
+----------------------------------------------------------
+
+Obtain and release a file descriptor for an autofs managed mount point
+path. The open call requires an initialized struct autofs_dev_ioctl with
+the path field set and the size field adjusted appropriately as well
+as the arg1 field set to the device number of the autofs mount. The
+device number can be obtained from the mount options shown in
+/proc/mounts. The close call requires an initialized struct
+autofs_dev_ioct with the ioctlfd field set to the descriptor obtained
+from the open call. The release of the file descriptor can also be done
+with close(2) so any open descriptors will also be closed at process exit.
+The close call is included in the implemented operations largely for
+completeness and to provide for a consistent user space implementation.
+
+
+AUTOFS_DEV_IOCTL_READY_CMD and AUTOFS_DEV_IOCTL_FAIL_CMD
+--------------------------------------------------------
+
+Return mount and expire result status from user space to the kernel.
+Both of these calls require an initialized struct autofs_dev_ioctl
+with the ioctlfd field set to the descriptor obtained from the open
+call and the arg1 field set to the wait queue token number, received
+by user space in the foregoing mount or expire request. The arg2 field
+is set to the status to be returned. For the ready call this is always
+0 and for the fail call it is set to the errno of the operation.
+
+
+AUTOFS_DEV_IOCTL_SETPIPEFD_CMD
+------------------------------
+
+Set the pipe file descriptor used for kernel communication to the daemon.
+Normally this is set at mount time using an option but when reconnecting
+to a existing mount we need to use this to tell the autofs mount about
+the new kernel pipe descriptor. In order to protect mounts against
+incorrectly setting the pipe descriptor we also require that the autofs
+mount be catatonic (see next call).
+
+The call requires an initialized struct autofs_dev_ioctl with the
+ioctlfd field set to the descriptor obtained from the open call and
+the arg1 field set to descriptor of the pipe. On success the call
+also sets the process group id used to identify the controlling process
+(eg. the owning automount(8) daemon) to the process group of the caller.
+
+
+AUTOFS_DEV_IOCTL_CATATONIC_CMD
+------------------------------
+
+Make the autofs mount point catatonic. The autofs mount will no longer
+issue mount requests, the kernel communication pipe descriptor is released
+and any remaining waits in the queue released.
+
+The call requires an initialized struct autofs_dev_ioctl with the
+ioctlfd field set to the descriptor obtained from the open call.
+
+
+AUTOFS_DEV_IOCTL_TIMEOUT_CMD
+----------------------------
+
+Set the expire timeout for mounts within an autofs mount point.
+
+The call requires an initialized struct autofs_dev_ioctl with the
+ioctlfd field set to the descriptor obtained from the open call.
+
+
+AUTOFS_DEV_IOCTL_REQUESTER_CMD
+------------------------------
+
+Return the uid and gid of the last process to successfully trigger a the
+mount on the given path dentry.
+
+The call requires an initialized struct autofs_dev_ioctl with the path
+field set to the mount point in question and the size field adjusted
+appropriately as well as the arg1 field set to the device number of the
+containing autofs mount. Upon return the struct field arg1 contains the
+uid and arg2 the gid.
+
+When reconstructing an autofs mount tree with active mounts we need to
+re-connect to mounts that may have used the original process uid and
+gid (or string variations of them) for mount lookups within the map entry.
+This call provides the ability to obtain this uid and gid so they may be
+used by user space for the mount map lookups.
+
+
+AUTOFS_DEV_IOCTL_EXPIRE_CMD
+---------------------------
+
+Issue an expire request to the kernel for an autofs mount. Typically
+this ioctl is called until no further expire candidates are found.
+
+The call requires an initialized struct autofs_dev_ioctl with the
+ioctlfd field set to the descriptor obtained from the open call. In
+addition an immediate expire, independent of the mount timeout, can be
+requested by setting the arg1 field to 1. If no expire candidates can
+be found the ioctl returns -1 with errno set to EAGAIN.
+
+This call causes the kernel module to check the mount corresponding
+to the given ioctlfd for mounts that can be expired, issues an expire
+request back to the daemon and waits for completion.
+
+AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD
+------------------------------
+
+Checks if an autofs mount point is in use.
+
+The call requires an initialized struct autofs_dev_ioctl with the
+ioctlfd field set to the descriptor obtained from the open call and
+it returns the result in the arg1 field, 1 for busy and 0 otherwise.
+
+
+AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD
+---------------------------------
+
+Check if the given path is a mountpoint.
+
+The call requires an initialized struct autofs_dev_ioctl. There are two
+possible variations. Both use the path field set to the path of the mount
+point to check and the size field adjusted appropriately. One uses the
+ioctlfd field to identify a specific mount point to check while the other
+variation uses the path and optionally arg1 set to an autofs mount type.
+The call returns 1 if this is a mount point and sets arg1 to the device
+number of the mount and field arg2 to the relevant super block magic
+number (described below) or 0 if it isn't a mountpoint. In both cases
+the the device number (as returned by new_encode_dev()) is returned
+in field arg1.
+
+If supplied with a file descriptor we're looking for a specific mount,
+not necessarily at the top of the mounted stack. In this case the path
+the descriptor corresponds to is considered a mountpoint if it is itself
+a mountpoint or contains a mount, such as a multi-mount without a root
+mount. In this case we return 1 if the descriptor corresponds to a mount
+point and and also returns the super magic of the covering mount if there
+is one or 0 if it isn't a mountpoint.
+
+If a path is supplied (and the ioctlfd field is set to -1) then the path
+is looked up and is checked to see if it is the root of a mount. If a
+type is also given we are looking for a particular autofs mount and if
+a match isn't found a fail is returned. If the the located path is the
+root of a mount 1 is returned along with the super magic of the mount
+or 0 otherwise.
+
diff --git a/Documentation/filesystems/autofs4.txt b/Documentation/filesystems/autofs4.txt
new file mode 100644
index 000000000..39d02e19f
--- /dev/null
+++ b/Documentation/filesystems/autofs4.txt
@@ -0,0 +1,520 @@
+<head>
+<style> p { max-width:50em} ol, ul {max-width: 40em}</style>
+</head>
+
+autofs - how it works
+=====================
+
+Purpose
+-------
+
+The goal of autofs is to provide on-demand mounting and race free
+automatic unmounting of various other filesystems. This provides two
+key advantages:
+
+1. There is no need to delay boot until all filesystems that
+ might be needed are mounted. Processes that try to access those
+ slow filesystems might be delayed but other processes can
+ continue freely. This is particularly important for
+ network filesystems (e.g. NFS) or filesystems stored on
+ media with a media-changing robot.
+
+2. The names and locations of filesystems can be stored in
+ a remote database and can change at any time. The content
+ in that data base at the time of access will be used to provide
+ a target for the access. The interpretation of names in the
+ filesystem can even be programmatic rather than database-backed,
+ allowing wildcards for example, and can vary based on the user who
+ first accessed a name.
+
+Context
+-------
+
+The "autofs4" filesystem module is only one part of an autofs system.
+There also needs to be a user-space program which looks up names
+and mounts filesystems. This will often be the "automount" program,
+though other tools including "systemd" can make use of "autofs4".
+This document describes only the kernel module and the interactions
+required with any user-space program. Subsequent text refers to this
+as the "automount daemon" or simply "the daemon".
+
+"autofs4" is a Linux kernel module with provides the "autofs"
+filesystem type. Several "autofs" filesystems can be mounted and they
+can each be managed separately, or all managed by the same daemon.
+
+Content
+-------
+
+An autofs filesystem can contain 3 sorts of objects: directories,
+symbolic links and mount traps. Mount traps are directories with
+extra properties as described in the next section.
+
+Objects can only be created by the automount daemon: symlinks are
+created with a regular `symlink` system call, while directories and
+mount traps are created with `mkdir`. The determination of whether a
+directory should be a mount trap or not is quite _ad hoc_, largely for
+historical reasons, and is determined in part by the
+*direct*/*indirect*/*offset* mount options, and the *maxproto* mount option.
+
+If neither the *direct* or *offset* mount options are given (so the
+mount is considered to be *indirect*), then the root directory is
+always a regular directory, otherwise it is a mount trap when it is
+empty and a regular directory when not empty. Note that *direct* and
+*offset* are treated identically so a concise summary is that the root
+directory is a mount trap only if the filesystem is mounted *direct*
+and the root is empty.
+
+Directories created in the root directory are mount traps only if the
+filesystem is mounted *indirect* and they are empty.
+
+Directories further down the tree depend on the *maxproto* mount
+option and particularly whether it is less than five or not.
+When *maxproto* is five, no directories further down the
+tree are ever mount traps, they are always regular directories. When
+the *maxproto* is four (or three), these directories are mount traps
+precisely when they are empty.
+
+So: non-empty (i.e. non-leaf) directories are never mount traps. Empty
+directories are sometimes mount traps, and sometimes not depending on
+where in the tree they are (root, top level, or lower), the *maxproto*,
+and whether the mount was *indirect* or not.
+
+Mount Traps
+---------------
+
+A core element of the implementation of autofs is the Mount Traps
+which are provided by the Linux VFS. Any directory provided by a
+filesystem can be designated as a trap. This involves two separate
+features that work together to allow autofs to do its job.
+
+**DCACHE_NEED_AUTOMOUNT**
+
+If a dentry has the DCACHE_NEED_AUTOMOUNT flag set (which gets set if
+the inode has S_AUTOMOUNT set, or can be set directly) then it is
+(potentially) a mount trap. Any access to this directory beyond a
+"`stat`" will (normally) cause the `d_op->d_automount()` dentry operation
+to be called. The task of this method is to find the filesystem that
+should be mounted on the directory and to return it. The VFS is
+responsible for actually mounting the root of this filesystem on the
+directory.
+
+autofs doesn't find the filesystem itself but sends a message to the
+automount daemon asking it to find and mount the filesystem. The
+autofs `d_automount` method then waits for the daemon to report that
+everything is ready. It will then return "`NULL`" indicating that the
+mount has already happened. The VFS doesn't try to mount anything but
+follows down the mount that is already there.
+
+This functionality is sufficient for some users of mount traps such
+as NFS which creates traps so that mountpoints on the server can be
+reflected on the client. However it is not sufficient for autofs. As
+mounting onto a directory is considered to be "beyond a `stat`", the
+automount daemon would not be able to mount a filesystem on the 'trap'
+directory without some way to avoid getting caught in the trap. For
+that purpose there is another flag.
+
+**DCACHE_MANAGE_TRANSIT**
+
+If a dentry has DCACHE_MANAGE_TRANSIT set then two very different but
+related behaviors are invoked, both using the `d_op->d_manage()`
+dentry operation.
+
+Firstly, before checking to see if any filesystem is mounted on the
+directory, d_manage() will be called with the `rcu_walk` parameter set
+to `false`. It may return one of three things:
+
+- A return value of zero indicates that there is nothing special
+ about this dentry and normal checks for mounts and automounts
+ should proceed.
+
+ autofs normally returns zero, but first waits for any
+ expiry (automatic unmounting of the mounted filesystem) to
+ complete. This avoids races.
+
+- A return value of `-EISDIR` tells the VFS to ignore any mounts
+ on the directory and to not consider calling `->d_automount()`.
+ This effectively disables the **DCACHE_NEED_AUTOMOUNT** flag
+ causing the directory not be a mount trap after all.
+
+ autofs returns this if it detects that the process performing the
+ lookup is the automount daemon and that the mount has been
+ requested but has not yet completed. How it determines this is
+ discussed later. This allows the automount daemon not to get
+ caught in the mount trap.
+
+ There is a subtlety here. It is possible that a second autofs
+ filesystem can be mounted below the first and for both of them to
+ be managed by the same daemon. For the daemon to be able to mount
+ something on the second it must be able to "walk" down past the
+ first. This means that d_manage cannot *always* return -EISDIR for
+ the automount daemon. It must only return it when a mount has
+ been requested, but has not yet completed.
+
+ `d_manage` also returns `-EISDIR` if the dentry shouldn't be a
+ mount trap, either because it is a symbolic link or because it is
+ not empty.
+
+- Any other negative value is treated as an error and returned
+ to the caller.
+
+ autofs can return
+
+ - -ENOENT if the automount daemon failed to mount anything,
+ - -ENOMEM if it ran out of memory,
+ - -EINTR if a signal arrived while waiting for expiry to
+ complete
+ - or any other error sent down by the automount daemon.
+
+
+The second use case only occurs during an "RCU-walk" and so `rcu_walk`
+will be set.
+
+An RCU-walk is a fast and lightweight process for walking down a
+filename path (i.e. it is like running on tip-toes). RCU-walk cannot
+cope with all situations so when it finds a difficulty it falls back
+to "REF-walk", which is slower but more robust.
+
+RCU-walk will never call `->d_automount`; the filesystems must already
+be mounted or RCU-walk cannot handle the path.
+To determine if a mount-trap is safe for RCU-walk mode it calls
+`->d_manage()` with `rcu_walk` set to `true`.
+
+In this case `d_manage()` must avoid blocking and should avoid taking
+spinlocks if at all possible. Its sole purpose is to determine if it
+would be safe to follow down into any mounted directory and the only
+reason that it might not be is if an expiry of the mount is
+underway.
+
+In the `rcu_walk` case, `d_manage()` cannot return -EISDIR to tell the
+VFS that this is a directory that doesn't require d_automount. If
+`rcu_walk` sees a dentry with DCACHE_NEED_AUTOMOUNT set but nothing
+mounted, it *will* fall back to REF-walk. `d_manage()` cannot make the
+VFS remain in RCU-walk mode, but can only tell it to get out of
+RCU-walk mode by returning `-ECHILD`.
+
+So `d_manage()`, when called with `rcu_walk` set, should either return
+-ECHILD if there is any reason to believe it is unsafe to end the
+mounted filesystem, and otherwise should return 0.
+
+autofs will return `-ECHILD` if an expiry of the filesystem has been
+initiated or is being considered, otherwise it returns 0.
+
+
+Mountpoint expiry
+-----------------
+
+The VFS has a mechansim for automatically expiring unused mounts,
+much as it can expire any unused dentry information from the dcache.
+This is guided by the MNT_SHRINKABLE flag. This only applies to
+mounts that were created by `d_automount()` returning a filesystem to be
+mounted. As autofs doesn't return such a filesystem but leaves the
+mounting to the automount daemon, it must involve the automount daemon
+in unmounting as well. This also means that autofs has more control
+of expiry.
+
+The VFS also supports "expiry" of mounts using the MNT_EXPIRE flag to
+the `umount` system call. Unmounting with MNT_EXPIRE will fail unless
+a previous attempt had been made, and the filesystem has been inactive
+and untouched since that previous attempt. autofs4 does not depend on
+this but has its own internal tracking of whether filesystems were
+recently used. This allows individual names in the autofs directory
+to expire separately.
+
+With version 4 of the protocol, the automount daemon can try to
+unmount any filesystems mounted on the autofs filesystem or remove any
+symbolic links or empty directories any time it likes. If the unmount
+or removal is successful the filesystem will be returned to the state
+it was before the mount or creation, so that any access of the name
+will trigger normal auto-mount processing. In particlar, `rmdir` and
+`unlink` do not leave negative entries in the dcache as a normal
+filesystem would, so an attempt to access a recently-removed object is
+passed to autofs for handling.
+
+With version 5, this is not safe except for unmounting from top-level
+directories. As lower-level directories are never mount traps, other
+processes will see an empty directory as soon as the filesystem is
+unmounted. So it is generally safest to use the autofs expiry
+protocol described below.
+
+Normally the daemon only wants to remove entries which haven't been
+used for a while. For this purpose autofs maintains a "`last_used`"
+time stamp on each directory or symlink. For symlinks it genuinely
+does record the last time the symlink was "used" or followed to find
+out where it points to. For directories the field is a slight
+misnomer. It actually records the last time that autofs checked if
+the directory or one of its descendents was busy and found that it
+was. This is just as useful and doesn't require updating the field so
+often.
+
+The daemon is able to ask autofs if anything is due to be expired,
+using an `ioctl` as discussed later. For a *direct* mount, autofs
+considers if the entire mount-tree can be unmounted or not. For an
+*indirect* mount, autofs considers each of the names in the top level
+directory to determine if any of those can be unmounted and cleaned
+up.
+
+There is an option with indirect mounts to consider each of the leaves
+that has been mounted on instead of considering the top-level names.
+This is intended for compatability with version 4 of autofs and should
+be considered as deprecated.
+
+When autofs considers a directory it checks the `last_used` time and
+compares it with the "timeout" value set when the filesystem was
+mounted, though this check is ignored in some cases. It also checks if
+the directory or anything below it is in use. For symbolic links,
+only the `last_used` time is ever considered.
+
+If both appear to support expiring the directory or symlink, an action
+is taken.
+
+There are two ways to ask autofs to consider expiry. The first is to
+use the **AUTOFS_IOC_EXPIRE** ioctl. This only works for indirect
+mounts. If it finds something in the root directory to expire it will
+return the name of that thing. Once a name has been returned the
+automount daemon needs to unmount any filesystems mounted below the
+name normally. As described above, this is unsafe for non-toplevel
+mounts in a version-5 autofs. For this reason the current `automountd`
+does not use this ioctl.
+
+The second mechanism uses either the **AUTOFS_DEV_IOCTL_EXPIRE_CMD** or
+the **AUTOFS_IOC_EXPIRE_MULTI** ioctl. This will work for both direct and
+indirect mounts. If it selects an object to expire, it will notify
+the daemon using the notification mechanism described below. This
+will block until the daemon acknowledges the expiry notification.
+This implies that the "`EXPIRE`" ioctl must be sent from a different
+thread than the one which handles notification.
+
+While the ioctl is blocking, the entry is marked as "expiring" and
+`d_manage` will block until the daemon affirms that the unmount has
+completed (together with removing any directories that might have been
+necessary), or has been aborted.
+
+Communicating with autofs: detecting the daemon
+-----------------------------------------------
+
+There are several forms of communication between the automount daemon
+and the filesystem. As we have already seen, the daemon can create and
+remove directories and symlinks using normal filesystem operations.
+autofs knows whether a process requesting some operation is the daemon
+or not based on its process-group id number (see getpgid(1)).
+
+When an autofs filesystem it mounted the pgid of the mounting
+processes is recorded unless the "pgrp=" option is given, in which
+case that number is recorded instead. Any request arriving from a
+process in that process group is considered to come from the daemon.
+If the daemon ever has to be stopped and restarted a new pgid can be
+provided through an ioctl as will be described below.
+
+Communicating with autofs: the event pipe
+-----------------------------------------
+
+When an autofs filesystem is mounted, the 'write' end of a pipe must
+be passed using the 'fd=' mount option. autofs will write
+notification messages to this pipe for the daemon to respond to.
+For version 5, the format of the message is:
+
+ struct autofs_v5_packet {
+ int proto_version; /* Protocol version */
+ int type; /* Type of packet */
+ autofs_wqt_t wait_queue_token;
+ __u32 dev;
+ __u64 ino;
+ __u32 uid;
+ __u32 gid;
+ __u32 pid;
+ __u32 tgid;
+ __u32 len;
+ char name[NAME_MAX+1];
+ };
+
+where the type is one of
+
+ autofs_ptype_missing_indirect
+ autofs_ptype_expire_indirect
+ autofs_ptype_missing_direct
+ autofs_ptype_expire_direct
+
+so messages can indicate that a name is missing (something tried to
+access it but it isn't there) or that it has been selected for expiry.
+
+The pipe will be set to "packet mode" (equivalent to passing
+`O_DIRECT`) to _pipe2(2)_ so that a read from the pipe will return at
+most one packet, and any unread portion of a packet will be discarded.
+
+The `wait_queue_token` is a unique number which can identify a
+particular request to be acknowledged. When a message is sent over
+the pipe the affected dentry is marked as either "active" or
+"expiring" and other accesses to it block until the message is
+acknowledged using one of the ioctls below and the relevant
+`wait_queue_token`.
+
+Communicating with autofs: root directory ioctls
+------------------------------------------------
+
+The root directory of an autofs filesystem will respond to a number of
+ioctls. The process issuing the ioctl must have the CAP_SYS_ADMIN
+capability, or must be the automount daemon.
+
+The available ioctl commands are:
+
+- **AUTOFS_IOC_READY**: a notification has been handled. The argument
+ to the ioctl command is the "wait_queue_token" number
+ corresponding to the notification being acknowledged.
+- **AUTOFS_IOC_FAIL**: similar to above, but indicates failure with
+ the error code `ENOENT`.
+- **AUTOFS_IOC_CATATONIC**: Causes the autofs to enter "catatonic"
+ mode meaning that it stops sending notifications to the daemon.
+ This mode is also entered if a write to the pipe fails.
+- **AUTOFS_IOC_PROTOVER**: This returns the protocol version in use.
+- **AUTOFS_IOC_PROTOSUBVER**: Returns the protocol sub-version which
+ is really a version number for the implementation. It is
+ currently 2.
+- **AUTOFS_IOC_SETTIMEOUT**: This passes a pointer to an unsigned
+ long. The value is used to set the timeout for expiry, and
+ the current timeout value is stored back through the pointer.
+- **AUTOFS_IOC_ASKUMOUNT**: Returns, in the pointed-to `int`, 1 if
+ the filesystem could be unmounted. This is only a hint as
+ the situation could change at any instant. This call can be
+ use to avoid a more expensive full unmount attempt.
+- **AUTOFS_IOC_EXPIRE**: as described above, this asks if there is
+ anything suitable to expire. A pointer to a packet:
+
+ struct autofs_packet_expire_multi {
+ int proto_version; /* Protocol version */
+ int type; /* Type of packet */
+ autofs_wqt_t wait_queue_token;
+ int len;
+ char name[NAME_MAX+1];
+ };
+
+ is required. This is filled in with the name of something
+ that can be unmounted or removed. If nothing can be expired,
+ `errno` is set to `EAGAIN`. Even though a `wait_queue_token`
+ is present in the structure, no "wait queue" is established
+ and no acknowledgment is needed.
+- **AUTOFS_IOC_EXPIRE_MULTI**: This is similar to
+ **AUTOFS_IOC_EXPIRE** except that it causes notification to be
+ sent to the daemon, and it blocks until the daemon acknowledges.
+ The argument is an integer which can contain two different flags.
+
+ **AUTOFS_EXP_IMMEDIATE** causes `last_used` time to be ignored
+ and objects are expired if the are not in use.
+
+ **AUTOFS_EXP_LEAVES** will select a leaf rather than a top-level
+ name to expire. This is only safe when *maxproto* is 4.
+
+Communicating with autofs: char-device ioctls
+---------------------------------------------
+
+It is not always possible to open the root of an autofs filesystem,
+particularly a *direct* mounted filesystem. If the automount daemon
+is restarted there is no way for it to regain control of existing
+mounts using any of the above communication channels. To address this
+need there is a "miscellaneous" character device (major 10, minor 235)
+which can be used to communicate directly with the autofs filesystem.
+It requires CAP_SYS_ADMIN for access.
+
+The `ioctl`s that can be used on this device are described in a separate
+document `autofs4-mount-control.txt`, and are summarized briefly here.
+Each ioctl is passed a pointer to an `autofs_dev_ioctl` structure:
+
+ struct autofs_dev_ioctl {
+ __u32 ver_major;
+ __u32 ver_minor;
+ __u32 size; /* total size of data passed in
+ * including this struct */
+ __s32 ioctlfd; /* automount command fd */
+
+ __u32 arg1; /* Command parameters */
+ __u32 arg2;
+
+ char path[0];
+ };
+
+For the **OPEN_MOUNT** and **IS_MOUNTPOINT** commands, the target
+filesystem is identified by the `path`. All other commands identify
+the filesystem by the `ioctlfd` which is a file descriptor open on the
+root, and which can be returned by **OPEN_MOUNT**.
+
+The `ver_major` and `ver_minor` are in/out parameters which check that
+the requested version is supported, and report the maximum version
+that the kernel module can support.
+
+Commands are:
+
+- **AUTOFS_DEV_IOCTL_VERSION_CMD**: does nothing, except validate and
+ set version numbers.
+- **AUTOFS_DEV_IOCTL_OPENMOUNT_CMD**: return an open file descriptor
+ on the root of an autofs filesystem. The filesystem is identified
+ by name and device number, which is stored in `arg1`. Device
+ numbers for existing filesystems can be found in
+ `/proc/self/mountinfo`.
+- **AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD**: same as `close(ioctlfd)`.
+- **AUTOFS_DEV_IOCTL_SETPIPEFD_CMD**: if the filesystem is in
+ catatonic mode, this can provide the write end of a new pipe
+ in `arg1` to re-establish communication with a daemon. The
+ process group of the calling process is used to identify the
+ daemon.
+- **AUTOFS_DEV_IOCTL_REQUESTER_CMD**: `path` should be a
+ name within the filesystem that has been auto-mounted on.
+ arg1 is the dev number of the underlying autofs. On successful
+ return, `arg1` and `arg2` will be the UID and GID of the process
+ which triggered that mount.
+
+- **AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD**: Check if path is a
+ mountpoint of a particular type - see separate documentation for
+ details.
+
+- **AUTOFS_DEV_IOCTL_PROTOVER_CMD**:
+- **AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD**:
+- **AUTOFS_DEV_IOCTL_READY_CMD**:
+- **AUTOFS_DEV_IOCTL_FAIL_CMD**:
+- **AUTOFS_DEV_IOCTL_CATATONIC_CMD**:
+- **AUTOFS_DEV_IOCTL_TIMEOUT_CMD**:
+- **AUTOFS_DEV_IOCTL_EXPIRE_CMD**:
+- **AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD**: These all have the same
+ function as the similarly named **AUTOFS_IOC** ioctls, except
+ that **FAIL** can be given an explicit error number in `arg1`
+ instead of assuming `ENOENT`, and this **EXPIRE** command
+ corresponds to **AUTOFS_IOC_EXPIRE_MULTI**.
+
+Catatonic mode
+--------------
+
+As mentioned, an autofs mount can enter "catatonic" mode. This
+happens if a write to the notification pipe fails, or if it is
+explicitly requested by an `ioctl`.
+
+When entering catatonic mode, the pipe is closed and any pending
+notifications are acknowledged with the error `ENOENT`.
+
+Once in catatonic mode attempts to access non-existing names will
+result in `ENOENT` while attempts to access existing directories will
+be treated in the same way as if they came from the daemon, so mount
+traps will not fire.
+
+When the filesystem is mounted a _uid_ and _gid_ can be given which
+set the ownership of directories and symbolic links. When the
+filesystem is in catatonic mode, any process with a matching UID can
+create directories or symlinks in the root directory, but not in other
+directories.
+
+Catatonic mode can only be left via the
+**AUTOFS_DEV_IOCTL_OPENMOUNT_CMD** ioctl on the `/dev/autofs`.
+
+autofs, name spaces, and shared mounts
+--------------------------------------
+
+With bind mounts and name spaces it is possible for an autofs
+filesystem to appear at multiple places in one or more filesystem
+name spaces. For this to work sensibly, the autofs filesystem should
+always be mounted "shared". e.g.
+
+> `mount --make-shared /autofs/mount/point`
+
+The automount daemon is only able to mange a single mount location for
+an autofs filesystem and if mounts on that are not 'shared', other
+locations will not behave as expected. In particular access to those
+other locations will likely result in the `ELOOP` error
+
+> Too many levels of symbolic links
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt
new file mode 100644
index 000000000..7cac200e2
--- /dev/null
+++ b/Documentation/filesystems/automount-support.txt
@@ -0,0 +1,118 @@
+Support is available for filesystems that wish to do automounting support (such
+as kAFS which can be found in fs/afs/). This facility includes allowing
+in-kernel mounts to be performed and mountpoint degradation to be
+requested. The latter can also be requested by userspace.
+
+
+======================
+IN-KERNEL AUTOMOUNTING
+======================
+
+A filesystem can now mount another filesystem on one of its directories by the
+following procedure:
+
+ (1) Give the directory a follow_link() operation.
+
+ When the directory is accessed, the follow_link op will be called, and
+ it will be provided with the location of the mountpoint in the nameidata
+ structure (vfsmount and dentry).
+
+ (2) Have the follow_link() op do the following steps:
+
+ (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a
+ superblock and gain a vfsmount structure representing it.
+
+ (b) Copy the nameidata provided as an argument and substitute the dentry
+ argument into it the copy.
+
+ (c) Call do_add_mount() to install the new vfsmount into the namespace's
+ mountpoint tree, thus making it accessible to userspace. Use the
+ nameidata set up in (b) as the destination.
+
+ If the mountpoint will be automatically expired, then do_add_mount()
+ should also be given the location of an expiration list (see further
+ down).
+
+ (d) Release the path in the nameidata argument and substitute in the new
+ vfsmount and its root dentry. The ref counts on these will need
+ incrementing.
+
+Then from userspace, you can just do something like:
+
+ [root@andromeda root]# mount -t afs \#root.afs. /afs
+ [root@andromeda root]# ls /afs
+ asd cambridge cambridge.redhat.com grand.central.org
+ [root@andromeda root]# ls /afs/cambridge
+ afsdoc
+ [root@andromeda root]# ls /afs/cambridge/afsdoc/
+ ChangeLog html LICENSE pdf RELNOTES-1.2.2
+
+And then if you look in the mountpoint catalogue, you'll see something like:
+
+ [root@andromeda root]# cat /proc/mounts
+ ...
+ #root.afs. /afs afs rw 0 0
+ #root.cell. /afs/cambridge.redhat.com afs rw 0 0
+ #afsdoc. /afs/cambridge.redhat.com/afsdoc afs rw 0 0
+
+
+===========================
+AUTOMATIC MOUNTPOINT EXPIRY
+===========================
+
+Automatic expiration of mountpoints is easy, provided you've mounted the
+mountpoint to be expired in the automounting procedure outlined above.
+
+To do expiration, you need to follow these steps:
+
+ (3) Create at least one list off which the vfsmounts to be expired can be
+ hung. Access to this list will be governed by the vfsmount_lock.
+
+ (4) In step (2c) above, the call to do_add_mount() should be provided with a
+ pointer to this list. It will hang the vfsmount off of it if it succeeds.
+
+ (5) When you want mountpoints to be expired, call mark_mounts_for_expiry()
+ with a pointer to this list. This will process the list, marking every
+ vfsmount thereon for potential expiry on the next call.
+
+ If a vfsmount was already flagged for expiry, and if its usage count is 1
+ (it's only referenced by its parent vfsmount), then it will be deleted
+ from the namespace and thrown away (effectively unmounted).
+
+ It may prove simplest to simply call this at regular intervals, using
+ some sort of timed event to drive it.
+
+The expiration flag is cleared by calls to mntput. This means that expiration
+will only happen on the second expiration request after the last time the
+mountpoint was accessed.
+
+If a mountpoint is moved, it gets removed from the expiration list. If a bind
+mount is made on an expirable mount, the new vfsmount will not be on the
+expiration list and will not expire.
+
+If a namespace is copied, all mountpoints contained therein will be copied,
+and the copies of those that are on an expiration list will be added to the
+same expiration list.
+
+
+=======================
+USERSPACE DRIVEN EXPIRY
+=======================
+
+As an alternative, it is possible for userspace to request expiry of any
+mountpoint (though some will be rejected - the current process's idea of the
+rootfs for example). It does this by passing the MNT_EXPIRE flag to
+umount(). This flag is considered incompatible with MNT_FORCE and MNT_DETACH.
+
+If the mountpoint in question is in referenced by something other than
+umount() or its parent mountpoint, an EBUSY error will be returned and the
+mountpoint will not be marked for expiration or unmounted.
+
+If the mountpoint was not already marked for expiry at that time, an EAGAIN
+error will be given and it won't be unmounted.
+
+Otherwise if it was already marked and it wasn't referenced, unmounting will
+take place as usual.
+
+Again, the expiration flag is cleared every time anything other than umount()
+looks at a mountpoint.
diff --git a/Documentation/filesystems/befs.txt b/Documentation/filesystems/befs.txt
new file mode 100644
index 000000000..da45e6c84
--- /dev/null
+++ b/Documentation/filesystems/befs.txt
@@ -0,0 +1,117 @@
+BeOS filesystem for Linux
+
+Document last updated: Dec 6, 2001
+
+WARNING
+=======
+Make sure you understand that this is alpha software. This means that the
+implementation is neither complete nor well-tested.
+
+I DISCLAIM ALL RESPONSIBILITY FOR ANY POSSIBLE BAD EFFECTS OF THIS CODE!
+
+LICENSE
+=====
+This software is covered by the GNU General Public License.
+See the file COPYING for the complete text of the license.
+Or the GNU website: <http://www.gnu.org/licenses/licenses.html>
+
+AUTHOR
+=====
+The largest part of the code written by Will Dyson <will_dyson@pobox.com>
+He has been working on the code since Aug 13, 2001. See the changelog for
+details.
+
+Original Author: Makoto Kato <m_kato@ga2.so-net.ne.jp>
+His original code can still be found at:
+<http://hp.vector.co.jp/authors/VA008030/bfs/>
+Does anyone know of a more current email address for Makoto? He doesn't
+respond to the address given above...
+
+This filesystem doesn't have a maintainer.
+
+WHAT IS THIS DRIVER?
+==================
+This module implements the native filesystem of BeOS http://www.beincorporated.com/
+for the linux 2.4.1 and later kernels. Currently it is a read-only
+implementation.
+
+Which is it, BFS or BEFS?
+================
+Be, Inc said, "BeOS Filesystem is officially called BFS, not BeFS".
+But Unixware Boot Filesystem is called bfs, too. And they are already in
+the kernel. Because of this naming conflict, on Linux the BeOS
+filesystem is called befs.
+
+HOW TO INSTALL
+==============
+step 1. Install the BeFS patch into the source code tree of linux.
+
+Apply the patchfile to your kernel source tree.
+Assuming that your kernel source is in /foo/bar/linux and the patchfile
+is called patch-befs-xxx, you would do the following:
+
+ cd /foo/bar/linux
+ patch -p1 < /path/to/patch-befs-xxx
+
+if the patching step fails (i.e. there are rejected hunks), you can try to
+figure it out yourself (it shouldn't be hard), or mail the maintainer
+(Will Dyson <will_dyson@pobox.com>) for help.
+
+step 2. Configuration & make kernel
+
+The linux kernel has many compile-time options. Most of them are beyond the
+scope of this document. I suggest the Kernel-HOWTO document as a good general
+reference on this topic. http://www.linuxdocs.org/HOWTOs/Kernel-HOWTO-4.html
+
+However, to use the BeFS module, you must enable it at configure time.
+
+ cd /foo/bar/linux
+ make menuconfig (or xconfig)
+
+The BeFS module is not a standard part of the linux kernel, so you must first
+enable support for experimental code under the "Code maturity level" menu.
+
+Then, under the "Filesystems" menu will be an option called "BeFS
+filesystem (experimental)", or something like that. Enable that option
+(it is fine to make it a module).
+
+Save your kernel configuration and then build your kernel.
+
+step 3. Install
+
+See the kernel howto <http://www.linux.com/howto/Kernel-HOWTO.html> for
+instructions on this critical step.
+
+USING BFS
+=========
+To use the BeOS filesystem, use filesystem type 'befs'.
+
+ex)
+ mount -t befs /dev/fd0 /beos
+
+MOUNT OPTIONS
+=============
+uid=nnn All files in the partition will be owned by user id nnn.
+gid=nnn All files in the partition will be in group nnn.
+iocharset=xxx Use xxx as the name of the NLS translation table.
+debug The driver will output debugging information to the syslog.
+
+HOW TO GET LASTEST VERSION
+==========================
+
+The latest version is currently available at:
+<http://befs-driver.sourceforge.net/>
+
+ANY KNOWN BUGS?
+===========
+As of Jan 20, 2002:
+
+ None
+
+SPECIAL THANKS
+==============
+Dominic Giampalo ... Writing "Practical file system design with Be filesystem"
+Hiroyuki Yamada ... Testing LinuxPPC.
+
+
+
diff --git a/Documentation/filesystems/bfs.txt b/Documentation/filesystems/bfs.txt
new file mode 100644
index 000000000..78043d5a8
--- /dev/null
+++ b/Documentation/filesystems/bfs.txt
@@ -0,0 +1,57 @@
+BFS FILESYSTEM FOR LINUX
+========================
+
+The BFS filesystem is used by SCO UnixWare OS for the /stand slice, which
+usually contains the kernel image and a few other files required for the
+boot process.
+
+In order to access /stand partition under Linux you obviously need to
+know the partition number and the kernel must support UnixWare disk slices
+(CONFIG_UNIXWARE_DISKLABEL config option). However BFS support does not
+depend on having UnixWare disklabel support because one can also mount
+BFS filesystem via loopback:
+
+# losetup /dev/loop0 stand.img
+# mount -t bfs /dev/loop0 /mnt/stand
+
+where stand.img is a file containing the image of BFS filesystem.
+When you have finished using it and umounted you need to also deallocate
+/dev/loop0 device by:
+
+# losetup -d /dev/loop0
+
+You can simplify mounting by just typing:
+
+# mount -t bfs -o loop stand.img /mnt/stand
+
+this will allocate the first available loopback device (and load loop.o
+kernel module if necessary) automatically. If the loopback driver is not
+loaded automatically, make sure that you have compiled the module and
+that modprobe is functioning. Beware that umount will not deallocate
+/dev/loopN device if /etc/mtab file on your system is a symbolic link to
+/proc/mounts. You will need to do it manually using "-d" switch of
+losetup(8). Read losetup(8) manpage for more info.
+
+To create the BFS image under UnixWare you need to find out first which
+slice contains it. The command prtvtoc(1M) is your friend:
+
+# prtvtoc /dev/rdsk/c0b0t0d0s0
+
+(assuming your root disk is on target=0, lun=0, bus=0, controller=0). Then you
+look for the slice with tag "STAND", which is usually slice 10. With this
+information you can use dd(1) to create the BFS image:
+
+# umount /stand
+# dd if=/dev/rdsk/c0b0t0d0sa of=stand.img bs=512
+
+Just in case, you can verify that you have done the right thing by checking
+the magic number:
+
+# od -Ad -tx4 stand.img | more
+
+The first 4 bytes should be 0x1badface.
+
+If you have any patches, questions or suggestions regarding this BFS
+implementation please contact the author:
+
+Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
diff --git a/Documentation/filesystems/btrfs.txt b/Documentation/filesystems/btrfs.txt
new file mode 100644
index 000000000..d11cc2f80
--- /dev/null
+++ b/Documentation/filesystems/btrfs.txt
@@ -0,0 +1,270 @@
+
+BTRFS
+=====
+
+Btrfs is a copy on write filesystem for Linux aimed at
+implementing advanced features while focusing on fault tolerance,
+repair and easy administration. Initially developed by Oracle, Btrfs
+is licensed under the GPL and open for contribution from anyone.
+
+Linux has a wealth of filesystems to choose from, but we are facing a
+number of challenges with scaling to the large storage subsystems that
+are becoming common in today's data centers. Filesystems need to scale
+in their ability to address and manage large storage, and also in
+their ability to detect, repair and tolerate errors in the data stored
+on disk. Btrfs is under heavy development, and is not suitable for
+any uses other than benchmarking and review. The Btrfs disk format is
+not yet finalized.
+
+The main Btrfs features include:
+
+ * Extent based file storage (2^64 max file size)
+ * Space efficient packing of small files
+ * Space efficient indexed directories
+ * Dynamic inode allocation
+ * Writable snapshots
+ * Subvolumes (separate internal filesystem roots)
+ * Object level mirroring and striping
+ * Checksums on data and metadata (multiple algorithms available)
+ * Compression
+ * Integrated multiple device support, with several raid algorithms
+ * Online filesystem check (not yet implemented)
+ * Very fast offline filesystem check
+ * Efficient incremental backup and FS mirroring (not yet implemented)
+ * Online filesystem defragmentation
+
+
+Mount Options
+=============
+
+When mounting a btrfs filesystem, the following option are accepted.
+Options with (*) are default options and will not show in the mount options.
+
+ alloc_start=<bytes>
+ Debugging option to force all block allocations above a certain
+ byte threshold on each block device. The value is specified in
+ bytes, optionally with a K, M, or G suffix, case insensitive.
+ Default is 1MB.
+
+ noautodefrag(*)
+ autodefrag
+ Disable/enable auto defragmentation.
+ Auto defragmentation detects small random writes into files and queue
+ them up for the defrag process. Works best for small files;
+ Not well suited for large database workloads.
+
+ check_int
+ check_int_data
+ check_int_print_mask=<value>
+ These debugging options control the behavior of the integrity checking
+ module (the BTRFS_FS_CHECK_INTEGRITY config option required).
+
+ check_int enables the integrity checker module, which examines all
+ block write requests to ensure on-disk consistency, at a large
+ memory and CPU cost.
+
+ check_int_data includes extent data in the integrity checks, and
+ implies the check_int option.
+
+ check_int_print_mask takes a bitmask of BTRFSIC_PRINT_MASK_* values
+ as defined in fs/btrfs/check-integrity.c, to control the integrity
+ checker module behavior.
+
+ See comments at the top of fs/btrfs/check-integrity.c for more info.
+
+ commit=<seconds>
+ Set the interval of periodic commit, 30 seconds by default. Higher
+ values defer data being synced to permanent storage with obvious
+ consequences when the system crashes. The upper bound is not forced,
+ but a warning is printed if it's more than 300 seconds (5 minutes).
+
+ compress
+ compress=<type>
+ compress-force
+ compress-force=<type>
+ Control BTRFS file data compression. Type may be specified as "zlib"
+ "lzo" or "no" (for no compression, used for remounting). If no type
+ is specified, zlib is used. If compress-force is specified,
+ all files will be compressed, whether or not they compress well.
+ If compression is enabled, nodatacow and nodatasum are disabled.
+
+ degraded
+ Allow mounts to continue with missing devices. A read-write mount may
+ fail with too many devices missing, for example if a stripe member
+ is completely missing.
+
+ device=<devicepath>
+ Specify a device during mount so that ioctls on the control device
+ can be avoided. Especially useful when trying to mount a multi-device
+ setup as root. May be specified multiple times for multiple devices.
+
+ nodiscard(*)
+ discard
+ Disable/enable discard mount option.
+ Discard issues frequent commands to let the block device reclaim space
+ freed by the filesystem.
+ This is useful for SSD devices, thinly provisioned
+ LUNs and virtual machine images, but may have a significant
+ performance impact. (The fstrim command is also available to
+ initiate batch trims from userspace).
+
+ noenospc_debug(*)
+ enospc_debug
+ Disable/enable debugging option to be more verbose in some ENOSPC conditions.
+
+ fatal_errors=<action>
+ Action to take when encountering a fatal error:
+ "bug" - BUG() on a fatal error. This is the default.
+ "panic" - panic() on a fatal error.
+
+ noflushoncommit(*)
+ flushoncommit
+ The 'flushoncommit' mount option forces any data dirtied by a write in a
+ prior transaction to commit as part of the current commit. This makes
+ the committed state a fully consistent view of the file system from the
+ application's perspective (i.e., it includes all completed file system
+ operations). This was previously the behavior only when a snapshot is
+ created.
+
+ inode_cache
+ Enable free inode number caching. Defaults to off due to an overflow
+ problem when the free space crcs don't fit inside a single page.
+
+ max_inline=<bytes>
+ Specify the maximum amount of space, in bytes, that can be inlined in
+ a metadata B-tree leaf. The value is specified in bytes, optionally
+ with a K, M, or G suffix, case insensitive. In practice, this value
+ is limited by the root sector size, with some space unavailable due
+ to leaf headers. For a 4k sectorsize, max inline data is ~3900 bytes.
+
+ metadata_ratio=<value>
+ Specify that 1 metadata chunk should be allocated after every <value>
+ data chunks. Off by default.
+
+ acl(*)
+ noacl
+ Enable/disable support for Posix Access Control Lists (ACLs). See the
+ acl(5) manual page for more information about ACLs.
+
+ barrier(*)
+ nobarrier
+ Enable/disable the use of block layer write barriers. Write barriers
+ ensure that certain IOs make it through the device cache and are on
+ persistent storage. If disabled on a device with a volatile
+ (non-battery-backed) write-back cache, nobarrier option will lead to
+ filesystem corruption on a system crash or power loss.
+
+ datacow(*)
+ nodatacow
+ Enable/disable data copy-on-write for newly created files.
+ Nodatacow implies nodatasum, and disables all compression.
+
+ datasum(*)
+ nodatasum
+ Enable/disable data checksumming for newly created files.
+ Datasum implies datacow.
+
+ treelog(*)
+ notreelog
+ Enable/disable the tree logging used for fsync and O_SYNC writes.
+
+ recovery
+ Enable autorecovery attempts if a bad tree root is found at mount time.
+ Currently this scans a list of several previous tree roots and tries to
+ use the first readable.
+
+ rescan_uuid_tree
+ Force check and rebuild procedure of the UUID tree. This should not
+ normally be needed.
+
+ skip_balance
+ Skip automatic resume of interrupted balance operation after mount.
+ May be resumed with "btrfs balance resume."
+
+ space_cache (*)
+ Enable the on-disk freespace cache.
+ nospace_cache
+ Disable freespace cache loading without clearing the cache.
+ clear_cache
+ Force clearing and rebuilding of the disk space cache if something
+ has gone wrong.
+
+ ssd
+ nossd
+ ssd_spread
+ Options to control ssd allocation schemes. By default, BTRFS will
+ enable or disable ssd allocation heuristics depending on whether a
+ rotational or nonrotational disk is in use. The ssd and nossd options
+ can override this autodetection.
+
+ The ssd_spread mount option attempts to allocate into big chunks
+ of unused space, and may perform better on low-end ssds. ssd_spread
+ implies ssd, enabling all other ssd heuristics as well.
+
+ subvol=<path>
+ Mount subvolume at <path> rather than the root subvolume. <path> is
+ relative to the top level subvolume.
+
+ subvolid=<ID>
+ Mount subvolume specified by an ID number rather than the root subvolume.
+ This allows mounting of subvolumes which are not in the root of the mounted
+ filesystem.
+ You can use "btrfs subvolume list" to see subvolume ID numbers.
+
+ subvolrootid=<objectid> (deprecated)
+ Mount subvolume specified by <objectid> rather than the root subvolume.
+ This allows mounting of subvolumes which are not in the root of the mounted
+ filesystem.
+ You can use "btrfs subvolume show " to see the object ID for a subvolume.
+
+ thread_pool=<number>
+ The number of worker threads to allocate. The default number is equal
+ to the number of CPUs + 2, or 8, whichever is smaller.
+
+ user_subvol_rm_allowed
+ Allow subvolumes to be deleted by a non-root user. Use with caution.
+
+MAILING LIST
+============
+
+There is a Btrfs mailing list hosted on vger.kernel.org. You can
+find details on how to subscribe here:
+
+http://vger.kernel.org/vger-lists.html#linux-btrfs
+
+Mailing list archives are available from gmane:
+
+http://dir.gmane.org/gmane.comp.file-systems.btrfs
+
+
+
+IRC
+===
+
+Discussion of Btrfs also occurs on the #btrfs channel of the Freenode
+IRC network.
+
+
+
+ UTILITIES
+ =========
+
+Userspace tools for creating and manipulating Btrfs file systems are
+available from the git repository at the following location:
+
+ http://git.kernel.org/?p=linux/kernel/git/mason/btrfs-progs.git
+ git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs.git
+
+These include the following tools:
+
+* mkfs.btrfs: create a filesystem
+
+* btrfs: a single tool to manage the filesystems, refer to the manpage for more details
+
+* 'btrfsck' or 'btrfs check': do a consistency check of the filesystem
+
+Other tools for specific tasks:
+
+* btrfs-convert: in-place conversion from ext2/3/4 filesystems
+
+* btrfs-image: dump filesystem metadata for debugging
diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt
new file mode 100644
index 000000000..277d1e810
--- /dev/null
+++ b/Documentation/filesystems/caching/backend-api.txt
@@ -0,0 +1,703 @@
+ ==========================
+ FS-CACHE CACHE BACKEND API
+ ==========================
+
+The FS-Cache system provides an API by which actual caches can be supplied to
+FS-Cache for it to then serve out to network filesystems and other interested
+parties.
+
+This API is declared in <linux/fscache-cache.h>.
+
+
+====================================
+INITIALISING AND REGISTERING A CACHE
+====================================
+
+To start off, a cache definition must be initialised and registered for each
+cache the backend wants to make available. For instance, CacheFS does this in
+the fill_super() operation on mounting.
+
+The cache definition (struct fscache_cache) should be initialised by calling:
+
+ void fscache_init_cache(struct fscache_cache *cache,
+ struct fscache_cache_ops *ops,
+ const char *idfmt,
+ ...);
+
+Where:
+
+ (*) "cache" is a pointer to the cache definition;
+
+ (*) "ops" is a pointer to the table of operations that the backend supports on
+ this cache; and
+
+ (*) "idfmt" is a format and printf-style arguments for constructing a label
+ for the cache.
+
+
+The cache should then be registered with FS-Cache by passing a pointer to the
+previously initialised cache definition to:
+
+ int fscache_add_cache(struct fscache_cache *cache,
+ struct fscache_object *fsdef,
+ const char *tagname);
+
+Two extra arguments should also be supplied:
+
+ (*) "fsdef" which should point to the object representation for the FS-Cache
+ master index in this cache. Netfs primary index entries will be created
+ here. FS-Cache keeps the caller's reference to the index object if
+ successful and will release it upon withdrawal of the cache.
+
+ (*) "tagname" which, if given, should be a text string naming this cache. If
+ this is NULL, the identifier will be used instead. For CacheFS, the
+ identifier is set to name the underlying block device and the tag can be
+ supplied by mount.
+
+This function may return -ENOMEM if it ran out of memory or -EEXIST if the tag
+is already in use. 0 will be returned on success.
+
+
+=====================
+UNREGISTERING A CACHE
+=====================
+
+A cache can be withdrawn from the system by calling this function with a
+pointer to the cache definition:
+
+ void fscache_withdraw_cache(struct fscache_cache *cache);
+
+In CacheFS's case, this is called by put_super().
+
+
+========
+SECURITY
+========
+
+The cache methods are executed one of two contexts:
+
+ (1) that of the userspace process that issued the netfs operation that caused
+ the cache method to be invoked, or
+
+ (2) that of one of the processes in the FS-Cache thread pool.
+
+In either case, this may not be an appropriate context in which to access the
+cache.
+
+The calling process's fsuid, fsgid and SELinux security identities may need to
+be masqueraded for the duration of the cache driver's access to the cache.
+This is left to the cache to handle; FS-Cache makes no effort in this regard.
+
+
+===================================
+CONTROL AND STATISTICS PRESENTATION
+===================================
+
+The cache may present data to the outside world through FS-Cache's interfaces
+in sysfs and procfs - the former for control and the latter for statistics.
+
+A sysfs directory called /sys/fs/fscache/<cachetag>/ is created if CONFIG_SYSFS
+is enabled. This is accessible through the kobject struct fscache_cache::kobj
+and is for use by the cache as it sees fit.
+
+
+========================
+RELEVANT DATA STRUCTURES
+========================
+
+ (*) Index/Data file FS-Cache representation cookie:
+
+ struct fscache_cookie {
+ struct fscache_object_def *def;
+ struct fscache_netfs *netfs;
+ void *netfs_data;
+ ...
+ };
+
+ The fields that might be of use to the backend describe the object
+ definition, the netfs definition and the netfs's data for this cookie.
+ The object definition contain functions supplied by the netfs for loading
+ and matching index entries; these are required to provide some of the
+ cache operations.
+
+
+ (*) In-cache object representation:
+
+ struct fscache_object {
+ int debug_id;
+ enum {
+ FSCACHE_OBJECT_RECYCLING,
+ ...
+ } state;
+ spinlock_t lock
+ struct fscache_cache *cache;
+ struct fscache_cookie *cookie;
+ ...
+ };
+
+ Structures of this type should be allocated by the cache backend and
+ passed to FS-Cache when requested by the appropriate cache operation. In
+ the case of CacheFS, they're embedded in CacheFS's internal object
+ structures.
+
+ The debug_id is a simple integer that can be used in debugging messages
+ that refer to a particular object. In such a case it should be printed
+ using "OBJ%x" to be consistent with FS-Cache.
+
+ Each object contains a pointer to the cookie that represents the object it
+ is backing. An object should retired when put_object() is called if it is
+ in state FSCACHE_OBJECT_RECYCLING. The fscache_object struct should be
+ initialised by calling fscache_object_init(object).
+
+
+ (*) FS-Cache operation record:
+
+ struct fscache_operation {
+ atomic_t usage;
+ struct fscache_object *object;
+ unsigned long flags;
+ #define FSCACHE_OP_EXCLUSIVE
+ void (*processor)(struct fscache_operation *op);
+ void (*release)(struct fscache_operation *op);
+ ...
+ };
+
+ FS-Cache has a pool of threads that it uses to give CPU time to the
+ various asynchronous operations that need to be done as part of driving
+ the cache. These are represented by the above structure. The processor
+ method is called to give the op CPU time, and the release method to get
+ rid of it when its usage count reaches 0.
+
+ An operation can be made exclusive upon an object by setting the
+ appropriate flag before enqueuing it with fscache_enqueue_operation(). If
+ an operation needs more processing time, it should be enqueued again.
+
+
+ (*) FS-Cache retrieval operation record:
+
+ struct fscache_retrieval {
+ struct fscache_operation op;
+ struct address_space *mapping;
+ struct list_head *to_do;
+ ...
+ };
+
+ A structure of this type is allocated by FS-Cache to record retrieval and
+ allocation requests made by the netfs. This struct is then passed to the
+ backend to do the operation. The backend may get extra refs to it by
+ calling fscache_get_retrieval() and refs may be discarded by calling
+ fscache_put_retrieval().
+
+ A retrieval operation can be used by the backend to do retrieval work. To
+ do this, the retrieval->op.processor method pointer should be set
+ appropriately by the backend and fscache_enqueue_retrieval() called to
+ submit it to the thread pool. CacheFiles, for example, uses this to queue
+ page examination when it detects PG_lock being cleared.
+
+ The to_do field is an empty list available for the cache backend to use as
+ it sees fit.
+
+
+ (*) FS-Cache storage operation record:
+
+ struct fscache_storage {
+ struct fscache_operation op;
+ pgoff_t store_limit;
+ ...
+ };
+
+ A structure of this type is allocated by FS-Cache to record outstanding
+ writes to be made. FS-Cache itself enqueues this operation and invokes
+ the write_page() method on the object at appropriate times to effect
+ storage.
+
+
+================
+CACHE OPERATIONS
+================
+
+The cache backend provides FS-Cache with a table of operations that can be
+performed on the denizens of the cache. These are held in a structure of type:
+
+ struct fscache_cache_ops
+
+ (*) Name of cache provider [mandatory]:
+
+ const char *name
+
+ This isn't strictly an operation, but should be pointed at a string naming
+ the backend.
+
+
+ (*) Allocate a new object [mandatory]:
+
+ struct fscache_object *(*alloc_object)(struct fscache_cache *cache,
+ struct fscache_cookie *cookie)
+
+ This method is used to allocate a cache object representation to back a
+ cookie in a particular cache. fscache_object_init() should be called on
+ the object to initialise it prior to returning.
+
+ This function may also be used to parse the index key to be used for
+ multiple lookup calls to turn it into a more convenient form. FS-Cache
+ will call the lookup_complete() method to allow the cache to release the
+ form once lookup is complete or aborted.
+
+
+ (*) Look up and create object [mandatory]:
+
+ void (*lookup_object)(struct fscache_object *object)
+
+ This method is used to look up an object, given that the object is already
+ allocated and attached to the cookie. This should instantiate that object
+ in the cache if it can.
+
+ The method should call fscache_object_lookup_negative() as soon as
+ possible if it determines the object doesn't exist in the cache. If the
+ object is found to exist and the netfs indicates that it is valid then
+ fscache_obtained_object() should be called once the object is in a
+ position to have data stored in it. Similarly, fscache_obtained_object()
+ should also be called once a non-present object has been created.
+
+ If a lookup error occurs, fscache_object_lookup_error() should be called
+ to abort the lookup of that object.
+
+
+ (*) Release lookup data [mandatory]:
+
+ void (*lookup_complete)(struct fscache_object *object)
+
+ This method is called to ask the cache to release any resources it was
+ using to perform a lookup.
+
+
+ (*) Increment object refcount [mandatory]:
+
+ struct fscache_object *(*grab_object)(struct fscache_object *object)
+
+ This method is called to increment the reference count on an object. It
+ may fail (for instance if the cache is being withdrawn) by returning NULL.
+ It should return the object pointer if successful.
+
+
+ (*) Lock/Unlock object [mandatory]:
+
+ void (*lock_object)(struct fscache_object *object)
+ void (*unlock_object)(struct fscache_object *object)
+
+ These methods are used to exclusively lock an object. It must be possible
+ to schedule with the lock held, so a spinlock isn't sufficient.
+
+
+ (*) Pin/Unpin object [optional]:
+
+ int (*pin_object)(struct fscache_object *object)
+ void (*unpin_object)(struct fscache_object *object)
+
+ These methods are used to pin an object into the cache. Once pinned an
+ object cannot be reclaimed to make space. Return -ENOSPC if there's not
+ enough space in the cache to permit this.
+
+
+ (*) Check coherency state of an object [mandatory]:
+
+ int (*check_consistency)(struct fscache_object *object)
+
+ This method is called to have the cache check the saved auxiliary data of
+ the object against the netfs's idea of the state. 0 should be returned
+ if they're consistent and -ESTALE otherwise. -ENOMEM and -ERESTARTSYS
+ may also be returned.
+
+ (*) Update object [mandatory]:
+
+ int (*update_object)(struct fscache_object *object)
+
+ This is called to update the index entry for the specified object. The
+ new information should be in object->cookie->netfs_data. This can be
+ obtained by calling object->cookie->def->get_aux()/get_attr().
+
+
+ (*) Invalidate data object [mandatory]:
+
+ int (*invalidate_object)(struct fscache_operation *op)
+
+ This is called to invalidate a data object (as pointed to by op->object).
+ All the data stored for this object should be discarded and an
+ attr_changed operation should be performed. The caller will follow up
+ with an object update operation.
+
+ fscache_op_complete() must be called on op before returning.
+
+
+ (*) Discard object [mandatory]:
+
+ void (*drop_object)(struct fscache_object *object)
+
+ This method is called to indicate that an object has been unbound from its
+ cookie, and that the cache should release the object's resources and
+ retire it if it's in state FSCACHE_OBJECT_RECYCLING.
+
+ This method should not attempt to release any references held by the
+ caller. The caller will invoke the put_object() method as appropriate.
+
+
+ (*) Release object reference [mandatory]:
+
+ void (*put_object)(struct fscache_object *object)
+
+ This method is used to discard a reference to an object. The object may
+ be freed when all the references to it are released.
+
+
+ (*) Synchronise a cache [mandatory]:
+
+ void (*sync)(struct fscache_cache *cache)
+
+ This is called to ask the backend to synchronise a cache with its backing
+ device.
+
+
+ (*) Dissociate a cache [mandatory]:
+
+ void (*dissociate_pages)(struct fscache_cache *cache)
+
+ This is called to ask a cache to perform any page dissociations as part of
+ cache withdrawal.
+
+
+ (*) Notification that the attributes on a netfs file changed [mandatory]:
+
+ int (*attr_changed)(struct fscache_object *object);
+
+ This is called to indicate to the cache that certain attributes on a netfs
+ file have changed (for example the maximum size a file may reach). The
+ cache can read these from the netfs by calling the cookie's get_attr()
+ method.
+
+ The cache may use the file size information to reserve space on the cache.
+ It should also call fscache_set_store_limit() to indicate to FS-Cache the
+ highest byte it's willing to store for an object.
+
+ This method may return -ve if an error occurred or the cache object cannot
+ be expanded. In such a case, the object will be withdrawn from service.
+
+ This operation is run asynchronously from FS-Cache's thread pool, and
+ storage and retrieval operations from the netfs are excluded during the
+ execution of this operation.
+
+
+ (*) Reserve cache space for an object's data [optional]:
+
+ int (*reserve_space)(struct fscache_object *object, loff_t size);
+
+ This is called to request that cache space be reserved to hold the data
+ for an object and the metadata used to track it. Zero size should be
+ taken as request to cancel a reservation.
+
+ This should return 0 if successful, -ENOSPC if there isn't enough space
+ available, or -ENOMEM or -EIO on other errors.
+
+ The reservation may exceed the current size of the object, thus permitting
+ future expansion. If the amount of space consumed by an object would
+ exceed the reservation, it's permitted to refuse requests to allocate
+ pages, but not required. An object may be pruned down to its reservation
+ size if larger than that already.
+
+
+ (*) Request page be read from cache [mandatory]:
+
+ int (*read_or_alloc_page)(struct fscache_retrieval *op,
+ struct page *page,
+ gfp_t gfp)
+
+ This is called to attempt to read a netfs page from the cache, or to
+ reserve a backing block if not. FS-Cache will have done as much checking
+ as it can before calling, but most of the work belongs to the backend.
+
+ If there's no page in the cache, then -ENODATA should be returned if the
+ backend managed to reserve a backing block; -ENOBUFS or -ENOMEM if it
+ didn't.
+
+ If there is suitable data in the cache, then a read operation should be
+ queued and 0 returned. When the read finishes, fscache_end_io() should be
+ called.
+
+ The fscache_mark_pages_cached() should be called for the page if any cache
+ metadata is retained. This will indicate to the netfs that the page needs
+ explicit uncaching. This operation takes a pagevec, thus allowing several
+ pages to be marked at once.
+
+ The retrieval record pointed to by op should be retained for each page
+ queued and released when I/O on the page has been formally ended.
+ fscache_get/put_retrieval() are available for this purpose.
+
+ The retrieval record may be used to get CPU time via the FS-Cache thread
+ pool. If this is desired, the op->op.processor should be set to point to
+ the appropriate processing routine, and fscache_enqueue_retrieval() should
+ be called at an appropriate point to request CPU time. For instance, the
+ retrieval routine could be enqueued upon the completion of a disk read.
+ The to_do field in the retrieval record is provided to aid in this.
+
+ If an I/O error occurs, fscache_io_error() should be called and -ENOBUFS
+ returned if possible or fscache_end_io() called with a suitable error
+ code.
+
+ fscache_put_retrieval() should be called after a page or pages are dealt
+ with. This will complete the operation when all pages are dealt with.
+
+
+ (*) Request pages be read from cache [mandatory]:
+
+ int (*read_or_alloc_pages)(struct fscache_retrieval *op,
+ struct list_head *pages,
+ unsigned *nr_pages,
+ gfp_t gfp)
+
+ This is like the read_or_alloc_page() method, except it is handed a list
+ of pages instead of one page. Any pages on which a read operation is
+ started must be added to the page cache for the specified mapping and also
+ to the LRU. Such pages must also be removed from the pages list and
+ *nr_pages decremented per page.
+
+ If there was an error such as -ENOMEM, then that should be returned; else
+ if one or more pages couldn't be read or allocated, then -ENOBUFS should
+ be returned; else if one or more pages couldn't be read, then -ENODATA
+ should be returned. If all the pages are dispatched then 0 should be
+ returned.
+
+
+ (*) Request page be allocated in the cache [mandatory]:
+
+ int (*allocate_page)(struct fscache_retrieval *op,
+ struct page *page,
+ gfp_t gfp)
+
+ This is like the read_or_alloc_page() method, except that it shouldn't
+ read from the cache, even if there's data there that could be retrieved.
+ It should, however, set up any internal metadata required such that
+ the write_page() method can write to the cache.
+
+ If there's no backing block available, then -ENOBUFS should be returned
+ (or -ENOMEM if there were other problems). If a block is successfully
+ allocated, then the netfs page should be marked and 0 returned.
+
+
+ (*) Request pages be allocated in the cache [mandatory]:
+
+ int (*allocate_pages)(struct fscache_retrieval *op,
+ struct list_head *pages,
+ unsigned *nr_pages,
+ gfp_t gfp)
+
+ This is an multiple page version of the allocate_page() method. pages and
+ nr_pages should be treated as for the read_or_alloc_pages() method.
+
+
+ (*) Request page be written to cache [mandatory]:
+
+ int (*write_page)(struct fscache_storage *op,
+ struct page *page);
+
+ This is called to write from a page on which there was a previously
+ successful read_or_alloc_page() call or similar. FS-Cache filters out
+ pages that don't have mappings.
+
+ This method is called asynchronously from the FS-Cache thread pool. It is
+ not required to actually store anything, provided -ENODATA is then
+ returned to the next read of this page.
+
+ If an error occurred, then a negative error code should be returned,
+ otherwise zero should be returned. FS-Cache will take appropriate action
+ in response to an error, such as withdrawing this object.
+
+ If this method returns success then FS-Cache will inform the netfs
+ appropriately.
+
+
+ (*) Discard retained per-page metadata [mandatory]:
+
+ void (*uncache_page)(struct fscache_object *object, struct page *page)
+
+ This is called when a netfs page is being evicted from the pagecache. The
+ cache backend should tear down any internal representation or tracking it
+ maintains for this page.
+
+
+==================
+FS-CACHE UTILITIES
+==================
+
+FS-Cache provides some utilities that a cache backend may make use of:
+
+ (*) Note occurrence of an I/O error in a cache:
+
+ void fscache_io_error(struct fscache_cache *cache)
+
+ This tells FS-Cache that an I/O error occurred in the cache. After this
+ has been called, only resource dissociation operations (object and page
+ release) will be passed from the netfs to the cache backend for the
+ specified cache.
+
+ This does not actually withdraw the cache. That must be done separately.
+
+
+ (*) Invoke the retrieval I/O completion function:
+
+ void fscache_end_io(struct fscache_retrieval *op, struct page *page,
+ int error);
+
+ This is called to note the end of an attempt to retrieve a page. The
+ error value should be 0 if successful and an error otherwise.
+
+
+ (*) Record that one or more pages being retrieved or allocated have been dealt
+ with:
+
+ void fscache_retrieval_complete(struct fscache_retrieval *op,
+ int n_pages);
+
+ This is called to record the fact that one or more pages have been dealt
+ with and are no longer the concern of this operation. When the number of
+ pages remaining in the operation reaches 0, the operation will be
+ completed.
+
+
+ (*) Record operation completion:
+
+ void fscache_op_complete(struct fscache_operation *op);
+
+ This is called to record the completion of an operation. This deducts
+ this operation from the parent object's run state, potentially permitting
+ one or more pending operations to start running.
+
+
+ (*) Set highest store limit:
+
+ void fscache_set_store_limit(struct fscache_object *object,
+ loff_t i_size);
+
+ This sets the limit FS-Cache imposes on the highest byte it's willing to
+ try and store for a netfs. Any page over this limit is automatically
+ rejected by fscache_read_alloc_page() and co with -ENOBUFS.
+
+
+ (*) Mark pages as being cached:
+
+ void fscache_mark_pages_cached(struct fscache_retrieval *op,
+ struct pagevec *pagevec);
+
+ This marks a set of pages as being cached. After this has been called,
+ the netfs must call fscache_uncache_page() to unmark the pages.
+
+
+ (*) Perform coherency check on an object:
+
+ enum fscache_checkaux fscache_check_aux(struct fscache_object *object,
+ const void *data,
+ uint16_t datalen);
+
+ This asks the netfs to perform a coherency check on an object that has
+ just been looked up. The cookie attached to the object will determine the
+ netfs to use. data and datalen should specify where the auxiliary data
+ retrieved from the cache can be found.
+
+ One of three values will be returned:
+
+ (*) FSCACHE_CHECKAUX_OKAY
+
+ The coherency data indicates the object is valid as is.
+
+ (*) FSCACHE_CHECKAUX_NEEDS_UPDATE
+
+ The coherency data needs updating, but otherwise the object is
+ valid.
+
+ (*) FSCACHE_CHECKAUX_OBSOLETE
+
+ The coherency data indicates that the object is obsolete and should
+ be discarded.
+
+
+ (*) Initialise a freshly allocated object:
+
+ void fscache_object_init(struct fscache_object *object);
+
+ This initialises all the fields in an object representation.
+
+
+ (*) Indicate the destruction of an object:
+
+ void fscache_object_destroyed(struct fscache_cache *cache);
+
+ This must be called to inform FS-Cache that an object that belonged to a
+ cache has been destroyed and deallocated. This will allow continuation
+ of the cache withdrawal process when it is stopped pending destruction of
+ all the objects.
+
+
+ (*) Indicate negative lookup on an object:
+
+ void fscache_object_lookup_negative(struct fscache_object *object);
+
+ This is called to indicate to FS-Cache that a lookup process for an object
+ found a negative result.
+
+ This changes the state of an object to permit reads pending on lookup
+ completion to go off and start fetching data from the netfs server as it's
+ known at this point that there can't be any data in the cache.
+
+ This may be called multiple times on an object. Only the first call is
+ significant - all subsequent calls are ignored.
+
+
+ (*) Indicate an object has been obtained:
+
+ void fscache_obtained_object(struct fscache_object *object);
+
+ This is called to indicate to FS-Cache that a lookup process for an object
+ produced a positive result, or that an object was created. This should
+ only be called once for any particular object.
+
+ This changes the state of an object to indicate:
+
+ (1) if no call to fscache_object_lookup_negative() has been made on
+ this object, that there may be data available, and that reads can
+ now go and look for it; and
+
+ (2) that writes may now proceed against this object.
+
+
+ (*) Indicate that object lookup failed:
+
+ void fscache_object_lookup_error(struct fscache_object *object);
+
+ This marks an object as having encountered a fatal error (usually EIO)
+ and causes it to move into a state whereby it will be withdrawn as soon
+ as possible.
+
+
+ (*) Get and release references on a retrieval record:
+
+ void fscache_get_retrieval(struct fscache_retrieval *op);
+ void fscache_put_retrieval(struct fscache_retrieval *op);
+
+ These two functions are used to retain a retrieval record whilst doing
+ asynchronous data retrieval and block allocation.
+
+
+ (*) Enqueue a retrieval record for processing.
+
+ void fscache_enqueue_retrieval(struct fscache_retrieval *op);
+
+ This enqueues a retrieval record for processing by the FS-Cache thread
+ pool. One of the threads in the pool will invoke the retrieval record's
+ op->op.processor callback function. This function may be called from
+ within the callback function.
+
+
+ (*) List of object state names:
+
+ const char *fscache_object_states[];
+
+ For debugging purposes, this may be used to turn the state that an object
+ is in into a text string for display purposes.
diff --git a/Documentation/filesystems/caching/cachefiles.txt b/Documentation/filesystems/caching/cachefiles.txt
new file mode 100644
index 000000000..748a1ae49
--- /dev/null
+++ b/Documentation/filesystems/caching/cachefiles.txt
@@ -0,0 +1,501 @@
+ ===============================================
+ CacheFiles: CACHE ON ALREADY MOUNTED FILESYSTEM
+ ===============================================
+
+Contents:
+
+ (*) Overview.
+
+ (*) Requirements.
+
+ (*) Configuration.
+
+ (*) Starting the cache.
+
+ (*) Things to avoid.
+
+ (*) Cache culling.
+
+ (*) Cache structure.
+
+ (*) Security model and SELinux.
+
+ (*) A note on security.
+
+ (*) Statistical information.
+
+ (*) Debugging.
+
+
+========
+OVERVIEW
+========
+
+CacheFiles is a caching backend that's meant to use as a cache a directory on
+an already mounted filesystem of a local type (such as Ext3).
+
+CacheFiles uses a userspace daemon to do some of the cache management - such as
+reaping stale nodes and culling. This is called cachefilesd and lives in
+/sbin.
+
+The filesystem and data integrity of the cache are only as good as those of the
+filesystem providing the backing services. Note that CacheFiles does not
+attempt to journal anything since the journalling interfaces of the various
+filesystems are very specific in nature.
+
+CacheFiles creates a misc character device - "/dev/cachefiles" - that is used
+to communication with the daemon. Only one thing may have this open at once,
+and whilst it is open, a cache is at least partially in existence. The daemon
+opens this and sends commands down it to control the cache.
+
+CacheFiles is currently limited to a single cache.
+
+CacheFiles attempts to maintain at least a certain percentage of free space on
+the filesystem, shrinking the cache by culling the objects it contains to make
+space if necessary - see the "Cache Culling" section. This means it can be
+placed on the same medium as a live set of data, and will expand to make use of
+spare space and automatically contract when the set of data requires more
+space.
+
+
+============
+REQUIREMENTS
+============
+
+The use of CacheFiles and its daemon requires the following features to be
+available in the system and in the cache filesystem:
+
+ - dnotify.
+
+ - extended attributes (xattrs).
+
+ - openat() and friends.
+
+ - bmap() support on files in the filesystem (FIBMAP ioctl).
+
+ - The use of bmap() to detect a partial page at the end of the file.
+
+It is strongly recommended that the "dir_index" option is enabled on Ext3
+filesystems being used as a cache.
+
+
+=============
+CONFIGURATION
+=============
+
+The cache is configured by a script in /etc/cachefilesd.conf. These commands
+set up cache ready for use. The following script commands are available:
+
+ (*) brun <N>%
+ (*) bcull <N>%
+ (*) bstop <N>%
+ (*) frun <N>%
+ (*) fcull <N>%
+ (*) fstop <N>%
+
+ Configure the culling limits. Optional. See the section on culling
+ The defaults are 7% (run), 5% (cull) and 1% (stop) respectively.
+
+ The commands beginning with a 'b' are file space (block) limits, those
+ beginning with an 'f' are file count limits.
+
+ (*) dir <path>
+
+ Specify the directory containing the root of the cache. Mandatory.
+
+ (*) tag <name>
+
+ Specify a tag to FS-Cache to use in distinguishing multiple caches.
+ Optional. The default is "CacheFiles".
+
+ (*) debug <mask>
+
+ Specify a numeric bitmask to control debugging in the kernel module.
+ Optional. The default is zero (all off). The following values can be
+ OR'd into the mask to collect various information:
+
+ 1 Turn on trace of function entry (_enter() macros)
+ 2 Turn on trace of function exit (_leave() macros)
+ 4 Turn on trace of internal debug points (_debug())
+
+ This mask can also be set through sysfs, eg:
+
+ echo 5 >/sys/modules/cachefiles/parameters/debug
+
+
+==================
+STARTING THE CACHE
+==================
+
+The cache is started by running the daemon. The daemon opens the cache device,
+configures the cache and tells it to begin caching. At that point the cache
+binds to fscache and the cache becomes live.
+
+The daemon is run as follows:
+
+ /sbin/cachefilesd [-d]* [-s] [-n] [-f <configfile>]
+
+The flags are:
+
+ (*) -d
+
+ Increase the debugging level. This can be specified multiple times and
+ is cumulative with itself.
+
+ (*) -s
+
+ Send messages to stderr instead of syslog.
+
+ (*) -n
+
+ Don't daemonise and go into background.
+
+ (*) -f <configfile>
+
+ Use an alternative configuration file rather than the default one.
+
+
+===============
+THINGS TO AVOID
+===============
+
+Do not mount other things within the cache as this will cause problems. The
+kernel module contains its own very cut-down path walking facility that ignores
+mountpoints, but the daemon can't avoid them.
+
+Do not create, rename or unlink files and directories in the cache whilst the
+cache is active, as this may cause the state to become uncertain.
+
+Renaming files in the cache might make objects appear to be other objects (the
+filename is part of the lookup key).
+
+Do not change or remove the extended attributes attached to cache files by the
+cache as this will cause the cache state management to get confused.
+
+Do not create files or directories in the cache, lest the cache get confused or
+serve incorrect data.
+
+Do not chmod files in the cache. The module creates things with minimal
+permissions to prevent random users being able to access them directly.
+
+
+=============
+CACHE CULLING
+=============
+
+The cache may need culling occasionally to make space. This involves
+discarding objects from the cache that have been used less recently than
+anything else. Culling is based on the access time of data objects. Empty
+directories are culled if not in use.
+
+Cache culling is done on the basis of the percentage of blocks and the
+percentage of files available in the underlying filesystem. There are six
+"limits":
+
+ (*) brun
+ (*) frun
+
+ If the amount of free space and the number of available files in the cache
+ rises above both these limits, then culling is turned off.
+
+ (*) bcull
+ (*) fcull
+
+ If the amount of available space or the number of available files in the
+ cache falls below either of these limits, then culling is started.
+
+ (*) bstop
+ (*) fstop
+
+ If the amount of available space or the number of available files in the
+ cache falls below either of these limits, then no further allocation of
+ disk space or files is permitted until culling has raised things above
+ these limits again.
+
+These must be configured thusly:
+
+ 0 <= bstop < bcull < brun < 100
+ 0 <= fstop < fcull < frun < 100
+
+Note that these are percentages of available space and available files, and do
+_not_ appear as 100 minus the percentage displayed by the "df" program.
+
+The userspace daemon scans the cache to build up a table of cullable objects.
+These are then culled in least recently used order. A new scan of the cache is
+started as soon as space is made in the table. Objects will be skipped if
+their atimes have changed or if the kernel module says it is still using them.
+
+
+===============
+CACHE STRUCTURE
+===============
+
+The CacheFiles module will create two directories in the directory it was
+given:
+
+ (*) cache/
+
+ (*) graveyard/
+
+The active cache objects all reside in the first directory. The CacheFiles
+kernel module moves any retired or culled objects that it can't simply unlink
+to the graveyard from which the daemon will actually delete them.
+
+The daemon uses dnotify to monitor the graveyard directory, and will delete
+anything that appears therein.
+
+
+The module represents index objects as directories with the filename "I..." or
+"J...". Note that the "cache/" directory is itself a special index.
+
+Data objects are represented as files if they have no children, or directories
+if they do. Their filenames all begin "D..." or "E...". If represented as a
+directory, data objects will have a file in the directory called "data" that
+actually holds the data.
+
+Special objects are similar to data objects, except their filenames begin
+"S..." or "T...".
+
+
+If an object has children, then it will be represented as a directory.
+Immediately in the representative directory are a collection of directories
+named for hash values of the child object keys with an '@' prepended. Into
+this directory, if possible, will be placed the representations of the child
+objects:
+
+ INDEX INDEX INDEX DATA FILES
+ ========= ========== ================================= ================
+ cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400
+ cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...DB1ry
+ cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...N22ry
+ cache/@4a/I03nfs/@30/Ji000000000000000--fHg8hi8400/@75/Es0g000w...FP1ry
+
+
+If the key is so long that it exceeds NAME_MAX with the decorations added on to
+it, then it will be cut into pieces, the first few of which will be used to
+make a nest of directories, and the last one of which will be the objects
+inside the last directory. The names of the intermediate directories will have
+'+' prepended:
+
+ J1223/@23/+xy...z/+kl...m/Epqr
+
+
+Note that keys are raw data, and not only may they exceed NAME_MAX in size,
+they may also contain things like '/' and NUL characters, and so they may not
+be suitable for turning directly into a filename.
+
+To handle this, CacheFiles will use a suitably printable filename directly and
+"base-64" encode ones that aren't directly suitable. The two versions of
+object filenames indicate the encoding:
+
+ OBJECT TYPE PRINTABLE ENCODED
+ =============== =============== ===============
+ Index "I..." "J..."
+ Data "D..." "E..."
+ Special "S..." "T..."
+
+Intermediate directories are always "@" or "+" as appropriate.
+
+
+Each object in the cache has an extended attribute label that holds the object
+type ID (required to distinguish special objects) and the auxiliary data from
+the netfs. The latter is used to detect stale objects in the cache and update
+or retire them.
+
+
+Note that CacheFiles will erase from the cache any file it doesn't recognise or
+any file of an incorrect type (such as a FIFO file or a device file).
+
+
+==========================
+SECURITY MODEL AND SELINUX
+==========================
+
+CacheFiles is implemented to deal properly with the LSM security features of
+the Linux kernel and the SELinux facility.
+
+One of the problems that CacheFiles faces is that it is generally acting on
+behalf of a process, and running in that process's context, and that includes a
+security context that is not appropriate for accessing the cache - either
+because the files in the cache are inaccessible to that process, or because if
+the process creates a file in the cache, that file may be inaccessible to other
+processes.
+
+The way CacheFiles works is to temporarily change the security context (fsuid,
+fsgid and actor security label) that the process acts as - without changing the
+security context of the process when it the target of an operation performed by
+some other process (so signalling and suchlike still work correctly).
+
+
+When the CacheFiles module is asked to bind to its cache, it:
+
+ (1) Finds the security label attached to the root cache directory and uses
+ that as the security label with which it will create files. By default,
+ this is:
+
+ cachefiles_var_t
+
+ (2) Finds the security label of the process which issued the bind request
+ (presumed to be the cachefilesd daemon), which by default will be:
+
+ cachefilesd_t
+
+ and asks LSM to supply a security ID as which it should act given the
+ daemon's label. By default, this will be:
+
+ cachefiles_kernel_t
+
+ SELinux transitions the daemon's security ID to the module's security ID
+ based on a rule of this form in the policy.
+
+ type_transition <daemon's-ID> kernel_t : process <module's-ID>;
+
+ For instance:
+
+ type_transition cachefilesd_t kernel_t : process cachefiles_kernel_t;
+
+
+The module's security ID gives it permission to create, move and remove files
+and directories in the cache, to find and access directories and files in the
+cache, to set and access extended attributes on cache objects, and to read and
+write files in the cache.
+
+The daemon's security ID gives it only a very restricted set of permissions: it
+may scan directories, stat files and erase files and directories. It may
+not read or write files in the cache, and so it is precluded from accessing the
+data cached therein; nor is it permitted to create new files in the cache.
+
+
+There are policy source files available in:
+
+ http://people.redhat.com/~dhowells/fscache/cachefilesd-0.8.tar.bz2
+
+and later versions. In that tarball, see the files:
+
+ cachefilesd.te
+ cachefilesd.fc
+ cachefilesd.if
+
+They are built and installed directly by the RPM.
+
+If a non-RPM based system is being used, then copy the above files to their own
+directory and run:
+
+ make -f /usr/share/selinux/devel/Makefile
+ semodule -i cachefilesd.pp
+
+You will need checkpolicy and selinux-policy-devel installed prior to the
+build.
+
+
+By default, the cache is located in /var/fscache, but if it is desirable that
+it should be elsewhere, than either the above policy files must be altered, or
+an auxiliary policy must be installed to label the alternate location of the
+cache.
+
+For instructions on how to add an auxiliary policy to enable the cache to be
+located elsewhere when SELinux is in enforcing mode, please see:
+
+ /usr/share/doc/cachefilesd-*/move-cache.txt
+
+When the cachefilesd rpm is installed; alternatively, the document can be found
+in the sources.
+
+
+==================
+A NOTE ON SECURITY
+==================
+
+CacheFiles makes use of the split security in the task_struct. It allocates
+its own task_security structure, and redirects current->cred to point to it
+when it acts on behalf of another process, in that process's context.
+
+The reason it does this is that it calls vfs_mkdir() and suchlike rather than
+bypassing security and calling inode ops directly. Therefore the VFS and LSM
+may deny the CacheFiles access to the cache data because under some
+circumstances the caching code is running in the security context of whatever
+process issued the original syscall on the netfs.
+
+Furthermore, should CacheFiles create a file or directory, the security
+parameters with that object is created (UID, GID, security label) would be
+derived from that process that issued the system call, thus potentially
+preventing other processes from accessing the cache - including CacheFiles's
+cache management daemon (cachefilesd).
+
+What is required is to temporarily override the security of the process that
+issued the system call. We can't, however, just do an in-place change of the
+security data as that affects the process as an object, not just as a subject.
+This means it may lose signals or ptrace events for example, and affects what
+the process looks like in /proc.
+
+So CacheFiles makes use of a logical split in the security between the
+objective security (task->real_cred) and the subjective security (task->cred).
+The objective security holds the intrinsic security properties of a process and
+is never overridden. This is what appears in /proc, and is what is used when a
+process is the target of an operation by some other process (SIGKILL for
+example).
+
+The subjective security holds the active security properties of a process, and
+may be overridden. This is not seen externally, and is used whan a process
+acts upon another object, for example SIGKILLing another process or opening a
+file.
+
+LSM hooks exist that allow SELinux (or Smack or whatever) to reject a request
+for CacheFiles to run in a context of a specific security label, or to create
+files and directories with another security label.
+
+
+=======================
+STATISTICAL INFORMATION
+=======================
+
+If FS-Cache is compiled with the following option enabled:
+
+ CONFIG_CACHEFILES_HISTOGRAM=y
+
+then it will gather certain statistics and display them through a proc file.
+
+ (*) /proc/fs/cachefiles/histogram
+
+ cat /proc/fs/cachefiles/histogram
+ JIFS SECS LOOKUPS MKDIRS CREATES
+ ===== ===== ========= ========= =========
+
+ This shows the breakdown of the number of times each amount of time
+ between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The
+ columns are as follows:
+
+ COLUMN TIME MEASUREMENT
+ ======= =======================================================
+ LOOKUPS Length of time to perform a lookup on the backing fs
+ MKDIRS Length of time to perform a mkdir on the backing fs
+ CREATES Length of time to perform a create on the backing fs
+
+ Each row shows the number of events that took a particular range of times.
+ Each step is 1 jiffy in size. The JIFS column indicates the particular
+ jiffy range covered, and the SECS field the equivalent number of seconds.
+
+
+=========
+DEBUGGING
+=========
+
+If CONFIG_CACHEFILES_DEBUG is enabled, the CacheFiles facility can have runtime
+debugging enabled by adjusting the value in:
+
+ /sys/module/cachefiles/parameters/debug
+
+This is a bitmask of debugging streams to enable:
+
+ BIT VALUE STREAM POINT
+ ======= ======= =============================== =======================
+ 0 1 General Function entry trace
+ 1 2 Function exit trace
+ 2 4 General
+
+The appropriate set of values should be OR'd together and the result written to
+the control file. For example:
+
+ echo $((1|4|8)) >/sys/module/cachefiles/parameters/debug
+
+will turn on all function entry debugging.
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
new file mode 100644
index 000000000..770267af5
--- /dev/null
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -0,0 +1,443 @@
+ ==========================
+ General Filesystem Caching
+ ==========================
+
+========
+OVERVIEW
+========
+
+This facility is a general purpose cache for network filesystems, though it
+could be used for caching other things such as ISO9660 filesystems too.
+
+FS-Cache mediates between cache backends (such as CacheFS) and network
+filesystems:
+
+ +---------+
+ | | +--------------+
+ | NFS |--+ | |
+ | | | +-->| CacheFS |
+ +---------+ | +----------+ | | /dev/hda5 |
+ | | | | +--------------+
+ +---------+ +-->| | |
+ | | | |--+
+ | AFS |----->| FS-Cache |
+ | | | |--+
+ +---------+ +-->| | |
+ | | | | +--------------+
+ +---------+ | +----------+ | | |
+ | | | +-->| CacheFiles |
+ | ISOFS |--+ | /var/cache |
+ | | +--------------+
+ +---------+
+
+Or to look at it another way, FS-Cache is a module that provides a caching
+facility to a network filesystem such that the cache is transparent to the
+user:
+
+ +---------+
+ | |
+ | Server |
+ | |
+ +---------+
+ | NETWORK
+ ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ |
+ | +----------+
+ V | |
+ +---------+ | |
+ | | | |
+ | NFS |----->| FS-Cache |
+ | | | |--+
+ +---------+ | | | +--------------+ +--------------+
+ | | | | | | | |
+ V +----------+ +-->| CacheFiles |-->| Ext3 |
+ +---------+ | /var/cache | | /dev/sda6 |
+ | | +--------------+ +--------------+
+ | VFS | ^ ^
+ | | | |
+ +---------+ +--------------+ |
+ | KERNEL SPACE | |
+ ~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~|~~~~~~|~~~~
+ | USER SPACE | |
+ V | |
+ +---------+ +--------------+
+ | | | |
+ | Process | | cachefilesd |
+ | | | |
+ +---------+ +--------------+
+
+
+FS-Cache does not follow the idea of completely loading every netfs file
+opened in its entirety into a cache before permitting it to be accessed and
+then serving the pages out of that cache rather than the netfs inode because:
+
+ (1) It must be practical to operate without a cache.
+
+ (2) The size of any accessible file must not be limited to the size of the
+ cache.
+
+ (3) The combined size of all opened files (this includes mapped libraries)
+ must not be limited to the size of the cache.
+
+ (4) The user should not be forced to download an entire file just to do a
+ one-off access of a small portion of it (such as might be done with the
+ "file" program).
+
+It instead serves the cache out in PAGE_SIZE chunks as and when requested by
+the netfs('s) using it.
+
+
+FS-Cache provides the following facilities:
+
+ (1) More than one cache can be used at once. Caches can be selected
+ explicitly by use of tags.
+
+ (2) Caches can be added / removed at any time.
+
+ (3) The netfs is provided with an interface that allows either party to
+ withdraw caching facilities from a file (required for (2)).
+
+ (4) The interface to the netfs returns as few errors as possible, preferring
+ rather to let the netfs remain oblivious.
+
+ (5) Cookies are used to represent indices, files and other objects to the
+ netfs. The simplest cookie is just a NULL pointer - indicating nothing
+ cached there.
+
+ (6) The netfs is allowed to propose - dynamically - any index hierarchy it
+ desires, though it must be aware that the index search function is
+ recursive, stack space is limited, and indices can only be children of
+ indices.
+
+ (7) Data I/O is done direct to and from the netfs's pages. The netfs
+ indicates that page A is at index B of the data-file represented by cookie
+ C, and that it should be read or written. The cache backend may or may
+ not start I/O on that page, but if it does, a netfs callback will be
+ invoked to indicate completion. The I/O may be either synchronous or
+ asynchronous.
+
+ (8) Cookies can be "retired" upon release. At this point FS-Cache will mark
+ them as obsolete and the index hierarchy rooted at that point will get
+ recycled.
+
+ (9) The netfs provides a "match" function for index searches. In addition to
+ saying whether a match was made or not, this can also specify that an
+ entry should be updated or deleted.
+
+(10) As much as possible is done asynchronously.
+
+
+FS-Cache maintains a virtual indexing tree in which all indices, files, objects
+and pages are kept. Bits of this tree may actually reside in one or more
+caches.
+
+ FSDEF
+ |
+ +------------------------------------+
+ | |
+ NFS AFS
+ | |
+ +--------------------------+ +-----------+
+ | | | |
+ homedir mirror afs.org redhat.com
+ | | |
+ +------------+ +---------------+ +----------+
+ | | | | | |
+ 00001 00002 00007 00125 vol00001 vol00002
+ | | | | |
+ +---+---+ +-----+ +---+ +------+------+ +-----+----+
+ | | | | | | | | | | | | |
+PG0 PG1 PG2 PG0 XATTR PG0 PG1 DIRENT DIRENT DIRENT R/W R/O Bak
+ | |
+ PG0 +-------+
+ | |
+ 00001 00003
+ |
+ +---+---+
+ | | |
+ PG0 PG1 PG2
+
+In the example above, you can see two netfs's being backed: NFS and AFS. These
+have different index hierarchies:
+
+ (*) The NFS primary index contains per-server indices. Each server index is
+ indexed by NFS file handles to get data file objects. Each data file
+ objects can have an array of pages, but may also have further child
+ objects, such as extended attributes and directory entries. Extended
+ attribute objects themselves have page-array contents.
+
+ (*) The AFS primary index contains per-cell indices. Each cell index contains
+ per-logical-volume indices. Each of volume index contains up to three
+ indices for the read-write, read-only and backup mirrors of those volumes.
+ Each of these contains vnode data file objects, each of which contains an
+ array of pages.
+
+The very top index is the FS-Cache master index in which individual netfs's
+have entries.
+
+Any index object may reside in more than one cache, provided it only has index
+children. Any index with non-index object children will be assumed to only
+reside in one cache.
+
+
+The netfs API to FS-Cache can be found in:
+
+ Documentation/filesystems/caching/netfs-api.txt
+
+The cache backend API to FS-Cache can be found in:
+
+ Documentation/filesystems/caching/backend-api.txt
+
+A description of the internal representations and object state machine can be
+found in:
+
+ Documentation/filesystems/caching/object.txt
+
+
+=======================
+STATISTICAL INFORMATION
+=======================
+
+If FS-Cache is compiled with the following options enabled:
+
+ CONFIG_FSCACHE_STATS=y
+ CONFIG_FSCACHE_HISTOGRAM=y
+
+then it will gather certain statistics and display them through a number of
+proc files.
+
+ (*) /proc/fs/fscache/stats
+
+ This shows counts of a number of events that can happen in FS-Cache:
+
+ CLASS EVENT MEANING
+ ======= ======= =======================================================
+ Cookies idx=N Number of index cookies allocated
+ dat=N Number of data storage cookies allocated
+ spc=N Number of special cookies allocated
+ Objects alc=N Number of objects allocated
+ nal=N Number of object allocation failures
+ avl=N Number of objects that reached the available state
+ ded=N Number of objects that reached the dead state
+ ChkAux non=N Number of objects that didn't have a coherency check
+ ok=N Number of objects that passed a coherency check
+ upd=N Number of objects that needed a coherency data update
+ obs=N Number of objects that were declared obsolete
+ Pages mrk=N Number of pages marked as being cached
+ unc=N Number of uncache page requests seen
+ Acquire n=N Number of acquire cookie requests seen
+ nul=N Number of acq reqs given a NULL parent
+ noc=N Number of acq reqs rejected due to no cache available
+ ok=N Number of acq reqs succeeded
+ nbf=N Number of acq reqs rejected due to error
+ oom=N Number of acq reqs failed on ENOMEM
+ Lookups n=N Number of lookup calls made on cache backends
+ neg=N Number of negative lookups made
+ pos=N Number of positive lookups made
+ crt=N Number of objects created by lookup
+ tmo=N Number of lookups timed out and requeued
+ Updates n=N Number of update cookie requests seen
+ nul=N Number of upd reqs given a NULL parent
+ run=N Number of upd reqs granted CPU time
+ Relinqs n=N Number of relinquish cookie requests seen
+ nul=N Number of rlq reqs given a NULL parent
+ wcr=N Number of rlq reqs waited on completion of creation
+ AttrChg n=N Number of attribute changed requests seen
+ ok=N Number of attr changed requests queued
+ nbf=N Number of attr changed rejected -ENOBUFS
+ oom=N Number of attr changed failed -ENOMEM
+ run=N Number of attr changed ops given CPU time
+ Allocs n=N Number of allocation requests seen
+ ok=N Number of successful alloc reqs
+ wt=N Number of alloc reqs that waited on lookup completion
+ nbf=N Number of alloc reqs rejected -ENOBUFS
+ int=N Number of alloc reqs aborted -ERESTARTSYS
+ ops=N Number of alloc reqs submitted
+ owt=N Number of alloc reqs waited for CPU time
+ abt=N Number of alloc reqs aborted due to object death
+ Retrvls n=N Number of retrieval (read) requests seen
+ ok=N Number of successful retr reqs
+ wt=N Number of retr reqs that waited on lookup completion
+ nod=N Number of retr reqs returned -ENODATA
+ nbf=N Number of retr reqs rejected -ENOBUFS
+ int=N Number of retr reqs aborted -ERESTARTSYS
+ oom=N Number of retr reqs failed -ENOMEM
+ ops=N Number of retr reqs submitted
+ owt=N Number of retr reqs waited for CPU time
+ abt=N Number of retr reqs aborted due to object death
+ Stores n=N Number of storage (write) requests seen
+ ok=N Number of successful store reqs
+ agn=N Number of store reqs on a page already pending storage
+ nbf=N Number of store reqs rejected -ENOBUFS
+ oom=N Number of store reqs failed -ENOMEM
+ ops=N Number of store reqs submitted
+ run=N Number of store reqs granted CPU time
+ pgs=N Number of pages given store req processing time
+ rxd=N Number of store reqs deleted from tracking tree
+ olm=N Number of store reqs over store limit
+ VmScan nos=N Number of release reqs against pages with no pending store
+ gon=N Number of release reqs against pages stored by time lock granted
+ bsy=N Number of release reqs ignored due to in-progress store
+ can=N Number of page stores cancelled due to release req
+ Ops pend=N Number of times async ops added to pending queues
+ run=N Number of times async ops given CPU time
+ enq=N Number of times async ops queued for processing
+ can=N Number of async ops cancelled
+ rej=N Number of async ops rejected due to object lookup/create failure
+ dfr=N Number of async ops queued for deferred release
+ rel=N Number of async ops released
+ gc=N Number of deferred-release async ops garbage collected
+ CacheOp alo=N Number of in-progress alloc_object() cache ops
+ luo=N Number of in-progress lookup_object() cache ops
+ luc=N Number of in-progress lookup_complete() cache ops
+ gro=N Number of in-progress grab_object() cache ops
+ upo=N Number of in-progress update_object() cache ops
+ dro=N Number of in-progress drop_object() cache ops
+ pto=N Number of in-progress put_object() cache ops
+ syn=N Number of in-progress sync_cache() cache ops
+ atc=N Number of in-progress attr_changed() cache ops
+ rap=N Number of in-progress read_or_alloc_page() cache ops
+ ras=N Number of in-progress read_or_alloc_pages() cache ops
+ alp=N Number of in-progress allocate_page() cache ops
+ als=N Number of in-progress allocate_pages() cache ops
+ wrp=N Number of in-progress write_page() cache ops
+ ucp=N Number of in-progress uncache_page() cache ops
+ dsp=N Number of in-progress dissociate_pages() cache ops
+
+
+ (*) /proc/fs/fscache/histogram
+
+ cat /proc/fs/fscache/histogram
+ JIFS SECS OBJ INST OP RUNS OBJ RUNS RETRV DLY RETRIEVLS
+ ===== ===== ========= ========= ========= ========= =========
+
+ This shows the breakdown of the number of times each amount of time
+ between 0 jiffies and HZ-1 jiffies a variety of tasks took to run. The
+ columns are as follows:
+
+ COLUMN TIME MEASUREMENT
+ ======= =======================================================
+ OBJ INST Length of time to instantiate an object
+ OP RUNS Length of time a call to process an operation took
+ OBJ RUNS Length of time a call to process an object event took
+ RETRV DLY Time between an requesting a read and lookup completing
+ RETRIEVLS Time between beginning and end of a retrieval
+
+ Each row shows the number of events that took a particular range of times.
+ Each step is 1 jiffy in size. The JIFS column indicates the particular
+ jiffy range covered, and the SECS field the equivalent number of seconds.
+
+
+===========
+OBJECT LIST
+===========
+
+If CONFIG_FSCACHE_OBJECT_LIST is enabled, the FS-Cache facility will maintain a
+list of all the objects currently allocated and allow them to be viewed
+through:
+
+ /proc/fs/fscache/objects
+
+This will look something like:
+
+ [root@andromeda ~]# head /proc/fs/fscache/objects
+ OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS EM EV F S | NETFS_COOKIE_DEF TY FL NETFS_DATA OBJECT_KEY, AUX_DATA
+ ======== ======== ==== ===== === === === == ===== == == = = | ================ == == ================ ================
+ 17e4b 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88001dd82820 010006017edcf8bbc93b43298fdfbe71e50b57b13a172c0117f38472, e567634700000000000000000000000063f2404a000000000000000000000000c9030000000000000000000063f2404a
+ 1693a 2 ACTV 0 0 0 0 0 0 7b 4 0 0 | NFS.fh DT 0 ffff88002db23380 010006017edcf8bbc93b43298fdfbe71e50b57b1e0162c01a2df0ea6, 420ebc4a000000000000000000000000420ebc4a0000000000000000000000000e1801000000000000000000420ebc4a
+
+where the first set of columns before the '|' describe the object:
+
+ COLUMN DESCRIPTION
+ ======= ===============================================================
+ OBJECT Object debugging ID (appears as OBJ%x in some debug messages)
+ PARENT Debugging ID of parent object
+ STAT Object state
+ CHLDN Number of child objects of this object
+ OPS Number of outstanding operations on this object
+ OOP Number of outstanding child object management operations
+ IPR
+ EX Number of outstanding exclusive operations
+ READS Number of outstanding read operations
+ EM Object's event mask
+ EV Events raised on this object
+ F Object flags
+ S Object work item busy state mask (1:pending 2:running)
+
+and the second set of columns describe the object's cookie, if present:
+
+ COLUMN DESCRIPTION
+ =============== =======================================================
+ NETFS_COOKIE_DEF Name of netfs cookie definition
+ TY Cookie type (IX - index, DT - data, hex - special)
+ FL Cookie flags
+ NETFS_DATA Netfs private data stored in the cookie
+ OBJECT_KEY Object key } 1 column, with separating comma
+ AUX_DATA Object aux data } presence may be configured
+
+The data shown may be filtered by attaching the a key to an appropriate keyring
+before viewing the file. Something like:
+
+ keyctl add user fscache:objlist <restrictions> @s
+
+where <restrictions> are a selection of the following letters:
+
+ K Show hexdump of object key (don't show if not given)
+ A Show hexdump of object aux data (don't show if not given)
+
+and the following paired letters:
+
+ C Show objects that have a cookie
+ c Show objects that don't have a cookie
+ B Show objects that are busy
+ b Show objects that aren't busy
+ W Show objects that have pending writes
+ w Show objects that don't have pending writes
+ R Show objects that have outstanding reads
+ r Show objects that don't have outstanding reads
+ S Show objects that have work queued
+ s Show objects that don't have work queued
+
+If neither side of a letter pair is given, then both are implied. For example:
+
+ keyctl add user fscache:objlist KB @s
+
+shows objects that are busy, and lists their object keys, but does not dump
+their auxiliary data. It also implies "CcWwRrSs", but as 'B' is given, 'b' is
+not implied.
+
+By default all objects and all fields will be shown.
+
+
+=========
+DEBUGGING
+=========
+
+If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime
+debugging enabled by adjusting the value in:
+
+ /sys/module/fscache/parameters/debug
+
+This is a bitmask of debugging streams to enable:
+
+ BIT VALUE STREAM POINT
+ ======= ======= =============================== =======================
+ 0 1 Cache management Function entry trace
+ 1 2 Function exit trace
+ 2 4 General
+ 3 8 Cookie management Function entry trace
+ 4 16 Function exit trace
+ 5 32 General
+ 6 64 Page handling Function entry trace
+ 7 128 Function exit trace
+ 8 256 General
+ 9 512 Operation management Function entry trace
+ 10 1024 Function exit trace
+ 11 2048 General
+
+The appropriate set of values should be OR'd together and the result written to
+the control file. For example:
+
+ echo $((1|8|64)) >/sys/module/fscache/parameters/debug
+
+will turn on all function entry debugging.
diff --git a/Documentation/filesystems/caching/netfs-api.txt b/Documentation/filesystems/caching/netfs-api.txt
new file mode 100644
index 000000000..aed6b9416
--- /dev/null
+++ b/Documentation/filesystems/caching/netfs-api.txt
@@ -0,0 +1,915 @@
+ ===============================
+ FS-CACHE NETWORK FILESYSTEM API
+ ===============================
+
+There's an API by which a network filesystem can make use of the FS-Cache
+facilities. This is based around a number of principles:
+
+ (1) Caches can store a number of different object types. There are two main
+ object types: indices and files. The first is a special type used by
+ FS-Cache to make finding objects faster and to make retiring of groups of
+ objects easier.
+
+ (2) Every index, file or other object is represented by a cookie. This cookie
+ may or may not have anything associated with it, but the netfs doesn't
+ need to care.
+
+ (3) Barring the top-level index (one entry per cached netfs), the index
+ hierarchy for each netfs is structured according the whim of the netfs.
+
+This API is declared in <linux/fscache.h>.
+
+This document contains the following sections:
+
+ (1) Network filesystem definition
+ (2) Index definition
+ (3) Object definition
+ (4) Network filesystem (un)registration
+ (5) Cache tag lookup
+ (6) Index registration
+ (7) Data file registration
+ (8) Miscellaneous object registration
+ (9) Setting the data file size
+ (10) Page alloc/read/write
+ (11) Page uncaching
+ (12) Index and data file consistency
+ (13) Cookie enablement
+ (14) Miscellaneous cookie operations
+ (15) Cookie unregistration
+ (16) Index invalidation
+ (17) Data file invalidation
+ (18) FS-Cache specific page flags.
+
+
+=============================
+NETWORK FILESYSTEM DEFINITION
+=============================
+
+FS-Cache needs a description of the network filesystem. This is specified
+using a record of the following structure:
+
+ struct fscache_netfs {
+ uint32_t version;
+ const char *name;
+ struct fscache_cookie *primary_index;
+ ...
+ };
+
+This first two fields should be filled in before registration, and the third
+will be filled in by the registration function; any other fields should just be
+ignored and are for internal use only.
+
+The fields are:
+
+ (1) The name of the netfs (used as the key in the toplevel index).
+
+ (2) The version of the netfs (if the name matches but the version doesn't, the
+ entire in-cache hierarchy for this netfs will be scrapped and begun
+ afresh).
+
+ (3) The cookie representing the primary index will be allocated according to
+ another parameter passed into the registration function.
+
+For example, kAFS (linux/fs/afs/) uses the following definitions to describe
+itself:
+
+ struct fscache_netfs afs_cache_netfs = {
+ .version = 0,
+ .name = "afs",
+ };
+
+
+================
+INDEX DEFINITION
+================
+
+Indices are used for two purposes:
+
+ (1) To aid the finding of a file based on a series of keys (such as AFS's
+ "cell", "volume ID", "vnode ID").
+
+ (2) To make it easier to discard a subset of all the files cached based around
+ a particular key - for instance to mirror the removal of an AFS volume.
+
+However, since it's unlikely that any two netfs's are going to want to define
+their index hierarchies in quite the same way, FS-Cache tries to impose as few
+restraints as possible on how an index is structured and where it is placed in
+the tree. The netfs can even mix indices and data files at the same level, but
+it's not recommended.
+
+Each index entry consists of a key of indeterminate length plus some auxiliary
+data, also of indeterminate length.
+
+There are some limits on indices:
+
+ (1) Any index containing non-index objects should be restricted to a single
+ cache. Any such objects created within an index will be created in the
+ first cache only. The cache in which an index is created can be
+ controlled by cache tags (see below).
+
+ (2) The entry data must be atomically journallable, so it is limited to about
+ 400 bytes at present. At least 400 bytes will be available.
+
+ (3) The depth of the index tree should be judged with care as the search
+ function is recursive. Too many layers will run the kernel out of stack.
+
+
+=================
+OBJECT DEFINITION
+=================
+
+To define an object, a structure of the following type should be filled out:
+
+ struct fscache_cookie_def
+ {
+ uint8_t name[16];
+ uint8_t type;
+
+ struct fscache_cache_tag *(*select_cache)(
+ const void *parent_netfs_data,
+ const void *cookie_netfs_data);
+
+ uint16_t (*get_key)(const void *cookie_netfs_data,
+ void *buffer,
+ uint16_t bufmax);
+
+ void (*get_attr)(const void *cookie_netfs_data,
+ uint64_t *size);
+
+ uint16_t (*get_aux)(const void *cookie_netfs_data,
+ void *buffer,
+ uint16_t bufmax);
+
+ enum fscache_checkaux (*check_aux)(void *cookie_netfs_data,
+ const void *data,
+ uint16_t datalen);
+
+ void (*get_context)(void *cookie_netfs_data, void *context);
+
+ void (*put_context)(void *cookie_netfs_data, void *context);
+
+ void (*mark_pages_cached)(void *cookie_netfs_data,
+ struct address_space *mapping,
+ struct pagevec *cached_pvec);
+
+ void (*now_uncached)(void *cookie_netfs_data);
+ };
+
+This has the following fields:
+
+ (1) The type of the object [mandatory].
+
+ This is one of the following values:
+
+ (*) FSCACHE_COOKIE_TYPE_INDEX
+
+ This defines an index, which is a special FS-Cache type.
+
+ (*) FSCACHE_COOKIE_TYPE_DATAFILE
+
+ This defines an ordinary data file.
+
+ (*) Any other value between 2 and 255
+
+ This defines an extraordinary object such as an XATTR.
+
+ (2) The name of the object type (NUL terminated unless all 16 chars are used)
+ [optional].
+
+ (3) A function to select the cache in which to store an index [optional].
+
+ This function is invoked when an index needs to be instantiated in a cache
+ during the instantiation of a non-index object. Only the immediate index
+ parent for the non-index object will be queried. Any indices above that
+ in the hierarchy may be stored in multiple caches. This function does not
+ need to be supplied for any non-index object or any index that will only
+ have index children.
+
+ If this function is not supplied or if it returns NULL then the first
+ cache in the parent's list will be chosen, or failing that, the first
+ cache in the master list.
+
+ (4) A function to retrieve an object's key from the netfs [mandatory].
+
+ This function will be called with the netfs data that was passed to the
+ cookie acquisition function and the maximum length of key data that it may
+ provide. It should write the required key data into the given buffer and
+ return the quantity it wrote.
+
+ (5) A function to retrieve attribute data from the netfs [optional].
+
+ This function will be called with the netfs data that was passed to the
+ cookie acquisition function. It should return the size of the file if
+ this is a data file. The size may be used to govern how much cache must
+ be reserved for this file in the cache.
+
+ If the function is absent, a file size of 0 is assumed.
+
+ (6) A function to retrieve auxiliary data from the netfs [optional].
+
+ This function will be called with the netfs data that was passed to the
+ cookie acquisition function and the maximum length of auxiliary data that
+ it may provide. It should write the auxiliary data into the given buffer
+ and return the quantity it wrote.
+
+ If this function is absent, the auxiliary data length will be set to 0.
+
+ The length of the auxiliary data buffer may be dependent on the key
+ length. A netfs mustn't rely on being able to provide more than 400 bytes
+ for both.
+
+ (7) A function to check the auxiliary data [optional].
+
+ This function will be called to check that a match found in the cache for
+ this object is valid. For instance with AFS it could check the auxiliary
+ data against the data version number returned by the server to determine
+ whether the index entry in a cache is still valid.
+
+ If this function is absent, it will be assumed that matching objects in a
+ cache are always valid.
+
+ If present, the function should return one of the following values:
+
+ (*) FSCACHE_CHECKAUX_OKAY - the entry is okay as is
+ (*) FSCACHE_CHECKAUX_NEEDS_UPDATE - the entry requires update
+ (*) FSCACHE_CHECKAUX_OBSOLETE - the entry should be deleted
+
+ This function can also be used to extract data from the auxiliary data in
+ the cache and copy it into the netfs's structures.
+
+ (8) A pair of functions to manage contexts for the completion callback
+ [optional].
+
+ The cache read/write functions are passed a context which is then passed
+ to the I/O completion callback function. To ensure this context remains
+ valid until after the I/O completion is called, two functions may be
+ provided: one to get an extra reference on the context, and one to drop a
+ reference to it.
+
+ If the context is not used or is a type of object that won't go out of
+ scope, then these functions are not required. These functions are not
+ required for indices as indices may not contain data. These functions may
+ be called in interrupt context and so may not sleep.
+
+ (9) A function to mark a page as retaining cache metadata [optional].
+
+ This is called by the cache to indicate that it is retaining in-memory
+ information for this page and that the netfs should uncache the page when
+ it has finished. This does not indicate whether there's data on the disk
+ or not. Note that several pages at once may be presented for marking.
+
+ The PG_fscache bit is set on the pages before this function would be
+ called, so the function need not be provided if this is sufficient.
+
+ This function is not required for indices as they're not permitted data.
+
+(10) A function to unmark all the pages retaining cache metadata [mandatory].
+
+ This is called by FS-Cache to indicate that a backing store is being
+ unbound from a cookie and that all the marks on the pages should be
+ cleared to prevent confusion. Note that the cache will have torn down all
+ its tracking information so that the pages don't need to be explicitly
+ uncached.
+
+ This function is not required for indices as they're not permitted data.
+
+
+===================================
+NETWORK FILESYSTEM (UN)REGISTRATION
+===================================
+
+The first step is to declare the network filesystem to the cache. This also
+involves specifying the layout of the primary index (for AFS, this would be the
+"cell" level).
+
+The registration function is:
+
+ int fscache_register_netfs(struct fscache_netfs *netfs);
+
+It just takes a pointer to the netfs definition. It returns 0 or an error as
+appropriate.
+
+For kAFS, registration is done as follows:
+
+ ret = fscache_register_netfs(&afs_cache_netfs);
+
+The last step is, of course, unregistration:
+
+ void fscache_unregister_netfs(struct fscache_netfs *netfs);
+
+
+================
+CACHE TAG LOOKUP
+================
+
+FS-Cache permits the use of more than one cache. To permit particular index
+subtrees to be bound to particular caches, the second step is to look up cache
+representation tags. This step is optional; it can be left entirely up to
+FS-Cache as to which cache should be used. The problem with doing that is that
+FS-Cache will always pick the first cache that was registered.
+
+To get the representation for a named tag:
+
+ struct fscache_cache_tag *fscache_lookup_cache_tag(const char *name);
+
+This takes a text string as the name and returns a representation of a tag. It
+will never return an error. It may return a dummy tag, however, if it runs out
+of memory; this will inhibit caching with this tag.
+
+Any representation so obtained must be released by passing it to this function:
+
+ void fscache_release_cache_tag(struct fscache_cache_tag *tag);
+
+The tag will be retrieved by FS-Cache when it calls the object definition
+operation select_cache().
+
+
+==================
+INDEX REGISTRATION
+==================
+
+The third step is to inform FS-Cache about part of an index hierarchy that can
+be used to locate files. This is done by requesting a cookie for each index in
+the path to the file:
+
+ struct fscache_cookie *
+ fscache_acquire_cookie(struct fscache_cookie *parent,
+ const struct fscache_object_def *def,
+ void *netfs_data,
+ bool enable);
+
+This function creates an index entry in the index represented by parent,
+filling in the index entry by calling the operations pointed to by def.
+
+Note that this function never returns an error - all errors are handled
+internally. It may, however, return NULL to indicate no cookie. It is quite
+acceptable to pass this token back to this function as the parent to another
+acquisition (or even to the relinquish cookie, read page and write page
+functions - see below).
+
+Note also that no indices are actually created in a cache until a non-index
+object needs to be created somewhere down the hierarchy. Furthermore, an index
+may be created in several different caches independently at different times.
+This is all handled transparently, and the netfs doesn't see any of it.
+
+A cookie will be created in the disabled state if enabled is false. A cookie
+must be enabled to do anything with it. A disabled cookie can be enabled by
+calling fscache_enable_cookie() (see below).
+
+For example, with AFS, a cell would be added to the primary index. This index
+entry would have a dependent inode containing a volume location index for the
+volume mappings within this cell:
+
+ cell->cache =
+ fscache_acquire_cookie(afs_cache_netfs.primary_index,
+ &afs_cell_cache_index_def,
+ cell, true);
+
+Then when a volume location was accessed, it would be entered into the cell's
+index and an inode would be allocated that acts as a volume type and hash chain
+combination:
+
+ vlocation->cache =
+ fscache_acquire_cookie(cell->cache,
+ &afs_vlocation_cache_index_def,
+ vlocation, true);
+
+And then a particular flavour of volume (R/O for example) could be added to
+that index, creating another index for vnodes (AFS inode equivalents):
+
+ volume->cache =
+ fscache_acquire_cookie(vlocation->cache,
+ &afs_volume_cache_index_def,
+ volume, true);
+
+
+======================
+DATA FILE REGISTRATION
+======================
+
+The fourth step is to request a data file be created in the cache. This is
+identical to index cookie acquisition. The only difference is that the type in
+the object definition should be something other than index type.
+
+ vnode->cache =
+ fscache_acquire_cookie(volume->cache,
+ &afs_vnode_cache_object_def,
+ vnode, true);
+
+
+=================================
+MISCELLANEOUS OBJECT REGISTRATION
+=================================
+
+An optional step is to request an object of miscellaneous type be created in
+the cache. This is almost identical to index cookie acquisition. The only
+difference is that the type in the object definition should be something other
+than index type. Whilst the parent object could be an index, it's more likely
+it would be some other type of object such as a data file.
+
+ xattr->cache =
+ fscache_acquire_cookie(vnode->cache,
+ &afs_xattr_cache_object_def,
+ xattr, true);
+
+Miscellaneous objects might be used to store extended attributes or directory
+entries for example.
+
+
+==========================
+SETTING THE DATA FILE SIZE
+==========================
+
+The fifth step is to set the physical attributes of the file, such as its size.
+This doesn't automatically reserve any space in the cache, but permits the
+cache to adjust its metadata for data tracking appropriately:
+
+ int fscache_attr_changed(struct fscache_cookie *cookie);
+
+The cache will return -ENOBUFS if there is no backing cache or if there is no
+space to allocate any extra metadata required in the cache. The attributes
+will be accessed with the get_attr() cookie definition operation.
+
+Note that attempts to read or write data pages in the cache over this size may
+be rebuffed with -ENOBUFS.
+
+This operation schedules an attribute adjustment to happen asynchronously at
+some point in the future, and as such, it may happen after the function returns
+to the caller. The attribute adjustment excludes read and write operations.
+
+
+=====================
+PAGE ALLOC/READ/WRITE
+=====================
+
+And the sixth step is to store and retrieve pages in the cache. There are
+three functions that are used to do this.
+
+Note:
+
+ (1) A page should not be re-read or re-allocated without uncaching it first.
+
+ (2) A read or allocated page must be uncached when the netfs page is released
+ from the pagecache.
+
+ (3) A page should only be written to the cache if previous read or allocated.
+
+This permits the cache to maintain its page tracking in proper order.
+
+
+PAGE READ
+---------
+
+Firstly, the netfs should ask FS-Cache to examine the caches and read the
+contents cached for a particular page of a particular file if present, or else
+allocate space to store the contents if not:
+
+ typedef
+ void (*fscache_rw_complete_t)(struct page *page,
+ void *context,
+ int error);
+
+ int fscache_read_or_alloc_page(struct fscache_cookie *cookie,
+ struct page *page,
+ fscache_rw_complete_t end_io_func,
+ void *context,
+ gfp_t gfp);
+
+The cookie argument must specify a cookie for an object that isn't an index,
+the page specified will have the data loaded into it (and is also used to
+specify the page number), and the gfp argument is used to control how any
+memory allocations made are satisfied.
+
+If the cookie indicates the inode is not cached:
+
+ (1) The function will return -ENOBUFS.
+
+Else if there's a copy of the page resident in the cache:
+
+ (1) The mark_pages_cached() cookie operation will be called on that page.
+
+ (2) The function will submit a request to read the data from the cache's
+ backing device directly into the page specified.
+
+ (3) The function will return 0.
+
+ (4) When the read is complete, end_io_func() will be invoked with:
+
+ (*) The netfs data supplied when the cookie was created.
+
+ (*) The page descriptor.
+
+ (*) The context argument passed to the above function. This will be
+ maintained with the get_context/put_context functions mentioned above.
+
+ (*) An argument that's 0 on success or negative for an error code.
+
+ If an error occurs, it should be assumed that the page contains no usable
+ data. fscache_readpages_cancel() may need to be called.
+
+ end_io_func() will be called in process context if the read is results in
+ an error, but it might be called in interrupt context if the read is
+ successful.
+
+Otherwise, if there's not a copy available in cache, but the cache may be able
+to store the page:
+
+ (1) The mark_pages_cached() cookie operation will be called on that page.
+
+ (2) A block may be reserved in the cache and attached to the object at the
+ appropriate place.
+
+ (3) The function will return -ENODATA.
+
+This function may also return -ENOMEM or -EINTR, in which case it won't have
+read any data from the cache.
+
+
+PAGE ALLOCATE
+-------------
+
+Alternatively, if there's not expected to be any data in the cache for a page
+because the file has been extended, a block can simply be allocated instead:
+
+ int fscache_alloc_page(struct fscache_cookie *cookie,
+ struct page *page,
+ gfp_t gfp);
+
+This is similar to the fscache_read_or_alloc_page() function, except that it
+never reads from the cache. It will return 0 if a block has been allocated,
+rather than -ENODATA as the other would. One or the other must be performed
+before writing to the cache.
+
+The mark_pages_cached() cookie operation will be called on the page if
+successful.
+
+
+PAGE WRITE
+----------
+
+Secondly, if the netfs changes the contents of the page (either due to an
+initial download or if a user performs a write), then the page should be
+written back to the cache:
+
+ int fscache_write_page(struct fscache_cookie *cookie,
+ struct page *page,
+ gfp_t gfp);
+
+The cookie argument must specify a data file cookie, the page specified should
+contain the data to be written (and is also used to specify the page number),
+and the gfp argument is used to control how any memory allocations made are
+satisfied.
+
+The page must have first been read or allocated successfully and must not have
+been uncached before writing is performed.
+
+If the cookie indicates the inode is not cached then:
+
+ (1) The function will return -ENOBUFS.
+
+Else if space can be allocated in the cache to hold this page:
+
+ (1) PG_fscache_write will be set on the page.
+
+ (2) The function will submit a request to write the data to cache's backing
+ device directly from the page specified.
+
+ (3) The function will return 0.
+
+ (4) When the write is complete PG_fscache_write is cleared on the page and
+ anyone waiting for that bit will be woken up.
+
+Else if there's no space available in the cache, -ENOBUFS will be returned. It
+is also possible for the PG_fscache_write bit to be cleared when no write took
+place if unforeseen circumstances arose (such as a disk error).
+
+Writing takes place asynchronously.
+
+
+MULTIPLE PAGE READ
+------------------
+
+A facility is provided to read several pages at once, as requested by the
+readpages() address space operation:
+
+ int fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
+ struct address_space *mapping,
+ struct list_head *pages,
+ int *nr_pages,
+ fscache_rw_complete_t end_io_func,
+ void *context,
+ gfp_t gfp);
+
+This works in a similar way to fscache_read_or_alloc_page(), except:
+
+ (1) Any page it can retrieve data for is removed from pages and nr_pages and
+ dispatched for reading to the disk. Reads of adjacent pages on disk may
+ be merged for greater efficiency.
+
+ (2) The mark_pages_cached() cookie operation will be called on several pages
+ at once if they're being read or allocated.
+
+ (3) If there was an general error, then that error will be returned.
+
+ Else if some pages couldn't be allocated or read, then -ENOBUFS will be
+ returned.
+
+ Else if some pages couldn't be read but were allocated, then -ENODATA will
+ be returned.
+
+ Otherwise, if all pages had reads dispatched, then 0 will be returned, the
+ list will be empty and *nr_pages will be 0.
+
+ (4) end_io_func will be called once for each page being read as the reads
+ complete. It will be called in process context if error != 0, but it may
+ be called in interrupt context if there is no error.
+
+Note that a return of -ENODATA, -ENOBUFS or any other error does not preclude
+some of the pages being read and some being allocated. Those pages will have
+been marked appropriately and will need uncaching.
+
+
+CANCELLATION OF UNREAD PAGES
+----------------------------
+
+If one or more pages are passed to fscache_read_or_alloc_pages() but not then
+read from the cache and also not read from the underlying filesystem then
+those pages will need to have any marks and reservations removed. This can be
+done by calling:
+
+ void fscache_readpages_cancel(struct fscache_cookie *cookie,
+ struct list_head *pages);
+
+prior to returning to the caller. The cookie argument should be as passed to
+fscache_read_or_alloc_pages(). Every page in the pages list will be examined
+and any that have PG_fscache set will be uncached.
+
+
+==============
+PAGE UNCACHING
+==============
+
+To uncache a page, this function should be called:
+
+ void fscache_uncache_page(struct fscache_cookie *cookie,
+ struct page *page);
+
+This function permits the cache to release any in-memory representation it
+might be holding for this netfs page. This function must be called once for
+each page on which the read or write page functions above have been called to
+make sure the cache's in-memory tracking information gets torn down.
+
+Note that pages can't be explicitly deleted from the a data file. The whole
+data file must be retired (see the relinquish cookie function below).
+
+Furthermore, note that this does not cancel the asynchronous read or write
+operation started by the read/alloc and write functions, so the page
+invalidation functions must use:
+
+ bool fscache_check_page_write(struct fscache_cookie *cookie,
+ struct page *page);
+
+to see if a page is being written to the cache, and:
+
+ void fscache_wait_on_page_write(struct fscache_cookie *cookie,
+ struct page *page);
+
+to wait for it to finish if it is.
+
+
+When releasepage() is being implemented, a special FS-Cache function exists to
+manage the heuristics of coping with vmscan trying to eject pages, which may
+conflict with the cache trying to write pages to the cache (which may itself
+need to allocate memory):
+
+ bool fscache_maybe_release_page(struct fscache_cookie *cookie,
+ struct page *page,
+ gfp_t gfp);
+
+This takes the netfs cookie, and the page and gfp arguments as supplied to
+releasepage(). It will return false if the page cannot be released yet for
+some reason and if it returns true, the page has been uncached and can now be
+released.
+
+To make a page available for release, this function may wait for an outstanding
+storage request to complete, or it may attempt to cancel the storage request -
+in which case the page will not be stored in the cache this time.
+
+
+BULK INODE PAGE UNCACHE
+-----------------------
+
+A convenience routine is provided to perform an uncache on all the pages
+attached to an inode. This assumes that the pages on the inode correspond on a
+1:1 basis with the pages in the cache.
+
+ void fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
+ struct inode *inode);
+
+This takes the netfs cookie that the pages were cached with and the inode that
+the pages are attached to. This function will wait for pages to finish being
+written to the cache and for the cache to finish with the page generally. No
+error is returned.
+
+
+===============================
+INDEX AND DATA FILE CONSISTENCY
+===============================
+
+To find out whether auxiliary data for an object is up to data within the
+cache, the following function can be called:
+
+ int fscache_check_consistency(struct fscache_cookie *cookie)
+
+This will call back to the netfs to check whether the auxiliary data associated
+with a cookie is correct. It returns 0 if it is and -ESTALE if it isn't; it
+may also return -ENOMEM and -ERESTARTSYS.
+
+To request an update of the index data for an index or other object, the
+following function should be called:
+
+ void fscache_update_cookie(struct fscache_cookie *cookie);
+
+This function will refer back to the netfs_data pointer stored in the cookie by
+the acquisition function to obtain the data to write into each revised index
+entry. The update method in the parent index definition will be called to
+transfer the data.
+
+Note that partial updates may happen automatically at other times, such as when
+data blocks are added to a data file object.
+
+
+=================
+COOKIE ENABLEMENT
+=================
+
+Cookies exist in one of two states: enabled and disabled. If a cookie is
+disabled, it ignores all attempts to acquire child cookies; check, update or
+invalidate its state; allocate, read or write backing pages - though it is
+still possible to uncache pages and relinquish the cookie.
+
+The initial enablement state is set by fscache_acquire_cookie(), but the cookie
+can be enabled or disabled later. To disable a cookie, call:
+
+ void fscache_disable_cookie(struct fscache_cookie *cookie,
+ bool invalidate);
+
+If the cookie is not already disabled, this locks the cookie against other
+enable and disable ops, marks the cookie as being disabled, discards or
+invalidates any backing objects and waits for cessation of activity on any
+associated object before unlocking the cookie.
+
+All possible failures are handled internally. The caller should consider
+calling fscache_uncache_all_inode_pages() afterwards to make sure all page
+markings are cleared up.
+
+Cookies can be enabled or reenabled with:
+
+ void fscache_enable_cookie(struct fscache_cookie *cookie,
+ bool (*can_enable)(void *data),
+ void *data)
+
+If the cookie is not already enabled, this locks the cookie against other
+enable and disable ops, invokes can_enable() and, if the cookie is not an index
+cookie, will begin the procedure of acquiring backing objects.
+
+The optional can_enable() function is passed the data argument and returns a
+ruling as to whether or not enablement should actually be permitted to begin.
+
+All possible failures are handled internally. The cookie will only be marked
+as enabled if provisional backing objects are allocated.
+
+
+===============================
+MISCELLANEOUS COOKIE OPERATIONS
+===============================
+
+There are a number of operations that can be used to control cookies:
+
+ (*) Cookie pinning:
+
+ int fscache_pin_cookie(struct fscache_cookie *cookie);
+ void fscache_unpin_cookie(struct fscache_cookie *cookie);
+
+ These operations permit data cookies to be pinned into the cache and to
+ have the pinning removed. They are not permitted on index cookies.
+
+ The pinning function will return 0 if successful, -ENOBUFS in the cookie
+ isn't backed by a cache, -EOPNOTSUPP if the cache doesn't support pinning,
+ -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or
+ -EIO if there's any other problem.
+
+ (*) Data space reservation:
+
+ int fscache_reserve_space(struct fscache_cookie *cookie, loff_t size);
+
+ This permits a netfs to request cache space be reserved to store up to the
+ given amount of a file. It is permitted to ask for more than the current
+ size of the file to allow for future file expansion.
+
+ If size is given as zero then the reservation will be cancelled.
+
+ The function will return 0 if successful, -ENOBUFS in the cookie isn't
+ backed by a cache, -EOPNOTSUPP if the cache doesn't support reservations,
+ -ENOSPC if there isn't enough space to honour the operation, -ENOMEM or
+ -EIO if there's any other problem.
+
+ Note that this doesn't pin an object in a cache; it can still be culled to
+ make space if it's not in use.
+
+
+=====================
+COOKIE UNREGISTRATION
+=====================
+
+To get rid of a cookie, this function should be called.
+
+ void fscache_relinquish_cookie(struct fscache_cookie *cookie,
+ bool retire);
+
+If retire is non-zero, then the object will be marked for recycling, and all
+copies of it will be removed from all active caches in which it is present.
+Not only that but all child objects will also be retired.
+
+If retire is zero, then the object may be available again when next the
+acquisition function is called. Retirement here will overrule the pinning on a
+cookie.
+
+One very important note - relinquish must NOT be called for a cookie unless all
+the cookies for "child" indices, objects and pages have been relinquished
+first.
+
+
+==================
+INDEX INVALIDATION
+==================
+
+There is no direct way to invalidate an index subtree. To do this, the caller
+should relinquish and retire the cookie they have, and then acquire a new one.
+
+
+======================
+DATA FILE INVALIDATION
+======================
+
+Sometimes it will be necessary to invalidate an object that contains data.
+Typically this will be necessary when the server tells the netfs of a foreign
+change - at which point the netfs has to throw away all the state it had for an
+inode and reload from the server.
+
+To indicate that a cache object should be invalidated, the following function
+can be called:
+
+ void fscache_invalidate(struct fscache_cookie *cookie);
+
+This can be called with spinlocks held as it defers the work to a thread pool.
+All extant storage, retrieval and attribute change ops at this point are
+cancelled and discarded. Some future operations will be rejected until the
+cache has had a chance to insert a barrier in the operations queue. After
+that, operations will be queued again behind the invalidation operation.
+
+The invalidation operation will perform an attribute change operation and an
+auxiliary data update operation as it is very likely these will have changed.
+
+Using the following function, the netfs can wait for the invalidation operation
+to have reached a point at which it can start submitting ordinary operations
+once again:
+
+ void fscache_wait_on_invalidate(struct fscache_cookie *cookie);
+
+
+===========================
+FS-CACHE SPECIFIC PAGE FLAG
+===========================
+
+FS-Cache makes use of a page flag, PG_private_2, for its own purpose. This is
+given the alternative name PG_fscache.
+
+PG_fscache is used to indicate that the page is known by the cache, and that
+the cache must be informed if the page is going to go away. It's an indication
+to the netfs that the cache has an interest in this page, where an interest may
+be a pointer to it, resources allocated or reserved for it, or I/O in progress
+upon it.
+
+The netfs can use this information in methods such as releasepage() to
+determine whether it needs to uncache a page or update it.
+
+Furthermore, if this bit is set, releasepage() and invalidatepage() operations
+will be called on a page to get rid of it, even if PG_private is not set. This
+allows caching to attempted on a page before read_cache_pages() to be called
+after fscache_read_or_alloc_pages() as the former will try and release pages it
+was given under certain circumstances.
+
+This bit does not overlap with such as PG_private. This means that FS-Cache
+can be used with a filesystem that uses the block buffering code.
+
+There are a number of operations defined on this flag:
+
+ int PageFsCache(struct page *page);
+ void SetPageFsCache(struct page *page)
+ void ClearPageFsCache(struct page *page)
+ int TestSetPageFsCache(struct page *page)
+ int TestClearPageFsCache(struct page *page)
+
+These functions are bit test, bit set, bit clear, bit test and set and bit
+test and clear operations on PG_fscache.
diff --git a/Documentation/filesystems/caching/object.txt b/Documentation/filesystems/caching/object.txt
new file mode 100644
index 000000000..100ff4112
--- /dev/null
+++ b/Documentation/filesystems/caching/object.txt
@@ -0,0 +1,320 @@
+ ====================================================
+ IN-KERNEL CACHE OBJECT REPRESENTATION AND MANAGEMENT
+ ====================================================
+
+By: David Howells <dhowells@redhat.com>
+
+Contents:
+
+ (*) Representation
+
+ (*) Object management state machine.
+
+ - Provision of cpu time.
+ - Locking simplification.
+
+ (*) The set of states.
+
+ (*) The set of events.
+
+
+==============
+REPRESENTATION
+==============
+
+FS-Cache maintains an in-kernel representation of each object that a netfs is
+currently interested in. Such objects are represented by the fscache_cookie
+struct and are referred to as cookies.
+
+FS-Cache also maintains a separate in-kernel representation of the objects that
+a cache backend is currently actively caching. Such objects are represented by
+the fscache_object struct. The cache backends allocate these upon request, and
+are expected to embed them in their own representations. These are referred to
+as objects.
+
+There is a 1:N relationship between cookies and objects. A cookie may be
+represented by multiple objects - an index may exist in more than one cache -
+or even by no objects (it may not be cached).
+
+Furthermore, both cookies and objects are hierarchical. The two hierarchies
+correspond, but the cookies tree is a superset of the union of the object trees
+of multiple caches:
+
+ NETFS INDEX TREE : CACHE 1 : CACHE 2
+ : :
+ : +-----------+ :
+ +----------->| IObject | :
+ +-----------+ | : +-----------+ :
+ | ICookie |-------+ : | :
+ +-----------+ | : | : +-----------+
+ | +------------------------------>| IObject |
+ | : | : +-----------+
+ | : V : |
+ | : +-----------+ : |
+ V +----------->| IObject | : |
+ +-----------+ | : +-----------+ : |
+ | ICookie |-------+ : | : V
+ +-----------+ | : | : +-----------+
+ | +------------------------------>| IObject |
+ +-----+-----+ : | : +-----------+
+ | | : | : |
+ V | : V : |
+ +-----------+ | : +-----------+ : |
+ | ICookie |------------------------->| IObject | : |
+ +-----------+ | : +-----------+ : |
+ | V : | : V
+ | +-----------+ : | : +-----------+
+ | | ICookie |-------------------------------->| IObject |
+ | +-----------+ : | : +-----------+
+ V | : V : |
+ +-----------+ | : +-----------+ : |
+ | DCookie |------------------------->| DObject | : |
+ +-----------+ | : +-----------+ : |
+ | : : |
+ +-------+-------+ : : |
+ | | : : |
+ V V : : V
+ +-----------+ +-----------+ : : +-----------+
+ | DCookie | | DCookie |------------------------>| DObject |
+ +-----------+ +-----------+ : : +-----------+
+ : :
+
+In the above illustration, ICookie and IObject represent indices and DCookie
+and DObject represent data storage objects. Indices may have representation in
+multiple caches, but currently, non-index objects may not. Objects of any type
+may also be entirely unrepresented.
+
+As far as the netfs API goes, the netfs is only actually permitted to see
+pointers to the cookies. The cookies themselves and any objects attached to
+those cookies are hidden from it.
+
+
+===============================
+OBJECT MANAGEMENT STATE MACHINE
+===============================
+
+Within FS-Cache, each active object is managed by its own individual state
+machine. The state for an object is kept in the fscache_object struct, in
+object->state. A cookie may point to a set of objects that are in different
+states.
+
+Each state has an action associated with it that is invoked when the machine
+wakes up in that state. There are four logical sets of states:
+
+ (1) Preparation: states that wait for the parent objects to become ready. The
+ representations are hierarchical, and it is expected that an object must
+ be created or accessed with respect to its parent object.
+
+ (2) Initialisation: states that perform lookups in the cache and validate
+ what's found and that create on disk any missing metadata.
+
+ (3) Normal running: states that allow netfs operations on objects to proceed
+ and that update the state of objects.
+
+ (4) Termination: states that detach objects from their netfs cookies, that
+ delete objects from disk, that handle disk and system errors and that free
+ up in-memory resources.
+
+
+In most cases, transitioning between states is in response to signalled events.
+When a state has finished processing, it will usually set the mask of events in
+which it is interested (object->event_mask) and relinquish the worker thread.
+Then when an event is raised (by calling fscache_raise_event()), if the event
+is not masked, the object will be queued for processing (by calling
+fscache_enqueue_object()).
+
+
+PROVISION OF CPU TIME
+---------------------
+
+The work to be done by the various states was given CPU time by the threads of
+the slow work facility. This was used in preference to the workqueue facility
+because:
+
+ (1) Threads may be completely occupied for very long periods of time by a
+ particular work item. These state actions may be doing sequences of
+ synchronous, journalled disk accesses (lookup, mkdir, create, setxattr,
+ getxattr, truncate, unlink, rmdir, rename).
+
+ (2) Threads may do little actual work, but may rather spend a lot of time
+ sleeping on I/O. This means that single-threaded and 1-per-CPU-threaded
+ workqueues don't necessarily have the right numbers of threads.
+
+
+LOCKING SIMPLIFICATION
+----------------------
+
+Because only one worker thread may be operating on any particular object's
+state machine at once, this simplifies the locking, particularly with respect
+to disconnecting the netfs's representation of a cache object (fscache_cookie)
+from the cache backend's representation (fscache_object) - which may be
+requested from either end.
+
+
+=================
+THE SET OF STATES
+=================
+
+The object state machine has a set of states that it can be in. There are
+preparation states in which the object sets itself up and waits for its parent
+object to transit to a state that allows access to its children:
+
+ (1) State FSCACHE_OBJECT_INIT.
+
+ Initialise the object and wait for the parent object to become active. In
+ the cache, it is expected that it will not be possible to look an object
+ up from the parent object, until that parent object itself has been looked
+ up.
+
+There are initialisation states in which the object sets itself up and accesses
+disk for the object metadata:
+
+ (2) State FSCACHE_OBJECT_LOOKING_UP.
+
+ Look up the object on disk, using the parent as a starting point.
+ FS-Cache expects the cache backend to probe the cache to see whether this
+ object is represented there, and if it is, to see if it's valid (coherency
+ management).
+
+ The cache should call fscache_object_lookup_negative() to indicate lookup
+ failure for whatever reason, and should call fscache_obtained_object() to
+ indicate success.
+
+ At the completion of lookup, FS-Cache will let the netfs go ahead with
+ read operations, no matter whether the file is yet cached. If not yet
+ cached, read operations will be immediately rejected with ENODATA until
+ the first known page is uncached - as to that point there can be no data
+ to be read out of the cache for that file that isn't currently also held
+ in the pagecache.
+
+ (3) State FSCACHE_OBJECT_CREATING.
+
+ Create an object on disk, using the parent as a starting point. This
+ happens if the lookup failed to find the object, or if the object's
+ coherency data indicated what's on disk is out of date. In this state,
+ FS-Cache expects the cache to create
+
+ The cache should call fscache_obtained_object() if creation completes
+ successfully, fscache_object_lookup_negative() otherwise.
+
+ At the completion of creation, FS-Cache will start processing write
+ operations the netfs has queued for an object. If creation failed, the
+ write ops will be transparently discarded, and nothing recorded in the
+ cache.
+
+There are some normal running states in which the object spends its time
+servicing netfs requests:
+
+ (4) State FSCACHE_OBJECT_AVAILABLE.
+
+ A transient state in which pending operations are started, child objects
+ are permitted to advance from FSCACHE_OBJECT_INIT state, and temporary
+ lookup data is freed.
+
+ (5) State FSCACHE_OBJECT_ACTIVE.
+
+ The normal running state. In this state, requests the netfs makes will be
+ passed on to the cache.
+
+ (6) State FSCACHE_OBJECT_INVALIDATING.
+
+ The object is undergoing invalidation. When the state comes here, it
+ discards all pending read, write and attribute change operations as it is
+ going to clear out the cache entirely and reinitialise it. It will then
+ continue to the FSCACHE_OBJECT_UPDATING state.
+
+ (7) State FSCACHE_OBJECT_UPDATING.
+
+ The state machine comes here to update the object in the cache from the
+ netfs's records. This involves updating the auxiliary data that is used
+ to maintain coherency.
+
+And there are terminal states in which an object cleans itself up, deallocates
+memory and potentially deletes stuff from disk:
+
+ (8) State FSCACHE_OBJECT_LC_DYING.
+
+ The object comes here if it is dying because of a lookup or creation
+ error. This would be due to a disk error or system error of some sort.
+ Temporary data is cleaned up, and the parent is released.
+
+ (9) State FSCACHE_OBJECT_DYING.
+
+ The object comes here if it is dying due to an error, because its parent
+ cookie has been relinquished by the netfs or because the cache is being
+ withdrawn.
+
+ Any child objects waiting on this one are given CPU time so that they too
+ can destroy themselves. This object waits for all its children to go away
+ before advancing to the next state.
+
+(10) State FSCACHE_OBJECT_ABORT_INIT.
+
+ The object comes to this state if it was waiting on its parent in
+ FSCACHE_OBJECT_INIT, but its parent died. The object will destroy itself
+ so that the parent may proceed from the FSCACHE_OBJECT_DYING state.
+
+(11) State FSCACHE_OBJECT_RELEASING.
+(12) State FSCACHE_OBJECT_RECYCLING.
+
+ The object comes to one of these two states when dying once it is rid of
+ all its children, if it is dying because the netfs relinquished its
+ cookie. In the first state, the cached data is expected to persist, and
+ in the second it will be deleted.
+
+(13) State FSCACHE_OBJECT_WITHDRAWING.
+
+ The object transits to this state if the cache decides it wants to
+ withdraw the object from service, perhaps to make space, but also due to
+ error or just because the whole cache is being withdrawn.
+
+(14) State FSCACHE_OBJECT_DEAD.
+
+ The object transits to this state when the in-memory object record is
+ ready to be deleted. The object processor shouldn't ever see an object in
+ this state.
+
+
+THE SET OF EVENTS
+-----------------
+
+There are a number of events that can be raised to an object state machine:
+
+ (*) FSCACHE_OBJECT_EV_UPDATE
+
+ The netfs requested that an object be updated. The state machine will ask
+ the cache backend to update the object, and the cache backend will ask the
+ netfs for details of the change through its cookie definition ops.
+
+ (*) FSCACHE_OBJECT_EV_CLEARED
+
+ This is signalled in two circumstances:
+
+ (a) when an object's last child object is dropped and
+
+ (b) when the last operation outstanding on an object is completed.
+
+ This is used to proceed from the dying state.
+
+ (*) FSCACHE_OBJECT_EV_ERROR
+
+ This is signalled when an I/O error occurs during the processing of some
+ object.
+
+ (*) FSCACHE_OBJECT_EV_RELEASE
+ (*) FSCACHE_OBJECT_EV_RETIRE
+
+ These are signalled when the netfs relinquishes a cookie it was using.
+ The event selected depends on whether the netfs asks for the backing
+ object to be retired (deleted) or retained.
+
+ (*) FSCACHE_OBJECT_EV_WITHDRAW
+
+ This is signalled when the cache backend wants to withdraw an object.
+ This means that the object will have to be detached from the netfs's
+ cookie.
+
+Because the withdrawing releasing/retiring events are all handled by the object
+state machine, it doesn't matter if there's a collision with both ends trying
+to sever the connection at the same time. The state machine can just pick
+which one it wants to honour, and that effects the other.
diff --git a/Documentation/filesystems/caching/operations.txt b/Documentation/filesystems/caching/operations.txt
new file mode 100644
index 000000000..a1c052cbb
--- /dev/null
+++ b/Documentation/filesystems/caching/operations.txt
@@ -0,0 +1,213 @@
+ ================================
+ ASYNCHRONOUS OPERATIONS HANDLING
+ ================================
+
+By: David Howells <dhowells@redhat.com>
+
+Contents:
+
+ (*) Overview.
+
+ (*) Operation record initialisation.
+
+ (*) Parameters.
+
+ (*) Procedure.
+
+ (*) Asynchronous callback.
+
+
+========
+OVERVIEW
+========
+
+FS-Cache has an asynchronous operations handling facility that it uses for its
+data storage and retrieval routines. Its operations are represented by
+fscache_operation structs, though these are usually embedded into some other
+structure.
+
+This facility is available to and expected to be be used by the cache backends,
+and FS-Cache will create operations and pass them off to the appropriate cache
+backend for completion.
+
+To make use of this facility, <linux/fscache-cache.h> should be #included.
+
+
+===============================
+OPERATION RECORD INITIALISATION
+===============================
+
+An operation is recorded in an fscache_operation struct:
+
+ struct fscache_operation {
+ union {
+ struct work_struct fast_work;
+ struct slow_work slow_work;
+ };
+ unsigned long flags;
+ fscache_operation_processor_t processor;
+ ...
+ };
+
+Someone wanting to issue an operation should allocate something with this
+struct embedded in it. They should initialise it by calling:
+
+ void fscache_operation_init(struct fscache_operation *op,
+ fscache_operation_release_t release);
+
+with the operation to be initialised and the release function to use.
+
+The op->flags parameter should be set to indicate the CPU time provision and
+the exclusivity (see the Parameters section).
+
+The op->fast_work, op->slow_work and op->processor flags should be set as
+appropriate for the CPU time provision (see the Parameters section).
+
+FSCACHE_OP_WAITING may be set in op->flags prior to each submission of the
+operation and waited for afterwards.
+
+
+==========
+PARAMETERS
+==========
+
+There are a number of parameters that can be set in the operation record's flag
+parameter. There are three options for the provision of CPU time in these
+operations:
+
+ (1) The operation may be done synchronously (FSCACHE_OP_MYTHREAD). A thread
+ may decide it wants to handle an operation itself without deferring it to
+ another thread.
+
+ This is, for example, used in read operations for calling readpages() on
+ the backing filesystem in CacheFiles. Although readpages() does an
+ asynchronous data fetch, the determination of whether pages exist is done
+ synchronously - and the netfs does not proceed until this has been
+ determined.
+
+ If this option is to be used, FSCACHE_OP_WAITING must be set in op->flags
+ before submitting the operation, and the operating thread must wait for it
+ to be cleared before proceeding:
+
+ wait_on_bit(&op->flags, FSCACHE_OP_WAITING,
+ TASK_UNINTERRUPTIBLE);
+
+
+ (2) The operation may be fast asynchronous (FSCACHE_OP_FAST), in which case it
+ will be given to keventd to process. Such an operation is not permitted
+ to sleep on I/O.
+
+ This is, for example, used by CacheFiles to copy data from a backing fs
+ page to a netfs page after the backing fs has read the page in.
+
+ If this option is used, op->fast_work and op->processor must be
+ initialised before submitting the operation:
+
+ INIT_WORK(&op->fast_work, do_some_work);
+
+
+ (3) The operation may be slow asynchronous (FSCACHE_OP_SLOW), in which case it
+ will be given to the slow work facility to process. Such an operation is
+ permitted to sleep on I/O.
+
+ This is, for example, used by FS-Cache to handle background writes of
+ pages that have just been fetched from a remote server.
+
+ If this option is used, op->slow_work and op->processor must be
+ initialised before submitting the operation:
+
+ fscache_operation_init_slow(op, processor)
+
+
+Furthermore, operations may be one of two types:
+
+ (1) Exclusive (FSCACHE_OP_EXCLUSIVE). Operations of this type may not run in
+ conjunction with any other operation on the object being operated upon.
+
+ An example of this is the attribute change operation, in which the file
+ being written to may need truncation.
+
+ (2) Shareable. Operations of this type may be running simultaneously. It's
+ up to the operation implementation to prevent interference between other
+ operations running at the same time.
+
+
+=========
+PROCEDURE
+=========
+
+Operations are used through the following procedure:
+
+ (1) The submitting thread must allocate the operation and initialise it
+ itself. Normally this would be part of a more specific structure with the
+ generic op embedded within.
+
+ (2) The submitting thread must then submit the operation for processing using
+ one of the following two functions:
+
+ int fscache_submit_op(struct fscache_object *object,
+ struct fscache_operation *op);
+
+ int fscache_submit_exclusive_op(struct fscache_object *object,
+ struct fscache_operation *op);
+
+ The first function should be used to submit non-exclusive ops and the
+ second to submit exclusive ones. The caller must still set the
+ FSCACHE_OP_EXCLUSIVE flag.
+
+ If successful, both functions will assign the operation to the specified
+ object and return 0. -ENOBUFS will be returned if the object specified is
+ permanently unavailable.
+
+ The operation manager will defer operations on an object that is still
+ undergoing lookup or creation. The operation will also be deferred if an
+ operation of conflicting exclusivity is in progress on the object.
+
+ If the operation is asynchronous, the manager will retain a reference to
+ it, so the caller should put their reference to it by passing it to:
+
+ void fscache_put_operation(struct fscache_operation *op);
+
+ (3) If the submitting thread wants to do the work itself, and has marked the
+ operation with FSCACHE_OP_MYTHREAD, then it should monitor
+ FSCACHE_OP_WAITING as described above and check the state of the object if
+ necessary (the object might have died whilst the thread was waiting).
+
+ When it has finished doing its processing, it should call
+ fscache_op_complete() and fscache_put_operation() on it.
+
+ (4) The operation holds an effective lock upon the object, preventing other
+ exclusive ops conflicting until it is released. The operation can be
+ enqueued for further immediate asynchronous processing by adjusting the
+ CPU time provisioning option if necessary, eg:
+
+ op->flags &= ~FSCACHE_OP_TYPE;
+ op->flags |= ~FSCACHE_OP_FAST;
+
+ and calling:
+
+ void fscache_enqueue_operation(struct fscache_operation *op)
+
+ This can be used to allow other things to have use of the worker thread
+ pools.
+
+
+=====================
+ASYNCHRONOUS CALLBACK
+=====================
+
+When used in asynchronous mode, the worker thread pool will invoke the
+processor method with a pointer to the operation. This should then get at the
+container struct by using container_of():
+
+ static void fscache_write_op(struct fscache_operation *_op)
+ {
+ struct fscache_storage *op =
+ container_of(_op, struct fscache_storage, op);
+ ...
+ }
+
+The caller holds a reference on the operation, and will invoke
+fscache_put_operation() when the processor function returns. The processor
+function is at liberty to call fscache_enqueue_operation() or to take extra
+references.
diff --git a/Documentation/filesystems/ceph.txt b/Documentation/filesystems/ceph.txt
new file mode 100644
index 000000000..d6030aa33
--- /dev/null
+++ b/Documentation/filesystems/ceph.txt
@@ -0,0 +1,148 @@
+Ceph Distributed File System
+============================
+
+Ceph is a distributed network file system designed to provide good
+performance, reliability, and scalability.
+
+Basic features include:
+
+ * POSIX semantics
+ * Seamless scaling from 1 to many thousands of nodes
+ * High availability and reliability. No single point of failure.
+ * N-way replication of data across storage nodes
+ * Fast recovery from node failures
+ * Automatic rebalancing of data on node addition/removal
+ * Easy deployment: most FS components are userspace daemons
+
+Also,
+ * Flexible snapshots (on any directory)
+ * Recursive accounting (nested files, directories, bytes)
+
+In contrast to cluster filesystems like GFS, OCFS2, and GPFS that rely
+on symmetric access by all clients to shared block devices, Ceph
+separates data and metadata management into independent server
+clusters, similar to Lustre. Unlike Lustre, however, metadata and
+storage nodes run entirely as user space daemons. Storage nodes
+utilize btrfs to store data objects, leveraging its advanced features
+(checksumming, metadata replication, etc.). File data is striped
+across storage nodes in large chunks to distribute workload and
+facilitate high throughputs. When storage nodes fail, data is
+re-replicated in a distributed fashion by the storage nodes themselves
+(with some minimal coordination from a cluster monitor), making the
+system extremely efficient and scalable.
+
+Metadata servers effectively form a large, consistent, distributed
+in-memory cache above the file namespace that is extremely scalable,
+dynamically redistributes metadata in response to workload changes,
+and can tolerate arbitrary (well, non-Byzantine) node failures. The
+metadata server takes a somewhat unconventional approach to metadata
+storage to significantly improve performance for common workloads. In
+particular, inodes with only a single link are embedded in
+directories, allowing entire directories of dentries and inodes to be
+loaded into its cache with a single I/O operation. The contents of
+extremely large directories can be fragmented and managed by
+independent metadata servers, allowing scalable concurrent access.
+
+The system offers automatic data rebalancing/migration when scaling
+from a small cluster of just a few nodes to many hundreds, without
+requiring an administrator carve the data set into static volumes or
+go through the tedious process of migrating data between servers.
+When the file system approaches full, new nodes can be easily added
+and things will "just work."
+
+Ceph includes flexible snapshot mechanism that allows a user to create
+a snapshot on any subdirectory (and its nested contents) in the
+system. Snapshot creation and deletion are as simple as 'mkdir
+.snap/foo' and 'rmdir .snap/foo'.
+
+Ceph also provides some recursive accounting on directories for nested
+files and bytes. That is, a 'getfattr -d foo' on any directory in the
+system will reveal the total number of nested regular files and
+subdirectories, and a summation of all nested file sizes. This makes
+the identification of large disk space consumers relatively quick, as
+no 'du' or similar recursive scan of the file system is required.
+
+
+Mount Syntax
+============
+
+The basic mount syntax is:
+
+ # mount -t ceph monip[:port][,monip2[:port]...]:/[subdir] mnt
+
+You only need to specify a single monitor, as the client will get the
+full list when it connects. (However, if the monitor you specify
+happens to be down, the mount won't succeed.) The port can be left
+off if the monitor is using the default. So if the monitor is at
+1.2.3.4,
+
+ # mount -t ceph 1.2.3.4:/ /mnt/ceph
+
+is sufficient. If /sbin/mount.ceph is installed, a hostname can be
+used instead of an IP address.
+
+
+
+Mount Options
+=============
+
+ ip=A.B.C.D[:N]
+ Specify the IP and/or port the client should bind to locally.
+ There is normally not much reason to do this. If the IP is not
+ specified, the client's IP address is determined by looking at the
+ address its connection to the monitor originates from.
+
+ wsize=X
+ Specify the maximum write size in bytes. By default there is no
+ maximum. Ceph will normally size writes based on the file stripe
+ size.
+
+ rsize=X
+ Specify the maximum readahead.
+
+ mount_timeout=X
+ Specify the timeout value for mount (in seconds), in the case
+ of a non-responsive Ceph file system. The default is 30
+ seconds.
+
+ rbytes
+ When stat() is called on a directory, set st_size to 'rbytes',
+ the summation of file sizes over all files nested beneath that
+ directory. This is the default.
+
+ norbytes
+ When stat() is called on a directory, set st_size to the
+ number of entries in that directory.
+
+ nocrc
+ Disable CRC32C calculation for data writes. If set, the storage node
+ must rely on TCP's error correction to detect data corruption
+ in the data payload.
+
+ dcache
+ Use the dcache contents to perform negative lookups and
+ readdir when the client has the entire directory contents in
+ its cache. (This does not change correctness; the client uses
+ cached metadata only when a lease or capability ensures it is
+ valid.)
+
+ nodcache
+ Do not use the dcache as above. This avoids a significant amount of
+ complex code, sacrificing performance without affecting correctness,
+ and is useful for tracking down bugs.
+
+ noasyncreaddir
+ Do not use the dcache as above for readdir.
+
+More Information
+================
+
+For more information on Ceph, see the home page at
+ http://ceph.newdream.net/
+
+The Linux kernel client source tree is available at
+ git://ceph.newdream.net/git/ceph-client.git
+ git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
+
+and the source for the full system is at
+ git://ceph.newdream.net/git/ceph.git
diff --git a/Documentation/filesystems/cifs/AUTHORS b/Documentation/filesystems/cifs/AUTHORS
new file mode 100644
index 000000000..c98800df6
--- /dev/null
+++ b/Documentation/filesystems/cifs/AUTHORS
@@ -0,0 +1,57 @@
+Original Author
+===============
+Steve French (sfrench@samba.org)
+
+The author wishes to express his appreciation and thanks to:
+Andrew Tridgell (Samba team) for his early suggestions about smb/cifs VFS
+improvements. Thanks to IBM for allowing me time and test resources to pursue
+this project, to Jim McDonough from IBM (and the Samba Team) for his help, to
+the IBM Linux JFS team for explaining many esoteric Linux filesystem features.
+Jeremy Allison of the Samba team has done invaluable work in adding the server
+side of the original CIFS Unix extensions and reviewing and implementing
+portions of the newer CIFS POSIX extensions into the Samba 3 file server. Thank
+Dave Boutcher of IBM Rochester (author of the OS/400 smb/cifs filesystem client)
+for proving years ago that very good smb/cifs clients could be done on Unix-like
+operating systems. Volker Lendecke, Andrew Tridgell, Urban Widmark, John
+Newbigin and others for their work on the Linux smbfs module. Thanks to
+the other members of the Storage Network Industry Association CIFS Technical
+Workgroup for their work specifying this highly complex protocol and finally
+thanks to the Samba team for their technical advice and encouragement.
+
+Patch Contributors
+------------------
+Zwane Mwaikambo
+Andi Kleen
+Amrut Joshi
+Shobhit Dayal
+Sergey Vlasov
+Richard Hughes
+Yury Umanets
+Mark Hamzy (for some of the early cifs IPv6 work)
+Domen Puncer
+Jesper Juhl (in particular for lots of whitespace/formatting cleanup)
+Vince Negri and Dave Stahl (for finding an important caching bug)
+Adrian Bunk (kcalloc cleanups)
+Miklos Szeredi
+Kazeon team for various fixes especially for 2.4 version.
+Asser Ferno (Change Notify support)
+Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanup
+Gunter Kukkukk (testing and suggestions for support of old servers)
+Igor Mammedov (DFS support)
+Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
+Scott Lovenberg
+Pavel Shilovsky (for great work adding SMB2 support, and various SMB3 features)
+
+Test case and Bug Report contributors
+-------------------------------------
+Thanks to those in the community who have submitted detailed bug reports
+and debug of problems they have found: Jochen Dolze, David Blaine,
+Rene Scharfe, Martin Josefsson, Alexander Wild, Anthony Liguori,
+Lars Muller, Urban Widmark, Massimiliano Ferrero, Howard Owen,
+Olaf Kirch, Kieron Briggs, Nick Millington and others. Also special
+mention to the Stanford Checker (SWAT) which pointed out many minor
+bugs in error paths. Valuable suggestions also have come from Al Viro
+and Dave Miller.
+
+And thanks to the IBM LTC and Power test teams and SuSE testers for
+finding multiple bugs during excellent stress test runs.
diff --git a/Documentation/filesystems/cifs/CHANGES b/Documentation/filesystems/cifs/CHANGES
new file mode 100644
index 000000000..bc0025cdd
--- /dev/null
+++ b/Documentation/filesystems/cifs/CHANGES
@@ -0,0 +1,1065 @@
+Version 1.62
+------------
+Add sockopt=TCP_NODELAY mount option. EA (xattr) routines hardened
+to more strictly handle corrupt frames.
+
+Version 1.61
+------------
+Fix append problem to Samba servers (files opened with O_APPEND could
+have duplicated data). Fix oops in cifs_lookup. Workaround problem
+mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
+Disable use of server inode numbers when server only
+partially supports them (e.g. for one server querying inode numbers on
+FindFirst fails but QPathInfo queries works). Fix oops with dfs in
+cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
+for OpenOffice when on forcedirectio mount e.g.)
+
+Version 1.60
+-------------
+Fix memory leak in reconnect. Fix oops in DFS mount error path.
+Set s_maxbytes to smaller (the max that vfs can handle) so that
+sendfile will now work over cifs mounts again. Add noforcegid
+and noforceuid mount parameters. Fix small mem leak when using
+ntlmv2. Fix 2nd mount to same server but with different port to
+be allowed (rather than reusing the 1st port) - only when the
+user explicitly overrides the port on the 2nd mount.
+
+Version 1.59
+------------
+Client uses server inode numbers (which are persistent) rather than
+client generated ones by default (mount option "serverino" turned
+on by default if server supports it). Add forceuid and forcegid
+mount options (so that when negotiating unix extensions specifying
+which uid mounted does not immediately force the server's reported
+uids to be overridden). Add support for scope mount parm. Improve
+hard link detection to use same inode for both. Do not set
+read-only dos attribute on directories (for chmod) since Windows
+explorer special cases this attribute bit for directories for
+a different purpose.
+
+Version 1.58
+------------
+Guard against buffer overruns in various UCS-2 to UTF-8 string conversions
+when the UTF-8 string is composed of unusually long (more than 4 byte) converted
+characters. Add support for mounting root of a share which redirects immediately
+to DFS target. Convert string conversion functions from Unicode to more
+accurately mark string length before allocating memory (which may help the
+rare cases where a UTF-8 string is much larger than the UCS2 string that
+we converted from). Fix endianness of the vcnum field used during
+session setup to distinguish multiple mounts to same server from different
+userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental
+flag to be set to 2, and mount must enable krb5 to turn on extended security).
+Performance of file create to Samba improved (posix create on lookup
+removes 1 of 2 network requests sent on file create)
+
+Version 1.57
+------------
+Improve support for multiple security contexts to the same server. We
+used to use the same "vcnumber" for all connections which could cause
+the server to treat subsequent connections, especially those that
+are authenticated as guest, as reconnections, invalidating the earlier
+user's smb session. This fix allows cifs to mount multiple times to the
+same server with different userids without risking invalidating earlier
+established security contexts. fsync now sends SMB Flush operation
+to better ensure that we wait for server to write all of the data to
+server disk (not just write it over the network). Add new mount
+parameter to allow user to disable sending the (slow) SMB flush on
+fsync if desired (fsync still flushes all cached write data to the server).
+Posix file open support added (turned off after one attempt if server
+fails to support it properly, as with Samba server versions prior to 3.3.2)
+Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too
+little memory for the "nativeFileSystem" field returned by the server
+during mount). Endian convert inode numbers if necessary (makes it easier
+to compare inode numbers on network files from big endian systems).
+
+Version 1.56
+------------
+Add "forcemandatorylock" mount option to allow user to use mandatory
+rather than posix (advisory) byte range locks, even though server would
+support posix byte range locks. Fix query of root inode when prefixpath
+specified and user does not have access to query information about the
+top of the share. Fix problem in 2.6.28 resolving DFS paths to
+Samba servers (worked to Windows). Fix rmdir so that pending search
+(readdir) requests do not get invalid results which include the now
+removed directory. Fix oops in cifs_dfs_ref.c when prefixpath is not reachable
+when using DFS. Add better file create support to servers which support
+the CIFS POSIX protocol extensions (this adds support for new flags
+on create, and improves semantics for write of locked ranges).
+
+Version 1.55
+------------
+Various fixes to make delete of open files behavior more predictable
+(when delete of an open file fails we mark the file as "delete-on-close"
+in a way that more servers accept, but only if we can first rename the
+file to a temporary name). Add experimental support for more safely
+handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp
+sends, and also let tcp autotune the socket send and receive buffers.
+This reduces the number of EAGAIN errors returned by TCP/IP in
+high stress workloads (and the number of retries on socket writes
+when sending large SMBWriteX requests). Fix case in which a portion of
+data can in some cases not get written to the file on the server before the
+file is closed. Fix DFS parsing to properly handle path consumed field,
+and to handle certain codepage conversions better. Fix mount and
+umount race that can cause oops in mount or umount or reconnect.
+
+Version 1.54
+------------
+Fix premature write failure on congested networks (we would give up
+on EAGAIN from the socket too quickly on large writes).
+Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
+Fix endian problems in acl (mode from/to cifs acl) on bigendian
+architectures. Fix problems with preserving timestamps on copying open
+files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit
+on parent directory when server supports Unix Extensions but not POSIX
+create. Update cifs.upcall version to handle new Kerberos sec flags
+(this requires update of cifs.upcall program from Samba). Fix memory leak
+on dns_upcall (resolving DFS referralls). Fix plain text password
+authentication (requires setting SecurityFlags to 0x30030 to enable
+lanman and plain text though). Fix writes to be at correct offset when
+file is open with O_APPEND and file is on a directio (forcediretio) mount.
+Fix bug in rewinding readdir directory searches. Add nodfs mount option.
+
+Version 1.53
+------------
+DFS support added (Microsoft Distributed File System client support needed
+for referrals which enable a hierarchical name space among servers).
+Disable temporary caching of mode bits to servers which do not support
+storing of mode (e.g. Windows servers, when client mounts without cifsacl
+mount option) and add new "dynperm" mount option to enable temporary caching
+of mode (enable old behavior). Fix hang on mount caused when server crashes
+tcp session during negotiate protocol.
+
+Version 1.52
+------------
+Fix oops on second mount to server when null auth is used.
+Enable experimental Kerberos support. Return writebehind errors on flush
+and sync so that events like out of disk space get reported properly on
+cached files. Fix setxattr failure to certain Samba versions. Fix mount
+of second share to disconnected server session (autoreconnect on this).
+Add ability to modify cifs acls for handling chmod (when mounted with
+cifsacl flag). Fix prefixpath path separator so we can handle mounts
+with prefixpaths longer than one directory (one path component) when
+mounted to Windows servers. Fix slow file open when cifsacl
+enabled. Fix memory leak in FindNext when the SMB call returns -EBADF.
+
+
+Version 1.51
+------------
+Fix memory leak in statfs when mounted to very old servers (e.g.
+Windows 9x). Add new feature "POSIX open" which allows servers
+which support the current POSIX Extensions to provide better semantics
+(e.g. delete for open files opened with posix open). Take into
+account umask on posix mkdir not just older style mkdir. Add
+ability to mount to IPC$ share (which allows CIFS named pipes to be
+opened, read and written as if they were files). When 1st tree
+connect fails (e.g. due to signing negotiation failure) fix
+leak that causes cifsd not to stop and rmmod to fail to cleanup
+cifs_request_buffers pool. Fix problem with POSIX Open/Mkdir on
+bigendian architectures. Fix possible memory corruption when
+EAGAIN returned on kern_recvmsg. Return better error if server
+requires packet signing but client has disabled it. When mounted
+with cifsacl mount option - mode bits are approximated based
+on the contents of the ACL of the file or directory. When cifs
+mount helper is missing convert make sure that UNC name
+has backslash (not forward slash) between ip address of server
+and the share name.
+
+Version 1.50
+------------
+Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is
+done with "serverino" mount option). Add support for POSIX Unlink
+(helps with certain sharing violation cases when server such as
+Samba supports newer POSIX CIFS Protocol Extensions). Add "nounix"
+mount option to allow disabling the CIFS Unix Extensions for just
+that mount. Fix hang on spinlock in find_writable_file (race when
+reopening file after session crash). Byte range unlock request to
+windows server could unlock more bytes (on server copy of file)
+than intended if start of unlock request is well before start of
+a previous byte range lock that we issued.
+
+Version 1.49
+------------
+IPv6 support. Enable ipv6 addresses to be passed on mount (put the ipv6
+address after the "ip=" mount option, at least until mount.cifs is fixed to
+handle DNS host to ipv6 name translation). Accept override of uid or gid
+on mount even when Unix Extensions are negotiated (it used to be ignored
+when Unix Extensions were ignored). This allows users to override the
+default uid and gid for files when they are certain that the uids or
+gids on the server do not match those of the client. Make "sec=none"
+mount override username (so that null user connection is attempted)
+to match what documentation said. Support for very large reads, over 127K,
+available to some newer servers (such as Samba 3.0.26 and later but
+note that it also requires setting CIFSMaxBufSize at module install
+time to a larger value which may hurt performance in some cases).
+Make sign option force signing (or fail if server does not support it).
+
+Version 1.48
+------------
+Fix mtime bouncing around from local idea of last write times to remote time.
+Fix hang (in i_size_read) when simultaneous size update of same remote file
+on smp system corrupts sequence number. Do not reread unnecessarily partial page
+(which we are about to overwrite anyway) when writing out file opened rw.
+When DOS attribute of file on non-Unix server's file changes on the server side
+from read-only back to read-write, reflect this change in default file mode
+(we had been leaving a file's mode read-only until the inode were reloaded).
+Allow setting of attribute back to ATTR_NORMAL (removing readonly dos attribute
+when archive dos attribute not set and we are changing mode back to writeable
+on server which does not support the Unix Extensions). Remove read only dos
+attribute on chmod when adding any write permission (ie on any of
+user/group/other (not all of user/group/other ie 0222) when
+mounted to windows. Add support for POSIX MkDir (slight performance
+enhancement and eliminates the network race between the mkdir and set
+path info of the mode).
+
+
+Version 1.47
+------------
+Fix oops in list_del during mount caused by unaligned string.
+Fix file corruption which could occur on some large file
+copies caused by writepages page i/o completion bug.
+Seek to SEEK_END forces check for update of file size for non-cached
+files. Allow file size to be updated on remote extend of locally open,
+non-cached file. Fix reconnect to newer Samba servers (or other servers
+which support the CIFS Unix/POSIX extensions) so that we again tell the
+server the Unix/POSIX cifs capabilities which we support (SetFSInfo).
+Add experimental support for new POSIX Open/Mkdir (which returns
+stat information on the open, and allows setting the mode).
+
+Version 1.46
+------------
+Support deep tree mounts. Better support OS/2, Win9x (DOS) time stamps.
+Allow null user to be specified on mount ("username="). Do not return
+EINVAL on readdir when filldir fails due to overwritten blocksize
+(fixes FC problem). Return error in rename 2nd attempt retry (ie report
+if rename by handle also fails, after rename by path fails, we were
+not reporting whether the retry worked or not). Fix NTLMv2 to
+work to Windows servers (mount with option "sec=ntlmv2").
+
+Version 1.45
+------------
+Do not time out lockw calls when using posix extensions. Do not
+time out requests if server still responding reasonably fast
+on requests on other threads. Improve POSIX locking emulation,
+(lock cancel now works, and unlock of merged range works even
+to Windows servers now). Fix oops on mount to lanman servers
+(win9x, os/2 etc.) when null password. Do not send listxattr
+(SMB to query all EAs) if nouser_xattr specified. Fix SE Linux
+problem (instantiate inodes/dentries in right order for readdir).
+
+Version 1.44
+------------
+Rewritten sessionsetup support, including support for legacy SMB
+session setup needed for OS/2 and older servers such as Windows 95 and 98.
+Fix oops on ls to OS/2 servers. Add support for level 1 FindFirst
+so we can do search (ls etc.) to OS/2. Do not send NTCreateX
+or recent levels of FindFirst unless server says it supports NT SMBs
+(instead use legacy equivalents from LANMAN dialect). Fix to allow
+NTLMv2 authentication support (now can use stronger password hashing
+on mount if corresponding /proc/fs/cifs/SecurityFlags is set (0x4004).
+Allow override of global cifs security flags on mount via "sec=" option(s).
+
+Version 1.43
+------------
+POSIX locking to servers which support CIFS POSIX Extensions
+(disabled by default controlled by proc/fs/cifs/Experimental).
+Handle conversion of long share names (especially Asian languages)
+to Unicode during mount. Fix memory leak in sess struct on reconnect.
+Fix rare oops after acpi suspend. Fix O_TRUNC opens to overwrite on
+cifs open which helps rare case when setpathinfo fails or server does
+not support it.
+
+Version 1.42
+------------
+Fix slow oplock break when mounted to different servers at the same time and
+the tids match and we try to find matching fid on wrong server. Fix read
+looping when signing required by server (2.6.16 kernel only). Fix readdir
+vs. rename race which could cause each to hang. Return . and .. even
+if server does not. Allow searches to skip first three entries and
+begin at any location. Fix oops in find_writeable_file.
+
+Version 1.41
+------------
+Fix NTLMv2 security (can be enabled in /proc/fs/cifs) so customers can
+configure stronger authentication. Fix sfu symlinks so they can
+be followed (not just recognized). Fix wraparound of bcc on
+read responses when buffer size over 64K and also fix wrap of
+max smb buffer size when CIFSMaxBufSize over 64K. Fix oops in
+cifs_user_read and cifs_readpages (when EAGAIN on send of smb
+on socket is returned over and over). Add POSIX (advisory) byte range
+locking support (requires server with newest CIFS UNIX Extensions
+to the protocol implemented). Slow down negprot slightly in port 139
+RFC1001 case to give session_init time on buggy servers.
+
+Version 1.40
+------------
+Use fsuid (fsgid) more consistently instead of uid (gid). Improve performance
+of readpages by eliminating one extra memcpy. Allow update of file size
+from remote server even if file is open for write as long as mount is
+directio. Recognize share mode security and send NTLM encrypted password
+on tree connect if share mode negotiated.
+
+Version 1.39
+------------
+Defer close of a file handle slightly if pending writes depend on that handle
+(this reduces the EBADF bad file handle errors that can be logged under heavy
+stress on writes). Modify cifs Kconfig options to expose CONFIG_CIFS_STATS2
+Fix SFU style symlinks and mknod needed for servers which do not support the
+CIFS Unix Extensions. Fix setfacl/getfacl on bigendian. Timeout negative
+dentries so files that the client sees as deleted but that later get created
+on the server will be recognized. Add client side permission check on setattr.
+Timeout stuck requests better (where server has never responded or sent corrupt
+responses)
+
+Version 1.38
+------------
+Fix tcp socket retransmission timeouts (e.g. on ENOSPACE from the socket)
+to be smaller at first (but increasing) so large write performance performance
+over GigE is better. Do not hang thread on illegal byte range lock response
+from Windows (Windows can send an RFC1001 size which does not match smb size) by
+allowing an SMBs TCP length to be up to a few bytes longer than it should be.
+wsize and rsize can now be larger than negotiated buffer size if server
+supports large readx/writex, even when directio mount flag not specified.
+Write size will in many cases now be 16K instead of 4K which greatly helps
+file copy performance on lightly loaded networks. Fix oops in dnotify
+when experimental config flag enabled. Make cifsFYI more granular.
+
+Version 1.37
+------------
+Fix readdir caching when unlink removes file in current search buffer,
+and this is followed by a rewind search to just before the deleted entry.
+Do not attempt to set ctime unless atime and/or mtime change requested
+(most servers throw it away anyway). Fix length check of received smbs
+to be more accurate. Fix big endian problem with mapchars mount option,
+and with a field returned by statfs.
+
+Version 1.36
+------------
+Add support for mounting to older pre-CIFS servers such as Windows9x and ME.
+For these older servers, add option for passing netbios name of server in
+on mount (servernetbiosname). Add suspend support for power management, to
+avoid cifsd thread preventing software suspend from working.
+Add mount option for disabling the default behavior of sending byte range lock
+requests to the server (necessary for certain applications which break with
+mandatory lock behavior such as Evolution), and also mount option for
+requesting case insensitive matching for path based requests (requesting
+case sensitive is the default).
+
+Version 1.35
+------------
+Add writepage performance improvements. Fix path name conversions
+for long filenames on mounts which were done with "mapchars" mount option
+specified. Ensure multiplex ids do not collide. Fix case in which
+rmmod can oops if done soon after last unmount. Fix truncated
+search (readdir) output when resume filename was a long filename.
+Fix filename conversion when mapchars mount option was specified and
+filename was a long filename.
+
+Version 1.34
+------------
+Fix error mapping of the TOO_MANY_LINKS (hardlinks) case.
+Do not oops if root user kills cifs oplock kernel thread or
+kills the cifsd thread (NB: killing the cifs kernel threads is not
+recommended, unmount and rmmod cifs will kill them when they are
+no longer needed). Fix readdir to ASCII servers (ie older servers
+which do not support Unicode) and also require asterisk.
+Fix out of memory case in which data could be written one page
+off in the page cache.
+
+Version 1.33
+------------
+Fix caching problem, in which readdir of directory containing a file
+which was cached could cause the file's time stamp to be updated
+without invalidating the readahead data (so we could get stale
+file data on the client for that file even as the server copy changed).
+Cleanup response processing so cifsd can not loop when abnormally
+terminated.
+
+
+Version 1.32
+------------
+Fix oops in ls when Transact2 FindFirst (or FindNext) returns more than one
+transact response for an SMB request and search entry split across two frames.
+Add support for lsattr (getting ext2/ext3/reiserfs attr flags from the server)
+as new protocol extensions. Do not send Get/Set calls for POSIX ACLs
+unless server explicitly claims to support them in CIFS Unix extensions
+POSIX ACL capability bit. Fix packet signing when multiuser mounting with
+different users from the same client to the same server. Fix oops in
+cifs_close. Add mount option for remapping reserved characters in
+filenames (also allow recognizing files with created by SFU which have any
+of these seven reserved characters, except backslash, to be recognized).
+Fix invalid transact2 message (we were sometimes trying to interpret
+oplock breaks as SMB responses). Add ioctl for checking that the
+current uid matches the uid of the mounter (needed by umount.cifs).
+Reduce the number of large buffer allocations in cifs response processing
+(significantly reduces memory pressure under heavy stress with multiple
+processes accessing the same server at the same time).
+
+Version 1.31
+------------
+Fix updates of DOS attributes and time fields so that files on NT4 servers
+do not get marked delete on close. Display sizes of cifs buffer pools in
+cifs stats. Fix oops in unmount when cifsd thread being killed by
+shutdown. Add generic readv/writev and aio support. Report inode numbers
+consistently in readdir and lookup (when serverino mount option is
+specified use the inode number that the server reports - for both lookup
+and readdir, otherwise by default the locally generated inode number is used
+for inodes created in either path since servers are not always able to
+provide unique inode numbers when exporting multiple volumes from under one
+sharename).
+
+Version 1.30
+------------
+Allow new nouser_xattr mount parm to disable xattr support for user namespace.
+Do not flag user_xattr mount parm in dmesg. Retry failures setting file time
+(mostly affects NT4 servers) by retry with handle based network operation.
+Add new POSIX Query FS Info for returning statfs info more accurately.
+Handle passwords with multiple commas in them.
+
+Version 1.29
+------------
+Fix default mode in sysfs of cifs module parms. Remove old readdir routine.
+Fix capabilities flags for large readx so as to allow reads larger than 64K.
+
+Version 1.28
+------------
+Add module init parm for large SMB buffer size (to allow it to be changed
+from its default of 16K) which is especially useful for large file copy
+when mounting with the directio mount option. Fix oops after
+returning from mount when experimental ExtendedSecurity enabled and
+SpnegoNegotiated returning invalid error. Fix case to retry better when
+peek returns from 1 to 3 bytes on socket which should have more data.
+Fixed path based calls (such as cifs lookup) to handle path names
+longer than 530 (now can handle PATH_MAX). Fix pass through authentication
+from Samba server to DC (Samba required dummy LM password).
+
+Version 1.27
+------------
+Turn off DNOTIFY (directory change notification support) by default
+(unless built with the experimental flag) to fix hang with KDE
+file browser. Fix DNOTIFY flag mappings. Fix hang (in wait_event
+waiting on an SMB response) in SendReceive when session dies but
+reconnects quickly from another task. Add module init parms for
+minimum number of large and small network buffers in the buffer pools,
+and for the maximum number of simultaneous requests.
+
+Version 1.26
+------------
+Add setfacl support to allow setting of ACLs remotely to Samba 3.10 and later
+and other POSIX CIFS compliant servers. Fix error mapping for getfacl
+to EOPNOTSUPP when server does not support posix acls on the wire. Fix
+improperly zeroed buffer in CIFS Unix extensions set times call.
+
+Version 1.25
+------------
+Fix internationalization problem in cifs readdir with filenames that map to
+longer UTF-8 strings than the string on the wire was in Unicode. Add workaround
+for readdir to netapp servers. Fix search rewind (seek into readdir to return
+non-consecutive entries). Do not do readdir when server negotiates
+buffer size to small to fit filename. Add support for reading POSIX ACLs from
+the server (add also acl and noacl mount options).
+
+Version 1.24
+------------
+Optionally allow using server side inode numbers, rather than client generated
+ones by specifying mount option "serverino" - this is required for some apps
+to work which double check hardlinked files and have persistent inode numbers.
+
+Version 1.23
+------------
+Multiple bigendian fixes. On little endian systems (for reconnect after
+network failure) fix tcp session reconnect code so we do not try first
+to reconnect on reverse of port 445. Treat reparse points (NTFS junctions)
+as directories rather than symlinks because we can do follow link on them.
+
+Version 1.22
+------------
+Add config option to enable XATTR (extended attribute) support, mapping
+xattr names in the "user." namespace space to SMB/CIFS EAs. Lots of
+minor fixes pointed out by the Stanford SWAT checker (mostly missing
+or out of order NULL pointer checks in little used error paths).
+
+Version 1.21
+------------
+Add new mount parm to control whether mode check (generic_permission) is done
+on the client. If Unix extensions are enabled and the uids on the client
+and server do not match, client permission checks are meaningless on
+server uids that do not exist on the client (this does not affect the
+normal ACL check which occurs on the server). Fix default uid
+on mknod to match create and mkdir. Add optional mount parm to allow
+override of the default uid behavior (in which the server sets the uid
+and gid of newly created files). Normally for network filesystem mounts
+user want the server to set the uid/gid on newly created files (rather than
+using uid of the client processes you would in a local filesystem).
+
+Version 1.20
+------------
+Make transaction counts more consistent. Merge /proc/fs/cifs/SimultaneousOps
+info into /proc/fs/cifs/DebugData. Fix oops in rare oops in readdir
+(in build_wildcard_path_from_dentry). Fix mknod to pass type field
+(block/char/fifo) properly. Remove spurious mount warning log entry when
+credentials passed as mount argument. Set major/minor device number in
+inode for block and char devices when unix extensions enabled.
+
+Version 1.19
+------------
+Fix /proc/fs/cifs/Stats and DebugData display to handle larger
+amounts of return data. Properly limit requests to MAX_REQ (50
+is the usual maximum active multiplex SMB/CIFS requests per server).
+Do not kill cifsd (and thus hurt the other SMB session) when more than one
+session to the same server (but with different userids) exists and one
+of the two user's smb sessions is being removed while leaving the other.
+Do not loop reconnecting in cifsd demultiplex thread when admin
+kills the thread without going through unmount.
+
+Version 1.18
+------------
+Do not rename hardlinked files (since that should be a noop). Flush
+cached write behind data when reopening a file after session abend,
+except when already in write. Grab per socket sem during reconnect
+to avoid oops in sendmsg if overlapping with reconnect. Do not
+reset cached inode file size on readdir for files open for write on
+client.
+
+
+Version 1.17
+------------
+Update number of blocks in file so du command is happier (in Linux a fake
+blocksize of 512 is required for calculating number of blocks in inode).
+Fix prepare write of partial pages to read in data from server if possible.
+Fix race on tcpStatus field between unmount and reconnection code, causing
+cifsd process sometimes to hang around forever. Improve out of memory
+checks in cifs_filldir
+
+Version 1.16
+------------
+Fix incorrect file size in file handle based setattr on big endian hardware.
+Fix oops in build_path_from_dentry when out of memory. Add checks for invalid
+and closing file structs in writepage/partialpagewrite. Add statistics
+for each mounted share (new menuconfig option). Fix endianness problem in
+volume information displayed in /proc/fs/cifs/DebugData (only affects
+affects big endian architectures). Prevent renames while constructing
+path names for open, mkdir and rmdir.
+
+Version 1.15
+------------
+Change to mempools for alloc smb request buffers and multiplex structs
+to better handle low memory problems (and potential deadlocks).
+
+Version 1.14
+------------
+Fix incomplete listings of large directories on Samba servers when Unix
+extensions enabled. Fix oops when smb_buffer can not be allocated. Fix
+rename deadlock when writing out dirty pages at same time.
+
+Version 1.13
+------------
+Fix open of files in which O_CREATE can cause the mode to change in
+some cases. Fix case in which retry of write overlaps file close.
+Fix PPC64 build error. Reduce excessive stack usage in smb password
+hashing. Fix overwrite of Linux user's view of file mode to Windows servers.
+
+Version 1.12
+------------
+Fixes for large file copy, signal handling, socket retry, buffer
+allocation and low memory situations.
+
+Version 1.11
+------------
+Better port 139 support to Windows servers (RFC1001/RFC1002 Session_Initialize)
+also now allowing support for specifying client netbiosname. NT4 support added.
+
+Version 1.10
+------------
+Fix reconnection (and certain failed mounts) to properly wake up the
+blocked users thread so it does not seem hung (in some cases was blocked
+until the cifs receive timeout expired). Fix spurious error logging
+to kernel log when application with open network files killed.
+
+Version 1.09
+------------
+Fix /proc/fs module unload warning message (that could be logged
+to the kernel log). Fix intermittent failure in connectathon
+test7 (hardlink count not immediately refreshed in case in which
+inode metadata can be incorrectly kept cached when time near zero)
+
+Version 1.08
+------------
+Allow file_mode and dir_mode (specified at mount time) to be enforced
+locally (the server already enforced its own ACLs too) for servers
+that do not report the correct mode (do not support the
+CIFS Unix Extensions).
+
+Version 1.07
+------------
+Fix some small memory leaks in some unmount error paths. Fix major leak
+of cache pages in readpages causing multiple read oriented stress
+testcases (including fsx, and even large file copy) to fail over time.
+
+Version 1.06
+------------
+Send NTCreateX with ATTR_POSIX if Linux/Unix extensions negotiated with server.
+This allows files that differ only in case and improves performance of file
+creation and file open to such servers. Fix semaphore conflict which causes
+slow delete of open file to Samba (which unfortunately can cause an oplock
+break to self while vfs_unlink held i_sem) which can hang for 20 seconds.
+
+Version 1.05
+------------
+fixes to cifs_readpages for fsx test case
+
+Version 1.04
+------------
+Fix caching data integrity bug when extending file size especially when no
+oplock on file. Fix spurious logging of valid already parsed mount options
+that are parsed outside of the cifs vfs such as nosuid.
+
+
+Version 1.03
+------------
+Connect to server when port number override not specified, and tcp port
+unitialized. Reset search to restart at correct file when kernel routine
+filldir returns error during large directory searches (readdir).
+
+Version 1.02
+------------
+Fix caching problem when files opened by multiple clients in which
+page cache could contain stale data, and write through did
+not occur often enough while file was still open when read ahead
+(read oplock) not allowed. Treat "sep=" when first mount option
+as an override of comma as the default separator between mount
+options.
+
+Version 1.01
+------------
+Allow passwords longer than 16 bytes. Allow null password string.
+
+Version 1.00
+------------
+Gracefully clean up failed mounts when attempting to mount to servers such as
+Windows 98 that terminate tcp sessions during protocol negotiation. Handle
+embedded commas in mount parsing of passwords.
+
+Version 0.99
+------------
+Invalidate local inode cached pages on oplock break and when last file
+instance is closed so that the client does not continue using stale local
+copy rather than later modified server copy of file. Do not reconnect
+when server drops the tcp session prematurely before negotiate
+protocol response. Fix oops in reopen_file when dentry freed. Allow
+the support for CIFS Unix Extensions to be disabled via proc interface.
+
+Version 0.98
+------------
+Fix hang in commit_write during reconnection of open files under heavy load.
+Fix unload_nls oops in a mount failure path. Serialize writes to same socket
+which also fixes any possible races when cifs signatures are enabled in SMBs
+being sent out of signature sequence number order.
+
+Version 0.97
+------------
+Fix byte range locking bug (endian problem) causing bad offset and
+length.
+
+Version 0.96
+------------
+Fix oops (in send_sig) caused by CIFS unmount code trying to
+wake up the demultiplex thread after it had exited. Do not log
+error on harmless oplock release of closed handle.
+
+Version 0.95
+------------
+Fix unsafe global variable usage and password hash failure on gcc 3.3.1
+Fix problem reconnecting secondary mounts to same server after session
+failure. Fix invalid dentry - race in mkdir when directory gets created
+by another client between the lookup and mkdir.
+
+Version 0.94
+------------
+Fix to list processing in reopen_files. Fix reconnection when server hung
+but tcpip session still alive. Set proper timeout on socket read.
+
+Version 0.93
+------------
+Add missing mount options including iocharset. SMP fixes in write and open.
+Fix errors in reconnecting after TCP session failure. Fix module unloading
+of default nls codepage
+
+Version 0.92
+------------
+Active smb transactions should never go negative (fix double FreeXid). Fix
+list processing in file routines. Check return code on kmalloc in open.
+Fix spinlock usage for SMP.
+
+Version 0.91
+------------
+Fix oops in reopen_files when invalid dentry. drop dentry on server rename
+and on revalidate errors. Fix cases where pid is now tgid. Fix return code
+on create hard link when server does not support them.
+
+Version 0.90
+------------
+Fix scheduling while atomic error in getting inode info on newly created file.
+Fix truncate of existing files opened with O_CREAT but not O_TRUNC set.
+
+Version 0.89
+------------
+Fix oops on write to dead tcp session. Remove error log write for case when file open
+O_CREAT but not O_EXCL
+
+Version 0.88
+------------
+Fix non-POSIX behavior on rename of open file and delete of open file by taking
+advantage of trans2 SetFileInfo rename facility if available on target server.
+Retry on ENOSPC and EAGAIN socket errors.
+
+Version 0.87
+------------
+Fix oops on big endian readdir. Set blksize to be even power of two (2**blkbits) to fix
+allocation size miscalculation. After oplock token lost do not read through
+cache.
+
+Version 0.86
+------------
+Fix oops on empty file readahead. Fix for file size handling for locally cached files.
+
+Version 0.85
+------------
+Fix oops in mkdir when server fails to return inode info. Fix oops in reopen_files
+during auto reconnection to server after server recovered from failure.
+
+Version 0.84
+------------
+Finish support for Linux 2.5 open/create changes, which removes the
+redundant NTCreate/QPathInfo/close that was sent during file create.
+Enable oplock by default. Enable packet signing by default (needed to
+access many recent Windows servers)
+
+Version 0.83
+------------
+Fix oops when mounting to long server names caused by inverted parms to kmalloc.
+Fix MultiuserMount (/proc/fs/cifs configuration setting) so that when enabled
+we will choose a cifs user session (smb uid) that better matches the local
+uid if a) the mount uid does not match the current uid and b) we have another
+session to the same server (ip address) for a different mount which
+matches the current local uid.
+
+Version 0.82
+------------
+Add support for mknod of block or character devices. Fix oplock
+code (distributed caching) to properly send response to oplock
+break from server.
+
+Version 0.81
+------------
+Finish up CIFS packet digital signing for the default
+NTLM security case. This should help Windows 2003
+network interoperability since it is common for
+packet signing to be required now. Fix statfs (stat -f)
+which recently started returning errors due to
+invalid value (-1 instead of 0) being set in the
+struct kstatfs f_ffiles field.
+
+Version 0.80
+-----------
+Fix oops on stopping oplock thread when removing cifs when
+built as module.
+
+Version 0.79
+------------
+Fix mount options for ro (readonly), uid, gid and file and directory mode.
+
+Version 0.78
+------------
+Fix errors displayed on failed mounts to be more understandable.
+Fixed various incorrect or misleading smb to posix error code mappings.
+
+Version 0.77
+------------
+Fix display of NTFS DFS junctions to display as symlinks.
+They are the network equivalent. Fix oops in
+cifs_partialpagewrite caused by missing spinlock protection
+of openfile linked list. Allow writebehind caching errors to
+be returned to the application at file close.
+
+Version 0.76
+------------
+Clean up options displayed in /proc/mounts by show_options to
+be more consistent with other filesystems.
+
+Version 0.75
+------------
+Fix delete of readonly file to Windows servers. Reflect
+presence or absence of read only dos attribute in mode
+bits for servers that do not support CIFS Unix extensions.
+Fix shortened results on readdir of large directories to
+servers supporting CIFS Unix extensions (caused by
+incorrect resume key).
+
+Version 0.74
+------------
+Fix truncate bug (set file size) that could cause hangs e.g. running fsx
+
+Version 0.73
+------------
+unload nls if mount fails.
+
+Version 0.72
+------------
+Add resume key support to search (readdir) code to workaround
+Windows bug. Add /proc/fs/cifs/LookupCacheEnable which
+allows disabling caching of attribute information for
+lookups.
+
+Version 0.71
+------------
+Add more oplock handling (distributed caching code). Remove
+dead code. Remove excessive stack space utilization from
+symlink routines.
+
+Version 0.70
+------------
+Fix oops in get dfs referral (triggered when null path sent in to
+mount). Add support for overriding rsize at mount time.
+
+Version 0.69
+------------
+Fix buffer overrun in readdir which caused intermittent kernel oopses.
+Fix writepage code to release kmap on write data. Allow "-ip=" new
+mount option to be passed in on parameter distinct from the first part
+(server name portion of) the UNC name. Allow override of the
+tcp port of the target server via new mount option "-port="
+
+Version 0.68
+------------
+Fix search handle leak on rewind. Fix setuid and gid so that they are
+reflected in the local inode immediately. Cleanup of whitespace
+to make 2.4 and 2.5 versions more consistent.
+
+
+Version 0.67
+------------
+Fix signal sending so that captive thread (cifsd) exits on umount
+(which was causing the warning in kmem_cache_free of the request buffers
+at rmmod time). This had broken as a sideeffect of the recent global
+kernel change to daemonize. Fix memory leak in readdir code which
+showed up in "ls -R" (and applications that did search rewinding).
+
+Version 0.66
+------------
+Reconnect tids and fids after session reconnection (still do not
+reconnect byte range locks though). Fix problem caching
+lookup information for directory inodes, improving performance,
+especially in deep directory trees. Fix various build warnings.
+
+Version 0.65
+------------
+Finish fixes to commit write for caching/readahead consistency. fsx
+now works to Samba servers. Fix oops caused when readahead
+was interrupted by a signal.
+
+Version 0.64
+------------
+Fix data corruption (in partial page after truncate) that caused fsx to
+fail to Windows servers. Cleaned up some extraneous error logging in
+common error paths. Add generic sendfile support.
+
+Version 0.63
+------------
+Fix memory leak in AllocMidQEntry.
+Finish reconnection logic, so connection with server can be dropped
+(or server rebooted) and the cifs client will reconnect.
+
+Version 0.62
+------------
+Fix temporary socket leak when bad userid or password specified
+(or other SMBSessSetup failure). Increase maximum buffer size to slightly
+over 16K to allow negotiation of up to Samba and Windows server default read
+sizes. Add support for readpages
+
+Version 0.61
+------------
+Fix oops when username not passed in on mount. Extensive fixes and improvements
+to error logging (strip redundant newlines, change debug macros to ensure newline
+passed in and to be more consistent). Fix writepage wrong file handle problem,
+a readonly file handle could be incorrectly used to attempt to write out
+file updates through the page cache to multiply open files. This could cause
+the iozone benchmark to fail on the fwrite test. Fix bug mounting two different
+shares to the same Windows server when using different usernames
+(doing this to Samba servers worked but Windows was rejecting it) - now it is
+possible to use different userids when connecting to the same server from a
+Linux client. Fix oops when treeDisconnect called during unmount on
+previously freed socket.
+
+Version 0.60
+------------
+Fix oops in readpages caused by not setting address space operations in inode in
+rare code path.
+
+Version 0.59
+------------
+Includes support for deleting of open files and renaming over existing files (per POSIX
+requirement). Add readlink support for Windows junction points (directory symlinks).
+
+Version 0.58
+------------
+Changed read and write to go through pagecache. Added additional address space operations.
+Memory mapped operations now working.
+
+Version 0.57
+------------
+Added writepage code for additional memory mapping support. Fixed leak in xids causing
+the simultaneous operations counter (/proc/fs/cifs/SimultaneousOps) to increase on
+every stat call. Additional formatting cleanup.
+
+Version 0.56
+------------
+Fix bigendian bug in order of time conversion. Merge 2.5 to 2.4 version. Formatting cleanup.
+
+Version 0.55
+------------
+Fixes from Zwane Mwaikambo for adding missing return code checking in a few places.
+Also included a modified version of his fix to protect global list manipulation of
+the smb session and tree connection and mid related global variables.
+
+Version 0.54
+------------
+Fix problem with captive thread hanging around at unmount time. Adjust to 2.5.42-pre
+changes to superblock layout. Remove wasteful allocation of smb buffers (now the send
+buffer is reused for responses). Add more oplock handling. Additional minor cleanup.
+
+Version 0.53
+------------
+More stylistic updates to better match kernel style. Add additional statistics
+for filesystem which can be viewed via /proc/fs/cifs. Add more pieces of NTLMv2
+and CIFS Packet Signing enablement.
+
+Version 0.52
+------------
+Replace call to sleep_on with safer wait_on_event.
+Make stylistic changes to better match kernel style recommendations.
+Remove most typedef usage (except for the PDUs themselves).
+
+Version 0.51
+------------
+Update mount so the -unc mount option is no longer required (the ip address can be specified
+in a UNC style device name. Implementation of readpage/writepage started.
+
+Version 0.50
+------------
+Fix intermittent problem with incorrect smb header checking on badly
+fragmented tcp responses
+
+Version 0.49
+------------
+Fixes to setting of allocation size and file size.
+
+Version 0.48
+------------
+Various 2.5.38 fixes. Now works on 2.5.38
+
+Version 0.47
+------------
+Prepare for 2.5 kernel merge. Remove ifdefs.
+
+Version 0.46
+------------
+Socket buffer management fixes. Fix dual free.
+
+Version 0.45
+------------
+Various big endian fixes for hardlinks and symlinks and also for dfs.
+
+Version 0.44
+------------
+Various big endian fixes for servers with Unix extensions such as Samba
+
+Version 0.43
+------------
+Various FindNext fixes for incorrect filenames on large directory searches on big endian
+clients. basic posix file i/o tests now work on big endian machines, not just le
+
+Version 0.42
+------------
+SessionSetup and NegotiateProtocol now work from Big Endian machines.
+Various Big Endian fixes found during testing on the Linux on 390. Various fixes for compatibility with older
+versions of 2.4 kernel (now builds and works again on kernels at least as early as 2.4.7).
+
+Version 0.41
+------------
+Various minor fixes for Connectathon Posix "basic" file i/o test suite. Directory caching fixed so hardlinked
+files now return the correct number of links on fstat as they are repeatedly linked and unlinked.
+
+Version 0.40
+------------
+Implemented "Raw" (i.e. not encapsulated in SPNEGO) NTLMSSP (i.e. the Security Provider Interface used to negotiate
+session advanced session authentication). Raw NTLMSSP is preferred by Windows 2000 Professional and Windows XP.
+Began implementing support for SPNEGO encapsulation of NTLMSSP based session authentication blobs
+(which is the mechanism preferred by Windows 2000 server in the absence of Kerberos).
+
+Version 0.38
+------------
+Introduced optional mount helper utility mount.cifs and made coreq changes to cifs vfs to enable
+it. Fixed a few bugs in the DFS code (e.g. bcc two bytes too short and incorrect uid in PDU).
+
+Version 0.37
+------------
+Rewrote much of connection and mount/unmount logic to handle bugs with
+multiple uses to same share, multiple users to same server etc.
+
+Version 0.36
+------------
+Fixed major problem with dentry corruption (missing call to dput)
+
+Version 0.35
+------------
+Rewrite of readdir code to fix bug. Various fixes for bigendian machines.
+Begin adding oplock support. Multiusermount and oplockEnabled flags added to /proc/fs/cifs
+although corresponding function not fully implemented in the vfs yet
+
+Version 0.34
+------------
+Fixed dentry caching bug, misc. cleanup
+
+Version 0.33
+------------
+Fixed 2.5 support to handle build and configure changes as well as misc. 2.5 changes. Now can build
+on current 2.5 beta version (2.5.24) of the Linux kernel as well as on 2.4 Linux kernels.
+Support for STATUS codes (newer 32 bit NT error codes) added. DFS support begun to be added.
+
+Version 0.32
+------------
+Unix extensions (symlink, readlink, hardlink, chmod and some chgrp and chown) implemented
+and tested against Samba 2.2.5
+
+
+Version 0.31
+------------
+1) Fixed lockrange to be correct (it was one byte too short)
+
+2) Fixed GETLK (i.e. the fcntl call to test a range of bytes in a file to see if locked) to correctly
+show range as locked when there is a conflict with an existing lock.
+
+3) default file perms are now 2767 (indicating support for mandatory locks) instead of 777 for directories
+in most cases. Eventually will offer optional ability to query server for the correct perms.
+
+3) Fixed eventual trap when mounting twice to different shares on the same server when the first succeeded
+but the second one was invalid and failed (the second one was incorrectly disconnecting the tcp and smb
+session)
+
+4) Fixed error logging of valid mount options
+
+5) Removed logging of password field.
+
+6) Moved negotiate, treeDisconnect and uloggoffX (only tConx and SessSetup remain in connect.c) to cifssmb.c
+and cleaned them up and made them more consistent with other cifs functions.
+
+7) Server support for Unix extensions is now fully detected and FindFirst is implemented both ways
+(with or without Unix extensions) but FindNext and QueryPathInfo with the Unix extensions are not completed,
+nor is the symlink support using the Unix extensions
+
+8) Started adding the readlink and follow_link code
+
+Version 0.3
+-----------
+Initial drop
+
diff --git a/Documentation/filesystems/cifs/README b/Documentation/filesystems/cifs/README
new file mode 100644
index 000000000..2d5622f60
--- /dev/null
+++ b/Documentation/filesystems/cifs/README
@@ -0,0 +1,753 @@
+The CIFS VFS support for Linux supports many advanced network filesystem
+features such as hierarchical dfs like namespace, hardlinks, locking and more.
+It was designed to comply with the SNIA CIFS Technical Reference (which
+supersedes the 1992 X/Open SMB Standard) as well as to perform best practice
+practical interoperability with Windows 2000, Windows XP, Samba and equivalent
+servers. This code was developed in participation with the Protocol Freedom
+Information Foundation.
+
+Please see
+ http://protocolfreedom.org/ and
+ http://samba.org/samba/PFIF/
+for more details.
+
+
+For questions or bug reports please contact:
+ sfrench@samba.org (sfrench@us.ibm.com)
+
+Build instructions:
+==================
+For Linux 2.4:
+1) Get the kernel source (e.g.from http://www.kernel.org)
+and download the cifs vfs source (see the project page
+at http://us1.samba.org/samba/Linux_CIFS_client.html)
+and change directory into the top of the kernel directory
+then patch the kernel (e.g. "patch -p1 < cifs_24.patch")
+to add the cifs vfs to your kernel configure options if
+it has not already been added (e.g. current SuSE and UL
+users do not need to apply the cifs_24.patch since the cifs vfs is
+already in the kernel configure menu) and then
+mkdir linux/fs/cifs and then copy the current cifs vfs files from
+the cifs download to your kernel build directory e.g.
+
+ cp <cifs_download_dir>/fs/cifs/* to <kernel_download_dir>/fs/cifs
+
+2) make menuconfig (or make xconfig)
+3) select cifs from within the network filesystem choices
+4) save and exit
+5) make dep
+6) make modules (or "make" if CIFS VFS not to be built as a module)
+
+For Linux 2.6:
+1) Download the kernel (e.g. from http://www.kernel.org)
+and change directory into the top of the kernel directory tree
+(e.g. /usr/src/linux-2.5.73)
+2) make menuconfig (or make xconfig)
+3) select cifs from within the network filesystem choices
+4) save and exit
+5) make
+
+
+Installation instructions:
+=========================
+If you have built the CIFS vfs as module (successfully) simply
+type "make modules_install" (or if you prefer, manually copy the file to
+the modules directory e.g. /lib/modules/2.4.10-4GB/kernel/fs/cifs/cifs.o).
+
+If you have built the CIFS vfs into the kernel itself, follow the instructions
+for your distribution on how to install a new kernel (usually you
+would simply type "make install").
+
+If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on
+the CIFS VFS web site) copy it to the same directory in which mount.smbfs and
+similar files reside (usually /sbin). Although the helper software is not
+required, mount.cifs is recommended. Eventually the Samba 3.0 utility program
+"net" may also be helpful since it may someday provide easier mount syntax for
+users who are used to Windows e.g.
+ net use <mount point> <UNC name or cifs URL>
+Note that running the Winbind pam/nss module (logon service) on all of your
+Linux clients is useful in mapping Uids and Gids consistently across the
+domain to the proper network user. The mount.cifs mount helper can be
+trivially built from Samba 3.0 or later source e.g. by executing:
+
+ gcc samba/source/client/mount.cifs.c -o mount.cifs
+
+If cifs is built as a module, then the size and number of network buffers
+and maximum number of simultaneous requests to one server can be configured.
+Changing these from their defaults is not recommended. By executing modinfo
+ modinfo kernel/fs/cifs/cifs.ko
+on kernel/fs/cifs/cifs.ko the list of configuration changes that can be made
+at module initialization time (by running insmod cifs.ko) can be seen.
+
+Allowing User Mounts
+====================
+To permit users to mount and unmount over directories they own is possible
+with the cifs vfs. A way to enable such mounting is to mark the mount.cifs
+utility as suid (e.g. "chmod +s /sbin/mount.cifs). To enable users to
+umount shares they mount requires
+1) mount.cifs version 1.4 or later
+2) an entry for the share in /etc/fstab indicating that a user may
+unmount it e.g.
+//server/usersharename /mnt/username cifs user 0 0
+
+Note that when the mount.cifs utility is run suid (allowing user mounts),
+in order to reduce risks, the "nosuid" mount flag is passed in on mount to
+disallow execution of an suid program mounted on the remote target.
+When mount is executed as root, nosuid is not passed in by default,
+and execution of suid programs on the remote target would be enabled
+by default. This can be changed, as with nfs and other filesystems,
+by simply specifying "nosuid" among the mount options. For user mounts
+though to be able to pass the suid flag to mount requires rebuilding
+mount.cifs with the following flag:
+
+ gcc samba/source/client/mount.cifs.c -DCIFS_ALLOW_USR_SUID -o mount.cifs
+
+There is a corresponding manual page for cifs mounting in the Samba 3.0 and
+later source tree in docs/manpages/mount.cifs.8
+
+Allowing User Unmounts
+======================
+To permit users to ummount directories that they have user mounted (see above),
+the utility umount.cifs may be used. It may be invoked directly, or if
+umount.cifs is placed in /sbin, umount can invoke the cifs umount helper
+(at least for most versions of the umount utility) for umount of cifs
+mounts, unless umount is invoked with -i (which will avoid invoking a umount
+helper). As with mount.cifs, to enable user unmounts umount.cifs must be marked
+as suid (e.g. "chmod +s /sbin/umount.cifs") or equivalent (some distributions
+allow adding entries to a file to the /etc/permissions file to achieve the
+equivalent suid effect). For this utility to succeed the target path
+must be a cifs mount, and the uid of the current user must match the uid
+of the user who mounted the resource.
+
+Also note that the customary way of allowing user mounts and unmounts is
+(instead of using mount.cifs and unmount.cifs as suid) to add a line
+to the file /etc/fstab for each //server/share you wish to mount, but
+this can become unwieldy when potential mount targets include many
+or unpredictable UNC names.
+
+Samba Considerations
+====================
+To get the maximum benefit from the CIFS VFS, we recommend using a server that
+supports the SNIA CIFS Unix Extensions standard (e.g. Samba 2.2.5 or later or
+Samba 3.0) but the CIFS vfs works fine with a wide variety of CIFS servers.
+Note that uid, gid and file permissions will display default values if you do
+not have a server that supports the Unix extensions for CIFS (such as Samba
+2.2.5 or later). To enable the Unix CIFS Extensions in the Samba server, add
+the line:
+
+ unix extensions = yes
+
+to your smb.conf file on the server. Note that the following smb.conf settings
+are also useful (on the Samba server) when the majority of clients are Unix or
+Linux:
+
+ case sensitive = yes
+ delete readonly = yes
+ ea support = yes
+
+Note that server ea support is required for supporting xattrs from the Linux
+cifs client, and that EA support is present in later versions of Samba (e.g.
+3.0.6 and later (also EA support works in all versions of Windows, at least to
+shares on NTFS filesystems). Extended Attribute (xattr) support is an optional
+feature of most Linux filesystems which may require enabling via
+make menuconfig. Client support for extended attributes (user xattr) can be
+disabled on a per-mount basis by specifying "nouser_xattr" on mount.
+
+The CIFS client can get and set POSIX ACLs (getfacl, setfacl) to Samba servers
+version 3.10 and later. Setting POSIX ACLs requires enabling both XATTR and
+then POSIX support in the CIFS configuration options when building the cifs
+module. POSIX ACL support can be disabled on a per mount basic by specifying
+"noacl" on mount.
+
+Some administrators may want to change Samba's smb.conf "map archive" and
+"create mask" parameters from the default. Unless the create mask is changed
+newly created files can end up with an unnecessarily restrictive default mode,
+which may not be what you want, although if the CIFS Unix extensions are
+enabled on the server and client, subsequent setattr calls (e.g. chmod) can
+fix the mode. Note that creating special devices (mknod) remotely
+may require specifying a mkdev function to Samba if you are not using
+Samba 3.0.6 or later. For more information on these see the manual pages
+("man smb.conf") on the Samba server system. Note that the cifs vfs,
+unlike the smbfs vfs, does not read the smb.conf on the client system
+(the few optional settings are passed in on mount via -o parameters instead).
+Note that Samba 2.2.7 or later includes a fix that allows the CIFS VFS to delete
+open files (required for strict POSIX compliance). Windows Servers already
+supported this feature. Samba server does not allow symlinks that refer to files
+outside of the share, so in Samba versions prior to 3.0.6, most symlinks to
+files with absolute paths (ie beginning with slash) such as:
+ ln -s /mnt/foo bar
+would be forbidden. Samba 3.0.6 server or later includes the ability to create
+such symlinks safely by converting unsafe symlinks (ie symlinks to server
+files that are outside of the share) to a samba specific format on the server
+that is ignored by local server applications and non-cifs clients and that will
+not be traversed by the Samba server). This is opaque to the Linux client
+application using the cifs vfs. Absolute symlinks will work to Samba 3.0.5 or
+later, but only for remote clients using the CIFS Unix extensions, and will
+be invisbile to Windows clients and typically will not affect local
+applications running on the same server as Samba.
+
+Use instructions:
+================
+Once the CIFS VFS support is built into the kernel or installed as a module
+(cifs.o), you can use mount syntax like the following to access Samba or Windows
+servers:
+
+ mount -t cifs //9.53.216.11/e$ /mnt -o user=myname,pass=mypassword
+
+Before -o the option -v may be specified to make the mount.cifs
+mount helper display the mount steps more verbosely.
+After -o the following commonly used cifs vfs specific options
+are supported:
+
+ user=<username>
+ pass=<password>
+ domain=<domain name>
+
+Other cifs mount options are described below. Use of TCP names (in addition to
+ip addresses) is available if the mount helper (mount.cifs) is installed. If
+you do not trust the server to which are mounted, or if you do not have
+cifs signing enabled (and the physical network is insecure), consider use
+of the standard mount options "noexec" and "nosuid" to reduce the risk of
+running an altered binary on your local system (downloaded from a hostile server
+or altered by a hostile router).
+
+Although mounting using format corresponding to the CIFS URL specification is
+not possible in mount.cifs yet, it is possible to use an alternate format
+for the server and sharename (which is somewhat similar to NFS style mount
+syntax) instead of the more widely used UNC format (i.e. \\server\share):
+ mount -t cifs tcp_name_of_server:share_name /mnt -o user=myname,pass=mypasswd
+
+When using the mount helper mount.cifs, passwords may be specified via alternate
+mechanisms, instead of specifying it after -o using the normal "pass=" syntax
+on the command line:
+1) By including it in a credential file. Specify credentials=filename as one
+of the mount options. Credential files contain two lines
+ username=someuser
+ password=your_password
+2) By specifying the password in the PASSWD environment variable (similarly
+the user name can be taken from the USER environment variable).
+3) By specifying the password in a file by name via PASSWD_FILE
+4) By specifying the password in a file by file descriptor via PASSWD_FD
+
+If no password is provided, mount.cifs will prompt for password entry
+
+Restrictions
+============
+Servers must support either "pure-TCP" (port 445 TCP/IP CIFS connections) or RFC
+1001/1002 support for "Netbios-Over-TCP/IP." This is not likely to be a
+problem as most servers support this.
+
+Valid filenames differ between Windows and Linux. Windows typically restricts
+filenames which contain certain reserved characters (e.g.the character :
+which is used to delimit the beginning of a stream name by Windows), while
+Linux allows a slightly wider set of valid characters in filenames. Windows
+servers can remap such characters when an explicit mapping is specified in
+the Server's registry. Samba starting with version 3.10 will allow such
+filenames (ie those which contain valid Linux characters, which normally
+would be forbidden for Windows/CIFS semantics) as long as the server is
+configured for Unix Extensions (and the client has not disabled
+/proc/fs/cifs/LinuxExtensionsEnabled).
+
+
+CIFS VFS Mount Options
+======================
+A partial list of the supported mount options follows:
+ user The user name to use when trying to establish
+ the CIFS session.
+ password The user password. If the mount helper is
+ installed, the user will be prompted for password
+ if not supplied.
+ ip The ip address of the target server
+ unc The target server Universal Network Name (export) to
+ mount.
+ domain Set the SMB/CIFS workgroup name prepended to the
+ username during CIFS session establishment
+ forceuid Set the default uid for inodes to the uid
+ passed in on mount. For mounts to servers
+ which do support the CIFS Unix extensions, such as a
+ properly configured Samba server, the server provides
+ the uid, gid and mode so this parameter should not be
+ specified unless the server and clients uid and gid
+ numbering differ. If the server and client are in the
+ same domain (e.g. running winbind or nss_ldap) and
+ the server supports the Unix Extensions then the uid
+ and gid can be retrieved from the server (and uid
+ and gid would not have to be specifed on the mount.
+ For servers which do not support the CIFS Unix
+ extensions, the default uid (and gid) returned on lookup
+ of existing files will be the uid (gid) of the person
+ who executed the mount (root, except when mount.cifs
+ is configured setuid for user mounts) unless the "uid="
+ (gid) mount option is specified. Also note that permission
+ checks (authorization checks) on accesses to a file occur
+ at the server, but there are cases in which an administrator
+ may want to restrict at the client as well. For those
+ servers which do not report a uid/gid owner
+ (such as Windows), permissions can also be checked at the
+ client, and a crude form of client side permission checking
+ can be enabled by specifying file_mode and dir_mode on
+ the client. (default)
+ forcegid (similar to above but for the groupid instead of uid) (default)
+ noforceuid Fill in file owner information (uid) by requesting it from
+ the server if possible. With this option, the value given in
+ the uid= option (on mount) will only be used if the server
+ can not support returning uids on inodes.
+ noforcegid (similar to above but for the group owner, gid, instead of uid)
+ uid Set the default uid for inodes, and indicate to the
+ cifs kernel driver which local user mounted. If the server
+ supports the unix extensions the default uid is
+ not used to fill in the owner fields of inodes (files)
+ unless the "forceuid" parameter is specified.
+ gid Set the default gid for inodes (similar to above).
+ file_mode If CIFS Unix extensions are not supported by the server
+ this overrides the default mode for file inodes.
+ fsc Enable local disk caching using FS-Cache (off by default). This
+ option could be useful to improve performance on a slow link,
+ heavily loaded server and/or network where reading from the
+ disk is faster than reading from the server (over the network).
+ This could also impact scalability positively as the
+ number of calls to the server are reduced. However, local
+ caching is not suitable for all workloads for e.g. read-once
+ type workloads. So, you need to consider carefully your
+ workload/scenario before using this option. Currently, local
+ disk caching is functional for CIFS files opened as read-only.
+ dir_mode If CIFS Unix extensions are not supported by the server
+ this overrides the default mode for directory inodes.
+ port attempt to contact the server on this tcp port, before
+ trying the usual ports (port 445, then 139).
+ iocharset Codepage used to convert local path names to and from
+ Unicode. Unicode is used by default for network path
+ names if the server supports it. If iocharset is
+ not specified then the nls_default specified
+ during the local client kernel build will be used.
+ If server does not support Unicode, this parameter is
+ unused.
+ rsize default read size (usually 16K). The client currently
+ can not use rsize larger than CIFSMaxBufSize. CIFSMaxBufSize
+ defaults to 16K and may be changed (from 8K to the maximum
+ kmalloc size allowed by your kernel) at module install time
+ for cifs.ko. Setting CIFSMaxBufSize to a very large value
+ will cause cifs to use more memory and may reduce performance
+ in some cases. To use rsize greater than 127K (the original
+ cifs protocol maximum) also requires that the server support
+ a new Unix Capability flag (for very large read) which some
+ newer servers (e.g. Samba 3.0.26 or later) do. rsize can be
+ set from a minimum of 2048 to a maximum of 130048 (127K or
+ CIFSMaxBufSize, whichever is smaller)
+ wsize default write size (default 57344)
+ maximum wsize currently allowed by CIFS is 57344 (fourteen
+ 4096 byte pages)
+ actimeo=n attribute cache timeout in seconds (default 1 second).
+ After this timeout, the cifs client requests fresh attribute
+ information from the server. This option allows to tune the
+ attribute cache timeout to suit the workload needs. Shorter
+ timeouts mean better the cache coherency, but increased number
+ of calls to the server. Longer timeouts mean reduced number
+ of calls to the server at the expense of less stricter cache
+ coherency checks (i.e. incorrect attribute cache for a short
+ period of time).
+ rw mount the network share read-write (note that the
+ server may still consider the share read-only)
+ ro mount network share read-only
+ version used to distinguish different versions of the
+ mount helper utility (not typically needed)
+ sep if first mount option (after the -o), overrides
+ the comma as the separator between the mount
+ parms. e.g.
+ -o user=myname,password=mypassword,domain=mydom
+ could be passed instead with period as the separator by
+ -o sep=.user=myname.password=mypassword.domain=mydom
+ this might be useful when comma is contained within username
+ or password or domain. This option is less important
+ when the cifs mount helper cifs.mount (version 1.1 or later)
+ is used.
+ nosuid Do not allow remote executables with the suid bit
+ program to be executed. This is only meaningful for mounts
+ to servers such as Samba which support the CIFS Unix Extensions.
+ If you do not trust the servers in your network (your mount
+ targets) it is recommended that you specify this option for
+ greater security.
+ exec Permit execution of binaries on the mount.
+ noexec Do not permit execution of binaries on the mount.
+ dev Recognize block devices on the remote mount.
+ nodev Do not recognize devices on the remote mount.
+ suid Allow remote files on this mountpoint with suid enabled to
+ be executed (default for mounts when executed as root,
+ nosuid is default for user mounts).
+ credentials Although ignored by the cifs kernel component, it is used by
+ the mount helper, mount.cifs. When mount.cifs is installed it
+ opens and reads the credential file specified in order
+ to obtain the userid and password arguments which are passed to
+ the cifs vfs.
+ guest Although ignored by the kernel component, the mount.cifs
+ mount helper will not prompt the user for a password
+ if guest is specified on the mount options. If no
+ password is specified a null password will be used.
+ perm Client does permission checks (vfs_permission check of uid
+ and gid of the file against the mode and desired operation),
+ Note that this is in addition to the normal ACL check on the
+ target machine done by the server software.
+ Client permission checking is enabled by default.
+ noperm Client does not do permission checks. This can expose
+ files on this mount to access by other users on the local
+ client system. It is typically only needed when the server
+ supports the CIFS Unix Extensions but the UIDs/GIDs on the
+ client and server system do not match closely enough to allow
+ access by the user doing the mount, but it may be useful with
+ non CIFS Unix Extension mounts for cases in which the default
+ mode is specified on the mount but is not to be enforced on the
+ client (e.g. perhaps when MultiUserMount is enabled)
+ Note that this does not affect the normal ACL check on the
+ target machine done by the server software (of the server
+ ACL against the user name provided at mount time).
+ serverino Use server's inode numbers instead of generating automatically
+ incrementing inode numbers on the client. Although this will
+ make it easier to spot hardlinked files (as they will have
+ the same inode numbers) and inode numbers may be persistent,
+ note that the server does not guarantee that the inode numbers
+ are unique if multiple server side mounts are exported under a
+ single share (since inode numbers on the servers might not
+ be unique if multiple filesystems are mounted under the same
+ shared higher level directory). Note that some older
+ (e.g. pre-Windows 2000) do not support returning UniqueIDs
+ or the CIFS Unix Extensions equivalent and for those
+ this mount option will have no effect. Exporting cifs mounts
+ under nfsd requires this mount option on the cifs mount.
+ This is now the default if server supports the
+ required network operation.
+ noserverino Client generates inode numbers (rather than using the actual one
+ from the server). These inode numbers will vary after
+ unmount or reboot which can confuse some applications,
+ but not all server filesystems support unique inode
+ numbers.
+ setuids If the CIFS Unix extensions are negotiated with the server
+ the client will attempt to set the effective uid and gid of
+ the local process on newly created files, directories, and
+ devices (create, mkdir, mknod). If the CIFS Unix Extensions
+ are not negotiated, for newly created files and directories
+ instead of using the default uid and gid specified on
+ the mount, cache the new file's uid and gid locally which means
+ that the uid for the file can change when the inode is
+ reloaded (or the user remounts the share).
+ nosetuids The client will not attempt to set the uid and gid on
+ on newly created files, directories, and devices (create,
+ mkdir, mknod) which will result in the server setting the
+ uid and gid to the default (usually the server uid of the
+ user who mounted the share). Letting the server (rather than
+ the client) set the uid and gid is the default. If the CIFS
+ Unix Extensions are not negotiated then the uid and gid for
+ new files will appear to be the uid (gid) of the mounter or the
+ uid (gid) parameter specified on the mount.
+ netbiosname When mounting to servers via port 139, specifies the RFC1001
+ source name to use to represent the client netbios machine
+ name when doing the RFC1001 netbios session initialize.
+ direct Do not do inode data caching on files opened on this mount.
+ This precludes mmapping files on this mount. In some cases
+ with fast networks and little or no caching benefits on the
+ client (e.g. when the application is doing large sequential
+ reads bigger than page size without rereading the same data)
+ this can provide better performance than the default
+ behavior which caches reads (readahead) and writes
+ (writebehind) through the local Linux client pagecache
+ if oplock (caching token) is granted and held. Note that
+ direct allows write operations larger than page size
+ to be sent to the server.
+ strictcache Use for switching on strict cache mode. In this mode the
+ client read from the cache all the time it has Oplock Level II,
+ otherwise - read from the server. All written data are stored
+ in the cache, but if the client doesn't have Exclusive Oplock,
+ it writes the data to the server.
+ rwpidforward Forward pid of a process who opened a file to any read or write
+ operation on that file. This prevent applications like WINE
+ from failing on read and write if we use mandatory brlock style.
+ acl Allow setfacl and getfacl to manage posix ACLs if server
+ supports them. (default)
+ noacl Do not allow setfacl and getfacl calls on this mount
+ user_xattr Allow getting and setting user xattrs (those attributes whose
+ name begins with "user." or "os2.") as OS/2 EAs (extended
+ attributes) to the server. This allows support of the
+ setfattr and getfattr utilities. (default)
+ nouser_xattr Do not allow getfattr/setfattr to get/set/list xattrs
+ mapchars Translate six of the seven reserved characters (not backslash)
+ *?<>|:
+ to the remap range (above 0xF000), which also
+ allows the CIFS client to recognize files created with
+ such characters by Windows's POSIX emulation. This can
+ also be useful when mounting to most versions of Samba
+ (which also forbids creating and opening files
+ whose names contain any of these seven characters).
+ This has no effect if the server does not support
+ Unicode on the wire.
+ nomapchars Do not translate any of these seven characters (default).
+ nocase Request case insensitive path name matching (case
+ sensitive is the default if the server supports it).
+ (mount option "ignorecase" is identical to "nocase")
+ posixpaths If CIFS Unix extensions are supported, attempt to
+ negotiate posix path name support which allows certain
+ characters forbidden in typical CIFS filenames, without
+ requiring remapping. (default)
+ noposixpaths If CIFS Unix extensions are supported, do not request
+ posix path name support (this may cause servers to
+ reject creatingfile with certain reserved characters).
+ nounix Disable the CIFS Unix Extensions for this mount (tree
+ connection). This is rarely needed, but it may be useful
+ in order to turn off multiple settings all at once (ie
+ posix acls, posix locks, posix paths, symlink support
+ and retrieving uids/gids/mode from the server) or to
+ work around a bug in server which implement the Unix
+ Extensions.
+ nobrl Do not send byte range lock requests to the server.
+ This is necessary for certain applications that break
+ with cifs style mandatory byte range locks (and most
+ cifs servers do not yet support requesting advisory
+ byte range locks).
+ forcemandatorylock Even if the server supports posix (advisory) byte range
+ locking, send only mandatory lock requests. For some
+ (presumably rare) applications, originally coded for
+ DOS/Windows, which require Windows style mandatory byte range
+ locking, they may be able to take advantage of this option,
+ forcing the cifs client to only send mandatory locks
+ even if the cifs server would support posix advisory locks.
+ "forcemand" is accepted as a shorter form of this mount
+ option.
+ nostrictsync If this mount option is set, when an application does an
+ fsync call then the cifs client does not send an SMB Flush
+ to the server (to force the server to write all dirty data
+ for this file immediately to disk), although cifs still sends
+ all dirty (cached) file data to the server and waits for the
+ server to respond to the write. Since SMB Flush can be
+ very slow, and some servers may be reliable enough (to risk
+ delaying slightly flushing the data to disk on the server),
+ turning on this option may be useful to improve performance for
+ applications that fsync too much, at a small risk of server
+ crash. If this mount option is not set, by default cifs will
+ send an SMB flush request (and wait for a response) on every
+ fsync call.
+ nodfs Disable DFS (global name space support) even if the
+ server claims to support it. This can help work around
+ a problem with parsing of DFS paths with Samba server
+ versions 3.0.24 and 3.0.25.
+ remount remount the share (often used to change from ro to rw mounts
+ or vice versa)
+ cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for
+ the file. (EXPERIMENTAL)
+ servern Specify the server 's netbios name (RFC1001 name) to use
+ when attempting to setup a session to the server.
+ This is needed for mounting to some older servers (such
+ as OS/2 or Windows 98 and Windows ME) since they do not
+ support a default server name. A server name can be up
+ to 15 characters long and is usually uppercased.
+ sfu When the CIFS Unix Extensions are not negotiated, attempt to
+ create device files and fifos in a format compatible with
+ Services for Unix (SFU). In addition retrieve bits 10-12
+ of the mode via the SETFILEBITS extended attribute (as
+ SFU does). In the future the bottom 9 bits of the
+ mode also will be emulated using queries of the security
+ descriptor (ACL).
+ mfsymlinks Enable support for Minshall+French symlinks
+ (see http://wiki.samba.org/index.php/UNIX_Extensions#Minshall.2BFrench_symlinks)
+ This option is ignored when specified together with the
+ 'sfu' option. Minshall+French symlinks are used even if
+ the server supports the CIFS Unix Extensions.
+ sign Must use packet signing (helps avoid unwanted data modification
+ by intermediate systems in the route). Note that signing
+ does not work with lanman or plaintext authentication.
+ seal Must seal (encrypt) all data on this mounted share before
+ sending on the network. Requires support for Unix Extensions.
+ Note that this differs from the sign mount option in that it
+ causes encryption of data sent over this mounted share but other
+ shares mounted to the same server are unaffected.
+ locallease This option is rarely needed. Fcntl F_SETLEASE is
+ used by some applications such as Samba and NFSv4 server to
+ check to see whether a file is cacheable. CIFS has no way
+ to explicitly request a lease, but can check whether a file
+ is cacheable (oplocked). Unfortunately, even if a file
+ is not oplocked, it could still be cacheable (ie cifs client
+ could grant fcntl leases if no other local processes are using
+ the file) for cases for example such as when the server does not
+ support oplocks and the user is sure that the only updates to
+ the file will be from this client. Specifying this mount option
+ will allow the cifs client to check for leases (only) locally
+ for files which are not oplocked instead of denying leases
+ in that case. (EXPERIMENTAL)
+ sec Security mode. Allowed values are:
+ none attempt to connection as a null user (no name)
+ krb5 Use Kerberos version 5 authentication
+ krb5i Use Kerberos authentication and packet signing
+ ntlm Use NTLM password hashing (default)
+ ntlmi Use NTLM password hashing with signing (if
+ /proc/fs/cifs/PacketSigningEnabled on or if
+ server requires signing also can be the default)
+ ntlmv2 Use NTLMv2 password hashing
+ ntlmv2i Use NTLMv2 password hashing with packet signing
+ lanman (if configured in kernel config) use older
+ lanman hash
+hard Retry file operations if server is not responding
+soft Limit retries to unresponsive servers (usually only
+ one retry) before returning an error. (default)
+
+The mount.cifs mount helper also accepts a few mount options before -o
+including:
+
+ -S take password from stdin (equivalent to setting the environment
+ variable "PASSWD_FD=0"
+ -V print mount.cifs version
+ -? display simple usage information
+
+With most 2.6 kernel versions of modutils, the version of the cifs kernel
+module can be displayed via modinfo.
+
+Misc /proc/fs/cifs Flags and Debug Info
+=======================================
+Informational pseudo-files:
+DebugData Displays information about active CIFS sessions and
+ shares, features enabled as well as the cifs.ko
+ version.
+Stats Lists summary resource usage information as well as per
+ share statistics, if CONFIG_CIFS_STATS in enabled
+ in the kernel configuration.
+
+Configuration pseudo-files:
+PacketSigningEnabled If set to one, cifs packet signing is enabled
+ and will be used if the server requires
+ it. If set to two, cifs packet signing is
+ required even if the server considers packet
+ signing optional. (default 1)
+SecurityFlags Flags which control security negotiation and
+ also packet signing. Authentication (may/must)
+ flags (e.g. for NTLM and/or NTLMv2) may be combined with
+ the signing flags. Specifying two different password
+ hashing mechanisms (as "must use") on the other hand
+ does not make much sense. Default flags are
+ 0x07007
+ (NTLM, NTLMv2 and packet signing allowed). The maximum
+ allowable flags if you want to allow mounts to servers
+ using weaker password hashes is 0x37037 (lanman,
+ plaintext, ntlm, ntlmv2, signing allowed). Some
+ SecurityFlags require the corresponding menuconfig
+ options to be enabled (lanman and plaintext require
+ CONFIG_CIFS_WEAK_PW_HASH for example). Enabling
+ plaintext authentication currently requires also
+ enabling lanman authentication in the security flags
+ because the cifs module only supports sending
+ laintext passwords using the older lanman dialect
+ form of the session setup SMB. (e.g. for authentication
+ using plain text passwords, set the SecurityFlags
+ to 0x30030):
+
+ may use packet signing 0x00001
+ must use packet signing 0x01001
+ may use NTLM (most common password hash) 0x00002
+ must use NTLM 0x02002
+ may use NTLMv2 0x00004
+ must use NTLMv2 0x04004
+ may use Kerberos security 0x00008
+ must use Kerberos 0x08008
+ may use lanman (weak) password hash 0x00010
+ must use lanman password hash 0x10010
+ may use plaintext passwords 0x00020
+ must use plaintext passwords 0x20020
+ (reserved for future packet encryption) 0x00040
+
+cifsFYI If set to non-zero value, additional debug information
+ will be logged to the system error log. This field
+ contains three flags controlling different classes of
+ debugging entries. The maximum value it can be set
+ to is 7 which enables all debugging points (default 0).
+ Some debugging statements are not compiled into the
+ cifs kernel unless CONFIG_CIFS_DEBUG2 is enabled in the
+ kernel configuration. cifsFYI may be set to one or
+ nore of the following flags (7 sets them all):
+
+ log cifs informational messages 0x01
+ log return codes from cifs entry points 0x02
+ log slow responses (ie which take longer than 1 second)
+ CONFIG_CIFS_STATS2 must be enabled in .config 0x04
+
+
+traceSMB If set to one, debug information is logged to the
+ system error log with the start of smb requests
+ and responses (default 0)
+LookupCacheEnable If set to one, inode information is kept cached
+ for one second improving performance of lookups
+ (default 1)
+OplockEnabled If set to one, safe distributed caching enabled.
+ (default 1)
+LinuxExtensionsEnabled If set to one then the client will attempt to
+ use the CIFS "UNIX" extensions which are optional
+ protocol enhancements that allow CIFS servers
+ to return accurate UID/GID information as well
+ as support symbolic links. If you use servers
+ such as Samba that support the CIFS Unix
+ extensions but do not want to use symbolic link
+ support and want to map the uid and gid fields
+ to values supplied at mount (rather than the
+ actual values, then set this to zero. (default 1)
+
+These experimental features and tracing can be enabled by changing flags in
+/proc/fs/cifs (after the cifs module has been installed or built into the
+kernel, e.g. insmod cifs). To enable a feature set it to 1 e.g. to enable
+tracing to the kernel message log type:
+
+ echo 7 > /proc/fs/cifs/cifsFYI
+
+cifsFYI functions as a bit mask. Setting it to 1 enables additional kernel
+logging of various informational messages. 2 enables logging of non-zero
+SMB return codes while 4 enables logging of requests that take longer
+than one second to complete (except for byte range lock requests).
+Setting it to 4 requires defining CONFIG_CIFS_STATS2 manually in the
+source code (typically by setting it in the beginning of cifsglob.h),
+and setting it to seven enables all three. Finally, tracing
+the start of smb requests and responses can be enabled via:
+
+ echo 1 > /proc/fs/cifs/traceSMB
+
+Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
+if the kernel was configured with cifs statistics enabled. The statistics
+represent the number of successful (ie non-zero return code from the server)
+SMB responses to some of the more common commands (open, delete, mkdir etc.).
+Also recorded is the total bytes read and bytes written to the server for
+that share. Note that due to client caching effects this can be less than the
+number of bytes read and written by the application running on the client.
+The statistics for the number of total SMBs and oplock breaks are different in
+that they represent all for that share, not just those for which the server
+returned success.
+
+Also note that "cat /proc/fs/cifs/DebugData" will display information about
+the active sessions and the shares that are mounted.
+
+Enabling Kerberos (extended security) works but requires version 1.2 or later
+of the helper program cifs.upcall to be present and to be configured in the
+/etc/request-key.conf file. The cifs.upcall helper program is from the Samba
+project(http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not
+require this helper. Note that NTLMv2 security (which does not require the
+cifs.upcall helper program), instead of using Kerberos, is sufficient for
+some use cases.
+
+DFS support allows transparent redirection to shares in an MS-DFS name space.
+In addition, DFS support for target shares which are specified as UNC
+names which begin with host names (rather than IP addresses) requires
+a user space helper (such as cifs.upcall) to be present in order to
+translate host names to ip address, and the user space helper must also
+be configured in the file /etc/request-key.conf. Samba, Windows servers and
+many NAS appliances support DFS as a way of constructing a global name
+space to ease network configuration and improve reliability.
+
+To use cifs Kerberos and DFS support, the Linux keyutils package should be
+installed and something like the following lines should be added to the
+/etc/request-key.conf file:
+
+create cifs.spnego * * /usr/local/sbin/cifs.upcall %k
+create dns_resolver * * /usr/local/sbin/cifs.upcall %k
+
+CIFS kernel module parameters
+=============================
+These module parameters can be specified or modified either during the time of
+module loading or during the runtime by using the interface
+ /proc/module/cifs/parameters/<param>
+
+i.e. echo "value" > /sys/module/cifs/parameters/<param>
+
+1. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default.
+ [Y/y/1]. To disable use any of [N/n/0].
+
diff --git a/Documentation/filesystems/cifs/TODO b/Documentation/filesystems/cifs/TODO
new file mode 100644
index 000000000..066ffddc3
--- /dev/null
+++ b/Documentation/filesystems/cifs/TODO
@@ -0,0 +1,104 @@
+Version 2.03 August 1, 2014
+
+A Partial List of Missing Features
+==================================
+
+Contributions are welcome. There are plenty of opportunities
+for visible, important contributions to this module. Here
+is a partial list of the known problems and missing features:
+
+a) SMB3 (and SMB3.02) missing optional features:
+ - RDMA
+ - multichannel (started)
+ - directory leases (improved metadata caching)
+ - T10 copy offload (copy chunk is only mechanism supported)
+ - encrypted shares
+
+b) improved sparse file support
+
+c) Directory entry caching relies on a 1 second timer, rather than
+using FindNotify or equivalent. - (started)
+
+d) quota support (needs minor kernel change since quota calls
+to make it to network filesystems or deviceless filesystems)
+
+e) improve support for very old servers (OS/2 and Win9x for example)
+Including support for changing the time remotely (utimes command).
+
+f) hook lower into the sockets api (as NFS/SunRPC does) to avoid the
+extra copy in/out of the socket buffers in some cases.
+
+g) Better optimize open (and pathbased setfilesize) to reduce the
+oplock breaks coming from windows srv. Piggyback identical file
+opens on top of each other by incrementing reference count rather
+than resending (helps reduce server resource utilization and avoid
+spurious oplock breaks).
+
+h) Add support for storing symlink info to Windows servers
+in the Extended Attribute format their SFU clients would recognize.
+
+i) Finish inotify support so kde and gnome file list windows
+will autorefresh (partially complete by Asser). Needs minor kernel
+vfs change to support removing D_NOTIFY on a file.
+
+j) Add GUI tool to configure /proc/fs/cifs settings and for display of
+the CIFS statistics (started)
+
+k) implement support for security and trusted categories of xattrs
+(requires minor protocol extension) to enable better support for SELINUX
+
+l) Implement O_DIRECT flag on open (already supported on mount)
+
+m) Create UID mapping facility so server UIDs can be mapped on a per
+mount or a per server basis to client UIDs or nobody if no mapping
+exists. This is helpful when Unix extensions are negotiated to
+allow better permission checking when UIDs differ on the server
+and client. Add new protocol request to the CIFS protocol
+standard for asking the server for the corresponding name of a
+particular uid.
+
+n) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too)
+
+o) mount check for unmatched uids
+
+p) Add support for new vfs entry point for fallocate
+
+q) Add tools to take advantage of cifs/smb3 specific ioctls and features
+such as "CopyChunk" (fast server side file copy)
+
+r) encrypted file support
+
+s) improved stats gathering, tools (perhaps integration with nfsometer?)
+
+t) allow setting more NTFS/SMB3 file attributes remotely (currently limited to compressed
+file attribute via chflags)
+
+u) mount helper GUI (to simplify the various configuration options on mount)
+
+
+KNOWN BUGS
+====================================
+See http://bugzilla.samba.org - search on product "CifsVFS" for
+current bug list. Also check http://bugzilla.kernel.org (Product = File System, Component = CIFS)
+
+1) existing symbolic links (Windows reparse points) are recognized but
+can not be created remotely. They are implemented for Samba and those that
+support the CIFS Unix extensions, although earlier versions of Samba
+overly restrict the pathnames.
+2) follow_link and readdir code does not follow dfs junctions
+but recognizes them
+
+Misc testing to do
+==================
+1) check out max path names and max path name components against various server
+types. Try nested symlinks (8 deep). Return max path name in stat -f information
+
+2) Improve xfstest's cifs enablement and adapt xfstests where needed to test
+cifs better
+
+3) Additional performance testing and optimization using iozone and similar -
+there are some easy changes that can be done to parallelize sequential writes,
+and when signing is disabled to request larger read sizes (larger than
+negotiated size) and send larger write sizes to modern servers.
+
+4) More exhaustively test against less common servers
diff --git a/Documentation/filesystems/cifs/cifs.txt b/Documentation/filesystems/cifs/cifs.txt
new file mode 100644
index 000000000..2fac91ac9
--- /dev/null
+++ b/Documentation/filesystems/cifs/cifs.txt
@@ -0,0 +1,31 @@
+ This is the client VFS module for the Common Internet File System
+ (CIFS) protocol which is the successor to the Server Message Block
+ (SMB) protocol, the native file sharing mechanism for most early
+ PC operating systems. New and improved versions of CIFS are now
+ called SMB2 and SMB3. These dialects are also supported by the
+ CIFS VFS module. CIFS is fully supported by network
+ file servers such as Windows 2000, 2003, 2008 and 2012
+ as well by Samba (which provides excellent CIFS
+ server support for Linux and many other operating systems), so
+ this network filesystem client can mount to a wide variety of
+ servers.
+
+ The intent of this module is to provide the most advanced network
+ file system function for CIFS compliant servers, including better
+ POSIX compliance, secure per-user session establishment, high
+ performance safe distributed caching (oplock), optional packet
+ signing, large files, Unicode support and other internationalization
+ improvements. Since both Samba server and this filesystem client support
+ the CIFS Unix extensions, the combination can provide a reasonable
+ alternative to NFSv4 for fileserving in some Linux to Linux environments,
+ not just in Linux to Windows environments.
+
+ This filesystem has an mount utility (mount.cifs) that can be obtained from
+
+ https://ftp.samba.org/pub/linux-cifs/cifs-utils/
+
+ It must be installed in the directory with the other mount helpers.
+
+ For more information on the module see the project wiki page at
+
+ https://wiki.samba.org/index.php/LinuxCIFS_utils
diff --git a/Documentation/filesystems/cifs/winucase_convert.pl b/Documentation/filesystems/cifs/winucase_convert.pl
new file mode 100755
index 000000000..322a9c833
--- /dev/null
+++ b/Documentation/filesystems/cifs/winucase_convert.pl
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+#
+# winucase_convert.pl -- convert "Windows 8 Upper Case Mapping Table.txt" to
+# a two-level set of C arrays.
+#
+# Copyright 2013: Jeff Layton <jlayton@redhat.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+while(<>) {
+ next if (!/^0x(..)(..)\t0x(....)\t/);
+ $firstchar = hex($1);
+ $secondchar = hex($2);
+ $uppercase = hex($3);
+
+ $top[$firstchar][$secondchar] = $uppercase;
+}
+
+for ($i = 0; $i < 256; $i++) {
+ next if (!$top[$i]);
+
+ printf("static const wchar_t t2_%2.2x[256] = {", $i);
+ for ($j = 0; $j < 256; $j++) {
+ if (($j % 8) == 0) {
+ print "\n\t";
+ } else {
+ print " ";
+ }
+ printf("0x%4.4x,", $top[$i][$j] ? $top[$i][$j] : 0);
+ }
+ print "\n};\n\n";
+}
+
+printf("static const wchar_t *const toplevel[256] = {", $i);
+for ($i = 0; $i < 256; $i++) {
+ if (($i % 8) == 0) {
+ print "\n\t";
+ } elsif ($top[$i]) {
+ print " ";
+ } else {
+ print " ";
+ }
+
+ if ($top[$i]) {
+ printf("t2_%2.2x,", $i);
+ } else {
+ print "NULL,";
+ }
+}
+print "\n};\n\n";
diff --git a/Documentation/filesystems/coda.txt b/Documentation/filesystems/coda.txt
new file mode 100644
index 000000000..613113560
--- /dev/null
+++ b/Documentation/filesystems/coda.txt
@@ -0,0 +1,1673 @@
+NOTE:
+This is one of the technical documents describing a component of
+Coda -- this document describes the client kernel-Venus interface.
+
+For more information:
+ http://www.coda.cs.cmu.edu
+For user level software needed to run Coda:
+ ftp://ftp.coda.cs.cmu.edu
+
+To run Coda you need to get a user level cache manager for the client,
+named Venus, as well as tools to manipulate ACLs, to log in, etc. The
+client needs to have the Coda filesystem selected in the kernel
+configuration.
+
+The server needs a user level server and at present does not depend on
+kernel support.
+
+
+
+
+
+
+
+ The Venus kernel interface
+ Peter J. Braam
+ v1.0, Nov 9, 1997
+
+ This document describes the communication between Venus and kernel
+ level filesystem code needed for the operation of the Coda file sys-
+ tem. This document version is meant to describe the current interface
+ (version 1.0) as well as improvements we envisage.
+ ______________________________________________________________________
+
+ Table of Contents
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 1. Introduction
+
+ 2. Servicing Coda filesystem calls
+
+ 3. The message layer
+
+ 3.1 Implementation details
+
+ 4. The interface at the call level
+
+ 4.1 Data structures shared by the kernel and Venus
+ 4.2 The pioctl interface
+ 4.3 root
+ 4.4 lookup
+ 4.5 getattr
+ 4.6 setattr
+ 4.7 access
+ 4.8 create
+ 4.9 mkdir
+ 4.10 link
+ 4.11 symlink
+ 4.12 remove
+ 4.13 rmdir
+ 4.14 readlink
+ 4.15 open
+ 4.16 close
+ 4.17 ioctl
+ 4.18 rename
+ 4.19 readdir
+ 4.20 vget
+ 4.21 fsync
+ 4.22 inactive
+ 4.23 rdwr
+ 4.24 odymount
+ 4.25 ody_lookup
+ 4.26 ody_expand
+ 4.27 prefetch
+ 4.28 signal
+
+ 5. The minicache and downcalls
+
+ 5.1 INVALIDATE
+ 5.2 FLUSH
+ 5.3 PURGEUSER
+ 5.4 ZAPFILE
+ 5.5 ZAPDIR
+ 5.6 ZAPVNODE
+ 5.7 PURGEFID
+ 5.8 REPLACE
+
+ 6. Initialization and cleanup
+
+ 6.1 Requirements
+
+
+ ______________________________________________________________________
+ 0wpage
+
+ 11.. IInnttrroodduuccttiioonn
+
+
+
+ A key component in the Coda Distributed File System is the cache
+ manager, _V_e_n_u_s.
+
+
+ When processes on a Coda enabled system access files in the Coda
+ filesystem, requests are directed at the filesystem layer in the
+ operating system. The operating system will communicate with Venus to
+ service the request for the process. Venus manages a persistent
+ client cache and makes remote procedure calls to Coda file servers and
+ related servers (such as authentication servers) to service these
+ requests it receives from the operating system. When Venus has
+ serviced a request it replies to the operating system with appropriate
+ return codes, and other data related to the request. Optionally the
+ kernel support for Coda may maintain a minicache of recently processed
+ requests to limit the number of interactions with Venus. Venus
+ possesses the facility to inform the kernel when elements from its
+ minicache are no longer valid.
+
+ This document describes precisely this communication between the
+ kernel and Venus. The definitions of so called upcalls and downcalls
+ will be given with the format of the data they handle. We shall also
+ describe the semantic invariants resulting from the calls.
+
+ Historically Coda was implemented in a BSD file system in Mach 2.6.
+ The interface between the kernel and Venus is very similar to the BSD
+ VFS interface. Similar functionality is provided, and the format of
+ the parameters and returned data is very similar to the BSD VFS. This
+ leads to an almost natural environment for implementing a kernel-level
+ filesystem driver for Coda in a BSD system. However, other operating
+ systems such as Linux and Windows 95 and NT have virtual filesystem
+ with different interfaces.
+
+ To implement Coda on these systems some reverse engineering of the
+ Venus/Kernel protocol is necessary. Also it came to light that other
+ systems could profit significantly from certain small optimizations
+ and modifications to the protocol. To facilitate this work as well as
+ to make future ports easier, communication between Venus and the
+ kernel should be documented in great detail. This is the aim of this
+ document.
+
+ 0wpage
+
+ 22.. SSeerrvviicciinngg CCooddaa ffiilleessyysstteemm ccaallllss
+
+ The service of a request for a Coda file system service originates in
+ a process PP which accessing a Coda file. It makes a system call which
+ traps to the OS kernel. Examples of such calls trapping to the kernel
+ are _r_e_a_d_, _w_r_i_t_e_, _o_p_e_n_, _c_l_o_s_e_, _c_r_e_a_t_e_, _m_k_d_i_r_, _r_m_d_i_r_, _c_h_m_o_d in a Unix
+ context. Similar calls exist in the Win32 environment, and are named
+ _C_r_e_a_t_e_F_i_l_e_, .
+
+ Generally the operating system handles the request in a virtual
+ filesystem (VFS) layer, which is named I/O Manager in NT and IFS
+ manager in Windows 95. The VFS is responsible for partial processing
+ of the request and for locating the specific filesystem(s) which will
+ service parts of the request. Usually the information in the path
+ assists in locating the correct FS drivers. Sometimes after extensive
+ pre-processing, the VFS starts invoking exported routines in the FS
+ driver. This is the point where the FS specific processing of the
+ request starts, and here the Coda specific kernel code comes into
+ play.
+
+ The FS layer for Coda must expose and implement several interfaces.
+ First and foremost the VFS must be able to make all necessary calls to
+ the Coda FS layer, so the Coda FS driver must expose the VFS interface
+ as applicable in the operating system. These differ very significantly
+ among operating systems, but share features such as facilities to
+ read/write and create and remove objects. The Coda FS layer services
+ such VFS requests by invoking one or more well defined services
+ offered by the cache manager Venus. When the replies from Venus have
+ come back to the FS driver, servicing of the VFS call continues and
+ finishes with a reply to the kernel's VFS. Finally the VFS layer
+ returns to the process.
+
+ As a result of this design a basic interface exposed by the FS driver
+ must allow Venus to manage message traffic. In particular Venus must
+ be able to retrieve and place messages and to be notified of the
+ arrival of a new message. The notification must be through a mechanism
+ which does not block Venus since Venus must attend to other tasks even
+ when no messages are waiting or being processed.
+
+
+
+
+
+
+ Interfaces of the Coda FS Driver
+
+ Furthermore the FS layer provides for a special path of communication
+ between a user process and Venus, called the pioctl interface. The
+ pioctl interface is used for Coda specific services, such as
+ requesting detailed information about the persistent cache managed by
+ Venus. Here the involvement of the kernel is minimal. It identifies
+ the calling process and passes the information on to Venus. When
+ Venus replies the response is passed back to the caller in unmodified
+ form.
+
+ Finally Venus allows the kernel FS driver to cache the results from
+ certain services. This is done to avoid excessive context switches
+ and results in an efficient system. However, Venus may acquire
+ information, for example from the network which implies that cached
+ information must be flushed or replaced. Venus then makes a downcall
+ to the Coda FS layer to request flushes or updates in the cache. The
+ kernel FS driver handles such requests synchronously.
+
+ Among these interfaces the VFS interface and the facility to place,
+ receive and be notified of messages are platform specific. We will
+ not go into the calls exported to the VFS layer but we will state the
+ requirements of the message exchange mechanism.
+
+ 0wpage
+
+ 33.. TThhee mmeessssaaggee llaayyeerr
+
+
+
+ At the lowest level the communication between Venus and the FS driver
+ proceeds through messages. The synchronization between processes
+ requesting Coda file service and Venus relies on blocking and waking
+ up processes. The Coda FS driver processes VFS- and pioctl-requests
+ on behalf of a process P, creates messages for Venus, awaits replies
+ and finally returns to the caller. The implementation of the exchange
+ of messages is platform specific, but the semantics have (so far)
+ appeared to be generally applicable. Data buffers are created by the
+ FS Driver in kernel memory on behalf of P and copied to user memory in
+ Venus.
+
+ The FS Driver while servicing P makes upcalls to Venus. Such an
+ upcall is dispatched to Venus by creating a message structure. The
+ structure contains the identification of P, the message sequence
+ number, the size of the request and a pointer to the data in kernel
+ memory for the request. Since the data buffer is re-used to hold the
+ reply from Venus, there is a field for the size of the reply. A flags
+ field is used in the message to precisely record the status of the
+ message. Additional platform dependent structures involve pointers to
+ determine the position of the message on queues and pointers to
+ synchronization objects. In the upcall routine the message structure
+ is filled in, flags are set to 0, and it is placed on the _p_e_n_d_i_n_g
+ queue. The routine calling upcall is responsible for allocating the
+ data buffer; its structure will be described in the next section.
+
+ A facility must exist to notify Venus that the message has been
+ created, and implemented using available synchronization objects in
+ the OS. This notification is done in the upcall context of the process
+ P. When the message is on the pending queue, process P cannot proceed
+ in upcall. The (kernel mode) processing of P in the filesystem
+ request routine must be suspended until Venus has replied. Therefore
+ the calling thread in P is blocked in upcall. A pointer in the
+ message structure will locate the synchronization object on which P is
+ sleeping.
+
+ Venus detects the notification that a message has arrived, and the FS
+ driver allow Venus to retrieve the message with a getmsg_from_kernel
+ call. This action finishes in the kernel by putting the message on the
+ queue of processing messages and setting flags to READ. Venus is
+ passed the contents of the data buffer. The getmsg_from_kernel call
+ now returns and Venus processes the request.
+
+ At some later point the FS driver receives a message from Venus,
+ namely when Venus calls sendmsg_to_kernel. At this moment the Coda FS
+ driver looks at the contents of the message and decides if:
+
+
+ +o the message is a reply for a suspended thread P. If so it removes
+ the message from the processing queue and marks the message as
+ WRITTEN. Finally, the FS driver unblocks P (still in the kernel
+ mode context of Venus) and the sendmsg_to_kernel call returns to
+ Venus. The process P will be scheduled at some point and continues
+ processing its upcall with the data buffer replaced with the reply
+ from Venus.
+
+ +o The message is a _d_o_w_n_c_a_l_l. A downcall is a request from Venus to
+ the FS Driver. The FS driver processes the request immediately
+ (usually a cache eviction or replacement) and when it finishes
+ sendmsg_to_kernel returns.
+
+ Now P awakes and continues processing upcall. There are some
+ subtleties to take account of. First P will determine if it was woken
+ up in upcall by a signal from some other source (for example an
+ attempt to terminate P) or as is normally the case by Venus in its
+ sendmsg_to_kernel call. In the normal case, the upcall routine will
+ deallocate the message structure and return. The FS routine can proceed
+ with its processing.
+
+
+
+
+
+
+
+ Sleeping and IPC arrangements
+
+ In case P is woken up by a signal and not by Venus, it will first look
+ at the flags field. If the message is not yet READ, the process P can
+ handle its signal without notifying Venus. If Venus has READ, and
+ the request should not be processed, P can send Venus a signal message
+ to indicate that it should disregard the previous message. Such
+ signals are put in the queue at the head, and read first by Venus. If
+ the message is already marked as WRITTEN it is too late to stop the
+ processing. The VFS routine will now continue. (-- If a VFS request
+ involves more than one upcall, this can lead to complicated state, an
+ extra field "handle_signals" could be added in the message structure
+ to indicate points of no return have been passed.--)
+
+
+
+ 33..11.. IImmpplleemmeennttaattiioonn ddeettaaiillss
+
+ The Unix implementation of this mechanism has been through the
+ implementation of a character device associated with Coda. Venus
+ retrieves messages by doing a read on the device, replies are sent
+ with a write and notification is through the select system call on the
+ file descriptor for the device. The process P is kept waiting on an
+ interruptible wait queue object.
+
+ In Windows NT and the DPMI Windows 95 implementation a DeviceIoControl
+ call is used. The DeviceIoControl call is designed to copy buffers
+ from user memory to kernel memory with OPCODES. The sendmsg_to_kernel
+ is issued as a synchronous call, while the getmsg_from_kernel call is
+ asynchronous. Windows EventObjects are used for notification of
+ message arrival. The process P is kept waiting on a KernelEvent
+ object in NT and a semaphore in Windows 95.
+
+ 0wpage
+
+ 44.. TThhee iinntteerrffaaccee aatt tthhee ccaallll lleevveell
+
+
+ This section describes the upcalls a Coda FS driver can make to Venus.
+ Each of these upcalls make use of two structures: inputArgs and
+ outputArgs. In pseudo BNF form the structures take the following
+ form:
+
+
+ struct inputArgs {
+ u_long opcode;
+ u_long unique; /* Keep multiple outstanding msgs distinct */
+ u_short pid; /* Common to all */
+ u_short pgid; /* Common to all */
+ struct CodaCred cred; /* Common to all */
+
+ <union "in" of call dependent parts of inputArgs>
+ };
+
+ struct outputArgs {
+ u_long opcode;
+ u_long unique; /* Keep multiple outstanding msgs distinct */
+ u_long result;
+
+ <union "out" of call dependent parts of inputArgs>
+ };
+
+
+
+ Before going on let us elucidate the role of the various fields. The
+ inputArgs start with the opcode which defines the type of service
+ requested from Venus. There are approximately 30 upcalls at present
+ which we will discuss. The unique field labels the inputArg with a
+ unique number which will identify the message uniquely. A process and
+ process group id are passed. Finally the credentials of the caller
+ are included.
+
+ Before delving into the specific calls we need to discuss a variety of
+ data structures shared by the kernel and Venus.
+
+
+
+
+ 44..11.. DDaattaa ssttrruuccttuurreess sshhaarreedd bbyy tthhee kkeerrnneell aanndd VVeennuuss
+
+
+ The CodaCred structure defines a variety of user and group ids as
+ they are set for the calling process. The vuid_t and guid_t are 32 bit
+ unsigned integers. It also defines group membership in an array. On
+ Unix the CodaCred has proven sufficient to implement good security
+ semantics for Coda but the structure may have to undergo modification
+ for the Windows environment when these mature.
+
+ struct CodaCred {
+ vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, effective, set, fs uid*/
+ vgid_t cr_gid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */
+ vgid_t cr_groups[NGROUPS]; /* Group membership for caller */
+ };
+
+
+
+ NNOOTTEE It is questionable if we need CodaCreds in Venus. Finally Venus
+ doesn't know about groups, although it does create files with the
+ default uid/gid. Perhaps the list of group membership is superfluous.
+
+
+ The next item is the fundamental identifier used to identify Coda
+ files, the ViceFid. A fid of a file uniquely defines a file or
+ directory in the Coda filesystem within a _c_e_l_l. (-- A _c_e_l_l is a
+ group of Coda servers acting under the aegis of a single system
+ control machine or SCM. See the Coda Administration manual for a
+ detailed description of the role of the SCM.--)
+
+
+ typedef struct ViceFid {
+ VolumeId Volume;
+ VnodeId Vnode;
+ Unique_t Unique;
+ } ViceFid;
+
+
+
+ Each of the constituent fields: VolumeId, VnodeId and Unique_t are
+ unsigned 32 bit integers. We envisage that a further field will need
+ to be prefixed to identify the Coda cell; this will probably take the
+ form of a Ipv6 size IP address naming the Coda cell through DNS.
+
+ The next important structure shared between Venus and the kernel is
+ the attributes of the file. The following structure is used to
+ exchange information. It has room for future extensions such as
+ support for device files (currently not present in Coda).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ struct coda_vattr {
+ enum coda_vtype va_type; /* vnode type (for create) */
+ u_short va_mode; /* files access mode and type */
+ short va_nlink; /* number of references to file */
+ vuid_t va_uid; /* owner user id */
+ vgid_t va_gid; /* owner group id */
+ long va_fsid; /* file system id (dev for now) */
+ long va_fileid; /* file id */
+ u_quad_t va_size; /* file size in bytes */
+ long va_blocksize; /* blocksize preferred for i/o */
+ struct timespec va_atime; /* time of last access */
+ struct timespec va_mtime; /* time of last modification */
+ struct timespec va_ctime; /* time file changed */
+ u_long va_gen; /* generation number of file */
+ u_long va_flags; /* flags defined for file */
+ dev_t va_rdev; /* device special file represents */
+ u_quad_t va_bytes; /* bytes of disk space held by file */
+ u_quad_t va_filerev; /* file modification number */
+ u_int va_vaflags; /* operations flags, see below */
+ long va_spare; /* remain quad aligned */
+ };
+
+
+
+
+ 44..22.. TThhee ppiiooccttll iinntteerrffaaccee
+
+
+ Coda specific requests can be made by application through the pioctl
+ interface. The pioctl is implemented as an ordinary ioctl on a
+ fictitious file /coda/.CONTROL. The pioctl call opens this file, gets
+ a file handle and makes the ioctl call. Finally it closes the file.
+
+ The kernel involvement in this is limited to providing the facility to
+ open and close and pass the ioctl message _a_n_d to verify that a path in
+ the pioctl data buffers is a file in a Coda filesystem.
+
+ The kernel is handed a data packet of the form:
+
+ struct {
+ const char *path;
+ struct ViceIoctl vidata;
+ int follow;
+ } data;
+
+
+
+ where
+
+
+ struct ViceIoctl {
+ caddr_t in, out; /* Data to be transferred in, or out */
+ short in_size; /* Size of input buffer <= 2K */
+ short out_size; /* Maximum size of output buffer, <= 2K */
+ };
+
+
+
+ The path must be a Coda file, otherwise the ioctl upcall will not be
+ made.
+
+ NNOOTTEE The data structures and code are a mess. We need to clean this
+ up.
+
+ We now proceed to document the individual calls:
+
+ 0wpage
+
+ 44..33.. rroooott
+
+
+ AArrgguummeennttss
+
+ iinn empty
+
+ oouutt
+
+ struct cfs_root_out {
+ ViceFid VFid;
+ } cfs_root;
+
+
+
+ DDeessccrriippttiioonn This call is made to Venus during the initialization of
+ the Coda filesystem. If the result is zero, the cfs_root structure
+ contains the ViceFid of the root of the Coda filesystem. If a non-zero
+ result is generated, its value is a platform dependent error code
+ indicating the difficulty Venus encountered in locating the root of
+ the Coda filesystem.
+
+ 0wpage
+
+ 44..44.. llooookkuupp
+
+
+ SSuummmmaarryy Find the ViceFid and type of an object in a directory if it
+ exists.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_lookup_in {
+ ViceFid VFid;
+ char *name; /* Place holder for data. */
+ } cfs_lookup;
+
+
+
+ oouutt
+
+ struct cfs_lookup_out {
+ ViceFid VFid;
+ int vtype;
+ } cfs_lookup;
+
+
+
+ DDeessccrriippttiioonn This call is made to determine the ViceFid and filetype of
+ a directory entry. The directory entry requested carries name name
+ and Venus will search the directory identified by cfs_lookup_in.VFid.
+ The result may indicate that the name does not exist, or that
+ difficulty was encountered in finding it (e.g. due to disconnection).
+ If the result is zero, the field cfs_lookup_out.VFid contains the
+ targets ViceFid and cfs_lookup_out.vtype the coda_vtype giving the
+ type of object the name designates.
+
+ The name of the object is an 8 bit character string of maximum length
+ CFS_MAXNAMLEN, currently set to 256 (including a 0 terminator.)
+
+ It is extremely important to realize that Venus bitwise ors the field
+ cfs_lookup.vtype with CFS_NOCACHE to indicate that the object should
+ not be put in the kernel name cache.
+
+ NNOOTTEE The type of the vtype is currently wrong. It should be
+ coda_vtype. Linux does not take note of CFS_NOCACHE. It should.
+
+ 0wpage
+
+ 44..55.. ggeettaattttrr
+
+
+ SSuummmmaarryy Get the attributes of a file.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_getattr_in {
+ ViceFid VFid;
+ struct coda_vattr attr; /* XXXXX */
+ } cfs_getattr;
+
+
+
+ oouutt
+
+ struct cfs_getattr_out {
+ struct coda_vattr attr;
+ } cfs_getattr;
+
+
+
+ DDeessccrriippttiioonn This call returns the attributes of the file identified by
+ fid.
+
+ EErrrroorrss Errors can occur if the object with fid does not exist, is
+ unaccessible or if the caller does not have permission to fetch
+ attributes.
+
+ NNoottee Many kernel FS drivers (Linux, NT and Windows 95) need to acquire
+ the attributes as well as the Fid for the instantiation of an internal
+ "inode" or "FileHandle". A significant improvement in performance on
+ such systems could be made by combining the _l_o_o_k_u_p and _g_e_t_a_t_t_r calls
+ both at the Venus/kernel interaction level and at the RPC level.
+
+ The vattr structure included in the input arguments is superfluous and
+ should be removed.
+
+ 0wpage
+
+ 44..66.. sseettaattttrr
+
+
+ SSuummmmaarryy Set the attributes of a file.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_setattr_in {
+ ViceFid VFid;
+ struct coda_vattr attr;
+ } cfs_setattr;
+
+
+
+
+ oouutt
+ empty
+
+ DDeessccrriippttiioonn The structure attr is filled with attributes to be changed
+ in BSD style. Attributes not to be changed are set to -1, apart from
+ vtype which is set to VNON. Other are set to the value to be assigned.
+ The only attributes which the FS driver may request to change are the
+ mode, owner, groupid, atime, mtime and ctime. The return value
+ indicates success or failure.
+
+ EErrrroorrss A variety of errors can occur. The object may not exist, may
+ be inaccessible, or permission may not be granted by Venus.
+
+ 0wpage
+
+ 44..77.. aacccceessss
+
+
+ SSuummmmaarryy
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_access_in {
+ ViceFid VFid;
+ int flags;
+ } cfs_access;
+
+
+
+ oouutt
+ empty
+
+ DDeessccrriippttiioonn Verify if access to the object identified by VFid for
+ operations described by flags is permitted. The result indicates if
+ access will be granted. It is important to remember that Coda uses
+ ACLs to enforce protection and that ultimately the servers, not the
+ clients enforce the security of the system. The result of this call
+ will depend on whether a _t_o_k_e_n is held by the user.
+
+ EErrrroorrss The object may not exist, or the ACL describing the protection
+ may not be accessible.
+
+ 0wpage
+
+ 44..88.. ccrreeaattee
+
+
+ SSuummmmaarryy Invoked to create a file
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_create_in {
+ ViceFid VFid;
+ struct coda_vattr attr;
+ int excl;
+ int mode;
+ char *name; /* Place holder for data. */
+ } cfs_create;
+
+
+
+
+ oouutt
+
+ struct cfs_create_out {
+ ViceFid VFid;
+ struct coda_vattr attr;
+ } cfs_create;
+
+
+
+ DDeessccrriippttiioonn This upcall is invoked to request creation of a file.
+ The file will be created in the directory identified by VFid, its name
+ will be name, and the mode will be mode. If excl is set an error will
+ be returned if the file already exists. If the size field in attr is
+ set to zero the file will be truncated. The uid and gid of the file
+ are set by converting the CodaCred to a uid using a macro CRTOUID
+ (this macro is platform dependent). Upon success the VFid and
+ attributes of the file are returned. The Coda FS Driver will normally
+ instantiate a vnode, inode or file handle at kernel level for the new
+ object.
+
+
+ EErrrroorrss A variety of errors can occur. Permissions may be insufficient.
+ If the object exists and is not a file the error EISDIR is returned
+ under Unix.
+
+ NNOOTTEE The packing of parameters is very inefficient and appears to
+ indicate confusion between the system call creat and the VFS operation
+ create. The VFS operation create is only called to create new objects.
+ This create call differs from the Unix one in that it is not invoked
+ to return a file descriptor. The truncate and exclusive options,
+ together with the mode, could simply be part of the mode as it is
+ under Unix. There should be no flags argument; this is used in open
+ (2) to return a file descriptor for READ or WRITE mode.
+
+ The attributes of the directory should be returned too, since the size
+ and mtime changed.
+
+ 0wpage
+
+ 44..99.. mmkkddiirr
+
+
+ SSuummmmaarryy Create a new directory.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_mkdir_in {
+ ViceFid VFid;
+ struct coda_vattr attr;
+ char *name; /* Place holder for data. */
+ } cfs_mkdir;
+
+
+
+ oouutt
+
+ struct cfs_mkdir_out {
+ ViceFid VFid;
+ struct coda_vattr attr;
+ } cfs_mkdir;
+
+
+
+
+ DDeessccrriippttiioonn This call is similar to create but creates a directory.
+ Only the mode field in the input parameters is used for creation.
+ Upon successful creation, the attr returned contains the attributes of
+ the new directory.
+
+ EErrrroorrss As for create.
+
+ NNOOTTEE The input parameter should be changed to mode instead of
+ attributes.
+
+ The attributes of the parent should be returned since the size and
+ mtime changes.
+
+ 0wpage
+
+ 44..1100.. lliinnkk
+
+
+ SSuummmmaarryy Create a link to an existing file.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_link_in {
+ ViceFid sourceFid; /* cnode to link *to* */
+ ViceFid destFid; /* Directory in which to place link */
+ char *tname; /* Place holder for data. */
+ } cfs_link;
+
+
+
+ oouutt
+ empty
+
+ DDeessccrriippttiioonn This call creates a link to the sourceFid in the directory
+ identified by destFid with name tname. The source must reside in the
+ target's parent, i.e. the source must be have parent destFid, i.e. Coda
+ does not support cross directory hard links. Only the return value is
+ relevant. It indicates success or the type of failure.
+
+ EErrrroorrss The usual errors can occur.0wpage
+
+ 44..1111.. ssyymmlliinnkk
+
+
+ SSuummmmaarryy create a symbolic link
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_symlink_in {
+ ViceFid VFid; /* Directory to put symlink in */
+ char *srcname;
+ struct coda_vattr attr;
+ char *tname;
+ } cfs_symlink;
+
+
+
+ oouutt
+ none
+
+ DDeessccrriippttiioonn Create a symbolic link. The link is to be placed in the
+ directory identified by VFid and named tname. It should point to the
+ pathname srcname. The attributes of the newly created object are to
+ be set to attr.
+
+ EErrrroorrss
+
+ NNOOTTEE The attributes of the target directory should be returned since
+ its size changed.
+
+ 0wpage
+
+ 44..1122.. rreemmoovvee
+
+
+ SSuummmmaarryy Remove a file
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_remove_in {
+ ViceFid VFid;
+ char *name; /* Place holder for data. */
+ } cfs_remove;
+
+
+
+ oouutt
+ none
+
+ DDeessccrriippttiioonn Remove file named cfs_remove_in.name in directory
+ identified by VFid.
+
+ EErrrroorrss
+
+ NNOOTTEE The attributes of the directory should be returned since its
+ mtime and size may change.
+
+ 0wpage
+
+ 44..1133.. rrmmddiirr
+
+
+ SSuummmmaarryy Remove a directory
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_rmdir_in {
+ ViceFid VFid;
+ char *name; /* Place holder for data. */
+ } cfs_rmdir;
+
+
+
+ oouutt
+ none
+
+ DDeessccrriippttiioonn Remove the directory with name name from the directory
+ identified by VFid.
+
+ EErrrroorrss
+
+ NNOOTTEE The attributes of the parent directory should be returned since
+ its mtime and size may change.
+
+ 0wpage
+
+ 44..1144.. rreeaaddlliinnkk
+
+
+ SSuummmmaarryy Read the value of a symbolic link.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_readlink_in {
+ ViceFid VFid;
+ } cfs_readlink;
+
+
+
+ oouutt
+
+ struct cfs_readlink_out {
+ int count;
+ caddr_t data; /* Place holder for data. */
+ } cfs_readlink;
+
+
+
+ DDeessccrriippttiioonn This routine reads the contents of symbolic link
+ identified by VFid into the buffer data. The buffer data must be able
+ to hold any name up to CFS_MAXNAMLEN (PATH or NAM??).
+
+ EErrrroorrss No unusual errors.
+
+ 0wpage
+
+ 44..1155.. ooppeenn
+
+
+ SSuummmmaarryy Open a file.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_open_in {
+ ViceFid VFid;
+ int flags;
+ } cfs_open;
+
+
+
+ oouutt
+
+ struct cfs_open_out {
+ dev_t dev;
+ ino_t inode;
+ } cfs_open;
+
+
+
+ DDeessccrriippttiioonn This request asks Venus to place the file identified by
+ VFid in its cache and to note that the calling process wishes to open
+ it with flags as in open(2). The return value to the kernel differs
+ for Unix and Windows systems. For Unix systems the Coda FS Driver is
+ informed of the device and inode number of the container file in the
+ fields dev and inode. For Windows the path of the container file is
+ returned to the kernel.
+ EErrrroorrss
+
+ NNOOTTEE Currently the cfs_open_out structure is not properly adapted to
+ deal with the Windows case. It might be best to implement two
+ upcalls, one to open aiming at a container file name, the other at a
+ container file inode.
+
+ 0wpage
+
+ 44..1166.. cclloossee
+
+
+ SSuummmmaarryy Close a file, update it on the servers.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_close_in {
+ ViceFid VFid;
+ int flags;
+ } cfs_close;
+
+
+
+ oouutt
+ none
+
+ DDeessccrriippttiioonn Close the file identified by VFid.
+
+ EErrrroorrss
+
+ NNOOTTEE The flags argument is bogus and not used. However, Venus' code
+ has room to deal with an execp input field, probably this field should
+ be used to inform Venus that the file was closed but is still memory
+ mapped for execution. There are comments about fetching versus not
+ fetching the data in Venus vproc_vfscalls. This seems silly. If a
+ file is being closed, the data in the container file is to be the new
+ data. Here again the execp flag might be in play to create confusion:
+ currently Venus might think a file can be flushed from the cache when
+ it is still memory mapped. This needs to be understood.
+
+ 0wpage
+
+ 44..1177.. iiooccttll
+
+
+ SSuummmmaarryy Do an ioctl on a file. This includes the pioctl interface.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_ioctl_in {
+ ViceFid VFid;
+ int cmd;
+ int len;
+ int rwflag;
+ char *data; /* Place holder for data. */
+ } cfs_ioctl;
+
+
+
+ oouutt
+
+
+ struct cfs_ioctl_out {
+ int len;
+ caddr_t data; /* Place holder for data. */
+ } cfs_ioctl;
+
+
+
+ DDeessccrriippttiioonn Do an ioctl operation on a file. The command, len and
+ data arguments are filled as usual. flags is not used by Venus.
+
+ EErrrroorrss
+
+ NNOOTTEE Another bogus parameter. flags is not used. What is the
+ business about PREFETCHING in the Venus code?
+
+
+ 0wpage
+
+ 44..1188.. rreennaammee
+
+
+ SSuummmmaarryy Rename a fid.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_rename_in {
+ ViceFid sourceFid;
+ char *srcname;
+ ViceFid destFid;
+ char *destname;
+ } cfs_rename;
+
+
+
+ oouutt
+ none
+
+ DDeessccrriippttiioonn Rename the object with name srcname in directory
+ sourceFid to destname in destFid. It is important that the names
+ srcname and destname are 0 terminated strings. Strings in Unix
+ kernels are not always null terminated.
+
+ EErrrroorrss
+
+ 0wpage
+
+ 44..1199.. rreeaaddddiirr
+
+
+ SSuummmmaarryy Read directory entries.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_readdir_in {
+ ViceFid VFid;
+ int count;
+ int offset;
+ } cfs_readdir;
+
+
+
+
+ oouutt
+
+ struct cfs_readdir_out {
+ int size;
+ caddr_t data; /* Place holder for data. */
+ } cfs_readdir;
+
+
+
+ DDeessccrriippttiioonn Read directory entries from VFid starting at offset and
+ read at most count bytes. Returns the data in data and returns
+ the size in size.
+
+ EErrrroorrss
+
+ NNOOTTEE This call is not used. Readdir operations exploit container
+ files. We will re-evaluate this during the directory revamp which is
+ about to take place.
+
+ 0wpage
+
+ 44..2200.. vvggeett
+
+
+ SSuummmmaarryy instructs Venus to do an FSDB->Get.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_vget_in {
+ ViceFid VFid;
+ } cfs_vget;
+
+
+
+ oouutt
+
+ struct cfs_vget_out {
+ ViceFid VFid;
+ int vtype;
+ } cfs_vget;
+
+
+
+ DDeessccrriippttiioonn This upcall asks Venus to do a get operation on an fsobj
+ labelled by VFid.
+
+ EErrrroorrss
+
+ NNOOTTEE This operation is not used. However, it is extremely useful
+ since it can be used to deal with read/write memory mapped files.
+ These can be "pinned" in the Venus cache using vget and released with
+ inactive.
+
+ 0wpage
+
+ 44..2211.. ffssyynncc
+
+
+ SSuummmmaarryy Tell Venus to update the RVM attributes of a file.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_fsync_in {
+ ViceFid VFid;
+ } cfs_fsync;
+
+
+
+ oouutt
+ none
+
+ DDeessccrriippttiioonn Ask Venus to update RVM attributes of object VFid. This
+ should be called as part of kernel level fsync type calls. The
+ result indicates if the syncing was successful.
+
+ EErrrroorrss
+
+ NNOOTTEE Linux does not implement this call. It should.
+
+ 0wpage
+
+ 44..2222.. iinnaaccttiivvee
+
+
+ SSuummmmaarryy Tell Venus a vnode is no longer in use.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_inactive_in {
+ ViceFid VFid;
+ } cfs_inactive;
+
+
+
+ oouutt
+ none
+
+ DDeessccrriippttiioonn This operation returns EOPNOTSUPP.
+
+ EErrrroorrss
+
+ NNOOTTEE This should perhaps be removed.
+
+ 0wpage
+
+ 44..2233.. rrddwwrr
+
+
+ SSuummmmaarryy Read or write from a file
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct cfs_rdwr_in {
+ ViceFid VFid;
+ int rwflag;
+ int count;
+ int offset;
+ int ioflag;
+ caddr_t data; /* Place holder for data. */
+ } cfs_rdwr;
+
+
+
+
+ oouutt
+
+ struct cfs_rdwr_out {
+ int rwflag;
+ int count;
+ caddr_t data; /* Place holder for data. */
+ } cfs_rdwr;
+
+
+
+ DDeessccrriippttiioonn This upcall asks Venus to read or write from a file.
+
+ EErrrroorrss
+
+ NNOOTTEE It should be removed since it is against the Coda philosophy that
+ read/write operations never reach Venus. I have been told the
+ operation does not work. It is not currently used.
+
+
+ 0wpage
+
+ 44..2244.. ooddyymmoouunntt
+
+
+ SSuummmmaarryy Allows mounting multiple Coda "filesystems" on one Unix mount
+ point.
+
+ AArrgguummeennttss
+
+ iinn
+
+ struct ody_mount_in {
+ char *name; /* Place holder for data. */
+ } ody_mount;
+
+
+
+ oouutt
+
+ struct ody_mount_out {
+ ViceFid VFid;
+ } ody_mount;
+
+
+
+ DDeessccrriippttiioonn Asks Venus to return the rootfid of a Coda system named
+ name. The fid is returned in VFid.
+
+ EErrrroorrss
+
+ NNOOTTEE This call was used by David for dynamic sets. It should be
+ removed since it causes a jungle of pointers in the VFS mounting area.
+ It is not used by Coda proper. Call is not implemented by Venus.
+
+ 0wpage
+
+ 44..2255.. ooddyy__llooookkuupp
+
+
+ SSuummmmaarryy Looks up something.
+
+ AArrgguummeennttss
+
+ iinn irrelevant
+
+
+ oouutt
+ irrelevant
+
+ DDeessccrriippttiioonn
+
+ EErrrroorrss
+
+ NNOOTTEE Gut it. Call is not implemented by Venus.
+
+ 0wpage
+
+ 44..2266.. ooddyy__eexxppaanndd
+
+
+ SSuummmmaarryy expands something in a dynamic set.
+
+ AArrgguummeennttss
+
+ iinn irrelevant
+
+ oouutt
+ irrelevant
+
+ DDeessccrriippttiioonn
+
+ EErrrroorrss
+
+ NNOOTTEE Gut it. Call is not implemented by Venus.
+
+ 0wpage
+
+ 44..2277.. pprreeffeettcchh
+
+
+ SSuummmmaarryy Prefetch a dynamic set.
+
+ AArrgguummeennttss
+
+ iinn Not documented.
+
+ oouutt
+ Not documented.
+
+ DDeessccrriippttiioonn Venus worker.cc has support for this call, although it is
+ noted that it doesn't work. Not surprising, since the kernel does not
+ have support for it. (ODY_PREFETCH is not a defined operation).
+
+ EErrrroorrss
+
+ NNOOTTEE Gut it. It isn't working and isn't used by Coda.
+
+
+ 0wpage
+
+ 44..2288.. ssiiggnnaall
+
+
+ SSuummmmaarryy Send Venus a signal about an upcall.
+
+ AArrgguummeennttss
+
+ iinn none
+
+ oouutt
+ not applicable.
+
+ DDeessccrriippttiioonn This is an out-of-band upcall to Venus to inform Venus
+ that the calling process received a signal after Venus read the
+ message from the input queue. Venus is supposed to clean up the
+ operation.
+
+ EErrrroorrss No reply is given.
+
+ NNOOTTEE We need to better understand what Venus needs to clean up and if
+ it is doing this correctly. Also we need to handle multiple upcall
+ per system call situations correctly. It would be important to know
+ what state changes in Venus take place after an upcall for which the
+ kernel is responsible for notifying Venus to clean up (e.g. open
+ definitely is such a state change, but many others are maybe not).
+
+ 0wpage
+
+ 55.. TThhee mmiinniiccaacchhee aanndd ddoowwnnccaallllss
+
+
+ The Coda FS Driver can cache results of lookup and access upcalls, to
+ limit the frequency of upcalls. Upcalls carry a price since a process
+ context switch needs to take place. The counterpart of caching the
+ information is that Venus will notify the FS Driver that cached
+ entries must be flushed or renamed.
+
+ The kernel code generally has to maintain a structure which links the
+ internal file handles (called vnodes in BSD, inodes in Linux and
+ FileHandles in Windows) with the ViceFid's which Venus maintains. The
+ reason is that frequent translations back and forth are needed in
+ order to make upcalls and use the results of upcalls. Such linking
+ objects are called ccnnooddeess.
+
+ The current minicache implementations have cache entries which record
+ the following:
+
+ 1. the name of the file
+
+ 2. the cnode of the directory containing the object
+
+ 3. a list of CodaCred's for which the lookup is permitted.
+
+ 4. the cnode of the object
+
+ The lookup call in the Coda FS Driver may request the cnode of the
+ desired object from the cache, by passing its name, directory and the
+ CodaCred's of the caller. The cache will return the cnode or indicate
+ that it cannot be found. The Coda FS Driver must be careful to
+ invalidate cache entries when it modifies or removes objects.
+
+ When Venus obtains information that indicates that cache entries are
+ no longer valid, it will make a downcall to the kernel. Downcalls are
+ intercepted by the Coda FS Driver and lead to cache invalidations of
+ the kind described below. The Coda FS Driver does not return an error
+ unless the downcall data could not be read into kernel memory.
+
+
+ 55..11.. IINNVVAALLIIDDAATTEE
+
+
+ No information is available on this call.
+
+
+ 55..22.. FFLLUUSSHH
+
+
+
+ AArrgguummeennttss None
+
+ SSuummmmaarryy Flush the name cache entirely.
+
+ DDeessccrriippttiioonn Venus issues this call upon startup and when it dies. This
+ is to prevent stale cache information being held. Some operating
+ systems allow the kernel name cache to be switched off dynamically.
+ When this is done, this downcall is made.
+
+
+ 55..33.. PPUURRGGEEUUSSEERR
+
+
+ AArrgguummeennttss
+
+ struct cfs_purgeuser_out {/* CFS_PURGEUSER is a venus->kernel call */
+ struct CodaCred cred;
+ } cfs_purgeuser;
+
+
+
+ DDeessccrriippttiioonn Remove all entries in the cache carrying the Cred. This
+ call is issued when tokens for a user expire or are flushed.
+
+
+ 55..44.. ZZAAPPFFIILLEE
+
+
+ AArrgguummeennttss
+
+ struct cfs_zapfile_out { /* CFS_ZAPFILE is a venus->kernel call */
+ ViceFid CodaFid;
+ } cfs_zapfile;
+
+
+
+ DDeessccrriippttiioonn Remove all entries which have the (dir vnode, name) pair.
+ This is issued as a result of an invalidation of cached attributes of
+ a vnode.
+
+ NNOOTTEE Call is not named correctly in NetBSD and Mach. The minicache
+ zapfile routine takes different arguments. Linux does not implement
+ the invalidation of attributes correctly.
+
+
+
+ 55..55.. ZZAAPPDDIIRR
+
+
+ AArrgguummeennttss
+
+ struct cfs_zapdir_out { /* CFS_ZAPDIR is a venus->kernel call */
+ ViceFid CodaFid;
+ } cfs_zapdir;
+
+
+
+ DDeessccrriippttiioonn Remove all entries in the cache lying in a directory
+ CodaFid, and all children of this directory. This call is issued when
+ Venus receives a callback on the directory.
+
+
+ 55..66.. ZZAAPPVVNNOODDEE
+
+
+
+ AArrgguummeennttss
+
+ struct cfs_zapvnode_out { /* CFS_ZAPVNODE is a venus->kernel call */
+ struct CodaCred cred;
+ ViceFid VFid;
+ } cfs_zapvnode;
+
+
+
+ DDeessccrriippttiioonn Remove all entries in the cache carrying the cred and VFid
+ as in the arguments. This downcall is probably never issued.
+
+
+ 55..77.. PPUURRGGEEFFIIDD
+
+
+ SSuummmmaarryy
+
+ AArrgguummeennttss
+
+ struct cfs_purgefid_out { /* CFS_PURGEFID is a venus->kernel call */
+ ViceFid CodaFid;
+ } cfs_purgefid;
+
+
+
+ DDeessccrriippttiioonn Flush the attribute for the file. If it is a dir (odd
+ vnode), purge its children from the namecache and remove the file from the
+ namecache.
+
+
+
+ 55..88.. RREEPPLLAACCEE
+
+
+ SSuummmmaarryy Replace the Fid's for a collection of names.
+
+ AArrgguummeennttss
+
+ struct cfs_replace_out { /* cfs_replace is a venus->kernel call */
+ ViceFid NewFid;
+ ViceFid OldFid;
+ } cfs_replace;
+
+
+
+ DDeessccrriippttiioonn This routine replaces a ViceFid in the name cache with
+ another. It is added to allow Venus during reintegration to replace
+ locally allocated temp fids while disconnected with global fids even
+ when the reference counts on those fids are not zero.
+
+ 0wpage
+
+ 66.. IInniittiiaalliizzaattiioonn aanndd cclleeaannuupp
+
+
+ This section gives brief hints as to desirable features for the Coda
+ FS Driver at startup and upon shutdown or Venus failures. Before
+ entering the discussion it is useful to repeat that the Coda FS Driver
+ maintains the following data:
+
+
+ 1. message queues
+
+ 2. cnodes
+
+ 3. name cache entries
+
+ The name cache entries are entirely private to the driver, so they
+ can easily be manipulated. The message queues will generally have
+ clear points of initialization and destruction. The cnodes are
+ much more delicate. User processes hold reference counts in Coda
+ filesystems and it can be difficult to clean up the cnodes.
+
+ It can expect requests through:
+
+ 1. the message subsystem
+
+ 2. the VFS layer
+
+ 3. pioctl interface
+
+ Currently the _p_i_o_c_t_l passes through the VFS for Coda so we can
+ treat these similarly.
+
+
+ 66..11.. RReeqquuiirreemmeennttss
+
+
+ The following requirements should be accommodated:
+
+ 1. The message queues should have open and close routines. On Unix
+ the opening of the character devices are such routines.
+
+ +o Before opening, no messages can be placed.
+
+ +o Opening will remove any old messages still pending.
+
+ +o Close will notify any sleeping processes that their upcall cannot
+ be completed.
+
+ +o Close will free all memory allocated by the message queues.
+
+
+ 2. At open the namecache shall be initialized to empty state.
+
+ 3. Before the message queues are open, all VFS operations will fail.
+ Fortunately this can be achieved by making sure than mounting the
+ Coda filesystem cannot succeed before opening.
+
+ 4. After closing of the queues, no VFS operations can succeed. Here
+ one needs to be careful, since a few operations (lookup,
+ read/write, readdir) can proceed without upcalls. These must be
+ explicitly blocked.
+
+ 5. Upon closing the namecache shall be flushed and disabled.
+
+ 6. All memory held by cnodes can be freed without relying on upcalls.
+
+ 7. Unmounting the file system can be done without relying on upcalls.
+
+ 8. Mounting the Coda filesystem should fail gracefully if Venus cannot
+ get the rootfid or the attributes of the rootfid. The latter is
+ best implemented by Venus fetching these objects before attempting
+ to mount.
+
+ NNOOTTEE NetBSD in particular but also Linux have not implemented the
+ above requirements fully. For smooth operation this needs to be
+ corrected.
+
+
+
diff --git a/Documentation/filesystems/configfs/Makefile b/Documentation/filesystems/configfs/Makefile
new file mode 100644
index 000000000..be7ec5e67
--- /dev/null
+++ b/Documentation/filesystems/configfs/Makefile
@@ -0,0 +1,3 @@
+ifneq ($(CONFIG_CONFIGFS_FS),)
+obj-m += configfs_example_explicit.o configfs_example_macros.o
+endif
diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt
new file mode 100644
index 000000000..b40fec9d3
--- /dev/null
+++ b/Documentation/filesystems/configfs/configfs.txt
@@ -0,0 +1,484 @@
+
+configfs - Userspace-driven kernel object configuration.
+
+Joel Becker <joel.becker@oracle.com>
+
+Updated: 31 March 2005
+
+Copyright (c) 2005 Oracle Corporation,
+ Joel Becker <joel.becker@oracle.com>
+
+
+[What is configfs?]
+
+configfs is a ram-based filesystem that provides the converse of
+sysfs's functionality. Where sysfs is a filesystem-based view of
+kernel objects, configfs is a filesystem-based manager of kernel
+objects, or config_items.
+
+With sysfs, an object is created in kernel (for example, when a device
+is discovered) and it is registered with sysfs. Its attributes then
+appear in sysfs, allowing userspace to read the attributes via
+readdir(3)/read(2). It may allow some attributes to be modified via
+write(2). The important point is that the object is created and
+destroyed in kernel, the kernel controls the lifecycle of the sysfs
+representation, and sysfs is merely a window on all this.
+
+A configfs config_item is created via an explicit userspace operation:
+mkdir(2). It is destroyed via rmdir(2). The attributes appear at
+mkdir(2) time, and can be read or modified via read(2) and write(2).
+As with sysfs, readdir(3) queries the list of items and/or attributes.
+symlink(2) can be used to group items together. Unlike sysfs, the
+lifetime of the representation is completely driven by userspace. The
+kernel modules backing the items must respond to this.
+
+Both sysfs and configfs can and should exist together on the same
+system. One is not a replacement for the other.
+
+[Using configfs]
+
+configfs can be compiled as a module or into the kernel. You can access
+it by doing
+
+ mount -t configfs none /config
+
+The configfs tree will be empty unless client modules are also loaded.
+These are modules that register their item types with configfs as
+subsystems. Once a client subsystem is loaded, it will appear as a
+subdirectory (or more than one) under /config. Like sysfs, the
+configfs tree is always there, whether mounted on /config or not.
+
+An item is created via mkdir(2). The item's attributes will also
+appear at this time. readdir(3) can determine what the attributes are,
+read(2) can query their default values, and write(2) can store new
+values. Like sysfs, attributes should be ASCII text files, preferably
+with only one value per file. The same efficiency caveats from sysfs
+apply. Don't mix more than one attribute in one attribute file.
+
+Like sysfs, configfs expects write(2) to store the entire buffer at
+once. When writing to configfs attributes, userspace processes should
+first read the entire file, modify the portions they wish to change, and
+then write the entire buffer back. Attribute files have a maximum size
+of one page (PAGE_SIZE, 4096 on i386).
+
+When an item needs to be destroyed, remove it with rmdir(2). An
+item cannot be destroyed if any other item has a link to it (via
+symlink(2)). Links can be removed via unlink(2).
+
+[Configuring FakeNBD: an Example]
+
+Imagine there's a Network Block Device (NBD) driver that allows you to
+access remote block devices. Call it FakeNBD. FakeNBD uses configfs
+for its configuration. Obviously, there will be a nice program that
+sysadmins use to configure FakeNBD, but somehow that program has to tell
+the driver about it. Here's where configfs comes in.
+
+When the FakeNBD driver is loaded, it registers itself with configfs.
+readdir(3) sees this just fine:
+
+ # ls /config
+ fakenbd
+
+A fakenbd connection can be created with mkdir(2). The name is
+arbitrary, but likely the tool will make some use of the name. Perhaps
+it is a uuid or a disk name:
+
+ # mkdir /config/fakenbd/disk1
+ # ls /config/fakenbd/disk1
+ target device rw
+
+The target attribute contains the IP address of the server FakeNBD will
+connect to. The device attribute is the device on the server.
+Predictably, the rw attribute determines whether the connection is
+read-only or read-write.
+
+ # echo 10.0.0.1 > /config/fakenbd/disk1/target
+ # echo /dev/sda1 > /config/fakenbd/disk1/device
+ # echo 1 > /config/fakenbd/disk1/rw
+
+That's it. That's all there is. Now the device is configured, via the
+shell no less.
+
+[Coding With configfs]
+
+Every object in configfs is a config_item. A config_item reflects an
+object in the subsystem. It has attributes that match values on that
+object. configfs handles the filesystem representation of that object
+and its attributes, allowing the subsystem to ignore all but the
+basic show/store interaction.
+
+Items are created and destroyed inside a config_group. A group is a
+collection of items that share the same attributes and operations.
+Items are created by mkdir(2) and removed by rmdir(2), but configfs
+handles that. The group has a set of operations to perform these tasks
+
+A subsystem is the top level of a client module. During initialization,
+the client module registers the subsystem with configfs, the subsystem
+appears as a directory at the top of the configfs filesystem. A
+subsystem is also a config_group, and can do everything a config_group
+can.
+
+[struct config_item]
+
+ struct config_item {
+ char *ci_name;
+ char ci_namebuf[UOBJ_NAME_LEN];
+ struct kref ci_kref;
+ struct list_head ci_entry;
+ struct config_item *ci_parent;
+ struct config_group *ci_group;
+ struct config_item_type *ci_type;
+ struct dentry *ci_dentry;
+ };
+
+ void config_item_init(struct config_item *);
+ void config_item_init_type_name(struct config_item *,
+ const char *name,
+ struct config_item_type *type);
+ struct config_item *config_item_get(struct config_item *);
+ void config_item_put(struct config_item *);
+
+Generally, struct config_item is embedded in a container structure, a
+structure that actually represents what the subsystem is doing. The
+config_item portion of that structure is how the object interacts with
+configfs.
+
+Whether statically defined in a source file or created by a parent
+config_group, a config_item must have one of the _init() functions
+called on it. This initializes the reference count and sets up the
+appropriate fields.
+
+All users of a config_item should have a reference on it via
+config_item_get(), and drop the reference when they are done via
+config_item_put().
+
+By itself, a config_item cannot do much more than appear in configfs.
+Usually a subsystem wants the item to display and/or store attributes,
+among other things. For that, it needs a type.
+
+[struct config_item_type]
+
+ struct configfs_item_operations {
+ void (*release)(struct config_item *);
+ ssize_t (*show_attribute)(struct config_item *,
+ struct configfs_attribute *,
+ char *);
+ ssize_t (*store_attribute)(struct config_item *,
+ struct configfs_attribute *,
+ const char *, size_t);
+ int (*allow_link)(struct config_item *src,
+ struct config_item *target);
+ int (*drop_link)(struct config_item *src,
+ struct config_item *target);
+ };
+
+ struct config_item_type {
+ struct module *ct_owner;
+ struct configfs_item_operations *ct_item_ops;
+ struct configfs_group_operations *ct_group_ops;
+ struct configfs_attribute **ct_attrs;
+ };
+
+The most basic function of a config_item_type is to define what
+operations can be performed on a config_item. All items that have been
+allocated dynamically will need to provide the ct_item_ops->release()
+method. This method is called when the config_item's reference count
+reaches zero. Items that wish to display an attribute need to provide
+the ct_item_ops->show_attribute() method. Similarly, storing a new
+attribute value uses the store_attribute() method.
+
+[struct configfs_attribute]
+
+ struct configfs_attribute {
+ char *ca_name;
+ struct module *ca_owner;
+ umode_t ca_mode;
+ };
+
+When a config_item wants an attribute to appear as a file in the item's
+configfs directory, it must define a configfs_attribute describing it.
+It then adds the attribute to the NULL-terminated array
+config_item_type->ct_attrs. When the item appears in configfs, the
+attribute file will appear with the configfs_attribute->ca_name
+filename. configfs_attribute->ca_mode specifies the file permissions.
+
+If an attribute is readable and the config_item provides a
+ct_item_ops->show_attribute() method, that method will be called
+whenever userspace asks for a read(2) on the attribute. The converse
+will happen for write(2).
+
+[struct config_group]
+
+A config_item cannot live in a vacuum. The only way one can be created
+is via mkdir(2) on a config_group. This will trigger creation of a
+child item.
+
+ struct config_group {
+ struct config_item cg_item;
+ struct list_head cg_children;
+ struct configfs_subsystem *cg_subsys;
+ struct config_group **default_groups;
+ };
+
+ void config_group_init(struct config_group *group);
+ void config_group_init_type_name(struct config_group *group,
+ const char *name,
+ struct config_item_type *type);
+
+
+The config_group structure contains a config_item. Properly configuring
+that item means that a group can behave as an item in its own right.
+However, it can do more: it can create child items or groups. This is
+accomplished via the group operations specified on the group's
+config_item_type.
+
+ struct configfs_group_operations {
+ struct config_item *(*make_item)(struct config_group *group,
+ const char *name);
+ struct config_group *(*make_group)(struct config_group *group,
+ const char *name);
+ int (*commit_item)(struct config_item *item);
+ void (*disconnect_notify)(struct config_group *group,
+ struct config_item *item);
+ void (*drop_item)(struct config_group *group,
+ struct config_item *item);
+ };
+
+A group creates child items by providing the
+ct_group_ops->make_item() method. If provided, this method is called from mkdir(2) in the group's directory. The subsystem allocates a new
+config_item (or more likely, its container structure), initializes it,
+and returns it to configfs. Configfs will then populate the filesystem
+tree to reflect the new item.
+
+If the subsystem wants the child to be a group itself, the subsystem
+provides ct_group_ops->make_group(). Everything else behaves the same,
+using the group _init() functions on the group.
+
+Finally, when userspace calls rmdir(2) on the item or group,
+ct_group_ops->drop_item() is called. As a config_group is also a
+config_item, it is not necessary for a separate drop_group() method.
+The subsystem must config_item_put() the reference that was initialized
+upon item allocation. If a subsystem has no work to do, it may omit
+the ct_group_ops->drop_item() method, and configfs will call
+config_item_put() on the item on behalf of the subsystem.
+
+IMPORTANT: drop_item() is void, and as such cannot fail. When rmdir(2)
+is called, configfs WILL remove the item from the filesystem tree
+(assuming that it has no children to keep it busy). The subsystem is
+responsible for responding to this. If the subsystem has references to
+the item in other threads, the memory is safe. It may take some time
+for the item to actually disappear from the subsystem's usage. But it
+is gone from configfs.
+
+When drop_item() is called, the item's linkage has already been torn
+down. It no longer has a reference on its parent and has no place in
+the item hierarchy. If a client needs to do some cleanup before this
+teardown happens, the subsystem can implement the
+ct_group_ops->disconnect_notify() method. The method is called after
+configfs has removed the item from the filesystem view but before the
+item is removed from its parent group. Like drop_item(),
+disconnect_notify() is void and cannot fail. Client subsystems should
+not drop any references here, as they still must do it in drop_item().
+
+A config_group cannot be removed while it still has child items. This
+is implemented in the configfs rmdir(2) code. ->drop_item() will not be
+called, as the item has not been dropped. rmdir(2) will fail, as the
+directory is not empty.
+
+[struct configfs_subsystem]
+
+A subsystem must register itself, usually at module_init time. This
+tells configfs to make the subsystem appear in the file tree.
+
+ struct configfs_subsystem {
+ struct config_group su_group;
+ struct mutex su_mutex;
+ };
+
+ int configfs_register_subsystem(struct configfs_subsystem *subsys);
+ void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
+
+ A subsystem consists of a toplevel config_group and a mutex.
+The group is where child config_items are created. For a subsystem,
+this group is usually defined statically. Before calling
+configfs_register_subsystem(), the subsystem must have initialized the
+group via the usual group _init() functions, and it must also have
+initialized the mutex.
+ When the register call returns, the subsystem is live, and it
+will be visible via configfs. At that point, mkdir(2) can be called and
+the subsystem must be ready for it.
+
+[An Example]
+
+The best example of these basic concepts is the simple_children
+subsystem/group and the simple_child item in configfs_example_explicit.c
+and configfs_example_macros.c. It shows a trivial object displaying and
+storing an attribute, and a simple group creating and destroying these
+children.
+
+The only difference between configfs_example_explicit.c and
+configfs_example_macros.c is how the attributes of the childless item
+are defined. The childless item has extended attributes, each with
+their own show()/store() operation. This follows a convention commonly
+used in sysfs. configfs_example_explicit.c creates these attributes
+by explicitly defining the structures involved. Conversely
+configfs_example_macros.c uses some convenience macros from configfs.h
+to define the attributes. These macros are similar to their sysfs
+counterparts.
+
+[Hierarchy Navigation and the Subsystem Mutex]
+
+There is an extra bonus that configfs provides. The config_groups and
+config_items are arranged in a hierarchy due to the fact that they
+appear in a filesystem. A subsystem is NEVER to touch the filesystem
+parts, but the subsystem might be interested in this hierarchy. For
+this reason, the hierarchy is mirrored via the config_group->cg_children
+and config_item->ci_parent structure members.
+
+A subsystem can navigate the cg_children list and the ci_parent pointer
+to see the tree created by the subsystem. This can race with configfs'
+management of the hierarchy, so configfs uses the subsystem mutex to
+protect modifications. Whenever a subsystem wants to navigate the
+hierarchy, it must do so under the protection of the subsystem
+mutex.
+
+A subsystem will be prevented from acquiring the mutex while a newly
+allocated item has not been linked into this hierarchy. Similarly, it
+will not be able to acquire the mutex while a dropping item has not
+yet been unlinked. This means that an item's ci_parent pointer will
+never be NULL while the item is in configfs, and that an item will only
+be in its parent's cg_children list for the same duration. This allows
+a subsystem to trust ci_parent and cg_children while they hold the
+mutex.
+
+[Item Aggregation Via symlink(2)]
+
+configfs provides a simple group via the group->item parent/child
+relationship. Often, however, a larger environment requires aggregation
+outside of the parent/child connection. This is implemented via
+symlink(2).
+
+A config_item may provide the ct_item_ops->allow_link() and
+ct_item_ops->drop_link() methods. If the ->allow_link() method exists,
+symlink(2) may be called with the config_item as the source of the link.
+These links are only allowed between configfs config_items. Any
+symlink(2) attempt outside the configfs filesystem will be denied.
+
+When symlink(2) is called, the source config_item's ->allow_link()
+method is called with itself and a target item. If the source item
+allows linking to target item, it returns 0. A source item may wish to
+reject a link if it only wants links to a certain type of object (say,
+in its own subsystem).
+
+When unlink(2) is called on the symbolic link, the source item is
+notified via the ->drop_link() method. Like the ->drop_item() method,
+this is a void function and cannot return failure. The subsystem is
+responsible for responding to the change.
+
+A config_item cannot be removed while it links to any other item, nor
+can it be removed while an item links to it. Dangling symlinks are not
+allowed in configfs.
+
+[Automatically Created Subgroups]
+
+A new config_group may want to have two types of child config_items.
+While this could be codified by magic names in ->make_item(), it is much
+more explicit to have a method whereby userspace sees this divergence.
+
+Rather than have a group where some items behave differently than
+others, configfs provides a method whereby one or many subgroups are
+automatically created inside the parent at its creation. Thus,
+mkdir("parent") results in "parent", "parent/subgroup1", up through
+"parent/subgroupN". Items of type 1 can now be created in
+"parent/subgroup1", and items of type N can be created in
+"parent/subgroupN".
+
+These automatic subgroups, or default groups, do not preclude other
+children of the parent group. If ct_group_ops->make_group() exists,
+other child groups can be created on the parent group directly.
+
+A configfs subsystem specifies default groups by filling in the
+NULL-terminated array default_groups on the config_group structure.
+Each group in that array is populated in the configfs tree at the same
+time as the parent group. Similarly, they are removed at the same time
+as the parent. No extra notification is provided. When a ->drop_item()
+method call notifies the subsystem the parent group is going away, it
+also means every default group child associated with that parent group.
+
+As a consequence of this, default_groups cannot be removed directly via
+rmdir(2). They also are not considered when rmdir(2) on the parent
+group is checking for children.
+
+[Dependent Subsystems]
+
+Sometimes other drivers depend on particular configfs items. For
+example, ocfs2 mounts depend on a heartbeat region item. If that
+region item is removed with rmdir(2), the ocfs2 mount must BUG or go
+readonly. Not happy.
+
+configfs provides two additional API calls: configfs_depend_item() and
+configfs_undepend_item(). A client driver can call
+configfs_depend_item() on an existing item to tell configfs that it is
+depended on. configfs will then return -EBUSY from rmdir(2) for that
+item. When the item is no longer depended on, the client driver calls
+configfs_undepend_item() on it.
+
+These API cannot be called underneath any configfs callbacks, as
+they will conflict. They can block and allocate. A client driver
+probably shouldn't calling them of its own gumption. Rather it should
+be providing an API that external subsystems call.
+
+How does this work? Imagine the ocfs2 mount process. When it mounts,
+it asks for a heartbeat region item. This is done via a call into the
+heartbeat code. Inside the heartbeat code, the region item is looked
+up. Here, the heartbeat code calls configfs_depend_item(). If it
+succeeds, then heartbeat knows the region is safe to give to ocfs2.
+If it fails, it was being torn down anyway, and heartbeat can gracefully
+pass up an error.
+
+[Committable Items]
+
+NOTE: Committable items are currently unimplemented.
+
+Some config_items cannot have a valid initial state. That is, no
+default values can be specified for the item's attributes such that the
+item can do its work. Userspace must configure one or more attributes,
+after which the subsystem can start whatever entity this item
+represents.
+
+Consider the FakeNBD device from above. Without a target address *and*
+a target device, the subsystem has no idea what block device to import.
+The simple example assumes that the subsystem merely waits until all the
+appropriate attributes are configured, and then connects. This will,
+indeed, work, but now every attribute store must check if the attributes
+are initialized. Every attribute store must fire off the connection if
+that condition is met.
+
+Far better would be an explicit action notifying the subsystem that the
+config_item is ready to go. More importantly, an explicit action allows
+the subsystem to provide feedback as to whether the attributes are
+initialized in a way that makes sense. configfs provides this as
+committable items.
+
+configfs still uses only normal filesystem operations. An item is
+committed via rename(2). The item is moved from a directory where it
+can be modified to a directory where it cannot.
+
+Any group that provides the ct_group_ops->commit_item() method has
+committable items. When this group appears in configfs, mkdir(2) will
+not work directly in the group. Instead, the group will have two
+subdirectories: "live" and "pending". The "live" directory does not
+support mkdir(2) or rmdir(2) either. It only allows rename(2). The
+"pending" directory does allow mkdir(2) and rmdir(2). An item is
+created in the "pending" directory. Its attributes can be modified at
+will. Userspace commits the item by renaming it into the "live"
+directory. At this point, the subsystem receives the ->commit_item()
+callback. If all required attributes are filled to satisfaction, the
+method returns zero and the item is moved to the "live" directory.
+
+As rmdir(2) does not work in the "live" directory, an item must be
+shutdown, or "uncommitted". Again, this is done via rename(2), this
+time from the "live" directory back to the "pending" one. The subsystem
+is notified by the ct_group_ops->uncommit_object() method.
+
+
diff --git a/Documentation/filesystems/configfs/configfs_example_explicit.c b/Documentation/filesystems/configfs/configfs_example_explicit.c
new file mode 100644
index 000000000..1420233df
--- /dev/null
+++ b/Documentation/filesystems/configfs/configfs_example_explicit.c
@@ -0,0 +1,483 @@
+/*
+ * vim: noexpandtab ts=8 sts=0 sw=8:
+ *
+ * configfs_example_explicit.c - This file is a demonstration module
+ * containing a number of configfs subsystems. It explicitly defines
+ * each structure without using the helper macros defined in
+ * configfs.h.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle. All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+
+
+/*
+ * 01-childless
+ *
+ * This first example is a childless subsystem. It cannot create
+ * any config_items. It just has attributes.
+ *
+ * Note that we are enclosing the configfs_subsystem inside a container.
+ * This is not necessary if a subsystem has no attributes directly
+ * on the subsystem. See the next example, 02-simple-children, for
+ * such a subsystem.
+ */
+
+struct childless {
+ struct configfs_subsystem subsys;
+ int showme;
+ int storeme;
+};
+
+struct childless_attribute {
+ struct configfs_attribute attr;
+ ssize_t (*show)(struct childless *, char *);
+ ssize_t (*store)(struct childless *, const char *, size_t);
+};
+
+static inline struct childless *to_childless(struct config_item *item)
+{
+ return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL;
+}
+
+static ssize_t childless_showme_read(struct childless *childless,
+ char *page)
+{
+ ssize_t pos;
+
+ pos = sprintf(page, "%d\n", childless->showme);
+ childless->showme++;
+
+ return pos;
+}
+
+static ssize_t childless_storeme_read(struct childless *childless,
+ char *page)
+{
+ return sprintf(page, "%d\n", childless->storeme);
+}
+
+static ssize_t childless_storeme_write(struct childless *childless,
+ const char *page,
+ size_t count)
+{
+ unsigned long tmp;
+ char *p = (char *) page;
+
+ tmp = simple_strtoul(p, &p, 10);
+ if ((*p != '\0') && (*p != '\n'))
+ return -EINVAL;
+
+ if (tmp > INT_MAX)
+ return -ERANGE;
+
+ childless->storeme = tmp;
+
+ return count;
+}
+
+static ssize_t childless_description_read(struct childless *childless,
+ char *page)
+{
+ return sprintf(page,
+"[01-childless]\n"
+"\n"
+"The childless subsystem is the simplest possible subsystem in\n"
+"configfs. It does not support the creation of child config_items.\n"
+"It only has a few attributes. In fact, it isn't much different\n"
+"than a directory in /proc.\n");
+}
+
+static struct childless_attribute childless_attr_showme = {
+ .attr = { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO },
+ .show = childless_showme_read,
+};
+static struct childless_attribute childless_attr_storeme = {
+ .attr = { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR },
+ .show = childless_storeme_read,
+ .store = childless_storeme_write,
+};
+static struct childless_attribute childless_attr_description = {
+ .attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO },
+ .show = childless_description_read,
+};
+
+static struct configfs_attribute *childless_attrs[] = {
+ &childless_attr_showme.attr,
+ &childless_attr_storeme.attr,
+ &childless_attr_description.attr,
+ NULL,
+};
+
+static ssize_t childless_attr_show(struct config_item *item,
+ struct configfs_attribute *attr,
+ char *page)
+{
+ struct childless *childless = to_childless(item);
+ struct childless_attribute *childless_attr =
+ container_of(attr, struct childless_attribute, attr);
+ ssize_t ret = 0;
+
+ if (childless_attr->show)
+ ret = childless_attr->show(childless, page);
+ return ret;
+}
+
+static ssize_t childless_attr_store(struct config_item *item,
+ struct configfs_attribute *attr,
+ const char *page, size_t count)
+{
+ struct childless *childless = to_childless(item);
+ struct childless_attribute *childless_attr =
+ container_of(attr, struct childless_attribute, attr);
+ ssize_t ret = -EINVAL;
+
+ if (childless_attr->store)
+ ret = childless_attr->store(childless, page, count);
+ return ret;
+}
+
+static struct configfs_item_operations childless_item_ops = {
+ .show_attribute = childless_attr_show,
+ .store_attribute = childless_attr_store,
+};
+
+static struct config_item_type childless_type = {
+ .ct_item_ops = &childless_item_ops,
+ .ct_attrs = childless_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct childless childless_subsys = {
+ .subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "01-childless",
+ .ci_type = &childless_type,
+ },
+ },
+ },
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 02-simple-children
+ *
+ * This example merely has a simple one-attribute child. Note that
+ * there is no extra attribute structure, as the child's attribute is
+ * known from the get-go. Also, there is no container for the
+ * subsystem, as it has no attributes of its own.
+ */
+
+struct simple_child {
+ struct config_item item;
+ int storeme;
+};
+
+static inline struct simple_child *to_simple_child(struct config_item *item)
+{
+ return item ? container_of(item, struct simple_child, item) : NULL;
+}
+
+static struct configfs_attribute simple_child_attr_storeme = {
+ .ca_owner = THIS_MODULE,
+ .ca_name = "storeme",
+ .ca_mode = S_IRUGO | S_IWUSR,
+};
+
+static struct configfs_attribute *simple_child_attrs[] = {
+ &simple_child_attr_storeme,
+ NULL,
+};
+
+static ssize_t simple_child_attr_show(struct config_item *item,
+ struct configfs_attribute *attr,
+ char *page)
+{
+ ssize_t count;
+ struct simple_child *simple_child = to_simple_child(item);
+
+ count = sprintf(page, "%d\n", simple_child->storeme);
+
+ return count;
+}
+
+static ssize_t simple_child_attr_store(struct config_item *item,
+ struct configfs_attribute *attr,
+ const char *page, size_t count)
+{
+ struct simple_child *simple_child = to_simple_child(item);
+ unsigned long tmp;
+ char *p = (char *) page;
+
+ tmp = simple_strtoul(p, &p, 10);
+ if (!p || (*p && (*p != '\n')))
+ return -EINVAL;
+
+ if (tmp > INT_MAX)
+ return -ERANGE;
+
+ simple_child->storeme = tmp;
+
+ return count;
+}
+
+static void simple_child_release(struct config_item *item)
+{
+ kfree(to_simple_child(item));
+}
+
+static struct configfs_item_operations simple_child_item_ops = {
+ .release = simple_child_release,
+ .show_attribute = simple_child_attr_show,
+ .store_attribute = simple_child_attr_store,
+};
+
+static struct config_item_type simple_child_type = {
+ .ct_item_ops = &simple_child_item_ops,
+ .ct_attrs = simple_child_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+
+struct simple_children {
+ struct config_group group;
+};
+
+static inline struct simple_children *to_simple_children(struct config_item *item)
+{
+ return item ? container_of(to_config_group(item), struct simple_children, group) : NULL;
+}
+
+static struct config_item *simple_children_make_item(struct config_group *group, const char *name)
+{
+ struct simple_child *simple_child;
+
+ simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
+ if (!simple_child)
+ return ERR_PTR(-ENOMEM);
+
+ config_item_init_type_name(&simple_child->item, name,
+ &simple_child_type);
+
+ simple_child->storeme = 0;
+
+ return &simple_child->item;
+}
+
+static struct configfs_attribute simple_children_attr_description = {
+ .ca_owner = THIS_MODULE,
+ .ca_name = "description",
+ .ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *simple_children_attrs[] = {
+ &simple_children_attr_description,
+ NULL,
+};
+
+static ssize_t simple_children_attr_show(struct config_item *item,
+ struct configfs_attribute *attr,
+ char *page)
+{
+ return sprintf(page,
+"[02-simple-children]\n"
+"\n"
+"This subsystem allows the creation of child config_items. These\n"
+"items have only one attribute that is readable and writeable.\n");
+}
+
+static void simple_children_release(struct config_item *item)
+{
+ kfree(to_simple_children(item));
+}
+
+static struct configfs_item_operations simple_children_item_ops = {
+ .release = simple_children_release,
+ .show_attribute = simple_children_attr_show,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations simple_children_group_ops = {
+ .make_item = simple_children_make_item,
+};
+
+static struct config_item_type simple_children_type = {
+ .ct_item_ops = &simple_children_item_ops,
+ .ct_group_ops = &simple_children_group_ops,
+ .ct_attrs = simple_children_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem simple_children_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "02-simple-children",
+ .ci_type = &simple_children_type,
+ },
+ },
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 03-group-children
+ *
+ * This example reuses the simple_children group from above. However,
+ * the simple_children group is not the subsystem itself, it is a
+ * child of the subsystem. Creation of a group in the subsystem creates
+ * a new simple_children group. That group can then have simple_child
+ * children of its own.
+ */
+
+static struct config_group *group_children_make_group(struct config_group *group, const char *name)
+{
+ struct simple_children *simple_children;
+
+ simple_children = kzalloc(sizeof(struct simple_children),
+ GFP_KERNEL);
+ if (!simple_children)
+ return ERR_PTR(-ENOMEM);
+
+ config_group_init_type_name(&simple_children->group, name,
+ &simple_children_type);
+
+ return &simple_children->group;
+}
+
+static struct configfs_attribute group_children_attr_description = {
+ .ca_owner = THIS_MODULE,
+ .ca_name = "description",
+ .ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *group_children_attrs[] = {
+ &group_children_attr_description,
+ NULL,
+};
+
+static ssize_t group_children_attr_show(struct config_item *item,
+ struct configfs_attribute *attr,
+ char *page)
+{
+ return sprintf(page,
+"[03-group-children]\n"
+"\n"
+"This subsystem allows the creation of child config_groups. These\n"
+"groups are like the subsystem simple-children.\n");
+}
+
+static struct configfs_item_operations group_children_item_ops = {
+ .show_attribute = group_children_attr_show,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations group_children_group_ops = {
+ .make_group = group_children_make_group,
+};
+
+static struct config_item_type group_children_type = {
+ .ct_item_ops = &group_children_item_ops,
+ .ct_group_ops = &group_children_group_ops,
+ .ct_attrs = group_children_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem group_children_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "03-group-children",
+ .ci_type = &group_children_type,
+ },
+ },
+};
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * We're now done with our subsystem definitions.
+ * For convenience in this module, here's a list of them all. It
+ * allows the init function to easily register them. Most modules
+ * will only have one subsystem, and will only call register_subsystem
+ * on it directly.
+ */
+static struct configfs_subsystem *example_subsys[] = {
+ &childless_subsys.subsys,
+ &simple_children_subsys,
+ &group_children_subsys,
+ NULL,
+};
+
+static int __init configfs_example_init(void)
+{
+ int ret;
+ int i;
+ struct configfs_subsystem *subsys;
+
+ for (i = 0; example_subsys[i]; i++) {
+ subsys = example_subsys[i];
+
+ config_group_init(&subsys->su_group);
+ mutex_init(&subsys->su_mutex);
+ ret = configfs_register_subsystem(subsys);
+ if (ret) {
+ printk(KERN_ERR "Error %d while registering subsystem %s\n",
+ ret,
+ subsys->su_group.cg_item.ci_namebuf);
+ goto out_unregister;
+ }
+ }
+
+ return 0;
+
+out_unregister:
+ for (i--; i >= 0; i--)
+ configfs_unregister_subsystem(example_subsys[i]);
+
+ return ret;
+}
+
+static void __exit configfs_example_exit(void)
+{
+ int i;
+
+ for (i = 0; example_subsys[i]; i++)
+ configfs_unregister_subsystem(example_subsys[i]);
+}
+
+module_init(configfs_example_init);
+module_exit(configfs_example_exit);
+MODULE_LICENSE("GPL");
diff --git a/Documentation/filesystems/configfs/configfs_example_macros.c b/Documentation/filesystems/configfs/configfs_example_macros.c
new file mode 100644
index 000000000..327dfbc64
--- /dev/null
+++ b/Documentation/filesystems/configfs/configfs_example_macros.c
@@ -0,0 +1,446 @@
+/*
+ * vim: noexpandtab ts=8 sts=0 sw=8:
+ *
+ * configfs_example_macros.c - This file is a demonstration module
+ * containing a number of configfs subsystems. It uses the helper
+ * macros defined by configfs.h
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle. All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+
+
+/*
+ * 01-childless
+ *
+ * This first example is a childless subsystem. It cannot create
+ * any config_items. It just has attributes.
+ *
+ * Note that we are enclosing the configfs_subsystem inside a container.
+ * This is not necessary if a subsystem has no attributes directly
+ * on the subsystem. See the next example, 02-simple-children, for
+ * such a subsystem.
+ */
+
+struct childless {
+ struct configfs_subsystem subsys;
+ int showme;
+ int storeme;
+};
+
+static inline struct childless *to_childless(struct config_item *item)
+{
+ return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL;
+}
+
+CONFIGFS_ATTR_STRUCT(childless);
+#define CHILDLESS_ATTR(_name, _mode, _show, _store) \
+struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR(_name, _mode, _show, _store)
+#define CHILDLESS_ATTR_RO(_name, _show) \
+struct childless_attribute childless_attr_##_name = __CONFIGFS_ATTR_RO(_name, _show);
+
+static ssize_t childless_showme_read(struct childless *childless,
+ char *page)
+{
+ ssize_t pos;
+
+ pos = sprintf(page, "%d\n", childless->showme);
+ childless->showme++;
+
+ return pos;
+}
+
+static ssize_t childless_storeme_read(struct childless *childless,
+ char *page)
+{
+ return sprintf(page, "%d\n", childless->storeme);
+}
+
+static ssize_t childless_storeme_write(struct childless *childless,
+ const char *page,
+ size_t count)
+{
+ unsigned long tmp;
+ char *p = (char *) page;
+
+ tmp = simple_strtoul(p, &p, 10);
+ if (!p || (*p && (*p != '\n')))
+ return -EINVAL;
+
+ if (tmp > INT_MAX)
+ return -ERANGE;
+
+ childless->storeme = tmp;
+
+ return count;
+}
+
+static ssize_t childless_description_read(struct childless *childless,
+ char *page)
+{
+ return sprintf(page,
+"[01-childless]\n"
+"\n"
+"The childless subsystem is the simplest possible subsystem in\n"
+"configfs. It does not support the creation of child config_items.\n"
+"It only has a few attributes. In fact, it isn't much different\n"
+"than a directory in /proc.\n");
+}
+
+CHILDLESS_ATTR_RO(showme, childless_showme_read);
+CHILDLESS_ATTR(storeme, S_IRUGO | S_IWUSR, childless_storeme_read,
+ childless_storeme_write);
+CHILDLESS_ATTR_RO(description, childless_description_read);
+
+static struct configfs_attribute *childless_attrs[] = {
+ &childless_attr_showme.attr,
+ &childless_attr_storeme.attr,
+ &childless_attr_description.attr,
+ NULL,
+};
+
+CONFIGFS_ATTR_OPS(childless);
+static struct configfs_item_operations childless_item_ops = {
+ .show_attribute = childless_attr_show,
+ .store_attribute = childless_attr_store,
+};
+
+static struct config_item_type childless_type = {
+ .ct_item_ops = &childless_item_ops,
+ .ct_attrs = childless_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct childless childless_subsys = {
+ .subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "01-childless",
+ .ci_type = &childless_type,
+ },
+ },
+ },
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 02-simple-children
+ *
+ * This example merely has a simple one-attribute child. Note that
+ * there is no extra attribute structure, as the child's attribute is
+ * known from the get-go. Also, there is no container for the
+ * subsystem, as it has no attributes of its own.
+ */
+
+struct simple_child {
+ struct config_item item;
+ int storeme;
+};
+
+static inline struct simple_child *to_simple_child(struct config_item *item)
+{
+ return item ? container_of(item, struct simple_child, item) : NULL;
+}
+
+static struct configfs_attribute simple_child_attr_storeme = {
+ .ca_owner = THIS_MODULE,
+ .ca_name = "storeme",
+ .ca_mode = S_IRUGO | S_IWUSR,
+};
+
+static struct configfs_attribute *simple_child_attrs[] = {
+ &simple_child_attr_storeme,
+ NULL,
+};
+
+static ssize_t simple_child_attr_show(struct config_item *item,
+ struct configfs_attribute *attr,
+ char *page)
+{
+ ssize_t count;
+ struct simple_child *simple_child = to_simple_child(item);
+
+ count = sprintf(page, "%d\n", simple_child->storeme);
+
+ return count;
+}
+
+static ssize_t simple_child_attr_store(struct config_item *item,
+ struct configfs_attribute *attr,
+ const char *page, size_t count)
+{
+ struct simple_child *simple_child = to_simple_child(item);
+ unsigned long tmp;
+ char *p = (char *) page;
+
+ tmp = simple_strtoul(p, &p, 10);
+ if (!p || (*p && (*p != '\n')))
+ return -EINVAL;
+
+ if (tmp > INT_MAX)
+ return -ERANGE;
+
+ simple_child->storeme = tmp;
+
+ return count;
+}
+
+static void simple_child_release(struct config_item *item)
+{
+ kfree(to_simple_child(item));
+}
+
+static struct configfs_item_operations simple_child_item_ops = {
+ .release = simple_child_release,
+ .show_attribute = simple_child_attr_show,
+ .store_attribute = simple_child_attr_store,
+};
+
+static struct config_item_type simple_child_type = {
+ .ct_item_ops = &simple_child_item_ops,
+ .ct_attrs = simple_child_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+
+struct simple_children {
+ struct config_group group;
+};
+
+static inline struct simple_children *to_simple_children(struct config_item *item)
+{
+ return item ? container_of(to_config_group(item), struct simple_children, group) : NULL;
+}
+
+static struct config_item *simple_children_make_item(struct config_group *group, const char *name)
+{
+ struct simple_child *simple_child;
+
+ simple_child = kzalloc(sizeof(struct simple_child), GFP_KERNEL);
+ if (!simple_child)
+ return ERR_PTR(-ENOMEM);
+
+ config_item_init_type_name(&simple_child->item, name,
+ &simple_child_type);
+
+ simple_child->storeme = 0;
+
+ return &simple_child->item;
+}
+
+static struct configfs_attribute simple_children_attr_description = {
+ .ca_owner = THIS_MODULE,
+ .ca_name = "description",
+ .ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *simple_children_attrs[] = {
+ &simple_children_attr_description,
+ NULL,
+};
+
+static ssize_t simple_children_attr_show(struct config_item *item,
+ struct configfs_attribute *attr,
+ char *page)
+{
+ return sprintf(page,
+"[02-simple-children]\n"
+"\n"
+"This subsystem allows the creation of child config_items. These\n"
+"items have only one attribute that is readable and writeable.\n");
+}
+
+static void simple_children_release(struct config_item *item)
+{
+ kfree(to_simple_children(item));
+}
+
+static struct configfs_item_operations simple_children_item_ops = {
+ .release = simple_children_release,
+ .show_attribute = simple_children_attr_show,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations simple_children_group_ops = {
+ .make_item = simple_children_make_item,
+};
+
+static struct config_item_type simple_children_type = {
+ .ct_item_ops = &simple_children_item_ops,
+ .ct_group_ops = &simple_children_group_ops,
+ .ct_attrs = simple_children_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem simple_children_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "02-simple-children",
+ .ci_type = &simple_children_type,
+ },
+ },
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 03-group-children
+ *
+ * This example reuses the simple_children group from above. However,
+ * the simple_children group is not the subsystem itself, it is a
+ * child of the subsystem. Creation of a group in the subsystem creates
+ * a new simple_children group. That group can then have simple_child
+ * children of its own.
+ */
+
+static struct config_group *group_children_make_group(struct config_group *group, const char *name)
+{
+ struct simple_children *simple_children;
+
+ simple_children = kzalloc(sizeof(struct simple_children),
+ GFP_KERNEL);
+ if (!simple_children)
+ return ERR_PTR(-ENOMEM);
+
+ config_group_init_type_name(&simple_children->group, name,
+ &simple_children_type);
+
+ return &simple_children->group;
+}
+
+static struct configfs_attribute group_children_attr_description = {
+ .ca_owner = THIS_MODULE,
+ .ca_name = "description",
+ .ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *group_children_attrs[] = {
+ &group_children_attr_description,
+ NULL,
+};
+
+static ssize_t group_children_attr_show(struct config_item *item,
+ struct configfs_attribute *attr,
+ char *page)
+{
+ return sprintf(page,
+"[03-group-children]\n"
+"\n"
+"This subsystem allows the creation of child config_groups. These\n"
+"groups are like the subsystem simple-children.\n");
+}
+
+static struct configfs_item_operations group_children_item_ops = {
+ .show_attribute = group_children_attr_show,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations group_children_group_ops = {
+ .make_group = group_children_make_group,
+};
+
+static struct config_item_type group_children_type = {
+ .ct_item_ops = &group_children_item_ops,
+ .ct_group_ops = &group_children_group_ops,
+ .ct_attrs = group_children_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+static struct configfs_subsystem group_children_subsys = {
+ .su_group = {
+ .cg_item = {
+ .ci_namebuf = "03-group-children",
+ .ci_type = &group_children_type,
+ },
+ },
+};
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * We're now done with our subsystem definitions.
+ * For convenience in this module, here's a list of them all. It
+ * allows the init function to easily register them. Most modules
+ * will only have one subsystem, and will only call register_subsystem
+ * on it directly.
+ */
+static struct configfs_subsystem *example_subsys[] = {
+ &childless_subsys.subsys,
+ &simple_children_subsys,
+ &group_children_subsys,
+ NULL,
+};
+
+static int __init configfs_example_init(void)
+{
+ int ret;
+ int i;
+ struct configfs_subsystem *subsys;
+
+ for (i = 0; example_subsys[i]; i++) {
+ subsys = example_subsys[i];
+
+ config_group_init(&subsys->su_group);
+ mutex_init(&subsys->su_mutex);
+ ret = configfs_register_subsystem(subsys);
+ if (ret) {
+ printk(KERN_ERR "Error %d while registering subsystem %s\n",
+ ret,
+ subsys->su_group.cg_item.ci_namebuf);
+ goto out_unregister;
+ }
+ }
+
+ return 0;
+
+out_unregister:
+ for (i--; i >= 0; i--)
+ configfs_unregister_subsystem(example_subsys[i]);
+
+ return ret;
+}
+
+static void __exit configfs_example_exit(void)
+{
+ int i;
+
+ for (i = 0; example_subsys[i]; i++)
+ configfs_unregister_subsystem(example_subsys[i]);
+}
+
+module_init(configfs_example_init);
+module_exit(configfs_example_exit);
+MODULE_LICENSE("GPL");
diff --git a/Documentation/filesystems/cramfs.txt b/Documentation/filesystems/cramfs.txt
new file mode 100644
index 000000000..31f53f0ab
--- /dev/null
+++ b/Documentation/filesystems/cramfs.txt
@@ -0,0 +1,76 @@
+
+ Cramfs - cram a filesystem onto a small ROM
+
+cramfs is designed to be simple and small, and to compress things well.
+
+It uses the zlib routines to compress a file one page at a time, and
+allows random page access. The meta-data is not compressed, but is
+expressed in a very terse representation to make it use much less
+diskspace than traditional filesystems.
+
+You can't write to a cramfs filesystem (making it compressible and
+compact also makes it _very_ hard to update on-the-fly), so you have to
+create the disk image with the "mkcramfs" utility.
+
+
+Usage Notes
+-----------
+
+File sizes are limited to less than 16MB.
+
+Maximum filesystem size is a little over 256MB. (The last file on the
+filesystem is allowed to extend past 256MB.)
+
+Only the low 8 bits of gid are stored. The current version of
+mkcramfs simply truncates to 8 bits, which is a potential security
+issue.
+
+Hard links are supported, but hard linked files
+will still have a link count of 1 in the cramfs image.
+
+Cramfs directories have no `.' or `..' entries. Directories (like
+every other file on cramfs) always have a link count of 1. (There's
+no need to use -noleaf in `find', btw.)
+
+No timestamps are stored in a cramfs, so these default to the epoch
+(1970 GMT). Recently-accessed files may have updated timestamps, but
+the update lasts only as long as the inode is cached in memory, after
+which the timestamp reverts to 1970, i.e. moves backwards in time.
+
+Currently, cramfs must be written and read with architectures of the
+same endianness, and can be read only by kernels with PAGE_CACHE_SIZE
+== 4096. At least the latter of these is a bug, but it hasn't been
+decided what the best fix is. For the moment if you have larger pages
+you can just change the #define in mkcramfs.c, so long as you don't
+mind the filesystem becoming unreadable to future kernels.
+
+
+For /usr/share/magic
+--------------------
+
+0 ulelong 0x28cd3d45 Linux cramfs offset 0
+>4 ulelong x size %d
+>8 ulelong x flags 0x%x
+>12 ulelong x future 0x%x
+>16 string >\0 signature "%.16s"
+>32 ulelong x fsid.crc 0x%x
+>36 ulelong x fsid.edition %d
+>40 ulelong x fsid.blocks %d
+>44 ulelong x fsid.files %d
+>48 string >\0 name "%.16s"
+512 ulelong 0x28cd3d45 Linux cramfs offset 512
+>516 ulelong x size %d
+>520 ulelong x flags 0x%x
+>524 ulelong x future 0x%x
+>528 string >\0 signature "%.16s"
+>544 ulelong x fsid.crc 0x%x
+>548 ulelong x fsid.edition %d
+>552 ulelong x fsid.blocks %d
+>556 ulelong x fsid.files %d
+>560 string >\0 name "%.16s"
+
+
+Hacker Notes
+------------
+
+See fs/cramfs/README for filesystem layout and implementation notes.
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt
new file mode 100644
index 000000000..baf411186
--- /dev/null
+++ b/Documentation/filesystems/dax.txt
@@ -0,0 +1,94 @@
+Direct Access for files
+-----------------------
+
+Motivation
+----------
+
+The page cache is usually used to buffer reads and writes to files.
+It is also used to provide the pages which are mapped into userspace
+by a call to mmap.
+
+For block devices that are memory-like, the page cache pages would be
+unnecessary copies of the original storage. The DAX code removes the
+extra copy by performing reads and writes directly to the storage device.
+For file mappings, the storage device is mapped directly into userspace.
+
+
+Usage
+-----
+
+If you have a block device which supports DAX, you can make a filesystem
+on it as usual. When mounting it, use the -o dax option manually
+or add 'dax' to the options in /etc/fstab.
+
+
+Implementation Tips for Block Driver Writers
+--------------------------------------------
+
+To support DAX in your block driver, implement the 'direct_access'
+block device operation. It is used to translate the sector number
+(expressed in units of 512-byte sectors) to a page frame number (pfn)
+that identifies the physical page for the memory. It also returns a
+kernel virtual address that can be used to access the memory.
+
+The direct_access method takes a 'size' parameter that indicates the
+number of bytes being requested. The function should return the number
+of bytes that can be contiguously accessed at that offset. It may also
+return a negative errno if an error occurs.
+
+In order to support this method, the storage must be byte-accessible by
+the CPU at all times. If your device uses paging techniques to expose
+a large amount of memory through a smaller window, then you cannot
+implement direct_access. Equally, if your device can occasionally
+stall the CPU for an extended period, you should also not attempt to
+implement direct_access.
+
+These block devices may be used for inspiration:
+- axonram: Axon DDR2 device driver
+- brd: RAM backed block device driver
+- dcssblk: s390 dcss block device driver
+
+
+Implementation Tips for Filesystem Writers
+------------------------------------------
+
+Filesystem support consists of
+- adding support to mark inodes as being DAX by setting the S_DAX flag in
+ i_flags
+- implementing the direct_IO address space operation, and calling
+ dax_do_io() instead of blockdev_direct_IO() if S_DAX is set
+- implementing an mmap file operation for DAX files which sets the
+ VM_MIXEDMAP flag on the VMA, and setting the vm_ops to include handlers
+ for fault and page_mkwrite (which should probably call dax_fault() and
+ dax_mkwrite(), passing the appropriate get_block() callback)
+- calling dax_truncate_page() instead of block_truncate_page() for DAX files
+- calling dax_zero_page_range() instead of zero_user() for DAX files
+- ensuring that there is sufficient locking between reads, writes,
+ truncates and page faults
+
+The get_block() callback passed to the DAX functions may return
+uninitialised extents. If it does, it must ensure that simultaneous
+calls to get_block() (for example by a page-fault racing with a read()
+or a write()) work correctly.
+
+These filesystems may be used for inspiration:
+- ext2: the second extended filesystem, see Documentation/filesystems/ext2.txt
+- ext4: the fourth extended filesystem, see Documentation/filesystems/ext4.txt
+
+
+Shortcomings
+------------
+
+Even if the kernel or its modules are stored on a filesystem that supports
+DAX on a block device that supports DAX, they will still be copied into RAM.
+
+The DAX code does not work correctly on architectures which have virtually
+mapped caches such as ARM, MIPS and SPARC.
+
+Calling get_user_pages() on a range of user memory that has been mmaped
+from a DAX file will fail as there are no 'struct page' to describe
+those pages. This problem is being worked on. That means that O_DIRECT
+reads/writes to those memory ranges from a non-DAX file will fail (note
+that O_DIRECT reads/writes _of a DAX file_ do work, it is the memory
+that is being accessed that is key here). Other things that will not
+work include RDMA, sendfile() and splice().
diff --git a/Documentation/filesystems/debugfs.txt b/Documentation/filesystems/debugfs.txt
new file mode 100644
index 000000000..88ab81c79
--- /dev/null
+++ b/Documentation/filesystems/debugfs.txt
@@ -0,0 +1,191 @@
+Copyright 2009 Jonathan Corbet <corbet@lwn.net>
+
+Debugfs exists as a simple way for kernel developers to make information
+available to user space. Unlike /proc, which is only meant for information
+about a process, or sysfs, which has strict one-value-per-file rules,
+debugfs has no rules at all. Developers can put any information they want
+there. The debugfs filesystem is also intended to not serve as a stable
+ABI to user space; in theory, there are no stability constraints placed on
+files exported there. The real world is not always so simple, though [1];
+even debugfs interfaces are best designed with the idea that they will need
+to be maintained forever.
+
+Debugfs is typically mounted with a command like:
+
+ mount -t debugfs none /sys/kernel/debug
+
+(Or an equivalent /etc/fstab line).
+The debugfs root directory is accessible only to the root user by
+default. To change access to the tree the "uid", "gid" and "mode" mount
+options can be used.
+
+Note that the debugfs API is exported GPL-only to modules.
+
+Code using debugfs should include <linux/debugfs.h>. Then, the first order
+of business will be to create at least one directory to hold a set of
+debugfs files:
+
+ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent);
+
+This call, if successful, will make a directory called name underneath the
+indicated parent directory. If parent is NULL, the directory will be
+created in the debugfs root. On success, the return value is a struct
+dentry pointer which can be used to create files in the directory (and to
+clean it up at the end). A NULL return value indicates that something went
+wrong. If ERR_PTR(-ENODEV) is returned, that is an indication that the
+kernel has been built without debugfs support and none of the functions
+described below will work.
+
+The most general way to create a file within a debugfs directory is with:
+
+ struct dentry *debugfs_create_file(const char *name, umode_t mode,
+ struct dentry *parent, void *data,
+ const struct file_operations *fops);
+
+Here, name is the name of the file to create, mode describes the access
+permissions the file should have, parent indicates the directory which
+should hold the file, data will be stored in the i_private field of the
+resulting inode structure, and fops is a set of file operations which
+implement the file's behavior. At a minimum, the read() and/or write()
+operations should be provided; others can be included as needed. Again,
+the return value will be a dentry pointer to the created file, NULL for
+error, or ERR_PTR(-ENODEV) if debugfs support is missing.
+
+In a number of cases, the creation of a set of file operations is not
+actually necessary; the debugfs code provides a number of helper functions
+for simple situations. Files containing a single integer value can be
+created with any of:
+
+ struct dentry *debugfs_create_u8(const char *name, umode_t mode,
+ struct dentry *parent, u8 *value);
+ struct dentry *debugfs_create_u16(const char *name, umode_t mode,
+ struct dentry *parent, u16 *value);
+ struct dentry *debugfs_create_u32(const char *name, umode_t mode,
+ struct dentry *parent, u32 *value);
+ struct dentry *debugfs_create_u64(const char *name, umode_t mode,
+ struct dentry *parent, u64 *value);
+
+These files support both reading and writing the given value; if a specific
+file should not be written to, simply set the mode bits accordingly. The
+values in these files are in decimal; if hexadecimal is more appropriate,
+the following functions can be used instead:
+
+ struct dentry *debugfs_create_x8(const char *name, umode_t mode,
+ struct dentry *parent, u8 *value);
+ struct dentry *debugfs_create_x16(const char *name, umode_t mode,
+ struct dentry *parent, u16 *value);
+ struct dentry *debugfs_create_x32(const char *name, umode_t mode,
+ struct dentry *parent, u32 *value);
+ struct dentry *debugfs_create_x64(const char *name, umode_t mode,
+ struct dentry *parent, u64 *value);
+
+These functions are useful as long as the developer knows the size of the
+value to be exported. Some types can have different widths on different
+architectures, though, complicating the situation somewhat. There is a
+function meant to help out in one special case:
+
+ struct dentry *debugfs_create_size_t(const char *name, umode_t mode,
+ struct dentry *parent,
+ size_t *value);
+
+As might be expected, this function will create a debugfs file to represent
+a variable of type size_t.
+
+Boolean values can be placed in debugfs with:
+
+ struct dentry *debugfs_create_bool(const char *name, umode_t mode,
+ struct dentry *parent, u32 *value);
+
+A read on the resulting file will yield either Y (for non-zero values) or
+N, followed by a newline. If written to, it will accept either upper- or
+lower-case values, or 1 or 0. Any other input will be silently ignored.
+
+Another option is exporting a block of arbitrary binary data, with
+this structure and function:
+
+ struct debugfs_blob_wrapper {
+ void *data;
+ unsigned long size;
+ };
+
+ struct dentry *debugfs_create_blob(const char *name, umode_t mode,
+ struct dentry *parent,
+ struct debugfs_blob_wrapper *blob);
+
+A read of this file will return the data pointed to by the
+debugfs_blob_wrapper structure. Some drivers use "blobs" as a simple way
+to return several lines of (static) formatted text output. This function
+can be used to export binary information, but there does not appear to be
+any code which does so in the mainline. Note that all files created with
+debugfs_create_blob() are read-only.
+
+If you want to dump a block of registers (something that happens quite
+often during development, even if little such code reaches mainline.
+Debugfs offers two functions: one to make a registers-only file, and
+another to insert a register block in the middle of another sequential
+file.
+
+ struct debugfs_reg32 {
+ char *name;
+ unsigned long offset;
+ };
+
+ struct debugfs_regset32 {
+ struct debugfs_reg32 *regs;
+ int nregs;
+ void __iomem *base;
+ };
+
+ struct dentry *debugfs_create_regset32(const char *name, umode_t mode,
+ struct dentry *parent,
+ struct debugfs_regset32 *regset);
+
+ void debugfs_print_regs32(struct seq_file *s, struct debugfs_reg32 *regs,
+ int nregs, void __iomem *base, char *prefix);
+
+The "base" argument may be 0, but you may want to build the reg32 array
+using __stringify, and a number of register names (macros) are actually
+byte offsets over a base for the register block.
+
+
+There are a couple of other directory-oriented helper functions:
+
+ struct dentry *debugfs_rename(struct dentry *old_dir,
+ struct dentry *old_dentry,
+ struct dentry *new_dir,
+ const char *new_name);
+
+ struct dentry *debugfs_create_symlink(const char *name,
+ struct dentry *parent,
+ const char *target);
+
+A call to debugfs_rename() will give a new name to an existing debugfs
+file, possibly in a different directory. The new_name must not exist prior
+to the call; the return value is old_dentry with updated information.
+Symbolic links can be created with debugfs_create_symlink().
+
+There is one important thing that all debugfs users must take into account:
+there is no automatic cleanup of any directories created in debugfs. If a
+module is unloaded without explicitly removing debugfs entries, the result
+will be a lot of stale pointers and no end of highly antisocial behavior.
+So all debugfs users - at least those which can be built as modules - must
+be prepared to remove all files and directories they create there. A file
+can be removed with:
+
+ void debugfs_remove(struct dentry *dentry);
+
+The dentry value can be NULL, in which case nothing will be removed.
+
+Once upon a time, debugfs users were required to remember the dentry
+pointer for every debugfs file they created so that all files could be
+cleaned up. We live in more civilized times now, though, and debugfs users
+can call:
+
+ void debugfs_remove_recursive(struct dentry *dentry);
+
+If this function is passed a pointer for the dentry corresponding to the
+top-level directory, the entire hierarchy below that directory will be
+removed.
+
+Notes:
+ [1] http://lwn.net/Articles/309298/
diff --git a/Documentation/filesystems/devpts.txt b/Documentation/filesystems/devpts.txt
new file mode 100644
index 000000000..68dffd87f
--- /dev/null
+++ b/Documentation/filesystems/devpts.txt
@@ -0,0 +1,132 @@
+
+To support containers, we now allow multiple instances of devpts filesystem,
+such that indices of ptys allocated in one instance are independent of indices
+allocated in other instances of devpts.
+
+To preserve backward compatibility, this support for multiple instances is
+enabled only if:
+
+ - CONFIG_DEVPTS_MULTIPLE_INSTANCES=y, and
+ - '-o newinstance' mount option is specified while mounting devpts
+
+IOW, devpts now supports both single-instance and multi-instance semantics.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=n, there is no change in behavior and
+this referred to as the "legacy" mode. In this mode, the new mount options
+(-o newinstance and -o ptmxmode) will be ignored with a 'bogus option' message
+on console.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and devpts is mounted without the
+'newinstance' option (as in current start-up scripts) the new mount binds
+to the initial kernel mount of devpts. This mode is referred to as the
+'single-instance' mode and the current, single-instance semantics are
+preserved, i.e PTYs are common across the system.
+
+The only difference between this single-instance mode and the legacy mode
+is the presence of new, '/dev/pts/ptmx' node with permissions 0000, which
+can safely be ignored.
+
+If CONFIG_DEVPTS_MULTIPLE_INSTANCES=y and 'newinstance' option is specified,
+the mount is considered to be in the multi-instance mode and a new instance
+of the devpts fs is created. Any ptys created in this instance are independent
+of ptys in other instances of devpts. Like in the single-instance mode, the
+/dev/pts/ptmx node is present. To effectively use the multi-instance mode,
+open of /dev/ptmx must be a redirected to '/dev/pts/ptmx' using a symlink or
+bind-mount.
+
+Eg: A container startup script could do the following:
+
+ $ chmod 0666 /dev/pts/ptmx
+ $ rm /dev/ptmx
+ $ ln -s pts/ptmx /dev/ptmx
+ $ ns_exec -cm /bin/bash
+
+ # We are now in new container
+
+ $ umount /dev/pts
+ $ mount -t devpts -o newinstance lxcpts /dev/pts
+ $ sshd -p 1234
+
+where 'ns_exec -cm /bin/bash' calls clone() with CLONE_NEWNS flag and execs
+/bin/bash in the child process. A pty created by the sshd is not visible in
+the original mount of /dev/pts.
+
+User-space changes
+------------------
+
+In multi-instance mode (i.e '-o newinstance' mount option is specified at least
+once), following user-space issues should be noted.
+
+1. If -o newinstance mount option is never used, /dev/pts/ptmx can be ignored
+ and no change is needed to system-startup scripts.
+
+2. To effectively use multi-instance mode (i.e -o newinstance is specified)
+ administrators or startup scripts should "redirect" open of /dev/ptmx to
+ /dev/pts/ptmx using either a bind mount or symlink.
+
+ $ mount -t devpts -o newinstance devpts /dev/pts
+
+ followed by either
+
+ $ rm /dev/ptmx
+ $ ln -s pts/ptmx /dev/ptmx
+ $ chmod 666 /dev/pts/ptmx
+ or
+ $ mount -o bind /dev/pts/ptmx /dev/ptmx
+
+3. The '/dev/ptmx -> pts/ptmx' symlink is the preferred method since it
+ enables better error-reporting and treats both single-instance and
+ multi-instance mounts similarly.
+
+ But this method requires that system-startup scripts set the mode of
+ /dev/pts/ptmx correctly (default mode is 0000). The scripts can set the
+ mode by, either
+
+ - adding ptmxmode mount option to devpts entry in /etc/fstab, or
+ - using 'chmod 0666 /dev/pts/ptmx'
+
+4. If multi-instance mode mount is needed for containers, but the system
+ startup scripts have not yet been updated, container-startup scripts
+ should bind mount /dev/ptmx to /dev/pts/ptmx to avoid breaking single-
+ instance mounts.
+
+ Or, in general, container-startup scripts should use:
+
+ mount -t devpts -o newinstance -o ptmxmode=0666 devpts /dev/pts
+ if [ ! -L /dev/ptmx ]; then
+ mount -o bind /dev/pts/ptmx /dev/ptmx
+ fi
+
+ When all devpts mounts are multi-instance, /dev/ptmx can permanently be
+ a symlink to pts/ptmx and the bind mount can be ignored.
+
+5. A multi-instance mount that is not accompanied by the /dev/ptmx to
+ /dev/pts/ptmx redirection would result in an unusable/unreachable pty.
+
+ mount -t devpts -o newinstance lxcpts /dev/pts
+
+ immediately followed by:
+
+ open("/dev/ptmx")
+
+ would create a pty, say /dev/pts/7, in the initial kernel mount.
+ But /dev/pts/7 would be invisible in the new mount.
+
+6. The permissions for /dev/pts/ptmx node should be specified when mounting
+ /dev/pts, using the '-o ptmxmode=%o' mount option (default is 0000).
+
+ mount -t devpts -o newinstance -o ptmxmode=0644 devpts /dev/pts
+
+ The permissions can be later be changed as usual with 'chmod'.
+
+ chmod 666 /dev/pts/ptmx
+
+7. A mount of devpts without the 'newinstance' option results in binding to
+ initial kernel mount. This behavior while preserving legacy semantics,
+ does not provide strict isolation in a container environment. i.e by
+ mounting devpts without the 'newinstance' option, a container could
+ get visibility into the 'host' or root container's devpts.
+
+ To workaround this and have strict isolation, all mounts of devpts,
+ including the mount in the root container, should use the newinstance
+ option.
diff --git a/Documentation/filesystems/directory-locking b/Documentation/filesystems/directory-locking
new file mode 100644
index 000000000..09bbf9a54
--- /dev/null
+++ b/Documentation/filesystems/directory-locking
@@ -0,0 +1,127 @@
+ Locking scheme used for directory operations is based on two
+kinds of locks - per-inode (->i_mutex) and per-filesystem
+(->s_vfs_rename_mutex).
+
+ When taking the i_mutex on multiple non-directory objects, we
+always acquire the locks in order by increasing address. We'll call
+that "inode pointer" order in the following.
+
+ For our purposes all operations fall in 5 classes:
+
+1) read access. Locking rules: caller locks directory we are accessing.
+
+2) object creation. Locking rules: same as above.
+
+3) object removal. Locking rules: caller locks parent, finds victim,
+locks victim and calls the method.
+
+4) rename() that is _not_ cross-directory. Locking rules: caller locks
+the parent and finds source and target. If target already exists, lock
+it. If source is a non-directory, lock it. If that means we need to
+lock both, lock them in inode pointer order.
+
+5) link creation. Locking rules:
+ * lock parent
+ * check that source is not a directory
+ * lock source
+ * call the method.
+
+6) cross-directory rename. The trickiest in the whole bunch. Locking
+rules:
+ * lock the filesystem
+ * lock parents in "ancestors first" order.
+ * find source and target.
+ * if old parent is equal to or is a descendent of target
+ fail with -ENOTEMPTY
+ * if new parent is equal to or is a descendent of source
+ fail with -ELOOP
+ * If target exists, lock it. If source is a non-directory, lock
+ it. In case that means we need to lock both source and target,
+ do so in inode pointer order.
+ * call the method.
+
+
+The rules above obviously guarantee that all directories that are going to be
+read, modified or removed by method will be locked by caller.
+
+
+If no directory is its own ancestor, the scheme above is deadlock-free.
+Proof:
+
+ First of all, at any moment we have a partial ordering of the
+objects - A < B iff A is an ancestor of B.
+
+ That ordering can change. However, the following is true:
+
+(1) if object removal or non-cross-directory rename holds lock on A and
+ attempts to acquire lock on B, A will remain the parent of B until we
+ acquire the lock on B. (Proof: only cross-directory rename can change
+ the parent of object and it would have to lock the parent).
+
+(2) if cross-directory rename holds the lock on filesystem, order will not
+ change until rename acquires all locks. (Proof: other cross-directory
+ renames will be blocked on filesystem lock and we don't start changing
+ the order until we had acquired all locks).
+
+(3) locks on non-directory objects are acquired only after locks on
+ directory objects, and are acquired in inode pointer order.
+ (Proof: all operations but renames take lock on at most one
+ non-directory object, except renames, which take locks on source and
+ target in inode pointer order in the case they are not directories.)
+
+ Now consider the minimal deadlock. Each process is blocked on
+attempt to acquire some lock and already holds at least one lock. Let's
+consider the set of contended locks. First of all, filesystem lock is
+not contended, since any process blocked on it is not holding any locks.
+Thus all processes are blocked on ->i_mutex.
+
+ By (3), any process holding a non-directory lock can only be
+waiting on another non-directory lock with a larger address. Therefore
+the process holding the "largest" such lock can always make progress, and
+non-directory objects are not included in the set of contended locks.
+
+ Thus link creation can't be a part of deadlock - it can't be
+blocked on source and it means that it doesn't hold any locks.
+
+ Any contended object is either held by cross-directory rename or
+has a child that is also contended. Indeed, suppose that it is held by
+operation other than cross-directory rename. Then the lock this operation
+is blocked on belongs to child of that object due to (1).
+
+ It means that one of the operations is cross-directory rename.
+Otherwise the set of contended objects would be infinite - each of them
+would have a contended child and we had assumed that no object is its
+own descendent. Moreover, there is exactly one cross-directory rename
+(see above).
+
+ Consider the object blocking the cross-directory rename. One
+of its descendents is locked by cross-directory rename (otherwise we
+would again have an infinite set of contended objects). But that
+means that cross-directory rename is taking locks out of order. Due
+to (2) the order hadn't changed since we had acquired filesystem lock.
+But locking rules for cross-directory rename guarantee that we do not
+try to acquire lock on descendent before the lock on ancestor.
+Contradiction. I.e. deadlock is impossible. Q.E.D.
+
+
+ These operations are guaranteed to avoid loop creation. Indeed,
+the only operation that could introduce loops is cross-directory rename.
+Since the only new (parent, child) pair added by rename() is (new parent,
+source), such loop would have to contain these objects and the rest of it
+would have to exist before rename(). I.e. at the moment of loop creation
+rename() responsible for that would be holding filesystem lock and new parent
+would have to be equal to or a descendent of source. But that means that
+new parent had been equal to or a descendent of source since the moment when
+we had acquired filesystem lock and rename() would fail with -ELOOP in that
+case.
+
+ While this locking scheme works for arbitrary DAGs, it relies on
+ability to check that directory is a descendent of another object. Current
+implementation assumes that directory graph is a tree. This assumption is
+also preserved by all operations (cross-directory rename on a tree that would
+not introduce a cycle will leave it a tree and link() fails for directories).
+
+ Notice that "directory" in the above == "anything that might have
+children", so if we are going to introduce hybrid objects we will need
+either to make sure that link(2) doesn't work for them or to make changes
+in is_subdir() that would make it work even in presence of such beasts.
diff --git a/Documentation/filesystems/dlmfs.txt b/Documentation/filesystems/dlmfs.txt
new file mode 100644
index 000000000..fcf4d509d
--- /dev/null
+++ b/Documentation/filesystems/dlmfs.txt
@@ -0,0 +1,130 @@
+dlmfs
+==================
+A minimal DLM userspace interface implemented via a virtual file
+system.
+
+dlmfs is built with OCFS2 as it requires most of its infrastructure.
+
+Project web page: http://ocfs2.wiki.kernel.org
+Tools web page: https://github.com/markfasheh/ocfs2-tools
+OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
+
+All code copyright 2005 Oracle except when otherwise noted.
+
+CREDITS
+=======
+
+Some code taken from ramfs which is Copyright (C) 2000 Linus Torvalds
+and Transmeta Corp.
+
+Mark Fasheh <mark.fasheh@oracle.com>
+
+Caveats
+=======
+- Right now it only works with the OCFS2 DLM, though support for other
+ DLM implementations should not be a major issue.
+
+Mount options
+=============
+None
+
+Usage
+=====
+
+If you're just interested in OCFS2, then please see ocfs2.txt. The
+rest of this document will be geared towards those who want to use
+dlmfs for easy to setup and easy to use clustered locking in
+userspace.
+
+Setup
+=====
+
+dlmfs requires that the OCFS2 cluster infrastructure be in
+place. Please download ocfs2-tools from the above url and configure a
+cluster.
+
+You'll want to start heartbeating on a volume which all the nodes in
+your lockspace can access. The easiest way to do this is via
+ocfs2_hb_ctl (distributed with ocfs2-tools). Right now it requires
+that an OCFS2 file system be in place so that it can automatically
+find its heartbeat area, though it will eventually support heartbeat
+against raw disks.
+
+Please see the ocfs2_hb_ctl and mkfs.ocfs2 manual pages distributed
+with ocfs2-tools.
+
+Once you're heartbeating, DLM lock 'domains' can be easily created /
+destroyed and locks within them accessed.
+
+Locking
+=======
+
+Users may access dlmfs via standard file system calls, or they can use
+'libo2dlm' (distributed with ocfs2-tools) which abstracts the file
+system calls and presents a more traditional locking api.
+
+dlmfs handles lock caching automatically for the user, so a lock
+request for an already acquired lock will not generate another DLM
+call. Userspace programs are assumed to handle their own local
+locking.
+
+Two levels of locks are supported - Shared Read, and Exclusive.
+Also supported is a Trylock operation.
+
+For information on the libo2dlm interface, please see o2dlm.h,
+distributed with ocfs2-tools.
+
+Lock value blocks can be read and written to a resource via read(2)
+and write(2) against the fd obtained via your open(2) call. The
+maximum currently supported LVB length is 64 bytes (though that is an
+OCFS2 DLM limitation). Through this mechanism, users of dlmfs can share
+small amounts of data amongst their nodes.
+
+mkdir(2) signals dlmfs to join a domain (which will have the same name
+as the resulting directory)
+
+rmdir(2) signals dlmfs to leave the domain
+
+Locks for a given domain are represented by regular inodes inside the
+domain directory. Locking against them is done via the open(2) system
+call.
+
+The open(2) call will not return until your lock has been granted or
+an error has occurred, unless it has been instructed to do a trylock
+operation. If the lock succeeds, you'll get an fd.
+
+open(2) with O_CREAT to ensure the resource inode is created - dlmfs does
+not automatically create inodes for existing lock resources.
+
+Open Flag Lock Request Type
+--------- -----------------
+O_RDONLY Shared Read
+O_RDWR Exclusive
+
+Open Flag Resulting Locking Behavior
+--------- --------------------------
+O_NONBLOCK Trylock operation
+
+You must provide exactly one of O_RDONLY or O_RDWR.
+
+If O_NONBLOCK is also provided and the trylock operation was valid but
+could not lock the resource then open(2) will return ETXTBUSY.
+
+close(2) drops the lock associated with your fd.
+
+Modes passed to mkdir(2) or open(2) are adhered to locally. Chown is
+supported locally as well. This means you can use them to restrict
+access to the resources via dlmfs on your local node only.
+
+The resource LVB may be read from the fd in either Shared Read or
+Exclusive modes via the read(2) system call. It can be written via
+write(2) only when open in Exclusive mode.
+
+Once written, an LVB will be visible to other nodes who obtain Read
+Only or higher level locks on the resource.
+
+See Also
+========
+http://opendlm.sourceforge.net/cvsmirror/opendlm/docs/dlmbook_final.pdf
+
+For more information on the VMS distributed locking API.
diff --git a/Documentation/filesystems/dnotify.txt b/Documentation/filesystems/dnotify.txt
new file mode 100644
index 000000000..6baf88f46
--- /dev/null
+++ b/Documentation/filesystems/dnotify.txt
@@ -0,0 +1,70 @@
+ Linux Directory Notification
+ ============================
+
+ Stephen Rothwell <sfr@canb.auug.org.au>
+
+The intention of directory notification is to allow user applications
+to be notified when a directory, or any of the files in it, are changed.
+The basic mechanism involves the application registering for notification
+on a directory using a fcntl(2) call and the notifications themselves
+being delivered using signals.
+
+The application decides which "events" it wants to be notified about.
+The currently defined events are:
+
+ DN_ACCESS A file in the directory was accessed (read)
+ DN_MODIFY A file in the directory was modified (write,truncate)
+ DN_CREATE A file was created in the directory
+ DN_DELETE A file was unlinked from directory
+ DN_RENAME A file in the directory was renamed
+ DN_ATTRIB A file in the directory had its attributes
+ changed (chmod,chown)
+
+Usually, the application must reregister after each notification, but
+if DN_MULTISHOT is or'ed with the event mask, then the registration will
+remain until explicitly removed (by registering for no events).
+
+By default, SIGIO will be delivered to the process and no other useful
+information. However, if the F_SETSIG fcntl(2) call is used to let the
+kernel know which signal to deliver, a siginfo structure will be passed to
+the signal handler and the si_fd member of that structure will contain the
+file descriptor associated with the directory in which the event occurred.
+
+Preferably the application will choose one of the real time signals
+(SIGRTMIN + <n>) so that the notifications may be queued. This is
+especially important if DN_MULTISHOT is specified. Note that SIGRTMIN
+is often blocked, so it is better to use (at least) SIGRTMIN + 1.
+
+Implementation expectations (features and bugs :-))
+---------------------------
+
+The notification should work for any local access to files even if the
+actual file system is on a remote server. This implies that remote
+access to files served by local user mode servers should be notified.
+Also, remote accesses to files served by a local kernel NFS server should
+be notified.
+
+In order to make the impact on the file system code as small as possible,
+the problem of hard links to files has been ignored. So if a file (x)
+exists in two directories (a and b) then a change to the file using the
+name "a/x" should be notified to a program expecting notifications on
+directory "a", but will not be notified to one expecting notifications on
+directory "b".
+
+Also, files that are unlinked, will still cause notifications in the
+last directory that they were linked to.
+
+Configuration
+-------------
+
+Dnotify is controlled via the CONFIG_DNOTIFY configuration option. When
+disabled, fcntl(fd, F_NOTIFY, ...) will return -EINVAL.
+
+Example
+-------
+See Documentation/filesystems/dnotify_test.c for an example.
+
+NOTE
+----
+Beginning with Linux 2.6.13, dnotify has been replaced by inotify.
+See Documentation/filesystems/inotify.txt for more information on it.
diff --git a/Documentation/filesystems/dnotify_test.c b/Documentation/filesystems/dnotify_test.c
new file mode 100644
index 000000000..8b37b4a1e
--- /dev/null
+++ b/Documentation/filesystems/dnotify_test.c
@@ -0,0 +1,34 @@
+#define _GNU_SOURCE /* needed to get the defines */
+#include <fcntl.h> /* in glibc 2.2 this has the needed
+ values defined */
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static volatile int event_fd;
+
+static void handler(int sig, siginfo_t *si, void *data)
+{
+ event_fd = si->si_fd;
+}
+
+int main(void)
+{
+ struct sigaction act;
+ int fd;
+
+ act.sa_sigaction = handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ sigaction(SIGRTMIN + 1, &act, NULL);
+
+ fd = open(".", O_RDONLY);
+ fcntl(fd, F_SETSIG, SIGRTMIN + 1);
+ fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT);
+ /* we will now be notified if any of the files
+ in "." is modified or new files are created */
+ while (1) {
+ pause();
+ printf("Got event on fd=%d\n", event_fd);
+ }
+}
diff --git a/Documentation/filesystems/ecryptfs.txt b/Documentation/filesystems/ecryptfs.txt
new file mode 100644
index 000000000..01d8a0835
--- /dev/null
+++ b/Documentation/filesystems/ecryptfs.txt
@@ -0,0 +1,77 @@
+eCryptfs: A stacked cryptographic filesystem for Linux
+
+eCryptfs is free software. Please see the file COPYING for details.
+For documentation, please see the files in the doc/ subdirectory. For
+building and installation instructions please see the INSTALL file.
+
+Maintainer: Phillip Hellewell
+Lead developer: Michael A. Halcrow <mhalcrow@us.ibm.com>
+Developers: Michael C. Thompson
+ Kent Yoder
+Web Site: http://ecryptfs.sf.net
+
+This software is currently undergoing development. Make sure to
+maintain a backup copy of any data you write into eCryptfs.
+
+eCryptfs requires the userspace tools downloadable from the
+SourceForge site:
+
+http://sourceforge.net/projects/ecryptfs/
+
+Userspace requirements include:
+ - David Howells' userspace keyring headers and libraries (version
+ 1.0 or higher), obtainable from
+ http://people.redhat.com/~dhowells/keyutils/
+ - Libgcrypt
+
+
+NOTES
+
+In the beta/experimental releases of eCryptfs, when you upgrade
+eCryptfs, you should copy the files to an unencrypted location and
+then copy the files back into the new eCryptfs mount to migrate the
+files.
+
+
+MOUNT-WIDE PASSPHRASE
+
+Create a new directory into which eCryptfs will write its encrypted
+files (i.e., /root/crypt). Then, create the mount point directory
+(i.e., /mnt/crypt). Now it's time to mount eCryptfs:
+
+mount -t ecryptfs /root/crypt /mnt/crypt
+
+You should be prompted for a passphrase and a salt (the salt may be
+blank).
+
+Try writing a new file:
+
+echo "Hello, World" > /mnt/crypt/hello.txt
+
+The operation will complete. Notice that there is a new file in
+/root/crypt that is at least 12288 bytes in size (depending on your
+host page size). This is the encrypted underlying file for what you
+just wrote. To test reading, from start to finish, you need to clear
+the user session keyring:
+
+keyctl clear @u
+
+Then umount /mnt/crypt and mount again per the instructions given
+above.
+
+cat /mnt/crypt/hello.txt
+
+
+NOTES
+
+eCryptfs version 0.1 should only be mounted on (1) empty directories
+or (2) directories containing files only created by eCryptfs. If you
+mount a directory that has pre-existing files not created by eCryptfs,
+then behavior is undefined. Do not run eCryptfs in higher verbosity
+levels unless you are doing so for the sole purpose of debugging or
+development, since secret values will be written out to the system log
+in that case.
+
+
+Mike Halcrow
+mhalcrow@us.ibm.com
diff --git a/Documentation/filesystems/efivarfs.txt b/Documentation/filesystems/efivarfs.txt
new file mode 100644
index 000000000..c477af086
--- /dev/null
+++ b/Documentation/filesystems/efivarfs.txt
@@ -0,0 +1,16 @@
+
+efivarfs - a (U)EFI variable filesystem
+
+The efivarfs filesystem was created to address the shortcomings of
+using entries in sysfs to maintain EFI variables. The old sysfs EFI
+variables code only supported variables of up to 1024 bytes. This
+limitation existed in version 0.99 of the EFI specification, but was
+removed before any full releases. Since variables can now be larger
+than a single page, sysfs isn't the best interface for this.
+
+Variables can be created, deleted and modified with the efivarfs
+filesystem.
+
+efivarfs is typically mounted like this,
+
+ mount -t efivarfs none /sys/firmware/efi/efivars
diff --git a/Documentation/filesystems/exofs.txt b/Documentation/filesystems/exofs.txt
new file mode 100644
index 000000000..23583a136
--- /dev/null
+++ b/Documentation/filesystems/exofs.txt
@@ -0,0 +1,185 @@
+===============================================================================
+WHAT IS EXOFS?
+===============================================================================
+
+exofs is a file system that uses an OSD and exports the API of a normal Linux
+file system. Users access exofs like any other local file system, and exofs
+will in turn issue commands to the local OSD initiator.
+
+OSD is a new T10 command set that views storage devices not as a large/flat
+array of sectors but as a container of objects, each having a length, quota,
+time attributes and more. Each object is addressed by a 64bit ID, and is
+contained in a 64bit ID partition. Each object has associated attributes
+attached to it, which are integral part of the object and provide metadata about
+the object. The standard defines some common obligatory attributes, but user
+attributes can be added as needed.
+
+===============================================================================
+ENVIRONMENT
+===============================================================================
+
+To use this file system, you need to have an object store to run it on. You
+may download a target from:
+http://open-osd.org
+
+See Documentation/scsi/osd.txt for how to setup a working osd environment.
+
+===============================================================================
+USAGE
+===============================================================================
+
+1. Download and compile exofs and open-osd initiator:
+ You need an external Kernel source tree or kernel headers from your
+ distribution. (anything based on 2.6.26 or later).
+
+ a. download open-osd including exofs source using:
+ [parent-directory]$ git clone git://git.open-osd.org/open-osd.git
+
+ b. Build the library module like this:
+ [parent-directory]$ make -C KSRC=$(KER_DIR) open-osd
+
+ This will build both the open-osd initiator as well as the exofs kernel
+ module. Use whatever parameters you compiled your Kernel with and
+ $(KER_DIR) above pointing to the Kernel you compile against. See the file
+ open-osd/top-level-Makefile for an example.
+
+2. Get the OSD initiator and target set up properly, and login to the target.
+ See Documentation/scsi/osd.txt for farther instructions. Also see ./do-osd
+ for example script that does all these steps.
+
+3. Insmod the exofs.ko module:
+ [exofs]$ insmod exofs.ko
+
+4. Make sure the directory where you want to mount exists. If not, create it.
+ (For example, mkdir /mnt/exofs)
+
+5. At first run you will need to invoke the mkfs.exofs application
+
+ As an example, this will create the file system on:
+ /dev/osd0 partition ID 65536
+
+ mkfs.exofs --pid=65536 --format /dev/osd0
+
+ The --format is optional. If not specified, no OSD_FORMAT will be
+ performed and a clean file system will be created in the specified pid,
+ in the available space of the target. (Use --format=size_in_meg to limit
+ the total LUN space available)
+
+ If pid already exists, it will be deleted and a new one will be created in
+ its place. Be careful.
+
+ An exofs lives inside a single OSD partition. You can create multiple exofs
+ filesystems on the same device using multiple pids.
+
+ (run mkfs.exofs without any parameters for usage help message)
+
+6. Mount the file system.
+
+ For example, to mount /dev/osd0, partition ID 0x10000 on /mnt/exofs:
+
+ mount -t exofs -o pid=65536 /dev/osd0 /mnt/exofs/
+
+7. For reference (See do-exofs example script):
+ do-exofs start - an example of how to perform the above steps.
+ do-exofs stop - an example of how to unmount the file system.
+ do-exofs format - an example of how to format and mkfs a new exofs.
+
+8. Extra compilation flags (uncomment in fs/exofs/Kbuild):
+ CONFIG_EXOFS_DEBUG - for debug messages and extra checks.
+
+===============================================================================
+exofs mount options
+===============================================================================
+Similar to any mount command:
+ mount -t exofs -o exofs_options /dev/osdX mount_exofs_directory
+
+Where:
+ -t exofs: specifies the exofs file system
+
+ /dev/osdX: X is a decimal number. /dev/osdX was created after a successful
+ login into an OSD target.
+
+ mount_exofs_directory: The directory to mount the file system on
+
+ exofs specific options: Options are separated by commas (,)
+ pid=<integer> - The partition number to mount/create as
+ container of the filesystem.
+ This option is mandatory. integer can be
+ Hex by pre-pending an 0x to the number.
+ osdname=<id> - Mount by a device's osdname.
+ osdname is usually a 36 character uuid of the
+ form "d2683732-c906-4ee1-9dbd-c10c27bb40df".
+ It is one of the device's uuid specified in the
+ mkfs.exofs format command.
+ If this option is specified then the /dev/osdX
+ above can be empty and is ignored.
+ to=<integer> - Timeout in ticks for a single command.
+ default is (60 * HZ) [for debugging only]
+
+===============================================================================
+DESIGN
+===============================================================================
+
+* The file system control block (AKA on-disk superblock) resides in an object
+ with a special ID (defined in common.h).
+ Information included in the file system control block is used to fill the
+ in-memory superblock structure at mount time. This object is created before
+ the file system is used by mkexofs.c. It contains information such as:
+ - The file system's magic number
+ - The next inode number to be allocated
+
+* Each file resides in its own object and contains the data (and it will be
+ possible to extend the file over multiple objects, though this has not been
+ implemented yet).
+
+* A directory is treated as a file, and essentially contains a list of <file
+ name, inode #> pairs for files that are found in that directory. The object
+ IDs correspond to the files' inode numbers and will be allocated according to
+ a bitmap (stored in a separate object). Now they are allocated using a
+ counter.
+
+* Each file's control block (AKA on-disk inode) is stored in its object's
+ attributes. This applies to both regular files and other types (directories,
+ device files, symlinks, etc.).
+
+* Credentials are generated per object (inode and superblock) when they are
+ created in memory (read from disk or created). The credential works for all
+ operations and is used as long as the object remains in memory.
+
+* Async OSD operations are used whenever possible, but the target may execute
+ them out of order. The operations that concern us are create, delete,
+ readpage, writepage, update_inode, and truncate. The following pairs of
+ operations should execute in the order written, and we need to prevent them
+ from executing in reverse order:
+ - The following are handled with the OBJ_CREATED and OBJ_2BCREATED
+ flags. OBJ_CREATED is set when we know the object exists on the OSD -
+ in create's callback function, and when we successfully do a
+ read_inode.
+ OBJ_2BCREATED is set in the beginning of the create function, so we
+ know that we should wait.
+ - create/delete: delete should wait until the object is created
+ on the OSD.
+ - create/readpage: readpage should be able to return a page
+ full of zeroes in this case. If there was a write already
+ en-route (i.e. create, writepage, readpage) then the page
+ would be locked, and so it would really be the same as
+ create/writepage.
+ - create/writepage: if writepage is called for a sync write, it
+ should wait until the object is created on the OSD.
+ Otherwise, it should just return.
+ - create/truncate: truncate should wait until the object is
+ created on the OSD.
+ - create/update_inode: update_inode should wait until the
+ object is created on the OSD.
+ - Handled by VFS locks:
+ - readpage/delete: shouldn't happen because of page lock.
+ - writepage/delete: shouldn't happen because of page lock.
+ - readpage/writepage: shouldn't happen because of page lock.
+
+===============================================================================
+LICENSE/COPYRIGHT
+===============================================================================
+The exofs file system is based on ext2 v0.5b (distributed with the Linux kernel
+version 2.6.10). All files include the original copyrights, and the license
+is GPL version 2 (only version 2, as is true for the Linux kernel). The
+Linux kernel can be downloaded from www.kernel.org.
diff --git a/Documentation/filesystems/ext2.txt b/Documentation/filesystems/ext2.txt
new file mode 100644
index 000000000..b9714569e
--- /dev/null
+++ b/Documentation/filesystems/ext2.txt
@@ -0,0 +1,384 @@
+
+The Second Extended Filesystem
+==============================
+
+ext2 was originally released in January 1993. Written by R\'emy Card,
+Theodore Ts'o and Stephen Tweedie, it was a major rewrite of the
+Extended Filesystem. It is currently still (April 2001) the predominant
+filesystem in use by Linux. There are also implementations available
+for NetBSD, FreeBSD, the GNU HURD, Windows 95/98/NT, OS/2 and RISC OS.
+
+Options
+=======
+
+Most defaults are determined by the filesystem superblock, and can be
+set using tune2fs(8). Kernel-determined defaults are indicated by (*).
+
+bsddf (*) Makes `df' act like BSD.
+minixdf Makes `df' act like Minix.
+
+check=none, nocheck (*) Don't do extra checking of bitmaps on mount
+ (check=normal and check=strict options removed)
+
+dax Use direct access (no page cache). See
+ Documentation/filesystems/dax.txt.
+
+debug Extra debugging information is sent to the
+ kernel syslog. Useful for developers.
+
+errors=continue Keep going on a filesystem error.
+errors=remount-ro Remount the filesystem read-only on an error.
+errors=panic Panic and halt the machine if an error occurs.
+
+grpid, bsdgroups Give objects the same group ID as their parent.
+nogrpid, sysvgroups New objects have the group ID of their creator.
+
+nouid32 Use 16-bit UIDs and GIDs.
+
+oldalloc Enable the old block allocator. Orlov should
+ have better performance, we'd like to get some
+ feedback if it's the contrary for you.
+orlov (*) Use the Orlov block allocator.
+ (See http://lwn.net/Articles/14633/ and
+ http://lwn.net/Articles/14446/.)
+
+resuid=n The user ID which may use the reserved blocks.
+resgid=n The group ID which may use the reserved blocks.
+
+sb=n Use alternate superblock at this location.
+
+user_xattr Enable "user." POSIX Extended Attributes
+ (requires CONFIG_EXT2_FS_XATTR).
+ See also http://acl.bestbits.at
+nouser_xattr Don't support "user." extended attributes.
+
+acl Enable POSIX Access Control Lists support
+ (requires CONFIG_EXT2_FS_POSIX_ACL).
+ See also http://acl.bestbits.at
+noacl Don't support POSIX ACLs.
+
+nobh Do not attach buffer_heads to file pagecache.
+
+grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2.
+
+
+Specification
+=============
+
+ext2 shares many properties with traditional Unix filesystems. It has
+the concepts of blocks, inodes and directories. It has space in the
+specification for Access Control Lists (ACLs), fragments, undeletion and
+compression though these are not yet implemented (some are available as
+separate patches). There is also a versioning mechanism to allow new
+features (such as journalling) to be added in a maximally compatible
+manner.
+
+Blocks
+------
+
+The space in the device or file is split up into blocks. These are
+a fixed size, of 1024, 2048 or 4096 bytes (8192 bytes on Alpha systems),
+which is decided when the filesystem is created. Smaller blocks mean
+less wasted space per file, but require slightly more accounting overhead,
+and also impose other limits on the size of files and the filesystem.
+
+Block Groups
+------------
+
+Blocks are clustered into block groups in order to reduce fragmentation
+and minimise the amount of head seeking when reading a large amount
+of consecutive data. Information about each block group is kept in a
+descriptor table stored in the block(s) immediately after the superblock.
+Two blocks near the start of each group are reserved for the block usage
+bitmap and the inode usage bitmap which show which blocks and inodes
+are in use. Since each bitmap is limited to a single block, this means
+that the maximum size of a block group is 8 times the size of a block.
+
+The block(s) following the bitmaps in each block group are designated
+as the inode table for that block group and the remainder are the data
+blocks. The block allocation algorithm attempts to allocate data blocks
+in the same block group as the inode which contains them.
+
+The Superblock
+--------------
+
+The superblock contains all the information about the configuration of
+the filing system. The primary copy of the superblock is stored at an
+offset of 1024 bytes from the start of the device, and it is essential
+to mounting the filesystem. Since it is so important, backup copies of
+the superblock are stored in block groups throughout the filesystem.
+The first version of ext2 (revision 0) stores a copy at the start of
+every block group, along with backups of the group descriptor block(s).
+Because this can consume a considerable amount of space for large
+filesystems, later revisions can optionally reduce the number of backup
+copies by only putting backups in specific groups (this is the sparse
+superblock feature). The groups chosen are 0, 1 and powers of 3, 5 and 7.
+
+The information in the superblock contains fields such as the total
+number of inodes and blocks in the filesystem and how many are free,
+how many inodes and blocks are in each block group, when the filesystem
+was mounted (and if it was cleanly unmounted), when it was modified,
+what version of the filesystem it is (see the Revisions section below)
+and which OS created it.
+
+If the filesystem is revision 1 or higher, then there are extra fields,
+such as a volume name, a unique identification number, the inode size,
+and space for optional filesystem features to store configuration info.
+
+All fields in the superblock (as in all other ext2 structures) are stored
+on the disc in little endian format, so a filesystem is portable between
+machines without having to know what machine it was created on.
+
+Inodes
+------
+
+The inode (index node) is a fundamental concept in the ext2 filesystem.
+Each object in the filesystem is represented by an inode. The inode
+structure contains pointers to the filesystem blocks which contain the
+data held in the object and all of the metadata about an object except
+its name. The metadata about an object includes the permissions, owner,
+group, flags, size, number of blocks used, access time, change time,
+modification time, deletion time, number of links, fragments, version
+(for NFS) and extended attributes (EAs) and/or Access Control Lists (ACLs).
+
+There are some reserved fields which are currently unused in the inode
+structure and several which are overloaded. One field is reserved for the
+directory ACL if the inode is a directory and alternately for the top 32
+bits of the file size if the inode is a regular file (allowing file sizes
+larger than 2GB). The translator field is unused under Linux, but is used
+by the HURD to reference the inode of a program which will be used to
+interpret this object. Most of the remaining reserved fields have been
+used up for both Linux and the HURD for larger owner and group fields,
+The HURD also has a larger mode field so it uses another of the remaining
+fields to store the extra more bits.
+
+There are pointers to the first 12 blocks which contain the file's data
+in the inode. There is a pointer to an indirect block (which contains
+pointers to the next set of blocks), a pointer to a doubly-indirect
+block (which contains pointers to indirect blocks) and a pointer to a
+trebly-indirect block (which contains pointers to doubly-indirect blocks).
+
+The flags field contains some ext2-specific flags which aren't catered
+for by the standard chmod flags. These flags can be listed with lsattr
+and changed with the chattr command, and allow specific filesystem
+behaviour on a per-file basis. There are flags for secure deletion,
+undeletable, compression, synchronous updates, immutability, append-only,
+dumpable, no-atime, indexed directories, and data-journaling. Not all
+of these are supported yet.
+
+Directories
+-----------
+
+A directory is a filesystem object and has an inode just like a file.
+It is a specially formatted file containing records which associate
+each name with an inode number. Later revisions of the filesystem also
+encode the type of the object (file, directory, symlink, device, fifo,
+socket) to avoid the need to check the inode itself for this information
+(support for taking advantage of this feature does not yet exist in
+Glibc 2.2).
+
+The inode allocation code tries to assign inodes which are in the same
+block group as the directory in which they are first created.
+
+The current implementation of ext2 uses a singly-linked list to store
+the filenames in the directory; a pending enhancement uses hashing of the
+filenames to allow lookup without the need to scan the entire directory.
+
+The current implementation never removes empty directory blocks once they
+have been allocated to hold more files.
+
+Special files
+-------------
+
+Symbolic links are also filesystem objects with inodes. They deserve
+special mention because the data for them is stored within the inode
+itself if the symlink is less than 60 bytes long. It uses the fields
+which would normally be used to store the pointers to data blocks.
+This is a worthwhile optimisation as it we avoid allocating a full
+block for the symlink, and most symlinks are less than 60 characters long.
+
+Character and block special devices never have data blocks assigned to
+them. Instead, their device number is stored in the inode, again reusing
+the fields which would be used to point to the data blocks.
+
+Reserved Space
+--------------
+
+In ext2, there is a mechanism for reserving a certain number of blocks
+for a particular user (normally the super-user). This is intended to
+allow for the system to continue functioning even if non-privileged users
+fill up all the space available to them (this is independent of filesystem
+quotas). It also keeps the filesystem from filling up entirely which
+helps combat fragmentation.
+
+Filesystem check
+----------------
+
+At boot time, most systems run a consistency check (e2fsck) on their
+filesystems. The superblock of the ext2 filesystem contains several
+fields which indicate whether fsck should actually run (since checking
+the filesystem at boot can take a long time if it is large). fsck will
+run if the filesystem was not cleanly unmounted, if the maximum mount
+count has been exceeded or if the maximum time between checks has been
+exceeded.
+
+Feature Compatibility
+---------------------
+
+The compatibility feature mechanism used in ext2 is sophisticated.
+It safely allows features to be added to the filesystem, without
+unnecessarily sacrificing compatibility with older versions of the
+filesystem code. The feature compatibility mechanism is not supported by
+the original revision 0 (EXT2_GOOD_OLD_REV) of ext2, but was introduced in
+revision 1. There are three 32-bit fields, one for compatible features
+(COMPAT), one for read-only compatible (RO_COMPAT) features and one for
+incompatible (INCOMPAT) features.
+
+These feature flags have specific meanings for the kernel as follows:
+
+A COMPAT flag indicates that a feature is present in the filesystem,
+but the on-disk format is 100% compatible with older on-disk formats, so
+a kernel which didn't know anything about this feature could read/write
+the filesystem without any chance of corrupting the filesystem (or even
+making it inconsistent). This is essentially just a flag which says
+"this filesystem has a (hidden) feature" that the kernel or e2fsck may
+want to be aware of (more on e2fsck and feature flags later). The ext3
+HAS_JOURNAL feature is a COMPAT flag because the ext3 journal is simply
+a regular file with data blocks in it so the kernel does not need to
+take any special notice of it if it doesn't understand ext3 journaling.
+
+An RO_COMPAT flag indicates that the on-disk format is 100% compatible
+with older on-disk formats for reading (i.e. the feature does not change
+the visible on-disk format). However, an old kernel writing to such a
+filesystem would/could corrupt the filesystem, so this is prevented. The
+most common such feature, SPARSE_SUPER, is an RO_COMPAT feature because
+sparse groups allow file data blocks where superblock/group descriptor
+backups used to live, and ext2_free_blocks() refuses to free these blocks,
+which would leading to inconsistent bitmaps. An old kernel would also
+get an error if it tried to free a series of blocks which crossed a group
+boundary, but this is a legitimate layout in a SPARSE_SUPER filesystem.
+
+An INCOMPAT flag indicates the on-disk format has changed in some
+way that makes it unreadable by older kernels, or would otherwise
+cause a problem if an old kernel tried to mount it. FILETYPE is an
+INCOMPAT flag because older kernels would think a filename was longer
+than 256 characters, which would lead to corrupt directory listings.
+The COMPRESSION flag is an obvious INCOMPAT flag - if the kernel
+doesn't understand compression, you would just get garbage back from
+read() instead of it automatically decompressing your data. The ext3
+RECOVER flag is needed to prevent a kernel which does not understand the
+ext3 journal from mounting the filesystem without replaying the journal.
+
+For e2fsck, it needs to be more strict with the handling of these
+flags than the kernel. If it doesn't understand ANY of the COMPAT,
+RO_COMPAT, or INCOMPAT flags it will refuse to check the filesystem,
+because it has no way of verifying whether a given feature is valid
+or not. Allowing e2fsck to succeed on a filesystem with an unknown
+feature is a false sense of security for the user. Refusing to check
+a filesystem with unknown features is a good incentive for the user to
+update to the latest e2fsck. This also means that anyone adding feature
+flags to ext2 also needs to update e2fsck to verify these features.
+
+Metadata
+--------
+
+It is frequently claimed that the ext2 implementation of writing
+asynchronous metadata is faster than the ffs synchronous metadata
+scheme but less reliable. Both methods are equally resolvable by their
+respective fsck programs.
+
+If you're exceptionally paranoid, there are 3 ways of making metadata
+writes synchronous on ext2:
+
+per-file if you have the program source: use the O_SYNC flag to open()
+per-file if you don't have the source: use "chattr +S" on the file
+per-filesystem: add the "sync" option to mount (or in /etc/fstab)
+
+the first and last are not ext2 specific but do force the metadata to
+be written synchronously. See also Journaling below.
+
+Limitations
+-----------
+
+There are various limits imposed by the on-disk layout of ext2. Other
+limits are imposed by the current implementation of the kernel code.
+Many of the limits are determined at the time the filesystem is first
+created, and depend upon the block size chosen. The ratio of inodes to
+data blocks is fixed at filesystem creation time, so the only way to
+increase the number of inodes is to increase the size of the filesystem.
+No tools currently exist which can change the ratio of inodes to blocks.
+
+Most of these limits could be overcome with slight changes in the on-disk
+format and using a compatibility flag to signal the format change (at
+the expense of some compatibility).
+
+Filesystem block size: 1kB 2kB 4kB 8kB
+
+File size limit: 16GB 256GB 2048GB 2048GB
+Filesystem size limit: 2047GB 8192GB 16384GB 32768GB
+
+There is a 2.4 kernel limit of 2048GB for a single block device, so no
+filesystem larger than that can be created at this time. There is also
+an upper limit on the block size imposed by the page size of the kernel,
+so 8kB blocks are only allowed on Alpha systems (and other architectures
+which support larger pages).
+
+There is an upper limit of 32000 subdirectories in a single directory.
+
+There is a "soft" upper limit of about 10-15k files in a single directory
+with the current linear linked-list directory implementation. This limit
+stems from performance problems when creating and deleting (and also
+finding) files in such large directories. Using a hashed directory index
+(under development) allows 100k-1M+ files in a single directory without
+performance problems (although RAM size becomes an issue at this point).
+
+The (meaningless) absolute upper limit of files in a single directory
+(imposed by the file size, the realistic limit is obviously much less)
+is over 130 trillion files. It would be higher except there are not
+enough 4-character names to make up unique directory entries, so they
+have to be 8 character filenames, even then we are fairly close to
+running out of unique filenames.
+
+Journaling
+----------
+
+A journaling extension to the ext2 code has been developed by Stephen
+Tweedie. It avoids the risks of metadata corruption and the need to
+wait for e2fsck to complete after a crash, without requiring a change
+to the on-disk ext2 layout. In a nutshell, the journal is a regular
+file which stores whole metadata (and optionally data) blocks that have
+been modified, prior to writing them into the filesystem. This means
+it is possible to add a journal to an existing ext2 filesystem without
+the need for data conversion.
+
+When changes to the filesystem (e.g. a file is renamed) they are stored in
+a transaction in the journal and can either be complete or incomplete at
+the time of a crash. If a transaction is complete at the time of a crash
+(or in the normal case where the system does not crash), then any blocks
+in that transaction are guaranteed to represent a valid filesystem state,
+and are copied into the filesystem. If a transaction is incomplete at
+the time of the crash, then there is no guarantee of consistency for
+the blocks in that transaction so they are discarded (which means any
+filesystem changes they represent are also lost).
+Check Documentation/filesystems/ext3.txt if you want to read more about
+ext3 and journaling.
+
+References
+==========
+
+The kernel source file:/usr/src/linux/fs/ext2/
+e2fsprogs (e2fsck) http://e2fsprogs.sourceforge.net/
+Design & Implementation http://e2fsprogs.sourceforge.net/ext2intro.html
+Journaling (ext3) ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/
+Filesystem Resizing http://ext2resize.sourceforge.net/
+Compression (*) http://e2compr.sourceforge.net/
+
+Implementations for:
+Windows 95/98/NT/2000 http://www.chrysocome.net/explore2fs
+Windows 95 (*) http://www.yipton.net/content.html#FSDEXT2
+DOS client (*) ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
+OS/2 (+) ftp://metalab.unc.edu/pub/Linux/system/filesystems/ext2/
+RISC OS client http://www.esw-heim.tu-clausthal.de/~marco/smorbrod/IscaFS/
+
+(*) no longer actively developed/supported (as of Apr 2001)
+(+) no longer actively developed/supported (as of Mar 2009)
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
new file mode 100644
index 000000000..7ed0d17d6
--- /dev/null
+++ b/Documentation/filesystems/ext3.txt
@@ -0,0 +1,215 @@
+
+Ext3 Filesystem
+===============
+
+Ext3 was originally released in September 1999. Written by Stephen Tweedie
+for the 2.2 branch, and ported to 2.4 kernels by Peter Braam, Andreas Dilger,
+Andrew Morton, Alexander Viro, Ted Ts'o and Stephen Tweedie.
+
+Ext3 is the ext2 filesystem enhanced with journalling capabilities.
+
+Options
+=======
+
+When mounting an ext3 filesystem, the following option are accepted:
+(*) == default
+
+ro Mount filesystem read only. Note that ext3 will replay
+ the journal (and thus write to the partition) even when
+ mounted "read only". Mount options "ro,noload" can be
+ used to prevent writes to the filesystem.
+
+journal=update Update the ext3 file system's journal to the current
+ format.
+
+journal=inum When a journal already exists, this option is ignored.
+ Otherwise, it specifies the number of the inode which
+ will represent the ext3 file system's journal file.
+
+journal_path=path
+journal_dev=devnum When the external journal device's major/minor numbers
+ have changed, these options allow the user to specify
+ the new journal location. The journal device is
+ identified through either its new major/minor numbers
+ encoded in devnum, or via a path to the device.
+
+norecovery Don't load the journal on mounting. Note that this forces
+noload mount of inconsistent filesystem, which can lead to
+ various problems.
+
+data=journal All data are committed into the journal prior to being
+ written into the main file system.
+
+data=ordered (*) All data are forced directly out to the main file
+ system prior to its metadata being committed to the
+ journal.
+
+data=writeback Data ordering is not preserved, data may be written
+ into the main file system after its metadata has been
+ committed to the journal.
+
+commit=nrsec (*) Ext3 can be told to sync all its data and metadata
+ every 'nrsec' seconds. The default value is 5 seconds.
+ This means that if you lose your power, you will lose
+ as much as the latest 5 seconds of work (your
+ filesystem will not be damaged though, thanks to the
+ journaling). This default value (or any low value)
+ will hurt performance, but it's good for data-safety.
+ Setting it to 0 will have the same effect as leaving
+ it at the default (5 seconds).
+ Setting it to very large values will improve
+ performance.
+
+barrier=<0|1(*)> This enables/disables the use of write barriers in
+barrier (*) the jbd code. barrier=0 disables, barrier=1 enables.
+nobarrier This also requires an IO stack which can support
+ barriers, and if jbd gets an error on a barrier
+ write, it will disable again with a warning.
+ Write barriers enforce proper on-disk ordering
+ of journal commits, making volatile disk write caches
+ safe to use, at some performance penalty. If
+ your disks are battery-backed in one way or another,
+ disabling barriers may safely improve performance.
+ The mount options "barrier" and "nobarrier" can
+ also be used to enable or disable barriers, for
+ consistency with other ext3 mount options.
+
+user_xattr Enables Extended User Attributes. Additionally, you
+ need to have extended attribute support enabled in the
+ kernel configuration (CONFIG_EXT3_FS_XATTR). See the
+ attr(5) manual page and http://acl.bestbits.at/ to
+ learn more about extended attributes.
+
+nouser_xattr Disables Extended User Attributes.
+
+acl Enables POSIX Access Control Lists support.
+ Additionally, you need to have ACL support enabled in
+ the kernel configuration (CONFIG_EXT3_FS_POSIX_ACL).
+ See the acl(5) manual page and http://acl.bestbits.at/
+ for more information.
+
+noacl This option disables POSIX Access Control List
+ support.
+
+reservation
+
+noreservation
+
+bsddf (*) Make 'df' act like BSD.
+minixdf Make 'df' act like Minix.
+
+check=none Don't do extra checking of bitmaps on mount.
+nocheck
+
+debug Extra debugging information is sent to syslog.
+
+errors=remount-ro Remount the filesystem read-only on an error.
+errors=continue Keep going on a filesystem error.
+errors=panic Panic and halt the machine if an error occurs.
+ (These mount options override the errors behavior
+ specified in the superblock, which can be
+ configured using tune2fs.)
+
+data_err=ignore(*) Just print an error message if an error occurs
+ in a file data buffer in ordered mode.
+data_err=abort Abort the journal if an error occurs in a file
+ data buffer in ordered mode.
+
+grpid Give objects the same group ID as their creator.
+bsdgroups
+
+nogrpid (*) New objects have the group ID of their creator.
+sysvgroups
+
+resgid=n The group ID which may use the reserved blocks.
+
+resuid=n The user ID which may use the reserved blocks.
+
+sb=n Use alternate superblock at this location.
+
+quota These options are ignored by the filesystem. They
+noquota are used only by quota tools to recognize volumes
+grpquota where quota should be turned on. See documentation
+usrquota in the quota-tools package for more details
+ (http://sourceforge.net/projects/linuxquota).
+
+jqfmt=<quota type> These options tell filesystem details about quota
+usrjquota=<file> so that quota information can be properly updated
+grpjquota=<file> during journal replay. They replace the above
+ quota options. See documentation in the quota-tools
+ package for more details
+ (http://sourceforge.net/projects/linuxquota).
+
+Specification
+=============
+Ext3 shares all disk implementation with the ext2 filesystem, and adds
+transactions capabilities to ext2. Journaling is done by the Journaling Block
+Device layer.
+
+Journaling Block Device layer
+-----------------------------
+The Journaling Block Device layer (JBD) isn't ext3 specific. It was designed
+to add journaling capabilities to a block device. The ext3 filesystem code
+will inform the JBD of modifications it is performing (called a transaction).
+The journal supports the transactions start and stop, and in case of a crash,
+the journal can replay the transactions to quickly put the partition back into
+a consistent state.
+
+Handles represent a single atomic update to a filesystem. JBD can handle an
+external journal on a block device.
+
+Data Mode
+---------
+There are 3 different data modes:
+
+* writeback mode
+In data=writeback mode, ext3 does not journal data at all. This mode provides
+a similar level of journaling as that of XFS, JFS, and ReiserFS in its default
+mode - metadata journaling. A crash+recovery can cause incorrect data to
+appear in files which were written shortly before the crash. This mode will
+typically provide the best ext3 performance.
+
+* ordered mode
+In data=ordered mode, ext3 only officially journals metadata, but it logically
+groups metadata and data blocks into a single unit called a transaction. When
+it's time to write the new metadata out to disk, the associated data blocks
+are written first. In general, this mode performs slightly slower than
+writeback but significantly faster than journal mode.
+
+* journal mode
+data=journal mode provides full data and metadata journaling. All new data is
+written to the journal first, and then to its final location.
+In the event of a crash, the journal can be replayed, bringing both data and
+metadata into a consistent state. This mode is the slowest except when data
+needs to be read from and written to disk at the same time where it
+outperforms all other modes.
+
+Compatibility
+-------------
+
+Ext2 partitions can be easily convert to ext3, with `tune2fs -j <dev>`.
+Ext3 is fully compatible with Ext2. Ext3 partitions can easily be mounted as
+Ext2.
+
+
+External Tools
+==============
+See manual pages to learn more.
+
+tune2fs: create a ext3 journal on a ext2 partition with the -j flag.
+mke2fs: create a ext3 partition with the -j flag.
+debugfs: ext2 and ext3 file system debugger.
+ext2online: online (mounted) ext2 and ext3 filesystem resizer
+
+
+References
+==========
+
+kernel source: <file:fs/ext3/>
+ <file:fs/jbd/>
+
+programs: http://e2fsprogs.sourceforge.net/
+ http://ext2resize.sourceforge.net
+
+useful links: http://www.ibm.com/developerworks/library/l-fs7/index.html
+ http://www.ibm.com/developerworks/library/l-fs8/index.html
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
new file mode 100644
index 000000000..6c0108eb0
--- /dev/null
+++ b/Documentation/filesystems/ext4.txt
@@ -0,0 +1,629 @@
+
+Ext4 Filesystem
+===============
+
+Ext4 is an advanced level of the ext3 filesystem which incorporates
+scalability and reliability enhancements for supporting large filesystems
+(64 bit) in keeping with increasing disk capacities and state-of-the-art
+feature requirements.
+
+Mailing list: linux-ext4@vger.kernel.org
+Web site: http://ext4.wiki.kernel.org
+
+
+1. Quick usage instructions:
+===========================
+
+Note: More extensive information for getting started with ext4 can be
+ found at the ext4 wiki site at the URL:
+ http://ext4.wiki.kernel.org/index.php/Ext4_Howto
+
+ - Compile and install the latest version of e2fsprogs (as of this
+ writing version 1.41.3) from:
+
+ http://sourceforge.net/project/showfiles.php?group_id=2406
+
+ or
+
+ ftp://ftp.kernel.org/pub/linux/kernel/people/tytso/e2fsprogs/
+
+ or grab the latest git repository from:
+
+ git://git.kernel.org/pub/scm/fs/ext2/e2fsprogs.git
+
+ - Note that it is highly important to install the mke2fs.conf file
+ that comes with the e2fsprogs 1.41.x sources in /etc/mke2fs.conf. If
+ you have edited the /etc/mke2fs.conf file installed on your system,
+ you will need to merge your changes with the version from e2fsprogs
+ 1.41.x.
+
+ - Create a new filesystem using the ext4 filesystem type:
+
+ # mke2fs -t ext4 /dev/hda1
+
+ Or to configure an existing ext3 filesystem to support extents:
+
+ # tune2fs -O extents /dev/hda1
+
+ If the filesystem was created with 128 byte inodes, it can be
+ converted to use 256 byte for greater efficiency via:
+
+ # tune2fs -I 256 /dev/hda1
+
+ (Note: we currently do not have tools to convert an ext4
+ filesystem back to ext3; so please do not do try this on production
+ filesystems.)
+
+ - Mounting:
+
+ # mount -t ext4 /dev/hda1 /wherever
+
+ - When comparing performance with other filesystems, it's always
+ important to try multiple workloads; very often a subtle change in a
+ workload parameter can completely change the ranking of which
+ filesystems do well compared to others. When comparing versus ext3,
+ note that ext4 enables write barriers by default, while ext3 does
+ not enable write barriers by default. So it is useful to use
+ explicitly specify whether barriers are enabled or not when via the
+ '-o barriers=[0|1]' mount option for both ext3 and ext4 filesystems
+ for a fair comparison. When tuning ext3 for best benchmark numbers,
+ it is often worthwhile to try changing the data journaling mode; '-o
+ data=writeback' can be faster for some workloads. (Note however that
+ running mounted with data=writeback can potentially leave stale data
+ exposed in recently written files in case of an unclean shutdown,
+ which could be a security exposure in some situations.) Configuring
+ the filesystem with a large journal can also be helpful for
+ metadata-intensive workloads.
+
+2. Features
+===========
+
+2.1 Currently available
+
+* ability to use filesystems > 16TB (e2fsprogs support not available yet)
+* extent format reduces metadata overhead (RAM, IO for access, transactions)
+* extent format more robust in face of on-disk corruption due to magics,
+* internal redundancy in tree
+* improved file allocation (multi-block alloc)
+* lift 32000 subdirectory limit imposed by i_links_count[1]
+* nsec timestamps for mtime, atime, ctime, create time
+* inode version field on disk (NFSv4, Lustre)
+* reduced e2fsck time via uninit_bg feature
+* journal checksumming for robustness, performance
+* persistent file preallocation (e.g for streaming media, databases)
+* ability to pack bitmaps and inode tables into larger virtual groups via the
+ flex_bg feature
+* large file support
+* Inode allocation using large virtual block groups via flex_bg
+* delayed allocation
+* large block (up to pagesize) support
+* efficient new ordered mode in JBD2 and ext4(avoid using buffer head to force
+ the ordering)
+
+[1] Filesystems with a block size of 1k may see a limit imposed by the
+directory hash tree having a maximum depth of two.
+
+2.2 Candidate features for future inclusion
+
+* Online defrag (patches available but not well tested)
+* reduced mke2fs time via lazy itable initialization in conjunction with
+ the uninit_bg feature (capability to do this is available in e2fsprogs
+ but a kernel thread to do lazy zeroing of unused inode table blocks
+ after filesystem is first mounted is required for safety)
+
+There are several others under discussion, whether they all make it in is
+partly a function of how much time everyone has to work on them. Features like
+metadata checksumming have been discussed and planned for a bit but no patches
+exist yet so I'm not sure they're in the near-term roadmap.
+
+The big performance win will come with mballoc, delalloc and flex_bg
+grouping of bitmaps and inode tables. Some test results available here:
+
+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-write-2.6.27-rc1.html
+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-readwrite-2.6.27-rc1.html
+
+3. Options
+==========
+
+When mounting an ext4 filesystem, the following option are accepted:
+(*) == default
+
+ro Mount filesystem read only. Note that ext4 will
+ replay the journal (and thus write to the
+ partition) even when mounted "read only". The
+ mount options "ro,noload" can be used to prevent
+ writes to the filesystem.
+
+journal_checksum Enable checksumming of the journal transactions.
+ This will allow the recovery code in e2fsck and the
+ kernel to detect corruption in the kernel. It is a
+ compatible change and will be ignored by older kernels.
+
+journal_async_commit Commit block can be written to disk without waiting
+ for descriptor blocks. If enabled older kernels cannot
+ mount the device. This will enable 'journal_checksum'
+ internally.
+
+journal_path=path
+journal_dev=devnum When the external journal device's major/minor numbers
+ have changed, these options allow the user to specify
+ the new journal location. The journal device is
+ identified through either its new major/minor numbers
+ encoded in devnum, or via a path to the device.
+
+norecovery Don't load the journal on mounting. Note that
+noload if the filesystem was not unmounted cleanly,
+ skipping the journal replay will lead to the
+ filesystem containing inconsistencies that can
+ lead to any number of problems.
+
+data=journal All data are committed into the journal prior to being
+ written into the main file system. Enabling
+ this mode will disable delayed allocation and
+ O_DIRECT support.
+
+data=ordered (*) All data are forced directly out to the main file
+ system prior to its metadata being committed to the
+ journal.
+
+data=writeback Data ordering is not preserved, data may be written
+ into the main file system after its metadata has been
+ committed to the journal.
+
+commit=nrsec (*) Ext4 can be told to sync all its data and metadata
+ every 'nrsec' seconds. The default value is 5 seconds.
+ This means that if you lose your power, you will lose
+ as much as the latest 5 seconds of work (your
+ filesystem will not be damaged though, thanks to the
+ journaling). This default value (or any low value)
+ will hurt performance, but it's good for data-safety.
+ Setting it to 0 will have the same effect as leaving
+ it at the default (5 seconds).
+ Setting it to very large values will improve
+ performance.
+
+barrier=<0|1(*)> This enables/disables the use of write barriers in
+barrier(*) the jbd code. barrier=0 disables, barrier=1 enables.
+nobarrier This also requires an IO stack which can support
+ barriers, and if jbd gets an error on a barrier
+ write, it will disable again with a warning.
+ Write barriers enforce proper on-disk ordering
+ of journal commits, making volatile disk write caches
+ safe to use, at some performance penalty. If
+ your disks are battery-backed in one way or another,
+ disabling barriers may safely improve performance.
+ The mount options "barrier" and "nobarrier" can
+ also be used to enable or disable barriers, for
+ consistency with other ext4 mount options.
+
+inode_readahead_blks=n This tuning parameter controls the maximum
+ number of inode table blocks that ext4's inode
+ table readahead algorithm will pre-read into
+ the buffer cache. The default value is 32 blocks.
+
+nouser_xattr Disables Extended User Attributes. See the
+ attr(5) manual page and http://acl.bestbits.at/
+ for more information about extended attributes.
+
+noacl This option disables POSIX Access Control List
+ support. If ACL support is enabled in the kernel
+ configuration (CONFIG_EXT4_FS_POSIX_ACL), ACL is
+ enabled by default on mount. See the acl(5) manual
+ page and http://acl.bestbits.at/ for more information
+ about acl.
+
+bsddf (*) Make 'df' act like BSD.
+minixdf Make 'df' act like Minix.
+
+debug Extra debugging information is sent to syslog.
+
+abort Simulate the effects of calling ext4_abort() for
+ debugging purposes. This is normally used while
+ remounting a filesystem which is already mounted.
+
+errors=remount-ro Remount the filesystem read-only on an error.
+errors=continue Keep going on a filesystem error.
+errors=panic Panic and halt the machine if an error occurs.
+ (These mount options override the errors behavior
+ specified in the superblock, which can be configured
+ using tune2fs)
+
+data_err=ignore(*) Just print an error message if an error occurs
+ in a file data buffer in ordered mode.
+data_err=abort Abort the journal if an error occurs in a file
+ data buffer in ordered mode.
+
+grpid Give objects the same group ID as their creator.
+bsdgroups
+
+nogrpid (*) New objects have the group ID of their creator.
+sysvgroups
+
+resgid=n The group ID which may use the reserved blocks.
+
+resuid=n The user ID which may use the reserved blocks.
+
+sb=n Use alternate superblock at this location.
+
+quota These options are ignored by the filesystem. They
+noquota are used only by quota tools to recognize volumes
+grpquota where quota should be turned on. See documentation
+usrquota in the quota-tools package for more details
+ (http://sourceforge.net/projects/linuxquota).
+
+jqfmt=<quota type> These options tell filesystem details about quota
+usrjquota=<file> so that quota information can be properly updated
+grpjquota=<file> during journal replay. They replace the above
+ quota options. See documentation in the quota-tools
+ package for more details
+ (http://sourceforge.net/projects/linuxquota).
+
+stripe=n Number of filesystem blocks that mballoc will try
+ to use for allocation size and alignment. For RAID5/6
+ systems this should be the number of data
+ disks * RAID chunk size in file system blocks.
+
+delalloc (*) Defer block allocation until just before ext4
+ writes out the block(s) in question. This
+ allows ext4 to better allocation decisions
+ more efficiently.
+nodelalloc Disable delayed allocation. Blocks are allocated
+ when the data is copied from userspace to the
+ page cache, either via the write(2) system call
+ or when an mmap'ed page which was previously
+ unallocated is written for the first time.
+
+max_batch_time=usec Maximum amount of time ext4 should wait for
+ additional filesystem operations to be batch
+ together with a synchronous write operation.
+ Since a synchronous write operation is going to
+ force a commit and then a wait for the I/O
+ complete, it doesn't cost much, and can be a
+ huge throughput win, we wait for a small amount
+ of time to see if any other transactions can
+ piggyback on the synchronous write. The
+ algorithm used is designed to automatically tune
+ for the speed of the disk, by measuring the
+ amount of time (on average) that it takes to
+ finish committing a transaction. Call this time
+ the "commit time". If the time that the
+ transaction has been running is less than the
+ commit time, ext4 will try sleeping for the
+ commit time to see if other operations will join
+ the transaction. The commit time is capped by
+ the max_batch_time, which defaults to 15000us
+ (15ms). This optimization can be turned off
+ entirely by setting max_batch_time to 0.
+
+min_batch_time=usec This parameter sets the commit time (as
+ described above) to be at least min_batch_time.
+ It defaults to zero microseconds. Increasing
+ this parameter may improve the throughput of
+ multi-threaded, synchronous workloads on very
+ fast disks, at the cost of increasing latency.
+
+journal_ioprio=prio The I/O priority (from 0 to 7, where 0 is the
+ highest priority) which should be used for I/O
+ operations submitted by kjournald2 during a
+ commit operation. This defaults to 3, which is
+ a slightly higher priority than the default I/O
+ priority.
+
+auto_da_alloc(*) Many broken applications don't use fsync() when
+noauto_da_alloc replacing existing files via patterns such as
+ fd = open("foo.new")/write(fd,..)/close(fd)/
+ rename("foo.new", "foo"), or worse yet,
+ fd = open("foo", O_TRUNC)/write(fd,..)/close(fd).
+ If auto_da_alloc is enabled, ext4 will detect
+ the replace-via-rename and replace-via-truncate
+ patterns and force that any delayed allocation
+ blocks are allocated such that at the next
+ journal commit, in the default data=ordered
+ mode, the data blocks of the new file are forced
+ to disk before the rename() operation is
+ committed. This provides roughly the same level
+ of guarantees as ext3, and avoids the
+ "zero-length" problem that can happen when a
+ system crashes before the delayed allocation
+ blocks are forced to disk.
+
+noinit_itable Do not initialize any uninitialized inode table
+ blocks in the background. This feature may be
+ used by installation CD's so that the install
+ process can complete as quickly as possible; the
+ inode table initialization process would then be
+ deferred until the next time the file system
+ is unmounted.
+
+init_itable=n The lazy itable init code will wait n times the
+ number of milliseconds it took to zero out the
+ previous block group's inode table. This
+ minimizes the impact on the system performance
+ while file system's inode table is being initialized.
+
+discard Controls whether ext4 should issue discard/TRIM
+nodiscard(*) commands to the underlying block device when
+ blocks are freed. This is useful for SSD devices
+ and sparse/thinly-provisioned LUNs, but it is off
+ by default until sufficient testing has been done.
+
+nouid32 Disables 32-bit UIDs and GIDs. This is for
+ interoperability with older kernels which only
+ store and expect 16-bit values.
+
+block_validity This options allows to enables/disables the in-kernel
+noblock_validity facility for tracking filesystem metadata blocks
+ within internal data structures. This allows multi-
+ block allocator and other routines to quickly locate
+ extents which might overlap with filesystem metadata
+ blocks. This option is intended for debugging
+ purposes and since it negatively affects the
+ performance, it is off by default.
+
+dioread_lock Controls whether or not ext4 should use the DIO read
+dioread_nolock locking. If the dioread_nolock option is specified
+ ext4 will allocate uninitialized extent before buffer
+ write and convert the extent to initialized after IO
+ completes. This approach allows ext4 code to avoid
+ using inode mutex, which improves scalability on high
+ speed storages. However this does not work with
+ data journaling and dioread_nolock option will be
+ ignored with kernel warning. Note that dioread_nolock
+ code path is only used for extent-based files.
+ Because of the restrictions this options comprises
+ it is off by default (e.g. dioread_lock).
+
+max_dir_size_kb=n This limits the size of directories so that any
+ attempt to expand them beyond the specified
+ limit in kilobytes will cause an ENOSPC error.
+ This is useful in memory constrained
+ environments, where a very large directory can
+ cause severe performance problems or even
+ provoke the Out Of Memory killer. (For example,
+ if there is only 512mb memory available, a 176mb
+ directory may seriously cramp the system's style.)
+
+i_version Enable 64-bit inode version support. This option is
+ off by default.
+
+dax Use direct access (no page cache). See
+ Documentation/filesystems/dax.txt. Note that
+ this option is incompatible with data=journal.
+
+Data Mode
+=========
+There are 3 different data modes:
+
+* writeback mode
+In data=writeback mode, ext4 does not journal data at all. This mode provides
+a similar level of journaling as that of XFS, JFS, and ReiserFS in its default
+mode - metadata journaling. A crash+recovery can cause incorrect data to
+appear in files which were written shortly before the crash. This mode will
+typically provide the best ext4 performance.
+
+* ordered mode
+In data=ordered mode, ext4 only officially journals metadata, but it logically
+groups metadata information related to data changes with the data blocks into a
+single unit called a transaction. When it's time to write the new metadata
+out to disk, the associated data blocks are written first. In general,
+this mode performs slightly slower than writeback but significantly faster than journal mode.
+
+* journal mode
+data=journal mode provides full data and metadata journaling. All new data is
+written to the journal first, and then to its final location.
+In the event of a crash, the journal can be replayed, bringing both data and
+metadata into a consistent state. This mode is the slowest except when data
+needs to be read from and written to disk at the same time where it
+outperforms all others modes. Enabling this mode will disable delayed
+allocation and O_DIRECT support.
+
+/proc entries
+=============
+
+Information about mounted ext4 file systems can be found in
+/proc/fs/ext4. Each mounted filesystem will have a directory in
+/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or
+/proc/fs/ext4/dm-0). The files in each per-device directory are shown
+in table below.
+
+Files in /proc/fs/ext4/<devname>
+..............................................................................
+ File Content
+ mb_groups details of multiblock allocator buddy cache of free blocks
+..............................................................................
+
+/sys entries
+============
+
+Information about mounted ext4 file systems can be found in
+/sys/fs/ext4. Each mounted filesystem will have a directory in
+/sys/fs/ext4 based on its device name (i.e., /sys/fs/ext4/hdc or
+/sys/fs/ext4/dm-0). The files in each per-device directory are shown
+in table below.
+
+Files in /sys/fs/ext4/<devname>
+(see also Documentation/ABI/testing/sysfs-fs-ext4)
+..............................................................................
+ File Content
+
+ delayed_allocation_blocks This file is read-only and shows the number of
+ blocks that are dirty in the page cache, but
+ which do not have their location in the
+ filesystem allocated yet.
+
+ inode_goal Tuning parameter which (if non-zero) controls
+ the goal inode used by the inode allocator in
+ preference to all other allocation heuristics.
+ This is intended for debugging use only, and
+ should be 0 on production systems.
+
+ inode_readahead_blks Tuning parameter which controls the maximum
+ number of inode table blocks that ext4's inode
+ table readahead algorithm will pre-read into
+ the buffer cache
+
+ lifetime_write_kbytes This file is read-only and shows the number of
+ kilobytes of data that have been written to this
+ filesystem since it was created.
+
+ max_writeback_mb_bump The maximum number of megabytes the writeback
+ code will try to write out before move on to
+ another inode.
+
+ mb_group_prealloc The multiblock allocator will round up allocation
+ requests to a multiple of this tuning parameter if
+ the stripe size is not set in the ext4 superblock
+
+ mb_max_to_scan The maximum number of extents the multiblock
+ allocator will search to find the best extent
+
+ mb_min_to_scan The minimum number of extents the multiblock
+ allocator will search to find the best extent
+
+ mb_order2_req Tuning parameter which controls the minimum size
+ for requests (as a power of 2) where the buddy
+ cache is used
+
+ mb_stats Controls whether the multiblock allocator should
+ collect statistics, which are shown during the
+ unmount. 1 means to collect statistics, 0 means
+ not to collect statistics
+
+ mb_stream_req Files which have fewer blocks than this tunable
+ parameter will have their blocks allocated out
+ of a block group specific preallocation pool, so
+ that small files are packed closely together.
+ Each large file will have its blocks allocated
+ out of its own unique preallocation pool.
+
+ session_write_kbytes This file is read-only and shows the number of
+ kilobytes of data that have been written to this
+ filesystem since it was mounted.
+
+ reserved_clusters This is RW file and contains number of reserved
+ clusters in the file system which will be used
+ in the specific situations to avoid costly
+ zeroout, unexpected ENOSPC, or possible data
+ loss. The default is 2% or 4096 clusters,
+ whichever is smaller and this can be changed
+ however it can never exceed number of clusters
+ in the file system. If there is not enough space
+ for the reserved space when mounting the file
+ mount will _not_ fail.
+..............................................................................
+
+Ioctls
+======
+
+There is some Ext4 specific functionality which can be accessed by applications
+through the system call interfaces. The list of all Ext4 specific ioctls are
+shown in the table below.
+
+Table of Ext4 specific ioctls
+..............................................................................
+ Ioctl Description
+ EXT4_IOC_GETFLAGS Get additional attributes associated with inode.
+ The ioctl argument is an integer bitfield, with
+ bit values described in ext4.h. This ioctl is an
+ alias for FS_IOC_GETFLAGS.
+
+ EXT4_IOC_SETFLAGS Set additional attributes associated with inode.
+ The ioctl argument is an integer bitfield, with
+ bit values described in ext4.h. This ioctl is an
+ alias for FS_IOC_SETFLAGS.
+
+ EXT4_IOC_GETVERSION
+ EXT4_IOC_GETVERSION_OLD
+ Get the inode i_generation number stored for
+ each inode. The i_generation number is normally
+ changed only when new inode is created and it is
+ particularly useful for network filesystems. The
+ '_OLD' version of this ioctl is an alias for
+ FS_IOC_GETVERSION.
+
+ EXT4_IOC_SETVERSION
+ EXT4_IOC_SETVERSION_OLD
+ Set the inode i_generation number stored for
+ each inode. The '_OLD' version of this ioctl
+ is an alias for FS_IOC_SETVERSION.
+
+ EXT4_IOC_GROUP_EXTEND This ioctl has the same purpose as the resize
+ mount option. It allows to resize filesystem
+ to the end of the last existing block group,
+ further resize has to be done with resize2fs,
+ either online, or offline. The argument points
+ to the unsigned logn number representing the
+ filesystem new block count.
+
+ EXT4_IOC_MOVE_EXT Move the block extents from orig_fd (the one
+ this ioctl is pointing to) to the donor_fd (the
+ one specified in move_extent structure passed
+ as an argument to this ioctl). Then, exchange
+ inode metadata between orig_fd and donor_fd.
+ This is especially useful for online
+ defragmentation, because the allocator has the
+ opportunity to allocate moved blocks better,
+ ideally into one contiguous extent.
+
+ EXT4_IOC_GROUP_ADD Add a new group descriptor to an existing or
+ new group descriptor block. The new group
+ descriptor is described by ext4_new_group_input
+ structure, which is passed as an argument to
+ this ioctl. This is especially useful in
+ conjunction with EXT4_IOC_GROUP_EXTEND,
+ which allows online resize of the filesystem
+ to the end of the last existing block group.
+ Those two ioctls combined is used in userspace
+ online resize tool (e.g. resize2fs).
+
+ EXT4_IOC_MIGRATE This ioctl operates on the filesystem itself.
+ It converts (migrates) ext3 indirect block mapped
+ inode to ext4 extent mapped inode by walking
+ through indirect block mapping of the original
+ inode and converting contiguous block ranges
+ into ext4 extents of the temporary inode. Then,
+ inodes are swapped. This ioctl might help, when
+ migrating from ext3 to ext4 filesystem, however
+ suggestion is to create fresh ext4 filesystem
+ and copy data from the backup. Note, that
+ filesystem has to support extents for this ioctl
+ to work.
+
+ EXT4_IOC_ALLOC_DA_BLKS Force all of the delay allocated blocks to be
+ allocated to preserve application-expected ext3
+ behaviour. Note that this will also start
+ triggering a write of the data blocks, but this
+ behaviour may change in the future as it is
+ not necessary and has been done this way only
+ for sake of simplicity.
+
+ EXT4_IOC_RESIZE_FS Resize the filesystem to a new size. The number
+ of blocks of resized filesystem is passed in via
+ 64 bit integer argument. The kernel allocates
+ bitmaps and inode table, the userspace tool thus
+ just passes the new number of blocks.
+
+EXT4_IOC_SWAP_BOOT Swap i_blocks and associated attributes
+ (like i_blocks, i_size, i_flags, ...) from
+ the specified inode with inode
+ EXT4_BOOT_LOADER_INO (#5). This is typically
+ used to store a boot loader in a secure part of
+ the filesystem, where it can't be changed by a
+ normal user by accident.
+ The data blocks of the previous boot loader
+ will be associated with the given inode.
+
+..............................................................................
+
+References
+==========
+
+kernel source: <file:fs/ext4/>
+ <file:fs/jbd2/>
+
+programs: http://e2fsprogs.sourceforge.net/
+
+useful links: http://fedoraproject.org/wiki/ext3-devel
+ http://www.bullopensource.org/ext4/
+ http://ext4.wiki.kernel.org/index.php/Main_Page
+ http://fedoraproject.org/wiki/Features/Ext4
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt
new file mode 100644
index 000000000..e9e750e59
--- /dev/null
+++ b/Documentation/filesystems/f2fs.txt
@@ -0,0 +1,576 @@
+================================================================================
+WHAT IS Flash-Friendly File System (F2FS)?
+================================================================================
+
+NAND flash memory-based storage devices, such as SSD, eMMC, and SD cards, have
+been equipped on a variety systems ranging from mobile to server systems. Since
+they are known to have different characteristics from the conventional rotating
+disks, a file system, an upper layer to the storage device, should adapt to the
+changes from the sketch in the design level.
+
+F2FS is a file system exploiting NAND flash memory-based storage devices, which
+is based on Log-structured File System (LFS). The design has been focused on
+addressing the fundamental issues in LFS, which are snowball effect of wandering
+tree and high cleaning overhead.
+
+Since a NAND flash memory-based storage device shows different characteristic
+according to its internal geometry or flash memory management scheme, namely FTL,
+F2FS and its tools support various parameters not only for configuring on-disk
+layout, but also for selecting allocation and cleaning algorithms.
+
+The following git tree provides the file system formatting tool (mkfs.f2fs),
+a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs).
+>> git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
+
+For reporting bugs and sending patches, please use the following mailing list:
+>> linux-f2fs-devel@lists.sourceforge.net
+
+================================================================================
+BACKGROUND AND DESIGN ISSUES
+================================================================================
+
+Log-structured File System (LFS)
+--------------------------------
+"A log-structured file system writes all modifications to disk sequentially in
+a log-like structure, thereby speeding up both file writing and crash recovery.
+The log is the only structure on disk; it contains indexing information so that
+files can be read back from the log efficiently. In order to maintain large free
+areas on disk for fast writing, we divide the log into segments and use a
+segment cleaner to compress the live information from heavily fragmented
+segments." from Rosenblum, M. and Ousterhout, J. K., 1992, "The design and
+implementation of a log-structured file system", ACM Trans. Computer Systems
+10, 1, 26–52.
+
+Wandering Tree Problem
+----------------------
+In LFS, when a file data is updated and written to the end of log, its direct
+pointer block is updated due to the changed location. Then the indirect pointer
+block is also updated due to the direct pointer block update. In this manner,
+the upper index structures such as inode, inode map, and checkpoint block are
+also updated recursively. This problem is called as wandering tree problem [1],
+and in order to enhance the performance, it should eliminate or relax the update
+propagation as much as possible.
+
+[1] Bityutskiy, A. 2005. JFFS3 design issues. http://www.linux-mtd.infradead.org/
+
+Cleaning Overhead
+-----------------
+Since LFS is based on out-of-place writes, it produces so many obsolete blocks
+scattered across the whole storage. In order to serve new empty log space, it
+needs to reclaim these obsolete blocks seamlessly to users. This job is called
+as a cleaning process.
+
+The process consists of three operations as follows.
+1. A victim segment is selected through referencing segment usage table.
+2. It loads parent index structures of all the data in the victim identified by
+ segment summary blocks.
+3. It checks the cross-reference between the data and its parent index structure.
+4. It moves valid data selectively.
+
+This cleaning job may cause unexpected long delays, so the most important goal
+is to hide the latencies to users. And also definitely, it should reduce the
+amount of valid data to be moved, and move them quickly as well.
+
+================================================================================
+KEY FEATURES
+================================================================================
+
+Flash Awareness
+---------------
+- Enlarge the random write area for better performance, but provide the high
+ spatial locality
+- Align FS data structures to the operational units in FTL as best efforts
+
+Wandering Tree Problem
+----------------------
+- Use a term, “node”, that represents inodes as well as various pointer blocks
+- Introduce Node Address Table (NAT) containing the locations of all the “node”
+ blocks; this will cut off the update propagation.
+
+Cleaning Overhead
+-----------------
+- Support a background cleaning process
+- Support greedy and cost-benefit algorithms for victim selection policies
+- Support multi-head logs for static/dynamic hot and cold data separation
+- Introduce adaptive logging for efficient block allocation
+
+================================================================================
+MOUNT OPTIONS
+================================================================================
+
+background_gc=%s Turn on/off cleaning operations, namely garbage
+ collection, triggered in background when I/O subsystem is
+ idle. If background_gc=on, it will turn on the garbage
+ collection and if background_gc=off, garbage collection
+ will be truned off.
+ Default value for this option is on. So garbage
+ collection is on by default.
+disable_roll_forward Disable the roll-forward recovery routine
+norecovery Disable the roll-forward recovery routine, mounted read-
+ only (i.e., -o ro,disable_roll_forward)
+discard Issue discard/TRIM commands when a segment is cleaned.
+no_heap Disable heap-style segment allocation which finds free
+ segments for data from the beginning of main area, while
+ for node from the end of main area.
+nouser_xattr Disable Extended User Attributes. Note: xattr is enabled
+ by default if CONFIG_F2FS_FS_XATTR is selected.
+noacl Disable POSIX Access Control List. Note: acl is enabled
+ by default if CONFIG_F2FS_FS_POSIX_ACL is selected.
+active_logs=%u Support configuring the number of active logs. In the
+ current design, f2fs supports only 2, 4, and 6 logs.
+ Default number is 6.
+disable_ext_identify Disable the extension list configured by mkfs, so f2fs
+ does not aware of cold files such as media files.
+inline_xattr Enable the inline xattrs feature.
+inline_data Enable the inline data feature: New created small(<~3.4k)
+ files can be written into inode block.
+inline_dentry Enable the inline dir feature: data in new created
+ directory entries can be written into inode block. The
+ space of inode block which is used to store inline
+ dentries is limited to ~3.4k.
+flush_merge Merge concurrent cache_flush commands as much as possible
+ to eliminate redundant command issues. If the underlying
+ device handles the cache_flush command relatively slowly,
+ recommend to enable this option.
+nobarrier This option can be used if underlying storage guarantees
+ its cached data should be written to the novolatile area.
+ If this option is set, no cache_flush commands are issued
+ but f2fs still guarantees the write ordering of all the
+ data writes.
+fastboot This option is used when a system wants to reduce mount
+ time as much as possible, even though normal performance
+ can be sacrificed.
+extent_cache Enable an extent cache based on rb-tree, it can cache
+ as many as extent which map between contiguous logical
+ address and physical address per inode, resulting in
+ increasing the cache hit ratio.
+noinline_data Disable the inline data feature, inline data feature is
+ enabled by default.
+
+================================================================================
+DEBUGFS ENTRIES
+================================================================================
+
+/sys/kernel/debug/f2fs/ contains information about all the partitions mounted as
+f2fs. Each file shows the whole f2fs information.
+
+/sys/kernel/debug/f2fs/status includes:
+ - major file system information managed by f2fs currently
+ - average SIT information about whole segments
+ - current memory footprint consumed by f2fs.
+
+================================================================================
+SYSFS ENTRIES
+================================================================================
+
+Information about mounted f2f2 file systems can be found in
+/sys/fs/f2fs. Each mounted filesystem will have a directory in
+/sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda).
+The files in each per-device directory are shown in table below.
+
+Files in /sys/fs/f2fs/<devname>
+(see also Documentation/ABI/testing/sysfs-fs-f2fs)
+..............................................................................
+ File Content
+
+ gc_max_sleep_time This tuning parameter controls the maximum sleep
+ time for the garbage collection thread. Time is
+ in milliseconds.
+
+ gc_min_sleep_time This tuning parameter controls the minimum sleep
+ time for the garbage collection thread. Time is
+ in milliseconds.
+
+ gc_no_gc_sleep_time This tuning parameter controls the default sleep
+ time for the garbage collection thread. Time is
+ in milliseconds.
+
+ gc_idle This parameter controls the selection of victim
+ policy for garbage collection. Setting gc_idle = 0
+ (default) will disable this option. Setting
+ gc_idle = 1 will select the Cost Benefit approach
+ & setting gc_idle = 2 will select the greedy aproach.
+
+ reclaim_segments This parameter controls the number of prefree
+ segments to be reclaimed. If the number of prefree
+ segments is larger than the number of segments
+ in the proportion to the percentage over total
+ volume size, f2fs tries to conduct checkpoint to
+ reclaim the prefree segments to free segments.
+ By default, 5% over total # of segments.
+
+ max_small_discards This parameter controls the number of discard
+ commands that consist small blocks less than 2MB.
+ The candidates to be discarded are cached until
+ checkpoint is triggered, and issued during the
+ checkpoint. By default, it is disabled with 0.
+
+ trim_sections This parameter controls the number of sections
+ to be trimmed out in batch mode when FITRIM
+ conducts. 32 sections is set by default.
+
+ ipu_policy This parameter controls the policy of in-place
+ updates in f2fs. There are five policies:
+ 0x01: F2FS_IPU_FORCE, 0x02: F2FS_IPU_SSR,
+ 0x04: F2FS_IPU_UTIL, 0x08: F2FS_IPU_SSR_UTIL,
+ 0x10: F2FS_IPU_FSYNC.
+
+ min_ipu_util This parameter controls the threshold to trigger
+ in-place-updates. The number indicates percentage
+ of the filesystem utilization, and used by
+ F2FS_IPU_UTIL and F2FS_IPU_SSR_UTIL policies.
+
+ min_fsync_blocks This parameter controls the threshold to trigger
+ in-place-updates when F2FS_IPU_FSYNC mode is set.
+ The number indicates the number of dirty pages
+ when fsync needs to flush on its call path. If
+ the number is less than this value, it triggers
+ in-place-updates.
+
+ max_victim_search This parameter controls the number of trials to
+ find a victim segment when conducting SSR and
+ cleaning operations. The default value is 4096
+ which covers 8GB block address range.
+
+ dir_level This parameter controls the directory level to
+ support large directory. If a directory has a
+ number of files, it can reduce the file lookup
+ latency by increasing this dir_level value.
+ Otherwise, it needs to decrease this value to
+ reduce the space overhead. The default value is 0.
+
+ ram_thresh This parameter controls the memory footprint used
+ by free nids and cached nat entries. By default,
+ 10 is set, which indicates 10 MB / 1 GB RAM.
+
+================================================================================
+USAGE
+================================================================================
+
+1. Download userland tools and compile them.
+
+2. Skip, if f2fs was compiled statically inside kernel.
+ Otherwise, insert the f2fs.ko module.
+ # insmod f2fs.ko
+
+3. Create a directory trying to mount
+ # mkdir /mnt/f2fs
+
+4. Format the block device, and then mount as f2fs
+ # mkfs.f2fs -l label /dev/block_device
+ # mount -t f2fs /dev/block_device /mnt/f2fs
+
+mkfs.f2fs
+---------
+The mkfs.f2fs is for the use of formatting a partition as the f2fs filesystem,
+which builds a basic on-disk layout.
+
+The options consist of:
+-l [label] : Give a volume label, up to 512 unicode name.
+-a [0 or 1] : Split start location of each area for heap-based allocation.
+ 1 is set by default, which performs this.
+-o [int] : Set overprovision ratio in percent over volume size.
+ 5 is set by default.
+-s [int] : Set the number of segments per section.
+ 1 is set by default.
+-z [int] : Set the number of sections per zone.
+ 1 is set by default.
+-e [str] : Set basic extension list. e.g. "mp3,gif,mov"
+-t [0 or 1] : Disable discard command or not.
+ 1 is set by default, which conducts discard.
+
+fsck.f2fs
+---------
+The fsck.f2fs is a tool to check the consistency of an f2fs-formatted
+partition, which examines whether the filesystem metadata and user-made data
+are cross-referenced correctly or not.
+Note that, initial version of the tool does not fix any inconsistency.
+
+The options consist of:
+ -d debug level [default:0]
+
+dump.f2fs
+---------
+The dump.f2fs shows the information of specific inode and dumps SSA and SIT to
+file. Each file is dump_ssa and dump_sit.
+
+The dump.f2fs is used to debug on-disk data structures of the f2fs filesystem.
+It shows on-disk inode information reconized by a given inode number, and is
+able to dump all the SSA and SIT entries into predefined files, ./dump_ssa and
+./dump_sit respectively.
+
+The options consist of:
+ -d debug level [default:0]
+ -i inode no (hex)
+ -s [SIT dump segno from #1~#2 (decimal), for all 0~-1]
+ -a [SSA dump segno from #1~#2 (decimal), for all 0~-1]
+
+Examples:
+# dump.f2fs -i [ino] /dev/sdx
+# dump.f2fs -s 0~-1 /dev/sdx (SIT dump)
+# dump.f2fs -a 0~-1 /dev/sdx (SSA dump)
+
+================================================================================
+DESIGN
+================================================================================
+
+On-disk Layout
+--------------
+
+F2FS divides the whole volume into a number of segments, each of which is fixed
+to 2MB in size. A section is composed of consecutive segments, and a zone
+consists of a set of sections. By default, section and zone sizes are set to one
+segment size identically, but users can easily modify the sizes by mkfs.
+
+F2FS splits the entire volume into six areas, and all the areas except superblock
+consists of multiple segments as described below.
+
+ align with the zone size <-|
+ |-> align with the segment size
+ _________________________________________________________________________
+ | | | Segment | Node | Segment | |
+ | Superblock | Checkpoint | Info. | Address | Summary | Main |
+ | (SB) | (CP) | Table (SIT) | Table (NAT) | Area (SSA) | |
+ |____________|_____2______|______N______|______N______|______N_____|__N___|
+ . .
+ . .
+ . .
+ ._________________________________________.
+ |_Segment_|_..._|_Segment_|_..._|_Segment_|
+ . .
+ ._________._________
+ |_section_|__...__|_
+ . .
+ .________.
+ |__zone__|
+
+- Superblock (SB)
+ : It is located at the beginning of the partition, and there exist two copies
+ to avoid file system crash. It contains basic partition information and some
+ default parameters of f2fs.
+
+- Checkpoint (CP)
+ : It contains file system information, bitmaps for valid NAT/SIT sets, orphan
+ inode lists, and summary entries of current active segments.
+
+- Segment Information Table (SIT)
+ : It contains segment information such as valid block count and bitmap for the
+ validity of all the blocks.
+
+- Node Address Table (NAT)
+ : It is composed of a block address table for all the node blocks stored in
+ Main area.
+
+- Segment Summary Area (SSA)
+ : It contains summary entries which contains the owner information of all the
+ data and node blocks stored in Main area.
+
+- Main Area
+ : It contains file and directory data including their indices.
+
+In order to avoid misalignment between file system and flash-based storage, F2FS
+aligns the start block address of CP with the segment size. Also, it aligns the
+start block address of Main area with the zone size by reserving some segments
+in SSA area.
+
+Reference the following survey for additional technical details.
+https://wiki.linaro.org/WorkingGroups/Kernel/Projects/FlashCardSurvey
+
+File System Metadata Structure
+------------------------------
+
+F2FS adopts the checkpointing scheme to maintain file system consistency. At
+mount time, F2FS first tries to find the last valid checkpoint data by scanning
+CP area. In order to reduce the scanning time, F2FS uses only two copies of CP.
+One of them always indicates the last valid data, which is called as shadow copy
+mechanism. In addition to CP, NAT and SIT also adopt the shadow copy mechanism.
+
+For file system consistency, each CP points to which NAT and SIT copies are
+valid, as shown as below.
+
+ +--------+----------+---------+
+ | CP | SIT | NAT |
+ +--------+----------+---------+
+ . . . .
+ . . . .
+ . . . .
+ +-------+-------+--------+--------+--------+--------+
+ | CP #0 | CP #1 | SIT #0 | SIT #1 | NAT #0 | NAT #1 |
+ +-------+-------+--------+--------+--------+--------+
+ | ^ ^
+ | | |
+ `----------------------------------------'
+
+Index Structure
+---------------
+
+The key data structure to manage the data locations is a "node". Similar to
+traditional file structures, F2FS has three types of node: inode, direct node,
+indirect node. F2FS assigns 4KB to an inode block which contains 923 data block
+indices, two direct node pointers, two indirect node pointers, and one double
+indirect node pointer as described below. One direct node block contains 1018
+data blocks, and one indirect node block contains also 1018 node blocks. Thus,
+one inode block (i.e., a file) covers:
+
+ 4KB * (923 + 2 * 1018 + 2 * 1018 * 1018 + 1018 * 1018 * 1018) := 3.94TB.
+
+ Inode block (4KB)
+ |- data (923)
+ |- direct node (2)
+ | `- data (1018)
+ |- indirect node (2)
+ | `- direct node (1018)
+ | `- data (1018)
+ `- double indirect node (1)
+ `- indirect node (1018)
+ `- direct node (1018)
+ `- data (1018)
+
+Note that, all the node blocks are mapped by NAT which means the location of
+each node is translated by the NAT table. In the consideration of the wandering
+tree problem, F2FS is able to cut off the propagation of node updates caused by
+leaf data writes.
+
+Directory Structure
+-------------------
+
+A directory entry occupies 11 bytes, which consists of the following attributes.
+
+- hash hash value of the file name
+- ino inode number
+- len the length of file name
+- type file type such as directory, symlink, etc
+
+A dentry block consists of 214 dentry slots and file names. Therein a bitmap is
+used to represent whether each dentry is valid or not. A dentry block occupies
+4KB with the following composition.
+
+ Dentry Block(4 K) = bitmap (27 bytes) + reserved (3 bytes) +
+ dentries(11 * 214 bytes) + file name (8 * 214 bytes)
+
+ [Bucket]
+ +--------------------------------+
+ |dentry block 1 | dentry block 2 |
+ +--------------------------------+
+ . .
+ . .
+ . [Dentry Block Structure: 4KB] .
+ +--------+----------+----------+------------+
+ | bitmap | reserved | dentries | file names |
+ +--------+----------+----------+------------+
+ [Dentry Block: 4KB] . .
+ . .
+ . .
+ +------+------+-----+------+
+ | hash | ino | len | type |
+ +------+------+-----+------+
+ [Dentry Structure: 11 bytes]
+
+F2FS implements multi-level hash tables for directory structure. Each level has
+a hash table with dedicated number of hash buckets as shown below. Note that
+"A(2B)" means a bucket includes 2 data blocks.
+
+----------------------
+A : bucket
+B : block
+N : MAX_DIR_HASH_DEPTH
+----------------------
+
+level #0 | A(2B)
+ |
+level #1 | A(2B) - A(2B)
+ |
+level #2 | A(2B) - A(2B) - A(2B) - A(2B)
+ . | . . . .
+level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B)
+ . | . . . .
+level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B)
+
+The number of blocks and buckets are determined by,
+
+ ,- 2, if n < MAX_DIR_HASH_DEPTH / 2,
+ # of blocks in level #n = |
+ `- 4, Otherwise
+
+ ,- 2^(n + dir_level),
+ | if n + dir_level < MAX_DIR_HASH_DEPTH / 2,
+ # of buckets in level #n = |
+ `- 2^((MAX_DIR_HASH_DEPTH / 2) - 1),
+ Otherwise
+
+When F2FS finds a file name in a directory, at first a hash value of the file
+name is calculated. Then, F2FS scans the hash table in level #0 to find the
+dentry consisting of the file name and its inode number. If not found, F2FS
+scans the next hash table in level #1. In this way, F2FS scans hash tables in
+each levels incrementally from 1 to N. In each levels F2FS needs to scan only
+one bucket determined by the following equation, which shows O(log(# of files))
+complexity.
+
+ bucket number to scan in level #n = (hash value) % (# of buckets in level #n)
+
+In the case of file creation, F2FS finds empty consecutive slots that cover the
+file name. F2FS searches the empty slots in the hash tables of whole levels from
+1 to N in the same way as the lookup operation.
+
+The following figure shows an example of two cases holding children.
+ --------------> Dir <--------------
+ | |
+ child child
+
+ child - child [hole] - child
+
+ child - child - child [hole] - [hole] - child
+
+ Case 1: Case 2:
+ Number of children = 6, Number of children = 3,
+ File size = 7 File size = 7
+
+Default Block Allocation
+------------------------
+
+At runtime, F2FS manages six active logs inside "Main" area: Hot/Warm/Cold node
+and Hot/Warm/Cold data.
+
+- Hot node contains direct node blocks of directories.
+- Warm node contains direct node blocks except hot node blocks.
+- Cold node contains indirect node blocks
+- Hot data contains dentry blocks
+- Warm data contains data blocks except hot and cold data blocks
+- Cold data contains multimedia data or migrated data blocks
+
+LFS has two schemes for free space management: threaded log and copy-and-compac-
+tion. The copy-and-compaction scheme which is known as cleaning, is well-suited
+for devices showing very good sequential write performance, since free segments
+are served all the time for writing new data. However, it suffers from cleaning
+overhead under high utilization. Contrarily, the threaded log scheme suffers
+from random writes, but no cleaning process is needed. F2FS adopts a hybrid
+scheme where the copy-and-compaction scheme is adopted by default, but the
+policy is dynamically changed to the threaded log scheme according to the file
+system status.
+
+In order to align F2FS with underlying flash-based storage, F2FS allocates a
+segment in a unit of section. F2FS expects that the section size would be the
+same as the unit size of garbage collection in FTL. Furthermore, with respect
+to the mapping granularity in FTL, F2FS allocates each section of the active
+logs from different zones as much as possible, since FTL can write the data in
+the active logs into one allocation unit according to its mapping granularity.
+
+Cleaning process
+----------------
+
+F2FS does cleaning both on demand and in the background. On-demand cleaning is
+triggered when there are not enough free segments to serve VFS calls. Background
+cleaner is operated by a kernel thread, and triggers the cleaning job when the
+system is idle.
+
+F2FS supports two victim selection policies: greedy and cost-benefit algorithms.
+In the greedy algorithm, F2FS selects a victim segment having the smallest number
+of valid blocks. In the cost-benefit algorithm, F2FS selects a victim segment
+according to the segment age and the number of valid blocks in order to address
+log block thrashing problem in the greedy algorithm. F2FS adopts the greedy
+algorithm for on-demand cleaner, while background cleaner adopts cost-benefit
+algorithm.
+
+In order to identify whether the data in the victim segment are valid or not,
+F2FS manages a bitmap. Each bit represents the validity of a block, and the
+bitmap is composed of a bit stream covering whole blocks in main area.
diff --git a/Documentation/filesystems/fiemap.txt b/Documentation/filesystems/fiemap.txt
new file mode 100644
index 000000000..f6d9c9910
--- /dev/null
+++ b/Documentation/filesystems/fiemap.txt
@@ -0,0 +1,229 @@
+============
+Fiemap Ioctl
+============
+
+The fiemap ioctl is an efficient method for userspace to get file
+extent mappings. Instead of block-by-block mapping (such as bmap), fiemap
+returns a list of extents.
+
+
+Request Basics
+--------------
+
+A fiemap request is encoded within struct fiemap:
+
+struct fiemap {
+ __u64 fm_start; /* logical offset (inclusive) at
+ * which to start mapping (in) */
+ __u64 fm_length; /* logical length of mapping which
+ * userspace cares about (in) */
+ __u32 fm_flags; /* FIEMAP_FLAG_* flags for request (in/out) */
+ __u32 fm_mapped_extents; /* number of extents that were
+ * mapped (out) */
+ __u32 fm_extent_count; /* size of fm_extents array (in) */
+ __u32 fm_reserved;
+ struct fiemap_extent fm_extents[0]; /* array of mapped extents (out) */
+};
+
+
+fm_start, and fm_length specify the logical range within the file
+which the process would like mappings for. Extents returned mirror
+those on disk - that is, the logical offset of the 1st returned extent
+may start before fm_start, and the range covered by the last returned
+extent may end after fm_length. All offsets and lengths are in bytes.
+
+Certain flags to modify the way in which mappings are looked up can be
+set in fm_flags. If the kernel doesn't understand some particular
+flags, it will return EBADR and the contents of fm_flags will contain
+the set of flags which caused the error. If the kernel is compatible
+with all flags passed, the contents of fm_flags will be unmodified.
+It is up to userspace to determine whether rejection of a particular
+flag is fatal to its operation. This scheme is intended to allow the
+fiemap interface to grow in the future but without losing
+compatibility with old software.
+
+fm_extent_count specifies the number of elements in the fm_extents[] array
+that can be used to return extents. If fm_extent_count is zero, then the
+fm_extents[] array is ignored (no extents will be returned), and the
+fm_mapped_extents count will hold the number of extents needed in
+fm_extents[] to hold the file's current mapping. Note that there is
+nothing to prevent the file from changing between calls to FIEMAP.
+
+The following flags can be set in fm_flags:
+
+* FIEMAP_FLAG_SYNC
+If this flag is set, the kernel will sync the file before mapping extents.
+
+* FIEMAP_FLAG_XATTR
+If this flag is set, the extents returned will describe the inodes
+extended attribute lookup tree, instead of its data tree.
+
+
+Extent Mapping
+--------------
+
+Extent information is returned within the embedded fm_extents array
+which userspace must allocate along with the fiemap structure. The
+number of elements in the fiemap_extents[] array should be passed via
+fm_extent_count. The number of extents mapped by kernel will be
+returned via fm_mapped_extents. If the number of fiemap_extents
+allocated is less than would be required to map the requested range,
+the maximum number of extents that can be mapped in the fm_extent[]
+array will be returned and fm_mapped_extents will be equal to
+fm_extent_count. In that case, the last extent in the array will not
+complete the requested range and will not have the FIEMAP_EXTENT_LAST
+flag set (see the next section on extent flags).
+
+Each extent is described by a single fiemap_extent structure as
+returned in fm_extents.
+
+struct fiemap_extent {
+ __u64 fe_logical; /* logical offset in bytes for the start of
+ * the extent */
+ __u64 fe_physical; /* physical offset in bytes for the start
+ * of the extent */
+ __u64 fe_length; /* length in bytes for the extent */
+ __u64 fe_reserved64[2];
+ __u32 fe_flags; /* FIEMAP_EXTENT_* flags for this extent */
+ __u32 fe_reserved[3];
+};
+
+All offsets and lengths are in bytes and mirror those on disk. It is valid
+for an extents logical offset to start before the request or its logical
+length to extend past the request. Unless FIEMAP_EXTENT_NOT_ALIGNED is
+returned, fe_logical, fe_physical, and fe_length will be aligned to the
+block size of the file system. With the exception of extents flagged as
+FIEMAP_EXTENT_MERGED, adjacent extents will not be merged.
+
+The fe_flags field contains flags which describe the extent returned.
+A special flag, FIEMAP_EXTENT_LAST is always set on the last extent in
+the file so that the process making fiemap calls can determine when no
+more extents are available, without having to call the ioctl again.
+
+Some flags are intentionally vague and will always be set in the
+presence of other more specific flags. This way a program looking for
+a general property does not have to know all existing and future flags
+which imply that property.
+
+For example, if FIEMAP_EXTENT_DATA_INLINE or FIEMAP_EXTENT_DATA_TAIL
+are set, FIEMAP_EXTENT_NOT_ALIGNED will also be set. A program looking
+for inline or tail-packed data can key on the specific flag. Software
+which simply cares not to try operating on non-aligned extents
+however, can just key on FIEMAP_EXTENT_NOT_ALIGNED, and not have to
+worry about all present and future flags which might imply unaligned
+data. Note that the opposite is not true - it would be valid for
+FIEMAP_EXTENT_NOT_ALIGNED to appear alone.
+
+* FIEMAP_EXTENT_LAST
+This is the last extent in the file. A mapping attempt past this
+extent will return nothing.
+
+* FIEMAP_EXTENT_UNKNOWN
+The location of this extent is currently unknown. This may indicate
+the data is stored on an inaccessible volume or that no storage has
+been allocated for the file yet.
+
+* FIEMAP_EXTENT_DELALLOC
+ - This will also set FIEMAP_EXTENT_UNKNOWN.
+Delayed allocation - while there is data for this extent, its
+physical location has not been allocated yet.
+
+* FIEMAP_EXTENT_ENCODED
+This extent does not consist of plain filesystem blocks but is
+encoded (e.g. encrypted or compressed). Reading the data in this
+extent via I/O to the block device will have undefined results.
+
+Note that it is *always* undefined to try to update the data
+in-place by writing to the indicated location without the
+assistance of the filesystem, or to access the data using the
+information returned by the FIEMAP interface while the filesystem
+is mounted. In other words, user applications may only read the
+extent data via I/O to the block device while the filesystem is
+unmounted, and then only if the FIEMAP_EXTENT_ENCODED flag is
+clear; user applications must not try reading or writing to the
+filesystem via the block device under any other circumstances.
+
+* FIEMAP_EXTENT_DATA_ENCRYPTED
+ - This will also set FIEMAP_EXTENT_ENCODED
+The data in this extent has been encrypted by the file system.
+
+* FIEMAP_EXTENT_NOT_ALIGNED
+Extent offsets and length are not guaranteed to be block aligned.
+
+* FIEMAP_EXTENT_DATA_INLINE
+ This will also set FIEMAP_EXTENT_NOT_ALIGNED
+Data is located within a meta data block.
+
+* FIEMAP_EXTENT_DATA_TAIL
+ This will also set FIEMAP_EXTENT_NOT_ALIGNED
+Data is packed into a block with data from other files.
+
+* FIEMAP_EXTENT_UNWRITTEN
+Unwritten extent - the extent is allocated but its data has not been
+initialized. This indicates the extent's data will be all zero if read
+through the filesystem but the contents are undefined if read directly from
+the device.
+
+* FIEMAP_EXTENT_MERGED
+This will be set when a file does not support extents, i.e., it uses a block
+based addressing scheme. Since returning an extent for each block back to
+userspace would be highly inefficient, the kernel will try to merge most
+adjacent blocks into 'extents'.
+
+
+VFS -> File System Implementation
+---------------------------------
+
+File systems wishing to support fiemap must implement a ->fiemap callback on
+their inode_operations structure. The fs ->fiemap call is responsible for
+defining its set of supported fiemap flags, and calling a helper function on
+each discovered extent:
+
+struct inode_operations {
+ ...
+
+ int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
+ u64 len);
+
+->fiemap is passed struct fiemap_extent_info which describes the
+fiemap request:
+
+struct fiemap_extent_info {
+ unsigned int fi_flags; /* Flags as passed from user */
+ unsigned int fi_extents_mapped; /* Number of mapped extents */
+ unsigned int fi_extents_max; /* Size of fiemap_extent array */
+ struct fiemap_extent *fi_extents_start; /* Start of fiemap_extent array */
+};
+
+It is intended that the file system should not need to access any of this
+structure directly. Filesystem handlers should be tolerant to signals and return
+EINTR once fatal signal received.
+
+
+Flag checking should be done at the beginning of the ->fiemap callback via the
+fiemap_check_flags() helper:
+
+int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags);
+
+The struct fieinfo should be passed in as received from ioctl_fiemap(). The
+set of fiemap flags which the fs understands should be passed via fs_flags. If
+fiemap_check_flags finds invalid user flags, it will place the bad values in
+fieinfo->fi_flags and return -EBADR. If the file system gets -EBADR, from
+fiemap_check_flags(), it should immediately exit, returning that error back to
+ioctl_fiemap().
+
+
+For each extent in the request range, the file system should call
+the helper function, fiemap_fill_next_extent():
+
+int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
+ u64 phys, u64 len, u32 flags, u32 dev);
+
+fiemap_fill_next_extent() will use the passed values to populate the
+next free extent in the fm_extents array. 'General' extent flags will
+automatically be set from specific flags on behalf of the calling file
+system so that the userspace API is not broken.
+
+fiemap_fill_next_extent() returns 0 on success, and 1 when the
+user-supplied fm_extents array is full. If an error is encountered
+while copying the extent to user memory, -EFAULT will be returned.
diff --git a/Documentation/filesystems/files.txt b/Documentation/filesystems/files.txt
new file mode 100644
index 000000000..46dfc6b03
--- /dev/null
+++ b/Documentation/filesystems/files.txt
@@ -0,0 +1,123 @@
+File management in the Linux kernel
+-----------------------------------
+
+This document describes how locking for files (struct file)
+and file descriptor table (struct files) works.
+
+Up until 2.6.12, the file descriptor table has been protected
+with a lock (files->file_lock) and reference count (files->count).
+->file_lock protected accesses to all the file related fields
+of the table. ->count was used for sharing the file descriptor
+table between tasks cloned with CLONE_FILES flag. Typically
+this would be the case for posix threads. As with the common
+refcounting model in the kernel, the last task doing
+a put_files_struct() frees the file descriptor (fd) table.
+The files (struct file) themselves are protected using
+reference count (->f_count).
+
+In the new lock-free model of file descriptor management,
+the reference counting is similar, but the locking is
+based on RCU. The file descriptor table contains multiple
+elements - the fd sets (open_fds and close_on_exec, the
+array of file pointers, the sizes of the sets and the array
+etc.). In order for the updates to appear atomic to
+a lock-free reader, all the elements of the file descriptor
+table are in a separate structure - struct fdtable.
+files_struct contains a pointer to struct fdtable through
+which the actual fd table is accessed. Initially the
+fdtable is embedded in files_struct itself. On a subsequent
+expansion of fdtable, a new fdtable structure is allocated
+and files->fdtab points to the new structure. The fdtable
+structure is freed with RCU and lock-free readers either
+see the old fdtable or the new fdtable making the update
+appear atomic. Here are the locking rules for
+the fdtable structure -
+
+1. All references to the fdtable must be done through
+ the files_fdtable() macro :
+
+ struct fdtable *fdt;
+
+ rcu_read_lock();
+
+ fdt = files_fdtable(files);
+ ....
+ if (n <= fdt->max_fds)
+ ....
+ ...
+ rcu_read_unlock();
+
+ files_fdtable() uses rcu_dereference() macro which takes care of
+ the memory barrier requirements for lock-free dereference.
+ The fdtable pointer must be read within the read-side
+ critical section.
+
+2. Reading of the fdtable as described above must be protected
+ by rcu_read_lock()/rcu_read_unlock().
+
+3. For any update to the fd table, files->file_lock must
+ be held.
+
+4. To look up the file structure given an fd, a reader
+ must use either fcheck() or fcheck_files() APIs. These
+ take care of barrier requirements due to lock-free lookup.
+ An example :
+
+ struct file *file;
+
+ rcu_read_lock();
+ file = fcheck(fd);
+ if (file) {
+ ...
+ }
+ ....
+ rcu_read_unlock();
+
+5. Handling of the file structures is special. Since the look-up
+ of the fd (fget()/fget_light()) are lock-free, it is possible
+ that look-up may race with the last put() operation on the
+ file structure. This is avoided using atomic_long_inc_not_zero()
+ on ->f_count :
+
+ rcu_read_lock();
+ file = fcheck_files(files, fd);
+ if (file) {
+ if (atomic_long_inc_not_zero(&file->f_count))
+ *fput_needed = 1;
+ else
+ /* Didn't get the reference, someone's freed */
+ file = NULL;
+ }
+ rcu_read_unlock();
+ ....
+ return file;
+
+ atomic_long_inc_not_zero() detects if refcounts is already zero or
+ goes to zero during increment. If it does, we fail
+ fget()/fget_light().
+
+6. Since both fdtable and file structures can be looked up
+ lock-free, they must be installed using rcu_assign_pointer()
+ API. If they are looked up lock-free, rcu_dereference()
+ must be used. However it is advisable to use files_fdtable()
+ and fcheck()/fcheck_files() which take care of these issues.
+
+7. While updating, the fdtable pointer must be looked up while
+ holding files->file_lock. If ->file_lock is dropped, then
+ another thread expand the files thereby creating a new
+ fdtable and making the earlier fdtable pointer stale.
+ For example :
+
+ spin_lock(&files->file_lock);
+ fd = locate_fd(files, file, start);
+ if (fd >= 0) {
+ /* locate_fd() may have expanded fdtable, load the ptr */
+ fdt = files_fdtable(files);
+ __set_open_fd(fd, fdt);
+ __clear_close_on_exec(fd, fdt);
+ spin_unlock(&files->file_lock);
+ .....
+
+ Since locate_fd() can drop ->file_lock (and reacquire ->file_lock),
+ the fdtable pointer (fdt) must be loaded after locate_fd().
+
diff --git a/Documentation/filesystems/fuse.txt b/Documentation/filesystems/fuse.txt
new file mode 100644
index 000000000..13af4a49e
--- /dev/null
+++ b/Documentation/filesystems/fuse.txt
@@ -0,0 +1,423 @@
+Definitions
+~~~~~~~~~~~
+
+Userspace filesystem:
+
+ A filesystem in which data and metadata are provided by an ordinary
+ userspace process. The filesystem can be accessed normally through
+ the kernel interface.
+
+Filesystem daemon:
+
+ The process(es) providing the data and metadata of the filesystem.
+
+Non-privileged mount (or user mount):
+
+ A userspace filesystem mounted by a non-privileged (non-root) user.
+ The filesystem daemon is running with the privileges of the mounting
+ user. NOTE: this is not the same as mounts allowed with the "user"
+ option in /etc/fstab, which is not discussed here.
+
+Filesystem connection:
+
+ A connection between the filesystem daemon and the kernel. The
+ connection exists until either the daemon dies, or the filesystem is
+ umounted. Note that detaching (or lazy umounting) the filesystem
+ does _not_ break the connection, in this case it will exist until
+ the last reference to the filesystem is released.
+
+Mount owner:
+
+ The user who does the mounting.
+
+User:
+
+ The user who is performing filesystem operations.
+
+What is FUSE?
+~~~~~~~~~~~~~
+
+FUSE is a userspace filesystem framework. It consists of a kernel
+module (fuse.ko), a userspace library (libfuse.*) and a mount utility
+(fusermount).
+
+One of the most important features of FUSE is allowing secure,
+non-privileged mounts. This opens up new possibilities for the use of
+filesystems. A good example is sshfs: a secure network filesystem
+using the sftp protocol.
+
+The userspace library and utilities are available from the FUSE
+homepage:
+
+ http://fuse.sourceforge.net/
+
+Filesystem type
+~~~~~~~~~~~~~~~
+
+The filesystem type given to mount(2) can be one of the following:
+
+'fuse'
+
+ This is the usual way to mount a FUSE filesystem. The first
+ argument of the mount system call may contain an arbitrary string,
+ which is not interpreted by the kernel.
+
+'fuseblk'
+
+ The filesystem is block device based. The first argument of the
+ mount system call is interpreted as the name of the device.
+
+Mount options
+~~~~~~~~~~~~~
+
+'fd=N'
+
+ The file descriptor to use for communication between the userspace
+ filesystem and the kernel. The file descriptor must have been
+ obtained by opening the FUSE device ('/dev/fuse').
+
+'rootmode=M'
+
+ The file mode of the filesystem's root in octal representation.
+
+'user_id=N'
+
+ The numeric user id of the mount owner.
+
+'group_id=N'
+
+ The numeric group id of the mount owner.
+
+'default_permissions'
+
+ By default FUSE doesn't check file access permissions, the
+ filesystem is free to implement its access policy or leave it to
+ the underlying file access mechanism (e.g. in case of network
+ filesystems). This option enables permission checking, restricting
+ access based on file mode. It is usually useful together with the
+ 'allow_other' mount option.
+
+'allow_other'
+
+ This option overrides the security measure restricting file access
+ to the user mounting the filesystem. This option is by default only
+ allowed to root, but this restriction can be removed with a
+ (userspace) configuration option.
+
+'max_read=N'
+
+ With this option the maximum size of read operations can be set.
+ The default is infinite. Note that the size of read requests is
+ limited anyway to 32 pages (which is 128kbyte on i386).
+
+'blksize=N'
+
+ Set the block size for the filesystem. The default is 512. This
+ option is only valid for 'fuseblk' type mounts.
+
+Control filesystem
+~~~~~~~~~~~~~~~~~~
+
+There's a control filesystem for FUSE, which can be mounted by:
+
+ mount -t fusectl none /sys/fs/fuse/connections
+
+Mounting it under the '/sys/fs/fuse/connections' directory makes it
+backwards compatible with earlier versions.
+
+Under the fuse control filesystem each connection has a directory
+named by a unique number.
+
+For each connection the following files exist within this directory:
+
+ 'waiting'
+
+ The number of requests which are waiting to be transferred to
+ userspace or being processed by the filesystem daemon. If there is
+ no filesystem activity and 'waiting' is non-zero, then the
+ filesystem is hung or deadlocked.
+
+ 'abort'
+
+ Writing anything into this file will abort the filesystem
+ connection. This means that all waiting requests will be aborted an
+ error returned for all aborted and new requests.
+
+Only the owner of the mount may read or write these files.
+
+Interrupting filesystem operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If a process issuing a FUSE filesystem request is interrupted, the
+following will happen:
+
+ 1) If the request is not yet sent to userspace AND the signal is
+ fatal (SIGKILL or unhandled fatal signal), then the request is
+ dequeued and returns immediately.
+
+ 2) If the request is not yet sent to userspace AND the signal is not
+ fatal, then an 'interrupted' flag is set for the request. When
+ the request has been successfully transferred to userspace and
+ this flag is set, an INTERRUPT request is queued.
+
+ 3) If the request is already sent to userspace, then an INTERRUPT
+ request is queued.
+
+INTERRUPT requests take precedence over other requests, so the
+userspace filesystem will receive queued INTERRUPTs before any others.
+
+The userspace filesystem may ignore the INTERRUPT requests entirely,
+or may honor them by sending a reply to the _original_ request, with
+the error set to EINTR.
+
+It is also possible that there's a race between processing the
+original request and its INTERRUPT request. There are two possibilities:
+
+ 1) The INTERRUPT request is processed before the original request is
+ processed
+
+ 2) The INTERRUPT request is processed after the original request has
+ been answered
+
+If the filesystem cannot find the original request, it should wait for
+some timeout and/or a number of new requests to arrive, after which it
+should reply to the INTERRUPT request with an EAGAIN error. In case
+1) the INTERRUPT request will be requeued. In case 2) the INTERRUPT
+reply will be ignored.
+
+Aborting a filesystem connection
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is possible to get into certain situations where the filesystem is
+not responding. Reasons for this may be:
+
+ a) Broken userspace filesystem implementation
+
+ b) Network connection down
+
+ c) Accidental deadlock
+
+ d) Malicious deadlock
+
+(For more on c) and d) see later sections)
+
+In either of these cases it may be useful to abort the connection to
+the filesystem. There are several ways to do this:
+
+ - Kill the filesystem daemon. Works in case of a) and b)
+
+ - Kill the filesystem daemon and all users of the filesystem. Works
+ in all cases except some malicious deadlocks
+
+ - Use forced umount (umount -f). Works in all cases but only if
+ filesystem is still attached (it hasn't been lazy unmounted)
+
+ - Abort filesystem through the FUSE control filesystem. Most
+ powerful method, always works.
+
+How do non-privileged mounts work?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Since the mount() system call is a privileged operation, a helper
+program (fusermount) is needed, which is installed setuid root.
+
+The implication of providing non-privileged mounts is that the mount
+owner must not be able to use this capability to compromise the
+system. Obvious requirements arising from this are:
+
+ A) mount owner should not be able to get elevated privileges with the
+ help of the mounted filesystem
+
+ B) mount owner should not get illegitimate access to information from
+ other users' and the super user's processes
+
+ C) mount owner should not be able to induce undesired behavior in
+ other users' or the super user's processes
+
+How are requirements fulfilled?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ A) The mount owner could gain elevated privileges by either:
+
+ 1) creating a filesystem containing a device file, then opening
+ this device
+
+ 2) creating a filesystem containing a suid or sgid application,
+ then executing this application
+
+ The solution is not to allow opening device files and ignore
+ setuid and setgid bits when executing programs. To ensure this
+ fusermount always adds "nosuid" and "nodev" to the mount options
+ for non-privileged mounts.
+
+ B) If another user is accessing files or directories in the
+ filesystem, the filesystem daemon serving requests can record the
+ exact sequence and timing of operations performed. This
+ information is otherwise inaccessible to the mount owner, so this
+ counts as an information leak.
+
+ The solution to this problem will be presented in point 2) of C).
+
+ C) There are several ways in which the mount owner can induce
+ undesired behavior in other users' processes, such as:
+
+ 1) mounting a filesystem over a file or directory which the mount
+ owner could otherwise not be able to modify (or could only
+ make limited modifications).
+
+ This is solved in fusermount, by checking the access
+ permissions on the mountpoint and only allowing the mount if
+ the mount owner can do unlimited modification (has write
+ access to the mountpoint, and mountpoint is not a "sticky"
+ directory)
+
+ 2) Even if 1) is solved the mount owner can change the behavior
+ of other users' processes.
+
+ i) It can slow down or indefinitely delay the execution of a
+ filesystem operation creating a DoS against the user or the
+ whole system. For example a suid application locking a
+ system file, and then accessing a file on the mount owner's
+ filesystem could be stopped, and thus causing the system
+ file to be locked forever.
+
+ ii) It can present files or directories of unlimited length, or
+ directory structures of unlimited depth, possibly causing a
+ system process to eat up diskspace, memory or other
+ resources, again causing DoS.
+
+ The solution to this as well as B) is not to allow processes
+ to access the filesystem, which could otherwise not be
+ monitored or manipulated by the mount owner. Since if the
+ mount owner can ptrace a process, it can do all of the above
+ without using a FUSE mount, the same criteria as used in
+ ptrace can be used to check if a process is allowed to access
+ the filesystem or not.
+
+ Note that the ptrace check is not strictly necessary to
+ prevent B/2/i, it is enough to check if mount owner has enough
+ privilege to send signal to the process accessing the
+ filesystem, since SIGSTOP can be used to get a similar effect.
+
+I think these limitations are unacceptable?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If a sysadmin trusts the users enough, or can ensure through other
+measures, that system processes will never enter non-privileged
+mounts, it can relax the last limitation with a "user_allow_other"
+config option. If this config option is set, the mounting user can
+add the "allow_other" mount option which disables the check for other
+users' processes.
+
+Kernel - userspace interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following diagram shows how a filesystem operation (in this
+example unlink) is performed in FUSE.
+
+NOTE: everything in this description is greatly simplified
+
+ | "rm /mnt/fuse/file" | FUSE filesystem daemon
+ | |
+ | | >sys_read()
+ | | >fuse_dev_read()
+ | | >request_wait()
+ | | [sleep on fc->waitq]
+ | |
+ | >sys_unlink() |
+ | >fuse_unlink() |
+ | [get request from |
+ | fc->unused_list] |
+ | >request_send() |
+ | [queue req on fc->pending] |
+ | [wake up fc->waitq] | [woken up]
+ | >request_wait_answer() |
+ | [sleep on req->waitq] |
+ | | <request_wait()
+ | | [remove req from fc->pending]
+ | | [copy req to read buffer]
+ | | [add req to fc->processing]
+ | | <fuse_dev_read()
+ | | <sys_read()
+ | |
+ | | [perform unlink]
+ | |
+ | | >sys_write()
+ | | >fuse_dev_write()
+ | | [look up req in fc->processing]
+ | | [remove from fc->processing]
+ | | [copy write buffer to req]
+ | [woken up] | [wake up req->waitq]
+ | | <fuse_dev_write()
+ | | <sys_write()
+ | <request_wait_answer() |
+ | <request_send() |
+ | [add request to |
+ | fc->unused_list] |
+ | <fuse_unlink() |
+ | <sys_unlink() |
+
+There are a couple of ways in which to deadlock a FUSE filesystem.
+Since we are talking about unprivileged userspace programs,
+something must be done about these.
+
+Scenario 1 - Simple deadlock
+-----------------------------
+
+ | "rm /mnt/fuse/file" | FUSE filesystem daemon
+ | |
+ | >sys_unlink("/mnt/fuse/file") |
+ | [acquire inode semaphore |
+ | for "file"] |
+ | >fuse_unlink() |
+ | [sleep on req->waitq] |
+ | | <sys_read()
+ | | >sys_unlink("/mnt/fuse/file")
+ | | [acquire inode semaphore
+ | | for "file"]
+ | | *DEADLOCK*
+
+The solution for this is to allow the filesystem to be aborted.
+
+Scenario 2 - Tricky deadlock
+----------------------------
+
+This one needs a carefully crafted filesystem. It's a variation on
+the above, only the call back to the filesystem is not explicit,
+but is caused by a pagefault.
+
+ | Kamikaze filesystem thread 1 | Kamikaze filesystem thread 2
+ | |
+ | [fd = open("/mnt/fuse/file")] | [request served normally]
+ | [mmap fd to 'addr'] |
+ | [close fd] | [FLUSH triggers 'magic' flag]
+ | [read a byte from addr] |
+ | >do_page_fault() |
+ | [find or create page] |
+ | [lock page] |
+ | >fuse_readpage() |
+ | [queue READ request] |
+ | [sleep on req->waitq] |
+ | | [read request to buffer]
+ | | [create reply header before addr]
+ | | >sys_write(addr - headerlength)
+ | | >fuse_dev_write()
+ | | [look up req in fc->processing]
+ | | [remove from fc->processing]
+ | | [copy write buffer to req]
+ | | >do_page_fault()
+ | | [find or create page]
+ | | [lock page]
+ | | * DEADLOCK *
+
+Solution is basically the same as above.
+
+An additional problem is that while the write buffer is being copied
+to the request, the request must not be interrupted/aborted. This is
+because the destination address of the copy may not be valid after the
+request has returned.
+
+This is solved with doing the copy atomically, and allowing abort
+while the page(s) belonging to the write buffer are faulted with
+get_user_pages(). The 'req->locked' flag indicates when the copy is
+taking place, and abort is delayed until this flag is unset.
diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt
new file mode 100644
index 000000000..fcc79957b
--- /dev/null
+++ b/Documentation/filesystems/gfs2-glocks.txt
@@ -0,0 +1,231 @@
+ Glock internal locking rules
+ ------------------------------
+
+This documents the basic principles of the glock state machine
+internals. Each glock (struct gfs2_glock in fs/gfs2/incore.h)
+has two main (internal) locks:
+
+ 1. A spinlock (gl_spin) which protects the internal state such
+ as gl_state, gl_target and the list of holders (gl_holders)
+ 2. A non-blocking bit lock, GLF_LOCK, which is used to prevent other
+ threads from making calls to the DLM, etc. at the same time. If a
+ thread takes this lock, it must then call run_queue (usually via the
+ workqueue) when it releases it in order to ensure any pending tasks
+ are completed.
+
+The gl_holders list contains all the queued lock requests (not
+just the holders) associated with the glock. If there are any
+held locks, then they will be contiguous entries at the head
+of the list. Locks are granted in strictly the order that they
+are queued, except for those marked LM_FLAG_PRIORITY which are
+used only during recovery, and even then only for journal locks.
+
+There are three lock states that users of the glock layer can request,
+namely shared (SH), deferred (DF) and exclusive (EX). Those translate
+to the following DLM lock modes:
+
+Glock mode | DLM lock mode
+------------------------------
+ UN | IV/NL Unlocked (no DLM lock associated with glock) or NL
+ SH | PR (Protected read)
+ DF | CW (Concurrent write)
+ EX | EX (Exclusive)
+
+Thus DF is basically a shared mode which is incompatible with the "normal"
+shared lock mode, SH. In GFS2 the DF mode is used exclusively for direct I/O
+operations. The glocks are basically a lock plus some routines which deal
+with cache management. The following rules apply for the cache:
+
+Glock mode | Cache data | Cache Metadata | Dirty Data | Dirty Metadata
+--------------------------------------------------------------------------
+ UN | No | No | No | No
+ SH | Yes | Yes | No | No
+ DF | No | Yes | No | No
+ EX | Yes | Yes | Yes | Yes
+
+These rules are implemented using the various glock operations which
+are defined for each type of glock. Not all types of glocks use
+all the modes. Only inode glocks use the DF mode for example.
+
+Table of glock operations and per type constants:
+
+Field | Purpose
+----------------------------------------------------------------------------
+go_xmote_th | Called before remote state change (e.g. to sync dirty data)
+go_xmote_bh | Called after remote state change (e.g. to refill cache)
+go_inval | Called if remote state change requires invalidating the cache
+go_demote_ok | Returns boolean value of whether its ok to demote a glock
+ | (e.g. checks timeout, and that there is no cached data)
+go_lock | Called for the first local holder of a lock
+go_unlock | Called on the final local unlock of a lock
+go_dump | Called to print content of object for debugfs file, or on
+ | error to dump glock to the log.
+go_type | The type of the glock, LM_TYPE_.....
+go_callback | Called if the DLM sends a callback to drop this lock
+go_flags | GLOF_ASPACE is set, if the glock has an address space
+ | associated with it
+
+The minimum hold time for each lock is the time after a remote lock
+grant for which we ignore remote demote requests. This is in order to
+prevent a situation where locks are being bounced around the cluster
+from node to node with none of the nodes making any progress. This
+tends to show up most with shared mmaped files which are being written
+to by multiple nodes. By delaying the demotion in response to a
+remote callback, that gives the userspace program time to make
+some progress before the pages are unmapped.
+
+There is a plan to try and remove the go_lock and go_unlock callbacks
+if possible, in order to try and speed up the fast path though the locking.
+Also, eventually we hope to make the glock "EX" mode locally shared
+such that any local locking will be done with the i_mutex as required
+rather than via the glock.
+
+Locking rules for glock operations:
+
+Operation | GLF_LOCK bit lock held | gl_spin spinlock held
+-----------------------------------------------------------------
+go_xmote_th | Yes | No
+go_xmote_bh | Yes | No
+go_inval | Yes | No
+go_demote_ok | Sometimes | Yes
+go_lock | Yes | No
+go_unlock | Yes | No
+go_dump | Sometimes | Yes
+go_callback | Sometimes (N/A) | Yes
+
+N.B. Operations must not drop either the bit lock or the spinlock
+if its held on entry. go_dump and do_demote_ok must never block.
+Note that go_dump will only be called if the glock's state
+indicates that it is caching uptodate data.
+
+Glock locking order within GFS2:
+
+ 1. i_mutex (if required)
+ 2. Rename glock (for rename only)
+ 3. Inode glock(s)
+ (Parents before children, inodes at "same level" with same parent in
+ lock number order)
+ 4. Rgrp glock(s) (for (de)allocation operations)
+ 5. Transaction glock (via gfs2_trans_begin) for non-read operations
+ 6. Page lock (always last, very important!)
+
+There are two glocks per inode. One deals with access to the inode
+itself (locking order as above), and the other, known as the iopen
+glock is used in conjunction with the i_nlink field in the inode to
+determine the lifetime of the inode in question. Locking of inodes
+is on a per-inode basis. Locking of rgrps is on a per rgrp basis.
+In general we prefer to lock local locks prior to cluster locks.
+
+ Glock Statistics
+ ------------------
+
+The stats are divided into two sets: those relating to the
+super block and those relating to an individual glock. The
+super block stats are done on a per cpu basis in order to
+try and reduce the overhead of gathering them. They are also
+further divided by glock type. All timings are in nanoseconds.
+
+In the case of both the super block and glock statistics,
+the same information is gathered in each case. The super
+block timing statistics are used to provide default values for
+the glock timing statistics, so that newly created glocks
+should have, as far as possible, a sensible starting point.
+The per-glock counters are initialised to zero when the
+glock is created. The per-glock statistics are lost when
+the glock is ejected from memory.
+
+The statistics are divided into three pairs of mean and
+variance, plus two counters. The mean/variance pairs are
+smoothed exponential estimates and the algorithm used is
+one which will be very familiar to those used to calculation
+of round trip times in network code. See "TCP/IP Illustrated,
+Volume 1", W. Richard Stevens, sect 21.3, "Round-Trip Time Measurement",
+p. 299 and onwards. Also, Volume 2, Sect. 25.10, p. 838 and onwards.
+Unlike the TCP/IP Illustrated case, the mean and variance are
+not scaled, but are in units of integer nanoseconds.
+
+The three pairs of mean/variance measure the following
+things:
+
+ 1. DLM lock time (non-blocking requests)
+ 2. DLM lock time (blocking requests)
+ 3. Inter-request time (again to the DLM)
+
+A non-blocking request is one which will complete right
+away, whatever the state of the DLM lock in question. That
+currently means any requests when (a) the current state of
+the lock is exclusive, i.e. a lock demotion (b) the requested
+state is either null or unlocked (again, a demotion) or (c) the
+"try lock" flag is set. A blocking request covers all the other
+lock requests.
+
+There are two counters. The first is there primarily to show
+how many lock requests have been made, and thus how much data
+has gone into the mean/variance calculations. The other counter
+is counting queuing of holders at the top layer of the glock
+code. Hopefully that number will be a lot larger than the number
+of dlm lock requests issued.
+
+So why gather these statistics? There are several reasons
+we'd like to get a better idea of these timings:
+
+1. To be able to better set the glock "min hold time"
+2. To spot performance issues more easily
+3. To improve the algorithm for selecting resource groups for
+allocation (to base it on lock wait time, rather than blindly
+using a "try lock")
+
+Due to the smoothing action of the updates, a step change in
+some input quantity being sampled will only fully be taken
+into account after 8 samples (or 4 for the variance) and this
+needs to be carefully considered when interpreting the
+results.
+
+Knowing both the time it takes a lock request to complete and
+the average time between lock requests for a glock means we
+can compute the total percentage of the time for which the
+node is able to use a glock vs. time that the rest of the
+cluster has its share. That will be very useful when setting
+the lock min hold time.
+
+Great care has been taken to ensure that we
+measure exactly the quantities that we want, as accurately
+as possible. There are always inaccuracies in any
+measuring system, but I hope this is as accurate as we
+can reasonably make it.
+
+Per sb stats can be found here:
+/sys/kernel/debug/gfs2/<fsname>/sbstats
+Per glock stats can be found here:
+/sys/kernel/debug/gfs2/<fsname>/glstats
+
+Assuming that debugfs is mounted on /sys/kernel/debug and also
+that <fsname> is replaced with the name of the gfs2 filesystem
+in question.
+
+The abbreviations used in the output as are follows:
+
+srtt - Smoothed round trip time for non-blocking dlm requests
+srttvar - Variance estimate for srtt
+srttb - Smoothed round trip time for (potentially) blocking dlm requests
+srttvarb - Variance estimate for srttb
+sirt - Smoothed inter-request time (for dlm requests)
+sirtvar - Variance estimate for sirt
+dlm - Number of dlm requests made (dcnt in glstats file)
+queue - Number of glock requests queued (qcnt in glstats file)
+
+The sbstats file contains a set of these stats for each glock type (so 8 lines
+for each type) and for each cpu (one column per cpu). The glstats file contains
+a set of these stats for each glock in a similar format to the glocks file, but
+using the format mean/variance for each of the timing stats.
+
+The gfs2_glock_lock_time tracepoint prints out the current values of the stats
+for the glock in question, along with some addition information on each dlm
+reply that is received:
+
+status - The status of the dlm request
+flags - The dlm request flags
+tdiff - The time taken by this specific request
+(remaining fields as per above list)
+
+
diff --git a/Documentation/filesystems/gfs2-uevents.txt b/Documentation/filesystems/gfs2-uevents.txt
new file mode 100644
index 000000000..19a19ebeb
--- /dev/null
+++ b/Documentation/filesystems/gfs2-uevents.txt
@@ -0,0 +1,100 @@
+ uevents and GFS2
+ ==================
+
+During the lifetime of a GFS2 mount, a number of uevents are generated.
+This document explains what the events are and what they are used
+for (by gfs_controld in gfs2-utils).
+
+A list of GFS2 uevents
+-----------------------
+
+1. ADD
+
+The ADD event occurs at mount time. It will always be the first
+uevent generated by the newly created filesystem. If the mount
+is successful, an ONLINE uevent will follow. If it is not successful
+then a REMOVE uevent will follow.
+
+The ADD uevent has two environment variables: SPECTATOR=[0|1]
+and RDONLY=[0|1] that specify the spectator status (a read-only mount
+with no journal assigned), and read-only (with journal assigned) status
+of the filesystem respectively.
+
+2. ONLINE
+
+The ONLINE uevent is generated after a successful mount or remount. It
+has the same environment variables as the ADD uevent. The ONLINE
+uevent, along with the two environment variables for spectator and
+RDONLY are a relatively recent addition (2.6.32-rc+) and will not
+be generated by older kernels.
+
+3. CHANGE
+
+The CHANGE uevent is used in two places. One is when reporting the
+successful mount of the filesystem by the first node (FIRSTMOUNT=Done).
+This is used as a signal by gfs_controld that it is then ok for other
+nodes in the cluster to mount the filesystem.
+
+The other CHANGE uevent is used to inform of the completion
+of journal recovery for one of the filesystems journals. It has
+two environment variables, JID= which specifies the journal id which
+has just been recovered, and RECOVERY=[Done|Failed] to indicate the
+success (or otherwise) of the operation. These uevents are generated
+for every journal recovered, whether it is during the initial mount
+process or as the result of gfs_controld requesting a specific journal
+recovery via the /sys/fs/gfs2/<fsname>/lock_module/recovery file.
+
+Because the CHANGE uevent was used (in early versions of gfs_controld)
+without checking the environment variables to discover the state, we
+cannot add any more functions to it without running the risk of
+someone using an older version of the user tools and breaking their
+cluster. For this reason the ONLINE uevent was used when adding a new
+uevent for a successful mount or remount.
+
+4. OFFLINE
+
+The OFFLINE uevent is only generated due to filesystem errors and is used
+as part of the "withdraw" mechanism. Currently this doesn't give any
+information about what the error is, which is something that needs to
+be fixed.
+
+5. REMOVE
+
+The REMOVE uevent is generated at the end of an unsuccessful mount
+or at the end of a umount of the filesystem. All REMOVE uevents will
+have been preceded by at least an ADD uevent for the same filesystem,
+and unlike the other uevents is generated automatically by the kernel's
+kobject subsystem.
+
+
+Information common to all GFS2 uevents (uevent environment variables)
+----------------------------------------------------------------------
+
+1. LOCKTABLE=
+
+The LOCKTABLE is a string, as supplied on the mount command
+line (locktable=) or via fstab. It is used as a filesystem label
+as well as providing the information for a lock_dlm mount to be
+able to join the cluster.
+
+2. LOCKPROTO=
+
+The LOCKPROTO is a string, and its value depends on what is set
+on the mount command line, or via fstab. It will be either
+lock_nolock or lock_dlm. In the future other lock managers
+may be supported.
+
+3. JOURNALID=
+
+If a journal is in use by the filesystem (journals are not
+assigned for spectator mounts) then this will give the
+numeric journal id in all GFS2 uevents.
+
+4. UUID=
+
+With recent versions of gfs2-utils, mkfs.gfs2 writes a UUID
+into the filesystem superblock. If it exists, this will
+be included in every uevent relating to the filesystem.
+
+
+
diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt
new file mode 100644
index 000000000..cc4f23066
--- /dev/null
+++ b/Documentation/filesystems/gfs2.txt
@@ -0,0 +1,45 @@
+Global File System
+------------------
+
+https://fedorahosted.org/cluster/wiki/HomePage
+
+GFS is a cluster file system. It allows a cluster of computers to
+simultaneously use a block device that is shared between them (with FC,
+iSCSI, NBD, etc). GFS reads and writes to the block device like a local
+file system, but also uses a lock module to allow the computers coordinate
+their I/O so file system consistency is maintained. One of the nifty
+features of GFS is perfect consistency -- changes made to the file system
+on one machine show up immediately on all other machines in the cluster.
+
+GFS uses interchangeable inter-node locking mechanisms, the currently
+supported mechanisms are:
+
+ lock_nolock -- allows gfs to be used as a local file system
+
+ lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking
+ The dlm is found at linux/fs/dlm/
+
+Lock_dlm depends on user space cluster management systems found
+at the URL above.
+
+To use gfs as a local file system, no external clustering systems are
+needed, simply:
+
+ $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device
+ $ mount -t gfs2 /dev/block_device /dir
+
+If you are using Fedora, you need to install the gfs2-utils package
+and, for lock_dlm, you will also need to install the cman package
+and write a cluster.conf as per the documentation. For F17 and above
+cman has been replaced by the dlm package.
+
+GFS2 is not on-disk compatible with previous versions of GFS, but it
+is pretty close.
+
+The following man pages can be found at the URL above:
+ fsck.gfs2 to repair a filesystem
+ gfs2_grow to expand a filesystem online
+ gfs2_jadd to add journals to a filesystem online
+ tunegfs2 to manipulate, examine and tune a filesystem
+ gfs2_convert to convert a gfs filesystem to gfs2 in-place
+ mkfs.gfs2 to make a filesystem
diff --git a/Documentation/filesystems/hfs.txt b/Documentation/filesystems/hfs.txt
new file mode 100644
index 000000000..d096df6db
--- /dev/null
+++ b/Documentation/filesystems/hfs.txt
@@ -0,0 +1,82 @@
+Note: This filesystem doesn't have a maintainer.
+
+Macintosh HFS Filesystem for Linux
+==================================
+
+HFS stands for ``Hierarchical File System'' and is the filesystem used
+by the Mac Plus and all later Macintosh models. Earlier Macintosh
+models used MFS (``Macintosh File System''), which is not supported,
+MacOS 8.1 and newer support a filesystem called HFS+ that's similar to
+HFS but is extended in various areas. Use the hfsplus filesystem driver
+to access such filesystems from Linux.
+
+
+Mount options
+=============
+
+When mounting an HFS filesystem, the following options are accepted:
+
+ creator=cccc, type=cccc
+ Specifies the creator/type values as shown by the MacOS finder
+ used for creating new files. Default values: '????'.
+
+ uid=n, gid=n
+ Specifies the user/group that owns all files on the filesystems.
+ Default: user/group id of the mounting process.
+
+ dir_umask=n, file_umask=n, umask=n
+ Specifies the umask used for all files , all directories or all
+ files and directories. Defaults to the umask of the mounting process.
+
+ session=n
+ Select the CDROM session to mount as HFS filesystem. Defaults to
+ leaving that decision to the CDROM driver. This option will fail
+ with anything but a CDROM as underlying devices.
+
+ part=n
+ Select partition number n from the devices. Does only makes
+ sense for CDROMS because they can't be partitioned under Linux.
+ For disk devices the generic partition parsing code does this
+ for us. Defaults to not parsing the partition table at all.
+
+ quiet
+ Ignore invalid mount options instead of complaining.
+
+
+Writing to HFS Filesystems
+==========================
+
+HFS is not a UNIX filesystem, thus it does not have the usual features you'd
+expect:
+
+ o You can't modify the set-uid, set-gid, sticky or executable bits or the uid
+ and gid of files.
+ o You can't create hard- or symlinks, device files, sockets or FIFOs.
+
+HFS does on the other have the concepts of multiple forks per file. These
+non-standard forks are represented as hidden additional files in the normal
+filesystems namespace which is kind of a cludge and makes the semantics for
+the a little strange:
+
+ o You can't create, delete or rename resource forks of files or the
+ Finder's metadata.
+ o They are however created (with default values), deleted and renamed
+ along with the corresponding data fork or directory.
+ o Copying files to a different filesystem will loose those attributes
+ that are essential for MacOS to work.
+
+
+Creating HFS filesystems
+===================================
+
+The hfsutils package from Robert Leslie contains a program called
+hformat that can be used to create HFS filesystem. See
+<http://www.mars.org/home/rob/proj/hfs/> for details.
+
+
+Credits
+=======
+
+The HFS drivers was written by Paul H. Hargrovea (hargrove@sccm.Stanford.EDU).
+Roman Zippel (roman@ardistech.com) rewrote large parts of the code and brought
+in btree routines derived from Brad Boyer's hfsplus driver.
diff --git a/Documentation/filesystems/hfsplus.txt b/Documentation/filesystems/hfsplus.txt
new file mode 100644
index 000000000..59f7569fc
--- /dev/null
+++ b/Documentation/filesystems/hfsplus.txt
@@ -0,0 +1,59 @@
+
+Macintosh HFSPlus Filesystem for Linux
+======================================
+
+HFSPlus is a filesystem first introduced in MacOS 8.1.
+HFSPlus has several extensions to HFS, including 32-bit allocation
+blocks, 255-character unicode filenames, and file sizes of 2^63 bytes.
+
+
+Mount options
+=============
+
+When mounting an HFSPlus filesystem, the following options are accepted:
+
+ creator=cccc, type=cccc
+ Specifies the creator/type values as shown by the MacOS finder
+ used for creating new files. Default values: '????'.
+
+ uid=n, gid=n
+ Specifies the user/group that owns all files on the filesystem
+ that have uninitialized permissions structures.
+ Default: user/group id of the mounting process.
+
+ umask=n
+ Specifies the umask (in octal) used for files and directories
+ that have uninitialized permissions structures.
+ Default: umask of the mounting process.
+
+ session=n
+ Select the CDROM session to mount as HFSPlus filesystem. Defaults to
+ leaving that decision to the CDROM driver. This option will fail
+ with anything but a CDROM as underlying devices.
+
+ part=n
+ Select partition number n from the devices. This option only makes
+ sense for CDROMs because they can't be partitioned under Linux.
+ For disk devices the generic partition parsing code does this
+ for us. Defaults to not parsing the partition table at all.
+
+ decompose
+ Decompose file name characters.
+
+ nodecompose
+ Do not decompose file name characters.
+
+ force
+ Used to force write access to volumes that are marked as journalled
+ or locked. Use at your own risk.
+
+ nls=cccc
+ Encoding to use when presenting file names.
+
+
+References
+==========
+
+kernel source: <file:fs/hfsplus>
+
+Apple Technote 1150 https://developer.apple.com/legacy/library/technotes/tn/tn1150.html
diff --git a/Documentation/filesystems/hpfs.txt b/Documentation/filesystems/hpfs.txt
new file mode 100644
index 000000000..74630bd50
--- /dev/null
+++ b/Documentation/filesystems/hpfs.txt
@@ -0,0 +1,296 @@
+Read/Write HPFS 2.09
+1998-2004, Mikulas Patocka
+
+email: mikulas@artax.karlin.mff.cuni.cz
+homepage: http://artax.karlin.mff.cuni.cz/~mikulas/vyplody/hpfs/index-e.cgi
+
+CREDITS:
+Chris Smith, 1993, original read-only HPFS, some code and hpfs structures file
+ is taken from it
+Jacques Gelinas, MSDos mmap, Inspired by fs/nfs/mmap.c (Jon Tombs 15 Aug 1993)
+Werner Almesberger, 1992, 1993, MSDos option parser & CR/LF conversion
+
+Mount options
+
+uid=xxx,gid=xxx,umask=xxx (default uid=gid=0 umask=default_system_umask)
+ Set owner/group/mode for files that do not have it specified in extended
+ attributes. Mode is inverted umask - for example umask 027 gives owner
+ all permission, group read permission and anybody else no access. Note
+ that for files mode is anded with 0666. If you want files to have 'x'
+ rights, you must use extended attributes.
+case=lower,asis (default asis)
+ File name lowercasing in readdir.
+conv=binary,text,auto (default binary)
+ CR/LF -> LF conversion, if auto, decision is made according to extension
+ - there is a list of text extensions (I thing it's better to not convert
+ text file than to damage binary file). If you want to change that list,
+ change it in the source. Original readonly HPFS contained some strange
+ heuristic algorithm that I removed. I thing it's danger to let the
+ computer decide whether file is text or binary. For example, DJGPP
+ binaries contain small text message at the beginning and they could be
+ misidentified and damaged under some circumstances.
+check=none,normal,strict (default normal)
+ Check level. Selecting none will cause only little speedup and big
+ danger. I tried to write it so that it won't crash if check=normal on
+ corrupted filesystems. check=strict means many superfluous checks -
+ used for debugging (for example it checks if file is allocated in
+ bitmaps when accessing it).
+errors=continue,remount-ro,panic (default remount-ro)
+ Behaviour when filesystem errors found.
+chkdsk=no,errors,always (default errors)
+ When to mark filesystem dirty so that OS/2 checks it.
+eas=no,ro,rw (default rw)
+ What to do with extended attributes. 'no' - ignore them and use always
+ values specified in uid/gid/mode options. 'ro' - read extended
+ attributes but do not create them. 'rw' - create extended attributes
+ when you use chmod/chown/chgrp/mknod/ln -s on the filesystem.
+timeshift=(-)nnn (default 0)
+ Shifts the time by nnn seconds. For example, if you see under linux
+ one hour more, than under os/2, use timeshift=-3600.
+
+
+File names
+
+As in OS/2, filenames are case insensitive. However, shell thinks that names
+are case sensitive, so for example when you create a file FOO, you can use
+'cat FOO', 'cat Foo', 'cat foo' or 'cat F*' but not 'cat f*'. Note, that you
+also won't be able to compile linux kernel (and maybe other things) on HPFS
+because kernel creates different files with names like bootsect.S and
+bootsect.s. When searching for file thats name has characters >= 128, codepages
+are used - see below.
+OS/2 ignores dots and spaces at the end of file name, so this driver does as
+well. If you create 'a. ...', the file 'a' will be created, but you can still
+access it under names 'a.', 'a..', 'a . . . ' etc.
+
+
+Extended attributes
+
+On HPFS partitions, OS/2 can associate to each file a special information called
+extended attributes. Extended attributes are pairs of (key,value) where key is
+an ascii string identifying that attribute and value is any string of bytes of
+variable length. OS/2 stores window and icon positions and file types there. So
+why not use it for unix-specific info like file owner or access rights? This
+driver can do it. If you chown/chgrp/chmod on a hpfs partition, extended
+attributes with keys "UID", "GID" or "MODE" and 2-byte values are created. Only
+that extended attributes those value differs from defaults specified in mount
+options are created. Once created, the extended attributes are never deleted,
+they're just changed. It means that when your default uid=0 and you type
+something like 'chown luser file; chown root file' the file will contain
+extended attribute UID=0. And when you umount the fs and mount it again with
+uid=luser_uid, the file will be still owned by root! If you chmod file to 444,
+extended attribute "MODE" will not be set, this special case is done by setting
+read-only flag. When you mknod a block or char device, besides "MODE", the
+special 4-byte extended attribute "DEV" will be created containing the device
+number. Currently this driver cannot resize extended attributes - it means
+that if somebody (I don't know who?) has set "UID", "GID", "MODE" or "DEV"
+attributes with different sizes, they won't be rewritten and changing these
+values doesn't work.
+
+
+Symlinks
+
+You can do symlinks on HPFS partition, symlinks are achieved by setting extended
+attribute named "SYMLINK" with symlink value. Like on ext2, you can chown and
+chgrp symlinks but I don't know what is it good for. chmoding symlink results
+in chmoding file where symlink points. These symlinks are just for Linux use and
+incompatible with OS/2. OS/2 PmShell symlinks are not supported because they are
+stored in very crazy way. They tried to do it so that link changes when file is
+moved ... sometimes it works. But the link is partly stored in directory
+extended attributes and partly in OS2SYS.INI. I don't want (and don't know how)
+to analyze or change OS2SYS.INI.
+
+
+Codepages
+
+HPFS can contain several uppercasing tables for several codepages and each
+file has a pointer to codepage its name is in. However OS/2 was created in
+America where people don't care much about codepages and so multiple codepages
+support is quite buggy. I have Czech OS/2 working in codepage 852 on my disk.
+Once I booted English OS/2 working in cp 850 and I created a file on my 852
+partition. It marked file name codepage as 850 - good. But when I again booted
+Czech OS/2, the file was completely inaccessible under any name. It seems that
+OS/2 uppercases the search pattern with its system code page (852) and file
+name it's comparing to with its code page (850). These could never match. Is it
+really what IBM developers wanted? But problems continued. When I created in
+Czech OS/2 another file in that directory, that file was inaccessible too. OS/2
+probably uses different uppercasing method when searching where to place a file
+(note, that files in HPFS directory must be sorted) and when searching for
+a file. Finally when I opened this directory in PmShell, PmShell crashed (the
+funny thing was that, when rebooted, PmShell tried to reopen this directory
+again :-). chkdsk happily ignores these errors and only low-level disk
+modification saved me. Never mix different language versions of OS/2 on one
+system although HPFS was designed to allow that.
+OK, I could implement complex codepage support to this driver but I think it
+would cause more problems than benefit with such buggy implementation in OS/2.
+So this driver simply uses first codepage it finds for uppercasing and
+lowercasing no matter what's file codepage index. Usually all file names are in
+this codepage - if you don't try to do what I described above :-)
+
+
+Known bugs
+
+HPFS386 on OS/2 server is not supported. HPFS386 installed on normal OS/2 client
+should work. If you have OS/2 server, use only read-only mode. I don't know how
+to handle some HPFS386 structures like access control list or extended perm
+list, I don't know how to delete them when file is deleted and how to not
+overwrite them with extended attributes. Send me some info on these structures
+and I'll make it. However, this driver should detect presence of HPFS386
+structures, remount read-only and not destroy them (I hope).
+
+When there's not enough space for extended attributes, they will be truncated
+and no error is returned.
+
+OS/2 can't access files if the path is longer than about 256 chars but this
+driver allows you to do it. chkdsk ignores such errors.
+
+Sometimes you won't be able to delete some files on a very full filesystem
+(returning error ENOSPC). That's because file in non-leaf node in directory tree
+(one directory, if it's large, has dirents in tree on HPFS) must be replaced
+with another node when deleted. And that new file might have larger name than
+the old one so the new name doesn't fit in directory node (dnode). And that
+would result in directory tree splitting, that takes disk space. Workaround is
+to delete other files that are leaf (probability that the file is non-leaf is
+about 1/50) or to truncate file first to make some space.
+You encounter this problem only if you have many directories so that
+preallocated directory band is full i.e.
+ number_of_directories / size_of_filesystem_in_mb > 4.
+
+You can't delete open directories.
+
+You can't rename over directories (what is it good for?).
+
+Renaming files so that only case changes doesn't work. This driver supports it
+but vfs doesn't. Something like 'mv file FILE' won't work.
+
+All atimes and directory mtimes are not updated. That's because of performance
+reasons. If you extremely wish to update them, let me know, I'll write it (but
+it will be slow).
+
+When the system is out of memory and swap, it may slightly corrupt filesystem
+(lost files, unbalanced directories). (I guess all filesystem may do it).
+
+When compiled, you get warning: function declaration isn't a prototype. Does
+anybody know what does it mean?
+
+
+What does "unbalanced tree" message mean?
+
+Old versions of this driver created sometimes unbalanced dnode trees. OS/2
+chkdsk doesn't scream if the tree is unbalanced (and sometimes creates
+unbalanced trees too :-) but both HPFS and HPFS386 contain bug that it rarely
+crashes when the tree is not balanced. This driver handles unbalanced trees
+correctly and writes warning if it finds them. If you see this message, this is
+probably because of directories created with old version of this driver.
+Workaround is to move all files from that directory to another and then back
+again. Do it in Linux, not OS/2! If you see this message in directory that is
+whole created by this driver, it is BUG - let me know about it.
+
+
+Bugs in OS/2
+
+When you have two (or more) lost directories pointing each to other, chkdsk
+locks up when repairing filesystem.
+
+Sometimes (I think it's random) when you create a file with one-char name under
+OS/2, OS/2 marks it as 'long'. chkdsk then removes this flag saying "Minor fs
+error corrected".
+
+File names like "a .b" are marked as 'long' by OS/2 but chkdsk "corrects" it and
+marks them as short (and writes "minor fs error corrected"). This bug is not in
+HPFS386.
+
+Codepage bugs described above.
+
+If you don't install fixpacks, there are many, many more...
+
+
+History
+
+0.90 First public release
+0.91 Fixed bug that caused shooting to memory when write_inode was called on
+ open inode (rarely happened)
+0.92 Fixed a little memory leak in freeing directory inodes
+0.93 Fixed bug that locked up the machine when there were too many filenames
+ with first 15 characters same
+ Fixed write_file to zero file when writing behind file end
+0.94 Fixed a little memory leak when trying to delete busy file or directory
+0.95 Fixed a bug that i_hpfs_parent_dir was not updated when moving files
+1.90 First version for 2.1.1xx kernels
+1.91 Fixed a bug that chk_sectors failed when sectors were at the end of disk
+ Fixed a race-condition when write_inode is called while deleting file
+ Fixed a bug that could possibly happen (with very low probability) when
+ using 0xff in filenames
+ Rewritten locking to avoid race-conditions
+ Mount option 'eas' now works
+ Fsync no longer returns error
+ Files beginning with '.' are marked hidden
+ Remount support added
+ Alloc is not so slow when filesystem becomes full
+ Atimes are no more updated because it slows down operation
+ Code cleanup (removed all commented debug prints)
+1.92 Corrected a bug when sync was called just before closing file
+1.93 Modified, so that it works with kernels >= 2.1.131, I don't know if it
+ works with previous versions
+ Fixed a possible problem with disks > 64G (but I don't have one, so I can't
+ test it)
+ Fixed a file overflow at 2G
+ Added new option 'timeshift'
+ Changed behaviour on HPFS386: It is now possible to operate on HPFS386 in
+ read-only mode
+ Fixed a bug that slowed down alloc and prevented allocating 100% space
+ (this bug was not destructive)
+1.94 Added workaround for one bug in Linux
+ Fixed one buffer leak
+ Fixed some incompatibilities with large extended attributes (but it's still
+ not 100% ok, I have no info on it and OS/2 doesn't want to create them)
+ Rewritten allocation
+ Fixed a bug with i_blocks (du sometimes didn't display correct values)
+ Directories have no longer archive attribute set (some programs don't like
+ it)
+ Fixed a bug that it set badly one flag in large anode tree (it was not
+ destructive)
+1.95 Fixed one buffer leak, that could happen on corrupted filesystem
+ Fixed one bug in allocation in 1.94
+1.96 Added workaround for one bug in OS/2 (HPFS locked up, HPFS386 reported
+ error sometimes when opening directories in PMSHELL)
+ Fixed a possible bitmap race
+ Fixed possible problem on large disks
+ You can now delete open files
+ Fixed a nondestructive race in rename
+1.97 Support for HPFS v3 (on large partitions)
+ Fixed a bug that it didn't allow creation of files > 128M (it should be 2G)
+1.97.1 Changed names of global symbols
+ Fixed a bug when chmoding or chowning root directory
+1.98 Fixed a deadlock when using old_readdir
+ Better directory handling; workaround for "unbalanced tree" bug in OS/2
+1.99 Corrected a possible problem when there's not enough space while deleting
+ file
+ Now it tries to truncate the file if there's not enough space when deleting
+ Removed a lot of redundant code
+2.00 Fixed a bug in rename (it was there since 1.96)
+ Better anti-fragmentation strategy
+2.01 Fixed problem with directory listing over NFS
+ Directory lseek now checks for proper parameters
+ Fixed race-condition in buffer code - it is in all filesystems in Linux;
+ when reading device (cat /dev/hda) while creating files on it, files
+ could be damaged
+2.02 Workaround for bug in breada in Linux. breada could cause accesses beyond
+ end of partition
+2.03 Char, block devices and pipes are correctly created
+ Fixed non-crashing race in unlink (Alexander Viro)
+ Now it works with Japanese version of OS/2
+2.04 Fixed error when ftruncate used to extend file
+2.05 Fixed crash when got mount parameters without =
+ Fixed crash when allocation of anode failed due to full disk
+ Fixed some crashes when block io or inode allocation failed
+2.06 Fixed some crash on corrupted disk structures
+ Better allocation strategy
+ Reschedule points added so that it doesn't lock CPU long time
+ It should work in read-only mode on Warp Server
+2.07 More fixes for Warp Server. Now it really works
+2.08 Creating new files is not so slow on large disks
+ An attempt to sync deleted file does not generate filesystem error
+2.09 Fixed error on extremely fragmented files
+
+
+ vim: set textwidth=80:
diff --git a/Documentation/filesystems/inotify.txt b/Documentation/filesystems/inotify.txt
new file mode 100644
index 000000000..51f61db78
--- /dev/null
+++ b/Documentation/filesystems/inotify.txt
@@ -0,0 +1,79 @@
+ inotify
+ a powerful yet simple file change notification system
+
+
+
+Document started 15 Mar 2005 by Robert Love <rml@novell.com>
+Document updated 4 Jan 2015 by Zhang Zhen <zhenzhang.zhang@huawei.com>
+ --Deleted obsoleted interface, just refer to manpages for user interface.
+
+(i) Rationale
+
+Q: What is the design decision behind not tying the watch to the open fd of
+ the watched object?
+
+A: Watches are associated with an open inotify device, not an open file.
+ This solves the primary problem with dnotify: keeping the file open pins
+ the file and thus, worse, pins the mount. Dnotify is therefore infeasible
+ for use on a desktop system with removable media as the media cannot be
+ unmounted. Watching a file should not require that it be open.
+
+Q: What is the design decision behind using an-fd-per-instance as opposed to
+ an fd-per-watch?
+
+A: An fd-per-watch quickly consumes more file descriptors than are allowed,
+ more fd's than are feasible to manage, and more fd's than are optimally
+ select()-able. Yes, root can bump the per-process fd limit and yes, users
+ can use epoll, but requiring both is a silly and extraneous requirement.
+ A watch consumes less memory than an open file, separating the number
+ spaces is thus sensible. The current design is what user-space developers
+ want: Users initialize inotify, once, and add n watches, requiring but one
+ fd and no twiddling with fd limits. Initializing an inotify instance two
+ thousand times is silly. If we can implement user-space's preferences
+ cleanly--and we can, the idr layer makes stuff like this trivial--then we
+ should.
+
+ There are other good arguments. With a single fd, there is a single
+ item to block on, which is mapped to a single queue of events. The single
+ fd returns all watch events and also any potential out-of-band data. If
+ every fd was a separate watch,
+
+ - There would be no way to get event ordering. Events on file foo and
+ file bar would pop poll() on both fd's, but there would be no way to tell
+ which happened first. A single queue trivially gives you ordering. Such
+ ordering is crucial to existing applications such as Beagle. Imagine
+ "mv a b ; mv b a" events without ordering.
+
+ - We'd have to maintain n fd's and n internal queues with state,
+ versus just one. It is a lot messier in the kernel. A single, linear
+ queue is the data structure that makes sense.
+
+ - User-space developers prefer the current API. The Beagle guys, for
+ example, love it. Trust me, I asked. It is not a surprise: Who'd want
+ to manage and block on 1000 fd's via select?
+
+ - No way to get out of band data.
+
+ - 1024 is still too low. ;-)
+
+ When you talk about designing a file change notification system that
+ scales to 1000s of directories, juggling 1000s of fd's just does not seem
+ the right interface. It is too heavy.
+
+ Additionally, it _is_ possible to more than one instance and
+ juggle more than one queue and thus more than one associated fd. There
+ need not be a one-fd-per-process mapping; it is one-fd-per-queue and a
+ process can easily want more than one queue.
+
+Q: Why the system call approach?
+
+A: The poor user-space interface is the second biggest problem with dnotify.
+ Signals are a terrible, terrible interface for file notification. Or for
+ anything, for that matter. The ideal solution, from all perspectives, is a
+ file descriptor-based one that allows basic file I/O and poll/select.
+ Obtaining the fd and managing the watches could have been done either via a
+ device file or a family of new system calls. We decided to implement a
+ family of system calls because that is the preferred approach for new kernel
+ interfaces. The only real difference was whether we wanted to use open(2)
+ and ioctl(2) or a couple of new system calls. System calls beat ioctls.
+
diff --git a/Documentation/filesystems/isofs.txt b/Documentation/filesystems/isofs.txt
new file mode 100644
index 000000000..ba0a93384
--- /dev/null
+++ b/Documentation/filesystems/isofs.txt
@@ -0,0 +1,48 @@
+Mount options that are the same as for msdos and vfat partitions.
+
+ gid=nnn All files in the partition will be in group nnn.
+ uid=nnn All files in the partition will be owned by user id nnn.
+ umask=nnn The permission mask (see umask(1)) for the partition.
+
+Mount options that are the same as vfat partitions. These are only useful
+when using discs encoded using Microsoft's Joliet extensions.
+ iocharset=name Character set to use for converting from Unicode to
+ ASCII. Joliet filenames are stored in Unicode format, but
+ Unix for the most part doesn't know how to deal with Unicode.
+ There is also an option of doing UTF-8 translations with the
+ utf8 option.
+ utf8 Encode Unicode names in UTF-8 format. Default is no.
+
+Mount options unique to the isofs filesystem.
+ block=512 Set the block size for the disk to 512 bytes
+ block=1024 Set the block size for the disk to 1024 bytes
+ block=2048 Set the block size for the disk to 2048 bytes
+ check=relaxed Matches filenames with different cases
+ check=strict Matches only filenames with the exact same case
+ cruft Try to handle badly formatted CDs.
+ map=off Do not map non-Rock Ridge filenames to lower case
+ map=normal Map non-Rock Ridge filenames to lower case
+ map=acorn As map=normal but also apply Acorn extensions if present
+ mode=xxx Sets the permissions on files to xxx unless Rock Ridge
+ extensions set the permissions otherwise
+ dmode=xxx Sets the permissions on directories to xxx unless Rock Ridge
+ extensions set the permissions otherwise
+ overriderockperm Set permissions on files and directories according to
+ 'mode' and 'dmode' even though Rock Ridge extensions are
+ present.
+ nojoliet Ignore Joliet extensions if they are present.
+ norock Ignore Rock Ridge extensions if they are present.
+ hide Completely strip hidden files from the file system.
+ showassoc Show files marked with the 'associated' bit
+ unhide Deprecated; showing hidden files is now default;
+ If given, it is a synonym for 'showassoc' which will
+ recreate previous unhide behavior
+ session=x Select number of session on multisession CD
+ sbsector=xxx Session begins from sector xxx
+
+Recommended documents about ISO 9660 standard are located at:
+http://www.y-adagio.com/
+ftp://ftp.ecma.ch/ecma-st/Ecma-119.pdf
+Quoting from the PDF "This 2nd Edition of Standard ECMA-119 is technically
+identical with ISO 9660.", so it is a valid and gratis substitute of the
+official ISO specification.
diff --git a/Documentation/filesystems/jfs.txt b/Documentation/filesystems/jfs.txt
new file mode 100644
index 000000000..41fd75799
--- /dev/null
+++ b/Documentation/filesystems/jfs.txt
@@ -0,0 +1,52 @@
+IBM's Journaled File System (JFS) for Linux
+
+JFS Homepage: http://jfs.sourceforge.net/
+
+The following mount options are supported:
+(*) == default
+
+iocharset=name Character set to use for converting from Unicode to
+ ASCII. The default is to do no conversion. Use
+ iocharset=utf8 for UTF-8 translations. This requires
+ CONFIG_NLS_UTF8 to be set in the kernel .config file.
+ iocharset=none specifies the default behavior explicitly.
+
+resize=value Resize the volume to <value> blocks. JFS only supports
+ growing a volume, not shrinking it. This option is only
+ valid during a remount, when the volume is mounted
+ read-write. The resize keyword with no value will grow
+ the volume to the full size of the partition.
+
+nointegrity Do not write to the journal. The primary use of this option
+ is to allow for higher performance when restoring a volume
+ from backup media. The integrity of the volume is not
+ guaranteed if the system abnormally abends.
+
+integrity(*) Commit metadata changes to the journal. Use this option to
+ remount a volume where the nointegrity option was
+ previously specified in order to restore normal behavior.
+
+errors=continue Keep going on a filesystem error.
+errors=remount-ro(*) Remount the filesystem read-only on an error.
+errors=panic Panic and halt the machine if an error occurs.
+
+uid=value Override on-disk uid with specified value
+gid=value Override on-disk gid with specified value
+umask=value Override on-disk umask with specified octal value. For
+ directories, the execute bit will be set if the corresponding
+ read bit is set.
+
+discard=minlen This enables/disables the use of discard/TRIM commands.
+discard The discard/TRIM commands are sent to the underlying
+nodiscard(*) block device when blocks are freed. This is useful for SSD
+ devices and sparse/thinly-provisioned LUNs. The FITRIM ioctl
+ command is also available together with the nodiscard option.
+ The value of minlen specifies the minimum blockcount, when
+ a TRIM command to the block device is considered useful.
+ When no value is given to the discard option, it defaults to
+ 64 blocks, which means 256KiB in JFS.
+ The minlen value of discard overrides the minlen value given
+ on an FITRIM ioctl().
+
+The JFS mailing list can be subscribed to by using the link labeled
+"Mail list Subscribe" at our web page http://jfs.sourceforge.net/
diff --git a/Documentation/filesystems/locks.txt b/Documentation/filesystems/locks.txt
new file mode 100644
index 000000000..2cf810825
--- /dev/null
+++ b/Documentation/filesystems/locks.txt
@@ -0,0 +1,68 @@
+ File Locking Release Notes
+
+ Andy Walker <andy@lysaker.kvaerner.no>
+
+ 12 May 1997
+
+
+1. What's New?
+--------------
+
+1.1 Broken Flock Emulation
+--------------------------
+
+The old flock(2) emulation in the kernel was swapped for proper BSD
+compatible flock(2) support in the 1.3.x series of kernels. With the
+release of the 2.1.x kernel series, support for the old emulation has
+been totally removed, so that we don't need to carry this baggage
+forever.
+
+This should not cause problems for anybody, since everybody using a
+2.1.x kernel should have updated their C library to a suitable version
+anyway (see the file "Documentation/Changes".)
+
+1.2 Allow Mixed Locks Again
+---------------------------
+
+1.2.1 Typical Problems - Sendmail
+---------------------------------
+Because sendmail was unable to use the old flock() emulation, many sendmail
+installations use fcntl() instead of flock(). This is true of Slackware 3.0
+for example. This gave rise to some other subtle problems if sendmail was
+configured to rebuild the alias file. Sendmail tried to lock the aliases.dir
+file with fcntl() at the same time as the GDBM routines tried to lock this
+file with flock(). With pre 1.3.96 kernels this could result in deadlocks that,
+over time, or under a very heavy mail load, would eventually cause the kernel
+to lock solid with deadlocked processes.
+
+
+1.2.2 The Solution
+------------------
+The solution I have chosen, after much experimentation and discussion,
+is to make flock() and fcntl() locks oblivious to each other. Both can
+exists, and neither will have any effect on the other.
+
+I wanted the two lock styles to be cooperative, but there were so many
+race and deadlock conditions that the current solution was the only
+practical one. It puts us in the same position as, for example, SunOS
+4.1.x and several other commercial Unices. The only OS's that support
+cooperative flock()/fcntl() are those that emulate flock() using
+fcntl(), with all the problems that implies.
+
+
+1.3 Mandatory Locking As A Mount Option
+---------------------------------------
+
+Mandatory locking, as described in
+'Documentation/filesystems/mandatory-locking.txt' was prior to this release a
+general configuration option that was valid for all mounted filesystems. This
+had a number of inherent dangers, not the least of which was the ability to
+freeze an NFS server by asking it to read a file for which a mandatory lock
+existed.
+
+From this release of the kernel, mandatory locking can be turned on and off
+on a per-filesystem basis, using the mount options 'mand' and 'nomand'.
+The default is to disallow mandatory locking. The intention is that
+mandatory locking only be enabled on a local filesystem as the specific need
+arises.
+
diff --git a/Documentation/filesystems/logfs.txt b/Documentation/filesystems/logfs.txt
new file mode 100644
index 000000000..bca42c22a
--- /dev/null
+++ b/Documentation/filesystems/logfs.txt
@@ -0,0 +1,241 @@
+
+The LogFS Flash Filesystem
+==========================
+
+Specification
+=============
+
+Superblocks
+-----------
+
+Two superblocks exist at the beginning and end of the filesystem.
+Each superblock is 256 Bytes large, with another 3840 Bytes reserved
+for future purposes, making a total of 4096 Bytes.
+
+Superblock locations may differ for MTD and block devices. On MTD the
+first non-bad block contains a superblock in the first 4096 Bytes and
+the last non-bad block contains a superblock in the last 4096 Bytes.
+On block devices, the first 4096 Bytes of the device contain the first
+superblock and the last aligned 4096 Byte-block contains the second
+superblock.
+
+For the most part, the superblocks can be considered read-only. They
+are written only to correct errors detected within the superblocks,
+move the journal and change the filesystem parameters through tunefs.
+As a result, the superblock does not contain any fields that require
+constant updates, like the amount of free space, etc.
+
+Segments
+--------
+
+The space in the device is split up into equal-sized segments.
+Segments are the primary write unit of LogFS. Within each segments,
+writes happen from front (low addresses) to back (high addresses. If
+only a partial segment has been written, the segment number, the
+current position within and optionally a write buffer are stored in
+the journal.
+
+Segments are erased as a whole. Therefore Garbage Collection may be
+required to completely free a segment before doing so.
+
+Journal
+--------
+
+The journal contains all global information about the filesystem that
+is subject to frequent change. At mount time, it has to be scanned
+for the most recent commit entry, which contains a list of pointers to
+all currently valid entries.
+
+Object Store
+------------
+
+All space except for the superblocks and journal is part of the object
+store. Each segment contains a segment header and a number of
+objects, each consisting of the object header and the payload.
+Objects are either inodes, directory entries (dentries), file data
+blocks or indirect blocks.
+
+Levels
+------
+
+Garbage collection (GC) may fail if all data is written
+indiscriminately. One requirement of GC is that data is separated
+roughly according to the distance between the tree root and the data.
+Effectively that means all file data is on level 0, indirect blocks
+are on levels 1, 2, 3 4 or 5 for 1x, 2x, 3x, 4x or 5x indirect blocks,
+respectively. Inode file data is on level 6 for the inodes and 7-11
+for indirect blocks.
+
+Each segment contains objects of a single level only. As a result,
+each level requires its own separate segment to be open for writing.
+
+Inode File
+----------
+
+All inodes are stored in a special file, the inode file. Single
+exception is the inode file's inode (master inode) which for obvious
+reasons is stored in the journal instead. Instead of data blocks, the
+leaf nodes of the inode files are inodes.
+
+Aliases
+-------
+
+Writes in LogFS are done by means of a wandering tree. A naïve
+implementation would require that for each write or a block, all
+parent blocks are written as well, since the block pointers have
+changed. Such an implementation would not be very efficient.
+
+In LogFS, the block pointer changes are cached in the journal by means
+of alias entries. Each alias consists of its logical address - inode
+number, block index, level and child number (index into block) - and
+the changed data. Any 8-byte word can be changes in this manner.
+
+Currently aliases are used for block pointers, file size, file used
+bytes and the height of an inodes indirect tree.
+
+Segment Aliases
+---------------
+
+Related to regular aliases, these are used to handle bad blocks.
+Initially, bad blocks are handled by moving the affected segment
+content to a spare segment and noting this move in the journal with a
+segment alias, a simple (to, from) tupel. GC will later empty this
+segment and the alias can be removed again. This is used on MTD only.
+
+Vim
+---
+
+By cleverly predicting the life time of data, it is possible to
+separate long-living data from short-living data and thereby reduce
+the GC overhead later. Each type of distinc life expectency (vim) can
+have a separate segment open for writing. Each (level, vim) tupel can
+be open just once. If an open segment with unknown vim is encountered
+at mount time, it is closed and ignored henceforth.
+
+Indirect Tree
+-------------
+
+Inodes in LogFS are similar to FFS-style filesystems with direct and
+indirect block pointers. One difference is that LogFS uses a single
+indirect pointer that can be either a 1x, 2x, etc. indirect pointer.
+A height field in the inode defines the height of the indirect tree
+and thereby the indirection of the pointer.
+
+Another difference is the addressing of indirect blocks. In LogFS,
+the first 16 pointers in the first indirect block are left empty,
+corresponding to the 16 direct pointers in the inode. In ext2 (maybe
+others as well) the first pointer in the first indirect block
+corresponds to logical block 12, skipping the 12 direct pointers.
+So where ext2 is using arithmetic to better utilize space, LogFS keeps
+arithmetic simple and uses compression to save space.
+
+Compression
+-----------
+
+Both file data and metadata can be compressed. Compression for file
+data can be enabled with chattr +c and disabled with chattr -c. Doing
+so has no effect on existing data, but new data will be stored
+accordingly. New inodes will inherit the compression flag of the
+parent directory.
+
+Metadata is always compressed. However, the space accounting ignores
+this and charges for the uncompressed size. Failing to do so could
+result in GC failures when, after moving some data, indirect blocks
+compress worse than previously. Even on a 100% full medium, GC may
+not consume any extra space, so the compression gains are lost space
+to the user.
+
+However, they are not lost space to the filesystem internals. By
+cheating the user for those bytes, the filesystem gained some slack
+space and GC will run less often and faster.
+
+Garbage Collection and Wear Leveling
+------------------------------------
+
+Garbage collection is invoked whenever the number of free segments
+falls below a threshold. The best (known) candidate is picked based
+on the least amount of valid data contained in the segment. All
+remaining valid data is copied elsewhere, thereby invalidating it.
+
+The GC code also checks for aliases and writes then back if their
+number gets too large.
+
+Wear leveling is done by occasionally picking a suboptimal segment for
+garbage collection. If a stale segments erase count is significantly
+lower than the active segments' erase counts, it will be picked. Wear
+leveling is rate limited, so it will never monopolize the device for
+more than one segment worth at a time.
+
+Values for "occasionally", "significantly lower" are compile time
+constants.
+
+Hashed directories
+------------------
+
+To satisfy efficient lookup(), directory entries are hashed and
+located based on the hash. In order to both support large directories
+and not be overly inefficient for small directories, several hash
+tables of increasing size are used. For each table, the hash value
+modulo the table size gives the table index.
+
+Tables sizes are chosen to limit the number of indirect blocks with a
+fully populated table to 0, 1, 2 or 3 respectively. So the first
+table contains 16 entries, the second 512-16, etc.
+
+The last table is special in several ways. First its size depends on
+the effective 32bit limit on telldir/seekdir cookies. Since logfs
+uses the upper half of the address space for indirect blocks, the size
+is limited to 2^31. Secondly the table contains hash buckets with 16
+entries each.
+
+Using single-entry buckets would result in birthday "attacks". At
+just 2^16 used entries, hash collisions would be likely (P >= 0.5).
+My math skills are insufficient to do the combinatorics for the 17x
+collisions necessary to overflow a bucket, but testing showed that in
+10,000 runs the lowest directory fill before a bucket overflow was
+188,057,130 entries with an average of 315,149,915 entries. So for
+directory sizes of up to a million, bucket overflows should be
+virtually impossible under normal circumstances.
+
+With carefully chosen filenames, it is obviously possible to cause an
+overflow with just 21 entries (4 higher tables + 16 entries + 1). So
+there may be a security concern if a malicious user has write access
+to a directory.
+
+Open For Discussion
+===================
+
+Device Address Space
+--------------------
+
+A device address space is used for caching. Both block devices and
+MTD provide functions to either read a single page or write a segment.
+Partial segments may be written for data integrity, but where possible
+complete segments are written for performance on simple block device
+flash media.
+
+Meta Inodes
+-----------
+
+Inodes are stored in the inode file, which is just a regular file for
+most purposes. At umount time, however, the inode file needs to
+remain open until all dirty inodes are written. So
+generic_shutdown_super() may not close this inode, but shouldn't
+complain about remaining inodes due to the inode file either. Same
+goes for mapping inode of the device address space.
+
+Currently logfs uses a hack that essentially copies part of fs/inode.c
+code over. A general solution would be preferred.
+
+Indirect block mapping
+----------------------
+
+With compression, the block device (or mapping inode) cannot be used
+to cache indirect blocks. Some other place is required. Currently
+logfs uses the top half of each inode's address space. The low 8TB
+(on 32bit) are filled with file data, the high 8TB are used for
+indirect blocks.
+
+One problem is that 16TB files created on 64bit systems actually have
+data in the top 8TB. But files >16TB would cause problems anyway, so
+only the limit has changed.
diff --git a/Documentation/filesystems/mandatory-locking.txt b/Documentation/filesystems/mandatory-locking.txt
new file mode 100644
index 000000000..0979d1d2c
--- /dev/null
+++ b/Documentation/filesystems/mandatory-locking.txt
@@ -0,0 +1,171 @@
+ Mandatory File Locking For The Linux Operating System
+
+ Andy Walker <andy@lysaker.kvaerner.no>
+
+ 15 April 1996
+ (Updated September 2007)
+
+0. Why you should avoid mandatory locking
+-----------------------------------------
+
+The Linux implementation is prey to a number of difficult-to-fix race
+conditions which in practice make it not dependable:
+
+ - The write system call checks for a mandatory lock only once
+ at its start. It is therefore possible for a lock request to
+ be granted after this check but before the data is modified.
+ A process may then see file data change even while a mandatory
+ lock was held.
+ - Similarly, an exclusive lock may be granted on a file after
+ the kernel has decided to proceed with a read, but before the
+ read has actually completed, and the reading process may see
+ the file data in a state which should not have been visible
+ to it.
+ - Similar races make the claimed mutual exclusion between lock
+ and mmap similarly unreliable.
+
+1. What is mandatory locking?
+------------------------------
+
+Mandatory locking is kernel enforced file locking, as opposed to the more usual
+cooperative file locking used to guarantee sequential access to files among
+processes. File locks are applied using the flock() and fcntl() system calls
+(and the lockf() library routine which is a wrapper around fcntl().) It is
+normally a process' responsibility to check for locks on a file it wishes to
+update, before applying its own lock, updating the file and unlocking it again.
+The most commonly used example of this (and in the case of sendmail, the most
+troublesome) is access to a user's mailbox. The mail user agent and the mail
+transfer agent must guard against updating the mailbox at the same time, and
+prevent reading the mailbox while it is being updated.
+
+In a perfect world all processes would use and honour a cooperative, or
+"advisory" locking scheme. However, the world isn't perfect, and there's
+a lot of poorly written code out there.
+
+In trying to address this problem, the designers of System V UNIX came up
+with a "mandatory" locking scheme, whereby the operating system kernel would
+block attempts by a process to write to a file that another process holds a
+"read" -or- "shared" lock on, and block attempts to both read and write to a
+file that a process holds a "write " -or- "exclusive" lock on.
+
+The System V mandatory locking scheme was intended to have as little impact as
+possible on existing user code. The scheme is based on marking individual files
+as candidates for mandatory locking, and using the existing fcntl()/lockf()
+interface for applying locks just as if they were normal, advisory locks.
+
+Note 1: In saying "file" in the paragraphs above I am actually not telling
+the whole truth. System V locking is based on fcntl(). The granularity of
+fcntl() is such that it allows the locking of byte ranges in files, in addition
+to entire files, so the mandatory locking rules also have byte level
+granularity.
+
+Note 2: POSIX.1 does not specify any scheme for mandatory locking, despite
+borrowing the fcntl() locking scheme from System V. The mandatory locking
+scheme is defined by the System V Interface Definition (SVID) Version 3.
+
+2. Marking a file for mandatory locking
+---------------------------------------
+
+A file is marked as a candidate for mandatory locking by setting the group-id
+bit in its file mode but removing the group-execute bit. This is an otherwise
+meaningless combination, and was chosen by the System V implementors so as not
+to break existing user programs.
+
+Note that the group-id bit is usually automatically cleared by the kernel when
+a setgid file is written to. This is a security measure. The kernel has been
+modified to recognize the special case of a mandatory lock candidate and to
+refrain from clearing this bit. Similarly the kernel has been modified not
+to run mandatory lock candidates with setgid privileges.
+
+3. Available implementations
+----------------------------
+
+I have considered the implementations of mandatory locking available with
+SunOS 4.1.x, Solaris 2.x and HP-UX 9.x.
+
+Generally I have tried to make the most sense out of the behaviour exhibited
+by these three reference systems. There are many anomalies.
+
+All the reference systems reject all calls to open() for a file on which
+another process has outstanding mandatory locks. This is in direct
+contravention of SVID 3, which states that only calls to open() with the
+O_TRUNC flag set should be rejected. The Linux implementation follows the SVID
+definition, which is the "Right Thing", since only calls with O_TRUNC can
+modify the contents of the file.
+
+HP-UX even disallows open() with O_TRUNC for a file with advisory locks, not
+just mandatory locks. That would appear to contravene POSIX.1.
+
+mmap() is another interesting case. All the operating systems mentioned
+prevent mandatory locks from being applied to an mmap()'ed file, but HP-UX
+also disallows advisory locks for such a file. SVID actually specifies the
+paranoid HP-UX behaviour.
+
+In my opinion only MAP_SHARED mappings should be immune from locking, and then
+only from mandatory locks - that is what is currently implemented.
+
+SunOS is so hopeless that it doesn't even honour the O_NONBLOCK flag for
+mandatory locks, so reads and writes to locked files always block when they
+should return EAGAIN.
+
+I'm afraid that this is such an esoteric area that the semantics described
+below are just as valid as any others, so long as the main points seem to
+agree.
+
+4. Semantics
+------------
+
+1. Mandatory locks can only be applied via the fcntl()/lockf() locking
+ interface - in other words the System V/POSIX interface. BSD style
+ locks using flock() never result in a mandatory lock.
+
+2. If a process has locked a region of a file with a mandatory read lock, then
+ other processes are permitted to read from that region. If any of these
+ processes attempts to write to the region it will block until the lock is
+ released, unless the process has opened the file with the O_NONBLOCK
+ flag in which case the system call will return immediately with the error
+ status EAGAIN.
+
+3. If a process has locked a region of a file with a mandatory write lock, all
+ attempts to read or write to that region block until the lock is released,
+ unless a process has opened the file with the O_NONBLOCK flag in which case
+ the system call will return immediately with the error status EAGAIN.
+
+4. Calls to open() with O_TRUNC, or to creat(), on a existing file that has
+ any mandatory locks owned by other processes will be rejected with the
+ error status EAGAIN.
+
+5. Attempts to apply a mandatory lock to a file that is memory mapped and
+ shared (via mmap() with MAP_SHARED) will be rejected with the error status
+ EAGAIN.
+
+6. Attempts to create a shared memory map of a file (via mmap() with MAP_SHARED)
+ that has any mandatory locks in effect will be rejected with the error status
+ EAGAIN.
+
+5. Which system calls are affected?
+-----------------------------------
+
+Those which modify a file's contents, not just the inode. That gives read(),
+write(), readv(), writev(), open(), creat(), mmap(), truncate() and
+ftruncate(). truncate() and ftruncate() are considered to be "write" actions
+for the purposes of mandatory locking.
+
+The affected region is usually defined as stretching from the current position
+for the total number of bytes read or written. For the truncate calls it is
+defined as the bytes of a file removed or added (we must also consider bytes
+added, as a lock can specify just "the whole file", rather than a specific
+range of bytes.)
+
+Note 3: I may have overlooked some system calls that need mandatory lock
+checking in my eagerness to get this code out the door. Please let me know, or
+better still fix the system calls yourself and submit a patch to me or Linus.
+
+6. Warning!
+-----------
+
+Not even root can override a mandatory lock, so runaway processes can wreak
+havoc if they lock crucial files. The way around it is to change the file
+permissions (remove the setgid bit) before trying to read or write to it.
+Of course, that might be a bit tricky if the system is hung :-(
+
diff --git a/Documentation/filesystems/ncpfs.txt b/Documentation/filesystems/ncpfs.txt
new file mode 100644
index 000000000..5af164f4b
--- /dev/null
+++ b/Documentation/filesystems/ncpfs.txt
@@ -0,0 +1,12 @@
+The ncpfs filesystem understands the NCP protocol, designed by the
+Novell Corporation for their NetWare(tm) product. NCP is functionally
+similar to the NFS used in the TCP/IP community.
+To mount a NetWare filesystem, you need a special mount program, which
+can be found in the ncpfs package. The home site for ncpfs is
+ftp.gwdg.de/pub/linux/misc/ncpfs, but sunsite and its many mirrors
+will have it as well.
+
+Related products are linware and mars_nwe, which will give Linux partial
+NetWare server functionality.
+
+mars_nwe can be found on ftp.gwdg.de/pub/linux/misc/ncpfs.
diff --git a/Documentation/filesystems/nfs/00-INDEX b/Documentation/filesystems/nfs/00-INDEX
new file mode 100644
index 000000000..53f3b596a
--- /dev/null
+++ b/Documentation/filesystems/nfs/00-INDEX
@@ -0,0 +1,26 @@
+00-INDEX
+ - this file (nfs-related documentation).
+Exporting
+ - explanation of how to make filesystems exportable.
+fault_injection.txt
+ - information for using fault injection on the server
+knfsd-stats.txt
+ - statistics which the NFS server makes available to user space.
+nfs.txt
+ - nfs client, and DNS resolution for fs_locations.
+nfs41-server.txt
+ - info on the Linux server implementation of NFSv4 minor version 1.
+nfs-rdma.txt
+ - how to install and setup the Linux NFS/RDMA client and server software
+nfsd-admin-interfaces.txt
+ - Administrative interfaces for nfsd.
+nfsroot.txt
+ - short guide on setting up a diskless box with NFS root filesystem.
+pnfs.txt
+ - short explanation of some of the internals of the pnfs client code
+rpc-cache.txt
+ - introduction to the caching mechanisms in the sunrpc layer.
+idmapper.txt
+ - information for configuring request-keys to be used by idmapper
+rpc-server-gss.txt
+ - Information on GSS authentication support in the NFS Server
diff --git a/Documentation/filesystems/nfs/Exporting b/Documentation/filesystems/nfs/Exporting
new file mode 100644
index 000000000..520a4becb
--- /dev/null
+++ b/Documentation/filesystems/nfs/Exporting
@@ -0,0 +1,149 @@
+
+Making Filesystems Exportable
+=============================
+
+Overview
+--------
+
+All filesystem operations require a dentry (or two) as a starting
+point. Local applications have a reference-counted hold on suitable
+dentries via open file descriptors or cwd/root. However remote
+applications that access a filesystem via a remote filesystem protocol
+such as NFS may not be able to hold such a reference, and so need a
+different way to refer to a particular dentry. As the alternative
+form of reference needs to be stable across renames, truncates, and
+server-reboot (among other things, though these tend to be the most
+problematic), there is no simple answer like 'filename'.
+
+The mechanism discussed here allows each filesystem implementation to
+specify how to generate an opaque (outside of the filesystem) byte
+string for any dentry, and how to find an appropriate dentry for any
+given opaque byte string.
+This byte string will be called a "filehandle fragment" as it
+corresponds to part of an NFS filehandle.
+
+A filesystem which supports the mapping between filehandle fragments
+and dentries will be termed "exportable".
+
+
+
+Dcache Issues
+-------------
+
+The dcache normally contains a proper prefix of any given filesystem
+tree. This means that if any filesystem object is in the dcache, then
+all of the ancestors of that filesystem object are also in the dcache.
+As normal access is by filename this prefix is created naturally and
+maintained easily (by each object maintaining a reference count on
+its parent).
+
+However when objects are included into the dcache by interpreting a
+filehandle fragment, there is no automatic creation of a path prefix
+for the object. This leads to two related but distinct features of
+the dcache that are not needed for normal filesystem access.
+
+1/ The dcache must sometimes contain objects that are not part of the
+ proper prefix. i.e that are not connected to the root.
+2/ The dcache must be prepared for a newly found (via ->lookup) directory
+ to already have a (non-connected) dentry, and must be able to move
+ that dentry into place (based on the parent and name in the
+ ->lookup). This is particularly needed for directories as
+ it is a dcache invariant that directories only have one dentry.
+
+To implement these features, the dcache has:
+
+a/ A dentry flag DCACHE_DISCONNECTED which is set on
+ any dentry that might not be part of the proper prefix.
+ This is set when anonymous dentries are created, and cleared when a
+ dentry is noticed to be a child of a dentry which is in the proper
+ prefix.
+
+b/ A per-superblock list "s_anon" of dentries which are the roots of
+ subtrees that are not in the proper prefix. These dentries, as
+ well as the proper prefix, need to be released at unmount time. As
+ these dentries will not be hashed, they are linked together on the
+ d_hash list_head.
+
+c/ Helper routines to allocate anonymous dentries, and to help attach
+ loose directory dentries at lookup time. They are:
+ d_obtain_alias(inode) will return a dentry for the given inode.
+ If the inode already has a dentry, one of those is returned.
+ If it doesn't, a new anonymous (IS_ROOT and
+ DCACHE_DISCONNECTED) dentry is allocated and attached.
+ In the case of a directory, care is taken that only one dentry
+ can ever be attached.
+ d_splice_alias(inode, dentry) will introduce a new dentry into the tree;
+ either the passed-in dentry or a preexisting alias for the given inode
+ (such as an anonymous one created by d_obtain_alias), if appropriate.
+ It returns NULL when the passed-in dentry is used, following the calling
+ convention of ->lookup.
+
+
+Filesystem Issues
+-----------------
+
+For a filesystem to be exportable it must:
+
+ 1/ provide the filehandle fragment routines described below.
+ 2/ make sure that d_splice_alias is used rather than d_add
+ when ->lookup finds an inode for a given parent and name.
+
+ If inode is NULL, d_splice_alias(inode, dentry) is equivalent to
+
+ d_add(dentry, inode), NULL
+
+ Similarly, d_splice_alias(ERR_PTR(err), dentry) = ERR_PTR(err)
+
+ Typically the ->lookup routine will simply end with a:
+
+ return d_splice_alias(inode, dentry);
+ }
+
+
+
+ A file system implementation declares that instances of the filesystem
+are exportable by setting the s_export_op field in the struct
+super_block. This field must point to a "struct export_operations"
+struct which has the following members:
+
+ encode_fh (optional)
+ Takes a dentry and creates a filehandle fragment which can later be used
+ to find or create a dentry for the same object. The default
+ implementation creates a filehandle fragment that encodes a 32bit inode
+ and generation number for the inode encoded, and if necessary the
+ same information for the parent.
+
+ fh_to_dentry (mandatory)
+ Given a filehandle fragment, this should find the implied object and
+ create a dentry for it (possibly with d_obtain_alias).
+
+ fh_to_parent (optional but strongly recommended)
+ Given a filehandle fragment, this should find the parent of the
+ implied object and create a dentry for it (possibly with
+ d_obtain_alias). May fail if the filehandle fragment is too small.
+
+ get_parent (optional but strongly recommended)
+ When given a dentry for a directory, this should return a dentry for
+ the parent. Quite possibly the parent dentry will have been allocated
+ by d_alloc_anon. The default get_parent function just returns an error
+ so any filehandle lookup that requires finding a parent will fail.
+ ->lookup("..") is *not* used as a default as it can leave ".." entries
+ in the dcache which are too messy to work with.
+
+ get_name (optional)
+ When given a parent dentry and a child dentry, this should find a name
+ in the directory identified by the parent dentry, which leads to the
+ object identified by the child dentry. If no get_name function is
+ supplied, a default implementation is provided which uses vfs_readdir
+ to find potential names, and matches inode numbers to find the correct
+ match.
+
+
+A filehandle fragment consists of an array of 1 or more 4byte words,
+together with a one byte "type".
+The decode_fh routine should not depend on the stated size that is
+passed to it. This size may be larger than the original filehandle
+generated by encode_fh, in which case it will have been padded with
+nuls. Rather, the encode_fh routine should choose a "type" which
+indicates the decode_fh how much of the filehandle is valid, and how
+it should be interpreted.
diff --git a/Documentation/filesystems/nfs/fault_injection.txt b/Documentation/filesystems/nfs/fault_injection.txt
new file mode 100644
index 000000000..426d16608
--- /dev/null
+++ b/Documentation/filesystems/nfs/fault_injection.txt
@@ -0,0 +1,69 @@
+
+Fault Injection
+===============
+Fault injection is a method for forcing errors that may not normally occur, or
+may be difficult to reproduce. Forcing these errors in a controlled environment
+can help the developer find and fix bugs before their code is shipped in a
+production system. Injecting an error on the Linux NFS server will allow us to
+observe how the client reacts and if it manages to recover its state correctly.
+
+NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this
+feature.
+
+
+Using Fault Injection
+=====================
+On the client, mount the fault injection server through NFS v4.0+ and do some
+work over NFS (open files, take locks, ...).
+
+On the server, mount the debugfs filesystem to <debug_dir> and ls
+<debug_dir>/nfsd. This will show a list of files that will be used for
+injecting faults on the NFS server. As root, write a number n to the file
+corresponding to the action you want the server to take. The server will then
+process the first n items it finds. So if you want to forget 5 locks, echo '5'
+to <debug_dir>/nfsd/forget_locks. A value of 0 will tell the server to forget
+all corresponding items. A log message will be created containing the number
+of items forgotten (check dmesg).
+
+Go back to work on the client and check if the client recovered from the error
+correctly.
+
+
+Available Faults
+================
+forget_clients:
+ The NFS server keeps a list of clients that have placed a mount call. If
+ this list is cleared, the server will have no knowledge of who the client
+ is, forcing the client to reauthenticate with the server.
+
+forget_openowners:
+ The NFS server keeps a list of what files are currently opened and who
+ they were opened by. Clearing this list will force the client to reopen
+ its files.
+
+forget_locks:
+ The NFS server keeps a list of what files are currently locked in the VFS.
+ Clearing this list will force the client to reclaim its locks (files are
+ unlocked through the VFS as they are cleared from this list).
+
+forget_delegations:
+ A delegation is used to assure the client that a file, or part of a file,
+ has not changed since the delegation was awarded. Clearing this list will
+ force the client to reaquire its delegation before accessing the file
+ again.
+
+recall_delegations:
+ Delegations can be recalled by the server when another client attempts to
+ access a file. This test will notify the client that its delegation has
+ been revoked, forcing the client to reaquire the delegation before using
+ the file again.
+
+
+tools/nfs/inject_faults.sh script
+=================================
+This script has been created to ease the fault injection process. This script
+will detect the mounted debugfs directory and write to the files located there
+based on the arguments passed by the user. For example, running
+`inject_faults.sh forget_locks 1` as root will instruct the server to forget
+one lock. Running `inject_faults forget_locks` will instruct the server to
+forgetall locks.
diff --git a/Documentation/filesystems/nfs/idmapper.txt b/Documentation/filesystems/nfs/idmapper.txt
new file mode 100644
index 000000000..fe03d10bb
--- /dev/null
+++ b/Documentation/filesystems/nfs/idmapper.txt
@@ -0,0 +1,75 @@
+
+=========
+ID Mapper
+=========
+Id mapper is used by NFS to translate user and group ids into names, and to
+translate user and group names into ids. Part of this translation involves
+performing an upcall to userspace to request the information. There are two
+ways NFS could obtain this information: placing a call to /sbin/request-key
+or by placing a call to the rpc.idmap daemon.
+
+NFS will attempt to call /sbin/request-key first. If this succeeds, the
+result will be cached using the generic request-key cache. This call should
+only fail if /etc/request-key.conf is not configured for the id_resolver key
+type, see the "Configuring" section below if you wish to use the request-key
+method.
+
+If the call to /sbin/request-key fails (if /etc/request-key.conf is not
+configured with the id_resolver key type), then the idmapper will ask the
+legacy rpc.idmap daemon for the id mapping. This result will be stored
+in a custom NFS idmap cache.
+
+
+===========
+Configuring
+===========
+The file /etc/request-key.conf will need to be modified so /sbin/request-key can
+direct the upcall. The following line should be added:
+
+#OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ...
+#====== ======= =============== =============== ===============================
+create id_resolver * * /usr/sbin/nfs.idmap %k %d 600
+
+This will direct all id_resolver requests to the program /usr/sbin/nfs.idmap.
+The last parameter, 600, defines how many seconds into the future the key will
+expire. This parameter is optional for /usr/sbin/nfs.idmap. When the timeout
+is not specified, nfs.idmap will default to 600 seconds.
+
+id mapper uses for key descriptions:
+ uid: Find the UID for the given user
+ gid: Find the GID for the given group
+ user: Find the user name for the given UID
+ group: Find the group name for the given GID
+
+You can handle any of these individually, rather than using the generic upcall
+program. If you would like to use your own program for a uid lookup then you
+would edit your request-key.conf so it look similar to this:
+
+#OP TYPE DESCRIPTION CALLOUT INFO PROGRAM ARG1 ARG2 ARG3 ...
+#====== ======= =============== =============== ===============================
+create id_resolver uid:* * /some/other/program %k %d 600
+create id_resolver * * /usr/sbin/nfs.idmap %k %d 600
+
+Notice that the new line was added above the line for the generic program.
+request-key will find the first matching line and corresponding program. In
+this case, /some/other/program will handle all uid lookups and
+/usr/sbin/nfs.idmap will handle gid, user, and group lookups.
+
+See <file:Documentation/security/keys-request-key.txt> for more information
+about the request-key function.
+
+
+=========
+nfs.idmap
+=========
+nfs.idmap is designed to be called by request-key, and should not be run "by
+hand". This program takes two arguments, a serialized key and a key
+description. The serialized key is first converted into a key_serial_t, and
+then passed as an argument to keyctl_instantiate (both are part of keyutils.h).
+
+The actual lookups are performed by functions found in nfsidmap.h. nfs.idmap
+determines the correct function to call by looking at the first part of the
+description string. For example, a uid lookup description will appear as
+"uid:user@domain".
+
+nfs.idmap will return 0 if the key was instantiated, and non-zero otherwise.
diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt
new file mode 100644
index 000000000..64ced5149
--- /dev/null
+++ b/Documentation/filesystems/nfs/knfsd-stats.txt
@@ -0,0 +1,159 @@
+
+Kernel NFS Server Statistics
+============================
+
+This document describes the format and semantics of the statistics
+which the kernel NFS server makes available to userspace. These
+statistics are available in several text form pseudo files, each of
+which is described separately below.
+
+In most cases you don't need to know these formats, as the nfsstat(8)
+program from the nfs-utils distribution provides a helpful command-line
+interface for extracting and printing them.
+
+All the files described here are formatted as a sequence of text lines,
+separated by newline '\n' characters. Lines beginning with a hash
+'#' character are comments intended for humans and should be ignored
+by parsing routines. All other lines contain a sequence of fields
+separated by whitespace.
+
+/proc/fs/nfsd/pool_stats
+------------------------
+
+This file is available in kernels from 2.6.30 onwards, if the
+/proc/fs/nfsd filesystem is mounted (it almost always should be).
+
+The first line is a comment which describes the fields present in
+all the other lines. The other lines present the following data as
+a sequence of unsigned decimal numeric fields. One line is shown
+for each NFS thread pool.
+
+All counters are 64 bits wide and wrap naturally. There is no way
+to zero these counters, instead applications should do their own
+rate conversion.
+
+pool
+ The id number of the NFS thread pool to which this line applies.
+ This number does not change.
+
+ Thread pool ids are a contiguous set of small integers starting
+ at zero. The maximum value depends on the thread pool mode, but
+ currently cannot be larger than the number of CPUs in the system.
+ Note that in the default case there will be a single thread pool
+ which contains all the nfsd threads and all the CPUs in the system,
+ and thus this file will have a single line with a pool id of "0".
+
+packets-arrived
+ Counts how many NFS packets have arrived. More precisely, this
+ is the number of times that the network stack has notified the
+ sunrpc server layer that new data may be available on a transport
+ (e.g. an NFS or UDP socket or an NFS/RDMA endpoint).
+
+ Depending on the NFS workload patterns and various network stack
+ effects (such as Large Receive Offload) which can combine packets
+ on the wire, this may be either more or less than the number
+ of NFS calls received (which statistic is available elsewhere).
+ However this is a more accurate and less workload-dependent measure
+ of how much CPU load is being placed on the sunrpc server layer
+ due to NFS network traffic.
+
+sockets-enqueued
+ Counts how many times an NFS transport is enqueued to wait for
+ an nfsd thread to service it, i.e. no nfsd thread was considered
+ available.
+
+ The circumstance this statistic tracks indicates that there was NFS
+ network-facing work to be done but it couldn't be done immediately,
+ thus introducing a small delay in servicing NFS calls. The ideal
+ rate of change for this counter is zero; significantly non-zero
+ values may indicate a performance limitation.
+
+ This can happen either because there are too few nfsd threads in the
+ thread pool for the NFS workload (the workload is thread-limited),
+ or because the NFS workload needs more CPU time than is available in
+ the thread pool (the workload is CPU-limited). In the former case,
+ configuring more nfsd threads will probably improve the performance
+ of the NFS workload. In the latter case, the sunrpc server layer is
+ already choosing not to wake idle nfsd threads because there are too
+ many nfsd threads which want to run but cannot, so configuring more
+ nfsd threads will make no difference whatsoever. The overloads-avoided
+ statistic (see below) can be used to distinguish these cases.
+
+threads-woken
+ Counts how many times an idle nfsd thread is woken to try to
+ receive some data from an NFS transport.
+
+ This statistic tracks the circumstance where incoming
+ network-facing NFS work is being handled quickly, which is a good
+ thing. The ideal rate of change for this counter will be close
+ to but less than the rate of change of the packets-arrived counter.
+
+overloads-avoided
+ Counts how many times the sunrpc server layer chose not to wake an
+ nfsd thread, despite the presence of idle nfsd threads, because
+ too many nfsd threads had been recently woken but could not get
+ enough CPU time to actually run.
+
+ This statistic counts a circumstance where the sunrpc layer
+ heuristically avoids overloading the CPU scheduler with too many
+ runnable nfsd threads. The ideal rate of change for this counter
+ is zero. Significant non-zero values indicate that the workload
+ is CPU limited. Usually this is associated with heavy CPU usage
+ on all the CPUs in the nfsd thread pool.
+
+ If a sustained large overloads-avoided rate is detected on a pool,
+ the top(1) utility should be used to check for the following
+ pattern of CPU usage on all the CPUs associated with the given
+ nfsd thread pool.
+
+ - %us ~= 0 (as you're *NOT* running applications on your NFS server)
+
+ - %wa ~= 0
+
+ - %id ~= 0
+
+ - %sy + %hi + %si ~= 100
+
+ If this pattern is seen, configuring more nfsd threads will *not*
+ improve the performance of the workload. If this patten is not
+ seen, then something more subtle is wrong.
+
+threads-timedout
+ Counts how many times an nfsd thread triggered an idle timeout,
+ i.e. was not woken to handle any incoming network packets for
+ some time.
+
+ This statistic counts a circumstance where there are more nfsd
+ threads configured than can be used by the NFS workload. This is
+ a clue that the number of nfsd threads can be reduced without
+ affecting performance. Unfortunately, it's only a clue and not
+ a strong indication, for a couple of reasons:
+
+ - Currently the rate at which the counter is incremented is quite
+ slow; the idle timeout is 60 minutes. Unless the NFS workload
+ remains constant for hours at a time, this counter is unlikely
+ to be providing information that is still useful.
+
+ - It is usually a wise policy to provide some slack,
+ i.e. configure a few more nfsds than are currently needed,
+ to allow for future spikes in load.
+
+
+Note that incoming packets on NFS transports will be dealt with in
+one of three ways. An nfsd thread can be woken (threads-woken counts
+this case), or the transport can be enqueued for later attention
+(sockets-enqueued counts this case), or the packet can be temporarily
+deferred because the transport is currently being used by an nfsd
+thread. This last case is not very interesting and is not explicitly
+counted, but can be inferred from the other counters thus:
+
+packets-deferred = packets-arrived - ( sockets-enqueued + threads-woken )
+
+
+More
+----
+Descriptions of the other statistics file should go here.
+
+
+Greg Banks <gnb@sgi.com>
+26 Mar 2009
diff --git a/Documentation/filesystems/nfs/nfs-rdma.txt b/Documentation/filesystems/nfs/nfs-rdma.txt
new file mode 100644
index 000000000..95c13aa57
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs-rdma.txt
@@ -0,0 +1,276 @@
+################################################################################
+# #
+# NFS/RDMA README #
+# #
+################################################################################
+
+ Author: NetApp and Open Grid Computing
+ Date: May 29, 2008
+
+Table of Contents
+~~~~~~~~~~~~~~~~~
+ - Overview
+ - Getting Help
+ - Installation
+ - Check RDMA and NFS Setup
+ - NFS/RDMA Setup
+
+Overview
+~~~~~~~~
+
+ This document describes how to install and setup the Linux NFS/RDMA client
+ and server software.
+
+ The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server
+ was first included in the following release, Linux 2.6.25.
+
+ In our testing, we have obtained excellent performance results (full 10Gbit
+ wire bandwidth at minimal client CPU) under many workloads. The code passes
+ the full Connectathon test suite and operates over both Infiniband and iWARP
+ RDMA adapters.
+
+Getting Help
+~~~~~~~~~~~~
+
+ If you get stuck, you can ask questions on the
+
+ nfs-rdma-devel@lists.sourceforge.net
+
+ mailing list.
+
+Installation
+~~~~~~~~~~~~
+
+ These instructions are a step by step guide to building a machine for
+ use with NFS/RDMA.
+
+ - Install an RDMA device
+
+ Any device supported by the drivers in drivers/infiniband/hw is acceptable.
+
+ Testing has been performed using several Mellanox-based IB cards, the
+ Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter.
+
+ - Install a Linux distribution and tools
+
+ The first kernel release to contain both the NFS/RDMA client and server was
+ Linux 2.6.25 Therefore, a distribution compatible with this and subsequent
+ Linux kernel release should be installed.
+
+ The procedures described in this document have been tested with
+ distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
+
+ - Install nfs-utils-1.1.2 or greater on the client
+
+ An NFS/RDMA mount point can be obtained by using the mount.nfs command in
+ nfs-utils-1.1.2 or greater (nfs-utils-1.1.1 was the first nfs-utils
+ version with support for NFS/RDMA mounts, but for various reasons we
+ recommend using nfs-utils-1.1.2 or greater). To see which version of
+ mount.nfs you are using, type:
+
+ $ /sbin/mount.nfs -V
+
+ If the version is less than 1.1.2 or the command does not exist,
+ you should install the latest version of nfs-utils.
+
+ Download the latest package from:
+
+ http://www.kernel.org/pub/linux/utils/nfs
+
+ Uncompress the package and follow the installation instructions.
+
+ If you will not need the idmapper and gssd executables (you do not need
+ these to create an NFS/RDMA enabled mount command), the installation
+ process can be simplified by disabling these features when running
+ configure:
+
+ $ ./configure --disable-gss --disable-nfsv4
+
+ To build nfs-utils you will need the tcp_wrappers package installed. For
+ more information on this see the package's README and INSTALL files.
+
+ After building the nfs-utils package, there will be a mount.nfs binary in
+ the utils/mount directory. This binary can be used to initiate NFS v2, v3,
+ or v4 mounts. To initiate a v4 mount, the binary must be called
+ mount.nfs4. The standard technique is to create a symlink called
+ mount.nfs4 to mount.nfs.
+
+ This mount.nfs binary should be installed at /sbin/mount.nfs as follows:
+
+ $ sudo cp utils/mount/mount.nfs /sbin/mount.nfs
+
+ In this location, mount.nfs will be invoked automatically for NFS mounts
+ by the system mount command.
+
+ NOTE: mount.nfs and therefore nfs-utils-1.1.2 or greater is only needed
+ on the NFS client machine. You do not need this specific version of
+ nfs-utils on the server. Furthermore, only the mount.nfs command from
+ nfs-utils-1.1.2 is needed on the client.
+
+ - Install a Linux kernel with NFS/RDMA
+
+ The NFS/RDMA client and server are both included in the mainline Linux
+ kernel version 2.6.25 and later. This and other versions of the 2.6 Linux
+ kernel can be found at:
+
+ ftp://ftp.kernel.org/pub/linux/kernel/v2.6/
+
+ Download the sources and place them in an appropriate location.
+
+ - Configure the RDMA stack
+
+ Make sure your kernel configuration has RDMA support enabled. Under
+ Device Drivers -> InfiniBand support, update the kernel configuration
+ to enable InfiniBand support [NOTE: the option name is misleading. Enabling
+ InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)].
+
+ Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or
+ iWARP adapter support (amso, cxgb3, etc.).
+
+ If you are using InfiniBand, be sure to enable IP-over-InfiniBand support.
+
+ - Configure the NFS client and server
+
+ Your kernel configuration must also have NFS file system support and/or
+ NFS server support enabled. These and other NFS related configuration
+ options can be found under File Systems -> Network File Systems.
+
+ - Build, install, reboot
+
+ The NFS/RDMA code will be enabled automatically if NFS and RDMA
+ are turned on. The NFS/RDMA client and server are configured via the
+ SUNRPC_XPRT_RDMA_CLIENT and SUNRPC_XPRT_RDMA_SERVER config options that both
+ depend on SUNRPC and INFINIBAND. The default value of both options will be:
+
+ - N if either SUNRPC or INFINIBAND are N, in this case the NFS/RDMA client
+ and server will not be built
+ - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M,
+ in this case the NFS/RDMA client and server will be built as modules
+ - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client
+ and server will be built into the kernel
+
+ Therefore, if you have followed the steps above and turned no NFS and RDMA,
+ the NFS/RDMA client and server will be built.
+
+ Build a new kernel, install it, boot it.
+
+Check RDMA and NFS Setup
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Before configuring the NFS/RDMA software, it is a good idea to test
+ your new kernel to ensure that the kernel is working correctly.
+ In particular, it is a good idea to verify that the RDMA stack
+ is functioning as expected and standard NFS over TCP/IP and/or UDP/IP
+ is working properly.
+
+ - Check RDMA Setup
+
+ If you built the RDMA components as modules, load them at
+ this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
+ card:
+
+ $ modprobe ib_mthca
+ $ modprobe ib_ipoib
+
+ If you are using InfiniBand, make sure there is a Subnet Manager (SM)
+ running on the network. If your IB switch has an embedded SM, you can
+ use it. Otherwise, you will need to run an SM, such as OpenSM, on one
+ of your end nodes.
+
+ If an SM is running on your network, you should see the following:
+
+ $ cat /sys/class/infiniband/driverX/ports/1/state
+ 4: ACTIVE
+
+ where driverX is mthca0, ipath5, ehca3, etc.
+
+ To further test the InfiniBand software stack, use IPoIB (this
+ assumes you have two IB hosts named host1 and host2):
+
+ host1$ ip link set dev ib0 up
+ host1$ ip address add dev ib0 a.b.c.x
+ host2$ ip link set dev ib0 up
+ host2$ ip address add dev ib0 a.b.c.y
+ host1$ ping a.b.c.y
+ host2$ ping a.b.c.x
+
+ For other device types, follow the appropriate procedures.
+
+ - Check NFS Setup
+
+ For the NFS components enabled above (client and/or server),
+ test their functionality over standard Ethernet using TCP/IP or UDP/IP.
+
+NFS/RDMA Setup
+~~~~~~~~~~~~~~
+
+ We recommend that you use two machines, one to act as the client and
+ one to act as the server.
+
+ One time configuration:
+
+ - On the server system, configure the /etc/exports file and
+ start the NFS/RDMA server.
+
+ Exports entries with the following formats have been tested:
+
+ /vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash)
+ /vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash)
+
+ The IP address(es) is(are) the client's IPoIB address for an InfiniBand
+ HCA or the cleint's iWARP address(es) for an RNIC.
+
+ NOTE: The "insecure" option must be used because the NFS/RDMA client does
+ not use a reserved port.
+
+ Each time a machine boots:
+
+ - Load and configure the RDMA drivers
+
+ For InfiniBand using a Mellanox adapter:
+
+ $ modprobe ib_mthca
+ $ modprobe ib_ipoib
+ $ ip li set dev ib0 up
+ $ ip addr add dev ib0 a.b.c.d
+
+ NOTE: use unique addresses for the client and server
+
+ - Start the NFS server
+
+ If the NFS/RDMA server was built as a module
+ (CONFIG_SUNRPC_XPRT_RDMA_SERVER=m in kernel config), load the RDMA
+ transport module:
+
+ $ modprobe svcrdma
+
+ Regardless of how the server was built (module or built-in), start the
+ server:
+
+ $ /etc/init.d/nfs start
+
+ or
+
+ $ service nfs start
+
+ Instruct the server to listen on the RDMA transport:
+
+ $ echo rdma 20049 > /proc/fs/nfsd/portlist
+
+ - On the client system
+
+ If the NFS/RDMA client was built as a module
+ (CONFIG_SUNRPC_XPRT_RDMA_CLIENT=m in kernel config), load the RDMA client
+ module:
+
+ $ modprobe xprtrdma.ko
+
+ Regardless of how the client was built (module or built-in), use this
+ command to mount the NFS/RDMA server:
+
+ $ mount -o rdma,port=20049 <IPoIB-server-name-or-address>:/<export> /mnt
+
+ To verify that the mount is using RDMA, run "cat /proc/mounts" and check
+ the "proto" field for the given mount.
+
+ Congratulations! You're using NFS/RDMA!
diff --git a/Documentation/filesystems/nfs/nfs.txt b/Documentation/filesystems/nfs/nfs.txt
new file mode 100644
index 000000000..f2571c8be
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs.txt
@@ -0,0 +1,136 @@
+
+The NFS client
+==============
+
+The NFS version 2 protocol was first documented in RFC1094 (March 1989).
+Since then two more major releases of NFS have been published, with NFSv3
+being documented in RFC1813 (June 1995), and NFSv4 in RFC3530 (April
+2003).
+
+The Linux NFS client currently supports all the above published versions,
+and work is in progress on adding support for minor version 1 of the NFSv4
+protocol.
+
+The purpose of this document is to provide information on some of the
+special features of the NFS client that can be configured by system
+administrators.
+
+
+The nfs4_unique_id parameter
+============================
+
+NFSv4 requires clients to identify themselves to servers with a unique
+string. File open and lock state shared between one client and one server
+is associated with this identity. To support robust NFSv4 state recovery
+and transparent state migration, this identity string must not change
+across client reboots.
+
+Without any other intervention, the Linux client uses a string that contains
+the local system's node name. System administrators, however, often do not
+take care to ensure that node names are fully qualified and do not change
+over the lifetime of a client system. Node names can have other
+administrative requirements that require particular behavior that does not
+work well as part of an nfs_client_id4 string.
+
+The nfs.nfs4_unique_id boot parameter specifies a unique string that can be
+used instead of a system's node name when an NFS client identifies itself to
+a server. Thus, if the system's node name is not unique, or it changes, its
+nfs.nfs4_unique_id stays the same, preventing collision with other clients
+or loss of state during NFS reboot recovery or transparent state migration.
+
+The nfs.nfs4_unique_id string is typically a UUID, though it can contain
+anything that is believed to be unique across all NFS clients. An
+nfs4_unique_id string should be chosen when a client system is installed,
+just as a system's root file system gets a fresh UUID in its label at
+install time.
+
+The string should remain fixed for the lifetime of the client. It can be
+changed safely if care is taken that the client shuts down cleanly and all
+outstanding NFSv4 state has expired, to prevent loss of NFSv4 state.
+
+This string can be stored in an NFS client's grub.conf, or it can be provided
+via a net boot facility such as PXE. It may also be specified as an nfs.ko
+module parameter. Specifying a uniquifier string is not support for NFS
+clients running in containers.
+
+
+The DNS resolver
+================
+
+NFSv4 allows for one server to refer the NFS client to data that has been
+migrated onto another server by means of the special "fs_locations"
+attribute. See
+ http://tools.ietf.org/html/rfc3530#section-6
+and
+ http://tools.ietf.org/html/draft-ietf-nfsv4-referrals-00
+
+The fs_locations information can take the form of either an ip address and
+a path, or a DNS hostname and a path. The latter requires the NFS client to
+do a DNS lookup in order to mount the new volume, and hence the need for an
+upcall to allow userland to provide this service.
+
+Assuming that the user has the 'rpc_pipefs' filesystem mounted in the usual
+/var/lib/nfs/rpc_pipefs, the upcall consists of the following steps:
+
+ (1) The process checks the dns_resolve cache to see if it contains a
+ valid entry. If so, it returns that entry and exits.
+
+ (2) If no valid entry exists, the helper script '/sbin/nfs_cache_getent'
+ (may be changed using the 'nfs.cache_getent' kernel boot parameter)
+ is run, with two arguments:
+ - the cache name, "dns_resolve"
+ - the hostname to resolve
+
+ (3) After looking up the corresponding ip address, the helper script
+ writes the result into the rpc_pipefs pseudo-file
+ '/var/lib/nfs/rpc_pipefs/cache/dns_resolve/channel'
+ in the following (text) format:
+
+ "<ip address> <hostname> <ttl>\n"
+
+ Where <ip address> is in the usual IPv4 (123.456.78.90) or IPv6
+ (ffee:ddcc:bbaa:9988:7766:5544:3322:1100, ffee::1100, ...) format.
+ <hostname> is identical to the second argument of the helper
+ script, and <ttl> is the 'time to live' of this cache entry (in
+ units of seconds).
+
+ Note: If <ip address> is invalid, say the string "0", then a negative
+ entry is created, which will cause the kernel to treat the hostname
+ as having no valid DNS translation.
+
+
+
+
+A basic sample /sbin/nfs_cache_getent
+=====================================
+
+#!/bin/bash
+#
+ttl=600
+#
+cut=/usr/bin/cut
+getent=/usr/bin/getent
+rpc_pipefs=/var/lib/nfs/rpc_pipefs
+#
+die()
+{
+ echo "Usage: $0 cache_name entry_name"
+ exit 1
+}
+
+[ $# -lt 2 ] && die
+cachename="$1"
+cache_path=${rpc_pipefs}/cache/${cachename}/channel
+
+case "${cachename}" in
+ dns_resolve)
+ name="$2"
+ result="$(${getent} hosts ${name} | ${cut} -f1 -d\ )"
+ [ -z "${result}" ] && result="0"
+ ;;
+ *)
+ die
+ ;;
+esac
+echo "${result} ${name} ${ttl}" >${cache_path}
+
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
new file mode 100644
index 000000000..682a59fab
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -0,0 +1,173 @@
+NFSv4.1 Server Implementation
+
+Server support for minorversion 1 can be controlled using the
+/proc/fs/nfsd/versions control file. The string output returned
+by reading this file will contain either "+4.1" or "-4.1"
+correspondingly.
+
+Currently, server support for minorversion 1 is enabled by default.
+It can be disabled at run time by writing the string "-4.1" to
+the /proc/fs/nfsd/versions control file. Note that to write this
+control file, the nfsd service must be taken down. You can use rpc.nfsd
+for this; see rpc.nfsd(8).
+
+(Warning: older servers will interpret "+4.1" and "-4.1" as "+4" and
+"-4", respectively. Therefore, code meant to work on both new and old
+kernels must turn 4.1 on or off *before* turning support for version 4
+on or off; rpc.nfsd does this correctly.)
+
+The NFSv4 minorversion 1 (NFSv4.1) implementation in nfsd is based
+on RFC 5661.
+
+From the many new features in NFSv4.1 the current implementation
+focuses on the mandatory-to-implement NFSv4.1 Sessions, providing
+"exactly once" semantics and better control and throttling of the
+resources allocated for each client.
+
+The table below, taken from the NFSv4.1 document, lists
+the operations that are mandatory to implement (REQ), optional
+(OPT), and NFSv4.0 operations that are required not to implement (MNI)
+in minor version 1. The first column indicates the operations that
+are not supported yet by the linux server implementation.
+
+The OPTIONAL features identified and their abbreviations are as follows:
+ pNFS Parallel NFS
+ FDELG File Delegations
+ DDELG Directory Delegations
+
+The following abbreviations indicate the linux server implementation status.
+ I Implemented NFSv4.1 operations.
+ NS Not Supported.
+ NS* Unimplemented optional feature.
+
+Operations
+
+ +----------------------+------------+--------------+----------------+
+ | Operation | REQ, REC, | Feature | Definition |
+ | | OPT, or | (REQ, REC, | |
+ | | MNI | or OPT) | |
+ +----------------------+------------+--------------+----------------+
+ | ACCESS | REQ | | Section 18.1 |
+I | BACKCHANNEL_CTL | REQ | | Section 18.33 |
+I | BIND_CONN_TO_SESSION | REQ | | Section 18.34 |
+ | CLOSE | REQ | | Section 18.2 |
+ | COMMIT | REQ | | Section 18.3 |
+ | CREATE | REQ | | Section 18.4 |
+I | CREATE_SESSION | REQ | | Section 18.36 |
+NS*| DELEGPURGE | OPT | FDELG (REQ) | Section 18.5 |
+ | DELEGRETURN | OPT | FDELG, | Section 18.6 |
+ | | | DDELG, pNFS | |
+ | | | (REQ) | |
+I | DESTROY_CLIENTID | REQ | | Section 18.50 |
+I | DESTROY_SESSION | REQ | | Section 18.37 |
+I | EXCHANGE_ID | REQ | | Section 18.35 |
+I | FREE_STATEID | REQ | | Section 18.38 |
+ | GETATTR | REQ | | Section 18.7 |
+I | GETDEVICEINFO | OPT | pNFS (REQ) | Section 18.40 |
+NS*| GETDEVICELIST | OPT | pNFS (OPT) | Section 18.41 |
+ | GETFH | REQ | | Section 18.8 |
+NS*| GET_DIR_DELEGATION | OPT | DDELG (REQ) | Section 18.39 |
+I | LAYOUTCOMMIT | OPT | pNFS (REQ) | Section 18.42 |
+I | LAYOUTGET | OPT | pNFS (REQ) | Section 18.43 |
+I | LAYOUTRETURN | OPT | pNFS (REQ) | Section 18.44 |
+ | LINK | OPT | | Section 18.9 |
+ | LOCK | REQ | | Section 18.10 |
+ | LOCKT | REQ | | Section 18.11 |
+ | LOCKU | REQ | | Section 18.12 |
+ | LOOKUP | REQ | | Section 18.13 |
+ | LOOKUPP | REQ | | Section 18.14 |
+ | NVERIFY | REQ | | Section 18.15 |
+ | OPEN | REQ | | Section 18.16 |
+NS*| OPENATTR | OPT | | Section 18.17 |
+ | OPEN_CONFIRM | MNI | | N/A |
+ | OPEN_DOWNGRADE | REQ | | Section 18.18 |
+ | PUTFH | REQ | | Section 18.19 |
+ | PUTPUBFH | REQ | | Section 18.20 |
+ | PUTROOTFH | REQ | | Section 18.21 |
+ | READ | REQ | | Section 18.22 |
+ | READDIR | REQ | | Section 18.23 |
+ | READLINK | OPT | | Section 18.24 |
+ | RECLAIM_COMPLETE | REQ | | Section 18.51 |
+ | RELEASE_LOCKOWNER | MNI | | N/A |
+ | REMOVE | REQ | | Section 18.25 |
+ | RENAME | REQ | | Section 18.26 |
+ | RENEW | MNI | | N/A |
+ | RESTOREFH | REQ | | Section 18.27 |
+ | SAVEFH | REQ | | Section 18.28 |
+ | SECINFO | REQ | | Section 18.29 |
+I | SECINFO_NO_NAME | REC | pNFS files | Section 18.45, |
+ | | | layout (REQ) | Section 13.12 |
+I | SEQUENCE | REQ | | Section 18.46 |
+ | SETATTR | REQ | | Section 18.30 |
+ | SETCLIENTID | MNI | | N/A |
+ | SETCLIENTID_CONFIRM | MNI | | N/A |
+NS | SET_SSV | REQ | | Section 18.47 |
+I | TEST_STATEID | REQ | | Section 18.48 |
+ | VERIFY | REQ | | Section 18.31 |
+NS*| WANT_DELEGATION | OPT | FDELG (OPT) | Section 18.49 |
+ | WRITE | REQ | | Section 18.32 |
+
+Callback Operations
+
+ +-------------------------+-----------+-------------+---------------+
+ | Operation | REQ, REC, | Feature | Definition |
+ | | OPT, or | (REQ, REC, | |
+ | | MNI | or OPT) | |
+ +-------------------------+-----------+-------------+---------------+
+ | CB_GETATTR | OPT | FDELG (REQ) | Section 20.1 |
+I | CB_LAYOUTRECALL | OPT | pNFS (REQ) | Section 20.3 |
+NS*| CB_NOTIFY | OPT | DDELG (REQ) | Section 20.4 |
+NS*| CB_NOTIFY_DEVICEID | OPT | pNFS (OPT) | Section 20.12 |
+NS*| CB_NOTIFY_LOCK | OPT | | Section 20.11 |
+NS*| CB_PUSH_DELEG | OPT | FDELG (OPT) | Section 20.5 |
+ | CB_RECALL | OPT | FDELG, | Section 20.2 |
+ | | | DDELG, pNFS | |
+ | | | (REQ) | |
+NS*| CB_RECALL_ANY | OPT | FDELG, | Section 20.6 |
+ | | | DDELG, pNFS | |
+ | | | (REQ) | |
+NS | CB_RECALL_SLOT | REQ | | Section 20.8 |
+NS*| CB_RECALLABLE_OBJ_AVAIL | OPT | DDELG, pNFS | Section 20.7 |
+ | | | (REQ) | |
+I | CB_SEQUENCE | OPT | FDELG, | Section 20.9 |
+ | | | DDELG, pNFS | |
+ | | | (REQ) | |
+NS*| CB_WANTS_CANCELLED | OPT | FDELG, | Section 20.10 |
+ | | | DDELG, pNFS | |
+ | | | (REQ) | |
+ +-------------------------+-----------+-------------+---------------+
+
+Implementation notes:
+
+SSV:
+* The spec claims this is mandatory, but we don't actually know of any
+ implementations, so we're ignoring it for now. The server returns
+ NFS4ERR_ENCR_ALG_UNSUPP on EXCHANGE_ID, which should be future-proof.
+
+GSS on the backchannel:
+* Again, theoretically required but not widely implemented (in
+ particular, the current Linux client doesn't request it). We return
+ NFS4ERR_ENCR_ALG_UNSUPP on CREATE_SESSION.
+
+DELEGPURGE:
+* mandatory only for servers that support CLAIM_DELEGATE_PREV and/or
+ CLAIM_DELEG_PREV_FH (which allows clients to keep delegations that
+ persist across client reboots). Thus we need not implement this for
+ now.
+
+EXCHANGE_ID:
+* implementation ids are ignored
+
+CREATE_SESSION:
+* backchannel attributes are ignored
+
+SEQUENCE:
+* no support for dynamic slot table renegotiation (optional)
+
+Nonstandard compound limitations:
+* No support for a sessions fore channel RPC compound that requires both a
+ ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
+ fail to live up to the promise we made in CREATE_SESSION fore channel
+ negotiation.
+
+See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.
diff --git a/Documentation/filesystems/nfs/nfsd-admin-interfaces.txt b/Documentation/filesystems/nfs/nfsd-admin-interfaces.txt
new file mode 100644
index 000000000..56a96fb08
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfsd-admin-interfaces.txt
@@ -0,0 +1,41 @@
+Administrative interfaces for nfsd
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Note that normally these interfaces are used only by the utilities in
+nfs-utils.
+
+nfsd is controlled mainly by pseudofiles under the "nfsd" filesystem,
+which is normally mounted at /proc/fs/nfsd/.
+
+The server is always started by the first write of a nonzero value to
+nfsd/threads.
+
+Before doing that, NFSD can be told which sockets to listen on by
+writing to nfsd/portlist; that write may be:
+
+ - an ascii-encoded file descriptor, which should refer to a
+ bound (and listening, for tcp) socket, or
+ - "transportname port", where transportname is currently either
+ "udp", "tcp", or "rdma".
+
+If nfsd is started without doing any of these, then it will create one
+udp and one tcp listener at port 2049 (see nfsd_init_socks).
+
+On startup, nfsd and lockd grace periods start.
+
+nfsd is shut down by a write of 0 to nfsd/threads. All locks and state
+are thrown away at that point.
+
+Between startup and shutdown, the number of threads may be adjusted up
+or down by additional writes to nfsd/threads or by writes to
+nfsd/pool_threads.
+
+For more detail about files under nfsd/ and what they control, see
+fs/nfsd/nfsctl.c; most of them have detailed comments.
+
+Implementation notes
+^^^^^^^^^^^^^^^^^^^^
+
+Note that the rpc server requires the caller to serialize addition and
+removal of listening sockets, and startup and shutdown of the server.
+For nfsd this is done using nfsd_mutex.
diff --git a/Documentation/filesystems/nfs/nfsroot.txt b/Documentation/filesystems/nfs/nfsroot.txt
new file mode 100644
index 000000000..2d66ed688
--- /dev/null
+++ b/Documentation/filesystems/nfs/nfsroot.txt
@@ -0,0 +1,302 @@
+Mounting the root filesystem via NFS (nfsroot)
+===============================================
+
+Written 1996 by Gero Kuhlmann <gero@gkminix.han.de>
+Updated 1997 by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+Updated 2006 by Nico Schottelius <nico-kernel-nfsroot@schottelius.org>
+Updated 2006 by Horms <horms@verge.net.au>
+
+
+
+In order to use a diskless system, such as an X-terminal or printer server
+for example, it is necessary for the root filesystem to be present on a
+non-disk device. This may be an initramfs (see Documentation/filesystems/
+ramfs-rootfs-initramfs.txt), a ramdisk (see Documentation/initrd.txt) or a
+filesystem mounted via NFS. The following text describes on how to use NFS
+for the root filesystem. For the rest of this text 'client' means the
+diskless system, and 'server' means the NFS server.
+
+
+
+
+1.) Enabling nfsroot capabilities
+ -----------------------------
+
+In order to use nfsroot, NFS client support needs to be selected as
+built-in during configuration. Once this has been selected, the nfsroot
+option will become available, which should also be selected.
+
+In the networking options, kernel level autoconfiguration can be selected,
+along with the types of autoconfiguration to support. Selecting all of
+DHCP, BOOTP and RARP is safe.
+
+
+
+
+2.) Kernel command line
+ -------------------
+
+When the kernel has been loaded by a boot loader (see below) it needs to be
+told what root fs device to use. And in the case of nfsroot, where to find
+both the server and the name of the directory on the server to mount as root.
+This can be established using the following kernel command line parameters:
+
+
+root=/dev/nfs
+
+ This is necessary to enable the pseudo-NFS-device. Note that it's not a
+ real device but just a synonym to tell the kernel to use NFS instead of
+ a real device.
+
+
+nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>]
+
+ If the `nfsroot' parameter is NOT given on the command line,
+ the default "/tftpboot/%s" will be used.
+
+ <server-ip> Specifies the IP address of the NFS server.
+ The default address is determined by the `ip' parameter
+ (see below). This parameter allows the use of different
+ servers for IP autoconfiguration and NFS.
+
+ <root-dir> Name of the directory on the server to mount as root.
+ If there is a "%s" token in the string, it will be
+ replaced by the ASCII-representation of the client's
+ IP address.
+
+ <nfs-options> Standard NFS options. All options are separated by commas.
+ The following defaults are used:
+ port = as given by server portmap daemon
+ rsize = 4096
+ wsize = 4096
+ timeo = 7
+ retrans = 3
+ acregmin = 3
+ acregmax = 60
+ acdirmin = 30
+ acdirmax = 60
+ flags = hard, nointr, noposix, cto, ac
+
+
+ip=<client-ip>:<server-ip>:<gw-ip>:<netmask>:<hostname>:<device>:<autoconf>:
+ <dns0-ip>:<dns1-ip>
+
+ This parameter tells the kernel how to configure IP addresses of devices
+ and also how to set up the IP routing table. It was originally called
+ `nfsaddrs', but now the boot-time IP configuration works independently of
+ NFS, so it was renamed to `ip' and the old name remained as an alias for
+ compatibility reasons.
+
+ If this parameter is missing from the kernel command line, all fields are
+ assumed to be empty, and the defaults mentioned below apply. In general
+ this means that the kernel tries to configure everything using
+ autoconfiguration.
+
+ The <autoconf> parameter can appear alone as the value to the `ip'
+ parameter (without all the ':' characters before). If the value is
+ "ip=off" or "ip=none", no autoconfiguration will take place, otherwise
+ autoconfiguration will take place. The most common way to use this
+ is "ip=dhcp".
+
+ <client-ip> IP address of the client.
+
+ Default: Determined using autoconfiguration.
+
+ <server-ip> IP address of the NFS server. If RARP is used to determine
+ the client address and this parameter is NOT empty only
+ replies from the specified server are accepted.
+
+ Only required for NFS root. That is autoconfiguration
+ will not be triggered if it is missing and NFS root is not
+ in operation.
+
+ Default: Determined using autoconfiguration.
+ The address of the autoconfiguration server is used.
+
+ <gw-ip> IP address of a gateway if the server is on a different subnet.
+
+ Default: Determined using autoconfiguration.
+
+ <netmask> Netmask for local network interface. If unspecified
+ the netmask is derived from the client IP address assuming
+ classful addressing.
+
+ Default: Determined using autoconfiguration.
+
+ <hostname> Name of the client. May be supplied by autoconfiguration,
+ but its absence will not trigger autoconfiguration.
+ If specified and DHCP is used, the user provided hostname will
+ be carried in the DHCP request to hopefully update DNS record.
+
+ Default: Client IP address is used in ASCII notation.
+
+ <device> Name of network device to use.
+
+ Default: If the host only has one device, it is used.
+ Otherwise the device is determined using
+ autoconfiguration. This is done by sending
+ autoconfiguration requests out of all devices,
+ and using the device that received the first reply.
+
+ <autoconf> Method to use for autoconfiguration. In the case of options
+ which specify multiple autoconfiguration protocols,
+ requests are sent using all protocols, and the first one
+ to reply is used.
+
+ Only autoconfiguration protocols that have been compiled
+ into the kernel will be used, regardless of the value of
+ this option.
+
+ off or none: don't use autoconfiguration
+ (do static IP assignment instead)
+ on or any: use any protocol available in the kernel
+ (default)
+ dhcp: use DHCP
+ bootp: use BOOTP
+ rarp: use RARP
+ both: use both BOOTP and RARP but not DHCP
+ (old option kept for backwards compatibility)
+
+ Default: any
+
+ <dns0-ip> IP address of first nameserver.
+ Value gets exported by /proc/net/pnp which is often linked
+ on embedded systems by /etc/resolv.conf.
+
+ <dns1-ip> IP address of secound nameserver.
+ Same as above.
+
+
+nfsrootdebug
+
+ This parameter enables debugging messages to appear in the kernel
+ log at boot time so that administrators can verify that the correct
+ NFS mount options, server address, and root path are passed to the
+ NFS client.
+
+
+rdinit=<executable file>
+
+ To specify which file contains the program that starts system
+ initialization, administrators can use this command line parameter.
+ The default value of this parameter is "/init". If the specified
+ file exists and the kernel can execute it, root filesystem related
+ kernel command line parameters, including `nfsroot=', are ignored.
+
+ A description of the process of mounting the root file system can be
+ found in:
+
+ Documentation/early-userspace/README
+
+
+
+
+3.) Boot Loader
+ ----------
+
+To get the kernel into memory different approaches can be used.
+They depend on various facilities being available:
+
+
+3.1) Booting from a floppy using syslinux
+
+ When building kernels, an easy way to create a boot floppy that uses
+ syslinux is to use the zdisk or bzdisk make targets which use zimage
+ and bzimage images respectively. Both targets accept the
+ FDARGS parameter which can be used to set the kernel command line.
+
+ e.g.
+ make bzdisk FDARGS="root=/dev/nfs"
+
+ Note that the user running this command will need to have
+ access to the floppy drive device, /dev/fd0
+
+ For more information on syslinux, including how to create bootdisks
+ for prebuilt kernels, see http://syslinux.zytor.com/
+
+ N.B: Previously it was possible to write a kernel directly to
+ a floppy using dd, configure the boot device using rdev, and
+ boot using the resulting floppy. Linux no longer supports this
+ method of booting.
+
+3.2) Booting from a cdrom using isolinux
+
+ When building kernels, an easy way to create a bootable cdrom that
+ uses isolinux is to use the isoimage target which uses a bzimage
+ image. Like zdisk and bzdisk, this target accepts the FDARGS
+ parameter which can be used to set the kernel command line.
+
+ e.g.
+ make isoimage FDARGS="root=/dev/nfs"
+
+ The resulting iso image will be arch/<ARCH>/boot/image.iso
+ This can be written to a cdrom using a variety of tools including
+ cdrecord.
+
+ e.g.
+ cdrecord dev=ATAPI:1,0,0 arch/x86/boot/image.iso
+
+ For more information on isolinux, including how to create bootdisks
+ for prebuilt kernels, see http://syslinux.zytor.com/
+
+3.2) Using LILO
+ When using LILO all the necessary command line parameters may be
+ specified using the 'append=' directive in the LILO configuration
+ file.
+
+ However, to use the 'root=' directive you also need to create
+ a dummy root device, which may be removed after LILO is run.
+
+ mknod /dev/boot255 c 0 255
+
+ For information on configuring LILO, please refer to its documentation.
+
+3.3) Using GRUB
+ When using GRUB, kernel parameter are simply appended after the kernel
+ specification: kernel <kernel> <parameters>
+
+3.4) Using loadlin
+ loadlin may be used to boot Linux from a DOS command prompt without
+ requiring a local hard disk to mount as root. This has not been
+ thoroughly tested by the authors of this document, but in general
+ it should be possible configure the kernel command line similarly
+ to the configuration of LILO.
+
+ Please refer to the loadlin documentation for further information.
+
+3.5) Using a boot ROM
+ This is probably the most elegant way of booting a diskless client.
+ With a boot ROM the kernel is loaded using the TFTP protocol. The
+ authors of this document are not aware of any no commercial boot
+ ROMs that support booting Linux over the network. However, there
+ are two free implementations of a boot ROM, netboot-nfs and
+ etherboot, both of which are available on sunsite.unc.edu, and both
+ of which contain everything you need to boot a diskless Linux client.
+
+3.6) Using pxelinux
+ Pxelinux may be used to boot linux using the PXE boot loader
+ which is present on many modern network cards.
+
+ When using pxelinux, the kernel image is specified using
+ "kernel <relative-path-below /tftpboot>". The nfsroot parameters
+ are passed to the kernel by adding them to the "append" line.
+ It is common to use serial console in conjunction with pxeliunx,
+ see Documentation/serial-console.txt for more information.
+
+ For more information on isolinux, including how to create bootdisks
+ for prebuilt kernels, see http://syslinux.zytor.com/
+
+
+
+
+4.) Credits
+ -------
+
+ The nfsroot code in the kernel and the RARP support have been written
+ by Gero Kuhlmann <gero@gkminix.han.de>.
+
+ The rest of the IP layer autoconfiguration code has been written
+ by Martin Mares <mj@atrey.karlin.mff.cuni.cz>.
+
+ In order to write the initial version of nfsroot I would like to thank
+ Jens-Uwe Mager <jum@anubis.han.de> for his help.
diff --git a/Documentation/filesystems/nfs/pnfs-block-server.txt b/Documentation/filesystems/nfs/pnfs-block-server.txt
new file mode 100644
index 000000000..2143673cf
--- /dev/null
+++ b/Documentation/filesystems/nfs/pnfs-block-server.txt
@@ -0,0 +1,37 @@
+pNFS block layout server user guide
+
+The Linux NFS server now supports the pNFS block layout extension. In this
+case the NFS server acts as Metadata Server (MDS) for pNFS, which in addition
+to handling all the metadata access to the NFS export also hands out layouts
+to the clients to directly access the underlying block devices that are
+shared with the client.
+
+To use pNFS block layouts with with the Linux NFS server the exported file
+system needs to support the pNFS block layouts (currently just XFS), and the
+file system must sit on shared storage (typically iSCSI) that is accessible
+to the clients in addition to the MDS. As of now the file system needs to
+sit directly on the exported volume, striping or concatenation of
+volumes on the MDS and clients is not supported yet.
+
+On the server, pNFS block volume support is automatically if the file system
+support it. On the client make sure the kernel has the CONFIG_PNFS_BLOCK
+option enabled, the blkmapd daemon from nfs-utils is running, and the
+file system is mounted using the NFSv4.1 protocol version (mount -o vers=4.1).
+
+If the nfsd server needs to fence a non-responding client it calls
+/sbin/nfsd-recall-failed with the first argument set to the IP address of
+the client, and the second argument set to the device node without the /dev
+prefix for the file system to be fenced. Below is an example file that shows
+how to translate the device into a serial number from SCSI EVPD 0x80:
+
+cat > /sbin/nfsd-recall-failed << EOF
+#!/bin/sh
+
+CLIENT="$1"
+DEV="/dev/$2"
+EVPD=`sg_inq --page=0x80 ${DEV} | \
+ grep "Unit serial number:" | \
+ awk -F ': ' '{print $2}'`
+
+echo "fencing client ${CLIENT} serial ${EVPD}" >> /var/log/pnfsd-fence.log
+EOF
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt
new file mode 100644
index 000000000..44a9f2493
--- /dev/null
+++ b/Documentation/filesystems/nfs/pnfs.txt
@@ -0,0 +1,110 @@
+Reference counting in pnfs:
+==========================
+
+The are several inter-related caches. We have layouts which can
+reference multiple devices, each of which can reference multiple data servers.
+Each data server can be referenced by multiple devices. Each device
+can be referenced by multiple layouts. To keep all of this straight,
+we need to reference count.
+
+
+struct pnfs_layout_hdr
+----------------------
+The on-the-wire command LAYOUTGET corresponds to struct
+pnfs_layout_segment, usually referred to by the variable name lseg.
+Each nfs_inode may hold a pointer to a cache of these layout
+segments in nfsi->layout, of type struct pnfs_layout_hdr.
+
+We reference the header for the inode pointing to it, across each
+outstanding RPC call that references it (LAYOUTGET, LAYOUTRETURN,
+LAYOUTCOMMIT), and for each lseg held within.
+
+Each header is also (when non-empty) put on a list associated with
+struct nfs_client (cl_layouts). Being put on this list does not bump
+the reference count, as the layout is kept around by the lseg that
+keeps it in the list.
+
+deviceid_cache
+--------------
+lsegs reference device ids, which are resolved per nfs_client and
+layout driver type. The device ids are held in a RCU cache (struct
+nfs4_deviceid_cache). The cache itself is referenced across each
+mount. The entries (struct nfs4_deviceid) themselves are held across
+the lifetime of each lseg referencing them.
+
+RCU is used because the deviceid is basically a write once, read many
+data structure. The hlist size of 32 buckets needs better
+justification, but seems reasonable given that we can have multiple
+deviceid's per filesystem, and multiple filesystems per nfs_client.
+
+The hash code is copied from the nfsd code base. A discussion of
+hashing and variations of this algorithm can be found at:
+http://groups.google.com/group/comp.lang.c/browse_thread/thread/9522965e2b8d3809
+
+data server cache
+-----------------
+file driver devices refer to data servers, which are kept in a module
+level cache. Its reference is held over the lifetime of the deviceid
+pointing to it.
+
+lseg
+----
+lseg maintains an extra reference corresponding to the NFS_LSEG_VALID
+bit which holds it in the pnfs_layout_hdr's list. When the final lseg
+is removed from the pnfs_layout_hdr's list, the NFS_LAYOUT_DESTROYED
+bit is set, preventing any new lsegs from being added.
+
+layout drivers
+--------------
+
+PNFS utilizes what is called layout drivers. The STD defines 4 basic
+layout types: "files", "objects", "blocks", and "flexfiles". For each
+of these types there is a layout-driver with a common function-vectors
+table which are called by the nfs-client pnfs-core to implement the
+different layout types.
+
+Files-layout-driver code is in: fs/nfs/filelayout/.. directory
+Objects-layout-deriver code is in: fs/nfs/objlayout/.. directory
+Blocks-layout-deriver code is in: fs/nfs/blocklayout/.. directory
+Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory
+
+objects-layout setup
+--------------------
+
+As part of the full STD implementation the objlayoutdriver.ko needs, at times,
+to automatically login to yet undiscovered iscsi/osd devices. For this the
+driver makes up-calles to a user-mode script called *osd_login*
+
+The path_name of the script to use is by default:
+ /sbin/osd_login.
+This name can be overridden by the Kernel module parameter:
+ objlayoutdriver.osd_login_prog
+
+If Kernel does not find the osd_login_prog path it will zero it out
+and will not attempt farther logins. An admin can then write new value
+to the objlayoutdriver.osd_login_prog Kernel parameter to re-enable it.
+
+The /sbin/osd_login is part of the nfs-utils package, and should usually
+be installed on distributions that support this Kernel version.
+
+The API to the login script is as follows:
+ Usage: $0 -u <URI> -o <OSDNAME> -s <SYSTEMID>
+ Options:
+ -u target uri e.g. iscsi://<ip>:<port>
+ (allways exists)
+ (More protocols can be defined in the future.
+ The client does not interpret this string it is
+ passed unchanged as received from the Server)
+ -o osdname of the requested target OSD
+ (Might be empty)
+ (A string which denotes the OSD name, there is a
+ limit of 64 chars on this string)
+ -s systemid of the requested target OSD
+ (Might be empty)
+ (This string, if not empty is always an hex
+ representation of the 20 bytes osd_system_id)
+
+blocks-layout setup
+-------------------
+
+TODO: Document the setup needs of the blocks layout driver
diff --git a/Documentation/filesystems/nfs/rpc-cache.txt b/Documentation/filesystems/nfs/rpc-cache.txt
new file mode 100644
index 000000000..ebcaaee21
--- /dev/null
+++ b/Documentation/filesystems/nfs/rpc-cache.txt
@@ -0,0 +1,202 @@
+ This document gives a brief introduction to the caching
+mechanisms in the sunrpc layer that is used, in particular,
+for NFS authentication.
+
+CACHES
+======
+The caching replaces the old exports table and allows for
+a wide variety of values to be caches.
+
+There are a number of caches that are similar in structure though
+quite possibly very different in content and use. There is a corpus
+of common code for managing these caches.
+
+Examples of caches that are likely to be needed are:
+ - mapping from IP address to client name
+ - mapping from client name and filesystem to export options
+ - mapping from UID to list of GIDs, to work around NFS's limitation
+ of 16 gids.
+ - mappings between local UID/GID and remote UID/GID for sites that
+ do not have uniform uid assignment
+ - mapping from network identify to public key for crypto authentication.
+
+The common code handles such things as:
+ - general cache lookup with correct locking
+ - supporting 'NEGATIVE' as well as positive entries
+ - allowing an EXPIRED time on cache items, and removing
+ items after they expire, and are no longer in-use.
+ - making requests to user-space to fill in cache entries
+ - allowing user-space to directly set entries in the cache
+ - delaying RPC requests that depend on as-yet incomplete
+ cache entries, and replaying those requests when the cache entry
+ is complete.
+ - clean out old entries as they expire.
+
+Creating a Cache
+----------------
+
+1/ A cache needs a datum to store. This is in the form of a
+ structure definition that must contain a
+ struct cache_head
+ as an element, usually the first.
+ It will also contain a key and some content.
+ Each cache element is reference counted and contains
+ expiry and update times for use in cache management.
+2/ A cache needs a "cache_detail" structure that
+ describes the cache. This stores the hash table, some
+ parameters for cache management, and some operations detailing how
+ to work with particular cache items.
+ The operations requires are:
+ struct cache_head *alloc(void)
+ This simply allocates appropriate memory and returns
+ a pointer to the cache_detail embedded within the
+ structure
+ void cache_put(struct kref *)
+ This is called when the last reference to an item is
+ dropped. The pointer passed is to the 'ref' field
+ in the cache_head. cache_put should release any
+ references create by 'cache_init' and, if CACHE_VALID
+ is set, any references created by cache_update.
+ It should then release the memory allocated by
+ 'alloc'.
+ int match(struct cache_head *orig, struct cache_head *new)
+ test if the keys in the two structures match. Return
+ 1 if they do, 0 if they don't.
+ void init(struct cache_head *orig, struct cache_head *new)
+ Set the 'key' fields in 'new' from 'orig'. This may
+ include taking references to shared objects.
+ void update(struct cache_head *orig, struct cache_head *new)
+ Set the 'content' fileds in 'new' from 'orig'.
+ int cache_show(struct seq_file *m, struct cache_detail *cd,
+ struct cache_head *h)
+ Optional. Used to provide a /proc file that lists the
+ contents of a cache. This should show one item,
+ usually on just one line.
+ int cache_request(struct cache_detail *cd, struct cache_head *h,
+ char **bpp, int *blen)
+ Format a request to be send to user-space for an item
+ to be instantiated. *bpp is a buffer of size *blen.
+ bpp should be moved forward over the encoded message,
+ and *blen should be reduced to show how much free
+ space remains. Return 0 on success or <0 if not
+ enough room or other problem.
+ int cache_parse(struct cache_detail *cd, char *buf, int len)
+ A message from user space has arrived to fill out a
+ cache entry. It is in 'buf' of length 'len'.
+ cache_parse should parse this, find the item in the
+ cache with sunrpc_cache_lookup, and update the item
+ with sunrpc_cache_update.
+
+
+3/ A cache needs to be registered using cache_register(). This
+ includes it on a list of caches that will be regularly
+ cleaned to discard old data.
+
+Using a cache
+-------------
+
+To find a value in a cache, call sunrpc_cache_lookup passing a pointer
+to the cache_head in a sample item with the 'key' fields filled in.
+This will be passed to ->match to identify the target entry. If no
+entry is found, a new entry will be create, added to the cache, and
+marked as not containing valid data.
+
+The item returned is typically passed to cache_check which will check
+if the data is valid, and may initiate an up-call to get fresh data.
+cache_check will return -ENOENT in the entry is negative or if an up
+call is needed but not possible, -EAGAIN if an upcall is pending,
+or 0 if the data is valid;
+
+cache_check can be passed a "struct cache_req *". This structure is
+typically embedded in the actual request and can be used to create a
+deferred copy of the request (struct cache_deferred_req). This is
+done when the found cache item is not uptodate, but the is reason to
+believe that userspace might provide information soon. When the cache
+item does become valid, the deferred copy of the request will be
+revisited (->revisit). It is expected that this method will
+reschedule the request for processing.
+
+The value returned by sunrpc_cache_lookup can also be passed to
+sunrpc_cache_update to set the content for the item. A second item is
+passed which should hold the content. If the item found by _lookup
+has valid data, then it is discarded and a new item is created. This
+saves any user of an item from worrying about content changing while
+it is being inspected. If the item found by _lookup does not contain
+valid data, then the content is copied across and CACHE_VALID is set.
+
+Populating a cache
+------------------
+
+Each cache has a name, and when the cache is registered, a directory
+with that name is created in /proc/net/rpc
+
+This directory contains a file called 'channel' which is a channel
+for communicating between kernel and user for populating the cache.
+This directory may later contain other files of interacting
+with the cache.
+
+The 'channel' works a bit like a datagram socket. Each 'write' is
+passed as a whole to the cache for parsing and interpretation.
+Each cache can treat the write requests differently, but it is
+expected that a message written will contain:
+ - a key
+ - an expiry time
+ - a content.
+with the intention that an item in the cache with the give key
+should be create or updated to have the given content, and the
+expiry time should be set on that item.
+
+Reading from a channel is a bit more interesting. When a cache
+lookup fails, or when it succeeds but finds an entry that may soon
+expire, a request is lodged for that cache item to be updated by
+user-space. These requests appear in the channel file.
+
+Successive reads will return successive requests.
+If there are no more requests to return, read will return EOF, but a
+select or poll for read will block waiting for another request to be
+added.
+
+Thus a user-space helper is likely to:
+ open the channel.
+ select for readable
+ read a request
+ write a response
+ loop.
+
+If it dies and needs to be restarted, any requests that have not been
+answered will still appear in the file and will be read by the new
+instance of the helper.
+
+Each cache should define a "cache_parse" method which takes a message
+written from user-space and processes it. It should return an error
+(which propagates back to the write syscall) or 0.
+
+Each cache should also define a "cache_request" method which
+takes a cache item and encodes a request into the buffer
+provided.
+
+Note: If a cache has no active readers on the channel, and has had not
+active readers for more than 60 seconds, further requests will not be
+added to the channel but instead all lookups that do not find a valid
+entry will fail. This is partly for backward compatibility: The
+previous nfs exports table was deemed to be authoritative and a
+failed lookup meant a definite 'no'.
+
+request/response format
+-----------------------
+
+While each cache is free to use its own format for requests
+and responses over channel, the following is recommended as
+appropriate and support routines are available to help:
+Each request or response record should be printable ASCII
+with precisely one newline character which should be at the end.
+Fields within the record should be separated by spaces, normally one.
+If spaces, newlines, or nul characters are needed in a field they
+much be quoted. two mechanisms are available:
+1/ If a field begins '\x' then it must contain an even number of
+ hex digits, and pairs of these digits provide the bytes in the
+ field.
+2/ otherwise a \ in the field must be followed by 3 octal digits
+ which give the code for a byte. Other characters are treated
+ as them selves. At the very least, space, newline, nul, and
+ '\' must be quoted in this way.
diff --git a/Documentation/filesystems/nfs/rpc-server-gss.txt b/Documentation/filesystems/nfs/rpc-server-gss.txt
new file mode 100644
index 000000000..716f4be8e
--- /dev/null
+++ b/Documentation/filesystems/nfs/rpc-server-gss.txt
@@ -0,0 +1,91 @@
+
+rpcsec_gss support for kernel RPC servers
+=========================================
+
+This document gives references to the standards and protocols used to
+implement RPCGSS authentication in kernel RPC servers such as the NFS
+server and the NFS client's NFSv4.0 callback server. (But note that
+NFSv4.1 and higher don't require the client to act as a server for the
+purposes of authentication.)
+
+RPCGSS is specified in a few IETF documents:
+ - RFC2203 v1: http://tools.ietf.org/rfc/rfc2203.txt
+ - RFC5403 v2: http://tools.ietf.org/rfc/rfc5403.txt
+and there is a 3rd version being proposed:
+ - http://tools.ietf.org/id/draft-williams-rpcsecgssv3.txt
+ (At draft n. 02 at the time of writing)
+
+Background
+----------
+
+The RPCGSS Authentication method describes a way to perform GSSAPI
+Authentication for NFS. Although GSSAPI is itself completely mechanism
+agnostic, in many cases only the KRB5 mechanism is supported by NFS
+implementations.
+
+The Linux kernel, at the moment, supports only the KRB5 mechanism, and
+depends on GSSAPI extensions that are KRB5 specific.
+
+GSSAPI is a complex library, and implementing it completely in kernel is
+unwarranted. However GSSAPI operations are fundementally separable in 2
+parts:
+- initial context establishment
+- integrity/privacy protection (signing and encrypting of individual
+ packets)
+
+The former is more complex and policy-independent, but less
+performance-sensitive. The latter is simpler and needs to be very fast.
+
+Therefore, we perform per-packet integrity and privacy protection in the
+kernel, but leave the initial context establishment to userspace. We
+need upcalls to request userspace to perform context establishment.
+
+NFS Server Legacy Upcall Mechanism
+----------------------------------
+
+The classic upcall mechanism uses a custom text based upcall mechanism
+to talk to a custom daemon called rpc.svcgssd that is provide by the
+nfs-utils package.
+
+This upcall mechanism has 2 limitations:
+
+A) It can handle tokens that are no bigger than 2KiB
+
+In some Kerberos deployment GSSAPI tokens can be quite big, up and
+beyond 64KiB in size due to various authorization extensions attacked to
+the Kerberos tickets, that needs to be sent through the GSS layer in
+order to perform context establishment.
+
+B) It does not properly handle creds where the user is member of more
+than a few housand groups (the current hard limit in the kernel is 65K
+groups) due to limitation on the size of the buffer that can be send
+back to the kernel (4KiB).
+
+NFS Server New RPC Upcall Mechanism
+-----------------------------------
+
+The newer upcall mechanism uses RPC over a unix socket to a daemon
+called gss-proxy, implemented by a userspace program called Gssproxy.
+
+The gss_proxy RPC protocol is currently documented here:
+
+ https://fedorahosted.org/gss-proxy/wiki/ProtocolDocumentation
+
+This upcall mechanism uses the kernel rpc client and connects to the gssproxy
+userspace program over a regular unix socket. The gssproxy protocol does not
+suffer from the size limitations of the legacy protocol.
+
+Negotiating Upcall Mechanisms
+-----------------------------
+
+To provide backward compatibility, the kernel defaults to using the
+legacy mechanism. To switch to the new mechanism, gss-proxy must bind
+to /var/run/gssproxy.sock and then write "1" to
+/proc/net/rpc/use-gss-proxy. If gss-proxy dies, it must repeat both
+steps.
+
+Once the upcall mechanism is chosen, it cannot be changed. To prevent
+locking into the legacy mechanisms, the above steps must be performed
+before starting nfsd. Whoever starts nfsd can guarantee this by reading
+from /proc/net/rpc/use-gss-proxy and checking that it contains a
+"1"--the read will block until gss-proxy has done its write to the file.
diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt
new file mode 100644
index 000000000..41c3d332a
--- /dev/null
+++ b/Documentation/filesystems/nilfs2.txt
@@ -0,0 +1,270 @@
+NILFS2
+------
+
+NILFS2 is a log-structured file system (LFS) supporting continuous
+snapshotting. In addition to versioning capability of the entire file
+system, users can even restore files mistakenly overwritten or
+destroyed just a few seconds ago. Since NILFS2 can keep consistency
+like conventional LFS, it achieves quick recovery after system
+crashes.
+
+NILFS2 creates a number of checkpoints every few seconds or per
+synchronous write basis (unless there is no change). Users can select
+significant versions among continuously created checkpoints, and can
+change them into snapshots which will be preserved until they are
+changed back to checkpoints.
+
+There is no limit on the number of snapshots until the volume gets
+full. Each snapshot is mountable as a read-only file system
+concurrently with its writable mount, and this feature is convenient
+for online backup.
+
+The userland tools are included in nilfs-utils package, which is
+available from the following download page. At least "mkfs.nilfs2",
+"mount.nilfs2", "umount.nilfs2", and "nilfs_cleanerd" (so called
+cleaner or garbage collector) are required. Details on the tools are
+described in the man pages included in the package.
+
+Project web page: http://nilfs.sourceforge.net/
+Download page: http://nilfs.sourceforge.net/en/download.html
+List info: http://vger.kernel.org/vger-lists.html#linux-nilfs
+
+Caveats
+=======
+
+Features which NILFS2 does not support yet:
+
+ - atime
+ - extended attributes
+ - POSIX ACLs
+ - quotas
+ - fsck
+ - defragmentation
+
+Mount options
+=============
+
+NILFS2 supports the following mount options:
+(*) == default
+
+barrier(*) This enables/disables the use of write barriers. This
+nobarrier requires an IO stack which can support barriers, and
+ if nilfs gets an error on a barrier write, it will
+ disable again with a warning.
+errors=continue Keep going on a filesystem error.
+errors=remount-ro(*) Remount the filesystem read-only on an error.
+errors=panic Panic and halt the machine if an error occurs.
+cp=n Specify the checkpoint-number of the snapshot to be
+ mounted. Checkpoints and snapshots are listed by lscp
+ user command. Only the checkpoints marked as snapshot
+ are mountable with this option. Snapshot is read-only,
+ so a read-only mount option must be specified together.
+order=relaxed(*) Apply relaxed order semantics that allows modified data
+ blocks to be written to disk without making a
+ checkpoint if no metadata update is going. This mode
+ is equivalent to the ordered data mode of the ext3
+ filesystem except for the updates on data blocks still
+ conserve atomicity. This will improve synchronous
+ write performance for overwriting.
+order=strict Apply strict in-order semantics that preserves sequence
+ of all file operations including overwriting of data
+ blocks. That means, it is guaranteed that no
+ overtaking of events occurs in the recovered file
+ system after a crash.
+norecovery Disable recovery of the filesystem on mount.
+ This disables every write access on the device for
+ read-only mounts or snapshots. This option will fail
+ for r/w mounts on an unclean volume.
+discard This enables/disables the use of discard/TRIM commands.
+nodiscard(*) The discard/TRIM commands are sent to the underlying
+ block device when blocks are freed. This is useful
+ for SSD devices and sparse/thinly-provisioned LUNs.
+
+Ioctls
+======
+
+There is some NILFS2 specific functionality which can be accessed by applications
+through the system call interfaces. The list of all NILFS2 specific ioctls are
+shown in the table below.
+
+Table of NILFS2 specific ioctls
+..............................................................................
+ Ioctl Description
+ NILFS_IOCTL_CHANGE_CPMODE Change mode of given checkpoint between
+ checkpoint and snapshot state. This ioctl is
+ used in chcp and mkcp utilities.
+
+ NILFS_IOCTL_DELETE_CHECKPOINT Remove checkpoint from NILFS2 file system.
+ This ioctl is used in rmcp utility.
+
+ NILFS_IOCTL_GET_CPINFO Return info about requested checkpoints. This
+ ioctl is used in lscp utility and by
+ nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_CPSTAT Return checkpoints statistics. This ioctl is
+ used by lscp, rmcp utilities and by
+ nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_SUINFO Return segment usage info about requested
+ segments. This ioctl is used in lssu,
+ nilfs_resize utilities and by nilfs_cleanerd
+ daemon.
+
+ NILFS_IOCTL_SET_SUINFO Modify segment usage info of requested
+ segments. This ioctl is used by
+ nilfs_cleanerd daemon to skip unnecessary
+ cleaning operation of segments and reduce
+ performance penalty or wear of flash device
+ due to redundant move of in-use blocks.
+
+ NILFS_IOCTL_GET_SUSTAT Return segment usage statistics. This ioctl
+ is used in lssu, nilfs_resize utilities and
+ by nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_VINFO Return information on virtual block addresses.
+ This ioctl is used by nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_GET_BDESCS Return information about descriptors of disk
+ block numbers. This ioctl is used by
+ nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_CLEAN_SEGMENTS Do garbage collection operation in the
+ environment of requested parameters from
+ userspace. This ioctl is used by
+ nilfs_cleanerd daemon.
+
+ NILFS_IOCTL_SYNC Make a checkpoint. This ioctl is used in
+ mkcp utility.
+
+ NILFS_IOCTL_RESIZE Resize NILFS2 volume. This ioctl is used
+ by nilfs_resize utility.
+
+ NILFS_IOCTL_SET_ALLOC_RANGE Define lower limit of segments in bytes and
+ upper limit of segments in bytes. This ioctl
+ is used by nilfs_resize utility.
+
+NILFS2 usage
+============
+
+To use nilfs2 as a local file system, simply:
+
+ # mkfs -t nilfs2 /dev/block_device
+ # mount -t nilfs2 /dev/block_device /dir
+
+This will also invoke the cleaner through the mount helper program
+(mount.nilfs2).
+
+Checkpoints and snapshots are managed by the following commands.
+Their manpages are included in the nilfs-utils package above.
+
+ lscp list checkpoints or snapshots.
+ mkcp make a checkpoint or a snapshot.
+ chcp change an existing checkpoint to a snapshot or vice versa.
+ rmcp invalidate specified checkpoint(s).
+
+To mount a snapshot,
+
+ # mount -t nilfs2 -r -o cp=<cno> /dev/block_device /snap_dir
+
+where <cno> is the checkpoint number of the snapshot.
+
+To unmount the NILFS2 mount point or snapshot, simply:
+
+ # umount /dir
+
+Then, the cleaner daemon is automatically shut down by the umount
+helper program (umount.nilfs2).
+
+Disk format
+===========
+
+A nilfs2 volume is equally divided into a number of segments except
+for the super block (SB) and segment #0. A segment is the container
+of logs. Each log is composed of summary information blocks, payload
+blocks, and an optional super root block (SR):
+
+ ______________________________________________________
+ | |SB| | Segment | Segment | Segment | ... | Segment | |
+ |_|__|_|____0____|____1____|____2____|_____|____N____|_|
+ 0 +1K +4K +8M +16M +24M +(8MB x N)
+ . . (Typical offsets for 4KB-block)
+ . .
+ .______________________.
+ | log | log |... | log |
+ |__1__|__2__|____|__m__|
+ . .
+ . .
+ . .
+ .______________________________.
+ | Summary | Payload blocks |SR|
+ |_blocks__|_________________|__|
+
+The payload blocks are organized per file, and each file consists of
+data blocks and B-tree node blocks:
+
+ |<--- File-A --->|<--- File-B --->|
+ _______________________________________________________________
+ | Data blocks | B-tree blocks | Data blocks | B-tree blocks | ...
+ _|_____________|_______________|_____________|_______________|_
+
+
+Since only the modified blocks are written in the log, it may have
+files without data blocks or B-tree node blocks.
+
+The organization of the blocks is recorded in the summary information
+blocks, which contains a header structure (nilfs_segment_summary), per
+file structures (nilfs_finfo), and per block structures (nilfs_binfo):
+
+ _________________________________________________________________________
+ | Summary | finfo | binfo | ... | binfo | finfo | binfo | ... | binfo |...
+ |_blocks__|___A___|_(A,1)_|_____|(A,Na)_|___B___|_(B,1)_|_____|(B,Nb)_|___
+
+
+The logs include regular files, directory files, symbolic link files
+and several meta data files. The mata data files are the files used
+to maintain file system meta data. The current version of NILFS2 uses
+the following meta data files:
+
+ 1) Inode file (ifile) -- Stores on-disk inodes
+ 2) Checkpoint file (cpfile) -- Stores checkpoints
+ 3) Segment usage file (sufile) -- Stores allocation state of segments
+ 4) Data address translation file -- Maps virtual block numbers to usual
+ (DAT) block numbers. This file serves to
+ make on-disk blocks relocatable.
+
+The following figure shows a typical organization of the logs:
+
+ _________________________________________________________________________
+ | Summary | regular file | file | ... | ifile | cpfile | sufile | DAT |SR|
+ |_blocks__|_or_directory_|_______|_____|_______|________|________|_____|__|
+
+
+To stride over segment boundaries, this sequence of files may be split
+into multiple logs. The sequence of logs that should be treated as
+logically one log, is delimited with flags marked in the segment
+summary. The recovery code of nilfs2 looks this boundary information
+to ensure atomicity of updates.
+
+The super root block is inserted for every checkpoints. It includes
+three special inodes, inodes for the DAT, cpfile, and sufile. Inodes
+of regular files, directories, symlinks and other special files, are
+included in the ifile. The inode of ifile itself is included in the
+corresponding checkpoint entry in the cpfile. Thus, the hierarchy
+among NILFS2 files can be depicted as follows:
+
+ Super block (SB)
+ |
+ v
+ Super root block (the latest cno=xx)
+ |-- DAT
+ |-- sufile
+ `-- cpfile
+ |-- ifile (cno=c1)
+ |-- ifile (cno=c2) ---- file (ino=i1)
+ : : |-- file (ino=i2)
+ `-- ifile (cno=xx) |-- file (ino=i3)
+ : :
+ `-- file (ino=yy)
+ ( regular file, directory, or symlink )
+
+For detail on the format of each file, please see include/linux/nilfs2_fs.h.
diff --git a/Documentation/filesystems/ntfs.txt b/Documentation/filesystems/ntfs.txt
new file mode 100644
index 000000000..553f10d03
--- /dev/null
+++ b/Documentation/filesystems/ntfs.txt
@@ -0,0 +1,451 @@
+The Linux NTFS filesystem driver
+================================
+
+
+Table of contents
+=================
+
+- Overview
+- Web site
+- Features
+- Supported mount options
+- Known bugs and (mis-)features
+- Using NTFS volume and stripe sets
+ - The Device-Mapper driver
+ - The Software RAID / MD driver
+ - Limitations when using the MD driver
+
+
+Overview
+========
+
+Linux-NTFS comes with a number of user-space programs known as ntfsprogs.
+These include mkntfs, a full-featured ntfs filesystem format utility,
+ntfsundelete used for recovering files that were unintentionally deleted
+from an NTFS volume and ntfsresize which is used to resize an NTFS partition.
+See the web site for more information.
+
+To mount an NTFS 1.2/3.x (Windows NT4/2000/XP/2003) volume, use the file
+system type 'ntfs'. The driver currently supports read-only mode (with no
+fault-tolerance, encryption or journalling) and very limited, but safe, write
+support.
+
+For fault tolerance and raid support (i.e. volume and stripe sets), you can
+use the kernel's Software RAID / MD driver. See section "Using Software RAID
+with NTFS" for details.
+
+
+Web site
+========
+
+There is plenty of additional information on the linux-ntfs web site
+at http://www.linux-ntfs.org/
+
+The web site has a lot of additional information, such as a comprehensive
+FAQ, documentation on the NTFS on-disk format, information on the Linux-NTFS
+userspace utilities, etc.
+
+
+Features
+========
+
+- This is a complete rewrite of the NTFS driver that used to be in the 2.4 and
+ earlier kernels. This new driver implements NTFS read support and is
+ functionally equivalent to the old ntfs driver and it also implements limited
+ write support. The biggest limitation at present is that files/directories
+ cannot be created or deleted. See below for the list of write features that
+ are so far supported. Another limitation is that writing to compressed files
+ is not implemented at all. Also, neither read nor write access to encrypted
+ files is so far implemented.
+- The new driver has full support for sparse files on NTFS 3.x volumes which
+ the old driver isn't happy with.
+- The new driver supports execution of binaries due to mmap() now being
+ supported.
+- The new driver supports loopback mounting of files on NTFS which is used by
+ some Linux distributions to enable the user to run Linux from an NTFS
+ partition by creating a large file while in Windows and then loopback
+ mounting the file while in Linux and creating a Linux filesystem on it that
+ is used to install Linux on it.
+- A comparison of the two drivers using:
+ time find . -type f -exec md5sum "{}" \;
+ run three times in sequence with each driver (after a reboot) on a 1.4GiB
+ NTFS partition, showed the new driver to be 20% faster in total time elapsed
+ (from 9:43 minutes on average down to 7:53). The time spent in user space
+ was unchanged but the time spent in the kernel was decreased by a factor of
+ 2.5 (from 85 CPU seconds down to 33).
+- The driver does not support short file names in general. For backwards
+ compatibility, we implement access to files using their short file names if
+ they exist. The driver will not create short file names however, and a
+ rename will discard any existing short file name.
+- The new driver supports exporting of mounted NTFS volumes via NFS.
+- The new driver supports async io (aio).
+- The new driver supports fsync(2), fdatasync(2), and msync(2).
+- The new driver supports readv(2) and writev(2).
+- The new driver supports access time updates (including mtime and ctime).
+- The new driver supports truncate(2) and open(2) with O_TRUNC. But at present
+ only very limited support for highly fragmented files, i.e. ones which have
+ their data attribute split across multiple extents, is included. Another
+ limitation is that at present truncate(2) will never create sparse files,
+ since to mark a file sparse we need to modify the directory entry for the
+ file and we do not implement directory modifications yet.
+- The new driver supports write(2) which can both overwrite existing data and
+ extend the file size so that you can write beyond the existing data. Also,
+ writing into sparse regions is supported and the holes are filled in with
+ clusters. But at present only limited support for highly fragmented files,
+ i.e. ones which have their data attribute split across multiple extents, is
+ included. Another limitation is that write(2) will never create sparse
+ files, since to mark a file sparse we need to modify the directory entry for
+ the file and we do not implement directory modifications yet.
+
+Supported mount options
+=======================
+
+In addition to the generic mount options described by the manual page for the
+mount command (man 8 mount, also see man 5 fstab), the NTFS driver supports the
+following mount options:
+
+iocharset=name Deprecated option. Still supported but please use
+ nls=name in the future. See description for nls=name.
+
+nls=name Character set to use when returning file names.
+ Unlike VFAT, NTFS suppresses names that contain
+ unconvertible characters. Note that most character
+ sets contain insufficient characters to represent all
+ possible Unicode characters that can exist on NTFS.
+ To be sure you are not missing any files, you are
+ advised to use nls=utf8 which is capable of
+ representing all Unicode characters.
+
+utf8=<bool> Option no longer supported. Currently mapped to
+ nls=utf8 but please use nls=utf8 in the future and
+ make sure utf8 is compiled either as module or into
+ the kernel. See description for nls=name.
+
+uid=
+gid=
+umask= Provide default owner, group, and access mode mask.
+ These options work as documented in mount(8). By
+ default, the files/directories are owned by root and
+ he/she has read and write permissions, as well as
+ browse permission for directories. No one else has any
+ access permissions. I.e. the mode on all files is by
+ default rw------- and for directories rwx------, a
+ consequence of the default fmask=0177 and dmask=0077.
+ Using a umask of zero will grant all permissions to
+ everyone, i.e. all files and directories will have mode
+ rwxrwxrwx.
+
+fmask=
+dmask= Instead of specifying umask which applies both to
+ files and directories, fmask applies only to files and
+ dmask only to directories.
+
+sloppy=<BOOL> If sloppy is specified, ignore unknown mount options.
+ Otherwise the default behaviour is to abort mount if
+ any unknown options are found.
+
+show_sys_files=<BOOL> If show_sys_files is specified, show the system files
+ in directory listings. Otherwise the default behaviour
+ is to hide the system files.
+ Note that even when show_sys_files is specified, "$MFT"
+ will not be visible due to bugs/mis-features in glibc.
+ Further, note that irrespective of show_sys_files, all
+ files are accessible by name, i.e. you can always do
+ "ls -l \$UpCase" for example to specifically show the
+ system file containing the Unicode upcase table.
+
+case_sensitive=<BOOL> If case_sensitive is specified, treat all file names as
+ case sensitive and create file names in the POSIX
+ namespace. Otherwise the default behaviour is to treat
+ file names as case insensitive and to create file names
+ in the WIN32/LONG name space. Note, the Linux NTFS
+ driver will never create short file names and will
+ remove them on rename/delete of the corresponding long
+ file name.
+ Note that files remain accessible via their short file
+ name, if it exists. If case_sensitive, you will need
+ to provide the correct case of the short file name.
+
+disable_sparse=<BOOL> If disable_sparse is specified, creation of sparse
+ regions, i.e. holes, inside files is disabled for the
+ volume (for the duration of this mount only). By
+ default, creation of sparse regions is enabled, which
+ is consistent with the behaviour of traditional Unix
+ filesystems.
+
+errors=opt What to do when critical filesystem errors are found.
+ Following values can be used for "opt":
+ continue: DEFAULT, try to clean-up as much as
+ possible, e.g. marking a corrupt inode as
+ bad so it is no longer accessed, and then
+ continue.
+ recover: At present only supported is recovery of
+ the boot sector from the backup copy.
+ If read-only mount, the recovery is done
+ in memory only and not written to disk.
+ Note that the options are additive, i.e. specifying:
+ errors=continue,errors=recover
+ means the driver will attempt to recover and if that
+ fails it will clean-up as much as possible and
+ continue.
+
+mft_zone_multiplier= Set the MFT zone multiplier for the volume (this
+ setting is not persistent across mounts and can be
+ changed from mount to mount but cannot be changed on
+ remount). Values of 1 to 4 are allowed, 1 being the
+ default. The MFT zone multiplier determines how much
+ space is reserved for the MFT on the volume. If all
+ other space is used up, then the MFT zone will be
+ shrunk dynamically, so this has no impact on the
+ amount of free space. However, it can have an impact
+ on performance by affecting fragmentation of the MFT.
+ In general use the default. If you have a lot of small
+ files then use a higher value. The values have the
+ following meaning:
+ Value MFT zone size (% of volume size)
+ 1 12.5%
+ 2 25%
+ 3 37.5%
+ 4 50%
+ Note this option is irrelevant for read-only mounts.
+
+
+Known bugs and (mis-)features
+=============================
+
+- The link count on each directory inode entry is set to 1, due to Linux not
+ supporting directory hard links. This may well confuse some user space
+ applications, since the directory names will have the same inode numbers.
+ This also speeds up ntfs_read_inode() immensely. And we haven't found any
+ problems with this approach so far. If you find a problem with this, please
+ let us know.
+
+
+Please send bug reports/comments/feedback/abuse to the Linux-NTFS development
+list at sourceforge: linux-ntfs-dev@lists.sourceforge.net
+
+
+Using NTFS volume and stripe sets
+=================================
+
+For support of volume and stripe sets, you can either use the kernel's
+Device-Mapper driver or the kernel's Software RAID / MD driver. The former is
+the recommended one to use for linear raid. But the latter is required for
+raid level 5. For striping and mirroring, either driver should work fine.
+
+
+The Device-Mapper driver
+------------------------
+
+You will need to create a table of the components of the volume/stripe set and
+how they fit together and load this into the kernel using the dmsetup utility
+(see man 8 dmsetup).
+
+Linear volume sets, i.e. linear raid, has been tested and works fine. Even
+though untested, there is no reason why stripe sets, i.e. raid level 0, and
+mirrors, i.e. raid level 1 should not work, too. Stripes with parity, i.e.
+raid level 5, unfortunately cannot work yet because the current version of the
+Device-Mapper driver does not support raid level 5. You may be able to use the
+Software RAID / MD driver for raid level 5, see the next section for details.
+
+To create the table describing your volume you will need to know each of its
+components and their sizes in sectors, i.e. multiples of 512-byte blocks.
+
+For NT4 fault tolerant volumes you can obtain the sizes using fdisk. So for
+example if one of your partitions is /dev/hda2 you would do:
+
+$ fdisk -ul /dev/hda
+
+Disk /dev/hda: 81.9 GB, 81964302336 bytes
+255 heads, 63 sectors/track, 9964 cylinders, total 160086528 sectors
+Units = sectors of 1 * 512 = 512 bytes
+
+ Device Boot Start End Blocks Id System
+ /dev/hda1 * 63 4209029 2104483+ 83 Linux
+ /dev/hda2 4209030 37768814 16779892+ 86 NTFS
+ /dev/hda3 37768815 46170809 4200997+ 83 Linux
+
+And you would know that /dev/hda2 has a size of 37768814 - 4209030 + 1 =
+33559785 sectors.
+
+For Win2k and later dynamic disks, you can for example use the ldminfo utility
+which is part of the Linux LDM tools (the latest version at the time of
+writing is linux-ldm-0.0.8.tar.bz2). You can download it from:
+ http://www.linux-ntfs.org/
+Simply extract the downloaded archive (tar xvjf linux-ldm-0.0.8.tar.bz2), go
+into it (cd linux-ldm-0.0.8) and change to the test directory (cd test). You
+will find the precompiled (i386) ldminfo utility there. NOTE: You will not be
+able to compile this yourself easily so use the binary version!
+
+Then you would use ldminfo in dump mode to obtain the necessary information:
+
+$ ./ldminfo --dump /dev/hda
+
+This would dump the LDM database found on /dev/hda which describes all of your
+dynamic disks and all the volumes on them. At the bottom you will see the
+VOLUME DEFINITIONS section which is all you really need. You may need to look
+further above to determine which of the disks in the volume definitions is
+which device in Linux. Hint: Run ldminfo on each of your dynamic disks and
+look at the Disk Id close to the top of the output for each (the PRIVATE HEADER
+section). You can then find these Disk Ids in the VBLK DATABASE section in the
+<Disk> components where you will get the LDM Name for the disk that is found in
+the VOLUME DEFINITIONS section.
+
+Note you will also need to enable the LDM driver in the Linux kernel. If your
+distribution did not enable it, you will need to recompile the kernel with it
+enabled. This will create the LDM partitions on each device at boot time. You
+would then use those devices (for /dev/hda they would be /dev/hda1, 2, 3, etc)
+in the Device-Mapper table.
+
+You can also bypass using the LDM driver by using the main device (e.g.
+/dev/hda) and then using the offsets of the LDM partitions into this device as
+the "Start sector of device" when creating the table. Once again ldminfo would
+give you the correct information to do this.
+
+Assuming you know all your devices and their sizes things are easy.
+
+For a linear raid the table would look like this (note all values are in
+512-byte sectors):
+
+--- cut here ---
+# Offset into Size of this Raid type Device Start sector
+# volume device of device
+0 1028161 linear /dev/hda1 0
+1028161 3903762 linear /dev/hdb2 0
+4931923 2103211 linear /dev/hdc1 0
+--- cut here ---
+
+For a striped volume, i.e. raid level 0, you will need to know the chunk size
+you used when creating the volume. Windows uses 64kiB as the default, so it
+will probably be this unless you changes the defaults when creating the array.
+
+For a raid level 0 the table would look like this (note all values are in
+512-byte sectors):
+
+--- cut here ---
+# Offset Size Raid Number Chunk 1st Start 2nd Start
+# into of the type of size Device in Device in
+# volume volume stripes device device
+0 2056320 striped 2 128 /dev/hda1 0 /dev/hdb1 0
+--- cut here ---
+
+If there are more than two devices, just add each of them to the end of the
+line.
+
+Finally, for a mirrored volume, i.e. raid level 1, the table would look like
+this (note all values are in 512-byte sectors):
+
+--- cut here ---
+# Ofs Size Raid Log Number Region Should Number Source Start Target Start
+# in of the type type of log size sync? of Device in Device in
+# vol volume params mirrors Device Device
+0 2056320 mirror core 2 16 nosync 2 /dev/hda1 0 /dev/hdb1 0
+--- cut here ---
+
+If you are mirroring to multiple devices you can specify further targets at the
+end of the line.
+
+Note the "Should sync?" parameter "nosync" means that the two mirrors are
+already in sync which will be the case on a clean shutdown of Windows. If the
+mirrors are not clean, you can specify the "sync" option instead of "nosync"
+and the Device-Mapper driver will then copy the entirety of the "Source Device"
+to the "Target Device" or if you specified multiple target devices to all of
+them.
+
+Once you have your table, save it in a file somewhere (e.g. /etc/ntfsvolume1),
+and hand it over to dmsetup to work with, like so:
+
+$ dmsetup create myvolume1 /etc/ntfsvolume1
+
+You can obviously replace "myvolume1" with whatever name you like.
+
+If it all worked, you will now have the device /dev/device-mapper/myvolume1
+which you can then just use as an argument to the mount command as usual to
+mount the ntfs volume. For example:
+
+$ mount -t ntfs -o ro /dev/device-mapper/myvolume1 /mnt/myvol1
+
+(You need to create the directory /mnt/myvol1 first and of course you can use
+anything you like instead of /mnt/myvol1 as long as it is an existing
+directory.)
+
+It is advisable to do the mount read-only to see if the volume has been setup
+correctly to avoid the possibility of causing damage to the data on the ntfs
+volume.
+
+
+The Software RAID / MD driver
+-----------------------------
+
+An alternative to using the Device-Mapper driver is to use the kernel's
+Software RAID / MD driver. For which you need to set up your /etc/raidtab
+appropriately (see man 5 raidtab).
+
+Linear volume sets, i.e. linear raid, as well as stripe sets, i.e. raid level
+0, have been tested and work fine (though see section "Limitations when using
+the MD driver with NTFS volumes" especially if you want to use linear raid).
+Even though untested, there is no reason why mirrors, i.e. raid level 1, and
+stripes with parity, i.e. raid level 5, should not work, too.
+
+You have to use the "persistent-superblock 0" option for each raid-disk in the
+NTFS volume/stripe you are configuring in /etc/raidtab as the persistent
+superblock used by the MD driver would damage the NTFS volume.
+
+Windows by default uses a stripe chunk size of 64k, so you probably want the
+"chunk-size 64k" option for each raid-disk, too.
+
+For example, if you have a stripe set consisting of two partitions /dev/hda5
+and /dev/hdb1 your /etc/raidtab would look like this:
+
+raiddev /dev/md0
+ raid-level 0
+ nr-raid-disks 2
+ nr-spare-disks 0
+ persistent-superblock 0
+ chunk-size 64k
+ device /dev/hda5
+ raid-disk 0
+ device /dev/hdb1
+ raid-disk 1
+
+For linear raid, just change the raid-level above to "raid-level linear", for
+mirrors, change it to "raid-level 1", and for stripe sets with parity, change
+it to "raid-level 5".
+
+Note for stripe sets with parity you will also need to tell the MD driver
+which parity algorithm to use by specifying the option "parity-algorithm
+which", where you need to replace "which" with the name of the algorithm to
+use (see man 5 raidtab for available algorithms) and you will have to try the
+different available algorithms until you find one that works. Make sure you
+are working read-only when playing with this as you may damage your data
+otherwise. If you find which algorithm works please let us know (email the
+linux-ntfs developers list linux-ntfs-dev@lists.sourceforge.net or drop in on
+IRC in channel #ntfs on the irc.freenode.net network) so we can update this
+documentation.
+
+Once the raidtab is setup, run for example raid0run -a to start all devices or
+raid0run /dev/md0 to start a particular md device, in this case /dev/md0.
+
+Then just use the mount command as usual to mount the ntfs volume using for
+example: mount -t ntfs -o ro /dev/md0 /mnt/myntfsvolume
+
+It is advisable to do the mount read-only to see if the md volume has been
+setup correctly to avoid the possibility of causing damage to the data on the
+ntfs volume.
+
+
+Limitations when using the Software RAID / MD driver
+-----------------------------------------------------
+
+Using the md driver will not work properly if any of your NTFS partitions have
+an odd number of sectors. This is especially important for linear raid as all
+data after the first partition with an odd number of sectors will be offset by
+one or more sectors so if you mount such a partition with write support you
+will cause massive damage to the data on the volume which will only become
+apparent when you try to use the volume again under Windows.
+
+So when using linear raid, make sure that all your partitions have an even
+number of sectors BEFORE attempting to use it. You have been warned!
+
+Even better is to simply use the Device-Mapper for linear raid and then you do
+not have this problem with odd numbers of sectors.
diff --git a/Documentation/filesystems/ocfs2.txt b/Documentation/filesystems/ocfs2.txt
new file mode 100644
index 000000000..4c49e5410
--- /dev/null
+++ b/Documentation/filesystems/ocfs2.txt
@@ -0,0 +1,106 @@
+OCFS2 filesystem
+==================
+OCFS2 is a general purpose extent based shared disk cluster file
+system with many similarities to ext3. It supports 64 bit inode
+numbers, and has automatically extending metadata groups which may
+also make it attractive for non-clustered use.
+
+You'll want to install the ocfs2-tools package in order to at least
+get "mount.ocfs2" and "ocfs2_hb_ctl".
+
+Project web page: http://ocfs2.wiki.kernel.org
+Tools git tree: https://github.com/markfasheh/ocfs2-tools
+OCFS2 mailing lists: http://oss.oracle.com/projects/ocfs2/mailman/
+
+All code copyright 2005 Oracle except when otherwise noted.
+
+CREDITS:
+Lots of code taken from ext3 and other projects.
+
+Authors in alphabetical order:
+Joel Becker <joel.becker@oracle.com>
+Zach Brown <zach.brown@oracle.com>
+Mark Fasheh <mfasheh@suse.com>
+Kurt Hackel <kurt.hackel@oracle.com>
+Tao Ma <tao.ma@oracle.com>
+Sunil Mushran <sunil.mushran@oracle.com>
+Manish Singh <manish.singh@oracle.com>
+Tiger Yang <tiger.yang@oracle.com>
+
+Caveats
+=======
+Features which OCFS2 does not support yet:
+ - Directory change notification (F_NOTIFY)
+ - Distributed Caching (F_SETLEASE/F_GETLEASE/break_lease)
+
+Mount options
+=============
+
+OCFS2 supports the following mount options:
+(*) == default
+
+barrier=1 This enables/disables barriers. barrier=0 disables it,
+ barrier=1 enables it.
+errors=remount-ro(*) Remount the filesystem read-only on an error.
+errors=panic Panic and halt the machine if an error occurs.
+intr (*) Allow signals to interrupt cluster operations.
+nointr Do not allow signals to interrupt cluster
+ operations.
+noatime Do not update access time.
+relatime(*) Update atime if the previous atime is older than
+ mtime or ctime
+strictatime Always update atime, but the minimum update interval
+ is specified by atime_quantum.
+atime_quantum=60(*) OCFS2 will not update atime unless this number
+ of seconds has passed since the last update.
+ Set to zero to always update atime. This option need
+ work with strictatime.
+data=ordered (*) All data are forced directly out to the main file
+ system prior to its metadata being committed to the
+ journal.
+data=writeback Data ordering is not preserved, data may be written
+ into the main file system after its metadata has been
+ committed to the journal.
+preferred_slot=0(*) During mount, try to use this filesystem slot first. If
+ it is in use by another node, the first empty one found
+ will be chosen. Invalid values will be ignored.
+commit=nrsec (*) Ocfs2 can be told to sync all its data and metadata
+ every 'nrsec' seconds. The default value is 5 seconds.
+ This means that if you lose your power, you will lose
+ as much as the latest 5 seconds of work (your
+ filesystem will not be damaged though, thanks to the
+ journaling). This default value (or any low value)
+ will hurt performance, but it's good for data-safety.
+ Setting it to 0 will have the same effect as leaving
+ it at the default (5 seconds).
+ Setting it to very large values will improve
+ performance.
+localalloc=8(*) Allows custom localalloc size in MB. If the value is too
+ large, the fs will silently revert it to the default.
+localflocks This disables cluster aware flock.
+inode64 Indicates that Ocfs2 is allowed to create inodes at
+ any location in the filesystem, including those which
+ will result in inode numbers occupying more than 32
+ bits of significance.
+user_xattr (*) Enables Extended User Attributes.
+nouser_xattr Disables Extended User Attributes.
+acl Enables POSIX Access Control Lists support.
+noacl (*) Disables POSIX Access Control Lists support.
+resv_level=2 (*) Set how aggressive allocation reservations will be.
+ Valid values are between 0 (reservations off) to 8
+ (maximum space for reservations).
+dir_resv_level= (*) By default, directory reservations will scale with file
+ reservations - users should rarely need to change this
+ value. If allocation reservations are turned off, this
+ option will have no effect.
+coherency=full (*) Disallow concurrent O_DIRECT writes, cluster inode
+ lock will be taken to force other nodes drop cache,
+ therefore full cluster coherency is guaranteed even
+ for O_DIRECT writes.
+coherency=buffered Allow concurrent O_DIRECT writes without EX lock among
+ nodes, which gains high performance at risk of getting
+ stale data on other nodes.
+journal_async_commit Commit block can be written to disk without waiting
+ for descriptor blocks. If enabled older kernels cannot
+ mount the device. This will enable 'journal_checksum'
+ internally.
diff --git a/Documentation/filesystems/omfs.txt b/Documentation/filesystems/omfs.txt
new file mode 100644
index 000000000..1d0d41ff5
--- /dev/null
+++ b/Documentation/filesystems/omfs.txt
@@ -0,0 +1,106 @@
+Optimized MPEG Filesystem (OMFS)
+
+Overview
+========
+
+OMFS is a filesystem created by SonicBlue for use in the ReplayTV DVR
+and Rio Karma MP3 player. The filesystem is extent-based, utilizing
+block sizes from 2k to 8k, with hash-based directories. This
+filesystem driver may be used to read and write disks from these
+devices.
+
+Note, it is not recommended that this FS be used in place of a general
+filesystem for your own streaming media device. Native Linux filesystems
+will likely perform better.
+
+More information is available at:
+
+ http://linux-karma.sf.net/
+
+Various utilities, including mkomfs and omfsck, are included with
+omfsprogs, available at:
+
+ http://bobcopeland.com/karma/
+
+Instructions are included in its README.
+
+Options
+=======
+
+OMFS supports the following mount-time options:
+
+ uid=n - make all files owned by specified user
+ gid=n - make all files owned by specified group
+ umask=xxx - set permission umask to xxx
+ fmask=xxx - set umask to xxx for files
+ dmask=xxx - set umask to xxx for directories
+
+Disk format
+===========
+
+OMFS discriminates between "sysblocks" and normal data blocks. The sysblock
+group consists of super block information, file metadata, directory structures,
+and extents. Each sysblock has a header containing CRCs of the entire
+sysblock, and may be mirrored in successive blocks on the disk. A sysblock may
+have a smaller size than a data block, but since they are both addressed by the
+same 64-bit block number, any remaining space in the smaller sysblock is
+unused.
+
+Sysblock header information:
+
+struct omfs_header {
+ __be64 h_self; /* FS block where this is located */
+ __be32 h_body_size; /* size of useful data after header */
+ __be16 h_crc; /* crc-ccitt of body_size bytes */
+ char h_fill1[2];
+ u8 h_version; /* version, always 1 */
+ char h_type; /* OMFS_INODE_X */
+ u8 h_magic; /* OMFS_IMAGIC */
+ u8 h_check_xor; /* XOR of header bytes before this */
+ __be32 h_fill2;
+};
+
+Files and directories are both represented by omfs_inode:
+
+struct omfs_inode {
+ struct omfs_header i_head; /* header */
+ __be64 i_parent; /* parent containing this inode */
+ __be64 i_sibling; /* next inode in hash bucket */
+ __be64 i_ctime; /* ctime, in milliseconds */
+ char i_fill1[35];
+ char i_type; /* OMFS_[DIR,FILE] */
+ __be32 i_fill2;
+ char i_fill3[64];
+ char i_name[OMFS_NAMELEN]; /* filename */
+ __be64 i_size; /* size of file, in bytes */
+};
+
+Directories in OMFS are implemented as a large hash table. Filenames are
+hashed then prepended into the bucket list beginning at OMFS_DIR_START.
+Lookup requires hashing the filename, then seeking across i_sibling pointers
+until a match is found on i_name. Empty buckets are represented by block
+pointers with all-1s (~0).
+
+A file is an omfs_inode structure followed by an extent table beginning at
+OMFS_EXTENT_START:
+
+struct omfs_extent_entry {
+ __be64 e_cluster; /* start location of a set of blocks */
+ __be64 e_blocks; /* number of blocks after e_cluster */
+};
+
+struct omfs_extent {
+ __be64 e_next; /* next extent table location */
+ __be32 e_extent_count; /* total # extents in this table */
+ __be32 e_fill;
+ struct omfs_extent_entry e_entry; /* start of extent entries */
+};
+
+Each extent holds the block offset followed by number of blocks allocated to
+the extent. The final extent in each table is a terminator with e_cluster
+being ~0 and e_blocks being ones'-complement of the total number of blocks
+in the table.
+
+If this table overflows, a continuation inode is written and pointed to by
+e_next. These have a header but lack the rest of the inode structure.
+
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.txt
new file mode 100644
index 000000000..6db0e5d1d
--- /dev/null
+++ b/Documentation/filesystems/overlayfs.txt
@@ -0,0 +1,226 @@
+Written by: Neil Brown <neilb@suse.de>
+
+Overlay Filesystem
+==================
+
+This document describes a prototype for a new approach to providing
+overlay-filesystem functionality in Linux (sometimes referred to as
+union-filesystems). An overlay-filesystem tries to present a
+filesystem which is the result over overlaying one filesystem on top
+of the other.
+
+The result will inevitably fail to look exactly like a normal
+filesystem for various technical reasons. The expectation is that
+many use cases will be able to ignore these differences.
+
+This approach is 'hybrid' because the objects that appear in the
+filesystem do not all appear to belong to that filesystem. In many
+cases an object accessed in the union will be indistinguishable
+from accessing the corresponding object from the original filesystem.
+This is most obvious from the 'st_dev' field returned by stat(2).
+
+While directories will report an st_dev from the overlay-filesystem,
+all non-directory objects will report an st_dev from the lower or
+upper filesystem that is providing the object. Similarly st_ino will
+only be unique when combined with st_dev, and both of these can change
+over the lifetime of a non-directory object. Many applications and
+tools ignore these values and will not be affected.
+
+Upper and Lower
+---------------
+
+An overlay filesystem combines two filesystems - an 'upper' filesystem
+and a 'lower' filesystem. When a name exists in both filesystems, the
+object in the 'upper' filesystem is visible while the object in the
+'lower' filesystem is either hidden or, in the case of directories,
+merged with the 'upper' object.
+
+It would be more correct to refer to an upper and lower 'directory
+tree' rather than 'filesystem' as it is quite possible for both
+directory trees to be in the same filesystem and there is no
+requirement that the root of a filesystem be given for either upper or
+lower.
+
+The lower filesystem can be any filesystem supported by Linux and does
+not need to be writable. The lower filesystem can even be another
+overlayfs. The upper filesystem will normally be writable and if it
+is it must support the creation of trusted.* extended attributes, and
+must provide valid d_type in readdir responses, so NFS is not suitable.
+
+A read-only overlay of two read-only filesystems may use any
+filesystem type.
+
+Directories
+-----------
+
+Overlaying mainly involves directories. If a given name appears in both
+upper and lower filesystems and refers to a non-directory in either,
+then the lower object is hidden - the name refers only to the upper
+object.
+
+Where both upper and lower objects are directories, a merged directory
+is formed.
+
+At mount time, the two directories given as mount options "lowerdir" and
+"upperdir" are combined into a merged directory:
+
+ mount -t overlay overlay -olowerdir=/lower,upperdir=/upper,\
+workdir=/work /merged
+
+The "workdir" needs to be an empty directory on the same filesystem
+as upperdir.
+
+Then whenever a lookup is requested in such a merged directory, the
+lookup is performed in each actual directory and the combined result
+is cached in the dentry belonging to the overlay filesystem. If both
+actual lookups find directories, both are stored and a merged
+directory is created, otherwise only one is stored: the upper if it
+exists, else the lower.
+
+Only the lists of names from directories are merged. Other content
+such as metadata and extended attributes are reported for the upper
+directory only. These attributes of the lower directory are hidden.
+
+whiteouts and opaque directories
+--------------------------------
+
+In order to support rm and rmdir without changing the lower
+filesystem, an overlay filesystem needs to record in the upper filesystem
+that files have been removed. This is done using whiteouts and opaque
+directories (non-directories are always opaque).
+
+A whiteout is created as a character device with 0/0 device number.
+When a whiteout is found in the upper level of a merged directory, any
+matching name in the lower level is ignored, and the whiteout itself
+is also hidden.
+
+A directory is made opaque by setting the xattr "trusted.overlay.opaque"
+to "y". Where the upper filesystem contains an opaque directory, any
+directory in the lower filesystem with the same name is ignored.
+
+readdir
+-------
+
+When a 'readdir' request is made on a merged directory, the upper and
+lower directories are each read and the name lists merged in the
+obvious way (upper is read first, then lower - entries that already
+exist are not re-added). This merged name list is cached in the
+'struct file' and so remains as long as the file is kept open. If the
+directory is opened and read by two processes at the same time, they
+will each have separate caches. A seekdir to the start of the
+directory (offset 0) followed by a readdir will cause the cache to be
+discarded and rebuilt.
+
+This means that changes to the merged directory do not appear while a
+directory is being read. This is unlikely to be noticed by many
+programs.
+
+seek offsets are assigned sequentially when the directories are read.
+Thus if
+ - read part of a directory
+ - remember an offset, and close the directory
+ - re-open the directory some time later
+ - seek to the remembered offset
+
+there may be little correlation between the old and new locations in
+the list of filenames, particularly if anything has changed in the
+directory.
+
+Readdir on directories that are not merged is simply handled by the
+underlying directory (upper or lower).
+
+
+Non-directories
+---------------
+
+Objects that are not directories (files, symlinks, device-special
+files etc.) are presented either from the upper or lower filesystem as
+appropriate. When a file in the lower filesystem is accessed in a way
+the requires write-access, such as opening for write access, changing
+some metadata etc., the file is first copied from the lower filesystem
+to the upper filesystem (copy_up). Note that creating a hard-link
+also requires copy_up, though of course creation of a symlink does
+not.
+
+The copy_up may turn out to be unnecessary, for example if the file is
+opened for read-write but the data is not modified.
+
+The copy_up process first makes sure that the containing directory
+exists in the upper filesystem - creating it and any parents as
+necessary. It then creates the object with the same metadata (owner,
+mode, mtime, symlink-target etc.) and then if the object is a file, the
+data is copied from the lower to the upper filesystem. Finally any
+extended attributes are copied up.
+
+Once the copy_up is complete, the overlay filesystem simply
+provides direct access to the newly created file in the upper
+filesystem - future operations on the file are barely noticed by the
+overlay filesystem (though an operation on the name of the file such as
+rename or unlink will of course be noticed and handled).
+
+
+Multiple lower layers
+---------------------
+
+Multiple lower layers can now be given using the the colon (":") as a
+separator character between the directory names. For example:
+
+ mount -t overlay overlay -olowerdir=/lower1:/lower2:/lower3 /merged
+
+As the example shows, "upperdir=" and "workdir=" may be omitted. In
+that case the overlay will be read-only.
+
+The specified lower directories will be stacked beginning from the
+rightmost one and going left. In the above example lower1 will be the
+top, lower2 the middle and lower3 the bottom layer.
+
+
+Non-standard behavior
+---------------------
+
+The copy_up operation essentially creates a new, identical file and
+moves it over to the old name. The new file may be on a different
+filesystem, so both st_dev and st_ino of the file may change.
+
+Any open files referring to this inode will access the old data and
+metadata. Similarly any file locks obtained before copy_up will not
+apply to the copied up file.
+
+On a file opened with O_RDONLY fchmod(2), fchown(2), futimesat(2) and
+fsetxattr(2) will fail with EROFS.
+
+If a file with multiple hard links is copied up, then this will
+"break" the link. Changes will not be propagated to other names
+referring to the same inode.
+
+Symlinks in /proc/PID/ and /proc/PID/fd which point to a non-directory
+object in overlayfs will not contain valid absolute paths, only
+relative paths leading up to the filesystem's root. This will be
+fixed in the future.
+
+Some operations are not atomic, for example a crash during copy_up or
+rename will leave the filesystem in an inconsistent state. This will
+be addressed in the future.
+
+Changes to underlying filesystems
+---------------------------------
+
+Offline changes, when the overlay is not mounted, are allowed to either
+the upper or the lower trees.
+
+Changes to the underlying filesystems while part of a mounted overlay
+filesystem are not allowed. If the underlying filesystem is changed,
+the behavior of the overlay is undefined, though it will not result in
+a crash or deadlock.
+
+Testsuite
+---------
+
+There's testsuite developed by David Howells at:
+
+ git://git.infradead.org/users/dhowells/unionmount-testsuite.git
+
+Run as root:
+
+ # cd unionmount-testsuite
+ # ./run --ov
diff --git a/Documentation/filesystems/path-lookup.txt b/Documentation/filesystems/path-lookup.txt
new file mode 100644
index 000000000..3571667c7
--- /dev/null
+++ b/Documentation/filesystems/path-lookup.txt
@@ -0,0 +1,382 @@
+Path walking and name lookup locking
+====================================
+
+Path resolution is the finding a dentry corresponding to a path name string, by
+performing a path walk. Typically, for every open(), stat() etc., the path name
+will be resolved. Paths are resolved by walking the namespace tree, starting
+with the first component of the pathname (eg. root or cwd) with a known dentry,
+then finding the child of that dentry, which is named the next component in the
+path string. Then repeating the lookup from the child dentry and finding its
+child with the next element, and so on.
+
+Since it is a frequent operation for workloads like multiuser environments and
+web servers, it is important to optimize this code.
+
+Path walking synchronisation history:
+Prior to 2.5.10, dcache_lock was acquired in d_lookup (dcache hash lookup) and
+thus in every component during path look-up. Since 2.5.10 onwards, fast-walk
+algorithm changed this by holding the dcache_lock at the beginning and walking
+as many cached path component dentries as possible. This significantly
+decreases the number of acquisition of dcache_lock. However it also increases
+the lock hold time significantly and affects performance in large SMP machines.
+Since 2.5.62 kernel, dcache has been using a new locking model that uses RCU to
+make dcache look-up lock-free.
+
+All the above algorithms required taking a lock and reference count on the
+dentry that was looked up, so that may be used as the basis for walking the
+next path element. This is inefficient and unscalable. It is inefficient
+because of the locks and atomic operations required for every dentry element
+slows things down. It is not scalable because many parallel applications that
+are path-walk intensive tend to do path lookups starting from a common dentry
+(usually, the root "/" or current working directory). So contention on these
+common path elements causes lock and cacheline queueing.
+
+Since 2.6.38, RCU is used to make a significant part of the entire path walk
+(including dcache look-up) completely "store-free" (so, no locks, atomics, or
+even stores into cachelines of common dentries). This is known as "rcu-walk"
+path walking.
+
+Path walking overview
+=====================
+
+A name string specifies a start (root directory, cwd, fd-relative) and a
+sequence of elements (directory entry names), which together refer to a path in
+the namespace. A path is represented as a (dentry, vfsmount) tuple. The name
+elements are sub-strings, separated by '/'.
+
+Name lookups will want to find a particular path that a name string refers to
+(usually the final element, or parent of final element). This is done by taking
+the path given by the name's starting point (which we know in advance -- eg.
+current->fs->cwd or current->fs->root) as the first parent of the lookup. Then
+iteratively for each subsequent name element, look up the child of the current
+parent with the given name and if it is not the desired entry, make it the
+parent for the next lookup.
+
+A parent, of course, must be a directory, and we must have appropriate
+permissions on the parent inode to be able to walk into it.
+
+Turning the child into a parent for the next lookup requires more checks and
+procedures. Symlinks essentially substitute the symlink name for the target
+name in the name string, and require some recursive path walking. Mount points
+must be followed into (thus changing the vfsmount that subsequent path elements
+refer to), switching from the mount point path to the root of the particular
+mounted vfsmount. These behaviours are variously modified depending on the
+exact path walking flags.
+
+Path walking then must, broadly, do several particular things:
+- find the start point of the walk;
+- perform permissions and validity checks on inodes;
+- perform dcache hash name lookups on (parent, name element) tuples;
+- traverse mount points;
+- traverse symlinks;
+- lookup and create missing parts of the path on demand.
+
+Safe store-free look-up of dcache hash table
+============================================
+
+Dcache name lookup
+------------------
+In order to lookup a dcache (parent, name) tuple, we take a hash on the tuple
+and use that to select a bucket in the dcache-hash table. The list of entries
+in that bucket is then walked, and we do a full comparison of each entry
+against our (parent, name) tuple.
+
+The hash lists are RCU protected, so list walking is not serialised with
+concurrent updates (insertion, deletion from the hash). This is a standard RCU
+list application with the exception of renames, which will be covered below.
+
+Parent and name members of a dentry, as well as its membership in the dcache
+hash, and its inode are protected by the per-dentry d_lock spinlock. A
+reference is taken on the dentry (while the fields are verified under d_lock),
+and this stabilises its d_inode pointer and actual inode. This gives a stable
+point to perform the next step of our path walk against.
+
+These members are also protected by d_seq seqlock, although this offers
+read-only protection and no durability of results, so care must be taken when
+using d_seq for synchronisation (see seqcount based lookups, below).
+
+Renames
+-------
+Back to the rename case. In usual RCU protected lists, the only operations that
+will happen to an object is insertion, and then eventually removal from the
+list. The object will not be reused until an RCU grace period is complete.
+This ensures the RCU list traversal primitives can run over the object without
+problems (see RCU documentation for how this works).
+
+However when a dentry is renamed, its hash value can change, requiring it to be
+moved to a new hash list. Allocating and inserting a new alias would be
+expensive and also problematic for directory dentries. Latency would be far to
+high to wait for a grace period after removing the dentry and before inserting
+it in the new hash bucket. So what is done is to insert the dentry into the
+new list immediately.
+
+However, when the dentry's list pointers are updated to point to objects in the
+new list before waiting for a grace period, this can result in a concurrent RCU
+lookup of the old list veering off into the new (incorrect) list and missing
+the remaining dentries on the list.
+
+There is no fundamental problem with walking down the wrong list, because the
+dentry comparisons will never match. However it is fatal to miss a matching
+dentry. So a seqlock is used to detect when a rename has occurred, and so the
+lookup can be retried.
+
+ 1 2 3
+ +---+ +---+ +---+
+hlist-->| N-+->| N-+->| N-+->
+head <--+-P |<-+-P |<-+-P |
+ +---+ +---+ +---+
+
+Rename of dentry 2 may require it deleted from the above list, and inserted
+into a new list. Deleting 2 gives the following list.
+
+ 1 3
+ +---+ +---+ (don't worry, the longer pointers do not
+hlist-->| N-+-------->| N-+-> impose a measurable performance overhead
+head <--+-P |<--------+-P | on modern CPUs)
+ +---+ +---+
+ ^ 2 ^
+ | +---+ |
+ | | N-+----+
+ +----+-P |
+ +---+
+
+This is a standard RCU-list deletion, which leaves the deleted object's
+pointers intact, so a concurrent list walker that is currently looking at
+object 2 will correctly continue to object 3 when it is time to traverse the
+next object.
+
+However, when inserting object 2 onto a new list, we end up with this:
+
+ 1 3
+ +---+ +---+
+hlist-->| N-+-------->| N-+->
+head <--+-P |<--------+-P |
+ +---+ +---+
+ 2
+ +---+
+ | N-+---->
+ <----+-P |
+ +---+
+
+Because we didn't wait for a grace period, there may be a concurrent lookup
+still at 2. Now when it follows 2's 'next' pointer, it will walk off into
+another list without ever having checked object 3.
+
+A related, but distinctly different, issue is that of rename atomicity versus
+lookup operations. If a file is renamed from 'A' to 'B', a lookup must only
+find either 'A' or 'B'. So if a lookup of 'A' returns NULL, a subsequent lookup
+of 'B' must succeed (note the reverse is not true).
+
+Between deleting the dentry from the old hash list, and inserting it on the new
+hash list, a lookup may find neither 'A' nor 'B' matching the dentry. The same
+rename seqlock is also used to cover this race in much the same way, by
+retrying a negative lookup result if a rename was in progress.
+
+Seqcount based lookups
+----------------------
+In refcount based dcache lookups, d_lock is used to serialise access to
+the dentry, stabilising it while comparing its name and parent and then
+taking a reference count (the reference count then gives a stable place to
+start the next part of the path walk from).
+
+As explained above, we would like to do path walking without taking locks or
+reference counts on intermediate dentries along the path. To do this, a per
+dentry seqlock (d_seq) is used to take a "coherent snapshot" of what the dentry
+looks like (its name, parent, and inode). That snapshot is then used to start
+the next part of the path walk. When loading the coherent snapshot under d_seq,
+care must be taken to load the members up-front, and use those pointers rather
+than reloading from the dentry later on (otherwise we'd have interesting things
+like d_inode going NULL underneath us, if the name was unlinked).
+
+Also important is to avoid performing any destructive operations (pretty much:
+no non-atomic stores to shared data), and to recheck the seqcount when we are
+"done" with the operation. Retry or abort if the seqcount does not match.
+Avoiding destructive or changing operations means we can easily unwind from
+failure.
+
+What this means is that a caller, provided they are holding RCU lock to
+protect the dentry object from disappearing, can perform a seqcount based
+lookup which does not increment the refcount on the dentry or write to
+it in any way. This returned dentry can be used for subsequent operations,
+provided that d_seq is rechecked after that operation is complete.
+
+Inodes are also rcu freed, so the seqcount lookup dentry's inode may also be
+queried for permissions.
+
+With this two parts of the puzzle, we can do path lookups without taking
+locks or refcounts on dentry elements.
+
+RCU-walk path walking design
+============================
+
+Path walking code now has two distinct modes, ref-walk and rcu-walk. ref-walk
+is the traditional[*] way of performing dcache lookups using d_lock to
+serialise concurrent modifications to the dentry and take a reference count on
+it. ref-walk is simple and obvious, and may sleep, take locks, etc while path
+walking is operating on each dentry. rcu-walk uses seqcount based dentry
+lookups, and can perform lookup of intermediate elements without any stores to
+shared data in the dentry or inode. rcu-walk can not be applied to all cases,
+eg. if the filesystem must sleep or perform non trivial operations, rcu-walk
+must be switched to ref-walk mode.
+
+[*] RCU is still used for the dentry hash lookup in ref-walk, but not the full
+ path walk.
+
+Where ref-walk uses a stable, refcounted ``parent'' to walk the remaining
+path string, rcu-walk uses a d_seq protected snapshot. When looking up a
+child of this parent snapshot, we open d_seq critical section on the child
+before closing d_seq critical section on the parent. This gives an interlocking
+ladder of snapshots to walk down.
+
+
+ proc 101
+ /----------------\
+ / comm: "vi" \
+ / fs.root: dentry0 \
+ \ fs.cwd: dentry2 /
+ \ /
+ \----------------/
+
+So when vi wants to open("/home/npiggin/test.c", O_RDWR), then it will
+start from current->fs->root, which is a pinned dentry. Alternatively,
+"./test.c" would start from cwd; both names refer to the same path in
+the context of proc101.
+
+ dentry 0
+ +---------------------+ rcu-walk begins here, we note d_seq, check the
+ | name: "/" | inode's permission, and then look up the next
+ | inode: 10 | path element which is "home"...
+ | children:"home", ...|
+ +---------------------+
+ |
+ dentry 1 V
+ +---------------------+ ... which brings us here. We find dentry1 via
+ | name: "home" | hash lookup, then note d_seq and compare name
+ | inode: 678 | string and parent pointer. When we have a match,
+ | children:"npiggin" | we now recheck the d_seq of dentry0. Then we
+ +---------------------+ check inode and look up the next element.
+ |
+ dentry2 V
+ +---------------------+ Note: if dentry0 is now modified, lookup is
+ | name: "npiggin" | not necessarily invalid, so we need only keep a
+ | inode: 543 | parent for d_seq verification, and grandparents
+ | children:"a.c", ... | can be forgotten.
+ +---------------------+
+ |
+ dentry3 V
+ +---------------------+ At this point we have our destination dentry.
+ | name: "a.c" | We now take its d_lock, verify d_seq of this
+ | inode: 14221 | dentry. If that checks out, we can increment
+ | children:NULL | its refcount because we're holding d_lock.
+ +---------------------+
+
+Taking a refcount on a dentry from rcu-walk mode, by taking its d_lock,
+re-checking its d_seq, and then incrementing its refcount is called
+"dropping rcu" or dropping from rcu-walk into ref-walk mode.
+
+It is, in some sense, a bit of a house of cards. If the seqcount check of the
+parent snapshot fails, the house comes down, because we had closed the d_seq
+section on the grandparent, so we have nothing left to stand on. In that case,
+the path walk must be fully restarted (which we do in ref-walk mode, to avoid
+live locks). It is costly to have a full restart, but fortunately they are
+quite rare.
+
+When we reach a point where sleeping is required, or a filesystem callout
+requires ref-walk, then instead of restarting the walk, we attempt to drop rcu
+at the last known good dentry we have. Avoiding a full restart in ref-walk in
+these cases is fundamental for performance and scalability because blocking
+operations such as creates and unlinks are not uncommon.
+
+The detailed design for rcu-walk is like this:
+* LOOKUP_RCU is set in nd->flags, which distinguishes rcu-walk from ref-walk.
+* Take the RCU lock for the entire path walk, starting with the acquiring
+ of the starting path (eg. root/cwd/fd-path). So now dentry refcounts are
+ not required for dentry persistence.
+* synchronize_rcu is called when unregistering a filesystem, so we can
+ access d_ops and i_ops during rcu-walk.
+* Similarly take the vfsmount lock for the entire path walk. So now mnt
+ refcounts are not required for persistence. Also we are free to perform mount
+ lookups, and to assume dentry mount points and mount roots are stable up and
+ down the path.
+* Have a per-dentry seqlock to protect the dentry name, parent, and inode,
+ so we can load this tuple atomically, and also check whether any of its
+ members have changed.
+* Dentry lookups (based on parent, candidate string tuple) recheck the parent
+ sequence after the child is found in case anything changed in the parent
+ during the path walk.
+* inode is also RCU protected so we can load d_inode and use the inode for
+ limited things.
+* i_mode, i_uid, i_gid can be tested for exec permissions during path walk.
+* i_op can be loaded.
+* When the destination dentry is reached, drop rcu there (ie. take d_lock,
+ verify d_seq, increment refcount).
+* If seqlock verification fails anywhere along the path, do a full restart
+ of the path lookup in ref-walk mode. -ECHILD tends to be used (for want of
+ a better errno) to signal an rcu-walk failure.
+
+The cases where rcu-walk cannot continue are:
+* NULL dentry (ie. any uncached path element)
+* Following links
+
+It may be possible eventually to make following links rcu-walk aware.
+
+Uncached path elements will always require dropping to ref-walk mode, at the
+very least because i_mutex needs to be grabbed, and objects allocated.
+
+Final note:
+"store-free" path walking is not strictly store free. We take vfsmount lock
+and refcounts (both of which can be made per-cpu), and we also store to the
+stack (which is essentially CPU-local), and we also have to take locks and
+refcount on final dentry.
+
+The point is that shared data, where practically possible, is not locked
+or stored into. The result is massive improvements in performance and
+scalability of path resolution.
+
+
+Interesting statistics
+======================
+
+The following table gives rcu lookup statistics for a few simple workloads
+(2s12c24t Westmere, debian non-graphical system). Ungraceful are attempts to
+drop rcu that fail due to d_seq failure and requiring the entire path lookup
+again. Other cases are successful rcu-drops that are required before the final
+element, nodentry for missing dentry, revalidate for filesystem revalidate
+routine requiring rcu drop, permission for permission check requiring drop,
+and link for symlink traversal requiring drop.
+
+ rcu-lookups restart nodentry link revalidate permission
+bootup 47121 0 4624 1010 10283 7852
+dbench 25386793 0 6778659(26.7%) 55 549 1156
+kbuild 2696672 10 64442(2.3%) 108764(4.0%) 1 1590
+git diff 39605 0 28 2 0 106
+vfstest 24185492 4945 708725(2.9%) 1076136(4.4%) 0 2651
+
+What this shows is that failed rcu-walk lookups, ie. ones that are restarted
+entirely with ref-walk, are quite rare. Even the "vfstest" case which
+specifically has concurrent renames/mkdir/rmdir/ creat/unlink/etc to exercise
+such races is not showing a huge amount of restarts.
+
+Dropping from rcu-walk to ref-walk mean that we have encountered a dentry where
+the reference count needs to be taken for some reason. This is either because
+we have reached the target of the path walk, or because we have encountered a
+condition that can't be resolved in rcu-walk mode. Ideally, we drop rcu-walk
+only when we have reached the target dentry, so the other statistics show where
+this does not happen.
+
+Note that a graceful drop from rcu-walk mode due to something such as the
+dentry not existing (which can be common) is not necessarily a failure of
+rcu-walk scheme, because some elements of the path may have been walked in
+rcu-walk mode. The further we get from common path elements (such as cwd or
+root), the less contended the dentry is likely to be. The closer we are to
+common path elements, the more likely they will exist in dentry cache.
+
+
+Papers and other documentation on dcache locking
+================================================
+
+1. Scaling dcache with RCU (http://linuxjournal.com/article.php?sid=7124).
+
+2. http://lse.sourceforge.net/locking/dcache/dcache.html
+
+
diff --git a/Documentation/filesystems/pohmelfs/design_notes.txt b/Documentation/filesystems/pohmelfs/design_notes.txt
new file mode 100644
index 000000000..8aef91335
--- /dev/null
+++ b/Documentation/filesystems/pohmelfs/design_notes.txt
@@ -0,0 +1,72 @@
+POHMELFS: Parallel Optimized Host Message Exchange Layered File System.
+
+ Evgeniy Polyakov <zbr@ioremap.net>
+
+Homepage: http://www.ioremap.net/projects/pohmelfs
+
+POHMELFS first began as a network filesystem with coherent local data and
+metadata caches but is now evolving into a parallel distributed filesystem.
+
+Main features of this FS include:
+ * Locally coherent cache for data and metadata with (potentially) byte-range locks.
+ Since all Linux filesystems lock the whole inode during writing, algorithm
+ is very simple and does not use byte-ranges, although they are sent in
+ locking messages.
+ * Completely async processing of all events except creation of hard and symbolic
+ links, and rename events.
+ Object creation and data reading and writing are processed asynchronously.
+ * Flexible object architecture optimized for network processing.
+ Ability to create long paths to objects and remove arbitrarily huge
+ directories with a single network command.
+ (like removing the whole kernel tree via a single network command).
+ * Very high performance.
+ * Fast and scalable multithreaded userspace server. Being in userspace it works
+ with any underlying filesystem and still is much faster than async in-kernel NFS one.
+ * Client is able to switch between different servers (if one goes down, client
+ automatically reconnects to second and so on).
+ * Transactions support. Full failover for all operations.
+ Resending transactions to different servers on timeout or error.
+ * Read request (data read, directory listing, lookup requests) balancing between multiple servers.
+ * Write requests are replicated to multiple servers and completed only when all of them are acked.
+ * Ability to add and/or remove servers from the working set at run-time.
+ * Strong authentification and possible data encryption in network channel.
+ * Extended attributes support.
+
+POHMELFS is based on transactions, which are potentially long-standing objects that live
+in the client's memory. Each transaction contains all the information needed to process a given
+command (or set of commands, which is frequently used during data writing: single transactions
+can contain creation and data writing commands). Transactions are committed by all the servers
+to which they are sent and, in case of failures, are eventually resent or dropped with an error.
+For example, reading will return an error if no servers are available.
+
+POHMELFS uses a asynchronous approach to data processing. Courtesy of transactions, it is
+possible to detach replies from requests and, if the command requires data to be received, the
+caller sleeps waiting for it. Thus, it is possible to issue multiple read commands to different
+servers and async threads will pick up replies in parallel, find appropriate transactions in the
+system and put the data where it belongs (like the page or inode cache).
+
+The main feature of POHMELFS is writeback data and the metadata cache.
+Only a few non-performance critical operations use the write-through cache and
+are synchronous: hard and symbolic link creation, and object rename. Creation,
+removal of objects and data writing are asynchronous and are sent to
+the server during system writeback. Only one writer at a time is allowed for any
+given inode, which is guarded by an appropriate locking protocol.
+Because of this feature, POHMELFS is extremely fast at metadata intensive
+workloads and can fully utilize the bandwidth to the servers when doing bulk
+data transfers.
+
+POHMELFS clients operate with a working set of servers and are capable of balancing read-only
+operations (like lookups or directory listings) between them according to IO priorities.
+Administrators can add or remove servers from the set at run-time via special commands (described
+in Documentation/filesystems/pohmelfs/info.txt file). Writes are replicated to all servers, which
+are connected with write permission turned on. IO priority and permissions can be changed in
+run-time.
+
+POHMELFS is capable of full data channel encryption and/or strong crypto hashing.
+One can select any kernel supported cipher, encryption mode, hash type and operation mode
+(hmac or digest). It is also possible to use both or neither (default). Crypto configuration
+is checked during mount time and, if the server does not support it, appropriate capabilities
+will be disabled or mount will fail (if 'crypto_fail_unsupported' mount option is specified).
+Crypto performance heavily depends on the number of crypto threads, which asynchronously perform
+crypto operations and send the resulting data to server or submit it up the stack. This number
+can be controlled via a mount option.
diff --git a/Documentation/filesystems/pohmelfs/info.txt b/Documentation/filesystems/pohmelfs/info.txt
new file mode 100644
index 000000000..db2e41393
--- /dev/null
+++ b/Documentation/filesystems/pohmelfs/info.txt
@@ -0,0 +1,99 @@
+POHMELFS usage information.
+
+Mount options.
+All but index, number of crypto threads and maximum IO size can changed via remount.
+
+idx=%u
+ Each mountpoint is associated with a special index via this option.
+ Administrator can add or remove servers from the given index, so all mounts,
+ which were attached to it, are updated.
+ Default it is 0.
+
+trans_scan_timeout=%u
+ This timeout, expressed in milliseconds, specifies time to scan transaction
+ trees looking for stale requests, which have to be resent, or if number of
+ retries exceed specified limit, dropped with error.
+ Default is 5 seconds.
+
+drop_scan_timeout=%u
+ Internal timeout, expressed in milliseconds, which specifies how frequently
+ inodes marked to be dropped are freed. It also specifies how frequently
+ the system checks that servers have to be added or removed from current working set.
+ Default is 1 second.
+
+wait_on_page_timeout=%u
+ Number of milliseconds to wait for reply from remote server for data reading command.
+ If this timeout is exceeded, reading returns an error.
+ Default is 5 seconds.
+
+trans_retries=%u
+ This is the number of times that a transaction will be resent to a server that did
+ not answer for the last @trans_scan_timeout milliseconds.
+ When the number of resends exceeds this limit, the transaction is completed with error.
+ Default is 5 resends.
+
+crypto_thread_num=%u
+ Number of crypto processing threads. Threads are used both for RX and TX traffic.
+ Default is 2, or no threads if crypto operations are not supported.
+
+trans_max_pages=%u
+ Maximum number of pages in a single transaction. This parameter also controls
+ the number of pages, allocated for crypto processing (each crypto thread has
+ pool of pages, the number of which is equal to 'trans_max_pages'.
+ Default is 100 pages.
+
+crypto_fail_unsupported
+ If specified, mount will fail if the server does not support requested crypto operations.
+ By default mount will disable non-matching crypto operations.
+
+mcache_timeout=%u
+ Maximum number of milliseconds to wait for the mcache objects to be processed.
+ Mcache includes locks (given lock should be granted by server), attributes (they should be
+ fully received in the given timeframe).
+ Default is 5 seconds.
+
+Usage examples.
+
+Add server server1.net:1025 into the working set with index $idx
+with appropriate hash algorithm and key file and cipher algorithm, mode and key file:
+$cfg A add -a server1.net -p 1025 -i $idx -K $hash_key -k $cipher_key
+
+Mount filesystem with given index $idx to /mnt mountpoint.
+Client will connect to all servers specified in the working set via previous command:
+mount -t pohmel -o idx=$idx q /mnt
+
+Change permissions to read-only (-I 1 option, '-I 2' - write-only, 3 - rw):
+$cfg A modify -a server1.net -p 1025 -i $idx -I 1
+
+Change IO priority to 123 (node with the highest priority gets read requests).
+$cfg A modify -a server1.net -p 1025 -i $idx -P 123
+
+One can check currect status of all connections in the mountstats file:
+# cat /proc/$PID/mountstats
+...
+device none mounted on /mnt with fstype pohmel
+idx addr(:port) socket_type protocol active priority permissions
+0 server1.net:1026 1 6 1 250 1
+0 server2.net:1025 1 6 1 123 3
+
+Server installation.
+
+Creating a server, which listens at port 1025 and 0.0.0.0 address.
+Working root directory (note, that server chroots there, so you have to have appropriate permissions)
+is set to /mnt, server will negotiate hash/cipher with client, in case client requested it, there
+are appropriate key files.
+Number of working threads is set to 10.
+
+# ./fserver -a 0.0.0.0 -p 1025 -r /mnt -w 10 -K hash_key -k cipher_key
+
+ -A 6 - listen on ipv6 address. Default: Disabled.
+ -r root - path to root directory. Default: /tmp.
+ -a addr - listen address. Default: 0.0.0.0.
+ -p port - listen port. Default: 1025.
+ -w workers - number of workers per connected client. Default: 1.
+ -K file - hash key size. Default: none.
+ -k file - cipher key size. Default: none.
+ -h - this help.
+
+Number of worker threads specifies how many workers will be created for each client.
+Bulk single-client transafers usually are better handled with smaller number (like 1-3).
diff --git a/Documentation/filesystems/pohmelfs/network_protocol.txt b/Documentation/filesystems/pohmelfs/network_protocol.txt
new file mode 100644
index 000000000..c680b4b53
--- /dev/null
+++ b/Documentation/filesystems/pohmelfs/network_protocol.txt
@@ -0,0 +1,227 @@
+POHMELFS network protocol.
+
+Basic structure used in network communication is following command:
+
+struct netfs_cmd
+{
+ __u16 cmd; /* Command number */
+ __u16 csize; /* Attached crypto information size */
+ __u16 cpad; /* Attached padding size */
+ __u16 ext; /* External flags */
+ __u32 size; /* Size of the attached data */
+ __u32 trans; /* Transaction id */
+ __u64 id; /* Object ID to operate on. Used for feedback.*/
+ __u64 start; /* Start of the object. */
+ __u64 iv; /* IV sequence */
+ __u8 data[0];
+};
+
+Commands can be embedded into transaction command (which in turn has own command),
+so one can extend protocol as needed without breaking backward compatibility as long
+as old commands are supported. All string lengths include tail 0 byte.
+
+All commands are transferred over the network in big-endian. CPU endianness is used at the end peers.
+
+@cmd - command number, which specifies command to be processed. Following
+ commands are used currently:
+
+ NETFS_READDIR = 1, /* Read directory for given inode number */
+ NETFS_READ_PAGE, /* Read data page from the server */
+ NETFS_WRITE_PAGE, /* Write data page to the server */
+ NETFS_CREATE, /* Create directory entry */
+ NETFS_REMOVE, /* Remove directory entry */
+ NETFS_LOOKUP, /* Lookup single object */
+ NETFS_LINK, /* Create a link */
+ NETFS_TRANS, /* Transaction */
+ NETFS_OPEN, /* Open intent */
+ NETFS_INODE_INFO, /* Metadata cache coherency synchronization message */
+ NETFS_PAGE_CACHE, /* Page cache invalidation message */
+ NETFS_READ_PAGES, /* Read multiple contiguous pages in one go */
+ NETFS_RENAME, /* Rename object */
+ NETFS_CAPABILITIES, /* Capabilities of the client, for example supported crypto */
+ NETFS_LOCK, /* Distributed lock message */
+ NETFS_XATTR_SET, /* Set extended attribute */
+ NETFS_XATTR_GET, /* Get extended attribute */
+
+@ext - external flags. Used by different commands to specify some extra arguments
+ like partial size of the embedded objects or creation flags.
+
+@size - size of the attached data. For NETFS_READ_PAGE and NETFS_READ_PAGES no data is attached,
+ but size of the requested data is incorporated here. It does not include size of the command
+ header (struct netfs_cmd) itself.
+
+@id - id of the object this command operates on. Each command can use it for own purpose.
+
+@start - start of the object this command operates on. Each command can use it for own purpose.
+
+@csize, @cpad - size and padding size of the (attached if needed) crypto information.
+
+Command specifications.
+
+@NETFS_READDIR
+This command is used to sync content of the remote dir to the client.
+
+@ext - length of the path to object.
+@size - the same.
+@id - local inode number of the directory to read.
+@start - zero.
+
+
+@NETFS_READ_PAGE
+This command is used to read data from remote server.
+Data size does not exceed local page cache size.
+
+@id - inode number.
+@start - first byte offset.
+@size - number of bytes to read plus length of the path to object.
+@ext - object path length.
+
+
+@NETFS_CREATE
+Used to create object.
+It does not require that all directories on top of the object were
+already created, it will create them automatically. Each object has
+associated @netfs_path_entry data structure, which contains creation
+mode (permissions and type) and length of the name as long as name itself.
+
+@start - 0
+@size - size of the all data structures needed to create a path
+@id - local inode number
+@ext - 0
+
+
+@NETFS_REMOVE
+Used to remove object.
+
+@ext - length of the path to object.
+@size - the same.
+@id - local inode number.
+@start - zero.
+
+
+@NETFS_LOOKUP
+Lookup information about object on server.
+
+@ext - length of the path to object.
+@size - the same.
+@id - local inode number of the directory to look object in.
+@start - local inode number of the object to look at.
+
+
+@NETFS_LINK
+Create hard of symlink.
+Command is sent as "object_path|target_path".
+
+@size - size of the above string.
+@id - parent local inode number.
+@start - 1 for symlink, 0 for hardlink.
+@ext - size of the "object_path" above.
+
+
+@NETFS_TRANS
+Transaction header.
+
+@size - incorporates all embedded command sizes including theirs header sizes.
+@start - transaction generation number - unique id used to find transaction.
+@ext - transaction flags. Unused at the moment.
+@id - 0.
+
+
+@NETFS_OPEN
+Open intent for given transaction.
+
+@id - local inode number.
+@start - 0.
+@size - path length to the object.
+@ext - open flags (O_RDWR and so on).
+
+
+@NETFS_INODE_INFO
+Metadata update command.
+It is sent to servers when attributes of the object are changed and received
+when data or metadata were updated. It operates with the following structure:
+
+struct netfs_inode_info
+{
+ unsigned int mode;
+ unsigned int nlink;
+ unsigned int uid;
+ unsigned int gid;
+ unsigned int blocksize;
+ unsigned int padding;
+ __u64 ino;
+ __u64 blocks;
+ __u64 rdev;
+ __u64 size;
+ __u64 version;
+};
+
+It effectively mirrors stat(2) returned data.
+
+
+@ext - path length to the object.
+@size - the same plus size of the netfs_inode_info structure.
+@id - local inode number.
+@start - 0.
+
+
+@NETFS_PAGE_CACHE
+Command is only received by clients. It contains information about
+page to be marked as not up-to-date.
+
+@id - client's inode number.
+@start - last byte of the page to be invalidated. If it is not equal to
+ current inode size, it will be vmtruncated().
+@size - 0
+@ext - 0
+
+
+@NETFS_READ_PAGES
+Used to read multiple contiguous pages in one go.
+
+@start - first byte of the contiguous region to read.
+@size - contains of two fields: lower 8 bits are used to represent page cache shift
+ used by client, another 3 bytes are used to get number of pages.
+@id - local inode number.
+@ext - path length to the object.
+
+
+@NETFS_RENAME
+Used to rename object.
+Attached data is formed into following string: "old_path|new_path".
+
+@id - local inode number.
+@start - parent inode number.
+@size - length of the above string.
+@ext - length of the old path part.
+
+
+@NETFS_CAPABILITIES
+Used to exchange crypto capabilities with server.
+If crypto capabilities are not supported by server, then client will disable it
+or fail (if 'crypto_fail_unsupported' mount options was specified).
+
+@id - superblock index. Used to specify crypto information for group of servers.
+@size - size of the attached capabilities structure.
+@start - 0.
+@size - 0.
+@scsize - 0.
+
+@NETFS_LOCK
+Used to send lock request/release messages. Although it sends byte range request
+and is capable of flushing pages based on that, it is not used, since all Linux
+filesystems lock the whole inode.
+
+@id - lock generation number.
+@start - start of the locked range.
+@size - size of the locked range.
+@ext - lock type: read/write. Not used actually. 15'th bit is used to determine,
+ if it is lock request (1) or release (0).
+
+@NETFS_XATTR_SET
+@NETFS_XATTR_GET
+Used to set/get extended attributes for given inode.
+@id - attribute generation number or xattr setting type
+@start - size of the attribute (request or attached)
+@size - name length, path len and data size for given attribute
+@ext - path length for given object
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
new file mode 100644
index 000000000..e69274de8
--- /dev/null
+++ b/Documentation/filesystems/porting
@@ -0,0 +1,485 @@
+Changes since 2.5.0:
+
+---
+[recommended]
+
+New helpers: sb_bread(), sb_getblk(), sb_find_get_block(), set_bh(),
+ sb_set_blocksize() and sb_min_blocksize().
+
+Use them.
+
+(sb_find_get_block() replaces 2.4's get_hash_table())
+
+---
+[recommended]
+
+New methods: ->alloc_inode() and ->destroy_inode().
+
+Remove inode->u.foo_inode_i
+Declare
+ struct foo_inode_info {
+ /* fs-private stuff */
+ struct inode vfs_inode;
+ };
+ static inline struct foo_inode_info *FOO_I(struct inode *inode)
+ {
+ return list_entry(inode, struct foo_inode_info, vfs_inode);
+ }
+
+Use FOO_I(inode) instead of &inode->u.foo_inode_i;
+
+Add foo_alloc_inode() and foo_destroy_inode() - the former should allocate
+foo_inode_info and return the address of ->vfs_inode, the latter should free
+FOO_I(inode) (see in-tree filesystems for examples).
+
+Make them ->alloc_inode and ->destroy_inode in your super_operations.
+
+Keep in mind that now you need explicit initialization of private data
+typically between calling iget_locked() and unlocking the inode.
+
+At some point that will become mandatory.
+
+---
+[mandatory]
+
+Change of file_system_type method (->read_super to ->get_sb)
+
+->read_super() is no more. Ditto for DECLARE_FSTYPE and DECLARE_FSTYPE_DEV.
+
+Turn your foo_read_super() into a function that would return 0 in case of
+success and negative number in case of error (-EINVAL unless you have more
+informative error value to report). Call it foo_fill_super(). Now declare
+
+int foo_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+ return get_sb_bdev(fs_type, flags, dev_name, data, foo_fill_super,
+ mnt);
+}
+
+(or similar with s/bdev/nodev/ or s/bdev/single/, depending on the kind of
+filesystem).
+
+Replace DECLARE_FSTYPE... with explicit initializer and have ->get_sb set as
+foo_get_sb.
+
+---
+[mandatory]
+
+Locking change: ->s_vfs_rename_sem is taken only by cross-directory renames.
+Most likely there is no need to change anything, but if you relied on
+global exclusion between renames for some internal purpose - you need to
+change your internal locking. Otherwise exclusion warranties remain the
+same (i.e. parents and victim are locked, etc.).
+
+---
+[informational]
+
+Now we have the exclusion between ->lookup() and directory removal (by
+->rmdir() and ->rename()). If you used to need that exclusion and do
+it by internal locking (most of filesystems couldn't care less) - you
+can relax your locking.
+
+---
+[mandatory]
+
+->lookup(), ->truncate(), ->create(), ->unlink(), ->mknod(), ->mkdir(),
+->rmdir(), ->link(), ->lseek(), ->symlink(), ->rename()
+and ->readdir() are called without BKL now. Grab it on entry, drop upon return
+- that will guarantee the same locking you used to have. If your method or its
+parts do not need BKL - better yet, now you can shift lock_kernel() and
+unlock_kernel() so that they would protect exactly what needs to be
+protected.
+
+---
+[mandatory]
+
+BKL is also moved from around sb operations. BKL should have been shifted into
+individual fs sb_op functions. If you don't need it, remove it.
+
+---
+[informational]
+
+check for ->link() target not being a directory is done by callers. Feel
+free to drop it...
+
+---
+[informational]
+
+->link() callers hold ->i_mutex on the object we are linking to. Some of your
+problems might be over...
+
+---
+[mandatory]
+
+new file_system_type method - kill_sb(superblock). If you are converting
+an existing filesystem, set it according to ->fs_flags:
+ FS_REQUIRES_DEV - kill_block_super
+ FS_LITTER - kill_litter_super
+ neither - kill_anon_super
+FS_LITTER is gone - just remove it from fs_flags.
+
+---
+[mandatory]
+
+ FS_SINGLE is gone (actually, that had happened back when ->get_sb()
+went in - and hadn't been documented ;-/). Just remove it from fs_flags
+(and see ->get_sb() entry for other actions).
+
+---
+[mandatory]
+
+->setattr() is called without BKL now. Caller _always_ holds ->i_mutex, so
+watch for ->i_mutex-grabbing code that might be used by your ->setattr().
+Callers of notify_change() need ->i_mutex now.
+
+---
+[recommended]
+
+New super_block field "struct export_operations *s_export_op" for
+explicit support for exporting, e.g. via NFS. The structure is fully
+documented at its declaration in include/linux/fs.h, and in
+Documentation/filesystems/nfs/Exporting.
+
+Briefly it allows for the definition of decode_fh and encode_fh operations
+to encode and decode filehandles, and allows the filesystem to use
+a standard helper function for decode_fh, and provide file-system specific
+support for this helper, particularly get_parent.
+
+It is planned that this will be required for exporting once the code
+settles down a bit.
+
+[mandatory]
+
+s_export_op is now required for exporting a filesystem.
+isofs, ext2, ext3, resierfs, fat
+can be used as examples of very different filesystems.
+
+---
+[mandatory]
+
+iget4() and the read_inode2 callback have been superseded by iget5_locked()
+which has the following prototype,
+
+ struct inode *iget5_locked(struct super_block *sb, unsigned long ino,
+ int (*test)(struct inode *, void *),
+ int (*set)(struct inode *, void *),
+ void *data);
+
+'test' is an additional function that can be used when the inode
+number is not sufficient to identify the actual file object. 'set'
+should be a non-blocking function that initializes those parts of a
+newly created inode to allow the test function to succeed. 'data' is
+passed as an opaque value to both test and set functions.
+
+When the inode has been created by iget5_locked(), it will be returned with the
+I_NEW flag set and will still be locked. The filesystem then needs to finalize
+the initialization. Once the inode is initialized it must be unlocked by
+calling unlock_new_inode().
+
+The filesystem is responsible for setting (and possibly testing) i_ino
+when appropriate. There is also a simpler iget_locked function that
+just takes the superblock and inode number as arguments and does the
+test and set for you.
+
+e.g.
+ inode = iget_locked(sb, ino);
+ if (inode->i_state & I_NEW) {
+ err = read_inode_from_disk(inode);
+ if (err < 0) {
+ iget_failed(inode);
+ return err;
+ }
+ unlock_new_inode(inode);
+ }
+
+Note that if the process of setting up a new inode fails, then iget_failed()
+should be called on the inode to render it dead, and an appropriate error
+should be passed back to the caller.
+
+---
+[recommended]
+
+->getattr() finally getting used. See instances in nfs, minix, etc.
+
+---
+[mandatory]
+
+->revalidate() is gone. If your filesystem had it - provide ->getattr()
+and let it call whatever you had as ->revlidate() + (for symlinks that
+had ->revalidate()) add calls in ->follow_link()/->readlink().
+
+---
+[mandatory]
+
+->d_parent changes are not protected by BKL anymore. Read access is safe
+if at least one of the following is true:
+ * filesystem has no cross-directory rename()
+ * we know that parent had been locked (e.g. we are looking at
+->d_parent of ->lookup() argument).
+ * we are called from ->rename().
+ * the child's ->d_lock is held
+Audit your code and add locking if needed. Notice that any place that is
+not protected by the conditions above is risky even in the old tree - you
+had been relying on BKL and that's prone to screwups. Old tree had quite
+a few holes of that kind - unprotected access to ->d_parent leading to
+anything from oops to silent memory corruption.
+
+---
+[mandatory]
+
+ FS_NOMOUNT is gone. If you use it - just set MS_NOUSER in flags
+(see rootfs for one kind of solution and bdev/socket/pipe for another).
+
+---
+[recommended]
+
+ Use bdev_read_only(bdev) instead of is_read_only(kdev). The latter
+is still alive, but only because of the mess in drivers/s390/block/dasd.c.
+As soon as it gets fixed is_read_only() will die.
+
+---
+[mandatory]
+
+->permission() is called without BKL now. Grab it on entry, drop upon
+return - that will guarantee the same locking you used to have. If
+your method or its parts do not need BKL - better yet, now you can
+shift lock_kernel() and unlock_kernel() so that they would protect
+exactly what needs to be protected.
+
+---
+[mandatory]
+
+->statfs() is now called without BKL held. BKL should have been
+shifted into individual fs sb_op functions where it's not clear that
+it's safe to remove it. If you don't need it, remove it.
+
+---
+[mandatory]
+
+ is_read_only() is gone; use bdev_read_only() instead.
+
+---
+[mandatory]
+
+ destroy_buffers() is gone; use invalidate_bdev().
+
+---
+[mandatory]
+
+ fsync_dev() is gone; use fsync_bdev(). NOTE: lvm breakage is
+deliberate; as soon as struct block_device * is propagated in a reasonable
+way by that code fixing will become trivial; until then nothing can be
+done.
+
+[mandatory]
+
+ block truncatation on error exit from ->write_begin, and ->direct_IO
+moved from generic methods (block_write_begin, cont_write_begin,
+nobh_write_begin, blockdev_direct_IO*) to callers. Take a look at
+ext2_write_failed and callers for an example.
+
+[mandatory]
+
+ ->truncate is gone. The whole truncate sequence needs to be
+implemented in ->setattr, which is now mandatory for filesystems
+implementing on-disk size changes. Start with a copy of the old inode_setattr
+and vmtruncate, and the reorder the vmtruncate + foofs_vmtruncate sequence to
+be in order of zeroing blocks using block_truncate_page or similar helpers,
+size update and on finally on-disk truncation which should not fail.
+inode_change_ok now includes the size checks for ATTR_SIZE and must be called
+in the beginning of ->setattr unconditionally.
+
+[mandatory]
+
+ ->clear_inode() and ->delete_inode() are gone; ->evict_inode() should
+be used instead. It gets called whenever the inode is evicted, whether it has
+remaining links or not. Caller does *not* evict the pagecache or inode-associated
+metadata buffers; the method has to use truncate_inode_pages_final() to get rid
+of those. Caller makes sure async writeback cannot be running for the inode while
+(or after) ->evict_inode() is called.
+
+ ->drop_inode() returns int now; it's called on final iput() with
+inode->i_lock held and it returns true if filesystems wants the inode to be
+dropped. As before, generic_drop_inode() is still the default and it's been
+updated appropriately. generic_delete_inode() is also alive and it consists
+simply of return 1. Note that all actual eviction work is done by caller after
+->drop_inode() returns.
+
+ As before, clear_inode() must be called exactly once on each call of
+->evict_inode() (as it used to be for each call of ->delete_inode()). Unlike
+before, if you are using inode-associated metadata buffers (i.e.
+mark_buffer_dirty_inode()), it's your responsibility to call
+invalidate_inode_buffers() before clear_inode().
+
+ NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out
+if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput()
+may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly
+free the on-disk inode, you may end up doing that while ->write_inode() is writing
+to it.
+
+---
+[mandatory]
+
+ .d_delete() now only advises the dcache as to whether or not to cache
+unreferenced dentries, and is now only called when the dentry refcount goes to
+0. Even on 0 refcount transition, it must be able to tolerate being called 0,
+1, or more times (eg. constant, idempotent).
+
+---
+[mandatory]
+
+ .d_compare() calling convention and locking rules are significantly
+changed. Read updated documentation in Documentation/filesystems/vfs.txt (and
+look at examples of other filesystems) for guidance.
+
+---
+[mandatory]
+
+ .d_hash() calling convention and locking rules are significantly
+changed. Read updated documentation in Documentation/filesystems/vfs.txt (and
+look at examples of other filesystems) for guidance.
+
+---
+[mandatory]
+ dcache_lock is gone, replaced by fine grained locks. See fs/dcache.c
+for details of what locks to replace dcache_lock with in order to protect
+particular things. Most of the time, a filesystem only needs ->d_lock, which
+protects *all* the dcache state of a given dentry.
+
+--
+[mandatory]
+
+ Filesystems must RCU-free their inodes, if they can have been accessed
+via rcu-walk path walk (basically, if the file can have had a path name in the
+vfs namespace).
+
+ Even though i_dentry and i_rcu share storage in a union, we will
+initialize the former in inode_init_always(), so just leave it alone in
+the callback. It used to be necessary to clean it there, but not anymore
+(starting at 3.2).
+
+--
+[recommended]
+ vfs now tries to do path walking in "rcu-walk mode", which avoids
+atomic operations and scalability hazards on dentries and inodes (see
+Documentation/filesystems/path-lookup.txt). d_hash and d_compare changes
+(above) are examples of the changes required to support this. For more complex
+filesystem callbacks, the vfs drops out of rcu-walk mode before the fs call, so
+no changes are required to the filesystem. However, this is costly and loses
+the benefits of rcu-walk mode. We will begin to add filesystem callbacks that
+are rcu-walk aware, shown below. Filesystems should take advantage of this
+where possible.
+
+--
+[mandatory]
+ d_revalidate is a callback that is made on every path element (if
+the filesystem provides it), which requires dropping out of rcu-walk mode. This
+may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be
+returned if the filesystem cannot handle rcu-walk. See
+Documentation/filesystems/vfs.txt for more details.
+
+ permission and check_acl are inode permission checks that are called
+on many or all directory inodes on the way down a path walk (to check for
+exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU).
+See Documentation/filesystems/vfs.txt for more details.
+
+--
+[mandatory]
+ In ->fallocate() you must check the mode option passed in. If your
+filesystem does not support hole punching (deallocating space in the middle of a
+file) you must return -EOPNOTSUPP if FALLOC_FL_PUNCH_HOLE is set in mode.
+Currently you can only have FALLOC_FL_PUNCH_HOLE with FALLOC_FL_KEEP_SIZE set,
+so the i_size should not change when hole punching, even when puching the end of
+a file off.
+
+--
+[mandatory]
+ ->get_sb() is gone. Switch to use of ->mount(). Typically it's just
+a matter of switching from calling get_sb_... to mount_... and changing the
+function type. If you were doing it manually, just switch from setting ->mnt_root
+to some pointer to returning that pointer. On errors return ERR_PTR(...).
+
+--
+[mandatory]
+ ->permission() and generic_permission()have lost flags
+argument; instead of passing IPERM_FLAG_RCU we add MAY_NOT_BLOCK into mask.
+ generic_permission() has also lost the check_acl argument; ACL checking
+has been taken to VFS and filesystems need to provide a non-NULL ->i_op->get_acl
+to read an ACL from disk.
+
+--
+[mandatory]
+ If you implement your own ->llseek() you must handle SEEK_HOLE and
+SEEK_DATA. You can hanle this by returning -EINVAL, but it would be nicer to
+support it in some way. The generic handler assumes that the entire file is
+data and there is a virtual hole at the end of the file. So if the provided
+offset is less than i_size and SEEK_DATA is specified, return the same offset.
+If the above is true for the offset and you are given SEEK_HOLE, return the end
+of the file. If the offset is i_size or greater return -ENXIO in either case.
+
+[mandatory]
+ If you have your own ->fsync() you must make sure to call
+filemap_write_and_wait_range() so that all dirty pages are synced out properly.
+You must also keep in mind that ->fsync() is not called with i_mutex held
+anymore, so if you require i_mutex locking you must make sure to take it and
+release it yourself.
+
+--
+[mandatory]
+ d_alloc_root() is gone, along with a lot of bugs caused by code
+misusing it. Replacement: d_make_root(inode). The difference is,
+d_make_root() drops the reference to inode if dentry allocation fails.
+
+--
+[mandatory]
+ The witch is dead! Well, 2/3 of it, anyway. ->d_revalidate() and
+->lookup() do *not* take struct nameidata anymore; just the flags.
+--
+[mandatory]
+ ->create() doesn't take struct nameidata *; unlike the previous
+two, it gets "is it an O_EXCL or equivalent?" boolean argument. Note that
+local filesystems can ignore tha argument - they are guaranteed that the
+object doesn't exist. It's remote/distributed ones that might care...
+--
+[mandatory]
+ FS_REVAL_DOT is gone; if you used to have it, add ->d_weak_revalidate()
+in your dentry operations instead.
+--
+[mandatory]
+ vfs_readdir() is gone; switch to iterate_dir() instead
+--
+[mandatory]
+ ->readdir() is gone now; switch to ->iterate()
+[mandatory]
+ vfs_follow_link has been removed. Filesystems must use nd_set_link
+ from ->follow_link for normal symlinks, or nd_jump_link for magic
+ /proc/<pid> style links.
+--
+[mandatory]
+ iget5_locked()/ilookup5()/ilookup5_nowait() test() callback used to be
+ called with both ->i_lock and inode_hash_lock held; the former is *not*
+ taken anymore, so verify that your callbacks do not rely on it (none
+ of the in-tree instances did). inode_hash_lock is still held,
+ of course, so they are still serialized wrt removal from inode hash,
+ as well as wrt set() callback of iget5_locked().
+--
+[mandatory]
+ d_materialise_unique() is gone; d_splice_alias() does everything you
+ need now. Remember that they have opposite orders of arguments ;-/
+--
+[mandatory]
+ f_dentry is gone; use f_path.dentry, or, better yet, see if you can avoid
+ it entirely.
+--
+[mandatory]
+ never call ->read() and ->write() directly; use __vfs_{read,write} or
+ wrappers; instead of checking for ->write or ->read being NULL, look for
+ FMODE_CAN_{WRITE,READ} in file->f_mode.
+--
+[mandatory]
+ do _not_ use new_sync_{read,write} for ->read/->write; leave it NULL
+ instead.
+--
+[mandatory]
+ ->aio_read/->aio_write are gone. Use ->read_iter/->write_iter.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
new file mode 100644
index 000000000..c3b6b301d
--- /dev/null
+++ b/Documentation/filesystems/proc.txt
@@ -0,0 +1,1869 @@
+------------------------------------------------------------------------------
+ T H E /proc F I L E S Y S T E M
+------------------------------------------------------------------------------
+/proc/sys Terrehon Bowden <terrehon@pacbell.net> October 7 1999
+ Bodo Bauer <bb@ricochet.net>
+
+2.4.x update Jorge Nerin <comandante@zaralinux.com> November 14 2000
+move /proc/sys Shen Feng <shen@cn.fujitsu.com> April 1 2009
+------------------------------------------------------------------------------
+Version 1.3 Kernel version 2.2.12
+ Kernel version 2.4.0-test11-pre4
+------------------------------------------------------------------------------
+fixes/update part 1.1 Stefani Seibold <stefani@seibold.net> June 9 2009
+
+Table of Contents
+-----------------
+
+ 0 Preface
+ 0.1 Introduction/Credits
+ 0.2 Legal Stuff
+
+ 1 Collecting System Information
+ 1.1 Process-Specific Subdirectories
+ 1.2 Kernel data
+ 1.3 IDE devices in /proc/ide
+ 1.4 Networking info in /proc/net
+ 1.5 SCSI info
+ 1.6 Parallel port info in /proc/parport
+ 1.7 TTY info in /proc/tty
+ 1.8 Miscellaneous kernel statistics in /proc/stat
+ 1.9 Ext4 file system parameters
+
+ 2 Modifying System Parameters
+
+ 3 Per-Process Parameters
+ 3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj - Adjust the oom-killer
+ score
+ 3.2 /proc/<pid>/oom_score - Display current oom-killer score
+ 3.3 /proc/<pid>/io - Display the IO accounting fields
+ 3.4 /proc/<pid>/coredump_filter - Core dump filtering settings
+ 3.5 /proc/<pid>/mountinfo - Information about mounts
+ 3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm
+ 3.7 /proc/<pid>/task/<tid>/children - Information about task children
+ 3.8 /proc/<pid>/fdinfo/<fd> - Information about opened file
+ 3.9 /proc/<pid>/map_files - Information about memory mapped files
+
+ 4 Configuring procfs
+ 4.1 Mount options
+
+------------------------------------------------------------------------------
+Preface
+------------------------------------------------------------------------------
+
+0.1 Introduction/Credits
+------------------------
+
+This documentation is part of a soon (or so we hope) to be released book on
+the SuSE Linux distribution. As there is no complete documentation for the
+/proc file system and we've used many freely available sources to write these
+chapters, it seems only fair to give the work back to the Linux community.
+This work is based on the 2.2.* kernel version and the upcoming 2.4.*. I'm
+afraid it's still far from complete, but we hope it will be useful. As far as
+we know, it is the first 'all-in-one' document about the /proc file system. It
+is focused on the Intel x86 hardware, so if you are looking for PPC, ARM,
+SPARC, AXP, etc., features, you probably won't find what you are looking for.
+It also only covers IPv4 networking, not IPv6 nor other protocols - sorry. But
+additions and patches are welcome and will be added to this document if you
+mail them to Bodo.
+
+We'd like to thank Alan Cox, Rik van Riel, and Alexey Kuznetsov and a lot of
+other people for help compiling this documentation. We'd also like to extend a
+special thank you to Andi Kleen for documentation, which we relied on heavily
+to create this document, as well as the additional information he provided.
+Thanks to everybody else who contributed source or docs to the Linux kernel
+and helped create a great piece of software... :)
+
+If you have any comments, corrections or additions, please don't hesitate to
+contact Bodo Bauer at bb@ricochet.net. We'll be happy to add them to this
+document.
+
+The latest version of this document is available online at
+http://tldp.org/LDP/Linux-Filesystem-Hierarchy/html/proc.html
+
+If the above direction does not works for you, you could try the kernel
+mailing list at linux-kernel@vger.kernel.org and/or try to reach me at
+comandante@zaralinux.com.
+
+0.2 Legal Stuff
+---------------
+
+We don't guarantee the correctness of this document, and if you come to us
+complaining about how you screwed up your system because of incorrect
+documentation, we won't feel responsible...
+
+------------------------------------------------------------------------------
+CHAPTER 1: COLLECTING SYSTEM INFORMATION
+------------------------------------------------------------------------------
+
+------------------------------------------------------------------------------
+In This Chapter
+------------------------------------------------------------------------------
+* Investigating the properties of the pseudo file system /proc and its
+ ability to provide information on the running Linux system
+* Examining /proc's structure
+* Uncovering various information about the kernel and the processes running
+ on the system
+------------------------------------------------------------------------------
+
+
+The proc file system acts as an interface to internal data structures in the
+kernel. It can be used to obtain information about the system and to change
+certain kernel parameters at runtime (sysctl).
+
+First, we'll take a look at the read-only parts of /proc. In Chapter 2, we
+show you how you can use /proc/sys to change settings.
+
+1.1 Process-Specific Subdirectories
+-----------------------------------
+
+The directory /proc contains (among other things) one subdirectory for each
+process running on the system, which is named after the process ID (PID).
+
+The link self points to the process reading the file system. Each process
+subdirectory has the entries listed in Table 1-1.
+
+
+Table 1-1: Process specific entries in /proc
+..............................................................................
+ File Content
+ clear_refs Clears page referenced bits shown in smaps output
+ cmdline Command line arguments
+ cpu Current and last cpu in which it was executed (2.4)(smp)
+ cwd Link to the current working directory
+ environ Values of environment variables
+ exe Link to the executable of this process
+ fd Directory, which contains all file descriptors
+ maps Memory maps to executables and library files (2.4)
+ mem Memory held by this process
+ root Link to the root directory of this process
+ stat Process status
+ statm Process memory status information
+ status Process status in human readable form
+ wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan
+ pagemap Page table
+ stack Report full stack trace, enable via CONFIG_STACKTRACE
+ smaps a extension based on maps, showing the memory consumption of
+ each mapping and flags associated with it
+ numa_maps an extension based on maps, showing the memory locality and
+ binding policy as well as mem usage (in pages) of each mapping.
+..............................................................................
+
+For example, to get the status information of a process, all you have to do is
+read the file /proc/PID/status:
+
+ >cat /proc/self/status
+ Name: cat
+ State: R (running)
+ Tgid: 5452
+ Pid: 5452
+ PPid: 743
+ TracerPid: 0 (2.4)
+ Uid: 501 501 501 501
+ Gid: 100 100 100 100
+ FDSize: 256
+ Groups: 100 14 16
+ VmPeak: 5004 kB
+ VmSize: 5004 kB
+ VmLck: 0 kB
+ VmHWM: 476 kB
+ VmRSS: 476 kB
+ VmData: 156 kB
+ VmStk: 88 kB
+ VmExe: 68 kB
+ VmLib: 1412 kB
+ VmPTE: 20 kb
+ VmSwap: 0 kB
+ Threads: 1
+ SigQ: 0/28578
+ SigPnd: 0000000000000000
+ ShdPnd: 0000000000000000
+ SigBlk: 0000000000000000
+ SigIgn: 0000000000000000
+ SigCgt: 0000000000000000
+ CapInh: 00000000fffffeff
+ CapPrm: 0000000000000000
+ CapEff: 0000000000000000
+ CapBnd: ffffffffffffffff
+ Seccomp: 0
+ voluntary_ctxt_switches: 0
+ nonvoluntary_ctxt_switches: 1
+
+This shows you nearly the same information you would get if you viewed it with
+the ps command. In fact, ps uses the proc file system to obtain its
+information. But you get a more detailed view of the process by reading the
+file /proc/PID/status. It fields are described in table 1-2.
+
+The statm file contains more detailed information about the process
+memory usage. Its seven fields are explained in Table 1-3. The stat file
+contains details information about the process itself. Its fields are
+explained in Table 1-4.
+
+(for SMP CONFIG users)
+For making accounting scalable, RSS related information are handled in an
+asynchronous manner and the value may not be very precise. To see a precise
+snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
+It's slow but very precise.
+
+Table 1-2: Contents of the status files (as of 3.20.0)
+..............................................................................
+ Field Content
+ Name filename of the executable
+ State state (R is running, S is sleeping, D is sleeping
+ in an uninterruptible wait, Z is zombie,
+ T is traced or stopped)
+ Tgid thread group ID
+ Ngid NUMA group ID (0 if none)
+ Pid process id
+ PPid process id of the parent process
+ TracerPid PID of process tracing this process (0 if not)
+ Uid Real, effective, saved set, and file system UIDs
+ Gid Real, effective, saved set, and file system GIDs
+ FDSize number of file descriptor slots currently allocated
+ Groups supplementary group list
+ NStgid descendant namespace thread group ID hierarchy
+ NSpid descendant namespace process ID hierarchy
+ NSpgid descendant namespace process group ID hierarchy
+ NSsid descendant namespace session ID hierarchy
+ VmPeak peak virtual memory size
+ VmSize total program size
+ VmLck locked memory size
+ VmHWM peak resident set size ("high water mark")
+ VmRSS size of memory portions
+ VmData size of data, stack, and text segments
+ VmStk size of data, stack, and text segments
+ VmExe size of text segment
+ VmLib size of shared library code
+ VmPTE size of page table entries
+ VmSwap size of swap usage (the number of referred swapents)
+ Threads number of threads
+ SigQ number of signals queued/max. number for queue
+ SigPnd bitmap of pending signals for the thread
+ ShdPnd bitmap of shared pending signals for the process
+ SigBlk bitmap of blocked signals
+ SigIgn bitmap of ignored signals
+ SigCgt bitmap of caught signals
+ CapInh bitmap of inheritable capabilities
+ CapPrm bitmap of permitted capabilities
+ CapEff bitmap of effective capabilities
+ CapBnd bitmap of capabilities bounding set
+ Seccomp seccomp mode, like prctl(PR_GET_SECCOMP, ...)
+ Cpus_allowed mask of CPUs on which this process may run
+ Cpus_allowed_list Same as previous, but in "list format"
+ Mems_allowed mask of memory nodes allowed to this process
+ Mems_allowed_list Same as previous, but in "list format"
+ voluntary_ctxt_switches number of voluntary context switches
+ nonvoluntary_ctxt_switches number of non voluntary context switches
+..............................................................................
+
+Table 1-3: Contents of the statm files (as of 2.6.8-rc3)
+..............................................................................
+ Field Content
+ size total program size (pages) (same as VmSize in status)
+ resident size of memory portions (pages) (same as VmRSS in status)
+ shared number of pages that are shared (i.e. backed by a file)
+ trs number of pages that are 'code' (not including libs; broken,
+ includes data segment)
+ lrs number of pages of library (always 0 on 2.6)
+ drs number of pages of data/stack (including libs; broken,
+ includes library text)
+ dt number of dirty pages (always 0 on 2.6)
+..............................................................................
+
+
+Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
+..............................................................................
+ Field Content
+ pid process id
+ tcomm filename of the executable
+ state state (R is running, S is sleeping, D is sleeping in an
+ uninterruptible wait, Z is zombie, T is traced or stopped)
+ ppid process id of the parent process
+ pgrp pgrp of the process
+ sid session id
+ tty_nr tty the process uses
+ tty_pgrp pgrp of the tty
+ flags task flags
+ min_flt number of minor faults
+ cmin_flt number of minor faults with child's
+ maj_flt number of major faults
+ cmaj_flt number of major faults with child's
+ utime user mode jiffies
+ stime kernel mode jiffies
+ cutime user mode jiffies with child's
+ cstime kernel mode jiffies with child's
+ priority priority level
+ nice nice level
+ num_threads number of threads
+ it_real_value (obsolete, always 0)
+ start_time time the process started after system boot
+ vsize virtual memory size
+ rss resident set memory size
+ rsslim current limit in bytes on the rss
+ start_code address above which program text can run
+ end_code address below which program text can run
+ start_stack address of the start of the main process stack
+ esp current value of ESP
+ eip current value of EIP
+ pending bitmap of pending signals
+ blocked bitmap of blocked signals
+ sigign bitmap of ignored signals
+ sigcatch bitmap of caught signals
+ wchan address where process went to sleep
+ 0 (place holder)
+ 0 (place holder)
+ exit_signal signal to send to parent thread on exit
+ task_cpu which CPU the task is scheduled on
+ rt_priority realtime priority
+ policy scheduling policy (man sched_setscheduler)
+ blkio_ticks time spent waiting for block IO
+ gtime guest time of the task in jiffies
+ cgtime guest time of the task children in jiffies
+ start_data address above which program data+bss is placed
+ end_data address below which program data+bss is placed
+ start_brk address above which program heap can be expanded with brk()
+ arg_start address above which program command line is placed
+ arg_end address below which program command line is placed
+ env_start address above which program environment is placed
+ env_end address below which program environment is placed
+ exit_code the thread's exit_code in the form reported by the waitpid system call
+..............................................................................
+
+The /proc/PID/maps file containing the currently mapped memory regions and
+their access permissions.
+
+The format is:
+
+address perms offset dev inode pathname
+
+08048000-08049000 r-xp 00000000 03:00 8312 /opt/test
+08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test
+0804a000-0806b000 rw-p 00000000 00:00 0 [heap]
+a7cb1000-a7cb2000 ---p 00000000 00:00 0
+a7cb2000-a7eb2000 rw-p 00000000 00:00 0
+a7eb2000-a7eb3000 ---p 00000000 00:00 0
+a7eb3000-a7ed5000 rw-p 00000000 00:00 0 [stack:1001]
+a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6
+a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6
+a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6
+a800b000-a800e000 rw-p 00000000 00:00 0
+a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0
+a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0
+a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0
+a8024000-a8027000 rw-p 00000000 00:00 0
+a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2
+a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2
+a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2
+aff35000-aff4a000 rw-p 00000000 00:00 0 [stack]
+ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso]
+
+where "address" is the address space in the process that it occupies, "perms"
+is a set of permissions:
+
+ r = read
+ w = write
+ x = execute
+ s = shared
+ p = private (copy on write)
+
+"offset" is the offset into the mapping, "dev" is the device (major:minor), and
+"inode" is the inode on that device. 0 indicates that no inode is associated
+with the memory region, as the case would be with BSS (uninitialized data).
+The "pathname" shows the name associated file for this mapping. If the mapping
+is not associated with a file:
+
+ [heap] = the heap of the program
+ [stack] = the stack of the main process
+ [stack:1001] = the stack of the thread with tid 1001
+ [vdso] = the "virtual dynamic shared object",
+ the kernel system call handler
+
+ or if empty, the mapping is anonymous.
+
+The /proc/PID/task/TID/maps is a view of the virtual memory from the viewpoint
+of the individual tasks of a process. In this file you will see a mapping marked
+as [stack] if that task sees it as a stack. This is a key difference from the
+content of /proc/PID/maps, where you will see all mappings that are being used
+as stack by all of those tasks. Hence, for the example above, the task-level
+map, i.e. /proc/PID/task/TID/maps for thread 1001 will look like this:
+
+08048000-08049000 r-xp 00000000 03:00 8312 /opt/test
+08049000-0804a000 rw-p 00001000 03:00 8312 /opt/test
+0804a000-0806b000 rw-p 00000000 00:00 0 [heap]
+a7cb1000-a7cb2000 ---p 00000000 00:00 0
+a7cb2000-a7eb2000 rw-p 00000000 00:00 0
+a7eb2000-a7eb3000 ---p 00000000 00:00 0
+a7eb3000-a7ed5000 rw-p 00000000 00:00 0 [stack]
+a7ed5000-a8008000 r-xp 00000000 03:00 4222 /lib/libc.so.6
+a8008000-a800a000 r--p 00133000 03:00 4222 /lib/libc.so.6
+a800a000-a800b000 rw-p 00135000 03:00 4222 /lib/libc.so.6
+a800b000-a800e000 rw-p 00000000 00:00 0
+a800e000-a8022000 r-xp 00000000 03:00 14462 /lib/libpthread.so.0
+a8022000-a8023000 r--p 00013000 03:00 14462 /lib/libpthread.so.0
+a8023000-a8024000 rw-p 00014000 03:00 14462 /lib/libpthread.so.0
+a8024000-a8027000 rw-p 00000000 00:00 0
+a8027000-a8043000 r-xp 00000000 03:00 8317 /lib/ld-linux.so.2
+a8043000-a8044000 r--p 0001b000 03:00 8317 /lib/ld-linux.so.2
+a8044000-a8045000 rw-p 0001c000 03:00 8317 /lib/ld-linux.so.2
+aff35000-aff4a000 rw-p 00000000 00:00 0
+ffffe000-fffff000 r-xp 00000000 00:00 0 [vdso]
+
+The /proc/PID/smaps is an extension based on maps, showing the memory
+consumption for each of the process's mappings. For each of mappings there
+is a series of lines such as the following:
+
+08048000-080bc000 r-xp 00000000 03:02 13130 /bin/bash
+Size: 1084 kB
+Rss: 892 kB
+Pss: 374 kB
+Shared_Clean: 892 kB
+Shared_Dirty: 0 kB
+Private_Clean: 0 kB
+Private_Dirty: 0 kB
+Referenced: 892 kB
+Anonymous: 0 kB
+Swap: 0 kB
+KernelPageSize: 4 kB
+MMUPageSize: 4 kB
+Locked: 374 kB
+VmFlags: rd ex mr mw me de
+
+the first of these lines shows the same information as is displayed for the
+mapping in /proc/PID/maps. The remaining lines show the size of the mapping
+(size), the amount of the mapping that is currently resident in RAM (RSS), the
+process' proportional share of this mapping (PSS), the number of clean and
+dirty private pages in the mapping. Note that even a page which is part of a
+MAP_SHARED mapping, but has only a single pte mapped, i.e. is currently used
+by only one process, is accounted as private and not as shared. "Referenced"
+indicates the amount of memory currently marked as referenced or accessed.
+"Anonymous" shows the amount of memory that does not belong to any file. Even
+a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE
+and a page is modified, the file page is replaced by a private anonymous copy.
+"Swap" shows how much would-be-anonymous memory is also used, but out on
+swap.
+
+"VmFlags" field deserves a separate description. This member represents the kernel
+flags associated with the particular virtual memory area in two letter encoded
+manner. The codes are the following:
+ rd - readable
+ wr - writeable
+ ex - executable
+ sh - shared
+ mr - may read
+ mw - may write
+ me - may execute
+ ms - may share
+ gd - stack segment growns down
+ pf - pure PFN range
+ dw - disabled write to the mapped file
+ lo - pages are locked in memory
+ io - memory mapped I/O area
+ sr - sequential read advise provided
+ rr - random read advise provided
+ dc - do not copy area on fork
+ de - do not expand area on remapping
+ ac - area is accountable
+ nr - swap space is not reserved for the area
+ ht - area uses huge tlb pages
+ nl - non-linear mapping
+ ar - architecture specific flag
+ dd - do not include area into core dump
+ sd - soft-dirty flag
+ mm - mixed map area
+ hg - huge page advise flag
+ nh - no-huge page advise flag
+ mg - mergable advise flag
+
+Note that there is no guarantee that every flag and associated mnemonic will
+be present in all further kernel releases. Things get changed, the flags may
+be vanished or the reverse -- new added.
+
+This file is only present if the CONFIG_MMU kernel configuration option is
+enabled.
+
+The /proc/PID/clear_refs is used to reset the PG_Referenced and ACCESSED/YOUNG
+bits on both physical and virtual pages associated with a process, and the
+soft-dirty bit on pte (see Documentation/vm/soft-dirty.txt for details).
+To clear the bits for all the pages associated with the process
+ > echo 1 > /proc/PID/clear_refs
+
+To clear the bits for the anonymous pages associated with the process
+ > echo 2 > /proc/PID/clear_refs
+
+To clear the bits for the file mapped pages associated with the process
+ > echo 3 > /proc/PID/clear_refs
+
+To clear the soft-dirty bit
+ > echo 4 > /proc/PID/clear_refs
+
+To reset the peak resident set size ("high water mark") to the process's
+current value:
+ > echo 5 > /proc/PID/clear_refs
+
+Any other value written to /proc/PID/clear_refs will have no effect.
+
+The /proc/pid/pagemap gives the PFN, which can be used to find the pageflags
+using /proc/kpageflags and number of times a page is mapped using
+/proc/kpagecount. For detailed explanation, see Documentation/vm/pagemap.txt.
+
+The /proc/pid/numa_maps is an extension based on maps, showing the memory
+locality and binding policy, as well as the memory usage (in pages) of
+each mapping. The output follows a general format where mapping details get
+summarized separated by blank spaces, one mapping per each file line:
+
+address policy mapping details
+
+00400000 default file=/usr/local/bin/app mapped=1 active=0 N3=1 kernelpagesize_kB=4
+00600000 default file=/usr/local/bin/app anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+3206000000 default file=/lib64/ld-2.12.so mapped=26 mapmax=6 N0=24 N3=2 kernelpagesize_kB=4
+320621f000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+3206220000 default file=/lib64/ld-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+3206221000 default anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+3206800000 default file=/lib64/libc-2.12.so mapped=59 mapmax=21 active=55 N0=41 N3=18 kernelpagesize_kB=4
+320698b000 default file=/lib64/libc-2.12.so
+3206b8a000 default file=/lib64/libc-2.12.so anon=2 dirty=2 N3=2 kernelpagesize_kB=4
+3206b8e000 default file=/lib64/libc-2.12.so anon=1 dirty=1 N3=1 kernelpagesize_kB=4
+3206b8f000 default anon=3 dirty=3 active=1 N3=3 kernelpagesize_kB=4
+7f4dc10a2000 default anon=3 dirty=3 N3=3 kernelpagesize_kB=4
+7f4dc10b4000 default anon=2 dirty=2 active=1 N3=2 kernelpagesize_kB=4
+7f4dc1200000 default file=/anon_hugepage\040(deleted) huge anon=1 dirty=1 N3=1 kernelpagesize_kB=2048
+7fff335f0000 default stack anon=3 dirty=3 N3=3 kernelpagesize_kB=4
+7fff3369d000 default mapped=1 mapmax=35 active=0 N3=1 kernelpagesize_kB=4
+
+Where:
+"address" is the starting address for the mapping;
+"policy" reports the NUMA memory policy set for the mapping (see vm/numa_memory_policy.txt);
+"mapping details" summarizes mapping data such as mapping type, page usage counters,
+node locality page counters (N0 == node0, N1 == node1, ...) and the kernel page
+size, in KB, that is backing the mapping up.
+
+1.2 Kernel data
+---------------
+
+Similar to the process entries, the kernel data files give information about
+the running kernel. The files used to obtain this information are contained in
+/proc and are listed in Table 1-5. Not all of these will be present in your
+system. It depends on the kernel configuration and the loaded modules, which
+files are there, and which are missing.
+
+Table 1-5: Kernel info in /proc
+..............................................................................
+ File Content
+ apm Advanced power management info
+ buddyinfo Kernel memory allocator information (see text) (2.5)
+ bus Directory containing bus specific information
+ cmdline Kernel command line
+ cpuinfo Info about the CPU
+ devices Available devices (block and character)
+ dma Used DMS channels
+ filesystems Supported filesystems
+ driver Various drivers grouped here, currently rtc (2.4)
+ execdomains Execdomains, related to security (2.4)
+ fb Frame Buffer devices (2.4)
+ fs File system parameters, currently nfs/exports (2.4)
+ ide Directory containing info about the IDE subsystem
+ interrupts Interrupt usage
+ iomem Memory map (2.4)
+ ioports I/O port usage
+ irq Masks for irq to cpu affinity (2.4)(smp?)
+ isapnp ISA PnP (Plug&Play) Info (2.4)
+ kcore Kernel core image (can be ELF or A.OUT(deprecated in 2.4))
+ kmsg Kernel messages
+ ksyms Kernel symbol table
+ loadavg Load average of last 1, 5 & 15 minutes
+ locks Kernel locks
+ meminfo Memory info
+ misc Miscellaneous
+ modules List of loaded modules
+ mounts Mounted filesystems
+ net Networking info (see text)
+ pagetypeinfo Additional page allocator information (see text) (2.5)
+ partitions Table of partitions known to the system
+ pci Deprecated info of PCI bus (new way -> /proc/bus/pci/,
+ decoupled by lspci (2.4)
+ rtc Real time clock
+ scsi SCSI info (see text)
+ slabinfo Slab pool info
+ softirqs softirq usage
+ stat Overall statistics
+ swaps Swap space utilization
+ sys See chapter 2
+ sysvipc Info of SysVIPC Resources (msg, sem, shm) (2.4)
+ tty Info of tty drivers
+ uptime Wall clock since boot, combined idle time of all cpus
+ version Kernel version
+ video bttv info of video resources (2.4)
+ vmallocinfo Show vmalloced areas
+..............................................................................
+
+You can, for example, check which interrupts are currently in use and what
+they are used for by looking in the file /proc/interrupts:
+
+ > cat /proc/interrupts
+ CPU0
+ 0: 8728810 XT-PIC timer
+ 1: 895 XT-PIC keyboard
+ 2: 0 XT-PIC cascade
+ 3: 531695 XT-PIC aha152x
+ 4: 2014133 XT-PIC serial
+ 5: 44401 XT-PIC pcnet_cs
+ 8: 2 XT-PIC rtc
+ 11: 8 XT-PIC i82365
+ 12: 182918 XT-PIC PS/2 Mouse
+ 13: 1 XT-PIC fpu
+ 14: 1232265 XT-PIC ide0
+ 15: 7 XT-PIC ide1
+ NMI: 0
+
+In 2.4.* a couple of lines where added to this file LOC & ERR (this time is the
+output of a SMP machine):
+
+ > cat /proc/interrupts
+
+ CPU0 CPU1
+ 0: 1243498 1214548 IO-APIC-edge timer
+ 1: 8949 8958 IO-APIC-edge keyboard
+ 2: 0 0 XT-PIC cascade
+ 5: 11286 10161 IO-APIC-edge soundblaster
+ 8: 1 0 IO-APIC-edge rtc
+ 9: 27422 27407 IO-APIC-edge 3c503
+ 12: 113645 113873 IO-APIC-edge PS/2 Mouse
+ 13: 0 0 XT-PIC fpu
+ 14: 22491 24012 IO-APIC-edge ide0
+ 15: 2183 2415 IO-APIC-edge ide1
+ 17: 30564 30414 IO-APIC-level eth0
+ 18: 177 164 IO-APIC-level bttv
+ NMI: 2457961 2457959
+ LOC: 2457882 2457881
+ ERR: 2155
+
+NMI is incremented in this case because every timer interrupt generates a NMI
+(Non Maskable Interrupt) which is used by the NMI Watchdog to detect lockups.
+
+LOC is the local interrupt counter of the internal APIC of every CPU.
+
+ERR is incremented in the case of errors in the IO-APIC bus (the bus that
+connects the CPUs in a SMP system. This means that an error has been detected,
+the IO-APIC automatically retry the transmission, so it should not be a big
+problem, but you should read the SMP-FAQ.
+
+In 2.6.2* /proc/interrupts was expanded again. This time the goal was for
+/proc/interrupts to display every IRQ vector in use by the system, not
+just those considered 'most important'. The new vectors are:
+
+ THR -- interrupt raised when a machine check threshold counter
+ (typically counting ECC corrected errors of memory or cache) exceeds
+ a configurable threshold. Only available on some systems.
+
+ TRM -- a thermal event interrupt occurs when a temperature threshold
+ has been exceeded for the CPU. This interrupt may also be generated
+ when the temperature drops back to normal.
+
+ SPU -- a spurious interrupt is some interrupt that was raised then lowered
+ by some IO device before it could be fully processed by the APIC. Hence
+ the APIC sees the interrupt but does not know what device it came from.
+ For this case the APIC will generate the interrupt with a IRQ vector
+ of 0xff. This might also be generated by chipset bugs.
+
+ RES, CAL, TLB -- rescheduling, call and TLB flush interrupts are
+ sent from one CPU to another per the needs of the OS. Typically,
+ their statistics are used by kernel developers and interested users to
+ determine the occurrence of interrupts of the given type.
+
+The above IRQ vectors are displayed only when relevant. For example,
+the threshold vector does not exist on x86_64 platforms. Others are
+suppressed when the system is a uniprocessor. As of this writing, only
+i386 and x86_64 platforms support the new IRQ vector displays.
+
+Of some interest is the introduction of the /proc/irq directory to 2.4.
+It could be used to set IRQ to CPU affinity, this means that you can "hook" an
+IRQ to only one CPU, or to exclude a CPU of handling IRQs. The contents of the
+irq subdir is one subdir for each IRQ, and two files; default_smp_affinity and
+prof_cpu_mask.
+
+For example
+ > ls /proc/irq/
+ 0 10 12 14 16 18 2 4 6 8 prof_cpu_mask
+ 1 11 13 15 17 19 3 5 7 9 default_smp_affinity
+ > ls /proc/irq/0/
+ smp_affinity
+
+smp_affinity is a bitmask, in which you can specify which CPUs can handle the
+IRQ, you can set it by doing:
+
+ > echo 1 > /proc/irq/10/smp_affinity
+
+This means that only the first CPU will handle the IRQ, but you can also echo
+5 which means that only the first and fourth CPU can handle the IRQ.
+
+The contents of each smp_affinity file is the same by default:
+
+ > cat /proc/irq/0/smp_affinity
+ ffffffff
+
+There is an alternate interface, smp_affinity_list which allows specifying
+a cpu range instead of a bitmask:
+
+ > cat /proc/irq/0/smp_affinity_list
+ 1024-1031
+
+The default_smp_affinity mask applies to all non-active IRQs, which are the
+IRQs which have not yet been allocated/activated, and hence which lack a
+/proc/irq/[0-9]* directory.
+
+The node file on an SMP system shows the node to which the device using the IRQ
+reports itself as being attached. This hardware locality information does not
+include information about any possible driver locality preference.
+
+prof_cpu_mask specifies which CPUs are to be profiled by the system wide
+profiler. Default value is ffffffff (all cpus if there are only 32 of them).
+
+The way IRQs are routed is handled by the IO-APIC, and it's Round Robin
+between all the CPUs which are allowed to handle it. As usual the kernel has
+more info than you and does a better job than you, so the defaults are the
+best choice for almost everyone. [Note this applies only to those IO-APIC's
+that support "Round Robin" interrupt distribution.]
+
+There are three more important subdirectories in /proc: net, scsi, and sys.
+The general rule is that the contents, or even the existence of these
+directories, depend on your kernel configuration. If SCSI is not enabled, the
+directory scsi may not exist. The same is true with the net, which is there
+only when networking support is present in the running kernel.
+
+The slabinfo file gives information about memory usage at the slab level.
+Linux uses slab pools for memory management above page level in version 2.2.
+Commonly used objects have their own slab pool (such as network buffers,
+directory cache, and so on).
+
+..............................................................................
+
+> cat /proc/buddyinfo
+
+Node 0, zone DMA 0 4 5 4 4 3 ...
+Node 0, zone Normal 1 0 0 1 101 8 ...
+Node 0, zone HighMem 2 0 0 1 1 0 ...
+
+External fragmentation is a problem under some workloads, and buddyinfo is a
+useful tool for helping diagnose these problems. Buddyinfo will give you a
+clue as to how big an area you can safely allocate, or why a previous
+allocation failed.
+
+Each column represents the number of pages of a certain order which are
+available. In this case, there are 0 chunks of 2^0*PAGE_SIZE available in
+ZONE_DMA, 4 chunks of 2^1*PAGE_SIZE in ZONE_DMA, 101 chunks of 2^4*PAGE_SIZE
+available in ZONE_NORMAL, etc...
+
+More information relevant to external fragmentation can be found in
+pagetypeinfo.
+
+> cat /proc/pagetypeinfo
+Page block order: 9
+Pages per block: 512
+
+Free pages count per migrate type at order 0 1 2 3 4 5 6 7 8 9 10
+Node 0, zone DMA, type Unmovable 0 0 0 1 1 1 1 1 1 1 0
+Node 0, zone DMA, type Reclaimable 0 0 0 0 0 0 0 0 0 0 0
+Node 0, zone DMA, type Movable 1 1 2 1 2 1 1 0 1 0 2
+Node 0, zone DMA, type Reserve 0 0 0 0 0 0 0 0 0 1 0
+Node 0, zone DMA, type Isolate 0 0 0 0 0 0 0 0 0 0 0
+Node 0, zone DMA32, type Unmovable 103 54 77 1 1 1 11 8 7 1 9
+Node 0, zone DMA32, type Reclaimable 0 0 2 1 0 0 0 0 1 0 0
+Node 0, zone DMA32, type Movable 169 152 113 91 77 54 39 13 6 1 452
+Node 0, zone DMA32, type Reserve 1 2 2 2 2 0 1 1 1 1 0
+Node 0, zone DMA32, type Isolate 0 0 0 0 0 0 0 0 0 0 0
+
+Number of blocks type Unmovable Reclaimable Movable Reserve Isolate
+Node 0, zone DMA 2 0 5 1 0
+Node 0, zone DMA32 41 6 967 2 0
+
+Fragmentation avoidance in the kernel works by grouping pages of different
+migrate types into the same contiguous regions of memory called page blocks.
+A page block is typically the size of the default hugepage size e.g. 2MB on
+X86-64. By keeping pages grouped based on their ability to move, the kernel
+can reclaim pages within a page block to satisfy a high-order allocation.
+
+The pagetypinfo begins with information on the size of a page block. It
+then gives the same type of information as buddyinfo except broken down
+by migrate-type and finishes with details on how many page blocks of each
+type exist.
+
+If min_free_kbytes has been tuned correctly (recommendations made by hugeadm
+from libhugetlbfs http://sourceforge.net/projects/libhugetlbfs/), one can
+make an estimate of the likely number of huge pages that can be allocated
+at a given point in time. All the "Movable" blocks should be allocatable
+unless memory has been mlock()'d. Some of the Reclaimable blocks should
+also be allocatable although a lot of filesystem metadata may have to be
+reclaimed to achieve this.
+
+..............................................................................
+
+meminfo:
+
+Provides information about distribution and utilization of memory. This
+varies by architecture and compile options. The following is from a
+16GB PIII, which has highmem enabled. You may not have all of these fields.
+
+> cat /proc/meminfo
+
+The "Locked" indicates whether the mapping is locked in memory or not.
+
+
+MemTotal: 16344972 kB
+MemFree: 13634064 kB
+MemAvailable: 14836172 kB
+Buffers: 3656 kB
+Cached: 1195708 kB
+SwapCached: 0 kB
+Active: 891636 kB
+Inactive: 1077224 kB
+HighTotal: 15597528 kB
+HighFree: 13629632 kB
+LowTotal: 747444 kB
+LowFree: 4432 kB
+SwapTotal: 0 kB
+SwapFree: 0 kB
+Dirty: 968 kB
+Writeback: 0 kB
+AnonPages: 861800 kB
+Mapped: 280372 kB
+Slab: 284364 kB
+SReclaimable: 159856 kB
+SUnreclaim: 124508 kB
+PageTables: 24448 kB
+NFS_Unstable: 0 kB
+Bounce: 0 kB
+WritebackTmp: 0 kB
+CommitLimit: 7669796 kB
+Committed_AS: 100056 kB
+VmallocTotal: 112216 kB
+VmallocUsed: 428 kB
+VmallocChunk: 111088 kB
+AnonHugePages: 49152 kB
+
+ MemTotal: Total usable ram (i.e. physical ram minus a few reserved
+ bits and the kernel binary code)
+ MemFree: The sum of LowFree+HighFree
+MemAvailable: An estimate of how much memory is available for starting new
+ applications, without swapping. Calculated from MemFree,
+ SReclaimable, the size of the file LRU lists, and the low
+ watermarks in each zone.
+ The estimate takes into account that the system needs some
+ page cache to function well, and that not all reclaimable
+ slab will be reclaimable, due to items being in use. The
+ impact of those factors will vary from system to system.
+ Buffers: Relatively temporary storage for raw disk blocks
+ shouldn't get tremendously large (20MB or so)
+ Cached: in-memory cache for files read from the disk (the
+ pagecache). Doesn't include SwapCached
+ SwapCached: Memory that once was swapped out, is swapped back in but
+ still also is in the swapfile (if memory is needed it
+ doesn't need to be swapped out AGAIN because it is already
+ in the swapfile. This saves I/O)
+ Active: Memory that has been used more recently and usually not
+ reclaimed unless absolutely necessary.
+ Inactive: Memory which has been less recently used. It is more
+ eligible to be reclaimed for other purposes
+ HighTotal:
+ HighFree: Highmem is all memory above ~860MB of physical memory
+ Highmem areas are for use by userspace programs, or
+ for the pagecache. The kernel must use tricks to access
+ this memory, making it slower to access than lowmem.
+ LowTotal:
+ LowFree: Lowmem is memory which can be used for everything that
+ highmem can be used for, but it is also available for the
+ kernel's use for its own data structures. Among many
+ other things, it is where everything from the Slab is
+ allocated. Bad things happen when you're out of lowmem.
+ SwapTotal: total amount of swap space available
+ SwapFree: Memory which has been evicted from RAM, and is temporarily
+ on the disk
+ Dirty: Memory which is waiting to get written back to the disk
+ Writeback: Memory which is actively being written back to the disk
+ AnonPages: Non-file backed pages mapped into userspace page tables
+AnonHugePages: Non-file backed huge pages mapped into userspace page tables
+ Mapped: files which have been mmaped, such as libraries
+ Slab: in-kernel data structures cache
+SReclaimable: Part of Slab, that might be reclaimed, such as caches
+ SUnreclaim: Part of Slab, that cannot be reclaimed on memory pressure
+ PageTables: amount of memory dedicated to the lowest level of page
+ tables.
+NFS_Unstable: NFS pages sent to the server, but not yet committed to stable
+ storage
+ Bounce: Memory used for block device "bounce buffers"
+WritebackTmp: Memory used by FUSE for temporary writeback buffers
+ CommitLimit: Based on the overcommit ratio ('vm.overcommit_ratio'),
+ this is the total amount of memory currently available to
+ be allocated on the system. This limit is only adhered to
+ if strict overcommit accounting is enabled (mode 2 in
+ 'vm.overcommit_memory').
+ The CommitLimit is calculated with the following formula:
+ CommitLimit = ([total RAM pages] - [total huge TLB pages]) *
+ overcommit_ratio / 100 + [total swap pages]
+ For example, on a system with 1G of physical RAM and 7G
+ of swap with a `vm.overcommit_ratio` of 30 it would
+ yield a CommitLimit of 7.3G.
+ For more details, see the memory overcommit documentation
+ in vm/overcommit-accounting.
+Committed_AS: The amount of memory presently allocated on the system.
+ The committed memory is a sum of all of the memory which
+ has been allocated by processes, even if it has not been
+ "used" by them as of yet. A process which malloc()'s 1G
+ of memory, but only touches 300M of it will show up as
+ using 1G. This 1G is memory which has been "committed" to
+ by the VM and can be used at any time by the allocating
+ application. With strict overcommit enabled on the system
+ (mode 2 in 'vm.overcommit_memory'),allocations which would
+ exceed the CommitLimit (detailed above) will not be permitted.
+ This is useful if one needs to guarantee that processes will
+ not fail due to lack of memory once that memory has been
+ successfully allocated.
+VmallocTotal: total size of vmalloc memory area
+ VmallocUsed: amount of vmalloc area which is used
+VmallocChunk: largest contiguous block of vmalloc area which is free
+
+..............................................................................
+
+vmallocinfo:
+
+Provides information about vmalloced/vmaped areas. One line per area,
+containing the virtual address range of the area, size in bytes,
+caller information of the creator, and optional information depending
+on the kind of area :
+
+ pages=nr number of pages
+ phys=addr if a physical address was specified
+ ioremap I/O mapping (ioremap() and friends)
+ vmalloc vmalloc() area
+ vmap vmap()ed pages
+ user VM_USERMAP area
+ vpages buffer for pages pointers was vmalloced (huge area)
+ N<node>=nr (Only on NUMA kernels)
+ Number of pages allocated on memory node <node>
+
+> cat /proc/vmallocinfo
+0xffffc20000000000-0xffffc20000201000 2101248 alloc_large_system_hash+0x204 ...
+ /0x2c0 pages=512 vmalloc N0=128 N1=128 N2=128 N3=128
+0xffffc20000201000-0xffffc20000302000 1052672 alloc_large_system_hash+0x204 ...
+ /0x2c0 pages=256 vmalloc N0=64 N1=64 N2=64 N3=64
+0xffffc20000302000-0xffffc20000304000 8192 acpi_tb_verify_table+0x21/0x4f...
+ phys=7fee8000 ioremap
+0xffffc20000304000-0xffffc20000307000 12288 acpi_tb_verify_table+0x21/0x4f...
+ phys=7fee7000 ioremap
+0xffffc2000031d000-0xffffc2000031f000 8192 init_vdso_vars+0x112/0x210
+0xffffc2000031f000-0xffffc2000032b000 49152 cramfs_uncompress_init+0x2e ...
+ /0x80 pages=11 vmalloc N0=3 N1=3 N2=2 N3=3
+0xffffc2000033a000-0xffffc2000033d000 12288 sys_swapon+0x640/0xac0 ...
+ pages=2 vmalloc N1=2
+0xffffc20000347000-0xffffc2000034c000 20480 xt_alloc_table_info+0xfe ...
+ /0x130 [x_tables] pages=4 vmalloc N0=4
+0xffffffffa0000000-0xffffffffa000f000 61440 sys_init_module+0xc27/0x1d00 ...
+ pages=14 vmalloc N2=14
+0xffffffffa000f000-0xffffffffa0014000 20480 sys_init_module+0xc27/0x1d00 ...
+ pages=4 vmalloc N1=4
+0xffffffffa0014000-0xffffffffa0017000 12288 sys_init_module+0xc27/0x1d00 ...
+ pages=2 vmalloc N1=2
+0xffffffffa0017000-0xffffffffa0022000 45056 sys_init_module+0xc27/0x1d00 ...
+ pages=10 vmalloc N0=10
+
+..............................................................................
+
+softirqs:
+
+Provides counts of softirq handlers serviced since boot time, for each cpu.
+
+> cat /proc/softirqs
+ CPU0 CPU1 CPU2 CPU3
+ HI: 0 0 0 0
+ TIMER: 27166 27120 27097 27034
+ NET_TX: 0 0 0 17
+ NET_RX: 42 0 0 39
+ BLOCK: 0 0 107 1121
+ TASKLET: 0 0 0 290
+ SCHED: 27035 26983 26971 26746
+ HRTIMER: 0 0 0 0
+ RCU: 1678 1769 2178 2250
+
+
+1.3 IDE devices in /proc/ide
+----------------------------
+
+The subdirectory /proc/ide contains information about all IDE devices of which
+the kernel is aware. There is one subdirectory for each IDE controller, the
+file drivers and a link for each IDE device, pointing to the device directory
+in the controller specific subtree.
+
+The file drivers contains general information about the drivers used for the
+IDE devices:
+
+ > cat /proc/ide/drivers
+ ide-cdrom version 4.53
+ ide-disk version 1.08
+
+More detailed information can be found in the controller specific
+subdirectories. These are named ide0, ide1 and so on. Each of these
+directories contains the files shown in table 1-6.
+
+
+Table 1-6: IDE controller info in /proc/ide/ide?
+..............................................................................
+ File Content
+ channel IDE channel (0 or 1)
+ config Configuration (only for PCI/IDE bridge)
+ mate Mate name
+ model Type/Chipset of IDE controller
+..............................................................................
+
+Each device connected to a controller has a separate subdirectory in the
+controllers directory. The files listed in table 1-7 are contained in these
+directories.
+
+
+Table 1-7: IDE device information
+..............................................................................
+ File Content
+ cache The cache
+ capacity Capacity of the medium (in 512Byte blocks)
+ driver driver and version
+ geometry physical and logical geometry
+ identify device identify block
+ media media type
+ model device identifier
+ settings device setup
+ smart_thresholds IDE disk management thresholds
+ smart_values IDE disk management values
+..............................................................................
+
+The most interesting file is settings. This file contains a nice overview of
+the drive parameters:
+
+ # cat /proc/ide/ide0/hda/settings
+ name value min max mode
+ ---- ----- --- --- ----
+ bios_cyl 526 0 65535 rw
+ bios_head 255 0 255 rw
+ bios_sect 63 0 63 rw
+ breada_readahead 4 0 127 rw
+ bswap 0 0 1 r
+ file_readahead 72 0 2097151 rw
+ io_32bit 0 0 3 rw
+ keepsettings 0 0 1 rw
+ max_kb_per_request 122 1 127 rw
+ multcount 0 0 8 rw
+ nice1 1 0 1 rw
+ nowerr 0 0 1 rw
+ pio_mode write-only 0 255 w
+ slow 0 0 1 rw
+ unmaskirq 0 0 1 rw
+ using_dma 0 0 1 rw
+
+
+1.4 Networking info in /proc/net
+--------------------------------
+
+The subdirectory /proc/net follows the usual pattern. Table 1-8 shows the
+additional values you get for IP version 6 if you configure the kernel to
+support this. Table 1-9 lists the files and their meaning.
+
+
+Table 1-8: IPv6 info in /proc/net
+..............................................................................
+ File Content
+ udp6 UDP sockets (IPv6)
+ tcp6 TCP sockets (IPv6)
+ raw6 Raw device statistics (IPv6)
+ igmp6 IP multicast addresses, which this host joined (IPv6)
+ if_inet6 List of IPv6 interface addresses
+ ipv6_route Kernel routing table for IPv6
+ rt6_stats Global IPv6 routing tables statistics
+ sockstat6 Socket statistics (IPv6)
+ snmp6 Snmp data (IPv6)
+..............................................................................
+
+
+Table 1-9: Network info in /proc/net
+..............................................................................
+ File Content
+ arp Kernel ARP table
+ dev network devices with statistics
+ dev_mcast the Layer2 multicast groups a device is listening too
+ (interface index, label, number of references, number of bound
+ addresses).
+ dev_stat network device status
+ ip_fwchains Firewall chain linkage
+ ip_fwnames Firewall chain names
+ ip_masq Directory containing the masquerading tables
+ ip_masquerade Major masquerading table
+ netstat Network statistics
+ raw raw device statistics
+ route Kernel routing table
+ rpc Directory containing rpc info
+ rt_cache Routing cache
+ snmp SNMP data
+ sockstat Socket statistics
+ tcp TCP sockets
+ udp UDP sockets
+ unix UNIX domain sockets
+ wireless Wireless interface data (Wavelan etc)
+ igmp IP multicast addresses, which this host joined
+ psched Global packet scheduler parameters.
+ netlink List of PF_NETLINK sockets
+ ip_mr_vifs List of multicast virtual interfaces
+ ip_mr_cache List of multicast routing cache
+..............................................................................
+
+You can use this information to see which network devices are available in
+your system and how much traffic was routed over those devices:
+
+ > cat /proc/net/dev
+ Inter-|Receive |[...
+ face |bytes packets errs drop fifo frame compressed multicast|[...
+ lo: 908188 5596 0 0 0 0 0 0 [...
+ ppp0:15475140 20721 410 0 0 410 0 0 [...
+ eth0: 614530 7085 0 0 0 0 0 1 [...
+
+ ...] Transmit
+ ...] bytes packets errs drop fifo colls carrier compressed
+ ...] 908188 5596 0 0 0 0 0 0
+ ...] 1375103 17405 0 0 0 0 0 0
+ ...] 1703981 5535 0 0 0 3 0 0
+
+In addition, each Channel Bond interface has its own directory. For
+example, the bond0 device will have a directory called /proc/net/bond0/.
+It will contain information that is specific to that bond, such as the
+current slaves of the bond, the link status of the slaves, and how
+many times the slaves link has failed.
+
+1.5 SCSI info
+-------------
+
+If you have a SCSI host adapter in your system, you'll find a subdirectory
+named after the driver for this adapter in /proc/scsi. You'll also see a list
+of all recognized SCSI devices in /proc/scsi:
+
+ >cat /proc/scsi/scsi
+ Attached devices:
+ Host: scsi0 Channel: 00 Id: 00 Lun: 00
+ Vendor: IBM Model: DGHS09U Rev: 03E0
+ Type: Direct-Access ANSI SCSI revision: 03
+ Host: scsi0 Channel: 00 Id: 06 Lun: 00
+ Vendor: PIONEER Model: CD-ROM DR-U06S Rev: 1.04
+ Type: CD-ROM ANSI SCSI revision: 02
+
+
+The directory named after the driver has one file for each adapter found in
+the system. These files contain information about the controller, including
+the used IRQ and the IO address range. The amount of information shown is
+dependent on the adapter you use. The example shows the output for an Adaptec
+AHA-2940 SCSI adapter:
+
+ > cat /proc/scsi/aic7xxx/0
+
+ Adaptec AIC7xxx driver version: 5.1.19/3.2.4
+ Compile Options:
+ TCQ Enabled By Default : Disabled
+ AIC7XXX_PROC_STATS : Disabled
+ AIC7XXX_RESET_DELAY : 5
+ Adapter Configuration:
+ SCSI Adapter: Adaptec AHA-294X Ultra SCSI host adapter
+ Ultra Wide Controller
+ PCI MMAPed I/O Base: 0xeb001000
+ Adapter SEEPROM Config: SEEPROM found and used.
+ Adaptec SCSI BIOS: Enabled
+ IRQ: 10
+ SCBs: Active 0, Max Active 2,
+ Allocated 15, HW 16, Page 255
+ Interrupts: 160328
+ BIOS Control Word: 0x18b6
+ Adapter Control Word: 0x005b
+ Extended Translation: Enabled
+ Disconnect Enable Flags: 0xffff
+ Ultra Enable Flags: 0x0001
+ Tag Queue Enable Flags: 0x0000
+ Ordered Queue Tag Flags: 0x0000
+ Default Tag Queue Depth: 8
+ Tagged Queue By Device array for aic7xxx host instance 0:
+ {255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}
+ Actual queue depth per device for aic7xxx host instance 0:
+ {1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}
+ Statistics:
+ (scsi0:0:0:0)
+ Device using Wide/Sync transfers at 40.0 MByte/sec, offset 8
+ Transinfo settings: current(12/8/1/0), goal(12/8/1/0), user(12/15/1/0)
+ Total transfers 160151 (74577 reads and 85574 writes)
+ (scsi0:0:6:0)
+ Device using Narrow/Sync transfers at 5.0 MByte/sec, offset 15
+ Transinfo settings: current(50/15/0/0), goal(50/15/0/0), user(50/15/0/0)
+ Total transfers 0 (0 reads and 0 writes)
+
+
+1.6 Parallel port info in /proc/parport
+---------------------------------------
+
+The directory /proc/parport contains information about the parallel ports of
+your system. It has one subdirectory for each port, named after the port
+number (0,1,2,...).
+
+These directories contain the four files shown in Table 1-10.
+
+
+Table 1-10: Files in /proc/parport
+..............................................................................
+ File Content
+ autoprobe Any IEEE-1284 device ID information that has been acquired.
+ devices list of the device drivers using that port. A + will appear by the
+ name of the device currently using the port (it might not appear
+ against any).
+ hardware Parallel port's base address, IRQ line and DMA channel.
+ irq IRQ that parport is using for that port. This is in a separate
+ file to allow you to alter it by writing a new value in (IRQ
+ number or none).
+..............................................................................
+
+1.7 TTY info in /proc/tty
+-------------------------
+
+Information about the available and actually used tty's can be found in the
+directory /proc/tty.You'll find entries for drivers and line disciplines in
+this directory, as shown in Table 1-11.
+
+
+Table 1-11: Files in /proc/tty
+..............................................................................
+ File Content
+ drivers list of drivers and their usage
+ ldiscs registered line disciplines
+ driver/serial usage statistic and status of single tty lines
+..............................................................................
+
+To see which tty's are currently in use, you can simply look into the file
+/proc/tty/drivers:
+
+ > cat /proc/tty/drivers
+ pty_slave /dev/pts 136 0-255 pty:slave
+ pty_master /dev/ptm 128 0-255 pty:master
+ pty_slave /dev/ttyp 3 0-255 pty:slave
+ pty_master /dev/pty 2 0-255 pty:master
+ serial /dev/cua 5 64-67 serial:callout
+ serial /dev/ttyS 4 64-67 serial
+ /dev/tty0 /dev/tty0 4 0 system:vtmaster
+ /dev/ptmx /dev/ptmx 5 2 system
+ /dev/console /dev/console 5 1 system:console
+ /dev/tty /dev/tty 5 0 system:/dev/tty
+ unknown /dev/tty 4 1-63 console
+
+
+1.8 Miscellaneous kernel statistics in /proc/stat
+-------------------------------------------------
+
+Various pieces of information about kernel activity are available in the
+/proc/stat file. All of the numbers reported in this file are aggregates
+since the system first booted. For a quick look, simply cat the file:
+
+ > cat /proc/stat
+ cpu 2255 34 2290 22625563 6290 127 456 0 0 0
+ cpu0 1132 34 1441 11311718 3675 127 438 0 0 0
+ cpu1 1123 0 849 11313845 2614 0 18 0 0 0
+ intr 114930548 113199788 3 0 5 263 0 4 [... lots more numbers ...]
+ ctxt 1990473
+ btime 1062191376
+ processes 2915
+ procs_running 1
+ procs_blocked 0
+ softirq 183433 0 21755 12 39 1137 231 21459 2263
+
+The very first "cpu" line aggregates the numbers in all of the other "cpuN"
+lines. These numbers identify the amount of time the CPU has spent performing
+different kinds of work. Time units are in USER_HZ (typically hundredths of a
+second). The meanings of the columns are as follows, from left to right:
+
+- user: normal processes executing in user mode
+- nice: niced processes executing in user mode
+- system: processes executing in kernel mode
+- idle: twiddling thumbs
+- iowait: waiting for I/O to complete
+- irq: servicing interrupts
+- softirq: servicing softirqs
+- steal: involuntary wait
+- guest: running a normal guest
+- guest_nice: running a niced guest
+
+The "intr" line gives counts of interrupts serviced since boot time, for each
+of the possible system interrupts. The first column is the total of all
+interrupts serviced including unnumbered architecture specific interrupts;
+each subsequent column is the total for that particular numbered interrupt.
+Unnumbered interrupts are not shown, only summed into the total.
+
+The "ctxt" line gives the total number of context switches across all CPUs.
+
+The "btime" line gives the time at which the system booted, in seconds since
+the Unix epoch.
+
+The "processes" line gives the number of processes and threads created, which
+includes (but is not limited to) those created by calls to the fork() and
+clone() system calls.
+
+The "procs_running" line gives the total number of threads that are
+running or ready to run (i.e., the total number of runnable threads).
+
+The "procs_blocked" line gives the number of processes currently blocked,
+waiting for I/O to complete.
+
+The "softirq" line gives counts of softirqs serviced since boot time, for each
+of the possible system softirqs. The first column is the total of all
+softirqs serviced; each subsequent column is the total for that particular
+softirq.
+
+
+1.9 Ext4 file system parameters
+-------------------------------
+
+Information about mounted ext4 file systems can be found in
+/proc/fs/ext4. Each mounted filesystem will have a directory in
+/proc/fs/ext4 based on its device name (i.e., /proc/fs/ext4/hdc or
+/proc/fs/ext4/dm-0). The files in each per-device directory are shown
+in Table 1-12, below.
+
+Table 1-12: Files in /proc/fs/ext4/<devname>
+..............................................................................
+ File Content
+ mb_groups details of multiblock allocator buddy cache of free blocks
+..............................................................................
+
+2.0 /proc/consoles
+------------------
+Shows registered system console lines.
+
+To see which character device lines are currently used for the system console
+/dev/console, you may simply look into the file /proc/consoles:
+
+ > cat /proc/consoles
+ tty0 -WU (ECp) 4:7
+ ttyS0 -W- (Ep) 4:64
+
+The columns are:
+
+ device name of the device
+ operations R = can do read operations
+ W = can do write operations
+ U = can do unblank
+ flags E = it is enabled
+ C = it is preferred console
+ B = it is primary boot console
+ p = it is used for printk buffer
+ b = it is not a TTY but a Braille device
+ a = it is safe to use when cpu is offline
+ major:minor major and minor number of the device separated by a colon
+
+------------------------------------------------------------------------------
+Summary
+------------------------------------------------------------------------------
+The /proc file system serves information about the running system. It not only
+allows access to process data but also allows you to request the kernel status
+by reading files in the hierarchy.
+
+The directory structure of /proc reflects the types of information and makes
+it easy, if not obvious, where to look for specific data.
+------------------------------------------------------------------------------
+
+------------------------------------------------------------------------------
+CHAPTER 2: MODIFYING SYSTEM PARAMETERS
+------------------------------------------------------------------------------
+
+------------------------------------------------------------------------------
+In This Chapter
+------------------------------------------------------------------------------
+* Modifying kernel parameters by writing into files found in /proc/sys
+* Exploring the files which modify certain parameters
+* Review of the /proc/sys file tree
+------------------------------------------------------------------------------
+
+
+A very interesting part of /proc is the directory /proc/sys. This is not only
+a source of information, it also allows you to change parameters within the
+kernel. Be very careful when attempting this. You can optimize your system,
+but you can also cause it to crash. Never alter kernel parameters on a
+production system. Set up a development machine and test to make sure that
+everything works the way you want it to. You may have no alternative but to
+reboot the machine once an error has been made.
+
+To change a value, simply echo the new value into the file. An example is
+given below in the section on the file system data. You need to be root to do
+this. You can create your own boot script to perform this every time your
+system boots.
+
+The files in /proc/sys can be used to fine tune and monitor miscellaneous and
+general things in the operation of the Linux kernel. Since some of the files
+can inadvertently disrupt your system, it is advisable to read both
+documentation and source before actually making adjustments. In any case, be
+very careful when writing to any of these files. The entries in /proc may
+change slightly between the 2.1.* and the 2.2 kernel, so if there is any doubt
+review the kernel documentation in the directory /usr/src/linux/Documentation.
+This chapter is heavily based on the documentation included in the pre 2.2
+kernels, and became part of it in version 2.2.1 of the Linux kernel.
+
+Please see: Documentation/sysctl/ directory for descriptions of these
+entries.
+
+------------------------------------------------------------------------------
+Summary
+------------------------------------------------------------------------------
+Certain aspects of kernel behavior can be modified at runtime, without the
+need to recompile the kernel, or even to reboot the system. The files in the
+/proc/sys tree can not only be read, but also modified. You can use the echo
+command to write value into these files, thereby changing the default settings
+of the kernel.
+------------------------------------------------------------------------------
+
+------------------------------------------------------------------------------
+CHAPTER 3: PER-PROCESS PARAMETERS
+------------------------------------------------------------------------------
+
+3.1 /proc/<pid>/oom_adj & /proc/<pid>/oom_score_adj- Adjust the oom-killer score
+--------------------------------------------------------------------------------
+
+These file can be used to adjust the badness heuristic used to select which
+process gets killed in out of memory conditions.
+
+The badness heuristic assigns a value to each candidate task ranging from 0
+(never kill) to 1000 (always kill) to determine which process is targeted. The
+units are roughly a proportion along that range of allowed memory the process
+may allocate from based on an estimation of its current memory and swap use.
+For example, if a task is using all allowed memory, its badness score will be
+1000. If it is using half of its allowed memory, its score will be 500.
+
+There is an additional factor included in the badness score: the current memory
+and swap usage is discounted by 3% for root processes.
+
+The amount of "allowed" memory depends on the context in which the oom killer
+was called. If it is due to the memory assigned to the allocating task's cpuset
+being exhausted, the allowed memory represents the set of mems assigned to that
+cpuset. If it is due to a mempolicy's node(s) being exhausted, the allowed
+memory represents the set of mempolicy nodes. If it is due to a memory
+limit (or swap limit) being reached, the allowed memory is that configured
+limit. Finally, if it is due to the entire system being out of memory, the
+allowed memory represents all allocatable resources.
+
+The value of /proc/<pid>/oom_score_adj is added to the badness score before it
+is used to determine which task to kill. Acceptable values range from -1000
+(OOM_SCORE_ADJ_MIN) to +1000 (OOM_SCORE_ADJ_MAX). This allows userspace to
+polarize the preference for oom killing either by always preferring a certain
+task or completely disabling it. The lowest possible value, -1000, is
+equivalent to disabling oom killing entirely for that task since it will always
+report a badness score of 0.
+
+Consequently, it is very simple for userspace to define the amount of memory to
+consider for each task. Setting a /proc/<pid>/oom_score_adj value of +500, for
+example, is roughly equivalent to allowing the remainder of tasks sharing the
+same system, cpuset, mempolicy, or memory controller resources to use at least
+50% more memory. A value of -500, on the other hand, would be roughly
+equivalent to discounting 50% of the task's allowed memory from being considered
+as scoring against the task.
+
+For backwards compatibility with previous kernels, /proc/<pid>/oom_adj may also
+be used to tune the badness score. Its acceptable values range from -16
+(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17
+(OOM_DISABLE) to disable oom killing entirely for that task. Its value is
+scaled linearly with /proc/<pid>/oom_score_adj.
+
+The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last
+value set by a CAP_SYS_RESOURCE process. To reduce the value any lower
+requires CAP_SYS_RESOURCE.
+
+Caveat: when a parent task is selected, the oom killer will sacrifice any first
+generation children with separate address spaces instead, if possible. This
+avoids servers and important system daemons from being killed and loses the
+minimal amount of work.
+
+
+3.2 /proc/<pid>/oom_score - Display current oom-killer score
+-------------------------------------------------------------
+
+This file can be used to check the current score used by the oom-killer is for
+any given <pid>. Use it together with /proc/<pid>/oom_score_adj to tune which
+process should be killed in an out-of-memory situation.
+
+
+3.3 /proc/<pid>/io - Display the IO accounting fields
+-------------------------------------------------------
+
+This file contains IO statistics for each running process
+
+Example
+-------
+
+test:/tmp # dd if=/dev/zero of=/tmp/test.dat &
+[1] 3828
+
+test:/tmp # cat /proc/3828/io
+rchar: 323934931
+wchar: 323929600
+syscr: 632687
+syscw: 632675
+read_bytes: 0
+write_bytes: 323932160
+cancelled_write_bytes: 0
+
+
+Description
+-----------
+
+rchar
+-----
+
+I/O counter: chars read
+The number of bytes which this task has caused to be read from storage. This
+is simply the sum of bytes which this process passed to read() and pread().
+It includes things like tty IO and it is unaffected by whether or not actual
+physical disk IO was required (the read might have been satisfied from
+pagecache)
+
+
+wchar
+-----
+
+I/O counter: chars written
+The number of bytes which this task has caused, or shall cause to be written
+to disk. Similar caveats apply here as with rchar.
+
+
+syscr
+-----
+
+I/O counter: read syscalls
+Attempt to count the number of read I/O operations, i.e. syscalls like read()
+and pread().
+
+
+syscw
+-----
+
+I/O counter: write syscalls
+Attempt to count the number of write I/O operations, i.e. syscalls like
+write() and pwrite().
+
+
+read_bytes
+----------
+
+I/O counter: bytes read
+Attempt to count the number of bytes which this process really did cause to
+be fetched from the storage layer. Done at the submit_bio() level, so it is
+accurate for block-backed filesystems. <please add status regarding NFS and
+CIFS at a later time>
+
+
+write_bytes
+-----------
+
+I/O counter: bytes written
+Attempt to count the number of bytes which this process caused to be sent to
+the storage layer. This is done at page-dirtying time.
+
+
+cancelled_write_bytes
+---------------------
+
+The big inaccuracy here is truncate. If a process writes 1MB to a file and
+then deletes the file, it will in fact perform no writeout. But it will have
+been accounted as having caused 1MB of write.
+In other words: The number of bytes which this process caused to not happen,
+by truncating pagecache. A task can cause "negative" IO too. If this task
+truncates some dirty pagecache, some IO which another task has been accounted
+for (in its write_bytes) will not be happening. We _could_ just subtract that
+from the truncating task's write_bytes, but there is information loss in doing
+that.
+
+
+Note
+----
+
+At its current implementation state, this is a bit racy on 32-bit machines: if
+process A reads process B's /proc/pid/io while process B is updating one of
+those 64-bit counters, process A could see an intermediate result.
+
+
+More information about this can be found within the taskstats documentation in
+Documentation/accounting.
+
+3.4 /proc/<pid>/coredump_filter - Core dump filtering settings
+---------------------------------------------------------------
+When a process is dumped, all anonymous memory is written to a core file as
+long as the size of the core file isn't limited. But sometimes we don't want
+to dump some memory segments, for example, huge shared memory. Conversely,
+sometimes we want to save file-backed memory segments into a core file, not
+only the individual files.
+
+/proc/<pid>/coredump_filter allows you to customize which memory segments
+will be dumped when the <pid> process is dumped. coredump_filter is a bitmask
+of memory types. If a bit of the bitmask is set, memory segments of the
+corresponding memory type are dumped, otherwise they are not dumped.
+
+The following 7 memory types are supported:
+ - (bit 0) anonymous private memory
+ - (bit 1) anonymous shared memory
+ - (bit 2) file-backed private memory
+ - (bit 3) file-backed shared memory
+ - (bit 4) ELF header pages in file-backed private memory areas (it is
+ effective only if the bit 2 is cleared)
+ - (bit 5) hugetlb private memory
+ - (bit 6) hugetlb shared memory
+
+ Note that MMIO pages such as frame buffer are never dumped and vDSO pages
+ are always dumped regardless of the bitmask status.
+
+ Note bit 0-4 doesn't effect any hugetlb memory. hugetlb memory are only
+ effected by bit 5-6.
+
+Default value of coredump_filter is 0x23; this means all anonymous memory
+segments and hugetlb private memory are dumped.
+
+If you don't want to dump all shared memory segments attached to pid 1234,
+write 0x21 to the process's proc file.
+
+ $ echo 0x21 > /proc/1234/coredump_filter
+
+When a new process is created, the process inherits the bitmask status from its
+parent. It is useful to set up coredump_filter before the program runs.
+For example:
+
+ $ echo 0x7 > /proc/self/coredump_filter
+ $ ./some_program
+
+3.5 /proc/<pid>/mountinfo - Information about mounts
+--------------------------------------------------------
+
+This file contains lines of the form:
+
+36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
+
+(1) mount ID: unique identifier of the mount (may be reused after umount)
+(2) parent ID: ID of parent (or of self for the top of the mount tree)
+(3) major:minor: value of st_dev for files on filesystem
+(4) root: root of the mount within the filesystem
+(5) mount point: mount point relative to the process's root
+(6) mount options: per mount options
+(7) optional fields: zero or more fields of the form "tag[:value]"
+(8) separator: marks the end of the optional fields
+(9) filesystem type: name of filesystem of the form "type[.subtype]"
+(10) mount source: filesystem specific information or "none"
+(11) super options: per super block options
+
+Parsers should ignore all unrecognised optional fields. Currently the
+possible optional fields are:
+
+shared:X mount is shared in peer group X
+master:X mount is slave to peer group X
+propagate_from:X mount is slave and receives propagation from peer group X (*)
+unbindable mount is unbindable
+
+(*) X is the closest dominant peer group under the process's root. If
+X is the immediate master of the mount, or if there's no dominant peer
+group under the same root, then only the "master:X" field is present
+and not the "propagate_from:X" field.
+
+For more information on mount propagation see:
+
+ Documentation/filesystems/sharedsubtree.txt
+
+
+3.6 /proc/<pid>/comm & /proc/<pid>/task/<tid>/comm
+--------------------------------------------------------
+These files provide a method to access a tasks comm value. It also allows for
+a task to set its own or one of its thread siblings comm value. The comm value
+is limited in size compared to the cmdline value, so writing anything longer
+then the kernel's TASK_COMM_LEN (currently 16 chars) will result in a truncated
+comm value.
+
+
+3.7 /proc/<pid>/task/<tid>/children - Information about task children
+-------------------------------------------------------------------------
+This file provides a fast way to retrieve first level children pids
+of a task pointed by <pid>/<tid> pair. The format is a space separated
+stream of pids.
+
+Note the "first level" here -- if a child has own children they will
+not be listed here, one needs to read /proc/<children-pid>/task/<tid>/children
+to obtain the descendants.
+
+Since this interface is intended to be fast and cheap it doesn't
+guarantee to provide precise results and some children might be
+skipped, especially if they've exited right after we printed their
+pids, so one need to either stop or freeze processes being inspected
+if precise results are needed.
+
+
+3.8 /proc/<pid>/fdinfo/<fd> - Information about opened file
+---------------------------------------------------------------
+This file provides information associated with an opened file. The regular
+files have at least three fields -- 'pos', 'flags' and mnt_id. The 'pos'
+represents the current offset of the opened file in decimal form [see lseek(2)
+for details], 'flags' denotes the octal O_xxx mask the file has been
+created with [see open(2) for details] and 'mnt_id' represents mount ID of
+the file system containing the opened file [see 3.5 /proc/<pid>/mountinfo
+for details].
+
+A typical output is
+
+ pos: 0
+ flags: 0100002
+ mnt_id: 19
+
+All locks associated with a file descriptor are shown in its fdinfo too.
+
+lock: 1: FLOCK ADVISORY WRITE 359 00:13:11691 0 EOF
+
+The files such as eventfd, fsnotify, signalfd, epoll among the regular pos/flags
+pair provide additional information particular to the objects they represent.
+
+ Eventfd files
+ ~~~~~~~~~~~~~
+ pos: 0
+ flags: 04002
+ mnt_id: 9
+ eventfd-count: 5a
+
+ where 'eventfd-count' is hex value of a counter.
+
+ Signalfd files
+ ~~~~~~~~~~~~~~
+ pos: 0
+ flags: 04002
+ mnt_id: 9
+ sigmask: 0000000000000200
+
+ where 'sigmask' is hex value of the signal mask associated
+ with a file.
+
+ Epoll files
+ ~~~~~~~~~~~
+ pos: 0
+ flags: 02
+ mnt_id: 9
+ tfd: 5 events: 1d data: ffffffffffffffff
+
+ where 'tfd' is a target file descriptor number in decimal form,
+ 'events' is events mask being watched and the 'data' is data
+ associated with a target [see epoll(7) for more details].
+
+ Fsnotify files
+ ~~~~~~~~~~~~~~
+ For inotify files the format is the following
+
+ pos: 0
+ flags: 02000000
+ inotify wd:3 ino:9e7e sdev:800013 mask:800afce ignored_mask:0 fhandle-bytes:8 fhandle-type:1 f_handle:7e9e0000640d1b6d
+
+ where 'wd' is a watch descriptor in decimal form, ie a target file
+ descriptor number, 'ino' and 'sdev' are inode and device where the
+ target file resides and the 'mask' is the mask of events, all in hex
+ form [see inotify(7) for more details].
+
+ If the kernel was built with exportfs support, the path to the target
+ file is encoded as a file handle. The file handle is provided by three
+ fields 'fhandle-bytes', 'fhandle-type' and 'f_handle', all in hex
+ format.
+
+ If the kernel is built without exportfs support the file handle won't be
+ printed out.
+
+ If there is no inotify mark attached yet the 'inotify' line will be omitted.
+
+ For fanotify files the format is
+
+ pos: 0
+ flags: 02
+ mnt_id: 9
+ fanotify flags:10 event-flags:0
+ fanotify mnt_id:12 mflags:40 mask:38 ignored_mask:40000003
+ fanotify ino:4f969 sdev:800013 mflags:0 mask:3b ignored_mask:40000000 fhandle-bytes:8 fhandle-type:1 f_handle:69f90400c275b5b4
+
+ where fanotify 'flags' and 'event-flags' are values used in fanotify_init
+ call, 'mnt_id' is the mount point identifier, 'mflags' is the value of
+ flags associated with mark which are tracked separately from events
+ mask. 'ino', 'sdev' are target inode and device, 'mask' is the events
+ mask and 'ignored_mask' is the mask of events which are to be ignored.
+ All in hex format. Incorporation of 'mflags', 'mask' and 'ignored_mask'
+ does provide information about flags and mask used in fanotify_mark
+ call [see fsnotify manpage for details].
+
+ While the first three lines are mandatory and always printed, the rest is
+ optional and may be omitted if no marks created yet.
+
+ Timerfd files
+ ~~~~~~~~~~~~~
+
+ pos: 0
+ flags: 02
+ mnt_id: 9
+ clockid: 0
+ ticks: 0
+ settime flags: 01
+ it_value: (0, 49406829)
+ it_interval: (1, 0)
+
+ where 'clockid' is the clock type and 'ticks' is the number of the timer expirations
+ that have occurred [see timerfd_create(2) for details]. 'settime flags' are
+ flags in octal form been used to setup the timer [see timerfd_settime(2) for
+ details]. 'it_value' is remaining time until the timer exiration.
+ 'it_interval' is the interval for the timer. Note the timer might be set up
+ with TIMER_ABSTIME option which will be shown in 'settime flags', but 'it_value'
+ still exhibits timer's remaining time.
+
+3.9 /proc/<pid>/map_files - Information about memory mapped files
+---------------------------------------------------------------------
+This directory contains symbolic links which represent memory mapped files
+the process is maintaining. Example output:
+
+ | lr-------- 1 root root 64 Jan 27 11:24 333c600000-333c620000 -> /usr/lib64/ld-2.18.so
+ | lr-------- 1 root root 64 Jan 27 11:24 333c81f000-333c820000 -> /usr/lib64/ld-2.18.so
+ | lr-------- 1 root root 64 Jan 27 11:24 333c820000-333c821000 -> /usr/lib64/ld-2.18.so
+ | ...
+ | lr-------- 1 root root 64 Jan 27 11:24 35d0421000-35d0422000 -> /usr/lib64/libselinux.so.1
+ | lr-------- 1 root root 64 Jan 27 11:24 400000-41a000 -> /usr/bin/ls
+
+The name of a link represents the virtual memory bounds of a mapping, i.e.
+vm_area_struct::vm_start-vm_area_struct::vm_end.
+
+The main purpose of the map_files is to retrieve a set of memory mapped
+files in a fast way instead of parsing /proc/<pid>/maps or
+/proc/<pid>/smaps, both of which contain many more records. At the same
+time one can open(2) mappings from the listings of two processes and
+comparing their inode numbers to figure out which anonymous memory areas
+are actually shared.
+
+------------------------------------------------------------------------------
+Configuring procfs
+------------------------------------------------------------------------------
+
+4.1 Mount options
+---------------------
+
+The following mount options are supported:
+
+ hidepid= Set /proc/<pid>/ access mode.
+ gid= Set the group authorized to learn processes information.
+
+hidepid=0 means classic mode - everybody may access all /proc/<pid>/ directories
+(default).
+
+hidepid=1 means users may not access any /proc/<pid>/ directories but their
+own. Sensitive files like cmdline, sched*, status are now protected against
+other users. This makes it impossible to learn whether any user runs
+specific program (given the program doesn't reveal itself by its behaviour).
+As an additional bonus, as /proc/<pid>/cmdline is unaccessible for other users,
+poorly written programs passing sensitive information via program arguments are
+now protected against local eavesdroppers.
+
+hidepid=2 means hidepid=1 plus all /proc/<pid>/ will be fully invisible to other
+users. It doesn't mean that it hides a fact whether a process with a specific
+pid value exists (it can be learned by other means, e.g. by "kill -0 $PID"),
+but it hides process' uid and gid, which may be learned by stat()'ing
+/proc/<pid>/ otherwise. It greatly complicates an intruder's task of gathering
+information about running processes, whether some daemon runs with elevated
+privileges, whether other user runs some sensitive program, whether other users
+run any program at all, etc.
+
+gid= defines a group authorized to learn processes information otherwise
+prohibited by hidepid=. If you use some daemon like identd which needs to learn
+information about processes information, just add identd to this group.
diff --git a/Documentation/filesystems/qnx6.txt b/Documentation/filesystems/qnx6.txt
new file mode 100644
index 000000000..408679789
--- /dev/null
+++ b/Documentation/filesystems/qnx6.txt
@@ -0,0 +1,174 @@
+The QNX6 Filesystem
+===================
+
+The qnx6fs is used by newer QNX operating system versions. (e.g. Neutrino)
+It got introduced in QNX 6.4.0 and is used default since 6.4.1.
+
+Option
+======
+
+mmi_fs Mount filesystem as used for example by Audi MMI 3G system
+
+Specification
+=============
+
+qnx6fs shares many properties with traditional Unix filesystems. It has the
+concepts of blocks, inodes and directories.
+On QNX it is possible to create little endian and big endian qnx6 filesystems.
+This feature makes it possible to create and use a different endianness fs
+for the target (QNX is used on quite a range of embedded systems) plattform
+running on a different endianness.
+The Linux driver handles endianness transparently. (LE and BE)
+
+Blocks
+------
+
+The space in the device or file is split up into blocks. These are a fixed
+size of 512, 1024, 2048 or 4096, which is decided when the filesystem is
+created.
+Blockpointers are 32bit, so the maximum space that can be addressed is
+2^32 * 4096 bytes or 16TB
+
+The superblocks
+---------------
+
+The superblock contains all global information about the filesystem.
+Each qnx6fs got two superblocks, each one having a 64bit serial number.
+That serial number is used to identify the "active" superblock.
+In write mode with reach new snapshot (after each synchronous write), the
+serial of the new master superblock is increased (old superblock serial + 1)
+
+So basically the snapshot functionality is realized by an atomic final
+update of the serial number. Before updating that serial, all modifications
+are done by copying all modified blocks during that specific write request
+(or period) and building up a new (stable) filesystem structure under the
+inactive superblock.
+
+Each superblock holds a set of root inodes for the different filesystem
+parts. (Inode, Bitmap and Longfilenames)
+Each of these root nodes holds information like total size of the stored
+data and the addressing levels in that specific tree.
+If the level value is 0, up to 16 direct blocks can be addressed by each
+node.
+Level 1 adds an additional indirect addressing level where each indirect
+addressing block holds up to blocksize / 4 bytes pointers to data blocks.
+Level 2 adds an additional indirect addressing block level (so, already up
+to 16 * 256 * 256 = 1048576 blocks that can be addressed by such a tree).
+
+Unused block pointers are always set to ~0 - regardless of root node,
+indirect addressing blocks or inodes.
+Data leaves are always on the lowest level. So no data is stored on upper
+tree levels.
+
+The first Superblock is located at 0x2000. (0x2000 is the bootblock size)
+The Audi MMI 3G first superblock directly starts at byte 0.
+Second superblock position can either be calculated from the superblock
+information (total number of filesystem blocks) or by taking the highest
+device address, zeroing the last 3 bytes and then subtracting 0x1000 from
+that address.
+
+0x1000 is the size reserved for each superblock - regardless of the
+blocksize of the filesystem.
+
+Inodes
+------
+
+Each object in the filesystem is represented by an inode. (index node)
+The inode structure contains pointers to the filesystem blocks which contain
+the data held in the object and all of the metadata about an object except
+its longname. (filenames longer than 27 characters)
+The metadata about an object includes the permissions, owner, group, flags,
+size, number of blocks used, access time, change time and modification time.
+
+Object mode field is POSIX format. (which makes things easier)
+
+There are also pointers to the first 16 blocks, if the object data can be
+addressed with 16 direct blocks.
+For more than 16 blocks an indirect addressing in form of another tree is
+used. (scheme is the same as the one used for the superblock root nodes)
+
+The filesize is stored 64bit. Inode counting starts with 1. (whilst long
+filename inodes start with 0)
+
+Directories
+-----------
+
+A directory is a filesystem object and has an inode just like a file.
+It is a specially formatted file containing records which associate each
+name with an inode number.
+'.' inode number points to the directory inode
+'..' inode number points to the parent directory inode
+Eeach filename record additionally got a filename length field.
+
+One special case are long filenames or subdirectory names.
+These got set a filename length field of 0xff in the corresponding directory
+record plus the longfile inode number also stored in that record.
+With that longfilename inode number, the longfilename tree can be walked
+starting with the superblock longfilename root node pointers.
+
+Special files
+-------------
+
+Symbolic links are also filesystem objects with inodes. They got a specific
+bit in the inode mode field identifying them as symbolic link.
+The directory entry file inode pointer points to the target file inode.
+
+Hard links got an inode, a directory entry, but a specific mode bit set,
+no block pointers and the directory file record pointing to the target file
+inode.
+
+Character and block special devices do not exist in QNX as those files
+are handled by the QNX kernel/drivers and created in /dev independent of the
+underlaying filesystem.
+
+Long filenames
+--------------
+
+Long filenames are stored in a separate addressing tree. The staring point
+is the longfilename root node in the active superblock.
+Each data block (tree leaves) holds one long filename. That filename is
+limited to 510 bytes. The first two starting bytes are used as length field
+for the actual filename.
+If that structure shall fit for all allowed blocksizes, it is clear why there
+is a limit of 510 bytes for the actual filename stored.
+
+Bitmap
+------
+
+The qnx6fs filesystem allocation bitmap is stored in a tree under bitmap
+root node in the superblock and each bit in the bitmap represents one
+filesystem block.
+The first block is block 0, which starts 0x1000 after superblock start.
+So for a normal qnx6fs 0x3000 (bootblock + superblock) is the physical
+address at which block 0 is located.
+
+Bits at the end of the last bitmap block are set to 1, if the device is
+smaller than addressing space in the bitmap.
+
+Bitmap system area
+------------------
+
+The bitmap itself is divided into three parts.
+First the system area, that is split into two halves.
+Then userspace.
+
+The requirement for a static, fixed preallocated system area comes from how
+qnx6fs deals with writes.
+Each superblock got it's own half of the system area. So superblock #1
+always uses blocks from the lower half whilst superblock #2 just writes to
+blocks represented by the upper half bitmap system area bits.
+
+Bitmap blocks, Inode blocks and indirect addressing blocks for those two
+tree structures are treated as system blocks.
+
+The rational behind that is that a write request can work on a new snapshot
+(system area of the inactive - resp. lower serial numbered superblock) while
+at the same time there is still a complete stable filesystem structer in the
+other half of the system area.
+
+When finished with writing (a sync write is completed, the maximum sync leap
+time or a filesystem sync is requested), serial of the previously inactive
+superblock atomically is increased and the fs switches over to that - then
+stable declared - superblock.
+
+For all data outside the system area, blocks are just copied while writing.
diff --git a/Documentation/filesystems/quota.txt b/Documentation/filesystems/quota.txt
new file mode 100644
index 000000000..5e8de25bf
--- /dev/null
+++ b/Documentation/filesystems/quota.txt
@@ -0,0 +1,65 @@
+
+Quota subsystem
+===============
+
+Quota subsystem allows system administrator to set limits on used space and
+number of used inodes (inode is a filesystem structure which is associated with
+each file or directory) for users and/or groups. For both used space and number
+of used inodes there are actually two limits. The first one is called softlimit
+and the second one hardlimit. An user can never exceed a hardlimit for any
+resource (unless he has CAP_SYS_RESOURCE capability). User is allowed to exceed
+softlimit but only for limited period of time. This period is called "grace
+period" or "grace time". When grace time is over, user is not able to allocate
+more space/inodes until he frees enough of them to get below softlimit.
+
+Quota limits (and amount of grace time) are set independently for each
+filesystem.
+
+For more details about quota design, see the documentation in quota-tools package
+(http://sourceforge.net/projects/linuxquota).
+
+Quota netlink interface
+=======================
+When user exceeds a softlimit, runs out of grace time or reaches hardlimit,
+quota subsystem traditionally printed a message to the controlling terminal of
+the process which caused the excess. This method has the disadvantage that
+when user is using a graphical desktop he usually cannot see the message.
+Thus quota netlink interface has been designed to pass information about
+the above events to userspace. There they can be captured by an application
+and processed accordingly.
+
+The interface uses generic netlink framework (see
+http://lwn.net/Articles/208755/ and http://people.suug.ch/~tgr/libnl/ for more
+details about this layer). The name of the quota generic netlink interface
+is "VFS_DQUOT". Definitions of constants below are in <linux/quota.h>.
+ Currently, the interface supports only one message type QUOTA_NL_C_WARNING.
+This command is used to send a notification about any of the above mentioned
+events. Each message has six attributes. These are (type of the argument is
+in parentheses):
+ QUOTA_NL_A_QTYPE (u32)
+ - type of quota being exceeded (one of USRQUOTA, GRPQUOTA)
+ QUOTA_NL_A_EXCESS_ID (u64)
+ - UID/GID (depends on quota type) of user / group whose limit
+ is being exceeded.
+ QUOTA_NL_A_CAUSED_ID (u64)
+ - UID of a user who caused the event
+ QUOTA_NL_A_WARNING (u32)
+ - what kind of limit is exceeded:
+ QUOTA_NL_IHARDWARN - inode hardlimit
+ QUOTA_NL_ISOFTLONGWARN - inode softlimit is exceeded longer
+ than given grace period
+ QUOTA_NL_ISOFTWARN - inode softlimit
+ QUOTA_NL_BHARDWARN - space (block) hardlimit
+ QUOTA_NL_BSOFTLONGWARN - space (block) softlimit is exceeded
+ longer than given grace period.
+ QUOTA_NL_BSOFTWARN - space (block) softlimit
+ - four warnings are also defined for the event when user stops
+ exceeding some limit:
+ QUOTA_NL_IHARDBELOW - inode hardlimit
+ QUOTA_NL_ISOFTBELOW - inode softlimit
+ QUOTA_NL_BHARDBELOW - space (block) hardlimit
+ QUOTA_NL_BSOFTBELOW - space (block) softlimit
+ QUOTA_NL_A_DEV_MAJOR (u32)
+ - major number of a device with the affected filesystem
+ QUOTA_NL_A_DEV_MINOR (u32)
+ - minor number of a device with the affected filesystem
diff --git a/Documentation/filesystems/ramfs-rootfs-initramfs.txt b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
new file mode 100644
index 000000000..b176928e6
--- /dev/null
+++ b/Documentation/filesystems/ramfs-rootfs-initramfs.txt
@@ -0,0 +1,359 @@
+ramfs, rootfs and initramfs
+October 17, 2005
+Rob Landley <rob@landley.net>
+=============================
+
+What is ramfs?
+--------------
+
+Ramfs is a very simple filesystem that exports Linux's disk caching
+mechanisms (the page cache and dentry cache) as a dynamically resizable
+RAM-based filesystem.
+
+Normally all files are cached in memory by Linux. Pages of data read from
+backing store (usually the block device the filesystem is mounted on) are kept
+around in case it's needed again, but marked as clean (freeable) in case the
+Virtual Memory system needs the memory for something else. Similarly, data
+written to files is marked clean as soon as it has been written to backing
+store, but kept around for caching purposes until the VM reallocates the
+memory. A similar mechanism (the dentry cache) greatly speeds up access to
+directories.
+
+With ramfs, there is no backing store. Files written into ramfs allocate
+dentries and page cache as usual, but there's nowhere to write them to.
+This means the pages are never marked clean, so they can't be freed by the
+VM when it's looking to recycle memory.
+
+The amount of code required to implement ramfs is tiny, because all the
+work is done by the existing Linux caching infrastructure. Basically,
+you're mounting the disk cache as a filesystem. Because of this, ramfs is not
+an optional component removable via menuconfig, since there would be negligible
+space savings.
+
+ramfs and ramdisk:
+------------------
+
+The older "ram disk" mechanism created a synthetic block device out of
+an area of RAM and used it as backing store for a filesystem. This block
+device was of fixed size, so the filesystem mounted on it was of fixed
+size. Using a ram disk also required unnecessarily copying memory from the
+fake block device into the page cache (and copying changes back out), as well
+as creating and destroying dentries. Plus it needed a filesystem driver
+(such as ext2) to format and interpret this data.
+
+Compared to ramfs, this wastes memory (and memory bus bandwidth), creates
+unnecessary work for the CPU, and pollutes the CPU caches. (There are tricks
+to avoid this copying by playing with the page tables, but they're unpleasantly
+complicated and turn out to be about as expensive as the copying anyway.)
+More to the point, all the work ramfs is doing has to happen _anyway_,
+since all file access goes through the page and dentry caches. The RAM
+disk is simply unnecessary; ramfs is internally much simpler.
+
+Another reason ramdisks are semi-obsolete is that the introduction of
+loopback devices offered a more flexible and convenient way to create
+synthetic block devices, now from files instead of from chunks of memory.
+See losetup (8) for details.
+
+ramfs and tmpfs:
+----------------
+
+One downside of ramfs is you can keep writing data into it until you fill
+up all memory, and the VM can't free it because the VM thinks that files
+should get written to backing store (rather than swap space), but ramfs hasn't
+got any backing store. Because of this, only root (or a trusted user) should
+be allowed write access to a ramfs mount.
+
+A ramfs derivative called tmpfs was created to add size limits, and the ability
+to write the data to swap space. Normal users can be allowed write access to
+tmpfs mounts. See Documentation/filesystems/tmpfs.txt for more information.
+
+What is rootfs?
+---------------
+
+Rootfs is a special instance of ramfs (or tmpfs, if that's enabled), which is
+always present in 2.6 systems. You can't unmount rootfs for approximately the
+same reason you can't kill the init process; rather than having special code
+to check for and handle an empty list, it's smaller and simpler for the kernel
+to just make sure certain lists can't become empty.
+
+Most systems just mount another filesystem over rootfs and ignore it. The
+amount of space an empty instance of ramfs takes up is tiny.
+
+If CONFIG_TMPFS is enabled, rootfs will use tmpfs instead of ramfs by
+default. To force ramfs, add "rootfstype=ramfs" to the kernel command
+line.
+
+What is initramfs?
+------------------
+
+All 2.6 Linux kernels contain a gzipped "cpio" format archive, which is
+extracted into rootfs when the kernel boots up. After extracting, the kernel
+checks to see if rootfs contains a file "init", and if so it executes it as PID
+1. If found, this init process is responsible for bringing the system the
+rest of the way up, including locating and mounting the real root device (if
+any). If rootfs does not contain an init program after the embedded cpio
+archive is extracted into it, the kernel will fall through to the older code
+to locate and mount a root partition, then exec some variant of /sbin/init
+out of that.
+
+All this differs from the old initrd in several ways:
+
+ - The old initrd was always a separate file, while the initramfs archive is
+ linked into the linux kernel image. (The directory linux-*/usr is devoted
+ to generating this archive during the build.)
+
+ - The old initrd file was a gzipped filesystem image (in some file format,
+ such as ext2, that needed a driver built into the kernel), while the new
+ initramfs archive is a gzipped cpio archive (like tar only simpler,
+ see cpio(1) and Documentation/early-userspace/buffer-format.txt). The
+ kernel's cpio extraction code is not only extremely small, it's also
+ __init text and data that can be discarded during the boot process.
+
+ - The program run by the old initrd (which was called /initrd, not /init) did
+ some setup and then returned to the kernel, while the init program from
+ initramfs is not expected to return to the kernel. (If /init needs to hand
+ off control it can overmount / with a new root device and exec another init
+ program. See the switch_root utility, below.)
+
+ - When switching another root device, initrd would pivot_root and then
+ umount the ramdisk. But initramfs is rootfs: you can neither pivot_root
+ rootfs, nor unmount it. Instead delete everything out of rootfs to
+ free up the space (find -xdev / -exec rm '{}' ';'), overmount rootfs
+ with the new root (cd /newmount; mount --move . /; chroot .), attach
+ stdin/stdout/stderr to the new /dev/console, and exec the new init.
+
+ Since this is a remarkably persnickety process (and involves deleting
+ commands before you can run them), the klibc package introduced a helper
+ program (utils/run_init.c) to do all this for you. Most other packages
+ (such as busybox) have named this command "switch_root".
+
+Populating initramfs:
+---------------------
+
+The 2.6 kernel build process always creates a gzipped cpio format initramfs
+archive and links it into the resulting kernel binary. By default, this
+archive is empty (consuming 134 bytes on x86).
+
+The config option CONFIG_INITRAMFS_SOURCE (in General Setup in menuconfig,
+and living in usr/Kconfig) can be used to specify a source for the
+initramfs archive, which will automatically be incorporated into the
+resulting binary. This option can point to an existing gzipped cpio
+archive, a directory containing files to be archived, or a text file
+specification such as the following example:
+
+ dir /dev 755 0 0
+ nod /dev/console 644 0 0 c 5 1
+ nod /dev/loop0 644 0 0 b 7 0
+ dir /bin 755 1000 1000
+ slink /bin/sh busybox 777 0 0
+ file /bin/busybox initramfs/busybox 755 0 0
+ dir /proc 755 0 0
+ dir /sys 755 0 0
+ dir /mnt 755 0 0
+ file /init initramfs/init.sh 755 0 0
+
+Run "usr/gen_init_cpio" (after the kernel build) to get a usage message
+documenting the above file format.
+
+One advantage of the configuration file is that root access is not required to
+set permissions or create device nodes in the new archive. (Note that those
+two example "file" entries expect to find files named "init.sh" and "busybox" in
+a directory called "initramfs", under the linux-2.6.* directory. See
+Documentation/early-userspace/README for more details.)
+
+The kernel does not depend on external cpio tools. If you specify a
+directory instead of a configuration file, the kernel's build infrastructure
+creates a configuration file from that directory (usr/Makefile calls
+scripts/gen_initramfs_list.sh), and proceeds to package up that directory
+using the config file (by feeding it to usr/gen_init_cpio, which is created
+from usr/gen_init_cpio.c). The kernel's build-time cpio creation code is
+entirely self-contained, and the kernel's boot-time extractor is also
+(obviously) self-contained.
+
+The one thing you might need external cpio utilities installed for is creating
+or extracting your own preprepared cpio files to feed to the kernel build
+(instead of a config file or directory).
+
+The following command line can extract a cpio image (either by the above script
+or by the kernel build) back into its component files:
+
+ cpio -i -d -H newc -F initramfs_data.cpio --no-absolute-filenames
+
+The following shell script can create a prebuilt cpio archive you can
+use in place of the above config file:
+
+ #!/bin/sh
+
+ # Copyright 2006 Rob Landley <rob@landley.net> and TimeSys Corporation.
+ # Licensed under GPL version 2
+
+ if [ $# -ne 2 ]
+ then
+ echo "usage: mkinitramfs directory imagename.cpio.gz"
+ exit 1
+ fi
+
+ if [ -d "$1" ]
+ then
+ echo "creating $2 from $1"
+ (cd "$1"; find . | cpio -o -H newc | gzip) > "$2"
+ else
+ echo "First argument must be a directory"
+ exit 1
+ fi
+
+Note: The cpio man page contains some bad advice that will break your initramfs
+archive if you follow it. It says "A typical way to generate the list
+of filenames is with the find command; you should give find the -depth option
+to minimize problems with permissions on directories that are unwritable or not
+searchable." Don't do this when creating initramfs.cpio.gz images, it won't
+work. The Linux kernel cpio extractor won't create files in a directory that
+doesn't exist, so the directory entries must go before the files that go in
+those directories. The above script gets them in the right order.
+
+External initramfs images:
+--------------------------
+
+If the kernel has initrd support enabled, an external cpio.gz archive can also
+be passed into a 2.6 kernel in place of an initrd. In this case, the kernel
+will autodetect the type (initramfs, not initrd) and extract the external cpio
+archive into rootfs before trying to run /init.
+
+This has the memory efficiency advantages of initramfs (no ramdisk block
+device) but the separate packaging of initrd (which is nice if you have
+non-GPL code you'd like to run from initramfs, without conflating it with
+the GPL licensed Linux kernel binary).
+
+It can also be used to supplement the kernel's built-in initramfs image. The
+files in the external archive will overwrite any conflicting files in
+the built-in initramfs archive. Some distributors also prefer to customize
+a single kernel image with task-specific initramfs images, without recompiling.
+
+Contents of initramfs:
+----------------------
+
+An initramfs archive is a complete self-contained root filesystem for Linux.
+If you don't already understand what shared libraries, devices, and paths
+you need to get a minimal root filesystem up and running, here are some
+references:
+http://www.tldp.org/HOWTO/Bootdisk-HOWTO/
+http://www.tldp.org/HOWTO/From-PowerUp-To-Bash-Prompt-HOWTO.html
+http://www.linuxfromscratch.org/lfs/view/stable/
+
+The "klibc" package (http://www.kernel.org/pub/linux/libs/klibc) is
+designed to be a tiny C library to statically link early userspace
+code against, along with some related utilities. It is BSD licensed.
+
+I use uClibc (http://www.uclibc.org) and busybox (http://www.busybox.net)
+myself. These are LGPL and GPL, respectively. (A self-contained initramfs
+package is planned for the busybox 1.3 release.)
+
+In theory you could use glibc, but that's not well suited for small embedded
+uses like this. (A "hello world" program statically linked against glibc is
+over 400k. With uClibc it's 7k. Also note that glibc dlopens libnss to do
+name lookups, even when otherwise statically linked.)
+
+A good first step is to get initramfs to run a statically linked "hello world"
+program as init, and test it under an emulator like qemu (www.qemu.org) or
+User Mode Linux, like so:
+
+ cat > hello.c << EOF
+ #include <stdio.h>
+ #include <unistd.h>
+
+ int main(int argc, char *argv[])
+ {
+ printf("Hello world!\n");
+ sleep(999999999);
+ }
+ EOF
+ gcc -static hello.c -o init
+ echo init | cpio -o -H newc | gzip > test.cpio.gz
+ # Testing external initramfs using the initrd loading mechanism.
+ qemu -kernel /boot/vmlinuz -initrd test.cpio.gz /dev/zero
+
+When debugging a normal root filesystem, it's nice to be able to boot with
+"init=/bin/sh". The initramfs equivalent is "rdinit=/bin/sh", and it's
+just as useful.
+
+Why cpio rather than tar?
+-------------------------
+
+This decision was made back in December, 2001. The discussion started here:
+
+ http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1538.html
+
+And spawned a second thread (specifically on tar vs cpio), starting here:
+
+ http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1587.html
+
+The quick and dirty summary version (which is no substitute for reading
+the above threads) is:
+
+1) cpio is a standard. It's decades old (from the AT&T days), and already
+ widely used on Linux (inside RPM, Red Hat's device driver disks). Here's
+ a Linux Journal article about it from 1996:
+
+ http://www.linuxjournal.com/article/1213
+
+ It's not as popular as tar because the traditional cpio command line tools
+ require _truly_hideous_ command line arguments. But that says nothing
+ either way about the archive format, and there are alternative tools,
+ such as:
+
+ http://freecode.com/projects/afio
+
+2) The cpio archive format chosen by the kernel is simpler and cleaner (and
+ thus easier to create and parse) than any of the (literally dozens of)
+ various tar archive formats. The complete initramfs archive format is
+ explained in buffer-format.txt, created in usr/gen_init_cpio.c, and
+ extracted in init/initramfs.c. All three together come to less than 26k
+ total of human-readable text.
+
+3) The GNU project standardizing on tar is approximately as relevant as
+ Windows standardizing on zip. Linux is not part of either, and is free
+ to make its own technical decisions.
+
+4) Since this is a kernel internal format, it could easily have been
+ something brand new. The kernel provides its own tools to create and
+ extract this format anyway. Using an existing standard was preferable,
+ but not essential.
+
+5) Al Viro made the decision (quote: "tar is ugly as hell and not going to be
+ supported on the kernel side"):
+
+ http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1540.html
+
+ explained his reasoning:
+
+ http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1550.html
+ http://www.uwsg.iu.edu/hypermail/linux/kernel/0112.2/1638.html
+
+ and, most importantly, designed and implemented the initramfs code.
+
+Future directions:
+------------------
+
+Today (2.6.16), initramfs is always compiled in, but not always used. The
+kernel falls back to legacy boot code that is reached only if initramfs does
+not contain an /init program. The fallback is legacy code, there to ensure a
+smooth transition and allowing early boot functionality to gradually move to
+"early userspace" (I.E. initramfs).
+
+The move to early userspace is necessary because finding and mounting the real
+root device is complex. Root partitions can span multiple devices (raid or
+separate journal). They can be out on the network (requiring dhcp, setting a
+specific MAC address, logging into a server, etc). They can live on removable
+media, with dynamically allocated major/minor numbers and persistent naming
+issues requiring a full udev implementation to sort out. They can be
+compressed, encrypted, copy-on-write, loopback mounted, strangely partitioned,
+and so on.
+
+This kind of complexity (which inevitably includes policy) is rightly handled
+in userspace. Both klibc and busybox/uClibc are working on simple initramfs
+packages to drop into a kernel build.
+
+The klibc package has now been accepted into Andrew Morton's 2.6.17-mm tree.
+The kernel's current early boot code (partition detection, etc) will probably
+be migrated into a default initramfs, automatically created and used by the
+kernel build.
diff --git a/Documentation/filesystems/relay.txt b/Documentation/filesystems/relay.txt
new file mode 100644
index 000000000..33e2f3694
--- /dev/null
+++ b/Documentation/filesystems/relay.txt
@@ -0,0 +1,494 @@
+relay interface (formerly relayfs)
+==================================
+
+The relay interface provides a means for kernel applications to
+efficiently log and transfer large quantities of data from the kernel
+to userspace via user-defined 'relay channels'.
+
+A 'relay channel' is a kernel->user data relay mechanism implemented
+as a set of per-cpu kernel buffers ('channel buffers'), each
+represented as a regular file ('relay file') in user space. Kernel
+clients write into the channel buffers using efficient write
+functions; these automatically log into the current cpu's channel
+buffer. User space applications mmap() or read() from the relay files
+and retrieve the data as it becomes available. The relay files
+themselves are files created in a host filesystem, e.g. debugfs, and
+are associated with the channel buffers using the API described below.
+
+The format of the data logged into the channel buffers is completely
+up to the kernel client; the relay interface does however provide
+hooks which allow kernel clients to impose some structure on the
+buffer data. The relay interface doesn't implement any form of data
+filtering - this also is left to the kernel client. The purpose is to
+keep things as simple as possible.
+
+This document provides an overview of the relay interface API. The
+details of the function parameters are documented along with the
+functions in the relay interface code - please see that for details.
+
+Semantics
+=========
+
+Each relay channel has one buffer per CPU, each buffer has one or more
+sub-buffers. Messages are written to the first sub-buffer until it is
+too full to contain a new message, in which case it is written to
+the next (if available). Messages are never split across sub-buffers.
+At this point, userspace can be notified so it empties the first
+sub-buffer, while the kernel continues writing to the next.
+
+When notified that a sub-buffer is full, the kernel knows how many
+bytes of it are padding i.e. unused space occurring because a complete
+message couldn't fit into a sub-buffer. Userspace can use this
+knowledge to copy only valid data.
+
+After copying it, userspace can notify the kernel that a sub-buffer
+has been consumed.
+
+A relay channel can operate in a mode where it will overwrite data not
+yet collected by userspace, and not wait for it to be consumed.
+
+The relay channel itself does not provide for communication of such
+data between userspace and kernel, allowing the kernel side to remain
+simple and not impose a single interface on userspace. It does
+provide a set of examples and a separate helper though, described
+below.
+
+The read() interface both removes padding and internally consumes the
+read sub-buffers; thus in cases where read(2) is being used to drain
+the channel buffers, special-purpose communication between kernel and
+user isn't necessary for basic operation.
+
+One of the major goals of the relay interface is to provide a low
+overhead mechanism for conveying kernel data to userspace. While the
+read() interface is easy to use, it's not as efficient as the mmap()
+approach; the example code attempts to make the tradeoff between the
+two approaches as small as possible.
+
+klog and relay-apps example code
+================================
+
+The relay interface itself is ready to use, but to make things easier,
+a couple simple utility functions and a set of examples are provided.
+
+The relay-apps example tarball, available on the relay sourceforge
+site, contains a set of self-contained examples, each consisting of a
+pair of .c files containing boilerplate code for each of the user and
+kernel sides of a relay application. When combined these two sets of
+boilerplate code provide glue to easily stream data to disk, without
+having to bother with mundane housekeeping chores.
+
+The 'klog debugging functions' patch (klog.patch in the relay-apps
+tarball) provides a couple of high-level logging functions to the
+kernel which allow writing formatted text or raw data to a channel,
+regardless of whether a channel to write into exists or not, or even
+whether the relay interface is compiled into the kernel or not. These
+functions allow you to put unconditional 'trace' statements anywhere
+in the kernel or kernel modules; only when there is a 'klog handler'
+registered will data actually be logged (see the klog and kleak
+examples for details).
+
+It is of course possible to use the relay interface from scratch,
+i.e. without using any of the relay-apps example code or klog, but
+you'll have to implement communication between userspace and kernel,
+allowing both to convey the state of buffers (full, empty, amount of
+padding). The read() interface both removes padding and internally
+consumes the read sub-buffers; thus in cases where read(2) is being
+used to drain the channel buffers, special-purpose communication
+between kernel and user isn't necessary for basic operation. Things
+such as buffer-full conditions would still need to be communicated via
+some channel though.
+
+klog and the relay-apps examples can be found in the relay-apps
+tarball on http://relayfs.sourceforge.net
+
+The relay interface user space API
+==================================
+
+The relay interface implements basic file operations for user space
+access to relay channel buffer data. Here are the file operations
+that are available and some comments regarding their behavior:
+
+open() enables user to open an _existing_ channel buffer.
+
+mmap() results in channel buffer being mapped into the caller's
+ memory space. Note that you can't do a partial mmap - you
+ must map the entire file, which is NRBUF * SUBBUFSIZE.
+
+read() read the contents of a channel buffer. The bytes read are
+ 'consumed' by the reader, i.e. they won't be available
+ again to subsequent reads. If the channel is being used
+ in no-overwrite mode (the default), it can be read at any
+ time even if there's an active kernel writer. If the
+ channel is being used in overwrite mode and there are
+ active channel writers, results may be unpredictable -
+ users should make sure that all logging to the channel has
+ ended before using read() with overwrite mode. Sub-buffer
+ padding is automatically removed and will not be seen by
+ the reader.
+
+sendfile() transfer data from a channel buffer to an output file
+ descriptor. Sub-buffer padding is automatically removed
+ and will not be seen by the reader.
+
+poll() POLLIN/POLLRDNORM/POLLERR supported. User applications are
+ notified when sub-buffer boundaries are crossed.
+
+close() decrements the channel buffer's refcount. When the refcount
+ reaches 0, i.e. when no process or kernel client has the
+ buffer open, the channel buffer is freed.
+
+In order for a user application to make use of relay files, the
+host filesystem must be mounted. For example,
+
+ mount -t debugfs debugfs /sys/kernel/debug
+
+NOTE: the host filesystem doesn't need to be mounted for kernel
+ clients to create or use channels - it only needs to be
+ mounted when user space applications need access to the buffer
+ data.
+
+
+The relay interface kernel API
+==============================
+
+Here's a summary of the API the relay interface provides to in-kernel clients:
+
+TBD(curr. line MT:/API/)
+ channel management functions:
+
+ relay_open(base_filename, parent, subbuf_size, n_subbufs,
+ callbacks, private_data)
+ relay_close(chan)
+ relay_flush(chan)
+ relay_reset(chan)
+
+ channel management typically called on instigation of userspace:
+
+ relay_subbufs_consumed(chan, cpu, subbufs_consumed)
+
+ write functions:
+
+ relay_write(chan, data, length)
+ __relay_write(chan, data, length)
+ relay_reserve(chan, length)
+
+ callbacks:
+
+ subbuf_start(buf, subbuf, prev_subbuf, prev_padding)
+ buf_mapped(buf, filp)
+ buf_unmapped(buf, filp)
+ create_buf_file(filename, parent, mode, buf, is_global)
+ remove_buf_file(dentry)
+
+ helper functions:
+
+ relay_buf_full(buf)
+ subbuf_start_reserve(buf, length)
+
+
+Creating a channel
+------------------
+
+relay_open() is used to create a channel, along with its per-cpu
+channel buffers. Each channel buffer will have an associated file
+created for it in the host filesystem, which can be and mmapped or
+read from in user space. The files are named basename0...basenameN-1
+where N is the number of online cpus, and by default will be created
+in the root of the filesystem (if the parent param is NULL). If you
+want a directory structure to contain your relay files, you should
+create it using the host filesystem's directory creation function,
+e.g. debugfs_create_dir(), and pass the parent directory to
+relay_open(). Users are responsible for cleaning up any directory
+structure they create, when the channel is closed - again the host
+filesystem's directory removal functions should be used for that,
+e.g. debugfs_remove().
+
+In order for a channel to be created and the host filesystem's files
+associated with its channel buffers, the user must provide definitions
+for two callback functions, create_buf_file() and remove_buf_file().
+create_buf_file() is called once for each per-cpu buffer from
+relay_open() and allows the user to create the file which will be used
+to represent the corresponding channel buffer. The callback should
+return the dentry of the file created to represent the channel buffer.
+remove_buf_file() must also be defined; it's responsible for deleting
+the file(s) created in create_buf_file() and is called during
+relay_close().
+
+Here are some typical definitions for these callbacks, in this case
+using debugfs:
+
+/*
+ * create_buf_file() callback. Creates relay file in debugfs.
+ */
+static struct dentry *create_buf_file_handler(const char *filename,
+ struct dentry *parent,
+ int mode,
+ struct rchan_buf *buf,
+ int *is_global)
+{
+ return debugfs_create_file(filename, mode, parent, buf,
+ &relay_file_operations);
+}
+
+/*
+ * remove_buf_file() callback. Removes relay file from debugfs.
+ */
+static int remove_buf_file_handler(struct dentry *dentry)
+{
+ debugfs_remove(dentry);
+
+ return 0;
+}
+
+/*
+ * relay interface callbacks
+ */
+static struct rchan_callbacks relay_callbacks =
+{
+ .create_buf_file = create_buf_file_handler,
+ .remove_buf_file = remove_buf_file_handler,
+};
+
+And an example relay_open() invocation using them:
+
+ chan = relay_open("cpu", NULL, SUBBUF_SIZE, N_SUBBUFS, &relay_callbacks, NULL);
+
+If the create_buf_file() callback fails, or isn't defined, channel
+creation and thus relay_open() will fail.
+
+The total size of each per-cpu buffer is calculated by multiplying the
+number of sub-buffers by the sub-buffer size passed into relay_open().
+The idea behind sub-buffers is that they're basically an extension of
+double-buffering to N buffers, and they also allow applications to
+easily implement random-access-on-buffer-boundary schemes, which can
+be important for some high-volume applications. The number and size
+of sub-buffers is completely dependent on the application and even for
+the same application, different conditions will warrant different
+values for these parameters at different times. Typically, the right
+values to use are best decided after some experimentation; in general,
+though, it's safe to assume that having only 1 sub-buffer is a bad
+idea - you're guaranteed to either overwrite data or lose events
+depending on the channel mode being used.
+
+The create_buf_file() implementation can also be defined in such a way
+as to allow the creation of a single 'global' buffer instead of the
+default per-cpu set. This can be useful for applications interested
+mainly in seeing the relative ordering of system-wide events without
+the need to bother with saving explicit timestamps for the purpose of
+merging/sorting per-cpu files in a postprocessing step.
+
+To have relay_open() create a global buffer, the create_buf_file()
+implementation should set the value of the is_global outparam to a
+non-zero value in addition to creating the file that will be used to
+represent the single buffer. In the case of a global buffer,
+create_buf_file() and remove_buf_file() will be called only once. The
+normal channel-writing functions, e.g. relay_write(), can still be
+used - writes from any cpu will transparently end up in the global
+buffer - but since it is a global buffer, callers should make sure
+they use the proper locking for such a buffer, either by wrapping
+writes in a spinlock, or by copying a write function from relay.h and
+creating a local version that internally does the proper locking.
+
+The private_data passed into relay_open() allows clients to associate
+user-defined data with a channel, and is immediately available
+(including in create_buf_file()) via chan->private_data or
+buf->chan->private_data.
+
+Buffer-only channels
+--------------------
+
+These channels have no files associated and can be created with
+relay_open(NULL, NULL, ...). Such channels are useful in scenarios such
+as when doing early tracing in the kernel, before the VFS is up. In these
+cases, one may open a buffer-only channel and then call
+relay_late_setup_files() when the kernel is ready to handle files,
+to expose the buffered data to the userspace.
+
+Channel 'modes'
+---------------
+
+relay channels can be used in either of two modes - 'overwrite' or
+'no-overwrite'. The mode is entirely determined by the implementation
+of the subbuf_start() callback, as described below. The default if no
+subbuf_start() callback is defined is 'no-overwrite' mode. If the
+default mode suits your needs, and you plan to use the read()
+interface to retrieve channel data, you can ignore the details of this
+section, as it pertains mainly to mmap() implementations.
+
+In 'overwrite' mode, also known as 'flight recorder' mode, writes
+continuously cycle around the buffer and will never fail, but will
+unconditionally overwrite old data regardless of whether it's actually
+been consumed. In no-overwrite mode, writes will fail, i.e. data will
+be lost, if the number of unconsumed sub-buffers equals the total
+number of sub-buffers in the channel. It should be clear that if
+there is no consumer or if the consumer can't consume sub-buffers fast
+enough, data will be lost in either case; the only difference is
+whether data is lost from the beginning or the end of a buffer.
+
+As explained above, a relay channel is made of up one or more
+per-cpu channel buffers, each implemented as a circular buffer
+subdivided into one or more sub-buffers. Messages are written into
+the current sub-buffer of the channel's current per-cpu buffer via the
+write functions described below. Whenever a message can't fit into
+the current sub-buffer, because there's no room left for it, the
+client is notified via the subbuf_start() callback that a switch to a
+new sub-buffer is about to occur. The client uses this callback to 1)
+initialize the next sub-buffer if appropriate 2) finalize the previous
+sub-buffer if appropriate and 3) return a boolean value indicating
+whether or not to actually move on to the next sub-buffer.
+
+To implement 'no-overwrite' mode, the userspace client would provide
+an implementation of the subbuf_start() callback something like the
+following:
+
+static int subbuf_start(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ unsigned int prev_padding)
+{
+ if (prev_subbuf)
+ *((unsigned *)prev_subbuf) = prev_padding;
+
+ if (relay_buf_full(buf))
+ return 0;
+
+ subbuf_start_reserve(buf, sizeof(unsigned int));
+
+ return 1;
+}
+
+If the current buffer is full, i.e. all sub-buffers remain unconsumed,
+the callback returns 0 to indicate that the buffer switch should not
+occur yet, i.e. until the consumer has had a chance to read the
+current set of ready sub-buffers. For the relay_buf_full() function
+to make sense, the consumer is responsible for notifying the relay
+interface when sub-buffers have been consumed via
+relay_subbufs_consumed(). Any subsequent attempts to write into the
+buffer will again invoke the subbuf_start() callback with the same
+parameters; only when the consumer has consumed one or more of the
+ready sub-buffers will relay_buf_full() return 0, in which case the
+buffer switch can continue.
+
+The implementation of the subbuf_start() callback for 'overwrite' mode
+would be very similar:
+
+static int subbuf_start(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ unsigned int prev_padding)
+{
+ if (prev_subbuf)
+ *((unsigned *)prev_subbuf) = prev_padding;
+
+ subbuf_start_reserve(buf, sizeof(unsigned int));
+
+ return 1;
+}
+
+In this case, the relay_buf_full() check is meaningless and the
+callback always returns 1, causing the buffer switch to occur
+unconditionally. It's also meaningless for the client to use the
+relay_subbufs_consumed() function in this mode, as it's never
+consulted.
+
+The default subbuf_start() implementation, used if the client doesn't
+define any callbacks, or doesn't define the subbuf_start() callback,
+implements the simplest possible 'no-overwrite' mode, i.e. it does
+nothing but return 0.
+
+Header information can be reserved at the beginning of each sub-buffer
+by calling the subbuf_start_reserve() helper function from within the
+subbuf_start() callback. This reserved area can be used to store
+whatever information the client wants. In the example above, room is
+reserved in each sub-buffer to store the padding count for that
+sub-buffer. This is filled in for the previous sub-buffer in the
+subbuf_start() implementation; the padding value for the previous
+sub-buffer is passed into the subbuf_start() callback along with a
+pointer to the previous sub-buffer, since the padding value isn't
+known until a sub-buffer is filled. The subbuf_start() callback is
+also called for the first sub-buffer when the channel is opened, to
+give the client a chance to reserve space in it. In this case the
+previous sub-buffer pointer passed into the callback will be NULL, so
+the client should check the value of the prev_subbuf pointer before
+writing into the previous sub-buffer.
+
+Writing to a channel
+--------------------
+
+Kernel clients write data into the current cpu's channel buffer using
+relay_write() or __relay_write(). relay_write() is the main logging
+function - it uses local_irqsave() to protect the buffer and should be
+used if you might be logging from interrupt context. If you know
+you'll never be logging from interrupt context, you can use
+__relay_write(), which only disables preemption. These functions
+don't return a value, so you can't determine whether or not they
+failed - the assumption is that you wouldn't want to check a return
+value in the fast logging path anyway, and that they'll always succeed
+unless the buffer is full and no-overwrite mode is being used, in
+which case you can detect a failed write in the subbuf_start()
+callback by calling the relay_buf_full() helper function.
+
+relay_reserve() is used to reserve a slot in a channel buffer which
+can be written to later. This would typically be used in applications
+that need to write directly into a channel buffer without having to
+stage data in a temporary buffer beforehand. Because the actual write
+may not happen immediately after the slot is reserved, applications
+using relay_reserve() can keep a count of the number of bytes actually
+written, either in space reserved in the sub-buffers themselves or as
+a separate array. See the 'reserve' example in the relay-apps tarball
+at http://relayfs.sourceforge.net for an example of how this can be
+done. Because the write is under control of the client and is
+separated from the reserve, relay_reserve() doesn't protect the buffer
+at all - it's up to the client to provide the appropriate
+synchronization when using relay_reserve().
+
+Closing a channel
+-----------------
+
+The client calls relay_close() when it's finished using the channel.
+The channel and its associated buffers are destroyed when there are no
+longer any references to any of the channel buffers. relay_flush()
+forces a sub-buffer switch on all the channel buffers, and can be used
+to finalize and process the last sub-buffers before the channel is
+closed.
+
+Misc
+----
+
+Some applications may want to keep a channel around and re-use it
+rather than open and close a new channel for each use. relay_reset()
+can be used for this purpose - it resets a channel to its initial
+state without reallocating channel buffer memory or destroying
+existing mappings. It should however only be called when it's safe to
+do so, i.e. when the channel isn't currently being written to.
+
+Finally, there are a couple of utility callbacks that can be used for
+different purposes. buf_mapped() is called whenever a channel buffer
+is mmapped from user space and buf_unmapped() is called when it's
+unmapped. The client can use this notification to trigger actions
+within the kernel application, such as enabling/disabling logging to
+the channel.
+
+
+Resources
+=========
+
+For news, example code, mailing list, etc. see the relay interface homepage:
+
+ http://relayfs.sourceforge.net
+
+
+Credits
+=======
+
+The ideas and specs for the relay interface came about as a result of
+discussions on tracing involving the following:
+
+Michel Dagenais <michel.dagenais@polymtl.ca>
+Richard Moore <richardj_moore@uk.ibm.com>
+Bob Wisniewski <bob@watson.ibm.com>
+Karim Yaghmour <karim@opersys.com>
+Tom Zanussi <zanussi@us.ibm.com>
+
+Also thanks to Hubertus Franke for a lot of useful suggestions and bug
+reports.
diff --git a/Documentation/filesystems/romfs.txt b/Documentation/filesystems/romfs.txt
new file mode 100644
index 000000000..e2b07cc91
--- /dev/null
+++ b/Documentation/filesystems/romfs.txt
@@ -0,0 +1,186 @@
+ROMFS - ROM FILE SYSTEM
+
+This is a quite dumb, read only filesystem, mainly for initial RAM
+disks of installation disks. It has grown up by the need of having
+modules linked at boot time. Using this filesystem, you get a very
+similar feature, and even the possibility of a small kernel, with a
+file system which doesn't take up useful memory from the router
+functions in the basement of your office.
+
+For comparison, both the older minix and xiafs (the latter is now
+defunct) filesystems, compiled as module need more than 20000 bytes,
+while romfs is less than a page, about 4000 bytes (assuming i586
+code). Under the same conditions, the msdos filesystem would need
+about 30K (and does not support device nodes or symlinks), while the
+nfs module with nfsroot is about 57K. Furthermore, as a bit unfair
+comparison, an actual rescue disk used up 3202 blocks with ext2, while
+with romfs, it needed 3079 blocks.
+
+To create such a file system, you'll need a user program named
+genromfs. It is available on http://romfs.sourceforge.net/
+
+As the name suggests, romfs could be also used (space-efficiently) on
+various read-only media, like (E)EPROM disks if someone will have the
+motivation.. :)
+
+However, the main purpose of romfs is to have a very small kernel,
+which has only this filesystem linked in, and then can load any module
+later, with the current module utilities. It can also be used to run
+some program to decide if you need SCSI devices, and even IDE or
+floppy drives can be loaded later if you use the "initrd"--initial
+RAM disk--feature of the kernel. This would not be really news
+flash, but with romfs, you can even spare off your ext2 or minix or
+maybe even affs filesystem until you really know that you need it.
+
+For example, a distribution boot disk can contain only the cd disk
+drivers (and possibly the SCSI drivers), and the ISO 9660 filesystem
+module. The kernel can be small enough, since it doesn't have other
+filesystems, like the quite large ext2fs module, which can then be
+loaded off the CD at a later stage of the installation. Another use
+would be for a recovery disk, when you are reinstalling a workstation
+from the network, and you will have all the tools/modules available
+from a nearby server, so you don't want to carry two disks for this
+purpose, just because it won't fit into ext2.
+
+romfs operates on block devices as you can expect, and the underlying
+structure is very simple. Every accessible structure begins on 16
+byte boundaries for fast access. The minimum space a file will take
+is 32 bytes (this is an empty file, with a less than 16 character
+name). The maximum overhead for any non-empty file is the header, and
+the 16 byte padding for the name and the contents, also 16+14+15 = 45
+bytes. This is quite rare however, since most file names are longer
+than 3 bytes, and shorter than 15 bytes.
+
+The layout of the filesystem is the following:
+
+offset content
+
+ +---+---+---+---+
+ 0 | - | r | o | m | \
+ +---+---+---+---+ The ASCII representation of those bytes
+ 4 | 1 | f | s | - | / (i.e. "-rom1fs-")
+ +---+---+---+---+
+ 8 | full size | The number of accessible bytes in this fs.
+ +---+---+---+---+
+ 12 | checksum | The checksum of the FIRST 512 BYTES.
+ +---+---+---+---+
+ 16 | volume name | The zero terminated name of the volume,
+ : : padded to 16 byte boundary.
+ +---+---+---+---+
+ xx | file |
+ : headers :
+
+Every multi byte value (32 bit words, I'll use the longwords term from
+now on) must be in big endian order.
+
+The first eight bytes identify the filesystem, even for the casual
+inspector. After that, in the 3rd longword, it contains the number of
+bytes accessible from the start of this filesystem. The 4th longword
+is the checksum of the first 512 bytes (or the number of bytes
+accessible, whichever is smaller). The applied algorithm is the same
+as in the AFFS filesystem, namely a simple sum of the longwords
+(assuming bigendian quantities again). For details, please consult
+the source. This algorithm was chosen because although it's not quite
+reliable, it does not require any tables, and it is very simple.
+
+The following bytes are now part of the file system; each file header
+must begin on a 16 byte boundary.
+
+offset content
+
+ +---+---+---+---+
+ 0 | next filehdr|X| The offset of the next file header
+ +---+---+---+---+ (zero if no more files)
+ 4 | spec.info | Info for directories/hard links/devices
+ +---+---+---+---+
+ 8 | size | The size of this file in bytes
+ +---+---+---+---+
+ 12 | checksum | Covering the meta data, including the file
+ +---+---+---+---+ name, and padding
+ 16 | file name | The zero terminated name of the file,
+ : : padded to 16 byte boundary
+ +---+---+---+---+
+ xx | file data |
+ : :
+
+Since the file headers begin always at a 16 byte boundary, the lowest
+4 bits would be always zero in the next filehdr pointer. These four
+bits are used for the mode information. Bits 0..2 specify the type of
+the file; while bit 4 shows if the file is executable or not. The
+permissions are assumed to be world readable, if this bit is not set,
+and world executable if it is; except the character and block devices,
+they are never accessible for other than owner. The owner of every
+file is user and group 0, this should never be a problem for the
+intended use. The mapping of the 8 possible values to file types is
+the following:
+
+ mapping spec.info means
+ 0 hard link link destination [file header]
+ 1 directory first file's header
+ 2 regular file unused, must be zero [MBZ]
+ 3 symbolic link unused, MBZ (file data is the link content)
+ 4 block device 16/16 bits major/minor number
+ 5 char device - " -
+ 6 socket unused, MBZ
+ 7 fifo unused, MBZ
+
+Note that hard links are specifically marked in this filesystem, but
+they will behave as you can expect (i.e. share the inode number).
+Note also that it is your responsibility to not create hard link
+loops, and creating all the . and .. links for directories. This is
+normally done correctly by the genromfs program. Please refrain from
+using the executable bits for special purposes on the socket and fifo
+special files, they may have other uses in the future. Additionally,
+please remember that only regular files, and symlinks are supposed to
+have a nonzero size field; they contain the number of bytes available
+directly after the (padded) file name.
+
+Another thing to note is that romfs works on file headers and data
+aligned to 16 byte boundaries, but most hardware devices and the block
+device drivers are unable to cope with smaller than block-sized data.
+To overcome this limitation, the whole size of the file system must be
+padded to an 1024 byte boundary.
+
+If you have any problems or suggestions concerning this file system,
+please contact me. However, think twice before wanting me to add
+features and code, because the primary and most important advantage of
+this file system is the small code. On the other hand, don't be
+alarmed, I'm not getting that much romfs related mail. Now I can
+understand why Avery wrote poems in the ARCnet docs to get some more
+feedback. :)
+
+romfs has also a mailing list, and to date, it hasn't received any
+traffic, so you are welcome to join it to discuss your ideas. :)
+
+It's run by ezmlm, so you can subscribe to it by sending a message
+to romfs-subscribe@shadow.banki.hu, the content is irrelevant.
+
+Pending issues:
+
+- Permissions and owner information are pretty essential features of a
+Un*x like system, but romfs does not provide the full possibilities.
+I have never found this limiting, but others might.
+
+- The file system is read only, so it can be very small, but in case
+one would want to write _anything_ to a file system, he still needs
+a writable file system, thus negating the size advantages. Possible
+solutions: implement write access as a compile-time option, or a new,
+similarly small writable filesystem for RAM disks.
+
+- Since the files are only required to have alignment on a 16 byte
+boundary, it is currently possibly suboptimal to read or execute files
+from the filesystem. It might be resolved by reordering file data to
+have most of it (i.e. except the start and the end) laying at "natural"
+boundaries, thus it would be possible to directly map a big portion of
+the file contents to the mm subsystem.
+
+- Compression might be an useful feature, but memory is quite a
+limiting factor in my eyes.
+
+- Where it is used?
+
+- Does it work on other architectures than intel and motorola?
+
+
+Have fun,
+Janos Farkas <chexum@shadow.banki.hu>
diff --git a/Documentation/filesystems/seq_file.txt b/Documentation/filesystems/seq_file.txt
new file mode 100644
index 000000000..9de430320
--- /dev/null
+++ b/Documentation/filesystems/seq_file.txt
@@ -0,0 +1,338 @@
+The seq_file interface
+
+ Copyright 2003 Jonathan Corbet <corbet@lwn.net>
+ This file is originally from the LWN.net Driver Porting series at
+ http://lwn.net/Articles/driver-porting/
+
+
+There are numerous ways for a device driver (or other kernel component) to
+provide information to the user or system administrator. One useful
+technique is the creation of virtual files, in debugfs, /proc or elsewhere.
+Virtual files can provide human-readable output that is easy to get at
+without any special utility programs; they can also make life easier for
+script writers. It is not surprising that the use of virtual files has
+grown over the years.
+
+Creating those files correctly has always been a bit of a challenge,
+however. It is not that hard to make a virtual file which returns a
+string. But life gets trickier if the output is long - anything greater
+than an application is likely to read in a single operation. Handling
+multiple reads (and seeks) requires careful attention to the reader's
+position within the virtual file - that position is, likely as not, in the
+middle of a line of output. The kernel has traditionally had a number of
+implementations that got this wrong.
+
+The 2.6 kernel contains a set of functions (implemented by Alexander Viro)
+which are designed to make it easy for virtual file creators to get it
+right.
+
+The seq_file interface is available via <linux/seq_file.h>. There are
+three aspects to seq_file:
+
+ * An iterator interface which lets a virtual file implementation
+ step through the objects it is presenting.
+
+ * Some utility functions for formatting objects for output without
+ needing to worry about things like output buffers.
+
+ * A set of canned file_operations which implement most operations on
+ the virtual file.
+
+We'll look at the seq_file interface via an extremely simple example: a
+loadable module which creates a file called /proc/sequence. The file, when
+read, simply produces a set of increasing integer values, one per line. The
+sequence will continue until the user loses patience and finds something
+better to do. The file is seekable, in that one can do something like the
+following:
+
+ dd if=/proc/sequence of=out1 count=1
+ dd if=/proc/sequence skip=1 of=out2 count=1
+
+Then concatenate the output files out1 and out2 and get the right
+result. Yes, it is a thoroughly useless module, but the point is to show
+how the mechanism works without getting lost in other details. (Those
+wanting to see the full source for this module can find it at
+http://lwn.net/Articles/22359/).
+
+Deprecated create_proc_entry
+
+Note that the above article uses create_proc_entry which was removed in
+kernel 3.10. Current versions require the following update
+
+- entry = create_proc_entry("sequence", 0, NULL);
+- if (entry)
+- entry->proc_fops = &ct_file_ops;
++ entry = proc_create("sequence", 0, NULL, &ct_file_ops);
+
+The iterator interface
+
+Modules implementing a virtual file with seq_file must implement a simple
+iterator object that allows stepping through the data of interest.
+Iterators must be able to move to a specific position - like the file they
+implement - but the interpretation of that position is up to the iterator
+itself. A seq_file implementation that is formatting firewall rules, for
+example, could interpret position N as the Nth rule in the chain.
+Positioning can thus be done in whatever way makes the most sense for the
+generator of the data, which need not be aware of how a position translates
+to an offset in the virtual file. The one obvious exception is that a
+position of zero should indicate the beginning of the file.
+
+The /proc/sequence iterator just uses the count of the next number it
+will output as its position.
+
+Four functions must be implemented to make the iterator work. The first,
+called start() takes a position as an argument and returns an iterator
+which will start reading at that position. For our simple sequence example,
+the start() function looks like:
+
+ static void *ct_seq_start(struct seq_file *s, loff_t *pos)
+ {
+ loff_t *spos = kmalloc(sizeof(loff_t), GFP_KERNEL);
+ if (! spos)
+ return NULL;
+ *spos = *pos;
+ return spos;
+ }
+
+The entire data structure for this iterator is a single loff_t value
+holding the current position. There is no upper bound for the sequence
+iterator, but that will not be the case for most other seq_file
+implementations; in most cases the start() function should check for a
+"past end of file" condition and return NULL if need be.
+
+For more complicated applications, the private field of the seq_file
+structure can be used. There is also a special value which can be returned
+by the start() function called SEQ_START_TOKEN; it can be used if you wish
+to instruct your show() function (described below) to print a header at the
+top of the output. SEQ_START_TOKEN should only be used if the offset is
+zero, however.
+
+The next function to implement is called, amazingly, next(); its job is to
+move the iterator forward to the next position in the sequence. The
+example module can simply increment the position by one; more useful
+modules will do what is needed to step through some data structure. The
+next() function returns a new iterator, or NULL if the sequence is
+complete. Here's the example version:
+
+ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+ {
+ loff_t *spos = v;
+ *pos = ++*spos;
+ return spos;
+ }
+
+The stop() function is called when iteration is complete; its job, of
+course, is to clean up. If dynamic memory is allocated for the iterator,
+stop() is the place to free it.
+
+ static void ct_seq_stop(struct seq_file *s, void *v)
+ {
+ kfree(v);
+ }
+
+Finally, the show() function should format the object currently pointed to
+by the iterator for output. The example module's show() function is:
+
+ static int ct_seq_show(struct seq_file *s, void *v)
+ {
+ loff_t *spos = v;
+ seq_printf(s, "%lld\n", (long long)*spos);
+ return 0;
+ }
+
+If all is well, the show() function should return zero. A negative error
+code in the usual manner indicates that something went wrong; it will be
+passed back to user space. This function can also return SEQ_SKIP, which
+causes the current item to be skipped; if the show() function has already
+generated output before returning SEQ_SKIP, that output will be dropped.
+
+We will look at seq_printf() in a moment. But first, the definition of the
+seq_file iterator is finished by creating a seq_operations structure with
+the four functions we have just defined:
+
+ static const struct seq_operations ct_seq_ops = {
+ .start = ct_seq_start,
+ .next = ct_seq_next,
+ .stop = ct_seq_stop,
+ .show = ct_seq_show
+ };
+
+This structure will be needed to tie our iterator to the /proc file in
+a little bit.
+
+It's worth noting that the iterator value returned by start() and
+manipulated by the other functions is considered to be completely opaque by
+the seq_file code. It can thus be anything that is useful in stepping
+through the data to be output. Counters can be useful, but it could also be
+a direct pointer into an array or linked list. Anything goes, as long as
+the programmer is aware that things can happen between calls to the
+iterator function. However, the seq_file code (by design) will not sleep
+between the calls to start() and stop(), so holding a lock during that time
+is a reasonable thing to do. The seq_file code will also avoid taking any
+other locks while the iterator is active.
+
+
+Formatted output
+
+The seq_file code manages positioning within the output created by the
+iterator and getting it into the user's buffer. But, for that to work, that
+output must be passed to the seq_file code. Some utility functions have
+been defined which make this task easy.
+
+Most code will simply use seq_printf(), which works pretty much like
+printk(), but which requires the seq_file pointer as an argument.
+
+For straight character output, the following functions may be used:
+
+ seq_putc(struct seq_file *m, char c);
+ seq_puts(struct seq_file *m, const char *s);
+ seq_escape(struct seq_file *m, const char *s, const char *esc);
+
+The first two output a single character and a string, just like one would
+expect. seq_escape() is like seq_puts(), except that any character in s
+which is in the string esc will be represented in octal form in the output.
+
+There are also a pair of functions for printing filenames:
+
+ int seq_path(struct seq_file *m, const struct path *path,
+ const char *esc);
+ int seq_path_root(struct seq_file *m, const struct path *path,
+ const struct path *root, const char *esc)
+
+Here, path indicates the file of interest, and esc is a set of characters
+which should be escaped in the output. A call to seq_path() will output
+the path relative to the current process's filesystem root. If a different
+root is desired, it can be used with seq_path_root(). If it turns out that
+path cannot be reached from root, seq_path_root() returns SEQ_SKIP.
+
+A function producing complicated output may want to check
+ bool seq_has_overflowed(struct seq_file *m);
+and avoid further seq_<output> calls if true is returned.
+
+A true return from seq_has_overflowed means that the seq_file buffer will
+be discarded and the seq_show function will attempt to allocate a larger
+buffer and retry printing.
+
+
+Making it all work
+
+So far, we have a nice set of functions which can produce output within the
+seq_file system, but we have not yet turned them into a file that a user
+can see. Creating a file within the kernel requires, of course, the
+creation of a set of file_operations which implement the operations on that
+file. The seq_file interface provides a set of canned operations which do
+most of the work. The virtual file author still must implement the open()
+method, however, to hook everything up. The open function is often a single
+line, as in the example module:
+
+ static int ct_open(struct inode *inode, struct file *file)
+ {
+ return seq_open(file, &ct_seq_ops);
+ }
+
+Here, the call to seq_open() takes the seq_operations structure we created
+before, and gets set up to iterate through the virtual file.
+
+On a successful open, seq_open() stores the struct seq_file pointer in
+file->private_data. If you have an application where the same iterator can
+be used for more than one file, you can store an arbitrary pointer in the
+private field of the seq_file structure; that value can then be retrieved
+by the iterator functions.
+
+There is also a wrapper function to seq_open() called seq_open_private(). It
+kmallocs a zero filled block of memory and stores a pointer to it in the
+private field of the seq_file structure, returning 0 on success. The
+block size is specified in a third parameter to the function, e.g.:
+
+ static int ct_open(struct inode *inode, struct file *file)
+ {
+ return seq_open_private(file, &ct_seq_ops,
+ sizeof(struct mystruct));
+ }
+
+There is also a variant function, __seq_open_private(), which is functionally
+identical except that, if successful, it returns the pointer to the allocated
+memory block, allowing further initialisation e.g.:
+
+ static int ct_open(struct inode *inode, struct file *file)
+ {
+ struct mystruct *p =
+ __seq_open_private(file, &ct_seq_ops, sizeof(*p));
+
+ if (!p)
+ return -ENOMEM;
+
+ p->foo = bar; /* initialize my stuff */
+ ...
+ p->baz = true;
+
+ return 0;
+ }
+
+A corresponding close function, seq_release_private() is available which
+frees the memory allocated in the corresponding open.
+
+The other operations of interest - read(), llseek(), and release() - are
+all implemented by the seq_file code itself. So a virtual file's
+file_operations structure will look like:
+
+ static const struct file_operations ct_file_ops = {
+ .owner = THIS_MODULE,
+ .open = ct_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+ };
+
+There is also a seq_release_private() which passes the contents of the
+seq_file private field to kfree() before releasing the structure.
+
+The final step is the creation of the /proc file itself. In the example
+code, that is done in the initialization code in the usual way:
+
+ static int ct_init(void)
+ {
+ struct proc_dir_entry *entry;
+
+ proc_create("sequence", 0, NULL, &ct_file_ops);
+ return 0;
+ }
+
+ module_init(ct_init);
+
+And that is pretty much it.
+
+
+seq_list
+
+If your file will be iterating through a linked list, you may find these
+routines useful:
+
+ struct list_head *seq_list_start(struct list_head *head,
+ loff_t pos);
+ struct list_head *seq_list_start_head(struct list_head *head,
+ loff_t pos);
+ struct list_head *seq_list_next(void *v, struct list_head *head,
+ loff_t *ppos);
+
+These helpers will interpret pos as a position within the list and iterate
+accordingly. Your start() and next() functions need only invoke the
+seq_list_* helpers with a pointer to the appropriate list_head structure.
+
+
+The extra-simple version
+
+For extremely simple virtual files, there is an even easier interface. A
+module can define only the show() function, which should create all the
+output that the virtual file will contain. The file's open() method then
+calls:
+
+ int single_open(struct file *file,
+ int (*show)(struct seq_file *m, void *p),
+ void *data);
+
+When output time comes, the show() function will be called once. The data
+value given to single_open() can be found in the private field of the
+seq_file structure. When using single_open(), the programmer should use
+single_release() instead of seq_release() in the file_operations structure
+to avoid a memory leak.
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt
new file mode 100644
index 000000000..32a173dd3
--- /dev/null
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -0,0 +1,939 @@
+Shared Subtrees
+---------------
+
+Contents:
+ 1) Overview
+ 2) Features
+ 3) Setting mount states
+ 4) Use-case
+ 5) Detailed semantics
+ 6) Quiz
+ 7) FAQ
+ 8) Implementation
+
+
+1) Overview
+-----------
+
+Consider the following situation:
+
+A process wants to clone its own namespace, but still wants to access the CD
+that got mounted recently. Shared subtree semantics provide the necessary
+mechanism to accomplish the above.
+
+It provides the necessary building blocks for features like per-user-namespace
+and versioned filesystem.
+
+2) Features
+-----------
+
+Shared subtree provides four different flavors of mounts; struct vfsmount to be
+precise
+
+ a. shared mount
+ b. slave mount
+ c. private mount
+ d. unbindable mount
+
+
+2a) A shared mount can be replicated to as many mountpoints and all the
+replicas continue to be exactly same.
+
+ Here is an example:
+
+ Let's say /mnt has a mount that is shared.
+ mount --make-shared /mnt
+
+ Note: mount(8) command now supports the --make-shared flag,
+ so the sample 'smount' program is no longer needed and has been
+ removed.
+
+ # mount --bind /mnt /tmp
+ The above command replicates the mount at /mnt to the mountpoint /tmp
+ and the contents of both the mounts remain identical.
+
+ #ls /mnt
+ a b c
+
+ #ls /tmp
+ a b c
+
+ Now let's say we mount a device at /tmp/a
+ # mount /dev/sd0 /tmp/a
+
+ #ls /tmp/a
+ t1 t2 t3
+
+ #ls /mnt/a
+ t1 t2 t3
+
+ Note that the mount has propagated to the mount at /mnt as well.
+
+ And the same is true even when /dev/sd0 is mounted on /mnt/a. The
+ contents will be visible under /tmp/a too.
+
+
+2b) A slave mount is like a shared mount except that mount and umount events
+ only propagate towards it.
+
+ All slave mounts have a master mount which is a shared.
+
+ Here is an example:
+
+ Let's say /mnt has a mount which is shared.
+ # mount --make-shared /mnt
+
+ Let's bind mount /mnt to /tmp
+ # mount --bind /mnt /tmp
+
+ the new mount at /tmp becomes a shared mount and it is a replica of
+ the mount at /mnt.
+
+ Now let's make the mount at /tmp; a slave of /mnt
+ # mount --make-slave /tmp
+
+ let's mount /dev/sd0 on /mnt/a
+ # mount /dev/sd0 /mnt/a
+
+ #ls /mnt/a
+ t1 t2 t3
+
+ #ls /tmp/a
+ t1 t2 t3
+
+ Note the mount event has propagated to the mount at /tmp
+
+ However let's see what happens if we mount something on the mount at /tmp
+
+ # mount /dev/sd1 /tmp/b
+
+ #ls /tmp/b
+ s1 s2 s3
+
+ #ls /mnt/b
+
+ Note how the mount event has not propagated to the mount at
+ /mnt
+
+
+2c) A private mount does not forward or receive propagation.
+
+ This is the mount we are familiar with. Its the default type.
+
+
+2d) A unbindable mount is a unbindable private mount
+
+ let's say we have a mount at /mnt and we make is unbindable
+
+ # mount --make-unbindable /mnt
+
+ Let's try to bind mount this mount somewhere else.
+ # mount --bind /mnt /tmp
+ mount: wrong fs type, bad option, bad superblock on /mnt,
+ or too many mounted file systems
+
+ Binding a unbindable mount is a invalid operation.
+
+
+3) Setting mount states
+
+ The mount command (util-linux package) can be used to set mount
+ states:
+
+ mount --make-shared mountpoint
+ mount --make-slave mountpoint
+ mount --make-private mountpoint
+ mount --make-unbindable mountpoint
+
+
+4) Use cases
+------------
+
+ A) A process wants to clone its own namespace, but still wants to
+ access the CD that got mounted recently.
+
+ Solution:
+
+ The system administrator can make the mount at /cdrom shared
+ mount --bind /cdrom /cdrom
+ mount --make-shared /cdrom
+
+ Now any process that clones off a new namespace will have a
+ mount at /cdrom which is a replica of the same mount in the
+ parent namespace.
+
+ So when a CD is inserted and mounted at /cdrom that mount gets
+ propagated to the other mount at /cdrom in all the other clone
+ namespaces.
+
+ B) A process wants its mounts invisible to any other process, but
+ still be able to see the other system mounts.
+
+ Solution:
+
+ To begin with, the administrator can mark the entire mount tree
+ as shareable.
+
+ mount --make-rshared /
+
+ A new process can clone off a new namespace. And mark some part
+ of its namespace as slave
+
+ mount --make-rslave /myprivatetree
+
+ Hence forth any mounts within the /myprivatetree done by the
+ process will not show up in any other namespace. However mounts
+ done in the parent namespace under /myprivatetree still shows
+ up in the process's namespace.
+
+
+ Apart from the above semantics this feature provides the
+ building blocks to solve the following problems:
+
+ C) Per-user namespace
+
+ The above semantics allows a way to share mounts across
+ namespaces. But namespaces are associated with processes. If
+ namespaces are made first class objects with user API to
+ associate/disassociate a namespace with userid, then each user
+ could have his/her own namespace and tailor it to his/her
+ requirements. Offcourse its needs support from PAM.
+
+ D) Versioned files
+
+ If the entire mount tree is visible at multiple locations, then
+ a underlying versioning file system can return different
+ version of the file depending on the path used to access that
+ file.
+
+ An example is:
+
+ mount --make-shared /
+ mount --rbind / /view/v1
+ mount --rbind / /view/v2
+ mount --rbind / /view/v3
+ mount --rbind / /view/v4
+
+ and if /usr has a versioning filesystem mounted, then that
+ mount appears at /view/v1/usr, /view/v2/usr, /view/v3/usr and
+ /view/v4/usr too
+
+ A user can request v3 version of the file /usr/fs/namespace.c
+ by accessing /view/v3/usr/fs/namespace.c . The underlying
+ versioning filesystem can then decipher that v3 version of the
+ filesystem is being requested and return the corresponding
+ inode.
+
+5) Detailed semantics:
+-------------------
+ The section below explains the detailed semantics of
+ bind, rbind, move, mount, umount and clone-namespace operations.
+
+ Note: the word 'vfsmount' and the noun 'mount' have been used
+ to mean the same thing, throughout this document.
+
+5a) Mount states
+
+ A given mount can be in one of the following states
+ 1) shared
+ 2) slave
+ 3) shared and slave
+ 4) private
+ 5) unbindable
+
+ A 'propagation event' is defined as event generated on a vfsmount
+ that leads to mount or unmount actions in other vfsmounts.
+
+ A 'peer group' is defined as a group of vfsmounts that propagate
+ events to each other.
+
+ (1) Shared mounts
+
+ A 'shared mount' is defined as a vfsmount that belongs to a
+ 'peer group'.
+
+ For example:
+ mount --make-shared /mnt
+ mount --bind /mnt /tmp
+
+ The mount at /mnt and that at /tmp are both shared and belong
+ to the same peer group. Anything mounted or unmounted under
+ /mnt or /tmp reflect in all the other mounts of its peer
+ group.
+
+
+ (2) Slave mounts
+
+ A 'slave mount' is defined as a vfsmount that receives
+ propagation events and does not forward propagation events.
+
+ A slave mount as the name implies has a master mount from which
+ mount/unmount events are received. Events do not propagate from
+ the slave mount to the master. Only a shared mount can be made
+ a slave by executing the following command
+
+ mount --make-slave mount
+
+ A shared mount that is made as a slave is no more shared unless
+ modified to become shared.
+
+ (3) Shared and Slave
+
+ A vfsmount can be both shared as well as slave. This state
+ indicates that the mount is a slave of some vfsmount, and
+ has its own peer group too. This vfsmount receives propagation
+ events from its master vfsmount, and also forwards propagation
+ events to its 'peer group' and to its slave vfsmounts.
+
+ Strictly speaking, the vfsmount is shared having its own
+ peer group, and this peer-group is a slave of some other
+ peer group.
+
+ Only a slave vfsmount can be made as 'shared and slave' by
+ either executing the following command
+ mount --make-shared mount
+ or by moving the slave vfsmount under a shared vfsmount.
+
+ (4) Private mount
+
+ A 'private mount' is defined as vfsmount that does not
+ receive or forward any propagation events.
+
+ (5) Unbindable mount
+
+ A 'unbindable mount' is defined as vfsmount that does not
+ receive or forward any propagation events and cannot
+ be bind mounted.
+
+
+ State diagram:
+ The state diagram below explains the state transition of a mount,
+ in response to various commands.
+ ------------------------------------------------------------------------
+ | |make-shared | make-slave | make-private |make-unbindab|
+ --------------|------------|--------------|--------------|-------------|
+ |shared |shared |*slave/private| private | unbindable |
+ | | | | | |
+ |-------------|------------|--------------|--------------|-------------|
+ |slave |shared | **slave | private | unbindable |
+ | |and slave | | | |
+ |-------------|------------|--------------|--------------|-------------|
+ |shared |shared | slave | private | unbindable |
+ |and slave |and slave | | | |
+ |-------------|------------|--------------|--------------|-------------|
+ |private |shared | **private | private | unbindable |
+ |-------------|------------|--------------|--------------|-------------|
+ |unbindable |shared |**unbindable | private | unbindable |
+ ------------------------------------------------------------------------
+
+ * if the shared mount is the only mount in its peer group, making it
+ slave, makes it private automatically. Note that there is no master to
+ which it can be slaved to.
+
+ ** slaving a non-shared mount has no effect on the mount.
+
+ Apart from the commands listed below, the 'move' operation also changes
+ the state of a mount depending on type of the destination mount. Its
+ explained in section 5d.
+
+5b) Bind semantics
+
+ Consider the following command
+
+ mount --bind A/a B/b
+
+ where 'A' is the source mount, 'a' is the dentry in the mount 'A', 'B'
+ is the destination mount and 'b' is the dentry in the destination mount.
+
+ The outcome depends on the type of mount of 'A' and 'B'. The table
+ below contains quick reference.
+ ---------------------------------------------------------------------------
+ | BIND MOUNT OPERATION |
+ |**************************************************************************
+ |source(A)->| shared | private | slave | unbindable |
+ | dest(B) | | | | |
+ | | | | | | |
+ | v | | | | |
+ |**************************************************************************
+ | shared | shared | shared | shared & slave | invalid |
+ | | | | | |
+ |non-shared| shared | private | slave | invalid |
+ ***************************************************************************
+
+ Details:
+
+ 1. 'A' is a shared mount and 'B' is a shared mount. A new mount 'C'
+ which is clone of 'A', is created. Its root dentry is 'a' . 'C' is
+ mounted on mount 'B' at dentry 'b'. Also new mount 'C1', 'C2', 'C3' ...
+ are created and mounted at the dentry 'b' on all mounts where 'B'
+ propagates to. A new propagation tree containing 'C1',..,'Cn' is
+ created. This propagation tree is identical to the propagation tree of
+ 'B'. And finally the peer-group of 'C' is merged with the peer group
+ of 'A'.
+
+ 2. 'A' is a private mount and 'B' is a shared mount. A new mount 'C'
+ which is clone of 'A', is created. Its root dentry is 'a'. 'C' is
+ mounted on mount 'B' at dentry 'b'. Also new mount 'C1', 'C2', 'C3' ...
+ are created and mounted at the dentry 'b' on all mounts where 'B'
+ propagates to. A new propagation tree is set containing all new mounts
+ 'C', 'C1', .., 'Cn' with exactly the same configuration as the
+ propagation tree for 'B'.
+
+ 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. A new
+ mount 'C' which is clone of 'A', is created. Its root dentry is 'a' .
+ 'C' is mounted on mount 'B' at dentry 'b'. Also new mounts 'C1', 'C2',
+ 'C3' ... are created and mounted at the dentry 'b' on all mounts where
+ 'B' propagates to. A new propagation tree containing the new mounts
+ 'C','C1',.. 'Cn' is created. This propagation tree is identical to the
+ propagation tree for 'B'. And finally the mount 'C' and its peer group
+ is made the slave of mount 'Z'. In other words, mount 'C' is in the
+ state 'slave and shared'.
+
+ 4. 'A' is a unbindable mount and 'B' is a shared mount. This is a
+ invalid operation.
+
+ 5. 'A' is a private mount and 'B' is a non-shared(private or slave or
+ unbindable) mount. A new mount 'C' which is clone of 'A', is created.
+ Its root dentry is 'a'. 'C' is mounted on mount 'B' at dentry 'b'.
+
+ 6. 'A' is a shared mount and 'B' is a non-shared mount. A new mount 'C'
+ which is a clone of 'A' is created. Its root dentry is 'a'. 'C' is
+ mounted on mount 'B' at dentry 'b'. 'C' is made a member of the
+ peer-group of 'A'.
+
+ 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount. A
+ new mount 'C' which is a clone of 'A' is created. Its root dentry is
+ 'a'. 'C' is mounted on mount 'B' at dentry 'b'. Also 'C' is set as a
+ slave mount of 'Z'. In other words 'A' and 'C' are both slave mounts of
+ 'Z'. All mount/unmount events on 'Z' propagates to 'A' and 'C'. But
+ mount/unmount on 'A' do not propagate anywhere else. Similarly
+ mount/unmount on 'C' do not propagate anywhere else.
+
+ 8. 'A' is a unbindable mount and 'B' is a non-shared mount. This is a
+ invalid operation. A unbindable mount cannot be bind mounted.
+
+5c) Rbind semantics
+
+ rbind is same as bind. Bind replicates the specified mount. Rbind
+ replicates all the mounts in the tree belonging to the specified mount.
+ Rbind mount is bind mount applied to all the mounts in the tree.
+
+ If the source tree that is rbind has some unbindable mounts,
+ then the subtree under the unbindable mount is pruned in the new
+ location.
+
+ eg: let's say we have the following mount tree.
+
+ A
+ / \
+ B C
+ / \ / \
+ D E F G
+
+ Let's say all the mount except the mount C in the tree are
+ of a type other than unbindable.
+
+ If this tree is rbound to say Z
+
+ We will have the following tree at the new location.
+
+ Z
+ |
+ A'
+ /
+ B' Note how the tree under C is pruned
+ / \ in the new location.
+ D' E'
+
+
+
+5d) Move semantics
+
+ Consider the following command
+
+ mount --move A B/b
+
+ where 'A' is the source mount, 'B' is the destination mount and 'b' is
+ the dentry in the destination mount.
+
+ The outcome depends on the type of the mount of 'A' and 'B'. The table
+ below is a quick reference.
+ ---------------------------------------------------------------------------
+ | MOVE MOUNT OPERATION |
+ |**************************************************************************
+ | source(A)->| shared | private | slave | unbindable |
+ | dest(B) | | | | |
+ | | | | | | |
+ | v | | | | |
+ |**************************************************************************
+ | shared | shared | shared |shared and slave| invalid |
+ | | | | | |
+ |non-shared| shared | private | slave | unbindable |
+ ***************************************************************************
+ NOTE: moving a mount residing under a shared mount is invalid.
+
+ Details follow:
+
+ 1. 'A' is a shared mount and 'B' is a shared mount. The mount 'A' is
+ mounted on mount 'B' at dentry 'b'. Also new mounts 'A1', 'A2'...'An'
+ are created and mounted at dentry 'b' on all mounts that receive
+ propagation from mount 'B'. A new propagation tree is created in the
+ exact same configuration as that of 'B'. This new propagation tree
+ contains all the new mounts 'A1', 'A2'... 'An'. And this new
+ propagation tree is appended to the already existing propagation tree
+ of 'A'.
+
+ 2. 'A' is a private mount and 'B' is a shared mount. The mount 'A' is
+ mounted on mount 'B' at dentry 'b'. Also new mount 'A1', 'A2'... 'An'
+ are created and mounted at dentry 'b' on all mounts that receive
+ propagation from mount 'B'. The mount 'A' becomes a shared mount and a
+ propagation tree is created which is identical to that of
+ 'B'. This new propagation tree contains all the new mounts 'A1',
+ 'A2'... 'An'.
+
+ 3. 'A' is a slave mount of mount 'Z' and 'B' is a shared mount. The
+ mount 'A' is mounted on mount 'B' at dentry 'b'. Also new mounts 'A1',
+ 'A2'... 'An' are created and mounted at dentry 'b' on all mounts that
+ receive propagation from mount 'B'. A new propagation tree is created
+ in the exact same configuration as that of 'B'. This new propagation
+ tree contains all the new mounts 'A1', 'A2'... 'An'. And this new
+ propagation tree is appended to the already existing propagation tree of
+ 'A'. Mount 'A' continues to be the slave mount of 'Z' but it also
+ becomes 'shared'.
+
+ 4. 'A' is a unbindable mount and 'B' is a shared mount. The operation
+ is invalid. Because mounting anything on the shared mount 'B' can
+ create new mounts that get mounted on the mounts that receive
+ propagation from 'B'. And since the mount 'A' is unbindable, cloning
+ it to mount at other mountpoints is not possible.
+
+ 5. 'A' is a private mount and 'B' is a non-shared(private or slave or
+ unbindable) mount. The mount 'A' is mounted on mount 'B' at dentry 'b'.
+
+ 6. 'A' is a shared mount and 'B' is a non-shared mount. The mount 'A'
+ is mounted on mount 'B' at dentry 'b'. Mount 'A' continues to be a
+ shared mount.
+
+ 7. 'A' is a slave mount of mount 'Z' and 'B' is a non-shared mount.
+ The mount 'A' is mounted on mount 'B' at dentry 'b'. Mount 'A'
+ continues to be a slave mount of mount 'Z'.
+
+ 8. 'A' is a unbindable mount and 'B' is a non-shared mount. The mount
+ 'A' is mounted on mount 'B' at dentry 'b'. Mount 'A' continues to be a
+ unbindable mount.
+
+5e) Mount semantics
+
+ Consider the following command
+
+ mount device B/b
+
+ 'B' is the destination mount and 'b' is the dentry in the destination
+ mount.
+
+ The above operation is the same as bind operation with the exception
+ that the source mount is always a private mount.
+
+
+5f) Unmount semantics
+
+ Consider the following command
+
+ umount A
+
+ where 'A' is a mount mounted on mount 'B' at dentry 'b'.
+
+ If mount 'B' is shared, then all most-recently-mounted mounts at dentry
+ 'b' on mounts that receive propagation from mount 'B' and does not have
+ sub-mounts within them are unmounted.
+
+ Example: Let's say 'B1', 'B2', 'B3' are shared mounts that propagate to
+ each other.
+
+ let's say 'A1', 'A2', 'A3' are first mounted at dentry 'b' on mount
+ 'B1', 'B2' and 'B3' respectively.
+
+ let's say 'C1', 'C2', 'C3' are next mounted at the same dentry 'b' on
+ mount 'B1', 'B2' and 'B3' respectively.
+
+ if 'C1' is unmounted, all the mounts that are most-recently-mounted on
+ 'B1' and on the mounts that 'B1' propagates-to are unmounted.
+
+ 'B1' propagates to 'B2' and 'B3'. And the most recently mounted mount
+ on 'B2' at dentry 'b' is 'C2', and that of mount 'B3' is 'C3'.
+
+ So all 'C1', 'C2' and 'C3' should be unmounted.
+
+ If any of 'C2' or 'C3' has some child mounts, then that mount is not
+ unmounted, but all other mounts are unmounted. However if 'C1' is told
+ to be unmounted and 'C1' has some sub-mounts, the umount operation is
+ failed entirely.
+
+5g) Clone Namespace
+
+ A cloned namespace contains all the mounts as that of the parent
+ namespace.
+
+ Let's say 'A' and 'B' are the corresponding mounts in the parent and the
+ child namespace.
+
+ If 'A' is shared, then 'B' is also shared and 'A' and 'B' propagate to
+ each other.
+
+ If 'A' is a slave mount of 'Z', then 'B' is also the slave mount of
+ 'Z'.
+
+ If 'A' is a private mount, then 'B' is a private mount too.
+
+ If 'A' is unbindable mount, then 'B' is a unbindable mount too.
+
+
+6) Quiz
+
+ A. What is the result of the following command sequence?
+
+ mount --bind /mnt /mnt
+ mount --make-shared /mnt
+ mount --bind /mnt /tmp
+ mount --move /tmp /mnt/1
+
+ what should be the contents of /mnt /mnt/1 /mnt/1/1 should be?
+ Should they all be identical? or should /mnt and /mnt/1 be
+ identical only?
+
+
+ B. What is the result of the following command sequence?
+
+ mount --make-rshared /
+ mkdir -p /v/1
+ mount --rbind / /v/1
+
+ what should be the content of /v/1/v/1 be?
+
+
+ C. What is the result of the following command sequence?
+
+ mount --bind /mnt /mnt
+ mount --make-shared /mnt
+ mkdir -p /mnt/1/2/3 /mnt/1/test
+ mount --bind /mnt/1 /tmp
+ mount --make-slave /mnt
+ mount --make-shared /mnt
+ mount --bind /mnt/1/2 /tmp1
+ mount --make-slave /mnt
+
+ At this point we have the first mount at /tmp and
+ its root dentry is 1. Let's call this mount 'A'
+ And then we have a second mount at /tmp1 with root
+ dentry 2. Let's call this mount 'B'
+ Next we have a third mount at /mnt with root dentry
+ mnt. Let's call this mount 'C'
+
+ 'B' is the slave of 'A' and 'C' is a slave of 'B'
+ A -> B -> C
+
+ at this point if we execute the following command
+
+ mount --bind /bin /tmp/test
+
+ The mount is attempted on 'A'
+
+ will the mount propagate to 'B' and 'C' ?
+
+ what would be the contents of
+ /mnt/1/test be?
+
+7) FAQ
+
+ Q1. Why is bind mount needed? How is it different from symbolic links?
+ symbolic links can get stale if the destination mount gets
+ unmounted or moved. Bind mounts continue to exist even if the
+ other mount is unmounted or moved.
+
+ Q2. Why can't the shared subtree be implemented using exportfs?
+
+ exportfs is a heavyweight way of accomplishing part of what
+ shared subtree can do. I cannot imagine a way to implement the
+ semantics of slave mount using exportfs?
+
+ Q3 Why is unbindable mount needed?
+
+ Let's say we want to replicate the mount tree at multiple
+ locations within the same subtree.
+
+ if one rbind mounts a tree within the same subtree 'n' times
+ the number of mounts created is an exponential function of 'n'.
+ Having unbindable mount can help prune the unneeded bind
+ mounts. Here is a example.
+
+ step 1:
+ let's say the root tree has just two directories with
+ one vfsmount.
+ root
+ / \
+ tmp usr
+
+ And we want to replicate the tree at multiple
+ mountpoints under /root/tmp
+
+ step2:
+ mount --make-shared /root
+
+ mkdir -p /tmp/m1
+
+ mount --rbind /root /tmp/m1
+
+ the new tree now looks like this:
+
+ root
+ / \
+ tmp usr
+ /
+ m1
+ / \
+ tmp usr
+ /
+ m1
+
+ it has two vfsmounts
+
+ step3:
+ mkdir -p /tmp/m2
+ mount --rbind /root /tmp/m2
+
+ the new tree now looks like this:
+
+ root
+ / \
+ tmp usr
+ / \
+ m1 m2
+ / \ / \
+ tmp usr tmp usr
+ / \ /
+ m1 m2 m1
+ / \ / \
+ tmp usr tmp usr
+ / / \
+ m1 m1 m2
+ / \
+ tmp usr
+ / \
+ m1 m2
+
+ it has 6 vfsmounts
+
+ step 4:
+ mkdir -p /tmp/m3
+ mount --rbind /root /tmp/m3
+
+ I won't draw the tree..but it has 24 vfsmounts
+
+
+ at step i the number of vfsmounts is V[i] = i*V[i-1].
+ This is an exponential function. And this tree has way more
+ mounts than what we really needed in the first place.
+
+ One could use a series of umount at each step to prune
+ out the unneeded mounts. But there is a better solution.
+ Unclonable mounts come in handy here.
+
+ step 1:
+ let's say the root tree has just two directories with
+ one vfsmount.
+ root
+ / \
+ tmp usr
+
+ How do we set up the same tree at multiple locations under
+ /root/tmp
+
+ step2:
+ mount --bind /root/tmp /root/tmp
+
+ mount --make-rshared /root
+ mount --make-unbindable /root/tmp
+
+ mkdir -p /tmp/m1
+
+ mount --rbind /root /tmp/m1
+
+ the new tree now looks like this:
+
+ root
+ / \
+ tmp usr
+ /
+ m1
+ / \
+ tmp usr
+
+ step3:
+ mkdir -p /tmp/m2
+ mount --rbind /root /tmp/m2
+
+ the new tree now looks like this:
+
+ root
+ / \
+ tmp usr
+ / \
+ m1 m2
+ / \ / \
+ tmp usr tmp usr
+
+ step4:
+
+ mkdir -p /tmp/m3
+ mount --rbind /root /tmp/m3
+
+ the new tree now looks like this:
+
+ root
+ / \
+ tmp usr
+ / \ \
+ m1 m2 m3
+ / \ / \ / \
+ tmp usr tmp usr tmp usr
+
+8) Implementation
+
+8A) Datastructure
+
+ 4 new fields are introduced to struct vfsmount
+ ->mnt_share
+ ->mnt_slave_list
+ ->mnt_slave
+ ->mnt_master
+
+ ->mnt_share links together all the mount to/from which this vfsmount
+ send/receives propagation events.
+
+ ->mnt_slave_list links all the mounts to which this vfsmount propagates
+ to.
+
+ ->mnt_slave links together all the slaves that its master vfsmount
+ propagates to.
+
+ ->mnt_master points to the master vfsmount from which this vfsmount
+ receives propagation.
+
+ ->mnt_flags takes two more flags to indicate the propagation status of
+ the vfsmount. MNT_SHARE indicates that the vfsmount is a shared
+ vfsmount. MNT_UNCLONABLE indicates that the vfsmount cannot be
+ replicated.
+
+ All the shared vfsmounts in a peer group form a cyclic list through
+ ->mnt_share.
+
+ All vfsmounts with the same ->mnt_master form on a cyclic list anchored
+ in ->mnt_master->mnt_slave_list and going through ->mnt_slave.
+
+ ->mnt_master can point to arbitrary (and possibly different) members
+ of master peer group. To find all immediate slaves of a peer group
+ you need to go through _all_ ->mnt_slave_list of its members.
+ Conceptually it's just a single set - distribution among the
+ individual lists does not affect propagation or the way propagation
+ tree is modified by operations.
+
+ All vfsmounts in a peer group have the same ->mnt_master. If it is
+ non-NULL, they form a contiguous (ordered) segment of slave list.
+
+ A example propagation tree looks as shown in the figure below.
+ [ NOTE: Though it looks like a forest, if we consider all the shared
+ mounts as a conceptual entity called 'pnode', it becomes a tree]
+
+
+ A <--> B <--> C <---> D
+ /|\ /| |\
+ / F G J K H I
+ /
+ E<-->K
+ /|\
+ M L N
+
+ In the above figure A,B,C and D all are shared and propagate to each
+ other. 'A' has got 3 slave mounts 'E' 'F' and 'G' 'C' has got 2 slave
+ mounts 'J' and 'K' and 'D' has got two slave mounts 'H' and 'I'.
+ 'E' is also shared with 'K' and they propagate to each other. And
+ 'K' has 3 slaves 'M', 'L' and 'N'
+
+ A's ->mnt_share links with the ->mnt_share of 'B' 'C' and 'D'
+
+ A's ->mnt_slave_list links with ->mnt_slave of 'E', 'K', 'F' and 'G'
+
+ E's ->mnt_share links with ->mnt_share of K
+ 'E', 'K', 'F', 'G' have their ->mnt_master point to struct
+ vfsmount of 'A'
+ 'M', 'L', 'N' have their ->mnt_master point to struct vfsmount of 'K'
+ K's ->mnt_slave_list links with ->mnt_slave of 'M', 'L' and 'N'
+
+ C's ->mnt_slave_list links with ->mnt_slave of 'J' and 'K'
+ J and K's ->mnt_master points to struct vfsmount of C
+ and finally D's ->mnt_slave_list links with ->mnt_slave of 'H' and 'I'
+ 'H' and 'I' have their ->mnt_master pointing to struct vfsmount of 'D'.
+
+
+ NOTE: The propagation tree is orthogonal to the mount tree.
+
+8B Locking:
+
+ ->mnt_share, ->mnt_slave, ->mnt_slave_list, ->mnt_master are protected
+ by namespace_sem (exclusive for modifications, shared for reading).
+
+ Normally we have ->mnt_flags modifications serialized by vfsmount_lock.
+ There are two exceptions: do_add_mount() and clone_mnt().
+ The former modifies a vfsmount that has not been visible in any shared
+ data structures yet.
+ The latter holds namespace_sem and the only references to vfsmount
+ are in lists that can't be traversed without namespace_sem.
+
+8C Algorithm:
+
+ The crux of the implementation resides in rbind/move operation.
+
+ The overall algorithm breaks the operation into 3 phases: (look at
+ attach_recursive_mnt() and propagate_mnt())
+
+ 1. prepare phase.
+ 2. commit phases.
+ 3. abort phases.
+
+ Prepare phase:
+
+ for each mount in the source tree:
+ a) Create the necessary number of mount trees to
+ be attached to each of the mounts that receive
+ propagation from the destination mount.
+ b) Do not attach any of the trees to its destination.
+ However note down its ->mnt_parent and ->mnt_mountpoint
+ c) Link all the new mounts to form a propagation tree that
+ is identical to the propagation tree of the destination
+ mount.
+
+ If this phase is successful, there should be 'n' new
+ propagation trees; where 'n' is the number of mounts in the
+ source tree. Go to the commit phase
+
+ Also there should be 'm' new mount trees, where 'm' is
+ the number of mounts to which the destination mount
+ propagates to.
+
+ if any memory allocations fail, go to the abort phase.
+
+ Commit phase
+ attach each of the mount trees to their corresponding
+ destination mounts.
+
+ Abort phase
+ delete all the newly created trees.
+
+ NOTE: all the propagation related functionality resides in the file
+ pnode.c
+
+
+------------------------------------------------------------------------
+
+version 0.1 (created the initial document, Ram Pai linuxram@us.ibm.com)
+version 0.2 (Incorporated comments from Al Viro)
diff --git a/Documentation/filesystems/spufs.txt b/Documentation/filesystems/spufs.txt
new file mode 100644
index 000000000..1343d118a
--- /dev/null
+++ b/Documentation/filesystems/spufs.txt
@@ -0,0 +1,521 @@
+SPUFS(2) Linux Programmer's Manual SPUFS(2)
+
+
+
+NAME
+ spufs - the SPU file system
+
+
+DESCRIPTION
+ The SPU file system is used on PowerPC machines that implement the Cell
+ Broadband Engine Architecture in order to access Synergistic Processor
+ Units (SPUs).
+
+ The file system provides a name space similar to posix shared memory or
+ message queues. Users that have write permissions on the file system
+ can use spu_create(2) to establish SPU contexts in the spufs root.
+
+ Every SPU context is represented by a directory containing a predefined
+ set of files. These files can be used for manipulating the state of the
+ logical SPU. Users can change permissions on those files, but not actu-
+ ally add or remove files.
+
+
+MOUNT OPTIONS
+ uid=<uid>
+ set the user owning the mount point, the default is 0 (root).
+
+ gid=<gid>
+ set the group owning the mount point, the default is 0 (root).
+
+
+FILES
+ The files in spufs mostly follow the standard behavior for regular sys-
+ tem calls like read(2) or write(2), but often support only a subset of
+ the operations supported on regular file systems. This list details the
+ supported operations and the deviations from the behaviour in the
+ respective man pages.
+
+ All files that support the read(2) operation also support readv(2) and
+ all files that support the write(2) operation also support writev(2).
+ All files support the access(2) and stat(2) family of operations, but
+ only the st_mode, st_nlink, st_uid and st_gid fields of struct stat
+ contain reliable information.
+
+ All files support the chmod(2)/fchmod(2) and chown(2)/fchown(2) opera-
+ tions, but will not be able to grant permissions that contradict the
+ possible operations, e.g. read access on the wbox file.
+
+ The current set of files is:
+
+
+ /mem
+ the contents of the local storage memory of the SPU. This can be
+ accessed like a regular shared memory file and contains both code and
+ data in the address space of the SPU. The possible operations on an
+ open mem file are:
+
+ read(2), pread(2), write(2), pwrite(2), lseek(2)
+ These operate as documented, with the exception that seek(2),
+ write(2) and pwrite(2) are not supported beyond the end of the
+ file. The file size is the size of the local storage of the SPU,
+ which normally is 256 kilobytes.
+
+ mmap(2)
+ Mapping mem into the process address space gives access to the
+ SPU local storage within the process address space. Only
+ MAP_SHARED mappings are allowed.
+
+
+ /mbox
+ The first SPU to CPU communication mailbox. This file is read-only and
+ can be read in units of 32 bits. The file can only be used in non-
+ blocking mode and it even poll() will not block on it. The possible
+ operations on an open mbox file are:
+
+ read(2)
+ If a count smaller than four is requested, read returns -1 and
+ sets errno to EINVAL. If there is no data available in the mail
+ box, the return value is set to -1 and errno becomes EAGAIN.
+ When data has been read successfully, four bytes are placed in
+ the data buffer and the value four is returned.
+
+
+ /ibox
+ The second SPU to CPU communication mailbox. This file is similar to
+ the first mailbox file, but can be read in blocking I/O mode, and the
+ poll family of system calls can be used to wait for it. The possible
+ operations on an open ibox file are:
+
+ read(2)
+ If a count smaller than four is requested, read returns -1 and
+ sets errno to EINVAL. If there is no data available in the mail
+ box and the file descriptor has been opened with O_NONBLOCK, the
+ return value is set to -1 and errno becomes EAGAIN.
+
+ If there is no data available in the mail box and the file
+ descriptor has been opened without O_NONBLOCK, the call will
+ block until the SPU writes to its interrupt mailbox channel.
+ When data has been read successfully, four bytes are placed in
+ the data buffer and the value four is returned.
+
+ poll(2)
+ Poll on the ibox file returns (POLLIN | POLLRDNORM) whenever
+ data is available for reading.
+
+
+ /wbox
+ The CPU to SPU communation mailbox. It is write-only and can be written
+ in units of 32 bits. If the mailbox is full, write() will block and
+ poll can be used to wait for it becoming empty again. The possible
+ operations on an open wbox file are: write(2) If a count smaller than
+ four is requested, write returns -1 and sets errno to EINVAL. If there
+ is no space available in the mail box and the file descriptor has been
+ opened with O_NONBLOCK, the return value is set to -1 and errno becomes
+ EAGAIN.
+
+ If there is no space available in the mail box and the file descriptor
+ has been opened without O_NONBLOCK, the call will block until the SPU
+ reads from its PPE mailbox channel. When data has been read success-
+ fully, four bytes are placed in the data buffer and the value four is
+ returned.
+
+ poll(2)
+ Poll on the ibox file returns (POLLOUT | POLLWRNORM) whenever
+ space is available for writing.
+
+
+ /mbox_stat
+ /ibox_stat
+ /wbox_stat
+ Read-only files that contain the length of the current queue, i.e. how
+ many words can be read from mbox or ibox or how many words can be
+ written to wbox without blocking. The files can be read only in 4-byte
+ units and return a big-endian binary integer number. The possible
+ operations on an open *box_stat file are:
+
+ read(2)
+ If a count smaller than four is requested, read returns -1 and
+ sets errno to EINVAL. Otherwise, a four byte value is placed in
+ the data buffer, containing the number of elements that can be
+ read from (for mbox_stat and ibox_stat) or written to (for
+ wbox_stat) the respective mail box without blocking or resulting
+ in EAGAIN.
+
+
+ /npc
+ /decr
+ /decr_status
+ /spu_tag_mask
+ /event_mask
+ /srr0
+ Internal registers of the SPU. The representation is an ASCII string
+ with the numeric value of the next instruction to be executed. These
+ can be used in read/write mode for debugging, but normal operation of
+ programs should not rely on them because access to any of them except
+ npc requires an SPU context save and is therefore very inefficient.
+
+ The contents of these files are:
+
+ npc Next Program Counter
+
+ decr SPU Decrementer
+
+ decr_status Decrementer Status
+
+ spu_tag_mask MFC tag mask for SPU DMA
+
+ event_mask Event mask for SPU interrupts
+
+ srr0 Interrupt Return address register
+
+
+ The possible operations on an open npc, decr, decr_status,
+ spu_tag_mask, event_mask or srr0 file are:
+
+ read(2)
+ When the count supplied to the read call is shorter than the
+ required length for the pointer value plus a newline character,
+ subsequent reads from the same file descriptor will result in
+ completing the string, regardless of changes to the register by
+ a running SPU task. When a complete string has been read, all
+ subsequent read operations will return zero bytes and a new file
+ descriptor needs to be opened to read the value again.
+
+ write(2)
+ A write operation on the file results in setting the register to
+ the value given in the string. The string is parsed from the
+ beginning to the first non-numeric character or the end of the
+ buffer. Subsequent writes to the same file descriptor overwrite
+ the previous setting.
+
+
+ /fpcr
+ This file gives access to the Floating Point Status and Control Regis-
+ ter as a four byte long file. The operations on the fpcr file are:
+
+ read(2)
+ If a count smaller than four is requested, read returns -1 and
+ sets errno to EINVAL. Otherwise, a four byte value is placed in
+ the data buffer, containing the current value of the fpcr regis-
+ ter.
+
+ write(2)
+ If a count smaller than four is requested, write returns -1 and
+ sets errno to EINVAL. Otherwise, a four byte value is copied
+ from the data buffer, updating the value of the fpcr register.
+
+
+ /signal1
+ /signal2
+ The two signal notification channels of an SPU. These are read-write
+ files that operate on a 32 bit word. Writing to one of these files
+ triggers an interrupt on the SPU. The value written to the signal
+ files can be read from the SPU through a channel read or from host user
+ space through the file. After the value has been read by the SPU, it
+ is reset to zero. The possible operations on an open signal1 or sig-
+ nal2 file are:
+
+ read(2)
+ If a count smaller than four is requested, read returns -1 and
+ sets errno to EINVAL. Otherwise, a four byte value is placed in
+ the data buffer, containing the current value of the specified
+ signal notification register.
+
+ write(2)
+ If a count smaller than four is requested, write returns -1 and
+ sets errno to EINVAL. Otherwise, a four byte value is copied
+ from the data buffer, updating the value of the specified signal
+ notification register. The signal notification register will
+ either be replaced with the input data or will be updated to the
+ bitwise OR or the old value and the input data, depending on the
+ contents of the signal1_type, or signal2_type respectively,
+ file.
+
+
+ /signal1_type
+ /signal2_type
+ These two files change the behavior of the signal1 and signal2 notifi-
+ cation files. The contain a numerical ASCII string which is read as
+ either "1" or "0". In mode 0 (overwrite), the hardware replaces the
+ contents of the signal channel with the data that is written to it. in
+ mode 1 (logical OR), the hardware accumulates the bits that are subse-
+ quently written to it. The possible operations on an open signal1_type
+ or signal2_type file are:
+
+ read(2)
+ When the count supplied to the read call is shorter than the
+ required length for the digit plus a newline character, subse-
+ quent reads from the same file descriptor will result in com-
+ pleting the string. When a complete string has been read, all
+ subsequent read operations will return zero bytes and a new file
+ descriptor needs to be opened to read the value again.
+
+ write(2)
+ A write operation on the file results in setting the register to
+ the value given in the string. The string is parsed from the
+ beginning to the first non-numeric character or the end of the
+ buffer. Subsequent writes to the same file descriptor overwrite
+ the previous setting.
+
+
+EXAMPLES
+ /etc/fstab entry
+ none /spu spufs gid=spu 0 0
+
+
+AUTHORS
+ Arnd Bergmann <arndb@de.ibm.com>, Mark Nutter <mnutter@us.ibm.com>,
+ Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
+
+SEE ALSO
+ capabilities(7), close(2), spu_create(2), spu_run(2), spufs(7)
+
+
+
+Linux 2005-09-28 SPUFS(2)
+
+------------------------------------------------------------------------------
+
+SPU_RUN(2) Linux Programmer's Manual SPU_RUN(2)
+
+
+
+NAME
+ spu_run - execute an spu context
+
+
+SYNOPSIS
+ #include <sys/spu.h>
+
+ int spu_run(int fd, unsigned int *npc, unsigned int *event);
+
+DESCRIPTION
+ The spu_run system call is used on PowerPC machines that implement the
+ Cell Broadband Engine Architecture in order to access Synergistic Pro-
+ cessor Units (SPUs). It uses the fd that was returned from spu_cre-
+ ate(2) to address a specific SPU context. When the context gets sched-
+ uled to a physical SPU, it starts execution at the instruction pointer
+ passed in npc.
+
+ Execution of SPU code happens synchronously, meaning that spu_run does
+ not return while the SPU is still running. If there is a need to exe-
+ cute SPU code in parallel with other code on either the main CPU or
+ other SPUs, you need to create a new thread of execution first, e.g.
+ using the pthread_create(3) call.
+
+ When spu_run returns, the current value of the SPU instruction pointer
+ is written back to npc, so you can call spu_run again without updating
+ the pointers.
+
+ event can be a NULL pointer or point to an extended status code that
+ gets filled when spu_run returns. It can be one of the following con-
+ stants:
+
+ SPE_EVENT_DMA_ALIGNMENT
+ A DMA alignment error
+
+ SPE_EVENT_SPE_DATA_SEGMENT
+ A DMA segmentation error
+
+ SPE_EVENT_SPE_DATA_STORAGE
+ A DMA storage error
+
+ If NULL is passed as the event argument, these errors will result in a
+ signal delivered to the calling process.
+
+RETURN VALUE
+ spu_run returns the value of the spu_status register or -1 to indicate
+ an error and set errno to one of the error codes listed below. The
+ spu_status register value contains a bit mask of status codes and
+ optionally a 14 bit code returned from the stop-and-signal instruction
+ on the SPU. The bit masks for the status codes are:
+
+ 0x02 SPU was stopped by stop-and-signal.
+
+ 0x04 SPU was stopped by halt.
+
+ 0x08 SPU is waiting for a channel.
+
+ 0x10 SPU is in single-step mode.
+
+ 0x20 SPU has tried to execute an invalid instruction.
+
+ 0x40 SPU has tried to access an invalid channel.
+
+ 0x3fff0000
+ The bits masked with this value contain the code returned from
+ stop-and-signal.
+
+ There are always one or more of the lower eight bits set or an error
+ code is returned from spu_run.
+
+ERRORS
+ EAGAIN or EWOULDBLOCK
+ fd is in non-blocking mode and spu_run would block.
+
+ EBADF fd is not a valid file descriptor.
+
+ EFAULT npc is not a valid pointer or status is neither NULL nor a valid
+ pointer.
+
+ EINTR A signal occurred while spu_run was in progress. The npc value
+ has been updated to the new program counter value if necessary.
+
+ EINVAL fd is not a file descriptor returned from spu_create(2).
+
+ ENOMEM Insufficient memory was available to handle a page fault result-
+ ing from an MFC direct memory access.
+
+ ENOSYS the functionality is not provided by the current system, because
+ either the hardware does not provide SPUs or the spufs module is
+ not loaded.
+
+
+NOTES
+ spu_run is meant to be used from libraries that implement a more
+ abstract interface to SPUs, not to be used from regular applications.
+ See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec-
+ ommended libraries.
+
+
+CONFORMING TO
+ This call is Linux specific and only implemented by the ppc64 architec-
+ ture. Programs using this system call are not portable.
+
+
+BUGS
+ The code does not yet fully implement all features lined out here.
+
+
+AUTHOR
+ Arnd Bergmann <arndb@de.ibm.com>
+
+SEE ALSO
+ capabilities(7), close(2), spu_create(2), spufs(7)
+
+
+
+Linux 2005-09-28 SPU_RUN(2)
+
+------------------------------------------------------------------------------
+
+SPU_CREATE(2) Linux Programmer's Manual SPU_CREATE(2)
+
+
+
+NAME
+ spu_create - create a new spu context
+
+
+SYNOPSIS
+ #include <sys/types.h>
+ #include <sys/spu.h>
+
+ int spu_create(const char *pathname, int flags, mode_t mode);
+
+DESCRIPTION
+ The spu_create system call is used on PowerPC machines that implement
+ the Cell Broadband Engine Architecture in order to access Synergistic
+ Processor Units (SPUs). It creates a new logical context for an SPU in
+ pathname and returns a handle to associated with it. pathname must
+ point to a non-existing directory in the mount point of the SPU file
+ system (spufs). When spu_create is successful, a directory gets cre-
+ ated on pathname and it is populated with files.
+
+ The returned file handle can only be passed to spu_run(2) or closed,
+ other operations are not defined on it. When it is closed, all associ-
+ ated directory entries in spufs are removed. When the last file handle
+ pointing either inside of the context directory or to this file
+ descriptor is closed, the logical SPU context is destroyed.
+
+ The parameter flags can be zero or any bitwise or'd combination of the
+ following constants:
+
+ SPU_RAWIO
+ Allow mapping of some of the hardware registers of the SPU into
+ user space. This flag requires the CAP_SYS_RAWIO capability, see
+ capabilities(7).
+
+ The mode parameter specifies the permissions used for creating the new
+ directory in spufs. mode is modified with the user's umask(2) value
+ and then used for both the directory and the files contained in it. The
+ file permissions mask out some more bits of mode because they typically
+ support only read or write access. See stat(2) for a full list of the
+ possible mode values.
+
+
+RETURN VALUE
+ spu_create returns a new file descriptor. It may return -1 to indicate
+ an error condition and set errno to one of the error codes listed
+ below.
+
+
+ERRORS
+ EACCESS
+ The current user does not have write access on the spufs mount
+ point.
+
+ EEXIST An SPU context already exists at the given path name.
+
+ EFAULT pathname is not a valid string pointer in the current address
+ space.
+
+ EINVAL pathname is not a directory in the spufs mount point.
+
+ ELOOP Too many symlinks were found while resolving pathname.
+
+ EMFILE The process has reached its maximum open file limit.
+
+ ENAMETOOLONG
+ pathname was too long.
+
+ ENFILE The system has reached the global open file limit.
+
+ ENOENT Part of pathname could not be resolved.
+
+ ENOMEM The kernel could not allocate all resources required.
+
+ ENOSPC There are not enough SPU resources available to create a new
+ context or the user specific limit for the number of SPU con-
+ texts has been reached.
+
+ ENOSYS the functionality is not provided by the current system, because
+ either the hardware does not provide SPUs or the spufs module is
+ not loaded.
+
+ ENOTDIR
+ A part of pathname is not a directory.
+
+
+
+NOTES
+ spu_create is meant to be used from libraries that implement a more
+ abstract interface to SPUs, not to be used from regular applications.
+ See http://www.bsc.es/projects/deepcomputing/linuxoncell/ for the rec-
+ ommended libraries.
+
+
+FILES
+ pathname must point to a location beneath the mount point of spufs. By
+ convention, it gets mounted in /spu.
+
+
+CONFORMING TO
+ This call is Linux specific and only implemented by the ppc64 architec-
+ ture. Programs using this system call are not portable.
+
+
+BUGS
+ The code does not yet fully implement all features lined out here.
+
+
+AUTHOR
+ Arnd Bergmann <arndb@de.ibm.com>
+
+SEE ALSO
+ capabilities(7), close(2), spu_run(2), spufs(7)
+
+
+
+Linux 2005-09-28 SPU_CREATE(2)
diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt
new file mode 100644
index 000000000..e5274f84d
--- /dev/null
+++ b/Documentation/filesystems/squashfs.txt
@@ -0,0 +1,259 @@
+SQUASHFS 4.0 FILESYSTEM
+=======================
+
+Squashfs is a compressed read-only filesystem for Linux.
+It uses zlib, lz4, lzo, or xz compression to compress files, inodes and
+directories. Inodes in the system are very small and all blocks are packed to
+minimise data overhead. Block sizes greater than 4K are supported up to a
+maximum of 1Mbytes (default block size 128K).
+
+Squashfs is intended for general read-only filesystem use, for archival
+use (i.e. in cases where a .tar.gz file may be used), and in constrained
+block device/memory systems (e.g. embedded systems) where low overhead is
+needed.
+
+Mailing list: squashfs-devel@lists.sourceforge.net
+Web site: www.squashfs.org
+
+1. FILESYSTEM FEATURES
+----------------------
+
+Squashfs filesystem features versus Cramfs:
+
+ Squashfs Cramfs
+
+Max filesystem size: 2^64 256 MiB
+Max file size: ~ 2 TiB 16 MiB
+Max files: unlimited unlimited
+Max directories: unlimited unlimited
+Max entries per directory: unlimited unlimited
+Max block size: 1 MiB 4 KiB
+Metadata compression: yes no
+Directory indexes: yes no
+Sparse file support: yes no
+Tail-end packing (fragments): yes no
+Exportable (NFS etc.): yes no
+Hard link support: yes no
+"." and ".." in readdir: yes no
+Real inode numbers: yes no
+32-bit uids/gids: yes no
+File creation time: yes no
+Xattr support: yes no
+ACL support: no no
+
+Squashfs compresses data, inodes and directories. In addition, inode and
+directory data are highly compacted, and packed on byte boundaries. Each
+compressed inode is on average 8 bytes in length (the exact length varies on
+file type, i.e. regular file, directory, symbolic link, and block/char device
+inodes have different sizes).
+
+2. USING SQUASHFS
+-----------------
+
+As squashfs is a read-only filesystem, the mksquashfs program must be used to
+create populated squashfs filesystems. This and other squashfs utilities
+can be obtained from http://www.squashfs.org. Usage instructions can be
+obtained from this site also.
+
+The squashfs-tools development tree is now located on kernel.org
+ git://git.kernel.org/pub/scm/fs/squashfs/squashfs-tools.git
+
+3. SQUASHFS FILESYSTEM DESIGN
+-----------------------------
+
+A squashfs filesystem consists of a maximum of nine parts, packed together on a
+byte alignment:
+
+ ---------------
+ | superblock |
+ |---------------|
+ | compression |
+ | options |
+ |---------------|
+ | datablocks |
+ | & fragments |
+ |---------------|
+ | inode table |
+ |---------------|
+ | directory |
+ | table |
+ |---------------|
+ | fragment |
+ | table |
+ |---------------|
+ | export |
+ | table |
+ |---------------|
+ | uid/gid |
+ | lookup table |
+ |---------------|
+ | xattr |
+ | table |
+ ---------------
+
+Compressed data blocks are written to the filesystem as files are read from
+the source directory, and checked for duplicates. Once all file data has been
+written the completed inode, directory, fragment, export, uid/gid lookup and
+xattr tables are written.
+
+3.1 Compression options
+-----------------------
+
+Compressors can optionally support compression specific options (e.g.
+dictionary size). If non-default compression options have been used, then
+these are stored here.
+
+3.2 Inodes
+----------
+
+Metadata (inodes and directories) are compressed in 8Kbyte blocks. Each
+compressed block is prefixed by a two byte length, the top bit is set if the
+block is uncompressed. A block will be uncompressed if the -noI option is set,
+or if the compressed block was larger than the uncompressed block.
+
+Inodes are packed into the metadata blocks, and are not aligned to block
+boundaries, therefore inodes overlap compressed blocks. Inodes are identified
+by a 48-bit number which encodes the location of the compressed metadata block
+containing the inode, and the byte offset into that block where the inode is
+placed (<block, offset>).
+
+To maximise compression there are different inodes for each file type
+(regular file, directory, device, etc.), the inode contents and length
+varying with the type.
+
+To further maximise compression, two types of regular file inode and
+directory inode are defined: inodes optimised for frequently occurring
+regular files and directories, and extended types where extra
+information has to be stored.
+
+3.3 Directories
+---------------
+
+Like inodes, directories are packed into compressed metadata blocks, stored
+in a directory table. Directories are accessed using the start address of
+the metablock containing the directory and the offset into the
+decompressed block (<block, offset>).
+
+Directories are organised in a slightly complex way, and are not simply
+a list of file names. The organisation takes advantage of the
+fact that (in most cases) the inodes of the files will be in the same
+compressed metadata block, and therefore, can share the start block.
+Directories are therefore organised in a two level list, a directory
+header containing the shared start block value, and a sequence of directory
+entries, each of which share the shared start block. A new directory header
+is written once/if the inode start block changes. The directory
+header/directory entry list is repeated as many times as necessary.
+
+Directories are sorted, and can contain a directory index to speed up
+file lookup. Directory indexes store one entry per metablock, each entry
+storing the index/filename mapping to the first directory header
+in each metadata block. Directories are sorted in alphabetical order,
+and at lookup the index is scanned linearly looking for the first filename
+alphabetically larger than the filename being looked up. At this point the
+location of the metadata block the filename is in has been found.
+The general idea of the index is to ensure only one metadata block needs to be
+decompressed to do a lookup irrespective of the length of the directory.
+This scheme has the advantage that it doesn't require extra memory overhead
+and doesn't require much extra storage on disk.
+
+3.4 File data
+-------------
+
+Regular files consist of a sequence of contiguous compressed blocks, and/or a
+compressed fragment block (tail-end packed block). The compressed size
+of each datablock is stored in a block list contained within the
+file inode.
+
+To speed up access to datablocks when reading 'large' files (256 Mbytes or
+larger), the code implements an index cache that caches the mapping from
+block index to datablock location on disk.
+
+The index cache allows Squashfs to handle large files (up to 1.75 TiB) while
+retaining a simple and space-efficient block list on disk. The cache
+is split into slots, caching up to eight 224 GiB files (128 KiB blocks).
+Larger files use multiple slots, with 1.75 TiB files using all 8 slots.
+The index cache is designed to be memory efficient, and by default uses
+16 KiB.
+
+3.5 Fragment lookup table
+-------------------------
+
+Regular files can contain a fragment index which is mapped to a fragment
+location on disk and compressed size using a fragment lookup table. This
+fragment lookup table is itself stored compressed into metadata blocks.
+A second index table is used to locate these. This second index table for
+speed of access (and because it is small) is read at mount time and cached
+in memory.
+
+3.6 Uid/gid lookup table
+------------------------
+
+For space efficiency regular files store uid and gid indexes, which are
+converted to 32-bit uids/gids using an id look up table. This table is
+stored compressed into metadata blocks. A second index table is used to
+locate these. This second index table for speed of access (and because it
+is small) is read at mount time and cached in memory.
+
+3.7 Export table
+----------------
+
+To enable Squashfs filesystems to be exportable (via NFS etc.) filesystems
+can optionally (disabled with the -no-exports Mksquashfs option) contain
+an inode number to inode disk location lookup table. This is required to
+enable Squashfs to map inode numbers passed in filehandles to the inode
+location on disk, which is necessary when the export code reinstantiates
+expired/flushed inodes.
+
+This table is stored compressed into metadata blocks. A second index table is
+used to locate these. This second index table for speed of access (and because
+it is small) is read at mount time and cached in memory.
+
+3.8 Xattr table
+---------------
+
+The xattr table contains extended attributes for each inode. The xattrs
+for each inode are stored in a list, each list entry containing a type,
+name and value field. The type field encodes the xattr prefix
+("user.", "trusted." etc) and it also encodes how the name/value fields
+should be interpreted. Currently the type indicates whether the value
+is stored inline (in which case the value field contains the xattr value),
+or if it is stored out of line (in which case the value field stores a
+reference to where the actual value is stored). This allows large values
+to be stored out of line improving scanning and lookup performance and it
+also allows values to be de-duplicated, the value being stored once, and
+all other occurrences holding an out of line reference to that value.
+
+The xattr lists are packed into compressed 8K metadata blocks.
+To reduce overhead in inodes, rather than storing the on-disk
+location of the xattr list inside each inode, a 32-bit xattr id
+is stored. This xattr id is mapped into the location of the xattr
+list using a second xattr id lookup table.
+
+4. TODOS AND OUTSTANDING ISSUES
+-------------------------------
+
+4.1 Todo list
+-------------
+
+Implement ACL support.
+
+4.2 Squashfs internal cache
+---------------------------
+
+Blocks in Squashfs are compressed. To avoid repeatedly decompressing
+recently accessed data Squashfs uses two small metadata and fragment caches.
+
+The cache is not used for file datablocks, these are decompressed and cached in
+the page-cache in the normal way. The cache is used to temporarily cache
+fragment and metadata blocks which have been read as a result of a metadata
+(i.e. inode or directory) or fragment access. Because metadata and fragments
+are packed together into blocks (to gain greater compression) the read of a
+particular piece of metadata or fragment will retrieve other metadata/fragments
+which have been packed with it, these because of locality-of-reference may be
+read in the near future. Temporarily caching them ensures they are available
+for near future access without requiring an additional read and decompress.
+
+In the future this internal cache may be replaced with an implementation which
+uses the kernel page cache. Because the page cache operates on page sized
+units this may introduce additional complexity in terms of locking and
+associated race conditions.
diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt
new file mode 100644
index 000000000..74eaac26f
--- /dev/null
+++ b/Documentation/filesystems/sysfs-pci.txt
@@ -0,0 +1,120 @@
+Accessing PCI device resources through sysfs
+--------------------------------------------
+
+sysfs, usually mounted at /sys, provides access to PCI resources on platforms
+that support it. For example, a given bus might look like this:
+
+ /sys/devices/pci0000:17
+ |-- 0000:17:00.0
+ | |-- class
+ | |-- config
+ | |-- device
+ | |-- enable
+ | |-- irq
+ | |-- local_cpus
+ | |-- remove
+ | |-- resource
+ | |-- resource0
+ | |-- resource1
+ | |-- resource2
+ | |-- rom
+ | |-- subsystem_device
+ | |-- subsystem_vendor
+ | `-- vendor
+ `-- ...
+
+The topmost element describes the PCI domain and bus number. In this case,
+the domain number is 0000 and the bus number is 17 (both values are in hex).
+This bus contains a single function device in slot 0. The domain and bus
+numbers are reproduced for convenience. Under the device directory are several
+files, each with their own function.
+
+ file function
+ ---- --------
+ class PCI class (ascii, ro)
+ config PCI config space (binary, rw)
+ device PCI device (ascii, ro)
+ enable Whether the device is enabled (ascii, rw)
+ irq IRQ number (ascii, ro)
+ local_cpus nearby CPU mask (cpumask, ro)
+ remove remove device from kernel's list (ascii, wo)
+ resource PCI resource host addresses (ascii, ro)
+ resource0..N PCI resource N, if present (binary, mmap, rw[1])
+ resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap)
+ rom PCI ROM resource, if present (binary, ro)
+ subsystem_device PCI subsystem device (ascii, ro)
+ subsystem_vendor PCI subsystem vendor (ascii, ro)
+ vendor PCI vendor (ascii, ro)
+
+ ro - read only file
+ rw - file is readable and writable
+ wo - write only file
+ mmap - file is mmapable
+ ascii - file contains ascii text
+ binary - file contains binary data
+ cpumask - file contains a cpumask type
+
+[1] rw for RESOURCE_IO (I/O port) regions only
+
+The read only files are informational, writes to them will be ignored, with
+the exception of the 'rom' file. Writable files can be used to perform
+actions on the device (e.g. changing config space, detaching a device).
+mmapable files are available via an mmap of the file at offset 0 and can be
+used to do actual device programming from userspace. Note that some platforms
+don't support mmapping of certain resources, so be sure to check the return
+value from any attempted mmap. The most notable of these are I/O port
+resources, which also provide read/write access.
+
+The 'enable' file provides a counter that indicates how many times the device
+has been enabled. If the 'enable' file currently returns '4', and a '1' is
+echoed into it, it will then return '5'. Echoing a '0' into it will decrease
+the count. Even when it returns to 0, though, some of the initialisation
+may not be reversed.
+
+The 'rom' file is special in that it provides read-only access to the device's
+ROM file, if available. It's disabled by default, however, so applications
+should write the string "1" to the file to enable it before attempting a read
+call, and disable it following the access by writing "0" to the file. Note
+that the device must be enabled for a rom read to return data successfully.
+In the event a driver is not bound to the device, it can be enabled using the
+'enable' file, documented above.
+
+The 'remove' file is used to remove the PCI device, by writing a non-zero
+integer to the file. This does not involve any kind of hot-plug functionality,
+e.g. powering off the device. The device is removed from the kernel's list of
+PCI devices, the sysfs directory for it is removed, and the device will be
+removed from any drivers attached to it. Removal of PCI root buses is
+disallowed.
+
+Accessing legacy resources through sysfs
+----------------------------------------
+
+Legacy I/O port and ISA memory resources are also provided in sysfs if the
+underlying platform supports them. They're located in the PCI class hierarchy,
+e.g.
+
+ /sys/class/pci_bus/0000:17/
+ |-- bridge -> ../../../devices/pci0000:17
+ |-- cpuaffinity
+ |-- legacy_io
+ `-- legacy_mem
+
+The legacy_io file is a read/write file that can be used by applications to
+do legacy port I/O. The application should open the file, seek to the desired
+port (e.g. 0x3e8) and do a read or a write of 1, 2 or 4 bytes. The legacy_mem
+file should be mmapped with an offset corresponding to the memory offset
+desired, e.g. 0xa0000 for the VGA frame buffer. The application can then
+simply dereference the returned pointer (after checking for errors of course)
+to access legacy memory space.
+
+Supporting PCI access on new platforms
+--------------------------------------
+
+In order to support PCI resource mapping as described above, Linux platform
+code must define HAVE_PCI_MMAP and provide a pci_mmap_page_range function.
+Platforms are free to only support subsets of the mmap functionality, but
+useful return codes should be provided.
+
+Legacy resources are protected by the HAVE_PCI_LEGACY define. Platforms
+wishing to support legacy functionality should define it and provide
+pci_legacy_read, pci_legacy_write and pci_mmap_legacy_page_range functions.
diff --git a/Documentation/filesystems/sysfs-tagging.txt b/Documentation/filesystems/sysfs-tagging.txt
new file mode 100644
index 000000000..eb843e49c
--- /dev/null
+++ b/Documentation/filesystems/sysfs-tagging.txt
@@ -0,0 +1,42 @@
+Sysfs tagging
+-------------
+
+(Taken almost verbatim from Eric Biederman's netns tagging patch
+commit msg)
+
+The problem. Network devices show up in sysfs and with the network
+namespace active multiple devices with the same name can show up in
+the same directory, ouch!
+
+To avoid that problem and allow existing applications in network
+namespaces to see the same interface that is currently presented in
+sysfs, sysfs now has tagging directory support.
+
+By using the network namespace pointers as tags to separate out the
+the sysfs directory entries we ensure that we don't have conflicts
+in the directories and applications only see a limited set of
+the network devices.
+
+Each sysfs directory entry may be tagged with zero or one
+namespaces. A sysfs_dirent is augmented with a void *s_ns. If a
+directory entry is tagged, then sysfs_dirent->s_flags will have a
+flag between KOBJ_NS_TYPE_NONE and KOBJ_NS_TYPES, and s_ns will
+point to the namespace to which it belongs.
+
+Each sysfs superblock's sysfs_super_info contains an array void
+*ns[KOBJ_NS_TYPES]. When a task in a tagging namespace
+kobj_nstype first mounts sysfs, a new superblock is created. It
+will be differentiated from other sysfs mounts by having its
+s_fs_info->ns[kobj_nstype] set to the new namespace. Note that
+through bind mounting and mounts propagation, a task can easily view
+the contents of other namespaces' sysfs mounts. Therefore, when a
+namespace exits, it will call kobj_ns_exit() to invalidate any
+sysfs_dirent->s_ns pointers pointing to it.
+
+Users of this interface:
+- define a type in the kobj_ns_type enumeration.
+- call kobj_ns_type_register() with its kobj_ns_type_operations which has
+ - current_ns() which returns current's namespace
+ - netlink_ns() which returns a socket's namespace
+ - initial_ns() which returns the initial namesapce
+- call kobj_ns_exit() when an individual tag is no longer valid
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
new file mode 100644
index 000000000..b35a64b82
--- /dev/null
+++ b/Documentation/filesystems/sysfs.txt
@@ -0,0 +1,380 @@
+
+sysfs - _The_ filesystem for exporting kernel objects.
+
+Patrick Mochel <mochel@osdl.org>
+Mike Murphy <mamurph@cs.clemson.edu>
+
+Revised: 16 August 2011
+Original: 10 January 2003
+
+
+What it is:
+~~~~~~~~~~~
+
+sysfs is a ram-based filesystem initially based on ramfs. It provides
+a means to export kernel data structures, their attributes, and the
+linkages between them to userspace.
+
+sysfs is tied inherently to the kobject infrastructure. Please read
+Documentation/kobject.txt for more information concerning the kobject
+interface.
+
+
+Using sysfs
+~~~~~~~~~~~
+
+sysfs is always compiled in if CONFIG_SYSFS is defined. You can access
+it by doing:
+
+ mount -t sysfs sysfs /sys
+
+
+Directory Creation
+~~~~~~~~~~~~~~~~~~
+
+For every kobject that is registered with the system, a directory is
+created for it in sysfs. That directory is created as a subdirectory
+of the kobject's parent, expressing internal object hierarchies to
+userspace. Top-level directories in sysfs represent the common
+ancestors of object hierarchies; i.e. the subsystems the objects
+belong to.
+
+Sysfs internally stores a pointer to the kobject that implements a
+directory in the sysfs_dirent object associated with the directory. In
+the past this kobject pointer has been used by sysfs to do reference
+counting directly on the kobject whenever the file is opened or closed.
+With the current sysfs implementation the kobject reference count is
+only modified directly by the function sysfs_schedule_callback().
+
+
+Attributes
+~~~~~~~~~~
+
+Attributes can be exported for kobjects in the form of regular files in
+the filesystem. Sysfs forwards file I/O operations to methods defined
+for the attributes, providing a means to read and write kernel
+attributes.
+
+Attributes should be ASCII text files, preferably with only one value
+per file. It is noted that it may not be efficient to contain only one
+value per file, so it is socially acceptable to express an array of
+values of the same type.
+
+Mixing types, expressing multiple lines of data, and doing fancy
+formatting of data is heavily frowned upon. Doing these things may get
+you publicly humiliated and your code rewritten without notice.
+
+
+An attribute definition is simply:
+
+struct attribute {
+ char * name;
+ struct module *owner;
+ umode_t mode;
+};
+
+
+int sysfs_create_file(struct kobject * kobj, const struct attribute * attr);
+void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr);
+
+
+A bare attribute contains no means to read or write the value of the
+attribute. Subsystems are encouraged to define their own attribute
+structure and wrapper functions for adding and removing attributes for
+a specific object type.
+
+For example, the driver model defines struct device_attribute like:
+
+struct device_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+};
+
+int device_create_file(struct device *, const struct device_attribute *);
+void device_remove_file(struct device *, const struct device_attribute *);
+
+It also defines this helper for defining device attributes:
+
+#define DEVICE_ATTR(_name, _mode, _show, _store) \
+struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
+
+For example, declaring
+
+static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo);
+
+is equivalent to doing:
+
+static struct device_attribute dev_attr_foo = {
+ .attr = {
+ .name = "foo",
+ .mode = S_IWUSR | S_IRUGO,
+ },
+ .show = show_foo,
+ .store = store_foo,
+};
+
+
+Subsystem-Specific Callbacks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When a subsystem defines a new attribute type, it must implement a
+set of sysfs operations for forwarding read and write calls to the
+show and store methods of the attribute owners.
+
+struct sysfs_ops {
+ ssize_t (*show)(struct kobject *, struct attribute *, char *);
+ ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
+};
+
+[ Subsystems should have already defined a struct kobj_type as a
+descriptor for this type, which is where the sysfs_ops pointer is
+stored. See the kobject documentation for more information. ]
+
+When a file is read or written, sysfs calls the appropriate method
+for the type. The method then translates the generic struct kobject
+and struct attribute pointers to the appropriate pointer types, and
+calls the associated methods.
+
+
+To illustrate:
+
+#define to_dev(obj) container_of(obj, struct device, kobj)
+#define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)
+
+static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct device_attribute *dev_attr = to_dev_attr(attr);
+ struct device *dev = to_dev(kobj);
+ ssize_t ret = -EIO;
+
+ if (dev_attr->show)
+ ret = dev_attr->show(dev, dev_attr, buf);
+ if (ret >= (ssize_t)PAGE_SIZE) {
+ print_symbol("dev_attr_show: %s returned bad count\n",
+ (unsigned long)dev_attr->show);
+ }
+ return ret;
+}
+
+
+
+Reading/Writing Attribute Data
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To read or write attributes, show() or store() methods must be
+specified when declaring the attribute. The method types should be as
+simple as those defined for device attributes:
+
+ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf);
+ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+
+IOW, they should take only an object, an attribute, and a buffer as parameters.
+
+
+sysfs allocates a buffer of size (PAGE_SIZE) and passes it to the
+method. Sysfs will call the method exactly once for each read or
+write. This forces the following behavior on the method
+implementations:
+
+- On read(2), the show() method should fill the entire buffer.
+ Recall that an attribute should only be exporting one value, or an
+ array of similar values, so this shouldn't be that expensive.
+
+ This allows userspace to do partial reads and forward seeks
+ arbitrarily over the entire file at will. If userspace seeks back to
+ zero or does a pread(2) with an offset of '0' the show() method will
+ be called again, rearmed, to fill the buffer.
+
+- On write(2), sysfs expects the entire buffer to be passed during the
+ first write. Sysfs then passes the entire buffer to the store()
+ method.
+
+ When writing sysfs files, userspace processes should first read the
+ entire file, modify the values it wishes to change, then write the
+ entire buffer back.
+
+ Attribute method implementations should operate on an identical
+ buffer when reading and writing values.
+
+Other notes:
+
+- Writing causes the show() method to be rearmed regardless of current
+ file position.
+
+- The buffer will always be PAGE_SIZE bytes in length. On i386, this
+ is 4096.
+
+- show() methods should return the number of bytes printed into the
+ buffer. This is the return value of scnprintf().
+
+- show() should always use scnprintf().
+
+- store() should return the number of bytes used from the buffer. If the
+ entire buffer has been used, just return the count argument.
+
+- show() or store() can always return errors. If a bad value comes
+ through, be sure to return an error.
+
+- The object passed to the methods will be pinned in memory via sysfs
+ referencing counting its embedded object. However, the physical
+ entity (e.g. device) the object represents may not be present. Be
+ sure to have a way to check this, if necessary.
+
+
+A very simple (and naive) implementation of a device attribute is:
+
+static ssize_t show_name(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name);
+}
+
+static ssize_t store_name(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ snprintf(dev->name, sizeof(dev->name), "%.*s",
+ (int)min(count, sizeof(dev->name) - 1), buf);
+ return count;
+}
+
+static DEVICE_ATTR(name, S_IRUGO, show_name, store_name);
+
+
+(Note that the real implementation doesn't allow userspace to set the
+name for a device.)
+
+
+Top Level Directory Layout
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The sysfs directory arrangement exposes the relationship of kernel
+data structures.
+
+The top level sysfs directory looks like:
+
+block/
+bus/
+class/
+dev/
+devices/
+firmware/
+net/
+fs/
+
+devices/ contains a filesystem representation of the device tree. It maps
+directly to the internal kernel device tree, which is a hierarchy of
+struct device.
+
+bus/ contains flat directory layout of the various bus types in the
+kernel. Each bus's directory contains two subdirectories:
+
+ devices/
+ drivers/
+
+devices/ contains symlinks for each device discovered in the system
+that point to the device's directory under root/.
+
+drivers/ contains a directory for each device driver that is loaded
+for devices on that particular bus (this assumes that drivers do not
+span multiple bus types).
+
+fs/ contains a directory for some filesystems. Currently each
+filesystem wanting to export attributes must create its own hierarchy
+below fs/ (see ./fuse.txt for an example).
+
+dev/ contains two directories char/ and block/. Inside these two
+directories there are symlinks named <major>:<minor>. These symlinks
+point to the sysfs directory for the given device. /sys/dev provides a
+quick way to lookup the sysfs interface for a device from the result of
+a stat(2) operation.
+
+More information can driver-model specific features can be found in
+Documentation/driver-model/.
+
+
+TODO: Finish this section.
+
+
+Current Interfaces
+~~~~~~~~~~~~~~~~~~
+
+The following interface layers currently exist in sysfs:
+
+
+- devices (include/linux/device.h)
+----------------------------------
+Structure:
+
+struct device_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+};
+
+Declaring:
+
+DEVICE_ATTR(_name, _mode, _show, _store);
+
+Creation/Removal:
+
+int device_create_file(struct device *dev, const struct device_attribute * attr);
+void device_remove_file(struct device *dev, const struct device_attribute * attr);
+
+
+- bus drivers (include/linux/device.h)
+--------------------------------------
+Structure:
+
+struct bus_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct bus_type *, char * buf);
+ ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
+};
+
+Declaring:
+
+BUS_ATTR(_name, _mode, _show, _store)
+
+Creation/Removal:
+
+int bus_create_file(struct bus_type *, struct bus_attribute *);
+void bus_remove_file(struct bus_type *, struct bus_attribute *);
+
+
+- device drivers (include/linux/device.h)
+-----------------------------------------
+
+Structure:
+
+struct driver_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device_driver *, char * buf);
+ ssize_t (*store)(struct device_driver *, const char * buf,
+ size_t count);
+};
+
+Declaring:
+
+DRIVER_ATTR(_name, _mode, _show, _store)
+
+Creation/Removal:
+
+int driver_create_file(struct device_driver *, const struct driver_attribute *);
+void driver_remove_file(struct device_driver *, const struct driver_attribute *);
+
+
+Documentation
+~~~~~~~~~~~~~
+
+The sysfs directory structure and the attributes in each directory define an
+ABI between the kernel and user space. As for any ABI, it is important that
+this ABI is stable and properly documented. All new sysfs attributes must be
+documented in Documentation/ABI. See also Documentation/ABI/README for more
+information.
diff --git a/Documentation/filesystems/sysv-fs.txt b/Documentation/filesystems/sysv-fs.txt
new file mode 100644
index 000000000..253b50d13
--- /dev/null
+++ b/Documentation/filesystems/sysv-fs.txt
@@ -0,0 +1,197 @@
+It implements all of
+ - Xenix FS,
+ - SystemV/386 FS,
+ - Coherent FS.
+
+To install:
+* Answer the 'System V and Coherent filesystem support' question with 'y'
+ when configuring the kernel.
+* To mount a disk or a partition, use
+ mount [-r] -t sysv device mountpoint
+ The file system type names
+ -t sysv
+ -t xenix
+ -t coherent
+ may be used interchangeably, but the last two will eventually disappear.
+
+Bugs in the present implementation:
+- Coherent FS:
+ - The "free list interleave" n:m is currently ignored.
+ - Only file systems with no filesystem name and no pack name are recognized.
+ (See Coherent "man mkfs" for a description of these features.)
+- SystemV Release 2 FS:
+ The superblock is only searched in the blocks 9, 15, 18, which
+ corresponds to the beginning of track 1 on floppy disks. No support
+ for this FS on hard disk yet.
+
+
+These filesystems are rather similar. Here is a comparison with Minix FS:
+
+* Linux fdisk reports on partitions
+ - Minix FS 0x81 Linux/Minix
+ - Xenix FS ??
+ - SystemV FS ??
+ - Coherent FS 0x08 AIX bootable
+
+* Size of a block or zone (data allocation unit on disk)
+ - Minix FS 1024
+ - Xenix FS 1024 (also 512 ??)
+ - SystemV FS 1024 (also 512 and 2048)
+ - Coherent FS 512
+
+* General layout: all have one boot block, one super block and
+ separate areas for inodes and for directories/data.
+ On SystemV Release 2 FS (e.g. Microport) the first track is reserved and
+ all the block numbers (including the super block) are offset by one track.
+
+* Byte ordering of "short" (16 bit entities) on disk:
+ - Minix FS little endian 0 1
+ - Xenix FS little endian 0 1
+ - SystemV FS little endian 0 1
+ - Coherent FS little endian 0 1
+ Of course, this affects only the file system, not the data of files on it!
+
+* Byte ordering of "long" (32 bit entities) on disk:
+ - Minix FS little endian 0 1 2 3
+ - Xenix FS little endian 0 1 2 3
+ - SystemV FS little endian 0 1 2 3
+ - Coherent FS PDP-11 2 3 0 1
+ Of course, this affects only the file system, not the data of files on it!
+
+* Inode on disk: "short", 0 means non-existent, the root dir ino is:
+ - Minix FS 1
+ - Xenix FS, SystemV FS, Coherent FS 2
+
+* Maximum number of hard links to a file:
+ - Minix FS 250
+ - Xenix FS ??
+ - SystemV FS ??
+ - Coherent FS >=10000
+
+* Free inode management:
+ - Minix FS a bitmap
+ - Xenix FS, SystemV FS, Coherent FS
+ There is a cache of a certain number of free inodes in the super-block.
+ When it is exhausted, new free inodes are found using a linear search.
+
+* Free block management:
+ - Minix FS a bitmap
+ - Xenix FS, SystemV FS, Coherent FS
+ Free blocks are organized in a "free list". Maybe a misleading term,
+ since it is not true that every free block contains a pointer to
+ the next free block. Rather, the free blocks are organized in chunks
+ of limited size, and every now and then a free block contains pointers
+ to the free blocks pertaining to the next chunk; the first of these
+ contains pointers and so on. The list terminates with a "block number"
+ 0 on Xenix FS and SystemV FS, with a block zeroed out on Coherent FS.
+
+* Super-block location:
+ - Minix FS block 1 = bytes 1024..2047
+ - Xenix FS block 1 = bytes 1024..2047
+ - SystemV FS bytes 512..1023
+ - Coherent FS block 1 = bytes 512..1023
+
+* Super-block layout:
+ - Minix FS
+ unsigned short s_ninodes;
+ unsigned short s_nzones;
+ unsigned short s_imap_blocks;
+ unsigned short s_zmap_blocks;
+ unsigned short s_firstdatazone;
+ unsigned short s_log_zone_size;
+ unsigned long s_max_size;
+ unsigned short s_magic;
+ - Xenix FS, SystemV FS, Coherent FS
+ unsigned short s_firstdatazone;
+ unsigned long s_nzones;
+ unsigned short s_fzone_count;
+ unsigned long s_fzones[NICFREE];
+ unsigned short s_finode_count;
+ unsigned short s_finodes[NICINOD];
+ char s_flock;
+ char s_ilock;
+ char s_modified;
+ char s_rdonly;
+ unsigned long s_time;
+ short s_dinfo[4]; -- SystemV FS only
+ unsigned long s_free_zones;
+ unsigned short s_free_inodes;
+ short s_dinfo[4]; -- Xenix FS only
+ unsigned short s_interleave_m,s_interleave_n; -- Coherent FS only
+ char s_fname[6];
+ char s_fpack[6];
+ then they differ considerably:
+ Xenix FS
+ char s_clean;
+ char s_fill[371];
+ long s_magic;
+ long s_type;
+ SystemV FS
+ long s_fill[12 or 14];
+ long s_state;
+ long s_magic;
+ long s_type;
+ Coherent FS
+ unsigned long s_unique;
+ Note that Coherent FS has no magic.
+
+* Inode layout:
+ - Minix FS
+ unsigned short i_mode;
+ unsigned short i_uid;
+ unsigned long i_size;
+ unsigned long i_time;
+ unsigned char i_gid;
+ unsigned char i_nlinks;
+ unsigned short i_zone[7+1+1];
+ - Xenix FS, SystemV FS, Coherent FS
+ unsigned short i_mode;
+ unsigned short i_nlink;
+ unsigned short i_uid;
+ unsigned short i_gid;
+ unsigned long i_size;
+ unsigned char i_zone[3*(10+1+1+1)];
+ unsigned long i_atime;
+ unsigned long i_mtime;
+ unsigned long i_ctime;
+
+* Regular file data blocks are organized as
+ - Minix FS
+ 7 direct blocks
+ 1 indirect block (pointers to blocks)
+ 1 double-indirect block (pointer to pointers to blocks)
+ - Xenix FS, SystemV FS, Coherent FS
+ 10 direct blocks
+ 1 indirect block (pointers to blocks)
+ 1 double-indirect block (pointer to pointers to blocks)
+ 1 triple-indirect block (pointer to pointers to pointers to blocks)
+
+* Inode size, inodes per block
+ - Minix FS 32 32
+ - Xenix FS 64 16
+ - SystemV FS 64 16
+ - Coherent FS 64 8
+
+* Directory entry on disk
+ - Minix FS
+ unsigned short inode;
+ char name[14/30];
+ - Xenix FS, SystemV FS, Coherent FS
+ unsigned short inode;
+ char name[14];
+
+* Dir entry size, dir entries per block
+ - Minix FS 16/32 64/32
+ - Xenix FS 16 64
+ - SystemV FS 16 64
+ - Coherent FS 16 32
+
+* How to implement symbolic links such that the host fsck doesn't scream:
+ - Minix FS normal
+ - Xenix FS kludge: as regular files with chmod 1000
+ - SystemV FS ??
+ - Coherent FS kludge: as regular files with chmod 1000
+
+
+Notation: We often speak of a "block" but mean a zone (the allocation unit)
+and not the disk driver's notion of "block".
diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt
new file mode 100644
index 000000000..98ef55124
--- /dev/null
+++ b/Documentation/filesystems/tmpfs.txt
@@ -0,0 +1,148 @@
+Tmpfs is a file system which keeps all files in virtual memory.
+
+
+Everything in tmpfs is temporary in the sense that no files will be
+created on your hard drive. If you unmount a tmpfs instance,
+everything stored therein is lost.
+
+tmpfs puts everything into the kernel internal caches and grows and
+shrinks to accommodate the files it contains and is able to swap
+unneeded pages out to swap space. It has maximum size limits which can
+be adjusted on the fly via 'mount -o remount ...'
+
+If you compare it to ramfs (which was the template to create tmpfs)
+you gain swapping and limit checking. Another similar thing is the RAM
+disk (/dev/ram*), which simulates a fixed size hard disk in physical
+RAM, where you have to create an ordinary filesystem on top. Ramdisks
+cannot swap and you do not have the possibility to resize them.
+
+Since tmpfs lives completely in the page cache and on swap, all tmpfs
+pages currently in memory will show up as cached. It will not show up
+as shared or something like that. Further on you can check the actual
+RAM+swap use of a tmpfs instance with df(1) and du(1).
+
+
+tmpfs has the following uses:
+
+1) There is always a kernel internal mount which you will not see at
+ all. This is used for shared anonymous mappings and SYSV shared
+ memory.
+
+ This mount does not depend on CONFIG_TMPFS. If CONFIG_TMPFS is not
+ set, the user visible part of tmpfs is not build. But the internal
+ mechanisms are always present.
+
+2) glibc 2.2 and above expects tmpfs to be mounted at /dev/shm for
+ POSIX shared memory (shm_open, shm_unlink). Adding the following
+ line to /etc/fstab should take care of this:
+
+ tmpfs /dev/shm tmpfs defaults 0 0
+
+ Remember to create the directory that you intend to mount tmpfs on
+ if necessary.
+
+ This mount is _not_ needed for SYSV shared memory. The internal
+ mount is used for that. (In the 2.3 kernel versions it was
+ necessary to mount the predecessor of tmpfs (shm fs) to use SYSV
+ shared memory)
+
+3) Some people (including me) find it very convenient to mount it
+ e.g. on /tmp and /var/tmp and have a big swap partition. And now
+ loop mounts of tmpfs files do work, so mkinitrd shipped by most
+ distributions should succeed with a tmpfs /tmp.
+
+4) And probably a lot more I do not know about :-)
+
+
+tmpfs has three mount options for sizing:
+
+size: The limit of allocated bytes for this tmpfs instance. The
+ default is half of your physical RAM without swap. If you
+ oversize your tmpfs instances the machine will deadlock
+ since the OOM handler will not be able to free that memory.
+nr_blocks: The same as size, but in blocks of PAGE_CACHE_SIZE.
+nr_inodes: The maximum number of inodes for this instance. The default
+ is half of the number of your physical RAM pages, or (on a
+ machine with highmem) the number of lowmem RAM pages,
+ whichever is the lower.
+
+These parameters accept a suffix k, m or g for kilo, mega and giga and
+can be changed on remount. The size parameter also accepts a suffix %
+to limit this tmpfs instance to that percentage of your physical RAM:
+the default, when neither size nor nr_blocks is specified, is size=50%
+
+If nr_blocks=0 (or size=0), blocks will not be limited in that instance;
+if nr_inodes=0, inodes will not be limited. It is generally unwise to
+mount with such options, since it allows any user with write access to
+use up all the memory on the machine; but enhances the scalability of
+that instance in a system with many cpus making intensive use of it.
+
+
+tmpfs has a mount option to set the NUMA memory allocation policy for
+all files in that instance (if CONFIG_NUMA is enabled) - which can be
+adjusted on the fly via 'mount -o remount ...'
+
+mpol=default use the process allocation policy
+ (see set_mempolicy(2))
+mpol=prefer:Node prefers to allocate memory from the given Node
+mpol=bind:NodeList allocates memory only from nodes in NodeList
+mpol=interleave prefers to allocate from each node in turn
+mpol=interleave:NodeList allocates from each node of NodeList in turn
+mpol=local prefers to allocate memory from the local node
+
+NodeList format is a comma-separated list of decimal numbers and ranges,
+a range being two hyphen-separated decimal numbers, the smallest and
+largest node numbers in the range. For example, mpol=bind:0-3,5,7,9-15
+
+A memory policy with a valid NodeList will be saved, as specified, for
+use at file creation time. When a task allocates a file in the file
+system, the mount option memory policy will be applied with a NodeList,
+if any, modified by the calling task's cpuset constraints
+[See Documentation/cgroups/cpusets.txt] and any optional flags, listed
+below. If the resulting NodeLists is the empty set, the effective memory
+policy for the file will revert to "default" policy.
+
+NUMA memory allocation policies have optional flags that can be used in
+conjunction with their modes. These optional flags can be specified
+when tmpfs is mounted by appending them to the mode before the NodeList.
+See Documentation/vm/numa_memory_policy.txt for a list of all available
+memory allocation policy mode flags and their effect on memory policy.
+
+ =static is equivalent to MPOL_F_STATIC_NODES
+ =relative is equivalent to MPOL_F_RELATIVE_NODES
+
+For example, mpol=bind=static:NodeList, is the equivalent of an
+allocation policy of MPOL_BIND | MPOL_F_STATIC_NODES.
+
+Note that trying to mount a tmpfs with an mpol option will fail if the
+running kernel does not support NUMA; and will fail if its nodelist
+specifies a node which is not online. If your system relies on that
+tmpfs being mounted, but from time to time runs a kernel built without
+NUMA capability (perhaps a safe recovery kernel), or with fewer nodes
+online, then it is advisable to omit the mpol option from automatic
+mount options. It can be added later, when the tmpfs is already mounted
+on MountPoint, by 'mount -o remount,mpol=Policy:NodeList MountPoint'.
+
+
+To specify the initial root directory you can use the following mount
+options:
+
+mode: The permissions as an octal number
+uid: The user id
+gid: The group id
+
+These options do not have any effect on remount. You can change these
+parameters with chmod(1), chown(1) and chgrp(1) on a mounted filesystem.
+
+
+So 'mount -t tmpfs -o size=10G,nr_inodes=10k,mode=700 tmpfs /mytmpfs'
+will give you tmpfs instance on /mytmpfs which can allocate 10GB
+RAM/SWAP in 10240 inodes and it is only accessible by root.
+
+
+Author:
+ Christoph Rohland <cr@sap.com>, 1.12.01
+Updated:
+ Hugh Dickins, 4 June 2007
+Updated:
+ KOSAKI Motohiro, 16 Mar 2010
diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt
new file mode 100644
index 000000000..a0a61d2f3
--- /dev/null
+++ b/Documentation/filesystems/ubifs.txt
@@ -0,0 +1,119 @@
+Introduction
+=============
+
+UBIFS file-system stands for UBI File System. UBI stands for "Unsorted
+Block Images". UBIFS is a flash file system, which means it is designed
+to work with flash devices. It is important to understand, that UBIFS
+is completely different to any traditional file-system in Linux, like
+Ext2, XFS, JFS, etc. UBIFS represents a separate class of file-systems
+which work with MTD devices, not block devices. The other Linux
+file-system of this class is JFFS2.
+
+To make it more clear, here is a small comparison of MTD devices and
+block devices.
+
+1 MTD devices represent flash devices and they consist of eraseblocks of
+ rather large size, typically about 128KiB. Block devices consist of
+ small blocks, typically 512 bytes.
+2 MTD devices support 3 main operations - read from some offset within an
+ eraseblock, write to some offset within an eraseblock, and erase a whole
+ eraseblock. Block devices support 2 main operations - read a whole
+ block and write a whole block.
+3 The whole eraseblock has to be erased before it becomes possible to
+ re-write its contents. Blocks may be just re-written.
+4 Eraseblocks become worn out after some number of erase cycles -
+ typically 100K-1G for SLC NAND and NOR flashes, and 1K-10K for MLC
+ NAND flashes. Blocks do not have the wear-out property.
+5 Eraseblocks may become bad (only on NAND flashes) and software should
+ deal with this. Blocks on hard drives typically do not become bad,
+ because hardware has mechanisms to substitute bad blocks, at least in
+ modern LBA disks.
+
+It should be quite obvious why UBIFS is very different to traditional
+file-systems.
+
+UBIFS works on top of UBI. UBI is a separate software layer which may be
+found in drivers/mtd/ubi. UBI is basically a volume management and
+wear-leveling layer. It provides so called UBI volumes which is a higher
+level abstraction than a MTD device. The programming model of UBI devices
+is very similar to MTD devices - they still consist of large eraseblocks,
+they have read/write/erase operations, but UBI devices are devoid of
+limitations like wear and bad blocks (items 4 and 5 in the above list).
+
+In a sense, UBIFS is a next generation of JFFS2 file-system, but it is
+very different and incompatible to JFFS2. The following are the main
+differences.
+
+* JFFS2 works on top of MTD devices, UBIFS depends on UBI and works on
+ top of UBI volumes.
+* JFFS2 does not have on-media index and has to build it while mounting,
+ which requires full media scan. UBIFS maintains the FS indexing
+ information on the flash media and does not require full media scan,
+ so it mounts many times faster than JFFS2.
+* JFFS2 is a write-through file-system, while UBIFS supports write-back,
+ which makes UBIFS much faster on writes.
+
+Similarly to JFFS2, UBIFS supports on-the-flight compression which makes
+it possible to fit quite a lot of data to the flash.
+
+Similarly to JFFS2, UBIFS is tolerant of unclean reboots and power-cuts.
+It does not need stuff like fsck.ext2. UBIFS automatically replays its
+journal and recovers from crashes, ensuring that the on-flash data
+structures are consistent.
+
+UBIFS scales logarithmically (most of the data structures it uses are
+trees), so the mount time and memory consumption do not linearly depend
+on the flash size, like in case of JFFS2. This is because UBIFS
+maintains the FS index on the flash media. However, UBIFS depends on
+UBI, which scales linearly. So overall UBI/UBIFS stack scales linearly.
+Nevertheless, UBI/UBIFS scales considerably better than JFFS2.
+
+The authors of UBIFS believe, that it is possible to develop UBI2 which
+would scale logarithmically as well. UBI2 would support the same API as UBI,
+but it would be binary incompatible to UBI. So UBIFS would not need to be
+changed to use UBI2
+
+
+Mount options
+=============
+
+(*) == default.
+
+bulk_read read more in one go to take advantage of flash
+ media that read faster sequentially
+no_bulk_read (*) do not bulk-read
+no_chk_data_crc (*) skip checking of CRCs on data nodes in order to
+ improve read performance. Use this option only
+ if the flash media is highly reliable. The effect
+ of this option is that corruption of the contents
+ of a file can go unnoticed.
+chk_data_crc do not skip checking CRCs on data nodes
+compr=none override default compressor and set it to "none"
+compr=lzo override default compressor and set it to "lzo"
+compr=zlib override default compressor and set it to "zlib"
+
+
+Quick usage instructions
+========================
+
+The UBI volume to mount is specified using "ubiX_Y" or "ubiX:NAME" syntax,
+where "X" is UBI device number, "Y" is UBI volume number, and "NAME" is
+UBI volume name.
+
+Mount volume 0 on UBI device 0 to /mnt/ubifs:
+$ mount -t ubifs ubi0_0 /mnt/ubifs
+
+Mount "rootfs" volume of UBI device 0 to /mnt/ubifs ("rootfs" is volume
+name):
+$ mount -t ubifs ubi0:rootfs /mnt/ubifs
+
+The following is an example of the kernel boot arguments to attach mtd0
+to UBI and mount volume "rootfs":
+ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs
+
+References
+==========
+
+UBIFS documentation and FAQ/HOWTO at the MTD web site:
+http://www.linux-mtd.infradead.org/doc/ubifs.html
+http://www.linux-mtd.infradead.org/faq/ubifs.html
diff --git a/Documentation/filesystems/udf.txt b/Documentation/filesystems/udf.txt
new file mode 100644
index 000000000..902b95d0e
--- /dev/null
+++ b/Documentation/filesystems/udf.txt
@@ -0,0 +1,82 @@
+*
+* Documentation/filesystems/udf.txt
+*
+UDF Filesystem version 0.9.8.1
+
+If you encounter problems with reading UDF discs using this driver,
+please report them to linux_udf@hpesjro.fc.hp.com, which is the
+developer's list.
+
+Write support requires a block driver which supports writing. Currently
+dvd+rw drives and media support true random sector writes, and so a udf
+filesystem on such devices can be directly mounted read/write. CD-RW
+media however, does not support this. Instead the media can be formatted
+for packet mode using the utility cdrwtool, then the pktcdvd driver can
+be bound to the underlying cd device to provide the required buffering
+and read-modify-write cycles to allow the filesystem random sector writes
+while providing the hardware with only full packet writes. While not
+required for dvd+rw media, use of the pktcdvd driver often enhances
+performance due to very poor read-modify-write support supplied internally
+by drive firmware.
+
+-------------------------------------------------------------------------------
+The following mount options are supported:
+
+ gid= Set the default group.
+ umask= Set the default umask.
+ mode= Set the default file permissions.
+ dmode= Set the default directory permissions.
+ uid= Set the default user.
+ bs= Set the block size.
+ unhide Show otherwise hidden files.
+ undelete Show deleted files in lists.
+ adinicb Embed data in the inode (default)
+ noadinicb Don't embed data in the inode
+ shortad Use short ad's
+ longad Use long ad's (default)
+ nostrict Unset strict conformance
+ iocharset= Set the NLS character set
+
+The uid= and gid= options need a bit more explaining. They will accept a
+decimal numeric value which will be used as the default ID for that mount.
+They will also accept the string "ignore" and "forget". For files on the disk
+that are owned by nobody ( -1 ), they will instead look as if they are owned
+by the default ID. The ignore option causes the default ID to override all
+IDs on the disk, not just -1. The forget option causes all IDs to be written
+to disk as -1, so when the media is later remounted, they will appear to be
+owned by whatever default ID it is mounted with at that time.
+
+For typical desktop use of removable media, you should set the ID to that
+of the interactively logged on user, and also specify both the forget and
+ignore options. This way the interactive user will always see the files
+on the disk as belonging to him.
+
+The remaining are for debugging and disaster recovery:
+
+ novrs Skip volume sequence recognition
+
+The following expect a offset from 0.
+
+ session= Set the CDROM session (default= last session)
+ anchor= Override standard anchor location. (default= 256)
+ volume= Override the VolumeDesc location. (unused)
+ partition= Override the PartitionDesc location. (unused)
+ lastblock= Set the last block of the filesystem/
+
+The following expect a offset from the partition root.
+
+ fileset= Override the fileset block location. (unused)
+ rootdir= Override the root directory location. (unused)
+ WARNING: overriding the rootdir to a non-directory may
+ yield highly unpredictable results.
+-------------------------------------------------------------------------------
+
+
+For the latest version and toolset see:
+ http://linux-udf.sourceforge.net/
+
+Documentation on UDF and ECMA 167 is available FREE from:
+ http://www.osta.org/
+ http://www.ecma-international.org/
+
+Ben Fennema <bfennema@falcon.csc.calpoly.edu>
diff --git a/Documentation/filesystems/ufs.txt b/Documentation/filesystems/ufs.txt
new file mode 100644
index 000000000..7a602adec
--- /dev/null
+++ b/Documentation/filesystems/ufs.txt
@@ -0,0 +1,60 @@
+USING UFS
+=========
+
+mount -t ufs -o ufstype=type_of_ufs device dir
+
+
+UFS OPTIONS
+===========
+
+ufstype=type_of_ufs
+ UFS is a file system widely used in different operating systems.
+ The problem are differences among implementations. Features of
+ some implementations are undocumented, so its hard to recognize
+ type of ufs automatically. That's why user must specify type of
+ ufs manually by mount option ufstype. Possible values are:
+
+ old old format of ufs
+ default value, supported as read-only
+
+ 44bsd used in FreeBSD, NetBSD, OpenBSD
+ supported as read-write
+
+ ufs2 used in FreeBSD 5.x
+ supported as read-write
+
+ 5xbsd synonym for ufs2
+
+ sun used in SunOS (Solaris)
+ supported as read-write
+
+ sunx86 used in SunOS for Intel (Solarisx86)
+ supported as read-write
+
+ hp used in HP-UX
+ supported as read-only
+
+ nextstep
+ used in NextStep
+ supported as read-only
+
+ nextstep-cd
+ used for NextStep CDROMs (block_size == 2048)
+ supported as read-only
+
+ openstep
+ used in OpenStep
+ supported as read-only
+
+
+POSSIBLE PROBLEMS
+=================
+
+See next section, if you have any.
+
+
+BUG REPORTS
+===========
+
+Any ufs bug report you can send to daniel.pirkl@email.cz or
+to dushistov@mail.ru (do not send partition tables bug reports).
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
new file mode 100644
index 000000000..ce1126ace
--- /dev/null
+++ b/Documentation/filesystems/vfat.txt
@@ -0,0 +1,336 @@
+USING VFAT
+----------------------------------------------------------------------
+To use the vfat filesystem, use the filesystem type 'vfat'. i.e.
+ mount -t vfat /dev/fd0 /mnt
+
+No special partition formatter is required. mkdosfs will work fine
+if you want to format from within Linux.
+
+VFAT MOUNT OPTIONS
+----------------------------------------------------------------------
+uid=### -- Set the owner of all files on this filesystem.
+ The default is the uid of current process.
+
+gid=### -- Set the group of all files on this filesystem.
+ The default is the gid of current process.
+
+umask=### -- The permission mask (for files and directories, see umask(1)).
+ The default is the umask of current process.
+
+dmask=### -- The permission mask for the directory.
+ The default is the umask of current process.
+
+fmask=### -- The permission mask for files.
+ The default is the umask of current process.
+
+allow_utime=### -- This option controls the permission check of mtime/atime.
+
+ 20 - If current process is in group of file's group ID,
+ you can change timestamp.
+ 2 - Other users can change timestamp.
+
+ The default is set from `dmask' option. (If the directory is
+ writable, utime(2) is also allowed. I.e. ~dmask & 022)
+
+ Normally utime(2) checks current process is owner of
+ the file, or it has CAP_FOWNER capability. But FAT
+ filesystem doesn't have uid/gid on disk, so normal
+ check is too unflexible. With this option you can
+ relax it.
+
+codepage=### -- Sets the codepage number for converting to shortname
+ characters on FAT filesystem.
+ By default, FAT_DEFAULT_CODEPAGE setting is used.
+
+iocharset=<name> -- Character set to use for converting between the
+ encoding is used for user visible filename and 16 bit
+ Unicode characters. Long filenames are stored on disk
+ in Unicode format, but Unix for the most part doesn't
+ know how to deal with Unicode.
+ By default, FAT_DEFAULT_IOCHARSET setting is used.
+
+ There is also an option of doing UTF-8 translations
+ with the utf8 option.
+
+ NOTE: "iocharset=utf8" is not recommended. If unsure,
+ you should consider the following option instead.
+
+utf8=<bool> -- UTF-8 is the filesystem safe version of Unicode that
+ is used by the console. It can be enabled for the
+ filesystem with this option. If 'uni_xlate' gets set,
+ UTF-8 gets disabled.
+
+uni_xlate=<bool> -- Translate unhandled Unicode characters to special
+ escaped sequences. This would let you backup and
+ restore filenames that are created with any Unicode
+ characters. Until Linux supports Unicode for real,
+ this gives you an alternative. Without this option,
+ a '?' is used when no translation is possible. The
+ escape character is ':' because it is otherwise
+ illegal on the vfat filesystem. The escape sequence
+ that gets used is ':' and the four digits of hexadecimal
+ unicode.
+
+nonumtail=<bool> -- When creating 8.3 aliases, normally the alias will
+ end in '~1' or tilde followed by some number. If this
+ option is set, then if the filename is
+ "longfilename.txt" and "longfile.txt" does not
+ currently exist in the directory, 'longfile.txt' will
+ be the short alias instead of 'longfi~1.txt'.
+
+usefree -- Use the "free clusters" value stored on FSINFO. It'll
+ be used to determine number of free clusters without
+ scanning disk. But it's not used by default, because
+ recent Windows don't update it correctly in some
+ case. If you are sure the "free clusters" on FSINFO is
+ correct, by this option you can avoid scanning disk.
+
+quiet -- Stops printing certain warning messages.
+
+check=s|r|n -- Case sensitivity checking setting.
+ s: strict, case sensitive
+ r: relaxed, case insensitive
+ n: normal, default setting, currently case insensitive
+
+nocase -- This was deprecated for vfat. Use shortname=win95 instead.
+
+shortname=lower|win95|winnt|mixed
+ -- Shortname display/create setting.
+ lower: convert to lowercase for display,
+ emulate the Windows 95 rule for create.
+ win95: emulate the Windows 95 rule for display/create.
+ winnt: emulate the Windows NT rule for display/create.
+ mixed: emulate the Windows NT rule for display,
+ emulate the Windows 95 rule for create.
+ Default setting is `mixed'.
+
+tz=UTC -- Interpret timestamps as UTC rather than local time.
+ This option disables the conversion of timestamps
+ between local time (as used by Windows on FAT) and UTC
+ (which Linux uses internally). This is particularly
+ useful when mounting devices (like digital cameras)
+ that are set to UTC in order to avoid the pitfalls of
+ local time.
+time_offset=minutes
+ -- Set offset for conversion of timestamps from local time
+ used by FAT to UTC. I.e. <minutes> minutes will be subtracted
+ from each timestamp to convert it to UTC used internally by
+ Linux. This is useful when time zone set in sys_tz is
+ not the time zone used by the filesystem. Note that this
+ option still does not provide correct time stamps in all
+ cases in presence of DST - time stamps in a different DST
+ setting will be off by one hour.
+
+showexec -- If set, the execute permission bits of the file will be
+ allowed only if the extension part of the name is .EXE,
+ .COM, or .BAT. Not set by default.
+
+debug -- Can be set, but unused by the current implementation.
+
+sys_immutable -- If set, ATTR_SYS attribute on FAT is handled as
+ IMMUTABLE flag on Linux. Not set by default.
+
+flush -- If set, the filesystem will try to flush to disk more
+ early than normal. Not set by default.
+
+rodir -- FAT has the ATTR_RO (read-only) attribute. On Windows,
+ the ATTR_RO of the directory will just be ignored,
+ and is used only by applications as a flag (e.g. it's set
+ for the customized folder).
+
+ If you want to use ATTR_RO as read-only flag even for
+ the directory, set this option.
+
+errors=panic|continue|remount-ro
+ -- specify FAT behavior on critical errors: panic, continue
+ without doing anything or remount the partition in
+ read-only mode (default behavior).
+
+discard -- If set, issues discard/TRIM commands to the block
+ device when blocks are freed. This is useful for SSD devices
+ and sparse/thinly-provisoned LUNs.
+
+nfs=stale_rw|nostale_ro
+ Enable this only if you want to export the FAT filesystem
+ over NFS.
+
+ stale_rw: This option maintains an index (cache) of directory
+ inodes by i_logstart which is used by the nfs-related code to
+ improve look-ups. Full file operations (read/write) over NFS is
+ supported but with cache eviction at NFS server, this could
+ result in ESTALE issues.
+
+ nostale_ro: This option bases the inode number and filehandle
+ on the on-disk location of a file in the MS-DOS directory entry.
+ This ensures that ESTALE will not be returned after a file is
+ evicted from the inode cache. However, it means that operations
+ such as rename, create and unlink could cause filehandles that
+ previously pointed at one file to point at a different file,
+ potentially causing data corruption. For this reason, this
+ option also mounts the filesystem readonly.
+
+ To maintain backward compatibility, '-o nfs' is also accepted,
+ defaulting to stale_rw
+
+dos1xfloppy -- If set, use a fallback default BIOS Parameter Block
+ configuration, determined by backing device size. These static
+ parameters match defaults assumed by DOS 1.x for 160 kiB,
+ 180 kiB, 320 kiB, and 360 kiB floppies and floppy images.
+
+
+<bool>: 0,1,yes,no,true,false
+
+TODO
+----------------------------------------------------------------------
+* Need to get rid of the raw scanning stuff. Instead, always use
+ a get next directory entry approach. The only thing left that uses
+ raw scanning is the directory renaming code.
+
+
+POSSIBLE PROBLEMS
+----------------------------------------------------------------------
+* vfat_valid_longname does not properly checked reserved names.
+* When a volume name is the same as a directory name in the root
+ directory of the filesystem, the directory name sometimes shows
+ up as an empty file.
+* autoconv option does not work correctly.
+
+BUG REPORTS
+----------------------------------------------------------------------
+If you have trouble with the VFAT filesystem, mail bug reports to
+chaffee@bmrc.cs.berkeley.edu. Please specify the filename
+and the operation that gave you trouble.
+
+TEST SUITE
+----------------------------------------------------------------------
+If you plan to make any modifications to the vfat filesystem, please
+get the test suite that comes with the vfat distribution at
+
+ http://web.archive.org/web/*/http://bmrc.berkeley.edu/
+ people/chaffee/vfat.html
+
+This tests quite a few parts of the vfat filesystem and additional
+tests for new features or untested features would be appreciated.
+
+NOTES ON THE STRUCTURE OF THE VFAT FILESYSTEM
+----------------------------------------------------------------------
+(This documentation was provided by Galen C. Hunt <gchunt@cs.rochester.edu>
+ and lightly annotated by Gordon Chaffee).
+
+This document presents a very rough, technical overview of my
+knowledge of the extended FAT file system used in Windows NT 3.5 and
+Windows 95. I don't guarantee that any of the following is correct,
+but it appears to be so.
+
+The extended FAT file system is almost identical to the FAT
+file system used in DOS versions up to and including 6.223410239847
+:-). The significant change has been the addition of long file names.
+These names support up to 255 characters including spaces and lower
+case characters as opposed to the traditional 8.3 short names.
+
+Here is the description of the traditional FAT entry in the current
+Windows 95 filesystem:
+
+ struct directory { // Short 8.3 names
+ unsigned char name[8]; // file name
+ unsigned char ext[3]; // file extension
+ unsigned char attr; // attribute byte
+ unsigned char lcase; // Case for base and extension
+ unsigned char ctime_ms; // Creation time, milliseconds
+ unsigned char ctime[2]; // Creation time
+ unsigned char cdate[2]; // Creation date
+ unsigned char adate[2]; // Last access date
+ unsigned char reserved[2]; // reserved values (ignored)
+ unsigned char time[2]; // time stamp
+ unsigned char date[2]; // date stamp
+ unsigned char start[2]; // starting cluster number
+ unsigned char size[4]; // size of the file
+ };
+
+The lcase field specifies if the base and/or the extension of an 8.3
+name should be capitalized. This field does not seem to be used by
+Windows 95 but it is used by Windows NT. The case of filenames is not
+completely compatible from Windows NT to Windows 95. It is not completely
+compatible in the reverse direction, however. Filenames that fit in
+the 8.3 namespace and are written on Windows NT to be lowercase will
+show up as uppercase on Windows 95.
+
+Note that the "start" and "size" values are actually little
+endian integer values. The descriptions of the fields in this
+structure are public knowledge and can be found elsewhere.
+
+With the extended FAT system, Microsoft has inserted extra
+directory entries for any files with extended names. (Any name which
+legally fits within the old 8.3 encoding scheme does not have extra
+entries.) I call these extra entries slots. Basically, a slot is a
+specially formatted directory entry which holds up to 13 characters of
+a file's extended name. Think of slots as additional labeling for the
+directory entry of the file to which they correspond. Microsoft
+prefers to refer to the 8.3 entry for a file as its alias and the
+extended slot directory entries as the file name.
+
+The C structure for a slot directory entry follows:
+
+ struct slot { // Up to 13 characters of a long name
+ unsigned char id; // sequence number for slot
+ unsigned char name0_4[10]; // first 5 characters in name
+ unsigned char attr; // attribute byte
+ unsigned char reserved; // always 0
+ unsigned char alias_checksum; // checksum for 8.3 alias
+ unsigned char name5_10[12]; // 6 more characters in name
+ unsigned char start[2]; // starting cluster number
+ unsigned char name11_12[4]; // last 2 characters in name
+ };
+
+If the layout of the slots looks a little odd, it's only
+because of Microsoft's efforts to maintain compatibility with old
+software. The slots must be disguised to prevent old software from
+panicking. To this end, a number of measures are taken:
+
+ 1) The attribute byte for a slot directory entry is always set
+ to 0x0f. This corresponds to an old directory entry with
+ attributes of "hidden", "system", "read-only", and "volume
+ label". Most old software will ignore any directory
+ entries with the "volume label" bit set. Real volume label
+ entries don't have the other three bits set.
+
+ 2) The starting cluster is always set to 0, an impossible
+ value for a DOS file.
+
+Because the extended FAT system is backward compatible, it is
+possible for old software to modify directory entries. Measures must
+be taken to ensure the validity of slots. An extended FAT system can
+verify that a slot does in fact belong to an 8.3 directory entry by
+the following:
+
+ 1) Positioning. Slots for a file always immediately proceed
+ their corresponding 8.3 directory entry. In addition, each
+ slot has an id which marks its order in the extended file
+ name. Here is a very abbreviated view of an 8.3 directory
+ entry and its corresponding long name slots for the file
+ "My Big File.Extension which is long":
+
+ <proceeding files...>
+ <slot #3, id = 0x43, characters = "h is long">
+ <slot #2, id = 0x02, characters = "xtension whic">
+ <slot #1, id = 0x01, characters = "My Big File.E">
+ <directory entry, name = "MYBIGFIL.EXT">
+
+ Note that the slots are stored from last to first. Slots
+ are numbered from 1 to N. The Nth slot is or'ed with 0x40
+ to mark it as the last one.
+
+ 2) Checksum. Each slot has an "alias_checksum" value. The
+ checksum is calculated from the 8.3 name using the
+ following algorithm:
+
+ for (sum = i = 0; i < 11; i++) {
+ sum = (((sum&1)<<7)|((sum&0xfe)>>1)) + name[i]
+ }
+
+ 3) If there is free space in the final slot, a Unicode NULL (0x0000)
+ is stored after the final character. After that, all unused
+ characters in the final slot are set to Unicode 0xFFFF.
+
+Finally, note that the extended name is stored in Unicode. Each Unicode
+character takes two bytes.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
new file mode 100644
index 000000000..5d833b32b
--- /dev/null
+++ b/Documentation/filesystems/vfs.txt
@@ -0,0 +1,1171 @@
+
+ Overview of the Linux Virtual File System
+
+ Original author: Richard Gooch <rgooch@atnf.csiro.au>
+
+ Last updated on June 24, 2007.
+
+ Copyright (C) 1999 Richard Gooch
+ Copyright (C) 2005 Pekka Enberg
+
+ This file is released under the GPLv2.
+
+
+Introduction
+============
+
+The Virtual File System (also known as the Virtual Filesystem Switch)
+is the software layer in the kernel that provides the filesystem
+interface to userspace programs. It also provides an abstraction
+within the kernel which allows different filesystem implementations to
+coexist.
+
+VFS system calls open(2), stat(2), read(2), write(2), chmod(2) and so
+on are called from a process context. Filesystem locking is described
+in the document Documentation/filesystems/Locking.
+
+
+Directory Entry Cache (dcache)
+------------------------------
+
+The VFS implements the open(2), stat(2), chmod(2), and similar system
+calls. The pathname argument that is passed to them is used by the VFS
+to search through the directory entry cache (also known as the dentry
+cache or dcache). This provides a very fast look-up mechanism to
+translate a pathname (filename) into a specific dentry. Dentries live
+in RAM and are never saved to disc: they exist only for performance.
+
+The dentry cache is meant to be a view into your entire filespace. As
+most computers cannot fit all dentries in the RAM at the same time,
+some bits of the cache are missing. In order to resolve your pathname
+into a dentry, the VFS may have to resort to creating dentries along
+the way, and then loading the inode. This is done by looking up the
+inode.
+
+
+The Inode Object
+----------------
+
+An individual dentry usually has a pointer to an inode. Inodes are
+filesystem objects such as regular files, directories, FIFOs and other
+beasts. They live either on the disc (for block device filesystems)
+or in the memory (for pseudo filesystems). Inodes that live on the
+disc are copied into the memory when required and changes to the inode
+are written back to disc. A single inode can be pointed to by multiple
+dentries (hard links, for example, do this).
+
+To look up an inode requires that the VFS calls the lookup() method of
+the parent directory inode. This method is installed by the specific
+filesystem implementation that the inode lives in. Once the VFS has
+the required dentry (and hence the inode), we can do all those boring
+things like open(2) the file, or stat(2) it to peek at the inode
+data. The stat(2) operation is fairly simple: once the VFS has the
+dentry, it peeks at the inode data and passes some of it back to
+userspace.
+
+
+The File Object
+---------------
+
+Opening a file requires another operation: allocation of a file
+structure (this is the kernel-side implementation of file
+descriptors). The freshly allocated file structure is initialized with
+a pointer to the dentry and a set of file operation member functions.
+These are taken from the inode data. The open() file method is then
+called so the specific filesystem implementation can do its work. You
+can see that this is another switch performed by the VFS. The file
+structure is placed into the file descriptor table for the process.
+
+Reading, writing and closing files (and other assorted VFS operations)
+is done by using the userspace file descriptor to grab the appropriate
+file structure, and then calling the required file structure method to
+do whatever is required. For as long as the file is open, it keeps the
+dentry in use, which in turn means that the VFS inode is still in use.
+
+
+Registering and Mounting a Filesystem
+=====================================
+
+To register and unregister a filesystem, use the following API
+functions:
+
+ #include <linux/fs.h>
+
+ extern int register_filesystem(struct file_system_type *);
+ extern int unregister_filesystem(struct file_system_type *);
+
+The passed struct file_system_type describes your filesystem. When a
+request is made to mount a filesystem onto a directory in your namespace,
+the VFS will call the appropriate mount() method for the specific
+filesystem. New vfsmount referring to the tree returned by ->mount()
+will be attached to the mountpoint, so that when pathname resolution
+reaches the mountpoint it will jump into the root of that vfsmount.
+
+You can see all filesystems that are registered to the kernel in the
+file /proc/filesystems.
+
+
+struct file_system_type
+-----------------------
+
+This describes the filesystem. As of kernel 2.6.39, the following
+members are defined:
+
+struct file_system_type {
+ const char *name;
+ int fs_flags;
+ struct dentry *(*mount) (struct file_system_type *, int,
+ const char *, void *);
+ void (*kill_sb) (struct super_block *);
+ struct module *owner;
+ struct file_system_type * next;
+ struct list_head fs_supers;
+ struct lock_class_key s_lock_key;
+ struct lock_class_key s_umount_key;
+};
+
+ name: the name of the filesystem type, such as "ext2", "iso9660",
+ "msdos" and so on
+
+ fs_flags: various flags (i.e. FS_REQUIRES_DEV, FS_NO_DCACHE, etc.)
+
+ mount: the method to call when a new instance of this
+ filesystem should be mounted
+
+ kill_sb: the method to call when an instance of this filesystem
+ should be shut down
+
+ owner: for internal VFS use: you should initialize this to THIS_MODULE in
+ most cases.
+
+ next: for internal VFS use: you should initialize this to NULL
+
+ s_lock_key, s_umount_key: lockdep-specific
+
+The mount() method has the following arguments:
+
+ struct file_system_type *fs_type: describes the filesystem, partly initialized
+ by the specific filesystem code
+
+ int flags: mount flags
+
+ const char *dev_name: the device name we are mounting.
+
+ void *data: arbitrary mount options, usually comes as an ASCII
+ string (see "Mount Options" section)
+
+The mount() method must return the root dentry of the tree requested by
+caller. An active reference to its superblock must be grabbed and the
+superblock must be locked. On failure it should return ERR_PTR(error).
+
+The arguments match those of mount(2) and their interpretation
+depends on filesystem type. E.g. for block filesystems, dev_name is
+interpreted as block device name, that device is opened and if it
+contains a suitable filesystem image the method creates and initializes
+struct super_block accordingly, returning its root dentry to caller.
+
+->mount() may choose to return a subtree of existing filesystem - it
+doesn't have to create a new one. The main result from the caller's
+point of view is a reference to dentry at the root of (sub)tree to
+be attached; creation of new superblock is a common side effect.
+
+The most interesting member of the superblock structure that the
+mount() method fills in is the "s_op" field. This is a pointer to
+a "struct super_operations" which describes the next level of the
+filesystem implementation.
+
+Usually, a filesystem uses one of the generic mount() implementations
+and provides a fill_super() callback instead. The generic variants are:
+
+ mount_bdev: mount a filesystem residing on a block device
+
+ mount_nodev: mount a filesystem that is not backed by a device
+
+ mount_single: mount a filesystem which shares the instance between
+ all mounts
+
+A fill_super() callback implementation has the following arguments:
+
+ struct super_block *sb: the superblock structure. The callback
+ must initialize this properly.
+
+ void *data: arbitrary mount options, usually comes as an ASCII
+ string (see "Mount Options" section)
+
+ int silent: whether or not to be silent on error
+
+
+The Superblock Object
+=====================
+
+A superblock object represents a mounted filesystem.
+
+
+struct super_operations
+-----------------------
+
+This describes how the VFS can manipulate the superblock of your
+filesystem. As of kernel 2.6.22, the following members are defined:
+
+struct super_operations {
+ struct inode *(*alloc_inode)(struct super_block *sb);
+ void (*destroy_inode)(struct inode *);
+
+ void (*dirty_inode) (struct inode *, int flags);
+ int (*write_inode) (struct inode *, int);
+ void (*drop_inode) (struct inode *);
+ void (*delete_inode) (struct inode *);
+ void (*put_super) (struct super_block *);
+ int (*sync_fs)(struct super_block *sb, int wait);
+ int (*freeze_fs) (struct super_block *);
+ int (*unfreeze_fs) (struct super_block *);
+ int (*statfs) (struct dentry *, struct kstatfs *);
+ int (*remount_fs) (struct super_block *, int *, char *);
+ void (*clear_inode) (struct inode *);
+ void (*umount_begin) (struct super_block *);
+
+ int (*show_options)(struct seq_file *, struct dentry *);
+
+ ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
+ ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+ int (*nr_cached_objects)(struct super_block *);
+ void (*free_cached_objects)(struct super_block *, int);
+};
+
+All methods are called without any locks being held, unless otherwise
+noted. This means that most methods can block safely. All methods are
+only called from a process context (i.e. not from an interrupt handler
+or bottom half).
+
+ alloc_inode: this method is called by alloc_inode() to allocate memory
+ for struct inode and initialize it. If this function is not
+ defined, a simple 'struct inode' is allocated. Normally
+ alloc_inode will be used to allocate a larger structure which
+ contains a 'struct inode' embedded within it.
+
+ destroy_inode: this method is called by destroy_inode() to release
+ resources allocated for struct inode. It is only required if
+ ->alloc_inode was defined and simply undoes anything done by
+ ->alloc_inode.
+
+ dirty_inode: this method is called by the VFS to mark an inode dirty.
+
+ write_inode: this method is called when the VFS needs to write an
+ inode to disc. The second parameter indicates whether the write
+ should be synchronous or not, not all filesystems check this flag.
+
+ drop_inode: called when the last access to the inode is dropped,
+ with the inode->i_lock spinlock held.
+
+ This method should be either NULL (normal UNIX filesystem
+ semantics) or "generic_delete_inode" (for filesystems that do not
+ want to cache inodes - causing "delete_inode" to always be
+ called regardless of the value of i_nlink)
+
+ The "generic_delete_inode()" behavior is equivalent to the
+ old practice of using "force_delete" in the put_inode() case,
+ but does not have the races that the "force_delete()" approach
+ had.
+
+ delete_inode: called when the VFS wants to delete an inode
+
+ put_super: called when the VFS wishes to free the superblock
+ (i.e. unmount). This is called with the superblock lock held
+
+ sync_fs: called when VFS is writing out all dirty data associated with
+ a superblock. The second parameter indicates whether the method
+ should wait until the write out has been completed. Optional.
+
+ freeze_fs: called when VFS is locking a filesystem and
+ forcing it into a consistent state. This method is currently
+ used by the Logical Volume Manager (LVM).
+
+ unfreeze_fs: called when VFS is unlocking a filesystem and making it writable
+ again.
+
+ statfs: called when the VFS needs to get filesystem statistics.
+
+ remount_fs: called when the filesystem is remounted. This is called
+ with the kernel lock held
+
+ clear_inode: called then the VFS clears the inode. Optional
+
+ umount_begin: called when the VFS is unmounting a filesystem.
+
+ show_options: called by the VFS to show mount options for
+ /proc/<pid>/mounts. (see "Mount Options" section)
+
+ quota_read: called by the VFS to read from filesystem quota file.
+
+ quota_write: called by the VFS to write to filesystem quota file.
+
+ nr_cached_objects: called by the sb cache shrinking function for the
+ filesystem to return the number of freeable cached objects it contains.
+ Optional.
+
+ free_cache_objects: called by the sb cache shrinking function for the
+ filesystem to scan the number of objects indicated to try to free them.
+ Optional, but any filesystem implementing this method needs to also
+ implement ->nr_cached_objects for it to be called correctly.
+
+ We can't do anything with any errors that the filesystem might
+ encountered, hence the void return type. This will never be called if
+ the VM is trying to reclaim under GFP_NOFS conditions, hence this
+ method does not need to handle that situation itself.
+
+ Implementations must include conditional reschedule calls inside any
+ scanning loop that is done. This allows the VFS to determine
+ appropriate scan batch sizes without having to worry about whether
+ implementations will cause holdoff problems due to large scan batch
+ sizes.
+
+Whoever sets up the inode is responsible for filling in the "i_op" field. This
+is a pointer to a "struct inode_operations" which describes the methods that
+can be performed on individual inodes.
+
+
+The Inode Object
+================
+
+An inode object represents an object within the filesystem.
+
+
+struct inode_operations
+-----------------------
+
+This describes how the VFS can manipulate an inode in your
+filesystem. As of kernel 2.6.22, the following members are defined:
+
+struct inode_operations {
+ int (*create) (struct inode *,struct dentry *, umode_t, bool);
+ struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
+ int (*link) (struct dentry *,struct inode *,struct dentry *);
+ int (*unlink) (struct inode *,struct dentry *);
+ int (*symlink) (struct inode *,struct dentry *,const char *);
+ int (*mkdir) (struct inode *,struct dentry *,umode_t);
+ int (*rmdir) (struct inode *,struct dentry *);
+ int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
+ int (*rename) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
+ int (*rename2) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *, unsigned int);
+ int (*readlink) (struct dentry *, char __user *,int);
+ void * (*follow_link) (struct dentry *, struct nameidata *);
+ void (*put_link) (struct dentry *, struct nameidata *, void *);
+ int (*permission) (struct inode *, int);
+ int (*get_acl)(struct inode *, int);
+ int (*setattr) (struct dentry *, struct iattr *);
+ int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
+ int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
+ ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
+ ssize_t (*listxattr) (struct dentry *, char *, size_t);
+ int (*removexattr) (struct dentry *, const char *);
+ void (*update_time)(struct inode *, struct timespec *, int);
+ int (*atomic_open)(struct inode *, struct dentry *, struct file *,
+ unsigned open_flag, umode_t create_mode, int *opened);
+ int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+ int (*dentry_open)(struct dentry *, struct file *, const struct cred *);
+};
+
+Again, all methods are called without any locks being held, unless
+otherwise noted.
+
+ create: called by the open(2) and creat(2) system calls. Only
+ required if you want to support regular files. The dentry you
+ get should not have an inode (i.e. it should be a negative
+ dentry). Here you will probably call d_instantiate() with the
+ dentry and the newly created inode
+
+ lookup: called when the VFS needs to look up an inode in a parent
+ directory. The name to look for is found in the dentry. This
+ method must call d_add() to insert the found inode into the
+ dentry. The "i_count" field in the inode structure should be
+ incremented. If the named inode does not exist a NULL inode
+ should be inserted into the dentry (this is called a negative
+ dentry). Returning an error code from this routine must only
+ be done on a real error, otherwise creating inodes with system
+ calls like create(2), mknod(2), mkdir(2) and so on will fail.
+ If you wish to overload the dentry methods then you should
+ initialise the "d_dop" field in the dentry; this is a pointer
+ to a struct "dentry_operations".
+ This method is called with the directory inode semaphore held
+
+ link: called by the link(2) system call. Only required if you want
+ to support hard links. You will probably need to call
+ d_instantiate() just as you would in the create() method
+
+ unlink: called by the unlink(2) system call. Only required if you
+ want to support deleting inodes
+
+ symlink: called by the symlink(2) system call. Only required if you
+ want to support symlinks. You will probably need to call
+ d_instantiate() just as you would in the create() method
+
+ mkdir: called by the mkdir(2) system call. Only required if you want
+ to support creating subdirectories. You will probably need to
+ call d_instantiate() just as you would in the create() method
+
+ rmdir: called by the rmdir(2) system call. Only required if you want
+ to support deleting subdirectories
+
+ mknod: called by the mknod(2) system call to create a device (char,
+ block) inode or a named pipe (FIFO) or socket. Only required
+ if you want to support creating these types of inodes. You
+ will probably need to call d_instantiate() just as you would
+ in the create() method
+
+ rename: called by the rename(2) system call to rename the object to
+ have the parent and name given by the second inode and dentry.
+
+ rename2: this has an additional flags argument compared to rename.
+ If no flags are supported by the filesystem then this method
+ need not be implemented. If some flags are supported then the
+ filesystem must return -EINVAL for any unsupported or unknown
+ flags. Currently the following flags are implemented:
+ (1) RENAME_NOREPLACE: this flag indicates that if the target
+ of the rename exists the rename should fail with -EEXIST
+ instead of replacing the target. The VFS already checks for
+ existence, so for local filesystems the RENAME_NOREPLACE
+ implementation is equivalent to plain rename.
+ (2) RENAME_EXCHANGE: exchange source and target. Both must
+ exist; this is checked by the VFS. Unlike plain rename,
+ source and target may be of different type.
+
+ readlink: called by the readlink(2) system call. Only required if
+ you want to support reading symbolic links
+
+ follow_link: called by the VFS to follow a symbolic link to the
+ inode it points to. Only required if you want to support
+ symbolic links. This method returns a void pointer cookie
+ that is passed to put_link().
+
+ put_link: called by the VFS to release resources allocated by
+ follow_link(). The cookie returned by follow_link() is passed
+ to this method as the last parameter. It is used by
+ filesystems such as NFS where page cache is not stable
+ (i.e. page that was installed when the symbolic link walk
+ started might not be in the page cache at the end of the
+ walk).
+
+ permission: called by the VFS to check for access rights on a POSIX-like
+ filesystem.
+
+ May be called in rcu-walk mode (mask & MAY_NOT_BLOCK). If in rcu-walk
+ mode, the filesystem must check the permission without blocking or
+ storing to the inode.
+
+ If a situation is encountered that rcu-walk cannot handle, return
+ -ECHILD and it will be called again in ref-walk mode.
+
+ setattr: called by the VFS to set attributes for a file. This method
+ is called by chmod(2) and related system calls.
+
+ getattr: called by the VFS to get attributes of a file. This method
+ is called by stat(2) and related system calls.
+
+ setxattr: called by the VFS to set an extended attribute for a file.
+ Extended attribute is a name:value pair associated with an
+ inode. This method is called by setxattr(2) system call.
+
+ getxattr: called by the VFS to retrieve the value of an extended
+ attribute name. This method is called by getxattr(2) function
+ call.
+
+ listxattr: called by the VFS to list all extended attributes for a
+ given file. This method is called by listxattr(2) system call.
+
+ removexattr: called by the VFS to remove an extended attribute from
+ a file. This method is called by removexattr(2) system call.
+
+ update_time: called by the VFS to update a specific time or the i_version of
+ an inode. If this is not defined the VFS will update the inode itself
+ and call mark_inode_dirty_sync.
+
+ atomic_open: called on the last component of an open. Using this optional
+ method the filesystem can look up, possibly create and open the file in
+ one atomic operation. If it cannot perform this (e.g. the file type
+ turned out to be wrong) it may signal this by returning 1 instead of
+ usual 0 or -ve . This method is only called if the last component is
+ negative or needs lookup. Cached positive dentries are still handled by
+ f_op->open(). If the file was created, the FILE_CREATED flag should be
+ set in "opened". In case of O_EXCL the method must only succeed if the
+ file didn't exist and hence FILE_CREATED shall always be set on success.
+
+ tmpfile: called in the end of O_TMPFILE open(). Optional, equivalent to
+ atomically creating, opening and unlinking a file in given directory.
+
+The Address Space Object
+========================
+
+The address space object is used to group and manage pages in the page
+cache. It can be used to keep track of the pages in a file (or
+anything else) and also track the mapping of sections of the file into
+process address spaces.
+
+There are a number of distinct yet related services that an
+address-space can provide. These include communicating memory
+pressure, page lookup by address, and keeping track of pages tagged as
+Dirty or Writeback.
+
+The first can be used independently to the others. The VM can try to
+either write dirty pages in order to clean them, or release clean
+pages in order to reuse them. To do this it can call the ->writepage
+method on dirty pages, and ->releasepage on clean pages with
+PagePrivate set. Clean pages without PagePrivate and with no external
+references will be released without notice being given to the
+address_space.
+
+To achieve this functionality, pages need to be placed on an LRU with
+lru_cache_add and mark_page_active needs to be called whenever the
+page is used.
+
+Pages are normally kept in a radix tree index by ->index. This tree
+maintains information about the PG_Dirty and PG_Writeback status of
+each page, so that pages with either of these flags can be found
+quickly.
+
+The Dirty tag is primarily used by mpage_writepages - the default
+->writepages method. It uses the tag to find dirty pages to call
+->writepage on. If mpage_writepages is not used (i.e. the address
+provides its own ->writepages) , the PAGECACHE_TAG_DIRTY tag is
+almost unused. write_inode_now and sync_inode do use it (through
+__sync_single_inode) to check if ->writepages has been successful in
+writing out the whole address_space.
+
+The Writeback tag is used by filemap*wait* and sync_page* functions,
+via filemap_fdatawait_range, to wait for all writeback to
+complete. While waiting ->sync_page (if defined) will be called on
+each page that is found to require writeback.
+
+An address_space handler may attach extra information to a page,
+typically using the 'private' field in the 'struct page'. If such
+information is attached, the PG_Private flag should be set. This will
+cause various VM routines to make extra calls into the address_space
+handler to deal with that data.
+
+An address space acts as an intermediate between storage and
+application. Data is read into the address space a whole page at a
+time, and provided to the application either by copying of the page,
+or by memory-mapping the page.
+Data is written into the address space by the application, and then
+written-back to storage typically in whole pages, however the
+address_space has finer control of write sizes.
+
+The read process essentially only requires 'readpage'. The write
+process is more complicated and uses write_begin/write_end or
+set_page_dirty to write data into the address_space, and writepage,
+sync_page, and writepages to writeback data to storage.
+
+Adding and removing pages to/from an address_space is protected by the
+inode's i_mutex.
+
+When data is written to a page, the PG_Dirty flag should be set. It
+typically remains set until writepage asks for it to be written. This
+should clear PG_Dirty and set PG_Writeback. It can be actually
+written at any point after PG_Dirty is clear. Once it is known to be
+safe, PG_Writeback is cleared.
+
+Writeback makes use of a writeback_control structure...
+
+struct address_space_operations
+-------------------------------
+
+This describes how the VFS can manipulate mapping of a file to page cache in
+your filesystem. The following members are defined:
+
+struct address_space_operations {
+ int (*writepage)(struct page *page, struct writeback_control *wbc);
+ int (*readpage)(struct file *, struct page *);
+ int (*writepages)(struct address_space *, struct writeback_control *);
+ int (*set_page_dirty)(struct page *page);
+ int (*readpages)(struct file *filp, struct address_space *mapping,
+ struct list_head *pages, unsigned nr_pages);
+ int (*write_begin)(struct file *, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata);
+ int (*write_end)(struct file *, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata);
+ sector_t (*bmap)(struct address_space *, sector_t);
+ void (*invalidatepage) (struct page *, unsigned int, unsigned int);
+ int (*releasepage) (struct page *, int);
+ void (*freepage)(struct page *);
+ ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter, loff_t offset);
+ /* migrate the contents of a page to the specified target */
+ int (*migratepage) (struct page *, struct page *);
+ int (*launder_page) (struct page *);
+ int (*is_partially_uptodate) (struct page *, unsigned long,
+ unsigned long);
+ void (*is_dirty_writeback) (struct page *, bool *, bool *);
+ int (*error_remove_page) (struct mapping *mapping, struct page *page);
+ int (*swap_activate)(struct file *);
+ int (*swap_deactivate)(struct file *);
+};
+
+ writepage: called by the VM to write a dirty page to backing store.
+ This may happen for data integrity reasons (i.e. 'sync'), or
+ to free up memory (flush). The difference can be seen in
+ wbc->sync_mode.
+ The PG_Dirty flag has been cleared and PageLocked is true.
+ writepage should start writeout, should set PG_Writeback,
+ and should make sure the page is unlocked, either synchronously
+ or asynchronously when the write operation completes.
+
+ If wbc->sync_mode is WB_SYNC_NONE, ->writepage doesn't have to
+ try too hard if there are problems, and may choose to write out
+ other pages from the mapping if that is easier (e.g. due to
+ internal dependencies). If it chooses not to start writeout, it
+ should return AOP_WRITEPAGE_ACTIVATE so that the VM will not keep
+ calling ->writepage on that page.
+
+ See the file "Locking" for more details.
+
+ readpage: called by the VM to read a page from backing store.
+ The page will be Locked when readpage is called, and should be
+ unlocked and marked uptodate once the read completes.
+ If ->readpage discovers that it needs to unlock the page for
+ some reason, it can do so, and then return AOP_TRUNCATED_PAGE.
+ In this case, the page will be relocated, relocked and if
+ that all succeeds, ->readpage will be called again.
+
+ writepages: called by the VM to write out pages associated with the
+ address_space object. If wbc->sync_mode is WBC_SYNC_ALL, then
+ the writeback_control will specify a range of pages that must be
+ written out. If it is WBC_SYNC_NONE, then a nr_to_write is given
+ and that many pages should be written if possible.
+ If no ->writepages is given, then mpage_writepages is used
+ instead. This will choose pages from the address space that are
+ tagged as DIRTY and will pass them to ->writepage.
+
+ set_page_dirty: called by the VM to set a page dirty.
+ This is particularly needed if an address space attaches
+ private data to a page, and that data needs to be updated when
+ a page is dirtied. This is called, for example, when a memory
+ mapped page gets modified.
+ If defined, it should set the PageDirty flag, and the
+ PAGECACHE_TAG_DIRTY tag in the radix tree.
+
+ readpages: called by the VM to read pages associated with the address_space
+ object. This is essentially just a vector version of
+ readpage. Instead of just one page, several pages are
+ requested.
+ readpages is only used for read-ahead, so read errors are
+ ignored. If anything goes wrong, feel free to give up.
+
+ write_begin:
+ Called by the generic buffered write code to ask the filesystem to
+ prepare to write len bytes at the given offset in the file. The
+ address_space should check that the write will be able to complete,
+ by allocating space if necessary and doing any other internal
+ housekeeping. If the write will update parts of any basic-blocks on
+ storage, then those blocks should be pre-read (if they haven't been
+ read already) so that the updated blocks can be written out properly.
+
+ The filesystem must return the locked pagecache page for the specified
+ offset, in *pagep, for the caller to write into.
+
+ It must be able to cope with short writes (where the length passed to
+ write_begin is greater than the number of bytes copied into the page).
+
+ flags is a field for AOP_FLAG_xxx flags, described in
+ include/linux/fs.h.
+
+ A void * may be returned in fsdata, which then gets passed into
+ write_end.
+
+ Returns 0 on success; < 0 on failure (which is the error code), in
+ which case write_end is not called.
+
+ write_end: After a successful write_begin, and data copy, write_end must
+ be called. len is the original len passed to write_begin, and copied
+ is the amount that was able to be copied (copied == len is always true
+ if write_begin was called with the AOP_FLAG_UNINTERRUPTIBLE flag).
+
+ The filesystem must take care of unlocking the page and releasing it
+ refcount, and updating i_size.
+
+ Returns < 0 on failure, otherwise the number of bytes (<= 'copied')
+ that were able to be copied into pagecache.
+
+ bmap: called by the VFS to map a logical block offset within object to
+ physical block number. This method is used by the FIBMAP
+ ioctl and for working with swap-files. To be able to swap to
+ a file, the file must have a stable mapping to a block
+ device. The swap system does not go through the filesystem
+ but instead uses bmap to find out where the blocks in the file
+ are and uses those addresses directly.
+
+ dentry_open: *WARNING: probably going away soon, do not use!* This is an
+ alternative to f_op->open(), the difference is that this method may open
+ a file not necessarily originating from the same filesystem as the one
+ i_op->open() was called on. It may be useful for stacking filesystems
+ which want to allow native I/O directly on underlying files.
+
+
+ invalidatepage: If a page has PagePrivate set, then invalidatepage
+ will be called when part or all of the page is to be removed
+ from the address space. This generally corresponds to either a
+ truncation, punch hole or a complete invalidation of the address
+ space (in the latter case 'offset' will always be 0 and 'length'
+ will be PAGE_CACHE_SIZE). Any private data associated with the page
+ should be updated to reflect this truncation. If offset is 0 and
+ length is PAGE_CACHE_SIZE, then the private data should be released,
+ because the page must be able to be completely discarded. This may
+ be done by calling the ->releasepage function, but in this case the
+ release MUST succeed.
+
+ releasepage: releasepage is called on PagePrivate pages to indicate
+ that the page should be freed if possible. ->releasepage
+ should remove any private data from the page and clear the
+ PagePrivate flag. If releasepage() fails for some reason, it must
+ indicate failure with a 0 return value.
+ releasepage() is used in two distinct though related cases. The
+ first is when the VM finds a clean page with no active users and
+ wants to make it a free page. If ->releasepage succeeds, the
+ page will be removed from the address_space and become free.
+
+ The second case is when a request has been made to invalidate
+ some or all pages in an address_space. This can happen
+ through the fadvice(POSIX_FADV_DONTNEED) system call or by the
+ filesystem explicitly requesting it as nfs and 9fs do (when
+ they believe the cache may be out of date with storage) by
+ calling invalidate_inode_pages2().
+ If the filesystem makes such a call, and needs to be certain
+ that all pages are invalidated, then its releasepage will
+ need to ensure this. Possibly it can clear the PageUptodate
+ bit if it cannot free private data yet.
+
+ freepage: freepage is called once the page is no longer visible in
+ the page cache in order to allow the cleanup of any private
+ data. Since it may be called by the memory reclaimer, it
+ should not assume that the original address_space mapping still
+ exists, and it should not block.
+
+ direct_IO: called by the generic read/write routines to perform
+ direct_IO - that is IO requests which bypass the page cache
+ and transfer data directly between the storage and the
+ application's address space.
+
+ migrate_page: This is used to compact the physical memory usage.
+ If the VM wants to relocate a page (maybe off a memory card
+ that is signalling imminent failure) it will pass a new page
+ and an old page to this function. migrate_page should
+ transfer any private data across and update any references
+ that it has to the page.
+
+ launder_page: Called before freeing a page - it writes back the dirty page. To
+ prevent redirtying the page, it is kept locked during the whole
+ operation.
+
+ is_partially_uptodate: Called by the VM when reading a file through the
+ pagecache when the underlying blocksize != pagesize. If the required
+ block is up to date then the read can complete without needing the IO
+ to bring the whole page up to date.
+
+ is_dirty_writeback: Called by the VM when attempting to reclaim a page.
+ The VM uses dirty and writeback information to determine if it needs
+ to stall to allow flushers a chance to complete some IO. Ordinarily
+ it can use PageDirty and PageWriteback but some filesystems have
+ more complex state (unstable pages in NFS prevent reclaim) or
+ do not set those flags due to locking problems (jbd). This callback
+ allows a filesystem to indicate to the VM if a page should be
+ treated as dirty or writeback for the purposes of stalling.
+
+ error_remove_page: normally set to generic_error_remove_page if truncation
+ is ok for this address space. Used for memory failure handling.
+ Setting this implies you deal with pages going away under you,
+ unless you have them locked or reference counts increased.
+
+ swap_activate: Called when swapon is used on a file to allocate
+ space if necessary and pin the block lookup information in
+ memory. A return value of zero indicates success,
+ in which case this file can be used to back swapspace. The
+ swapspace operations will be proxied to this address space's
+ ->swap_{out,in} methods.
+
+ swap_deactivate: Called during swapoff on files where swap_activate
+ was successful.
+
+
+The File Object
+===============
+
+A file object represents a file opened by a process.
+
+
+struct file_operations
+----------------------
+
+This describes how the VFS can manipulate an open file. As of kernel
+3.12, the following members are defined:
+
+struct file_operations {
+ struct module *owner;
+ loff_t (*llseek) (struct file *, loff_t, int);
+ ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
+ ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
+ ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
+ ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+ int (*iterate) (struct file *, struct dir_context *);
+ unsigned int (*poll) (struct file *, struct poll_table_struct *);
+ long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
+ long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
+ int (*mmap) (struct file *, struct vm_area_struct *);
+ int (*open) (struct inode *, struct file *);
+ int (*flush) (struct file *);
+ int (*release) (struct inode *, struct file *);
+ int (*fsync) (struct file *, loff_t, loff_t, int datasync);
+ int (*aio_fsync) (struct kiocb *, int datasync);
+ int (*fasync) (int, struct file *, int);
+ int (*lock) (struct file *, int, struct file_lock *);
+ ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
+ unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+ int (*check_flags)(int);
+ int (*flock) (struct file *, int, struct file_lock *);
+ ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
+ ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
+ int (*setlease)(struct file *, long arg, struct file_lock **, void **);
+ long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
+ void (*show_fdinfo)(struct seq_file *m, struct file *f);
+};
+
+Again, all methods are called without any locks being held, unless
+otherwise noted.
+
+ llseek: called when the VFS needs to move the file position index
+
+ read: called by read(2) and related system calls
+
+ read_iter: possibly asynchronous read with iov_iter as destination
+
+ write: called by write(2) and related system calls
+
+ write_iter: possibly asynchronous write with iov_iter as source
+
+ iterate: called when the VFS needs to read the directory contents
+
+ poll: called by the VFS when a process wants to check if there is
+ activity on this file and (optionally) go to sleep until there
+ is activity. Called by the select(2) and poll(2) system calls
+
+ unlocked_ioctl: called by the ioctl(2) system call.
+
+ compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
+ are used on 64 bit kernels.
+
+ mmap: called by the mmap(2) system call
+
+ open: called by the VFS when an inode should be opened. When the VFS
+ opens a file, it creates a new "struct file". It then calls the
+ open method for the newly allocated file structure. You might
+ think that the open method really belongs in
+ "struct inode_operations", and you may be right. I think it's
+ done the way it is because it makes filesystems simpler to
+ implement. The open() method is a good place to initialize the
+ "private_data" member in the file structure if you want to point
+ to a device structure
+
+ flush: called by the close(2) system call to flush a file
+
+ release: called when the last reference to an open file is closed
+
+ fsync: called by the fsync(2) system call
+
+ fasync: called by the fcntl(2) system call when asynchronous
+ (non-blocking) mode is enabled for a file
+
+ lock: called by the fcntl(2) system call for F_GETLK, F_SETLK, and F_SETLKW
+ commands
+
+ get_unmapped_area: called by the mmap(2) system call
+
+ check_flags: called by the fcntl(2) system call for F_SETFL command
+
+ flock: called by the flock(2) system call
+
+ splice_write: called by the VFS to splice data from a pipe to a file. This
+ method is used by the splice(2) system call
+
+ splice_read: called by the VFS to splice data from file to a pipe. This
+ method is used by the splice(2) system call
+
+ setlease: called by the VFS to set or release a file lock lease. setlease
+ implementations should call generic_setlease to record or remove
+ the lease in the inode after setting it.
+
+ fallocate: called by the VFS to preallocate blocks or punch a hole.
+
+Note that the file operations are implemented by the specific
+filesystem in which the inode resides. When opening a device node
+(character or block special) most filesystems will call special
+support routines in the VFS which will locate the required device
+driver information. These support routines replace the filesystem file
+operations with those for the device driver, and then proceed to call
+the new open() method for the file. This is how opening a device file
+in the filesystem eventually ends up calling the device driver open()
+method.
+
+
+Directory Entry Cache (dcache)
+==============================
+
+
+struct dentry_operations
+------------------------
+
+This describes how a filesystem can overload the standard dentry
+operations. Dentries and the dcache are the domain of the VFS and the
+individual filesystem implementations. Device drivers have no business
+here. These methods may be set to NULL, as they are either optional or
+the VFS uses a default. As of kernel 2.6.22, the following members are
+defined:
+
+struct dentry_operations {
+ int (*d_revalidate)(struct dentry *, unsigned int);
+ int (*d_weak_revalidate)(struct dentry *, unsigned int);
+ int (*d_hash)(const struct dentry *, struct qstr *);
+ int (*d_compare)(const struct dentry *, const struct dentry *,
+ unsigned int, const char *, const struct qstr *);
+ int (*d_delete)(const struct dentry *);
+ void (*d_release)(struct dentry *);
+ void (*d_iput)(struct dentry *, struct inode *);
+ char *(*d_dname)(struct dentry *, char *, int);
+ struct vfsmount *(*d_automount)(struct path *);
+ int (*d_manage)(struct dentry *, bool);
+};
+
+ d_revalidate: called when the VFS needs to revalidate a dentry. This
+ is called whenever a name look-up finds a dentry in the
+ dcache. Most local filesystems leave this as NULL, because all their
+ dentries in the dcache are valid. Network filesystems are different
+ since things can change on the server without the client necessarily
+ being aware of it.
+
+ This function should return a positive value if the dentry is still
+ valid, and zero or a negative error code if it isn't.
+
+ d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU).
+ If in rcu-walk mode, the filesystem must revalidate the dentry without
+ blocking or storing to the dentry, d_parent and d_inode should not be
+ used without care (because they can change and, in d_inode case, even
+ become NULL under us).
+
+ If a situation is encountered that rcu-walk cannot handle, return
+ -ECHILD and it will be called again in ref-walk mode.
+
+ d_weak_revalidate: called when the VFS needs to revalidate a "jumped" dentry.
+ This is called when a path-walk ends at dentry that was not acquired by
+ doing a lookup in the parent directory. This includes "/", "." and "..",
+ as well as procfs-style symlinks and mountpoint traversal.
+
+ In this case, we are less concerned with whether the dentry is still
+ fully correct, but rather that the inode is still valid. As with
+ d_revalidate, most local filesystems will set this to NULL since their
+ dcache entries are always valid.
+
+ This function has the same return code semantics as d_revalidate.
+
+ d_weak_revalidate is only called after leaving rcu-walk mode.
+
+ d_hash: called when the VFS adds a dentry to the hash table. The first
+ dentry passed to d_hash is the parent directory that the name is
+ to be hashed into.
+
+ Same locking and synchronisation rules as d_compare regarding
+ what is safe to dereference etc.
+
+ d_compare: called to compare a dentry name with a given name. The first
+ dentry is the parent of the dentry to be compared, the second is
+ the child dentry. len and name string are properties of the dentry
+ to be compared. qstr is the name to compare it with.
+
+ Must be constant and idempotent, and should not take locks if
+ possible, and should not or store into the dentry.
+ Should not dereference pointers outside the dentry without
+ lots of care (eg. d_parent, d_inode, d_name should not be used).
+
+ However, our vfsmount is pinned, and RCU held, so the dentries and
+ inodes won't disappear, neither will our sb or filesystem module.
+ ->d_sb may be used.
+
+ It is a tricky calling convention because it needs to be called under
+ "rcu-walk", ie. without any locks or references on things.
+
+ d_delete: called when the last reference to a dentry is dropped and the
+ dcache is deciding whether or not to cache it. Return 1 to delete
+ immediately, or 0 to cache the dentry. Default is NULL which means to
+ always cache a reachable dentry. d_delete must be constant and
+ idempotent.
+
+ d_release: called when a dentry is really deallocated
+
+ d_iput: called when a dentry loses its inode (just prior to its
+ being deallocated). The default when this is NULL is that the
+ VFS calls iput(). If you define this method, you must call
+ iput() yourself
+
+ d_dname: called when the pathname of a dentry should be generated.
+ Useful for some pseudo filesystems (sockfs, pipefs, ...) to delay
+ pathname generation. (Instead of doing it when dentry is created,
+ it's done only when the path is needed.). Real filesystems probably
+ dont want to use it, because their dentries are present in global
+ dcache hash, so their hash should be an invariant. As no lock is
+ held, d_dname() should not try to modify the dentry itself, unless
+ appropriate SMP safety is used. CAUTION : d_path() logic is quite
+ tricky. The correct way to return for example "Hello" is to put it
+ at the end of the buffer, and returns a pointer to the first char.
+ dynamic_dname() helper function is provided to take care of this.
+
+ d_automount: called when an automount dentry is to be traversed (optional).
+ This should create a new VFS mount record and return the record to the
+ caller. The caller is supplied with a path parameter giving the
+ automount directory to describe the automount target and the parent
+ VFS mount record to provide inheritable mount parameters. NULL should
+ be returned if someone else managed to make the automount first. If
+ the vfsmount creation failed, then an error code should be returned.
+ If -EISDIR is returned, then the directory will be treated as an
+ ordinary directory and returned to pathwalk to continue walking.
+
+ If a vfsmount is returned, the caller will attempt to mount it on the
+ mountpoint and will remove the vfsmount from its expiration list in
+ the case of failure. The vfsmount should be returned with 2 refs on
+ it to prevent automatic expiration - the caller will clean up the
+ additional ref.
+
+ This function is only used if DCACHE_NEED_AUTOMOUNT is set on the
+ dentry. This is set by __d_instantiate() if S_AUTOMOUNT is set on the
+ inode being added.
+
+ d_manage: called to allow the filesystem to manage the transition from a
+ dentry (optional). This allows autofs, for example, to hold up clients
+ waiting to explore behind a 'mountpoint' whilst letting the daemon go
+ past and construct the subtree there. 0 should be returned to let the
+ calling process continue. -EISDIR can be returned to tell pathwalk to
+ use this directory as an ordinary directory and to ignore anything
+ mounted on it and not to check the automount flag. Any other error
+ code will abort pathwalk completely.
+
+ If the 'rcu_walk' parameter is true, then the caller is doing a
+ pathwalk in RCU-walk mode. Sleeping is not permitted in this mode,
+ and the caller can be asked to leave it and call again by returning
+ -ECHILD. -EISDIR may also be returned to tell pathwalk to
+ ignore d_automount or any mounts.
+
+ This function is only used if DCACHE_MANAGE_TRANSIT is set on the
+ dentry being transited from.
+
+Example :
+
+static char *pipefs_dname(struct dentry *dent, char *buffer, int buflen)
+{
+ return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
+ dentry->d_inode->i_ino);
+}
+
+Each dentry has a pointer to its parent dentry, as well as a hash list
+of child dentries. Child dentries are basically like files in a
+directory.
+
+
+Directory Entry Cache API
+--------------------------
+
+There are a number of functions defined which permit a filesystem to
+manipulate dentries:
+
+ dget: open a new handle for an existing dentry (this just increments
+ the usage count)
+
+ dput: close a handle for a dentry (decrements the usage count). If
+ the usage count drops to 0, and the dentry is still in its
+ parent's hash, the "d_delete" method is called to check whether
+ it should be cached. If it should not be cached, or if the dentry
+ is not hashed, it is deleted. Otherwise cached dentries are put
+ into an LRU list to be reclaimed on memory shortage.
+
+ d_drop: this unhashes a dentry from its parents hash list. A
+ subsequent call to dput() will deallocate the dentry if its
+ usage count drops to 0
+
+ d_delete: delete a dentry. If there are no other open references to
+ the dentry then the dentry is turned into a negative dentry
+ (the d_iput() method is called). If there are other
+ references, then d_drop() is called instead
+
+ d_add: add a dentry to its parents hash list and then calls
+ d_instantiate()
+
+ d_instantiate: add a dentry to the alias hash list for the inode and
+ updates the "d_inode" member. The "i_count" member in the
+ inode structure should be set/incremented. If the inode
+ pointer is NULL, the dentry is called a "negative
+ dentry". This function is commonly called when an inode is
+ created for an existing negative dentry
+
+ d_lookup: look up a dentry given its parent and path name component
+ It looks up the child of that given name from the dcache
+ hash table. If it is found, the reference count is incremented
+ and the dentry is returned. The caller must use dput()
+ to free the dentry when it finishes using it.
+
+Mount Options
+=============
+
+Parsing options
+---------------
+
+On mount and remount the filesystem is passed a string containing a
+comma separated list of mount options. The options can have either of
+these forms:
+
+ option
+ option=value
+
+The <linux/parser.h> header defines an API that helps parse these
+options. There are plenty of examples on how to use it in existing
+filesystems.
+
+Showing options
+---------------
+
+If a filesystem accepts mount options, it must define show_options()
+to show all the currently active options. The rules are:
+
+ - options MUST be shown which are not default or their values differ
+ from the default
+
+ - options MAY be shown which are enabled by default or have their
+ default value
+
+Options used only internally between a mount helper and the kernel
+(such as file descriptors), or which only have an effect during the
+mounting (such as ones controlling the creation of a journal) are exempt
+from the above rules.
+
+The underlying reason for the above rules is to make sure, that a
+mount can be accurately replicated (e.g. umounting and mounting again)
+based on the information found in /proc/mounts.
+
+A simple method of saving options at mount/remount time and showing
+them is provided with the save_mount_options() and
+generic_show_options() helper functions. Please note, that using
+these may have drawbacks. For more info see header comments for these
+functions in fs/namespace.c.
+
+Resources
+=========
+
+(Note some of these resources are not up-to-date with the latest kernel
+ version.)
+
+Creating Linux virtual filesystems. 2002
+ <http://lwn.net/Articles/13325/>
+
+The Linux Virtual File-system Layer by Neil Brown. 1999
+ <http://www.cse.unsw.edu.au/~neilb/oss/linux-commentary/vfs.html>
+
+A tour of the Linux VFS by Michael K. Johnson. 1996
+ <http://www.tldp.org/LDP/khg/HyperNews/get/fs/vfstour.html>
+
+A small trail through the Linux kernel by Andries Brouwer. 2001
+ <http://www.win.tue.nl/~aeb/linux/vfs/trail.html>
diff --git a/Documentation/filesystems/xfs-delayed-logging-design.txt b/Documentation/filesystems/xfs-delayed-logging-design.txt
new file mode 100644
index 000000000..2ce36439c
--- /dev/null
+++ b/Documentation/filesystems/xfs-delayed-logging-design.txt
@@ -0,0 +1,793 @@
+XFS Delayed Logging Design
+--------------------------
+
+Introduction to Re-logging in XFS
+---------------------------------
+
+XFS logging is a combination of logical and physical logging. Some objects,
+such as inodes and dquots, are logged in logical format where the details
+logged are made up of the changes to in-core structures rather than on-disk
+structures. Other objects - typically buffers - have their physical changes
+logged. The reason for these differences is to reduce the amount of log space
+required for objects that are frequently logged. Some parts of inodes are more
+frequently logged than others, and inodes are typically more frequently logged
+than any other object (except maybe the superblock buffer) so keeping the
+amount of metadata logged low is of prime importance.
+
+The reason that this is such a concern is that XFS allows multiple separate
+modifications to a single object to be carried in the log at any given time.
+This allows the log to avoid needing to flush each change to disk before
+recording a new change to the object. XFS does this via a method called
+"re-logging". Conceptually, this is quite simple - all it requires is that any
+new change to the object is recorded with a *new copy* of all the existing
+changes in the new transaction that is written to the log.
+
+That is, if we have a sequence of changes A through to F, and the object was
+written to disk after change D, we would see in the log the following series
+of transactions, their contents and the log sequence number (LSN) of the
+transaction:
+
+ Transaction Contents LSN
+ A A X
+ B A+B X+n
+ C A+B+C X+n+m
+ D A+B+C+D X+n+m+o
+ <object written to disk>
+ E E Y (> X+n+m+o)
+ F E+F Yٍ+p
+
+In other words, each time an object is relogged, the new transaction contains
+the aggregation of all the previous changes currently held only in the log.
+
+This relogging technique also allows objects to be moved forward in the log so
+that an object being relogged does not prevent the tail of the log from ever
+moving forward. This can be seen in the table above by the changing
+(increasing) LSN of each subsequent transaction - the LSN is effectively a
+direct encoding of the location in the log of the transaction.
+
+This relogging is also used to implement long-running, multiple-commit
+transactions. These transaction are known as rolling transactions, and require
+a special log reservation known as a permanent transaction reservation. A
+typical example of a rolling transaction is the removal of extents from an
+inode which can only be done at a rate of two extents per transaction because
+of reservation size limitations. Hence a rolling extent removal transaction
+keeps relogging the inode and btree buffers as they get modified in each
+removal operation. This keeps them moving forward in the log as the operation
+progresses, ensuring that current operation never gets blocked by itself if the
+log wraps around.
+
+Hence it can be seen that the relogging operation is fundamental to the correct
+working of the XFS journalling subsystem. From the above description, most
+people should be able to see why the XFS metadata operations writes so much to
+the log - repeated operations to the same objects write the same changes to
+the log over and over again. Worse is the fact that objects tend to get
+dirtier as they get relogged, so each subsequent transaction is writing more
+metadata into the log.
+
+Another feature of the XFS transaction subsystem is that most transactions are
+asynchronous. That is, they don't commit to disk until either a log buffer is
+filled (a log buffer can hold multiple transactions) or a synchronous operation
+forces the log buffers holding the transactions to disk. This means that XFS is
+doing aggregation of transactions in memory - batching them, if you like - to
+minimise the impact of the log IO on transaction throughput.
+
+The limitation on asynchronous transaction throughput is the number and size of
+log buffers made available by the log manager. By default there are 8 log
+buffers available and the size of each is 32kB - the size can be increased up
+to 256kB by use of a mount option.
+
+Effectively, this gives us the maximum bound of outstanding metadata changes
+that can be made to the filesystem at any point in time - if all the log
+buffers are full and under IO, then no more transactions can be committed until
+the current batch completes. It is now common for a single current CPU core to
+be to able to issue enough transactions to keep the log buffers full and under
+IO permanently. Hence the XFS journalling subsystem can be considered to be IO
+bound.
+
+Delayed Logging: Concepts
+-------------------------
+
+The key thing to note about the asynchronous logging combined with the
+relogging technique XFS uses is that we can be relogging changed objects
+multiple times before they are committed to disk in the log buffers. If we
+return to the previous relogging example, it is entirely possible that
+transactions A through D are committed to disk in the same log buffer.
+
+That is, a single log buffer may contain multiple copies of the same object,
+but only one of those copies needs to be there - the last one "D", as it
+contains all the changes from the previous changes. In other words, we have one
+necessary copy in the log buffer, and three stale copies that are simply
+wasting space. When we are doing repeated operations on the same set of
+objects, these "stale objects" can be over 90% of the space used in the log
+buffers. It is clear that reducing the number of stale objects written to the
+log would greatly reduce the amount of metadata we write to the log, and this
+is the fundamental goal of delayed logging.
+
+From a conceptual point of view, XFS is already doing relogging in memory (where
+memory == log buffer), only it is doing it extremely inefficiently. It is using
+logical to physical formatting to do the relogging because there is no
+infrastructure to keep track of logical changes in memory prior to physically
+formatting the changes in a transaction to the log buffer. Hence we cannot avoid
+accumulating stale objects in the log buffers.
+
+Delayed logging is the name we've given to keeping and tracking transactional
+changes to objects in memory outside the log buffer infrastructure. Because of
+the relogging concept fundamental to the XFS journalling subsystem, this is
+actually relatively easy to do - all the changes to logged items are already
+tracked in the current infrastructure. The big problem is how to accumulate
+them and get them to the log in a consistent, recoverable manner.
+Describing the problems and how they have been solved is the focus of this
+document.
+
+One of the key changes that delayed logging makes to the operation of the
+journalling subsystem is that it disassociates the amount of outstanding
+metadata changes from the size and number of log buffers available. In other
+words, instead of there only being a maximum of 2MB of transaction changes not
+written to the log at any point in time, there may be a much greater amount
+being accumulated in memory. Hence the potential for loss of metadata on a
+crash is much greater than for the existing logging mechanism.
+
+It should be noted that this does not change the guarantee that log recovery
+will result in a consistent filesystem. What it does mean is that as far as the
+recovered filesystem is concerned, there may be many thousands of transactions
+that simply did not occur as a result of the crash. This makes it even more
+important that applications that care about their data use fsync() where they
+need to ensure application level data integrity is maintained.
+
+It should be noted that delayed logging is not an innovative new concept that
+warrants rigorous proofs to determine whether it is correct or not. The method
+of accumulating changes in memory for some period before writing them to the
+log is used effectively in many filesystems including ext3 and ext4. Hence
+no time is spent in this document trying to convince the reader that the
+concept is sound. Instead it is simply considered a "solved problem" and as
+such implementing it in XFS is purely an exercise in software engineering.
+
+The fundamental requirements for delayed logging in XFS are simple:
+
+ 1. Reduce the amount of metadata written to the log by at least
+ an order of magnitude.
+ 2. Supply sufficient statistics to validate Requirement #1.
+ 3. Supply sufficient new tracing infrastructure to be able to debug
+ problems with the new code.
+ 4. No on-disk format change (metadata or log format).
+ 5. Enable and disable with a mount option.
+ 6. No performance regressions for synchronous transaction workloads.
+
+Delayed Logging: Design
+-----------------------
+
+Storing Changes
+
+The problem with accumulating changes at a logical level (i.e. just using the
+existing log item dirty region tracking) is that when it comes to writing the
+changes to the log buffers, we need to ensure that the object we are formatting
+is not changing while we do this. This requires locking the object to prevent
+concurrent modification. Hence flushing the logical changes to the log would
+require us to lock every object, format them, and then unlock them again.
+
+This introduces lots of scope for deadlocks with transactions that are already
+running. For example, a transaction has object A locked and modified, but needs
+the delayed logging tracking lock to commit the transaction. However, the
+flushing thread has the delayed logging tracking lock already held, and is
+trying to get the lock on object A to flush it to the log buffer. This appears
+to be an unsolvable deadlock condition, and it was solving this problem that
+was the barrier to implementing delayed logging for so long.
+
+The solution is relatively simple - it just took a long time to recognise it.
+Put simply, the current logging code formats the changes to each item into an
+vector array that points to the changed regions in the item. The log write code
+simply copies the memory these vectors point to into the log buffer during
+transaction commit while the item is locked in the transaction. Instead of
+using the log buffer as the destination of the formatting code, we can use an
+allocated memory buffer big enough to fit the formatted vector.
+
+If we then copy the vector into the memory buffer and rewrite the vector to
+point to the memory buffer rather than the object itself, we now have a copy of
+the changes in a format that is compatible with the log buffer writing code.
+that does not require us to lock the item to access. This formatting and
+rewriting can all be done while the object is locked during transaction commit,
+resulting in a vector that is transactionally consistent and can be accessed
+without needing to lock the owning item.
+
+Hence we avoid the need to lock items when we need to flush outstanding
+asynchronous transactions to the log. The differences between the existing
+formatting method and the delayed logging formatting can be seen in the
+diagram below.
+
+Current format log vector:
+
+Object +---------------------------------------------+
+Vector 1 +----+
+Vector 2 +----+
+Vector 3 +----------+
+
+After formatting:
+
+Log Buffer +-V1-+-V2-+----V3----+
+
+Delayed logging vector:
+
+Object +---------------------------------------------+
+Vector 1 +----+
+Vector 2 +----+
+Vector 3 +----------+
+
+After formatting:
+
+Memory Buffer +-V1-+-V2-+----V3----+
+Vector 1 +----+
+Vector 2 +----+
+Vector 3 +----------+
+
+The memory buffer and associated vector need to be passed as a single object,
+but still need to be associated with the parent object so if the object is
+relogged we can replace the current memory buffer with a new memory buffer that
+contains the latest changes.
+
+The reason for keeping the vector around after we've formatted the memory
+buffer is to support splitting vectors across log buffer boundaries correctly.
+If we don't keep the vector around, we do not know where the region boundaries
+are in the item, so we'd need a new encapsulation method for regions in the log
+buffer writing (i.e. double encapsulation). This would be an on-disk format
+change and as such is not desirable. It also means we'd have to write the log
+region headers in the formatting stage, which is problematic as there is per
+region state that needs to be placed into the headers during the log write.
+
+Hence we need to keep the vector, but by attaching the memory buffer to it and
+rewriting the vector addresses to point at the memory buffer we end up with a
+self-describing object that can be passed to the log buffer write code to be
+handled in exactly the same manner as the existing log vectors are handled.
+Hence we avoid needing a new on-disk format to handle items that have been
+relogged in memory.
+
+
+Tracking Changes
+
+Now that we can record transactional changes in memory in a form that allows
+them to be used without limitations, we need to be able to track and accumulate
+them so that they can be written to the log at some later point in time. The
+log item is the natural place to store this vector and buffer, and also makes sense
+to be the object that is used to track committed objects as it will always
+exist once the object has been included in a transaction.
+
+The log item is already used to track the log items that have been written to
+the log but not yet written to disk. Such log items are considered "active"
+and as such are stored in the Active Item List (AIL) which is a LSN-ordered
+double linked list. Items are inserted into this list during log buffer IO
+completion, after which they are unpinned and can be written to disk. An object
+that is in the AIL can be relogged, which causes the object to be pinned again
+and then moved forward in the AIL when the log buffer IO completes for that
+transaction.
+
+Essentially, this shows that an item that is in the AIL can still be modified
+and relogged, so any tracking must be separate to the AIL infrastructure. As
+such, we cannot reuse the AIL list pointers for tracking committed items, nor
+can we store state in any field that is protected by the AIL lock. Hence the
+committed item tracking needs it's own locks, lists and state fields in the log
+item.
+
+Similar to the AIL, tracking of committed items is done through a new list
+called the Committed Item List (CIL). The list tracks log items that have been
+committed and have formatted memory buffers attached to them. It tracks objects
+in transaction commit order, so when an object is relogged it is removed from
+it's place in the list and re-inserted at the tail. This is entirely arbitrary
+and done to make it easy for debugging - the last items in the list are the
+ones that are most recently modified. Ordering of the CIL is not necessary for
+transactional integrity (as discussed in the next section) so the ordering is
+done for convenience/sanity of the developers.
+
+
+Delayed Logging: Checkpoints
+
+When we have a log synchronisation event, commonly known as a "log force",
+all the items in the CIL must be written into the log via the log buffers.
+We need to write these items in the order that they exist in the CIL, and they
+need to be written as an atomic transaction. The need for all the objects to be
+written as an atomic transaction comes from the requirements of relogging and
+log replay - all the changes in all the objects in a given transaction must
+either be completely replayed during log recovery, or not replayed at all. If
+a transaction is not replayed because it is not complete in the log, then
+no later transactions should be replayed, either.
+
+To fulfill this requirement, we need to write the entire CIL in a single log
+transaction. Fortunately, the XFS log code has no fixed limit on the size of a
+transaction, nor does the log replay code. The only fundamental limit is that
+the transaction cannot be larger than just under half the size of the log. The
+reason for this limit is that to find the head and tail of the log, there must
+be at least one complete transaction in the log at any given time. If a
+transaction is larger than half the log, then there is the possibility that a
+crash during the write of a such a transaction could partially overwrite the
+only complete previous transaction in the log. This will result in a recovery
+failure and an inconsistent filesystem and hence we must enforce the maximum
+size of a checkpoint to be slightly less than a half the log.
+
+Apart from this size requirement, a checkpoint transaction looks no different
+to any other transaction - it contains a transaction header, a series of
+formatted log items and a commit record at the tail. From a recovery
+perspective, the checkpoint transaction is also no different - just a lot
+bigger with a lot more items in it. The worst case effect of this is that we
+might need to tune the recovery transaction object hash size.
+
+Because the checkpoint is just another transaction and all the changes to log
+items are stored as log vectors, we can use the existing log buffer writing
+code to write the changes into the log. To do this efficiently, we need to
+minimise the time we hold the CIL locked while writing the checkpoint
+transaction. The current log write code enables us to do this easily with the
+way it separates the writing of the transaction contents (the log vectors) from
+the transaction commit record, but tracking this requires us to have a
+per-checkpoint context that travels through the log write process through to
+checkpoint completion.
+
+Hence a checkpoint has a context that tracks the state of the current
+checkpoint from initiation to checkpoint completion. A new context is initiated
+at the same time a checkpoint transaction is started. That is, when we remove
+all the current items from the CIL during a checkpoint operation, we move all
+those changes into the current checkpoint context. We then initialise a new
+context and attach that to the CIL for aggregation of new transactions.
+
+This allows us to unlock the CIL immediately after transfer of all the
+committed items and effectively allow new transactions to be issued while we
+are formatting the checkpoint into the log. It also allows concurrent
+checkpoints to be written into the log buffers in the case of log force heavy
+workloads, just like the existing transaction commit code does. This, however,
+requires that we strictly order the commit records in the log so that
+checkpoint sequence order is maintained during log replay.
+
+To ensure that we can be writing an item into a checkpoint transaction at
+the same time another transaction modifies the item and inserts the log item
+into the new CIL, then checkpoint transaction commit code cannot use log items
+to store the list of log vectors that need to be written into the transaction.
+Hence log vectors need to be able to be chained together to allow them to be
+detached from the log items. That is, when the CIL is flushed the memory
+buffer and log vector attached to each log item needs to be attached to the
+checkpoint context so that the log item can be released. In diagrammatic form,
+the CIL would look like this before the flush:
+
+ CIL Head
+ |
+ V
+ Log Item <-> log vector 1 -> memory buffer
+ | -> vector array
+ V
+ Log Item <-> log vector 2 -> memory buffer
+ | -> vector array
+ V
+ ......
+ |
+ V
+ Log Item <-> log vector N-1 -> memory buffer
+ | -> vector array
+ V
+ Log Item <-> log vector N -> memory buffer
+ -> vector array
+
+And after the flush the CIL head is empty, and the checkpoint context log
+vector list would look like:
+
+ Checkpoint Context
+ |
+ V
+ log vector 1 -> memory buffer
+ | -> vector array
+ | -> Log Item
+ V
+ log vector 2 -> memory buffer
+ | -> vector array
+ | -> Log Item
+ V
+ ......
+ |
+ V
+ log vector N-1 -> memory buffer
+ | -> vector array
+ | -> Log Item
+ V
+ log vector N -> memory buffer
+ -> vector array
+ -> Log Item
+
+Once this transfer is done, the CIL can be unlocked and new transactions can
+start, while the checkpoint flush code works over the log vector chain to
+commit the checkpoint.
+
+Once the checkpoint is written into the log buffers, the checkpoint context is
+attached to the log buffer that the commit record was written to along with a
+completion callback. Log IO completion will call that callback, which can then
+run transaction committed processing for the log items (i.e. insert into AIL
+and unpin) in the log vector chain and then free the log vector chain and
+checkpoint context.
+
+Discussion Point: I am uncertain as to whether the log item is the most
+efficient way to track vectors, even though it seems like the natural way to do
+it. The fact that we walk the log items (in the CIL) just to chain the log
+vectors and break the link between the log item and the log vector means that
+we take a cache line hit for the log item list modification, then another for
+the log vector chaining. If we track by the log vectors, then we only need to
+break the link between the log item and the log vector, which means we should
+dirty only the log item cachelines. Normally I wouldn't be concerned about one
+vs two dirty cachelines except for the fact I've seen upwards of 80,000 log
+vectors in one checkpoint transaction. I'd guess this is a "measure and
+compare" situation that can be done after a working and reviewed implementation
+is in the dev tree....
+
+Delayed Logging: Checkpoint Sequencing
+
+One of the key aspects of the XFS transaction subsystem is that it tags
+committed transactions with the log sequence number of the transaction commit.
+This allows transactions to be issued asynchronously even though there may be
+future operations that cannot be completed until that transaction is fully
+committed to the log. In the rare case that a dependent operation occurs (e.g.
+re-using a freed metadata extent for a data extent), a special, optimised log
+force can be issued to force the dependent transaction to disk immediately.
+
+To do this, transactions need to record the LSN of the commit record of the
+transaction. This LSN comes directly from the log buffer the transaction is
+written into. While this works just fine for the existing transaction
+mechanism, it does not work for delayed logging because transactions are not
+written directly into the log buffers. Hence some other method of sequencing
+transactions is required.
+
+As discussed in the checkpoint section, delayed logging uses per-checkpoint
+contexts, and as such it is simple to assign a sequence number to each
+checkpoint. Because the switching of checkpoint contexts must be done
+atomically, it is simple to ensure that each new context has a monotonically
+increasing sequence number assigned to it without the need for an external
+atomic counter - we can just take the current context sequence number and add
+one to it for the new context.
+
+Then, instead of assigning a log buffer LSN to the transaction commit LSN
+during the commit, we can assign the current checkpoint sequence. This allows
+operations that track transactions that have not yet completed know what
+checkpoint sequence needs to be committed before they can continue. As a
+result, the code that forces the log to a specific LSN now needs to ensure that
+the log forces to a specific checkpoint.
+
+To ensure that we can do this, we need to track all the checkpoint contexts
+that are currently committing to the log. When we flush a checkpoint, the
+context gets added to a "committing" list which can be searched. When a
+checkpoint commit completes, it is removed from the committing list. Because
+the checkpoint context records the LSN of the commit record for the checkpoint,
+we can also wait on the log buffer that contains the commit record, thereby
+using the existing log force mechanisms to execute synchronous forces.
+
+It should be noted that the synchronous forces may need to be extended with
+mitigation algorithms similar to the current log buffer code to allow
+aggregation of multiple synchronous transactions if there are already
+synchronous transactions being flushed. Investigation of the performance of the
+current design is needed before making any decisions here.
+
+The main concern with log forces is to ensure that all the previous checkpoints
+are also committed to disk before the one we need to wait for. Therefore we
+need to check that all the prior contexts in the committing list are also
+complete before waiting on the one we need to complete. We do this
+synchronisation in the log force code so that we don't need to wait anywhere
+else for such serialisation - it only matters when we do a log force.
+
+The only remaining complexity is that a log force now also has to handle the
+case where the forcing sequence number is the same as the current context. That
+is, we need to flush the CIL and potentially wait for it to complete. This is a
+simple addition to the existing log forcing code to check the sequence numbers
+and push if required. Indeed, placing the current sequence checkpoint flush in
+the log force code enables the current mechanism for issuing synchronous
+transactions to remain untouched (i.e. commit an asynchronous transaction, then
+force the log at the LSN of that transaction) and so the higher level code
+behaves the same regardless of whether delayed logging is being used or not.
+
+Delayed Logging: Checkpoint Log Space Accounting
+
+The big issue for a checkpoint transaction is the log space reservation for the
+transaction. We don't know how big a checkpoint transaction is going to be
+ahead of time, nor how many log buffers it will take to write out, nor the
+number of split log vector regions are going to be used. We can track the
+amount of log space required as we add items to the commit item list, but we
+still need to reserve the space in the log for the checkpoint.
+
+A typical transaction reserves enough space in the log for the worst case space
+usage of the transaction. The reservation accounts for log record headers,
+transaction and region headers, headers for split regions, buffer tail padding,
+etc. as well as the actual space for all the changed metadata in the
+transaction. While some of this is fixed overhead, much of it is dependent on
+the size of the transaction and the number of regions being logged (the number
+of log vectors in the transaction).
+
+An example of the differences would be logging directory changes versus logging
+inode changes. If you modify lots of inode cores (e.g. chmod -R g+w *), then
+there are lots of transactions that only contain an inode core and an inode log
+format structure. That is, two vectors totaling roughly 150 bytes. If we modify
+10,000 inodes, we have about 1.5MB of metadata to write in 20,000 vectors. Each
+vector is 12 bytes, so the total to be logged is approximately 1.75MB. In
+comparison, if we are logging full directory buffers, they are typically 4KB
+each, so we in 1.5MB of directory buffers we'd have roughly 400 buffers and a
+buffer format structure for each buffer - roughly 800 vectors or 1.51MB total
+space. From this, it should be obvious that a static log space reservation is
+not particularly flexible and is difficult to select the "optimal value" for
+all workloads.
+
+Further, if we are going to use a static reservation, which bit of the entire
+reservation does it cover? We account for space used by the transaction
+reservation by tracking the space currently used by the object in the CIL and
+then calculating the increase or decrease in space used as the object is
+relogged. This allows for a checkpoint reservation to only have to account for
+log buffer metadata used such as log header records.
+
+However, even using a static reservation for just the log metadata is
+problematic. Typically log record headers use at least 16KB of log space per
+1MB of log space consumed (512 bytes per 32k) and the reservation needs to be
+large enough to handle arbitrary sized checkpoint transactions. This
+reservation needs to be made before the checkpoint is started, and we need to
+be able to reserve the space without sleeping. For a 8MB checkpoint, we need a
+reservation of around 150KB, which is a non-trivial amount of space.
+
+A static reservation needs to manipulate the log grant counters - we can take a
+permanent reservation on the space, but we still need to make sure we refresh
+the write reservation (the actual space available to the transaction) after
+every checkpoint transaction completion. Unfortunately, if this space is not
+available when required, then the regrant code will sleep waiting for it.
+
+The problem with this is that it can lead to deadlocks as we may need to commit
+checkpoints to be able to free up log space (refer back to the description of
+rolling transactions for an example of this). Hence we *must* always have
+space available in the log if we are to use static reservations, and that is
+very difficult and complex to arrange. It is possible to do, but there is a
+simpler way.
+
+The simpler way of doing this is tracking the entire log space used by the
+items in the CIL and using this to dynamically calculate the amount of log
+space required by the log metadata. If this log metadata space changes as a
+result of a transaction commit inserting a new memory buffer into the CIL, then
+the difference in space required is removed from the transaction that causes
+the change. Transactions at this level will *always* have enough space
+available in their reservation for this as they have already reserved the
+maximal amount of log metadata space they require, and such a delta reservation
+will always be less than or equal to the maximal amount in the reservation.
+
+Hence we can grow the checkpoint transaction reservation dynamically as items
+are added to the CIL and avoid the need for reserving and regranting log space
+up front. This avoids deadlocks and removes a blocking point from the
+checkpoint flush code.
+
+As mentioned early, transactions can't grow to more than half the size of the
+log. Hence as part of the reservation growing, we need to also check the size
+of the reservation against the maximum allowed transaction size. If we reach
+the maximum threshold, we need to push the CIL to the log. This is effectively
+a "background flush" and is done on demand. This is identical to
+a CIL push triggered by a log force, only that there is no waiting for the
+checkpoint commit to complete. This background push is checked and executed by
+transaction commit code.
+
+If the transaction subsystem goes idle while we still have items in the CIL,
+they will be flushed by the periodic log force issued by the xfssyncd. This log
+force will push the CIL to disk, and if the transaction subsystem stays idle,
+allow the idle log to be covered (effectively marked clean) in exactly the same
+manner that is done for the existing logging method. A discussion point is
+whether this log force needs to be done more frequently than the current rate
+which is once every 30s.
+
+
+Delayed Logging: Log Item Pinning
+
+Currently log items are pinned during transaction commit while the items are
+still locked. This happens just after the items are formatted, though it could
+be done any time before the items are unlocked. The result of this mechanism is
+that items get pinned once for every transaction that is committed to the log
+buffers. Hence items that are relogged in the log buffers will have a pin count
+for every outstanding transaction they were dirtied in. When each of these
+transactions is completed, they will unpin the item once. As a result, the item
+only becomes unpinned when all the transactions complete and there are no
+pending transactions. Thus the pinning and unpinning of a log item is symmetric
+as there is a 1:1 relationship with transaction commit and log item completion.
+
+For delayed logging, however, we have an asymmetric transaction commit to
+completion relationship. Every time an object is relogged in the CIL it goes
+through the commit process without a corresponding completion being registered.
+That is, we now have a many-to-one relationship between transaction commit and
+log item completion. The result of this is that pinning and unpinning of the
+log items becomes unbalanced if we retain the "pin on transaction commit, unpin
+on transaction completion" model.
+
+To keep pin/unpin symmetry, the algorithm needs to change to a "pin on
+insertion into the CIL, unpin on checkpoint completion". In other words, the
+pinning and unpinning becomes symmetric around a checkpoint context. We have to
+pin the object the first time it is inserted into the CIL - if it is already in
+the CIL during a transaction commit, then we do not pin it again. Because there
+can be multiple outstanding checkpoint contexts, we can still see elevated pin
+counts, but as each checkpoint completes the pin count will retain the correct
+value according to it's context.
+
+Just to make matters more slightly more complex, this checkpoint level context
+for the pin count means that the pinning of an item must take place under the
+CIL commit/flush lock. If we pin the object outside this lock, we cannot
+guarantee which context the pin count is associated with. This is because of
+the fact pinning the item is dependent on whether the item is present in the
+current CIL or not. If we don't pin the CIL first before we check and pin the
+object, we have a race with CIL being flushed between the check and the pin
+(or not pinning, as the case may be). Hence we must hold the CIL flush/commit
+lock to guarantee that we pin the items correctly.
+
+Delayed Logging: Concurrent Scalability
+
+A fundamental requirement for the CIL is that accesses through transaction
+commits must scale to many concurrent commits. The current transaction commit
+code does not break down even when there are transactions coming from 2048
+processors at once. The current transaction code does not go any faster than if
+there was only one CPU using it, but it does not slow down either.
+
+As a result, the delayed logging transaction commit code needs to be designed
+for concurrency from the ground up. It is obvious that there are serialisation
+points in the design - the three important ones are:
+
+ 1. Locking out new transaction commits while flushing the CIL
+ 2. Adding items to the CIL and updating item space accounting
+ 3. Checkpoint commit ordering
+
+Looking at the transaction commit and CIL flushing interactions, it is clear
+that we have a many-to-one interaction here. That is, the only restriction on
+the number of concurrent transactions that can be trying to commit at once is
+the amount of space available in the log for their reservations. The practical
+limit here is in the order of several hundred concurrent transactions for a
+128MB log, which means that it is generally one per CPU in a machine.
+
+The amount of time a transaction commit needs to hold out a flush is a
+relatively long period of time - the pinning of log items needs to be done
+while we are holding out a CIL flush, so at the moment that means it is held
+across the formatting of the objects into memory buffers (i.e. while memcpy()s
+are in progress). Ultimately a two pass algorithm where the formatting is done
+separately to the pinning of objects could be used to reduce the hold time of
+the transaction commit side.
+
+Because of the number of potential transaction commit side holders, the lock
+really needs to be a sleeping lock - if the CIL flush takes the lock, we do not
+want every other CPU in the machine spinning on the CIL lock. Given that
+flushing the CIL could involve walking a list of tens of thousands of log
+items, it will get held for a significant time and so spin contention is a
+significant concern. Preventing lots of CPUs spinning doing nothing is the
+main reason for choosing a sleeping lock even though nothing in either the
+transaction commit or CIL flush side sleeps with the lock held.
+
+It should also be noted that CIL flushing is also a relatively rare operation
+compared to transaction commit for asynchronous transaction workloads - only
+time will tell if using a read-write semaphore for exclusion will limit
+transaction commit concurrency due to cache line bouncing of the lock on the
+read side.
+
+The second serialisation point is on the transaction commit side where items
+are inserted into the CIL. Because transactions can enter this code
+concurrently, the CIL needs to be protected separately from the above
+commit/flush exclusion. It also needs to be an exclusive lock but it is only
+held for a very short time and so a spin lock is appropriate here. It is
+possible that this lock will become a contention point, but given the short
+hold time once per transaction I think that contention is unlikely.
+
+The final serialisation point is the checkpoint commit record ordering code
+that is run as part of the checkpoint commit and log force sequencing. The code
+path that triggers a CIL flush (i.e. whatever triggers the log force) will enter
+an ordering loop after writing all the log vectors into the log buffers but
+before writing the commit record. This loop walks the list of committing
+checkpoints and needs to block waiting for checkpoints to complete their commit
+record write. As a result it needs a lock and a wait variable. Log force
+sequencing also requires the same lock, list walk, and blocking mechanism to
+ensure completion of checkpoints.
+
+These two sequencing operations can use the mechanism even though the
+events they are waiting for are different. The checkpoint commit record
+sequencing needs to wait until checkpoint contexts contain a commit LSN
+(obtained through completion of a commit record write) while log force
+sequencing needs to wait until previous checkpoint contexts are removed from
+the committing list (i.e. they've completed). A simple wait variable and
+broadcast wakeups (thundering herds) has been used to implement these two
+serialisation queues. They use the same lock as the CIL, too. If we see too
+much contention on the CIL lock, or too many context switches as a result of
+the broadcast wakeups these operations can be put under a new spinlock and
+given separate wait lists to reduce lock contention and the number of processes
+woken by the wrong event.
+
+
+Lifecycle Changes
+
+The existing log item life cycle is as follows:
+
+ 1. Transaction allocate
+ 2. Transaction reserve
+ 3. Lock item
+ 4. Join item to transaction
+ If not already attached,
+ Allocate log item
+ Attach log item to owner item
+ Attach log item to transaction
+ 5. Modify item
+ Record modifications in log item
+ 6. Transaction commit
+ Pin item in memory
+ Format item into log buffer
+ Write commit LSN into transaction
+ Unlock item
+ Attach transaction to log buffer
+
+ <log buffer IO dispatched>
+ <log buffer IO completes>
+
+ 7. Transaction completion
+ Mark log item committed
+ Insert log item into AIL
+ Write commit LSN into log item
+ Unpin log item
+ 8. AIL traversal
+ Lock item
+ Mark log item clean
+ Flush item to disk
+
+ <item IO completion>
+
+ 9. Log item removed from AIL
+ Moves log tail
+ Item unlocked
+
+Essentially, steps 1-6 operate independently from step 7, which is also
+independent of steps 8-9. An item can be locked in steps 1-6 or steps 8-9
+at the same time step 7 is occurring, but only steps 1-6 or 8-9 can occur
+at the same time. If the log item is in the AIL or between steps 6 and 7
+and steps 1-6 are re-entered, then the item is relogged. Only when steps 8-9
+are entered and completed is the object considered clean.
+
+With delayed logging, there are new steps inserted into the life cycle:
+
+ 1. Transaction allocate
+ 2. Transaction reserve
+ 3. Lock item
+ 4. Join item to transaction
+ If not already attached,
+ Allocate log item
+ Attach log item to owner item
+ Attach log item to transaction
+ 5. Modify item
+ Record modifications in log item
+ 6. Transaction commit
+ Pin item in memory if not pinned in CIL
+ Format item into log vector + buffer
+ Attach log vector and buffer to log item
+ Insert log item into CIL
+ Write CIL context sequence into transaction
+ Unlock item
+
+ <next log force>
+
+ 7. CIL push
+ lock CIL flush
+ Chain log vectors and buffers together
+ Remove items from CIL
+ unlock CIL flush
+ write log vectors into log
+ sequence commit records
+ attach checkpoint context to log buffer
+
+ <log buffer IO dispatched>
+ <log buffer IO completes>
+
+ 8. Checkpoint completion
+ Mark log item committed
+ Insert item into AIL
+ Write commit LSN into log item
+ Unpin log item
+ 9. AIL traversal
+ Lock item
+ Mark log item clean
+ Flush item to disk
+ <item IO completion>
+ 10. Log item removed from AIL
+ Moves log tail
+ Item unlocked
+
+From this, it can be seen that the only life cycle differences between the two
+logging methods are in the middle of the life cycle - they still have the same
+beginning and end and execution constraints. The only differences are in the
+committing of the log items to the log itself and the completion processing.
+Hence delayed logging should not introduce any constraints on log item
+behaviour, allocation or freeing that don't already exist.
+
+As a result of this zero-impact "insertion" of delayed logging infrastructure
+and the design of the internal structures to avoid on disk format changes, we
+can basically switch between delayed logging and the existing mechanism with a
+mount option. Fundamentally, there is no reason why the log manager would not
+be able to swap methods automatically and transparently depending on load
+characteristics, but this should not be necessary if delayed logging works as
+designed.
diff --git a/Documentation/filesystems/xfs-self-describing-metadata.txt b/Documentation/filesystems/xfs-self-describing-metadata.txt
new file mode 100644
index 000000000..05aa45516
--- /dev/null
+++ b/Documentation/filesystems/xfs-self-describing-metadata.txt
@@ -0,0 +1,350 @@
+XFS Self Describing Metadata
+----------------------------
+
+Introduction
+------------
+
+The largest scalability problem facing XFS is not one of algorithmic
+scalability, but of verification of the filesystem structure. Scalabilty of the
+structures and indexes on disk and the algorithms for iterating them are
+adequate for supporting PB scale filesystems with billions of inodes, however it
+is this very scalability that causes the verification problem.
+
+Almost all metadata on XFS is dynamically allocated. The only fixed location
+metadata is the allocation group headers (SB, AGF, AGFL and AGI), while all
+other metadata structures need to be discovered by walking the filesystem
+structure in different ways. While this is already done by userspace tools for
+validating and repairing the structure, there are limits to what they can
+verify, and this in turn limits the supportable size of an XFS filesystem.
+
+For example, it is entirely possible to manually use xfs_db and a bit of
+scripting to analyse the structure of a 100TB filesystem when trying to
+determine the root cause of a corruption problem, but it is still mainly a
+manual task of verifying that things like single bit errors or misplaced writes
+weren't the ultimate cause of a corruption event. It may take a few hours to a
+few days to perform such forensic analysis, so for at this scale root cause
+analysis is entirely possible.
+
+However, if we scale the filesystem up to 1PB, we now have 10x as much metadata
+to analyse and so that analysis blows out towards weeks/months of forensic work.
+Most of the analysis work is slow and tedious, so as the amount of analysis goes
+up, the more likely that the cause will be lost in the noise. Hence the primary
+concern for supporting PB scale filesystems is minimising the time and effort
+required for basic forensic analysis of the filesystem structure.
+
+
+Self Describing Metadata
+------------------------
+
+One of the problems with the current metadata format is that apart from the
+magic number in the metadata block, we have no other way of identifying what it
+is supposed to be. We can't even identify if it is the right place. Put simply,
+you can't look at a single metadata block in isolation and say "yes, it is
+supposed to be there and the contents are valid".
+
+Hence most of the time spent on forensic analysis is spent doing basic
+verification of metadata values, looking for values that are in range (and hence
+not detected by automated verification checks) but are not correct. Finding and
+understanding how things like cross linked block lists (e.g. sibling
+pointers in a btree end up with loops in them) are the key to understanding what
+went wrong, but it is impossible to tell what order the blocks were linked into
+each other or written to disk after the fact.
+
+Hence we need to record more information into the metadata to allow us to
+quickly determine if the metadata is intact and can be ignored for the purpose
+of analysis. We can't protect against every possible type of error, but we can
+ensure that common types of errors are easily detectable. Hence the concept of
+self describing metadata.
+
+The first, fundamental requirement of self describing metadata is that the
+metadata object contains some form of unique identifier in a well known
+location. This allows us to identify the expected contents of the block and
+hence parse and verify the metadata object. IF we can't independently identify
+the type of metadata in the object, then the metadata doesn't describe itself
+very well at all!
+
+Luckily, almost all XFS metadata has magic numbers embedded already - only the
+AGFL, remote symlinks and remote attribute blocks do not contain identifying
+magic numbers. Hence we can change the on-disk format of all these objects to
+add more identifying information and detect this simply by changing the magic
+numbers in the metadata objects. That is, if it has the current magic number,
+the metadata isn't self identifying. If it contains a new magic number, it is
+self identifying and we can do much more expansive automated verification of the
+metadata object at runtime, during forensic analysis or repair.
+
+As a primary concern, self describing metadata needs some form of overall
+integrity checking. We cannot trust the metadata if we cannot verify that it has
+not been changed as a result of external influences. Hence we need some form of
+integrity check, and this is done by adding CRC32c validation to the metadata
+block. If we can verify the block contains the metadata it was intended to
+contain, a large amount of the manual verification work can be skipped.
+
+CRC32c was selected as metadata cannot be more than 64k in length in XFS and
+hence a 32 bit CRC is more than sufficient to detect multi-bit errors in
+metadata blocks. CRC32c is also now hardware accelerated on common CPUs so it is
+fast. So while CRC32c is not the strongest of possible integrity checks that
+could be used, it is more than sufficient for our needs and has relatively
+little overhead. Adding support for larger integrity fields and/or algorithms
+does really provide any extra value over CRC32c, but it does add a lot of
+complexity and so there is no provision for changing the integrity checking
+mechanism.
+
+Self describing metadata needs to contain enough information so that the
+metadata block can be verified as being in the correct place without needing to
+look at any other metadata. This means it needs to contain location information.
+Just adding a block number to the metadata is not sufficient to protect against
+mis-directed writes - a write might be misdirected to the wrong LUN and so be
+written to the "correct block" of the wrong filesystem. Hence location
+information must contain a filesystem identifier as well as a block number.
+
+Another key information point in forensic analysis is knowing who the metadata
+block belongs to. We already know the type, the location, that it is valid
+and/or corrupted, and how long ago that it was last modified. Knowing the owner
+of the block is important as it allows us to find other related metadata to
+determine the scope of the corruption. For example, if we have a extent btree
+object, we don't know what inode it belongs to and hence have to walk the entire
+filesystem to find the owner of the block. Worse, the corruption could mean that
+no owner can be found (i.e. it's an orphan block), and so without an owner field
+in the metadata we have no idea of the scope of the corruption. If we have an
+owner field in the metadata object, we can immediately do top down validation to
+determine the scope of the problem.
+
+Different types of metadata have different owner identifiers. For example,
+directory, attribute and extent tree blocks are all owned by an inode, whilst
+freespace btree blocks are owned by an allocation group. Hence the size and
+contents of the owner field are determined by the type of metadata object we are
+looking at. The owner information can also identify misplaced writes (e.g.
+freespace btree block written to the wrong AG).
+
+Self describing metadata also needs to contain some indication of when it was
+written to the filesystem. One of the key information points when doing forensic
+analysis is how recently the block was modified. Correlation of set of corrupted
+metadata blocks based on modification times is important as it can indicate
+whether the corruptions are related, whether there's been multiple corruption
+events that lead to the eventual failure, and even whether there are corruptions
+present that the run-time verification is not detecting.
+
+For example, we can determine whether a metadata object is supposed to be free
+space or still allocated if it is still referenced by its owner by looking at
+when the free space btree block that contains the block was last written
+compared to when the metadata object itself was last written. If the free space
+block is more recent than the object and the object's owner, then there is a
+very good chance that the block should have been removed from the owner.
+
+To provide this "written timestamp", each metadata block gets the Log Sequence
+Number (LSN) of the most recent transaction it was modified on written into it.
+This number will always increase over the life of the filesystem, and the only
+thing that resets it is running xfs_repair on the filesystem. Further, by use of
+the LSN we can tell if the corrupted metadata all belonged to the same log
+checkpoint and hence have some idea of how much modification occurred between
+the first and last instance of corrupt metadata on disk and, further, how much
+modification occurred between the corruption being written and when it was
+detected.
+
+Runtime Validation
+------------------
+
+Validation of self-describing metadata takes place at runtime in two places:
+
+ - immediately after a successful read from disk
+ - immediately prior to write IO submission
+
+The verification is completely stateless - it is done independently of the
+modification process, and seeks only to check that the metadata is what it says
+it is and that the metadata fields are within bounds and internally consistent.
+As such, we cannot catch all types of corruption that can occur within a block
+as there may be certain limitations that operational state enforces of the
+metadata, or there may be corruption of interblock relationships (e.g. corrupted
+sibling pointer lists). Hence we still need stateful checking in the main code
+body, but in general most of the per-field validation is handled by the
+verifiers.
+
+For read verification, the caller needs to specify the expected type of metadata
+that it should see, and the IO completion process verifies that the metadata
+object matches what was expected. If the verification process fails, then it
+marks the object being read as EFSCORRUPTED. The caller needs to catch this
+error (same as for IO errors), and if it needs to take special action due to a
+verification error it can do so by catching the EFSCORRUPTED error value. If we
+need more discrimination of error type at higher levels, we can define new
+error numbers for different errors as necessary.
+
+The first step in read verification is checking the magic number and determining
+whether CRC validating is necessary. If it is, the CRC32c is calculated and
+compared against the value stored in the object itself. Once this is validated,
+further checks are made against the location information, followed by extensive
+object specific metadata validation. If any of these checks fail, then the
+buffer is considered corrupt and the EFSCORRUPTED error is set appropriately.
+
+Write verification is the opposite of the read verification - first the object
+is extensively verified and if it is OK we then update the LSN from the last
+modification made to the object, After this, we calculate the CRC and insert it
+into the object. Once this is done the write IO is allowed to continue. If any
+error occurs during this process, the buffer is again marked with a EFSCORRUPTED
+error for the higher layers to catch.
+
+Structures
+----------
+
+A typical on-disk structure needs to contain the following information:
+
+struct xfs_ondisk_hdr {
+ __be32 magic; /* magic number */
+ __be32 crc; /* CRC, not logged */
+ uuid_t uuid; /* filesystem identifier */
+ __be64 owner; /* parent object */
+ __be64 blkno; /* location on disk */
+ __be64 lsn; /* last modification in log, not logged */
+};
+
+Depending on the metadata, this information may be part of a header structure
+separate to the metadata contents, or may be distributed through an existing
+structure. The latter occurs with metadata that already contains some of this
+information, such as the superblock and AG headers.
+
+Other metadata may have different formats for the information, but the same
+level of information is generally provided. For example:
+
+ - short btree blocks have a 32 bit owner (ag number) and a 32 bit block
+ number for location. The two of these combined provide the same
+ information as @owner and @blkno in eh above structure, but using 8
+ bytes less space on disk.
+
+ - directory/attribute node blocks have a 16 bit magic number, and the
+ header that contains the magic number has other information in it as
+ well. hence the additional metadata headers change the overall format
+ of the metadata.
+
+A typical buffer read verifier is structured as follows:
+
+#define XFS_FOO_CRC_OFF offsetof(struct xfs_ondisk_hdr, crc)
+
+static void
+xfs_foo_read_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ if ((xfs_sb_version_hascrc(&mp->m_sb) &&
+ !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
+ XFS_FOO_CRC_OFF)) ||
+ !xfs_foo_verify(bp)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ }
+}
+
+The code ensures that the CRC is only checked if the filesystem has CRCs enabled
+by checking the superblock of the feature bit, and then if the CRC verifies OK
+(or is not needed) it verifies the actual contents of the block.
+
+The verifier function will take a couple of different forms, depending on
+whether the magic number can be used to determine the format of the block. In
+the case it can't, the code is structured as follows:
+
+static bool
+xfs_foo_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_ondisk_hdr *hdr = bp->b_addr;
+
+ if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
+ return false;
+
+ if (!xfs_sb_version_hascrc(&mp->m_sb)) {
+ if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (bp->b_bn != be64_to_cpu(hdr->blkno))
+ return false;
+ if (hdr->owner == 0)
+ return false;
+ }
+
+ /* object specific verification checks here */
+
+ return true;
+}
+
+If there are different magic numbers for the different formats, the verifier
+will look like:
+
+static bool
+xfs_foo_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_ondisk_hdr *hdr = bp->b_addr;
+
+ if (hdr->magic == cpu_to_be32(XFS_FOO_CRC_MAGIC)) {
+ if (!uuid_equal(&hdr->uuid, &mp->m_sb.sb_uuid))
+ return false;
+ if (bp->b_bn != be64_to_cpu(hdr->blkno))
+ return false;
+ if (hdr->owner == 0)
+ return false;
+ } else if (hdr->magic != cpu_to_be32(XFS_FOO_MAGIC))
+ return false;
+
+ /* object specific verification checks here */
+
+ return true;
+}
+
+Write verifiers are very similar to the read verifiers, they just do things in
+the opposite order to the read verifiers. A typical write verifier:
+
+static void
+xfs_foo_write_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ if (!xfs_foo_verify(bp)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
+ xfs_buf_ioerror(bp, EFSCORRUPTED);
+ return;
+ }
+
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return;
+
+
+ if (bip) {
+ struct xfs_ondisk_hdr *hdr = bp->b_addr;
+ hdr->lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ }
+ xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_FOO_CRC_OFF);
+}
+
+This will verify the internal structure of the metadata before we go any
+further, detecting corruptions that have occurred as the metadata has been
+modified in memory. If the metadata verifies OK, and CRCs are enabled, we then
+update the LSN field (when it was last modified) and calculate the CRC on the
+metadata. Once this is done, we can issue the IO.
+
+Inodes and Dquots
+-----------------
+
+Inodes and dquots are special snowflakes. They have per-object CRC and
+self-identifiers, but they are packed so that there are multiple objects per
+buffer. Hence we do not use per-buffer verifiers to do the work of per-object
+verification and CRC calculations. The per-buffer verifiers simply perform basic
+identification of the buffer - that they contain inodes or dquots, and that
+there are magic numbers in all the expected spots. All further CRC and
+verification checks are done when each inode is read from or written back to the
+buffer.
+
+The structure of the verifiers and the identifiers checks is very similar to the
+buffer code described above. The only difference is where they are called. For
+example, inode read verification is done in xfs_iread() when the inode is first
+read out of the buffer and the struct xfs_inode is instantiated. The inode is
+already extensively verified during writeback in xfs_iflush_int, so the only
+addition here is to add the LSN and CRC to the inode as it is copied back into
+the buffer.
+
+XXX: inode unlinked list modification doesn't recalculate the inode CRC! None of
+the unlinked list modifications check or update CRCs, neither during unlink nor
+log recovery. So, it's gone unnoticed until now. This won't matter immediately -
+repair will probably complain about it - but it needs to be fixed.
+
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
new file mode 100644
index 000000000..5a5a05582
--- /dev/null
+++ b/Documentation/filesystems/xfs.txt
@@ -0,0 +1,350 @@
+
+The SGI XFS Filesystem
+======================
+
+XFS is a high performance journaling filesystem which originated
+on the SGI IRIX platform. It is completely multi-threaded, can
+support large files and large filesystems, extended attributes,
+variable block sizes, is extent based, and makes extensive use of
+Btrees (directories, extents, free space) to aid both performance
+and scalability.
+
+Refer to the documentation at http://oss.sgi.com/projects/xfs/
+for further details. This implementation is on-disk compatible
+with the IRIX version of XFS.
+
+
+Mount Options
+=============
+
+When mounting an XFS filesystem, the following options are accepted.
+For boolean mount options, the names with the (*) suffix is the
+default behaviour.
+
+ allocsize=size
+ Sets the buffered I/O end-of-file preallocation size when
+ doing delayed allocation writeout (default size is 64KiB).
+ Valid values for this option are page size (typically 4KiB)
+ through to 1GiB, inclusive, in power-of-2 increments.
+
+ The default behaviour is for dynamic end-of-file
+ preallocation size, which uses a set of heuristics to
+ optimise the preallocation size based on the current
+ allocation patterns within the file and the access patterns
+ to the file. Specifying a fixed allocsize value turns off
+ the dynamic behaviour.
+
+ attr2
+ noattr2
+ The options enable/disable an "opportunistic" improvement to
+ be made in the way inline extended attributes are stored
+ on-disk. When the new form is used for the first time when
+ attr2 is selected (either when setting or removing extended
+ attributes) the on-disk superblock feature bit field will be
+ updated to reflect this format being in use.
+
+ The default behaviour is determined by the on-disk feature
+ bit indicating that attr2 behaviour is active. If either
+ mount option it set, then that becomes the new default used
+ by the filesystem.
+
+ CRC enabled filesystems always use the attr2 format, and so
+ will reject the noattr2 mount option if it is set.
+
+ barrier (*)
+ nobarrier
+ Enables/disables the use of block layer write barriers for
+ writes into the journal and for data integrity operations.
+ This allows for drive level write caching to be enabled, for
+ devices that support write barriers.
+
+ discard
+ nodiscard (*)
+ Enable/disable the issuing of commands to let the block
+ device reclaim space freed by the filesystem. This is
+ useful for SSD devices, thinly provisioned LUNs and virtual
+ machine images, but may have a performance impact.
+
+ Note: It is currently recommended that you use the fstrim
+ application to discard unused blocks rather than the discard
+ mount option because the performance impact of this option
+ is quite severe.
+
+ grpid/bsdgroups
+ nogrpid/sysvgroups (*)
+ These options define what group ID a newly created file
+ gets. When grpid is set, it takes the group ID of the
+ directory in which it is created; otherwise it takes the
+ fsgid of the current process, unless the directory has the
+ setgid bit set, in which case it takes the gid from the
+ parent directory, and also gets the setgid bit set if it is
+ a directory itself.
+
+ filestreams
+ Make the data allocator use the filestreams allocation mode
+ across the entire filesystem rather than just on directories
+ configured to use it.
+
+ ikeep
+ noikeep (*)
+ When ikeep is specified, XFS does not delete empty inode
+ clusters and keeps them around on disk. When noikeep is
+ specified, empty inode clusters are returned to the free
+ space pool.
+
+ inode32
+ inode64 (*)
+ When inode32 is specified, it indicates that XFS limits
+ inode creation to locations which will not result in inode
+ numbers with more than 32 bits of significance.
+
+ When inode64 is specified, it indicates that XFS is allowed
+ to create inodes at any location in the filesystem,
+ including those which will result in inode numbers occupying
+ more than 32 bits of significance.
+
+ inode32 is provided for backwards compatibility with older
+ systems and applications, since 64 bits inode numbers might
+ cause problems for some applications that cannot handle
+ large inode numbers. If applications are in use which do
+ not handle inode numbers bigger than 32 bits, the inode32
+ option should be specified.
+
+
+ largeio
+ nolargeio (*)
+ If "nolargeio" is specified, the optimal I/O reported in
+ st_blksize by stat(2) will be as small as possible to allow
+ user applications to avoid inefficient read/modify/write
+ I/O. This is typically the page size of the machine, as
+ this is the granularity of the page cache.
+
+ If "largeio" specified, a filesystem that was created with a
+ "swidth" specified will return the "swidth" value (in bytes)
+ in st_blksize. If the filesystem does not have a "swidth"
+ specified but does specify an "allocsize" then "allocsize"
+ (in bytes) will be returned instead. Otherwise the behaviour
+ is the same as if "nolargeio" was specified.
+
+ logbufs=value
+ Set the number of in-memory log buffers. Valid numbers
+ range from 2-8 inclusive.
+
+ The default value is 8 buffers.
+
+ If the memory cost of 8 log buffers is too high on small
+ systems, then it may be reduced at some cost to performance
+ on metadata intensive workloads. The logbsize option below
+ controls the size of each buffer and so is also relevant to
+ this case.
+
+ logbsize=value
+ Set the size of each in-memory log buffer. The size may be
+ specified in bytes, or in kilobytes with a "k" suffix.
+ Valid sizes for version 1 and version 2 logs are 16384 (16k)
+ and 32768 (32k). Valid sizes for version 2 logs also
+ include 65536 (64k), 131072 (128k) and 262144 (256k). The
+ logbsize must be an integer multiple of the log
+ stripe unit configured at mkfs time.
+
+ The default value for for version 1 logs is 32768, while the
+ default value for version 2 logs is MAX(32768, log_sunit).
+
+ logdev=device and rtdev=device
+ Use an external log (metadata journal) and/or real-time device.
+ An XFS filesystem has up to three parts: a data section, a log
+ section, and a real-time section. The real-time section is
+ optional, and the log section can be separate from the data
+ section or contained within it.
+
+ noalign
+ Data allocations will not be aligned at stripe unit
+ boundaries. This is only relevant to filesystems created
+ with non-zero data alignment parameters (sunit, swidth) by
+ mkfs.
+
+ norecovery
+ The filesystem will be mounted without running log recovery.
+ If the filesystem was not cleanly unmounted, it is likely to
+ be inconsistent when mounted in "norecovery" mode.
+ Some files or directories may not be accessible because of this.
+ Filesystems mounted "norecovery" must be mounted read-only or
+ the mount will fail.
+
+ nouuid
+ Don't check for double mounted file systems using the file
+ system uuid. This is useful to mount LVM snapshot volumes,
+ and often used in combination with "norecovery" for mounting
+ read-only snapshots.
+
+ noquota
+ Forcibly turns off all quota accounting and enforcement
+ within the filesystem.
+
+ uquota/usrquota/uqnoenforce/quota
+ User disk quota accounting enabled, and limits (optionally)
+ enforced. Refer to xfs_quota(8) for further details.
+
+ gquota/grpquota/gqnoenforce
+ Group disk quota accounting enabled and limits (optionally)
+ enforced. Refer to xfs_quota(8) for further details.
+
+ pquota/prjquota/pqnoenforce
+ Project disk quota accounting enabled and limits (optionally)
+ enforced. Refer to xfs_quota(8) for further details.
+
+ sunit=value and swidth=value
+ Used to specify the stripe unit and width for a RAID device
+ or a stripe volume. "value" must be specified in 512-byte
+ block units. These options are only relevant to filesystems
+ that were created with non-zero data alignment parameters.
+
+ The sunit and swidth parameters specified must be compatible
+ with the existing filesystem alignment characteristics. In
+ general, that means the only valid changes to sunit are
+ increasing it by a power-of-2 multiple. Valid swidth values
+ are any integer multiple of a valid sunit value.
+
+ Typically the only time these mount options are necessary if
+ after an underlying RAID device has had it's geometry
+ modified, such as adding a new disk to a RAID5 lun and
+ reshaping it.
+
+ swalloc
+ Data allocations will be rounded up to stripe width boundaries
+ when the current end of file is being extended and the file
+ size is larger than the stripe width size.
+
+ wsync
+ When specified, all filesystem namespace operations are
+ executed synchronously. This ensures that when the namespace
+ operation (create, unlink, etc) completes, the change to the
+ namespace is on stable storage. This is useful in HA setups
+ where failover must not result in clients seeing
+ inconsistent namespace presentation during or after a
+ failover event.
+
+
+Deprecated Mount Options
+========================
+
+None at present.
+
+
+Removed Mount Options
+=====================
+
+ Name Removed
+ ---- -------
+ delaylog/nodelaylog v3.20
+ ihashsize v3.20
+ irixsgid v3.20
+ osyncisdsync/osyncisosync v3.20
+
+
+sysctls
+=======
+
+The following sysctls are available for the XFS filesystem:
+
+ fs.xfs.stats_clear (Min: 0 Default: 0 Max: 1)
+ Setting this to "1" clears accumulated XFS statistics
+ in /proc/fs/xfs/stat. It then immediately resets to "0".
+
+ fs.xfs.xfssyncd_centisecs (Min: 100 Default: 3000 Max: 720000)
+ The interval at which the filesystem flushes metadata
+ out to disk and runs internal cache cleanup routines.
+
+ fs.xfs.filestream_centisecs (Min: 1 Default: 3000 Max: 360000)
+ The interval at which the filesystem ages filestreams cache
+ references and returns timed-out AGs back to the free stream
+ pool.
+
+ fs.xfs.speculative_prealloc_lifetime
+ (Units: seconds Min: 1 Default: 300 Max: 86400)
+ The interval at which the background scanning for inodes
+ with unused speculative preallocation runs. The scan
+ removes unused preallocation from clean inodes and releases
+ the unused space back to the free pool.
+
+ fs.xfs.error_level (Min: 0 Default: 3 Max: 11)
+ A volume knob for error reporting when internal errors occur.
+ This will generate detailed messages & backtraces for filesystem
+ shutdowns, for example. Current threshold values are:
+
+ XFS_ERRLEVEL_OFF: 0
+ XFS_ERRLEVEL_LOW: 1
+ XFS_ERRLEVEL_HIGH: 5
+
+ fs.xfs.panic_mask (Min: 0 Default: 0 Max: 255)
+ Causes certain error conditions to call BUG(). Value is a bitmask;
+ OR together the tags which represent errors which should cause panics:
+
+ XFS_NO_PTAG 0
+ XFS_PTAG_IFLUSH 0x00000001
+ XFS_PTAG_LOGRES 0x00000002
+ XFS_PTAG_AILDELETE 0x00000004
+ XFS_PTAG_ERROR_REPORT 0x00000008
+ XFS_PTAG_SHUTDOWN_CORRUPT 0x00000010
+ XFS_PTAG_SHUTDOWN_IOERROR 0x00000020
+ XFS_PTAG_SHUTDOWN_LOGERROR 0x00000040
+ XFS_PTAG_FSBLOCK_ZERO 0x00000080
+
+ This option is intended for debugging only.
+
+ fs.xfs.irix_symlink_mode (Min: 0 Default: 0 Max: 1)
+ Controls whether symlinks are created with mode 0777 (default)
+ or whether their mode is affected by the umask (irix mode).
+
+ fs.xfs.irix_sgid_inherit (Min: 0 Default: 0 Max: 1)
+ Controls files created in SGID directories.
+ If the group ID of the new file does not match the effective group
+ ID or one of the supplementary group IDs of the parent dir, the
+ ISGID bit is cleared if the irix_sgid_inherit compatibility sysctl
+ is set.
+
+ fs.xfs.inherit_sync (Min: 0 Default: 1 Max: 1)
+ Setting this to "1" will cause the "sync" flag set
+ by the xfs_io(8) chattr command on a directory to be
+ inherited by files in that directory.
+
+ fs.xfs.inherit_nodump (Min: 0 Default: 1 Max: 1)
+ Setting this to "1" will cause the "nodump" flag set
+ by the xfs_io(8) chattr command on a directory to be
+ inherited by files in that directory.
+
+ fs.xfs.inherit_noatime (Min: 0 Default: 1 Max: 1)
+ Setting this to "1" will cause the "noatime" flag set
+ by the xfs_io(8) chattr command on a directory to be
+ inherited by files in that directory.
+
+ fs.xfs.inherit_nosymlinks (Min: 0 Default: 1 Max: 1)
+ Setting this to "1" will cause the "nosymlinks" flag set
+ by the xfs_io(8) chattr command on a directory to be
+ inherited by files in that directory.
+
+ fs.xfs.inherit_nodefrag (Min: 0 Default: 1 Max: 1)
+ Setting this to "1" will cause the "nodefrag" flag set
+ by the xfs_io(8) chattr command on a directory to be
+ inherited by files in that directory.
+
+ fs.xfs.rotorstep (Min: 1 Default: 1 Max: 256)
+ In "inode32" allocation mode, this option determines how many
+ files the allocator attempts to allocate in the same allocation
+ group before moving to the next allocation group. The intent
+ is to control the rate at which the allocator moves between
+ allocation groups when allocating extents for new files.
+
+Deprecated Sysctls
+==================
+
+None at present.
+
+
+Removed Sysctls
+===============
+
+ Name Removed
+ ---- -------
+ fs.xfs.xfsbufd_centisec v3.20
+ fs.xfs.age_buffer_centisecs v3.20
diff --git a/Documentation/firmware_class/README b/Documentation/firmware_class/README
new file mode 100644
index 000000000..71f86859d
--- /dev/null
+++ b/Documentation/firmware_class/README
@@ -0,0 +1,128 @@
+
+ request_firmware() hotplug interface:
+ ------------------------------------
+ Copyright (C) 2003 Manuel Estrada Sainz
+
+ Why:
+ ---
+
+ Today, the most extended way to use firmware in the Linux kernel is linking
+ it statically in a header file. Which has political and technical issues:
+
+ 1) Some firmware is not legal to redistribute.
+ 2) The firmware occupies memory permanently, even though it often is just
+ used once.
+ 3) Some people, like the Debian crowd, don't consider some firmware free
+ enough and remove entire drivers (e.g.: keyspan).
+
+ High level behavior (mixed):
+ ============================
+
+ 1), kernel(driver):
+ - calls request_firmware(&fw_entry, $FIRMWARE, device)
+ - kernel searchs the fimware image with name $FIRMWARE directly
+ in the below search path of root filesystem:
+ User customized search path by module parameter 'path'[1]
+ "/lib/firmware/updates/" UTS_RELEASE,
+ "/lib/firmware/updates",
+ "/lib/firmware/" UTS_RELEASE,
+ "/lib/firmware"
+ - If found, goto 7), else goto 2)
+
+ [1], the 'path' is a string parameter which length should be less
+ than 256, user should pass 'firmware_class.path=$CUSTOMIZED_PATH'
+ if firmware_class is built in kernel(the general situation)
+
+ 2), userspace:
+ - /sys/class/firmware/xxx/{loading,data} appear.
+ - hotplug gets called with a firmware identifier in $FIRMWARE
+ and the usual hotplug environment.
+ - hotplug: echo 1 > /sys/class/firmware/xxx/loading
+
+ 3), kernel: Discard any previous partial load.
+
+ 4), userspace:
+ - hotplug: cat appropriate_firmware_image > \
+ /sys/class/firmware/xxx/data
+
+ 5), kernel: grows a buffer in PAGE_SIZE increments to hold the image as it
+ comes in.
+
+ 6), userspace:
+ - hotplug: echo 0 > /sys/class/firmware/xxx/loading
+
+ 7), kernel: request_firmware() returns and the driver has the firmware
+ image in fw_entry->{data,size}. If something went wrong
+ request_firmware() returns non-zero and fw_entry is set to
+ NULL.
+
+ 8), kernel(driver): Driver code calls release_firmware(fw_entry) releasing
+ the firmware image and any related resource.
+
+ High level behavior (driver code):
+ ==================================
+
+ if(request_firmware(&fw_entry, $FIRMWARE, device) == 0)
+ copy_fw_to_device(fw_entry->data, fw_entry->size);
+ release_firmware(fw_entry);
+
+ Sample/simple hotplug script:
+ ============================
+
+ # Both $DEVPATH and $FIRMWARE are already provided in the environment.
+
+ HOTPLUG_FW_DIR=/usr/lib/hotplug/firmware/
+
+ echo 1 > /sys/$DEVPATH/loading
+ cat $HOTPLUG_FW_DIR/$FIRMWARE > /sys/$DEVPATH/data
+ echo 0 > /sys/$DEVPATH/loading
+
+ Random notes:
+ ============
+
+ - "echo -1 > /sys/class/firmware/xxx/loading" will cancel the load at
+ once and make request_firmware() return with error.
+
+ - firmware_data_read() and firmware_loading_show() are just provided
+ for testing and completeness, they are not called in normal use.
+
+ - There is also /sys/class/firmware/timeout which holds a timeout in
+ seconds for the whole load operation.
+
+ - request_firmware_nowait() is also provided for convenience in
+ user contexts to request firmware asynchronously, but can't be called
+ in atomic contexts.
+
+
+ about in-kernel persistence:
+ ---------------------------
+ Under some circumstances, as explained below, it would be interesting to keep
+ firmware images in non-swappable kernel memory or even in the kernel image
+ (probably within initramfs).
+
+ Note that this functionality has not been implemented.
+
+ - Why OPTIONAL in-kernel persistence may be a good idea sometimes:
+
+ - If the device that needs the firmware is needed to access the
+ filesystem. When upon some error the device has to be reset and the
+ firmware reloaded, it won't be possible to get it from userspace.
+ e.g.:
+ - A diskless client with a network card that needs firmware.
+ - The filesystem is stored in a disk behind an scsi device
+ that needs firmware.
+ - Replacing buggy DSDT/SSDT ACPI tables on boot.
+ Note: this would require the persistent objects to be included
+ within the kernel image, probably within initramfs.
+
+ And the same device can be needed to access the filesystem or not depending
+ on the setup, so I think that the choice on what firmware to make
+ persistent should be left to userspace.
+
+ about firmware cache:
+ --------------------
+ After firmware cache mechanism is introduced during system sleep,
+ request_firmware can be called safely inside device's suspend and
+ resume callback, and callers needn't cache the firmware by
+ themselves any more for dealing with firmware loss during system
+ resume.
diff --git a/Documentation/firmware_class/hotplug-script b/Documentation/firmware_class/hotplug-script
new file mode 100644
index 000000000..8143a950b
--- /dev/null
+++ b/Documentation/firmware_class/hotplug-script
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+# Simple hotplug script sample:
+#
+# Both $DEVPATH and $FIRMWARE are already provided in the environment.
+
+HOTPLUG_FW_DIR=/usr/lib/hotplug/firmware/
+
+if [ "$SUBSYSTEM" == "firmware" -a "$ACTION" == "add" ]; then
+ if [ -f $HOTPLUG_FW_DIR/$FIRMWARE ]; then
+ echo 1 > /sys/$DEVPATH/loading
+ cat $HOTPLUG_FW_DIR/$FIRMWARE > /sys/$DEVPATH/data
+ echo 0 > /sys/$DEVPATH/loading
+ else
+ echo -1 > /sys/$DEVPATH/loading
+ fi
+fi
diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt
new file mode 100644
index 000000000..df904aec9
--- /dev/null
+++ b/Documentation/flexible-arrays.txt
@@ -0,0 +1,120 @@
+Using flexible arrays in the kernel
+Last updated for 2.6.32
+Jonathan Corbet <corbet@lwn.net>
+
+Large contiguous memory allocations can be unreliable in the Linux kernel.
+Kernel programmers will sometimes respond to this problem by allocating
+pages with vmalloc(). This solution not ideal, though. On 32-bit systems,
+memory from vmalloc() must be mapped into a relatively small address space;
+it's easy to run out. On SMP systems, the page table changes required by
+vmalloc() allocations can require expensive cross-processor interrupts on
+all CPUs. And, on all systems, use of space in the vmalloc() range
+increases pressure on the translation lookaside buffer (TLB), reducing the
+performance of the system.
+
+In many cases, the need for memory from vmalloc() can be eliminated by
+piecing together an array from smaller parts; the flexible array library
+exists to make this task easier.
+
+A flexible array holds an arbitrary (within limits) number of fixed-sized
+objects, accessed via an integer index. Sparse arrays are handled
+reasonably well. Only single-page allocations are made, so memory
+allocation failures should be relatively rare. The down sides are that the
+arrays cannot be indexed directly, individual object size cannot exceed the
+system page size, and putting data into a flexible array requires a copy
+operation. It's also worth noting that flexible arrays do no internal
+locking at all; if concurrent access to an array is possible, then the
+caller must arrange for appropriate mutual exclusion.
+
+The creation of a flexible array is done with:
+
+ #include <linux/flex_array.h>
+
+ struct flex_array *flex_array_alloc(int element_size,
+ unsigned int total,
+ gfp_t flags);
+
+The individual object size is provided by element_size, while total is the
+maximum number of objects which can be stored in the array. The flags
+argument is passed directly to the internal memory allocation calls. With
+the current code, using flags to ask for high memory is likely to lead to
+notably unpleasant side effects.
+
+It is also possible to define flexible arrays at compile time with:
+
+ DEFINE_FLEX_ARRAY(name, element_size, total);
+
+This macro will result in a definition of an array with the given name; the
+element size and total will be checked for validity at compile time.
+
+Storing data into a flexible array is accomplished with a call to:
+
+ int flex_array_put(struct flex_array *array, unsigned int element_nr,
+ void *src, gfp_t flags);
+
+This call will copy the data from src into the array, in the position
+indicated by element_nr (which must be less than the maximum specified when
+the array was created). If any memory allocations must be performed, flags
+will be used. The return value is zero on success, a negative error code
+otherwise.
+
+There might possibly be a need to store data into a flexible array while
+running in some sort of atomic context; in this situation, sleeping in the
+memory allocator would be a bad thing. That can be avoided by using
+GFP_ATOMIC for the flags value, but, often, there is a better way. The
+trick is to ensure that any needed memory allocations are done before
+entering atomic context, using:
+
+ int flex_array_prealloc(struct flex_array *array, unsigned int start,
+ unsigned int nr_elements, gfp_t flags);
+
+This function will ensure that memory for the elements indexed in the range
+defined by start and nr_elements has been allocated. Thereafter, a
+flex_array_put() call on an element in that range is guaranteed not to
+block.
+
+Getting data back out of the array is done with:
+
+ void *flex_array_get(struct flex_array *fa, unsigned int element_nr);
+
+The return value is a pointer to the data element, or NULL if that
+particular element has never been allocated.
+
+Note that it is possible to get back a valid pointer for an element which
+has never been stored in the array. Memory for array elements is allocated
+one page at a time; a single allocation could provide memory for several
+adjacent elements. Flexible array elements are normally initialized to the
+value FLEX_ARRAY_FREE (defined as 0x6c in <linux/poison.h>), so errors
+involving that number probably result from use of unstored array entries.
+Note that, if array elements are allocated with __GFP_ZERO, they will be
+initialized to zero and this poisoning will not happen.
+
+Individual elements in the array can be cleared with:
+
+ int flex_array_clear(struct flex_array *array, unsigned int element_nr);
+
+This function will set the given element to FLEX_ARRAY_FREE and return
+zero. If storage for the indicated element is not allocated for the array,
+flex_array_clear() will return -EINVAL instead. Note that clearing an
+element does not release the storage associated with it; to reduce the
+allocated size of an array, call:
+
+ int flex_array_shrink(struct flex_array *array);
+
+The return value will be the number of pages of memory actually freed.
+This function works by scanning the array for pages containing nothing but
+FLEX_ARRAY_FREE bytes, so (1) it can be expensive, and (2) it will not work
+if the array's pages are allocated with __GFP_ZERO.
+
+It is possible to remove all elements of an array with a call to:
+
+ void flex_array_free_parts(struct flex_array *array);
+
+This call frees all elements, but leaves the array itself in place.
+Freeing the entire array is done with:
+
+ void flex_array_free(struct flex_array *array);
+
+As of this writing, there are no users of flexible arrays in the mainline
+kernel. The functions described here are also not exported to modules;
+that will probably be fixed when somebody comes up with a need for it.
diff --git a/Documentation/fmc/00-INDEX b/Documentation/fmc/00-INDEX
new file mode 100644
index 000000000..431c69570
--- /dev/null
+++ b/Documentation/fmc/00-INDEX
@@ -0,0 +1,38 @@
+
+Documentation in this directory comes from sections of the manual we
+wrote for the externally-developed fmc-bus package. The complete
+manual as of today (2013-02) is available in PDF format at
+http://www.ohwr.org/projects/fmc-bus/files
+
+00-INDEX
+ - this file.
+
+FMC-and-SDB.txt
+ - What are FMC and SDB, basic concepts for this framework
+
+API.txt
+ - The functions that are exported by the bus driver
+
+parameters.txt
+ - The module parameters
+
+carrier.txt
+ - writing a carrier (a device)
+
+mezzanine.txt
+ - writing code for your mezzanine (a driver)
+
+identifiers.txt
+ - how identification and matching works
+
+fmc-fakedev.txt
+ - about drivers/fmc/fmc-fakedev.ko
+
+fmc-trivial.txt
+ - about drivers/fmc/fmc-trivial.ko
+
+fmc-write-eeprom.txt
+ - about drivers/fmc/fmc-write-eeprom.ko
+
+fmc-chardev.txt
+ - about drivers/fmc/fmc-chardev.ko
diff --git a/Documentation/fmc/API.txt b/Documentation/fmc/API.txt
new file mode 100644
index 000000000..06b06b92c
--- /dev/null
+++ b/Documentation/fmc/API.txt
@@ -0,0 +1,47 @@
+Functions Exported by fmc.ko
+****************************
+
+The FMC core exports the usual 4 functions that are needed for a bus to
+work, and a few more:
+
+ int fmc_driver_register(struct fmc_driver *drv);
+ void fmc_driver_unregister(struct fmc_driver *drv);
+ int fmc_device_register(struct fmc_device *fmc);
+ void fmc_device_unregister(struct fmc_device *fmc);
+
+ int fmc_device_register_n(struct fmc_device **fmc, int n);
+ void fmc_device_unregister_n(struct fmc_device **fmc, int n);
+
+ uint32_t fmc_readl(struct fmc_device *fmc, int offset);
+ void fmc_writel(struct fmc_device *fmc, uint32_t val, int off);
+ void *fmc_get_drvdata(struct fmc_device *fmc);
+ void fmc_set_drvdata(struct fmc_device *fmc, void *data);
+
+ int fmc_reprogram(struct fmc_device *f, struct fmc_driver *d, char *gw,
+ int sdb_entry);
+
+The data structure that describe a device is detailed in *note FMC
+Device::, the one that describes a driver is detailed in *note FMC
+Driver::. Please note that structures of type fmc_device must be
+allocated by the caller, but must not be released after unregistering.
+The fmc-bus itself takes care of releasing the structure when their use
+count reaches zero - actually, the device model does that in lieu of us.
+
+The functions to register and unregister n devices are meant to be used
+by carriers that host more than one mezzanine. The devices must all be
+registered at the same time because if the FPGA is reprogrammed, all
+devices in the array are affected. Usually, the driver matching the
+first device will reprogram the FPGA, so other devices must know they
+are already driven by a reprogrammed FPGA.
+
+If a carrier hosts slots that are driven by different FPGA devices, it
+should register as a group only mezzanines that are driven by the same
+FPGA, for the reason outlined above.
+
+Finally, the fmc_reprogram function calls the reprogram method (see
+*note The API Offered by Carriers:: and also scans the memory area for
+an SDB tree. You can pass -1 as sdb_entry to disable such scan.
+Otherwise, the function fails if no tree is found at the specified
+entry point. The function is meant to factorize common code, and by
+the time you read this it is already used by the spec-sw and fine-delay
+modules.
diff --git a/Documentation/fmc/FMC-and-SDB.txt b/Documentation/fmc/FMC-and-SDB.txt
new file mode 100644
index 000000000..fa14e0b24
--- /dev/null
+++ b/Documentation/fmc/FMC-and-SDB.txt
@@ -0,0 +1,88 @@
+
+FMC (FPGA Mezzanine Card) is the standard we use for our I/O devices,
+in the context of White Rabbit and related hardware.
+
+In our I/O environments we need to write drivers for each mezzanine
+card, and such drivers must work regardless of the carrier being used.
+To achieve this, we abstract the FMC interface.
+
+We have a carrier for PCI-E called SPEC and one for VME called SVEC,
+but more are planned. Also, we support stand-alone devices (usually
+plugged on a SPEC card), controlled through Etherbone, developed by GSI.
+
+Code and documentation for the FMC bus was born as part of the spec-sw
+project, but now it lives in its own project. Other projects, i.e.
+software support for the various carriers, should include this as a
+submodule.
+
+The most up to date version of code and documentation is always
+available from the repository you can clone from:
+
+ git://ohwr.org/fmc-projects/fmc-bus.git (read-only)
+ git@ohwr.org:fmc-projects/fmc-bus.git (read-write for developers)
+
+Selected versions of the documentation, as well as complete tar
+archives for selected revisions are placed to the Files section of the
+project: `http://www.ohwr.org/projects/fmc-bus/files'
+
+
+What is FMC
+***********
+
+FMC, as said, stands for "FPGA Mezzanine Card". It is a standard
+developed by the VME consortium called VITA (VMEbus International Trade
+Association and ratified by ANSI, the American National Standard
+Institute. The official documentation is called "ANSI-VITA 57.1".
+
+The FMC card is an almost square PCB, around 70x75 millimeters, that is
+called mezzanine in this document. It usually lives plugged into
+another PCB for power supply and control; such bigger circuit board is
+called carrier from now on, and a single carrier may host more than one
+mezzanine.
+
+In the typical application the mezzanine is mostly analog while the
+carrier is mostly digital, and hosts an FPGA that must be configured to
+match the specific mezzanine and the desired application. Thus, you may
+need to load different FPGA images to drive different instances of the
+same mezzanine.
+
+FMC, as such, is not a bus in the usual meaning of the term, because
+most carriers have only one connector, and carriers with several
+connectors have completely separate electrical connections to them.
+This package, however, implements a bus as a software abstraction.
+
+
+What is SDB
+***********
+
+SDB (Self Describing Bus) is a set of data structures that we use for
+enumerating the internal structure of an FPGA image. We also use it as
+a filesystem inside the FMC EEPROM.
+
+SDB is not mandatory for use of this FMC kernel bus, but if you have SDB
+this package can make good use of it. SDB itself is developed in the
+fpga-config-space OHWR project. The link to the repository is
+`git://ohwr.org/hdl-core-lib/fpga-config-space.git' and what is used in
+this project lives in the sdbfs subdirectory in there.
+
+SDB support for FMC is described in *note FMC Identification:: and
+*note SDB Support::
+
+
+SDB Support
+***********
+
+The fmc.ko bus driver exports a few functions to help drivers taking
+advantage of the SDB information that may be present in your own FPGA
+memory image.
+
+The module exports the following functions, in the special header
+<linux/fmc-sdb.h>. The linux/ prefix in the name is there because we
+plan to submit it upstream in the future, and don't want to force
+changes on our drivers if that happens.
+
+ int fmc_scan_sdb_tree(struct fmc_device *fmc, unsigned long address);
+ void fmc_show_sdb_tree(struct fmc_device *fmc);
+ signed long fmc_find_sdb_device(struct sdb_array *tree, uint64_t vendor,
+ uint32_t device, unsigned long *sz);
+ int fmc_free_sdb_tree(struct fmc_device *fmc);
diff --git a/Documentation/fmc/carrier.txt b/Documentation/fmc/carrier.txt
new file mode 100644
index 000000000..5e4f1dd3e
--- /dev/null
+++ b/Documentation/fmc/carrier.txt
@@ -0,0 +1,311 @@
+FMC Device
+**********
+
+Within the Linux bus framework, the FMC device is created and
+registered by the carrier driver. For example, the PCI driver for the
+SPEC card fills a data structure for each SPEC that it drives, and
+registers an associated FMC device for each card. The SVEC driver can
+do exactly the same for the VME carrier (actually, it should do it
+twice, because the SVEC carries two FMC mezzanines). Similarly, an
+Etherbone driver will be able to register its own FMC devices, offering
+communication primitives through frame exchange.
+
+The contents of the EEPROM within the FMC are used for identification
+purposes, i.e. for matching the device with its own driver. For this
+reason the device structure includes a complete copy of the EEPROM
+(actually, the carrier driver may choose whether or not to return it -
+for example we most likely won't have the whole EEPROM available for
+Etherbone devices.
+
+The following listing shows the current structure defining a device.
+Please note that all the machinery is in place but some details may
+still change in the future. For this reason, there is a version field
+at the beginning of the structure. As usual, the minor number will
+change for compatible changes (like a new flag) and the major number
+will increase when an incompatible change happens (for example, a
+change in layout of some fmc data structures). Device writers should
+just set it to the value FMC_VERSION, and be ready to get back -EINVAL
+at registration time.
+
+ struct fmc_device {
+ unsigned long version;
+ unsigned long flags;
+ struct module *owner; /* char device must pin it */
+ struct fmc_fru_id id; /* for EEPROM-based match */
+ struct fmc_operations *op; /* carrier-provided */
+ int irq; /* according to host bus. 0 == none */
+ int eeprom_len; /* Usually 8kB, may be less */
+ int eeprom_addr; /* 0x50, 0x52 etc */
+ uint8_t *eeprom; /* Full contents or leading part */
+ char *carrier_name; /* "SPEC" or similar, for special use */
+ void *carrier_data; /* "struct spec *" or equivalent */
+ __iomem void *fpga_base; /* May be NULL (Etherbone) */
+ __iomem void *slot_base; /* Set by the driver */
+ struct fmc_device **devarray; /* Allocated by the bus */
+ int slot_id; /* Index in the slot array */
+ int nr_slots; /* Number of slots in this carrier */
+ unsigned long memlen; /* Used for the char device */
+ struct device dev; /* For Linux use */
+ struct device *hwdev; /* The underlying hardware device */
+ unsigned long sdbfs_entry;
+ struct sdb_array *sdb;
+ uint32_t device_id; /* Filled by the device */
+ char *mezzanine_name; /* Defaults to ``fmc'' */
+ void *mezzanine_data;
+ };
+
+The meaning of most fields is summarized in the code comment above.
+
+The following fields must be filled by the carrier driver before
+registration:
+
+ * version: must be set to FMC_VERSION.
+
+ * owner: set to MODULE_OWNER.
+
+ * op: the operations to act on the device.
+
+ * irq: number for the mezzanine; may be zero.
+
+ * eeprom_len: length of the following array.
+
+ * eeprom_addr: 0x50 for first mezzanine and so on.
+
+ * eeprom: the full content of the I2C EEPROM.
+
+ * carrier_name.
+
+ * carrier_data: a unique pointer for the carrier.
+
+ * fpga_base: the I/O memory address (may be NULL).
+
+ * slot_id: the index of this slot (starting from zero).
+
+ * memlen: if fpga_base is valid, the length of I/O memory.
+
+ * hwdev: to be used in some dev_err() calls.
+
+ * device_id: a slot-specific unique integer number.
+
+
+Please note that the carrier should read its own EEPROM memory before
+registering the device, as well as fill all other fields listed above.
+
+The following fields should not be assigned, because they are filled
+later by either the bus or the device driver:
+
+ * flags.
+
+ * fru_id: filled by the bus, parsing the eeprom.
+
+ * slot_base: filled and used by the driver, if useful to it.
+
+ * devarray: an array og all mezzanines driven by a singe FPGA.
+
+ * nr_slots: set by the core at registration time.
+
+ * dev: used by Linux.
+
+ * sdb: FPGA contents, scanned according to driver's directions.
+
+ * sdbfs_entry: SDB entry point in EEPROM: autodetected.
+
+ * mezzanine_data: available for the driver.
+
+ * mezzanine_name: filled by fmc-bus during identification.
+
+
+Note: mezzanine_data may be redundant, because Linux offers the drvdata
+approach, so the field may be removed in later versions of this bus
+implementation.
+
+As I write this, she SPEC carrier is already completely functional in
+the fmc-bus environment, and is a good reference to look at.
+
+
+The API Offered by Carriers
+===========================
+
+The carrier provides a number of methods by means of the
+`fmc_operations' structure, which currently is defined like this
+(again, it is a moving target, please refer to the header rather than
+this document):
+
+ struct fmc_operations {
+ uint32_t (*readl)(struct fmc_device *fmc, int offset);
+ void (*writel)(struct fmc_device *fmc, uint32_t value, int offset);
+ int (*reprogram)(struct fmc_device *f, struct fmc_driver *d, char *gw);
+ int (*validate)(struct fmc_device *fmc, struct fmc_driver *drv);
+ int (*irq_request)(struct fmc_device *fmc, irq_handler_t h,
+ char *name, int flags);
+ void (*irq_ack)(struct fmc_device *fmc);
+ int (*irq_free)(struct fmc_device *fmc);
+ int (*gpio_config)(struct fmc_device *fmc, struct fmc_gpio *gpio,
+ int ngpio);
+ int (*read_ee)(struct fmc_device *fmc, int pos, void *d, int l);
+ int (*write_ee)(struct fmc_device *fmc, int pos, const void *d, int l);
+ };
+
+The individual methods perform the following tasks:
+
+`readl'
+`writel'
+ These functions access FPGA registers by whatever means the
+ carrier offers. They are not expected to fail, and most of the time
+ they will just make a memory access to the host bus. If the
+ carrier provides a fpga_base pointer, the driver may use direct
+ access through that pointer. For this reason the header offers the
+ inline functions fmc_readl and fmc_writel that access fpga_base if
+ the respective method is NULL. A driver that wants to be portable
+ and efficient should use fmc_readl and fmc_writel. For Etherbone,
+ or other non-local carriers, error-management is still to be
+ defined.
+
+`validate'
+ Module parameters are used to manage different applications for
+ two or more boards of the same kind. Validation is based on the
+ busid module parameter, if provided, and returns the matching
+ index in the associated array. See *note Module Parameters:: in in
+ doubt. If no match is found, `-ENOENT' is returned; if the user
+ didn't pass `busid=', all devices will pass validation. The value
+ returned by the validate method can be used as index into other
+ parameters (for example, some drivers use the `lm32=' parameter in
+ this way). Such "generic parameters" are documented in *note
+ Module Parameters::, below. The validate method is used by
+ `fmc-trivial.ko', described in *note fmc-trivial::.
+
+`reprogram'
+ The carrier enumerates FMC devices by loading a standard (or
+ golden) FPGA binary that allows EEPROM access. Each driver, then,
+ will need to reprogram the FPGA by calling this function. If the
+ name argument is NULL, the carrier should reprogram the golden
+ binary. If the gateware name has been overridden through module
+ parameters (in a carrier-specific way) the file loaded will match
+ the parameters. Per-device gateware names can be specified using
+ the `gateware=' parameter, see *note Module Parameters::. Note:
+ Clients should call rhe new helper, fmc_reprogram, which both
+ calls this method and parse the SDB tree of the FPGA.
+
+`irq_request'
+`irq_ack'
+`irq_free'
+ Interrupt management is carrier-specific, so it is abstracted as
+ operations. The interrupt number is listed in the device
+ structure, and for the mezzanine driver the number is only
+ informative. The handler will receive the fmc pointer as dev_id;
+ the flags argument is passed to the Linux request_irq function,
+ but fmc-specific flags may be added in the future. You'll most
+ likely want to pass the `IRQF_SHARED' flag.
+
+`gpio_config'
+ The method allows to configure a GPIO pin in the carrier, and read
+ its current value if it is configured as input. See *note The GPIO
+ Abstraction:: for details.
+
+`read_ee'
+`write_ee'
+ Read or write the EEPROM. The functions are expected to be only
+ called before reprogramming and the carrier should refuse them
+ with `ENODEV' after reprogramming. The offset is expected to be
+ within 8kB (the current size), but addresses up to 1MB are
+ reserved to fit bigger I2C devices in the future. Carriers may
+ offer access to other internal flash memories using these same
+ methods: for example the SPEC driver may define that its carrier
+ I2C memory is seen at offset 1M and the internal SPI flash is seen
+ at offset 16M. This multiplexing of several flash memories in the
+ same address space is carrier-specific and should only be used
+ by a driver that has verified the `carrier_name' field.
+
+
+
+The GPIO Abstraction
+====================
+
+Support for GPIO pins in the fmc-bus environment is not very
+straightforward and deserves special discussion.
+
+While the general idea of a carrier-independent driver seems to fly,
+configuration of specific signals within the carrier needs at least
+some knowledge of the carrier itself. For this reason, the specific
+driver can request to configure carrier-specific GPIO pins, numbered
+from 0 to at most 4095. Configuration is performed by passing a
+pointer to an array of struct fmc_gpio items, as well as the length of
+the array. This is the data structure:
+
+ struct fmc_gpio {
+ char *carrier_name;
+ int gpio;
+ int _gpio; /* internal use by the carrier */
+ int mode; /* GPIOF_DIR_OUT etc, from <linux/gpio.h> */
+ int irqmode; /* IRQF_TRIGGER_LOW and so on */
+ };
+
+By specifying a carrier_name for each pin, the driver may access
+different pins in different carriers. The gpio_config method is
+expected to return the number of pins successfully configured, ignoring
+requests for other carriers. However, if no pin is configured (because
+no structure at all refers to the current carrier_name), the operation
+returns an error so the caller will know that it is running under a
+yet-unsupported carrier.
+
+So, for example, a driver that has been developed and tested on both
+the SPEC and the SVEC may request configuration of two different GPIO
+pins, and expect one such configuration to succeed - if none succeeds
+it most likely means that the current carrier is a still-unknown one.
+
+If, however, your GPIO pin has a specific known role, you can pass a
+special number in the gpio field, using one of the following macros:
+
+ #define FMC_GPIO_RAW(x) (x) /* 4096 of them */
+ #define FMC_GPIO_IRQ(x) ((x) + 0x1000) /* 256 of them */
+ #define FMC_GPIO_LED(x) ((x) + 0x1100) /* 256 of them */
+ #define FMC_GPIO_KEY(x) ((x) + 0x1200) /* 256 of them */
+ #define FMC_GPIO_TP(x) ((x) + 0x1300) /* 256 of them */
+ #define FMC_GPIO_USER(x) ((x) + 0x1400) /* 256 of them */
+
+Use of virtual GPIO numbers (anything but FMC_GPIO_RAW) is allowed
+provided the carrier_name field in the data structure is left
+unspecified (NULL). Each carrier is responsible for providing a mapping
+between virtual and physical GPIO numbers. The carrier may then use the
+_gpio field to cache the result of this mapping.
+
+All carriers must map their I/O lines to the sets above starting from
+zero. The SPEC, for example, maps interrupt pins 0 and 1, and test
+points 0 through 3 (even if the test points on the PCB are called
+5,6,7,8).
+
+If, for example, a driver requires a free LED and a test point (for a
+scope probe to be plugged at some point during development) it may ask
+for FMC_GPIO_LED(0) and FMC_GPIO_TP(0). Each carrier will provide
+suitable GPIO pins. Clearly, the person running the drivers will know
+the order used by the specific carrier driver in assigning leds and
+testpoints, so to make a carrier-dependent use of the diagnostic tools.
+
+In theory, some form of autodetection should be possible: a driver like
+the wr-nic (which uses IRQ(1) on the SPEC card) should configure
+IRQ(0), make a test with software-generated interrupts and configure
+IRQ(1) if the test fails. This probing step should be used because even
+if the wr-nic gateware is known to use IRQ1 on the SPEC, the driver
+should be carrier-independent and thus use IRQ(0) as a first bet -
+actually, the knowledge that IRQ0 may fail is carrier-dependent
+information, but using it doesn't make the driver unsuitable for other
+carriers.
+
+The return value of gpio_config is defined as follows:
+
+ * If no pin in the array can be used by the carrier, `-ENODEV'.
+
+ * If at least one virtual GPIO number cannot be mapped, `-ENOENT'.
+
+ * On success, 0 or positive. The value returned is the number of
+ high input bits (if no input is configured, the value for success
+ is 0).
+
+While I admit the procedure is not completely straightforward, it
+allows configuration, input and output with a single carrier operation.
+Given the typical use case of FMC devices, GPIO operations are not
+expected to ever by in hot paths, and GPIO access so fare has only been
+used to configure the interrupt pin, mode and polarity. Especially
+reading inputs is not expected to be common. If your device has GPIO
+capabilities in the hot path, you should consider using the kernel's
+GPIO mechanisms.
diff --git a/Documentation/fmc/fmc-chardev.txt b/Documentation/fmc/fmc-chardev.txt
new file mode 100644
index 000000000..d9ccb278e
--- /dev/null
+++ b/Documentation/fmc/fmc-chardev.txt
@@ -0,0 +1,64 @@
+fmc-chardev
+===========
+
+This is a simple generic driver, that allows user access by means of a
+character device (actually, one for each mezzanine it takes hold of).
+
+The char device is created as a misc device. Its name in /dev (as
+created by udev) is the same name as the underlying FMC device. Thus,
+the name can be a silly fmc-0000 look-alike if the device has no
+identifiers nor bus_id, a more specific fmc-0400 if the device has a
+bus-specific address but no associated name, or something like
+fdelay-0400 if the FMC core can rely on both a mezzanine name and a bus
+address.
+
+Currently the driver only supports read and write: you can lseek to the
+desired address and read or write a register.
+
+The driver assumes all registers are 32-bit in size, and only accepts a
+single read or write per system call. However, as a result of Unix read
+and write semantics, users can simply fread or fwrite bigger areas in
+order to dump or store bigger memory areas.
+
+There is currently no support for mmap, user-space interrupt management
+and DMA buffers. They may be added in later versions, if the need
+arises.
+
+The example below shows raw access to a SPEC card programmed with its
+golden FPGA file, that features an SDB structure at offset 256 - i.e.
+64 words. The mezzanine's EEPROM in this case is not programmed, so the
+default name is fmc-<bus><devfn>, and there are two cards in the system:
+
+ spusa.root# insmod fmc-chardev.ko
+ [ 1073.339332] spec 0000:02:00.0: Driver has no ID: matches all
+ [ 1073.345051] spec 0000:02:00.0: Created misc device "fmc-0200"
+ [ 1073.350821] spec 0000:04:00.0: Driver has no ID: matches all
+ [ 1073.356525] spec 0000:04:00.0: Created misc device "fmc-0400"
+ spusa.root# ls -l /dev/fmc*
+ crw------- 1 root root 10, 58 Nov 20 19:23 /dev/fmc-0200
+ crw------- 1 root root 10, 57 Nov 20 19:23 /dev/fmc-0400
+ spusa.root# dd bs=4 skip=64 count=1 if=/dev/fmc-0200 2> /dev/null | od -t x1z
+ 0000000 2d 42 44 53 >-BDS<
+ 0000004
+
+The simple program tools/fmc-mem in this package can access an FMC char
+device and read or write a word or a whole area. Actually, the program
+is not specific to FMC at all, it just uses lseek, read and write.
+
+Its first argument is the device name, the second the offset, the third
+(if any) the value to write and the optional last argument that must
+begin with "+" is the number of bytes to read or write. In case of
+repeated reading data is written to stdout; repeated writes read from
+stdin and the value argument is ignored.
+
+The following examples show reading the SDB magic number and the first
+SDB record from a SPEC device programmed with its golden image:
+
+ spusa.root# ./fmc-mem /dev/fmc-0200 100
+ 5344422d
+ spusa.root# ./fmc-mem /dev/fmc-0200 100 +40 | od -Ax -t x1z
+ 000000 2d 42 44 53 00 01 02 00 00 00 00 00 00 00 00 00 >-BDS............<
+ 000010 00 00 00 00 ff 01 00 00 00 00 00 00 51 06 00 00 >............Q...<
+ 000020 c9 42 a5 e6 02 00 00 00 11 05 12 20 2d 34 42 57 >.B......... -4BW<
+ 000030 73 6f 72 43 72 61 62 73 49 53 47 2d 00 20 20 20 >sorCrabsISG-. <
+ 000040
diff --git a/Documentation/fmc/fmc-fakedev.txt b/Documentation/fmc/fmc-fakedev.txt
new file mode 100644
index 000000000..e85b74a4a
--- /dev/null
+++ b/Documentation/fmc/fmc-fakedev.txt
@@ -0,0 +1,36 @@
+fmc-fakedev
+===========
+
+This package includes a software-only device, called fmc-fakedev, which
+is able to register up to 4 mezzanines (by default it registers one).
+Unlike the SPEC driver, which creates an FMC device for each PCI cards
+it manages, this module creates a single instance of its set of
+mezzanines.
+
+It is meant as the simplest possible example of how a driver should be
+written, and it includes a fake EEPROM image (built using the tools
+described in *note FMC Identification::),, which by default is
+replicated for each fake mezzanine.
+
+You can also use this device to verify the match algorithms, by asking
+it to test your own EEPROM image. You can provide the image by means of
+the eeprom= module parameter: the new EEPROM image is loaded, as usual,
+by means of the firmware loader. This example shows the defaults and a
+custom EEPROM image:
+
+ spusa.root# insmod fmc-fakedev.ko
+ [ 99.971247] fake-fmc-carrier: mezzanine 0
+ [ 99.975393] Manufacturer: fake-vendor
+ [ 99.979624] Product name: fake-design-for-testing
+ spusa.root# rmmod fmc-fakedev
+ spusa.root# insmod fmc-fakedev.ko eeprom=fdelay-eeprom.bin
+ [ 121.447464] fake-fmc-carrier: Mezzanine 0: eeprom "fdelay-eeprom.bin"
+ [ 121.462725] fake-fmc-carrier: mezzanine 0
+ [ 121.466858] Manufacturer: CERN
+ [ 121.470477] Product name: FmcDelay1ns4cha
+ spusa.root# rmmod fmc-fakedev
+
+After loading the device, you can use the write_ee method do modify its
+own internal fake EEPROM: whenever the image is overwritten starting at
+offset 0, the module will unregister and register again the FMC device.
+This is shown in fmc-write-eeprom.txt
diff --git a/Documentation/fmc/fmc-trivial.txt b/Documentation/fmc/fmc-trivial.txt
new file mode 100644
index 000000000..d1910bc67
--- /dev/null
+++ b/Documentation/fmc/fmc-trivial.txt
@@ -0,0 +1,17 @@
+fmc-trivial
+===========
+
+The simple module fmc-trivial is just a simple client that registers an
+interrupt handler. I used it to verify the basic mechanism of the FMC
+bus and how interrupts worked.
+
+The module implements the generic FMC parameters, so it can program a
+different gateware file in each card. The whole list of parameters it
+accepts are:
+
+`busid='
+`gateware='
+ Generic parameters. See mezzanine.txt
+
+
+This driver is worth reading, in my opinion.
diff --git a/Documentation/fmc/fmc-write-eeprom.txt b/Documentation/fmc/fmc-write-eeprom.txt
new file mode 100644
index 000000000..e0a971215
--- /dev/null
+++ b/Documentation/fmc/fmc-write-eeprom.txt
@@ -0,0 +1,98 @@
+fmc-write-eeprom
+================
+
+This module is designed to load a binary file from /lib/firmware and to
+write it to the internal EEPROM of the mezzanine card. This driver uses
+the `busid' generic parameter.
+
+Overwriting the EEPROM is not something you should do daily, and it is
+expected to only happen during manufacturing. For this reason, the
+module makes it unlikely for the random user to change a working EEPROM.
+
+However, since the EEPROM may include application-specific information
+other than the identification, later versions of this packages added
+write-support through sysfs. See *note Accessing the EEPROM::.
+
+To avoid damaging the EEPROM content, the module takes the following
+measures:
+
+ * It accepts a `file=' argument (within /lib/firmware) and if no
+ such argument is received, it doesn't write anything to EEPROM
+ (i.e. there is no default file name).
+
+ * If the file name ends with `.bin' it is written verbatim starting
+ at offset 0.
+
+ * If the file name ends with `.tlv' it is interpreted as
+ type-length-value (i.e., it allows writev(2)-like operation).
+
+ * If the file name doesn't match any of the patterns above, it is
+ ignored and no write is performed.
+
+ * Only cards listed with `busid=' are written to. If no busid is
+ specified, no programming is done (and the probe function of the
+ driver will fail).
+
+
+Each TLV tuple is formatted in this way: the header is 5 bytes,
+followed by data. The first byte is `w' for write, the next two bytes
+represent the address, in little-endian byte order, and the next two
+represent the data length, in little-endian order. The length does not
+include the header (it is the actual number of bytes to be written).
+
+This is a real example: that writes 5 bytes at position 0x110:
+
+ spusa.root# od -t x1 -Ax /lib/firmware/try.tlv
+ 000000 77 10 01 05 00 30 31 32 33 34
+ 00000a
+ spusa.root# insmod /tmp/fmc-write-eeprom.ko busid=0x0200 file=try.tlv
+ [19983.391498] spec 0000:03:00.0: write 5 bytes at 0x0110
+ [19983.414615] spec 0000:03:00.0: write_eeprom: success
+
+Please note that you'll most likely want to use SDBFS to build your
+EEPROM image, at least if your mezzanines are being used in the White
+Rabbit environment. For this reason the TLV format is not expected to
+be used much and is not expected to be developed further.
+
+If you want to try reflashing fake EEPROM devices, you can use the
+fmc-fakedev.ko module (see *note fmc-fakedev::). Whenever you change
+the image starting at offset 0, it will deregister and register again
+after two seconds. Please note, however, that if fmc-write-eeprom is
+still loaded, the system will associate it to the new device, which
+will be reprogrammed and thus will be unloaded after two seconds. The
+following example removes the module after it reflashed fakedev the
+first time.
+
+ spusa.root# insmod fmc-fakedev.ko
+ [ 72.984733] fake-fmc: Manufacturer: fake-vendor
+ [ 72.989434] fake-fmc: Product name: fake-design-for-testing
+ spusa.root# insmod fmc-write-eeprom.ko busid=0 file=fdelay-eeprom.bin; \
+ rmmod fmc-write-eeprom
+ [ 130.874098] fake-fmc: Matching a generic driver (no ID)
+ [ 130.887845] fake-fmc: programming 6155 bytes
+ [ 130.894567] fake-fmc: write_eeprom: success
+ [ 132.895794] fake-fmc: Manufacturer: CERN
+ [ 132.899872] fake-fmc: Product name: FmcDelay1ns4cha
+
+
+Accessing the EEPROM
+=====================
+
+The bus creates a sysfs binary file called eeprom for each mezzanine it
+knows about:
+
+ spusa.root# cd /sys/bus/fmc/devices; ls -l */eeprom
+ -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcAdc100m14b4cha-0800/eeprom
+ -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcDelay1ns4cha-0200/eeprom
+ -r--r--r-- 1 root root 8192 Feb 21 12:30 FmcDio5cha-0400/eeprom
+
+Everybody can read the files and the superuser can also modify it, but
+the operation may on the carrier driver, if the carrier is unable to
+access the I2C bus. For example, the spec driver can access the bus
+only with its golden gateware: after a mezzanine driver reprogrammed
+the FPGA with a custom circuit, the carrier is unable to access the
+EEPROM and returns ENOTSUPP.
+
+An alternative way to write the EEPROM is the mezzanine driver
+fmc-write-eeprom (See *note fmc-write-eeprom::), but the procedure is
+more complex.
diff --git a/Documentation/fmc/identifiers.txt b/Documentation/fmc/identifiers.txt
new file mode 100644
index 000000000..3bb577ff0
--- /dev/null
+++ b/Documentation/fmc/identifiers.txt
@@ -0,0 +1,168 @@
+FMC Identification
+******************
+
+The FMC standard requires every compliant mezzanine to carry
+identification information in an I2C EEPROM. The information must be
+laid out according to the "IPMI Platform Management FRU Information",
+where IPMI is a lie I'd better not expand, and FRU means "Field
+Replaceable Unit".
+
+The FRU information is an intricate unreadable binary blob that must
+live at offset 0 of the EEPROM, and typically extends for a few hundred
+bytes. The standard allows the application to use all the remaining
+storage area of the EEPROM as it wants.
+
+This chapter explains how to create your own EEPROM image and how to
+write it in your mezzanine, as well as how devices and drivers are
+paired at run time. EEPROM programming uses tools that are part of this
+package and SDB (part of the fpga-config-space package).
+
+The first sections are only interesting for manufacturers who need to
+write the EEPROM. If you are just a software developer writing an FMC
+device or driver, you may jump straight to *note SDB Support::.
+
+
+Building the FRU Structure
+==========================
+
+If you want to know the internals of the FRU structure and despair, you
+can retrieve the document from
+`http://download.intel.com/design/servers/ipmi/FRU1011.pdf' . The
+standard is awful and difficult without reason, so we only support the
+minimum mandatory subset - we create a simple structure and parse it
+back at run time, but we are not able to either generate or parse more
+arcane features like non-english languages and 6-bit text. If you need
+more items of the FRU standard for your boards, please submit patches.
+
+This package includes the Python script that Matthieu Cattin wrote to
+generate the FRU binary blob, based on an helper libipmi by Manohar
+Vanga and Matthieu himself. I changed the test script to receive
+parameters from the command line or from the environment (the command
+line takes precedence)
+
+To make a long story short, in order to build a standard-compliant
+binary file to be burned in your EEPROM, you need the following items:
+
+ Environment Opt Official Name Default
+---------------------------------------------------------------------
+ FRU_VENDOR -v "Board Manufacturer" fmc-example
+ FRU_NAME -n "Board Product Name" mezzanine
+ FRU_SERIAL -s `Board Serial Number" 0001
+ FRU_PART -p "Board Part Number" sample-part
+ FRU_OUTPUT -o not applicable /dev/stdout
+
+The "Official Name" above is what you find in the FRU official
+documentation, chapter 11, page 7 ("Board Info Area Format"). The
+output option is used to save the generated binary to a specific file
+name instead of stdout.
+
+You can pass the items to the FRU generator either in the environment
+or on the command line. This package has currently no support for
+specifying power consumption or such stuff, but I plan to add it as
+soon as I find some time for that.
+
+FIXME: consumption etc for FRU are here or in PTS?
+
+The following example creates a binary image for a specific board:
+
+ ./tools/fru-generator -v CERN -n FmcAdc100m14b4cha \
+ -s HCCFFIA___-CR000003 -p EDA-02063-V5-0 > eeprom.bin
+
+The following example shows a script that builds several binary EEPROM
+images for a series of boards, changing the serial number for each of
+them. The script uses a mix of environment variables and command line
+options, and uses the same string patterns shown above.
+
+ #!/bin/sh
+
+ export FRU_VENDOR="CERN"
+ export FRU_NAME="FmcAdc100m14b4cha"
+ export FRU_PART="EDA-02063-V5-0"
+
+ serial="HCCFFIA___-CR"
+
+ for number in $(seq 1 50); do
+ # build number-string "ns"
+ ns="$(printf %06d $number)"
+ ./fru-generator -s "${serial}${ns}" > eeprom-${ns}.bin
+ done
+
+
+Using SDB-FS in the EEPROM
+==========================
+
+If you want to use SDB as a filesystem in the EEPROM device within the
+mezzanine, you should create one such filesystem using gensdbfs, from
+the fpga-config-space package on OHWR.
+
+By using an SBD filesystem you can cluster several files in a single
+EEPROM, so both the host system and a soft-core running in the FPGA (if
+any) can access extra production-time information.
+
+We chose to use SDB as a storage filesystem because the format is very
+simple, and both the host system and the soft-core will likely already
+include support code for such format. The SDB library offered by the
+fpga-config-space is less than 1kB under LM32, so it proves quite up to
+the task.
+
+The SDB entry point (which acts as a directory listing) cannot live at
+offset zero in the flash device, because the FRU information must live
+there. To avoid wasting precious storage space while still allowing
+for more-than-minimal FRU structures, the fmc.ko will look for the SDB
+record at address 256, 512 and 1024.
+
+In order to generate the complete EEPROM image you'll need a
+configuration file for gensdbfs: you tell the program where to place
+the sdb entry point, and you must force the FRU data file to be placed
+at the beginning of the storage device. If needed, you can also place
+other files at a special offset (we sometimes do it for backward
+compatibility with drivers we wrote before implementing SDB for flash
+memory).
+
+The directory tools/sdbfs of this package includes a well-commented
+example that you may want to use as a starting point (the comments are
+in the file called -SDB-CONFIG-). Reading documentation for gensdbfs
+is a suggested first step anyways.
+
+This package (generic FMC bus support) only accesses two files in the
+EEPROM: the FRU information, at offset zero, with a suggested filename
+of IPMI-FRU and the short name for the mezzanine, in a file called
+name. The IPMI-FRU name is not mandatory, but a strongly suggested
+choice; the name filename is mandatory, because this is the preferred
+short name used by the FMC core. For example, a name of "fdelay" may
+supplement a Product Name like "FmcDelay1ns4cha" - exactly as
+demonstrated in `tools/sdbfs'.
+
+Note: SDB access to flash memory is not yet supported, so the short
+name currently in use is just the "Product Name" FRU string.
+
+The example in tools/sdbfs includes an extra file, that is needed by
+the fine-delay driver, and must live at a known address of 0x1800. By
+running gensdbfs on that directory you can output your binary EEPROM
+image (here below spusa$ is the shell prompt):
+
+ spusa$ ../fru-generator -v CERN -n FmcDelay1ns4cha -s proto-0 \
+ -p EDA-02267-V3 > IPMI-FRU
+ spusa$ ls -l
+ total 16
+ -rw-rw-r-- 1 rubini staff 975 Nov 19 18:08 --SDB-CONFIG--
+ -rw-rw-r-- 1 rubini staff 216 Nov 19 18:13 IPMI-FRU
+ -rw-rw-r-- 1 rubini staff 11 Nov 19 18:04 fd-calib
+ -rw-rw-r-- 1 rubini staff 7 Nov 19 18:04 name
+ spusa$ sudo gensdbfs . /lib/firmware/fdelay-eeprom.bin
+ spusa$ sdb-read -l -e 0x100 /lib/firmware/fdelay-eeprom.bin
+ /home/rubini/wip/sdbfs/userspace/sdb-read: listing format is to be defined
+ 46696c6544617461:2e202020 00000100-000018ff .
+ 46696c6544617461:6e616d65 00000200-00000206 name
+ 46696c6544617461:66642d63 00001800-000018ff fd-calib
+ 46696c6544617461:49504d49 00000000-000000d7 IPMI-FRU
+ spusa$ ../fru-dump /lib/firmware/fdelay-eeprom.bin
+ /lib/firmware/fdelay-eeprom.bin: manufacturer: CERN
+ /lib/firmware/fdelay-eeprom.bin: product-name: FmcDelay1ns4cha
+ /lib/firmware/fdelay-eeprom.bin: serial-number: proto-0
+ /lib/firmware/fdelay-eeprom.bin: part-number: EDA-02267-V3
+
+As expected, the output file is both a proper sdbfs object and an IPMI
+FRU information blob. The fd-calib file lives at offset 0x1800 and is
+over-allocated to 256 bytes, according to the configuration file for
+gensdbfs.
diff --git a/Documentation/fmc/mezzanine.txt b/Documentation/fmc/mezzanine.txt
new file mode 100644
index 000000000..87910dbfc
--- /dev/null
+++ b/Documentation/fmc/mezzanine.txt
@@ -0,0 +1,123 @@
+FMC Driver
+**********
+
+An FMC driver is concerned with the specific mezzanine and associated
+gateware. As such, it is expected to be independent of the carrier
+being used: it will perform I/O accesses only by means of
+carrier-provided functions.
+
+The matching between device and driver is based on the content of the
+EEPROM (as mandated by the FMC standard) or by the actual cores
+configured in the FPGA; the latter technique is used when the FPGA is
+already programmed when the device is registered to the bus core.
+
+In some special cases it is possible for a driver to directly access
+FPGA registers, by means of the `fpga_base' field of the device
+structure. This may be needed for high-bandwidth peripherals like fast
+ADC cards. If the device module registered a remote device (for example
+by means of Etherbone), the `fpga_base' pointer will be NULL.
+Therefore, drivers must be ready to deal with NULL base pointers, and
+fail gracefully. Most driver, however, are not expected to access the
+pointer directly but run fmc_readl and fmc_writel instead, which will
+work in any case.
+
+In even more special cases, the driver may access carrier-specific
+functionality: the `carrier_name' string allows the driver to check
+which is the current carrier and make use of the `carrier_data'
+pointer. We chose to use carrier names rather than numeric identifiers
+for greater flexibility, but also to avoid a central registry within
+the `fmc.h' file - we hope other users will exploit our framework with
+their own carriers. An example use of carrier names is in GPIO setup
+(see *note The GPIO Abstraction::), although the name match is not
+expected to be performed by the driver. If you depend on specific
+carriers, please check the carrier name and fail gracefully if your
+driver finds it is running in a yet-unknown-to-it environment.
+
+
+ID Table
+========
+
+Like most other Linux drivers, and FMC driver must list all the devices
+which it is able to drive. This is usually done by means of a device
+table, but in FMC we can match hardware based either on the contents of
+their EEPROM or on the actual FPGA cores that can be enumerated.
+Therefore, we have two tables of identifiers.
+
+Matching of FRU information depends on two names, the manufacturer (or
+vendor) and the device (see *note FMC Identification::); for
+flexibility during production (i.e. before writing to the EEPROM) the
+bus supports a catch-all driver that specifies NULL strings. For this
+reason, the table is specified as pointer-and-length, not a a
+null-terminated array - the entry with NULL names can be a valid entry.
+
+Matching on FPGA cores depends on two numeric fields: the 64-bit vendor
+number and the 32-bit device number. Support for matching based on
+class is not yet implemented. Each device is expected to be uniquely
+identified by an array of cores (it matches if all of the cores are
+instantiated), and for consistency the list is passed as
+pointer-and-length. Several similar devices can be driven by the same
+driver, and thus the driver specifies and array of such arrays.
+
+The complete set of involved data structures is thus the following:
+
+ struct fmc_fru_id { char *manufacturer; char *product_name; };
+ struct fmc_sdb_one_id { uint64_t vendor; uint32_t device; };
+ struct fmc_sdb_id { struct fmc_sdb_one_id *cores; int cores_nr; };
+
+ struct fmc_device_id {
+ struct fmc_fru_id *fru_id; int fru_id_nr;
+ struct fmc_sdb_id *sdb_id; int sdb_id_nr;
+ };
+
+A better reference, with full explanation, is the <linux/fmc.h> header.
+
+
+Module Parameters
+=================
+
+Most of the FMC drivers need the same set of kernel parameters. This
+package includes support to implement common parameters by means of
+fields in the `fmc_driver' structure and simple macro definitions.
+
+The parameters are carrier-specific, in that they rely on the busid
+concept, that varies among carriers. For the SPEC, the identifier is a
+PCI bus and devfn number, 16 bits wide in total; drivers for other
+carriers will most likely offer something similar but not identical,
+and some code duplication is unavoidable.
+
+This is the list of parameters that are common to several modules to
+see how they are actually used, please look at spec-trivial.c.
+
+`busid='
+ This is an array of integers, listing carrier-specific
+ identification numbers. For PIC, for example, `0x0400' represents
+ bus 4, slot 0. If any such ID is specified, the driver will only
+ accept to drive cards that appear in the list (even if the FMC ID
+ matches). This is accomplished by the validate carrier method.
+
+`gateware='
+ The argument is an array of strings. If no busid= is specified,
+ the first string of gateware= is used for all cards; otherwise the
+ identifiers and gateware names are paired one by one, in the order
+ specified.
+
+`show_sdb='
+ For modules supporting it, this parameter asks to show the SDB
+ internal structure by means of kernel messages. It is disabled by
+ default because those lines tend to hide more important messages,
+ if you look at the system console while loading the drivers.
+ Note: the parameter is being obsoleted, because fmc.ko itself now
+ supports dump_sdb= that applies to every client driver.
+
+
+For example, if you are using the trivial driver to load two different
+gateware files to two different cards, you can use the following
+parameters to load different binaries to the cards, after looking up
+the PCI identifiers. This has been tested with a SPEC carrier.
+
+ insmod fmc-trivial.ko \
+ busid=0x0200,0x0400 \
+ gateware=fmc/fine-delay.bin,fmc/simple-dio.bin
+
+Please note that not all sub-modules support all of those parameters.
+You can use modinfo to check what is supported by each module.
diff --git a/Documentation/fmc/parameters.txt b/Documentation/fmc/parameters.txt
new file mode 100644
index 000000000..59edf088e
--- /dev/null
+++ b/Documentation/fmc/parameters.txt
@@ -0,0 +1,56 @@
+Module Parameters in fmc.ko
+***************************
+
+The core driver receives two module parameters, meant to help debugging
+client modules. Both parameters can be modified by writing to
+/sys/module/fmc/parameters/, because they are used when client drivers
+are devices are registered, not when fmc.ko is loaded.
+
+`dump_eeprom='
+ If not zero, the parameter asks the bus controller to dump the
+ EEPROM of any device that is registered, using printk.
+
+`dump_sdb='
+ If not zero, the parameter prints the SDB tree of every FPGA it is
+ loaded by fmc_reprogram(). If greater than one, it asks to dump
+ the binary content of SDB records. This currently only dumps the
+ top-level SDB array, though.
+
+
+EEPROM dumping avoids repeating lines, since most of the contents is
+usually empty and all bits are one or zero. This is an example of the
+output:
+
+ [ 6625.850480] spec 0000:02:00.0: FPGA programming successful
+ [ 6626.139949] spec 0000:02:00.0: Manufacturer: CERN
+ [ 6626.144666] spec 0000:02:00.0: Product name: FmcDelay1ns4cha
+ [ 6626.150370] FMC: mezzanine 0: 0000:02:00.0 on SPEC
+ [ 6626.155179] FMC: dumping eeprom 0x2000 (8192) bytes
+ [ 6626.160087] 0000: 01 00 00 01 00 0b 00 f3 01 0a 00 a5 85 87 c4 43
+ [ 6626.167069] 0010: 45 52 4e cf 46 6d 63 44 65 6c 61 79 31 6e 73 34
+ [ 6626.174019] 0020: 63 68 61 c7 70 72 6f 74 6f 2d 30 cc 45 44 41 2d
+ [ 6626.180975] 0030: 30 32 32 36 37 2d 56 33 da 32 30 31 32 2d 31 31
+ [...]
+ [ 6626.371366] 0200: 66 64 65 6c 61 79 0a 00 00 00 00 00 00 00 00 00
+ [ 6626.378359] 0210: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ [ 6626.385361] [...]
+ [ 6626.387308] 1800: 70 6c 61 63 65 68 6f 6c 64 65 72 ff ff ff ff ff
+ [ 6626.394259] 1810: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff
+ [ 6626.401250] [...]
+
+The dump of SDB looks like the following; the example shows the simple
+golden gateware for the SPEC card, removing the leading timestamps to
+fit the page:
+
+ spec 0000:02:00.0: SDB: 00000651:e6a542c9 WB4-Crossbar-GSI
+ spec 0000:02:00.0: SDB: 0000ce42:ff07fc47 WR-Periph-Syscon (00000000-000000ff)
+ FMC: mezzanine 0: 0000:02:00.0 on SPEC
+ FMC: poor dump of sdb first level:
+ 0000: 53 44 42 2d 00 02 01 00 00 00 00 00 00 00 00 00
+ 0010: 00 00 00 00 00 00 01 ff 00 00 00 00 00 00 06 51
+ 0020: e6 a5 42 c9 00 00 00 02 20 12 05 11 57 42 34 2d
+ 0030: 43 72 6f 73 73 62 61 72 2d 47 53 49 20 20 20 00
+ 0040: 00 00 01 01 00 00 00 07 00 00 00 00 00 00 00 00
+ 0050: 00 00 00 00 00 00 00 ff 00 00 00 00 00 00 ce 42
+ 0060: ff 07 fc 47 00 00 00 01 20 12 03 05 57 52 2d 50
+ 0070: 65 72 69 70 68 2d 53 79 73 63 6f 6e 20 20 20 01
diff --git a/Documentation/frv/README.txt b/Documentation/frv/README.txt
new file mode 100644
index 000000000..a984faa96
--- /dev/null
+++ b/Documentation/frv/README.txt
@@ -0,0 +1,51 @@
+ ================================
+ Fujitsu FR-V LINUX DOCUMENTATION
+ ================================
+
+This directory contains documentation for the Fujitsu FR-V CPU architecture
+port of Linux.
+
+The following documents are available:
+
+ (*) features.txt
+
+ A description of the basic features inherent in this architecture port.
+
+
+ (*) configuring.txt
+
+ A summary of the configuration options particular to this architecture.
+
+
+ (*) booting.txt
+
+ A description of how to boot the kernel image and a summary of the kernel
+ command line options.
+
+
+ (*) gdbstub.txt
+
+ A description of how to debug the kernel using GDB attached by serial
+ port, and a summary of the services available.
+
+
+ (*) mmu-layout.txt
+
+ A description of the virtual and physical memory layout used in the
+ MMU linux kernel, and the registers used to support it.
+
+
+ (*) gdbinit
+
+ An example .gdbinit file for use with GDB. It includes macros for viewing
+ MMU state on the FR451. See mmu-layout.txt for more information.
+
+
+ (*) clock.txt
+
+ A description of the CPU clock scaling interface.
+
+
+ (*) atomic-ops.txt
+
+ A description of how the FR-V kernel's atomic operations work.
diff --git a/Documentation/frv/atomic-ops.txt b/Documentation/frv/atomic-ops.txt
new file mode 100644
index 000000000..96638e9b9
--- /dev/null
+++ b/Documentation/frv/atomic-ops.txt
@@ -0,0 +1,134 @@
+ =====================================
+ FUJITSU FR-V KERNEL ATOMIC OPERATIONS
+ =====================================
+
+On the FR-V CPUs, there is only one atomic Read-Modify-Write operation: the SWAP/SWAPI
+instruction. Unfortunately, this alone can't be used to implement the following operations:
+
+ (*) Atomic add to memory
+
+ (*) Atomic subtract from memory
+
+ (*) Atomic bit modification (set, clear or invert)
+
+ (*) Atomic compare and exchange
+
+On such CPUs, the standard way of emulating such operations in uniprocessor mode is to disable
+interrupts, but on the FR-V CPUs, modifying the PSR takes a lot of clock cycles, and it has to be
+done twice. This means the CPU runs for a relatively long time with interrupts disabled,
+potentially having a great effect on interrupt latency.
+
+
+=============
+NEW ALGORITHM
+=============
+
+To get around this, the following algorithm has been implemented. It operates in a way similar to
+the LL/SC instruction pairs supported on a number of platforms.
+
+ (*) The CCCR.CC3 register is reserved within the kernel to act as an atomic modify abort flag.
+
+ (*) In the exception prologues run on kernel->kernel entry, CCCR.CC3 is set to 0 (Undefined
+ state).
+
+ (*) All atomic operations can then be broken down into the following algorithm:
+
+ (1) Set ICC3.Z to true and set CC3 to True (ORCC/CKEQ/ORCR).
+
+ (2) Load the value currently in the memory to be modified into a register.
+
+ (3) Make changes to the value.
+
+ (4) If CC3 is still True, simultaneously and atomically (by VLIW packing):
+
+ (a) Store the modified value back to memory.
+
+ (b) Set ICC3.Z to false (CORCC on GR29 is sufficient for this - GR29 holds the current
+ task pointer in the kernel, and so is guaranteed to be non-zero).
+
+ (5) If ICC3.Z is still true, go back to step (1).
+
+This works in a non-SMP environment because any interrupt or other exception that happens between
+steps (1) and (4) will set CC3 to the Undefined, thus aborting the store in (4a), and causing the
+condition in ICC3 to remain with the Z flag set, thus causing step (5) to loop back to step (1).
+
+
+This algorithm suffers from two problems:
+
+ (1) The condition CCCR.CC3 is cleared unconditionally by an exception, irrespective of whether or
+ not any changes were made to the target memory location during that exception.
+
+ (2) The branch from step (5) back to step (1) may have to happen more than once until the store
+ manages to take place. In theory, this loop could cycle forever because there are too many
+ interrupts coming in, but it's unlikely.
+
+
+=======
+EXAMPLE
+=======
+
+Taking an example from include/asm-frv/atomic.h:
+
+ static inline int atomic_add_return(int i, atomic_t *v)
+ {
+ unsigned long val;
+
+ asm("0: \n"
+
+It starts by setting ICC3.Z to true for later use, and also transforming that into CC3 being in the
+True state.
+
+ " orcc gr0,gr0,gr0,icc3 \n" <-- (1)
+ " ckeq icc3,cc7 \n" <-- (1)
+
+Then it does the load. Note that the final phase of step (1) is done at the same time as the
+load. The VLIW packing ensures they are done simultaneously. The ".p" on the load must not be
+removed without swapping the order of these two instructions.
+
+ " ld.p %M0,%1 \n" <-- (2)
+ " orcr cc7,cc7,cc3 \n" <-- (1)
+
+Then the proposed modification is generated. Note that the old value can be retained if required
+(such as in test_and_set_bit()).
+
+ " add%I2 %1,%2,%1 \n" <-- (3)
+
+Then it attempts to store the value back, contingent on no exception having cleared CC3 since it
+was set to True.
+
+ " cst.p %1,%M0 ,cc3,#1 \n" <-- (4a)
+
+It simultaneously records the success or failure of the store in ICC3.Z.
+
+ " corcc gr29,gr29,gr0 ,cc3,#1 \n" <-- (4b)
+
+Such that the branch can then be taken if the operation was aborted.
+
+ " beq icc3,#0,0b \n" <-- (5)
+ : "+U"(v->counter), "=&r"(val)
+ : "NPr"(i)
+ : "memory", "cc7", "cc3", "icc3"
+ );
+
+ return val;
+ }
+
+
+=============
+CONFIGURATION
+=============
+
+The atomic ops implementation can be made inline or out-of-line by changing the
+CONFIG_FRV_OUTOFLINE_ATOMIC_OPS configuration variable. Making it out-of-line has a number of
+advantages:
+
+ - The resulting kernel image may be smaller
+ - Debugging is easier as atomic ops can just be stepped over and they can be breakpointed
+
+Keeping it inline also has a number of advantages:
+
+ - The resulting kernel may be Faster
+ - no out-of-line function calls need to be made
+ - the compiler doesn't have half its registers clobbered by making a call
+
+The out-of-line implementations live in arch/frv/lib/atomic-ops.S.
diff --git a/Documentation/frv/booting.txt b/Documentation/frv/booting.txt
new file mode 100644
index 000000000..9bdf4b46e
--- /dev/null
+++ b/Documentation/frv/booting.txt
@@ -0,0 +1,182 @@
+ =========================
+ BOOTING FR-V LINUX KERNEL
+ =========================
+
+======================
+PROVIDING A FILESYSTEM
+======================
+
+First of all, a root filesystem must be made available. This can be done in
+one of two ways:
+
+ (1) NFS Export
+
+ A filesystem should be constructed in a directory on an NFS server that
+ the target board can reach. This directory should then be NFS exported
+ such that the target board can read and write into it as root.
+
+ (2) Flash Filesystem (JFFS2 Recommended)
+
+ In this case, the image must be stored or built up on flash before it
+ can be used. A complete image can be built using the mkfs.jffs2 or
+ similar program and then downloaded and stored into flash by RedBoot.
+
+
+========================
+LOADING THE KERNEL IMAGE
+========================
+
+The kernel will need to be loaded into RAM by RedBoot (or by some alternative
+boot loader) before it can be run. The kernel image (arch/frv/boot/Image) may
+be loaded in one of three ways:
+
+ (1) Load from Flash
+
+ This is the simplest. RedBoot can store an image in the flash (see the
+ RedBoot documentation) and then load it back into RAM. RedBoot keeps
+ track of the load address, entry point and size, so the command to do
+ this is simply:
+
+ fis load linux
+
+ The image is then ready to be executed.
+
+ (2) Load by TFTP
+
+ The following command will download a raw binary kernel image from the
+ default server (as negotiated by BOOTP) and store it into RAM:
+
+ load -b 0x00100000 -r /tftpboot/image.bin
+
+ The image is then ready to be executed.
+
+ (3) Load by Y-Modem
+
+ The following command will download a raw binary kernel image across the
+ serial port that RedBoot is currently using:
+
+ load -m ymodem -b 0x00100000 -r zImage
+
+ The serial client (such as minicom) must then be told to transmit the
+ program by Y-Modem.
+
+ When finished, the image will then be ready to be executed.
+
+
+==================
+BOOTING THE KERNEL
+==================
+
+Boot the image with the following RedBoot command:
+
+ exec -c "<CMDLINE>" 0x00100000
+
+For example:
+
+ exec -c "console=ttySM0,115200 ip=:::::dhcp root=/dev/mtdblock2 rw"
+
+This will start the kernel running. Note that if the GDB-stub is compiled in,
+then the kernel will immediately wait for GDB to connect over serial before
+doing anything else. See the section on kernel debugging with GDB.
+
+The kernel command line <CMDLINE> tells the kernel where its console is and
+how to find its root filesystem. This is made up of the following components,
+separated by spaces:
+
+ (*) console=ttyS<x>[,<baud>[<parity>[<bits>[<flow>]]]]
+
+ This specifies that the system console should output through on-chip
+ serial port <x> (which can be "0" or "1").
+
+ <baud> is a standard baud rate between 1200 and 115200 (default 9600).
+
+ <parity> is a parity setting of "N", "O", "E", "M" or "S" for None, Odd,
+ Even, Mark or Space. "None" is the default.
+
+ <stop> is "7" or "8" for the number of bits per character. "8" is the
+ default.
+
+ <flow> is "r" to use flow control (XCTS on serial port 2 only). The
+ default is to not use flow control.
+
+ For example:
+
+ console=ttyS0,115200
+
+ To use the first on-chip serial port at baud rate 115200, no parity, 8
+ bits, and no flow control.
+
+ (*) root=<xxxx>
+
+ This specifies the device upon which the root filesystem resides. It
+ may be specified by major and minor number, device path, or even
+ partition uuid, if supported. For example:
+
+ /dev/nfs NFS root filesystem
+ /dev/mtdblock3 Fourth RedBoot partition on the System Flash
+ PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=1
+ first partition after the partition with the given UUID
+ 253:0 Device with major 253 and minor 0
+
+ Authoritative information can be found in
+ "Documentation/kernel-parameters.txt".
+
+ (*) rw
+
+ Start with the root filesystem mounted Read/Write.
+
+ The remaining components are all optional:
+
+ (*) ip=<ip>::::<host>:<iface>:<cfg>
+
+ Configure the network interface. If <cfg> is "off" then <ip> should
+ specify the IP address for the network device <iface>. <host> provide
+ the hostname for the device.
+
+ If <cfg> is "bootp" or "dhcp", then all of these parameters will be
+ discovered by consulting a BOOTP or DHCP server.
+
+ For example, the following might be used:
+
+ ip=192.168.73.12::::frv:eth0:off
+
+ This sets the IP address on the VDK motherboard RTL8029 ethernet chipset
+ (eth0) to be 192.168.73.12, and sets the board's hostname to be "frv".
+
+ (*) nfsroot=<server>:<dir>[,v<vers>]
+
+ This is mandatory if "root=/dev/nfs" is given as an option. It tells the
+ kernel the IP address of the NFS server providing its root filesystem,
+ and the pathname on that server of the filesystem.
+
+ The NFS version to use can also be specified. v2 and v3 are supported by
+ Linux.
+
+ For example:
+
+ nfsroot=192.168.73.1:/nfsroot-frv
+
+ (*) profile=1
+
+ Turns on the kernel profiler (accessible through /proc/profile).
+
+ (*) console=gdb0
+
+ This can be used as an alternative to the "console=ttyS..." listed
+ above. I tells the kernel to pass the console output to GDB if the
+ gdbstub is compiled in to the kernel.
+
+ If this is used, then the gdbstub passes the text to GDB, which then
+ simply dumps it to its standard output.
+
+ (*) mem=<xxx>M
+
+ Normally the kernel will work out how much SDRAM it has by reading the
+ SDRAM controller registers. That can be overridden with this
+ option. This allows the kernel to be told that it has <xxx> megabytes of
+ memory available.
+
+ (*) init=<prog> [<arg> [<arg> [<arg> ...]]]
+
+ This tells the kernel what program to run initially. By default this is
+ /sbin/init, but /sbin/sash or /bin/sh are common alternatives.
diff --git a/Documentation/frv/clock.txt b/Documentation/frv/clock.txt
new file mode 100644
index 000000000..c72d350e1
--- /dev/null
+++ b/Documentation/frv/clock.txt
@@ -0,0 +1,65 @@
+Clock scaling
+-------------
+
+The kernel supports scaling of CLCK.CMODE, CLCK.CM and CLKC.P0 clock
+registers. If built with CONFIG_PM and CONFIG_SYSCTL options enabled, four
+extra files will appear in the directory /proc/sys/pm/. Reading these files
+will show:
+
+ p0 -- current value of the P0 bit in CLKC register.
+ cm -- current value of the CM bits in CLKC register.
+ cmode -- current value of the CMODE bits in CLKC register.
+
+On all boards, the 'p0' file should also be writable, and either '1' or '0'
+can be rewritten, to set or clear the CLKC_P0 bit respectively, hence
+controlling whether the resource bus rate clock is halved.
+
+The 'cm' file should also be available on all boards. '0' can be written to it
+to shift the board into High-Speed mode (normal), and '1' can be written to
+shift the board into Medium-Speed mode. Selecting Low-Speed mode is not
+supported by this interface, even though some CPUs do support it.
+
+On the boards with FR405 CPU (i.e. CB60 and CB70), the 'cmode' file is also
+writable, allowing the CPU core speed (and other clock speeds) to be
+controlled from userspace.
+
+
+Determining current and possible settings
+-----------------------------------------
+
+The current state and the available masks can be found in /proc/cpuinfo. For
+example, on the CB70:
+
+ # cat /proc/cpuinfo
+ CPU-Series: fr400
+ CPU-Core: fr405, gr0-31, BE, CCCR
+ CPU: mb93405
+ MMU: Prot
+ FP-Media: fr0-31, Media
+ System: mb93091-cb70, mb93090-mb00
+ PM-Controls: cmode=0xd31f, cm=0x3, p0=0x3, suspend=0x9
+ PM-Status: cmode=3, cm=0, p0=0
+ Clock-In: 50.00 MHz
+ Clock-Core: 300.00 MHz
+ Clock-SDRAM: 100.00 MHz
+ Clock-CBus: 100.00 MHz
+ Clock-Res: 50.00 MHz
+ Clock-Ext: 50.00 MHz
+ Clock-DSU: 25.00 MHz
+ BogoMips: 300.00
+
+And on the PDK, the PM lines look like the following:
+
+ PM-Controls: cm=0x3, p0=0x3, suspend=0x9
+ PM-Status: cmode=9, cm=0, p0=0
+
+The PM-Controls line, if present, will indicate which /proc/sys/pm files can
+be set to what values. The specification values are bitmasks; so, for example,
+"suspend=0x9" indicates that 0 and 3 can be written validly to
+/proc/sys/pm/suspend.
+
+The PM-Controls line will only be present if CONFIG_PM is configured to Y.
+
+The PM-Status line indicates which clock controls are set to which value. If
+the file can be read, then the suspend value must be 0, and so that's not
+included.
diff --git a/Documentation/frv/configuring.txt b/Documentation/frv/configuring.txt
new file mode 100644
index 000000000..36e76a233
--- /dev/null
+++ b/Documentation/frv/configuring.txt
@@ -0,0 +1,125 @@
+ =======================================
+ FUJITSU FR-V LINUX KERNEL CONFIGURATION
+ =======================================
+
+=====================
+CONFIGURATION OPTIONS
+=====================
+
+The most important setting is in the "MMU support options" tab (the first
+presented in the configuration tools available):
+
+ (*) "Kernel Type"
+
+ This options allows selection of normal, MMU-requiring linux, and uClinux
+ (which doesn't require an MMU and doesn't have inter-process protection).
+
+There are a number of settings in the "Processor type and features" section of
+the kernel configuration that need to be considered.
+
+ (*) "CPU"
+
+ The register and instruction sets at the core of the processor. This can
+ only be set to "FR40x/45x/55x" at the moment - but this permits usage of
+ the kernel with MB93091 CB10, CB11, CB30, CB41, CB60, CB70 and CB451
+ CPU boards, and with the MB93093 PDK board.
+
+ (*) "System"
+
+ This option allows a choice of basic system. This governs the peripherals
+ that are expected to be available.
+
+ (*) "Motherboard"
+
+ This specifies the type of motherboard being used, and the peripherals
+ upon it. Currently only "MB93090-MB00" can be set here.
+
+ (*) "Default cache-write mode"
+
+ This controls the initial data cache write management mode. By default
+ Write-Through is selected, but Write-Back (Copy-Back) can also be
+ selected. This can be changed dynamically once the kernel is running (see
+ features.txt).
+
+There are some architecture specific configuration options in the "General
+Setup" section of the kernel configuration too:
+
+ (*) "Reserve memory uncached for (PCI) DMA"
+
+ This requests that a uClinux kernel set aside some memory in an uncached
+ window for the use as consistent DMA memory (mainly for PCI). At least a
+ megabyte will be allocated in this way, possibly more. Any memory so
+ reserved will not be available for normal allocations.
+
+ (*) "Kernel support for ELF-FDPIC binaries"
+
+ This enables the binary-format driver for the new FDPIC ELF binaries that
+ this platform normally uses. These binaries are totally relocatable -
+ their separate sections can relocated independently, allowing them to be
+ shared on uClinux where possible. This should normally be enabled.
+
+ (*) "Kernel image protection"
+
+ This makes the protection register governing access to the core kernel
+ image prohibit access by userspace programs. This option is available on
+ uClinux only.
+
+There are also a number of settings in the "Kernel Hacking" section of the
+kernel configuration especially for debugging a kernel on this
+architecture. See the "gdbstub.txt" file for information about those.
+
+
+======================
+DEFAULT CONFIGURATIONS
+======================
+
+The kernel sources include a number of example default configurations:
+
+ (*) defconfig-mb93091
+
+ Default configuration for the MB93091-VDK with both CPU board and
+ MB93090-MB00 motherboard running uClinux.
+
+
+ (*) defconfig-mb93091-fb
+
+ Default configuration for the MB93091-VDK with CPU board,
+ MB93090-MB00 motherboard, and DAV board running uClinux.
+ Includes framebuffer driver.
+
+
+ (*) defconfig-mb93093
+
+ Default configuration for the MB93093-PDK board running uClinux.
+
+
+ (*) defconfig-cb70-standalone
+
+ Default configuration for the MB93091-VDK with only CB70 CPU board
+ running uClinux. This will use the CB70's DM9000 for network access.
+
+
+ (*) defconfig-mmu
+
+ Default configuration for the MB93091-VDK with both CB451 CPU board and
+ MB93090-MB00 motherboard running MMU linux.
+
+ (*) defconfig-mmu-audio
+
+ Default configuration for the MB93091-VDK with CB451 CPU board, DAV
+ board, and MB93090-MB00 motherboard running MMU linux. Includes
+ audio driver.
+
+ (*) defconfig-mmu-fb
+
+ Default configuration for the MB93091-VDK with CB451 CPU board, DAV
+ board, and MB93090-MB00 motherboard running MMU linux. Includes
+ framebuffer driver.
+
+ (*) defconfig-mmu-standalone
+
+ Default configuration for the MB93091-VDK with only CB451 CPU board
+ running MMU linux.
+
+
+
diff --git a/Documentation/frv/features.txt b/Documentation/frv/features.txt
new file mode 100644
index 000000000..fa20c0e72
--- /dev/null
+++ b/Documentation/frv/features.txt
@@ -0,0 +1,310 @@
+ ===========================
+ FUJITSU FR-V LINUX FEATURES
+ ===========================
+
+This kernel port has a number of features of which the user should be aware:
+
+ (*) Linux and uClinux
+
+ The FR-V architecture port supports both normal MMU linux and uClinux out
+ of the same sources.
+
+
+ (*) CPU support
+
+ Support for the FR401, FR403, FR405, FR451 and FR555 CPUs should work with
+ the same uClinux kernel configuration.
+
+ In normal (MMU) Linux mode, only the FR451 CPU will work as that is the
+ only one with a suitably featured CPU.
+
+ The kernel is written and compiled with the assumption that only the
+ bottom 32 GR registers and no FR registers will be used by the kernel
+ itself, however all extra userspace registers will be saved on context
+ switch. Note that since most CPUs can't support lazy switching, no attempt
+ is made to do lazy register saving where that would be possible (FR555
+ only currently).
+
+
+ (*) Board support
+
+ The board on which the kernel will run can be configured on the "Processor
+ type and features" configuration tab.
+
+ Set the System to "MB93093-PDK" to boot from the MB93093 (FR403) PDK.
+
+ Set the System to "MB93091-VDK" to boot from the CB11, CB30, CB41, CB60,
+ CB70 or CB451 VDK boards. Set the Motherboard setting to "MB93090-MB00" to
+ boot with the standard ATA90590B VDK motherboard, and set it to "None" to
+ boot without any motherboard.
+
+
+ (*) Binary Formats
+
+ The only userspace binary format supported is FDPIC ELF. Normal ELF, FLAT
+ and AOUT binaries are not supported for this architecture.
+
+ FDPIC ELF supports shared library and program interpreter facilities.
+
+
+ (*) Scheduler Speed
+
+ The kernel scheduler runs at 100Hz irrespective of the clock speed on this
+ architecture. This value is set in asm/param.h (see the HZ macro defined
+ there).
+
+
+ (*) Normal (MMU) Linux Memory Layout.
+
+ See mmu-layout.txt in this directory for a description of the normal linux
+ memory layout
+
+ See include/asm-frv/mem-layout.h for constants pertaining to the memory
+ layout.
+
+ See include/asm-frv/mb-regs.h for the constants pertaining to the I/O bus
+ controller configuration.
+
+
+ (*) uClinux Memory Layout
+
+ The memory layout used by the uClinux kernel is as follows:
+
+ 0x00000000 - 0x00000FFF Null pointer catch page
+ 0x20000000 - 0x200FFFFF CS2# [PDK] FPGA
+ 0xC0000000 - 0xCFFFFFFF SDRAM
+ 0xC0000000 Base of Linux kernel image
+ 0xE0000000 - 0xEFFFFFFF CS2# [VDK] SLBUS/PCI window
+ 0xF0000000 - 0xF0FFFFFF CS5# MB93493 CSC area (DAV daughter board)
+ 0xF1000000 - 0xF1FFFFFF CS7# [CB70/CB451] CPU-card PCMCIA port space
+ 0xFC000000 - 0xFC0FFFFF CS1# [VDK] MB86943 config space
+ 0xFC100000 - 0xFC1FFFFF CS6# [CB70/CB451] CPU-card DM9000 NIC space
+ 0xFC100000 - 0xFC1FFFFF CS6# [PDK] AX88796 NIC space
+ 0xFC200000 - 0xFC2FFFFF CS3# MB93493 CSR area (DAV daughter board)
+ 0xFD000000 - 0xFDFFFFFF CS4# [CB70/CB451] CPU-card extra flash space
+ 0xFE000000 - 0xFEFFFFFF Internal CPU peripherals
+ 0xFF000000 - 0xFF1FFFFF CS0# Flash 1
+ 0xFF200000 - 0xFF3FFFFF CS0# Flash 2
+ 0xFFC00000 - 0xFFC0001F CS0# [VDK] FPGA
+
+ The kernel reads the size of the SDRAM from the memory bus controller
+ registers by default.
+
+ The kernel initialisation code (1) adjusts the SDRAM base addresses to
+ move the SDRAM to desired address, (2) moves the kernel image down to the
+ bottom of SDRAM, (3) adjusts the bus controller registers to move I/O
+ windows, and (4) rearranges the protection registers to protect all of
+ this.
+
+ The reasons for doing this are: (1) the page at address 0 should be
+ inaccessible so that NULL pointer errors can be caught; and (2) the bottom
+ three quarters are left unoccupied so that an FR-V CPU with an MMU can use
+ it for virtual userspace mappings.
+
+ See include/asm-frv/mem-layout.h for constants pertaining to the memory
+ layout.
+
+ See include/asm-frv/mb-regs.h for the constants pertaining to the I/O bus
+ controller configuration.
+
+
+ (*) uClinux Memory Protection
+
+ A DAMPR register is used to cover the entire region used for I/O
+ (0xE0000000 - 0xFFFFFFFF). This permits the kernel to make uncached
+ accesses to this region. Userspace is not permitted to access it.
+
+ The DAMPR/IAMPR protection registers not in use for any other purpose are
+ tiled over the top of the SDRAM such that:
+
+ (1) The core kernel image is covered by as small a tile as possible
+ granting only the kernel access to the underlying data, whilst
+ making sure no SDRAM is actually made unavailable by this approach.
+
+ (2) All other tiles are arranged to permit userspace access to the rest
+ of the SDRAM.
+
+ Barring point (1), there is nothing to protect kernel data against
+ userspace damage - but this is uClinux.
+
+
+ (*) Exceptions and Fixups
+
+ Since the FR40x and FR55x CPUs that do not have full MMUs generate
+ imprecise data error exceptions, there are currently no automatic fixup
+ services available in uClinux. This includes misaligned memory access
+ fixups.
+
+ Userspace EFAULT errors can be trapped by issuing a MEMBAR instruction and
+ forcing the fault to happen there.
+
+ On the FR451, however, data exceptions are mostly precise, and so
+ exception fixup handling is implemented as normal.
+
+
+ (*) Userspace Breakpoints
+
+ The ptrace() system call supports the following userspace debugging
+ features:
+
+ (1) Hardware assisted single step.
+
+ (2) Breakpoint via the FR-V "BREAK" instruction.
+
+ (3) Breakpoint via the FR-V "TIRA GR0, #1" instruction.
+
+ (4) Syscall entry/exit trap.
+
+ Each of the above generates a SIGTRAP.
+
+
+ (*) On-Chip Serial Ports
+
+ The FR-V on-chip serial ports are made available as ttyS0 and ttyS1. Note
+ that if the GDB stub is compiled in, ttyS1 will not actually be available
+ as it will be being used for the GDB stub.
+
+ These ports can be made by:
+
+ mknod /dev/ttyS0 c 4 64
+ mknod /dev/ttyS1 c 4 65
+
+
+ (*) Maskable Interrupts
+
+ Level 15 (Non-maskable) interrupts are dealt with by the GDB stub if
+ present, and cause a panic if not. If the GDB stub is present, ttyS1's
+ interrupts are rated at level 15.
+
+ All other interrupts are distributed over the set of available priorities
+ so that no IRQs are shared where possible. The arch interrupt handling
+ routines attempt to disentangle the various sources available through the
+ CPU's own multiplexor, and those on off-CPU peripherals.
+
+
+ (*) Accessing PCI Devices
+
+ Where PCI is available, care must be taken when dealing with drivers that
+ access PCI devices. PCI devices present their data in little-endian form,
+ but the CPU sees it in big-endian form. The macros in asm/io.h try to get
+ this right, but may not under all circumstances...
+
+
+ (*) Ax88796 Ethernet Driver
+
+ The MB93093 PDK board has an Ax88796 ethernet chipset (an NE2000 clone). A
+ driver has been written to deal specifically with this. The driver
+ provides MII services for the card.
+
+ The driver can be configured by running make xconfig, and going to:
+
+ (*) Network device support
+ - turn on "Network device support"
+ (*) Ethernet (10 or 100Mbit)
+ - turn on "Ethernet (10 or 100Mbit)"
+ - turn on "AX88796 NE2000 compatible chipset"
+
+ The driver can be found in:
+
+ drivers/net/ax88796.c
+ include/asm/ax88796.h
+
+
+ (*) WorkRAM Driver
+
+ This driver provides a character device that permits access to the WorkRAM
+ that can be found on the FR451 CPU. Each page is accessible through a
+ separate minor number, thereby permitting each page to have its own
+ filesystem permissions set on the device file.
+
+ The device files should be:
+
+ mknod /dev/frv/workram0 c 240 0
+ mknod /dev/frv/workram1 c 240 1
+ mknod /dev/frv/workram2 c 240 2
+ ...
+
+ The driver will not permit the opening of any device file that does not
+ correspond to at least a partial page of WorkRAM. So the first device file
+ is the only one available on the FR451. If any other CPU is detected, none
+ of the devices will be openable.
+
+ The devices can be accessed with read, write and llseek, and can also be
+ mmapped. If they're mmapped, they will only map at the appropriate
+ 0x7e8nnnnn address on linux and at the 0xfe8nnnnn address on uClinux. If
+ MAP_FIXED is not specified, the appropriate address will be chosen anyway.
+
+ The mappings must be MAP_SHARED not MAP_PRIVATE, and must not be
+ PROT_EXEC. They must also start at file offset 0, and must not be longer
+ than one page in size.
+
+ This driver can be configured by running make xconfig, and going to:
+
+ (*) Character devices
+ - turn on "Fujitsu FR-V CPU WorkRAM support"
+
+
+ (*) Dynamic data cache write mode changing
+
+ It is possible to view and to change the data cache's write mode through
+ the /proc/sys/frv/cache-mode file while the kernel is running. There are
+ two modes available:
+
+ NAME MEANING
+ ===== ==========================================
+ wthru Data cache is in Write-Through mode
+ wback Data cache is in Write-Back/Copy-Back mode
+
+ To read the cache mode:
+
+ # cat /proc/sys/frv/cache-mode
+ wthru
+
+ To change the cache mode:
+
+ # echo wback >/proc/sys/frv/cache-mode
+ # cat /proc/sys/frv/cache-mode
+ wback
+
+
+ (*) MMU Context IDs and Pinning
+
+ On MMU Linux the CPU supports the concept of a context ID in its MMU to
+ make it more efficient (TLB entries are labelled with a context ID to link
+ them to specific tasks).
+
+ Normally once a context ID is allocated, it will remain affixed to a task
+ or CLONE_VM'd group of tasks for as long as it exists. However, since the
+ kernel is capable of supporting more tasks than there are possible ID
+ numbers, the kernel will pass context IDs from one task to another if
+ there are insufficient available.
+
+ The context ID currently in use by a task can be viewed in /proc:
+
+ # grep CXNR /proc/1/status
+ CXNR: 1
+
+ Note that kernel threads do not have a userspace context, and so will not
+ show a CXNR entry in that file.
+
+ Under some circumstances, however, it is desirable to pin a context ID on
+ a process such that the kernel won't pass it on. This can be done by
+ writing the process ID of the target process to a special file:
+
+ # echo 17 >/proc/sys/frv/pin-cxnr
+
+ Reading from the file will then show the context ID pinned.
+
+ # cat /proc/sys/frv/pin-cxnr
+ 4
+
+ The context ID will remain pinned as long as any process is using that
+ context, i.e.: when the all the subscribing processes have exited or
+ exec'd; or when an unpinning request happens:
+
+ # echo 0 >/proc/sys/frv/pin-cxnr
+
+ When there isn't a pinned context, the file shows -1:
+
+ # cat /proc/sys/frv/pin-cxnr
+ -1
diff --git a/Documentation/frv/gdbinit b/Documentation/frv/gdbinit
new file mode 100644
index 000000000..51517b6f3
--- /dev/null
+++ b/Documentation/frv/gdbinit
@@ -0,0 +1,102 @@
+set remotebreak 1
+
+define _amr
+
+printf "AMRx DAMR IAMR \n"
+printf "==== ===================== =====================\n"
+printf "amr0 : L:%08lx P:%08lx : L:%08lx P:%08lx\n",__debug_mmu.damr[0x0].L,__debug_mmu.damr[0x0].P,__debug_mmu.iamr[0x0].L,__debug_mmu.iamr[0x0].P
+printf "amr1 : L:%08lx P:%08lx : L:%08lx P:%08lx\n",__debug_mmu.damr[0x1].L,__debug_mmu.damr[0x1].P,__debug_mmu.iamr[0x1].L,__debug_mmu.iamr[0x1].P
+printf "amr2 : L:%08lx P:%08lx : L:%08lx P:%08lx\n",__debug_mmu.damr[0x2].L,__debug_mmu.damr[0x2].P,__debug_mmu.iamr[0x2].L,__debug_mmu.iamr[0x2].P
+printf "amr3 : L:%08lx P:%08lx : L:%08lx P:%08lx\n",__debug_mmu.damr[0x3].L,__debug_mmu.damr[0x3].P,__debug_mmu.iamr[0x3].L,__debug_mmu.iamr[0x3].P
+printf "amr4 : L:%08lx P:%08lx : L:%08lx P:%08lx\n",__debug_mmu.damr[0x4].L,__debug_mmu.damr[0x4].P,__debug_mmu.iamr[0x4].L,__debug_mmu.iamr[0x4].P
+printf "amr5 : L:%08lx P:%08lx : L:%08lx P:%08lx\n",__debug_mmu.damr[0x5].L,__debug_mmu.damr[0x5].P,__debug_mmu.iamr[0x5].L,__debug_mmu.iamr[0x5].P
+printf "amr6 : L:%08lx P:%08lx : L:%08lx P:%08lx\n",__debug_mmu.damr[0x6].L,__debug_mmu.damr[0x6].P,__debug_mmu.iamr[0x6].L,__debug_mmu.iamr[0x6].P
+printf "amr7 : L:%08lx P:%08lx : L:%08lx P:%08lx\n",__debug_mmu.damr[0x7].L,__debug_mmu.damr[0x7].P,__debug_mmu.iamr[0x7].L,__debug_mmu.iamr[0x7].P
+
+printf "amr8 : L:%08lx P:%08lx\n",__debug_mmu.damr[0x8].L,__debug_mmu.damr[0x8].P
+printf "amr9 : L:%08lx P:%08lx\n",__debug_mmu.damr[0x9].L,__debug_mmu.damr[0x9].P
+printf "amr10: L:%08lx P:%08lx\n",__debug_mmu.damr[0xa].L,__debug_mmu.damr[0xa].P
+printf "amr11: L:%08lx P:%08lx\n",__debug_mmu.damr[0xb].L,__debug_mmu.damr[0xb].P
+
+end
+
+
+define _tlb
+printf "tlb[0x00]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x0].L,__debug_mmu.tlb[0x0].P,__debug_mmu.tlb[0x40+0x0].L,__debug_mmu.tlb[0x40+0x0].P
+printf "tlb[0x01]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x1].L,__debug_mmu.tlb[0x1].P,__debug_mmu.tlb[0x40+0x1].L,__debug_mmu.tlb[0x40+0x1].P
+printf "tlb[0x02]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x2].L,__debug_mmu.tlb[0x2].P,__debug_mmu.tlb[0x40+0x2].L,__debug_mmu.tlb[0x40+0x2].P
+printf "tlb[0x03]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x3].L,__debug_mmu.tlb[0x3].P,__debug_mmu.tlb[0x40+0x3].L,__debug_mmu.tlb[0x40+0x3].P
+printf "tlb[0x04]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x4].L,__debug_mmu.tlb[0x4].P,__debug_mmu.tlb[0x40+0x4].L,__debug_mmu.tlb[0x40+0x4].P
+printf "tlb[0x05]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x5].L,__debug_mmu.tlb[0x5].P,__debug_mmu.tlb[0x40+0x5].L,__debug_mmu.tlb[0x40+0x5].P
+printf "tlb[0x06]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x6].L,__debug_mmu.tlb[0x6].P,__debug_mmu.tlb[0x40+0x6].L,__debug_mmu.tlb[0x40+0x6].P
+printf "tlb[0x07]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x7].L,__debug_mmu.tlb[0x7].P,__debug_mmu.tlb[0x40+0x7].L,__debug_mmu.tlb[0x40+0x7].P
+printf "tlb[0x08]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x8].L,__debug_mmu.tlb[0x8].P,__debug_mmu.tlb[0x40+0x8].L,__debug_mmu.tlb[0x40+0x8].P
+printf "tlb[0x09]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x9].L,__debug_mmu.tlb[0x9].P,__debug_mmu.tlb[0x40+0x9].L,__debug_mmu.tlb[0x40+0x9].P
+printf "tlb[0x0a]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0xa].L,__debug_mmu.tlb[0xa].P,__debug_mmu.tlb[0x40+0xa].L,__debug_mmu.tlb[0x40+0xa].P
+printf "tlb[0x0b]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0xb].L,__debug_mmu.tlb[0xb].P,__debug_mmu.tlb[0x40+0xb].L,__debug_mmu.tlb[0x40+0xb].P
+printf "tlb[0x0c]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0xc].L,__debug_mmu.tlb[0xc].P,__debug_mmu.tlb[0x40+0xc].L,__debug_mmu.tlb[0x40+0xc].P
+printf "tlb[0x0d]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0xd].L,__debug_mmu.tlb[0xd].P,__debug_mmu.tlb[0x40+0xd].L,__debug_mmu.tlb[0x40+0xd].P
+printf "tlb[0x0e]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0xe].L,__debug_mmu.tlb[0xe].P,__debug_mmu.tlb[0x40+0xe].L,__debug_mmu.tlb[0x40+0xe].P
+printf "tlb[0x0f]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0xf].L,__debug_mmu.tlb[0xf].P,__debug_mmu.tlb[0x40+0xf].L,__debug_mmu.tlb[0x40+0xf].P
+printf "tlb[0x10]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x10].L,__debug_mmu.tlb[0x10].P,__debug_mmu.tlb[0x40+0x10].L,__debug_mmu.tlb[0x40+0x10].P
+printf "tlb[0x11]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x11].L,__debug_mmu.tlb[0x11].P,__debug_mmu.tlb[0x40+0x11].L,__debug_mmu.tlb[0x40+0x11].P
+printf "tlb[0x12]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x12].L,__debug_mmu.tlb[0x12].P,__debug_mmu.tlb[0x40+0x12].L,__debug_mmu.tlb[0x40+0x12].P
+printf "tlb[0x13]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x13].L,__debug_mmu.tlb[0x13].P,__debug_mmu.tlb[0x40+0x13].L,__debug_mmu.tlb[0x40+0x13].P
+printf "tlb[0x14]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x14].L,__debug_mmu.tlb[0x14].P,__debug_mmu.tlb[0x40+0x14].L,__debug_mmu.tlb[0x40+0x14].P
+printf "tlb[0x15]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x15].L,__debug_mmu.tlb[0x15].P,__debug_mmu.tlb[0x40+0x15].L,__debug_mmu.tlb[0x40+0x15].P
+printf "tlb[0x16]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x16].L,__debug_mmu.tlb[0x16].P,__debug_mmu.tlb[0x40+0x16].L,__debug_mmu.tlb[0x40+0x16].P
+printf "tlb[0x17]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x17].L,__debug_mmu.tlb[0x17].P,__debug_mmu.tlb[0x40+0x17].L,__debug_mmu.tlb[0x40+0x17].P
+printf "tlb[0x18]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x18].L,__debug_mmu.tlb[0x18].P,__debug_mmu.tlb[0x40+0x18].L,__debug_mmu.tlb[0x40+0x18].P
+printf "tlb[0x19]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x19].L,__debug_mmu.tlb[0x19].P,__debug_mmu.tlb[0x40+0x19].L,__debug_mmu.tlb[0x40+0x19].P
+printf "tlb[0x1a]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x1a].L,__debug_mmu.tlb[0x1a].P,__debug_mmu.tlb[0x40+0x1a].L,__debug_mmu.tlb[0x40+0x1a].P
+printf "tlb[0x1b]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x1b].L,__debug_mmu.tlb[0x1b].P,__debug_mmu.tlb[0x40+0x1b].L,__debug_mmu.tlb[0x40+0x1b].P
+printf "tlb[0x1c]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x1c].L,__debug_mmu.tlb[0x1c].P,__debug_mmu.tlb[0x40+0x1c].L,__debug_mmu.tlb[0x40+0x1c].P
+printf "tlb[0x1d]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x1d].L,__debug_mmu.tlb[0x1d].P,__debug_mmu.tlb[0x40+0x1d].L,__debug_mmu.tlb[0x40+0x1d].P
+printf "tlb[0x1e]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x1e].L,__debug_mmu.tlb[0x1e].P,__debug_mmu.tlb[0x40+0x1e].L,__debug_mmu.tlb[0x40+0x1e].P
+printf "tlb[0x1f]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x1f].L,__debug_mmu.tlb[0x1f].P,__debug_mmu.tlb[0x40+0x1f].L,__debug_mmu.tlb[0x40+0x1f].P
+printf "tlb[0x20]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x20].L,__debug_mmu.tlb[0x20].P,__debug_mmu.tlb[0x40+0x20].L,__debug_mmu.tlb[0x40+0x20].P
+printf "tlb[0x21]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x21].L,__debug_mmu.tlb[0x21].P,__debug_mmu.tlb[0x40+0x21].L,__debug_mmu.tlb[0x40+0x21].P
+printf "tlb[0x22]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x22].L,__debug_mmu.tlb[0x22].P,__debug_mmu.tlb[0x40+0x22].L,__debug_mmu.tlb[0x40+0x22].P
+printf "tlb[0x23]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x23].L,__debug_mmu.tlb[0x23].P,__debug_mmu.tlb[0x40+0x23].L,__debug_mmu.tlb[0x40+0x23].P
+printf "tlb[0x24]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x24].L,__debug_mmu.tlb[0x24].P,__debug_mmu.tlb[0x40+0x24].L,__debug_mmu.tlb[0x40+0x24].P
+printf "tlb[0x25]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x25].L,__debug_mmu.tlb[0x25].P,__debug_mmu.tlb[0x40+0x25].L,__debug_mmu.tlb[0x40+0x25].P
+printf "tlb[0x26]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x26].L,__debug_mmu.tlb[0x26].P,__debug_mmu.tlb[0x40+0x26].L,__debug_mmu.tlb[0x40+0x26].P
+printf "tlb[0x27]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x27].L,__debug_mmu.tlb[0x27].P,__debug_mmu.tlb[0x40+0x27].L,__debug_mmu.tlb[0x40+0x27].P
+printf "tlb[0x28]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x28].L,__debug_mmu.tlb[0x28].P,__debug_mmu.tlb[0x40+0x28].L,__debug_mmu.tlb[0x40+0x28].P
+printf "tlb[0x29]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x29].L,__debug_mmu.tlb[0x29].P,__debug_mmu.tlb[0x40+0x29].L,__debug_mmu.tlb[0x40+0x29].P
+printf "tlb[0x2a]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x2a].L,__debug_mmu.tlb[0x2a].P,__debug_mmu.tlb[0x40+0x2a].L,__debug_mmu.tlb[0x40+0x2a].P
+printf "tlb[0x2b]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x2b].L,__debug_mmu.tlb[0x2b].P,__debug_mmu.tlb[0x40+0x2b].L,__debug_mmu.tlb[0x40+0x2b].P
+printf "tlb[0x2c]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x2c].L,__debug_mmu.tlb[0x2c].P,__debug_mmu.tlb[0x40+0x2c].L,__debug_mmu.tlb[0x40+0x2c].P
+printf "tlb[0x2d]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x2d].L,__debug_mmu.tlb[0x2d].P,__debug_mmu.tlb[0x40+0x2d].L,__debug_mmu.tlb[0x40+0x2d].P
+printf "tlb[0x2e]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x2e].L,__debug_mmu.tlb[0x2e].P,__debug_mmu.tlb[0x40+0x2e].L,__debug_mmu.tlb[0x40+0x2e].P
+printf "tlb[0x2f]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x2f].L,__debug_mmu.tlb[0x2f].P,__debug_mmu.tlb[0x40+0x2f].L,__debug_mmu.tlb[0x40+0x2f].P
+printf "tlb[0x30]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x30].L,__debug_mmu.tlb[0x30].P,__debug_mmu.tlb[0x40+0x30].L,__debug_mmu.tlb[0x40+0x30].P
+printf "tlb[0x31]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x31].L,__debug_mmu.tlb[0x31].P,__debug_mmu.tlb[0x40+0x31].L,__debug_mmu.tlb[0x40+0x31].P
+printf "tlb[0x32]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x32].L,__debug_mmu.tlb[0x32].P,__debug_mmu.tlb[0x40+0x32].L,__debug_mmu.tlb[0x40+0x32].P
+printf "tlb[0x33]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x33].L,__debug_mmu.tlb[0x33].P,__debug_mmu.tlb[0x40+0x33].L,__debug_mmu.tlb[0x40+0x33].P
+printf "tlb[0x34]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x34].L,__debug_mmu.tlb[0x34].P,__debug_mmu.tlb[0x40+0x34].L,__debug_mmu.tlb[0x40+0x34].P
+printf "tlb[0x35]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x35].L,__debug_mmu.tlb[0x35].P,__debug_mmu.tlb[0x40+0x35].L,__debug_mmu.tlb[0x40+0x35].P
+printf "tlb[0x36]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x36].L,__debug_mmu.tlb[0x36].P,__debug_mmu.tlb[0x40+0x36].L,__debug_mmu.tlb[0x40+0x36].P
+printf "tlb[0x37]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x37].L,__debug_mmu.tlb[0x37].P,__debug_mmu.tlb[0x40+0x37].L,__debug_mmu.tlb[0x40+0x37].P
+printf "tlb[0x38]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x38].L,__debug_mmu.tlb[0x38].P,__debug_mmu.tlb[0x40+0x38].L,__debug_mmu.tlb[0x40+0x38].P
+printf "tlb[0x39]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x39].L,__debug_mmu.tlb[0x39].P,__debug_mmu.tlb[0x40+0x39].L,__debug_mmu.tlb[0x40+0x39].P
+printf "tlb[0x3a]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x3a].L,__debug_mmu.tlb[0x3a].P,__debug_mmu.tlb[0x40+0x3a].L,__debug_mmu.tlb[0x40+0x3a].P
+printf "tlb[0x3b]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x3b].L,__debug_mmu.tlb[0x3b].P,__debug_mmu.tlb[0x40+0x3b].L,__debug_mmu.tlb[0x40+0x3b].P
+printf "tlb[0x3c]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x3c].L,__debug_mmu.tlb[0x3c].P,__debug_mmu.tlb[0x40+0x3c].L,__debug_mmu.tlb[0x40+0x3c].P
+printf "tlb[0x3d]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x3d].L,__debug_mmu.tlb[0x3d].P,__debug_mmu.tlb[0x40+0x3d].L,__debug_mmu.tlb[0x40+0x3d].P
+printf "tlb[0x3e]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x3e].L,__debug_mmu.tlb[0x3e].P,__debug_mmu.tlb[0x40+0x3e].L,__debug_mmu.tlb[0x40+0x3e].P
+printf "tlb[0x3f]: %08lx %08lx %08lx %08lx\n",__debug_mmu.tlb[0x3f].L,__debug_mmu.tlb[0x3f].P,__debug_mmu.tlb[0x40+0x3f].L,__debug_mmu.tlb[0x40+0x3f].P
+end
+
+
+define _pgd
+p (pgd_t[0x40])*(pgd_t*)(__debug_mmu.damr[0x3].L)
+end
+
+define _ptd_i
+p (pte_t[0x1000])*(pte_t*)(__debug_mmu.damr[0x4].L)
+end
+
+define _ptd_d
+p (pte_t[0x1000])*(pte_t*)(__debug_mmu.damr[0x5].L)
+end
diff --git a/Documentation/frv/gdbstub.txt b/Documentation/frv/gdbstub.txt
new file mode 100644
index 000000000..b92bfd902
--- /dev/null
+++ b/Documentation/frv/gdbstub.txt
@@ -0,0 +1,130 @@
+ ====================
+ DEBUGGING FR-V LINUX
+ ====================
+
+
+The kernel contains a GDB stub that talks GDB remote protocol across a serial
+port. This permits GDB to single step through the kernel, set breakpoints and
+trap exceptions that happen in kernel space and interrupt execution. It also
+permits the NMI interrupt button or serial port events to jump the kernel into
+the debugger.
+
+On the CPUs that have on-chip UARTs (FR400, FR403, FR405, FR555), the
+GDB stub hijacks a serial port for its own purposes, and makes it
+generate level 15 interrupts (NMI). The kernel proper cannot see the serial
+port in question under these conditions.
+
+On the MB93091-VDK CPU boards, the GDB stub uses UART1, which would otherwise
+be /dev/ttyS1. On the MB93093-PDK, the GDB stub uses UART0. Therefore, on the
+PDK there is no externally accessible serial port and the serial port to
+which the touch screen is attached becomes /dev/ttyS0.
+
+Note that the GDB stub runs entirely within CPU debug mode, and so should not
+incur any exceptions or interrupts whilst it is active. In particular, note
+that the clock will lose time since it is implemented in software.
+
+
+==================
+KERNEL PREPARATION
+==================
+
+Firstly, a debuggable kernel must be built. To do this, unpack the kernel tree
+and copy the configuration that you wish to use to .config. Then reconfigure
+the following things on the "Kernel Hacking" tab:
+
+ (*) "Include debugging information"
+
+ Set this to "Y". This causes all C and Assembly files to be compiled
+ to include debugging information.
+
+ (*) "In-kernel GDB stub"
+
+ Set this to "Y". This causes the GDB stub to be compiled into the
+ kernel.
+
+ (*) "Immediate activation"
+
+ Set this to "Y" if you want the GDB stub to activate as soon as possible
+ and wait for GDB to connect. This allows you to start tracing right from
+ the beginning of start_kernel() in init/main.c.
+
+ (*) "Console through GDB stub"
+
+ Set this to "Y" if you wish to be able to use "console=gdb0" on the
+ command line. That tells the kernel to pass system console messages to
+ GDB (which then prints them on its standard output). This is useful when
+ debugging the serial drivers that'd otherwise be used to pass console
+ messages to the outside world.
+
+Then build as usual, download to the board and execute. Note that if
+"Immediate activation" was selected, then the kernel will wait for GDB to
+attach. If not, then the kernel will boot immediately and GDB will have to
+interrupt it or wait for an exception to occur before doing anything with
+the kernel.
+
+
+=========================
+KERNEL DEBUGGING WITH GDB
+=========================
+
+Set the serial port on the computer that's going to run GDB to the appropriate
+baud rate. Assuming the board's debug port is connected to ttyS0/COM1 on the
+computer doing the debugging:
+
+ stty -F /dev/ttyS0 115200
+
+Then start GDB in the base of the kernel tree:
+
+ frv-uclinux-gdb linux [uClinux]
+
+Or:
+
+ frv-uclinux-gdb vmlinux [MMU linux]
+
+When the prompt appears:
+
+ GNU gdb frv-031024
+ Copyright 2003 Free Software Foundation, Inc.
+ GDB is free software, covered by the GNU General Public License, and you are
+ welcome to change it and/or distribute copies of it under certain conditions.
+ Type "show copying" to see the conditions.
+ There is absolutely no warranty for GDB. Type "show warranty" for details.
+ This GDB was configured as "--host=i686-pc-linux-gnu --target=frv-uclinux"...
+ (gdb)
+
+Attach to the board like this:
+
+ (gdb) target remote /dev/ttyS0
+ Remote debugging using /dev/ttyS0
+ start_kernel () at init/main.c:395
+ (gdb)
+
+This should show the appropriate lines from the source too. The kernel can
+then be debugged almost as if it's any other program.
+
+
+===============================
+INTERRUPTING THE RUNNING KERNEL
+===============================
+
+The kernel can be interrupted whilst it is running, causing a jump back to the
+GDB stub and the debugger:
+
+ (*) Pressing Ctrl-C in GDB. This will cause GDB to try and interrupt the
+ kernel by sending an RS232 BREAK over the serial line to the GDB
+ stub. This will (mostly) immediately interrupt the kernel and return it
+ to the debugger.
+
+ (*) Pressing the NMI button on the board will also cause a jump into the
+ debugger.
+
+ (*) Setting a software breakpoint. This sets a break instruction at the
+ desired location which the GDB stub then traps the exception for.
+
+ (*) Setting a hardware breakpoint. The GDB stub is capable of using the IBAR
+ and DBAR registers to assist debugging.
+
+Furthermore, the GDB stub will intercept a number of exceptions automatically
+if they are caused by kernel execution. It will also intercept BUG() macro
+invocation.
+
diff --git a/Documentation/frv/kernel-ABI.txt b/Documentation/frv/kernel-ABI.txt
new file mode 100644
index 000000000..aaa1cec86
--- /dev/null
+++ b/Documentation/frv/kernel-ABI.txt
@@ -0,0 +1,262 @@
+ =================================
+ INTERNAL KERNEL ABI FOR FR-V ARCH
+ =================================
+
+The internal FRV kernel ABI is not quite the same as the userspace ABI. A
+number of the registers are used for special purposed, and the ABI is not
+consistent between modules vs core, and MMU vs no-MMU.
+
+This partly stems from the fact that FRV CPUs do not have a separate
+supervisor stack pointer, and most of them do not have any scratch
+registers, thus requiring at least one general purpose register to be
+clobbered in such an event. Also, within the kernel core, it is possible to
+simply jump or call directly between functions using a relative offset.
+This cannot be extended to modules for the displacement is likely to be too
+far. Thus in modules the address of a function to call must be calculated
+in a register and then used, requiring two extra instructions.
+
+This document has the following sections:
+
+ (*) System call register ABI
+ (*) CPU operating modes
+ (*) Internal kernel-mode register ABI
+ (*) Internal debug-mode register ABI
+ (*) Virtual interrupt handling
+
+
+========================
+SYSTEM CALL REGISTER ABI
+========================
+
+When a system call is made, the following registers are effective:
+
+ REGISTERS CALL RETURN
+ =============== ======================= =======================
+ GR7 System call number Preserved
+ GR8 Syscall arg #1 Return value
+ GR9-GR13 Syscall arg #2-6 Preserved
+
+
+===================
+CPU OPERATING MODES
+===================
+
+The FR-V CPU has three basic operating modes. In order of increasing
+capability:
+
+ (1) User mode.
+
+ Basic userspace running mode.
+
+ (2) Kernel mode.
+
+ Normal kernel mode. There are many additional control registers
+ available that may be accessed in this mode, in addition to all the
+ stuff available to user mode. This has two submodes:
+
+ (a) Exceptions enabled (PSR.T == 1).
+
+ Exceptions will invoke the appropriate normal kernel mode
+ handler. On entry to the handler, the PSR.T bit will be cleared.
+
+ (b) Exceptions disabled (PSR.T == 0).
+
+ No exceptions or interrupts may happen. Any mandatory exceptions
+ will cause the CPU to halt unless the CPU is told to jump into
+ debug mode instead.
+
+ (3) Debug mode.
+
+ No exceptions may happen in this mode. Memory protection and
+ management exceptions will be flagged for later consideration, but
+ the exception handler won't be invoked. Debugging traps such as
+ hardware breakpoints and watchpoints will be ignored. This mode is
+ entered only by debugging events obtained from the other two modes.
+
+ All kernel mode registers may be accessed, plus a few extra debugging
+ specific registers.
+
+
+=================================
+INTERNAL KERNEL-MODE REGISTER ABI
+=================================
+
+There are a number of permanent register assignments that are set up by
+entry.S in the exception prologue. Note that there is a complete set of
+exception prologues for each of user->kernel transition and kernel->kernel
+transition. There are also user->debug and kernel->debug mode transition
+prologues.
+
+
+ REGISTER FLAVOUR USE
+ =============== ======= ==============================================
+ GR1 Supervisor stack pointer
+ GR15 Current thread info pointer
+ GR16 GP-Rel base register for small data
+ GR28 Current exception frame pointer (__frame)
+ GR29 Current task pointer (current)
+ GR30 Destroyed by kernel mode entry
+ GR31 NOMMU Destroyed by debug mode entry
+ GR31 MMU Destroyed by TLB miss kernel mode entry
+ CCR.ICC2 Virtual interrupt disablement tracking
+ CCCR.CC3 Cleared by exception prologue
+ (atomic op emulation)
+ SCR0 MMU See mmu-layout.txt.
+ SCR1 MMU See mmu-layout.txt.
+ SCR2 MMU Save for EAR0 (destroyed by icache insns
+ in debug mode)
+ SCR3 MMU Save for GR31 during debug exceptions
+ DAMR/IAMR NOMMU Fixed memory protection layout.
+ DAMR/IAMR MMU See mmu-layout.txt.
+
+
+Certain registers are also used or modified across function calls:
+
+ REGISTER CALL RETURN
+ =============== =============================== ======================
+ GR0 Fixed Zero -
+ GR2 Function call frame pointer
+ GR3 Special Preserved
+ GR3-GR7 - Clobbered
+ GR8 Function call arg #1 Return value
+ (or clobbered)
+ GR9 Function call arg #2 Return value MSW
+ (or clobbered)
+ GR10-GR13 Function call arg #3-#6 Clobbered
+ GR14 - Clobbered
+ GR15-GR16 Special Preserved
+ GR17-GR27 - Preserved
+ GR28-GR31 Special Only accessed
+ explicitly
+ LR Return address after CALL Clobbered
+ CCR/CCCR - Mostly Clobbered
+
+
+================================
+INTERNAL DEBUG-MODE REGISTER ABI
+================================
+
+This is the same as the kernel-mode register ABI for functions calls. The
+difference is that in debug-mode there's a different stack and a different
+exception frame. Almost all the global registers from kernel-mode
+(including the stack pointer) may be changed.
+
+ REGISTER FLAVOUR USE
+ =============== ======= ==============================================
+ GR1 Debug stack pointer
+ GR16 GP-Rel base register for small data
+ GR31 Current debug exception frame pointer
+ (__debug_frame)
+ SCR3 MMU Saved value of GR31
+
+
+Note that debug mode is able to interfere with the kernel's emulated atomic
+ops, so it must be exceedingly careful not to do any that would interact
+with the main kernel in this regard. Hence the debug mode code (gdbstub) is
+almost completely self-contained. The only external code used is the
+sprintf family of functions.
+
+Furthermore, break.S is so complicated because single-step mode does not
+switch off on entry to an exception. That means unless manually disabled,
+single-stepping will blithely go on stepping into things like interrupts.
+See gdbstub.txt for more information.
+
+
+==========================
+VIRTUAL INTERRUPT HANDLING
+==========================
+
+Because accesses to the PSR is so slow, and to disable interrupts we have
+to access it twice (once to read and once to write), we don't actually
+disable interrupts at all if we don't have to. What we do instead is use
+the ICC2 condition code flags to note virtual disablement, such that if we
+then do take an interrupt, we note the flag, really disable interrupts, set
+another flag and resume execution at the point the interrupt happened.
+Setting condition flags as a side effect of an arithmetic or logical
+instruction is really fast. This use of the ICC2 only occurs within the
+kernel - it does not affect userspace.
+
+The flags we use are:
+
+ (*) CCR.ICC2.Z [Zero flag]
+
+ Set to virtually disable interrupts, clear when interrupts are
+ virtually enabled. Can be modified by logical instructions without
+ affecting the Carry flag.
+
+ (*) CCR.ICC2.C [Carry flag]
+
+ Clear to indicate hardware interrupts are really disabled, set otherwise.
+
+
+What happens is this:
+
+ (1) Normal kernel-mode operation.
+
+ ICC2.Z is 0, ICC2.C is 1.
+
+ (2) An interrupt occurs. The exception prologue examines ICC2.Z and
+ determines that nothing needs doing. This is done simply with an
+ unlikely BEQ instruction.
+
+ (3) The interrupts are disabled (local_irq_disable)
+
+ ICC2.Z is set to 1.
+
+ (4) If interrupts were then re-enabled (local_irq_enable):
+
+ ICC2.Z would be set to 0.
+
+ A TIHI #2 instruction (trap #2 if condition HI - Z==0 && C==0) would
+ be used to trap if interrupts were now virtually enabled, but
+ physically disabled - which they're not, so the trap isn't taken. The
+ kernel would then be back to state (1).
+
+ (5) An interrupt occurs. The exception prologue examines ICC2.Z and
+ determines that the interrupt shouldn't actually have happened. It
+ jumps aside, and there disabled interrupts by setting PSR.PIL to 14
+ and then it clears ICC2.C.
+
+ (6) If interrupts were then saved and disabled again (local_irq_save):
+
+ ICC2.Z would be shifted into the save variable and masked off
+ (giving a 1).
+
+ ICC2.Z would then be set to 1 (thus unchanged), and ICC2.C would be
+ unaffected (ie: 0).
+
+ (7) If interrupts were then restored from state (6) (local_irq_restore):
+
+ ICC2.Z would be set to indicate the result of XOR'ing the saved
+ value (ie: 1) with 1, which gives a result of 0 - thus leaving
+ ICC2.Z set.
+
+ ICC2.C would remain unaffected (ie: 0).
+
+ A TIHI #2 instruction would be used to again assay the current state,
+ but this would do nothing as Z==1.
+
+ (8) If interrupts were then enabled (local_irq_enable):
+
+ ICC2.Z would be cleared. ICC2.C would be left unaffected. Both
+ flags would now be 0.
+
+ A TIHI #2 instruction again issued to assay the current state would
+ then trap as both Z==0 [interrupts virtually enabled] and C==0
+ [interrupts really disabled] would then be true.
+
+ (9) The trap #2 handler would simply enable hardware interrupts
+ (set PSR.PIL to 0), set ICC2.C to 1 and return.
+
+(10) Immediately upon returning, the pending interrupt would be taken.
+
+(11) The interrupt handler would take the path of actually processing the
+ interrupt (ICC2.Z is clear, BEQ fails as per step (2)).
+
+(12) The interrupt handler would then set ICC2.C to 1 since hardware
+ interrupts are definitely enabled - or else the kernel wouldn't be here.
+
+(13) On return from the interrupt handler, things would be back to state (1).
+
+This trap (#2) is only available in kernel mode. In user mode it will
+result in SIGILL.
diff --git a/Documentation/frv/mmu-layout.txt b/Documentation/frv/mmu-layout.txt
new file mode 100644
index 000000000..db10250df
--- /dev/null
+++ b/Documentation/frv/mmu-layout.txt
@@ -0,0 +1,306 @@
+ =================================
+ FR451 MMU LINUX MEMORY MANAGEMENT
+ =================================
+
+============
+MMU HARDWARE
+============
+
+FR451 MMU Linux puts the MMU into EDAT mode whilst running. This means that it uses both the SAT
+registers and the DAT TLB to perform address translation.
+
+There are 8 IAMLR/IAMPR register pairs and 16 DAMLR/DAMPR register pairs for SAT mode.
+
+In DAT mode, there is also a TLB organised in cache format as 64 lines x 2 ways. Each line spans a
+16KB range of addresses, but can match a larger region.
+
+
+===========================
+MEMORY MANAGEMENT REGISTERS
+===========================
+
+Certain control registers are used by the kernel memory management routines:
+
+ REGISTERS USAGE
+ ====================== ==================================================
+ IAMR0, DAMR0 Kernel image and data mappings
+ IAMR1, DAMR1 First-chance TLB lookup mapping
+ DAMR2 Page attachment for cache flush by page
+ DAMR3 Current PGD mapping
+ SCR0, DAMR4 Instruction TLB PGE/PTD cache
+ SCR1, DAMR5 Data TLB PGE/PTD cache
+ DAMR6-10 kmap_atomic() mappings
+ DAMR11 I/O mapping
+ CXNR mm_struct context ID
+ TTBR Page directory (PGD) pointer (physical address)
+
+
+=====================
+GENERAL MEMORY LAYOUT
+=====================
+
+The physical memory layout is as follows:
+
+ PHYSICAL ADDRESS CONTROLLER DEVICE
+ =================== ============== =======================================
+ 00000000 - BFFFFFFF SDRAM SDRAM area
+ E0000000 - EFFFFFFF L-BUS CS2# VDK SLBUS/PCI window
+ F0000000 - F0FFFFFF L-BUS CS5# MB93493 CSC area (DAV daughter board)
+ F1000000 - F1FFFFFF L-BUS CS7# (CB70 CPU-card PCMCIA port I/O space)
+ FC000000 - FC0FFFFF L-BUS CS1# VDK MB86943 config space
+ FC100000 - FC1FFFFF L-BUS CS6# DM9000 NIC I/O space
+ FC200000 - FC2FFFFF L-BUS CS3# MB93493 CSR area (DAV daughter board)
+ FD000000 - FDFFFFFF L-BUS CS4# (CB70 CPU-card extra flash space)
+ FE000000 - FEFFFFFF Internal CPU peripherals
+ FF000000 - FF1FFFFF L-BUS CS0# Flash 1
+ FF200000 - FF3FFFFF L-BUS CS0# Flash 2
+ FFC00000 - FFC0001F L-BUS CS0# FPGA
+
+The virtual memory layout is:
+
+ VIRTUAL ADDRESS PHYSICAL TRANSLATOR FLAGS SIZE OCCUPATION
+ ================= ======== ============== ======= ======= ===================================
+ 00004000-BFFFFFFF various TLB,xAMR1 D-N-??V 3GB Userspace
+ C0000000-CFFFFFFF 00000000 xAMPR0 -L-S--V 256MB Kernel image and data
+ D0000000-D7FFFFFF various TLB,xAMR1 D-NS??V 128MB vmalloc area
+ D8000000-DBFFFFFF various TLB,xAMR1 D-NS??V 64MB kmap() area
+ DC000000-DCFFFFFF various TLB 1MB Secondary kmap_atomic() frame
+ DD000000-DD27FFFF various DAMR 160KB Primary kmap_atomic() frame
+ DD040000 DAMR2/IAMR2 -L-S--V page Page cache flush attachment point
+ DD080000 DAMR3 -L-SC-V page Page Directory (PGD)
+ DD0C0000 DAMR4 -L-SC-V page Cached insn TLB Page Table lookup
+ DD100000 DAMR5 -L-SC-V page Cached data TLB Page Table lookup
+ DD140000 DAMR6 -L-S--V page kmap_atomic(KM_BOUNCE_READ)
+ DD180000 DAMR7 -L-S--V page kmap_atomic(KM_SKB_SUNRPC_DATA)
+ DD1C0000 DAMR8 -L-S--V page kmap_atomic(KM_SKB_DATA_SOFTIRQ)
+ DD200000 DAMR9 -L-S--V page kmap_atomic(KM_USER0)
+ DD240000 DAMR10 -L-S--V page kmap_atomic(KM_USER1)
+ E0000000-FFFFFFFF E0000000 DAMR11 -L-SC-V 512MB I/O region
+
+IAMPR1 and DAMPR1 are used as an extension to the TLB.
+
+
+====================
+KMAP AND KMAP_ATOMIC
+====================
+
+To access pages in the page cache (which may not be directly accessible if highmem is available),
+the kernel calls kmap(), does the access and then calls kunmap(); or it calls kmap_atomic(), does
+the access and then calls kunmap_atomic().
+
+kmap() creates an attachment between an arbitrary inaccessible page and a range of virtual
+addresses by installing a PTE in a special page table. The kernel can then access this page as it
+wills. When it's finished, the kernel calls kunmap() to clear the PTE.
+
+kmap_atomic() does something slightly different. In the interests of speed, it chooses one of two
+strategies:
+
+ (1) If possible, kmap_atomic() attaches the requested page to one of DAMPR5 through DAMPR10
+ register pairs; and the matching kunmap_atomic() clears the DAMPR. This makes high memory
+ support really fast as there's no need to flush the TLB or modify the page tables. The DAMLR
+ registers being used for this are preset during boot and don't change over the lifetime of the
+ process. There's a direct mapping between the first few kmap_atomic() types, DAMR number and
+ virtual address slot.
+
+ However, there are more kmap_atomic() types defined than there are DAMR registers available,
+ so we fall back to:
+
+ (2) kmap_atomic() uses a slot in the secondary frame (determined by the type parameter), and then
+ locks an entry in the TLB to translate that slot to the specified page. The number of slots is
+ obviously limited, and their positions are controlled such that each slot is matched by a
+ different line in the TLB. kunmap() ejects the entry from the TLB.
+
+Note that the first three kmap atomic types are really just declared as placeholders. The DAMPR
+registers involved are actually modified directly.
+
+Also note that kmap() itself may sleep, kmap_atomic() may never sleep and both always succeed;
+furthermore, a driver using kmap() may sleep before calling kunmap(), but may not sleep before
+calling kunmap_atomic() if it had previously called kmap_atomic().
+
+
+===============================
+USING MORE THAN 256MB OF MEMORY
+===============================
+
+The kernel cannot access more than 256MB of memory directly. The physical layout, however, permits
+up to 3GB of SDRAM (possibly 3.25GB) to be made available. By using CONFIG_HIGHMEM, the kernel can
+allow userspace (by way of page tables) and itself (by way of kmap) to deal with the memory
+allocation.
+
+External devices can, of course, still DMA to and from all of the SDRAM, even if the kernel can't
+see it directly. The kernel translates page references into real addresses for communicating to the
+devices.
+
+
+===================
+PAGE TABLE TOPOLOGY
+===================
+
+The page tables are arranged in 2-layer format. There is a middle layer (PMD) that would be used in
+3-layer format tables but that is folded into the top layer (PGD) and so consumes no extra memory
+or processing power.
+
+ +------+ PGD PMD
+ | TTBR |--->+-------------------+
+ +------+ | | : STE |
+ | PGE0 | PME0 : STE |
+ | | : STE |
+ +-------------------+ Page Table
+ | | : STE -------------->+--------+ +0x0000
+ | PGE1 | PME0 : STE -----------+ | PTE0 |
+ | | : STE -------+ | +--------+
+ +-------------------+ | | | PTE63 |
+ | | : STE | | +-->+--------+ +0x0100
+ | PGE2 | PME0 : STE | | | PTE64 |
+ | | : STE | | +--------+
+ +-------------------+ | | PTE127 |
+ | | : STE | +------>+--------+ +0x0200
+ | PGE3 | PME0 : STE | | PTE128 |
+ | | : STE | +--------+
+ +-------------------+ | PTE191 |
+ +--------+ +0x0300
+
+Each Page Directory (PGD) is 16KB (page size) in size and is divided into 64 entries (PGEs). Each
+PGE contains one Page Mid Directory (PMD).
+
+Each PMD is 256 bytes in size and contains a single entry (PME). Each PME holds 64 FR451 MMU
+segment table entries of 4 bytes apiece. Each PME "points to" a page table. In practice, each STE
+points to a subset of the page table, the first to PT+0x0000, the second to PT+0x0100, the third to
+PT+0x200, and so on.
+
+Each PGE and PME covers 64MB of the total virtual address space.
+
+Each Page Table (PTD) is 16KB (page size) in size, and is divided into 4096 entries (PTEs). Each
+entry can point to one 16KB page. In practice, each Linux page table is subdivided into 64 FR451
+MMU page tables. But they are all grouped together to make management easier, in particular rmap
+support is then trivial.
+
+Grouping page tables in this fashion makes PGE caching in SCR0/SCR1 more efficient because the
+coverage of the cached item is greater.
+
+Page tables for the vmalloc area are allocated at boot time and shared between all mm_structs.
+
+
+=================
+USER SPACE LAYOUT
+=================
+
+For MMU capable Linux, the regions userspace code are allowed to access are kept entirely separate
+from those dedicated to the kernel:
+
+ VIRTUAL ADDRESS SIZE PURPOSE
+ ================= ===== ===================================
+ 00000000-00003fff 4KB NULL pointer access trap
+ 00004000-01ffffff ~32MB lower mmap space (grows up)
+ 02000000-021fffff 2MB Stack space (grows down from top)
+ 02200000-nnnnnnnn Executable mapping
+ nnnnnnnn- brk space (grows up)
+ -bfffffff upper mmap space (grows down)
+
+This is so arranged so as to make best use of the 16KB page tables and the way in which PGEs/PMEs
+are cached by the TLB handler. The lower mmap space is filled first, and then the upper mmap space
+is filled.
+
+
+===============================
+GDB-STUB MMU DEBUGGING SERVICES
+===============================
+
+The gdb-stub included in this kernel provides a number of services to aid in the debugging of MMU
+related kernel services:
+
+ (*) Every time the kernel stops, certain state information is dumped into __debug_mmu. This
+ variable is defined in arch/frv/kernel/gdb-stub.c. Note that the gdbinit file in this
+ directory has some useful macros for dealing with this.
+
+ (*) __debug_mmu.tlb[]
+
+ This receives the current TLB contents. This can be viewed with the _tlb GDB macro:
+
+ (gdb) _tlb
+ tlb[0x00]: 01000005 00718203 01000002 00718203
+ tlb[0x01]: 01004002 006d4201 01004005 006d4203
+ tlb[0x02]: 01008002 006d0201 01008006 00004200
+ tlb[0x03]: 0100c006 007f4202 0100c002 0064c202
+ tlb[0x04]: 01110005 00774201 01110002 00774201
+ tlb[0x05]: 01114005 00770201 01114002 00770201
+ tlb[0x06]: 01118002 0076c201 01118005 0076c201
+ ...
+ tlb[0x3d]: 010f4002 00790200 001f4002 0054ca02
+ tlb[0x3e]: 010f8005 0078c201 010f8002 0078c201
+ tlb[0x3f]: 001fc002 0056ca01 001fc005 00538a01
+
+ (*) __debug_mmu.iamr[]
+ (*) __debug_mmu.damr[]
+
+ These receive the current IAMR and DAMR contents. These can be viewed with the _amr
+ GDB macro:
+
+ (gdb) _amr
+ AMRx DAMR IAMR
+ ==== ===================== =====================
+ amr0 : L:c0000000 P:00000cb9 : L:c0000000 P:000004b9
+ amr1 : L:01070005 P:006f9203 : L:0102c005 P:006a1201
+ amr2 : L:d8d00000 P:00000000 : L:d8d00000 P:00000000
+ amr3 : L:d8d04000 P:00534c0d : L:00000000 P:00000000
+ amr4 : L:d8d08000 P:00554c0d : L:00000000 P:00000000
+ amr5 : L:d8d0c000 P:00554c0d : L:00000000 P:00000000
+ amr6 : L:d8d10000 P:00000000 : L:00000000 P:00000000
+ amr7 : L:d8d14000 P:00000000 : L:00000000 P:00000000
+ amr8 : L:d8d18000 P:00000000
+ amr9 : L:d8d1c000 P:00000000
+ amr10: L:d8d20000 P:00000000
+ amr11: L:e0000000 P:e0000ccd
+
+ (*) The current task's page directory is bound to DAMR3.
+
+ This can be viewed with the _pgd GDB macro:
+
+ (gdb) _pgd
+ $3 = {{pge = {{ste = {0x554001, 0x554101, 0x554201, 0x554301, 0x554401,
+ 0x554501, 0x554601, 0x554701, 0x554801, 0x554901, 0x554a01,
+ 0x554b01, 0x554c01, 0x554d01, 0x554e01, 0x554f01, 0x555001,
+ 0x555101, 0x555201, 0x555301, 0x555401, 0x555501, 0x555601,
+ 0x555701, 0x555801, 0x555901, 0x555a01, 0x555b01, 0x555c01,
+ 0x555d01, 0x555e01, 0x555f01, 0x556001, 0x556101, 0x556201,
+ 0x556301, 0x556401, 0x556501, 0x556601, 0x556701, 0x556801,
+ 0x556901, 0x556a01, 0x556b01, 0x556c01, 0x556d01, 0x556e01,
+ 0x556f01, 0x557001, 0x557101, 0x557201, 0x557301, 0x557401,
+ 0x557501, 0x557601, 0x557701, 0x557801, 0x557901, 0x557a01,
+ 0x557b01, 0x557c01, 0x557d01, 0x557e01, 0x557f01}}}}, {pge = {{
+ ste = {0x0 <repeats 64 times>}}}} <repeats 51 times>, {pge = {{ste = {
+ 0x248001, 0x248101, 0x248201, 0x248301, 0x248401, 0x248501,
+ 0x248601, 0x248701, 0x248801, 0x248901, 0x248a01, 0x248b01,
+ 0x248c01, 0x248d01, 0x248e01, 0x248f01, 0x249001, 0x249101,
+ 0x249201, 0x249301, 0x249401, 0x249501, 0x249601, 0x249701,
+ 0x249801, 0x249901, 0x249a01, 0x249b01, 0x249c01, 0x249d01,
+ 0x249e01, 0x249f01, 0x24a001, 0x24a101, 0x24a201, 0x24a301,
+ 0x24a401, 0x24a501, 0x24a601, 0x24a701, 0x24a801, 0x24a901,
+ 0x24aa01, 0x24ab01, 0x24ac01, 0x24ad01, 0x24ae01, 0x24af01,
+ 0x24b001, 0x24b101, 0x24b201, 0x24b301, 0x24b401, 0x24b501,
+ 0x24b601, 0x24b701, 0x24b801, 0x24b901, 0x24ba01, 0x24bb01,
+ 0x24bc01, 0x24bd01, 0x24be01, 0x24bf01}}}}, {pge = {{ste = {
+ 0x0 <repeats 64 times>}}}} <repeats 11 times>}
+
+ (*) The PTD last used by the instruction TLB miss handler is attached to DAMR4.
+ (*) The PTD last used by the data TLB miss handler is attached to DAMR5.
+
+ These can be viewed with the _ptd_i and _ptd_d GDB macros:
+
+ (gdb) _ptd_d
+ $5 = {{pte = 0x0} <repeats 127 times>, {pte = 0x539b01}, {
+ pte = 0x0} <repeats 896 times>, {pte = 0x719303}, {pte = 0x6d5303}, {
+ pte = 0x0}, {pte = 0x0}, {pte = 0x0}, {pte = 0x0}, {pte = 0x0}, {
+ pte = 0x0}, {pte = 0x0}, {pte = 0x0}, {pte = 0x0}, {pte = 0x6a1303}, {
+ pte = 0x0} <repeats 12 times>, {pte = 0x709303}, {pte = 0x0}, {pte = 0x0},
+ {pte = 0x6fd303}, {pte = 0x6f9303}, {pte = 0x6f5303}, {pte = 0x0}, {
+ pte = 0x6ed303}, {pte = 0x531b01}, {pte = 0x50db01}, {
+ pte = 0x0} <repeats 13 times>, {pte = 0x5303}, {pte = 0x7f5303}, {
+ pte = 0x509b01}, {pte = 0x505b01}, {pte = 0x7c9303}, {pte = 0x7b9303}, {
+ pte = 0x7b5303}, {pte = 0x7b1303}, {pte = 0x7ad303}, {pte = 0x0}, {
+ pte = 0x0}, {pte = 0x7a1303}, {pte = 0x0}, {pte = 0x795303}, {pte = 0x0}, {
+ pte = 0x78d303}, {pte = 0x0}, {pte = 0x0}, {pte = 0x0}, {pte = 0x0}, {
+ pte = 0x0}, {pte = 0x775303}, {pte = 0x771303}, {pte = 0x76d303}, {
+ pte = 0x0}, {pte = 0x765303}, {pte = 0x7c5303}, {pte = 0x501b01}, {
+ pte = 0x4f1b01}, {pte = 0x4edb01}, {pte = 0x0}, {pte = 0x4f9b01}, {
+ pte = 0x4fdb01}, {pte = 0x0} <repeats 2992 times>}
diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt
new file mode 100644
index 000000000..77b36f59d
--- /dev/null
+++ b/Documentation/futex-requeue-pi.txt
@@ -0,0 +1,131 @@
+Futex Requeue PI
+----------------
+
+Requeueing of tasks from a non-PI futex to a PI futex requires
+special handling in order to ensure the underlying rt_mutex is never
+left without an owner if it has waiters; doing so would break the PI
+boosting logic [see rt-mutex-desgin.txt] For the purposes of
+brevity, this action will be referred to as "requeue_pi" throughout
+this document. Priority inheritance is abbreviated throughout as
+"PI".
+
+Motivation
+----------
+
+Without requeue_pi, the glibc implementation of
+pthread_cond_broadcast() must resort to waking all the tasks waiting
+on a pthread_condvar and letting them try to sort out which task
+gets to run first in classic thundering-herd formation. An ideal
+implementation would wake the highest-priority waiter, and leave the
+rest to the natural wakeup inherent in unlocking the mutex
+associated with the condvar.
+
+Consider the simplified glibc calls:
+
+/* caller must lock mutex */
+pthread_cond_wait(cond, mutex)
+{
+ lock(cond->__data.__lock);
+ unlock(mutex);
+ do {
+ unlock(cond->__data.__lock);
+ futex_wait(cond->__data.__futex);
+ lock(cond->__data.__lock);
+ } while(...)
+ unlock(cond->__data.__lock);
+ lock(mutex);
+}
+
+pthread_cond_broadcast(cond)
+{
+ lock(cond->__data.__lock);
+ unlock(cond->__data.__lock);
+ futex_requeue(cond->data.__futex, cond->mutex);
+}
+
+Once pthread_cond_broadcast() requeues the tasks, the cond->mutex
+has waiters. Note that pthread_cond_wait() attempts to lock the
+mutex only after it has returned to user space. This will leave the
+underlying rt_mutex with waiters, and no owner, breaking the
+previously mentioned PI-boosting algorithms.
+
+In order to support PI-aware pthread_condvar's, the kernel needs to
+be able to requeue tasks to PI futexes. This support implies that
+upon a successful futex_wait system call, the caller would return to
+user space already holding the PI futex. The glibc implementation
+would be modified as follows:
+
+
+/* caller must lock mutex */
+pthread_cond_wait_pi(cond, mutex)
+{
+ lock(cond->__data.__lock);
+ unlock(mutex);
+ do {
+ unlock(cond->__data.__lock);
+ futex_wait_requeue_pi(cond->__data.__futex);
+ lock(cond->__data.__lock);
+ } while(...)
+ unlock(cond->__data.__lock);
+ /* the kernel acquired the mutex for us */
+}
+
+pthread_cond_broadcast_pi(cond)
+{
+ lock(cond->__data.__lock);
+ unlock(cond->__data.__lock);
+ futex_requeue_pi(cond->data.__futex, cond->mutex);
+}
+
+The actual glibc implementation will likely test for PI and make the
+necessary changes inside the existing calls rather than creating new
+calls for the PI cases. Similar changes are needed for
+pthread_cond_timedwait() and pthread_cond_signal().
+
+Implementation
+--------------
+
+In order to ensure the rt_mutex has an owner if it has waiters, it
+is necessary for both the requeue code, as well as the waiting code,
+to be able to acquire the rt_mutex before returning to user space.
+The requeue code cannot simply wake the waiter and leave it to
+acquire the rt_mutex as it would open a race window between the
+requeue call returning to user space and the waiter waking and
+starting to run. This is especially true in the uncontended case.
+
+The solution involves two new rt_mutex helper routines,
+rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which
+allow the requeue code to acquire an uncontended rt_mutex on behalf
+of the waiter and to enqueue the waiter on a contended rt_mutex.
+Two new system calls provide the kernel<->user interface to
+requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI.
+
+FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait()
+and pthread_cond_timedwait()) to block on the initial futex and wait
+to be requeued to a PI-aware futex. The implementation is the
+result of a high-speed collision between futex_wait() and
+futex_lock_pi(), with some extra logic to check for the additional
+wake-up scenarios.
+
+FUTEX_CMP_REQUEUE_PI is called by the waker
+(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and
+possibly wake the waiting tasks. Internally, this system call is
+still handled by futex_requeue (by passing requeue_pi=1). Before
+requeueing, futex_requeue() attempts to acquire the requeue target
+PI futex on behalf of the top waiter. If it can, this waiter is
+woken. futex_requeue() then proceeds to requeue the remaining
+nr_wake+nr_requeue tasks to the PI futex, calling
+rt_mutex_start_proxy_lock() prior to each requeue to prepare the
+task as a waiter on the underlying rt_mutex. It is possible that
+the lock can be acquired at this stage as well, if so, the next
+waiter is woken to finish the acquisition of the lock.
+
+FUTEX_CMP_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
+their sum is all that really matters. futex_requeue() will wake or
+requeue up to nr_wake + nr_requeue tasks. It will wake only as many
+tasks as it can acquire the lock for, which in the majority of cases
+should be 0 as good programming practice dictates that the caller of
+either pthread_cond_broadcast() or pthread_cond_signal() acquire the
+mutex prior to making the call. FUTEX_CMP_REQUEUE_PI requires that
+nr_wake=1. nr_requeue should be INT_MAX for broadcast and 0 for
+signal.
diff --git a/Documentation/gcov.txt b/Documentation/gcov.txt
new file mode 100644
index 000000000..7b727783d
--- /dev/null
+++ b/Documentation/gcov.txt
@@ -0,0 +1,257 @@
+Using gcov with the Linux kernel
+================================
+
+1. Introduction
+2. Preparation
+3. Customization
+4. Files
+5. Modules
+6. Separated build and test machines
+7. Troubleshooting
+Appendix A: sample script: gather_on_build.sh
+Appendix B: sample script: gather_on_test.sh
+
+
+1. Introduction
+===============
+
+gcov profiling kernel support enables the use of GCC's coverage testing
+tool gcov [1] with the Linux kernel. Coverage data of a running kernel
+is exported in gcov-compatible format via the "gcov" debugfs directory.
+To get coverage data for a specific file, change to the kernel build
+directory and use gcov with the -o option as follows (requires root):
+
+# cd /tmp/linux-out
+# gcov -o /sys/kernel/debug/gcov/tmp/linux-out/kernel spinlock.c
+
+This will create source code files annotated with execution counts
+in the current directory. In addition, graphical gcov front-ends such
+as lcov [2] can be used to automate the process of collecting data
+for the entire kernel and provide coverage overviews in HTML format.
+
+Possible uses:
+
+* debugging (has this line been reached at all?)
+* test improvement (how do I change my test to cover these lines?)
+* minimizing kernel configurations (do I need this option if the
+ associated code is never run?)
+
+--
+
+[1] http://gcc.gnu.org/onlinedocs/gcc/Gcov.html
+[2] http://ltp.sourceforge.net/coverage/lcov.php
+
+
+2. Preparation
+==============
+
+Configure the kernel with:
+
+ CONFIG_DEBUG_FS=y
+ CONFIG_GCOV_KERNEL=y
+
+select the gcc's gcov format, default is autodetect based on gcc version:
+
+ CONFIG_GCOV_FORMAT_AUTODETECT=y
+
+and to get coverage data for the entire kernel:
+
+ CONFIG_GCOV_PROFILE_ALL=y
+
+Note that kernels compiled with profiling flags will be significantly
+larger and run slower. Also CONFIG_GCOV_PROFILE_ALL may not be supported
+on all architectures.
+
+Profiling data will only become accessible once debugfs has been
+mounted:
+
+ mount -t debugfs none /sys/kernel/debug
+
+
+3. Customization
+================
+
+To enable profiling for specific files or directories, add a line
+similar to the following to the respective kernel Makefile:
+
+ For a single file (e.g. main.o):
+ GCOV_PROFILE_main.o := y
+
+ For all files in one directory:
+ GCOV_PROFILE := y
+
+To exclude files from being profiled even when CONFIG_GCOV_PROFILE_ALL
+is specified, use:
+
+ GCOV_PROFILE_main.o := n
+ and:
+ GCOV_PROFILE := n
+
+Only files which are linked to the main kernel image or are compiled as
+kernel modules are supported by this mechanism.
+
+
+4. Files
+========
+
+The gcov kernel support creates the following files in debugfs:
+
+ /sys/kernel/debug/gcov
+ Parent directory for all gcov-related files.
+
+ /sys/kernel/debug/gcov/reset
+ Global reset file: resets all coverage data to zero when
+ written to.
+
+ /sys/kernel/debug/gcov/path/to/compile/dir/file.gcda
+ The actual gcov data file as understood by the gcov
+ tool. Resets file coverage data to zero when written to.
+
+ /sys/kernel/debug/gcov/path/to/compile/dir/file.gcno
+ Symbolic link to a static data file required by the gcov
+ tool. This file is generated by gcc when compiling with
+ option -ftest-coverage.
+
+
+5. Modules
+==========
+
+Kernel modules may contain cleanup code which is only run during
+module unload time. The gcov mechanism provides a means to collect
+coverage data for such code by keeping a copy of the data associated
+with the unloaded module. This data remains available through debugfs.
+Once the module is loaded again, the associated coverage counters are
+initialized with the data from its previous instantiation.
+
+This behavior can be deactivated by specifying the gcov_persist kernel
+parameter:
+
+ gcov_persist=0
+
+At run-time, a user can also choose to discard data for an unloaded
+module by writing to its data file or the global reset file.
+
+
+6. Separated build and test machines
+====================================
+
+The gcov kernel profiling infrastructure is designed to work out-of-the
+box for setups where kernels are built and run on the same machine. In
+cases where the kernel runs on a separate machine, special preparations
+must be made, depending on where the gcov tool is used:
+
+a) gcov is run on the TEST machine
+
+The gcov tool version on the test machine must be compatible with the
+gcc version used for kernel build. Also the following files need to be
+copied from build to test machine:
+
+from the source tree:
+ - all C source files + headers
+
+from the build tree:
+ - all C source files + headers
+ - all .gcda and .gcno files
+ - all links to directories
+
+It is important to note that these files need to be placed into the
+exact same file system location on the test machine as on the build
+machine. If any of the path components is symbolic link, the actual
+directory needs to be used instead (due to make's CURDIR handling).
+
+b) gcov is run on the BUILD machine
+
+The following files need to be copied after each test case from test
+to build machine:
+
+from the gcov directory in sysfs:
+ - all .gcda files
+ - all links to .gcno files
+
+These files can be copied to any location on the build machine. gcov
+must then be called with the -o option pointing to that directory.
+
+Example directory setup on the build machine:
+
+ /tmp/linux: kernel source tree
+ /tmp/out: kernel build directory as specified by make O=
+ /tmp/coverage: location of the files copied from the test machine
+
+ [user@build] cd /tmp/out
+ [user@build] gcov -o /tmp/coverage/tmp/out/init main.c
+
+
+7. Troubleshooting
+==================
+
+Problem: Compilation aborts during linker step.
+Cause: Profiling flags are specified for source files which are not
+ linked to the main kernel or which are linked by a custom
+ linker procedure.
+Solution: Exclude affected source files from profiling by specifying
+ GCOV_PROFILE := n or GCOV_PROFILE_basename.o := n in the
+ corresponding Makefile.
+
+Problem: Files copied from sysfs appear empty or incomplete.
+Cause: Due to the way seq_file works, some tools such as cp or tar
+ may not correctly copy files from sysfs.
+Solution: Use 'cat' to read .gcda files and 'cp -d' to copy links.
+ Alternatively use the mechanism shown in Appendix B.
+
+
+Appendix A: gather_on_build.sh
+==============================
+
+Sample script to gather coverage meta files on the build machine
+(see 6a):
+#!/bin/bash
+
+KSRC=$1
+KOBJ=$2
+DEST=$3
+
+if [ -z "$KSRC" ] || [ -z "$KOBJ" ] || [ -z "$DEST" ]; then
+ echo "Usage: $0 <ksrc directory> <kobj directory> <output.tar.gz>" >&2
+ exit 1
+fi
+
+KSRC=$(cd $KSRC; printf "all:\n\t@echo \${CURDIR}\n" | make -f -)
+KOBJ=$(cd $KOBJ; printf "all:\n\t@echo \${CURDIR}\n" | make -f -)
+
+find $KSRC $KOBJ \( -name '*.gcno' -o -name '*.[ch]' -o -type l \) -a \
+ -perm /u+r,g+r | tar cfz $DEST -P -T -
+
+if [ $? -eq 0 ] ; then
+ echo "$DEST successfully created, copy to test system and unpack with:"
+ echo " tar xfz $DEST -P"
+else
+ echo "Could not create file $DEST"
+fi
+
+
+Appendix B: gather_on_test.sh
+=============================
+
+Sample script to gather coverage data files on the test machine
+(see 6b):
+
+#!/bin/bash -e
+
+DEST=$1
+GCDA=/sys/kernel/debug/gcov
+
+if [ -z "$DEST" ] ; then
+ echo "Usage: $0 <output.tar.gz>" >&2
+ exit 1
+fi
+
+TEMPDIR=$(mktemp -d)
+echo Collecting data..
+find $GCDA -type d -exec mkdir -p $TEMPDIR/\{\} \;
+find $GCDA -name '*.gcda' -exec sh -c 'cat < $0 > '$TEMPDIR'/$0' {} \;
+find $GCDA -name '*.gcno' -exec sh -c 'cp -d $0 '$TEMPDIR'/$0' {} \;
+tar czf $DEST -C $TEMPDIR sys
+rm -rf $TEMPDIR
+
+echo "$DEST successfully created, copy to build system and unpack with:"
+echo " tar xfz $DEST"
diff --git a/Documentation/gdb-kernel-debugging.txt b/Documentation/gdb-kernel-debugging.txt
new file mode 100644
index 000000000..7050ce879
--- /dev/null
+++ b/Documentation/gdb-kernel-debugging.txt
@@ -0,0 +1,160 @@
+Debugging kernel and modules via gdb
+====================================
+
+The kernel debugger kgdb, hypervisors like QEMU or JTAG-based hardware
+interfaces allow to debug the Linux kernel and its modules during runtime
+using gdb. Gdb comes with a powerful scripting interface for python. The
+kernel provides a collection of helper scripts that can simplify typical
+kernel debugging steps. This is a short tutorial about how to enable and use
+them. It focuses on QEMU/KVM virtual machines as target, but the examples can
+be transferred to the other gdb stubs as well.
+
+
+Requirements
+------------
+
+ o gdb 7.2+ (recommended: 7.4+) with python support enabled (typically true
+ for distributions)
+
+
+Setup
+-----
+
+ o Create a virtual Linux machine for QEMU/KVM (see www.linux-kvm.org and
+ www.qemu.org for more details). For cross-development,
+ http://landley.net/aboriginal/bin keeps a pool of machine images and
+ toolchains that can be helpful to start from.
+
+ o Build the kernel with CONFIG_GDB_SCRIPTS enabled, but leave
+ CONFIG_DEBUG_INFO_REDUCED off. If your architecture supports
+ CONFIG_FRAME_POINTER, keep it enabled.
+
+ o Install that kernel on the guest.
+
+ Alternatively, QEMU allows to boot the kernel directly using -kernel,
+ -append, -initrd command line switches. This is generally only useful if
+ you do not depend on modules. See QEMU documentation for more details on
+ this mode.
+
+ o Enable the gdb stub of QEMU/KVM, either
+ - at VM startup time by appending "-s" to the QEMU command line
+ or
+ - during runtime by issuing "gdbserver" from the QEMU monitor
+ console
+
+ o cd /path/to/linux-build
+
+ o Start gdb: gdb vmlinux
+
+ Note: Some distros may restrict auto-loading of gdb scripts to known safe
+ directories. In case gdb reports to refuse loading vmlinux-gdb.py, add
+
+ add-auto-load-safe-path /path/to/linux-build
+
+ to ~/.gdbinit. See gdb help for more details.
+
+ o Attach to the booted guest:
+ (gdb) target remote :1234
+
+
+Examples of using the Linux-provided gdb helpers
+------------------------------------------------
+
+ o Load module (and main kernel) symbols:
+ (gdb) lx-symbols
+ loading vmlinux
+ scanning for modules in /home/user/linux/build
+ loading @0xffffffffa0020000: /home/user/linux/build/net/netfilter/xt_tcpudp.ko
+ loading @0xffffffffa0016000: /home/user/linux/build/net/netfilter/xt_pkttype.ko
+ loading @0xffffffffa0002000: /home/user/linux/build/net/netfilter/xt_limit.ko
+ loading @0xffffffffa00ca000: /home/user/linux/build/net/packet/af_packet.ko
+ loading @0xffffffffa003c000: /home/user/linux/build/fs/fuse/fuse.ko
+ ...
+ loading @0xffffffffa0000000: /home/user/linux/build/drivers/ata/ata_generic.ko
+
+ o Set a breakpoint on some not yet loaded module function, e.g.:
+ (gdb) b btrfs_init_sysfs
+ Function "btrfs_init_sysfs" not defined.
+ Make breakpoint pending on future shared library load? (y or [n]) y
+ Breakpoint 1 (btrfs_init_sysfs) pending.
+
+ o Continue the target
+ (gdb) c
+
+ o Load the module on the target and watch the symbols being loaded as well as
+ the breakpoint hit:
+ loading @0xffffffffa0034000: /home/user/linux/build/lib/libcrc32c.ko
+ loading @0xffffffffa0050000: /home/user/linux/build/lib/lzo/lzo_compress.ko
+ loading @0xffffffffa006e000: /home/user/linux/build/lib/zlib_deflate/zlib_deflate.ko
+ loading @0xffffffffa01b1000: /home/user/linux/build/fs/btrfs/btrfs.ko
+
+ Breakpoint 1, btrfs_init_sysfs () at /home/user/linux/fs/btrfs/sysfs.c:36
+ 36 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj);
+
+ o Dump the log buffer of the target kernel:
+ (gdb) lx-dmesg
+ [ 0.000000] Initializing cgroup subsys cpuset
+ [ 0.000000] Initializing cgroup subsys cpu
+ [ 0.000000] Linux version 3.8.0-rc4-dbg+ (...
+ [ 0.000000] Command line: root=/dev/sda2 resume=/dev/sda1 vga=0x314
+ [ 0.000000] e820: BIOS-provided physical RAM map:
+ [ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable
+ [ 0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
+ ....
+
+ o Examine fields of the current task struct:
+ (gdb) p $lx_current().pid
+ $1 = 4998
+ (gdb) p $lx_current().comm
+ $2 = "modprobe\000\000\000\000\000\000\000"
+
+ o Make use of the per-cpu function for the current or a specified CPU:
+ (gdb) p $lx_per_cpu("runqueues").nr_running
+ $3 = 1
+ (gdb) p $lx_per_cpu("runqueues", 2).nr_running
+ $4 = 0
+
+ o Dig into hrtimers using the container_of helper:
+ (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next
+ (gdb) p *$container_of($next, "struct hrtimer", "node")
+ $5 = {
+ node = {
+ node = {
+ __rb_parent_color = 18446612133355256072,
+ rb_right = 0x0 <irq_stack_union>,
+ rb_left = 0x0 <irq_stack_union>
+ },
+ expires = {
+ tv64 = 1835268000000
+ }
+ },
+ _softexpires = {
+ tv64 = 1835268000000
+ },
+ function = 0xffffffff81078232 <tick_sched_timer>,
+ base = 0xffff88003fd0d6f0,
+ state = 1,
+ start_pid = 0,
+ start_site = 0xffffffff81055c1f <hrtimer_start_range_ns+20>,
+ start_comm = "swapper/2\000\000\000\000\000\000"
+ }
+
+
+List of commands and functions
+------------------------------
+
+The number of commands and convenience functions may evolve over the time,
+this is just a snapshot of the initial version:
+
+ (gdb) apropos lx
+ function lx_current -- Return current task
+ function lx_module -- Find module by name and return the module variable
+ function lx_per_cpu -- Return per-cpu variable
+ function lx_task_by_pid -- Find Linux task by PID and return the task_struct variable
+ function lx_thread_info -- Calculate Linux thread_info from task variable
+ lx-dmesg -- Print Linux kernel log buffer
+ lx-lsmod -- List currently loaded modules
+ lx-symbols -- (Re-)load symbols of Linux kernel and currently loaded modules
+
+Detailed help can be obtained via "help <command-name>" for commands and "help
+function <function-name>" for convenience functions.
diff --git a/Documentation/gpio/00-INDEX b/Documentation/gpio/00-INDEX
new file mode 100644
index 000000000..1de43ae46
--- /dev/null
+++ b/Documentation/gpio/00-INDEX
@@ -0,0 +1,14 @@
+00-INDEX
+ - This file
+gpio.txt
+ - Introduction to GPIOs and their kernel interfaces
+consumer.txt
+ - How to obtain and use GPIOs in a driver
+driver.txt
+ - How to write a GPIO driver
+board.txt
+ - How to assign GPIOs to a consumer device and a function
+sysfs.txt
+ - Information about the GPIO sysfs interface
+gpio-legacy.txt
+ - Historical documentation of the deprecated GPIO integer interface
diff --git a/Documentation/gpio/board.txt b/Documentation/gpio/board.txt
new file mode 100644
index 000000000..b80606de5
--- /dev/null
+++ b/Documentation/gpio/board.txt
@@ -0,0 +1,154 @@
+GPIO Mappings
+=============
+
+This document explains how GPIOs can be assigned to given devices and functions.
+Note that it only applies to the new descriptor-based interface. For a
+description of the deprecated integer-based GPIO interface please refer to
+gpio-legacy.txt (actually, there is no real mapping possible with the old
+interface; you just fetch an integer from somewhere and request the
+corresponding GPIO.
+
+Platforms that make use of GPIOs must select ARCH_REQUIRE_GPIOLIB (if GPIO usage
+is mandatory) or ARCH_WANT_OPTIONAL_GPIOLIB (if GPIO support can be omitted) in
+their Kconfig. Then, how GPIOs are mapped depends on what the platform uses to
+describe its hardware layout. Currently, mappings can be defined through device
+tree, ACPI, and platform data.
+
+Device Tree
+-----------
+GPIOs can easily be mapped to devices and functions in the device tree. The
+exact way to do it depends on the GPIO controller providing the GPIOs, see the
+device tree bindings for your controller.
+
+GPIOs mappings are defined in the consumer device's node, in a property named
+<function>-gpios, where <function> is the function the driver will request
+through gpiod_get(). For example:
+
+ foo_device {
+ compatible = "acme,foo";
+ ...
+ led-gpios = <&gpio 15 GPIO_ACTIVE_HIGH>, /* red */
+ <&gpio 16 GPIO_ACTIVE_HIGH>, /* green */
+ <&gpio 17 GPIO_ACTIVE_HIGH>; /* blue */
+
+ power-gpios = <&gpio 1 GPIO_ACTIVE_LOW>;
+ };
+
+This property will make GPIOs 15, 16 and 17 available to the driver under the
+"led" function, and GPIO 1 as the "power" GPIO:
+
+ struct gpio_desc *red, *green, *blue, *power;
+
+ red = gpiod_get_index(dev, "led", 0);
+ green = gpiod_get_index(dev, "led", 1);
+ blue = gpiod_get_index(dev, "led", 2);
+
+ power = gpiod_get(dev, "power");
+
+The led GPIOs will be active-high, while the power GPIO will be active-low (i.e.
+gpiod_is_active_low(power) will be true).
+
+ACPI
+----
+ACPI also supports function names for GPIOs in a similar fashion to DT.
+The above DT example can be converted to an equivalent ACPI description
+with the help of _DSD (Device Specific Data), introduced in ACPI 5.1:
+
+ Device (FOO) {
+ Name (_CRS, ResourceTemplate () {
+ GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
+ "\\_SB.GPI0") {15} // red
+ GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
+ "\\_SB.GPI0") {16} // green
+ GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
+ "\\_SB.GPI0") {17} // blue
+ GpioIo (Exclusive, ..., IoRestrictionOutputOnly,
+ "\\_SB.GPI0") {1} // power
+ })
+
+ Name (_DSD, Package () {
+ ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
+ Package () {
+ Package () {
+ "led-gpios",
+ Package () {
+ ^FOO, 0, 0, 1,
+ ^FOO, 1, 0, 1,
+ ^FOO, 2, 0, 1,
+ }
+ },
+ Package () {
+ "power-gpios",
+ Package () {^FOO, 3, 0, 0},
+ },
+ }
+ })
+ }
+
+For more information about the ACPI GPIO bindings see
+Documentation/acpi/gpio-properties.txt.
+
+Platform Data
+-------------
+Finally, GPIOs can be bound to devices and functions using platform data. Board
+files that desire to do so need to include the following header:
+
+ #include <linux/gpio/machine.h>
+
+GPIOs are mapped by the means of tables of lookups, containing instances of the
+gpiod_lookup structure. Two macros are defined to help declaring such mappings:
+
+ GPIO_LOOKUP(chip_label, chip_hwnum, dev_id, con_id, flags)
+ GPIO_LOOKUP_IDX(chip_label, chip_hwnum, dev_id, con_id, idx, flags)
+
+where
+
+ - chip_label is the label of the gpiod_chip instance providing the GPIO
+ - chip_hwnum is the hardware number of the GPIO within the chip
+ - dev_id is the identifier of the device that will make use of this GPIO. It
+ can be NULL, in which case it will be matched for calls to gpiod_get()
+ with a NULL device.
+ - con_id is the name of the GPIO function from the device point of view. It
+ can be NULL, in which case it will match any function.
+ - idx is the index of the GPIO within the function.
+ - flags is defined to specify the following properties:
+ * GPIOF_ACTIVE_LOW - to configure the GPIO as active-low
+ * GPIOF_OPEN_DRAIN - GPIO pin is open drain type.
+ * GPIOF_OPEN_SOURCE - GPIO pin is open source type.
+
+In the future, these flags might be extended to support more properties.
+
+Note that GPIO_LOOKUP() is just a shortcut to GPIO_LOOKUP_IDX() where idx = 0.
+
+A lookup table can then be defined as follows, with an empty entry defining its
+end:
+
+struct gpiod_lookup_table gpios_table = {
+ .dev_id = "foo.0",
+ .table = {
+ GPIO_LOOKUP_IDX("gpio.0", 15, "led", 0, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio.0", 16, "led", 1, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP_IDX("gpio.0", 17, "led", 2, GPIO_ACTIVE_HIGH),
+ GPIO_LOOKUP("gpio.0", 1, "power", GPIO_ACTIVE_LOW),
+ { },
+ },
+};
+
+And the table can be added by the board code as follows:
+
+ gpiod_add_lookup_table(&gpios_table);
+
+The driver controlling "foo.0" will then be able to obtain its GPIOs as follows:
+
+ struct gpio_desc *red, *green, *blue, *power;
+
+ red = gpiod_get_index(dev, "led", 0);
+ green = gpiod_get_index(dev, "led", 1);
+ blue = gpiod_get_index(dev, "led", 2);
+
+ power = gpiod_get(dev, "power");
+ gpiod_direction_output(power, 1);
+
+Since the "power" GPIO is mapped as active-low, its actual signal will be 0
+after this code. Contrary to the legacy integer GPIO interface, the active-low
+property is handled during mapping and is thus transparent to GPIO consumers.
diff --git a/Documentation/gpio/consumer.txt b/Documentation/gpio/consumer.txt
new file mode 100644
index 000000000..c21c1313f
--- /dev/null
+++ b/Documentation/gpio/consumer.txt
@@ -0,0 +1,340 @@
+GPIO Descriptor Consumer Interface
+==================================
+
+This document describes the consumer interface of the GPIO framework. Note that
+it describes the new descriptor-based interface. For a description of the
+deprecated integer-based GPIO interface please refer to gpio-legacy.txt.
+
+
+Guidelines for GPIOs consumers
+==============================
+
+Drivers that can't work without standard GPIO calls should have Kconfig entries
+that depend on GPIOLIB. The functions that allow a driver to obtain and use
+GPIOs are available by including the following file:
+
+ #include <linux/gpio/consumer.h>
+
+All the functions that work with the descriptor-based GPIO interface are
+prefixed with gpiod_. The gpio_ prefix is used for the legacy interface. No
+other function in the kernel should use these prefixes.
+
+
+Obtaining and Disposing GPIOs
+=============================
+
+With the descriptor-based interface, GPIOs are identified with an opaque,
+non-forgeable handler that must be obtained through a call to one of the
+gpiod_get() functions. Like many other kernel subsystems, gpiod_get() takes the
+device that will use the GPIO and the function the requested GPIO is supposed to
+fulfill:
+
+ struct gpio_desc *gpiod_get(struct device *dev, const char *con_id,
+ enum gpiod_flags flags)
+
+If a function is implemented by using several GPIOs together (e.g. a simple LED
+device that displays digits), an additional index argument can be specified:
+
+ struct gpio_desc *gpiod_get_index(struct device *dev,
+ const char *con_id, unsigned int idx,
+ enum gpiod_flags flags)
+
+The flags parameter is used to optionally specify a direction and initial value
+for the GPIO. Values can be:
+
+* GPIOD_ASIS or 0 to not initialize the GPIO at all. The direction must be set
+ later with one of the dedicated functions.
+* GPIOD_IN to initialize the GPIO as input.
+* GPIOD_OUT_LOW to initialize the GPIO as output with a value of 0.
+* GPIOD_OUT_HIGH to initialize the GPIO as output with a value of 1.
+
+Both functions return either a valid GPIO descriptor, or an error code checkable
+with IS_ERR() (they will never return a NULL pointer). -ENOENT will be returned
+if and only if no GPIO has been assigned to the device/function/index triplet,
+other error codes are used for cases where a GPIO has been assigned but an error
+occurred while trying to acquire it. This is useful to discriminate between mere
+errors and an absence of GPIO for optional GPIO parameters. For the common
+pattern where a GPIO is optional, the gpiod_get_optional() and
+gpiod_get_index_optional() functions can be used. These functions return NULL
+instead of -ENOENT if no GPIO has been assigned to the requested function:
+
+ struct gpio_desc *gpiod_get_optional(struct device *dev,
+ const char *con_id,
+ enum gpiod_flags flags)
+
+ struct gpio_desc *gpiod_get_index_optional(struct device *dev,
+ const char *con_id,
+ unsigned int index,
+ enum gpiod_flags flags)
+
+For a function using multiple GPIOs all of those can be obtained with one call:
+
+ struct gpio_descs *gpiod_get_array(struct device *dev,
+ const char *con_id,
+ enum gpiod_flags flags)
+
+This function returns a struct gpio_descs which contains an array of
+descriptors:
+
+ struct gpio_descs {
+ unsigned int ndescs;
+ struct gpio_desc *desc[];
+ }
+
+The following function returns NULL instead of -ENOENT if no GPIOs have been
+assigned to the requested function:
+
+ struct gpio_descs *gpiod_get_array_optional(struct device *dev,
+ const char *con_id,
+ enum gpiod_flags flags)
+
+Device-managed variants of these functions are also defined:
+
+ struct gpio_desc *devm_gpiod_get(struct device *dev, const char *con_id,
+ enum gpiod_flags flags)
+
+ struct gpio_desc *devm_gpiod_get_index(struct device *dev,
+ const char *con_id,
+ unsigned int idx,
+ enum gpiod_flags flags)
+
+ struct gpio_desc *devm_gpiod_get_optional(struct device *dev,
+ const char *con_id,
+ enum gpiod_flags flags)
+
+ struct gpio_desc *devm_gpiod_get_index_optional(struct device *dev,
+ const char *con_id,
+ unsigned int index,
+ enum gpiod_flags flags)
+
+ struct gpio_descs *devm_gpiod_get_array(struct device *dev,
+ const char *con_id,
+ enum gpiod_flags flags)
+
+ struct gpio_descs *devm_gpiod_get_array_optional(struct device *dev,
+ const char *con_id,
+ enum gpiod_flags flags)
+
+A GPIO descriptor can be disposed of using the gpiod_put() function:
+
+ void gpiod_put(struct gpio_desc *desc)
+
+For an array of GPIOs this function can be used:
+
+ void gpiod_put_array(struct gpio_descs *descs)
+
+It is strictly forbidden to use a descriptor after calling these functions.
+It is also not allowed to individually release descriptors (using gpiod_put())
+from an array acquired with gpiod_get_array().
+
+The device-managed variants are, unsurprisingly:
+
+ void devm_gpiod_put(struct device *dev, struct gpio_desc *desc)
+
+ void devm_gpiod_put_array(struct device *dev, struct gpio_descs *descs)
+
+
+Using GPIOs
+===========
+
+Setting Direction
+-----------------
+The first thing a driver must do with a GPIO is setting its direction. If no
+direction-setting flags have been given to gpiod_get*(), this is done by
+invoking one of the gpiod_direction_*() functions:
+
+ int gpiod_direction_input(struct gpio_desc *desc)
+ int gpiod_direction_output(struct gpio_desc *desc, int value)
+
+The return value is zero for success, else a negative errno. It should be
+checked, since the get/set calls don't return errors and since misconfiguration
+is possible. You should normally issue these calls from a task context. However,
+for spinlock-safe GPIOs it is OK to use them before tasking is enabled, as part
+of early board setup.
+
+For output GPIOs, the value provided becomes the initial output value. This
+helps avoid signal glitching during system startup.
+
+A driver can also query the current direction of a GPIO:
+
+ int gpiod_get_direction(const struct gpio_desc *desc)
+
+This function will return either GPIOF_DIR_IN or GPIOF_DIR_OUT.
+
+Be aware that there is no default direction for GPIOs. Therefore, **using a GPIO
+without setting its direction first is illegal and will result in undefined
+behavior!**
+
+
+Spinlock-Safe GPIO Access
+-------------------------
+Most GPIO controllers can be accessed with memory read/write instructions. Those
+don't need to sleep, and can safely be done from inside hard (non-threaded) IRQ
+handlers and similar contexts.
+
+Use the following calls to access GPIOs from an atomic context:
+
+ int gpiod_get_value(const struct gpio_desc *desc);
+ void gpiod_set_value(struct gpio_desc *desc, int value);
+
+The values are boolean, zero for low, nonzero for high. When reading the value
+of an output pin, the value returned should be what's seen on the pin. That
+won't always match the specified output value, because of issues including
+open-drain signaling and output latencies.
+
+The get/set calls do not return errors because "invalid GPIO" should have been
+reported earlier from gpiod_direction_*(). However, note that not all platforms
+can read the value of output pins; those that can't should always return zero.
+Also, using these calls for GPIOs that can't safely be accessed without sleeping
+(see below) is an error.
+
+
+GPIO Access That May Sleep
+--------------------------
+Some GPIO controllers must be accessed using message based buses like I2C or
+SPI. Commands to read or write those GPIO values require waiting to get to the
+head of a queue to transmit a command and get its response. This requires
+sleeping, which can't be done from inside IRQ handlers.
+
+Platforms that support this type of GPIO distinguish them from other GPIOs by
+returning nonzero from this call:
+
+ int gpiod_cansleep(const struct gpio_desc *desc)
+
+To access such GPIOs, a different set of accessors is defined:
+
+ int gpiod_get_value_cansleep(const struct gpio_desc *desc)
+ void gpiod_set_value_cansleep(struct gpio_desc *desc, int value)
+
+Accessing such GPIOs requires a context which may sleep, for example a threaded
+IRQ handler, and those accessors must be used instead of spinlock-safe
+accessors without the cansleep() name suffix.
+
+Other than the fact that these accessors might sleep, and will work on GPIOs
+that can't be accessed from hardIRQ handlers, these calls act the same as the
+spinlock-safe calls.
+
+
+Active-low State and Raw GPIO Values
+------------------------------------
+Device drivers like to manage the logical state of a GPIO, i.e. the value their
+device will actually receive, no matter what lies between it and the GPIO line.
+In some cases, it might make sense to control the actual GPIO line value. The
+following set of calls ignore the active-low property of a GPIO and work on the
+raw line value:
+
+ int gpiod_get_raw_value(const struct gpio_desc *desc)
+ void gpiod_set_raw_value(struct gpio_desc *desc, int value)
+ int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc)
+ void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value)
+ int gpiod_direction_output_raw(struct gpio_desc *desc, int value)
+
+The active-low state of a GPIO can also be queried using the following call:
+
+ int gpiod_is_active_low(const struct gpio_desc *desc)
+
+Note that these functions should only be used with great moderation ; a driver
+should not have to care about the physical line level.
+
+
+Set multiple GPIO outputs with a single function call
+-----------------------------------------------------
+The following functions set the output values of an array of GPIOs:
+
+ void gpiod_set_array(unsigned int array_size,
+ struct gpio_desc **desc_array,
+ int *value_array)
+ void gpiod_set_raw_array(unsigned int array_size,
+ struct gpio_desc **desc_array,
+ int *value_array)
+ void gpiod_set_array_cansleep(unsigned int array_size,
+ struct gpio_desc **desc_array,
+ int *value_array)
+ void gpiod_set_raw_array_cansleep(unsigned int array_size,
+ struct gpio_desc **desc_array,
+ int *value_array)
+
+The array can be an arbitrary set of GPIOs. The functions will try to set
+GPIOs belonging to the same bank or chip simultaneously if supported by the
+corresponding chip driver. In that case a significantly improved performance
+can be expected. If simultaneous setting is not possible the GPIOs will be set
+sequentially.
+
+The gpiod_set_array() functions take three arguments:
+ * array_size - the number of array elements
+ * desc_array - an array of GPIO descriptors
+ * value_array - an array of values to assign to the GPIOs
+
+The descriptor array can be obtained using the gpiod_get_array() function
+or one of its variants. If the group of descriptors returned by that function
+matches the desired group of GPIOs, those GPIOs can be set by simply using
+the struct gpio_descs returned by gpiod_get_array():
+
+ struct gpio_descs *my_gpio_descs = gpiod_get_array(...);
+ gpiod_set_array(my_gpio_descs->ndescs, my_gpio_descs->desc,
+ my_gpio_values);
+
+It is also possible to set a completely arbitrary array of descriptors. The
+descriptors may be obtained using any combination of gpiod_get() and
+gpiod_get_array(). Afterwards the array of descriptors has to be setup
+manually before it can be used with gpiod_set_array().
+
+Note that for optimal performance GPIOs belonging to the same chip should be
+contiguous within the array of descriptors.
+
+
+GPIOs mapped to IRQs
+--------------------
+GPIO lines can quite often be used as IRQs. You can get the IRQ number
+corresponding to a given GPIO using the following call:
+
+ int gpiod_to_irq(const struct gpio_desc *desc)
+
+It will return an IRQ number, or an negative errno code if the mapping can't be
+done (most likely because that particular GPIO cannot be used as IRQ). It is an
+unchecked error to use a GPIO that wasn't set up as an input using
+gpiod_direction_input(), or to use an IRQ number that didn't originally come
+from gpiod_to_irq(). gpiod_to_irq() is not allowed to sleep.
+
+Non-error values returned from gpiod_to_irq() can be passed to request_irq() or
+free_irq(). They will often be stored into IRQ resources for platform devices,
+by the board-specific initialization code. Note that IRQ trigger options are
+part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are system wakeup
+capabilities.
+
+
+GPIOs and ACPI
+==============
+
+On ACPI systems, GPIOs are described by GpioIo()/GpioInt() resources listed by
+the _CRS configuration objects of devices. Those resources do not provide
+connection IDs (names) for GPIOs, so it is necessary to use an additional
+mechanism for this purpose.
+
+Systems compliant with ACPI 5.1 or newer may provide a _DSD configuration object
+which, among other things, may be used to provide connection IDs for specific
+GPIOs described by the GpioIo()/GpioInt() resources in _CRS. If that is the
+case, it will be handled by the GPIO subsystem automatically. However, if the
+_DSD is not present, the mappings between GpioIo()/GpioInt() resources and GPIO
+connection IDs need to be provided by device drivers.
+
+For details refer to Documentation/acpi/gpio-properties.txt
+
+
+Interacting With the Legacy GPIO Subsystem
+==========================================
+Many kernel subsystems still handle GPIOs using the legacy integer-based
+interface. Although it is strongly encouraged to upgrade them to the safer
+descriptor-based API, the following two functions allow you to convert a GPIO
+descriptor into the GPIO integer namespace and vice-versa:
+
+ int desc_to_gpio(const struct gpio_desc *desc)
+ struct gpio_desc *gpio_to_desc(unsigned gpio)
+
+The GPIO number returned by desc_to_gpio() can be safely used as long as the
+GPIO descriptor has not been freed. All the same, a GPIO number passed to
+gpio_to_desc() must have been properly acquired, and usage of the returned GPIO
+descriptor is only possible after the GPIO number has been released.
+
+Freeing a GPIO obtained by one API with the other API is forbidden and an
+unchecked error.
diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt
new file mode 100644
index 000000000..90d0f6aba
--- /dev/null
+++ b/Documentation/gpio/driver.txt
@@ -0,0 +1,192 @@
+GPIO Descriptor Driver Interface
+================================
+
+This document serves as a guide for GPIO chip drivers writers. Note that it
+describes the new descriptor-based interface. For a description of the
+deprecated integer-based GPIO interface please refer to gpio-legacy.txt.
+
+Each GPIO controller driver needs to include the following header, which defines
+the structures used to define a GPIO driver:
+
+ #include <linux/gpio/driver.h>
+
+
+Internal Representation of GPIOs
+================================
+
+Inside a GPIO driver, individual GPIOs are identified by their hardware number,
+which is a unique number between 0 and n, n being the number of GPIOs managed by
+the chip. This number is purely internal: the hardware number of a particular
+GPIO descriptor is never made visible outside of the driver.
+
+On top of this internal number, each GPIO also need to have a global number in
+the integer GPIO namespace so that it can be used with the legacy GPIO
+interface. Each chip must thus have a "base" number (which can be automatically
+assigned), and for each GPIO the global number will be (base + hardware number).
+Although the integer representation is considered deprecated, it still has many
+users and thus needs to be maintained.
+
+So for example one platform could use numbers 32-159 for GPIOs, with a
+controller defining 128 GPIOs at a "base" of 32 ; while another platform uses
+numbers 0..63 with one set of GPIO controllers, 64-79 with another type of GPIO
+controller, and on one particular board 80-95 with an FPGA. The numbers need not
+be contiguous; either of those platforms could also use numbers 2000-2063 to
+identify GPIOs in a bank of I2C GPIO expanders.
+
+
+Controller Drivers: gpio_chip
+=============================
+
+In the gpiolib framework each GPIO controller is packaged as a "struct
+gpio_chip" (see linux/gpio/driver.h for its complete definition) with members
+common to each controller of that type:
+
+ - methods to establish GPIO direction
+ - methods used to access GPIO values
+ - method to return the IRQ number associated to a given GPIO
+ - flag saying whether calls to its methods may sleep
+ - optional debugfs dump method (showing extra state like pullup config)
+ - optional base number (will be automatically assigned if omitted)
+ - label for diagnostics and GPIOs mapping using platform data
+
+The code implementing a gpio_chip should support multiple instances of the
+controller, possibly using the driver model. That code will configure each
+gpio_chip and issue gpiochip_add(). Removing a GPIO controller should be rare;
+use gpiochip_remove() when it is unavoidable.
+
+Most often a gpio_chip is part of an instance-specific structure with state not
+exposed by the GPIO interfaces, such as addressing, power management, and more.
+Chips such as codecs will have complex non-GPIO state.
+
+Any debugfs dump method should normally ignore signals which haven't been
+requested as GPIOs. They can use gpiochip_is_requested(), which returns either
+NULL or the label associated with that GPIO when it was requested.
+
+
+GPIO drivers providing IRQs
+---------------------------
+It is custom that GPIO drivers (GPIO chips) are also providing interrupts,
+most often cascaded off a parent interrupt controller, and in some special
+cases the GPIO logic is melded with a SoC's primary interrupt controller.
+
+The IRQ portions of the GPIO block are implemented using an irqchip, using
+the header <linux/irq.h>. So basically such a driver is utilizing two sub-
+systems simultaneously: gpio and irq.
+
+GPIO irqchips usually fall in one of two categories:
+
+* CHAINED GPIO irqchips: these are usually the type that is embedded on
+ an SoC. This means that there is a fast IRQ handler for the GPIOs that
+ gets called in a chain from the parent IRQ handler, most typically the
+ system interrupt controller. This means the GPIO irqchip is registered
+ using irq_set_chained_handler() or the corresponding
+ gpiochip_set_chained_irqchip() helper function, and the GPIO irqchip
+ handler will be called immediately from the parent irqchip, while
+ holding the IRQs disabled. The GPIO irqchip will then end up calling
+ something like this sequence in its interrupt handler:
+
+ static irqreturn_t tc3589x_gpio_irq(int irq, void *data)
+ chained_irq_enter(...);
+ generic_handle_irq(...);
+ chained_irq_exit(...);
+
+ Chained GPIO irqchips typically can NOT set the .can_sleep flag on
+ struct gpio_chip, as everything happens directly in the callbacks.
+
+* NESTED THREADED GPIO irqchips: these are off-chip GPIO expanders and any
+ other GPIO irqchip residing on the other side of a sleeping bus. Of course
+ such drivers that need slow bus traffic to read out IRQ status and similar,
+ traffic which may in turn incur other IRQs to happen, cannot be handled
+ in a quick IRQ handler with IRQs disabled. Instead they need to spawn a
+ thread and then mask the parent IRQ line until the interrupt is handled
+ by the driver. The hallmark of this driver is to call something like
+ this in its interrupt handler:
+
+ static irqreturn_t tc3589x_gpio_irq(int irq, void *data)
+ ...
+ handle_nested_irq(irq);
+
+ The hallmark of threaded GPIO irqchips is that they set the .can_sleep
+ flag on struct gpio_chip to true, indicating that this chip may sleep
+ when accessing the GPIOs.
+
+To help out in handling the set-up and management of GPIO irqchips and the
+associated irqdomain and resource allocation callbacks, the gpiolib has
+some helpers that can be enabled by selecting the GPIOLIB_IRQCHIP Kconfig
+symbol:
+
+* gpiochip_irqchip_add(): adds an irqchip to a gpiochip. It will pass
+ the struct gpio_chip* for the chip to all IRQ callbacks, so the callbacks
+ need to embed the gpio_chip in its state container and obtain a pointer
+ to the container using container_of().
+ (See Documentation/driver-model/design-patterns.txt)
+
+* gpiochip_set_chained_irqchip(): sets up a chained irq handler for a
+ gpio_chip from a parent IRQ and passes the struct gpio_chip* as handler
+ data. (Notice handler data, since the irqchip data is likely used by the
+ parent irqchip!) This is for the chained type of chip. This is also used
+ to set up a nested irqchip if NULL is passed as handler.
+
+To use the helpers please keep the following in mind:
+
+- Make sure to assign all relevant members of the struct gpio_chip so that
+ the irqchip can initialize. E.g. .dev and .can_sleep shall be set up
+ properly.
+
+It is legal for any IRQ consumer to request an IRQ from any irqchip no matter
+if that is a combined GPIO+IRQ driver. The basic premise is that gpio_chip and
+irq_chip are orthogonal, and offering their services independent of each
+other.
+
+gpiod_to_irq() is just a convenience function to figure out the IRQ for a
+certain GPIO line and should not be relied upon to have been called before
+the IRQ is used.
+
+So always prepare the hardware and make it ready for action in respective
+callbacks from the GPIO and irqchip APIs. Do not rely on gpiod_to_irq() having
+been called first.
+
+This orthogonality leads to ambiguities that we need to solve: if there is
+competition inside the subsystem which side is using the resource (a certain
+GPIO line and register for example) it needs to deny certain operations and
+keep track of usage inside of the gpiolib subsystem. This is why the API
+below exists.
+
+
+Locking IRQ usage
+-----------------
+Input GPIOs can be used as IRQ signals. When this happens, a driver is requested
+to mark the GPIO as being used as an IRQ:
+
+ int gpiochip_lock_as_irq(struct gpio_chip *chip, unsigned int offset)
+
+This will prevent the use of non-irq related GPIO APIs until the GPIO IRQ lock
+is released:
+
+ void gpiochip_unlock_as_irq(struct gpio_chip *chip, unsigned int offset)
+
+When implementing an irqchip inside a GPIO driver, these two functions should
+typically be called in the .startup() and .shutdown() callbacks from the
+irqchip.
+
+
+Requesting self-owned GPIO pins
+-------------------------------
+
+Sometimes it is useful to allow a GPIO chip driver to request its own GPIO
+descriptors through the gpiolib API. Using gpio_request() for this purpose
+does not help since it pins the module to the kernel forever (it calls
+try_module_get()). A GPIO driver can use the following functions instead
+to request and free descriptors without being pinned to the kernel forever.
+
+ struct gpio_desc *gpiochip_request_own_desc(struct gpio_desc *desc,
+ const char *label)
+
+ void gpiochip_free_own_desc(struct gpio_desc *desc)
+
+Descriptors requested with gpiochip_request_own_desc() must be released with
+gpiochip_free_own_desc().
+
+These functions must be used with care since they do not affect module use
+count. Do not use the functions to request gpio descriptors not owned by the
+calling driver.
diff --git a/Documentation/gpio/gpio-legacy.txt b/Documentation/gpio/gpio-legacy.txt
new file mode 100644
index 000000000..6f83fa965
--- /dev/null
+++ b/Documentation/gpio/gpio-legacy.txt
@@ -0,0 +1,775 @@
+GPIO Interfaces
+
+This provides an overview of GPIO access conventions on Linux.
+
+These calls use the gpio_* naming prefix. No other calls should use that
+prefix, or the related __gpio_* prefix.
+
+
+What is a GPIO?
+===============
+A "General Purpose Input/Output" (GPIO) is a flexible software-controlled
+digital signal. They are provided from many kinds of chip, and are familiar
+to Linux developers working with embedded and custom hardware. Each GPIO
+represents a bit connected to a particular pin, or "ball" on Ball Grid Array
+(BGA) packages. Board schematics show which external hardware connects to
+which GPIOs. Drivers can be written generically, so that board setup code
+passes such pin configuration data to drivers.
+
+System-on-Chip (SOC) processors heavily rely on GPIOs. In some cases, every
+non-dedicated pin can be configured as a GPIO; and most chips have at least
+several dozen of them. Programmable logic devices (like FPGAs) can easily
+provide GPIOs; multifunction chips like power managers, and audio codecs
+often have a few such pins to help with pin scarcity on SOCs; and there are
+also "GPIO Expander" chips that connect using the I2C or SPI serial busses.
+Most PC southbridges have a few dozen GPIO-capable pins (with only the BIOS
+firmware knowing how they're used).
+
+The exact capabilities of GPIOs vary between systems. Common options:
+
+ - Output values are writable (high=1, low=0). Some chips also have
+ options about how that value is driven, so that for example only one
+ value might be driven ... supporting "wire-OR" and similar schemes
+ for the other value (notably, "open drain" signaling).
+
+ - Input values are likewise readable (1, 0). Some chips support readback
+ of pins configured as "output", which is very useful in such "wire-OR"
+ cases (to support bidirectional signaling). GPIO controllers may have
+ input de-glitch/debounce logic, sometimes with software controls.
+
+ - Inputs can often be used as IRQ signals, often edge triggered but
+ sometimes level triggered. Such IRQs may be configurable as system
+ wakeup events, to wake the system from a low power state.
+
+ - Usually a GPIO will be configurable as either input or output, as needed
+ by different product boards; single direction ones exist too.
+
+ - Most GPIOs can be accessed while holding spinlocks, but those accessed
+ through a serial bus normally can't. Some systems support both types.
+
+On a given board each GPIO is used for one specific purpose like monitoring
+MMC/SD card insertion/removal, detecting card writeprotect status, driving
+a LED, configuring a transceiver, bitbanging a serial bus, poking a hardware
+watchdog, sensing a switch, and so on.
+
+
+GPIO conventions
+================
+Note that this is called a "convention" because you don't need to do it this
+way, and it's no crime if you don't. There **are** cases where portability
+is not the main issue; GPIOs are often used for the kind of board-specific
+glue logic that may even change between board revisions, and can't ever be
+used on a board that's wired differently. Only least-common-denominator
+functionality can be very portable. Other features are platform-specific,
+and that can be critical for glue logic.
+
+Plus, this doesn't require any implementation framework, just an interface.
+One platform might implement it as simple inline functions accessing chip
+registers; another might implement it by delegating through abstractions
+used for several very different kinds of GPIO controller. (There is some
+optional code supporting such an implementation strategy, described later
+in this document, but drivers acting as clients to the GPIO interface must
+not care how it's implemented.)
+
+That said, if the convention is supported on their platform, drivers should
+use it when possible. Platforms must select ARCH_REQUIRE_GPIOLIB or
+ARCH_WANT_OPTIONAL_GPIOLIB in their Kconfig. Drivers that can't work without
+standard GPIO calls should have Kconfig entries which depend on GPIOLIB. The
+GPIO calls are available, either as "real code" or as optimized-away stubs,
+when drivers use the include file:
+
+ #include <linux/gpio.h>
+
+If you stick to this convention then it'll be easier for other developers to
+see what your code is doing, and help maintain it.
+
+Note that these operations include I/O barriers on platforms which need to
+use them; drivers don't need to add them explicitly.
+
+
+Identifying GPIOs
+-----------------
+GPIOs are identified by unsigned integers in the range 0..MAX_INT. That
+reserves "negative" numbers for other purposes like marking signals as
+"not available on this board", or indicating faults. Code that doesn't
+touch the underlying hardware treats these integers as opaque cookies.
+
+Platforms define how they use those integers, and usually #define symbols
+for the GPIO lines so that board-specific setup code directly corresponds
+to the relevant schematics. In contrast, drivers should only use GPIO
+numbers passed to them from that setup code, using platform_data to hold
+board-specific pin configuration data (along with other board specific
+data they need). That avoids portability problems.
+
+So for example one platform uses numbers 32-159 for GPIOs; while another
+uses numbers 0..63 with one set of GPIO controllers, 64-79 with another
+type of GPIO controller, and on one particular board 80-95 with an FPGA.
+The numbers need not be contiguous; either of those platforms could also
+use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders.
+
+If you want to initialize a structure with an invalid GPIO number, use
+some negative number (perhaps "-EINVAL"); that will never be valid. To
+test if such number from such a structure could reference a GPIO, you
+may use this predicate:
+
+ int gpio_is_valid(int number);
+
+A number that's not valid will be rejected by calls which may request
+or free GPIOs (see below). Other numbers may also be rejected; for
+example, a number might be valid but temporarily unused on a given board.
+
+Whether a platform supports multiple GPIO controllers is a platform-specific
+implementation issue, as are whether that support can leave "holes" in the space
+of GPIO numbers, and whether new controllers can be added at runtime. Such issues
+can affect things including whether adjacent GPIO numbers are both valid.
+
+Using GPIOs
+-----------
+The first thing a system should do with a GPIO is allocate it, using
+the gpio_request() call; see later.
+
+One of the next things to do with a GPIO, often in board setup code when
+setting up a platform_device using the GPIO, is mark its direction:
+
+ /* set as input or output, returning 0 or negative errno */
+ int gpio_direction_input(unsigned gpio);
+ int gpio_direction_output(unsigned gpio, int value);
+
+The return value is zero for success, else a negative errno. It should
+be checked, since the get/set calls don't have error returns and since
+misconfiguration is possible. You should normally issue these calls from
+a task context. However, for spinlock-safe GPIOs it's OK to use them
+before tasking is enabled, as part of early board setup.
+
+For output GPIOs, the value provided becomes the initial output value.
+This helps avoid signal glitching during system startup.
+
+For compatibility with legacy interfaces to GPIOs, setting the direction
+of a GPIO implicitly requests that GPIO (see below) if it has not been
+requested already. That compatibility is being removed from the optional
+gpiolib framework.
+
+Setting the direction can fail if the GPIO number is invalid, or when
+that particular GPIO can't be used in that mode. It's generally a bad
+idea to rely on boot firmware to have set the direction correctly, since
+it probably wasn't validated to do more than boot Linux. (Similarly,
+that board setup code probably needs to multiplex that pin as a GPIO,
+and configure pullups/pulldowns appropriately.)
+
+
+Spinlock-Safe GPIO access
+-------------------------
+Most GPIO controllers can be accessed with memory read/write instructions.
+Those don't need to sleep, and can safely be done from inside hard
+(nonthreaded) IRQ handlers and similar contexts.
+
+Use the following calls to access such GPIOs,
+for which gpio_cansleep() will always return false (see below):
+
+ /* GPIO INPUT: return zero or nonzero */
+ int gpio_get_value(unsigned gpio);
+
+ /* GPIO OUTPUT */
+ void gpio_set_value(unsigned gpio, int value);
+
+The values are boolean, zero for low, nonzero for high. When reading the
+value of an output pin, the value returned should be what's seen on the
+pin ... that won't always match the specified output value, because of
+issues including open-drain signaling and output latencies.
+
+The get/set calls have no error returns because "invalid GPIO" should have
+been reported earlier from gpio_direction_*(). However, note that not all
+platforms can read the value of output pins; those that can't should always
+return zero. Also, using these calls for GPIOs that can't safely be accessed
+without sleeping (see below) is an error.
+
+Platform-specific implementations are encouraged to optimize the two
+calls to access the GPIO value in cases where the GPIO number (and for
+output, value) are constant. It's normal for them to need only a couple
+of instructions in such cases (reading or writing a hardware register),
+and not to need spinlocks. Such optimized calls can make bitbanging
+applications a lot more efficient (in both space and time) than spending
+dozens of instructions on subroutine calls.
+
+
+GPIO access that may sleep
+--------------------------
+Some GPIO controllers must be accessed using message based busses like I2C
+or SPI. Commands to read or write those GPIO values require waiting to
+get to the head of a queue to transmit a command and get its response.
+This requires sleeping, which can't be done from inside IRQ handlers.
+
+Platforms that support this type of GPIO distinguish them from other GPIOs
+by returning nonzero from this call (which requires a valid GPIO number,
+which should have been previously allocated with gpio_request):
+
+ int gpio_cansleep(unsigned gpio);
+
+To access such GPIOs, a different set of accessors is defined:
+
+ /* GPIO INPUT: return zero or nonzero, might sleep */
+ int gpio_get_value_cansleep(unsigned gpio);
+
+ /* GPIO OUTPUT, might sleep */
+ void gpio_set_value_cansleep(unsigned gpio, int value);
+
+
+Accessing such GPIOs requires a context which may sleep, for example
+a threaded IRQ handler, and those accessors must be used instead of
+spinlock-safe accessors without the cansleep() name suffix.
+
+Other than the fact that these accessors might sleep, and will work
+on GPIOs that can't be accessed from hardIRQ handlers, these calls act
+the same as the spinlock-safe calls.
+
+ ** IN ADDITION ** calls to setup and configure such GPIOs must be made
+from contexts which may sleep, since they may need to access the GPIO
+controller chip too: (These setup calls are usually made from board
+setup or driver probe/teardown code, so this is an easy constraint.)
+
+ gpio_direction_input()
+ gpio_direction_output()
+ gpio_request()
+
+## gpio_request_one()
+## gpio_request_array()
+## gpio_free_array()
+
+ gpio_free()
+ gpio_set_debounce()
+
+
+
+Claiming and Releasing GPIOs
+----------------------------
+To help catch system configuration errors, two calls are defined.
+
+ /* request GPIO, returning 0 or negative errno.
+ * non-null labels may be useful for diagnostics.
+ */
+ int gpio_request(unsigned gpio, const char *label);
+
+ /* release previously-claimed GPIO */
+ void gpio_free(unsigned gpio);
+
+Passing invalid GPIO numbers to gpio_request() will fail, as will requesting
+GPIOs that have already been claimed with that call. The return value of
+gpio_request() must be checked. You should normally issue these calls from
+a task context. However, for spinlock-safe GPIOs it's OK to request GPIOs
+before tasking is enabled, as part of early board setup.
+
+These calls serve two basic purposes. One is marking the signals which
+are actually in use as GPIOs, for better diagnostics; systems may have
+several hundred potential GPIOs, but often only a dozen are used on any
+given board. Another is to catch conflicts, identifying errors when
+(a) two or more drivers wrongly think they have exclusive use of that
+signal, or (b) something wrongly believes it's safe to remove drivers
+needed to manage a signal that's in active use. That is, requesting a
+GPIO can serve as a kind of lock.
+
+Some platforms may also use knowledge about what GPIOs are active for
+power management, such as by powering down unused chip sectors and, more
+easily, gating off unused clocks.
+
+For GPIOs that use pins known to the pinctrl subsystem, that subsystem should
+be informed of their use; a gpiolib driver's .request() operation may call
+pinctrl_request_gpio(), and a gpiolib driver's .free() operation may call
+pinctrl_free_gpio(). The pinctrl subsystem allows a pinctrl_request_gpio()
+to succeed concurrently with a pin or pingroup being "owned" by a device for
+pin multiplexing.
+
+Any programming of pin multiplexing hardware that is needed to route the
+GPIO signal to the appropriate pin should occur within a GPIO driver's
+.direction_input() or .direction_output() operations, and occur after any
+setup of an output GPIO's value. This allows a glitch-free migration from a
+pin's special function to GPIO. This is sometimes required when using a GPIO
+to implement a workaround on signals typically driven by a non-GPIO HW block.
+
+Some platforms allow some or all GPIO signals to be routed to different pins.
+Similarly, other aspects of the GPIO or pin may need to be configured, such as
+pullup/pulldown. Platform software should arrange that any such details are
+configured prior to gpio_request() being called for those GPIOs, e.g. using
+the pinctrl subsystem's mapping table, so that GPIO users need not be aware
+of these details.
+
+Also note that it's your responsibility to have stopped using a GPIO
+before you free it.
+
+Considering in most cases GPIOs are actually configured right after they
+are claimed, three additional calls are defined:
+
+ /* request a single GPIO, with initial configuration specified by
+ * 'flags', identical to gpio_request() wrt other arguments and
+ * return value
+ */
+ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label);
+
+ /* request multiple GPIOs in a single call
+ */
+ int gpio_request_array(struct gpio *array, size_t num);
+
+ /* release multiple GPIOs in a single call
+ */
+ void gpio_free_array(struct gpio *array, size_t num);
+
+where 'flags' is currently defined to specify the following properties:
+
+ * GPIOF_DIR_IN - to configure direction as input
+ * GPIOF_DIR_OUT - to configure direction as output
+
+ * GPIOF_INIT_LOW - as output, set initial level to LOW
+ * GPIOF_INIT_HIGH - as output, set initial level to HIGH
+ * GPIOF_OPEN_DRAIN - gpio pin is open drain type.
+ * GPIOF_OPEN_SOURCE - gpio pin is open source type.
+
+ * GPIOF_EXPORT_DIR_FIXED - export gpio to sysfs, keep direction
+ * GPIOF_EXPORT_DIR_CHANGEABLE - also export, allow changing direction
+
+since GPIOF_INIT_* are only valid when configured as output, so group valid
+combinations as:
+
+ * GPIOF_IN - configure as input
+ * GPIOF_OUT_INIT_LOW - configured as output, initial level LOW
+ * GPIOF_OUT_INIT_HIGH - configured as output, initial level HIGH
+
+When setting the flag as GPIOF_OPEN_DRAIN then it will assume that pins is
+open drain type. Such pins will not be driven to 1 in output mode. It is
+require to connect pull-up on such pins. By enabling this flag, gpio lib will
+make the direction to input when it is asked to set value of 1 in output mode
+to make the pin HIGH. The pin is make to LOW by driving value 0 in output mode.
+
+When setting the flag as GPIOF_OPEN_SOURCE then it will assume that pins is
+open source type. Such pins will not be driven to 0 in output mode. It is
+require to connect pull-down on such pin. By enabling this flag, gpio lib will
+make the direction to input when it is asked to set value of 0 in output mode
+to make the pin LOW. The pin is make to HIGH by driving value 1 in output mode.
+
+In the future, these flags can be extended to support more properties.
+
+Further more, to ease the claim/release of multiple GPIOs, 'struct gpio' is
+introduced to encapsulate all three fields as:
+
+ struct gpio {
+ unsigned gpio;
+ unsigned long flags;
+ const char *label;
+ };
+
+A typical example of usage:
+
+ static struct gpio leds_gpios[] = {
+ { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* default to ON */
+ { 33, GPIOF_OUT_INIT_LOW, "Green LED" }, /* default to OFF */
+ { 34, GPIOF_OUT_INIT_LOW, "Red LED" }, /* default to OFF */
+ { 35, GPIOF_OUT_INIT_LOW, "Blue LED" }, /* default to OFF */
+ { ... },
+ };
+
+ err = gpio_request_one(31, GPIOF_IN, "Reset Button");
+ if (err)
+ ...
+
+ err = gpio_request_array(leds_gpios, ARRAY_SIZE(leds_gpios));
+ if (err)
+ ...
+
+ gpio_free_array(leds_gpios, ARRAY_SIZE(leds_gpios));
+
+
+GPIOs mapped to IRQs
+--------------------
+GPIO numbers are unsigned integers; so are IRQ numbers. These make up
+two logically distinct namespaces (GPIO 0 need not use IRQ 0). You can
+map between them using calls like:
+
+ /* map GPIO numbers to IRQ numbers */
+ int gpio_to_irq(unsigned gpio);
+
+ /* map IRQ numbers to GPIO numbers (avoid using this) */
+ int irq_to_gpio(unsigned irq);
+
+Those return either the corresponding number in the other namespace, or
+else a negative errno code if the mapping can't be done. (For example,
+some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO
+number that wasn't set up as an input using gpio_direction_input(), or
+to use an IRQ number that didn't originally come from gpio_to_irq().
+
+These two mapping calls are expected to cost on the order of a single
+addition or subtraction. They're not allowed to sleep.
+
+Non-error values returned from gpio_to_irq() can be passed to request_irq()
+or free_irq(). They will often be stored into IRQ resources for platform
+devices, by the board-specific initialization code. Note that IRQ trigger
+options are part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are
+system wakeup capabilities.
+
+Non-error values returned from irq_to_gpio() would most commonly be used
+with gpio_get_value(), for example to initialize or update driver state
+when the IRQ is edge-triggered. Note that some platforms don't support
+this reverse mapping, so you should avoid using it.
+
+
+Emulating Open Drain Signals
+----------------------------
+Sometimes shared signals need to use "open drain" signaling, where only the
+low signal level is actually driven. (That term applies to CMOS transistors;
+"open collector" is used for TTL.) A pullup resistor causes the high signal
+level. This is sometimes called a "wire-AND"; or more practically, from the
+negative logic (low=true) perspective this is a "wire-OR".
+
+One common example of an open drain signal is a shared active-low IRQ line.
+Also, bidirectional data bus signals sometimes use open drain signals.
+
+Some GPIO controllers directly support open drain outputs; many don't. When
+you need open drain signaling but your hardware doesn't directly support it,
+there's a common idiom you can use to emulate it with any GPIO pin that can
+be used as either an input or an output:
+
+ LOW: gpio_direction_output(gpio, 0) ... this drives the signal
+ and overrides the pullup.
+
+ HIGH: gpio_direction_input(gpio) ... this turns off the output,
+ so the pullup (or some other device) controls the signal.
+
+If you are "driving" the signal high but gpio_get_value(gpio) reports a low
+value (after the appropriate rise time passes), you know some other component
+is driving the shared signal low. That's not necessarily an error. As one
+common example, that's how I2C clocks are stretched: a slave that needs a
+slower clock delays the rising edge of SCK, and the I2C master adjusts its
+signaling rate accordingly.
+
+
+GPIO controllers and the pinctrl subsystem
+------------------------------------------
+
+A GPIO controller on a SOC might be tightly coupled with the pinctrl
+subsystem, in the sense that the pins can be used by other functions
+together with an optional gpio feature. We have already covered the
+case where e.g. a GPIO controller need to reserve a pin or set the
+direction of a pin by calling any of:
+
+pinctrl_request_gpio()
+pinctrl_free_gpio()
+pinctrl_gpio_direction_input()
+pinctrl_gpio_direction_output()
+
+But how does the pin control subsystem cross-correlate the GPIO
+numbers (which are a global business) to a certain pin on a certain
+pin controller?
+
+This is done by registering "ranges" of pins, which are essentially
+cross-reference tables. These are described in
+Documentation/pinctrl.txt
+
+While the pin allocation is totally managed by the pinctrl subsystem,
+gpio (under gpiolib) is still maintained by gpio drivers. It may happen
+that different pin ranges in a SoC is managed by different gpio drivers.
+
+This makes it logical to let gpio drivers announce their pin ranges to
+the pin ctrl subsystem before it will call 'pinctrl_request_gpio' in order
+to request the corresponding pin to be prepared by the pinctrl subsystem
+before any gpio usage.
+
+For this, the gpio controller can register its pin range with pinctrl
+subsystem. There are two ways of doing it currently: with or without DT.
+
+For with DT support refer to Documentation/devicetree/bindings/gpio/gpio.txt.
+
+For non-DT support, user can call gpiochip_add_pin_range() with appropriate
+parameters to register a range of gpio pins with a pinctrl driver. For this
+exact name string of pinctrl device has to be passed as one of the
+argument to this routine.
+
+
+What do these conventions omit?
+===============================
+One of the biggest things these conventions omit is pin multiplexing, since
+this is highly chip-specific and nonportable. One platform might not need
+explicit multiplexing; another might have just two options for use of any
+given pin; another might have eight options per pin; another might be able
+to route a given GPIO to any one of several pins. (Yes, those examples all
+come from systems that run Linux today.)
+
+Related to multiplexing is configuration and enabling of the pullups or
+pulldowns integrated on some platforms. Not all platforms support them,
+or support them in the same way; and any given board might use external
+pullups (or pulldowns) so that the on-chip ones should not be used.
+(When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.)
+Likewise drive strength (2 mA vs 20 mA) and voltage (1.8V vs 3.3V) is a
+platform-specific issue, as are models like (not) having a one-to-one
+correspondence between configurable pins and GPIOs.
+
+There are other system-specific mechanisms that are not specified here,
+like the aforementioned options for input de-glitching and wire-OR output.
+Hardware may support reading or writing GPIOs in gangs, but that's usually
+configuration dependent: for GPIOs sharing the same bank. (GPIOs are
+commonly grouped in banks of 16 or 32, with a given SOC having several such
+banks.) Some systems can trigger IRQs from output GPIOs, or read values
+from pins not managed as GPIOs. Code relying on such mechanisms will
+necessarily be nonportable.
+
+Dynamic definition of GPIOs is not currently standard; for example, as
+a side effect of configuring an add-on board with some GPIO expanders.
+
+
+GPIO implementor's framework (OPTIONAL)
+=======================================
+As noted earlier, there is an optional implementation framework making it
+easier for platforms to support different kinds of GPIO controller using
+the same programming interface. This framework is called "gpiolib".
+
+As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file
+will be found there. That will list all the controllers registered through
+this framework, and the state of the GPIOs currently in use.
+
+
+Controller Drivers: gpio_chip
+-----------------------------
+In this framework each GPIO controller is packaged as a "struct gpio_chip"
+with information common to each controller of that type:
+
+ - methods to establish GPIO direction
+ - methods used to access GPIO values
+ - flag saying whether calls to its methods may sleep
+ - optional debugfs dump method (showing extra state like pullup config)
+ - label for diagnostics
+
+There is also per-instance data, which may come from device.platform_data:
+the number of its first GPIO, and how many GPIOs it exposes.
+
+The code implementing a gpio_chip should support multiple instances of the
+controller, possibly using the driver model. That code will configure each
+gpio_chip and issue gpiochip_add(). Removing a GPIO controller should be
+rare; use gpiochip_remove() when it is unavoidable.
+
+Most often a gpio_chip is part of an instance-specific structure with state
+not exposed by the GPIO interfaces, such as addressing, power management,
+and more. Chips such as codecs will have complex non-GPIO state.
+
+Any debugfs dump method should normally ignore signals which haven't been
+requested as GPIOs. They can use gpiochip_is_requested(), which returns
+either NULL or the label associated with that GPIO when it was requested.
+
+
+Platform Support
+----------------
+To support this framework, a platform's Kconfig will "select" either
+ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB
+and arrange that its <asm/gpio.h> includes <asm-generic/gpio.h> and defines
+three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep().
+
+It may also provide a custom value for ARCH_NR_GPIOS, so that it better
+reflects the number of GPIOs in actual use on that platform, without
+wasting static table space. (It should count both built-in/SoC GPIOs and
+also ones on GPIO expanders.
+
+ARCH_REQUIRE_GPIOLIB means that the gpiolib code will always get compiled
+into the kernel on that architecture.
+
+ARCH_WANT_OPTIONAL_GPIOLIB means the gpiolib code defaults to off and the user
+can enable it and build it into the kernel optionally.
+
+If neither of these options are selected, the platform does not support
+GPIOs through GPIO-lib and the code cannot be enabled by the user.
+
+Trivial implementations of those functions can directly use framework
+code, which always dispatches through the gpio_chip:
+
+ #define gpio_get_value __gpio_get_value
+ #define gpio_set_value __gpio_set_value
+ #define gpio_cansleep __gpio_cansleep
+
+Fancier implementations could instead define those as inline functions with
+logic optimizing access to specific SOC-based GPIOs. For example, if the
+referenced GPIO is the constant "12", getting or setting its value could
+cost as little as two or three instructions, never sleeping. When such an
+optimization is not possible those calls must delegate to the framework
+code, costing at least a few dozen instructions. For bitbanged I/O, such
+instruction savings can be significant.
+
+For SOCs, platform-specific code defines and registers gpio_chip instances
+for each bank of on-chip GPIOs. Those GPIOs should be numbered/labeled to
+match chip vendor documentation, and directly match board schematics. They
+may well start at zero and go up to a platform-specific limit. Such GPIOs
+are normally integrated into platform initialization to make them always be
+available, from arch_initcall() or earlier; they can often serve as IRQs.
+
+
+Board Support
+-------------
+For external GPIO controllers -- such as I2C or SPI expanders, ASICs, multi
+function devices, FPGAs or CPLDs -- most often board-specific code handles
+registering controller devices and ensures that their drivers know what GPIO
+numbers to use with gpiochip_add(). Their numbers often start right after
+platform-specific GPIOs.
+
+For example, board setup code could create structures identifying the range
+of GPIOs that chip will expose, and passes them to each GPIO expander chip
+using platform_data. Then the chip driver's probe() routine could pass that
+data to gpiochip_add().
+
+Initialization order can be important. For example, when a device relies on
+an I2C-based GPIO, its probe() routine should only be called after that GPIO
+becomes available. That may mean the device should not be registered until
+calls for that GPIO can work. One way to address such dependencies is for
+such gpio_chip controllers to provide setup() and teardown() callbacks to
+board specific code; those board specific callbacks would register devices
+once all the necessary resources are available, and remove them later when
+the GPIO controller device becomes unavailable.
+
+
+Sysfs Interface for Userspace (OPTIONAL)
+========================================
+Platforms which use the "gpiolib" implementors framework may choose to
+configure a sysfs user interface to GPIOs. This is different from the
+debugfs interface, since it provides control over GPIO direction and
+value instead of just showing a gpio state summary. Plus, it could be
+present on production systems without debugging support.
+
+Given appropriate hardware documentation for the system, userspace could
+know for example that GPIO #23 controls the write protect line used to
+protect boot loader segments in flash memory. System upgrade procedures
+may need to temporarily remove that protection, first importing a GPIO,
+then changing its output state, then updating the code before re-enabling
+the write protection. In normal use, GPIO #23 would never be touched,
+and the kernel would have no need to know about it.
+
+Again depending on appropriate hardware documentation, on some systems
+userspace GPIO can be used to determine system configuration data that
+standard kernels won't know about. And for some tasks, simple userspace
+GPIO drivers could be all that the system really needs.
+
+Note that standard kernel drivers exist for common "LEDs and Buttons"
+GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those
+instead of talking directly to the GPIOs; they integrate with kernel
+frameworks better than your userspace code could.
+
+
+Paths in Sysfs
+--------------
+There are three kinds of entry in /sys/class/gpio:
+
+ - Control interfaces used to get userspace control over GPIOs;
+
+ - GPIOs themselves; and
+
+ - GPIO controllers ("gpio_chip" instances).
+
+That's in addition to standard files including the "device" symlink.
+
+The control interfaces are write-only:
+
+ /sys/class/gpio/
+
+ "export" ... Userspace may ask the kernel to export control of
+ a GPIO to userspace by writing its number to this file.
+
+ Example: "echo 19 > export" will create a "gpio19" node
+ for GPIO #19, if that's not requested by kernel code.
+
+ "unexport" ... Reverses the effect of exporting to userspace.
+
+ Example: "echo 19 > unexport" will remove a "gpio19"
+ node exported using the "export" file.
+
+GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42)
+and have the following read/write attributes:
+
+ /sys/class/gpio/gpioN/
+
+ "direction" ... reads as either "in" or "out". This value may
+ normally be written. Writing as "out" defaults to
+ initializing the value as low. To ensure glitch free
+ operation, values "low" and "high" may be written to
+ configure the GPIO as an output with that initial value.
+
+ Note that this attribute *will not exist* if the kernel
+ doesn't support changing the direction of a GPIO, or
+ it was exported by kernel code that didn't explicitly
+ allow userspace to reconfigure this GPIO's direction.
+
+ "value" ... reads as either 0 (low) or 1 (high). If the GPIO
+ is configured as an output, this value may be written;
+ any nonzero value is treated as high.
+
+ If the pin can be configured as interrupt-generating interrupt
+ and if it has been configured to generate interrupts (see the
+ description of "edge"), you can poll(2) on that file and
+ poll(2) will return whenever the interrupt was triggered. If
+ you use poll(2), set the events POLLPRI and POLLERR. If you
+ use select(2), set the file descriptor in exceptfds. After
+ poll(2) returns, either lseek(2) to the beginning of the sysfs
+ file and read the new value or close the file and re-open it
+ to read the value.
+
+ "edge" ... reads as either "none", "rising", "falling", or
+ "both". Write these strings to select the signal edge(s)
+ that will make poll(2) on the "value" file return.
+
+ This file exists only if the pin can be configured as an
+ interrupt generating input pin.
+
+ "active_low" ... reads as either 0 (false) or 1 (true). Write
+ any nonzero value to invert the value attribute both
+ for reading and writing. Existing and subsequent
+ poll(2) support configuration via the edge attribute
+ for "rising" and "falling" edges will follow this
+ setting.
+
+GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the
+controller implementing GPIOs starting at #42) and have the following
+read-only attributes:
+
+ /sys/class/gpio/gpiochipN/
+
+ "base" ... same as N, the first GPIO managed by this chip
+
+ "label" ... provided for diagnostics (not always unique)
+
+ "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1)
+
+Board documentation should in most cases cover what GPIOs are used for
+what purposes. However, those numbers are not always stable; GPIOs on
+a daughtercard might be different depending on the base board being used,
+or other cards in the stack. In such cases, you may need to use the
+gpiochip nodes (possibly in conjunction with schematics) to determine
+the correct GPIO number to use for a given signal.
+
+
+Exporting from Kernel code
+--------------------------
+Kernel code can explicitly manage exports of GPIOs which have already been
+requested using gpio_request():
+
+ /* export the GPIO to userspace */
+ int gpio_export(unsigned gpio, bool direction_may_change);
+
+ /* reverse gpio_export() */
+ void gpio_unexport();
+
+ /* create a sysfs link to an exported GPIO node */
+ int gpio_export_link(struct device *dev, const char *name,
+ unsigned gpio)
+
+ /* change the polarity of a GPIO node in sysfs */
+ int gpio_sysfs_set_active_low(unsigned gpio, int value);
+
+After a kernel driver requests a GPIO, it may only be made available in
+the sysfs interface by gpio_export(). The driver can control whether the
+signal direction may change. This helps drivers prevent userspace code
+from accidentally clobbering important system state.
+
+This explicit exporting can help with debugging (by making some kinds
+of experiments easier), or can provide an always-there interface that's
+suitable for documenting as part of a board support package.
+
+After the GPIO has been exported, gpio_export_link() allows creating
+symlinks from elsewhere in sysfs to the GPIO sysfs node. Drivers can
+use this to provide the interface under their own device in sysfs with
+a descriptive name.
+
+Drivers can use gpio_sysfs_set_active_low() to hide GPIO line polarity
+differences between boards from user space. This only affects the
+sysfs interface. Polarity change can be done both before and after
+gpio_export(), and previously enabled poll(2) support for either
+rising or falling edge will be reconfigured to follow this setting.
diff --git a/Documentation/gpio/gpio.txt b/Documentation/gpio/gpio.txt
new file mode 100644
index 000000000..cd9b356e8
--- /dev/null
+++ b/Documentation/gpio/gpio.txt
@@ -0,0 +1,119 @@
+GPIO Interfaces
+===============
+
+The documents in this directory give detailed instructions on how to access
+GPIOs in drivers, and how to write a driver for a device that provides GPIOs
+itself.
+
+Due to the history of GPIO interfaces in the kernel, there are two different
+ways to obtain and use GPIOs:
+
+ - The descriptor-based interface is the preferred way to manipulate GPIOs,
+and is described by all the files in this directory excepted gpio-legacy.txt.
+ - The legacy integer-based interface which is considered deprecated (but still
+usable for compatibility reasons) is documented in gpio-legacy.txt.
+
+The remainder of this document applies to the new descriptor-based interface.
+gpio-legacy.txt contains the same information applied to the legacy
+integer-based interface.
+
+
+What is a GPIO?
+===============
+
+A "General Purpose Input/Output" (GPIO) is a flexible software-controlled
+digital signal. They are provided from many kinds of chip, and are familiar
+to Linux developers working with embedded and custom hardware. Each GPIO
+represents a bit connected to a particular pin, or "ball" on Ball Grid Array
+(BGA) packages. Board schematics show which external hardware connects to
+which GPIOs. Drivers can be written generically, so that board setup code
+passes such pin configuration data to drivers.
+
+System-on-Chip (SOC) processors heavily rely on GPIOs. In some cases, every
+non-dedicated pin can be configured as a GPIO; and most chips have at least
+several dozen of them. Programmable logic devices (like FPGAs) can easily
+provide GPIOs; multifunction chips like power managers, and audio codecs
+often have a few such pins to help with pin scarcity on SOCs; and there are
+also "GPIO Expander" chips that connect using the I2C or SPI serial buses.
+Most PC southbridges have a few dozen GPIO-capable pins (with only the BIOS
+firmware knowing how they're used).
+
+The exact capabilities of GPIOs vary between systems. Common options:
+
+ - Output values are writable (high=1, low=0). Some chips also have
+ options about how that value is driven, so that for example only one
+ value might be driven, supporting "wire-OR" and similar schemes for the
+ other value (notably, "open drain" signaling).
+
+ - Input values are likewise readable (1, 0). Some chips support readback
+ of pins configured as "output", which is very useful in such "wire-OR"
+ cases (to support bidirectional signaling). GPIO controllers may have
+ input de-glitch/debounce logic, sometimes with software controls.
+
+ - Inputs can often be used as IRQ signals, often edge triggered but
+ sometimes level triggered. Such IRQs may be configurable as system
+ wakeup events, to wake the system from a low power state.
+
+ - Usually a GPIO will be configurable as either input or output, as needed
+ by different product boards; single direction ones exist too.
+
+ - Most GPIOs can be accessed while holding spinlocks, but those accessed
+ through a serial bus normally can't. Some systems support both types.
+
+On a given board each GPIO is used for one specific purpose like monitoring
+MMC/SD card insertion/removal, detecting card write-protect status, driving
+a LED, configuring a transceiver, bit-banging a serial bus, poking a hardware
+watchdog, sensing a switch, and so on.
+
+
+Common GPIO Properties
+======================
+
+These properties are met through all the other documents of the GPIO interface
+and it is useful to understand them, especially if you need to define GPIO
+mappings.
+
+Active-High and Active-Low
+--------------------------
+It is natural to assume that a GPIO is "active" when its output signal is 1
+("high"), and inactive when it is 0 ("low"). However in practice the signal of a
+GPIO may be inverted before is reaches its destination, or a device could decide
+to have different conventions about what "active" means. Such decisions should
+be transparent to device drivers, therefore it is possible to define a GPIO as
+being either active-high ("1" means "active", the default) or active-low ("0"
+means "active") so that drivers only need to worry about the logical signal and
+not about what happens at the line level.
+
+Open Drain and Open Source
+--------------------------
+Sometimes shared signals need to use "open drain" (where only the low signal
+level is actually driven), or "open source" (where only the high signal level is
+driven) signaling. That term applies to CMOS transistors; "open collector" is
+used for TTL. A pullup or pulldown resistor causes the high or low signal level.
+This is sometimes called a "wire-AND"; or more practically, from the negative
+logic (low=true) perspective this is a "wire-OR".
+
+One common example of an open drain signal is a shared active-low IRQ line.
+Also, bidirectional data bus signals sometimes use open drain signals.
+
+Some GPIO controllers directly support open drain and open source outputs; many
+don't. When you need open drain signaling but your hardware doesn't directly
+support it, there's a common idiom you can use to emulate it with any GPIO pin
+that can be used as either an input or an output:
+
+ LOW: gpiod_direction_output(gpio, 0) ... this drives the signal and overrides
+ the pullup.
+
+ HIGH: gpiod_direction_input(gpio) ... this turns off the output, so the pullup
+ (or some other device) controls the signal.
+
+The same logic can be applied to emulate open source signaling, by driving the
+high signal and configuring the GPIO as input for low. This open drain/open
+source emulation can be handled transparently by the GPIO framework.
+
+If you are "driving" the signal high but gpiod_get_value(gpio) reports a low
+value (after the appropriate rise time passes), you know some other component is
+driving the shared signal low. That's not necessarily an error. As one common
+example, that's how I2C clocks are stretched: a slave that needs a slower clock
+delays the rising edge of SCK, and the I2C master adjusts its signaling rate
+accordingly.
diff --git a/Documentation/gpio/sysfs.txt b/Documentation/gpio/sysfs.txt
new file mode 100644
index 000000000..c2c3a97f8
--- /dev/null
+++ b/Documentation/gpio/sysfs.txt
@@ -0,0 +1,155 @@
+GPIO Sysfs Interface for Userspace
+==================================
+
+Platforms which use the "gpiolib" implementors framework may choose to
+configure a sysfs user interface to GPIOs. This is different from the
+debugfs interface, since it provides control over GPIO direction and
+value instead of just showing a gpio state summary. Plus, it could be
+present on production systems without debugging support.
+
+Given appropriate hardware documentation for the system, userspace could
+know for example that GPIO #23 controls the write protect line used to
+protect boot loader segments in flash memory. System upgrade procedures
+may need to temporarily remove that protection, first importing a GPIO,
+then changing its output state, then updating the code before re-enabling
+the write protection. In normal use, GPIO #23 would never be touched,
+and the kernel would have no need to know about it.
+
+Again depending on appropriate hardware documentation, on some systems
+userspace GPIO can be used to determine system configuration data that
+standard kernels won't know about. And for some tasks, simple userspace
+GPIO drivers could be all that the system really needs.
+
+Note that standard kernel drivers exist for common "LEDs and Buttons"
+GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those
+instead of talking directly to the GPIOs; they integrate with kernel
+frameworks better than your userspace code could.
+
+
+Paths in Sysfs
+--------------
+There are three kinds of entry in /sys/class/gpio:
+
+ - Control interfaces used to get userspace control over GPIOs;
+
+ - GPIOs themselves; and
+
+ - GPIO controllers ("gpio_chip" instances).
+
+That's in addition to standard files including the "device" symlink.
+
+The control interfaces are write-only:
+
+ /sys/class/gpio/
+
+ "export" ... Userspace may ask the kernel to export control of
+ a GPIO to userspace by writing its number to this file.
+
+ Example: "echo 19 > export" will create a "gpio19" node
+ for GPIO #19, if that's not requested by kernel code.
+
+ "unexport" ... Reverses the effect of exporting to userspace.
+
+ Example: "echo 19 > unexport" will remove a "gpio19"
+ node exported using the "export" file.
+
+GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42)
+and have the following read/write attributes:
+
+ /sys/class/gpio/gpioN/
+
+ "direction" ... reads as either "in" or "out". This value may
+ normally be written. Writing as "out" defaults to
+ initializing the value as low. To ensure glitch free
+ operation, values "low" and "high" may be written to
+ configure the GPIO as an output with that initial value.
+
+ Note that this attribute *will not exist* if the kernel
+ doesn't support changing the direction of a GPIO, or
+ it was exported by kernel code that didn't explicitly
+ allow userspace to reconfigure this GPIO's direction.
+
+ "value" ... reads as either 0 (low) or 1 (high). If the GPIO
+ is configured as an output, this value may be written;
+ any nonzero value is treated as high.
+
+ If the pin can be configured as interrupt-generating interrupt
+ and if it has been configured to generate interrupts (see the
+ description of "edge"), you can poll(2) on that file and
+ poll(2) will return whenever the interrupt was triggered. If
+ you use poll(2), set the events POLLPRI and POLLERR. If you
+ use select(2), set the file descriptor in exceptfds. After
+ poll(2) returns, either lseek(2) to the beginning of the sysfs
+ file and read the new value or close the file and re-open it
+ to read the value.
+
+ "edge" ... reads as either "none", "rising", "falling", or
+ "both". Write these strings to select the signal edge(s)
+ that will make poll(2) on the "value" file return.
+
+ This file exists only if the pin can be configured as an
+ interrupt generating input pin.
+
+ "active_low" ... reads as either 0 (false) or 1 (true). Write
+ any nonzero value to invert the value attribute both
+ for reading and writing. Existing and subsequent
+ poll(2) support configuration via the edge attribute
+ for "rising" and "falling" edges will follow this
+ setting.
+
+GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the
+controller implementing GPIOs starting at #42) and have the following
+read-only attributes:
+
+ /sys/class/gpio/gpiochipN/
+
+ "base" ... same as N, the first GPIO managed by this chip
+
+ "label" ... provided for diagnostics (not always unique)
+
+ "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1)
+
+Board documentation should in most cases cover what GPIOs are used for
+what purposes. However, those numbers are not always stable; GPIOs on
+a daughtercard might be different depending on the base board being used,
+or other cards in the stack. In such cases, you may need to use the
+gpiochip nodes (possibly in conjunction with schematics) to determine
+the correct GPIO number to use for a given signal.
+
+
+Exporting from Kernel code
+--------------------------
+Kernel code can explicitly manage exports of GPIOs which have already been
+requested using gpio_request():
+
+ /* export the GPIO to userspace */
+ int gpiod_export(struct gpio_desc *desc, bool direction_may_change);
+
+ /* reverse gpio_export() */
+ void gpiod_unexport(struct gpio_desc *desc);
+
+ /* create a sysfs link to an exported GPIO node */
+ int gpiod_export_link(struct device *dev, const char *name,
+ struct gpio_desc *desc);
+
+ /* change the polarity of a GPIO node in sysfs */
+ int gpiod_sysfs_set_active_low(struct gpio_desc *desc, int value);
+
+After a kernel driver requests a GPIO, it may only be made available in
+the sysfs interface by gpiod_export(). The driver can control whether the
+signal direction may change. This helps drivers prevent userspace code
+from accidentally clobbering important system state.
+
+This explicit exporting can help with debugging (by making some kinds
+of experiments easier), or can provide an always-there interface that's
+suitable for documenting as part of a board support package.
+
+After the GPIO has been exported, gpiod_export_link() allows creating
+symlinks from elsewhere in sysfs to the GPIO sysfs node. Drivers can
+use this to provide the interface under their own device in sysfs with
+a descriptive name.
+
+Drivers can use gpiod_sysfs_set_active_low() to hide GPIO line polarity
+differences between boards from user space. Polarity change can be done both
+before and after gpiod_export(), and previously enabled poll(2) support for
+either rising or falling edge will be reconfigured to follow this setting.
diff --git a/Documentation/hid/hid-sensor.txt b/Documentation/hid/hid-sensor.txt
new file mode 100644
index 000000000..b287752a3
--- /dev/null
+++ b/Documentation/hid/hid-sensor.txt
@@ -0,0 +1,224 @@
+
+HID Sensors Framework
+======================
+HID sensor framework provides necessary interfaces to implement sensor drivers,
+which are connected to a sensor hub. The sensor hub is a HID device and it provides
+a report descriptor conforming to HID 1.12 sensor usage tables.
+
+Description from the HID 1.12 "HID Sensor Usages" specification:
+"Standardization of HID usages for sensors would allow (but not require) sensor
+hardware vendors to provide a consistent Plug And Play interface at the USB boundary,
+thereby enabling some operating systems to incorporate common device drivers that
+could be reused between vendors, alleviating any need for the vendors to provide
+the drivers themselves."
+
+This specification describes many usage IDs, which describe the type of sensor
+and also the individual data fields. Each sensor can have variable number of
+data fields. The length and order is specified in the report descriptor. For
+example a part of report descriptor can look like:
+
+ INPUT(1)[INPUT]
+ ..
+ Field(2)
+ Physical(0020.0073)
+ Usage(1)
+ 0020.045f
+ Logical Minimum(-32767)
+ Logical Maximum(32767)
+ Report Size(8)
+ Report Count(1)
+ Report Offset(16)
+ Flags(Variable Absolute)
+..
+..
+
+The report is indicating "sensor page (0x20)" contains an accelerometer-3D (0x73).
+This accelerometer-3D has some fields. Here for example field 2 is motion intensity
+(0x045f) with a logical minimum value of -32767 and logical maximum of 32767. The
+order of fields and length of each field is important as the input event raw
+data will use this format.
+
+
+Implementation
+=================
+
+This specification defines many different types of sensors with different sets of
+data fields. It is difficult to have a common input event to user space applications,
+for different sensors. For example an accelerometer can send X,Y and Z data, whereas
+an ambient light sensor can send illumination data.
+So the implementation has two parts:
+- Core hid driver
+- Individual sensor processing part (sensor drivers)
+
+Core driver
+-----------
+The core driver registers (hid-sensor-hub) registers as a HID driver. It parses
+report descriptors and identifies all the sensors present. It adds an MFD device
+with name HID-SENSOR-xxxx (where xxxx is usage id from the specification).
+For example
+HID-SENSOR-200073 is registered for an Accelerometer 3D driver.
+So if any driver with this name is inserted, then the probe routine for that
+function will be called. So an accelerometer processing driver can register
+with this name and will be probed if there is an accelerometer-3D detected.
+
+The core driver provides a set of APIs which can be used by the processing
+drivers to register and get events for that usage id. Also it provides parsing
+functions, which get and set each input/feature/output report.
+
+Individual sensor processing part (sensor drivers)
+-----------
+The processing driver will use an interface provided by the core driver to parse
+the report and get the indexes of the fields and also can get events. This driver
+can use IIO interface to use the standard ABI defined for a type of sensor.
+
+
+Core driver Interface
+=====================
+
+Callback structure:
+Each processing driver can use this structure to set some callbacks.
+ int (*suspend)(..): Callback when HID suspend is received
+ int (*resume)(..): Callback when HID resume is received
+ int (*capture_sample)(..): Capture a sample for one of its data fields
+ int (*send_event)(..): One complete event is received which can have
+ multiple data fields.
+
+Registration functions:
+int sensor_hub_register_callback(struct hid_sensor_hub_device *hsdev,
+ u32 usage_id,
+ struct hid_sensor_hub_callbacks *usage_callback):
+
+Registers callbacks for an usage id. The callback functions are not allowed
+to sleep.
+
+
+int sensor_hub_remove_callback(struct hid_sensor_hub_device *hsdev,
+ u32 usage_id):
+
+Removes callbacks for an usage id.
+
+
+Parsing function:
+int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
+ u8 type,
+ u32 usage_id, u32 attr_usage_id,
+ struct hid_sensor_hub_attribute_info *info);
+
+A processing driver can look for some field of interest and check if it exists
+in a report descriptor. If it exists it will store necessary information
+so that fields can be set or get individually.
+These indexes avoid searching every time and getting field index to get or set.
+
+
+Set Feature report
+int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
+ u32 field_index, s32 value);
+
+This interface is used to set a value for a field in feature report. For example
+if there is a field report_interval, which is parsed by a call to
+sensor_hub_input_get_attribute_info before, then it can directly set that individual
+field.
+
+
+int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
+ u32 field_index, s32 *value);
+
+This interface is used to get a value for a field in input report. For example
+if there is a field report_interval, which is parsed by a call to
+sensor_hub_input_get_attribute_info before, then it can directly get that individual
+field value.
+
+
+int sensor_hub_input_attr_get_raw_value(struct hid_sensor_hub_device *hsdev,
+ u32 usage_id,
+ u32 attr_usage_id, u32 report_id);
+
+This is used to get a particular field value through input reports. For example
+accelerometer wants to poll X axis value, then it can call this function with
+the usage id of X axis. HID sensors can provide events, so this is not necessary
+to poll for any field. If there is some new sample, the core driver will call
+registered callback function to process the sample.
+
+
+----------
+
+HID Custom and generic Sensors
+
+HID Sensor specification defines two special sensor usage types. Since they
+don't represent a standard sensor, it is not possible to define using Linux IIO
+type interfaces.
+The purpose of these sensors is to extend the functionality or provide a
+way to obfuscate the data being communicated by a sensor. Without knowing the
+mapping between the data and its encapsulated form, it is difficult for
+an application/driver to determine what data is being communicated by the sensor.
+This allows some differentiating use cases, where vendor can provide applications.
+Some common use cases are debug other sensors or to provide some events like
+keyboard attached/detached or lid open/close.
+
+To allow application to utilize these sensors, here they are exported uses sysfs
+attribute groups, attributes and misc device interface.
+
+An example of this representation on sysfs:
+/sys/devices/pci0000:00/INT33C2:00/i2c-0/i2c-INT33D1:00/0018:8086:09FA.0001/HID-SENSOR-2000e1.6.auto$ tree -R
+.
+????????? enable_sensor
+????????? feature-0-200316
+??????? ????????? feature-0-200316-maximum
+??????? ????????? feature-0-200316-minimum
+??????? ????????? feature-0-200316-name
+??????? ????????? feature-0-200316-size
+??????? ????????? feature-0-200316-unit-expo
+??????? ????????? feature-0-200316-units
+??????? ????????? feature-0-200316-value
+????????? feature-1-200201
+??????? ????????? feature-1-200201-maximum
+??????? ????????? feature-1-200201-minimum
+??????? ????????? feature-1-200201-name
+??????? ????????? feature-1-200201-size
+??????? ????????? feature-1-200201-unit-expo
+??????? ????????? feature-1-200201-units
+??????? ????????? feature-1-200201-value
+????????? input-0-200201
+??????? ????????? input-0-200201-maximum
+??????? ????????? input-0-200201-minimum
+??????? ????????? input-0-200201-name
+??????? ????????? input-0-200201-size
+??????? ????????? input-0-200201-unit-expo
+??????? ????????? input-0-200201-units
+??????? ????????? input-0-200201-value
+????????? input-1-200202
+??????? ????????? input-1-200202-maximum
+??????? ????????? input-1-200202-minimum
+??????? ????????? input-1-200202-name
+??????? ????????? input-1-200202-size
+??????? ????????? input-1-200202-unit-expo
+??????? ????????? input-1-200202-units
+??????? ????????? input-1-200202-value
+
+Here there is a custom sensors with four fields, two feature and two inputs.
+Each field is represented by a set of attributes. All fields except the "value"
+are read only. The value field is a RW field.
+Example
+/sys/bus/platform/devices/HID-SENSOR-2000e1.6.auto/feature-0-200316$ grep -r . *
+feature-0-200316-maximum:6
+feature-0-200316-minimum:0
+feature-0-200316-name:property-reporting-state
+feature-0-200316-size:1
+feature-0-200316-unit-expo:0
+feature-0-200316-units:25
+feature-0-200316-value:1
+
+How to enable such sensor?
+By default sensor can be power gated. To enable sysfs attribute "enable" can be
+used.
+$ echo 1 > enable_sensor
+
+Once enabled and powered on, sensor can report value using HID reports.
+These reports are pushed using misc device interface in a FIFO order.
+/dev$ tree | grep HID-SENSOR-2000e1.6.auto
+??????? ????????? 10:53 -> ../HID-SENSOR-2000e1.6.auto
+????????? HID-SENSOR-2000e1.6.auto
+
+Each reports can be of variable length preceded by a header. This header
+consist of a 32 bit usage id, 64 bit time stamp and 32 bit length field of raw
+data.
diff --git a/Documentation/hid/hid-transport.txt b/Documentation/hid/hid-transport.txt
new file mode 100644
index 000000000..3dcba9fd4
--- /dev/null
+++ b/Documentation/hid/hid-transport.txt
@@ -0,0 +1,317 @@
+ HID I/O Transport Drivers
+ ===========================
+
+The HID subsystem is independent of the underlying transport driver. Initially,
+only USB was supported, but other specifications adopted the HID design and
+provided new transport drivers. The kernel includes at least support for USB,
+Bluetooth, I2C and user-space I/O drivers.
+
+1) HID Bus
+==========
+
+The HID subsystem is designed as a bus. Any I/O subsystem may provide HID
+devices and register them with the HID bus. HID core then loads generic device
+drivers on top of it. The transport drivers are responsible of raw data
+transport and device setup/management. HID core is responsible of
+report-parsing, report interpretation and the user-space API. Device specifics
+and quirks are handled by all layers depending on the quirk.
+
+ +-----------+ +-----------+ +-----------+ +-----------+
+ | Device #1 | | Device #i | | Device #j | | Device #k |
+ +-----------+ +-----------+ +-----------+ +-----------+
+ \\ // \\ //
+ +------------+ +------------+
+ | I/O Driver | | I/O Driver |
+ +------------+ +------------+
+ || ||
+ +------------------+ +------------------+
+ | Transport Driver | | Transport Driver |
+ +------------------+ +------------------+
+ \___ ___/
+ \ /
+ +----------------+
+ | HID Core |
+ +----------------+
+ / | | \
+ / | | \
+ ____________/ | | \_________________
+ / | | \
+ / | | \
+ +----------------+ +-----------+ +------------------+ +------------------+
+ | Generic Driver | | MT Driver | | Custom Driver #1 | | Custom Driver #2 |
+ +----------------+ +-----------+ +------------------+ +------------------+
+
+Example Drivers:
+ I/O: USB, I2C, Bluetooth-l2cap
+ Transport: USB-HID, I2C-HID, BT-HIDP
+
+Everything below "HID Core" is simplified in this graph as it is only of
+interest to HID device drivers. Transport drivers do not need to know the
+specifics.
+
+1.1) Device Setup
+-----------------
+
+I/O drivers normally provide hotplug detection or device enumeration APIs to the
+transport drivers. Transport drivers use this to find any suitable HID device.
+They allocate HID device objects and register them with HID core. Transport
+drivers are not required to register themselves with HID core. HID core is never
+aware of which transport drivers are available and is not interested in it. It
+is only interested in devices.
+
+Transport drivers attach a constant "struct hid_ll_driver" object with each
+device. Once a device is registered with HID core, the callbacks provided via
+this struct are used by HID core to communicate with the device.
+
+Transport drivers are responsible of detecting device failures and unplugging.
+HID core will operate a device as long as it is registered regardless of any
+device failures. Once transport drivers detect unplug or failure events, they
+must unregister the device from HID core and HID core will stop using the
+provided callbacks.
+
+1.2) Transport Driver Requirements
+----------------------------------
+
+The terms "asynchronous" and "synchronous" in this document describe the
+transmission behavior regarding acknowledgements. An asynchronous channel must
+not perform any synchronous operations like waiting for acknowledgements or
+verifications. Generally, HID calls operating on asynchronous channels must be
+running in atomic-context just fine.
+On the other hand, synchronous channels can be implemented by the transport
+driver in whatever way they like. They might just be the same as asynchronous
+channels, but they can also provide acknowledgement reports, automatic
+retransmission on failure, etc. in a blocking manner. If such functionality is
+required on asynchronous channels, a transport-driver must implement that via
+its own worker threads.
+
+HID core requires transport drivers to follow a given design. A Transport
+driver must provide two bi-directional I/O channels to each HID device. These
+channels must not necessarily be bi-directional in the hardware itself. A
+transport driver might just provide 4 uni-directional channels. Or it might
+multiplex all four on a single physical channel. However, in this document we
+will describe them as two bi-directional channels as they have several
+properties in common.
+
+ - Interrupt Channel (intr): The intr channel is used for asynchronous data
+ reports. No management commands or data acknowledgements are sent on this
+ channel. Any unrequested incoming or outgoing data report must be sent on
+ this channel and is never acknowledged by the remote side. Devices usually
+ send their input events on this channel. Outgoing events are normally
+ not send via intr, except if high throughput is required.
+ - Control Channel (ctrl): The ctrl channel is used for synchronous requests and
+ device management. Unrequested data input events must not be sent on this
+ channel and are normally ignored. Instead, devices only send management
+ events or answers to host requests on this channel.
+ The control-channel is used for direct blocking queries to the device
+ independent of any events on the intr-channel.
+ Outgoing reports are usually sent on the ctrl channel via synchronous
+ SET_REPORT requests.
+
+Communication between devices and HID core is mostly done via HID reports. A
+report can be of one of three types:
+
+ - INPUT Report: Input reports provide data from device to host. This
+ data may include button events, axis events, battery status or more. This
+ data is generated by the device and sent to the host with or without
+ requiring explicit requests. Devices can choose to send data continuously or
+ only on change.
+ - OUTPUT Report: Output reports change device states. They are sent from host
+ to device and may include LED requests, rumble requests or more. Output
+ reports are never sent from device to host, but a host can retrieve their
+ current state.
+ Hosts may choose to send output reports either continuously or only on
+ change.
+ - FEATURE Report: Feature reports are used for specific static device features
+ and never reported spontaneously. A host can read and/or write them to access
+ data like battery-state or device-settings.
+ Feature reports are never sent without requests. A host must explicitly set
+ or retrieve a feature report. This also means, feature reports are never sent
+ on the intr channel as this channel is asynchronous.
+
+INPUT and OUTPUT reports can be sent as pure data reports on the intr channel.
+For INPUT reports this is the usual operational mode. But for OUTPUT reports,
+this is rarely done as OUTPUT reports are normally quite scarce. But devices are
+free to make excessive use of asynchronous OUTPUT reports (for instance, custom
+HID audio speakers make great use of it).
+
+Plain reports must not be sent on the ctrl channel, though. Instead, the ctrl
+channel provides synchronous GET/SET_REPORT requests. Plain reports are only
+allowed on the intr channel and are the only means of data there.
+
+ - GET_REPORT: A GET_REPORT request has a report ID as payload and is sent
+ from host to device. The device must answer with a data report for the
+ requested report ID on the ctrl channel as a synchronous acknowledgement.
+ Only one GET_REPORT request can be pending for each device. This restriction
+ is enforced by HID core as several transport drivers don't allow multiple
+ simultaneous GET_REPORT requests.
+ Note that data reports which are sent as answer to a GET_REPORT request are
+ not handled as generic device events. That is, if a device does not operate
+ in continuous data reporting mode, an answer to GET_REPORT does not replace
+ the raw data report on the intr channel on state change.
+ GET_REPORT is only used by custom HID device drivers to query device state.
+ Normally, HID core caches any device state so this request is not necessary
+ on devices that follow the HID specs except during device initialization to
+ retrieve the current state.
+ GET_REPORT requests can be sent for any of the 3 report types and shall
+ return the current report state of the device. However, OUTPUT reports as
+ payload may be blocked by the underlying transport driver if the
+ specification does not allow them.
+ - SET_REPORT: A SET_REPORT request has a report ID plus data as payload. It is
+ sent from host to device and a device must update it's current report state
+ according to the given data. Any of the 3 report types can be used. However,
+ INPUT reports as payload might be blocked by the underlying transport driver
+ if the specification does not allow them.
+ A device must answer with a synchronous acknowledgement. However, HID core
+ does not require transport drivers to forward this acknowledgement to HID
+ core.
+ Same as for GET_REPORT, only one SET_REPORT can be pending at a time. This
+ restriction is enforced by HID core as some transport drivers do not support
+ multiple synchronous SET_REPORT requests.
+
+Other ctrl-channel requests are supported by USB-HID but are not available
+(or deprecated) in most other transport level specifications:
+
+ - GET/SET_IDLE: Only used by USB-HID and I2C-HID.
+ - GET/SET_PROTOCOL: Not used by HID core.
+ - RESET: Used by I2C-HID, not hooked up in HID core.
+ - SET_POWER: Used by I2C-HID, not hooked up in HID core.
+
+2) HID API
+==========
+
+2.1) Initialization
+-------------------
+
+Transport drivers normally use the following procedure to register a new device
+with HID core:
+
+ struct hid_device *hid;
+ int ret;
+
+ hid = hid_allocate_device();
+ if (IS_ERR(hid)) {
+ ret = PTR_ERR(hid);
+ goto err_<...>;
+ }
+
+ strlcpy(hid->name, <device-name-src>, 127);
+ strlcpy(hid->phys, <device-phys-src>, 63);
+ strlcpy(hid->uniq, <device-uniq-src>, 63);
+
+ hid->ll_driver = &custom_ll_driver;
+ hid->bus = <device-bus>;
+ hid->vendor = <device-vendor>;
+ hid->product = <device-product>;
+ hid->version = <device-version>;
+ hid->country = <device-country>;
+ hid->dev.parent = <pointer-to-parent-device>;
+ hid->driver_data = <transport-driver-data-field>;
+
+ ret = hid_add_device(hid);
+ if (ret)
+ goto err_<...>;
+
+Once hid_add_device() is entered, HID core might use the callbacks provided in
+"custom_ll_driver". Note that fields like "country" can be ignored by underlying
+transport-drivers if not supported.
+
+To unregister a device, use:
+
+ hid_destroy_device(hid);
+
+Once hid_destroy_device() returns, HID core will no longer make use of any
+driver callbacks.
+
+2.2) hid_ll_driver operations
+-----------------------------
+
+The available HID callbacks are:
+ - int (*start) (struct hid_device *hdev)
+ Called from HID device drivers once they want to use the device. Transport
+ drivers can choose to setup their device in this callback. However, normally
+ devices are already set up before transport drivers register them to HID core
+ so this is mostly only used by USB-HID.
+
+ - void (*stop) (struct hid_device *hdev)
+ Called from HID device drivers once they are done with a device. Transport
+ drivers can free any buffers and deinitialize the device. But note that
+ ->start() might be called again if another HID device driver is loaded on the
+ device.
+ Transport drivers are free to ignore it and deinitialize devices after they
+ destroyed them via hid_destroy_device().
+
+ - int (*open) (struct hid_device *hdev)
+ Called from HID device drivers once they are interested in data reports.
+ Usually, while user-space didn't open any input API/etc., device drivers are
+ not interested in device data and transport drivers can put devices asleep.
+ However, once ->open() is called, transport drivers must be ready for I/O.
+ ->open() calls are nested for each client that opens the HID device.
+
+ - void (*close) (struct hid_device *hdev)
+ Called from HID device drivers after ->open() was called but they are no
+ longer interested in device reports. (Usually if user-space closed any input
+ devices of the driver).
+ Transport drivers can put devices asleep and terminate any I/O of all
+ ->open() calls have been followed by a ->close() call. However, ->start() may
+ be called again if the device driver is interested in input reports again.
+
+ - int (*parse) (struct hid_device *hdev)
+ Called once during device setup after ->start() has been called. Transport
+ drivers must read the HID report-descriptor from the device and tell HID core
+ about it via hid_parse_report().
+
+ - int (*power) (struct hid_device *hdev, int level)
+ Called by HID core to give PM hints to transport drivers. Usually this is
+ analogical to the ->open() and ->close() hints and redundant.
+
+ - void (*request) (struct hid_device *hdev, struct hid_report *report,
+ int reqtype)
+ Send an HID request on the ctrl channel. "report" contains the report that
+ should be sent and "reqtype" the request type. Request-type can be
+ HID_REQ_SET_REPORT or HID_REQ_GET_REPORT.
+ This callback is optional. If not provided, HID core will assemble a raw
+ report following the HID specs and send it via the ->raw_request() callback.
+ The transport driver is free to implement this asynchronously.
+
+ - int (*wait) (struct hid_device *hdev)
+ Used by HID core before calling ->request() again. A transport driver can use
+ it to wait for any pending requests to complete if only one request is
+ allowed at a time.
+
+ - int (*raw_request) (struct hid_device *hdev, unsigned char reportnum,
+ __u8 *buf, size_t count, unsigned char rtype,
+ int reqtype)
+ Same as ->request() but provides the report as raw buffer. This request shall
+ be synchronous. A transport driver must not use ->wait() to complete such
+ requests. This request is mandatory and hid core will reject the device if
+ it is missing.
+
+ - int (*output_report) (struct hid_device *hdev, __u8 *buf, size_t len)
+ Send raw output report via intr channel. Used by some HID device drivers
+ which require high throughput for outgoing requests on the intr channel. This
+ must not cause SET_REPORT calls! This must be implemented as asynchronous
+ output report on the intr channel!
+
+ - int (*idle) (struct hid_device *hdev, int report, int idle, int reqtype)
+ Perform SET/GET_IDLE request. Only used by USB-HID, do not implement!
+
+2.3) Data Path
+--------------
+
+Transport drivers are responsible of reading data from I/O devices. They must
+handle any I/O-related state-tracking themselves. HID core does not implement
+protocol handshakes or other management commands which can be required by the
+given HID transport specification.
+
+Every raw data packet read from a device must be fed into HID core via
+hid_input_report(). You must specify the channel-type (intr or ctrl) and report
+type (input/output/feature). Under normal conditions, only input reports are
+provided via this API.
+
+Responses to GET_REPORT requests via ->request() must also be provided via this
+API. Responses to ->raw_request() are synchronous and must be intercepted by the
+transport driver and not passed to hid_input_report().
+Acknowledgements to SET_REPORT requests are not of interest to HID core.
+
+----------------------------------------------------
+Written 2013, David Herrmann <dh.herrmann@gmail.com>
diff --git a/Documentation/hid/hiddev.txt b/Documentation/hid/hiddev.txt
new file mode 100644
index 000000000..6e8c9f1d2
--- /dev/null
+++ b/Documentation/hid/hiddev.txt
@@ -0,0 +1,205 @@
+Care and feeding of your Human Interface Devices
+
+INTRODUCTION
+
+In addition to the normal input type HID devices, USB also uses the
+human interface device protocols for things that are not really human
+interfaces, but have similar sorts of communication needs. The two big
+examples for this are power devices (especially uninterruptable power
+supplies) and monitor control on higher end monitors.
+
+To support these disparate requirements, the Linux USB system provides
+HID events to two separate interfaces:
+* the input subsystem, which converts HID events into normal input
+device interfaces (such as keyboard, mouse and joystick) and a
+normalised event interface - see Documentation/input/input.txt
+* the hiddev interface, which provides fairly raw HID events
+
+The data flow for a HID event produced by a device is something like
+the following :
+
+ usb.c ---> hid-core.c ----> hid-input.c ----> [keyboard/mouse/joystick/event]
+ |
+ |
+ --> hiddev.c ----> POWER / MONITOR CONTROL
+
+In addition, other subsystems (apart from USB) can potentially feed
+events into the input subsystem, but these have no effect on the hid
+device interface.
+
+USING THE HID DEVICE INTERFACE
+
+The hiddev interface is a char interface using the normal USB major,
+with the minor numbers starting at 96 and finishing at 111. Therefore,
+you need the following commands:
+mknod /dev/usb/hiddev0 c 180 96
+mknod /dev/usb/hiddev1 c 180 97
+mknod /dev/usb/hiddev2 c 180 98
+mknod /dev/usb/hiddev3 c 180 99
+mknod /dev/usb/hiddev4 c 180 100
+mknod /dev/usb/hiddev5 c 180 101
+mknod /dev/usb/hiddev6 c 180 102
+mknod /dev/usb/hiddev7 c 180 103
+mknod /dev/usb/hiddev8 c 180 104
+mknod /dev/usb/hiddev9 c 180 105
+mknod /dev/usb/hiddev10 c 180 106
+mknod /dev/usb/hiddev11 c 180 107
+mknod /dev/usb/hiddev12 c 180 108
+mknod /dev/usb/hiddev13 c 180 109
+mknod /dev/usb/hiddev14 c 180 110
+mknod /dev/usb/hiddev15 c 180 111
+
+So you point your hiddev compliant user-space program at the correct
+interface for your device, and it all just works.
+
+Assuming that you have a hiddev compliant user-space program, of
+course. If you need to write one, read on.
+
+
+THE HIDDEV API
+This description should be read in conjunction with the HID
+specification, freely available from http://www.usb.org, and
+conveniently linked of http://www.linux-usb.org.
+
+The hiddev API uses a read() interface, and a set of ioctl() calls.
+
+HID devices exchange data with the host computer using data
+bundles called "reports". Each report is divided into "fields",
+each of which can have one or more "usages". In the hid-core,
+each one of these usages has a single signed 32 bit value.
+
+read():
+This is the event interface. When the HID device's state changes,
+it performs an interrupt transfer containing a report which contains
+the changed value. The hid-core.c module parses the report, and
+returns to hiddev.c the individual usages that have changed within
+the report. In its basic mode, the hiddev will make these individual
+usage changes available to the reader using a struct hiddev_event:
+
+ struct hiddev_event {
+ unsigned hid;
+ signed int value;
+ };
+
+containing the HID usage identifier for the status that changed, and
+the value that it was changed to. Note that the structure is defined
+within <linux/hiddev.h>, along with some other useful #defines and
+structures. The HID usage identifier is a composite of the HID usage
+page shifted to the 16 high order bits ORed with the usage code. The
+behavior of the read() function can be modified using the HIDIOCSFLAG
+ioctl() described below.
+
+
+ioctl():
+This is the control interface. There are a number of controls:
+
+HIDIOCGVERSION - int (read)
+Gets the version code out of the hiddev driver.
+
+HIDIOCAPPLICATION - (none)
+This ioctl call returns the HID application usage associated with the
+hid device. The third argument to ioctl() specifies which application
+index to get. This is useful when the device has more than one
+application collection. If the index is invalid (greater or equal to
+the number of application collections this device has) the ioctl
+returns -1. You can find out beforehand how many application
+collections the device has from the num_applications field from the
+hiddev_devinfo structure.
+
+HIDIOCGCOLLECTIONINFO - struct hiddev_collection_info (read/write)
+This returns a superset of the information above, providing not only
+application collections, but all the collections the device has. It
+also returns the level the collection lives in the hierarchy.
+The user passes in a hiddev_collection_info struct with the index
+field set to the index that should be returned. The ioctl fills in
+the other fields. If the index is larger than the last collection
+index, the ioctl returns -1 and sets errno to -EINVAL.
+
+HIDIOCGDEVINFO - struct hiddev_devinfo (read)
+Gets a hiddev_devinfo structure which describes the device.
+
+HIDIOCGSTRING - struct hiddev_string_descriptor (read/write)
+Gets a string descriptor from the device. The caller must fill in the
+"index" field to indicate which descriptor should be returned.
+
+HIDIOCINITREPORT - (none)
+Instructs the kernel to retrieve all input and feature report values
+from the device. At this point, all the usage structures will contain
+current values for the device, and will maintain it as the device
+changes. Note that the use of this ioctl is unnecessary in general,
+since later kernels automatically initialize the reports from the
+device at attach time.
+
+HIDIOCGNAME - string (variable length)
+Gets the device name
+
+HIDIOCGREPORT - struct hiddev_report_info (write)
+Instructs the kernel to get a feature or input report from the device,
+in order to selectively update the usage structures (in contrast to
+INITREPORT).
+
+HIDIOCSREPORT - struct hiddev_report_info (write)
+Instructs the kernel to send a report to the device. This report can
+be filled in by the user through HIDIOCSUSAGE calls (below) to fill in
+individual usage values in the report before sending the report in full
+to the device.
+
+HIDIOCGREPORTINFO - struct hiddev_report_info (read/write)
+Fills in a hiddev_report_info structure for the user. The report is
+looked up by type (input, output or feature) and id, so these fields
+must be filled in by the user. The ID can be absolute -- the actual
+report id as reported by the device -- or relative --
+HID_REPORT_ID_FIRST for the first report, and (HID_REPORT_ID_NEXT |
+report_id) for the next report after report_id. Without a-priori
+information about report ids, the right way to use this ioctl is to
+use the relative IDs above to enumerate the valid IDs. The ioctl
+returns non-zero when there is no more next ID. The real report ID is
+filled into the returned hiddev_report_info structure.
+
+HIDIOCGFIELDINFO - struct hiddev_field_info (read/write)
+Returns the field information associated with a report in a
+hiddev_field_info structure. The user must fill in report_id and
+report_type in this structure, as above. The field_index should also
+be filled in, which should be a number from 0 and maxfield-1, as
+returned from a previous HIDIOCGREPORTINFO call.
+
+HIDIOCGUCODE - struct hiddev_usage_ref (read/write)
+Returns the usage_code in a hiddev_usage_ref structure, given that
+given its report type, report id, field index, and index within the
+field have already been filled into the structure.
+
+HIDIOCGUSAGE - struct hiddev_usage_ref (read/write)
+Returns the value of a usage in a hiddev_usage_ref structure. The
+usage to be retrieved can be specified as above, or the user can
+choose to fill in the report_type field and specify the report_id as
+HID_REPORT_ID_UNKNOWN. In this case, the hiddev_usage_ref will be
+filled in with the report and field information associated with this
+usage if it is found.
+
+HIDIOCSUSAGE - struct hiddev_usage_ref (write)
+Sets the value of a usage in an output report. The user fills in
+the hiddev_usage_ref structure as above, but additionally fills in
+the value field.
+
+HIDIOGCOLLECTIONINDEX - struct hiddev_usage_ref (write)
+Returns the collection index associated with this usage. This
+indicates where in the collection hierarchy this usage sits.
+
+HIDIOCGFLAG - int (read)
+HIDIOCSFLAG - int (write)
+These operations respectively inspect and replace the mode flags
+that influence the read() call above. The flags are as follows:
+
+ HIDDEV_FLAG_UREF - read() calls will now return
+ struct hiddev_usage_ref instead of struct hiddev_event.
+ This is a larger structure, but in situations where the
+ device has more than one usage in its reports with the
+ same usage code, this mode serves to resolve such
+ ambiguity.
+
+ HIDDEV_FLAG_REPORT - This flag can only be used in conjunction
+ with HIDDEV_FLAG_UREF. With this flag set, when the device
+ sends a report, a struct hiddev_usage_ref will be returned
+ to read() filled in with the report_type and report_id, but
+ with field_index set to FIELD_INDEX_NONE. This serves as
+ additional notification when the device has sent a report.
diff --git a/Documentation/hid/hidraw.txt b/Documentation/hid/hidraw.txt
new file mode 100644
index 000000000..029e6cb9a
--- /dev/null
+++ b/Documentation/hid/hidraw.txt
@@ -0,0 +1,119 @@
+ HIDRAW - Raw Access to USB and Bluetooth Human Interface Devices
+ ==================================================================
+
+The hidraw driver provides a raw interface to USB and Bluetooth Human
+Interface Devices (HIDs). It differs from hiddev in that reports sent and
+received are not parsed by the HID parser, but are sent to and received from
+the device unmodified.
+
+Hidraw should be used if the userspace application knows exactly how to
+communicate with the hardware device, and is able to construct the HID
+reports manually. This is often the case when making userspace drivers for
+custom HID devices.
+
+Hidraw is also useful for communicating with non-conformant HID devices
+which send and receive data in a way that is inconsistent with their report
+descriptors. Because hiddev parses reports which are sent and received
+through it, checking them against the device's report descriptor, such
+communication with these non-conformant devices is impossible using hiddev.
+Hidraw is the only alternative, short of writing a custom kernel driver, for
+these non-conformant devices.
+
+A benefit of hidraw is that its use by userspace applications is independent
+of the underlying hardware type. Currently, Hidraw is implemented for USB
+and Bluetooth. In the future, as new hardware bus types are developed which
+use the HID specification, hidraw will be expanded to add support for these
+new bus types.
+
+Hidraw uses a dynamic major number, meaning that udev should be relied on to
+create hidraw device nodes. Udev will typically create the device nodes
+directly under /dev (eg: /dev/hidraw0). As this location is distribution-
+and udev rule-dependent, applications should use libudev to locate hidraw
+devices attached to the system. There is a tutorial on libudev with a
+working example at:
+ http://www.signal11.us/oss/udev/
+
+The HIDRAW API
+---------------
+
+read()
+-------
+read() will read a queued report received from the HID device. On USB
+devices, the reports read using read() are the reports sent from the device
+on the INTERRUPT IN endpoint. By default, read() will block until there is
+a report available to be read. read() can be made non-blocking, by passing
+the O_NONBLOCK flag to open(), or by setting the O_NONBLOCK flag using
+fcntl().
+
+On a device which uses numbered reports, the first byte of the returned data
+will be the report number; the report data follows, beginning in the second
+byte. For devices which do not use numbered reports, the report data
+will begin at the first byte.
+
+write()
+--------
+The write() function will write a report to the device. For USB devices, if
+the device has an INTERRUPT OUT endpoint, the report will be sent on that
+endpoint. If it does not, the report will be sent over the control endpoint,
+using a SET_REPORT transfer.
+
+The first byte of the buffer passed to write() should be set to the report
+number. If the device does not use numbered reports, the first byte should
+be set to 0. The report data itself should begin at the second byte.
+
+ioctl()
+--------
+Hidraw supports the following ioctls:
+
+HIDIOCGRDESCSIZE: Get Report Descriptor Size
+This ioctl will get the size of the device's report descriptor.
+
+HIDIOCGRDESC: Get Report Descriptor
+This ioctl returns the device's report descriptor using a
+hidraw_report_descriptor struct. Make sure to set the size field of the
+hidraw_report_descriptor struct to the size returned from HIDIOCGRDESCSIZE.
+
+HIDIOCGRAWINFO: Get Raw Info
+This ioctl will return a hidraw_devinfo struct containing the bus type, the
+vendor ID (VID), and product ID (PID) of the device. The bus type can be one
+of:
+ BUS_USB
+ BUS_HIL
+ BUS_BLUETOOTH
+ BUS_VIRTUAL
+which are defined in linux/input.h.
+
+HIDIOCGRAWNAME(len): Get Raw Name
+This ioctl returns a string containing the vendor and product strings of
+the device. The returned string is Unicode, UTF-8 encoded.
+
+HIDIOCGRAWPHYS(len): Get Physical Address
+This ioctl returns a string representing the physical address of the device.
+For USB devices, the string contains the physical path to the device (the
+USB controller, hubs, ports, etc). For Bluetooth devices, the string
+contains the hardware (MAC) address of the device.
+
+HIDIOCSFEATURE(len): Send a Feature Report
+This ioctl will send a feature report to the device. Per the HID
+specification, feature reports are always sent using the control endpoint.
+Set the first byte of the supplied buffer to the report number. For devices
+which do not use numbered reports, set the first byte to 0. The report data
+begins in the second byte. Make sure to set len accordingly, to one more
+than the length of the report (to account for the report number).
+
+HIDIOCGFEATURE(len): Get a Feature Report
+This ioctl will request a feature report from the device using the control
+endpoint. The first byte of the supplied buffer should be set to the report
+number of the requested report. For devices which do not use numbered
+reports, set the first byte to 0. The report will be returned starting at
+the first byte of the buffer (ie: the report number is not returned).
+
+Example
+---------
+In samples/, find hid-example.c, which shows examples of read(), write(),
+and all the ioctls for hidraw. The code may be used by anyone for any
+purpose, and can serve as a starting point for developing applications using
+hidraw.
+
+Document by:
+ Alan Ott <alan@signal11.us>, Signal 11 Software
diff --git a/Documentation/hid/uhid.txt b/Documentation/hid/uhid.txt
new file mode 100644
index 000000000..c8656dd02
--- /dev/null
+++ b/Documentation/hid/uhid.txt
@@ -0,0 +1,187 @@
+ UHID - User-space I/O driver support for HID subsystem
+ ========================================================
+
+UHID allows user-space to implement HID transport drivers. Please see
+hid-transport.txt for an introduction into HID transport drivers. This document
+relies heavily on the definitions declared there.
+
+With UHID, a user-space transport driver can create kernel hid-devices for each
+device connected to the user-space controlled bus. The UHID API defines the I/O
+events provided from the kernel to user-space and vice versa.
+
+There is an example user-space application in ./samples/uhid/uhid-example.c
+
+The UHID API
+------------
+
+UHID is accessed through a character misc-device. The minor-number is allocated
+dynamically so you need to rely on udev (or similar) to create the device node.
+This is /dev/uhid by default.
+
+If a new device is detected by your HID I/O Driver and you want to register this
+device with the HID subsystem, then you need to open /dev/uhid once for each
+device you want to register. All further communication is done by read()'ing or
+write()'ing "struct uhid_event" objects. Non-blocking operations are supported
+by setting O_NONBLOCK.
+
+struct uhid_event {
+ __u32 type;
+ union {
+ struct uhid_create2_req create2;
+ struct uhid_output_req output;
+ struct uhid_input2_req input2;
+ ...
+ } u;
+};
+
+The "type" field contains the ID of the event. Depending on the ID different
+payloads are sent. You must not split a single event across multiple read()'s or
+multiple write()'s. A single event must always be sent as a whole. Furthermore,
+only a single event can be sent per read() or write(). Pending data is ignored.
+If you want to handle multiple events in a single syscall, then use vectored
+I/O with readv()/writev().
+The "type" field defines the payload. For each type, there is a
+payload-structure available in the union "u" (except for empty payloads). This
+payload contains management and/or device data.
+
+The first thing you should do is sending an UHID_CREATE2 event. This will
+register the device. UHID will respond with an UHID_START event. You can now
+start sending data to and reading data from UHID. However, unless UHID sends the
+UHID_OPEN event, the internally attached HID Device Driver has no user attached.
+That is, you might put your device asleep unless you receive the UHID_OPEN
+event. If you receive the UHID_OPEN event, you should start I/O. If the last
+user closes the HID device, you will receive an UHID_CLOSE event. This may be
+followed by an UHID_OPEN event again and so on. There is no need to perform
+reference-counting in user-space. That is, you will never receive multiple
+UHID_OPEN events without an UHID_CLOSE event. The HID subsystem performs
+ref-counting for you.
+You may decide to ignore UHID_OPEN/UHID_CLOSE, though. I/O is allowed even
+though the device may have no users.
+
+If you want to send data on the interrupt channel to the HID subsystem, you send
+an HID_INPUT2 event with your raw data payload. If the kernel wants to send data
+on the interrupt channel to the device, you will read an UHID_OUTPUT event.
+Data requests on the control channel are currently limited to GET_REPORT and
+SET_REPORT (no other data reports on the control channel are defined so far).
+Those requests are always synchronous. That means, the kernel sends
+UHID_GET_REPORT and UHID_SET_REPORT events and requires you to forward them to
+the device on the control channel. Once the device responds, you must forward
+the response via UHID_GET_REPORT_REPLY and UHID_SET_REPORT_REPLY to the kernel.
+The kernel blocks internal driver-execution during such round-trips (times out
+after a hard-coded period).
+
+If your device disconnects, you should send an UHID_DESTROY event. This will
+unregister the device. You can now send UHID_CREATE2 again to register a new
+device.
+If you close() the fd, the device is automatically unregistered and destroyed
+internally.
+
+write()
+-------
+write() allows you to modify the state of the device and feed input data into
+the kernel. The kernel will parse the event immediately and if the event ID is
+not supported, it will return -EOPNOTSUPP. If the payload is invalid, then
+-EINVAL is returned, otherwise, the amount of data that was read is returned and
+the request was handled successfully. O_NONBLOCK does not affect write() as
+writes are always handled immediately in a non-blocking fashion. Future requests
+might make use of O_NONBLOCK, though.
+
+ UHID_CREATE2:
+ This creates the internal HID device. No I/O is possible until you send this
+ event to the kernel. The payload is of type struct uhid_create2_req and
+ contains information about your device. You can start I/O now.
+
+ UHID_DESTROY:
+ This destroys the internal HID device. No further I/O will be accepted. There
+ may still be pending messages that you can receive with read() but no further
+ UHID_INPUT events can be sent to the kernel.
+ You can create a new device by sending UHID_CREATE2 again. There is no need to
+ reopen the character device.
+
+ UHID_INPUT2:
+ You must send UHID_CREATE2 before sending input to the kernel! This event
+ contains a data-payload. This is the raw data that you read from your device
+ on the interrupt channel. The kernel will parse the HID reports.
+
+ UHID_GET_REPORT_REPLY:
+ If you receive a UHID_GET_REPORT request you must answer with this request.
+ You must copy the "id" field from the request into the answer. Set the "err"
+ field to 0 if no error occurred or to EIO if an I/O error occurred.
+ If "err" is 0 then you should fill the buffer of the answer with the results
+ of the GET_REPORT request and set "size" correspondingly.
+
+ UHID_SET_REPORT_REPLY:
+ This is the SET_REPORT equivalent of UHID_GET_REPORT_REPLY. Unlike GET_REPORT,
+ SET_REPORT never returns a data buffer, therefore, it's sufficient to set the
+ "id" and "err" fields correctly.
+
+read()
+------
+read() will return a queued output report. No reaction is required to any of
+them but you should handle them according to your needs.
+
+ UHID_START:
+ This is sent when the HID device is started. Consider this as an answer to
+ UHID_CREATE2. This is always the first event that is sent. Note that this
+ event might not be available immediately after write(UHID_CREATE2) returns.
+ Device drivers might required delayed setups.
+ This event contains a payload of type uhid_start_req. The "dev_flags" field
+ describes special behaviors of a device. The following flags are defined:
+ UHID_DEV_NUMBERED_FEATURE_REPORTS:
+ UHID_DEV_NUMBERED_OUTPUT_REPORTS:
+ UHID_DEV_NUMBERED_INPUT_REPORTS:
+ Each of these flags defines whether a given report-type uses numbered
+ reports. If numbered reports are used for a type, all messages from
+ the kernel already have the report-number as prefix. Otherwise, no
+ prefix is added by the kernel.
+ For messages sent by user-space to the kernel, you must adjust the
+ prefixes according to these flags.
+
+ UHID_STOP:
+ This is sent when the HID device is stopped. Consider this as an answer to
+ UHID_DESTROY.
+ If you didn't destroy your device via UHID_DESTROY, but the kernel sends an
+ UHID_STOP event, this should usually be ignored. It means that the kernel
+ reloaded/changed the device driver loaded on your HID device (or some other
+ maintenance actions happened).
+ You can usually ignored any UHID_STOP events safely.
+
+ UHID_OPEN:
+ This is sent when the HID device is opened. That is, the data that the HID
+ device provides is read by some other process. You may ignore this event but
+ it is useful for power-management. As long as you haven't received this event
+ there is actually no other process that reads your data so there is no need to
+ send UHID_INPUT2 events to the kernel.
+
+ UHID_CLOSE:
+ This is sent when there are no more processes which read the HID data. It is
+ the counterpart of UHID_OPEN and you may as well ignore this event.
+
+ UHID_OUTPUT:
+ This is sent if the HID device driver wants to send raw data to the I/O
+ device on the interrupt channel. You should read the payload and forward it to
+ the device. The payload is of type "struct uhid_data_req".
+ This may be received even though you haven't received UHID_OPEN, yet.
+
+ UHID_GET_REPORT:
+ This event is sent if the kernel driver wants to perform a GET_REPORT request
+ on the control channeld as described in the HID specs. The report-type and
+ report-number are available in the payload.
+ The kernel serializes GET_REPORT requests so there will never be two in
+ parallel. However, if you fail to respond with a UHID_GET_REPORT_REPLY, the
+ request might silently time out.
+ Once you read a GET_REPORT request, you shall forward it to the hid device and
+ remember the "id" field in the payload. Once your hid device responds to the
+ GET_REPORT (or if it fails), you must send a UHID_GET_REPORT_REPLY to the
+ kernel with the exact same "id" as in the request. If the request already
+ timed out, the kernel will ignore the response silently. The "id" field is
+ never re-used, so conflicts cannot happen.
+
+ UHID_SET_REPORT:
+ This is the SET_REPORT equivalent of UHID_GET_REPORT. On receipt, you shall
+ send a SET_REPORT request to your hid device. Once it replies, you must tell
+ the kernel about it via UHID_SET_REPORT_REPLY.
+ The same restrictions as for UHID_GET_REPORT apply.
+
+----------------------------------------------------
+Written 2012, David Herrmann <dh.herrmann@gmail.com>
diff --git a/Documentation/highuid.txt b/Documentation/highuid.txt
new file mode 100644
index 000000000..6bad6f1d1
--- /dev/null
+++ b/Documentation/highuid.txt
@@ -0,0 +1,77 @@
+Notes on the change from 16-bit UIDs to 32-bit UIDs:
+
+- kernel code MUST take into account __kernel_uid_t and __kernel_uid32_t
+ when communicating between user and kernel space in an ioctl or data
+ structure.
+
+- kernel code should use uid_t and gid_t in kernel-private structures and
+ code.
+
+What's left to be done for 32-bit UIDs on all Linux architectures:
+
+- Disk quotas have an interesting limitation that is not related to the
+ maximum UID/GID. They are limited by the maximum file size on the
+ underlying filesystem, because quota records are written at offsets
+ corresponding to the UID in question.
+ Further investigation is needed to see if the quota system can cope
+ properly with huge UIDs. If it can deal with 64-bit file offsets on all
+ architectures, this should not be a problem.
+
+- Decide whether or not to keep backwards compatibility with the system
+ accounting file, or if we should break it as the comments suggest
+ (currently, the old 16-bit UID and GID are still written to disk, and
+ part of the former pad space is used to store separate 32-bit UID and
+ GID)
+
+- Need to validate that OS emulation calls the 16-bit UID
+ compatibility syscalls, if the OS being emulated used 16-bit UIDs, or
+ uses the 32-bit UID system calls properly otherwise.
+
+ This affects at least:
+ iBCS on Intel
+
+ sparc32 emulation on sparc64
+ (need to support whatever new 32-bit UID system calls are added to
+ sparc32)
+
+- Validate that all filesystems behave properly.
+
+ At present, 32-bit UIDs _should_ work for:
+ ext2
+ ufs
+ isofs
+ nfs
+ coda
+ udf
+
+ Ioctl() fixups have been made for:
+ ncpfs
+ smbfs
+
+ Filesystems with simple fixups to prevent 16-bit UID wraparound:
+ minix
+ sysv
+ qnx4
+
+ Other filesystems have not been checked yet.
+
+- The ncpfs and smpfs filesystems cannot presently use 32-bit UIDs in
+ all ioctl()s. Some new ioctl()s have been added with 32-bit UIDs, but
+ more are needed. (as well as new user<->kernel data structures)
+
+- The ELF core dump format only supports 16-bit UIDs on arm, i386, m68k,
+ sh, and sparc32. Fixing this is probably not that important, but would
+ require adding a new ELF section.
+
+- The ioctl()s used to control the in-kernel NFS server only support
+ 16-bit UIDs on arm, i386, m68k, sh, and sparc32.
+
+- make sure that the UID mapping feature of AX25 networking works properly
+ (it should be safe because it's always used a 32-bit integer to
+ communicate between user and kernel)
+
+
+Chris Wing
+wingc@umich.edu
+
+last updated: January 11, 2000
diff --git a/Documentation/hsi.txt b/Documentation/hsi.txt
new file mode 100644
index 000000000..6ac6cd518
--- /dev/null
+++ b/Documentation/hsi.txt
@@ -0,0 +1,75 @@
+HSI - High-speed Synchronous Serial Interface
+
+1. Introduction
+~~~~~~~~~~~~~~~
+
+High Speed Syncronous Interface (HSI) is a fullduplex, low latency protocol,
+that is optimized for die-level interconnect between an Application Processor
+and a Baseband chipset. It has been specified by the MIPI alliance in 2003 and
+implemented by multiple vendors since then.
+
+The HSI interface supports full duplex communication over multiple channels
+(typically 8) and is capable of reaching speeds up to 200 Mbit/s.
+
+The serial protocol uses two signals, DATA and FLAG as combined data and clock
+signals and an additional READY signal for flow control. An additional WAKE
+signal can be used to wakeup the chips from standby modes. The signals are
+commonly prefixed by AC for signals going from the application die to the
+cellular die and CA for signals going the other way around.
+
++------------+ +---------------+
+| Cellular | | Application |
+| Die | | Die |
+| | - - - - - - CAWAKE - - - - - - >| |
+| T|------------ CADATA ------------>|R |
+| X|------------ CAFLAG ------------>|X |
+| |<----------- ACREADY ------------| |
+| | | |
+| | | |
+| |< - - - - - ACWAKE - - - - - - -| |
+| R|<----------- ACDATA -------------|T |
+| X|<----------- ACFLAG -------------|X |
+| |------------ CAREADY ----------->| |
+| | | |
+| | | |
++------------+ +---------------+
+
+2. HSI Subsystem in Linux
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In the Linux kernel the hsi subsystem is supposed to be used for HSI devices.
+The hsi subsystem contains drivers for hsi controllers including support for
+multi-port controllers and provides a generic API for using the HSI ports.
+
+It also contains HSI client drivers, which make use of the generic API to
+implement a protocol used on the HSI interface. These client drivers can
+use an arbitrary number of channels.
+
+3. hsi-char Device
+~~~~~~~~~~~~~~~~~~
+
+Each port automatically registers a generic client driver called hsi_char,
+which provides a charecter device for userspace representing the HSI port.
+It can be used to communicate via HSI from userspace. Userspace may
+configure the hsi_char device using the following ioctl commands:
+
+* HSC_RESET:
+ - flush the HSI port
+
+* HSC_SET_PM
+ - enable or disable the client.
+
+* HSC_SEND_BREAK
+ - send break
+
+* HSC_SET_RX
+ - set RX configuration
+
+* HSC_GET_RX
+ - get RX configuration
+
+* HSC_SET_TX
+ - set TX configuration
+
+* HSC_GET_TX
+ - get TX configuration
diff --git a/Documentation/hw_random.txt b/Documentation/hw_random.txt
new file mode 100644
index 000000000..026e237bb
--- /dev/null
+++ b/Documentation/hw_random.txt
@@ -0,0 +1,90 @@
+Introduction:
+
+ The hw_random framework is software that makes use of a
+ special hardware feature on your CPU or motherboard,
+ a Random Number Generator (RNG). The software has two parts:
+ a core providing the /dev/hw_random character device and its
+ sysfs support, plus a hardware-specific driver that plugs
+ into that core.
+
+ To make the most effective use of these mechanisms, you
+ should download the support software as well. Download the
+ latest version of the "rng-tools" package from the
+ hw_random driver's official Web site:
+
+ http://sourceforge.net/projects/gkernel/
+
+ Those tools use /dev/hw_random to fill the kernel entropy pool,
+ which is used internally and exported by the /dev/urandom and
+ /dev/random special files.
+
+Theory of operation:
+
+ CHARACTER DEVICE. Using the standard open()
+ and read() system calls, you can read random data from
+ the hardware RNG device. This data is NOT CHECKED by any
+ fitness tests, and could potentially be bogus (if the
+ hardware is faulty or has been tampered with). Data is only
+ output if the hardware "has-data" flag is set, but nevertheless
+ a security-conscious person would run fitness tests on the
+ data before assuming it is truly random.
+
+ The rng-tools package uses such tests in "rngd", and lets you
+ run them by hand with a "rngtest" utility.
+
+ /dev/hw_random is char device major 10, minor 183.
+
+ CLASS DEVICE. There is a /sys/class/misc/hw_random node with
+ two unique attributes, "rng_available" and "rng_current". The
+ "rng_available" attribute lists the hardware-specific drivers
+ available, while "rng_current" lists the one which is currently
+ connected to /dev/hw_random. If your system has more than one
+ RNG available, you may change the one used by writing a name from
+ the list in "rng_available" into "rng_current".
+
+==========================================================================
+
+ Hardware driver for Intel/AMD/VIA Random Number Generators (RNG)
+ Copyright 2000,2001 Jeff Garzik <jgarzik@pobox.com>
+ Copyright 2000,2001 Philipp Rumpf <prumpf@mandrakesoft.com>
+
+
+About the Intel RNG hardware, from the firmware hub datasheet:
+
+ The Firmware Hub integrates a Random Number Generator (RNG)
+ using thermal noise generated from inherently random quantum
+ mechanical properties of silicon. When not generating new random
+ bits the RNG circuitry will enter a low power state. Intel will
+ provide a binary software driver to give third party software
+ access to our RNG for use as a security feature. At this time,
+ the RNG is only to be used with a system in an OS-present state.
+
+Intel RNG Driver notes:
+
+ * FIXME: support poll(2)
+
+ NOTE: request_mem_region was removed, for three reasons:
+ 1) Only one RNG is supported by this driver, 2) The location
+ used by the RNG is a fixed location in MMIO-addressable memory,
+ 3) users with properly working BIOS e820 handling will always
+ have the region in which the RNG is located reserved, so
+ request_mem_region calls always fail for proper setups.
+ However, for people who use mem=XX, BIOS e820 information is
+ -not- in /proc/iomem, and request_mem_region(RNG_ADDR) can
+ succeed.
+
+Driver details:
+
+ Based on:
+ Intel 82802AB/82802AC Firmware Hub (FWH) Datasheet
+ May 1999 Order Number: 290658-002 R
+
+ Intel 82802 Firmware Hub: Random Number Generator
+ Programmer's Reference Manual
+ December 1999 Order Number: 298029-001 R
+
+ Intel 82802 Firmware HUB Random Number Generator Driver
+ Copyright (c) 2000 Matt Sottek <msottek@quiknet.com>
+
+ Special thanks to Matt Sottek. I did the "guts", he
+ did the "brains" and all the testing.
diff --git a/Documentation/hwmon/ab8500 b/Documentation/hwmon/ab8500
new file mode 100644
index 000000000..cf169c8ef
--- /dev/null
+++ b/Documentation/hwmon/ab8500
@@ -0,0 +1,22 @@
+Kernel driver ab8500
+====================
+
+Supported chips:
+ * ST-Ericsson AB8500
+ Prefix: 'ab8500'
+ Addresses scanned: -
+ Datasheet: http://www.stericsson.com/developers/documentation.jsp
+
+Authors:
+ Martin Persson <martin.persson@stericsson.com>
+ Hongbo Zhang <hongbo.zhang@linaro.org>
+
+Description
+-----------
+
+See also Documentation/hwmon/abx500. This is the ST-Ericsson AB8500 specific
+driver.
+
+Currently only the AB8500 internal sensor and one external sensor for battery
+temperature are monitored. Other GPADC channels can also be monitored if needed
+in future.
diff --git a/Documentation/hwmon/abituguru b/Documentation/hwmon/abituguru
new file mode 100644
index 000000000..915f32063
--- /dev/null
+++ b/Documentation/hwmon/abituguru
@@ -0,0 +1,92 @@
+Kernel driver abituguru
+=======================
+
+Supported chips:
+ * Abit uGuru revision 1 & 2 (Hardware Monitor part only)
+ Prefix: 'abituguru'
+ Addresses scanned: ISA 0x0E0
+ Datasheet: Not available, this driver is based on reverse engineering.
+ A "Datasheet" has been written based on the reverse engineering it
+ should be available in the same dir as this file under the name
+ abituguru-datasheet.
+ Note:
+ The uGuru is a microcontroller with onboard firmware which programs
+ it to behave as a hwmon IC. There are many different revisions of the
+ firmware and thus effectivly many different revisions of the uGuru.
+ Below is an incomplete list with which revisions are used for which
+ Motherboards:
+ uGuru 1.00 ~ 1.24 (AI7, KV8-MAX3, AN7) (1)
+ uGuru 2.0.0.0 ~ 2.0.4.2 (KV8-PRO)
+ uGuru 2.1.0.0 ~ 2.1.2.8 (AS8, AV8, AA8, AG8, AA8XE, AX8)
+ uGuru 2.2.0.0 ~ 2.2.0.6 (AA8 Fatal1ty)
+ uGuru 2.3.0.0 ~ 2.3.0.9 (AN8)
+ uGuru 3.0.0.0 ~ 3.0.x.x (AW8, AL8, AT8, NI8 SLI, AT8 32X, AN8 32X,
+ AW9D-MAX) (2)
+ 1) For revisions 2 and 3 uGuru's the driver can autodetect the
+ sensortype (Volt or Temp) for bank1 sensors, for revision 1 uGuru's
+ this doesnot always work. For these uGuru's the autodection can
+ be overriden with the bank1_types module param. For all 3 known
+ revison 1 motherboards the correct use of this param is:
+ bank1_types=1,1,0,0,0,0,0,2,0,0,0,0,2,0,0,1
+ You may also need to specify the fan_sensors option for these boards
+ fan_sensors=5
+ 2) There is a separate abituguru3 driver for these motherboards,
+ the abituguru (without the 3 !) driver will not work on these
+ motherboards (and visa versa)!
+
+Authors:
+ Hans de Goede <j.w.r.degoede@hhs.nl>,
+ (Initial reverse engineering done by Olle Sandberg
+ <ollebull@gmail.com>)
+
+
+Module Parameters
+-----------------
+
+* force: bool Force detection. Note this parameter only causes the
+ detection to be skipped, and thus the insmod to
+ succeed. If the uGuru can't be read the actual hwmon
+ driver will not load and thus no hwmon device will get
+ registered.
+* bank1_types: int[] Bank1 sensortype autodetection override:
+ -1 autodetect (default)
+ 0 volt sensor
+ 1 temp sensor
+ 2 not connected
+* fan_sensors: int Tell the driver how many fan speed sensors there are
+ on your motherboard. Default: 0 (autodetect).
+* pwms: int Tell the driver how many fan speed controls (fan
+ pwms) your motherboard has. Default: 0 (autodetect).
+* verbose: int How verbose should the driver be? (0-3):
+ 0 normal output
+ 1 + verbose error reporting
+ 2 + sensors type probing info (default)
+ 3 + retryable error reporting
+ Default: 2 (the driver is still in the testing phase)
+
+Notice if you need any of the first three options above please insmod the
+driver with verbose set to 3 and mail me <j.w.r.degoede@hhs.nl> the output of:
+dmesg | grep abituguru
+
+
+Description
+-----------
+
+This driver supports the hardware monitoring features of the first and
+second revision of the Abit uGuru chip found on Abit uGuru featuring
+motherboards (most modern Abit motherboards).
+
+The first and second revision of the uGuru chip in reality is a Winbond
+W83L950D in disguise (despite Abit claiming it is "a new microprocessor
+designed by the ABIT Engineers"). Unfortunately this doesn't help since the
+W83L950D is a generic microcontroller with a custom Abit application running
+on it.
+
+Despite Abit not releasing any information regarding the uGuru, Olle
+Sandberg <ollebull@gmail.com> has managed to reverse engineer the sensor part
+of the uGuru. Without his work this driver would not have been possible.
+
+Known Issues
+------------
+
+The voltage and frequency control parts of the Abit uGuru are not supported.
diff --git a/Documentation/hwmon/abituguru-datasheet b/Documentation/hwmon/abituguru-datasheet
new file mode 100644
index 000000000..86c0b1251
--- /dev/null
+++ b/Documentation/hwmon/abituguru-datasheet
@@ -0,0 +1,312 @@
+uGuru datasheet
+===============
+
+First of all, what I know about uGuru is no fact based on any help, hints or
+datasheet from Abit. The data I have got on uGuru have I assembled through
+my weak knowledge in "backwards engineering".
+And just for the record, you may have noticed uGuru isn't a chip developed by
+Abit, as they claim it to be. It's really just an microprocessor (uC) created by
+Winbond (W83L950D). And no, reading the manual for this specific uC or
+mailing Windbond for help won't give any useful data about uGuru, as it is
+the program inside the uC that is responding to calls.
+
+Olle Sandberg <ollebull@gmail.com>, 2005-05-25
+
+
+Original version by Olle Sandberg who did the heavy lifting of the initial
+reverse engineering. This version has been almost fully rewritten for clarity
+and extended with write support and info on more databanks, the write support
+is once again reverse engineered by Olle the additional databanks have been
+reverse engineered by me. I would like to express my thanks to Olle, this
+document and the Linux driver could not have been written without his efforts.
+
+Note: because of the lack of specs only the sensors part of the uGuru is
+described here and not the CPU / RAM / etc voltage & frequency control.
+
+Hans de Goede <j.w.r.degoede@hhs.nl>, 28-01-2006
+
+
+Detection
+=========
+
+As far as known the uGuru is always placed at and using the (ISA) I/O-ports
+0xE0 and 0xE4, so we don't have to scan any port-range, just check what the two
+ports are holding for detection. We will refer to 0xE0 as CMD (command-port)
+and 0xE4 as DATA because Abit refers to them with these names.
+
+If DATA holds 0x00 or 0x08 and CMD holds 0x00 or 0xAC an uGuru could be
+present. We have to check for two different values at data-port, because
+after a reboot uGuru will hold 0x00 here, but if the driver is removed and
+later on attached again data-port will hold 0x08, more about this later.
+
+After wider testing of the Linux kernel driver some variants of the uGuru have
+turned up which will hold 0x00 instead of 0xAC at the CMD port, thus we also
+have to test CMD for two different values. On these uGuru's DATA will initially
+hold 0x09 and will only hold 0x08 after reading CMD first, so CMD must be read
+first!
+
+To be really sure an uGuru is present a test read of one or more register
+sets should be done.
+
+
+Reading / Writing
+=================
+
+Addressing
+----------
+
+The uGuru has a number of different addressing levels. The first addressing
+level we will call banks. A bank holds data for one or more sensors. The data
+in a bank for a sensor is one or more bytes large.
+
+The number of bytes is fixed for a given bank, you should always read or write
+that many bytes, reading / writing more will fail, the results when writing
+less then the number of bytes for a given bank are undetermined.
+
+See below for all known bank addresses, numbers of sensors in that bank,
+number of bytes data per sensor and contents/meaning of those bytes.
+
+Although both this document and the kernel driver have kept the sensor
+terminoligy for the addressing within a bank this is not 100% correct, in
+bank 0x24 for example the addressing within the bank selects a PWM output not
+a sensor.
+
+Notice that some banks have both a read and a write address this is how the
+uGuru determines if a read from or a write to the bank is taking place, thus
+when reading you should always use the read address and when writing the
+write address. The write address is always one (1) more than the read address.
+
+
+uGuru ready
+-----------
+
+Before you can read from or write to the uGuru you must first put the uGuru
+in "ready" mode.
+
+To put the uGuru in ready mode first write 0x00 to DATA and then wait for DATA
+to hold 0x09, DATA should read 0x09 within 250 read cycles.
+
+Next CMD _must_ be read and should hold 0xAC, usually CMD will hold 0xAC the
+first read but sometimes it takes a while before CMD holds 0xAC and thus it
+has to be read a number of times (max 50).
+
+After reading CMD, DATA should hold 0x08 which means that the uGuru is ready
+for input. As above DATA will usually hold 0x08 the first read but not always.
+This step can be skipped, but it is undetermined what happens if the uGuru has
+not yet reported 0x08 at DATA and you proceed with writing a bank address.
+
+
+Sending bank and sensor addresses to the uGuru
+----------------------------------------------
+
+First the uGuru must be in "ready" mode as described above, DATA should hold
+0x08 indicating that the uGuru wants input, in this case the bank address.
+
+Next write the bank address to DATA. After the bank address has been written
+wait for to DATA to hold 0x08 again indicating that it wants / is ready for
+more input (max 250 reads).
+
+Once DATA holds 0x08 again write the sensor address to CMD.
+
+
+Reading
+-------
+
+First send the bank and sensor addresses as described above.
+Then for each byte of data you want to read wait for DATA to hold 0x01
+which indicates that the uGuru is ready to be read (max 250 reads) and once
+DATA holds 0x01 read the byte from CMD.
+
+Once all bytes have been read data will hold 0x09, but there is no reason to
+test for this. Notice that the number of bytes is bank address dependent see
+above and below.
+
+After completing a successful read it is advised to put the uGuru back in
+ready mode, so that it is ready for the next read / write cycle. This way
+if your program / driver is unloaded and later loaded again the detection
+algorithm described above will still work.
+
+
+
+Writing
+-------
+
+First send the bank and sensor addresses as described above.
+Then for each byte of data you want to write wait for DATA to hold 0x00
+which indicates that the uGuru is ready to be written (max 250 reads) and
+once DATA holds 0x00 write the byte to CMD.
+
+Once all bytes have been written wait for DATA to hold 0x01 (max 250 reads)
+don't ask why this is the way it is.
+
+Once DATA holds 0x01 read CMD it should hold 0xAC now.
+
+After completing a successful write it is advised to put the uGuru back in
+ready mode, so that it is ready for the next read / write cycle. This way
+if your program / driver is unloaded and later loaded again the detection
+algorithm described above will still work.
+
+
+Gotchas
+-------
+
+After wider testing of the Linux kernel driver some variants of the uGuru have
+turned up which do not hold 0x08 at DATA within 250 reads after writing the
+bank address. With these versions this happens quite frequent, using larger
+timeouts doesn't help, they just go offline for a second or 2, doing some
+internal callibration or whatever. Your code should be prepared to handle
+this and in case of no response in this specific case just goto sleep for a
+while and then retry.
+
+
+Address Map
+===========
+
+Bank 0x20 Alarms (R)
+--------------------
+This bank contains 0 sensors, iow the sensor address is ignored (but must be
+written) just use 0. Bank 0x20 contains 3 bytes:
+
+Byte 0:
+This byte holds the alarm flags for sensor 0-7 of Sensor Bank1, with bit 0
+corresponding to sensor 0, 1 to 1, etc.
+
+Byte 1:
+This byte holds the alarm flags for sensor 8-15 of Sensor Bank1, with bit 0
+corresponding to sensor 8, 1 to 9, etc.
+
+Byte 2:
+This byte holds the alarm flags for sensor 0-5 of Sensor Bank2, with bit 0
+corresponding to sensor 0, 1 to 1, etc.
+
+
+Bank 0x21 Sensor Bank1 Values / Readings (R)
+--------------------------------------------
+This bank contains 16 sensors, for each sensor it contains 1 byte.
+So far the following sensors are known to be available on all motherboards:
+Sensor 0 CPU temp
+Sensor 1 SYS temp
+Sensor 3 CPU core volt
+Sensor 4 DDR volt
+Sensor 10 DDR Vtt volt
+Sensor 15 PWM temp
+
+Byte 0:
+This byte holds the reading from the sensor. Sensors in Bank1 can be both
+volt and temp sensors, this is motherboard specific. The uGuru however does
+seem to know (be programmed with) what kindoff sensor is attached see Sensor
+Bank1 Settings description.
+
+Volt sensors use a linear scale, a reading 0 corresponds with 0 volt and a
+reading of 255 with 3494 mV. The sensors for higher voltages however are
+connected through a division circuit. The currently known division circuits
+in use result in ranges of: 0-4361mV, 0-6248mV or 0-14510mV. 3.3 volt sources
+use the 0-4361mV range, 5 volt the 0-6248mV and 12 volt the 0-14510mV .
+
+Temp sensors also use a linear scale, a reading of 0 corresponds with 0 degree
+Celsius and a reading of 255 with a reading of 255 degrees Celsius.
+
+
+Bank 0x22 Sensor Bank1 Settings (R)
+Bank 0x23 Sensor Bank1 Settings (W)
+-----------------------------------
+
+This bank contains 16 sensors, for each sensor it contains 3 bytes. Each
+set of 3 bytes contains the settings for the sensor with the same sensor
+address in Bank 0x21 .
+
+Byte 0:
+Alarm behaviour for the selected sensor. A 1 enables the described behaviour.
+Bit 0: Give an alarm if measured temp is over the warning threshold (RW) *
+Bit 1: Give an alarm if measured volt is over the max threshold (RW) **
+Bit 2: Give an alarm if measured volt is under the min threshold (RW) **
+Bit 3: Beep if alarm (RW)
+Bit 4: 1 if alarm cause measured temp is over the warning threshold (R)
+Bit 5: 1 if alarm cause measured volt is over the max threshold (R)
+Bit 6: 1 if alarm cause measured volt is under the min threshold (R)
+Bit 7: Volt sensor: Shutdown if alarm persist for more than 4 seconds (RW)
+ Temp sensor: Shutdown if temp is over the shutdown threshold (RW)
+
+* This bit is only honored/used by the uGuru if a temp sensor is connected
+** This bit is only honored/used by the uGuru if a volt sensor is connected
+Note with some trickery this can be used to find out what kinda sensor is
+detected see the Linux kernel driver for an example with many comments on
+how todo this.
+
+Byte 1:
+Temp sensor: warning threshold (scale as bank 0x21)
+Volt sensor: min threshold (scale as bank 0x21)
+
+Byte 2:
+Temp sensor: shutdown threshold (scale as bank 0x21)
+Volt sensor: max threshold (scale as bank 0x21)
+
+
+Bank 0x24 PWM outputs for FAN's (R)
+Bank 0x25 PWM outputs for FAN's (W)
+-----------------------------------
+
+This bank contains 3 "sensors", for each sensor it contains 5 bytes.
+Sensor 0 usually controls the CPU fan
+Sensor 1 usually controls the NB (or chipset for single chip) fan
+Sensor 2 usually controls the System fan
+
+Byte 0:
+Flag 0x80 to enable control, Fan runs at 100% when disabled.
+low nibble (temp)sensor address at bank 0x21 used for control.
+
+Byte 1:
+0-255 = 0-12v (linear), specify voltage at which fan will rotate when under
+low threshold temp (specified in byte 3)
+
+Byte 2:
+0-255 = 0-12v (linear), specify voltage at which fan will rotate when above
+high threshold temp (specified in byte 4)
+
+Byte 3:
+Low threshold temp (scale as bank 0x21)
+
+byte 4:
+High threshold temp (scale as bank 0x21)
+
+
+Bank 0x26 Sensors Bank2 Values / Readings (R)
+---------------------------------------------
+
+This bank contains 6 sensors (AFAIK), for each sensor it contains 1 byte.
+So far the following sensors are known to be available on all motherboards:
+Sensor 0: CPU fan speed
+Sensor 1: NB (or chipset for single chip) fan speed
+Sensor 2: SYS fan speed
+
+Byte 0:
+This byte holds the reading from the sensor. 0-255 = 0-15300 (linear)
+
+
+Bank 0x27 Sensors Bank2 Settings (R)
+Bank 0x28 Sensors Bank2 Settings (W)
+------------------------------------
+
+This bank contains 6 sensors (AFAIK), for each sensor it contains 2 bytes.
+
+Byte 0:
+Alarm behaviour for the selected sensor. A 1 enables the described behaviour.
+Bit 0: Give an alarm if measured rpm is under the min threshold (RW)
+Bit 3: Beep if alarm (RW)
+Bit 7: Shutdown if alarm persist for more than 4 seconds (RW)
+
+Byte 1:
+min threshold (scale as bank 0x26)
+
+
+Warning for the adventurous
+===========================
+
+A word of caution to those who want to experiment and see if they can figure
+the voltage / clock programming out, I tried reading and only reading banks
+0-0x30 with the reading code used for the sensor banks (0x20-0x28) and this
+resulted in a _permanent_ reprogramming of the voltages, luckily I had the
+sensors part configured so that it would shutdown my system on any out of spec
+voltages which proprably safed my computer (after a reboot I managed to
+immediately enter the bios and reload the defaults). This probably means that
+the read/write cycle for the non sensor part is different from the sensor part.
diff --git a/Documentation/hwmon/abituguru3 b/Documentation/hwmon/abituguru3
new file mode 100644
index 000000000..a6ccfe4bb
--- /dev/null
+++ b/Documentation/hwmon/abituguru3
@@ -0,0 +1,65 @@
+Kernel driver abituguru3
+========================
+
+Supported chips:
+ * Abit uGuru revision 3 (Hardware Monitor part, reading only)
+ Prefix: 'abituguru3'
+ Addresses scanned: ISA 0x0E0
+ Datasheet: Not available, this driver is based on reverse engineering.
+ Note:
+ The uGuru is a microcontroller with onboard firmware which programs
+ it to behave as a hwmon IC. There are many different revisions of the
+ firmware and thus effectivly many different revisions of the uGuru.
+ Below is an incomplete list with which revisions are used for which
+ Motherboards:
+ uGuru 1.00 ~ 1.24 (AI7, KV8-MAX3, AN7)
+ uGuru 2.0.0.0 ~ 2.0.4.2 (KV8-PRO)
+ uGuru 2.1.0.0 ~ 2.1.2.8 (AS8, AV8, AA8, AG8, AA8XE, AX8)
+ uGuru 2.3.0.0 ~ 2.3.0.9 (AN8)
+ uGuru 3.0.0.0 ~ 3.0.x.x (AW8, AL8, AT8, NI8 SLI, AT8 32X, AN8 32X,
+ AW9D-MAX)
+ The abituguru3 driver is only for revison 3.0.x.x motherboards,
+ this driver will not work on older motherboards. For older
+ motherboards use the abituguru (without the 3 !) driver.
+
+Authors:
+ Hans de Goede <j.w.r.degoede@hhs.nl>,
+ (Initial reverse engineering done by Louis Kruger)
+
+
+Module Parameters
+-----------------
+
+* force: bool Force detection. Note this parameter only causes the
+ detection to be skipped, and thus the insmod to
+ succeed. If the uGuru can't be read the actual hwmon
+ driver will not load and thus no hwmon device will get
+ registered.
+* verbose: bool Should the driver be verbose?
+ 0/off/false normal output
+ 1/on/true + verbose error reporting (default)
+ Default: 1 (the driver is still in the testing phase)
+
+Description
+-----------
+
+This driver supports the hardware monitoring features of the third revision of
+the Abit uGuru chip, found on recent Abit uGuru featuring motherboards.
+
+The 3rd revision of the uGuru chip in reality is a Winbond W83L951G.
+Unfortunately this doesn't help since the W83L951G is a generic microcontroller
+with a custom Abit application running on it.
+
+Despite Abit not releasing any information regarding the uGuru revision 3,
+Louis Kruger has managed to reverse engineer the sensor part of the uGuru.
+Without his work this driver would not have been possible.
+
+Known Issues
+------------
+
+The voltage and frequency control parts of the Abit uGuru are not supported,
+neither is writing any of the sensor settings and writing / reading the
+fanspeed control registers (FanEQ)
+
+If you encounter any problems please mail me <j.w.r.degoede@hhs.nl> and
+include the output of: "dmesg | grep abituguru"
diff --git a/Documentation/hwmon/abx500 b/Documentation/hwmon/abx500
new file mode 100644
index 000000000..319a058ce
--- /dev/null
+++ b/Documentation/hwmon/abx500
@@ -0,0 +1,28 @@
+Kernel driver abx500
+====================
+
+Supported chips:
+ * ST-Ericsson ABx500 series
+ Prefix: 'abx500'
+ Addresses scanned: -
+ Datasheet: http://www.stericsson.com/developers/documentation.jsp
+
+Authors:
+ Martin Persson <martin.persson@stericsson.com>
+ Hongbo Zhang <hongbo.zhang@linaro.org>
+
+Description
+-----------
+
+Every ST-Ericsson Ux500 SOC consists of both ABx500 and DBx500 physically,
+this is kernel hwmon driver for ABx500.
+
+There are some GPADCs inside ABx500 which are designed for connecting to
+thermal sensors, and there is also a thermal sensor inside ABx500 too, which
+raises interrupt when critical temperature reached.
+
+This abx500 is a common layer which can monitor all of the sensors, every
+specific abx500 chip has its special configurations in its own file, e.g. some
+sensors can be configured invisible if they are not available on that chip, and
+the corresponding gpadc_addr should be set to 0, thus this sensor won't be
+polled.
diff --git a/Documentation/hwmon/acpi_power_meter b/Documentation/hwmon/acpi_power_meter
new file mode 100644
index 000000000..c80399a00
--- /dev/null
+++ b/Documentation/hwmon/acpi_power_meter
@@ -0,0 +1,51 @@
+Kernel driver power_meter
+=========================
+
+This driver talks to ACPI 4.0 power meters.
+
+Supported systems:
+ * Any recent system with ACPI 4.0.
+ Prefix: 'power_meter'
+ Datasheet: http://acpi.info/, section 10.4.
+
+Author: Darrick J. Wong
+
+Description
+-----------
+
+This driver implements sensor reading support for the power meters exposed in
+the ACPI 4.0 spec (Chapter 10.4). These devices have a simple set of
+features--a power meter that returns average power use over a configurable
+interval, an optional capping mechanism, and a couple of trip points. The
+sysfs interface conforms with the specification outlined in the "Power" section
+of Documentation/hwmon/sysfs-interface.
+
+Special Features
+----------------
+
+The power[1-*]_is_battery knob indicates if the power supply is a battery.
+Both power[1-*]_average_{min,max} must be set before the trip points will work.
+When both of them are set, an ACPI event will be broadcast on the ACPI netlink
+socket and a poll notification will be sent to the appropriate
+power[1-*]_average sysfs file.
+
+The power[1-*]_{model_number, serial_number, oem_info} fields display arbitrary
+strings that ACPI provides with the meter. The measures/ directory contains
+symlinks to the devices that this meter measures.
+
+Some computers have the ability to enforce a power cap in hardware. If this is
+the case, the power[1-*]_cap and related sysfs files will appear. When the
+average power consumption exceeds the cap, an ACPI event will be broadcast on
+the netlink event socket and a poll notification will be sent to the
+appropriate power[1-*]_alarm file to indicate that capping has begun, and the
+hardware has taken action to reduce power consumption. Most likely this will
+result in reduced performance.
+
+There are a few other ACPI notifications that can be sent by the firmware. In
+all cases the ACPI event will be broadcast on the ACPI netlink event socket as
+well as sent as a poll notification to a sysfs file. The events are as
+follows:
+
+power[1-*]_cap will be notified if the firmware changes the power cap.
+power[1-*]_interval will be notified if the firmware changes the averaging
+interval.
diff --git a/Documentation/hwmon/ad7314 b/Documentation/hwmon/ad7314
new file mode 100644
index 000000000..1912549c7
--- /dev/null
+++ b/Documentation/hwmon/ad7314
@@ -0,0 +1,25 @@
+Kernel driver ad7314
+====================
+
+Supported chips:
+ * Analog Devices AD7314
+ Prefix: 'ad7314'
+ Datasheet: Publicly available at Analog Devices website.
+ * Analog Devices ADT7301
+ Prefix: 'adt7301'
+ Datasheet: Publicly available at Analog Devices website.
+ * Analog Devices ADT7302
+ Prefix: 'adt7302'
+ Datasheet: Publicly available at Analog Devices website.
+
+Description
+-----------
+
+Driver supports the above parts. The ad7314 has a 10 bit
+sensor with 1lsb = 0.25 degrees centigrade. The adt7301 and
+adt7302 have 14 bit sensors with 1lsb = 0.03125 degrees centigrade.
+
+Notes
+-----
+
+Currently power down mode is not supported.
diff --git a/Documentation/hwmon/adc128d818 b/Documentation/hwmon/adc128d818
new file mode 100644
index 000000000..39c95004d
--- /dev/null
+++ b/Documentation/hwmon/adc128d818
@@ -0,0 +1,47 @@
+Kernel driver adc128d818
+========================
+
+Supported chips:
+ * Texas Instruments ADC818D818
+ Prefix: 'adc818d818'
+ Addresses scanned: I2C 0x1d, 0x1e, 0x1f, 0x2d, 0x2e, 0x2f
+ Datasheet: Publicly available at the TI website
+ http://www.ti.com/
+
+Author: Guenter Roeck
+
+Description
+-----------
+
+This driver implements support for the Texas Instruments ADC128D818.
+It is described as 'ADC System Monitor with Temperature Sensor'.
+
+The ADC128D818 implements one temperature sensor and seven voltage sensors.
+
+Temperatures are measured in degrees Celsius. There is one set of limits.
+When the HOT Temperature Limit is crossed, this will cause an alarm that will
+be reasserted until the temperature drops below the HOT Hysteresis.
+Measurements are guaranteed between -55 and +125 degrees. The temperature
+measurement has a resolution of 0.5 degrees; the limits have a resolution
+of 1 degree.
+
+Voltage sensors (also known as IN sensors) report their values in volts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit. Note that minimum in this case always means 'closest to
+zero'; this is important for negative voltage measurements. All voltage
+inputs can measure voltages between 0 and 2.55 volts, with a resolution
+of 0.625 mV.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may
+already have disappeared by the time the alarm is read. The driver
+caches the alarm status for each sensor until it is at least reported
+once, to ensure that alarms are reported to user space.
+
+The ADC128D818 only updates its values approximately once per second;
+reading it more often will do no harm, but will return 'old' values.
+
+In addition to the scanned address list, the chip can also be configured for
+addresses 0x35 to 0x37. Those addresses are not scanned. You have to instantiate
+the driver explicitly if the chip is configured for any of those addresses in
+your system.
diff --git a/Documentation/hwmon/adm1021 b/Documentation/hwmon/adm1021
new file mode 100644
index 000000000..02ad96cf9
--- /dev/null
+++ b/Documentation/hwmon/adm1021
@@ -0,0 +1,113 @@
+Kernel driver adm1021
+=====================
+
+Supported chips:
+ * Analog Devices ADM1021
+ Prefix: 'adm1021'
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the Analog Devices website
+ * Analog Devices ADM1021A/ADM1023
+ Prefix: 'adm1023'
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the Analog Devices website
+ * Genesys Logic GL523SM
+ Prefix: 'gl523sm'
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet:
+ * Maxim MAX1617
+ Prefix: 'max1617'
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the Maxim website
+ * Maxim MAX1617A
+ Prefix: 'max1617a'
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the Maxim website
+ * National Semiconductor LM84
+ Prefix: 'lm84'
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the National Semiconductor website
+ * Philips NE1617
+ Prefix: 'max1617' (probably detected as a max1617)
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the Philips website
+ * Philips NE1617A
+ Prefix: 'max1617' (probably detected as a max1617)
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the Philips website
+ * TI THMC10
+ Prefix: 'thmc10'
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the TI website
+ * Onsemi MC1066
+ Prefix: 'mc1066'
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the Onsemi website
+
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>
+
+Module Parameters
+-----------------
+
+* read_only: int
+ Don't set any values, read only mode
+
+
+Description
+-----------
+
+The chips supported by this driver are very similar. The Maxim MAX1617 is
+the oldest; it has the problem that it is not very well detectable. The
+MAX1617A solves that. The ADM1021 is a straight clone of the MAX1617A.
+Ditto for the THMC10. From here on, we will refer to all these chips as
+ADM1021-clones.
+
+The ADM1021 and MAX1617A reports a die code, which is a sort of revision
+code. This can help us pinpoint problems; it is not very useful
+otherwise.
+
+ADM1021-clones implement two temperature sensors. One of them is internal,
+and measures the temperature of the chip itself; the other is external and
+is realised in the form of a transistor-like device. A special alarm
+indicates whether the remote sensor is connected.
+
+Each sensor has its own low and high limits. When they are crossed, the
+corresponding alarm is set and remains on as long as the temperature stays
+out of range. Temperatures are measured in degrees Celsius. Measurements
+are possible between -65 and +127 degrees, with a resolution of one degree.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may already
+have disappeared!
+
+This driver only updates its values each 1.5 seconds; reading it more often
+will do no harm, but will return 'old' values. It is possible to make
+ADM1021-clones do faster measurements, but there is really no good reason
+for that.
+
+
+Netburst-based Xeon support
+---------------------------
+
+Some Xeon processors based on the Netburst (early Pentium 4, from 2001 to
+2003) microarchitecture had real MAX1617, ADM1021, or compatible chips
+within them, with two temperature sensors. Other Xeon processors of this
+era (with 400 MHz FSB) had chips with only one temperature sensor.
+
+If you have such an old Xeon, and you get two valid temperatures when
+loading the adm1021 module, then things are good.
+
+If nothing happens when loading the adm1021 module, and you are certain
+that your specific Xeon processor model includes compatible sensors, you
+will have to explicitly instantiate the sensor chips from user-space. See
+method 4 in Documentation/i2c/instantiating-devices. Possible slave
+addresses are 0x18, 0x1a, 0x29, 0x2b, 0x4c, or 0x4e. It is likely that
+only temp2 will be correct and temp1 will have to be ignored.
+
+Previous generations of the Xeon processor (based on Pentium II/III)
+didn't have these sensors. Next generations of Xeon processors (533 MHz
+FSB and faster) lost them, until the Core-based generation which
+introduced integrated digital thermal sensors. These are supported by
+the coretemp driver.
diff --git a/Documentation/hwmon/adm1025 b/Documentation/hwmon/adm1025
new file mode 100644
index 000000000..99f05049c
--- /dev/null
+++ b/Documentation/hwmon/adm1025
@@ -0,0 +1,51 @@
+Kernel driver adm1025
+=====================
+
+Supported chips:
+ * Analog Devices ADM1025, ADM1025A
+ Prefix: 'adm1025'
+ Addresses scanned: I2C 0x2c - 0x2e
+ Datasheet: Publicly available at the Analog Devices website
+ * Philips NE1619
+ Prefix: 'ne1619'
+ Addresses scanned: I2C 0x2c - 0x2d
+ Datasheet: Publicly available at the Philips website
+
+The NE1619 presents some differences with the original ADM1025:
+ * Only two possible addresses (0x2c - 0x2d).
+ * No temperature offset register, but we don't use it anyway.
+ * No INT mode for pin 16. We don't play with it anyway.
+
+Authors:
+ Chen-Yuan Wu <gwu@esoft.com>,
+ Jean Delvare <jdelvare@suse.de>
+
+Description
+-----------
+
+(This is from Analog Devices.) The ADM1025 is a complete system hardware
+monitor for microprocessor-based systems, providing measurement and limit
+comparison of various system parameters. Five voltage measurement inputs
+are provided, for monitoring +2.5V, +3.3V, +5V and +12V power supplies and
+the processor core voltage. The ADM1025 can monitor a sixth power-supply
+voltage by measuring its own VCC. One input (two pins) is dedicated to a
+remote temperature-sensing diode and an on-chip temperature sensor allows
+ambient temperature to be monitored.
+
+One specificity of this chip is that the pin 11 can be hardwired in two
+different manners. It can act as the +12V power-supply voltage analog
+input, or as the a fifth digital entry for the VID reading (bit 4). It's
+kind of strange since both are useful, and the reason for designing the
+chip that way is obscure at least to me. The bit 5 of the configuration
+register can be used to define how the chip is hardwired. Please note that
+it is not a choice you have to make as the user. The choice was already
+made by your motherboard's maker. If the configuration bit isn't set
+properly, you'll have a wrong +12V reading or a wrong VID reading. The way
+the driver handles that is to preserve this bit through the initialization
+process, assuming that the BIOS set it up properly beforehand. If it turns
+out not to be true in some cases, we'll provide a module parameter to force
+modes.
+
+This driver also supports the ADM1025A, which differs from the ADM1025
+only in that it has "open-drain VID inputs while the ADM1025 has on-chip
+100k pull-ups on the VID inputs". It doesn't make any difference for us.
diff --git a/Documentation/hwmon/adm1026 b/Documentation/hwmon/adm1026
new file mode 100644
index 000000000..d8fabe0c2
--- /dev/null
+++ b/Documentation/hwmon/adm1026
@@ -0,0 +1,93 @@
+Kernel driver adm1026
+=====================
+
+Supported chips:
+ * Analog Devices ADM1026
+ Prefix: 'adm1026'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: Publicly available at the Analog Devices website
+ http://www.onsemi.com/PowerSolutions/product.do?id=ADM1026
+
+Authors:
+ Philip Pokorny <ppokorny@penguincomputing.com> for Penguin Computing
+ Justin Thiessen <jthiessen@penguincomputing.com>
+
+Module Parameters
+-----------------
+
+* gpio_input: int array (min = 1, max = 17)
+ List of GPIO pins (0-16) to program as inputs
+* gpio_output: int array (min = 1, max = 17)
+ List of GPIO pins (0-16) to program as outputs
+* gpio_inverted: int array (min = 1, max = 17)
+ List of GPIO pins (0-16) to program as inverted
+* gpio_normal: int array (min = 1, max = 17)
+ List of GPIO pins (0-16) to program as normal/non-inverted
+* gpio_fan: int array (min = 1, max = 8)
+ List of GPIO pins (0-7) to program as fan tachs
+
+
+Description
+-----------
+
+This driver implements support for the Analog Devices ADM1026. Analog
+Devices calls it a "complete thermal system management controller."
+
+The ADM1026 implements three (3) temperature sensors, 17 voltage sensors,
+16 general purpose digital I/O lines, eight (8) fan speed sensors (8-bit),
+an analog output and a PWM output along with limit, alarm and mask bits for
+all of the above. There is even 8k bytes of EEPROM memory on chip.
+
+Temperatures are measured in degrees Celsius. There are two external
+sensor inputs and one internal sensor. Each sensor has a high and low
+limit. If the limit is exceeded, an interrupt (#SMBALERT) can be
+generated. The interrupts can be masked. In addition, there are over-temp
+limits for each sensor. If this limit is exceeded, the #THERM output will
+be asserted. The current temperature and limits have a resolution of 1
+degree.
+
+Fan rotation speeds are reported in RPM (rotations per minute) but measured
+in counts of a 22.5kHz internal clock. Each fan has a high limit which
+corresponds to a minimum fan speed. If the limit is exceeded, an interrupt
+can be generated. Each fan can be programmed to divide the reference clock
+by 1, 2, 4 or 8. Not all RPM values can accurately be represented, so some
+rounding is done. With a divider of 8, the slowest measurable speed of a
+two pulse per revolution fan is 661 RPM.
+
+There are 17 voltage sensors. An alarm is triggered if the voltage has
+crossed a programmable minimum or maximum limit. Note that minimum in this
+case always means 'closest to zero'; this is important for negative voltage
+measurements. Several inputs have integrated attenuators so they can measure
+higher voltages directly. 3.3V, 5V, 12V, -12V and battery voltage all have
+dedicated inputs. There are several inputs scaled to 0-3V full-scale range
+for SCSI terminator power. The remaining inputs are not scaled and have
+a 0-2.5V full-scale range. A 2.5V or 1.82V reference voltage is provided
+for negative voltage measurements.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may already
+have disappeared! Note that in the current implementation, all hardware
+registers are read whenever any data is read (unless it is less than 2.0
+seconds since the last update). This means that you can easily miss
+once-only alarms.
+
+The ADM1026 measures continuously. Analog inputs are measured about 4
+times a second. Fan speed measurement time depends on fan speed and
+divisor. It can take as long as 1.5 seconds to measure all fan speeds.
+
+The ADM1026 has the ability to automatically control fan speed based on the
+temperature sensor inputs. Both the PWM output and the DAC output can be
+used to control fan speed. Usually only one of these two outputs will be
+used. Write the minimum PWM or DAC value to the appropriate control
+register. Then set the low temperature limit in the tmin values for each
+temperature sensor. The range of control is fixed at 20 °C, and the
+largest difference between current and tmin of the temperature sensors sets
+the control output. See the datasheet for several example circuits for
+controlling fan speed with the PWM and DAC outputs. The fan speed sensors
+do not have PWM compensation, so it is probably best to control the fan
+voltage from the power lead rather than on the ground lead.
+
+The datasheet shows an example application with VID signals attached to
+GPIO lines. Unfortunately, the chip may not be connected to the VID lines
+in this way. The driver assumes that the chips *is* connected this way to
+get a VID voltage.
diff --git a/Documentation/hwmon/adm1031 b/Documentation/hwmon/adm1031
new file mode 100644
index 000000000..a143117c9
--- /dev/null
+++ b/Documentation/hwmon/adm1031
@@ -0,0 +1,35 @@
+Kernel driver adm1031
+=====================
+
+Supported chips:
+ * Analog Devices ADM1030
+ Prefix: 'adm1030'
+ Addresses scanned: I2C 0x2c to 0x2e
+ Datasheet: Publicly available at the Analog Devices website
+ http://www.analog.com/en/prod/0%2C2877%2CADM1030%2C00.html
+
+ * Analog Devices ADM1031
+ Prefix: 'adm1031'
+ Addresses scanned: I2C 0x2c to 0x2e
+ Datasheet: Publicly available at the Analog Devices website
+ http://www.analog.com/en/prod/0%2C2877%2CADM1031%2C00.html
+
+Authors:
+ Alexandre d'Alton <alex@alexdalton.org>
+ Jean Delvare <jdelvare@suse.de>
+
+Description
+-----------
+
+The ADM1030 and ADM1031 are digital temperature sensors and fan controllers.
+They sense their own temperature as well as the temperature of up to one
+(ADM1030) or two (ADM1031) external diodes.
+
+All temperature values are given in degrees Celsius. Resolution is 0.5
+degree for the local temperature, 0.125 degree for the remote temperatures.
+
+Each temperature channel has its own high and low limits, plus a critical
+limit.
+
+The ADM1030 monitors a single fan speed, while the ADM1031 monitors up to
+two. Each fan channel has its own low speed limit.
diff --git a/Documentation/hwmon/adm1275 b/Documentation/hwmon/adm1275
new file mode 100644
index 000000000..15b4a20d5
--- /dev/null
+++ b/Documentation/hwmon/adm1275
@@ -0,0 +1,92 @@
+Kernel driver adm1275
+=====================
+
+Supported chips:
+ * Analog Devices ADM1075
+ Prefix: 'adm1075'
+ Addresses scanned: -
+ Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1075.pdf
+ * Analog Devices ADM1275
+ Prefix: 'adm1275'
+ Addresses scanned: -
+ Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1275.pdf
+ * Analog Devices ADM1276
+ Prefix: 'adm1276'
+ Addresses scanned: -
+ Datasheet: www.analog.com/static/imported-files/data_sheets/ADM1276.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver supports hardware montoring for Analog Devices ADM1075, ADM1275,
+and ADM1276 Hot-Swap Controller and Digital Power Monitor.
+
+ADM1075, ADM1275, and ADM1276 are hot-swap controllers that allow a circuit
+board to be removed from or inserted into a live backplane. They also feature
+current and voltage readback via an integrated 12-bit analog-to-digital
+converter (ADC), accessed using a PMBus interface.
+
+The driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+The ADM1075, unlike many other PMBus devices, does not support internal voltage
+or current scaling. Reported voltages, currents, and power are raw measurements,
+and will typically have to be scaled.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data. Please see
+Documentation/hwmon/pmbus for details.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write, history reset
+attributes are write-only, all other attributes are read-only.
+
+in1_label "vin1" or "vout1" depending on chip variant and
+ configuration. On ADM1075, vout1 reports the voltage on
+ the VAUX pin.
+in1_input Measured voltage.
+in1_min Minimum Voltage.
+in1_max Maximum voltage.
+in1_min_alarm Voltage low alarm.
+in1_max_alarm Voltage high alarm.
+in1_highest Historical maximum voltage.
+in1_reset_history Write any value to reset history.
+
+curr1_label "iout1"
+curr1_input Measured current.
+curr1_max Maximum current.
+curr1_max_alarm Current high alarm.
+curr1_lcrit Critical minimum current. Depending on the chip
+ configuration, either curr1_lcrit or curr1_crit is
+ supported, but not both.
+curr1_lcrit_alarm Critical current low alarm.
+curr1_crit Critical maximum current. Depending on the chip
+ configuration, either curr1_lcrit or curr1_crit is
+ supported, but not both.
+curr1_crit_alarm Critical current high alarm.
+curr1_highest Historical maximum current.
+curr1_reset_history Write any value to reset history.
+
+power1_label "pin1"
+power1_input Input power.
+power1_reset_history Write any value to reset history.
+
+ Power attributes are supported on ADM1075 and ADM1276
+ only.
diff --git a/Documentation/hwmon/adm9240 b/Documentation/hwmon/adm9240
new file mode 100644
index 000000000..9b174fc70
--- /dev/null
+++ b/Documentation/hwmon/adm9240
@@ -0,0 +1,177 @@
+Kernel driver adm9240
+=====================
+
+Supported chips:
+ * Analog Devices ADM9240
+ Prefix: 'adm9240'
+ Addresses scanned: I2C 0x2c - 0x2f
+ Datasheet: Publicly available at the Analog Devices website
+ http://www.analog.com/UploadedFiles/Data_Sheets/79857778ADM9240_0.pdf
+
+ * Dallas Semiconductor DS1780
+ Prefix: 'ds1780'
+ Addresses scanned: I2C 0x2c - 0x2f
+ Datasheet: Publicly available at the Dallas Semiconductor (Maxim) website
+ http://pdfserv.maxim-ic.com/en/ds/DS1780.pdf
+
+ * National Semiconductor LM81
+ Prefix: 'lm81'
+ Addresses scanned: I2C 0x2c - 0x2f
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/ds.cgi/LM/LM81.pdf
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>,
+ Michiel Rook <michiel@grendelproject.nl>,
+ Grant Coady <gcoady.lk@gmail.com> with guidance
+ from Jean Delvare <jdelvare@suse.de>
+
+Interface
+---------
+The I2C addresses listed above assume BIOS has not changed the
+chip MSB 5-bit address. Each chip reports a unique manufacturer
+identification code as well as the chip revision/stepping level.
+
+Description
+-----------
+[From ADM9240] The ADM9240 is a complete system hardware monitor for
+microprocessor-based systems, providing measurement and limit comparison
+of up to four power supplies and two processor core voltages, plus
+temperature, two fan speeds and chassis intrusion. Measured values can
+be read out via an I2C-compatible serial System Management Bus, and values
+for limit comparisons can be programmed in over the same serial bus. The
+high speed successive approximation ADC allows frequent sampling of all
+analog channels to ensure a fast interrupt response to any out-of-limit
+measurement.
+
+The ADM9240, DS1780 and LM81 are register compatible, the following
+details are common to the three chips. Chip differences are described
+after this section.
+
+
+Measurements
+------------
+The measurement cycle
+
+The adm9240 driver will take a measurement reading no faster than once
+each two seconds. User-space may read sysfs interface faster than the
+measurement update rate and will receive cached data from the most
+recent measurement.
+
+ADM9240 has a very fast 320us temperature and voltage measurement cycle
+with independent fan speed measurement cycles counting alternating rising
+edges of the fan tacho inputs.
+
+DS1780 measurement cycle is about once per second including fan speed.
+
+LM81 measurement cycle is about once per 400ms including fan speed.
+The LM81 12-bit extended temperature measurement mode is not supported.
+
+Temperature
+-----------
+On chip temperature is reported as degrees Celsius as 9-bit signed data
+with resolution of 0.5 degrees Celsius. High and low temperature limits
+are 8-bit signed data with resolution of one degree Celsius.
+
+Temperature alarm is asserted once the temperature exceeds the high limit,
+and is cleared when the temperature falls below the temp1_max_hyst value.
+
+Fan Speed
+---------
+Two fan tacho inputs are provided, the ADM9240 gates an internal 22.5kHz
+clock via a divider to an 8-bit counter. Fan speed (rpm) is calculated by:
+
+rpm = (22500 * 60) / (count * divider)
+
+Automatic fan clock divider
+
+ * User sets 0 to fan_min limit
+ - low speed alarm is disabled
+ - fan clock divider not changed
+ - auto fan clock adjuster enabled for valid fan speed reading
+
+ * User sets fan_min limit too low
+ - low speed alarm is enabled
+ - fan clock divider set to max
+ - fan_min set to register value 254 which corresponds
+ to 664 rpm on adm9240
+ - low speed alarm will be asserted if fan speed is
+ less than minimum measurable speed
+ - auto fan clock adjuster disabled
+
+ * User sets reasonable fan speed
+ - low speed alarm is enabled
+ - fan clock divider set to suit fan_min
+ - auto fan clock adjuster enabled: adjusts fan_min
+
+ * User sets unreasonably high low fan speed limit
+ - resolution of the low speed limit may be reduced
+ - alarm will be asserted
+ - auto fan clock adjuster enabled: adjusts fan_min
+
+ * fan speed may be displayed as zero until the auto fan clock divider
+ adjuster brings fan speed clock divider back into chip measurement
+ range, this will occur within a few measurement cycles.
+
+Analog Output
+-------------
+An analog output provides a 0 to 1.25 volt signal intended for an external
+fan speed amplifier circuit. The analog output is set to maximum value on
+power up or reset. This doesn't do much on the test Intel SE440BX-2.
+
+Voltage Monitor
+
+Voltage (IN) measurement is internally scaled:
+
+ nr label nominal maximum resolution
+ mV mV mV
+ 0 +2.5V 2500 3320 13.0
+ 1 Vccp1 2700 3600 14.1
+ 2 +3.3V 3300 4380 17.2
+ 3 +5V 5000 6640 26.0
+ 4 +12V 12000 15940 62.5
+ 5 Vccp2 2700 3600 14.1
+
+The reading is an unsigned 8-bit value, nominal voltage measurement is
+represented by a reading of 192, being 3/4 of the measurement range.
+
+An alarm is asserted for any voltage going below or above the set limits.
+
+The driver reports and accepts voltage limits scaled to the above table.
+
+VID Monitor
+-----------
+The chip has five inputs to read the 5-bit VID and reports the mV value
+based on detected CPU type.
+
+Chassis Intrusion
+-----------------
+An alarm is asserted when the CI pin goes active high. The ADM9240
+Datasheet has an example of an external temperature sensor driving
+this pin. On an Intel SE440BX-2 the Chassis Intrusion header is
+connected to a normally open switch.
+
+The ADM9240 provides an internal open drain on this line, and may output
+a 20 ms active low pulse to reset an external Chassis Intrusion latch.
+
+Clear the CI latch by writing value 0 to the sysfs intrusion0_alarm file.
+
+Alarm flags reported as 16-bit word
+
+ bit label comment
+ --- ------------- --------------------------
+ 0 +2.5 V_Error high or low limit exceeded
+ 1 VCCP_Error high or low limit exceeded
+ 2 +3.3 V_Error high or low limit exceeded
+ 3 +5 V_Error high or low limit exceeded
+ 4 Temp_Error temperature error
+ 6 FAN1_Error fan low limit exceeded
+ 7 FAN2_Error fan low limit exceeded
+ 8 +12 V_Error high or low limit exceeded
+ 9 VCCP2_Error high or low limit exceeded
+ 12 Chassis_Error CI pin went high
+
+Remaining bits are reserved and thus undefined. It is important to note
+that alarm bits may be cleared on read, user-space may latch alarms and
+provide the end-user with a method to clear alarm memory.
diff --git a/Documentation/hwmon/ads1015 b/Documentation/hwmon/ads1015
new file mode 100644
index 000000000..063b80d85
--- /dev/null
+++ b/Documentation/hwmon/ads1015
@@ -0,0 +1,76 @@
+Kernel driver ads1015
+=====================
+
+Supported chips:
+ * Texas Instruments ADS1015
+ Prefix: 'ads1015'
+ Datasheet: Publicly available at the Texas Instruments website :
+ http://focus.ti.com/lit/ds/symlink/ads1015.pdf
+ * Texas Instruments ADS1115
+ Prefix: 'ads1115'
+ Datasheet: Publicly available at the Texas Instruments website :
+ http://focus.ti.com/lit/ds/symlink/ads1115.pdf
+
+Authors:
+ Dirk Eibach, Guntermann & Drunck GmbH <eibach@gdsys.de>
+
+Description
+-----------
+
+This driver implements support for the Texas Instruments ADS1015/ADS1115.
+
+This device is a 12/16-bit A-D converter with 4 inputs.
+
+The inputs can be used single ended or in certain differential combinations.
+
+The inputs can be made available by 8 sysfs input files in0_input - in7_input:
+in0: Voltage over AIN0 and AIN1.
+in1: Voltage over AIN0 and AIN3.
+in2: Voltage over AIN1 and AIN3.
+in3: Voltage over AIN2 and AIN3.
+in4: Voltage over AIN0 and GND.
+in5: Voltage over AIN1 and GND.
+in6: Voltage over AIN2 and GND.
+in7: Voltage over AIN3 and GND.
+
+Which inputs are available can be configured using platform data or devicetree.
+
+By default all inputs are exported.
+
+Platform Data
+-------------
+
+In linux/i2c/ads1015.h platform data is defined, channel_data contains
+configuration data for the used input combinations:
+- pga is the programmable gain amplifier (values are full scale)
+ 0: +/- 6.144 V
+ 1: +/- 4.096 V
+ 2: +/- 2.048 V
+ 3: +/- 1.024 V
+ 4: +/- 0.512 V
+ 5: +/- 0.256 V
+- data_rate in samples per second
+ 0: 128
+ 1: 250
+ 2: 490
+ 3: 920
+ 4: 1600
+ 5: 2400
+ 6: 3300
+
+Example:
+struct ads1015_platform_data data = {
+ .channel_data = {
+ [2] = { .enabled = true, .pga = 1, .data_rate = 0 },
+ [4] = { .enabled = true, .pga = 4, .data_rate = 5 },
+ }
+};
+
+In this case only in2_input (FS +/- 4.096 V, 128 SPS) and in4_input
+(FS +/- 0.512 V, 2400 SPS) would be created.
+
+Devicetree
+----------
+
+Configuration is also possible via devicetree:
+Documentation/devicetree/bindings/hwmon/ads1015.txt
diff --git a/Documentation/hwmon/ads7828 b/Documentation/hwmon/ads7828
new file mode 100644
index 000000000..f6e263e0f
--- /dev/null
+++ b/Documentation/hwmon/ads7828
@@ -0,0 +1,58 @@
+Kernel driver ads7828
+=====================
+
+Supported chips:
+ * Texas Instruments/Burr-Brown ADS7828
+ Prefix: 'ads7828'
+ Datasheet: Publicly available at the Texas Instruments website:
+ http://focus.ti.com/lit/ds/symlink/ads7828.pdf
+
+ * Texas Instruments ADS7830
+ Prefix: 'ads7830'
+ Datasheet: Publicly available at the Texas Instruments website:
+ http://focus.ti.com/lit/ds/symlink/ads7830.pdf
+
+Authors:
+ Steve Hardy <shardy@redhat.com>
+ Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ Guillaume Roguez <guillaume.roguez@savoirfairelinux.com>
+
+Platform data
+-------------
+
+The ads7828 driver accepts an optional ads7828_platform_data structure (defined
+in include/linux/platform_data/ads7828.h). The structure fields are:
+
+* diff_input: (bool) Differential operation
+ set to true for differential mode, false for default single ended mode.
+
+* ext_vref: (bool) External reference
+ set to true if it operates with an external reference, false for default
+ internal reference.
+
+* vref_mv: (unsigned int) Voltage reference
+ if using an external reference, set this to the reference voltage in mV,
+ otherwise it will default to the internal value (2500mV). This value will be
+ bounded with limits accepted by the chip, described in the datasheet.
+
+ If no structure is provided, the configuration defaults to single ended
+ operation and internal voltage reference (2.5V).
+
+Description
+-----------
+
+This driver implements support for the Texas Instruments ADS7828 and ADS7830.
+
+The ADS7828 device is a 12-bit 8-channel A/D converter, while the ADS7830 does
+8-bit sampling.
+
+It can operate in single ended mode (8 +ve inputs) or in differential mode,
+where 4 differential pairs can be measured.
+
+The chip also has the facility to use an external voltage reference. This
+may be required if your hardware supplies the ADS7828 from a 5V supply, see
+the datasheet for more details.
+
+There is no reliable way to identify this chip, so the driver will not scan
+some addresses to try to auto-detect it. That means that you will have to
+statically declare the device in the platform support code.
diff --git a/Documentation/hwmon/adt7410 b/Documentation/hwmon/adt7410
new file mode 100644
index 000000000..9817941e5
--- /dev/null
+++ b/Documentation/hwmon/adt7410
@@ -0,0 +1,73 @@
+Kernel driver adt7410
+=====================
+
+Supported chips:
+ * Analog Devices ADT7410
+ Prefix: 'adt7410'
+ Addresses scanned: None
+ Datasheet: Publicly available at the Analog Devices website
+ http://www.analog.com/static/imported-files/data_sheets/ADT7410.pdf
+ * Analog Devices ADT7420
+ Prefix: 'adt7420'
+ Addresses scanned: None
+ Datasheet: Publicly available at the Analog Devices website
+ http://www.analog.com/static/imported-files/data_sheets/ADT7420.pdf
+ * Analog Devices ADT7310
+ Prefix: 'adt7310'
+ Addresses scanned: None
+ Datasheet: Publicly available at the Analog Devices website
+ http://www.analog.com/static/imported-files/data_sheets/ADT7310.pdf
+ * Analog Devices ADT7320
+ Prefix: 'adt7320'
+ Addresses scanned: None
+ Datasheet: Publicly available at the Analog Devices website
+ http://www.analog.com/static/imported-files/data_sheets/ADT7320.pdf
+
+Author: Hartmut Knaack <knaack.h@gmx.de>
+
+Description
+-----------
+
+The ADT7310/ADT7410 is a temperature sensor with rated temperature range of
+-55°C to +150°C. It has a high accuracy of +/-0.5°C and can be operated at a
+resolution of 13 bits (0.0625°C) or 16 bits (0.0078°C). The sensor provides an
+INT pin to indicate that a minimum or maximum temperature set point has been
+exceeded, as well as a critical temperature (CT) pin to indicate that the
+critical temperature set point has been exceeded. Both pins can be set up with a
+common hysteresis of 0°C - 15°C and a fault queue, ranging from 1 to 4 events.
+Both pins can individually set to be active-low or active-high, while the whole
+device can either run in comparator mode or interrupt mode. The ADT7410 supports
+continuous temperature sampling, as well as sampling one temperature value per
+second or even just get one sample on demand for power saving. Besides, it can
+completely power down its ADC, if power management is required.
+
+The ADT7320/ADT7420 is register compatible, the only differences being the
+package, a slightly narrower operating temperature range (-40°C to +150°C), and
+a better accuracy (0.25°C instead of 0.50°C.)
+
+The difference between the ADT7310/ADT7320 and ADT7410/ADT7420 is the control
+interface, the ADT7310 and ADT7320 use SPI while the ADT7410 and ADT7420 use
+I2C.
+
+Configuration Notes
+-------------------
+
+Since the device uses one hysteresis value, which is an offset to minimum,
+maximum and critical temperature, it can only be set for temp#_max_hyst.
+However, temp#_min_hyst and temp#_crit_hyst show their corresponding
+hysteresis.
+The device is set to 16 bit resolution and comparator mode.
+
+sysfs-Interface
+---------------
+
+temp#_input - temperature input
+temp#_min - temperature minimum setpoint
+temp#_max - temperature maximum setpoint
+temp#_crit - critical temperature setpoint
+temp#_min_hyst - hysteresis for temperature minimum (read-only)
+temp#_max_hyst - hysteresis for temperature maximum (read/write)
+temp#_crit_hyst - hysteresis for critical temperature (read-only)
+temp#_min_alarm - temperature minimum alarm flag
+temp#_max_alarm - temperature maximum alarm flag
+temp#_crit_alarm - critical temperature alarm flag
diff --git a/Documentation/hwmon/adt7411 b/Documentation/hwmon/adt7411
new file mode 100644
index 000000000..1632960f9
--- /dev/null
+++ b/Documentation/hwmon/adt7411
@@ -0,0 +1,42 @@
+Kernel driver adt7411
+=====================
+
+Supported chips:
+ * Analog Devices ADT7411
+ Prefix: 'adt7411'
+ Addresses scanned: 0x48, 0x4a, 0x4b
+ Datasheet: Publicly available at the Analog Devices website
+
+Author: Wolfram Sang (based on adt7470 by Darrick J. Wong)
+
+Description
+-----------
+
+This driver implements support for the Analog Devices ADT7411 chip. There may
+be other chips that implement this interface.
+
+The ADT7411 can use an I2C/SMBus compatible 2-wire interface or an
+SPI-compatible 4-wire interface. It provides a 10-bit analog to digital
+converter which measures 1 temperature, vdd and 8 input voltages. It has an
+internal temperature sensor, but an external one can also be connected (one
+loses 2 inputs then). There are high- and low-limit registers for all inputs.
+
+Check the datasheet for details.
+
+sysfs-Interface
+---------------
+
+in0_input - vdd voltage input
+in[1-8]_input - analog 1-8 input
+temp1_input - temperature input
+
+Besides standard interfaces, this driver adds (0 = off, 1 = on):
+
+ adc_ref_vdd - Use vdd as reference instead of 2.25 V
+ fast_sampling - Sample at 22.5 kHz instead of 1.4 kHz, but drop filters
+ no_average - Turn off averaging over 16 samples
+
+Notes
+-----
+
+SPI, external temperature sensor and limit registers are not supported yet.
diff --git a/Documentation/hwmon/adt7462 b/Documentation/hwmon/adt7462
new file mode 100644
index 000000000..ec660b328
--- /dev/null
+++ b/Documentation/hwmon/adt7462
@@ -0,0 +1,67 @@
+Kernel driver adt7462
+======================
+
+Supported chips:
+ * Analog Devices ADT7462
+ Prefix: 'adt7462'
+ Addresses scanned: I2C 0x58, 0x5C
+ Datasheet: Publicly available at the Analog Devices website
+
+Author: Darrick J. Wong
+
+Description
+-----------
+
+This driver implements support for the Analog Devices ADT7462 chip family.
+
+This chip is a bit of a beast. It has 8 counters for measuring fan speed. It
+can also measure 13 voltages or 4 temperatures, or various combinations of the
+two. See the chip documentation for more details about the exact set of
+configurations. This driver does not allow one to configure the chip; that is
+left to the system designer.
+
+A sophisticated control system for the PWM outputs is designed into the ADT7462
+that allows fan speed to be adjusted automatically based on any of the three
+temperature sensors. Each PWM output is individually adjustable and
+programmable. Once configured, the ADT7462 will adjust the PWM outputs in
+response to the measured temperatures without further host intervention. This
+feature can also be disabled for manual control of the PWM's.
+
+Each of the measured inputs (voltage, temperature, fan speed) has
+corresponding high/low limit values. The ADT7462 will signal an ALARM if
+any measured value exceeds either limit.
+
+The ADT7462 samples all inputs continuously. The driver will not read
+the registers more often than once every other second. Further,
+configuration data is only read once per minute.
+
+Special Features
+----------------
+
+The ADT7462 have a 10-bit ADC and can therefore measure temperatures
+with 0.25 degC resolution.
+
+The Analog Devices datasheet is very detailed and describes a procedure for
+determining an optimal configuration for the automatic PWM control.
+
+The driver will report sensor labels when it is able to determine that
+information from the configuration registers.
+
+Configuration Notes
+-------------------
+
+Besides standard interfaces driver adds the following:
+
+* PWM Control
+
+* pwm#_auto_point1_pwm and temp#_auto_point1_temp and
+* pwm#_auto_point2_pwm and temp#_auto_point2_temp -
+
+point1: Set the pwm speed at a lower temperature bound.
+point2: Set the pwm speed at a higher temperature bound.
+
+The ADT7462 will scale the pwm between the lower and higher pwm speed when
+the temperature is between the two temperature boundaries. PWM values range
+from 0 (off) to 255 (full speed). Fan speed will be set to maximum when the
+temperature sensor associated with the PWM control exceeds temp#_max.
+
diff --git a/Documentation/hwmon/adt7470 b/Documentation/hwmon/adt7470
new file mode 100644
index 000000000..8ce4aa0a0
--- /dev/null
+++ b/Documentation/hwmon/adt7470
@@ -0,0 +1,73 @@
+Kernel driver adt7470
+=====================
+
+Supported chips:
+ * Analog Devices ADT7470
+ Prefix: 'adt7470'
+ Addresses scanned: I2C 0x2C, 0x2E, 0x2F
+ Datasheet: Publicly available at the Analog Devices website
+
+Author: Darrick J. Wong
+
+Description
+-----------
+
+This driver implements support for the Analog Devices ADT7470 chip. There may
+be other chips that implement this interface.
+
+The ADT7470 uses the 2-wire interface compatible with the SMBus 2.0
+specification. Using an analog to digital converter it measures up to ten (10)
+external temperatures. It has four (4) 16-bit counters for measuring fan speed.
+There are four (4) PWM outputs that can be used to control fan speed.
+
+A sophisticated control system for the PWM outputs is designed into the ADT7470
+that allows fan speed to be adjusted automatically based on any of the ten
+temperature sensors. Each PWM output is individually adjustable and
+programmable. Once configured, the ADT7470 will adjust the PWM outputs in
+response to the measured temperatures with further host intervention. This
+feature can also be disabled for manual control of the PWM's.
+
+Each of the measured inputs (temperature, fan speed) has corresponding high/low
+limit values. The ADT7470 will signal an ALARM if any measured value exceeds
+either limit.
+
+The ADT7470 samples all inputs continuously. A kernel thread is started up for
+the purpose of periodically querying the temperature sensors, thus allowing the
+automatic fan pwm control to set the fan speed. The driver will not read the
+registers more often than once every 5 seconds. Further, configuration data is
+only read once per minute.
+
+Special Features
+----------------
+
+The ADT7470 has a 8-bit ADC and is capable of measuring temperatures with 1
+degC resolution.
+
+The Analog Devices datasheet is very detailed and describes a procedure for
+determining an optimal configuration for the automatic PWM control.
+
+Configuration Notes
+-------------------
+
+Besides standard interfaces driver adds the following:
+
+* PWM Control
+
+* pwm#_auto_point1_pwm and pwm#_auto_point1_temp and
+* pwm#_auto_point2_pwm and pwm#_auto_point2_temp -
+
+point1: Set the pwm speed at a lower temperature bound.
+point2: Set the pwm speed at a higher temperature bound.
+
+The ADT7470 will scale the pwm between the lower and higher pwm speed when
+the temperature is between the two temperature boundaries. PWM values range
+from 0 (off) to 255 (full speed). Fan speed will be set to maximum when the
+temperature sensor associated with the PWM control exceeds
+pwm#_auto_point2_temp.
+
+Notes
+-----
+
+The temperature inputs no longer need to be read periodically from userspace in
+order for the automatic pwm algorithm to run. This was the case for earlier
+versions of the driver.
diff --git a/Documentation/hwmon/adt7475 b/Documentation/hwmon/adt7475
new file mode 100644
index 000000000..0502f2b46
--- /dev/null
+++ b/Documentation/hwmon/adt7475
@@ -0,0 +1,117 @@
+Kernel driver adt7475
+=====================
+
+Supported chips:
+ * Analog Devices ADT7473
+ Prefix: 'adt7473'
+ Addresses scanned: I2C 0x2C, 0x2D, 0x2E
+ Datasheet: Publicly available at the On Semiconductors website
+ * Analog Devices ADT7475
+ Prefix: 'adt7475'
+ Addresses scanned: I2C 0x2E
+ Datasheet: Publicly available at the On Semiconductors website
+ * Analog Devices ADT7476
+ Prefix: 'adt7476'
+ Addresses scanned: I2C 0x2C, 0x2D, 0x2E
+ Datasheet: Publicly available at the On Semiconductors website
+ * Analog Devices ADT7490
+ Prefix: 'adt7490'
+ Addresses scanned: I2C 0x2C, 0x2D, 0x2E
+ Datasheet: Publicly available at the On Semiconductors website
+
+Authors:
+ Jordan Crouse
+ Hans de Goede
+ Darrick J. Wong (documentation)
+ Jean Delvare
+
+
+Description
+-----------
+
+This driver implements support for the Analog Devices ADT7473, ADT7475,
+ADT7476 and ADT7490 chip family. The ADT7473 and ADT7475 differ only in
+minor details. The ADT7476 has additional features, including extra voltage
+measurement inputs and VID support. The ADT7490 also has additional
+features, including extra voltage measurement inputs and PECI support. All
+the supported chips will be collectively designed by the name "ADT747x" in
+the rest of this document.
+
+The ADT747x uses the 2-wire interface compatible with the SMBus 2.0
+specification. Using an analog to digital converter it measures three (3)
+temperatures and two (2) or more voltages. It has four (4) 16-bit counters
+for measuring fan speed. There are three (3) PWM outputs that can be used
+to control fan speed.
+
+A sophisticated control system for the PWM outputs is designed into the
+ADT747x that allows fan speed to be adjusted automatically based on any of the
+three temperature sensors. Each PWM output is individually adjustable and
+programmable. Once configured, the ADT747x will adjust the PWM outputs in
+response to the measured temperatures without further host intervention.
+This feature can also be disabled for manual control of the PWM's.
+
+Each of the measured inputs (voltage, temperature, fan speed) has
+corresponding high/low limit values. The ADT747x will signal an ALARM if
+any measured value exceeds either limit.
+
+The ADT747x samples all inputs continuously. The driver will not read
+the registers more often than once every other second. Further,
+configuration data is only read once per minute.
+
+Chip Differences Summary
+------------------------
+
+ADT7473:
+ * 2 voltage inputs
+ * system acoustics optimizations (not implemented)
+
+ADT7475:
+ * 2 voltage inputs
+
+ADT7476:
+ * 5 voltage inputs
+ * VID support
+
+ADT7490:
+ * 6 voltage inputs
+ * 1 Imon input (not implemented)
+ * PECI support (not implemented)
+ * 2 GPIO pins (not implemented)
+ * system acoustics optimizations (not implemented)
+
+Special Features
+----------------
+
+The ADT747x has a 10-bit ADC and can therefore measure temperatures
+with a resolution of 0.25 degree Celsius. Temperature readings can be
+configured either for two's complement format or "Offset 64" format,
+wherein 64 is subtracted from the raw value to get the temperature value.
+
+The datasheet is very detailed and describes a procedure for determining
+an optimal configuration for the automatic PWM control.
+
+Fan Speed Control
+-----------------
+
+The driver exposes two trip points per PWM channel.
+
+point1: Set the PWM speed at the lower temperature bound
+point2: Set the PWM speed at the higher temperature bound
+
+The ADT747x will scale the PWM linearly between the lower and higher PWM
+speed when the temperature is between the two temperature boundaries.
+Temperature boundaries are associated to temperature channels rather than
+PWM outputs, and a given PWM output can be controlled by several temperature
+channels. As a result, the ADT747x may compute more than one PWM value
+for a channel at a given time, in which case the maximum value (fastest
+fan speed) is applied. PWM values range from 0 (off) to 255 (full speed).
+
+Fan speed may be set to maximum when the temperature sensor associated with
+the PWM control exceeds temp#_max.
+
+Notes
+-----
+
+The nVidia binary driver presents an ADT7473 chip via an on-card i2c bus.
+Unfortunately, they fail to set the i2c adapter class, so this driver may
+fail to find the chip until the nvidia driver is patched.
diff --git a/Documentation/hwmon/amc6821 b/Documentation/hwmon/amc6821
new file mode 100644
index 000000000..ced8359c5
--- /dev/null
+++ b/Documentation/hwmon/amc6821
@@ -0,0 +1,102 @@
+Kernel driver amc6821
+=====================
+
+Supported chips:
+ Texas Instruments AMC6821
+ Prefix: 'amc6821'
+ Addresses scanned: 0x18, 0x19, 0x1a, 0x2c, 0x2d, 0x2e, 0x4c, 0x4d, 0x4e
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/amc6821.html
+
+Authors:
+ Tomaz Mertelj <tomaz.mertelj@guest.arnes.si>
+
+
+Description
+-----------
+
+This driver implements support for the Texas Instruments amc6821 chip.
+The chip has one on-chip and one remote temperature sensor and one pwm fan
+regulator.
+The pwm can be controlled either from software or automatically.
+
+The driver provides the following sensor accesses in sysfs:
+
+temp1_input ro on-chip temperature
+temp1_min rw "
+temp1_max rw "
+temp1_crit rw "
+temp1_min_alarm ro "
+temp1_max_alarm ro "
+temp1_crit_alarm ro "
+
+temp2_input ro remote temperature
+temp2_min rw "
+temp2_max rw "
+temp2_crit rw "
+temp2_min_alarm ro "
+temp2_max_alarm ro "
+temp2_crit_alarm ro "
+temp2_fault ro "
+
+fan1_input ro tachometer speed
+fan1_min rw "
+fan1_max rw "
+fan1_fault ro "
+fan1_div rw Fan divisor can be either 2 or 4.
+
+pwm1 rw pwm1
+pwm1_enable rw regulator mode, 1=open loop, 2=fan controlled
+ by remote temperature, 3=fan controlled by
+ combination of the on-chip temperature and
+ remote-sensor temperature,
+pwm1_auto_channels_temp ro 1 if pwm_enable==2, 3 if pwm_enable==3
+pwm1_auto_point1_pwm ro Hardwired to 0, shared for both
+ temperature channels.
+pwm1_auto_point2_pwm rw This value is shared for both temperature
+ channels.
+pwm1_auto_point3_pwm rw Hardwired to 255, shared for both
+ temperature channels.
+
+temp1_auto_point1_temp ro Hardwired to temp2_auto_point1_temp
+ which is rw. Below this temperature fan stops.
+temp1_auto_point2_temp rw The low-temperature limit of the proportional
+ range. Below this temperature
+ pwm1 = pwm1_auto_point2_pwm. It can go from
+ 0 degree C to 124 degree C in steps of
+ 4 degree C. Read it out after writing to get
+ the actual value.
+temp1_auto_point3_temp rw Above this temperature fan runs at maximum
+ speed. It can go from temp1_auto_point2_temp.
+ It can only have certain discrete values
+ which depend on temp1_auto_point2_temp and
+ pwm1_auto_point2_pwm. Read it out after
+ writing to get the actual value.
+
+temp2_auto_point1_temp rw Must be between 0 degree C and 63 degree C and
+ it defines the passive cooling temperature.
+ Below this temperature the fan stops in
+ the closed loop mode.
+temp2_auto_point2_temp rw The low-temperature limit of the proportional
+ range. Below this temperature
+ pwm1 = pwm1_auto_point2_pwm. It can go from
+ 0 degree C to 124 degree C in steps
+ of 4 degree C.
+
+temp2_auto_point3_temp rw Above this temperature fan runs at maximum
+ speed. It can only have certain discrete
+ values which depend on temp2_auto_point2_temp
+ and pwm1_auto_point2_pwm. Read it out after
+ writing to get actual value.
+
+
+Module parameters
+-----------------
+
+If your board has a BIOS that initializes the amc6821 correctly, you should
+load the module with: init=0.
+
+If your board BIOS doesn't initialize the chip, or you want
+different settings, you can set the following parameters:
+init=1,
+pwminv: 0 default pwm output, 1 inverts pwm output.
+
diff --git a/Documentation/hwmon/asb100 b/Documentation/hwmon/asb100
new file mode 100644
index 000000000..ab7365e13
--- /dev/null
+++ b/Documentation/hwmon/asb100
@@ -0,0 +1,72 @@
+Kernel driver asb100
+====================
+
+Supported Chips:
+ * Asus ASB100 and ASB100-A "Bach"
+ Prefix: 'asb100'
+ Addresses scanned: I2C 0x2d
+ Datasheet: none released
+
+Author: Mark M. Hoffman <mhoffman@lightlink.com>
+
+Description
+-----------
+
+This driver implements support for the Asus ASB100 and ASB100-A "Bach".
+These are custom ASICs available only on Asus mainboards. Asus refuses to
+supply a datasheet for these chips. Thanks go to many people who helped
+investigate their hardware, including:
+
+Vitaly V. Bursov
+Alexander van Kaam (author of MBM for Windows)
+Bertrik Sikken
+
+The ASB100 implements seven voltage sensors, three fan rotation speed
+sensors, four temperature sensors, VID lines and alarms. In addition to
+these, the ASB100-A also implements a single PWM controller for fans 2 and
+3 (i.e. one setting controls both.) If you have a plain ASB100, the PWM
+controller will simply not work (or maybe it will for you... it doesn't for
+me).
+
+Temperatures are measured and reported in degrees Celsius.
+
+Fan speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit.
+
+Voltage sensors (also known as IN sensors) report values in volts.
+
+The VID lines encode the core voltage value: the voltage level your
+processor should work with. This is hardcoded by the mainboard and/or
+processor itself. It is a value in volts.
+
+Alarms: (TODO question marks indicate may or may not work)
+
+0x0001 => in0 (?)
+0x0002 => in1 (?)
+0x0004 => in2
+0x0008 => in3
+0x0010 => temp1 (1)
+0x0020 => temp2
+0x0040 => fan1
+0x0080 => fan2
+0x0100 => in4
+0x0200 => in5 (?) (2)
+0x0400 => in6 (?) (2)
+0x0800 => fan3
+0x1000 => chassis switch
+0x2000 => temp3
+
+Alarm Notes:
+
+(1) This alarm will only trigger if the hysteresis value is 127C.
+I.e. it behaves the same as w83781d.
+
+(2) The min and max registers for these values appear to
+be read-only or otherwise stuck at 0x00.
+
+TODO:
+* Experiment with fan divisors > 8.
+* Experiment with temp. sensor types.
+* Are there really 13 voltage inputs? Probably not...
+* Cleanups, no doubt...
+
diff --git a/Documentation/hwmon/asc7621 b/Documentation/hwmon/asc7621
new file mode 100644
index 000000000..7287be7e1
--- /dev/null
+++ b/Documentation/hwmon/asc7621
@@ -0,0 +1,296 @@
+Kernel driver asc7621
+==================
+
+Supported chips:
+ Andigilog aSC7621 and aSC7621a
+ Prefix: 'asc7621'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: http://www.fairview5.com/linux/asc7621/asc7621.pdf
+
+Author:
+ George Joseph
+
+Description provided by Dave Pivin @ Andigilog:
+
+Andigilog has both the PECI and pre-PECI versions of the Heceta-6, as
+Intel calls them. Heceta-6e has high frequency PWM and Heceta-6p has
+added PECI and a 4th thermal zone. The Andigilog aSC7611 is the
+Heceta-6e part and aSC7621 is the Heceta-6p part. They are both in
+volume production, shipping to Intel and their subs.
+
+We have enhanced both parts relative to the governing Intel
+specification. First enhancement is temperature reading resolution. We
+have used registers below 20h for vendor-specific functions in addition
+to those in the Intel-specified vendor range.
+
+Our conversion process produces a result that is reported as two bytes.
+The fan speed control uses this finer value to produce a "step-less" fan
+PWM output. These two bytes are "read-locked" to guarantee that once a
+high or low byte is read, the other byte is locked-in until after the
+next read of any register. So to get an atomic reading, read high or low
+byte, then the very next read should be the opposite byte. Our data
+sheet says 10-bits of resolution, although you may find the lower bits
+are active, they are not necessarily reliable or useful externally. We
+chose not to mask them.
+
+We employ significant filtering that is user tunable as described in the
+data sheet. Our temperature reports and fan PWM outputs are very smooth
+when compared to the competition, in addition to the higher resolution
+temperature reports. The smoother PWM output does not require user
+intervention.
+
+We offer GPIO features on the former VID pins. These are open-drain
+outputs or inputs and may be used as general purpose I/O or as alarm
+outputs that are based on temperature limits. These are in 19h and 1Ah.
+
+We offer flexible mapping of temperature readings to thermal zones. Any
+temperature may be mapped to any zone, which has a default assignment
+that follows Intel's specs.
+
+Since there is a fan to zone assignment that allows for the "hotter" of
+a set of zones to control the PWM of an individual fan, but there is no
+indication to the user, we have added an indicator that shows which zone
+is currently controlling the PWM for a given fan. This is in register
+00h.
+
+Both remote diode temperature readings may be given an offset value such
+that the reported reading as well as the temperature used to determine
+PWM may be offset for system calibration purposes.
+
+PECI Extended configuration allows for having more than two domains per
+PECI address and also provides an enabling function for each PECI
+address. One could use our flexible zone assignment to have a zone
+assigned to up to 4 PECI addresses. This is not possible in the default
+Intel configuration. This would be useful in multi-CPU systems with
+individual fans on each that would benefit from individual fan control.
+This is in register 0Eh.
+
+The tachometer measurement system is flexible and able to adapt to many
+fan types. We can also support pulse-stretched PWM so that 3-wire fans
+may be used. These characteristics are in registers 04h to 07h.
+
+Finally, we have added a tach disable function that turns off the tach
+measurement system for individual tachs in order to save power. That is
+in register 75h.
+
+--
+aSC7621 Product Description
+
+The aSC7621 has a two wire digital interface compatible with SMBus 2.0.
+Using a 10-bit ADC, the aSC7621 measures the temperature of two remote diode
+connected transistors as well as its own die. Support for Platform
+Environmental Control Interface (PECI) is included.
+
+Using temperature information from these four zones, an automatic fan speed
+control algorithm is employed to minimize acoustic impact while achieving
+recommended CPU temperature under varying operational loads.
+
+To set fan speed, the aSC7621 has three independent pulse width modulation
+(PWM) outputs that are controlled by one, or a combination of three,
+temperature zones. Both high- and low-frequency PWM ranges are supported.
+
+The aSC7621 also includes a digital filter that can be invoked to smooth
+temperature readings for better control of fan speed and minimum acoustic
+impact.
+
+The aSC7621 has tachometer inputs to measure fan speed on up to four fans.
+Limit and status registers for all measured values are included to alert
+the system host that any measurements are outside of programmed limits
+via status registers.
+
+System voltages of VCCP, 2.5V, 3.3V, 5.0V, and 12V motherboard power are
+monitored efficiently with internal scaling resistors.
+
+Features
+- Supports PECI interface and monitors internal and remote thermal diodes
+- 2-wire, SMBus 2.0 compliant, serial interface
+- 10-bit ADC
+- Monitors VCCP, 2.5V, 3.3V, 5.0V, and 12V motherboard/processor supplies
+- Programmable autonomous fan control based on temperature readings
+- Noise filtering of temperature reading for fan speed control
+- 0.25C digital temperature sensor resolution
+- 3 PWM fan speed control outputs for 2-, 3- or 4-wire fans and up to 4 fan
+ tachometer inputs
+- Enhanced measured temperature to Temperature Zone assignment.
+- Provides high and low PWM frequency ranges
+- 3 GPIO pins for custom use
+- 24-Lead QSOP package
+
+Configuration Notes
+===================
+
+Except where noted below, the sysfs entries created by this driver follow
+the standards defined in "sysfs-interface".
+
+temp1_source
+ 0 (default) peci_legacy = 0, Remote 1 Temperature
+ peci_legacy = 1, PECI Processor Temperature 0
+ 1 Remote 1 Temperature
+ 2 Remote 2 Temperature
+ 3 Internal Temperature
+ 4 PECI Processor Temperature 0
+ 5 PECI Processor Temperature 1
+ 6 PECI Processor Temperature 2
+ 7 PECI Processor Temperature 3
+
+temp2_source
+ 0 (default) Internal Temperature
+ 1 Remote 1 Temperature
+ 2 Remote 2 Temperature
+ 3 Internal Temperature
+ 4 PECI Processor Temperature 0
+ 5 PECI Processor Temperature 1
+ 6 PECI Processor Temperature 2
+ 7 PECI Processor Temperature 3
+
+temp3_source
+ 0 (default) Remote 2 Temperature
+ 1 Remote 1 Temperature
+ 2 Remote 2 Temperature
+ 3 Internal Temperature
+ 4 PECI Processor Temperature 0
+ 5 PECI Processor Temperature 1
+ 6 PECI Processor Temperature 2
+ 7 PECI Processor Temperature 3
+
+temp4_source
+ 0 (default) peci_legacy = 0, PECI Processor Temperature 0
+ peci_legacy = 1, Remote 1 Temperature
+ 1 Remote 1 Temperature
+ 2 Remote 2 Temperature
+ 3 Internal Temperature
+ 4 PECI Processor Temperature 0
+ 5 PECI Processor Temperature 1
+ 6 PECI Processor Temperature 2
+ 7 PECI Processor Temperature 3
+
+temp[1-4]_smoothing_enable
+temp[1-4]_smoothing_time
+ Smooths spikes in temp readings caused by noise.
+ Valid values in milliseconds are:
+ 35000
+ 17600
+ 11800
+ 7000
+ 4400
+ 3000
+ 1600
+ 800
+
+temp[1-4]_crit
+ When the corresponding zone temperature reaches this value,
+ ALL pwm outputs will got to 100%.
+
+temp[5-8]_input
+temp[5-8]_enable
+ The aSC7621 can also read temperatures provided by the processor
+ via the PECI bus. Usually these are "core" temps and are relative
+ to the point where the automatic thermal control circuit starts
+ throttling. This means that these are usually negative numbers.
+
+pwm[1-3]_enable
+ 0 Fan off.
+ 1 Fan on manual control.
+ 2 Fan on automatic control and will run at the minimum pwm
+ if the temperature for the zone is below the minimum.
+ 3 Fan on automatic control but will be off if the temperature
+ for the zone is below the minimum.
+ 4-254 Ignored.
+ 255 Fan on full.
+
+pwm[1-3]_auto_channels
+ Bitmap as described in sysctl-interface with the following
+ exceptions...
+ Only the following combination of zones (and their corresponding masks)
+ are valid:
+ 1
+ 2
+ 3
+ 2,3
+ 1,2,3
+ 4
+ 1,2,3,4
+
+ Special values:
+ 0 Disabled.
+ 16 Fan on manual control.
+ 31 Fan on full.
+
+
+pwm[1-3]_invert
+ When set, inverts the meaning of pwm[1-3].
+ i.e. when pwm = 0, the fan will be on full and
+ when pwm = 255 the fan will be off.
+
+pwm[1-3]_freq
+ PWM frequency in Hz
+ Valid values in Hz are:
+
+ 10
+ 15
+ 23
+ 30 (default)
+ 38
+ 47
+ 62
+ 94
+ 23000
+ 24000
+ 25000
+ 26000
+ 27000
+ 28000
+ 29000
+ 30000
+
+ Setting any other value will be ignored.
+
+peci_enable
+ Enables or disables PECI
+
+peci_avg
+ Input filter average time.
+
+ 0 0 Sec. (no Smoothing) (default)
+ 1 0.25 Sec.
+ 2 0.5 Sec.
+ 3 1.0 Sec.
+ 4 2.0 Sec.
+ 5 4.0 Sec.
+ 6 8.0 Sec.
+ 7 0.0 Sec.
+
+peci_legacy
+
+ 0 Standard Mode (default)
+ Remote Diode 1 reading is associated with
+ Temperature Zone 1, PECI is associated with
+ Zone 4
+
+ 1 Legacy Mode
+ PECI is associated with Temperature Zone 1,
+ Remote Diode 1 is associated with Zone 4
+
+peci_diode
+ Diode filter
+
+ 0 0.25 Sec.
+ 1 1.1 Sec.
+ 2 2.4 Sec. (default)
+ 3 3.4 Sec.
+ 4 5.0 Sec.
+ 5 6.8 Sec.
+ 6 10.2 Sec.
+ 7 16.4 Sec.
+
+peci_4domain
+ Four domain enable
+
+ 0 1 or 2 Domains for enabled processors (default)
+ 1 3 or 4 Domains for enabled processors
+
+peci_domain
+ Domain
+
+ 0 Processor contains a single domain (0) (default)
+ 1 Processor contains two domains (0,1)
diff --git a/Documentation/hwmon/coretemp b/Documentation/hwmon/coretemp
new file mode 100644
index 000000000..fec5a9bf7
--- /dev/null
+++ b/Documentation/hwmon/coretemp
@@ -0,0 +1,181 @@
+Kernel driver coretemp
+======================
+
+Supported chips:
+ * All Intel Core family
+ Prefix: 'coretemp'
+ CPUID: family 0x6, models 0xe (Pentium M DC), 0xf (Core 2 DC 65nm),
+ 0x16 (Core 2 SC 65nm), 0x17 (Penryn 45nm),
+ 0x1a (Nehalem), 0x1c (Atom), 0x1e (Lynnfield),
+ 0x26 (Tunnel Creek Atom), 0x27 (Medfield Atom),
+ 0x36 (Cedar Trail Atom)
+ Datasheet: Intel 64 and IA-32 Architectures Software Developer's Manual
+ Volume 3A: System Programming Guide
+ http://softwarecommunity.intel.com/Wiki/Mobility/720.htm
+
+Author: Rudolf Marek
+
+Description
+-----------
+This driver permits reading the DTS (Digital Temperature Sensor) embedded
+inside Intel CPUs. This driver can read both the per-core and per-package
+temperature using the appropriate sensors. The per-package sensor is new;
+as of now, it is present only in the SandyBridge platform. The driver will
+show the temperature of all cores inside a package under a single device
+directory inside hwmon.
+
+Temperature is measured in degrees Celsius and measurement resolution is
+1 degree C. Valid temperatures are from 0 to TjMax degrees C, because
+the actual value of temperature register is in fact a delta from TjMax.
+
+Temperature known as TjMax is the maximum junction temperature of processor,
+which depends on the CPU model. See table below. At this temperature, protection
+mechanism will perform actions to forcibly cool down the processor. Alarm
+may be raised, if the temperature grows enough (more than TjMax) to trigger
+the Out-Of-Spec bit. Following table summarizes the exported sysfs files:
+
+All Sysfs entries are named with their core_id (represented here by 'X').
+tempX_input - Core temperature (in millidegrees Celsius).
+tempX_max - All cooling devices should be turned on (on Core2).
+tempX_crit - Maximum junction temperature (in millidegrees Celsius).
+tempX_crit_alarm - Set when Out-of-spec bit is set, never clears.
+ Correct CPU operation is no longer guaranteed.
+tempX_label - Contains string "Core X", where X is processor
+ number. For Package temp, this will be "Physical id Y",
+ where Y is the package number.
+
+On CPU models which support it, TjMax is read from a model-specific register.
+On other models, it is set to an arbitrary value based on weak heuristics.
+If these heuristics don't work for you, you can pass the correct TjMax value
+as a module parameter (tjmax).
+
+Appendix A. Known TjMax lists (TBD):
+Some information comes from ark.intel.com
+
+Process Processor TjMax(C)
+
+22nm Core i5/i7 Processors
+ i7 3920XM, 3820QM, 3720QM, 3667U, 3520M 105
+ i5 3427U, 3360M/3320M 105
+ i7 3770/3770K 105
+ i5 3570/3570K, 3550, 3470/3450 105
+ i7 3770S 103
+ i5 3570S/3550S, 3475S/3470S/3450S 103
+ i7 3770T 94
+ i5 3570T 94
+ i5 3470T 91
+
+32nm Core i3/i5/i7 Processors
+ i7 2600 98
+ i7 660UM/640/620, 640LM/620, 620M, 610E 105
+ i5 540UM/520/430, 540M/520/450/430 105
+ i3 330E, 370M/350/330 90 rPGA, 105 BGA
+ i3 330UM 105
+
+32nm Core i7 Extreme Processors
+ 980X 100
+
+32nm Celeron Processors
+ U3400 105
+ P4505/P4500 90
+
+32nm Atom Processors
+ S1260/1220 95
+ S1240 102
+ Z2460 90
+ Z2760 90
+ D2700/2550/2500 100
+ N2850/2800/2650/2600 100
+
+45nm Xeon Processors 5400 Quad-Core
+ X5492, X5482, X5472, X5470, X5460, X5450 85
+ E5472, E5462, E5450/40/30/20/10/05 85
+ L5408 95
+ L5430, L5420, L5410 70
+
+45nm Xeon Processors 5200 Dual-Core
+ X5282, X5272, X5270, X5260 90
+ E5240 90
+ E5205, E5220 70, 90
+ L5240 70
+ L5238, L5215 95
+
+45nm Atom Processors
+ D525/510/425/410 100
+ K525/510/425/410 100
+ Z670/650 90
+ Z560/550/540/530P/530/520PT/520/515/510PT/510P 90
+ Z510/500 90
+ N570/550 100
+ N475/470/455/450 100
+ N280/270 90
+ 330/230 125
+ E680/660/640/620 90
+ E680T/660T/640T/620T 110
+ E665C/645C 90
+ E665CT/645CT 110
+ CE4170/4150/4110 110
+ CE4200 series unknown
+ CE5300 series unknown
+
+45nm Core2 Processors
+ Solo ULV SU3500/3300 100
+ T9900/9800/9600/9550/9500/9400/9300/8300/8100 105
+ T6670/6500/6400 105
+ T6600 90
+ SU9600/9400/9300 105
+ SP9600/9400 105
+ SL9600/9400/9380/9300 105
+ P9700/9600/9500/8800/8700/8600/8400/7570 105
+ P7550/7450 90
+
+45nm Core2 Quad Processors
+ Q9100/9000 100
+
+45nm Core2 Extreme Processors
+ X9100/9000 105
+ QX9300 100
+
+45nm Core i3/i5/i7 Processors
+ i7 940XM/920 100
+ i7 840QM/820/740/720 100
+
+45nm Celeron Processors
+ SU2300 100
+ 900 105
+
+65nm Core2 Duo Processors
+ Solo U2200, U2100 100
+ U7700/7600/7500 100
+ T7800/7700/7600/7500/7400/7300/7250/7200/7100 100
+ T5870/5670/5600/5550/5500/5470/5450/5300/5270 100
+ T5250 100
+ T5800/5750/5200 85
+ L7700/7500/7400/7300/7200 100
+
+65nm Core2 Extreme Processors
+ X7900/7800 100
+
+65nm Core Duo Processors
+ U2500/2400 100
+ T2700/2600/2450/2400/2350/2300E/2300/2250/2050 100
+ L2500/2400/2300 100
+
+65nm Core Solo Processors
+ U1500/1400/1300 100
+ T1400/1350/1300/1250 100
+
+65nm Xeon Processors 5000 Quad-Core
+ X5000 90-95
+ E5000 80
+ L5000 70
+ L5318 95
+
+65nm Xeon Processors 5000 Dual-Core
+ 5080, 5063, 5060, 5050, 5030 80-90
+ 5160, 5150, 5148, 5140, 5130, 5120, 5110 80
+ L5138 100
+
+65nm Celeron Processors
+ T1700/1600 100
+ 560/550/540/530 100
diff --git a/Documentation/hwmon/da9052 b/Documentation/hwmon/da9052
new file mode 100644
index 000000000..5bc51346b
--- /dev/null
+++ b/Documentation/hwmon/da9052
@@ -0,0 +1,61 @@
+Supported chips:
+ * Dialog Semiconductors DA9052-BC and DA9053-AA/Bx PMICs
+ Prefix: 'da9052'
+ Datasheet: Datasheet is not publicly available.
+
+Authors: David Dajun Chen <dchen@diasemi.com>
+
+Description
+-----------
+
+The DA9052/53 provides an Analogue to Digital Converter (ADC) with 10 bits
+resolution and track and hold circuitry combined with an analogue input
+multiplexer. The analogue input multiplexer will allow conversion of up to 10
+different inputs. The track and hold circuit ensures stable input voltages at
+the input of the ADC during the conversion.
+
+The ADC is used to measure the following inputs:
+Channel 0: VDDOUT - measurement of the system voltage
+Channel 1: ICH - internal battery charger current measurement
+Channel 2: TBAT - output from the battery NTC
+Channel 3: VBAT - measurement of the battery voltage
+Channel 4: ADC_IN4 - high impedance input (0 - 2.5V)
+Channel 5: ADC_IN5 - high impedance input (0 - 2.5V)
+Channel 6: ADC_IN6 - high impedance input (0 - 2.5V)
+Channel 7: XY - TSI interface to measure the X and Y voltage of the touch
+ screen resistive potentiometers
+Channel 8: Internal Tjunc. - sense (internal temp. sensor)
+Channel 9: VBBAT - measurement of the backup battery voltage
+
+By using sysfs attributes we can measure the system voltage VDDOUT, the battery
+charging current ICH, battery temperature TBAT, battery junction temperature
+TJUNC, battery voltage VBAT and the back up battery voltage VBBAT.
+
+Voltage Monitoring
+------------------
+
+Voltages are sampled by a 10 bit ADC.
+
+The battery voltage is calculated as:
+ Milli volt = ((ADC value * 1000) / 512) + 2500
+
+The backup battery voltage is calculated as:
+ Milli volt = (ADC value * 2500) / 512;
+
+The voltages on ADC channels 4, 5 and 6 are calculated as:
+ Milli volt = (ADC value * 2500) / 1023
+
+Temperature Monitoring
+----------------------
+
+Temperatures are sampled by a 10 bit ADC. Junction and battery temperatures
+are monitored by the ADC channels.
+
+The junction temperature is calculated:
+ Degrees celsius = 1.708 * (TJUNC_RES - T_OFFSET) - 108.8
+The junction temperature attribute is supported by the driver.
+
+The battery temperature is calculated:
+ Degree Celsius = 1 / (t1 + 1/298)- 273
+where t1 = (1/B)* ln(( ADCval * 2.5)/(R25*ITBAT*255))
+Default values of R25, B, ITBAT are 10e3, 3380 and 50e-6 respectively.
diff --git a/Documentation/hwmon/da9055 b/Documentation/hwmon/da9055
new file mode 100644
index 000000000..855c3f536
--- /dev/null
+++ b/Documentation/hwmon/da9055
@@ -0,0 +1,47 @@
+Supported chips:
+ * Dialog Semiconductors DA9055 PMIC
+ Prefix: 'da9055'
+ Datasheet: Datasheet is not publicly available.
+
+Authors: David Dajun Chen <dchen@diasemi.com>
+
+Description
+-----------
+
+The DA9055 provides an Analogue to Digital Converter (ADC) with 10 bits
+resolution and track and hold circuitry combined with an analogue input
+multiplexer. The analogue input multiplexer will allow conversion of up to 5
+different inputs. The track and hold circuit ensures stable input voltages at
+the input of the ADC during the conversion.
+
+The ADC is used to measure the following inputs:
+Channel 0: VDDOUT - measurement of the system voltage
+Channel 1: ADC_IN1 - high impedance input (0 - 2.5V)
+Channel 2: ADC_IN2 - high impedance input (0 - 2.5V)
+Channel 3: ADC_IN3 - high impedance input (0 - 2.5V)
+Channel 4: Internal Tjunc. - sense (internal temp. sensor)
+
+By using sysfs attributes we can measure the system voltage VDDOUT,
+chip junction temperature and auxiliary channels voltages.
+
+Voltage Monitoring
+------------------
+
+Voltages are sampled in a AUTO mode it can be manually sampled too and results
+are stored in a 10 bit ADC.
+
+The system voltage is calculated as:
+ Milli volt = ((ADC value * 1000) / 85) + 2500
+
+The voltages on ADC channels 1, 2 and 3 are calculated as:
+ Milli volt = (ADC value * 1000) / 102
+
+Temperature Monitoring
+----------------------
+
+Temperatures are sampled by a 10 bit ADC. Junction temperatures
+are monitored by the ADC channels.
+
+The junction temperature is calculated:
+ Degrees celsius = -0.4084 * (ADC_RES - T_OFFSET) + 307.6332
+The junction temperature attribute is supported by the driver.
diff --git a/Documentation/hwmon/dme1737 b/Documentation/hwmon/dme1737
new file mode 100644
index 000000000..4d2935145
--- /dev/null
+++ b/Documentation/hwmon/dme1737
@@ -0,0 +1,328 @@
+Kernel driver dme1737
+=====================
+
+Supported chips:
+ * SMSC DME1737 and compatibles (like Asus A8000)
+ Prefix: 'dme1737'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: Provided by SMSC upon request and under NDA
+ * SMSC SCH3112, SCH3114, SCH3116
+ Prefix: 'sch311x'
+ Addresses scanned: none, address read from Super-I/O config space
+ Datasheet: Available on the Internet
+ * SMSC SCH5027
+ Prefix: 'sch5027'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: Provided by SMSC upon request and under NDA
+ * SMSC SCH5127
+ Prefix: 'sch5127'
+ Addresses scanned: none, address read from Super-I/O config space
+ Datasheet: Provided by SMSC upon request and under NDA
+
+Authors:
+ Juerg Haefliger <juergh@gmail.com>
+
+
+Module Parameters
+-----------------
+
+* force_start: bool Enables the monitoring of voltage, fan and temp inputs
+ and PWM output control functions. Using this parameter
+ shouldn't be required since the BIOS usually takes care
+ of this.
+* probe_all_addr: bool Include non-standard LPC addresses 0x162e and 0x164e
+ when probing for ISA devices. This is required for the
+ following boards:
+ - VIA EPIA SN18000
+
+
+Description
+-----------
+
+This driver implements support for the hardware monitoring capabilities of the
+SMSC DME1737 and Asus A8000 (which are the same), SMSC SCH5027, SCH311x,
+and SCH5127 Super-I/O chips. These chips feature monitoring of 3 temp sensors
+temp[1-3] (2 remote diodes and 1 internal), 8 voltages in[0-7] (7 external and
+1 internal) and up to 6 fan speeds fan[1-6]. Additionally, the chips implement
+up to 5 PWM outputs pwm[1-3,5-6] for controlling fan speeds both manually and
+automatically.
+
+For the DME1737, A8000 and SCH5027, fan[1-2] and pwm[1-2] are always present.
+Fan[3-6] and pwm[3,5-6] are optional features and their availability depends on
+the configuration of the chip. The driver will detect which features are
+present during initialization and create the sysfs attributes accordingly.
+
+For the SCH311x and SCH5127, fan[1-3] and pwm[1-3] are always present and
+fan[4-6] and pwm[5-6] don't exist.
+
+The hardware monitoring features of the DME1737, A8000, and SCH5027 are only
+accessible via SMBus, while the SCH311x and SCH5127 only provide access via
+the ISA bus. The driver will therefore register itself as an I2C client driver
+if it detects a DME1737, A8000, or SCH5027 and as a platform driver if it
+detects a SCH311x or SCH5127 chip.
+
+
+Voltage Monitoring
+------------------
+
+The voltage inputs are sampled with 12-bit resolution and have internal
+scaling resistors. The values returned by the driver therefore reflect true
+millivolts and don't need scaling. The voltage inputs are mapped as follows
+(the last column indicates the input ranges):
+
+DME1737, A8000:
+ in0: +5VTR (+5V standby) 0V - 6.64V
+ in1: Vccp (processor core) 0V - 3V
+ in2: VCC (internal +3.3V) 0V - 4.38V
+ in3: +5V 0V - 6.64V
+ in4: +12V 0V - 16V
+ in5: VTR (+3.3V standby) 0V - 4.38V
+ in6: Vbat (+3.0V) 0V - 4.38V
+
+SCH311x:
+ in0: +2.5V 0V - 3.32V
+ in1: Vccp (processor core) 0V - 2V
+ in2: VCC (internal +3.3V) 0V - 4.38V
+ in3: +5V 0V - 6.64V
+ in4: +12V 0V - 16V
+ in5: VTR (+3.3V standby) 0V - 4.38V
+ in6: Vbat (+3.0V) 0V - 4.38V
+
+SCH5027:
+ in0: +5VTR (+5V standby) 0V - 6.64V
+ in1: Vccp (processor core) 0V - 3V
+ in2: VCC (internal +3.3V) 0V - 4.38V
+ in3: V2_IN 0V - 1.5V
+ in4: V1_IN 0V - 1.5V
+ in5: VTR (+3.3V standby) 0V - 4.38V
+ in6: Vbat (+3.0V) 0V - 4.38V
+
+SCH5127:
+ in0: +2.5 0V - 3.32V
+ in1: Vccp (processor core) 0V - 3V
+ in2: VCC (internal +3.3V) 0V - 4.38V
+ in3: V2_IN 0V - 1.5V
+ in4: V1_IN 0V - 1.5V
+ in5: VTR (+3.3V standby) 0V - 4.38V
+ in6: Vbat (+3.0V) 0V - 4.38V
+ in7: Vtrip (+1.5V) 0V - 1.99V
+
+Each voltage input has associated min and max limits which trigger an alarm
+when crossed.
+
+
+Temperature Monitoring
+----------------------
+
+Temperatures are measured with 12-bit resolution and reported in millidegree
+Celsius. The chip also features offsets for all 3 temperature inputs which -
+when programmed - get added to the input readings. The chip does all the
+scaling by itself and the driver therefore reports true temperatures that don't
+need any user-space adjustments. The temperature inputs are mapped as follows
+(the last column indicates the input ranges):
+
+ temp1: Remote diode 1 (3904 type) temperature -127C - +127C
+ temp2: DME1737 internal temperature -127C - +127C
+ temp3: Remote diode 2 (3904 type) temperature -127C - +127C
+
+Each temperature input has associated min and max limits which trigger an alarm
+when crossed. Additionally, each temperature input has a fault attribute that
+returns 1 when a faulty diode or an unconnected input is detected and 0
+otherwise.
+
+
+Fan Monitoring
+--------------
+
+Fan RPMs are measured with 16-bit resolution. The chip provides inputs for 6
+fan tachometers. All 6 inputs have an associated min limit which triggers an
+alarm when crossed. Fan inputs 1-4 provide type attributes that need to be set
+to the number of pulses per fan revolution that the connected tachometer
+generates. Supported values are 1, 2, and 4. Fan inputs 5-6 only support fans
+that generate 2 pulses per revolution. Fan inputs 5-6 also provide a max
+attribute that needs to be set to the maximum attainable RPM (fan at 100% duty-
+cycle) of the input. The chip adjusts the sampling rate based on this value.
+
+
+PWM Output Control
+------------------
+
+This chip features 5 PWM outputs. PWM outputs 1-3 are associated with fan
+inputs 1-3 and PWM outputs 5-6 are associated with fan inputs 5-6. PWM outputs
+1-3 can be configured to operate either in manual or automatic mode by setting
+the appropriate enable attribute accordingly. PWM outputs 5-6 can only operate
+in manual mode, their enable attributes are therefore read-only. When set to
+manual mode, the fan speed is set by writing the duty-cycle value to the
+appropriate PWM attribute. In automatic mode, the PWM attribute returns the
+current duty-cycle as set by the fan controller in the chip. All PWM outputs
+support the setting of the output frequency via the freq attribute.
+
+In automatic mode, the chip supports the setting of the PWM ramp rate which
+defines how fast the PWM output is adjusting to changes of the associated
+temperature input. Associating PWM outputs to temperature inputs is done via
+temperature zones. The chip features 3 zones whose assignments to temperature
+inputs is static and determined during initialization. These assignments can
+be retrieved via the zone[1-3]_auto_channels_temp attributes. Each PWM output
+is assigned to one (or hottest of multiple) temperature zone(s) through the
+pwm[1-3]_auto_channels_zone attributes. Each PWM output has 3 distinct output
+duty-cycles: full, low, and min. Full is internally hard-wired to 255 (100%)
+and low and min can be programmed via pwm[1-3]_auto_point1_pwm and
+pwm[1-3]_auto_pwm_min, respectively. The thermal thresholds of the zones are
+programmed via zone[1-3]_auto_point[1-3]_temp and
+zone[1-3]_auto_point1_temp_hyst:
+
+ pwm[1-3]_auto_point2_pwm full-speed duty-cycle (255, i.e., 100%)
+ pwm[1-3]_auto_point1_pwm low-speed duty-cycle
+ pwm[1-3]_auto_pwm_min min-speed duty-cycle
+
+ zone[1-3]_auto_point3_temp full-speed temp (all outputs)
+ zone[1-3]_auto_point2_temp full-speed temp
+ zone[1-3]_auto_point1_temp low-speed temp
+ zone[1-3]_auto_point1_temp_hyst min-speed temp
+
+The chip adjusts the output duty-cycle linearly in the range of auto_point1_pwm
+to auto_point2_pwm if the temperature of the associated zone is between
+auto_point1_temp and auto_point2_temp. If the temperature drops below the
+auto_point1_temp_hyst value, the output duty-cycle is set to the auto_pwm_min
+value which only supports two values: 0 or auto_point1_pwm. That means that the
+fan either turns completely off or keeps spinning with the low-speed
+duty-cycle. If any of the temperatures rise above the auto_point3_temp value,
+all PWM outputs are set to 100% duty-cycle.
+
+Following is another representation of how the chip sets the output duty-cycle
+based on the temperature of the associated thermal zone:
+
+ Duty-Cycle Duty-Cycle
+ Temperature Rising Temp Falling Temp
+ ----------- ----------- ------------
+ full-speed full-speed full-speed
+
+ < linearly adjusted duty-cycle >
+
+ low-speed low-speed low-speed
+ min-speed low-speed
+ min-speed min-speed min-speed
+ min-speed min-speed
+
+
+Sysfs Attributes
+----------------
+
+Following is a list of all sysfs attributes that the driver provides, their
+permissions and a short description:
+
+Name Perm Description
+---- ---- -----------
+cpu0_vid RO CPU core reference voltage in
+ millivolts.
+vrm RW Voltage regulator module version
+ number.
+
+in[0-7]_input RO Measured voltage in millivolts.
+in[0-7]_min RW Low limit for voltage input.
+in[0-7]_max RW High limit for voltage input.
+in[0-7]_alarm RO Voltage input alarm. Returns 1 if
+ voltage input is or went outside the
+ associated min-max range, 0 otherwise.
+
+temp[1-3]_input RO Measured temperature in millidegree
+ Celsius.
+temp[1-3]_min RW Low limit for temp input.
+temp[1-3]_max RW High limit for temp input.
+temp[1-3]_offset RW Offset for temp input. This value will
+ be added by the chip to the measured
+ temperature.
+temp[1-3]_alarm RO Alarm for temp input. Returns 1 if temp
+ input is or went outside the associated
+ min-max range, 0 otherwise.
+temp[1-3]_fault RO Temp input fault. Returns 1 if the chip
+ detects a faulty thermal diode or an
+ unconnected temp input, 0 otherwise.
+
+zone[1-3]_auto_channels_temp RO Temperature zone to temperature input
+ mapping. This attribute is a bitfield
+ and supports the following values:
+ 1: temp1
+ 2: temp2
+ 4: temp3
+zone[1-3]_auto_point1_temp_hyst RW Auto PWM temp point1 hysteresis. The
+ output of the corresponding PWM is set
+ to the pwm_auto_min value if the temp
+ falls below the auto_point1_temp_hyst
+ value.
+zone[1-3]_auto_point[1-3]_temp RW Auto PWM temp points. Auto_point1 is
+ the low-speed temp, auto_point2 is the
+ full-speed temp, and auto_point3 is the
+ temp at which all PWM outputs are set
+ to full-speed (100% duty-cycle).
+
+fan[1-6]_input RO Measured fan speed in RPM.
+fan[1-6]_min RW Low limit for fan input.
+fan[1-6]_alarm RO Alarm for fan input. Returns 1 if fan
+ input is or went below the associated
+ min value, 0 otherwise.
+fan[1-4]_type RW Type of attached fan. Expressed in
+ number of pulses per revolution that
+ the fan generates. Supported values are
+ 1, 2, and 4.
+fan[5-6]_max RW Max attainable RPM at 100% duty-cycle.
+ Required for chip to adjust the
+ sampling rate accordingly.
+
+pmw[1-3,5-6] RO/RW Duty-cycle of PWM output. Supported
+ values are 0-255 (0%-100%). Only
+ writeable if the associated PWM is in
+ manual mode.
+pwm[1-3]_enable RW Enable of PWM outputs 1-3. Supported
+ values are:
+ 0: turned off (output @ 100%)
+ 1: manual mode
+ 2: automatic mode
+pwm[5-6]_enable RO Enable of PWM outputs 5-6. Always
+ returns 1 since these 2 outputs are
+ hard-wired to manual mode.
+pmw[1-3,5-6]_freq RW Frequency of PWM output. Supported
+ values are in the range 11Hz-30000Hz
+ (default is 25000Hz).
+pmw[1-3]_ramp_rate RW Ramp rate of PWM output. Determines how
+ fast the PWM duty-cycle will change
+ when the PWM is in automatic mode.
+ Expressed in ms per PWM step. Supported
+ values are in the range 0ms-206ms
+ (default is 0, which means the duty-
+ cycle changes instantly).
+pwm[1-3]_auto_channels_zone RW PWM output to temperature zone mapping.
+ This attribute is a bitfield and
+ supports the following values:
+ 1: zone1
+ 2: zone2
+ 4: zone3
+ 6: highest of zone[2-3]
+ 7: highest of zone[1-3]
+pwm[1-3]_auto_pwm_min RW Auto PWM min pwm. Minimum PWM duty-
+ cycle. Supported values are 0 or
+ auto_point1_pwm.
+pwm[1-3]_auto_point1_pwm RW Auto PWM pwm point. Auto_point1 is the
+ low-speed duty-cycle.
+pwm[1-3]_auto_point2_pwm RO Auto PWM pwm point. Auto_point2 is the
+ full-speed duty-cycle which is hard-
+ wired to 255 (100% duty-cycle).
+
+Chip Differences
+----------------
+
+Feature dme1737 sch311x sch5027 sch5127
+-------------------------------------------------------
+temp[1-3]_offset yes yes
+vid yes
+zone3 yes yes yes
+zone[1-3]_hyst yes yes
+pwm min/off yes yes
+fan3 opt yes opt yes
+pwm3 opt yes opt yes
+fan4 opt opt
+fan5 opt opt
+pwm5 opt opt
+fan6 opt opt
+pwm6 opt opt
+in7 yes
diff --git a/Documentation/hwmon/ds1621 b/Documentation/hwmon/ds1621
new file mode 100644
index 000000000..f775e612f
--- /dev/null
+++ b/Documentation/hwmon/ds1621
@@ -0,0 +1,187 @@
+Kernel driver ds1621
+====================
+
+Supported chips:
+ * Dallas Semiconductor / Maxim Integrated DS1621
+ Prefix: 'ds1621'
+ Addresses scanned: none
+ Datasheet: Publicly available from www.maximintegrated.com
+
+ * Dallas Semiconductor DS1625
+ Prefix: 'ds1625'
+ Addresses scanned: none
+ Datasheet: Publicly available from www.datasheetarchive.com
+
+ * Maxim Integrated DS1631
+ Prefix: 'ds1631'
+ Addresses scanned: none
+ Datasheet: Publicly available from www.maximintegrated.com
+
+ * Maxim Integrated DS1721
+ Prefix: 'ds1721'
+ Addresses scanned: none
+ Datasheet: Publicly available from www.maximintegrated.com
+
+ * Maxim Integrated DS1731
+ Prefix: 'ds1731'
+ Addresses scanned: none
+ Datasheet: Publicly available from www.maximintegrated.com
+
+Authors:
+ Christian W. Zuckschwerdt <zany@triq.net>
+ valuable contributions by Jan M. Sendler <sendler@sendler.de>
+ ported to 2.6 by Aurelien Jarno <aurelien@aurel32.net>
+ with the help of Jean Delvare <jdelvare@suse.de>
+
+Module Parameters
+------------------
+
+* polarity int
+ Output's polarity: 0 = active high, 1 = active low
+
+Description
+-----------
+
+The DS1621 is a (one instance) digital thermometer and thermostat. It has
+both high and low temperature limits which can be user defined (i.e.
+programmed into non-volatile on-chip registers). Temperature range is -55
+degree Celsius to +125 in 0.5 increments. You may convert this into a
+Fahrenheit range of -67 to +257 degrees with 0.9 steps. If polarity
+parameter is not provided, original value is used.
+
+As for the thermostat, behavior can also be programmed using the polarity
+toggle. On the one hand ("heater"), the thermostat output of the chip,
+Tout, will trigger when the low limit temperature is met or underrun and
+stays high until the high limit is met or exceeded. On the other hand
+("cooler"), vice versa. That way "heater" equals "active low", whereas
+"conditioner" equals "active high". Please note that the DS1621 data sheet
+is somewhat misleading in this point since setting the polarity bit does
+not simply invert Tout.
+
+A second thing is that, during extensive testing, Tout showed a tolerance
+of up to +/- 0.5 degrees even when compared against precise temperature
+readings. Be sure to have a high vs. low temperature limit gap of al least
+1.0 degree Celsius to avoid Tout "bouncing", though!
+
+The alarm bits are set when the high or low limits are met or exceeded and
+are reset by the module as soon as the respective temperature ranges are
+left.
+
+The alarm registers are in no way suitable to find out about the actual
+status of Tout. They will only tell you about its history, whether or not
+any of the limits have ever been met or exceeded since last power-up or
+reset. Be aware: When testing, it showed that the status of Tout can change
+with neither of the alarms set.
+
+Since there is no version or vendor identification register, there is
+no unique identification for these devices. Therefore, explicit device
+instantiation is required for correct device identification and functionality
+(one device per address in this address range: 0x48..0x4f).
+
+The DS1625 is pin compatible and functionally equivalent with the DS1621,
+but the DS1621 is meant to replace it. The DS1631, DS1721, and DS1731 are
+also pin compatible with the DS1621 and provide multi-resolution support.
+
+Additionally, the DS1721 data sheet says the temperature flags (THF and TLF)
+are used internally, however, these flags do get set and cleared as the actual
+temperature crosses the min or max settings (which by default are set to 75
+and 80 degrees respectively).
+
+Temperature Conversion:
+-----------------------
+DS1621 - 750ms (older devices may take up to 1000ms)
+DS1625 - 500ms
+DS1631 - 93ms..750ms for 9..12 bits resolution, respectively.
+DS1721 - 93ms..750ms for 9..12 bits resolution, respectively.
+DS1731 - 93ms..750ms for 9..12 bits resolution, respectively.
+
+Note:
+On the DS1621, internal access to non-volatile registers may last for 10ms
+or less (unverified on the other devices).
+
+Temperature Accuracy:
+---------------------
+DS1621: +/- 0.5 degree Celsius (from 0 to +70 degrees)
+DS1625: +/- 0.5 degree Celsius (from 0 to +70 degrees)
+DS1631: +/- 0.5 degree Celsius (from 0 to +70 degrees)
+DS1721: +/- 1.0 degree Celsius (from -10 to +85 degrees)
+DS1731: +/- 1.0 degree Celsius (from -10 to +85 degrees)
+
+Note:
+Please refer to the device datasheets for accuracy at other temperatures.
+
+Temperature Resolution:
+-----------------------
+As mentioned above, the DS1631, DS1721, and DS1731 provide multi-resolution
+support, which is achieved via the R0 and R1 config register bits, where:
+
+R0..R1
+------
+ 0 0 => 9 bits, 0.5 degrees Celcius
+ 1 0 => 10 bits, 0.25 degrees Celcius
+ 0 1 => 11 bits, 0.125 degrees Celcius
+ 1 1 => 12 bits, 0.0625 degrees Celcius
+
+Note:
+At initial device power-on, the default resolution is set to 12-bits.
+
+The resolution mode for the DS1631, DS1721, or DS1731 can be changed from
+userspace, via the device 'update_interval' sysfs attribute. This attribute
+will normalize the range of input values to the device maximum resolution
+values defined in the datasheet as follows:
+
+Resolution Conversion Time Input Range
+ (C/LSB) (msec) (msec)
+------------------------------------------------
+0.5 93.75 0....94
+0.25 187.5 95...187
+0.125 375 188..375
+0.0625 750 376..infinity
+------------------------------------------------
+
+The following examples show how the 'update_interval' attribute can be
+used to change the conversion time:
+
+$ cat update_interval
+750
+$ cat temp1_input
+22062
+$
+$ echo 300 > update_interval
+$ cat update_interval
+375
+$ cat temp1_input
+22125
+$
+$ echo 150 > update_interval
+$ cat update_interval
+188
+$ cat temp1_input
+22250
+$
+$ echo 1 > update_interval
+$ cat update_interval
+94
+$ cat temp1_input
+22000
+$
+$ echo 1000 > update_interval
+$ cat update_interval
+750
+$ cat temp1_input
+22062
+$
+
+As shown, the ds1621 driver automatically adjusts the 'update_interval'
+user input, via a step function. Reading back the 'update_interval' value
+after a write operation provides the conversion time used by the device.
+
+Mathematically, the resolution can be derived from the conversion time
+via the following function:
+
+ g(x) = 0.5 * [minimum_conversion_time/x]
+
+where:
+ -> 'x' = the output from 'update_interval'
+ -> 'g(x)' = the resolution in degrees C per LSB.
+ -> 93.75ms = minimum conversion time
diff --git a/Documentation/hwmon/ds620 b/Documentation/hwmon/ds620
new file mode 100644
index 000000000..1fbe3cd91
--- /dev/null
+++ b/Documentation/hwmon/ds620
@@ -0,0 +1,34 @@
+Kernel driver ds620
+===================
+
+Supported chips:
+ * Dallas Semiconductor DS620
+ Prefix: 'ds620'
+ Datasheet: Publicly available at the Dallas Semiconductor website
+ http://www.dalsemi.com/
+
+Authors:
+ Roland Stigge <stigge@antcom.de>
+ based on ds1621.c by
+ Christian W. Zuckschwerdt <zany@triq.net>
+
+Description
+-----------
+
+The DS620 is a (one instance) digital thermometer and thermostat. It has both
+high and low temperature limits which can be user defined (i.e. programmed
+into non-volatile on-chip registers). Temperature range is -55 degree Celsius
+to +125. Between 0 and 70 degree Celsius, accuracy is 0.5 Kelvin. The value
+returned via sysfs displays post decimal positions.
+
+The thermostat function works as follows: When configured via platform_data
+(struct ds620_platform_data) .pomode == 0 (default), the thermostat output pin
+PO is always low. If .pomode == 1, the thermostat is in PO_LOW mode. I.e., the
+output pin PO becomes active when the temperature falls below temp1_min and
+stays active until the temperature goes above temp1_max.
+
+Likewise, with .pomode == 2, the thermostat is in PO_HIGH mode. I.e., the PO
+output pin becomes active when the temperature goes above temp1_max and stays
+active until the temperature falls below temp1_min.
+
+The PO output pin of the DS620 operates active-low.
diff --git a/Documentation/hwmon/emc1403 b/Documentation/hwmon/emc1403
new file mode 100644
index 000000000..a869b0ef6
--- /dev/null
+++ b/Documentation/hwmon/emc1403
@@ -0,0 +1,59 @@
+Kernel driver emc1403
+=====================
+
+Supported chips:
+ * SMSC / Microchip EMC1402, EMC1412
+ Addresses scanned: I2C 0x18, 0x1c, 0x29, 0x4c, 0x4d, 0x5c
+ Prefix: 'emc1402'
+ Datasheets:
+ http://ww1.microchip.com/downloads/en/DeviceDoc/1412.pdf
+ http://ww1.microchip.com/downloads/en/DeviceDoc/1402.pdf
+ * SMSC / Microchip EMC1403, EMC1404, EMC1413, EMC1414
+ Addresses scanned: I2C 0x18, 0x29, 0x4c, 0x4d
+ Prefix: 'emc1403', 'emc1404'
+ Datasheets:
+ http://ww1.microchip.com/downloads/en/DeviceDoc/1403_1404.pdf
+ http://ww1.microchip.com/downloads/en/DeviceDoc/1413_1414.pdf
+ * SMSC / Microchip EMC1422
+ Addresses scanned: I2C 0x4c
+ Prefix: 'emc1422'
+ Datasheet:
+ http://ww1.microchip.com/downloads/en/DeviceDoc/1422.pdf
+ * SMSC / Microchip EMC1423, EMC1424
+ Addresses scanned: I2C 0x4c
+ Prefix: 'emc1423', 'emc1424'
+ Datasheet:
+ http://ww1.microchip.com/downloads/en/DeviceDoc/1423_1424.pdf
+
+Author:
+ Kalhan Trisal <kalhan.trisal@intel.com
+
+
+Description
+-----------
+
+The Standard Microsystems Corporation (SMSC) / Microchip EMC14xx chips
+contain up to four temperature sensors. EMC14x2 support two sensors
+(one internal, one external). EMC14x3 support three sensors (one internal,
+two external), and EMC14x4 support four sensors (one internal, three
+external).
+
+The chips implement three limits for each sensor: low (tempX_min), high
+(tempX_max) and critical (tempX_crit.) The chips also implement an
+hysteresis mechanism which applies to all limits. The relative difference
+is stored in a single register on the chip, which means that the relative
+difference between the limit and its hysteresis is always the same for
+all three limits.
+
+This implementation detail implies the following:
+* When setting a limit, its hysteresis will automatically follow, the
+ difference staying unchanged. For example, if the old critical limit
+ was 80 degrees C, and the hysteresis was 75 degrees C, and you change
+ the critical limit to 90 degrees C, then the hysteresis will
+ automatically change to 85 degrees C.
+* The hysteresis values can't be set independently. We decided to make
+ only temp1_crit_hyst writable, while all other hysteresis attributes
+ are read-only. Setting temp1_crit_hyst writes the difference between
+ temp1_crit_hyst and temp1_crit into the chip, and the same relative
+ hysteresis applies automatically to all other limits.
+* The limits should be set before the hysteresis.
diff --git a/Documentation/hwmon/emc2103 b/Documentation/hwmon/emc2103
new file mode 100644
index 000000000..a12b2c127
--- /dev/null
+++ b/Documentation/hwmon/emc2103
@@ -0,0 +1,33 @@
+Kernel driver emc2103
+======================
+
+Supported chips:
+ * SMSC EMC2103
+ Addresses scanned: I2C 0x2e
+ Prefix: 'emc2103'
+ Datasheet: Not public
+
+Authors:
+ Steve Glendinning <steve.glendinning@smsc.com>
+
+Description
+-----------
+
+The Standard Microsystems Corporation (SMSC) EMC2103 chips
+contain up to 4 temperature sensors and a single fan controller.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4 or 8) to give
+the readings more range or accuracy. Not all RPM values can accurately be
+represented, so some rounding is done. With a divider of 1, the lowest
+representable value is 480 RPM.
+
+This driver supports RPM based control, to use this a fan target
+should be written to fan1_target and pwm1_enable should be set to 3.
+
+The 2103-2 and 2103-4 variants have a third temperature sensor, which can
+be connected to two anti-parallel diodes. These values can be read
+as temp3 and temp4. If only one diode is attached to this channel, temp4
+will show as "fault". The module parameter "apd=0" can be used to suppress
+this 4th channel when anti-parallel diodes are not fitted.
diff --git a/Documentation/hwmon/emc6w201 b/Documentation/hwmon/emc6w201
new file mode 100644
index 000000000..757629b12
--- /dev/null
+++ b/Documentation/hwmon/emc6w201
@@ -0,0 +1,42 @@
+Kernel driver emc6w201
+======================
+
+Supported chips:
+ * SMSC EMC6W201
+ Prefix: 'emc6w201'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: Not public
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+
+Description
+-----------
+
+From the datasheet:
+
+"The EMC6W201 is an environmental monitoring device with automatic fan
+control capability and enhanced system acoustics for noise suppression.
+This ACPI compliant device provides hardware monitoring for up to six
+voltages (including its own VCC) and five external thermal sensors,
+measures the speed of up to five fans, and controls the speed of
+multiple DC fans using three Pulse Width Modulator (PWM) outputs. Note
+that it is possible to control more than three fans by connecting two
+fans to one PWM output. The EMC6W201 will be available in a 36-pin
+QFN package."
+
+The device is functionally close to the EMC6D100 series, but is
+register-incompatible.
+
+The driver currently only supports the monitoring of the voltages,
+temperatures and fan speeds. Limits can be changed. Alarms are not
+supported, and neither is fan speed control.
+
+
+Known Systems With EMC6W201
+---------------------------
+
+The EMC6W201 is a rare device, only found on a few systems, made in
+2005 and 2006. Known systems with this device:
+* Dell Precision 670 workstation
+* Gigabyte 2CEWH mainboard
diff --git a/Documentation/hwmon/f71805f b/Documentation/hwmon/f71805f
new file mode 100644
index 000000000..48a356084
--- /dev/null
+++ b/Documentation/hwmon/f71805f
@@ -0,0 +1,167 @@
+Kernel driver f71805f
+=====================
+
+Supported chips:
+ * Fintek F71805F/FG
+ Prefix: 'f71805f'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Available from the Fintek website
+ * Fintek F71806F/FG
+ Prefix: 'f71872f'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Available from the Fintek website
+ * Fintek F71872F/FG
+ Prefix: 'f71872f'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Available from the Fintek website
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+Thanks to Denis Kieft from Barracuda Networks for the donation of a
+test system (custom Jetway K8M8MS motherboard, with CPU and RAM) and
+for providing initial documentation.
+
+Thanks to Kris Chen and Aaron Huang from Fintek for answering technical
+questions and providing additional documentation.
+
+Thanks to Chris Lin from Jetway for providing wiring schematics and
+answering technical questions.
+
+
+Description
+-----------
+
+The Fintek F71805F/FG Super I/O chip includes complete hardware monitoring
+capabilities. It can monitor up to 9 voltages (counting its own power
+source), 3 fans and 3 temperature sensors.
+
+This chip also has fan controlling features, using either DC or PWM, in
+three different modes (one manual, two automatic).
+
+The Fintek F71872F/FG Super I/O chip is almost the same, with two
+additional internal voltages monitored (VSB and battery). It also features
+6 VID inputs. The VID inputs are not yet supported by this driver.
+
+The Fintek F71806F/FG Super-I/O chip is essentially the same as the
+F71872F/FG, and is undistinguishable therefrom.
+
+The driver assumes that no more than one chip is present, which seems
+reasonable.
+
+
+Voltage Monitoring
+------------------
+
+Voltages are sampled by an 8-bit ADC with a LSB of 8 mV. The supported
+range is thus from 0 to 2.040 V. Voltage values outside of this range
+need external resistors. An exception is in0, which is used to monitor
+the chip's own power source (+3.3V), and is divided internally by a
+factor 2. For the F71872F/FG, in9 (VSB) and in10 (battery) are also
+divided internally by a factor 2.
+
+The two LSB of the voltage limit registers are not used (always 0), so
+you can only set the limits in steps of 32 mV (before scaling).
+
+The wirings and resistor values suggested by Fintek are as follow:
+
+ pin expected
+ name use R1 R2 divider raw val.
+
+in0 VCC VCC3.3V int. int. 2.00 1.65 V
+in1 VIN1 VTT1.2V 10K - 1.00 1.20 V
+in2 VIN2 VRAM 100K 100K 2.00 ~1.25 V (1)
+in3 VIN3 VCHIPSET 47K 100K 1.47 2.24 V (2)
+in4 VIN4 VCC5V 200K 47K 5.25 0.95 V
+in5 VIN5 +12V 200K 20K 11.00 1.05 V
+in6 VIN6 VCC1.5V 10K - 1.00 1.50 V
+in7 VIN7 VCORE 10K - 1.00 ~1.40 V (1)
+in8 VIN8 VSB5V 200K 47K 1.00 0.95 V
+in10 VSB VSB3.3V int. int. 2.00 1.65 V (3)
+in9 VBAT VBATTERY int. int. 2.00 1.50 V (3)
+
+(1) Depends on your hardware setup.
+(2) Obviously not correct, swapping R1 and R2 would make more sense.
+(3) F71872F/FG only.
+
+These values can be used as hints at best, as motherboard manufacturers
+are free to use a completely different setup. As a matter of fact, the
+Jetway K8M8MS uses a significantly different setup. You will have to
+find out documentation about your own motherboard, and edit sensors.conf
+accordingly.
+
+Each voltage measured has associated low and high limits, each of which
+triggers an alarm when crossed.
+
+
+Fan Monitoring
+--------------
+
+Fan rotation speeds are reported as 12-bit values from a gated clock
+signal. Speeds down to 366 RPM can be measured. There is no theoretical
+high limit, but values over 6000 RPM seem to cause problem. The effective
+resolution is much lower than you would expect, the step between different
+register values being 10 rather than 1.
+
+The chip assumes 2 pulse-per-revolution fans.
+
+An alarm is triggered if the rotation speed drops below a programmable
+limit or is too low to be measured.
+
+
+Temperature Monitoring
+----------------------
+
+Temperatures are reported in degrees Celsius. Each temperature measured
+has a high limit, those crossing triggers an alarm. There is an associated
+hysteresis value, below which the temperature has to drop before the
+alarm is cleared.
+
+All temperature channels are external, there is no embedded temperature
+sensor. Each channel can be used for connecting either a thermal diode
+or a thermistor. The driver reports the currently selected mode, but
+doesn't allow changing it. In theory, the BIOS should have configured
+everything properly.
+
+
+Fan Control
+-----------
+
+Both PWM (pulse-width modulation) and DC fan speed control methods are
+supported. The right one to use depends on external circuitry on the
+motherboard, so the driver assumes that the BIOS set the method
+properly. The driver will report the method, but won't let you change
+it.
+
+When the PWM method is used, you can select the operating frequency,
+from 187.5 kHz (default) to 31 Hz. The best frequency depends on the
+fan model. As a rule of thumb, lower frequencies seem to give better
+control, but may generate annoying high-pitch noise. So a frequency just
+above the audible range, such as 25 kHz, may be a good choice; if this
+doesn't give you good linear control, try reducing it. Fintek recommends
+not going below 1 kHz, as the fan tachometers get confused by lower
+frequencies as well.
+
+When the DC method is used, Fintek recommends not going below 5 V, which
+corresponds to a pwm value of 106 for the driver. The driver doesn't
+enforce this limit though.
+
+Three different fan control modes are supported; the mode number is written
+to the pwm<n>_enable file.
+
+* 1: Manual mode
+ You ask for a specific PWM duty cycle or DC voltage by writing to the
+ pwm<n> file.
+
+* 2: Temperature mode
+ You define 3 temperature/fan speed trip points using the
+ pwm<n>_auto_point<m>_temp and _fan files. These define a staircase
+ relationship between temperature and fan speed with two additional points
+ interpolated between the values that you define. When the temperature
+ is below auto_point1_temp the fan is switched off.
+
+* 3: Fan speed mode
+ You ask for a specific fan speed by writing to the fan<n>_target file.
+
+Both of the automatic modes require that pwm1 corresponds to fan1, pwm2 to
+fan2 and pwm3 to fan3. Temperature mode also requires that temp1 corresponds
+to pwm1 and fan1, etc.
diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg
new file mode 100644
index 000000000..de91c0db5
--- /dev/null
+++ b/Documentation/hwmon/f71882fg
@@ -0,0 +1,138 @@
+Kernel driver f71882fg
+======================
+
+Supported chips:
+ * Fintek F71808E
+ Prefix: 'f71808e'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Not public
+ * Fintek F71808A
+ Prefix: 'f71808a'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Not public
+ * Fintek F71858FG
+ Prefix: 'f71858fg'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Available from the Fintek website
+ * Fintek F71862FG and F71863FG
+ Prefix: 'f71862fg'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Available from the Fintek website
+ * Fintek F71869F and F71869E
+ Prefix: 'f71869'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Available from the Fintek website
+ * Fintek F71869A
+ Prefix: 'f71869a'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Not public
+ * Fintek F71882FG and F71883FG
+ Prefix: 'f71882fg'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Available from the Fintek website
+ * Fintek F71889FG
+ Prefix: 'f71889fg'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Available from the Fintek website
+ * Fintek F71889ED
+ Prefix: 'f71889ed'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Should become available on the Fintek website soon
+ * Fintek F71889A
+ Prefix: 'f71889a'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Should become available on the Fintek website soon
+ * Fintek F8000
+ Prefix: 'f8000'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Not public
+ * Fintek F81801U
+ Prefix: 'f71889fg'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Not public
+ Note: This is the 64-pin variant of the F71889FG, they have the
+ same device ID and are fully compatible as far as hardware
+ monitoring is concerned.
+ * Fintek F81865F
+ Prefix: 'f81865f'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Available from the Fintek website
+
+Author: Hans de Goede <hdegoede@redhat.com>
+
+
+Description
+-----------
+
+Fintek F718xx/F8000 Super I/O chips include complete hardware monitoring
+capabilities. They can monitor up to 9 voltages, 4 fans and 3 temperature
+sensors.
+
+These chips also have fan controlling features, using either DC or PWM, in
+three different modes (one manual, two automatic).
+
+The driver assumes that no more than one chip is present, which seems
+reasonable.
+
+
+Monitoring
+----------
+
+The Voltage, Fan and Temperature Monitoring uses the standard sysfs
+interface as documented in sysfs-interface, without any exceptions.
+
+
+Fan Control
+-----------
+
+Both PWM (pulse-width modulation) and DC fan speed control methods are
+supported. The right one to use depends on external circuitry on the
+motherboard, so the driver assumes that the BIOS set the method
+properly.
+
+Note that the lowest numbered temperature zone trip point corresponds to
+to the border between the highest and one but highest temperature zones, and
+vica versa. So the temperature zone trip points 1-4 (or 1-2) go from high temp
+to low temp! This is how things are implemented in the IC, and the driver
+mimicks this.
+
+There are 2 modes to specify the speed of the fan, PWM duty cycle (or DC
+voltage) mode, where 0-100% duty cycle (0-100% of 12V) is specified. And RPM
+mode where the actual RPM of the fan (as measured) is controlled and the speed
+gets specified as 0-100% of the fan#_full_speed file.
+
+Since both modes work in a 0-100% (mapped to 0-255) scale, there isn't a
+whole lot of a difference when modifying fan control settings. The only
+important difference is that in RPM mode the 0-100% controls the fan speed
+between 0-100% of fan#_full_speed. It is assumed that if the BIOS programs
+RPM mode, it will also set fan#_full_speed properly, if it does not then
+fan control will not work properly, unless you set a sane fan#_full_speed
+value yourself.
+
+Switching between these modes requires re-initializing a whole bunch of
+registers, so the mode which the BIOS has set is kept. The mode is
+printed when loading the driver.
+
+Three different fan control modes are supported; the mode number is written
+to the pwm#_enable file. Note that not all modes are supported on all
+chips, and some modes may only be available in RPM / PWM mode.
+Writing an unsupported mode will result in an invalid parameter error.
+
+* 1: Manual mode
+ You ask for a specific PWM duty cycle / DC voltage or a specific % of
+ fan#_full_speed by writing to the pwm# file. This mode is only
+ available on the F71858FG / F8000 if the fan channel is in RPM mode.
+
+* 2: Normal auto mode
+ You can define a number of temperature/fan speed trip points, which % the
+ fan should run at at this temp and which temp a fan should follow using the
+ standard sysfs interface. The number and type of trip points is chip
+ depended, see which files are available in sysfs.
+ Fan/PWM channel 3 of the F8000 is always in this mode!
+
+* 3: Thermostat mode (Only available on the F8000 when in duty cycle mode)
+ The fan speed is regulated to keep the temp the fan is mapped to between
+ temp#_auto_point2_temp and temp#_auto_point3_temp.
+
+All of the automatic modes require that pwm1 corresponds to fan1, pwm2 to
+fan2 and pwm3 to fan3.
diff --git a/Documentation/hwmon/fam15h_power b/Documentation/hwmon/fam15h_power
new file mode 100644
index 000000000..80654813d
--- /dev/null
+++ b/Documentation/hwmon/fam15h_power
@@ -0,0 +1,37 @@
+Kernel driver fam15h_power
+==========================
+
+Supported chips:
+* AMD Family 15h Processors
+
+ Prefix: 'fam15h_power'
+ Addresses scanned: PCI space
+ Datasheets:
+ BIOS and Kernel Developer's Guide (BKDG) For AMD Family 15h Processors
+ (not yet published)
+
+Author: Andreas Herrmann <herrmann.der.user@googlemail.com>
+
+Description
+-----------
+
+This driver permits reading of registers providing power information
+of AMD Family 15h processors.
+
+For AMD Family 15h processors the following power values can be
+calculated using different processor northbridge function registers:
+
+* BasePwrWatts: Specifies in watts the maximum amount of power
+ consumed by the processor for NB and logic external to the core.
+* ProcessorPwrWatts: Specifies in watts the maximum amount of power
+ the processor can support.
+* CurrPwrWatts: Specifies in watts the current amount of power being
+ consumed by the processor.
+
+This driver provides ProcessorPwrWatts and CurrPwrWatts:
+* power1_crit (ProcessorPwrWatts)
+* power1_input (CurrPwrWatts)
+
+On multi-node processors the calculated value is for the entire
+package and not for a single node. Thus the driver creates sysfs
+attributes only for internal node0 of a multi-node processor.
diff --git a/Documentation/hwmon/g760a b/Documentation/hwmon/g760a
new file mode 100644
index 000000000..cfc894537
--- /dev/null
+++ b/Documentation/hwmon/g760a
@@ -0,0 +1,36 @@
+Kernel driver g760a
+===================
+
+Supported chips:
+ * Global Mixed-mode Technology Inc. G760A
+ Prefix: 'g760a'
+ Datasheet: Publicly available at the GMT website
+ http://www.gmt.com.tw/product/datasheet/EDS-760A.pdf
+
+Author: Herbert Valerio Riedel <hvr@gnu.org>
+
+Description
+-----------
+
+The GMT G760A Fan Speed PWM Controller is connected directly to a fan
+and performs closed-loop control of the fan speed.
+
+The fan speed is programmed by setting the period via 'pwm1' of two
+consecutive speed pulses. The period is defined in terms of clock
+cycle counts of an assumed 32kHz clock source.
+
+Setting a period of 0 stops the fan; setting the period to 255 sets
+fan to maximum speed.
+
+The measured fan rotation speed returned via 'fan1_input' is derived
+from the measured speed pulse period by assuming again a 32kHz clock
+source and a 2 pulse-per-revolution fan.
+
+The 'alarms' file provides access to the two alarm bits provided by
+the G760A chip's status register: Bit 0 is set when the actual fan
+speed differs more than 20% with respect to the programmed fan speed;
+bit 1 is set when fan speed is below 1920 RPM.
+
+The g760a driver will not update its values more frequently than every
+other second; reading them more often will do no harm, but will return
+'old' values.
diff --git a/Documentation/hwmon/g762 b/Documentation/hwmon/g762
new file mode 100644
index 000000000..923db9c5b
--- /dev/null
+++ b/Documentation/hwmon/g762
@@ -0,0 +1,65 @@
+Kernel driver g762
+==================
+
+The GMT G762 Fan Speed PWM Controller is connected directly to a fan
+and performs closed-loop or open-loop control of the fan speed. Two
+modes - PWM or DC - are supported by the device.
+
+For additional information, a detailed datasheet is available at
+http://natisbad.org/NAS/ref/GMT_EDS-762_763-080710-0.2.pdf. sysfs
+bindings are described in Documentation/hwmon/sysfs-interface.
+
+The following entries are available to the user in a subdirectory of
+/sys/bus/i2c/drivers/g762/ to control the operation of the device.
+This can be done manually using the following entries but is usually
+done via a userland daemon like fancontrol.
+
+Note that those entries do not provide ways to setup the specific
+hardware characteristics of the system (reference clock, pulses per
+fan revolution, ...); Those can be modified via devicetree bindings
+documented in Documentation/devicetree/bindings/hwmon/g762.txt or
+using a specific platform_data structure in board initialization
+file (see include/linux/platform_data/g762.h).
+
+ fan1_target: set desired fan speed. This only makes sense in closed-loop
+ fan speed control (i.e. when pwm1_enable is set to 2).
+
+ fan1_input: provide current fan rotation value in RPM as reported by
+ the fan to the device.
+
+ fan1_div: fan clock divisor. Supported value are 1, 2, 4 and 8.
+
+ fan1_pulses: number of pulses per fan revolution. Supported values
+ are 2 and 4.
+
+ fan1_fault: reports fan failure, i.e. no transition on fan gear pin for
+ about 0.7s (if the fan is not voluntarily set off).
+
+ fan1_alarm: in closed-loop control mode, if fan RPM value is 25% out
+ of the programmed value for over 6 seconds 'fan1_alarm' is
+ set to 1.
+
+ pwm1_enable: set current fan speed control mode i.e. 1 for manual fan
+ speed control (open-loop) via pwm1 described below, 2 for
+ automatic fan speed control (closed-loop) via fan1_target
+ above.
+
+ pwm1_mode: set or get fan driving mode: 1 for PWM mode, 0 for DC mode.
+
+ pwm1: get or set PWM fan control value in open-loop mode. This is an
+ integer value between 0 and 255. 0 stops the fan, 255 makes
+ it run at full speed.
+
+Both in PWM mode ('pwm1_mode' set to 1) and DC mode ('pwm1_mode' set to 0),
+when current fan speed control mode is open-loop ('pwm1_enable' set to 1),
+the fan speed is programmed by setting a value between 0 and 255 via 'pwm1'
+entry (0 stops the fan, 255 makes it run at full speed). In closed-loop mode
+('pwm1_enable' set to 2), the expected rotation speed in RPM can be passed to
+the chip via 'fan1_target'. In closed-loop mode, the target speed is compared
+with current speed (available via 'fan1_input') by the device and a feedback
+is performed to match that target value. The fan speed value is computed
+based on the parameters associated with the physical characteristics of the
+system: a reference clock source frequency, a number of pulses per fan
+revolution, etc.
+
+Note that the driver will update its values at most once per second.
diff --git a/Documentation/hwmon/gl518sm b/Documentation/hwmon/gl518sm
new file mode 100644
index 000000000..494bb55b6
--- /dev/null
+++ b/Documentation/hwmon/gl518sm
@@ -0,0 +1,73 @@
+Kernel driver gl518sm
+=====================
+
+Supported chips:
+ * Genesys Logic GL518SM release 0x00
+ Prefix: 'gl518sm'
+ Addresses scanned: I2C 0x2c and 0x2d
+ * Genesys Logic GL518SM release 0x80
+ Prefix: 'gl518sm'
+ Addresses scanned: I2C 0x2c and 0x2d
+ Datasheet: http://www.genesyslogic.com/
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Kyösti Mälkki <kmalkki@cc.hut.fi>
+ Hong-Gunn Chew <hglinux@gunnet.org>
+ Jean Delvare <jdelvare@suse.de>
+
+Description
+-----------
+
+IMPORTANT:
+
+For the revision 0x00 chip, the in0, in1, and in2 values (+5V, +3V,
+and +12V) CANNOT be read. This is a limitation of the chip, not the driver.
+
+This driver supports the Genesys Logic GL518SM chip. There are at least
+two revision of this chip, which we call revision 0x00 and 0x80. Revision
+0x80 chips support the reading of all voltages and revision 0x00 only
+for VIN3.
+
+The GL518SM implements one temperature sensor, two fan rotation speed
+sensors, and four voltage sensors. It can report alarms through the
+computer speakers.
+
+Temperatures are measured in degrees Celsius. An alarm goes off while the
+temperature is above the over temperature limit, and has not yet dropped
+below the hysteresis limit. The alarm always reflects the current
+situation. Measurements are guaranteed between -10 degrees and +110
+degrees, with a accuracy of +/-3 degrees.
+
+Rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. In
+case when you have selected to turn fan1 off, no fan1 alarm is triggered.
+
+Fan readings can be divided by a programmable divider (1, 2, 4 or 8) to
+give the readings more range or accuracy. Not all RPM values can
+accurately be represented, so some rounding is done. With a divider
+of 2, the lowest representable value is around 1900 RPM.
+
+Voltage sensors (also known as VIN sensors) report their values in volts.
+An alarm is triggered if the voltage has crossed a programmable minimum or
+maximum limit. Note that minimum in this case always means 'closest to
+zero'; this is important for negative voltage measurements. The VDD input
+measures voltages between 0.000 and 5.865 volt, with a resolution of 0.023
+volt. The other inputs measure voltages between 0.000 and 4.845 volt, with
+a resolution of 0.019 volt. Note that revision 0x00 chips do not support
+reading the current voltage of any input except for VIN3; limit setting and
+alarms work fine, though.
+
+When an alarm is triggered, you can be warned by a beeping signal through your
+computer speaker. It is possible to enable all beeping globally, or only the
+beeping for some alarms.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once (except for temperature alarms). This means that the
+cause for the alarm may already have disappeared! Note that in the current
+implementation, all hardware registers are read whenever any data is read
+(unless it is less than 1.5 seconds since the last update). This means that
+you can easily miss once-only alarms.
+
+The GL518SM only updates its values each 1.5 seconds; reading it more often
+will do no harm, but will return 'old' values.
diff --git a/Documentation/hwmon/hih6130 b/Documentation/hwmon/hih6130
new file mode 100644
index 000000000..73dae918e
--- /dev/null
+++ b/Documentation/hwmon/hih6130
@@ -0,0 +1,37 @@
+Kernel driver hih6130
+=====================
+
+Supported chips:
+ * Honeywell HIH-6130 / HIH-6131
+ Prefix: 'hih6130'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Honeywell website
+ http://sensing.honeywell.com/index.php?ci_id=3106&la_id=1&defId=44872
+
+Author:
+ Iain Paton <ipaton0@gmail.com>
+
+Description
+-----------
+
+The HIH-6130 & HIH-6131 are humidity and temperature sensors in a SO8 package.
+The difference between the two devices is that the HIH-6131 has a condensation
+filter.
+
+The devices communicate with the I2C protocol. All sensors are set to the same
+I2C address 0x27 by default, so an entry with I2C_BOARD_INFO("hih6130", 0x27)
+can be used in the board setup code.
+
+Please see Documentation/i2c/instantiating-devices for details on how to
+instantiate I2C devices.
+
+sysfs-Interface
+---------------
+
+temp1_input - temperature input
+humidity1_input - humidity input
+
+Notes
+-----
+
+Command mode and alarms are not currently supported.
diff --git a/Documentation/hwmon/htu21 b/Documentation/hwmon/htu21
new file mode 100644
index 000000000..f39a215fb
--- /dev/null
+++ b/Documentation/hwmon/htu21
@@ -0,0 +1,46 @@
+Kernel driver htu21
+===================
+
+Supported chips:
+ * Measurement Specialties HTU21D
+ Prefix: 'htu21'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Measurement Specialties website
+ http://www.meas-spec.com/downloads/HTU21D.pdf
+
+
+Author:
+ William Markezana <william.markezana@meas-spec.com>
+
+Description
+-----------
+
+The HTU21D is a humidity and temperature sensor in a DFN package of
+only 3 x 3 mm footprint and 0.9 mm height.
+
+The devices communicate with the I2C protocol. All sensors are set to the
+same I2C address 0x40, so an entry with I2C_BOARD_INFO("htu21", 0x40) can
+be used in the board setup code.
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices
+for details.
+
+sysfs-Interface
+---------------
+
+temp1_input - temperature input
+humidity1_input - humidity input
+
+Notes
+-----
+
+The driver uses the default resolution settings of 12 bit for humidity and 14
+bit for temperature, which results in typical measurement times of 11 ms for
+humidity and 44 ms for temperature. To keep self heating below 0.1 degree
+Celsius, the device should not be active for more than 10% of the time. For
+this reason, the driver performs no more than two measurements per second and
+reports cached information if polled more frequently.
+
+Different resolutions, the on-chip heater, using the CRC checksum and reading
+the serial number are not supported yet.
diff --git a/Documentation/hwmon/hwmon-kernel-api.txt b/Documentation/hwmon/hwmon-kernel-api.txt
new file mode 100644
index 000000000..2ecdbfc85
--- /dev/null
+++ b/Documentation/hwmon/hwmon-kernel-api.txt
@@ -0,0 +1,107 @@
+The Linux Hardware Monitoring kernel API.
+=========================================
+
+Guenter Roeck
+
+Introduction
+------------
+
+This document describes the API that can be used by hardware monitoring
+drivers that want to use the hardware monitoring framework.
+
+This document does not describe what a hardware monitoring (hwmon) Driver or
+Device is. It also does not describe the API which can be used by user space
+to communicate with a hardware monitoring device. If you want to know this
+then please read the following file: Documentation/hwmon/sysfs-interface.
+
+For additional guidelines on how to write and improve hwmon drivers, please
+also read Documentation/hwmon/submitting-patches.
+
+The API
+-------
+Each hardware monitoring driver must #include <linux/hwmon.h> and, in most
+cases, <linux/hwmon-sysfs.h>. linux/hwmon.h declares the following
+register/unregister functions:
+
+struct device *hwmon_device_register(struct device *dev);
+struct device *
+hwmon_device_register_with_groups(struct device *dev, const char *name,
+ void *drvdata,
+ const struct attribute_group **groups);
+
+struct device *
+devm_hwmon_device_register_with_groups(struct device *dev,
+ const char *name, void *drvdata,
+ const struct attribute_group **groups);
+
+void hwmon_device_unregister(struct device *dev);
+void devm_hwmon_device_unregister(struct device *dev);
+
+hwmon_device_register registers a hardware monitoring device. The parameter
+of this function is a pointer to the parent device.
+This function returns a pointer to the newly created hardware monitoring device
+or PTR_ERR for failure. If this registration function is used, hardware
+monitoring sysfs attributes are expected to have been created and attached to
+the parent device prior to calling hwmon_device_register. A name attribute must
+have been created by the caller.
+
+hwmon_device_register_with_groups is similar to hwmon_device_register. However,
+it has additional parameters. The name parameter is a pointer to the hwmon
+device name. The registration function wil create a name sysfs attribute
+pointing to this name. The drvdata parameter is the pointer to the local
+driver data. hwmon_device_register_with_groups will attach this pointer
+to the newly allocated hwmon device. The pointer can be retrieved by the driver
+using dev_get_drvdata() on the hwmon device pointer. The groups parameter is
+a pointer to a list of sysfs attribute groups. The list must be NULL terminated.
+hwmon_device_register_with_groups creates the hwmon device with name attribute
+as well as all sysfs attributes attached to the hwmon device.
+
+devm_hwmon_device_register_with_groups is similar to
+hwmon_device_register_with_groups. However, it is device managed, meaning the
+hwmon device does not have to be removed explicitly by the removal function.
+
+hwmon_device_unregister deregisters a registered hardware monitoring device.
+The parameter of this function is the pointer to the registered hardware
+monitoring device structure. This function must be called from the driver
+remove function if the hardware monitoring device was registered with
+hwmon_device_register or with hwmon_device_register_with_groups.
+
+devm_hwmon_device_unregister does not normally have to be called. It is only
+needed for error handling, and only needed if the driver probe fails after
+the call to devm_hwmon_device_register_with_groups.
+
+The header file linux/hwmon-sysfs.h provides a number of useful macros to
+declare and use hardware monitoring sysfs attributes.
+
+In many cases, you can use the exsting define DEVICE_ATTR to declare such
+attributes. This is feasible if an attribute has no additional context. However,
+in many cases there will be additional information such as a sensor index which
+will need to be passed to the sysfs attribute handling function.
+
+SENSOR_DEVICE_ATTR and SENSOR_DEVICE_ATTR_2 can be used to define attributes
+which need such additional context information. SENSOR_DEVICE_ATTR requires
+one additional argument, SENSOR_DEVICE_ATTR_2 requires two.
+
+SENSOR_DEVICE_ATTR defines a struct sensor_device_attribute variable.
+This structure has the following fields.
+
+struct sensor_device_attribute {
+ struct device_attribute dev_attr;
+ int index;
+};
+
+You can use to_sensor_dev_attr to get the pointer to this structure from the
+attribute read or write function. Its parameter is the device to which the
+attribute is attached.
+
+SENSOR_DEVICE_ATTR_2 defines a struct sensor_device_attribute_2 variable,
+which is defined as follows.
+
+struct sensor_device_attribute_2 {
+ struct device_attribute dev_attr;
+ u8 index;
+ u8 nr;
+};
+
+Use to_sensor_dev_attr_2 to get the pointer to this structure. Its parameter
+is the device to which the attribute is attached.
diff --git a/Documentation/hwmon/ibmaem b/Documentation/hwmon/ibmaem
new file mode 100644
index 000000000..1e0d59e00
--- /dev/null
+++ b/Documentation/hwmon/ibmaem
@@ -0,0 +1,38 @@
+Kernel driver ibmaem
+======================
+
+This driver talks to the IBM Systems Director Active Energy Manager, known
+henceforth as AEM.
+
+Supported systems:
+ * Any recent IBM System X server with AEM support.
+ This includes the x3350, x3550, x3650, x3655, x3755, x3850 M2,
+ x3950 M2, and certain HC10/HS2x/LS2x/QS2x blades. The IPMI host interface
+ driver ("ipmi-si") needs to be loaded for this driver to do anything.
+ Prefix: 'ibmaem'
+ Datasheet: Not available
+
+Author: Darrick J. Wong
+
+Description
+-----------
+
+This driver implements sensor reading support for the energy and power meters
+available on various IBM System X hardware through the BMC. All sensor banks
+will be exported as platform devices; this driver can talk to both v1 and v2
+interfaces. This driver is completely separate from the older ibmpex driver.
+
+The v1 AEM interface has a simple set of features to monitor energy use. There
+is a register that displays an estimate of raw energy consumption since the
+last BMC reset, and a power sensor that returns average power use over a
+configurable interval.
+
+The v2 AEM interface is a bit more sophisticated, being able to present a wider
+range of energy and power use registers, the power cap as set by the AEM
+software, and temperature sensors.
+
+Special Features
+----------------
+
+The "power_cap" value displays the current system power cap, as set by the AEM
+software. Setting the power cap from the host is not currently supported.
diff --git a/Documentation/hwmon/ibmpowernv b/Documentation/hwmon/ibmpowernv
new file mode 100644
index 000000000..8826ba29d
--- /dev/null
+++ b/Documentation/hwmon/ibmpowernv
@@ -0,0 +1,41 @@
+Kernel Driver IBMPOWERNV
+========================
+
+Supported systems:
+ * Any recent IBM P servers based on POWERNV platform
+
+Author: Neelesh Gupta
+
+Description
+-----------
+
+This driver implements reading the platform sensors data like temperature/fan/
+voltage/power for 'POWERNV' platform.
+
+The driver uses the platform device infrastructure. It probes the device tree
+for sensor devices during the __init phase and registers them with the 'hwmon'.
+'hwmon' populates the 'sysfs' tree having attribute files, each for a given
+sensor type and its attribute data.
+
+All the nodes in the DT appear under "/ibm,opal/sensors" and each valid node in
+the DT maps to an attribute file in 'sysfs'. The node exports unique 'sensor-id'
+which the driver uses to make an OPAL call to the firmware.
+
+Usage notes
+-----------
+The driver is built statically with the kernel by enabling the config
+CONFIG_SENSORS_IBMPOWERNV. It can also be built as module 'ibmpowernv'.
+
+Sysfs attributes
+----------------
+
+fanX_input Measured RPM value.
+fanX_min Threshold RPM for alert generation.
+fanX_fault 0: No fail condition
+ 1: Failing fan
+tempX_input Measured ambient temperature.
+tempX_max Threshold ambient temperature for alert generation.
+inX_input Measured power supply voltage
+inX_fault 0: No fail condition.
+ 1: Failing power supply.
+power1_input System power consumption (microWatt)
diff --git a/Documentation/hwmon/ina209 b/Documentation/hwmon/ina209
new file mode 100644
index 000000000..672501de4
--- /dev/null
+++ b/Documentation/hwmon/ina209
@@ -0,0 +1,93 @@
+Kernel driver ina209
+=====================
+
+Supported chips:
+ * Burr-Brown / Texas Instruments INA209
+ Prefix: 'ina209'
+ Addresses scanned: -
+ Datasheet:
+ http://www.ti.com/lit/gpn/ina209
+
+Author: Paul Hays <Paul.Hays@cattail.ca>
+Author: Ira W. Snyder <iws@ovro.caltech.edu>
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+The TI / Burr-Brown INA209 monitors voltage, current, and power on the high side
+of a D.C. power supply. It can perform measurements and calculations in the
+background to supply readings at any time. It includes a programmable
+calibration multiplier to scale the displayed current and power values.
+
+
+Sysfs entries
+-------------
+
+The INA209 chip is highly configurable both via hardwiring and via
+the I2C bus. See the datasheet for details.
+
+This tries to expose most monitoring features of the hardware via
+sysfs. It does not support every feature of this chip.
+
+
+in0_input shunt voltage (mV)
+in0_input_highest shunt voltage historical maximum reading (mV)
+in0_input_lowest shunt voltage historical minimum reading (mV)
+in0_reset_history reset shunt voltage history
+in0_max shunt voltage max alarm limit (mV)
+in0_min shunt voltage min alarm limit (mV)
+in0_crit_max shunt voltage crit max alarm limit (mV)
+in0_crit_min shunt voltage crit min alarm limit (mV)
+in0_max_alarm shunt voltage max alarm limit exceeded
+in0_min_alarm shunt voltage min alarm limit exceeded
+in0_crit_max_alarm shunt voltage crit max alarm limit exceeded
+in0_crit_min_alarm shunt voltage crit min alarm limit exceeded
+
+in1_input bus voltage (mV)
+in1_input_highest bus voltage historical maximum reading (mV)
+in1_input_lowest bus voltage historical minimum reading (mV)
+in1_reset_history reset bus voltage history
+in1_max bus voltage max alarm limit (mV)
+in1_min bus voltage min alarm limit (mV)
+in1_crit_max bus voltage crit max alarm limit (mV)
+in1_crit_min bus voltage crit min alarm limit (mV)
+in1_max_alarm bus voltage max alarm limit exceeded
+in1_min_alarm bus voltage min alarm limit exceeded
+in1_crit_max_alarm bus voltage crit max alarm limit exceeded
+in1_crit_min_alarm bus voltage crit min alarm limit exceeded
+
+power1_input power measurement (uW)
+power1_input_highest power historical maximum reading (uW)
+power1_reset_history reset power history
+power1_max power max alarm limit (uW)
+power1_crit power crit alarm limit (uW)
+power1_max_alarm power max alarm limit exceeded
+power1_crit_alarm power crit alarm limit exceeded
+
+curr1_input current measurement (mA)
+
+update_interval data conversion time; affects number of samples used
+ to average results for shunt and bus voltages.
+
+General Remarks
+---------------
+
+The power and current registers in this chip require that the calibration
+register is programmed correctly before they are used. Normally this is expected
+to be done in the BIOS. In the absence of BIOS programming, the shunt resistor
+voltage can be provided using platform data. The driver uses platform data from
+the ina2xx driver for this purpose. If calibration register data is not provided
+via platform data, the driver checks if the calibration register has been
+programmed (ie has a value not equal to zero). If so, this value is retained.
+Otherwise, a default value reflecting a shunt resistor value of 10 mOhm is
+programmed into the calibration register.
+
+
+Output Pins
+-----------
+
+Output pin programming is a board feature which depends on the BIOS. It is
+outside the scope of a hardware monitoring driver to enable or disable output
+pins.
diff --git a/Documentation/hwmon/ina2xx b/Documentation/hwmon/ina2xx
new file mode 100644
index 000000000..cfd31d94c
--- /dev/null
+++ b/Documentation/hwmon/ina2xx
@@ -0,0 +1,64 @@
+Kernel driver ina2xx
+====================
+
+Supported chips:
+ * Texas Instruments INA219
+ Prefix: 'ina219'
+ Addresses: I2C 0x40 - 0x4f
+ Datasheet: Publicly available at the Texas Instruments website
+ http://www.ti.com/
+
+ * Texas Instruments INA220
+ Prefix: 'ina220'
+ Addresses: I2C 0x40 - 0x4f
+ Datasheet: Publicly available at the Texas Instruments website
+ http://www.ti.com/
+
+ * Texas Instruments INA226
+ Prefix: 'ina226'
+ Addresses: I2C 0x40 - 0x4f
+ Datasheet: Publicly available at the Texas Instruments website
+ http://www.ti.com/
+
+ * Texas Instruments INA230
+ Prefix: 'ina230'
+ Addresses: I2C 0x40 - 0x4f
+ Datasheet: Publicly available at the Texas Instruments website
+ http://www.ti.com/
+
+ * Texas Instruments INA231
+ Prefix: 'ina231'
+ Addresses: I2C 0x40 - 0x4f
+ Datasheet: Publicly available at the Texas Instruments website
+ http://www.ti.com/
+
+Author: Lothar Felten <l-felten@ti.com>
+
+Description
+-----------
+
+The INA219 is a high-side current shunt and power monitor with an I2C
+interface. The INA219 monitors both shunt drop and supply voltage, with
+programmable conversion times and filtering.
+
+The INA220 is a high or low side current shunt and power monitor with an I2C
+interface. The INA220 monitors both shunt drop and supply voltage.
+
+The INA226 is a current shunt and power monitor with an I2C interface.
+The INA226 monitors both a shunt voltage drop and bus supply voltage.
+
+INA230 and INA231 are high or low side current shunt and power monitors
+with an I2C interface. The chips monitor both a shunt voltage drop and
+bus supply voltage.
+
+The shunt value in micro-ohms can be set via platform data or device tree at
+compile-time or via the shunt_resistor attribute in sysfs at run-time. Please
+refer to the Documentation/devicetree/bindings/i2c/ina2xx.txt for bindings
+if the device tree is used.
+
+Additionally ina226 supports update_interval attribute as described in
+Documentation/hwmon/sysfs-interface. Internally the interval is the sum of
+bus and shunt voltage conversion times multiplied by the averaging rate. We
+don't touch the conversion times and only modify the number of averages. The
+lower limit of the update_interval is 2 ms, the upper limit is 2253 ms.
+The actual programmed interval may vary from the desired value.
diff --git a/Documentation/hwmon/it87 b/Documentation/hwmon/it87
new file mode 100644
index 000000000..e87294878
--- /dev/null
+++ b/Documentation/hwmon/it87
@@ -0,0 +1,262 @@
+Kernel driver it87
+==================
+
+Supported chips:
+ * IT8603E/IT8623E
+ Prefix: 'it8603'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8620E
+ Prefix: 'it8620'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8705F
+ Prefix: 'it87'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Once publicly available at the ITE website, but no longer
+ * IT8712F
+ Prefix: 'it8712'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Once publicly available at the ITE website, but no longer
+ * IT8716F/IT8726F
+ Prefix: 'it8716'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Once publicly available at the ITE website, but no longer
+ * IT8718F
+ Prefix: 'it8718'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Once publicly available at the ITE website, but no longer
+ * IT8720F
+ Prefix: 'it8720'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8721F/IT8758E
+ Prefix: 'it8721'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8728F
+ Prefix: 'it8728'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8771E
+ Prefix: 'it8771'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8772E
+ Prefix: 'it8772'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8781F
+ Prefix: 'it8781'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8782F
+ Prefix: 'it8782'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8783E/F
+ Prefix: 'it8783'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8786E
+ Prefix: 'it8786'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * IT8790E
+ Prefix: 'it8790'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: Not publicly available
+ * SiS950 [clone of IT8705F]
+ Prefix: 'it87'
+ Addresses scanned: from Super I/O config space (8 I/O ports)
+ Datasheet: No longer be available
+
+Authors:
+ Christophe Gauthron
+ Jean Delvare <jdelvare@suse.de>
+
+
+Module Parameters
+-----------------
+
+* update_vbat: int
+
+ 0 if vbat should report power on value, 1 if vbat should be updated after
+ each read. Default is 0. On some boards the battery voltage is provided
+ by either the battery or the onboard power supply. Only the first reading
+ at power on will be the actual battery voltage (which the chip does
+ automatically). On other boards the battery voltage is always fed to
+ the chip so can be read at any time. Excessive reading may decrease
+ battery life but no information is given in the datasheet.
+
+* fix_pwm_polarity int
+
+ Force PWM polarity to active high (DANGEROUS). Some chips are
+ misconfigured by BIOS - PWM values would be inverted. This option tries
+ to fix this. Please contact your BIOS manufacturer and ask him for fix.
+
+
+Hardware Interfaces
+-------------------
+
+All the chips supported by this driver are LPC Super-I/O chips, accessed
+through the LPC bus (ISA-like I/O ports). The IT8712F additionally has an
+SMBus interface to the hardware monitoring functions. This driver no
+longer supports this interface though, as it is slower and less reliable
+than the ISA access, and was only available on a small number of
+motherboard models.
+
+
+Description
+-----------
+
+This driver implements support for the IT8603E, IT8620E, IT8623E, IT8705F,
+IT8712F, IT8716F, IT8718F, IT8720F, IT8721F, IT8726F, IT8728F, IT8758E,
+IT8771E, IT8772E, IT8781F, IT8782F, IT8783E/F, IT8786E, IT8790E, and SiS950
+chips.
+
+These chips are 'Super I/O chips', supporting floppy disks, infrared ports,
+joysticks and other miscellaneous stuff. For hardware monitoring, they
+include an 'environment controller' with 3 temperature sensors, 3 fan
+rotation speed sensors, 8 voltage sensors, associated alarms, and chassis
+intrusion detection.
+
+The IT8712F and IT8716F additionally feature VID inputs, used to report
+the Vcore voltage of the processor. The early IT8712F have 5 VID pins,
+the IT8716F and late IT8712F have 6. They are shared with other functions
+though, so the functionality may not be available on a given system.
+
+The IT8718F and IT8720F also features VID inputs (up to 8 pins) but the value
+is stored in the Super-I/O configuration space. Due to technical limitations,
+this value can currently only be read once at initialization time, so
+the driver won't notice and report changes in the VID value. The two
+upper VID bits share their pins with voltage inputs (in5 and in6) so you
+can't have both on a given board.
+
+The IT8716F, IT8718F, IT8720F, IT8721F/IT8758E and later IT8712F revisions
+have support for 2 additional fans. The additional fans are supported by the
+driver.
+
+The IT8716F, IT8718F, IT8720F, IT8721F/IT8758E, IT8781F, IT8782F, IT8783E/F,
+and late IT8712F and IT8705F also have optional 16-bit tachometer counters
+for fans 1 to 3. This is better (no more fan clock divider mess) but not
+compatible with the older chips and revisions. The 16-bit tachometer mode
+is enabled by the driver when one of the above chips is detected.
+
+The IT8726F is just bit enhanced IT8716F with additional hardware
+for AMD power sequencing. Therefore the chip will appear as IT8716F
+to userspace applications.
+
+The IT8728F, IT8771E, and IT8772E are considered compatible with the IT8721F,
+until a datasheet becomes available (hopefully.)
+
+The IT8603E/IT8623E is a custom design, hardware monitoring part is similar to
+IT8728F. It only supports 3 fans, 16-bit fan mode, and the full speed mode
+of the fan is not supported (value 0 of pwmX_enable).
+
+The IT8620E is another custom design, hardware monitoring part is similar to
+IT8728F. It only supports 16-bit fan mode.
+
+The IT8790E supports up to 3 fans. 16-bit fan mode is always enabled.
+
+Temperatures are measured in degrees Celsius. An alarm is triggered once
+when the Overtemperature Shutdown limit is crossed.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. When
+16-bit tachometer counters aren't used, fan readings can be divided by
+a programmable divider (1, 2, 4 or 8) to give the readings more range or
+accuracy. With a divider of 2, the lowest representable value is around
+2600 RPM. Not all RPM values can accurately be represented, so some rounding
+is done.
+
+Voltage sensors (also known as IN sensors) report their values in volts. An
+alarm is triggered if the voltage has crossed a programmable minimum or
+maximum limit. Note that minimum in this case always means 'closest to
+zero'; this is important for negative voltage measurements. All voltage
+inputs can measure voltages between 0 and 4.08 volts, with a resolution of
+0.016 volt (except IT8603E, IT8721F/IT8758E and IT8728F: 0.012 volt.) The
+battery voltage in8 does not have limit registers.
+
+On the IT8603E, IT8721F/IT8758E, IT8781F, IT8782F, and IT8783E/F, some
+voltage inputs are internal and scaled inside the chip:
+* in3 (optional)
+* in7 (optional for IT8781F, IT8782F, and IT8783E/F)
+* in8 (always)
+* in9 (relevant for IT8603E only)
+The driver handles this transparently so user-space doesn't have to care.
+
+The VID lines (IT8712F/IT8716F/IT8718F/IT8720F) encode the core voltage value:
+the voltage level your processor should work with. This is hardcoded by
+the mainboard and/or processor itself. It is a value in volts.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may already
+have disappeared! Note that in the current implementation, all hardware
+registers are read whenever any data is read (unless it is less than 1.5
+seconds since the last update). This means that you can easily miss
+once-only alarms.
+
+Out-of-limit readings can also result in beeping, if the chip is properly
+wired and configured. Beeping can be enabled or disabled per sensor type
+(temperatures, voltages and fans.)
+
+The IT87xx only updates its values each 1.5 seconds; reading it more often
+will do no harm, but will return 'old' values.
+
+To change sensor N to a thermistor, 'echo 4 > tempN_type' where N is 1, 2,
+or 3. To change sensor N to a thermal diode, 'echo 3 > tempN_type'.
+Give 0 for unused sensor. Any other value is invalid. To configure this at
+startup, consult lm_sensors's /etc/sensors.conf. (4 = thermistor;
+3 = thermal diode)
+
+
+Fan speed control
+-----------------
+
+The fan speed control features are limited to manual PWM mode. Automatic
+"Smart Guardian" mode control handling is only implemented for older chips
+(see below.) However if you want to go for "manual mode" just write 1 to
+pwmN_enable.
+
+If you are only able to control the fan speed with very small PWM values,
+try lowering the PWM base frequency (pwm1_freq). Depending on the fan,
+it may give you a somewhat greater control range. The same frequency is
+used to drive all fan outputs, which is why pwm2_freq and pwm3_freq are
+read-only.
+
+
+Automatic fan speed control (old interface)
+-------------------------------------------
+
+The driver supports the old interface to automatic fan speed control
+which is implemented by IT8705F chips up to revision F and IT8712F
+chips up to revision G.
+
+This interface implements 4 temperature vs. PWM output trip points.
+The PWM output of trip point 4 is always the maximum value (fan running
+at full speed) while the PWM output of the other 3 trip points can be
+freely chosen. The temperature of all 4 trip points can be freely chosen.
+Additionally, trip point 1 has an hysteresis temperature attached, to
+prevent fast switching between fan on and off.
+
+The chip automatically computes the PWM output value based on the input
+temperature, based on this simple rule: if the temperature value is
+between trip point N and trip point N+1 then the PWM output value is
+the one of trip point N. The automatic control mode is less flexible
+than the manual control mode, but it reacts faster, is more robust and
+doesn't use CPU cycles.
+
+Trip points must be set properly before switching to automatic fan speed
+control mode. The driver will perform basic integrity checks before
+actually switching to automatic control mode.
+
+
+Temperature offset attributes
+-----------------------------
+
+The driver supports temp[1-3]_offset sysfs attributes to adjust the reported
+temperature for thermal diodes or diode-connected thermal transistors.
+If a temperature sensor is configured for thermistors, the attribute values
+are ignored. If the thermal sensor type is Intel PECI, the temperature offset
+must be programmed to the critical CPU temperature.
diff --git a/Documentation/hwmon/jc42 b/Documentation/hwmon/jc42
new file mode 100644
index 000000000..f7f1830a2
--- /dev/null
+++ b/Documentation/hwmon/jc42
@@ -0,0 +1,102 @@
+Kernel driver jc42
+==================
+
+Supported chips:
+ * Analog Devices ADT7408
+ Datasheets:
+ http://www.analog.com/static/imported-files/data_sheets/ADT7408.pdf
+ * Atmel AT30TS00, AT30TS002A/B, AT30TSE004A
+ Datasheets:
+ http://www.atmel.com/Images/doc8585.pdf
+ http://www.atmel.com/Images/doc8711.pdf
+ http://www.atmel.com/Images/Atmel-8852-SEEPROM-AT30TSE002A-Datasheet.pdf
+ http://www.atmel.com/Images/Atmel-8868-DTS-AT30TSE004A-Datasheet.pdf
+ * IDT TSE2002B3, TSE2002GB2, TSE2004GB2, TS3000B3, TS3000GB0, TS3000GB2,
+ TS3001GB2
+ Datasheets:
+ Available from IDT web site
+ * Maxim MAX6604
+ Datasheets:
+ http://datasheets.maxim-ic.com/en/ds/MAX6604.pdf
+ * Microchip MCP9804, MCP9805, MCP98242, MCP98243, MCP98244, MCP9843
+ Datasheets:
+ http://ww1.microchip.com/downloads/en/DeviceDoc/22203C.pdf
+ http://ww1.microchip.com/downloads/en/DeviceDoc/21977b.pdf
+ http://ww1.microchip.com/downloads/en/DeviceDoc/21996a.pdf
+ http://ww1.microchip.com/downloads/en/DeviceDoc/22153c.pdf
+ http://ww1.microchip.com/downloads/en/DeviceDoc/22327A.pdf
+ * NXP Semiconductors SE97, SE97B, SE98, SE98A
+ Datasheets:
+ http://www.nxp.com/documents/data_sheet/SE97.pdf
+ http://www.nxp.com/documents/data_sheet/SE97B.pdf
+ http://www.nxp.com/documents/data_sheet/SE98.pdf
+ http://www.nxp.com/documents/data_sheet/SE98A.pdf
+ * ON Semiconductor CAT34TS02, CAT6095
+ Datasheet:
+ http://www.onsemi.com/pub_link/Collateral/CAT34TS02-D.PDF
+ http://www.onsemi.com/pub/Collateral/CAT6095-D.PDF
+ * ST Microelectronics STTS424, STTS424E02, STTS2002, STTS2004, STTS3000
+ Datasheets:
+ http://www.st.com/web/en/resource/technical/document/datasheet/CD00157556.pdf
+ http://www.st.com/web/en/resource/technical/document/datasheet/CD00157558.pdf
+ http://www.st.com/web/en/resource/technical/document/datasheet/CD00266638.pdf
+ http://www.st.com/web/en/resource/technical/document/datasheet/CD00225278.pdf
+ http://www.st.com/web/en/resource/technical/document/datasheet/DM00076709.pdf
+ * JEDEC JC 42.4 compliant temperature sensor chips
+ Datasheet:
+ http://www.jedec.org/sites/default/files/docs/4_01_04R19.pdf
+
+ Common for all chips:
+ Prefix: 'jc42'
+ Addresses scanned: I2C 0x18 - 0x1f
+
+Author:
+ Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver implements support for JEDEC JC 42.4 compliant temperature sensors,
+which are used on many DDR3 memory modules for mobile devices and servers. Some
+systems use the sensor to prevent memory overheating by automatically throttling
+the memory controller.
+
+The driver auto-detects the chips listed above, but can be manually instantiated
+to support other JC 42.4 compliant chips.
+
+Example: the following will load the driver for a generic JC 42.4 compliant
+temperature sensor at address 0x18 on I2C bus #1:
+
+# modprobe jc42
+# echo jc42 0x18 > /sys/bus/i2c/devices/i2c-1/new_device
+
+A JC 42.4 compliant chip supports a single temperature sensor. Minimum, maximum,
+and critical temperature can be configured. There are alarms for high, low,
+and critical thresholds.
+
+There is also an hysteresis to control the thresholds for resetting alarms.
+Per JC 42.4 specification, the hysteresis threshold can be configured to 0, 1.5,
+3.0, and 6.0 degrees C. Configured hysteresis values will be rounded to those
+limits. The chip supports only a single register to configure the hysteresis,
+which applies to all limits. This register can be written by writing into
+temp1_crit_hyst. Other hysteresis attributes are read-only.
+
+If the BIOS has configured the sensor for automatic temperature management, it
+is likely that it has locked the registers, i.e., that the temperature limits
+cannot be changed.
+
+Sysfs entries
+-------------
+
+temp1_input Temperature (RO)
+temp1_min Minimum temperature (RO or RW)
+temp1_max Maximum temperature (RO or RW)
+temp1_crit Critical high temperature (RO or RW)
+
+temp1_crit_hyst Critical hysteresis temperature (RO or RW)
+temp1_max_hyst Maximum hysteresis temperature (RO)
+
+temp1_min_alarm Temperature low alarm
+temp1_max_alarm Temperature high alarm
+temp1_crit_alarm Temperature critical alarm
diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp
new file mode 100644
index 000000000..254d2f553
--- /dev/null
+++ b/Documentation/hwmon/k10temp
@@ -0,0 +1,77 @@
+Kernel driver k10temp
+=====================
+
+Supported chips:
+* AMD Family 10h processors:
+ Socket F: Quad-Core/Six-Core/Embedded Opteron (but see below)
+ Socket AM2+: Quad-Core Opteron, Phenom (II) X3/X4, Athlon X2 (but see below)
+ Socket AM3: Quad-Core Opteron, Athlon/Phenom II X2/X3/X4, Sempron II
+ Socket S1G3: Athlon II, Sempron, Turion II
+* AMD Family 11h processors:
+ Socket S1G2: Athlon (X2), Sempron (X2), Turion X2 (Ultra)
+* AMD Family 12h processors: "Llano" (E2/A4/A6/A8-Series)
+* AMD Family 14h processors: "Brazos" (C/E/G/Z-Series)
+* AMD Family 15h processors: "Bulldozer" (FX-Series), "Trinity", "Kaveri", "Carrizo"
+* AMD Family 16h processors: "Kabini", "Mullins"
+
+ Prefix: 'k10temp'
+ Addresses scanned: PCI space
+ Datasheets:
+ BIOS and Kernel Developer's Guide (BKDG) For AMD Family 10h Processors:
+ http://support.amd.com/us/Processor_TechDocs/31116.pdf
+ BIOS and Kernel Developer's Guide (BKDG) for AMD Family 11h Processors:
+ http://support.amd.com/us/Processor_TechDocs/41256.pdf
+ BIOS and Kernel Developer's Guide (BKDG) for AMD Family 12h Processors:
+ http://support.amd.com/us/Processor_TechDocs/41131.pdf
+ BIOS and Kernel Developer's Guide (BKDG) for AMD Family 14h Models 00h-0Fh Processors:
+ http://support.amd.com/us/Processor_TechDocs/43170.pdf
+ Revision Guide for AMD Family 10h Processors:
+ http://support.amd.com/us/Processor_TechDocs/41322.pdf
+ Revision Guide for AMD Family 11h Processors:
+ http://support.amd.com/us/Processor_TechDocs/41788.pdf
+ Revision Guide for AMD Family 12h Processors:
+ http://support.amd.com/us/Processor_TechDocs/44739.pdf
+ Revision Guide for AMD Family 14h Models 00h-0Fh Processors:
+ http://support.amd.com/us/Processor_TechDocs/47534.pdf
+ AMD Family 11h Processor Power and Thermal Data Sheet for Notebooks:
+ http://support.amd.com/us/Processor_TechDocs/43373.pdf
+ AMD Family 10h Server and Workstation Processor Power and Thermal Data Sheet:
+ http://support.amd.com/us/Processor_TechDocs/43374.pdf
+ AMD Family 10h Desktop Processor Power and Thermal Data Sheet:
+ http://support.amd.com/us/Processor_TechDocs/43375.pdf
+
+Author: Clemens Ladisch <clemens@ladisch.de>
+
+Description
+-----------
+
+This driver permits reading of the internal temperature sensor of AMD
+Family 10h/11h/12h/14h/15h/16h processors.
+
+All these processors have a sensor, but on those for Socket F or AM2+,
+the sensor may return inconsistent values (erratum 319). The driver
+will refuse to load on these revisions unless you specify the "force=1"
+module parameter.
+
+Due to technical reasons, the driver can detect only the mainboard's
+socket type, not the processor's actual capabilities. Therefore, if you
+are using an AM3 processor on an AM2+ mainboard, you can safely use the
+"force=1" parameter.
+
+There is one temperature measurement value, available as temp1_input in
+sysfs. It is measured in degrees Celsius with a resolution of 1/8th degree.
+Please note that it is defined as a relative value; to quote the AMD manual:
+
+ Tctl is the processor temperature control value, used by the platform to
+ control cooling systems. Tctl is a non-physical temperature on an
+ arbitrary scale measured in degrees. It does _not_ represent an actual
+ physical temperature like die or case temperature. Instead, it specifies
+ the processor temperature relative to the point at which the system must
+ supply the maximum cooling for the processor's specified maximum case
+ temperature and maximum thermal power dissipation.
+
+The maximum value for Tctl is available in the file temp1_max.
+
+If the BIOS has enabled hardware temperature control, the threshold at
+which the processor will throttle itself to avoid damage is available in
+temp1_crit and temp1_crit_hyst.
diff --git a/Documentation/hwmon/k8temp b/Documentation/hwmon/k8temp
new file mode 100644
index 000000000..716dc24c7
--- /dev/null
+++ b/Documentation/hwmon/k8temp
@@ -0,0 +1,55 @@
+Kernel driver k8temp
+====================
+
+Supported chips:
+ * AMD Athlon64/FX or Opteron CPUs
+ Prefix: 'k8temp'
+ Addresses scanned: PCI space
+ Datasheet: http://support.amd.com/us/Processor_TechDocs/32559.pdf
+
+Author: Rudolf Marek
+Contact: Rudolf Marek <r.marek@assembler.cz>
+
+Description
+-----------
+
+This driver permits reading temperature sensor(s) embedded inside AMD K8
+family CPUs (Athlon64/FX, Opteron). Official documentation says that it works
+from revision F of K8 core, but in fact it seems to be implemented for all
+revisions of K8 except the first two revisions (SH-B0 and SH-B3).
+
+Please note that you will need at least lm-sensors 2.10.1 for proper userspace
+support.
+
+There can be up to four temperature sensors inside single CPU. The driver
+will auto-detect the sensors and will display only temperatures from
+implemented sensors.
+
+Mapping of /sys files is as follows:
+
+temp1_input - temperature of Core 0 and "place" 0
+temp2_input - temperature of Core 0 and "place" 1
+temp3_input - temperature of Core 1 and "place" 0
+temp4_input - temperature of Core 1 and "place" 1
+
+Temperatures are measured in degrees Celsius and measurement resolution is
+1 degree C. It is expected that future CPU will have better resolution. The
+temperature is updated once a second. Valid temperatures are from -49 to
+206 degrees C.
+
+Temperature known as TCaseMax was specified for processors up to revision E.
+This temperature is defined as temperature between heat-spreader and CPU
+case, so the internal CPU temperature supplied by this driver can be higher.
+There is no easy way how to measure the temperature which will correlate
+with TCaseMax temperature.
+
+For newer revisions of CPU (rev F, socket AM2) there is a mathematically
+computed temperature called TControl, which must be lower than TControlMax.
+
+The relationship is following:
+
+temp1_input - TjOffset*2 < TControlMax,
+
+TjOffset is not yet exported by the driver, TControlMax is usually
+70 degrees C. The rule of the thumb -> CPU temperature should not cross
+60 degrees C too much.
diff --git a/Documentation/hwmon/lineage-pem b/Documentation/hwmon/lineage-pem
new file mode 100644
index 000000000..83b2ddc16
--- /dev/null
+++ b/Documentation/hwmon/lineage-pem
@@ -0,0 +1,77 @@
+Kernel driver lineage-pem
+=========================
+
+Supported devices:
+ * Lineage Compact Power Line Power Entry Modules
+ Prefix: 'lineage-pem'
+ Addresses scanned: -
+ Documentation:
+ http://www.lineagepower.com/oem/pdf/CPLI2C.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver supports various Lineage Compact Power Line DC/DC and AC/DC
+converters such as CP1800, CP2000AC, CP2000DC, CP2100DC, and others.
+
+Lineage CPL power entry modules are nominally PMBus compliant. However, most
+standard PMBus commands are not supported. Specifically, all hardware monitoring
+and status reporting commands are non-standard. For this reason, a standard
+PMBus driver can not be used.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for Lineage CPL devices, since there is no register
+which can be safely used to identify the chip. You will have to instantiate
+the devices explicitly.
+
+Example: the following will load the driver for a Lineage PEM at address 0x40
+on I2C bus #1:
+$ modprobe lineage-pem
+$ echo lineage-pem 0x40 > /sys/bus/i2c/devices/i2c-1/new_device
+
+All Lineage CPL power entry modules have a built-in I2C bus master selector
+(PCA9541). To ensure device access, this driver should only be used as client
+driver to the pca9541 I2C master selector driver.
+
+
+Sysfs entries
+-------------
+
+All Lineage CPL devices report output voltage and device temperature as well as
+alarms for output voltage, temperature, input voltage, input current, input power,
+and fan status.
+
+Input voltage, input current, input power, and fan speed measurement is only
+supported on newer devices. The driver detects if those attributes are supported,
+and only creates respective sysfs entries if they are.
+
+in1_input Output voltage (mV)
+in1_min_alarm Output undervoltage alarm
+in1_max_alarm Output overvoltage alarm
+in1_crit Output voltage critical alarm
+
+in2_input Input voltage (mV, optional)
+in2_alarm Input voltage alarm
+
+curr1_input Input current (mA, optional)
+curr1_alarm Input overcurrent alarm
+
+power1_input Input power (uW, optional)
+power1_alarm Input power alarm
+
+fan1_input Fan 1 speed (rpm, optional)
+fan2_input Fan 2 speed (rpm, optional)
+fan3_input Fan 3 speed (rpm, optional)
+
+temp1_input
+temp1_max
+temp1_crit
+temp1_alarm
+temp1_crit_alarm
+temp1_fault
diff --git a/Documentation/hwmon/lm25066 b/Documentation/hwmon/lm25066
new file mode 100644
index 000000000..b34c3de5c
--- /dev/null
+++ b/Documentation/hwmon/lm25066
@@ -0,0 +1,120 @@
+Kernel driver lm25066
+=====================
+
+Supported chips:
+ * TI LM25056
+ Prefix: 'lm25056'
+ Addresses scanned: -
+ Datasheets:
+ http://www.ti.com/lit/gpn/lm25056
+ http://www.ti.com/lit/gpn/lm25056a
+ * TI LM25063
+ Prefix: 'lm25063'
+ Addresses scanned: -
+ Datasheet:
+ To be announced
+ * National Semiconductor LM25066
+ Prefix: 'lm25066'
+ Addresses scanned: -
+ Datasheets:
+ http://www.national.com/pf/LM/LM25066.html
+ http://www.national.com/pf/LM/LM25066A.html
+ * National Semiconductor LM5064
+ Prefix: 'lm5064'
+ Addresses scanned: -
+ Datasheet:
+ http://www.national.com/pf/LM/LM5064.html
+ * National Semiconductor LM5066
+ Prefix: 'lm5066'
+ Addresses scanned: -
+ Datasheet:
+ http://www.national.com/pf/LM/LM5066.html
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver supports hardware montoring for National Semiconductor / TI LM25056,
+LM25063, LM25066, LM5064, and LM5066 Power Management, Monitoring, Control, and
+Protection ICs.
+
+The driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+in1_label "vin"
+in1_input Measured input voltage.
+in1_average Average measured input voltage.
+in1_min Minimum input voltage.
+in1_max Maximum input voltage.
+in1_crit Critical high input voltage (LM25063 only).
+in1_lcrit Critical low input voltage (LM25063 only).
+in1_min_alarm Input voltage low alarm.
+in1_max_alarm Input voltage high alarm.
+in1_lcrit_alarm Input voltage critical low alarm (LM25063 only).
+in1_crit_alarm Input voltage critical high alarm. (LM25063 only).
+
+in2_label "vmon"
+in2_input Measured voltage on VAUX pin
+in2_min Minimum VAUX voltage (LM25056 only).
+in2_max Maximum VAUX voltage (LM25056 only).
+in2_min_alarm VAUX voltage low alarm (LM25056 only).
+in2_max_alarm VAUX voltage high alarm (LM25056 only).
+
+in3_label "vout1"
+ Not supported on LM25056.
+in3_input Measured output voltage.
+in3_average Average measured output voltage.
+in3_min Minimum output voltage.
+in3_min_alarm Output voltage low alarm.
+in3_highest Historical minimum output voltage (LM25063 only).
+in3_lowest Historical maximum output voltage (LM25063 only).
+
+curr1_label "iin"
+curr1_input Measured input current.
+curr1_average Average measured input current.
+curr1_max Maximum input current.
+curr1_crit Critical input current (LM25063 only).
+curr1_max_alarm Input current high alarm.
+curr1_crit_alarm Input current critical high alarm (LM25063 only).
+
+power1_label "pin"
+power1_input Measured input power.
+power1_average Average measured input power.
+power1_max Maximum input power limit.
+power1_alarm Input power alarm
+power1_input_highest Historical maximum power.
+power1_reset_history Write any value to reset maximum power history.
+
+power2_label "pout". LM25063 only.
+power2_input Measured output power.
+power2_max Maximum output power limit.
+power2_crit Critical output power limit.
+
+temp1_input Measured temperature.
+temp1_max Maximum temperature.
+temp1_crit Critical high temperature.
+temp1_max_alarm Chip temperature high alarm.
+temp1_crit_alarm Chip temperature critical high alarm.
diff --git a/Documentation/hwmon/lm63 b/Documentation/hwmon/lm63
new file mode 100644
index 000000000..4a0046151
--- /dev/null
+++ b/Documentation/hwmon/lm63
@@ -0,0 +1,77 @@
+Kernel driver lm63
+==================
+
+Supported chips:
+ * National Semiconductor LM63
+ Prefix: 'lm63'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/pf/LM/LM63.html
+ * National Semiconductor LM64
+ Prefix: 'lm64'
+ Addresses scanned: I2C 0x18 and 0x4e
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/pf/LM/LM64.html
+ * National Semiconductor LM96163
+ Prefix: 'lm96163'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/pf/LM/LM96163.html
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+Thanks go to Tyan and especially Alex Buckingham for setting up a remote
+access to their S4882 test platform for this driver.
+ http://www.tyan.com/
+
+Description
+-----------
+
+The LM63 is a digital temperature sensor with integrated fan monitoring
+and control.
+
+The LM63 is basically an LM86 with fan speed monitoring and control
+capabilities added. It misses some of the LM86 features though:
+ - No low limit for local temperature.
+ - No critical limit for local temperature.
+ - Critical limit for remote temperature can be changed only once. We
+ will consider that the critical limit is read-only.
+
+The datasheet isn't very clear about what the tachometer reading is.
+
+An explanation from National Semiconductor: The two lower bits of the read
+value have to be masked out. The value is still 16 bit in width.
+
+All temperature values are given in degrees Celsius. Resolution is 1.0
+degree for the local temperature, 0.125 degree for the remote temperature.
+
+The fan speed is measured using a tachometer. Contrary to most chips which
+store the value in an 8-bit register and have a selectable clock divider
+to make sure that the result will fit in the register, the LM63 uses 16-bit
+value for measuring the speed of the fan. It can measure fan speeds down to
+83 RPM, at least in theory.
+
+Note that the pin used for fan monitoring is shared with an alert out
+function. Depending on how the board designer wanted to use the chip, fan
+speed monitoring will or will not be possible. The proper chip configuration
+is left to the BIOS, and the driver will blindly trust it. Only the original
+LM63 suffers from this limitation, the LM64 and LM96163 have separate pins
+for fan monitoring and alert out. On the LM64, monitoring is always enabled;
+on the LM96163 it can be disabled.
+
+A PWM output can be used to control the speed of the fan. The LM63 has two
+PWM modes: manual and automatic. Automatic mode is not fully implemented yet
+(you cannot define your custom PWM/temperature curve), and mode change isn't
+supported either.
+
+The lm63 driver will not update its values more frequently than configured with
+the update_interval sysfs attribute; reading them more often will do no harm,
+but will return 'old' values. Values in the automatic fan control lookup table
+(attributes pwm1_auto_*) have their own independent lifetime of 5 seconds.
+
+The LM64 is effectively an LM63 with GPIO lines. The driver does not
+support these GPIO lines at present.
+
+The LM96163 is an enhanced version of LM63 with improved temperature accuracy
+and better PWM resolution. For LM96163, the external temperature sensor type is
+configurable as CPU embedded diode(1) or 3904 transistor(2).
diff --git a/Documentation/hwmon/lm70 b/Documentation/hwmon/lm70
new file mode 100644
index 000000000..1bb2db440
--- /dev/null
+++ b/Documentation/hwmon/lm70
@@ -0,0 +1,47 @@
+Kernel driver lm70
+==================
+
+Supported chips:
+ * National Semiconductor LM70
+ Datasheet: http://www.national.com/pf/LM/LM70.html
+ * Texas Instruments TMP121/TMP123
+ Information: http://focus.ti.com/docs/prod/folders/print/tmp121.html
+ * National Semiconductor LM71
+ Datasheet: http://www.ti.com/product/LM71
+ * National Semiconductor LM74
+ Datasheet: http://www.ti.com/product/LM74
+
+Author:
+ Kaiwan N Billimoria <kaiwan@designergraphix.com>
+
+Description
+-----------
+
+This driver implements support for the National Semiconductor LM70
+temperature sensor.
+
+The LM70 temperature sensor chip supports a single temperature sensor.
+It communicates with a host processor (or microcontroller) via an
+SPI/Microwire Bus interface.
+
+Communication with the LM70 is simple: when the temperature is to be sensed,
+the driver accesses the LM70 using SPI communication: 16 SCLK cycles
+comprise the MOSI/MISO loop. At the end of the transfer, the 11-bit 2's
+complement digital temperature (sent via the SIO line), is available in the
+driver for interpretation. This driver makes use of the kernel's in-core
+SPI support.
+
+As a real (in-tree) example of this "SPI protocol driver" interfacing
+with a "SPI master controller driver", see drivers/spi/spi_lm70llp.c
+and its associated documentation.
+
+The LM74 and TMP121/TMP123 are very similar; main difference is 13-bit
+temperature data (0.0625 degrees celsius resolution).
+
+The LM71 is also very similar; main difference is 14-bit temperature
+data (0.03125 degrees celsius resolution).
+
+Thanks to
+---------
+Jean Delvare <jdelvare@suse.de> for mentoring the hwmon-side driver
+development.
diff --git a/Documentation/hwmon/lm73 b/Documentation/hwmon/lm73
new file mode 100644
index 000000000..8af059dcb
--- /dev/null
+++ b/Documentation/hwmon/lm73
@@ -0,0 +1,90 @@
+Kernel driver lm73
+==================
+
+Supported chips:
+ * Texas Instruments LM73
+ Prefix: 'lm73'
+ Addresses scanned: I2C 0x48, 0x49, 0x4a, 0x4c, 0x4d, and 0x4e
+ Datasheet: Publicly available at the Texas Instruments website
+ http://www.ti.com/product/lm73
+
+Author: Guillaume Ligneul <guillaume.ligneul@gmail.com>
+Documentation: Chris Verges <kg4ysn@gmail.com>
+
+
+Description
+-----------
+
+The LM73 is a digital temperature sensor. All temperature values are
+given in degrees Celsius.
+
+Measurement Resolution Support
+------------------------------
+
+The LM73 supports four resolutions, defined in terms of degrees C per
+LSB: 0.25, 0.125, 0.0625, and 0.3125. Changing the resolution mode
+affects the conversion time of the LM73's analog-to-digital converter.
+From userspace, the desired resolution can be specified as a function of
+conversion time via the 'update_interval' sysfs attribute for the
+device. This attribute will normalize ranges of input values to the
+maximum times defined for the resolution in the datasheet.
+
+ Resolution Conv. Time Input Range
+ (C/LSB) (msec) (msec)
+ --------------------------------------
+ 0.25 14 0..14
+ 0.125 28 15..28
+ 0.0625 56 29..56
+ 0.03125 112 57..infinity
+ --------------------------------------
+
+The following examples show how the 'update_interval' attribute can be
+used to change the conversion time:
+
+ $ echo 0 > update_interval
+ $ cat update_interval
+ 14
+ $ cat temp1_input
+ 24250
+
+ $ echo 22 > update_interval
+ $ cat update_interval
+ 28
+ $ cat temp1_input
+ 24125
+
+ $ echo 56 > update_interval
+ $ cat update_interval
+ 56
+ $ cat temp1_input
+ 24062
+
+ $ echo 85 > update_interval
+ $ cat update_interval
+ 112
+ $ cat temp1_input
+ 24031
+
+As shown here, the lm73 driver automatically adjusts any user input for
+'update_interval' via a step function. Reading back the
+'update_interval' value after a write operation will confirm the
+conversion time actively in use.
+
+Mathematically, the resolution can be derived from the conversion time
+via the following function:
+
+ g(x) = 0.250 * [log(x/14) / log(2)]
+
+where 'x' is the output from 'update_interval' and 'g(x)' is the
+resolution in degrees C per LSB.
+
+Alarm Support
+-------------
+
+The LM73 features a simple over-temperature alarm mechanism. This
+feature is exposed via the sysfs attributes.
+
+The attributes 'temp1_max_alarm' and 'temp1_min_alarm' are flags
+provided by the LM73 that indicate whether the measured temperature has
+passed the 'temp1_max' and 'temp1_min' thresholds, respectively. These
+values _must_ be read to clear the registers on the LM73.
diff --git a/Documentation/hwmon/lm75 b/Documentation/hwmon/lm75
new file mode 100644
index 000000000..67691a0aa
--- /dev/null
+++ b/Documentation/hwmon/lm75
@@ -0,0 +1,92 @@
+Kernel driver lm75
+==================
+
+Supported chips:
+ * National Semiconductor LM75
+ Prefix: 'lm75'
+ Addresses scanned: I2C 0x48 - 0x4f
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/
+ * National Semiconductor LM75A
+ Prefix: 'lm75a'
+ Addresses scanned: I2C 0x48 - 0x4f
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/
+ * Dallas Semiconductor (now Maxim) DS75, DS1775, DS7505
+ Prefixes: 'ds75', 'ds1775', 'ds7505'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Maxim website
+ http://www.maximintegrated.com/
+ * Maxim MAX6625, MAX6626
+ Prefixes: 'max6625', 'max6626'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/
+ * Microchip (TelCom) TCN75
+ Prefix: 'tcn75'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Microchip website
+ http://www.microchip.com/
+ * Microchip MCP9800, MCP9801, MCP9802, MCP9803
+ Prefix: 'mcp980x'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Microchip website
+ http://www.microchip.com/
+ * Analog Devices ADT75
+ Prefix: 'adt75'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Analog Devices website
+ http://www.analog.com/adt75
+ * ST Microelectronics STDS75
+ Prefix: 'stds75'
+ Addresses scanned: none
+ Datasheet: Publicly available at the ST website
+ http://www.st.com/internet/analog/product/121769.jsp
+ * Texas Instruments TMP100, TMP101, TMP105, TMP112, TMP75, TMP175, TMP275
+ Prefixes: 'tmp100', 'tmp101', 'tmp105', 'tmp112', 'tmp175', 'tmp75', 'tmp275'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Texas Instruments website
+ http://www.ti.com/product/tmp100
+ http://www.ti.com/product/tmp101
+ http://www.ti.com/product/tmp105
+ http://www.ti.com/product/tmp112
+ http://www.ti.com/product/tmp75
+ http://www.ti.com/product/tmp175
+ http://www.ti.com/product/tmp275
+ * NXP LM75B
+ Prefix: 'lm75b'
+ Addresses scanned: none
+ Datasheet: Publicly available at the NXP website
+ http://www.nxp.com/documents/data_sheet/LM75B.pdf
+
+Author: Frodo Looijaard <frodol@dds.nl>
+
+Description
+-----------
+
+The LM75 implements one temperature sensor. Limits can be set through the
+Overtemperature Shutdown register and Hysteresis register. Each value can be
+set and read to half-degree accuracy.
+An alarm is issued (usually to a connected LM78) when the temperature
+gets higher then the Overtemperature Shutdown value; it stays on until
+the temperature falls below the Hysteresis value.
+All temperatures are in degrees Celsius, and are guaranteed within a
+range of -55 to +125 degrees.
+
+The driver caches the values for a period varying between 1 second for the
+slowest chips and 125 ms for the fastest chips; reading it more often
+will do no harm, but will return 'old' values.
+
+The original LM75 was typically used in combination with LM78-like chips
+on PC motherboards, to measure the temperature of the processor(s). Clones
+are now used in various embedded designs.
+
+The LM75 is essentially an industry standard; there may be other
+LM75 clones not listed here, with or without various enhancements,
+that are supported. The clones are not detected by the driver, unless
+they reproduce the exact register tricks of the original LM75, and must
+therefore be instantiated explicitly. Higher resolution up to 12-bit
+is supported by this driver, other specific enhancements are not.
+
+The LM77 is not supported, contrary to what we pretended for a long time.
+Both chips are simply not compatible, value encoding differs.
diff --git a/Documentation/hwmon/lm77 b/Documentation/hwmon/lm77
new file mode 100644
index 000000000..bfc915fe3
--- /dev/null
+++ b/Documentation/hwmon/lm77
@@ -0,0 +1,38 @@
+Kernel driver lm77
+==================
+
+Supported chips:
+ * National Semiconductor LM77
+ Prefix: 'lm77'
+ Addresses scanned: I2C 0x48 - 0x4b
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/
+
+Author: Andras BALI <drewie@freemail.hu>
+
+Description
+-----------
+
+The LM77 implements one temperature sensor. The temperature
+sensor incorporates a band-gap type temperature sensor,
+10-bit ADC, and a digital comparator with user-programmable upper
+and lower limit values.
+
+The LM77 implements 3 limits: low (temp1_min), high (temp1_max) and
+critical (temp1_crit.) It also implements an hysteresis mechanism which
+applies to all 3 limits. The relative difference is stored in a single
+register on the chip, which means that the relative difference between
+the limit and its hysteresis is always the same for all 3 limits.
+
+This implementation detail implies the following:
+* When setting a limit, its hysteresis will automatically follow, the
+ difference staying unchanged. For example, if the old critical limit
+ was 80 degrees C, and the hysteresis was 75 degrees C, and you change
+ the critical limit to 90 degrees C, then the hysteresis will
+ automatically change to 85 degrees C.
+* All 3 hysteresis can't be set independently. We decided to make
+ temp1_crit_hyst writable, while temp1_min_hyst and temp1_max_hyst are
+ read-only. Setting temp1_crit_hyst writes the difference between
+ temp1_crit_hyst and temp1_crit into the chip, and the same relative
+ hysteresis applies automatically to the low and high limits.
+* The limits should be set before the hysteresis.
diff --git a/Documentation/hwmon/lm78 b/Documentation/hwmon/lm78
new file mode 100644
index 000000000..4dd477317
--- /dev/null
+++ b/Documentation/hwmon/lm78
@@ -0,0 +1,68 @@
+Kernel driver lm78
+==================
+
+Supported chips:
+ * National Semiconductor LM78 / LM78-J
+ Prefix: 'lm78'
+ Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports)
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/
+ * National Semiconductor LM79
+ Prefix: 'lm79'
+ Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports)
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/
+
+Authors: Frodo Looijaard <frodol@dds.nl>
+ Jean Delvare <jdelvare@suse.de>
+
+Description
+-----------
+
+This driver implements support for the National Semiconductor LM78, LM78-J
+and LM79. They are described as 'Microprocessor System Hardware Monitors'.
+
+There is almost no difference between the three supported chips. Functionally,
+the LM78 and LM78-J are exactly identical. The LM79 has one more VID line,
+which is used to report the lower voltages newer Pentium processors use.
+From here on, LM7* means either of these three types.
+
+The LM7* implements one temperature sensor, three fan rotation speed sensors,
+seven voltage sensors, VID lines, alarms, and some miscellaneous stuff.
+
+Temperatures are measured in degrees Celsius. An alarm is triggered once
+when the Overtemperature Shutdown limit is crossed; it is triggered again
+as soon as it drops below the Hysteresis value. A more useful behavior
+can be found by setting the Hysteresis value to +127 degrees Celsius; in
+this case, alarms are issued during all the time when the actual temperature
+is above the Overtemperature Shutdown value. Measurements are guaranteed
+between -55 and +125 degrees, with a resolution of 1 degree.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4 or 8) to give
+the readings more range or accuracy. Not all RPM values can accurately be
+represented, so some rounding is done. With a divider of 2, the lowest
+representable value is around 2600 RPM.
+
+Voltage sensors (also known as IN sensors) report their values in volts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit. Note that minimum in this case always means 'closest to
+zero'; this is important for negative voltage measurements. All voltage
+inputs can measure voltages between 0 and 4.08 volts, with a resolution
+of 0.016 volt.
+
+The VID lines encode the core voltage value: the voltage level your processor
+should work with. This is hardcoded by the mainboard and/or processor itself.
+It is a value in volts. When it is unconnected, you will often find the
+value 3.50 V here.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may
+already have disappeared! Note that in the current implementation, all
+hardware registers are read whenever any data is read (unless it is less
+than 1.5 seconds since the last update). This means that you can easily
+miss once-only alarms.
+
+The LM7* only updates its values each 1.5 seconds; reading it more often
+will do no harm, but will return 'old' values.
diff --git a/Documentation/hwmon/lm80 b/Documentation/hwmon/lm80
new file mode 100644
index 000000000..a60b43efc
--- /dev/null
+++ b/Documentation/hwmon/lm80
@@ -0,0 +1,63 @@
+Kernel driver lm80
+==================
+
+Supported chips:
+ * National Semiconductor LM80
+ Prefix: 'lm80'
+ Addresses scanned: I2C 0x28 - 0x2f
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/
+ * National Semiconductor LM96080
+ Prefix: 'lm96080'
+ Addresses scanned: I2C 0x28 - 0x2f
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>
+
+Description
+-----------
+
+This driver implements support for the National Semiconductor LM80.
+It is described as a 'Serial Interface ACPI-Compatible Microprocessor
+System Hardware Monitor'. The LM96080 is a more recent incarnation,
+it is pin and register compatible, with a few additional features not
+yet supported by the driver.
+
+The LM80 implements one temperature sensor, two fan rotation speed sensors,
+seven voltage sensors, alarms, and some miscellaneous stuff.
+
+Temperatures are measured in degrees Celsius. There are two sets of limits
+which operate independently. When the HOT Temperature Limit is crossed,
+this will cause an alarm that will be reasserted until the temperature
+drops below the HOT Hysteresis. The Overtemperature Shutdown (OS) limits
+should work in the same way (but this must be checked; the datasheet
+is unclear about this). Measurements are guaranteed between -55 and
++125 degrees. The current temperature measurement has a resolution of
+0.0625 degrees; the limits have a resolution of 1 degree.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4 or 8) to give
+the readings more range or accuracy. Not all RPM values can accurately be
+represented, so some rounding is done. With a divider of 2, the lowest
+representable value is around 2600 RPM.
+
+Voltage sensors (also known as IN sensors) report their values in volts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit. Note that minimum in this case always means 'closest to
+zero'; this is important for negative voltage measurements. All voltage
+inputs can measure voltages between 0 and 2.55 volts, with a resolution
+of 0.01 volt.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may
+already have disappeared! Note that in the current implementation, all
+hardware registers are read whenever any data is read (unless it is less
+than 2.0 seconds since the last update). This means that you can easily
+miss once-only alarms.
+
+The LM80 only updates its values each 1.5 seconds; reading it more often
+will do no harm, but will return 'old' values.
diff --git a/Documentation/hwmon/lm83 b/Documentation/hwmon/lm83
new file mode 100644
index 000000000..50be5cb26
--- /dev/null
+++ b/Documentation/hwmon/lm83
@@ -0,0 +1,85 @@
+Kernel driver lm83
+==================
+
+Supported chips:
+ * National Semiconductor LM83
+ Prefix: 'lm83'
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/pf/LM/LM83.html
+ * National Semiconductor LM82
+ Addresses scanned: I2C 0x18 - 0x1a, 0x29 - 0x2b, 0x4c - 0x4e
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/pf/LM/LM82.html
+
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+Description
+-----------
+
+The LM83 is a digital temperature sensor. It senses its own temperature as
+well as the temperature of up to three external diodes. The LM82 is
+a stripped down version of the LM83 that only supports one external diode.
+Both are compatible with many other devices such as the LM84 and all
+other ADM1021 clones. The main difference between the LM83 and the LM84
+in that the later can only sense the temperature of one external diode.
+
+Using the adm1021 driver for a LM83 should work, but only two temperatures
+will be reported instead of four.
+
+The LM83 is only found on a handful of motherboards. Both a confirmed
+list and an unconfirmed list follow. If you can confirm or infirm the
+fact that any of these motherboards do actually have an LM83, please
+contact us. Note that the LM90 can easily be misdetected as a LM83.
+
+Confirmed motherboards:
+ SBS P014
+ SBS PSL09
+
+Unconfirmed motherboards:
+ Gigabyte GA-8IK1100
+ Iwill MPX2
+ Soltek SL-75DRV5
+
+The LM82 is confirmed to have been found on most AMD Geode reference
+designs and test platforms.
+
+The driver has been successfully tested by Magnus Forsström, who I'd
+like to thank here. More testers will be of course welcome.
+
+The fact that the LM83 is only scarcely used can be easily explained.
+Most motherboards come with more than just temperature sensors for
+health monitoring. They also have voltage and fan rotation speed
+sensors. This means that temperature-only chips are usually used as
+secondary chips coupled with another chip such as an IT8705F or similar
+chip, which provides more features. Since systems usually need three
+temperature sensors (motherboard, processor, power supply) and primary
+chips provide some temperature sensors, the secondary chip, if needed,
+won't have to handle more than two temperatures. Thus, ADM1021 clones
+are sufficient, and there is no need for a four temperatures sensor
+chip such as the LM83. The only case where using an LM83 would make
+sense is on SMP systems, such as the above-mentioned Iwill MPX2,
+because you want an additional temperature sensor for each additional
+CPU.
+
+On the SBS P014, this is different, since the LM83 is the only hardware
+monitoring chipset. One temperature sensor is used for the motherboard
+(actually measuring the LM83's own temperature), one is used for the
+CPU. The two other sensors must be used to measure the temperature of
+two other points of the motherboard. We suspect these points to be the
+north and south bridges, but this couldn't be confirmed.
+
+All temperature values are given in degrees Celsius. Local temperature
+is given within a range of 0 to +85 degrees. Remote temperatures are
+given within a range of 0 to +125 degrees. Resolution is 1.0 degree,
+accuracy is guaranteed to 3.0 degrees (see the datasheet for more
+details).
+
+Each sensor has its own high limit, but the critical limit is common to
+all four sensors. There is no hysteresis mechanism as found on most
+recent temperature sensors.
+
+The lm83 driver will not update its values more frequently than every
+other second; reading them more often will do no harm, but will return
+'old' values.
diff --git a/Documentation/hwmon/lm85 b/Documentation/hwmon/lm85
new file mode 100644
index 000000000..7c49feaa7
--- /dev/null
+++ b/Documentation/hwmon/lm85
@@ -0,0 +1,230 @@
+Kernel driver lm85
+==================
+
+Supported chips:
+ * National Semiconductor LM85 (B and C versions)
+ Prefix: 'lm85'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: http://www.national.com/pf/LM/LM85.html
+ * Analog Devices ADM1027
+ Prefix: 'adm1027'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: http://www.onsemi.com/PowerSolutions/product.do?id=ADM1027
+ * Analog Devices ADT7463
+ Prefix: 'adt7463'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: http://www.onsemi.com/PowerSolutions/product.do?id=ADT7463
+ * Analog Devices ADT7468
+ Prefix: 'adt7468'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: http://www.onsemi.com/PowerSolutions/product.do?id=ADT7468
+ * SMSC EMC6D100, SMSC EMC6D101
+ Prefix: 'emc6d100'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: http://www.smsc.com/media/Downloads_Public/discontinued/6d100.pdf
+ * SMSC EMC6D102
+ Prefix: 'emc6d102'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: http://www.smsc.com/main/catalog/emc6d102.html
+ * SMSC EMC6D103
+ Prefix: 'emc6d103'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: http://www.smsc.com/main/catalog/emc6d103.html
+ * SMSC EMC6D103S
+ Prefix: 'emc6d103s'
+ Addresses scanned: I2C 0x2c, 0x2d, 0x2e
+ Datasheet: http://www.smsc.com/main/catalog/emc6d103s.html
+
+Authors:
+ Philip Pokorny <ppokorny@penguincomputing.com>,
+ Frodo Looijaard <frodol@dds.nl>,
+ Richard Barrington <rich_b_nz@clear.net.nz>,
+ Margit Schubert-While <margitsw@t-online.de>,
+ Justin Thiessen <jthiessen@penguincomputing.com>
+
+Description
+-----------
+
+This driver implements support for the National Semiconductor LM85 and
+compatible chips including the Analog Devices ADM1027, ADT7463, ADT7468 and
+SMSC EMC6D10x chips family.
+
+The LM85 uses the 2-wire interface compatible with the SMBUS 2.0
+specification. Using an analog to digital converter it measures three (3)
+temperatures and five (5) voltages. It has four (4) 16-bit counters for
+measuring fan speed. Five (5) digital inputs are provided for sampling the
+VID signals from the processor to the VRM. Lastly, there are three (3) PWM
+outputs that can be used to control fan speed.
+
+The voltage inputs have internal scaling resistors so that the following
+voltage can be measured without external resistors:
+
+ 2.5V, 3.3V, 5V, 12V, and CPU core voltage (2.25V)
+
+The temperatures measured are one internal diode, and two remote diodes.
+Remote 1 is generally the CPU temperature. These inputs are designed to
+measure a thermal diode like the one in a Pentium 4 processor in a socket
+423 or socket 478 package. They can also measure temperature using a
+transistor like the 2N3904.
+
+A sophisticated control system for the PWM outputs is designed into the
+LM85 that allows fan speed to be adjusted automatically based on any of the
+three temperature sensors. Each PWM output is individually adjustable and
+programmable. Once configured, the LM85 will adjust the PWM outputs in
+response to the measured temperatures without further host intervention.
+This feature can also be disabled for manual control of the PWM's.
+
+Each of the measured inputs (voltage, temperature, fan speed) has
+corresponding high/low limit values. The LM85 will signal an ALARM if any
+measured value exceeds either limit.
+
+The LM85 samples all inputs continuously. The lm85 driver will not read
+the registers more often than once a second. Further, configuration data is
+only read once each 5 minutes. There is twice as much config data as
+measurements, so this would seem to be a worthwhile optimization.
+
+Special Features
+----------------
+
+The LM85 has four fan speed monitoring modes. The ADM1027 has only two.
+Both have special circuitry to compensate for PWM interactions with the
+TACH signal from the fans. The ADM1027 can be configured to measure the
+speed of a two wire fan, but the input conditioning circuitry is different
+for 3-wire and 2-wire mode. For this reason, the 2-wire fan modes are not
+exposed to user control. The BIOS should initialize them to the correct
+mode. If you've designed your own ADM1027, you'll have to modify the
+init_client function and add an insmod parameter to set this up.
+
+To smooth the response of fans to changes in temperature, the LM85 has an
+optional filter for smoothing temperatures. The ADM1027 has the same
+config option but uses it to rate limit the changes to fan speed instead.
+
+The ADM1027, ADT7463 and ADT7468 have a 10-bit ADC and can therefore
+measure temperatures with 0.25 degC resolution. They also provide an offset
+to the temperature readings that is automatically applied during
+measurement. This offset can be used to zero out any errors due to traces
+and placement. The documentation says that the offset is in 0.25 degC
+steps, but in initial testing of the ADM1027 it was 1.00 degC steps. Analog
+Devices has confirmed this "bug". The ADT7463 is reported to work as
+described in the documentation. The current lm85 driver does not show the
+offset register.
+
+The ADT7468 has a high-frequency PWM mode, where all PWM outputs are
+driven by a 22.5 kHz clock. This is a global mode, not per-PWM output,
+which means that setting any PWM frequency above 11.3 kHz will switch
+all 3 PWM outputs to a 22.5 kHz frequency. Conversely, setting any PWM
+frequency below 11.3 kHz will switch all 3 PWM outputs to a frequency
+between 10 and 100 Hz, which can then be tuned separately.
+
+See the vendor datasheets for more information. There is application note
+from National (AN-1260) with some additional information about the LM85.
+The Analog Devices datasheet is very detailed and describes a procedure for
+determining an optimal configuration for the automatic PWM control.
+
+The SMSC EMC6D100 & EMC6D101 monitor external voltages, temperatures, and
+fan speeds. They use this monitoring capability to alert the system to out
+of limit conditions and can automatically control the speeds of multiple
+fans in a PC or embedded system. The EMC6D101, available in a 24-pin SSOP
+package, and the EMC6D100, available in a 28-pin SSOP package, are designed
+to be register compatible. The EMC6D100 offers all the features of the
+EMC6D101 plus additional voltage monitoring and system control features.
+Unfortunately it is not possible to distinguish between the package
+versions on register level so these additional voltage inputs may read
+zero. EMC6D102 and EMC6D103 feature additional ADC bits thus extending precision
+of voltage and temperature channels.
+
+SMSC EMC6D103S is similar to EMC6D103, but does not support pwm#_auto_pwm_minctl
+and temp#_auto_temp_off.
+
+Hardware Configurations
+-----------------------
+
+The LM85 can be jumpered for 3 different SMBus addresses. There are
+no other hardware configuration options for the LM85.
+
+The lm85 driver detects both LM85B and LM85C revisions of the chip. See the
+datasheet for a complete description of the differences. Other than
+identifying the chip, the driver behaves no differently with regard to
+these two chips. The LM85B is recommended for new designs.
+
+The ADM1027, ADT7463 and ADT7468 chips have an optional SMBALERT output
+that can be used to signal the chipset in case a limit is exceeded or the
+temperature sensors fail. Individual sensor interrupts can be masked so
+they won't trigger SMBALERT. The SMBALERT output if configured replaces one
+of the other functions (PWM2 or IN0). This functionality is not implemented
+in current driver.
+
+The ADT7463 and ADT7468 also have an optional THERM output/input which can
+be connected to the processor PROC_HOT output. If available, the autofan
+control dynamic Tmin feature can be enabled to keep the system temperature
+within spec (just?!) with the least possible fan noise.
+
+Configuration Notes
+-------------------
+
+Besides standard interfaces driver adds following:
+
+* Temperatures and Zones
+
+Each temperature sensor is associated with a Zone. There are three
+sensors and therefore three zones (# 1, 2 and 3). Each zone has the following
+temperature configuration points:
+
+* temp#_auto_temp_off - temperature below which fans should be off or spinning very low.
+* temp#_auto_temp_min - temperature over which fans start to spin.
+* temp#_auto_temp_max - temperature when fans spin at full speed.
+* temp#_auto_temp_crit - temperature when all fans will run full speed.
+
+* PWM Control
+
+There are three PWM outputs. The LM85 datasheet suggests that the
+pwm3 output control both fan3 and fan4. Each PWM can be individually
+configured and assigned to a zone for its control value. Each PWM can be
+configured individually according to the following options.
+
+* pwm#_auto_pwm_min - this specifies the PWM value for temp#_auto_temp_off
+ temperature. (PWM value from 0 to 255)
+
+* pwm#_auto_pwm_minctl - this flags selects for temp#_auto_temp_off temperature
+ the behaviour of fans. Write 1 to let fans spinning at
+ pwm#_auto_pwm_min or write 0 to let them off.
+
+NOTE: It has been reported that there is a bug in the LM85 that causes the flag
+to be associated with the zones not the PWMs. This contradicts all the
+published documentation. Setting pwm#_min_ctl in this case actually affects all
+PWMs controlled by zone '#'.
+
+* PWM Controlling Zone selection
+
+* pwm#_auto_channels - controls zone that is associated with PWM
+
+Configuration choices:
+
+ Value Meaning
+ ------ ------------------------------------------------
+ 1 Controlled by Zone 1
+ 2 Controlled by Zone 2
+ 3 Controlled by Zone 3
+ 23 Controlled by higher temp of Zone 2 or 3
+ 123 Controlled by highest temp of Zone 1, 2 or 3
+ 0 PWM always 0% (off)
+ -1 PWM always 100% (full on)
+ -2 Manual control (write to 'pwm#' to set)
+
+The National LM85's have two vendor specific configuration
+features. Tach. mode and Spinup Control. For more details on these,
+see the LM85 datasheet or Application Note AN-1260. These features
+are not currently supported by the lm85 driver.
+
+The Analog Devices ADM1027 has several vendor specific enhancements.
+The number of pulses-per-rev of the fans can be set, Tach monitoring
+can be optimized for PWM operation, and an offset can be applied to
+the temperatures to compensate for systemic errors in the
+measurements. These features are not currently supported by the lm85
+driver.
+
+In addition to the ADM1027 features, the ADT7463 and ADT7468 also have
+Tmin control and THERM asserted counts. Automatic Tmin control acts to
+adjust the Tmin value to maintain the measured temperature sensor at a
+specified temperature. There isn't much documentation on this feature in
+the ADT7463 data sheet. This is not supported by current driver.
diff --git a/Documentation/hwmon/lm87 b/Documentation/hwmon/lm87
new file mode 100644
index 000000000..a2339fd9a
--- /dev/null
+++ b/Documentation/hwmon/lm87
@@ -0,0 +1,77 @@
+Kernel driver lm87
+==================
+
+Supported chips:
+ * National Semiconductor LM87
+ Prefix: 'lm87'
+ Addresses scanned: I2C 0x2c - 0x2e
+ Datasheet: http://www.national.com/pf/LM/LM87.html
+ * Analog Devices ADM1024
+ Prefix: 'adm1024'
+ Addresses scanned: I2C 0x2c - 0x2e
+ Datasheet: http://www.analog.com/en/prod/0,2877,ADM1024,00.html
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>,
+ Mark Studebaker <mdsxyz123@yahoo.com>,
+ Stephen Rousset <stephen.rousset@rocketlogix.com>,
+ Dan Eaton <dan.eaton@rocketlogix.com>,
+ Jean Delvare <jdelvare@suse.de>,
+ Original 2.6 port Jeff Oliver
+
+Description
+-----------
+
+This driver implements support for the National Semiconductor LM87
+and the Analog Devices ADM1024.
+
+The LM87 implements up to three temperature sensors, up to two fan
+rotation speed sensors, up to seven voltage sensors, alarms, and some
+miscellaneous stuff. The ADM1024 is fully compatible.
+
+Temperatures are measured in degrees Celsius. Each input has a high
+and low alarm settings. A high limit produces an alarm when the value
+goes above it, and an alarm is also produced when the value goes below
+the low limit.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4 or 8) to give
+the readings more range or accuracy. Not all RPM values can accurately be
+represented, so some rounding is done. With a divider of 2, the lowest
+representable value is around 2600 RPM.
+
+Voltage sensors (also known as IN sensors) report their values in
+volts. An alarm is triggered if the voltage has crossed a programmable
+minimum or maximum limit. Note that minimum in this case always means
+'closest to zero'; this is important for negative voltage measurements.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may
+already have disappeared! Note that in the current implementation, all
+hardware registers are read whenever any data is read (unless it is less
+than 1.0 seconds since the last update). This means that you can easily
+miss once-only alarms.
+
+The lm87 driver only updates its values each 1.0 seconds; reading it more
+often will do no harm, but will return 'old' values.
+
+
+Hardware Configurations
+-----------------------
+
+The LM87 has four pins which can serve one of two possible functions,
+depending on the hardware configuration.
+
+Some functions share pins, so not all functions are available at the same
+time. Which are depends on the hardware setup. This driver normally
+assumes that firmware configured the chip correctly. Where this is not
+the case, platform code must set the I2C client's platform_data to point
+to a u8 value to be written to the channel register.
+
+For reference, here is the list of exclusive functions:
+ - in0+in5 (default) or temp3
+ - fan1 (default) or in6
+ - fan2 (default) or in7
+ - VID lines (default) or IRQ lines (not handled by this driver)
diff --git a/Documentation/hwmon/lm90 b/Documentation/hwmon/lm90
new file mode 100644
index 000000000..8122675d3
--- /dev/null
+++ b/Documentation/hwmon/lm90
@@ -0,0 +1,275 @@
+Kernel driver lm90
+==================
+
+Supported chips:
+ * National Semiconductor LM90
+ Prefix: 'lm90'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/pf/LM/LM90.html
+ * National Semiconductor LM89
+ Prefix: 'lm89' (no auto-detection)
+ Addresses scanned: I2C 0x4c and 0x4d
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/mpf/LM/LM89.html
+ * National Semiconductor LM99
+ Prefix: 'lm99'
+ Addresses scanned: I2C 0x4c and 0x4d
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/pf/LM/LM99.html
+ * National Semiconductor LM86
+ Prefix: 'lm86'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/mpf/LM/LM86.html
+ * Analog Devices ADM1032
+ Prefix: 'adm1032'
+ Addresses scanned: I2C 0x4c and 0x4d
+ Datasheet: Publicly available at the ON Semiconductor website
+ http://www.onsemi.com/PowerSolutions/product.do?id=ADM1032
+ * Analog Devices ADT7461
+ Prefix: 'adt7461'
+ Addresses scanned: I2C 0x4c and 0x4d
+ Datasheet: Publicly available at the ON Semiconductor website
+ http://www.onsemi.com/PowerSolutions/product.do?id=ADT7461
+ * Analog Devices ADT7461A
+ Prefix: 'adt7461a'
+ Addresses scanned: I2C 0x4c and 0x4d
+ Datasheet: Publicly available at the ON Semiconductor website
+ http://www.onsemi.com/PowerSolutions/product.do?id=ADT7461A
+ * ON Semiconductor NCT1008
+ Prefix: 'nct1008'
+ Addresses scanned: I2C 0x4c and 0x4d
+ Datasheet: Publicly available at the ON Semiconductor website
+ http://www.onsemi.com/PowerSolutions/product.do?id=NCT1008
+ * Maxim MAX6646
+ Prefix: 'max6646'
+ Addresses scanned: I2C 0x4d
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3497
+ * Maxim MAX6647
+ Prefix: 'max6646'
+ Addresses scanned: I2C 0x4e
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3497
+ * Maxim MAX6648
+ Prefix: 'max6646'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3500
+ * Maxim MAX6649
+ Prefix: 'max6646'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3497
+ * Maxim MAX6657
+ Prefix: 'max6657'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/quick_view2.cfm/qv_pk/2578
+ * Maxim MAX6658
+ Prefix: 'max6657'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/quick_view2.cfm/qv_pk/2578
+ * Maxim MAX6659
+ Prefix: 'max6659'
+ Addresses scanned: I2C 0x4c, 0x4d, 0x4e
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/quick_view2.cfm/qv_pk/2578
+ * Maxim MAX6680
+ Prefix: 'max6680'
+ Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b,
+ 0x4c, 0x4d and 0x4e
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3370
+ * Maxim MAX6681
+ Prefix: 'max6680'
+ Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b,
+ 0x4c, 0x4d and 0x4e
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3370
+ * Maxim MAX6692
+ Prefix: 'max6646'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3500
+ * Maxim MAX6695
+ Prefix: 'max6695'
+ Addresses scanned: I2C 0x18
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/datasheet/index.mvp/id/4199
+ * Maxim MAX6696
+ Prefix: 'max6695'
+ Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b,
+ 0x4c, 0x4d and 0x4e
+ Datasheet: Publicly available at the Maxim website
+ http://www.maxim-ic.com/datasheet/index.mvp/id/4199
+ * Winbond/Nuvoton W83L771W/G
+ Prefix: 'w83l771'
+ Addresses scanned: I2C 0x4c
+ Datasheet: No longer available
+ * Winbond/Nuvoton W83L771AWG/ASG
+ Prefix: 'w83l771'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Not publicly available, can be requested from Nuvoton
+ * Philips/NXP SA56004X
+ Prefix: 'sa56004'
+ Addresses scanned: I2C 0x48 through 0x4F
+ Datasheet: Publicly available at NXP website
+ http://ics.nxp.com/products/interface/datasheet/sa56004x.pdf
+ * GMT G781
+ Prefix: 'g781'
+ Addresses scanned: I2C 0x4c, 0x4d
+ Datasheet: Not publicly available from GMT
+ * Texas Instruments TMP451
+ Prefix: 'tmp451'
+ Addresses scanned: I2C 0x4c
+ Datasheet: Publicly available at TI website
+ http://www.ti.com/litv/pdf/sbos686
+
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+
+Description
+-----------
+
+The LM90 is a digital temperature sensor. It senses its own temperature as
+well as the temperature of up to one external diode. It is compatible
+with many other devices, many of which are supported by this driver.
+
+Note that there is no easy way to differentiate between the MAX6657,
+MAX6658 and MAX6659 variants. The extra features of the MAX6659 are only
+supported by this driver if the chip is located at address 0x4d or 0x4e,
+or if the chip type is explicitly selected as max6659.
+The MAX6680 and MAX6681 only differ in their pinout, therefore they obviously
+can't (and don't need to) be distinguished.
+
+The specificity of this family of chipsets over the ADM1021/LM84
+family is that it features critical limits with hysteresis, and an
+increased resolution of the remote temperature measurement.
+
+The different chipsets of the family are not strictly identical, although
+very similar. For reference, here comes a non-exhaustive list of specific
+features:
+
+LM90:
+ * Filter and alert configuration register at 0xBF.
+ * ALERT is triggered by temperatures over critical limits.
+
+LM86 and LM89:
+ * Same as LM90
+ * Better external channel accuracy
+
+LM99:
+ * Same as LM89
+ * External temperature shifted by 16 degrees down
+
+ADM1032:
+ * Consecutive alert register at 0x22.
+ * Conversion averaging.
+ * Up to 64 conversions/s.
+ * ALERT is triggered by open remote sensor.
+ * SMBus PEC support for Write Byte and Receive Byte transactions.
+
+ADT7461, ADT7461A, NCT1008:
+ * Extended temperature range (breaks compatibility)
+ * Lower resolution for remote temperature
+
+MAX6657 and MAX6658:
+ * Better local resolution
+ * Remote sensor type selection
+
+MAX6659:
+ * Better local resolution
+ * Selectable address
+ * Second critical temperature limit
+ * Remote sensor type selection
+
+MAX6680 and MAX6681:
+ * Selectable address
+ * Remote sensor type selection
+
+MAX6695 and MAX6696:
+ * Better local resolution
+ * Selectable address (max6696)
+ * Second critical temperature limit
+ * Two remote sensors
+
+W83L771W/G
+ * The G variant is lead-free, otherwise similar to the W.
+ * Filter and alert configuration register at 0xBF
+ * Moving average (depending on conversion rate)
+
+W83L771AWG/ASG
+ * Successor of the W83L771W/G, same features.
+ * The AWG and ASG variants only differ in package format.
+ * Diode ideality factor configuration (remote sensor) at 0xE3
+
+SA56004X:
+ * Better local resolution
+
+All temperature values are given in degrees Celsius. Resolution
+is 1.0 degree for the local temperature, 0.125 degree for the remote
+temperature, except for the MAX6657, MAX6658 and MAX6659 which have a
+resolution of 0.125 degree for both temperatures.
+
+Each sensor has its own high and low limits, plus a critical limit.
+Additionally, there is a relative hysteresis value common to both critical
+values. To make life easier to user-space applications, two absolute values
+are exported, one for each channel, but these values are of course linked.
+Only the local hysteresis can be set from user-space, and the same delta
+applies to the remote hysteresis.
+
+The lm90 driver will not update its values more frequently than configured with
+the update_interval attribute; reading them more often will do no harm, but will
+return 'old' values.
+
+SMBus Alert Support
+-------------------
+
+This driver has basic support for SMBus alert. When an alert is received,
+the status register is read and the faulty temperature channel is logged.
+
+The Analog Devices chips (ADM1032, ADT7461 and ADT7461A) and ON
+Semiconductor chips (NCT1008) do not implement the SMBus alert protocol
+properly so additional care is needed: the ALERT output is disabled when
+an alert is received, and is re-enabled only when the alarm is gone.
+Otherwise the chip would block alerts from other chips in the bus as long
+as the alarm is active.
+
+PEC Support
+-----------
+
+The ADM1032 is the only chip of the family which supports PEC. It does
+not support PEC on all transactions though, so some care must be taken.
+
+When reading a register value, the PEC byte is computed and sent by the
+ADM1032 chip. However, in the case of a combined transaction (SMBus Read
+Byte), the ADM1032 computes the CRC value over only the second half of
+the message rather than its entirety, because it thinks the first half
+of the message belongs to a different transaction. As a result, the CRC
+value differs from what the SMBus master expects, and all reads fail.
+
+For this reason, the lm90 driver will enable PEC for the ADM1032 only if
+the bus supports the SMBus Send Byte and Receive Byte transaction types.
+These transactions will be used to read register values, instead of
+SMBus Read Byte, and PEC will work properly.
+
+Additionally, the ADM1032 doesn't support SMBus Send Byte with PEC.
+Instead, it will try to write the PEC value to the register (because the
+SMBus Send Byte transaction with PEC is similar to a Write Byte transaction
+without PEC), which is not what we want. Thus, PEC is explicitly disabled
+on SMBus Send Byte transactions in the lm90 driver.
+
+PEC on byte data transactions represents a significant increase in bandwidth
+usage (+33% for writes, +25% for reads) in normal conditions. With the need
+to use two SMBus transaction for reads, this overhead jumps to +50%. Worse,
+two transactions will typically mean twice as much delay waiting for
+transaction completion, effectively doubling the register cache refresh time.
+I guess reliability comes at a price, but it's quite expensive this time.
+
+So, as not everyone might enjoy the slowdown, PEC can be disabled through
+sysfs. Just write 0 to the "pec" file and PEC will be disabled. Write 1
+to that file to enable PEC again.
diff --git a/Documentation/hwmon/lm92 b/Documentation/hwmon/lm92
new file mode 100644
index 000000000..22f68ad03
--- /dev/null
+++ b/Documentation/hwmon/lm92
@@ -0,0 +1,37 @@
+Kernel driver lm92
+==================
+
+Supported chips:
+ * National Semiconductor LM92
+ Prefix: 'lm92'
+ Addresses scanned: I2C 0x48 - 0x4b
+ Datasheet: http://www.national.com/pf/LM/LM92.html
+ * National Semiconductor LM76
+ Prefix: 'lm92'
+ Addresses scanned: none, force parameter needed
+ Datasheet: http://www.national.com/pf/LM/LM76.html
+ * Maxim MAX6633/MAX6634/MAX6635
+ Prefix: 'lm92'
+ Addresses scanned: I2C 0x48 - 0x4b
+ MAX6633 with address in 0x40 - 0x47, 0x4c - 0x4f needs force parameter
+ and MAX6634 with address in 0x4c - 0x4f needs force parameter
+ Datasheet: http://www.maxim-ic.com/quick_view2.cfm/qv_pk/3074
+
+Authors:
+ Abraham van der Merwe <abraham@2d3d.co.za>
+ Jean Delvare <jdelvare@suse.de>
+
+
+Description
+-----------
+
+This driver implements support for the National Semiconductor LM92
+temperature sensor.
+
+Each LM92 temperature sensor supports a single temperature sensor. There are
+alarms for high, low, and critical thresholds. There's also an hysteresis to
+control the thresholds for resetting alarms.
+
+Support was added later for the LM76 and Maxim MAX6633/MAX6634/MAX6635,
+which are mostly compatible. They have not all been tested, so you
+may need to use the force parameter.
diff --git a/Documentation/hwmon/lm93 b/Documentation/hwmon/lm93
new file mode 100644
index 000000000..f3b2ad2ce
--- /dev/null
+++ b/Documentation/hwmon/lm93
@@ -0,0 +1,309 @@
+Kernel driver lm93
+==================
+
+Supported chips:
+ * National Semiconductor LM93
+ Prefix 'lm93'
+ Addresses scanned: I2C 0x2c-0x2e
+ Datasheet: http://www.national.com/ds.cgi/LM/LM93.pdf
+ * National Semiconductor LM94
+ Prefix 'lm94'
+ Addresses scanned: I2C 0x2c-0x2e
+ Datasheet: http://www.national.com/ds.cgi/LM/LM94.pdf
+
+Authors:
+ Mark M. Hoffman <mhoffman@lightlink.com>
+ Ported to 2.6 by Eric J. Bowersox <ericb@aspsys.com>
+ Adapted to 2.6.20 by Carsten Emde <ce@osadl.org>
+ Modified for mainline integration by Hans J. Koch <hjk@hansjkoch.de>
+
+Module Parameters
+-----------------
+
+* init: integer
+ Set to non-zero to force some initializations (default is 0).
+* disable_block: integer
+ A "0" allows SMBus block data transactions if the host supports them. A "1"
+ disables SMBus block data transactions. The default is 0.
+* vccp_limit_type: integer array (2)
+ Configures in7 and in8 limit type, where 0 means absolute and non-zero
+ means relative. "Relative" here refers to "Dynamic Vccp Monitoring using
+ VID" from the datasheet. It greatly simplifies the interface to allow
+ only one set of limits (absolute or relative) to be in operation at a
+ time (even though the hardware is capable of enabling both). There's
+ not a compelling use case for enabling both at once, anyway. The default
+ is "0,0".
+* vid_agtl: integer
+ A "0" configures the VID pins for V(ih) = 2.1V min, V(il) = 0.8V max.
+ A "1" configures the VID pins for V(ih) = 0.8V min, V(il) = 0.4V max.
+ (The latter setting is referred to as AGTL+ Compatible in the datasheet.)
+ I.e. this parameter controls the VID pin input thresholds; if your VID
+ inputs are not working, try changing this. The default value is "0".
+
+
+Hardware Description
+--------------------
+
+(from the datasheet)
+
+The LM93 hardware monitor has a two wire digital interface compatible with
+SMBus 2.0. Using an 8-bit ADC, the LM93 measures the temperature of two remote
+diode connected transistors as well as its own die and 16 power supply
+voltages. To set fan speed, the LM93 has two PWM outputs that are each
+controlled by up to four temperature zones. The fancontrol algorithm is lookup
+table based. The LM93 includes a digital filter that can be invoked to smooth
+temperature readings for better control of fan speed. The LM93 has four
+tachometer inputs to measure fan speed. Limit and status registers for all
+measured values are included. The LM93 builds upon the functionality of
+previous motherboard management ASICs and uses some of the LM85's features
+(i.e. smart tachometer mode). It also adds measurement and control support
+for dynamic Vccp monitoring and PROCHOT. It is designed to monitor a dual
+processor Xeon class motherboard with a minimum of external components.
+
+LM94 is also supported in LM93 compatible mode. Extra sensors and features of
+LM94 are not supported.
+
+
+User Interface
+--------------
+
+#PROCHOT:
+
+The LM93 can monitor two #PROCHOT signals. The results are found in the
+sysfs files prochot1, prochot2, prochot1_avg, prochot2_avg, prochot1_max,
+and prochot2_max. prochot1_max and prochot2_max contain the user limits
+for #PROCHOT1 and #PROCHOT2, respectively. prochot1 and prochot2 contain
+the current readings for the most recent complete time interval. The
+value of prochot1_avg and prochot2_avg is something like a 2 period
+exponential moving average (but not quite - check the datasheet). Note
+that this third value is calculated by the chip itself. All values range
+from 0-255 where 0 indicates no throttling, and 255 indicates > 99.6%.
+
+The monitoring intervals for the two #PROCHOT signals is also configurable.
+These intervals can be found in the sysfs files prochot1_interval and
+prochot2_interval. The values in these files specify the intervals for
+#P1_PROCHOT and #P2_PROCHOT, respectively. Selecting a value not in this
+list will cause the driver to use the next largest interval. The available
+intervals are (in seconds):
+
+#PROCHOT intervals: 0.73, 1.46, 2.9, 5.8, 11.7, 23.3, 46.6, 93.2, 186, 372
+
+It is possible to configure the LM93 to logically short the two #PROCHOT
+signals. I.e. when #P1_PROCHOT is asserted, the LM93 will automatically
+assert #P2_PROCHOT, and vice-versa. This mode is enabled by writing a
+non-zero integer to the sysfs file prochot_short.
+
+The LM93 can also override the #PROCHOT pins by driving a PWM signal onto
+one or both of them. When overridden, the signal has a period of 3.56 ms,
+a minimum pulse width of 5 clocks (at 22.5kHz => 6.25% duty cycle), and
+a maximum pulse width of 80 clocks (at 22.5kHz => 99.88% duty cycle).
+
+The sysfs files prochot1_override and prochot2_override contain boolean
+integers which enable or disable the override function for #P1_PROCHOT and
+#P2_PROCHOT, respectively. The sysfs file prochot_override_duty_cycle
+contains a value controlling the duty cycle for the PWM signal used when
+the override function is enabled. This value ranges from 0 to 15, with 0
+indicating minimum duty cycle and 15 indicating maximum.
+
+#VRD_HOT:
+
+The LM93 can monitor two #VRD_HOT signals. The results are found in the
+sysfs files vrdhot1 and vrdhot2. There is one value per file: a boolean for
+which 1 indicates #VRD_HOT is asserted and 0 indicates it is negated. These
+files are read-only.
+
+Smart Tach Mode:
+
+(from the datasheet)
+
+ If a fan is driven using a low-side drive PWM, the tachometer
+ output of the fan is corrupted. The LM93 includes smart tachometer
+ circuitry that allows an accurate tachometer reading to be
+ achieved despite the signal corruption. In smart tach mode all
+ four signals are measured within 4 seconds.
+
+Smart tach mode is enabled by the driver by writing 1 or 2 (associating the
+the fan tachometer with a pwm) to the sysfs file fan<n>_smart_tach. A zero
+will disable the function for that fan. Note that Smart tach mode cannot be
+enabled if the PWM output frequency is 22500 Hz (see below).
+
+Manual PWM:
+
+The LM93 has a fixed or override mode for the two PWM outputs (although, there
+are still some conditions that will override even this mode - see section
+15.10.6 of the datasheet for details.) The sysfs files pwm1_override
+and pwm2_override are used to enable this mode; each is a boolean integer
+where 0 disables and 1 enables the manual control mode. The sysfs files pwm1
+and pwm2 are used to set the manual duty cycle; each is an integer (0-255)
+where 0 is 0% duty cycle, and 255 is 100%. Note that the duty cycle values
+are constrained by the hardware. Selecting a value which is not available
+will cause the driver to use the next largest value. Also note: when manual
+PWM mode is disabled, the value of pwm1 and pwm2 indicates the current duty
+cycle chosen by the h/w.
+
+PWM Output Frequency:
+
+The LM93 supports several different frequencies for the PWM output channels.
+The sysfs files pwm1_freq and pwm2_freq are used to select the frequency. The
+frequency values are constrained by the hardware. Selecting a value which is
+not available will cause the driver to use the next largest value. Also note
+that this parameter has implications for the Smart Tach Mode (see above).
+
+PWM Output Frequencies (in Hz): 12, 36, 48, 60, 72, 84, 96, 22500 (default)
+
+Automatic PWM:
+
+The LM93 is capable of complex automatic fan control, with many different
+points of configuration. To start, each PWM output can be bound to any
+combination of eight control sources. The final PWM is the largest of all
+individual control sources to which the PWM output is bound.
+
+The eight control sources are: temp1-temp4 (aka "zones" in the datasheet),
+#PROCHOT 1 & 2, and #VRDHOT 1 & 2. The bindings are expressed as a bitmask
+in the sysfs files pwm<n>_auto_channels, where a "1" enables the binding, and
+a "0" disables it. The h/w default is 0x0f (all temperatures bound).
+
+ 0x01 - Temp 1
+ 0x02 - Temp 2
+ 0x04 - Temp 3
+ 0x08 - Temp 4
+ 0x10 - #PROCHOT 1
+ 0x20 - #PROCHOT 2
+ 0x40 - #VRDHOT 1
+ 0x80 - #VRDHOT 2
+
+The function y = f(x) takes a source temperature x to a PWM output y. This
+function of the LM93 is derived from a base temperature and a table of 12
+temperature offsets. The base temperature is expressed in degrees C in the
+sysfs files temp<n>_auto_base. The offsets are expressed in cumulative
+degrees C, with the value of offset <i> for temperature value <n> being
+contained in the file temp<n>_auto_offset<i>. E.g. if the base temperature
+is 40C:
+
+ offset # temp<n>_auto_offset<i> range pwm
+ 1 0 - 25.00%
+ 2 0 - 28.57%
+ 3 1 40C - 41C 32.14%
+ 4 1 41C - 42C 35.71%
+ 5 2 42C - 44C 39.29%
+ 6 2 44C - 46C 42.86%
+ 7 2 48C - 50C 46.43%
+ 8 2 50C - 52C 50.00%
+ 9 2 52C - 54C 53.57%
+ 10 2 54C - 56C 57.14%
+ 11 2 56C - 58C 71.43%
+ 12 2 58C - 60C 85.71%
+ > 60C 100.00%
+
+Valid offsets are in the range 0C <= x <= 7.5C in 0.5C increments.
+
+There is an independent base temperature for each temperature channel. Note,
+however, there are only two tables of offsets: one each for temp[12] and
+temp[34]. Therefore, any change to e.g. temp1_auto_offset<i> will also
+affect temp2_auto_offset<i>.
+
+The LM93 can also apply hysteresis to the offset table, to prevent unwanted
+oscillation between two steps in the offsets table. These values are found in
+the sysfs files temp<n>_auto_offset_hyst. The value in this file has the
+same representation as in temp<n>_auto_offset<i>.
+
+If a temperature reading falls below the base value for that channel, the LM93
+will use the minimum PWM value. These values are found in the sysfs files
+temp<n>_auto_pwm_min. Note, there are only two minimums: one each for temp[12]
+and temp[34]. Therefore, any change to e.g. temp1_auto_pwm_min will also
+affect temp2_auto_pwm_min.
+
+PWM Spin-Up Cycle:
+
+A spin-up cycle occurs when a PWM output is commanded from 0% duty cycle to
+some value > 0%. The LM93 supports a minimum duty cycle during spin-up. These
+values are found in the sysfs files pwm<n>_auto_spinup_min. The value in this
+file has the same representation as other PWM duty cycle values. The
+duration of the spin-up cycle is also configurable. These values are found in
+the sysfs files pwm<n>_auto_spinup_time. The value in this file is
+the spin-up time in seconds. The available spin-up times are constrained by
+the hardware. Selecting a value which is not available will cause the driver
+to use the next largest value.
+
+Spin-up Durations: 0 (disabled, h/w default), 0.1, 0.25, 0.4, 0.7, 1.0,
+ 2.0, 4.0
+
+#PROCHOT and #VRDHOT PWM Ramping:
+
+If the #PROCHOT or #VRDHOT signals are asserted while bound to a PWM output
+channel, the LM93 will ramp the PWM output up to 100% duty cycle in discrete
+steps. The duration of each step is configurable. There are two files, with
+one value each in seconds: pwm_auto_prochot_ramp and pwm_auto_vrdhot_ramp.
+The available ramp times are constrained by the hardware. Selecting a value
+which is not available will cause the driver to use the next largest value.
+
+Ramp Times: 0 (disabled, h/w default) to 0.75 in 0.05 second intervals
+
+Fan Boost:
+
+For each temperature channel, there is a boost temperature: if the channel
+exceeds this limit, the LM93 will immediately drive both PWM outputs to 100%.
+This limit is expressed in degrees C in the sysfs files temp<n>_auto_boost.
+There is also a hysteresis temperature for this function: after the boost
+limit is reached, the temperature channel must drop below this value before
+the boost function is disabled. This temperature is also expressed in degrees
+C in the sysfs files temp<n>_auto_boost_hyst.
+
+GPIO Pins:
+
+The LM93 can monitor the logic level of four dedicated GPIO pins as well as the
+four tach input pins. GPIO0-GPIO3 correspond to (fan) tach 1-4, respectively.
+All eight GPIOs are read by reading the bitmask in the sysfs file gpio. The
+LSB is GPIO0, and the MSB is GPIO7.
+
+
+LM93 Unique sysfs Files
+-----------------------
+
+ file description
+ -------------------------------------------------------------
+
+ prochot<n> current #PROCHOT %
+
+ prochot<n>_avg moving average #PROCHOT %
+
+ prochot<n>_max limit #PROCHOT %
+
+ prochot_short enable or disable logical #PROCHOT pin short
+
+ prochot<n>_override force #PROCHOT assertion as PWM
+
+ prochot_override_duty_cycle
+ duty cycle for the PWM signal used when
+ #PROCHOT is overridden
+
+ prochot<n>_interval #PROCHOT PWM sampling interval
+
+ vrdhot<n> 0 means negated, 1 means asserted
+
+ fan<n>_smart_tach enable or disable smart tach mode
+
+ pwm<n>_auto_channels select control sources for PWM outputs
+
+ pwm<n>_auto_spinup_min minimum duty cycle during spin-up
+
+ pwm<n>_auto_spinup_time duration of spin-up
+
+ pwm_auto_prochot_ramp ramp time per step when #PROCHOT asserted
+
+ pwm_auto_vrdhot_ramp ramp time per step when #VRDHOT asserted
+
+ temp<n>_auto_base temperature channel base
+
+ temp<n>_auto_offset[1-12]
+ temperature channel offsets
+
+ temp<n>_auto_offset_hyst
+ temperature channel offset hysteresis
+
+ temp<n>_auto_boost temperature channel boost (PWMs to 100%) limit
+
+ temp<n>_auto_boost_hyst temperature channel boost hysteresis
+
+ gpio input state of 8 GPIO pins; read-only
+
diff --git a/Documentation/hwmon/lm95234 b/Documentation/hwmon/lm95234
new file mode 100644
index 000000000..32b777ef2
--- /dev/null
+++ b/Documentation/hwmon/lm95234
@@ -0,0 +1,41 @@
+Kernel driver lm95234
+=====================
+
+Supported chips:
+ * National Semiconductor / Texas Instruments LM95233
+ Addresses scanned: I2C 0x18, 0x2a, 0x2b
+ Datasheet: Publicly available at the Texas Instruments website
+ http://www.ti.com/product/lm95233
+ * National Semiconductor / Texas Instruments LM95234
+ Addresses scanned: I2C 0x18, 0x4d, 0x4e
+ Datasheet: Publicly available at the Texas Instruments website
+ http://www.ti.com/product/lm95234
+
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+LM95233 and LM95234 are 11-bit digital temperature sensors with a 2-wire
+System Management Bus (SMBus) interface and TrueTherm technology
+that can very accurately monitor the temperature of two (LM95233)
+or four (LM95234) remote diodes as well as its own temperature.
+The remote diodes can be external devices such as microprocessors,
+graphics processors or diode-connected 2N3904s. The chip's TruTherm
+beta compensation technology allows sensing of 90 nm or 65 nm process
+thermal diodes accurately.
+
+All temperature values are given in millidegrees Celsius. Temperature
+is provided within a range of -127 to +255 degrees (+127.875 degrees for
+the internal sensor). Resolution depends on temperature input and range.
+
+Each sensor has its own maximum limit, but the hysteresis is common to all
+channels. The hysteresis is configurable with the tem1_max_hyst attribute and
+affects the hysteresis on all channels. The first two external sensors also
+have a critical limit.
+
+The lm95234 driver can change its update interval to a fixed set of values.
+It will round up to the next selectable interval. See the datasheet for exact
+values. Reading sensor values more often will do no harm, but will return
+'old' values.
diff --git a/Documentation/hwmon/lm95245 b/Documentation/hwmon/lm95245
new file mode 100644
index 000000000..d755901f5
--- /dev/null
+++ b/Documentation/hwmon/lm95245
@@ -0,0 +1,41 @@
+Kernel driver lm95245
+==================
+
+Supported chips:
+ * TI LM95235
+ Addresses scanned: I2C 0x18, 0x29, 0x4c
+ Datasheet: Publicly available at the TI website
+ http://www.ti.com/lit/ds/symlink/lm95235.pdf
+ * TI / National Semiconductor LM95245
+ Addresses scanned: I2C 0x18, 0x19, 0x29, 0x4c, 0x4d
+ Datasheet: Publicly available at the TI website
+ http://www.ti.com/lit/ds/symlink/lm95245.pdf
+
+
+Author: Alexander Stein <alexander.stein@systec-electronic.com>
+
+Description
+-----------
+
+LM95235 and LM95245 are 11-bit digital temperature sensors with a 2-wire System
+Management Bus (SMBus) interface and TruTherm technology that can monitor
+the temperature of a remote diode as well as its own temperature.
+The chips can be used to very accurately monitor the temperature of
+external devices such as microprocessors.
+
+All temperature values are given in millidegrees Celsius. Local temperature
+is given within a range of -127 to +127.875 degrees. Remote temperatures are
+given within a range of -127 to +255 degrees. Resolution depends on
+temperature input and range.
+
+Each sensor has its own critical limit. Additionally, there is a relative
+hysteresis value common to both critical limits. To make life easier to
+user-space applications, two absolute values are exported, one for each
+channel, but these values are of course linked. Only the local hysteresis
+can be set from user-space, and the same delta applies to the remote
+hysteresis.
+
+The lm95245 driver can change its update interval to a fixed set of values.
+It will round up to the next selectable interval. See the datasheet for exact
+values. Reading sensor values more often will do no harm, but will return
+'old' values.
diff --git a/Documentation/hwmon/ltc2945 b/Documentation/hwmon/ltc2945
new file mode 100644
index 000000000..f8d0f7f19
--- /dev/null
+++ b/Documentation/hwmon/ltc2945
@@ -0,0 +1,84 @@
+Kernel driver ltc2945
+=====================
+
+Supported chips:
+ * Linear Technology LTC2945
+ Prefix: 'ltc2945'
+ Addresses scanned: -
+ Datasheet:
+ http://cds.linear.com/docs/en/datasheet/2945fa.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+The LTC2945 is a rail-to-rail system monitor that measures current, voltage,
+and power consumption.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for LTC2945 devices, since there is no register
+which can be safely used to identify the chip. You will have to instantiate
+the devices explicitly.
+
+Example: the following will load the driver for an LTC2945 at address 0x10
+on I2C bus #1:
+$ modprobe ltc2945
+$ echo ltc2945 0x10 > /sys/bus/i2c/devices/i2c-1/new_device
+
+
+Sysfs entries
+-------------
+
+Voltage readings provided by this driver are reported as obtained from the ADC
+registers. If a set of voltage divider resistors is installed, calculate the
+real voltage by multiplying the reported value with (R1+R2)/R2, where R1 is the
+value of the divider resistor against the measured voltage and R2 is the value
+of the divider resistor against Ground.
+
+Current reading provided by this driver is reported as obtained from the ADC
+Current Sense register. The reported value assumes that a 1 mOhm sense resistor
+is installed. If a different sense resistor is installed, calculate the real
+current by dividing the reported value by the sense resistor value in mOhm.
+
+in1_input VIN voltage (mV). Voltage is measured either at
+ SENSE+ or VDD pin depending on chip configuration.
+in1_min Undervoltage threshold
+in1_max Overvoltage threshold
+in1_lowest Lowest measured voltage
+in1_highest Highest measured voltage
+in1_reset_history Write 1 to reset in1 history
+in1_min_alarm Undervoltage alarm
+in1_max_alarm Overvoltage alarm
+
+in2_input ADIN voltage (mV)
+in2_min Undervoltage threshold
+in2_max Overvoltage threshold
+in2_lowest Lowest measured voltage
+in2_highest Highest measured voltage
+in2_reset_history Write 1 to reset in2 history
+in2_min_alarm Undervoltage alarm
+in2_max_alarm Overvoltage alarm
+
+curr1_input SENSE current (mA)
+curr1_min Undercurrent threshold
+curr1_max Overcurrent threshold
+curr1_lowest Lowest measured current
+curr1_highest Highest measured current
+curr1_reset_history Write 1 to reset curr1 history
+curr1_min_alarm Undercurrent alarm
+curr1_max_alarm Overcurrent alarm
+
+power1_input Power (in uW). Power is calculated based on SENSE+/VDD
+ voltage or ADIN voltage depending on chip configuration.
+power1_min Low lower threshold
+power1_max High power threshold
+power1_input_lowest Historical minimum power use
+power1_input_highest Historical maximum power use
+power1_reset_history Write 1 to reset power1 history
+power1_min_alarm Low power alarm
+power1_max_alarm High power alarm
diff --git a/Documentation/hwmon/ltc2978 b/Documentation/hwmon/ltc2978
new file mode 100644
index 000000000..686c078bb
--- /dev/null
+++ b/Documentation/hwmon/ltc2978
@@ -0,0 +1,157 @@
+Kernel driver ltc2978
+=====================
+
+Supported chips:
+ * Linear Technology LTC2974
+ Prefix: 'ltc2974'
+ Addresses scanned: -
+ Datasheet: http://www.linear.com/product/ltc2974
+ * Linear Technology LTC2977
+ Prefix: 'ltc2977'
+ Addresses scanned: -
+ Datasheet: http://www.linear.com/product/ltc2977
+ * Linear Technology LTC2978, LTC2978A
+ Prefix: 'ltc2978'
+ Addresses scanned: -
+ Datasheet: http://www.linear.com/product/ltc2978
+ http://www.linear.com/product/ltc2978a
+ * Linear Technology LTC3880
+ Prefix: 'ltc3880'
+ Addresses scanned: -
+ Datasheet: http://www.linear.com/product/ltc3880
+ * Linear Technology LTC3883
+ Prefix: 'ltc3883'
+ Addresses scanned: -
+ Datasheet: http://www.linear.com/product/ltc3883
+ * Linear Technology LTM4676
+ Prefix: 'ltm4676'
+ Addresses scanned: -
+ Datasheet: http://www.linear.com/product/ltm4676
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+LTC2974 is a quad digital power supply manager. LTC2978 is an octal power supply
+monitor. LTC2977 is a pin compatible replacement for LTC2978. LTC3880 is a dual
+output poly-phase step-down DC/DC controller. LTC3883 is a single phase
+step-down DC/DC controller. LTM4676 is a dual 13A or single 26A uModule
+regulator.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for PMBus devices. You will have to instantiate
+devices explicitly.
+
+Example: the following commands will load the driver for an LTC2978 at address
+0x60 on I2C bus #1:
+
+# modprobe ltc2978
+# echo ltc2978 0x60 > /sys/bus/i2c/devices/i2c-1/new_device
+
+
+Sysfs attributes
+----------------
+
+in1_label "vin"
+in1_input Measured input voltage.
+in1_min Minimum input voltage.
+in1_max Maximum input voltage.
+ LTC2974, LTC2977, and LTC2978 only.
+in1_lcrit Critical minimum input voltage.
+ LTC2974, LTC2977, and LTC2978 only.
+in1_crit Critical maximum input voltage.
+in1_min_alarm Input voltage low alarm.
+in1_max_alarm Input voltage high alarm.
+ LTC2974, LTC2977, and LTC2978 only.
+in1_lcrit_alarm Input voltage critical low alarm.
+ LTC2974, LTC2977, and LTC2978 only.
+in1_crit_alarm Input voltage critical high alarm.
+in1_lowest Lowest input voltage.
+ LTC2974, LTC2977, and LTC2978 only.
+in1_highest Highest input voltage.
+in1_reset_history Reset input voltage history.
+
+in[N]_label "vout[1-8]".
+ LTC2974: N=2-5
+ LTC2977: N=2-9
+ LTC2978: N=2-9
+ LTC3880, LTM4676: N=2-3
+ LTC3883: N=2
+in[N]_input Measured output voltage.
+in[N]_min Minimum output voltage.
+in[N]_max Maximum output voltage.
+in[N]_lcrit Critical minimum output voltage.
+in[N]_crit Critical maximum output voltage.
+in[N]_min_alarm Output voltage low alarm.
+in[N]_max_alarm Output voltage high alarm.
+in[N]_lcrit_alarm Output voltage critical low alarm.
+in[N]_crit_alarm Output voltage critical high alarm.
+in[N]_lowest Lowest output voltage. LTC2974 and LTC2978 only.
+in[N]_highest Highest output voltage.
+in[N]_reset_history Reset output voltage history.
+
+temp[N]_input Measured temperature.
+ On LTC2974, temp[1-4] report external temperatures,
+ and temp5 reports the chip temperature.
+ On LTC2977 and LTC2978, only one temperature measurement
+ is supported and reports the chip temperature.
+ On LTC3880 and LTM4676, temp1 and temp2 report external
+ temperatures, and temp3 reports the chip temperature.
+ On LTC3883, temp1 reports an external temperature,
+ and temp2 reports the chip temperature.
+temp[N]_min Mimimum temperature. LTC2974, LCT2977, and LTC2978 only.
+temp[N]_max Maximum temperature.
+temp[N]_lcrit Critical low temperature.
+temp[N]_crit Critical high temperature.
+temp[N]_min_alarm Temperature low alarm.
+ LTC2974, LTC2977, and LTC2978 only.
+temp[N]_max_alarm Temperature high alarm.
+temp[N]_lcrit_alarm Temperature critical low alarm.
+temp[N]_crit_alarm Temperature critical high alarm.
+temp[N]_lowest Lowest measured temperature.
+ LTC2974, LTC2977, and LTC2978 only.
+ Not supported for chip temperature sensor on LTC2974.
+temp[N]_highest Highest measured temperature. Not supported for chip
+ temperature sensor on LTC2974.
+temp[N]_reset_history Reset temperature history. Not supported for chip
+ temperature sensor on LTC2974.
+
+power1_label "pin". LTC3883 only.
+power1_input Measured input power.
+
+power[N]_label "pout[1-4]".
+ LTC2974: N=1-4
+ LTC2977: Not supported
+ LTC2978: Not supported
+ LTC3880, LTM4676: N=1-2
+ LTC3883: N=2
+power[N]_input Measured output power.
+
+curr1_label "iin". LTC3880, LTC3883, and LTM4676 only.
+curr1_input Measured input current.
+curr1_max Maximum input current.
+curr1_max_alarm Input current high alarm.
+curr1_highest Highest input current. LTC3883 only.
+curr1_reset_history Reset input current history. LTC3883 only.
+
+curr[N]_label "iout[1-4]".
+ LTC2974: N=1-4
+ LTC2977: not supported
+ LTC2978: not supported
+ LTC3880, LTM4676: N=2-3
+ LTC3883: N=2
+curr[N]_input Measured output current.
+curr[N]_max Maximum output current.
+curr[N]_crit Critical high output current.
+curr[N]_lcrit Critical low output current. LTC2974 only.
+curr[N]_max_alarm Output current high alarm.
+curr[N]_crit_alarm Output current critical high alarm.
+curr[N]_lcrit_alarm Output current critical low alarm. LTC2974 only.
+curr[N]_lowest Lowest output current. LTC2974 only.
+curr[N]_highest Highest output current.
+curr[N]_reset_history Reset output current history.
diff --git a/Documentation/hwmon/ltc4151 b/Documentation/hwmon/ltc4151
new file mode 100644
index 000000000..43c667e66
--- /dev/null
+++ b/Documentation/hwmon/ltc4151
@@ -0,0 +1,47 @@
+Kernel driver ltc4151
+=====================
+
+Supported chips:
+ * Linear Technology LTC4151
+ Prefix: 'ltc4151'
+ Addresses scanned: -
+ Datasheet:
+ http://www.linear.com/docs/Datasheet/4151fc.pdf
+
+Author: Per Dalen <per.dalen@appeartv.com>
+
+
+Description
+-----------
+
+The LTC4151 is a High Voltage I2C Current and Voltage Monitor.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for LTC4151 devices, since there is no register
+which can be safely used to identify the chip. You will have to instantiate
+the devices explicitly.
+
+Example: the following will load the driver for an LTC4151 at address 0x6f
+on I2C bus #0:
+# modprobe ltc4151
+# echo ltc4151 0x6f > /sys/bus/i2c/devices/i2c-0/new_device
+
+
+Sysfs entries
+-------------
+
+Voltage readings provided by this driver are reported as obtained from the ADIN
+and VIN registers.
+
+Current reading provided by this driver is reported as obtained from the Current
+Sense register. The reported value assumes that a 1 mOhm sense resistor is
+installed.
+
+in1_input VDIN voltage (mV)
+
+in2_input ADIN voltage (mV)
+
+curr1_input SENSE current (mA)
diff --git a/Documentation/hwmon/ltc4215 b/Documentation/hwmon/ltc4215
new file mode 100644
index 000000000..c196a1846
--- /dev/null
+++ b/Documentation/hwmon/ltc4215
@@ -0,0 +1,51 @@
+Kernel driver ltc4215
+=====================
+
+Supported chips:
+ * Linear Technology LTC4215
+ Prefix: 'ltc4215'
+ Addresses scanned: 0x44
+ Datasheet:
+ http://www.linear.com/pc/downloadDocument.do?navId=H0,C1,C1003,C1006,C1163,P17572,D12697
+
+Author: Ira W. Snyder <iws@ovro.caltech.edu>
+
+
+Description
+-----------
+
+The LTC4215 controller allows a board to be safely inserted and removed
+from a live backplane.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for LTC4215 devices, due to the fact that some
+of the possible addresses are unfriendly to probing. You will have to
+instantiate the devices explicitly.
+
+Example: the following will load the driver for an LTC4215 at address 0x44
+on I2C bus #0:
+$ modprobe ltc4215
+$ echo ltc4215 0x44 > /sys/bus/i2c/devices/i2c-0/new_device
+
+
+Sysfs entries
+-------------
+
+The LTC4215 has built-in limits for overvoltage, undervoltage, and
+undercurrent warnings. This makes it very likely that the reference
+circuit will be used.
+
+in1_input input voltage
+in2_input output voltage
+
+in1_min_alarm input undervoltage alarm
+in1_max_alarm input overvoltage alarm
+
+curr1_input current
+curr1_max_alarm overcurrent alarm
+
+power1_input power usage
+power1_alarm power bad alarm
diff --git a/Documentation/hwmon/ltc4245 b/Documentation/hwmon/ltc4245
new file mode 100644
index 000000000..b478b0864
--- /dev/null
+++ b/Documentation/hwmon/ltc4245
@@ -0,0 +1,102 @@
+Kernel driver ltc4245
+=====================
+
+Supported chips:
+ * Linear Technology LTC4245
+ Prefix: 'ltc4245'
+ Addresses scanned: 0x20-0x3f
+ Datasheet:
+ http://www.linear.com/pc/downloadDocument.do?navId=H0,C1,C1003,C1006,C1140,P19392,D13517
+
+Author: Ira W. Snyder <iws@ovro.caltech.edu>
+
+
+Description
+-----------
+
+The LTC4245 controller allows a board to be safely inserted and removed
+from a live backplane in multiple supply systems such as CompactPCI and
+PCI Express.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for LTC4245 devices, due to the fact that some
+of the possible addresses are unfriendly to probing. You will have to
+instantiate the devices explicitly.
+
+Example: the following will load the driver for an LTC4245 at address 0x23
+on I2C bus #1:
+$ modprobe ltc4245
+$ echo ltc4245 0x23 > /sys/bus/i2c/devices/i2c-1/new_device
+
+
+Sysfs entries
+-------------
+
+The LTC4245 has built-in limits for over and under current warnings. This
+makes it very likely that the reference circuit will be used.
+
+This driver uses the values in the datasheet to change the register values
+into the values specified in the sysfs-interface document. The current readings
+rely on the sense resistors listed in Table 2: "Sense Resistor Values".
+
+in1_input 12v input voltage (mV)
+in2_input 5v input voltage (mV)
+in3_input 3v input voltage (mV)
+in4_input Vee (-12v) input voltage (mV)
+
+in1_min_alarm 12v input undervoltage alarm
+in2_min_alarm 5v input undervoltage alarm
+in3_min_alarm 3v input undervoltage alarm
+in4_min_alarm Vee (-12v) input undervoltage alarm
+
+curr1_input 12v current (mA)
+curr2_input 5v current (mA)
+curr3_input 3v current (mA)
+curr4_input Vee (-12v) current (mA)
+
+curr1_max_alarm 12v overcurrent alarm
+curr2_max_alarm 5v overcurrent alarm
+curr3_max_alarm 3v overcurrent alarm
+curr4_max_alarm Vee (-12v) overcurrent alarm
+
+in5_input 12v output voltage (mV)
+in6_input 5v output voltage (mV)
+in7_input 3v output voltage (mV)
+in8_input Vee (-12v) output voltage (mV)
+
+in5_min_alarm 12v output undervoltage alarm
+in6_min_alarm 5v output undervoltage alarm
+in7_min_alarm 3v output undervoltage alarm
+in8_min_alarm Vee (-12v) output undervoltage alarm
+
+in9_input GPIO voltage data (see note 1)
+in10_input GPIO voltage data (see note 1)
+in11_input GPIO voltage data (see note 1)
+
+power1_input 12v power usage (mW)
+power2_input 5v power usage (mW)
+power3_input 3v power usage (mW)
+power4_input Vee (-12v) power usage (mW)
+
+
+Note 1
+------
+
+If you have NOT configured the driver to sample all GPIO pins as analog
+voltages, then the in10_input and in11_input sysfs attributes will not be
+created. The driver will sample the GPIO pin that is currently connected to the
+ADC as an analog voltage, and report the value in in9_input.
+
+If you have configured the driver to sample all GPIO pins as analog voltages,
+then they will be sampled in round-robin fashion. If userspace reads too
+slowly, -EAGAIN will be returned when you read the sysfs attribute containing
+the sensor reading.
+
+The LTC4245 chip can be configured to sample all GPIO pins with two methods:
+1) platform data -- see include/linux/i2c/ltc4245.h
+2) OF device tree -- add the "ltc4245,use-extra-gpios" property to each chip
+
+The default mode of operation is to sample a single GPIO pin.
diff --git a/Documentation/hwmon/ltc4260 b/Documentation/hwmon/ltc4260
new file mode 100644
index 000000000..c4ff4ad99
--- /dev/null
+++ b/Documentation/hwmon/ltc4260
@@ -0,0 +1,56 @@
+Kernel driver ltc4260
+=====================
+
+Supported chips:
+ * Linear Technology LTC4260
+ Prefix: 'ltc4260'
+ Addresses scanned: -
+ Datasheet:
+ http://cds.linear.com/docs/en/datasheet/4260fc.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+The LTC4260 Hot Swap controller allows a board to be safely inserted
+and removed from a live backplane.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for LTC4260 devices, since there is no register
+which can be safely used to identify the chip. You will have to instantiate
+the devices explicitly.
+
+Example: the following will load the driver for an LTC4260 at address 0x10
+on I2C bus #1:
+$ modprobe ltc4260
+$ echo ltc4260 0x10 > /sys/bus/i2c/devices/i2c-1/new_device
+
+
+Sysfs entries
+-------------
+
+Voltage readings provided by this driver are reported as obtained from the ADC
+registers. If a set of voltage divider resistors is installed, calculate the
+real voltage by multiplying the reported value with (R1+R2)/R2, where R1 is the
+value of the divider resistor against the measured voltage and R2 is the value
+of the divider resistor against Ground.
+
+Current reading provided by this driver is reported as obtained from the ADC
+Current Sense register. The reported value assumes that a 1 mOhm sense resistor
+is installed. If a different sense resistor is installed, calculate the real
+current by dividing the reported value by the sense resistor value in mOhm.
+
+in1_input SOURCE voltage (mV)
+in1_min_alarm Undervoltage alarm
+in1_max_alarm Overvoltage alarm
+
+in2_input ADIN voltage (mV)
+in2_alarm Power bad alarm
+
+curr1_input SENSE current (mA)
+curr1_alarm SENSE overcurrent alarm
diff --git a/Documentation/hwmon/ltc4261 b/Documentation/hwmon/ltc4261
new file mode 100644
index 000000000..9378a75c6
--- /dev/null
+++ b/Documentation/hwmon/ltc4261
@@ -0,0 +1,63 @@
+Kernel driver ltc4261
+=====================
+
+Supported chips:
+ * Linear Technology LTC4261
+ Prefix: 'ltc4261'
+ Addresses scanned: -
+ Datasheet:
+ http://cds.linear.com/docs/Datasheet/42612fb.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+The LTC4261/LTC4261-2 negative voltage Hot Swap controllers allow a board
+to be safely inserted and removed from a live backplane.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for LTC4261 devices, since there is no register
+which can be safely used to identify the chip. You will have to instantiate
+the devices explicitly.
+
+Example: the following will load the driver for an LTC4261 at address 0x10
+on I2C bus #1:
+$ modprobe ltc4261
+$ echo ltc4261 0x10 > /sys/bus/i2c/devices/i2c-1/new_device
+
+
+Sysfs entries
+-------------
+
+Voltage readings provided by this driver are reported as obtained from the ADC
+registers. If a set of voltage divider resistors is installed, calculate the
+real voltage by multiplying the reported value with (R1+R2)/R2, where R1 is the
+value of the divider resistor against the measured voltage and R2 is the value
+of the divider resistor against Ground.
+
+Current reading provided by this driver is reported as obtained from the ADC
+Current Sense register. The reported value assumes that a 1 mOhm sense resistor
+is installed. If a different sense resistor is installed, calculate the real
+current by dividing the reported value by the sense resistor value in mOhm.
+
+The chip has two voltage sensors, but only one set of voltage alarm status bits.
+In many many designs, those alarms are associated with the ADIN2 sensor, due to
+the proximity of the ADIN2 pin to the OV pin. ADIN2 is, however, not available
+on all chip variants. To ensure that the alarm condition is reported to the user,
+report it with both voltage sensors.
+
+in1_input ADIN2 voltage (mV)
+in1_min_alarm ADIN/ADIN2 Undervoltage alarm
+in1_max_alarm ADIN/ADIN2 Overvoltage alarm
+
+in2_input ADIN voltage (mV)
+in2_min_alarm ADIN/ADIN2 Undervoltage alarm
+in2_max_alarm ADIN/ADIN2 Overvoltage alarm
+
+curr1_input SENSE current (mA)
+curr1_alarm SENSE overcurrent alarm
diff --git a/Documentation/hwmon/max16064 b/Documentation/hwmon/max16064
new file mode 100644
index 000000000..d59cc7829
--- /dev/null
+++ b/Documentation/hwmon/max16064
@@ -0,0 +1,66 @@
+Kernel driver max16064
+======================
+
+Supported chips:
+ * Maxim MAX16064
+ Prefix: 'max16064'
+ Addresses scanned: -
+ Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX16064.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver supports hardware montoring for Maxim MAX16064 Quad Power-Supply
+Controller with Active-Voltage Output Control and PMBus Interface.
+
+The driver is a client driver to the core PMBus driver.
+Please see Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+in[1-4]_label "vout[1-4]"
+in[1-4]_input Measured voltage. From READ_VOUT register.
+in[1-4]_min Minimum Voltage. From VOUT_UV_WARN_LIMIT register.
+in[1-4]_max Maximum voltage. From VOUT_OV_WARN_LIMIT register.
+in[1-4]_lcrit Critical minimum Voltage. VOUT_UV_FAULT_LIMIT register.
+in[1-4]_crit Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register.
+in[1-4]_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status.
+in[1-4]_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status.
+in[1-4]_lcrit_alarm Voltage critical low alarm. From VOLTAGE_UV_FAULT status.
+in[1-4]_crit_alarm Voltage critical high alarm. From VOLTAGE_OV_FAULT status.
+in[1-4]_highest Historical maximum voltage.
+in[1-4]_reset_history Write any value to reset history.
+
+temp1_input Measured temperature. From READ_TEMPERATURE_1 register.
+temp1_max Maximum temperature. From OT_WARN_LIMIT register.
+temp1_crit Critical high temperature. From OT_FAULT_LIMIT register.
+temp1_max_alarm Chip temperature high alarm. Set by comparing
+ READ_TEMPERATURE_1 with OT_WARN_LIMIT if TEMP_OT_WARNING
+ status is set.
+temp1_crit_alarm Chip temperature critical high alarm. Set by comparing
+ READ_TEMPERATURE_1 with OT_FAULT_LIMIT if TEMP_OT_FAULT
+ status is set.
+temp1_highest Historical maximum temperature.
+temp1_reset_history Write any value to reset history.
diff --git a/Documentation/hwmon/max16065 b/Documentation/hwmon/max16065
new file mode 100644
index 000000000..208a29e43
--- /dev/null
+++ b/Documentation/hwmon/max16065
@@ -0,0 +1,105 @@
+Kernel driver max16065
+======================
+
+Supported chips:
+ * Maxim MAX16065, MAX16066
+ Prefixes: 'max16065', 'max16066'
+ Addresses scanned: -
+ Datasheet:
+ http://datasheets.maxim-ic.com/en/ds/MAX16065-MAX16066.pdf
+ * Maxim MAX16067
+ Prefix: 'max16067'
+ Addresses scanned: -
+ Datasheet:
+ http://datasheets.maxim-ic.com/en/ds/MAX16067.pdf
+ * Maxim MAX16068
+ Prefix: 'max16068'
+ Addresses scanned: -
+ Datasheet:
+ http://datasheets.maxim-ic.com/en/ds/MAX16068.pdf
+ * Maxim MAX16070/MAX16071
+ Prefixes: 'max16070', 'max16071'
+ Addresses scanned: -
+ Datasheet:
+ http://datasheets.maxim-ic.com/en/ds/MAX16070-MAX16071.pdf
+
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+[From datasheets] The MAX16065/MAX16066 flash-configurable system managers
+monitor and sequence multiple system voltages. The MAX16065/MAX16066 can also
+accurately monitor (+/-2.5%) one current channel using a dedicated high-side
+current-sense amplifier. The MAX16065 manages up to twelve system voltages
+simultaneously, and the MAX16066 manages up to eight supply voltages.
+
+The MAX16067 flash-configurable system manager monitors and sequences multiple
+system voltages. The MAX16067 manages up to six system voltages simultaneously.
+
+The MAX16068 flash-configurable system manager monitors and manages up to six
+system voltages simultaneously.
+
+The MAX16070/MAX16071 flash-configurable system monitors supervise multiple
+system voltages. The MAX16070/MAX16071 can also accurately monitor (+/-2.5%)
+one current channel using a dedicated high-side current-sense amplifier. The
+MAX16070 monitors up to twelve system voltages simultaneously, and the MAX16071
+monitors up to eight supply voltages.
+
+Each monitored channel has its own low and high critical limits. MAX16065,
+MAX16066, MAX16070, and MAX16071 support an additional limit which is
+configurable as either low or high secondary limit. MAX16065, MAX16066,
+MAX16070, and MAX16071 also support supply current monitoring.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for devices, since there is no register which
+can be safely used to identify the chip. You will have to instantiate
+the devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+WARNING: Do not access chip registers using the i2cdump command, and do not use
+any of the i2ctools commands on a command register (0xa5 to 0xac). The chips
+supported by this driver interpret any access to a command register (including
+read commands) as request to execute the command in question. This may result in
+power loss, board resets, and/or Flash corruption. Worst case, your board may
+turn into a brick.
+
+
+Sysfs entries
+-------------
+
+in[0-11]_input Input voltage measurements.
+
+in12_input Voltage on CSP (Current Sense Positive) pin.
+ Only if the chip supports current sensing and if
+ current sensing is enabled.
+
+in[0-11]_min Low warning limit.
+ Supported on MAX16065, MAX16066, MAX16070, and MAX16071
+ only.
+
+in[0-11]_max High warning limit.
+ Supported on MAX16065, MAX16066, MAX16070, and MAX16071
+ only.
+
+ Either low or high warning limits are supported
+ (depending on chip configuration), but not both.
+
+in[0-11]_lcrit Low critical limit.
+
+in[0-11]_crit High critical limit.
+
+in[0-11]_alarm Input voltage alarm.
+
+curr1_input Current sense input; only if the chip supports current
+ sensing and if current sensing is enabled.
+ Displayed current assumes 0.001 Ohm current sense
+ resistor.
+
+curr1_alarm Overcurrent alarm; only if the chip supports current
+ sensing and if current sensing is enabled.
diff --git a/Documentation/hwmon/max1619 b/Documentation/hwmon/max1619
new file mode 100644
index 000000000..518bae3a8
--- /dev/null
+++ b/Documentation/hwmon/max1619
@@ -0,0 +1,29 @@
+Kernel driver max1619
+=====================
+
+Supported chips:
+ * Maxim MAX1619
+ Prefix: 'max1619'
+ Addresses scanned: I2C 0x18-0x1a, 0x29-0x2b, 0x4c-0x4e
+ Datasheet: Publicly available at the Maxim website
+ http://pdfserv.maxim-ic.com/en/ds/MAX1619.pdf
+
+Authors:
+ Oleksij Rempel <bug-track@fisher-privat.net>,
+ Jean Delvare <jdelvare@suse.de>
+
+Description
+-----------
+
+The MAX1619 is a digital temperature sensor. It senses its own temperature as
+well as the temperature of up to one external diode.
+
+All temperature values are given in degrees Celsius. Resolution
+is 1.0 degree for the local temperature and for the remote temperature.
+
+Only the external sensor has high and low limits.
+
+The max1619 driver will not update its values more frequently than every
+other second; reading them more often will do no harm, but will return
+'old' values.
+
diff --git a/Documentation/hwmon/max1668 b/Documentation/hwmon/max1668
new file mode 100644
index 000000000..0616ed975
--- /dev/null
+++ b/Documentation/hwmon/max1668
@@ -0,0 +1,60 @@
+Kernel driver max1668
+=====================
+
+Supported chips:
+ * Maxim MAX1668, MAX1805 and MAX1989
+ Prefix: 'max1668'
+ Addresses scanned: I2C 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, 0x4c, 0x4d, 0x4e
+ Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX1668-MAX1989.pdf
+
+Author:
+ David George <david.george@ska.ac.za>
+
+Description
+-----------
+
+This driver implements support for the Maxim MAX1668, MAX1805 and MAX1989
+chips.
+
+The three devices are very similar, but the MAX1805 has a reduced feature
+set; only two remote temperature inputs vs the four avaible on the other
+two ICs.
+
+The driver is able to distinguish between the devices and creates sysfs
+entries as follows:
+
+MAX1805, MAX1668 and MAX1989:
+
+temp1_input ro local (ambient) temperature
+temp1_max rw local temperature maximum threshold for alarm
+temp1_max_alarm ro local temperature maximum threshold alarm
+temp1_min rw local temperature minimum threshold for alarm
+temp1_min_alarm ro local temperature minimum threshold alarm
+temp2_input ro remote temperature 1
+temp2_max rw remote temperature 1 maximum threshold for alarm
+temp2_max_alarm ro remote temperature 1 maximum threshold alarm
+temp2_min rw remote temperature 1 minimum threshold for alarm
+temp2_min_alarm ro remote temperature 1 minimum threshold alarm
+temp3_input ro remote temperature 2
+temp3_max rw remote temperature 2 maximum threshold for alarm
+temp3_max_alarm ro remote temperature 2 maximum threshold alarm
+temp3_min rw remote temperature 2 minimum threshold for alarm
+temp3_min_alarm ro remote temperature 2 minimum threshold alarm
+
+MAX1668 and MAX1989 only:
+temp4_input ro remote temperature 3
+temp4_max rw remote temperature 3 maximum threshold for alarm
+temp4_max_alarm ro remote temperature 3 maximum threshold alarm
+temp4_min rw remote temperature 3 minimum threshold for alarm
+temp4_min_alarm ro remote temperature 3 minimum threshold alarm
+temp5_input ro remote temperature 4
+temp5_max rw remote temperature 4 maximum threshold for alarm
+temp5_max_alarm ro remote temperature 4 maximum threshold alarm
+temp5_min rw remote temperature 4 minimum threshold for alarm
+temp5_min_alarm ro remote temperature 4 minimum threshold alarm
+
+Module Parameters
+-----------------
+
+* read_only: int
+ Set to non-zero if you wish to prevent write access to alarm thresholds.
diff --git a/Documentation/hwmon/max197 b/Documentation/hwmon/max197
new file mode 100644
index 000000000..8d89b9009
--- /dev/null
+++ b/Documentation/hwmon/max197
@@ -0,0 +1,60 @@
+Maxim MAX197 driver
+===================
+
+Author:
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+
+Supported chips:
+ * Maxim MAX197
+ Prefix: 'max197'
+ Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX197.pdf
+
+ * Maxim MAX199
+ Prefix: 'max199'
+ Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX199.pdf
+
+Description
+-----------
+
+The A/D converters MAX197, and MAX199 are both 8-Channel, Multi-Range, 5V,
+12-Bit DAS with 8+4 Bus Interface and Fault Protection.
+
+The available ranges for the MAX197 are {0,-5V} to 5V, and {0,-10V} to 10V,
+while they are {0,-2V} to 2V, and {0,-4V} to 4V on the MAX199.
+
+Platform data
+-------------
+
+The MAX197 platform data (defined in linux/platform_data/max197.h) should be
+filled with a pointer to a conversion function, defined like:
+
+ int convert(u8 ctrl);
+
+ctrl is the control byte to write to start a new conversion.
+On success, the function must return the 12-bit raw value read from the chip,
+or a negative error code otherwise.
+
+Control byte format:
+
+Bit Name Description
+7,6 PD1,PD0 Clock and Power-Down modes
+5 ACQMOD Internal or External Controlled Acquisition
+4 RNG Full-scale voltage magnitude at the input
+3 BIP Unipolar or Bipolar conversion mode
+2,1,0 A2,A1,A0 Channel
+
+Sysfs interface
+---------------
+
+* in[0-7]_input: The conversion value for the corresponding channel.
+ RO
+
+* in[0-7]_min: The lower limit (in mV) for the corresponding channel.
+ For the MAX197, it will be adjusted to -10000, -5000, or 0.
+ For the MAX199, it will be adjusted to -4000, -2000, or 0.
+ RW
+
+* in[0-7]_max: The higher limit (in mV) for the corresponding channel.
+ For the MAX197, it will be adjusted to 0, 5000, or 10000.
+ For the MAX199, it will be adjusted to 0, 2000, or 4000.
+ RW
diff --git a/Documentation/hwmon/max34440 b/Documentation/hwmon/max34440
new file mode 100644
index 000000000..37cbf472a
--- /dev/null
+++ b/Documentation/hwmon/max34440
@@ -0,0 +1,127 @@
+Kernel driver max34440
+======================
+
+Supported chips:
+ * Maxim MAX34440
+ Prefixes: 'max34440'
+ Addresses scanned: -
+ Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34440.pdf
+ * Maxim MAX34441
+ PMBus 5-Channel Power-Supply Manager and Intelligent Fan Controller
+ Prefixes: 'max34441'
+ Addresses scanned: -
+ Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34441.pdf
+ * Maxim MAX34446
+ PMBus Power-Supply Data Logger
+ Prefixes: 'max34446'
+ Addresses scanned: -
+ Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX34446.pdf
+ * Maxim MAX34460
+ PMBus 12-Channel Voltage Monitor & Sequencer
+ Prefix: 'max34460'
+ Addresses scanned: -
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX34460.pdf
+ * Maxim MAX34461
+ PMBus 16-Channel Voltage Monitor & Sequencer
+ Prefix: 'max34461'
+ Addresses scanned: -
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX34461.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver supports hardware montoring for Maxim MAX34440 PMBus 6-Channel
+Power-Supply Manager, MAX34441 PMBus 5-Channel Power-Supply Manager
+and Intelligent Fan Controller, and MAX34446 PMBus Power-Supply Data Logger.
+It also supports the MAX34460 and MAX34461 PMBus Voltage Monitor & Sequencers.
+The MAX34460 supports 12 voltage channels, and the MAX34461 supports 16 voltage
+channels.
+
+The driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+For MAX34446, the value of the currX_crit attribute determines if current or
+voltage measurement is enabled for a given channel. Voltage measurement is
+enabled if currX_crit is set to 0; current measurement is enabled if the
+attribute is set to a positive value. Power measurement is only enabled if
+channel 1 (3) is configured for voltage measurement, and channel 2 (4) is
+configured for current measurement.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+in[1-6]_label "vout[1-6]".
+in[1-6]_input Measured voltage. From READ_VOUT register.
+in[1-6]_min Minimum Voltage. From VOUT_UV_WARN_LIMIT register.
+in[1-6]_max Maximum voltage. From VOUT_OV_WARN_LIMIT register.
+in[1-6]_lcrit Critical minimum Voltage. VOUT_UV_FAULT_LIMIT register.
+in[1-6]_crit Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register.
+in[1-6]_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status.
+in[1-6]_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status.
+in[1-6]_lcrit_alarm Voltage critical low alarm. From VOLTAGE_UV_FAULT status.
+in[1-6]_crit_alarm Voltage critical high alarm. From VOLTAGE_OV_FAULT status.
+in[1-6]_lowest Historical minimum voltage.
+in[1-6]_highest Historical maximum voltage.
+in[1-6]_reset_history Write any value to reset history.
+
+ MAX34446 only supports in[1-4].
+
+curr[1-6]_label "iout[1-6]".
+curr[1-6]_input Measured current. From READ_IOUT register.
+curr[1-6]_max Maximum current. From IOUT_OC_WARN_LIMIT register.
+curr[1-6]_crit Critical maximum current. From IOUT_OC_FAULT_LIMIT register.
+curr[1-6]_max_alarm Current high alarm. From IOUT_OC_WARNING status.
+curr[1-6]_crit_alarm Current critical high alarm. From IOUT_OC_FAULT status.
+curr[1-4]_average Historical average current (MAX34446 only).
+curr[1-6]_highest Historical maximum current.
+curr[1-6]_reset_history Write any value to reset history.
+
+ in6 and curr6 attributes only exist for MAX34440.
+ MAX34446 only supports curr[1-4].
+
+power[1,3]_label "pout[1,3]"
+power[1,3]_input Measured power.
+power[1,3]_average Historical average power.
+power[1,3]_highest Historical maximum power.
+
+ Power attributes only exist for MAX34446.
+
+temp[1-8]_input Measured temperatures. From READ_TEMPERATURE_1 register.
+ temp1 is the chip's internal temperature. temp2..temp5
+ are remote I2C temperature sensors. For MAX34441, temp6
+ is a remote thermal-diode sensor. For MAX34440, temp6..8
+ are remote I2C temperature sensors.
+temp[1-8]_max Maximum temperature. From OT_WARN_LIMIT register.
+temp[1-8]_crit Critical high temperature. From OT_FAULT_LIMIT register.
+temp[1-8]_max_alarm Temperature high alarm.
+temp[1-8]_crit_alarm Temperature critical high alarm.
+temp[1-8]_average Historical average temperature (MAX34446 only).
+temp[1-8]_highest Historical maximum temperature.
+temp[1-8]_reset_history Write any value to reset history.
+
+ temp7 and temp8 attributes only exist for MAX34440.
+ MAX34446 only supports temp[1-3].
+
+MAX34460 supports attribute groups in[1-12] and temp[1-5].
+MAX34461 supports attribute groups in[1-16] and temp[1-5].
diff --git a/Documentation/hwmon/max6639 b/Documentation/hwmon/max6639
new file mode 100644
index 000000000..dc49f8be7
--- /dev/null
+++ b/Documentation/hwmon/max6639
@@ -0,0 +1,49 @@
+Kernel driver max6639
+=====================
+
+Supported chips:
+ * Maxim MAX6639
+ Prefix: 'max6639'
+ Addresses scanned: I2C 0x2c, 0x2e, 0x2f
+ Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6639.pdf
+
+Authors:
+ He Changqing <hechangqing@semptian.com>
+ Roland Stigge <stigge@antcom.de>
+
+Description
+-----------
+
+This driver implements support for the Maxim MAX6639. This chip is a 2-channel
+temperature monitor with dual PWM fan speed controller. It can monitor its own
+temperature and one external diode-connected transistor or two external
+diode-connected transistors.
+
+The following device attributes are implemented via sysfs:
+
+Attribute R/W Contents
+----------------------------------------------------------------------------
+temp1_input R Temperature channel 1 input (0..150 C)
+temp2_input R Temperature channel 2 input (0..150 C)
+temp1_fault R Temperature channel 1 diode fault
+temp2_fault R Temperature channel 2 diode fault
+temp1_max RW Set THERM temperature for input 1
+ (in C, see datasheet)
+temp2_max RW Set THERM temperature for input 2
+temp1_crit RW Set ALERT temperature for input 1
+temp2_crit RW Set ALERT temperature for input 2
+temp1_emergency RW Set OT temperature for input 1
+ (in C, see datasheet)
+temp2_emergency RW Set OT temperature for input 2
+pwm1 RW Fan 1 target duty cycle (0..255)
+pwm2 RW Fan 2 target duty cycle (0..255)
+fan1_input R TACH1 fan tachometer input (in RPM)
+fan2_input R TACH2 fan tachometer input (in RPM)
+fan1_fault R Fan 1 fault
+fan2_fault R Fan 2 fault
+temp1_max_alarm R Alarm on THERM temperature on channel 1
+temp2_max_alarm R Alarm on THERM temperature on channel 2
+temp1_crit_alarm R Alarm on ALERT temperature on channel 1
+temp2_crit_alarm R Alarm on ALERT temperature on channel 2
+temp1_emergency_alarm R Alarm on OT temperature on channel 1
+temp2_emergency_alarm R Alarm on OT temperature on channel 2
diff --git a/Documentation/hwmon/max6642 b/Documentation/hwmon/max6642
new file mode 100644
index 000000000..afbd3e494
--- /dev/null
+++ b/Documentation/hwmon/max6642
@@ -0,0 +1,21 @@
+Kernel driver max6642
+=====================
+
+Supported chips:
+ * Maxim MAX6642
+ Prefix: 'max6642'
+ Addresses scanned: I2C 0x48-0x4f
+ Datasheet: Publicly available at the Maxim website
+ http://datasheets.maxim-ic.com/en/ds/MAX6642.pdf
+
+Authors:
+ Per Dalen <per.dalen@appeartv.com>
+
+Description
+-----------
+
+The MAX6642 is a digital temperature sensor. It senses its own temperature as
+well as the temperature on one external diode.
+
+All temperature values are given in degrees Celsius. Resolution
+is 0.25 degree for the local temperature and for the remote temperature.
diff --git a/Documentation/hwmon/max6650 b/Documentation/hwmon/max6650
new file mode 100644
index 000000000..58d9644a2
--- /dev/null
+++ b/Documentation/hwmon/max6650
@@ -0,0 +1,64 @@
+Kernel driver max6650
+=====================
+
+Supported chips:
+ * Maxim MAX6650
+ Prefix: 'max6650'
+ Addresses scanned: none
+ Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6650-MAX6651.pdf
+ * Maxim MAX6651
+ Prefix: 'max6651'
+ Addresses scanned: none
+ Datasheet: http://pdfserv.maxim-ic.com/en/ds/MAX6650-MAX6651.pdf
+
+Authors:
+ Hans J. Koch <hjk@hansjkoch.de>
+ John Morris <john.morris@spirentcom.com>
+ Claus Gindhart <claus.gindhart@kontron.com>
+
+Description
+-----------
+
+This driver implements support for the Maxim MAX6650 and MAX6651.
+
+The 2 devices are very similar, but the MAX6550 has a reduced feature
+set, e.g. only one fan-input, instead of 4 for the MAX6651.
+
+The driver is not able to distinguish between the 2 devices.
+
+The driver provides the following sensor accesses in sysfs:
+
+fan1_input ro fan tachometer speed in RPM
+fan2_input ro "
+fan3_input ro "
+fan4_input ro "
+fan1_target rw desired fan speed in RPM (closed loop mode only)
+pwm1_enable rw regulator mode, 0=full on, 1=open loop, 2=closed loop
+pwm1 rw relative speed (0-255), 255=max. speed.
+ Used in open loop mode only.
+fan1_div rw sets the speed range the inputs can handle. Legal
+ values are 1, 2, 4, and 8. Use lower values for
+ faster fans.
+
+Usage notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+Module parameters
+-----------------
+
+If your board has a BIOS that initializes the MAX6650/6651 correctly, you can
+simply load your module without parameters. It won't touch the configuration
+registers then. If your board BIOS doesn't initialize the chip, or you want
+different settings, you can set the following parameters:
+
+voltage_12V: 5=5V fan, 12=12V fan, 0=don't change
+prescaler: Possible values are 1,2,4,8,16, or 0 for don't change
+clock: The clock frequency in Hz of the chip the driver should assume [254000]
+
+Please have a look at the MAX6650/6651 data sheet and make sure that you fully
+understand the meaning of these parameters before you attempt to change them.
+
diff --git a/Documentation/hwmon/max6697 b/Documentation/hwmon/max6697
new file mode 100644
index 000000000..6594177ed
--- /dev/null
+++ b/Documentation/hwmon/max6697
@@ -0,0 +1,58 @@
+Kernel driver max6697
+=====================
+
+Supported chips:
+ * Maxim MAX6581
+ Prefix: 'max6581'
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6581.pdf
+ * Maxim MAX6602
+ Prefix: 'max6602'
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6602.pdf
+ * Maxim MAX6622
+ Prefix: 'max6622'
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6622.pdf
+ * Maxim MAX6636
+ Prefix: 'max6636'
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6636.pdf
+ * Maxim MAX6689
+ Prefix: 'max6689'
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6689.pdf
+ * Maxim MAX6693
+ Prefix: 'max6693'
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6693.pdf
+ * Maxim MAX6694
+ Prefix: 'max6694'
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6694.pdf
+ * Maxim MAX6697
+ Prefix: 'max6697'
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6697.pdf
+ * Maxim MAX6698
+ Prefix: 'max6698'
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6698.pdf
+ * Maxim MAX6699
+ Prefix: 'max6699'
+ Datasheet: http://datasheets.maximintegrated.com/en/ds/MAX6699.pdf
+
+Author:
+ Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+This driver implements support for several MAX6697 compatible temperature sensor
+chips. The chips support one local temperature sensor plus four, six, or seven
+remote temperature sensors. Remote temperature sensors are diode-connected
+thermal transitors, except for MAX6698 which supports three diode-connected
+thermal transistors plus three thermistors in addition to the local temperature
+sensor.
+
+The driver provides the following sysfs attributes. temp1 is the local (chip)
+temperature, temp[2..n] are remote temperatures. The actually supported
+per-channel attributes are chip type and channel dependent.
+
+tempX_input RO temperature
+tempX_max RW temperature maximum threshold
+tempX_max_alarm RO temperature maximum threshold alarm
+tempX_crit RW temperature critical threshold
+tempX_crit_alarm RO temperature critical threshold alarm
+tempX_fault RO temperature diode fault (remote sensors only)
diff --git a/Documentation/hwmon/max8688 b/Documentation/hwmon/max8688
new file mode 100644
index 000000000..e78078638
--- /dev/null
+++ b/Documentation/hwmon/max8688
@@ -0,0 +1,75 @@
+Kernel driver max8688
+=====================
+
+Supported chips:
+ * Maxim MAX8688
+ Prefix: 'max8688'
+ Addresses scanned: -
+ Datasheet: http://datasheets.maxim-ic.com/en/ds/MAX8688.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver supports hardware montoring for Maxim MAX8688 Digital Power-Supply
+Controller/Monitor with PMBus Interface.
+
+The driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+in1_label "vout1"
+in1_input Measured voltage. From READ_VOUT register.
+in1_min Minimum Voltage. From VOUT_UV_WARN_LIMIT register.
+in1_max Maximum voltage. From VOUT_OV_WARN_LIMIT register.
+in1_lcrit Critical minimum Voltage. VOUT_UV_FAULT_LIMIT register.
+in1_crit Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register.
+in1_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status.
+in1_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status.
+in1_lcrit_alarm Voltage critical low alarm. From VOLTAGE_UV_FAULT status.
+in1_crit_alarm Voltage critical high alarm. From VOLTAGE_OV_FAULT status.
+in1_highest Historical maximum voltage.
+in1_reset_history Write any value to reset history.
+
+curr1_label "iout1"
+curr1_input Measured current. From READ_IOUT register.
+curr1_max Maximum current. From IOUT_OC_WARN_LIMIT register.
+curr1_crit Critical maximum current. From IOUT_OC_FAULT_LIMIT register.
+curr1_max_alarm Current high alarm. From IOUT_OC_WARN_LIMIT register.
+curr1_crit_alarm Current critical high alarm. From IOUT_OC_FAULT status.
+curr1_highest Historical maximum current.
+curr1_reset_history Write any value to reset history.
+
+temp1_input Measured temperature. From READ_TEMPERATURE_1 register.
+temp1_max Maximum temperature. From OT_WARN_LIMIT register.
+temp1_crit Critical high temperature. From OT_FAULT_LIMIT register.
+temp1_max_alarm Chip temperature high alarm. Set by comparing
+ READ_TEMPERATURE_1 with OT_WARN_LIMIT if TEMP_OT_WARNING
+ status is set.
+temp1_crit_alarm Chip temperature critical high alarm. Set by comparing
+ READ_TEMPERATURE_1 with OT_FAULT_LIMIT if TEMP_OT_FAULT
+ status is set.
+temp1_highest Historical maximum temperature.
+temp1_reset_history Write any value to reset history.
diff --git a/Documentation/hwmon/mc13783-adc b/Documentation/hwmon/mc13783-adc
new file mode 100644
index 000000000..d0e7b3fa9
--- /dev/null
+++ b/Documentation/hwmon/mc13783-adc
@@ -0,0 +1,74 @@
+Kernel driver mc13783-adc
+=========================
+
+Supported chips:
+ * Freescale Atlas MC13783
+ Prefix: 'mc13783'
+ Datasheet: http://www.freescale.com/files/rf_if/doc/data_sheet/MC13783.pdf?fsrch=1
+ * Freescale Atlas MC13892
+ Prefix: 'mc13892'
+ Datasheet: http://cache.freescale.com/files/analog/doc/data_sheet/MC13892.pdf?fsrch=1&sr=1
+
+Authors:
+ Sascha Hauer <s.hauer@pengutronix.de>
+ Luotao Fu <l.fu@pengutronix.de>
+
+Description
+-----------
+
+The Freescale MC13783 and MC13892 are Power Management and Audio Circuits.
+Among other things they contain a 10-bit A/D converter. The converter has 16
+(MC13783) resp. 12 (MC13892) channels which can be used in different modes. The
+A/D converter has a resolution of 2.25mV.
+
+Some channels can be used as General Purpose inputs or in a dedicated mode with
+a chip internal scaling applied .
+
+Currently the driver only supports the Application Supply channel (BP / BPSNS),
+the General Purpose inputs and touchscreen.
+
+See the following tables for the meaning of the different channels and their
+chip internal scaling:
+
+MC13783:
+Channel Signal Input Range Scaling
+-------------------------------------------------------------------------------
+0 Battery Voltage (BATT) 2.50 - 4.65V -2.40V
+1 Battery Current (BATT - BATTISNS) -50 - 50 mV x20
+2 Application Supply (BP) 2.50 - 4.65V -2.40V
+3 Charger Voltage (CHRGRAW) 0 - 10V / /5
+ 0 - 20V /10
+4 Charger Current (CHRGISNSP-CHRGISNSN) -0.25 - 0.25V x4
+5 General Purpose ADIN5 / Battery Pack Thermistor 0 - 2.30V No
+6 General Purpose ADIN6 / Backup Voltage (LICELL) 0 - 2.30V / No /
+ 1.50 - 3.50V -1.20V
+7 General Purpose ADIN7 / UID / Die Temperature 0 - 2.30V / No /
+ 0 - 2.55V / x0.9 / No
+8 General Purpose ADIN8 0 - 2.30V No
+9 General Purpose ADIN9 0 - 2.30V No
+10 General Purpose ADIN10 0 - 2.30V No
+11 General Purpose ADIN11 0 - 2.30V No
+12 General Purpose TSX1 / Touchscreen X-plate 1 0 - 2.30V No
+13 General Purpose TSX2 / Touchscreen X-plate 2 0 - 2.30V No
+14 General Purpose TSY1 / Touchscreen Y-plate 1 0 - 2.30V No
+15 General Purpose TSY2 / Touchscreen Y-plate 2 0 - 2.30V No
+
+MC13892:
+Channel Signal Input Range Scaling
+-------------------------------------------------------------------------------
+0 Battery Voltage (BATT) 0 - 4.8V /2
+1 Battery Current (BATT - BATTISNSCC) -60 - 60 mV x20
+2 Application Supply (BPSNS) 0 - 4.8V /2
+3 Charger Voltage (CHRGRAW) 0 - 12V / /5
+ 0 - 20V /10
+4 Charger Current (CHRGISNS-BPSNS) / -0.3 - 0.3V / x4 /
+ Touchscreen X-plate 1 0 - 2.4V No
+5 General Purpose ADIN5 / Battery Pack Thermistor 0 - 2.4V No
+6 General Purpose ADIN6 / Backup Voltage (LICELL) 0 - 2.4V / No
+ Backup Voltage (LICELL) 0 - 3.6V x2/3
+7 General Purpose ADIN7 / UID / Die Temperature 0 - 2.4V / No /
+ 0 - 4.8V /2
+12 General Purpose TSX1 / Touchscreen X-plate 1 0 - 2.4V No
+13 General Purpose TSX2 / Touchscreen X-plate 2 0 - 2.4V No
+14 General Purpose TSY1 / Touchscreen Y-plate 1 0 - 2.4V No
+15 General Purpose TSY2 / Touchscreen Y-plate 2 0 - 2.4V No
diff --git a/Documentation/hwmon/mcp3021 b/Documentation/hwmon/mcp3021
new file mode 100644
index 000000000..74a6b72ad
--- /dev/null
+++ b/Documentation/hwmon/mcp3021
@@ -0,0 +1,29 @@
+Kernel driver MCP3021
+======================
+
+Supported chips:
+ * Microchip Technology MCP3021
+ Prefix: 'mcp3021'
+ Datasheet: http://ww1.microchip.com/downloads/en/DeviceDoc/21805a.pdf
+ * Microchip Technology MCP3221
+ Prefix: 'mcp3221'
+ Datasheet: http://ww1.microchip.com/downloads/en/DeviceDoc/21732c.pdf
+
+Authors:
+ Mingkai Hu
+ Sven Schuchmann <schuchmann@schleissheimer.de>
+
+Description
+-----------
+
+This driver implements support for the Microchip Technology MCP3021 and
+MCP3221 chip.
+
+The Microchip Technology Inc. MCP3021 is a successive approximation A/D
+converter (ADC) with 10-bit resolution. The MCP3221 has 12-bit resolution.
+
+These devices provide one single-ended input with very low power consumption.
+Communication to the MCP3021/MCP3221 is performed using a 2-wire I2C
+compatible interface. Standard (100 kHz) and Fast (400 kHz) I2C modes are
+available. The default I2C device address is 0x4d (contact the Microchip
+factory for additional address options).
diff --git a/Documentation/hwmon/menf21bmc b/Documentation/hwmon/menf21bmc
new file mode 100644
index 000000000..2a273a065
--- /dev/null
+++ b/Documentation/hwmon/menf21bmc
@@ -0,0 +1,50 @@
+Kernel driver menf21bmc_hwmon
+=============================
+
+Supported chips:
+ * MEN 14F021P00
+ Prefix: 'menf21bmc_hwmon'
+ Adresses scanned: -
+
+Author: Andreas Werner <andreas.werner@men.de>
+
+Description
+-----------
+
+The menf21bmc is a Board Management Controller (BMC) which provides an I2C
+interface to the host to access the features implemented in the BMC.
+
+This driver gives access to the voltage monitoring feature of the main
+voltages of the board.
+The voltage sensors are connected to the ADC inputs of the BMC which is
+a PIC16F917 Mikrocontroller.
+
+Usage Notes
+-----------
+
+This driver is part of the MFD driver named "menf21bmc" and does
+not auto-detect devices.
+You will have to instantiate the MFD driver explicitly.
+Please see Documentation/i2c/instantiating-devices for
+details.
+
+Sysfs entries
+-------------
+
+The following attributes are supported. All attributes are read only
+The Limits are read once by the driver.
+
+in0_input +3.3V input voltage
+in1_input +5.0V input voltage
+in2_input +12.0V input voltage
+in3_input +5V Standby input voltage
+in4_input VBAT (on board battery)
+
+in[0-4]_min Minimum voltage limit
+in[0-4]_max Maximum voltage limit
+
+in0_label "MON_3_3V"
+in1_label "MON_5V"
+in2_label "MON_12V"
+in3_label "5V_STANDBY"
+in4_label "VBAT"
diff --git a/Documentation/hwmon/nct6683 b/Documentation/hwmon/nct6683
new file mode 100644
index 000000000..c1301d430
--- /dev/null
+++ b/Documentation/hwmon/nct6683
@@ -0,0 +1,57 @@
+Kernel driver nct6683
+=====================
+
+Supported chips:
+ * Nuvoton NCT6683D
+ Prefix: 'nct6683'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from Nuvoton upon request
+
+Authors:
+ Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+This driver implements support for the Nuvoton NCT6683D eSIO chip.
+
+The chips implement up to shared 32 temperature and voltage sensors.
+It supports up to 16 fan rotation sensors and up to 8 fan control engines.
+
+Temperatures are measured in degrees Celsius. Measurement resolution is
+0.5 degrees C.
+
+Voltage sensors (also known as IN sensors) report their values in millivolts.
+
+Fan rotation speeds are reported in RPM (rotations per minute).
+
+Usage Note
+----------
+
+Limit register locations on Intel boards with EC firmware version 1.0
+build date 04/03/13 do not match the register locations in the Nuvoton
+datasheet. Nuvoton confirms that Intel uses a special firmware version
+with different register addresses. The specification describing the Intel
+firmware is held under NDA by Nuvoton and Intel and not available
+to the public.
+
+Some of the register locations can be reverse engineered; others are too
+well hidden. Given this, writing any values from the operating system is
+considered too risky with this firmware and has been disabled. All limits
+must all be written from the BIOS.
+
+The driver has only been tested with the Intel firmware, and by default
+only instantiates on Intel boards. To enable it on non-Intel boards,
+set the 'force' module parameter to 1.
+
+Tested Boards and Firmware Versions
+-----------------------------------
+
+The driver has been reported to work with the following boards and
+firmware versions.
+
+Board Firmware version
+---------------------------------------------------------------
+Intel DH87RL NCT6683D EC firmware version 1.0 build 04/03/13
+Intel DH87MC NCT6683D EC firmware version 1.0 build 04/03/13
+Intel DB85FL NCT6683D EC firmware version 1.0 build 04/03/13
diff --git a/Documentation/hwmon/nct6775 b/Documentation/hwmon/nct6775
new file mode 100644
index 000000000..f0dd3d2fe
--- /dev/null
+++ b/Documentation/hwmon/nct6775
@@ -0,0 +1,200 @@
+Note
+====
+
+This driver supersedes the NCT6775F and NCT6776F support in the W83627EHF
+driver.
+
+Kernel driver NCT6775
+=====================
+
+Supported chips:
+ * Nuvoton NCT6102D/NCT6104D/NCT6106D
+ Prefix: 'nct6106'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from the Nuvoton web site
+ * Nuvoton NCT5572D/NCT6771F/NCT6772F/NCT6775F/W83677HG-I
+ Prefix: 'nct6775'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from Nuvoton upon request
+ * Nuvoton NCT5573D/NCT5577D/NCT6776D/NCT6776F
+ Prefix: 'nct6776'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from Nuvoton upon request
+ * Nuvoton NCT5532D/NCT6779D
+ Prefix: 'nct6779'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from Nuvoton upon request
+ * Nuvoton NCT6791D
+ Prefix: 'nct6791'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from Nuvoton upon request
+ * Nuvoton NCT6792D
+ Prefix: 'nct6792'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from Nuvoton upon request
+
+Authors:
+ Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+This driver implements support for the Nuvoton NCT6775F, NCT6776F, and NCT6779D
+and compatible super I/O chips.
+
+The chips support up to 25 temperature monitoring sources. Up to 6 of those are
+direct temperature sensor inputs, the others are special sources such as PECI,
+PCH, and SMBUS. Depending on the chip type, 2 to 6 of the temperature sources
+can be monitored and compared against minimum, maximum, and critical
+temperatures. The driver reports up to 10 of the temperatures to the user.
+There are 4 to 5 fan rotation speed sensors, 8 to 15 analog voltage sensors,
+one VID, alarms with beep warnings (control unimplemented), and some automatic
+fan regulation strategies (plus manual fan control mode).
+
+The temperature sensor sources on all chips are configurable. The configured
+source for each of the temperature sensors is provided in tempX_label.
+
+Temperatures are measured in degrees Celsius and measurement resolution is
+either 1 degC or 0.5 degC, depending on the temperature source and
+configuration. An alarm is triggered when the temperature gets higher than
+the high limit; it stays on until the temperature falls below the hysteresis
+value. Alarms are only supported for temp1 to temp6, depending on the chip type.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. On
+NCT6775F, fan readings can be divided by a programmable divider (1, 2, 4, 8,
+16, 32, 64 or 128) to give the readings more range or accuracy; the other chips
+do not have a fan speed divider. The driver sets the most suitable fan divisor
+itself; specifically, it increases the divider value each time a fan speed
+reading returns an invalid value, and it reduces it if the fan speed reading
+is lower than optimal. Some fans might not be present because they share pins
+with other functions.
+
+Voltage sensors (also known as IN sensors) report their values in millivolts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit.
+
+The driver supports automatic fan control mode known as Thermal Cruise.
+In this mode, the chip attempts to keep the measured temperature in a
+predefined temperature range. If the temperature goes out of range, fan
+is driven slower/faster to reach the predefined range again.
+
+The mode works for fan1-fan5.
+
+sysfs attributes
+----------------
+
+pwm[1-5] - this file stores PWM duty cycle or DC value (fan speed) in range:
+ 0 (lowest speed) to 255 (full)
+
+pwm[1-5]_enable - this file controls mode of fan/temperature control:
+ * 0 Fan control disabled (fans set to maximum speed)
+ * 1 Manual mode, write to pwm[0-5] any value 0-255
+ * 2 "Thermal Cruise" mode
+ * 3 "Fan Speed Cruise" mode
+ * 4 "Smart Fan III" mode (NCT6775F only)
+ * 5 "Smart Fan IV" mode
+
+pwm[1-5]_mode - controls if output is PWM or DC level
+ * 0 DC output
+ * 1 PWM output
+
+Common fan control attributes
+-----------------------------
+
+pwm[1-5]_temp_sel Temperature source. Value is temperature sensor index.
+ For example, select '1' for temp1_input.
+pwm[1-5]_weight_temp_sel
+ Secondary temperature source. Value is temperature
+ sensor index. For example, select '1' for temp1_input.
+ Set to 0 to disable secondary temperature control.
+
+If secondary temperature functionality is enabled, it is controlled with the
+following attributes.
+
+pwm[1-5]_weight_duty_step
+ Duty step size.
+pwm[1-5]_weight_temp_step
+ Temperature step size. With each step over
+ temp_step_base, the value of weight_duty_step is added
+ to the current pwm value.
+pwm[1-5]_weight_temp_step_base
+ Temperature at which secondary temperature control kicks
+ in.
+pwm[1-5]_weight_temp_step_tol
+ Temperature step tolerance.
+
+Thermal Cruise mode (2)
+-----------------------
+
+If the temperature is in the range defined by:
+
+pwm[1-5]_target_temp Target temperature, unit millidegree Celsius
+ (range 0 - 127000)
+pwm[1-5]_temp_tolerance
+ Target temperature tolerance, unit millidegree Celsius
+
+there are no changes to fan speed. Once the temperature leaves the interval, fan
+speed increases (if temperature is higher that desired) or decreases (if
+temperature is lower than desired), using the following limits and time
+intervals.
+
+pwm[1-5]_start fan pwm start value (range 1 - 255), to start fan
+ when the temperature is above defined range.
+pwm[1-5]_floor lowest fan pwm (range 0 - 255) if temperature is below
+ the defined range. If set to 0, the fan is expected to
+ stop if the temperature is below the defined range.
+pwm[1-5]_step_up_time milliseconds before fan speed is increased
+pwm[1-5]_step_down_time milliseconds before fan speed is decreased
+pwm[1-5]_stop_time how many milliseconds must elapse to switch
+ corresponding fan off (when the temperature was below
+ defined range).
+
+Speed Cruise mode (3)
+---------------------
+
+This modes tries to keep the fan speed constant.
+
+fan[1-5]_target Target fan speed
+fan[1-5]_tolerance
+ Target speed tolerance
+
+
+Untested; use at your own risk.
+
+Smart Fan IV mode (5)
+---------------------
+
+This mode offers multiple slopes to control the fan speed. The slopes can be
+controlled by setting the pwm and temperature attributes. When the temperature
+rises, the chip will calculate the DC/PWM output based on the current slope.
+There are up to seven data points depending on the chip type. Subsequent data
+points should be set to higher temperatures and higher pwm values to achieve
+higher fan speeds with increasing temperature. The last data point reflects
+critical temperature mode, in which the fans should run at full speed.
+
+pwm[1-5]_auto_point[1-7]_pwm
+ pwm value to be set if temperature reaches matching
+ temperature range.
+pwm[1-5]_auto_point[1-7]_temp
+ Temperature over which the matching pwm is enabled.
+pwm[1-5]_temp_tolerance
+ Temperature tolerance, unit millidegree Celsius
+pwm[1-5]_crit_temp_tolerance
+ Temperature tolerance for critical temperature,
+ unit millidegree Celsius
+
+pwm[1-5]_step_up_time milliseconds before fan speed is increased
+pwm[1-5]_step_down_time milliseconds before fan speed is decreased
+
+Usage Notes
+-----------
+
+On various ASUS boards with NCT6776F, it appears that CPUTIN is not really
+connected to anything and floats, or that it is connected to some non-standard
+temperature measurement device. As a result, the temperature reported on CPUTIN
+will not reflect a usable value. It often reports unreasonably high
+temperatures, and in some cases the reported temperature declines if the actual
+temperature increases (similar to the raw PECI temperature value - see PECI
+specification for details). CPUTIN should therefore be be ignored on ASUS
+boards. The CPU temperature on ASUS boards is reported from PECI 0.
diff --git a/Documentation/hwmon/nct7802 b/Documentation/hwmon/nct7802
new file mode 100644
index 000000000..2e00f5e34
--- /dev/null
+++ b/Documentation/hwmon/nct7802
@@ -0,0 +1,32 @@
+Kernel driver nct7802
+=====================
+
+Supported chips:
+ * Nuvoton NCT7802Y
+ Prefix: 'nct7802'
+ Addresses scanned: I2C 0x28..0x2f
+ Datasheet: Available from Nuvoton web site
+
+Authors:
+ Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+This driver implements support for the Nuvoton NCT7802Y hardware monitoring
+chip. NCT7802Y supports 6 temperature sensors, 5 voltage sensors, and 3 fan
+speed sensors.
+
+The chip also supports intelligent fan speed control. This functionality is
+not currently supported by the driver.
+
+Tested Boards and BIOS Versions
+-------------------------------
+
+The driver has been reported to work with the following boards and
+BIOS versions.
+
+Board BIOS version
+---------------------------------------------------------------
+Kontron COMe-bSC2 CHR2E934.001.GGO
+Kontron COMe-bIP2 CCR2E212
diff --git a/Documentation/hwmon/nct7904 b/Documentation/hwmon/nct7904
new file mode 100644
index 000000000..014f112e2
--- /dev/null
+++ b/Documentation/hwmon/nct7904
@@ -0,0 +1,60 @@
+Kernel driver nct7904
+====================
+
+Supported chip:
+ * Nuvoton NCT7904D
+ Prefix: nct7904
+ Addresses: I2C 0x2d, 0x2e
+ Datasheet: Publicly available at Nuvoton website
+ http://www.nuvoton.com/
+
+Author: Vadim V. Vlasov <vvlasov@dev.rtsoft.ru>
+
+
+Description
+-----------
+
+The NCT7904D is a hardware monitor supporting up to 20 voltage sensors,
+internal temperature sensor, Intel PECI and AMD SB-TSI CPU temperature
+interface, up to 12 fan tachometer inputs, up to 4 fan control channels
+with SmartFan.
+
+
+Sysfs entries
+-------------
+
+Currently, the driver supports only the following features:
+
+in[1-20]_input Input voltage measurements (mV)
+
+fan[1-12]_input Fan tachometer measurements (rpm)
+
+temp1_input Local temperature (1/1000 degree,
+ 0.125 degree resolution)
+
+temp[2-9]_input CPU temperatures (1/1000 degree,
+ 0.125 degree resolution)
+
+fan[1-4]_mode R/W, 0/1 for manual or SmartFan mode
+ Setting SmartFan mode is supported only if it has been
+ previously configured by BIOS (or configuration EEPROM)
+
+fan[1-4]_pwm R/O in SmartFan mode, R/W in manual control mode
+
+The driver checks sensor control registers and does not export the sensors
+that are not enabled. Anyway, a sensor that is enabled may actually be not
+connected and thus provide zero readings.
+
+
+Limitations
+-----------
+
+The following features are not supported in current version:
+
+ - SmartFan control
+ - Watchdog
+ - GPIO
+ - external temperature sensors
+ - SMI
+ - min/max values
+ - many other...
diff --git a/Documentation/hwmon/ntc_thermistor b/Documentation/hwmon/ntc_thermistor
new file mode 100644
index 000000000..c5e05e290
--- /dev/null
+++ b/Documentation/hwmon/ntc_thermistor
@@ -0,0 +1,98 @@
+Kernel driver ntc_thermistor
+=================
+
+Supported thermistors from Murata:
+* Murata NTC Thermistors NCP15WB473, NCP18WB473, NCP21WB473, NCP03WB473, NCP15WL333
+ Prefixes: 'ncp15wb473', 'ncp18wb473', 'ncp21wb473', 'ncp03wb473', 'ncp15wl333'
+ Datasheet: Publicly available at Murata
+
+Supported thermistors from EPCOS:
+* EPCOS NTC Thermistors B57330V2103
+ Prefixes: b57330v2103
+ Datasheet: Publicly available at EPCOS
+
+Other NTC thermistors can be supported simply by adding compensation
+tables; e.g., NCP15WL333 support is added by the table ncpXXwl333.
+
+Authors:
+ MyungJoo Ham <myungjoo.ham@samsung.com>
+
+Description
+-----------
+
+The NTC (Negative Temperature Coefficient) thermistor is a simple thermistor
+that requires users to provide the resistance and lookup the corresponding
+compensation table to get the temperature input.
+
+The NTC driver provides lookup tables with a linear approximation function
+and four circuit models with an option not to use any of the four models.
+
+The four circuit models provided are:
+
+ $: resister, [TH]: the thermistor
+
+ 1. connect = NTC_CONNECTED_POSITIVE, pullup_ohm > 0
+
+ [pullup_uV]
+ | |
+ [TH] $ (pullup_ohm)
+ | |
+ +----+-----------------------[read_uV]
+ |
+ $ (pulldown_ohm)
+ |
+ --- (ground)
+
+ 2. connect = NTC_CONNECTED_POSITIVE, pullup_ohm = 0 (not-connected)
+
+ [pullup_uV]
+ |
+ [TH]
+ |
+ +----------------------------[read_uV]
+ |
+ $ (pulldown_ohm)
+ |
+ --- (ground)
+
+ 3. connect = NTC_CONNECTED_GROUND, pulldown_ohm > 0
+
+ [pullup_uV]
+ |
+ $ (pullup_ohm)
+ |
+ +----+-----------------------[read_uV]
+ | |
+ [TH] $ (pulldown_ohm)
+ | |
+ -------- (ground)
+
+ 4. connect = NTC_CONNECTED_GROUND, pulldown_ohm = 0 (not-connected)
+
+ [pullup_uV]
+ |
+ $ (pullup_ohm)
+ |
+ +----------------------------[read_uV]
+ |
+ [TH]
+ |
+ --- (ground)
+
+When one of the four circuit models is used, read_uV, pullup_uV, pullup_ohm,
+pulldown_ohm, and connect should be provided. When none of the four models
+are suitable or the user can get the resistance directly, the user should
+provide read_ohm and _not_ provide the others.
+
+Sysfs Interface
+---------------
+name the mandatory global attribute, the thermistor name.
+
+temp1_type always 4 (thermistor)
+ RO
+
+temp1_input measure the temperature and provide the measured value.
+ (reading this file initiates the reading procedure.)
+ RO
+
+Note that each NTC thermistor has only _one_ thermistor; thus, only temp1 exists.
diff --git a/Documentation/hwmon/pc87360 b/Documentation/hwmon/pc87360
new file mode 100644
index 000000000..d5f5cf16c
--- /dev/null
+++ b/Documentation/hwmon/pc87360
@@ -0,0 +1,184 @@
+Kernel driver pc87360
+=====================
+
+Supported chips:
+ * National Semiconductor PC87360, PC87363, PC87364, PC87365 and PC87366
+ Prefixes: 'pc87360', 'pc87363', 'pc87364', 'pc87365', 'pc87366'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheets: No longer available
+
+Authors: Jean Delvare <jdelvare@suse.de>
+
+Thanks to Sandeep Mehta, Tonko de Rooy and Daniel Ceregatti for testing.
+Thanks to Rudolf Marek for helping me investigate conversion issues.
+
+
+Module Parameters
+-----------------
+
+* init int
+ Chip initialization level:
+ 0: None
+ *1: Forcibly enable internal voltage and temperature channels, except in9
+ 2: Forcibly enable all voltage and temperature channels, except in9
+ 3: Forcibly enable all voltage and temperature channels, including in9
+
+Note that this parameter has no effect for the PC87360, PC87363 and PC87364
+chips.
+
+Also note that for the PC87366, initialization levels 2 and 3 don't enable
+all temperature channels, because some of them share pins with each other,
+so they can't be used at the same time.
+
+
+Description
+-----------
+
+The National Semiconductor PC87360 Super I/O chip contains monitoring and
+PWM control circuitry for two fans. The PC87363 chip is similar, and the
+PC87364 chip has monitoring and PWM control for a third fan.
+
+The National Semiconductor PC87365 and PC87366 Super I/O chips are complete
+hardware monitoring chipsets, not only controlling and monitoring three fans,
+but also monitoring eleven voltage inputs and two (PC87365) or up to four
+(PC87366) temperatures.
+
+ Chip #vin #fan #pwm #temp devid
+
+ PC87360 - 2 2 - 0xE1
+ PC87363 - 2 2 - 0xE8
+ PC87364 - 3 3 - 0xE4
+ PC87365 11 3 3 2 0xE5
+ PC87366 11 3 3 3-4 0xE9
+
+The driver assumes that no more than one chip is present, and one of the
+standard Super I/O addresses is used (0x2E/0x2F or 0x4E/0x4F)
+
+Fan Monitoring
+--------------
+
+Fan rotation speeds are reported in RPM (revolutions per minute). An alarm
+is triggered if the rotation speed has dropped below a programmable limit.
+A different alarm is triggered if the fan speed is too low to be measured.
+
+Fan readings are affected by a programmable clock divider, giving the
+readings more range or accuracy. Usually, users have to learn how it works,
+but this driver implements dynamic clock divider selection, so you don't
+have to care no more.
+
+For reference, here are a few values about clock dividers:
+
+ slowest accuracy highest
+ measurable around 3000 accurate
+ divider speed (RPM) RPM (RPM) speed (RPM)
+ 1 1882 18 6928
+ 2 941 37 4898
+ 4 470 74 3464
+ 8 235 150 2449
+
+For the curious, here is how the values above were computed:
+ * slowest measurable speed: clock/(255*divider)
+ * accuracy around 3000 RPM: 3000^2/clock
+ * highest accurate speed: sqrt(clock*100)
+The clock speed for the PC87360 family is 480 kHz. I arbitrarily chose 100
+RPM as the lowest acceptable accuracy.
+
+As mentioned above, you don't have to care about this no more.
+
+Note that not all RPM values can be represented, even when the best clock
+divider is selected. This is not only true for the measured speeds, but
+also for the programmable low limits, so don't be surprised if you try to
+set, say, fan1_min to 2900 and it finally reads 2909.
+
+
+Fan Control
+-----------
+
+PWM (pulse width modulation) values range from 0 to 255, with 0 meaning
+that the fan is stopped, and 255 meaning that the fan goes at full speed.
+
+Be extremely careful when changing PWM values. Low PWM values, even
+non-zero, can stop the fan, which may cause irreversible damage to your
+hardware if temperature increases too much. When changing PWM values, go
+step by step and keep an eye on temperatures.
+
+One user reported problems with PWM. Changing PWM values would break fan
+speed readings. No explanation nor fix could be found.
+
+
+Temperature Monitoring
+----------------------
+
+Temperatures are reported in degrees Celsius. Each temperature measured has
+associated low, high and overtemperature limits, each of which triggers an
+alarm when crossed.
+
+The first two temperature channels are external. The third one (PC87366
+only) is internal.
+
+The PC87366 has three additional temperature channels, based on
+thermistors (as opposed to thermal diodes for the first three temperature
+channels). For technical reasons, these channels are held by the VLM
+(voltage level monitor) logical device, not the TMS (temperature
+measurement) one. As a consequence, these temperatures are exported as
+voltages, and converted into temperatures in user-space.
+
+Note that these three additional channels share their pins with the
+external thermal diode channels, so you (physically) can't use them all at
+the same time. Although it should be possible to mix the two sensor types,
+the documents from National Semiconductor suggest that motherboard
+manufacturers should choose one type and stick to it. So you will more
+likely have either channels 1 to 3 (thermal diodes) or 3 to 6 (internal
+thermal diode, and thermistors).
+
+
+Voltage Monitoring
+------------------
+
+Voltages are reported relatively to a reference voltage, either internal or
+external. Some of them (in7:Vsb, in8:Vdd and in10:AVdd) are divided by two
+internally, you will have to compensate in sensors.conf. Others (in0 to in6)
+are likely to be divided externally. The meaning of each of these inputs as
+well as the values of the resistors used for division is left to the
+motherboard manufacturers, so you will have to document yourself and edit
+sensors.conf accordingly. National Semiconductor has a document with
+recommended resistor values for some voltages, but this still leaves much
+room for per motherboard specificities, unfortunately. Even worse,
+motherboard manufacturers don't seem to care about National Semiconductor's
+recommendations.
+
+Each voltage measured has associated low and high limits, each of which
+triggers an alarm when crossed.
+
+When available, VID inputs are used to provide the nominal CPU Core voltage.
+The driver will default to VRM 9.0, but this can be changed from user-space.
+The chipsets can handle two sets of VID inputs (on dual-CPU systems), but
+the driver will only export one for now. This may change later if there is
+a need.
+
+
+General Remarks
+---------------
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may already
+have disappeared! Note that all hardware registers are read whenever any
+data is read (unless it is less than 2 seconds since the last update, in
+which case cached values are returned instead). As a consequence, when
+a once-only alarm triggers, it may take 2 seconds for it to show, and 2
+more seconds for it to disappear.
+
+Monitoring of in9 isn't enabled at lower init levels (<3) because that
+channel measures the battery voltage (Vbat). It is a known fact that
+repeatedly sampling the battery voltage reduces its lifetime. National
+Semiconductor smartly designed their chipset so that in9 is sampled only
+once every 1024 sampling cycles (that is every 34 minutes at the default
+sampling rate), so the effect is attenuated, but still present.
+
+
+Limitations
+-----------
+
+The datasheets suggests that some values (fan mins, fan dividers)
+shouldn't be changed once the monitoring has started, but we ignore that
+recommendation. We'll reconsider if it actually causes trouble.
diff --git a/Documentation/hwmon/pc87427 b/Documentation/hwmon/pc87427
new file mode 100644
index 000000000..c313eb66e
--- /dev/null
+++ b/Documentation/hwmon/pc87427
@@ -0,0 +1,59 @@
+Kernel driver pc87427
+=====================
+
+Supported chips:
+ * National Semiconductor PC87427
+ Prefix: 'pc87427'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: No longer available
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+Thanks to Amir Habibi at Candelis for setting up a test system, and to
+Michael Kress for testing several iterations of this driver.
+
+
+Description
+-----------
+
+The National Semiconductor Super I/O chip includes complete hardware
+monitoring capabilities. It can monitor up to 18 voltages, 8 fans and
+6 temperature sensors. Only the fans and temperatures are supported at
+the moment, voltages aren't.
+
+This chip also has fan controlling features (up to 4 PWM outputs),
+which are partly supported by this driver.
+
+The driver assumes that no more than one chip is present, which seems
+reasonable.
+
+
+Fan Monitoring
+--------------
+
+Fan rotation speeds are reported as 14-bit values from a gated clock
+signal. Speeds down to 83 RPM can be measured.
+
+An alarm is triggered if the rotation speed drops below a programmable
+limit. Another alarm is triggered if the speed is too low to be measured
+(including stalled or missing fan).
+
+
+Fan Speed Control
+-----------------
+
+Fan speed can be controlled by PWM outputs. There are 4 possible modes:
+always off, always on, manual and automatic. The latter isn't supported
+by the driver: you can only return to that mode if it was the original
+setting, and the configuration interface is missing.
+
+
+Temperature Monitoring
+----------------------
+
+The PC87427 relies on external sensors (following the SensorPath
+standard), so the resolution and range depend on the type of sensor
+connected. The integer part can be 8-bit or 9-bit, and can be signed or
+not. I couldn't find a way to figure out the external sensor data
+temperature format, so user-space adjustment (typically by a factor 2)
+may be required.
diff --git a/Documentation/hwmon/pcf8591 b/Documentation/hwmon/pcf8591
new file mode 100644
index 000000000..447c0702c
--- /dev/null
+++ b/Documentation/hwmon/pcf8591
@@ -0,0 +1,90 @@
+Kernel driver pcf8591
+=====================
+
+Supported chips:
+ * Philips/NXP PCF8591
+ Prefix: 'pcf8591'
+ Addresses scanned: none
+ Datasheet: Publicly available at the NXP website
+ http://www.nxp.com/pip/PCF8591_6.html
+
+Authors:
+ Aurelien Jarno <aurelien@aurel32.net>
+ valuable contributions by Jan M. Sendler <sendler@sendler.de>,
+ Jean Delvare <jdelvare@suse.de>
+
+
+Description
+-----------
+
+The PCF8591 is an 8-bit A/D and D/A converter (4 analog inputs and one
+analog output) for the I2C bus produced by Philips Semiconductors (now NXP).
+It is designed to provide a byte I2C interface to up to 4 separate devices.
+
+The PCF8591 has 4 analog inputs programmable as single-ended or
+differential inputs :
+- mode 0 : four single ended inputs
+ Pins AIN0 to AIN3 are single ended inputs for channels 0 to 3
+
+- mode 1 : three differential inputs
+ Pins AIN3 is the common negative differential input
+ Pins AIN0 to AIN2 are positive differential inputs for channels 0 to 2
+
+- mode 2 : single ended and differential mixed
+ Pins AIN0 and AIN1 are single ended inputs for channels 0 and 1
+ Pins AIN2 is the positive differential input for channel 3
+ Pins AIN3 is the negative differential input for channel 3
+
+- mode 3 : two differential inputs
+ Pins AIN0 is the positive differential input for channel 0
+ Pins AIN1 is the negative differential input for channel 0
+ Pins AIN2 is the positive differential input for channel 1
+ Pins AIN3 is the negative differential input for channel 1
+
+See the datasheet for details.
+
+Module parameters
+-----------------
+
+* input_mode int
+
+ Analog input mode:
+ 0 = four single ended inputs
+ 1 = three differential inputs
+ 2 = single ended and differential mixed
+ 3 = two differential inputs
+
+
+Accessing PCF8591 via /sys interface
+-------------------------------------
+
+The PCF8591 is plainly impossible to detect! Thus the driver won't even
+try. You have to explicitly instantiate the device at the relevant
+address (in the interval [0x48..0x4f]) either through platform data, or
+using the sysfs interface. See Documentation/i2c/instantiating-devices
+for details.
+
+Directories are being created for each instantiated PCF8591:
+
+/sys/bus/i2c/devices/<0>-<1>/
+where <0> is the bus the chip is connected to (e. g. i2c-0)
+and <1> the chip address ([48..4f])
+
+Inside these directories, there are such files:
+in0_input, in1_input, in2_input, in3_input, out0_enable, out0_output, name
+
+Name contains chip name.
+
+The in0_input, in1_input, in2_input and in3_input files are RO. Reading gives
+the value of the corresponding channel. Depending on the current analog inputs
+configuration, files in2_input and in3_input may not exist. Values range
+from 0 to 255 for single ended inputs and -128 to +127 for differential inputs
+(8-bit ADC).
+
+The out0_enable file is RW. Reading gives "1" for analog output enabled and
+"0" for analog output disabled. Writing accepts "0" and "1" accordingly.
+
+The out0_output file is RW. Writing a number between 0 and 255 (8-bit DAC), send
+the value to the digital-to-analog converter. Note that a voltage will
+only appears on AOUT pin if aout0_enable equals 1. Reading returns the last
+value written.
diff --git a/Documentation/hwmon/pmbus b/Documentation/hwmon/pmbus
new file mode 100644
index 000000000..a3557da8f
--- /dev/null
+++ b/Documentation/hwmon/pmbus
@@ -0,0 +1,212 @@
+Kernel driver pmbus
+====================
+
+Supported chips:
+ * Ericsson BMR453, BMR454
+ Prefixes: 'bmr453', 'bmr454'
+ Addresses scanned: -
+ Datasheet:
+ http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146395
+ * ON Semiconductor ADP4000, NCP4200, NCP4208
+ Prefixes: 'adp4000', 'ncp4200', 'ncp4208'
+ Addresses scanned: -
+ Datasheets:
+ http://www.onsemi.com/pub_link/Collateral/ADP4000-D.PDF
+ http://www.onsemi.com/pub_link/Collateral/NCP4200-D.PDF
+ http://www.onsemi.com/pub_link/Collateral/JUNE%202009-%20REV.%200.PDF
+ * Lineage Power
+ Prefixes: 'mdt040', 'pdt003', 'pdt006', 'pdt012', 'udt020'
+ Addresses scanned: -
+ Datasheets:
+ http://www.lineagepower.com/oem/pdf/PDT003A0X.pdf
+ http://www.lineagepower.com/oem/pdf/PDT006A0X.pdf
+ http://www.lineagepower.com/oem/pdf/PDT012A0X.pdf
+ http://www.lineagepower.com/oem/pdf/UDT020A0X.pdf
+ http://www.lineagepower.com/oem/pdf/MDT040A0X.pdf
+ * Texas Instruments TPS40400
+ Prefixes: 'tps40400'
+ Addresses scanned: -
+ Datasheets:
+ http://www.ti.com/lit/gpn/tps40400
+ * Generic PMBus devices
+ Prefix: 'pmbus'
+ Addresses scanned: -
+ Datasheet: n.a.
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver supports hardware montoring for various PMBus compliant devices.
+It supports voltage, current, power, and temperature sensors as supported
+by the device.
+
+Each monitored channel has its own high and low limits, plus a critical
+limit.
+
+Fan support will be added in a later version of this driver.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for PMBus devices, since there is no register
+which can be safely used to identify the chip (The MFG_ID register is not
+supported by all chips), and since there is no well defined address range for
+PMBus devices. You will have to instantiate the devices explicitly.
+
+Example: the following will load the driver for an LTC2978 at address 0x60
+on I2C bus #1:
+$ modprobe pmbus
+$ echo ltc2978 0x60 > /sys/bus/i2c/devices/i2c-1/new_device
+
+
+Platform data support
+---------------------
+
+Support for additional PMBus chips can be added by defining chip parameters in
+a new chip specific driver file. For example, (untested) code to add support for
+Emerson DS1200 power modules might look as follows.
+
+static struct pmbus_driver_info ds1200_info = {
+ .pages = 1,
+ /* Note: All other sensors are in linear mode */
+ .direct[PSC_VOLTAGE_OUT] = true,
+ .direct[PSC_TEMPERATURE] = true,
+ .direct[PSC_CURRENT_OUT] = true,
+ .m[PSC_VOLTAGE_IN] = 1,
+ .b[PSC_VOLTAGE_IN] = 0,
+ .R[PSC_VOLTAGE_IN] = 3,
+ .m[PSC_VOLTAGE_OUT] = 1,
+ .b[PSC_VOLTAGE_OUT] = 0,
+ .R[PSC_VOLTAGE_OUT] = 3,
+ .m[PSC_TEMPERATURE] = 1,
+ .b[PSC_TEMPERATURE] = 0,
+ .R[PSC_TEMPERATURE] = 3,
+ .func[0] = PMBUS_HAVE_VIN | PMBUS_HAVE_IIN | PMBUS_HAVE_STATUS_INPUT
+ | PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT
+ | PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT
+ | PMBUS_HAVE_PIN | PMBUS_HAVE_POUT
+ | PMBUS_HAVE_TEMP | PMBUS_HAVE_STATUS_TEMP
+ | PMBUS_HAVE_FAN12 | PMBUS_HAVE_STATUS_FAN12,
+};
+
+static int ds1200_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ return pmbus_do_probe(client, id, &ds1200_info);
+}
+
+static int ds1200_remove(struct i2c_client *client)
+{
+ return pmbus_do_remove(client);
+}
+
+static const struct i2c_device_id ds1200_id[] = {
+ {"ds1200", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(i2c, ds1200_id);
+
+/* This is the driver that will be inserted */
+static struct i2c_driver ds1200_driver = {
+ .driver = {
+ .name = "ds1200",
+ },
+ .probe = ds1200_probe,
+ .remove = ds1200_remove,
+ .id_table = ds1200_id,
+};
+
+static int __init ds1200_init(void)
+{
+ return i2c_add_driver(&ds1200_driver);
+}
+
+static void __exit ds1200_exit(void)
+{
+ i2c_del_driver(&ds1200_driver);
+}
+
+
+Sysfs entries
+-------------
+
+When probing the chip, the driver identifies which PMBus registers are
+supported, and determines available sensors from this information.
+Attribute files only exist if respective sensors are supported by the chip.
+Labels are provided to inform the user about the sensor associated with
+a given sysfs entry.
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+inX_input Measured voltage. From READ_VIN or READ_VOUT register.
+inX_min Minimum Voltage.
+ From VIN_UV_WARN_LIMIT or VOUT_UV_WARN_LIMIT register.
+inX_max Maximum voltage.
+ From VIN_OV_WARN_LIMIT or VOUT_OV_WARN_LIMIT register.
+inX_lcrit Critical minimum Voltage.
+ From VIN_UV_FAULT_LIMIT or VOUT_UV_FAULT_LIMIT register.
+inX_crit Critical maximum voltage.
+ From VIN_OV_FAULT_LIMIT or VOUT_OV_FAULT_LIMIT register.
+inX_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status.
+inX_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status.
+inX_lcrit_alarm Voltage critical low alarm.
+ From VOLTAGE_UV_FAULT status.
+inX_crit_alarm Voltage critical high alarm.
+ From VOLTAGE_OV_FAULT status.
+inX_label "vin", "vcap", or "voutY"
+
+currX_input Measured current. From READ_IIN or READ_IOUT register.
+currX_max Maximum current.
+ From IIN_OC_WARN_LIMIT or IOUT_OC_WARN_LIMIT register.
+currX_lcrit Critical minimum output current.
+ From IOUT_UC_FAULT_LIMIT register.
+currX_crit Critical maximum current.
+ From IIN_OC_FAULT_LIMIT or IOUT_OC_FAULT_LIMIT register.
+currX_alarm Current high alarm.
+ From IIN_OC_WARNING or IOUT_OC_WARNING status.
+currX_max_alarm Current high alarm.
+ From IIN_OC_WARN_LIMIT or IOUT_OC_WARN_LIMIT status.
+currX_lcrit_alarm Output current critical low alarm.
+ From IOUT_UC_FAULT status.
+currX_crit_alarm Current critical high alarm.
+ From IIN_OC_FAULT or IOUT_OC_FAULT status.
+currX_label "iin" or "ioutY"
+
+powerX_input Measured power. From READ_PIN or READ_POUT register.
+powerX_cap Output power cap. From POUT_MAX register.
+powerX_max Power limit. From PIN_OP_WARN_LIMIT or
+ POUT_OP_WARN_LIMIT register.
+powerX_crit Critical output power limit.
+ From POUT_OP_FAULT_LIMIT register.
+powerX_alarm Power high alarm.
+ From PIN_OP_WARNING or POUT_OP_WARNING status.
+powerX_crit_alarm Output power critical high alarm.
+ From POUT_OP_FAULT status.
+powerX_label "pin" or "poutY"
+
+tempX_input Measured temperature.
+ From READ_TEMPERATURE_X register.
+tempX_min Mimimum temperature. From UT_WARN_LIMIT register.
+tempX_max Maximum temperature. From OT_WARN_LIMIT register.
+tempX_lcrit Critical low temperature.
+ From UT_FAULT_LIMIT register.
+tempX_crit Critical high temperature.
+ From OT_FAULT_LIMIT register.
+tempX_min_alarm Chip temperature low alarm. Set by comparing
+ READ_TEMPERATURE_X with UT_WARN_LIMIT if
+ TEMP_UT_WARNING status is set.
+tempX_max_alarm Chip temperature high alarm. Set by comparing
+ READ_TEMPERATURE_X with OT_WARN_LIMIT if
+ TEMP_OT_WARNING status is set.
+tempX_lcrit_alarm Chip temperature critical low alarm. Set by comparing
+ READ_TEMPERATURE_X with UT_FAULT_LIMIT if
+ TEMP_UT_FAULT status is set.
+tempX_crit_alarm Chip temperature critical high alarm. Set by comparing
+ READ_TEMPERATURE_X with OT_FAULT_LIMIT if
+ TEMP_OT_FAULT status is set.
diff --git a/Documentation/hwmon/pmbus-core b/Documentation/hwmon/pmbus-core
new file mode 100644
index 000000000..31e4720fe
--- /dev/null
+++ b/Documentation/hwmon/pmbus-core
@@ -0,0 +1,283 @@
+PMBus core driver and internal API
+==================================
+
+Introduction
+============
+
+[from pmbus.org] The Power Management Bus (PMBus) is an open standard
+power-management protocol with a fully defined command language that facilitates
+communication with power converters and other devices in a power system. The
+protocol is implemented over the industry-standard SMBus serial interface and
+enables programming, control, and real-time monitoring of compliant power
+conversion products. This flexible and highly versatile standard allows for
+communication between devices based on both analog and digital technologies, and
+provides true interoperability which will reduce design complexity and shorten
+time to market for power system designers. Pioneered by leading power supply and
+semiconductor companies, this open power system standard is maintained and
+promoted by the PMBus Implementers Forum (PMBus-IF), comprising 30+ adopters
+with the objective to provide support to, and facilitate adoption among, users.
+
+Unfortunately, while PMBus commands are standardized, there are no mandatory
+commands, and manufacturers can add as many non-standard commands as they like.
+Also, different PMBUs devices act differently if non-supported commands are
+executed. Some devices return an error, some devices return 0xff or 0xffff and
+set a status error flag, and some devices may simply hang up.
+
+Despite all those difficulties, a generic PMBus device driver is still useful
+and supported since kernel version 2.6.39. However, it was necessary to support
+device specific extensions in addition to the core PMBus driver, since it is
+simply unknown what new device specific functionality PMBus device developers
+come up with next.
+
+To make device specific extensions as scalable as possible, and to avoid having
+to modify the core PMBus driver repeatedly for new devices, the PMBus driver was
+split into core, generic, and device specific code. The core code (in
+pmbus_core.c) provides generic functionality. The generic code (in pmbus.c)
+provides support for generic PMBus devices. Device specific code is responsible
+for device specific initialization and, if needed, maps device specific
+functionality into generic functionality. This is to some degree comparable
+to PCI code, where generic code is augmented as needed with quirks for all kinds
+of devices.
+
+PMBus device capabilities auto-detection
+========================================
+
+For generic PMBus devices, code in pmbus.c attempts to auto-detect all supported
+PMBus commands. Auto-detection is somewhat limited, since there are simply too
+many variables to consider. For example, it is almost impossible to autodetect
+which PMBus commands are paged and which commands are replicated across all
+pages (see the PMBus specification for details on multi-page PMBus devices).
+
+For this reason, it often makes sense to provide a device specific driver if not
+all commands can be auto-detected. The data structures in this driver can be
+used to inform the core driver about functionality supported by individual
+chips.
+
+Some commands are always auto-detected. This applies to all limit commands
+(lcrit, min, max, and crit attributes) as well as associated alarm attributes.
+Limits and alarm attributes are auto-detected because there are simply too many
+possible combinations to provide a manual configuration interface.
+
+PMBus internal API
+==================
+
+The API between core and device specific PMBus code is defined in
+drivers/hwmon/pmbus/pmbus.h. In addition to the internal API, pmbus.h defines
+standard PMBus commands and virtual PMBus commands.
+
+Standard PMBus commands
+-----------------------
+
+Standard PMBus commands (commands values 0x00 to 0xff) are defined in the PMBUs
+specification.
+
+Virtual PMBus commands
+----------------------
+
+Virtual PMBus commands are provided to enable support for non-standard
+functionality which has been implemented by several chip vendors and is thus
+desirable to support.
+
+Virtual PMBus commands start with command value 0x100 and can thus easily be
+distinguished from standard PMBus commands (which can not have values larger
+than 0xff). Support for virtual PMBus commands is device specific and thus has
+to be implemented in device specific code.
+
+Virtual commands are named PMBUS_VIRT_xxx and start with PMBUS_VIRT_BASE. All
+virtual commands are word sized.
+
+There are currently two types of virtual commands.
+
+- READ commands are read-only; writes are either ignored or return an error.
+- RESET commands are read/write. Reading reset registers returns zero
+ (used for detection), writing any value causes the associated history to be
+ reset.
+
+Virtual commands have to be handled in device specific driver code. Chip driver
+code returns non-negative values if a virtual command is supported, or a
+negative error code if not. The chip driver may return -ENODATA or any other
+Linux error code in this case, though an error code other than -ENODATA is
+handled more efficiently and thus preferred. Either case, the calling PMBus
+core code will abort if the chip driver returns an error code when reading
+or writing virtual registers (in other words, the PMBus core code will never
+send a virtual command to a chip).
+
+PMBus driver information
+------------------------
+
+PMBus driver information, defined in struct pmbus_driver_info, is the main means
+for device specific drivers to pass information to the core PMBus driver.
+Specifically, it provides the following information.
+
+- For devices supporting its data in Direct Data Format, it provides coefficients
+ for converting register values into normalized data. This data is usually
+ provided by chip manufacturers in device datasheets.
+- Supported chip functionality can be provided to the core driver. This may be
+ necessary for chips which react badly if non-supported commands are executed,
+ and/or to speed up device detection and initialization.
+- Several function entry points are provided to support overriding and/or
+ augmenting generic command execution. This functionality can be used to map
+ non-standard PMBus commands to standard commands, or to augment standard
+ command return values with device specific information.
+
+ API functions
+ -------------
+
+ Functions provided by chip driver
+ ---------------------------------
+
+ All functions return the command return value (read) or zero (write) if
+ successful. A return value of -ENODATA indicates that there is no manufacturer
+ specific command, but that a standard PMBus command may exist. Any other
+ negative return value indicates that the commands does not exist for this
+ chip, and that no attempt should be made to read or write the standard
+ command.
+
+ As mentioned above, an exception to this rule applies to virtual commands,
+ which _must_ be handled in driver specific code. See "Virtual PMBus Commands"
+ above for more details.
+
+ Command execution in the core PMBus driver code is as follows.
+
+ if (chip_access_function) {
+ status = chip_access_function();
+ if (status != -ENODATA)
+ return status;
+ }
+ if (command >= PMBUS_VIRT_BASE) /* For word commands/registers only */
+ return -EINVAL;
+ return generic_access();
+
+ Chip drivers may provide pointers to the following functions in struct
+ pmbus_driver_info. All functions are optional.
+
+ int (*read_byte_data)(struct i2c_client *client, int page, int reg);
+
+ Read byte from page <page>, register <reg>.
+ <page> may be -1, which means "current page".
+
+ int (*read_word_data)(struct i2c_client *client, int page, int reg);
+
+ Read word from page <page>, register <reg>.
+
+ int (*write_word_data)(struct i2c_client *client, int page, int reg,
+ u16 word);
+
+ Write word to page <page>, register <reg>.
+
+ int (*write_byte)(struct i2c_client *client, int page, u8 value);
+
+ Write byte to page <page>, register <reg>.
+ <page> may be -1, which means "current page".
+
+ int (*identify)(struct i2c_client *client, struct pmbus_driver_info *info);
+
+ Determine supported PMBus functionality. This function is only necessary
+ if a chip driver supports multiple chips, and the chip functionality is not
+ pre-determined. It is currently only used by the generic pmbus driver
+ (pmbus.c).
+
+ Functions exported by core driver
+ ---------------------------------
+
+ Chip drivers are expected to use the following functions to read or write
+ PMBus registers. Chip drivers may also use direct I2C commands. If direct I2C
+ commands are used, the chip driver code must not directly modify the current
+ page, since the selected page is cached in the core driver and the core driver
+ will assume that it is selected. Using pmbus_set_page() to select a new page
+ is mandatory.
+
+ int pmbus_set_page(struct i2c_client *client, u8 page);
+
+ Set PMBus page register to <page> for subsequent commands.
+
+ int pmbus_read_word_data(struct i2c_client *client, u8 page, u8 reg);
+
+ Read word data from <page>, <reg>. Similar to i2c_smbus_read_word_data(), but
+ selects page first.
+
+ int pmbus_write_word_data(struct i2c_client *client, u8 page, u8 reg,
+ u16 word);
+
+ Write word data to <page>, <reg>. Similar to i2c_smbus_write_word_data(), but
+ selects page first.
+
+ int pmbus_read_byte_data(struct i2c_client *client, int page, u8 reg);
+
+ Read byte data from <page>, <reg>. Similar to i2c_smbus_read_byte_data(), but
+ selects page first. <page> may be -1, which means "current page".
+
+ int pmbus_write_byte(struct i2c_client *client, int page, u8 value);
+
+ Write byte data to <page>, <reg>. Similar to i2c_smbus_write_byte(), but
+ selects page first. <page> may be -1, which means "current page".
+
+ void pmbus_clear_faults(struct i2c_client *client);
+
+ Execute PMBus "Clear Fault" command on all chip pages.
+ This function calls the device specific write_byte function if defined.
+ Therefore, it must _not_ be called from that function.
+
+ bool pmbus_check_byte_register(struct i2c_client *client, int page, int reg);
+
+ Check if byte register exists. Return true if the register exists, false
+ otherwise.
+ This function calls the device specific write_byte function if defined to
+ obtain the chip status. Therefore, it must _not_ be called from that function.
+
+ bool pmbus_check_word_register(struct i2c_client *client, int page, int reg);
+
+ Check if word register exists. Return true if the register exists, false
+ otherwise.
+ This function calls the device specific write_byte function if defined to
+ obtain the chip status. Therefore, it must _not_ be called from that function.
+
+ int pmbus_do_probe(struct i2c_client *client, const struct i2c_device_id *id,
+ struct pmbus_driver_info *info);
+
+ Execute probe function. Similar to standard probe function for other drivers,
+ with the pointer to struct pmbus_driver_info as additional argument. Calls
+ identify function if supported. Must only be called from device probe
+ function.
+
+ void pmbus_do_remove(struct i2c_client *client);
+
+ Execute driver remove function. Similar to standard driver remove function.
+
+ const struct pmbus_driver_info
+ *pmbus_get_driver_info(struct i2c_client *client);
+
+ Return pointer to struct pmbus_driver_info as passed to pmbus_do_probe().
+
+
+PMBus driver platform data
+==========================
+
+PMBus platform data is defined in include/linux/i2c/pmbus.h. Platform data
+currently only provides a flag field with a single bit used.
+
+#define PMBUS_SKIP_STATUS_CHECK (1 << 0)
+
+struct pmbus_platform_data {
+ u32 flags; /* Device specific flags */
+};
+
+
+Flags
+-----
+
+PMBUS_SKIP_STATUS_CHECK
+
+During register detection, skip checking the status register for
+communication or command errors.
+
+Some PMBus chips respond with valid data when trying to read an unsupported
+register. For such chips, checking the status register is mandatory when
+trying to determine if a chip register exists or not.
+Other PMBus chips don't support the STATUS_CML register, or report
+communication errors for no explicable reason. For such chips, checking the
+status register must be disabled.
+
+Some i2c controllers do not support single-byte commands (write commands with
+no data, i2c_smbus_write_byte()). With such controllers, clearing the status
+register is impossible, and the PMBUS_SKIP_STATUS_CHECK flag must be set.
diff --git a/Documentation/hwmon/powr1220 b/Documentation/hwmon/powr1220
new file mode 100644
index 000000000..21e44f71a
--- /dev/null
+++ b/Documentation/hwmon/powr1220
@@ -0,0 +1,45 @@
+Kernel driver powr1220
+==================
+
+Supported chips:
+ * Lattice POWR1220AT8
+ Prefix: 'powr1220'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Lattice website
+ http://www.latticesemi.com/
+
+Author: Scott Kanowitz <scott.kanowitz@gmail.com>
+
+Description
+-----------
+
+This driver supports the Lattice POWR1220AT8 chip. The POWR1220
+includes voltage monitoring for 14 inputs as well as trim settings
+for output voltages and GPIOs. This driver implements the voltage
+monitoring portion of the chip.
+
+Voltages are sampled by a 12-bit ADC with a step size of 2 mV.
+An in-line attenuator allows measurements from 0 to 6 V. The
+attenuator is enabled or disabled depending on the setting of the
+input's max value. The driver will enable the attenuator for any
+value over the low measurement range maximum of 2 V.
+
+The input naming convention is as follows:
+
+driver name pin name
+in0 VMON1
+in1 VMON2
+in2 VMON3
+in2 VMON4
+in4 VMON5
+in5 VMON6
+in6 VMON7
+in7 VMON8
+in8 VMON9
+in9 VMON10
+in10 VMON11
+in11 VMON12
+in12 VCCA
+in13 VCCINP
+
+The ADC readings are updated on request with a minimum period of 1s.
diff --git a/Documentation/hwmon/pwm-fan b/Documentation/hwmon/pwm-fan
new file mode 100644
index 000000000..18529d2e3
--- /dev/null
+++ b/Documentation/hwmon/pwm-fan
@@ -0,0 +1,17 @@
+Kernel driver pwm-fan
+=====================
+
+This driver enables the use of a PWM module to drive a fan. It uses the
+generic PWM interface thus it is hardware independent. It can be used on
+many SoCs, as long as the SoC supplies a PWM line driver that exposes
+the generic PWM API.
+
+Author: Kamil Debski <k.debski@samsung.com>
+
+Description
+-----------
+
+The driver implements a simple interface for driving a fan connected to
+a PWM output. It uses the generic PWM interface, thus it can be used with
+a range of SoCs. The driver exposes the fan to the user space through
+the hwmon's sysfs interface.
diff --git a/Documentation/hwmon/sch5627 b/Documentation/hwmon/sch5627
new file mode 100644
index 000000000..0551d266c
--- /dev/null
+++ b/Documentation/hwmon/sch5627
@@ -0,0 +1,27 @@
+Kernel driver sch5627
+=====================
+
+Supported chips:
+ * SMSC SCH5627
+ Prefix: 'sch5627'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: Application Note available upon request
+
+Author: Hans de Goede <hdegoede@redhat.com>
+
+
+Description
+-----------
+
+SMSC SCH5627 Super I/O chips include complete hardware monitoring
+capabilities. They can monitor up to 5 voltages, 4 fans and 8 temperatures.
+
+The SMSC SCH5627 hardware monitoring part also contains an integrated
+watchdog. In order for this watchdog to function some motherboard specific
+initialization most be done by the BIOS, so if the watchdog is not enabled
+by the BIOS the sch5627 driver will not register a watchdog device.
+
+The hardware monitoring part of the SMSC SCH5627 is accessed by talking
+through an embedded microcontroller. An application note describing the
+protocol for communicating with the microcontroller is available upon
+request. Please mail me if you want a copy.
diff --git a/Documentation/hwmon/sch5636 b/Documentation/hwmon/sch5636
new file mode 100644
index 000000000..7b0a01da0
--- /dev/null
+++ b/Documentation/hwmon/sch5636
@@ -0,0 +1,34 @@
+Kernel driver sch5636
+=====================
+
+Supported chips:
+ * SMSC SCH5636
+ Prefix: 'sch5636'
+ Addresses scanned: none, address read from Super I/O config space
+
+Author: Hans de Goede <hdegoede@redhat.com>
+
+
+Description
+-----------
+
+SMSC SCH5636 Super I/O chips include an embedded microcontroller for
+hardware monitoring solutions, allowing motherboard manufacturers to create
+their own custom hwmon solution based upon the SCH5636.
+
+Currently the sch5636 driver only supports the Fujitsu Theseus SCH5636 based
+hwmon solution. The sch5636 driver runs a sanity check on loading to ensure
+it is dealing with a Fujitsu Theseus and not with another custom SCH5636 based
+hwmon solution.
+
+The Fujitsu Theseus can monitor up to 5 voltages, 8 fans and 16
+temperatures. Note that the driver detects how many fan headers /
+temperature sensors are actually implemented on the motherboard, so you will
+likely see fewer temperature and fan inputs.
+
+The Fujitsu Theseus hwmon solution also contains an integrated watchdog.
+This watchdog is fully supported by the sch5636 driver.
+
+An application note describing the Theseus' registers, as well as an
+application note describing the protocol for communicating with the
+microcontroller is available upon request. Please mail me if you want a copy.
diff --git a/Documentation/hwmon/sht15 b/Documentation/hwmon/sht15
new file mode 100644
index 000000000..778987d18
--- /dev/null
+++ b/Documentation/hwmon/sht15
@@ -0,0 +1,74 @@
+Kernel driver sht15
+===================
+
+Authors:
+ * Wouter Horre
+ * Jonathan Cameron
+ * Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+ * Jerome Oufella <jerome.oufella@savoirfairelinux.com>
+
+Supported chips:
+ * Sensirion SHT10
+ Prefix: 'sht10'
+
+ * Sensirion SHT11
+ Prefix: 'sht11'
+
+ * Sensirion SHT15
+ Prefix: 'sht15'
+
+ * Sensirion SHT71
+ Prefix: 'sht71'
+
+ * Sensirion SHT75
+ Prefix: 'sht75'
+
+Datasheet: Publicly available at the Sensirion website
+http://www.sensirion.ch/en/pdf/product_information/Datasheet-humidity-sensor-SHT1x.pdf
+
+Description
+-----------
+
+The SHT10, SHT11, SHT15, SHT71, and SHT75 are humidity and temperature
+sensors.
+
+The devices communicate using two GPIO lines.
+
+Supported resolutions for the measurements are 14 bits for temperature and 12
+bits for humidity, or 12 bits for temperature and 8 bits for humidity.
+
+The humidity calibration coefficients are programmed into an OTP memory on the
+chip. These coefficients are used to internally calibrate the signals from the
+sensors. Disabling the reload of those coefficients allows saving 10ms for each
+measurement and decrease power consumption, while losing on precision.
+
+Some options may be set directly in the sht15_platform_data structure
+or via sysfs attributes.
+
+Notes:
+ * The regulator supply name is set to "vcc".
+ * If a CRC validation fails, a soft reset command is sent, which resets
+ status register to its hardware default value, but the driver will try to
+ restore the previous device configuration.
+
+Platform data
+-------------
+
+* checksum:
+ set it to true to enable CRC validation of the readings (default to false).
+* no_otp_reload:
+ flag to indicate not to reload from OTP (default to false).
+* low_resolution:
+ flag to indicate the temp/humidity resolution to use (default to false).
+
+Sysfs interface
+---------------
+
+* temp1_input: temperature input
+* humidity1_input: humidity input
+* heater_enable: write 1 in this attribute to enable the on-chip heater,
+ 0 to disable it. Be careful not to enable the heater
+ for too long.
+* temp1_fault: if 1, this means that the voltage is low (below 2.47V) and
+ measurement may be invalid.
+* humidity1_fault: same as temp1_fault.
diff --git a/Documentation/hwmon/sht21 b/Documentation/hwmon/sht21
new file mode 100644
index 000000000..db17fda45
--- /dev/null
+++ b/Documentation/hwmon/sht21
@@ -0,0 +1,49 @@
+Kernel driver sht21
+===================
+
+Supported chips:
+ * Sensirion SHT21
+ Prefix: 'sht21'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Sensirion website
+ http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT21.pdf
+
+ * Sensirion SHT25
+ Prefix: 'sht21'
+ Addresses scanned: none
+ Datasheet: Publicly available at the Sensirion website
+ http://www.sensirion.com/en/pdf/product_information/Datasheet-humidity-sensor-SHT25.pdf
+
+Author:
+ Urs Fleisch <urs.fleisch@sensirion.com>
+
+Description
+-----------
+
+The SHT21 and SHT25 are humidity and temperature sensors in a DFN package of
+only 3 x 3 mm footprint and 1.1 mm height. The difference between the two
+devices is the higher level of precision of the SHT25 (1.8% relative humidity,
+0.2 degree Celsius) compared with the SHT21 (2.0% relative humidity,
+0.3 degree Celsius).
+
+The devices communicate with the I2C protocol. All sensors are set to the same
+I2C address 0x40, so an entry with I2C_BOARD_INFO("sht21", 0x40) can be used
+in the board setup code.
+
+sysfs-Interface
+---------------
+
+temp1_input - temperature input
+humidity1_input - humidity input
+
+Notes
+-----
+
+The driver uses the default resolution settings of 12 bit for humidity and 14
+bit for temperature, which results in typical measurement times of 22 ms for
+humidity and 66 ms for temperature. To keep self heating below 0.1 degree
+Celsius, the device should not be active for more than 10% of the time,
+e.g. maximum two measurements per second at the given resolution.
+
+Different resolutions, the on-chip heater, using the CRC checksum and reading
+the serial number are not supported yet.
diff --git a/Documentation/hwmon/shtc1 b/Documentation/hwmon/shtc1
new file mode 100644
index 000000000..6b1e05458
--- /dev/null
+++ b/Documentation/hwmon/shtc1
@@ -0,0 +1,43 @@
+Kernel driver shtc1
+===================
+
+Supported chips:
+ * Sensirion SHTC1
+ Prefix: 'shtc1'
+ Addresses scanned: none
+ Datasheet: http://www.sensirion.com/file/datasheet_shtc1
+
+ * Sensirion SHTW1
+ Prefix: 'shtw1'
+ Addresses scanned: none
+ Datasheet: Not publicly available
+
+Author:
+ Johannes Winkelmann <johannes.winkelmann@sensirion.com>
+
+Description
+-----------
+
+This driver implements support for the Sensirion SHTC1 chip, a humidity and
+temperature sensor. Temperature is measured in degrees celsius, relative
+humidity is expressed as a percentage. Driver can be used as well for SHTW1
+chip, which has the same electrical interface.
+
+The device communicates with the I2C protocol. All sensors are set to I2C
+address 0x70. See Documentation/i2c/instantiating-devices for methods to
+instantiate the device.
+
+There are two options configurable by means of shtc1_platform_data:
+1. blocking (pull the I2C clock line down while performing the measurement) or
+ non-blocking mode. Blocking mode will guarantee the fastest result but
+ the I2C bus will be busy during that time. By default, non-blocking mode
+ is used. Make sure clock-stretching works properly on your device if you
+ want to use blocking mode.
+2. high or low accuracy. High accuracy is used by default and using it is
+ strongly recommended.
+
+sysfs-Interface
+---------------
+
+temp1_input - temperature input
+humidity1_input - humidity input
diff --git a/Documentation/hwmon/sis5595 b/Documentation/hwmon/sis5595
new file mode 100644
index 000000000..4f8877a34
--- /dev/null
+++ b/Documentation/hwmon/sis5595
@@ -0,0 +1,106 @@
+Kernel driver sis5595
+=====================
+
+Supported chips:
+ * Silicon Integrated Systems Corp. SiS5595 Southbridge Hardware Monitor
+ Prefix: 'sis5595'
+ Addresses scanned: ISA in PCI-space encoded address
+ Datasheet: Publicly available at the Silicon Integrated Systems Corp. site.
+
+Authors:
+ Kyösti Mälkki <kmalkki@cc.hut.fi>,
+ Mark D. Studebaker <mdsxyz123@yahoo.com>,
+ Aurelien Jarno <aurelien@aurel32.net> 2.6 port
+
+ SiS southbridge has a LM78-like chip integrated on the same IC.
+ This driver is a customized copy of lm78.c
+
+ Supports following revisions:
+ Version PCI ID PCI Revision
+ 1 1039/0008 AF or less
+ 2 1039/0008 B0 or greater
+
+ Note: these chips contain a 0008 device which is incompatible with the
+ 5595. We recognize these by the presence of the listed
+ "blacklist" PCI ID and refuse to load.
+
+ NOT SUPPORTED PCI ID BLACKLIST PCI ID
+ 540 0008 0540
+ 550 0008 0550
+ 5513 0008 5511
+ 5581 0008 5597
+ 5582 0008 5597
+ 5597 0008 5597
+ 630 0008 0630
+ 645 0008 0645
+ 730 0008 0730
+ 735 0008 0735
+
+
+Module Parameters
+-----------------
+force_addr=0xaddr Set the I/O base address. Useful for boards
+ that don't set the address in the BIOS. Does not do a
+ PCI force; the device must still be present in lspci.
+ Don't use this unless the driver complains that the
+ base address is not set.
+ Example: 'modprobe sis5595 force_addr=0x290'
+
+
+Description
+-----------
+
+The SiS5595 southbridge has integrated hardware monitor functions. It also
+has an I2C bus, but this driver only supports the hardware monitor. For the
+I2C bus driver see i2c-sis5595.
+
+The SiS5595 implements zero or one temperature sensor, two fan speed
+sensors, four or five voltage sensors, and alarms.
+
+On the first version of the chip, there are four voltage sensors and one
+temperature sensor.
+
+On the second version of the chip, the temperature sensor (temp) and the
+fifth voltage sensor (in4) share a pin which is configurable, but not
+through the driver. Sorry. The driver senses the configuration of the pin,
+which was hopefully set by the BIOS.
+
+Temperatures are measured in degrees Celsius. An alarm is triggered once
+when the max is crossed; it is also triggered when it drops below the min
+value. Measurements are guaranteed between -55 and +125 degrees, with a
+resolution of 1 degree.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4 or 8) to give
+the readings more range or accuracy. Not all RPM values can accurately be
+represented, so some rounding is done. With a divider of 2, the lowest
+representable value is around 2600 RPM.
+
+Voltage sensors (also known as IN sensors) report their values in volts. An
+alarm is triggered if the voltage has crossed a programmable minimum or
+maximum limit. Note that minimum in this case always means 'closest to
+zero'; this is important for negative voltage measurements. All voltage
+inputs can measure voltages between 0 and 4.08 volts, with a resolution of
+0.016 volt.
+
+In addition to the alarms described above, there is a BTI alarm, which gets
+triggered when an external chip has crossed its limits. Usually, this is
+connected to some LM75-like chip; if at least one crosses its limits, this
+bit gets set.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may already
+have disappeared! Note that in the current implementation, all hardware
+registers are read whenever any data is read (unless it is less than 1.5
+seconds since the last update). This means that you can easily miss
+once-only alarms.
+
+The SiS5595 only updates its values each 1.5 seconds; reading it more often
+will do no harm, but will return 'old' values.
+
+Problems
+--------
+Some chips refuse to be enabled. We don't know why.
+The driver will recognize this and print a message in dmesg.
+
diff --git a/Documentation/hwmon/smm665 b/Documentation/hwmon/smm665
new file mode 100644
index 000000000..a341eeeda
--- /dev/null
+++ b/Documentation/hwmon/smm665
@@ -0,0 +1,157 @@
+Kernel driver smm665
+====================
+
+Supported chips:
+ * Summit Microelectronics SMM465
+ Prefix: 'smm465'
+ Addresses scanned: -
+ Datasheet:
+ http://www.summitmicro.com/prod_select/summary/SMM465/SMM465DS.pdf
+ * Summit Microelectronics SMM665, SMM665B
+ Prefix: 'smm665'
+ Addresses scanned: -
+ Datasheet:
+ http://www.summitmicro.com/prod_select/summary/SMM665/SMM665B_2089_20.pdf
+ * Summit Microelectronics SMM665C
+ Prefix: 'smm665c'
+ Addresses scanned: -
+ Datasheet:
+ http://www.summitmicro.com/prod_select/summary/SMM665C/SMM665C_2125.pdf
+ * Summit Microelectronics SMM764
+ Prefix: 'smm764'
+ Addresses scanned: -
+ Datasheet:
+ http://www.summitmicro.com/prod_select/summary/SMM764/SMM764_2098.pdf
+ * Summit Microelectronics SMM766, SMM766B
+ Prefix: 'smm766'
+ Addresses scanned: -
+ Datasheets:
+ http://www.summitmicro.com/prod_select/summary/SMM766/SMM766_2086.pdf
+ http://www.summitmicro.com/prod_select/summary/SMM766B/SMM766B_2122.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Module Parameters
+-----------------
+
+* vref: int
+ Default: 1250 (mV)
+ Reference voltage on VREF_ADC pin in mV. It should not be necessary to set
+ this parameter unless a non-default reference voltage is used.
+
+
+Description
+-----------
+
+[From datasheet] The SMM665 is an Active DC Output power supply Controller
+that monitors, margins and cascade sequences power. The part monitors six
+power supply channels as well as VDD, 12V input, two general-purpose analog
+inputs and an internal temperature sensor using a 10-bit ADC.
+
+Each monitored channel has its own high and low limits, plus a critical
+limit.
+
+Support for SMM465, SMM764, and SMM766 has been implemented but is untested.
+
+
+Usage Notes
+-----------
+
+This driver does not probe for devices, since there is no register which
+can be safely used to identify the chip. You will have to instantiate
+the devices explicitly. When instantiating the device, you have to specify
+its configuration register address.
+
+Example: the following will load the driver for an SMM665 at address 0x57
+on I2C bus #1:
+$ modprobe smm665
+$ echo smm665 0x57 > /sys/bus/i2c/devices/i2c-1/new_device
+
+
+Sysfs entries
+-------------
+
+This driver uses the values in the datasheet to convert ADC register values
+into the values specified in the sysfs-interface document. All attributes are
+read only.
+
+Min, max, lcrit, and crit values are used by the chip to trigger external signals
+and/or other activity. Triggered signals can include HEALTHY, RST, Power Off,
+or Fault depending on the chip configuration. The driver reports values as lcrit
+or crit if exceeding the limits triggers RST, Power Off, or Fault, and as min or
+max otherwise. For details please see the SMM665 datasheet.
+
+For SMM465 and SMM764, values for Channel E and F are reported but undefined.
+
+in1_input 12V input voltage (mV)
+in2_input 3.3V (VDD) input voltage (mV)
+in3_input Channel A voltage (mV)
+in4_input Channel B voltage (mV)
+in5_input Channel C voltage (mV)
+in6_input Channel D voltage (mV)
+in7_input Channel E voltage (mV)
+in8_input Channel F voltage (mV)
+in9_input AIN1 voltage (mV)
+in10_input AIN2 voltage (mV)
+
+in1_min 12v input minimum voltage (mV)
+in2_min 3.3V (VDD) input minimum voltage (mV)
+in3_min Channel A minimum voltage (mV)
+in4_min Channel B minimum voltage (mV)
+in5_min Channel C minimum voltage (mV)
+in6_min Channel D minimum voltage (mV)
+in7_min Channel E minimum voltage (mV)
+in8_min Channel F minimum voltage (mV)
+in9_min AIN1 minimum voltage (mV)
+in10_min AIN2 minimum voltage (mV)
+
+in1_max 12v input maximum voltage (mV)
+in2_max 3.3V (VDD) input maximum voltage (mV)
+in3_max Channel A maximum voltage (mV)
+in4_max Channel B maximum voltage (mV)
+in5_max Channel C maximum voltage (mV)
+in6_max Channel D maximum voltage (mV)
+in7_max Channel E maximum voltage (mV)
+in8_max Channel F maximum voltage (mV)
+in9_max AIN1 maximum voltage (mV)
+in10_max AIN2 maximum voltage (mV)
+
+in1_lcrit 12v input critical minimum voltage (mV)
+in2_lcrit 3.3V (VDD) input critical minimum voltage (mV)
+in3_lcrit Channel A critical minimum voltage (mV)
+in4_lcrit Channel B critical minimum voltage (mV)
+in5_lcrit Channel C critical minimum voltage (mV)
+in6_lcrit Channel D critical minimum voltage (mV)
+in7_lcrit Channel E critical minimum voltage (mV)
+in8_lcrit Channel F critical minimum voltage (mV)
+in9_lcrit AIN1 critical minimum voltage (mV)
+in10_lcrit AIN2 critical minimum voltage (mV)
+
+in1_crit 12v input critical maximum voltage (mV)
+in2_crit 3.3V (VDD) input critical maximum voltage (mV)
+in3_crit Channel A critical maximum voltage (mV)
+in4_crit Channel B critical maximum voltage (mV)
+in5_crit Channel C critical maximum voltage (mV)
+in6_crit Channel D critical maximum voltage (mV)
+in7_crit Channel E critical maximum voltage (mV)
+in8_crit Channel F critical maximum voltage (mV)
+in9_crit AIN1 critical maximum voltage (mV)
+in10_crit AIN2 critical maximum voltage (mV)
+
+in1_crit_alarm 12v input critical alarm
+in2_crit_alarm 3.3V (VDD) input critical alarm
+in3_crit_alarm Channel A critical alarm
+in4_crit_alarm Channel B critical alarm
+in5_crit_alarm Channel C critical alarm
+in6_crit_alarm Channel D critical alarm
+in7_crit_alarm Channel E critical alarm
+in8_crit_alarm Channel F critical alarm
+in9_crit_alarm AIN1 critical alarm
+in10_crit_alarm AIN2 critical alarm
+
+temp1_input Chip temperature
+temp1_min Mimimum chip temperature
+temp1_max Maximum chip temperature
+temp1_crit Critical chip temperature
+temp1_crit_alarm Temperature critical alarm
diff --git a/Documentation/hwmon/smsc47b397 b/Documentation/hwmon/smsc47b397
new file mode 100644
index 000000000..3a43b6948
--- /dev/null
+++ b/Documentation/hwmon/smsc47b397
@@ -0,0 +1,163 @@
+Kernel driver smsc47b397
+========================
+
+Supported chips:
+ * SMSC LPC47B397-NC
+ * SMSC SCH5307-NS
+ * SMSC SCH5317
+ Prefix: 'smsc47b397'
+ Addresses scanned: none, address read from Super I/O config space
+ Datasheet: In this file
+
+Authors: Mark M. Hoffman <mhoffman@lightlink.com>
+ Utilitek Systems, Inc.
+
+November 23, 2004
+
+The following specification describes the SMSC LPC47B397-NC[1] sensor chip
+(for which there is no public datasheet available). This document was
+provided by Craig Kelly (In-Store Broadcast Network) and edited/corrected
+by Mark M. Hoffman <mhoffman@lightlink.com>.
+
+[1] And SMSC SCH5307-NS and SCH5317, which have different device IDs but are
+otherwise compatible.
+
+* * * * *
+
+Methods for detecting the HP SIO and reading the thermal data on a dc7100.
+
+The thermal information on the dc7100 is contained in the SIO Hardware Monitor
+(HWM). The information is accessed through an index/data pair. The index/data
+pair is located at the HWM Base Address + 0 and the HWM Base Address + 1. The
+HWM Base address can be obtained from Logical Device 8, registers 0x60 (MSB)
+and 0x61 (LSB). Currently we are using 0x480 for the HWM Base Address and
+0x480 and 0x481 for the index/data pair.
+
+Reading temperature information.
+The temperature information is located in the following registers:
+Temp1 0x25 (Currently, this reflects the CPU temp on all systems).
+Temp2 0x26
+Temp3 0x27
+Temp4 0x80
+
+Programming Example
+The following is an example of how to read the HWM temperature registers:
+MOV DX,480H
+MOV AX,25H
+OUT DX,AL
+MOV DX,481H
+IN AL,DX
+
+AL contains the data in hex, the temperature in Celsius is the decimal
+equivalent.
+
+Ex: If AL contains 0x2A, the temperature is 42 degrees C.
+
+Reading tach information.
+The fan speed information is located in the following registers:
+ LSB MSB
+Tach1 0x28 0x29 (Currently, this reflects the CPU
+ fan speed on all systems).
+Tach2 0x2A 0x2B
+Tach3 0x2C 0x2D
+Tach4 0x2E 0x2F
+
+Important!!!
+Reading the tach LSB locks the tach MSB.
+The LSB Must be read first.
+
+How to convert the tach reading to RPM.
+The tach reading (TCount) is given by: (Tach MSB * 256) + (Tach LSB)
+The SIO counts the number of 90kHz (11.111us) pulses per revolution.
+RPM = 60/(TCount * 11.111us)
+
+Example:
+Reg 0x28 = 0x9B
+Reg 0x29 = 0x08
+
+TCount = 0x89B = 2203
+
+RPM = 60 / (2203 * 11.11111 E-6) = 2451 RPM
+
+Obtaining the SIO version.
+
+CONFIGURATION SEQUENCE
+To program the configuration registers, the following sequence must be followed:
+1. Enter Configuration Mode
+2. Configure the Configuration Registers
+3. Exit Configuration Mode.
+
+Enter Configuration Mode
+To place the chip into the Configuration State The config key (0x55) is written
+to the CONFIG PORT (0x2E).
+
+Configuration Mode
+In configuration mode, the INDEX PORT is located at the CONFIG PORT address and
+the DATA PORT is at INDEX PORT address + 1.
+
+The desired configuration registers are accessed in two steps:
+a. Write the index of the Logical Device Number Configuration Register
+ (i.e., 0x07) to the INDEX PORT and then write the number of the
+ desired logical device to the DATA PORT.
+
+b. Write the address of the desired configuration register within the
+ logical device to the INDEX PORT and then write or read the config-
+ uration register through the DATA PORT.
+
+Note: If accessing the Global Configuration Registers, step (a) is not required.
+
+Exit Configuration Mode
+To exit the Configuration State the write 0xAA to the CONFIG PORT (0x2E).
+The chip returns to the RUN State. (This is important).
+
+Programming Example
+The following is an example of how to read the SIO Device ID located at 0x20
+
+; ENTER CONFIGURATION MODE
+MOV DX,02EH
+MOV AX,055H
+OUT DX,AL
+; GLOBAL CONFIGURATION REGISTER
+MOV DX,02EH
+MOV AL,20H
+OUT DX,AL
+; READ THE DATA
+MOV DX,02FH
+IN AL,DX
+; EXIT CONFIGURATION MODE
+MOV DX,02EH
+MOV AX,0AAH
+OUT DX,AL
+
+The registers of interest for identifying the SIO on the dc7100 are Device ID
+(0x20) and Device Rev (0x21).
+
+The Device ID will read 0x6F (0x81 for SCH5307-NS, and 0x85 for SCH5317)
+The Device Rev currently reads 0x01
+
+Obtaining the HWM Base Address.
+The following is an example of how to read the HWM Base Address located in
+Logical Device 8.
+
+; ENTER CONFIGURATION MODE
+MOV DX,02EH
+MOV AX,055H
+OUT DX,AL
+; CONFIGURE REGISTER CRE0,
+; LOGICAL DEVICE 8
+MOV DX,02EH
+MOV AL,07H
+OUT DX,AL ;Point to LD# Config Reg
+MOV DX,02FH
+MOV AL, 08H
+OUT DX,AL;Point to Logical Device 8
+;
+MOV DX,02EH
+MOV AL,60H
+OUT DX,AL ; Point to HWM Base Addr MSB
+MOV DX,02FH
+IN AL,DX ; Get MSB of HWM Base Addr
+; EXIT CONFIGURATION MODE
+MOV DX,02EH
+MOV AX,0AAH
+OUT DX,AL
diff --git a/Documentation/hwmon/smsc47m1 b/Documentation/hwmon/smsc47m1
new file mode 100644
index 000000000..10a24b420
--- /dev/null
+++ b/Documentation/hwmon/smsc47m1
@@ -0,0 +1,63 @@
+Kernel driver smsc47m1
+======================
+
+Supported chips:
+ * SMSC LPC47B27x, LPC47M112, LPC47M10x, LPC47M13x, LPC47M14x,
+ LPC47M15x and LPC47M192
+ Addresses scanned: none, address read from Super I/O config space
+ Prefix: 'smsc47m1'
+ Datasheets:
+ http://www.smsc.com/media/Downloads_Public/Data_Sheets/47b272.pdf
+ http://www.smsc.com/media/Downloads_Public/Data_Sheets/47m10x.pdf
+ http://www.smsc.com/media/Downloads_Public/Data_Sheets/47m112.pdf
+ http://www.smsc.com/
+ * SMSC LPC47M292
+ Addresses scanned: none, address read from Super I/O config space
+ Prefix: 'smsc47m2'
+ Datasheet: Not public
+ * SMSC LPC47M997
+ Addresses scanned: none, address read from Super I/O config space
+ Prefix: 'smsc47m1'
+ Datasheet: none
+
+Authors:
+ Mark D. Studebaker <mdsxyz123@yahoo.com>,
+ With assistance from Bruce Allen <ballen@uwm.edu>, and his
+ fan.c program: http://www.lsc-group.phys.uwm.edu/%7Eballen/driver/
+ Gabriele Gorla <gorlik@yahoo.com>,
+ Jean Delvare <jdelvare@suse.de>
+
+Description
+-----------
+
+The Standard Microsystems Corporation (SMSC) 47M1xx Super I/O chips
+contain monitoring and PWM control circuitry for two fans.
+
+The LPC47M15x, LPC47M192 and LPC47M292 chips contain a full 'hardware
+monitoring block' in addition to the fan monitoring and control. The
+hardware monitoring block is not supported by this driver, use the
+smsc47m192 driver for that.
+
+No documentation is available for the 47M997, but it has the same device
+ID as the 47M15x and 47M192 chips and seems to be compatible.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4 or 8) to give
+the readings more range or accuracy. Not all RPM values can accurately be
+represented, so some rounding is done. With a divider of 2, the lowest
+representable value is around 2600 RPM.
+
+PWM values are from 0 to 255.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may
+already have disappeared! Note that in the current implementation, all
+hardware registers are read whenever any data is read (unless it is less
+than 1.5 seconds since the last update). This means that you can easily
+miss once-only alarms.
+
+
+**********************
+The lm_sensors project gratefully acknowledges the support of
+Intel in the development of this driver.
diff --git a/Documentation/hwmon/smsc47m192 b/Documentation/hwmon/smsc47m192
new file mode 100644
index 000000000..6d54ecb7b
--- /dev/null
+++ b/Documentation/hwmon/smsc47m192
@@ -0,0 +1,103 @@
+Kernel driver smsc47m192
+========================
+
+Supported chips:
+ * SMSC LPC47M192, LPC47M15x, LPC47M292 and LPC47M997
+ Prefix: 'smsc47m192'
+ Addresses scanned: I2C 0x2c - 0x2d
+ Datasheet: The datasheet for LPC47M192 is publicly available from
+ http://www.smsc.com/
+ The LPC47M15x, LPC47M292 and LPC47M997 are compatible for
+ hardware monitoring.
+
+Author: Hartmut Rick <linux@rick.claranet.de>
+ Special thanks to Jean Delvare for careful checking
+ of the code and many helpful comments and suggestions.
+
+
+Description
+-----------
+
+This driver implements support for the hardware sensor capabilities
+of the SMSC LPC47M192 and compatible Super-I/O chips.
+
+These chips support 3 temperature channels and 8 voltage inputs
+as well as CPU voltage VID input.
+
+They do also have fan monitoring and control capabilities, but the
+these features are accessed via ISA bus and are not supported by this
+driver. Use the 'smsc47m1' driver for fan monitoring and control.
+
+Voltages and temperatures are measured by an 8-bit ADC, the resolution
+of the temperatures is 1 bit per degree C.
+Voltages are scaled such that the nominal voltage corresponds to
+192 counts, i.e. 3/4 of the full range. Thus the available range for
+each voltage channel is 0V ... 255/192*(nominal voltage), the resolution
+is 1 bit per (nominal voltage)/192.
+Both voltage and temperature values are scaled by 1000, the sys files
+show voltages in mV and temperatures in units of 0.001 degC.
+
+The +12V analog voltage input channel (in4_input) is multiplexed with
+bit 4 of the encoded CPU voltage. This means that you either get
+a +12V voltage measurement or a 5 bit CPU VID, but not both.
+The default setting is to use the pin as 12V input, and use only 4 bit VID.
+This driver assumes that the information in the configuration register
+is correct, i.e. that the BIOS has updated the configuration if
+the motherboard has this input wired to VID4.
+
+The temperature and voltage readings are updated once every 1.5 seconds.
+Reading them more often repeats the same values.
+
+
+sysfs interface
+---------------
+
+in0_input - +2.5V voltage input
+in1_input - CPU voltage input (nominal 2.25V)
+in2_input - +3.3V voltage input
+in3_input - +5V voltage input
+in4_input - +12V voltage input (may be missing if used as VID4)
+in5_input - Vcc voltage input (nominal 3.3V)
+ This is the supply voltage of the sensor chip itself.
+in6_input - +1.5V voltage input
+in7_input - +1.8V voltage input
+
+in[0-7]_min,
+in[0-7]_max - lower and upper alarm thresholds for in[0-7]_input reading
+
+ All voltages are read and written in mV.
+
+in[0-7]_alarm - alarm flags for voltage inputs
+ These files read '1' in case of alarm, '0' otherwise.
+
+temp1_input - chip temperature measured by on-chip diode
+temp[2-3]_input - temperature measured by external diodes (one of these would
+ typically be wired to the diode inside the CPU)
+
+temp[1-3]_min,
+temp[1-3]_max - lower and upper alarm thresholds for temperatures
+
+temp[1-3]_offset - temperature offset registers
+ The chip adds the offsets stored in these registers to
+ the corresponding temperature readings.
+ Note that temp1 and temp2 offsets share the same register,
+ they cannot both be different from zero at the same time.
+ Writing a non-zero number to one of them will reset the other
+ offset to zero.
+
+ All temperatures and offsets are read and written in
+ units of 0.001 degC.
+
+temp[1-3]_alarm - alarm flags for temperature inputs, '1' in case of alarm,
+ '0' otherwise.
+temp[2-3]_input_fault - diode fault flags for temperature inputs 2 and 3.
+ A fault is detected if the two pins for the corresponding
+ sensor are open or shorted, or any of the two is shorted
+ to ground or Vcc. '1' indicates a diode fault.
+
+cpu0_vid - CPU voltage as received from the CPU
+
+vrm - CPU VID standard used for decoding CPU voltage
+
+ The *_min, *_max, *_offset and vrm files can be read and
+ written, all others are read-only.
diff --git a/Documentation/hwmon/submitting-patches b/Documentation/hwmon/submitting-patches
new file mode 100644
index 000000000..3d1bac399
--- /dev/null
+++ b/Documentation/hwmon/submitting-patches
@@ -0,0 +1,110 @@
+ How to Get Your Patch Accepted Into the Hwmon Subsystem
+ -------------------------------------------------------
+
+This text is a collection of suggestions for people writing patches or
+drivers for the hwmon subsystem. Following these suggestions will greatly
+increase the chances of your change being accepted.
+
+
+1. General
+----------
+
+* It should be unnecessary to mention, but please read and follow
+ Documentation/SubmitChecklist
+ Documentation/SubmittingDrivers
+ Documentation/SubmittingPatches
+ Documentation/CodingStyle
+
+* If your patch generates checkpatch warnings, please refrain from explanations
+ such as "I don't like that coding style". Keep in mind that each unnecessary
+ warning helps hiding a real problem. If you don't like the kernel coding
+ style, don't write kernel drivers.
+
+* Please test your patch thoroughly. We are not your test group.
+ Sometimes a patch can not or not completely be tested because of missing
+ hardware. In such cases, you should test-build the code on at least one
+ architecture. If run-time testing was not achieved, it should be written
+ explicitly below the patch header.
+
+* If your patch (or the driver) is affected by configuration options such as
+ CONFIG_SMP, make sure it compiles for all configuration variants.
+
+
+2. Adding functionality to existing drivers
+-------------------------------------------
+
+* Make sure the documentation in Documentation/hwmon/<driver_name> is up to
+ date.
+
+* Make sure the information in Kconfig is up to date.
+
+* If the added functionality requires some cleanup or structural changes, split
+ your patch into a cleanup part and the actual addition. This makes it easier
+ to review your changes, and to bisect any resulting problems.
+
+* Never mix bug fixes, cleanup, and functional enhancements in a single patch.
+
+
+3. New drivers
+--------------
+
+* Running your patch or driver file(s) through checkpatch does not mean its
+ formatting is clean. If unsure about formatting in your new driver, run it
+ through Lindent. Lindent is not perfect, and you may have to do some minor
+ cleanup, but it is a good start.
+
+* Consider adding yourself to MAINTAINERS.
+
+* Document the driver in Documentation/hwmon/<driver_name>.
+
+* Add the driver to Kconfig and Makefile in alphabetical order.
+
+* Make sure that all dependencies are listed in Kconfig.
+
+* Avoid forward declarations if you can. Rearrange the code if necessary.
+
+* Avoid calculations in macros and macro-generated functions. While such macros
+ may save a line or so in the source, it obfuscates the code and makes code
+ review more difficult. It may also result in code which is more complicated
+ than necessary. Use inline functions or just regular functions instead.
+
+* Use devres functions whenever possible to allocate resources. For rationale
+ and supported functions, please see Documentation/driver-model/devres.txt.
+
+* If the driver has a detect function, make sure it is silent. Debug messages
+ and messages printed after a successful detection are acceptable, but it
+ must not print messages such as "Chip XXX not found/supported".
+
+ Keep in mind that the detect function will run for all drivers supporting an
+ address if a chip is detected on that address. Unnecessary messages will just
+ pollute the kernel log and not provide any value.
+
+* Provide a detect function if and only if a chip can be detected reliably.
+
+* Avoid writing to chip registers in the detect function. If you have to write,
+ only do it after you have already gathered enough data to be certain that the
+ detection is going to be successful.
+
+ Keep in mind that the chip might not be what your driver believes it is, and
+ writing to it might cause a bad misconfiguration.
+
+* Make sure there are no race conditions in the probe function. Specifically,
+ completely initialize your chip first, then create sysfs entries and register
+ with the hwmon subsystem.
+
+* Do not provide support for deprecated sysfs attributes.
+
+* Do not create non-standard attributes unless really needed. If you have to use
+ non-standard attributes, or you believe you do, discuss it on the mailing list
+ first. Either case, provide a detailed explanation why you need the
+ non-standard attribute(s).
+ Standard attributes are specified in Documentation/hwmon/sysfs-interface.
+
+* When deciding which sysfs attributes to support, look at the chip's
+ capabilities. While we do not expect your driver to support everything the
+ chip may offer, it should at least support all limits and alarms.
+
+* Last but not least, please check if a driver for your chip already exists
+ before starting to write a new driver. Especially for temperature sensors,
+ new chips are often variants of previously released chips. In some cases,
+ a presumably new chip may simply have been relabeled.
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
new file mode 100644
index 000000000..2cc95ad46
--- /dev/null
+++ b/Documentation/hwmon/sysfs-interface
@@ -0,0 +1,760 @@
+Naming and data format standards for sysfs files
+------------------------------------------------
+
+The libsensors library offers an interface to the raw sensors data
+through the sysfs interface. Since lm-sensors 3.0.0, libsensors is
+completely chip-independent. It assumes that all the kernel drivers
+implement the standard sysfs interface described in this document.
+This makes adding or updating support for any given chip very easy, as
+libsensors, and applications using it, do not need to be modified.
+This is a major improvement compared to lm-sensors 2.
+
+Note that motherboards vary widely in the connections to sensor chips.
+There is no standard that ensures, for example, that the second
+temperature sensor is connected to the CPU, or that the second fan is on
+the CPU. Also, some values reported by the chips need some computation
+before they make full sense. For example, most chips can only measure
+voltages between 0 and +4V. Other voltages are scaled back into that
+range using external resistors. Since the values of these resistors
+can change from motherboard to motherboard, the conversions cannot be
+hard coded into the driver and have to be done in user space.
+
+For this reason, even if we aim at a chip-independent libsensors, it will
+still require a configuration file (e.g. /etc/sensors.conf) for proper
+values conversion, labeling of inputs and hiding of unused inputs.
+
+An alternative method that some programs use is to access the sysfs
+files directly. This document briefly describes the standards that the
+drivers follow, so that an application program can scan for entries and
+access this data in a simple and consistent way. That said, such programs
+will have to implement conversion, labeling and hiding of inputs. For
+this reason, it is still not recommended to bypass the library.
+
+Each chip gets its own directory in the sysfs /sys/devices tree. To
+find all sensor chips, it is easier to follow the device symlinks from
+/sys/class/hwmon/hwmon*.
+
+Up to lm-sensors 3.0.0, libsensors looks for hardware monitoring attributes
+in the "physical" device directory. Since lm-sensors 3.0.1, attributes found
+in the hwmon "class" device directory are also supported. Complex drivers
+(e.g. drivers for multifunction chips) may want to use this possibility to
+avoid namespace pollution. The only drawback will be that older versions of
+libsensors won't support the driver in question.
+
+All sysfs values are fixed point numbers.
+
+There is only one value per file, unlike the older /proc specification.
+The common scheme for files naming is: <type><number>_<item>. Usual
+types for sensor chips are "in" (voltage), "temp" (temperature) and
+"fan" (fan). Usual items are "input" (measured value), "max" (high
+threshold, "min" (low threshold). Numbering usually starts from 1,
+except for voltages which start from 0 (because most data sheets use
+this). A number is always used for elements that can be present more
+than once, even if there is a single element of the given type on the
+specific chip. Other files do not refer to a specific element, so
+they have a simple name, and no number.
+
+Alarms are direct indications read from the chips. The drivers do NOT
+make comparisons of readings to thresholds. This allows violations
+between readings to be caught and alarmed. The exact definition of an
+alarm (for example, whether a threshold must be met or must be exceeded
+to cause an alarm) is chip-dependent.
+
+When setting values of hwmon sysfs attributes, the string representation of
+the desired value must be written, note that strings which are not a number
+are interpreted as 0! For more on how written strings are interpreted see the
+"sysfs attribute writes interpretation" section at the end of this file.
+
+-------------------------------------------------------------------------
+
+[0-*] denotes any positive number starting from 0
+[1-*] denotes any positive number starting from 1
+RO read only value
+WO write only value
+RW read/write value
+
+Read/write values may be read-only for some chips, depending on the
+hardware implementation.
+
+All entries (except name) are optional, and should only be created in a
+given driver if the chip has the feature.
+
+
+*********************
+* Global attributes *
+*********************
+
+name The chip name.
+ This should be a short, lowercase string, not containing
+ spaces nor dashes, representing the chip name. This is
+ the only mandatory attribute.
+ I2C devices get this attribute created automatically.
+ RO
+
+update_interval The interval at which the chip will update readings.
+ Unit: millisecond
+ RW
+ Some devices have a variable update rate or interval.
+ This attribute can be used to change it to the desired value.
+
+
+************
+* Voltages *
+************
+
+in[0-*]_min Voltage min value.
+ Unit: millivolt
+ RW
+
+in[0-*]_lcrit Voltage critical min value.
+ Unit: millivolt
+ RW
+ If voltage drops to or below this limit, the system may
+ take drastic action such as power down or reset. At the very
+ least, it should report a fault.
+
+in[0-*]_max Voltage max value.
+ Unit: millivolt
+ RW
+
+in[0-*]_crit Voltage critical max value.
+ Unit: millivolt
+ RW
+ If voltage reaches or exceeds this limit, the system may
+ take drastic action such as power down or reset. At the very
+ least, it should report a fault.
+
+in[0-*]_input Voltage input value.
+ Unit: millivolt
+ RO
+ Voltage measured on the chip pin.
+ Actual voltage depends on the scaling resistors on the
+ motherboard, as recommended in the chip datasheet.
+ This varies by chip and by motherboard.
+ Because of this variation, values are generally NOT scaled
+ by the chip driver, and must be done by the application.
+ However, some drivers (notably lm87 and via686a)
+ do scale, because of internal resistors built into a chip.
+ These drivers will output the actual voltage. Rule of
+ thumb: drivers should report the voltage values at the
+ "pins" of the chip.
+
+in[0-*]_average
+ Average voltage
+ Unit: millivolt
+ RO
+
+in[0-*]_lowest
+ Historical minimum voltage
+ Unit: millivolt
+ RO
+
+in[0-*]_highest
+ Historical maximum voltage
+ Unit: millivolt
+ RO
+
+in[0-*]_reset_history
+ Reset inX_lowest and inX_highest
+ WO
+
+in_reset_history
+ Reset inX_lowest and inX_highest for all sensors
+ WO
+
+in[0-*]_label Suggested voltage channel label.
+ Text string
+ Should only be created if the driver has hints about what
+ this voltage channel is being used for, and user-space
+ doesn't. In all other cases, the label is provided by
+ user-space.
+ RO
+
+cpu[0-*]_vid CPU core reference voltage.
+ Unit: millivolt
+ RO
+ Not always correct.
+
+vrm Voltage Regulator Module version number.
+ RW (but changing it should no more be necessary)
+ Originally the VRM standard version multiplied by 10, but now
+ an arbitrary number, as not all standards have a version
+ number.
+ Affects the way the driver calculates the CPU core reference
+ voltage from the vid pins.
+
+Also see the Alarms section for status flags associated with voltages.
+
+
+********
+* Fans *
+********
+
+fan[1-*]_min Fan minimum value
+ Unit: revolution/min (RPM)
+ RW
+
+fan[1-*]_max Fan maximum value
+ Unit: revolution/min (RPM)
+ Only rarely supported by the hardware.
+ RW
+
+fan[1-*]_input Fan input value.
+ Unit: revolution/min (RPM)
+ RO
+
+fan[1-*]_div Fan divisor.
+ Integer value in powers of two (1, 2, 4, 8, 16, 32, 64, 128).
+ RW
+ Some chips only support values 1, 2, 4 and 8.
+ Note that this is actually an internal clock divisor, which
+ affects the measurable speed range, not the read value.
+
+fan[1-*]_pulses Number of tachometer pulses per fan revolution.
+ Integer value, typically between 1 and 4.
+ RW
+ This value is a characteristic of the fan connected to the
+ device's input, so it has to be set in accordance with the fan
+ model.
+ Should only be created if the chip has a register to configure
+ the number of pulses. In the absence of such a register (and
+ thus attribute) the value assumed by all devices is 2 pulses
+ per fan revolution.
+
+fan[1-*]_target
+ Desired fan speed
+ Unit: revolution/min (RPM)
+ RW
+ Only makes sense if the chip supports closed-loop fan speed
+ control based on the measured fan speed.
+
+fan[1-*]_label Suggested fan channel label.
+ Text string
+ Should only be created if the driver has hints about what
+ this fan channel is being used for, and user-space doesn't.
+ In all other cases, the label is provided by user-space.
+ RO
+
+Also see the Alarms section for status flags associated with fans.
+
+
+*******
+* PWM *
+*******
+
+pwm[1-*] Pulse width modulation fan control.
+ Integer value in the range 0 to 255
+ RW
+ 255 is max or 100%.
+
+pwm[1-*]_enable
+ Fan speed control method:
+ 0: no fan speed control (i.e. fan at full speed)
+ 1: manual fan speed control enabled (using pwm[1-*])
+ 2+: automatic fan speed control enabled
+ Check individual chip documentation files for automatic mode
+ details.
+ RW
+
+pwm[1-*]_mode 0: DC mode (direct current)
+ 1: PWM mode (pulse-width modulation)
+ RW
+
+pwm[1-*]_freq Base PWM frequency in Hz.
+ Only possibly available when pwmN_mode is PWM, but not always
+ present even then.
+ RW
+
+pwm[1-*]_auto_channels_temp
+ Select which temperature channels affect this PWM output in
+ auto mode. Bitfield, 1 is temp1, 2 is temp2, 4 is temp3 etc...
+ Which values are possible depend on the chip used.
+ RW
+
+pwm[1-*]_auto_point[1-*]_pwm
+pwm[1-*]_auto_point[1-*]_temp
+pwm[1-*]_auto_point[1-*]_temp_hyst
+ Define the PWM vs temperature curve. Number of trip points is
+ chip-dependent. Use this for chips which associate trip points
+ to PWM output channels.
+ RW
+
+temp[1-*]_auto_point[1-*]_pwm
+temp[1-*]_auto_point[1-*]_temp
+temp[1-*]_auto_point[1-*]_temp_hyst
+ Define the PWM vs temperature curve. Number of trip points is
+ chip-dependent. Use this for chips which associate trip points
+ to temperature channels.
+ RW
+
+There is a third case where trip points are associated to both PWM output
+channels and temperature channels: the PWM values are associated to PWM
+output channels while the temperature values are associated to temperature
+channels. In that case, the result is determined by the mapping between
+temperature inputs and PWM outputs. When several temperature inputs are
+mapped to a given PWM output, this leads to several candidate PWM values.
+The actual result is up to the chip, but in general the highest candidate
+value (fastest fan speed) wins.
+
+
+****************
+* Temperatures *
+****************
+
+temp[1-*]_type Sensor type selection.
+ Integers 1 to 6
+ RW
+ 1: CPU embedded diode
+ 2: 3904 transistor
+ 3: thermal diode
+ 4: thermistor
+ 5: AMD AMDSI
+ 6: Intel PECI
+ Not all types are supported by all chips
+
+temp[1-*]_max Temperature max value.
+ Unit: millidegree Celsius (or millivolt, see below)
+ RW
+
+temp[1-*]_min Temperature min value.
+ Unit: millidegree Celsius
+ RW
+
+temp[1-*]_max_hyst
+ Temperature hysteresis value for max limit.
+ Unit: millidegree Celsius
+ Must be reported as an absolute temperature, NOT a delta
+ from the max value.
+ RW
+
+temp[1-*]_min_hyst
+ Temperature hysteresis value for min limit.
+ Unit: millidegree Celsius
+ Must be reported as an absolute temperature, NOT a delta
+ from the min value.
+ RW
+
+temp[1-*]_input Temperature input value.
+ Unit: millidegree Celsius
+ RO
+
+temp[1-*]_crit Temperature critical max value, typically greater than
+ corresponding temp_max values.
+ Unit: millidegree Celsius
+ RW
+
+temp[1-*]_crit_hyst
+ Temperature hysteresis value for critical limit.
+ Unit: millidegree Celsius
+ Must be reported as an absolute temperature, NOT a delta
+ from the critical value.
+ RW
+
+temp[1-*]_emergency
+ Temperature emergency max value, for chips supporting more than
+ two upper temperature limits. Must be equal or greater than
+ corresponding temp_crit values.
+ Unit: millidegree Celsius
+ RW
+
+temp[1-*]_emergency_hyst
+ Temperature hysteresis value for emergency limit.
+ Unit: millidegree Celsius
+ Must be reported as an absolute temperature, NOT a delta
+ from the emergency value.
+ RW
+
+temp[1-*]_lcrit Temperature critical min value, typically lower than
+ corresponding temp_min values.
+ Unit: millidegree Celsius
+ RW
+
+temp[1-*]_lcrit_hyst
+ Temperature hysteresis value for critical min limit.
+ Unit: millidegree Celsius
+ Must be reported as an absolute temperature, NOT a delta
+ from the critical min value.
+ RW
+
+temp[1-*]_offset
+ Temperature offset which is added to the temperature reading
+ by the chip.
+ Unit: millidegree Celsius
+ Read/Write value.
+
+temp[1-*]_label Suggested temperature channel label.
+ Text string
+ Should only be created if the driver has hints about what
+ this temperature channel is being used for, and user-space
+ doesn't. In all other cases, the label is provided by
+ user-space.
+ RO
+
+temp[1-*]_lowest
+ Historical minimum temperature
+ Unit: millidegree Celsius
+ RO
+
+temp[1-*]_highest
+ Historical maximum temperature
+ Unit: millidegree Celsius
+ RO
+
+temp[1-*]_reset_history
+ Reset temp_lowest and temp_highest
+ WO
+
+temp_reset_history
+ Reset temp_lowest and temp_highest for all sensors
+ WO
+
+Some chips measure temperature using external thermistors and an ADC, and
+report the temperature measurement as a voltage. Converting this voltage
+back to a temperature (or the other way around for limits) requires
+mathematical functions not available in the kernel, so the conversion
+must occur in user space. For these chips, all temp* files described
+above should contain values expressed in millivolt instead of millidegree
+Celsius. In other words, such temperature channels are handled as voltage
+channels by the driver.
+
+Also see the Alarms section for status flags associated with temperatures.
+
+
+************
+* Currents *
+************
+
+curr[1-*]_max Current max value
+ Unit: milliampere
+ RW
+
+curr[1-*]_min Current min value.
+ Unit: milliampere
+ RW
+
+curr[1-*]_lcrit Current critical low value
+ Unit: milliampere
+ RW
+
+curr[1-*]_crit Current critical high value.
+ Unit: milliampere
+ RW
+
+curr[1-*]_input Current input value
+ Unit: milliampere
+ RO
+
+curr[1-*]_average
+ Average current use
+ Unit: milliampere
+ RO
+
+curr[1-*]_lowest
+ Historical minimum current
+ Unit: milliampere
+ RO
+
+curr[1-*]_highest
+ Historical maximum current
+ Unit: milliampere
+ RO
+
+curr[1-*]_reset_history
+ Reset currX_lowest and currX_highest
+ WO
+
+curr_reset_history
+ Reset currX_lowest and currX_highest for all sensors
+ WO
+
+Also see the Alarms section for status flags associated with currents.
+
+*********
+* Power *
+*********
+
+power[1-*]_average Average power use
+ Unit: microWatt
+ RO
+
+power[1-*]_average_interval Power use averaging interval. A poll
+ notification is sent to this file if the
+ hardware changes the averaging interval.
+ Unit: milliseconds
+ RW
+
+power[1-*]_average_interval_max Maximum power use averaging interval
+ Unit: milliseconds
+ RO
+
+power[1-*]_average_interval_min Minimum power use averaging interval
+ Unit: milliseconds
+ RO
+
+power[1-*]_average_highest Historical average maximum power use
+ Unit: microWatt
+ RO
+
+power[1-*]_average_lowest Historical average minimum power use
+ Unit: microWatt
+ RO
+
+power[1-*]_average_max A poll notification is sent to
+ power[1-*]_average when power use
+ rises above this value.
+ Unit: microWatt
+ RW
+
+power[1-*]_average_min A poll notification is sent to
+ power[1-*]_average when power use
+ sinks below this value.
+ Unit: microWatt
+ RW
+
+power[1-*]_input Instantaneous power use
+ Unit: microWatt
+ RO
+
+power[1-*]_input_highest Historical maximum power use
+ Unit: microWatt
+ RO
+
+power[1-*]_input_lowest Historical minimum power use
+ Unit: microWatt
+ RO
+
+power[1-*]_reset_history Reset input_highest, input_lowest,
+ average_highest and average_lowest.
+ WO
+
+power[1-*]_accuracy Accuracy of the power meter.
+ Unit: Percent
+ RO
+
+power[1-*]_cap If power use rises above this limit, the
+ system should take action to reduce power use.
+ A poll notification is sent to this file if the
+ cap is changed by the hardware. The *_cap
+ files only appear if the cap is known to be
+ enforced by hardware.
+ Unit: microWatt
+ RW
+
+power[1-*]_cap_hyst Margin of hysteresis built around capping and
+ notification.
+ Unit: microWatt
+ RW
+
+power[1-*]_cap_max Maximum cap that can be set.
+ Unit: microWatt
+ RO
+
+power[1-*]_cap_min Minimum cap that can be set.
+ Unit: microWatt
+ RO
+
+power[1-*]_max Maximum power.
+ Unit: microWatt
+ RW
+
+power[1-*]_crit Critical maximum power.
+ If power rises to or above this limit, the
+ system is expected take drastic action to reduce
+ power consumption, such as a system shutdown or
+ a forced powerdown of some devices.
+ Unit: microWatt
+ RW
+
+Also see the Alarms section for status flags associated with power readings.
+
+**********
+* Energy *
+**********
+
+energy[1-*]_input Cumulative energy use
+ Unit: microJoule
+ RO
+
+
+************
+* Humidity *
+************
+
+humidity[1-*]_input Humidity
+ Unit: milli-percent (per cent mille, pcm)
+ RO
+
+
+**********
+* Alarms *
+**********
+
+Each channel or limit may have an associated alarm file, containing a
+boolean value. 1 means than an alarm condition exists, 0 means no alarm.
+
+Usually a given chip will either use channel-related alarms, or
+limit-related alarms, not both. The driver should just reflect the hardware
+implementation.
+
+in[0-*]_alarm
+curr[1-*]_alarm
+power[1-*]_alarm
+fan[1-*]_alarm
+temp[1-*]_alarm
+ Channel alarm
+ 0: no alarm
+ 1: alarm
+ RO
+
+OR
+
+in[0-*]_min_alarm
+in[0-*]_max_alarm
+in[0-*]_lcrit_alarm
+in[0-*]_crit_alarm
+curr[1-*]_min_alarm
+curr[1-*]_max_alarm
+curr[1-*]_lcrit_alarm
+curr[1-*]_crit_alarm
+power[1-*]_cap_alarm
+power[1-*]_max_alarm
+power[1-*]_crit_alarm
+fan[1-*]_min_alarm
+fan[1-*]_max_alarm
+temp[1-*]_min_alarm
+temp[1-*]_max_alarm
+temp[1-*]_lcrit_alarm
+temp[1-*]_crit_alarm
+temp[1-*]_emergency_alarm
+ Limit alarm
+ 0: no alarm
+ 1: alarm
+ RO
+
+Each input channel may have an associated fault file. This can be used
+to notify open diodes, unconnected fans etc. where the hardware
+supports it. When this boolean has value 1, the measurement for that
+channel should not be trusted.
+
+fan[1-*]_fault
+temp[1-*]_fault
+ Input fault condition
+ 0: no fault occurred
+ 1: fault condition
+ RO
+
+Some chips also offer the possibility to get beeped when an alarm occurs:
+
+beep_enable Master beep enable
+ 0: no beeps
+ 1: beeps
+ RW
+
+in[0-*]_beep
+curr[1-*]_beep
+fan[1-*]_beep
+temp[1-*]_beep
+ Channel beep
+ 0: disable
+ 1: enable
+ RW
+
+In theory, a chip could provide per-limit beep masking, but no such chip
+was seen so far.
+
+Old drivers provided a different, non-standard interface to alarms and
+beeps. These interface files are deprecated, but will be kept around
+for compatibility reasons:
+
+alarms Alarm bitmask.
+ RO
+ Integer representation of one to four bytes.
+ A '1' bit means an alarm.
+ Chips should be programmed for 'comparator' mode so that
+ the alarm will 'come back' after you read the register
+ if it is still valid.
+ Generally a direct representation of a chip's internal
+ alarm registers; there is no standard for the position
+ of individual bits. For this reason, the use of this
+ interface file for new drivers is discouraged. Use
+ individual *_alarm and *_fault files instead.
+ Bits are defined in kernel/include/sensors.h.
+
+beep_mask Bitmask for beep.
+ Same format as 'alarms' with the same bit locations,
+ use discouraged for the same reason. Use individual
+ *_beep files instead.
+ RW
+
+
+***********************
+* Intrusion detection *
+***********************
+
+intrusion[0-*]_alarm
+ Chassis intrusion detection
+ 0: OK
+ 1: intrusion detected
+ RW
+ Contrary to regular alarm flags which clear themselves
+ automatically when read, this one sticks until cleared by
+ the user. This is done by writing 0 to the file. Writing
+ other values is unsupported.
+
+intrusion[0-*]_beep
+ Chassis intrusion beep
+ 0: disable
+ 1: enable
+ RW
+
+
+sysfs attribute writes interpretation
+-------------------------------------
+
+hwmon sysfs attributes always contain numbers, so the first thing to do is to
+convert the input to a number, there are 2 ways todo this depending whether
+the number can be negative or not:
+unsigned long u = simple_strtoul(buf, NULL, 10);
+long s = simple_strtol(buf, NULL, 10);
+
+With buf being the buffer with the user input being passed by the kernel.
+Notice that we do not use the second argument of strto[u]l, and thus cannot
+tell when 0 is returned, if this was really 0 or is caused by invalid input.
+This is done deliberately as checking this everywhere would add a lot of
+code to the kernel.
+
+Notice that it is important to always store the converted value in an
+unsigned long or long, so that no wrap around can happen before any further
+checking.
+
+After the input string is converted to an (unsigned) long, the value should be
+checked if its acceptable. Be careful with further conversions on the value
+before checking it for validity, as these conversions could still cause a wrap
+around before the check. For example do not multiply the result, and only
+add/subtract if it has been divided before the add/subtract.
+
+What to do if a value is found to be invalid, depends on the type of the
+sysfs attribute that is being set. If it is a continuous setting like a
+tempX_max or inX_max attribute, then the value should be clamped to its
+limits using clamp_val(value, min_limit, max_limit). If it is not continuous
+like for example a tempX_type, then when an invalid value is written,
+-EINVAL should be returned.
+
+Example1, temp1_max, register is a signed 8 bit value (-128 - 127 degrees):
+
+ long v = simple_strtol(buf, NULL, 10) / 1000;
+ v = clamp_val(v, -128, 127);
+ /* write v to register */
+
+Example2, fan divider setting, valid values 2, 4 and 8:
+
+ unsigned long v = simple_strtoul(buf, NULL, 10);
+
+ switch (v) {
+ case 2: v = 1; break;
+ case 4: v = 2; break;
+ case 8: v = 3; break;
+ default:
+ return -EINVAL;
+ }
+ /* write v to register */
diff --git a/Documentation/hwmon/thmc50 b/Documentation/hwmon/thmc50
new file mode 100644
index 000000000..8a7772ade
--- /dev/null
+++ b/Documentation/hwmon/thmc50
@@ -0,0 +1,74 @@
+Kernel driver thmc50
+=====================
+
+Supported chips:
+ * Analog Devices ADM1022
+ Prefix: 'adm1022'
+ Addresses scanned: I2C 0x2c - 0x2e
+ Datasheet: http://www.analog.com/en/prod/0,2877,ADM1022,00.html
+ * Texas Instruments THMC50
+ Prefix: 'thmc50'
+ Addresses scanned: I2C 0x2c - 0x2e
+ Datasheet: http://www.ti.com/
+
+Author: Krzysztof Helt <krzysztof.h1@wp.pl>
+
+This driver was derived from the 2.4 kernel thmc50.c source file.
+
+Credits:
+ thmc50.c (2.4 kernel):
+ Frodo Looijaard <frodol@dds.nl>
+ Philip Edelbrock <phil@netroedge.com>
+
+Module Parameters
+-----------------
+
+* adm1022_temp3: short array
+ List of adapter,address pairs to force chips into ADM1022 mode with
+ second remote temperature. This does not work for original THMC50 chips.
+
+Description
+-----------
+
+The THMC50 implements: an internal temperature sensor, support for an
+external diode-type temperature sensor (compatible w/ the diode sensor inside
+many processors), and a controllable fan/analog_out DAC. For the temperature
+sensors, limits can be set through the appropriate Overtemperature Shutdown
+register and Hysteresis register. Each value can be set and read to half-degree
+accuracy. An alarm is issued (usually to a connected LM78) when the
+temperature gets higher then the Overtemperature Shutdown value; it stays on
+until the temperature falls below the Hysteresis value. All temperatures are in
+degrees Celsius, and are guaranteed within a range of -55 to +125 degrees.
+
+The THMC50 only updates its values each 1.5 seconds; reading it more often
+will do no harm, but will return 'old' values.
+
+The THMC50 is usually used in combination with LM78-like chips, to measure
+the temperature of the processor(s).
+
+The ADM1022 works the same as THMC50 but it is faster (5 Hz instead of
+1 Hz for THMC50). It can be also put in a new mode to handle additional
+remote temperature sensor. The driver use the mode set by BIOS by default.
+
+In case the BIOS is broken and the mode is set incorrectly, you can force
+the mode with additional remote temperature with adm1022_temp3 parameter.
+A typical symptom of wrong setting is a fan forced to full speed.
+
+Driver Features
+---------------
+
+The driver provides up to three temperatures:
+
+temp1 -- internal
+temp2 -- remote
+temp3 -- 2nd remote only for ADM1022
+
+pwm1 -- fan speed (0 = stop, 255 = full)
+pwm1_mode -- always 0 (DC mode)
+
+The value of 0 for pwm1 also forces FAN_OFF signal from the chip,
+so it stops fans even if the value 0 into the ANALOG_OUT register does not.
+
+The driver was tested on Compaq AP550 with two ADM1022 chips (one works
+in the temp3 mode), five temperature readings and two fans.
+
diff --git a/Documentation/hwmon/tmp102 b/Documentation/hwmon/tmp102
new file mode 100644
index 000000000..8454a7763
--- /dev/null
+++ b/Documentation/hwmon/tmp102
@@ -0,0 +1,26 @@
+Kernel driver tmp102
+====================
+
+Supported chips:
+ * Texas Instruments TMP102
+ Prefix: 'tmp102'
+ Addresses scanned: none
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp102.html
+
+Author:
+ Steven King <sfking@fdwdc.com>
+
+Description
+-----------
+
+The Texas Instruments TMP102 implements one temperature sensor. Limits can be
+set through the Overtemperature Shutdown register and Hysteresis register. The
+sensor is accurate to 0.5 degree over the range of -25 to +85 C, and to 1.0
+degree from -40 to +125 C. Resolution of the sensor is 0.0625 degree. The
+operating temperature has a minimum of -55 C and a maximum of +150 C.
+
+The TMP102 has a programmable update rate that can select between 8, 4, 1, and
+0.5 Hz. (Currently the driver only supports the default of 4 Hz).
+
+The driver provides the common sysfs-interface for temperatures (see
+Documentation/hwmon/sysfs-interface under Temperatures).
diff --git a/Documentation/hwmon/tmp103 b/Documentation/hwmon/tmp103
new file mode 100644
index 000000000..ec00a1564
--- /dev/null
+++ b/Documentation/hwmon/tmp103
@@ -0,0 +1,28 @@
+Kernel driver tmp103
+====================
+
+Supported chips:
+ * Texas Instruments TMP103
+ Prefix: 'tmp103'
+ Addresses scanned: none
+ Product info and datasheet: http://www.ti.com/product/tmp103
+
+Author:
+ Heiko Schocher <hs@denx.de>
+
+Description
+-----------
+
+The TMP103 is a digital output temperature sensor in a four-ball
+wafer chip-scale package (WCSP). The TMP103 is capable of reading
+temperatures to a resolution of 1°C. The TMP103 is specified for
+operation over a temperature range of –40°C to +125°C.
+
+Resolution: 8 Bits
+Accuracy: ±1°C Typ (–10°C to +100°C)
+
+The driver provides the common sysfs-interface for temperatures (see
+Documentation/hwmon/sysfs-interface under Temperatures).
+
+Please refer how to instantiate this driver:
+Documentation/i2c/instantiating-devices
diff --git a/Documentation/hwmon/tmp401 b/Documentation/hwmon/tmp401
new file mode 100644
index 000000000..711f75e18
--- /dev/null
+++ b/Documentation/hwmon/tmp401
@@ -0,0 +1,57 @@
+Kernel driver tmp401
+====================
+
+Supported chips:
+ * Texas Instruments TMP401
+ Prefix: 'tmp401'
+ Addresses scanned: I2C 0x4c
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp401.html
+ * Texas Instruments TMP411
+ Prefix: 'tmp411'
+ Addresses scanned: I2C 0x4c, 0x4d, 0x4e
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp411.html
+ * Texas Instruments TMP431
+ Prefix: 'tmp431'
+ Addresses scanned: I2C 0x4c, 0x4d
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp431.html
+ * Texas Instruments TMP432
+ Prefix: 'tmp432'
+ Addresses scanned: I2C 0x4c, 0x4d
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp432.html
+ * Texas Instruments TMP435
+ Prefix: 'tmp435'
+ Addresses scanned: I2C 0x48 - 0x4f
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp435.html
+
+Authors:
+ Hans de Goede <hdegoede@redhat.com>
+ Andre Prendel <andre.prendel@gmx.de>
+
+Description
+-----------
+
+This driver implements support for Texas Instruments TMP401, TMP411,
+TMP431, TMP432 and TMP435 chips. These chips implement one or two remote
+and one local temperature sensors. Temperature is measured in degrees
+Celsius. Resolution of the remote sensor is 0.0625 degree. Local
+sensor resolution can be set to 0.5, 0.25, 0.125 or 0.0625 degree (not
+supported by the driver so far, so using the default resolution of 0.5
+degree).
+
+The driver provides the common sysfs-interface for temperatures (see
+Documentation/hwmon/sysfs-interface under Temperatures).
+
+The TMP411 and TMP431 chips are compatible with TMP401. TMP411 provides
+some additional features.
+
+* Minimum and Maximum temperature measured since power-on, chip-reset
+
+ Exported via sysfs attributes tempX_lowest and tempX_highest.
+
+* Reset of historical minimum/maximum temperature measurements
+
+ Exported via sysfs attribute temp_reset_history. Writing 1 to this
+ file triggers a reset.
+
+TMP432 is compatible with TMP401 and TMP431. It supports two external
+temperature sensors.
diff --git a/Documentation/hwmon/tmp421 b/Documentation/hwmon/tmp421
new file mode 100644
index 000000000..9e6fe5549
--- /dev/null
+++ b/Documentation/hwmon/tmp421
@@ -0,0 +1,44 @@
+Kernel driver tmp421
+====================
+
+Supported chips:
+ * Texas Instruments TMP421
+ Prefix: 'tmp421'
+ Addresses scanned: I2C 0x2a, 0x4c, 0x4d, 0x4e and 0x4f
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp421.html
+ * Texas Instruments TMP422
+ Prefix: 'tmp422'
+ Addresses scanned: I2C 0x4c, 0x4d, 0x4e and 0x4f
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp421.html
+ * Texas Instruments TMP423
+ Prefix: 'tmp423'
+ Addresses scanned: I2C 0x4c and 0x4d
+ Datasheet: http://focus.ti.com/docs/prod/folders/print/tmp421.html
+ * Texas Instruments TMP441
+ Prefix: 'tmp441'
+ Addresses scanned: I2C 0x2a, 0x4c, 0x4d, 0x4e and 0x4f
+ Datasheet: http://www.ti.com/product/tmp441
+ * Texas Instruments TMP442
+ Prefix: 'tmp442'
+ Addresses scanned: I2C 0x4c and 0x4d
+ Datasheet: http://www.ti.com/product/tmp442
+
+Authors:
+ Andre Prendel <andre.prendel@gmx.de>
+
+Description
+-----------
+
+This driver implements support for Texas Instruments TMP421, TMP422,
+TMP423, TMP441, and TMP442 temperature sensor chips. These chips
+implement one local and up to one (TMP421, TMP441), up to two (TMP422,
+TMP442) or up to three (TMP423) remote sensors. Temperature is measured
+in degrees Celsius. The chips are wired over I2C/SMBus and specified
+over a temperature range of -40 to +125 degrees Celsius. Resolution
+for both the local and remote channels is 0.0625 degree C.
+
+The chips support only temperature measurement. The driver exports
+the temperature values via the following sysfs files:
+
+temp[1-4]_input
+temp[2-4]_fault
diff --git a/Documentation/hwmon/tps40422 b/Documentation/hwmon/tps40422
new file mode 100644
index 000000000..24bb0688d
--- /dev/null
+++ b/Documentation/hwmon/tps40422
@@ -0,0 +1,64 @@
+Kernel driver tps40422
+======================
+
+Supported chips:
+ * TI TPS40422
+ Prefix: 'tps40422'
+ Addresses scanned: -
+ Datasheet: http://www.ti.com/lit/gpn/tps40422
+
+Author: Zhu Laiwen <richard.zhu@nsn.com>
+
+
+Description
+-----------
+
+This driver supports TI TPS40422 Dual-Output or Two-Phase Synchronous Buck
+Controller with PMBus
+
+The driver is a client driver to the core PMBus driver.
+Please see Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported.
+
+in[1-2]_label "vout[1-2]"
+in[1-2]_input Measured voltage. From READ_VOUT register.
+in[1-2]_alarm voltage alarm.
+
+curr[1-2]_input Measured current. From READ_IOUT register.
+curr[1-2]_label "iout[1-2]"
+curr1_max Maximum current. From IOUT_OC_WARN_LIMIT register.
+curr1_crit Critical maximum current. From IOUT_OC_FAULT_LIMIT register.
+curr1_max_alarm Current high alarm. From IOUT_OC_WARN_LIMIT status.
+curr1_crit_alarm Current critical high alarm. From IOUT_OC_FAULT status.
+curr2_alarm Current high alarm. From IOUT_OC_WARNING status.
+
+temp1_input Measured temperature. From READ_TEMPERATURE_2 register on page 0.
+temp1_max Maximum temperature. From OT_WARN_LIMIT register.
+temp1_crit Critical high temperature. From OT_FAULT_LIMIT register.
+temp1_max_alarm Chip temperature high alarm. Set by comparing
+ READ_TEMPERATURE_2 on page 0 with OT_WARN_LIMIT if TEMP_OT_WARNING
+ status is set.
+temp1_crit_alarm Chip temperature critical high alarm. Set by comparing
+ READ_TEMPERATURE_2 on page 0 with OT_FAULT_LIMIT if TEMP_OT_FAULT
+ status is set.
+temp2_input Measured temperature. From READ_TEMPERATURE_2 register on page 1.
+temp2_alarm Chip temperature alarm on page 1.
diff --git a/Documentation/hwmon/twl4030-madc-hwmon b/Documentation/hwmon/twl4030-madc-hwmon
new file mode 100644
index 000000000..c3a3a5be1
--- /dev/null
+++ b/Documentation/hwmon/twl4030-madc-hwmon
@@ -0,0 +1,45 @@
+Kernel driver twl4030-madc
+=========================
+
+Supported chips:
+ * Texas Instruments TWL4030
+ Prefix: 'twl4030-madc'
+
+
+Authors:
+ J Keerthy <j-keerthy@ti.com>
+
+Description
+-----------
+
+The Texas Instruments TWL4030 is a Power Management and Audio Circuit. Among
+other things it contains a 10-bit A/D converter MADC. The converter has 16
+channels which can be used in different modes.
+
+
+See this table for the meaning of the different channels
+
+Channel Signal
+------------------------------------------
+0 Battery type(BTYPE)
+1 BCI: Battery temperature (BTEMP)
+2 GP analog input
+3 GP analog input
+4 GP analog input
+5 GP analog input
+6 GP analog input
+7 GP analog input
+8 BCI: VBUS voltage(VBUS)
+9 Backup Battery voltage (VBKP)
+10 BCI: Battery charger current (ICHG)
+11 BCI: Battery charger voltage (VCHG)
+12 BCI: Main battery voltage (VBAT)
+13 Reserved
+14 Reserved
+15 VRUSB Supply/Speaker left/Speaker right polarization level
+
+
+The Sysfs nodes will represent the voltage in the units of mV,
+the temperature channel shows the converted temperature in
+degree Celsius. The Battery charging current channel represents
+battery charging current in mA.
diff --git a/Documentation/hwmon/ucd9000 b/Documentation/hwmon/ucd9000
new file mode 100644
index 000000000..805e33edb
--- /dev/null
+++ b/Documentation/hwmon/ucd9000
@@ -0,0 +1,110 @@
+Kernel driver ucd9000
+=====================
+
+Supported chips:
+ * TI UCD90120, UCD90124, UCD9090, and UCD90910
+ Prefixes: 'ucd90120', 'ucd90124', 'ucd9090', 'ucd90910'
+ Addresses scanned: -
+ Datasheets:
+ http://focus.ti.com/lit/ds/symlink/ucd90120.pdf
+ http://focus.ti.com/lit/ds/symlink/ucd90124.pdf
+ http://focus.ti.com/lit/ds/symlink/ucd9090.pdf
+ http://focus.ti.com/lit/ds/symlink/ucd90910.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+From datasheets:
+
+The UCD90120 Power Supply Sequencer and System Health Monitor monitors and
+sequences up to 12 independent voltage rails. The device integrates a 12-bit
+ADC with a 2.5V internal reference for monitoring up to 13 power supply voltage,
+current, or temperature inputs.
+
+The UCD90124 is a 12-rail PMBus/I2C addressable power-supply sequencer and
+system-health monitor. The device integrates a 12-bit ADC for monitoring up to
+13 power-supply voltage, current, or temperature inputs. Twenty-six GPIO pins
+can be used for power supply enables, power-on reset signals, external
+interrupts, cascading, or other system functions. Twelve of these pins offer PWM
+functionality. Using these pins, the UCD90124 offers support for fan control,
+margining, and general-purpose PWM functions.
+
+The UCD9090 is a 10-rail PMBus/I2C addressable power-supply sequencer and
+monitor. The device integrates a 12-bit ADC for monitoring up to 10 power-supply
+voltage inputs. Twenty-three GPIO pins can be used for power supply enables,
+power-on reset signals, external interrupts, cascading, or other system
+functions. Ten of these pins offer PWM functionality. Using these pins, the
+UCD9090 offers support for margining, and general-purpose PWM functions.
+
+The UCD90910 is a ten-rail I2C / PMBus addressable power-supply sequencer and
+system-health monitor. The device integrates a 12-bit ADC for monitoring up to
+13 power-supply voltage, current, or temperature inputs.
+
+This driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data. Please see
+Documentation/hwmon/pmbus for details.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+in[1-12]_label "vout[1-12]".
+in[1-12]_input Measured voltage. From READ_VOUT register.
+in[1-12]_min Minimum Voltage. From VOUT_UV_WARN_LIMIT register.
+in[1-12]_max Maximum voltage. From VOUT_OV_WARN_LIMIT register.
+in[1-12]_lcrit Critical minimum Voltage. VOUT_UV_FAULT_LIMIT register.
+in[1-12]_crit Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register.
+in[1-12]_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status.
+in[1-12]_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status.
+in[1-12]_lcrit_alarm Voltage critical low alarm. From VOLTAGE_UV_FAULT status.
+in[1-12]_crit_alarm Voltage critical high alarm. From VOLTAGE_OV_FAULT status.
+
+curr[1-12]_label "iout[1-12]".
+curr[1-12]_input Measured current. From READ_IOUT register.
+curr[1-12]_max Maximum current. From IOUT_OC_WARN_LIMIT register.
+curr[1-12]_lcrit Critical minimum output current. From IOUT_UC_FAULT_LIMIT
+ register.
+curr[1-12]_crit Critical maximum current. From IOUT_OC_FAULT_LIMIT register.
+curr[1-12]_max_alarm Current high alarm. From IOUT_OC_WARNING status.
+curr[1-12]_crit_alarm Current critical high alarm. From IOUT_OC_FAULT status.
+
+ For each attribute index, either voltage or current is
+ reported, but not both. If voltage or current is
+ reported depends on the chip configuration.
+
+temp[1-2]_input Measured temperatures. From READ_TEMPERATURE_1 and
+ READ_TEMPERATURE_2 registers.
+temp[1-2]_max Maximum temperature. From OT_WARN_LIMIT register.
+temp[1-2]_crit Critical high temperature. From OT_FAULT_LIMIT register.
+temp[1-2]_max_alarm Temperature high alarm.
+temp[1-2]_crit_alarm Temperature critical high alarm.
+
+fan[1-4]_input Fan RPM.
+fan[1-4]_alarm Fan alarm.
+fan[1-4]_fault Fan fault.
+
+ Fan attributes are only available on chips supporting
+ fan control (UCD90124, UCD90910). Attribute files are
+ created only for enabled fans.
+ Note that even though UCD90910 supports up to 10 fans,
+ only up to four fans are currently supported.
diff --git a/Documentation/hwmon/ucd9200 b/Documentation/hwmon/ucd9200
new file mode 100644
index 000000000..1e8060e63
--- /dev/null
+++ b/Documentation/hwmon/ucd9200
@@ -0,0 +1,112 @@
+Kernel driver ucd9200
+=====================
+
+Supported chips:
+ * TI UCD9220, UCD9222, UCD9224, UCD9240, UCD9244, UCD9246, and UCD9248
+ Prefixes: 'ucd9220', 'ucd9222', 'ucd9224', 'ucd9240', 'ucd9244', 'ucd9246',
+ 'ucd9248'
+ Addresses scanned: -
+ Datasheets:
+ http://focus.ti.com/lit/ds/symlink/ucd9220.pdf
+ http://focus.ti.com/lit/ds/symlink/ucd9222.pdf
+ http://focus.ti.com/lit/ds/symlink/ucd9224.pdf
+ http://focus.ti.com/lit/ds/symlink/ucd9240.pdf
+ http://focus.ti.com/lit/ds/symlink/ucd9244.pdf
+ http://focus.ti.com/lit/ds/symlink/ucd9246.pdf
+ http://focus.ti.com/lit/ds/symlink/ucd9248.pdf
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+[From datasheets] UCD9220, UCD9222, UCD9224, UCD9240, UCD9244, UCD9246, and
+UCD9248 are multi-rail, multi-phase synchronous buck digital PWM controllers
+designed for non-isolated DC/DC power applications. The devices integrate
+dedicated circuitry for DC/DC loop management with flash memory and a serial
+interface to support configuration, monitoring and management.
+
+This driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus for details on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data. Please see
+Documentation/hwmon/pmbus for details.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+in1_label "vin".
+in1_input Measured voltage. From READ_VIN register.
+in1_min Minimum Voltage. From VIN_UV_WARN_LIMIT register.
+in1_max Maximum voltage. From VIN_OV_WARN_LIMIT register.
+in1_lcrit Critical minimum Voltage. VIN_UV_FAULT_LIMIT register.
+in1_crit Critical maximum voltage. From VIN_OV_FAULT_LIMIT register.
+in1_min_alarm Voltage low alarm. From VIN_UV_WARNING status.
+in1_max_alarm Voltage high alarm. From VIN_OV_WARNING status.
+in1_lcrit_alarm Voltage critical low alarm. From VIN_UV_FAULT status.
+in1_crit_alarm Voltage critical high alarm. From VIN_OV_FAULT status.
+
+in[2-5]_label "vout[1-4]".
+in[2-5]_input Measured voltage. From READ_VOUT register.
+in[2-5]_min Minimum Voltage. From VOUT_UV_WARN_LIMIT register.
+in[2-5]_max Maximum voltage. From VOUT_OV_WARN_LIMIT register.
+in[2-5]_lcrit Critical minimum Voltage. VOUT_UV_FAULT_LIMIT register.
+in[2-5]_crit Critical maximum voltage. From VOUT_OV_FAULT_LIMIT register.
+in[2-5]_min_alarm Voltage low alarm. From VOLTAGE_UV_WARNING status.
+in[2-5]_max_alarm Voltage high alarm. From VOLTAGE_OV_WARNING status.
+in[2-5]_lcrit_alarm Voltage critical low alarm. From VOLTAGE_UV_FAULT status.
+in[2-5]_crit_alarm Voltage critical high alarm. From VOLTAGE_OV_FAULT status.
+
+curr1_label "iin".
+curr1_input Measured current. From READ_IIN register.
+
+curr[2-5]_label "iout[1-4]".
+curr[2-5]_input Measured current. From READ_IOUT register.
+curr[2-5]_max Maximum current. From IOUT_OC_WARN_LIMIT register.
+curr[2-5]_lcrit Critical minimum output current. From IOUT_UC_FAULT_LIMIT
+ register.
+curr[2-5]_crit Critical maximum current. From IOUT_OC_FAULT_LIMIT register.
+curr[2-5]_max_alarm Current high alarm. From IOUT_OC_WARNING status.
+curr[2-5]_crit_alarm Current critical high alarm. From IOUT_OC_FAULT status.
+
+power1_input Measured input power. From READ_PIN register.
+power1_label "pin"
+
+power[2-5]_input Measured output power. From READ_POUT register.
+power[2-5]_label "pout[1-4]"
+
+ The number of output voltage, current, and power
+ attribute sets is determined by the number of enabled
+ rails. See chip datasheets for details.
+
+temp[1-5]_input Measured temperatures. From READ_TEMPERATURE_1 and
+ READ_TEMPERATURE_2 registers.
+ temp1 is the chip internal temperature. temp[2-5] are
+ rail temperatures. temp[2-5] attributes are only
+ created for enabled rails. See chip datasheets for
+ details.
+temp[1-5]_max Maximum temperature. From OT_WARN_LIMIT register.
+temp[1-5]_crit Critical high temperature. From OT_FAULT_LIMIT register.
+temp[1-5]_max_alarm Temperature high alarm.
+temp[1-5]_crit_alarm Temperature critical high alarm.
+
+fan1_input Fan RPM. ucd9240 only.
+fan1_alarm Fan alarm. ucd9240 only.
+fan1_fault Fan fault. ucd9240 only.
diff --git a/Documentation/hwmon/userspace-tools b/Documentation/hwmon/userspace-tools
new file mode 100644
index 000000000..9865aeedc
--- /dev/null
+++ b/Documentation/hwmon/userspace-tools
@@ -0,0 +1,40 @@
+Introduction
+------------
+
+Most mainboards have sensor chips to monitor system health (like temperatures,
+voltages, fans speed). They are often connected through an I2C bus, but some
+are also connected directly through the ISA bus.
+
+The kernel drivers make the data from the sensor chips available in the /sys
+virtual filesystem. Userspace tools are then used to display the measured
+values or configure the chips in a more friendly manner.
+
+Lm-sensors
+----------
+
+Core set of utilities that will allow you to obtain health information,
+setup monitoring limits etc. You can get them on their homepage
+http://www.lm-sensors.org/ or as a package from your Linux distribution.
+
+If from website:
+Get lm-sensors from project web site. Please note, you need only userspace
+part, so compile with "make user" and install with "make user_install".
+
+General hints to get things working:
+
+0) get lm-sensors userspace utils
+1) compile all drivers in I2C and Hardware Monitoring sections as modules
+ in your kernel
+2) run sensors-detect script, it will tell you what modules you need to load.
+3) load them and run "sensors" command, you should see some results.
+4) fix sensors.conf, labels, limits, fan divisors
+5) if any more problems consult FAQ, or documentation
+
+Other utilities
+---------------
+
+If you want some graphical indicators of system health look for applications
+like: gkrellm, ksensors, xsensors, wmtemp, wmsensors, wmgtemp, ksysguardd,
+hardware-monitor
+
+If you are server administrator you can try snmpd or mrtgutils.
diff --git a/Documentation/hwmon/vexpress b/Documentation/hwmon/vexpress
new file mode 100644
index 000000000..557d6d5ad
--- /dev/null
+++ b/Documentation/hwmon/vexpress
@@ -0,0 +1,34 @@
+Kernel driver vexpress
+======================
+
+Supported systems:
+ * ARM Ltd. Versatile Express platform
+ Prefix: 'vexpress'
+ Datasheets:
+ * "Hardware Description" sections of the Technical Reference Manuals
+ for the Versatile Express boards:
+ http://infocenter.arm.com/help/topic/com.arm.doc.subset.boards.express/index.html
+ * Section "4.4.14. System Configuration registers" of the V2M-P1 TRM:
+ http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0447-/index.html
+
+Author: Pawel Moll
+
+Description
+-----------
+
+Versatile Express platform (http://www.arm.com/versatileexpress/) is a
+reference & prototyping system for ARM Ltd. processors. It can be set up
+from a wide range of boards, each of them containing (apart of the main
+chip/FPGA) a number of microcontrollers responsible for platform
+configuration and control. Theses microcontrollers can also monitor the
+board and its environment by a number of internal and external sensors,
+providing information about power lines voltages and currents, board
+temperature and power usage. Some of them also calculate consumed energy
+and provide a cumulative use counter.
+
+The configuration devices are _not_ memory mapped and must be accessed
+via a custom interface, abstracted by the "vexpress_config" API.
+
+As these devices are non-discoverable, they must be described in a Device
+Tree passed to the kernel. Details of the DT binding for them can be found
+in Documentation/devicetree/bindings/hwmon/vexpress.txt.
diff --git a/Documentation/hwmon/via686a b/Documentation/hwmon/via686a
new file mode 100644
index 000000000..e5f90ab5c
--- /dev/null
+++ b/Documentation/hwmon/via686a
@@ -0,0 +1,78 @@
+Kernel driver via686a
+=====================
+
+Supported chips:
+ * Via VT82C686A, VT82C686B Southbridge Integrated Hardware Monitor
+ Prefix: 'via686a'
+ Addresses scanned: ISA in PCI-space encoded address
+ Datasheet: On request through web form (http://www.via.com.tw/en/resources/download-center/)
+
+Authors:
+ Kyösti Mälkki <kmalkki@cc.hut.fi>,
+ Mark D. Studebaker <mdsxyz123@yahoo.com>
+ Bob Dougherty <bobd@stanford.edu>
+ (Some conversion-factor data were contributed by
+ Jonathan Teh Soon Yew <j.teh@iname.com>
+ and Alex van Kaam <darkside@chello.nl>.)
+
+Module Parameters
+-----------------
+
+force_addr=0xaddr Set the I/O base address. Useful for boards that
+ don't set the address in the BIOS. Look for a BIOS
+ upgrade before resorting to this. Does not do a
+ PCI force; the via686a must still be present in lspci.
+ Don't use this unless the driver complains that the
+ base address is not set.
+ Example: 'modprobe via686a force_addr=0x6000'
+
+Description
+-----------
+
+The driver does not distinguish between the chips and reports
+all as a 686A.
+
+The Via 686a southbridge has integrated hardware monitor functionality.
+It also has an I2C bus, but this driver only supports the hardware monitor.
+For the I2C bus driver, see <file:Documentation/i2c/busses/i2c-viapro>
+
+The Via 686a implements three temperature sensors, two fan rotation speed
+sensors, five voltage sensors and alarms.
+
+Temperatures are measured in degrees Celsius. An alarm is triggered once
+when the Overtemperature Shutdown limit is crossed; it is triggered again
+as soon as it drops below the hysteresis value.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4 or 8) to give
+the readings more range or accuracy. Not all RPM values can accurately be
+represented, so some rounding is done. With a divider of 2, the lowest
+representable value is around 2600 RPM.
+
+Voltage sensors (also known as IN sensors) report their values in volts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit. Voltages are internally scalled, so each voltage channel
+has a different resolution and range.
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may
+already have disappeared! Note that in the current implementation, all
+hardware registers are read whenever any data is read (unless it is less
+than 1.5 seconds since the last update). This means that you can easily
+miss once-only alarms.
+
+The driver only updates its values each 1.5 seconds; reading it more often
+will do no harm, but will return 'old' values.
+
+Known Issues
+------------
+
+This driver handles sensors integrated in some VIA south bridges. It is
+possible that a motherboard maker used a VT82C686A/B chip as part of a
+product design but was not interested in its hardware monitoring features,
+in which case the sensor inputs will not be wired. This is the case of
+the Asus K7V, A7V and A7V133 motherboards, to name only a few of them.
+So, if you need the force_addr parameter, and end up with values which
+don't seem to make any sense, don't look any further: your chip is simply
+not wired for hardware monitoring.
diff --git a/Documentation/hwmon/vt1211 b/Documentation/hwmon/vt1211
new file mode 100644
index 000000000..77fa633b9
--- /dev/null
+++ b/Documentation/hwmon/vt1211
@@ -0,0 +1,206 @@
+Kernel driver vt1211
+====================
+
+Supported chips:
+ * VIA VT1211
+ Prefix: 'vt1211'
+ Addresses scanned: none, address read from Super-I/O config space
+ Datasheet: Provided by VIA upon request and under NDA
+
+Authors: Juerg Haefliger <juergh@gmail.com>
+
+This driver is based on the driver for kernel 2.4 by Mark D. Studebaker and
+its port to kernel 2.6 by Lars Ekman.
+
+Thanks to Joseph Chan and Fiona Gatt from VIA for providing documentation and
+technical support.
+
+
+Module Parameters
+-----------------
+
+* uch_config: int Override the BIOS default universal channel (UCH)
+ configuration for channels 1-5.
+ Legal values are in the range of 0-31. Bit 0 maps to
+ UCH1, bit 1 maps to UCH2 and so on. Setting a bit to 1
+ enables the thermal input of that particular UCH and
+ setting a bit to 0 enables the voltage input.
+
+* int_mode: int Override the BIOS default temperature interrupt mode.
+ The only possible value is 0 which forces interrupt
+ mode 0. In this mode, any pending interrupt is cleared
+ when the status register is read but is regenerated as
+ long as the temperature stays above the hysteresis
+ limit.
+
+Be aware that overriding BIOS defaults might cause some unwanted side effects!
+
+
+Description
+-----------
+
+The VIA VT1211 Super-I/O chip includes complete hardware monitoring
+capabilities. It monitors 2 dedicated temperature sensor inputs (temp1 and
+temp2), 1 dedicated voltage (in5) and 2 fans. Additionally, the chip
+implements 5 universal input channels (UCH1-5) that can be individually
+programmed to either monitor a voltage or a temperature.
+
+This chip also provides manual and automatic control of fan speeds (according
+to the datasheet). The driver only supports automatic control since the manual
+mode doesn't seem to work as advertised in the datasheet. In fact I couldn't
+get manual mode to work at all! Be aware that automatic mode hasn't been
+tested very well (due to the fact that my EPIA M10000 doesn't have the fans
+connected to the PWM outputs of the VT1211 :-().
+
+The following table shows the relationship between the vt1211 inputs and the
+sysfs nodes.
+
+Sensor Voltage Mode Temp Mode Default Use (from the datasheet)
+------ ------------ --------- --------------------------------
+Reading 1 temp1 Intel thermal diode
+Reading 3 temp2 Internal thermal diode
+UCH1/Reading2 in0 temp3 NTC type thermistor
+UCH2 in1 temp4 +2.5V
+UCH3 in2 temp5 VccP (processor core)
+UCH4 in3 temp6 +5V
+UCH5 in4 temp7 +12V
++3.3V in5 Internal VCC (+3.3V)
+
+
+Voltage Monitoring
+------------------
+
+Voltages are sampled by an 8-bit ADC with a LSB of ~10mV. The supported input
+range is thus from 0 to 2.60V. Voltage values outside of this range need
+external scaling resistors. This external scaling needs to be compensated for
+via compute lines in sensors.conf, like:
+
+compute inx @*(1+R1/R2), @/(1+R1/R2)
+
+The board level scaling resistors according to VIA's recommendation are as
+follows. And this is of course totally dependent on the actual board
+implementation :-) You will have to find documentation for your own
+motherboard and edit sensors.conf accordingly.
+
+ Expected
+Voltage R1 R2 Divider Raw Value
+-----------------------------------------------
++2.5V 2K 10K 1.2 2083 mV
+VccP --- --- 1.0 1400 mV (1)
++5V 14K 10K 2.4 2083 mV
++12V 47K 10K 5.7 2105 mV
++3.3V (int) 2K 3.4K 1.588 3300 mV (2)
++3.3V (ext) 6.8K 10K 1.68 1964 mV
+
+(1) Depending on the CPU (1.4V is for a VIA C3 Nehemiah).
+(2) R1 and R2 for 3.3V (int) are internal to the VT1211 chip and the driver
+ performs the scaling and returns the properly scaled voltage value.
+
+Each measured voltage has an associated low and high limit which triggers an
+alarm when crossed.
+
+
+Temperature Monitoring
+----------------------
+
+Temperatures are reported in millidegree Celsius. Each measured temperature
+has a high limit which triggers an alarm if crossed. There is an associated
+hysteresis value with each temperature below which the temperature has to drop
+before the alarm is cleared (this is only true for interrupt mode 0). The
+interrupt mode can be forced to 0 in case the BIOS doesn't do it
+automatically. See the 'Module Parameters' section for details.
+
+All temperature channels except temp2 are external. Temp2 is the VT1211
+internal thermal diode and the driver does all the scaling for temp2 and
+returns the temperature in millidegree Celsius. For the external channels
+temp1 and temp3-temp7, scaling depends on the board implementation and needs
+to be performed in userspace via sensors.conf.
+
+Temp1 is an Intel-type thermal diode which requires the following formula to
+convert between sysfs readings and real temperatures:
+
+compute temp1 (@-Offset)/Gain, (@*Gain)+Offset
+
+According to the VIA VT1211 BIOS porting guide, the following gain and offset
+values should be used:
+
+Diode Type Offset Gain
+---------- ------ ----
+Intel CPU 88.638 0.9528
+ 65.000 0.9686 *)
+VIA C3 Ezra 83.869 0.9528
+VIA C3 Ezra-T 73.869 0.9528
+
+*) This is the formula from the lm_sensors 2.10.0 sensors.conf file. I don't
+know where it comes from or how it was derived, it's just listed here for
+completeness.
+
+Temp3-temp7 support NTC thermistors. For these channels, the driver returns
+the voltages as seen at the individual pins of UCH1-UCH5. The voltage at the
+pin (Vpin) is formed by a voltage divider made of the thermistor (Rth) and a
+scaling resistor (Rs):
+
+Vpin = 2200 * Rth / (Rs + Rth) (2200 is the ADC max limit of 2200 mV)
+
+The equation for the thermistor is as follows (google it if you want to know
+more about it):
+
+Rth = Ro * exp(B * (1 / T - 1 / To)) (To is 298.15K (25C) and Ro is the
+ nominal resistance at 25C)
+
+Mingling the above two equations and assuming Rs = Ro and B = 3435 yields the
+following formula for sensors.conf:
+
+compute tempx 1 / (1 / 298.15 - (` (2200 / @ - 1)) / 3435) - 273.15,
+ 2200 / (1 + (^ (3435 / 298.15 - 3435 / (273.15 + @))))
+
+
+Fan Speed Control
+-----------------
+
+The VT1211 provides 2 programmable PWM outputs to control the speeds of 2
+fans. Writing a 2 to any of the two pwm[1-2]_enable sysfs nodes will put the
+PWM controller in automatic mode. There is only a single controller that
+controls both PWM outputs but each PWM output can be individually enabled and
+disabled.
+
+Each PWM has 4 associated distinct output duty-cycles: full, high, low and
+off. Full and off are internally hard-wired to 255 (100%) and 0 (0%),
+respectively. High and low can be programmed via
+pwm[1-2]_auto_point[2-3]_pwm. Each PWM output can be associated with a
+different thermal input but - and here's the weird part - only one set of
+thermal thresholds exist that controls both PWMs output duty-cycles. The
+thermal thresholds are accessible via pwm[1-2]_auto_point[1-4]_temp. Note
+that even though there are 2 sets of 4 auto points each, they map to the same
+registers in the VT1211 and programming one set is sufficient (actually only
+the first set pwm1_auto_point[1-4]_temp is writable, the second set is
+read-only).
+
+PWM Auto Point PWM Output Duty-Cycle
+------------------------------------------------
+pwm[1-2]_auto_point4_pwm full speed duty-cycle (hard-wired to 255)
+pwm[1-2]_auto_point3_pwm high speed duty-cycle
+pwm[1-2]_auto_point2_pwm low speed duty-cycle
+pwm[1-2]_auto_point1_pwm off duty-cycle (hard-wired to 0)
+
+Temp Auto Point Thermal Threshold
+---------------------------------------------
+pwm[1-2]_auto_point4_temp full speed temp
+pwm[1-2]_auto_point3_temp high speed temp
+pwm[1-2]_auto_point2_temp low speed temp
+pwm[1-2]_auto_point1_temp off temp
+
+Long story short, the controller implements the following algorithm to set the
+PWM output duty-cycle based on the input temperature:
+
+Thermal Threshold Output Duty-Cycle
+ (Rising Temp) (Falling Temp)
+----------------------------------------------------------
+ full speed duty-cycle full speed duty-cycle
+full speed temp
+ high speed duty-cycle full speed duty-cycle
+high speed temp
+ low speed duty-cycle high speed duty-cycle
+low speed temp
+ off duty-cycle low speed duty-cycle
+off temp
diff --git a/Documentation/hwmon/w83627ehf b/Documentation/hwmon/w83627ehf
new file mode 100644
index 000000000..735c42a85
--- /dev/null
+++ b/Documentation/hwmon/w83627ehf
@@ -0,0 +1,190 @@
+Kernel driver w83627ehf
+=======================
+
+Supported chips:
+ * Winbond W83627EHF/EHG (ISA access ONLY)
+ Prefix: 'w83627ehf'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: not available
+ * Winbond W83627DHG
+ Prefix: 'w83627dhg'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: not available
+ * Winbond W83627DHG-P
+ Prefix: 'w83627dhg'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: not available
+ * Winbond W83627UHG
+ Prefix: 'w83627uhg'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: available from www.nuvoton.com
+ * Winbond W83667HG
+ Prefix: 'w83667hg'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: not available
+ * Winbond W83667HG-B
+ Prefix: 'w83667hg'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from Nuvoton upon request
+ * Nuvoton NCT6775F/W83667HG-I
+ Prefix: 'nct6775'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from Nuvoton upon request
+ * Nuvoton NCT6776F
+ Prefix: 'nct6776'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Available from Nuvoton upon request
+
+Authors:
+ Jean Delvare <jdelvare@suse.de>
+ Yuan Mu (Winbond)
+ Rudolf Marek <r.marek@assembler.cz>
+ David Hubbard <david.c.hubbard@gmail.com>
+ Gong Jun <JGong@nuvoton.com>
+
+Description
+-----------
+
+This driver implements support for the Winbond W83627EHF, W83627EHG,
+W83627DHG, W83627DHG-P, W83627UHG, W83667HG, W83667HG-B, W83667HG-I
+(NCT6775F), and NCT6776F super I/O chips. We will refer to them collectively
+as Winbond chips.
+
+The chips implement 3 to 4 temperature sensors (9 for NCT6775F and NCT6776F),
+2 to 5 fan rotation speed sensors, 8 to 10 analog voltage sensors, one VID
+(except for 627UHG), alarms with beep warnings (control unimplemented),
+and some automatic fan regulation strategies (plus manual fan control mode).
+
+The temperature sensor sources on W82677HG-B, NCT6775F, and NCT6776F are
+configurable. temp4 and higher attributes are only reported if its temperature
+source differs from the temperature sources of the already reported temperature
+sensors. The configured source for each of the temperature sensors is provided
+in tempX_label.
+
+Temperatures are measured in degrees Celsius and measurement resolution is 1
+degC for temp1 and and 0.5 degC for temp2 and temp3. For temp4 and higher,
+resolution is 1 degC for W83667HG-B and 0.0 degC for NCT6775F and NCT6776F.
+An alarm is triggered when the temperature gets higher than high limit;
+it stays on until the temperature falls below the hysteresis value.
+Alarms are only supported for temp1, temp2, and temp3.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4, 8, 16, 32, 64 or
+128) to give the readings more range or accuracy. The driver sets the most
+suitable fan divisor itself. Some fans might not be present because they
+share pins with other functions.
+
+Voltage sensors (also known as IN sensors) report their values in millivolts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit.
+
+The driver supports automatic fan control mode known as Thermal Cruise.
+In this mode, the chip attempts to keep the measured temperature in a
+predefined temperature range. If the temperature goes out of range, fan
+is driven slower/faster to reach the predefined range again.
+
+The mode works for fan1-fan4. Mapping of temperatures to pwm outputs is as
+follows:
+
+temp1 -> pwm1
+temp2 -> pwm2
+temp3 -> pwm3 (not on 627UHG)
+prog -> pwm4 (not on 667HG and 667HG-B; the programmable setting is not
+ supported by the driver)
+
+/sys files
+----------
+
+name - this is a standard hwmon device entry, it contains the name of
+ the device (see the prefix in the list of supported devices at
+ the top of this file)
+
+pwm[1-4] - this file stores PWM duty cycle or DC value (fan speed) in range:
+ 0 (stop) to 255 (full)
+
+pwm[1-4]_enable - this file controls mode of fan/temperature control:
+ * 1 Manual mode, write to pwm file any value 0-255 (full speed)
+ * 2 "Thermal Cruise" mode
+ * 3 "Fan Speed Cruise" mode
+ * 4 "Smart Fan III" mode
+ * 5 "Smart Fan IV" mode
+
+ SmartFan III mode is not supported on NCT6776F.
+
+ SmartFan IV mode is configurable only if it was configured at system
+ startup, and is only supported for W83677HG-B, NCT6775F, and NCT6776F.
+ SmartFan IV operational parameters can not be configured at this time,
+ and the various pwm attributes are not used in SmartFan IV mode.
+ The attributes can be written to, which is useful if you plan to
+ configure the system for a different pwm mode. However, the information
+ returned when reading pwm attributes is unrelated to SmartFan IV
+ operation.
+
+pwm[1-4]_mode - controls if output is PWM or DC level
+ * 0 DC output (0 - 12v)
+ * 1 PWM output
+
+Thermal Cruise mode
+-------------------
+
+If the temperature is in the range defined by:
+
+pwm[1-4]_target - set target temperature, unit millidegree Celsius
+ (range 0 - 127000)
+pwm[1-4]_tolerance - tolerance, unit millidegree Celsius (range 0 - 15000)
+
+there are no changes to fan speed. Once the temperature leaves the interval,
+fan speed increases (temp is higher) or decreases if lower than desired.
+There are defined steps and times, but not exported by the driver yet.
+
+pwm[1-4]_min_output - minimum fan speed (range 1 - 255), when the temperature
+ is below defined range.
+pwm[1-4]_stop_time - how many milliseconds [ms] must elapse to switch
+ corresponding fan off. (when the temperature was below
+ defined range).
+pwm[1-4]_start_output-minimum fan speed (range 1 - 255) when spinning up
+pwm[1-4]_step_output- rate of fan speed change (1 - 255)
+pwm[1-4]_stop_output- minimum fan speed (range 1 - 255) when spinning down
+pwm[1-4]_max_output - maximum fan speed (range 1 - 255), when the temperature
+ is above defined range.
+
+Note: last six functions are influenced by other control bits, not yet exported
+ by the driver, so a change might not have any effect.
+
+Implementation Details
+----------------------
+
+Future driver development should bear in mind that the following registers have
+different functions on the 627EHF and the 627DHG. Some registers also have
+different power-on default values, but BIOS should already be loading
+appropriate defaults. Note that bank selection must be performed as is currently
+done in the driver for all register addresses.
+
+0x49: only on DHG, selects temperature source for AUX fan, CPU fan0
+0x4a: not completely documented for the EHF and the DHG documentation assigns
+ different behavior to bits 7 and 6, including extending the temperature
+ input selection to SmartFan I, not just SmartFan III. Testing on the EHF
+ will reveal whether they are compatible or not.
+
+0x58: Chip ID: 0xa1=EHF 0xc1=DHG
+0x5e: only on DHG, has bits to enable "current mode" temperature detection and
+ critical temperature protection
+0x45b: only on EHF, bit 3, vin4 alarm (EHF supports 10 inputs, only 9 on DHG)
+0x552: only on EHF, vin4
+0x558: only on EHF, vin4 high limit
+0x559: only on EHF, vin4 low limit
+0x6b: only on DHG, SYS fan critical temperature
+0x6c: only on DHG, CPU fan0 critical temperature
+0x6d: only on DHG, AUX fan critical temperature
+0x6e: only on DHG, CPU fan1 critical temperature
+
+0x50-0x55 and 0x650-0x657 are marked "Test Register" for the EHF, but "Reserved
+ Register" for the DHG
+
+The DHG also supports PECI, where the DHG queries Intel CPU temperatures, and
+the ICH8 southbridge gets that data via PECI from the DHG, so that the
+southbridge drives the fans. And the DHG supports SST, a one-wire serial bus.
+
+The DHG-P has an additional automatic fan speed control mode named Smart Fan
+(TM) III+. This mode is not yet supported by the driver.
diff --git a/Documentation/hwmon/w83627hf b/Documentation/hwmon/w83627hf
new file mode 100644
index 000000000..8432e1118
--- /dev/null
+++ b/Documentation/hwmon/w83627hf
@@ -0,0 +1,115 @@
+Kernel driver w83627hf
+======================
+
+Supported chips:
+ * Winbond W83627HF (ISA accesses ONLY)
+ Prefix: 'w83627hf'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ * Winbond W83627THF
+ Prefix: 'w83627thf'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ * Winbond W83697HF
+ Prefix: 'w83697hf'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ * Winbond W83637HF
+ Prefix: 'w83637hf'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ * Winbond W83687THF
+ Prefix: 'w83687thf'
+ Addresses scanned: ISA address retrieved from Super I/O registers
+ Datasheet: Provided by Winbond on request(http://www.winbond.com/hq/enu)
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>,
+ Mark Studebaker <mdsxyz123@yahoo.com>,
+ Bernhard C. Schrenk <clemy@clemy.org>
+
+Module Parameters
+-----------------
+
+* force_i2c: int
+ Initialize the I2C address of the sensors
+* init: int
+ (default is 1)
+ Use 'init=0' to bypass initializing the chip.
+ Try this if your computer crashes when you load the module.
+
+Description
+-----------
+
+This driver implements support for ISA accesses *only* for
+the Winbond W83627HF, W83627THF, W83697HF and W83637HF Super I/O chips.
+We will refer to them collectively as Winbond chips.
+
+This driver supports ISA accesses, which should be more reliable
+than i2c accesses. Also, for Tyan boards which contain both a
+Super I/O chip and a second i2c-only Winbond chip (often a W83782D),
+using this driver will avoid i2c address conflicts and complex
+initialization that were required in the w83781d driver.
+
+If you really want i2c accesses for these Super I/O chips,
+use the w83781d driver. However this is not the preferred method
+now that this ISA driver has been developed.
+
+The w83627_HF_ uses pins 110-106 as VID0-VID4. The w83627_THF_ uses the
+same pins as GPIO[0:4]. Technically, the w83627_THF_ does not support a
+VID reading. However the two chips have the identical 128 pin package. So,
+it is possible or even likely for a w83627thf to have the VID signals routed
+to these pins despite their not being labeled for that purpose. Therefore,
+the w83627thf driver interprets these as VID. If the VID on your board
+doesn't work, first see doc/vid in the lm_sensors package[1]. If that still
+doesn't help, you may just ignore the bogus VID reading with no harm done.
+
+For further information on this driver see the w83781d driver documentation.
+
+[1] http://www.lm-sensors.org/browser/lm-sensors/trunk/doc/vid
+
+Forcing the address
+-------------------
+
+The driver used to have a module parameter named force_addr, which could
+be used to force the base I/O address of the hardware monitoring block.
+This was meant as a workaround for mainboards with a broken BIOS. This
+module parameter is gone for technical reasons. If you need this feature,
+you can obtain the same result by using the isaset tool (part of
+lm-sensors) before loading the driver:
+
+# Enter the Super I/O config space
+isaset -y -f 0x2e 0x87
+isaset -y -f 0x2e 0x87
+
+# Select the hwmon logical device
+isaset -y 0x2e 0x2f 0x07 0x0b
+
+# Set the base I/O address (to 0x290 in this example)
+isaset -y 0x2e 0x2f 0x60 0x02
+isaset -y 0x2e 0x2f 0x61 0x90
+
+# Exit the Super-I/O config space
+isaset -y -f 0x2e 0xaa
+
+The above sequence assumes a Super-I/O config space at 0x2e/0x2f, but
+0x4e/0x4f is also possible.
+
+Voltage pin mapping
+-------------------
+
+Here is a summary of the voltage pin mapping for the W83627THF. This
+can be useful to convert data provided by board manufacturers into
+working libsensors configuration statements.
+
+ W83627THF |
+ Pin | Name | Register | Sysfs attribute
+-----------------------------------------------------
+ 100 | CPUVCORE | 20h | in0
+ 99 | VIN0 | 21h | in1
+ 98 | VIN1 | 22h | in2
+ 97 | VIN2 | 24h | in4
+ 114 | AVCC | 23h | in3
+ 61 | 5VSB | 50h (bank 5) | in7
+ 74 | VBAT | 51h (bank 5) | in8
+
+For other supported devices, you'll have to take the hard path and
+look up the information in the datasheet yourself (and then add it
+to this document please.)
diff --git a/Documentation/hwmon/w83781d b/Documentation/hwmon/w83781d
new file mode 100644
index 000000000..129b0a3b5
--- /dev/null
+++ b/Documentation/hwmon/w83781d
@@ -0,0 +1,453 @@
+Kernel driver w83781d
+=====================
+
+Supported chips:
+ * Winbond W83781D
+ Prefix: 'w83781d'
+ Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports)
+ Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/w83781d.pdf
+ * Winbond W83782D
+ Prefix: 'w83782d'
+ Addresses scanned: I2C 0x28 - 0x2f, ISA 0x290 (8 I/O ports)
+ Datasheet: http://www.winbond.com
+ * Winbond W83783S
+ Prefix: 'w83783s'
+ Addresses scanned: I2C 0x2d
+ Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/w83783s.pdf
+ * Asus AS99127F
+ Prefix: 'as99127f'
+ Addresses scanned: I2C 0x28 - 0x2f
+ Datasheet: Unavailable from Asus
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>,
+ Mark Studebaker <mdsxyz123@yahoo.com>
+
+Module parameters
+-----------------
+
+* init int
+ (default 1)
+ Use 'init=0' to bypass initializing the chip.
+ Try this if your computer crashes when you load the module.
+
+* reset int
+ (default 0)
+ The driver used to reset the chip on load, but does no more. Use
+ 'reset=1' to restore the old behavior. Report if you need to do this.
+
+force_subclients=bus,caddr,saddr,saddr
+ This is used to force the i2c addresses for subclients of
+ a certain chip. Typical usage is `force_subclients=0,0x2d,0x4a,0x4b'
+ to force the subclients of chip 0x2d on bus 0 to i2c addresses
+ 0x4a and 0x4b. This parameter is useful for certain Tyan boards.
+
+Description
+-----------
+
+This driver implements support for the Winbond W83781D, W83782D, W83783S
+chips, and the Asus AS99127F chips. We will refer to them collectively as
+W8378* chips.
+
+There is quite some difference between these chips, but they are similar
+enough that it was sensible to put them together in one driver.
+The Asus chips are similar to an I2C-only W83782D.
+
+Chip #vin #fanin #pwm #temp wchipid vendid i2c ISA
+as99127f 7 3 0 3 0x31 0x12c3 yes no
+as99127f rev.2 (type_name = as99127f) 0x31 0x5ca3 yes no
+w83781d 7 3 0 3 0x10-1 0x5ca3 yes yes
+w83782d 9 3 2-4 3 0x30 0x5ca3 yes yes
+w83783s 5-6 3 2 1-2 0x40 0x5ca3 yes no
+
+Detection of these chips can sometimes be foiled because they can be in
+an internal state that allows no clean access. If you know the address
+of the chip, use a 'force' parameter; this will put them into a more
+well-behaved state first.
+
+The W8378* implements temperature sensors (three on the W83781D and W83782D,
+two on the W83783S), three fan rotation speed sensors, voltage sensors
+(seven on the W83781D, nine on the W83782D and six on the W83783S), VID
+lines, alarms with beep warnings, and some miscellaneous stuff.
+
+Temperatures are measured in degrees Celsius. There is always one main
+temperature sensor, and one (W83783S) or two (W83781D and W83782D) other
+sensors. An alarm is triggered for the main sensor once when the
+Overtemperature Shutdown limit is crossed; it is triggered again as soon as
+it drops below the Hysteresis value. A more useful behavior
+can be found by setting the Hysteresis value to +127 degrees Celsius; in
+this case, alarms are issued during all the time when the actual temperature
+is above the Overtemperature Shutdown value. The driver sets the
+hysteresis value for temp1 to 127 at initialization.
+
+For the other temperature sensor(s), an alarm is triggered when the
+temperature gets higher then the Overtemperature Shutdown value; it stays
+on until the temperature falls below the Hysteresis value. But on the
+W83781D, there is only one alarm that functions for both other sensors!
+Temperatures are guaranteed within a range of -55 to +125 degrees. The
+main temperature sensors has a resolution of 1 degree; the other sensor(s)
+of 0.5 degree.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4 or 8 for the
+W83781D; 1, 2, 4, 8, 16, 32, 64 or 128 for the others) to give
+the readings more range or accuracy. Not all RPM values can accurately
+be represented, so some rounding is done. With a divider of 2, the lowest
+representable value is around 2600 RPM.
+
+Voltage sensors (also known as IN sensors) report their values in volts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit. Note that minimum in this case always means 'closest to
+zero'; this is important for negative voltage measurements. All voltage
+inputs can measure voltages between 0 and 4.08 volts, with a resolution
+of 0.016 volt.
+
+The VID lines encode the core voltage value: the voltage level your processor
+should work with. This is hardcoded by the mainboard and/or processor itself.
+It is a value in volts. When it is unconnected, you will often find the
+value 3.50 V here.
+
+The W83782D and W83783S temperature conversion machine understands about
+several kinds of temperature probes. You can program the so-called
+beta value in the sensor files. '1' is the PII/Celeron diode, '2' is the
+TN3904 transistor, and 3435 the default thermistor value. Other values
+are (not yet) supported.
+
+In addition to the alarms described above, there is a CHAS alarm on the
+chips which triggers if your computer case is open.
+
+When an alarm goes off, you can be warned by a beeping signal through
+your computer speaker. It is possible to enable all beeping globally,
+or only the beeping for some alarms.
+
+Individual alarm and beep bits:
+
+0x000001: in0
+0x000002: in1
+0x000004: in2
+0x000008: in3
+0x000010: temp1
+0x000020: temp2 (+temp3 on W83781D)
+0x000040: fan1
+0x000080: fan2
+0x000100: in4
+0x000200: in5
+0x000400: in6
+0x000800: fan3
+0x001000: chassis
+0x002000: temp3 (W83782D only)
+0x010000: in7 (W83782D only)
+0x020000: in8 (W83782D only)
+
+If an alarm triggers, it will remain triggered until the hardware register
+is read at least once. This means that the cause for the alarm may
+already have disappeared! Note that in the current implementation, all
+hardware registers are read whenever any data is read (unless it is less
+than 1.5 seconds since the last update). This means that you can easily
+miss once-only alarms.
+
+The chips only update values each 1.5 seconds; reading them more often
+will do no harm, but will return 'old' values.
+
+AS99127F PROBLEMS
+-----------------
+The as99127f support was developed without the benefit of a datasheet.
+In most cases it is treated as a w83781d (although revision 2 of the
+AS99127F looks more like a w83782d).
+This support will be BETA until a datasheet is released.
+One user has reported problems with fans stopping
+occasionally.
+
+Note that the individual beep bits are inverted from the other chips.
+The driver now takes care of this so that user-space applications
+don't have to know about it.
+
+Known problems:
+ - Problems with diode/thermistor settings (supported?)
+ - One user reports fans stopping under high server load.
+ - Revision 2 seems to have 2 PWM registers but we don't know
+ how to handle them. More details below.
+
+These will not be fixed unless we get a datasheet.
+If you have problems, please lobby Asus to release a datasheet.
+Unfortunately several others have without success.
+Please do not send mail to us asking for better as99127f support.
+We have done the best we can without a datasheet.
+Please do not send mail to the author or the sensors group asking for
+a datasheet or ideas on how to convince Asus. We can't help.
+
+
+NOTES:
+-----
+ 783s has no in1 so that in[2-6] are compatible with the 781d/782d.
+
+ 783s pin is programmable for -5V or temp1; defaults to -5V,
+ no control in driver so temp1 doesn't work.
+
+ 782d and 783s datasheets differ on which is pwm1 and which is pwm2.
+ We chose to follow 782d.
+
+ 782d and 783s pin is programmable for fan3 input or pwm2 output;
+ defaults to fan3 input.
+ If pwm2 is enabled (with echo 255 1 > pwm2), then
+ fan3 will report 0.
+
+ 782d has pwm1-2 for ISA, pwm1-4 for i2c. (pwm3-4 share pins with
+ the ISA pins)
+
+Data sheet updates:
+------------------
+ - PWM clock registers:
+
+ 000: master / 512
+ 001: master / 1024
+ 010: master / 2048
+ 011: master / 4096
+ 100: master / 8192
+
+
+Answers from Winbond tech support
+---------------------------------
+>
+> 1) In the W83781D data sheet section 7.2 last paragraph, it talks about
+> reprogramming the R-T table if the Beta of the thermistor is not
+> 3435K. The R-T table is described briefly in section 8.20.
+> What formulas do I use to program a new R-T table for a given Beta?
+>
+ We are sorry that the calculation for R-T table value is
+confidential. If you have another Beta value of thermistor, we can help
+to calculate the R-T table for you. But you should give us real R-T
+Table which can be gotten by thermistor vendor. Therefore we will calculate
+them and obtain 32-byte data, and you can fill the 32-byte data to the
+register in Bank0.CR51 of W83781D.
+
+
+> 2) In the W83782D data sheet, it mentions that pins 38, 39, and 40 are
+> programmable to be either thermistor or Pentium II diode inputs.
+> How do I program them for diode inputs? I can't find any register
+> to program these to be diode inputs.
+ --> You may program Bank0 CR[5Dh] and CR[59h] registers.
+
+ CR[5Dh] bit 1(VTIN1) bit 2(VTIN2) bit 3(VTIN3)
+
+ thermistor 0 0 0
+ diode 1 1 1
+
+
+(error) CR[59h] bit 4(VTIN1) bit 2(VTIN2) bit 3(VTIN3)
+(right) CR[59h] bit 4(VTIN1) bit 5(VTIN2) bit 6(VTIN3)
+
+ PII thermal diode 1 1 1
+ 2N3904 diode 0 0 0
+
+
+Asus Clones
+-----------
+
+We have no datasheets for the Asus clones (AS99127F and ASB100 Bach).
+Here are some very useful information that were given to us by Alex Van
+Kaam about how to detect these chips, and how to read their values. He
+also gives advice for another Asus chipset, the Mozart-2 (which we
+don't support yet). Thanks Alex!
+I reworded some parts and added personal comments.
+
+# Detection:
+
+AS99127F rev.1, AS99127F rev.2 and ASB100:
+- I2C address range: 0x29 - 0x2F
+- If register 0x58 holds 0x31 then we have an Asus (either ASB100 or
+ AS99127F)
+- Which one depends on register 0x4F (manufacturer ID):
+ 0x06 or 0x94: ASB100
+ 0x12 or 0xC3: AS99127F rev.1
+ 0x5C or 0xA3: AS99127F rev.2
+ Note that 0x5CA3 is Winbond's ID (WEC), which let us think Asus get their
+ AS99127F rev.2 direct from Winbond. The other codes mean ATT and DVC,
+ respectively. ATT could stand for Asustek something (although it would be
+ very badly chosen IMHO), I don't know what DVC could stand for. Maybe
+ these codes simply aren't meant to be decoded that way.
+
+Mozart-2:
+- I2C address: 0x77
+- If register 0x58 holds 0x56 or 0x10 then we have a Mozart-2
+- Of the Mozart there are 3 types:
+ 0x58=0x56, 0x4E=0x94, 0x4F=0x36: Asus ASM58 Mozart-2
+ 0x58=0x56, 0x4E=0x94, 0x4F=0x06: Asus AS2K129R Mozart-2
+ 0x58=0x10, 0x4E=0x5C, 0x4F=0xA3: Asus ??? Mozart-2
+ You can handle all 3 the exact same way :)
+
+# Temperature sensors:
+
+ASB100:
+- sensor 1: register 0x27
+- sensor 2 & 3 are the 2 LM75's on the SMBus
+- sensor 4: register 0x17
+Remark: I noticed that on Intel boards sensor 2 is used for the CPU
+ and 4 is ignored/stuck, on AMD boards sensor 4 is the CPU and sensor 2 is
+ either ignored or a socket temperature.
+
+AS99127F (rev.1 and 2 alike):
+- sensor 1: register 0x27
+- sensor 2 & 3 are the 2 LM75's on the SMBus
+Remark: Register 0x5b is suspected to be temperature type selector. Bit 1
+ would control temp1, bit 3 temp2 and bit 5 temp3.
+
+Mozart-2:
+- sensor 1: register 0x27
+- sensor 2: register 0x13
+
+# Fan sensors:
+
+ASB100, AS99127F (rev.1 and 2 alike):
+- 3 fans, identical to the W83781D
+
+Mozart-2:
+- 2 fans only, 1350000/RPM/div
+- fan 1: register 0x28, divisor on register 0xA1 (bits 4-5)
+- fan 2: register 0x29, divisor on register 0xA1 (bits 6-7)
+
+# Voltages:
+
+This is where there is a difference between AS99127F rev.1 and 2.
+Remark: The difference is similar to the difference between
+ W83781D and W83782D.
+
+ASB100:
+in0=r(0x20)*0.016
+in1=r(0x21)*0.016
+in2=r(0x22)*0.016
+in3=r(0x23)*0.016*1.68
+in4=r(0x24)*0.016*3.8
+in5=r(0x25)*(-0.016)*3.97
+in6=r(0x26)*(-0.016)*1.666
+
+AS99127F rev.1:
+in0=r(0x20)*0.016
+in1=r(0x21)*0.016
+in2=r(0x22)*0.016
+in3=r(0x23)*0.016*1.68
+in4=r(0x24)*0.016*3.8
+in5=r(0x25)*(-0.016)*3.97
+in6=r(0x26)*(-0.016)*1.503
+
+AS99127F rev.2:
+in0=r(0x20)*0.016
+in1=r(0x21)*0.016
+in2=r(0x22)*0.016
+in3=r(0x23)*0.016*1.68
+in4=r(0x24)*0.016*3.8
+in5=(r(0x25)*0.016-3.6)*5.14+3.6
+in6=(r(0x26)*0.016-3.6)*3.14+3.6
+
+Mozart-2:
+in0=r(0x20)*0.016
+in1=255
+in2=r(0x22)*0.016
+in3=r(0x23)*0.016*1.68
+in4=r(0x24)*0.016*4
+in5=255
+in6=255
+
+
+# PWM
+
+* Additional info about PWM on the AS99127F (may apply to other Asus
+chips as well) by Jean Delvare as of 2004-04-09:
+
+AS99127F revision 2 seems to have two PWM registers at 0x59 and 0x5A,
+and a temperature sensor type selector at 0x5B (which basically means
+that they swapped registers 0x59 and 0x5B when you compare with Winbond
+chips).
+Revision 1 of the chip also has the temperature sensor type selector at
+0x5B, but PWM registers have no effect.
+
+We don't know exactly how the temperature sensor type selection works.
+Looks like bits 1-0 are for temp1, bits 3-2 for temp2 and bits 5-4 for
+temp3, although it is possible that only the most significant bit matters
+each time. So far, values other than 0 always broke the readings.
+
+PWM registers seem to be split in two parts: bit 7 is a mode selector,
+while the other bits seem to define a value or threshold.
+
+When bit 7 is clear, bits 6-0 seem to hold a threshold value. If the value
+is below a given limit, the fan runs at low speed. If the value is above
+the limit, the fan runs at full speed. We have no clue as to what the limit
+represents. Note that there seem to be some inertia in this mode, speed
+changes may need some time to trigger. Also, an hysteresis mechanism is
+suspected since walking through all the values increasingly and then
+decreasingly led to slightly different limits.
+
+When bit 7 is set, bits 3-0 seem to hold a threshold value, while bits 6-4
+would not be significant. If the value is below a given limit, the fan runs
+at full speed, while if it is above the limit it runs at low speed (so this
+is the contrary of the other mode, in a way). Here again, we don't know
+what the limit is supposed to represent.
+
+One remarkable thing is that the fans would only have two or three
+different speeds (transitional states left apart), not a whole range as
+you usually get with PWM.
+
+As a conclusion, you can write 0x00 or 0x8F to the PWM registers to make
+fans run at low speed, and 0x7F or 0x80 to make them run at full speed.
+
+Please contact us if you can figure out how it is supposed to work. As
+long as we don't know more, the w83781d driver doesn't handle PWM on
+AS99127F chips at all.
+
+* Additional info about PWM on the AS99127F rev.1 by Hector Martin:
+
+I've been fiddling around with the (in)famous 0x59 register and
+found out the following values do work as a form of coarse pwm:
+
+0x80 - seems to turn fans off after some time(1-2 minutes)... might be
+some form of auto-fan-control based on temp? hmm (Qfan? this mobo is an
+old ASUS, it isn't marketed as Qfan. Maybe some beta pre-attempt at Qfan
+that was dropped at the BIOS)
+0x81 - off
+0x82 - slightly "on-ner" than off, but my fans do not get to move. I can
+hear the high-pitched PWM sound that motors give off at too-low-pwm.
+0x83 - now they do move. Estimate about 70% speed or so.
+0x84-0x8f - full on
+
+Changing the high nibble doesn't seem to do much except the high bit
+(0x80) must be set for PWM to work, else the current pwm doesn't seem to
+change.
+
+My mobo is an ASUS A7V266-E. This behavior is similar to what I got
+with speedfan under Windows, where 0-15% would be off, 15-2x% (can't
+remember the exact value) would be 70% and higher would be full on.
+
+* Additional info about PWM on the AS99127F rev.1 from lm-sensors
+ ticket #2350:
+
+I conducted some experiment on Asus P3B-F motherboard with AS99127F
+(Ver. 1).
+
+I confirm that 0x59 register control the CPU_Fan Header on this
+motherboard, and 0x5a register control PWR_Fan.
+
+In order to reduce the dependency of specific fan, the measurement is
+conducted with a digital scope without fan connected. I found out that
+P3B-F actually output variable DC voltage on fan header center pin,
+looks like PWM is filtered on this motherboard.
+
+Here are some of measurements:
+
+0x80 20 mV
+0x81 20 mV
+0x82 232 mV
+0x83 1.2 V
+0x84 2.31 V
+0x85 3.44 V
+0x86 4.62 V
+0x87 5.81 V
+0x88 7.01 V
+9x89 8.22 V
+0x8a 9.42 V
+0x8b 10.6 V
+0x8c 11.9 V
+0x8d 12.4 V
+0x8e 12.4 V
+0x8f 12.4 V
diff --git a/Documentation/hwmon/w83791d b/Documentation/hwmon/w83791d
new file mode 100644
index 000000000..f4021a285
--- /dev/null
+++ b/Documentation/hwmon/w83791d
@@ -0,0 +1,161 @@
+Kernel driver w83791d
+=====================
+
+Supported chips:
+ * Winbond W83791D
+ Prefix: 'w83791d'
+ Addresses scanned: I2C 0x2c - 0x2f
+ Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/W83791D_W83791Gb.pdf
+
+Author: Charles Spirakis <bezaur@gmail.com>
+
+This driver was derived from the w83781d.c and w83792d.c source files.
+
+Credits:
+ w83781d.c:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>,
+ and Mark Studebaker <mdsxyz123@yahoo.com>
+ w83792d.c:
+ Shane Huang (Winbond),
+ Rudolf Marek <r.marek@assembler.cz>
+
+Additional contributors:
+ Sven Anders <anders@anduras.de>
+ Marc Hulsman <m.hulsman@tudelft.nl>
+
+Module Parameters
+-----------------
+
+* init boolean
+ (default 0)
+ Use 'init=1' to have the driver do extra software initializations.
+ The default behavior is to do the minimum initialization possible
+ and depend on the BIOS to properly setup the chip. If you know you
+ have a w83791d and you're having problems, try init=1 before trying
+ reset=1.
+
+* reset boolean
+ (default 0)
+ Use 'reset=1' to reset the chip (via index 0x40, bit 7). The default
+ behavior is no chip reset to preserve BIOS settings.
+
+* force_subclients=bus,caddr,saddr,saddr
+ This is used to force the i2c addresses for subclients of
+ a certain chip. Example usage is `force_subclients=0,0x2f,0x4a,0x4b'
+ to force the subclients of chip 0x2f on bus 0 to i2c addresses
+ 0x4a and 0x4b.
+
+
+Description
+-----------
+
+This driver implements support for the Winbond W83791D chip. The W83791G
+chip appears to be the same as the W83791D but is lead free.
+
+Detection of the chip can sometimes be foiled because it can be in an
+internal state that allows no clean access (Bank with ID register is not
+currently selected). If you know the address of the chip, use a 'force'
+parameter; this will put it into a more well-behaved state first.
+
+The driver implements three temperature sensors, ten voltage sensors,
+five fan rotation speed sensors and manual PWM control of each fan.
+
+Temperatures are measured in degrees Celsius and measurement resolution is 1
+degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when
+the temperature gets higher than the Overtemperature Shutdown value; it stays
+on until the temperature falls below the Hysteresis value.
+
+Voltage sensors (also known as IN sensors) report their values in millivolts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4, 8, 16,
+32, 64 or 128 for all fans) to give the readings more range or accuracy.
+
+Each fan controlled is controlled by PWM. The PWM duty cycle can be read and
+set for each fan separately. Valid values range from 0 (stop) to 255 (full).
+PWM 1-3 support Thermal Cruise mode, in which the PWMs are automatically
+regulated to keep respectively temp 1-3 at a certain target temperature.
+See below for the description of the sysfs-interface.
+
+The w83791d has a global bit used to enable beeping from the speaker when an
+alarm is triggered as well as a bitmask to enable or disable the beep for
+specific alarms. You need both the global beep enable bit and the
+corresponding beep bit to be on for a triggered alarm to sound a beep.
+
+The sysfs interface to the global enable is via the sysfs beep_enable file.
+This file is used for both legacy and new code.
+
+The sysfs interface to the beep bitmask has migrated from the original legacy
+method of a single sysfs beep_mask file to a newer method using multiple
+*_beep files as described in .../Documentation/hwmon/sysfs-interface.
+
+A similar change has occurred for the bitmap corresponding to the alarms. The
+original legacy method used a single sysfs alarms file containing a bitmap
+of triggered alarms. The newer method uses multiple sysfs *_alarm files
+(again following the pattern described in sysfs-interface).
+
+Since both methods read and write the underlying hardware, they can be used
+interchangeably and changes in one will automatically be reflected by
+the other. If you use the legacy bitmask method, your user-space code is
+responsible for handling the fact that the alarms and beep_mask bitmaps
+are not the same (see the table below).
+
+NOTE: All new code should be written to use the newer sysfs-interface
+specification as that avoids bitmap problems and is the preferred interface
+going forward.
+
+The driver reads the hardware chip values at most once every three seconds.
+User mode code requesting values more often will receive cached values.
+
+/sys files
+----------
+The sysfs-interface is documented in the 'sysfs-interface' file. Only
+chip-specific options are documented here.
+
+pwm[1-3]_enable - this file controls mode of fan/temperature control for
+ fan 1-3. Fan/PWM 4-5 only support manual mode.
+ * 1 Manual mode
+ * 2 Thermal Cruise mode
+ * 3 Fan Speed Cruise mode (no further support)
+
+temp[1-3]_target - defines the target temperature for Thermal Cruise mode.
+ Unit: millidegree Celsius
+ RW
+
+temp[1-3]_tolerance - temperature tolerance for Thermal Cruise mode.
+ Specifies an interval around the target temperature
+ in which the fan speed is not changed.
+ Unit: millidegree Celsius
+ RW
+
+Alarms bitmap vs. beep_mask bitmask
+------------------------------------
+For legacy code using the alarms and beep_mask files:
+
+in0 (VCORE) : alarms: 0x000001 beep_mask: 0x000001
+in1 (VINR0) : alarms: 0x000002 beep_mask: 0x002000 <== mismatch
+in2 (+3.3VIN): alarms: 0x000004 beep_mask: 0x000004
+in3 (5VDD) : alarms: 0x000008 beep_mask: 0x000008
+in4 (+12VIN) : alarms: 0x000100 beep_mask: 0x000100
+in5 (-12VIN) : alarms: 0x000200 beep_mask: 0x000200
+in6 (-5VIN) : alarms: 0x000400 beep_mask: 0x000400
+in7 (VSB) : alarms: 0x080000 beep_mask: 0x010000 <== mismatch
+in8 (VBAT) : alarms: 0x100000 beep_mask: 0x020000 <== mismatch
+in9 (VINR1) : alarms: 0x004000 beep_mask: 0x004000
+temp1 : alarms: 0x000010 beep_mask: 0x000010
+temp2 : alarms: 0x000020 beep_mask: 0x000020
+temp3 : alarms: 0x002000 beep_mask: 0x000002 <== mismatch
+fan1 : alarms: 0x000040 beep_mask: 0x000040
+fan2 : alarms: 0x000080 beep_mask: 0x000080
+fan3 : alarms: 0x000800 beep_mask: 0x000800
+fan4 : alarms: 0x200000 beep_mask: 0x200000
+fan5 : alarms: 0x400000 beep_mask: 0x400000
+tart1 : alarms: 0x010000 beep_mask: 0x040000 <== mismatch
+tart2 : alarms: 0x020000 beep_mask: 0x080000 <== mismatch
+tart3 : alarms: 0x040000 beep_mask: 0x100000 <== mismatch
+case_open : alarms: 0x001000 beep_mask: 0x001000
+global_enable: alarms: -------- beep_mask: 0x800000 (modified via beep_enable)
diff --git a/Documentation/hwmon/w83792d b/Documentation/hwmon/w83792d
new file mode 100644
index 000000000..53f7b6866
--- /dev/null
+++ b/Documentation/hwmon/w83792d
@@ -0,0 +1,173 @@
+Kernel driver w83792d
+=====================
+
+Supported chips:
+ * Winbond W83792D
+ Prefix: 'w83792d'
+ Addresses scanned: I2C 0x2c - 0x2f
+ Datasheet: http://www.winbond.com.tw
+
+Author: Shane Huang (Winbond)
+
+
+Module Parameters
+-----------------
+
+* init int
+ (default 1)
+ Use 'init=0' to bypass initializing the chip.
+ Try this if your computer crashes when you load the module.
+
+* force_subclients=bus,caddr,saddr,saddr
+ This is used to force the i2c addresses for subclients of
+ a certain chip. Example usage is `force_subclients=0,0x2f,0x4a,0x4b'
+ to force the subclients of chip 0x2f on bus 0 to i2c addresses
+ 0x4a and 0x4b.
+
+
+Description
+-----------
+
+This driver implements support for the Winbond W83792AD/D.
+
+Detection of the chip can sometimes be foiled because it can be in an
+internal state that allows no clean access (Bank with ID register is not
+currently selected). If you know the address of the chip, use a 'force'
+parameter; this will put it into a more well-behaved state first.
+
+The driver implements three temperature sensors, seven fan rotation speed
+sensors, nine voltage sensors, and two automatic fan regulation
+strategies called: Smart Fan I (Thermal Cruise mode) and Smart Fan II.
+Automatic fan control mode is possible only for fan1-fan3. Fan4-fan7 can run
+synchronized with selected fan (fan1-fan3). This functionality and manual PWM
+control for fan4-fan7 is not yet implemented.
+
+Temperatures are measured in degrees Celsius and measurement resolution is 1
+degC for temp1 and 0.5 degC for temp2 and temp3. An alarm is triggered when
+the temperature gets higher than the Overtemperature Shutdown value; it stays
+on until the temperature falls below the Hysteresis value.
+
+Fan rotation speeds are reported in RPM (rotations per minute). An alarm is
+triggered if the rotation speed has dropped below a programmable limit. Fan
+readings can be divided by a programmable divider (1, 2, 4, 8, 16, 32, 64 or
+128) to give the readings more range or accuracy.
+
+Voltage sensors (also known as IN sensors) report their values in millivolts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit.
+
+Alarms are provided as output from "realtime status register". Following bits
+are defined:
+
+bit - alarm on:
+0 - in0
+1 - in1
+2 - temp1
+3 - temp2
+4 - temp3
+5 - fan1
+6 - fan2
+7 - fan3
+8 - in2
+9 - in3
+10 - in4
+11 - in5
+12 - in6
+13 - VID change
+14 - chassis
+15 - fan7
+16 - tart1
+17 - tart2
+18 - tart3
+19 - in7
+20 - in8
+21 - fan4
+22 - fan5
+23 - fan6
+
+Tart will be asserted while target temperature cannot be achieved after 3 minutes
+of full speed rotation of corresponding fan.
+
+In addition to the alarms described above, there is a CHAS alarm on the chips
+which triggers if your computer case is open (This one is latched, contrary
+to realtime alarms).
+
+The chips only update values each 3 seconds; reading them more often will
+do no harm, but will return 'old' values.
+
+
+W83792D PROBLEMS
+----------------
+Known problems:
+ - This driver is only for Winbond W83792D C version device, there
+ are also some motherboards with B version W83792D device. The
+ calculation method to in6-in7(measured value, limits) is a little
+ different between C and B version. C or B version can be identified
+ by CR[0x49h].
+ - The function of vid and vrm has not been finished, because I'm NOT
+ very familiar with them. Adding support is welcome.
+  - The function of chassis open detection needs more tests.
+ - If you have ASUS server board and chip was not found: Then you will
+ need to upgrade to latest (or beta) BIOS. If it does not help please
+ contact us.
+
+Fan control
+-----------
+
+Manual mode
+-----------
+
+Works as expected. You just need to specify desired PWM/DC value (fan speed)
+in appropriate pwm# file.
+
+Thermal cruise
+--------------
+
+In this mode, W83792D provides the Smart Fan system to automatically control
+fan speed to keep the temperatures of CPU and the system within specific
+range. At first a wanted temperature and interval must be set. This is done
+via thermal_cruise# file. The tolerance# file serves to create T +- tolerance
+interval. The fan speed will be lowered as long as the current temperature
+remains below the thermal_cruise# +- tolerance# value. Once the temperature
+exceeds the high limit (T+tolerance), the fan will be turned on with a
+specific speed set by pwm# and automatically controlled its PWM duty cycle
+with the temperature varying. Three conditions may occur:
+
+(1) If the temperature still exceeds the high limit, PWM duty
+cycle will increase slowly.
+
+(2) If the temperature goes below the high limit, but still above the low
+limit (T-tolerance), the fan speed will be fixed at the current speed because
+the temperature is in the target range.
+
+(3) If the temperature goes below the low limit, PWM duty cycle will decrease
+slowly to 0 or a preset stop value until the temperature exceeds the low
+limit. (The preset stop value handling is not yet implemented in driver)
+
+Smart Fan II
+------------
+
+W83792D also provides a special mode for fan. Four temperature points are
+available. When related temperature sensors detects the temperature in preset
+temperature region (sf2_point@_fan# +- tolerance#) it will cause fans to run
+on programmed value from sf2_level@_fan#. You need to set four temperatures
+for each fan.
+
+
+/sys files
+----------
+
+pwm[1-3] - this file stores PWM duty cycle or DC value (fan speed) in range:
+ 0 (stop) to 255 (full)
+pwm[1-3]_enable - this file controls mode of fan/temperature control:
+ * 0 Disabled
+ * 1 Manual mode
+ * 2 Smart Fan II
+ * 3 Thermal Cruise
+pwm[1-3]_mode - Select PWM of DC mode
+ * 0 DC
+ * 1 PWM
+thermal_cruise[1-3] - Selects the desired temperature for cruise (degC)
+tolerance[1-3] - Value in degrees of Celsius (degC) for +- T
+sf2_point[1-4]_fan[1-3] - four temperature points for each fan for Smart Fan II
+sf2_level[1-3]_fan[1-3] - three PWM/DC levels for each fan for Smart Fan II
diff --git a/Documentation/hwmon/w83793 b/Documentation/hwmon/w83793
new file mode 100644
index 000000000..6cc5f639b
--- /dev/null
+++ b/Documentation/hwmon/w83793
@@ -0,0 +1,106 @@
+Kernel driver w83793
+====================
+
+Supported chips:
+ * Winbond W83793G/W83793R
+ Prefix: 'w83793'
+ Addresses scanned: I2C 0x2c - 0x2f
+ Datasheet: Still not published
+
+Authors:
+ Yuan Mu (Winbond Electronics)
+ Rudolf Marek <r.marek@assembler.cz>
+
+
+Module parameters
+-----------------
+
+* reset int
+ (default 0)
+ This parameter is not recommended, it will lose motherboard specific
+ settings. Use 'reset=1' to reset the chip when loading this module.
+
+* force_subclients=bus,caddr,saddr1,saddr2
+ This is used to force the i2c addresses for subclients of
+ a certain chip. Typical usage is `force_subclients=0,0x2f,0x4a,0x4b'
+ to force the subclients of chip 0x2f on bus 0 to i2c addresses
+ 0x4a and 0x4b.
+
+
+Description
+-----------
+
+This driver implements support for Winbond W83793G/W83793R chips.
+
+* Exported features
+ This driver exports 10 voltage sensors, up to 12 fan tachometer inputs,
+ 6 remote temperatures, up to 8 sets of PWM fan controls, SmartFan
+ (automatic fan speed control) on all temperature/PWM combinations, 2
+ sets of 6-pin CPU VID input.
+
+* Sensor resolutions
+ If your motherboard maker used the reference design, the resolution of
+ voltage0-2 is 2mV, resolution of voltage3/4/5 is 16mV, 8mV for voltage6,
+ 24mV for voltage7/8. Temp1-4 have a 0.25 degree Celsius resolution,
+ temp5-6 have a 1 degree Celsiis resolution.
+
+* Temperature sensor types
+ Temp1-4 have 2 possible types. It can be read from (and written to)
+ temp[1-4]_type.
+ - If the value is 3, it starts monitoring using a remote termal diode
+ (default).
+ - If the value is 6, it starts monitoring using the temperature sensor
+ in Intel CPU and get result by PECI.
+ Temp5-6 can be connected to external thermistors (value of
+ temp[5-6]_type is 4).
+
+* Alarm mechanism
+ For voltage sensors, an alarm triggers if the measured value is below
+ the low voltage limit or over the high voltage limit.
+ For temperature sensors, an alarm triggers if the measured value goes
+ above the high temperature limit, and wears off only after the measured
+ value drops below the hysteresis value.
+ For fan sensors, an alarm triggers if the measured value is below the
+ low speed limit.
+
+* SmartFan/PWM control
+ If you want to set a pwm fan to manual mode, you just need to make sure it
+ is not controlled by any temp channel, for example, you want to set fan1
+ to manual mode, you need to check the value of temp[1-6]_fan_map, make
+ sure bit 0 is cleared in the 6 values. And then set the pwm1 value to
+ control the fan.
+
+ Each temperature channel can control all the 8 PWM outputs (by setting the
+ corresponding bit in tempX_fan_map), you can set the temperature channel
+ mode using temp[1-6]_pwm_enable, 2 is Thermal Cruise mode and 3
+ is the SmartFanII mode. Temperature channels will try to speed up or
+ slow down all controlled fans, this means one fan can receive different
+ PWM value requests from different temperature channels, but the chip
+ will always pick the safest (max) PWM value for each fan.
+
+ In Thermal Cruise mode, the chip attempts to keep the temperature at a
+ predefined value, within a tolerance margin. So if tempX_input >
+ thermal_cruiseX + toleranceX, the chip will increase the PWM value,
+ if tempX_input < thermal_cruiseX - toleranceX, the chip will decrease
+ the PWM value. If the temperature is within the tolerance range, the PWM
+ value is left unchanged.
+
+ SmartFanII works differently, you have to define up to 7 PWM, temperature
+ trip points, defining a PWM/temperature curve which the chip will follow.
+ While not fundamentally different from the Thermal Cruise mode, the
+ implementation is quite different, giving you a finer-grained control.
+
+* Chassis
+ If the case open alarm triggers, it will stay in this state unless cleared
+ by writing 0 to the sysfs file "intrusion0_alarm".
+
+* VID and VRM
+ The VRM version is detected automatically, don't modify the it unless you
+ *do* know the cpu VRM version and it's not properly detected.
+
+
+Notes
+-----
+
+ Only Fan1-5 and PWM1-3 are guaranteed to always exist, other fan inputs and
+ PWM outputs may or may not exist depending on the chip pin configuration.
diff --git a/Documentation/hwmon/w83795 b/Documentation/hwmon/w83795
new file mode 100644
index 000000000..d3e678216
--- /dev/null
+++ b/Documentation/hwmon/w83795
@@ -0,0 +1,127 @@
+Kernel driver w83795
+====================
+
+Supported chips:
+ * Winbond/Nuvoton W83795G
+ Prefix: 'w83795g'
+ Addresses scanned: I2C 0x2c - 0x2f
+ Datasheet: Available for download on nuvoton.com
+ * Winbond/Nuvoton W83795ADG
+ Prefix: 'w83795adg'
+ Addresses scanned: I2C 0x2c - 0x2f
+ Datasheet: Available for download on nuvoton.com
+
+Authors:
+ Wei Song (Nuvoton)
+ Jean Delvare <jdelvare@suse.de>
+
+
+Pin mapping
+-----------
+
+Here is a summary of the pin mapping for the W83795G and W83795ADG.
+This can be useful to convert data provided by board manufacturers
+into working libsensors configuration statements.
+
+ W83795G |
+ Pin | Name | Register | Sysfs attribute
+------------------------------------------------------------------
+ 13 | VSEN1 (VCORE1) | 10h | in0
+ 14 | VSEN2 (VCORE2) | 11h | in1
+ 15 | VSEN3 (VCORE3) | 12h | in2
+ 16 | VSEN4 | 13h | in3
+ 17 | VSEN5 | 14h | in4
+ 18 | VSEN6 | 15h | in5
+ 19 | VSEN7 | 16h | in6
+ 20 | VSEN8 | 17h | in7
+ 21 | VSEN9 | 18h | in8
+ 22 | VSEN10 | 19h | in9
+ 23 | VSEN11 | 1Ah | in10
+ 28 | VTT | 1Bh | in11
+ 24 | 3VDD | 1Ch | in12
+ 25 | 3VSB | 1Dh | in13
+ 26 | VBAT | 1Eh | in14
+ 3 | VSEN12/TR5 | 1Fh | in15/temp5
+ 4 | VSEN13/TR5 | 20h | in16/temp6
+ 5/ 6 | VDSEN14/TR1/TD1 | 21h | in17/temp1
+ 7/ 8 | VDSEN15/TR2/TD2 | 22h | in18/temp2
+ 9/ 10 | VDSEN16/TR3/TD3 | 23h | in19/temp3
+ 11/ 12 | VDSEN17/TR4/TD4 | 24h | in20/temp4
+ 40 | FANIN1 | 2Eh | fan1
+ 42 | FANIN2 | 2Fh | fan2
+ 44 | FANIN3 | 30h | fan3
+ 46 | FANIN4 | 31h | fan4
+ 48 | FANIN5 | 32h | fan5
+ 50 | FANIN6 | 33h | fan6
+ 52 | FANIN7 | 34h | fan7
+ 54 | FANIN8 | 35h | fan8
+ 57 | FANIN9 | 36h | fan9
+ 58 | FANIN10 | 37h | fan10
+ 59 | FANIN11 | 38h | fan11
+ 60 | FANIN12 | 39h | fan12
+ 31 | FANIN13 | 3Ah | fan13
+ 35 | FANIN14 | 3Bh | fan14
+ 41 | FANCTL1 | 10h (bank 2) | pwm1
+ 43 | FANCTL2 | 11h (bank 2) | pwm2
+ 45 | FANCTL3 | 12h (bank 2) | pwm3
+ 47 | FANCTL4 | 13h (bank 2) | pwm4
+ 49 | FANCTL5 | 14h (bank 2) | pwm5
+ 51 | FANCTL6 | 15h (bank 2) | pwm6
+ 53 | FANCTL7 | 16h (bank 2) | pwm7
+ 55 | FANCTL8 | 17h (bank 2) | pwm8
+ 29/ 30 | PECI/TSI (DTS1) | 26h | temp7
+ 29/ 30 | PECI/TSI (DTS2) | 27h | temp8
+ 29/ 30 | PECI/TSI (DTS3) | 28h | temp9
+ 29/ 30 | PECI/TSI (DTS4) | 29h | temp10
+ 29/ 30 | PECI/TSI (DTS5) | 2Ah | temp11
+ 29/ 30 | PECI/TSI (DTS6) | 2Bh | temp12
+ 29/ 30 | PECI/TSI (DTS7) | 2Ch | temp13
+ 29/ 30 | PECI/TSI (DTS8) | 2Dh | temp14
+ 27 | CASEOPEN# | 46h | intrusion0
+
+ W83795ADG |
+ Pin | Name | Register | Sysfs attribute
+------------------------------------------------------------------
+ 10 | VSEN1 (VCORE1) | 10h | in0
+ 11 | VSEN2 (VCORE2) | 11h | in1
+ 12 | VSEN3 (VCORE3) | 12h | in2
+ 13 | VSEN4 | 13h | in3
+ 14 | VSEN5 | 14h | in4
+ 15 | VSEN6 | 15h | in5
+ 16 | VSEN7 | 16h | in6
+ 17 | VSEN8 | 17h | in7
+ 22 | VTT | 1Bh | in11
+ 18 | 3VDD | 1Ch | in12
+ 19 | 3VSB | 1Dh | in13
+ 20 | VBAT | 1Eh | in14
+ 48 | VSEN12/TR5 | 1Fh | in15/temp5
+ 1 | VSEN13/TR5 | 20h | in16/temp6
+ 2/ 3 | VDSEN14/TR1/TD1 | 21h | in17/temp1
+ 4/ 5 | VDSEN15/TR2/TD2 | 22h | in18/temp2
+ 6/ 7 | VDSEN16/TR3/TD3 | 23h | in19/temp3
+ 8/ 9 | VDSEN17/TR4/TD4 | 24h | in20/temp4
+ 32 | FANIN1 | 2Eh | fan1
+ 34 | FANIN2 | 2Fh | fan2
+ 36 | FANIN3 | 30h | fan3
+ 37 | FANIN4 | 31h | fan4
+ 38 | FANIN5 | 32h | fan5
+ 39 | FANIN6 | 33h | fan6
+ 40 | FANIN7 | 34h | fan7
+ 41 | FANIN8 | 35h | fan8
+ 43 | FANIN9 | 36h | fan9
+ 44 | FANIN10 | 37h | fan10
+ 45 | FANIN11 | 38h | fan11
+ 46 | FANIN12 | 39h | fan12
+ 24 | FANIN13 | 3Ah | fan13
+ 28 | FANIN14 | 3Bh | fan14
+ 33 | FANCTL1 | 10h (bank 2) | pwm1
+ 35 | FANCTL2 | 11h (bank 2) | pwm2
+ 23 | PECI (DTS1) | 26h | temp7
+ 23 | PECI (DTS2) | 27h | temp8
+ 23 | PECI (DTS3) | 28h | temp9
+ 23 | PECI (DTS4) | 29h | temp10
+ 23 | PECI (DTS5) | 2Ah | temp11
+ 23 | PECI (DTS6) | 2Bh | temp12
+ 23 | PECI (DTS7) | 2Ch | temp13
+ 23 | PECI (DTS8) | 2Dh | temp14
+ 21 | CASEOPEN# | 46h | intrusion0
diff --git a/Documentation/hwmon/w83l785ts b/Documentation/hwmon/w83l785ts
new file mode 100644
index 000000000..c89784788
--- /dev/null
+++ b/Documentation/hwmon/w83l785ts
@@ -0,0 +1,40 @@
+Kernel driver w83l785ts
+=======================
+
+Supported chips:
+ * Winbond W83L785TS-S
+ Prefix: 'w83l785ts'
+ Addresses scanned: I2C 0x2e
+ Datasheet: Publicly available at the Winbond USA website
+ http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/W83L785TS-S.pdf
+
+Authors:
+ Jean Delvare <jdelvare@suse.de>
+
+Description
+-----------
+
+The W83L785TS-S is a digital temperature sensor. It senses the
+temperature of a single external diode. The high limit is
+theoretically defined as 85 or 100 degrees C through a combination
+of external resistors, so the user cannot change it. Values seen so
+far suggest that the two possible limits are actually 95 and 110
+degrees C. The datasheet is rather poor and obviously inaccurate
+on several points including this one.
+
+All temperature values are given in degrees Celsius. Resolution
+is 1.0 degree. See the datasheet for details.
+
+The w83l785ts driver will not update its values more frequently than
+every other second; reading them more often will do no harm, but will
+return 'old' values.
+
+Known Issues
+------------
+
+On some systems (Asus), the BIOS is known to interfere with the driver
+and cause read errors. Or maybe the W83L785TS-S chip is simply unreliable,
+we don't really know. The driver will retry a given number of times
+(5 by default) and then give up, returning the old value (or 0 if
+there is no old value). It seems to work well enough so that you should
+not notice anything. Thanks to James Bolt for helping test this feature.
diff --git a/Documentation/hwmon/w83l786ng b/Documentation/hwmon/w83l786ng
new file mode 100644
index 000000000..d8f55d7ff
--- /dev/null
+++ b/Documentation/hwmon/w83l786ng
@@ -0,0 +1,54 @@
+Kernel driver w83l786ng
+=====================
+
+Supported chips:
+ * Winbond W83L786NG/W83L786NR
+ Prefix: 'w83l786ng'
+ Addresses scanned: I2C 0x2e - 0x2f
+ Datasheet: http://www.winbond-usa.com/products/winbond_products/pdfs/PCIC/W83L786NRNG09.pdf
+
+Author: Kevin Lo <kevlo@kevlo.org>
+
+
+Module Parameters
+-----------------
+
+* reset boolean
+ (default 0)
+ Use 'reset=1' to reset the chip (via index 0x40, bit 7). The default
+ behavior is no chip reset to preserve BIOS settings
+
+
+Description
+-----------
+
+This driver implements support for Winbond W83L786NG/W83L786NR chips.
+
+The driver implements two temperature sensors, two fan rotation speed
+sensors, and three voltage sensors.
+
+Temperatures are measured in degrees Celsius and measurement resolution is 1
+degC for temp1 and temp2.
+
+Fan rotation speeds are reported in RPM (rotations per minute). Fan readings
+readings can be divided by a programmable divider (1, 2, 4, 8, 16, 32, 64
+or 128 for fan 1/2) to give the readings more range or accuracy.
+
+Voltage sensors (also known as IN sensors) report their values in millivolts.
+An alarm is triggered if the voltage has crossed a programmable minimum
+or maximum limit.
+
+/sys files
+----------
+
+pwm[1-2] - this file stores PWM duty cycle or DC value (fan speed) in range:
+ 0 (stop) to 255 (full)
+pwm[1-2]_enable - this file controls mode of fan/temperature control:
+ * 0 Manual Mode
+ * 1 Thermal Cruise
+ * 2 Smart Fan II
+ * 4 FAN_SET
+pwm[1-2]_mode - Select PWM of DC mode
+ * 0 DC
+ * 1 PWM
+tolerance[1-2] - Value in degrees of Celsius (degC) for +- T
diff --git a/Documentation/hwmon/wm831x b/Documentation/hwmon/wm831x
new file mode 100644
index 000000000..11446757c
--- /dev/null
+++ b/Documentation/hwmon/wm831x
@@ -0,0 +1,37 @@
+Kernel driver wm831x-hwmon
+==========================
+
+Supported chips:
+ * Wolfson Microelectronics WM831x PMICs
+ Prefix: 'wm831x'
+ Datasheet:
+ http://www.wolfsonmicro.com/products/WM8310
+ http://www.wolfsonmicro.com/products/WM8311
+ http://www.wolfsonmicro.com/products/WM8312
+
+Authors: Mark Brown <broonie@opensource.wolfsonmicro.com>
+
+Description
+-----------
+
+The WM831x series of PMICs include an AUXADC which can be used to
+monitor a range of system operating parameters, including the voltages
+of the major supplies within the system. Currently the driver provides
+reporting of all the input values but does not provide any alarms.
+
+Voltage Monitoring
+------------------
+
+Voltages are sampled by a 12 bit ADC. Voltages in millivolts are 1.465
+times the ADC value.
+
+Temperature Monitoring
+----------------------
+
+Temperatures are sampled by a 12 bit ADC. Chip and battery temperatures
+are available. The chip temperature is calculated as:
+
+ Degrees celsius = (512.18 - data) / 1.0983
+
+while the battery temperature calculation will depend on the NTC
+thermistor component.
diff --git a/Documentation/hwmon/wm8350 b/Documentation/hwmon/wm8350
new file mode 100644
index 000000000..98f923bd2
--- /dev/null
+++ b/Documentation/hwmon/wm8350
@@ -0,0 +1,26 @@
+Kernel driver wm8350-hwmon
+==========================
+
+Supported chips:
+ * Wolfson Microelectronics WM835x PMICs
+ Prefix: 'wm8350'
+ Datasheet:
+ http://www.wolfsonmicro.com/products/WM8350
+ http://www.wolfsonmicro.com/products/WM8351
+ http://www.wolfsonmicro.com/products/WM8352
+
+Authors: Mark Brown <broonie@opensource.wolfsonmicro.com>
+
+Description
+-----------
+
+The WM835x series of PMICs include an AUXADC which can be used to
+monitor a range of system operating parameters, including the voltages
+of the major supplies within the system. Currently the driver provides
+simple access to these major supplies.
+
+Voltage Monitoring
+------------------
+
+Voltages are sampled by a 12 bit ADC. For the internal supplies the ADC
+is referenced to the system VRTC.
diff --git a/Documentation/hwmon/zl6100 b/Documentation/hwmon/zl6100
new file mode 100644
index 000000000..33908a4d6
--- /dev/null
+++ b/Documentation/hwmon/zl6100
@@ -0,0 +1,160 @@
+Kernel driver zl6100
+====================
+
+Supported chips:
+ * Intersil / Zilker Labs ZL2004
+ Prefix: 'zl2004'
+ Addresses scanned: -
+ Datasheet: http://www.intersil.com/data/fn/fn6847.pdf
+ * Intersil / Zilker Labs ZL2005
+ Prefix: 'zl2005'
+ Addresses scanned: -
+ Datasheet: http://www.intersil.com/data/fn/fn6848.pdf
+ * Intersil / Zilker Labs ZL2006
+ Prefix: 'zl2006'
+ Addresses scanned: -
+ Datasheet: http://www.intersil.com/data/fn/fn6850.pdf
+ * Intersil / Zilker Labs ZL2008
+ Prefix: 'zl2008'
+ Addresses scanned: -
+ Datasheet: http://www.intersil.com/data/fn/fn6859.pdf
+ * Intersil / Zilker Labs ZL2105
+ Prefix: 'zl2105'
+ Addresses scanned: -
+ Datasheet: http://www.intersil.com/data/fn/fn6851.pdf
+ * Intersil / Zilker Labs ZL2106
+ Prefix: 'zl2106'
+ Addresses scanned: -
+ Datasheet: http://www.intersil.com/data/fn/fn6852.pdf
+ * Intersil / Zilker Labs ZL6100
+ Prefix: 'zl6100'
+ Addresses scanned: -
+ Datasheet: http://www.intersil.com/data/fn/fn6876.pdf
+ * Intersil / Zilker Labs ZL6105
+ Prefix: 'zl6105'
+ Addresses scanned: -
+ Datasheet: http://www.intersil.com/data/fn/fn6906.pdf
+ * Intersil / Zilker Labs ZL9101M
+ Prefix: 'zl9101'
+ Addresses scanned: -
+ Datasheet: http://www.intersil.com/data/fn/fn7669.pdf
+ * Intersil / Zilker Labs ZL9117M
+ Prefix: 'zl9117'
+ Addresses scanned: -
+ Datasheet: http://www.intersil.com/data/fn/fn7914.pdf
+ * Ericsson BMR450, BMR451
+ Prefix: 'bmr450', 'bmr451'
+ Addresses scanned: -
+ Datasheet:
+http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146401
+ * Ericsson BMR462, BMR463, BMR464
+ Prefixes: 'bmr462', 'bmr463', 'bmr464'
+ Addresses scanned: -
+ Datasheet:
+http://archive.ericsson.net/service/internet/picov/get?DocNo=28701-EN/LZT146256
+
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver supports hardware montoring for Intersil / Zilker Labs ZL6100 and
+compatible digital DC-DC controllers.
+
+The driver is a client driver to the core PMBus driver. Please see
+Documentation/hwmon/pmbus and Documentation.hwmon/pmbus-core for details
+on PMBus client drivers.
+
+
+Usage Notes
+-----------
+
+This driver does not auto-detect devices. You will have to instantiate the
+devices explicitly. Please see Documentation/i2c/instantiating-devices for
+details.
+
+WARNING: Do not access chip registers using the i2cdump command, and do not use
+any of the i2ctools commands on a command register used to save and restore
+configuration data (0x11, 0x12, 0x15, 0x16, and 0xf4). The chips supported by
+this driver interpret any access to those command registers (including read
+commands) as request to execute the command in question. Unless write accesses
+to those registers are protected, this may result in power loss, board resets,
+and/or Flash corruption. Worst case, your board may turn into a brick.
+
+
+Platform data support
+---------------------
+
+The driver supports standard PMBus driver platform data.
+
+
+Module parameters
+-----------------
+
+delay
+-----
+
+Intersil/Zilker Labs DC-DC controllers require a minimum interval between I2C
+bus accesses. According to Intersil, the minimum interval is 2 ms, though 1 ms
+appears to be sufficient and has not caused any problems in testing. The problem
+is known to affect all currently supported chips. For manual override, the
+driver provides a writeable module parameter, 'delay', which can be used to set
+the interval to a value between 0 and 65,535 microseconds.
+
+
+Sysfs entries
+-------------
+
+The following attributes are supported. Limits are read-write; all other
+attributes are read-only.
+
+in1_label "vin"
+in1_input Measured input voltage.
+in1_min Minimum input voltage.
+in1_max Maximum input voltage.
+in1_lcrit Critical minimum input voltage.
+in1_crit Critical maximum input voltage.
+in1_min_alarm Input voltage low alarm.
+in1_max_alarm Input voltage high alarm.
+in1_lcrit_alarm Input voltage critical low alarm.
+in1_crit_alarm Input voltage critical high alarm.
+
+in2_label "vmon"
+in2_input Measured voltage on VMON (ZL2004) or VDRV (ZL9101M,
+ ZL9117M) pin. Reported voltage is 16x the voltage on the
+ pin (adjusted internally by the chip).
+in2_lcrit Critical minimum VMON/VDRV Voltage.
+in2_crit Critical maximum VMON/VDRV voltage.
+in2_lcrit_alarm VMON/VDRV voltage critical low alarm.
+in2_crit_alarm VMON/VDRV voltage critical high alarm.
+
+ vmon attributes are supported on ZL2004, ZL9101M,
+ and ZL9117M only.
+
+inX_label "vout1"
+inX_input Measured output voltage.
+inX_lcrit Critical minimum output Voltage.
+inX_crit Critical maximum output voltage.
+inX_lcrit_alarm Critical output voltage critical low alarm.
+inX_crit_alarm Critical output voltage critical high alarm.
+
+ X is 3 for ZL2004, ZL9101M, and ZL9117M, 2 otherwise.
+
+curr1_label "iout1"
+curr1_input Measured output current.
+curr1_lcrit Critical minimum output current.
+curr1_crit Critical maximum output current.
+curr1_lcrit_alarm Output current critical low alarm.
+curr1_crit_alarm Output current critical high alarm.
+
+temp[12]_input Measured temperature.
+temp[12]_min Minimum temperature.
+temp[12]_max Maximum temperature.
+temp[12]_lcrit Critical low temperature.
+temp[12]_crit Critical high temperature.
+temp[12]_min_alarm Chip temperature low alarm.
+temp[12]_max_alarm Chip temperature high alarm.
+temp[12]_lcrit_alarm Chip temperature critical low alarm.
+temp[12]_crit_alarm Chip temperature critical high alarm.
diff --git a/Documentation/hwspinlock.txt b/Documentation/hwspinlock.txt
new file mode 100644
index 000000000..62f7d4ea6
--- /dev/null
+++ b/Documentation/hwspinlock.txt
@@ -0,0 +1,307 @@
+Hardware Spinlock Framework
+
+1. Introduction
+
+Hardware spinlock modules provide hardware assistance for synchronization
+and mutual exclusion between heterogeneous processors and those not operating
+under a single, shared operating system.
+
+For example, OMAP4 has dual Cortex-A9, dual Cortex-M3 and a C64x+ DSP,
+each of which is running a different Operating System (the master, A9,
+is usually running Linux and the slave processors, the M3 and the DSP,
+are running some flavor of RTOS).
+
+A generic hwspinlock framework allows platform-independent drivers to use
+the hwspinlock device in order to access data structures that are shared
+between remote processors, that otherwise have no alternative mechanism
+to accomplish synchronization and mutual exclusion operations.
+
+This is necessary, for example, for Inter-processor communications:
+on OMAP4, cpu-intensive multimedia tasks are offloaded by the host to the
+remote M3 and/or C64x+ slave processors (by an IPC subsystem called Syslink).
+
+To achieve fast message-based communications, a minimal kernel support
+is needed to deliver messages arriving from a remote processor to the
+appropriate user process.
+
+This communication is based on simple data structures that is shared between
+the remote processors, and access to it is synchronized using the hwspinlock
+module (remote processor directly places new messages in this shared data
+structure).
+
+A common hwspinlock interface makes it possible to have generic, platform-
+independent, drivers.
+
+2. User API
+
+ struct hwspinlock *hwspin_lock_request(void);
+ - dynamically assign an hwspinlock and return its address, or NULL
+ in case an unused hwspinlock isn't available. Users of this
+ API will usually want to communicate the lock's id to the remote core
+ before it can be used to achieve synchronization.
+ Should be called from a process context (might sleep).
+
+ struct hwspinlock *hwspin_lock_request_specific(unsigned int id);
+ - assign a specific hwspinlock id and return its address, or NULL
+ if that hwspinlock is already in use. Usually board code will
+ be calling this function in order to reserve specific hwspinlock
+ ids for predefined purposes.
+ Should be called from a process context (might sleep).
+
+ int hwspin_lock_free(struct hwspinlock *hwlock);
+ - free a previously-assigned hwspinlock; returns 0 on success, or an
+ appropriate error code on failure (e.g. -EINVAL if the hwspinlock
+ is already free).
+ Should be called from a process context (might sleep).
+
+ int hwspin_lock_timeout(struct hwspinlock *hwlock, unsigned int timeout);
+ - lock a previously-assigned hwspinlock with a timeout limit (specified in
+ msecs). If the hwspinlock is already taken, the function will busy loop
+ waiting for it to be released, but give up when the timeout elapses.
+ Upon a successful return from this function, preemption is disabled so
+ the caller must not sleep, and is advised to release the hwspinlock as
+ soon as possible, in order to minimize remote cores polling on the
+ hardware interconnect.
+ Returns 0 when successful and an appropriate error code otherwise (most
+ notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
+ The function will never sleep.
+
+ int hwspin_lock_timeout_irq(struct hwspinlock *hwlock, unsigned int timeout);
+ - lock a previously-assigned hwspinlock with a timeout limit (specified in
+ msecs). If the hwspinlock is already taken, the function will busy loop
+ waiting for it to be released, but give up when the timeout elapses.
+ Upon a successful return from this function, preemption and the local
+ interrupts are disabled, so the caller must not sleep, and is advised to
+ release the hwspinlock as soon as possible.
+ Returns 0 when successful and an appropriate error code otherwise (most
+ notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
+ The function will never sleep.
+
+ int hwspin_lock_timeout_irqsave(struct hwspinlock *hwlock, unsigned int to,
+ unsigned long *flags);
+ - lock a previously-assigned hwspinlock with a timeout limit (specified in
+ msecs). If the hwspinlock is already taken, the function will busy loop
+ waiting for it to be released, but give up when the timeout elapses.
+ Upon a successful return from this function, preemption is disabled,
+ local interrupts are disabled and their previous state is saved at the
+ given flags placeholder. The caller must not sleep, and is advised to
+ release the hwspinlock as soon as possible.
+ Returns 0 when successful and an appropriate error code otherwise (most
+ notably -ETIMEDOUT if the hwspinlock is still busy after timeout msecs).
+ The function will never sleep.
+
+ int hwspin_trylock(struct hwspinlock *hwlock);
+ - attempt to lock a previously-assigned hwspinlock, but immediately fail if
+ it is already taken.
+ Upon a successful return from this function, preemption is disabled so
+ caller must not sleep, and is advised to release the hwspinlock as soon as
+ possible, in order to minimize remote cores polling on the hardware
+ interconnect.
+ Returns 0 on success and an appropriate error code otherwise (most
+ notably -EBUSY if the hwspinlock was already taken).
+ The function will never sleep.
+
+ int hwspin_trylock_irq(struct hwspinlock *hwlock);
+ - attempt to lock a previously-assigned hwspinlock, but immediately fail if
+ it is already taken.
+ Upon a successful return from this function, preemption and the local
+ interrupts are disabled so caller must not sleep, and is advised to
+ release the hwspinlock as soon as possible.
+ Returns 0 on success and an appropriate error code otherwise (most
+ notably -EBUSY if the hwspinlock was already taken).
+ The function will never sleep.
+
+ int hwspin_trylock_irqsave(struct hwspinlock *hwlock, unsigned long *flags);
+ - attempt to lock a previously-assigned hwspinlock, but immediately fail if
+ it is already taken.
+ Upon a successful return from this function, preemption is disabled,
+ the local interrupts are disabled and their previous state is saved
+ at the given flags placeholder. The caller must not sleep, and is advised
+ to release the hwspinlock as soon as possible.
+ Returns 0 on success and an appropriate error code otherwise (most
+ notably -EBUSY if the hwspinlock was already taken).
+ The function will never sleep.
+
+ void hwspin_unlock(struct hwspinlock *hwlock);
+ - unlock a previously-locked hwspinlock. Always succeed, and can be called
+ from any context (the function never sleeps). Note: code should _never_
+ unlock an hwspinlock which is already unlocked (there is no protection
+ against this).
+
+ void hwspin_unlock_irq(struct hwspinlock *hwlock);
+ - unlock a previously-locked hwspinlock and enable local interrupts.
+ The caller should _never_ unlock an hwspinlock which is already unlocked.
+ Doing so is considered a bug (there is no protection against this).
+ Upon a successful return from this function, preemption and local
+ interrupts are enabled. This function will never sleep.
+
+ void
+ hwspin_unlock_irqrestore(struct hwspinlock *hwlock, unsigned long *flags);
+ - unlock a previously-locked hwspinlock.
+ The caller should _never_ unlock an hwspinlock which is already unlocked.
+ Doing so is considered a bug (there is no protection against this).
+ Upon a successful return from this function, preemption is reenabled,
+ and the state of the local interrupts is restored to the state saved at
+ the given flags. This function will never sleep.
+
+ int hwspin_lock_get_id(struct hwspinlock *hwlock);
+ - retrieve id number of a given hwspinlock. This is needed when an
+ hwspinlock is dynamically assigned: before it can be used to achieve
+ mutual exclusion with a remote cpu, the id number should be communicated
+ to the remote task with which we want to synchronize.
+ Returns the hwspinlock id number, or -EINVAL if hwlock is null.
+
+3. Typical usage
+
+#include <linux/hwspinlock.h>
+#include <linux/err.h>
+
+int hwspinlock_example1(void)
+{
+ struct hwspinlock *hwlock;
+ int ret;
+
+ /* dynamically assign a hwspinlock */
+ hwlock = hwspin_lock_request();
+ if (!hwlock)
+ ...
+
+ id = hwspin_lock_get_id(hwlock);
+ /* probably need to communicate id to a remote processor now */
+
+ /* take the lock, spin for 1 sec if it's already taken */
+ ret = hwspin_lock_timeout(hwlock, 1000);
+ if (ret)
+ ...
+
+ /*
+ * we took the lock, do our thing now, but do NOT sleep
+ */
+
+ /* release the lock */
+ hwspin_unlock(hwlock);
+
+ /* free the lock */
+ ret = hwspin_lock_free(hwlock);
+ if (ret)
+ ...
+
+ return ret;
+}
+
+int hwspinlock_example2(void)
+{
+ struct hwspinlock *hwlock;
+ int ret;
+
+ /*
+ * assign a specific hwspinlock id - this should be called early
+ * by board init code.
+ */
+ hwlock = hwspin_lock_request_specific(PREDEFINED_LOCK_ID);
+ if (!hwlock)
+ ...
+
+ /* try to take it, but don't spin on it */
+ ret = hwspin_trylock(hwlock);
+ if (!ret) {
+ pr_info("lock is already taken\n");
+ return -EBUSY;
+ }
+
+ /*
+ * we took the lock, do our thing now, but do NOT sleep
+ */
+
+ /* release the lock */
+ hwspin_unlock(hwlock);
+
+ /* free the lock */
+ ret = hwspin_lock_free(hwlock);
+ if (ret)
+ ...
+
+ return ret;
+}
+
+
+4. API for implementors
+
+ int hwspin_lock_register(struct hwspinlock_device *bank, struct device *dev,
+ const struct hwspinlock_ops *ops, int base_id, int num_locks);
+ - to be called from the underlying platform-specific implementation, in
+ order to register a new hwspinlock device (which is usually a bank of
+ numerous locks). Should be called from a process context (this function
+ might sleep).
+ Returns 0 on success, or appropriate error code on failure.
+
+ int hwspin_lock_unregister(struct hwspinlock_device *bank);
+ - to be called from the underlying vendor-specific implementation, in order
+ to unregister an hwspinlock device (which is usually a bank of numerous
+ locks).
+ Should be called from a process context (this function might sleep).
+ Returns the address of hwspinlock on success, or NULL on error (e.g.
+ if the hwspinlock is still in use).
+
+5. Important structs
+
+struct hwspinlock_device is a device which usually contains a bank
+of hardware locks. It is registered by the underlying hwspinlock
+implementation using the hwspin_lock_register() API.
+
+/**
+ * struct hwspinlock_device - a device which usually spans numerous hwspinlocks
+ * @dev: underlying device, will be used to invoke runtime PM api
+ * @ops: platform-specific hwspinlock handlers
+ * @base_id: id index of the first lock in this device
+ * @num_locks: number of locks in this device
+ * @lock: dynamically allocated array of 'struct hwspinlock'
+ */
+struct hwspinlock_device {
+ struct device *dev;
+ const struct hwspinlock_ops *ops;
+ int base_id;
+ int num_locks;
+ struct hwspinlock lock[0];
+};
+
+struct hwspinlock_device contains an array of hwspinlock structs, each
+of which represents a single hardware lock:
+
+/**
+ * struct hwspinlock - this struct represents a single hwspinlock instance
+ * @bank: the hwspinlock_device structure which owns this lock
+ * @lock: initialized and used by hwspinlock core
+ * @priv: private data, owned by the underlying platform-specific hwspinlock drv
+ */
+struct hwspinlock {
+ struct hwspinlock_device *bank;
+ spinlock_t lock;
+ void *priv;
+};
+
+When registering a bank of locks, the hwspinlock driver only needs to
+set the priv members of the locks. The rest of the members are set and
+initialized by the hwspinlock core itself.
+
+6. Implementation callbacks
+
+There are three possible callbacks defined in 'struct hwspinlock_ops':
+
+struct hwspinlock_ops {
+ int (*trylock)(struct hwspinlock *lock);
+ void (*unlock)(struct hwspinlock *lock);
+ void (*relax)(struct hwspinlock *lock);
+};
+
+The first two callbacks are mandatory:
+
+The ->trylock() callback should make a single attempt to take the lock, and
+return 0 on failure and 1 on success. This callback may _not_ sleep.
+
+The ->unlock() callback releases the lock. It always succeed, and it, too,
+may _not_ sleep.
+
+The ->relax() callback is optional. It is called by hwspinlock core while
+spinning on a lock, and can be used by the underlying implementation to force
+a delay between two successive invocations of ->trylock(). It may _not_ sleep.
diff --git a/Documentation/i2c/busses/i2c-ali1535 b/Documentation/i2c/busses/i2c-ali1535
new file mode 100644
index 000000000..5d46342e4
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ali1535
@@ -0,0 +1,42 @@
+Kernel driver i2c-ali1535
+
+Supported adapters:
+ * Acer Labs, Inc. ALI 1535 (south bridge)
+ Datasheet: Now under NDA
+ http://www.ali.com.tw/
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>,
+ Mark D. Studebaker <mdsxyz123@yahoo.com>,
+ Dan Eaton <dan.eaton@rocketlogix.com>,
+ Stephen Rousset<stephen.rousset@rocketlogix.com>
+
+Description
+-----------
+
+This is the driver for the SMB Host controller on Acer Labs Inc. (ALI)
+M1535 South Bridge.
+
+The M1535 is a South bridge for portable systems. It is very similar to the
+M15x3 South bridges also produced by Acer Labs Inc. Some of the registers
+within the part have moved and some have been redefined slightly.
+Additionally, the sequencing of the SMBus transactions has been modified to
+be more consistent with the sequencing recommended by the manufacturer and
+observed through testing. These changes are reflected in this driver and
+can be identified by comparing this driver to the i2c-ali15x3 driver. For
+an overview of these chips see http://www.acerlabs.com
+
+The SMB controller is part of the M7101 device, which is an ACPI-compliant
+Power Management Unit (PMU).
+
+The whole M7101 device has to be enabled for the SMB to work. You can't
+just enable the SMB alone. The SMB and the ACPI have separate I/O spaces.
+We make sure that the SMB is enabled. We leave the ACPI alone.
+
+
+Features
+--------
+
+This driver controls the SMB Host only. This driver does not use
+interrupts.
diff --git a/Documentation/i2c/busses/i2c-ali1563 b/Documentation/i2c/busses/i2c-ali1563
new file mode 100644
index 000000000..41b1a077e
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ali1563
@@ -0,0 +1,27 @@
+Kernel driver i2c-ali1563
+
+Supported adapters:
+ * Acer Labs, Inc. ALI 1563 (south bridge)
+ Datasheet: Now under NDA
+ http://www.ali.com.tw/
+
+Author: Patrick Mochel <mochel@digitalimplant.org>
+
+Description
+-----------
+
+This is the driver for the SMB Host controller on Acer Labs Inc. (ALI)
+M1563 South Bridge.
+
+For an overview of these chips see http://www.acerlabs.com
+
+The M1563 southbridge is deceptively similar to the M1533, with a few
+notable exceptions. One of those happens to be the fact they upgraded the
+i2c core to be SMBus 2.0 compliant, and happens to be almost identical to
+the i2c controller found in the Intel 801 south bridges.
+
+Features
+--------
+
+This driver controls the SMB Host only. This driver does not use
+interrupts.
diff --git a/Documentation/i2c/busses/i2c-ali15x3 b/Documentation/i2c/busses/i2c-ali15x3
new file mode 100644
index 000000000..42888d8ac
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ali15x3
@@ -0,0 +1,112 @@
+Kernel driver i2c-ali15x3
+
+Supported adapters:
+ * Acer Labs, Inc. ALI 1533 and 1543C (south bridge)
+ Datasheet: Now under NDA
+ http://www.ali.com.tw/
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>,
+ Mark D. Studebaker <mdsxyz123@yahoo.com>
+
+Module Parameters
+-----------------
+
+* force_addr: int
+ Initialize the base address of the i2c controller
+
+
+Notes
+-----
+
+The force_addr parameter is useful for boards that don't set the address in
+the BIOS. Does not do a PCI force; the device must still be present in
+lspci. Don't use this unless the driver complains that the base address is
+not set.
+
+Example: 'modprobe i2c-ali15x3 force_addr=0xe800'
+
+SMBus periodically hangs on ASUS P5A motherboards and can only be cleared
+by a power cycle. Cause unknown (see Issues below).
+
+
+Description
+-----------
+
+This is the driver for the SMB Host controller on Acer Labs Inc. (ALI)
+M1541 and M1543C South Bridges.
+
+The M1543C is a South bridge for desktop systems.
+The M1541 is a South bridge for portable systems.
+They are part of the following ALI chipsets:
+
+ * "Aladdin Pro 2" includes the M1621 Slot 1 North bridge with AGP and
+ 100MHz CPU Front Side bus
+ * "Aladdin V" includes the M1541 Socket 7 North bridge with AGP and 100MHz
+ CPU Front Side bus
+ Some Aladdin V motherboards:
+ Asus P5A
+ Atrend ATC-5220
+ BCM/GVC VP1541
+ Biostar M5ALA
+ Gigabyte GA-5AX (** Generally doesn't work because the BIOS doesn't
+ enable the 7101 device! **)
+ Iwill XA100 Plus
+ Micronics C200
+ Microstar (MSI) MS-5169
+
+ * "Aladdin IV" includes the M1541 Socket 7 North bridge
+ with host bus up to 83.3 MHz.
+
+For an overview of these chips see http://www.acerlabs.com. At this time the
+full data sheets on the web site are password protected, however if you
+contact the ALI office in San Jose they may give you the password.
+
+The M1533/M1543C devices appear as FOUR separate devices on the PCI bus. An
+output of lspci will show something similar to the following:
+
+ 00:02.0 USB Controller: Acer Laboratories Inc. M5237 (rev 03)
+ 00:03.0 Bridge: Acer Laboratories Inc. M7101 <= THIS IS THE ONE WE NEED
+ 00:07.0 ISA bridge: Acer Laboratories Inc. M1533 (rev c3)
+ 00:0f.0 IDE interface: Acer Laboratories Inc. M5229 (rev c1)
+
+** IMPORTANT **
+** If you have a M1533 or M1543C on the board and you get
+** "ali15x3: Error: Can't detect ali15x3!"
+** then run lspci.
+** If you see the 1533 and 5229 devices but NOT the 7101 device,
+** then you must enable ACPI, the PMU, SMB, or something similar
+** in the BIOS.
+** The driver won't work if it can't find the M7101 device.
+
+The SMB controller is part of the M7101 device, which is an ACPI-compliant
+Power Management Unit (PMU).
+
+The whole M7101 device has to be enabled for the SMB to work. You can't
+just enable the SMB alone. The SMB and the ACPI have separate I/O spaces.
+We make sure that the SMB is enabled. We leave the ACPI alone.
+
+Features
+--------
+
+This driver controls the SMB Host only. The SMB Slave
+controller on the M15X3 is not enabled. This driver does not use
+interrupts.
+
+
+Issues
+------
+
+This driver requests the I/O space for only the SMB
+registers. It doesn't use the ACPI region.
+
+On the ASUS P5A motherboard, there are several reports that
+the SMBus will hang and this can only be resolved by
+powering off the computer. It appears to be worse when the board
+gets hot, for example under heavy CPU load, or in the summer.
+There may be electrical problems on this board.
+On the P5A, the W83781D sensor chip is on both the ISA and
+SMBus. Therefore the SMBus hangs can generally be avoided
+by accessing the W83781D on the ISA bus only.
+
diff --git a/Documentation/i2c/busses/i2c-amd756 b/Documentation/i2c/busses/i2c-amd756
new file mode 100644
index 000000000..67f30874d
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-amd756
@@ -0,0 +1,25 @@
+Kernel driver i2c-amd756
+
+Supported adapters:
+ * AMD 756
+ * AMD 766
+ * AMD 768
+ * AMD 8111
+ Datasheets: Publicly available on AMD website
+
+ * nVidia nForce
+ Datasheet: Unavailable
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>
+
+Description
+-----------
+
+This driver supports the AMD 756, 766, 768 and 8111 Peripheral Bus
+Controllers, and the nVidia nForce.
+
+Note that for the 8111, there are two SMBus adapters. The SMBus 1.0 adapter
+is supported by this driver, and the SMBus 2.0 adapter is supported by the
+i2c-amd8111 driver.
diff --git a/Documentation/i2c/busses/i2c-amd8111 b/Documentation/i2c/busses/i2c-amd8111
new file mode 100644
index 000000000..460dd6635
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-amd8111
@@ -0,0 +1,41 @@
+Kernel driver i2c-adm8111
+
+Supported adapters:
+ * AMD-8111 SMBus 2.0 PCI interface
+
+Datasheets:
+ AMD datasheet not yet available, but almost everything can be found
+ in the publicly available ACPI 2.0 specification, which the adapter
+ follows.
+
+Author: Vojtech Pavlik <vojtech@suse.cz>
+
+Description
+-----------
+
+If you see something like this:
+
+00:07.2 SMBus: Advanced Micro Devices [AMD] AMD-8111 SMBus 2.0 (rev 02)
+ Subsystem: Advanced Micro Devices [AMD] AMD-8111 SMBus 2.0
+ Flags: medium devsel, IRQ 19
+ I/O ports at d400 [size=32]
+
+in your 'lspci -v', then this driver is for your chipset.
+
+Process Call Support
+--------------------
+
+Supported.
+
+SMBus 2.0 Support
+-----------------
+
+Supported. Both PEC and block process call support is implemented. Slave
+mode or host notification are not yet implemented.
+
+Notes
+-----
+
+Note that for the 8111, there are two SMBus adapters. The SMBus 2.0 adapter
+is supported by this driver, and the SMBus 1.0 adapter is supported by the
+i2c-amd756 driver.
diff --git a/Documentation/i2c/busses/i2c-diolan-u2c b/Documentation/i2c/busses/i2c-diolan-u2c
new file mode 100644
index 000000000..0d6018c31
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-diolan-u2c
@@ -0,0 +1,26 @@
+Kernel driver i2c-diolan-u2c
+
+Supported adapters:
+ * Diolan U2C-12 I2C-USB adapter
+ Documentation:
+ http://www.diolan.com/i2c/u2c12.html
+
+Author: Guenter Roeck <linux@roeck-us.net>
+
+Description
+-----------
+
+This is the driver for the Diolan U2C-12 USB-I2C adapter.
+
+The Diolan U2C-12 I2C-USB Adapter provides a low cost solution to connect
+a computer to I2C slave devices using a USB interface. It also supports
+connectivity to SPI devices.
+
+This driver only supports the I2C interface of U2C-12. The driver does not use
+interrupts.
+
+
+Module parameters
+-----------------
+
+* frequency: I2C bus frequency
diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
new file mode 100644
index 000000000..82f48f774
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-i801
@@ -0,0 +1,164 @@
+Kernel driver i2c-i801
+
+Supported adapters:
+ * Intel 82801AA and 82801AB (ICH and ICH0 - part of the
+ '810' and '810E' chipsets)
+ * Intel 82801BA (ICH2 - part of the '815E' chipset)
+ * Intel 82801CA/CAM (ICH3)
+ * Intel 82801DB (ICH4) (HW PEC supported)
+ * Intel 82801EB/ER (ICH5) (HW PEC supported)
+ * Intel 6300ESB
+ * Intel 82801FB/FR/FW/FRW (ICH6)
+ * Intel 82801G (ICH7)
+ * Intel 631xESB/632xESB (ESB2)
+ * Intel 82801H (ICH8)
+ * Intel 82801I (ICH9)
+ * Intel EP80579 (Tolapai)
+ * Intel 82801JI (ICH10)
+ * Intel 5/3400 Series (PCH)
+ * Intel 6 Series (PCH)
+ * Intel Patsburg (PCH)
+ * Intel DH89xxCC (PCH)
+ * Intel Panther Point (PCH)
+ * Intel Lynx Point (PCH)
+ * Intel Lynx Point-LP (PCH)
+ * Intel Avoton (SOC)
+ * Intel Wellsburg (PCH)
+ * Intel Coleto Creek (PCH)
+ * Intel Wildcat Point (PCH)
+ * Intel Wildcat Point-LP (PCH)
+ * Intel BayTrail (SOC)
+ * Intel Sunrise Point-H (PCH)
+ * Intel Sunrise Point-LP (PCH)
+ Datasheets: Publicly available at the Intel website
+
+On Intel Patsburg and later chipsets, both the normal host SMBus controller
+and the additional 'Integrated Device Function' controllers are supported.
+
+Authors:
+ Mark Studebaker <mdsxyz123@yahoo.com>
+ Jean Delvare <jdelvare@suse.de>
+
+
+Module Parameters
+-----------------
+
+* disable_features (bit vector)
+Disable selected features normally supported by the device. This makes it
+possible to work around possible driver or hardware bugs if the feature in
+question doesn't work as intended for whatever reason. Bit values:
+ 0x01 disable SMBus PEC
+ 0x02 disable the block buffer
+ 0x08 disable the I2C block read functionality
+ 0x10 don't use interrupts
+
+
+Description
+-----------
+
+The ICH (properly known as the 82801AA), ICH0 (82801AB), ICH2 (82801BA),
+ICH3 (82801CA/CAM) and later devices (PCH) are Intel chips that are a part of
+Intel's '810' chipset for Celeron-based PCs, '810E' chipset for
+Pentium-based PCs, '815E' chipset, and others.
+
+The ICH chips contain at least SEVEN separate PCI functions in TWO logical
+PCI devices. An output of lspci will show something similar to the
+following:
+
+ 00:1e.0 PCI bridge: Intel Corporation: Unknown device 2418 (rev 01)
+ 00:1f.0 ISA bridge: Intel Corporation: Unknown device 2410 (rev 01)
+ 00:1f.1 IDE interface: Intel Corporation: Unknown device 2411 (rev 01)
+ 00:1f.2 USB Controller: Intel Corporation: Unknown device 2412 (rev 01)
+ 00:1f.3 Unknown class [0c05]: Intel Corporation: Unknown device 2413 (rev 01)
+
+The SMBus controller is function 3 in device 1f. Class 0c05 is SMBus Serial
+Controller.
+
+The ICH chips are quite similar to Intel's PIIX4 chip, at least in the
+SMBus controller.
+
+
+Process Call Support
+--------------------
+
+Not supported.
+
+
+I2C Block Read Support
+----------------------
+
+I2C block read is supported on the 82801EB (ICH5) and later chips.
+
+
+SMBus 2.0 Support
+-----------------
+
+The 82801DB (ICH4) and later chips support several SMBus 2.0 features.
+
+
+Interrupt Support
+-----------------
+
+PCI interrupt support is supported on the 82801EB (ICH5) and later chips.
+
+
+Hidden ICH SMBus
+----------------
+
+If your system has an Intel ICH south bridge, but you do NOT see the
+SMBus device at 00:1f.3 in lspci, and you can't figure out any way in the
+BIOS to enable it, it means it has been hidden by the BIOS code. Asus is
+well known for first doing this on their P4B motherboard, and many other
+boards after that. Some vendor machines are affected as well.
+
+The first thing to try is the "i2c_ec" ACPI driver. It could be that the
+SMBus was hidden on purpose because it'll be driven by ACPI. If the
+i2c_ec driver works for you, just forget about the i2c-i801 driver and
+don't try to unhide the ICH SMBus. Even if i2c_ec doesn't work, you
+better make sure that the SMBus isn't used by the ACPI code. Try loading
+the "fan" and "thermal" drivers, and check in /proc/acpi/fan and
+/proc/acpi/thermal_zone. If you find anything there, it's likely that
+the ACPI is accessing the SMBus and it's safer not to unhide it. Only
+once you are certain that ACPI isn't using the SMBus, you can attempt
+to unhide it.
+
+In order to unhide the SMBus, we need to change the value of a PCI
+register before the kernel enumerates the PCI devices. This is done in
+drivers/pci/quirks.c, where all affected boards must be listed (see
+function asus_hides_smbus_hostbridge.) If the SMBus device is missing,
+and you think there's something interesting on the SMBus (e.g. a
+hardware monitoring chip), you need to add your board to the list.
+
+The motherboard is identified using the subvendor and subdevice IDs of the
+host bridge PCI device. Get yours with "lspci -n -v -s 00:00.0":
+
+00:00.0 Class 0600: 8086:2570 (rev 02)
+ Subsystem: 1043:80f2
+ Flags: bus master, fast devsel, latency 0
+ Memory at fc000000 (32-bit, prefetchable) [size=32M]
+ Capabilities: [e4] #09 [2106]
+ Capabilities: [a0] AGP version 3.0
+
+Here the host bridge ID is 2570 (82865G/PE/P), the subvendor ID is 1043
+(Asus) and the subdevice ID is 80f2 (P4P800-X). You can find the symbolic
+names for the bridge ID and the subvendor ID in include/linux/pci_ids.h,
+and then add a case for your subdevice ID at the right place in
+drivers/pci/quirks.c. Then please give it very good testing, to make sure
+that the unhidden SMBus doesn't conflict with e.g. ACPI.
+
+If it works, proves useful (i.e. there are usable chips on the SMBus)
+and seems safe, please submit a patch for inclusion into the kernel.
+
+Note: There's a useful script in lm_sensors 2.10.2 and later, named
+unhide_ICH_SMBus (in prog/hotplug), which uses the fakephp driver to
+temporarily unhide the SMBus without having to patch and recompile your
+kernel. It's very convenient if you just want to check if there's
+anything interesting on your hidden ICH SMBus.
+
+
+**********************
+The lm_sensors project gratefully acknowledges the support of Texas
+Instruments in the initial development of this driver.
+
+The lm_sensors project gratefully acknowledges the support of Intel in the
+development of SMBus 2.0 / ICH4 features of this driver.
diff --git a/Documentation/i2c/busses/i2c-ismt b/Documentation/i2c/busses/i2c-ismt
new file mode 100644
index 000000000..737355822
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ismt
@@ -0,0 +1,36 @@
+Kernel driver i2c-ismt
+
+Supported adapters:
+ * Intel S12xx series SOCs
+
+Authors:
+ Bill Brown <bill.e.brown@intel.com>
+
+
+Module Parameters
+-----------------
+
+* bus_speed (unsigned int)
+Allows changing of the bus speed. Normally, the bus speed is set by the BIOS
+and never needs to be changed. However, some SMBus analyzers are too slow for
+monitoring the bus during debug, thus the need for this module parameter.
+Specify the bus speed in kHz.
+Available bus frequency settings:
+ 0 no change
+ 80 kHz
+ 100 kHz
+ 400 kHz
+ 1000 kHz
+
+
+Description
+-----------
+
+The S12xx series of SOCs have a pair of integrated SMBus 2.0 controllers
+targeted primarily at the microserver and storage markets.
+
+The S12xx series contain a pair of PCI functions. An output of lspci will show
+something similar to the following:
+
+ 00:13.0 System peripheral: Intel Corporation Centerton SMBus 2.0 Controller 0
+ 00:13.1 System peripheral: Intel Corporation Centerton SMBus 2.0 Controller 1
diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2
new file mode 100644
index 000000000..9698c396b
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-nforce2
@@ -0,0 +1,50 @@
+Kernel driver i2c-nforce2
+
+Supported adapters:
+ * nForce2 MCP 10de:0064
+ * nForce2 Ultra 400 MCP 10de:0084
+ * nForce3 Pro150 MCP 10de:00D4
+ * nForce3 250Gb MCP 10de:00E4
+ * nForce4 MCP 10de:0052
+ * nForce4 MCP-04 10de:0034
+ * nForce MCP51 10de:0264
+ * nForce MCP55 10de:0368
+ * nForce MCP61 10de:03EB
+ * nForce MCP65 10de:0446
+ * nForce MCP67 10de:0542
+ * nForce MCP73 10de:07D8
+ * nForce MCP78S 10de:0752
+ * nForce MCP79 10de:0AA2
+
+Datasheet: not publicly available, but seems to be similar to the
+ AMD-8111 SMBus 2.0 adapter.
+
+Authors:
+ Hans-Frieder Vogt <hfvogt@gmx.net>,
+ Thomas Leibold <thomas@plx.com>,
+ Patrick Dreker <patrick@dreker.de>
+
+Description
+-----------
+
+i2c-nforce2 is a driver for the SMBuses included in the nVidia nForce2 MCP.
+
+If your 'lspci -v' listing shows something like the following,
+
+00:01.1 SMBus: nVidia Corporation: Unknown device 0064 (rev a2)
+ Subsystem: Asustek Computer, Inc.: Unknown device 0c11
+ Flags: 66Mhz, fast devsel, IRQ 5
+ I/O ports at c000 [size=32]
+ Capabilities: <available only to root>
+
+then this driver should support the SMBuses of your motherboard.
+
+
+Notes
+-----
+
+The SMBus adapter in the nForce2 chipset seems to be very similar to the
+SMBus 2.0 adapter in the AMD-8111 south bridge. However, I could only get
+the driver to work with direct I/O access, which is different to the EC
+interface of the AMD-8111. Tested on Asus A7N8X. The ACPI DSDT table of the
+Asus A7N8X lists two SMBuses, both of which are supported by this driver.
diff --git a/Documentation/i2c/busses/i2c-ocores b/Documentation/i2c/busses/i2c-ocores
new file mode 100644
index 000000000..c269aaa2f
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-ocores
@@ -0,0 +1,68 @@
+Kernel driver i2c-ocores
+
+Supported adapters:
+ * OpenCores.org I2C controller by Richard Herveille (see datasheet link)
+ Datasheet: http://www.opencores.org/projects.cgi/web/i2c/overview
+
+Author: Peter Korsgaard <jacmet@sunsite.dk>
+
+Description
+-----------
+
+i2c-ocores is an i2c bus driver for the OpenCores.org I2C controller
+IP core by Richard Herveille.
+
+Usage
+-----
+
+i2c-ocores uses the platform bus, so you need to provide a struct
+platform_device with the base address and interrupt number. The
+dev.platform_data of the device should also point to a struct
+ocores_i2c_platform_data (see linux/i2c-ocores.h) describing the
+distance between registers and the input clock speed.
+There is also a possibility to attach a list of i2c_board_info which
+the i2c-ocores driver will add to the bus upon creation.
+
+E.G. something like:
+
+static struct resource ocores_resources[] = {
+ [0] = {
+ .start = MYI2C_BASEADDR,
+ .end = MYI2C_BASEADDR + 8,
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = MYI2C_IRQ,
+ .end = MYI2C_IRQ,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+/* optional board info */
+struct i2c_board_info ocores_i2c_board_info[] = {
+ {
+ I2C_BOARD_INFO("tsc2003", 0x48),
+ .platform_data = &tsc2003_platform_data,
+ .irq = TSC_IRQ
+ },
+ {
+ I2C_BOARD_INFO("adv7180", 0x42 >> 1),
+ .irq = ADV_IRQ
+ }
+};
+
+static struct ocores_i2c_platform_data myi2c_data = {
+ .regstep = 2, /* two bytes between registers */
+ .clock_khz = 50000, /* input clock of 50MHz */
+ .devices = ocores_i2c_board_info, /* optional table of devices */
+ .num_devices = ARRAY_SIZE(ocores_i2c_board_info), /* table size */
+};
+
+static struct platform_device myi2c = {
+ .name = "ocores-i2c",
+ .dev = {
+ .platform_data = &myi2c_data,
+ },
+ .num_resources = ARRAY_SIZE(ocores_resources),
+ .resource = ocores_resources,
+};
diff --git a/Documentation/i2c/busses/i2c-parport b/Documentation/i2c/busses/i2c-parport
new file mode 100644
index 000000000..0e2d17b46
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-parport
@@ -0,0 +1,177 @@
+Kernel driver i2c-parport
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+This is a unified driver for several i2c-over-parallel-port adapters,
+such as the ones made by Philips, Velleman or ELV. This driver is
+meant as a replacement for the older, individual drivers:
+ * i2c-philips-par
+ * i2c-elv
+ * i2c-velleman
+ * video/i2c-parport (NOT the same as this one, dedicated to home brew
+ teletext adapters)
+
+It currently supports the following devices:
+ * (type=0) Philips adapter
+ * (type=1) home brew teletext adapter
+ * (type=2) Velleman K8000 adapter
+ * (type=3) ELV adapter
+ * (type=4) Analog Devices ADM1032 evaluation board
+ * (type=5) Analog Devices evaluation boards: ADM1025, ADM1030, ADM1031
+ * (type=6) Barco LPT->DVI (K5800236) adapter
+ * (type=7) One For All JP1 parallel port adapter
+
+These devices use different pinout configurations, so you have to tell
+the driver what you have, using the type module parameter. There is no
+way to autodetect the devices. Support for different pinout configurations
+can be easily added when needed.
+
+Earlier kernels defaulted to type=0 (Philips). But now, if the type
+parameter is missing, the driver will simply fail to initialize.
+
+SMBus alert support is available on adapters which have this line properly
+connected to the parallel port's interrupt pin.
+
+
+Building your own adapter
+-------------------------
+
+If you want to build you own i2c-over-parallel-port adapter, here is
+a sample electronics schema (credits go to Sylvain Munaut):
+
+Device PC
+Side ___________________Vdd (+) Side
+ | | |
+ --- --- ---
+ | | | | | |
+ |R| |R| |R|
+ | | | | | |
+ --- --- ---
+ | | |
+ | | /| |
+SCL ----------x--------o |-----------x------------------- pin 2
+ | \| | |
+ | | |
+ | |\ | |
+SDA ----------x----x---| o---x--------------------------- pin 13
+ | |/ |
+ | |
+ | /| |
+ ---------o |----------------x-------------- pin 3
+ \| | |
+ | |
+ --- ---
+ | | | |
+ |R| |R|
+ | | | |
+ --- ---
+ | |
+ ### ###
+ GND GND
+
+Remarks:
+ - This is the exact pinout and electronics used on the Analog Devices
+ evaluation boards.
+ /|
+ - All inverters -o |- must be 74HC05, they must be open collector output.
+ \|
+ - All resitors are 10k.
+ - Pins 18-25 of the parallel port connected to GND.
+ - Pins 4-9 (D2-D7) could be used as VDD is the driver drives them high.
+ The ADM1032 evaluation board uses D4-D7. Beware that the amount of
+ current you can draw from the parallel port is limited. Also note that
+ all connected lines MUST BE driven at the same state, else you'll short
+ circuit the output buffers! So plugging the I2C adapter after loading
+ the i2c-parport module might be a good safety since data line state
+ prior to init may be unknown.
+ - This is 5V!
+ - Obviously you cannot read SCL (so it's not really standard-compliant).
+ Pretty easy to add, just copy the SDA part and use another input pin.
+ That would give (ELV compatible pinout):
+
+
+Device PC
+Side ______________________________Vdd (+) Side
+ | | | |
+ --- --- --- ---
+ | | | | | | | |
+ |R| |R| |R| |R|
+ | | | | | | | |
+ --- --- --- ---
+ | | | |
+ | | |\ | |
+SCL ----------x--------x--| o---x------------------------ pin 15
+ | | |/ |
+ | | |
+ | | /| |
+ | ---o |-------------x-------------- pin 2
+ | \| | |
+ | | |
+ | | |
+ | |\ | |
+SDA ---------------x---x--| o--------x------------------- pin 10
+ | |/ |
+ | |
+ | /| |
+ ---o |------------------x--------- pin 3
+ \| | |
+ | |
+ --- ---
+ | | | |
+ |R| |R|
+ | | | |
+ --- ---
+ | |
+ ### ###
+ GND GND
+
+
+If possible, you should use the same pinout configuration as existing
+adapters do, so you won't even have to change the code.
+
+
+Similar (but different) drivers
+-------------------------------
+
+This driver is NOT the same as the i2c-pport driver found in the i2c
+package. The i2c-pport driver makes use of modern parallel port features so
+that you don't need additional electronics. It has other restrictions
+however, and was not ported to Linux 2.6 (yet).
+
+This driver is also NOT the same as the i2c-pcf-epp driver found in the
+lm_sensors package. The i2c-pcf-epp driver doesn't use the parallel port as
+an I2C bus directly. Instead, it uses it to control an external I2C bus
+master. That driver was not ported to Linux 2.6 (yet) either.
+
+
+Legacy documentation for Velleman adapter
+-----------------------------------------
+
+Useful links:
+Velleman http://www.velleman.be/
+Velleman K8000 Howto http://howto.htlw16.ac.at/k8000-howto.html
+
+The project has lead to new libs for the Velleman K8000 and K8005:
+ LIBK8000 v1.99.1 and LIBK8005 v0.21
+With these libs, you can control the K8000 interface card and the K8005
+stepper motor card with the simple commands which are in the original
+Velleman software, like SetIOchannel, ReadADchannel, SendStepCCWFull and
+many more, using /dev/velleman.
+ http://home.wanadoo.nl/hihihi/libk8000.htm
+ http://home.wanadoo.nl/hihihi/libk8005.htm
+ http://struyve.mine.nu:8080/index.php?block=k8000
+ http://sourceforge.net/projects/libk8005/
+
+
+One For All JP1 parallel port adapter
+-------------------------------------
+
+The JP1 project revolves around a set of remote controls which expose
+the I2C bus their internal configuration EEPROM lives on via a 6 pin
+jumper in the battery compartment. More details can be found at:
+
+http://www.hifi-remote.com/jp1/
+
+Details of the simple parallel port hardware can be found at:
+
+http://www.hifi-remote.com/jp1/hardware.shtml
diff --git a/Documentation/i2c/busses/i2c-parport-light b/Documentation/i2c/busses/i2c-parport-light
new file mode 100644
index 000000000..7071b8ba0
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-parport-light
@@ -0,0 +1,22 @@
+Kernel driver i2c-parport-light
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+This driver is a light version of i2c-parport. It doesn't depend
+on the parport driver, and uses direct I/O access instead. This might be
+preferred on embedded systems where wasting memory for the clean but heavy
+parport handling is not an option. The drawback is a reduced portability
+and the impossibility to daisy-chain other parallel port devices.
+
+Please see i2c-parport for documentation.
+
+Module parameters:
+
+* type: type of adapter (see i2c-parport or modinfo)
+
+* base: base I/O address
+ Default is 0x378 which is fairly common for parallel ports, at least on PC.
+
+* irq: optional IRQ
+ This must be passed if you want SMBus alert support, assuming your adapter
+ actually supports this.
diff --git a/Documentation/i2c/busses/i2c-pca-isa b/Documentation/i2c/busses/i2c-pca-isa
new file mode 100644
index 000000000..b044e5265
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-pca-isa
@@ -0,0 +1,23 @@
+Kernel driver i2c-pca-isa
+
+Supported adapters:
+This driver supports ISA boards using the Philips PCA 9564
+Parallel bus to I2C bus controller
+
+Author: Ian Campbell <icampbell@arcom.com>, Arcom Control Systems
+
+Module Parameters
+-----------------
+
+* base int
+ I/O base address
+* irq int
+ IRQ interrupt
+* clock int
+ Clock rate as described in table 1 of PCA9564 datasheet
+
+Description
+-----------
+
+This driver supports ISA boards using the Philips PCA 9564
+Parallel bus to I2C bus controller
diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4
new file mode 100644
index 000000000..aa959fd22
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-piix4
@@ -0,0 +1,110 @@
+Kernel driver i2c-piix4
+
+Supported adapters:
+ * Intel 82371AB PIIX4 and PIIX4E
+ * Intel 82443MX (440MX)
+ Datasheet: Publicly available at the Intel website
+ * ServerWorks OSB4, CSB5, CSB6, HT-1000 and HT-1100 southbridges
+ Datasheet: Only available via NDA from ServerWorks
+ * ATI IXP200, IXP300, IXP400, SB600, SB700 and SB800 southbridges
+ Datasheet: Not publicly available
+ SB700 register reference available at:
+ http://support.amd.com/us/Embedded_TechDocs/43009_sb7xx_rrg_pub_1.00.pdf
+ * AMD SP5100 (SB700 derivative found on some server mainboards)
+ Datasheet: Publicly available at the AMD website
+ http://support.amd.com/us/Embedded_TechDocs/44413.pdf
+ * AMD Hudson-2, ML, CZ
+ Datasheet: Not publicly available
+ * Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge
+ Datasheet: Publicly available at the SMSC website http://www.smsc.com
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>
+ Philip Edelbrock <phil@netroedge.com>
+
+
+Module Parameters
+-----------------
+
+* force: int
+ Forcibly enable the PIIX4. DANGEROUS!
+* force_addr: int
+ Forcibly enable the PIIX4 at the given address. EXTREMELY DANGEROUS!
+
+
+Description
+-----------
+
+The PIIX4 (properly known as the 82371AB) is an Intel chip with a lot of
+functionality. Among other things, it implements the PCI bus. One of its
+minor functions is implementing a System Management Bus. This is a true
+SMBus - you can not access it on I2C levels. The good news is that it
+natively understands SMBus commands and you do not have to worry about
+timing problems. The bad news is that non-SMBus devices connected to it can
+confuse it mightily. Yes, this is known to happen...
+
+Do 'lspci -v' and see whether it contains an entry like this:
+
+0000:00:02.3 Bridge: Intel Corp. 82371AB/EB/MB PIIX4 ACPI (rev 02)
+ Flags: medium devsel, IRQ 9
+
+Bus and device numbers may differ, but the function number must be
+identical (like many PCI devices, the PIIX4 incorporates a number of
+different 'functions', which can be considered as separate devices). If you
+find such an entry, you have a PIIX4 SMBus controller.
+
+On some computers (most notably, some Dells), the SMBus is disabled by
+default. If you use the insmod parameter 'force=1', the kernel module will
+try to enable it. THIS IS VERY DANGEROUS! If the BIOS did not set up a
+correct address for this module, you could get in big trouble (read:
+crashes, data corruption, etc.). Try this only as a last resort (try BIOS
+updates first, for example), and backup first! An even more dangerous
+option is 'force_addr=<IOPORT>'. This will not only enable the PIIX4 like
+'force' foes, but it will also set a new base I/O port address. The SMBus
+parts of the PIIX4 needs a range of 8 of these addresses to function
+correctly. If these addresses are already reserved by some other device,
+you will get into big trouble! DON'T USE THIS IF YOU ARE NOT VERY SURE
+ABOUT WHAT YOU ARE DOING!
+
+The PIIX4E is just an new version of the PIIX4; it is supported as well.
+The PIIX/PIIX3 does not implement an SMBus or I2C bus, so you can't use
+this driver on those mainboards.
+
+The ServerWorks Southbridges, the Intel 440MX, and the Victory66 are
+identical to the PIIX4 in I2C/SMBus support.
+
+The AMD SB700, SB800, SP5100 and Hudson-2 chipsets implement two
+PIIX4-compatible SMBus controllers. If your BIOS initializes the
+secondary controller, it will be detected by this driver as
+an "Auxiliary SMBus Host Controller".
+
+If you own Force CPCI735 motherboard or other OSB4 based systems you may need
+to change the SMBus Interrupt Select register so the SMBus controller uses
+the SMI mode.
+
+1) Use lspci command and locate the PCI device with the SMBus controller:
+ 00:0f.0 ISA bridge: ServerWorks OSB4 South Bridge (rev 4f)
+ The line may vary for different chipsets. Please consult the driver source
+ for all possible PCI ids (and lspci -n to match them). Lets assume the
+ device is located at 00:0f.0.
+2) Now you just need to change the value in 0xD2 register. Get it first with
+ command: lspci -xxx -s 00:0f.0
+ If the value is 0x3 then you need to change it to 0x1
+ setpci -s 00:0f.0 d2.b=1
+
+Please note that you don't need to do that in all cases, just when the SMBus is
+not working properly.
+
+
+Hardware-specific issues
+------------------------
+
+This driver will refuse to load on IBM systems with an Intel PIIX4 SMBus.
+Some of these machines have an RFID EEPROM (24RF08) connected to the SMBus,
+which can easily get corrupted due to a state machine bug. These are mostly
+Thinkpad laptops, but desktop systems may also be affected. We have no list
+of all affected systems, so the only safe solution was to prevent access to
+the SMBus on all IBM systems (detected using DMI data.)
+
+For additional information, read:
+http://www.lm-sensors.org/browser/lm-sensors/trunk/README
diff --git a/Documentation/i2c/busses/i2c-sis5595 b/Documentation/i2c/busses/i2c-sis5595
new file mode 100644
index 000000000..ecd21fb49
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-sis5595
@@ -0,0 +1,59 @@
+Kernel driver i2c-sis5595
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Mark D. Studebaker <mdsxyz123@yahoo.com>,
+ Philip Edelbrock <phil@netroedge.com>
+
+Supported adapters:
+ * Silicon Integrated Systems Corp. SiS5595 Southbridge
+ Datasheet: Publicly available at the Silicon Integrated Systems Corp. site.
+
+Note: all have mfr. ID 0x1039.
+
+ SUPPORTED PCI ID
+ 5595 0008
+
+ Note: these chips contain a 0008 device which is incompatible with the
+ 5595. We recognize these by the presence of the listed
+ "blacklist" PCI ID and refuse to load.
+
+ NOT SUPPORTED PCI ID BLACKLIST PCI ID
+ 540 0008 0540
+ 550 0008 0550
+ 5513 0008 5511
+ 5581 0008 5597
+ 5582 0008 5597
+ 5597 0008 5597
+ 5598 0008 5597/5598
+ 630 0008 0630
+ 645 0008 0645
+ 646 0008 0646
+ 648 0008 0648
+ 650 0008 0650
+ 651 0008 0651
+ 730 0008 0730
+ 735 0008 0735
+ 745 0008 0745
+ 746 0008 0746
+
+Module Parameters
+-----------------
+
+* force_addr=0xaddr Set the I/O base address. Useful for boards
+ that don't set the address in the BIOS. Does not do a
+ PCI force; the device must still be present in lspci.
+ Don't use this unless the driver complains that the
+ base address is not set.
+
+Description
+-----------
+
+i2c-sis5595 is a true SMBus host driver for motherboards with the SiS5595
+southbridges.
+
+WARNING: If you are trying to access the integrated sensors on the SiS5595
+chip, you want the sis5595 driver for those, not this driver. This driver
+is a BUS driver, not a CHIP driver. A BUS driver is used by other CHIP
+drivers to access chips on the bus.
+
diff --git a/Documentation/i2c/busses/i2c-sis630 b/Documentation/i2c/busses/i2c-sis630
new file mode 100644
index 000000000..ee7943631
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-sis630
@@ -0,0 +1,58 @@
+Kernel driver i2c-sis630
+
+Supported adapters:
+ * Silicon Integrated Systems Corp (SiS)
+ 630 chipset (Datasheet: available at http://www.sfr-fresh.com/linux)
+ 730 chipset
+ 964 chipset
+ * Possible other SiS chipsets ?
+
+Author: Alexander Malysh <amalysh@web.de>
+ Amaury Decrême <amaury.decreme@gmail.com> - SiS964 support
+
+Module Parameters
+-----------------
+
+* force = [1|0] Forcibly enable the SIS630. DANGEROUS!
+ This can be interesting for chipsets not named
+ above to check if it works for you chipset, but DANGEROUS!
+
+* high_clock = [1|0] Forcibly set Host Master Clock to 56KHz (default,
+ what your BIOS use). DANGEROUS! This should be a bit
+ faster, but freeze some systems (i.e. my Laptop).
+ SIS630/730 chip only.
+
+
+Description
+-----------
+
+This SMBus only driver is known to work on motherboards with the above
+named chipsets.
+
+If you see something like this:
+
+00:00.0 Host bridge: Silicon Integrated Systems [SiS] 630 Host (rev 31)
+00:01.0 ISA bridge: Silicon Integrated Systems [SiS] 85C503/5513
+
+or like this:
+
+00:00.0 Host bridge: Silicon Integrated Systems [SiS] 730 Host (rev 02)
+00:01.0 ISA bridge: Silicon Integrated Systems [SiS] 85C503/5513
+
+or like this:
+
+00:00.0 Host bridge: Silicon Integrated Systems [SiS] 760/M760 Host (rev 02)
+00:02.0 ISA bridge: Silicon Integrated Systems [SiS] SiS964 [MuTIOL Media IO]
+ LPC Controller (rev 36)
+
+in your 'lspci' output , then this driver is for your chipset.
+
+Thank You
+---------
+Philip Edelbrock <phil@netroedge.com>
+- testing SiS730 support
+Mark M. Hoffman <mhoffman@lightlink.com>
+- bug fixes
+
+To anyone else which I forgot here ;), thanks!
+
diff --git a/Documentation/i2c/busses/i2c-sis96x b/Documentation/i2c/busses/i2c-sis96x
new file mode 100644
index 000000000..0b979f325
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-sis96x
@@ -0,0 +1,73 @@
+Kernel driver i2c-sis96x
+
+Replaces 2.4.x i2c-sis645
+
+Supported adapters:
+ * Silicon Integrated Systems Corp (SiS)
+ Any combination of these host bridges:
+ 645, 645DX (aka 646), 648, 650, 651, 655, 735, 745, 746
+ and these south bridges:
+ 961, 962, 963(L)
+
+Author: Mark M. Hoffman <mhoffman@lightlink.com>
+
+Description
+-----------
+
+This SMBus only driver is known to work on motherboards with the above
+named chipset combinations. The driver was developed without benefit of a
+proper datasheet from SiS. The SMBus registers are assumed compatible with
+those of the SiS630, although they are located in a completely different
+place. Thanks to Alexander Malysh <amalysh@web.de> for providing the
+SiS630 datasheet (and driver).
+
+The command "lspci" as root should produce something like these lines:
+
+00:00.0 Host bridge: Silicon Integrated Systems [SiS]: Unknown device 0645
+00:02.0 ISA bridge: Silicon Integrated Systems [SiS] 85C503/5513
+00:02.1 SMBus: Silicon Integrated Systems [SiS]: Unknown device 0016
+
+or perhaps this...
+
+00:00.0 Host bridge: Silicon Integrated Systems [SiS]: Unknown device 0645
+00:02.0 ISA bridge: Silicon Integrated Systems [SiS]: Unknown device 0961
+00:02.1 SMBus: Silicon Integrated Systems [SiS]: Unknown device 0016
+
+(kernel versions later than 2.4.18 may fill in the "Unknown"s)
+
+If you can't see it please look on quirk_sis_96x_smbus
+(drivers/pci/quirks.c) (also if southbridge detection fails)
+
+I suspect that this driver could be made to work for the following SiS
+chipsets as well: 635, and 635T. If anyone owns a board with those chips
+AND is willing to risk crashing & burning an otherwise well-behaved kernel
+in the name of progress... please contact me at <mhoffman@lightlink.com> or
+via the linux-i2c mailing list: <linux-i2c@vger.kernel.org>. Please send bug
+reports and/or success stories as well.
+
+
+TO DOs
+------
+
+* The driver does not support SMBus block reads/writes; I may add them if a
+scenario is found where they're needed.
+
+
+Thank You
+---------
+
+Mark D. Studebaker <mdsxyz123@yahoo.com>
+ - design hints and bug fixes
+Alexander Maylsh <amalysh@web.de>
+ - ditto, plus an important datasheet... almost the one I really wanted
+Hans-Günter Lütke Uphues <hg_lu@t-online.de>
+ - patch for SiS735
+Robert Zwerus <arzie@dds.nl>
+ - testing for SiS645DX
+Kianusch Sayah Karadji <kianusch@sk-tech.net>
+ - patch for SiS645DX/962
+Ken Healy
+ - patch for SiS655
+
+To anyone else who has written w/ feedback, thanks!
+
diff --git a/Documentation/i2c/busses/i2c-taos-evm b/Documentation/i2c/busses/i2c-taos-evm
new file mode 100644
index 000000000..60299555d
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-taos-evm
@@ -0,0 +1,46 @@
+Kernel driver i2c-taos-evm
+
+Author: Jean Delvare <jdelvare@suse.de>
+
+This is a driver for the evaluation modules for TAOS I2C/SMBus chips.
+The modules include an SMBus master with limited capabilities, which can
+be controlled over the serial port. Virtually all evaluation modules
+are supported, but a few lines of code need to be added for each new
+module to instantiate the right I2C chip on the bus. Obviously, a driver
+for the chip in question is also needed.
+
+Currently supported devices are:
+
+* TAOS TSL2550 EVM
+
+For additional information on TAOS products, please see
+ http://www.taosinc.com/
+
+
+Using this driver
+-----------------
+
+In order to use this driver, you'll need the serport driver, and the
+inputattach tool, which is part of the input-utils package. The following
+commands will tell the kernel that you have a TAOS EVM on the first
+serial port:
+
+# modprobe serport
+# inputattach --taos-evm /dev/ttyS0
+
+
+Technical details
+-----------------
+
+Only 4 SMBus transaction types are supported by the TAOS evaluation
+modules:
+* Receive Byte
+* Send Byte
+* Read Byte
+* Write Byte
+
+The communication protocol is text-based and pretty simple. It is
+described in a PDF document on the CD which comes with the evaluation
+module. The communication is rather slow, because the serial port has
+to operate at 1200 bps. However, I don't think this is a big concern in
+practice, as these modules are meant for evaluation and testing only.
diff --git a/Documentation/i2c/busses/i2c-via b/Documentation/i2c/busses/i2c-via
new file mode 100644
index 000000000..343870661
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-via
@@ -0,0 +1,34 @@
+Kernel driver i2c-via
+
+Supported adapters:
+ * VIA Technologies, InC. VT82C586B
+ Datasheet: Publicly available at the VIA website
+
+Author: Kyösti Mälkki <kmalkki@cc.hut.fi>
+
+Description
+-----------
+
+i2c-via is an i2c bus driver for motherboards with VIA chipset.
+
+The following VIA pci chipsets are supported:
+ - MVP3, VP3, VP2/97, VPX/97
+ - others with South bridge VT82C586B
+
+Your lspci listing must show this :
+
+ Bridge: VIA Technologies, Inc. VT82C586B ACPI (rev 10)
+
+ Problems?
+
+ Q: You have VT82C586B on the motherboard, but not in the listing.
+
+ A: Go to your BIOS setup, section PCI devices or similar.
+ Turn USB support on, and try again.
+
+ Q: No error messages, but still i2c doesn't seem to work.
+
+ A: This can happen. This driver uses the pins VIA recommends in their
+ datasheets, but there are several ways the motherboard manufacturer
+ can actually wire the lines.
+
diff --git a/Documentation/i2c/busses/i2c-viapro b/Documentation/i2c/busses/i2c-viapro
new file mode 100644
index 000000000..ab64ce21c
--- /dev/null
+++ b/Documentation/i2c/busses/i2c-viapro
@@ -0,0 +1,73 @@
+Kernel driver i2c-viapro
+
+Supported adapters:
+ * VIA Technologies, Inc. VT82C596A/B
+ Datasheet: Sometimes available at the VIA website
+
+ * VIA Technologies, Inc. VT82C686A/B
+ Datasheet: Sometimes available at the VIA website
+
+ * VIA Technologies, Inc. VT8231, VT8233, VT8233A
+ Datasheet: available on request from VIA
+
+ * VIA Technologies, Inc. VT8235, VT8237R, VT8237A, VT8237S, VT8251
+ Datasheet: available on request and under NDA from VIA
+
+ * VIA Technologies, Inc. CX700
+ Datasheet: available on request and under NDA from VIA
+
+ * VIA Technologies, Inc. VX800/VX820
+ Datasheet: available on http://linux.via.com.tw
+
+ * VIA Technologies, Inc. VX855/VX875
+ Datasheet: available on http://linux.via.com.tw
+
+ * VIA Technologies, Inc. VX900
+ Datasheet: available on http://linux.via.com.tw
+
+Authors:
+ Kyösti Mälkki <kmalkki@cc.hut.fi>,
+ Mark D. Studebaker <mdsxyz123@yahoo.com>,
+ Jean Delvare <jdelvare@suse.de>
+
+Module Parameters
+-----------------
+
+* force: int
+ Forcibly enable the SMBus controller. DANGEROUS!
+* force_addr: int
+ Forcibly enable the SMBus at the given address. EXTREMELY DANGEROUS!
+
+Description
+-----------
+
+i2c-viapro is a true SMBus host driver for motherboards with one of the
+supported VIA south bridges.
+
+Your lspci -n listing must show one of these :
+
+ device 1106:3050 (VT82C596A function 3)
+ device 1106:3051 (VT82C596B function 3)
+ device 1106:3057 (VT82C686 function 4)
+ device 1106:3074 (VT8233)
+ device 1106:3147 (VT8233A)
+ device 1106:8235 (VT8231 function 4)
+ device 1106:3177 (VT8235)
+ device 1106:3227 (VT8237R)
+ device 1106:3337 (VT8237A)
+ device 1106:3372 (VT8237S)
+ device 1106:3287 (VT8251)
+ device 1106:8324 (CX700)
+ device 1106:8353 (VX800/VX820)
+ device 1106:8409 (VX855/VX875)
+ device 1106:8410 (VX900)
+
+If none of these show up, you should look in the BIOS for settings like
+enable ACPI / SMBus or even USB.
+
+Except for the oldest chips (VT82C596A/B, VT82C686A and most probably
+VT8231), this driver supports I2C block transactions. Such transactions
+are mainly useful to read from and write to EEPROMs.
+
+The CX700/VX800/VX820 additionally appears to support SMBus PEC, although
+this driver doesn't implement it yet.
diff --git a/Documentation/i2c/busses/scx200_acb b/Documentation/i2c/busses/scx200_acb
new file mode 100644
index 000000000..ce83c871f
--- /dev/null
+++ b/Documentation/i2c/busses/scx200_acb
@@ -0,0 +1,32 @@
+Kernel driver scx200_acb
+
+Author: Christer Weinigel <wingel@nano-system.com>
+
+The driver supersedes the older, never merged driver named i2c-nscacb.
+
+Module Parameters
+-----------------
+
+* base: up to 4 ints
+ Base addresses for the ACCESS.bus controllers on SCx200 and SC1100 devices
+
+ By default the driver uses two base addresses 0x820 and 0x840.
+ If you want only one base address, specify the second as 0 so as to
+ override this default.
+
+Description
+-----------
+
+Enable the use of the ACCESS.bus controller on the Geode SCx200 and
+SC1100 processors and the CS5535 and CS5536 Geode companion devices.
+
+Device-specific notes
+---------------------
+
+The SC1100 WRAP boards are known to use base addresses 0x810 and 0x820.
+If the scx200_acb driver is built into the kernel, add the following
+parameter to your boot command line:
+ scx200_acb.base=0x810,0x820
+If the scx200_acb driver is built as a module, add the following line to
+a configuration file in /etc/modprobe.d/ instead:
+ options scx200_acb base=0x810,0x820
diff --git a/Documentation/i2c/dev-interface b/Documentation/i2c/dev-interface
new file mode 100644
index 000000000..2ac78ae10
--- /dev/null
+++ b/Documentation/i2c/dev-interface
@@ -0,0 +1,214 @@
+Usually, i2c devices are controlled by a kernel driver. But it is also
+possible to access all devices on an adapter from userspace, through
+the /dev interface. You need to load module i2c-dev for this.
+
+Each registered i2c adapter gets a number, counting from 0. You can
+examine /sys/class/i2c-dev/ to see what number corresponds to which adapter.
+Alternatively, you can run "i2cdetect -l" to obtain a formated list of all
+i2c adapters present on your system at a given time. i2cdetect is part of
+the i2c-tools package.
+
+I2C device files are character device files with major device number 89
+and a minor device number corresponding to the number assigned as
+explained above. They should be called "i2c-%d" (i2c-0, i2c-1, ...,
+i2c-10, ...). All 256 minor device numbers are reserved for i2c.
+
+
+C example
+=========
+
+So let's say you want to access an i2c adapter from a C program. The
+first thing to do is "#include <linux/i2c-dev.h>". Please note that
+there are two files named "i2c-dev.h" out there, one is distributed
+with the Linux kernel and is meant to be included from kernel
+driver code, the other one is distributed with i2c-tools and is
+meant to be included from user-space programs. You obviously want
+the second one here.
+
+Now, you have to decide which adapter you want to access. You should
+inspect /sys/class/i2c-dev/ or run "i2cdetect -l" to decide this.
+Adapter numbers are assigned somewhat dynamically, so you can not
+assume much about them. They can even change from one boot to the next.
+
+Next thing, open the device file, as follows:
+
+ int file;
+ int adapter_nr = 2; /* probably dynamically determined */
+ char filename[20];
+
+ snprintf(filename, 19, "/dev/i2c-%d", adapter_nr);
+ file = open(filename, O_RDWR);
+ if (file < 0) {
+ /* ERROR HANDLING; you can check errno to see what went wrong */
+ exit(1);
+ }
+
+When you have opened the device, you must specify with what device
+address you want to communicate:
+
+ int addr = 0x40; /* The I2C address */
+
+ if (ioctl(file, I2C_SLAVE, addr) < 0) {
+ /* ERROR HANDLING; you can check errno to see what went wrong */
+ exit(1);
+ }
+
+Well, you are all set up now. You can now use SMBus commands or plain
+I2C to communicate with your device. SMBus commands are preferred if
+the device supports them. Both are illustrated below.
+
+ __u8 reg = 0x10; /* Device register to access */
+ __s32 res;
+ char buf[10];
+
+ /* Using SMBus commands */
+ res = i2c_smbus_read_word_data(file, reg);
+ if (res < 0) {
+ /* ERROR HANDLING: i2c transaction failed */
+ } else {
+ /* res contains the read word */
+ }
+
+ /* Using I2C Write, equivalent of
+ i2c_smbus_write_word_data(file, reg, 0x6543) */
+ buf[0] = reg;
+ buf[1] = 0x43;
+ buf[2] = 0x65;
+ if (write(file, buf, 3) != 3) {
+ /* ERROR HANDLING: i2c transaction failed */
+ }
+
+ /* Using I2C Read, equivalent of i2c_smbus_read_byte(file) */
+ if (read(file, buf, 1) != 1) {
+ /* ERROR HANDLING: i2c transaction failed */
+ } else {
+ /* buf[0] contains the read byte */
+ }
+
+Note that only a subset of the I2C and SMBus protocols can be achieved by
+the means of read() and write() calls. In particular, so-called combined
+transactions (mixing read and write messages in the same transaction)
+aren't supported. For this reason, this interface is almost never used by
+user-space programs.
+
+IMPORTANT: because of the use of inline functions, you *have* to use
+'-O' or some variation when you compile your program!
+
+
+Full interface description
+==========================
+
+The following IOCTLs are defined:
+
+ioctl(file, I2C_SLAVE, long addr)
+ Change slave address. The address is passed in the 7 lower bits of the
+ argument (except for 10 bit addresses, passed in the 10 lower bits in this
+ case).
+
+ioctl(file, I2C_TENBIT, long select)
+ Selects ten bit addresses if select not equals 0, selects normal 7 bit
+ addresses if select equals 0. Default 0. This request is only valid
+ if the adapter has I2C_FUNC_10BIT_ADDR.
+
+ioctl(file, I2C_PEC, long select)
+ Selects SMBus PEC (packet error checking) generation and verification
+ if select not equals 0, disables if select equals 0. Default 0.
+ Used only for SMBus transactions. This request only has an effect if the
+ the adapter has I2C_FUNC_SMBUS_PEC; it is still safe if not, it just
+ doesn't have any effect.
+
+ioctl(file, I2C_FUNCS, unsigned long *funcs)
+ Gets the adapter functionality and puts it in *funcs.
+
+ioctl(file, I2C_RDWR, struct i2c_rdwr_ioctl_data *msgset)
+ Do combined read/write transaction without stop in between.
+ Only valid if the adapter has I2C_FUNC_I2C. The argument is
+ a pointer to a
+
+ struct i2c_rdwr_ioctl_data {
+ struct i2c_msg *msgs; /* ptr to array of simple messages */
+ int nmsgs; /* number of messages to exchange */
+ }
+
+ The msgs[] themselves contain further pointers into data buffers.
+ The function will write or read data to or from that buffers depending
+ on whether the I2C_M_RD flag is set in a particular message or not.
+ The slave address and whether to use ten bit address mode has to be
+ set in each message, overriding the values set with the above ioctl's.
+
+ioctl(file, I2C_SMBUS, struct i2c_smbus_ioctl_data *args)
+ Not meant to be called directly; instead, use the access functions
+ below.
+
+You can do plain i2c transactions by using read(2) and write(2) calls.
+You do not need to pass the address byte; instead, set it through
+ioctl I2C_SLAVE before you try to access the device.
+
+You can do SMBus level transactions (see documentation file smbus-protocol
+for details) through the following functions:
+ __s32 i2c_smbus_write_quick(int file, __u8 value);
+ __s32 i2c_smbus_read_byte(int file);
+ __s32 i2c_smbus_write_byte(int file, __u8 value);
+ __s32 i2c_smbus_read_byte_data(int file, __u8 command);
+ __s32 i2c_smbus_write_byte_data(int file, __u8 command, __u8 value);
+ __s32 i2c_smbus_read_word_data(int file, __u8 command);
+ __s32 i2c_smbus_write_word_data(int file, __u8 command, __u16 value);
+ __s32 i2c_smbus_process_call(int file, __u8 command, __u16 value);
+ __s32 i2c_smbus_read_block_data(int file, __u8 command, __u8 *values);
+ __s32 i2c_smbus_write_block_data(int file, __u8 command, __u8 length,
+ __u8 *values);
+All these transactions return -1 on failure; you can read errno to see
+what happened. The 'write' transactions return 0 on success; the
+'read' transactions return the read value, except for read_block, which
+returns the number of values read. The block buffers need not be longer
+than 32 bytes.
+
+The above functions are all inline functions, that resolve to calls to
+the i2c_smbus_access function, that on its turn calls a specific ioctl
+with the data in a specific format. Read the source code if you
+want to know what happens behind the screens.
+
+
+Implementation details
+======================
+
+For the interested, here's the code flow which happens inside the kernel
+when you use the /dev interface to I2C:
+
+1* Your program opens /dev/i2c-N and calls ioctl() on it, as described in
+section "C example" above.
+
+2* These open() and ioctl() calls are handled by the i2c-dev kernel
+driver: see i2c-dev.c:i2cdev_open() and i2c-dev.c:i2cdev_ioctl(),
+respectively. You can think of i2c-dev as a generic I2C chip driver
+that can be programmed from user-space.
+
+3* Some ioctl() calls are for administrative tasks and are handled by
+i2c-dev directly. Examples include I2C_SLAVE (set the address of the
+device you want to access) and I2C_PEC (enable or disable SMBus error
+checking on future transactions.)
+
+4* Other ioctl() calls are converted to in-kernel function calls by
+i2c-dev. Examples include I2C_FUNCS, which queries the I2C adapter
+functionality using i2c.h:i2c_get_functionality(), and I2C_SMBUS, which
+performs an SMBus transaction using i2c-core.c:i2c_smbus_xfer().
+
+The i2c-dev driver is responsible for checking all the parameters that
+come from user-space for validity. After this point, there is no
+difference between these calls that came from user-space through i2c-dev
+and calls that would have been performed by kernel I2C chip drivers
+directly. This means that I2C bus drivers don't need to implement
+anything special to support access from user-space.
+
+5* These i2c-core.c/i2c.h functions are wrappers to the actual
+implementation of your I2C bus driver. Each adapter must declare
+callback functions implementing these standard calls.
+i2c.h:i2c_get_functionality() calls i2c_adapter.algo->functionality(),
+while i2c-core.c:i2c_smbus_xfer() calls either
+adapter.algo->smbus_xfer() if it is implemented, or if not,
+i2c-core.c:i2c_smbus_xfer_emulated() which in turn calls
+i2c_adapter.algo->master_xfer().
+
+After your I2C bus driver has processed these requests, execution runs
+up the call chain, with almost no processing done, except by i2c-dev to
+package the returned data, if any, in suitable format for the ioctl.
diff --git a/Documentation/i2c/fault-codes b/Documentation/i2c/fault-codes
new file mode 100644
index 000000000..47c25abb7
--- /dev/null
+++ b/Documentation/i2c/fault-codes
@@ -0,0 +1,124 @@
+This is a summary of the most important conventions for use of fault
+codes in the I2C/SMBus stack.
+
+
+A "Fault" is not always an "Error"
+----------------------------------
+Not all fault reports imply errors; "page faults" should be a familiar
+example. Software often retries idempotent operations after transient
+faults. There may be fancier recovery schemes that are appropriate in
+some cases, such as re-initializing (and maybe resetting). After such
+recovery, triggered by a fault report, there is no error.
+
+In a similar way, sometimes a "fault" code just reports one defined
+result for an operation ... it doesn't indicate that anything is wrong
+at all, just that the outcome wasn't on the "golden path".
+
+In short, your I2C driver code may need to know these codes in order
+to respond correctly. Other code may need to rely on YOUR code reporting
+the right fault code, so that it can (in turn) behave correctly.
+
+
+I2C and SMBus fault codes
+-------------------------
+These are returned as negative numbers from most calls, with zero or
+some positive number indicating a non-fault return. The specific
+numbers associated with these symbols differ between architectures,
+though most Linux systems use <asm-generic/errno*.h> numbering.
+
+Note that the descriptions here are not exhaustive. There are other
+codes that may be returned, and other cases where these codes should
+be returned. However, drivers should not return other codes for these
+cases (unless the hardware doesn't provide unique fault reports).
+
+Also, codes returned by adapter probe methods follow rules which are
+specific to their host bus (such as PCI, or the platform bus).
+
+
+EAGAIN
+ Returned by I2C adapters when they lose arbitration in master
+ transmit mode: some other master was transmitting different
+ data at the same time.
+
+ Also returned when trying to invoke an I2C operation in an
+ atomic context, when some task is already using that I2C bus
+ to execute some other operation.
+
+EBADMSG
+ Returned by SMBus logic when an invalid Packet Error Code byte
+ is received. This code is a CRC covering all bytes in the
+ transaction, and is sent before the terminating STOP. This
+ fault is only reported on read transactions; the SMBus slave
+ may have a way to report PEC mismatches on writes from the
+ host. Note that even if PECs are in use, you should not rely
+ on these as the only way to detect incorrect data transfers.
+
+EBUSY
+ Returned by SMBus adapters when the bus was busy for longer
+ than allowed. This usually indicates some device (maybe the
+ SMBus adapter) needs some fault recovery (such as resetting),
+ or that the reset was attempted but failed.
+
+EINVAL
+ This rather vague error means an invalid parameter has been
+ detected before any I/O operation was started. Use a more
+ specific fault code when you can.
+
+EIO
+ This rather vague error means something went wrong when
+ performing an I/O operation. Use a more specific fault
+ code when you can.
+
+ENODEV
+ Returned by driver probe() methods. This is a bit more
+ specific than ENXIO, implying the problem isn't with the
+ address, but with the device found there. Driver probes
+ may verify the device returns *correct* responses, and
+ return this as appropriate. (The driver core will warn
+ about probe faults other than ENXIO and ENODEV.)
+
+ENOMEM
+ Returned by any component that can't allocate memory when
+ it needs to do so.
+
+ENXIO
+ Returned by I2C adapters to indicate that the address phase
+ of a transfer didn't get an ACK. While it might just mean
+ an I2C device was temporarily not responding, usually it
+ means there's nothing listening at that address.
+
+ Returned by driver probe() methods to indicate that they
+ found no device to bind to. (ENODEV may also be used.)
+
+EOPNOTSUPP
+ Returned by an adapter when asked to perform an operation
+ that it doesn't, or can't, support.
+
+ For example, this would be returned when an adapter that
+ doesn't support SMBus block transfers is asked to execute
+ one. In that case, the driver making that request should
+ have verified that functionality was supported before it
+ made that block transfer request.
+
+ Similarly, if an I2C adapter can't execute all legal I2C
+ messages, it should return this when asked to perform a
+ transaction it can't. (These limitations can't be seen in
+ the adapter's functionality mask, since the assumption is
+ that if an adapter supports I2C it supports all of I2C.)
+
+EPROTO
+ Returned when slave does not conform to the relevant I2C
+ or SMBus (or chip-specific) protocol specifications. One
+ case is when the length of an SMBus block data response
+ (from the SMBus slave) is outside the range 1-32 bytes.
+
+ETIMEDOUT
+ This is returned by drivers when an operation took too much
+ time, and was aborted before it completed.
+
+ SMBus adapters may return it when an operation took more
+ time than allowed by the SMBus specification; for example,
+ when a slave stretches clocks too far. I2C has no such
+ timeouts, but it's normal for I2C adapters to impose some
+ arbitrary limits (much longer than SMBus!) too.
+
diff --git a/Documentation/i2c/functionality b/Documentation/i2c/functionality
new file mode 100644
index 000000000..4aae8ed15
--- /dev/null
+++ b/Documentation/i2c/functionality
@@ -0,0 +1,148 @@
+INTRODUCTION
+------------
+
+Because not every I2C or SMBus adapter implements everything in the
+I2C specifications, a client can not trust that everything it needs
+is implemented when it is given the option to attach to an adapter:
+the client needs some way to check whether an adapter has the needed
+functionality.
+
+
+FUNCTIONALITY CONSTANTS
+-----------------------
+
+For the most up-to-date list of functionality constants, please check
+<uapi/linux/i2c.h>!
+
+ I2C_FUNC_I2C Plain i2c-level commands (Pure SMBus
+ adapters typically can not do these)
+ I2C_FUNC_10BIT_ADDR Handles the 10-bit address extensions
+ I2C_FUNC_PROTOCOL_MANGLING Knows about the I2C_M_IGNORE_NAK,
+ I2C_M_REV_DIR_ADDR and I2C_M_NO_RD_ACK
+ flags (which modify the I2C protocol!)
+ I2C_FUNC_NOSTART Can skip repeated start sequence
+ I2C_FUNC_SMBUS_QUICK Handles the SMBus write_quick command
+ I2C_FUNC_SMBUS_READ_BYTE Handles the SMBus read_byte command
+ I2C_FUNC_SMBUS_WRITE_BYTE Handles the SMBus write_byte command
+ I2C_FUNC_SMBUS_READ_BYTE_DATA Handles the SMBus read_byte_data command
+ I2C_FUNC_SMBUS_WRITE_BYTE_DATA Handles the SMBus write_byte_data command
+ I2C_FUNC_SMBUS_READ_WORD_DATA Handles the SMBus read_word_data command
+ I2C_FUNC_SMBUS_WRITE_WORD_DATA Handles the SMBus write_byte_data command
+ I2C_FUNC_SMBUS_PROC_CALL Handles the SMBus process_call command
+ I2C_FUNC_SMBUS_READ_BLOCK_DATA Handles the SMBus read_block_data command
+ I2C_FUNC_SMBUS_WRITE_BLOCK_DATA Handles the SMBus write_block_data command
+ I2C_FUNC_SMBUS_READ_I2C_BLOCK Handles the SMBus read_i2c_block_data command
+ I2C_FUNC_SMBUS_WRITE_I2C_BLOCK Handles the SMBus write_i2c_block_data command
+
+A few combinations of the above flags are also defined for your convenience:
+
+ I2C_FUNC_SMBUS_BYTE Handles the SMBus read_byte
+ and write_byte commands
+ I2C_FUNC_SMBUS_BYTE_DATA Handles the SMBus read_byte_data
+ and write_byte_data commands
+ I2C_FUNC_SMBUS_WORD_DATA Handles the SMBus read_word_data
+ and write_word_data commands
+ I2C_FUNC_SMBUS_BLOCK_DATA Handles the SMBus read_block_data
+ and write_block_data commands
+ I2C_FUNC_SMBUS_I2C_BLOCK Handles the SMBus read_i2c_block_data
+ and write_i2c_block_data commands
+ I2C_FUNC_SMBUS_EMUL Handles all SMBus commands that can be
+ emulated by a real I2C adapter (using
+ the transparent emulation layer)
+
+In kernel versions prior to 3.5 I2C_FUNC_NOSTART was implemented as
+part of I2C_FUNC_PROTOCOL_MANGLING.
+
+
+ADAPTER IMPLEMENTATION
+----------------------
+
+When you write a new adapter driver, you will have to implement a
+function callback `functionality'. Typical implementations are given
+below.
+
+A typical SMBus-only adapter would list all the SMBus transactions it
+supports. This example comes from the i2c-piix4 driver:
+
+ static u32 piix4_func(struct i2c_adapter *adapter)
+ {
+ return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE |
+ I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA |
+ I2C_FUNC_SMBUS_BLOCK_DATA;
+ }
+
+A typical full-I2C adapter would use the following (from the i2c-pxa
+driver):
+
+ static u32 i2c_pxa_functionality(struct i2c_adapter *adap)
+ {
+ return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
+ }
+
+I2C_FUNC_SMBUS_EMUL includes all the SMBus transactions (with the
+addition of I2C block transactions) which i2c-core can emulate using
+I2C_FUNC_I2C without any help from the adapter driver. The idea is
+to let the client drivers check for the support of SMBus functions
+without having to care whether the said functions are implemented in
+hardware by the adapter, or emulated in software by i2c-core on top
+of an I2C adapter.
+
+
+CLIENT CHECKING
+---------------
+
+Before a client tries to attach to an adapter, or even do tests to check
+whether one of the devices it supports is present on an adapter, it should
+check whether the needed functionality is present. The typical way to do
+this is (from the lm75 driver):
+
+ static int lm75_detect(...)
+ {
+ (...)
+ if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA |
+ I2C_FUNC_SMBUS_WORD_DATA))
+ goto exit;
+ (...)
+ }
+
+Here, the lm75 driver checks if the adapter can do both SMBus byte data
+and SMBus word data transactions. If not, then the driver won't work on
+this adapter and there's no point in going on. If the check above is
+successful, then the driver knows that it can call the following
+functions: i2c_smbus_read_byte_data(), i2c_smbus_write_byte_data(),
+i2c_smbus_read_word_data() and i2c_smbus_write_word_data(). As a rule of
+thumb, the functionality constants you test for with
+i2c_check_functionality() should match exactly the i2c_smbus_* functions
+which you driver is calling.
+
+Note that the check above doesn't tell whether the functionalities are
+implemented in hardware by the underlying adapter or emulated in
+software by i2c-core. Client drivers don't have to care about this, as
+i2c-core will transparently implement SMBus transactions on top of I2C
+adapters.
+
+
+CHECKING THROUGH /DEV
+---------------------
+
+If you try to access an adapter from a userspace program, you will have
+to use the /dev interface. You will still have to check whether the
+functionality you need is supported, of course. This is done using
+the I2C_FUNCS ioctl. An example, adapted from the i2cdetect program, is
+below:
+
+ int file;
+ if (file = open("/dev/i2c-0", O_RDWR) < 0) {
+ /* Some kind of error handling */
+ exit(1);
+ }
+ if (ioctl(file, I2C_FUNCS, &funcs) < 0) {
+ /* Some kind of error handling */
+ exit(1);
+ }
+ if (!(funcs & I2C_FUNC_SMBUS_QUICK)) {
+ /* Oops, the needed functionality (SMBus write_quick function) is
+ not available! */
+ exit(1);
+ }
+ /* Now it is safe to use the SMBus write_quick command */
diff --git a/Documentation/i2c/i2c-protocol b/Documentation/i2c/i2c-protocol
new file mode 100644
index 000000000..ff6d6cee6
--- /dev/null
+++ b/Documentation/i2c/i2c-protocol
@@ -0,0 +1,88 @@
+This document describes the i2c protocol. Or will, when it is finished :-)
+
+Key to symbols
+==============
+
+S (1 bit) : Start bit
+P (1 bit) : Stop bit
+Rd/Wr (1 bit) : Read/Write bit. Rd equals 1, Wr equals 0.
+A, NA (1 bit) : Accept and reverse accept bit.
+Addr (7 bits): I2C 7 bit address. Note that this can be expanded as usual to
+ get a 10 bit I2C address.
+Comm (8 bits): Command byte, a data byte which often selects a register on
+ the device.
+Data (8 bits): A plain data byte. Sometimes, I write DataLow, DataHigh
+ for 16 bit data.
+Count (8 bits): A data byte containing the length of a block operation.
+
+[..]: Data sent by I2C device, as opposed to data sent by the host adapter.
+
+
+Simple send transaction
+======================
+
+This corresponds to i2c_master_send.
+
+ S Addr Wr [A] Data [A] Data [A] ... [A] Data [A] P
+
+
+Simple receive transaction
+===========================
+
+This corresponds to i2c_master_recv
+
+ S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
+
+
+Combined transactions
+====================
+
+This corresponds to i2c_transfer
+
+They are just like the above transactions, but instead of a stop bit P
+a start bit S is sent and the transaction continues. An example of
+a byte read, followed by a byte write:
+
+ S Addr Rd [A] [Data] NA S Addr Wr [A] Data [A] P
+
+
+Modified transactions
+=====================
+
+The following modifications to the I2C protocol can also be generated by
+setting these flags for i2c messages. With the exception of I2C_M_NOSTART, they
+are usually only needed to work around device issues:
+
+I2C_M_IGNORE_NAK:
+ Normally message is interrupted immediately if there is [NA] from the
+ client. Setting this flag treats any [NA] as [A], and all of
+ message is sent.
+ These messages may still fail to SCL lo->hi timeout.
+
+I2C_M_NO_RD_ACK:
+ In a read message, master A/NA bit is skipped.
+
+I2C_M_NOSTART:
+ In a combined transaction, no 'S Addr Wr/Rd [A]' is generated at some
+ point. For example, setting I2C_M_NOSTART on the second partial message
+ generates something like:
+ S Addr Rd [A] [Data] NA Data [A] P
+ If you set the I2C_M_NOSTART variable for the first partial message,
+ we do not generate Addr, but we do generate the startbit S. This will
+ probably confuse all other clients on your bus, so don't try this.
+
+ This is often used to gather transmits from multiple data buffers in
+ system memory into something that appears as a single transfer to the
+ I2C device but may also be used between direction changes by some
+ rare devices.
+
+I2C_M_REV_DIR_ADDR:
+ This toggles the Rd/Wr flag. That is, if you want to do a write, but
+ need to emit an Rd instead of a Wr, or vice versa, you set this
+ flag. For example:
+ S Addr Rd [A] Data [A] Data [A] ... [A] Data [A] P
+
+I2C_M_STOP:
+ Force a stop condition (P) after the message. Some I2C related protocols
+ like SCCB require that. Normally, you really don't want to get interrupted
+ between the messages of one transfer.
diff --git a/Documentation/i2c/i2c-stub b/Documentation/i2c/i2c-stub
new file mode 100644
index 000000000..a16924fbd
--- /dev/null
+++ b/Documentation/i2c/i2c-stub
@@ -0,0 +1,64 @@
+MODULE: i2c-stub
+
+DESCRIPTION:
+
+This module is a very simple fake I2C/SMBus driver. It implements six
+types of SMBus commands: write quick, (r/w) byte, (r/w) byte data, (r/w)
+word data, (r/w) I2C block data, and (r/w) SMBus block data.
+
+You need to provide chip addresses as a module parameter when loading this
+driver, which will then only react to SMBus commands to these addresses.
+
+No hardware is needed nor associated with this module. It will accept write
+quick commands to the specified addresses; it will respond to the other
+commands (also to the specified addresses) by reading from or writing to
+arrays in memory. It will also spam the kernel logs for every command it
+handles.
+
+A pointer register with auto-increment is implemented for all byte
+operations. This allows for continuous byte reads like those supported by
+EEPROMs, among others.
+
+SMBus block command support is disabled by default, and must be enabled
+explicitly by setting the respective bits (0x03000000) in the functionality
+module parameter.
+
+SMBus block commands must be written to configure an SMBus command for
+SMBus block operations. Writes can be partial. Block read commands always
+return the number of bytes selected with the largest write so far.
+
+The typical use-case is like this:
+ 1. load this module
+ 2. use i2cset (from the i2c-tools project) to pre-load some data
+ 3. load the target chip driver module
+ 4. observe its behavior in the kernel log
+
+There's a script named i2c-stub-from-dump in the i2c-tools package which
+can load register values automatically from a chip dump.
+
+PARAMETERS:
+
+int chip_addr[10]:
+ The SMBus addresses to emulate chips at.
+
+unsigned long functionality:
+ Functionality override, to disable some commands. See I2C_FUNC_*
+ constants in <linux/i2c.h> for the suitable values. For example,
+ value 0x1f0000 would only enable the quick, byte and byte data
+ commands.
+
+u8 bank_reg[10]
+u8 bank_mask[10]
+u8 bank_start[10]
+u8 bank_end[10]:
+ Optional bank settings. They tell which bits in which register
+ select the active bank, as well as the range of banked registers.
+
+CAVEATS:
+
+If your target driver polls some byte or word waiting for it to change, the
+stub could lock it up. Use i2cset to unlock it.
+
+If you spam it hard enough, printk can be lossy. This module really wants
+something like relayfs.
+
diff --git a/Documentation/i2c/instantiating-devices b/Documentation/i2c/instantiating-devices
new file mode 100644
index 000000000..0d85ac193
--- /dev/null
+++ b/Documentation/i2c/instantiating-devices
@@ -0,0 +1,248 @@
+How to instantiate I2C devices
+==============================
+
+Unlike PCI or USB devices, I2C devices are not enumerated at the hardware
+level. Instead, the software must know which devices are connected on each
+I2C bus segment, and what address these devices are using. For this
+reason, the kernel code must instantiate I2C devices explicitly. There are
+several ways to achieve this, depending on the context and requirements.
+
+
+Method 1a: Declare the I2C devices by bus number
+------------------------------------------------
+
+This method is appropriate when the I2C bus is a system bus as is the case
+for many embedded systems. On such systems, each I2C bus has a number
+which is known in advance. It is thus possible to pre-declare the I2C
+devices which live on this bus. This is done with an array of struct
+i2c_board_info which is registered by calling i2c_register_board_info().
+
+Example (from omap2 h4):
+
+static struct i2c_board_info h4_i2c_board_info[] __initdata = {
+ {
+ I2C_BOARD_INFO("isp1301_omap", 0x2d),
+ .irq = OMAP_GPIO_IRQ(125),
+ },
+ { /* EEPROM on mainboard */
+ I2C_BOARD_INFO("24c01", 0x52),
+ .platform_data = &m24c01,
+ },
+ { /* EEPROM on cpu card */
+ I2C_BOARD_INFO("24c01", 0x57),
+ .platform_data = &m24c01,
+ },
+};
+
+static void __init omap_h4_init(void)
+{
+ (...)
+ i2c_register_board_info(1, h4_i2c_board_info,
+ ARRAY_SIZE(h4_i2c_board_info));
+ (...)
+}
+
+The above code declares 3 devices on I2C bus 1, including their respective
+addresses and custom data needed by their drivers. When the I2C bus in
+question is registered, the I2C devices will be instantiated automatically
+by i2c-core.
+
+The devices will be automatically unbound and destroyed when the I2C bus
+they sit on goes away (if ever.)
+
+
+Method 1b: Declare the I2C devices via devicetree
+-------------------------------------------------
+
+This method has the same implications as method 1a. The declaration of I2C
+devices is here done via devicetree as subnodes of the master controller.
+
+Example:
+
+ i2c1: i2c@400a0000 {
+ /* ... master properties skipped ... */
+ clock-frequency = <100000>;
+
+ flash@50 {
+ compatible = "atmel,24c256";
+ reg = <0x50>;
+ };
+
+ pca9532: gpio@60 {
+ compatible = "nxp,pca9532";
+ gpio-controller;
+ #gpio-cells = <2>;
+ reg = <0x60>;
+ };
+ };
+
+Here, two devices are attached to the bus using a speed of 100kHz. For
+additional properties which might be needed to set up the device, please refer
+to its devicetree documentation in Documentation/devicetree/bindings/.
+
+
+Method 1c: Declare the I2C devices via ACPI
+-------------------------------------------
+
+ACPI can also describe I2C devices. There is special documentation for this
+which is currently located at Documentation/acpi/enumeration.txt.
+
+
+Method 2: Instantiate the devices explicitly
+--------------------------------------------
+
+This method is appropriate when a larger device uses an I2C bus for
+internal communication. A typical case is TV adapters. These can have a
+tuner, a video decoder, an audio decoder, etc. usually connected to the
+main chip by the means of an I2C bus. You won't know the number of the I2C
+bus in advance, so the method 1 described above can't be used. Instead,
+you can instantiate your I2C devices explicitly. This is done by filling
+a struct i2c_board_info and calling i2c_new_device().
+
+Example (from the sfe4001 network driver):
+
+static struct i2c_board_info sfe4001_hwmon_info = {
+ I2C_BOARD_INFO("max6647", 0x4e),
+};
+
+int sfe4001_init(struct efx_nic *efx)
+{
+ (...)
+ efx->board_info.hwmon_client =
+ i2c_new_device(&efx->i2c_adap, &sfe4001_hwmon_info);
+
+ (...)
+}
+
+The above code instantiates 1 I2C device on the I2C bus which is on the
+network adapter in question.
+
+A variant of this is when you don't know for sure if an I2C device is
+present or not (for example for an optional feature which is not present
+on cheap variants of a board but you have no way to tell them apart), or
+it may have different addresses from one board to the next (manufacturer
+changing its design without notice). In this case, you can call
+i2c_new_probed_device() instead of i2c_new_device().
+
+Example (from the nxp OHCI driver):
+
+static const unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END };
+
+static int usb_hcd_nxp_probe(struct platform_device *pdev)
+{
+ (...)
+ struct i2c_adapter *i2c_adap;
+ struct i2c_board_info i2c_info;
+
+ (...)
+ i2c_adap = i2c_get_adapter(2);
+ memset(&i2c_info, 0, sizeof(struct i2c_board_info));
+ strlcpy(i2c_info.type, "isp1301_nxp", I2C_NAME_SIZE);
+ isp1301_i2c_client = i2c_new_probed_device(i2c_adap, &i2c_info,
+ normal_i2c, NULL);
+ i2c_put_adapter(i2c_adap);
+ (...)
+}
+
+The above code instantiates up to 1 I2C device on the I2C bus which is on
+the OHCI adapter in question. It first tries at address 0x2c, if nothing
+is found there it tries address 0x2d, and if still nothing is found, it
+simply gives up.
+
+The driver which instantiated the I2C device is responsible for destroying
+it on cleanup. This is done by calling i2c_unregister_device() on the
+pointer that was earlier returned by i2c_new_device() or
+i2c_new_probed_device().
+
+
+Method 3: Probe an I2C bus for certain devices
+----------------------------------------------
+
+Sometimes you do not have enough information about an I2C device, not even
+to call i2c_new_probed_device(). The typical case is hardware monitoring
+chips on PC mainboards. There are several dozen models, which can live
+at 25 different addresses. Given the huge number of mainboards out there,
+it is next to impossible to build an exhaustive list of the hardware
+monitoring chips being used. Fortunately, most of these chips have
+manufacturer and device ID registers, so they can be identified by
+probing.
+
+In that case, I2C devices are neither declared nor instantiated
+explicitly. Instead, i2c-core will probe for such devices as soon as their
+drivers are loaded, and if any is found, an I2C device will be
+instantiated automatically. In order to prevent any misbehavior of this
+mechanism, the following restrictions apply:
+* The I2C device driver must implement the detect() method, which
+ identifies a supported device by reading from arbitrary registers.
+* Only buses which are likely to have a supported device and agree to be
+ probed, will be probed. For example this avoids probing for hardware
+ monitoring chips on a TV adapter.
+
+Example:
+See lm90_driver and lm90_detect() in drivers/hwmon/lm90.c
+
+I2C devices instantiated as a result of such a successful probe will be
+destroyed automatically when the driver which detected them is removed,
+or when the underlying I2C bus is itself destroyed, whichever happens
+first.
+
+Those of you familiar with the i2c subsystem of 2.4 kernels and early 2.6
+kernels will find out that this method 3 is essentially similar to what
+was done there. Two significant differences are:
+* Probing is only one way to instantiate I2C devices now, while it was the
+ only way back then. Where possible, methods 1 and 2 should be preferred.
+ Method 3 should only be used when there is no other way, as it can have
+ undesirable side effects.
+* I2C buses must now explicitly say which I2C driver classes can probe
+ them (by the means of the class bitfield), while all I2C buses were
+ probed by default back then. The default is an empty class which means
+ that no probing happens. The purpose of the class bitfield is to limit
+ the aforementioned undesirable side effects.
+
+Once again, method 3 should be avoided wherever possible. Explicit device
+instantiation (methods 1 and 2) is much preferred for it is safer and
+faster.
+
+
+Method 4: Instantiate from user-space
+-------------------------------------
+
+In general, the kernel should know which I2C devices are connected and
+what addresses they live at. However, in certain cases, it does not, so a
+sysfs interface was added to let the user provide the information. This
+interface is made of 2 attribute files which are created in every I2C bus
+directory: new_device and delete_device. Both files are write only and you
+must write the right parameters to them in order to properly instantiate,
+respectively delete, an I2C device.
+
+File new_device takes 2 parameters: the name of the I2C device (a string)
+and the address of the I2C device (a number, typically expressed in
+hexadecimal starting with 0x, but can also be expressed in decimal.)
+
+File delete_device takes a single parameter: the address of the I2C
+device. As no two devices can live at the same address on a given I2C
+segment, the address is sufficient to uniquely identify the device to be
+deleted.
+
+Example:
+# echo eeprom 0x50 > /sys/bus/i2c/devices/i2c-3/new_device
+
+While this interface should only be used when in-kernel device declaration
+can't be done, there is a variety of cases where it can be helpful:
+* The I2C driver usually detects devices (method 3 above) but the bus
+ segment your device lives on doesn't have the proper class bit set and
+ thus detection doesn't trigger.
+* The I2C driver usually detects devices, but your device lives at an
+ unexpected address.
+* The I2C driver usually detects devices, but your device is not detected,
+ either because the detection routine is too strict, or because your
+ device is not officially supported yet but you know it is compatible.
+* You are developing a driver on a test board, where you soldered the I2C
+ device yourself.
+
+This interface is a replacement for the force_* module parameters some I2C
+drivers implement. Being implemented in i2c-core rather than in each
+device driver individually, it is much more efficient, and also has the
+advantage that you do not have to reload the driver to change a setting.
+You can also instantiate the device before the driver is loaded or even
+available, and you don't need to know what driver the device needs.
diff --git a/Documentation/i2c/muxes/i2c-mux-gpio b/Documentation/i2c/muxes/i2c-mux-gpio
new file mode 100644
index 000000000..d4d91a53f
--- /dev/null
+++ b/Documentation/i2c/muxes/i2c-mux-gpio
@@ -0,0 +1,83 @@
+Kernel driver i2c-gpio-mux
+
+Author: Peter Korsgaard <peter.korsgaard@barco.com>
+
+Description
+-----------
+
+i2c-gpio-mux is an i2c mux driver providing access to I2C bus segments
+from a master I2C bus and a hardware MUX controlled through GPIO pins.
+
+E.G.:
+
+ ---------- ---------- Bus segment 1 - - - - -
+ | | SCL/SDA | |-------------- | |
+ | |------------| |
+ | | | | Bus segment 2 | |
+ | Linux | GPIO 1..N | MUX |--------------- Devices
+ | |------------| | | |
+ | | | | Bus segment M
+ | | | |---------------| |
+ ---------- ---------- - - - - -
+
+SCL/SDA of the master I2C bus is multiplexed to bus segment 1..M
+according to the settings of the GPIO pins 1..N.
+
+Usage
+-----
+
+i2c-gpio-mux uses the platform bus, so you need to provide a struct
+platform_device with the platform_data pointing to a struct
+gpio_i2cmux_platform_data with the I2C adapter number of the master
+bus, the number of bus segments to create and the GPIO pins used
+to control it. See include/linux/i2c-gpio-mux.h for details.
+
+E.G. something like this for a MUX providing 4 bus segments
+controlled through 3 GPIO pins:
+
+#include <linux/i2c-gpio-mux.h>
+#include <linux/platform_device.h>
+
+static const unsigned myboard_gpiomux_gpios[] = {
+ AT91_PIN_PC26, AT91_PIN_PC25, AT91_PIN_PC24
+};
+
+static const unsigned myboard_gpiomux_values[] = {
+ 0, 1, 2, 3
+};
+
+static struct gpio_i2cmux_platform_data myboard_i2cmux_data = {
+ .parent = 1,
+ .base_nr = 2, /* optional */
+ .values = myboard_gpiomux_values,
+ .n_values = ARRAY_SIZE(myboard_gpiomux_values),
+ .gpios = myboard_gpiomux_gpios,
+ .n_gpios = ARRAY_SIZE(myboard_gpiomux_gpios),
+ .idle = 4, /* optional */
+};
+
+static struct platform_device myboard_i2cmux = {
+ .name = "i2c-gpio-mux",
+ .id = 0,
+ .dev = {
+ .platform_data = &myboard_i2cmux_data,
+ },
+};
+
+If you don't know the absolute GPIO pin numbers at registration time,
+you can instead provide a chip name (.chip_name) and relative GPIO pin
+numbers, and the i2c-gpio-mux driver will do the work for you,
+including deferred probing if the GPIO chip isn't immediately
+available.
+
+Device Registration
+-------------------
+
+When registering your i2c-gpio-mux device, you should pass the number
+of any GPIO pin it uses as the device ID. This guarantees that every
+instance has a different ID.
+
+Alternatively, if you don't need a stable device name, you can simply
+pass PLATFORM_DEVID_AUTO as the device ID, and the platform core will
+assign a dynamic ID to your device. If you do not know the absolute
+GPIO pin numbers at registration time, this is even the only option.
diff --git a/Documentation/i2c/old-module-parameters b/Documentation/i2c/old-module-parameters
new file mode 100644
index 000000000..8e2b629d5
--- /dev/null
+++ b/Documentation/i2c/old-module-parameters
@@ -0,0 +1,44 @@
+I2C device driver binding control from user-space
+=================================================
+
+Up to kernel 2.6.32, many i2c drivers used helper macros provided by
+<linux/i2c.h> which created standard module parameters to let the user
+control how the driver would probe i2c buses and attach to devices. These
+parameters were known as "probe" (to let the driver probe for an extra
+address), "force" (to forcibly attach the driver to a given device) and
+"ignore" (to prevent a driver from probing a given address).
+
+With the conversion of the i2c subsystem to the standard device driver
+binding model, it became clear that these per-module parameters were no
+longer needed, and that a centralized implementation was possible. The new,
+sysfs-based interface is described in the documentation file
+"instantiating-devices", section "Method 4: Instantiate from user-space".
+
+Below is a mapping from the old module parameters to the new interface.
+
+Attaching a driver to an I2C device
+-----------------------------------
+
+Old method (module parameters):
+# modprobe <driver> probe=1,0x2d
+# modprobe <driver> force=1,0x2d
+# modprobe <driver> force_<device>=1,0x2d
+
+New method (sysfs interface):
+# echo <device> 0x2d > /sys/bus/i2c/devices/i2c-1/new_device
+
+Preventing a driver from attaching to an I2C device
+---------------------------------------------------
+
+Old method (module parameters):
+# modprobe <driver> ignore=1,0x2f
+
+New method (sysfs interface):
+# echo dummy 0x2f > /sys/bus/i2c/devices/i2c-1/new_device
+# modprobe <driver>
+
+Of course, it is important to instantiate the "dummy" device before loading
+the driver. The dummy device will be handled by i2c-core itself, preventing
+other drivers from binding to it later on. If there is a real device at the
+problematic address, and you want another driver to bind to it, then simply
+pass the name of the device in question instead of "dummy".
diff --git a/Documentation/i2c/slave-eeprom-backend b/Documentation/i2c/slave-eeprom-backend
new file mode 100644
index 000000000..c8444ef82
--- /dev/null
+++ b/Documentation/i2c/slave-eeprom-backend
@@ -0,0 +1,14 @@
+Linux I2C slave eeprom backend
+==============================
+
+by Wolfram Sang <wsa@sang-engineering.com> in 2014-15
+
+This is a proof-of-concept backend which acts like an EEPROM on the connected
+I2C bus. The memory contents can be modified from userspace via this file
+located in sysfs:
+
+ /sys/bus/i2c/devices/<device-direcory>/slave-eeprom
+
+As of 2015, Linux doesn't support poll on binary sysfs files, so there is no
+notfication when another master changed the content.
+
diff --git a/Documentation/i2c/slave-interface b/Documentation/i2c/slave-interface
new file mode 100644
index 000000000..b228ca54b
--- /dev/null
+++ b/Documentation/i2c/slave-interface
@@ -0,0 +1,179 @@
+Linux I2C slave interface description
+=====================================
+
+by Wolfram Sang <wsa@sang-engineering.com> in 2014-15
+
+Linux can also be an I2C slave in case I2C controllers have slave support.
+Besides this HW requirement, one also needs a software backend providing the
+actual functionality. An example for this is the slave-eeprom driver, which
+acts as a dual memory driver. While another I2C master on the bus can access it
+like a regular EEPROM, the Linux I2C slave can access the content via sysfs and
+retrieve/provide information as needed. The software backend driver and the I2C
+bus driver communicate via events. Here is a small graph visualizing the data
+flow and the means by which data is transported. The dotted line marks only one
+example. The backend could also use e.g. a character device, be in-kernel
+only, or something completely different:
+
+
+ e.g. sysfs I2C slave events I/O registers
+ +-----------+ v +---------+ v +--------+ v +------------+
+ | Userspace +........+ Backend +-----------+ Driver +-----+ Controller |
+ +-----------+ +---------+ +--------+ +------------+
+ | |
+ ----------------------------------------------------------------+-- I2C
+ --------------------------------------------------------------+---- Bus
+
+Note: Technically, there is also the I2C core between the backend and the
+driver. However, at this time of writing, the layer is transparent.
+
+
+User manual
+===========
+
+I2C slave backends behave like standard I2C clients. So, you can instantiate
+them as described in the document 'instantiating-devices'. A quick example for
+instantiating the slave-eeprom driver from userspace at address 0x64 on bus 1:
+
+ # echo slave-24c02 0x64 > /sys/bus/i2c/devices/i2c-1/new_device
+
+Each backend should come with separate documentation to describe its specific
+behaviour and setup.
+
+
+Developer manual
+================
+
+I2C slave events
+----------------
+
+The bus driver sends an event to the backend using the following function:
+
+ ret = i2c_slave_event(client, event, &val)
+
+'client' describes the i2c slave device. 'event' is one of the special event
+types described hereafter. 'val' holds an u8 value for the data byte to be
+read/written and is thus bidirectional. The pointer to val must always be
+provided even if val is not used for an event, i.e. don't use NULL here. 'ret'
+is the return value from the backend. Mandatory events must be provided by the
+bus drivers and must be checked for by backend drivers.
+
+Event types:
+
+* I2C_SLAVE_WRITE_REQUESTED (mandatory)
+
+'val': unused
+'ret': always 0
+
+Another I2C master wants to write data to us. This event should be sent once
+our own address and the write bit was detected. The data did not arrive yet, so
+there is nothing to process or return. Wakeup or initialization probably needs
+to be done, though.
+
+* I2C_SLAVE_READ_REQUESTED (mandatory)
+
+'val': backend returns first byte to be sent
+'ret': always 0
+
+Another I2C master wants to read data from us. This event should be sent once
+our own address and the read bit was detected. After returning, the bus driver
+should transmit the first byte.
+
+* I2C_SLAVE_WRITE_RECEIVED (mandatory)
+
+'val': bus driver delivers received byte
+'ret': 0 if the byte should be acked, some errno if the byte should be nacked
+
+Another I2C master has sent a byte to us which needs to be set in 'val'. If 'ret'
+is zero, the bus driver should ack this byte. If 'ret' is an errno, then the byte
+should be nacked.
+
+* I2C_SLAVE_READ_PROCESSED (mandatory)
+
+'val': backend returns next byte to be sent
+'ret': always 0
+
+The bus driver requests the next byte to be sent to another I2C master in
+'val'. Important: This does not mean that the previous byte has been acked, it
+only means that the previous byte is shifted out to the bus! To ensure seamless
+transmission, most hardware requests the next byte when the previous one is
+still shifted out. If the master sends NACK and stops reading after the byte
+currently shifted out, this byte requested here is never used. It very likely
+needs to be sent again on the next I2C_SLAVE_READ_REQUEST, depending a bit on
+your backend, though.
+
+* I2C_SLAVE_STOP (mandatory)
+
+'val': unused
+'ret': always 0
+
+A stop condition was received. This can happen anytime and the backend should
+reset its state machine for I2C transfers to be able to receive new requests.
+
+
+Software backends
+-----------------
+
+If you want to write a software backend:
+
+* use a standard i2c_driver and its matching mechanisms
+* write the slave_callback which handles the above slave events
+ (best using a state machine)
+* register this callback via i2c_slave_register()
+
+Check the i2c-slave-eeprom driver as an example.
+
+
+Bus driver support
+------------------
+
+If you want to add slave support to the bus driver:
+
+* implement calls to register/unregister the slave and add those to the
+ struct i2c_algorithm. When registering, you probably need to set the i2c
+ slave address and enable slave specific interrupts. If you use runtime pm, you
+ should use pm_runtime_forbid() because your device usually needs to be powered
+ on always to be able to detect its slave address. When unregistering, do the
+ inverse of the above.
+
+* Catch the slave interrupts and send appropriate i2c_slave_events to the backend.
+
+Check the i2c-rcar driver as an example.
+
+
+About ACK/NACK
+--------------
+
+It is good behaviour to always ACK the address phase, so the master knows if a
+device is basically present or if it mysteriously disappeared. Using NACK to
+state being busy is troublesome. SMBus demands to always ACK the address phase,
+while the I2C specification is more loose on that. Most I2C controllers also
+automatically ACK when detecting their slave addresses, so there is no option
+to NACK them. For those reasons, this API does not support NACK in the address
+phase.
+
+Currently, there is no slave event to report if the master did ACK or NACK a
+byte when it reads from us. We could make this an optional event if the need
+arises. However, cases should be extremely rare because the master is expected
+to send STOP after that and we have an event for that. Also, keep in mind not
+all I2C controllers have the possibility to report that event.
+
+
+About buffers
+-------------
+
+During development of this API, the question of using buffers instead of just
+bytes came up. Such an extension might be possible, usefulness is unclear at
+this time of writing. Some points to keep in mind when using buffers:
+
+* Buffers should be opt-in and slave drivers will always have to support
+ byte-based transactions as the ultimate fallback because this is how the
+ majority of HW works.
+
+* For backends simulating hardware registers, buffers are not helpful because
+ on writes an action should be immediately triggered. For reads, the data in
+ the buffer might get stale.
+
+* A master can send STOP at any time. For partially transferred buffers, this
+ means additional code to handle this exception. Such code tends to be
+ error-prone.
+
diff --git a/Documentation/i2c/smbus-protocol b/Documentation/i2c/smbus-protocol
new file mode 100644
index 000000000..6012b12b3
--- /dev/null
+++ b/Documentation/i2c/smbus-protocol
@@ -0,0 +1,273 @@
+SMBus Protocol Summary
+======================
+
+The following is a summary of the SMBus protocol. It applies to
+all revisions of the protocol (1.0, 1.1, and 2.0).
+Certain protocol features which are not supported by
+this package are briefly described at the end of this document.
+
+Some adapters understand only the SMBus (System Management Bus) protocol,
+which is a subset from the I2C protocol. Fortunately, many devices use
+only the same subset, which makes it possible to put them on an SMBus.
+
+If you write a driver for some I2C device, please try to use the SMBus
+commands if at all possible (if the device uses only that subset of the
+I2C protocol). This makes it possible to use the device driver on both
+SMBus adapters and I2C adapters (the SMBus command set is automatically
+translated to I2C on I2C adapters, but plain I2C commands can not be
+handled at all on most pure SMBus adapters).
+
+Below is a list of SMBus protocol operations, and the functions executing
+them. Note that the names used in the SMBus protocol specifications usually
+don't match these function names. For some of the operations which pass a
+single data byte, the functions using SMBus protocol operation names execute
+a different protocol operation entirely.
+
+Each transaction type corresponds to a functionality flag. Before calling a
+transaction function, a device driver should always check (just once) for
+the corresponding functionality flag to ensure that the underlying I2C
+adapter supports the transaction in question. See
+<file:Documentation/i2c/functionality> for the details.
+
+
+Key to symbols
+==============
+
+S (1 bit) : Start bit
+P (1 bit) : Stop bit
+Rd/Wr (1 bit) : Read/Write bit. Rd equals 1, Wr equals 0.
+A, NA (1 bit) : Accept and reverse accept bit.
+Addr (7 bits): I2C 7 bit address. Note that this can be expanded as usual to
+ get a 10 bit I2C address.
+Comm (8 bits): Command byte, a data byte which often selects a register on
+ the device.
+Data (8 bits): A plain data byte. Sometimes, I write DataLow, DataHigh
+ for 16 bit data.
+Count (8 bits): A data byte containing the length of a block operation.
+
+[..]: Data sent by I2C device, as opposed to data sent by the host adapter.
+
+
+SMBus Quick Command
+===================
+
+This sends a single bit to the device, at the place of the Rd/Wr bit.
+
+A Addr Rd/Wr [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_QUICK
+
+
+SMBus Receive Byte: i2c_smbus_read_byte()
+==========================================
+
+This reads a single byte from a device, without specifying a device
+register. Some devices are so simple that this interface is enough; for
+others, it is a shorthand if you want to read the same register as in
+the previous SMBus command.
+
+S Addr Rd [A] [Data] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_READ_BYTE
+
+
+SMBus Send Byte: i2c_smbus_write_byte()
+========================================
+
+This operation is the reverse of Receive Byte: it sends a single byte
+to a device. See Receive Byte for more information.
+
+S Addr Wr [A] Data [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_BYTE
+
+
+SMBus Read Byte: i2c_smbus_read_byte_data()
+============================================
+
+This reads a single byte from a device, from a designated register.
+The register is specified through the Comm byte.
+
+S Addr Wr [A] Comm [A] S Addr Rd [A] [Data] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_READ_BYTE_DATA
+
+
+SMBus Read Word: i2c_smbus_read_word_data()
+============================================
+
+This operation is very like Read Byte; again, data is read from a
+device, from a designated register that is specified through the Comm
+byte. But this time, the data is a complete word (16 bits).
+
+S Addr Wr [A] Comm [A] S Addr Rd [A] [DataLow] A [DataHigh] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_READ_WORD_DATA
+
+Note the convenience function i2c_smbus_read_word_swapped is
+available for reads where the two data bytes are the other way
+around (not SMBus compliant, but very popular.)
+
+
+SMBus Write Byte: i2c_smbus_write_byte_data()
+==============================================
+
+This writes a single byte to a device, to a designated register. The
+register is specified through the Comm byte. This is the opposite of
+the Read Byte operation.
+
+S Addr Wr [A] Comm [A] Data [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_BYTE_DATA
+
+
+SMBus Write Word: i2c_smbus_write_word_data()
+==============================================
+
+This is the opposite of the Read Word operation. 16 bits
+of data is written to a device, to the designated register that is
+specified through the Comm byte.
+
+S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_WORD_DATA
+
+Note the convenience function i2c_smbus_write_word_swapped is
+available for writes where the two data bytes are the other way
+around (not SMBus compliant, but very popular.)
+
+
+SMBus Process Call:
+===================
+
+This command selects a device register (through the Comm byte), sends
+16 bits of data to it, and reads 16 bits of data in return.
+
+S Addr Wr [A] Comm [A] DataLow [A] DataHigh [A]
+ S Addr Rd [A] [DataLow] A [DataHigh] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_PROC_CALL
+
+
+SMBus Block Read: i2c_smbus_read_block_data()
+==============================================
+
+This command reads a block of up to 32 bytes from a device, from a
+designated register that is specified through the Comm byte. The amount
+of data is specified by the device in the Count byte.
+
+S Addr Wr [A] Comm [A]
+ S Addr Rd [A] [Count] A [Data] A [Data] A ... A [Data] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_READ_BLOCK_DATA
+
+
+SMBus Block Write: i2c_smbus_write_block_data()
+================================================
+
+The opposite of the Block Read command, this writes up to 32 bytes to
+a device, to a designated register that is specified through the
+Comm byte. The amount of data is specified in the Count byte.
+
+S Addr Wr [A] Comm [A] Count [A] Data [A] Data [A] ... [A] Data [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_BLOCK_DATA
+
+
+SMBus Block Write - Block Read Process Call
+===========================================
+
+SMBus Block Write - Block Read Process Call was introduced in
+Revision 2.0 of the specification.
+
+This command selects a device register (through the Comm byte), sends
+1 to 31 bytes of data to it, and reads 1 to 31 bytes of data in return.
+
+S Addr Wr [A] Comm [A] Count [A] Data [A] ...
+ S Addr Rd [A] [Count] A [Data] ... A P
+
+Functionality flag: I2C_FUNC_SMBUS_BLOCK_PROC_CALL
+
+
+SMBus Host Notify
+=================
+
+This command is sent from a SMBus device acting as a master to the
+SMBus host acting as a slave.
+It is the same form as Write Word, with the command code replaced by the
+alerting device's address.
+
+[S] [HostAddr] [Wr] A [DevAddr] A [DataLow] A [DataHigh] A [P]
+
+
+Packet Error Checking (PEC)
+===========================
+
+Packet Error Checking was introduced in Revision 1.1 of the specification.
+
+PEC adds a CRC-8 error-checking byte to transfers using it, immediately
+before the terminating STOP.
+
+
+Address Resolution Protocol (ARP)
+=================================
+
+The Address Resolution Protocol was introduced in Revision 2.0 of
+the specification. It is a higher-layer protocol which uses the
+messages above.
+
+ARP adds device enumeration and dynamic address assignment to
+the protocol. All ARP communications use slave address 0x61 and
+require PEC checksums.
+
+
+SMBus Alert
+===========
+
+SMBus Alert was introduced in Revision 1.0 of the specification.
+
+The SMBus alert protocol allows several SMBus slave devices to share a
+single interrupt pin on the SMBus master, while still allowing the master
+to know which slave triggered the interrupt.
+
+This is implemented the following way in the Linux kernel:
+* I2C bus drivers which support SMBus alert should call
+ i2c_setup_smbus_alert() to setup SMBus alert support.
+* I2C drivers for devices which can trigger SMBus alerts should implement
+ the optional alert() callback.
+
+
+I2C Block Transactions
+======================
+
+The following I2C block transactions are supported by the
+SMBus layer and are described here for completeness.
+They are *NOT* defined by the SMBus specification.
+
+I2C block transactions do not limit the number of bytes transferred
+but the SMBus layer places a limit of 32 bytes.
+
+
+I2C Block Read: i2c_smbus_read_i2c_block_data()
+================================================
+
+This command reads a block of bytes from a device, from a
+designated register that is specified through the Comm byte.
+
+S Addr Wr [A] Comm [A]
+ S Addr Rd [A] [Data] A [Data] A ... A [Data] NA P
+
+Functionality flag: I2C_FUNC_SMBUS_READ_I2C_BLOCK
+
+
+I2C Block Write: i2c_smbus_write_i2c_block_data()
+==================================================
+
+The opposite of the Block Read command, this writes bytes to
+a device, to a designated register that is specified through the
+Comm byte. Note that command lengths of 0, 2, or more bytes are
+supported as they are indistinguishable from data.
+
+S Addr Wr [A] Comm [A] Data [A] Data [A] ... [A] Data [A] P
+
+Functionality flag: I2C_FUNC_SMBUS_WRITE_I2C_BLOCK
diff --git a/Documentation/i2c/summary b/Documentation/i2c/summary
new file mode 100644
index 000000000..809541ab3
--- /dev/null
+++ b/Documentation/i2c/summary
@@ -0,0 +1,43 @@
+I2C and SMBus
+=============
+
+I2C (pronounce: I squared C) is a protocol developed by Philips. It is a
+slow two-wire protocol (variable speed, up to 400 kHz), with a high speed
+extension (3.4 MHz). It provides an inexpensive bus for connecting many
+types of devices with infrequent or low bandwidth communications needs.
+I2C is widely used with embedded systems. Some systems use variants that
+don't meet branding requirements, and so are not advertised as being I2C.
+
+SMBus (System Management Bus) is based on the I2C protocol, and is mostly
+a subset of I2C protocols and signaling. Many I2C devices will work on an
+SMBus, but some SMBus protocols add semantics beyond what is required to
+achieve I2C branding. Modern PC mainboards rely on SMBus. The most common
+devices connected through SMBus are RAM modules configured using I2C EEPROMs,
+and hardware monitoring chips.
+
+Because the SMBus is mostly a subset of the generalized I2C bus, we can
+use its protocols on many I2C systems. However, there are systems that don't
+meet both SMBus and I2C electrical constraints; and others which can't
+implement all the common SMBus protocol semantics or messages.
+
+
+Terminology
+===========
+
+When we talk about I2C, we use the following terms:
+ Bus -> Algorithm
+ Adapter
+ Device -> Driver
+ Client
+
+An Algorithm driver contains general code that can be used for a whole class
+of I2C adapters. Each specific adapter driver either depends on one algorithm
+driver, or includes its own implementation.
+
+A Driver driver (yes, this sounds ridiculous, sorry) contains the general
+code to access some type of device. Each detected device gets its own
+data in the Client structure. Usually, Driver and Client are more closely
+integrated than Algorithm and Adapter.
+
+For a given configuration, you will need a driver for your I2C bus, and
+drivers for your I2C devices (usually one driver for each device).
diff --git a/Documentation/i2c/ten-bit-addresses b/Documentation/i2c/ten-bit-addresses
new file mode 100644
index 000000000..cdfe13901
--- /dev/null
+++ b/Documentation/i2c/ten-bit-addresses
@@ -0,0 +1,24 @@
+The I2C protocol knows about two kinds of device addresses: normal 7 bit
+addresses, and an extended set of 10 bit addresses. The sets of addresses
+do not intersect: the 7 bit address 0x10 is not the same as the 10 bit
+address 0x10 (though a single device could respond to both of them).
+
+I2C messages to and from 10-bit address devices have a different format.
+See the I2C specification for the details.
+
+The current 10 bit address support is minimal. It should work, however
+you can expect some problems along the way:
+* Not all bus drivers support 10-bit addresses. Some don't because the
+ hardware doesn't support them (SMBus doesn't require 10-bit address
+ support for example), some don't because nobody bothered adding the
+ code (or it's there but not working properly.) Software implementation
+ (i2c-algo-bit) is known to work.
+* Some optional features do not support 10-bit addresses. This is the
+ case of automatic detection and instantiation of devices by their,
+ drivers, for example.
+* Many user-space packages (for example i2c-tools) lack support for
+ 10-bit addresses.
+
+Note that 10-bit address devices are still pretty rare, so the limitations
+listed above could stay for a long time, maybe even forever if nobody
+needs them to be fixed.
diff --git a/Documentation/i2c/upgrading-clients b/Documentation/i2c/upgrading-clients
new file mode 100644
index 000000000..ccba3ffd6
--- /dev/null
+++ b/Documentation/i2c/upgrading-clients
@@ -0,0 +1,279 @@
+Upgrading I2C Drivers to the new 2.6 Driver Model
+=================================================
+
+Ben Dooks <ben-linux@fluff.org>
+
+Introduction
+------------
+
+This guide outlines how to alter existing Linux 2.6 client drivers from
+the old to the new new binding methods.
+
+
+Example old-style driver
+------------------------
+
+
+struct example_state {
+ struct i2c_client client;
+ ....
+};
+
+static struct i2c_driver example_driver;
+
+static unsigned short ignore[] = { I2C_CLIENT_END };
+static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END };
+
+I2C_CLIENT_INSMOD;
+
+static int example_attach(struct i2c_adapter *adap, int addr, int kind)
+{
+ struct example_state *state;
+ struct device *dev = &adap->dev; /* to use for dev_ reports */
+ int ret;
+
+ state = kzalloc(sizeof(struct example_state), GFP_KERNEL);
+ if (state == NULL) {
+ dev_err(dev, "failed to create our state\n");
+ return -ENOMEM;
+ }
+
+ example->client.addr = addr;
+ example->client.flags = 0;
+ example->client.adapter = adap;
+
+ i2c_set_clientdata(&state->i2c_client, state);
+ strlcpy(client->i2c_client.name, "example", I2C_NAME_SIZE);
+
+ ret = i2c_attach_client(&state->i2c_client);
+ if (ret < 0) {
+ dev_err(dev, "failed to attach client\n");
+ kfree(state);
+ return ret;
+ }
+
+ dev = &state->i2c_client.dev;
+
+ /* rest of the initialisation goes here. */
+
+ dev_info(dev, "example client created\n");
+
+ return 0;
+}
+
+static int example_detach(struct i2c_client *client)
+{
+ struct example_state *state = i2c_get_clientdata(client);
+
+ i2c_detach_client(client);
+ kfree(state);
+ return 0;
+}
+
+static int example_attach_adapter(struct i2c_adapter *adap)
+{
+ return i2c_probe(adap, &addr_data, example_attach);
+}
+
+static struct i2c_driver example_driver = {
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "example",
+ .pm = &example_pm_ops,
+ },
+ .attach_adapter = example_attach_adapter,
+ .detach_client = example_detach,
+};
+
+
+Updating the client
+-------------------
+
+The new style binding model will check against a list of supported
+devices and their associated address supplied by the code registering
+the busses. This means that the driver .attach_adapter and
+.detach_client methods can be removed, along with the addr_data,
+as follows:
+
+- static struct i2c_driver example_driver;
+
+- static unsigned short ignore[] = { I2C_CLIENT_END };
+- static unsigned short normal_addr[] = { OUR_ADDR, I2C_CLIENT_END };
+
+- I2C_CLIENT_INSMOD;
+
+- static int example_attach_adapter(struct i2c_adapter *adap)
+- {
+- return i2c_probe(adap, &addr_data, example_attach);
+- }
+
+ static struct i2c_driver example_driver = {
+- .attach_adapter = example_attach_adapter,
+- .detach_client = example_detach,
+ }
+
+Add the probe and remove methods to the i2c_driver, as so:
+
+ static struct i2c_driver example_driver = {
++ .probe = example_probe,
++ .remove = example_remove,
+ }
+
+Change the example_attach method to accept the new parameters
+which include the i2c_client that it will be working with:
+
+- static int example_attach(struct i2c_adapter *adap, int addr, int kind)
++ static int example_probe(struct i2c_client *client,
++ const struct i2c_device_id *id)
+
+Change the name of example_attach to example_probe to align it with the
+i2c_driver entry names. The rest of the probe routine will now need to be
+changed as the i2c_client has already been setup for use.
+
+The necessary client fields have already been setup before
+the probe function is called, so the following client setup
+can be removed:
+
+- example->client.addr = addr;
+- example->client.flags = 0;
+- example->client.adapter = adap;
+-
+- strlcpy(client->i2c_client.name, "example", I2C_NAME_SIZE);
+
+The i2c_set_clientdata is now:
+
+- i2c_set_clientdata(&state->client, state);
++ i2c_set_clientdata(client, state);
+
+The call to i2c_attach_client is no longer needed, if the probe
+routine exits successfully, then the driver will be automatically
+attached by the core. Change the probe routine as so:
+
+- ret = i2c_attach_client(&state->i2c_client);
+- if (ret < 0) {
+- dev_err(dev, "failed to attach client\n");
+- kfree(state);
+- return ret;
+- }
+
+
+Remove the storage of 'struct i2c_client' from the 'struct example_state'
+as we are provided with the i2c_client in our example_probe. Instead we
+store a pointer to it for when it is needed.
+
+struct example_state {
+- struct i2c_client client;
++ struct i2c_client *client;
+
+the new i2c client as so:
+
+- struct device *dev = &adap->dev; /* to use for dev_ reports */
++ struct device *dev = &i2c_client->dev; /* to use for dev_ reports */
+
+And remove the change after our client is attached, as the driver no
+longer needs to register a new client structure with the core:
+
+- dev = &state->i2c_client.dev;
+
+In the probe routine, ensure that the new state has the client stored
+in it:
+
+static int example_probe(struct i2c_client *i2c_client,
+ const struct i2c_device_id *id)
+{
+ struct example_state *state;
+ struct device *dev = &i2c_client->dev;
+ int ret;
+
+ state = kzalloc(sizeof(struct example_state), GFP_KERNEL);
+ if (state == NULL) {
+ dev_err(dev, "failed to create our state\n");
+ return -ENOMEM;
+ }
+
++ state->client = i2c_client;
+
+Update the detach method, by changing the name to _remove and
+to delete the i2c_detach_client call. It is possible that you
+can also remove the ret variable as it is not needed for any
+of the core functions.
+
+- static int example_detach(struct i2c_client *client)
++ static int example_remove(struct i2c_client *client)
+{
+ struct example_state *state = i2c_get_clientdata(client);
+
+- i2c_detach_client(client);
+
+And finally ensure that we have the correct ID table for the i2c-core
+and other utilities:
+
++ struct i2c_device_id example_idtable[] = {
++ { "example", 0 },
++ { }
++};
++
++MODULE_DEVICE_TABLE(i2c, example_idtable);
+
+static struct i2c_driver example_driver = {
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "example",
+ },
++ .id_table = example_ids,
+
+
+Our driver should now look like this:
+
+struct example_state {
+ struct i2c_client *client;
+ ....
+};
+
+static int example_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct example_state *state;
+ struct device *dev = &client->dev;
+
+ state = kzalloc(sizeof(struct example_state), GFP_KERNEL);
+ if (state == NULL) {
+ dev_err(dev, "failed to create our state\n");
+ return -ENOMEM;
+ }
+
+ state->client = client;
+ i2c_set_clientdata(client, state);
+
+ /* rest of the initialisation goes here. */
+
+ dev_info(dev, "example client created\n");
+
+ return 0;
+}
+
+static int example_remove(struct i2c_client *client)
+{
+ struct example_state *state = i2c_get_clientdata(client);
+
+ kfree(state);
+ return 0;
+}
+
+static struct i2c_device_id example_idtable[] = {
+ { "example", 0 },
+ { }
+};
+
+MODULE_DEVICE_TABLE(i2c, example_idtable);
+
+static struct i2c_driver example_driver = {
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "example",
+ .pm = &example_pm_ops,
+ },
+ .id_table = example_idtable,
+ .probe = example_probe,
+ .remove = example_remove,
+};
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
new file mode 100644
index 000000000..a755b141f
--- /dev/null
+++ b/Documentation/i2c/writing-clients
@@ -0,0 +1,403 @@
+This is a small guide for those who want to write kernel drivers for I2C
+or SMBus devices, using Linux as the protocol host/master (not slave).
+
+To set up a driver, you need to do several things. Some are optional, and
+some things can be done slightly or completely different. Use this as a
+guide, not as a rule book!
+
+
+General remarks
+===============
+
+Try to keep the kernel namespace as clean as possible. The best way to
+do this is to use a unique prefix for all global symbols. This is
+especially important for exported symbols, but it is a good idea to do
+it for non-exported symbols too. We will use the prefix `foo_' in this
+tutorial.
+
+
+The driver structure
+====================
+
+Usually, you will implement a single driver structure, and instantiate
+all clients from it. Remember, a driver structure contains general access
+routines, and should be zero-initialized except for fields with data you
+provide. A client structure holds device-specific information like the
+driver model device node, and its I2C address.
+
+static struct i2c_device_id foo_idtable[] = {
+ { "foo", my_id_for_foo },
+ { "bar", my_id_for_bar },
+ { }
+};
+
+MODULE_DEVICE_TABLE(i2c, foo_idtable);
+
+static struct i2c_driver foo_driver = {
+ .driver = {
+ .name = "foo",
+ .pm = &foo_pm_ops, /* optional */
+ },
+
+ .id_table = foo_idtable,
+ .probe = foo_probe,
+ .remove = foo_remove,
+ /* if device autodetection is needed: */
+ .class = I2C_CLASS_SOMETHING,
+ .detect = foo_detect,
+ .address_list = normal_i2c,
+
+ .shutdown = foo_shutdown, /* optional */
+ .command = foo_command, /* optional, deprecated */
+}
+
+The name field is the driver name, and must not contain spaces. It
+should match the module name (if the driver can be compiled as a module),
+although you can use MODULE_ALIAS (passing "foo" in this example) to add
+another name for the module. If the driver name doesn't match the module
+name, the module won't be automatically loaded (hotplug/coldplug).
+
+All other fields are for call-back functions which will be explained
+below.
+
+
+Extra client data
+=================
+
+Each client structure has a special `data' field that can point to any
+structure at all. You should use this to keep device-specific data.
+
+ /* store the value */
+ void i2c_set_clientdata(struct i2c_client *client, void *data);
+
+ /* retrieve the value */
+ void *i2c_get_clientdata(const struct i2c_client *client);
+
+Note that starting with kernel 2.6.34, you don't have to set the `data' field
+to NULL in remove() or if probe() failed anymore. The i2c-core does this
+automatically on these occasions. Those are also the only times the core will
+touch this field.
+
+
+Accessing the client
+====================
+
+Let's say we have a valid client structure. At some time, we will need
+to gather information from the client, or write new information to the
+client.
+
+I have found it useful to define foo_read and foo_write functions for this.
+For some cases, it will be easier to call the i2c functions directly,
+but many chips have some kind of register-value idea that can easily
+be encapsulated.
+
+The below functions are simple examples, and should not be copied
+literally.
+
+int foo_read_value(struct i2c_client *client, u8 reg)
+{
+ if (reg < 0x10) /* byte-sized register */
+ return i2c_smbus_read_byte_data(client, reg);
+ else /* word-sized register */
+ return i2c_smbus_read_word_data(client, reg);
+}
+
+int foo_write_value(struct i2c_client *client, u8 reg, u16 value)
+{
+ if (reg == 0x10) /* Impossible to write - driver error! */
+ return -EINVAL;
+ else if (reg < 0x10) /* byte-sized register */
+ return i2c_smbus_write_byte_data(client, reg, value);
+ else /* word-sized register */
+ return i2c_smbus_write_word_data(client, reg, value);
+}
+
+
+Probing and attaching
+=====================
+
+The Linux I2C stack was originally written to support access to hardware
+monitoring chips on PC motherboards, and thus used to embed some assumptions
+that were more appropriate to SMBus (and PCs) than to I2C. One of these
+assumptions was that most adapters and devices drivers support the SMBUS_QUICK
+protocol to probe device presence. Another was that devices and their drivers
+can be sufficiently configured using only such probe primitives.
+
+As Linux and its I2C stack became more widely used in embedded systems
+and complex components such as DVB adapters, those assumptions became more
+problematic. Drivers for I2C devices that issue interrupts need more (and
+different) configuration information, as do drivers handling chip variants
+that can't be distinguished by protocol probing, or which need some board
+specific information to operate correctly.
+
+
+Device/Driver Binding
+---------------------
+
+System infrastructure, typically board-specific initialization code or
+boot firmware, reports what I2C devices exist. For example, there may be
+a table, in the kernel or from the boot loader, identifying I2C devices
+and linking them to board-specific configuration information about IRQs
+and other wiring artifacts, chip type, and so on. That could be used to
+create i2c_client objects for each I2C device.
+
+I2C device drivers using this binding model work just like any other
+kind of driver in Linux: they provide a probe() method to bind to
+those devices, and a remove() method to unbind.
+
+ static int foo_probe(struct i2c_client *client,
+ const struct i2c_device_id *id);
+ static int foo_remove(struct i2c_client *client);
+
+Remember that the i2c_driver does not create those client handles. The
+handle may be used during foo_probe(). If foo_probe() reports success
+(zero not a negative status code) it may save the handle and use it until
+foo_remove() returns. That binding model is used by most Linux drivers.
+
+The probe function is called when an entry in the id_table name field
+matches the device's name. It is passed the entry that was matched so
+the driver knows which one in the table matched.
+
+
+Device Creation
+---------------
+
+If you know for a fact that an I2C device is connected to a given I2C bus,
+you can instantiate that device by simply filling an i2c_board_info
+structure with the device address and driver name, and calling
+i2c_new_device(). This will create the device, then the driver core will
+take care of finding the right driver and will call its probe() method.
+If a driver supports different device types, you can specify the type you
+want using the type field. You can also specify an IRQ and platform data
+if needed.
+
+Sometimes you know that a device is connected to a given I2C bus, but you
+don't know the exact address it uses. This happens on TV adapters for
+example, where the same driver supports dozens of slightly different
+models, and I2C device addresses change from one model to the next. In
+that case, you can use the i2c_new_probed_device() variant, which is
+similar to i2c_new_device(), except that it takes an additional list of
+possible I2C addresses to probe. A device is created for the first
+responsive address in the list. If you expect more than one device to be
+present in the address range, simply call i2c_new_probed_device() that
+many times.
+
+The call to i2c_new_device() or i2c_new_probed_device() typically happens
+in the I2C bus driver. You may want to save the returned i2c_client
+reference for later use.
+
+
+Device Detection
+----------------
+
+Sometimes you do not know in advance which I2C devices are connected to
+a given I2C bus. This is for example the case of hardware monitoring
+devices on a PC's SMBus. In that case, you may want to let your driver
+detect supported devices automatically. This is how the legacy model
+was working, and is now available as an extension to the standard
+driver model.
+
+You simply have to define a detect callback which will attempt to
+identify supported devices (returning 0 for supported ones and -ENODEV
+for unsupported ones), a list of addresses to probe, and a device type
+(or class) so that only I2C buses which may have that type of device
+connected (and not otherwise enumerated) will be probed. For example,
+a driver for a hardware monitoring chip for which auto-detection is
+needed would set its class to I2C_CLASS_HWMON, and only I2C adapters
+with a class including I2C_CLASS_HWMON would be probed by this driver.
+Note that the absence of matching classes does not prevent the use of
+a device of that type on the given I2C adapter. All it prevents is
+auto-detection; explicit instantiation of devices is still possible.
+
+Note that this mechanism is purely optional and not suitable for all
+devices. You need some reliable way to identify the supported devices
+(typically using device-specific, dedicated identification registers),
+otherwise misdetections are likely to occur and things can get wrong
+quickly. Keep in mind that the I2C protocol doesn't include any
+standard way to detect the presence of a chip at a given address, let
+alone a standard way to identify devices. Even worse is the lack of
+semantics associated to bus transfers, which means that the same
+transfer can be seen as a read operation by a chip and as a write
+operation by another chip. For these reasons, explicit device
+instantiation should always be preferred to auto-detection where
+possible.
+
+
+Device Deletion
+---------------
+
+Each I2C device which has been created using i2c_new_device() or
+i2c_new_probed_device() can be unregistered by calling
+i2c_unregister_device(). If you don't call it explicitly, it will be
+called automatically before the underlying I2C bus itself is removed, as a
+device can't survive its parent in the device driver model.
+
+
+Initializing the driver
+=======================
+
+When the kernel is booted, or when your foo driver module is inserted,
+you have to do some initializing. Fortunately, just registering the
+driver module is usually enough.
+
+static int __init foo_init(void)
+{
+ return i2c_add_driver(&foo_driver);
+}
+module_init(foo_init);
+
+static void __exit foo_cleanup(void)
+{
+ i2c_del_driver(&foo_driver);
+}
+module_exit(foo_cleanup);
+
+The module_i2c_driver() macro can be used to reduce above code.
+
+module_i2c_driver(foo_driver);
+
+Note that some functions are marked by `__init'. These functions can
+be removed after kernel booting (or module loading) is completed.
+Likewise, functions marked by `__exit' are dropped by the compiler when
+the code is built into the kernel, as they would never be called.
+
+
+Driver Information
+==================
+
+/* Substitute your own name and email address */
+MODULE_AUTHOR("Frodo Looijaard <frodol@dds.nl>"
+MODULE_DESCRIPTION("Driver for Barf Inc. Foo I2C devices");
+
+/* a few non-GPL license types are also allowed */
+MODULE_LICENSE("GPL");
+
+
+Power Management
+================
+
+If your I2C device needs special handling when entering a system low
+power state -- like putting a transceiver into a low power mode, or
+activating a system wakeup mechanism -- do that by implementing the
+appropriate callbacks for the dev_pm_ops of the driver (like suspend
+and resume).
+
+These are standard driver model calls, and they work just like they
+would for any other driver stack. The calls can sleep, and can use
+I2C messaging to the device being suspended or resumed (since their
+parent I2C adapter is active when these calls are issued, and IRQs
+are still enabled).
+
+
+System Shutdown
+===============
+
+If your I2C device needs special handling when the system shuts down
+or reboots (including kexec) -- like turning something off -- use a
+shutdown() method.
+
+Again, this is a standard driver model call, working just like it
+would for any other driver stack: the calls can sleep, and can use
+I2C messaging.
+
+
+Command function
+================
+
+A generic ioctl-like function call back is supported. You will seldom
+need this, and its use is deprecated anyway, so newer design should not
+use it.
+
+
+Sending and receiving
+=====================
+
+If you want to communicate with your device, there are several functions
+to do this. You can find all of them in <linux/i2c.h>.
+
+If you can choose between plain I2C communication and SMBus level
+communication, please use the latter. All adapters understand SMBus level
+commands, but only some of them understand plain I2C!
+
+
+Plain I2C communication
+-----------------------
+
+ int i2c_master_send(struct i2c_client *client, const char *buf,
+ int count);
+ int i2c_master_recv(struct i2c_client *client, char *buf, int count);
+
+These routines read and write some bytes from/to a client. The client
+contains the i2c address, so you do not have to include it. The second
+parameter contains the bytes to read/write, the third the number of bytes
+to read/write (must be less than the length of the buffer, also should be
+less than 64k since msg.len is u16.) Returned is the actual number of bytes
+read/written.
+
+ int i2c_transfer(struct i2c_adapter *adap, struct i2c_msg *msg,
+ int num);
+
+This sends a series of messages. Each message can be a read or write,
+and they can be mixed in any way. The transactions are combined: no
+stop bit is sent between transaction. The i2c_msg structure contains
+for each message the client address, the number of bytes of the message
+and the message data itself.
+
+You can read the file `i2c-protocol' for more information about the
+actual I2C protocol.
+
+
+SMBus communication
+-------------------
+
+ s32 i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr,
+ unsigned short flags, char read_write, u8 command,
+ int size, union i2c_smbus_data *data);
+
+This is the generic SMBus function. All functions below are implemented
+in terms of it. Never use this function directly!
+
+ s32 i2c_smbus_read_byte(struct i2c_client *client);
+ s32 i2c_smbus_write_byte(struct i2c_client *client, u8 value);
+ s32 i2c_smbus_read_byte_data(struct i2c_client *client, u8 command);
+ s32 i2c_smbus_write_byte_data(struct i2c_client *client,
+ u8 command, u8 value);
+ s32 i2c_smbus_read_word_data(struct i2c_client *client, u8 command);
+ s32 i2c_smbus_write_word_data(struct i2c_client *client,
+ u8 command, u16 value);
+ s32 i2c_smbus_read_block_data(struct i2c_client *client,
+ u8 command, u8 *values);
+ s32 i2c_smbus_write_block_data(struct i2c_client *client,
+ u8 command, u8 length, const u8 *values);
+ s32 i2c_smbus_read_i2c_block_data(struct i2c_client *client,
+ u8 command, u8 length, u8 *values);
+ s32 i2c_smbus_write_i2c_block_data(struct i2c_client *client,
+ u8 command, u8 length,
+ const u8 *values);
+
+These ones were removed from i2c-core because they had no users, but could
+be added back later if needed:
+
+ s32 i2c_smbus_write_quick(struct i2c_client *client, u8 value);
+ s32 i2c_smbus_process_call(struct i2c_client *client,
+ u8 command, u16 value);
+ s32 i2c_smbus_block_process_call(struct i2c_client *client,
+ u8 command, u8 length, u8 *values);
+
+All these transactions return a negative errno value on failure. The 'write'
+transactions return 0 on success; the 'read' transactions return the read
+value, except for block transactions, which return the number of values
+read. The block buffers need not be longer than 32 bytes.
+
+You can read the file `smbus-protocol' for more information about the
+actual SMBus protocol.
+
+
+General purpose routines
+========================
+
+Below all general purpose routines are listed, that were not mentioned
+before.
+
+ /* Return the adapter number for a specific adapter */
+ int i2c_adapter_id(struct i2c_adapter *adap);
diff --git a/Documentation/ia64/.gitignore b/Documentation/ia64/.gitignore
new file mode 100644
index 000000000..ab806edc8
--- /dev/null
+++ b/Documentation/ia64/.gitignore
@@ -0,0 +1 @@
+aliasing-test
diff --git a/Documentation/ia64/IRQ-redir.txt b/Documentation/ia64/IRQ-redir.txt
new file mode 100644
index 000000000..f7bd72261
--- /dev/null
+++ b/Documentation/ia64/IRQ-redir.txt
@@ -0,0 +1,69 @@
+IRQ affinity on IA64 platforms
+------------------------------
+ 07.01.2002, Erich Focht <efocht@ess.nec.de>
+
+
+By writing to /proc/irq/IRQ#/smp_affinity the interrupt routing can be
+controlled. The behavior on IA64 platforms is slightly different from
+that described in Documentation/IRQ-affinity.txt for i386 systems.
+
+Because of the usage of SAPIC mode and physical destination mode the
+IRQ target is one particular CPU and cannot be a mask of several
+CPUs. Only the first non-zero bit is taken into account.
+
+
+Usage examples:
+
+The target CPU has to be specified as a hexadecimal CPU mask. The
+first non-zero bit is the selected CPU. This format has been kept for
+compatibility reasons with i386.
+
+Set the delivery mode of interrupt 41 to fixed and route the
+interrupts to CPU #3 (logical CPU number) (2^3=0x08):
+ echo "8" >/proc/irq/41/smp_affinity
+
+Set the default route for IRQ number 41 to CPU 6 in lowest priority
+delivery mode (redirectable):
+ echo "r 40" >/proc/irq/41/smp_affinity
+
+The output of the command
+ cat /proc/irq/IRQ#/smp_affinity
+gives the target CPU mask for the specified interrupt vector. If the CPU
+mask is preceded by the character "r", the interrupt is redirectable
+(i.e. lowest priority mode routing is used), otherwise its route is
+fixed.
+
+
+
+Initialization and default behavior:
+
+If the platform features IRQ redirection (info provided by SAL) all
+IO-SAPIC interrupts are initialized with CPU#0 as their default target
+and the routing is the so called "lowest priority mode" (actually
+fixed SAPIC mode with hint). The XTP chipset registers are used as hints
+for the IRQ routing. Currently in Linux XTP registers can have three
+values:
+ - minimal for an idle task,
+ - normal if any other task runs,
+ - maximal if the CPU is going to be switched off.
+The IRQ is routed to the CPU with lowest XTP register value, the
+search begins at the default CPU. Therefore most of the interrupts
+will be handled by CPU #0.
+
+If the platform doesn't feature interrupt redirection IOSAPIC fixed
+routing is used. The target CPUs are distributed in a round robin
+manner. IRQs will be routed only to the selected target CPUs. Check
+with
+ cat /proc/interrupts
+
+
+
+Comments:
+
+On large (multi-node) systems it is recommended to route the IRQs to
+the node to which the corresponding device is connected.
+For systems like the NEC AzusA we get IRQ node-affinity for free. This
+is because usually the chipsets on each node redirect the interrupts
+only to their own CPUs (as they cannot see the XTP registers on the
+other nodes).
+
diff --git a/Documentation/ia64/Makefile b/Documentation/ia64/Makefile
new file mode 100644
index 000000000..d493163af
--- /dev/null
+++ b/Documentation/ia64/Makefile
@@ -0,0 +1,5 @@
+# List of programs to build
+hostprogs-y := aliasing-test
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
diff --git a/Documentation/ia64/README b/Documentation/ia64/README
new file mode 100644
index 000000000..aa17f2154
--- /dev/null
+++ b/Documentation/ia64/README
@@ -0,0 +1,43 @@
+ Linux kernel release 2.4.xx for the IA-64 Platform
+
+ These are the release notes for Linux version 2.4 for IA-64
+ platform. This document provides information specific to IA-64
+ ONLY, to get additional information about the Linux kernel also
+ read the original Linux README provided with the kernel.
+
+INSTALLING the kernel:
+
+ - IA-64 kernel installation is the same as the other platforms, see
+ original README for details.
+
+
+SOFTWARE REQUIREMENTS
+
+ Compiling and running this kernel requires an IA-64 compliant GCC
+ compiler. And various software packages also compiled with an
+ IA-64 compliant GCC compiler.
+
+
+CONFIGURING the kernel:
+
+ Configuration is the same, see original README for details.
+
+
+COMPILING the kernel:
+
+ - Compiling this kernel doesn't differ from other platform so read
+ the original README for details BUT make sure you have an IA-64
+ compliant GCC compiler.
+
+IA-64 SPECIFICS
+
+ - General issues:
+
+ o Hardly any performance tuning has been done. Obvious targets
+ include the library routines (IP checksum, etc.). Less
+ obvious targets include making sure we don't flush the TLB
+ needlessly, etc.
+
+ o SMP locks cleanup/optimization
+
+ o IA32 support. Currently experimental. It mostly works.
diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c
new file mode 100644
index 000000000..62a190d45
--- /dev/null
+++ b/Documentation/ia64/aliasing-test.c
@@ -0,0 +1,263 @@
+/*
+ * Exercise /dev/mem mmap cases that have been troublesome in the past
+ *
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/pci.h>
+
+int sum;
+
+static int map_mem(char *path, off_t offset, size_t length, int touch)
+{
+ int fd, rc;
+ void *addr;
+ int *c;
+
+ fd = open(path, O_RDWR);
+ if (fd == -1) {
+ perror(path);
+ return -1;
+ }
+
+ if (fnmatch("/proc/bus/pci/*", path, 0) == 0) {
+ rc = ioctl(fd, PCIIOC_MMAP_IS_MEM);
+ if (rc == -1)
+ perror("PCIIOC_MMAP_IS_MEM ioctl");
+ }
+
+ addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset);
+ if (addr == MAP_FAILED)
+ return 1;
+
+ if (touch) {
+ c = (int *) addr;
+ while (c < (int *) (addr + length))
+ sum += *c++;
+ }
+
+ rc = munmap(addr, length);
+ if (rc == -1) {
+ perror("munmap");
+ return -1;
+ }
+
+ close(fd);
+ return 0;
+}
+
+static int scan_tree(char *path, char *file, off_t offset, size_t length, int touch)
+{
+ struct dirent **namelist;
+ char *name, *path2;
+ int i, n, r, rc = 0, result = 0;
+ struct stat buf;
+
+ n = scandir(path, &namelist, 0, alphasort);
+ if (n < 0) {
+ perror("scandir");
+ return -1;
+ }
+
+ for (i = 0; i < n; i++) {
+ name = namelist[i]->d_name;
+
+ if (fnmatch(".", name, 0) == 0)
+ goto skip;
+ if (fnmatch("..", name, 0) == 0)
+ goto skip;
+
+ path2 = malloc(strlen(path) + strlen(name) + 3);
+ strcpy(path2, path);
+ strcat(path2, "/");
+ strcat(path2, name);
+
+ if (fnmatch(file, name, 0) == 0) {
+ rc = map_mem(path2, offset, length, touch);
+ if (rc == 0)
+ fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable");
+ else if (rc > 0)
+ fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length);
+ else {
+ fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length);
+ return rc;
+ }
+ } else {
+ r = lstat(path2, &buf);
+ if (r == 0 && S_ISDIR(buf.st_mode)) {
+ rc = scan_tree(path2, file, offset, length, touch);
+ if (rc < 0)
+ return rc;
+ }
+ }
+
+ result |= rc;
+ free(path2);
+
+skip:
+ free(namelist[i]);
+ }
+ free(namelist);
+ return result;
+}
+
+char buf[1024];
+
+static int read_rom(char *path)
+{
+ int fd, rc;
+ size_t size = 0;
+
+ fd = open(path, O_RDWR);
+ if (fd == -1) {
+ perror(path);
+ return -1;
+ }
+
+ rc = write(fd, "1", 2);
+ if (rc <= 0) {
+ close(fd);
+ perror("write");
+ return -1;
+ }
+
+ do {
+ rc = read(fd, buf, sizeof(buf));
+ if (rc > 0)
+ size += rc;
+ } while (rc > 0);
+
+ close(fd);
+ return size;
+}
+
+static int scan_rom(char *path, char *file)
+{
+ struct dirent **namelist;
+ char *name, *path2;
+ int i, n, r, rc = 0, result = 0;
+ struct stat buf;
+
+ n = scandir(path, &namelist, 0, alphasort);
+ if (n < 0) {
+ perror("scandir");
+ return -1;
+ }
+
+ for (i = 0; i < n; i++) {
+ name = namelist[i]->d_name;
+
+ if (fnmatch(".", name, 0) == 0)
+ goto skip;
+ if (fnmatch("..", name, 0) == 0)
+ goto skip;
+
+ path2 = malloc(strlen(path) + strlen(name) + 3);
+ strcpy(path2, path);
+ strcat(path2, "/");
+ strcat(path2, name);
+
+ if (fnmatch(file, name, 0) == 0) {
+ rc = read_rom(path2);
+
+ /*
+ * It's OK if the ROM is unreadable. Maybe there
+ * is no ROM, or some other error occurred. The
+ * important thing is that no MCA happened.
+ */
+ if (rc > 0)
+ fprintf(stderr, "PASS: %s read %d bytes\n", path2, rc);
+ else {
+ fprintf(stderr, "PASS: %s not readable\n", path2);
+ return rc;
+ }
+ } else {
+ r = lstat(path2, &buf);
+ if (r == 0 && S_ISDIR(buf.st_mode)) {
+ rc = scan_rom(path2, file);
+ if (rc < 0)
+ return rc;
+ }
+ }
+
+ result |= rc;
+ free(path2);
+
+skip:
+ free(namelist[i]);
+ }
+ free(namelist);
+ return result;
+}
+
+int main(void)
+{
+ int rc;
+
+ if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");
+
+ /*
+ * It's not safe to blindly read the VGA frame buffer. If you know
+ * how to poke the card the right way, it should respond, but it's
+ * not safe in general. Many machines, e.g., Intel chipsets, cover
+ * up a non-responding card by just returning -1, but others will
+ * report the failure as a machine check.
+ */
+ if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");
+
+ if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");
+
+ /*
+ * Often you can map all the individual pieces above (0-0xA0000,
+ * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
+ * thing at once. This is because the individual pieces use different
+ * attributes, and there's no single attribute supported over the
+ * whole region.
+ */
+ rc = map_mem("/dev/mem", 0, 1024*1024, 0);
+ if (rc == 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
+ else if (rc > 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");
+
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0);
+
+ scan_rom("/sys/devices", "rom");
+
+ scan_tree("/proc/bus/pci", "??.?", 0, 0xA0000, 1);
+ scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0);
+ scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1);
+ scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0);
+
+ return rc;
+}
diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt
new file mode 100644
index 000000000..5a4dea6ab
--- /dev/null
+++ b/Documentation/ia64/aliasing.txt
@@ -0,0 +1,221 @@
+ MEMORY ATTRIBUTE ALIASING ON IA-64
+
+ Bjorn Helgaas
+ <bjorn.helgaas@hp.com>
+ May 4, 2006
+
+
+MEMORY ATTRIBUTES
+
+ Itanium supports several attributes for virtual memory references.
+ The attribute is part of the virtual translation, i.e., it is
+ contained in the TLB entry. The ones of most interest to the Linux
+ kernel are:
+
+ WB Write-back (cacheable)
+ UC Uncacheable
+ WC Write-coalescing
+
+ System memory typically uses the WB attribute. The UC attribute is
+ used for memory-mapped I/O devices. The WC attribute is uncacheable
+ like UC is, but writes may be delayed and combined to increase
+ performance for things like frame buffers.
+
+ The Itanium architecture requires that we avoid accessing the same
+ page with both a cacheable mapping and an uncacheable mapping[1].
+
+ The design of the chipset determines which attributes are supported
+ on which regions of the address space. For example, some chipsets
+ support either WB or UC access to main memory, while others support
+ only WB access.
+
+MEMORY MAP
+
+ Platform firmware describes the physical memory map and the
+ supported attributes for each region. At boot-time, the kernel uses
+ the EFI GetMemoryMap() interface. ACPI can also describe memory
+ devices and the attributes they support, but Linux/ia64 currently
+ doesn't use this information.
+
+ The kernel uses the efi_memmap table returned from GetMemoryMap() to
+ learn the attributes supported by each region of physical address
+ space. Unfortunately, this table does not completely describe the
+ address space because some machines omit some or all of the MMIO
+ regions from the map.
+
+ The kernel maintains another table, kern_memmap, which describes the
+ memory Linux is actually using and the attribute for each region.
+ This contains only system memory; it does not contain MMIO space.
+
+ The kern_memmap table typically contains only a subset of the system
+ memory described by the efi_memmap. Linux/ia64 can't use all memory
+ in the system because of constraints imposed by the identity mapping
+ scheme.
+
+ The efi_memmap table is preserved unmodified because the original
+ boot-time information is required for kexec.
+
+KERNEL IDENTITY MAPPINGS
+
+ Linux/ia64 identity mappings are done with large pages, currently
+ either 16MB or 64MB, referred to as "granules." Cacheable mappings
+ are speculative[2], so the processor can read any location in the
+ page at any time, independent of the programmer's intentions. This
+ means that to avoid attribute aliasing, Linux can create a cacheable
+ identity mapping only when the entire granule supports cacheable
+ access.
+
+ Therefore, kern_memmap contains only full granule-sized regions that
+ can referenced safely by an identity mapping.
+
+ Uncacheable mappings are not speculative, so the processor will
+ generate UC accesses only to locations explicitly referenced by
+ software. This allows UC identity mappings to cover granules that
+ are only partially populated, or populated with a combination of UC
+ and WB regions.
+
+USER MAPPINGS
+
+ User mappings are typically done with 16K or 64K pages. The smaller
+ page size allows more flexibility because only 16K or 64K has to be
+ homogeneous with respect to memory attributes.
+
+POTENTIAL ATTRIBUTE ALIASING CASES
+
+ There are several ways the kernel creates new mappings:
+
+ mmap of /dev/mem
+
+ This uses remap_pfn_range(), which creates user mappings. These
+ mappings may be either WB or UC. If the region being mapped
+ happens to be in kern_memmap, meaning that it may also be mapped
+ by a kernel identity mapping, the user mapping must use the same
+ attribute as the kernel mapping.
+
+ If the region is not in kern_memmap, the user mapping should use
+ an attribute reported as being supported in the EFI memory map.
+
+ Since the EFI memory map does not describe MMIO on some
+ machines, this should use an uncacheable mapping as a fallback.
+
+ mmap of /sys/class/pci_bus/.../legacy_mem
+
+ This is very similar to mmap of /dev/mem, except that legacy_mem
+ only allows mmap of the one megabyte "legacy MMIO" area for a
+ specific PCI bus. Typically this is the first megabyte of
+ physical address space, but it may be different on machines with
+ several VGA devices.
+
+ "X" uses this to access VGA frame buffers. Using legacy_mem
+ rather than /dev/mem allows multiple instances of X to talk to
+ different VGA cards.
+
+ The /dev/mem mmap constraints apply.
+
+ mmap of /proc/bus/pci/.../??.?
+
+ This is an MMIO mmap of PCI functions, which additionally may or
+ may not be requested as using the WC attribute.
+
+ If WC is requested, and the region in kern_memmap is either WC
+ or UC, and the EFI memory map designates the region as WC, then
+ the WC mapping is allowed.
+
+ Otherwise, the user mapping must use the same attribute as the
+ kernel mapping.
+
+ read/write of /dev/mem
+
+ This uses copy_from_user(), which implicitly uses a kernel
+ identity mapping. This is obviously safe for things in
+ kern_memmap.
+
+ There may be corner cases of things that are not in kern_memmap,
+ but could be accessed this way. For example, registers in MMIO
+ space are not in kern_memmap, but could be accessed with a UC
+ mapping. This would not cause attribute aliasing. But
+ registers typically can be accessed only with four-byte or
+ eight-byte accesses, and the copy_from_user() path doesn't allow
+ any control over the access size, so this would be dangerous.
+
+ ioremap()
+
+ This returns a mapping for use inside the kernel.
+
+ If the region is in kern_memmap, we should use the attribute
+ specified there.
+
+ If the EFI memory map reports that the entire granule supports
+ WB, we should use that (granules that are partially reserved
+ or occupied by firmware do not appear in kern_memmap).
+
+ If the granule contains non-WB memory, but we can cover the
+ region safely with kernel page table mappings, we can use
+ ioremap_page_range() as most other architectures do.
+
+ Failing all of the above, we have to fall back to a UC mapping.
+
+PAST PROBLEM CASES
+
+ mmap of various MMIO regions from /dev/mem by "X" on Intel platforms
+
+ The EFI memory map may not report these MMIO regions.
+
+ These must be allowed so that X will work. This means that
+ when the EFI memory map is incomplete, every /dev/mem mmap must
+ succeed. It may create either WB or UC user mappings, depending
+ on whether the region is in kern_memmap or the EFI memory map.
+
+ mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
+
+ The EFI memory map reports the following attributes:
+ 0x00000-0x9FFFF WB only
+ 0xA0000-0xBFFFF UC only (VGA frame buffer)
+ 0xC0000-0xFFFFF WB only
+
+ This mmap is done with user pages, not kernel identity mappings,
+ so it is safe to use WB mappings.
+
+ The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000,
+ which uses a granule-sized UC mapping. This granule will cover some
+ WB-only memory, but since UC is non-speculative, the processor will
+ never generate an uncacheable reference to the WB-only areas unless
+ the driver explicitly touches them.
+
+ mmap of 0x0-0xFFFFF legacy_mem by "X"
+
+ If the EFI memory map reports that the entire range supports the
+ same attributes, we can allow the mmap (and we will prefer WB if
+ supported, as is the case with HP sx[12]000 machines with VGA
+ disabled).
+
+ If EFI reports the range as partly WB and partly UC (as on sx[12]000
+ machines with VGA enabled), we must fail the mmap because there's no
+ safe attribute to use.
+
+ If EFI reports some of the range but not all (as on Intel firmware
+ that doesn't report the VGA frame buffer at all), we should fail the
+ mmap and force the user to map just the specific region of interest.
+
+ mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
+
+ The EFI memory map reports the following attributes:
+ 0x00000-0xFFFFF WB only (no VGA MMIO hole)
+
+ This is a special case of the previous case, and the mmap should
+ fail for the same reason as above.
+
+ read of /sys/devices/.../rom
+
+ For VGA devices, this may cause an ioremap() of 0xC0000. This
+ used to be done with a UC mapping, because the VGA frame buffer
+ at 0xA0000 prevents use of a WB granule. The UC mapping causes
+ an MCA on HP sx[12]000 chipsets.
+
+ We should use WB page table mappings to avoid covering the VGA
+ frame buffer.
+
+NOTES
+
+ [1] SDM rev 2.2, vol 2, sec 4.4.1.
+ [2] SDM rev 2.2, vol 2, sec 4.4.6.
diff --git a/Documentation/ia64/efirtc.txt b/Documentation/ia64/efirtc.txt
new file mode 100644
index 000000000..057e6bebd
--- /dev/null
+++ b/Documentation/ia64/efirtc.txt
@@ -0,0 +1,128 @@
+EFI Real Time Clock driver
+-------------------------------
+S. Eranian <eranian@hpl.hp.com>
+March 2000
+
+I/ Introduction
+
+This document describes the efirtc.c driver has provided for
+the IA-64 platform.
+
+The purpose of this driver is to supply an API for kernel and user applications
+to get access to the Time Service offered by EFI version 0.92.
+
+EFI provides 4 calls one can make once the OS is booted: GetTime(),
+SetTime(), GetWakeupTime(), SetWakeupTime() which are all supported by this
+driver. We describe those calls as well the design of the driver in the
+following sections.
+
+II/ Design Decisions
+
+The original ideas was to provide a very simple driver to get access to,
+at first, the time of day service. This is required in order to access, in a
+portable way, the CMOS clock. A program like /sbin/hwclock uses such a clock
+to initialize the system view of the time during boot.
+
+Because we wanted to minimize the impact on existing user-level apps using
+the CMOS clock, we decided to expose an API that was very similar to the one
+used today with the legacy RTC driver (driver/char/rtc.c). However, because
+EFI provides a simpler services, not all ioctl() are available. Also
+new ioctl()s have been introduced for things that EFI provides but not the
+legacy.
+
+EFI uses a slightly different way of representing the time, noticeably
+the reference date is different. Year is the using the full 4-digit format.
+The Epoch is January 1st 1998. For backward compatibility reasons we don't
+expose this new way of representing time. Instead we use something very
+similar to the struct tm, i.e. struct rtc_time, as used by hwclock.
+One of the reasons for doing it this way is to allow for EFI to still evolve
+without necessarily impacting any of the user applications. The decoupling
+enables flexibility and permits writing wrapper code is ncase things change.
+
+The driver exposes two interfaces, one via the device file and a set of
+ioctl()s. The other is read-only via the /proc filesystem.
+
+As of today we don't offer a /proc/sys interface.
+
+To allow for a uniform interface between the legacy RTC and EFI time service,
+we have created the include/linux/rtc.h header file to contain only the
+"public" API of the two drivers. The specifics of the legacy RTC are still
+in include/linux/mc146818rtc.h.
+
+
+III/ Time of day service
+
+The part of the driver gives access to the time of day service of EFI.
+Two ioctl()s, compatible with the legacy RTC calls:
+
+ Read the CMOS clock: ioctl(d, RTC_RD_TIME, &rtc);
+
+ Write the CMOS clock: ioctl(d, RTC_SET_TIME, &rtc);
+
+The rtc is a pointer to a data structure defined in rtc.h which is close
+to a struct tm:
+
+struct rtc_time {
+ int tm_sec;
+ int tm_min;
+ int tm_hour;
+ int tm_mday;
+ int tm_mon;
+ int tm_year;
+ int tm_wday;
+ int tm_yday;
+ int tm_isdst;
+};
+
+The driver takes care of converting back an forth between the EFI time and
+this format.
+
+Those two ioctl()s can be exercised with the hwclock command:
+
+For reading:
+# /sbin/hwclock --show
+Mon Mar 6 15:32:32 2000 -0.910248 seconds
+
+For setting:
+# /sbin/hwclock --systohc
+
+Root privileges are required to be able to set the time of day.
+
+IV/ Wakeup Alarm service
+
+EFI provides an API by which one can program when a machine should wakeup,
+i.e. reboot. This is very different from the alarm provided by the legacy
+RTC which is some kind of interval timer alarm. For this reason we don't use
+the same ioctl()s to get access to the service. Instead we have
+introduced 2 news ioctl()s to the interface of an RTC.
+
+We have added 2 new ioctl()s that are specific to the EFI driver:
+
+ Read the current state of the alarm
+ ioctl(d, RTC_WKLAM_RD, &wkt)
+
+ Set the alarm or change its status
+ ioctl(d, RTC_WKALM_SET, &wkt)
+
+The wkt structure encapsulates a struct rtc_time + 2 extra fields to get
+status information:
+
+struct rtc_wkalrm {
+
+ unsigned char enabled; /* =1 if alarm is enabled */
+ unsigned char pending; /* =1 if alarm is pending */
+
+ struct rtc_time time;
+}
+
+As of today, none of the existing user-level apps supports this feature.
+However writing such a program should be hard by simply using those two
+ioctl().
+
+Root privileges are required to be able to set the alarm.
+
+V/ References.
+
+Checkout the following Web site for more information on EFI:
+
+http://developer.intel.com/technology/efi/
diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt
new file mode 100644
index 000000000..9f651c181
--- /dev/null
+++ b/Documentation/ia64/err_inject.txt
@@ -0,0 +1,1068 @@
+
+IPF Machine Check (MC) error inject tool
+========================================
+
+IPF Machine Check (MC) error inject tool is used to inject MC
+errors from Linux. The tool is a test bed for IPF MC work flow including
+hardware correctable error handling, OS recoverable error handling, MC
+event logging, etc.
+
+The tool includes two parts: a kernel driver and a user application
+sample. The driver provides interface to PAL to inject error
+and query error injection capabilities. The driver code is in
+arch/ia64/kernel/err_inject.c. The application sample (shown below)
+provides a combination of various errors and calls the driver's interface
+(sysfs interface) to inject errors or query error injection capabilities.
+
+The tool can be used to test Intel IPF machine MC handling capabilities.
+It's especially useful for people who can not access hardware MC injection
+tool to inject error. It's also very useful to integrate with other
+software test suits to do stressful testing on IPF.
+
+Below is a sample application as part of the whole tool. The sample
+can be used as a working test tool. Or it can be expanded to include
+more features. It also can be a integrated into a library or other user
+application to have more thorough test.
+
+The sample application takes err.conf as error configuration input. GCC
+compiles the code. After you install err_inject driver, you can run
+this sample application to inject errors.
+
+Errata: Itanium 2 Processors Specification Update lists some errata against
+the pal_mc_error_inject PAL procedure. The following err.conf has been tested
+on latest Montecito PAL.
+
+err.conf:
+
+#This is configuration file for err_inject_tool.
+#The format of the each line is:
+#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
+#where
+# cpu: logical cpu number the error will be inject in.
+# loop: times the error will be injected.
+# interval: In second. every so often one error is injected.
+# err_type_info, err_struct_info: PAL parameters.
+#
+#Note: All values are hex w/o or w/ 0x prefix.
+
+
+#On cpu2, inject only total 0x10 errors, interval 5 seconds
+#corrected, data cache, hier-2, physical addr(assigned by tool code).
+#working on Montecito latest PAL.
+2, 10, 5, 4101, 95
+
+#On cpu4, inject and consume total 0x10 errors, interval 5 seconds
+#corrected, data cache, hier-2, physical addr(assigned by tool code).
+#working on Montecito latest PAL.
+4, 10, 5, 4109, 95
+
+#On cpu15, inject and consume total 0x10 errors, interval 5 seconds
+#recoverable, DTR0, hier-2.
+#working on Montecito latest PAL.
+0xf, 0x10, 5, 4249, 15
+
+The sample application source code:
+
+err_injection_tool.c:
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Copyright (C) 2006 Intel Co
+ * Fenghua Yu <fenghua.yu@intel.com>
+ *
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sched.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+
+#define MAX_FN_SIZE 256
+#define MAX_BUF_SIZE 256
+#define DATA_BUF_SIZE 256
+#define NR_CPUS 512
+#define MAX_TASK_NUM 2048
+#define MIN_INTERVAL 5 // seconds
+#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte.
+#define PARA_FIELD_NUM 5
+#define MASK_SIZE (NR_CPUS/64)
+#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"
+
+int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);
+
+int verbose;
+#define vbprintf if (verbose) printf
+
+int log_info(int cpu, const char *fmt, ...)
+{
+ FILE *log;
+ char fn[MAX_FN_SIZE];
+ char buf[MAX_BUF_SIZE];
+ va_list args;
+
+ sprintf(fn, "%d.log", cpu);
+ log=fopen(fn, "a+");
+ if (log==NULL) {
+ perror("Error open:");
+ return -1;
+ }
+
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ memset(buf, 0, MAX_BUF_SIZE);
+ vsprintf(buf, fmt, args);
+ va_end(args);
+
+ fwrite(buf, sizeof(buf), 1, log);
+ fclose(log);
+
+ return 0;
+}
+
+typedef unsigned long u64;
+typedef unsigned int u32;
+
+typedef union err_type_info_u {
+ struct {
+ u64 mode : 3, /* 0-2 */
+ err_inj : 3, /* 3-5 */
+ err_sev : 2, /* 6-7 */
+ err_struct : 5, /* 8-12 */
+ struct_hier : 3, /* 13-15 */
+ reserved : 48; /* 16-63 */
+ } err_type_info_u;
+ u64 err_type_info;
+} err_type_info_t;
+
+typedef union err_struct_info_u {
+ struct {
+ u64 siv : 1, /* 0 */
+ c_t : 2, /* 1-2 */
+ cl_p : 3, /* 3-5 */
+ cl_id : 3, /* 6-8 */
+ cl_dp : 1, /* 9 */
+ reserved1 : 22, /* 10-31 */
+ tiv : 1, /* 32 */
+ trigger : 4, /* 33-36 */
+ trigger_pl : 3, /* 37-39 */
+ reserved2 : 24; /* 40-63 */
+ } err_struct_info_cache;
+ struct {
+ u64 siv : 1, /* 0 */
+ tt : 2, /* 1-2 */
+ tc_tr : 2, /* 3-4 */
+ tr_slot : 8, /* 5-12 */
+ reserved1 : 19, /* 13-31 */
+ tiv : 1, /* 32 */
+ trigger : 4, /* 33-36 */
+ trigger_pl : 3, /* 37-39 */
+ reserved2 : 24; /* 40-63 */
+ } err_struct_info_tlb;
+ struct {
+ u64 siv : 1, /* 0 */
+ regfile_id : 4, /* 1-4 */
+ reg_num : 7, /* 5-11 */
+ reserved1 : 20, /* 12-31 */
+ tiv : 1, /* 32 */
+ trigger : 4, /* 33-36 */
+ trigger_pl : 3, /* 37-39 */
+ reserved2 : 24; /* 40-63 */
+ } err_struct_info_register;
+ struct {
+ u64 reserved;
+ } err_struct_info_bus_processor_interconnect;
+ u64 err_struct_info;
+} err_struct_info_t;
+
+typedef union err_data_buffer_u {
+ struct {
+ u64 trigger_addr; /* 0-63 */
+ u64 inj_addr; /* 64-127 */
+ u64 way : 5, /* 128-132 */
+ index : 20, /* 133-152 */
+ : 39; /* 153-191 */
+ } err_data_buffer_cache;
+ struct {
+ u64 trigger_addr; /* 0-63 */
+ u64 inj_addr; /* 64-127 */
+ u64 way : 5, /* 128-132 */
+ index : 20, /* 133-152 */
+ reserved : 39; /* 153-191 */
+ } err_data_buffer_tlb;
+ struct {
+ u64 trigger_addr; /* 0-63 */
+ } err_data_buffer_register;
+ struct {
+ u64 reserved; /* 0-63 */
+ } err_data_buffer_bus_processor_interconnect;
+ u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
+} err_data_buffer_t;
+
+typedef union capabilities_u {
+ struct {
+ u64 i : 1,
+ d : 1,
+ rv : 1,
+ tag : 1,
+ data : 1,
+ mesi : 1,
+ dp : 1,
+ reserved1 : 3,
+ pa : 1,
+ va : 1,
+ wi : 1,
+ reserved2 : 20,
+ trigger : 1,
+ trigger_pl : 1,
+ reserved3 : 30;
+ } capabilities_cache;
+ struct {
+ u64 d : 1,
+ i : 1,
+ rv : 1,
+ tc : 1,
+ tr : 1,
+ reserved1 : 27,
+ trigger : 1,
+ trigger_pl : 1,
+ reserved2 : 30;
+ } capabilities_tlb;
+ struct {
+ u64 gr_b0 : 1,
+ gr_b1 : 1,
+ fr : 1,
+ br : 1,
+ pr : 1,
+ ar : 1,
+ cr : 1,
+ rr : 1,
+ pkr : 1,
+ dbr : 1,
+ ibr : 1,
+ pmc : 1,
+ pmd : 1,
+ reserved1 : 3,
+ regnum : 1,
+ reserved2 : 15,
+ trigger : 1,
+ trigger_pl : 1,
+ reserved3 : 30;
+ } capabilities_register;
+ struct {
+ u64 reserved;
+ } capabilities_bus_processor_interconnect;
+} capabilities_t;
+
+typedef struct resources_s {
+ u64 ibr0 : 1,
+ ibr2 : 1,
+ ibr4 : 1,
+ ibr6 : 1,
+ dbr0 : 1,
+ dbr2 : 1,
+ dbr4 : 1,
+ dbr6 : 1,
+ reserved : 48;
+} resources_t;
+
+
+long get_page_size(void)
+{
+ long page_size=sysconf(_SC_PAGESIZE);
+ return page_size;
+}
+
+#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
+#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
+#define SHM_VA 0x2000000100000000
+
+int shmid;
+void *shmaddr;
+
+int create_shm(void)
+{
+ key_t key;
+ char fn[MAX_FN_SIZE];
+
+ /* cpu0 is always existing */
+ sprintf(fn, PATH_FORMAT, 0);
+ if ((key = ftok(fn, 's')) == -1) {
+ perror("ftok");
+ return -1;
+ }
+
+ shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
+ if (shmid == -1) {
+ if (errno==EEXIST) {
+ shmid = shmget(key, SHM_SIZE, 0);
+ if (shmid == -1) {
+ perror("shmget");
+ return -1;
+ }
+ }
+ else {
+ perror("shmget");
+ return -1;
+ }
+ }
+ vbprintf("shmid=%d", shmid);
+
+ /* connect to the segment: */
+ shmaddr = shmat(shmid, (void *)SHM_VA, 0);
+ if (shmaddr == (void*)-1) {
+ perror("shmat");
+ return -1;
+ }
+
+ memset(shmaddr, 0, SHM_SIZE);
+ mlock(shmaddr, SHM_SIZE);
+
+ return 0;
+}
+
+int free_shm()
+{
+ munlock(shmaddr, SHM_SIZE);
+ shmdt(shmaddr);
+ semctl(shmid, 0, IPC_RMID);
+
+ return 0;
+}
+
+#ifdef _SEM_SEMUN_UNDEFINED
+union semun
+{
+ int val;
+ struct semid_ds *buf;
+ unsigned short int *array;
+ struct seminfo *__buf;
+};
+#endif
+
+u32 mode=1; /* 1: physical mode; 2: virtual mode. */
+int one_lock=1;
+key_t key[NR_CPUS];
+int semid[NR_CPUS];
+
+int create_sem(int cpu)
+{
+ union semun arg;
+ char fn[MAX_FN_SIZE];
+ int sid;
+
+ sprintf(fn, PATH_FORMAT, cpu);
+ sprintf(fn, "%s/%s", fn, "err_type_info");
+ if ((key[cpu] = ftok(fn, 'e')) == -1) {
+ perror("ftok");
+ return -1;
+ }
+
+ if (semid[cpu]!=0)
+ return 0;
+
+ /* clear old semaphore */
+ if ((sid = semget(key[cpu], 1, 0)) != -1)
+ semctl(sid, 0, IPC_RMID);
+
+ /* get one semaphore */
+ if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
+ perror("semget");
+ printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
+ (u64)key[cpu]);
+ return -1;
+ }
+
+ vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
+ (u64)key[cpu]);
+ /* initialize the semaphore to 1: */
+ arg.val = 1;
+ if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
+ perror("semctl");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int lock(int cpu)
+{
+ struct sembuf lock;
+
+ lock.sem_num = cpu;
+ lock.sem_op = 1;
+ semop(semid[cpu], &lock, 1);
+
+ return 0;
+}
+
+static int unlock(int cpu)
+{
+ struct sembuf unlock;
+
+ unlock.sem_num = cpu;
+ unlock.sem_op = -1;
+ semop(semid[cpu], &unlock, 1);
+
+ return 0;
+}
+
+void free_sem(int cpu)
+{
+ semctl(semid[cpu], 0, IPC_RMID);
+}
+
+int wr_multi(char *fn, unsigned long *data, int size)
+{
+ int fd;
+ char buf[MAX_BUF_SIZE];
+ int ret;
+
+ if (size==1)
+ sprintf(buf, "%lx", *data);
+ else if (size==3)
+ sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
+ else {
+ fprintf(stderr,"write to file with wrong size!\n");
+ return -1;
+ }
+
+ fd=open(fn, O_RDWR);
+ if (!fd) {
+ perror("Error:");
+ return -1;
+ }
+ ret=write(fd, buf, sizeof(buf));
+ close(fd);
+ return ret;
+}
+
+int wr(char *fn, unsigned long data)
+{
+ return wr_multi(fn, &data, 1);
+}
+
+int rd(char *fn, unsigned long *data)
+{
+ int fd;
+ char buf[MAX_BUF_SIZE];
+
+ fd=open(fn, O_RDONLY);
+ if (fd<0) {
+ perror("Error:");
+ return -1;
+ }
+ read(fd, buf, MAX_BUF_SIZE);
+ *data=strtoul(buf, NULL, 16);
+ close(fd);
+ return 0;
+}
+
+int rd_status(char *path, int *status)
+{
+ char fn[MAX_FN_SIZE];
+ sprintf(fn, "%s/status", path);
+ if (rd(fn, (u64*)status)<0) {
+ perror("status reading error.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int rd_capabilities(char *path, u64 *capabilities)
+{
+ char fn[MAX_FN_SIZE];
+ sprintf(fn, "%s/capabilities", path);
+ if (rd(fn, capabilities)<0) {
+ perror("capabilities reading error.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int rd_all(char *path)
+{
+ unsigned long err_type_info, err_struct_info, err_data_buffer;
+ int status;
+ unsigned long capabilities, resources;
+ char fn[MAX_FN_SIZE];
+
+ sprintf(fn, "%s/err_type_info", path);
+ if (rd(fn, &err_type_info)<0) {
+ perror("err_type_info reading error.\n");
+ return -1;
+ }
+ printf("err_type_info=%lx\n", err_type_info);
+
+ sprintf(fn, "%s/err_struct_info", path);
+ if (rd(fn, &err_struct_info)<0) {
+ perror("err_struct_info reading error.\n");
+ return -1;
+ }
+ printf("err_struct_info=%lx\n", err_struct_info);
+
+ sprintf(fn, "%s/err_data_buffer", path);
+ if (rd(fn, &err_data_buffer)<0) {
+ perror("err_data_buffer reading error.\n");
+ return -1;
+ }
+ printf("err_data_buffer=%lx\n", err_data_buffer);
+
+ sprintf(fn, "%s/status", path);
+ if (rd("status", (u64*)&status)<0) {
+ perror("status reading error.\n");
+ return -1;
+ }
+ printf("status=%d\n", status);
+
+ sprintf(fn, "%s/capabilities", path);
+ if (rd(fn,&capabilities)<0) {
+ perror("capabilities reading error.\n");
+ return -1;
+ }
+ printf("capabilities=%lx\n", capabilities);
+
+ sprintf(fn, "%s/resources", path);
+ if (rd(fn, &resources)<0) {
+ perror("resources reading error.\n");
+ return -1;
+ }
+ printf("resources=%lx\n", resources);
+
+ return 0;
+}
+
+int query_capabilities(char *path, err_type_info_t err_type_info,
+ u64 *capabilities)
+{
+ char fn[MAX_FN_SIZE];
+ err_struct_info_t err_struct_info;
+ err_data_buffer_t err_data_buffer;
+
+ err_struct_info.err_struct_info=0;
+ memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
+
+ sprintf(fn, "%s/err_type_info", path);
+ wr(fn, err_type_info.err_type_info);
+ sprintf(fn, "%s/err_struct_info", path);
+ wr(fn, 0x0);
+ sprintf(fn, "%s/err_data_buffer", path);
+ wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
+
+ // Fire pal_mc_error_inject procedure.
+ sprintf(fn, "%s/call_start", path);
+ wr(fn, mode);
+
+ if (rd_capabilities(path, capabilities)<0)
+ return -1;
+
+ return 0;
+}
+
+int query_all_capabilities()
+{
+ int status;
+ err_type_info_t err_type_info;
+ int err_sev, err_struct, struct_hier;
+ int cap=0;
+ u64 capabilities;
+ char path[MAX_FN_SIZE];
+
+ err_type_info.err_type_info=0; // Initial
+ err_type_info.err_type_info_u.mode=0; // Query mode;
+ err_type_info.err_type_info_u.err_inj=0;
+
+ printf("All capabilities implemented in pal_mc_error_inject:\n");
+ sprintf(path, PATH_FORMAT ,0);
+ for (err_sev=0;err_sev<3;err_sev++)
+ for (err_struct=0;err_struct<5;err_struct++)
+ for (struct_hier=0;struct_hier<5;struct_hier++)
+ {
+ status=-1;
+ capabilities=0;
+ err_type_info.err_type_info_u.err_sev=err_sev;
+ err_type_info.err_type_info_u.err_struct=err_struct;
+ err_type_info.err_type_info_u.struct_hier=struct_hier;
+
+ if (query_capabilities(path, err_type_info, &capabilities)<0)
+ continue;
+
+ if (rd_status(path, &status)<0)
+ continue;
+
+ if (status==0) {
+ cap=1;
+ printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
+ err_sev, err_struct, struct_hier);
+ printf("capabilities 0x%lx\n", capabilities);
+ }
+ }
+ if (!cap) {
+ printf("No capabilities supported.\n");
+ return 0;
+ }
+
+ return 0;
+}
+
+int err_inject(int cpu, char *path, err_type_info_t err_type_info,
+ err_struct_info_t err_struct_info,
+ err_data_buffer_t err_data_buffer)
+{
+ int status;
+ char fn[MAX_FN_SIZE];
+
+ log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
+ err_type_info.err_type_info,
+ err_struct_info.err_struct_info);
+ log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
+ err_data_buffer.err_data_buffer[0],
+ err_data_buffer.err_data_buffer[1],
+ err_data_buffer.err_data_buffer[2]);
+ sprintf(fn, "%s/err_type_info", path);
+ wr(fn, err_type_info.err_type_info);
+ sprintf(fn, "%s/err_struct_info", path);
+ wr(fn, err_struct_info.err_struct_info);
+ sprintf(fn, "%s/err_data_buffer", path);
+ wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
+
+ // Fire pal_mc_error_inject procedure.
+ sprintf(fn, "%s/call_start", path);
+ wr(fn,mode);
+
+ if (rd_status(path, &status)<0) {
+ vbprintf("fail: read status\n");
+ return -100;
+ }
+
+ if (status!=0) {
+ log_info(cpu, "fail: status=%d\n", status);
+ return status;
+ }
+
+ return status;
+}
+
+static int construct_data_buf(char *path, err_type_info_t err_type_info,
+ err_struct_info_t err_struct_info,
+ err_data_buffer_t *err_data_buffer,
+ void *va1)
+{
+ char fn[MAX_FN_SIZE];
+ u64 virt_addr=0, phys_addr=0;
+
+ vbprintf("va1=%lx\n", (u64)va1);
+ memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
+
+ switch (err_type_info.err_type_info_u.err_struct) {
+ case 1: // Cache
+ switch (err_struct_info.err_struct_info_cache.cl_id) {
+ case 1: //Virtual addr
+ err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
+ break;
+ case 2: //Phys addr
+ sprintf(fn, "%s/virtual_to_phys", path);
+ virt_addr=(u64)va1;
+ if (wr(fn,virt_addr)<0)
+ return -1;
+ rd(fn, &phys_addr);
+ err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
+ break;
+ default:
+ printf("Not supported cl_id\n");
+ break;
+ }
+ break;
+ case 2: // TLB
+ break;
+ case 3: // Register file
+ break;
+ case 4: // Bus/system interconnect
+ default:
+ printf("Not supported err_struct\n");
+ break;
+ }
+
+ return 0;
+}
+
+typedef struct {
+ u64 cpu;
+ u64 loop;
+ u64 interval;
+ u64 err_type_info;
+ u64 err_struct_info;
+ u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
+} parameters_t;
+
+parameters_t line_para;
+int para;
+
+static int empty_data_buffer(u64 *err_data_buffer)
+{
+ int empty=1;
+ int i;
+
+ for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
+ if (err_data_buffer[i]!=-1)
+ empty=0;
+
+ return empty;
+}
+
+int err_inj()
+{
+ err_type_info_t err_type_info;
+ err_struct_info_t err_struct_info;
+ err_data_buffer_t err_data_buffer;
+ int count;
+ FILE *fp;
+ unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
+ u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
+ int num;
+ int i;
+ char path[MAX_FN_SIZE];
+ parameters_t parameters[MAX_TASK_NUM]={};
+ pid_t child_pid[MAX_TASK_NUM];
+ time_t current_time;
+ int status;
+
+ if (!para) {
+ fp=fopen("err.conf", "r");
+ if (fp==NULL) {
+ perror("Error open err.conf");
+ return -1;
+ }
+
+ num=0;
+ while (!feof(fp)) {
+ char buf[256];
+ memset(buf,0,256);
+ fgets(buf, 256, fp);
+ count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
+ &cpu, &loop, &interval,&err_type_info_conf,
+ &err_struct_info_conf,
+ &err_data_buffer_conf[0],
+ &err_data_buffer_conf[1],
+ &err_data_buffer_conf[2]);
+ if (count!=PARA_FIELD_NUM+3) {
+ err_data_buffer_conf[0]=-1;
+ err_data_buffer_conf[1]=-1;
+ err_data_buffer_conf[2]=-1;
+ count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
+ &cpu, &loop, &interval,&err_type_info_conf,
+ &err_struct_info_conf);
+ if (count!=PARA_FIELD_NUM)
+ continue;
+ }
+
+ parameters[num].cpu=cpu;
+ parameters[num].loop=loop;
+ parameters[num].interval= interval>MIN_INTERVAL
+ ?interval:MIN_INTERVAL;
+ parameters[num].err_type_info=err_type_info_conf;
+ parameters[num].err_struct_info=err_struct_info_conf;
+ memcpy(parameters[num++].err_data_buffer,
+ err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
+
+ if (num>=MAX_TASK_NUM)
+ break;
+ }
+ }
+ else {
+ parameters[0].cpu=line_para.cpu;
+ parameters[0].loop=line_para.loop;
+ parameters[0].interval= line_para.interval>MIN_INTERVAL
+ ?line_para.interval:MIN_INTERVAL;
+ parameters[0].err_type_info=line_para.err_type_info;
+ parameters[0].err_struct_info=line_para.err_struct_info;
+ memcpy(parameters[0].err_data_buffer,
+ line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
+
+ num=1;
+ }
+
+ /* Create semaphore: If one_lock, one semaphore for all processors.
+ Otherwise, one semaphore for each processor. */
+ if (one_lock) {
+ if (create_sem(0)) {
+ printf("Can not create semaphore...exit\n");
+ free_sem(0);
+ return -1;
+ }
+ }
+ else {
+ for (i=0;i<num;i++) {
+ if (create_sem(parameters[i].cpu)) {
+ printf("Can not create semaphore for cpu%d...exit\n",i);
+ free_sem(parameters[num].cpu);
+ return -1;
+ }
+ }
+ }
+
+ /* Create a shm segment which will be used to inject/consume errors on.*/
+ if (create_shm()==-1) {
+ printf("Error to create shm...exit\n");
+ return -1;
+ }
+
+ for (i=0;i<num;i++) {
+ pid_t pid;
+
+ current_time=time(NULL);
+ log_info(parameters[i].cpu, "\nBegine at %s", ctime(&current_time));
+ log_info(parameters[i].cpu, "Configurations:\n");
+ log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
+ parameters[i].cpu,
+ parameters[i].loop,
+ parameters[i].interval);
+ log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
+ parameters[i].err_type_info,
+ parameters[i].err_struct_info);
+
+ sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
+ err_type_info.err_type_info=parameters[i].err_type_info;
+ err_struct_info.err_struct_info=parameters[i].err_struct_info;
+ memcpy(err_data_buffer.err_data_buffer,
+ parameters[i].err_data_buffer,
+ ERR_DATA_BUFFER_SIZE*8);
+
+ pid=fork();
+ if (pid==0) {
+ unsigned long mask[MASK_SIZE];
+ int j, k;
+
+ void *va1, *va2;
+
+ /* Allocate two memory areas va1 and va2 in shm */
+ va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
+ va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
+
+ vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
+ memset(va1, 0x1, PAGE_SIZE);
+ memset(va2, 0x2, PAGE_SIZE);
+
+ if (empty_data_buffer(err_data_buffer.err_data_buffer))
+ /* If not specified yet, construct data buffer
+ * with va1
+ */
+ construct_data_buf(path, err_type_info,
+ err_struct_info, &err_data_buffer,va1);
+
+ for (j=0;j<MASK_SIZE;j++)
+ mask[j]=0;
+
+ cpu=parameters[i].cpu;
+ k = cpu%64;
+ j = cpu/64;
+ mask[j] = 1UL << k;
+
+ if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
+ perror("Error sched_setaffinity:");
+ return -1;
+ }
+
+ for (j=0; j<parameters[i].loop; j++) {
+ log_info(parameters[i].cpu,"Injection ");
+ log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
+
+ parameters[i].cpu,j+1, parameters[i].loop);
+
+ /* Hold the lock */
+ if (one_lock)
+ lock(0);
+ else
+ /* Hold lock on this cpu */
+ lock(parameters[i].cpu);
+
+ if ((status=err_inject(parameters[i].cpu,
+ path, err_type_info,
+ err_struct_info, err_data_buffer))
+ ==0) {
+ /* consume the error for "inject only"*/
+ memcpy(va2, va1, PAGE_SIZE);
+ memcpy(va1, va2, PAGE_SIZE);
+ log_info(parameters[i].cpu,
+ "successful\n");
+ }
+ else {
+ log_info(parameters[i].cpu,"fail:");
+ log_info(parameters[i].cpu,
+ "status=%d\n", status);
+ unlock(parameters[i].cpu);
+ break;
+ }
+ if (one_lock)
+ /* Release the lock */
+ unlock(0);
+ /* Release lock on this cpu */
+ else
+ unlock(parameters[i].cpu);
+
+ if (j < parameters[i].loop-1)
+ sleep(parameters[i].interval);
+ }
+ current_time=time(NULL);
+ log_info(parameters[i].cpu, "Done at %s", ctime(&current_time));
+ return 0;
+ }
+ else if (pid<0) {
+ perror("Error fork:");
+ continue;
+ }
+ child_pid[i]=pid;
+ }
+ for (i=0;i<num;i++)
+ waitpid(child_pid[i], NULL, 0);
+
+ if (one_lock)
+ free_sem(0);
+ else
+ for (i=0;i<num;i++)
+ free_sem(parameters[i].cpu);
+
+ printf("All done.\n");
+
+ return 0;
+}
+
+void help()
+{
+ printf("err_inject_tool:\n");
+ printf("\t-q: query all capabilities. default: off\n");
+ printf("\t-m: procedure mode. 1: physical 2: virtual. default: 1\n");
+ printf("\t-i: inject errors. default: off\n");
+ printf("\t-l: one lock per cpu. default: one lock for all\n");
+ printf("\t-e: error parameters:\n");
+ printf("\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n");
+ printf("\t\t cpu: logical cpu number the error will be inject in.\n");
+ printf("\t\t loop: times the error will be injected.\n");
+ printf("\t\t interval: In second. every so often one error is injected.\n");
+ printf("\t\t err_type_info, err_struct_info: PAL parameters.\n");
+ printf("\t\t err_data_buffer: PAL parameter. Optional. If not present,\n");
+ printf("\t\t it's constructed by tool automatically. Be\n");
+ printf("\t\t careful to provide err_data_buffer and make\n");
+ printf("\t\t sure it's working with the environment.\n");
+ printf("\t Note:no space between error parameters.\n");
+ printf("\t default: Take error parameters from err.conf instead of command line.\n");
+ printf("\t-v: verbose. default: off\n");
+ printf("\t-h: help\n\n");
+ printf("The tool will take err.conf file as ");
+ printf("input to inject single or multiple errors ");
+ printf("on one or multiple cpus in parallel.\n");
+}
+
+int main(int argc, char **argv)
+{
+ char c;
+ int do_err_inj=0;
+ int do_query_all=0;
+ int count;
+ u32 m;
+
+ /* Default one lock for all cpu's */
+ one_lock=1;
+ while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
+ switch (c) {
+ case 'm': /* Procedure mode. 1: phys 2: virt */
+ count=sscanf(optarg, "%x", &m);
+ if (count!=1 || (m!=1 && m!=2)) {
+ printf("Wrong mode number.\n");
+ help();
+ return -1;
+ }
+ mode=m;
+ break;
+ case 'i': /* Inject errors */
+ do_err_inj=1;
+ break;
+ case 'q': /* Query */
+ do_query_all=1;
+ break;
+ case 'v': /* Verbose */
+ verbose=1;
+ break;
+ case 'l': /* One lock per cpu */
+ one_lock=0;
+ break;
+ case 'e': /* error arguments */
+ /* Take parameters:
+ * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
+ * err_data_buffer is optional. Recommend not to specify
+ * err_data_buffer. Better to use tool to generate it.
+ */
+ count=sscanf(optarg,
+ "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
+ &line_para.cpu,
+ &line_para.loop,
+ &line_para.interval,
+ &line_para.err_type_info,
+ &line_para.err_struct_info,
+ &line_para.err_data_buffer[0],
+ &line_para.err_data_buffer[1],
+ &line_para.err_data_buffer[2]);
+ if (count!=PARA_FIELD_NUM+3) {
+ line_para.err_data_buffer[0]=-1,
+ line_para.err_data_buffer[1]=-1,
+ line_para.err_data_buffer[2]=-1;
+ count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
+ &line_para.cpu,
+ &line_para.loop,
+ &line_para.interval,
+ &line_para.err_type_info,
+ &line_para.err_struct_info);
+ if (count!=PARA_FIELD_NUM) {
+ printf("Wrong error arguments.\n");
+ help();
+ return -1;
+ }
+ }
+ para=1;
+ break;
+ continue;
+ break;
+ case 'h':
+ help();
+ return 0;
+ default:
+ break;
+ }
+
+ if (do_query_all)
+ query_all_capabilities();
+ if (do_err_inj)
+ err_inj();
+
+ if (!do_query_all && !do_err_inj)
+ help();
+
+ return 0;
+}
+
diff --git a/Documentation/ia64/fsys.txt b/Documentation/ia64/fsys.txt
new file mode 100644
index 000000000..59dd689d9
--- /dev/null
+++ b/Documentation/ia64/fsys.txt
@@ -0,0 +1,286 @@
+-*-Mode: outline-*-
+
+ Light-weight System Calls for IA-64
+ -----------------------------------
+
+ Started: 13-Jan-2003
+ Last update: 27-Sep-2003
+
+ David Mosberger-Tang
+ <davidm@hpl.hp.com>
+
+Using the "epc" instruction effectively introduces a new mode of
+execution to the ia64 linux kernel. We call this mode the
+"fsys-mode". To recap, the normal states of execution are:
+
+ - kernel mode:
+ Both the register stack and the memory stack have been
+ switched over to kernel memory. The user-level state is saved
+ in a pt-regs structure at the top of the kernel memory stack.
+
+ - user mode:
+ Both the register stack and the kernel stack are in
+ user memory. The user-level state is contained in the
+ CPU registers.
+
+ - bank 0 interruption-handling mode:
+ This is the non-interruptible state which all
+ interruption-handlers start execution in. The user-level
+ state remains in the CPU registers and some kernel state may
+ be stored in bank 0 of registers r16-r31.
+
+In contrast, fsys-mode has the following special properties:
+
+ - execution is at privilege level 0 (most-privileged)
+
+ - CPU registers may contain a mixture of user-level and kernel-level
+ state (it is the responsibility of the kernel to ensure that no
+ security-sensitive kernel-level state is leaked back to
+ user-level)
+
+ - execution is interruptible and preemptible (an fsys-mode handler
+ can disable interrupts and avoid all other interruption-sources
+ to avoid preemption)
+
+ - neither the memory-stack nor the register-stack can be trusted while
+ in fsys-mode (they point to the user-level stacks, which may
+ be invalid, or completely bogus addresses)
+
+In summary, fsys-mode is much more similar to running in user-mode
+than it is to running in kernel-mode. Of course, given that the
+privilege level is at level 0, this means that fsys-mode requires some
+care (see below).
+
+
+* How to tell fsys-mode
+
+Linux operates in fsys-mode when (a) the privilege level is 0 (most
+privileged) and (b) the stacks have NOT been switched to kernel memory
+yet. For convenience, the header file <asm-ia64/ptrace.h> provides
+three macros:
+
+ user_mode(regs)
+ user_stack(task,regs)
+ fsys_mode(task,regs)
+
+The "regs" argument is a pointer to a pt_regs structure. The "task"
+argument is a pointer to the task structure to which the "regs"
+pointer belongs to. user_mode() returns TRUE if the CPU state pointed
+to by "regs" was executing in user mode (privilege level 3).
+user_stack() returns TRUE if the state pointed to by "regs" was
+executing on the user-level stack(s). Finally, fsys_mode() returns
+TRUE if the CPU state pointed to by "regs" was executing in fsys-mode.
+The fsys_mode() macro is equivalent to the expression:
+
+ !user_mode(regs) && user_stack(task,regs)
+
+* How to write an fsyscall handler
+
+The file arch/ia64/kernel/fsys.S contains a table of fsyscall-handlers
+(fsyscall_table). This table contains one entry for each system call.
+By default, a system call is handled by fsys_fallback_syscall(). This
+routine takes care of entering (full) kernel mode and calling the
+normal Linux system call handler. For performance-critical system
+calls, it is possible to write a hand-tuned fsyscall_handler. For
+example, fsys.S contains fsys_getpid(), which is a hand-tuned version
+of the getpid() system call.
+
+The entry and exit-state of an fsyscall handler is as follows:
+
+** Machine state on entry to fsyscall handler:
+
+ - r10 = 0
+ - r11 = saved ar.pfs (a user-level value)
+ - r15 = system call number
+ - r16 = "current" task pointer (in normal kernel-mode, this is in r13)
+ - r32-r39 = system call arguments
+ - b6 = return address (a user-level value)
+ - ar.pfs = previous frame-state (a user-level value)
+ - PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
+ - all other registers may contain values passed in from user-mode
+
+** Required machine state on exit to fsyscall handler:
+
+ - r11 = saved ar.pfs (as passed into the fsyscall handler)
+ - r15 = system call number (as passed into the fsyscall handler)
+ - r32-r39 = system call arguments (as passed into the fsyscall handler)
+ - b6 = return address (as passed into the fsyscall handler)
+ - ar.pfs = previous frame-state (as passed into the fsyscall handler)
+
+Fsyscall handlers can execute with very little overhead, but with that
+speed comes a set of restrictions:
+
+ o Fsyscall-handlers MUST check for any pending work in the flags
+ member of the thread-info structure and if any of the
+ TIF_ALLWORK_MASK flags are set, the handler needs to fall back on
+ doing a full system call (by calling fsys_fallback_syscall).
+
+ o Fsyscall-handlers MUST preserve incoming arguments (r32-r39, r11,
+ r15, b6, and ar.pfs) because they will be needed in case of a
+ system call restart. Of course, all "preserved" registers also
+ must be preserved, in accordance to the normal calling conventions.
+
+ o Fsyscall-handlers MUST check argument registers for containing a
+ NaT value before using them in any way that could trigger a
+ NaT-consumption fault. If a system call argument is found to
+ contain a NaT value, an fsyscall-handler may return immediately
+ with r8=EINVAL, r10=-1.
+
+ o Fsyscall-handlers MUST NOT use the "alloc" instruction or perform
+ any other operation that would trigger mandatory RSE
+ (register-stack engine) traffic.
+
+ o Fsyscall-handlers MUST NOT write to any stacked registers because
+ it is not safe to assume that user-level called a handler with the
+ proper number of arguments.
+
+ o Fsyscall-handlers need to be careful when accessing per-CPU variables:
+ unless proper safe-guards are taken (e.g., interruptions are avoided),
+ execution may be pre-empted and resumed on another CPU at any given
+ time.
+
+ o Fsyscall-handlers must be careful not to leak sensitive kernel'
+ information back to user-level. In particular, before returning to
+ user-level, care needs to be taken to clear any scratch registers
+ that could contain sensitive information (note that the current
+ task pointer is not considered sensitive: it's already exposed
+ through ar.k6).
+
+ o Fsyscall-handlers MUST NOT access user-memory without first
+ validating access-permission (this can be done typically via
+ probe.r.fault and/or probe.w.fault) and without guarding against
+ memory access exceptions (this can be done with the EX() macros
+ defined by asmmacro.h).
+
+The above restrictions may seem draconian, but remember that it's
+possible to trade off some of the restrictions by paying a slightly
+higher overhead. For example, if an fsyscall-handler could benefit
+from the shadow register bank, it could temporarily disable PSR.i and
+PSR.ic, switch to bank 0 (bsw.0) and then use the shadow registers as
+needed. In other words, following the above rules yields extremely
+fast system call execution (while fully preserving system call
+semantics), but there is also a lot of flexibility in handling more
+complicated cases.
+
+* Signal handling
+
+The delivery of (asynchronous) signals must be delayed until fsys-mode
+is exited. This is accomplished with the help of the lower-privilege
+transfer trap: arch/ia64/kernel/process.c:do_notify_resume_user()
+checks whether the interrupted task was in fsys-mode and, if so, sets
+PSR.lp and returns immediately. When fsys-mode is exited via the
+"br.ret" instruction that lowers the privilege level, a trap will
+occur. The trap handler clears PSR.lp again and returns immediately.
+The kernel exit path then checks for and delivers any pending signals.
+
+* PSR Handling
+
+The "epc" instruction doesn't change the contents of PSR at all. This
+is in contrast to a regular interruption, which clears almost all
+bits. Because of that, some care needs to be taken to ensure things
+work as expected. The following discussion describes how each PSR bit
+is handled.
+
+PSR.be Cleared when entering fsys-mode. A srlz.d instruction is used
+ to ensure the CPU is in little-endian mode before the first
+ load/store instruction is executed. PSR.be is normally NOT
+ restored upon return from an fsys-mode handler. In other
+ words, user-level code must not rely on PSR.be being preserved
+ across a system call.
+PSR.up Unchanged.
+PSR.ac Unchanged.
+PSR.mfl Unchanged. Note: fsys-mode handlers must not write-registers!
+PSR.mfh Unchanged. Note: fsys-mode handlers must not write-registers!
+PSR.ic Unchanged. Note: fsys-mode handlers can clear the bit, if needed.
+PSR.i Unchanged. Note: fsys-mode handlers can clear the bit, if needed.
+PSR.pk Unchanged.
+PSR.dt Unchanged.
+PSR.dfl Unchanged. Note: fsys-mode handlers must not write-registers!
+PSR.dfh Unchanged. Note: fsys-mode handlers must not write-registers!
+PSR.sp Unchanged.
+PSR.pp Unchanged.
+PSR.di Unchanged.
+PSR.si Unchanged.
+PSR.db Unchanged. The kernel prevents user-level from setting a hardware
+ breakpoint that triggers at any privilege level other than 3 (user-mode).
+PSR.lp Unchanged.
+PSR.tb Lazy redirect. If a taken-branch trap occurs while in
+ fsys-mode, the trap-handler modifies the saved machine state
+ such that execution resumes in the gate page at
+ syscall_via_break(), with privilege level 3. Note: the
+ taken branch would occur on the branch invoking the
+ fsyscall-handler, at which point, by definition, a syscall
+ restart is still safe. If the system call number is invalid,
+ the fsys-mode handler will return directly to user-level. This
+ return will trigger a taken-branch trap, but since the trap is
+ taken _after_ restoring the privilege level, the CPU has already
+ left fsys-mode, so no special treatment is needed.
+PSR.rt Unchanged.
+PSR.cpl Cleared to 0.
+PSR.is Unchanged (guaranteed to be 0 on entry to the gate page).
+PSR.mc Unchanged.
+PSR.it Unchanged (guaranteed to be 1).
+PSR.id Unchanged. Note: the ia64 linux kernel never sets this bit.
+PSR.da Unchanged. Note: the ia64 linux kernel never sets this bit.
+PSR.dd Unchanged. Note: the ia64 linux kernel never sets this bit.
+PSR.ss Lazy redirect. If set, "epc" will cause a Single Step Trap to
+ be taken. The trap handler then modifies the saved machine
+ state such that execution resumes in the gate page at
+ syscall_via_break(), with privilege level 3.
+PSR.ri Unchanged.
+PSR.ed Unchanged. Note: This bit could only have an effect if an fsys-mode
+ handler performed a speculative load that gets NaTted. If so, this
+ would be the normal & expected behavior, so no special treatment is
+ needed.
+PSR.bn Unchanged. Note: fsys-mode handlers may clear the bit, if needed.
+ Doing so requires clearing PSR.i and PSR.ic as well.
+PSR.ia Unchanged. Note: the ia64 linux kernel never sets this bit.
+
+* Using fast system calls
+
+To use fast system calls, userspace applications need simply call
+__kernel_syscall_via_epc(). For example
+
+-- example fgettimeofday() call --
+-- fgettimeofday.S --
+
+#include <asm/asmmacro.h>
+
+GLOBAL_ENTRY(fgettimeofday)
+.prologue
+.save ar.pfs, r11
+mov r11 = ar.pfs
+.body
+
+mov r2 = 0xa000000000020660;; // gate address
+ // found by inspection of System.map for the
+ // __kernel_syscall_via_epc() function. See
+ // below for how to do this for real.
+
+mov b7 = r2
+mov r15 = 1087 // gettimeofday syscall
+;;
+br.call.sptk.many b6 = b7
+;;
+
+.restore sp
+
+mov ar.pfs = r11
+br.ret.sptk.many rp;; // return to caller
+END(fgettimeofday)
+
+-- end fgettimeofday.S --
+
+In reality, getting the gate address is accomplished by two extra
+values passed via the ELF auxiliary vector (include/asm-ia64/elf.h)
+
+ o AT_SYSINFO : is the address of __kernel_syscall_via_epc()
+ o AT_SYSINFO_EHDR : is the address of the kernel gate ELF DSO
+
+The ELF DSO is a pre-linked library that is mapped in by the kernel at
+the gate page. It is a proper ELF shared object so, with a dynamic
+loader that recognises the library, you should be able to make calls to
+the exported functions within it as with any other shared library.
+AT_SYSINFO points into the kernel DSO at the
+__kernel_syscall_via_epc() function for historical reasons (it was
+used before the kernel DSO) and as a convenience.
diff --git a/Documentation/ia64/mca.txt b/Documentation/ia64/mca.txt
new file mode 100644
index 000000000..f097c60cb
--- /dev/null
+++ b/Documentation/ia64/mca.txt
@@ -0,0 +1,194 @@
+An ad-hoc collection of notes on IA64 MCA and INIT processing. Feel
+free to update it with notes about any area that is not clear.
+
+---
+
+MCA/INIT are completely asynchronous. They can occur at any time, when
+the OS is in any state. Including when one of the cpus is already
+holding a spinlock. Trying to get any lock from MCA/INIT state is
+asking for deadlock. Also the state of structures that are protected
+by locks is indeterminate, including linked lists.
+
+---
+
+The complicated ia64 MCA process. All of this is mandated by Intel's
+specification for ia64 SAL, error recovery and unwind, it is not as
+if we have a choice here.
+
+* MCA occurs on one cpu, usually due to a double bit memory error.
+ This is the monarch cpu.
+
+* SAL sends an MCA rendezvous interrupt (which is a normal interrupt)
+ to all the other cpus, the slaves.
+
+* Slave cpus that receive the MCA interrupt call down into SAL, they
+ end up spinning disabled while the MCA is being serviced.
+
+* If any slave cpu was already spinning disabled when the MCA occurred
+ then it cannot service the MCA interrupt. SAL waits ~20 seconds then
+ sends an unmaskable INIT event to the slave cpus that have not
+ already rendezvoused.
+
+* Because MCA/INIT can be delivered at any time, including when the cpu
+ is down in PAL in physical mode, the registers at the time of the
+ event are _completely_ undefined. In particular the MCA/INIT
+ handlers cannot rely on the thread pointer, PAL physical mode can
+ (and does) modify TP. It is allowed to do that as long as it resets
+ TP on return. However MCA/INIT events expose us to these PAL
+ internal TP changes. Hence curr_task().
+
+* If an MCA/INIT event occurs while the kernel was running (not user
+ space) and the kernel has called PAL then the MCA/INIT handler cannot
+ assume that the kernel stack is in a fit state to be used. Mainly
+ because PAL may or may not maintain the stack pointer internally.
+ Because the MCA/INIT handlers cannot trust the kernel stack, they
+ have to use their own, per-cpu stacks. The MCA/INIT stacks are
+ preformatted with just enough task state to let the relevant handlers
+ do their job.
+
+* Unlike most other architectures, the ia64 struct task is embedded in
+ the kernel stack[1]. So switching to a new kernel stack means that
+ we switch to a new task as well. Because various bits of the kernel
+ assume that current points into the struct task, switching to a new
+ stack also means a new value for current.
+
+* Once all slaves have rendezvoused and are spinning disabled, the
+ monarch is entered. The monarch now tries to diagnose the problem
+ and decide if it can recover or not.
+
+* Part of the monarch's job is to look at the state of all the other
+ tasks. The only way to do that on ia64 is to call the unwinder,
+ as mandated by Intel.
+
+* The starting point for the unwind depends on whether a task is
+ running or not. That is, whether it is on a cpu or is blocked. The
+ monarch has to determine whether or not a task is on a cpu before it
+ knows how to start unwinding it. The tasks that received an MCA or
+ INIT event are no longer running, they have been converted to blocked
+ tasks. But (and its a big but), the cpus that received the MCA
+ rendezvous interrupt are still running on their normal kernel stacks!
+
+* To distinguish between these two cases, the monarch must know which
+ tasks are on a cpu and which are not. Hence each slave cpu that
+ switches to an MCA/INIT stack, registers its new stack using
+ set_curr_task(), so the monarch can tell that the _original_ task is
+ no longer running on that cpu. That gives us a decent chance of
+ getting a valid backtrace of the _original_ task.
+
+* MCA/INIT can be nested, to a depth of 2 on any cpu. In the case of a
+ nested error, we want diagnostics on the MCA/INIT handler that
+ failed, not on the task that was originally running. Again this
+ requires set_curr_task() so the MCA/INIT handlers can register their
+ own stack as running on that cpu. Then a recursive error gets a
+ trace of the failing handler's "task".
+
+[1] My (Keith Owens) original design called for ia64 to separate its
+ struct task and the kernel stacks. Then the MCA/INIT data would be
+ chained stacks like i386 interrupt stacks. But that required
+ radical surgery on the rest of ia64, plus extra hard wired TLB
+ entries with its associated performance degradation. David
+ Mosberger vetoed that approach. Which meant that separate kernel
+ stacks meant separate "tasks" for the MCA/INIT handlers.
+
+---
+
+INIT is less complicated than MCA. Pressing the nmi button or using
+the equivalent command on the management console sends INIT to all
+cpus. SAL picks one of the cpus as the monarch and the rest are
+slaves. All the OS INIT handlers are entered at approximately the same
+time. The OS monarch prints the state of all tasks and returns, after
+which the slaves return and the system resumes.
+
+At least that is what is supposed to happen. Alas there are broken
+versions of SAL out there. Some drive all the cpus as monarchs. Some
+drive them all as slaves. Some drive one cpu as monarch, wait for that
+cpu to return from the OS then drive the rest as slaves. Some versions
+of SAL cannot even cope with returning from the OS, they spin inside
+SAL on resume. The OS INIT code has workarounds for some of these
+broken SAL symptoms, but some simply cannot be fixed from the OS side.
+
+---
+
+The scheduler hooks used by ia64 (curr_task, set_curr_task) are layer
+violations. Unfortunately MCA/INIT start off as massive layer
+violations (can occur at _any_ time) and they build from there.
+
+At least ia64 makes an attempt at recovering from hardware errors, but
+it is a difficult problem because of the asynchronous nature of these
+errors. When processing an unmaskable interrupt we sometimes need
+special code to cope with our inability to take any locks.
+
+---
+
+How is ia64 MCA/INIT different from x86 NMI?
+
+* x86 NMI typically gets delivered to one cpu. MCA/INIT gets sent to
+ all cpus.
+
+* x86 NMI cannot be nested. MCA/INIT can be nested, to a depth of 2
+ per cpu.
+
+* x86 has a separate struct task which points to one of multiple kernel
+ stacks. ia64 has the struct task embedded in the single kernel
+ stack, so switching stack means switching task.
+
+* x86 does not call the BIOS so the NMI handler does not have to worry
+ about any registers having changed. MCA/INIT can occur while the cpu
+ is in PAL in physical mode, with undefined registers and an undefined
+ kernel stack.
+
+* i386 backtrace is not very sensitive to whether a process is running
+ or not. ia64 unwind is very, very sensitive to whether a process is
+ running or not.
+
+---
+
+What happens when MCA/INIT is delivered what a cpu is running user
+space code?
+
+The user mode registers are stored in the RSE area of the MCA/INIT on
+entry to the OS and are restored from there on return to SAL, so user
+mode registers are preserved across a recoverable MCA/INIT. Since the
+OS has no idea what unwind data is available for the user space stack,
+MCA/INIT never tries to backtrace user space. Which means that the OS
+does not bother making the user space process look like a blocked task,
+i.e. the OS does not copy pt_regs and switch_stack to the user space
+stack. Also the OS has no idea how big the user space RSE and memory
+stacks are, which makes it too risky to copy the saved state to a user
+mode stack.
+
+---
+
+How do we get a backtrace on the tasks that were running when MCA/INIT
+was delivered?
+
+mca.c:::ia64_mca_modify_original_stack(). That identifies and
+verifies the original kernel stack, copies the dirty registers from
+the MCA/INIT stack's RSE to the original stack's RSE, copies the
+skeleton struct pt_regs and switch_stack to the original stack, fills
+in the skeleton structures from the PAL minstate area and updates the
+original stack's thread.ksp. That makes the original stack look
+exactly like any other blocked task, i.e. it now appears to be
+sleeping. To get a backtrace, just start with thread.ksp for the
+original task and unwind like any other sleeping task.
+
+---
+
+How do we identify the tasks that were running when MCA/INIT was
+delivered?
+
+If the previous task has been verified and converted to a blocked
+state, then sos->prev_task on the MCA/INIT stack is updated to point to
+the previous task. You can look at that field in dumps or debuggers.
+To help distinguish between the handler and the original tasks,
+handlers have _TIF_MCA_INIT set in thread_info.flags.
+
+The sos data is always in the MCA/INIT handler stack, at offset
+MCA_SOS_OFFSET. You can get that value from mca_asm.h or calculate it
+as KERNEL_STACK_SIZE - sizeof(struct pt_regs) - sizeof(struct
+ia64_sal_os_state), with 16 byte alignment for all structures.
+
+Also the comm field of the MCA/INIT task is modified to include the pid
+of the original task, for humans to use. For example, a comm field of
+'MCA 12159' means that pid 12159 was running when the MCA was
+delivered.
diff --git a/Documentation/ia64/serial.txt b/Documentation/ia64/serial.txt
new file mode 100644
index 000000000..6869c73de
--- /dev/null
+++ b/Documentation/ia64/serial.txt
@@ -0,0 +1,151 @@
+SERIAL DEVICE NAMING
+
+ As of 2.6.10, serial devices on ia64 are named based on the
+ order of ACPI and PCI enumeration. The first device in the
+ ACPI namespace (if any) becomes /dev/ttyS0, the second becomes
+ /dev/ttyS1, etc., and PCI devices are named sequentially
+ starting after the ACPI devices.
+
+ Prior to 2.6.10, there were confusing exceptions to this:
+
+ - Firmware on some machines (mostly from HP) provides an HCDP
+ table[1] that tells the kernel about devices that can be used
+ as a serial console. If the user specified "console=ttyS0"
+ or the EFI ConOut path contained only UART devices, the
+ kernel registered the device described by the HCDP as
+ /dev/ttyS0.
+
+ - If there was no HCDP, we assumed there were UARTs at the
+ legacy COM port addresses (I/O ports 0x3f8 and 0x2f8), so
+ the kernel registered those as /dev/ttyS0 and /dev/ttyS1.
+
+ Any additional ACPI or PCI devices were registered sequentially
+ after /dev/ttyS0 as they were discovered.
+
+ With an HCDP, device names changed depending on EFI configuration
+ and "console=" arguments. Without an HCDP, device names didn't
+ change, but we registered devices that might not really exist.
+
+ For example, an HP rx1600 with a single built-in serial port
+ (described in the ACPI namespace) plus an MP[2] (a PCI device) has
+ these ports:
+
+ pre-2.6.10 pre-2.6.10
+ MMIO (EFI console (EFI console
+ address on builtin) on MP port) 2.6.10
+ ========== ========== ========== ======
+ builtin 0xff5e0000 ttyS0 ttyS1 ttyS0
+ MP UPS 0xf8031000 ttyS1 ttyS2 ttyS1
+ MP Console 0xf8030000 ttyS2 ttyS0 ttyS2
+ MP 2 0xf8030010 ttyS3 ttyS3 ttyS3
+ MP 3 0xf8030038 ttyS4 ttyS4 ttyS4
+
+CONSOLE SELECTION
+
+ EFI knows what your console devices are, but it doesn't tell the
+ kernel quite enough to actually locate them. The DIG64 HCDP
+ table[1] does tell the kernel where potential serial console
+ devices are, but not all firmware supplies it. Also, EFI supports
+ multiple simultaneous consoles and doesn't tell the kernel which
+ should be the "primary" one.
+
+ So how do you tell Linux which console device to use?
+
+ - If your firmware supplies the HCDP, it is simplest to
+ configure EFI with a single device (either a UART or a VGA
+ card) as the console. Then you don't need to tell Linux
+ anything; the kernel will automatically use the EFI console.
+
+ (This works only in 2.6.6 or later; prior to that you had
+ to specify "console=ttyS0" to get a serial console.)
+
+ - Without an HCDP, Linux defaults to a VGA console unless you
+ specify a "console=" argument.
+
+ NOTE: Don't assume that a serial console device will be /dev/ttyS0.
+ It might be ttyS1, ttyS2, etc. Make sure you have the appropriate
+ entries in /etc/inittab (for getty) and /etc/securetty (to allow
+ root login).
+
+EARLY SERIAL CONSOLE
+
+ The kernel can't start using a serial console until it knows where
+ the device lives. Normally this happens when the driver enumerates
+ all the serial devices, which can happen a minute or more after the
+ kernel starts booting.
+
+ 2.6.10 and later kernels have an "early uart" driver that works
+ very early in the boot process. The kernel will automatically use
+ this if the user supplies an argument like "console=uart,io,0x3f8",
+ or if the EFI console path contains only a UART device and the
+ firmware supplies an HCDP.
+
+TROUBLESHOOTING SERIAL CONSOLE PROBLEMS
+
+ No kernel output after elilo prints "Uncompressing Linux... done":
+
+ - You specified "console=ttyS0" but Linux changed the device
+ to which ttyS0 refers. Configure exactly one EFI console
+ device[3] and remove the "console=" option.
+
+ - The EFI console path contains both a VGA device and a UART.
+ EFI and elilo use both, but Linux defaults to VGA. Remove
+ the VGA device from the EFI console path[3].
+
+ - Multiple UARTs selected as EFI console devices. EFI and
+ elilo use all selected devices, but Linux uses only one.
+ Make sure only one UART is selected in the EFI console
+ path[3].
+
+ - You're connected to an HP MP port[2] but have a non-MP UART
+ selected as EFI console device. EFI uses the MP as a
+ console device even when it isn't explicitly selected.
+ Either move the console cable to the non-MP UART, or change
+ the EFI console path[3] to the MP UART.
+
+ Long pause (60+ seconds) between "Uncompressing Linux... done" and
+ start of kernel output:
+
+ - No early console because you used "console=ttyS<n>". Remove
+ the "console=" option if your firmware supplies an HCDP.
+
+ - If you don't have an HCDP, the kernel doesn't know where
+ your console lives until the driver discovers serial
+ devices. Use "console=uart, io,0x3f8" (or appropriate
+ address for your machine).
+
+ Kernel and init script output works fine, but no "login:" prompt:
+
+ - Add getty entry to /etc/inittab for console tty. Look for
+ the "Adding console on ttyS<n>" message that tells you which
+ device is the console.
+
+ "login:" prompt, but can't login as root:
+
+ - Add entry to /etc/securetty for console tty.
+
+ No ACPI serial devices found in 2.6.17 or later:
+
+ - Turn on CONFIG_PNP and CONFIG_PNPACPI. Prior to 2.6.17, ACPI
+ serial devices were discovered by 8250_acpi. In 2.6.17,
+ 8250_acpi was replaced by the combination of 8250_pnp and
+ CONFIG_PNPACPI.
+
+
+
+[1] http://www.dig64.org/specifications/agreement
+ The table was originally defined as the "HCDP" for "Headless
+ Console/Debug Port." The current version is the "PCDP" for
+ "Primary Console and Debug Port Devices."
+
+[2] The HP MP (management processor) is a PCI device that provides
+ several UARTs. One of the UARTs is often used as a console; the
+ EFI Boot Manager identifies it as "Acpi(HWP0002,700)/Pci(...)/Uart".
+ The external connection is usually a 25-pin connector, and a
+ special dongle converts that to three 9-pin connectors, one of
+ which is labelled "Console."
+
+[3] EFI console devices are configured using the EFI Boot Manager
+ "Boot option maintenance" menu. You may have to interrupt the
+ boot sequence to use this menu, and you will have to reset the
+ box after changing console configuration.
diff --git a/Documentation/ia64/xen.txt b/Documentation/ia64/xen.txt
new file mode 100644
index 000000000..c61a99f7c
--- /dev/null
+++ b/Documentation/ia64/xen.txt
@@ -0,0 +1,183 @@
+ Recipe for getting/building/running Xen/ia64 with pv_ops
+ --------------------------------------------------------
+
+This recipe describes how to get xen-ia64 source and build it,
+and run domU with pv_ops.
+
+============
+Requirements
+============
+
+ - python
+ - mercurial
+ it (aka "hg") is an open-source source code
+ management software. See the below.
+ http://www.selenic.com/mercurial/wiki/
+ - git
+ - bridge-utils
+
+=================================
+Getting and Building Xen and Dom0
+=================================
+
+ My environment is;
+ Machine : Tiger4
+ Domain0 OS : RHEL5
+ DomainU OS : RHEL5
+
+ 1. Download source
+ # hg clone http://xenbits.xensource.com/ext/ia64/xen-unstable.hg
+ # cd xen-unstable.hg
+ # hg clone http://xenbits.xensource.com/ext/ia64/linux-2.6.18-xen.hg
+
+ 2. # make world
+
+ 3. # make install-tools
+
+ 4. copy kernels and xen
+ # cp xen/xen.gz /boot/efi/efi/redhat/
+ # cp build-linux-2.6.18-xen_ia64/vmlinux.gz \
+ /boot/efi/efi/redhat/vmlinuz-2.6.18.8-xen
+
+ 5. make initrd for Dom0/DomU
+ # make -C linux-2.6.18-xen.hg ARCH=ia64 modules_install \
+ O=$(/bin/pwd)/build-linux-2.6.18-xen_ia64
+ # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6.18.8-xen.img \
+ 2.6.18.8-xen --builtin mptspi --builtin mptbase \
+ --builtin mptscsih --builtin uhci-hcd --builtin ohci-hcd \
+ --builtin ehci-hcd
+
+================================
+Making a disk image for guest OS
+================================
+
+ 1. make file
+ # dd if=/dev/zero of=/root/rhel5.img bs=1M seek=4096 count=0
+ # mke2fs -F -j /root/rhel5.img
+ # mount -o loop /root/rhel5.img /mnt
+ # cp -ax /{dev,var,etc,usr,bin,sbin,lib} /mnt
+ # mkdir /mnt/{root,proc,sys,home,tmp}
+
+ Note: You may miss some device files. If so, please create them
+ with mknod. Or you can use tar instead of cp.
+
+ 2. modify DomU's fstab
+ # vi /mnt/etc/fstab
+ /dev/xvda1 / ext3 defaults 1 1
+ none /dev/pts devpts gid=5,mode=620 0 0
+ none /dev/shm tmpfs defaults 0 0
+ none /proc proc defaults 0 0
+ none /sys sysfs defaults 0 0
+
+ 3. modify inittab
+ set runlevel to 3 to avoid X trying to start
+ # vi /mnt/etc/inittab
+ id:3:initdefault:
+ Start a getty on the hvc0 console
+ X0:2345:respawn:/sbin/mingetty hvc0
+ tty1-6 mingetty can be commented out
+
+ 4. add hvc0 into /etc/securetty
+ # vi /mnt/etc/securetty (add hvc0)
+
+ 5. umount
+ # umount /mnt
+
+FYI, virt-manager can also make a disk image for guest OS.
+It's GUI tools and easy to make it.
+
+==================
+Boot Xen & Domain0
+==================
+
+ 1. replace elilo
+ elilo of RHEL5 can boot Xen and Dom0.
+ If you use old elilo (e.g RHEL4), please download from the below
+ http://elilo.sourceforge.net/cgi-bin/blosxom
+ and copy into /boot/efi/efi/redhat/
+ # cp elilo-3.6-ia64.efi /boot/efi/efi/redhat/elilo.efi
+
+ 2. modify elilo.conf (like the below)
+ # vi /boot/efi/efi/redhat/elilo.conf
+ prompt
+ timeout=20
+ default=xen
+ relocatable
+
+ image=vmlinuz-2.6.18.8-xen
+ label=xen
+ vmm=xen.gz
+ initrd=initrd-2.6.18.8-xen.img
+ read-only
+ append=" -- rhgb root=/dev/sda2"
+
+The append options before "--" are for xen hypervisor,
+the options after "--" are for dom0.
+
+FYI, your machine may need console options like
+"com1=19200,8n1 console=vga,com1". For example,
+append="com1=19200,8n1 console=vga,com1 -- rhgb console=tty0 \
+console=ttyS0 root=/dev/sda2"
+
+=====================================
+Getting and Building domU with pv_ops
+=====================================
+
+ 1. get pv_ops tree
+ # git clone http://people.valinux.co.jp/~yamahata/xen-ia64/linux-2.6-xen-ia64.git/
+
+ 2. git branch (if necessary)
+ # cd linux-2.6-xen-ia64/
+ # git checkout -b your_branch origin/xen-ia64-domu-minimal-2008may19
+ (Note: The current branch is xen-ia64-domu-minimal-2008may19.
+ But you would find the new branch. You can see with
+ "git branch -r" to get the branch lists.
+ http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/
+ is also available. The tree is based on
+ git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6 test)
+
+
+ 3. copy .config for pv_ops of domU
+ # cp arch/ia64/configs/xen_domu_wip_defconfig .config
+
+ 4. make kernel with pv_ops
+ # make oldconfig
+ # make
+
+ 5. install the kernel and initrd
+ # cp vmlinux.gz /boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU
+ # make modules_install
+ # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img \
+ 2.6.26-rc3xen-ia64-08941-g1b12161 --builtin mptspi \
+ --builtin mptbase --builtin mptscsih --builtin uhci-hcd \
+ --builtin ohci-hcd --builtin ehci-hcd
+
+========================
+Boot DomainU with pv_ops
+========================
+
+ 1. make config of DomU
+ # vi /etc/xen/rhel5
+ kernel = "/boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU"
+ ramdisk = "/boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img"
+ vcpus = 1
+ memory = 512
+ name = "rhel5"
+ disk = [ 'file:/root/rhel5.img,xvda1,w' ]
+ root = "/dev/xvda1 ro"
+ extra= "rhgb console=hvc0"
+
+ 2. After boot xen and dom0, start xend
+ # /etc/init.d/xend start
+ ( In the debugging case, # XEND_DEBUG=1 xend trace_start )
+
+ 3. start domU
+ # xm create -c rhel5
+
+=========
+Reference
+=========
+- Wiki of Xen/IA64 upstream merge
+ http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge
+
+Written by Akio Takebe <takebe_akio@jp.fujitsu.com> on 28 May 2008
diff --git a/Documentation/ide/00-INDEX b/Documentation/ide/00-INDEX
new file mode 100644
index 000000000..22f98ca79
--- /dev/null
+++ b/Documentation/ide/00-INDEX
@@ -0,0 +1,14 @@
+00-INDEX
+ - this file
+ChangeLog.ide-cd.1994-2004
+ - ide-cd changelog
+ChangeLog.ide-floppy.1996-2002
+ - ide-floppy changelog
+ChangeLog.ide-tape.1995-2002
+ - ide-tape changelog
+ide-tape.txt
+ - info on the IDE ATAPI streaming tape driver
+ide.txt
+ - important info for users of ATA devices (IDE/EIDE disks and CD-ROMS).
+warm-plug-howto.txt
+ - using sysfs to remove and add IDE devices. \ No newline at end of file
diff --git a/Documentation/ide/ChangeLog.ide-cd.1994-2004 b/Documentation/ide/ChangeLog.ide-cd.1994-2004
new file mode 100644
index 000000000..4cc3ad99f
--- /dev/null
+++ b/Documentation/ide/ChangeLog.ide-cd.1994-2004
@@ -0,0 +1,268 @@
+/*
+ * 1.00 Oct 31, 1994 -- Initial version.
+ * 1.01 Nov 2, 1994 -- Fixed problem with starting request in
+ * cdrom_check_status.
+ * 1.03 Nov 25, 1994 -- leaving unmask_intr[] as a user-setting (as for disks)
+ * (from mlord) -- minor changes to cdrom_setup()
+ * -- renamed ide_dev_s to ide_drive_t, enable irq on command
+ * 2.00 Nov 27, 1994 -- Generalize packet command interface;
+ * add audio ioctls.
+ * 2.01 Dec 3, 1994 -- Rework packet command interface to handle devices
+ * which send an interrupt when ready for a command.
+ * 2.02 Dec 11, 1994 -- Cache the TOC in the driver.
+ * Don't use SCMD_PLAYAUDIO_TI; it's not included
+ * in the current version of ATAPI.
+ * Try to use LBA instead of track or MSF addressing
+ * when possible.
+ * Don't wait for READY_STAT.
+ * 2.03 Jan 10, 1995 -- Rewrite block read routines to handle block sizes
+ * other than 2k and to move multiple sectors in a
+ * single transaction.
+ * 2.04 Apr 21, 1995 -- Add work-around for Creative Labs CD220E drives.
+ * Thanks to Nick Saw <cwsaw@pts7.pts.mot.com> for
+ * help in figuring this out. Ditto for Acer and
+ * Aztech drives, which seem to have the same problem.
+ * 2.04b May 30, 1995 -- Fix to match changes in ide.c version 3.16 -ml
+ * 2.05 Jun 8, 1995 -- Don't attempt to retry after an illegal request
+ * or data protect error.
+ * Use HWIF and DEV_HWIF macros as in ide.c.
+ * Always try to do a request_sense after
+ * a failed command.
+ * Include an option to give textual descriptions
+ * of ATAPI errors.
+ * Fix a bug in handling the sector cache which
+ * showed up if the drive returned data in 512 byte
+ * blocks (like Pioneer drives). Thanks to
+ * Richard Hirst <srh@gpt.co.uk> for diagnosing this.
+ * Properly supply the page number field in the
+ * MODE_SELECT command.
+ * PLAYAUDIO12 is broken on the Aztech; work around it.
+ * 2.05x Aug 11, 1995 -- lots of data structure renaming/restructuring in ide.c
+ * (my apologies to Scott, but now ide-cd.c is independent)
+ * 3.00 Aug 22, 1995 -- Implement CDROMMULTISESSION ioctl.
+ * Implement CDROMREADAUDIO ioctl (UNTESTED).
+ * Use input_ide_data() and output_ide_data().
+ * Add door locking.
+ * Fix usage count leak in cdrom_open, which happened
+ * when a read-write mount was attempted.
+ * Try to load the disk on open.
+ * Implement CDROMEJECT_SW ioctl (off by default).
+ * Read total cdrom capacity during open.
+ * Rearrange logic in cdrom_decode_status. Issue
+ * request sense commands for failed packet commands
+ * from here instead of from cdrom_queue_packet_command.
+ * Fix a race condition in retrieving error information.
+ * Suppress printing normal unit attention errors and
+ * some drive not ready errors.
+ * Implement CDROMVOLREAD ioctl.
+ * Implement CDROMREADMODE1/2 ioctls.
+ * Fix race condition in setting up interrupt handlers
+ * when the `serialize' option is used.
+ * 3.01 Sep 2, 1995 -- Fix ordering of reenabling interrupts in
+ * cdrom_queue_request.
+ * Another try at using ide_[input,output]_data.
+ * 3.02 Sep 16, 1995 -- Stick total disk capacity in partition table as well.
+ * Make VERBOSE_IDE_CD_ERRORS dump failed command again.
+ * Dump out more information for ILLEGAL REQUEST errs.
+ * Fix handling of errors occurring before the
+ * packet command is transferred.
+ * Fix transfers with odd bytelengths.
+ * 3.03 Oct 27, 1995 -- Some Creative drives have an id of just `CD'.
+ * `DCI-2S10' drives are broken too.
+ * 3.04 Nov 20, 1995 -- So are Vertos drives.
+ * 3.05 Dec 1, 1995 -- Changes to go with overhaul of ide.c and ide-tape.c
+ * 3.06 Dec 16, 1995 -- Add support needed for partitions.
+ * More workarounds for Vertos bugs (based on patches
+ * from Holger Dietze <dietze@aix520.informatik.uni-leipzig.de>).
+ * Try to eliminate byteorder assumptions.
+ * Use atapi_cdrom_subchnl struct definition.
+ * Add STANDARD_ATAPI compilation option.
+ * 3.07 Jan 29, 1996 -- More twiddling for broken drives: Sony 55D,
+ * Vertos 300.
+ * Add NO_DOOR_LOCKING configuration option.
+ * Handle drive_cmd requests w/NULL args (for hdparm -t).
+ * Work around sporadic Sony55e audio play problem.
+ * 3.07a Feb 11, 1996 -- check drive->id for NULL before dereferencing, to fix
+ * problem with "hde=cdrom" with no drive present. -ml
+ * 3.08 Mar 6, 1996 -- More Vertos workarounds.
+ * 3.09 Apr 5, 1996 -- Add CDROMCLOSETRAY ioctl.
+ * Switch to using MSF addressing for audio commands.
+ * Reformat to match kernel tabbing style.
+ * Add CDROM_GET_UPC ioctl.
+ * 3.10 Apr 10, 1996 -- Fix compilation error with STANDARD_ATAPI.
+ * 3.11 Apr 29, 1996 -- Patch from Heiko Eißfeldt <heiko@colossus.escape.de>
+ * to remove redundant verify_area calls.
+ * 3.12 May 7, 1996 -- Rudimentary changer support. Based on patches
+ * from Gerhard Zuber <zuber@berlin.snafu.de>.
+ * Let open succeed even if there's no loaded disc.
+ * 3.13 May 19, 1996 -- Fixes for changer code.
+ * 3.14 May 29, 1996 -- Add work-around for Vertos 600.
+ * (From Hennus Bergman <hennus@sky.ow.nl>.)
+ * 3.15 July 2, 1996 -- Added support for Sanyo 3 CD changers
+ * from Ben Galliart <bgallia@luc.edu> with
+ * special help from Jeff Lightfoot
+ * <jeffml@pobox.com>
+ * 3.15a July 9, 1996 -- Improved Sanyo 3 CD changer identification
+ * 3.16 Jul 28, 1996 -- Fix from Gadi to reduce kernel stack usage for ioctl.
+ * 3.17 Sep 17, 1996 -- Tweak audio reads for some drives.
+ * Start changing CDROMLOADFROMSLOT to CDROM_SELECT_DISC.
+ * 3.18 Oct 31, 1996 -- Added module and DMA support.
+ *
+ * 4.00 Nov 5, 1996 -- New ide-cd maintainer,
+ * Erik B. Andersen <andersee@debian.org>
+ * -- Newer Creative drives don't always set the error
+ * register correctly. Make sure we see media changes
+ * regardless.
+ * -- Integrate with generic cdrom driver.
+ * -- CDROMGETSPINDOWN and CDROMSETSPINDOWN ioctls, based on
+ * a patch from Ciro Cattuto <>.
+ * -- Call set_device_ro.
+ * -- Implement CDROMMECHANISMSTATUS and CDROMSLOTTABLE
+ * ioctls, based on patch by Erik Andersen
+ * -- Add some probes of drive capability during setup.
+ *
+ * 4.01 Nov 11, 1996 -- Split into ide-cd.c and ide-cd.h
+ * -- Removed CDROMMECHANISMSTATUS and CDROMSLOTTABLE
+ * ioctls in favor of a generalized approach
+ * using the generic cdrom driver.
+ * -- Fully integrated with the 2.1.X kernel.
+ * -- Other stuff that I forgot (lots of changes)
+ *
+ * 4.02 Dec 01, 1996 -- Applied patch from Gadi Oxman <gadio@netvision.net.il>
+ * to fix the drive door locking problems.
+ *
+ * 4.03 Dec 04, 1996 -- Added DSC overlap support.
+ * 4.04 Dec 29, 1996 -- Added CDROMREADRAW ioclt based on patch
+ * by Ales Makarov (xmakarov@sun.felk.cvut.cz)
+ *
+ * 4.05 Nov 20, 1997 -- Modified to print more drive info on init
+ * Minor other changes
+ * Fix errors on CDROMSTOP (If you have a "Dolphin",
+ * you must define IHAVEADOLPHIN)
+ * Added identifier so new Sanyo CD-changer works
+ * Better detection if door locking isn't supported
+ *
+ * 4.06 Dec 17, 1997 -- fixed endless "tray open" messages -ml
+ * 4.07 Dec 17, 1997 -- fallback to set pc->stat on "tray open"
+ * 4.08 Dec 18, 1997 -- spew less noise when tray is empty
+ * -- fix speed display for ACER 24X, 18X
+ * 4.09 Jan 04, 1998 -- fix handling of the last block so we return
+ * an end of file instead of an I/O error (Gadi)
+ * 4.10 Jan 24, 1998 -- fixed a bug so now changers can change to a new
+ * slot when there is no disc in the current slot.
+ * -- Fixed a memory leak where info->changer_info was
+ * malloc'ed but never free'd when closing the device.
+ * -- Cleaned up the global namespace a bit by making more
+ * functions static that should already have been.
+ * 4.11 Mar 12, 1998 -- Added support for the CDROM_SELECT_SPEED ioctl
+ * based on a patch for 2.0.33 by Jelle Foks
+ * <jelle@scintilla.utwente.nl>, a patch for 2.0.33
+ * by Toni Giorgino <toni@pcape2.pi.infn.it>, the SCSI
+ * version, and my own efforts. -erik
+ * -- Fixed a stupid bug which egcs was kind enough to
+ * inform me of where "Illegal mode for this track"
+ * was never returned due to a comparison on data
+ * types of limited range.
+ * 4.12 Mar 29, 1998 -- Fixed bug in CDROM_SELECT_SPEED so write speed is
+ * now set ionly for CD-R and CD-RW drives. I had
+ * removed this support because it produced errors.
+ * It produced errors _only_ for non-writers. duh.
+ * 4.13 May 05, 1998 -- Suppress useless "in progress of becoming ready"
+ * messages, since this is not an error.
+ * -- Change error messages to be const
+ * -- Remove a "\t" which looks ugly in the syslogs
+ * 4.14 July 17, 1998 -- Change to pointing to .ps version of ATAPI spec
+ * since the .pdf version doesn't seem to work...
+ * -- Updated the TODO list to something more current.
+ *
+ * 4.15 Aug 25, 1998 -- Updated ide-cd.h to respect machine endianness,
+ * patch thanks to "Eddie C. Dost" <ecd@skynet.be>
+ *
+ * 4.50 Oct 19, 1998 -- New maintainers!
+ * Jens Axboe <axboe@image.dk>
+ * Chris Zwilling <chris@cloudnet.com>
+ *
+ * 4.51 Dec 23, 1998 -- Jens Axboe <axboe@image.dk>
+ * - ide_cdrom_reset enabled since the ide subsystem
+ * handles resets fine now. <axboe@image.dk>
+ * - Transfer size fix for Samsung CD-ROMs, thanks to
+ * "Ville Hallik" <ville.hallik@mail.ee>.
+ * - other minor stuff.
+ *
+ * 4.52 Jan 19, 1999 -- Jens Axboe <axboe@image.dk>
+ * - Detect DVD-ROM/RAM drives
+ *
+ * 4.53 Feb 22, 1999 - Include other model Samsung and one Goldstar
+ * drive in transfer size limit.
+ * - Fix the I/O error when doing eject without a medium
+ * loaded on some drives.
+ * - CDROMREADMODE2 is now implemented through
+ * CDROMREADRAW, since many drives don't support
+ * MODE2 (even though ATAPI 2.6 says they must).
+ * - Added ignore parameter to ide-cd (as a module), eg
+ * insmod ide-cd ignore='hda hdb'
+ * Useful when using ide-cd in conjunction with
+ * ide-scsi. TODO: non-modular way of doing the
+ * same.
+ *
+ * 4.54 Aug 5, 1999 - Support for MMC2 class commands through the generic
+ * packet interface to cdrom.c.
+ * - Unified audio ioctl support, most of it.
+ * - cleaned up various deprecated verify_area().
+ * - Added ide_cdrom_packet() as the interface for
+ * the Uniform generic_packet().
+ * - bunch of other stuff, will fill in logs later.
+ * - report 1 slot for non-changers, like the other
+ * cd-rom drivers. don't report select disc for
+ * non-changers as well.
+ * - mask out audio playing, if the device can't do it.
+ *
+ * 4.55 Sep 1, 1999 - Eliminated the rest of the audio ioctls, except
+ * for CDROMREADTOC[ENTRY|HEADER]. Some of the drivers
+ * use this independently of the actual audio handling.
+ * They will disappear later when I get the time to
+ * do it cleanly.
+ * - Minimize the TOC reading - only do it when we
+ * know a media change has occurred.
+ * - Moved all the CDROMREADx ioctls to the Uniform layer.
+ * - Heiko Eißfeldt <heiko@colossus.escape.de> supplied
+ * some fixes for CDI.
+ * - CD-ROM leaving door locked fix from Andries
+ * Brouwer <Andries.Brouwer@cwi.nl>
+ * - Erik Andersen <andersen@xmission.com> unified
+ * commands across the various drivers and how
+ * sense errors are handled.
+ *
+ * 4.56 Sep 12, 1999 - Removed changer support - it is now in the
+ * Uniform layer.
+ * - Added partition based multisession handling.
+ * - Mode sense and mode select moved to the
+ * Uniform layer.
+ * - Fixed a problem with WPI CDS-32X drive - it
+ * failed the capabilities
+ *
+ * 4.57 Apr 7, 2000 - Fixed sense reporting.
+ * - Fixed possible oops in ide_cdrom_get_last_session()
+ * - Fix locking mania and make ide_cdrom_reset relock
+ * - Stop spewing errors to log when magicdev polls with
+ * TEST_UNIT_READY on some drives.
+ * - Various fixes from Tobias Ringstrom:
+ * tray if it was locked prior to the reset.
+ * - cdrom_read_capacity returns one frame too little.
+ * - Fix real capacity reporting.
+ *
+ * 4.58 May 1, 2000 - Clean up ACER50 stuff.
+ * - Fix small problem with ide_cdrom_capacity
+ *
+ * 4.59 Aug 11, 2000 - Fix changer problem in cdrom_read_toc, we weren't
+ * correctly sensing a disc change.
+ * - Rearranged some code
+ * - Use extended sense on drives that support it for
+ * correctly reporting tray status -- from
+ * Michael D Johnson <johnsom@orst.edu>
+ * 4.60 Dec 17, 2003 - Add mt rainier support
+ * - Bump timeout for packet commands, matches sr
+ * - Odd stuff
+ * 4.61 Jan 22, 2004 - support hardware sector sizes other than 2kB,
+ * Pascal Schmidt <der.eremit@email.de>
+ */
diff --git a/Documentation/ide/ChangeLog.ide-floppy.1996-2002 b/Documentation/ide/ChangeLog.ide-floppy.1996-2002
new file mode 100644
index 000000000..46c19ef32
--- /dev/null
+++ b/Documentation/ide/ChangeLog.ide-floppy.1996-2002
@@ -0,0 +1,63 @@
+/*
+ * Many thanks to Lode Leroy <Lode.Leroy@www.ibase.be>, who tested so many
+ * ALPHA patches to this driver on an EASYSTOR LS-120 ATAPI floppy drive.
+ *
+ * Ver 0.1 Oct 17 96 Initial test version, mostly based on ide-tape.c.
+ * Ver 0.2 Oct 31 96 Minor changes.
+ * Ver 0.3 Dec 2 96 Fixed error recovery bug.
+ * Ver 0.4 Jan 26 97 Add support for the HDIO_GETGEO ioctl.
+ * Ver 0.5 Feb 21 97 Add partitions support.
+ * Use the minimum of the LBA and CHS capacities.
+ * Avoid hwgroup->rq == NULL on the last irq.
+ * Fix potential null dereferencing with DEBUG_LOG.
+ * Ver 0.8 Dec 7 97 Increase irq timeout from 10 to 50 seconds.
+ * Add media write-protect detection.
+ * Issue START command only if TEST UNIT READY fails.
+ * Add work-around for IOMEGA ZIP revision 21.D.
+ * Remove idefloppy_get_capabilities().
+ * Ver 0.9 Jul 4 99 Fix a bug which might have caused the number of
+ * bytes requested on each interrupt to be zero.
+ * Thanks to <shanos@es.co.nz> for pointing this out.
+ * Ver 0.9.sv Jan 6 01 Sam Varshavchik <mrsam@courier-mta.com>
+ * Implement low level formatting. Reimplemented
+ * IDEFLOPPY_CAPABILITIES_PAGE, since we need the srfp
+ * bit. My LS-120 drive barfs on
+ * IDEFLOPPY_CAPABILITIES_PAGE, but maybe it's just me.
+ * Compromise by not reporting a failure to get this
+ * mode page. Implemented four IOCTLs in order to
+ * implement formatting. IOCTls begin with 0x4600,
+ * 0x46 is 'F' as in Format.
+ * Jan 9 01 Userland option to select format verify.
+ * Added PC_SUPPRESS_ERROR flag - some idefloppy drives
+ * do not implement IDEFLOPPY_CAPABILITIES_PAGE, and
+ * return a sense error. Suppress error reporting in
+ * this particular case in order to avoid spurious
+ * errors in syslog. The culprit is
+ * idefloppy_get_capability_page(), so move it to
+ * idefloppy_begin_format() so that it's not used
+ * unless absolutely necessary.
+ * If drive does not support format progress indication
+ * monitor the dsc bit in the status register.
+ * Also, O_NDELAY on open will allow the device to be
+ * opened without a disk available. This can be used to
+ * open an unformatted disk, or get the device capacity.
+ * Ver 0.91 Dec 11 99 Added IOMEGA Clik! drive support by
+ * <paul@paulbristow.net>
+ * Ver 0.92 Oct 22 00 Paul Bristow became official maintainer for this
+ * driver. Included Powerbook internal zip kludge.
+ * Ver 0.93 Oct 24 00 Fixed bugs for Clik! drive
+ * no disk on insert and disk change now works
+ * Ver 0.94 Oct 27 00 Tidied up to remove strstr(Clik) everywhere
+ * Ver 0.95 Nov 7 00 Brought across to kernel 2.4
+ * Ver 0.96 Jan 7 01 Actually in line with release version of 2.4.0
+ * including set_bit patch from Rusty Russell
+ * Ver 0.97 Jul 22 01 Merge 0.91-0.96 onto 0.9.sv for ac series
+ * Ver 0.97.sv Aug 3 01 Backported from 2.4.7-ac3
+ * Ver 0.98 Oct 26 01 Split idefloppy_transfer_pc into two pieces to
+ * fix a lost interrupt problem. It appears the busy
+ * bit was being deasserted by my IOMEGA ATAPI ZIP 100
+ * drive before the drive was actually ready.
+ * Ver 0.98a Oct 29 01 Expose delay value so we can play.
+ * Ver 0.99 Feb 24 02 Remove duplicate code, modify clik! detection code
+ * to support new PocketZip drives
+ */
diff --git a/Documentation/ide/ChangeLog.ide-tape.1995-2002 b/Documentation/ide/ChangeLog.ide-tape.1995-2002
new file mode 100644
index 000000000..877fac877
--- /dev/null
+++ b/Documentation/ide/ChangeLog.ide-tape.1995-2002
@@ -0,0 +1,257 @@
+/*
+ * Ver 0.1 Nov 1 95 Pre-working code :-)
+ * Ver 0.2 Nov 23 95 A short backup (few megabytes) and restore procedure
+ * was successful ! (Using tar cvf ... on the block
+ * device interface).
+ * A longer backup resulted in major swapping, bad
+ * overall Linux performance and eventually failed as
+ * we received non serial read-ahead requests from the
+ * buffer cache.
+ * Ver 0.3 Nov 28 95 Long backups are now possible, thanks to the
+ * character device interface. Linux's responsiveness
+ * and performance doesn't seem to be much affected
+ * from the background backup procedure.
+ * Some general mtio.h magnetic tape operations are
+ * now supported by our character device. As a result,
+ * popular tape utilities are starting to work with
+ * ide tapes :-)
+ * The following configurations were tested:
+ * 1. An IDE ATAPI TAPE shares the same interface
+ * and irq with an IDE ATAPI CDROM.
+ * 2. An IDE ATAPI TAPE shares the same interface
+ * and irq with a normal IDE disk.
+ * Both configurations seemed to work just fine !
+ * However, to be on the safe side, it is meanwhile
+ * recommended to give the IDE TAPE its own interface
+ * and irq.
+ * The one thing which needs to be done here is to
+ * add a "request postpone" feature to ide.c,
+ * so that we won't have to wait for the tape to finish
+ * performing a long media access (DSC) request (such
+ * as a rewind) before we can access the other device
+ * on the same interface. This effect doesn't disturb
+ * normal operation most of the time because read/write
+ * requests are relatively fast, and once we are
+ * performing one tape r/w request, a lot of requests
+ * from the other device can be queued and ide.c will
+ * service all of them after this single tape request.
+ * Ver 1.0 Dec 11 95 Integrated into Linux 1.3.46 development tree.
+ * On each read / write request, we now ask the drive
+ * if we can transfer a constant number of bytes
+ * (a parameter of the drive) only to its buffers,
+ * without causing actual media access. If we can't,
+ * we just wait until we can by polling the DSC bit.
+ * This ensures that while we are not transferring
+ * more bytes than the constant referred to above, the
+ * interrupt latency will not become too high and
+ * we won't cause an interrupt timeout, as happened
+ * occasionally in the previous version.
+ * While polling for DSC, the current request is
+ * postponed and ide.c is free to handle requests from
+ * the other device. This is handled transparently to
+ * ide.c. The hwgroup locking method which was used
+ * in the previous version was removed.
+ * Use of new general features which are provided by
+ * ide.c for use with atapi devices.
+ * (Programming done by Mark Lord)
+ * Few potential bug fixes (Again, suggested by Mark)
+ * Single character device data transfers are now
+ * not limited in size, as they were before.
+ * We are asking the tape about its recommended
+ * transfer unit and send a larger data transfer
+ * as several transfers of the above size.
+ * For best results, use an integral number of this
+ * basic unit (which is shown during driver
+ * initialization). I will soon add an ioctl to get
+ * this important parameter.
+ * Our data transfer buffer is allocated on startup,
+ * rather than before each data transfer. This should
+ * ensure that we will indeed have a data buffer.
+ * Ver 1.1 Dec 14 95 Fixed random problems which occurred when the tape
+ * shared an interface with another device.
+ * (poll_for_dsc was a complete mess).
+ * Removed some old (non-active) code which had
+ * to do with supporting buffer cache originated
+ * requests.
+ * The block device interface can now be opened, so
+ * that general ide driver features like the unmask
+ * interrupts flag can be selected with an ioctl.
+ * This is the only use of the block device interface.
+ * New fast pipelined operation mode (currently only on
+ * writes). When using the pipelined mode, the
+ * throughput can potentially reach the maximum
+ * tape supported throughput, regardless of the
+ * user backup program. On my tape drive, it sometimes
+ * boosted performance by a factor of 2. Pipelined
+ * mode is enabled by default, but since it has a few
+ * downfalls as well, you may want to disable it.
+ * A short explanation of the pipelined operation mode
+ * is available below.
+ * Ver 1.2 Jan 1 96 Eliminated pipelined mode race condition.
+ * Added pipeline read mode. As a result, restores
+ * are now as fast as backups.
+ * Optimized shared interface behavior. The new behavior
+ * typically results in better IDE bus efficiency and
+ * higher tape throughput.
+ * Pre-calculation of the expected read/write request
+ * service time, based on the tape's parameters. In
+ * the pipelined operation mode, this allows us to
+ * adjust our polling frequency to a much lower value,
+ * and thus to dramatically reduce our load on Linux,
+ * without any decrease in performance.
+ * Implemented additional mtio.h operations.
+ * The recommended user block size is returned by
+ * the MTIOCGET ioctl.
+ * Additional minor changes.
+ * Ver 1.3 Feb 9 96 Fixed pipelined read mode bug which prevented the
+ * use of some block sizes during a restore procedure.
+ * The character device interface will now present a
+ * continuous view of the media - any mix of block sizes
+ * during a backup/restore procedure is supported. The
+ * driver will buffer the requests internally and
+ * convert them to the tape's recommended transfer
+ * unit, making performance almost independent of the
+ * chosen user block size.
+ * Some improvements in error recovery.
+ * By cooperating with ide-dma.c, bus mastering DMA can
+ * now sometimes be used with IDE tape drives as well.
+ * Bus mastering DMA has the potential to dramatically
+ * reduce the CPU's overhead when accessing the device,
+ * and can be enabled by using hdparm -d1 on the tape's
+ * block device interface. For more info, read the
+ * comments in ide-dma.c.
+ * Ver 1.4 Mar 13 96 Fixed serialize support.
+ * Ver 1.5 Apr 12 96 Fixed shared interface operation, broken in 1.3.85.
+ * Fixed pipelined read mode inefficiency.
+ * Fixed nasty null dereferencing bug.
+ * Ver 1.6 Aug 16 96 Fixed FPU usage in the driver.
+ * Fixed end of media bug.
+ * Ver 1.7 Sep 10 96 Minor changes for the CONNER CTT8000-A model.
+ * Ver 1.8 Sep 26 96 Attempt to find a better balance between good
+ * interactive response and high system throughput.
+ * Ver 1.9 Nov 5 96 Automatically cross encountered filemarks rather
+ * than requiring an explicit FSF command.
+ * Abort pending requests at end of media.
+ * MTTELL was sometimes returning incorrect results.
+ * Return the real block size in the MTIOCGET ioctl.
+ * Some error recovery bug fixes.
+ * Ver 1.10 Nov 5 96 Major reorganization.
+ * Reduced CPU overhead a bit by eliminating internal
+ * bounce buffers.
+ * Added module support.
+ * Added multiple tape drives support.
+ * Added partition support.
+ * Rewrote DSC handling.
+ * Some portability fixes.
+ * Removed ide-tape.h.
+ * Additional minor changes.
+ * Ver 1.11 Dec 2 96 Bug fix in previous DSC timeout handling.
+ * Use ide_stall_queue() for DSC overlap.
+ * Use the maximum speed rather than the current speed
+ * to compute the request service time.
+ * Ver 1.12 Dec 7 97 Fix random memory overwriting and/or last block data
+ * corruption, which could occur if the total number
+ * of bytes written to the tape was not an integral
+ * number of tape blocks.
+ * Add support for INTERRUPT DRQ devices.
+ * Ver 1.13 Jan 2 98 Add "speed == 0" work-around for HP COLORADO 5GB
+ * Ver 1.14 Dec 30 98 Partial fixes for the Sony/AIWA tape drives.
+ * Replace cli()/sti() with hwgroup spinlocks.
+ * Ver 1.15 Mar 25 99 Fix SMP race condition by replacing hwgroup
+ * spinlock with private per-tape spinlock.
+ * Ver 1.16 Sep 1 99 Add OnStream tape support.
+ * Abort read pipeline on EOD.
+ * Wait for the tape to become ready in case it returns
+ * "in the process of becoming ready" on open().
+ * Fix zero padding of the last written block in
+ * case the tape block size is larger than PAGE_SIZE.
+ * Decrease the default disconnection time to tn.
+ * Ver 1.16e Oct 3 99 Minor fixes.
+ * Ver 1.16e1 Oct 13 99 Patches by Arnold Niessen,
+ * niessen@iae.nl / arnold.niessen@philips.com
+ * GO-1) Undefined code in idetape_read_position
+ * according to Gadi's email
+ * AJN-1) Minor fix asc == 11 should be asc == 0x11
+ * in idetape_issue_packet_command (did effect
+ * debugging output only)
+ * AJN-2) Added more debugging output, and
+ * added ide-tape: where missing. I would also
+ * like to add tape->name where possible
+ * AJN-3) Added different debug_level's
+ * via /proc/ide/hdc/settings
+ * "debug_level" determines amount of debugging output;
+ * can be changed using /proc/ide/hdx/settings
+ * 0 : almost no debugging output
+ * 1 : 0+output errors only
+ * 2 : 1+output all sensekey/asc
+ * 3 : 2+follow all chrdev related procedures
+ * 4 : 3+follow all procedures
+ * 5 : 4+include pc_stack rq_stack info
+ * 6 : 5+USE_COUNT updates
+ * AJN-4) Fixed timeout for retension in idetape_queue_pc_tail
+ * from 5 to 10 minutes
+ * AJN-5) Changed maximum number of blocks to skip when
+ * reading tapes with multiple consecutive write
+ * errors from 100 to 1000 in idetape_get_logical_blk
+ * Proposed changes to code:
+ * 1) output "logical_blk_num" via /proc
+ * 2) output "current_operation" via /proc
+ * 3) Either solve or document the fact that `mt rewind' is
+ * required after reading from /dev/nhtx to be
+ * able to rmmod the idetape module;
+ * Also, sometimes an application finishes but the
+ * device remains `busy' for some time. Same cause ?
+ * Proposed changes to release-notes:
+ * 4) write a simple `quickstart' section in the
+ * release notes; I volunteer if you don't want to
+ * 5) include a pointer to video4linux in the doc
+ * to stimulate video applications
+ * 6) release notes lines 331 and 362: explain what happens
+ * if the application data rate is higher than 1100 KB/s;
+ * similar approach to lower-than-500 kB/s ?
+ * 7) 6.6 Comparison; wouldn't it be better to allow different
+ * strategies for read and write ?
+ * Wouldn't it be better to control the tape buffer
+ * contents instead of the bandwidth ?
+ * 8) line 536: replace will by would (if I understand
+ * this section correctly, a hypothetical and unwanted situation
+ * is being described)
+ * Ver 1.16f Dec 15 99 Change place of the secondary OnStream header frames.
+ * Ver 1.17 Nov 2000 / Jan 2001 Marcel Mol, marcel@mesa.nl
+ * - Add idetape_onstream_mode_sense_tape_parameter_page
+ * function to get tape capacity in frames: tape->capacity.
+ * - Add support for DI-50 drives( or any DI- drive).
+ * - 'workaround' for read error/blank block around block 3000.
+ * - Implement Early warning for end of media for Onstream.
+ * - Cosmetic code changes for readability.
+ * - Idetape_position_tape should not use SKIP bit during
+ * Onstream read recovery.
+ * - Add capacity, logical_blk_num and first/last_frame_position
+ * to /proc/ide/hd?/settings.
+ * - Module use count was gone in the Linux 2.4 driver.
+ * Ver 1.17a Apr 2001 Willem Riede osst@riede.org
+ * - Get drive's actual block size from mode sense block descriptor
+ * - Limit size of pipeline
+ * Ver 1.17b Oct 2002 Alan Stern <stern@rowland.harvard.edu>
+ * Changed IDETAPE_MIN_PIPELINE_STAGES to 1 and actually used
+ * it in the code!
+ * Actually removed aborted stages in idetape_abort_pipeline
+ * instead of just changing the command code.
+ * Made the transfer byte count for Request Sense equal to the
+ * actual length of the data transfer.
+ * Changed handling of partial data transfers: they do not
+ * cause DMA errors.
+ * Moved initiation of DMA transfers to the correct place.
+ * Removed reference to unallocated memory.
+ * Made __idetape_discard_read_pipeline return the number of
+ * sectors skipped, not the number of stages.
+ * Replaced errant kfree() calls with __idetape_kfree_stage().
+ * Fixed off-by-one error in testing the pipeline length.
+ * Fixed handling of filemarks in the read pipeline.
+ * Small code optimization for MTBSF and MTBSFM ioctls.
+ * Don't try to unlock the door during device close if is
+ * already unlocked!
+ * Cosmetic fixes to miscellaneous debugging output messages.
+ * Set the minimum /proc/ide/hd?/settings values for "pipeline",
+ * "pipeline_min", and "pipeline_max" to 1.
+ */
diff --git a/Documentation/ide/ide-tape.txt b/Documentation/ide/ide-tape.txt
new file mode 100644
index 000000000..3f348a0b2
--- /dev/null
+++ b/Documentation/ide/ide-tape.txt
@@ -0,0 +1,65 @@
+IDE ATAPI streaming tape driver.
+
+This driver is a part of the Linux ide driver.
+
+The driver, in co-operation with ide.c, basically traverses the
+request-list for the block device interface. The character device
+interface, on the other hand, creates new requests, adds them
+to the request-list of the block device, and waits for their completion.
+
+The block device major and minor numbers are determined from the
+tape's relative position in the ide interfaces, as explained in ide.c.
+
+The character device interface consists of the following devices:
+
+ht0 major 37, minor 0 first IDE tape, rewind on close.
+ht1 major 37, minor 1 second IDE tape, rewind on close.
+...
+nht0 major 37, minor 128 first IDE tape, no rewind on close.
+nht1 major 37, minor 129 second IDE tape, no rewind on close.
+...
+
+The general magnetic tape commands compatible interface, as defined by
+include/linux/mtio.h, is accessible through the character device.
+
+General ide driver configuration options, such as the interrupt-unmask
+flag, can be configured by issuing an ioctl to the block device interface,
+as any other ide device.
+
+Our own ide-tape ioctl's can be issued to either the block device or
+the character device interface.
+
+Maximal throughput with minimal bus load will usually be achieved in the
+following scenario:
+
+ 1. ide-tape is operating in the pipelined operation mode.
+ 2. No buffering is performed by the user backup program.
+
+Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive.
+
+Here are some words from the first releases of hd.c, which are quoted
+in ide.c and apply here as well:
+
+| Special care is recommended. Have Fun!
+
+Possible improvements:
+
+1. Support for the ATAPI overlap protocol.
+
+In order to maximize bus throughput, we currently use the DSC
+overlap method which enables ide.c to service requests from the
+other device while the tape is busy executing a command. The
+DSC overlap method involves polling the tape's status register
+for the DSC bit, and servicing the other device while the tape
+isn't ready.
+
+In the current QIC development standard (December 1995),
+it is recommended that new tape drives will *in addition*
+implement the ATAPI overlap protocol, which is used for the
+same purpose - efficient use of the IDE bus, but is interrupt
+driven and thus has much less CPU overhead.
+
+ATAPI overlap is likely to be supported in most new ATAPI
+devices, including new ATAPI cdroms, and thus provides us
+a method by which we can achieve higher throughput when
+sharing a (fast) ATA-2 disk with any (slow) new ATAPI device.
diff --git a/Documentation/ide/ide.txt b/Documentation/ide/ide.txt
new file mode 100644
index 000000000..7aca987c2
--- /dev/null
+++ b/Documentation/ide/ide.txt
@@ -0,0 +1,256 @@
+
+ Information regarding the Enhanced IDE drive in Linux 2.6
+
+==============================================================================
+
+
+ The hdparm utility can be used to control various IDE features on a
+ running system. It is packaged separately. Please Look for it on popular
+ linux FTP sites.
+
+
+
+*** IMPORTANT NOTICES: BUGGY IDE CHIPSETS CAN CORRUPT DATA!!
+*** =================
+*** PCI versions of the CMD640 and RZ1000 interfaces are now detected
+*** automatically at startup when PCI BIOS support is configured.
+***
+*** Linux disables the "prefetch" ("readahead") mode of the RZ1000
+*** to prevent data corruption possible due to hardware design flaws.
+***
+*** For the CMD640, linux disables "IRQ unmasking" (hdparm -u1) on any
+*** drive for which the "prefetch" mode of the CMD640 is turned on.
+*** If "prefetch" is disabled (hdparm -p8), then "IRQ unmasking" can be
+*** used again.
+***
+*** For the CMD640, linux disables "32bit I/O" (hdparm -c1) on any drive
+*** for which the "prefetch" mode of the CMD640 is turned off.
+*** If "prefetch" is enabled (hdparm -p9), then "32bit I/O" can be
+*** used again.
+***
+*** The CMD640 is also used on some Vesa Local Bus (VLB) cards, and is *NOT*
+*** automatically detected by Linux. For safe, reliable operation with such
+*** interfaces, one *MUST* use the "cmd640.probe_vlb" kernel option.
+***
+*** Use of the "serialize" option is no longer necessary.
+
+================================================================================
+Common pitfalls:
+
+- 40-conductor IDE cables are capable of transferring data in DMA modes up to
+ udma2, but no faster.
+
+- If possible devices should be attached to separate channels if they are
+ available. Typically the disk on the first and CD-ROM on the second.
+
+- If you mix devices on the same cable, please consider using similar devices
+ in respect of the data transfer mode they support.
+
+- Even better try to stick to the same vendor and device type on the same
+ cable.
+
+================================================================================
+
+This is the multiple IDE interface driver, as evolved from hd.c.
+
+It supports up to 9 IDE interfaces per default, on one or more IRQs (usually
+14 & 15). There can be up to two drives per interface, as per the ATA-6 spec.
+
+Primary: ide0, port 0x1f0; major=3; hda is minor=0; hdb is minor=64
+Secondary: ide1, port 0x170; major=22; hdc is minor=0; hdd is minor=64
+Tertiary: ide2, port 0x1e8; major=33; hde is minor=0; hdf is minor=64
+Quaternary: ide3, port 0x168; major=34; hdg is minor=0; hdh is minor=64
+fifth.. ide4, usually PCI, probed
+sixth.. ide5, usually PCI, probed
+
+To access devices on interfaces > ide0, device entries please make sure that
+device files for them are present in /dev. If not, please create such
+entries, by using /dev/MAKEDEV.
+
+This driver automatically probes for most IDE interfaces (including all PCI
+ones), for the drives/geometries attached to those interfaces, and for the IRQ
+lines being used by the interfaces (normally 14, 15 for ide0/ide1).
+
+Any number of interfaces may share a single IRQ if necessary, at a slight
+performance penalty, whether on separate cards or a single VLB card.
+The IDE driver automatically detects and handles this. However, this may
+or may not be harmful to your hardware.. two or more cards driving the same IRQ
+can potentially burn each other's bus driver, though in practice this
+seldom occurs. Be careful, and if in doubt, don't do it!
+
+Drives are normally found by auto-probing and/or examining the CMOS/BIOS data.
+For really weird situations, the apparent (fdisk) geometry can also be specified
+on the kernel "command line" using LILO. The format of such lines is:
+
+ ide_core.chs=[interface_number.device_number]:cyls,heads,sects
+or ide_core.cdrom=[interface_number.device_number]
+
+For example:
+
+ ide_core.chs=1.0:1050,32,64 ide_core.cdrom=1.1
+
+The results of successful auto-probing may override the physical geometry/irq
+specified, though the "original" geometry may be retained as the "logical"
+geometry for partitioning purposes (fdisk).
+
+If the auto-probing during boot time confuses a drive (ie. the drive works
+with hd.c but not with ide.c), then an command line option may be specified
+for each drive for which you'd like the drive to skip the hardware
+probe/identification sequence. For example:
+
+ ide_core.noprobe=0.1
+or
+ ide_core.chs=1.0:768,16,32
+ ide_core.noprobe=1.0
+
+Note that when only one IDE device is attached to an interface, it should be
+jumpered as "single" or "master", *not* "slave". Many folks have had
+"trouble" with cdroms because of this requirement, so the driver now probes
+for both units, though success is more likely when the drive is jumpered
+correctly.
+
+Courtesy of Scott Snyder and others, the driver supports ATAPI cdrom drives
+such as the NEC-260 and the new MITSUMI triple/quad speed drives.
+Such drives will be identified at boot time, just like a hard disk.
+
+If for some reason your cdrom drive is *not* found at boot time, you can force
+the probe to look harder by supplying a kernel command line parameter
+via LILO, such as:
+
+ ide_core.cdrom=1.0 /* "master" on second interface (hdc) */
+or
+ ide_core.cdrom=1.1 /* "slave" on second interface (hdd) */
+
+For example, a GW2000 system might have a hard drive on the primary
+interface (/dev/hda) and an IDE cdrom drive on the secondary interface
+(/dev/hdc). To mount a CD in the cdrom drive, one would use something like:
+
+ ln -sf /dev/hdc /dev/cdrom
+ mkdir /mnt/cdrom
+ mount /dev/cdrom /mnt/cdrom -t iso9660 -o ro
+
+If, after doing all of the above, mount doesn't work and you see
+errors from the driver (with dmesg) complaining about `status=0xff',
+this means that the hardware is not responding to the driver's attempts
+to read it. One of the following is probably the problem:
+
+ - Your hardware is broken.
+
+ - You are using the wrong address for the device, or you have the
+ drive jumpered wrong. Review the configuration instructions above.
+
+ - Your IDE controller requires some nonstandard initialization sequence
+ before it will work properly. If this is the case, there will often
+ be a separate MS-DOS driver just for the controller. IDE interfaces
+ on sound cards usually fall into this category. Such configurations
+ can often be made to work by first booting MS-DOS, loading the
+ appropriate drivers, and then warm-booting linux (without powering
+ off). This can be automated using loadlin in the MS-DOS autoexec.
+
+If you always get timeout errors, interrupts from the drive are probably
+not making it to the host. Check how you have the hardware jumpered
+and make sure it matches what the driver expects (see the configuration
+instructions above). If you have a PCI system, also check the BIOS
+setup; I've had one report of a system which was shipped with IRQ 15
+disabled by the BIOS.
+
+The kernel is able to execute binaries directly off of the cdrom,
+provided it is mounted with the default block size of 1024 (as above).
+
+Please pass on any feedback on any of this stuff to the maintainer,
+whose address can be found in linux/MAINTAINERS.
+
+The IDE driver is modularized. The high level disk/CD-ROM/tape/floppy
+drivers can always be compiled as loadable modules, the chipset drivers
+can only be compiled into the kernel, and the core code (ide.c) can be
+compiled as a loadable module provided no chipset support is needed.
+
+When using ide.c as a module in combination with kmod, add:
+
+ alias block-major-3 ide-probe
+
+to a configuration file in /etc/modprobe.d/.
+
+When ide.c is used as a module, you can pass command line parameters to the
+driver using the "options=" keyword to insmod, while replacing any ',' with
+';'.
+
+
+================================================================================
+
+Summary of ide driver parameters for kernel command line
+--------------------------------------------------------
+
+For legacy IDE VLB host drivers (ali14xx/dtc2278/ht6560b/qd65xx/umc8672)
+you need to explicitly enable probing by using "probe" kernel parameter,
+i.e. to enable probing for ALI M14xx chipsets (ali14xx host driver) use:
+
+* "ali14xx.probe" boot option when ali14xx driver is built-in the kernel
+
+* "probe" module parameter when ali14xx driver is compiled as module
+ ("modprobe ali14xx probe")
+
+Also for legacy CMD640 host driver (cmd640) you need to use "probe_vlb"
+kernel paremeter to enable probing for VLB version of the chipset (PCI ones
+are detected automatically).
+
+You also need to use "probe" kernel parameter for ide-4drives driver
+(support for IDE generic chipset with four drives on one port).
+
+To enable support for IDE doublers on Amiga use "doubler" kernel parameter
+for gayle host driver (i.e. "gayle.doubler" if the driver is built-in).
+
+To force ignoring cable detection (this should be needed only if you're using
+short 40-wires cable which cannot be automatically detected - if this is not
+a case please report it as a bug instead) use "ignore_cable" kernel parameter:
+
+* "ide_core.ignore_cable=[interface_number]" boot option if IDE is built-in
+ (i.e. "ide_core.ignore_cable=1" to force ignoring cable for "ide1")
+
+* "ignore_cable=[interface_number]" module parameter (for ide_core module)
+ if IDE is compiled as module
+
+Other kernel parameters for ide_core are:
+
+* "nodma=[interface_number.device_number]" to disallow DMA for a device
+
+* "noflush=[interface_number.device_number]" to disable flush requests
+
+* "nohpa=[interface_number.device_number]" to disable Host Protected Area
+
+* "noprobe=[interface_number.device_number]" to skip probing
+
+* "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit
+
+* "cdrom=[interface_number.device_number]" to force device as a CD-ROM
+
+* "chs=[interface_number.device_number]" to force device as a disk (using CHS)
+
+================================================================================
+
+Some Terminology
+----------------
+IDE = Integrated Drive Electronics, meaning that each drive has a built-in
+controller, which is why an "IDE interface card" is not a "controller card".
+
+ATA = AT (the old IBM 286 computer) Attachment Interface, a draft American
+National Standard for connecting hard drives to PCs. This is the official
+name for "IDE".
+
+The latest standards define some enhancements, known as the ATA-6 spec,
+which grew out of vendor-specific "Enhanced IDE" (EIDE) implementations.
+
+ATAPI = ATA Packet Interface, a new protocol for controlling the drives,
+similar to SCSI protocols, created at the same time as the ATA2 standard.
+ATAPI is currently used for controlling CDROM, TAPE and FLOPPY (ZIP or
+LS120/240) devices, removable R/W cartridges, and for high capacity hard disk
+drives.
+
+mlord@pobox.com
+--
+
+Wed Apr 17 22:52:44 CEST 2002 edited by Marcin Dalecki, the current
+maintainer.
+
+Wed Aug 20 22:31:29 CEST 2003 updated ide boot options to current ide.c
+comments at 2.6.0-test4 time. Maciej Soltysiak <solt@dns.toxicfilms.tv>
diff --git a/Documentation/ide/warm-plug-howto.txt b/Documentation/ide/warm-plug-howto.txt
new file mode 100644
index 000000000..98152bcd5
--- /dev/null
+++ b/Documentation/ide/warm-plug-howto.txt
@@ -0,0 +1,18 @@
+
+IDE warm-plug HOWTO
+===================
+
+To warm-plug devices on a port 'idex':
+
+# echo -n "1" > /sys/class/ide_port/idex/delete_devices
+
+unplug old device(s) and plug new device(s)
+
+# echo -n "1" > /sys/class/ide_port/idex/scan
+
+done
+
+NOTE: please make sure that partitions are unmounted and that there are
+no other active references to devices before doing "delete_devices" step,
+also do not attempt "scan" step on devices currently in use -- otherwise
+results may be unpredictable and lead to data loss if you're unlucky
diff --git a/Documentation/infiniband/core_locking.txt b/Documentation/infiniband/core_locking.txt
new file mode 100644
index 000000000..e16785422
--- /dev/null
+++ b/Documentation/infiniband/core_locking.txt
@@ -0,0 +1,114 @@
+INFINIBAND MIDLAYER LOCKING
+
+ This guide is an attempt to make explicit the locking assumptions
+ made by the InfiniBand midlayer. It describes the requirements on
+ both low-level drivers that sit below the midlayer and upper level
+ protocols that use the midlayer.
+
+Sleeping and interrupt context
+
+ With the following exceptions, a low-level driver implementation of
+ all of the methods in struct ib_device may sleep. The exceptions
+ are any methods from the list:
+
+ create_ah
+ modify_ah
+ query_ah
+ destroy_ah
+ bind_mw
+ post_send
+ post_recv
+ poll_cq
+ req_notify_cq
+ map_phys_fmr
+
+ which may not sleep and must be callable from any context.
+
+ The corresponding functions exported to upper level protocol
+ consumers:
+
+ ib_create_ah
+ ib_modify_ah
+ ib_query_ah
+ ib_destroy_ah
+ ib_bind_mw
+ ib_post_send
+ ib_post_recv
+ ib_req_notify_cq
+ ib_map_phys_fmr
+
+ are therefore safe to call from any context.
+
+ In addition, the function
+
+ ib_dispatch_event
+
+ used by low-level drivers to dispatch asynchronous events through
+ the midlayer is also safe to call from any context.
+
+Reentrancy
+
+ All of the methods in struct ib_device exported by a low-level
+ driver must be fully reentrant. The low-level driver is required to
+ perform all synchronization necessary to maintain consistency, even
+ if multiple function calls using the same object are run
+ simultaneously.
+
+ The IB midlayer does not perform any serialization of function calls.
+
+ Because low-level drivers are reentrant, upper level protocol
+ consumers are not required to perform any serialization. However,
+ some serialization may be required to get sensible results. For
+ example, a consumer may safely call ib_poll_cq() on multiple CPUs
+ simultaneously. However, the ordering of the work completion
+ information between different calls of ib_poll_cq() is not defined.
+
+Callbacks
+
+ A low-level driver must not perform a callback directly from the
+ same callchain as an ib_device method call. For example, it is not
+ allowed for a low-level driver to call a consumer's completion event
+ handler directly from its post_send method. Instead, the low-level
+ driver should defer this callback by, for example, scheduling a
+ tasklet to perform the callback.
+
+ The low-level driver is responsible for ensuring that multiple
+ completion event handlers for the same CQ are not called
+ simultaneously. The driver must guarantee that only one CQ event
+ handler for a given CQ is running at a time. In other words, the
+ following situation is not allowed:
+
+ CPU1 CPU2
+
+ low-level driver ->
+ consumer CQ event callback:
+ /* ... */
+ ib_req_notify_cq(cq, ...);
+ low-level driver ->
+ /* ... */ consumer CQ event callback:
+ /* ... */
+ return from CQ event handler
+
+ The context in which completion event and asynchronous event
+ callbacks run is not defined. Depending on the low-level driver, it
+ may be process context, softirq context, or interrupt context.
+ Upper level protocol consumers may not sleep in a callback.
+
+Hot-plug
+
+ A low-level driver announces that a device is ready for use by
+ consumers when it calls ib_register_device(), all initialization
+ must be complete before this call. The device must remain usable
+ until the driver's call to ib_unregister_device() has returned.
+
+ A low-level driver must call ib_register_device() and
+ ib_unregister_device() from process context. It must not hold any
+ semaphores that could cause deadlock if a consumer calls back into
+ the driver across these calls.
+
+ An upper level protocol consumer may begin using an IB device as
+ soon as the add method of its struct ib_client is called for that
+ device. A consumer must finish all cleanup and free all resources
+ relating to a device before returning from the remove method.
+
+ A consumer is permitted to sleep in its add and remove methods.
diff --git a/Documentation/infiniband/ipoib.txt b/Documentation/infiniband/ipoib.txt
new file mode 100644
index 000000000..f2cfe265e
--- /dev/null
+++ b/Documentation/infiniband/ipoib.txt
@@ -0,0 +1,105 @@
+IP OVER INFINIBAND
+
+ The ib_ipoib driver is an implementation of the IP over InfiniBand
+ protocol as specified by RFC 4391 and 4392, issued by the IETF ipoib
+ working group. It is a "native" implementation in the sense of
+ setting the interface type to ARPHRD_INFINIBAND and the hardware
+ address length to 20 (earlier proprietary implementations
+ masqueraded to the kernel as ethernet interfaces).
+
+Partitions and P_Keys
+
+ When the IPoIB driver is loaded, it creates one interface for each
+ port using the P_Key at index 0. To create an interface with a
+ different P_Key, write the desired P_Key into the main interface's
+ /sys/class/net/<intf name>/create_child file. For example:
+
+ echo 0x8001 > /sys/class/net/ib0/create_child
+
+ This will create an interface named ib0.8001 with P_Key 0x8001. To
+ remove a subinterface, use the "delete_child" file:
+
+ echo 0x8001 > /sys/class/net/ib0/delete_child
+
+ The P_Key for any interface is given by the "pkey" file, and the
+ main interface for a subinterface is in "parent."
+
+ Child interface create/delete can also be done using IPoIB's
+ rtnl_link_ops, where childs created using either way behave the same.
+
+Datagram vs Connected modes
+
+ The IPoIB driver supports two modes of operation: datagram and
+ connected. The mode is set and read through an interface's
+ /sys/class/net/<intf name>/mode file.
+
+ In datagram mode, the IB UD (Unreliable Datagram) transport is used
+ and so the interface MTU has is equal to the IB L2 MTU minus the
+ IPoIB encapsulation header (4 bytes). For example, in a typical IB
+ fabric with a 2K MTU, the IPoIB MTU will be 2048 - 4 = 2044 bytes.
+
+ In connected mode, the IB RC (Reliable Connected) transport is used.
+ Connected mode takes advantage of the connected nature of the IB
+ transport and allows an MTU up to the maximal IP packet size of 64K,
+ which reduces the number of IP packets needed for handling large UDP
+ datagrams, TCP segments, etc and increases the performance for large
+ messages.
+
+ In connected mode, the interface's UD QP is still used for multicast
+ and communication with peers that don't support connected mode. In
+ this case, RX emulation of ICMP PMTU packets is used to cause the
+ networking stack to use the smaller UD MTU for these neighbours.
+
+Stateless offloads
+
+ If the IB HW supports IPoIB stateless offloads, IPoIB advertises
+ TCP/IP checksum and/or Large Send (LSO) offloading capability to the
+ network stack.
+
+ Large Receive (LRO) offloading is also implemented and may be turned
+ on/off using ethtool calls. Currently LRO is supported only for
+ checksum offload capable devices.
+
+ Stateless offloads are supported only in datagram mode.
+
+Interrupt moderation
+
+ If the underlying IB device supports CQ event moderation, one can
+ use ethtool to set interrupt mitigation parameters and thus reduce
+ the overhead incurred by handling interrupts. The main code path of
+ IPoIB doesn't use events for TX completion signaling so only RX
+ moderation is supported.
+
+Debugging Information
+
+ By compiling the IPoIB driver with CONFIG_INFINIBAND_IPOIB_DEBUG set
+ to 'y', tracing messages are compiled into the driver. They are
+ turned on by setting the module parameters debug_level and
+ mcast_debug_level to 1. These parameters can be controlled at
+ runtime through files in /sys/module/ib_ipoib/.
+
+ CONFIG_INFINIBAND_IPOIB_DEBUG also enables files in the debugfs
+ virtual filesystem. By mounting this filesystem, for example with
+
+ mount -t debugfs none /sys/kernel/debug
+
+ it is possible to get statistics about multicast groups from the
+ files /sys/kernel/debug/ipoib/ib0_mcg and so on.
+
+ The performance impact of this option is negligible, so it
+ is safe to enable this option with debug_level set to 0 for normal
+ operation.
+
+ CONFIG_INFINIBAND_IPOIB_DEBUG_DATA enables even more debug output in
+ the data path when data_debug_level is set to 1. However, even with
+ the output disabled, enabling this configuration option will affect
+ performance, because it adds tests to the fast path.
+
+References
+
+ Transmission of IP over InfiniBand (IPoIB) (RFC 4391)
+ http://ietf.org/rfc/rfc4391.txt
+ IP over InfiniBand (IPoIB) Architecture (RFC 4392)
+ http://ietf.org/rfc/rfc4392.txt
+ IP over InfiniBand: Connected Mode (RFC 4755)
+ http://ietf.org/rfc/rfc4755.txt
diff --git a/Documentation/infiniband/sysfs.txt b/Documentation/infiniband/sysfs.txt
new file mode 100644
index 000000000..ddd519b72
--- /dev/null
+++ b/Documentation/infiniband/sysfs.txt
@@ -0,0 +1,66 @@
+SYSFS FILES
+
+ For each InfiniBand device, the InfiniBand drivers create the
+ following files under /sys/class/infiniband/<device name>:
+
+ node_type - Node type (CA, switch or router)
+ node_guid - Node GUID
+ sys_image_guid - System image GUID
+
+ In addition, there is a "ports" subdirectory, with one subdirectory
+ for each port. For example, if mthca0 is a 2-port HCA, there will
+ be two directories:
+
+ /sys/class/infiniband/mthca0/ports/1
+ /sys/class/infiniband/mthca0/ports/2
+
+ (A switch will only have a single "0" subdirectory for switch port
+ 0; no subdirectory is created for normal switch ports)
+
+ In each port subdirectory, the following files are created:
+
+ cap_mask - Port capability mask
+ lid - Port LID
+ lid_mask_count - Port LID mask count
+ rate - Port data rate (active width * active speed)
+ sm_lid - Subnet manager LID for port's subnet
+ sm_sl - Subnet manager SL for port's subnet
+ state - Port state (DOWN, INIT, ARMED, ACTIVE or ACTIVE_DEFER)
+ phys_state - Port physical state (Sleep, Polling, LinkUp, etc)
+
+ There is also a "counters" subdirectory, with files
+
+ VL15_dropped
+ excessive_buffer_overrun_errors
+ link_downed
+ link_error_recovery
+ local_link_integrity_errors
+ port_rcv_constraint_errors
+ port_rcv_data
+ port_rcv_errors
+ port_rcv_packets
+ port_rcv_remote_physical_errors
+ port_rcv_switch_relay_errors
+ port_xmit_constraint_errors
+ port_xmit_data
+ port_xmit_discards
+ port_xmit_packets
+ symbol_error
+
+ Each of these files contains the corresponding value from the port's
+ Performance Management PortCounters attribute, as described in
+ section 16.1.3.5 of the InfiniBand Architecture Specification.
+
+ The "pkeys" and "gids" subdirectories contain one file for each
+ entry in the port's P_Key or GID table respectively. For example,
+ ports/1/pkeys/10 contains the value at index 10 in port 1's P_Key
+ table.
+
+MTHCA
+
+ The Mellanox HCA driver also creates the files:
+
+ hw_rev - Hardware revision number
+ fw_ver - Firmware version
+ hca_type - HCA type: "MT23108", "MT25208 (MT23108 compat mode)",
+ or "MT25208"
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
new file mode 100644
index 000000000..7aca13a54
--- /dev/null
+++ b/Documentation/infiniband/user_mad.txt
@@ -0,0 +1,153 @@
+USERSPACE MAD ACCESS
+
+Device files
+
+ Each port of each InfiniBand device has a "umad" device and an
+ "issm" device attached. For example, a two-port HCA will have two
+ umad devices and two issm devices, while a switch will have one
+ device of each type (for switch port 0).
+
+Creating MAD agents
+
+ A MAD agent can be created by filling in a struct ib_user_mad_reg_req
+ and then calling the IB_USER_MAD_REGISTER_AGENT ioctl on a file
+ descriptor for the appropriate device file. If the registration
+ request succeeds, a 32-bit id will be returned in the structure.
+ For example:
+
+ struct ib_user_mad_reg_req req = { /* ... */ };
+ ret = ioctl(fd, IB_USER_MAD_REGISTER_AGENT, (char *) &req);
+ if (!ret)
+ my_agent = req.id;
+ else
+ perror("agent register");
+
+ Agents can be unregistered with the IB_USER_MAD_UNREGISTER_AGENT
+ ioctl. Also, all agents registered through a file descriptor will
+ be unregistered when the descriptor is closed.
+
+ 2014 -- a new registration ioctl is now provided which allows additional
+ fields to be provided during registration.
+ Users of this registration call are implicitly setting the use of
+ pkey_index (see below).
+
+Receiving MADs
+
+ MADs are received using read(). The receive side now supports
+ RMPP. The buffer passed to read() must be at least one
+ struct ib_user_mad + 256 bytes. For example:
+
+ If the buffer passed is not large enough to hold the received
+ MAD (RMPP), the errno is set to ENOSPC and the length of the
+ buffer needed is set in mad.length.
+
+ Example for normal MAD (non RMPP) reads:
+ struct ib_user_mad *mad;
+ mad = malloc(sizeof *mad + 256);
+ ret = read(fd, mad, sizeof *mad + 256);
+ if (ret != sizeof mad + 256) {
+ perror("read");
+ free(mad);
+ }
+
+ Example for RMPP reads:
+ struct ib_user_mad *mad;
+ mad = malloc(sizeof *mad + 256);
+ ret = read(fd, mad, sizeof *mad + 256);
+ if (ret == -ENOSPC)) {
+ length = mad.length;
+ free(mad);
+ mad = malloc(sizeof *mad + length);
+ ret = read(fd, mad, sizeof *mad + length);
+ }
+ if (ret < 0) {
+ perror("read");
+ free(mad);
+ }
+
+ In addition to the actual MAD contents, the other struct ib_user_mad
+ fields will be filled in with information on the received MAD. For
+ example, the remote LID will be in mad.lid.
+
+ If a send times out, a receive will be generated with mad.status set
+ to ETIMEDOUT. Otherwise when a MAD has been successfully received,
+ mad.status will be 0.
+
+ poll()/select() may be used to wait until a MAD can be read.
+
+Sending MADs
+
+ MADs are sent using write(). The agent ID for sending should be
+ filled into the id field of the MAD, the destination LID should be
+ filled into the lid field, and so on. The send side does support
+ RMPP so arbitrary length MAD can be sent. For example:
+
+ struct ib_user_mad *mad;
+
+ mad = malloc(sizeof *mad + mad_length);
+
+ /* fill in mad->data */
+
+ mad->hdr.id = my_agent; /* req.id from agent registration */
+ mad->hdr.lid = my_dest; /* in network byte order... */
+ /* etc. */
+
+ ret = write(fd, &mad, sizeof *mad + mad_length);
+ if (ret != sizeof *mad + mad_length)
+ perror("write");
+
+Transaction IDs
+
+ Users of the umad devices can use the lower 32 bits of the
+ transaction ID field (that is, the least significant half of the
+ field in network byte order) in MADs being sent to match
+ request/response pairs. The upper 32 bits are reserved for use by
+ the kernel and will be overwritten before a MAD is sent.
+
+P_Key Index Handling
+
+ The old ib_umad interface did not allow setting the P_Key index for
+ MADs that are sent and did not provide a way for obtaining the P_Key
+ index of received MADs. A new layout for struct ib_user_mad_hdr
+ with a pkey_index member has been defined; however, to preserve binary
+ compatibility with older applications, this new layout will not be used
+ unless one of IB_USER_MAD_ENABLE_PKEY or IB_USER_MAD_REGISTER_AGENT2 ioctl's
+ are called before a file descriptor is used for anything else.
+
+ In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
+ to 6, the new layout of struct ib_user_mad_hdr will be used by
+ default, and the IB_USER_MAD_ENABLE_PKEY ioctl will be removed.
+
+Setting IsSM Capability Bit
+
+ To set the IsSM capability bit for a port, simply open the
+ corresponding issm device file. If the IsSM bit is already set,
+ then the open call will block until the bit is cleared (or return
+ immediately with errno set to EAGAIN if the O_NONBLOCK flag is
+ passed to open()). The IsSM bit will be cleared when the issm file
+ is closed. No read, write or other operations can be performed on
+ the issm file.
+
+/dev files
+
+ To create the appropriate character device files automatically with
+ udev, a rule like
+
+ KERNEL=="umad*", NAME="infiniband/%k"
+ KERNEL=="issm*", NAME="infiniband/%k"
+
+ can be used. This will create device nodes named
+
+ /dev/infiniband/umad0
+ /dev/infiniband/issm0
+
+ for the first port, and so on. The InfiniBand device and port
+ associated with these devices can be determined from the files
+
+ /sys/class/infiniband_mad/umad0/ibdev
+ /sys/class/infiniband_mad/umad0/port
+
+ and
+
+ /sys/class/infiniband_mad/issm0/ibdev
+ /sys/class/infiniband_mad/issm0/port
diff --git a/Documentation/infiniband/user_verbs.txt b/Documentation/infiniband/user_verbs.txt
new file mode 100644
index 000000000..e5092d696
--- /dev/null
+++ b/Documentation/infiniband/user_verbs.txt
@@ -0,0 +1,69 @@
+USERSPACE VERBS ACCESS
+
+ The ib_uverbs module, built by enabling CONFIG_INFINIBAND_USER_VERBS,
+ enables direct userspace access to IB hardware via "verbs," as
+ described in chapter 11 of the InfiniBand Architecture Specification.
+
+ To use the verbs, the libibverbs library, available from
+ http://www.openfabrics.org/, is required. libibverbs contains a
+ device-independent API for using the ib_uverbs interface.
+ libibverbs also requires appropriate device-dependent kernel and
+ userspace driver for your InfiniBand hardware. For example, to use
+ a Mellanox HCA, you will need the ib_mthca kernel module and the
+ libmthca userspace driver be installed.
+
+User-kernel communication
+
+ Userspace communicates with the kernel for slow path, resource
+ management operations via the /dev/infiniband/uverbsN character
+ devices. Fast path operations are typically performed by writing
+ directly to hardware registers mmap()ed into userspace, with no
+ system call or context switch into the kernel.
+
+ Commands are sent to the kernel via write()s on these device files.
+ The ABI is defined in drivers/infiniband/include/ib_user_verbs.h.
+ The structs for commands that require a response from the kernel
+ contain a 64-bit field used to pass a pointer to an output buffer.
+ Status is returned to userspace as the return value of the write()
+ system call.
+
+Resource management
+
+ Since creation and destruction of all IB resources is done by
+ commands passed through a file descriptor, the kernel can keep track
+ of which resources are attached to a given userspace context. The
+ ib_uverbs module maintains idr tables that are used to translate
+ between kernel pointers and opaque userspace handles, so that kernel
+ pointers are never exposed to userspace and userspace cannot trick
+ the kernel into following a bogus pointer.
+
+ This also allows the kernel to clean up when a process exits and
+ prevent one process from touching another process's resources.
+
+Memory pinning
+
+ Direct userspace I/O requires that memory regions that are potential
+ I/O targets be kept resident at the same physical address. The
+ ib_uverbs module manages pinning and unpinning memory regions via
+ get_user_pages() and put_page() calls. It also accounts for the
+ amount of memory pinned in the process's locked_vm, and checks that
+ unprivileged processes do not exceed their RLIMIT_MEMLOCK limit.
+
+ Pages that are pinned multiple times are counted each time they are
+ pinned, so the value of locked_vm may be an overestimate of the
+ number of pages pinned by a process.
+
+/dev files
+
+ To create the appropriate character device files automatically with
+ udev, a rule like
+
+ KERNEL=="uverbs*", NAME="infiniband/%k"
+
+ can be used. This will create device nodes named
+
+ /dev/infiniband/uverbs0
+
+ and so on. Since the InfiniBand userspace verbs should be safe for
+ use by non-privileged processes, it may be useful to add an
+ appropriate MODE or GROUP to the udev rule.
diff --git a/Documentation/init.txt b/Documentation/init.txt
new file mode 100644
index 000000000..535ad5e82
--- /dev/null
+++ b/Documentation/init.txt
@@ -0,0 +1,49 @@
+Explaining the dreaded "No init found." boot hang message
+=========================================================
+
+OK, so you've got this pretty unintuitive message (currently located
+in init/main.c) and are wondering what the H*** went wrong.
+Some high-level reasons for failure (listed roughly in order of execution)
+to load the init binary are:
+A) Unable to mount root FS
+B) init binary doesn't exist on rootfs
+C) broken console device
+D) binary exists but dependencies not available
+E) binary cannot be loaded
+
+Detailed explanations:
+0) Set "debug" kernel parameter (in bootloader config file or CONFIG_CMDLINE)
+ to get more detailed kernel messages.
+A) make sure you have the correct root FS type
+ (and root= kernel parameter points to the correct partition),
+ required drivers such as storage hardware (such as SCSI or USB!)
+ and filesystem (ext3, jffs2 etc.) are builtin (alternatively as modules,
+ to be pre-loaded by an initrd)
+C) Possibly a conflict in console= setup --> initial console unavailable.
+ E.g. some serial consoles are unreliable due to serial IRQ issues (e.g.
+ missing interrupt-based configuration).
+ Try using a different console= device or e.g. netconsole= .
+D) e.g. required library dependencies of the init binary such as
+ /lib/ld-linux.so.2 missing or broken. Use readelf -d <INIT>|grep NEEDED
+ to find out which libraries are required.
+E) make sure the binary's architecture matches your hardware.
+ E.g. i386 vs. x86_64 mismatch, or trying to load x86 on ARM hardware.
+ In case you tried loading a non-binary file here (shell script?),
+ you should make sure that the script specifies an interpreter in its shebang
+ header line (#!/...) that is fully working (including its library
+ dependencies). And before tackling scripts, better first test a simple
+ non-script binary such as /bin/sh and confirm its successful execution.
+ To find out more, add code to init/main.c to display kernel_execve()s
+ return values.
+
+Please extend this explanation whenever you find new failure causes
+(after all loading the init binary is a CRITICAL and hard transition step
+which needs to be made as painless as possible), then submit patch to LKML.
+Further TODOs:
+- Implement the various run_init_process() invocations via a struct array
+ which can then store the kernel_execve() result value and on failure
+ log it all by iterating over _all_ results (very important usability fix).
+- try to make the implementation itself more helpful in general,
+ e.g. by providing additional error messages at affected places.
+
+Andreas Mohr <andi at lisas period de>
diff --git a/Documentation/initrd.txt b/Documentation/initrd.txt
new file mode 100644
index 000000000..4e1839ccb
--- /dev/null
+++ b/Documentation/initrd.txt
@@ -0,0 +1,366 @@
+Using the initial RAM disk (initrd)
+===================================
+
+Written 1996,2000 by Werner Almesberger <werner.almesberger@epfl.ch> and
+ Hans Lermen <lermen@fgan.de>
+
+
+initrd provides the capability to load a RAM disk by the boot loader.
+This RAM disk can then be mounted as the root file system and programs
+can be run from it. Afterwards, a new root file system can be mounted
+from a different device. The previous root (from initrd) is then moved
+to a directory and can be subsequently unmounted.
+
+initrd is mainly designed to allow system startup to occur in two phases,
+where the kernel comes up with a minimum set of compiled-in drivers, and
+where additional modules are loaded from initrd.
+
+This document gives a brief overview of the use of initrd. A more detailed
+discussion of the boot process can be found in [1].
+
+
+Operation
+---------
+
+When using initrd, the system typically boots as follows:
+
+ 1) the boot loader loads the kernel and the initial RAM disk
+ 2) the kernel converts initrd into a "normal" RAM disk and
+ frees the memory used by initrd
+ 3) if the root device is not /dev/ram0, the old (deprecated)
+ change_root procedure is followed. see the "Obsolete root change
+ mechanism" section below.
+ 4) root device is mounted. if it is /dev/ram0, the initrd image is
+ then mounted as root
+ 5) /sbin/init is executed (this can be any valid executable, including
+ shell scripts; it is run with uid 0 and can do basically everything
+ init can do).
+ 6) init mounts the "real" root file system
+ 7) init places the root file system at the root directory using the
+ pivot_root system call
+ 8) init execs the /sbin/init on the new root filesystem, performing
+ the usual boot sequence
+ 9) the initrd file system is removed
+
+Note that changing the root directory does not involve unmounting it.
+It is therefore possible to leave processes running on initrd during that
+procedure. Also note that file systems mounted under initrd continue to
+be accessible.
+
+
+Boot command-line options
+-------------------------
+
+initrd adds the following new options:
+
+ initrd=<path> (e.g. LOADLIN)
+
+ Loads the specified file as the initial RAM disk. When using LILO, you
+ have to specify the RAM disk image file in /etc/lilo.conf, using the
+ INITRD configuration variable.
+
+ noinitrd
+
+ initrd data is preserved but it is not converted to a RAM disk and
+ the "normal" root file system is mounted. initrd data can be read
+ from /dev/initrd. Note that the data in initrd can have any structure
+ in this case and doesn't necessarily have to be a file system image.
+ This option is used mainly for debugging.
+
+ Note: /dev/initrd is read-only and it can only be used once. As soon
+ as the last process has closed it, all data is freed and /dev/initrd
+ can't be opened anymore.
+
+ root=/dev/ram0
+
+ initrd is mounted as root, and the normal boot procedure is followed,
+ with the RAM disk mounted as root.
+
+Compressed cpio images
+----------------------
+
+Recent kernels have support for populating a ramdisk from a compressed cpio
+archive. On such systems, the creation of a ramdisk image doesn't need to
+involve special block devices or loopbacks; you merely create a directory on
+disk with the desired initrd content, cd to that directory, and run (as an
+example):
+
+find . | cpio --quiet -H newc -o | gzip -9 -n > /boot/imagefile.img
+
+Examining the contents of an existing image file is just as simple:
+
+mkdir /tmp/imagefile
+cd /tmp/imagefile
+gzip -cd /boot/imagefile.img | cpio -imd --quiet
+
+Installation
+------------
+
+First, a directory for the initrd file system has to be created on the
+"normal" root file system, e.g.
+
+# mkdir /initrd
+
+The name is not relevant. More details can be found on the pivot_root(2)
+man page.
+
+If the root file system is created during the boot procedure (i.e. if
+you're building an install floppy), the root file system creation
+procedure should create the /initrd directory.
+
+If initrd will not be mounted in some cases, its content is still
+accessible if the following device has been created:
+
+# mknod /dev/initrd b 1 250
+# chmod 400 /dev/initrd
+
+Second, the kernel has to be compiled with RAM disk support and with
+support for the initial RAM disk enabled. Also, at least all components
+needed to execute programs from initrd (e.g. executable format and file
+system) must be compiled into the kernel.
+
+Third, you have to create the RAM disk image. This is done by creating a
+file system on a block device, copying files to it as needed, and then
+copying the content of the block device to the initrd file. With recent
+kernels, at least three types of devices are suitable for that:
+
+ - a floppy disk (works everywhere but it's painfully slow)
+ - a RAM disk (fast, but allocates physical memory)
+ - a loopback device (the most elegant solution)
+
+We'll describe the loopback device method:
+
+ 1) make sure loopback block devices are configured into the kernel
+ 2) create an empty file system of the appropriate size, e.g.
+ # dd if=/dev/zero of=initrd bs=300k count=1
+ # mke2fs -F -m0 initrd
+ (if space is critical, you may want to use the Minix FS instead of Ext2)
+ 3) mount the file system, e.g.
+ # mount -t ext2 -o loop initrd /mnt
+ 4) create the console device:
+ # mkdir /mnt/dev
+ # mknod /mnt/dev/console c 5 1
+ 5) copy all the files that are needed to properly use the initrd
+ environment. Don't forget the most important file, /sbin/init
+ Note that /sbin/init's permissions must include "x" (execute).
+ 6) correct operation the initrd environment can frequently be tested
+ even without rebooting with the command
+ # chroot /mnt /sbin/init
+ This is of course limited to initrds that do not interfere with the
+ general system state (e.g. by reconfiguring network interfaces,
+ overwriting mounted devices, trying to start already running demons,
+ etc. Note however that it is usually possible to use pivot_root in
+ such a chroot'ed initrd environment.)
+ 7) unmount the file system
+ # umount /mnt
+ 8) the initrd is now in the file "initrd". Optionally, it can now be
+ compressed
+ # gzip -9 initrd
+
+For experimenting with initrd, you may want to take a rescue floppy and
+only add a symbolic link from /sbin/init to /bin/sh. Alternatively, you
+can try the experimental newlib environment [2] to create a small
+initrd.
+
+Finally, you have to boot the kernel and load initrd. Almost all Linux
+boot loaders support initrd. Since the boot process is still compatible
+with an older mechanism, the following boot command line parameters
+have to be given:
+
+ root=/dev/ram0 rw
+
+(rw is only necessary if writing to the initrd file system.)
+
+With LOADLIN, you simply execute
+
+ LOADLIN <kernel> initrd=<disk_image>
+e.g. LOADLIN C:\LINUX\BZIMAGE initrd=C:\LINUX\INITRD.GZ root=/dev/ram0 rw
+
+With LILO, you add the option INITRD=<path> to either the global section
+or to the section of the respective kernel in /etc/lilo.conf, and pass
+the options using APPEND, e.g.
+
+ image = /bzImage
+ initrd = /boot/initrd.gz
+ append = "root=/dev/ram0 rw"
+
+and run /sbin/lilo
+
+For other boot loaders, please refer to the respective documentation.
+
+Now you can boot and enjoy using initrd.
+
+
+Changing the root device
+------------------------
+
+When finished with its duties, init typically changes the root device
+and proceeds with starting the Linux system on the "real" root device.
+
+The procedure involves the following steps:
+ - mounting the new root file system
+ - turning it into the root file system
+ - removing all accesses to the old (initrd) root file system
+ - unmounting the initrd file system and de-allocating the RAM disk
+
+Mounting the new root file system is easy: it just needs to be mounted on
+a directory under the current root. Example:
+
+# mkdir /new-root
+# mount -o ro /dev/hda1 /new-root
+
+The root change is accomplished with the pivot_root system call, which
+is also available via the pivot_root utility (see pivot_root(8) man
+page; pivot_root is distributed with util-linux version 2.10h or higher
+[3]). pivot_root moves the current root to a directory under the new
+root, and puts the new root at its place. The directory for the old root
+must exist before calling pivot_root. Example:
+
+# cd /new-root
+# mkdir initrd
+# pivot_root . initrd
+
+Now, the init process may still access the old root via its
+executable, shared libraries, standard input/output/error, and its
+current root directory. All these references are dropped by the
+following command:
+
+# exec chroot . what-follows <dev/console >dev/console 2>&1
+
+Where what-follows is a program under the new root, e.g. /sbin/init
+If the new root file system will be used with udev and has no valid
+/dev directory, udev must be initialized before invoking chroot in order
+to provide /dev/console.
+
+Note: implementation details of pivot_root may change with time. In order
+to ensure compatibility, the following points should be observed:
+
+ - before calling pivot_root, the current directory of the invoking
+ process should point to the new root directory
+ - use . as the first argument, and the _relative_ path of the directory
+ for the old root as the second argument
+ - a chroot program must be available under the old and the new root
+ - chroot to the new root afterwards
+ - use relative paths for dev/console in the exec command
+
+Now, the initrd can be unmounted and the memory allocated by the RAM
+disk can be freed:
+
+# umount /initrd
+# blockdev --flushbufs /dev/ram0
+
+It is also possible to use initrd with an NFS-mounted root, see the
+pivot_root(8) man page for details.
+
+
+Usage scenarios
+---------------
+
+The main motivation for implementing initrd was to allow for modular
+kernel configuration at system installation. The procedure would work
+as follows:
+
+ 1) system boots from floppy or other media with a minimal kernel
+ (e.g. support for RAM disks, initrd, a.out, and the Ext2 FS) and
+ loads initrd
+ 2) /sbin/init determines what is needed to (1) mount the "real" root FS
+ (i.e. device type, device drivers, file system) and (2) the
+ distribution media (e.g. CD-ROM, network, tape, ...). This can be
+ done by asking the user, by auto-probing, or by using a hybrid
+ approach.
+ 3) /sbin/init loads the necessary kernel modules
+ 4) /sbin/init creates and populates the root file system (this doesn't
+ have to be a very usable system yet)
+ 5) /sbin/init invokes pivot_root to change the root file system and
+ execs - via chroot - a program that continues the installation
+ 6) the boot loader is installed
+ 7) the boot loader is configured to load an initrd with the set of
+ modules that was used to bring up the system (e.g. /initrd can be
+ modified, then unmounted, and finally, the image is written from
+ /dev/ram0 or /dev/rd/0 to a file)
+ 8) now the system is bootable and additional installation tasks can be
+ performed
+
+The key role of initrd here is to re-use the configuration data during
+normal system operation without requiring the use of a bloated "generic"
+kernel or re-compiling or re-linking the kernel.
+
+A second scenario is for installations where Linux runs on systems with
+different hardware configurations in a single administrative domain. In
+such cases, it is desirable to generate only a small set of kernels
+(ideally only one) and to keep the system-specific part of configuration
+information as small as possible. In this case, a common initrd could be
+generated with all the necessary modules. Then, only /sbin/init or a file
+read by it would have to be different.
+
+A third scenario is more convenient recovery disks, because information
+like the location of the root FS partition doesn't have to be provided at
+boot time, but the system loaded from initrd can invoke a user-friendly
+dialog and it can also perform some sanity checks (or even some form of
+auto-detection).
+
+Last not least, CD-ROM distributors may use it for better installation
+from CD, e.g. by using a boot floppy and bootstrapping a bigger RAM disk
+via initrd from CD; or by booting via a loader like LOADLIN or directly
+from the CD-ROM, and loading the RAM disk from CD without need of
+floppies.
+
+
+Obsolete root change mechanism
+------------------------------
+
+The following mechanism was used before the introduction of pivot_root.
+Current kernels still support it, but you should _not_ rely on its
+continued availability.
+
+It works by mounting the "real" root device (i.e. the one set with rdev
+in the kernel image or with root=... at the boot command line) as the
+root file system when linuxrc exits. The initrd file system is then
+unmounted, or, if it is still busy, moved to a directory /initrd, if
+such a directory exists on the new root file system.
+
+In order to use this mechanism, you do not have to specify the boot
+command options root, init, or rw. (If specified, they will affect
+the real root file system, not the initrd environment.)
+
+If /proc is mounted, the "real" root device can be changed from within
+linuxrc by writing the number of the new root FS device to the special
+file /proc/sys/kernel/real-root-dev, e.g.
+
+ # echo 0x301 >/proc/sys/kernel/real-root-dev
+
+Note that the mechanism is incompatible with NFS and similar file
+systems.
+
+This old, deprecated mechanism is commonly called "change_root", while
+the new, supported mechanism is called "pivot_root".
+
+
+Mixed change_root and pivot_root mechanism
+------------------------------------------
+
+In case you did not want to use root=/dev/ram0 to trigger the pivot_root
+mechanism, you may create both /linuxrc and /sbin/init in your initrd image.
+
+/linuxrc would contain only the following:
+
+#! /bin/sh
+mount -n -t proc proc /proc
+echo 0x0100 >/proc/sys/kernel/real-root-dev
+umount -n /proc
+
+Once linuxrc exited, the kernel would mount again your initrd as root,
+this time executing /sbin/init. Again, it would be the duty of this init
+to build the right environment (maybe using the root= device passed on
+the cmdline) before the final execution of the real /sbin/init.
+
+
+Resources
+---------
+
+[1] Almesberger, Werner; "Booting Linux: The History and the Future"
+ http://www.almesberger.net/cv/papers/ols2k-9.ps.gz
+[2] newlib package (experimental), with initrd example
+ http://sources.redhat.com/newlib/
+[3] util-linux: Miscellaneous utilities for Linux
+ http://www.kernel.org/pub/linux/utils/util-linux/
diff --git a/Documentation/input/alps.txt b/Documentation/input/alps.txt
new file mode 100644
index 000000000..c86f2f1ae
--- /dev/null
+++ b/Documentation/input/alps.txt
@@ -0,0 +1,319 @@
+ALPS Touchpad Protocol
+----------------------
+
+Introduction
+------------
+Currently the ALPS touchpad driver supports seven protocol versions in use by
+ALPS touchpads, called versions 1, 2, 3, 4, 5, 6 and 7.
+
+Since roughly mid-2010 several new ALPS touchpads have been released and
+integrated into a variety of laptops and netbooks. These new touchpads
+have enough behavior differences that the alps_model_data definition
+table, describing the properties of the different versions, is no longer
+adequate. The design choices were to re-define the alps_model_data
+table, with the risk of regression testing existing devices, or isolate
+the new devices outside of the alps_model_data table. The latter design
+choice was made. The new touchpad signatures are named: "Rushmore",
+"Pinnacle", and "Dolphin", which you will see in the alps.c code.
+For the purposes of this document, this group of ALPS touchpads will
+generically be called "new ALPS touchpads".
+
+We experimented with probing the ACPI interface _HID (Hardware ID)/_CID
+(Compatibility ID) definition as a way to uniquely identify the
+different ALPS variants but there did not appear to be a 1:1 mapping.
+In fact, it appeared to be an m:n mapping between the _HID and actual
+hardware type.
+
+Detection
+---------
+
+All ALPS touchpads should respond to the "E6 report" command sequence:
+E8-E6-E6-E6-E9. An ALPS touchpad should respond with either 00-00-0A or
+00-00-64 if no buttons are pressed. The bits 0-2 of the first byte will be 1s
+if some buttons are pressed.
+
+If the E6 report is successful, the touchpad model is identified using the "E7
+report" sequence: E8-E7-E7-E7-E9. The response is the model signature and is
+matched against known models in the alps_model_data_array.
+
+For older touchpads supporting protocol versions 3 and 4, the E7 report
+model signature is always 73-02-64. To differentiate between these
+versions, the response from the "Enter Command Mode" sequence must be
+inspected as described below.
+
+The new ALPS touchpads have an E7 signature of 73-03-50 or 73-03-0A but
+seem to be better differentiated by the EC Command Mode response.
+
+Command Mode
+------------
+
+Protocol versions 3 and 4 have a command mode that is used to read and write
+one-byte device registers in a 16-bit address space. The command sequence
+EC-EC-EC-E9 places the device in command mode, and the device will respond
+with 88-07 followed by a third byte. This third byte can be used to determine
+whether the devices uses the version 3 or 4 protocol.
+
+To exit command mode, PSMOUSE_CMD_SETSTREAM (EA) is sent to the touchpad.
+
+While in command mode, register addresses can be set by first sending a
+specific command, either EC for v3 devices or F5 for v4 devices. Then the
+address is sent one nibble at a time, where each nibble is encoded as a
+command with optional data. This encoding differs slightly between the v3 and
+v4 protocols.
+
+Once an address has been set, the addressed register can be read by sending
+PSMOUSE_CMD_GETINFO (E9). The first two bytes of the response contains the
+address of the register being read, and the third contains the value of the
+register. Registers are written by writing the value one nibble at a time
+using the same encoding used for addresses.
+
+For the new ALPS touchpads, the EC command is used to enter command
+mode. The response in the new ALPS touchpads is significantly different,
+and more important in determining the behavior. This code has been
+separated from the original alps_model_data table and put in the
+alps_identify function. For example, there seem to be two hardware init
+sequences for the "Dolphin" touchpads as determined by the second byte
+of the EC response.
+
+Packet Format
+-------------
+
+In the following tables, the following notation is used.
+
+ CAPITALS = stick, miniscules = touchpad
+
+?'s can have different meanings on different models, such as wheel rotation,
+extra buttons, stick buttons on a dualpoint, etc.
+
+PS/2 packet format
+------------------
+
+ byte 0: 0 0 YSGN XSGN 1 M R L
+ byte 1: X7 X6 X5 X4 X3 X2 X1 X0
+ byte 2: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
+
+Note that the device never signals overflow condition.
+
+For protocol version 2 devices when the trackpoint is used, and no fingers
+are on the touchpad, the M R L bits signal the combined status of both the
+pointingstick and touchpad buttons.
+
+ALPS Absolute Mode - Protocol Version 1
+--------------------------------------
+
+ byte 0: 1 0 0 0 1 x9 x8 x7
+ byte 1: 0 x6 x5 x4 x3 x2 x1 x0
+ byte 2: 0 ? ? l r ? fin ges
+ byte 3: 0 ? ? ? ? y9 y8 y7
+ byte 4: 0 y6 y5 y4 y3 y2 y1 y0
+ byte 5: 0 z6 z5 z4 z3 z2 z1 z0
+
+ALPS Absolute Mode - Protocol Version 2
+---------------------------------------
+
+ byte 0: 1 ? ? ? 1 PSM PSR PSL
+ byte 1: 0 x6 x5 x4 x3 x2 x1 x0
+ byte 2: 0 x10 x9 x8 x7 ? fin ges
+ byte 3: 0 y9 y8 y7 1 M R L
+ byte 4: 0 y6 y5 y4 y3 y2 y1 y0
+ byte 5: 0 z6 z5 z4 z3 z2 z1 z0
+
+Protocol Version 2 DualPoint devices send standard PS/2 mouse packets for
+the DualPoint Stick. For non interleaved dualpoint devices the pointingstick
+buttons get reported separately in the PSM, PSR and PSL bits.
+
+Dualpoint device -- interleaved packet format
+---------------------------------------------
+
+ byte 0: 1 1 0 0 1 1 1 1
+ byte 1: 0 x6 x5 x4 x3 x2 x1 x0
+ byte 2: 0 x10 x9 x8 x7 0 fin ges
+ byte 3: 0 0 YSGN XSGN 1 1 1 1
+ byte 4: X7 X6 X5 X4 X3 X2 X1 X0
+ byte 5: Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0
+ byte 6: 0 y9 y8 y7 1 m r l
+ byte 7: 0 y6 y5 y4 y3 y2 y1 y0
+ byte 8: 0 z6 z5 z4 z3 z2 z1 z0
+
+Devices which use the interleaving format normally send standard PS/2 mouse
+packets for the DualPoint Stick + ALPS Absolute Mode packets for the
+touchpad, switching to the interleaved packet format when both the stick and
+the touchpad are used at the same time.
+
+ALPS Absolute Mode - Protocol Version 3
+---------------------------------------
+
+ALPS protocol version 3 has three different packet formats. The first two are
+associated with touchpad events, and the third is associated with trackstick
+events.
+
+The first type is the touchpad position packet.
+
+ byte 0: 1 ? x1 x0 1 1 1 1
+ byte 1: 0 x10 x9 x8 x7 x6 x5 x4
+ byte 2: 0 y10 y9 y8 y7 y6 y5 y4
+ byte 3: 0 M R L 1 m r l
+ byte 4: 0 mt x3 x2 y3 y2 y1 y0
+ byte 5: 0 z6 z5 z4 z3 z2 z1 z0
+
+Note that for some devices the trackstick buttons are reported in this packet,
+and on others it is reported in the trackstick packets.
+
+The second packet type contains bitmaps representing the x and y axes. In the
+bitmaps a given bit is set if there is a finger covering that position on the
+given axis. Thus the bitmap packet can be used for low-resolution multi-touch
+data, although finger tracking is not possible. This packet also encodes the
+number of contacts (f1 and f0 in the table below).
+
+ byte 0: 1 1 x1 x0 1 1 1 1
+ byte 1: 0 x8 x7 x6 x5 x4 x3 x2
+ byte 2: 0 y7 y6 y5 y4 y3 y2 y1
+ byte 3: 0 y10 y9 y8 1 1 1 1
+ byte 4: 0 x14 x13 x12 x11 x10 x9 y0
+ byte 5: 0 1 ? ? ? ? f1 f0
+
+This packet only appears after a position packet with the mt bit set, and
+usually only appears when there are two or more contacts (although
+occasionally it's seen with only a single contact).
+
+The final v3 packet type is the trackstick packet.
+
+ byte 0: 1 1 x7 y7 1 1 1 1
+ byte 1: 0 x6 x5 x4 x3 x2 x1 x0
+ byte 2: 0 y6 y5 y4 y3 y2 y1 y0
+ byte 3: 0 1 0 0 1 0 0 0
+ byte 4: 0 z4 z3 z2 z1 z0 ? ?
+ byte 5: 0 0 1 1 1 1 1 1
+
+ALPS Absolute Mode - Protocol Version 4
+---------------------------------------
+
+Protocol version 4 has an 8-byte packet format.
+
+ byte 0: 1 ? x1 x0 1 1 1 1
+ byte 1: 0 x10 x9 x8 x7 x6 x5 x4
+ byte 2: 0 y10 y9 y8 y7 y6 y5 y4
+ byte 3: 0 1 x3 x2 y3 y2 y1 y0
+ byte 4: 0 ? ? ? 1 ? r l
+ byte 5: 0 z6 z5 z4 z3 z2 z1 z0
+ byte 6: bitmap data (described below)
+ byte 7: bitmap data (described below)
+
+The last two bytes represent a partial bitmap packet, with 3 full packets
+required to construct a complete bitmap packet. Once assembled, the 6-byte
+bitmap packet has the following format:
+
+ byte 0: 0 1 x7 x6 x5 x4 x3 x2
+ byte 1: 0 x1 x0 y4 y3 y2 y1 y0
+ byte 2: 0 0 ? x14 x13 x12 x11 x10
+ byte 3: 0 x9 x8 y9 y8 y7 y6 y5
+ byte 4: 0 0 0 0 0 0 0 0
+ byte 5: 0 0 0 0 0 0 0 y10
+
+There are several things worth noting here.
+
+ 1) In the bitmap data, bit 6 of byte 0 serves as a sync byte to
+ identify the first fragment of a bitmap packet.
+
+ 2) The bitmaps represent the same data as in the v3 bitmap packets, although
+ the packet layout is different.
+
+ 3) There doesn't seem to be a count of the contact points anywhere in the v4
+ protocol packets. Deriving a count of contact points must be done by
+ analyzing the bitmaps.
+
+ 4) There is a 3 to 1 ratio of position packets to bitmap packets. Therefore
+ MT position can only be updated for every third ST position update, and
+ the count of contact points can only be updated every third packet as
+ well.
+
+So far no v4 devices with tracksticks have been encountered.
+
+ALPS Absolute Mode - Protocol Version 5
+---------------------------------------
+This is basically Protocol Version 3 but with different logic for packet
+decode. It uses the same alps_process_touchpad_packet_v3 call with a
+specialized decode_fields function pointer to correctly interpret the
+packets. This appears to only be used by the Dolphin devices.
+
+For single-touch, the 6-byte packet format is:
+
+ byte 0: 1 1 0 0 1 0 0 0
+ byte 1: 0 x6 x5 x4 x3 x2 x1 x0
+ byte 2: 0 y6 y5 y4 y3 y2 y1 y0
+ byte 3: 0 M R L 1 m r l
+ byte 4: y10 y9 y8 y7 x10 x9 x8 x7
+ byte 5: 0 z6 z5 z4 z3 z2 z1 z0
+
+For mt, the format is:
+
+ byte 0: 1 1 1 n3 1 n2 n1 x24
+ byte 1: 1 y7 y6 y5 y4 y3 y2 y1
+ byte 2: ? x2 x1 y12 y11 y10 y9 y8
+ byte 3: 0 x23 x22 x21 x20 x19 x18 x17
+ byte 4: 0 x9 x8 x7 x6 x5 x4 x3
+ byte 5: 0 x16 x15 x14 x13 x12 x11 x10
+
+ALPS Absolute Mode - Protocol Version 6
+---------------------------------------
+
+For trackstick packet, the format is:
+
+ byte 0: 1 1 1 1 1 1 1 1
+ byte 1: 0 X6 X5 X4 X3 X2 X1 X0
+ byte 2: 0 Y6 Y5 Y4 Y3 Y2 Y1 Y0
+ byte 3: ? Y7 X7 ? ? M R L
+ byte 4: Z7 Z6 Z5 Z4 Z3 Z2 Z1 Z0
+ byte 5: 0 1 1 1 1 1 1 1
+
+For touchpad packet, the format is:
+
+ byte 0: 1 1 1 1 1 1 1 1
+ byte 1: 0 0 0 0 x3 x2 x1 x0
+ byte 2: 0 0 0 0 y3 y2 y1 y0
+ byte 3: ? x7 x6 x5 x4 ? r l
+ byte 4: ? y7 y6 y5 y4 ? ? ?
+ byte 5: z7 z6 z5 z4 z3 z2 z1 z0
+
+(v6 touchpad does not have middle button)
+
+ALPS Absolute Mode - Protocol Version 7
+---------------------------------------
+
+For trackstick packet, the format is:
+
+ byte 0: 0 1 0 0 1 0 0 0
+ byte 1: 1 1 * * 1 M R L
+ byte 2: X7 1 X5 X4 X3 X2 X1 X0
+ byte 3: Z6 1 Y6 X6 1 Y2 Y1 Y0
+ byte 4: Y7 0 Y5 Y4 Y3 1 1 0
+ byte 5: T&P 0 Z5 Z4 Z3 Z2 Z1 Z0
+
+For touchpad packet, the format is:
+
+ packet-fmt b7 b6 b5 b4 b3 b2 b1 b0
+ byte 0: TWO & MULTI L 1 R M 1 Y0-2 Y0-1 Y0-0
+ byte 0: NEW L 1 X1-5 1 1 Y0-2 Y0-1 Y0-0
+ byte 1: Y0-10 Y0-9 Y0-8 Y0-7 Y0-6 Y0-5 Y0-4 Y0-3
+ byte 2: X0-11 1 X0-10 X0-9 X0-8 X0-7 X0-6 X0-5
+ byte 3: X1-11 1 X0-4 X0-3 1 X0-2 X0-1 X0-0
+ byte 4: TWO X1-10 TWO X1-9 X1-8 X1-7 X1-6 X1-5 X1-4
+ byte 4: MULTI X1-10 TWO X1-9 X1-8 X1-7 X1-6 Y1-5 1
+ byte 4: NEW X1-10 TWO X1-9 X1-8 X1-7 X1-6 0 0
+ byte 5: TWO & NEW Y1-10 0 Y1-9 Y1-8 Y1-7 Y1-6 Y1-5 Y1-4
+ byte 5: MULTI Y1-10 0 Y1-9 Y1-8 Y1-7 Y1-6 F-1 F-0
+
+ L: Left button
+ R / M: Non-clickpads: Right / Middle button
+ Clickpads: When > 2 fingers are down, and some fingers
+ are in the button area, then the 2 coordinates reported
+ are for fingers outside the button area and these report
+ extra fingers being present in the right / left button
+ area. Note these fingers are not added to the F field!
+ so if a TWO packet is received and R = 1 then there are
+ 3 fingers down, etc.
+ TWO: 1: Two touches present, byte 0/4/5 are in TWO fmt
+ 0: If byte 4 bit 0 is 1, then byte 0/4/5 are in MULTI fmt
+ otherwise byte 0 bit 4 must be set and byte 0/4/5 are
+ in NEW fmt
+ F: Number of fingers - 3, 0 means 3 fingers, 1 means 4 ...
diff --git a/Documentation/input/amijoy.txt b/Documentation/input/amijoy.txt
new file mode 100644
index 000000000..7dc4f1759
--- /dev/null
+++ b/Documentation/input/amijoy.txt
@@ -0,0 +1,184 @@
+Amiga 4-joystick parport extension
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Parallel port pins:
+
+ (2) - Up1 (6) - Up2
+ (3) - Down1 (7) - Down2
+ (4) - Left1 (8) - Left2
+ (5) - Right1 (9) - Right2
+(13) - Fire1 (11) - Fire2
+(18) - Gnd1 (18) - Gnd2
+
+Amiga digital joystick pinout
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+(1) - Up
+(2) - Down
+(3) - Left
+(4) - Right
+(5) - n/c
+(6) - Fire button
+(7) - +5V (50mA)
+(8) - Gnd
+(9) - Thumb button
+
+Amiga mouse pinout
+~~~~~~~~~~~~~~~~~~
+(1) - V-pulse
+(2) - H-pulse
+(3) - VQ-pulse
+(4) - HQ-pulse
+(5) - Middle button
+(6) - Left button
+(7) - +5V (50mA)
+(8) - Gnd
+(9) - Right button
+
+Amiga analog joystick pinout
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+(1) - Top button
+(2) - Top2 button
+(3) - Trigger button
+(4) - Thumb button
+(5) - Analog X
+(6) - n/c
+(7) - +5V (50mA)
+(8) - Gnd
+(9) - Analog Y
+
+Amiga lightpen pinout
+~~~~~~~~~~~~~~~~~~~~~
+(1) - n/c
+(2) - n/c
+(3) - n/c
+(4) - n/c
+(5) - Touch button
+(6) - /Beamtrigger
+(7) - +5V (50mA)
+(8) - Gnd
+(9) - Stylus button
+
+-------------------------------------------------------------------------------
+
+NAME rev ADDR type chip Description
+JOY0DAT 00A R Denise Joystick-mouse 0 data (left vert, horiz)
+JOY1DAT 00C R Denise Joystick-mouse 1 data (right vert,horiz)
+
+ These addresses each read a 16 bit register. These in turn
+ are loaded from the MDAT serial stream and are clocked in on
+ the rising edge of SCLK. MLD output is used to parallel load
+ the external parallel-to-serial converter.This in turn is
+ loaded with the 4 quadrature inputs from each of two game
+ controller ports (8 total) plus 8 miscellaneous control bits
+ which are new for LISA and can be read in upper 8 bits of
+ LISAID.
+ Register bits are as follows:
+ Mouse counter usage (pins 1,3 =Yclock, pins 2,4 =Xclock)
+
+ BIT# 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+JOY0DAT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0
+JOY1DAT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0
+
+ 0=LEFT CONTROLLER PAIR, 1=RIGHT CONTROLLER PAIR.
+ (4 counters total). The bit usage for both left and right
+ addresses is shown below. Each 6 bit counter (Y7-Y2,X7-X2) is
+ clocked by 2 of the signals input from the mouse serial
+ stream. Starting with first bit received:
+
+ +-------------------+-----------------------------------------+
+ | Serial | Bit Name | Description |
+ +--------+----------+-----------------------------------------+
+ | 0 | M0H | JOY0DAT Horizontal Clock |
+ | 1 | M0HQ | JOY0DAT Horizontal Clock (quadrature) |
+ | 2 | M0V | JOY0DAT Vertical Clock |
+ | 3 | M0VQ | JOY0DAT Vertical Clock (quadrature) |
+ | 4 | M1V | JOY1DAT Horizontal Clock |
+ | 5 | M1VQ | JOY1DAT Horizontal Clock (quadrature) |
+ | 6 | M1V | JOY1DAT Vertical Clock |
+ | 7 | M1VQ | JOY1DAT Vertical Clock (quadrature) |
+ +--------+----------+-----------------------------------------+
+
+ Bits 1 and 0 of each counter (Y1-Y0,X1-X0) may be
+ read to determine the state of the related input signal pair.
+ This allows these pins to double as joystick switch inputs.
+ Joystick switch closures can be deciphered as follows:
+
+ +------------+------+---------------------------------+
+ | Directions | Pin# | Counter bits |
+ +------------+------+---------------------------------+
+ | Forward | 1 | Y1 xor Y0 (BIT#09 xor BIT#08) |
+ | Left | 3 | Y1 |
+ | Back | 2 | X1 xor X0 (BIT#01 xor BIT#00) |
+ | Right | 4 | X1 |
+ +------------+------+---------------------------------+
+
+-------------------------------------------------------------------------------
+
+NAME rev ADDR type chip Description
+JOYTEST 036 W Denise Write to all 4 joystick-mouse counters at once.
+
+ Mouse counter write test data:
+ BIT# 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+ JOYxDAT Y7 Y6 Y5 Y4 Y3 Y2 xx xx X7 X6 X5 X4 X3 X2 xx xx
+ JOYxDAT Y7 Y6 Y5 Y4 Y3 Y2 xx xx X7 X6 X5 X4 X3 X2 xx xx
+
+-------------------------------------------------------------------------------
+
+NAME rev ADDR type chip Description
+POT0DAT h 012 R Paula Pot counter data left pair (vert, horiz)
+POT1DAT h 014 R Paula Pot counter data right pair (vert,horiz)
+
+ These addresses each read a pair of 8 bit pot counters.
+ (4 counters total). The bit assignment for both
+ addresses is shown below. The counters are stopped by signals
+ from 2 controller connectors (left-right) with 2 pins each.
+
+ BIT# 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+ RIGHT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0
+ LEFT Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 X7 X6 X5 X4 X3 X2 X1 X0
+
+ +--------------------------+-------+
+ | CONNECTORS | PAULA |
+ +-------+------+-----+-----+-------+
+ | Loc. | Dir. | Sym | pin | pin |
+ +-------+------+-----+-----+-------+
+ | RIGHT | Y | RX | 9 | 33 |
+ | RIGHT | X | RX | 5 | 32 |
+ | LEFT | Y | LY | 9 | 36 |
+ | LEFT | X | LX | 5 | 35 |
+ +-------+------+-----+-----+-------+
+
+ With normal (NTSC or PAL) horiz. line rate, the pots will
+ give a full scale (FF) reading with about 500kohms in one
+ frame time. With proportionally faster horiz line times,
+ the counters will count proportionally faster.
+ This should be noted when doing variable beam displays.
+
+-------------------------------------------------------------------------------
+
+NAME rev ADDR type chip Description
+POTGO 034 W Paula Pot port (4 bit) bi-direction and data, and pot counter start.
+
+-------------------------------------------------------------------------------
+
+NAME rev ADDR type chip Description
+POTINP 016 R Paula Pot pin data read
+
+ This register controls a 4 bit bi-direction I/O port
+ that shares the same 4 pins as the 4 pot counters above.
+
+ +-------+----------+---------------------------------------------+
+ | BIT# | FUNCTION | DESCRIPTION |
+ +-------+----------+---------------------------------------------+
+ | 15 | OUTRY | Output enable for Paula pin 33 |
+ | 14 | DATRY | I/O data Paula pin 33 |
+ | 13 | OUTRX | Output enable for Paula pin 32 |
+ | 12 | DATRX | I/O data Paula pin 32 |
+ | 11 | OUTLY | Out put enable for Paula pin 36 |
+ | 10 | DATLY | I/O data Paula pin 36 |
+ | 09 | OUTLX | Output enable for Paula pin 35 |
+ | 08 | DATLX | I/O data Paula pin 35 |
+ | 07-01 | X | Not used |
+ | 00 | START | Start pots (dump capacitors,start counters) |
+ +-------+----------+---------------------------------------------+
+
+-------------------------------------------------------------------------------
diff --git a/Documentation/input/appletouch.txt b/Documentation/input/appletouch.txt
new file mode 100644
index 000000000..b13de3f89
--- /dev/null
+++ b/Documentation/input/appletouch.txt
@@ -0,0 +1,85 @@
+Apple Touchpad Driver (appletouch)
+----------------------------------
+ Copyright (C) 2005 Stelian Pop <stelian@popies.net>
+
+appletouch is a Linux kernel driver for the USB touchpad found on post
+February 2005 and October 2005 Apple Aluminium Powerbooks.
+
+This driver is derived from Johannes Berg's appletrackpad driver[1], but it has
+been improved in some areas:
+ * appletouch is a full kernel driver, no userspace program is necessary
+ * appletouch can be interfaced with the synaptics X11 driver, in order
+ to have touchpad acceleration, scrolling, etc.
+
+Credits go to Johannes Berg for reverse-engineering the touchpad protocol,
+Frank Arnold for further improvements, and Alex Harper for some additional
+information about the inner workings of the touchpad sensors. Michael
+Hanselmann added support for the October 2005 models.
+
+Usage:
+------
+
+In order to use the touchpad in the basic mode, compile the driver and load
+the module. A new input device will be detected and you will be able to read
+the mouse data from /dev/input/mice (using gpm, or X11).
+
+In X11, you can configure the touchpad to use the synaptics X11 driver, which
+will give additional functionalities, like acceleration, scrolling, 2 finger
+tap for middle button mouse emulation, 3 finger tap for right button mouse
+emulation, etc. In order to do this, make sure you're using a recent version of
+the synaptics driver (tested with 0.14.2, available from [2]), and configure a
+new input device in your X11 configuration file (take a look below for an
+example). For additional configuration, see the synaptics driver documentation.
+
+ Section "InputDevice"
+ Identifier "Synaptics Touchpad"
+ Driver "synaptics"
+ Option "SendCoreEvents" "true"
+ Option "Device" "/dev/input/mice"
+ Option "Protocol" "auto-dev"
+ Option "LeftEdge" "0"
+ Option "RightEdge" "850"
+ Option "TopEdge" "0"
+ Option "BottomEdge" "645"
+ Option "MinSpeed" "0.4"
+ Option "MaxSpeed" "1"
+ Option "AccelFactor" "0.02"
+ Option "FingerLow" "0"
+ Option "FingerHigh" "30"
+ Option "MaxTapMove" "20"
+ Option "MaxTapTime" "100"
+ Option "HorizScrollDelta" "0"
+ Option "VertScrollDelta" "30"
+ Option "SHMConfig" "on"
+ EndSection
+
+ Section "ServerLayout"
+ ...
+ InputDevice "Mouse"
+ InputDevice "Synaptics Touchpad"
+ ...
+ EndSection
+
+Fuzz problems:
+--------------
+
+The touchpad sensors are very sensitive to heat, and will generate a lot of
+noise when the temperature changes. This is especially true when you power-on
+the laptop for the first time.
+
+The appletouch driver tries to handle this noise and auto adapt itself, but it
+is not perfect. If finger movements are not recognized anymore, try reloading
+the driver.
+
+You can activate debugging using the 'debug' module parameter. A value of 0
+deactivates any debugging, 1 activates tracing of invalid samples, 2 activates
+full tracing (each sample is being traced):
+ modprobe appletouch debug=1
+ or
+ echo "1" > /sys/module/appletouch/parameters/debug
+
+Links:
+------
+
+[1]: http://johannes.sipsolutions.net/PowerBook/touchpad/
+[2]: http://web.archive.org/web/*/http://web.telia.com/~u89404340/touchpad/index.html
diff --git a/Documentation/input/atarikbd.txt b/Documentation/input/atarikbd.txt
new file mode 100644
index 000000000..f3a3ba884
--- /dev/null
+++ b/Documentation/input/atarikbd.txt
@@ -0,0 +1,709 @@
+Intelligent Keyboard (ikbd) Protocol
+
+
+1. Introduction
+
+The Atari Corp. Intelligent Keyboard (ikbd) is a general purpose keyboard
+controller that is flexible enough that it can be used in a variety of
+products without modification. The keyboard, with its microcontroller,
+provides a convenient connection point for a mouse and switch-type joysticks.
+The ikbd processor also maintains a time-of-day clock with one second
+resolution.
+The ikbd has been designed to be general enough that it can be used with a
+variety of new computer products. Product variations in a number of
+keyswitches, mouse resolution, etc. can be accommodated.
+The ikbd communicates with the main processor over a high speed bi-directional
+serial interface. It can function in a variety of modes to facilitate
+different applications of the keyboard, joysticks, or mouse. Limited use of
+the controller is possible in applications in which only a unidirectional
+communications medium is available by carefully designing the default modes.
+
+3. Keyboard
+
+The keyboard always returns key make/break scan codes. The ikbd generates
+keyboard scan codes for each key press and release. The key scan make (key
+closure) codes start at 1, and are defined in Appendix A. For example, the
+ISO key position in the scan code table should exist even if no keyswitch
+exists in that position on a particular keyboard. The break code for each key
+is obtained by ORing 0x80 with the make code.
+
+The special codes 0xF6 through 0xFF are reserved for use as follows:
+ 0xF6 status report
+ 0xF7 absolute mouse position record
+ 0xF8-0xFB relative mouse position records (lsbs determined by
+ mouse button states)
+ 0xFC time-of-day
+ 0xFD joystick report (both sticks)
+ 0xFE joystick 0 event
+ 0xFF joystick 1 event
+
+The two shift keys return different scan codes in this mode. The ENTER key
+and the RETurn key are also distinct.
+
+4. Mouse
+
+The mouse port should be capable of supporting a mouse with resolution of
+approximately 200 counts (phase changes or 'clicks') per inch of travel. The
+mouse should be scanned at a rate that will permit accurate tracking at
+velocities up to 10 inches per second.
+The ikbd can report mouse motion in three distinctly different ways. It can
+report relative motion, absolute motion in a coordinate system maintained
+within the ikbd, or by converting mouse motion into keyboard cursor control
+key equivalents.
+The mouse buttons can be treated as part of the mouse or as additional
+keyboard keys.
+
+4.1 Relative Position Reporting
+
+In relative position mode, the ikbd will return relative mouse position
+records whenever a mouse event occurs. A mouse event consists of a mouse
+button being pressed or released, or motion in either axis exceeding a
+settable threshold of motion. Regardless of the threshold, all bits of
+resolution are returned to the host computer.
+Note that the ikbd may return mouse relative position reports with
+significantly more than the threshold delta x or y. This may happen since no
+relative mouse motion events will be generated: (a) while the keyboard has
+been 'paused' ( the event will be stored until keyboard communications is
+resumed) (b) while any event is being transmitted.
+
+The relative mouse position record is a three byte record of the form
+(regardless of keyboard mode):
+ %111110xy ; mouse position record flag
+ ; where y is the right button state
+ ; and x is the left button state
+ X ; delta x as twos complement integer
+ Y ; delta y as twos complement integer
+
+Note that the value of the button state bits should be valid even if the
+MOUSE BUTTON ACTION has set the buttons to act like part of the keyboard.
+If the accumulated motion before the report packet is generated exceeds the
++127...-128 range, the motion is broken into multiple packets.
+Note that the sign of the delta y reported is a function of the Y origin
+selected.
+
+4.2 Absolute Position reporting
+
+The ikbd can also maintain absolute mouse position. Commands exist for
+resetting the mouse position, setting X/Y scaling, and interrogating the
+current mouse position.
+
+4.3 Mouse Cursor Key Mode
+
+The ikbd can translate mouse motion into the equivalent cursor keystrokes.
+The number of mouse clicks per keystroke is independently programmable in
+each axis. The ikbd internally maintains mouse motion information to the
+highest resolution available, and merely generates a pair of cursor key events
+for each multiple of the scale factor.
+Mouse motion produces the cursor key make code immediately followed by the
+break code for the appropriate cursor key. The mouse buttons produce scan
+codes above those normally assigned for the largest envisioned keyboard (i.e.
+LEFT=0x74 & RIGHT=0x75).
+
+5. Joystick
+
+5.1 Joystick Event Reporting
+
+In this mode, the ikbd generates a record whenever the joystick position is
+changed (i.e. for each opening or closing of a joystick switch or trigger).
+
+The joystick event record is two bytes of the form:
+ %1111111x ; Joystick event marker
+ ; where x is Joystick 0 or 1
+ %x000yyyy ; where yyyy is the stick position
+ ; and x is the trigger
+
+5.2 Joystick Interrogation
+
+The current state of the joystick ports may be interrogated at any time in
+this mode by sending an 'Interrogate Joystick' command to the ikbd.
+
+The ikbd response to joystick interrogation is a three byte report of the form
+ 0xFD ; joystick report header
+ %x000yyyy ; Joystick 0
+ %x000yyyy ; Joystick 1
+ ; where x is the trigger
+ ; and yyy is the stick position
+
+5.3 Joystick Monitoring
+
+A mode is available that devotes nearly all of the keyboard communications
+time to reporting the state of the joystick ports at a user specifiable rate.
+It remains in this mode until reset or commanded into another mode. The PAUSE
+command in this mode not only stop the output but also temporarily stops
+scanning the joysticks (samples are not queued).
+
+5.4 Fire Button Monitoring
+
+A mode is provided to permit monitoring a single input bit at a high rate. In
+this mode the ikbd monitors the state of the Joystick 1 fire button at the
+maximum rate permitted by the serial communication channel. The data is packed
+8 bits per byte for transmission to the host. The ikbd remains in this mode
+until reset or commanded into another mode. The PAUSE command in this mode not
+only stops the output but also temporarily stops scanning the button (samples
+are not queued).
+
+5.5 Joystick Key Code Mode
+
+The ikbd may be commanded to translate the use of either joystick into the
+equivalent cursor control keystroke(s). The ikbd provides a single breakpoint
+velocity joystick cursor.
+Joystick events produce the make code, immediately followed by the break code
+for the appropriate cursor motion keys. The trigger or fire buttons of the
+joysticks produce pseudo key scan codes above those used by the largest key
+matrix envisioned (i.e. JOYSTICK0=0x74, JOYSTICK1=0x75).
+
+6. Time-of-Day Clock
+
+The ikbd also maintains a time-of-day clock for the system. Commands are
+available to set and interrogate the timer-of-day clock. Time-keeping is
+maintained down to a resolution of one second.
+
+7. Status Inquiries
+
+The current state of ikbd modes and parameters may be found by sending status
+inquiry commands that correspond to the ikbd set commands.
+
+8. Power-Up Mode
+
+The keyboard controller will perform a simple self-test on power-up to detect
+major controller faults (ROM checksum and RAM test) and such things as stuck
+keys. Any keys down at power-up are presumed to be stuck, and their BREAK
+(sic) code is returned (which without the preceding MAKE code is a flag for a
+keyboard error). If the controller self-test completes without error, the code
+0xF0 is returned. (This code will be used to indicate the version/release of
+the ikbd controller. The first release of the ikbd is version 0xF0, should
+there be a second release it will be 0xF1, and so on.)
+The ikbd defaults to a mouse position reporting with threshold of 1 unit in
+either axis and the Y=0 origin at the top of the screen, and joystick event
+reporting mode for joystick 1, with both buttons being logically assigned to
+the mouse. After any joystick command, the ikbd assumes that joysticks are
+connected to both Joystick0 and Joystick1. Any mouse command (except MOUSE
+DISABLE) then causes port 0 to again be scanned as if it were a mouse, and
+both buttons are logically connected to it. If a mouse disable command is
+received while port 0 is presumed to be a mouse, the button is logically
+assigned to Joystick1 (until the mouse is reenabled by another mouse command).
+
+9. ikbd Command Set
+
+This section contains a list of commands that can be sent to the ikbd. Command
+codes (such as 0x00) which are not specified should perform no operation
+(NOPs).
+
+9.1 RESET
+
+ 0x80
+ 0x01
+
+N.B. The RESET command is the only two byte command understood by the ikbd.
+Any byte following an 0x80 command byte other than 0x01 is ignored (and causes
+the 0x80 to be ignored).
+A reset may also be caused by sending a break lasting at least 200mS to the
+ikbd.
+Executing the RESET command returns the keyboard to its default (power-up)
+mode and parameter settings. It does not affect the time-of-day clock.
+The RESET command or function causes the ikbd to perform a simple self-test.
+If the test is successful, the ikbd will send the code of 0xF0 within 300mS
+of receipt of the RESET command (or the end of the break, or power-up). The
+ikbd will then scan the key matrix for any stuck (closed) keys. Any keys found
+closed will cause the break scan code to be generated (the break code arriving
+without being preceded by the make code is a flag for a key matrix error).
+
+9.2. SET MOUSE BUTTON ACTION
+
+ 0x07
+ %00000mss ; mouse button action
+ ; (m is presumed = 1 when in MOUSE KEYCODE mode)
+ ; mss=0xy, mouse button press or release causes mouse
+ ; position report
+ ; where y=1, mouse key press causes absolute report
+ ; and x=1, mouse key release causes absolute report
+ ; mss=100, mouse buttons act like keys
+
+This command sets how the ikbd should treat the buttons on the mouse. The
+default mouse button action mode is %00000000, the buttons are treated as part
+of the mouse logically.
+When buttons act like keys, LEFT=0x74 & RIGHT=0x75.
+
+9.3 SET RELATIVE MOUSE POSITION REPORTING
+
+ 0x08
+
+Set relative mouse position reporting. (DEFAULT) Mouse position packets are
+generated asynchronously by the ikbd whenever motion exceeds the setable
+threshold in either axis (see SET MOUSE THRESHOLD). Depending upon the mouse
+key mode, mouse position reports may also be generated when either mouse
+button is pressed or released. Otherwise the mouse buttons behave as if they
+were keyboard keys.
+
+9.4 SET ABSOLUTE MOUSE POSITIONING
+
+ 0x09
+ XMSB ; X maximum (in scaled mouse clicks)
+ XLSB
+ YMSB ; Y maximum (in scaled mouse clicks)
+ YLSB
+
+Set absolute mouse position maintenance. Resets the ikbd maintained X and Y
+coordinates.
+In this mode, the value of the internally maintained coordinates does NOT wrap
+between 0 and large positive numbers. Excess motion below 0 is ignored. The
+command sets the maximum positive value that can be attained in the scaled
+coordinate system. Motion beyond that value is also ignored.
+
+9.5 SET MOUSE KEYCODE MOSE
+
+ 0x0A
+ deltax ; distance in X clicks to return (LEFT) or (RIGHT)
+ deltay ; distance in Y clicks to return (UP) or (DOWN)
+
+Set mouse monitoring routines to return cursor motion keycodes instead of
+either RELATIVE or ABSOLUTE motion records. The ikbd returns the appropriate
+cursor keycode after mouse travel exceeding the user specified deltas in
+either axis. When the keyboard is in key scan code mode, mouse motion will
+cause the make code immediately followed by the break code. Note that this
+command is not affected by the mouse motion origin.
+
+9..6 SET MOUSE THRESHOLD
+
+ 0x0B
+ X ; x threshold in mouse ticks (positive integers)
+ Y ; y threshold in mouse ticks (positive integers)
+
+This command sets the threshold before a mouse event is generated. Note that
+it does NOT affect the resolution of the data returned to the host. This
+command is valid only in RELATIVE MOUSE POSITIONING mode. The thresholds
+default to 1 at RESET (or power-up).
+
+9.7 SET MOUSE SCALE
+
+ 0x0C
+ X ; horizontal mouse ticks per internal X
+ Y ; vertical mouse ticks per internal Y
+
+This command sets the scale factor for the ABSOLUTE MOUSE POSITIONING mode.
+In this mode, the specified number of mouse phase changes ('clicks') must
+occur before the internally maintained coordinate is changed by one
+(independently scaled for each axis). Remember that the mouse position
+information is available only by interrogating the ikbd in the ABSOLUTE MOUSE
+POSITIONING mode unless the ikbd has been commanded to report on button press
+or release (see SET MOSE BUTTON ACTION).
+
+9.8 INTERROGATE MOUSE POSITION
+
+ 0x0D
+ Returns:
+ 0xF7 ; absolute mouse position header
+ BUTTONS
+ 0000dcba ; where a is right button down since last interrogation
+ ; b is right button up since last
+ ; c is left button down since last
+ ; d is left button up since last
+ XMSB ; X coordinate
+ XLSB
+ YMSB ; Y coordinate
+ YLSB
+
+The INTERROGATE MOUSE POSITION command is valid when in the ABSOLUTE MOUSE
+POSITIONING mode, regardless of the setting of the MOUSE BUTTON ACTION.
+
+9.9 LOAD MOUSE POSITION
+
+ 0x0E
+ 0x00 ; filler
+ XMSB ; X coordinate
+ XLSB ; (in scaled coordinate system)
+ YMSB ; Y coordinate
+ YLSB
+
+This command allows the user to preset the internally maintained absolute
+mouse position.
+
+9.10 SET Y=0 AT BOTTOM
+
+ 0x0F
+
+This command makes the origin of the Y axis to be at the bottom of the
+logical coordinate system internal to the ikbd for all relative or absolute
+mouse motion. This causes mouse motion toward the user to be negative in sign
+and away from the user to be positive.
+
+9.11 SET Y=0 AT TOP
+
+ 0x10
+
+Makes the origin of the Y axis to be at the top of the logical coordinate
+system within the ikbd for all relative or absolute mouse motion. (DEFAULT)
+This causes mouse motion toward the user to be positive in sign and away from
+the user to be negative.
+
+9.12 RESUME
+
+ 0x11
+
+Resume sending data to the host. Since any command received by the ikbd after
+its output has been paused also causes an implicit RESUME this command can be
+thought of as a NO OPERATION command. If this command is received by the ikbd
+and it is not PAUSED, it is simply ignored.
+
+9.13 DISABLE MOUSE
+
+ 0x12
+
+All mouse event reporting is disabled (and scanning may be internally
+disabled). Any valid mouse mode command resumes mouse motion monitoring. (The
+valid mouse mode commands are SET RELATIVE MOUSE POSITION REPORTING, SET
+ABSOLUTE MOUSE POSITIONING, and SET MOUSE KEYCODE MODE. )
+N.B. If the mouse buttons have been commanded to act like keyboard keys, this
+command DOES affect their actions.
+
+9.14 PAUSE OUTPUT
+
+ 0x13
+
+Stop sending data to the host until another valid command is received. Key
+matrix activity is still monitored and scan codes or ASCII characters enqueued
+(up to the maximum supported by the microcontroller) to be sent when the host
+allows the output to be resumed. If in the JOYSTICK EVENT REPORTING mode,
+joystick events are also queued.
+Mouse motion should be accumulated while the output is paused. If the ikbd is
+in RELATIVE MOUSE POSITIONING REPORTING mode, motion is accumulated beyond the
+normal threshold limits to produce the minimum number of packets necessary for
+transmission when output is resumed. Pressing or releasing either mouse button
+causes any accumulated motion to be immediately queued as packets, if the
+mouse is in RELATIVE MOUSE POSITION REPORTING mode.
+Because of the limitations of the microcontroller memory this command should
+be used sparingly, and the output should not be shut of for more than <tbd>
+milliseconds at a time.
+The output is stopped only at the end of the current 'even'. If the PAUSE
+OUTPUT command is received in the middle of a multiple byte report, the packet
+will still be transmitted to conclusion and then the PAUSE will take effect.
+When the ikbd is in either the JOYSTICK MONITORING mode or the FIRE BUTTON
+MONITORING mode, the PAUSE OUTPUT command also temporarily stops the
+monitoring process (i.e. the samples are not enqueued for transmission).
+
+0.15 SET JOYSTICK EVENT REPORTING
+
+ 0x14
+
+Enter JOYSTICK EVENT REPORTING mode (DEFAULT). Each opening or closure of a
+joystick switch or trigger causes a joystick event record to be generated.
+
+9.16 SET JOYSTICK INTERROGATION MODE
+
+ 0x15
+
+Disables JOYSTICK EVENT REPORTING. Host must send individual JOYSTICK
+INTERROGATE commands to sense joystick state.
+
+9.17 JOYSTICK INTERROGATE
+
+ 0x16
+
+Return a record indicating the current state of the joysticks. This command
+is valid in either the JOYSTICK EVENT REPORTING mode or the JOYSTICK
+INTERROGATION MODE.
+
+9.18 SET JOYSTICK MONITORING
+
+ 0x17
+ rate ; time between samples in hundredths of a second
+ Returns: (in packets of two as long as in mode)
+ %000000xy ; where y is JOYSTICK1 Fire button
+ ; and x is JOYSTICK0 Fire button
+ %nnnnmmmm ; where m is JOYSTICK1 state
+ ; and n is JOYSTICK0 state
+
+Sets the ikbd to do nothing but monitor the serial command line, maintain the
+time-of-day clock, and monitor the joystick. The rate sets the interval
+between joystick samples.
+N.B. The user should not set the rate higher than the serial communications
+channel will allow the 2 bytes packets to be transmitted.
+
+9.19 SET FIRE BUTTON MONITORING
+
+ 0x18
+ Returns: (as long as in mode)
+ %bbbbbbbb ; state of the JOYSTICK1 fire button packed
+ ; 8 bits per byte, the first sample if the MSB
+
+Set the ikbd to do nothing but monitor the serial command line, maintain the
+time-of-day clock, and monitor the fire button on Joystick 1. The fire button
+is scanned at a rate that causes 8 samples to be made in the time it takes for
+the previous byte to be sent to the host (i.e. scan rate = 8/10 * baud rate).
+The sample interval should be as constant as possible.
+
+9.20 SET JOYSTICK KEYCODE MODE
+
+ 0x19
+ RX ; length of time (in tenths of seconds) until
+ ; horizontal velocity breakpoint is reached
+ RY ; length of time (in tenths of seconds) until
+ ; vertical velocity breakpoint is reached
+ TX ; length (in tenths of seconds) of joystick closure
+ ; until horizontal cursor key is generated before RX
+ ; has elapsed
+ TY ; length (in tenths of seconds) of joystick closure
+ ; until vertical cursor key is generated before RY
+ ; has elapsed
+ VX ; length (in tenths of seconds) of joystick closure
+ ; until horizontal cursor keystrokes are generated
+ ; after RX has elapsed
+ VY ; length (in tenths of seconds) of joystick closure
+ ; until vertical cursor keystrokes are generated
+ ; after RY has elapsed
+
+In this mode, joystick 0 is scanned in a way that simulates cursor keystrokes.
+On initial closure, a keystroke pair (make/break) is generated. Then up to Rn
+tenths of seconds later, keystroke pairs are generated every Tn tenths of
+seconds. After the Rn breakpoint is reached, keystroke pairs are generated
+every Vn tenths of seconds. This provides a velocity (auto-repeat) breakpoint
+feature.
+Note that by setting RX and/or Ry to zero, the velocity feature can be
+disabled. The values of TX and TY then become meaningless, and the generation
+of cursor 'keystrokes' is set by VX and VY.
+
+9.21 DISABLE JOYSTICKS
+
+ 0x1A
+
+Disable the generation of any joystick events (and scanning may be internally
+disabled). Any valid joystick mode command resumes joystick monitoring. (The
+joystick mode commands are SET JOYSTICK EVENT REPORTING, SET JOYSTICK
+INTERROGATION MODE, SET JOYSTICK MONITORING, SET FIRE BUTTON MONITORING, and
+SET JOYSTICK KEYCODE MODE.)
+
+9.22 TIME-OF-DAY CLOCK SET
+
+ 0x1B
+ YY ; year (2 least significant digits)
+ MM ; month
+ DD ; day
+ hh ; hour
+ mm ; minute
+ ss ; second
+
+All time-of-day data should be sent to the ikbd in packed BCD format.
+Any digit that is not a valid BCD digit should be treated as a 'don't care'
+and not alter that particular field of the date or time. This permits setting
+only some subfields of the time-of-day clock.
+
+9.23 INTERROGATE TIME-OF-DAT CLOCK
+
+ 0x1C
+ Returns:
+ 0xFC ; time-of-day event header
+ YY ; year (2 least significant digits)
+ MM ; month
+ DD ; day
+ hh ; hour
+ mm ; minute
+ ss ; second
+
+ All time-of-day is sent in packed BCD format.
+
+9.24 MEMORY LOAD
+
+ 0x20
+ ADRMSB ; address in controller
+ ADRLSB ; memory to be loaded
+ NUM ; number of bytes (0-128)
+ { data }
+
+This command permits the host to load arbitrary values into the ikbd
+controller memory. The time between data bytes must be less than 20ms.
+
+9.25 MEMORY READ
+
+ 0x21
+ ADRMSB ; address in controller
+ ADRLSB ; memory to be read
+ Returns:
+ 0xF6 ; status header
+ 0x20 ; memory access
+ { data } ; 6 data bytes starting at ADR
+
+This command permits the host to read from the ikbd controller memory.
+
+9.26 CONTROLLER EXECUTE
+
+ 0x22
+ ADRMSB ; address of subroutine in
+ ADRLSB ; controller memory to be called
+
+This command allows the host to command the execution of a subroutine in the
+ikbd controller memory.
+
+9.27 STATUS INQUIRIES
+
+ Status commands are formed by inclusively ORing 0x80 with the
+ relevant SET command.
+
+ Example:
+ 0x88 (or 0x89 or 0x8A) ; request mouse mode
+ Returns:
+ 0xF6 ; status response header
+ mode ; 0x08 is RELATIVE
+ ; 0x09 is ABSOLUTE
+ ; 0x0A is KEYCODE
+ param1 ; 0 is RELATIVE
+ ; XMSB maximum if ABSOLUTE
+ ; DELTA X is KEYCODE
+ param2 ; 0 is RELATIVE
+ ; YMSB maximum if ABSOLUTE
+ ; DELTA Y is KEYCODE
+ param3 ; 0 if RELATIVE
+ ; or KEYCODE
+ ; YMSB is ABSOLUTE
+ param4 ; 0 if RELATIVE
+ ; or KEYCODE
+ ; YLSB is ABSOLUTE
+ 0 ; pad
+ 0
+
+The STATUS INQUIRY commands request the ikbd to return either the current mode
+or the parameters associated with a given command. All status reports are
+padded to form 8 byte long return packets. The responses to the status
+requests are designed so that the host may store them away (after stripping
+off the status report header byte) and later send them back as commands to
+ikbd to restore its state. The 0 pad bytes will be treated as NOPs by the
+ikbd.
+
+ Valid STATUS INQUIRY commands are:
+
+ 0x87 mouse button action
+ 0x88 mouse mode
+ 0x89
+ 0x8A
+ 0x8B mnouse threshold
+ 0x8C mouse scale
+ 0x8F mouse vertical coordinates
+ 0x90 ( returns 0x0F Y=0 at bottom
+ 0x10 Y=0 at top )
+ 0x92 mouse enable/disable
+ ( returns 0x00 enabled)
+ 0x12 disabled )
+ 0x94 joystick mode
+ 0x95
+ 0x96
+ 0x9A joystick enable/disable
+ ( returns 0x00 enabled
+ 0x1A disabled )
+
+It is the (host) programmer's responsibility to have only one unanswered
+inquiry in process at a time.
+STATUS INQUIRY commands are not valid if the ikbd is in JOYSTICK MONITORING
+mode or FIRE BUTTON MONITORING mode.
+
+
+10. SCAN CODES
+
+The key scan codes returned by the ikbd are chosen to simplify the
+implementation of GSX.
+
+GSX Standard Keyboard Mapping.
+
+Hex Keytop
+01 Esc
+02 1
+03 2
+04 3
+05 4
+06 5
+07 6
+08 7
+09 8
+0A 9
+0B 0
+0C -
+0D ==
+0E BS
+0F TAB
+10 Q
+11 W
+12 E
+13 R
+14 T
+15 Y
+16 U
+17 I
+18 O
+19 P
+1A [
+1B ]
+1C RET
+1D CTRL
+1E A
+1F S
+20 D
+21 F
+22 G
+23 H
+24 J
+25 K
+26 L
+27 ;
+28 '
+29 `
+2A (LEFT) SHIFT
+2B \
+2C Z
+2D X
+2E C
+2F V
+30 B
+31 N
+32 M
+33 ,
+34 .
+35 /
+36 (RIGHT) SHIFT
+37 { NOT USED }
+38 ALT
+39 SPACE BAR
+3A CAPS LOCK
+3B F1
+3C F2
+3D F3
+3E F4
+3F F5
+40 F6
+41 F7
+42 F8
+43 F9
+44 F10
+45 { NOT USED }
+46 { NOT USED }
+47 HOME
+48 UP ARROW
+49 { NOT USED }
+4A KEYPAD -
+4B LEFT ARROW
+4C { NOT USED }
+4D RIGHT ARROW
+4E KEYPAD +
+4F { NOT USED }
+50 DOWN ARROW
+51 { NOT USED }
+52 INSERT
+53 DEL
+54 { NOT USED }
+5F { NOT USED }
+60 ISO KEY
+61 UNDO
+62 HELP
+63 KEYPAD (
+64 KEYPAD /
+65 KEYPAD *
+66 KEYPAD *
+67 KEYPAD 7
+68 KEYPAD 8
+69 KEYPAD 9
+6A KEYPAD 4
+6B KEYPAD 5
+6C KEYPAD 6
+6D KEYPAD 1
+6E KEYPAD 2
+6F KEYPAD 3
+70 KEYPAD 0
+71 KEYPAD .
+72 KEYPAD ENTER
diff --git a/Documentation/input/bcm5974.txt b/Documentation/input/bcm5974.txt
new file mode 100644
index 000000000..74d3876d6
--- /dev/null
+++ b/Documentation/input/bcm5974.txt
@@ -0,0 +1,65 @@
+BCM5974 Driver (bcm5974)
+------------------------
+ Copyright (C) 2008-2009 Henrik Rydberg <rydberg@euromail.se>
+
+The USB initialization and package decoding was made by Scott Shawcroft as
+part of the touchd user-space driver project:
+ Copyright (C) 2008 Scott Shawcroft (scott.shawcroft@gmail.com)
+
+The BCM5974 driver is based on the appletouch driver:
+ Copyright (C) 2001-2004 Greg Kroah-Hartman (greg@kroah.com)
+ Copyright (C) 2005 Johannes Berg (johannes@sipsolutions.net)
+ Copyright (C) 2005 Stelian Pop (stelian@popies.net)
+ Copyright (C) 2005 Frank Arnold (frank@scirocco-5v-turbo.de)
+ Copyright (C) 2005 Peter Osterlund (petero2@telia.com)
+ Copyright (C) 2005 Michael Hanselmann (linux-kernel@hansmi.ch)
+ Copyright (C) 2006 Nicolas Boichat (nicolas@boichat.ch)
+
+This driver adds support for the multi-touch trackpad on the new Apple
+Macbook Air and Macbook Pro laptops. It replaces the appletouch driver on
+those computers, and integrates well with the synaptics driver of the Xorg
+system.
+
+Known to work on Macbook Air, Macbook Pro Penryn and the new unibody
+Macbook 5 and Macbook Pro 5.
+
+Usage
+-----
+
+The driver loads automatically for the supported usb device ids, and
+becomes available both as an event device (/dev/input/event*) and as a
+mouse via the mousedev driver (/dev/input/mice).
+
+USB Race
+--------
+
+The Apple multi-touch trackpads report both mouse and keyboard events via
+different interfaces of the same usb device. This creates a race condition
+with the HID driver, which, if not told otherwise, will find the standard
+HID mouse and keyboard, and claim the whole device. To remedy, the usb
+product id must be listed in the mouse_ignore list of the hid driver.
+
+Debug output
+------------
+
+To ease the development for new hardware version, verbose packet output can
+be switched on with the debug kernel module parameter. The range [1-9]
+yields different levels of verbosity. Example (as root):
+
+echo -n 9 > /sys/module/bcm5974/parameters/debug
+
+tail -f /var/log/debug
+
+echo -n 0 > /sys/module/bcm5974/parameters/debug
+
+Trivia
+------
+
+The driver was developed at the ubuntu forums in June 2008 [1], and now has
+a more permanent home at bitmath.org [2].
+
+Links
+-----
+
+[1] http://ubuntuforums.org/showthread.php?t=840040
+[2] http://bitmath.org/code/
diff --git a/Documentation/input/cd32.txt b/Documentation/input/cd32.txt
new file mode 100644
index 000000000..a003d9b41
--- /dev/null
+++ b/Documentation/input/cd32.txt
@@ -0,0 +1,19 @@
+I have written a small patch that let's me use my Amiga CD32
+joypad connected to the parallel port. Thought I'd share it with you so
+you can add it to the list of supported joysticks (hopefully someone will
+find it useful).
+
+It needs the following wiring:
+
+CD32 pad | Parallel port
+----------------------------
+1 (Up) | 2 (D0)
+2 (Down) | 3 (D1)
+3 (Left) | 4 (D2)
+4 (Right) | 5 (D3)
+5 (Fire3) | 14 (AUTOFD)
+6 (Fire1) | 17 (SELIN)
+7 (+5V) | 1 (STROBE)
+8 (Gnd) | 18 (Gnd)
+9 (Fire2) | 7 (D5)
+
diff --git a/Documentation/input/cma3000_d0x.txt b/Documentation/input/cma3000_d0x.txt
new file mode 100644
index 000000000..29d088db4
--- /dev/null
+++ b/Documentation/input/cma3000_d0x.txt
@@ -0,0 +1,115 @@
+Kernel driver for CMA3000-D0x
+============================
+
+Supported chips:
+* VTI CMA3000-D0x
+Datasheet:
+ CMA3000-D0X Product Family Specification 8281000A.02.pdf
+ <http://www.vti.fi/en/>
+
+Author: Hemanth V <hemanthv@ti.com>
+
+
+Description
+-----------
+CMA3000 Tri-axis accelerometer supports Motion detect, Measurement and
+Free fall modes.
+
+Motion Detect Mode: Its the low power mode where interrupts are generated only
+when motion exceeds the defined thresholds.
+
+Measurement Mode: This mode is used to read the acceleration data on X,Y,Z
+axis and supports 400, 100, 40 Hz sample frequency.
+
+Free fall Mode: This mode is intended to save system resources.
+
+Threshold values: Chip supports defining threshold values for above modes
+which includes time and g value. Refer product specifications for more details.
+
+CMA3000 chip supports mutually exclusive I2C and SPI interfaces for
+communication, currently the driver supports I2C based communication only.
+Initial configuration for bus mode is set in non volatile memory and can later
+be modified through bus interface command.
+
+Driver reports acceleration data through input subsystem. It generates ABS_MISC
+event with value 1 when free fall is detected.
+
+Platform data need to be configured for initial default values.
+
+Platform Data
+-------------
+fuzz_x: Noise on X Axis
+
+fuzz_y: Noise on Y Axis
+
+fuzz_z: Noise on Z Axis
+
+g_range: G range in milli g i.e 2000 or 8000
+
+mode: Default Operating mode
+
+mdthr: Motion detect g range threshold value
+
+mdfftmr: Motion detect and free fall time threshold value
+
+ffthr: Free fall g range threshold value
+
+Input Interface
+--------------
+Input driver version is 1.0.0
+Input device ID: bus 0x18 vendor 0x0 product 0x0 version 0x0
+Input device name: "cma3000-accelerometer"
+Supported events:
+ Event type 0 (Sync)
+ Event type 3 (Absolute)
+ Event code 0 (X)
+ Value 47
+ Min -8000
+ Max 8000
+ Fuzz 200
+ Event code 1 (Y)
+ Value -28
+ Min -8000
+ Max 8000
+ Fuzz 200
+ Event code 2 (Z)
+ Value 905
+ Min -8000
+ Max 8000
+ Fuzz 200
+ Event code 40 (Misc)
+ Value 0
+ Min 0
+ Max 1
+ Event type 4 (Misc)
+
+
+Register/Platform parameters Description
+----------------------------------------
+
+mode:
+ 0: power down mode
+ 1: 100 Hz Measurement mode
+ 2: 400 Hz Measurement mode
+ 3: 40 Hz Measurement mode
+ 4: Motion Detect mode (default)
+ 5: 100 Hz Free fall mode
+ 6: 40 Hz Free fall mode
+ 7: Power off mode
+
+grange:
+ 2000: 2000 mg or 2G Range
+ 8000: 8000 mg or 8G Range
+
+mdthr:
+ X: X * 71mg (8G Range)
+ X: X * 18mg (2G Range)
+
+mdfftmr:
+ X: (X & 0x70) * 100 ms (MDTMR)
+ (X & 0x0F) * 2.5 ms (FFTMR 400 Hz)
+ (X & 0x0F) * 10 ms (FFTMR 100 Hz)
+
+ffthr:
+ X: (X >> 2) * 18mg (2G Range)
+ X: (X & 0x0F) * 71 mg (8G Range)
diff --git a/Documentation/input/cs461x.txt b/Documentation/input/cs461x.txt
new file mode 100644
index 000000000..202e9dbac
--- /dev/null
+++ b/Documentation/input/cs461x.txt
@@ -0,0 +1,45 @@
+Preface.
+
+This is a new low-level driver to support analog joystick attached to
+Crystal SoundFusion CS4610/CS4612/CS4615. This code is based upon
+Vortex/Solo drivers as an example of decoration style, and ALSA
+0.5.8a kernel drivers as an chipset documentation and samples.
+
+This version does not have cooked mode support; the basic code
+is present here, but have not tested completely. The button analysis
+is completed in this mode, but the axis movement is not.
+
+Raw mode works fine with analog joystick front-end driver and cs461x
+driver as a backend. I've tested this driver with CS4610, 4-axis and
+4-button joystick; I mean the jstest utility. Also I've tried to
+play in xracer game using joystick, and the result is better than
+keyboard only mode.
+
+The sensitivity and calibrate quality have not been tested; the two
+reasons are performed: the same hardware cannot work under Win95 (blue
+screen in VJOYD); I have no documentation on my chip; and the existing
+behavior in my case was not raised the requirement of joystick calibration.
+So the driver have no code to perform hardware related calibration.
+
+The patch contains minor changes of Config.in and Makefile files. All
+needed code have been moved to one separate file cs461x.c like ns558.c
+This driver have the basic support for PCI devices only; there is no
+ISA or PnP ISA cards supported. AFAIK the ns558 have support for Crystal
+ISA and PnP ISA series.
+
+The driver works with ALSA drivers simultaneously. For example, the xracer
+uses joystick as input device and PCM device as sound output in one time.
+There are no sound or input collisions detected. The source code have
+comments about them; but I've found the joystick can be initialized
+separately of ALSA modules. So, you can use only one joystick driver
+without ALSA drivers. The ALSA drivers are not needed to compile or
+run this driver.
+
+There are no debug information print have been placed in source, and no
+specific options required to work this driver. The found chipset parameters
+are printed via printk(KERN_INFO "..."), see the /var/log/messages to
+inspect cs461x: prefixed messages to determine possible card detection
+errors.
+
+Regards,
+Viktor
diff --git a/Documentation/input/edt-ft5x06.txt b/Documentation/input/edt-ft5x06.txt
new file mode 100644
index 000000000..2032f0b7a
--- /dev/null
+++ b/Documentation/input/edt-ft5x06.txt
@@ -0,0 +1,54 @@
+EDT ft5x06 based Polytouch devices
+----------------------------------
+
+The edt-ft5x06 driver is useful for the EDT "Polytouch" family of capacitive
+touch screens. Note that it is *not* suitable for other devices based on the
+focaltec ft5x06 devices, since they contain vendor-specific firmware. In
+particular this driver is not suitable for the Nook tablet.
+
+It has been tested with the following devices:
+ * EP0350M06
+ * EP0430M06
+ * EP0570M06
+ * EP0700M06
+
+The driver allows configuration of the touch screen via a set of sysfs files:
+
+/sys/class/input/eventX/device/device/threshold:
+ allows setting the "click"-threshold in the range from 20 to 80.
+
+/sys/class/input/eventX/device/device/gain:
+ allows setting the sensitivity in the range from 0 to 31. Note that
+ lower values indicate higher sensitivity.
+
+/sys/class/input/eventX/device/device/offset:
+ allows setting the edge compensation in the range from 0 to 31.
+
+/sys/class/input/eventX/device/device/report_rate:
+ allows setting the report rate in the range from 3 to 14.
+
+
+For debugging purposes the driver provides a few files in the debug
+filesystem (if available in the kernel). In /sys/kernel/debug/edt_ft5x06
+you'll find the following files:
+
+num_x, num_y:
+ (readonly) contains the number of sensor fields in X- and
+ Y-direction.
+
+mode:
+ allows switching the sensor between "factory mode" and "operation
+ mode" by writing "1" or "0" to it. In factory mode (1) it is
+ possible to get the raw data from the sensor. Note that in factory
+ mode regular events don't get delivered and the options described
+ above are unavailable.
+
+raw_data:
+ contains num_x * num_y big endian 16 bit values describing the raw
+ values for each sensor field. Note that each read() call on this
+ files triggers a new readout. It is recommended to provide a buffer
+ big enough to contain num_x * num_y * 2 bytes.
+
+Note that reading raw_data gives a I/O error when the device is not in factory
+mode. The same happens when reading/writing to the parameter files when the
+device is not in regular operation mode.
diff --git a/Documentation/input/elantech.txt b/Documentation/input/elantech.txt
new file mode 100644
index 000000000..1ec0db787
--- /dev/null
+++ b/Documentation/input/elantech.txt
@@ -0,0 +1,817 @@
+Elantech Touchpad Driver
+========================
+
+ Copyright (C) 2007-2008 Arjan Opmeer <arjan@opmeer.net>
+
+ Extra information for hardware version 1 found and
+ provided by Steve Havelka
+
+ Version 2 (EeePC) hardware support based on patches
+ received from Woody at Xandros and forwarded to me
+ by user StewieGriffin at the eeeuser.com forum
+
+
+Contents
+~~~~~~~~
+
+ 1. Introduction
+ 2. Extra knobs
+ 3. Differentiating hardware versions
+ 4. Hardware version 1
+ 4.1 Registers
+ 4.2 Native relative mode 4 byte packet format
+ 4.3 Native absolute mode 4 byte packet format
+ 5. Hardware version 2
+ 5.1 Registers
+ 5.2 Native absolute mode 6 byte packet format
+ 5.2.1 Parity checking and packet re-synchronization
+ 5.2.2 One/Three finger touch
+ 5.2.3 Two finger touch
+ 6. Hardware version 3
+ 6.1 Registers
+ 6.2 Native absolute mode 6 byte packet format
+ 6.2.1 One/Three finger touch
+ 6.2.2 Two finger touch
+ 7. Hardware version 4
+ 7.1 Registers
+ 7.2 Native absolute mode 6 byte packet format
+ 7.2.1 Status packet
+ 7.2.2 Head packet
+ 7.2.3 Motion packet
+ 8. Trackpoint (for Hardware version 3 and 4)
+ 8.1 Registers
+ 8.2 Native relative mode 6 byte packet format
+ 8.2.1 Status Packet
+
+
+
+1. Introduction
+ ~~~~~~~~~~~~
+
+Currently the Linux Elantech touchpad driver is aware of four different
+hardware versions unimaginatively called version 1,version 2, version 3
+and version 4. Version 1 is found in "older" laptops and uses 4 bytes per
+packet. Version 2 seems to be introduced with the EeePC and uses 6 bytes
+per packet, and provides additional features such as position of two fingers,
+and width of the touch. Hardware version 3 uses 6 bytes per packet (and
+for 2 fingers the concatenation of two 6 bytes packets) and allows tracking
+of up to 3 fingers. Hardware version 4 uses 6 bytes per packet, and can
+combine a status packet with multiple head or motion packets. Hardware version
+4 allows tracking up to 5 fingers.
+
+Some Hardware version 3 and version 4 also have a trackpoint which uses a
+separate packet format. It is also 6 bytes per packet.
+
+The driver tries to support both hardware versions and should be compatible
+with the Xorg Synaptics touchpad driver and its graphical configuration
+utilities.
+
+Note that a mouse button is also associated with either the touchpad or the
+trackpoint when a trackpoint is available. Disabling the Touchpad in xorg
+(TouchPadOff=0) will also disable the buttons associated with the touchpad.
+
+Additionally the operation of the touchpad can be altered by adjusting the
+contents of some of its internal registers. These registers are represented
+by the driver as sysfs entries under /sys/bus/serio/drivers/psmouse/serio?
+that can be read from and written to.
+
+Currently only the registers for hardware version 1 are somewhat understood.
+Hardware version 2 seems to use some of the same registers but it is not
+known whether the bits in the registers represent the same thing or might
+have changed their meaning.
+
+On top of that, some register settings have effect only when the touchpad is
+in relative mode and not in absolute mode. As the Linux Elantech touchpad
+driver always puts the hardware into absolute mode not all information
+mentioned below can be used immediately. But because there is no freely
+available Elantech documentation the information is provided here anyway for
+completeness sake.
+
+
+/////////////////////////////////////////////////////////////////////////////
+
+
+2. Extra knobs
+ ~~~~~~~~~~~
+
+Currently the Linux Elantech touchpad driver provides three extra knobs under
+/sys/bus/serio/drivers/psmouse/serio? for the user.
+
+* debug
+
+ Turn different levels of debugging ON or OFF.
+
+ By echoing "0" to this file all debugging will be turned OFF.
+
+ Currently a value of "1" will turn on some basic debugging and a value of
+ "2" will turn on packet debugging. For hardware version 1 the default is
+ OFF. For version 2 the default is "1".
+
+ Turning packet debugging on will make the driver dump every packet
+ received to the syslog before processing it. Be warned that this can
+ generate quite a lot of data!
+
+* paritycheck
+
+ Turns parity checking ON or OFF.
+
+ By echoing "0" to this file parity checking will be turned OFF. Any
+ non-zero value will turn it ON. For hardware version 1 the default is ON.
+ For version 2 the default it is OFF.
+
+ Hardware version 1 provides basic data integrity verification by
+ calculating a parity bit for the last 3 bytes of each packet. The driver
+ can check these bits and reject any packet that appears corrupted. Using
+ this knob you can bypass that check.
+
+ Hardware version 2 does not provide the same parity bits. Only some basic
+ data consistency checking can be done. For now checking is disabled by
+ default. Currently even turning it on will do nothing.
+
+* crc_enabled
+
+ Sets crc_enabled to 0/1. The name "crc_enabled" is the official name of
+ this integrity check, even though it is not an actual cyclic redundancy
+ check.
+
+ Depending on the state of crc_enabled, certain basic data integrity
+ verification is done by the driver on hardware version 3 and 4. The
+ driver will reject any packet that appears corrupted. Using this knob,
+ The state of crc_enabled can be altered with this knob.
+
+ Reading the crc_enabled value will show the active value. Echoing
+ "0" or "1" to this file will set the state to "0" or "1".
+
+/////////////////////////////////////////////////////////////////////////////
+
+3. Differentiating hardware versions
+ =================================
+
+To detect the hardware version, read the version number as param[0].param[1].param[2]
+
+ 4 bytes version: (after the arrow is the name given in the Dell-provided driver)
+ 02.00.22 => EF013
+ 02.06.00 => EF019
+In the wild, there appear to be more versions, such as 00.01.64, 01.00.21,
+02.00.00, 02.00.04, 02.00.06.
+
+ 6 bytes:
+ 02.00.30 => EF113
+ 02.08.00 => EF023
+ 02.08.XX => EF123
+ 02.0B.00 => EF215
+ 04.01.XX => Scroll_EF051
+ 04.02.XX => EF051
+In the wild, there appear to be more versions, such as 04.03.01, 04.04.11. There
+appears to be almost no difference, except for EF113, which does not report
+pressure/width and has different data consistency checks.
+
+Probably all the versions with param[0] <= 01 can be considered as
+4 bytes/firmware 1. The versions < 02.08.00, with the exception of 02.00.30, as
+4 bytes/firmware 2. Everything >= 02.08.00 can be considered as 6 bytes.
+
+/////////////////////////////////////////////////////////////////////////////
+
+4. Hardware version 1
+ ==================
+
+4.1 Registers
+ ~~~~~~~~~
+
+By echoing a hexadecimal value to a register it contents can be altered.
+
+For example:
+
+ echo -n 0x16 > reg_10
+
+* reg_10
+
+ bit 7 6 5 4 3 2 1 0
+ B C T D L A S E
+
+ E: 1 = enable smart edges unconditionally
+ S: 1 = enable smart edges only when dragging
+ A: 1 = absolute mode (needs 4 byte packets, see reg_11)
+ L: 1 = enable drag lock (see reg_22)
+ D: 1 = disable dynamic resolution
+ T: 1 = disable tapping
+ C: 1 = enable corner tap
+ B: 1 = swap left and right button
+
+* reg_11
+
+ bit 7 6 5 4 3 2 1 0
+ 1 0 0 H V 1 F P
+
+ P: 1 = enable parity checking for relative mode
+ F: 1 = enable native 4 byte packet mode
+ V: 1 = enable vertical scroll area
+ H: 1 = enable horizontal scroll area
+
+* reg_20
+
+ single finger width?
+
+* reg_21
+
+ scroll area width (small: 0x40 ... wide: 0xff)
+
+* reg_22
+
+ drag lock time out (short: 0x14 ... long: 0xfe;
+ 0xff = tap again to release)
+
+* reg_23
+
+ tap make timeout?
+
+* reg_24
+
+ tap release timeout?
+
+* reg_25
+
+ smart edge cursor speed (0x02 = slow, 0x03 = medium, 0x04 = fast)
+
+* reg_26
+
+ smart edge activation area width?
+
+
+4.2 Native relative mode 4 byte packet format
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+byte 0:
+ bit 7 6 5 4 3 2 1 0
+ c c p2 p1 1 M R L
+
+ L, R, M = 1 when Left, Right, Middle mouse button pressed
+ some models have M as byte 3 odd parity bit
+ when parity checking is enabled (reg_11, P = 1):
+ p1..p2 = byte 1 and 2 odd parity bit
+ c = 1 when corner tap detected
+
+byte 1:
+ bit 7 6 5 4 3 2 1 0
+ dx7 dx6 dx5 dx4 dx3 dx2 dx1 dx0
+
+ dx7..dx0 = x movement; positive = right, negative = left
+ byte 1 = 0xf0 when corner tap detected
+
+byte 2:
+ bit 7 6 5 4 3 2 1 0
+ dy7 dy6 dy5 dy4 dy3 dy2 dy1 dy0
+
+ dy7..dy0 = y movement; positive = up, negative = down
+
+byte 3:
+ parity checking enabled (reg_11, P = 1):
+
+ bit 7 6 5 4 3 2 1 0
+ w h n1 n0 ds3 ds2 ds1 ds0
+
+ normally:
+ ds3..ds0 = scroll wheel amount and direction
+ positive = down or left
+ negative = up or right
+ when corner tap detected:
+ ds0 = 1 when top right corner tapped
+ ds1 = 1 when bottom right corner tapped
+ ds2 = 1 when bottom left corner tapped
+ ds3 = 1 when top left corner tapped
+ n1..n0 = number of fingers on touchpad
+ only models with firmware 2.x report this, models with
+ firmware 1.x seem to map one, two and three finger taps
+ directly to L, M and R mouse buttons
+ h = 1 when horizontal scroll action
+ w = 1 when wide finger touch?
+
+ otherwise (reg_11, P = 0):
+
+ bit 7 6 5 4 3 2 1 0
+ ds7 ds6 ds5 ds4 ds3 ds2 ds1 ds0
+
+ ds7..ds0 = vertical scroll amount and direction
+ negative = up
+ positive = down
+
+
+4.3 Native absolute mode 4 byte packet format
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+EF013 and EF019 have a special behaviour (due to a bug in the firmware?), and
+when 1 finger is touching, the first 2 position reports must be discarded.
+This counting is reset whenever a different number of fingers is reported.
+
+byte 0:
+ firmware version 1.x:
+
+ bit 7 6 5 4 3 2 1 0
+ D U p1 p2 1 p3 R L
+
+ L, R = 1 when Left, Right mouse button pressed
+ p1..p3 = byte 1..3 odd parity bit
+ D, U = 1 when rocker switch pressed Up, Down
+
+ firmware version 2.x:
+
+ bit 7 6 5 4 3 2 1 0
+ n1 n0 p2 p1 1 p3 R L
+
+ L, R = 1 when Left, Right mouse button pressed
+ p1..p3 = byte 1..3 odd parity bit
+ n1..n0 = number of fingers on touchpad
+
+byte 1:
+ firmware version 1.x:
+
+ bit 7 6 5 4 3 2 1 0
+ f 0 th tw x9 x8 y9 y8
+
+ tw = 1 when two finger touch
+ th = 1 when three finger touch
+ f = 1 when finger touch
+
+ firmware version 2.x:
+
+ bit 7 6 5 4 3 2 1 0
+ . . . . x9 x8 y9 y8
+
+byte 2:
+ bit 7 6 5 4 3 2 1 0
+ x7 x6 x5 x4 x3 x2 x1 x0
+
+ x9..x0 = absolute x value (horizontal)
+
+byte 3:
+ bit 7 6 5 4 3 2 1 0
+ y7 y6 y5 y4 y3 y2 y1 y0
+
+ y9..y0 = absolute y value (vertical)
+
+
+/////////////////////////////////////////////////////////////////////////////
+
+
+5. Hardware version 2
+ ==================
+
+
+5.1 Registers
+ ~~~~~~~~~
+
+By echoing a hexadecimal value to a register it contents can be altered.
+
+For example:
+
+ echo -n 0x56 > reg_10
+
+* reg_10
+
+ bit 7 6 5 4 3 2 1 0
+ 0 1 0 1 0 1 D 0
+
+ D: 1 = enable drag and drop
+
+* reg_11
+
+ bit 7 6 5 4 3 2 1 0
+ 1 0 0 0 S 0 1 0
+
+ S: 1 = enable vertical scroll
+
+* reg_21
+
+ unknown (0x00)
+
+* reg_22
+
+ drag and drop release time out (short: 0x70 ... long 0x7e;
+ 0x7f = never i.e. tap again to release)
+
+
+5.2 Native absolute mode 6 byte packet format
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+5.2.1 Parity checking and packet re-synchronization
+There is no parity checking, however some consistency checks can be performed.
+
+For instance for EF113:
+ SA1= packet[0];
+ A1 = packet[1];
+ B1 = packet[2];
+ SB1= packet[3];
+ C1 = packet[4];
+ D1 = packet[5];
+ if( (((SA1 & 0x3C) != 0x3C) && ((SA1 & 0xC0) != 0x80)) || // check Byte 1
+ (((SA1 & 0x0C) != 0x0C) && ((SA1 & 0xC0) == 0x80)) || // check Byte 1 (one finger pressed)
+ (((SA1 & 0xC0) != 0x80) && (( A1 & 0xF0) != 0x00)) || // check Byte 2
+ (((SB1 & 0x3E) != 0x38) && ((SA1 & 0xC0) != 0x80)) || // check Byte 4
+ (((SB1 & 0x0E) != 0x08) && ((SA1 & 0xC0) == 0x80)) || // check Byte 4 (one finger pressed)
+ (((SA1 & 0xC0) != 0x80) && (( C1 & 0xF0) != 0x00)) ) // check Byte 5
+ // error detected
+
+For all the other ones, there are just a few constant bits:
+ if( ((packet[0] & 0x0C) != 0x04) ||
+ ((packet[3] & 0x0f) != 0x02) )
+ // error detected
+
+
+In case an error is detected, all the packets are shifted by one (and packet[0] is discarded).
+
+5.2.2 One/Three finger touch
+ ~~~~~~~~~~~~~~~~
+
+byte 0:
+
+ bit 7 6 5 4 3 2 1 0
+ n1 n0 w3 w2 . . R L
+
+ L, R = 1 when Left, Right mouse button pressed
+ n1..n0 = number of fingers on touchpad
+
+byte 1:
+
+ bit 7 6 5 4 3 2 1 0
+ p7 p6 p5 p4 x11 x10 x9 x8
+
+byte 2:
+
+ bit 7 6 5 4 3 2 1 0
+ x7 x6 x5 x4 x3 x2 x1 x0
+
+ x11..x0 = absolute x value (horizontal)
+
+byte 3:
+
+ bit 7 6 5 4 3 2 1 0
+ n4 vf w1 w0 . . . b2
+
+ n4 = set if more than 3 fingers (only in 3 fingers mode)
+ vf = a kind of flag ? (only on EF123, 0 when finger is over one
+ of the buttons, 1 otherwise)
+ w3..w0 = width of the finger touch (not EF113)
+ b2 (on EF113 only, 0 otherwise), b2.R.L indicates one button pressed:
+ 0 = none
+ 1 = Left
+ 2 = Right
+ 3 = Middle (Left and Right)
+ 4 = Forward
+ 5 = Back
+ 6 = Another one
+ 7 = Another one
+
+byte 4:
+
+ bit 7 6 5 4 3 2 1 0
+ p3 p1 p2 p0 y11 y10 y9 y8
+
+ p7..p0 = pressure (not EF113)
+
+byte 5:
+
+ bit 7 6 5 4 3 2 1 0
+ y7 y6 y5 y4 y3 y2 y1 y0
+
+ y11..y0 = absolute y value (vertical)
+
+
+5.2.3 Two finger touch
+ ~~~~~~~~~~~~~~~~
+
+Note that the two pairs of coordinates are not exactly the coordinates of the
+two fingers, but only the pair of the lower-left and upper-right coordinates.
+So the actual fingers might be situated on the other diagonal of the square
+defined by these two points.
+
+byte 0:
+
+ bit 7 6 5 4 3 2 1 0
+ n1 n0 ay8 ax8 . . R L
+
+ L, R = 1 when Left, Right mouse button pressed
+ n1..n0 = number of fingers on touchpad
+
+byte 1:
+
+ bit 7 6 5 4 3 2 1 0
+ ax7 ax6 ax5 ax4 ax3 ax2 ax1 ax0
+
+ ax8..ax0 = lower-left finger absolute x value
+
+byte 2:
+
+ bit 7 6 5 4 3 2 1 0
+ ay7 ay6 ay5 ay4 ay3 ay2 ay1 ay0
+
+ ay8..ay0 = lower-left finger absolute y value
+
+byte 3:
+
+ bit 7 6 5 4 3 2 1 0
+ . . by8 bx8 . . . .
+
+byte 4:
+
+ bit 7 6 5 4 3 2 1 0
+ bx7 bx6 bx5 bx4 bx3 bx2 bx1 bx0
+
+ bx8..bx0 = upper-right finger absolute x value
+
+byte 5:
+
+ bit 7 6 5 4 3 2 1 0
+ by7 by8 by5 by4 by3 by2 by1 by0
+
+ by8..by0 = upper-right finger absolute y value
+
+/////////////////////////////////////////////////////////////////////////////
+
+6. Hardware version 3
+ ==================
+
+6.1 Registers
+ ~~~~~~~~~
+* reg_10
+
+ bit 7 6 5 4 3 2 1 0
+ 0 0 0 0 R F T A
+
+ A: 1 = enable absolute tracking
+ T: 1 = enable two finger mode auto correct
+ F: 1 = disable ABS Position Filter
+ R: 1 = enable real hardware resolution
+
+6.2 Native absolute mode 6 byte packet format
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+1 and 3 finger touch shares the same 6-byte packet format, except that
+3 finger touch only reports the position of the center of all three fingers.
+
+Firmware would send 12 bytes of data for 2 finger touch.
+
+Note on debounce:
+In case the box has unstable power supply or other electricity issues, or
+when number of finger changes, F/W would send "debounce packet" to inform
+driver that the hardware is in debounce status.
+The debouce packet has the following signature:
+ byte 0: 0xc4
+ byte 1: 0xff
+ byte 2: 0xff
+ byte 3: 0x02
+ byte 4: 0xff
+ byte 5: 0xff
+When we encounter this kind of packet, we just ignore it.
+
+6.2.1 One/Three finger touch
+ ~~~~~~~~~~~~~~~~~~~~~~
+
+byte 0:
+
+ bit 7 6 5 4 3 2 1 0
+ n1 n0 w3 w2 0 1 R L
+
+ L, R = 1 when Left, Right mouse button pressed
+ n1..n0 = number of fingers on touchpad
+
+byte 1:
+
+ bit 7 6 5 4 3 2 1 0
+ p7 p6 p5 p4 x11 x10 x9 x8
+
+byte 2:
+
+ bit 7 6 5 4 3 2 1 0
+ x7 x6 x5 x4 x3 x2 x1 x0
+
+ x11..x0 = absolute x value (horizontal)
+
+byte 3:
+
+ bit 7 6 5 4 3 2 1 0
+ 0 0 w1 w0 0 0 1 0
+
+ w3..w0 = width of the finger touch
+
+byte 4:
+
+ bit 7 6 5 4 3 2 1 0
+ p3 p1 p2 p0 y11 y10 y9 y8
+
+ p7..p0 = pressure
+
+byte 5:
+
+ bit 7 6 5 4 3 2 1 0
+ y7 y6 y5 y4 y3 y2 y1 y0
+
+ y11..y0 = absolute y value (vertical)
+
+6.2.2 Two finger touch
+ ~~~~~~~~~~~~~~~~
+
+The packet format is exactly the same for two finger touch, except the hardware
+sends two 6 byte packets. The first packet contains data for the first finger,
+the second packet has data for the second finger. So for two finger touch a
+total of 12 bytes are sent.
+
+/////////////////////////////////////////////////////////////////////////////
+
+7. Hardware version 4
+ ==================
+
+7.1 Registers
+ ~~~~~~~~~
+* reg_07
+
+ bit 7 6 5 4 3 2 1 0
+ 0 0 0 0 0 0 0 A
+
+ A: 1 = enable absolute tracking
+
+7.2 Native absolute mode 6 byte packet format
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+v4 hardware is a true multitouch touchpad, capable of tracking up to 5 fingers.
+Unfortunately, due to PS/2's limited bandwidth, its packet format is rather
+complex.
+
+Whenever the numbers or identities of the fingers changes, the hardware sends a
+status packet to indicate how many and which fingers is on touchpad, followed by
+head packets or motion packets. A head packet contains data of finger id, finger
+position (absolute x, y values), width, and pressure. A motion packet contains
+two fingers' position delta.
+
+For example, when status packet tells there are 2 fingers on touchpad, then we
+can expect two following head packets. If the finger status doesn't change,
+the following packets would be motion packets, only sending delta of finger
+position, until we receive a status packet.
+
+One exception is one finger touch. when a status packet tells us there is only
+one finger, the hardware would just send head packets afterwards.
+
+7.2.1 Status packet
+ ~~~~~~~~~~~~~
+
+byte 0:
+
+ bit 7 6 5 4 3 2 1 0
+ . . . . 0 1 R L
+
+ L, R = 1 when Left, Right mouse button pressed
+
+byte 1:
+
+ bit 7 6 5 4 3 2 1 0
+ . . . ft4 ft3 ft2 ft1 ft0
+
+ ft4 ft3 ft2 ft1 ft0 ftn = 1 when finger n is on touchpad
+
+byte 2: not used
+
+byte 3:
+
+ bit 7 6 5 4 3 2 1 0
+ . . . 1 0 0 0 0
+
+ constant bits
+
+byte 4:
+
+ bit 7 6 5 4 3 2 1 0
+ p . . . . . . .
+
+ p = 1 for palm
+
+byte 5: not used
+
+7.2.2 Head packet
+ ~~~~~~~~~~~
+
+byte 0:
+
+ bit 7 6 5 4 3 2 1 0
+ w3 w2 w1 w0 0 1 R L
+
+ L, R = 1 when Left, Right mouse button pressed
+ w3..w0 = finger width (spans how many trace lines)
+
+byte 1:
+
+ bit 7 6 5 4 3 2 1 0
+ p7 p6 p5 p4 x11 x10 x9 x8
+
+byte 2:
+
+ bit 7 6 5 4 3 2 1 0
+ x7 x6 x5 x4 x3 x2 x1 x0
+
+ x11..x0 = absolute x value (horizontal)
+
+byte 3:
+
+ bit 7 6 5 4 3 2 1 0
+ id2 id1 id0 1 0 0 0 1
+
+ id2..id0 = finger id
+
+byte 4:
+
+ bit 7 6 5 4 3 2 1 0
+ p3 p1 p2 p0 y11 y10 y9 y8
+
+ p7..p0 = pressure
+
+byte 5:
+
+ bit 7 6 5 4 3 2 1 0
+ y7 y6 y5 y4 y3 y2 y1 y0
+
+ y11..y0 = absolute y value (vertical)
+
+7.2.3 Motion packet
+ ~~~~~~~~~~~~~
+
+byte 0:
+
+ bit 7 6 5 4 3 2 1 0
+ id2 id1 id0 w 0 1 R L
+
+ L, R = 1 when Left, Right mouse button pressed
+ id2..id0 = finger id
+ w = 1 when delta overflows (> 127 or < -128), in this case
+ firmware sends us (delta x / 5) and (delta y / 5)
+
+byte 1:
+
+ bit 7 6 5 4 3 2 1 0
+ x7 x6 x5 x4 x3 x2 x1 x0
+
+ x7..x0 = delta x (two's complement)
+
+byte 2:
+
+ bit 7 6 5 4 3 2 1 0
+ y7 y6 y5 y4 y3 y2 y1 y0
+
+ y7..y0 = delta y (two's complement)
+
+byte 3:
+
+ bit 7 6 5 4 3 2 1 0
+ id2 id1 id0 1 0 0 1 0
+
+ id2..id0 = finger id
+
+byte 4:
+
+ bit 7 6 5 4 3 2 1 0
+ x7 x6 x5 x4 x3 x2 x1 x0
+
+ x7..x0 = delta x (two's complement)
+
+byte 5:
+
+ bit 7 6 5 4 3 2 1 0
+ y7 y6 y5 y4 y3 y2 y1 y0
+
+ y7..y0 = delta y (two's complement)
+
+ byte 0 ~ 2 for one finger
+ byte 3 ~ 5 for another
+
+
+8. Trackpoint (for Hardware version 3 and 4)
+ =========================================
+8.1 Registers
+ ~~~~~~~~~
+No special registers have been identified.
+
+8.2 Native relative mode 6 byte packet format
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+8.2.1 Status Packet
+ ~~~~~~~~~~~~~
+
+byte 0:
+ bit 7 6 5 4 3 2 1 0
+ 0 0 sx sy 0 M R L
+byte 1:
+ bit 7 6 5 4 3 2 1 0
+ ~sx 0 0 0 0 0 0 0
+byte 2:
+ bit 7 6 5 4 3 2 1 0
+ ~sy 0 0 0 0 0 0 0
+byte 3:
+ bit 7 6 5 4 3 2 1 0
+ 0 0 ~sy ~sx 0 1 1 0
+byte 4:
+ bit 7 6 5 4 3 2 1 0
+ x7 x6 x5 x4 x3 x2 x1 x0
+byte 5:
+ bit 7 6 5 4 3 2 1 0
+ y7 y6 y5 y4 y3 y2 y1 y0
+
+
+ x and y are written in two's complement spread
+ over 9 bits with sx/sy the relative top bit and
+ x7..x0 and y7..y0 the lower bits.
+ ~sx is the inverse of sx, ~sy is the inverse of sy.
+ The sign of y is opposite to what the input driver
+ expects for a relative movement
diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt
new file mode 100644
index 000000000..3f0f5ce33
--- /dev/null
+++ b/Documentation/input/event-codes.txt
@@ -0,0 +1,348 @@
+The input protocol uses a map of types and codes to express input device values
+to userspace. This document describes the types and codes and how and when they
+may be used.
+
+A single hardware event generates multiple input events. Each input event
+contains the new value of a single data item. A special event type, EV_SYN, is
+used to separate input events into packets of input data changes occurring at
+the same moment in time. In the following, the term "event" refers to a single
+input event encompassing a type, code, and value.
+
+The input protocol is a stateful protocol. Events are emitted only when values
+of event codes have changed. However, the state is maintained within the Linux
+input subsystem; drivers do not need to maintain the state and may attempt to
+emit unchanged values without harm. Userspace may obtain the current state of
+event code values using the EVIOCG* ioctls defined in linux/input.h. The event
+reports supported by a device are also provided by sysfs in
+class/input/event*/device/capabilities/, and the properties of a device are
+provided in class/input/event*/device/properties.
+
+Event types:
+===========
+Event types are groupings of codes under a logical input construct. Each
+type has a set of applicable codes to be used in generating events. See the
+Codes section for details on valid codes for each type.
+
+* EV_SYN:
+ - Used as markers to separate events. Events may be separated in time or in
+ space, such as with the multitouch protocol.
+
+* EV_KEY:
+ - Used to describe state changes of keyboards, buttons, or other key-like
+ devices.
+
+* EV_REL:
+ - Used to describe relative axis value changes, e.g. moving the mouse 5 units
+ to the left.
+
+* EV_ABS:
+ - Used to describe absolute axis value changes, e.g. describing the
+ coordinates of a touch on a touchscreen.
+
+* EV_MSC:
+ - Used to describe miscellaneous input data that do not fit into other types.
+
+* EV_SW:
+ - Used to describe binary state input switches.
+
+* EV_LED:
+ - Used to turn LEDs on devices on and off.
+
+* EV_SND:
+ - Used to output sound to devices.
+
+* EV_REP:
+ - Used for autorepeating devices.
+
+* EV_FF:
+ - Used to send force feedback commands to an input device.
+
+* EV_PWR:
+ - A special type for power button and switch input.
+
+* EV_FF_STATUS:
+ - Used to receive force feedback device status.
+
+Event codes:
+===========
+Event codes define the precise type of event.
+
+EV_SYN:
+----------
+EV_SYN event values are undefined. Their usage is defined only by when they are
+sent in the evdev event stream.
+
+* SYN_REPORT:
+ - Used to synchronize and separate events into packets of input data changes
+ occurring at the same moment in time. For example, motion of a mouse may set
+ the REL_X and REL_Y values for one motion, then emit a SYN_REPORT. The next
+ motion will emit more REL_X and REL_Y values and send another SYN_REPORT.
+
+* SYN_CONFIG:
+ - TBD
+
+* SYN_MT_REPORT:
+ - Used to synchronize and separate touch events. See the
+ multi-touch-protocol.txt document for more information.
+
+* SYN_DROPPED:
+ - Used to indicate buffer overrun in the evdev client's event queue.
+ Client should ignore all events up to and including next SYN_REPORT
+ event and query the device (using EVIOCG* ioctls) to obtain its
+ current state.
+
+EV_KEY:
+----------
+EV_KEY events take the form KEY_<name> or BTN_<name>. For example, KEY_A is used
+to represent the 'A' key on a keyboard. When a key is depressed, an event with
+the key's code is emitted with value 1. When the key is released, an event is
+emitted with value 0. Some hardware send events when a key is repeated. These
+events have a value of 2. In general, KEY_<name> is used for keyboard keys, and
+BTN_<name> is used for other types of momentary switch events.
+
+A few EV_KEY codes have special meanings:
+
+* BTN_TOOL_<name>:
+ - These codes are used in conjunction with input trackpads, tablets, and
+ touchscreens. These devices may be used with fingers, pens, or other tools.
+ When an event occurs and a tool is used, the corresponding BTN_TOOL_<name>
+ code should be set to a value of 1. When the tool is no longer interacting
+ with the input device, the BTN_TOOL_<name> code should be reset to 0. All
+ trackpads, tablets, and touchscreens should use at least one BTN_TOOL_<name>
+ code when events are generated.
+
+* BTN_TOUCH:
+ BTN_TOUCH is used for touch contact. While an input tool is determined to be
+ within meaningful physical contact, the value of this property must be set
+ to 1. Meaningful physical contact may mean any contact, or it may mean
+ contact conditioned by an implementation defined property. For example, a
+ touchpad may set the value to 1 only when the touch pressure rises above a
+ certain value. BTN_TOUCH may be combined with BTN_TOOL_<name> codes. For
+ example, a pen tablet may set BTN_TOOL_PEN to 1 and BTN_TOUCH to 0 while the
+ pen is hovering over but not touching the tablet surface.
+
+Note: For appropriate function of the legacy mousedev emulation driver,
+BTN_TOUCH must be the first evdev code emitted in a synchronization frame.
+
+Note: Historically a touch device with BTN_TOOL_FINGER and BTN_TOUCH was
+interpreted as a touchpad by userspace, while a similar device without
+BTN_TOOL_FINGER was interpreted as a touchscreen. For backwards compatibility
+with current userspace it is recommended to follow this distinction. In the
+future, this distinction will be deprecated and the device properties ioctl
+EVIOCGPROP, defined in linux/input.h, will be used to convey the device type.
+
+* BTN_TOOL_FINGER, BTN_TOOL_DOUBLETAP, BTN_TOOL_TRIPLETAP, BTN_TOOL_QUADTAP:
+ - These codes denote one, two, three, and four finger interaction on a
+ trackpad or touchscreen. For example, if the user uses two fingers and moves
+ them on the touchpad in an effort to scroll content on screen,
+ BTN_TOOL_DOUBLETAP should be set to value 1 for the duration of the motion.
+ Note that all BTN_TOOL_<name> codes and the BTN_TOUCH code are orthogonal in
+ purpose. A trackpad event generated by finger touches should generate events
+ for one code from each group. At most only one of these BTN_TOOL_<name>
+ codes should have a value of 1 during any synchronization frame.
+
+Note: Historically some drivers emitted multiple of the finger count codes with
+a value of 1 in the same synchronization frame. This usage is deprecated.
+
+Note: In multitouch drivers, the input_mt_report_finger_count() function should
+be used to emit these codes. Please see multi-touch-protocol.txt for details.
+
+EV_REL:
+----------
+EV_REL events describe relative changes in a property. For example, a mouse may
+move to the left by a certain number of units, but its absolute position in
+space is unknown. If the absolute position is known, EV_ABS codes should be used
+instead of EV_REL codes.
+
+A few EV_REL codes have special meanings:
+
+* REL_WHEEL, REL_HWHEEL:
+ - These codes are used for vertical and horizontal scroll wheels,
+ respectively.
+
+EV_ABS:
+----------
+EV_ABS events describe absolute changes in a property. For example, a touchpad
+may emit coordinates for a touch location.
+
+A few EV_ABS codes have special meanings:
+
+* ABS_DISTANCE:
+ - Used to describe the distance of a tool from an interaction surface. This
+ event should only be emitted while the tool is hovering, meaning in close
+ proximity of the device and while the value of the BTN_TOUCH code is 0. If
+ the input device may be used freely in three dimensions, consider ABS_Z
+ instead.
+
+* ABS_MT_<name>:
+ - Used to describe multitouch input events. Please see
+ multi-touch-protocol.txt for details.
+
+EV_SW:
+----------
+EV_SW events describe stateful binary switches. For example, the SW_LID code is
+used to denote when a laptop lid is closed.
+
+Upon binding to a device or resuming from suspend, a driver must report
+the current switch state. This ensures that the device, kernel, and userspace
+state is in sync.
+
+Upon resume, if the switch state is the same as before suspend, then the input
+subsystem will filter out the duplicate switch state reports. The driver does
+not need to keep the state of the switch at any time.
+
+EV_MSC:
+----------
+EV_MSC events are used for input and output events that do not fall under other
+categories.
+
+A few EV_MSC codes have special meaning:
+
+* MSC_TIMESTAMP:
+ - Used to report the number of microseconds since the last reset. This event
+ should be coded as an uint32 value, which is allowed to wrap around with
+ no special consequence. It is assumed that the time difference between two
+ consecutive events is reliable on a reasonable time scale (hours).
+ A reset to zero can happen, in which case the time since the last event is
+ unknown. If the device does not provide this information, the driver must
+ not provide it to user space.
+
+EV_LED:
+----------
+EV_LED events are used for input and output to set and query the state of
+various LEDs on devices.
+
+EV_REP:
+----------
+EV_REP events are used for specifying autorepeating events.
+
+EV_SND:
+----------
+EV_SND events are used for sending sound commands to simple sound output
+devices.
+
+EV_FF:
+----------
+EV_FF events are used to initialize a force feedback capable device and to cause
+such device to feedback.
+
+EV_PWR:
+----------
+EV_PWR events are a special type of event used specifically for power
+management. Its usage is not well defined. To be addressed later.
+
+Device properties:
+=================
+Normally, userspace sets up an input device based on the data it emits,
+i.e., the event types. In the case of two devices emitting the same event
+types, additional information can be provided in the form of device
+properties.
+
+INPUT_PROP_DIRECT + INPUT_PROP_POINTER:
+--------------------------------------
+The INPUT_PROP_DIRECT property indicates that device coordinates should be
+directly mapped to screen coordinates (not taking into account trivial
+transformations, such as scaling, flipping and rotating). Non-direct input
+devices require non-trivial transformation, such as absolute to relative
+transformation for touchpads. Typical direct input devices: touchscreens,
+drawing tablets; non-direct devices: touchpads, mice.
+
+The INPUT_PROP_POINTER property indicates that the device is not transposed
+on the screen and thus requires use of an on-screen pointer to trace user's
+movements. Typical pointer devices: touchpads, tablets, mice; non-pointer
+device: touchscreen.
+
+If neither INPUT_PROP_DIRECT or INPUT_PROP_POINTER are set, the property is
+considered undefined and the device type should be deduced in the
+traditional way, using emitted event types.
+
+INPUT_PROP_BUTTONPAD:
+--------------------
+For touchpads where the button is placed beneath the surface, such that
+pressing down on the pad causes a button click, this property should be
+set. Common in clickpad notebooks and macbooks from 2009 and onwards.
+
+Originally, the buttonpad property was coded into the bcm5974 driver
+version field under the name integrated button. For backwards
+compatibility, both methods need to be checked in userspace.
+
+INPUT_PROP_SEMI_MT:
+------------------
+Some touchpads, most common between 2008 and 2011, can detect the presence
+of multiple contacts without resolving the individual positions; only the
+number of contacts and a rectangular shape is known. For such
+touchpads, the semi-mt property should be set.
+
+Depending on the device, the rectangle may enclose all touches, like a
+bounding box, or just some of them, for instance the two most recent
+touches. The diversity makes the rectangle of limited use, but some
+gestures can normally be extracted from it.
+
+If INPUT_PROP_SEMI_MT is not set, the device is assumed to be a true MT
+device.
+
+INPUT_PROP_TOPBUTTONPAD:
+-----------------------
+Some laptops, most notably the Lenovo *40 series provide a trackstick
+device but do not have physical buttons associated with the trackstick
+device. Instead, the top area of the touchpad is marked to show
+visual/haptic areas for left, middle, right buttons intended to be used
+with the trackstick.
+
+If INPUT_PROP_TOPBUTTONPAD is set, userspace should emulate buttons
+accordingly. This property does not affect kernel behavior.
+The kernel does not provide button emulation for such devices but treats
+them as any other INPUT_PROP_BUTTONPAD device.
+
+INPUT_PROP_ACCELEROMETER
+-------------------------
+Directional axes on this device (absolute and/or relative x, y, z) represent
+accelerometer data. All other axes retain their meaning. A device must not mix
+regular directional axes and accelerometer axes on the same event node.
+
+Guidelines:
+==========
+The guidelines below ensure proper single-touch and multi-finger functionality.
+For multi-touch functionality, see the multi-touch-protocol.txt document for
+more information.
+
+Mice:
+----------
+REL_{X,Y} must be reported when the mouse moves. BTN_LEFT must be used to report
+the primary button press. BTN_{MIDDLE,RIGHT,4,5,etc.} should be used to report
+further buttons of the device. REL_WHEEL and REL_HWHEEL should be used to report
+scroll wheel events where available.
+
+Touchscreens:
+----------
+ABS_{X,Y} must be reported with the location of the touch. BTN_TOUCH must be
+used to report when a touch is active on the screen.
+BTN_{MOUSE,LEFT,MIDDLE,RIGHT} must not be reported as the result of touch
+contact. BTN_TOOL_<name> events should be reported where possible.
+
+For new hardware, INPUT_PROP_DIRECT should be set.
+
+Trackpads:
+----------
+Legacy trackpads that only provide relative position information must report
+events like mice described above.
+
+Trackpads that provide absolute touch position must report ABS_{X,Y} for the
+location of the touch. BTN_TOUCH should be used to report when a touch is active
+on the trackpad. Where multi-finger support is available, BTN_TOOL_<name> should
+be used to report the number of touches active on the trackpad.
+
+For new hardware, INPUT_PROP_POINTER should be set.
+
+Tablets:
+----------
+BTN_TOOL_<name> events must be reported when a stylus or other tool is active on
+the tablet. ABS_{X,Y} must be reported with the location of the tool. BTN_TOUCH
+should be used to report when the tool is in contact with the tablet.
+BTN_{STYLUS,STYLUS2} should be used to report buttons on the tool itself. Any
+button may be used for buttons on the tablet except BTN_{MOUSE,LEFT}.
+BTN_{0,1,2,etc} are good generic codes for unlabeled buttons. Do not use
+meaningful buttons, like BTN_FORWARD, unless the button is labeled for that
+purpose on the device.
+
+For new hardware, both INPUT_PROP_DIRECT and INPUT_PROP_POINTER should be set.
diff --git a/Documentation/input/ff.txt b/Documentation/input/ff.txt
new file mode 100644
index 000000000..b3867bf49
--- /dev/null
+++ b/Documentation/input/ff.txt
@@ -0,0 +1,221 @@
+Force feedback for Linux.
+By Johann Deneux <johann.deneux@gmail.com> on 2001/04/22.
+Updated by Anssi Hannula <anssi.hannula@gmail.com> on 2006/04/09.
+You may redistribute this file. Please remember to include shape.fig and
+interactive.fig as well.
+----------------------------------------------------------------------------
+
+1. Introduction
+~~~~~~~~~~~~~~~
+This document describes how to use force feedback devices under Linux. The
+goal is not to support these devices as if they were simple input-only devices
+(as it is already the case), but to really enable the rendering of force
+effects.
+This document only describes the force feedback part of the Linux input
+interface. Please read joystick.txt and input.txt before reading further this
+document.
+
+2. Instructions to the user
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+To enable force feedback, you have to:
+
+1. have your kernel configured with evdev and a driver that supports your
+ device.
+2. make sure evdev module is loaded and /dev/input/event* device files are
+ created.
+
+Before you start, let me WARN you that some devices shake violently during the
+initialisation phase. This happens for example with my "AVB Top Shot Pegasus".
+To stop this annoying behaviour, move you joystick to its limits. Anyway, you
+should keep a hand on your device, in order to avoid it to break down if
+something goes wrong.
+
+If you have a serial iforce device, you need to start inputattach. See
+joystick.txt for details.
+
+2.1 Does it work ?
+~~~~~~~~~~~~~~~~~~
+There is an utility called fftest that will allow you to test the driver.
+% fftest /dev/input/eventXX
+
+3. Instructions to the developer
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+All interactions are done using the event API. That is, you can use ioctl()
+and write() on /dev/input/eventXX.
+This information is subject to change.
+
+3.1 Querying device capabilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#include <linux/input.h>
+#include <sys/ioctl.h>
+
+#define BITS_TO_LONGS(x) \
+ (((x) + 8 * sizeof (unsigned long) - 1) / (8 * sizeof (unsigned long)))
+unsigned long features[BITS_TO_LONGS(FF_CNT)];
+int ioctl(int file_descriptor, int request, unsigned long *features);
+
+"request" must be EVIOCGBIT(EV_FF, size of features array in bytes )
+
+Returns the features supported by the device. features is a bitfield with the
+following bits:
+- FF_CONSTANT can render constant force effects
+- FF_PERIODIC can render periodic effects with the following waveforms:
+ - FF_SQUARE square waveform
+ - FF_TRIANGLE triangle waveform
+ - FF_SINE sine waveform
+ - FF_SAW_UP sawtooth up waveform
+ - FF_SAW_DOWN sawtooth down waveform
+ - FF_CUSTOM custom waveform
+- FF_RAMP can render ramp effects
+- FF_SPRING can simulate the presence of a spring
+- FF_FRICTION can simulate friction
+- FF_DAMPER can simulate damper effects
+- FF_RUMBLE rumble effects
+- FF_INERTIA can simulate inertia
+- FF_GAIN gain is adjustable
+- FF_AUTOCENTER autocenter is adjustable
+
+Note: In most cases you should use FF_PERIODIC instead of FF_RUMBLE. All
+ devices that support FF_RUMBLE support FF_PERIODIC (square, triangle,
+ sine) and the other way around.
+
+Note: The exact syntax FF_CUSTOM is undefined for the time being as no driver
+ supports it yet.
+
+
+int ioctl(int fd, EVIOCGEFFECTS, int *n);
+
+Returns the number of effects the device can keep in its memory.
+
+3.2 Uploading effects to the device
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#include <linux/input.h>
+#include <sys/ioctl.h>
+
+int ioctl(int file_descriptor, int request, struct ff_effect *effect);
+
+"request" must be EVIOCSFF.
+
+"effect" points to a structure describing the effect to upload. The effect is
+uploaded, but not played.
+The content of effect may be modified. In particular, its field "id" is set
+to the unique id assigned by the driver. This data is required for performing
+some operations (removing an effect, controlling the playback).
+This if field must be set to -1 by the user in order to tell the driver to
+allocate a new effect.
+
+Effects are file descriptor specific.
+
+See <linux/input.h> for a description of the ff_effect struct. You should also
+find help in a few sketches, contained in files shape.fig and interactive.fig.
+You need xfig to visualize these files.
+
+3.3 Removing an effect from the device
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+int ioctl(int fd, EVIOCRMFF, effect.id);
+
+This makes room for new effects in the device's memory. Note that this also
+stops the effect if it was playing.
+
+3.4 Controlling the playback of effects
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Control of playing is done with write(). Below is an example:
+
+#include <linux/input.h>
+#include <unistd.h>
+
+ struct input_event play;
+ struct input_event stop;
+ struct ff_effect effect;
+ int fd;
+...
+ fd = open("/dev/input/eventXX", O_RDWR);
+...
+ /* Play three times */
+ play.type = EV_FF;
+ play.code = effect.id;
+ play.value = 3;
+
+ write(fd, (const void*) &play, sizeof(play));
+...
+ /* Stop an effect */
+ stop.type = EV_FF;
+ stop.code = effect.id;
+ stop.value = 0;
+
+ write(fd, (const void*) &play, sizeof(stop));
+
+3.5 Setting the gain
+~~~~~~~~~~~~~~~~~~~~
+Not all devices have the same strength. Therefore, users should set a gain
+factor depending on how strong they want effects to be. This setting is
+persistent across access to the driver.
+
+/* Set the gain of the device
+int gain; /* between 0 and 100 */
+struct input_event ie; /* structure used to communicate with the driver */
+
+ie.type = EV_FF;
+ie.code = FF_GAIN;
+ie.value = 0xFFFFUL * gain / 100;
+
+if (write(fd, &ie, sizeof(ie)) == -1)
+ perror("set gain");
+
+3.6 Enabling/Disabling autocenter
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The autocenter feature quite disturbs the rendering of effects in my opinion,
+and I think it should be an effect, which computation depends on the game
+type. But you can enable it if you want.
+
+int autocenter; /* between 0 and 100 */
+struct input_event ie;
+
+ie.type = EV_FF;
+ie.code = FF_AUTOCENTER;
+ie.value = 0xFFFFUL * autocenter / 100;
+
+if (write(fd, &ie, sizeof(ie)) == -1)
+ perror("set auto-center");
+
+A value of 0 means "no auto-center".
+
+3.7 Dynamic update of an effect
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Proceed as if you wanted to upload a new effect, except that instead of
+setting the id field to -1, you set it to the wanted effect id.
+Normally, the effect is not stopped and restarted. However, depending on the
+type of device, not all parameters can be dynamically updated. For example,
+the direction of an effect cannot be updated with iforce devices. In this
+case, the driver stops the effect, up-load it, and restart it.
+
+Therefore it is recommended to dynamically change direction while the effect
+is playing only when it is ok to restart the effect with a replay count of 1.
+
+3.8 Information about the status of effects
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Every time the status of an effect is changed, an event is sent. The values
+and meanings of the fields of the event are as follows:
+
+struct input_event {
+/* When the status of the effect changed */
+ struct timeval time;
+
+/* Set to EV_FF_STATUS */
+ unsigned short type;
+
+/* Contains the id of the effect */
+ unsigned short code;
+
+/* Indicates the status */
+ unsigned int value;
+};
+
+FF_STATUS_STOPPED The effect stopped playing
+FF_STATUS_PLAYING The effect started to play
+
+NOTE: Status feedback is only supported by iforce driver. If you have
+ a really good reason to use this, please contact
+ linux-joystick@atrey.karlin.mff.cuni.cz or anssi.hannula@gmail.com
+ so that support for it can be added to the rest of the drivers.
+
diff --git a/Documentation/input/gamepad.txt b/Documentation/input/gamepad.txt
new file mode 100644
index 000000000..3f6d8a5e9
--- /dev/null
+++ b/Documentation/input/gamepad.txt
@@ -0,0 +1,159 @@
+ Linux Gamepad API
+----------------------------------------------------------------------------
+
+1. Intro
+~~~~~~~~
+Linux provides many different input drivers for gamepad hardware. To avoid
+having user-space deal with different button-mappings for each gamepad, this
+document defines how gamepads are supposed to report their data.
+
+2. Geometry
+~~~~~~~~~~~
+As "gamepad" we define devices which roughly look like this:
+
+ ____________________________ __
+ / [__ZL__] [__ZR__] \ |
+ / [__ TL __] [__ TR __] \ | Front Triggers
+ __/________________________________\__ __|
+ / _ \ |
+ / /\ __ (N) \ |
+ / || __ |MO| __ _ _ \ | Main Pad
+ | <===DP===> |SE| |ST| (W) -|- (E) | |
+ \ || ___ ___ _ / |
+ /\ \/ / \ / \ (S) /\ __|
+ / \________ | LS | ____ | RS | ________/ \ |
+ | / \ \___/ / \ \___/ / \ | | Control Sticks
+ | / \_____/ \_____/ \ | __|
+ | / \ |
+ \_____/ \_____/
+
+ |________|______| |______|___________|
+ D-Pad Left Right Action Pad
+ Stick Stick
+
+ |_____________|
+ Menu Pad
+
+Most gamepads have the following features:
+ - Action-Pad
+ 4 buttons in diamonds-shape (on the right side). The buttons are
+ differently labeled on most devices so we define them as NORTH,
+ SOUTH, WEST and EAST.
+ - D-Pad (Direction-pad)
+ 4 buttons (on the left side) that point up, down, left and right.
+ - Menu-Pad
+ Different constellations, but most-times 2 buttons: SELECT - START
+ Furthermore, many gamepads have a fancy branded button that is used as
+ special system-button. It often looks different to the other buttons and
+ is used to pop up system-menus or system-settings.
+ - Analog-Sticks
+ Analog-sticks provide freely moveable sticks to control directions. Not
+ all devices have both or any, but they are present at most times.
+ Analog-sticks may also provide a digital button if you press them.
+ - Triggers
+ Triggers are located on the upper-side of the pad in vertical direction.
+ Not all devices provide them, but the upper buttons are normally named
+ Left- and Right-Triggers, the lower buttons Z-Left and Z-Right.
+ - Rumble
+ Many devices provide force-feedback features. But are mostly just
+ simple rumble motors.
+
+3. Detection
+~~~~~~~~~~~~
+All gamepads that follow the protocol described here map BTN_GAMEPAD. This is
+an alias for BTN_SOUTH/BTN_A. It can be used to identify a gamepad as such.
+However, not all gamepads provide all features, so you need to test for all
+features that you need, first. How each feature is mapped is described below.
+
+Legacy drivers often don't comply to these rules. As we cannot change them
+for backwards-compatibility reasons, you need to provide fixup mappings in
+user-space yourself. Some of them might also provide module-options that
+change the mappings so you can advise users to set these.
+
+All new gamepads are supposed to comply with this mapping. Please report any
+bugs, if they don't.
+
+There are a lot of less-featured/less-powerful devices out there, which re-use
+the buttons from this protocol. However, they try to do this in a compatible
+fashion. For example, the "Nintendo Wii Nunchuk" provides two trigger buttons
+and one analog stick. It reports them as if it were a gamepad with only one
+analog stick and two trigger buttons on the right side.
+But that means, that if you only support "real" gamepads, you must test
+devices for _all_ reported events that you need. Otherwise, you will also get
+devices that report a small subset of the events.
+
+No other devices, that do not look/feel like a gamepad, shall report these
+events.
+
+4. Events
+~~~~~~~~~
+Gamepads report the following events:
+
+Action-Pad:
+ Every gamepad device has at least 2 action buttons. This means, that every
+ device reports BTN_SOUTH (which BTN_GAMEPAD is an alias for). Regardless
+ of the labels on the buttons, the codes are sent according to the
+ physical position of the buttons.
+ Please note that 2- and 3-button pads are fairly rare and old. You might
+ want to filter gamepads that do not report all four.
+ 2-Button Pad:
+ If only 2 action-buttons are present, they are reported as BTN_SOUTH and
+ BTN_EAST. For vertical layouts, the upper button is BTN_EAST. For
+ horizontal layouts, the button more on the right is BTN_EAST.
+ 3-Button Pad:
+ If only 3 action-buttons are present, they are reported as (from left
+ to right): BTN_WEST, BTN_SOUTH, BTN_EAST
+ If the buttons are aligned perfectly vertically, they are reported as
+ (from top down): BTN_WEST, BTN_SOUTH, BTN_EAST
+ 4-Button Pad:
+ If all 4 action-buttons are present, they can be aligned in two
+ different formations. If diamond-shaped, they are reported as BTN_NORTH,
+ BTN_WEST, BTN_SOUTH, BTN_EAST according to their physical location.
+ If rectangular-shaped, the upper-left button is BTN_NORTH, lower-left
+ is BTN_WEST, lower-right is BTN_SOUTH and upper-right is BTN_EAST.
+
+D-Pad:
+ Every gamepad provides a D-Pad with four directions: Up, Down, Left, Right
+ Some of these are available as digital buttons, some as analog buttons. Some
+ may even report both. The kernel does not convert between these so
+ applications should support both and choose what is more appropriate if
+ both are reported.
+ Digital buttons are reported as:
+ BTN_DPAD_*
+ Analog buttons are reported as:
+ ABS_HAT0X and ABS_HAT0Y
+ (for ABS values negative is left/up, positive is right/down)
+
+Analog-Sticks:
+ The left analog-stick is reported as ABS_X, ABS_Y. The right analog stick is
+ reported as ABS_RX, ABS_RY. Zero, one or two sticks may be present.
+ If analog-sticks provide digital buttons, they are mapped accordingly as
+ BTN_THUMBL (first/left) and BTN_THUMBR (second/right).
+ (for ABS values negative is left/up, positive is right/down)
+
+Triggers:
+ Trigger buttons can be available as digital or analog buttons or both. User-
+ space must correctly deal with any situation and choose the most appropriate
+ mode.
+ Upper trigger buttons are reported as BTN_TR or ABS_HAT1X (right) and BTN_TL
+ or ABS_HAT1Y (left). Lower trigger buttons are reported as BTN_TR2 or
+ ABS_HAT2X (right/ZR) and BTN_TL2 or ABS_HAT2Y (left/ZL).
+ If only one trigger-button combination is present (upper+lower), they are
+ reported as "right" triggers (BTN_TR/ABS_HAT1X).
+ (ABS trigger values start at 0, pressure is reported as positive values)
+
+Menu-Pad:
+ Menu buttons are always digital and are mapped according to their location
+ instead of their labels. That is:
+ 1-button Pad: Mapped as BTN_START
+ 2-button Pad: Left button mapped as BTN_SELECT, right button mapped as
+ BTN_START
+ Many pads also have a third button which is branded or has a special symbol
+ and meaning. Such buttons are mapped as BTN_MODE. Examples are the Nintendo
+ "HOME" button, the XBox "X"-button or Sony "PS" button.
+
+Rumble:
+ Rumble is advertised as FF_RUMBLE.
+
+----------------------------------------------------------------------------
+ Written 2013 by David Herrmann <dh.herrmann@gmail.com>
diff --git a/Documentation/input/gameport-programming.txt b/Documentation/input/gameport-programming.txt
new file mode 100644
index 000000000..03a74fc3b
--- /dev/null
+++ b/Documentation/input/gameport-programming.txt
@@ -0,0 +1,187 @@
+Programming gameport drivers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. A basic classic gameport
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If the gameport doesn't provide more than the inb()/outb() functionality,
+the code needed to register it with the joystick drivers is simple:
+
+ struct gameport gameport;
+
+ gameport.io = MY_IO_ADDRESS;
+ gameport_register_port(&gameport);
+
+Make sure struct gameport is initialized to 0 in all other fields. The
+gameport generic code will take care of the rest.
+
+If your hardware supports more than one io address, and your driver can
+choose which one to program the hardware to, starting from the more exotic
+addresses is preferred, because the likelihood of clashing with the standard
+0x201 address is smaller.
+
+Eg. if your driver supports addresses 0x200, 0x208, 0x210 and 0x218, then
+0x218 would be the address of first choice.
+
+If your hardware supports a gameport address that is not mapped to ISA io
+space (is above 0x1000), use that one, and don't map the ISA mirror.
+
+Also, always request_region() on the whole io space occupied by the
+gameport. Although only one ioport is really used, the gameport usually
+occupies from one to sixteen addresses in the io space.
+
+Please also consider enabling the gameport on the card in the ->open()
+callback if the io is mapped to ISA space - this way it'll occupy the io
+space only when something really is using it. Disable it again in the
+->close() callback. You also can select the io address in the ->open()
+callback, so that it doesn't fail if some of the possible addresses are
+already occupied by other gameports.
+
+2. Memory mapped gameport
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When a gameport can be accessed through MMIO, this way is preferred, because
+it is faster, allowing more reads per second. Registering such a gameport
+isn't as easy as a basic IO one, but not so much complex:
+
+ struct gameport gameport;
+
+ void my_trigger(struct gameport *gameport)
+ {
+ my_mmio = 0xff;
+ }
+
+ unsigned char my_read(struct gameport *gameport)
+ {
+ return my_mmio;
+ }
+
+ gameport.read = my_read;
+ gameport.trigger = my_trigger;
+ gameport_register_port(&gameport);
+
+3. Cooked mode gameport
+~~~~~~~~~~~~~~~~~~~~~~~
+
+There are gameports that can report the axis values as numbers, that means
+the driver doesn't have to measure them the old way - an ADC is built into
+the gameport. To register a cooked gameport:
+
+ struct gameport gameport;
+
+ int my_cooked_read(struct gameport *gameport, int *axes, int *buttons)
+ {
+ int i;
+
+ for (i = 0; i < 4; i++)
+ axes[i] = my_mmio[i];
+ buttons[i] = my_mmio[4];
+ }
+
+ int my_open(struct gameport *gameport, int mode)
+ {
+ return -(mode != GAMEPORT_MODE_COOKED);
+ }
+
+ gameport.cooked_read = my_cooked_read;
+ gameport.open = my_open;
+ gameport.fuzz = 8;
+ gameport_register_port(&gameport);
+
+The only confusing thing here is the fuzz value. Best determined by
+experimentation, it is the amount of noise in the ADC data. Perfect
+gameports can set this to zero, most common have fuzz between 8 and 32.
+See analog.c and input.c for handling of fuzz - the fuzz value determines
+the size of a gaussian filter window that is used to eliminate the noise
+in the data.
+
+4. More complex gameports
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Gameports can support both raw and cooked modes. In that case combine either
+examples 1+2 or 1+3. Gameports can support internal calibration - see below,
+and also lightning.c and analog.c on how that works. If your driver supports
+more than one gameport instance simultaneously, use the ->private member of
+the gameport struct to point to your data.
+
+5. Unregistering a gameport
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Simple:
+
+gameport_unregister_port(&gameport);
+
+6. The gameport structure
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+struct gameport {
+
+ void *private;
+
+A private pointer for free use in the gameport driver. (Not the joystick
+driver!)
+
+ int number;
+
+Number assigned to the gameport when registered. Informational purpose only.
+
+ int io;
+
+I/O address for use with raw mode. You have to either set this, or ->read()
+to some value if your gameport supports raw mode.
+
+ int speed;
+
+Raw mode speed of the gameport reads in thousands of reads per second.
+
+ int fuzz;
+
+If the gameport supports cooked mode, this should be set to a value that
+represents the amount of noise in the data. See section 3.
+
+ void (*trigger)(struct gameport *);
+
+Trigger. This function should trigger the ns558 oneshots. If set to NULL,
+outb(0xff, io) will be used.
+
+ unsigned char (*read)(struct gameport *);
+
+Read the buttons and ns558 oneshot bits. If set to NULL, inb(io) will be
+used instead.
+
+ int (*cooked_read)(struct gameport *, int *axes, int *buttons);
+
+If the gameport supports cooked mode, it should point this to its cooked
+read function. It should fill axes[0..3] with four values of the joystick axes
+and buttons[0] with four bits representing the buttons.
+
+ int (*calibrate)(struct gameport *, int *axes, int *max);
+
+Function for calibrating the ADC hardware. When called, axes[0..3] should be
+pre-filled by cooked data by the caller, max[0..3] should be pre-filled with
+expected maximums for each axis. The calibrate() function should set the
+sensitivity of the ADC hardware so that the maximums fit in its range and
+recompute the axes[] values to match the new sensitivity or re-read them from
+the hardware so that they give valid values.
+
+ int (*open)(struct gameport *, int mode);
+
+Open() serves two purposes. First a driver either opens the port in raw or
+in cooked mode, the open() callback can decide which modes are supported.
+Second, resource allocation can happen here. The port can also be enabled
+here. Prior to this call, other fields of the gameport struct (namely the io
+member) need not to be valid.
+
+ void (*close)(struct gameport *);
+
+Close() should free the resources allocated by open, possibly disabling the
+gameport.
+
+ struct gameport_dev *dev;
+ struct gameport *next;
+
+For internal use by the gameport layer.
+
+};
+
+Enjoy!
diff --git a/Documentation/input/gpio-tilt.txt b/Documentation/input/gpio-tilt.txt
new file mode 100644
index 000000000..2cdfd9bcb
--- /dev/null
+++ b/Documentation/input/gpio-tilt.txt
@@ -0,0 +1,103 @@
+Driver for tilt-switches connected via GPIOs
+============================================
+
+Generic driver to read data from tilt switches connected via gpios.
+Orientation can be provided by one or more than one tilt switches,
+i.e. each tilt switch providing one axis, and the number of axes
+is also not limited.
+
+
+Data structures:
+----------------
+
+The array of struct gpio in the gpios field is used to list the gpios
+that represent the current tilt state.
+
+The array of struct gpio_tilt_axis describes the axes that are reported
+to the input system. The values set therein are used for the
+input_set_abs_params calls needed to init the axes.
+
+The array of struct gpio_tilt_state maps gpio states to the corresponding
+values to report. The gpio state is represented as a bitfield where the
+bit-index corresponds to the index of the gpio in the struct gpio array.
+In the same manner the values stored in the axes array correspond to
+the elements of the gpio_tilt_axis-array.
+
+
+Example:
+--------
+
+Example configuration for a single TS1003 tilt switch that rotates around
+one axis in 4 steps and emits the current tilt via two GPIOs.
+
+static int sg060_tilt_enable(struct device *dev) {
+ /* code to enable the sensors */
+};
+
+static void sg060_tilt_disable(struct device *dev) {
+ /* code to disable the sensors */
+};
+
+static struct gpio sg060_tilt_gpios[] = {
+ { SG060_TILT_GPIO_SENSOR1, GPIOF_IN, "tilt_sensor1" },
+ { SG060_TILT_GPIO_SENSOR2, GPIOF_IN, "tilt_sensor2" },
+};
+
+static struct gpio_tilt_state sg060_tilt_states[] = {
+ {
+ .gpios = (0 << 1) | (0 << 0),
+ .axes = (int[]) {
+ 0,
+ },
+ }, {
+ .gpios = (0 << 1) | (1 << 0),
+ .axes = (int[]) {
+ 1, /* 90 degrees */
+ },
+ }, {
+ .gpios = (1 << 1) | (1 << 0),
+ .axes = (int[]) {
+ 2, /* 180 degrees */
+ },
+ }, {
+ .gpios = (1 << 1) | (0 << 0),
+ .axes = (int[]) {
+ 3, /* 270 degrees */
+ },
+ },
+};
+
+static struct gpio_tilt_axis sg060_tilt_axes[] = {
+ {
+ .axis = ABS_RY,
+ .min = 0,
+ .max = 3,
+ .fuzz = 0,
+ .flat = 0,
+ },
+};
+
+static struct gpio_tilt_platform_data sg060_tilt_pdata= {
+ .gpios = sg060_tilt_gpios,
+ .nr_gpios = ARRAY_SIZE(sg060_tilt_gpios),
+
+ .axes = sg060_tilt_axes,
+ .nr_axes = ARRAY_SIZE(sg060_tilt_axes),
+
+ .states = sg060_tilt_states,
+ .nr_states = ARRAY_SIZE(sg060_tilt_states),
+
+ .debounce_interval = 100,
+
+ .poll_interval = 1000,
+ .enable = sg060_tilt_enable,
+ .disable = sg060_tilt_disable,
+};
+
+static struct platform_device sg060_device_tilt = {
+ .name = "gpio-tilt-polled",
+ .id = -1,
+ .dev = {
+ .platform_data = &sg060_tilt_pdata,
+ },
+};
diff --git a/Documentation/input/iforce-protocol.txt b/Documentation/input/iforce-protocol.txt
new file mode 100644
index 000000000..66287151c
--- /dev/null
+++ b/Documentation/input/iforce-protocol.txt
@@ -0,0 +1,258 @@
+** Introduction
+This document describes what I managed to discover about the protocol used to
+specify force effects to I-Force 2.0 devices. None of this information comes
+from Immerse. That's why you should not trust what is written in this
+document. This document is intended to help understanding the protocol.
+This is not a reference. Comments and corrections are welcome. To contact me,
+send an email to: johann.deneux@gmail.com
+
+** WARNING **
+I shall not be held responsible for any damage or harm caused if you try to
+send data to your I-Force device based on what you read in this document.
+
+** Preliminary Notes:
+All values are hexadecimal with big-endian encoding (msb on the left). Beware,
+values inside packets are encoded using little-endian. Bytes whose roles are
+unknown are marked ??? Information that needs deeper inspection is marked (?)
+
+** General form of a packet **
+This is how packets look when the device uses the rs232 to communicate.
+2B OP LEN DATA CS
+CS is the checksum. It is equal to the exclusive or of all bytes.
+
+When using USB:
+OP DATA
+The 2B, LEN and CS fields have disappeared, probably because USB handles frames and
+data corruption is handled or unsignificant.
+
+First, I describe effects that are sent by the device to the computer
+
+** Device input state
+This packet is used to indicate the state of each button and the value of each
+axis
+OP= 01 for a joystick, 03 for a wheel
+LEN= Varies from device to device
+00 X-Axis lsb
+01 X-Axis msb
+02 Y-Axis lsb, or gas pedal for a wheel
+03 Y-Axis msb, or brake pedal for a wheel
+04 Throttle
+05 Buttons
+06 Lower 4 bits: Buttons
+ Upper 4 bits: Hat
+07 Rudder
+
+** Device effects states
+OP= 02
+LEN= Varies
+00 ? Bit 1 (Value 2) is the value of the deadman switch
+01 Bit 8 is set if the effect is playing. Bits 0 to 7 are the effect id.
+02 ??
+03 Address of parameter block changed (lsb)
+04 Address of parameter block changed (msb)
+05 Address of second parameter block changed (lsb)
+... depending on the number of parameter blocks updated
+
+** Force effect **
+OP= 01
+LEN= 0e
+00 Channel (when playing several effects at the same time, each must be assigned a channel)
+01 Wave form
+ Val 00 Constant
+ Val 20 Square
+ Val 21 Triangle
+ Val 22 Sine
+ Val 23 Sawtooth up
+ Val 24 Sawtooth down
+ Val 40 Spring (Force = f(pos))
+ Val 41 Friction (Force = f(velocity)) and Inertia (Force = f(acceleration))
+
+
+02 Axes affected and trigger
+ Bits 4-7: Val 2 = effect along one axis. Byte 05 indicates direction
+ Val 4 = X axis only. Byte 05 must contain 5a
+ Val 8 = Y axis only. Byte 05 must contain b4
+ Val c = X and Y axes. Bytes 05 must contain 60
+ Bits 0-3: Val 0 = No trigger
+ Val x+1 = Button x triggers the effect
+ When the whole byte is 0, cancel the previously set trigger
+
+03-04 Duration of effect (little endian encoding, in ms)
+
+05 Direction of effect, if applicable. Else, see 02 for value to assign.
+
+06-07 Minimum time between triggering.
+
+08-09 Address of periodicity or magnitude parameters
+0a-0b Address of attack and fade parameters, or ffff if none.
+*or*
+08-09 Address of interactive parameters for X-axis, or ffff if not applicable
+0a-0b Address of interactive parameters for Y-axis, or ffff if not applicable
+
+0c-0d Delay before execution of effect (little endian encoding, in ms)
+
+
+** Time based parameters **
+
+*** Attack and fade ***
+OP= 02
+LEN= 08
+00-01 Address where to store the parameters
+02-03 Duration of attack (little endian encoding, in ms)
+04 Level at end of attack. Signed byte.
+05-06 Duration of fade.
+07 Level at end of fade.
+
+*** Magnitude ***
+OP= 03
+LEN= 03
+00-01 Address
+02 Level. Signed byte.
+
+*** Periodicity ***
+OP= 04
+LEN= 07
+00-01 Address
+02 Magnitude. Signed byte.
+03 Offset. Signed byte.
+04 Phase. Val 00 = 0 deg, Val 40 = 90 degs.
+05-06 Period (little endian encoding, in ms)
+
+** Interactive parameters **
+OP= 05
+LEN= 0a
+00-01 Address
+02 Positive Coeff
+03 Negative Coeff
+04+05 Offset (center)
+06+07 Dead band (Val 01F4 = 5000 (decimal))
+08 Positive saturation (Val 0a = 1000 (decimal) Val 64 = 10000 (decimal))
+09 Negative saturation
+
+The encoding is a bit funny here: For coeffs, these are signed values. The
+maximum value is 64 (100 decimal), the min is 9c.
+For the offset, the minimum value is FE0C, the maximum value is 01F4.
+For the deadband, the minimum value is 0, the max is 03E8.
+
+** Controls **
+OP= 41
+LEN= 03
+00 Channel
+01 Start/Stop
+ Val 00: Stop
+ Val 01: Start and play once.
+ Val 41: Start and play n times (See byte 02 below)
+02 Number of iterations n.
+
+** Init **
+
+*** Querying features ***
+OP= ff
+Query command. Length varies according to the query type.
+The general format of this packet is:
+ff 01 QUERY [INDEX] CHECKSUM
+responses are of the same form:
+FF LEN QUERY VALUE_QUERIED CHECKSUM2
+where LEN = 1 + length(VALUE_QUERIED)
+
+**** Query ram size ****
+QUERY = 42 ('B'uffer size)
+The device should reply with the same packet plus two additional bytes
+containing the size of the memory:
+ff 03 42 03 e8 CS would mean that the device has 1000 bytes of ram available.
+
+**** Query number of effects ****
+QUERY = 4e ('N'umber of effects)
+The device should respond by sending the number of effects that can be played
+at the same time (one byte)
+ff 02 4e 14 CS would stand for 20 effects.
+
+**** Vendor's id ****
+QUERY = 4d ('M'anufacturer)
+Query the vendors'id (2 bytes)
+
+**** Product id *****
+QUERY = 50 ('P'roduct)
+Query the product id (2 bytes)
+
+**** Open device ****
+QUERY = 4f ('O'pen)
+No data returned.
+
+**** Close device *****
+QUERY = 43 ('C')lose
+No data returned.
+
+**** Query effect ****
+QUERY = 45 ('E')
+Send effect type.
+Returns nonzero if supported (2 bytes)
+
+**** Firmware Version ****
+QUERY = 56 ('V'ersion)
+Sends back 3 bytes - major, minor, subminor
+
+*** Initialisation of the device ***
+
+**** Set Control ****
+!!! Device dependent, can be different on different models !!!
+OP= 40 <idx> <val> [<val>]
+LEN= 2 or 3
+00 Idx
+ Idx 00 Set dead zone (0..2048)
+ Idx 01 Ignore Deadman sensor (0..1)
+ Idx 02 Enable comm watchdog (0..1)
+ Idx 03 Set the strength of the spring (0..100)
+ Idx 04 Enable or disable the spring (0/1)
+ Idx 05 Set axis saturation threshold (0..2048)
+
+**** Set Effect State ****
+OP= 42 <val>
+LEN= 1
+00 State
+ Bit 3 Pause force feedback
+ Bit 2 Enable force feedback
+ Bit 0 Stop all effects
+
+**** Set overall gain ****
+OP= 43 <val>
+LEN= 1
+00 Gain
+ Val 00 = 0%
+ Val 40 = 50%
+ Val 80 = 100%
+
+** Parameter memory **
+
+Each device has a certain amount of memory to store parameters of effects.
+The amount of RAM may vary, I encountered values from 200 to 1000 bytes. Below
+is the amount of memory apparently needed for every set of parameters:
+ - period : 0c
+ - magnitude : 02
+ - attack and fade : 0e
+ - interactive : 08
+
+** Appendix: How to study the protocol ? **
+
+1. Generate effects using the force editor provided with the DirectX SDK, or
+use Immersion Studio (freely available at their web site in the developer section:
+www.immersion.com)
+2. Start a soft spying RS232 or USB (depending on where you connected your
+joystick/wheel). I used ComPortSpy from fCoder (alpha version!)
+3. Play the effect, and watch what happens on the spy screen.
+
+A few words about ComPortSpy:
+At first glance, this software seems, hum, well... buggy. In fact, data appear with a
+few seconds latency. Personally, I restart it every time I play an effect.
+Remember it's free (as in free beer) and alpha!
+
+** URLS **
+Check www.immerse.com for Immersion Studio, and www.fcoder.com for ComPortSpy.
+
+** Author of this document **
+Johann Deneux <johann.deneux@gmail.com>
+Home page at http://web.archive.org/web/*/http://www.esil.univ-mrs.fr
+
+Additions by Vojtech Pavlik.
+
+I-Force is trademark of Immersion Corp.
diff --git a/Documentation/input/input-programming.txt b/Documentation/input/input-programming.txt
new file mode 100644
index 000000000..7f8b9d97b
--- /dev/null
+++ b/Documentation/input/input-programming.txt
@@ -0,0 +1,302 @@
+Programming input drivers
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. Creating an input device driver
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1.0 The simplest example
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Here comes a very simple example of an input device driver. The device has
+just one button and the button is accessible at i/o port BUTTON_PORT. When
+pressed or released a BUTTON_IRQ happens. The driver could look like:
+
+#include <linux/input.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <asm/irq.h>
+#include <asm/io.h>
+
+static struct input_dev *button_dev;
+
+static irqreturn_t button_interrupt(int irq, void *dummy)
+{
+ input_report_key(button_dev, BTN_0, inb(BUTTON_PORT) & 1);
+ input_sync(button_dev);
+ return IRQ_HANDLED;
+}
+
+static int __init button_init(void)
+{
+ int error;
+
+ if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) {
+ printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq);
+ return -EBUSY;
+ }
+
+ button_dev = input_allocate_device();
+ if (!button_dev) {
+ printk(KERN_ERR "button.c: Not enough memory\n");
+ error = -ENOMEM;
+ goto err_free_irq;
+ }
+
+ button_dev->evbit[0] = BIT_MASK(EV_KEY);
+ button_dev->keybit[BIT_WORD(BTN_0)] = BIT_MASK(BTN_0);
+
+ error = input_register_device(button_dev);
+ if (error) {
+ printk(KERN_ERR "button.c: Failed to register device\n");
+ goto err_free_dev;
+ }
+
+ return 0;
+
+ err_free_dev:
+ input_free_device(button_dev);
+ err_free_irq:
+ free_irq(BUTTON_IRQ, button_interrupt);
+ return error;
+}
+
+static void __exit button_exit(void)
+{
+ input_unregister_device(button_dev);
+ free_irq(BUTTON_IRQ, button_interrupt);
+}
+
+module_init(button_init);
+module_exit(button_exit);
+
+1.1 What the example does
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+First it has to include the <linux/input.h> file, which interfaces to the
+input subsystem. This provides all the definitions needed.
+
+In the _init function, which is called either upon module load or when
+booting the kernel, it grabs the required resources (it should also check
+for the presence of the device).
+
+Then it allocates a new input device structure with input_allocate_device()
+and sets up input bitfields. This way the device driver tells the other
+parts of the input systems what it is - what events can be generated or
+accepted by this input device. Our example device can only generate EV_KEY
+type events, and from those only BTN_0 event code. Thus we only set these
+two bits. We could have used
+
+ set_bit(EV_KEY, button_dev.evbit);
+ set_bit(BTN_0, button_dev.keybit);
+
+as well, but with more than single bits the first approach tends to be
+shorter.
+
+Then the example driver registers the input device structure by calling
+
+ input_register_device(&button_dev);
+
+This adds the button_dev structure to linked lists of the input driver and
+calls device handler modules _connect functions to tell them a new input
+device has appeared. input_register_device() may sleep and therefore must
+not be called from an interrupt or with a spinlock held.
+
+While in use, the only used function of the driver is
+
+ button_interrupt()
+
+which upon every interrupt from the button checks its state and reports it
+via the
+
+ input_report_key()
+
+call to the input system. There is no need to check whether the interrupt
+routine isn't reporting two same value events (press, press for example) to
+the input system, because the input_report_* functions check that
+themselves.
+
+Then there is the
+
+ input_sync()
+
+call to tell those who receive the events that we've sent a complete report.
+This doesn't seem important in the one button case, but is quite important
+for for example mouse movement, where you don't want the X and Y values
+to be interpreted separately, because that'd result in a different movement.
+
+1.2 dev->open() and dev->close()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In case the driver has to repeatedly poll the device, because it doesn't
+have an interrupt coming from it and the polling is too expensive to be done
+all the time, or if the device uses a valuable resource (eg. interrupt), it
+can use the open and close callback to know when it can stop polling or
+release the interrupt and when it must resume polling or grab the interrupt
+again. To do that, we would add this to our example driver:
+
+static int button_open(struct input_dev *dev)
+{
+ if (request_irq(BUTTON_IRQ, button_interrupt, 0, "button", NULL)) {
+ printk(KERN_ERR "button.c: Can't allocate irq %d\n", button_irq);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static void button_close(struct input_dev *dev)
+{
+ free_irq(IRQ_AMIGA_VERTB, button_interrupt);
+}
+
+static int __init button_init(void)
+{
+ ...
+ button_dev->open = button_open;
+ button_dev->close = button_close;
+ ...
+}
+
+Note that input core keeps track of number of users for the device and
+makes sure that dev->open() is called only when the first user connects
+to the device and that dev->close() is called when the very last user
+disconnects. Calls to both callbacks are serialized.
+
+The open() callback should return a 0 in case of success or any nonzero value
+in case of failure. The close() callback (which is void) must always succeed.
+
+1.3 Basic event types
+~~~~~~~~~~~~~~~~~~~~~
+
+The most simple event type is EV_KEY, which is used for keys and buttons.
+It's reported to the input system via:
+
+ input_report_key(struct input_dev *dev, int code, int value)
+
+See linux/input.h for the allowable values of code (from 0 to KEY_MAX).
+Value is interpreted as a truth value, ie any nonzero value means key
+pressed, zero value means key released. The input code generates events only
+in case the value is different from before.
+
+In addition to EV_KEY, there are two more basic event types: EV_REL and
+EV_ABS. They are used for relative and absolute values supplied by the
+device. A relative value may be for example a mouse movement in the X axis.
+The mouse reports it as a relative difference from the last position,
+because it doesn't have any absolute coordinate system to work in. Absolute
+events are namely for joysticks and digitizers - devices that do work in an
+absolute coordinate systems.
+
+Having the device report EV_REL buttons is as simple as with EV_KEY, simply
+set the corresponding bits and call the
+
+ input_report_rel(struct input_dev *dev, int code, int value)
+
+function. Events are generated only for nonzero value.
+
+However EV_ABS requires a little special care. Before calling
+input_register_device, you have to fill additional fields in the input_dev
+struct for each absolute axis your device has. If our button device had also
+the ABS_X axis:
+
+ button_dev.absmin[ABS_X] = 0;
+ button_dev.absmax[ABS_X] = 255;
+ button_dev.absfuzz[ABS_X] = 4;
+ button_dev.absflat[ABS_X] = 8;
+
+Or, you can just say:
+
+ input_set_abs_params(button_dev, ABS_X, 0, 255, 4, 8);
+
+This setting would be appropriate for a joystick X axis, with the minimum of
+0, maximum of 255 (which the joystick *must* be able to reach, no problem if
+it sometimes reports more, but it must be able to always reach the min and
+max values), with noise in the data up to +- 4, and with a center flat
+position of size 8.
+
+If you don't need absfuzz and absflat, you can set them to zero, which mean
+that the thing is precise and always returns to exactly the center position
+(if it has any).
+
+1.4 BITS_TO_LONGS(), BIT_WORD(), BIT_MASK()
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+These three macros from bitops.h help some bitfield computations:
+
+ BITS_TO_LONGS(x) - returns the length of a bitfield array in longs for
+ x bits
+ BIT_WORD(x) - returns the index in the array in longs for bit x
+ BIT_MASK(x) - returns the index in a long for bit x
+
+1.5 The id* and name fields
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The dev->name should be set before registering the input device by the input
+device driver. It's a string like 'Generic button device' containing a
+user friendly name of the device.
+
+The id* fields contain the bus ID (PCI, USB, ...), vendor ID and device ID
+of the device. The bus IDs are defined in input.h. The vendor and device ids
+are defined in pci_ids.h, usb_ids.h and similar include files. These fields
+should be set by the input device driver before registering it.
+
+The idtype field can be used for specific information for the input device
+driver.
+
+The id and name fields can be passed to userland via the evdev interface.
+
+1.6 The keycode, keycodemax, keycodesize fields
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+These three fields should be used by input devices that have dense keymaps.
+The keycode is an array used to map from scancodes to input system keycodes.
+The keycode max should contain the size of the array and keycodesize the
+size of each entry in it (in bytes).
+
+Userspace can query and alter current scancode to keycode mappings using
+EVIOCGKEYCODE and EVIOCSKEYCODE ioctls on corresponding evdev interface.
+When a device has all 3 aforementioned fields filled in, the driver may
+rely on kernel's default implementation of setting and querying keycode
+mappings.
+
+1.7 dev->getkeycode() and dev->setkeycode()
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+getkeycode() and setkeycode() callbacks allow drivers to override default
+keycode/keycodesize/keycodemax mapping mechanism provided by input core
+and implement sparse keycode maps.
+
+1.8 Key autorepeat
+~~~~~~~~~~~~~~~~~~
+
+... is simple. It is handled by the input.c module. Hardware autorepeat is
+not used, because it's not present in many devices and even where it is
+present, it is broken sometimes (at keyboards: Toshiba notebooks). To enable
+autorepeat for your device, just set EV_REP in dev->evbit. All will be
+handled by the input system.
+
+1.9 Other event types, handling output events
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The other event types up to now are:
+
+EV_LED - used for the keyboard LEDs.
+EV_SND - used for keyboard beeps.
+
+They are very similar to for example key events, but they go in the other
+direction - from the system to the input device driver. If your input device
+driver can handle these events, it has to set the respective bits in evbit,
+*and* also the callback routine:
+
+ button_dev->event = button_event;
+
+int button_event(struct input_dev *dev, unsigned int type, unsigned int code, int value);
+{
+ if (type == EV_SND && code == SND_BELL) {
+ outb(value, BUTTON_BELL);
+ return 0;
+ }
+ return -1;
+}
+
+This callback routine can be called from an interrupt or a BH (although that
+isn't a rule), and thus must not sleep, and must not take too long to finish.
diff --git a/Documentation/input/input.txt b/Documentation/input/input.txt
new file mode 100644
index 000000000..0acfddbe2
--- /dev/null
+++ b/Documentation/input/input.txt
@@ -0,0 +1,290 @@
+ Linux Input drivers v1.0
+ (c) 1999-2001 Vojtech Pavlik <vojtech@ucw.cz>
+ Sponsored by SuSE
+----------------------------------------------------------------------------
+
+0. Disclaimer
+~~~~~~~~~~~~~
+ This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+ This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+ You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc., 59
+Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ Should you need to contact me, the author, you can do so either by e-mail
+- mail your message to <vojtech@ucw.cz>, or by paper mail: Vojtech Pavlik,
+Simunkova 1594, Prague 8, 182 00 Czech Republic
+
+ For your convenience, the GNU General Public License version 2 is included
+in the package: See the file COPYING.
+
+1. Introduction
+~~~~~~~~~~~~~~~
+ This is a collection of drivers that is designed to support all input
+devices under Linux. While it is currently used only on for USB input
+devices, future use (say 2.5/2.6) is expected to expand to replace
+most of the existing input system, which is why it lives in
+drivers/input/ instead of drivers/usb/.
+
+ The centre of the input drivers is the input module, which must be
+loaded before any other of the input modules - it serves as a way of
+communication between two groups of modules:
+
+1.1 Device drivers
+~~~~~~~~~~~~~~~~~~
+ These modules talk to the hardware (for example via USB), and provide
+events (keystrokes, mouse movements) to the input module.
+
+1.2 Event handlers
+~~~~~~~~~~~~~~~~~~
+ These modules get events from input and pass them where needed via
+various interfaces - keystrokes to the kernel, mouse movements via a
+simulated PS/2 interface to GPM and X and so on.
+
+2. Simple Usage
+~~~~~~~~~~~~~~~
+ For the most usual configuration, with one USB mouse and one USB keyboard,
+you'll have to load the following modules (or have them built in to the
+kernel):
+
+ input
+ mousedev
+ keybdev
+ usbcore
+ uhci_hcd or ohci_hcd or ehci_hcd
+ usbhid
+
+ After this, the USB keyboard will work straight away, and the USB mouse
+will be available as a character device on major 13, minor 63:
+
+ crw-r--r-- 1 root root 13, 63 Mar 28 22:45 mice
+
+ This device has to be created.
+ The commands to create it by hand are:
+
+ cd /dev
+ mkdir input
+ mknod input/mice c 13 63
+
+ After that you have to point GPM (the textmode mouse cut&paste tool) and
+XFree to this device to use it - GPM should be called like:
+
+ gpm -t ps2 -m /dev/input/mice
+
+ And in X:
+
+ Section "Pointer"
+ Protocol "ImPS/2"
+ Device "/dev/input/mice"
+ ZAxisMapping 4 5
+ EndSection
+
+ When you do all of the above, you can use your USB mouse and keyboard.
+
+3. Detailed Description
+~~~~~~~~~~~~~~~~~~~~~~~
+3.1 Device drivers
+~~~~~~~~~~~~~~~~~~
+ Device drivers are the modules that generate events. The events are
+however not useful without being handled, so you also will need to use some
+of the modules from section 3.2.
+
+3.1.1 usbhid
+~~~~~~~~~~~~
+ usbhid is the largest and most complex driver of the whole suite. It
+handles all HID devices, and because there is a very wide variety of them,
+and because the USB HID specification isn't simple, it needs to be this big.
+
+ Currently, it handles USB mice, joysticks, gamepads, steering wheels
+keyboards, trackballs and digitizers.
+
+ However, USB uses HID also for monitor controls, speaker controls, UPSs,
+LCDs and many other purposes.
+
+ The monitor and speaker controls should be easy to add to the hid/input
+interface, but for the UPSs and LCDs it doesn't make much sense. For this,
+the hiddev interface was designed. See Documentation/hid/hiddev.txt
+for more information about it.
+
+ The usage of the usbhid module is very simple, it takes no parameters,
+detects everything automatically and when a HID device is inserted, it
+detects it appropriately.
+
+ However, because the devices vary wildly, you might happen to have a
+device that doesn't work well. In that case #define DEBUG at the beginning
+of hid-core.c and send me the syslog traces.
+
+3.1.2 usbmouse
+~~~~~~~~~~~~~~
+ For embedded systems, for mice with broken HID descriptors and just any
+other use when the big usbhid wouldn't be a good choice, there is the
+usbmouse driver. It handles USB mice only. It uses a simpler HIDBP
+protocol. This also means the mice must support this simpler protocol. Not
+all do. If you don't have any strong reason to use this module, use usbhid
+instead.
+
+3.1.3 usbkbd
+~~~~~~~~~~~~
+ Much like usbmouse, this module talks to keyboards with a simplified
+HIDBP protocol. It's smaller, but doesn't support any extra special keys.
+Use usbhid instead if there isn't any special reason to use this.
+
+3.1.4 wacom
+~~~~~~~~~~~
+ This is a driver for Wacom Graphire and Intuos tablets. Not for Wacom
+PenPartner, that one is handled by the HID driver. Although the Intuos and
+Graphire tablets claim that they are HID tablets as well, they are not and
+thus need this specific driver.
+
+3.1.5 iforce
+~~~~~~~~~~~~
+ A driver for I-Force joysticks and wheels, both over USB and RS232.
+It includes ForceFeedback support now, even though Immersion
+Corp. considers the protocol a trade secret and won't disclose a word
+about it.
+
+3.2 Event handlers
+~~~~~~~~~~~~~~~~~~
+ Event handlers distribute the events from the devices to userland and
+kernel, as needed.
+
+3.2.1 keybdev
+~~~~~~~~~~~~~
+ keybdev is currently a rather ugly hack that translates the input
+events into architecture-specific keyboard raw mode (Xlated AT Set2 on
+x86), and passes them into the handle_scancode function of the
+keyboard.c module. This works well enough on all architectures that
+keybdev can generate rawmode on, other architectures can be added to
+it.
+
+ The right way would be to pass the events to keyboard.c directly,
+best if keyboard.c would itself be an event handler. This is done in
+the input patch, available on the webpage mentioned below.
+
+3.2.2 mousedev
+~~~~~~~~~~~~~~
+ mousedev is also a hack to make programs that use mouse input
+work. It takes events from either mice or digitizers/tablets and makes
+a PS/2-style (a la /dev/psaux) mouse device available to the
+userland. Ideally, the programs could use a more reasonable interface,
+for example evdev
+
+ Mousedev devices in /dev/input (as shown above) are:
+
+ crw-r--r-- 1 root root 13, 32 Mar 28 22:45 mouse0
+ crw-r--r-- 1 root root 13, 33 Mar 29 00:41 mouse1
+ crw-r--r-- 1 root root 13, 34 Mar 29 00:41 mouse2
+ crw-r--r-- 1 root root 13, 35 Apr 1 10:50 mouse3
+ ...
+ ...
+ crw-r--r-- 1 root root 13, 62 Apr 1 10:50 mouse30
+ crw-r--r-- 1 root root 13, 63 Apr 1 10:50 mice
+
+Each 'mouse' device is assigned to a single mouse or digitizer, except
+the last one - 'mice'. This single character device is shared by all
+mice and digitizers, and even if none are connected, the device is
+present. This is useful for hotplugging USB mice, so that programs
+can open the device even when no mice are present.
+
+ CONFIG_INPUT_MOUSEDEV_SCREEN_[XY] in the kernel configuration are
+the size of your screen (in pixels) in XFree86. This is needed if you
+want to use your digitizer in X, because its movement is sent to X
+via a virtual PS/2 mouse and thus needs to be scaled
+accordingly. These values won't be used if you use a mouse only.
+
+ Mousedev will generate either PS/2, ImPS/2 (Microsoft IntelliMouse) or
+ExplorerPS/2 (IntelliMouse Explorer) protocols, depending on what the
+program reading the data wishes. You can set GPM and X to any of
+these. You'll need ImPS/2 if you want to make use of a wheel on a USB
+mouse and ExplorerPS/2 if you want to use extra (up to 5) buttons.
+
+3.2.3 joydev
+~~~~~~~~~~~~
+ Joydev implements v0.x and v1.x Linux joystick api, much like
+drivers/char/joystick/joystick.c used to in earlier versions. See
+joystick-api.txt in the Documentation subdirectory for details. As
+soon as any joystick is connected, it can be accessed in /dev/input
+on:
+
+ crw-r--r-- 1 root root 13, 0 Apr 1 10:50 js0
+ crw-r--r-- 1 root root 13, 1 Apr 1 10:50 js1
+ crw-r--r-- 1 root root 13, 2 Apr 1 10:50 js2
+ crw-r--r-- 1 root root 13, 3 Apr 1 10:50 js3
+ ...
+
+And so on up to js31.
+
+3.2.4 evdev
+~~~~~~~~~~~
+ evdev is the generic input event interface. It passes the events
+generated in the kernel straight to the program, with timestamps. The
+API is still evolving, but should be usable now. It's described in
+section 5.
+
+ This should be the way for GPM and X to get keyboard and mouse
+events. It allows for multihead in X without any specific multihead
+kernel support. The event codes are the same on all architectures and
+are hardware independent.
+
+ The devices are in /dev/input:
+
+ crw-r--r-- 1 root root 13, 64 Apr 1 10:49 event0
+ crw-r--r-- 1 root root 13, 65 Apr 1 10:50 event1
+ crw-r--r-- 1 root root 13, 66 Apr 1 10:50 event2
+ crw-r--r-- 1 root root 13, 67 Apr 1 10:50 event3
+ ...
+
+And so on up to event31.
+
+4. Verifying if it works
+~~~~~~~~~~~~~~~~~~~~~~~~
+ Typing a couple keys on the keyboard should be enough to check that
+a USB keyboard works and is correctly connected to the kernel keyboard
+driver.
+
+ Doing a "cat /dev/input/mouse0" (c, 13, 32) will verify that a mouse
+is also emulated; characters should appear if you move it.
+
+ You can test the joystick emulation with the 'jstest' utility,
+available in the joystick package (see Documentation/input/joystick.txt).
+
+ You can test the event devices with the 'evtest' utility available
+in the LinuxConsole project CVS archive (see the URL below).
+
+5. Event interface
+~~~~~~~~~~~~~~~~~~
+ Should you want to add event device support into any application (X, gpm,
+svgalib ...) I <vojtech@ucw.cz> will be happy to provide you any help I
+can. Here goes a description of the current state of things, which is going
+to be extended, but not changed incompatibly as time goes:
+
+ You can use blocking and nonblocking reads, also select() on the
+/dev/input/eventX devices, and you'll always get a whole number of input
+events on a read. Their layout is:
+
+struct input_event {
+ struct timeval time;
+ unsigned short type;
+ unsigned short code;
+ unsigned int value;
+};
+
+ 'time' is the timestamp, it returns the time at which the event happened.
+Type is for example EV_REL for relative moment, EV_KEY for a keypress or
+release. More types are defined in include/linux/input.h.
+
+ 'code' is event code, for example REL_X or KEY_BACKSPACE, again a complete
+list is in include/linux/input.h.
+
+ 'value' is the value the event carries. Either a relative change for
+EV_REL, absolute new value for EV_ABS (joysticks ...), or 0 for EV_KEY for
+release, 1 for keypress and 2 for autorepeat.
+
diff --git a/Documentation/input/interactive.fig b/Documentation/input/interactive.fig
new file mode 100644
index 000000000..1e7de3877
--- /dev/null
+++ b/Documentation/input/interactive.fig
@@ -0,0 +1,42 @@
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 2 0 7 50 0 -1 6.000 0 0 -1 0 0 6
+ 1200 3600 1800 3600 2400 4800 3000 4800 4200 5700 4800 5700
+2 2 0 1 0 7 50 0 -1 4.000 0 0 -1 0 0 5
+ 1200 3150 4800 3150 4800 6300 1200 6300 1200 3150
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 1200 4800 4800 4800
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 4
+ 2400 4800 2400 6525 1950 7125 1950 7800
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 4
+ 3000 4800 3000 6525 3600 7125 3600 7800
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 1 3
+ 0 0 1.00 60.00 120.00
+ 3825 5400 4125 5100 5400 5100
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 1 3
+ 0 0 1.00 60.00 120.00
+ 2100 4200 2400 3900 5400 3900
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 4800 5700 5400 5700
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 1800 3600 5400 3600
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 1 3
+ 0 0 1.00 60.00 120.00
+ 2700 4800 2700 4425 5400 4425
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 1950 7800 3600 7800
+4 1 0 50 0 0 12 0.0000 4 135 810 2775 7725 Dead band\001
+4 0 0 50 0 0 12 0.0000 4 180 1155 5400 5700 right saturation\001
+4 0 0 50 0 0 12 0.0000 4 135 1065 5400 3600 left saturation\001
+4 0 0 50 0 0 12 0.0000 4 180 2505 5400 3900 left coeff ( positive in that case )\001
+4 0 0 50 0 0 12 0.0000 4 180 2640 5475 5100 right coeff ( negative in that case )\001
+4 0 0 50 0 0 12 0.0000 4 105 480 5400 4425 center\001
diff --git a/Documentation/input/joystick-api.txt b/Documentation/input/joystick-api.txt
new file mode 100644
index 000000000..943b18eac
--- /dev/null
+++ b/Documentation/input/joystick-api.txt
@@ -0,0 +1,314 @@
+ Joystick API Documentation -*-Text-*-
+
+ Ragnar Hojland Espinosa
+ <ragnar@macula.net>
+
+ 7 Aug 1998
+
+1. Initialization
+~~~~~~~~~~~~~~~~~
+
+Open the joystick device following the usual semantics (that is, with open).
+Since the driver now reports events instead of polling for changes,
+immediately after the open it will issue a series of synthetic events
+(JS_EVENT_INIT) that you can read to check the initial state of the
+joystick.
+
+By default, the device is opened in blocking mode.
+
+ int fd = open ("/dev/input/js0", O_RDONLY);
+
+
+2. Event Reading
+~~~~~~~~~~~~~~~~
+
+ struct js_event e;
+ read (fd, &e, sizeof(e));
+
+where js_event is defined as
+
+ struct js_event {
+ __u32 time; /* event timestamp in milliseconds */
+ __s16 value; /* value */
+ __u8 type; /* event type */
+ __u8 number; /* axis/button number */
+ };
+
+If the read is successful, it will return sizeof(e), unless you wanted to read
+more than one event per read as described in section 3.1.
+
+
+2.1 js_event.type
+~~~~~~~~~~~~~~~~~
+
+The possible values of ``type'' are
+
+ #define JS_EVENT_BUTTON 0x01 /* button pressed/released */
+ #define JS_EVENT_AXIS 0x02 /* joystick moved */
+ #define JS_EVENT_INIT 0x80 /* initial state of device */
+
+As mentioned above, the driver will issue synthetic JS_EVENT_INIT ORed
+events on open. That is, if it's issuing a INIT BUTTON event, the
+current type value will be
+
+ int type = JS_EVENT_BUTTON | JS_EVENT_INIT; /* 0x81 */
+
+If you choose not to differentiate between synthetic or real events
+you can turn off the JS_EVENT_INIT bits
+
+ type &= ~JS_EVENT_INIT; /* 0x01 */
+
+
+2.2 js_event.number
+~~~~~~~~~~~~~~~~~~~
+
+The values of ``number'' correspond to the axis or button that
+generated the event. Note that they carry separate numeration (that
+is, you have both an axis 0 and a button 0). Generally,
+
+ number
+ 1st Axis X 0
+ 1st Axis Y 1
+ 2nd Axis X 2
+ 2nd Axis Y 3
+ ...and so on
+
+Hats vary from one joystick type to another. Some can be moved in 8
+directions, some only in 4, The driver, however, always reports a hat as two
+independent axis, even if the hardware doesn't allow independent movement.
+
+
+2.3 js_event.value
+~~~~~~~~~~~~~~~~~~
+
+For an axis, ``value'' is a signed integer between -32767 and +32767
+representing the position of the joystick along that axis. If you
+don't read a 0 when the joystick is `dead', or if it doesn't span the
+full range, you should recalibrate it (with, for example, jscal).
+
+For a button, ``value'' for a press button event is 1 and for a release
+button event is 0.
+
+Though this
+
+ if (js_event.type == JS_EVENT_BUTTON) {
+ buttons_state ^= (1 << js_event.number);
+ }
+
+may work well if you handle JS_EVENT_INIT events separately,
+
+ if ((js_event.type & ~JS_EVENT_INIT) == JS_EVENT_BUTTON) {
+ if (js_event.value)
+ buttons_state |= (1 << js_event.number);
+ else
+ buttons_state &= ~(1 << js_event.number);
+ }
+
+is much safer since it can't lose sync with the driver. As you would
+have to write a separate handler for JS_EVENT_INIT events in the first
+snippet, this ends up being shorter.
+
+
+2.4 js_event.time
+~~~~~~~~~~~~~~~~~
+
+The time an event was generated is stored in ``js_event.time''. It's a time
+in milliseconds since ... well, since sometime in the past. This eases the
+task of detecting double clicks, figuring out if movement of axis and button
+presses happened at the same time, and similar.
+
+
+3. Reading
+~~~~~~~~~~
+
+If you open the device in blocking mode, a read will block (that is,
+wait) forever until an event is generated and effectively read. There
+are two alternatives if you can't afford to wait forever (which is,
+admittedly, a long time;)
+
+ a) use select to wait until there's data to be read on fd, or
+ until it timeouts. There's a good example on the select(2)
+ man page.
+
+ b) open the device in non-blocking mode (O_NONBLOCK)
+
+
+3.1 O_NONBLOCK
+~~~~~~~~~~~~~~
+
+If read returns -1 when reading in O_NONBLOCK mode, this isn't
+necessarily a "real" error (check errno(3)); it can just mean there
+are no events pending to be read on the driver queue. You should read
+all events on the queue (that is, until you get a -1).
+
+For example,
+
+ while (1) {
+ while (read (fd, &e, sizeof(e)) > 0) {
+ process_event (e);
+ }
+ /* EAGAIN is returned when the queue is empty */
+ if (errno != EAGAIN) {
+ /* error */
+ }
+ /* do something interesting with processed events */
+ }
+
+One reason for emptying the queue is that if it gets full you'll start
+missing events since the queue is finite, and older events will get
+overwritten.
+
+The other reason is that you want to know all what happened, and not
+delay the processing till later.
+
+Why can get the queue full? Because you don't empty the queue as
+mentioned, or because too much time elapses from one read to another
+and too many events to store in the queue get generated. Note that
+high system load may contribute to space those reads even more.
+
+If time between reads is enough to fill the queue and lose an event,
+the driver will switch to startup mode and next time you read it,
+synthetic events (JS_EVENT_INIT) will be generated to inform you of
+the actual state of the joystick.
+
+[As for version 1.2.8, the queue is circular and able to hold 64
+ events. You can increment this size bumping up JS_BUFF_SIZE in
+ joystick.h and recompiling the driver.]
+
+
+In the above code, you might as well want to read more than one event
+at a time using the typical read(2) functionality. For that, you would
+replace the read above with something like
+
+ struct js_event mybuffer[0xff];
+ int i = read (fd, mybuffer, sizeof(mybuffer));
+
+In this case, read would return -1 if the queue was empty, or some
+other value in which the number of events read would be i /
+sizeof(js_event) Again, if the buffer was full, it's a good idea to
+process the events and keep reading it until you empty the driver queue.
+
+
+4. IOCTLs
+~~~~~~~~~
+
+The joystick driver defines the following ioctl(2) operations.
+
+ /* function 3rd arg */
+ #define JSIOCGAXES /* get number of axes char */
+ #define JSIOCGBUTTONS /* get number of buttons char */
+ #define JSIOCGVERSION /* get driver version int */
+ #define JSIOCGNAME(len) /* get identifier string char */
+ #define JSIOCSCORR /* set correction values &js_corr */
+ #define JSIOCGCORR /* get correction values &js_corr */
+
+For example, to read the number of axes
+
+ char number_of_axes;
+ ioctl (fd, JSIOCGAXES, &number_of_axes);
+
+
+4.1 JSIOGCVERSION
+~~~~~~~~~~~~~~~~~
+
+JSIOGCVERSION is a good way to check in run-time whether the running
+driver is 1.0+ and supports the event interface. If it is not, the
+IOCTL will fail. For a compile-time decision, you can test the
+JS_VERSION symbol
+
+ #ifdef JS_VERSION
+ #if JS_VERSION > 0xsomething
+
+
+4.2 JSIOCGNAME
+~~~~~~~~~~~~~~
+
+JSIOCGNAME(len) allows you to get the name string of the joystick - the same
+as is being printed at boot time. The 'len' argument is the length of the
+buffer provided by the application asking for the name. It is used to avoid
+possible overrun should the name be too long.
+
+ char name[128];
+ if (ioctl(fd, JSIOCGNAME(sizeof(name)), name) < 0)
+ strncpy(name, "Unknown", sizeof(name));
+ printf("Name: %s\n", name);
+
+
+4.3 JSIOC[SG]CORR
+~~~~~~~~~~~~~~~~~
+
+For usage on JSIOC[SG]CORR I suggest you to look into jscal.c They are
+not needed in a normal program, only in joystick calibration software
+such as jscal or kcmjoy. These IOCTLs and data types aren't considered
+to be in the stable part of the API, and therefore may change without
+warning in following releases of the driver.
+
+Both JSIOCSCORR and JSIOCGCORR expect &js_corr to be able to hold
+information for all axis. That is, struct js_corr corr[MAX_AXIS];
+
+struct js_corr is defined as
+
+ struct js_corr {
+ __s32 coef[8];
+ __u16 prec;
+ __u16 type;
+ };
+
+and ``type''
+
+ #define JS_CORR_NONE 0x00 /* returns raw values */
+ #define JS_CORR_BROKEN 0x01 /* broken line */
+
+
+5. Backward compatibility
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The 0.x joystick driver API is quite limited and its usage is deprecated.
+The driver offers backward compatibility, though. Here's a quick summary:
+
+ struct JS_DATA_TYPE js;
+ while (1) {
+ if (read (fd, &js, JS_RETURN) != JS_RETURN) {
+ /* error */
+ }
+ usleep (1000);
+ }
+
+As you can figure out from the example, the read returns immediately,
+with the actual state of the joystick.
+
+ struct JS_DATA_TYPE {
+ int buttons; /* immediate button state */
+ int x; /* immediate x axis value */
+ int y; /* immediate y axis value */
+ };
+
+and JS_RETURN is defined as
+
+ #define JS_RETURN sizeof(struct JS_DATA_TYPE)
+
+To test the state of the buttons,
+
+ first_button_state = js.buttons & 1;
+ second_button_state = js.buttons & 2;
+
+The axis values do not have a defined range in the original 0.x driver,
+except for that the values are non-negative. The 1.2.8+ drivers use a
+fixed range for reporting the values, 1 being the minimum, 128 the
+center, and 255 maximum value.
+
+The v0.8.0.2 driver also had an interface for 'digital joysticks', (now
+called Multisystem joysticks in this driver), under /dev/djsX. This driver
+doesn't try to be compatible with that interface.
+
+
+6. Final Notes
+~~~~~~~~~~~~~~
+
+____/| Comments, additions, and specially corrections are welcome.
+\ o.O| Documentation valid for at least version 1.2.8 of the joystick
+ =(_)= driver and as usual, the ultimate source for documentation is
+ U to "Use The Source Luke" or, at your convenience, Vojtech ;)
+
+ - Ragnar
+EOF
diff --git a/Documentation/input/joystick-parport.txt b/Documentation/input/joystick-parport.txt
new file mode 100644
index 000000000..56870c70a
--- /dev/null
+++ b/Documentation/input/joystick-parport.txt
@@ -0,0 +1,542 @@
+ Linux Joystick parport drivers v2.0
+ (c) 1998-2000 Vojtech Pavlik <vojtech@ucw.cz>
+ (c) 1998 Andree Borrmann <a.borrmann@tu-bs.de>
+ Sponsored by SuSE
+----------------------------------------------------------------------------
+
+0. Disclaimer
+~~~~~~~~~~~~~
+ Any information in this file is provided as-is, without any guarantee that
+it will be true. So, use it at your own risk. The possible damages that can
+happen include burning your parallel port, and/or the sticks and joystick
+and maybe even more. Like when a lightning kills you it is not our problem.
+
+1. Intro
+~~~~~~~~
+ The joystick parport drivers are used for joysticks and gamepads not
+originally designed for PCs and other computers Linux runs on. Because of
+that, PCs usually lack the right ports to connect these devices to. Parallel
+port, because of its ability to change single bits at will, and providing
+both output and input bits is the most suitable port on the PC for
+connecting such devices.
+
+2. Devices supported
+~~~~~~~~~~~~~~~~~~~~
+ Many console and 8-bit computer gamepads and joysticks are supported. The
+following subsections discuss usage of each.
+
+2.1 NES and SNES
+~~~~~~~~~~~~~~~~
+ The Nintendo Entertainment System and Super Nintendo Entertainment System
+gamepads are widely available, and easy to get. Also, they are quite easy to
+connect to a PC, and don't need much processing speed (108 us for NES and
+165 us for SNES, compared to about 1000 us for PC gamepads) to communicate
+with them.
+
+ All NES and SNES use the same synchronous serial protocol, clocked from
+the computer's side (and thus timing insensitive). To allow up to 5 NES
+and/or SNES gamepads and/or SNES mice connected to the parallel port at once,
+the output lines of the parallel port are shared, while one of 5 available
+input lines is assigned to each gamepad.
+
+ This protocol is handled by the gamecon.c driver, so that's the one
+you'll use for NES, SNES gamepads and SNES mice.
+
+ The main problem with PC parallel ports is that they don't have +5V power
+source on any of their pins. So, if you want a reliable source of power
+for your pads, use either keyboard or joystick port, and make a pass-through
+cable. You can also pull the power directly from the power supply (the red
+wire is +5V).
+
+ If you want to use the parallel port only, you can take the power is from
+some data pin. For most gamepad and parport implementations only one pin is
+needed, and I'd recommend pin 9 for that, the highest data bit. On the other
+hand, if you are not planning to use anything else than NES / SNES on the
+port, anything between and including pin 4 and pin 9 will work.
+
+(pin 9) -----> Power
+
+ Unfortunately, there are pads that need a lot more of power, and parallel
+ports that can't give much current through the data pins. If this is your
+case, you'll need to use diodes (as a prevention of destroying your parallel
+port), and combine the currents of two or more data bits together.
+
+ Diodes
+(pin 9) ----|>|-------+------> Power
+ |
+(pin 8) ----|>|-------+
+ |
+(pin 7) ----|>|-------+
+ |
+ <and so on> :
+ |
+(pin 4) ----|>|-------+
+
+ Ground is quite easy. On PC's parallel port the ground is on any of the
+pins from pin 18 to pin 25. So use any pin of these you like for the ground.
+
+(pin 18) -----> Ground
+
+ NES and SNES pads have two input bits, Clock and Latch, which drive the
+serial transfer. These are connected to pins 2 and 3 of the parallel port,
+respectively.
+
+(pin 2) -----> Clock
+(pin 3) -----> Latch
+
+ And the last thing is the NES / SNES data wire. Only that isn't shared and
+each pad needs its own data pin. The parallel port pins are:
+
+(pin 10) -----> Pad 1 data
+(pin 11) -----> Pad 2 data
+(pin 12) -----> Pad 3 data
+(pin 13) -----> Pad 4 data
+(pin 15) -----> Pad 5 data
+
+ Note that pin 14 is not used, since it is not an input pin on the parallel
+port.
+
+ This is everything you need on the PC's side of the connection, now on to
+the gamepads side. The NES and SNES have different connectors. Also, there
+are quite a lot of NES clones, and because Nintendo used proprietary
+connectors for their machines, the cloners couldn't and used standard D-Cannon
+connectors. Anyway, if you've got a gamepad, and it has buttons A, B, Turbo
+A, Turbo B, Select and Start, and is connected through 5 wires, then it is
+either a NES or NES clone and will work with this connection. SNES gamepads
+also use 5 wires, but have more buttons. They will work as well, of course.
+
+Pinout for NES gamepads Pinout for SNES gamepads and mice
+
+ +----> Power +-----------------------\
+ | 7 | o o o o | x x o | 1
+ 5 +---------+ 7 +-----------------------/
+ | x x o \ | | | | |
+ | o o o o | | | | | +-> Ground
+ 4 +------------+ 1 | | | +------------> Data
+ | | | | | | +---------------> Latch
+ | | | +-> Ground | +------------------> Clock
+ | | +----> Clock +---------------------> Power
+ | +-------> Latch
+ +----------> Data
+
+Pinout for NES clone (db9) gamepads Pinout for NES clone (db15) gamepads
+
+ +---------> Clock +-----------------> Data
+ | +-------> Latch | +---> Ground
+ | | +-----> Data | |
+ | | | ___________________
+ _____________ 8 \ o x x x x x x o / 1
+ 5 \ x o o o x / 1 \ o x x o x x o /
+ \ x o x o / 15 `~~~~~~~~~~~~~' 9
+ 9 `~~~~~~~' 6 | | |
+ | | | | +----> Clock
+ | +----> Power | +----------> Latch
+ +--------> Ground +----------------> Power
+
+2.2 Multisystem joysticks
+~~~~~~~~~~~~~~~~~~~~~~~~~
+ In the era of 8-bit machines, there was something like de-facto standard
+for joystick ports. They were all digital, and all used D-Cannon 9 pin
+connectors (db9). Because of that, a single joystick could be used without
+hassle on Atari (130, 800XE, 800XL, 2600, 7200), Amiga, Commodore C64,
+Amstrad CPC, Sinclair ZX Spectrum and many other machines. That's why these
+joysticks are called "Multisystem".
+
+ Now their pinout:
+
+ +---------> Right
+ | +-------> Left
+ | | +-----> Down
+ | | | +---> Up
+ | | | |
+ _____________
+5 \ x o o o o / 1
+ \ x o x o /
+ 9 `~~~~~~~' 6
+ | |
+ | +----> Button
+ +--------> Ground
+
+ However, as time passed, extensions to this standard developed, and these
+were not compatible with each other:
+
+
+ Atari 130, 800/XL/XE MSX
+
+ +-----------> Power
+ +---------> Right | +---------> Right
+ | +-------> Left | | +-------> Left
+ | | +-----> Down | | | +-----> Down
+ | | | +---> Up | | | | +---> Up
+ | | | | | | | | |
+ _____________ _____________
+5 \ x o o o o / 1 5 \ o o o o o / 1
+ \ x o o o / \ o o o o /
+ 9 `~~~~~~~' 6 9 `~~~~~~~' 6
+ | | | | | | |
+ | | +----> Button | | | +----> Button 1
+ | +------> Power | | +------> Button 2
+ +--------> Ground | +--------> Output 3
+ +----------> Ground
+
+ Amstrad CPC Commodore C64
+
+ +-----------> Analog Y
+ +---------> Right | +---------> Right
+ | +-------> Left | | +-------> Left
+ | | +-----> Down | | | +-----> Down
+ | | | +---> Up | | | | +---> Up
+ | | | | | | | | |
+ _____________ _____________
+5 \ x o o o o / 1 5 \ o o o o o / 1
+ \ x o o o / \ o o o o /
+ 9 `~~~~~~~' 6 9 `~~~~~~~' 6
+ | | | | | | |
+ | | +----> Button 1 | | | +----> Button
+ | +------> Button 2 | | +------> Power
+ +--------> Ground | +--------> Ground
+ +----------> Analog X
+
+ Sinclair Spectrum +2A/+3 Amiga 1200
+
+ +-----------> Up +-----------> Button 3
+ | +---------> Fire | +---------> Right
+ | | | | +-------> Left
+ | | +-----> Ground | | | +-----> Down
+ | | | | | | | +---> Up
+ | | | | | | | |
+ _____________ _____________
+5 \ o o x o x / 1 5 \ o o o o o / 1
+ \ o o o o / \ o o o o /
+ 9 `~~~~~~~' 6 9 `~~~~~~~' 6
+ | | | | | | | |
+ | | | +----> Right | | | +----> Button 1
+ | | +------> Left | | +------> Power
+ | +--------> Ground | +--------> Ground
+ +----------> Down +----------> Button 2
+
+ And there were many others.
+
+2.2.1 Multisystem joysticks using db9.c
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ For the Multisystem joysticks, and their derivatives, the db9.c driver
+was written. It allows only one joystick / gamepad per parallel port, but
+the interface is easy to build and works with almost anything.
+
+ For the basic 1-button Multisystem joystick you connect its wires to the
+parallel port like this:
+
+(pin 1) -----> Power
+(pin 18) -----> Ground
+
+(pin 2) -----> Up
+(pin 3) -----> Down
+(pin 4) -----> Left
+(pin 5) -----> Right
+(pin 6) -----> Button 1
+
+ However, if the joystick is switch based (eg. clicks when you move it),
+you might or might not, depending on your parallel port, need 10 kOhm pullup
+resistors on each of the direction and button signals, like this:
+
+(pin 2) ------------+------> Up
+ Resistor |
+(pin 1) --[10kOhm]--+
+
+ Try without, and if it doesn't work, add them. For TTL based joysticks /
+gamepads the pullups are not needed.
+
+ For joysticks with two buttons you connect the second button to pin 7 on
+the parallel port.
+
+(pin 7) -----> Button 2
+
+ And that's it.
+
+ On a side note, if you have already built a different adapter for use with
+the digital joystick driver 0.8.0.2, this is also supported by the db9.c
+driver, as device type 8. (See section 3.2)
+
+2.2.2 Multisystem joysticks using gamecon.c
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ For some people just one joystick per parallel port is not enough, and/or
+want to use them on one parallel port together with NES/SNES/PSX pads. This is
+possible using the gamecon.c. It supports up to 5 devices of the above types,
+including 1 and 2 buttons Multisystem joysticks.
+
+ However, there is nothing for free. To allow more sticks to be used at
+once, you need the sticks to be purely switch based (that is non-TTL), and
+not to need power. Just a plain simple six switches inside. If your
+joystick can do more (eg. turbofire) you'll need to disable it totally first
+if you want to use gamecon.c.
+
+ Also, the connection is a bit more complex. You'll need a bunch of diodes,
+and one pullup resistor. First, you connect the Directions and the button
+the same as for db9, however with the diodes between.
+
+ Diodes
+(pin 2) -----|<|----> Up
+(pin 3) -----|<|----> Down
+(pin 4) -----|<|----> Left
+(pin 5) -----|<|----> Right
+(pin 6) -----|<|----> Button 1
+
+ For two button sticks you also connect the other button.
+
+(pin 7) -----|<|----> Button 2
+
+ And finally, you connect the Ground wire of the joystick, like done in
+this little schematic to Power and Data on the parallel port, as described
+for the NES / SNES pads in section 2.1 of this file - that is, one data pin
+for each joystick. The power source is shared.
+
+Data ------------+-----> Ground
+ Resistor |
+Power --[10kOhm]--+
+
+ And that's all, here we go!
+
+2.2.3 Multisystem joysticks using turbografx.c
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The TurboGraFX interface, designed by
+
+ Steffen Schwenke <schwenke@burg-halle.de>
+
+ allows up to 7 Multisystem joysticks connected to the parallel port. In
+Steffen's version, there is support for up to 5 buttons per joystick. However,
+since this doesn't work reliably on all parallel ports, the turbografx.c driver
+supports only one button per joystick. For more information on how to build the
+interface, see
+
+ http://www2.burg-halle.de/~schwenke/parport.html
+
+2.3 Sony Playstation
+~~~~~~~~~~~~~~~~~~~~
+
+ The PSX controller is supported by the gamecon.c. Pinout of the PSX
+controller (compatible with DirectPadPro):
+
+ +---------+---------+---------+
+9 | o o o | o o o | o o o | 1 parallel
+ \________|_________|________/ port pins
+ | | | | | |
+ | | | | | +--------> Clock --- (4)
+ | | | | +------------> Select --- (3)
+ | | | +---------------> Power --- (5-9)
+ | | +------------------> Ground --- (18-25)
+ | +-------------------------> Command --- (2)
+ +----------------------------> Data --- (one of 10,11,12,13,15)
+
+ The driver supports these controllers:
+
+ * Standard PSX Pad
+ * NegCon PSX Pad
+ * Analog PSX Pad (red mode)
+ * Analog PSX Pad (green mode)
+ * PSX Rumble Pad
+ * PSX DDR Pad
+
+2.4 Sega
+~~~~~~~~
+ All the Sega controllers are more or less based on the standard 2-button
+Multisystem joystick. However, since they don't use switches and use TTL
+logic, the only driver usable with them is the db9.c driver.
+
+2.4.1 Sega Master System
+~~~~~~~~~~~~~~~~~~~~~~~~
+ The SMS gamepads are almost exactly the same as normal 2-button
+Multisystem joysticks. Set the driver to Multi2 mode, use the corresponding
+parallel port pins, and the following schematic:
+
+ +-----------> Power
+ | +---------> Right
+ | | +-------> Left
+ | | | +-----> Down
+ | | | | +---> Up
+ | | | | |
+ _____________
+5 \ o o o o o / 1
+ \ o o x o /
+ 9 `~~~~~~~' 6
+ | | |
+ | | +----> Button 1
+ | +--------> Ground
+ +----------> Button 2
+
+2.4.2 Sega Genesis aka MegaDrive
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The Sega Genesis (in Europe sold as Sega MegaDrive) pads are an extension
+to the Sega Master System pads. They use more buttons (3+1, 5+1, 6+1). Use
+the following schematic:
+
+ +-----------> Power
+ | +---------> Right
+ | | +-------> Left
+ | | | +-----> Down
+ | | | | +---> Up
+ | | | | |
+ _____________
+5 \ o o o o o / 1
+ \ o o o o /
+ 9 `~~~~~~~' 6
+ | | | |
+ | | | +----> Button 1
+ | | +------> Select
+ | +--------> Ground
+ +----------> Button 2
+
+ The Select pin goes to pin 14 on the parallel port.
+
+(pin 14) -----> Select
+
+ The rest is the same as for Multi2 joysticks using db9.c
+
+2.4.3 Sega Saturn
+~~~~~~~~~~~~~~~~~
+ Sega Saturn has eight buttons, and to transfer that, without hacks like
+Genesis 6 pads use, it needs one more select pin. Anyway, it is still
+handled by the db9.c driver. Its pinout is very different from anything
+else. Use this schematic:
+
+ +-----------> Select 1
+ | +---------> Power
+ | | +-------> Up
+ | | | +-----> Down
+ | | | | +---> Ground
+ | | | | |
+ _____________
+5 \ o o o o o / 1
+ \ o o o o /
+ 9 `~~~~~~~' 6
+ | | | |
+ | | | +----> Select 2
+ | | +------> Right
+ | +--------> Left
+ +----------> Power
+
+ Select 1 is pin 14 on the parallel port, Select 2 is pin 16 on the
+parallel port.
+
+(pin 14) -----> Select 1
+(pin 16) -----> Select 2
+
+ The other pins (Up, Down, Right, Left, Power, Ground) are the same as for
+Multi joysticks using db9.c
+
+3. The drivers
+~~~~~~~~~~~~~~
+ There are three drivers for the parallel port interfaces. Each, as
+described above, allows to connect a different group of joysticks and pads.
+Here are described their command lines:
+
+3.1 gamecon.c
+~~~~~~~~~~~~~
+ Using gamecon.c you can connect up to five devices to one parallel port. It
+uses the following kernel/module command line:
+
+ gamecon.map=port,pad1,pad2,pad3,pad4,pad5
+
+ Where 'port' the number of the parport interface (eg. 0 for parport0).
+
+ And 'pad1' to 'pad5' are pad types connected to different data input pins
+(10,11,12,13,15), as described in section 2.1 of this file.
+
+ The types are:
+
+ Type | Joystick/Pad
+ --------------------
+ 0 | None
+ 1 | SNES pad
+ 2 | NES pad
+ 4 | Multisystem 1-button joystick
+ 5 | Multisystem 2-button joystick
+ 6 | N64 pad
+ 7 | Sony PSX controller
+ 8 | Sony PSX DDR controller
+ 9 | SNES mouse
+
+ The exact type of the PSX controller type is autoprobed when used, so
+hot swapping should work (but is not recommended).
+
+ Should you want to use more than one of parallel ports at once, you can use
+gamecon.map2 and gamecon.map3 as additional command line parameters for two
+more parallel ports.
+
+ There are two options specific to PSX driver portion. gamecon.psx_delay sets
+the command delay when talking to the controllers. The default of 25 should
+work but you can try lowering it for better performance. If your pads don't
+respond try raising it until they work. Setting the type to 8 allows the
+driver to be used with Dance Dance Revolution or similar games. Arrow keys are
+registered as key presses instead of X and Y axes.
+
+3.2 db9.c
+~~~~~~~~~
+ Apart from making an interface, there is nothing difficult on using the
+db9.c driver. It uses the following kernel/module command line:
+
+ db9.dev=port,type
+
+ Where 'port' is the number of the parport interface (eg. 0 for parport0).
+
+ Caveat here: This driver only works on bidirectional parallel ports. If
+your parallel port is recent enough, you should have no trouble with this.
+Old parallel ports may not have this feature.
+
+ 'Type' is the type of joystick or pad attached:
+
+ Type | Joystick/Pad
+ --------------------
+ 0 | None
+ 1 | Multisystem 1-button joystick
+ 2 | Multisystem 2-button joystick
+ 3 | Genesis pad (3+1 buttons)
+ 5 | Genesis pad (5+1 buttons)
+ 6 | Genesis pad (6+2 buttons)
+ 7 | Saturn pad (8 buttons)
+ 8 | Multisystem 1-button joystick (v0.8.0.2 pin-out)
+ 9 | Two Multisystem 1-button joysticks (v0.8.0.2 pin-out)
+ 10 | Amiga CD32 pad
+
+ Should you want to use more than one of these joysticks/pads at once, you
+can use db9.dev2 and db9.dev3 as additional command line parameters for two
+more joysticks/pads.
+
+3.3 turbografx.c
+~~~~~~~~~~~~~~~~
+ The turbografx.c driver uses a very simple kernel/module command line:
+
+ turbografx.map=port,js1,js2,js3,js4,js5,js6,js7
+
+ Where 'port' is the number of the parport interface (eg. 0 for parport0).
+
+ 'jsX' is the number of buttons the Multisystem joysticks connected to the
+interface ports 1-7 have. For a standard multisystem joystick, this is 1.
+
+ Should you want to use more than one of these interfaces at once, you can
+use turbografx.map2 and turbografx.map3 as additional command line parameters
+for two more interfaces.
+
+3.4 PC parallel port pinout
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ .----------------------------------------.
+ At the PC: \ 13 12 11 10 9 8 7 6 5 4 3 2 1 /
+ \ 25 24 23 22 21 20 19 18 17 16 15 14 /
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Pin | Name | Description
+ ~~~~~~|~~~~~~~~~|~~~~~~~~~~
+ 1 | /STROBE | Strobe
+ 2-9 | D0-D7 | Data Bit 0-7
+ 10 | /ACK | Acknowledge
+ 11 | BUSY | Busy
+ 12 | PE | Paper End
+ 13 | SELIN | Select In
+ 14 | /AUTOFD | Autofeed
+ 15 | /ERROR | Error
+ 16 | /INIT | Initialize
+ 17 | /SEL | Select
+ 18-25 | GND | Signal Ground
+
+3.5 End
+~~~~~~~
+ That's all, folks! Have fun!
diff --git a/Documentation/input/joystick.txt b/Documentation/input/joystick.txt
new file mode 100644
index 000000000..8d027dc86
--- /dev/null
+++ b/Documentation/input/joystick.txt
@@ -0,0 +1,586 @@
+ Linux Joystick driver v2.0.0
+ (c) 1996-2000 Vojtech Pavlik <vojtech@ucw.cz>
+ Sponsored by SuSE
+----------------------------------------------------------------------------
+
+0. Disclaimer
+~~~~~~~~~~~~~
+ This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+ This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+ You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc., 59
+Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ Should you need to contact me, the author, you can do so either by e-mail
+- mail your message to <vojtech@ucw.cz>, or by paper mail: Vojtech Pavlik,
+Simunkova 1594, Prague 8, 182 00 Czech Republic
+
+ For your convenience, the GNU General Public License version 2 is included
+in the package: See the file COPYING.
+
+1. Intro
+~~~~~~~~
+ The joystick driver for Linux provides support for a variety of joysticks
+and similar devices. It is based on a larger project aiming to support all
+input devices in Linux.
+
+ Should you encounter any problems while using the driver, or joysticks
+this driver can't make complete use of, I'm very interested in hearing about
+them. Bug reports and success stories are also welcome.
+
+ The input project website is at:
+
+ http://atrey.karlin.mff.cuni.cz/~vojtech/input/
+
+ There is also a mailing list for the driver at:
+
+ listproc@atrey.karlin.mff.cuni.cz
+
+send "subscribe linux-joystick Your Name" to subscribe to it.
+
+2. Usage
+~~~~~~~~
+ For basic usage you just choose the right options in kernel config and
+you should be set.
+
+2.1 inpututils
+~~~~~~~~~~~~~~
+For testing and other purposes (for example serial devices), a set of
+utilities is available at the abovementioned website. I suggest you download
+and install it before going on.
+
+2.2 Device nodes
+~~~~~~~~~~~~~~~~
+For applications to be able to use the joysticks,
+you'll have to manually create these nodes in /dev:
+
+cd /dev
+rm js*
+mkdir input
+mknod input/js0 c 13 0
+mknod input/js1 c 13 1
+mknod input/js2 c 13 2
+mknod input/js3 c 13 3
+ln -s input/js0 js0
+ln -s input/js1 js1
+ln -s input/js2 js2
+ln -s input/js3 js3
+
+For testing with inpututils it's also convenient to create these:
+
+mknod input/event0 c 13 64
+mknod input/event1 c 13 65
+mknod input/event2 c 13 66
+mknod input/event3 c 13 67
+
+2.4 Modules needed
+~~~~~~~~~~~~~~~~~~
+ For all joystick drivers to function, you'll need the userland interface
+module in kernel, either loaded or compiled in:
+
+ modprobe joydev
+
+ For gameport joysticks, you'll have to load the gameport driver as well;
+
+ modprobe ns558
+
+ And for serial port joysticks, you'll need the serial input line
+discipline module loaded and the inputattach utility started:
+
+ modprobe serport
+ inputattach -xxx /dev/tts/X &
+
+ In addition to that, you'll need the joystick driver module itself, most
+usually you'll have an analog joystick:
+
+ modprobe analog
+
+ For automatic module loading, something like this might work - tailor to
+your needs:
+
+ alias tty-ldisc-2 serport
+ alias char-major-13 input
+ above input joydev ns558 analog
+ options analog map=gamepad,none,2btn
+
+2.5 Verifying that it works
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ For testing the joystick driver functionality, there is the jstest
+program in the utilities package. You run it by typing:
+
+ jstest /dev/input/js0
+
+ And it should show a line with the joystick values, which update as you
+move the stick, and press its buttons. The axes should all be zero when the
+joystick is in the center position. They should not jitter by themselves to
+other close values, and they also should be steady in any other position of
+the stick. They should have the full range from -32767 to 32767. If all this
+is met, then it's all fine, and you can play the games. :)
+
+ If it's not, then there might be a problem. Try to calibrate the joystick,
+and if it still doesn't work, read the drivers section of this file, the
+troubleshooting section, and the FAQ.
+
+2.6. Calibration
+~~~~~~~~~~~~~~~~
+ For most joysticks you won't need any manual calibration, since the
+joystick should be autocalibrated by the driver automagically. However, with
+some analog joysticks, that either do not use linear resistors, or if you
+want better precision, you can use the jscal program
+
+ jscal -c /dev/input/js0
+
+ included in the joystick package to set better correction coefficients than
+what the driver would choose itself.
+
+ After calibrating the joystick you can verify if you like the new
+calibration using the jstest command, and if you do, you then can save the
+correction coefficients into a file
+
+ jscal -p /dev/input/js0 > /etc/joystick.cal
+
+ And add a line to your rc script executing that file
+
+ source /etc/joystick.cal
+
+ This way, after the next reboot your joystick will remain calibrated. You
+can also add the jscal -p line to your shutdown script.
+
+
+3. HW specific driver information
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In this section each of the separate hardware specific drivers is described.
+
+3.1 Analog joysticks
+~~~~~~~~~~~~~~~~~~~~
+ The analog.c uses the standard analog inputs of the gameport, and thus
+supports all standard joysticks and gamepads. It uses a very advanced
+routine for this, allowing for data precision that can't be found on any
+other system.
+
+ It also supports extensions like additional hats and buttons compatible
+with CH Flightstick Pro, ThrustMaster FCS or 6 and 8 button gamepads. Saitek
+Cyborg 'digital' joysticks are also supported by this driver, because
+they're basically souped up CHF sticks.
+
+ However the only types that can be autodetected are:
+
+* 2-axis, 4-button joystick
+* 3-axis, 4-button joystick
+* 4-axis, 4-button joystick
+* Saitek Cyborg 'digital' joysticks
+
+ For other joystick types (more/less axes, hats, and buttons) support
+you'll need to specify the types either on the kernel command line or on the
+module command line, when inserting analog into the kernel. The
+parameters are:
+
+ analog.map=<type1>,<type2>,<type3>,....
+
+ 'type' is type of the joystick from the table below, defining joysticks
+present on gameports in the system, starting with gameport0, second 'type'
+entry defining joystick on gameport1 and so on.
+
+ Type | Meaning
+ -----------------------------------
+ none | No analog joystick on that port
+ auto | Autodetect joystick
+ 2btn | 2-button n-axis joystick
+ y-joy | Two 2-button 2-axis joysticks on an Y-cable
+ y-pad | Two 2-button 2-axis gamepads on an Y-cable
+ fcs | Thrustmaster FCS compatible joystick
+ chf | Joystick with a CH Flightstick compatible hat
+ fullchf | CH Flightstick compatible with two hats and 6 buttons
+ gamepad | 4/6-button n-axis gamepad
+ gamepad8 | 8-button 2-axis gamepad
+
+ In case your joystick doesn't fit in any of the above categories, you can
+specify the type as a number by combining the bits in the table below. This
+is not recommended unless you really know what are you doing. It's not
+dangerous, but not simple either.
+
+ Bit | Meaning
+ --------------------------
+ 0 | Axis X1
+ 1 | Axis Y1
+ 2 | Axis X2
+ 3 | Axis Y2
+ 4 | Button A
+ 5 | Button B
+ 6 | Button C
+ 7 | Button D
+ 8 | CHF Buttons X and Y
+ 9 | CHF Hat 1
+ 10 | CHF Hat 2
+ 11 | FCS Hat
+ 12 | Pad Button X
+ 13 | Pad Button Y
+ 14 | Pad Button U
+ 15 | Pad Button V
+ 16 | Saitek F1-F4 Buttons
+ 17 | Saitek Digital Mode
+ 19 | GamePad
+ 20 | Joy2 Axis X1
+ 21 | Joy2 Axis Y1
+ 22 | Joy2 Axis X2
+ 23 | Joy2 Axis Y2
+ 24 | Joy2 Button A
+ 25 | Joy2 Button B
+ 26 | Joy2 Button C
+ 27 | Joy2 Button D
+ 31 | Joy2 GamePad
+
+3.2 Microsoft SideWinder joysticks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Microsoft 'Digital Overdrive' protocol is supported by the sidewinder.c
+module. All currently supported joysticks:
+
+* Microsoft SideWinder 3D Pro
+* Microsoft SideWinder Force Feedback Pro
+* Microsoft SideWinder Force Feedback Wheel
+* Microsoft SideWinder FreeStyle Pro
+* Microsoft SideWinder GamePad (up to four, chained)
+* Microsoft SideWinder Precision Pro
+* Microsoft SideWinder Precision Pro USB
+
+ are autodetected, and thus no module parameters are needed.
+
+ There is one caveat with the 3D Pro. There are 9 buttons reported,
+although the joystick has only 8. The 9th button is the mode switch on the
+rear side of the joystick. However, moving it, you'll reset the joystick,
+and make it unresponsive for about a one third of a second. Furthermore, the
+joystick will also re-center itself, taking the position it was in during
+this time as a new center position. Use it if you want, but think first.
+
+ The SideWinder Standard is not a digital joystick, and thus is supported
+by the analog driver described above.
+
+3.3 Logitech ADI devices
+~~~~~~~~~~~~~~~~~~~~~~~~
+ Logitech ADI protocol is supported by the adi.c module. It should support
+any Logitech device using this protocol. This includes, but is not limited
+to:
+
+* Logitech CyberMan 2
+* Logitech ThunderPad Digital
+* Logitech WingMan Extreme Digital
+* Logitech WingMan Formula
+* Logitech WingMan Interceptor
+* Logitech WingMan GamePad
+* Logitech WingMan GamePad USB
+* Logitech WingMan GamePad Extreme
+* Logitech WingMan Extreme Digital 3D
+
+ ADI devices are autodetected, and the driver supports up to two (any
+combination of) devices on a single gameport, using an Y-cable or chained
+together.
+
+ Logitech WingMan Joystick, Logitech WingMan Attack, Logitech WingMan
+Extreme and Logitech WingMan ThunderPad are not digital joysticks and are
+handled by the analog driver described above. Logitech WingMan Warrior and
+Logitech Magellan are supported by serial drivers described below. Logitech
+WingMan Force and Logitech WingMan Formula Force are supported by the
+I-Force driver described below. Logitech CyberMan is not supported yet.
+
+3.4 Gravis GrIP
+~~~~~~~~~~~~~~~
+ Gravis GrIP protocol is supported by the grip.c module. It currently
+supports:
+
+* Gravis GamePad Pro
+* Gravis BlackHawk Digital
+* Gravis Xterminator
+* Gravis Xterminator DualControl
+
+ All these devices are autodetected, and you can even use any combination
+of up to two of these pads either chained together or using an Y-cable on a
+single gameport.
+
+GrIP MultiPort isn't supported yet. Gravis Stinger is a serial device and is
+supported by the stinger driver. Other Gravis joysticks are supported by the
+analog driver.
+
+3.5 FPGaming A3D and MadCatz A3D
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The Assassin 3D protocol created by FPGaming, is used both by FPGaming
+themselves and is licensed to MadCatz. A3D devices are supported by the
+a3d.c module. It currently supports:
+
+* FPGaming Assassin 3D
+* MadCatz Panther
+* MadCatz Panther XL
+
+ All these devices are autodetected. Because the Assassin 3D and the Panther
+allow connecting analog joysticks to them, you'll need to load the analog
+driver as well to handle the attached joysticks.
+
+ The trackball should work with USB mousedev module as a normal mouse. See
+the USB documentation for how to setup an USB mouse.
+
+3.6 ThrustMaster DirectConnect (BSP)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The TM DirectConnect (BSP) protocol is supported by the tmdc.c
+module. This includes, but is not limited to:
+
+* ThrustMaster Millennium 3D Interceptor
+* ThrustMaster 3D Rage Pad
+* ThrustMaster Fusion Digital Game Pad
+
+ Devices not directly supported, but hopefully working are:
+
+* ThrustMaster FragMaster
+* ThrustMaster Attack Throttle
+
+ If you have one of these, contact me.
+
+ TMDC devices are autodetected, and thus no parameters to the module
+are needed. Up to two TMDC devices can be connected to one gameport, using
+an Y-cable.
+
+3.7 Creative Labs Blaster
+~~~~~~~~~~~~~~~~~~~~~~~~~
+ The Blaster protocol is supported by the cobra.c module. It supports only
+the:
+
+* Creative Blaster GamePad Cobra
+
+ Up to two of these can be used on a single gameport, using an Y-cable.
+
+3.8 Genius Digital joysticks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The Genius digitally communicating joysticks are supported by the gf2k.c
+module. This includes:
+
+* Genius Flight2000 F-23 joystick
+* Genius Flight2000 F-31 joystick
+* Genius G-09D gamepad
+
+ Other Genius digital joysticks are not supported yet, but support can be
+added fairly easily.
+
+3.9 InterAct Digital joysticks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The InterAct digitally communicating joysticks are supported by the
+interact.c module. This includes:
+
+* InterAct HammerHead/FX gamepad
+* InterAct ProPad8 gamepad
+
+ Other InterAct digital joysticks are not supported yet, but support can be
+added fairly easily.
+
+3.10 PDPI Lightning 4 gamecards
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ PDPI Lightning 4 gamecards are supported by the lightning.c module.
+Once the module is loaded, the analog driver can be used to handle the
+joysticks. Digitally communicating joystick will work only on port 0, while
+using Y-cables, you can connect up to 8 analog joysticks to a single L4
+card, 16 in case you have two in your system.
+
+3.11 Trident 4DWave / Aureal Vortex
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ Soundcards with a Trident 4DWave DX/NX or Aureal Vortex/Vortex2 chipsets
+provide an "Enhanced Game Port" mode where the soundcard handles polling the
+joystick. This mode is supported by the pcigame.c module. Once loaded the
+analog driver can use the enhanced features of these gameports..
+
+3.13 Crystal SoundFusion
+~~~~~~~~~~~~~~~~~~~~~~~~
+ Soundcards with Crystal SoundFusion chipsets provide an "Enhanced Game
+Port", much like the 4DWave or Vortex above. This, and also the normal mode
+for the port of the SoundFusion is supported by the cs461x.c module.
+
+3.14 SoundBlaster Live!
+~~~~~~~~~~~~~~~~~~~~~~~~
+ The Live! has a special PCI gameport, which, although it doesn't provide
+any "Enhanced" stuff like 4DWave and friends, is quite a bit faster than
+its ISA counterparts. It also requires special support, hence the
+emu10k1-gp.c module for it instead of the normal ns558.c one.
+
+3.15 SoundBlaster 64 and 128 - ES1370 and ES1371, ESS Solo1 and S3 SonicVibes
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ These PCI soundcards have specific gameports. They are handled by the
+sound drivers themselves. Make sure you select gameport support in the
+joystick menu and sound card support in the sound menu for your appropriate
+card.
+
+3.16 Amiga
+~~~~~~~~~~
+ Amiga joysticks, connected to an Amiga, are supported by the amijoy.c
+driver. Since they can't be autodetected, the driver has a command line.
+
+ amijoy.map=<a>,<b>
+
+ a and b define the joysticks connected to the JOY0DAT and JOY1DAT ports of
+the Amiga.
+
+ Value | Joystick type
+ ---------------------
+ 0 | None
+ 1 | 1-button digital joystick
+
+ No more joystick types are supported now, but that should change in the
+future if I get an Amiga in the reach of my fingers.
+
+3.17 Game console and 8-bit pads and joysticks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+See joystick-parport.txt for more info.
+
+3.18 SpaceTec/LabTec devices
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ SpaceTec serial devices communicate using the SpaceWare protocol. It is
+supported by the spaceorb.c and spaceball.c drivers. The devices currently
+supported by spaceorb.c are:
+
+* SpaceTec SpaceBall Avenger
+* SpaceTec SpaceOrb 360
+
+Devices currently supported by spaceball.c are:
+
+* SpaceTec SpaceBall 4000 FLX
+
+ In addition to having the spaceorb/spaceball and serport modules in the
+kernel, you also need to attach a serial port to it. to do that, run the
+inputattach program:
+
+ inputattach --spaceorb /dev/tts/x &
+or
+ inputattach --spaceball /dev/tts/x &
+
+where /dev/tts/x is the serial port which the device is connected to. After
+doing this, the device will be reported and will start working.
+
+ There is one caveat with the SpaceOrb. The button #6, the on the bottom
+side of the orb, although reported as an ordinary button, causes internal
+recentering of the spaceorb, moving the zero point to the position in which
+the ball is at the moment of pressing the button. So, think first before
+you bind it to some other function.
+
+SpaceTec SpaceBall 2003 FLX and 3003 FLX are not supported yet.
+
+3.19 Logitech SWIFT devices
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The SWIFT serial protocol is supported by the warrior.c module. It
+currently supports only the:
+
+* Logitech WingMan Warrior
+
+but in the future, Logitech CyberMan (the original one, not CM2) could be
+supported as well. To use the module, you need to run inputattach after you
+insert/compile the module into your kernel:
+
+ inputattach --warrior /dev/tts/x &
+
+/dev/tts/x is the serial port your Warrior is attached to.
+
+3.20 Magellan / Space Mouse
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The Magellan (or Space Mouse), manufactured by LogiCad3d (formerly Space
+Systems), for many other companies (Logitech, HP, ...) is supported by the
+joy-magellan module. It currently supports only the:
+
+* Magellan 3D
+* Space Mouse
+
+models, the additional buttons on the 'Plus' versions are not supported yet.
+
+ To use it, you need to attach the serial port to the driver using the
+
+ inputattach --magellan /dev/tts/x &
+
+command. After that the Magellan will be detected, initialized, will beep,
+and the /dev/input/jsX device should become usable.
+
+3.21 I-Force devices
+~~~~~~~~~~~~~~~~~~~~
+ All I-Force devices are supported by the iforce module. This includes:
+
+* AVB Mag Turbo Force
+* AVB Top Shot Pegasus
+* AVB Top Shot Force Feedback Racing Wheel
+* Logitech WingMan Force
+* Logitech WingMan Force Wheel
+* Guillemot Race Leader Force Feedback
+* Guillemot Force Feedback Racing Wheel
+* Thrustmaster Motor Sport GT
+
+ To use it, you need to attach the serial port to the driver using the
+
+ inputattach --iforce /dev/tts/x &
+
+command. After that the I-Force device will be detected, and the
+/dev/input/jsX device should become usable.
+
+ In case you're using the device via the USB port, the inputattach command
+isn't needed.
+
+ The I-Force driver now supports force feedback via the event interface.
+
+ Please note that Logitech WingMan *3D devices are _not_ supported by this
+module, rather by hid. Force feedback is not supported for those devices.
+Logitech gamepads are also hid devices.
+
+3.22 Gravis Stinger gamepad
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The Gravis Stinger serial port gamepad, designed for use with laptop
+computers, is supported by the stinger.c module. To use it, attach the
+serial port to the driver using:
+
+ inputattach --stinger /dev/tty/x &
+
+where x is the number of the serial port.
+
+4. Troubleshooting
+~~~~~~~~~~~~~~~~~~
+ There is quite a high probability that you run into some problems. For
+testing whether the driver works, if in doubt, use the jstest utility in
+some of its modes. The most useful modes are "normal" - for the 1.x
+interface, and "old" for the "0.x" interface. You run it by typing:
+
+ jstest --normal /dev/input/js0
+ jstest --old /dev/input/js0
+
+ Additionally you can do a test with the evtest utility:
+
+ evtest /dev/input/event0
+
+ Oh, and read the FAQ! :)
+
+5. FAQ
+~~~~~~
+Q: Running 'jstest /dev/input/js0' results in "File not found" error. What's the
+ cause?
+A: The device files don't exist. Create them (see section 2.2).
+
+Q: Is it possible to connect my old Atari/Commodore/Amiga/console joystick
+ or pad that uses a 9-pin D-type cannon connector to the serial port of my
+ PC?
+A: Yes, it is possible, but it'll burn your serial port or the pad. It
+ won't work, of course.
+
+Q: My joystick doesn't work with Quake / Quake 2. What's the cause?
+A: Quake / Quake 2 don't support joystick. Use joy2key to simulate keypresses
+ for them.
+
+6. Programming Interface
+~~~~~~~~~~~~~~~~~~~~~~~~
+ The 1.0 driver uses a new, event based approach to the joystick driver.
+Instead of the user program polling for the joystick values, the joystick
+driver now reports only any changes of its state. See joystick-api.txt,
+joystick.h and jstest.c included in the joystick package for more
+information. The joystick device can be used in either blocking or
+nonblocking mode and supports select() calls.
+
+ For backward compatibility the old (v0.x) interface is still included.
+Any call to the joystick driver using the old interface will return values
+that are compatible to the old interface. This interface is still limited
+to 2 axes, and applications using it usually decode only 2 buttons, although
+the driver provides up to 32.
diff --git a/Documentation/input/multi-touch-protocol.txt b/Documentation/input/multi-touch-protocol.txt
new file mode 100644
index 000000000..b85d000fa
--- /dev/null
+++ b/Documentation/input/multi-touch-protocol.txt
@@ -0,0 +1,418 @@
+Multi-touch (MT) Protocol
+-------------------------
+ Copyright (C) 2009-2010 Henrik Rydberg <rydberg@euromail.se>
+
+
+Introduction
+------------
+
+In order to utilize the full power of the new multi-touch and multi-user
+devices, a way to report detailed data from multiple contacts, i.e.,
+objects in direct contact with the device surface, is needed. This
+document describes the multi-touch (MT) protocol which allows kernel
+drivers to report details for an arbitrary number of contacts.
+
+The protocol is divided into two types, depending on the capabilities of the
+hardware. For devices handling anonymous contacts (type A), the protocol
+describes how to send the raw data for all contacts to the receiver. For
+devices capable of tracking identifiable contacts (type B), the protocol
+describes how to send updates for individual contacts via event slots.
+
+
+Protocol Usage
+--------------
+
+Contact details are sent sequentially as separate packets of ABS_MT
+events. Only the ABS_MT events are recognized as part of a contact
+packet. Since these events are ignored by current single-touch (ST)
+applications, the MT protocol can be implemented on top of the ST protocol
+in an existing driver.
+
+Drivers for type A devices separate contact packets by calling
+input_mt_sync() at the end of each packet. This generates a SYN_MT_REPORT
+event, which instructs the receiver to accept the data for the current
+contact and prepare to receive another.
+
+Drivers for type B devices separate contact packets by calling
+input_mt_slot(), with a slot as argument, at the beginning of each packet.
+This generates an ABS_MT_SLOT event, which instructs the receiver to
+prepare for updates of the given slot.
+
+All drivers mark the end of a multi-touch transfer by calling the usual
+input_sync() function. This instructs the receiver to act upon events
+accumulated since last EV_SYN/SYN_REPORT and prepare to receive a new set
+of events/packets.
+
+The main difference between the stateless type A protocol and the stateful
+type B slot protocol lies in the usage of identifiable contacts to reduce
+the amount of data sent to userspace. The slot protocol requires the use of
+the ABS_MT_TRACKING_ID, either provided by the hardware or computed from
+the raw data [5].
+
+For type A devices, the kernel driver should generate an arbitrary
+enumeration of the full set of anonymous contacts currently on the
+surface. The order in which the packets appear in the event stream is not
+important. Event filtering and finger tracking is left to user space [3].
+
+For type B devices, the kernel driver should associate a slot with each
+identified contact, and use that slot to propagate changes for the contact.
+Creation, replacement and destruction of contacts is achieved by modifying
+the ABS_MT_TRACKING_ID of the associated slot. A non-negative tracking id
+is interpreted as a contact, and the value -1 denotes an unused slot. A
+tracking id not previously present is considered new, and a tracking id no
+longer present is considered removed. Since only changes are propagated,
+the full state of each initiated contact has to reside in the receiving
+end. Upon receiving an MT event, one simply updates the appropriate
+attribute of the current slot.
+
+Some devices identify and/or track more contacts than they can report to the
+driver. A driver for such a device should associate one type B slot with each
+contact that is reported by the hardware. Whenever the identity of the
+contact associated with a slot changes, the driver should invalidate that
+slot by changing its ABS_MT_TRACKING_ID. If the hardware signals that it is
+tracking more contacts than it is currently reporting, the driver should use
+a BTN_TOOL_*TAP event to inform userspace of the total number of contacts
+being tracked by the hardware at that moment. The driver should do this by
+explicitly sending the corresponding BTN_TOOL_*TAP event and setting
+use_count to false when calling input_mt_report_pointer_emulation().
+The driver should only advertise as many slots as the hardware can report.
+Userspace can detect that a driver can report more total contacts than slots
+by noting that the largest supported BTN_TOOL_*TAP event is larger than the
+total number of type B slots reported in the absinfo for the ABS_MT_SLOT axis.
+
+The minimum value of the ABS_MT_SLOT axis must be 0.
+
+Protocol Example A
+------------------
+
+Here is what a minimal event sequence for a two-contact touch would look
+like for a type A device:
+
+ ABS_MT_POSITION_X x[0]
+ ABS_MT_POSITION_Y y[0]
+ SYN_MT_REPORT
+ ABS_MT_POSITION_X x[1]
+ ABS_MT_POSITION_Y y[1]
+ SYN_MT_REPORT
+ SYN_REPORT
+
+The sequence after moving one of the contacts looks exactly the same; the
+raw data for all present contacts are sent between every synchronization
+with SYN_REPORT.
+
+Here is the sequence after lifting the first contact:
+
+ ABS_MT_POSITION_X x[1]
+ ABS_MT_POSITION_Y y[1]
+ SYN_MT_REPORT
+ SYN_REPORT
+
+And here is the sequence after lifting the second contact:
+
+ SYN_MT_REPORT
+ SYN_REPORT
+
+If the driver reports one of BTN_TOUCH or ABS_PRESSURE in addition to the
+ABS_MT events, the last SYN_MT_REPORT event may be omitted. Otherwise, the
+last SYN_REPORT will be dropped by the input core, resulting in no
+zero-contact event reaching userland.
+
+
+Protocol Example B
+------------------
+
+Here is what a minimal event sequence for a two-contact touch would look
+like for a type B device:
+
+ ABS_MT_SLOT 0
+ ABS_MT_TRACKING_ID 45
+ ABS_MT_POSITION_X x[0]
+ ABS_MT_POSITION_Y y[0]
+ ABS_MT_SLOT 1
+ ABS_MT_TRACKING_ID 46
+ ABS_MT_POSITION_X x[1]
+ ABS_MT_POSITION_Y y[1]
+ SYN_REPORT
+
+Here is the sequence after moving contact 45 in the x direction:
+
+ ABS_MT_SLOT 0
+ ABS_MT_POSITION_X x[0]
+ SYN_REPORT
+
+Here is the sequence after lifting the contact in slot 0:
+
+ ABS_MT_TRACKING_ID -1
+ SYN_REPORT
+
+The slot being modified is already 0, so the ABS_MT_SLOT is omitted. The
+message removes the association of slot 0 with contact 45, thereby
+destroying contact 45 and freeing slot 0 to be reused for another contact.
+
+Finally, here is the sequence after lifting the second contact:
+
+ ABS_MT_SLOT 1
+ ABS_MT_TRACKING_ID -1
+ SYN_REPORT
+
+
+Event Usage
+-----------
+
+A set of ABS_MT events with the desired properties is defined. The events
+are divided into categories, to allow for partial implementation. The
+minimum set consists of ABS_MT_POSITION_X and ABS_MT_POSITION_Y, which
+allows for multiple contacts to be tracked. If the device supports it, the
+ABS_MT_TOUCH_MAJOR and ABS_MT_WIDTH_MAJOR may be used to provide the size
+of the contact area and approaching tool, respectively.
+
+The TOUCH and WIDTH parameters have a geometrical interpretation; imagine
+looking through a window at someone gently holding a finger against the
+glass. You will see two regions, one inner region consisting of the part
+of the finger actually touching the glass, and one outer region formed by
+the perimeter of the finger. The center of the touching region (a) is
+ABS_MT_POSITION_X/Y and the center of the approaching finger (b) is
+ABS_MT_TOOL_X/Y. The touch diameter is ABS_MT_TOUCH_MAJOR and the finger
+diameter is ABS_MT_WIDTH_MAJOR. Now imagine the person pressing the finger
+harder against the glass. The touch region will increase, and in general,
+the ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR, which is always smaller
+than unity, is related to the contact pressure. For pressure-based devices,
+ABS_MT_PRESSURE may be used to provide the pressure on the contact area
+instead. Devices capable of contact hovering can use ABS_MT_DISTANCE to
+indicate the distance between the contact and the surface.
+
+
+ Linux MT Win8
+ __________ _______________________
+ / \ | |
+ / \ | |
+ / ____ \ | |
+ / / \ \ | |
+ \ \ a \ \ | a |
+ \ \____/ \ | |
+ \ \ | |
+ \ b \ | b |
+ \ \ | |
+ \ \ | |
+ \ \ | |
+ \ / | |
+ \ / | |
+ \ / | |
+ \__________/ |_______________________|
+
+
+In addition to the MAJOR parameters, the oval shape of the touch and finger
+regions can be described by adding the MINOR parameters, such that MAJOR
+and MINOR are the major and minor axis of an ellipse. The orientation of
+the touch ellipse can be described with the ORIENTATION parameter, and the
+direction of the finger ellipse is given by the vector (a - b).
+
+For type A devices, further specification of the touch shape is possible
+via ABS_MT_BLOB_ID.
+
+The ABS_MT_TOOL_TYPE may be used to specify whether the touching tool is a
+finger or a pen or something else. Finally, the ABS_MT_TRACKING_ID event
+may be used to track identified contacts over time [5].
+
+In the type B protocol, ABS_MT_TOOL_TYPE and ABS_MT_TRACKING_ID are
+implicitly handled by input core; drivers should instead call
+input_mt_report_slot_state().
+
+
+Event Semantics
+---------------
+
+ABS_MT_TOUCH_MAJOR
+
+The length of the major axis of the contact. The length should be given in
+surface units. If the surface has an X times Y resolution, the largest
+possible value of ABS_MT_TOUCH_MAJOR is sqrt(X^2 + Y^2), the diagonal [4].
+
+ABS_MT_TOUCH_MINOR
+
+The length, in surface units, of the minor axis of the contact. If the
+contact is circular, this event can be omitted [4].
+
+ABS_MT_WIDTH_MAJOR
+
+The length, in surface units, of the major axis of the approaching
+tool. This should be understood as the size of the tool itself. The
+orientation of the contact and the approaching tool are assumed to be the
+same [4].
+
+ABS_MT_WIDTH_MINOR
+
+The length, in surface units, of the minor axis of the approaching
+tool. Omit if circular [4].
+
+The above four values can be used to derive additional information about
+the contact. The ratio ABS_MT_TOUCH_MAJOR / ABS_MT_WIDTH_MAJOR approximates
+the notion of pressure. The fingers of the hand and the palm all have
+different characteristic widths.
+
+ABS_MT_PRESSURE
+
+The pressure, in arbitrary units, on the contact area. May be used instead
+of TOUCH and WIDTH for pressure-based devices or any device with a spatial
+signal intensity distribution.
+
+ABS_MT_DISTANCE
+
+The distance, in surface units, between the contact and the surface. Zero
+distance means the contact is touching the surface. A positive number means
+the contact is hovering above the surface.
+
+ABS_MT_ORIENTATION
+
+The orientation of the touching ellipse. The value should describe a signed
+quarter of a revolution clockwise around the touch center. The signed value
+range is arbitrary, but zero should be returned for an ellipse aligned with
+the Y axis of the surface, a negative value when the ellipse is turned to
+the left, and a positive value when the ellipse is turned to the
+right. When completely aligned with the X axis, the range max should be
+returned.
+
+Touch ellipsis are symmetrical by default. For devices capable of true 360
+degree orientation, the reported orientation must exceed the range max to
+indicate more than a quarter of a revolution. For an upside-down finger,
+range max * 2 should be returned.
+
+Orientation can be omitted if the touch area is circular, or if the
+information is not available in the kernel driver. Partial orientation
+support is possible if the device can distinguish between the two axis, but
+not (uniquely) any values in between. In such cases, the range of
+ABS_MT_ORIENTATION should be [0, 1] [4].
+
+ABS_MT_POSITION_X
+
+The surface X coordinate of the center of the touching ellipse.
+
+ABS_MT_POSITION_Y
+
+The surface Y coordinate of the center of the touching ellipse.
+
+ABS_MT_TOOL_X
+
+The surface X coordinate of the center of the approaching tool. Omit if
+the device cannot distinguish between the intended touch point and the
+tool itself.
+
+ABS_MT_TOOL_Y
+
+The surface Y coordinate of the center of the approaching tool. Omit if the
+device cannot distinguish between the intended touch point and the tool
+itself.
+
+The four position values can be used to separate the position of the touch
+from the position of the tool. If both positions are present, the major
+tool axis points towards the touch point [1]. Otherwise, the tool axes are
+aligned with the touch axes.
+
+ABS_MT_TOOL_TYPE
+
+The type of approaching tool. A lot of kernel drivers cannot distinguish
+between different tool types, such as a finger or a pen. In such cases, the
+event should be omitted. The protocol currently supports MT_TOOL_FINGER,
+MT_TOOL_PEN, and MT_TOOL_PALM [2]. For type B devices, this event is handled
+by input core; drivers should instead use input_mt_report_slot_state().
+A contact's ABS_MT_TOOL_TYPE may change over time while still touching the
+device, because the firmware may not be able to determine which tool is being
+used when it first appears.
+
+ABS_MT_BLOB_ID
+
+The BLOB_ID groups several packets together into one arbitrarily shaped
+contact. The sequence of points forms a polygon which defines the shape of
+the contact. This is a low-level anonymous grouping for type A devices, and
+should not be confused with the high-level trackingID [5]. Most type A
+devices do not have blob capability, so drivers can safely omit this event.
+
+ABS_MT_TRACKING_ID
+
+The TRACKING_ID identifies an initiated contact throughout its life cycle
+[5]. The value range of the TRACKING_ID should be large enough to ensure
+unique identification of a contact maintained over an extended period of
+time. For type B devices, this event is handled by input core; drivers
+should instead use input_mt_report_slot_state().
+
+
+Event Computation
+-----------------
+
+The flora of different hardware unavoidably leads to some devices fitting
+better to the MT protocol than others. To simplify and unify the mapping,
+this section gives recipes for how to compute certain events.
+
+For devices reporting contacts as rectangular shapes, signed orientation
+cannot be obtained. Assuming X and Y are the lengths of the sides of the
+touching rectangle, here is a simple formula that retains the most
+information possible:
+
+ ABS_MT_TOUCH_MAJOR := max(X, Y)
+ ABS_MT_TOUCH_MINOR := min(X, Y)
+ ABS_MT_ORIENTATION := bool(X > Y)
+
+The range of ABS_MT_ORIENTATION should be set to [0, 1], to indicate that
+the device can distinguish between a finger along the Y axis (0) and a
+finger along the X axis (1).
+
+For win8 devices with both T and C coordinates, the position mapping is
+
+ ABS_MT_POSITION_X := T_X
+ ABS_MT_POSITION_Y := T_Y
+ ABS_MT_TOOL_X := C_X
+ ABS_MT_TOOL_X := C_Y
+
+Unfortunately, there is not enough information to specify both the touching
+ellipse and the tool ellipse, so one has to resort to approximations. One
+simple scheme, which is compatible with earlier usage, is:
+
+ ABS_MT_TOUCH_MAJOR := min(X, Y)
+ ABS_MT_TOUCH_MINOR := <not used>
+ ABS_MT_ORIENTATION := <not used>
+ ABS_MT_WIDTH_MAJOR := min(X, Y) + distance(T, C)
+ ABS_MT_WIDTH_MINOR := min(X, Y)
+
+Rationale: We have no information about the orientation of the touching
+ellipse, so approximate it with an inscribed circle instead. The tool
+ellipse should align with the vector (T - C), so the diameter must
+increase with distance(T, C). Finally, assume that the touch diameter is
+equal to the tool thickness, and we arrive at the formulas above.
+
+Finger Tracking
+---------------
+
+The process of finger tracking, i.e., to assign a unique trackingID to each
+initiated contact on the surface, is a Euclidian Bipartite Matching
+problem. At each event synchronization, the set of actual contacts is
+matched to the set of contacts from the previous synchronization. A full
+implementation can be found in [3].
+
+
+Gestures
+--------
+
+In the specific application of creating gesture events, the TOUCH and WIDTH
+parameters can be used to, e.g., approximate finger pressure or distinguish
+between index finger and thumb. With the addition of the MINOR parameters,
+one can also distinguish between a sweeping finger and a pointing finger,
+and with ORIENTATION, one can detect twisting of fingers.
+
+
+Notes
+-----
+
+In order to stay compatible with existing applications, the data reported
+in a finger packet must not be recognized as single-touch events.
+
+For type A devices, all finger data bypasses input filtering, since
+subsequent events of the same type refer to different fingers.
+
+For example usage of the type A protocol, see the bcm5974 driver. For
+example usage of the type B protocol, see the hid-egalax driver.
+
+[1] Also, the difference (TOOL_X - POSITION_X) can be used to model tilt.
+[2] The list can of course be extended.
+[3] The mtdev project: http://bitmath.org/code/mtdev/.
+[4] See the section on event computation.
+[5] See the section on finger tracking.
diff --git a/Documentation/input/notifier.txt b/Documentation/input/notifier.txt
new file mode 100644
index 000000000..95172ca6f
--- /dev/null
+++ b/Documentation/input/notifier.txt
@@ -0,0 +1,52 @@
+Keyboard notifier
+
+One can use register_keyboard_notifier to get called back on keyboard
+events (see kbd_keycode() function for details). The passed structure is
+keyboard_notifier_param:
+
+- 'vc' always provide the VC for which the keyboard event applies;
+- 'down' is 1 for a key press event, 0 for a key release;
+- 'shift' is the current modifier state, mask bit indexes are KG_*;
+- 'value' depends on the type of event.
+
+- KBD_KEYCODE events are always sent before other events, value is the keycode.
+- KBD_UNBOUND_KEYCODE events are sent if the keycode is not bound to a keysym.
+ value is the keycode.
+- KBD_UNICODE events are sent if the keycode -> keysym translation produced a
+ unicode character. value is the unicode value.
+- KBD_KEYSYM events are sent if the keycode -> keysym translation produced a
+ non-unicode character. value is the keysym.
+- KBD_POST_KEYSYM events are sent after the treatment of non-unicode keysyms.
+ That permits one to inspect the resulting LEDs for instance.
+
+For each kind of event but the last, the callback may return NOTIFY_STOP in
+order to "eat" the event: the notify loop is stopped and the keyboard event is
+dropped.
+
+In a rough C snippet, we have:
+
+kbd_keycode(keycode) {
+ ...
+ params.value = keycode;
+ if (notifier_call_chain(KBD_KEYCODE,&params) == NOTIFY_STOP)
+ || !bound) {
+ notifier_call_chain(KBD_UNBOUND_KEYCODE,&params);
+ return;
+ }
+
+ if (unicode) {
+ param.value = unicode;
+ if (notifier_call_chain(KBD_UNICODE,&params) == NOTIFY_STOP)
+ return;
+ emit unicode;
+ return;
+ }
+
+ params.value = keysym;
+ if (notifier_call_chain(KBD_KEYSYM,&params) == NOTIFY_STOP)
+ return;
+ apply keysym;
+ notifier_call_chain(KBD_POST_KEYSYM,&params);
+}
+
+NOTE: This notifier is usually called from interrupt context.
diff --git a/Documentation/input/ntrig.txt b/Documentation/input/ntrig.txt
new file mode 100644
index 000000000..be1fd981f
--- /dev/null
+++ b/Documentation/input/ntrig.txt
@@ -0,0 +1,126 @@
+N-Trig touchscreen Driver
+-------------------------
+ Copyright (c) 2008-2010 Rafi Rubin <rafi@seas.upenn.edu>
+ Copyright (c) 2009-2010 Stephane Chatty
+
+This driver provides support for N-Trig pen and multi-touch sensors. Single
+and multi-touch events are translated to the appropriate protocols for
+the hid and input systems. Pen events are sufficiently hid compliant and
+are left to the hid core. The driver also provides additional filtering
+and utility functions accessible with sysfs and module parameters.
+
+This driver has been reported to work properly with multiple N-Trig devices
+attached.
+
+
+Parameters
+----------
+
+Note: values set at load time are global and will apply to all applicable
+devices. Adjusting parameters with sysfs will override the load time values,
+but only for that one device.
+
+The following parameters are used to configure filters to reduce noise:
+
+activate_slack number of fingers to ignore before processing events
+
+activation_height size threshold to activate immediately
+activation_width
+
+min_height size threshold bellow which fingers are ignored
+min_width both to decide activation and during activity
+
+deactivate_slack the number of "no contact" frames to ignore before
+ propagating the end of activity events
+
+When the last finger is removed from the device, it sends a number of empty
+frames. By holding off on deactivation for a few frames we can tolerate false
+erroneous disconnects, where the sensor may mistakenly not detect a finger that
+is still present. Thus deactivate_slack addresses problems where a users might
+see breaks in lines during drawing, or drop an object during a long drag.
+
+
+Additional sysfs items
+----------------------
+
+These nodes just provide easy access to the ranges reported by the device.
+sensor_logical_height the range for positions reported during activity
+sensor_logical_width
+
+sensor_physical_height internal ranges not used for normal events but
+sensor_physical_width useful for tuning
+
+All N-Trig devices with product id of 1 report events in the ranges of
+X: 0-9600
+Y: 0-7200
+However not all of these devices have the same physical dimensions. Most
+seem to be 12" sensors (Dell Latitude XT and XT2 and the HP TX2), and
+at least one model (Dell Studio 17) has a 17" sensor. The ratio of physical
+to logical sizes is used to adjust the size based filter parameters.
+
+
+Filtering
+---------
+
+With the release of the early multi-touch firmwares it became increasingly
+obvious that these sensors were prone to erroneous events. Users reported
+seeing both inappropriately dropped contact and ghosts, contacts reported
+where no finger was actually touching the screen.
+
+Deactivation slack helps prevent dropped contact for single touch use, but does
+not address the problem of dropping one of more contacts while other contacts
+are still active. Drops in the multi-touch context require additional
+processing and should be handled in tandem with tacking.
+
+As observed ghost contacts are similar to actual use of the sensor, but they
+seem to have different profiles. Ghost activity typically shows up as small
+short lived touches. As such, I assume that the longer the continuous stream
+of events the more likely those events are from a real contact, and that the
+larger the size of each contact the more likely it is real. Balancing the
+goals of preventing ghosts and accepting real events quickly (to minimize
+user observable latency), the filter accumulates confidence for incoming
+events until it hits thresholds and begins propagating. In the interest in
+minimizing stored state as well as the cost of operations to make a decision,
+I've kept that decision simple.
+
+Time is measured in terms of the number of fingers reported, not frames since
+the probability of multiple simultaneous ghosts is expected to drop off
+dramatically with increasing numbers. Rather than accumulate weight as a
+function of size, I just use it as a binary threshold. A sufficiently large
+contact immediately overrides the waiting period and leads to activation.
+
+Setting the activation size thresholds to large values will result in deciding
+primarily on activation slack. If you see longer lived ghosts, turning up the
+activation slack while reducing the size thresholds may suffice to eliminate
+the ghosts while keeping the screen quite responsive to firm taps.
+
+Contacts continue to be filtered with min_height and min_width even after
+the initial activation filter is satisfied. The intent is to provide
+a mechanism for filtering out ghosts in the form of an extra finger while
+you actually are using the screen. In practice this sort of ghost has
+been far less problematic or relatively rare and I've left the defaults
+set to 0 for both parameters, effectively turning off that filter.
+
+I don't know what the optimal values are for these filters. If the defaults
+don't work for you, please play with the parameters. If you do find other
+values more comfortable, I would appreciate feedback.
+
+The calibration of these devices does drift over time. If ghosts or contact
+dropping worsen and interfere with the normal usage of your device, try
+recalibrating it.
+
+
+Calibration
+-----------
+
+The N-Trig windows tools provide calibration and testing routines. Also an
+unofficial unsupported set of user space tools including a calibrator is
+available at:
+http://code.launchpad.net/~rafi-seas/+junk/ntrig_calib
+
+
+Tracking
+--------
+
+As of yet, all tested N-Trig firmwares do not track fingers. When multiple
+contacts are active they seem to be sorted primarily by Y position.
diff --git a/Documentation/input/rotary-encoder.txt b/Documentation/input/rotary-encoder.txt
new file mode 100644
index 000000000..92e68bce1
--- /dev/null
+++ b/Documentation/input/rotary-encoder.txt
@@ -0,0 +1,121 @@
+rotary-encoder - a generic driver for GPIO connected devices
+Daniel Mack <daniel@caiaq.de>, Feb 2009
+
+0. Function
+-----------
+
+Rotary encoders are devices which are connected to the CPU or other
+peripherals with two wires. The outputs are phase-shifted by 90 degrees
+and by triggering on falling and rising edges, the turn direction can
+be determined.
+
+Some encoders have both outputs low in stable states, whereas others also have
+a stable state with both outputs high (half-period mode).
+
+The phase diagram of these two outputs look like this:
+
+ _____ _____ _____
+ | | | | | |
+ Channel A ____| |_____| |_____| |____
+
+ : : : : : : : : : : : :
+ __ _____ _____ _____
+ | | | | | | |
+ Channel B |_____| |_____| |_____| |__
+
+ : : : : : : : : : : : :
+ Event a b c d a b c d a b c d
+
+ |<-------->|
+ one step
+
+ |<-->|
+ one step (half-period mode)
+
+For more information, please see
+ http://en.wikipedia.org/wiki/Rotary_encoder
+
+
+1. Events / state machine
+-------------------------
+
+In half-period mode, state a) and c) above are used to determine the
+rotational direction based on the last stable state. Events are reported in
+states b) and d) given that the new stable state is different from the last
+(i.e. the rotation was not reversed half-way).
+
+Otherwise, the following apply:
+
+a) Rising edge on channel A, channel B in low state
+ This state is used to recognize a clockwise turn
+
+b) Rising edge on channel B, channel A in high state
+ When entering this state, the encoder is put into 'armed' state,
+ meaning that there it has seen half the way of a one-step transition.
+
+c) Falling edge on channel A, channel B in high state
+ This state is used to recognize a counter-clockwise turn
+
+d) Falling edge on channel B, channel A in low state
+ Parking position. If the encoder enters this state, a full transition
+ should have happened, unless it flipped back on half the way. The
+ 'armed' state tells us about that.
+
+2. Platform requirements
+------------------------
+
+As there is no hardware dependent call in this driver, the platform it is
+used with must support gpiolib. Another requirement is that IRQs must be
+able to fire on both edges.
+
+
+3. Board integration
+--------------------
+
+To use this driver in your system, register a platform_device with the
+name 'rotary-encoder' and associate the IRQs and some specific platform
+data with it.
+
+struct rotary_encoder_platform_data is declared in
+include/linux/rotary-encoder.h and needs to be filled with the number of
+steps the encoder has and can carry information about externally inverted
+signals (because of an inverting buffer or other reasons). The encoder
+can be set up to deliver input information as either an absolute or relative
+axes. For relative axes the input event returns +/-1 for each step. For
+absolute axes the position of the encoder can either roll over between zero
+and the number of steps or will clamp at the maximum and zero depending on
+the configuration.
+
+Because GPIO to IRQ mapping is platform specific, this information must
+be given in separately to the driver. See the example below.
+
+---------<snip>---------
+
+/* board support file example */
+
+#include <linux/input.h>
+#include <linux/rotary_encoder.h>
+
+#define GPIO_ROTARY_A 1
+#define GPIO_ROTARY_B 2
+
+static struct rotary_encoder_platform_data my_rotary_encoder_info = {
+ .steps = 24,
+ .axis = ABS_X,
+ .relative_axis = false,
+ .rollover = false,
+ .gpio_a = GPIO_ROTARY_A,
+ .gpio_b = GPIO_ROTARY_B,
+ .inverted_a = 0,
+ .inverted_b = 0,
+ .half_period = false,
+};
+
+static struct platform_device rotary_encoder_device = {
+ .name = "rotary-encoder",
+ .id = 0,
+ .dev = {
+ .platform_data = &my_rotary_encoder_info,
+ }
+};
+
diff --git a/Documentation/input/sentelic.txt b/Documentation/input/sentelic.txt
new file mode 100644
index 000000000..89251e2a3
--- /dev/null
+++ b/Documentation/input/sentelic.txt
@@ -0,0 +1,873 @@
+Copyright (C) 2002-2011 Sentelic Corporation.
+Last update: Dec-07-2011
+
+==============================================================================
+* Finger Sensing Pad Intellimouse Mode(scrolling wheel, 4th and 5th buttons)
+==============================================================================
+A) MSID 4: Scrolling wheel mode plus Forward page(4th button) and Backward
+ page (5th button)
+@1. Set sample rate to 200;
+@2. Set sample rate to 200;
+@3. Set sample rate to 80;
+@4. Issuing the "Get device ID" command (0xF2) and waits for the response;
+@5. FSP will respond 0x04.
+
+Packet 1
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|W|W|W|W|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7 => Y overflow
+ Bit6 => X overflow
+ Bit5 => Y sign bit
+ Bit4 => X sign bit
+ Bit3 => 1
+ Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: X Movement(9-bit 2's complement integers)
+Byte 3: Y Movement(9-bit 2's complement integers)
+Byte 4: Bit3~Bit0 => the scrolling wheel's movement since the last data report.
+ valid values, -8 ~ +7
+ Bit4 => 1 = 4th mouse button is pressed, Forward one page.
+ 0 = 4th mouse button is not pressed.
+ Bit5 => 1 = 5th mouse button is pressed, Backward one page.
+ 0 = 5th mouse button is not pressed.
+
+B) MSID 6: Horizontal and Vertical scrolling.
+@ Set bit 1 in register 0x40 to 1
+
+# FSP replaces scrolling wheel's movement as 4 bits to show horizontal and
+ vertical scrolling.
+
+Packet 1
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |Y|X|y|x|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 | | |B|F|r|l|u|d|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7 => Y overflow
+ Bit6 => X overflow
+ Bit5 => Y sign bit
+ Bit4 => X sign bit
+ Bit3 => 1
+ Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: X Movement(9-bit 2's complement integers)
+Byte 3: Y Movement(9-bit 2's complement integers)
+Byte 4: Bit0 => the Vertical scrolling movement downward.
+ Bit1 => the Vertical scrolling movement upward.
+ Bit2 => the Horizontal scrolling movement leftward.
+ Bit3 => the Horizontal scrolling movement rightward.
+ Bit4 => 1 = 4th mouse button is pressed, Forward one page.
+ 0 = 4th mouse button is not pressed.
+ Bit5 => 1 = 5th mouse button is pressed, Backward one page.
+ 0 = 5th mouse button is not pressed.
+
+C) MSID 7:
+# FSP uses 2 packets (8 Bytes) to represent Absolute Position.
+ so we have PACKET NUMBER to identify packets.
+ If PACKET NUMBER is 0, the packet is Packet 1.
+ If PACKET NUMBER is 1, the packet is Packet 2.
+ Please count this number in program.
+
+# MSID6 special packet will be enable at the same time when enable MSID 7.
+
+==============================================================================
+* Absolute position for STL3886-G0.
+==============================================================================
+@ Set bit 2 or 3 in register 0x40 to 1
+@ Set bit 6 in register 0x40 to 1
+
+Packet 1 (ABSOLUTE POSITION)
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |0|1|V|1|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|d|u|X|X|Y|Y|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordination packet
+ => 10, Notify packet
+ Bit5 => valid bit
+ Bit4 => 1
+ Bit3 => 1
+ Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: X coordinate (xpos[9:2])
+Byte 3: Y coordinate (ypos[9:2])
+Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0])
+ Bit3~Bit2 => X coordinate (ypos[1:0])
+ Bit4 => scroll up
+ Bit5 => scroll down
+ Bit6 => scroll left
+ Bit7 => scroll right
+
+Notify Packet for G0
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |1|0|0|1|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |M|M|M|M|M|M|M|M| 4 |0|0|0|0|0|0|0|0|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordination packet
+ => 10, Notify packet
+ Bit5 => 0
+ Bit4 => 1
+ Bit3 => 1
+ Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: Message Type => 0x5A (Enable/Disable status packet)
+ Mode Type => 0xA5 (Normal/Icon mode status)
+Byte 3: Message Type => 0x00 (Disabled)
+ => 0x01 (Enabled)
+ Mode Type => 0x00 (Normal)
+ => 0x01 (Icon)
+Byte 4: Bit7~Bit0 => Don't Care
+
+==============================================================================
+* Absolute position for STL3888-Ax.
+==============================================================================
+Packet 1 (ABSOLUTE POSITION)
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |0|1|V|A|1|L|0|1| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordination packet
+ => 10, Notify packet
+ => 11, Normal data packet with on-pad click
+ Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up.
+ When both fingers are up, the last two reports have zero valid
+ bit.
+ Bit4 => arc
+ Bit3 => 1
+ Bit2 => Left Button, 1 is pressed, 0 is released.
+ Bit1 => 0
+ Bit0 => 1
+Byte 2: X coordinate (xpos[9:2])
+Byte 3: Y coordinate (ypos[9:2])
+Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0])
+ Bit3~Bit2 => X coordinate (ypos[1:0])
+ Bit5~Bit4 => y1_g
+ Bit7~Bit6 => x1_g
+
+Packet 2 (ABSOLUTE POSITION)
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |0|1|V|A|1|R|1|0| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |x|x|y|y|X|X|Y|Y|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordinates packet
+ => 10, Notify packet
+ => 11, Normal data packet with on-pad click
+ Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up.
+ When both fingers are up, the last two reports have zero valid
+ bit.
+ Bit4 => arc
+ Bit3 => 1
+ Bit2 => Right Button, 1 is pressed, 0 is released.
+ Bit1 => 1
+ Bit0 => 0
+Byte 2: X coordinate (xpos[9:2])
+Byte 3: Y coordinate (ypos[9:2])
+Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0])
+ Bit3~Bit2 => X coordinate (ypos[1:0])
+ Bit5~Bit4 => y2_g
+ Bit7~Bit6 => x2_g
+
+Notify Packet for STL3888-Ax
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |1|0|1|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|d|u|0|0|0|0|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordinates packet
+ => 10, Notify packet
+ => 11, Normal data packet with on-pad click
+ Bit5 => 1
+ Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1):
+ 0: left button is generated by the on-pad command
+ 1: left button is generated by the external button
+ Bit3 => 1
+ Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: Message Type => 0xB7 (Multi Finger, Multi Coordinate mode)
+Byte 3: Bit7~Bit6 => Don't care
+ Bit5~Bit4 => Number of fingers
+ Bit3~Bit1 => Reserved
+ Bit0 => 1: enter gesture mode; 0: leaving gesture mode
+Byte 4: Bit7 => scroll right button
+ Bit6 => scroll left button
+ Bit5 => scroll down button
+ Bit4 => scroll up button
+ * Note that if gesture and additional button (Bit4~Bit7)
+ happen at the same time, the button information will not
+ be sent.
+ Bit3~Bit0 => Reserved
+
+Sample sequence of Multi-finger, Multi-coordinate mode:
+
+ notify packet (valid bit == 1), abs pkt 1, abs pkt 2, abs pkt 1,
+ abs pkt 2, ..., notify packet (valid bit == 0)
+
+==============================================================================
+* Absolute position for STL3888-B0.
+==============================================================================
+Packet 1(ABSOLUTE POSITION)
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |0|1|V|F|1|0|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|u|d|X|X|Y|Y|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordinates packet
+ => 10, Notify packet
+ => 11, Normal data packet with on-pad click
+ Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up.
+ When both fingers are up, the last two reports have zero valid
+ bit.
+ Bit4 => finger up/down information. 1: finger down, 0: finger up.
+ Bit3 => 1
+ Bit2 => finger index, 0 is the first finger, 1 is the second finger.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: X coordinate (xpos[9:2])
+Byte 3: Y coordinate (ypos[9:2])
+Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0])
+ Bit3~Bit2 => X coordinate (ypos[1:0])
+ Bit4 => scroll down button
+ Bit5 => scroll up button
+ Bit6 => scroll left button
+ Bit7 => scroll right button
+
+Packet 2 (ABSOLUTE POSITION)
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |0|1|V|F|1|1|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|u|d|X|X|Y|Y|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordination packet
+ => 10, Notify packet
+ => 11, Normal data packet with on-pad click
+ Bit5 => Valid bit, 0 means that the coordinate is invalid or finger up.
+ When both fingers are up, the last two reports have zero valid
+ bit.
+ Bit4 => finger up/down information. 1: finger down, 0: finger up.
+ Bit3 => 1
+ Bit2 => finger index, 0 is the first finger, 1 is the second finger.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: X coordinate (xpos[9:2])
+Byte 3: Y coordinate (ypos[9:2])
+Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0])
+ Bit3~Bit2 => X coordinate (ypos[1:0])
+ Bit4 => scroll down button
+ Bit5 => scroll up button
+ Bit6 => scroll left button
+ Bit7 => scroll right button
+
+Notify Packet for STL3888-B0
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |1|0|1|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|u|d|0|0|0|0|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordination packet
+ => 10, Notify packet
+ => 11, Normal data packet with on-pad click
+ Bit5 => 1
+ Bit4 => when in absolute coordinates mode (valid when EN_PKT_GO is 1):
+ 0: left button is generated by the on-pad command
+ 1: left button is generated by the external button
+ Bit3 => 1
+ Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: Message Type => 0xB7 (Multi Finger, Multi Coordinate mode)
+Byte 3: Bit7~Bit6 => Don't care
+ Bit5~Bit4 => Number of fingers
+ Bit3~Bit1 => Reserved
+ Bit0 => 1: enter gesture mode; 0: leaving gesture mode
+Byte 4: Bit7 => scroll right button
+ Bit6 => scroll left button
+ Bit5 => scroll up button
+ Bit4 => scroll down button
+ * Note that if gesture and additional button(Bit4~Bit7)
+ happen at the same time, the button information will not
+ be sent.
+ Bit3~Bit0 => Reserved
+
+Sample sequence of Multi-finger, Multi-coordinate mode:
+
+ notify packet (valid bit == 1), abs pkt 1, abs pkt 2, abs pkt 1,
+ abs pkt 2, ..., notify packet (valid bit == 0)
+
+==============================================================================
+* Absolute position for STL3888-Cx and STL3888-Dx.
+==============================================================================
+Single Finger, Absolute Coordinate Mode (SFAC)
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |0|1|0|P|1|M|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|B|F|X|X|Y|Y|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordinates packet
+ => 10, Notify packet
+ Bit5 => Coordinate mode(always 0 in SFAC mode):
+ 0: single-finger absolute coordinates (SFAC) mode
+ 1: multi-finger, multiple coordinates (MFMC) mode
+ Bit4 => 0: The LEFT button is generated by on-pad command (OPC)
+ 1: The LEFT button is generated by external button
+ Default is 1 even if the LEFT button is not pressed.
+ Bit3 => Always 1, as specified by PS/2 protocol.
+ Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: X coordinate (xpos[9:2])
+Byte 3: Y coordinate (ypos[9:2])
+Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0])
+ Bit3~Bit2 => X coordinate (ypos[1:0])
+ Bit4 => 4th mouse button(forward one page)
+ Bit5 => 5th mouse button(backward one page)
+ Bit6 => scroll left button
+ Bit7 => scroll right button
+
+Multi Finger, Multiple Coordinates Mode (MFMC):
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |0|1|1|P|1|F|R|L| 2 |X|X|X|X|X|X|X|X| 3 |Y|Y|Y|Y|Y|Y|Y|Y| 4 |r|l|B|F|X|X|Y|Y|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordination packet
+ => 10, Notify packet
+ Bit5 => Coordinate mode (always 1 in MFMC mode):
+ 0: single-finger absolute coordinates (SFAC) mode
+ 1: multi-finger, multiple coordinates (MFMC) mode
+ Bit4 => 0: The LEFT button is generated by on-pad command (OPC)
+ 1: The LEFT button is generated by external button
+ Default is 1 even if the LEFT button is not pressed.
+ Bit3 => Always 1, as specified by PS/2 protocol.
+ Bit2 => Finger index, 0 is the first finger, 1 is the second finger.
+ If bit 1 and 0 are all 1 and bit 4 is 0, the middle external
+ button is pressed.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: X coordinate (xpos[9:2])
+Byte 3: Y coordinate (ypos[9:2])
+Byte 4: Bit1~Bit0 => Y coordinate (xpos[1:0])
+ Bit3~Bit2 => X coordinate (ypos[1:0])
+ Bit4 => 4th mouse button(forward one page)
+ Bit5 => 5th mouse button(backward one page)
+ Bit6 => scroll left button
+ Bit7 => scroll right button
+
+ When one of the two fingers is up, the device will output four consecutive
+MFMC#0 report packets with zero X and Y to represent 1st finger is up or
+four consecutive MFMC#1 report packets with zero X and Y to represent that
+the 2nd finger is up. On the other hand, if both fingers are up, the device
+will output four consecutive single-finger, absolute coordinate(SFAC) packets
+with zero X and Y.
+
+Notify Packet for STL3888-Cx/Dx
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |1|0|0|P|1|M|R|L| 2 |C|C|C|C|C|C|C|C| 3 |0|0|F|F|0|0|0|i| 4 |r|l|u|d|0|0|0|0|
+ |---------------| |---------------| |---------------| |---------------|
+
+Byte 1: Bit7~Bit6 => 00, Normal data packet
+ => 01, Absolute coordinates packet
+ => 10, Notify packet
+ Bit5 => Always 0
+ Bit4 => 0: The LEFT button is generated by on-pad command(OPC)
+ 1: The LEFT button is generated by external button
+ Default is 1 even if the LEFT button is not pressed.
+ Bit3 => 1
+ Bit2 => Middle Button, 1 is pressed, 0 is not pressed.
+ Bit1 => Right Button, 1 is pressed, 0 is not pressed.
+ Bit0 => Left Button, 1 is pressed, 0 is not pressed.
+Byte 2: Message type:
+ 0xba => gesture information
+ 0xc0 => one finger hold-rotating gesture
+Byte 3: The first parameter for the received message:
+ 0xba => gesture ID (refer to the 'Gesture ID' section)
+ 0xc0 => region ID
+Byte 4: The second parameter for the received message:
+ 0xba => N/A
+ 0xc0 => finger up/down information
+
+Sample sequence of Multi-finger, Multi-coordinates mode:
+
+ notify packet (valid bit == 1), MFMC packet 1 (byte 1, bit 2 == 0),
+ MFMC packet 2 (byte 1, bit 2 == 1), MFMC packet 1, MFMC packet 2,
+ ..., notify packet (valid bit == 0)
+
+ That is, when the device is in MFMC mode, the host will receive
+ interleaved absolute coordinate packets for each finger.
+
+==============================================================================
+* FSP Enable/Disable packet
+==============================================================================
+ Bit 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
+BYTE |---------------|BYTE |---------------|BYTE|---------------|BYTE|---------------|
+ 1 |Y|X|0|0|1|M|R|L| 2 |0|1|0|1|1|0|1|E| 3 | | | | | | | | | 4 | | | | | | | | |
+ |---------------| |---------------| |---------------| |---------------|
+
+FSP will send out enable/disable packet when FSP receive PS/2 enable/disable
+command. Host will receive the packet which Middle, Right, Left button will
+be set. The packet only use byte 0 and byte 1 as a pattern of original packet.
+Ignore the other bytes of the packet.
+
+Byte 1: Bit7 => 0, Y overflow
+ Bit6 => 0, X overflow
+ Bit5 => 0, Y sign bit
+ Bit4 => 0, X sign bit
+ Bit3 => 1
+ Bit2 => 1, Middle Button
+ Bit1 => 1, Right Button
+ Bit0 => 1, Left Button
+Byte 2: Bit7~1 => (0101101b)
+ Bit0 => 1 = Enable
+ 0 = Disable
+Byte 3: Don't care
+Byte 4: Don't care (MOUSE ID 3, 4)
+Byte 5~8: Don't care (Absolute packet)
+
+==============================================================================
+* PS/2 Command Set
+==============================================================================
+
+FSP supports basic PS/2 commanding set and modes, refer to following URL for
+details about PS/2 commands:
+
+http://www.computer-engineering.org/ps2mouse/
+
+==============================================================================
+* Programming Sequence for Determining Packet Parsing Flow
+==============================================================================
+1. Identify FSP by reading device ID(0x00) and version(0x01) register
+
+2a. For FSP version < STL3888 Cx, determine number of buttons by reading
+ the 'test mode status' (0x20) register:
+
+ buttons = reg[0x20] & 0x30
+
+ if buttons == 0x30 or buttons == 0x20:
+ # two/four buttons
+ Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse'
+ section A for packet parsing detail(ignore byte 4, bit ~ 7)
+ elif buttons == 0x10:
+ # 6 buttons
+ Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse'
+ section B for packet parsing detail
+ elif buttons == 0x00:
+ # 6 buttons
+ Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse'
+ section A for packet parsing detail
+
+2b. For FSP version >= STL3888 Cx:
+ Refer to 'Finger Sensing Pad PS/2 Mouse Intellimouse'
+ section A for packet parsing detail (ignore byte 4, bit ~ 7)
+
+==============================================================================
+* Programming Sequence for Register Reading/Writing
+==============================================================================
+
+Register inversion requirement:
+
+ Following values needed to be inverted(the '~' operator in C) before being
+sent to FSP:
+
+ 0xe8, 0xe9, 0xee, 0xf2, 0xf3 and 0xff.
+
+Register swapping requirement:
+
+ Following values needed to have their higher 4 bits and lower 4 bits being
+swapped before being sent to FSP:
+
+ 10, 20, 40, 60, 80, 100 and 200.
+
+Register reading sequence:
+
+ 1. send 0xf3 PS/2 command to FSP;
+
+ 2. send 0x66 PS/2 command to FSP;
+
+ 3. send 0x88 PS/2 command to FSP;
+
+ 4. send 0xf3 PS/2 command to FSP;
+
+ 5. if the register address being to read is not required to be
+ inverted(refer to the 'Register inversion requirement' section),
+ goto step 6
+
+ 5a. send 0x68 PS/2 command to FSP;
+
+ 5b. send the inverted register address to FSP and goto step 8;
+
+ 6. if the register address being to read is not required to be
+ swapped(refer to the 'Register swapping requirement' section),
+ goto step 7
+
+ 6a. send 0xcc PS/2 command to FSP;
+
+ 6b. send the swapped register address to FSP and goto step 8;
+
+ 7. send 0x66 PS/2 command to FSP;
+
+ 7a. send the original register address to FSP and goto step 8;
+
+ 8. send 0xe9(status request) PS/2 command to FSP;
+
+ 9. the 4th byte of the response read from FSP should be the
+ requested register value(?? indicates don't care byte):
+
+ host: 0xe9
+ 3888: 0xfa (??) (??) (val)
+
+ * Note that since the Cx release, the hardware will return 1's
+ complement of the register value at the 3rd byte of status request
+ result:
+
+ host: 0xe9
+ 3888: 0xfa (??) (~val) (val)
+
+Register writing sequence:
+
+ 1. send 0xf3 PS/2 command to FSP;
+
+ 2. if the register address being to write is not required to be
+ inverted(refer to the 'Register inversion requirement' section),
+ goto step 3
+
+ 2a. send 0x74 PS/2 command to FSP;
+
+ 2b. send the inverted register address to FSP and goto step 5;
+
+ 3. if the register address being to write is not required to be
+ swapped(refer to the 'Register swapping requirement' section),
+ goto step 4
+
+ 3a. send 0x77 PS/2 command to FSP;
+
+ 3b. send the swapped register address to FSP and goto step 5;
+
+ 4. send 0x55 PS/2 command to FSP;
+
+ 4a. send the register address to FSP and goto step 5;
+
+ 5. send 0xf3 PS/2 command to FSP;
+
+ 6. if the register value being to write is not required to be
+ inverted(refer to the 'Register inversion requirement' section),
+ goto step 7
+
+ 6a. send 0x47 PS/2 command to FSP;
+
+ 6b. send the inverted register value to FSP and goto step 9;
+
+ 7. if the register value being to write is not required to be
+ swapped(refer to the 'Register swapping requirement' section),
+ goto step 8
+
+ 7a. send 0x44 PS/2 command to FSP;
+
+ 7b. send the swapped register value to FSP and goto step 9;
+
+ 8. send 0x33 PS/2 command to FSP;
+
+ 8a. send the register value to FSP;
+
+ 9. the register writing sequence is completed.
+
+ * Note that since the Cx release, the hardware will return 1's
+ complement of the register value at the 3rd byte of status request
+ result. Host can optionally send another 0xe9 (status request) PS/2
+ command to FSP at the end of register writing to verify that the
+ register writing operation is successful (?? indicates don't care
+ byte):
+
+ host: 0xe9
+ 3888: 0xfa (??) (~val) (val)
+
+==============================================================================
+* Programming Sequence for Page Register Reading/Writing
+==============================================================================
+
+ In order to overcome the limitation of maximum number of registers
+supported, the hardware separates register into different groups called
+'pages.' Each page is able to include up to 255 registers.
+
+ The default page after power up is 0x82; therefore, if one has to get
+access to register 0x8301, one has to use following sequence to switch
+to page 0x83, then start reading/writing from/to offset 0x01 by using
+the register read/write sequence described in previous section.
+
+Page register reading sequence:
+
+ 1. send 0xf3 PS/2 command to FSP;
+
+ 2. send 0x66 PS/2 command to FSP;
+
+ 3. send 0x88 PS/2 command to FSP;
+
+ 4. send 0xf3 PS/2 command to FSP;
+
+ 5. send 0x83 PS/2 command to FSP;
+
+ 6. send 0x88 PS/2 command to FSP;
+
+ 7. send 0xe9(status request) PS/2 command to FSP;
+
+ 8. the response read from FSP should be the requested page value.
+
+Page register writing sequence:
+
+ 1. send 0xf3 PS/2 command to FSP;
+
+ 2. send 0x38 PS/2 command to FSP;
+
+ 3. send 0x88 PS/2 command to FSP;
+
+ 4. send 0xf3 PS/2 command to FSP;
+
+ 5. if the page address being written is not required to be
+ inverted(refer to the 'Register inversion requirement' section),
+ goto step 6
+
+ 5a. send 0x47 PS/2 command to FSP;
+
+ 5b. send the inverted page address to FSP and goto step 9;
+
+ 6. if the page address being written is not required to be
+ swapped(refer to the 'Register swapping requirement' section),
+ goto step 7
+
+ 6a. send 0x44 PS/2 command to FSP;
+
+ 6b. send the swapped page address to FSP and goto step 9;
+
+ 7. send 0x33 PS/2 command to FSP;
+
+ 8. send the page address to FSP;
+
+ 9. the page register writing sequence is completed.
+
+==============================================================================
+* Gesture ID
+==============================================================================
+
+ Unlike other devices which sends multiple fingers' coordinates to host,
+FSP processes multiple fingers' coordinates internally and convert them
+into a 8 bits integer, namely 'Gesture ID.' Following is a list of
+supported gesture IDs:
+
+ ID Description
+ 0x86 2 finger straight up
+ 0x82 2 finger straight down
+ 0x80 2 finger straight right
+ 0x84 2 finger straight left
+ 0x8f 2 finger zoom in
+ 0x8b 2 finger zoom out
+ 0xc0 2 finger curve, counter clockwise
+ 0xc4 2 finger curve, clockwise
+ 0x2e 3 finger straight up
+ 0x2a 3 finger straight down
+ 0x28 3 finger straight right
+ 0x2c 3 finger straight left
+ 0x38 palm
+
+==============================================================================
+* Register Listing
+==============================================================================
+
+ Registers are represented in 16 bits values. The higher 8 bits represent
+the page address and the lower 8 bits represent the relative offset within
+that particular page. Refer to the 'Programming Sequence for Page Register
+Reading/Writing' section for instructions on how to change current page
+address.
+
+offset width default r/w name
+0x8200 bit7~bit0 0x01 RO device ID
+
+0x8201 bit7~bit0 RW version ID
+ 0xc1: STL3888 Ax
+ 0xd0 ~ 0xd2: STL3888 Bx
+ 0xe0 ~ 0xe1: STL3888 Cx
+ 0xe2 ~ 0xe3: STL3888 Dx
+
+0x8202 bit7~bit0 0x01 RO vendor ID
+
+0x8203 bit7~bit0 0x01 RO product ID
+
+0x8204 bit3~bit0 0x01 RW revision ID
+
+0x820b test mode status 1
+ bit3 1 RO 0: rotate 180 degree
+ 1: no rotation
+ *only supported by H/W prior to Cx
+
+0x820f register file page control
+ bit2 0 RW 1: rotate 180 degree
+ 0: no rotation
+ *supported since Cx
+
+ bit0 0 RW 1 to enable page 1 register files
+ *only supported by H/W prior to Cx
+
+0x8210 RW system control 1
+ bit0 1 RW Reserved, must be 1
+ bit1 0 RW Reserved, must be 0
+ bit4 0 RW Reserved, must be 0
+ bit5 1 RW register clock gating enable
+ 0: read only, 1: read/write enable
+ (Note that following registers does not require clock gating being
+ enabled prior to write: 05 06 07 08 09 0c 0f 10 11 12 16 17 18 23 2e
+ 40 41 42 43. In addition to that, this bit must be 1 when gesture
+ mode is enabled)
+
+0x8220 test mode status
+ bit5~bit4 RO number of buttons
+ 11 => 2, lbtn/rbtn
+ 10 => 4, lbtn/rbtn/scru/scrd
+ 01 => 6, lbtn/rbtn/scru/scrd/scrl/scrr
+ 00 => 6, lbtn/rbtn/scru/scrd/fbtn/bbtn
+ *only supported by H/W prior to Cx
+
+0x8231 RW on-pad command detection
+ bit7 0 RW on-pad command left button down tag
+ enable
+ 0: disable, 1: enable
+ *only supported by H/W prior to Cx
+
+0x8234 RW on-pad command control 5
+ bit4~bit0 0x05 RW XLO in 0s/4/1, so 03h = 0010.1b = 2.5
+ (Note that position unit is in 0.5 scanline)
+ *only supported by H/W prior to Cx
+
+ bit7 0 RW on-pad tap zone enable
+ 0: disable, 1: enable
+ *only supported by H/W prior to Cx
+
+0x8235 RW on-pad command control 6
+ bit4~bit0 0x1d RW XHI in 0s/4/1, so 19h = 1100.1b = 12.5
+ (Note that position unit is in 0.5 scanline)
+ *only supported by H/W prior to Cx
+
+0x8236 RW on-pad command control 7
+ bit4~bit0 0x04 RW YLO in 0s/4/1, so 03h = 0010.1b = 2.5
+ (Note that position unit is in 0.5 scanline)
+ *only supported by H/W prior to Cx
+
+0x8237 RW on-pad command control 8
+ bit4~bit0 0x13 RW YHI in 0s/4/1, so 11h = 1000.1b = 8.5
+ (Note that position unit is in 0.5 scanline)
+ *only supported by H/W prior to Cx
+
+0x8240 RW system control 5
+ bit1 0 RW FSP Intellimouse mode enable
+ 0: disable, 1: enable
+ *only supported by H/W prior to Cx
+
+ bit2 0 RW movement + abs. coordinate mode enable
+ 0: disable, 1: enable
+ (Note that this function has the functionality of bit 1 even when
+ bit 1 is not set. However, the format is different from that of bit 1.
+ In addition, when bit 1 and bit 2 are set at the same time, bit 2 will
+ override bit 1.)
+ *only supported by H/W prior to Cx
+
+ bit3 0 RW abs. coordinate only mode enable
+ 0: disable, 1: enable
+ (Note that this function has the functionality of bit 1 even when
+ bit 1 is not set. However, the format is different from that of bit 1.
+ In addition, when bit 1, bit 2 and bit 3 are set at the same time,
+ bit 3 will override bit 1 and 2.)
+ *only supported by H/W prior to Cx
+
+ bit5 0 RW auto switch enable
+ 0: disable, 1: enable
+ *only supported by H/W prior to Cx
+
+ bit6 0 RW G0 abs. + notify packet format enable
+ 0: disable, 1: enable
+ (Note that the absolute/relative coordinate output still depends on
+ bit 2 and 3. That is, if any of those bit is 1, host will receive
+ absolute coordinates; otherwise, host only receives packets with
+ relative coordinate.)
+ *only supported by H/W prior to Cx
+
+ bit7 0 RW EN_PS2_F2: PS/2 gesture mode 2nd
+ finger packet enable
+ 0: disable, 1: enable
+ *only supported by H/W prior to Cx
+
+0x8243 RW on-pad control
+ bit0 0 RW on-pad control enable
+ 0: disable, 1: enable
+ (Note that if this bit is cleared, bit 3/5 will be ineffective)
+ *only supported by H/W prior to Cx
+
+ bit3 0 RW on-pad fix vertical scrolling enable
+ 0: disable, 1: enable
+ *only supported by H/W prior to Cx
+
+ bit5 0 RW on-pad fix horizontal scrolling enable
+ 0: disable, 1: enable
+ *only supported by H/W prior to Cx
+
+0x8290 RW software control register 1
+ bit0 0 RW absolute coordination mode
+ 0: disable, 1: enable
+ *supported since Cx
+
+ bit1 0 RW gesture ID output
+ 0: disable, 1: enable
+ *supported since Cx
+
+ bit2 0 RW two fingers' coordinates output
+ 0: disable, 1: enable
+ *supported since Cx
+
+ bit3 0 RW finger up one packet output
+ 0: disable, 1: enable
+ *supported since Cx
+
+ bit4 0 RW absolute coordination continuous mode
+ 0: disable, 1: enable
+ *supported since Cx
+
+ bit6~bit5 00 RW gesture group selection
+ 00: basic
+ 01: suite
+ 10: suite pro
+ 11: advanced
+ *supported since Cx
+
+ bit7 0 RW Bx packet output compatible mode
+ 0: disable, 1: enable *supported since Cx
+ *supported since Cx
+
+
+0x833d RW on-pad command control 1
+ bit7 1 RW on-pad command detection enable
+ 0: disable, 1: enable
+ *supported since Cx
+
+0x833e RW on-pad command detection
+ bit7 0 RW on-pad command left button down tag
+ enable. Works only in H/W based PS/2
+ data packet mode.
+ 0: disable, 1: enable
+ *supported since Cx
diff --git a/Documentation/input/shape.fig b/Documentation/input/shape.fig
new file mode 100644
index 000000000..c22bff83d
--- /dev/null
+++ b/Documentation/input/shape.fig
@@ -0,0 +1,65 @@
+#FIG 3.2
+Landscape
+Center
+Inches
+Letter
+100.00
+Single
+-2
+1200 2
+2 1 0 2 0 7 50 0 -1 0.000 0 0 -1 0 0 6
+ 4200 3600 4200 3075 4950 2325 7425 2325 8250 3150 8250 3600
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 4200 3675 4200 5400
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 8250 3675 8250 5400
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 3675 3600 8700 3600
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 8775 3600 10200 3600
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 8325 3150 9075 3150
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 7500 2325 10200 2325
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 3600 3600 3000 3600
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 4125 3075 3000 3075
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 4200 5400 8175 5400
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 10125 2325 10125 3600
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 3000 3150 3000 3600
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 9075 3150 9075 3600
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 4950 2325 4950 1200
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 2
+ 7425 2325 7425 1200
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 4
+ 4200 3075 4200 2400 3600 1800 3600 1200
+2 1 1 1 0 7 50 0 -1 4.000 0 0 -1 0 0 4
+ 8250 3150 8250 2475 8775 1950 8775 1200
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 3600 1275 4950 1275
+2 1 0 1 0 7 50 0 -1 4.000 0 0 -1 1 1 2
+ 0 0 1.00 60.00 120.00
+ 0 0 1.00 60.00 120.00
+ 7425 1275 8700 1275
+4 1 0 50 0 0 12 0.0000 4 135 1140 6075 5325 Effect duration\001
+4 0 0 50 0 0 12 0.0000 4 180 1305 10200 3000 Effect magnitude\001
+4 0 0 50 0 0 12 0.0000 4 135 780 9150 3450 Fade level\001
+4 1 0 50 0 0 12 0.0000 4 180 1035 4275 1200 Attack length\001
+4 1 0 50 0 0 12 0.0000 4 180 885 8175 1200 Fade length\001
+4 2 0 50 0 0 12 0.0000 4 135 930 2925 3375 Attack level\001
diff --git a/Documentation/input/walkera0701.txt b/Documentation/input/walkera0701.txt
new file mode 100644
index 000000000..49e3ac60d
--- /dev/null
+++ b/Documentation/input/walkera0701.txt
@@ -0,0 +1,109 @@
+
+Walkera WK-0701 transmitter is supplied with a ready to fly Walkera
+helicopters such as HM36, HM37, HM60. The walkera0701 module enables to use
+this transmitter as joystick
+
+Devel homepage and download:
+http://zub.fei.tuke.sk/walkera-wk0701/
+
+or use cogito:
+cg-clone http://zub.fei.tuke.sk/GIT/walkera0701-joystick
+
+
+Connecting to PC:
+
+At back side of transmitter S-video connector can be found. Modulation
+pulses from processor to HF part can be found at pin 2 of this connector,
+pin 3 is GND. Between pin 3 and CPU 5k6 resistor can be found. To get
+modulation pulses to PC, signal pulses must be amplified.
+
+Cable: (walkera TX to parport)
+
+Walkera WK-0701 TX S-VIDEO connector:
+ (back side of TX)
+ __ __ S-video: canon25
+ / |_| \ pin 2 (signal) NPN parport
+ / O 4 3 O \ pin 3 (GND) LED ________________ 10 ACK
+ ( O 2 1 O ) | C
+ \ ___ / 2 ________________________|\|_____|/
+ | [___] | |/| B |\
+ ------- 3 __________________________________|________________ 25 GND
+ E
+
+
+I use green LED and BC109 NPN transistor.
+
+Software:
+
+Build kernel with walkera0701 module. Module walkera0701 need exclusive
+access to parport, modules like lp must be unloaded before loading
+walkera0701 module, check dmesg for error messages. Connect TX to PC by
+cable and run jstest /dev/input/js0 to see values from TX. If no value can
+be changed by TX "joystick", check output from /proc/interrupts. Value for
+(usually irq7) parport must increase if TX is on.
+
+
+
+Technical details:
+
+Driver use interrupt from parport ACK input bit to measure pulse length
+using hrtimers.
+
+Frame format:
+Based on walkera WK-0701 PCM Format description by Shaul Eizikovich.
+(downloaded from http://www.smartpropoplus.com/Docs/Walkera_Wk-0701_PCM.pdf)
+
+Signal pulses:
+ (ANALOG)
+ SYNC BIN OCT
+ +---------+ +------+
+ | | | |
+--+ +------+ +---
+
+Frame:
+ SYNC , BIN1, OCT1, BIN2, OCT2 ... BIN24, OCT24, BIN25, next frame SYNC ..
+
+pulse length:
+ Binary values: Analog octal values:
+
+ 288 uS Binary 0 318 uS 000
+ 438 uS Binary 1 398 uS 001
+ 478 uS 010
+ 558 uS 011
+ 638 uS 100
+ 1306 uS SYNC 718 uS 101
+ 798 uS 110
+ 878 uS 111
+
+24 bin+oct values + 1 bin value = 24*4+1 bits = 97 bits
+
+(Warning, pulses on ACK are inverted by transistor, irq is raised up on sync
+to bin change or octal value to bin change).
+
+Binary data representations:
+
+One binary and octal value can be grouped to nibble. 24 nibbles + one binary
+values can be sampled between sync pulses.
+
+Values for first four channels (analog joystick values) can be found in
+first 10 nibbles. Analog value is represented by one sign bit and 9 bit
+absolute binary value. (10 bits per channel). Next nibble is checksum for
+first ten nibbles.
+
+Next nibbles 12 .. 21 represents four channels (not all channels can be
+directly controlled from TX). Binary representations are the same as in first
+four channels. In nibbles 22 and 23 is a special magic number. Nibble 24 is
+checksum for nibbles 12..23.
+
+After last octal value for nibble 24 and next sync pulse one additional
+binary value can be sampled. This bit and magic number is not used in
+software driver. Some details about this magic numbers can be found in
+Walkera_Wk-0701_PCM.pdf.
+
+Checksum calculation:
+
+Summary of octal values in nibbles must be same as octal value in checksum
+nibble (only first 3 bits are used). Binary value for checksum nibble is
+calculated by sum of binary values in checked nibbles + sum of octal values
+in checked nibbles divided by 8. Only bit 0 of this sum is used.
+
diff --git a/Documentation/input/xpad.txt b/Documentation/input/xpad.txt
new file mode 100644
index 000000000..d1b23f295
--- /dev/null
+++ b/Documentation/input/xpad.txt
@@ -0,0 +1,226 @@
+xpad - Linux USB driver for Xbox compatible controllers
+
+This driver exposes all first-party and third-party Xbox compatible
+controllers. It has a long history and has enjoyed considerable usage
+as Window's xinput library caused most PC games to focus on Xbox
+controller compatibility.
+
+Due to backwards compatibility all buttons are reported as digital.
+This only effects Original Xbox controllers. All later controller models
+have only digital face buttons.
+
+Rumble is supported on some models of Xbox 360 controllers but not of
+Original Xbox controllers nor on Xbox One controllers. As of writing
+the Xbox One's rumble protocol has not been reverse engineered but in
+the future could be supported.
+
+
+0. Notes
+--------
+The number of buttons/axes reported varies based on 3 things:
+- if you are using a known controller
+- if you are using a known dance pad
+- if using an unknown device (one not listed below), what you set in the
+ module configuration for "Map D-PAD to buttons rather than axes for unknown
+ pads" (module option dpad_to_buttons)
+
+If you set dpad_to_buttons to N and you are using an unknown device
+the driver will map the directional pad to axes (X/Y).
+If you said Y it will map the d-pad to buttons, which is needed for dance
+style games to function correctly. The default is Y.
+
+dpad_to_buttons has no effect for known pads. A erroneous commit message
+claimed dpad_to_buttons could be used to force behavior on known devices.
+This is not true. Both dpad_to_buttons and triggers_to_buttons only affect
+unknown controllers.
+
+
+0.1 Normal Controllers
+----------------------
+With a normal controller, the directional pad is mapped to its own X/Y axes.
+The jstest-program from joystick-1.2.15 (jstest-version 2.1.0) will report 8
+axes and 10 buttons.
+
+All 8 axes work, though they all have the same range (-32768..32767)
+and the zero-setting is not correct for the triggers (I don't know if that
+is some limitation of jstest, since the input device setup should be fine. I
+didn't have a look at jstest itself yet).
+
+All of the 10 buttons work (in digital mode). The six buttons on the
+right side (A, B, X, Y, black, white) are said to be "analog" and
+report their values as 8 bit unsigned, not sure what this is good for.
+
+I tested the controller with quake3, and configuration and
+in game functionality were OK. However, I find it rather difficult to
+play first person shooters with a pad. Your mileage may vary.
+
+
+0.2 Xbox Dance Pads
+-------------------
+When using a known dance pad, jstest will report 6 axes and 14 buttons.
+
+For dance style pads (like the redoctane pad) several changes
+have been made. The old driver would map the d-pad to axes, resulting
+in the driver being unable to report when the user was pressing both
+left+right or up+down, making DDR style games unplayable.
+
+Known dance pads automatically map the d-pad to buttons and will work
+correctly out of the box.
+
+If your dance pad is recognized by the driver but is using axes instead
+of buttons, see section 0.3 - Unknown Controllers
+
+I've tested this with Stepmania, and it works quite well.
+
+
+0.3 Unknown Controllers
+----------------------
+If you have an unknown xbox controller, it should work just fine with
+the default settings.
+
+HOWEVER if you have an unknown dance pad not listed below, it will not
+work UNLESS you set "dpad_to_buttons" to 1 in the module configuration.
+
+PLEASE, if you have an unknown controller, email Dom <binary1230@yahoo.com> with
+a dump from /proc/bus/usb and a description of the pad (manufacturer, country,
+whether it is a dance pad or normal controller) so that we can add your pad
+to the list of supported devices, ensuring that it will work out of the
+box in the future.
+
+
+1. USB adapters
+--------------
+All generations of Xbox controllers speak USB over the wire.
+- Original Xbox controllers use a proprietary connector and require adapters.
+- Wireless Xbox 360 controllers require a 'Xbox 360 Wireless Gaming Receiver
+ for Windows'
+- Wired Xbox 360 controllers use standard USB connectors.
+- Xbox One controllers can be wireless but speak Wi-Fi Direct and are not
+ yet supported.
+- Xbox One controllers can be wired and use standard Micro-USB connectors.
+
+
+
+1.1 Original Xbox USB adapters
+--------------
+Using this driver with an Original Xbox controller requires an
+adapter cable to break out the proprietary connector's pins to USB.
+You can buy these online fairly cheap, or build your own.
+
+Such a cable is pretty easy to build. The Controller itself is a USB
+compound device (a hub with three ports for two expansion slots and
+the controller device) with the only difference in a nonstandard connector
+(5 pins vs. 4 on standard USB 1.0 connectors).
+
+You just need to solder a USB connector onto the cable and keep the
+yellow wire unconnected. The other pins have the same order on both
+connectors so there is no magic to it. Detailed info on these matters
+can be found on the net ([1], [2], [3]).
+
+Thanks to the trip splitter found on the cable you don't even need to cut the
+original one. You can buy an extension cable and cut that instead. That way,
+you can still use the controller with your X-Box, if you have one ;)
+
+
+
+2. Driver Installation
+----------------------
+
+Once you have the adapter cable, if needed, and the controller connected
+the xpad module should be auto loaded. To confirm you can cat
+/proc/bus/usb/devices. There should be an entry like the one at the end [4].
+
+
+
+3. Supported Controllers
+------------------------
+For a full list of supported controllers and associated vendor and product
+IDs see the xpad_device[] array[6].
+
+As of the historic version 0.0.6 (2006-10-10) the following devices
+were supported:
+ original Microsoft XBOX controller (US), vendor=0x045e, product=0x0202
+ smaller Microsoft XBOX controller (US), vendor=0x045e, product=0x0289
+ original Microsoft XBOX controller (Japan), vendor=0x045e, product=0x0285
+ InterAct PowerPad Pro (Germany), vendor=0x05fd, product=0x107a
+ RedOctane Xbox Dance Pad (US), vendor=0x0c12, product=0x8809
+
+Unrecognized models of Xbox controllers should function as Generic
+Xbox controllers. Unrecognized Dance Pad controllers require setting
+the module option 'dpad_to_buttons'.
+
+If you have an unrecognized controller please see 0.3 - Unknown Controllers
+
+
+4. Manual Testing
+-----------------
+To test this driver's functionality you may use 'jstest'.
+
+For example:
+> modprobe xpad
+> modprobe joydev
+> jstest /dev/js0
+
+If you're using a normal controller, there should be a single line showing
+18 inputs (8 axes, 10 buttons), and its values should change if you move
+the sticks and push the buttons. If you're using a dance pad, it should
+show 20 inputs (6 axes, 14 buttons).
+
+It works? Voila, you're done ;)
+
+
+
+5. Thanks
+---------
+
+I have to thank ITO Takayuki for the detailed info on his site
+ http://euc.jp/periphs/xbox-controller.ja.html.
+
+His useful info and both the usb-skeleton as well as the iforce input driver
+(Greg Kroah-Hartmann; Vojtech Pavlik) helped a lot in rapid prototyping
+the basic functionality.
+
+
+
+6. References
+-------------
+
+[1]: http://euc.jp/periphs/xbox-controller.ja.html (ITO Takayuki)
+[2]: http://xpad.xbox-scene.com/
+[3]: http://www.markosweb.com/www/xboxhackz.com/
+[4]: /proc/bus/usb/devices - dump from InterAct PowerPad Pro (Germany):
+
+T: Bus=01 Lev=03 Prnt=04 Port=00 Cnt=01 Dev#= 5 Spd=12 MxCh= 0
+D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=32 #Cfgs= 1
+P: Vendor=05fd ProdID=107a Rev= 1.00
+C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=100mA
+I: If#= 0 Alt= 0 #EPs= 2 Cls=58(unk. ) Sub=42 Prot=00 Driver=(none)
+E: Ad=81(I) Atr=03(Int.) MxPS= 32 Ivl= 10ms
+E: Ad=02(O) Atr=03(Int.) MxPS= 32 Ivl= 10ms
+
+[5]: /proc/bus/usb/devices - dump from Redoctane Xbox Dance Pad (US):
+
+T: Bus=01 Lev=02 Prnt=09 Port=00 Cnt=01 Dev#= 10 Spd=12 MxCh= 0
+D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1
+P: Vendor=0c12 ProdID=8809 Rev= 0.01
+S: Product=XBOX DDR
+C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=100mA
+I: If#= 0 Alt= 0 #EPs= 2 Cls=58(unk. ) Sub=42 Prot=00 Driver=xpad
+E: Ad=82(I) Atr=03(Int.) MxPS= 32 Ivl=4ms
+E: Ad=02(O) Atr=03(Int.) MxPS= 32 Ivl=4ms
+
+[6]: http://lxr.free-electrons.com/ident?i=xpad_device
+
+
+
+7. Historic Edits
+-----------------
+Marko Friedemann <mfr@bmx-chemnitz.de>
+2002-07-16
+ - original doc
+
+Dominic Cerquetti <binary1230@yahoo.com>
+2005-03-19
+ - added stuff for dance pads, new d-pad->axes mappings
+
+Later changes may be viewed with 'git log Documentation/input/xpad.txt'
diff --git a/Documentation/input/yealink.txt b/Documentation/input/yealink.txt
new file mode 100644
index 000000000..8277b76ec
--- /dev/null
+++ b/Documentation/input/yealink.txt
@@ -0,0 +1,216 @@
+Driver documentation for yealink usb-p1k phones
+
+0. Status
+~~~~~~~~~
+The p1k is a relatively cheap usb 1.1 phone with:
+ - keyboard full support, yealink.ko / input event API
+ - LCD full support, yealink.ko / sysfs API
+ - LED full support, yealink.ko / sysfs API
+ - dialtone full support, yealink.ko / sysfs API
+ - ringtone full support, yealink.ko / sysfs API
+ - audio playback full support, snd_usb_audio.ko / alsa API
+ - audio record full support, snd_usb_audio.ko / alsa API
+
+For vendor documentation see http://www.yealink.com
+
+
+1. Compilation (stand alone version)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Currently only kernel 2.6.x.y versions are supported.
+In order to build the yealink.ko module do
+
+ make
+
+If you encounter problems please check if in the MAKE_OPTS variable in
+the Makefile is pointing to the location where your kernel sources
+are located, default /usr/src/linux.
+
+
+1.1 Troubleshooting
+~~~~~~~~~~~~~~~~~~~
+Q: Module yealink compiled and installed without any problem but phone
+ is not initialized and does not react to any actions.
+A: If you see something like:
+ hiddev0: USB HID v1.00 Device [Yealink Network Technology Ltd. VOIP USB Phone
+ in dmesg, it means that the hid driver has grabbed the device first. Try to
+ load module yealink before any other usb hid driver. Please see the
+ instructions provided by your distribution on module configuration.
+
+Q: Phone is working now (displays version and accepts keypad input) but I can't
+ find the sysfs files.
+A: The sysfs files are located on the particular usb endpoint. On most
+ distributions you can do: "find /sys/ -name get_icons" for a hint.
+
+
+2. keyboard features
+~~~~~~~~~~~~~~~~~~~~
+The current mapping in the kernel is provided by the map_p1k_to_key
+function:
+
+ Physical USB-P1K button layout input events
+
+
+ up up
+ IN OUT left, right
+ down down
+
+ pickup C hangup enter, backspace, escape
+ 1 2 3 1, 2, 3
+ 4 5 6 4, 5, 6,
+ 7 8 9 7, 8, 9,
+ * 0 # *, 0, #,
+
+ The "up" and "down" keys, are symbolised by arrows on the button.
+ The "pickup" and "hangup" keys are symbolised by a green and red phone
+ on the button.
+
+
+3. LCD features
+~~~~~~~~~~~~~~~
+The LCD is divided and organised as a 3 line display:
+
+ |[] [][] [][] [][] in |[][]
+ |[] M [][] D [][] : [][] out |[][]
+ store
+
+ NEW REP SU MO TU WE TH FR SA
+
+ [] [] [] [] [] [] [] [] [] [] [] []
+ [] [] [] [] [] [] [] [] [] [] [] []
+
+
+Line 1 Format (see below) : 18.e8.M8.88...188
+ Icon names : M D : IN OUT STORE
+Line 2 Format : .........
+ Icon name : NEW REP SU MO TU WE TH FR SA
+Line 3 Format : 888888888888
+
+
+Format description:
+ From a userspace perspective the world is separated into "digits" and "icons".
+ A digit can have a character set, an icon can only be ON or OFF.
+
+ Format specifier
+ '8' : Generic 7 segment digit with individual addressable segments
+
+ Reduced capability 7 segment digit, when segments are hard wired together.
+ '1' : 2 segments digit only able to produce a 1.
+ 'e' : Most significant day of the month digit,
+ able to produce at least 1 2 3.
+ 'M' : Most significant minute digit,
+ able to produce at least 0 1 2 3 4 5.
+
+ Icons or pictograms:
+ '.' : For example like AM, PM, SU, a 'dot' .. or other single segment
+ elements.
+
+
+4. Driver usage
+~~~~~~~~~~~~~~~
+For userland the following interfaces are available using the sysfs interface:
+ /sys/.../
+ line1 Read/Write, lcd line1
+ line2 Read/Write, lcd line2
+ line3 Read/Write, lcd line3
+
+ get_icons Read, returns a set of available icons.
+ hide_icon Write, hide the element by writing the icon name.
+ show_icon Write, display the element by writing the icon name.
+
+ map_seg7 Read/Write, the 7 segments char set, common for all
+ yealink phones. (see map_to_7segment.h)
+
+ ringtone Write, upload binary representation of a ringtone,
+ see yealink.c. status EXPERIMENTAL due to potential
+ races between async. and sync usb calls.
+
+
+4.1 lineX
+~~~~~~~~~
+Reading /sys/../lineX will return the format string with its current value:
+
+ Example:
+ cat ./line3
+ 888888888888
+ Linux Rocks!
+
+Writing to /sys/../lineX will set the corresponding LCD line.
+ - Excess characters are ignored.
+ - If less characters are written than allowed, the remaining digits are
+ unchanged.
+ - The tab '\t'and '\n' char does not overwrite the original content.
+ - Writing a space to an icon will always hide its content.
+
+ Example:
+ date +"%m.%e.%k:%M" | sed 's/^0/ /' > ./line1
+
+ Will update the LCD with the current date & time.
+
+
+4.2 get_icons
+~~~~~~~~~~~~~
+Reading will return all available icon names and its current settings:
+
+ cat ./get_icons
+ on M
+ on D
+ on :
+ IN
+ OUT
+ STORE
+ NEW
+ REP
+ SU
+ MO
+ TU
+ WE
+ TH
+ FR
+ SA
+ LED
+ DIALTONE
+ RINGTONE
+
+
+4.3 show/hide icons
+~~~~~~~~~~~~~~~~~~~
+Writing to these files will update the state of the icon.
+Only one icon at a time can be updated.
+
+If an icon is also on a ./lineX the corresponding value is
+updated with the first letter of the icon.
+
+ Example - light up the store icon:
+ echo -n "STORE" > ./show_icon
+
+ cat ./line1
+ 18.e8.M8.88...188
+ S
+
+ Example - sound the ringtone for 10 seconds:
+ echo -n RINGTONE > /sys/..../show_icon
+ sleep 10
+ echo -n RINGTONE > /sys/..../hide_icon
+
+
+5. Sound features
+~~~~~~~~~~~~~~~~~
+Sound is supported by the ALSA driver: snd_usb_audio
+
+One 16-bit channel with sample and playback rates of 8000 Hz is the practical
+limit of the device.
+
+ Example - recording test:
+ arecord -v -d 10 -r 8000 -f S16_LE -t wav foobar.wav
+
+ Example - playback test:
+ aplay foobar.wav
+
+
+6. Credits & Acknowledgments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ - Olivier Vandorpe, for starting the usbb2k-api project doing much of
+ the reverse engineering.
+ - Martin Diehl, for pointing out how to handle USB memory allocation.
+ - Dmitry Torokhov, for the numerous code reviews and suggestions.
+
diff --git a/Documentation/intel_txt.txt b/Documentation/intel_txt.txt
new file mode 100644
index 000000000..91d89c540
--- /dev/null
+++ b/Documentation/intel_txt.txt
@@ -0,0 +1,210 @@
+Intel(R) TXT Overview:
+=====================
+
+Intel's technology for safer computing, Intel(R) Trusted Execution
+Technology (Intel(R) TXT), defines platform-level enhancements that
+provide the building blocks for creating trusted platforms.
+
+Intel TXT was formerly known by the code name LaGrande Technology (LT).
+
+Intel TXT in Brief:
+o Provides dynamic root of trust for measurement (DRTM)
+o Data protection in case of improper shutdown
+o Measurement and verification of launched environment
+
+Intel TXT is part of the vPro(TM) brand and is also available some
+non-vPro systems. It is currently available on desktop systems
+based on the Q35, X38, Q45, and Q43 Express chipsets (e.g. Dell
+Optiplex 755, HP dc7800, etc.) and mobile systems based on the GM45,
+PM45, and GS45 Express chipsets.
+
+For more information, see http://www.intel.com/technology/security/.
+This site also has a link to the Intel TXT MLE Developers Manual,
+which has been updated for the new released platforms.
+
+Intel TXT has been presented at various events over the past few
+years, some of which are:
+ LinuxTAG 2008:
+ http://www.linuxtag.org/2008/en/conf/events/vp-donnerstag.html
+ TRUST2008:
+ http://www.trust-conference.eu/downloads/Keynote-Speakers/
+ 3_David-Grawrock_The-Front-Door-of-Trusted-Computing.pdf
+ IDF, Shanghai:
+ http://www.prcidf.com.cn/index_en.html
+ IDFs 2006, 2007 (I'm not sure if/where they are online)
+
+Trusted Boot Project Overview:
+=============================
+
+Trusted Boot (tboot) is an open source, pre-kernel/VMM module that
+uses Intel TXT to perform a measured and verified launch of an OS
+kernel/VMM.
+
+It is hosted on SourceForge at http://sourceforge.net/projects/tboot.
+The mercurial source repo is available at http://www.bughost.org/
+repos.hg/tboot.hg.
+
+Tboot currently supports launching Xen (open source VMM/hypervisor
+w/ TXT support since v3.2), and now Linux kernels.
+
+
+Value Proposition for Linux or "Why should you care?"
+=====================================================
+
+While there are many products and technologies that attempt to
+measure or protect the integrity of a running kernel, they all
+assume the kernel is "good" to begin with. The Integrity
+Measurement Architecture (IMA) and Linux Integrity Module interface
+are examples of such solutions.
+
+To get trust in the initial kernel without using Intel TXT, a
+static root of trust must be used. This bases trust in BIOS
+starting at system reset and requires measurement of all code
+executed between system reset through the completion of the kernel
+boot as well as data objects used by that code. In the case of a
+Linux kernel, this means all of BIOS, any option ROMs, the
+bootloader and the boot config. In practice, this is a lot of
+code/data, much of which is subject to change from boot to boot
+(e.g. changing NICs may change option ROMs). Without reference
+hashes, these measurement changes are difficult to assess or
+confirm as benign. This process also does not provide DMA
+protection, memory configuration/alias checks and locks, crash
+protection, or policy support.
+
+By using the hardware-based root of trust that Intel TXT provides,
+many of these issues can be mitigated. Specifically: many
+pre-launch components can be removed from the trust chain, DMA
+protection is provided to all launched components, a large number
+of platform configuration checks are performed and values locked,
+protection is provided for any data in the event of an improper
+shutdown, and there is support for policy-based execution/verification.
+This provides a more stable measurement and a higher assurance of
+system configuration and initial state than would be otherwise
+possible. Since the tboot project is open source, source code for
+almost all parts of the trust chain is available (excepting SMM and
+Intel-provided firmware).
+
+How Does it Work?
+=================
+
+o Tboot is an executable that is launched by the bootloader as
+ the "kernel" (the binary the bootloader executes).
+o It performs all of the work necessary to determine if the
+ platform supports Intel TXT and, if so, executes the GETSEC[SENTER]
+ processor instruction that initiates the dynamic root of trust.
+ - If tboot determines that the system does not support Intel TXT
+ or is not configured correctly (e.g. the SINIT AC Module was
+ incorrect), it will directly launch the kernel with no changes
+ to any state.
+ - Tboot will output various information about its progress to the
+ terminal, serial port, and/or an in-memory log; the output
+ locations can be configured with a command line switch.
+o The GETSEC[SENTER] instruction will return control to tboot and
+ tboot then verifies certain aspects of the environment (e.g. TPM NV
+ lock, e820 table does not have invalid entries, etc.).
+o It will wake the APs from the special sleep state the GETSEC[SENTER]
+ instruction had put them in and place them into a wait-for-SIPI
+ state.
+ - Because the processors will not respond to an INIT or SIPI when
+ in the TXT environment, it is necessary to create a small VT-x
+ guest for the APs. When they run in this guest, they will
+ simply wait for the INIT-SIPI-SIPI sequence, which will cause
+ VMEXITs, and then disable VT and jump to the SIPI vector. This
+ approach seemed like a better choice than having to insert
+ special code into the kernel's MP wakeup sequence.
+o Tboot then applies an (optional) user-defined launch policy to
+ verify the kernel and initrd.
+ - This policy is rooted in TPM NV and is described in the tboot
+ project. The tboot project also contains code for tools to
+ create and provision the policy.
+ - Policies are completely under user control and if not present
+ then any kernel will be launched.
+ - Policy action is flexible and can include halting on failures
+ or simply logging them and continuing.
+o Tboot adjusts the e820 table provided by the bootloader to reserve
+ its own location in memory as well as to reserve certain other
+ TXT-related regions.
+o As part of its launch, tboot DMA protects all of RAM (using the
+ VT-d PMRs). Thus, the kernel must be booted with 'intel_iommu=on'
+ in order to remove this blanket protection and use VT-d's
+ page-level protection.
+o Tboot will populate a shared page with some data about itself and
+ pass this to the Linux kernel as it transfers control.
+ - The location of the shared page is passed via the boot_params
+ struct as a physical address.
+o The kernel will look for the tboot shared page address and, if it
+ exists, map it.
+o As one of the checks/protections provided by TXT, it makes a copy
+ of the VT-d DMARs in a DMA-protected region of memory and verifies
+ them for correctness. The VT-d code will detect if the kernel was
+ launched with tboot and use this copy instead of the one in the
+ ACPI table.
+o At this point, tboot and TXT are out of the picture until a
+ shutdown (S<n>)
+o In order to put a system into any of the sleep states after a TXT
+ launch, TXT must first be exited. This is to prevent attacks that
+ attempt to crash the system to gain control on reboot and steal
+ data left in memory.
+ - The kernel will perform all of its sleep preparation and
+ populate the shared page with the ACPI data needed to put the
+ platform in the desired sleep state.
+ - Then the kernel jumps into tboot via the vector specified in the
+ shared page.
+ - Tboot will clean up the environment and disable TXT, then use the
+ kernel-provided ACPI information to actually place the platform
+ into the desired sleep state.
+ - In the case of S3, tboot will also register itself as the resume
+ vector. This is necessary because it must re-establish the
+ measured environment upon resume. Once the TXT environment
+ has been restored, it will restore the TPM PCRs and then
+ transfer control back to the kernel's S3 resume vector.
+ In order to preserve system integrity across S3, the kernel
+ provides tboot with a set of memory ranges (RAM and RESERVED_KERN
+ in the e820 table, but not any memory that BIOS might alter over
+ the S3 transition) that tboot will calculate a MAC (message
+ authentication code) over and then seal with the TPM. On resume
+ and once the measured environment has been re-established, tboot
+ will re-calculate the MAC and verify it against the sealed value.
+ Tboot's policy determines what happens if the verification fails.
+ Note that the c/s 194 of tboot which has the new MAC code supports
+ this.
+
+That's pretty much it for TXT support.
+
+
+Configuring the System:
+======================
+
+This code works with 32bit, 32bit PAE, and 64bit (x86_64) kernels.
+
+In BIOS, the user must enable: TPM, TXT, VT-x, VT-d. Not all BIOSes
+allow these to be individually enabled/disabled and the screens in
+which to find them are BIOS-specific.
+
+grub.conf needs to be modified as follows:
+ title Linux 2.6.29-tip w/ tboot
+ root (hd0,0)
+ kernel /tboot.gz logging=serial,vga,memory
+ module /vmlinuz-2.6.29-tip intel_iommu=on ro
+ root=LABEL=/ rhgb console=ttyS0,115200 3
+ module /initrd-2.6.29-tip.img
+ module /Q35_SINIT_17.BIN
+
+The kernel option for enabling Intel TXT support is found under the
+Security top-level menu and is called "Enable Intel(R) Trusted
+Execution Technology (TXT)". It is considered EXPERIMENTAL and
+depends on the generic x86 support (to allow maximum flexibility in
+kernel build options), since the tboot code will detect whether the
+platform actually supports Intel TXT and thus whether any of the
+kernel code is executed.
+
+The Q35_SINIT_17.BIN file is what Intel TXT refers to as an
+Authenticated Code Module. It is specific to the chipset in the
+system and can also be found on the Trusted Boot site. It is an
+(unencrypted) module signed by Intel that is used as part of the
+DRTM process to verify and configure the system. It is signed
+because it operates at a higher privilege level in the system than
+any other macrocode and its correct operation is critical to the
+establishment of the DRTM. The process for determining the correct
+SINIT ACM for a system is documented in the SINIT-guide.txt file
+that is on the tboot SourceForge site under the SINIT ACM downloads.
diff --git a/Documentation/io-mapping.txt b/Documentation/io-mapping.txt
new file mode 100644
index 000000000..5ca78426f
--- /dev/null
+++ b/Documentation/io-mapping.txt
@@ -0,0 +1,82 @@
+The io_mapping functions in linux/io-mapping.h provide an abstraction for
+efficiently mapping small regions of an I/O device to the CPU. The initial
+usage is to support the large graphics aperture on 32-bit processors where
+ioremap_wc cannot be used to statically map the entire aperture to the CPU
+as it would consume too much of the kernel address space.
+
+A mapping object is created during driver initialization using
+
+ struct io_mapping *io_mapping_create_wc(unsigned long base,
+ unsigned long size)
+
+ 'base' is the bus address of the region to be made
+ mappable, while 'size' indicates how large a mapping region to
+ enable. Both are in bytes.
+
+ This _wc variant provides a mapping which may only be used
+ with the io_mapping_map_atomic_wc or io_mapping_map_wc.
+
+With this mapping object, individual pages can be mapped either atomically
+or not, depending on the necessary scheduling environment. Of course, atomic
+maps are more efficient:
+
+ void *io_mapping_map_atomic_wc(struct io_mapping *mapping,
+ unsigned long offset)
+
+ 'offset' is the offset within the defined mapping region.
+ Accessing addresses beyond the region specified in the
+ creation function yields undefined results. Using an offset
+ which is not page aligned yields an undefined result. The
+ return value points to a single page in CPU address space.
+
+ This _wc variant returns a write-combining map to the
+ page and may only be used with mappings created by
+ io_mapping_create_wc
+
+ Note that the task may not sleep while holding this page
+ mapped.
+
+ void io_mapping_unmap_atomic(void *vaddr)
+
+ 'vaddr' must be the value returned by the last
+ io_mapping_map_atomic_wc call. This unmaps the specified
+ page and allows the task to sleep once again.
+
+If you need to sleep while holding the lock, you can use the non-atomic
+variant, although they may be significantly slower.
+
+ void *io_mapping_map_wc(struct io_mapping *mapping,
+ unsigned long offset)
+
+ This works like io_mapping_map_atomic_wc except it allows
+ the task to sleep while holding the page mapped.
+
+ void io_mapping_unmap(void *vaddr)
+
+ This works like io_mapping_unmap_atomic, except it is used
+ for pages mapped with io_mapping_map_wc.
+
+At driver close time, the io_mapping object must be freed:
+
+ void io_mapping_free(struct io_mapping *mapping)
+
+Current Implementation:
+
+The initial implementation of these functions uses existing mapping
+mechanisms and so provides only an abstraction layer and no new
+functionality.
+
+On 64-bit processors, io_mapping_create_wc calls ioremap_wc for the whole
+range, creating a permanent kernel-visible mapping to the resource. The
+map_atomic and map functions add the requested offset to the base of the
+virtual address returned by ioremap_wc.
+
+On 32-bit processors with HIGHMEM defined, io_mapping_map_atomic_wc uses
+kmap_atomic_pfn to map the specified page in an atomic fashion;
+kmap_atomic_pfn isn't really supposed to be used with device pages, but it
+provides an efficient mapping for this usage.
+
+On 32-bit processors without HIGHMEM defined, io_mapping_map_atomic_wc and
+io_mapping_map_wc both use ioremap_wc, a terribly inefficient function which
+performs an IPI to inform all processors about the new mapping. This results
+in a significant performance penalty.
diff --git a/Documentation/io_ordering.txt b/Documentation/io_ordering.txt
new file mode 100644
index 000000000..9faae6f26
--- /dev/null
+++ b/Documentation/io_ordering.txt
@@ -0,0 +1,47 @@
+On some platforms, so-called memory-mapped I/O is weakly ordered. On such
+platforms, driver writers are responsible for ensuring that I/O writes to
+memory-mapped addresses on their device arrive in the order intended. This is
+typically done by reading a 'safe' device or bridge register, causing the I/O
+chipset to flush pending writes to the device before any reads are posted. A
+driver would usually use this technique immediately prior to the exit of a
+critical section of code protected by spinlocks. This would ensure that
+subsequent writes to I/O space arrived only after all prior writes (much like a
+memory barrier op, mb(), only with respect to I/O).
+
+A more concrete example from a hypothetical device driver:
+
+ ...
+CPU A: spin_lock_irqsave(&dev_lock, flags)
+CPU A: val = readl(my_status);
+CPU A: ...
+CPU A: writel(newval, ring_ptr);
+CPU A: spin_unlock_irqrestore(&dev_lock, flags)
+ ...
+CPU B: spin_lock_irqsave(&dev_lock, flags)
+CPU B: val = readl(my_status);
+CPU B: ...
+CPU B: writel(newval2, ring_ptr);
+CPU B: spin_unlock_irqrestore(&dev_lock, flags)
+ ...
+
+In the case above, the device may receive newval2 before it receives newval,
+which could cause problems. Fixing it is easy enough though:
+
+ ...
+CPU A: spin_lock_irqsave(&dev_lock, flags)
+CPU A: val = readl(my_status);
+CPU A: ...
+CPU A: writel(newval, ring_ptr);
+CPU A: (void)readl(safe_register); /* maybe a config register? */
+CPU A: spin_unlock_irqrestore(&dev_lock, flags)
+ ...
+CPU B: spin_lock_irqsave(&dev_lock, flags)
+CPU B: val = readl(my_status);
+CPU B: ...
+CPU B: writel(newval2, ring_ptr);
+CPU B: (void)readl(safe_register); /* maybe a config register? */
+CPU B: spin_unlock_irqrestore(&dev_lock, flags)
+
+Here, the reads from safe_register will cause the I/O chipset to flush any
+pending writes before actually posting the read to the chipset, preventing
+possible data corruption.
diff --git a/Documentation/ioctl/00-INDEX b/Documentation/ioctl/00-INDEX
new file mode 100644
index 000000000..c1a925787
--- /dev/null
+++ b/Documentation/ioctl/00-INDEX
@@ -0,0 +1,12 @@
+00-INDEX
+ - this file
+botching-up-ioctls.txt
+ - how to avoid botching up ioctls
+cdrom.txt
+ - summary of CDROM ioctl calls
+hdio.txt
+ - summary of HDIO_ ioctl calls
+ioctl-decoding.txt
+ - how to decode the bits of an IOCTL code
+ioctl-number.txt
+ - how to implement and register device/driver ioctl calls
diff --git a/Documentation/ioctl/botching-up-ioctls.txt b/Documentation/ioctl/botching-up-ioctls.txt
new file mode 100644
index 000000000..45fe78c58
--- /dev/null
+++ b/Documentation/ioctl/botching-up-ioctls.txt
@@ -0,0 +1,219 @@
+(How to avoid) Botching up ioctls
+=================================
+
+From: http://blog.ffwll.ch/2013/11/botching-up-ioctls.html
+
+By: Daniel Vetter, Copyright © 2013 Intel Corporation
+
+One clear insight kernel graphics hackers gained in the past few years is that
+trying to come up with a unified interface to manage the execution units and
+memory on completely different GPUs is a futile effort. So nowadays every
+driver has its own set of ioctls to allocate memory and submit work to the GPU.
+Which is nice, since there's no more insanity in the form of fake-generic, but
+actually only used once interfaces. But the clear downside is that there's much
+more potential to screw things up.
+
+To avoid repeating all the same mistakes again I've written up some of the
+lessons learned while botching the job for the drm/i915 driver. Most of these
+only cover technicalities and not the big-picture issues like what the command
+submission ioctl exactly should look like. Learning these lessons is probably
+something every GPU driver has to do on its own.
+
+
+Prerequisites
+-------------
+
+First the prerequisites. Without these you have already failed, because you
+will need to add a a 32-bit compat layer:
+
+ * Only use fixed sized integers. To avoid conflicts with typedefs in userspace
+ the kernel has special types like __u32, __s64. Use them.
+
+ * Align everything to the natural size and use explicit padding. 32-bit
+ platforms don't necessarily align 64-bit values to 64-bit boundaries, but
+ 64-bit platforms do. So we always need padding to the natural size to get
+ this right.
+
+ * Pad the entire struct to a multiple of 64-bits - the structure size will
+ otherwise differ on 32-bit versus 64-bit. Having a different structure size
+ hurts when passing arrays of structures to the kernel, or if the kernel
+ checks the structure size, which e.g. the drm core does.
+
+ * Pointers are __u64, cast from/to a uintprt_t on the userspace side and
+ from/to a void __user * in the kernel. Try really hard not to delay this
+ conversion or worse, fiddle the raw __u64 through your code since that
+ diminishes the checking tools like sparse can provide.
+
+
+Basics
+------
+
+With the joys of writing a compat layer avoided we can take a look at the basic
+fumbles. Neglecting these will make backward and forward compatibility a real
+pain. And since getting things wrong on the first attempt is guaranteed you
+will have a second iteration or at least an extension for any given interface.
+
+ * Have a clear way for userspace to figure out whether your new ioctl or ioctl
+ extension is supported on a given kernel. If you can't rely on old kernels
+ rejecting the new flags/modes or ioctls (since doing that was botched in the
+ past) then you need a driver feature flag or revision number somewhere.
+
+ * Have a plan for extending ioctls with new flags or new fields at the end of
+ the structure. The drm core checks the passed-in size for each ioctl call
+ and zero-extends any mismatches between kernel and userspace. That helps,
+ but isn't a complete solution since newer userspace on older kernels won't
+ notice that the newly added fields at the end get ignored. So this still
+ needs a new driver feature flags.
+
+ * Check all unused fields and flags and all the padding for whether it's 0,
+ and reject the ioctl if that's not the case. Otherwise your nice plan for
+ future extensions is going right down the gutters since someone will submit
+ an ioctl struct with random stack garbage in the yet unused parts. Which
+ then bakes in the ABI that those fields can never be used for anything else
+ but garbage.
+
+ * Have simple testcases for all of the above.
+
+
+Fun with Error Paths
+--------------------
+
+Nowadays we don't have any excuse left any more for drm drivers being neat
+little root exploits. This means we both need full input validation and solid
+error handling paths - GPUs will die eventually in the oddmost corner cases
+anyway:
+
+ * The ioctl must check for array overflows. Also it needs to check for
+ over/underflows and clamping issues of integer values in general. The usual
+ example is sprite positioning values fed directly into the hardware with the
+ hardware just having 12 bits or so. Works nicely until some odd display
+ server doesn't bother with clamping itself and the cursor wraps around the
+ screen.
+
+ * Have simple testcases for every input validation failure case in your ioctl.
+ Check that the error code matches your expectations. And finally make sure
+ that you only test for one single error path in each subtest by submitting
+ otherwise perfectly valid data. Without this an earlier check might reject
+ the ioctl already and shadow the codepath you actually want to test, hiding
+ bugs and regressions.
+
+ * Make all your ioctls restartable. First X really loves signals and second
+ this will allow you to test 90% of all error handling paths by just
+ interrupting your main test suite constantly with signals. Thanks to X's
+ love for signal you'll get an excellent base coverage of all your error
+ paths pretty much for free for graphics drivers. Also, be consistent with
+ how you handle ioctl restarting - e.g. drm has a tiny drmIoctl helper in its
+ userspace library. The i915 driver botched this with the set_tiling ioctl,
+ now we're stuck forever with some arcane semantics in both the kernel and
+ userspace.
+
+ * If you can't make a given codepath restartable make a stuck task at least
+ killable. GPUs just die and your users won't like you more if you hang their
+ entire box (by means of an unkillable X process). If the state recovery is
+ still too tricky have a timeout or hangcheck safety net as a last-ditch
+ effort in case the hardware has gone bananas.
+
+ * Have testcases for the really tricky corner cases in your error recovery code
+ - it's way too easy to create a deadlock between your hangcheck code and
+ waiters.
+
+
+Time, Waiting and Missing it
+----------------------------
+
+GPUs do most everything asynchronously, so we have a need to time operations and
+wait for oustanding ones. This is really tricky business; at the moment none of
+the ioctls supported by the drm/i915 get this fully right, which means there's
+still tons more lessons to learn here.
+
+ * Use CLOCK_MONOTONIC as your reference time, always. It's what alsa, drm and
+ v4l use by default nowadays. But let userspace know which timestamps are
+ derived from different clock domains like your main system clock (provided
+ by the kernel) or some independent hardware counter somewhere else. Clocks
+ will mismatch if you look close enough, but if performance measuring tools
+ have this information they can at least compensate. If your userspace can
+ get at the raw values of some clocks (e.g. through in-command-stream
+ performance counter sampling instructions) consider exposing those also.
+
+ * Use __s64 seconds plus __u64 nanoseconds to specify time. It's not the most
+ convenient time specification, but it's mostly the standard.
+
+ * Check that input time values are normalized and reject them if not. Note
+ that the kernel native struct ktime has a signed integer for both seconds
+ and nanoseconds, so beware here.
+
+ * For timeouts, use absolute times. If you're a good fellow and made your
+ ioctl restartable relative timeouts tend to be too coarse and can
+ indefinitely extend your wait time due to rounding on each restart.
+ Especially if your reference clock is something really slow like the display
+ frame counter. With a spec laywer hat on this isn't a bug since timeouts can
+ always be extended - but users will surely hate you if their neat animations
+ starts to stutter due to this.
+
+ * Consider ditching any synchronous wait ioctls with timeouts and just deliver
+ an asynchronous event on a pollable file descriptor. It fits much better
+ into event driven applications' main loop.
+
+ * Have testcases for corner-cases, especially whether the return values for
+ already-completed events, successful waits and timed-out waits are all sane
+ and suiting to your needs.
+
+
+Leaking Resources, Not
+----------------------
+
+A full-blown drm driver essentially implements a little OS, but specialized to
+the given GPU platforms. This means a driver needs to expose tons of handles
+for different objects and other resources to userspace. Doing that right
+entails its own little set of pitfalls:
+
+ * Always attach the lifetime of your dynamically created resources to the
+ lifetime of a file descriptor. Consider using a 1:1 mapping if your resource
+ needs to be shared across processes - fd-passing over unix domain sockets
+ also simplifies lifetime management for userspace.
+
+ * Always have O_CLOEXEC support.
+
+ * Ensure that you have sufficient insulation between different clients. By
+ default pick a private per-fd namespace which forces any sharing to be done
+ explictly. Only go with a more global per-device namespace if the objects
+ are truly device-unique. One counterexample in the drm modeset interfaces is
+ that the per-device modeset objects like connectors share a namespace with
+ framebuffer objects, which mostly are not shared at all. A separate
+ namespace, private by default, for framebuffers would have been more
+ suitable.
+
+ * Think about uniqueness requirements for userspace handles. E.g. for most drm
+ drivers it's a userspace bug to submit the same object twice in the same
+ command submission ioctl. But then if objects are shareable userspace needs
+ to know whether it has seen an imported object from a different process
+ already or not. I haven't tried this myself yet due to lack of a new class
+ of objects, but consider using inode numbers on your shared file descriptors
+ as unique identifiers - it's how real files are told apart, too.
+ Unfortunately this requires a full-blown virtual filesystem in the kernel.
+
+
+Last, but not Least
+-------------------
+
+Not every problem needs a new ioctl:
+
+ * Think hard whether you really want a driver-private interface. Of course
+ it's much quicker to push a driver-private interface than engaging in
+ lengthy discussions for a more generic solution. And occasionally doing a
+ private interface to spearhead a new concept is what's required. But in the
+ end, once the generic interface comes around you'll end up maintainer two
+ interfaces. Indefinitely.
+
+ * Consider other interfaces than ioctls. A sysfs attribute is much better for
+ per-device settings, or for child objects with fairly static lifetimes (like
+ output connectors in drm with all the detection override attributes). Or
+ maybe only your testsuite needs this interface, and then debugfs with its
+ disclaimer of not having a stable ABI would be better.
+
+Finally, the name of the game is to get it right on the first attempt, since if
+your driver proves popular and your hardware platforms long-lived then you'll
+be stuck with a given ioctl essentially forever. You can try to deprecate
+horrible ioctls on newer iterations of your hardware, but generally it takes
+years to accomplish this. And then again years until the last user able to
+complain about regressions disappears, too.
diff --git a/Documentation/ioctl/cdrom.txt b/Documentation/ioctl/cdrom.txt
new file mode 100644
index 000000000..59df81c8d
--- /dev/null
+++ b/Documentation/ioctl/cdrom.txt
@@ -0,0 +1,966 @@
+ Summary of CDROM ioctl calls.
+ ============================
+
+ Edward A. Falk <efalk@google.com>
+
+ November, 2004
+
+This document attempts to describe the ioctl(2) calls supported by
+the CDROM layer. These are by-and-large implemented (as of Linux 2.6)
+in drivers/cdrom/cdrom.c and drivers/block/scsi_ioctl.c
+
+ioctl values are listed in <linux/cdrom.h>. As of this writing, they
+are as follows:
+
+ CDROMPAUSE Pause Audio Operation
+ CDROMRESUME Resume paused Audio Operation
+ CDROMPLAYMSF Play Audio MSF (struct cdrom_msf)
+ CDROMPLAYTRKIND Play Audio Track/index (struct cdrom_ti)
+ CDROMREADTOCHDR Read TOC header (struct cdrom_tochdr)
+ CDROMREADTOCENTRY Read TOC entry (struct cdrom_tocentry)
+ CDROMSTOP Stop the cdrom drive
+ CDROMSTART Start the cdrom drive
+ CDROMEJECT Ejects the cdrom media
+ CDROMVOLCTRL Control output volume (struct cdrom_volctrl)
+ CDROMSUBCHNL Read subchannel data (struct cdrom_subchnl)
+ CDROMREADMODE2 Read CDROM mode 2 data (2336 Bytes)
+ (struct cdrom_read)
+ CDROMREADMODE1 Read CDROM mode 1 data (2048 Bytes)
+ (struct cdrom_read)
+ CDROMREADAUDIO (struct cdrom_read_audio)
+ CDROMEJECT_SW enable(1)/disable(0) auto-ejecting
+ CDROMMULTISESSION Obtain the start-of-last-session
+ address of multi session disks
+ (struct cdrom_multisession)
+ CDROM_GET_MCN Obtain the "Universal Product Code"
+ if available (struct cdrom_mcn)
+ CDROM_GET_UPC Deprecated, use CDROM_GET_MCN instead.
+ CDROMRESET hard-reset the drive
+ CDROMVOLREAD Get the drive's volume setting
+ (struct cdrom_volctrl)
+ CDROMREADRAW read data in raw mode (2352 Bytes)
+ (struct cdrom_read)
+ CDROMREADCOOKED read data in cooked mode
+ CDROMSEEK seek msf address
+ CDROMPLAYBLK scsi-cd only, (struct cdrom_blk)
+ CDROMREADALL read all 2646 bytes
+ CDROMGETSPINDOWN return 4-bit spindown value
+ CDROMSETSPINDOWN set 4-bit spindown value
+ CDROMCLOSETRAY pendant of CDROMEJECT
+ CDROM_SET_OPTIONS Set behavior options
+ CDROM_CLEAR_OPTIONS Clear behavior options
+ CDROM_SELECT_SPEED Set the CD-ROM speed
+ CDROM_SELECT_DISC Select disc (for juke-boxes)
+ CDROM_MEDIA_CHANGED Check is media changed
+ CDROM_DRIVE_STATUS Get tray position, etc.
+ CDROM_DISC_STATUS Get disc type, etc.
+ CDROM_CHANGER_NSLOTS Get number of slots
+ CDROM_LOCKDOOR lock or unlock door
+ CDROM_DEBUG Turn debug messages on/off
+ CDROM_GET_CAPABILITY get capabilities
+ CDROMAUDIOBUFSIZ set the audio buffer size
+ DVD_READ_STRUCT Read structure
+ DVD_WRITE_STRUCT Write structure
+ DVD_AUTH Authentication
+ CDROM_SEND_PACKET send a packet to the drive
+ CDROM_NEXT_WRITABLE get next writable block
+ CDROM_LAST_WRITTEN get last block written on disc
+
+
+The information that follows was determined from reading kernel source
+code. It is likely that some corrections will be made over time.
+
+
+
+
+
+
+
+General:
+
+ Unless otherwise specified, all ioctl calls return 0 on success
+ and -1 with errno set to an appropriate value on error. (Some
+ ioctls return non-negative data values.)
+
+ Unless otherwise specified, all ioctl calls return -1 and set
+ errno to EFAULT on a failed attempt to copy data to or from user
+ address space.
+
+ Individual drivers may return error codes not listed here.
+
+ Unless otherwise specified, all data structures and constants
+ are defined in <linux/cdrom.h>
+
+
+
+
+CDROMPAUSE Pause Audio Operation
+
+ usage:
+
+ ioctl(fd, CDROMPAUSE, 0);
+
+ inputs: none
+
+ outputs: none
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+
+
+CDROMRESUME Resume paused Audio Operation
+
+ usage:
+
+ ioctl(fd, CDROMRESUME, 0);
+
+ inputs: none
+
+ outputs: none
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+
+
+CDROMPLAYMSF Play Audio MSF (struct cdrom_msf)
+
+ usage:
+
+ struct cdrom_msf msf;
+ ioctl(fd, CDROMPLAYMSF, &msf);
+
+ inputs:
+ cdrom_msf structure, describing a segment of music to play
+
+ outputs: none
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+
+ notes:
+ MSF stands for minutes-seconds-frames
+ LBA stands for logical block address
+
+ Segment is described as start and end times, where each time
+ is described as minutes:seconds:frames. A frame is 1/75 of
+ a second.
+
+
+CDROMPLAYTRKIND Play Audio Track/index (struct cdrom_ti)
+
+ usage:
+
+ struct cdrom_ti ti;
+ ioctl(fd, CDROMPLAYTRKIND, &ti);
+
+ inputs:
+ cdrom_ti structure, describing a segment of music to play
+
+ outputs: none
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+
+ notes:
+ Segment is described as start and end times, where each time
+ is described as a track and an index.
+
+
+
+CDROMREADTOCHDR Read TOC header (struct cdrom_tochdr)
+
+ usage:
+
+ cdrom_tochdr header;
+ ioctl(fd, CDROMREADTOCHDR, &header);
+
+ inputs:
+ cdrom_tochdr structure
+
+ outputs:
+ cdrom_tochdr structure
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+
+
+
+CDROMREADTOCENTRY Read TOC entry (struct cdrom_tocentry)
+
+ usage:
+
+ struct cdrom_tocentry entry;
+ ioctl(fd, CDROMREADTOCENTRY, &entry);
+
+ inputs:
+ cdrom_tocentry structure
+
+ outputs:
+ cdrom_tocentry structure
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+ EINVAL entry.cdte_format not CDROM_MSF or CDROM_LBA
+ EINVAL requested track out of bounds
+ EIO I/O error reading TOC
+
+ notes:
+ TOC stands for Table Of Contents
+ MSF stands for minutes-seconds-frames
+ LBA stands for logical block address
+
+
+
+CDROMSTOP Stop the cdrom drive
+
+ usage:
+
+ ioctl(fd, CDROMSTOP, 0);
+
+ inputs: none
+
+ outputs: none
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+
+ notes:
+ Exact interpretation of this ioctl depends on the device,
+ but most seem to spin the drive down.
+
+
+CDROMSTART Start the cdrom drive
+
+ usage:
+
+ ioctl(fd, CDROMSTART, 0);
+
+ inputs: none
+
+ outputs: none
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+
+ notes:
+ Exact interpretation of this ioctl depends on the device,
+ but most seem to spin the drive up and/or close the tray.
+ Other devices ignore the ioctl completely.
+
+
+CDROMEJECT Ejects the cdrom media
+
+ usage:
+
+ ioctl(fd, CDROMEJECT, 0);
+
+ inputs: none
+
+ outputs: none
+
+ error returns:
+ ENOSYS cd drive not capable of ejecting
+ EBUSY other processes are accessing drive, or door is locked
+
+ notes:
+ See CDROM_LOCKDOOR, below.
+
+
+
+CDROMCLOSETRAY pendant of CDROMEJECT
+
+ usage:
+
+ ioctl(fd, CDROMCLOSETRAY, 0);
+
+ inputs: none
+
+ outputs: none
+
+ error returns:
+ ENOSYS cd drive not capable of closing the tray
+ EBUSY other processes are accessing drive, or door is locked
+
+ notes:
+ See CDROM_LOCKDOOR, below.
+
+
+
+CDROMVOLCTRL Control output volume (struct cdrom_volctrl)
+
+ usage:
+
+ struct cdrom_volctrl volume;
+ ioctl(fd, CDROMVOLCTRL, &volume);
+
+ inputs:
+ cdrom_volctrl structure containing volumes for up to 4
+ channels.
+
+ outputs: none
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+
+
+
+CDROMVOLREAD Get the drive's volume setting
+ (struct cdrom_volctrl)
+
+ usage:
+
+ struct cdrom_volctrl volume;
+ ioctl(fd, CDROMVOLREAD, &volume);
+
+ inputs: none
+
+ outputs:
+ The current volume settings.
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+
+
+
+CDROMSUBCHNL Read subchannel data (struct cdrom_subchnl)
+
+ usage:
+
+ struct cdrom_subchnl q;
+ ioctl(fd, CDROMSUBCHNL, &q);
+
+ inputs:
+ cdrom_subchnl structure
+
+ outputs:
+ cdrom_subchnl structure
+
+ error return:
+ ENOSYS cd drive not audio-capable.
+ EINVAL format not CDROM_MSF or CDROM_LBA
+
+ notes:
+ Format is converted to CDROM_MSF on return
+
+
+
+CDROMREADRAW read data in raw mode (2352 Bytes)
+ (struct cdrom_read)
+
+ usage:
+
+ union {
+ struct cdrom_msf msf; /* input */
+ char buffer[CD_FRAMESIZE_RAW]; /* return */
+ } arg;
+ ioctl(fd, CDROMREADRAW, &arg);
+
+ inputs:
+ cdrom_msf structure indicating an address to read.
+ Only the start values are significant.
+
+ outputs:
+ Data written to address provided by user.
+
+ error return:
+ EINVAL address less than 0, or msf less than 0:2:0
+ ENOMEM out of memory
+
+ notes:
+ As of 2.6.8.1, comments in <linux/cdrom.h> indicate that this
+ ioctl accepts a cdrom_read structure, but actual source code
+ reads a cdrom_msf structure and writes a buffer of data to
+ the same address.
+
+ MSF values are converted to LBA values via this formula:
+
+ lba = (((m * CD_SECS) + s) * CD_FRAMES + f) - CD_MSF_OFFSET;
+
+
+
+
+CDROMREADMODE1 Read CDROM mode 1 data (2048 Bytes)
+ (struct cdrom_read)
+
+ notes:
+ Identical to CDROMREADRAW except that block size is
+ CD_FRAMESIZE (2048) bytes
+
+
+
+CDROMREADMODE2 Read CDROM mode 2 data (2336 Bytes)
+ (struct cdrom_read)
+
+ notes:
+ Identical to CDROMREADRAW except that block size is
+ CD_FRAMESIZE_RAW0 (2336) bytes
+
+
+
+CDROMREADAUDIO (struct cdrom_read_audio)
+
+ usage:
+
+ struct cdrom_read_audio ra;
+ ioctl(fd, CDROMREADAUDIO, &ra);
+
+ inputs:
+ cdrom_read_audio structure containing read start
+ point and length
+
+ outputs:
+ audio data, returned to buffer indicated by ra
+
+ error return:
+ EINVAL format not CDROM_MSF or CDROM_LBA
+ EINVAL nframes not in range [1 75]
+ ENXIO drive has no queue (probably means invalid fd)
+ ENOMEM out of memory
+
+
+CDROMEJECT_SW enable(1)/disable(0) auto-ejecting
+
+ usage:
+
+ int val;
+ ioctl(fd, CDROMEJECT_SW, val);
+
+ inputs:
+ Flag specifying auto-eject flag.
+
+ outputs: none
+
+ error return:
+ ENOSYS Drive is not capable of ejecting.
+ EBUSY Door is locked
+
+
+
+
+CDROMMULTISESSION Obtain the start-of-last-session
+ address of multi session disks
+ (struct cdrom_multisession)
+ usage:
+
+ struct cdrom_multisession ms_info;
+ ioctl(fd, CDROMMULTISESSION, &ms_info);
+
+ inputs:
+ cdrom_multisession structure containing desired
+ format.
+
+ outputs:
+ cdrom_multisession structure is filled with last_session
+ information.
+
+ error return:
+ EINVAL format not CDROM_MSF or CDROM_LBA
+
+
+CDROM_GET_MCN Obtain the "Universal Product Code"
+ if available (struct cdrom_mcn)
+
+ usage:
+
+ struct cdrom_mcn mcn;
+ ioctl(fd, CDROM_GET_MCN, &mcn);
+
+ inputs: none
+
+ outputs:
+ Universal Product Code
+
+ error return:
+ ENOSYS Drive is not capable of reading MCN data.
+
+ notes:
+ Source code comments state:
+
+ The following function is implemented, although very few
+ audio discs give Universal Product Code information, which
+ should just be the Medium Catalog Number on the box. Note,
+ that the way the code is written on the CD is /not/ uniform
+ across all discs!
+
+
+
+
+CDROM_GET_UPC CDROM_GET_MCN (deprecated)
+
+ Not implemented, as of 2.6.8.1
+
+
+
+CDROMRESET hard-reset the drive
+
+ usage:
+
+ ioctl(fd, CDROMRESET, 0);
+
+ inputs: none
+
+ outputs: none
+
+ error return:
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ ENOSYS Drive is not capable of resetting.
+
+
+
+
+CDROMREADCOOKED read data in cooked mode
+
+ usage:
+
+ u8 buffer[CD_FRAMESIZE]
+ ioctl(fd, CDROMREADCOOKED, buffer);
+
+ inputs: none
+
+ outputs:
+ 2048 bytes of data, "cooked" mode.
+
+ notes:
+ Not implemented on all drives.
+
+
+
+
+CDROMREADALL read all 2646 bytes
+
+ Same as CDROMREADCOOKED, but reads 2646 bytes.
+
+
+
+CDROMSEEK seek msf address
+
+ usage:
+
+ struct cdrom_msf msf;
+ ioctl(fd, CDROMSEEK, &msf);
+
+ inputs:
+ MSF address to seek to.
+
+ outputs: none
+
+
+
+CDROMPLAYBLK scsi-cd only, (struct cdrom_blk)
+
+ usage:
+
+ struct cdrom_blk blk;
+ ioctl(fd, CDROMPLAYBLK, &blk);
+
+ inputs:
+ Region to play
+
+ outputs: none
+
+
+
+CDROMGETSPINDOWN
+
+ usage:
+
+ char spindown;
+ ioctl(fd, CDROMGETSPINDOWN, &spindown);
+
+ inputs: none
+
+ outputs:
+ The value of the current 4-bit spindown value.
+
+
+
+
+CDROMSETSPINDOWN
+
+ usage:
+
+ char spindown
+ ioctl(fd, CDROMSETSPINDOWN, &spindown);
+
+ inputs:
+ 4-bit value used to control spindown (TODO: more detail here)
+
+ outputs: none
+
+
+
+
+
+CDROM_SET_OPTIONS Set behavior options
+
+ usage:
+
+ int options;
+ ioctl(fd, CDROM_SET_OPTIONS, options);
+
+ inputs:
+ New values for drive options. The logical 'or' of:
+ CDO_AUTO_CLOSE close tray on first open(2)
+ CDO_AUTO_EJECT open tray on last release
+ CDO_USE_FFLAGS use O_NONBLOCK information on open
+ CDO_LOCK lock tray on open files
+ CDO_CHECK_TYPE check type on open for data
+
+ outputs:
+ Returns the resulting options settings in the
+ ioctl return value. Returns -1 on error.
+
+ error return:
+ ENOSYS selected option(s) not supported by drive.
+
+
+
+
+CDROM_CLEAR_OPTIONS Clear behavior options
+
+ Same as CDROM_SET_OPTIONS, except that selected options are
+ turned off.
+
+
+
+CDROM_SELECT_SPEED Set the CD-ROM speed
+
+ usage:
+
+ int speed;
+ ioctl(fd, CDROM_SELECT_SPEED, speed);
+
+ inputs:
+ New drive speed.
+
+ outputs: none
+
+ error return:
+ ENOSYS speed selection not supported by drive.
+
+
+
+CDROM_SELECT_DISC Select disc (for juke-boxes)
+
+ usage:
+
+ int disk;
+ ioctl(fd, CDROM_SELECT_DISC, disk);
+
+ inputs:
+ Disk to load into drive.
+
+ outputs: none
+
+ error return:
+ EINVAL Disk number beyond capacity of drive
+
+
+
+CDROM_MEDIA_CHANGED Check is media changed
+
+ usage:
+
+ int slot;
+ ioctl(fd, CDROM_MEDIA_CHANGED, slot);
+
+ inputs:
+ Slot number to be tested, always zero except for jukeboxes.
+ May also be special values CDSL_NONE or CDSL_CURRENT
+
+ outputs:
+ Ioctl return value is 0 or 1 depending on whether the media
+ has been changed, or -1 on error.
+
+ error returns:
+ ENOSYS Drive can't detect media change
+ EINVAL Slot number beyond capacity of drive
+ ENOMEM Out of memory
+
+
+
+CDROM_DRIVE_STATUS Get tray position, etc.
+
+ usage:
+
+ int slot;
+ ioctl(fd, CDROM_DRIVE_STATUS, slot);
+
+ inputs:
+ Slot number to be tested, always zero except for jukeboxes.
+ May also be special values CDSL_NONE or CDSL_CURRENT
+
+ outputs:
+ Ioctl return value will be one of the following values
+ from <linux/cdrom.h>:
+
+ CDS_NO_INFO Information not available.
+ CDS_NO_DISC
+ CDS_TRAY_OPEN
+ CDS_DRIVE_NOT_READY
+ CDS_DISC_OK
+ -1 error
+
+ error returns:
+ ENOSYS Drive can't detect drive status
+ EINVAL Slot number beyond capacity of drive
+ ENOMEM Out of memory
+
+
+
+
+CDROM_DISC_STATUS Get disc type, etc.
+
+ usage:
+
+ ioctl(fd, CDROM_DISC_STATUS, 0);
+
+ inputs: none
+
+ outputs:
+ Ioctl return value will be one of the following values
+ from <linux/cdrom.h>:
+ CDS_NO_INFO
+ CDS_AUDIO
+ CDS_MIXED
+ CDS_XA_2_2
+ CDS_XA_2_1
+ CDS_DATA_1
+
+ error returns: none at present
+
+ notes:
+ Source code comments state:
+
+ Ok, this is where problems start. The current interface for
+ the CDROM_DISC_STATUS ioctl is flawed. It makes the false
+ assumption that CDs are all CDS_DATA_1 or all CDS_AUDIO, etc.
+ Unfortunately, while this is often the case, it is also
+ very common for CDs to have some tracks with data, and some
+ tracks with audio. Just because I feel like it, I declare
+ the following to be the best way to cope. If the CD has
+ ANY data tracks on it, it will be returned as a data CD.
+ If it has any XA tracks, I will return it as that. Now I
+ could simplify this interface by combining these returns with
+ the above, but this more clearly demonstrates the problem
+ with the current interface. Too bad this wasn't designed
+ to use bitmasks... -Erik
+
+ Well, now we have the option CDS_MIXED: a mixed-type CD.
+ User level programmers might feel the ioctl is not very
+ useful.
+ ---david
+
+
+
+
+CDROM_CHANGER_NSLOTS Get number of slots
+
+ usage:
+
+ ioctl(fd, CDROM_CHANGER_NSLOTS, 0);
+
+ inputs: none
+
+ outputs:
+ The ioctl return value will be the number of slots in a
+ CD changer. Typically 1 for non-multi-disk devices.
+
+ error returns: none
+
+
+
+CDROM_LOCKDOOR lock or unlock door
+
+ usage:
+
+ int lock;
+ ioctl(fd, CDROM_LOCKDOOR, lock);
+
+ inputs:
+ Door lock flag, 1=lock, 0=unlock
+
+ outputs: none
+
+ error returns:
+ EDRIVE_CANT_DO_THIS Door lock function not supported.
+ EBUSY Attempt to unlock when multiple users
+ have the drive open and not CAP_SYS_ADMIN
+
+ notes:
+ As of 2.6.8.1, the lock flag is a global lock, meaning that
+ all CD drives will be locked or unlocked together. This is
+ probably a bug.
+
+ The EDRIVE_CANT_DO_THIS value is defined in <linux/cdrom.h>
+ and is currently (2.6.8.1) the same as EOPNOTSUPP
+
+
+
+CDROM_DEBUG Turn debug messages on/off
+
+ usage:
+
+ int debug;
+ ioctl(fd, CDROM_DEBUG, debug);
+
+ inputs:
+ Cdrom debug flag, 0=disable, 1=enable
+
+ outputs:
+ The ioctl return value will be the new debug flag.
+
+ error return:
+ EACCES Access denied: requires CAP_SYS_ADMIN
+
+
+
+CDROM_GET_CAPABILITY get capabilities
+
+ usage:
+
+ ioctl(fd, CDROM_GET_CAPABILITY, 0);
+
+ inputs: none
+
+ outputs:
+ The ioctl return value is the current device capability
+ flags. See CDC_CLOSE_TRAY, CDC_OPEN_TRAY, etc.
+
+
+
+CDROMAUDIOBUFSIZ set the audio buffer size
+
+ usage:
+
+ int arg;
+ ioctl(fd, CDROMAUDIOBUFSIZ, val);
+
+ inputs:
+ New audio buffer size
+
+ outputs:
+ The ioctl return value is the new audio buffer size, or -1
+ on error.
+
+ error return:
+ ENOSYS Not supported by this driver.
+
+ notes:
+ Not supported by all drivers.
+
+
+
+DVD_READ_STRUCT Read structure
+
+ usage:
+
+ dvd_struct s;
+ ioctl(fd, DVD_READ_STRUCT, &s);
+
+ inputs:
+ dvd_struct structure, containing:
+ type specifies the information desired, one of
+ DVD_STRUCT_PHYSICAL, DVD_STRUCT_COPYRIGHT,
+ DVD_STRUCT_DISCKEY, DVD_STRUCT_BCA,
+ DVD_STRUCT_MANUFACT
+ physical.layer_num desired layer, indexed from 0
+ copyright.layer_num desired layer, indexed from 0
+ disckey.agid
+
+ outputs:
+ dvd_struct structure, containing:
+ physical for type == DVD_STRUCT_PHYSICAL
+ copyright for type == DVD_STRUCT_COPYRIGHT
+ disckey.value for type == DVD_STRUCT_DISCKEY
+ bca.{len,value} for type == DVD_STRUCT_BCA
+ manufact.{len,valu} for type == DVD_STRUCT_MANUFACT
+
+ error returns:
+ EINVAL physical.layer_num exceeds number of layers
+ EIO Received invalid response from drive
+
+
+
+DVD_WRITE_STRUCT Write structure
+
+ Not implemented, as of 2.6.8.1
+
+
+
+DVD_AUTH Authentication
+
+ usage:
+
+ dvd_authinfo ai;
+ ioctl(fd, DVD_AUTH, &ai);
+
+ inputs:
+ dvd_authinfo structure. See <linux/cdrom.h>
+
+ outputs:
+ dvd_authinfo structure.
+
+ error return:
+ ENOTTY ai.type not recognized.
+
+
+
+CDROM_SEND_PACKET send a packet to the drive
+
+ usage:
+
+ struct cdrom_generic_command cgc;
+ ioctl(fd, CDROM_SEND_PACKET, &cgc);
+
+ inputs:
+ cdrom_generic_command structure containing the packet to send.
+
+ outputs: none
+ cdrom_generic_command structure containing results.
+
+ error return:
+ EIO command failed.
+ EPERM Operation not permitted, either because a
+ write command was attempted on a drive which
+ is opened read-only, or because the command
+ requires CAP_SYS_RAWIO
+ EINVAL cgc.data_direction not set
+
+
+
+CDROM_NEXT_WRITABLE get next writable block
+
+ usage:
+
+ long next;
+ ioctl(fd, CDROM_NEXT_WRITABLE, &next);
+
+ inputs: none
+
+ outputs:
+ The next writable block.
+
+ notes:
+ If the device does not support this ioctl directly, the
+ ioctl will return CDROM_LAST_WRITTEN + 7.
+
+
+
+CDROM_LAST_WRITTEN get last block written on disc
+
+ usage:
+
+ long last;
+ ioctl(fd, CDROM_LAST_WRITTEN, &last);
+
+ inputs: none
+
+ outputs:
+ The last block written on disc
+
+ notes:
+ If the device does not support this ioctl directly, the
+ result is derived from the disc's table of contents. If the
+ table of contents can't be read, this ioctl returns an
+ error.
diff --git a/Documentation/ioctl/hdio.txt b/Documentation/ioctl/hdio.txt
new file mode 100644
index 000000000..18eb98c44
--- /dev/null
+++ b/Documentation/ioctl/hdio.txt
@@ -0,0 +1,1071 @@
+ Summary of HDIO_ ioctl calls.
+ ============================
+
+ Edward A. Falk <efalk@google.com>
+
+ November, 2004
+
+This document attempts to describe the ioctl(2) calls supported by
+the HD/IDE layer. These are by-and-large implemented (as of Linux 2.6)
+in drivers/ide/ide.c and drivers/block/scsi_ioctl.c
+
+ioctl values are listed in <linux/hdreg.h>. As of this writing, they
+are as follows:
+
+ ioctls that pass argument pointers to user space:
+
+ HDIO_GETGEO get device geometry
+ HDIO_GET_UNMASKINTR get current unmask setting
+ HDIO_GET_MULTCOUNT get current IDE blockmode setting
+ HDIO_GET_QDMA get use-qdma flag
+ HDIO_SET_XFER set transfer rate via proc
+ HDIO_OBSOLETE_IDENTITY OBSOLETE, DO NOT USE
+ HDIO_GET_KEEPSETTINGS get keep-settings-on-reset flag
+ HDIO_GET_32BIT get current io_32bit setting
+ HDIO_GET_NOWERR get ignore-write-error flag
+ HDIO_GET_DMA get use-dma flag
+ HDIO_GET_NICE get nice flags
+ HDIO_GET_IDENTITY get IDE identification info
+ HDIO_GET_WCACHE get write cache mode on|off
+ HDIO_GET_ACOUSTIC get acoustic value
+ HDIO_GET_ADDRESS get sector addressing mode
+ HDIO_GET_BUSSTATE get the bus state of the hwif
+ HDIO_TRISTATE_HWIF execute a channel tristate
+ HDIO_DRIVE_RESET execute a device reset
+ HDIO_DRIVE_TASKFILE execute raw taskfile
+ HDIO_DRIVE_TASK execute task and special drive command
+ HDIO_DRIVE_CMD execute a special drive command
+ HDIO_DRIVE_CMD_AEB HDIO_DRIVE_TASK
+
+ ioctls that pass non-pointer values:
+
+ HDIO_SET_MULTCOUNT change IDE blockmode
+ HDIO_SET_UNMASKINTR permit other irqs during I/O
+ HDIO_SET_KEEPSETTINGS keep ioctl settings on reset
+ HDIO_SET_32BIT change io_32bit flags
+ HDIO_SET_NOWERR change ignore-write-error flag
+ HDIO_SET_DMA change use-dma flag
+ HDIO_SET_PIO_MODE reconfig interface to new speed
+ HDIO_SCAN_HWIF register and (re)scan interface
+ HDIO_SET_NICE set nice flags
+ HDIO_UNREGISTER_HWIF unregister interface
+ HDIO_SET_WCACHE change write cache enable-disable
+ HDIO_SET_ACOUSTIC change acoustic behavior
+ HDIO_SET_BUSSTATE set the bus state of the hwif
+ HDIO_SET_QDMA change use-qdma flag
+ HDIO_SET_ADDRESS change lba addressing modes
+
+ HDIO_SET_IDE_SCSI Set scsi emulation mode on/off
+ HDIO_SET_SCSI_IDE not implemented yet
+
+
+The information that follows was determined from reading kernel source
+code. It is likely that some corrections will be made over time.
+
+
+
+
+
+
+
+General:
+
+ Unless otherwise specified, all ioctl calls return 0 on success
+ and -1 with errno set to an appropriate value on error.
+
+ Unless otherwise specified, all ioctl calls return -1 and set
+ errno to EFAULT on a failed attempt to copy data to or from user
+ address space.
+
+ Unless otherwise specified, all data structures and constants
+ are defined in <linux/hdreg.h>
+
+
+
+HDIO_GETGEO get device geometry
+
+ usage:
+
+ struct hd_geometry geom;
+ ioctl(fd, HDIO_GETGEO, &geom);
+
+
+ inputs: none
+
+ outputs:
+
+ hd_geometry structure containing:
+
+ heads number of heads
+ sectors number of sectors/track
+ cylinders number of cylinders, mod 65536
+ start starting sector of this partition.
+
+
+ error returns:
+ EINVAL if the device is not a disk drive or floppy drive,
+ or if the user passes a null pointer
+
+
+ notes:
+
+ Not particularly useful with modern disk drives, whose geometry
+ is a polite fiction anyway. Modern drives are addressed
+ purely by sector number nowadays (lba addressing), and the
+ drive geometry is an abstraction which is actually subject
+ to change. Currently (as of Nov 2004), the geometry values
+ are the "bios" values -- presumably the values the drive had
+ when Linux first booted.
+
+ In addition, the cylinders field of the hd_geometry is an
+ unsigned short, meaning that on most architectures, this
+ ioctl will not return a meaningful value on drives with more
+ than 65535 tracks.
+
+ The start field is unsigned long, meaning that it will not
+ contain a meaningful value for disks over 219 Gb in size.
+
+
+
+
+HDIO_GET_UNMASKINTR get current unmask setting
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_GET_UNMASKINTR, &val);
+
+ inputs: none
+
+ outputs:
+ The value of the drive's current unmask setting
+
+
+
+HDIO_SET_UNMASKINTR permit other irqs during I/O
+
+ usage:
+
+ unsigned long val;
+ ioctl(fd, HDIO_SET_UNMASKINTR, val);
+
+ inputs:
+ New value for unmask flag
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range [0 1]
+ EBUSY Controller busy
+
+
+
+
+HDIO_GET_MULTCOUNT get current IDE blockmode setting
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_GET_MULTCOUNT, &val);
+
+ inputs: none
+
+ outputs:
+ The value of the current IDE block mode setting. This
+ controls how many sectors the drive will transfer per
+ interrupt.
+
+
+
+HDIO_SET_MULTCOUNT change IDE blockmode
+
+ usage:
+
+ int val;
+ ioctl(fd, HDIO_SET_MULTCOUNT, val);
+
+ inputs:
+ New value for IDE block mode setting. This controls how many
+ sectors the drive will transfer per interrupt.
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range supported by disk.
+ EBUSY Controller busy or blockmode already set.
+ EIO Drive did not accept new block mode.
+
+ notes:
+
+ Source code comments read:
+
+ This is tightly woven into the driver->do_special cannot
+ touch. DON'T do it again until a total personality rewrite
+ is committed.
+
+ If blockmode has already been set, this ioctl will fail with
+ EBUSY
+
+
+
+HDIO_GET_QDMA get use-qdma flag
+
+ Not implemented, as of 2.6.8.1
+
+
+
+HDIO_SET_XFER set transfer rate via proc
+
+ Not implemented, as of 2.6.8.1
+
+
+
+HDIO_OBSOLETE_IDENTITY OBSOLETE, DO NOT USE
+
+ Same as HDIO_GET_IDENTITY (see below), except that it only
+ returns the first 142 bytes of drive identity information.
+
+
+
+HDIO_GET_IDENTITY get IDE identification info
+
+ usage:
+
+ unsigned char identity[512];
+ ioctl(fd, HDIO_GET_IDENTITY, identity);
+
+ inputs: none
+
+ outputs:
+
+ ATA drive identity information. For full description, see
+ the IDENTIFY DEVICE and IDENTIFY PACKET DEVICE commands in
+ the ATA specification.
+
+ error returns:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ ENOMSG IDENTIFY DEVICE information not available
+
+ notes:
+
+ Returns information that was obtained when the drive was
+ probed. Some of this information is subject to change, and
+ this ioctl does not re-probe the drive to update the
+ information.
+
+ This information is also available from /proc/ide/hdX/identify
+
+
+
+HDIO_GET_KEEPSETTINGS get keep-settings-on-reset flag
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_GET_KEEPSETTINGS, &val);
+
+ inputs: none
+
+ outputs:
+ The value of the current "keep settings" flag
+
+ notes:
+
+ When set, indicates that kernel should restore settings
+ after a drive reset.
+
+
+
+HDIO_SET_KEEPSETTINGS keep ioctl settings on reset
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_SET_KEEPSETTINGS, val);
+
+ inputs:
+ New value for keep_settings flag
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range [0 1]
+ EBUSY Controller busy
+
+
+
+HDIO_GET_32BIT get current io_32bit setting
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_GET_32BIT, &val);
+
+ inputs: none
+
+ outputs:
+ The value of the current io_32bit setting
+
+ notes:
+
+ 0=16-bit, 1=32-bit, 2,3 = 32bit+sync
+
+
+
+HDIO_GET_NOWERR get ignore-write-error flag
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_GET_NOWERR, &val);
+
+ inputs: none
+
+ outputs:
+ The value of the current ignore-write-error flag
+
+
+
+HDIO_GET_DMA get use-dma flag
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_GET_DMA, &val);
+
+ inputs: none
+
+ outputs:
+ The value of the current use-dma flag
+
+
+
+HDIO_GET_NICE get nice flags
+
+ usage:
+
+ long nice;
+ ioctl(fd, HDIO_GET_NICE, &nice);
+
+ inputs: none
+
+ outputs:
+
+ The drive's "nice" values.
+
+ notes:
+
+ Per-drive flags which determine when the system will give more
+ bandwidth to other devices sharing the same IDE bus.
+ See <linux/hdreg.h>, near symbol IDE_NICE_DSC_OVERLAP.
+
+
+
+
+HDIO_SET_NICE set nice flags
+
+ usage:
+
+ unsigned long nice;
+ ...
+ ioctl(fd, HDIO_SET_NICE, nice);
+
+ inputs:
+ bitmask of nice flags.
+
+ outputs: none
+
+ error returns:
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EPERM Flags other than DSC_OVERLAP and NICE_1 set.
+ EPERM DSC_OVERLAP specified but not supported by drive
+
+ notes:
+
+ This ioctl sets the DSC_OVERLAP and NICE_1 flags from values
+ provided by the user.
+
+ Nice flags are listed in <linux/hdreg.h>, starting with
+ IDE_NICE_DSC_OVERLAP. These values represent shifts.
+
+
+
+
+
+HDIO_GET_WCACHE get write cache mode on|off
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_GET_WCACHE, &val);
+
+ inputs: none
+
+ outputs:
+ The value of the current write cache mode
+
+
+
+HDIO_GET_ACOUSTIC get acoustic value
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_GET_ACOUSTIC, &val);
+
+ inputs: none
+
+ outputs:
+ The value of the current acoustic settings
+
+ notes:
+
+ See HDIO_SET_ACOUSTIC
+
+
+
+HDIO_GET_ADDRESS
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_GET_ADDRESS, &val);
+
+ inputs: none
+
+ outputs:
+ The value of the current addressing mode:
+ 0 = 28-bit
+ 1 = 48-bit
+ 2 = 48-bit doing 28-bit
+ 3 = 64-bit
+
+
+
+HDIO_GET_BUSSTATE get the bus state of the hwif
+
+ usage:
+
+ long state;
+ ioctl(fd, HDIO_SCAN_HWIF, &state);
+
+ inputs: none
+
+ outputs:
+ Current power state of the IDE bus. One of BUSSTATE_OFF,
+ BUSSTATE_ON, or BUSSTATE_TRISTATE
+
+ error returns:
+ EACCES Access denied: requires CAP_SYS_ADMIN
+
+
+
+
+HDIO_SET_BUSSTATE set the bus state of the hwif
+
+ usage:
+
+ int state;
+ ...
+ ioctl(fd, HDIO_SCAN_HWIF, state);
+
+ inputs:
+ Desired IDE power state. One of BUSSTATE_OFF, BUSSTATE_ON,
+ or BUSSTATE_TRISTATE
+
+ outputs: none
+
+ error returns:
+ EACCES Access denied: requires CAP_SYS_RAWIO
+ EOPNOTSUPP Hardware interface does not support bus power control
+
+
+
+
+HDIO_TRISTATE_HWIF execute a channel tristate
+
+ Not implemented, as of 2.6.8.1. See HDIO_SET_BUSSTATE
+
+
+
+HDIO_DRIVE_RESET execute a device reset
+
+ usage:
+
+ int args[3]
+ ...
+ ioctl(fd, HDIO_DRIVE_RESET, args);
+
+ inputs: none
+
+ outputs: none
+
+ error returns:
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ ENXIO No such device: phy dead or ctl_addr == 0
+ EIO I/O error: reset timed out or hardware error
+
+ notes:
+
+ Execute a reset on the device as soon as the current IO
+ operation has completed.
+
+ Executes an ATAPI soft reset if applicable, otherwise
+ executes an ATA soft reset on the controller.
+
+
+
+HDIO_DRIVE_TASKFILE execute raw taskfile
+
+ Note: If you don't have a copy of the ANSI ATA specification
+ handy, you should probably ignore this ioctl.
+
+ Execute an ATA disk command directly by writing the "taskfile"
+ registers of the drive. Requires ADMIN and RAWIO access
+ privileges.
+
+ usage:
+
+ struct {
+ ide_task_request_t req_task;
+ u8 outbuf[OUTPUT_SIZE];
+ u8 inbuf[INPUT_SIZE];
+ } task;
+ memset(&task.req_task, 0, sizeof(task.req_task));
+ task.req_task.out_size = sizeof(task.outbuf);
+ task.req_task.in_size = sizeof(task.inbuf);
+ ...
+ ioctl(fd, HDIO_DRIVE_TASKFILE, &task);
+ ...
+
+ inputs:
+
+ (See below for details on memory area passed to ioctl.)
+
+ io_ports[8] values to be written to taskfile registers
+ hob_ports[8] high-order bytes, for extended commands.
+ out_flags flags indicating which registers are valid
+ in_flags flags indicating which registers should be returned
+ data_phase see below
+ req_cmd command type to be executed
+ out_size size of output buffer
+ outbuf buffer of data to be transmitted to disk
+ inbuf buffer of data to be received from disk (see [1])
+
+ outputs:
+
+ io_ports[] values returned in the taskfile registers
+ hob_ports[] high-order bytes, for extended commands.
+ out_flags flags indicating which registers are valid (see [2])
+ in_flags flags indicating which registers should be returned
+ outbuf buffer of data to be transmitted to disk (see [1])
+ inbuf buffer of data to be received from disk
+
+ error returns:
+ EACCES CAP_SYS_ADMIN or CAP_SYS_RAWIO privilege not set.
+ ENOMSG Device is not a disk drive.
+ ENOMEM Unable to allocate memory for task
+ EFAULT req_cmd == TASKFILE_IN_OUT (not implemented as of 2.6.8)
+ EPERM req_cmd == TASKFILE_MULTI_OUT and drive
+ multi-count not yet set.
+ EIO Drive failed the command.
+
+ notes:
+
+ [1] READ THE FOLLOWING NOTES *CAREFULLY*. THIS IOCTL IS
+ FULL OF GOTCHAS. Extreme caution should be used with using
+ this ioctl. A mistake can easily corrupt data or hang the
+ system.
+
+ [2] Both the input and output buffers are copied from the
+ user and written back to the user, even when not used.
+
+ [3] If one or more bits are set in out_flags and in_flags is
+ zero, the following values are used for in_flags.all and
+ written back into in_flags on completion.
+
+ * IDE_TASKFILE_STD_IN_FLAGS | (IDE_HOB_STD_IN_FLAGS << 8)
+ if LBA48 addressing is enabled for the drive
+ * IDE_TASKFILE_STD_IN_FLAGS
+ if CHS/LBA28
+
+ The association between in_flags.all and each enable
+ bitfield flips depending on endianness; fortunately, TASKFILE
+ only uses inflags.b.data bit and ignores all other bits.
+ The end result is that, on any endian machines, it has no
+ effect other than modifying in_flags on completion.
+
+ [4] The default value of SELECT is (0xa0|DEV_bit|LBA_bit)
+ except for four drives per port chipsets. For four drives
+ per port chipsets, it's (0xa0|DEV_bit|LBA_bit) for the first
+ pair and (0x80|DEV_bit|LBA_bit) for the second pair.
+
+ [5] The argument to the ioctl is a pointer to a region of
+ memory containing a ide_task_request_t structure, followed
+ by an optional buffer of data to be transmitted to the
+ drive, followed by an optional buffer to receive data from
+ the drive.
+
+ Command is passed to the disk drive via the ide_task_request_t
+ structure, which contains these fields:
+
+ io_ports[8] values for the taskfile registers
+ hob_ports[8] high-order bytes, for extended commands
+ out_flags flags indicating which entries in the
+ io_ports[] and hob_ports[] arrays
+ contain valid values. Type ide_reg_valid_t.
+ in_flags flags indicating which entries in the
+ io_ports[] and hob_ports[] arrays
+ are expected to contain valid values
+ on return.
+ data_phase See below
+ req_cmd Command type, see below
+ out_size output (user->drive) buffer size, bytes
+ in_size input (drive->user) buffer size, bytes
+
+ When out_flags is zero, the following registers are loaded.
+
+ HOB_FEATURE If the drive supports LBA48
+ HOB_NSECTOR If the drive supports LBA48
+ HOB_SECTOR If the drive supports LBA48
+ HOB_LCYL If the drive supports LBA48
+ HOB_HCYL If the drive supports LBA48
+ FEATURE
+ NSECTOR
+ SECTOR
+ LCYL
+ HCYL
+ SELECT First, masked with 0xE0 if LBA48, 0xEF
+ otherwise; then, or'ed with the default
+ value of SELECT.
+
+ If any bit in out_flags is set, the following registers are loaded.
+
+ HOB_DATA If out_flags.b.data is set. HOB_DATA will
+ travel on DD8-DD15 on little endian machines
+ and on DD0-DD7 on big endian machines.
+ DATA If out_flags.b.data is set. DATA will
+ travel on DD0-DD7 on little endian machines
+ and on DD8-DD15 on big endian machines.
+ HOB_NSECTOR If out_flags.b.nsector_hob is set
+ HOB_SECTOR If out_flags.b.sector_hob is set
+ HOB_LCYL If out_flags.b.lcyl_hob is set
+ HOB_HCYL If out_flags.b.hcyl_hob is set
+ FEATURE If out_flags.b.feature is set
+ NSECTOR If out_flags.b.nsector is set
+ SECTOR If out_flags.b.sector is set
+ LCYL If out_flags.b.lcyl is set
+ HCYL If out_flags.b.hcyl is set
+ SELECT Or'ed with the default value of SELECT and
+ loaded regardless of out_flags.b.select.
+
+ Taskfile registers are read back from the drive into
+ {io|hob}_ports[] after the command completes iff one of the
+ following conditions is met; otherwise, the original values
+ will be written back, unchanged.
+
+ 1. The drive fails the command (EIO).
+ 2. One or more than one bits are set in out_flags.
+ 3. The requested data_phase is TASKFILE_NO_DATA.
+
+ HOB_DATA If in_flags.b.data is set. It will contain
+ DD8-DD15 on little endian machines and
+ DD0-DD7 on big endian machines.
+ DATA If in_flags.b.data is set. It will contain
+ DD0-DD7 on little endian machines and
+ DD8-DD15 on big endian machines.
+ HOB_FEATURE If the drive supports LBA48
+ HOB_NSECTOR If the drive supports LBA48
+ HOB_SECTOR If the drive supports LBA48
+ HOB_LCYL If the drive supports LBA48
+ HOB_HCYL If the drive supports LBA48
+ NSECTOR
+ SECTOR
+ LCYL
+ HCYL
+
+ The data_phase field describes the data transfer to be
+ performed. Value is one of:
+
+ TASKFILE_IN
+ TASKFILE_MULTI_IN
+ TASKFILE_OUT
+ TASKFILE_MULTI_OUT
+ TASKFILE_IN_OUT
+ TASKFILE_IN_DMA
+ TASKFILE_IN_DMAQ == IN_DMA (queueing not supported)
+ TASKFILE_OUT_DMA
+ TASKFILE_OUT_DMAQ == OUT_DMA (queueing not supported)
+ TASKFILE_P_IN unimplemented
+ TASKFILE_P_IN_DMA unimplemented
+ TASKFILE_P_IN_DMAQ unimplemented
+ TASKFILE_P_OUT unimplemented
+ TASKFILE_P_OUT_DMA unimplemented
+ TASKFILE_P_OUT_DMAQ unimplemented
+
+ The req_cmd field classifies the command type. It may be
+ one of:
+
+ IDE_DRIVE_TASK_NO_DATA
+ IDE_DRIVE_TASK_SET_XFER unimplemented
+ IDE_DRIVE_TASK_IN
+ IDE_DRIVE_TASK_OUT unimplemented
+ IDE_DRIVE_TASK_RAW_WRITE
+
+ [6] Do not access {in|out}_flags->all except for resetting
+ all the bits. Always access individual bit fields. ->all
+ value will flip depending on endianness. For the same
+ reason, do not use IDE_{TASKFILE|HOB}_STD_{OUT|IN}_FLAGS
+ constants defined in hdreg.h.
+
+
+
+HDIO_DRIVE_CMD execute a special drive command
+
+ Note: If you don't have a copy of the ANSI ATA specification
+ handy, you should probably ignore this ioctl.
+
+ usage:
+
+ u8 args[4+XFER_SIZE];
+ ...
+ ioctl(fd, HDIO_DRIVE_CMD, args);
+
+ inputs:
+
+ Commands other than WIN_SMART
+ args[0] COMMAND
+ args[1] NSECTOR
+ args[2] FEATURE
+ args[3] NSECTOR
+
+ WIN_SMART
+ args[0] COMMAND
+ args[1] SECTOR
+ args[2] FEATURE
+ args[3] NSECTOR
+
+ outputs:
+
+ args[] buffer is filled with register values followed by any
+ data returned by the disk.
+ args[0] status
+ args[1] error
+ args[2] NSECTOR
+ args[3] undefined
+ args[4+] NSECTOR * 512 bytes of data returned by the command.
+
+ error returns:
+ EACCES Access denied: requires CAP_SYS_RAWIO
+ ENOMEM Unable to allocate memory for task
+ EIO Drive reports error
+
+ notes:
+
+ [1] For commands other than WIN_SMART, args[1] should equal
+ args[3]. SECTOR, LCYL and HCYL are undefined. For
+ WIN_SMART, 0x4f and 0xc2 are loaded into LCYL and HCYL
+ respectively. In both cases SELECT will contain the default
+ value for the drive. Please refer to HDIO_DRIVE_TASKFILE
+ notes for the default value of SELECT.
+
+ [2] If NSECTOR value is greater than zero and the drive sets
+ DRQ when interrupting for the command, NSECTOR * 512 bytes
+ are read from the device into the area following NSECTOR.
+ In the above example, the area would be
+ args[4..4+XFER_SIZE]. 16bit PIO is used regardless of
+ HDIO_SET_32BIT setting.
+
+ [3] If COMMAND == WIN_SETFEATURES && FEATURE == SETFEATURES_XFER
+ && NSECTOR >= XFER_SW_DMA_0 && the drive supports any DMA
+ mode, IDE driver will try to tune the transfer mode of the
+ drive accordingly.
+
+
+
+HDIO_DRIVE_TASK execute task and special drive command
+
+ Note: If you don't have a copy of the ANSI ATA specification
+ handy, you should probably ignore this ioctl.
+
+ usage:
+
+ u8 args[7];
+ ...
+ ioctl(fd, HDIO_DRIVE_TASK, args);
+
+ inputs:
+
+ Taskfile register values:
+ args[0] COMMAND
+ args[1] FEATURE
+ args[2] NSECTOR
+ args[3] SECTOR
+ args[4] LCYL
+ args[5] HCYL
+ args[6] SELECT
+
+ outputs:
+
+ Taskfile register values:
+ args[0] status
+ args[1] error
+ args[2] NSECTOR
+ args[3] SECTOR
+ args[4] LCYL
+ args[5] HCYL
+ args[6] SELECT
+
+ error returns:
+ EACCES Access denied: requires CAP_SYS_RAWIO
+ ENOMEM Unable to allocate memory for task
+ ENOMSG Device is not a disk drive.
+ EIO Drive failed the command.
+
+ notes:
+
+ [1] DEV bit (0x10) of SELECT register is ignored and the
+ appropriate value for the drive is used. All other bits
+ are used unaltered.
+
+
+
+HDIO_DRIVE_CMD_AEB HDIO_DRIVE_TASK
+
+ Not implemented, as of 2.6.8.1
+
+
+
+HDIO_SET_32BIT change io_32bit flags
+
+ usage:
+
+ int val;
+ ioctl(fd, HDIO_SET_32BIT, val);
+
+ inputs:
+ New value for io_32bit flag
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range [0 3]
+ EBUSY Controller busy
+
+
+
+
+HDIO_SET_NOWERR change ignore-write-error flag
+
+ usage:
+
+ int val;
+ ioctl(fd, HDIO_SET_NOWERR, val);
+
+ inputs:
+ New value for ignore-write-error flag. Used for ignoring
+ WRERR_STAT
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range [0 1]
+ EBUSY Controller busy
+
+
+
+HDIO_SET_DMA change use-dma flag
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_SET_DMA, val);
+
+ inputs:
+ New value for use-dma flag
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range [0 1]
+ EBUSY Controller busy
+
+
+
+HDIO_SET_PIO_MODE reconfig interface to new speed
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_SET_PIO_MODE, val);
+
+ inputs:
+ New interface speed.
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range [0 255]
+ EBUSY Controller busy
+
+
+
+HDIO_SCAN_HWIF register and (re)scan interface
+
+ usage:
+
+ int args[3]
+ ...
+ ioctl(fd, HDIO_SCAN_HWIF, args);
+
+ inputs:
+ args[0] io address to probe
+ args[1] control address to probe
+ args[2] irq number
+
+ outputs: none
+
+ error returns:
+ EACCES Access denied: requires CAP_SYS_RAWIO
+ EIO Probe failed.
+
+ notes:
+
+ This ioctl initializes the addresses and irq for a disk
+ controller, probes for drives, and creates /proc/ide
+ interfaces as appropriate.
+
+
+
+HDIO_UNREGISTER_HWIF unregister interface
+
+ usage:
+
+ int index;
+ ioctl(fd, HDIO_UNREGISTER_HWIF, index);
+
+ inputs:
+ index index of hardware interface to unregister
+
+ outputs: none
+
+ error returns:
+ EACCES Access denied: requires CAP_SYS_RAWIO
+
+ notes:
+
+ This ioctl removes a hardware interface from the kernel.
+
+ Currently (2.6.8) this ioctl silently fails if any drive on
+ the interface is busy.
+
+
+
+HDIO_SET_WCACHE change write cache enable-disable
+
+ usage:
+
+ int val;
+ ioctl(fd, HDIO_SET_WCACHE, val);
+
+ inputs:
+ New value for write cache enable
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range [0 1]
+ EBUSY Controller busy
+
+
+
+HDIO_SET_ACOUSTIC change acoustic behavior
+
+ usage:
+
+ int val;
+ ioctl(fd, HDIO_SET_ACOUSTIC, val);
+
+ inputs:
+ New value for drive acoustic settings
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range [0 254]
+ EBUSY Controller busy
+
+
+
+HDIO_SET_QDMA change use-qdma flag
+
+ Not implemented, as of 2.6.8.1
+
+
+
+HDIO_SET_ADDRESS change lba addressing modes
+
+ usage:
+
+ int val;
+ ioctl(fd, HDIO_SET_ADDRESS, val);
+
+ inputs:
+ New value for addressing mode
+ 0 = 28-bit
+ 1 = 48-bit
+ 2 = 48-bit doing 28-bit
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range [0 2]
+ EBUSY Controller busy
+ EIO Drive does not support lba48 mode.
+
+
+HDIO_SET_IDE_SCSI
+
+ usage:
+
+ long val;
+ ioctl(fd, HDIO_SET_IDE_SCSI, val);
+
+ inputs:
+ New value for scsi emulation mode (?)
+
+ outputs: none
+
+ error return:
+ EINVAL (bdev != bdev->bd_contains) (not sure what this means)
+ EACCES Access denied: requires CAP_SYS_ADMIN
+ EINVAL value out of range [0 1]
+ EBUSY Controller busy
+
+
+
+HDIO_SET_SCSI_IDE
+
+ Not implemented, as of 2.6.8.1
+
+
diff --git a/Documentation/ioctl/ioctl-decoding.txt b/Documentation/ioctl/ioctl-decoding.txt
new file mode 100644
index 000000000..e35efb0ce
--- /dev/null
+++ b/Documentation/ioctl/ioctl-decoding.txt
@@ -0,0 +1,24 @@
+To decode a hex IOCTL code:
+
+Most architectures use this generic format, but check
+include/ARCH/ioctl.h for specifics, e.g. powerpc
+uses 3 bits to encode read/write and 13 bits for size.
+
+ bits meaning
+ 31-30 00 - no parameters: uses _IO macro
+ 10 - read: _IOR
+ 01 - write: _IOW
+ 11 - read/write: _IOWR
+
+ 29-16 size of arguments
+
+ 15-8 ascii character supposedly
+ unique to each driver
+
+ 7-0 function #
+
+
+So for example 0x82187201 is a read with arg length of 0x218,
+character 'r' function 1. Grepping the source reveals this is:
+
+#define VFAT_IOCTL_READDIR_BOTH _IOR('r', 1, struct dirent [2])
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
new file mode 100644
index 000000000..ec7c81b4f
--- /dev/null
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -0,0 +1,332 @@
+Ioctl Numbers
+19 October 1999
+Michael Elizabeth Chastain
+<mec@shout.net>
+
+If you are adding new ioctl's to the kernel, you should use the _IO
+macros defined in <linux/ioctl.h>:
+
+ _IO an ioctl with no parameters
+ _IOW an ioctl with write parameters (copy_from_user)
+ _IOR an ioctl with read parameters (copy_to_user)
+ _IOWR an ioctl with both write and read parameters.
+
+'Write' and 'read' are from the user's point of view, just like the
+system calls 'write' and 'read'. For example, a SET_FOO ioctl would
+be _IOW, although the kernel would actually read data from user space;
+a GET_FOO ioctl would be _IOR, although the kernel would actually write
+data to user space.
+
+The first argument to _IO, _IOW, _IOR, or _IOWR is an identifying letter
+or number from the table below. Because of the large number of drivers,
+many drivers share a partial letter with other drivers.
+
+If you are writing a driver for a new device and need a letter, pick an
+unused block with enough room for expansion: 32 to 256 ioctl commands.
+You can register the block by patching this file and submitting the
+patch to Linus Torvalds. Or you can e-mail me at <mec@shout.net> and
+I'll register one for you.
+
+The second argument to _IO, _IOW, _IOR, or _IOWR is a sequence number
+to distinguish ioctls from each other. The third argument to _IOW,
+_IOR, or _IOWR is the type of the data going into the kernel or coming
+out of the kernel (e.g. 'int' or 'struct foo'). NOTE! Do NOT use
+sizeof(arg) as the third argument as this results in your ioctl thinking
+it passes an argument of type size_t.
+
+Some devices use their major number as the identifier; this is OK, as
+long as it is unique. Some devices are irregular and don't follow any
+convention at all.
+
+Following this convention is good because:
+
+(1) Keeping the ioctl's globally unique helps error checking:
+ if a program calls an ioctl on the wrong device, it will get an
+ error rather than some unexpected behaviour.
+
+(2) The 'strace' build procedure automatically finds ioctl numbers
+ defined with _IO, _IOW, _IOR, or _IOWR.
+
+(3) 'strace' can decode numbers back into useful names when the
+ numbers are unique.
+
+(4) People looking for ioctls can grep for them more easily when
+ this convention is used to define the ioctl numbers.
+
+(5) When following the convention, the driver code can use generic
+ code to copy the parameters between user and kernel space.
+
+This table lists ioctls visible from user land for Linux/x86. It contains
+most drivers up to 2.6.31, but I know I am missing some. There has been
+no attempt to list non-X86 architectures or ioctls from drivers/staging/.
+
+Code Seq#(hex) Include File Comments
+========================================================
+0x00 00-1F linux/fs.h conflict!
+0x00 00-1F scsi/scsi_ioctl.h conflict!
+0x00 00-1F linux/fb.h conflict!
+0x00 00-1F linux/wavefront.h conflict!
+0x02 all linux/fd.h
+0x03 all linux/hdreg.h
+0x04 D2-DC linux/umsdos_fs.h Dead since 2.6.11, but don't reuse these.
+0x06 all linux/lp.h
+0x09 all linux/raid/md_u.h
+0x10 00-0F drivers/char/s390/vmcp.h
+0x10 10-1F arch/s390/include/uapi/sclp_ctl.h
+0x10 20-2F arch/s390/include/uapi/asm/hypfs.h
+0x12 all linux/fs.h
+ linux/blkpg.h
+0x1b all InfiniBand Subsystem <http://infiniband.sourceforge.net/>
+0x20 all drivers/cdrom/cm206.h
+0x22 all scsi/sg.h
+'#' 00-3F IEEE 1394 Subsystem Block for the entire subsystem
+'$' 00-0F linux/perf_counter.h, linux/perf_event.h
+'&' 00-07 drivers/firewire/nosy-user.h
+'1' 00-1F <linux/timepps.h> PPS kit from Ulrich Windl
+ <ftp://ftp.de.kernel.org/pub/linux/daemons/ntp/PPS/>
+'2' 01-04 linux/i2o.h
+'3' 00-0F drivers/s390/char/raw3270.h conflict!
+'3' 00-1F linux/suspend_ioctls.h conflict!
+ and kernel/power/user.c
+'8' all SNP8023 advanced NIC card
+ <mailto:mcr@solidum.com>
+';' 64-7F linux/vfio.h
+'@' 00-0F linux/radeonfb.h conflict!
+'@' 00-0F drivers/video/aty/aty128fb.c conflict!
+'A' 00-1F linux/apm_bios.h conflict!
+'A' 00-0F linux/agpgart.h conflict!
+ and drivers/char/agp/compat_ioctl.h
+'A' 00-7F sound/asound.h conflict!
+'B' 00-1F linux/cciss_ioctl.h conflict!
+'B' 00-0F include/linux/pmu.h conflict!
+'B' C0-FF advanced bbus
+ <mailto:maassen@uni-freiburg.de>
+'C' all linux/soundcard.h conflict!
+'C' 01-2F linux/capi.h conflict!
+'C' F0-FF drivers/net/wan/cosa.h conflict!
+'D' all arch/s390/include/asm/dasd.h
+'D' 40-5F drivers/scsi/dpt/dtpi_ioctl.h
+'D' 05 drivers/scsi/pmcraid.h
+'E' all linux/input.h conflict!
+'E' 00-0F xen/evtchn.h conflict!
+'F' all linux/fb.h conflict!
+'F' 01-02 drivers/scsi/pmcraid.h conflict!
+'F' 20 drivers/video/fsl-diu-fb.h conflict!
+'F' 20 drivers/video/intelfb/intelfb.h conflict!
+'F' 20 linux/ivtvfb.h conflict!
+'F' 20 linux/matroxfb.h conflict!
+'F' 20 drivers/video/aty/atyfb_base.c conflict!
+'F' 00-0F video/da8xx-fb.h conflict!
+'F' 80-8F linux/arcfb.h conflict!
+'F' DD video/sstfb.h conflict!
+'G' 00-3F drivers/misc/sgi-gru/grulib.h conflict!
+'G' 00-0F linux/gigaset_dev.h conflict!
+'H' 00-7F linux/hiddev.h conflict!
+'H' 00-0F linux/hidraw.h conflict!
+'H' 01 linux/mei.h conflict!
+'H' 00-0F sound/asound.h conflict!
+'H' 20-40 sound/asound_fm.h conflict!
+'H' 80-8F sound/sfnt_info.h conflict!
+'H' 10-8F sound/emu10k1.h conflict!
+'H' 10-1F sound/sb16_csp.h conflict!
+'H' 10-1F sound/hda_hwdep.h conflict!
+'H' 40-4F sound/hdspm.h conflict!
+'H' 40-4F sound/hdsp.h conflict!
+'H' 90 sound/usb/usx2y/usb_stream.h
+'H' A0 uapi/linux/usb/cdc-wdm.h
+'H' C0-F0 net/bluetooth/hci.h conflict!
+'H' C0-DF net/bluetooth/hidp/hidp.h conflict!
+'H' C0-DF net/bluetooth/cmtp/cmtp.h conflict!
+'H' C0-DF net/bluetooth/bnep/bnep.h conflict!
+'H' F1 linux/hid-roccat.h <mailto:erazor_de@users.sourceforge.net>
+'H' F8-FA sound/firewire.h
+'I' all linux/isdn.h conflict!
+'I' 00-0F drivers/isdn/divert/isdn_divert.h conflict!
+'I' 40-4F linux/mISDNif.h conflict!
+'J' 00-1F drivers/scsi/gdth_ioctl.h
+'K' all linux/kd.h
+'L' 00-1F linux/loop.h conflict!
+'L' 10-1F drivers/scsi/mpt2sas/mpt2sas_ctl.h conflict!
+'L' E0-FF linux/ppdd.h encrypted disk device driver
+ <http://linux01.gwdg.de/~alatham/ppdd.html>
+'M' all linux/soundcard.h conflict!
+'M' 01-16 mtd/mtd-abi.h conflict!
+ and drivers/mtd/mtdchar.c
+'M' 01-03 drivers/scsi/megaraid/megaraid_sas.h
+'M' 00-0F drivers/video/fsl-diu-fb.h conflict!
+'N' 00-1F drivers/usb/scanner.h
+'N' 40-7F drivers/block/nvme.c
+'O' 00-06 mtd/ubi-user.h UBI
+'P' all linux/soundcard.h conflict!
+'P' 60-6F sound/sscape_ioctl.h conflict!
+'P' 00-0F drivers/usb/class/usblp.c conflict!
+'Q' all linux/soundcard.h
+'R' 00-1F linux/random.h conflict!
+'R' 01 linux/rfkill.h conflict!
+'R' C0-DF net/bluetooth/rfcomm.h
+'S' all linux/cdrom.h conflict!
+'S' 80-81 scsi/scsi_ioctl.h conflict!
+'S' 82-FF scsi/scsi.h conflict!
+'S' 00-7F sound/asequencer.h conflict!
+'T' all linux/soundcard.h conflict!
+'T' 00-AF sound/asound.h conflict!
+'T' all arch/x86/include/asm/ioctls.h conflict!
+'T' C0-DF linux/if_tun.h conflict!
+'U' all sound/asound.h conflict!
+'U' 00-CF linux/uinput.h conflict!
+'U' 00-EF linux/usbdevice_fs.h
+'U' C0-CF drivers/bluetooth/hci_uart.h
+'V' all linux/vt.h conflict!
+'V' all linux/videodev2.h conflict!
+'V' C0 linux/ivtvfb.h conflict!
+'V' C0 linux/ivtv.h conflict!
+'V' C0 media/davinci/vpfe_capture.h conflict!
+'V' C0 media/si4713.h conflict!
+'W' 00-1F linux/watchdog.h conflict!
+'W' 00-1F linux/wanrouter.h conflict! (pre 3.9)
+'W' 00-3F sound/asound.h conflict!
+'X' all fs/xfs/xfs_fs.h conflict!
+ and fs/xfs/linux-2.6/xfs_ioctl32.h
+ and include/linux/falloc.h
+ and linux/fs.h
+'X' all fs/ocfs2/ocfs_fs.h conflict!
+'X' 01 linux/pktcdvd.h conflict!
+'Y' all linux/cyclades.h
+'Z' 14-15 drivers/message/fusion/mptctl.h
+'[' 00-07 linux/usb/tmc.h USB Test and Measurement Devices
+ <mailto:gregkh@linuxfoundation.org>
+'a' all linux/atm*.h, linux/sonet.h ATM on linux
+ <http://lrcwww.epfl.ch/>
+'a' 00-0F drivers/crypto/qat/qat_common/adf_cfg_common.h conflict! qat driver
+'b' 00-FF conflict! bit3 vme host bridge
+ <mailto:natalia@nikhefk.nikhef.nl>
+'c' all linux/cm4000_cs.h conflict!
+'c' 00-7F linux/comstats.h conflict!
+'c' 00-7F linux/coda.h conflict!
+'c' 00-1F linux/chio.h conflict!
+'c' 80-9F arch/s390/include/asm/chsc.h conflict!
+'c' A0-AF arch/x86/include/asm/msr.h conflict!
+'d' 00-FF linux/char/drm/drm/h conflict!
+'d' 02-40 pcmcia/ds.h conflict!
+'d' F0-FF linux/digi1.h
+'e' all linux/digi1.h conflict!
+'e' 00-1F drivers/net/irda/irtty-sir.h conflict!
+'f' 00-1F linux/ext2_fs.h conflict!
+'f' 00-1F linux/ext3_fs.h conflict!
+'f' 00-0F fs/jfs/jfs_dinode.h conflict!
+'f' 00-0F fs/ext4/ext4.h conflict!
+'f' 00-0F linux/fs.h conflict!
+'f' 00-0F fs/ocfs2/ocfs2_fs.h conflict!
+'g' 00-0F linux/usb/gadgetfs.h
+'g' 20-2F linux/usb/g_printer.h
+'h' 00-7F conflict! Charon filesystem
+ <mailto:zapman@interlan.net>
+'h' 00-1F linux/hpet.h conflict!
+'h' 80-8F fs/hfsplus/ioctl.c
+'i' 00-3F linux/i2o-dev.h conflict!
+'i' 0B-1F linux/ipmi.h conflict!
+'i' 80-8F linux/i8k.h
+'j' 00-3F linux/joystick.h
+'k' 00-0F linux/spi/spidev.h conflict!
+'k' 00-05 video/kyro.h conflict!
+'k' 10-17 linux/hsi/hsi_char.h HSI character device
+'l' 00-3F linux/tcfs_fs.h transparent cryptographic file system
+ <http://web.archive.org/web/*/http://mikonos.dia.unisa.it/tcfs>
+'l' 40-7F linux/udf_fs_i.h in development:
+ <http://sourceforge.net/projects/linux-udf/>
+'m' 00-09 linux/mmtimer.h conflict!
+'m' all linux/mtio.h conflict!
+'m' all linux/soundcard.h conflict!
+'m' all linux/synclink.h conflict!
+'m' 00-19 drivers/message/fusion/mptctl.h conflict!
+'m' 00 drivers/scsi/megaraid/megaraid_ioctl.h conflict!
+'m' 00-1F net/irda/irmod.h conflict!
+'n' 00-7F linux/ncp_fs.h and fs/ncpfs/ioctl.c
+'n' 80-8F linux/nilfs2_fs.h NILFS2
+'n' E0-FF linux/matroxfb.h matroxfb
+'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2
+'o' 00-03 mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps)
+'o' 40-41 mtd/ubi-user.h UBI
+'o' 01-A1 linux/dvb/*.h DVB
+'p' 00-0F linux/phantom.h conflict! (OpenHaptics needs this)
+'p' 00-1F linux/rtc.h conflict!
+'p' 00-3F linux/mc146818rtc.h conflict!
+'p' 40-7F linux/nvram.h
+'p' 80-9F linux/ppdev.h user-space parport
+ <mailto:tim@cyberelk.net>
+'p' A1-A5 linux/pps.h LinuxPPS
+ <mailto:giometti@linux.it>
+'q' 00-1F linux/serio.h
+'q' 80-FF linux/telephony.h Internet PhoneJACK, Internet LineJACK
+ linux/ixjuser.h <http://web.archive.org/web/*/http://www.quicknet.net>
+'r' 00-1F linux/msdos_fs.h and fs/fat/dir.c
+'s' all linux/cdk.h
+'t' 00-7F linux/ppp-ioctl.h
+'t' 80-8F linux/isdn_ppp.h
+'t' 90 linux/toshiba.h
+'u' 00-1F linux/smb_fs.h gone
+'u' 20-3F linux/uvcvideo.h USB video class host driver
+'v' 00-1F linux/ext2_fs.h conflict!
+'v' 00-1F linux/fs.h conflict!
+'v' 00-0F linux/sonypi.h conflict!
+'v' C0-FF linux/meye.h conflict!
+'w' all CERN SCI driver
+'y' 00-1F packet based user level communications
+ <mailto:zapman@interlan.net>
+'z' 00-3F CAN bus card conflict!
+ <mailto:hdstich@connectu.ulm.circular.de>
+'z' 40-7F CAN bus card conflict!
+ <mailto:oe@port.de>
+'z' 10-4F drivers/s390/crypto/zcrypt_api.h conflict!
+'|' 00-7F linux/media.h
+0x80 00-1F linux/fb.h
+0x89 00-06 arch/x86/include/asm/sockios.h
+0x89 0B-DF linux/sockios.h
+0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
+0x89 E0-EF linux/dn.h PROTOPRIVATE range
+0x89 F0-FF linux/sockios.h SIOCDEVPRIVATE range
+0x8B all linux/wireless.h
+0x8C 00-3F WiNRADiO driver
+ <http://www.winradio.com.au/>
+0x90 00 drivers/cdrom/sbpcd.h
+0x92 00-0F drivers/usb/mon/mon_bin.c
+0x93 60-7F linux/auto_fs.h
+0x94 all fs/btrfs/ioctl.h
+0x95 all uapi/linux/kdbus.h kdbus IPC driver
+0x97 00-7F fs/ceph/ioctl.h Ceph file system
+0x99 00-0F 537-Addinboard driver
+ <mailto:buk@buks.ipn.de>
+0xA0 all linux/sdp/sdp.h Industrial Device Project
+ <mailto:kenji@bitgate.com>
+0xA2 00-0F arch/tile/include/asm/hardwall.h
+0xA3 80-8F Port ACL in development:
+ <mailto:tlewis@mindspring.com>
+0xA3 90-9F linux/dtlk.h
+0xAB 00-1F linux/nbd.h
+0xAC 00-1F linux/raw.h
+0xAD 00 Netfilter device in development:
+ <mailto:rusty@rustcorp.com.au>
+0xAE all linux/kvm.h Kernel-based Virtual Machine
+ <mailto:kvm@vger.kernel.org>
+0xAF 00-1F linux/fsl_hypervisor.h Freescale hypervisor
+0xB0 all RATIO devices in development:
+ <mailto:vgo@ratio.de>
+0xB1 00-1F PPPoX <mailto:mostrows@styx.uwaterloo.ca>
+0xB3 00 linux/mmc/ioctl.h
+0xC0 00-0F linux/usb/iowarrior.h
+0xCA 00-0F uapi/misc/cxl.h
+0xCB 00-1F CBM serial IEC bus in development:
+ <mailto:michael.klein@puffin.lb.shuttle.de>
+0xCD 01 linux/reiserfs_fs.h
+0xCF 02 fs/cifs/ioctl.c
+0xDB 00-0F drivers/char/mwave/mwavepub.h
+0xDD 00-3F ZFCP device driver see drivers/s390/scsi/
+ <mailto:aherrman@de.ibm.com>
+0xEC 00-01 drivers/platform/chrome/cros_ec_dev.h ChromeOS EC driver
+0xF3 00-3F drivers/usb/misc/sisusbvga/sisusb.h sisfb (in development)
+ <mailto:thomas@winischhofer.net>
+0xF4 00-1F video/mbxfb.h mbxfb
+ <mailto:raph@8d.com>
+0xF6 all LTTng Linux Trace Toolkit Next Generation
+ <mailto:mathieu.desnoyers@efficios.com>
+0xFD all linux/dm-ioctl.h
diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt
new file mode 100644
index 000000000..65f694f2d
--- /dev/null
+++ b/Documentation/iostats.txt
@@ -0,0 +1,164 @@
+I/O statistics fields
+---------------
+
+Since 2.4.20 (and some versions before, with patches), and 2.5.45,
+more extensive disk statistics have been introduced to help measure disk
+activity. Tools such as sar and iostat typically interpret these and do
+the work for you, but in case you are interested in creating your own
+tools, the fields are explained here.
+
+In 2.4 now, the information is found as additional fields in
+/proc/partitions. In 2.6, the same information is found in two
+places: one is in the file /proc/diskstats, and the other is within
+the sysfs file system, which must be mounted in order to obtain
+the information. Throughout this document we'll assume that sysfs
+is mounted on /sys, although of course it may be mounted anywhere.
+Both /proc/diskstats and sysfs use the same source for the information
+and so should not differ.
+
+Here are examples of these different formats:
+
+2.4:
+ 3 0 39082680 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
+ 3 1 9221278 hda1 35486 0 35496 38030 0 0 0 0 0 38030 38030
+
+
+2.6 sysfs:
+ 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
+ 35486 38030 38030 38030
+
+2.6 diskstats:
+ 3 0 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
+ 3 1 hda1 35486 38030 38030 38030
+
+On 2.4 you might execute "grep 'hda ' /proc/partitions". On 2.6, you have
+a choice of "cat /sys/block/hda/stat" or "grep 'hda ' /proc/diskstats".
+The advantage of one over the other is that the sysfs choice works well
+if you are watching a known, small set of disks. /proc/diskstats may
+be a better choice if you are watching a large number of disks because
+you'll avoid the overhead of 50, 100, or 500 or more opens/closes with
+each snapshot of your disk statistics.
+
+In 2.4, the statistics fields are those after the device name. In
+the above example, the first field of statistics would be 446216.
+By contrast, in 2.6 if you look at /sys/block/hda/stat, you'll
+find just the eleven fields, beginning with 446216. If you look at
+/proc/diskstats, the eleven fields will be preceded by the major and
+minor device numbers, and device name. Each of these formats provides
+eleven fields of statistics, each meaning exactly the same things.
+All fields except field 9 are cumulative since boot. Field 9 should
+go to zero as I/Os complete; all others only increase (unless they
+overflow and wrap). Yes, these are (32-bit or 64-bit) unsigned long
+(native word size) numbers, and on a very busy or long-lived system they
+may wrap. Applications should be prepared to deal with that; unless
+your observations are measured in large numbers of minutes or hours,
+they should not wrap twice before you notice them.
+
+Each set of stats only applies to the indicated device; if you want
+system-wide stats you'll have to find all the devices and sum them all up.
+
+Field 1 -- # of reads completed
+ This is the total number of reads completed successfully.
+Field 2 -- # of reads merged, field 6 -- # of writes merged
+ Reads and writes which are adjacent to each other may be merged for
+ efficiency. Thus two 4K reads may become one 8K read before it is
+ ultimately handed to the disk, and so it will be counted (and queued)
+ as only one I/O. This field lets you know how often this was done.
+Field 3 -- # of sectors read
+ This is the total number of sectors read successfully.
+Field 4 -- # of milliseconds spent reading
+ This is the total number of milliseconds spent by all reads (as
+ measured from __make_request() to end_that_request_last()).
+Field 5 -- # of writes completed
+ This is the total number of writes completed successfully.
+Field 6 -- # of writes merged
+ See the description of field 2.
+Field 7 -- # of sectors written
+ This is the total number of sectors written successfully.
+Field 8 -- # of milliseconds spent writing
+ This is the total number of milliseconds spent by all writes (as
+ measured from __make_request() to end_that_request_last()).
+Field 9 -- # of I/Os currently in progress
+ The only field that should go to zero. Incremented as requests are
+ given to appropriate struct request_queue and decremented as they finish.
+Field 10 -- # of milliseconds spent doing I/Os
+ This field increases so long as field 9 is nonzero.
+Field 11 -- weighted # of milliseconds spent doing I/Os
+ This field is incremented at each I/O start, I/O completion, I/O
+ merge, or read of these stats by the number of I/Os in progress
+ (field 9) times the number of milliseconds spent doing I/O since the
+ last update of this field. This can provide an easy measure of both
+ I/O completion time and the backlog that may be accumulating.
+
+
+To avoid introducing performance bottlenecks, no locks are held while
+modifying these counters. This implies that minor inaccuracies may be
+introduced when changes collide, so (for instance) adding up all the
+read I/Os issued per partition should equal those made to the disks ...
+but due to the lack of locking it may only be very close.
+
+In 2.6, there are counters for each CPU, which make the lack of locking
+almost a non-issue. When the statistics are read, the per-CPU counters
+are summed (possibly overflowing the unsigned long variable they are
+summed to) and the result given to the user. There is no convenient
+user interface for accessing the per-CPU counters themselves.
+
+Disks vs Partitions
+-------------------
+
+There were significant changes between 2.4 and 2.6 in the I/O subsystem.
+As a result, some statistic information disappeared. The translation from
+a disk address relative to a partition to the disk address relative to
+the host disk happens much earlier. All merges and timings now happen
+at the disk level rather than at both the disk and partition level as
+in 2.4. Consequently, you'll see a different statistics output on 2.6 for
+partitions from that for disks. There are only *four* fields available
+for partitions on 2.6 machines. This is reflected in the examples above.
+
+Field 1 -- # of reads issued
+ This is the total number of reads issued to this partition.
+Field 2 -- # of sectors read
+ This is the total number of sectors requested to be read from this
+ partition.
+Field 3 -- # of writes issued
+ This is the total number of writes issued to this partition.
+Field 4 -- # of sectors written
+ This is the total number of sectors requested to be written to
+ this partition.
+
+Note that since the address is translated to a disk-relative one, and no
+record of the partition-relative address is kept, the subsequent success
+or failure of the read cannot be attributed to the partition. In other
+words, the number of reads for partitions is counted slightly before time
+of queuing for partitions, and at completion for whole disks. This is
+a subtle distinction that is probably uninteresting for most cases.
+
+More significant is the error induced by counting the numbers of
+reads/writes before merges for partitions and after for disks. Since a
+typical workload usually contains a lot of successive and adjacent requests,
+the number of reads/writes issued can be several times higher than the
+number of reads/writes completed.
+
+In 2.6.25, the full statistic set is again available for partitions and
+disk and partition statistics are consistent again. Since we still don't
+keep record of the partition-relative address, an operation is attributed to
+the partition which contains the first sector of the request after the
+eventual merges. As requests can be merged across partition, this could lead
+to some (probably insignificant) inaccuracy.
+
+Additional notes
+----------------
+
+In 2.6, sysfs is not mounted by default. If your distribution of
+Linux hasn't added it already, here's the line you'll want to add to
+your /etc/fstab:
+
+none /sys sysfs defaults 0 0
+
+
+In 2.6, all disk statistics were removed from /proc/stat. In 2.4, they
+appear in both /proc/partitions and /proc/stat, although the ones in
+/proc/stat take a very different format from those in /proc/partitions
+(see proc(5), if your system has it.)
+
+-- ricklind@us.ibm.com
diff --git a/Documentation/irqflags-tracing.txt b/Documentation/irqflags-tracing.txt
new file mode 100644
index 000000000..f6da05670
--- /dev/null
+++ b/Documentation/irqflags-tracing.txt
@@ -0,0 +1,50 @@
+IRQ-flags state tracing
+
+started by Ingo Molnar <mingo@redhat.com>
+
+the "irq-flags tracing" feature "traces" hardirq and softirq state, in
+that it gives interested subsystems an opportunity to be notified of
+every hardirqs-off/hardirqs-on, softirqs-off/softirqs-on event that
+happens in the kernel.
+
+CONFIG_TRACE_IRQFLAGS_SUPPORT is needed for CONFIG_PROVE_SPIN_LOCKING
+and CONFIG_PROVE_RW_LOCKING to be offered by the generic lock debugging
+code. Otherwise only CONFIG_PROVE_MUTEX_LOCKING and
+CONFIG_PROVE_RWSEM_LOCKING will be offered on an architecture - these
+are locking APIs that are not used in IRQ context. (the one exception
+for rwsems is worked around)
+
+architecture support for this is certainly not in the "trivial"
+category, because lots of lowlevel assembly code deal with irq-flags
+state changes. But an architecture can be irq-flags-tracing enabled in a
+rather straightforward and risk-free manner.
+
+Architectures that want to support this need to do a couple of
+code-organizational changes first:
+
+- add and enable TRACE_IRQFLAGS_SUPPORT in their arch level Kconfig file
+
+and then a couple of functional changes are needed as well to implement
+irq-flags-tracing support:
+
+- in lowlevel entry code add (build-conditional) calls to the
+ trace_hardirqs_off()/trace_hardirqs_on() functions. The lock validator
+ closely guards whether the 'real' irq-flags matches the 'virtual'
+ irq-flags state, and complains loudly (and turns itself off) if the
+ two do not match. Usually most of the time for arch support for
+ irq-flags-tracing is spent in this state: look at the lockdep
+ complaint, try to figure out the assembly code we did not cover yet,
+ fix and repeat. Once the system has booted up and works without a
+ lockdep complaint in the irq-flags-tracing functions arch support is
+ complete.
+- if the architecture has non-maskable interrupts then those need to be
+ excluded from the irq-tracing [and lock validation] mechanism via
+ lockdep_off()/lockdep_on().
+
+in general there is no risk from having an incomplete irq-flags-tracing
+implementation in an architecture: lockdep will detect that and will
+turn itself off. I.e. the lock validator will still be reliable. There
+should be no crashes due to irq-tracing bugs. (except if the assembly
+changes break other code by modifying conditions or registers that
+shouldn't be)
+
diff --git a/Documentation/isapnp.txt b/Documentation/isapnp.txt
new file mode 100644
index 000000000..400d1b5b5
--- /dev/null
+++ b/Documentation/isapnp.txt
@@ -0,0 +1,14 @@
+ISA Plug & Play support by Jaroslav Kysela <perex@suse.cz>
+==========================================================
+
+Interface /proc/isapnp
+======================
+
+The interface has been removed. See pnp.txt for more details.
+
+Interface /proc/bus/isapnp
+==========================
+
+This directory allows access to ISA PnP cards and logical devices.
+The regular files contain the contents of ISA PnP registers for
+a logical device.
diff --git a/Documentation/isdn/00-INDEX b/Documentation/isdn/00-INDEX
new file mode 100644
index 000000000..e87e336f5
--- /dev/null
+++ b/Documentation/isdn/00-INDEX
@@ -0,0 +1,50 @@
+00-INDEX
+ - this file (info on ISDN implementation for Linux)
+CREDITS
+ - list of the kind folks that brought you this stuff.
+HiSax.cert
+ - information about the ITU approval certification of the HiSax driver.
+INTERFACE
+ - description of isdn4linux Link Level and Hardware Level interfaces.
+INTERFACE.fax
+ - description of the fax subinterface of isdn4linux.
+INTERFACE.CAPI
+ - description of kernel CAPI Link Level to Hardware Level interface.
+README
+ - general info on what you need and what to do for Linux ISDN.
+README.FAQ
+ - general info for FAQ.
+README.HiSax
+ - info on the HiSax driver which replaces the old teles.
+README.act2000
+ - info on driver for IBM ACT-2000 card.
+README.audio
+ - info for running audio over ISDN.
+README.avmb1
+ - info on driver for AVM-B1 ISDN card.
+README.concap
+ - info on "CONCAP" encapsulation protocol interface used for X.25.
+README.diversion
+ - info on module for isdn diversion services.
+README.fax
+ - info for using Fax over ISDN.
+README.gigaset
+ - info on the drivers for Siemens Gigaset ISDN adapters
+README.hfc-pci
+ - info on hfc-pci based cards.
+README.hysdn
+ - info on driver for Hypercope active HYSDN cards
+README.icn
+ - info on the ICN-ISDN-card and its driver.
+README.mISDN
+ - info on the Modular ISDN subsystem (mISDN)
+README.pcbit
+ - info on the PCBIT-D ISDN adapter and driver.
+README.sc
+ - info on driver for Spellcaster cards.
+README.syncppp
+ - info on running Sync PPP over ISDN.
+README.x25
+ - info for running X.25 over ISDN.
+syncPPP.FAQ
+ - frequently asked questions about running PPP over ISDN.
diff --git a/Documentation/isdn/CREDITS b/Documentation/isdn/CREDITS
new file mode 100644
index 000000000..c1679e913
--- /dev/null
+++ b/Documentation/isdn/CREDITS
@@ -0,0 +1,70 @@
+
+I want to thank all who contributed to this project and especially to:
+(in alphabetical order)
+
+Thomas Bogendörfer (tsbogend@bigbug.franken.de)
+ Tester, lots of bugfixes and hints.
+
+Alan Cox (alan@lxorguk.ukuu.org.uk)
+ For help getting into standard-kernel.
+
+Henner Eisen (eis@baty.hanse.de)
+ For X.25 implementation.
+
+Volker Götz (volker@oops.franken.de)
+ For contribution of man-pages, the imontty-tool and a perfect
+ maintaining of the mailing-list at hub-wue.
+
+Matthias Hessler (hessler@isdn4linux.de)
+ For creating and maintaining the FAQ.
+
+Bernhard Hailer (Bernhard.Hailer@lrz.uni-muenchen.de)
+ For creating the FAQ, and the leafsite HOWTO.
+
+Michael 'Ghandi' Herold (michael@abadonna.franken.de)
+ For contribution of the vbox answering machine.
+
+Michael Hipp (Michael.Hipp@student.uni-tuebingen.de)
+ For his Sync-PPP-code.
+
+Karsten Keil (keil@isdn4linux.de)
+ For adding 1TR6-support to the Teles-driver.
+ For the HiSax-driver.
+
+Michael Knigge (knick@cove.han.de)
+ For contributing the imon-tool
+
+Andreas Kool (akool@Kool.f.EUnet.de)
+ For contribution of the isdnlog/isdnrep-tool
+
+Pedro Roque Marques (roque@di.fc.ul.pt)
+ For lot of new ideas and the pcbit driver.
+
+Eberhard Mönkeberg (emoenke@gwdg.de)
+ For testing and help to get into kernel.
+
+Thomas Neumann (tn@ruhr.de)
+ For help with Cisco-SLARP and keepalive
+
+Jan den Ouden (denouden@groovin.xs4all.nl)
+ For contribution of the original teles-driver
+
+Carsten Paeth (calle@calle.in-berlin.de)
+ For the AVM-B1-CAPI2.0 driver
+
+Thomas Pfeiffer (pfeiffer@pds.de)
+ For V.110, extended T.70 and Hylafax extensions in isdn_tty.c
+
+Max Riegel (riegel@max.franken.de)
+ For making the ICN hardware-documentation and test-equipment available.
+
+Armin Schindler (mac@melware.de)
+ For the eicon active card driver.
+
+Gerhard 'Fido' Schneider (fido@wuff.mayn.de)
+ For heavy-duty-beta-testing with his BBS ;)
+
+Thomas Uhl (uhl@think.de)
+ For distributing the cards.
+ For pushing me to work ;-)
+
diff --git a/Documentation/isdn/HiSax.cert b/Documentation/isdn/HiSax.cert
new file mode 100644
index 000000000..f2a6fcb8e
--- /dev/null
+++ b/Documentation/isdn/HiSax.cert
@@ -0,0 +1,96 @@
+-----BEGIN PGP SIGNED MESSAGE-----
+
+First:
+
+ HiSax is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+However, if you wish to modify the HiSax sources, please note the following:
+
+HiSax has passed the ITU approval test suite with ELSA Quickstep ISDN cards
+and Eicon Technology Diva 2.01 PCI card.
+The certification is only valid for the combination of the tested software
+version and the tested hardware. Any changes to the HiSax source code may
+therefore affect the certification.
+
+Additional ITU approval tests have been carried out for all generic cards
+using Colognechip single chip solutions HFC-S PCI A for PCI cards as well
+as HFC-S USB based USB ISDN ta adapters.
+These tests included all layers 1-3 and as well all functional tests for
+the layer 1. Because all hardware based on these chips are complete ISDN
+solutions in one chip all cards and USB-TAs using these chips are to be
+regarded as approved for those tests. Some additional electrical tests
+of the layer 1 which are independent of the driver and related to a
+special hardware used will be regarded as approved if at least one
+solution has been tested including those electrical tests. So if cards
+or tas have been completely approved for any other os, the approval
+for those electrical tests is valid for linux, too.
+Please send any questions regarding this drivers or approval abouts to
+werner@isdn-development.de
+Additional information and the type approval documents will be found
+shortly on the Colognechip website www.colognechip.com
+
+If you change the main files of the HiSax ISDN stack, the certification will
+become invalid. Because in most countries it is illegal to connect
+unapproved ISDN equipment to the public network, I have to guarantee that
+changes in HiSax do not affect the certification.
+
+In order to make a valid certification apparent to the user, I have built in
+some validation checks that are made during the make process. The HiSax main
+files are protected by md5 checksums and the md5sum file is pgp signed by
+myself:
+
+KeyID 1024/FF992F6D 1997/01/16 Karsten Keil <kkeil@suse.de>
+Key fingerprint = 92 6B F7 58 EE 86 28 C8 C4 1A E6 DC 39 89 F2 AA
+
+Only if the checksums are OK, and the signature of the file
+"drivers/isdn/hisax/md5sums.asc" match, is the certification valid; a
+message confirming this is then displayed during the hisax init process.
+
+The affected files are:
+
+drivers/isdn/hisax/isac.c
+drivers/isdn/hisax/isdnl1.c
+drivers/isdn/hisax/isdnl2.c
+drivers/isdn/hisax/isdnl3.c
+drivers/isdn/hisax/tei.c
+drivers/isdn/hisax/callc.c
+drivers/isdn/hisax/l3dss1.c
+drivers/isdn/hisax/l3_1tr6.c
+drivers/isdn/hisax/cert.c
+drivers/isdn/hisax/elsa.c
+drivers/isdn/hisax/diva.c
+drivers/isdn/hisax/hfc_pci.c
+
+Please send any changes, bugfixes and patches to me rather than implementing
+them directly into the HiSax sources.
+
+This does not reduce your rights granted by the GNU General Public License.
+If you wish to change the sources, go ahead; but note that then the
+certification is invalid even if you use one of the approved cards.
+
+Here are the certification registration numbers for ELSA Quickstep cards:
+German D133361J CETECOM ICT Services GmbH 0682
+European D133362J CETECOM ICT Services GmbH 0682
+
+
+Karsten Keil
+keil@isdn4linux.de
+
+-----BEGIN PGP SIGNATURE-----
+Version: 2.6.3i
+Charset: noconv
+
+iQCVAwUBOFAwqTpxHvX/mS9tAQFI2QP9GLDK2iy/KBhwReE3F7LeO+tVhffTVZ3a
+20q5/z/WcIg/pnH0uTkl2UgDXBFXYl45zJyDGNpAposIFmT+Edd14o7Vj1w/BBdn
+Y+5rBmJf+gyBu61da5d6bv0lpymwRa/um+ri+ilYnZ/XPfg5JKhdjGSBCJuJAElM
+d2jFbTrsMYw=
+=LNf9
+-----END PGP SIGNATURE-----
diff --git a/Documentation/isdn/INTERFACE b/Documentation/isdn/INTERFACE
new file mode 100644
index 000000000..5df17e5b2
--- /dev/null
+++ b/Documentation/isdn/INTERFACE
@@ -0,0 +1,759 @@
+$Id: INTERFACE,v 1.15.8.2 2001/03/13 16:17:07 kai Exp $
+
+Description of the Interface between Linklevel and Hardwarelevel
+ of isdn4linux:
+
+
+ The Communication between Linklevel (LL) and Hardwarelevel (HL)
+ is based on the struct isdn_if (defined in isdnif.h).
+
+ An HL-driver can register itself at LL by calling the function
+ register_isdn() with a pointer to that struct. Prior to that, it has
+ to preset some of the fields of isdn_if. The LL sets the rest of
+ the fields. All further communication is done via callbacks using
+ the function-pointers defined in isdn_if.
+
+ Changes/Version numbering:
+
+ During development of the ISDN subsystem, several changes have been
+ made to the interface. Before it went into kernel, the package
+ had a unique version number. The last version, distributed separately
+ was 0.7.4. When the subsystem went into kernel, every functional unit
+ got a separate version number. These numbers are shown at initialization,
+ separated by slashes:
+
+ c.c/t.t/n.n/p.p/a.a/v.v
+
+ where
+
+ c.c is the revision of the common code.
+ t.t is the revision of the tty related code.
+ n.n is the revision of the network related code.
+ p.p is the revision of the ppp related code.
+ a.a is the revision of the audio related code.
+ v.v is the revision of the V.110 related code.
+
+ Changes in this document are marked with '***CHANGEx' where x representing
+ the version number. If that number starts with 0, it refers to the old,
+ separately distributed package. If it starts with one of the letters
+ above, it refers to the revision of the corresponding module.
+ ***CHANGEIx refers to the revision number of the isdnif.h
+
+1. Description of the fields of isdn_if:
+
+ int channels;
+
+ This field has to be set by the HL-driver to the number of channels
+ supported prior to calling register_isdn(). Upon return of the call,
+ the LL puts an id there, which has to be used by the HL-driver when
+ invoking the other callbacks.
+
+ int maxbufsize;
+
+ ***CHANGE0.6: New since this version.
+
+ Also to be preset by the HL-driver. With this value the HL-driver
+ tells the LL the maximum size of a data-packet it will accept.
+
+ unsigned long features;
+
+ To be preset by the HL-driver. Using this field, the HL-driver
+ announces the features supported. At the moment this is limited to
+ report the supported layer2 and layer3-protocols. For setting this
+ field the constants ISDN_FEATURE..., declared in isdnif.h have to be
+ used.
+
+ ***CHANGE0.7.1: The line type (1TR6, EDSS1) has to be set.
+
+ unsigned short hl_hdrlen;
+
+ ***CHANGE0.7.4: New field.
+
+ To be preset by the HL-driver, if it supports sk_buff's. The driver
+ should put here the amount of additional space needed in sk_buff's for
+ its internal purposes. Drivers not supporting sk_buff's should
+ initialize this field to 0.
+
+ void (*rcvcallb_skb)(int, int, struct sk_buff *)
+
+ ***CHANGE0.7.4: New field.
+
+ This field will be set by LL. The HL-driver delivers received data-
+ packets by calling this function. Upon calling, the HL-driver must
+ already have its private data pulled off the head of the sk_buff.
+
+ Parameter:
+ int driver-Id
+ int Channel-number locally to the driver. (starting with 0)
+ struct sk_buff * Pointer to sk_buff, containing received data.
+
+ int (*statcallb)(isdn_ctrl*);
+
+ This field will be set by LL. This function has to be called by the
+ HL-driver for signaling status-changes or other events to the LL.
+
+ Parameter:
+ isdn_ctrl*
+
+ The struct isdn_ctrl also defined in isdn_if. The exact meanings of its
+ fields are described together with the descriptions of the possible
+ events. Here is only a short description of the fields:
+
+ driver = driver Id.
+ command = event-type. (one of the constants ISDN_STAT_...)
+ arg = depends on event-type.
+ num = depends on event-type.
+
+ Returnvalue:
+ 0 on success, else -1
+
+ int (*command)(isdn_ctrl*);
+
+ This field has to be preset by the HL-driver. It points to a function,
+ to be called by LL to perform functions like dialing, B-channel
+ setup, etc. The exact meaning of the parameters is described with the
+ descriptions of the possible commands.
+
+ Parameter:
+ isdn_ctrl*
+ driver = driver-Id
+ command = command to perform. (one of the constants ISDN_CMD_...)
+ arg = depends on command.
+ num = depends on command.
+
+ Returnvalue:
+ >=0 on success, else error-code (-ENODEV etc.)
+
+ int (*writebuf_skb)(int, int, int, struct sk_buff *)
+
+ ***CHANGE0.7.4: New field.
+ ***CHANGEI.1.21: New field.
+
+ This field has to be preset by the HL-driver. The given function will
+ be called by the LL for delivering data to be send via B-Channel.
+
+
+ Parameter:
+ int driver-Id ***CHANGE0.7.4: New parameter.
+ int channel-number locally to the HL-driver. (starts with 0)
+ int ack ***ChangeI1.21: New parameter
+ If this is !0, the driver has to signal the delivery
+ by sending an ISDN_STAT_BSENT. If this is 0, the driver
+ MUST NOT send an ISDN_STAT_BSENT.
+ struct sk_buff * Pointer to sk_buff containing data to be send via
+ B-channel.
+
+ Returnvalue:
+ Length of data accepted on success, else error-code (-EINVAL on
+ oversized packets etc.)
+
+ int (*writecmd)(u_char*, int, int, int, int);
+
+ This field has to be preset by the HL-driver. The given function will be
+ called to perform write-requests on /dev/isdnctrl (i.e. sending commands
+ to the card) The data-format is hardware-specific. This function is
+ intended for debugging only. It is not necessary for normal operation
+ and never will be called by the tty-emulation- or network-code. If
+ this function is not supported, the driver has to set NULL here.
+
+ Parameter:
+ u_char* pointer to data.
+ int length of data.
+ int flag: 0 = call from within kernel-space. (HL-driver must use
+ memcpy, may NOT use schedule())
+ 1 = call from user-space. (HL-driver must use
+ memcpy_fromfs, use of schedule() allowed)
+ int driver-Id.
+ int channel-number locally to the HL-driver. (starts with 0)
+
+***CHANGEI1.14: The driver-Id and channel-number are new since this revision.
+
+ Returnvalue:
+ Length of data accepted on success, else error-code (-EINVAL etc.)
+
+ int (*readstat)(u_char*, int, int, int, int);
+
+ This field has to be preset by the HL-driver. The given function will be
+ called to perform read-requests on /dev/isdnctrl (i.e. reading replies
+ from the card) The data-format is hardware-specific. This function is
+ intended for debugging only. It is not necessary for normal operation
+ and never will be called by the tty-emulation- or network-code. If
+ this function is not supported, the driver has to set NULL here.
+
+ Parameter:
+ u_char* pointer to data.
+ int length of data.
+ int flag: 0 = call from within kernel-space. (HL-driver must use
+ memcpy, may NOT use schedule())
+ 1 = call from user-space. (HL-driver must use
+ memcpy_fromfs, use of schedule() allowed)
+ int driver-Id.
+ int channel-number locally to the HL-driver. (starts with 0)
+
+***CHANGEI1.14: The driver-Id and channel-number are new since this revision.
+
+ Returnvalue:
+ Length of data on success, else error-code (-EINVAL etc.)
+
+ char id[20];
+ ***CHANGE0.7: New since this version.
+
+ This string has to be preset by the HL-driver. Its purpose is for
+ identification of the driver by the user. Eg.: it is shown in the
+ status-info of /dev/isdninfo. Furthermore it is used as Id for binding
+ net-interfaces to a specific channel. If a string of length zero is
+ given, upon return, isdn4linux will replace it by a generic name. (line0,
+ line1 etc.) It is recommended to make this string configurable during
+ module-load-time. (copy a global variable to this string.) For doing that,
+ modules 1.2.8 or newer are necessary.
+
+2. Description of the commands, a HL-driver has to support:
+
+ All commands will be performed by calling the function command() described
+ above from within the LL. The field command of the struct-parameter will
+ contain the desired command, the field driver is always set to the
+ appropriate driver-Id.
+
+ Until now, the following commands are defined:
+
+***CHANGEI1.34: The parameter "num" has been replaced by a union "parm" containing
+ the old "num" and a new setup_type struct used for ISDN_CMD_DIAL
+ and ISDN_STAT_ICALL callback.
+
+ ISDN_CMD_IOCTL:
+
+ This command is intended for performing ioctl-calls for configuring
+ hardware or similar purposes (setting port-addresses, loading firmware
+ etc.) For this purpose, in the LL all ioctl-calls with an argument
+ >= IIOCDRVCTL (0x100) will be handed transparently to this
+ function after subtracting 0x100 and placing the result in arg.
+ Example:
+ If a userlevel-program calls ioctl(0x101,...) the function gets
+ called with the field command set to 1.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_IOCTL
+ arg = Original ioctl-cmd - IIOCDRVCTL
+ parm.num = first bytes filled with (unsigned long)arg
+
+ Returnvalue:
+ Depending on driver.
+
+
+ ISDN_CMD_DIAL:
+
+ This command is used to tell the HL-driver it should dial a given
+ number.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_DIAL
+ arg = channel-number locally to the driver. (starting with 0)
+
+ parm.setup.phone = An ASCII-String containing the number to dial.
+ parm.setup.eazmsn = An ASCII-Sting containing the own EAZ or MSN.
+ parm.setup.si1 = The Service-Indicator.
+ parm.setup.si2 = Additional Service-Indicator.
+
+ If the Line has been designed as SPV (a special german
+ feature, meaning semi-leased-line) the phone has to
+ start with an "S".
+ ***CHANGE0.6: In previous versions the EAZ has been given in the
+ highbyte of arg.
+ ***CHANGE0.7.1: New since this version: ServiceIndicator and AddInfo.
+
+ ISDN_CMD_ACCEPTD:
+
+ With this command, the HL-driver is told to accept a D-Channel-setup.
+ (Response to an incoming call)
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_ACCEPTD
+ arg = channel-number locally to the driver. (starting with 0)
+ parm = unused.
+
+ ISDN_CMD_ACCEPTB:
+
+ With this command, the HL-driver is told to perform a B-Channel-setup.
+ (after establishing D-Channel-Connection)
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_ACCEPTB
+ arg = channel-number locally to the driver. (starting with 0)
+ parm = unused.
+
+ ISDN_CMD_HANGUP:
+
+ With this command, the HL-driver is told to hangup (B-Channel if
+ established first, then D-Channel). This command is also used for
+ actively rejecting an incoming call.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_HANGUP
+ arg = channel-number locally to the driver. (starting with 0)
+ parm = unused.
+
+ ISDN_CMD_CLREAZ:
+
+ With this command, the HL-driver is told not to signal incoming
+ calls to the LL.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_CLREAZ
+ arg = channel-number locally to the driver. (starting with 0)
+ parm = unused.
+
+ ISDN_CMD_SETEAZ:
+
+ With this command, the HL-driver is told to signal incoming calls for
+ the given EAZs/MSNs to the LL.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_SETEAZ
+ arg = channel-number locally to the driver. (starting with 0)
+ parm.num = ASCII-String, containing the desired EAZ's/MSN's
+ (comma-separated). If an empty String is given, the
+ HL-driver should respond to ALL incoming calls,
+ regardless of the destination-address.
+ ***CHANGE0.6: New since this version the "empty-string"-feature.
+
+ ISDN_CMD_GETEAZ: (currently unused)
+
+ With this command, the HL-driver is told to report the current setting
+ given with ISDN_CMD_SETEAZ.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_GETEAZ
+ arg = channel-number locally to the driver. (starting with 0)
+ parm.num = ASCII-String, containing the current EAZ's/MSN's
+
+ ISDN_CMD_SETSIL: (currently unused)
+
+ With this command, the HL-driver is told to signal only incoming
+ calls with the given Service-Indicators.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_SETSIL
+ arg = channel-number locally to the driver. (starting with 0)
+ parm.num = ASCII-String, containing the desired Service-Indicators.
+
+ ISDN_CMD_GETSIL: (currently unused)
+
+ With this command, the HL-driver is told to return the current
+ Service-Indicators it will respond to.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_SETSIL
+ arg = channel-number locally to the driver. (starting with 0)
+ parm.num = ASCII-String, containing the current Service-Indicators.
+
+ ISDN_CMD_SETL2:
+
+ With this command, the HL-driver is told to select the given Layer-2-
+ protocol. This command is issued by the LL prior to ISDN_CMD_DIAL or
+ ISDN_CMD_ACCEPTD.
+
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_SETL2
+ arg = channel-number locally to the driver. (starting with 0)
+ logical or'ed with (protocol-Id << 8)
+ protocol-Id is one of the constants ISDN_PROTO_L2...
+ parm = unused.
+
+ ISDN_CMD_GETL2: (currently unused)
+
+ With this command, the HL-driver is told to return the current
+ setting of the Layer-2-protocol.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_GETL2
+ arg = channel-number locally to the driver. (starting with 0)
+ parm = unused.
+ Returnvalue:
+ current protocol-Id (one of the constants ISDN_L2_PROTO)
+
+ ISDN_CMD_SETL3:
+
+ With this command, the HL-driver is told to select the given Layer-3-
+ protocol. This command is issued by the LL prior to ISDN_CMD_DIAL or
+ ISDN_CMD_ACCEPTD.
+
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_SETL3
+ arg = channel-number locally to the driver. (starting with 0)
+ logical or'ed with (protocol-Id << 8)
+ protocol-Id is one of the constants ISDN_PROTO_L3...
+ parm.fax = Pointer to T30_s fax struct. (fax usage only)
+
+ ISDN_CMD_GETL2: (currently unused)
+
+ With this command, the HL-driver is told to return the current
+ setting of the Layer-3-protocol.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_GETL3
+ arg = channel-number locally to the driver. (starting with 0)
+ parm = unused.
+ Returnvalue:
+ current protocol-Id (one of the constants ISDN_L3_PROTO)
+
+ ISDN_CMD_PROCEED:
+
+ With this command, the HL-driver is told to proceed with a incoming call.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_PROCEED
+ arg = channel-number locally to the driver. (starting with 0)
+ setup.eazmsn= empty string or string send as uus1 in DSS1 with
+ PROCEED message
+
+ ISDN_CMD_ALERT:
+
+ With this command, the HL-driver is told to alert a proceeding call.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_ALERT
+ arg = channel-number locally to the driver. (starting with 0)
+ setup.eazmsn= empty string or string send as uus1 in DSS1 with
+ ALERT message
+
+ ISDN_CMD_REDIR:
+
+ With this command, the HL-driver is told to redirect a call in proceeding
+ or alerting state.
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_REDIR
+ arg = channel-number locally to the driver. (starting with 0)
+ setup.eazmsn= empty string or string send as uus1 in DSS1 protocol
+ setup.screen= screening indicator
+ setup.phone = redirected to party number
+
+ ISDN_CMD_PROT_IO:
+
+ With this call, the LL-driver invokes protocol specific features through
+ the LL.
+ The call is not implicitely bound to a connection.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_CMD_PROT_IO
+ arg = The lower 8 Bits define the addressed protocol as defined
+ in ISDN_PTYPE..., the upper bits are used to differentiate
+ the protocol specific CMD.
+
+ para = protocol and function specific. See isdnif.h for detail.
+
+
+ ISDN_CMD_FAXCMD:
+
+ With this command the HL-driver receives a fax sub-command.
+ For details refer to INTERFACE.fax
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_CMD_FAXCMD
+ arg = channel-number locally to the driver. (starting with 0)
+ parm = unused.
+
+
+3. Description of the events to be signaled by the HL-driver to the LL.
+
+ All status-changes are signaled via calling the previously described
+ function statcallb(). The field command of the struct isdn_cmd has
+ to be set by the HL-driver with the appropriate Status-Id (event-number).
+ The field arg has to be set to the channel-number (locally to the driver,
+ starting with 0) to which this event applies. (Exception: STAVAIL-event)
+
+ Until now, the following Status-Ids are defined:
+
+ ISDN_STAT_AVAIL:
+
+ With this call, the HL-driver signals the availability of new data
+ for readstat(). Used only for debugging-purposes, see description
+ of readstat().
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_STAVAIL
+ arg = length of available data.
+ parm = unused.
+
+ ISDN_STAT_ICALL:
+ ISDN_STAT_ICALLW:
+
+ With this call, the HL-driver signals an incoming call to the LL.
+ If ICALLW is signalled the incoming call is a waiting call without
+ a available B-chan.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_ICALL
+ arg = channel-number, locally to the driver. (starting with 0)
+ para.setup.phone = Callernumber.
+ para.setup.eazmsn = CalledNumber.
+ para.setup.si1 = Service Indicator.
+ para.setup.si2 = Additional Service Indicator.
+ para.setup.plan = octet 3 from Calling party number Information Element.
+ para.setup.screen = octet 3a from Calling party number Information Element.
+
+ Return:
+ 0 = No device matching this call.
+ 1 = At least one device matching this call (RING on ttyI).
+ HL-driver may send ALERTING on the D-channel in this case.
+ 2 = Call will be rejected.
+ 3 = Incoming called party number is currently incomplete.
+ Additional digits are required.
+ Used for signalling with PtP connections.
+ 4 = Call will be held in a proceeding state
+ (HL driver sends PROCEEDING)
+ Used when a user space prog needs time to interpret a call
+ para.setup.eazmsn may be filled with an uus1 message of
+ 30 octets maximum. Empty string if no uus.
+ 5 = Call will be actively deflected to another party
+ Only available in DSS1/EURO protocol
+ para.setup.phone must be set to destination party number
+ para.setup.eazmsn may be filled with an uus1 message of
+ 30 octets maximum. Empty string if no uus.
+ -1 = An error happened. (Invalid parameters for example.)
+ The keypad support now is included in the dial command.
+
+
+ ISDN_STAT_RUN:
+
+ With this call, the HL-driver signals availability of the ISDN-card.
+ (after initializing, loading firmware)
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_RUN
+ arg = unused.
+ parm = unused.
+
+ ISDN_STAT_STOP:
+
+ With this call, the HL-driver signals unavailability of the ISDN-card.
+ (before unloading, while resetting/reconfiguring the card)
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_STOP
+ arg = unused.
+ parm = unused.
+
+ ISDN_STAT_DCONN:
+
+ With this call, the HL-driver signals the successful establishment of
+ a D-Channel-connection. (Response to ISDN_CMD_ACCEPTD or ISDN_CMD_DIAL)
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_DCONN
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm = unused.
+
+ ISDN_STAT_BCONN:
+
+ With this call, the HL-driver signals the successful establishment of
+ a B-Channel-connection. (Response to ISDN_CMD_ACCEPTB or because the
+ remote-station has initiated establishment)
+
+ The HL driver should call this when the logical l2/l3 protocol
+ connection on top of the physical B-channel is established.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_BCONN
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm.num = ASCII-String, containing type of connection (for analog
+ modem only). This will be appended to the CONNECT message
+ e.g. 14400/V.32bis
+
+ ISDN_STAT_DHUP:
+
+ With this call, the HL-driver signals the shutdown of a
+ D-Channel-connection. This could be a response to a prior ISDN_CMD_HANGUP,
+ or caused by a remote-hangup or if the remote-station has actively
+ rejected a call.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_DHUP
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm = unused.
+
+ ISDN_STAT_BHUP:
+
+ With this call, the HL-driver signals the shutdown of a
+ B-Channel-connection. This could be a response to a prior ISDN_CMD_HANGUP,
+ or caused by a remote-hangup.
+
+ The HL driver should call this as soon as the logical l2/l3 protocol
+ connection on top of the physical B-channel is released.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_BHUP
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm = unused.
+
+ ISDN_STAT_CINF:
+
+ With this call, the HL-driver delivers charge-unit information to the
+ LL.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_CINF
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm.num = ASCII string containing charge-units (digits only).
+
+ ISDN_STAT_LOAD: (currently unused)
+
+ ISDN_STAT_UNLOAD:
+
+ With this call, the HL-driver signals that it will be unloaded now. This
+ tells the LL to release all corresponding data-structures.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_UNLOAD
+ arg = unused.
+ parm = unused.
+
+ ISDN_STAT_BSENT:
+
+ With this call the HL-driver signals the delivery of a data-packet.
+ This callback is used by the network-interfaces only, tty-Emulation
+ does not need this call.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_BSENT
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm.length = ***CHANGEI.1.21: New field.
+ the driver has to set this to the original length
+ of the skb at the time of receiving it from the linklevel.
+
+ ISDN_STAT_NODCH:
+
+ With this call, the driver has to respond to a prior ISDN_CMD_DIAL, if
+ no D-Channel is available.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_NODCH
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm = unused.
+
+ ISDN_STAT_ADDCH:
+
+ This call is for HL-drivers, which are unable to check card-type
+ or numbers of supported channels before they have loaded any firmware
+ using ioctl. Those HL-driver simply set the channel-parameter to a
+ minimum channel-number when registering, and later if they know
+ the real amount, perform this call, allocating additional channels.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_ADDCH
+ arg = number of channels to be added.
+ parm = unused.
+
+ ISDN_STAT_CAUSE:
+
+ With this call, the HL-driver delivers CAUSE-messages to the LL.
+ Currently the LL does not use this messages. Their contents is simply
+ logged via kernel-messages. Therefore, currently the format of the
+ messages is completely free. However they should be printable.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_NODCH
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm.num = ASCII string containing CAUSE-message.
+
+ ISDN_STAT_DISPLAY:
+
+ With this call, the HL-driver delivers DISPLAY-messages to the LL.
+ Currently the LL does not use this messages.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_DISPLAY
+ arg = channel-number, locally to the driver. (starting with 0)
+ para.display= string containing DISPLAY-message.
+
+ ISDN_STAT_PROT:
+
+ With this call, the HL-driver delivers protocol specific infos to the LL.
+ The call is not implicitely bound to a connection.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_PROT
+ arg = The lower 8 Bits define the addressed protocol as defined
+ in ISDN_PTYPE..., the upper bits are used to differentiate
+ the protocol specific STAT.
+
+ para = protocol and function specific. See isdnif.h for detail.
+
+ ISDN_STAT_DISCH:
+
+ With this call, the HL-driver signals the LL to disable or enable the
+ use of supplied channel and driver.
+ The call may be used to reduce the available number of B-channels after
+ loading the driver. The LL has to ignore a disabled channel when searching
+ for free channels. The HL driver itself never delivers STAT callbacks for
+ disabled channels.
+ The LL returns a nonzero code if the operation was not successful or the
+ selected channel is actually regarded as busy.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_DISCH
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm.num[0] = 0 if channel shall be disabled, else enabled.
+
+ ISDN_STAT_L1ERR:
+
+ ***CHANGEI1.21 new status message.
+ A signal can be sent to the linklevel if an Layer1-error results in
+ packet-loss on receive or send. The field errcode of the cmd.parm
+ union describes the error more precisely.
+
+ Parameter:
+ driver = driver-Id
+ command = ISDN_STAT_L1ERR
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm.errcode= ISDN_STAT_L1ERR_SEND: Packet lost while sending.
+ ISDN_STAT_L1ERR_RECV: Packet lost while receiving.
+ ISDN_STAT_FAXIND:
+
+ With this call the HL-driver signals a fax sub-command to the LL.
+ For details refer to INTERFACE.fax
+
+ Parameter:
+ driver = driver-Id.
+ command = ISDN_STAT_FAXIND
+ arg = channel-number, locally to the driver. (starting with 0)
+ parm = unused.
+
diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI
new file mode 100644
index 000000000..1688b5a1f
--- /dev/null
+++ b/Documentation/isdn/INTERFACE.CAPI
@@ -0,0 +1,355 @@
+Kernel CAPI Interface to Hardware Drivers
+-----------------------------------------
+
+1. Overview
+
+From the CAPI 2.0 specification:
+COMMON-ISDN-API (CAPI) is an application programming interface standard used
+to access ISDN equipment connected to basic rate interfaces (BRI) and primary
+rate interfaces (PRI).
+
+Kernel CAPI operates as a dispatching layer between CAPI applications and CAPI
+hardware drivers. Hardware drivers register ISDN devices (controllers, in CAPI
+lingo) with Kernel CAPI to indicate their readiness to provide their service
+to CAPI applications. CAPI applications also register with Kernel CAPI,
+requesting association with a CAPI device. Kernel CAPI then dispatches the
+application registration to an available device, forwarding it to the
+corresponding hardware driver. Kernel CAPI then forwards CAPI messages in both
+directions between the application and the hardware driver.
+
+Format and semantics of CAPI messages are specified in the CAPI 2.0 standard.
+This standard is freely available from http://www.capi.org.
+
+
+2. Driver and Device Registration
+
+CAPI drivers optionally register themselves with Kernel CAPI by calling the
+Kernel CAPI function register_capi_driver() with a pointer to a struct
+capi_driver. This structure must be filled with the name and revision of the
+driver, and optionally a pointer to a callback function, add_card(). The
+registration can be revoked by calling the function unregister_capi_driver()
+with a pointer to the same struct capi_driver.
+
+CAPI drivers must register each of the ISDN devices they control with Kernel
+CAPI by calling the Kernel CAPI function attach_capi_ctr() with a pointer to a
+struct capi_ctr before they can be used. This structure must be filled with
+the names of the driver and controller, and a number of callback function
+pointers which are subsequently used by Kernel CAPI for communicating with the
+driver. The registration can be revoked by calling the function
+detach_capi_ctr() with a pointer to the same struct capi_ctr.
+
+Before the device can be actually used, the driver must fill in the device
+information fields 'manu', 'version', 'profile' and 'serial' in the capi_ctr
+structure of the device, and signal its readiness by calling capi_ctr_ready().
+From then on, Kernel CAPI may call the registered callback functions for the
+device.
+
+If the device becomes unusable for any reason (shutdown, disconnect ...), the
+driver has to call capi_ctr_down(). This will prevent further calls to the
+callback functions by Kernel CAPI.
+
+
+3. Application Registration and Communication
+
+Kernel CAPI forwards registration requests from applications (calls to CAPI
+operation CAPI_REGISTER) to an appropriate hardware driver by calling its
+register_appl() callback function. A unique Application ID (ApplID, u16) is
+allocated by Kernel CAPI and passed to register_appl() along with the
+parameter structure provided by the application. This is analogous to the
+open() operation on regular files or character devices.
+
+After a successful return from register_appl(), CAPI messages from the
+application may be passed to the driver for the device via calls to the
+send_message() callback function. Conversely, the driver may call Kernel
+CAPI's capi_ctr_handle_message() function to pass a received CAPI message to
+Kernel CAPI for forwarding to an application, specifying its ApplID.
+
+Deregistration requests (CAPI operation CAPI_RELEASE) from applications are
+forwarded as calls to the release_appl() callback function, passing the same
+ApplID as with register_appl(). After return from release_appl(), no CAPI
+messages for that application may be passed to or from the device anymore.
+
+
+4. Data Structures
+
+4.1 struct capi_driver
+
+This structure describes a Kernel CAPI driver itself. It is used in the
+register_capi_driver() and unregister_capi_driver() functions, and contains
+the following non-private fields, all to be set by the driver before calling
+register_capi_driver():
+
+char name[32]
+ the name of the driver, as a zero-terminated ASCII string
+char revision[32]
+ the revision number of the driver, as a zero-terminated ASCII string
+int (*add_card)(struct capi_driver *driver, capicardparams *data)
+ a callback function pointer (may be NULL)
+
+
+4.2 struct capi_ctr
+
+This structure describes an ISDN device (controller) handled by a Kernel CAPI
+driver. After registration via the attach_capi_ctr() function it is passed to
+all controller specific lower layer interface and callback functions to
+identify the controller to operate on.
+
+It contains the following non-private fields:
+
+- to be set by the driver before calling attach_capi_ctr():
+
+struct module *owner
+ pointer to the driver module owning the device
+
+void *driverdata
+ an opaque pointer to driver specific data, not touched by Kernel CAPI
+
+char name[32]
+ the name of the controller, as a zero-terminated ASCII string
+
+char *driver_name
+ the name of the driver, as a zero-terminated ASCII string
+
+int (*load_firmware)(struct capi_ctr *ctrlr, capiloaddata *ldata)
+ (optional) pointer to a callback function for sending firmware and
+ configuration data to the device
+ The function may return before the operation has completed.
+ Completion must be signalled by a call to capi_ctr_ready().
+ Return value: 0 on success, error code on error
+ Called in process context.
+
+void (*reset_ctr)(struct capi_ctr *ctrlr)
+ (optional) pointer to a callback function for stopping the device,
+ releasing all registered applications
+ The function may return before the operation has completed.
+ Completion must be signalled by a call to capi_ctr_down().
+ Called in process context.
+
+void (*register_appl)(struct capi_ctr *ctrlr, u16 applid,
+ capi_register_params *rparam)
+void (*release_appl)(struct capi_ctr *ctrlr, u16 applid)
+ pointers to callback functions for registration and deregistration of
+ applications with the device
+ Calls to these functions are serialized by Kernel CAPI so that only
+ one call to any of them is active at any time.
+
+u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb)
+ pointer to a callback function for sending a CAPI message to the
+ device
+ Return value: CAPI error code
+ If the method returns 0 (CAPI_NOERROR) the driver has taken ownership
+ of the skb and the caller may no longer access it. If it returns a
+ non-zero (error) value then ownership of the skb returns to the caller
+ who may reuse or free it.
+ The return value should only be used to signal problems with respect
+ to accepting or queueing the message. Errors occurring during the
+ actual processing of the message should be signaled with an
+ appropriate reply message.
+ May be called in process or interrupt context.
+ Calls to this function are not serialized by Kernel CAPI, ie. it must
+ be prepared to be re-entered.
+
+char *(*procinfo)(struct capi_ctr *ctrlr)
+ pointer to a callback function returning the entry for the device in
+ the CAPI controller info table, /proc/capi/controller
+
+const struct file_operations *proc_fops
+ pointers to callback functions for the device's proc file
+ system entry, /proc/capi/controllers/<n>; pointer to the device's
+ capi_ctr structure is available from struct proc_dir_entry::data
+ which is available from struct inode.
+
+Note: Callback functions except send_message() are never called in interrupt
+context.
+
+- to be filled in before calling capi_ctr_ready():
+
+u8 manu[CAPI_MANUFACTURER_LEN]
+ value to return for CAPI_GET_MANUFACTURER
+
+capi_version version
+ value to return for CAPI_GET_VERSION
+
+capi_profile profile
+ value to return for CAPI_GET_PROFILE
+
+u8 serial[CAPI_SERIAL_LEN]
+ value to return for CAPI_GET_SERIAL
+
+
+4.3 SKBs
+
+CAPI messages are passed between Kernel CAPI and the driver via send_message()
+and capi_ctr_handle_message(), stored in the data portion of a socket buffer
+(skb). Each skb contains a single CAPI message coded according to the CAPI 2.0
+standard.
+
+For the data transfer messages, DATA_B3_REQ and DATA_B3_IND, the actual
+payload data immediately follows the CAPI message itself within the same skb.
+The Data and Data64 parameters are not used for processing. The Data64
+parameter may be omitted by setting the length field of the CAPI message to 22
+instead of 30.
+
+
+4.4 The _cmsg Structure
+
+(declared in <linux/isdn/capiutil.h>)
+
+The _cmsg structure stores the contents of a CAPI 2.0 message in an easily
+accessible form. It contains members for all possible CAPI 2.0 parameters,
+including subparameters of the Additional Info and B Protocol structured
+parameters, with the following exceptions:
+
+* second Calling party number (CONNECT_IND)
+
+* Data64 (DATA_B3_REQ and DATA_B3_IND)
+
+* Sending complete (subparameter of Additional Info, CONNECT_REQ and INFO_REQ)
+
+* Global Configuration (subparameter of B Protocol, CONNECT_REQ, CONNECT_RESP
+ and SELECT_B_PROTOCOL_REQ)
+
+Only those parameters appearing in the message type currently being processed
+are actually used. Unused members should be set to zero.
+
+Members are named after the CAPI 2.0 standard names of the parameters they
+represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data
+types are:
+
+u8 for CAPI parameters of type 'byte'
+
+u16 for CAPI parameters of type 'word'
+
+u32 for CAPI parameters of type 'dword'
+
+_cstruct for CAPI parameters of type 'struct'
+ The member is a pointer to a buffer containing the parameter in
+ CAPI encoding (length + content). It may also be NULL, which will
+ be taken to represent an empty (zero length) parameter.
+ Subparameters are stored in encoded form within the content part.
+
+_cmstruct alternative representation for CAPI parameters of type 'struct'
+ (used only for the 'Additional Info' and 'B Protocol' parameters)
+ The representation is a single byte containing one of the values:
+ CAPI_DEFAULT: The parameter is empty/absent.
+ CAPI_COMPOSE: The parameter is present.
+ Subparameter values are stored individually in the corresponding
+ _cmsg structure members.
+
+Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert
+messages between their transport encoding described in the CAPI 2.0 standard
+and their _cmsg structure representation. Note that capi_cmsg2message() does
+not know or check the size of its destination buffer. The caller must make
+sure it is big enough to accommodate the resulting CAPI message.
+
+
+5. Lower Layer Interface Functions
+
+(declared in <linux/isdn/capilli.h>)
+
+void register_capi_driver(struct capi_driver *drvr)
+void unregister_capi_driver(struct capi_driver *drvr)
+ register/unregister a driver with Kernel CAPI
+
+int attach_capi_ctr(struct capi_ctr *ctrlr)
+int detach_capi_ctr(struct capi_ctr *ctrlr)
+ register/unregister a device (controller) with Kernel CAPI
+
+void capi_ctr_ready(struct capi_ctr *ctrlr)
+void capi_ctr_down(struct capi_ctr *ctrlr)
+ signal controller ready/not ready
+
+void capi_ctr_suspend_output(struct capi_ctr *ctrlr)
+void capi_ctr_resume_output(struct capi_ctr *ctrlr)
+ signal suspend/resume
+
+void capi_ctr_handle_message(struct capi_ctr * ctrlr, u16 applid,
+ struct sk_buff *skb)
+ pass a received CAPI message to Kernel CAPI
+ for forwarding to the specified application
+
+
+6. Helper Functions and Macros
+
+Library functions (from <linux/isdn/capilli.h>):
+
+void capilib_new_ncci(struct list_head *head, u16 applid,
+ u32 ncci, u32 winsize)
+void capilib_free_ncci(struct list_head *head, u16 applid, u32 ncci)
+void capilib_release_appl(struct list_head *head, u16 applid)
+void capilib_release(struct list_head *head)
+void capilib_data_b3_conf(struct list_head *head, u16 applid,
+ u32 ncci, u16 msgid)
+u16 capilib_data_b3_req(struct list_head *head, u16 applid,
+ u32 ncci, u16 msgid)
+
+
+Macros to extract/set element values from/in a CAPI message header
+(from <linux/isdn/capiutil.h>):
+
+Get Macro Set Macro Element (Type)
+
+CAPIMSG_LEN(m) CAPIMSG_SETLEN(m, len) Total Length (u16)
+CAPIMSG_APPID(m) CAPIMSG_SETAPPID(m, applid) ApplID (u16)
+CAPIMSG_COMMAND(m) CAPIMSG_SETCOMMAND(m,cmd) Command (u8)
+CAPIMSG_SUBCOMMAND(m) CAPIMSG_SETSUBCOMMAND(m, cmd) Subcommand (u8)
+CAPIMSG_CMD(m) - Command*256
+ + Subcommand (u16)
+CAPIMSG_MSGID(m) CAPIMSG_SETMSGID(m, msgid) Message Number (u16)
+
+CAPIMSG_CONTROL(m) CAPIMSG_SETCONTROL(m, contr) Controller/PLCI/NCCI
+ (u32)
+CAPIMSG_DATALEN(m) CAPIMSG_SETDATALEN(m, len) Data Length (u16)
+
+
+Library functions for working with _cmsg structures
+(from <linux/isdn/capiutil.h>):
+
+unsigned capi_cmsg2message(_cmsg *cmsg, u8 *msg)
+ Assembles a CAPI 2.0 message from the parameters in *cmsg, storing the
+ result in *msg.
+
+unsigned capi_message2cmsg(_cmsg *cmsg, u8 *msg)
+ Disassembles the CAPI 2.0 message in *msg, storing the parameters in
+ *cmsg.
+
+unsigned capi_cmsg_header(_cmsg *cmsg, u16 ApplId, u8 Command, u8 Subcommand,
+ u16 Messagenumber, u32 Controller)
+ Fills the header part and address field of the _cmsg structure *cmsg
+ with the given values, zeroing the remainder of the structure so only
+ parameters with non-default values need to be changed before sending
+ the message.
+
+void capi_cmsg_answer(_cmsg *cmsg)
+ Sets the low bit of the Subcommand field in *cmsg, thereby converting
+ _REQ to _CONF and _IND to _RESP.
+
+char *capi_cmd2str(u8 Command, u8 Subcommand)
+ Returns the CAPI 2.0 message name corresponding to the given command
+ and subcommand values, as a static ASCII string. The return value may
+ be NULL if the command/subcommand is not one of those defined in the
+ CAPI 2.0 standard.
+
+
+7. Debugging
+
+The module kernelcapi has a module parameter showcapimsgs controlling some
+debugging output produced by the module. It can only be set when the module is
+loaded, via a parameter "showcapimsgs=<n>" to the modprobe command, either on
+the command line or in the configuration file.
+
+If the lowest bit of showcapimsgs is set, kernelcapi logs controller and
+application up and down events.
+
+In addition, every registered CAPI controller has an associated traceflag
+parameter controlling how CAPI messages sent from and to tha controller are
+logged. The traceflag parameter is initialized with the value of the
+showcapimsgs parameter when the controller is registered, but can later be
+changed via the MANUFACTURER_REQ command KCAPI_CMD_TRACE.
+
+If the value of traceflag is non-zero, CAPI messages are logged.
+DATA_B3 messages are only logged if the value of traceflag is > 2.
+
+If the lowest bit of traceflag is set, only the command/subcommand and message
+length are logged. Otherwise, kernelcapi logs a readable representation of
+the entire message.
diff --git a/Documentation/isdn/INTERFACE.fax b/Documentation/isdn/INTERFACE.fax
new file mode 100644
index 000000000..9c8c6d914
--- /dev/null
+++ b/Documentation/isdn/INTERFACE.fax
@@ -0,0 +1,163 @@
+$Id: INTERFACE.fax,v 1.2 2000/08/06 09:22:50 armin Exp $
+
+
+Description of the fax-subinterface between linklevel and hardwarelevel of
+ isdn4linux.
+
+ The communication between linklevel (LL) and hardwarelevel (HL) for fax
+ is based on the struct T30_s (defined in isdnif.h).
+ This struct is allocated in the LL.
+ In order to use fax, the LL provides the pointer to this struct with the
+ command ISDN_CMD_SETL3 (parm.fax). This pointer expires in case of hangup
+ and when a new channel to a new connection is assigned.
+
+
+Data handling:
+ In send-mode the HL-driver has to handle the <DLE> codes and the bit-order
+ conversion by itself.
+ In receive-mode the LL-driver takes care of the bit-order conversion
+ (specified by +FBOR)
+
+Structure T30_s description:
+
+ This structure stores the values (set by AT-commands), the remote-
+ capability-values and the command-codes between LL and HL.
+
+ If the HL-driver receives ISDN_CMD_FAXCMD, all needed information
+ is in this struct set by the LL.
+ To signal information to the LL, the HL-driver has to set the
+ parameters and use ISDN_STAT_FAXIND.
+ (Please refer to INTERFACE)
+
+Structure T30_s:
+
+ All members are 8-bit unsigned (__u8)
+
+ - resolution
+ - rate
+ - width
+ - length
+ - compression
+ - ecm
+ - binary
+ - scantime
+ - id[]
+ Local faxmachine's parameters, set by +FDIS, +FDCS, +FLID, ...
+
+ - r_resolution
+ - r_rate
+ - r_width
+ - r_length
+ - r_compression
+ - r_ecm
+ - r_binary
+ - r_scantime
+ - r_id[]
+ Remote faxmachine's parameters. To be set by HL-driver.
+
+ - phase
+ Defines the actual state of fax connection. Set by HL or LL
+ depending on progress and type of connection.
+ If the phase changes because of an AT command, the LL driver
+ changes this value. Otherwise the HL-driver takes care of it, but
+ only necessary on call establishment (from IDLE to PHASE_A).
+ (one of the constants ISDN_FAX_PHASE_[IDLE,A,B,C,D,E])
+
+ - direction
+ Defines outgoing/send or incoming/receive connection.
+ (ISDN_TTY_FAX_CONN_[IN,OUT])
+
+ - code
+ Commands from LL to HL; possible constants :
+ ISDN_TTY_FAX_DR signals +FDR command to HL
+
+ ISDN_TTY_FAX_DT signals +FDT command to HL
+
+ ISDN_TTY_FAX_ET signals +FET command to HL
+
+
+ Other than that the "code" is set with the hangup-code value at
+ the end of connection for the +FHNG message.
+
+ - r_code
+ Commands from HL to LL; possible constants :
+ ISDN_TTY_FAX_CFR output of +FCFR message.
+
+ ISDN_TTY_FAX_RID output of remote ID set in r_id[]
+ (+FCSI/+FTSI on send/receive)
+
+ ISDN_TTY_FAX_DCS output of +FDCS and CONNECT message,
+ switching to phase C.
+
+ ISDN_TTY_FAX_ET signals end of data,
+ switching to phase D.
+
+ ISDN_TTY_FAX_FCON signals the established, outgoing connection,
+ switching to phase B.
+
+ ISDN_TTY_FAX_FCON_I signals the established, incoming connection,
+ switching to phase B.
+
+ ISDN_TTY_FAX_DIS output of +FDIS message and values.
+
+ ISDN_TTY_FAX_SENT signals that all data has been sent
+ and <DLE><ETX> is acknowledged,
+ OK message will be sent.
+
+ ISDN_TTY_FAX_PTS signals a msg-confirmation (page sent successful),
+ depending on fet value:
+ 0: output OK message (more pages follow)
+ 1: switching to phase B (next document)
+
+ ISDN_TTY_FAX_TRAIN_OK output of +FDCS and OK message (for receive mode).
+
+ ISDN_TTY_FAX_EOP signals end of data in receive mode,
+ switching to phase D.
+
+ ISDN_TTY_FAX_HNG output of the +FHNG and value set by code and
+ OK message, switching to phase E.
+
+
+ - badlin
+ Value of +FBADLIN
+
+ - badmul
+ Value of +FBADMUL
+
+ - bor
+ Value of +FBOR
+
+ - fet
+ Value of +FET command in send-mode.
+ Set by HL in receive-mode for +FET message.
+
+ - pollid[]
+ ID-string, set by +FCIG
+
+ - cq
+ Value of +FCQ
+
+ - cr
+ Value of +FCR
+
+ - ctcrty
+ Value of +FCTCRTY
+
+ - minsp
+ Value of +FMINSP
+
+ - phcto
+ Value of +FPHCTO
+
+ - rel
+ Value of +FREL
+
+ - nbc
+ Value of +FNBC (0,1)
+ (+FNBC is not a known class 2 fax command, I added this to change the
+ automatic "best capabilities" connection in the eicon HL-driver)
+
+
+Armin
+mac@melware.de
+
diff --git a/Documentation/isdn/README b/Documentation/isdn/README
new file mode 100644
index 000000000..cfb188434
--- /dev/null
+++ b/Documentation/isdn/README
@@ -0,0 +1,599 @@
+README for the ISDN-subsystem
+
+1. Preface
+
+ 1.1 Introduction
+
+ This README describes how to set up and how to use the different parts
+ of the ISDN-subsystem.
+
+ For using the ISDN-subsystem, some additional userlevel programs are
+ necessary. Those programs and some contributed utilities are available
+ at
+
+ ftp.isdn4linux.de
+
+ /pub/isdn4linux/isdn4k-utils-<VersionNumber>.tar.gz
+
+
+ We also have set up a mailing-list:
+
+ The isdn4linux-project originates in Germany, and therefore by historical
+ reasons, the mailing-list's primary language is german. However mails
+ written in english have been welcome all the time.
+
+ to subscribe: write a email to majordomo@listserv.isdn4linux.de,
+ Subject irrelevant, in the message body:
+ subscribe isdn4linux <your_email_address>
+
+ To write to the mailing-list, write to isdn4linux@listserv.isdn4linux.de
+
+ This mailinglist is bidirectionally gated to the newsgroup
+
+ de.alt.comm.isdn4linux
+
+ There is also a well maintained FAQ in English available at
+ http://www.mhessler.de/i4lfaq/
+ It can be viewed online, or downloaded in sgml/text/html format.
+ The FAQ can also be viewed online at
+ http://www.isdn4linux.de/faq/
+ or downloaded from
+ ftp://ftp.isdn4linux.de/pub/isdn4linux/FAQ/
+
+ 1.1 Technical details
+
+ In the following Text, the terms MSN and EAZ are used.
+
+ MSN is the abbreviation for (M)ultiple(S)ubscriber(N)umber, and applies
+ to Euro(EDSS1)-type lines. Usually it is simply the phone number.
+
+ EAZ is the abbreviation of (E)ndgeraete(A)uswahl(Z)iffer and
+ applies to German 1TR6-type lines. This is a one-digit string,
+ simply appended to the base phone number
+
+ The internal handling is nearly identical, so replace the appropriate
+ term to that one, which applies to your local ISDN-environment.
+
+ When the link-level-module isdn.o is loaded, it supports up to 16
+ low-level-modules with up to 64 channels. (The number 64 is arbitrarily
+ chosen and can be configured at compile-time --ISDN_MAX in isdn.h).
+ A low-level-driver can register itself through an interface (which is
+ defined in isdnif.h) and gets assigned a slot.
+ The following char-devices are made available for each channel:
+
+ A raw-control-device with the following functions:
+ write: raw D-channel-messages (format: depends on driver).
+ read: raw D-channel-messages (format: depends on driver).
+ ioctl: depends on driver, i.e. for the ICN-driver, the base-address of
+ the ports and the shared memory on the card can be set and read
+ also the boot-code and the protocol software can be loaded into
+ the card.
+
+ O N L Y !!! for debugging (no locking against other devices):
+ One raw-data-device with the following functions:
+ write: data to B-channel.
+ read: data from B-channel.
+
+ In addition the following devices are made available:
+
+ 128 tty-devices (64 cuix and 64 ttyIx) with integrated modem-emulator:
+ The functionality is almost the same as that of a serial device
+ (the line-discs are handled by the kernel), which lets you run
+ SLIP, CSLIP and asynchronous PPP through the devices. We have tested
+ Seyon, minicom, CSLIP (uri-dip) PPP, mgetty, XCept and Hylafax.
+
+ The modem-emulation supports the following:
+ 1.3.1 Commands:
+
+ ATA Answer incoming call.
+ ATD<No.> Dial, the number may contain:
+ [0-9] and [,#.*WPT-S]
+ the latter are ignored until 'S'.
+ The 'S' must precede the number, if
+ the line is a SPV (German 1TR6).
+ ATE0 Echo off.
+ ATE1 Echo on (default).
+ ATH Hang-up.
+ ATH1 Off hook (ignored).
+ ATH0 Hang-up.
+ ATI Return "ISDN for Linux...".
+ ATI0 "
+ ATI1 "
+ ATI2 Report of last connection.
+ ATO On line (data mode).
+ ATQ0 Enable result codes (default).
+ ATQ1 Disable result codes (default).
+ ATSx=y Set register x to y.
+ ATSx? Show contents of register x.
+ ATV0 Numeric responses.
+ ATV1 English responses (default).
+ ATZ Load registers and EAZ/MSN from Profile.
+ AT&Bx Set Send-Packet-size to x (max. 4000)
+ The real packet-size may be limited by the
+ low-level-driver used. e.g. the HiSax-Module-
+ limit is 2000. You will get NO Error-Message,
+ if you set it to higher values, because at the
+ time of giving this command the corresponding
+ driver may not be selected (see "Automatic
+ Assignment") however the size of outgoing packets
+ will be limited correctly.
+ AT&D0 Ignore DTR
+ AT&D2 DTR-low-edge: Hang up and return to
+ command mode (default).
+ AT&D3 Same as AT&D2 but also resets all registers.
+ AT&Ex Set the EAZ/MSN for this channel to x.
+ AT&F Reset all registers and profile to "factory-defaults"
+ AT&Lx Set list of phone numbers to listen on. x is a
+ list of wildcard patterns separated by semicolon.
+ If this is set, it has precedence over the MSN set
+ by AT&E.
+ AT&Rx Select V.110 bitrate adaption.
+ This command enables V.110 protocol with 9600 baud
+ (x=9600), 19200 baud (x=19200) or 38400 baud
+ (x=38400). A value of x=0 disables V.110 switching
+ back to default X.75. This command sets the following
+ Registers:
+ Reg 14 (Layer-2 protocol):
+ x = 0: 0
+ x = 9600: 7
+ x = 19200: 8
+ x = 38400: 9
+ Reg 18.2 = 1
+ Reg 19 (Additional Service Indicator):
+ x = 0: 0
+ x = 9600: 197
+ x = 19200: 199
+ x = 38400: 198
+ Note on value in Reg 19:
+ There is _NO_ common convention for 38400 baud.
+ The value 198 is chosen arbitrarily. Users
+ _MUST_ negotiate this value before establishing
+ a connection.
+ AT&Sx Set window-size (x = 1..8) (not yet implemented)
+ AT&V Show all settings.
+ AT&W0 Write registers and EAZ/MSN to profile. See also
+ iprofd (5.c in this README).
+ AT&X0 BTX-mode and T.70-mode off (default)
+ AT&X1 BTX-mode on. (S13.1=1, S13.5=0 S14=0, S16=7, S18=7, S19=0)
+ AT&X2 T.70-mode on. (S13.1=1, S13.5=1, S14=0, S16=7, S18=7, S19=0)
+ AT+Rx Resume a suspended call with CallID x (x = 1,2,3...)
+ AT+Sx Suspend a call with CallID x (x = 1,2,3...)
+
+ For voice-mode commands refer to README.audio
+
+ 1.3.2 Escape sequence:
+ During a connection, the emulation reacts just like
+ a normal modem to the escape sequence <DELAY>+++<DELAY>.
+ (The escape character - default '+' - can be set in the
+ register 2).
+ The DELAY must at least be 1.5 seconds long and delay
+ between the escape characters must not exceed 0.5 seconds.
+
+ 1.3.3 Registers:
+
+ Nr. Default Description
+ 0 0 Answer on ring number.
+ (no auto-answer if S0=0).
+ 1 0 Count of rings.
+ 2 43 Escape character.
+ (a value >= 128 disables the escape sequence).
+ 3 13 Carriage return character (ASCII).
+ 4 10 Line feed character (ASCII).
+ 5 8 Backspace character (ASCII).
+ 6 3 Delay in seconds before dialing.
+ 7 60 Wait for carrier.
+ 8 2 Pause time for comma (ignored)
+ 9 6 Carrier detect time (ignored)
+ 10 7 Carrier loss to disconnect time (ignored).
+ 11 70 Touch tone timing (ignored).
+ 12 69 Bit coded register:
+ Bit 0: 0 = Suppress response messages.
+ 1 = Show response messages.
+ Bit 1: 0 = English response messages.
+ 1 = Numeric response messages.
+ Bit 2: 0 = Echo off.
+ 1 = Echo on.
+ Bit 3 0 = DCD always on.
+ 1 = DCD follows carrier.
+ Bit 4 0 = CTS follows RTS
+ 1 = Ignore RTS, CTS always on.
+ Bit 5 0 = return to command mode on DTR low.
+ 1 = Same as 0 but also resets all
+ registers.
+ See also register 13, bit 2
+ Bit 6 0 = DSR always on.
+ 1 = DSR only on if channel is available.
+ Bit 7 0 = Cisco-PPP-flag-hack off (default).
+ 1 = Cisco-PPP-flag-hack on.
+ 13 0 Bit coded register:
+ Bit 0: 0 = Use delayed tty-send-algorithm
+ 1 = Direct tty-send.
+ Bit 1: 0 = T.70 protocol (Only for BTX!) off
+ 1 = T.70 protocol (Only for BTX!) on
+ Bit 2: 0 = Don't hangup on DTR low.
+ 1 = Hangup on DTR low.
+ Bit 3: 0 = Standard response messages
+ 1 = Extended response messages
+ Bit 4: 0 = CALLER NUMBER before every RING.
+ 1 = CALLER NUMBER after first RING.
+ Bit 5: 0 = T.70 extended protocol off
+ 1 = T.70 extended protocol on
+ Bit 6: 0 = Special RUNG Message off
+ 1 = Special RUNG Message on
+ "RUNG" is delivered on a ttyI, if
+ an incoming call happened (RING) and
+ the remote party hung up before any
+ local ATA was given.
+ Bit 7: 0 = Don't show display messages from net
+ 1 = Show display messages from net
+ (S12 Bit 1 must be 0 too)
+ 14 0 Layer-2 protocol:
+ 0 = X75/LAPB with I-frames
+ 1 = X75/LAPB with UI-frames
+ 2 = X75/LAPB with BUI-frames
+ 3 = HDLC
+ 4 = Transparent (audio)
+ 7 = V.110, 9600 baud
+ 8 = V.110, 19200 baud
+ 9 = V.110, 38400 baud
+ 10 = Analog Modem (only if hardware supports this)
+ 11 = Fax G3 (only if hardware supports this)
+ 15 0 Layer-3 protocol:
+ 0 = transparent
+ 1 = transparent with audio features (e.g. DSP)
+ 2 = Fax G3 Class 2 commands (S14 has to be set to 11)
+ 3 = Fax G3 Class 1 commands (S14 has to be set to 11)
+ 16 250 Send-Packet-size/16
+ 17 8 Window-size (not yet implemented)
+ 18 4 Bit coded register, Service-Octet-1 to accept,
+ or to be used on dialout:
+ Bit 0: Service 1 (audio) when set.
+ Bit 1: Service 5 (BTX) when set.
+ Bit 2: Service 7 (data) when set.
+ Note: It is possible to set more than one
+ bit. In this case, on incoming calls
+ the selected services are accepted,
+ and if the service is "audio", the
+ Layer-2-protocol is automatically
+ changed to 4 regardless of the setting
+ of register 14. On outgoing calls,
+ the most significant 1-bit is chosen to
+ select the outgoing service octet.
+ 19 0 Service-Octet-2
+ 20 0 Bit coded register (readonly)
+ Service-Octet-1 of last call.
+ Bit mapping is the same as register 18
+ 21 0 Bit coded register (readonly)
+ Set on incoming call (during RING) to
+ octet 3 of calling party number IE (Numbering plan)
+ See section 4.5.10 of ITU Q.931
+ 22 0 Bit coded register (readonly)
+ Set on incoming call (during RING) to
+ octet 3a of calling party number IE (Screening info)
+ See section 4.5.10 of ITU Q.931
+ 23 0 Bit coded register:
+ Bit 0: 0 = Add CPN to RING message off
+ 1 = Add CPN to RING message on
+ Bit 1: 0 = Add CPN to FCON message off
+ 1 = Add CPN to FCON message on
+ Bit 2: 0 = Add CDN to RING/FCON message off
+ 1 = Add CDN to RING/FCON message on
+
+ Last but not least a (at the moment fairly primitive) device to request
+ the line-status (/dev/isdninfo) is made available.
+
+ Automatic assignment of devices to lines:
+
+ All inactive physical lines are listening to all EAZs for incoming
+ calls and are NOT assigned to a specific tty or network interface.
+ When an incoming call is detected, the driver looks first for a network
+ interface and then for an opened tty which:
+
+ 1. is configured for the same EAZ.
+ 2. has the same protocol settings for the B-channel.
+ 3. (only for network interfaces if the security flag is set)
+ contains the caller number in its access list.
+ 4. Either the channel is not bound exclusively to another Net-interface, or
+ it is bound AND the other checks apply to exactly this interface.
+ (For usage of the bind-features, refer to the isdnctrl-man-page)
+
+ Only when a matching interface or tty is found is the call accepted
+ and the "connection" between the low-level-layer and the link-level-layer
+ is established and kept until the end of the connection.
+ In all other cases no connection is established. Isdn4linux can be
+ configured to either do NOTHING in this case (which is useful, if
+ other, external devices with the same EAZ/MSN are connected to the bus)
+ or to reject the call actively. (isdnctrl busreject ...)
+
+ For an outgoing call, the inactive physical lines are searched.
+ The call is placed on the first physical line, which supports the
+ requested protocols for the B-channel. If a net-interface, however
+ is pre-bound to a channel, this channel is used directly.
+
+ This makes it possible to configure several network interfaces and ttys
+ for one EAZ, if the network interfaces are set to secure operation.
+ If an incoming call matches one network interface, it gets connected to it.
+ If another incoming call for the same EAZ arrives, which does not match
+ a network interface, the first tty gets a "RING" and so on.
+
+2 System prerequisites:
+
+ ATTENTION!
+
+ Always use the latest module utilities. The current version is
+ named in Documentation/Changes. Some old versions of insmod
+ are not capable of setting the driver-Ids correctly.
+
+3. Lowlevel-driver configuration.
+
+ Configuration depends on how the drivers are built. See the
+ README.<yourDriver> for information on driver-specific setup.
+
+4. Device-inodes
+
+ The major and minor numbers and their names are described in
+ Documentation/devices.txt. The major numbers are:
+
+ 43 for the ISDN-tty's.
+ 44 for the ISDN-callout-tty's.
+ 45 for control/info/debug devices.
+
+5. Application
+
+ a) For some card-types, firmware has to be loaded into the cards, before
+ proceeding with device-independent setup. See README.<yourDriver>
+ for how to do that.
+
+ b) If you only intend to use ttys, you are nearly ready now.
+
+ c) If you want to have really permanent "Modem"-settings on disk, you
+ can start the daemon iprofd. Give it a path to a file at the command-
+ line. It will store the profile-settings in this file every time
+ an AT&W0 is performed on any ISDN-tty. If the file already exists,
+ all profiles are initialized from this file. If you want to unload
+ any of the modules, kill iprofd first.
+
+ d) For networking, continue: Create an interface:
+ isdnctrl addif isdn0
+
+ e) Set the EAZ (or MSN for Euro-ISDN):
+ isdnctrl eaz isdn0 2
+
+ (For 1TR6 a single digit is allowed, for Euro-ISDN the number is your
+ real MSN e.g.: Phone-Number)
+
+ f) Set the number for outgoing calls on the interface:
+ isdnctrl addphone isdn0 out 1234567
+ ... (this can be executed more than once, all assigned numbers are
+ tried in order)
+ and the number(s) for incoming calls:
+ isdnctrl addphone isdn0 in 1234567
+
+ g) Set the timeout for hang-up:
+ isdnctrl huptimeout isdn0 <timeout_in_seconds>
+
+ h) additionally you may activate charge-hang-up (= Hang up before
+ next charge-info, this only works, if your isdn-provider transmits
+ the charge-info during and after the connection):
+ isdnctrl chargehup isdn0 on
+
+ i) Set the dial mode of the interface:
+ isdnctrl dialmode isdn0 auto
+ "off" means that you (or the system) cannot make any connection
+ (neither incoming or outgoing connections are possible). Use
+ this if you want to be sure that no connections will be made.
+ "auto" means that the interface is in auto-dial mode, and will
+ attempt to make a connection whenever a network data packet needs
+ the interface's link. Note that this can cause unexpected dialouts,
+ and lead to a high phone bill! Some daemons or other pc's that use
+ this interface can cause this.
+ Incoming connections are also possible.
+ "manual" is a dial mode created to prevent the unexpected dialouts.
+ In this mode, the interface will never make any connections on its
+ own. You must explicitly initiate a connection with "isdnctrl dial
+ isdn0". However, after an idle time of no traffic as configured for
+ the huptimeout value with isdnctrl, the connection _will_ be ended.
+ If you don't want any automatic hangup, set the huptimeout value to 0.
+ "manual" is the default.
+
+ j) Setup the interface with ifconfig as usual, and set a route to it.
+
+ k) (optional) If you run X11 and have Tcl/Tk-wish version 4.0, you can use
+ the script tools/tcltk/isdnmon. You can add actions for line-status
+ changes. See the comments at the beginning of the script for how to
+ do that. There are other tty-based tools in the tools-subdirectory
+ contributed by Michael Knigge (imon), Volker Götz (imontty) and
+ Andreas Kool (isdnmon).
+
+ l) For initial testing, you can set the verbose-level to 2 (default: 0).
+ Then all incoming calls are logged, even if they are not addressed
+ to one of the configured net-interfaces:
+ isdnctrl verbose 2
+
+ Now you are ready! A ping to the set address should now result in an
+ automatic dial-out (look at syslog kernel-messages).
+ The phone numbers and EAZs can be assigned at any time with isdnctrl.
+ You can add as many interfaces as you like with addif following the
+ directions above. Of course, there may be some limitations. But we have
+ tested as many as 20 interfaces without any problem. However, if you
+ don't give an interface name to addif, the kernel will assign a name
+ which starts with "eth". The number of "eth"-interfaces is limited by
+ the kernel.
+
+5. Additional options for isdnctrl:
+
+ "isdnctrl secure <InterfaceName> on"
+ Only incoming calls, for which the caller-id is listed in the access
+ list of the interface are accepted. You can add caller-id's With the
+ command "isdnctrl addphone <InterfaceName> in <caller-id>"
+ Euro-ISDN does not transmit the leading '0' of the caller-id for an
+ incoming call, therefore you should configure it accordingly.
+ If the real number for the dialout e.g. is "09311234567" the number
+ to configure here is "9311234567". The pattern-match function
+ works similar to the shell mechanism.
+
+ ? one arbitrary digit
+ * zero or arbitrary many digits
+ [123] one of the digits in the list
+ [1-5] one digit between '1' and '5'
+ a '^' as the first character in a list inverts the list
+
+
+ "isdnctrl secure <InterfaceName> off"
+ Switch off secure operation (default).
+
+ "isdnctrl ihup <InterfaceName> [on|off]"
+ Switch the hang-up-timer for incoming calls on or off.
+
+ "isdnctrl eaz <InterfaceName>"
+ Returns the EAZ of an interface.
+
+ "isdnctrl delphone <InterfaceName> in|out <number>"
+ Deletes a number from one of the access-lists of the interface.
+
+ "isdnctrl delif <InterfaceName>"
+ Removes the interface (and possible slaves) from the kernel.
+ (You have to unregister it with "ifconfig <InterfaceName> down" before).
+
+ "isdnctrl callback <InterfaceName> [on|off]"
+ Switches an interface to callback-mode. In this mode, an incoming call
+ will be rejected and after this the remote-station will be called. If
+ you test this feature by using ping, some routers will re-dial very
+ quickly, so that the callback from isdn4linux may not be recognized.
+ In this case use ping with the option -i <sec> to increase the interval
+ between echo-packets.
+
+ "isdnctrl cbdelay <InterfaceName> [seconds]"
+ Sets the delay (default 5 sec) between an incoming call and start of
+ dialing when callback is enabled.
+
+ "isdnctrl cbhup <InterfaceName> [on|off]"
+ This enables (default) or disables an active hangup (reject) when getting an
+ incoming call for an interface which is configured for callback.
+
+ "isdnctrl encap <InterfaceName> <EncapType>"
+ Selects the type of packet-encapsulation. The encapsulation can be changed
+ only while an interface is down.
+
+ At the moment the following values are supported:
+
+ rawip (Default) Selects raw-IP-encapsulation. This means, MAC-headers
+ are stripped off.
+ ip IP with type-field. Same as IP but the type-field of the MAC-header
+ is preserved.
+ x25iface X.25 interface encapsulation (first byte semantics as defined in
+ ../networking/x25-iface.txt). Use this for running the linux
+ X.25 network protocol stack (AF_X25 sockets) on top of isdn.
+ cisco-h A special-mode for communicating with a Cisco, which is configured
+ to do "hdlc"
+ ethernet No stripping. Packets are sent with full MAC-header.
+ The Ethernet-address of the interface is faked, from its
+ IP-address: fc:fc:i1:i2:i3:i4, where i1-4 are the IP-addr.-values.
+ syncppp Synchronous PPP
+
+ uihdlc HDLC with UI-frame-header (for use with DOS ISPA, option -h1)
+
+
+ NOTE: x25iface encapsulation is currently experimental. Please
+ read README.x25 for further details
+
+
+ Watching packets, using standard-tcpdump will fail for all encapsulations
+ except ethernet because tcpdump does not know how to handle packets
+ without MAC-header. A patch for tcpdump is included in the utility-package
+ mentioned above.
+
+ "isdnctrl l2_prot <InterfaceName> <L2-ProtocolName>"
+ Selects a layer-2-protocol.
+ (With the ICN-driver and the HiSax-driver, "x75i" and "hdlc" is available.
+ With other drivers, "x75ui", "x75bui", "x25dte", "x25dce" may be
+ possible too. See README.x25 for x25 related l2 protocols.)
+
+ isdnctrl l3_prot <InterfaceName> <L3-ProtocolName>
+ The same for layer-3. (At the moment only "trans" is allowed)
+
+ "isdnctrl list <InterfaceName>"
+ Shows all parameters of an interface and the charge-info.
+ Try "all" as the interface name.
+
+ "isdnctrl hangup <InterfaceName>"
+ Forces hangup of an interface.
+
+ "isdnctrl bind <InterfaceName> <DriverId>,<ChannelNumber> [exclusive]"
+ If you are using more than one ISDN card, it is sometimes necessary to
+ dial out using a specific card or even preserve a specific channel for
+ dialout of a specific net-interface. This can be done with the above
+ command. Replace <DriverId> by whatever you assigned while loading the
+ module. The <ChannelNumber> is counted from zero. The upper limit
+ depends on the card used. At the moment no card supports more than
+ 2 channels, so the upper limit is one.
+
+ "isdnctrl unbind <InterfaceName>"
+ unbinds a previously bound interface.
+
+ "isdnctrl busreject <DriverId> on|off"
+ If switched on, isdn4linux replies a REJECT to incoming calls, it
+ cannot match to any configured interface.
+ If switched off, nothing happens in this case.
+ You normally should NOT enable this feature, if the ISDN adapter is not
+ the only device connected to the S0-bus. Otherwise it could happen that
+ isdn4linux rejects an incoming call, which belongs to another device on
+ the bus.
+
+ "isdnctrl addslave <InterfaceName> <SlaveName>
+ Creates a slave interface for channel-bundling. Slave interfaces are
+ not seen by the kernel, but their ISDN-part can be configured with
+ isdnctrl as usual. (Phone numbers, EAZ/MSN, timeouts etc.) If more
+ than two channels are to be bundled, feel free to create as many as you
+ want. InterfaceName must be a real interface, NOT a slave. Slave interfaces
+ start dialing, if the master interface resp. the previous slave interface
+ has a load of more than 7000 cps. They hangup if the load goes under 7000
+ cps, according to their "huptimeout"-parameter.
+
+ "isdnctrl sdelay <InterfaceName> secs."
+ This sets the minimum time an Interface has to be fully loaded, until
+ it sends a dial-request to its slave.
+
+ "isdnctrl dial <InterfaceName>"
+ Forces an interface to start dialing even if no packets are to be
+ transferred.
+
+ "isdnctrl mapping <DriverId> MSN0,MSN1,MSN2,...MSN9"
+ This installs a mapping table for EAZ<->MSN-mapping for a single line.
+ Missing MSN's have to be given as "-" or can be omitted, if at the end
+ of the commandline.
+ With this command, it's now possible to have an interface listening to
+ mixed 1TR6- and Euro-Type lines. In this case, the interface has to be
+ configured to a 1TR6-type EAZ (one digit). The mapping is also valid
+ for tty-emulation. Seen from the interface/tty-level the mapping
+ CAN be used, however it's possible to use single tty's/interfaces with
+ real MSN's (more digits) also, in which case the mapping will be ignored.
+ Here is an example:
+
+ You have a 1TR6-type line with base-nr. 1234567 and a Euro-line with
+ MSN's 987654, 987655 and 987656. The DriverId for the Euro-line is "EURO".
+
+ isdnctrl mapping EURO -,987654,987655,987656,-,987655
+ ...
+ isdnctrl eaz isdn0 1 # listen on 12345671(1tr6) and 987654(euro)
+ ...
+ isdnctrl eaz isdn1 4 # listen on 12345674(1tr6) only.
+ ...
+ isdnctrl eaz isdn2 987654 # listen on 987654(euro) only.
+
+ Same scheme is used with AT&E... at the tty's.
+
+6. If you want to write a new low-level-driver, you are welcome.
+ The interface to the link-level-module is described in the file INTERFACE.
+ If the interface should be expanded for any reason, don't do it
+ on your own, send me a mail containing the proposed changes and
+ some reasoning about them.
+ If other drivers will not be affected, I will include the changes
+ in the next release.
+ For developers only, there is a second mailing-list. Write to me
+ (fritz@isdn4linux.de), if you want to join that list.
+
+Have fun!
+
+ -Fritz
+
diff --git a/Documentation/isdn/README.FAQ b/Documentation/isdn/README.FAQ
new file mode 100644
index 000000000..356f79446
--- /dev/null
+++ b/Documentation/isdn/README.FAQ
@@ -0,0 +1,26 @@
+
+The FAQ for isdn4linux
+======================
+
+Please note that there is a big FAQ available in the isdn4k-utils.
+You find it in:
+ isdn4k-utils/FAQ/i4lfaq.sgml
+
+In case you just want to see the FAQ online, or download the newest version,
+you can have a look at my website:
+http://www.mhessler.de/i4lfaq/ (view + download)
+or:
+http://www.isdn4linux.de/faq/ (view)
+
+As the extension tells, the FAQ is in SGML format, and you can convert it
+into text/html/... format by using the sgml2txt/sgml2html/... tools.
+Alternatively, you can also do a 'configure; make all' in the FAQ directory.
+
+
+Please have a look at the FAQ before posting anything in the Mailinglist,
+or the newsgroup!
+
+
+Matthias Hessler
+hessler@isdn4linux.de
+
diff --git a/Documentation/isdn/README.HiSax b/Documentation/isdn/README.HiSax
new file mode 100644
index 000000000..b1a573cf4
--- /dev/null
+++ b/Documentation/isdn/README.HiSax
@@ -0,0 +1,659 @@
+HiSax is a Linux hardware-level driver for passive ISDN cards with Siemens
+chipset (ISAC_S 2085/2086/2186, HSCX SAB 82525). It is based on the Teles
+driver from Jan den Ouden.
+It is meant to be used with isdn4linux, an ISDN link-level module for Linux
+written by Fritz Elfert.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+Supported cards
+---------------
+
+Teles 8.0/16.0/16.3 and compatible ones
+Teles 16.3c
+Teles S0/PCMCIA
+Teles PCI
+Teles S0Box
+Creatix S0Box
+Creatix PnP S0
+Compaq ISDN S0 ISA card
+AVM A1 (Fritz, Teledat 150)
+AVM Fritz PCMCIA
+AVM Fritz PnP
+AVM Fritz PCI
+ELSA Microlink PCC-16, PCF, PCF-Pro, PCC-8
+ELSA Quickstep 1000
+ELSA Quickstep 1000PCI
+ELSA Quickstep 3000 (same settings as QS1000)
+ELSA Quickstep 3000PCI
+ELSA PCMCIA
+ITK ix1-micro Rev.2
+Eicon Diva 2.0 ISA and PCI (S0 and U interface, no PRO version)
+Eicon Diva 2.01 ISA and PCI
+Eicon Diva 2.02 PCI
+Eicon Diva Piccola
+ASUSCOM NETWORK INC. ISDNLink 128K PC adapter (order code I-IN100-ST-D)
+Dynalink IS64PH (OEM version of ASUSCOM NETWORK INC. ISDNLink 128K adapter)
+PCBIT-DP (OEM version of ASUSCOM NETWORK INC. ISDNLink)
+HFC-2BS0 based cards (TeleInt SA1)
+Sedlbauer Speed Card (Speed Win, Teledat 100, PCI, Fax+)
+Sedlbauer Speed Star/Speed Star2 (PCMCIA)
+Sedlbauer ISDN-Controller PC/104
+USR Sportster internal TA (compatible Stollmann tina-pp V3)
+USR internal TA PCI
+ith Kommunikationstechnik GmbH MIC 16 ISA card
+Traverse Technologie NETjet PCI S0 card and NETspider U card
+Ovislink ISDN sc100-p card (NETjet driver)
+Dr. Neuhaus Niccy PnP/PCI
+Siemens I-Surf 1.0
+Siemens I-Surf 2.0 (with IPAC, try type 12 asuscom)
+ACER P10
+HST Saphir
+Berkom Telekom A4T
+Scitel Quadro
+Gazel ISDN cards
+HFC-PCI based cards
+Winbond W6692 based cards
+HFC-S+, HFC-SP/PCMCIA cards
+formula-n enternow
+Gerdes Power ISDN
+
+Note: PCF, PCF-Pro: up to now, only the ISDN part is supported
+ PCC-8: not tested yet
+ Eicon.Diehl Diva U interface not tested
+
+If you know other passive cards with the Siemens chipset, please let me know.
+You can combine any card, if there is no conflict between the resources
+(io, mem, irq).
+
+
+Configuring the driver
+----------------------
+
+The HiSax driver can either be built directly into the kernel or as a module.
+It can be configured using the command line feature while loading the kernel
+with LILO or LOADLIN or, if built as a module, using insmod/modprobe with
+parameters.
+There is also some config needed before you compile the kernel and/or
+modules. It is included in the normal "make [menu]config" target at the
+kernel. Don't forget it, especially to select the right D-channel protocol.
+
+Please note: In older versions of the HiSax driver, all PnP cards
+needed to be configured with isapnp and worked only with the HiSax
+driver used as a module.
+
+In the current version, HiSax will automatically use the in-kernel
+ISAPnP support, provided you selected it during kernel configuration
+(CONFIG_ISAPNP), if you don't give the io=, irq= command line parameters.
+
+The affected card types are: 4,7,12,14,19,27-30
+
+a) when built as a module
+-------------------------
+
+insmod/modprobe hisax.o \
+ io=iobase irq=IRQ mem=membase type=card_type \
+ protocol=D_channel_protocol id=idstring
+
+or, if several cards are installed:
+
+insmod/modprobe hisax.o \
+ io=iobase1,iobase2,... irq=IRQ1,IRQ2,... mem=membase1,membase2,... \
+ type=card_type1,card_type2,... \
+ protocol=D_channel_protocol1,D_channel_protocol2,... \
+ id=idstring1%idstring2 ...
+
+where "iobaseN" represents the I/O base address of the Nth card, "membaseN"
+the memory base address of the Nth card, etc.
+
+The reason for the delimiter "%" being used in the idstrings is that ","
+won't work with the current modules package.
+
+The parameters may be specified in any order. For example, the "io"
+parameter may precede the "irq" parameter, or vice versa. If several
+cards are installed, the ordering within the comma separated parameter
+lists must of course be consistent.
+
+Only parameters applicable to the card type need to be specified. For
+example, the Teles 16.3 card is not memory-mapped, so the "mem"
+parameter may be omitted for this card. Sometimes it may be necessary
+to specify a dummy parameter, however. This is the case when there is
+a card of a different type later in the list that needs a parameter
+which the preceding card does not. For instance, if a Teles 16.0 card
+is listed after a Teles 16.3 card, a dummy memory base parameter of 0
+must be specified for the 16.3. Instead of a dummy value, the parameter
+can also be skipped by simply omitting the value. For example:
+mem=,0xd0000. See example 6 below.
+
+The parameter for the D-Channel protocol may be omitted if you selected the
+correct one during kernel config. Valid values are "1" for German 1TR6,
+"2" for EDSS1 (Euro ISDN), "3" for leased lines (no D-Channel) and "4"
+for US NI1.
+With US NI1 you have to include your SPID into the MSN setting in the form
+<MSN>:<SPID> for example (your phonenumber is 1234 your SPID 5678):
+AT&E1234:5678 on ttyI interfaces
+isdnctrl eaz ippp0 1234:5678 on network devices
+
+The Creatix/Teles PnP cards use io1= and io2= instead of io= for specifying
+the I/O addresses of the ISAC and HSCX chips, respectively.
+
+Card types:
+
+ Type Required parameters (in addition to type and protocol)
+
+ 1 Teles 16.0 irq, mem, io
+ 2 Teles 8.0 irq, mem
+ 3 Teles 16.3 (non PnP) irq, io
+ 4 Creatix/Teles PnP irq, io0 (ISAC), io1 (HSCX)
+ 5 AVM A1 (Fritz) irq, io
+ 6 ELSA PCC/PCF cards io or nothing for autodetect (the iobase is
+ required only if you have more than one ELSA
+ card in your PC)
+ 7 ELSA Quickstep 1000 irq, io (from isapnp setup)
+ 8 Teles 16.3 PCMCIA irq, io
+ 9 ITK ix1-micro Rev.2 irq, io
+ 10 ELSA PCMCIA irq, io (set with card manager)
+ 11 Eicon.Diehl Diva ISA PnP irq, io
+ 11 Eicon.Diehl Diva PCI no parameter
+ 12 ASUS COM ISDNLink irq, io (from isapnp setup)
+ 13 HFC-2BS0 based cards irq, io
+ 14 Teles 16.3c PnP irq, io
+ 15 Sedlbauer Speed Card irq, io
+ 15 Sedlbauer PC/104 irq, io
+ 15 Sedlbauer Speed PCI no parameter
+ 16 USR Sportster internal irq, io
+ 17 MIC card irq, io
+ 18 ELSA Quickstep 1000PCI no parameter
+ 19 Compaq ISDN S0 ISA card irq, io0, io1, io (from isapnp setup io=IO2)
+ 20 NETjet PCI card no parameter
+ 21 Teles PCI no parameter
+ 22 Sedlbauer Speed Star (PCMCIA) irq, io (set with card manager)
+ 24 Dr. Neuhaus Niccy PnP irq, io0, io1 (from isapnp setup)
+ 24 Dr. Neuhaus Niccy PCI no parameter
+ 25 Teles S0Box irq, io (of the used lpt port)
+ 26 AVM A1 PCMCIA (Fritz!) irq, io (set with card manager)
+ 27 AVM PnP (Fritz!PnP) irq, io (from isapnp setup)
+ 27 AVM PCI (Fritz!PCI) no parameter
+ 28 Sedlbauer Speed Fax+ irq, io (from isapnp setup)
+ 29 Siemens I-Surf 1.0 irq, io, memory (from isapnp setup)
+ 30 ACER P10 irq, io (from isapnp setup)
+ 31 HST Saphir irq, io
+ 32 Telekom A4T none
+ 33 Scitel Quadro subcontroller (4*S0, subctrl 1...4)
+ 34 Gazel ISDN cards (ISA) irq,io
+ 34 Gazel ISDN cards (PCI) none
+ 35 HFC 2BDS0 PCI none
+ 36 W6692 based PCI cards none
+ 37 HFC 2BDS0 S+, SP irq,io
+ 38 NETspider U PCI card none
+ 39 HFC 2BDS0 SP/PCMCIA irq,io (set with cardmgr)
+ 40 hotplug interface
+ 41 Formula-n enter:now PCI none
+
+At the moment IRQ sharing is only possible with PCI cards. Please make sure
+that your IRQ is free and enabled for ISA use.
+
+
+Examples for module loading
+
+1. Teles 16.3, Euro ISDN, I/O base 280 hex, IRQ 10
+ modprobe hisax type=3 protocol=2 io=0x280 irq=10
+
+2. Teles 16.0, 1TR6 ISDN, I/O base d80 hex, IRQ 5, Memory d0000 hex
+ modprobe hisax protocol=1 type=1 io=0xd80 mem=0xd0000 irq=5
+
+3. Fritzcard, Euro ISDN, I/O base 340 hex, IRQ 10 and ELSA PCF, Euro ISDN
+ modprobe hisax type=5,6 protocol=2,2 io=0x340 irq=10 id=Fritz%Elsa
+
+4. Any ELSA PCC/PCF card, Euro ISDN
+ modprobe hisax type=6 protocol=2
+
+5. Teles 16.3 PnP, Euro ISDN, with isapnp configured
+ isapnp config: (INT 0 (IRQ 10 (MODE +E)))
+ (IO 0 (BASE 0x0580))
+ (IO 1 (BASE 0x0180))
+ modprobe hisax type=4 protocol=2 irq=10 io0=0x580 io1=0x180
+
+ In the current version of HiSax, you can instead simply use
+
+ modprobe hisax type=4 protocol=2
+
+ if you configured your kernel for ISAPnP. Don't run isapnp in
+ this case!
+
+6. Teles 16.3, Euro ISDN, I/O base 280 hex, IRQ 12 and
+ Teles 16.0, 1TR6, IRQ 5, Memory d0000 hex
+ modprobe hisax type=3,1 protocol=2,1 io=0x280 mem=0,0xd0000
+
+ Please note the dummy 0 memory address for the Teles 16.3, used as a
+ placeholder as described above, in the last example.
+
+7. Teles PCMCIA, Euro ISDN, I/O base 180 hex, IRQ 15 (default values)
+ modprobe hisax type=8 protocol=2 io=0x180 irq=15
+
+
+b) using LILO/LOADLIN, with the driver compiled directly into the kernel
+------------------------------------------------------------------------
+
+hisax=typ1,dp1,pa_1,pb_1,pc_1[,typ2,dp2,pa_2 ... \
+ typn,dpn,pa_n,pb_n,pc_n][,idstring1[,idstring2,...,idstringn]]
+
+where
+ typ1 = type of 1st card (default depends on kernel settings)
+ dp1 = D-Channel protocol of 1st card. 1=1TR6, 2=EDSS1, 3=leased
+ pa_1 = 1st parameter (depending on the type of the card)
+ pb_1 = 2nd parameter ( " " " " " " " )
+ pc_1 = 3rd parameter ( " " " " " " " )
+
+ typ2,dp2,pa_2,pb_2,pc_2 = Parameters of the second card (defaults: none)
+ typn,dpn,pa_n,pb_n,pc_n = Parameters of the n'th card (up to 16 cards are
+ supported)
+
+ idstring = Driver ID for accessing the particular card with utility
+ programs and for identification when using a line monitor
+ (default: "HiSax")
+
+ Note: the ID string must start with an alphabetical character!
+
+Card types:
+
+type
+ 1 Teles 16.0 pa=irq pb=membase pc=iobase
+ 2 Teles 8.0 pa=irq pb=membase
+ 3 Teles 16.3 pa=irq pb=iobase
+ 4 Creatix/Teles PNP ONLY WORKS AS A MODULE !
+ 5 AVM A1 (Fritz) pa=irq pb=iobase
+ 6 ELSA PCC/PCF cards pa=iobase or nothing for autodetect
+ 7 ELSA Quickstep 1000 ONLY WORKS AS A MODULE !
+ 8 Teles S0 PCMCIA pa=irq pb=iobase
+ 9 ITK ix1-micro Rev.2 pa=irq pb=iobase
+ 10 ELSA PCMCIA pa=irq, pb=io (set with card manager)
+ 11 Eicon.Diehl Diva ISAPnP ONLY WORKS AS A MODULE !
+ 11 Eicon.Diehl Diva PCI no parameter
+ 12 ASUS COM ISDNLink ONLY WORKS AS A MODULE !
+ 13 HFC-2BS0 based cards pa=irq pb=io
+ 14 Teles 16.3c PnP ONLY WORKS AS A MODULE !
+ 15 Sedlbauer Speed Card pa=irq pb=io (Speed Win only as module !)
+ 15 Sedlbauer PC/104 pa=irq pb=io
+ 15 Sedlbauer Speed PCI no parameter
+ 16 USR Sportster internal pa=irq pb=io
+ 17 MIC card pa=irq pb=io
+ 18 ELSA Quickstep 1000PCI no parameter
+ 19 Compaq ISDN S0 ISA card ONLY WORKS AS A MODULE !
+ 20 NETjet PCI card no parameter
+ 21 Teles PCI no parameter
+ 22 Sedlbauer Speed Star (PCMCIA) pa=irq, pb=io (set with card manager)
+ 24 Dr. Neuhaus Niccy PnP ONLY WORKS AS A MODULE !
+ 24 Dr. Neuhaus Niccy PCI no parameter
+ 25 Teles S0Box pa=irq, pb=io (of the used lpt port)
+ 26 AVM A1 PCMCIA (Fritz!) pa=irq, pb=io (set with card manager)
+ 27 AVM PnP (Fritz!PnP) ONLY WORKS AS A MODULE !
+ 27 AVM PCI (Fritz!PCI) no parameter
+ 28 Sedlbauer Speed Fax+ ONLY WORKS AS A MODULE !
+ 29 Siemens I-Surf 1.0 ONLY WORKS AS A MODULE !
+ 30 ACER P10 ONLY WORKS AS A MODULE !
+ 31 HST Saphir pa=irq, pb=io
+ 32 Telekom A4T no parameter
+ 33 Scitel Quadro subcontroller (4*S0, subctrl 1...4)
+ 34 Gazel ISDN cards (ISA) pa=irq, pb=io
+ 34 Gazel ISDN cards (PCI) no parameter
+ 35 HFC 2BDS0 PCI no parameter
+ 36 W6692 based PCI cards none
+ 37 HFC 2BDS0 S+,SP/PCMCIA ONLY WORKS AS A MODULE !
+ 38 NETspider U PCI card none
+ 39 HFC 2BDS0 SP/PCMCIA ONLY WORKS AS A MODULE !
+ 40 hotplug interface ONLY WORKS AS A MODULE !
+ 41 Formula-n enter:now PCI none
+
+Running the driver
+------------------
+
+When you insmod isdn.o and hisax.o (or with the in-kernel version, during
+boot time), a few lines should appear in your syslog. Look for something like:
+
+Apr 13 21:01:59 kke01 kernel: HiSax: Driver for Siemens chip set ISDN cards
+Apr 13 21:01:59 kke01 kernel: HiSax: Version 2.9
+Apr 13 21:01:59 kke01 kernel: HiSax: Revisions 1.14/1.9/1.10/1.25/1.8
+Apr 13 21:01:59 kke01 kernel: HiSax: Total 1 card defined
+Apr 13 21:01:59 kke01 kernel: HiSax: Card 1 Protocol EDSS1 Id=HiSax1 (0)
+Apr 13 21:01:59 kke01 kernel: HiSax: Elsa driver Rev. 1.13
+...
+Apr 13 21:01:59 kke01 kernel: Elsa: PCF-Pro found at 0x360 Rev.:C IRQ 10
+Apr 13 21:01:59 kke01 kernel: Elsa: timer OK; resetting card
+Apr 13 21:01:59 kke01 kernel: Elsa: HSCX version A: V2.1 B: V2.1
+Apr 13 21:01:59 kke01 kernel: Elsa: ISAC 2086/2186 V1.1
+...
+Apr 13 21:01:59 kke01 kernel: HiSax: DSS1 Rev. 1.14
+Apr 13 21:01:59 kke01 kernel: HiSax: 2 channels added
+
+This means that the card is ready for use.
+Cabling problems or line-downs are not detected, and only some ELSA cards can
+detect the S0 power.
+
+Remember that, according to the new strategy for accessing low-level drivers
+from within isdn4linux, you should also define a driver ID while doing
+insmod: Simply append hisax_id=<SomeString> to the insmod command line. This
+string MUST NOT start with a digit or a small 'x'!
+
+At this point you can run a 'cat /dev/isdnctrl0' and view debugging messages.
+
+At the moment, debugging messages are enabled with the hisaxctrl tool:
+
+ hisaxctrl <DriverId> DebugCmd <debugging_flags>
+
+<DriverId> default is HiSax, if you didn't specify one.
+
+DebugCmd is 1 for generic debugging
+ 11 for layer 1 development debugging
+ 13 for layer 3 development debugging
+
+where <debugging_flags> is the integer sum of the following debugging
+options you wish enabled:
+
+With DebugCmd set to 1:
+
+ 0x0001 Link-level <--> hardware-level communication
+ 0x0002 Top state machine
+ 0x0004 D-Channel Frames for isdnlog
+ 0x0008 D-Channel Q.921
+ 0x0010 B-Channel X.75
+ 0x0020 D-Channel l2
+ 0x0040 B-Channel l2
+ 0x0080 D-Channel link state debugging
+ 0x0100 B-Channel link state debugging
+ 0x0200 TEI debug
+ 0x0400 LOCK debug in callc.c
+ 0x0800 More paranoid debug in callc.c (not for normal use)
+ 0x1000 D-Channel l1 state debugging
+ 0x2000 B-Channel l1 state debugging
+
+With DebugCmd set to 11:
+
+ 0x0001 Warnings (default: on)
+ 0x0002 IRQ status
+ 0x0004 ISAC
+ 0x0008 ISAC FIFO
+ 0x0010 HSCX
+ 0x0020 HSCX FIFO (attention: full B-Channel output!)
+ 0x0040 D-Channel LAPD frame types
+ 0x0080 IPAC debug
+ 0x0100 HFC receive debug
+ 0x0200 ISAC monitor debug
+ 0x0400 D-Channel frames for isdnlog (set with 1 0x4 too)
+ 0x0800 D-Channel message verbose
+
+With DebugCmd set to 13:
+
+ 1 Warnings (default: on)
+ 2 l3 protocol descriptor errors
+ 4 l3 state machine
+ 8 charge info debugging (1TR6)
+
+For example, 'hisaxctrl HiSax 1 0x3ff' enables full generic debugging.
+
+Because of some obscure problems with some switch equipment, the delay
+between the CONNECT message and sending the first data on the B-channel is now
+configurable with
+
+hisaxctrl <DriverId> 2 <delay>
+<delay> in ms Value between 50 and 800 ms is recommended.
+
+Downloading Firmware
+--------------------
+At the moment, the Sedlbauer speed fax+ is the only card, which
+needs to download firmware.
+The firmware is downloaded with the hisaxctrl tool:
+
+ hisaxctrl <DriverId> 9 <firmware_filename>
+
+<DriverId> default is HiSax, if you didn't specify one,
+
+where <firmware_filename> is the filename of the firmware file.
+
+For example, 'hisaxctrl HiSax 9 ISAR.BIN' downloads the firmware for
+ISAR based cards (like the Sedlbauer speed fax+).
+
+Warning
+-------
+HiSax is a work in progress and may crash your machine.
+For certification look at HiSax.cert file.
+
+Limitations
+-----------
+At this time, HiSax only works on Euro ISDN lines and German 1TR6 lines.
+For leased lines see appendix.
+
+Bugs
+----
+If you find any, please let me know.
+
+
+Thanks
+------
+Special thanks to:
+
+ Emil Stephan for the name HiSax which is a mix of HSCX and ISAC.
+
+ Fritz Elfert, Jan den Ouden, Michael Hipp, Michael Wein,
+ Andreas Kool, Pekka Sarnila, Sim Yskes, Johan Myrre'en,
+ Klaus-Peter Nischke (ITK AG), Christof Petig, Werner Fehn (ELSA GmbH),
+ Volker Schmidt
+ Edgar Toernig and Marcus Niemann for the Sedlbauer driver
+ Stephan von Krawczynski
+ Juergen Quade for the Leased Line part
+ Klaus Lichtenwalder (Klaus.Lichtenwalder@WebForum.DE), for ELSA PCMCIA support
+ Enrik Berkhan (enrik@starfleet.inka.de) for S0BOX specific stuff
+ Ton van Rosmalen for Teles PCI
+ Petr Novak <petr.novak@i.cz> for Winbond W6692 support
+ Werner Cornelius <werner@isdn4linux.de> for HFC-PCI, HFC-S(+/P) and supplementary services support
+ and more people who are hunting bugs. (If I forgot somebody, please
+ send me a mail).
+
+ Firma ELSA GmbH
+ Firma Eicon.Diehl GmbH
+ Firma Dynalink NL
+ Firma ASUSCOM NETWORK INC. Taiwan
+ Firma S.u.S.E
+ Firma ith Kommunikationstechnik GmbH
+ Firma Traverse Technologie Australia
+ Firma Medusa GmbH (www.medusa.de).
+ Firma Quant-X Austria for sponsoring a DEC Alpha board+CPU
+ Firma Cologne Chip Designs GmbH
+
+ My girl friend and partner in life Ute for her patience with me.
+
+
+Enjoy,
+
+Karsten Keil
+keil@isdn4linux.de
+
+
+Appendix: Teles PCMCIA driver
+-----------------------------
+
+See
+ http://www.linux.no/teles_cs.txt
+for instructions.
+
+Appendix: Linux and ISDN-leased lines
+-------------------------------------
+
+Original from Juergen Quade, new version KKe.
+
+Attention NEW VERSION, the old leased line syntax won't work !!!
+
+You can use HiSax to connect your Linux-Box via an ISDN leased line
+to e.g. the Internet:
+
+1. Build a kernel which includes the HiSax driver either as a module
+ or as part of the kernel.
+ cd /usr/src/linux
+ make menuconfig
+ <ISDN subsystem - ISDN support -- HiSax>
+ make clean; make zImage; make modules; make modules_install
+2. Install the new kernel
+ cp /usr/src/linux/arch/x86/boot/zImage /etc/kernel/linux.isdn
+ vi /etc/lilo.conf
+ <add new kernel in the bootable image section>
+ lilo
+3. in case the hisax driver is a "fixed" part of the kernel, configure
+ the driver with lilo:
+ vi /etc/lilo.conf
+ <add HiSax driver parameter in the global section (see below)>
+ lilo
+ Your lilo.conf _might_ look like the following:
+
+ # LILO configuration-file
+ # global section
+ # teles 16.0 on IRQ=5, MEM=0xd8000, PORT=0xd80
+ append="hisax=1,3,5,0xd8000,0xd80,HiSax"
+ # teles 16.3 (non pnp) on IRQ=15, PORT=0xd80
+ # append="hisax=3,3,5,0xd8000,0xd80,HiSax"
+ boot=/dev/sda
+ compact # faster, but won't work on all systems.
+ linear
+ read-only
+ prompt
+ timeout=100
+ vga = normal # force sane state
+ # Linux bootable partition config begins
+ image = /etc/kernel/linux.isdn
+ root = /dev/sda1
+ label = linux.isdn
+ #
+ image = /etc/kernel/linux-2.0.30
+ root = /dev/sda1
+ label = linux.secure
+
+ In the line starting with "append" you have to adapt the parameters
+ according to your card (see above in this file)
+
+3. boot the new linux.isdn kernel
+4. start the ISDN subsystem:
+ a) load - if necessary - the modules (depends, whether you compiled
+ the ISDN driver as module or not)
+ According to the type of card you have to specify the necessary
+ driver parameter (irq, io, mem, type, protocol).
+ For the leased line the protocol is "3". See the table above for
+ the parameters, which you have to specify depending on your card.
+ b) configure i4l
+ /sbin/isdnctrl addif isdn0
+ # EAZ 1 -- B1 channel 2 --B2 channel
+ /sbin/isdnctrl eaz isdn0 1
+ /sbin/isdnctrl secure isdn0 on
+ /sbin/isdnctrl huptimeout isdn0 0
+ /sbin/isdnctrl l2_prot isdn0 hdlc
+ # Attention you must not set an outgoing number !!! This won't work !!!
+ # The incoming number is LEASED0 for the first card, LEASED1 for the
+ # second and so on.
+ /sbin/isdnctrl addphone isdn0 in LEASED0
+ # Here is no need to bind the channel.
+ c) in case the remote partner is a CISCO:
+ /sbin/isdnctrl encap isdn0 cisco-h
+ d) configure the interface
+ /sbin/ifconfig isdn0 ${LOCAL_IP} pointopoint ${REMOTE_IP}
+ e) set the routes
+ /sbin/route add -host ${REMOTE_IP} isdn0
+ /sbin/route add default gw ${REMOTE_IP}
+ f) switch the card into leased mode for each used B-channel
+ /sbin/hisaxctrl HiSax 5 1
+
+Remarks:
+a) Use state of the art isdn4k-utils
+
+Here an example script:
+#!/bin/sh
+# Start/Stop ISDN leased line connection
+
+I4L_AS_MODULE=yes
+I4L_REMOTE_IS_CISCO=no
+I4L_MODULE_PARAMS="type=16 io=0x268 irq=7 "
+I4L_DEBUG=no
+I4L_LEASED_128K=yes
+LOCAL_IP=192.168.1.1
+REMOTE_IP=192.168.2.1
+
+case "$1" in
+ start)
+ echo "Starting ISDN ..."
+ if [ ${I4L_AS_MODULE} = "yes" ]; then
+ echo "loading modules..."
+ /sbin/modprobe hisax ${I4L_MODULE_PARAMS}
+ fi
+ # configure interface
+ /sbin/isdnctrl addif isdn0
+ /sbin/isdnctrl secure isdn0 on
+ if [ ${I4L_DEBUG} = "yes" ]; then
+ /sbin/isdnctrl verbose 7
+ /sbin/hisaxctrl HiSax 1 0xffff
+ /sbin/hisaxctrl HiSax 11 0xff
+ cat /dev/isdnctrl >/tmp/lea.log &
+ fi
+ if [ ${I4L_REMOTE_IS_CISCO} = "yes" ]; then
+ /sbin/isdnctrl encap isdn0 cisco-h
+ fi
+ /sbin/isdnctrl huptimeout isdn0 0
+ # B-CHANNEL 1
+ /sbin/isdnctrl eaz isdn0 1
+ /sbin/isdnctrl l2_prot isdn0 hdlc
+ # 1. card
+ /sbin/isdnctrl addphone isdn0 in LEASED0
+ if [ ${I4L_LEASED_128K} = "yes" ]; then
+ /sbin/isdnctrl addslave isdn0 isdn0s
+ /sbin/isdnctrl secure isdn0s on
+ /sbin/isdnctrl huptimeout isdn0s 0
+ # B-CHANNEL 2
+ /sbin/isdnctrl eaz isdn0s 2
+ /sbin/isdnctrl l2_prot isdn0s hdlc
+ # 1. card
+ /sbin/isdnctrl addphone isdn0s in LEASED0
+ if [ ${I4L_REMOTE_IS_CISCO} = "yes" ]; then
+ /sbin/isdnctrl encap isdn0s cisco-h
+ fi
+ fi
+ /sbin/isdnctrl dialmode isdn0 manual
+ # configure tcp/ip
+ /sbin/ifconfig isdn0 ${LOCAL_IP} pointopoint ${REMOTE_IP}
+ /sbin/route add -host ${REMOTE_IP} isdn0
+ /sbin/route add default gw ${REMOTE_IP}
+ # switch to leased mode
+ # B-CHANNEL 1
+ /sbin/hisaxctrl HiSax 5 1
+ if [ ${I4L_LEASED_128K} = "yes" ]; then
+ # B-CHANNEL 2
+ sleep 10; /* Wait for master */
+ /sbin/hisaxctrl HiSax 5 2
+ fi
+ ;;
+ stop)
+ /sbin/ifconfig isdn0 down
+ /sbin/isdnctrl delif isdn0
+ if [ ${I4L_DEBUG} = "yes" ]; then
+ killall cat
+ fi
+ if [ ${I4L_AS_MODULE} = "yes" ]; then
+ /sbin/rmmod hisax
+ /sbin/rmmod isdn
+ /sbin/rmmod ppp
+ /sbin/rmmod slhc
+ fi
+ ;;
+ *)
+ echo "Usage: $0 {start|stop}"
+ exit 1
+esac
+exit 0
diff --git a/Documentation/isdn/README.act2000 b/Documentation/isdn/README.act2000
new file mode 100644
index 000000000..ce7115e7f
--- /dev/null
+++ b/Documentation/isdn/README.act2000
@@ -0,0 +1,104 @@
+$Id: README.act2000,v 1.3 2000/08/06 09:22:51 armin Exp $
+
+This document describes the ACT2000 driver for the
+IBM Active 2000 ISDN card.
+
+There are 3 Types of this card available. A ISA-, MCA-, and PCMCIA-Bus
+Version. Currently, only the ISA-Bus version of the card is supported.
+However MCA and PCMCIA will follow soon.
+
+The ISA-Bus Version uses 8 IO-ports. The base port address has to be set
+manually using the DIP switches.
+
+Setting up the DIP switches for the IBM Active 2000 ISDN card:
+
+ Note: S5 and S6 always set off!
+
+ S1 S2 S3 S4 Base-port
+ on on on on 0x0200 (Factory default)
+ off on on on 0x0240
+ on off on on 0x0280
+ off off on on 0x02c0
+ on on off on 0x0300
+ off on off on 0x0340
+ on off off on 0x0380
+ on on on off 0xcfe0
+ off on on off 0xcfa0
+ on off on off 0xcf60
+ off off on off 0xcf20
+ on on off off 0xcee0
+ off on off off 0xcea0
+ on off off off 0xce60
+ off off off off Card disabled
+
+IRQ is configured by software. Possible values are:
+
+ 3, 5, 7, 10, 11, 12, 15 and none (polled mode)
+
+
+The ACT2000 driver may either be built into the kernel or as a module.
+Initialization depends on how the driver is built:
+
+Driver built into the kernel:
+
+ The ACT2000 driver can be configured using the commandline-feature while
+ loading the kernel with LILO or LOADLIN. It accepts the following syntax:
+
+ act2000=b,p,i[,idstring]
+
+ where
+
+ b = Bus-Type (1=ISA, 2=MCA, 3=PCMCIA)
+ p = portbase (-1 means autoprobe)
+ i = Interrupt (-1 means use next free IRQ, 0 means polled mode)
+
+ The idstring is an arbitrary string used for referencing the card
+ by the actctrl tool later.
+
+ Defaults used, when no parameters given at all:
+
+ 1,-1,-1,""
+
+ which means: Autoprobe for an ISA card, use next free IRQ, let the
+ ISDN linklevel fill the IdString (usually "line0" for the first card).
+
+ If you like to use more than one card, you can use the program
+ "actctrl" from the utility-package to configure additional cards.
+
+ Using the "actctrl"-utility, portbase and irq can also be changed
+ during runtime. The D-channel protocol is configured by the "dproto"
+ option of the "actctrl"-utility after loading the firmware into the
+ card's memory using the "actctrl"-utility.
+
+Driver built as module:
+
+ The module act2000.o can be configured during modprobe (insmod) by
+ appending its parameters to the modprobe resp. insmod commandline.
+ The following syntax is accepted:
+
+ act_bus=b act_port=p act_irq=i act_id=idstring
+
+ where b, p, i and idstring have the same meanings as the parameters
+ described for the builtin version above.
+
+ Using the "actctrl"-utility, the same features apply to the modularized
+ version as to the kernel-builtin one. (i.e. loading of firmware and
+ configuring the D-channel protocol)
+
+Loading the firmware into the card:
+
+ The firmware is supplied together with the isdn4k-utils package. It
+ can be found in the subdirectory act2000/firmware/
+
+ Assuming you have installed the utility-package correctly, the firmware
+ will be downloaded into the card using the following command:
+
+ actctrl -d idstring load /etc/isdn/bip11.btl
+
+ where idstring is the Name of the card, given during insmod-time or
+ (for kernel-builtin driver) on the kernel commandline. If only one
+ ISDN card is used, the -d isdstrin may be omitted.
+
+ For further documentation (adding more IBM Active 2000 cards), refer to
+ the manpage actctrl.8 which is included in the isdn4k-utils package.
+
diff --git a/Documentation/isdn/README.audio b/Documentation/isdn/README.audio
new file mode 100644
index 000000000..8ebca1929
--- /dev/null
+++ b/Documentation/isdn/README.audio
@@ -0,0 +1,138 @@
+$Id: README.audio,v 1.8 1999/07/11 17:17:29 armin Exp $
+
+ISDN subsystem for Linux.
+ Description of audio mode.
+
+When enabled during kernel configuration, the tty emulator of the ISDN
+subsystem is capable of a reduced set of commands to support audio.
+This document describes the commands supported and the format of
+audio data.
+
+Commands for enabling/disabling audio mode:
+
+ AT+FCLASS=8 Enable audio mode.
+ This affects the following registers:
+ S18: Bits 0 and 2 are set.
+ S16: Set to 48 and any further change to
+ larger values is blocked.
+ AT+FCLASS=0 Disable audio mode.
+ Register 18 is set to 4.
+ AT+FCLASS=? Show possible modes.
+ AT+FCLASS? Report current mode (0 or 8).
+
+Commands supported in audio mode:
+
+All audio mode commands have one of the following forms:
+
+ AT+Vxx? Show current setting.
+ AT+Vxx=? Show possible settings.
+ AT+Vxx=v Set simple parameter.
+ AT+Vxx=v,v ... Set complex parameter.
+
+where xx is a two-character code and v are alphanumerical parameters.
+The following commands are supported:
+
+ AT+VNH=x Auto hangup setting. NO EFFECT, supported
+ for compatibility only.
+ AT+VNH? Always reporting "1"
+ AT+VNH=? Always reporting "1"
+
+ AT+VIP Reset all audio parameters.
+
+ AT+VLS=x Line select. x is one of the following:
+ 0 = No device.
+ 2 = Phone line.
+ AT+VLS=? Always reporting "0,2"
+ AT+VLS? Show current line.
+
+ AT+VRX Start recording. Emulator responds with
+ CONNECT and starts sending audio data to
+ the application. See below for data format
+
+ AT+VSD=x,y Set silence-detection parameters.
+ Possible parameters:
+ x = 0 ... 31 sensitivity threshold level.
+ (default 0 , deactivated)
+ y = 0 ... 255 range of interval in units
+ of 0.1 second. (default 70)
+ AT+VSD=? Report possible parameters.
+ AT+VSD? Show current parameters.
+
+ AT+VDD=x,y Set DTMF-detection parameters.
+ Only possible if online and during this connection.
+ Possible parameters:
+ x = 0 ... 15 sensitivity threshold level.
+ (default 0 , I4L soft-decode)
+ (1-15 soft-decode off, hardware on)
+ y = 0 ... 255 tone duration in units of 5ms.
+ Not for I4L soft decode (default 8, 40ms)
+ AT+VDD=? Report possible parameters.
+ AT+VDD? Show current parameters.
+
+ AT+VSM=x Select audio data format.
+ Possible parameters:
+ 2 = ADPCM-2
+ 3 = ADPCM-3
+ 4 = ADPCM-4
+ 5 = aLAW
+ 6 = uLAW
+ AT+VSM=? Show possible audio formats.
+
+ AT+VTX Start audio playback. Emulator responds
+ with CONNECT and starts sending audio data
+ received from the application via phone line.
+General behavior and description of data formats/protocol.
+ when a connection is made:
+
+ On incoming calls, if the application responds to a RING
+ with ATA, depending on the calling service, the emulator
+ responds with either CONNECT (data call) or VCON (voice call).
+
+ On outgoing voice calls, the emulator responds with VCON
+ upon connection setup.
+
+ Audio recording.
+
+ When receiving audio data, a kind of bisync protocol is used.
+ Upon AT+VRX command, the emulator responds with CONNECT, and
+ starts sending audio data to the application. There are several
+ escape sequences defined, all using DLE (0x10) as Escape char:
+
+ <DLE><ETX> End of audio data. (i.e. caused by a
+ hangup of the remote side) Emulator stops
+ recording, responding with VCON.
+ <DLE><DC4> Abort recording, (send by appl.) Emulator
+ stops recording, sends DLE,ETX.
+ <DLE><DLE> Escape sequence for DLE in data stream.
+ <DLE>0 Touchtone "0" received.
+ ...
+ <DLE>9 Touchtone "9" received.
+ <DLE># Touchtone "#" received.
+ <DLE>* Touchtone "*" received.
+ <DLE>A Touchtone "A" received.
+ <DLE>B Touchtone "B" received.
+ <DLE>C Touchtone "C" received.
+ <DLE>D Touchtone "D" received.
+
+ <DLE>q quiet. Silence detected after non-silence.
+ <DLE>s silence. Silence detected from the
+ start of recording.
+
+ Currently unsupported DLE sequences:
+
+ <DLE>c FAX calling tone received.
+ <DLE>b busy tone received.
+
+ Audio playback.
+
+ When sending audio data, upon AT+VTX command, emulator responds with
+ CONNECT, and starts transferring data from application to the phone line.
+ The same DLE sequences apply to this mode.
+
+ Full-Duplex-Audio:
+
+ When _both_ commands for recording and playback are given in _one_
+ AT-command-line (i.e.: "AT+VTX+VRX"), full-duplex-mode is selected.
+ In this mode, the only way to stop recording is sending <DLE><DC4>
+ and the only way to stop playback is to send <DLE><ETX>.
+
diff --git a/Documentation/isdn/README.avmb1 b/Documentation/isdn/README.avmb1
new file mode 100644
index 000000000..9e075484e
--- /dev/null
+++ b/Documentation/isdn/README.avmb1
@@ -0,0 +1,187 @@
+Driver for active AVM Controller.
+
+The driver provides a kernel capi2.0 Interface (kernelcapi) and
+on top of this a User-Level-CAPI2.0-interface (capi)
+and a driver to connect isdn4linux with CAPI2.0 (capidrv).
+The lowlevel interface can be used to implement a CAPI2.0
+also for passive cards since July 1999.
+
+The author can be reached at calle@calle.in-berlin.de.
+The command avmcapictrl is part of the isdn4k-utils.
+t4-files can be found at ftp://ftp.avm.de/cardware/b1/linux/firmware
+
+Currently supported cards:
+ B1 ISA (all versions)
+ B1 PCI
+ T1/T1B (HEMA card)
+ M1
+ M2
+ B1 PCMCIA
+
+Installing
+----------
+
+You need at least /dev/capi20 to load the firmware.
+
+mknod /dev/capi20 c 68 0
+mknod /dev/capi20.00 c 68 1
+mknod /dev/capi20.01 c 68 2
+.
+.
+.
+mknod /dev/capi20.19 c 68 20
+
+Running
+-------
+
+To use the card you need the t4-files to download the firmware.
+AVM GmbH provides several t4-files for the different D-channel
+protocols (b1.t4 for Euro-ISDN). Install these file in /lib/isdn.
+
+if you configure as modules load the modules this way:
+
+insmod /lib/modules/current/misc/capiutil.o
+insmod /lib/modules/current/misc/b1.o
+insmod /lib/modules/current/misc/kernelcapi.o
+insmod /lib/modules/current/misc/capidrv.o
+insmod /lib/modules/current/misc/capi.o
+
+if you have an B1-PCI card load the module b1pci.o
+insmod /lib/modules/current/misc/b1pci.o
+and load the firmware with
+avmcapictrl load /lib/isdn/b1.t4 1
+
+if you have an B1-ISA card load the module b1isa.o
+and add the card by calling
+avmcapictrl add 0x150 15
+and load the firmware by calling
+avmcapictrl load /lib/isdn/b1.t4 1
+
+if you have an T1-ISA card load the module t1isa.o
+and add the card by calling
+avmcapictrl add 0x450 15 T1 0
+and load the firmware by calling
+avmcapictrl load /lib/isdn/t1.t4 1
+
+if you have an PCMCIA card (B1/M1/M2) load the module b1pcmcia.o
+before you insert the card.
+
+Leased Lines with B1
+--------------------
+Init card and load firmware.
+For an D64S use "FV: 1" as phone number
+For an D64S2 use "FV: 1" and "FV: 2" for multilink
+or "FV: 1,2" to use CAPI channel bundling.
+
+/proc-Interface
+-----------------
+
+/proc/capi:
+ dr-xr-xr-x 2 root root 0 Jul 1 14:03 .
+ dr-xr-xr-x 82 root root 0 Jun 30 19:08 ..
+ -r--r--r-- 1 root root 0 Jul 1 14:03 applications
+ -r--r--r-- 1 root root 0 Jul 1 14:03 applstats
+ -r--r--r-- 1 root root 0 Jul 1 14:03 capi20
+ -r--r--r-- 1 root root 0 Jul 1 14:03 capidrv
+ -r--r--r-- 1 root root 0 Jul 1 14:03 controller
+ -r--r--r-- 1 root root 0 Jul 1 14:03 contrstats
+ -r--r--r-- 1 root root 0 Jul 1 14:03 driver
+ -r--r--r-- 1 root root 0 Jul 1 14:03 ncci
+ -r--r--r-- 1 root root 0 Jul 1 14:03 users
+
+/proc/capi/applications:
+ applid level3cnt datablkcnt datablklen ncci-cnt recvqueuelen
+ level3cnt: capi_register parameter
+ datablkcnt: capi_register parameter
+ ncci-cnt: current number of nccis (connections)
+ recvqueuelen: number of messages on receive queue
+ for example:
+1 -2 16 2048 1 0
+2 2 7 2048 1 0
+
+/proc/capi/applstats:
+ applid recvctlmsg nrecvdatamsg nsentctlmsg nsentdatamsg
+ recvctlmsg: capi messages received without DATA_B3_IND
+ recvdatamsg: capi DATA_B3_IND received
+ sentctlmsg: capi messages sent without DATA_B3_REQ
+ sentdatamsg: capi DATA_B3_REQ sent
+ for example:
+1 2057 1699 1721 1699
+
+/proc/capi/capi20: statistics of capi.o (/dev/capi20)
+ minor nopen nrecvdropmsg nrecvctlmsg nrecvdatamsg sentctlmsg sentdatamsg
+ minor: minor device number of capi device
+ nopen: number of calls to devices open
+ nrecvdropmsg: capi messages dropped (messages in recvqueue in close)
+ nrecvctlmsg: capi messages received without DATA_B3_IND
+ nrecvdatamsg: capi DATA_B3_IND received
+ nsentctlmsg: capi messages sent without DATA_B3_REQ
+ nsentdatamsg: capi DATA_B3_REQ sent
+
+ for example:
+1 2 18 0 16 2
+
+/proc/capi/capidrv: statistics of capidrv.o (capi messages)
+ nrecvctlmsg nrecvdatamsg sentctlmsg sentdatamsg
+ nrecvctlmsg: capi messages received without DATA_B3_IND
+ nrecvdatamsg: capi DATA_B3_IND received
+ nsentctlmsg: capi messages sent without DATA_B3_REQ
+ nsentdatamsg: capi DATA_B3_REQ sent
+ for example:
+2780 2226 2256 2226
+
+/proc/capi/controller:
+ controller drivername state cardname controllerinfo
+ for example:
+1 b1pci running b1pci-e000 B1 3.07-01 0xe000 19
+2 t1isa running t1isa-450 B1 3.07-01 0x450 11 0
+3 b1pcmcia running m2-150 B1 3.07-01 0x150 5
+
+/proc/capi/contrstats:
+ controller nrecvctlmsg nrecvdatamsg sentctlmsg sentdatamsg
+ nrecvctlmsg: capi messages received without DATA_B3_IND
+ nrecvdatamsg: capi DATA_B3_IND received
+ nsentctlmsg: capi messages sent without DATA_B3_REQ
+ nsentdatamsg: capi DATA_B3_REQ sent
+ for example:
+1 2845 2272 2310 2274
+2 2 0 2 0
+3 2 0 2 0
+
+/proc/capi/driver:
+ drivername ncontroller
+ for example:
+b1pci 1
+t1isa 1
+b1pcmcia 1
+b1isa 0
+
+/proc/capi/ncci:
+ apllid ncci winsize sendwindow
+ for example:
+1 0x10101 8 0
+
+/proc/capi/users: kernelmodules that use the kernelcapi.
+ name
+ for example:
+capidrv
+capi20
+
+Questions
+---------
+Check out the FAQ (ftp.isdn4linux.de) or subscribe to the
+linux-avmb1@calle.in-berlin.de mailing list by sending
+a mail to majordomo@calle.in-berlin.de with
+subscribe linux-avmb1
+in the body.
+
+German documentation and several scripts can be found at
+ftp://ftp.avm.de/cardware/b1/linux/
+
+Bugs
+----
+If you find any please let me know.
+
+Enjoy,
+
+Carsten Paeth (calle@calle.in-berlin.de)
diff --git a/Documentation/isdn/README.concap b/Documentation/isdn/README.concap
new file mode 100644
index 000000000..a76d74845
--- /dev/null
+++ b/Documentation/isdn/README.concap
@@ -0,0 +1,259 @@
+Description of the "concap" encapsulation protocol interface
+============================================================
+
+The "concap" interface is intended to be used by network device
+drivers that need to process an encapsulation protocol.
+It is assumed that the protocol interacts with a linux network device by
+- data transmission
+- connection control (establish, release)
+Thus, the mnemonic: "CONnection CONtrolling eNCAPsulation Protocol".
+
+This is currently only used inside the isdn subsystem. But it might
+also be useful to other kinds of network devices. Thus, if you want
+to suggest changes that improve usability or performance of the
+interface, please let me know. I'm willing to include them in future
+releases (even if I needed to adapt the current isdn code to the
+changed interface).
+
+
+Why is this useful?
+===================
+
+The encapsulation protocol used on top of WAN connections or permanent
+point-to-point links are frequently chosen upon bilateral agreement.
+Thus, a device driver for a certain type of hardware must support
+several different encapsulation protocols at once.
+
+The isdn device driver did already support several different
+encapsulation protocols. The encapsulation protocol is configured by a
+user space utility (isdnctrl). The isdn network interface code then
+uses several case statements which select appropriate actions
+depending on the currently configured encapsulation protocol.
+
+In contrast, LAN network interfaces always used a single encapsulation
+protocol which is unique to the hardware type of the interface. The LAN
+encapsulation is usually done by just sticking a header on the data. Thus,
+traditional linux network device drivers used to process the
+encapsulation protocol directly (usually by just providing a hard_header()
+method in the device structure) using some hardware type specific support
+functions. This is simple, direct and efficient. But it doesn't fit all
+the requirements for complex WAN encapsulations.
+
+
+ The configurability of the encapsulation protocol to be used
+ makes isdn network interfaces more flexible, but also much more
+ complex than traditional lan network interfaces.
+
+
+Many Encapsulation protocols used on top of WAN connections will not just
+stick a header on the data. They also might need to set up or release
+the WAN connection. They also might want to send other data for their
+private purpose over the wire, e.g. ppp does a lot of link level
+negotiation before the first piece of user data can be transmitted.
+Such encapsulation protocols for WAN devices are typically more complex
+than encapsulation protocols for lan devices. Thus, network interface
+code for typical WAN devices also tends to be more complex.
+
+
+In order to support Linux' x25 PLP implementation on top of
+isdn network interfaces I could have introduced yet another branch to
+the various case statements inside drivers/isdn/isdn_net.c.
+This eventually made isdn_net.c even more complex. In addition, it made
+isdn_net.c harder to maintain. Thus, by identifying an abstract
+interface between the network interface code and the encapsulation
+protocol, complexity could be reduced and maintainability could be
+increased.
+
+
+Likewise, a similar encapsulation protocol will frequently be needed by
+several different interfaces of even different hardware type, e.g. the
+synchronous ppp implementation used by the isdn driver and the
+asynchronous ppp implementation used by the ppp driver have a lot of
+similar code in them. By cleanly separating the encapsulation protocol
+from the hardware specific interface stuff such code could be shared
+better in future.
+
+
+When operating over dial-up-connections (e.g. telephone lines via modem,
+non-permanent virtual circuits of wide area networks, ISDN) many
+encapsulation protocols will need to control the connection. Therefore,
+some basic connection control primitives are supported. The type and
+semantics of the connection (i.e the ISO layer where connection service
+is provided) is outside our scope and might be different depending on
+the encapsulation protocol used, e.g. for a ppp module using our service
+on top of a modem connection a connect_request will result in dialing
+a (somewhere else configured) remote phone number. For an X25-interface
+module (LAPB semantics, as defined in Documentation/networking/x25-iface.txt)
+a connect_request will ask for establishing a reliable lapb
+datalink connection.
+
+
+The encapsulation protocol currently provides the following
+service primitives to the network device.
+
+- create a new encapsulation protocol instance
+- delete encapsulation protocol instance and free all its resources
+- initialize (open) the encapsulation protocol instance for use.
+- deactivate (close) an encapsulation protocol instance.
+- process (xmit) data handed down by upper protocol layer
+- receive data from lower (hardware) layer
+- process connect indication from lower (hardware) layer
+- process disconnect indication from lower (hardware) layer
+
+
+The network interface driver accesses those primitives via callbacks
+provided by the encapsulation protocol instance within a
+struct concap_proto_ops.
+
+struct concap_proto_ops{
+
+ /* create a new encapsulation protocol instance of same type */
+ struct concap_proto * (*proto_new) (void);
+
+ /* delete encapsulation protocol instance and free all its resources.
+ cprot may no longer be referenced after calling this */
+ void (*proto_del)(struct concap_proto *cprot);
+
+ /* initialize the protocol's data. To be called at interface startup
+ or when the device driver resets the interface. All services of the
+ encapsulation protocol may be used after this*/
+ int (*restart)(struct concap_proto *cprot,
+ struct net_device *ndev,
+ struct concap_device_ops *dops);
+
+ /* deactivate an encapsulation protocol instance. The encapsulation
+ protocol may not call any *dops methods after this. */
+ int (*close)(struct concap_proto *cprot);
+
+ /* process a frame handed down to us by upper layer */
+ int (*encap_and_xmit)(struct concap_proto *cprot, struct sk_buff *skb);
+
+ /* to be called for each data entity received from lower layer*/
+ int (*data_ind)(struct concap_proto *cprot, struct sk_buff *skb);
+
+ /* to be called when a connection was set up/down.
+ Protocols that don't process these primitives might fill in
+ dummy methods here */
+ int (*connect_ind)(struct concap_proto *cprot);
+ int (*disconn_ind)(struct concap_proto *cprot);
+};
+
+
+The data structures are defined in the header file include/linux/concap.h.
+
+
+A Network interface using encapsulation protocols must also provide
+some service primitives to the encapsulation protocol:
+
+- request data being submitted by lower layer (device hardware)
+- request a connection being set up by lower layer
+- request a connection being released by lower layer
+
+The encapsulation protocol accesses those primitives via callbacks
+provided by the network interface within a struct concap_device_ops.
+
+struct concap_device_ops{
+
+ /* to request data be submitted by device */
+ int (*data_req)(struct concap_proto *, struct sk_buff *);
+
+ /* Control methods must be set to NULL by devices which do not
+ support connection control. */
+ /* to request a connection be set up */
+ int (*connect_req)(struct concap_proto *);
+
+ /* to request a connection be released */
+ int (*disconn_req)(struct concap_proto *);
+};
+
+The network interface does not explicitly provide a receive service
+because the encapsulation protocol directly calls netif_rx().
+
+
+
+
+An encapsulation protocol itself is actually the
+struct concap_proto{
+ struct net_device *net_dev; /* net device using our service */
+ struct concap_device_ops *dops; /* callbacks provided by device */
+ struct concap_proto_ops *pops; /* callbacks provided by us */
+ int flags;
+ void *proto_data; /* protocol specific private data, to
+ be accessed via *pops methods only*/
+ /*
+ :
+ whatever
+ :
+ */
+};
+
+Most of this is filled in when the device requests the protocol to
+be reset (opend). The network interface must provide the net_dev and
+dops pointers. Other concap_proto members should be considered private
+data that are only accessed by the pops callback functions. Likewise,
+a concap proto should access the network device's private data
+only by means of the callbacks referred to by the dops pointer.
+
+
+A possible extended device structure which uses the connection controlling
+encapsulation services could look like this:
+
+struct concap_device{
+ struct net_device net_dev;
+ struct my_priv /* device->local stuff */
+ /* the my_priv struct might contain a
+ struct concap_device_ops *dops;
+ to provide the device specific callbacks
+ */
+ struct concap_proto *cprot; /* callbacks provided by protocol */
+};
+
+
+
+Misc Thoughts
+=============
+
+The concept of the concap proto might help to reuse protocol code and
+reduce the complexity of certain network interface implementations.
+The trade off is that it introduces yet another procedure call layer
+when processing the protocol. This has of course some impact on
+performance. However, typically the concap interface will be used by
+devices attached to slow lines (like telephone, isdn, leased synchronous
+lines). For such slow lines, the overhead is probably negligible.
+This might no longer hold for certain high speed WAN links (like
+ATM).
+
+
+If general linux network interfaces explicitly supported concap
+protocols (e.g. by a member struct concap_proto* in struct net_device)
+then the interface of the service function could be changed
+by passing a pointer of type (struct net_device*) instead of
+type (struct concap_proto*). Doing so would make many of the service
+functions compatible to network device support functions.
+
+e.g. instead of the concap protocol's service function
+
+ int (*encap_and_xmit)(struct concap_proto *cprot, struct sk_buff *skb);
+
+we could have
+
+ int (*encap_and_xmit)(struct net_device *ndev, struct sk_buff *skb);
+
+As this is compatible to the dev->hard_start_xmit() method, the device
+driver could directly register the concap protocol's encap_and_xmit()
+function as its hard_start_xmit() method. This would eliminate one
+procedure call layer.
+
+
+The device's data request function could also be defined as
+
+ int (*data_req)(struct net_device *ndev, struct sk_buff *skb);
+
+This might even allow for some protocol stacking. And the network
+interface might even register the same data_req() function directly
+as its hard_start_xmit() method when a zero layer encapsulation
+protocol is configured. Thus, eliminating the performance penalty
+of the concap interface when a trivial concap protocol is used.
+Nevertheless, the device remains able to support encapsulation
+protocol configuration.
+
diff --git a/Documentation/isdn/README.diversion b/Documentation/isdn/README.diversion
new file mode 100644
index 000000000..bddcd5fb8
--- /dev/null
+++ b/Documentation/isdn/README.diversion
@@ -0,0 +1,127 @@
+The isdn diversion services are a supporting module working together with
+the isdn4linux and the HiSax module for passive cards.
+Active cards, TAs and cards using a own or other driver than the HiSax
+module need to be adapted to the HL<->LL interface described in a separate
+document. The diversion services may be used with all cards supported by
+the HiSax driver.
+The diversion kernel interface and controlling tool divertctrl were written
+by Werner Cornelius (werner@isdn4linux.de or werner@titro.de) under the
+GNU General Public License.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Table of contents
+=================
+
+1. Features of the i4l diversion services
+ (Or what can the i4l diversion services do for me)
+
+2. Required hard- and software
+
+3. Compiling, installing and loading/unloading the module
+ Tracing calling and diversion information
+
+4. Tracing calling and diversion information
+
+5. Format of the divert device ASCII output
+
+
+1. Features of the i4l diversion services
+ (Or what can the i4l diversion services do for me)
+
+ The i4l diversion services offers call forwarding and logging normally
+ only supported by isdn phones. Incoming calls may be diverted
+ unconditionally (CFU), when not reachable (CFNR) or on busy condition
+ (CFB).
+ The diversions may be invoked statically in the providers exchange
+ as normally done by isdn phones. In this case all incoming calls
+ with a special (or all) service identifiers are forwarded if the
+ forwarding reason is met. Activated static services may also be
+ interrogated (queried).
+ The i4l diversion services additionally offers a dynamic version of
+ call forwarding which is not preprogrammed inside the providers exchange
+ but dynamically activated by i4l.
+ In this case all incoming calls are checked by rules that may be
+ compared to the mechanism of ipfwadm or ipchains. If a given rule matches
+ the checking process is finished and the rule matching will be applied
+ to the call.
+ The rules include primary and secondary service identifiers, called
+ number and subaddress, callers number and subaddress and whether the rule
+ matches to all filtered calls or only those when all B-channel resources
+ are exhausted.
+ Actions that may be invoked by a rule are ignore, proceed, reject,
+ direct divert or delayed divert of a call.
+ All incoming calls matching a rule except the ignore rule a reported and
+ logged as ASCII via the proc filesystem (/proc/net/isdn/divert). If proceed
+ is selected the call will be held in a proceeding state (without ringing)
+ for a certain amount of time to let an external program or client decide
+ how to handle the call.
+
+
+2. Required hard- and software
+
+ For using the i4l diversion services the isdn line must be of a EURO/DSS1
+ type. Additionally the i4l services only work together with the HiSax
+ driver for passive isdn cards. All HiSax supported cards may be used for
+ the diversion purposes.
+ The static diversion services require the provider having static services
+ CFU, CFNR, CFB activated on an MSN-line. The static services may not be
+ used on a point-to-point connection. Further the static services are only
+ available in some countries (for example germany). Countries requiring the
+ keypad protocol for activating static diversions (like the netherlands) are
+ not supported but may use the tty devices for this purpose.
+ The dynamic diversion services may be used in all countries if the provider
+ enables the feature CF (call forwarding). This should work on both MSN- and
+ point-to-point lines.
+ To add and delete rules the additional divertctrl program is needed. This
+ program is part of the isdn4kutils package.
+
+3. Compiling, installing and loading/unloading the module
+ Tracing calling and diversion information
+
+
+ To compile the i4l code with diversion support you need to say yes to the
+ DSS1 diversion services when selecting the i4l options in the kernel
+ config (menuconfig or config).
+ After having properly activated a make modules and make modules_install all
+ required modules will be correctly installed in the needed modules dirs.
+ As the diversion services are currently not included in the scripts of most
+ standard distributions you will have to add a "insmod dss1_divert" after
+ having loaded the global isdn module.
+ The module can be loaded without any command line parameters.
+ If the module is actually loaded and active may be checked with a
+ "cat /proc/modules" or "ls /proc/net/isdn/divert". The divert file is
+ dynamically created by the diversion module and removed when the module is
+ unloaded.
+
+
+4. Tracing calling and diversion information
+
+ You also may put a "cat /proc/net/isdn/divert" in the background with the
+ output redirected to a file. Then all actions of the module are logged.
+ The divert file in the proc system may be opened more than once, so in
+ conjunction with inetd and a small remote client on other machines inside
+ your network incoming calls and reactions by the module may be shown on
+ every listening machine.
+ If a call is reported as proceeding an external program or client may
+ specify during a certain amount of time (normally 4 to 10 seconds) what
+ to do with that call.
+ To unload the module all open files to the device in the proc system must
+ be closed. Otherwise the module (and isdn.o) may not be unloaded.
+
+5. Format of the divert device ASCII output
+
+ To be done later
+
diff --git a/Documentation/isdn/README.fax b/Documentation/isdn/README.fax
new file mode 100644
index 000000000..5314958a8
--- /dev/null
+++ b/Documentation/isdn/README.fax
@@ -0,0 +1,45 @@
+
+Fax with isdn4linux
+===================
+
+When enabled during kernel configuration, the tty emulator
+of the ISDN subsystem is capable of the Fax Class 2 commands.
+
+This only makes sense under the following conditions :
+
+- You need the commands as dummy, because you are using
+ hylafax (with patch) for AVM capi.
+- You want to use the fax capabilities of your isdn-card.
+ (supported cards are listed below)
+
+
+NOTE: This implementation does *not* support fax with passive
+ ISDN-cards (known as softfax). The low-level driver of
+ the ISDN-card and/or the card itself must support this.
+
+
+Supported ISDN-Cards
+--------------------
+
+Eicon DIVA Server BRI/PCI
+ - full support with both B-channels.
+
+Eicon DIVA Server 4BRI/PCI
+ - full support with all B-channels.
+
+Eicon DIVA Server PRI/PCI
+ - full support on amount of B-channels
+ depending on DSPs on board.
+
+
+
+The command set is known as Class 2 (not Class 2.0) and
+can be activated by AT+FCLASS=2
+
+
+The interface between the link-level-module and the hardware-level driver
+is described in the files INTERFACE.fax and INTERFACE.
+
+Armin
+mac@melware.de
+
diff --git a/Documentation/isdn/README.gigaset b/Documentation/isdn/README.gigaset
new file mode 100644
index 000000000..7534c6039
--- /dev/null
+++ b/Documentation/isdn/README.gigaset
@@ -0,0 +1,421 @@
+GigaSet 307x Device Driver
+==========================
+
+1. Requirements
+ ------------
+1.1. Hardware
+ --------
+ This driver supports the connection of the Gigaset 307x/417x family of
+ ISDN DECT bases via Gigaset M101 Data, Gigaset M105 Data or direct USB
+ connection. The following devices are reported to be compatible:
+
+ Bases:
+ Siemens Gigaset 3070/3075 isdn
+ Siemens Gigaset 4170/4175 isdn
+ Siemens Gigaset SX205/255
+ Siemens Gigaset SX353
+ T-Com Sinus 45 [AB] isdn
+ T-Com Sinus 721X[A] [SE]
+ Vox Chicago 390 ISDN (KPN Telecom)
+
+ RS232 data boxes:
+ Siemens Gigaset M101 Data
+ T-Com Sinus 45 Data 1
+
+ USB data boxes:
+ Siemens Gigaset M105 Data
+ Siemens Gigaset USB Adapter DECT
+ T-Com Sinus 45 Data 2
+ T-Com Sinus 721 data
+ Chicago 390 USB (KPN)
+
+ See also http://www.erbze.info/sinus_gigaset.htm and
+ http://gigaset307x.sourceforge.net/
+
+ We had also reports from users of Gigaset M105 who could use the drivers
+ with SX 100 and CX 100 ISDN bases (only in unimodem mode, see section 2.5.)
+ If you have another device that works with our driver, please let us know.
+
+ Chances of getting an USB device to work are good if the output of
+ lsusb
+ at the command line contains one of the following:
+ ID 0681:0001
+ ID 0681:0002
+ ID 0681:0009
+ ID 0681:0021
+ ID 0681:0022
+
+1.2. Software
+ --------
+ The driver works with the Kernel CAPI subsystem as well as the old
+ ISDN4Linux subsystem, so it can be used with any software which is able
+ to use CAPI 2.0 or ISDN4Linux for ISDN connections (voice or data).
+
+ There are some user space tools available at
+ http://sourceforge.net/projects/gigaset307x/
+ which provide access to additional device specific functions like SMS,
+ phonebook or call journal.
+
+
+2. How to use the driver
+ ---------------------
+2.1. Modules
+ -------
+ For the devices to work, the proper kernel modules have to be loaded.
+ This normally happens automatically when the system detects the USB
+ device (base, M105) or when the line discipline is attached (M101). It
+ can also be triggered manually using the modprobe(8) command, for example
+ for troubleshooting or to pass module parameters.
+
+ The module ser_gigaset provides a serial line discipline N_GIGASET_M101
+ which uses the regular serial port driver to access the device, and must
+ therefore be attached to the serial device to which the M101 is connected.
+ The ldattach(8) command (included in util-linux-ng release 2.14 or later)
+ can be used for that purpose, for example:
+ ldattach GIGASET_M101 /dev/ttyS1
+ This will open the device file, attach the line discipline to it, and
+ then sleep in the background, keeping the device open so that the line
+ discipline remains active. To deactivate it, kill the daemon, for example
+ with
+ killall ldattach
+ before disconnecting the device. To have this happen automatically at
+ system startup/shutdown on an LSB compatible system, create and activate
+ an appropriate LSB startup script /etc/init.d/gigaset. (The init name
+ 'gigaset' is officially assigned to this project by LANANA.)
+ Alternatively, just add the 'ldattach' command line to /etc/rc.local.
+
+ The modules accept the following parameters:
+
+ Module Parameter Meaning
+
+ gigaset debug debug level (see section 3.2.)
+
+ startmode initial operation mode (see section 2.5.):
+ bas_gigaset ) 1=ISDN4linux/CAPI (default), 0=Unimodem
+ ser_gigaset )
+ usb_gigaset ) cidmode initial Call-ID mode setting (see section
+ 2.5.): 1=on (default), 0=off
+
+ Depending on your distribution you may want to create a separate module
+ configuration file like /etc/modprobe.d/gigaset.conf for these.
+
+2.2. Device nodes for user space programs
+ ------------------------------------
+ The device can be accessed from user space (eg. by the user space tools
+ mentioned in 1.2.) through the device nodes:
+
+ - /dev/ttyGS0 for M101 (RS232 data boxes)
+ - /dev/ttyGU0 for M105 (USB data boxes)
+ - /dev/ttyGB0 for the base driver (direct USB connection)
+
+ If you connect more than one device of a type, they will get consecutive
+ device nodes, eg. /dev/ttyGU1 for a second M105.
+
+ You can also set a "default device" for the user space tools to use when
+ no device node is given as parameter, by creating a symlink /dev/ttyG to
+ one of them, eg.:
+
+ ln -s /dev/ttyGB0 /dev/ttyG
+
+ The devices accept the following device specific ioctl calls
+ (defined in gigaset_dev.h):
+
+ ioctl(int fd, GIGASET_REDIR, int *cmd);
+ If cmd==1, the device is set to be controlled exclusively through the
+ character device node; access from the ISDN subsystem is blocked.
+ If cmd==0, the device is set to be used from the ISDN subsystem and does
+ not communicate through the character device node.
+
+ ioctl(int fd, GIGASET_CONFIG, int *cmd);
+ (ser_gigaset and usb_gigaset only)
+ If cmd==1, the device is set to adapter configuration mode where commands
+ are interpreted by the M10x DECT adapter itself instead of being
+ forwarded to the base station. In this mode, the device accepts the
+ commands described in Siemens document "AT-Kommando Alignment M10x Data"
+ for setting the operation mode, associating with a base station and
+ querying parameters like field strengh and signal quality.
+ Note that there is no ioctl command for leaving adapter configuration
+ mode and returning to regular operation. In order to leave adapter
+ configuration mode, write the command ATO to the device.
+
+ ioctl(int fd, GIGASET_BRKCHARS, unsigned char brkchars[6]);
+ (usb_gigaset only)
+ Set the break characters on an M105's internal serial adapter to the six
+ bytes stored in brkchars[]. Unused bytes should be set to zero.
+
+ ioctl(int fd, GIGASET_VERSION, unsigned version[4]);
+ Retrieve version information from the driver. version[0] must be set to
+ one of:
+ - GIGVER_DRIVER: retrieve driver version
+ - GIGVER_COMPAT: retrieve interface compatibility version
+ - GIGVER_FWBASE: retrieve the firmware version of the base
+ Upon return, version[] is filled with the requested version information.
+
+2.3. CAPI
+ ----
+ If the driver is compiled with CAPI support (kernel configuration option
+ GIGASET_CAPI) the devices will show up as CAPI controllers as soon as the
+ corresponding driver module is loaded, and can then be used with CAPI 2.0
+ kernel and user space applications. For user space access, the module
+ capi.ko must be loaded.
+
+ Legacy ISDN4Linux applications are supported via the capidrv
+ compatibility driver. The kernel module capidrv.ko must be loaded
+ explicitly with the command
+ modprobe capidrv
+ if needed, and cannot be unloaded again without unloading the driver
+ first. (These are limitations of capidrv.)
+
+ Most distributions handle loading and unloading of the various CAPI
+ modules automatically via the command capiinit(1) from the capi4k-utils
+ package or a similar mechanism. Note that capiinit(1) cannot unload the
+ Gigaset drivers because it doesn't support more than one module per
+ driver.
+
+2.4. ISDN4Linux
+ ----------
+ If the driver is compiled without CAPI support (native ISDN4Linux
+ variant), it registers the device with the legacy ISDN4Linux subsystem
+ after loading the module. It can then be used with ISDN4Linux
+ applications only. Most distributions provide some configuration utility
+ for setting up that subsystem. Otherwise you can use some HOWTOs like
+ http://www.linuxhaven.de/dlhp/HOWTO/DE-ISDN-HOWTO-5.html
+
+
+2.5. Unimodem mode
+ -------------
+ In this mode the device works like a modem connected to a serial port
+ (the /dev/ttyGU0, ... mentioned above) which understands the commands
+
+ ATZ init, reset
+ => OK or ERROR
+ ATD
+ ATDT dial
+ => OK, CONNECT,
+ BUSY,
+ NO DIAL TONE,
+ NO CARRIER,
+ NO ANSWER
+ <pause>+++<pause> change to command mode when connected
+ ATH hangup
+
+ You can use some configuration tool of your distribution to configure this
+ "modem" or configure pppd/wvdial manually. There are some example ppp
+ configuration files and chat scripts in the gigaset-VERSION/ppp directory
+ in the driver packages from http://sourceforge.net/projects/gigaset307x/.
+ Please note that the USB drivers are not able to change the state of the
+ control lines. This means you must use "Stupid Mode" if you are using
+ wvdial or you should use the nocrtscts option of pppd.
+ You must also assure that the ppp_async module is loaded with the parameter
+ flag_time=0. You can do this e.g. by adding a line like
+
+ options ppp_async flag_time=0
+
+ to an appropriate module configuration file, like
+ /etc/modprobe.d/gigaset.conf.
+
+ Unimodem mode is needed for making some devices [e.g. SX100] work which
+ do not support the regular Gigaset command set. If debug output (see
+ section 3.2.) shows something like this when dialing:
+ CMD Received: ERROR
+ Available Params: 0
+ Connection State: 0, Response: -1
+ gigaset_process_response: resp_code -1 in ConState 0 !
+ Timeout occurred
+ then switching to unimodem mode may help.
+
+ If you have installed the command line tool gigacontr, you can enter
+ unimodem mode using
+ gigacontr --mode unimodem
+ You can switch back using
+ gigacontr --mode isdn
+
+ You can also put the driver directly into Unimodem mode when it's loaded,
+ by passing the module parameter startmode=0 to the hardware specific
+ module, e.g.
+ modprobe usb_gigaset startmode=0
+ or by adding a line like
+ options usb_gigaset startmode=0
+ to an appropriate module configuration file, like
+ /etc/modprobe.d/gigaset.conf
+
+2.6. Call-ID (CID) mode
+ ------------------
+ Call-IDs are numbers used to tag commands to, and responses from, the
+ Gigaset base in order to support the simultaneous handling of multiple
+ ISDN calls. Their use can be enabled ("CID mode") or disabled ("Unimodem
+ mode"). Without Call-IDs (in Unimodem mode), only a very limited set of
+ functions is available. It allows outgoing data connections only, but
+ does not signal incoming calls or other base events.
+
+ DECT cordless data devices (M10x) permanently occupy the cordless
+ connection to the base while Call-IDs are activated. As the Gigaset
+ bases only support one DECT data connection at a time, this prevents
+ other DECT cordless data devices from accessing the base.
+
+ During active operation, the driver switches to the necessary mode
+ automatically. However, for the reasons above, the mode chosen when
+ the device is not in use (idle) can be selected by the user.
+ - If you want to receive incoming calls, you can use the default
+ settings (CID mode).
+ - If you have several DECT data devices (M10x) which you want to use
+ in turn, select Unimodem mode by passing the parameter "cidmode=0" to
+ the appropriate driver module (ser_gigaset or usb_gigaset).
+
+ If you want both of these at once, you are out of luck.
+
+ You can also use the tty class parameter "cidmode" of the device to
+ change its CID mode while the driver is loaded, eg.
+ echo 0 > /sys/class/tty/ttyGU0/cidmode
+
+2.7. Dialing Numbers
+ ---------------
+ The called party number provided by an application for dialing out must
+ be a public network number according to the local dialing plan, without
+ any dial prefix for getting an outside line.
+
+ Internal calls can be made by providing an internal extension number
+ prefixed with "**" (two asterisks) as the called party number. So to dial
+ eg. the first registered DECT handset, give "**11" as the called party
+ number. Dialing "***" (three asterisks) calls all extensions
+ simultaneously (global call).
+
+ This holds for both CAPI 2.0 and ISDN4Linux applications. Unimodem mode
+ does not support internal calls.
+
+2.8. Unregistered Wireless Devices (M101/M105)
+ -----------------------------------------
+ The main purpose of the ser_gigaset and usb_gigaset drivers is to allow
+ the M101 and M105 wireless devices to be used as ISDN devices for ISDN
+ connections through a Gigaset base. Therefore they assume that the device
+ is registered to a DECT base.
+
+ If the M101/M105 device is not registered to a base, initialization of
+ the device fails, and a corresponding error message is logged by the
+ driver. In that situation, a restricted set of functions is available
+ which includes, in particular, those necessary for registering the device
+ to a base or for switching it between Fixed Part and Portable Part
+ modes. See the gigacontr(8) manpage for details.
+
+3. Troubleshooting
+ ---------------
+3.1. Solutions to frequently reported problems
+ -----------------------------------------
+ Problem:
+ You have a slow provider and isdn4linux gives up dialing too early.
+ Solution:
+ Load the isdn module using the dialtimeout option. You can do this e.g.
+ by adding a line like
+
+ options isdn dialtimeout=15
+
+ to /etc/modprobe.d/gigaset.conf or a similar file.
+
+ Problem:
+ The isdnlog program emits error messages or just doesn't work.
+ Solution:
+ Isdnlog supports only the HiSax driver. Do not attempt to use it with
+ other drivers such as Gigaset.
+
+ Problem:
+ You have two or more DECT data adapters (M101/M105) and only the
+ first one you turn on works.
+ Solution:
+ Select Unimodem mode for all DECT data adapters. (see section 2.5.)
+
+ Problem:
+ Messages like this:
+ usb_gigaset 3-2:1.0: Could not initialize the device.
+ appear in your syslog.
+ Solution:
+ Check whether your M10x wireless device is correctly registered to the
+ Gigaset base. (see section 2.7.)
+
+3.2. Telling the driver to provide more information
+ ----------------------------------------------
+ Building the driver with the "Gigaset debugging" kernel configuration
+ option (CONFIG_GIGASET_DEBUG) gives it the ability to produce additional
+ information useful for debugging.
+
+ You can control the amount of debugging information the driver produces by
+ writing an appropriate value to /sys/module/gigaset/parameters/debug, e.g.
+ echo 0 > /sys/module/gigaset/parameters/debug
+ switches off debugging output completely,
+ echo 0x302020 > /sys/module/gigaset/parameters/debug
+ enables a reasonable set of debugging output messages. These values are
+ bit patterns where every bit controls a certain type of debugging output.
+ See the constants DEBUG_* in the source file gigaset.h for details.
+
+ The initial value can be set using the debug parameter when loading the
+ module "gigaset", e.g. by adding a line
+ options gigaset debug=0
+ to your module configuration file, eg. /etc/modprobe.d/gigaset.conf
+
+ Generated debugging information can be found
+ - as output of the command
+ dmesg
+ - in system log files written by your syslog daemon, usually
+ in /var/log/, e.g. /var/log/messages.
+
+3.3. Reporting problems and bugs
+ ---------------------------
+ If you can't solve problems with the driver on your own, feel free to
+ use one of the forums, bug trackers, or mailing lists on
+ http://sourceforge.net/projects/gigaset307x
+ or write an electronic mail to the maintainers.
+
+ Try to provide as much information as possible, such as
+ - distribution
+ - kernel version (uname -r)
+ - gcc version (gcc --version)
+ - hardware architecture (uname -m, ...)
+ - type and firmware version of your device (base and wireless module,
+ if any)
+ - output of "lsusb -v" (if using an USB device)
+ - error messages
+ - relevant system log messages (it would help if you activate debug
+ output as described in 3.2.)
+
+ For help with general configuration problems not specific to our driver,
+ such as isdn4linux and network configuration issues, please refer to the
+ appropriate forums and newsgroups.
+
+3.4. Reporting problem solutions
+ ---------------------------
+ If you solved a problem with our drivers, wrote startup scripts for your
+ distribution, ... feel free to contact us (using one of the places
+ mentioned in 3.3.). We'd like to add scripts, hints, documentation
+ to the driver and/or the project web page.
+
+
+4. Links, other software
+ ---------------------
+ - Sourceforge project developing this driver and associated tools
+ http://sourceforge.net/projects/gigaset307x
+ - Yahoo! Group on the Siemens Gigaset family of devices
+ http://de.groups.yahoo.com/group/Siemens-Gigaset
+ - Siemens Gigaset/T-Sinus compatibility table
+ http://www.erbze.info/sinus_gigaset.htm
+
+
+5. Credits
+ -------
+ Thanks to
+
+ Karsten Keil
+ for his help with isdn4linux
+ Deti Fliegl
+ for his base driver code
+ Dennis Dietrich
+ for his kernel 2.6 patches
+ Andreas Rummel
+ for his work and logs to get unimodem mode working
+ Andreas Degert
+ for his logs and patches to get cx 100 working
+ Dietrich Feist
+ for his generous donation of one M105 and two M101 cordless adapters
+ Christoph Schweers
+ for his generous donation of a M34 device
+
+ and all the other people who sent logs and other information.
+
diff --git a/Documentation/isdn/README.hfc-pci b/Documentation/isdn/README.hfc-pci
new file mode 100644
index 000000000..e8a4ef022
--- /dev/null
+++ b/Documentation/isdn/README.hfc-pci
@@ -0,0 +1,41 @@
+The driver for the HFC-PCI and HFC-PCI-A chips from CCD may be used
+for many OEM cards using this chips.
+Additionally the driver has a special feature which makes it possible
+to read the echo-channel of the isdn bus. So all frames in both directions
+may be logged.
+When the echo logging feature is used the number of available B-channels
+for a HFC-PCI card is reduced to 1. Of course this is only relevant to
+the card, not to the isdn line.
+To activate the echo mode the following ioctls must be entered:
+
+hisaxctrl <driver/cardname> 10 1
+
+This reduces the available channels to 1. There must not be open connections
+through this card when entering the command.
+And then:
+
+hisaxctrl <driver/cardname> 12 1
+
+This enables the echo mode. If Hex logging is activated the isdnctrlx
+devices show a output with a line beginning of HEX: for the providers
+exchange and ECHO: for isdn devices sending to the provider.
+
+If more than one HFC-PCI cards are installed, a specific card may be selected
+at the hisax module load command line. Supply the load command with the desired
+IO-address of the desired card.
+Example:
+There tree cards installed in your machine at IO-base addresses 0xd000, 0xd400
+and 0xdc00
+If you want to use the card at 0xd400 standalone you should supply the insmod
+or depmod with type=35 io=0xd400.
+If you want to use all three cards, but the order needs to be at 0xdc00,0xd400,
+0xd000 you may give the parameters type=35,35,35 io=0xdc00,0xd400,0xd00
+Then the desired card will be the initialised in the desired order.
+If the io parameter is used the io addresses of all used cards should be
+supplied else the parameter is assumed 0 and a auto search for a free card is
+invoked which may not give the wanted result.
+
+Comments and reports to werner@isdn4linux.de or werner@isdn-development.de
+
+
+
diff --git a/Documentation/isdn/README.hysdn b/Documentation/isdn/README.hysdn
new file mode 100644
index 000000000..eeca11f00
--- /dev/null
+++ b/Documentation/isdn/README.hysdn
@@ -0,0 +1,195 @@
+$Id: README.hysdn,v 1.3.6.1 2001/02/10 14:41:19 kai Exp $
+The hysdn driver has been written by
+Werner Cornelius (werner@isdn4linux.de or werner@titro.de)
+for Hypercope GmbH Aachen Germany. Hypercope agreed to publish this driver
+under the GNU General Public License.
+
+The CAPI 2.0-support was added by Ulrich Albrecht (ualbrecht@hypercope.de)
+for Hypercope GmbH Aachen, Germany.
+
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Table of contents
+=================
+
+1. About the driver
+
+2. Loading/Unloading the driver
+
+3. Entries in the /proc filesystem
+
+4. The /proc/net/hysdn/cardconfX file
+
+5. The /proc/net/hysdn/cardlogX file
+
+6. Where to get additional info and help
+
+
+1. About the driver
+
+ The drivers/isdn/hysdn subdir contains a driver for HYPERCOPEs active
+ PCI isdn cards Champ, Ergo and Metro. To enable support for this cards
+ enable ISDN support in the kernel config and support for HYSDN cards in
+ the active cards submenu. The driver may only be compiled and used if
+ support for loadable modules and the process filesystem have been enabled.
+
+ These cards provide two different interfaces to the kernel. Without the
+ optional CAPI 2.0 support, they register as ethernet card. IP-routing
+ to a ISDN-destination is performed on the card itself. All necessary
+ handlers for various protocols like ppp and others as well as config info
+ and firmware may be fetched from Hypercopes WWW-Site www.hypercope.de.
+
+ With CAPI 2.0 support enabled, the card can also be used as a CAPI 2.0
+ compliant devices with either CAPI 2.0 applications
+ (check isdn4k-utils) or -using the capidrv module- as a regular
+ isdn4linux device. This is done via the same mechanism as with the
+ active AVM cards and in fact uses the same module.
+
+
+2. Loading/Unloading the driver
+
+ The module has no command line parameters and auto detects up to 10 cards
+ in the id-range 0-9.
+ If a loaded driver shall be unloaded all open files in the /proc/net/hysdn
+ subdir need to be closed and all ethernet interfaces allocated by this
+ driver must be shut down. Otherwise the module counter will avoid a module
+ unload.
+
+ If you are using the CAPI 2.0-interface, make sure to load/modprobe the
+ kernelcapi-module first.
+
+ If you plan to use the capidrv-link to isdn4linux, make sure to load
+ capidrv.o after all modules using this driver (i.e. after hysdn and
+ any avm-specific modules).
+
+3. Entries in the /proc filesystem
+
+ When the module has been loaded it adds the directory hysdn in the
+ /proc/net tree. This directory contains exactly 2 file entries for each
+ card. One is called cardconfX and the other cardlogX, where X is the
+ card id number from 0 to 9.
+ The cards are numbered in the order found in the PCI config data.
+
+4. The /proc/net/hysdn/cardconfX file
+
+ This file may be read to get by everyone to get info about the cards type,
+ actual state, available features and used resources.
+ The first 3 entries (id, bus and slot) are PCI info fields, the following
+ type field gives the information about the cards type:
+
+ 4 -> Ergo card (server card with 2 b-chans)
+ 5 -> Metro card (server card with 4 or 8 b-chans)
+ 6 -> Champ card (client card with 2 b-chans)
+
+ The following 3 fields show the hardware assignments for irq, iobase and the
+ dual ported memory (dp-mem).
+ The fields b-chans and fax-chans announce the available card resources of
+ this types for the user.
+ The state variable indicates the actual drivers state for this card with the
+ following assignments.
+
+ 0 -> card has not been booted since driver load
+ 1 -> card booting is actually in progess
+ 2 -> card is in an error state due to a previous boot failure
+ 3 -> card is booted and active
+
+ And the last field (device) shows the name of the ethernet device assigned
+ to this card. Up to the first successful boot this field only shows a -
+ to tell that no net device has been allocated up to now. Once a net device
+ has been allocated it remains assigned to this card, even if a card is
+ rebooted and an boot error occurs.
+
+ Writing to the cardconfX file boots the card or transfers config lines to
+ the cards firmware. The type of data is automatically detected when the
+ first data is written. Only root has write access to this file.
+ The firmware boot files are normally called hyclient.pof for client cards
+ and hyserver.pof for server cards.
+ After successfully writing the boot file, complete config files or single
+ config lines may be copied to this file.
+ If an error occurs the return value given to the writing process has the
+ following additional codes (decimal):
+
+ 1000 Another process is currently bootng the card
+ 1001 Invalid firmware header
+ 1002 Boards dual-port RAM test failed
+ 1003 Internal firmware handler error
+ 1004 Boot image size invalid
+ 1005 First boot stage (bootstrap loader) failed
+ 1006 Second boot stage failure
+ 1007 Timeout waiting for card ready during boot
+ 1008 Operation only allowed in booted state
+ 1009 Config line too long
+ 1010 Invalid channel number
+ 1011 Timeout sending config data
+
+ Additional info about error reasons may be fetched from the log output.
+
+5. The /proc/net/hysdn/cardlogX file
+
+ The cardlogX file entry may be opened multiple for reading by everyone to
+ get the cards and drivers log data. Card messages always start with the
+ keyword LOG. All other lines are output from the driver.
+ The driver log data may be redirected to the syslog by selecting the
+ appropriate bitmask. The cards log messages will always be send to this
+ interface but never to the syslog.
+
+ A root user may write a decimal or hex (with 0x) value t this file to select
+ desired output options. As mentioned above the cards log dat is always
+ written to the cardlog file independent of the following options only used
+ to check and debug the driver itself:
+
+ For example:
+ echo "0x34560078" > /proc/net/hysdn/cardlog0
+ to output the hex log mask 34560078 for card 0.
+
+ The written value is regarded as an unsigned 32-Bit value, bit ored for
+ desired output. The following bits are already assigned:
+
+ 0x80000000 All driver log data is alternatively via syslog
+ 0x00000001 Log memory allocation errors
+ 0x00000010 Firmware load start and close are logged
+ 0x00000020 Log firmware record parser
+ 0x00000040 Log every firmware write actions
+ 0x00000080 Log all card related boot messages
+ 0x00000100 Output all config data sent for debugging purposes
+ 0x00000200 Only non comment config lines are shown wth channel
+ 0x00000400 Additional conf log output
+ 0x00001000 Log the asynchronous scheduler actions (config and log)
+ 0x00100000 Log all open and close actions to /proc/net/hysdn/card files
+ 0x00200000 Log all actions from /proc file entries
+ 0x00010000 Log network interface init and deinit
+
+6. Where to get additional info and help
+
+ If you have any problems concerning the driver or configuration contact
+ the Hypercope support team (support@hypercope.de) and or the authors
+ Werner Cornelius (werner@isdn4linux or cornelius@titro.de) or
+ Ulrich Albrecht (ualbrecht@hypercope.de).
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Documentation/isdn/README.icn b/Documentation/isdn/README.icn
new file mode 100644
index 000000000..13f833d4e
--- /dev/null
+++ b/Documentation/isdn/README.icn
@@ -0,0 +1,148 @@
+$Id: README.icn,v 1.7 2000/08/06 09:22:51 armin Exp $
+
+You can get the ICN-ISDN-card from:
+
+Thinking Objects Software GmbH
+Versbacher Röthe 159
+97078 Würzburg
+Tel: +49 931 2877950
+Fax: +49 931 2877951
+
+email info@think.de
+WWW http:/www.think.de
+
+
+The card communicates with the PC by two interfaces:
+ 1. A range of 4 successive port-addresses, whose base address can be
+ configured with the switches.
+ 2. A memory window with 16KB-256KB size, which can be setup in 16k steps
+ over the whole range of 16MB. Isdn4linux only uses a 16k window.
+ The base address of the window can be configured when loading
+ the lowlevel-module (see README). If using more than one card,
+ all cards are mapped to the same window and activated as needed.
+
+Setting up the IO-address dipswitches for the ICN-ISDN-card:
+
+ Two types of cards exist, one with dip-switches and one with
+ hook-switches.
+
+ 1. Setting for the card with hook-switches:
+
+ (0 = switch closed, 1 = switch open)
+
+ S3 S2 S1 Base-address
+ 0 0 0 0x300
+ 0 0 1 0x310
+ 0 1 0 0x320 (Default for isdn4linux)
+ 0 1 1 0x330
+ 1 0 0 0x340
+ 1 0 1 0x350
+ 1 1 0 0x360
+ 1 1 1 NOT ALLOWED!
+
+ 2. Setting for the card with dip-switches:
+
+ (0 = switch closed, 1 = switch open)
+
+ S1 S2 S3 S4 Base-Address
+ 0 0 0 0 0x300
+ 0 0 0 1 0x310
+ 0 0 1 0 0x320 (Default for isdn4linux)
+ 0 0 1 1 0x330
+ 0 1 0 0 0x340
+ 0 1 0 1 0x350
+ 0 1 1 0 0x360
+ 0 1 1 1 NOT ALLOWED!
+ 1 0 0 0 0x308
+ 1 0 0 1 0x318
+ 1 0 1 0 0x328
+ 1 0 1 1 0x338
+ 1 1 0 0 0x348
+ 1 1 0 1 0x358
+ 1 1 1 0 0x368
+ 1 1 1 1 NOT ALLOWED!
+
+The ICN driver may be built into the kernel or as a module. Initialization
+depends on how the driver is built:
+
+Driver built into the kernel:
+
+ The ICN driver can be configured using the commandline-feature while
+ loading the kernel with LILO or LOADLIN. It accepts the following syntax:
+
+ icn=p,m[,idstring1[,idstring2]]
+
+ where
+
+ p = portbase (default: 0x320)
+ m = shared memory (default: 0xd0000)
+
+ When using the ICN double card (4B), you MUST define TWO idstrings.
+ idstring must start with a character! There is no way for the driver
+ to distinguish between a 2B and 4B type card. Therefore, by supplying
+ TWO idstrings, you tell the driver that you have a 4B installed.
+
+ If you like to use more than one card, you can use the program
+ "icnctrl" from the utility-package to configure additional cards.
+ You need to configure shared memory only once, since the icn-driver
+ maps all cards into the same address-space.
+
+ Using the "icnctrl"-utility, portbase and shared memory can also be
+ changed during runtime.
+
+ The D-channel protocol is configured by loading different firmware
+ into the card's memory using the "icnctrl"-utility.
+
+
+Driver built as module:
+
+ The module icn.o can be configured during "insmod'ing" it by
+ appending its parameters to the insmod-commandline. The following
+ syntax is accepted:
+
+ portbase=p membase=m icn_id=idstring [icn_id2=idstring2]
+
+ where p, m, idstring1 and idstring2 have the same meanings as the
+ parameters described for the kernel-version above.
+
+ When using the ICN double card (4B), you MUST define TWO idstrings.
+ idstring must start with a character! There is no way for the driver
+ to distinguish between a 2B and 4B type card. Therefore, by supplying
+ TWO idstrings, you tell the driver that you have a 4B installed.
+
+ Using the "icnctrl"-utility, the same features apply to the modularized
+ version like to the kernel-builtin one.
+
+ The D-channel protocol is configured by loading different firmware
+ into the card's memory using the "icnctrl"-utility.
+
+Loading the firmware into the card:
+
+ The firmware is supplied together with the isdn4k-utils package. It
+ can be found in the subdirectory icnctrl/firmware/
+
+ There are 3 files:
+
+ loadpg.bin - Image of the bootstrap loader.
+ pc_1t_ca.bin - Image of firmware for german 1TR6 protocol.
+ pc_eu_ca.bin - Image if firmware for EDSS1 (Euro-ISDN) protocol.
+
+ Assuming you have installed the utility-package correctly, the firmware
+ will be downloaded into the 2B-card using the following command:
+
+ icnctrl -d Idstring load /etc/isdn/loadpg.bin /etc/isdn/pc_XX_ca.bin
+
+ where XX is either "1t" or "eu", depending on the D-Channel protocol
+ used on your S0-bus and Idstring is the Name of the card, given during
+ insmod-time or (for kernel-builtin driver) on the kernel commandline.
+
+ To load a 4B-card, the same command is used, except a second firmware
+ file is appended to the commandline of icnctrl.
+
+ -> After downloading firmware, the two LEDs at the back cover of the card
+ (ICN-4B: 4 LEDs) must be blinking intermittently now. If a connection
+ is up, the corresponding led is lit continuously.
+
+ For further documentation (adding more ICN-cards), refer to the manpage
+ icnctrl.8 which is included in the isdn4k-utils package.
+
diff --git a/Documentation/isdn/README.mISDN b/Documentation/isdn/README.mISDN
new file mode 100644
index 000000000..cd8bf920e
--- /dev/null
+++ b/Documentation/isdn/README.mISDN
@@ -0,0 +1,6 @@
+mISDN is a new modular ISDN driver, in the long term it should replace
+the old I4L driver architecture for passiv ISDN cards.
+It was designed to allow a broad range of applications and interfaces
+but only have the basic function in kernel, the interface to the user
+space is based on sockets with a own address family AF_ISDN.
+
diff --git a/Documentation/isdn/README.pcbit b/Documentation/isdn/README.pcbit
new file mode 100644
index 000000000..512500228
--- /dev/null
+++ b/Documentation/isdn/README.pcbit
@@ -0,0 +1,40 @@
+------------------------------------------------------------------------------
+ README file for the PCBIT-D Device Driver.
+------------------------------------------------------------------------------
+
+The PCBIT is a Euro ISDN adapter manufactured in Portugal by Octal and
+developed in cooperation with Portugal Telecom and Inesc.
+The driver interfaces with the standard kernel isdn facilities
+originally developed by Fritz Elfert in the isdn4linux project.
+
+The common versions of the pcbit board require a firmware that is
+distributed (and copyrighted) by the manufacturer. To load this
+firmware you need "pcbitctl" available on the standard isdn4k-utils
+package or in the pcbit package available in:
+
+ftp://ftp.di.fc.ul.pt/pub/systems/Linux/isdn
+
+Known Limitations:
+
+- The board reset procedure is at the moment incorrect and will only
+allow you to load the firmware after a hard reset.
+
+- Only HDLC in B-channels is supported at the moment. There is no
+current support for X.25 in B or D channels nor LAPD in B
+channels. The main reason is that these two other protocol modes have,
+to my knowledge, very little use. If you want to see them implemented
+*do* send me a mail.
+
+- The driver often triggers errors in the board that I and the
+manufacturer believe to be caused by bugs in the firmware. The current
+version includes several procedures for error recovery that should
+allow normal operation. Plans for the future include cooperation with
+the manufacturer in order to solve this problem.
+
+Information/hints/help can be obtained in the linux isdn
+mailing list (isdn4linux@listserv.isdn4linux.de) or directly from me.
+
+regards,
+ Pedro.
+
+<roque@di.fc.ul.pt>
diff --git a/Documentation/isdn/README.sc b/Documentation/isdn/README.sc
new file mode 100644
index 000000000..1153cd926
--- /dev/null
+++ b/Documentation/isdn/README.sc
@@ -0,0 +1,281 @@
+Welcome to Beta Release 2 of the combination ISDN driver for SpellCaster's
+ISA ISDN adapters. Please note this release 2 includes support for the
+DataCommute/BRI and TeleCommute/BRI adapters only and any other use is
+guaranteed to fail. If you have a DataCommute/PRI installed in the test
+computer, we recommend removing it as it will be detected but will not
+be usable. To see what we have done to Beta Release 2, see section 3.
+
+Speaking of guarantees, THIS IS BETA SOFTWARE and as such contains
+bugs and defects either known or unknown. Use this software at your own
+risk. There is NO SUPPORT for this software. Some help may be available
+through the web site or the mailing list but such support is totally at
+our own option and without warranty. If you choose to assume all and
+total risk by using this driver, we encourage you to join the beta
+mailing list.
+
+To join the Linux beta mailing list, send a message to:
+majordomo@spellcast.com with the words "subscribe linux-beta" as the only
+contents of the message. Do not include a signature. If you choose to
+remove yourself from this list at a later date, send another message to
+the same address with the words "unsubscribe linux-beta" as its only
+contents.
+
+TABLE OF CONTENTS
+-----------------
+ 1. Introduction
+ 1.1 What is ISDN4Linux?
+ 1.2 What is different between this driver and previous drivers?
+ 1.3 How do I setup my system with the correct software to use
+ this driver release?
+
+ 2. Basic Operations
+ 2.1 Unpacking and installing the driver
+ 2.2 Read the man pages!!!
+ 2.3 Installing the driver
+ 2.4 Removing the driver
+ 2.5 What to do if it doesn't load
+ 2.6 How to setup ISDN4Linux with the driver
+
+ 3. Beta Change Summaries and Miscellaneous Notes
+
+1. Introduction
+---------------
+
+The revision 2 Linux driver for SpellCaster ISA ISDN adapters is built
+upon ISDN4Linux available separately or as included in Linux 2.0 and later.
+The driver will support a maximum of 4 adapters in any one system of any
+type including DataCommute/BRI, DataCommute/PRI and TeleCommute/BRI for a
+maximum of 92 channels for host. The driver is supplied as a module in
+source form and needs to be complied before it can be used. It has been
+tested on Linux 2.0.20.
+
+1.1 What Is ISDN4Linux
+
+ISDN4Linux is a driver and set of tools used to access and use ISDN devices
+on a Linux platform in a common and standard way. It supports HDLC and PPP
+protocols and offers channel bundling and MLPPP support. To use ISDN4Linux
+you need to configure your kernel for ISDN support and get the ISDN4Linux
+tool kit from our web site.
+
+ISDN4Linux creates a channel pool from all of the available ISDN channels
+and therefore can function across adapters. When an ISDN4Linux compliant
+driver (such as ours) is loaded, all of the channels go into a pool and
+are used on a first-come first-served basis. In addition, individual
+channels can be specifically bound to particular interfaces.
+
+1.2 What is different between this driver and previous drivers?
+
+The revision 2 driver besides adopting the ISDN4Linux architecture has many
+subtle and not so subtle functional differences from previous releases. These
+include:
+ - More efficient shared memory management combined with a simpler
+ configuration. All adapters now use only 16Kbytes of shared RAM
+ versus between 16K and 64K. New methods for using the shared RAM
+ allow us to utilize all of the available RAM on the adapter through
+ only one 16K page.
+ - Better detection of available upper memory. The probing routines
+ have been improved to better detect available shared RAM pages and
+ used pages are now locked.
+ - Decreased loading time and a wider range of I/O ports probed.
+ We have significantly reduced the amount of time it takes to load
+ the driver and at the same time doubled the number of I/O ports
+ probed increasing the likelihood of finding an adapter.
+ - We now support all ISA adapter models with a single driver instead
+ of separate drivers for each model. The revision 2 driver supports
+ the DataCommute/BRI, DataCommute/PRI and TeleCommute/BRI in any
+ combination up to a maximum of four adapters per system.
+ - On board PPP protocol support has been removed in favour of the
+ sync-PPP support used in ISDN4Linux. This means more control of
+ the protocol parameters, faster negotiation time and a more
+ familiar interface.
+
+1.3 How do I setup my system with the correct software to use
+ this driver release?
+
+Before you can compile, install and use the SpellCaster ISA ISDN driver, you
+must ensure that the following software is installed, configured and running:
+
+ - Linux kernel 2.0.20 or later with the required init and ps
+ versions. Please see your distribution vendor for the correct
+ utility packages. The latest kernel is available from
+ ftp://sunsite.unc.edu/pub/Linux/kernel/v2.0/
+
+ - The latest modules package (modules-2.0.0.tar.gz) from
+ ftp://sunsite.unc.edu/pub/Linux/kernel/modules-2.0.0.tar.gz
+
+ - The ISDN4Linux tools available from
+ ftp://ftp.franken.de/pub/isdn4linux/v2.0/isdn4k-utils-2.0.tar.gz
+ This package may fail to compile for you so you can alternatively
+ get a pre-compiled version from
+ ftp://ftp.spellcast.com/pub/drivers/isdn4linux/isdn4k-bin-2.0.tar.gz
+
+
+2. Basic Operations
+-------------------
+
+2.1 Unpacking and installing the driver
+
+ 1. As root, create a directory in a convenient place. We suggest
+ /usr/src/spellcaster.
+
+ 2. Unpack the archive with :
+ tar xzf sc-n.nn.tar.gz -C /usr/src/spellcaster
+
+ 3. Change directory to /usr/src/spellcaster
+
+ 4. Read the README and RELNOTES files.
+
+ 5. Run 'make' and if all goes well, run 'make install'.
+
+2.2 Read the man pages!!!
+
+Make sure you read the scctrl(8) and sc(4) manual pages before continuing
+any further. Type 'man 8 scctrl' and 'man 4 sc'.
+
+2.3 Installing the driver
+
+To install the driver, type '/sbin/insmod sc' as root. sc(4) details options
+you can specify but you shouldn't need to use any unless this doesn't work.
+
+Make sure the driver loaded and detected all of the adapters by typing
+'dmesg'.
+
+The driver can be configured so that it is loaded upon startup. To do this,
+edit the file "/etc/modules/'uname -f'/'uname -v'" and insert the driver name
+"sc" into this file.
+
+2.4 Removing the driver
+
+To remove the driver, delete any interfaces that may exist (see isdnctrl(8)
+for more on this) and then type '/sbin/rmmod sc'.
+
+2.5 What to do if it doesn't load
+
+If, when you try to install the driver, you get a message mentioning
+'register_isdn' then you do not have the ISDN4Linux system installed. Please
+make sure that ISDN support is configured in the kernel.
+
+If you get a message that says 'initialization of sc failed', then the
+driver failed to detect an adapter or failed to find resources needed such
+as a free IRQ line or shared memory segment. If you are sure there are free
+resources available, use the insmod options detailed in sc(4) to override
+the probing function.
+
+Upon testing, the following problem was noted, the driver would load without
+problems, but the board would not respond beyond that point. When a check was
+done with 'cat /proc/interrupts' the interrupt count for sc was 0. In the event
+of this problem, change the BIOS settings so that the interrupts in question are
+reserved for ISA use only.
+
+
+2.6 How to setup ISDN4Linux with the driver
+
+There are three main configurations which you can use with the driver:
+
+A) Basic HDLC connection
+B) PPP connection
+C) MLPPP connection
+
+It should be mentioned here that you may also use a tty connection if you
+desire. The Documentation directory of the isdn4linux subsystem offers good
+documentation on this feature.
+
+A) 10 steps to the establishment of a basic HDLC connection
+-----------------------------------------------------------
+
+- please open the isdn-hdlc file in the examples directory and follow along...
+
+ This file is a script used to configure a BRI ISDN TA to establish a
+ basic HDLC connection between its two channels. Two network
+ interfaces are created and two routes added between the channels.
+
+ i) using the isdnctrl utility, add an interface with "addif" and
+ name it "isdn0"
+ ii) add the outgoing and inbound telephone numbers
+ iii) set the Layer 2 protocol to hdlc
+ iv) set the eaz of the interface to be the phone number of that
+ specific channel
+ v) to turn the callback features off, set the callback to "off" and
+ the callback delay (cbdelay) to 0.
+ vi) the hangup timeout can be set to a specified number of seconds
+ vii) the hangup upon incoming call can be set on or off
+ viii) use the ifconfig command to bring up the network interface with
+ a specific IP address and point to point address
+ ix) add a route to the IP address through the isdn0 interface
+ x) a ping should result in the establishment of the connection
+
+
+B) Establishment of a PPP connection
+------------------------------------
+
+- please open the isdn-ppp file in the examples directory and follow along...
+
+ This file is a script used to configure a BRI ISDN TA to establish a
+ PPP connection between the two channels. The file is almost
+ identical to the HDLC connection example except that the packet
+ encapsulation type has to be set.
+
+ use the same procedure as in the HDLC connection from steps i) to
+ iii) then, after the Layer 2 protocol is set, set the encapsulation
+ "encap" to syncppp. With this done, the rest of the steps, iv) to x)
+ can be followed from above.
+
+ Then, the ipppd (ippp daemon) must be setup:
+
+ xi) use the ipppd function found in /sbin/ipppd to set the following:
+ xii) take out (minus) VJ compression and bsd compression
+ xiii) set the mru size to 2000
+ xiv) link the two /dev interfaces to the daemon
+
+NOTE: A "*" in the inbound telephone number specifies that a call can be
+accepted on any number.
+
+C) Establishment of a MLPPP connection
+--------------------------------------
+
+- please open the isdn-mppp file in the examples directory and follow along...
+
+ This file is a script used to configure a BRI ISDN TA to accept a
+ Multi Link PPP connection.
+
+ i) using the isdnctrl utility, add an interface with "addif" and
+ name it "ippp0"
+ ii) add the inbound telephone number
+ iii) set the Layer 2 protocol to hdlc and the Layer 3 protocol to
+ trans (transparent)
+ iv) set the packet encapsulation to syncppp
+ v) set the eaz of the interface to be the phone number of that
+ specific channel
+ vi) to turn the callback features off, set the callback to "off" and
+ the callback delay (cbdelay) to 0.
+ vi) the hangup timeout can be set to a specified number of seconds
+ vii) the hangup upon incoming call can be set on or off
+ viii) add a slave interface and name it "ippp32" for example
+ ix) set the similar parameters for the ippp32 interface
+ x) use the ifconfig command to bring-up the ippp0 interface with a
+ specific IP address and point to point address
+ xi) add a route to the IP address through the ippp0 interface
+ xii) use the ipppd function found in /sbin/ipppd to set the following:
+ xiii) take out (minus) bsd compression
+ xiv) set the mru size to 2000
+ xv) add (+) the multi-link function "+mp"
+ xvi) link the two /dev interfaces to the daemon
+
+NOTE: To use the MLPPP connection to dial OUT to a MLPPP connection, change
+the inbound telephone numbers to the outgoing telephone numbers of the MLPPP
+host.
+
+
+3. Beta Change Summaries and Miscellaneous Notes
+------------------------------------------------
+When using the "scctrl" utility to upload firmware revisions on the board,
+please note that the byte count displayed at the end of the operation may be
+different from the total number of bytes in the "dcbfwn.nn.sr" file. Please
+disregard the displayed byte count.
+
+It was noted that in Beta Release 1, the module would fail to load and result
+in a segmentation fault when 'insmod'ed. This problem was created when one of
+the isdn4linux parameters, (isdn_ctrl, data field) was filled in. In some
+cases, this data field was NULL, and was left unchecked, so when it was
+referenced... segv. The bug has been fixed around line 63-68 of event.c.
+
diff --git a/Documentation/isdn/README.syncppp b/Documentation/isdn/README.syncppp
new file mode 100644
index 000000000..27d260095
--- /dev/null
+++ b/Documentation/isdn/README.syncppp
@@ -0,0 +1,58 @@
+Some additional information for setting up a syncPPP
+connection using network interfaces.
+---------------------------------------------------------------
+
+You need one thing beside the isdn4linux package:
+
+ a patched pppd .. (I called it ipppd to show the difference)
+
+Compiling isdn4linux with sync PPP:
+-----------------------------------
+To compile isdn4linux with the sync PPP part, you have
+to answer the appropriate question when doing a "make config"
+Don't forget to load the slhc.o
+module before the isdn.o module, if VJ-compression support
+is not compiled into your kernel. (e.g if you have no PPP or
+CSLIP in the kernel)
+
+Using isdn4linux with sync PPP:
+-------------------------------
+Sync PPP is just another encapsulation for isdn4linux. The
+name to enable sync PPP encapsulation is 'syncppp' .. e.g:
+
+ /sbin/isdnctrl encap ippp0 syncppp
+
+The name of the interface is here 'ippp0'. You need
+one interface with the name 'ippp0' to saturate the
+ipppd, which checks the ppp version via this interface.
+Currently, all devices must have the name ipppX where
+'X' is a decimal value.
+
+To set up a PPP connection you need the ipppd .. You must start
+the ipppd once after installing the modules. The ipppd
+communicates with the isdn4linux link-level driver using the
+/dev/ippp0 to /dev/ippp15 devices. One ipppd can handle
+all devices at once. If you want to use two PPP connections
+at the same time, you have to connect the ipppd to two
+devices .. and so on.
+I've implemented one additional option for the ipppd:
+ 'useifip' will get (if set to not 0.0.0.0) the IP address
+ for the negotiation from the attached network-interface.
+(also: ipppd will try to negotiate pointopoint IP as remote IP)
+You must disable BSD-compression, this implementation can't
+handle compressed packets.
+
+Check the etc/rc.isdn.syncppp in the isdn4kernel-util package
+for an example setup script.
+
+To use the MPPP stuff, you must configure a slave device
+with isdn4linux. Now call the ipppd with the '+mp' option.
+To increase the number of links, you must use the
+'addlink' option of the isdnctrl tool. (rc.isdn.syncppp.MPPP is
+an example script)
+
+enjoy it,
+ michael
+
+
+
diff --git a/Documentation/isdn/README.x25 b/Documentation/isdn/README.x25
new file mode 100644
index 000000000..e561a77c4
--- /dev/null
+++ b/Documentation/isdn/README.x25
@@ -0,0 +1,184 @@
+
+X.25 support within isdn4linux
+==============================
+
+This is alpha/beta test code. Use it completely at your own risk.
+As new versions appear, the stuff described here might suddenly change
+or become invalid without notice.
+
+Keep in mind:
+
+You are using several new parts of the 2.2.x kernel series which
+have not been tested in a large scale. Therefore, you might encounter
+more bugs as usual.
+
+- If you connect to an X.25 neighbour not operated by yourself, ASK the
+ other side first. Be prepared that bugs in the protocol implementation
+ might result in problems.
+
+- This implementation has never wiped out my whole hard disk yet. But as
+ this is experimental code, don't blame me if that happened to you.
+ Backing up important data will never harm.
+
+- Monitor your isdn connections while using this software. This should
+ prevent you from undesired phone bills in case of driver problems.
+
+
+
+
+How to configure the kernel
+===========================
+
+The ITU-T (former CCITT) X.25 network protocol layer has been implemented
+in the Linux source tree since version 2.1.16. The isdn subsystem might be
+useful to run X.25 on top of ISDN. If you want to try it, select
+
+ "CCITT X.25 Packet Layer"
+
+from the networking options as well as
+
+ "ISDN Support" and "X.25 PLP on Top of ISDN"
+
+from the ISDN subsystem options when you configure your kernel for
+compilation. You currently also need to enable
+"Prompt for development and/or incomplete code/drivers" from the
+"Code maturity level options" menu. For the x25trace utility to work
+you also need to enable "Packet socket".
+
+For local testing it is also recommended to enable the isdnloop driver
+from the isdn subsystem's configuration menu.
+
+For testing, it is recommended that all isdn drivers and the X.25 PLP
+protocol are compiled as loadable modules. Like this, you can recover
+from certain errors by simply unloading and reloading the modules.
+
+
+
+What's it for? How to use it?
+=============================
+
+X.25 on top of isdn might be useful with two different scenarios:
+
+- You might want to access a public X.25 data network from your Linux box.
+ You can use i4l if you were physically connected to the X.25 switch
+ by an ISDN B-channel (leased line as well as dial up connection should
+ work).
+
+ This corresponds to ITU-T recommendation X.31 Case A (circuit-mode
+ access to PSPDN [packet switched public data network]).
+
+ NOTE: X.31 also covers a Case B (access to PSPDN via virtual
+ circuit / packet mode service). The latter mode (which in theory
+ also allows using the D-channel) is not supported by isdn4linux.
+ It should however be possible to establish such packet mode connections
+ with certain active isdn cards provided that the firmware supports X.31
+ and the driver exports this functionality to the user. Currently,
+ the AVM B1 driver is the only driver which does so. (It should be
+ possible to access D-channel X.31 with active AVM cards using the
+ CAPI interface of the AVM-B1 driver).
+
+- Or you might want to operate certain ISDN teleservices on your linux
+ box. A lot of those teleservices run on top of the ISO-8208
+ (DTE-DTE mode) network layer protocol. ISO-8208 is essentially the
+ same as ITU-T X.25.
+
+ Popular candidates of such teleservices are EUROfile transfer or any
+ teleservice applying ITU-T recommendation T.90.
+
+To use the X.25 protocol on top of isdn, just create an isdn network
+interface as usual, configure your own and/or peer's ISDN numbers,
+and choose x25iface encapsulation by
+
+ isdnctrl encap <iface-name> x25iface.
+
+Once encap is set like this, the device can be used by the X.25 packet layer.
+
+All the stuff needed for X.25 is implemented inside the isdn link
+level (mainly isdn_net.c and some new source files). Thus, it should
+work with every existing HL driver. I was able to successfully open X.25
+connections on top of the isdnloop driver and the hisax driver.
+"x25iface"-encapsulation bypasses demand dialing. Dialing will be
+initiated when the upper (X.25 packet) layer requests the lapb datalink to
+be established. But hangup timeout is still active. Whenever a hangup
+occurs, all existing X.25 connections on that link will be cleared
+It is recommended to use sufficiently large hangup-timeouts for the
+isdn interfaces.
+
+
+In order to set up a conforming protocol stack you also need to
+specify the proper l2_prot parameter:
+
+To operate in ISO-8208 X.25 DTE-DTE mode, use
+
+ isdnctrl l2_prot <iface-name> x75i
+
+To access an X.25 network switch via isdn (your linux box is the DTE), use
+
+ isdnctrl l2_prot <iface-name> x25dte
+
+To mimic an X.25 network switch (DCE side of the connection), use
+
+ isdnctrl l2_prot <iface-name> x25dce
+
+However, x25dte or x25dce is currently not supported by any real HL
+level driver. The main difference between x75i and x25dte/dce is that
+x25d[tc]e uses fixed lap_b addresses. With x75i, the side which
+initiates the isdn connection uses the DTE's lap_b address while the
+called side used the DCE's lap_b address. Thus, l2_prot x75i might
+probably work if you access a public X.25 network as long as the
+corresponding isdn connection is set up by you. At least one test
+was successful to connect via isdn4linux to an X.25 switch using this
+trick. At the switch side, a terminal adapter X.21 was used to connect
+it to the isdn.
+
+
+How to set up a test installation?
+==================================
+
+To test X.25 on top of isdn, you need to get
+
+- a recent version of the "isdnctrl" program that supports setting the new
+ X.25 specific parameters.
+
+- the x25-utils-2.X package from
+ ftp://ftp.hes.iki.fi/pub/ham/linux/ax25/x25utils-*
+ (don't confuse the x25-utils with the ax25-utils)
+
+- an application program that uses linux PF_X25 sockets (some are
+ contained in the x25-util package).
+
+Before compiling the user level utilities make sure that the compiler/
+preprocessor will fetch the proper kernel header files of this kernel
+source tree. Either make /usr/include/linux a symbolic link pointing to
+this kernel's include/linux directory or set the appropriate compiler flags.
+
+When all drivers and interfaces are loaded and configured you need to
+ifconfig the network interfaces up and add X.25-routes to them. Use
+the usual ifconfig tool.
+
+ifconfig <iface-name> up
+
+But a special x25route tool (distributed with the x25-util package)
+is needed to set up X.25 routes. I.e.
+
+x25route add 01 <iface-name>
+
+will cause all x.25 connections to the destination X.25-address
+"01" to be routed to your created isdn network interface.
+
+There are currently no real X.25 applications available. However, for
+tests, the x25-utils package contains a modified version of telnet
+and telnetd that uses X.25 sockets instead of tcp/ip sockets. You can
+use those for your first tests. Furthermore, you might check
+ftp://ftp.hamburg.pop.de/pub/LOCAL/linux/i4l-eft/ which contains some
+alpha-test implementation ("eftp4linux") of the EUROfile transfer
+protocol.
+
+The scripts distributed with the eftp4linux test releases might also
+provide useful examples for setting up X.25 on top of isdn.
+
+The x25-utility package also contains an x25trace tool that can be
+used to monitor X.25 packets received by the network interfaces.
+The /proc/net/x25* files also contain useful information.
+
+- Henner
diff --git a/Documentation/isdn/syncPPP.FAQ b/Documentation/isdn/syncPPP.FAQ
new file mode 100644
index 000000000..3257a4bc0
--- /dev/null
+++ b/Documentation/isdn/syncPPP.FAQ
@@ -0,0 +1,224 @@
+simple isdn4linux PPP FAQ .. to be continued .. not 'debugged'
+-------------------------------------------------------------------
+
+Q01: what's pppd, ipppd, syncPPP, asyncPPP ??
+Q02: error message "this system lacks PPP support"
+Q03: strange information using 'ifconfig'
+Q04: MPPP?? What's that and how can I use it ...
+Q05: I tried MPPP but it doesn't work
+Q06: can I use asynchronous PPP encapsulation with network devices
+Q07: A SunISDN machine can't connect to my i4l system
+Q08: I wanna talk to several machines, which need different configs
+Q09: Starting the ipppd, I get only error messages from i4l
+Q10: I wanna use dynamic IP address assignment
+Q11: I can't connect. How can I check where the problem is.
+Q12: How can I reduce login delay?
+
+-------------------------------------------------------------------
+
+Q01: pppd, ipppd, syncPPP, asyncPPP .. what is that ?
+ what should I use?
+A: The pppd is for asynchronous PPP .. asynchronous means
+ here, the framing is character based. (e.g when
+ using ttyI* or tty* devices)
+
+ The ipppd handles PPP packets coming in HDLC
+ frames (bit based protocol) ... The PPP driver
+ in isdn4linux pushes all IP packets direct
+ to the network layer and all PPP protocol
+ frames to the /dev/ippp* device.
+ So, the ipppd is a simple external network
+ protocol handler.
+
+ If you login into a remote machine using the
+ /dev/ttyI* devices and then enable PPP on the
+ remote terminal server -> use the 'old' pppd
+
+ If your remote side immediately starts to send
+ frames ... you probably connect to a
+ syncPPP machine .. use the network device part
+ of isdn4linux with the 'syncppp' encapsulation
+ and make sure, that the ipppd is running and
+ connected to at least one /dev/ippp*. Check the
+ isdn4linux manual on how to configure a network device.
+
+--
+
+Q02: when I start the ipppd .. I only get the
+ error message "this system lacks PPP support"
+A: check that at least the device 'ippp0' exists.
+ (you can check this e.g with the program 'ifconfig')
+ The ipppd NEEDS this device under THIS name ..
+ If this device doesn't exists, use:
+ isdnctrl addif ippp0
+ isdnctrl encap ippp0 syncppp
+ ... (see isdn4linux doc for more) ...
+A: Maybe you have compiled the ipppd with another
+ kernel source tree than the kernel you currently
+ run ...
+
+--
+
+Q03: when I list the netdevices with ifconfig I see, that
+ my ISDN interface has a HWaddr and IRQ=0 and Base
+ address = 0
+A: The device is a fake ethernet device .. ignore IRQ and baseaddr
+ You need the HWaddr only for ethernet encapsulation.
+
+--
+
+Q04: MPPP?? What's that and how can I use it ...
+
+A: MPPP or MP or MPP (Warning: MP is also an
+ acronym for 'Multi Processor') stands for
+ Multi Point to Point and means bundling
+ of several channels to one logical stream.
+ To enable MPPP negotiation you must call the
+ ipppd with the '+mp' option.
+ You must also configure a slave device for
+ every additional channel. (see the i4l manual
+ for more)
+ To use channel bundling you must first activate
+ the 'master' or initial call. Now you can add
+ the slave channels with the command:
+ isdnctrl addlink <device>
+ e.g:
+ isdnctrl addlink ippp0
+ This is different from other encapsulations of
+ isdn4linux! With syncPPP, there is no automatic
+ activation of slave devices.
+
+--
+
+Q05: I tried MPPP but it doesn't work .. the ipppd
+ writes in the debug log something like:
+ .. rcvd [0][proto=0x3d] c0 00 00 00 80 fd 01 01 00 0a ...
+ .. sent [0][LCP ProtRej id=0x2 00 3d c0 00 00 00 80 fd 01 ...
+
+A: you forgot to compile MPPP/RFC1717 support into the
+ ISDN Subsystem. Recompile with this option enabled.
+
+--
+
+Q06: can I use asynchronous PPP encapsulation
+ over the network interface of isdn4linux ..
+
+A: No .. that's not possible .. Use the standard
+ PPP package over the /dev/ttyI* devices. You
+ must not use the ipppd for this.
+
+--
+
+Q07: A SunISDN machine tries to connect my i4l system,
+ which doesn't work.
+ Checking the debug log I just saw garbage like:
+!![ ... fill in the line ... ]!!
+
+A: The Sun tries to talk asynchronous PPP ... i4l
+ can't understand this ... try to use the ttyI*
+ devices with the standard PPP/pppd package
+
+A: (from Alexanter Strauss: )
+!![ ... fill in mail ]!!
+
+--
+
+Q08: I wanna talk to remote machines, which need
+ a different configuration. The only way
+ I found to do this is to kill the ipppd and
+ start a new one with another config to connect
+ to the second machine.
+
+A: you must bind a network interface explicitly to
+ an ippp device, where you can connect a (for this
+ interface) individually configured ipppd.
+
+--
+
+Q09: When I start the ipppd I only get error messages
+ from the i4l driver ..
+
+A: When starting, the ipppd calls functions which may
+ trigger a network packet. (e.g gethostbyname()).
+ Without the ipppd (at this moment, it is not
+ fully started) we can't handle this network request.
+ Try to configure hostnames necessary for the ipppd
+ in your local /etc/hosts file or in a way, that
+ your system can resolve it without using an
+ isdn/ippp network-interface.
+
+--
+
+Q10: I wanna use dynamic IP address assignment ... How
+ must I configure the network device.
+
+A: At least you must have a route which forwards
+ a packet to the ippp network-interface to trigger
+ the dial-on-demand.
+ A default route to the ippp-interface will work.
+ Now you must choose a dummy IP address for your
+ interface.
+ If for some reason you can't set the default
+ route to the ippp interface, you may take any
+ address of the subnet from which you expect your
+ dynamic IP number and set a 'network route' for
+ this subnet to the ippp interface.
+ To allow overriding of the dummy address you
+ must call the ipppd with the 'ipcp-accept-local' option.
+
+A: You must know, how the ipppd gets the addresses it wanna
+ configure. If you don't give any option, the ipppd
+ tries to negotiate the local host address!
+ With the option 'noipdefault' it requests an address
+ from the remote machine. With 'useifip' it gets the
+ addresses from the net interface. Or you set the address
+ on the option line with the <a.b.c.d:e.f.g.h> option.
+ Note: the IP address of the remote machine must be configured
+ locally or the remote machine must send it in an IPCP request.
+ If your side doesn't know the IP address after negotiation, it
+ closes the connection!
+ You must allow overriding of address with the 'ipcp-accept-*'
+ options, if you have set your own or the remote address
+ explicitly.
+
+A: Maybe you try these options .. e.g:
+
+ /sbin/ipppd :$REMOTE noipdefault /dev/ippp0
+
+ where REMOTE must be the address of the remote machine (the
+ machine, which gives you your address)
+
+--
+
+Q11: I can't connect. How can I check where the problem is.
+
+A: A good help log is the debug output from the ipppd...
+ Check whether you can find there:
+ - only a few LCP-conf-req SENT messages (less then 10)
+ and then a Term-REQ:
+ -> check whether your ISDN card is well configured
+ it seems, that your machine doesn't dial
+ (IRQ,IO,Proto, etc problems)
+ Configure your ISDN card to print debug messages and
+ check the /dev/isdnctrl output next time. There
+ you can see, whether there is activity on the card/line.
+ - there are at least a few RECV messages in the log:
+ -> fine: your card is dialing and your remote machine
+ tries to talk with you. Maybe only a missing
+ authentication. Check your ipppd configuration again.
+ - the ipppd exits for some reason:
+ -> not good ... check /var/adm/syslog and /var/adm/daemon.
+ Could be a bug in the ipppd.
+
+--
+
+Q12: How can I reduce login delay?
+
+A: Log a login session ('debug' log) and check which options
+ your remote side rejects. Next time configure your ipppd
+ to not negotiate these options. Another 'side effect' is, that
+ this increases redundancy. (e.g your remote side is buggy and
+ rejects options in a wrong way).
+
+
+
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO
new file mode 100644
index 000000000..b61885c35
--- /dev/null
+++ b/Documentation/ja_JP/HOWTO
@@ -0,0 +1,644 @@
+NOTE:
+This is a version of Documentation/HOWTO translated into Japanese.
+This document is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com>
+and the JF Project team <www.linux.or.jp/JF>.
+If you find any difference between this document and the original file
+or a problem with the translation,
+please contact the maintainer of this file or JF project.
+
+Please also note that the purpose of this file is to be easier to read
+for non English (read: Japanese) speakers and is not intended as a
+fork. So if you have any comments or updates for this file, please try
+to update the original English file first.
+
+Last Updated: 2013/07/19
+==================================
+これは、
+linux-3.10/Documentation/HOWTO
+の和訳です。
+
+翻訳団体: JF プロジェクト < http://linuxjf.sourceforge.jp/ >
+翻訳日: 2013/7/19
+翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
+校正者: 松倉さん <nbh--mats at nifty dot com>
+ 小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
+ 武井伸光さん、<takei at webmasters dot gr dot jp>
+ かねこさん (Seiji Kaneko) <skaneko at a2 dot mbn dot or dot jp>
+ 野口さん (Kenji Noguchi) <tokyo246 at gmail dot com>
+ 河内さん (Takayoshi Kochi) <t-kochi at bq dot jp dot nec dot com>
+ 岩本さん (iwamoto) <iwamoto.kn at ncos dot nec dot co dot jp>
+ 内田さん (Satoshi Uchida) <s-uchida at ap dot jp dot nec dot com>
+==================================
+
+Linux カーネル開発のやり方
+-------------------------------
+
+これは上のトピック( Linux カーネル開発のやり方)の重要な事柄を網羅した
+ドキュメントです。ここには Linux カーネル開発者になるための方法と
+Linux カーネル開発コミュニティと共に活動するやり方を学ぶ方法が含まれて
+います。カーネルプログラミングに関する技術的な項目に関することは何も含
+めないようにしていますが、カーネル開発者となるための正しい方向に向かう
+手助けになります。
+
+もし、このドキュメントのどこかが古くなっていた場合には、このドキュメン
+トの最後にリストしたメンテナにパッチを送ってください。
+
+はじめに
+---------
+
+あなたは Linux カーネルの開発者になる方法を学びたいのでしょうか? そ
+れともあなたは上司から「このデバイスの Linux ドライバを書くように」と
+言われているのでしょうか? 
+この文書の目的は、あなたが踏むべき手順と、コミュニティと一緒にうまく働
+くヒントを書き下すことで、あなたが知るべき全てのことを教えることです。
+また、このコミュニティがなぜ今うまくまわっているのかという理由の一部も
+説明しようと試みています。
+
+
+カーネルは 少量のアーキテクチャ依存部分がアセンブリ言語で書かれている
+以外は大部分は C 言語で書かれています。C言語をよく理解していることはカー
+ネル開発者には必要です。アーキテクチャ向けの低レベル部分の開発をするの
+でなければ、(どんなアーキテクチャでも)アセンブリ(訳注: 言語)は必要あり
+ません。以下の本は、C 言語の十分な知識や何年もの経験に取って代わるもの
+ではありませんが、少なくともリファレンスとしては良い本です。
+ - "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
+ -『プログラミング言語C第2版』(B.W. カーニハン/D.M. リッチー著 石田晴久訳) [共立出版]
+ - "Practical C Programming" by Steve Oualline [O'Reilly]
+ - 『C実践プログラミング第3版』(Steve Oualline著 望月康司監訳 谷口功訳) [オライリージャパン]
+ - "C: A Reference Manual" by Harbison and Steele [Prentice Hall]
+ - 『新・詳説 C 言語 H&S リファレンス』
+ (サミュエル P ハービソン/ガイ L スティール共著 斉藤 信男監訳)[ソフトバンク]
+
+カーネルは GNU C と GNU ツールチェインを使って書かれています。カーネル
+は ISO C89 仕様に準拠して書く一方で、標準には無い言語拡張を多く使って
+います。カーネルは標準 C ライブラリとは関係がないといった、C 言語フリー
+スタンディング環境です。そのため、C の標準で使えないものもあります。任
+意の long long の除算や浮動小数点は使えません。
+ときどき、カーネルがツールチェインや C 言語拡張に置いている前提がどう
+なっているのかわかりにくいことがあり、また、残念なことに決定的なリファ
+レンスは存在しません。情報を得るには、gcc の info ページ( info gcc )を
+見てください。
+
+あなたは既存の開発コミュニティと一緒に作業する方法を学ぼうとしているこ
+とに留意してください。そのコミュニティは、コーディング、スタイル、
+開発手順について高度な標準を持つ、多様な人の集まりです。
+地理的に分散した大規模なチームに対してもっともうまくいくとわかったこと
+をベースにしながら、これらの標準は長い時間をかけて築かれてきました。
+これらはきちんと文書化されていますから、事前にこれらの標準についてでき
+るだけたくさん学んでください。また皆があなたやあなたの会社のやり方に合わ
+せてくれると思わないでください。
+
+法的問題
+------------
+
+Linux カーネルのソースコードは GPL ライセンスの下でリリースされていま
+す。ライセンスの詳細については、ソースツリーのメインディレクトリに存在
+する、COPYING のファイルを見てください。もしライセンスについてさらに質
+問があれば、Linux Kernel メーリングリストに質問するのではなく、どうぞ
+法律家に相談してください。メーリングリストの人達は法律家ではなく、法的
+問題については彼らの声明はあてにするべきではありません。
+
+GPL に関する共通の質問や回答については、以下を参照してください。
+ http://www.gnu.org/licenses/gpl-faq.html
+
+ドキュメント
+------------
+
+Linux カーネルソースツリーは幅広い範囲のドキュメントを含んでおり、それ
+らはカーネルコミュニティと会話する方法を学ぶのに非常に貴重なものです。
+新しい機能がカーネルに追加される場合、その機能の使い方について説明した
+新しいドキュメントファイルも追加することを勧めます。
+カーネルの変更が、カーネルがユーザ空間に公開しているインターフェイスの
+変更を引き起こす場合、その変更を説明するマニュアルページのパッチや情報
+をマニュアルページのメンテナ mtk.manpages@gmail.com に送り、CC を
+linux-api@ver.kernel.org に送ることを勧めます。
+
+以下はカーネルソースツリーに含まれている読んでおくべきファイルの一覧で
+す-
+
+ README
+ このファイルは Linuxカーネルの簡単な背景とカーネルを設定(訳注
+ configure )し、生成(訳注 build )するために必要なことは何かが書かれ
+ ています。カーネルに関して初めての人はここからスタートすると良いで
+ しょう。
+
+ Documentation/Changes
+ このファイルはカーネルをうまく生成(訳注 build )し、走らせるのに最
+ 小限のレベルで必要な数々のソフトウェアパッケージの一覧を示してい
+ ます。
+
+ Documentation/CodingStyle
+ これは Linux カーネルのコーディングスタイルと背景にある理由を記述
+ しています。全ての新しいコードはこのドキュメントにあるガイドライン
+ に従っていることを期待されています。大部分のメンテナはこれらのルー
+ ルに従っているものだけを受け付け、多くの人は正しいスタイルのコード
+ だけをレビューします。
+
+ Documentation/SubmittingPatches
+ Documentation/SubmittingDrivers
+ これらのファイルには、どうやってうまくパッチを作って投稿するかに
+ ついて非常に詳しく書かれており、以下を含みます(これだけに限らない
+ けれども)
+ - Email に含むこと
+ - Email の形式
+ - だれに送るか
+ これらのルールに従えばうまくいくことを保証することではありません
+ が (すべてのパッチは内容とスタイルについて精査を受けるので)、
+ ルールに従わなければ間違いなくうまくいかないでしょう。
+
+ この他にパッチを作る方法についてのよくできた記述は-
+
+ "The Perfect Patch"
+ http://www.ozlabs.org/~akpm/stuff/tpp.txt
+ "Linux kernel patch submission format"
+ http://linux.yyz.us/patch-format.html
+
+ Documentation/stable_api_nonsense.txt
+ このファイルはカーネルの中に不変のAPIを持たないことにした意識的な
+ 決断の背景にある理由について書かれています。以下のようなことを含
+ んでいます-
+ - サブシステムとの間に層を作ること(コンパチビリティのため?)
+ - オペレーティングシステム間のドライバの移植性
+ - カーネルソースツリーの素早い変更を遅らせる(もしくは素早い変更
+ を妨げる)
+ このドキュメントは Linux 開発の思想を理解するのに非常に重要です。
+ そして、他のOSでの開発者が Linux に移る時にとても重要です。
+
+ Documentation/SecurityBugs
+ もし Linux カーネルでセキュリティ問題を発見したように思ったら、こ
+ のドキュメントのステップに従ってカーネル開発者に連絡し、問題解決を
+ 支援してください。
+
+ Documentation/ManagementStyle
+ このドキュメントは Linux カーネルのメンテナ達がどう行動するか、
+ 彼らの手法の背景にある共有されている精神について記述しています。こ
+ れはカーネル開発の初心者なら(もしくは、単に興味があるだけの人でも)
+ 重要です。なぜならこのドキュメントは、カーネルメンテナ達の独特な
+ 行動についての多くの誤解や混乱を解消するからです。
+
+ Documentation/stable_kernel_rules.txt
+ このファイルはどのように stable カーネルのリリースが行われるかのルー
+ ルが記述されています。そしてこれらのリリースの中のどこかで変更を取
+ り入れてもらいたい場合に何をすれば良いかが示されています。
+
+ Documentation/kernel-docs.txt
+  カーネル開発に付随する外部ドキュメントのリストです。もしあなたが
+ 探しているものがカーネル内のドキュメントでみつからなかった場合、
+ このリストをあたってみてください。
+
+ Documentation/applying-patches.txt
+ パッチとはなにか、パッチをどうやって様々なカーネルの開発ブランチに
+ 適用するのかについて正確に記述した良い入門書です。
+
+カーネルはソースコードから自動的に生成可能な多数のドキュメントを自分自
+身でもっています。これにはカーネル内 API のすべての記述や、どう正しく
+ロックをかけるかの規則が含まれます。このドキュメントは
+Documentation/DocBook/ ディレクトリに作られ、以下のように
+ make pdfdocs
+ make psdocs
+ make htmldocs
+ make mandocs
+コマンドを実行するとメインカーネルのソースディレクトリから
+それぞれ、PDF, Postscript, HTML, man page の形式で生成されます。
+
+カーネル開発者になるには
+---------------------------
+
+もしあなたが、Linux カーネル開発について何も知らないならば、
+KernelNewbies プロジェクトを見るべきです
+ http://kernelnewbies.org
+
+このサイトには役に立つメーリングリストがあり、基本的なカーネル開発に関
+するほとんどどんな種類の質問もできます (既に回答されているようなことを
+聞く前にまずはアーカイブを調べてください)。
+またここには、リアルタイムで質問を聞くことができる IRC チャネルや、Linux
+カーネルの開発に関して学ぶのに便利なたくさんの役に立つドキュメントがあ
+ります。
+
+web サイトには、コードの構成、サブシステム、現在存在するプロジェクト(ツ
+リーにあるもの無いものの両方)の基本的な管理情報があります。
+ここには、また、カーネルのコンパイルのやり方やパッチの当て方などの間接
+的な基本情報も記述されています。
+
+あなたがどこからスタートして良いかわからないが、Linux カーネル開発コミュ
+ニティに参加して何かすることをさがしている場合には、Linux kernel
+Janitor's プロジェクトにいけば良いでしょう -
+ http://kernelnewbies.org/KernelJanitors
+ここはそのようなスタートをするのにうってつけの場所です。ここには、
+Linux カーネルソースツリーの中に含まれる、きれいにし、修正しなければな
+らない、単純な問題のリストが記述されています。このプロジェクトに関わる
+開発者と一緒に作業することで、あなたのパッチを Linuxカーネルツリーに入
+れるための基礎を学ぶことができ、そしてもしあなたがまだアイディアを持っ
+ていない場合には、次にやる仕事の方向性が見えてくるかもしれません。
+
+もしあなたが、すでにひとまとまりコードを書いていて、カーネルツリーに入
+れたいと思っていたり、それに関する適切な支援を求めたい場合、カーネル
+メンターズプロジェクトはそのような皆さんを助けるためにできました。
+ここにはメーリングリストがあり、以下から参照できます
+ http://selenic.com/mailman/listinfo/kernel-mentors
+
+実際に Linux カーネルのコードについて修正を加える前に、どうやってその
+コードが動作するのかを理解することが必要です。そのためには、特別なツー
+ルの助けを借りてでも、それを直接よく読むことが最良の方法です(ほとんど
+のトリッキーな部分は十分にコメントしてありますから)。そういうツールで
+特におすすめなのは、Linux クロスリファレンスプロジェクトです。これは、
+自己参照方式で、索引がついた web 形式で、ソースコードを参照することが
+できます。この最新の素晴しいカーネルコードのリポジトリは以下で見つかり
+ます-
+ http://lxr.linux.no/+trees
+
+開発プロセス
+-----------------------
+
+Linux カーネルの開発プロセスは現在幾つかの異なるメインカーネル「ブラン
+チ」と多数のサブシステム毎のカーネルブランチから構成されます。
+これらのブランチとは-
+ - メインの 3.x カーネルツリー
+ - 3.x.y -stable カーネルツリー
+ - 3.x -git カーネルパッチ
+ - サブシステム毎のカーネルツリーとパッチ
+ - 統合テストのための 3.x -next カーネルツリー
+
+3.x カーネルツリー
+-----------------
+
+3.x カーネルは Linus Torvalds によってメンテナンスされ、kernel.org
+の pub/linux/kernel/v3.x/ ディレクトリに存在します。この開発プロセスは
+以下のとおり-
+
+ - 新しいカーネルがリリースされた直後に、2週間の特別期間が設けられ、
+ この期間中に、メンテナ達は Linus に大きな差分を送ることができます。
+ このような差分は通常 -next カーネルに数週間含まれてきたパッチです。
+ 大きな変更は git(カーネルのソース管理ツール、詳細は
+ http://git-scm.com/ 参照) を使って送るのが好ましいやり方ですが、パッ
+ チファイルの形式のまま送るのでも十分です。
+
+ - 2週間後、-rc1 カーネルがリリースされ、この後にはカーネル全体の安定
+ 性に影響をあたえるような新機能は含まない類のパッチしか取り込むこと
+ はできません。新しいドライバ(もしくはファイルシステム)のパッチは
+ -rc1 の後で受け付けられることもあることを覚えておいてください。な
+ ぜなら、変更が独立していて、追加されたコードの外の領域に影響を与え
+ ない限り、退行のリスクは無いからです。-rc1 がリリースされた後、
+ Linus へパッチを送付するのに git を使うこともできますが、パッチは
+ レビューのために、パブリックなメーリングリストへも同時に送る必要が
+ あります。
+
+ - 新しい -rc は Linus が、最新の git ツリーがテスト目的であれば十分
+ に安定した状態にあると判断したときにリリースされます。目標は毎週新
+ しい -rc カーネルをリリースすることです。
+
+ - このプロセスはカーネルが 「準備ができた」と考えられるまで継続しま
+ す。このプロセスはだいたい 6週間継続します。
+
+ - 各リリースでの既知の後戻り問題(regression: このリリースの中で新規
+ に作り込まれた問題を指す) はその都度 Linux-kernel メーリングリスト
+ に投稿されます。ゴールとしては、カーネルが 「準備ができた」と宣言
+ する前にこのリストの長さをゼロに減らすことですが、現実には、数個の
+ 後戻り問題がリリース時にたびたび残ってしまいます。
+
+Andrew Morton が Linux-kernel メーリングリストにカーネルリリースについ
+て書いたことをここで言っておくことは価値があります-
+ 「カーネルがいつリリースされるかは誰も知りません。なぜなら、これは現
+ 実に認識されたバグの状況によりリリースされるのであり、前もって決めら
+ れた計画によってリリースされるものではないからです。」
+
+3.x.y -stable カーネルツリー
+---------------------------
+
+バージョン番号が3つの数字に分かれているカーネルは -stable カーネルです。
+これには、3.x カーネルで見つかったセキュリティ問題や重大な後戻りに対
+する比較的小さい重要な修正が含まれます。
+
+これは、開発/実験的バージョンのテストに協力することに興味が無く、
+最新の安定したカーネルを使いたいユーザに推奨するブランチです。
+
+もし、3.x.y カーネルが存在しない場合には、番号が一番大きい 3.x が
+最新の安定版カーネルです。
+
+3.x.y は "stable" チーム <stable@vger.kernel.org> でメンテされており、必
+要に応じてリリースされます。通常のリリース期間は 2週間毎ですが、差し迫っ
+た問題がなければもう少し長くなることもあります。セキュリティ関連の問題
+の場合はこれに対してだいたいの場合、すぐにリリースがされます。
+
+カーネルツリーに入っている、Documentation/stable_kernel_rules.txt ファ
+イルにはどのような種類の変更が -stable ツリーに受け入れ可能か、またリ
+リースプロセスがどう動くかが記述されています。
+
+3.x -git パッチ
+------------------
+
+git リポジトリで管理されているLinus のカーネルツリーの毎日のスナップ
+ショットがあります。(だから -git という名前がついています)。これらのパッ
+チはおおむね毎日リリースされており、Linus のツリーの現状を表します。こ
+れは -rc カーネルと比べて、パッチが大丈夫かどうかも確認しないで自動的
+に生成されるので、より実験的です。
+
+サブシステム毎のカーネルツリーとパッチ
+-------------------------------------------
+
+それぞれのカーネルサブシステムのメンテナ達は --- そして多くのカーネル
+サブシステムの開発者達も --- 各自の最新の開発状況をソースリポジトリに
+公開しています。そのため、自分とは異なる領域のカーネルで何が起きている
+かを他の人が見られるようになっています。開発が早く進んでいる領域では、
+開発者は自身の投稿がどのサブシステムカーネルツリーを元にしているか質問
+されるので、その投稿とすでに進行中の他の作業との衝突が避けられます。
+
+大部分のこれらのリポジトリは git ツリーです。しかしその他の SCM や
+quilt シリーズとして公開されているパッチキューも使われています。これら
+のサブシステムリポジトリのアドレスは MAINTAINERS ファイルにリストされ
+ています。これらの多くは http://git.kernel.org/ で参照することができま
+す。
+
+提案されたパッチがこのようなサブシステムツリーにコミットされる前に、メー
+リングリストで事前にレビューにかけられます(以下の対応するセクションを
+参照)。いくつかのカーネルサブシステムでは、このレビューは patchwork
+というツールによって追跡されます。Patchwork は web インターフェイスに
+よってパッチ投稿の表示、パッチへのコメント付けや改訂などができ、そして
+メンテナはパッチに対して、レビュー中、受付済み、拒否というようなマーク
+をつけることができます。大部分のこれらの patchwork のサイトは
+http://patchwork.kernel.org/ でリストされています。
+
+統合テストのための 3.x -next カーネルツリー
+---------------------------------------------
+
+サブシステムツリーの更新内容がメインラインの 3.x ツリーにマージされ
+る前に、それらは統合テストされる必要があります。この目的のため、実質的
+に全サブシステムツリーからほぼ毎日プルされてできる特別なテスト用のリ
+ポジトリが存在します-
+ http://git.kernel.org/?p=linux/kernel/git/next/linux-next.git
+ http://linux.f-seidel.de/linux-next/pmwiki/
+
+このやり方によって、-next カーネルは次のマージ機会でどんなものがメイン
+ラインカーネルにマージされるか、おおまかなの展望を提供します。-next
+カーネルの実行テストを行う冒険好きなテスターは大いに歓迎されます
+
+バグレポート
+-------------
+
+bugzilla.kernel.org は Linux カーネル開発者がカーネルのバグを追跡する
+場所です。ユーザは見つけたバグの全てをこのツールで報告すべきです。
+どう kernel bugzilla を使うかの詳細は、以下を参照してください-
+ http://bugzilla.kernel.org/page.cgi?id=faq.html
+メインカーネルソースディレクトリにあるファイル REPORTING-BUGS はカーネ
+ルバグらしいものについてどうレポートするかの良いテンプレートであり、問
+題の追跡を助けるためにカーネル開発者にとってどんな情報が必要なのかの詳
+細が書かれています。
+
+バグレポートの管理
+-------------------
+
+あなたのハッキングのスキルを訓練する最高の方法のひとつに、他人がレポー
+トしたバグを修正することがあります。あなたがカーネルをより安定化させる
+こに寄与するということだけでなく、あなたは 現実の問題を修正することを
+学び、自分のスキルも強化でき、また他の開発者があなたの存在に気がつき
+ます。バグを修正することは、多くの開発者の中から自分が功績をあげる最善
+の道です、なぜなら多くの人は他人のバグの修正に時間を浪費することを好ま
+ないからです。
+
+すでにレポートされたバグのために仕事をするためには、
+http://bugzilla.kernel.org に行ってください。もし今後のバグレポートに
+ついてアドバイスを受けたいのであれば、bugme-new メーリングリスト(新し
+いバグレポートだけがここにメールされる) または bugme-janitor メーリン
+グリスト(bugzilla の変更毎にここにメールされる)を購読できます。
+
+ http://lists.linux-foundation.org/mailman/listinfo/bugme-new
+ http://lists.linux-foundation.org/mailman/listinfo/bugme-janitors
+
+メーリングリスト
+-------------
+
+上のいくつかのドキュメントで述べていますが、コアカーネル開発者の大部分
+は Linux kernel メーリングリストに参加しています。このリストの登録/脱
+退の方法については以下を参照してください-
+ http://vger.kernel.org/vger-lists.html#linux-kernel
+
+このメーリングリストのアーカイブは web 上の多数の場所に存在します。こ
+れらのアーカイブを探すにはサーチエンジンを使いましょう。例えば-
+ http://dir.gmane.org/gmane.linux.kernel
+
+リストに投稿する前にすでにその話題がアーカイブに存在するかどうかを検索
+することを是非やってください。多数の事がすでに詳細に渡って議論されて
+おり、アーカイブにのみ記録されています。
+
+大部分のカーネルサブシステムも自分の個別の開発を実施するメーリングリス
+トを持っています。個々のグループがどんなリストを持っているかは、
+MAINTAINERS ファイルにリストがありますので参照してください。
+
+多くのリストは kernel.org でホストされています。これらの情報は以下にあ
+ります-
+ http://vger.kernel.org/vger-lists.html
+
+メーリングリストを使う場合、良い行動習慣に従うようにしましょう。
+少し安っぽいが、以下の URL は上のリスト(や他のリスト)で会話する場合の
+シンプルなガイドラインを示しています-
+ http://www.albion.com/netiquette/
+
+もし複数の人があなたのメールに返事をした場合、CC: で受ける人のリストは
+だいぶ多くなるでしょう。良い理由がない場合、CC: リストから誰かを削除を
+しないように、また、メーリングリストのアドレスだけにリプライすることの
+ないようにしましょう。1つは送信者から、もう1つはリストからのように、メー
+ルを2回受けることになってもそれに慣れ、しゃれたメールヘッダーを追加し
+てこの状態を変えようとしないように。人々はそのようなことは好みません。
+
+今までのメールでのやりとりとその間のあなたの発言はそのまま残し、
+"John Kernlehacker wrote ...:" の行をあなたのリプライの先頭行にして、
+メールの先頭でなく、各引用行の間にあなたの言いたいことを追加するべきで
+す。
+
+もしパッチをメールに付ける場合は、Documentaion/SubmittingPatches に提
+示されているように、それは プレーンな可読テキストにすることを忘れない
+ようにしましょう。カーネル開発者は 添付や圧縮したパッチを扱いたがりま
+せん-
+彼らはあなたのパッチの行毎にコメントを入れたいので、そのためにはそうす
+るしかありません。あなたのメールプログラムが空白やタブを圧縮しないよう
+に確認した方が良いです。最初の良いテストとしては、自分にメールを送って
+みて、そのパッチを自分で当ててみることです。もしそれがうまく行かないな
+ら、あなたのメールプログラムを直してもらうか、正しく動くように変えるべ
+きです。
+
+とりわけ、他の登録者に対する尊敬を表すようにすることを覚えておいてくだ
+さい。
+
+コミュニティと共に働くこと
+--------------------------
+
+カーネルコミュニティのゴールは可能なかぎり最高のカーネルを提供すること
+です。あなたがパッチを受け入れてもらうために投稿した場合、それは、技術
+的メリットだけがレビューされます。その際、あなたは何を予想すべきでしょ
+うか?
+ - 批判
+ - コメント
+ - 変更の要求
+ - パッチの正当性の証明要求
+ - 沈黙
+
+思い出してください、ここはあなたのパッチをカーネルに入れる話です。あ
+なたは、あなたのパッチに対する批判とコメントを受け入れるべきで、それら
+を技術的レベルで評価して、パッチを再作成するか、なぜそれらの変更をすべ
+きでないかを明確で簡潔な理由の説明を提供してください。
+もし、あなたのパッチに何も反応がない場合、たまにはメールの山に埋もれて
+見逃され、あなたの投稿が忘れられてしまうこともあるので、数日待って再度
+投稿してください。
+
+あなたがやるべきでないものは?
+ - 質問なしにあなたのパッチが受け入れられると想像すること
+ - 守りに入ること
+ - コメントを無視すること
+ - 要求された変更を何もしないでパッチを出し直すこと
+
+可能な限り最高の技術的解決を求めているコミュニティでは、パッチがどのく
+らい有益なのかについては常に異なる意見があります。あなたは協調的である
+べきですし、また、あなたのアイディアをカーネルに対してうまく合わせるよ
+うにすることが望まれています。もしくは、最低限あなたのアイディアがそれ
+だけの価値があるとすすんで証明するようにしなければなりません。
+正しい解決に向かって進もうという意志がある限り、間違うことがあっても許
+容されることを忘れないでください。
+
+あなたの最初のパッチに単に 1ダースもの修正を求めるリストの返答になるこ
+とも普通のことです。これはあなたのパッチが受け入れられないということで
+は *ありません*、そしてあなた自身に反対することを意味するのでも *ありま
+せん*。単に自分のパッチに対して指摘された問題を全て修正して再送すれば
+良いのです。
+
+
+カーネルコミュニティと企業組織のちがい
+-----------------------------------------------------------------
+
+カーネルコミュニティは大部分の伝統的な会社の開発環境とは異ったやり方で
+動いています。以下は問題を避けるためにできると良いことのリストです-
+
+ あなたの提案する変更について言うときのうまい言い方:
+
+ - "これは複数の問題を解決します"
+ - "これは2000行のコードを削除します"
+ - "以下のパッチは、私が言おうとしていることを説明するものです"
+ - "私はこれを5つの異なるアーキテクチャでテストしたのですが..."
+ - "以下は一連の小さなパッチ群ですが..."
+ - "これは典型的なマシンでの性能を向上させます.."
+
+ やめた方が良い悪い言い方:
+
+ - このやり方で AIX/ptx/Solaris ではできたので、できるはずだ
+ - 私はこれを20年もの間やってきた、だから
+ - これは、私の会社が金儲けをするために必要だ
+ - これは我々のエンタープライズ向け商品ラインのためである
+ - これは 私が自分のアイディアを記述した、1000ページの設計資料である
+ - 私はこれについて、6ケ月作業している。
+ - 以下は ... に関する5000行のパッチです
+ - 私は現在のぐちゃぐちゃを全部書き直した、それが以下です...
+ - 私は〆切がある、そのためこのパッチは今すぐ適用される必要がある
+
+カーネルコミュニティが大部分の伝統的なソフトウェアエンジニアリングの労
+働環境と異なるもう一つの点は、やりとりに顔を合わせないということです。
+email と irc を第一のコミュニケーションの形とする一つの利点は、性別や
+民族の差別がないことです。Linux カーネルの職場環境は女性や少数民族を受
+容します。なぜなら、email アドレスによってのみあなたが認識されるからで
+す。
+国際的な側面からも活動領域を均等にするようにします。なぜならば、あなた
+は人の名前で性別を想像できないからです。ある男性が アンドレアという名
+前で、女性の名前は パット かもしれません (訳注 Andrea は米国では女性、
+それ以外(欧州など)では男性名として使われることが多い。同様に、Pat は
+Patricia (主に女性名)や Patrick (主に男性名)の略称)。
+Linux カーネルの活動をして、意見を表明したことがある大部分の女性は、前
+向きな経験をもっています。
+
+言葉の壁は英語が得意でない一部の人には問題になります。
+メーリングリストの中できちんとアイディアを交換するには、相当うまく英語
+を操れる必要があることもあります。そのため、あなたは自分のメール
+を送る前に英語で意味が通じているかをチェックすることをお薦めします。
+
+変更を分割する
+---------------------
+
+Linux カーネルコミュニティは、一度に大量のコードの塊を喜んで受容するこ
+とはありません。変更は正確に説明される必要があり、議論され、小さい、個
+別の部分に分割する必要があります。これはこれまで多くの会社がやり慣れて
+きたことと全く正反対のことです。あなたのプロポーザルは、開発プロセスのと
+ても早い段階から紹介されるべきです。そうすれば あなたは自分のやってい
+ることにフィードバックを得られます。これは、コミュニティからみれば、あ
+なたが彼らと一緒にやっているように感じられ、単にあなたの提案する機能の
+ゴミ捨て場として使っているのではない、と感じられるでしょう。
+しかし、一度に 50 もの email をメーリングリストに送りつけるようなことは
+やってはいけません、あなたのパッチ群はいつもどんな時でもそれよりは小さ
+くなければなりません。
+
+パッチを分割する理由は以下です-
+
+1) 小さいパッチはあなたのパッチが適用される見込みを大きくします、カー
+ ネルの人達はパッチが正しいかどうかを確認する時間や労力をかけないか
+ らです。5行のパッチはメンテナがたった1秒見るだけで適用できます。
+ しかし、500行のパッチは、正しいことをレビューするのに数時間かかるか
+ もしれません(時間はパッチのサイズなどにより指数関数に比例してかかり
+ ます)
+
+ 小さいパッチは何かあったときにデバッグもとても簡単になります。パッ
+ チを1個1個取り除くのは、とても大きなパッチを当てた後に(かつ、何かお
+ かしくなった後で)解剖するのに比べればとても簡単です。
+
+2) 小さいパッチを送るだけでなく、送るまえに、書き直して、シンプルにす
+ る(もしくは、単に順番を変えるだけでも)ことも、とても重要です。
+
+以下はカーネル開発者の Al Viro のたとえ話です:
+
+ "生徒の数学の宿題を採点する先生のことを考えてみてください、先
+ 生は生徒が解に到達するまでの試行錯誤を見たいとは思わないでしょ
+ う。先生は簡潔な最高の解を見たいのです。良い生徒はこれを知って
+ おり、そして最終解の前の中間作業を提出することは決してないので
+ す"
+
+ カーネル開発でもこれは同じです。メンテナ達とレビューア達は、
+ 問題を解決する解の背後になる思考プロセスを見たいとは思いません。
+ 彼らは単純であざやかな解決方法を見たいのです。
+
+あざやかな解を説明するのと、コミュニティと共に仕事をし、未解決の仕事を
+議論することのバランスをキープするのは難しいかもしれません。
+ですから、開発プロセスの早期段階で改善のためのフィードバックをもらうよ
+うにするのも良いですが、変更点を小さい部分に分割して全体ではまだ完成し
+ていない仕事を(部分的に)取り込んでもらえるようにすることも良いことです。
+
+また、でき上がっていないものや、"将来直す" ようなパッチを、本流に含め
+てもらうように送っても、それは受け付けられないことを理解してください。
+
+あなたの変更を正当化する
+-------------------
+
+あなたのパッチを分割するのと同時に、なぜその変更を追加しなければならな
+いかを Linux コミュニティに知らせることはとても重要です。新機能は必要
+性と有用性で正当化されなければなりません。
+
+あなたの変更の説明
+--------------------
+
+あなたのパッチを送付する場合には、メールの中のテキストで何を言うかにつ
+いて、特別に注意を払ってください。この情報はパッチの ChangeLog に使わ
+れ、いつも皆がみられるように保管されます。これは次のような項目を含め、
+パッチを完全に記述するべきです-
+
+ - なぜ変更が必要か
+ - パッチ全体の設計アプローチ
+ - 実装の詳細
+ - テスト結果
+
+これについて全てがどのようにあるべきかについての詳細は、以下のドキュメ
+ントの ChangeLog セクションを見てください-
+ "The Perfect Patch"
+ http://www.ozlabs.org/~akpm/stuff/tpp.txt
+
+これらのどれもが、時にはとても困難です。これらの慣例を完璧に実施するに
+は数年かかるかもしれません。これは継続的な改善のプロセスであり、そのた
+めには多数の忍耐と決意を必要とするものです。でも、諦めないで、これは可
+能なことです。多数の人がすでにできていますし、彼らも皆最初はあなたと同
+じところからスタートしたのですから。
+
+Paolo Ciarrocchi に感謝、彼は彼の書いた "Development Process"
+(http://lwn.net/Articles/94386/) セクションをこのテキストの原型にする
+ことを許可してくれました。Rundy Dunlap と Gerrit Huizenga はメーリング
+リストでやるべきこととやってはいけないことのリストを提供してくれました。
+以下の人々のレビュー、コメント、貢献に感謝。
+Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
+Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi
+Kleen, Vadim Lobanov, Jesper Juhl, Adrian Bunk, Keri Harris, Frans Pop,
+David A. Wheeler, Junio Hamano, Michael Kerrisk, と Alex Shepard
+彼らの支援なしでは、このドキュメントはできなかったでしょう。
+
+Maintainer: Greg Kroah-Hartman <greg@kroah.com>
diff --git a/Documentation/ja_JP/SubmitChecklist b/Documentation/ja_JP/SubmitChecklist
new file mode 100644
index 000000000..cb5507b1a
--- /dev/null
+++ b/Documentation/ja_JP/SubmitChecklist
@@ -0,0 +1,111 @@
+NOTE:
+This is a version of Documentation/SubmitChecklist into Japanese.
+This document is maintained by Takenori Nagano <t-nagano@ah.jp.nec.com>
+and the JF Project team <http://www.linux.or.jp/JF/>.
+If you find any difference between this document and the original file
+or a problem with the translation,
+please contact the maintainer of this file or JF project.
+
+Please also note that the purpose of this file is to be easier to read
+for non English (read: Japanese) speakers and is not intended as a
+fork. So if you have any comments or updates of this file, please try
+to update the original English file first.
+
+Last Updated: 2008/07/14
+==================================
+これは、
+linux-2.6.26/Documentation/SubmitChecklist の和訳です。
+
+翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
+翻訳日: 2008/07/14
+翻訳者: Takenori Nagano <t-nagano at ah dot jp dot nec dot com>
+校正者: Masanori Kobayashi さん <zap03216 at nifty dot ne dot jp>
+==================================
+
+
+Linux カーネルパッチ投稿者向けチェックリスト
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+本書では、パッチをより素早く取り込んでもらいたい開発者が実践すべき基本的な事柄
+をいくつか紹介します。ここにある全ての事柄は、Documentation/SubmittingPatches
+などのLinuxカーネルパッチ投稿に際しての心得を補足するものです。
+
+ 1: 妥当なCONFIGオプションや変更されたCONFIGオプション、つまり =y, =m, =n
+ 全てで正しくビルドできることを確認してください。その際、gcc及びリンカが
+ warningやerrorを出していないことも確認してください。
+
+ 2: allnoconfig, allmodconfig オプションを用いて正しくビルドできることを
+ 確認してください。
+
+ 3: 手許のクロスコンパイルツールやOSDLのPLMのようなものを用いて、複数の
+ アーキテクチャにおいても正しくビルドできることを確認してください。
+
+ 4: 64bit長の'unsigned long'を使用しているppc64は、クロスコンパイルでの
+ チェックに適当なアーキテクチャです。
+
+ 5: カーネルコーディングスタイルに準拠しているかどうか確認してください(!)
+
+ 6: CONFIGオプションの追加・変更をした場合には、CONFIGメニューが壊れていない
+ ことを確認してください。
+
+ 7: 新しくKconfigのオプションを追加する際には、必ずそのhelpも記述してください。
+
+ 8: 適切なKconfigの依存関係を考えながら慎重にチェックしてください。
+ ただし、この作業はマシンを使ったテストできちんと行うのがとても困難です。
+ うまくやるには、自分の頭で考えることです。
+
+ 9: sparseを利用してちゃんとしたコードチェックをしてください。
+
+10: 'make checkstack' と 'make namespacecheck' を利用し、問題が発見されたら
+ 修正してください。'make checkstack' は明示的に問題を示しませんが、どれか
+ 1つの関数が512バイトより大きいスタックを使っていれば、修正すべき候補と
+ なります。
+
+11: グローバルなkernel API を説明する kernel-doc をソースの中に含めてください。
+ ( staticな関数においては必須ではありませんが、含めてもらっても結構です )
+ そして、'make htmldocs' もしくは 'make mandocs' を利用して追記した
+ ドキュメントのチェックを行い、問題が見つかった場合には修正を行ってください。
+
+12: CONFIG_PREEMPT, CONFIG_DEBUG_PREEMPT, CONFIG_DEBUG_SLAB,
+ CONFIG_DEBUG_PAGEALLOC, CONFIG_DEBUG_MUTEXES, CONFIG_DEBUG_SPINLOCK,
+ CONFIG_DEBUG_ATOMIC_SLEEP これら全てを同時に有効にして動作確認を
+ 行ってください。
+
+13: CONFIG_SMP, CONFIG_PREEMPT を有効にした場合と無効にした場合の両方で
+ ビルドした上、動作確認を行ってください。
+
+14: もしパッチがディスクのI/O性能などに影響を与えるようであれば、
+ 'CONFIG_LBDAF'オプションを有効にした場合と無効にした場合の両方で
+ テストを実施してみてください。
+
+15: lockdepの機能を全て有効にした上で、全てのコードパスを評価してください。
+
+16: /proc に新しいエントリを追加した場合には、Documentation/ 配下に
+ 必ずドキュメントを追加してください。
+
+17: 新しいブートパラメータを追加した場合には、
+ 必ずDocumentation/kernel-parameters.txt に説明を追加してください。
+
+18: 新しくmoduleにパラメータを追加した場合には、MODULE_PARM_DESC()を
+ 利用して必ずその説明を記述してください。
+
+19: 新しいuserspaceインタフェースを作成した場合には、Documentation/ABI/ に
+ Documentation/ABI/README を参考にして必ずドキュメントを追加してください。
+
+20: 'make headers_check'を実行して全く問題がないことを確認してください。
+
+21: 少なくともslabアロケーションとpageアロケーションに失敗した場合の
+ 挙動について、fault-injectionを利用して確認してください。
+ Documentation/fault-injection/ を参照してください。
+
+ 追加したコードがかなりの量であったならば、サブシステム特有の
+ fault-injectionを追加したほうが良いかもしれません。
+
+22: 新たに追加したコードは、`gcc -W'でコンパイルしてください。
+ このオプションは大量の不要なメッセージを出力しますが、
+ "warning: comparison between signed and unsigned" のようなメッセージは、
+ バグを見つけるのに役に立ちます。
+
+23: 投稿したパッチが -mm パッチセットにマージされた後、全ての既存のパッチや
+ VM, VFS およびその他のサブシステムに関する様々な変更と、現時点でも共存
+ できることを確認するテストを行ってください。
diff --git a/Documentation/ja_JP/SubmittingPatches b/Documentation/ja_JP/SubmittingPatches
new file mode 100644
index 000000000..5d6ae639b
--- /dev/null
+++ b/Documentation/ja_JP/SubmittingPatches
@@ -0,0 +1,719 @@
+NOTE:
+This is a version of Documentation/SubmittingPatches into Japanese.
+This document is maintained by Keiichi KII <k-keiichi@bx.jp.nec.com>
+and the JF Project team <http://www.linux.or.jp/JF/>.
+If you find any difference between this document and the original file
+or a problem with the translation,
+please contact the maintainer of this file or JF project.
+
+Please also note that the purpose of this file is to be easier to read
+for non English (read: Japanese) speakers and is not intended as a
+fork. So if you have any comments or updates of this file, please try
+to update the original English file first.
+
+Last Updated: 2011/06/09
+
+==================================
+これは、
+linux-2.6.39/Documentation/SubmittingPatches の和訳
+です。
+翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
+翻訳日: 2011/06/09
+翻訳者: Keiichi Kii <k-keiichi at bx dot jp dot nec dot com>
+校正者: Masanari Kobayashi さん <zap03216 at nifty dot ne dot jp>
+ Matsukura さん <nbh--mats at nifty dot com>
+ Takeshi Hamasaki さん <hmatrjp at users dot sourceforge dot jp>
+==================================
+
+ Linux カーネルに変更を加えるための Howto
+ 又は
+ かの Linus Torvalds の取り扱い説明書
+
+Linux カーネルに変更を加えたいと思っている個人又は会社にとって、パッ
+チの投稿に関連した仕組みに慣れていなければ、その過程は時々みなさんを
+おじけづかせることもあります。この文章はあなたの変更を大いに受け入れ
+てもらえやすくする提案を集めたものです。
+
+コードを投稿する前に、Documentation/SubmitChecklist の項目リストに目
+を通してチェックしてください。もしあなたがドライバーを投稿しようとし
+ているなら、Documentation/SubmittingDrivers にも目を通してください。
+
+--------------------------------------------
+セクション1 パッチの作り方と送り方
+--------------------------------------------
+
+1) 「 diff -up 」
+------------
+
+パッチの作成には「 diff -up 」又は「 diff -uprN 」を使ってください。
+
+Linux カーネルに対する全ての変更は diff(1) コマンドによるパッチの形式で
+生成してください。パッチを作成するときには、diff(1) コマンドに「 -u 」引
+数を指定して、unified 形式のパッチを作成することを確認してください。また、
+変更がどの C 関数で行われたのかを表示する「 -p 」引数を使ってください。
+この引数は生成した差分をずっと読みやすくしてくれます。パッチは Linux
+カーネルソースの中のサブディレクトリではなく Linux カーネルソースのルート
+ディレクトリを基準にしないといけません。
+
+1個のファイルについてのパッチを作成するためには、ほとんどの場合、
+以下の作業を行えば十分です。
+
+ SRCTREE= linux-2.6
+ MYFILE= drivers/net/mydriver.c
+
+ cd $SRCTREE
+ cp $MYFILE $MYFILE.orig
+ vi $MYFILE # make your change
+ cd ..
+ diff -up $SRCTREE/$MYFILE{.orig,} > /tmp/patch
+
+複数のファイルについてのパッチを作成するためには、素の( vanilla )、す
+なわち変更を加えてない Linux カーネルを展開し、自分の Linux カーネル
+ソースとの差分を生成しないといけません。例えば、
+
+ MYSRC= /devel/linux-2.6
+
+ tar xvfz linux-2.6.12.tar.gz
+ mv linux-2.6.12 linux-2.6.12-vanilla
+ diff -uprN -X linux-2.6.12-vanilla/Documentation/dontdiff \
+ linux-2.6.12-vanilla $MYSRC > /tmp/patch
+
+dontdiff ファイルには Linux カーネルのビルドプロセスの過程で生成された
+ファイルの一覧がのっています。そして、それらはパッチを生成する diff(1)
+コマンドで無視されるべきです。dontdiff ファイルは 2.6.12 以後のバージョ
+ンの Linux カーネルソースツリーに含まれています。それより前のバージョン
+の Linux カーネルソースツリーに対する dontdiff ファイルは、
+<http://www.xenotime.net/linux/doc/dontdiff>から取得することができます。
+
+投稿するパッチの中に関係のない余分なファイルが含まれていないことを確
+認してください。diff(1) コマンドで生成したパッチがあなたの意図したとお
+りのものであることを確認してください。
+
+もしあなたのパッチが多くの差分を生み出すのであれば、あなたはパッチ
+を意味のあるひとまとまりごとに分けたいと思うかもしれません。
+これは他のカーネル開発者にとってレビューしやすくなるので、あなたの
+パッチを受け入れてもらうためにはとても重要なことです。これを補助でき
+る多くのスクリプトがあります。
+
+Quilt:
+http://savannah.nongnu.org/projects/quilt
+
+2) パッチに対する説明
+
+パッチの中の変更点に対する技術的な詳細について説明してください。
+
+説明はできる限り具体的に。もっとも悪い説明は「ドライバー X を更新」、
+「ドライバー X に対するバグフィックス」あるいは「このパッチはサブシス
+テム X に対する更新を含んでいます。どうか取り入れてください。」などです。
+
+パッチの説明を Linux カーネルのソースコードマネジメントシステム「 git 」の
+コミットログとして簡単に引用できる形で書けば、メンテナから感謝されるでしょう。
+以下の #15 を見てください。
+
+説明が長くなりだしたのであれば、おそらくそれはパッチを分ける必要がある
+という兆候です。次の #3 を見てください。
+
+パッチ(シリーズ)を(再)投稿する時、十分なパッチの説明とそのパッチが必要な理由を
+パッチに含めてください。ただ「これはパッチ(シリーズ)のバージョン N」とだけ
+書かないでください。そして、パッチをマージする人にパッチの説明を探させそれを
+パッチに追記させるため、過去のバージョンのパッチやそのパッチの URL を参照する
+手間をかけさせないでください。
+つまり、パッチシリーズとその説明は一緒にあるべきです。これはパッチをマージする
+人、レビューする人、どちらのためにもなります。レビューする人の中には、おそらく
+過去のバージョンのパッチを受け取ってもいない人がいます。
+
+登録済みのバグエントリを修正するパッチであれば、そのバグエントリを示すバグ ID
+や URL を明記してください。
+
+3) パッチの分割
+
+意味のあるひとまとまりごとに変更を個々のパッチファイルに分けてください。
+
+例えば、もし1つのドライバーに対するバグフィックスとパフォーマンス強
+化の両方の変更を含んでいるのであれば、その変更を2つ以上のパッチに分
+けてください。もし変更箇所に API の更新と、その新しい API を使う新たな
+ドライバーが含まれているなら、2つのパッチに分けてください。
+
+一方で、もしあなたが多数のファイルに対して意味的に同じ1つの変更を加え
+るのであれば、その変更を1つのパッチにまとめてください。言いかえると、
+意味的に同じ1つの変更は1つのパッチの中に含まれます。
+
+あるパッチが変更を完結させるために他のパッチに依存していたとしても、
+それは問題ありません。パッチの説明の中で「このパッチはパッチ X に依存
+している」と簡単に注意書きをつけてください。
+
+もしパッチをより小さなパッチの集合に凝縮することができないなら、まずは
+15かそこらのパッチを送り、そのレビューと統合を待って下さい。
+
+4) パッチのスタイルチェック
+
+あなたのパッチが基本的な( Linux カーネルの)コーディングスタイルに違反し
+ていないかをチェックして下さい。その詳細を Documentation/CodingStyle で
+見つけることができます。コーディングスタイルの違反はレビューする人の
+時間を無駄にするだけなので、恐らくあなたのパッチは読まれることすらなく
+拒否されるでしょう。
+
+あなたはパッチを投稿する前に最低限パッチスタイルチェッカー
+( scripts/checkpatch.pl )を利用してパッチをチェックすべきです。
+もしパッチに違反がのこっているならば、それらの全てについてあなたは正当な
+理由を示せるようにしておく必要があります。
+
+5) 電子メールの宛先の選び方
+
+MAINTAINERS ファイルとソースコードに目を通してください。そして、その変
+更がメンテナのいる特定のサブシステムに加えられるものであることが分か
+れば、その人に電子メールを送ってください。
+
+もし、メンテナが載っていなかったり、メンテナからの応答がないなら、
+LKML ( linux-kernel@vger.kernel.org )へパッチを送ってください。ほとんど
+のカーネル開発者はこのメーリングリストに目を通しており、変更に対して
+コメントを得ることができます。
+
+15個より多くのパッチを同時に vger.kernel.org のメーリングリストへ送らな
+いでください!!!
+
+Linus Torvalds は Linux カーネルに入る全ての変更に対する最終的な意思決定者
+です。電子メールアドレスは torvalds@linux-foundation.org になります。彼は
+多くの電子メールを受け取っているため、できる限り彼に電子メールを送るのは
+避けるべきです。
+
+バグフィックスであったり、自明な変更であったり、話し合いをほとんど
+必要としないパッチは Linus へ電子メールを送るか CC しなければなりません。
+話し合いを必要としたり、明確なアドバンテージがないパッチは、通常まず
+は LKML へ送られるべきです。パッチが議論された後にだけ、そのパッチを
+Linus へ送るべきです。
+
+6) CC (カーボンコピー)先の選び方
+
+特に理由がないなら、LKML にも CC してください。
+
+Linus 以外のカーネル開発者は変更に気づく必要があり、その結果、彼らはそ
+の変更に対してコメントをくれたり、コードに対してレビューや提案をくれ
+るかもしれません。LKML とは Linux カーネル開発者にとって一番中心的なメー
+リングリストです。USB やフレームバッファデバイスや VFS や SCSI サブシステ
+ムなどの特定のサブシステムに関するメーリングリストもあります。あなた
+の変更に、はっきりと関連のあるメーリングリストについて知りたければ
+MAINTAINERS ファイルを参照してください。
+
+VGER.KERNEL.ORG でホスティングされているメーリングリストの一覧が下記の
+サイトに載っています。
+<http://vger.kernel.org/vger-lists.html>
+
+もし、変更がユーザランドのカーネルインタフェースに影響を与え
+るのであれば、MAN-PAGES のメンテナ( MAINTAINERS ファイルに一覧
+があります)に man ページのパッチを送ってください。少なくとも
+情報がマニュアルページの中に入ってくるように、変更が起きたという
+通知を送ってください。
+
+たとえ、メンテナが #5 で反応がなかったとしても、メンテナのコードに変更を
+加えたときには、いつもメンテナに CC するのを忘れないようにしてください。
+
+小さなパッチであれば、Trivial Patch Monkey(ちょっとしたパッチを集めている)
+<trivial@kernel.org>に CC してもいいです。その現管理者については MAINTAINERS
+ファイルを見てください。ちょっとしたパッチとは以下のルールのどれか1つを満たして
+いなければなりません。
+ ・ドキュメントのスペルミスの修正
+ ・grep(1) コマンドによる検索を困難にしているスペルの修正
+ ・コンパイル時の警告の修正(無駄な警告が散乱することは好ましくないた
+ めです)
+ ・コンパイル問題の修正(それらの修正が本当に正しい場合に限る)
+ ・実行時の問題の修正(それらの修正が本当に問題を修正している場合に限る)
+ ・廃止予定の関数やマクロを使用しているコードの除去(例 check_region )
+ ・問い合わせ先やドキュメントの修正
+ ・移植性のないコードから移植性のあるコードへの置き換え(小さい範囲で
+ あればアーキテクチャ特有のことでも他の人がコピーできます)
+ ・作者やメンテナによる修正(すなわち patch monkey の再転送モード)
+
+7) MIME やリンクや圧縮ファイルや添付ファイルではなくプレインテキストのみ
+
+Linus や他のカーネル開発者はあなたが投稿した変更を読んで、コメントでき
+る必要があります。カーネル開発者にとって、あなたが書いたコードの特定の
+部分にコメントをするために、標準的な電子メールクライアントで変更が引用
+できることは重要です。
+
+上記の理由で、すべてのパッチは文中に含める形式の電子メールで投稿さ
+れるべきです。警告:あなたがパッチをコピー&ペーストする際には、パッ
+チを改悪するエディターの折り返し機能に注意してください。
+
+パッチを圧縮の有無に関わらず MIME 形式で添付しないでください。多くのポ
+ピュラーな電子メールクライアントは MIME 形式の添付ファイルをプレーンテ
+キストとして送信するとは限らないでしょう。そうなると、電子メールクラ
+イアントがコードに対するコメントを付けることをできなくします。また、
+MIME 形式の添付ファイルは Linus に手間を取らせることになり、その変更を
+受け入れてもらう可能性が低くなってしまいます。
+
+例外:お使いの電子メールクライアントがパッチをめちゃくちゃにするので
+あれば、誰かが MIME 形式のパッチを再送するよう求めるかもしれません。
+
+余計な変更を加えずにあなたのパッチを送信するための電子メールクライアントの設定
+のヒントについては Documentation/email-clients.txt を参照してください。
+
+8) 電子メールのサイズ
+
+パッチを Linus へ送るときは常に #7 の手順に従ってください。
+
+大きなパッチはメーリングリストやメンテナにとって不親切です。パッチが
+未圧縮で 300KB を超えるようであるなら、インターネット上のアクセス可能な
+サーバに保存し、保存場所を示す URL を伝えるほうが適切です。
+
+9) カーネルバージョンの明記
+
+パッチが対象とするカーネルのバージョンをパッチの概要か電子メールの
+サブジェクトに付けることが重要です。
+
+パッチが最新バージョンのカーネルに正しく適用できなければ、Linus は
+そのパッチを採用しないでしょう。
+
+10) がっかりせず再投稿
+
+パッチを投稿した後は、辛抱強く待っていてください。Linus があなたのパッ
+チを気に入って採用すれば、Linus がリリースする次のバージョンのカーネル
+の中で姿を見せるでしょう。
+
+しかし、パッチが次のバージョンのカーネルに入っていないなら、いくつもの
+理由があるのでしょう。その原因を絞り込み、間違っているものを正し、更新
+したパッチを投稿するのはあなたの仕事です。
+
+Linus があなたのパッチに対して何のコメントもなく不採用にすることは極め
+て普通のことです。それは自然な姿です。もし、Linus があなたのパッチを受
+け取っていないのであれば、以下の理由が考えられます。
+* パッチが最新バージョンの Linux カーネルにきちんと適用できなかった
+* パッチが LKML で十分に議論されていなかった
+* スタイルの問題(セクション2を参照)
+* 電子メールフォーマットの問題(このセクションを参照)
+* パッチに対する技術的な問題
+* Linus はたくさんの電子メールを受け取っているので、どさくさに紛れて見
+ 失った
+* 不愉快にさせている
+
+判断できない場合は、LKML にコメントを頼んでください。
+
+11) サブジェクトに「 PATCH 」
+
+Linus や LKML への大量の電子メールのために、サブジェクトのプレフィックスに
+「 [PATCH] 」を付けることが慣習となっています。これによって Linus や他の
+カーネル開発者がパッチであるのか、又は、他の議論に関する電子メールであるの
+かをより簡単に識別できます。
+
+12) パッチへの署名
+
+誰が何をしたのかを追いかけやすくするために (特に、パッチが何人かの
+メンテナを経て最終的に Linux カーネルに取り込まれる場合のために)、電子
+メールでやり取りされるパッチに対して「 sign-off 」という手続きを導入し
+ました。
+
+「 sign-off 」とは、パッチがあなたの書いたものであるか、あるいは、
+あなたがそのパッチをオープンソースとして提供する権利を保持している、
+という証明をパッチの説明の末尾に一行記載するというものです。
+ルールはとても単純です。以下の項目を確認して下さい。
+
+ 原作者の証明書( DCO ) 1.1
+
+ このプロジェクトに寄与するものとして、以下のことを証明する。
+
+ (a) 本寄与は私が全体又は一部作成したものであり、私がそのファイ
+ ル中に明示されたオープンソースライセンスの下で公開する権利
+ を持っている。もしくは、
+
+ (b) 本寄与は、私が知る限り、適切なオープンソースライセンスでカバ
+ ーされている既存の作品を元にしている。同時に、私はそのライセ
+ ンスの下で、私が全体又は一部作成した修正物を、ファイル中で示
+ される同一のオープンソースライセンスで(異なるライセンスの下で
+ 投稿することが許可されている場合を除いて)投稿する権利を持って
+ いる。もしくは、
+
+ (c) 本寄与は(a)、(b)、(c)を証明する第3者から私へ直接提供された
+ ものであり、私はそれに変更を加えていない。
+
+ (d) 私はこのプロジェクトと本寄与が公のものであることに理解及び同意す
+ る。同時に、関与した記録(投稿の際の全ての個人情報と sign-off を
+ 含む)が無期限に保全されることと、当該プロジェクト又は関連する
+ オープンソースライセンスに沿った形で再配布されることに理解及び
+ 同意する。
+
+もしこれに同意できるなら、以下のような1行を追加してください。
+
+ Signed-off-by: Random J Developer <random@developer.example.org>
+
+実名を使ってください。(残念ですが、偽名や匿名による寄与はできません。)
+
+人によっては sign-off の近くに追加のタグを付加しています。それらは今のところ
+無視されますが、あなたはそのタグを社内の手続きに利用したり、sign-off に特別
+な情報を示したりすることができます。
+
+あなたがサブシステムまたはブランチのメンテナであれば、受け取ったパッチを自身の
+ツリーにマージするために、わずかに変更が必要となる場合があります。なぜなら
+あなたのツリーの中のコードと投稿者のツリーの中のコードは同一ではないためです。
+もし、あなたが厳密に上記ルール(c)にこだわるのであれば、投稿者に再度差分を
+とるよう依頼すべきです。しかし、これは時間とエネルギーを非生産的に浪費する
+ことになります。ルール(b)はあなたにコードを修正する権利を与えてくれます。
+しかし、投稿者のコードを修正し、その修正によるバグを投稿者に押し付けてしまう
+ことはとても失礼なことです。この問題を解決するために、末尾の投稿者の
+Signed-off-by とあなたがその末尾に追加する Signed-off-by の間に、修正を
+加えたことを示す1行を追加することが推奨されています。
+(その1行の書き方に)決まりはありませんが、大括弧の中に電子メールアドレスや氏名
+と修正内容を記載するやり方は目につきやすく、最終段階での変更の責任があなたに
+あることを明確にするのに十分な方法のようです。例えば、
+
+ Signed-off-by: Random J Developer <random@developer.example.org>
+ [lucky@maintainer.example.org: struct foo moved from foo.c to foo.h]
+ Signed-off-by: Lucky K Maintainer <lucky@maintainer.example.org>
+
+あなたが安定版のブランチを管理しており、作成者のクレジット、変更の追跡、
+修正のマージ、と同時に苦情からの投稿者の保護を行いたい場合、この慣習は特に
+有用となります。いかなる事情があってもチェンジログに出てくる作成者の
+アイデンティティ情報(From ヘッダ)は変更できないことに注意してください。
+
+バックポートする人のための特別な注意事項。追跡を容易に行うために、コミット
+メッセージのトップ(サブジェクト行のすぐ後)にパッチの起源を示す情報を記述する
+ことは一般的で有用な慣習です。例えば、これは 2.6-stable ツリーでの一例です。
+
+ Date: Tue May 13 19:10:30 2008 +0000
+
+ SCSI: libiscsi regression in 2.6.25: fix nop timer handling
+
+ commit 4cf1043593db6a337f10e006c23c69e5fc93e722 upstream
+
+そして、これは 2.4 ツリーでの一例です。
+
+ Date: Tue May 13 22:12:27 2008 +0200
+
+ wireless, airo: waitbusy() won't delay
+
+ [backport of 2.6 commit b7acbdfbd1f277c1eb23f344f899cfa4cd0bf36a]
+
+どんな形式であれ、この情報はあなたのツリーを追跡する人やあなたのツリーのバグを
+解決しようとしている人にとって価値のある支援となります。
+
+13) いつ Acked-by: と Cc: を使うのか
+
+「 Signed-off-by: 」タグはその署名者がパッチの開発に関わっていたことやパッチ
+の伝播パスにいたことを示しています。
+
+ある人が直接パッチの準備や作成に関わっていないけれど、その人のパッチに対す
+る承認を記録し、示したいとします。その場合、その人を示すのに Acked-by: が使
+えます。Acked-by: はパッチのチェンジログにも追加されます。
+
+パッチの影響を受けるコードのメンテナがパッチに関わっていなかったり、パッチ
+の伝播パスにいなかった時にも、メンテナは Acked-by: をしばしば利用します。
+
+Acked-by: は Signed-off-by: のように公式なタグではありません。それはメンテナが
+少なくともパッチをレビューし、同意を示しているという記録です。そのような
+ことからパッチをマージする人がメンテナの「うん、良いと思うよ」という発言を
+Acked-by: へ置き換えることがあります。
+
+Acked-by: が必ずしもパッチ全体の承認を示しているわけではありません。例えば、
+あるパッチが複数のサブシステムへ影響を与えており、その中の1つのサブシステム
+のメンテナからの Acked-by: を持っているとします。その場合、Acked-by: は通常
+そのメンテナのコードに影響を与える一部分だけに対する承認を示しています。
+この点は、ご自分で判断してください。(その Acked-by: が)疑わしい場合は、
+メーリングリストアーカイブの中の大元の議論を参照すべきです。
+
+パッチにコメントする機会を持っていたが、その時にコメントしなかった人がいれば、
+その人を指す「Cc:」タグを任意で追加してもかまいません。これは指定された人からの
+明確なアクションなしに付与できる唯一のタグです。
+このタグはパッチに関心があると思われる人達がそのパッチの議論に含まれていたこと
+を明文化します。
+
+14) Reported-by と Tested-by: と Reviewed-by: の利用
+
+他の誰かによって報告された問題を修正するパッチであれば、問題報告者という寄与を
+クレジットするために、Reported-by: タグを追加することを検討してください。
+こまめにバグ報告者をクレジットしていくことで、うまくいけばその人たちが将来再び
+コミュニティの力となってくれるでしょう。
+ただし、報告者の許可無くこのタグを追加しないように注意してください。特に、
+問題が公の場で報告されていなかったのであれば。
+
+Tested-by: タグはタグで指定された人によって(ある環境下で)パッチのテストに成功
+していることを示します。このタグはメンテナにテストが実施済みであることを
+知らせ、将来の関連パッチのテスト協力者を見つける方法を提供し、テスト実施者に
+対するクレジットを保証します。
+
+Reviewed-by: タグは、それとは異なり、下記のレビューア宣言の下にレビューされ、
+受け入れ可能とみなされたパッチであることを示します。
+
+ レビューアによる監督宣言
+
+ 私は Reviewed-by: タグを提示することによって、以下のことを明言する。
+
+ (a) 私はメインラインカーネルへの統合に向け、その妥当性及び「即応性
+ (訳注)」を検証し、技術的側面からパッチをレビュー済みである。
+
+ 訳注:
+ 「即応性」の原文は "readiness"。
+ パッチが十分な品質を持っており、メインラインカーネルへの統合を即座に
+ 行うことができる状態であるかどうかを "readiness" という単語で表現
+ している。
+
+ (b) パッチに関するあらゆる問題、懸念、あるいは、疑問は投稿者へ伝達済み
+ である。私はそれらのコメントに対する投稿者の返答に満足している。
+
+ (c) 投稿に伴い改良されるコードがある一方で、現時点で、私は(1)それが
+ カーネルにとって価値のある変更であること、そして、(2)統合に際して
+ 議論になり得るような問題はないものと確信している。
+
+ (d) 私はパッチをレビューし適切であると確信している一方で、あらゆる
+ 状況においてその宣言した目的や機能が正しく実現することに関して、
+ いかなる保証もしない(特にどこかで明示しない限り)。
+
+Reviewd-by タグはそのパッチがカーネルに対して適切な修正であって、深刻な技術的
+問題を残していないという意見の宣言です。興味のあるレビューアは誰でも(レビュー
+作業を終えたら)パッチに対して Reviewed-by タグを提示できます。このタグは
+レビューアの寄与をクレジットする働き、レビューの進捗の度合いをメンテナに
+知らせる働きを持ちます。そのパッチの領域に詳しく、そして、しっかりとした
+レビューを実施したレビューアによって提供される時、Reviewed-by: タグがあなたの
+パッチをカーネルにマージする可能性を高めるでしょう。
+
+15) 標準的なパッチのフォーマット
+
+標準的なパッチのサブジェクトは以下のとおりです。
+
+ Subject: [PATCH 001/123] subsystem: summary phrase
+
+標準的なパッチの、電子メールのボディは以下の項目を含んでいます。
+
+ - パッチの作成者を明記する「 from 」行
+
+ - 空行
+
+ - 説明本体。これはこのパッチを説明するために無期限のチェンジログ
+ (変更履歴)にコピーされます。
+
+ - 上述した「 Signed-off-by: 」行。これも説明本体と同じくチェン
+ ジログ内にコピーされます。
+
+ - マーカー行は単純に「 --- 」です。
+
+ - 余計なコメントは、チェンジログには不適切です。
+
+ - 実際のパッチ(差分出力)
+
+サブジェクト行のフォーマットは、アルファベット順で電子メールをとても
+ソートしやすいものになっています。(ほとんどの電子メールクライアント
+はソートをサポートしています)パッチのサブジェクトの連番は0詰めであ
+るため、数字でのソートとアルファベットでのソートは同じ結果になります。
+
+電子メールのサブジェクト内のサブシステム表記は、パッチが適用される
+分野またはサブシステムを識別できるようにすべきです。
+
+電子メールのサブジェクトの「summary phrase」はそのパッチの概要を正確
+に表現しなければなりません。「summary phrase」をファイル名にしてはい
+けません。パッチシリーズ中でそれぞれのパッチは同じ「summary phrase」を
+使ってはいけません(「パッチシリーズ」とは順序付けられた関連のある複数の
+パッチ群です)。
+
+あなたの電子メールの「summary phrase」がそのパッチにとって世界で唯一の識別子に
+なるように心がけてください。「summary phrase」は git のチェンジログの中へ
+ずっと伝播していきます。「summary phrase」は、開発者が後でパッチを参照する
+ために議論の中で利用するかもしれません。
+人々はそのパッチに関連した議論を読むために「summary phrase」を使って google で
+検索したがるでしょう。それはまた2、3ヶ月あとで、人々が「gitk」や
+「git log --oneline」のようなツールを使用して何千ものパッチに目を通す時、
+唯一目にとまる情報となるでしょう。
+
+これらの理由のため、「summary phrase」はなぜパッチが必要であるか、パッチが何を
+変更するかの2つの情報をせいぜい70〜75文字で表現していなければなりません。
+「summary phrase」は簡潔であり説明的である表現を目指しつつ、うまく
+まとめられている概要となるべきです。
+
+「summary phrase」は「Subject: [PATCH tag] <summary phrase>」のように、
+大括弧で閉じられたタグを接頭辞として付加してもかまいません。このタグは
+「summary phrase」の一部とは考えませんが、パッチをどのように取り扱うべきかを
+表現します。
+一般的には「v1, v2, v3」のようなバージョン情報を表すタグ(過去のパッチに対する
+コメントを反映するために複数のバージョンのパッチが投稿されているのであれば)、
+「RFC」のようなコメントを要求するタグが挙げられます。パッチシリーズとして4つの
+パッチがあれば、個々のパッチに「1/4, 2/4, 3/4, 4/4」のように番号を付けても
+かまいません。これは開発者がパッチを適用する順番を確実に把握するためです。
+そして、開発者がパッチシリーズの中のすべてのパッチをもらさずレビュー或いは
+適用するのを保証するためです。
+
+サブジェクトの例を二つ
+
+ Subject: [patch 2/5] ext2: improve scalability of bitmap searching
+ Subject: [PATCHv2 001/207] x86: fix eflags tracking
+
+「 from 」行は電子メールのボディの一番最初の行でなければなりません。
+その形式は以下のとおりです。
+
+ From: Original Author <author@example.com>
+
+「 from 」行はチェンジログの中で、そのパッチの作成者としてクレジットされ
+ている人を特定するものです。「 from 」行がかけていると、電子メールのヘッ
+ダーの「 From: 」が、チェンジログの中でパッチの作成者を決定するために使わ
+れるでしょう。
+
+説明本体は無期限のソースのチェンジログにコミットされます。なので、説明
+本体はそのパッチに至った議論の詳細を忘れているある程度の技量を持っている人
+がその詳細を思い出すことができるものでなければなりません。パッチが対処する
+障害の症状(カーネルログメッセージや oops メッセージ等)を記載することは問題に
+対処可能なパッチを求めてコミットログを検索する人々にとって特に有用です。
+パッチがコンパイル問題を解決するのであれば、そのパッチを探している人が見つける
+ことができる情報だけで十分であり、コンパイル時の全てのエラーを含める必要は
+ありません。「summary phrase」と同様に、簡潔であり説明的であることが重要です。
+
+「 --- 」マーカー行はパッチ処理ツールに対して、チェンジログメッセージの終端
+部分を認識させるという重要な役目を果たします。
+
+「 --- 」マーカー行の後の追加コメントの良い使用方法の1つに diffstat コマンド
+があります。diffstat コマンドとは何のファイルが変更され、1ファイル当たり何行
+追加され何行消されたかを示すものです。diffstat コマンドは特に大きなパッチに
+おいて役立ちます。その時点でだけ又はメンテナにとってのみ関係のあるコメント
+は無期限に保存されるチェンジログにとって適切ではありません。そのため、この
+ようなコメントもマーカー行の後に書かれるべきです。
+このようなコメントの良い例として、v1 と v2 のバージョン間で何が変更されたかを
+表す「パッチの変更履歴」が挙げられます。
+
+「 --- 」マーカー行の後に diffstat コマンドの結果を含めるのであれば、ファイル
+名はカーネルソースツリーのトップディレクトリからの表記で列記されるため、横方向
+のスペースをとり過ぎないように、diffstat コマンドにオプション「 -p 1 -w 70 」
+を指定してください(インデントを含めてちょうど80列に合うでしょう)。
+
+適切なパッチのフォーマットの詳細についてはセクション3の参考文献を参照して
+ください。
+
+16) 「git pull」要求の送り方(Linus の電子メールから)
+
+間違ったブランチから引っ張るのを防ぐために、git リポジトリのアドレスと
+ブランチ名を同じ行に1行で記載してください。そうすることで、3回の連続クリック
+で全て選択できます。
+
+正しい形式は下記の通りです。
+
+ "Please pull from
+
+ git://jdelvare.pck.nerim.net/jdelvare-2.6 i2c-for-linus
+
+ to get these changes:"
+
+その結果、アドレスを自分自身でタイピングして間違えることはなくなります(実際に、
+何度か間違ったブランチから引っ張ってきてしまい、その時に diffstat の結果を
+検証して間違っていることに気づいたことがあります。どこから何を引っ張るべきかを
+「探したり」、正しいブランチ名かどうかを重ねてチェックしたりする必要が
+なくなればより快適になるでしょう)。
+
+diffstat の結果を生成するために「 git diff -M --stat --summary 」を使って
+ください。-M オプションはファイル名の変更を検知でき、--summary オプションは
+新規ファイル、削除されたファイル、名前が変更されたファイルの概要を生成します。
+
+-M オプション(ファイル名の変更検知)を指定すると、diffstat の結果はかなり
+異なってきます。git は大規模な変更(追加と削除のペア)をファイル名の変更と
+判断するためです。
+
+------------------------------------
+セクション2 - ヒントとTIPSと小技
+------------------------------------
+
+このセクションは Linux カーネルに変更を適用することに関係のある一般的な
+「お約束」の多くを載せています。物事には例外というものがあります。しか
+し例外を適用するには、本当に妥当な理由が不可欠です。あなたは恐らくこの
+セクションを Linus のコンピュータ・サイエンス101と呼ぶでしょう。
+
+1) Documentation/CodingStyleを参照
+
+言うまでもなく、あなたのコードがこのコーディングスタイルからあまりに
+も逸脱していると、レビューやコメントなしに受け取ってもらえないかもし
+れません。
+
+特筆すべき例外は、コードをあるファイルから別のファイルに移動
+するときです。この場合、コードを移動するパッチでは、移動されるコード
+に関して移動以外の変更を一切加えるべきではありません。これにより、
+コードの移動とあなたが行ったコードの修正を明確に区別できるようにな
+ります。これは実際に何が変更されたかをレビューする際の大きな助けに
+なるとともに、ツールにコードの履歴を追跡させることも容易になります。
+
+投稿するより前にパッチのスタイルチェッカー( scripts/checkpatch.pl )で
+あなたのパッチをチェックしてください。このスタイルチェッカーは最終結
+論としてではなく、指標としてみるべきです。もし、あなたのコードが違反
+はしているが修正するより良く見えるのであれば、おそらくそのままにする
+のがベストです。
+
+スタイルチェッカーによる3段階のレポート:
+ - エラー: 間違っている可能性が高い
+ - 警告:注意してレビューする必要がある
+ - チェック:考慮する必要がある
+
+あなたはパッチに残っている全ての違反について、それがなぜ必要なのか正当な
+理由を示せるようにしておく必要があります。
+
+2) #ifdefは見苦しい
+
+ifdef が散乱したコードは、読むのもメンテナンスするのも面倒です。コードの中
+で ifdef を使わないでください。代わりに、ヘッダファイルの中に ifdef を入れて、
+条件付きで、コードの中で使われる関数を「 static inline 」関数かマクロで定義し
+てください。後はコンパイラが、何もしない箇所を最適化して取り去ってくれるで
+しょう。
+
+まずいコードの簡単な例
+
+ dev = alloc_etherdev (sizeof(struct funky_private));
+ if (!dev)
+ return -ENODEV;
+ #ifdef CONFIG_NET_FUNKINESS
+ init_funky_net(dev);
+ #endif
+
+クリーンアップしたコードの例
+
+(in header)
+ #ifndef CONFIG_NET_FUNKINESS
+ static inline void init_funky_net (struct net_device *d) {}
+ #endif
+
+(in the code itself)
+ dev = alloc_etherdev (sizeof(struct funky_private));
+ if (!dev)
+ return -ENODEV;
+ init_funky_net(dev);
+
+3) マクロより「 static inline 」を推奨
+
+「 static inline 」関数はマクロよりもずっと推奨されています。それらは、
+型安全性があり、長さにも制限が無く、フォーマットの制限もありません。
+gcc においては、マクロと同じくらい軽いです。
+
+マクロは「 static inline 」が明らかに不適切であると分かる場所(高速化パスの
+いくつかの特定のケース)や「 static inline 」関数を使うことができないような
+場所(マクロの引数の文字列連結のような)にだけ使われるべきです。
+
+「 static inline 」は「 static __inline__ 」や「 extern inline 」や
+「 extern __inline__ 」よりも適切です。
+
+4) 設計に凝りすぎるな
+
+それが有用になるかどうか分からないような不明瞭な将来を見越した設計
+をしないでください。「できる限り簡単に、そして、それ以上簡単になら
+ないような設計をしてください。」
+
+----------------------
+セクション3 参考文献
+----------------------
+
+Andrew Morton, "The perfect patch" (tpp).
+ <http://www.ozlabs.org/~akpm/stuff/tpp.txt>
+
+Jeff Garzik, "Linux kernel patch submission format".
+ <http://linux.yyz.us/patch-format.html>
+
+Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
+ <http://www.kroah.com/log/2005/03/31/>
+ <http://www.kroah.com/log/2005/07/08/>
+ <http://www.kroah.com/log/2005/10/19/>
+ <http://www.kroah.com/log/2006/01/11/>
+
+NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
+ <https://lkml.org/lkml/2005/7/11/336>
+
+Kernel Documentation/CodingStyle:
+ <http://users.sosdg.org/~qiyong/lxr/source/Documentation/CodingStyle>
+
+Linus Torvalds's mail on the canonical patch format:
+ <http://lkml.org/lkml/2005/4/7/183>
+
+Andi Kleen, "On submitting kernel patches"
+ Some strategies to get difficult or controversial changes in.
+ http://halobates.de/on-submitting-patches.pdf
+
+--
+
+
diff --git a/Documentation/ja_JP/stable_api_nonsense.txt b/Documentation/ja_JP/stable_api_nonsense.txt
new file mode 100644
index 000000000..7653b5cbf
--- /dev/null
+++ b/Documentation/ja_JP/stable_api_nonsense.txt
@@ -0,0 +1,263 @@
+NOTE:
+This is a version of Documentation/stable_api_nonsense.txt into Japanese.
+This document is maintained by IKEDA, Munehiro <m-ikeda@ds.jp.nec.com>
+and the JF Project team <http://www.linux.or.jp/JF/>.
+If you find any difference between this document and the original file
+or a problem with the translation,
+please contact the maintainer of this file or JF project.
+
+Please also note that the purpose of this file is to be easier to read
+for non English (read: Japanese) speakers and is not intended as a
+fork. So if you have any comments or updates of this file, please try
+to update the original English file first.
+
+Last Updated: 2007/07/18
+==================================
+これは、
+linux-2.6.22-rc4/Documentation/stable_api_nonsense.txt の和訳
+です。
+翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
+翻訳日 : 2007/06/11
+原著作者: Greg Kroah-Hartman < greg at kroah dot com >
+翻訳者 : 池田 宗広 < m-ikeda at ds dot jp dot nec dot com >
+校正者 : Masanori Kobayashi さん < zap03216 at nifty dot ne dot jp >
+ Seiji Kaneko さん < skaneko at a2 dot mbn dot or dot jp >
+==================================
+
+
+
+Linux カーネルのドライバインターフェース
+(あなたの質問すべてに対する回答とその他諸々)
+
+Greg Kroah-Hartman <greg at kroah dot com>
+
+
+この文書は、なぜ Linux ではバイナリカーネルインターフェースが定義
+されていないのか、またはなぜ不変のカーネルインターフェースを持たな
+いのか、ということを説明するために書かれた。ここでの話題は「カーネ
+ル内部の」インターフェースについてであり、ユーザー空間とのインター
+フェースではないことを理解してほしい。カーネルとユーザー空間とのイ
+ンターフェースとはアプリケーションプログラムが使用するものであり、
+つまりシステムコールのインターフェースがこれに当たる。これは今まで
+長きに渡り、かつ今後も「まさしく」不変である。私は確か 0.9 か何か
+より前のカーネルを使ってビルドした古いプログラムを持っているが、そ
+れは最新の 2.6 カーネルでもきちんと動作する。ユーザー空間とのイン
+ターフェースは、ユーザーとアプリケーションプログラマが不変性を信頼
+してよいものの一つである。
+
+
+要旨
+----
+
+あなたは不変のカーネルインターフェースが必要だと考えているかもしれ
+ないが、実際のところはそうではない。あなたは必要としているものが分
+かっていない。あなたが必要としているものは安定して動作するドライバ
+であり、それはドライバがメインのカーネルツリーに含まれる場合のみ得
+ることができる。ドライバがメインのカーネルツリーに含まれていると、
+他にも多くの良いことがある。それは、Linux をより強固で、安定な、成
+熟したオペレーティングシステムにすることができるということだ。これ
+こそ、そもそもあなたが Linux を使う理由のはずだ。
+
+
+はじめに
+--------
+
+カーネル内部のインターフェース変更を心配しなければならないドライバ
+を書きたいなどというのは、変わり者だけだ。この世界のほとんどの人は、
+そのようなドライバがどんなインターフェースを使っているかなど知らな
+いし、そんなドライバのことなど全く気にもかけていない。
+
+
+まず初めに、クローズソースとか、ソースコードの隠蔽とか、バイナリの
+みが配布される使い物にならない代物[訳注(1)]とか、実体はバイナリ
+コードでそれを読み込むためのラッパー部分のみソースコードが公開され
+ているとか、その他用語は何であれ GPL の下にソースコードがリリース
+されていないカーネルドライバに関する法的な問題について、私は「いか
+なる議論も」行うつもりがない。法的な疑問があるのならば、プログラマ
+である私ではなく、弁護士に相談して欲しい。ここでは単に、技術的な問
+題について述べることにする。(法的な問題を軽視しているわけではない。
+それらは実際に存在するし、あなたはそれをいつも気にかけておく必要が
+ある)
+
+訳注(1)
+「使い物にならない代物」の原文は "blob"
+
+
+さてここでは、バイナリカーネルインターフェースについてと、ソースレ
+ベルでのインターフェースの不変性について、という二つの話題を取り上
+げる。この二つは互いに依存する関係にあるが、まずはバイナリインター
+フェースについて議論を行いやっつけてしまおう。
+
+
+バイナリカーネルインターフェース
+--------------------------------
+
+もしソースレベルでのインターフェースが不変ならば、バイナリインター
+フェースも当然のように不変である、というのは正しいだろうか?正しく
+ない。Linux カーネルに関する以下の事実を考えてみてほしい。
+ - あなたが使用するCコンパイラのバージョンによって、カーネル内部
+ の構造体の配置構造は異なったものになる。また、関数は異なった方
+ 法でカーネルに含まれることになるかもしれない(例えばインライン
+ 関数として扱われたり、扱われなかったりする)。個々の関数がどの
+ ようにコンパイルされるかはそれほど重要ではないが、構造体のパデ
+ ィングが異なるというのは非常に重要である。
+ - あなたがカーネルのビルドオプションをどのように設定するかによっ
+ て、カーネルには広い範囲で異なった事態が起こり得る。
+ - データ構造は異なるデータフィールドを持つかもしれない
+ - いくつかの関数は全く実装されていない状態になり得る
+ (例:SMP向けではないビルドでは、いくつかのロックは中身が
+ カラにコンパイルされる)
+ - カーネル内のメモリは、異なった方法で配置され得る。これはビ
+ ルドオプションに依存している。
+ - Linux は様々な異なるプロセッサアーキテクチャ上で動作する。
+ あるアーキテクチャ用のバイナリドライバを、他のアーキテクチャで
+ 正常に動作させる方法はない。
+
+
+ある特定のカーネル設定を使用し、カーネルをビルドしたのと正確に同じ
+Cコンパイラを使用して単にカーネルモジュールをコンパイルするだけで
+も、あなたはこれらいくつもの問題に直面することになる。ある特定の
+Linux ディストリビューションの、ある特定のリリースバージョン用にモ
+ジュールを提供しようと思っただけでも、これらの問題を引き起こすには
+十分である。にも関わらず Linux ディストリビューションの数と、サ
+ポートするディストリビューションのリリース数を掛け算し、それら一つ
+一つについてビルドを行ったとしたら、今度はリリースごとのビルドオプ
+ションの違いという悪夢にすぐさま悩まされることになる。また、ディス
+トリビューションの各リリースバージョンには、異なるハードウェア(プ
+ロセッサタイプや種々のオプション)に対応するため、何種類かのカーネ
+ルが含まれているということも理解して欲しい。従って、ある一つのリ
+リースバージョンだけのためにモジュールを作成する場合でも、あなたは
+何バージョンものモジュールを用意しなければならない。
+
+
+信じて欲しい。このような方法でサポートを続けようとするなら、あなた
+はいずれ正気を失うだろう。遠い昔、私はそれがいかに困難なことか、身
+をもって学んだのだ・・・
+
+
+不変のカーネルソースレベルインターフェース
+------------------------------------------
+
+メインカーネルツリーに含まれていない Linux カーネルドライバを継続
+してサポートしていこうとしている人たちとの議論においては、これは極
+めて「引火性の高い」話題である。[訳注(2)]
+
+訳注(2)
+「引火性の高い」の原文は "volatile"。
+volatile には「揮発性の」「爆発しやすい」という意味の他、「変わり
+やすい」「移り気な」という意味がある。
+「(この話題は)爆発的に激しい論争を巻き起こしかねない」ということ
+を、「(カーネルのソースレベルインターフェースは)移ろい行くもので
+ある」ということを連想させる "volatile" という単語で表現している。
+
+
+Linux カーネルの開発は継続的に速いペースで行われ、決して歩みを緩め
+ることがない。その中でカーネル開発者達は、現状のインターフェースに
+あるバグを見つけ、より良い方法を考え出す。彼らはやがて、現状のイン
+ターフェースがより正しく動作するように修正を行う。その過程で関数の
+名前は変更されるかもしれず、構造体は大きく、または小さくなるかもし
+れず、関数の引数は検討しなおされるかもしれない。そのような場合、引
+き続き全てが正常に動作するよう、カーネル内でこれらのインターフェー
+スを使用している個所も全て同時に修正される。
+
+
+具体的な例として、カーネル内の USB インターフェースを挙げる。USB
+サブシステムはこれまでに少なくとも3回の書き直しが行われ、その結果
+インターフェースが変更された。これらの書き直しはいくつかの異なった
+問題を修正するために行われた。
+ - 同期的データストリームが非同期に変更された。これにより多数のド
+ ライバを単純化でき、全てのドライバのスループットが向上した。今
+ やほとんど全ての USB デバイスは、考えられる最高の速度で動作し
+ ている。
+ - USB ドライバが USB サブシステムのコアから行う、データパケット
+ 用のメモリ確保方法が変更された。これに伴い、いくつもの文書化さ
+ れたデッドロック条件を回避するため、全ての USB ドライバはより
+ 多くの情報を USB コアに提供しなければならないようになっている。
+
+
+このできごとは、数多く存在するクローズソースのオペレーティングシス
+テムとは全く対照的だ。それらは長期に渡り古い USB インターフェース
+をメンテナンスしなければならない。古いインターフェースが残ることで、
+新たな開発者が偶然古いインターフェースを使い、正しくない方法で開発
+を行ってしまう可能性が生じる。これによりシステムの安定性は危険にさ
+らされることになる。
+
+
+上に挙げたどちらの例においても、開発者達はその変更が重要かつ必要で
+あることに合意し、比較的楽にそれを実行した。もし Linux がソースレ
+ベルでインターフェースの不変性を保証しなければならないとしたら、新
+しいインターフェースを作ると同時に、古い、問題のある方を今後ともメ
+ンテナンスするという余計な仕事を USB の開発者にさせなければならな
+い。Linux の USB 開発者は、自分の時間を使って仕事をしている。よっ
+て、価値のない余計な仕事を報酬もなしに実行しろと言うことはできない。
+
+
+セキュリティ問題も、Linux にとっては非常に重要である。ひとたびセキ
+ュリティに関する問題が発見されれば、それは極めて短期間のうちに修正
+される。セキュリティ問題の発生を防ぐための修正は、カーネルの内部イ
+ンターフェースの変更を何度も引き起こしてきた。その際同時に、変更さ
+れたインターフェースを使用する全てのドライバもまた変更された。これ
+により問題が解消し、将来偶然に問題が再発してしまわないことが保証さ
+れる。もし内部インターフェースの変更が許されないとしたら、このよう
+にセキュリティ問題を修正し、将来再発しないことを保証することなど不
+可能なのだ。
+
+
+カーネルのインターフェースは時が経つにつれクリーンナップを受ける。
+誰も使っていないインターフェースは削除される。これにより、可能な限
+りカーネルが小さく保たれ、現役の全てのインターフェースが可能な限り
+テストされることを保証しているのだ。(使われていないインターフェー
+スの妥当性をテストすることは不可能と言っていいだろう)
+
+
+
+これから何をすべきか
+-----------------------
+
+では、もしメインのカーネルツリーに含まれない Linux カーネルドライ
+バがあったとして、あなたは、つまり開発者は何をするべきだろうか?全
+てのディストリビューションの全てのカーネルバージョン向けにバイナリ
+のドライバを供給することは悪夢であり、カーネルインターフェースの変
+更を追いかけ続けることもまた過酷な仕事だ。
+
+
+答えは簡単。そのドライバをメインのカーネルツリーに入れてしまえばよ
+い。(ここで言及しているのは、GPL に従って公開されるドライバのこと
+だということに注意してほしい。あなたのコードがそれに該当しないなら
+ば、さよなら。幸運を祈ります。ご自分で何とかしてください。Andrew
+と Linus からのコメント<Andrew と Linus のコメントへのリンクをこ
+こに置く>をどうぞ)ドライバがメインツリーに入れば、カーネルのイン
+ターフェースが変更された場合、変更を行った開発者によってドライバも
+修正されることになるだろう。あなたはほとんど労力を払うことなしに、
+常にビルド可能できちんと動作するドライバを手に入れることができる。
+
+
+ドライバをメインのカーネルツリーに入れると、非常に好ましい以下の効
+果がある。
+ - ドライバの品質が向上する一方で、(元の開発者にとっての)メンテ
+ ナンスコストは下がる。
+ - あなたのドライバに他の開発者が機能を追加してくれる。
+ - 誰かがあなたのドライバにあるバグを見つけ、修正してくれる。
+ - 誰かがあなたのドライバにある改善点を見つけてくれる。
+ - 外部インターフェースが変更されドライバの更新が必要になった場合、
+ 誰かがあなたの代わりに更新してくれる。
+ - ドライバを入れてくれとディストロに頼まなくても、そのドライバは
+ 全ての Linux ディストリビューションに自動的に含まれてリリース
+ される。
+
+
+Linux では、他のどのオペレーティングシステムよりも数多くのデバイス
+が「そのまま」使用できるようになった。また Linux は、どのオペレー
+ティングシステムよりも数多くのプロセッサアーキテクチャ上でそれらの
+デバイスを使用することができるようにもなった。このように、Linux の
+開発モデルは実証されており、今後も間違いなく正しい方向へと進んでい
+くだろう。:)
+
+
+
+------
+
+この文書の初期の草稿に対し、Randy Dunlap, Andrew Morton, David
+Brownell, Hanna Linder, Robert Love, Nishanth Aravamudan から査読
+と助言を頂きました。感謝申し上げます。
+
diff --git a/Documentation/ja_JP/stable_kernel_rules.txt b/Documentation/ja_JP/stable_kernel_rules.txt
new file mode 100644
index 000000000..9dbda9b5d
--- /dev/null
+++ b/Documentation/ja_JP/stable_kernel_rules.txt
@@ -0,0 +1,84 @@
+NOTE:
+This is Japanese translated version of "Documentation/stable_kernel_rules.txt".
+This one is maintained by Tsugikazu Shibata <tshibata@ab.jp.nec.com>
+and JF Project team <www.linux.or.jp/JF>.
+If you find difference with original file or problem in translation,
+please contact maintainer of this file or JF project.
+
+Please also note that purpose of this file is easier to read for non
+English natives and do no intended to fork. So, if you have any
+comment or update of this file, please try to update Original(English)
+file at first.
+
+==================================
+これは、
+linux-2.6.29/Documentation/stable_kernel_rules.txt
+の和訳です。
+
+翻訳団体: JF プロジェクト < http://www.linux.or.jp/JF/ >
+翻訳日: 2009/1/14
+翻訳者: Tsugikazu Shibata <tshibata at ab dot jp dot nec dot com>
+校正者: 武井伸光さん、<takei at webmasters dot gr dot jp>
+ かねこさん (Seiji Kaneko) <skaneko at a2 dot mbn dot or dot jp>
+ 小林 雅典さん (Masanori Kobayasi) <zap03216 at nifty dot ne dot jp>
+ 野口さん (Kenji Noguchi) <tokyo246 at gmail dot com>
+ 神宮信太郎さん <jin at libjingu dot jp>
+==================================
+
+ずっと知りたかった Linux 2.6 -stable リリースの全て
+
+"-stable" ツリーにどのような種類のパッチが受け入れられるか、どのような
+ものが受け入れられないか、についての規則-
+
+ - 明らかに正しく、テストされているものでなければならない。
+ - 文脈(変更行の前後)を含めて 100 行より大きくてはいけない。
+ - ただ一個のことだけを修正しているべき。
+ - 皆を悩ませている本物のバグを修正しなければならない。("これはバグで
+ あるかもしれないが..." のようなものではない)
+ - ビルドエラー(CONFIG_BROKENになっているものを除く), oops, ハング、デー
+ タ破壊、現実のセキュリティ問題、その他 "ああ、これはダメだね"という
+ ようなものを修正しなければならない。短く言えば、重大な問題。
+ - 新しい device ID とクオークも受け入れられる。
+ - どのように競合状態が発生するかの説明も一緒に書かれていない限り、
+ "理論的には競合状態になる"ようなものは不可。
+ - いかなる些細な修正も含めることはできない。(スペルの修正、空白のクリー
+ ンアップなど)
+ - Documentation/SubmittingPatches の規則に従ったものでなければならない。
+ - パッチ自体か同等の修正が Linus のツリーに既に存在しなければならない。
+  Linus のツリーでのコミットID を -stable へのパッチ投稿の際に引用す
+ ること。
+
+-stable ツリーにパッチを送付する手続き-
+
+ - 上記の規則に従っているかを確認した後に、stable@vger.kernel.org にパッチ
+ を送る。
+ - 送信者はパッチがキューに受け付けられた際には ACK を、却下された場合
+ には NAK を受け取る。この反応は開発者たちのスケジュールによって、数
+ 日かかる場合がある。
+ - もし受け取られたら、パッチは他の開発者たちと関連するサブシステムの
+ メンテナーによるレビューのために -stable キューに追加される。
+ - パッチに stable@vger.kernel.org のアドレスが付加されているときには、それ
+ が Linus のツリーに入る時に自動的に stable チームに email される。
+ - セキュリティパッチはこのエイリアス (stable@vger.kernel.org) に送られるべ
+ きではなく、代わりに security@kernel.org のアドレスに送られる。
+
+レビューサイクル-
+
+ - -stable メンテナがレビューサイクルを決めるとき、パッチはレビュー委
+ 員会とパッチが影響する領域のメンテナ(提供者がその領域のメンテナで無
+ い限り)に送られ、linux-kernel メーリングリストにCCされる。
+ - レビュー委員会は 48時間の間に ACK か NAK を出す。
+ - もしパッチが委員会のメンバから却下されるか、メンテナ達やメンバが気付
+ かなかった問題が持ちあがり、linux-kernel メンバがパッチに異議を唱え
+ た場合には、パッチはキューから削除される。
+ - レビューサイクルの最後に、ACK を受けたパッチは最新の -stable リリー
+ スに追加され、その後に新しい -stable リリースが行われる。
+ - セキュリティパッチは、通常のレビューサイクルを通らず、セキュリティ
+ カーネルチームから直接 -stable ツリーに受け付けられる。
+ この手続きの詳細については kernel security チームに問い合わせること。
+
+レビュー委員会-
+
+ - この委員会は、このタスクについて活動する多くのボランティアと、少数の
+ 非ボランティアのカーネル開発者達で構成されている。
+
diff --git a/Documentation/java.txt b/Documentation/java.txt
new file mode 100644
index 000000000..418020584
--- /dev/null
+++ b/Documentation/java.txt
@@ -0,0 +1,404 @@
+ Java(tm) Binary Kernel Support for Linux v1.03
+ ----------------------------------------------
+
+Linux beats them ALL! While all other OS's are TALKING about direct
+support of Java Binaries in the OS, Linux is doing it!
+
+You can execute Java applications and Java Applets just like any
+other program after you have done the following:
+
+1) You MUST FIRST install the Java Developers Kit for Linux.
+ The Java on Linux HOWTO gives the details on getting and
+ installing this. This HOWTO can be found at:
+
+ ftp://sunsite.unc.edu/pub/Linux/docs/HOWTO/Java-HOWTO
+
+ You should also set up a reasonable CLASSPATH environment
+ variable to use Java applications that make use of any
+ nonstandard classes (not included in the same directory
+ as the application itself).
+
+2) You have to compile BINFMT_MISC either as a module or into
+ the kernel (CONFIG_BINFMT_MISC) and set it up properly.
+ If you choose to compile it as a module, you will have
+ to insert it manually with modprobe/insmod, as kmod
+ cannot easily be supported with binfmt_misc.
+ Read the file 'binfmt_misc.txt' in this directory to know
+ more about the configuration process.
+
+3) Add the following configuration items to binfmt_misc
+ (you should really have read binfmt_misc.txt now):
+ support for Java applications:
+ ':Java:M::\xca\xfe\xba\xbe::/usr/local/bin/javawrapper:'
+ support for executable Jar files:
+ ':ExecutableJAR:E::jar::/usr/local/bin/jarwrapper:'
+ support for Java Applets:
+ ':Applet:E::html::/usr/bin/appletviewer:'
+ or the following, if you want to be more selective:
+ ':Applet:M::<!--applet::/usr/bin/appletviewer:'
+
+ Of course you have to fix the path names. The path/file names given in this
+ document match the Debian 2.1 system. (i.e. jdk installed in /usr,
+ custom wrappers from this document in /usr/local)
+
+ Note, that for the more selective applet support you have to modify
+ existing html-files to contain <!--applet--> in the first line
+ ('<' has to be the first character!) to let this work!
+
+ For the compiled Java programs you need a wrapper script like the
+ following (this is because Java is broken in case of the filename
+ handling), again fix the path names, both in the script and in the
+ above given configuration string.
+
+ You, too, need the little program after the script. Compile like
+ gcc -O2 -o javaclassname javaclassname.c
+ and stick it to /usr/local/bin.
+
+ Both the javawrapper shellscript and the javaclassname program
+ were supplied by Colin J. Watson <cjw44@cam.ac.uk>.
+
+====================== Cut here ===================
+#!/bin/bash
+# /usr/local/bin/javawrapper - the wrapper for binfmt_misc/java
+
+if [ -z "$1" ]; then
+ exec 1>&2
+ echo Usage: $0 class-file
+ exit 1
+fi
+
+CLASS=$1
+FQCLASS=`/usr/local/bin/javaclassname $1`
+FQCLASSN=`echo $FQCLASS | sed -e 's/^.*\.\([^.]*\)$/\1/'`
+FQCLASSP=`echo $FQCLASS | sed -e 's-\.-/-g' -e 's-^[^/]*$--' -e 's-/[^/]*$--'`
+
+# for example:
+# CLASS=Test.class
+# FQCLASS=foo.bar.Test
+# FQCLASSN=Test
+# FQCLASSP=foo/bar
+
+unset CLASSBASE
+
+declare -i LINKLEVEL=0
+
+while :; do
+ if [ "`basename $CLASS .class`" == "$FQCLASSN" ]; then
+ # See if this directory works straight off
+ cd -L `dirname $CLASS`
+ CLASSDIR=$PWD
+ cd $OLDPWD
+ if echo $CLASSDIR | grep -q "$FQCLASSP$"; then
+ CLASSBASE=`echo $CLASSDIR | sed -e "s.$FQCLASSP$.."`
+ break;
+ fi
+ # Try dereferencing the directory name
+ cd -P `dirname $CLASS`
+ CLASSDIR=$PWD
+ cd $OLDPWD
+ if echo $CLASSDIR | grep -q "$FQCLASSP$"; then
+ CLASSBASE=`echo $CLASSDIR | sed -e "s.$FQCLASSP$.."`
+ break;
+ fi
+ # If no other possible filename exists
+ if [ ! -L $CLASS ]; then
+ exec 1>&2
+ echo $0:
+ echo " $CLASS should be in a" \
+ "directory tree called $FQCLASSP"
+ exit 1
+ fi
+ fi
+ if [ ! -L $CLASS ]; then break; fi
+ # Go down one more level of symbolic links
+ let LINKLEVEL+=1
+ if [ $LINKLEVEL -gt 5 ]; then
+ exec 1>&2
+ echo $0:
+ echo " Too many symbolic links encountered"
+ exit 1
+ fi
+ CLASS=`ls --color=no -l $CLASS | sed -e 's/^.* \([^ ]*\)$/\1/'`
+done
+
+if [ -z "$CLASSBASE" ]; then
+ if [ -z "$FQCLASSP" ]; then
+ GOODNAME=$FQCLASSN.class
+ else
+ GOODNAME=$FQCLASSP/$FQCLASSN.class
+ fi
+ exec 1>&2
+ echo $0:
+ echo " $FQCLASS should be in a file called $GOODNAME"
+ exit 1
+fi
+
+if ! echo $CLASSPATH | grep -q "^\(.*:\)*$CLASSBASE\(:.*\)*"; then
+ # class is not in CLASSPATH, so prepend dir of class to CLASSPATH
+ if [ -z "${CLASSPATH}" ] ; then
+ export CLASSPATH=$CLASSBASE
+ else
+ export CLASSPATH=$CLASSBASE:$CLASSPATH
+ fi
+fi
+
+shift
+/usr/bin/java $FQCLASS "$@"
+====================== Cut here ===================
+
+
+====================== Cut here ===================
+/* javaclassname.c
+ *
+ * Extracts the class name from a Java class file; intended for use in a Java
+ * wrapper of the type supported by the binfmt_misc option in the Linux kernel.
+ *
+ * Copyright (C) 1999 Colin J. Watson <cjw44@cam.ac.uk>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <sys/types.h>
+
+/* From Sun's Java VM Specification, as tag entries in the constant pool. */
+
+#define CP_UTF8 1
+#define CP_INTEGER 3
+#define CP_FLOAT 4
+#define CP_LONG 5
+#define CP_DOUBLE 6
+#define CP_CLASS 7
+#define CP_STRING 8
+#define CP_FIELDREF 9
+#define CP_METHODREF 10
+#define CP_INTERFACEMETHODREF 11
+#define CP_NAMEANDTYPE 12
+#define CP_METHODHANDLE 15
+#define CP_METHODTYPE 16
+#define CP_INVOKEDYNAMIC 18
+
+/* Define some commonly used error messages */
+
+#define seek_error() error("%s: Cannot seek\n", program)
+#define corrupt_error() error("%s: Class file corrupt\n", program)
+#define eof_error() error("%s: Unexpected end of file\n", program)
+#define utf8_error() error("%s: Only ASCII 1-255 supported\n", program);
+
+char *program;
+
+long *pool;
+
+u_int8_t read_8(FILE *classfile);
+u_int16_t read_16(FILE *classfile);
+void skip_constant(FILE *classfile, u_int16_t *cur);
+void error(const char *format, ...);
+int main(int argc, char **argv);
+
+/* Reads in an unsigned 8-bit integer. */
+u_int8_t read_8(FILE *classfile)
+{
+ int b = fgetc(classfile);
+ if(b == EOF)
+ eof_error();
+ return (u_int8_t)b;
+}
+
+/* Reads in an unsigned 16-bit integer. */
+u_int16_t read_16(FILE *classfile)
+{
+ int b1, b2;
+ b1 = fgetc(classfile);
+ if(b1 == EOF)
+ eof_error();
+ b2 = fgetc(classfile);
+ if(b2 == EOF)
+ eof_error();
+ return (u_int16_t)((b1 << 8) | b2);
+}
+
+/* Reads in a value from the constant pool. */
+void skip_constant(FILE *classfile, u_int16_t *cur)
+{
+ u_int16_t len;
+ int seekerr = 1;
+ pool[*cur] = ftell(classfile);
+ switch(read_8(classfile))
+ {
+ case CP_UTF8:
+ len = read_16(classfile);
+ seekerr = fseek(classfile, len, SEEK_CUR);
+ break;
+ case CP_CLASS:
+ case CP_STRING:
+ case CP_METHODTYPE:
+ seekerr = fseek(classfile, 2, SEEK_CUR);
+ break;
+ case CP_METHODHANDLE:
+ seekerr = fseek(classfile, 3, SEEK_CUR);
+ break;
+ case CP_INTEGER:
+ case CP_FLOAT:
+ case CP_FIELDREF:
+ case CP_METHODREF:
+ case CP_INTERFACEMETHODREF:
+ case CP_NAMEANDTYPE:
+ case CP_INVOKEDYNAMIC:
+ seekerr = fseek(classfile, 4, SEEK_CUR);
+ break;
+ case CP_LONG:
+ case CP_DOUBLE:
+ seekerr = fseek(classfile, 8, SEEK_CUR);
+ ++(*cur);
+ break;
+ default:
+ corrupt_error();
+ }
+ if(seekerr)
+ seek_error();
+}
+
+void error(const char *format, ...)
+{
+ va_list ap;
+ va_start(ap, format);
+ vfprintf(stderr, format, ap);
+ va_end(ap);
+ exit(1);
+}
+
+int main(int argc, char **argv)
+{
+ FILE *classfile;
+ u_int16_t cp_count, i, this_class, classinfo_ptr;
+ u_int8_t length;
+
+ program = argv[0];
+
+ if(!argv[1])
+ error("%s: Missing input file\n", program);
+ classfile = fopen(argv[1], "rb");
+ if(!classfile)
+ error("%s: Error opening %s\n", program, argv[1]);
+
+ if(fseek(classfile, 8, SEEK_SET)) /* skip magic and version numbers */
+ seek_error();
+ cp_count = read_16(classfile);
+ pool = calloc(cp_count, sizeof(long));
+ if(!pool)
+ error("%s: Out of memory for constant pool\n", program);
+
+ for(i = 1; i < cp_count; ++i)
+ skip_constant(classfile, &i);
+ if(fseek(classfile, 2, SEEK_CUR)) /* skip access flags */
+ seek_error();
+
+ this_class = read_16(classfile);
+ if(this_class < 1 || this_class >= cp_count)
+ corrupt_error();
+ if(!pool[this_class] || pool[this_class] == -1)
+ corrupt_error();
+ if(fseek(classfile, pool[this_class] + 1, SEEK_SET))
+ seek_error();
+
+ classinfo_ptr = read_16(classfile);
+ if(classinfo_ptr < 1 || classinfo_ptr >= cp_count)
+ corrupt_error();
+ if(!pool[classinfo_ptr] || pool[classinfo_ptr] == -1)
+ corrupt_error();
+ if(fseek(classfile, pool[classinfo_ptr] + 1, SEEK_SET))
+ seek_error();
+
+ length = read_16(classfile);
+ for(i = 0; i < length; ++i)
+ {
+ u_int8_t x = read_8(classfile);
+ if((x & 0x80) || !x)
+ {
+ if((x & 0xE0) == 0xC0)
+ {
+ u_int8_t y = read_8(classfile);
+ if((y & 0xC0) == 0x80)
+ {
+ int c = ((x & 0x1f) << 6) + (y & 0x3f);
+ if(c) putchar(c);
+ else utf8_error();
+ }
+ else utf8_error();
+ }
+ else utf8_error();
+ }
+ else if(x == '/') putchar('.');
+ else putchar(x);
+ }
+ putchar('\n');
+ free(pool);
+ fclose(classfile);
+ return 0;
+}
+====================== Cut here ===================
+
+
+====================== Cut here ===================
+#!/bin/bash
+# /usr/local/java/bin/jarwrapper - the wrapper for binfmt_misc/jar
+
+java -jar $1
+====================== Cut here ===================
+
+
+Now simply chmod +x the .class, .jar and/or .html files you want to execute.
+To add a Java program to your path best put a symbolic link to the main
+.class file into /usr/bin (or another place you like) omitting the .class
+extension. The directory containing the original .class file will be
+added to your CLASSPATH during execution.
+
+
+To test your new setup, enter in the following simple Java app, and name
+it "HelloWorld.java":
+
+ class HelloWorld {
+ public static void main(String args[]) {
+ System.out.println("Hello World!");
+ }
+ }
+
+Now compile the application with:
+ javac HelloWorld.java
+
+Set the executable permissions of the binary file, with:
+ chmod 755 HelloWorld.class
+
+And then execute it:
+ ./HelloWorld.class
+
+
+To execute Java Jar files, simple chmod the *.jar files to include
+the execution bit, then just do
+ ./Application.jar
+
+
+To execute Java Applets, simple chmod the *.html files to include
+the execution bit, then just do
+ ./Applet.html
+
+
+originally by Brian A. Lantz, brian@lantz.com
+heavily edited for binfmt_misc by Richard Günther
+new scripts by Colin J. Watson <cjw44@cam.ac.uk>
+added executable Jar file support by Kurt Huwig <kurt@iku-netz.de>
+
diff --git a/Documentation/kasan.txt b/Documentation/kasan.txt
new file mode 100644
index 000000000..469224178
--- /dev/null
+++ b/Documentation/kasan.txt
@@ -0,0 +1,172 @@
+Kernel address sanitizer
+================
+
+0. Overview
+===========
+
+Kernel Address sanitizer (KASan) is a dynamic memory error detector. It provides
+a fast and comprehensive solution for finding use-after-free and out-of-bounds
+bugs.
+
+KASan uses compile-time instrumentation for checking every memory access,
+therefore you will need a gcc version of 4.9.2 or later. KASan could detect out
+of bounds accesses to stack or global variables, but only if gcc 5.0 or later was
+used to built the kernel.
+
+Currently KASan is supported only for x86_64 architecture and requires that the
+kernel be built with the SLUB allocator.
+
+1. Usage
+=========
+
+To enable KASAN configure kernel with:
+
+ CONFIG_KASAN = y
+
+and choose between CONFIG_KASAN_OUTLINE and CONFIG_KASAN_INLINE. Outline/inline
+is compiler instrumentation types. The former produces smaller binary the
+latter is 1.1 - 2 times faster. Inline instrumentation requires a gcc version
+of 5.0 or later.
+
+Currently KASAN works only with the SLUB memory allocator.
+For better bug detection and nicer report, enable CONFIG_STACKTRACE and put
+at least 'slub_debug=U' in the boot cmdline.
+
+To disable instrumentation for specific files or directories, add a line
+similar to the following to the respective kernel Makefile:
+
+ For a single file (e.g. main.o):
+ KASAN_SANITIZE_main.o := n
+
+ For all files in one directory:
+ KASAN_SANITIZE := n
+
+1.1 Error reports
+==========
+
+A typical out of bounds access report looks like this:
+
+==================================================================
+BUG: AddressSanitizer: out of bounds access in kmalloc_oob_right+0x65/0x75 [test_kasan] at addr ffff8800693bc5d3
+Write of size 1 by task modprobe/1689
+=============================================================================
+BUG kmalloc-128 (Not tainted): kasan error
+-----------------------------------------------------------------------------
+
+Disabling lock debugging due to kernel taint
+INFO: Allocated in kmalloc_oob_right+0x3d/0x75 [test_kasan] age=0 cpu=0 pid=1689
+ __slab_alloc+0x4b4/0x4f0
+ kmem_cache_alloc_trace+0x10b/0x190
+ kmalloc_oob_right+0x3d/0x75 [test_kasan]
+ init_module+0x9/0x47 [test_kasan]
+ do_one_initcall+0x99/0x200
+ load_module+0x2cb3/0x3b20
+ SyS_finit_module+0x76/0x80
+ system_call_fastpath+0x12/0x17
+INFO: Slab 0xffffea0001a4ef00 objects=17 used=7 fp=0xffff8800693bd728 flags=0x100000000004080
+INFO: Object 0xffff8800693bc558 @offset=1368 fp=0xffff8800693bc720
+
+Bytes b4 ffff8800693bc548: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+Object ffff8800693bc558: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+Object ffff8800693bc568: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+Object ffff8800693bc578: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+Object ffff8800693bc588: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+Object ffff8800693bc598: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+Object ffff8800693bc5a8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+Object ffff8800693bc5b8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b kkkkkkkkkkkkkkkk
+Object ffff8800693bc5c8: 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b a5 kkkkkkkkkkkkkkk.
+Redzone ffff8800693bc5d8: cc cc cc cc cc cc cc cc ........
+Padding ffff8800693bc718: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
+CPU: 0 PID: 1689 Comm: modprobe Tainted: G B 3.18.0-rc1-mm1+ #98
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.7.5-0-ge51488c-20140602_164612-nilsson.home.kraxel.org 04/01/2014
+ ffff8800693bc000 0000000000000000 ffff8800693bc558 ffff88006923bb78
+ ffffffff81cc68ae 00000000000000f3 ffff88006d407600 ffff88006923bba8
+ ffffffff811fd848 ffff88006d407600 ffffea0001a4ef00 ffff8800693bc558
+Call Trace:
+ [<ffffffff81cc68ae>] dump_stack+0x46/0x58
+ [<ffffffff811fd848>] print_trailer+0xf8/0x160
+ [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
+ [<ffffffff811ff0f5>] object_err+0x35/0x40
+ [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
+ [<ffffffff8120b9fa>] kasan_report_error+0x38a/0x3f0
+ [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
+ [<ffffffff8120b344>] ? kasan_unpoison_shadow+0x14/0x40
+ [<ffffffff8120a79f>] ? kasan_poison_shadow+0x2f/0x40
+ [<ffffffffa00026a7>] ? kmem_cache_oob+0xc3/0xc3 [test_kasan]
+ [<ffffffff8120a995>] __asan_store1+0x75/0xb0
+ [<ffffffffa0002601>] ? kmem_cache_oob+0x1d/0xc3 [test_kasan]
+ [<ffffffffa0002065>] ? kmalloc_oob_right+0x65/0x75 [test_kasan]
+ [<ffffffffa0002065>] kmalloc_oob_right+0x65/0x75 [test_kasan]
+ [<ffffffffa00026b0>] init_module+0x9/0x47 [test_kasan]
+ [<ffffffff810002d9>] do_one_initcall+0x99/0x200
+ [<ffffffff811e4e5c>] ? __vunmap+0xec/0x160
+ [<ffffffff81114f63>] load_module+0x2cb3/0x3b20
+ [<ffffffff8110fd70>] ? m_show+0x240/0x240
+ [<ffffffff81115f06>] SyS_finit_module+0x76/0x80
+ [<ffffffff81cd3129>] system_call_fastpath+0x12/0x17
+Memory state around the buggy address:
+ ffff8800693bc300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff8800693bc380: fc fc 00 00 00 00 00 00 00 00 00 00 00 00 00 fc
+ ffff8800693bc400: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff8800693bc480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff8800693bc500: fc fc fc fc fc fc fc fc fc fc fc 00 00 00 00 00
+>ffff8800693bc580: 00 00 00 00 00 00 00 00 00 00 03 fc fc fc fc fc
+ ^
+ ffff8800693bc600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff8800693bc680: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff8800693bc700: fc fc fc fc fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8800693bc780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8800693bc800: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+==================================================================
+
+First sections describe slub object where bad access happened.
+See 'SLUB Debug output' section in Documentation/vm/slub.txt for details.
+
+In the last section the report shows memory state around the accessed address.
+Reading this part requires some more understanding of how KASAN works.
+
+Each 8 bytes of memory are encoded in one shadow byte as accessible,
+partially accessible, freed or they can be part of a redzone.
+We use the following encoding for each shadow byte: 0 means that all 8 bytes
+of the corresponding memory region are accessible; number N (1 <= N <= 7) means
+that the first N bytes are accessible, and other (8 - N) bytes are not;
+any negative value indicates that the entire 8-byte word is inaccessible.
+We use different negative values to distinguish between different kinds of
+inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h).
+
+In the report above the arrows point to the shadow byte 03, which means that
+the accessed address is partially accessible.
+
+
+2. Implementation details
+========================
+
+From a high level, our approach to memory error detection is similar to that
+of kmemcheck: use shadow memory to record whether each byte of memory is safe
+to access, and use compile-time instrumentation to check shadow memory on each
+memory access.
+
+AddressSanitizer dedicates 1/8 of kernel memory to its shadow memory
+(e.g. 16TB to cover 128TB on x86_64) and uses direct mapping with a scale and
+offset to translate a memory address to its corresponding shadow address.
+
+Here is the function witch translate an address to its corresponding shadow
+address:
+
+static inline void *kasan_mem_to_shadow(const void *addr)
+{
+ return ((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
+ + KASAN_SHADOW_OFFSET;
+}
+
+where KASAN_SHADOW_SCALE_SHIFT = 3.
+
+Compile-time instrumentation used for checking memory accesses. Compiler inserts
+function calls (__asan_load*(addr), __asan_store*(addr)) before each memory
+access of size 1, 2, 4, 8 or 16. These functions check whether memory access is
+valid or not by checking corresponding shadow memory.
+
+GCC 5.0 has possibility to perform inline instrumentation. Instead of making
+function calls GCC directly inserts the code to check the shadow memory.
+This option significantly enlarges kernel but it gives x1.1-x2 performance
+boost over outline instrumented kernel.
diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX
new file mode 100644
index 000000000..8c5e6aa78
--- /dev/null
+++ b/Documentation/kbuild/00-INDEX
@@ -0,0 +1,14 @@
+00-INDEX
+ - this file: info on the kernel build process
+headers_install.txt
+ - how to export Linux headers for use by userspace
+kbuild.txt
+ - developer information on kbuild
+kconfig.txt
+ - usage help for make *config
+kconfig-language.txt
+ - specification of Config Language, the language in Kconfig files
+makefiles.txt
+ - developer information for linux kernel makefiles
+modules.txt
+ - how to build modules and to install them
diff --git a/Documentation/kbuild/headers_install.txt b/Documentation/kbuild/headers_install.txt
new file mode 100644
index 000000000..951eb9f1e
--- /dev/null
+++ b/Documentation/kbuild/headers_install.txt
@@ -0,0 +1,47 @@
+Exporting kernel headers for use by userspace
+=============================================
+
+The "make headers_install" command exports the kernel's header files in a
+form suitable for use by userspace programs.
+
+The linux kernel's exported header files describe the API for user space
+programs attempting to use kernel services. These kernel header files are
+used by the system's C library (such as glibc or uClibc) to define available
+system calls, as well as constants and structures to be used with these
+system calls. The C library's header files include the kernel header files
+from the "linux" subdirectory. The system's libc headers are usually
+installed at the default location /usr/include and the kernel headers in
+subdirectories under that (most notably /usr/include/linux and
+/usr/include/asm).
+
+Kernel headers are backwards compatible, but not forwards compatible. This
+means that a program built against a C library using older kernel headers
+should run on a newer kernel (although it may not have access to new
+features), but a program built against newer kernel headers may not work on an
+older kernel.
+
+The "make headers_install" command can be run in the top level directory of the
+kernel source code (or using a standard out-of-tree build). It takes two
+optional arguments:
+
+ make headers_install ARCH=i386 INSTALL_HDR_PATH=/usr/include
+
+ARCH indicates which architecture to produce headers for, and defaults to the
+current architecture. The linux/asm directory of the exported kernel headers
+is platform-specific, to see a complete list of supported architectures use
+the command:
+
+ ls -d include/asm-* | sed 's/.*-//'
+
+INSTALL_HDR_PATH indicates where to install the headers. It defaults to
+"./usr/include".
+
+The command "make headers_install_all" exports headers for all architectures
+simultaneously. (This is mostly of interest to distribution maintainers,
+who create an architecture-independent tarball from the resulting include
+directory.) You also can use HDR_ARCH_LIST to specify list of architectures.
+Remember to provide the appropriate linux/asm directory via "mv" or "ln -s"
+before building a C library with headers exported this way.
+
+The kernel header export infrastructure is maintained by David Woodhouse
+<dwmw2@infradead.org>.
diff --git a/Documentation/kbuild/kbuild.txt b/Documentation/kbuild/kbuild.txt
new file mode 100644
index 000000000..6466704d4
--- /dev/null
+++ b/Documentation/kbuild/kbuild.txt
@@ -0,0 +1,235 @@
+Output files
+
+modules.order
+--------------------------------------------------
+This file records the order in which modules appear in Makefiles. This
+is used by modprobe to deterministically resolve aliases that match
+multiple modules.
+
+modules.builtin
+--------------------------------------------------
+This file lists all modules that are built into the kernel. This is used
+by modprobe to not fail when trying to load something builtin.
+
+
+Environment variables
+
+KCPPFLAGS
+--------------------------------------------------
+Additional options to pass when preprocessing. The preprocessing options
+will be used in all cases where kbuild does preprocessing including
+building C files and assembler files.
+
+KAFLAGS
+--------------------------------------------------
+Additional options to the assembler (for built-in and modules).
+
+AFLAGS_MODULE
+--------------------------------------------------
+Additional module specific options to use for $(AS).
+
+AFLAGS_KERNEL
+--------------------------------------------------
+Additional options for $(AS) when used for assembler
+code for code that is compiled as built-in.
+
+KCFLAGS
+--------------------------------------------------
+Additional options to the C compiler (for built-in and modules).
+
+CFLAGS_KERNEL
+--------------------------------------------------
+Additional options for $(CC) when used to compile
+code that is compiled as built-in.
+
+CFLAGS_MODULE
+--------------------------------------------------
+Additional module specific options to use for $(CC).
+
+LDFLAGS_MODULE
+--------------------------------------------------
+Additional options used for $(LD) when linking modules.
+
+LDFLAGS_vmlinux
+--------------------------------------------------
+Additional options passed to final link of vmlinux.
+
+KBUILD_VERBOSE
+--------------------------------------------------
+Set the kbuild verbosity. Can be assigned same values as "V=...".
+See make help for the full list.
+Setting "V=..." takes precedence over KBUILD_VERBOSE.
+
+KBUILD_EXTMOD
+--------------------------------------------------
+Set the directory to look for the kernel source when building external
+modules.
+The directory can be specified in several ways:
+1) Use "M=..." on the command line
+2) Environment variable KBUILD_EXTMOD
+3) Environment variable SUBDIRS
+The possibilities are listed in the order they take precedence.
+Using "M=..." will always override the others.
+
+KBUILD_OUTPUT
+--------------------------------------------------
+Specify the output directory when building the kernel.
+The output directory can also be specified using "O=...".
+Setting "O=..." takes precedence over KBUILD_OUTPUT.
+
+KBUILD_DEBARCH
+--------------------------------------------------
+For the deb-pkg target, allows overriding the normal heuristics deployed by
+deb-pkg. Normally deb-pkg attempts to guess the right architecture based on
+the UTS_MACHINE variable, and on some architectures also the kernel config.
+The value of KBUILD_DEBARCH is assumed (not checked) to be a valid Debian
+architecture.
+
+ARCH
+--------------------------------------------------
+Set ARCH to the architecture to be built.
+In most cases the name of the architecture is the same as the
+directory name found in the arch/ directory.
+But some architectures such as x86 and sparc have aliases.
+x86: i386 for 32 bit, x86_64 for 64 bit
+sparc: sparc for 32 bit, sparc64 for 64 bit
+
+CROSS_COMPILE
+--------------------------------------------------
+Specify an optional fixed part of the binutils filename.
+CROSS_COMPILE can be a part of the filename or the full path.
+
+CROSS_COMPILE is also used for ccache in some setups.
+
+CF
+--------------------------------------------------
+Additional options for sparse.
+CF is often used on the command-line like this:
+
+ make CF=-Wbitwise C=2
+
+INSTALL_PATH
+--------------------------------------------------
+INSTALL_PATH specifies where to place the updated kernel and system map
+images. Default is /boot, but you can set it to other values.
+
+INSTALLKERNEL
+--------------------------------------------------
+Install script called when using "make install".
+The default name is "installkernel".
+
+The script will be called with the following arguments:
+ $1 - kernel version
+ $2 - kernel image file
+ $3 - kernel map file
+ $4 - default install path (use root directory if blank)
+
+The implementation of "make install" is architecture specific
+and it may differ from the above.
+
+INSTALLKERNEL is provided to enable the possibility to
+specify a custom installer when cross compiling a kernel.
+
+MODLIB
+--------------------------------------------------
+Specify where to install modules.
+The default value is:
+
+ $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE)
+
+The value can be overridden in which case the default value is ignored.
+
+INSTALL_MOD_PATH
+--------------------------------------------------
+INSTALL_MOD_PATH specifies a prefix to MODLIB for module directory
+relocations required by build roots. This is not defined in the
+makefile but the argument can be passed to make if needed.
+
+INSTALL_MOD_STRIP
+--------------------------------------------------
+INSTALL_MOD_STRIP, if defined, will cause modules to be
+stripped after they are installed. If INSTALL_MOD_STRIP is '1', then
+the default option --strip-debug will be used. Otherwise,
+INSTALL_MOD_STRIP value will be used as the options to the strip command.
+
+INSTALL_FW_PATH
+--------------------------------------------------
+INSTALL_FW_PATH specifies where to install the firmware blobs.
+The default value is:
+
+ $(INSTALL_MOD_PATH)/lib/firmware
+
+The value can be overridden in which case the default value is ignored.
+
+INSTALL_HDR_PATH
+--------------------------------------------------
+INSTALL_HDR_PATH specifies where to install user space headers when
+executing "make headers_*".
+The default value is:
+
+ $(objtree)/usr
+
+$(objtree) is the directory where output files are saved.
+The output directory is often set using "O=..." on the commandline.
+
+The value can be overridden in which case the default value is ignored.
+
+KBUILD_MODPOST_WARN
+--------------------------------------------------
+KBUILD_MODPOST_WARN can be set to avoid errors in case of undefined
+symbols in the final module linking stage. It changes such errors
+into warnings.
+
+KBUILD_MODPOST_NOFINAL
+--------------------------------------------------
+KBUILD_MODPOST_NOFINAL can be set to skip the final link of modules.
+This is solely useful to speed up test compiles.
+
+KBUILD_EXTRA_SYMBOLS
+--------------------------------------------------
+For modules that use symbols from other modules.
+See more details in modules.txt.
+
+ALLSOURCE_ARCHS
+--------------------------------------------------
+For tags/TAGS/cscope targets, you can specify more than one arch
+to be included in the databases, separated by blank space. E.g.:
+
+ $ make ALLSOURCE_ARCHS="x86 mips arm" tags
+
+To get all available archs you can also specify all. E.g.:
+
+ $ make ALLSOURCE_ARCHS=all tags
+
+KBUILD_ENABLE_EXTRA_GCC_CHECKS
+--------------------------------------------------
+If enabled over the make command line with "W=1", it turns on additional
+gcc -W... options for more extensive build-time checking.
+
+KBUILD_BUILD_TIMESTAMP
+--------------------------------------------------
+Setting this to a date string overrides the timestamp used in the
+UTS_VERSION definition (uname -v in the running kernel). The value has to
+be a string that can be passed to date -d. The default value
+is the output of the date command at one point during build.
+
+KBUILD_BUILD_USER, KBUILD_BUILD_HOST
+--------------------------------------------------
+These two variables allow to override the user@host string displayed during
+boot and in /proc/version. The default value is the output of the commands
+whoami and host, respectively.
+
+KBUILD_LDS
+--------------------------------------------------
+The linker script with full path. Assigned by the top-level Makefile.
+
+KBUILD_VMLINUX_INIT
+--------------------------------------------------
+All object files for the init (first) part of vmlinux.
+Files specified with KBUILD_VMLINUX_INIT are linked first.
+
+KBUILD_VMLINUX_MAIN
+--------------------------------------------------
+All object files for the main part of vmlinux.
+KBUILD_VMLINUX_INIT and KBUILD_VMLINUX_MAIN together specify
+all the object files used to link vmlinux.
diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
new file mode 100644
index 000000000..350f733bf
--- /dev/null
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -0,0 +1,395 @@
+Introduction
+------------
+
+The configuration database is a collection of configuration options
+organized in a tree structure:
+
+ +- Code maturity level options
+ | +- Prompt for development and/or incomplete code/drivers
+ +- General setup
+ | +- Networking support
+ | +- System V IPC
+ | +- BSD Process Accounting
+ | +- Sysctl support
+ +- Loadable module support
+ | +- Enable loadable module support
+ | +- Set version information on all module symbols
+ | +- Kernel module loader
+ +- ...
+
+Every entry has its own dependencies. These dependencies are used
+to determine the visibility of an entry. Any child entry is only
+visible if its parent entry is also visible.
+
+Menu entries
+------------
+
+Most entries define a config option; all other entries help to organize
+them. A single configuration option is defined like this:
+
+config MODVERSIONS
+ bool "Set version information on all module symbols"
+ depends on MODULES
+ help
+ Usually, modules have to be recompiled whenever you switch to a new
+ kernel. ...
+
+Every line starts with a key word and can be followed by multiple
+arguments. "config" starts a new config entry. The following lines
+define attributes for this config option. Attributes can be the type of
+the config option, input prompt, dependencies, help text and default
+values. A config option can be defined multiple times with the same
+name, but every definition can have only a single input prompt and the
+type must not conflict.
+
+Menu attributes
+---------------
+
+A menu entry can have a number of attributes. Not all of them are
+applicable everywhere (see syntax).
+
+- type definition: "bool"/"tristate"/"string"/"hex"/"int"
+ Every config option must have a type. There are only two basic types:
+ tristate and string; the other types are based on these two. The type
+ definition optionally accepts an input prompt, so these two examples
+ are equivalent:
+
+ bool "Networking support"
+ and
+ bool
+ prompt "Networking support"
+
+- input prompt: "prompt" <prompt> ["if" <expr>]
+ Every menu entry can have at most one prompt, which is used to display
+ to the user. Optionally dependencies only for this prompt can be added
+ with "if".
+
+- default value: "default" <expr> ["if" <expr>]
+ A config option can have any number of default values. If multiple
+ default values are visible, only the first defined one is active.
+ Default values are not limited to the menu entry where they are
+ defined. This means the default can be defined somewhere else or be
+ overridden by an earlier definition.
+ The default value is only assigned to the config symbol if no other
+ value was set by the user (via the input prompt above). If an input
+ prompt is visible the default value is presented to the user and can
+ be overridden by him.
+ Optionally, dependencies only for this default value can be added with
+ "if".
+
+- type definition + default value:
+ "def_bool"/"def_tristate" <expr> ["if" <expr>]
+ This is a shorthand notation for a type definition plus a value.
+ Optionally dependencies for this default value can be added with "if".
+
+- dependencies: "depends on" <expr>
+ This defines a dependency for this menu entry. If multiple
+ dependencies are defined, they are connected with '&&'. Dependencies
+ are applied to all other options within this menu entry (which also
+ accept an "if" expression), so these two examples are equivalent:
+
+ bool "foo" if BAR
+ default y if BAR
+ and
+ depends on BAR
+ bool "foo"
+ default y
+
+- reverse dependencies: "select" <symbol> ["if" <expr>]
+ While normal dependencies reduce the upper limit of a symbol (see
+ below), reverse dependencies can be used to force a lower limit of
+ another symbol. The value of the current menu symbol is used as the
+ minimal value <symbol> can be set to. If <symbol> is selected multiple
+ times, the limit is set to the largest selection.
+ Reverse dependencies can only be used with boolean or tristate
+ symbols.
+ Note:
+ select should be used with care. select will force
+ a symbol to a value without visiting the dependencies.
+ By abusing select you are able to select a symbol FOO even
+ if FOO depends on BAR that is not set.
+ In general use select only for non-visible symbols
+ (no prompts anywhere) and for symbols with no dependencies.
+ That will limit the usefulness but on the other hand avoid
+ the illegal configurations all over.
+
+- limiting menu display: "visible if" <expr>
+ This attribute is only applicable to menu blocks, if the condition is
+ false, the menu block is not displayed to the user (the symbols
+ contained there can still be selected by other symbols, though). It is
+ similar to a conditional "prompt" attribute for individual menu
+ entries. Default value of "visible" is true.
+
+- numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
+ This allows to limit the range of possible input values for int
+ and hex symbols. The user can only input a value which is larger than
+ or equal to the first symbol and smaller than or equal to the second
+ symbol.
+
+- help text: "help" or "---help---"
+ This defines a help text. The end of the help text is determined by
+ the indentation level, this means it ends at the first line which has
+ a smaller indentation than the first line of the help text.
+ "---help---" and "help" do not differ in behaviour, "---help---" is
+ used to help visually separate configuration logic from help within
+ the file as an aid to developers.
+
+- misc options: "option" <symbol>[=<value>]
+ Various less common options can be defined via this option syntax,
+ which can modify the behaviour of the menu entry and its config
+ symbol. These options are currently possible:
+
+ - "defconfig_list"
+ This declares a list of default entries which can be used when
+ looking for the default configuration (which is used when the main
+ .config doesn't exists yet.)
+
+ - "modules"
+ This declares the symbol to be used as the MODULES symbol, which
+ enables the third modular state for all config symbols.
+ At most one symbol may have the "modules" option set.
+
+ - "env"=<value>
+ This imports the environment variable into Kconfig. It behaves like
+ a default, except that the value comes from the environment, this
+ also means that the behaviour when mixing it with normal defaults is
+ undefined at this point. The symbol is currently not exported back
+ to the build environment (if this is desired, it can be done via
+ another symbol).
+
+ - "allnoconfig_y"
+ This declares the symbol as one that should have the value y when
+ using "allnoconfig". Used for symbols that hide other symbols.
+
+Menu dependencies
+-----------------
+
+Dependencies define the visibility of a menu entry and can also reduce
+the input range of tristate symbols. The tristate logic used in the
+expressions uses one more state than normal boolean logic to express the
+module state. Dependency expressions have the following syntax:
+
+<expr> ::= <symbol> (1)
+ <symbol> '=' <symbol> (2)
+ <symbol> '!=' <symbol> (3)
+ '(' <expr> ')' (4)
+ '!' <expr> (5)
+ <expr> '&&' <expr> (6)
+ <expr> '||' <expr> (7)
+
+Expressions are listed in decreasing order of precedence.
+
+(1) Convert the symbol into an expression. Boolean and tristate symbols
+ are simply converted into the respective expression values. All
+ other symbol types result in 'n'.
+(2) If the values of both symbols are equal, it returns 'y',
+ otherwise 'n'.
+(3) If the values of both symbols are equal, it returns 'n',
+ otherwise 'y'.
+(4) Returns the value of the expression. Used to override precedence.
+(5) Returns the result of (2-/expr/).
+(6) Returns the result of min(/expr/, /expr/).
+(7) Returns the result of max(/expr/, /expr/).
+
+An expression can have a value of 'n', 'm' or 'y' (or 0, 1, 2
+respectively for calculations). A menu entry becomes visible when its
+expression evaluates to 'm' or 'y'.
+
+There are two types of symbols: constant and non-constant symbols.
+Non-constant symbols are the most common ones and are defined with the
+'config' statement. Non-constant symbols consist entirely of alphanumeric
+characters or underscores.
+Constant symbols are only part of expressions. Constant symbols are
+always surrounded by single or double quotes. Within the quote, any
+other character is allowed and the quotes can be escaped using '\'.
+
+Menu structure
+--------------
+
+The position of a menu entry in the tree is determined in two ways. First
+it can be specified explicitly:
+
+menu "Network device support"
+ depends on NET
+
+config NETDEVICES
+ ...
+
+endmenu
+
+All entries within the "menu" ... "endmenu" block become a submenu of
+"Network device support". All subentries inherit the dependencies from
+the menu entry, e.g. this means the dependency "NET" is added to the
+dependency list of the config option NETDEVICES.
+
+The other way to generate the menu structure is done by analyzing the
+dependencies. If a menu entry somehow depends on the previous entry, it
+can be made a submenu of it. First, the previous (parent) symbol must
+be part of the dependency list and then one of these two conditions
+must be true:
+- the child entry must become invisible, if the parent is set to 'n'
+- the child entry must only be visible, if the parent is visible
+
+config MODULES
+ bool "Enable loadable module support"
+
+config MODVERSIONS
+ bool "Set version information on all module symbols"
+ depends on MODULES
+
+comment "module support disabled"
+ depends on !MODULES
+
+MODVERSIONS directly depends on MODULES, this means it's only visible if
+MODULES is different from 'n'. The comment on the other hand is always
+visible when MODULES is visible (the (empty) dependency of MODULES is
+also part of the comment dependencies).
+
+
+Kconfig syntax
+--------------
+
+The configuration file describes a series of menu entries, where every
+line starts with a keyword (except help texts). The following keywords
+end a menu entry:
+- config
+- menuconfig
+- choice/endchoice
+- comment
+- menu/endmenu
+- if/endif
+- source
+The first five also start the definition of a menu entry.
+
+config:
+
+ "config" <symbol>
+ <config options>
+
+This defines a config symbol <symbol> and accepts any of above
+attributes as options.
+
+menuconfig:
+ "menuconfig" <symbol>
+ <config options>
+
+This is similar to the simple config entry above, but it also gives a
+hint to front ends, that all suboptions should be displayed as a
+separate list of options.
+
+choices:
+
+ "choice" [symbol]
+ <choice options>
+ <choice block>
+ "endchoice"
+
+This defines a choice group and accepts any of the above attributes as
+options. A choice can only be of type bool or tristate, while a boolean
+choice only allows a single config entry to be selected, a tristate
+choice also allows any number of config entries to be set to 'm'. This
+can be used if multiple drivers for a single hardware exists and only a
+single driver can be compiled/loaded into the kernel, but all drivers
+can be compiled as modules.
+A choice accepts another option "optional", which allows to set the
+choice to 'n' and no entry needs to be selected.
+If no [symbol] is associated with a choice, then you can not have multiple
+definitions of that choice. If a [symbol] is associated to the choice,
+then you may define the same choice (ie. with the same entries) in another
+place.
+
+comment:
+
+ "comment" <prompt>
+ <comment options>
+
+This defines a comment which is displayed to the user during the
+configuration process and is also echoed to the output files. The only
+possible options are dependencies.
+
+menu:
+
+ "menu" <prompt>
+ <menu options>
+ <menu block>
+ "endmenu"
+
+This defines a menu block, see "Menu structure" above for more
+information. The only possible options are dependencies and "visible"
+attributes.
+
+if:
+
+ "if" <expr>
+ <if block>
+ "endif"
+
+This defines an if block. The dependency expression <expr> is appended
+to all enclosed menu entries.
+
+source:
+
+ "source" <prompt>
+
+This reads the specified configuration file. This file is always parsed.
+
+mainmenu:
+
+ "mainmenu" <prompt>
+
+This sets the config program's title bar if the config program chooses
+to use it. It should be placed at the top of the configuration, before any
+other statement.
+
+
+Kconfig hints
+-------------
+This is a collection of Kconfig tips, most of which aren't obvious at
+first glance and most of which have become idioms in several Kconfig
+files.
+
+Adding common features and make the usage configurable
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It is a common idiom to implement a feature/functionality that are
+relevant for some architectures but not all.
+The recommended way to do so is to use a config variable named HAVE_*
+that is defined in a common Kconfig file and selected by the relevant
+architectures.
+An example is the generic IOMAP functionality.
+
+We would in lib/Kconfig see:
+
+# Generic IOMAP is used to ...
+config HAVE_GENERIC_IOMAP
+
+config GENERIC_IOMAP
+ depends on HAVE_GENERIC_IOMAP && FOO
+
+And in lib/Makefile we would see:
+obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+
+For each architecture using the generic IOMAP functionality we would see:
+
+config X86
+ select ...
+ select HAVE_GENERIC_IOMAP
+ select ...
+
+Note: we use the existing config option and avoid creating a new
+config variable to select HAVE_GENERIC_IOMAP.
+
+Note: the use of the internal config variable HAVE_GENERIC_IOMAP, it is
+introduced to overcome the limitation of select which will force a
+config option to 'y' no matter the dependencies.
+The dependencies are moved to the symbol GENERIC_IOMAP and we avoid the
+situation where select forces a symbol equals to 'y'.
+
+Build as module only
+~~~~~~~~~~~~~~~~~~~~
+To restrict a component build to module-only, qualify its config symbol
+with "depends on m". E.g.:
+
+config FOO
+ depends on BAR && m
+
+limits FOO to module (=m) or disabled (=n).
diff --git a/Documentation/kbuild/kconfig.txt b/Documentation/kbuild/kconfig.txt
new file mode 100644
index 000000000..bbc99c0c1
--- /dev/null
+++ b/Documentation/kbuild/kconfig.txt
@@ -0,0 +1,237 @@
+This file contains some assistance for using "make *config".
+
+Use "make help" to list all of the possible configuration targets.
+
+The xconfig ('qconf') and menuconfig ('mconf') programs also
+have embedded help text. Be sure to check it for navigation,
+search, and other general help text.
+
+======================================================================
+General
+--------------------------------------------------
+
+New kernel releases often introduce new config symbols. Often more
+important, new kernel releases may rename config symbols. When
+this happens, using a previously working .config file and running
+"make oldconfig" won't necessarily produce a working new kernel
+for you, so you may find that you need to see what NEW kernel
+symbols have been introduced.
+
+To see a list of new config symbols when using "make oldconfig", use
+
+ cp user/some/old.config .config
+ make listnewconfig
+
+and the config program will list any new symbols, one per line.
+
+ scripts/diffconfig .config.old .config | less
+
+______________________________________________________________________
+Environment variables for '*config'
+
+KCONFIG_CONFIG
+--------------------------------------------------
+This environment variable can be used to specify a default kernel config
+file name to override the default name of ".config".
+
+KCONFIG_OVERWRITECONFIG
+--------------------------------------------------
+If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
+break symlinks when .config is a symlink to somewhere else.
+
+CONFIG_
+--------------------------------------------------
+If you set CONFIG_ in the environment, Kconfig will prefix all symbols
+with its value when saving the configuration, instead of using the default,
+"CONFIG_".
+
+______________________________________________________________________
+Environment variables for '{allyes/allmod/allno/rand}config'
+
+KCONFIG_ALLCONFIG
+--------------------------------------------------
+(partially based on lkml email from/by Rob Landley, re: miniconfig)
+--------------------------------------------------
+The allyesconfig/allmodconfig/allnoconfig/randconfig variants can also
+use the environment variable KCONFIG_ALLCONFIG as a flag or a filename
+that contains config symbols that the user requires to be set to a
+specific value. If KCONFIG_ALLCONFIG is used without a filename where
+KCONFIG_ALLCONFIG == "" or KCONFIG_ALLCONFIG == "1", "make *config"
+checks for a file named "all{yes/mod/no/def/random}.config"
+(corresponding to the *config command that was used) for symbol values
+that are to be forced. If this file is not found, it checks for a
+file named "all.config" to contain forced values.
+
+This enables you to create "miniature" config (miniconfig) or custom
+config files containing just the config symbols that you are interested
+in. Then the kernel config system generates the full .config file,
+including symbols of your miniconfig file.
+
+This 'KCONFIG_ALLCONFIG' file is a config file which contains
+(usually a subset of all) preset config symbols. These variable
+settings are still subject to normal dependency checks.
+
+Examples:
+ KCONFIG_ALLCONFIG=custom-notebook.config make allnoconfig
+or
+ KCONFIG_ALLCONFIG=mini.config make allnoconfig
+or
+ make KCONFIG_ALLCONFIG=mini.config allnoconfig
+
+These examples will disable most options (allnoconfig) but enable or
+disable the options that are explicitly listed in the specified
+mini-config files.
+
+______________________________________________________________________
+Environment variables for 'randconfig'
+
+KCONFIG_SEED
+--------------------------------------------------
+You can set this to the integer value used to seed the RNG, if you want
+to somehow debug the behaviour of the kconfig parser/frontends.
+If not set, the current time will be used.
+
+KCONFIG_PROBABILITY
+--------------------------------------------------
+This variable can be used to skew the probabilities. This variable can
+be unset or empty, or set to three different formats:
+ KCONFIG_PROBABILITY y:n split y:m:n split
+ -----------------------------------------------------------------
+ unset or empty 50 : 50 33 : 33 : 34
+ N N : 100-N N/2 : N/2 : 100-N
+ [1] N:M N+M : 100-(N+M) N : M : 100-(N+M)
+ [2] N:M:L N : 100-N M : L : 100-(M+L)
+
+where N, M and L are integers (in base 10) in the range [0,100], and so
+that:
+ [1] N+M is in the range [0,100]
+ [2] M+L is in the range [0,100]
+
+Examples:
+ KCONFIG_PROBABILITY=10
+ 10% of booleans will be set to 'y', 90% to 'n'
+ 5% of tristates will be set to 'y', 5% to 'm', 90% to 'n'
+ KCONFIG_PROBABILITY=15:25
+ 40% of booleans will be set to 'y', 60% to 'n'
+ 15% of tristates will be set to 'y', 25% to 'm', 60% to 'n'
+ KCONFIG_PROBABILITY=10:15:15
+ 10% of booleans will be set to 'y', 90% to 'n'
+ 15% of tristates will be set to 'y', 15% to 'm', 70% to 'n'
+
+______________________________________________________________________
+Environment variables for 'silentoldconfig'
+
+KCONFIG_NOSILENTUPDATE
+--------------------------------------------------
+If this variable has a non-blank value, it prevents silent kernel
+config updates (requires explicit updates).
+
+KCONFIG_AUTOCONFIG
+--------------------------------------------------
+This environment variable can be set to specify the path & name of the
+"auto.conf" file. Its default value is "include/config/auto.conf".
+
+KCONFIG_TRISTATE
+--------------------------------------------------
+This environment variable can be set to specify the path & name of the
+"tristate.conf" file. Its default value is "include/config/tristate.conf".
+
+KCONFIG_AUTOHEADER
+--------------------------------------------------
+This environment variable can be set to specify the path & name of the
+"autoconf.h" (header) file.
+Its default value is "include/generated/autoconf.h".
+
+
+======================================================================
+menuconfig
+--------------------------------------------------
+
+SEARCHING for CONFIG symbols
+
+Searching in menuconfig:
+
+ The Search function searches for kernel configuration symbol
+ names, so you have to know something close to what you are
+ looking for.
+
+ Example:
+ /hotplug
+ This lists all config symbols that contain "hotplug",
+ e.g., HOTPLUG_CPU, MEMORY_HOTPLUG.
+
+ For search help, enter / followed TAB-TAB-TAB (to highlight
+ <Help>) and Enter. This will tell you that you can also use
+ regular expressions (regexes) in the search string, so if you
+ are not interested in MEMORY_HOTPLUG, you could try
+
+ /^hotplug
+
+ When searching, symbols are sorted thus:
+ - first, exact matches, sorted alphabetically (an exact match
+ is when the search matches the complete symbol name);
+ - then, other matches, sorted alphabetically.
+ For example: ^ATH.K matches:
+ ATH5K ATH9K ATH5K_AHB ATH5K_DEBUG [...] ATH6KL ATH6KL_DEBUG
+ [...] ATH9K_AHB ATH9K_BTCOEX_SUPPORT ATH9K_COMMON [...]
+ of which only ATH5K and ATH9K match exactly and so are sorted
+ first (and in alphabetical order), then come all other symbols,
+ sorted in alphabetical order.
+
+______________________________________________________________________
+User interface options for 'menuconfig'
+
+MENUCONFIG_COLOR
+--------------------------------------------------
+It is possible to select different color themes using the variable
+MENUCONFIG_COLOR. To select a theme use:
+
+ make MENUCONFIG_COLOR=<theme> menuconfig
+
+Available themes are:
+ mono => selects colors suitable for monochrome displays
+ blackbg => selects a color scheme with black background
+ classic => theme with blue background. The classic look
+ bluetitle => a LCD friendly version of classic. (default)
+
+MENUCONFIG_MODE
+--------------------------------------------------
+This mode shows all sub-menus in one large tree.
+
+Example:
+ make MENUCONFIG_MODE=single_menu menuconfig
+
+
+======================================================================
+xconfig
+--------------------------------------------------
+
+Searching in xconfig:
+
+ The Search function searches for kernel configuration symbol
+ names, so you have to know something close to what you are
+ looking for.
+
+ Example:
+ Ctrl-F hotplug
+ or
+ Menu: File, Search, hotplug
+
+ lists all config symbol entries that contain "hotplug" in
+ the symbol name. In this Search dialog, you may change the
+ config setting for any of the entries that are not grayed out.
+ You can also enter a different search string without having
+ to return to the main menu.
+
+
+======================================================================
+gconfig
+--------------------------------------------------
+
+Searching in gconfig:
+
+ None (gconfig isn't maintained as well as xconfig or menuconfig);
+ however, gconfig does have a few more viewing choices than
+ xconfig does.
+
+###
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
new file mode 100644
index 000000000..74b6c6d97
--- /dev/null
+++ b/Documentation/kbuild/makefiles.txt
@@ -0,0 +1,1410 @@
+Linux Kernel Makefiles
+
+This document describes the Linux kernel Makefiles.
+
+=== Table of Contents
+
+ === 1 Overview
+ === 2 Who does what
+ === 3 The kbuild files
+ --- 3.1 Goal definitions
+ --- 3.2 Built-in object goals - obj-y
+ --- 3.3 Loadable module goals - obj-m
+ --- 3.4 Objects which export symbols
+ --- 3.5 Library file goals - lib-y
+ --- 3.6 Descending down in directories
+ --- 3.7 Compilation flags
+ --- 3.8 Command line dependency
+ --- 3.9 Dependency tracking
+ --- 3.10 Special Rules
+ --- 3.11 $(CC) support functions
+ --- 3.12 $(LD) support functions
+
+ === 4 Host Program support
+ --- 4.1 Simple Host Program
+ --- 4.2 Composite Host Programs
+ --- 4.3 Using C++ for host programs
+ --- 4.4 Controlling compiler options for host programs
+ --- 4.5 When host programs are actually built
+ --- 4.6 Using hostprogs-$(CONFIG_FOO)
+
+ === 5 Kbuild clean infrastructure
+
+ === 6 Architecture Makefiles
+ --- 6.1 Set variables to tweak the build to the architecture
+ --- 6.2 Add prerequisites to archheaders:
+ --- 6.3 Add prerequisites to archprepare:
+ --- 6.4 List directories to visit when descending
+ --- 6.5 Architecture-specific boot images
+ --- 6.6 Building non-kbuild targets
+ --- 6.7 Commands useful for building a boot image
+ --- 6.8 Custom kbuild commands
+ --- 6.9 Preprocessing linker scripts
+ --- 6.10 Generic header files
+
+ === 7 Kbuild syntax for exported headers
+ --- 7.1 header-y
+ --- 7.2 genhdr-y
+ --- 7.3 destination-y
+ --- 7.4 generic-y
+
+ === 8 Kbuild Variables
+ === 9 Makefile language
+ === 10 Credits
+ === 11 TODO
+
+=== 1 Overview
+
+The Makefiles have five parts:
+
+ Makefile the top Makefile.
+ .config the kernel configuration file.
+ arch/$(ARCH)/Makefile the arch Makefile.
+ scripts/Makefile.* common rules etc. for all kbuild Makefiles.
+ kbuild Makefiles there are about 500 of these.
+
+The top Makefile reads the .config file, which comes from the kernel
+configuration process.
+
+The top Makefile is responsible for building two major products: vmlinux
+(the resident kernel image) and modules (any module files).
+It builds these goals by recursively descending into the subdirectories of
+the kernel source tree.
+The list of subdirectories which are visited depends upon the kernel
+configuration. The top Makefile textually includes an arch Makefile
+with the name arch/$(ARCH)/Makefile. The arch Makefile supplies
+architecture-specific information to the top Makefile.
+
+Each subdirectory has a kbuild Makefile which carries out the commands
+passed down from above. The kbuild Makefile uses information from the
+.config file to construct various file lists used by kbuild to build
+any built-in or modular targets.
+
+scripts/Makefile.* contains all the definitions/rules etc. that
+are used to build the kernel based on the kbuild makefiles.
+
+
+=== 2 Who does what
+
+People have four different relationships with the kernel Makefiles.
+
+*Users* are people who build kernels. These people type commands such as
+"make menuconfig" or "make". They usually do not read or edit
+any kernel Makefiles (or any other source files).
+
+*Normal developers* are people who work on features such as device
+drivers, file systems, and network protocols. These people need to
+maintain the kbuild Makefiles for the subsystem they are
+working on. In order to do this effectively, they need some overall
+knowledge about the kernel Makefiles, plus detailed knowledge about the
+public interface for kbuild.
+
+*Arch developers* are people who work on an entire architecture, such
+as sparc or ia64. Arch developers need to know about the arch Makefile
+as well as kbuild Makefiles.
+
+*Kbuild developers* are people who work on the kernel build system itself.
+These people need to know about all aspects of the kernel Makefiles.
+
+This document is aimed towards normal developers and arch developers.
+
+
+=== 3 The kbuild files
+
+Most Makefiles within the kernel are kbuild Makefiles that use the
+kbuild infrastructure. This chapter introduces the syntax used in the
+kbuild makefiles.
+The preferred name for the kbuild files are 'Makefile' but 'Kbuild' can
+be used and if both a 'Makefile' and a 'Kbuild' file exists, then the 'Kbuild'
+file will be used.
+
+Section 3.1 "Goal definitions" is a quick intro, further chapters provide
+more details, with real examples.
+
+--- 3.1 Goal definitions
+
+ Goal definitions are the main part (heart) of the kbuild Makefile.
+ These lines define the files to be built, any special compilation
+ options, and any subdirectories to be entered recursively.
+
+ The most simple kbuild makefile contains one line:
+
+ Example:
+ obj-y += foo.o
+
+ This tells kbuild that there is one object in that directory, named
+ foo.o. foo.o will be built from foo.c or foo.S.
+
+ If foo.o shall be built as a module, the variable obj-m is used.
+ Therefore the following pattern is often used:
+
+ Example:
+ obj-$(CONFIG_FOO) += foo.o
+
+ $(CONFIG_FOO) evaluates to either y (for built-in) or m (for module).
+ If CONFIG_FOO is neither y nor m, then the file will not be compiled
+ nor linked.
+
+--- 3.2 Built-in object goals - obj-y
+
+ The kbuild Makefile specifies object files for vmlinux
+ in the $(obj-y) lists. These lists depend on the kernel
+ configuration.
+
+ Kbuild compiles all the $(obj-y) files. It then calls
+ "$(LD) -r" to merge these files into one built-in.o file.
+ built-in.o is later linked into vmlinux by the parent Makefile.
+
+ The order of files in $(obj-y) is significant. Duplicates in
+ the lists are allowed: the first instance will be linked into
+ built-in.o and succeeding instances will be ignored.
+
+ Link order is significant, because certain functions
+ (module_init() / __initcall) will be called during boot in the
+ order they appear. So keep in mind that changing the link
+ order may e.g. change the order in which your SCSI
+ controllers are detected, and thus your disks are renumbered.
+
+ Example:
+ #drivers/isdn/i4l/Makefile
+ # Makefile for the kernel ISDN subsystem and device drivers.
+ # Each configuration option enables a list of files.
+ obj-$(CONFIG_ISDN_I4L) += isdn.o
+ obj-$(CONFIG_ISDN_PPP_BSDCOMP) += isdn_bsdcomp.o
+
+--- 3.3 Loadable module goals - obj-m
+
+ $(obj-m) specifies object files which are built as loadable
+ kernel modules.
+
+ A module may be built from one source file or several source
+ files. In the case of one source file, the kbuild makefile
+ simply adds the file to $(obj-m).
+
+ Example:
+ #drivers/isdn/i4l/Makefile
+ obj-$(CONFIG_ISDN_PPP_BSDCOMP) += isdn_bsdcomp.o
+
+ Note: In this example $(CONFIG_ISDN_PPP_BSDCOMP) evaluates to 'm'
+
+ If a kernel module is built from several source files, you specify
+ that you want to build a module in the same way as above; however,
+ kbuild needs to know which object files you want to build your
+ module from, so you have to tell it by setting a $(<module_name>-y)
+ variable.
+
+ Example:
+ #drivers/isdn/i4l/Makefile
+ obj-$(CONFIG_ISDN_I4L) += isdn.o
+ isdn-y := isdn_net_lib.o isdn_v110.o isdn_common.o
+
+ In this example, the module name will be isdn.o. Kbuild will
+ compile the objects listed in $(isdn-y) and then run
+ "$(LD) -r" on the list of these files to generate isdn.o.
+
+ Due to kbuild recognizing $(<module_name>-y) for composite objects,
+ you can use the value of a CONFIG_ symbol to optionally include an
+ object file as part of a composite object.
+
+ Example:
+ #fs/ext2/Makefile
+ obj-$(CONFIG_EXT2_FS) += ext2.o
+ ext2-y := balloc.o dir.o file.o ialloc.o inode.o ioctl.o \
+ namei.o super.o symlink.o
+ ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o \
+ xattr_trusted.o
+
+ In this example, xattr.o, xattr_user.o and xattr_trusted.o are only
+ part of the composite object ext2.o if $(CONFIG_EXT2_FS_XATTR)
+ evaluates to 'y'.
+
+ Note: Of course, when you are building objects into the kernel,
+ the syntax above will also work. So, if you have CONFIG_EXT2_FS=y,
+ kbuild will build an ext2.o file for you out of the individual
+ parts and then link this into built-in.o, as you would expect.
+
+--- 3.4 Objects which export symbols
+
+ No special notation is required in the makefiles for
+ modules exporting symbols.
+
+--- 3.5 Library file goals - lib-y
+
+ Objects listed with obj-* are used for modules, or
+ combined in a built-in.o for that specific directory.
+ There is also the possibility to list objects that will
+ be included in a library, lib.a.
+ All objects listed with lib-y are combined in a single
+ library for that directory.
+ Objects that are listed in obj-y and additionally listed in
+ lib-y will not be included in the library, since they will
+ be accessible anyway.
+ For consistency, objects listed in lib-m will be included in lib.a.
+
+ Note that the same kbuild makefile may list files to be built-in
+ and to be part of a library. Therefore the same directory
+ may contain both a built-in.o and a lib.a file.
+
+ Example:
+ #arch/x86/lib/Makefile
+ lib-y := delay.o
+
+ This will create a library lib.a based on delay.o. For kbuild to
+ actually recognize that there is a lib.a being built, the directory
+ shall be listed in libs-y.
+ See also "6.4 List directories to visit when descending".
+
+ Use of lib-y is normally restricted to lib/ and arch/*/lib.
+
+--- 3.6 Descending down in directories
+
+ A Makefile is only responsible for building objects in its own
+ directory. Files in subdirectories should be taken care of by
+ Makefiles in these subdirs. The build system will automatically
+ invoke make recursively in subdirectories, provided you let it know of
+ them.
+
+ To do so, obj-y and obj-m are used.
+ ext2 lives in a separate directory, and the Makefile present in fs/
+ tells kbuild to descend down using the following assignment.
+
+ Example:
+ #fs/Makefile
+ obj-$(CONFIG_EXT2_FS) += ext2/
+
+ If CONFIG_EXT2_FS is set to either 'y' (built-in) or 'm' (modular)
+ the corresponding obj- variable will be set, and kbuild will descend
+ down in the ext2 directory.
+ Kbuild only uses this information to decide that it needs to visit
+ the directory, it is the Makefile in the subdirectory that
+ specifies what is modular and what is built-in.
+
+ It is good practice to use a CONFIG_ variable when assigning directory
+ names. This allows kbuild to totally skip the directory if the
+ corresponding CONFIG_ option is neither 'y' nor 'm'.
+
+--- 3.7 Compilation flags
+
+ ccflags-y, asflags-y and ldflags-y
+ These three flags apply only to the kbuild makefile in which they
+ are assigned. They are used for all the normal cc, as and ld
+ invocations happening during a recursive build.
+ Note: Flags with the same behaviour were previously named:
+ EXTRA_CFLAGS, EXTRA_AFLAGS and EXTRA_LDFLAGS.
+ They are still supported but their usage is deprecated.
+
+ ccflags-y specifies options for compiling with $(CC).
+
+ Example:
+ # drivers/acpi/Makefile
+ ccflags-y := -Os
+ ccflags-$(CONFIG_ACPI_DEBUG) += -DACPI_DEBUG_OUTPUT
+
+ This variable is necessary because the top Makefile owns the
+ variable $(KBUILD_CFLAGS) and uses it for compilation flags for the
+ entire tree.
+
+ asflags-y specifies options for assembling with $(AS).
+
+ Example:
+ #arch/sparc/kernel/Makefile
+ asflags-y := -ansi
+
+ ldflags-y specifies options for linking with $(LD).
+
+ Example:
+ #arch/cris/boot/compressed/Makefile
+ ldflags-y += -T $(srctree)/$(src)/decompress_$(arch-y).lds
+
+ subdir-ccflags-y, subdir-asflags-y
+ The two flags listed above are similar to ccflags-y and asflags-y.
+ The difference is that the subdir- variants have effect for the kbuild
+ file where they are present and all subdirectories.
+ Options specified using subdir-* are added to the commandline before
+ the options specified using the non-subdir variants.
+
+ Example:
+ subdir-ccflags-y := -Werror
+
+ CFLAGS_$@, AFLAGS_$@
+
+ CFLAGS_$@ and AFLAGS_$@ only apply to commands in current
+ kbuild makefile.
+
+ $(CFLAGS_$@) specifies per-file options for $(CC). The $@
+ part has a literal value which specifies the file that it is for.
+
+ Example:
+ # drivers/scsi/Makefile
+ CFLAGS_aha152x.o = -DAHA152X_STAT -DAUTOCONF
+ CFLAGS_gdth.o = # -DDEBUG_GDTH=2 -D__SERIAL__ -D__COM2__ \
+ -DGDTH_STATISTICS
+
+ These two lines specify compilation flags for aha152x.o and gdth.o.
+
+ $(AFLAGS_$@) is a similar feature for source files in assembly
+ languages.
+
+ Example:
+ # arch/arm/kernel/Makefile
+ AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
+ AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312
+ AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt
+
+
+--- 3.9 Dependency tracking
+
+ Kbuild tracks dependencies on the following:
+ 1) All prerequisite files (both *.c and *.h)
+ 2) CONFIG_ options used in all prerequisite files
+ 3) Command-line used to compile target
+
+ Thus, if you change an option to $(CC) all affected files will
+ be re-compiled.
+
+--- 3.10 Special Rules
+
+ Special rules are used when the kbuild infrastructure does
+ not provide the required support. A typical example is
+ header files generated during the build process.
+ Another example are the architecture-specific Makefiles which
+ need special rules to prepare boot images etc.
+
+ Special rules are written as normal Make rules.
+ Kbuild is not executing in the directory where the Makefile is
+ located, so all special rules shall provide a relative
+ path to prerequisite files and target files.
+
+ Two variables are used when defining special rules:
+
+ $(src)
+ $(src) is a relative path which points to the directory
+ where the Makefile is located. Always use $(src) when
+ referring to files located in the src tree.
+
+ $(obj)
+ $(obj) is a relative path which points to the directory
+ where the target is saved. Always use $(obj) when
+ referring to generated files.
+
+ Example:
+ #drivers/scsi/Makefile
+ $(obj)/53c8xx_d.h: $(src)/53c7,8xx.scr $(src)/script_asm.pl
+ $(CPP) -DCHIP=810 - < $< | ... $(src)/script_asm.pl
+
+ This is a special rule, following the normal syntax
+ required by make.
+ The target file depends on two prerequisite files. References
+ to the target file are prefixed with $(obj), references
+ to prerequisites are referenced with $(src) (because they are not
+ generated files).
+
+ $(kecho)
+ echoing information to user in a rule is often a good practice
+ but when execution "make -s" one does not expect to see any output
+ except for warnings/errors.
+ To support this kbuild defines $(kecho) which will echo out the
+ text following $(kecho) to stdout except if "make -s" is used.
+
+ Example:
+ #arch/blackfin/boot/Makefile
+ $(obj)/vmImage: $(obj)/vmlinux.gz
+ $(call if_changed,uimage)
+ @$(kecho) 'Kernel: $@ is ready'
+
+
+--- 3.11 $(CC) support functions
+
+ The kernel may be built with several different versions of
+ $(CC), each supporting a unique set of features and options.
+ kbuild provides basic support to check for valid options for $(CC).
+ $(CC) is usually the gcc compiler, but other alternatives are
+ available.
+
+ as-option
+ as-option is used to check if $(CC) -- when used to compile
+ assembler (*.S) files -- supports the given option. An optional
+ second option may be specified if the first option is not supported.
+
+ Example:
+ #arch/sh/Makefile
+ cflags-y += $(call as-option,-Wa$(comma)-isa=$(isa-y),)
+
+ In the above example, cflags-y will be assigned the option
+ -Wa$(comma)-isa=$(isa-y) if it is supported by $(CC).
+ The second argument is optional, and if supplied will be used
+ if first argument is not supported.
+
+ cc-ldoption
+ cc-ldoption is used to check if $(CC) when used to link object files
+ supports the given option. An optional second option may be
+ specified if first option are not supported.
+
+ Example:
+ #arch/x86/kernel/Makefile
+ vsyscall-flags += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+
+ In the above example, vsyscall-flags will be assigned the option
+ -Wl$(comma)--hash-style=sysv if it is supported by $(CC).
+ The second argument is optional, and if supplied will be used
+ if first argument is not supported.
+
+ as-instr
+ as-instr checks if the assembler reports a specific instruction
+ and then outputs either option1 or option2
+ C escapes are supported in the test instruction
+ Note: as-instr-option uses KBUILD_AFLAGS for $(AS) options
+
+ cc-option
+ cc-option is used to check if $(CC) supports a given option, and if
+ not supported to use an optional second option.
+
+ Example:
+ #arch/x86/Makefile
+ cflags-y += $(call cc-option,-march=pentium-mmx,-march=i586)
+
+ In the above example, cflags-y will be assigned the option
+ -march=pentium-mmx if supported by $(CC), otherwise -march=i586.
+ The second argument to cc-option is optional, and if omitted,
+ cflags-y will be assigned no value if first option is not supported.
+ Note: cc-option uses KBUILD_CFLAGS for $(CC) options
+
+ cc-option-yn
+ cc-option-yn is used to check if gcc supports a given option
+ and return 'y' if supported, otherwise 'n'.
+
+ Example:
+ #arch/ppc/Makefile
+ biarch := $(call cc-option-yn, -m32)
+ aflags-$(biarch) += -a32
+ cflags-$(biarch) += -m32
+
+ In the above example, $(biarch) is set to y if $(CC) supports the -m32
+ option. When $(biarch) equals 'y', the expanded variables $(aflags-y)
+ and $(cflags-y) will be assigned the values -a32 and -m32,
+ respectively.
+ Note: cc-option-yn uses KBUILD_CFLAGS for $(CC) options
+
+ cc-option-align
+ gcc versions >= 3.0 changed the type of options used to specify
+ alignment of functions, loops etc. $(cc-option-align), when used
+ as prefix to the align options, will select the right prefix:
+ gcc < 3.00
+ cc-option-align = -malign
+ gcc >= 3.00
+ cc-option-align = -falign
+
+ Example:
+ KBUILD_CFLAGS += $(cc-option-align)-functions=4
+
+ In the above example, the option -falign-functions=4 is used for
+ gcc >= 3.00. For gcc < 3.00, -malign-functions=4 is used.
+ Note: cc-option-align uses KBUILD_CFLAGS for $(CC) options
+
+ cc-disable-warning
+ cc-disable-warning checks if gcc supports a given warning and returns
+ the commandline switch to disable it. This special function is needed,
+ because gcc 4.4 and later accept any unknown -Wno-* option and only
+ warn about it if there is another warning in the source file.
+
+ Example:
+ KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+
+ In the above example, -Wno-unused-but-set-variable will be added to
+ KBUILD_CFLAGS only if gcc really accepts it.
+
+ cc-version
+ cc-version returns a numerical version of the $(CC) compiler version.
+ The format is <major><minor> where both are two digits. So for example
+ gcc 3.41 would return 0341.
+ cc-version is useful when a specific $(CC) version is faulty in one
+ area, for example -mregparm=3 was broken in some gcc versions
+ even though the option was accepted by gcc.
+
+ Example:
+ #arch/x86/Makefile
+ cflags-y += $(shell \
+ if [ $(cc-version) -ge 0300 ] ; then \
+ echo "-mregparm=3"; fi ;)
+
+ In the above example, -mregparm=3 is only used for gcc version greater
+ than or equal to gcc 3.0.
+
+ cc-ifversion
+ cc-ifversion tests the version of $(CC) and equals the fourth parameter
+ if version expression is true, or the fifth (if given) if the version
+ expression is false.
+
+ Example:
+ #fs/reiserfs/Makefile
+ ccflags-y := $(call cc-ifversion, -lt, 0402, -O1)
+
+ In this example, ccflags-y will be assigned the value -O1 if the
+ $(CC) version is less than 4.2.
+ cc-ifversion takes all the shell operators:
+ -eq, -ne, -lt, -le, -gt, and -ge
+ The third parameter may be a text as in this example, but it may also
+ be an expanded variable or a macro.
+
+ cc-fullversion
+ cc-fullversion is useful when the exact version of gcc is needed.
+ One typical use-case is when a specific GCC version is broken.
+ cc-fullversion points out a more specific version than cc-version does.
+
+ Example:
+ #arch/powerpc/Makefile
+ $(Q)if test "$(cc-fullversion)" = "040200" ; then \
+ echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
+ false ; \
+ fi
+
+ In this example for a specific GCC version the build will error out
+ explaining to the user why it stops.
+
+ cc-cross-prefix
+ cc-cross-prefix is used to check if there exists a $(CC) in path with
+ one of the listed prefixes. The first prefix where there exist a
+ prefix$(CC) in the PATH is returned - and if no prefix$(CC) is found
+ then nothing is returned.
+ Additional prefixes are separated by a single space in the
+ call of cc-cross-prefix.
+ This functionality is useful for architecture Makefiles that try
+ to set CROSS_COMPILE to well-known values but may have several
+ values to select between.
+ It is recommended only to try to set CROSS_COMPILE if it is a cross
+ build (host arch is different from target arch). And if CROSS_COMPILE
+ is already set then leave it with the old value.
+
+ Example:
+ #arch/m68k/Makefile
+ ifneq ($(SUBARCH),$(ARCH))
+ ifeq ($(CROSS_COMPILE),)
+ CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu-)
+ endif
+ endif
+
+--- 3.12 $(LD) support functions
+
+ ld-option
+ ld-option is used to check if $(LD) supports the supplied option.
+ ld-option takes two options as arguments.
+ The second argument is an optional option that can be used if the
+ first option is not supported by $(LD).
+
+ Example:
+ #Makefile
+ LDFLAGS_vmlinux += $(call ld-option, -X)
+
+
+=== 4 Host Program support
+
+Kbuild supports building executables on the host for use during the
+compilation stage.
+Two steps are required in order to use a host executable.
+
+The first step is to tell kbuild that a host program exists. This is
+done utilising the variable hostprogs-y.
+
+The second step is to add an explicit dependency to the executable.
+This can be done in two ways. Either add the dependency in a rule,
+or utilise the variable $(always).
+Both possibilities are described in the following.
+
+--- 4.1 Simple Host Program
+
+ In some cases there is a need to compile and run a program on the
+ computer where the build is running.
+ The following line tells kbuild that the program bin2hex shall be
+ built on the build host.
+
+ Example:
+ hostprogs-y := bin2hex
+
+ Kbuild assumes in the above example that bin2hex is made from a single
+ c-source file named bin2hex.c located in the same directory as
+ the Makefile.
+
+--- 4.2 Composite Host Programs
+
+ Host programs can be made up based on composite objects.
+ The syntax used to define composite objects for host programs is
+ similar to the syntax used for kernel objects.
+ $(<executable>-objs) lists all objects used to link the final
+ executable.
+
+ Example:
+ #scripts/lxdialog/Makefile
+ hostprogs-y := lxdialog
+ lxdialog-objs := checklist.o lxdialog.o
+
+ Objects with extension .o are compiled from the corresponding .c
+ files. In the above example, checklist.c is compiled to checklist.o
+ and lxdialog.c is compiled to lxdialog.o.
+ Finally, the two .o files are linked to the executable, lxdialog.
+ Note: The syntax <executable>-y is not permitted for host-programs.
+
+--- 4.3 Using C++ for host programs
+
+ kbuild offers support for host programs written in C++. This was
+ introduced solely to support kconfig, and is not recommended
+ for general use.
+
+ Example:
+ #scripts/kconfig/Makefile
+ hostprogs-y := qconf
+ qconf-cxxobjs := qconf.o
+
+ In the example above the executable is composed of the C++ file
+ qconf.cc - identified by $(qconf-cxxobjs).
+
+ If qconf is composed of a mixture of .c and .cc files, then an
+ additional line can be used to identify this.
+
+ Example:
+ #scripts/kconfig/Makefile
+ hostprogs-y := qconf
+ qconf-cxxobjs := qconf.o
+ qconf-objs := check.o
+
+--- 4.4 Controlling compiler options for host programs
+
+ When compiling host programs, it is possible to set specific flags.
+ The programs will always be compiled utilising $(HOSTCC) passed
+ the options specified in $(HOSTCFLAGS).
+ To set flags that will take effect for all host programs created
+ in that Makefile, use the variable HOST_EXTRACFLAGS.
+
+ Example:
+ #scripts/lxdialog/Makefile
+ HOST_EXTRACFLAGS += -I/usr/include/ncurses
+
+ To set specific flags for a single file the following construction
+ is used:
+
+ Example:
+ #arch/ppc64/boot/Makefile
+ HOSTCFLAGS_piggyback.o := -DKERNELBASE=$(KERNELBASE)
+
+ It is also possible to specify additional options to the linker.
+
+ Example:
+ #scripts/kconfig/Makefile
+ HOSTLOADLIBES_qconf := -L$(QTDIR)/lib
+
+ When linking qconf, it will be passed the extra option
+ "-L$(QTDIR)/lib".
+
+--- 4.5 When host programs are actually built
+
+ Kbuild will only build host-programs when they are referenced
+ as a prerequisite.
+ This is possible in two ways:
+
+ (1) List the prerequisite explicitly in a special rule.
+
+ Example:
+ #drivers/pci/Makefile
+ hostprogs-y := gen-devlist
+ $(obj)/devlist.h: $(src)/pci.ids $(obj)/gen-devlist
+ ( cd $(obj); ./gen-devlist ) < $<
+
+ The target $(obj)/devlist.h will not be built before
+ $(obj)/gen-devlist is updated. Note that references to
+ the host programs in special rules must be prefixed with $(obj).
+
+ (2) Use $(always)
+ When there is no suitable special rule, and the host program
+ shall be built when a makefile is entered, the $(always)
+ variable shall be used.
+
+ Example:
+ #scripts/lxdialog/Makefile
+ hostprogs-y := lxdialog
+ always := $(hostprogs-y)
+
+ This will tell kbuild to build lxdialog even if not referenced in
+ any rule.
+
+--- 4.6 Using hostprogs-$(CONFIG_FOO)
+
+ A typical pattern in a Kbuild file looks like this:
+
+ Example:
+ #scripts/Makefile
+ hostprogs-$(CONFIG_KALLSYMS) += kallsyms
+
+ Kbuild knows about both 'y' for built-in and 'm' for module.
+ So if a config symbol evaluates to 'm', kbuild will still build
+ the binary. In other words, Kbuild handles hostprogs-m exactly
+ like hostprogs-y. But only hostprogs-y is recommended to be used
+ when no CONFIG symbols are involved.
+
+=== 5 Kbuild clean infrastructure
+
+"make clean" deletes most generated files in the obj tree where the kernel
+is compiled. This includes generated files such as host programs.
+Kbuild knows targets listed in $(hostprogs-y), $(hostprogs-m), $(always),
+$(extra-y) and $(targets). They are all deleted during "make clean".
+Files matching the patterns "*.[oas]", "*.ko", plus some additional files
+generated by kbuild are deleted all over the kernel src tree when
+"make clean" is executed.
+
+Additional files can be specified in kbuild makefiles by use of $(clean-files).
+
+ Example:
+ #lib/Makefile
+ clean-files := crc32table.h
+
+When executing "make clean", the two files "devlist.h classlist.h" will be
+deleted. Kbuild will assume files to be in the same relative directory as the
+Makefile, except if prefixed with $(objtree).
+
+To delete a directory hierarchy use:
+
+ Example:
+ #scripts/package/Makefile
+ clean-dirs := $(objtree)/debian/
+
+This will delete the directory debian in the toplevel directory, including all
+subdirectories.
+
+To exclude certain files from make clean, use the $(no-clean-files) variable.
+This is only a special case used in the top level Kbuild file:
+
+ Example:
+ #Kbuild
+ no-clean-files := $(bounds-file) $(offsets-file)
+
+Usually kbuild descends down in subdirectories due to "obj-* := dir/",
+but in the architecture makefiles where the kbuild infrastructure
+is not sufficient this sometimes needs to be explicit.
+
+ Example:
+ #arch/x86/boot/Makefile
+ subdir- := compressed/
+
+The above assignment instructs kbuild to descend down in the
+directory compressed/ when "make clean" is executed.
+
+To support the clean infrastructure in the Makefiles that build the
+final bootimage there is an optional target named archclean:
+
+ Example:
+ #arch/x86/Makefile
+ archclean:
+ $(Q)$(MAKE) $(clean)=arch/x86/boot
+
+When "make clean" is executed, make will descend down in arch/x86/boot,
+and clean as usual. The Makefile located in arch/x86/boot/ may use
+the subdir- trick to descend further down.
+
+Note 1: arch/$(ARCH)/Makefile cannot use "subdir-", because that file is
+included in the top level makefile, and the kbuild infrastructure
+is not operational at that point.
+
+Note 2: All directories listed in core-y, libs-y, drivers-y and net-y will
+be visited during "make clean".
+
+=== 6 Architecture Makefiles
+
+The top level Makefile sets up the environment and does the preparation,
+before starting to descend down in the individual directories.
+The top level makefile contains the generic part, whereas
+arch/$(ARCH)/Makefile contains what is required to set up kbuild
+for said architecture.
+To do so, arch/$(ARCH)/Makefile sets up a number of variables and defines
+a few targets.
+
+When kbuild executes, the following steps are followed (roughly):
+1) Configuration of the kernel => produce .config
+2) Store kernel version in include/linux/version.h
+3) Updating all other prerequisites to the target prepare:
+ - Additional prerequisites are specified in arch/$(ARCH)/Makefile
+4) Recursively descend down in all directories listed in
+ init-* core* drivers-* net-* libs-* and build all targets.
+ - The values of the above variables are expanded in arch/$(ARCH)/Makefile.
+5) All object files are then linked and the resulting file vmlinux is
+ located at the root of the obj tree.
+ The very first objects linked are listed in head-y, assigned by
+ arch/$(ARCH)/Makefile.
+6) Finally, the architecture-specific part does any required post processing
+ and builds the final bootimage.
+ - This includes building boot records
+ - Preparing initrd images and the like
+
+
+--- 6.1 Set variables to tweak the build to the architecture
+
+ LDFLAGS Generic $(LD) options
+
+ Flags used for all invocations of the linker.
+ Often specifying the emulation is sufficient.
+
+ Example:
+ #arch/s390/Makefile
+ LDFLAGS := -m elf_s390
+ Note: ldflags-y can be used to further customise
+ the flags used. See chapter 3.7.
+
+ LDFLAGS_MODULE Options for $(LD) when linking modules
+
+ LDFLAGS_MODULE is used to set specific flags for $(LD) when
+ linking the .ko files used for modules.
+ Default is "-r", for relocatable output.
+
+ LDFLAGS_vmlinux Options for $(LD) when linking vmlinux
+
+ LDFLAGS_vmlinux is used to specify additional flags to pass to
+ the linker when linking the final vmlinux image.
+ LDFLAGS_vmlinux uses the LDFLAGS_$@ support.
+
+ Example:
+ #arch/x86/Makefile
+ LDFLAGS_vmlinux := -e stext
+
+ OBJCOPYFLAGS objcopy flags
+
+ When $(call if_changed,objcopy) is used to translate a .o file,
+ the flags specified in OBJCOPYFLAGS will be used.
+ $(call if_changed,objcopy) is often used to generate raw binaries on
+ vmlinux.
+
+ Example:
+ #arch/s390/Makefile
+ OBJCOPYFLAGS := -O binary
+
+ #arch/s390/boot/Makefile
+ $(obj)/image: vmlinux FORCE
+ $(call if_changed,objcopy)
+
+ In this example, the binary $(obj)/image is a binary version of
+ vmlinux. The usage of $(call if_changed,xxx) will be described later.
+
+ KBUILD_AFLAGS $(AS) assembler flags
+
+ Default value - see top level Makefile
+ Append or modify as required per architecture.
+
+ Example:
+ #arch/sparc64/Makefile
+ KBUILD_AFLAGS += -m64 -mcpu=ultrasparc
+
+ KBUILD_CFLAGS $(CC) compiler flags
+
+ Default value - see top level Makefile
+ Append or modify as required per architecture.
+
+ Often, the KBUILD_CFLAGS variable depends on the configuration.
+
+ Example:
+ #arch/x86/boot/compressed/Makefile
+ cflags-$(CONFIG_X86_32) := -march=i386
+ cflags-$(CONFIG_X86_64) := -mcmodel=small
+ KBUILD_CFLAGS += $(cflags-y)
+
+ Many arch Makefiles dynamically run the target C compiler to
+ probe supported options:
+
+ #arch/x86/Makefile
+
+ ...
+ cflags-$(CONFIG_MPENTIUMII) += $(call cc-option,\
+ -march=pentium2,-march=i686)
+ ...
+ # Disable unit-at-a-time mode ...
+ KBUILD_CFLAGS += $(call cc-option,-fno-unit-at-a-time)
+ ...
+
+
+ The first example utilises the trick that a config option expands
+ to 'y' when selected.
+
+ KBUILD_AFLAGS_KERNEL $(AS) options specific for built-in
+
+ $(KBUILD_AFLAGS_KERNEL) contains extra C compiler flags used to compile
+ resident kernel code.
+
+ KBUILD_AFLAGS_MODULE Options for $(AS) when building modules
+
+ $(KBUILD_AFLAGS_MODULE) is used to add arch-specific options that
+ are used for $(AS).
+ From commandline AFLAGS_MODULE shall be used (see kbuild.txt).
+
+ KBUILD_CFLAGS_KERNEL $(CC) options specific for built-in
+
+ $(KBUILD_CFLAGS_KERNEL) contains extra C compiler flags used to compile
+ resident kernel code.
+
+ KBUILD_CFLAGS_MODULE Options for $(CC) when building modules
+
+ $(KBUILD_CFLAGS_MODULE) is used to add arch-specific options that
+ are used for $(CC).
+ From commandline CFLAGS_MODULE shall be used (see kbuild.txt).
+
+ KBUILD_LDFLAGS_MODULE Options for $(LD) when linking modules
+
+ $(KBUILD_LDFLAGS_MODULE) is used to add arch-specific options
+ used when linking modules. This is often a linker script.
+ From commandline LDFLAGS_MODULE shall be used (see kbuild.txt).
+
+ KBUILD_ARFLAGS Options for $(AR) when creating archives
+
+ $(KBUILD_ARFLAGS) set by the top level Makefile to "D" (deterministic
+ mode) if this option is supported by $(AR).
+
+--- 6.2 Add prerequisites to archheaders:
+
+ The archheaders: rule is used to generate header files that
+ may be installed into user space by "make header_install" or
+ "make headers_install_all". In order to support
+ "make headers_install_all", this target has to be able to run
+ on an unconfigured tree, or a tree configured for another
+ architecture.
+
+ It is run before "make archprepare" when run on the
+ architecture itself.
+
+
+--- 6.3 Add prerequisites to archprepare:
+
+ The archprepare: rule is used to list prerequisites that need to be
+ built before starting to descend down in the subdirectories.
+ This is usually used for header files containing assembler constants.
+
+ Example:
+ #arch/arm/Makefile
+ archprepare: maketools
+
+ In this example, the file target maketools will be processed
+ before descending down in the subdirectories.
+ See also chapter XXX-TODO that describe how kbuild supports
+ generating offset header files.
+
+
+--- 6.4 List directories to visit when descending
+
+ An arch Makefile cooperates with the top Makefile to define variables
+ which specify how to build the vmlinux file. Note that there is no
+ corresponding arch-specific section for modules; the module-building
+ machinery is all architecture-independent.
+
+
+ head-y, init-y, core-y, libs-y, drivers-y, net-y
+
+ $(head-y) lists objects to be linked first in vmlinux.
+ $(libs-y) lists directories where a lib.a archive can be located.
+ The rest list directories where a built-in.o object file can be
+ located.
+
+ $(init-y) objects will be located after $(head-y).
+ Then the rest follows in this order:
+ $(core-y), $(libs-y), $(drivers-y) and $(net-y).
+
+ The top level Makefile defines values for all generic directories,
+ and arch/$(ARCH)/Makefile only adds architecture-specific directories.
+
+ Example:
+ #arch/sparc64/Makefile
+ core-y += arch/sparc64/kernel/
+ libs-y += arch/sparc64/prom/ arch/sparc64/lib/
+ drivers-$(CONFIG_OPROFILE) += arch/sparc64/oprofile/
+
+
+--- 6.5 Architecture-specific boot images
+
+ An arch Makefile specifies goals that take the vmlinux file, compress
+ it, wrap it in bootstrapping code, and copy the resulting files
+ somewhere. This includes various kinds of installation commands.
+ The actual goals are not standardized across architectures.
+
+ It is common to locate any additional processing in a boot/
+ directory below arch/$(ARCH)/.
+
+ Kbuild does not provide any smart way to support building a
+ target specified in boot/. Therefore arch/$(ARCH)/Makefile shall
+ call make manually to build a target in boot/.
+
+ The recommended approach is to include shortcuts in
+ arch/$(ARCH)/Makefile, and use the full path when calling down
+ into the arch/$(ARCH)/boot/Makefile.
+
+ Example:
+ #arch/x86/Makefile
+ boot := arch/x86/boot
+ bzImage: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+ "$(Q)$(MAKE) $(build)=<dir>" is the recommended way to invoke
+ make in a subdirectory.
+
+ There are no rules for naming architecture-specific targets,
+ but executing "make help" will list all relevant targets.
+ To support this, $(archhelp) must be defined.
+
+ Example:
+ #arch/x86/Makefile
+ define archhelp
+ echo '* bzImage - Image (arch/$(ARCH)/boot/bzImage)'
+ endif
+
+ When make is executed without arguments, the first goal encountered
+ will be built. In the top level Makefile the first goal present
+ is all:.
+ An architecture shall always, per default, build a bootable image.
+ In "make help", the default goal is highlighted with a '*'.
+ Add a new prerequisite to all: to select a default goal different
+ from vmlinux.
+
+ Example:
+ #arch/x86/Makefile
+ all: bzImage
+
+ When "make" is executed without arguments, bzImage will be built.
+
+--- 6.6 Building non-kbuild targets
+
+ extra-y
+
+ extra-y specifies additional targets created in the current
+ directory, in addition to any targets specified by obj-*.
+
+ Listing all targets in extra-y is required for two purposes:
+ 1) Enable kbuild to check changes in command lines
+ - When $(call if_changed,xxx) is used
+ 2) kbuild knows what files to delete during "make clean"
+
+ Example:
+ #arch/x86/kernel/Makefile
+ extra-y := head.o init_task.o
+
+ In this example, extra-y is used to list object files that
+ shall be built, but shall not be linked as part of built-in.o.
+
+
+--- 6.7 Commands useful for building a boot image
+
+ Kbuild provides a few macros that are useful when building a
+ boot image.
+
+ if_changed
+
+ if_changed is the infrastructure used for the following commands.
+
+ Usage:
+ target: source(s) FORCE
+ $(call if_changed,ld/objcopy/gzip/...)
+
+ When the rule is evaluated, it is checked to see if any files
+ need an update, or the command line has changed since the last
+ invocation. The latter will force a rebuild if any options
+ to the executable have changed.
+ Any target that utilises if_changed must be listed in $(targets),
+ otherwise the command line check will fail, and the target will
+ always be built.
+ Assignments to $(targets) are without $(obj)/ prefix.
+ if_changed may be used in conjunction with custom commands as
+ defined in 6.8 "Custom kbuild commands".
+
+ Note: It is a typical mistake to forget the FORCE prerequisite.
+ Another common pitfall is that whitespace is sometimes
+ significant; for instance, the below will fail (note the extra space
+ after the comma):
+ target: source(s) FORCE
+ #WRONG!# $(call if_changed, ld/objcopy/gzip/...)
+
+ ld
+ Link target. Often, LDFLAGS_$@ is used to set specific options to ld.
+
+ objcopy
+ Copy binary. Uses OBJCOPYFLAGS usually specified in
+ arch/$(ARCH)/Makefile.
+ OBJCOPYFLAGS_$@ may be used to set additional options.
+
+ gzip
+ Compress target. Use maximum compression to compress target.
+
+ Example:
+ #arch/x86/boot/Makefile
+ LDFLAGS_bootsect := -Ttext 0x0 -s --oformat binary
+ LDFLAGS_setup := -Ttext 0x0 -s --oformat binary -e begtext
+
+ targets += setup setup.o bootsect bootsect.o
+ $(obj)/setup $(obj)/bootsect: %: %.o FORCE
+ $(call if_changed,ld)
+
+ In this example, there are two possible targets, requiring different
+ options to the linker. The linker options are specified using the
+ LDFLAGS_$@ syntax - one for each potential target.
+ $(targets) are assigned all potential targets, by which kbuild knows
+ the targets and will:
+ 1) check for commandline changes
+ 2) delete target during make clean
+
+ The ": %: %.o" part of the prerequisite is a shorthand that
+ frees us from listing the setup.o and bootsect.o files.
+ Note: It is a common mistake to forget the "targets :=" assignment,
+ resulting in the target file being recompiled for no
+ obvious reason.
+
+ dtc
+ Create flattened device tree blob object suitable for linking
+ into vmlinux. Device tree blobs linked into vmlinux are placed
+ in an init section in the image. Platform code *must* copy the
+ blob to non-init memory prior to calling unflatten_device_tree().
+
+ To use this command, simply add *.dtb into obj-y or targets, or make
+ some other target depend on %.dtb
+
+ A central rule exists to create $(obj)/%.dtb from $(src)/%.dts;
+ architecture Makefiles do no need to explicitly write out that rule.
+
+ Example:
+ targets += $(dtb-y)
+ clean-files += *.dtb
+ DTC_FLAGS ?= -p 1024
+
+--- 6.8 Custom kbuild commands
+
+ When kbuild is executing with KBUILD_VERBOSE=0, then only a shorthand
+ of a command is normally displayed.
+ To enable this behaviour for custom commands kbuild requires
+ two variables to be set:
+ quiet_cmd_<command> - what shall be echoed
+ cmd_<command> - the command to execute
+
+ Example:
+ #
+ quiet_cmd_image = BUILD $@
+ cmd_image = $(obj)/tools/build $(BUILDFLAGS) \
+ $(obj)/vmlinux.bin > $@
+
+ targets += bzImage
+ $(obj)/bzImage: $(obj)/vmlinux.bin $(obj)/tools/build FORCE
+ $(call if_changed,image)
+ @echo 'Kernel: $@ is ready'
+
+ When updating the $(obj)/bzImage target, the line
+
+ BUILD arch/x86/boot/bzImage
+
+ will be displayed with "make KBUILD_VERBOSE=0".
+
+
+--- 6.9 Preprocessing linker scripts
+
+ When the vmlinux image is built, the linker script
+ arch/$(ARCH)/kernel/vmlinux.lds is used.
+ The script is a preprocessed variant of the file vmlinux.lds.S
+ located in the same directory.
+ kbuild knows .lds files and includes a rule *lds.S -> *lds.
+
+ Example:
+ #arch/x86/kernel/Makefile
+ always := vmlinux.lds
+
+ #Makefile
+ export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
+
+ The assignment to $(always) is used to tell kbuild to build the
+ target vmlinux.lds.
+ The assignment to $(CPPFLAGS_vmlinux.lds) tells kbuild to use the
+ specified options when building the target vmlinux.lds.
+
+ When building the *.lds target, kbuild uses the variables:
+ KBUILD_CPPFLAGS : Set in top-level Makefile
+ cppflags-y : May be set in the kbuild makefile
+ CPPFLAGS_$(@F) : Target-specific flags.
+ Note that the full filename is used in this
+ assignment.
+
+ The kbuild infrastructure for *lds files is used in several
+ architecture-specific files.
+
+--- 6.10 Generic header files
+
+ The directory include/asm-generic contains the header files
+ that may be shared between individual architectures.
+ The recommended approach how to use a generic header file is
+ to list the file in the Kbuild file.
+ See "7.4 generic-y" for further info on syntax etc.
+
+=== 7 Kbuild syntax for exported headers
+
+The kernel includes a set of headers that is exported to userspace.
+Many headers can be exported as-is but other headers require a
+minimal pre-processing before they are ready for user-space.
+The pre-processing does:
+- drop kernel-specific annotations
+- drop include of compiler.h
+- drop all sections that are kernel internal (guarded by ifdef __KERNEL__)
+
+Each relevant directory contains a file name "Kbuild" which specifies the
+headers to be exported.
+See subsequent chapter for the syntax of the Kbuild file.
+
+ --- 7.1 header-y
+
+ header-y specifies header files to be exported.
+
+ Example:
+ #include/linux/Kbuild
+ header-y += usb/
+ header-y += aio_abi.h
+
+ The convention is to list one file per line and
+ preferably in alphabetic order.
+
+ header-y also specifies which subdirectories to visit.
+ A subdirectory is identified by a trailing '/' which
+ can be seen in the example above for the usb subdirectory.
+
+ Subdirectories are visited before their parent directories.
+
+ --- 7.2 genhdr-y
+
+ genhdr-y specifies generated files to be exported.
+ Generated files are special as they need to be looked
+ up in another directory when doing 'make O=...' builds.
+
+ Example:
+ #include/linux/Kbuild
+ genhdr-y += version.h
+
+ --- 7.3 destination-y
+
+ When an architecture has a set of exported headers that needs to be
+ exported to a different directory destination-y is used.
+ destination-y specifies the destination directory for all exported
+ headers in the file where it is present.
+
+ Example:
+ #arch/xtensa/platforms/s6105/include/platform/Kbuild
+ destination-y := include/linux
+
+ In the example above all exported headers in the Kbuild file
+ will be located in the directory "include/linux" when exported.
+
+ --- 7.4 generic-y
+
+ If an architecture uses a verbatim copy of a header from
+ include/asm-generic then this is listed in the file
+ arch/$(ARCH)/include/asm/Kbuild like this:
+
+ Example:
+ #arch/x86/include/asm/Kbuild
+ generic-y += termios.h
+ generic-y += rtc.h
+
+ During the prepare phase of the build a wrapper include
+ file is generated in the directory:
+
+ arch/$(ARCH)/include/generated/asm
+
+ When a header is exported where the architecture uses
+ the generic header a similar wrapper is generated as part
+ of the set of exported headers in the directory:
+
+ usr/include/asm
+
+ The generated wrapper will in both cases look like the following:
+
+ Example: termios.h
+ #include <asm-generic/termios.h>
+
+=== 8 Kbuild Variables
+
+The top Makefile exports the following variables:
+
+ VERSION, PATCHLEVEL, SUBLEVEL, EXTRAVERSION
+
+ These variables define the current kernel version. A few arch
+ Makefiles actually use these values directly; they should use
+ $(KERNELRELEASE) instead.
+
+ $(VERSION), $(PATCHLEVEL), and $(SUBLEVEL) define the basic
+ three-part version number, such as "2", "4", and "0". These three
+ values are always numeric.
+
+ $(EXTRAVERSION) defines an even tinier sublevel for pre-patches
+ or additional patches. It is usually some non-numeric string
+ such as "-pre4", and is often blank.
+
+ KERNELRELEASE
+
+ $(KERNELRELEASE) is a single string such as "2.4.0-pre4", suitable
+ for constructing installation directory names or showing in
+ version strings. Some arch Makefiles use it for this purpose.
+
+ ARCH
+
+ This variable defines the target architecture, such as "i386",
+ "arm", or "sparc". Some kbuild Makefiles test $(ARCH) to
+ determine which files to compile.
+
+ By default, the top Makefile sets $(ARCH) to be the same as the
+ host system architecture. For a cross build, a user may
+ override the value of $(ARCH) on the command line:
+
+ make ARCH=m68k ...
+
+
+ INSTALL_PATH
+
+ This variable defines a place for the arch Makefiles to install
+ the resident kernel image and System.map file.
+ Use this for architecture-specific install targets.
+
+ INSTALL_MOD_PATH, MODLIB
+
+ $(INSTALL_MOD_PATH) specifies a prefix to $(MODLIB) for module
+ installation. This variable is not defined in the Makefile but
+ may be passed in by the user if desired.
+
+ $(MODLIB) specifies the directory for module installation.
+ The top Makefile defines $(MODLIB) to
+ $(INSTALL_MOD_PATH)/lib/modules/$(KERNELRELEASE). The user may
+ override this value on the command line if desired.
+
+ INSTALL_MOD_STRIP
+
+ If this variable is specified, it will cause modules to be stripped
+ after they are installed. If INSTALL_MOD_STRIP is '1', then the
+ default option --strip-debug will be used. Otherwise, the
+ INSTALL_MOD_STRIP value will be used as the option(s) to the strip
+ command.
+
+
+=== 9 Makefile language
+
+The kernel Makefiles are designed to be run with GNU Make. The Makefiles
+use only the documented features of GNU Make, but they do use many
+GNU extensions.
+
+GNU Make supports elementary list-processing functions. The kernel
+Makefiles use a novel style of list building and manipulation with few
+"if" statements.
+
+GNU Make has two assignment operators, ":=" and "=". ":=" performs
+immediate evaluation of the right-hand side and stores an actual string
+into the left-hand side. "=" is like a formula definition; it stores the
+right-hand side in an unevaluated form and then evaluates this form each
+time the left-hand side is used.
+
+There are some cases where "=" is appropriate. Usually, though, ":="
+is the right choice.
+
+=== 10 Credits
+
+Original version made by Michael Elizabeth Chastain, <mailto:mec@shout.net>
+Updates by Kai Germaschewski <kai@tp1.ruhr-uni-bochum.de>
+Updates by Sam Ravnborg <sam@ravnborg.org>
+Language QA by Jan Engelhardt <jengelh@gmx.de>
+
+=== 11 TODO
+
+- Describe how kbuild supports shipped files with _shipped.
+- Generating offset header files.
+- Add more variables to section 7?
+
+
+
diff --git a/Documentation/kbuild/modules.txt b/Documentation/kbuild/modules.txt
new file mode 100644
index 000000000..3fb39e011
--- /dev/null
+++ b/Documentation/kbuild/modules.txt
@@ -0,0 +1,541 @@
+Building External Modules
+
+This document describes how to build an out-of-tree kernel module.
+
+=== Table of Contents
+
+ === 1 Introduction
+ === 2 How to Build External Modules
+ --- 2.1 Command Syntax
+ --- 2.2 Options
+ --- 2.3 Targets
+ --- 2.4 Building Separate Files
+ === 3. Creating a Kbuild File for an External Module
+ --- 3.1 Shared Makefile
+ --- 3.2 Separate Kbuild file and Makefile
+ --- 3.3 Binary Blobs
+ --- 3.4 Building Multiple Modules
+ === 4. Include Files
+ --- 4.1 Kernel Includes
+ --- 4.2 Single Subdirectory
+ --- 4.3 Several Subdirectories
+ === 5. Module Installation
+ --- 5.1 INSTALL_MOD_PATH
+ --- 5.2 INSTALL_MOD_DIR
+ === 6. Module Versioning
+ --- 6.1 Symbols From the Kernel (vmlinux + modules)
+ --- 6.2 Symbols and External Modules
+ --- 6.3 Symbols From Another External Module
+ === 7. Tips & Tricks
+ --- 7.1 Testing for CONFIG_FOO_BAR
+
+
+
+=== 1. Introduction
+
+"kbuild" is the build system used by the Linux kernel. Modules must use
+kbuild to stay compatible with changes in the build infrastructure and
+to pick up the right flags to "gcc." Functionality for building modules
+both in-tree and out-of-tree is provided. The method for building
+either is similar, and all modules are initially developed and built
+out-of-tree.
+
+Covered in this document is information aimed at developers interested
+in building out-of-tree (or "external") modules. The author of an
+external module should supply a makefile that hides most of the
+complexity, so one only has to type "make" to build the module. This is
+easily accomplished, and a complete example will be presented in
+section 3.
+
+
+=== 2. How to Build External Modules
+
+To build external modules, you must have a prebuilt kernel available
+that contains the configuration and header files used in the build.
+Also, the kernel must have been built with modules enabled. If you are
+using a distribution kernel, there will be a package for the kernel you
+are running provided by your distribution.
+
+An alternative is to use the "make" target "modules_prepare." This will
+make sure the kernel contains the information required. The target
+exists solely as a simple way to prepare a kernel source tree for
+building external modules.
+
+NOTE: "modules_prepare" will not build Module.symvers even if
+CONFIG_MODVERSIONS is set; therefore, a full kernel build needs to be
+executed to make module versioning work.
+
+--- 2.1 Command Syntax
+
+ The command to build an external module is:
+
+ $ make -C <path_to_kernel_src> M=$PWD
+
+ The kbuild system knows that an external module is being built
+ due to the "M=<dir>" option given in the command.
+
+ To build against the running kernel use:
+
+ $ make -C /lib/modules/`uname -r`/build M=$PWD
+
+ Then to install the module(s) just built, add the target
+ "modules_install" to the command:
+
+ $ make -C /lib/modules/`uname -r`/build M=$PWD modules_install
+
+--- 2.2 Options
+
+ ($KDIR refers to the path of the kernel source directory.)
+
+ make -C $KDIR M=$PWD
+
+ -C $KDIR
+ The directory where the kernel source is located.
+ "make" will actually change to the specified directory
+ when executing and will change back when finished.
+
+ M=$PWD
+ Informs kbuild that an external module is being built.
+ The value given to "M" is the absolute path of the
+ directory where the external module (kbuild file) is
+ located.
+
+--- 2.3 Targets
+
+ When building an external module, only a subset of the "make"
+ targets are available.
+
+ make -C $KDIR M=$PWD [target]
+
+ The default will build the module(s) located in the current
+ directory, so a target does not need to be specified. All
+ output files will also be generated in this directory. No
+ attempts are made to update the kernel source, and it is a
+ precondition that a successful "make" has been executed for the
+ kernel.
+
+ modules
+ The default target for external modules. It has the
+ same functionality as if no target was specified. See
+ description above.
+
+ modules_install
+ Install the external module(s). The default location is
+ /lib/modules/<kernel_release>/extra/, but a prefix may
+ be added with INSTALL_MOD_PATH (discussed in section 5).
+
+ clean
+ Remove all generated files in the module directory only.
+
+ help
+ List the available targets for external modules.
+
+--- 2.4 Building Separate Files
+
+ It is possible to build single files that are part of a module.
+ This works equally well for the kernel, a module, and even for
+ external modules.
+
+ Example (The module foo.ko, consist of bar.o and baz.o):
+ make -C $KDIR M=$PWD bar.lst
+ make -C $KDIR M=$PWD baz.o
+ make -C $KDIR M=$PWD foo.ko
+ make -C $KDIR M=$PWD /
+
+
+=== 3. Creating a Kbuild File for an External Module
+
+In the last section we saw the command to build a module for the
+running kernel. The module is not actually built, however, because a
+build file is required. Contained in this file will be the name of
+the module(s) being built, along with the list of requisite source
+files. The file may be as simple as a single line:
+
+ obj-m := <module_name>.o
+
+The kbuild system will build <module_name>.o from <module_name>.c,
+and, after linking, will result in the kernel module <module_name>.ko.
+The above line can be put in either a "Kbuild" file or a "Makefile."
+When the module is built from multiple sources, an additional line is
+needed listing the files:
+
+ <module_name>-y := <src1>.o <src2>.o ...
+
+NOTE: Further documentation describing the syntax used by kbuild is
+located in Documentation/kbuild/makefiles.txt.
+
+The examples below demonstrate how to create a build file for the
+module 8123.ko, which is built from the following files:
+
+ 8123_if.c
+ 8123_if.h
+ 8123_pci.c
+ 8123_bin.o_shipped <= Binary blob
+
+--- 3.1 Shared Makefile
+
+ An external module always includes a wrapper makefile that
+ supports building the module using "make" with no arguments.
+ This target is not used by kbuild; it is only for convenience.
+ Additional functionality, such as test targets, can be included
+ but should be filtered out from kbuild due to possible name
+ clashes.
+
+ Example 1:
+ --> filename: Makefile
+ ifneq ($(KERNELRELEASE),)
+ # kbuild part of makefile
+ obj-m := 8123.o
+ 8123-y := 8123_if.o 8123_pci.o 8123_bin.o
+
+ else
+ # normal makefile
+ KDIR ?= /lib/modules/`uname -r`/build
+
+ default:
+ $(MAKE) -C $(KDIR) M=$$PWD
+
+ # Module specific targets
+ genbin:
+ echo "X" > 8123_bin.o_shipped
+
+ endif
+
+ The check for KERNELRELEASE is used to separate the two parts
+ of the makefile. In the example, kbuild will only see the two
+ assignments, whereas "make" will see everything except these
+ two assignments. This is due to two passes made on the file:
+ the first pass is by the "make" instance run on the command
+ line; the second pass is by the kbuild system, which is
+ initiated by the parameterized "make" in the default target.
+
+--- 3.2 Separate Kbuild File and Makefile
+
+ In newer versions of the kernel, kbuild will first look for a
+ file named "Kbuild," and only if that is not found, will it
+ then look for a makefile. Utilizing a "Kbuild" file allows us
+ to split up the makefile from example 1 into two files:
+
+ Example 2:
+ --> filename: Kbuild
+ obj-m := 8123.o
+ 8123-y := 8123_if.o 8123_pci.o 8123_bin.o
+
+ --> filename: Makefile
+ KDIR ?= /lib/modules/`uname -r`/build
+
+ default:
+ $(MAKE) -C $(KDIR) M=$$PWD
+
+ # Module specific targets
+ genbin:
+ echo "X" > 8123_bin.o_shipped
+
+ The split in example 2 is questionable due to the simplicity of
+ each file; however, some external modules use makefiles
+ consisting of several hundred lines, and here it really pays
+ off to separate the kbuild part from the rest.
+
+ The next example shows a backward compatible version.
+
+ Example 3:
+ --> filename: Kbuild
+ obj-m := 8123.o
+ 8123-y := 8123_if.o 8123_pci.o 8123_bin.o
+
+ --> filename: Makefile
+ ifneq ($(KERNELRELEASE),)
+ # kbuild part of makefile
+ include Kbuild
+
+ else
+ # normal makefile
+ KDIR ?= /lib/modules/`uname -r`/build
+
+ default:
+ $(MAKE) -C $(KDIR) M=$$PWD
+
+ # Module specific targets
+ genbin:
+ echo "X" > 8123_bin.o_shipped
+
+ endif
+
+ Here the "Kbuild" file is included from the makefile. This
+ allows an older version of kbuild, which only knows of
+ makefiles, to be used when the "make" and kbuild parts are
+ split into separate files.
+
+--- 3.3 Binary Blobs
+
+ Some external modules need to include an object file as a blob.
+ kbuild has support for this, but requires the blob file to be
+ named <filename>_shipped. When the kbuild rules kick in, a copy
+ of <filename>_shipped is created with _shipped stripped off,
+ giving us <filename>. This shortened filename can be used in
+ the assignment to the module.
+
+ Throughout this section, 8123_bin.o_shipped has been used to
+ build the kernel module 8123.ko; it has been included as
+ 8123_bin.o.
+
+ 8123-y := 8123_if.o 8123_pci.o 8123_bin.o
+
+ Although there is no distinction between the ordinary source
+ files and the binary file, kbuild will pick up different rules
+ when creating the object file for the module.
+
+--- 3.4 Building Multiple Modules
+
+ kbuild supports building multiple modules with a single build
+ file. For example, if you wanted to build two modules, foo.ko
+ and bar.ko, the kbuild lines would be:
+
+ obj-m := foo.o bar.o
+ foo-y := <foo_srcs>
+ bar-y := <bar_srcs>
+
+ It is that simple!
+
+
+=== 4. Include Files
+
+Within the kernel, header files are kept in standard locations
+according to the following rule:
+
+ * If the header file only describes the internal interface of a
+ module, then the file is placed in the same directory as the
+ source files.
+ * If the header file describes an interface used by other parts
+ of the kernel that are located in different directories, then
+ the file is placed in include/linux/.
+
+ NOTE: There are two notable exceptions to this rule: larger
+ subsystems have their own directory under include/, such as
+ include/scsi; and architecture specific headers are located
+ under arch/$(ARCH)/include/.
+
+--- 4.1 Kernel Includes
+
+ To include a header file located under include/linux/, simply
+ use:
+
+ #include <linux/module.h>
+
+ kbuild will add options to "gcc" so the relevant directories
+ are searched.
+
+--- 4.2 Single Subdirectory
+
+ External modules tend to place header files in a separate
+ include/ directory where their source is located, although this
+ is not the usual kernel style. To inform kbuild of the
+ directory, use either ccflags-y or CFLAGS_<filename>.o.
+
+ Using the example from section 3, if we moved 8123_if.h to a
+ subdirectory named include, the resulting kbuild file would
+ look like:
+
+ --> filename: Kbuild
+ obj-m := 8123.o
+
+ ccflags-y := -Iinclude
+ 8123-y := 8123_if.o 8123_pci.o 8123_bin.o
+
+ Note that in the assignment there is no space between -I and
+ the path. This is a limitation of kbuild: there must be no
+ space present.
+
+--- 4.3 Several Subdirectories
+
+ kbuild can handle files that are spread over several directories.
+ Consider the following example:
+
+ .
+ |__ src
+ | |__ complex_main.c
+ | |__ hal
+ | |__ hardwareif.c
+ | |__ include
+ | |__ hardwareif.h
+ |__ include
+ |__ complex.h
+
+ To build the module complex.ko, we then need the following
+ kbuild file:
+
+ --> filename: Kbuild
+ obj-m := complex.o
+ complex-y := src/complex_main.o
+ complex-y += src/hal/hardwareif.o
+
+ ccflags-y := -I$(src)/include
+ ccflags-y += -I$(src)/src/hal/include
+
+ As you can see, kbuild knows how to handle object files located
+ in other directories. The trick is to specify the directory
+ relative to the kbuild file's location. That being said, this
+ is NOT recommended practice.
+
+ For the header files, kbuild must be explicitly told where to
+ look. When kbuild executes, the current directory is always the
+ root of the kernel tree (the argument to "-C") and therefore an
+ absolute path is needed. $(src) provides the absolute path by
+ pointing to the directory where the currently executing kbuild
+ file is located.
+
+
+=== 5. Module Installation
+
+Modules which are included in the kernel are installed in the
+directory:
+
+ /lib/modules/$(KERNELRELEASE)/kernel/
+
+And external modules are installed in:
+
+ /lib/modules/$(KERNELRELEASE)/extra/
+
+--- 5.1 INSTALL_MOD_PATH
+
+ Above are the default directories but as always some level of
+ customization is possible. A prefix can be added to the
+ installation path using the variable INSTALL_MOD_PATH:
+
+ $ make INSTALL_MOD_PATH=/frodo modules_install
+ => Install dir: /frodo/lib/modules/$(KERNELRELEASE)/kernel/
+
+ INSTALL_MOD_PATH may be set as an ordinary shell variable or,
+ as shown above, can be specified on the command line when
+ calling "make." This has effect when installing both in-tree
+ and out-of-tree modules.
+
+--- 5.2 INSTALL_MOD_DIR
+
+ External modules are by default installed to a directory under
+ /lib/modules/$(KERNELRELEASE)/extra/, but you may wish to
+ locate modules for a specific functionality in a separate
+ directory. For this purpose, use INSTALL_MOD_DIR to specify an
+ alternative name to "extra."
+
+ $ make INSTALL_MOD_DIR=gandalf -C $KDIR \
+ M=$PWD modules_install
+ => Install dir: /lib/modules/$(KERNELRELEASE)/gandalf/
+
+
+=== 6. Module Versioning
+
+Module versioning is enabled by the CONFIG_MODVERSIONS tag, and is used
+as a simple ABI consistency check. A CRC value of the full prototype
+for an exported symbol is created. When a module is loaded/used, the
+CRC values contained in the kernel are compared with similar values in
+the module; if they are not equal, the kernel refuses to load the
+module.
+
+Module.symvers contains a list of all exported symbols from a kernel
+build.
+
+--- 6.1 Symbols From the Kernel (vmlinux + modules)
+
+ During a kernel build, a file named Module.symvers will be
+ generated. Module.symvers contains all exported symbols from
+ the kernel and compiled modules. For each symbol, the
+ corresponding CRC value is also stored.
+
+ The syntax of the Module.symvers file is:
+ <CRC> <Symbol> <module>
+
+ 0x2d036834 scsi_remove_host drivers/scsi/scsi_mod
+
+ For a kernel build without CONFIG_MODVERSIONS enabled, the CRC
+ would read 0x00000000.
+
+ Module.symvers serves two purposes:
+ 1) It lists all exported symbols from vmlinux and all modules.
+ 2) It lists the CRC if CONFIG_MODVERSIONS is enabled.
+
+--- 6.2 Symbols and External Modules
+
+ When building an external module, the build system needs access
+ to the symbols from the kernel to check if all external symbols
+ are defined. This is done in the MODPOST step. modpost obtains
+ the symbols by reading Module.symvers from the kernel source
+ tree. If a Module.symvers file is present in the directory
+ where the external module is being built, this file will be
+ read too. During the MODPOST step, a new Module.symvers file
+ will be written containing all exported symbols that were not
+ defined in the kernel.
+
+--- 6.3 Symbols From Another External Module
+
+ Sometimes, an external module uses exported symbols from
+ another external module. kbuild needs to have full knowledge of
+ all symbols to avoid spitting out warnings about undefined
+ symbols. Three solutions exist for this situation.
+
+ NOTE: The method with a top-level kbuild file is recommended
+ but may be impractical in certain situations.
+
+ Use a top-level kbuild file
+ If you have two modules, foo.ko and bar.ko, where
+ foo.ko needs symbols from bar.ko, you can use a
+ common top-level kbuild file so both modules are
+ compiled in the same build. Consider the following
+ directory layout:
+
+ ./foo/ <= contains foo.ko
+ ./bar/ <= contains bar.ko
+
+ The top-level kbuild file would then look like:
+
+ #./Kbuild (or ./Makefile):
+ obj-y := foo/ bar/
+
+ And executing
+
+ $ make -C $KDIR M=$PWD
+
+ will then do the expected and compile both modules with
+ full knowledge of symbols from either module.
+
+ Use an extra Module.symvers file
+ When an external module is built, a Module.symvers file
+ is generated containing all exported symbols which are
+ not defined in the kernel. To get access to symbols
+ from bar.ko, copy the Module.symvers file from the
+ compilation of bar.ko to the directory where foo.ko is
+ built. During the module build, kbuild will read the
+ Module.symvers file in the directory of the external
+ module, and when the build is finished, a new
+ Module.symvers file is created containing the sum of
+ all symbols defined and not part of the kernel.
+
+ Use "make" variable KBUILD_EXTRA_SYMBOLS
+ If it is impractical to copy Module.symvers from
+ another module, you can assign a space separated list
+ of files to KBUILD_EXTRA_SYMBOLS in your build file.
+ These files will be loaded by modpost during the
+ initialization of its symbol tables.
+
+
+=== 7. Tips & Tricks
+
+--- 7.1 Testing for CONFIG_FOO_BAR
+
+ Modules often need to check for certain CONFIG_ options to
+ decide if a specific feature is included in the module. In
+ kbuild this is done by referencing the CONFIG_ variable
+ directly.
+
+ #fs/ext2/Makefile
+ obj-$(CONFIG_EXT2_FS) += ext2.o
+
+ ext2-y := balloc.o bitmap.o dir.o
+ ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o
+
+ External modules have traditionally used "grep" to check for
+ specific CONFIG_ settings directly in .config. This usage is
+ broken. As introduced before, external modules should use
+ kbuild for building and can therefore use the same methods as
+ in-tree modules when testing for CONFIG_ definitions.
+
diff --git a/Documentation/kdbus/.gitignore b/Documentation/kdbus/.gitignore
new file mode 100644
index 000000000..b4a77ccba
--- /dev/null
+++ b/Documentation/kdbus/.gitignore
@@ -0,0 +1,2 @@
+*.7
+*.html
diff --git a/Documentation/kdbus/Makefile b/Documentation/kdbus/Makefile
new file mode 100644
index 000000000..8caffe565
--- /dev/null
+++ b/Documentation/kdbus/Makefile
@@ -0,0 +1,44 @@
+DOCS := \
+ kdbus.xml \
+ kdbus.bus.xml \
+ kdbus.connection.xml \
+ kdbus.endpoint.xml \
+ kdbus.fs.xml \
+ kdbus.item.xml \
+ kdbus.match.xml \
+ kdbus.message.xml \
+ kdbus.name.xml \
+ kdbus.policy.xml \
+ kdbus.pool.xml
+
+XMLFILES := $(addprefix $(obj)/,$(DOCS))
+MANFILES := $(patsubst %.xml, %.7, $(XMLFILES))
+HTMLFILES := $(patsubst %.xml, %.html, $(XMLFILES))
+
+XMLTO_ARGS := -m $(srctree)/$(src)/stylesheet.xsl --skip-validation
+
+quiet_cmd_db2man = MAN $@
+ cmd_db2man = xmlto man $(XMLTO_ARGS) -o $(obj) $<
+%.7: %.xml
+ @(which xmlto > /dev/null 2>&1) || \
+ (echo "*** You need to install xmlto ***"; \
+ exit 1)
+ $(call cmd,db2man)
+
+quiet_cmd_db2html = HTML $@
+ cmd_db2html = xmlto html-nochunks $(XMLTO_ARGS) -o $(obj) $<
+%.html: %.xml
+ @(which xmlto > /dev/null 2>&1) || \
+ (echo "*** You need to install xmlto ***"; \
+ exit 1)
+ $(call cmd,db2html)
+
+mandocs: $(MANFILES)
+
+htmldocs: $(HTMLFILES)
+
+clean-files := $(MANFILES) $(HTMLFILES)
+
+# we don't support other %docs targets right now
+%docs:
+ @true
diff --git a/Documentation/kdbus/kdbus.bus.xml b/Documentation/kdbus/kdbus.bus.xml
new file mode 100644
index 000000000..83f1198bc
--- /dev/null
+++ b/Documentation/kdbus/kdbus.bus.xml
@@ -0,0 +1,344 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.bus">
+
+ <refentryinfo>
+ <title>kdbus.bus</title>
+ <productname>kdbus.bus</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus.bus</refname>
+ <refpurpose>kdbus bus</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>
+ A bus is a resource that is shared between connections in order to
+ transmit messages (see
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>).
+ Each bus is independent, and operations on the bus will not have any
+ effect on other buses. A bus is a management entity that controls the
+ addresses of its connections, their policies and message transactions
+ performed via this bus.
+ </para>
+ <para>
+ Each bus is bound to the mount instance it was created on. It has a
+ custom name that is unique across all buses of a domain. In
+ <citerefentry>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ a bus is presented as a directory. No operations can be performed on
+ the bus itself; instead you need to perform the operations on an endpoint
+ associated with the bus. Endpoints are accessible as files underneath the
+ bus directory. A default endpoint called <constant>bus</constant> is
+ provided on each bus.
+ </para>
+ <para>
+ Bus names may be chosen freely except for one restriction: the name must
+ be prefixed with the numeric effective UID of the creator and a dash. This
+ is required to avoid namespace clashes between different users. When
+ creating a bus, the name that is passed in must be properly formatted, or
+ the kernel will refuse creation of the bus. Example:
+ <literal>1047-foobar</literal> is an acceptable name for a bus
+ registered by a user with UID 1047. However,
+ <literal>1024-foobar</literal> is not, and neither is
+ <literal>foobar</literal>. The UID must be provided in the
+ user-namespace of the bus owner.
+ </para>
+ <para>
+ To create a new bus, you need to open the control file of a domain and
+ employ the <constant>KDBUS_CMD_BUS_MAKE</constant> ioctl. The control
+ file descriptor that was used to issue
+ <constant>KDBUS_CMD_BUS_MAKE</constant> must not previously have been
+ used for any other control-ioctl and must be kept open for the entire
+ life-time of the created bus. Closing it will immediately cleanup the
+ entire bus and all its associated resources and endpoints. Every control
+ file descriptor can only be used to create a single new bus; from that
+ point on, it is not used for any further communication until the final
+ <citerefentry>
+ <refentrytitle>close</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ .
+ </para>
+ <para>
+ Each bus will generate a random, 128-bit UUID upon creation. This UUID
+ will be returned to creators of connections through
+ <varname>kdbus_cmd_hello.id128</varname> and can be used to uniquely
+ identify buses, even across different machines or containers. The UUID
+ will have its variant bits set to <literal>DCE</literal>, and denote
+ version 4 (random). For more details on UUIDs, see <ulink
+ url="https://en.wikipedia.org/wiki/Universally_unique_identifier">
+ the Wikipedia article on UUIDs</ulink>.
+ </para>
+
+ </refsect1>
+
+ <refsect1>
+ <title>Creating buses</title>
+ <para>
+ To create a new bus, the <constant>KDBUS_CMD_BUS_MAKE</constant>
+ command is used. It takes a <type>struct kdbus_cmd</type> argument.
+ </para>
+ <programlisting>
+struct kdbus_cmd {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>The flags for creation.</para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_MAKE_ACCESS_GROUP</constant></term>
+ <listitem>
+ <para>Make the bus file group-accessible.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_MAKE_ACCESS_WORLD</constant></term>
+ <listitem>
+ <para>Make the bus file world-accessible.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+ <listitem>
+ <para>
+ Requests a set of valid flags for this ioctl. When this bit is
+ set, no action is taken; the ioctl will return
+ <errorcode>0</errorcode>, and the <varname>flags</varname>
+ field will have all bits set that are valid for this command.
+ The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+ cleared by the operation.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ The following items (see
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>)
+ are expected for <constant>KDBUS_CMD_BUS_MAKE</constant>.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_MAKE_NAME</constant></term>
+ <listitem>
+ <para>
+ Contains a null-terminated string that identifies the
+ bus. The name must be unique across the kdbus domain and
+ must start with the effective UID of the caller, followed by
+ a '<literal>-</literal>' (dash). This item is mandatory.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_BLOOM_PARAMETER</constant></term>
+ <listitem>
+ <para>
+ Bus-wide bloom parameters passed in a
+ <type>struct kdbus_bloom_parameter</type>. These settings are
+ copied back to new connections verbatim. This item is
+ mandatory. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for a more detailed description of this item.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_ATTACH_FLAGS_SEND</constant></term>
+ <listitem>
+ <para>
+ An optional item that contains a set of attach flags that are
+ returned to connections when they query the bus creator
+ metadata. If not set, no metadata is returned.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+ <listitem><para>
+ With this item, programs can <emphasis>probe</emphasis> the
+ kernel for known item types. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Return value</title>
+ <para>
+ On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+ on error, <errorcode>-1</errorcode> is returned, and
+ <varname>errno</varname> is set to indicate the error.
+ If the issued ioctl is illegal for the file descriptor used,
+ <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+ </para>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_BUS_MAKE</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EBADMSG</constant></term>
+ <listitem><para>
+ A mandatory item is missing.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ The flags supplied in the <constant>struct kdbus_cmd</constant>
+ are invalid or the supplied name does not start with the current
+ UID and a '<literal>-</literal>' (dash).
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EEXIST</constant></term>
+ <listitem><para>
+ A bus of that name already exists.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ESHUTDOWN</constant></term>
+ <listitem><para>
+ The kdbus mount instance for the bus was already shut down.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EMFILE</constant></term>
+ <listitem><para>
+ The maximum number of buses for the current user is exhausted.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ </simplelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/kdbus/kdbus.connection.xml b/Documentation/kdbus/kdbus.connection.xml
new file mode 100644
index 000000000..4bb5f30f3
--- /dev/null
+++ b/Documentation/kdbus/kdbus.connection.xml
@@ -0,0 +1,1244 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.connection">
+
+ <refentryinfo>
+ <title>kdbus.connection</title>
+ <productname>kdbus.connection</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus.connection</refname>
+ <refpurpose>kdbus connection</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>
+ Connections are identified by their <emphasis>connection ID</emphasis>,
+ internally implemented as a <type>uint64_t</type> counter.
+ The IDs of every newly created bus start at <constant>1</constant>, and
+ every new connection will increment the counter by <constant>1</constant>.
+ The IDs are not reused.
+ </para>
+ <para>
+ In higher level tools, the user visible representation of a connection is
+ defined by the D-Bus protocol specification as
+ <constant>":1.&lt;ID&gt;"</constant>.
+ </para>
+ <para>
+ Messages with a specific <type>uint64_t</type> destination ID are
+ directly delivered to the connection with the corresponding ID. Signal
+ messages (see
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>)
+ may be addressed to the special destination ID
+ <constant>KDBUS_DST_ID_BROADCAST</constant> (~0ULL) and will then
+ potentially be delivered to all currently active connections on the bus.
+ However, in order to receive any signal messages, clients must subscribe
+ to them by installing a match (see
+ <citerefentry>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>).
+ </para>
+ <para>
+ Messages synthesized and sent directly by the kernel will carry the
+ special source ID <constant>KDBUS_SRC_ID_KERNEL</constant> (0).
+ </para>
+ <para>
+ In addition to the unique <type>uint64_t</type> connection ID,
+ established connections can request the ownership of
+ <emphasis>well-known names</emphasis>, under which they can be found and
+ addressed by other bus clients. A well-known name is associated with one
+ and only one connection at a time. See
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ on name acquisition, the name registry, and the validity of names.
+ </para>
+ <para>
+ Messages can specify the special destination ID
+ <constant>KDBUS_DST_ID_NAME</constant> (0) and carry a well-known name
+ in the message data. Such a message is delivered to the destination
+ connection which owns that well-known name.
+ </para>
+
+ <programlisting><![CDATA[
+ +-------------------------------------------------------------------------+
+ | +---------------+ +---------------------------+ |
+ | | Connection | | Message | -----------------+ |
+ | | :1.22 | --> | src: 22 | | |
+ | | | | dst: 25 | | |
+ | | | | | | |
+ | | | | | | |
+ | | | +---------------------------+ | |
+ | | | | |
+ | | | <--------------------------------------+ | |
+ | +---------------+ | | |
+ | | | |
+ | +---------------+ +---------------------------+ | | |
+ | | Connection | | Message | -----+ | |
+ | | :1.25 | --> | src: 25 | | |
+ | | | | dst: 0xffffffffffffffff | -------------+ | |
+ | | | | (KDBUS_DST_ID_BROADCAST) | | | |
+ | | | | | ---------+ | | |
+ | | | +---------------------------+ | | | |
+ | | | | | | |
+ | | | <--------------------------------------------------+ |
+ | +---------------+ | | |
+ | | | |
+ | +---------------+ +---------------------------+ | | |
+ | | Connection | | Message | --+ | | |
+ | | :1.55 | --> | src: 55 | | | | |
+ | | | | dst: 0 / org.foo.bar | | | | |
+ | | | | | | | | |
+ | | | | | | | | |
+ | | | +---------------------------+ | | | |
+ | | | | | | |
+ | | | <------------------------------------------+ | |
+ | +---------------+ | | |
+ | | | |
+ | +---------------+ | | |
+ | | Connection | | | |
+ | | :1.81 | | | |
+ | | org.foo.bar | | | |
+ | | | | | |
+ | | | | | |
+ | | | <-----------------------------------+ | |
+ | | | | |
+ | | | <----------------------------------------------+ |
+ | +---------------+ |
+ +-------------------------------------------------------------------------+
+ ]]></programlisting>
+ </refsect1>
+
+ <refsect1>
+ <title>Privileged connections</title>
+ <para>
+ A connection is considered <emphasis>privileged</emphasis> if the user
+ it was created by is the same that created the bus, or if the creating
+ task had <constant>CAP_IPC_OWNER</constant> set when it called
+ <constant>KDBUS_CMD_HELLO</constant> (see below).
+ </para>
+ <para>
+ Privileged connections have permission to employ certain restricted
+ functions and commands, which are explained below and in other kdbus
+ man-pages.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Activator and policy holder connection</title>
+ <para>
+ An <emphasis>activator</emphasis> connection is a placeholder for a
+ <emphasis>well-known name</emphasis>. Messages sent to such a connection
+ can be used to start an implementer connection, which will then get all
+ the messages from the activator copied over. An activator connection
+ cannot be used to send any message.
+ </para>
+ <para>
+ A <emphasis>policy holder</emphasis> connection only installs a policy
+ for one or more names. These policy entries are kept active as long as
+ the connection is alive, and are removed once it terminates. Such a
+ policy connection type can be used to deploy restrictions for names that
+ are not yet active on the bus. A policy holder connection cannot be used
+ to send any message.
+ </para>
+ <para>
+ The creation of activator or policy holder connections is restricted to
+ privileged users on the bus (see above).
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Monitor connections</title>
+ <para>
+ Monitors are eavesdropping connections that receive all the traffic on the
+ bus, but is invisible to other connections. Such connections have all
+ properties of any other, regular connection, except for the following
+ details:
+ </para>
+
+ <itemizedlist>
+ <listitem><para>
+ They will get every message sent over the bus, both unicasts and
+ broadcasts.
+ </para></listitem>
+
+ <listitem><para>
+ Installing matches for signal messages is neither necessary
+ nor allowed.
+ </para></listitem>
+
+ <listitem><para>
+ They cannot send messages or be directly addressed as receiver.
+ </para></listitem>
+
+ <listitem><para>
+ They cannot own well-known names. Therefore, they also can't operate as
+ activators.
+ </para></listitem>
+
+ <listitem><para>
+ Their creation and destruction will not cause
+ <constant>KDBUS_ITEM_ID_{ADD,REMOVE}</constant> (see
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>).
+ </para></listitem>
+
+ <listitem><para>
+ They are not listed with their unique name in name registry dumps
+ (see <constant>KDBUS_CMD_NAME_LIST</constant> in
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>), so other connections cannot detect the presence of
+ a monitor.
+ </para></listitem>
+ </itemizedlist>
+ <para>
+ The creation of monitor connections is restricted to privileged users on
+ the bus (see above).
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Creating connections</title>
+ <para>
+ A connection to a bus is created by opening an endpoint file (see
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>)
+ of a bus and becoming an active client with the
+ <constant>KDBUS_CMD_HELLO</constant> ioctl. Every connection has a unique
+ identifier on the bus and can address messages to every other connection
+ on the same bus by using the peer's connection ID as the destination.
+ </para>
+ <para>
+ The <constant>KDBUS_CMD_HELLO</constant> ioctl takes a <type>struct
+ kdbus_cmd_hello</type> as argument.
+ </para>
+
+ <programlisting>
+struct kdbus_cmd_hello {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ __u64 attach_flags_send;
+ __u64 attach_flags_recv;
+ __u64 bus_flags;
+ __u64 id;
+ __u64 pool_size;
+ __u64 offset;
+ __u8 id128[16];
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem>
+ <para>Flags to apply to this connection</para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_HELLO_ACCEPT_FD</constant></term>
+ <listitem>
+ <para>
+ When this flag is set, the connection can be sent file
+ descriptors as message payload of unicast messages. If it's
+ not set, an attempt to send file descriptors will result in
+ <constant>-ECOMM</constant> on the sender's side.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_HELLO_ACTIVATOR</constant></term>
+ <listitem>
+ <para>
+ Make this connection an activator (see above). With this bit
+ set, an item of type <constant>KDBUS_ITEM_NAME</constant> has
+ to be attached. This item describes the well-known name this
+ connection should be an activator for.
+ A connection can not be an activator and a policy holder at
+ the same time time, so this bit is not allowed together with
+ <constant>KDBUS_HELLO_POLICY_HOLDER</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_HELLO_POLICY_HOLDER</constant></term>
+ <listitem>
+ <para>
+ Make this connection a policy holder (see above). With this
+ bit set, an item of type <constant>KDBUS_ITEM_NAME</constant>
+ has to be attached. This item describes the well-known name
+ this connection should hold a policy for.
+ A connection can not be an activator and a policy holder at
+ the same time time, so this bit is not allowed together with
+ <constant>KDBUS_HELLO_ACTIVATOR</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_HELLO_MONITOR</constant></term>
+ <listitem>
+ <para>
+ Make this connection a monitor connection (see above).
+ </para>
+ <para>
+ This flag can only be set by privileged bus connections. See
+ below for more information.
+ A connection can not be monitor and an activator or a policy
+ holder at the same time time, so this bit is not allowed
+ together with <constant>KDBUS_HELLO_ACTIVATOR</constant> or
+ <constant>KDBUS_HELLO_POLICY_HOLDER</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+ <listitem>
+ <para>
+ Requests a set of valid flags for this ioctl. When this bit is
+ set, no action is taken; the ioctl will return
+ <errorcode>0</errorcode>, and the <varname>flags</varname>
+ field will have all bits set that are valid for this command.
+ The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+ cleared by the operation.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>attach_flags_send</varname></term>
+ <listitem><para>
+ Set the bits for metadata this connection permits to be sent to the
+ receiving peer. Only metadata items that are both allowed to be sent
+ by the sender and that are requested by the receiver will be attached
+ to the message.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>attach_flags_recv</varname></term>
+ <listitem><para>
+ Request the attachment of metadata for each message received by this
+ connection. See
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for information about metadata, and
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ regarding items in general.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>bus_flags</varname></term>
+ <listitem><para>
+ Upon successful completion of the ioctl, this member will contain the
+ flags of the bus it connected to.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>id</varname></term>
+ <listitem><para>
+ Upon successful completion of the command, this member will contain
+ the numerical ID of the new connection.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>pool_size</varname></term>
+ <listitem><para>
+ The size of the communication pool, in bytes. The pool can be
+ accessed by calling
+ <citerefentry>
+ <refentrytitle>mmap</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ on the file descriptor that was used to issue the
+ <constant>KDBUS_CMD_HELLO</constant> ioctl.
+ The pool size of a connection must be greater than
+ <constant>0</constant> and a multiple of
+ <constant>PAGE_SIZE</constant>. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>offset</varname></term>
+ <listitem><para>
+ The kernel will return the offset in the pool where returned details
+ will be stored. See below.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>id128</varname></term>
+ <listitem><para>
+ Upon successful completion of the ioctl, this member will contain the
+ <emphasis>128-bit UUID</emphasis> of the connected bus.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ Variable list of items containing optional additional information.
+ The following items are currently expected/valid:
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_CONN_DESCRIPTION</constant></term>
+ <listitem>
+ <para>
+ Contains a string that describes this connection, so it can
+ be identified later.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NAME</constant></term>
+ <term><constant>KDBUS_ITEM_POLICY_ACCESS</constant></term>
+ <listitem>
+ <para>
+ For activators and policy holders only, combinations of
+ these two items describe policy access entries. See
+ <citerefentry>
+ <refentrytitle>kdbus.policy</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for further details.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_CREDS</constant></term>
+ <term><constant>KDBUS_ITEM_PIDS</constant></term>
+ <term><constant>KDBUS_ITEM_SECLABEL</constant></term>
+ <listitem>
+ <para>
+ Privileged bus users may submit these types in order to
+ create connections with faked credentials. This information
+ will be returned when peer information is queried by
+ <constant>KDBUS_CMD_CONN_INFO</constant>. See below for more
+ information on retrieving information on connections.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+ <listitem><para>
+ With this item, programs can <emphasis>probe</emphasis> the
+ kernel for known item types. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ At the offset returned in the <varname>offset</varname> field of
+ <type>struct kdbus_cmd_hello</type>, the kernel will store items
+ of the following types:
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_BLOOM_PARAMETER</constant></term>
+ <listitem>
+ <para>
+ Bloom filter parameter as defined by the bus creator.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ The offset in the pool has to be freed with the
+ <constant>KDBUS_CMD_FREE</constant> ioctl. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for further information.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Retrieving information on a connection</title>
+ <para>
+ The <constant>KDBUS_CMD_CONN_INFO</constant> ioctl can be used to
+ retrieve credentials and properties of the initial creator of a
+ connection. This ioctl uses the following struct.
+ </para>
+
+ <programlisting>
+struct kdbus_cmd_info {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ __u64 id;
+ __u64 attach_flags;
+ __u64 offset;
+ __u64 info_size;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>
+ Currently, no flags are supported.
+ <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+ valid flags. If set, the ioctl will return <errorcode>0</errorcode>,
+ and the <varname>flags</varname> field is set to
+ <constant>0</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>id</varname></term>
+ <listitem><para>
+ The numerical ID of the connection for which information is to be
+ retrieved. If set to a non-zero value, the
+ <constant>KDBUS_ITEM_OWNED_NAME</constant> item is ignored.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>attach_flags</varname></term>
+ <listitem><para>
+ Specifies which metadata items should be attached to the answer. See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>offset</varname></term>
+ <listitem><para>
+ When the ioctl returns, this field will contain the offset of the
+ connection information inside the caller's pool. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for further information.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>info_size</varname></term>
+ <listitem><para>
+ The kernel will return the size of the returned information, so
+ applications can optionally
+ <citerefentry>
+ <refentrytitle>mmap</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ specific parts of the pool. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for further information.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ The following items are expected for
+ <constant>KDBUS_CMD_CONN_INFO</constant>.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_OWNED_NAME</constant></term>
+ <listitem>
+ <para>
+ Contains the well-known name of the connection to look up as.
+ This item is mandatory if the <varname>id</varname> field is
+ set to 0.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+ <listitem><para>
+ With this item, programs can <emphasis>probe</emphasis> the
+ kernel for known item types. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ When the ioctl returns, the following struct will be stored in the
+ caller's pool at <varname>offset</varname>. The fields in this struct
+ are described below.
+ </para>
+
+ <programlisting>
+struct kdbus_info {
+ __u64 size;
+ __u64 id;
+ __u64 flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>id</varname></term>
+ <listitem><para>
+ The connection's unique ID.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>
+ The connection's flags as specified when it was created.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ Depending on the <varname>flags</varname> field in
+ <type>struct kdbus_cmd_info</type>, items of types
+ <constant>KDBUS_ITEM_OWNED_NAME</constant> and
+ <constant>KDBUS_ITEM_CONN_DESCRIPTION</constant> may follow here.
+ <constant>KDBUS_ITEM_NEGOTIATE</constant> is also allowed.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Once the caller is finished with parsing the return buffer, it needs to
+ employ the <constant>KDBUS_CMD_FREE</constant> command for the offset, in
+ order to free the buffer part. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for further information.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Getting information about a connection's bus creator</title>
+ <para>
+ The <constant>KDBUS_CMD_BUS_CREATOR_INFO</constant> ioctl takes the same
+ struct as <constant>KDBUS_CMD_CONN_INFO</constant>, but is used to
+ retrieve information about the creator of the bus the connection is
+ attached to. The metadata returned by this call is collected during the
+ creation of the bus and is never altered afterwards, so it provides
+ pristine information on the task that created the bus, at the moment when
+ it did so.
+ </para>
+ <para>
+ In response to this call, a slice in the connection's pool is allocated
+ and filled with an object of type <type>struct kdbus_info</type>,
+ pointed to by the ioctl's <varname>offset</varname> field.
+ </para>
+
+ <programlisting>
+struct kdbus_info {
+ __u64 size;
+ __u64 id;
+ __u64 flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>id</varname></term>
+ <listitem><para>
+ The bus ID.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>
+ The bus flags as specified when it was created.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ Metadata information is stored in items here. The item list
+ contains a <constant>KDBUS_ITEM_MAKE_NAME</constant> item that
+ indicates the bus name of the calling connection.
+ <constant>KDBUS_ITEM_NEGOTIATE</constant> is allowed to probe
+ for known item types.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Once the caller is finished with parsing the return buffer, it needs to
+ employ the <constant>KDBUS_CMD_FREE</constant> command for the offset, in
+ order to free the buffer part. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for further information.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Updating connection details</title>
+ <para>
+ Some of a connection's details can be updated with the
+ <constant>KDBUS_CMD_CONN_UPDATE</constant> ioctl, using the file
+ descriptor that was used to create the connection. The update command
+ uses the following struct.
+ </para>
+
+ <programlisting>
+struct kdbus_cmd {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>
+ Currently, no flags are supported.
+ <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+ valid flags. If set, the ioctl will return <errorcode>0</errorcode>,
+ and the <varname>flags</varname> field is set to
+ <constant>0</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ Items to describe the connection details to be updated. The
+ following item types are supported.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_ATTACH_FLAGS_SEND</constant></term>
+ <listitem>
+ <para>
+ Supply a new set of metadata items that this connection
+ permits to be sent along with messages.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_ATTACH_FLAGS_RECV</constant></term>
+ <listitem>
+ <para>
+ Supply a new set of metadata items that this connection
+ requests to be attached to each message.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NAME</constant></term>
+ <term><constant>KDBUS_ITEM_POLICY_ACCESS</constant></term>
+ <listitem>
+ <para>
+ Policy holder connections may supply a new set of policy
+ information with these items. For other connection types,
+ <constant>EOPNOTSUPP</constant> is returned in
+ <varname>errno</varname>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+ <listitem><para>
+ With this item, programs can <emphasis>probe</emphasis> the
+ kernel for known item types. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Termination of connections</title>
+ <para>
+ A connection can be terminated by simply calling
+ <citerefentry>
+ <refentrytitle>close</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ on its file descriptor. All pending incoming messages will be discarded,
+ and the memory allocated by the pool will be freed.
+ </para>
+
+ <para>
+ An alternative way of closing down a connection is via the
+ <constant>KDBUS_CMD_BYEBYE</constant> ioctl. This ioctl will succeed only
+ if the message queue of the connection is empty at the time of closing;
+ otherwise, the ioctl will fail with <varname>errno</varname> set to
+ <constant>EBUSY</constant>. When this ioctl returns
+ successfully, the connection has been terminated and won't accept any new
+ messages from remote peers. This way, a connection can be terminated
+ race-free, without losing any messages. The ioctl takes an argument of
+ type <type>struct kdbus_cmd</type>.
+ </para>
+
+ <programlisting>
+struct kdbus_cmd {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>
+ Currently, no flags are supported.
+ <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+ valid flags. If set, the ioctl will fail with
+ <varname>errno</varname> set to <constant>EPROTO</constant>, and
+ the <varname>flags</varname> field is set to <constant>0</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ The following item types are supported.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+ <listitem><para>
+ With this item, programs can <emphasis>probe</emphasis> the
+ kernel for known item types. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Return value</title>
+ <para>
+ On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+ on error, <errorcode>-1</errorcode> is returned, and
+ <varname>errno</varname> is set to indicate the error.
+ If the issued ioctl is illegal for the file descriptor used,
+ <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+ </para>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_HELLO</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EFAULT</constant></term>
+ <listitem><para>
+ The supplied pool size was 0 or not a multiple of the page size.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ The flags supplied in <type>struct kdbus_cmd_hello</type>
+ are invalid.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ An illegal combination of
+ <constant>KDBUS_HELLO_MONITOR</constant>,
+ <constant>KDBUS_HELLO_ACTIVATOR</constant> and
+ <constant>KDBUS_HELLO_POLICY_HOLDER</constant> was passed in
+ <varname>flags</varname>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ An invalid set of items was supplied.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ECONNREFUSED</constant></term>
+ <listitem><para>
+ The attach_flags_send field did not satisfy the requirements of
+ the bus.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EPERM</constant></term>
+ <listitem><para>
+ A <constant>KDBUS_ITEM_CREDS</constant> items was supplied, but the
+ current user is not privileged.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ESHUTDOWN</constant></term>
+ <listitem><para>
+ The bus you were trying to connect to has already been shut down.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EMFILE</constant></term>
+ <listitem><para>
+ The maximum number of connections on the bus has been reached.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EOPNOTSUPP</constant></term>
+ <listitem><para>
+ The endpoint does not support the connection flags supplied in
+ <type>struct kdbus_cmd_hello</type>.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_BYEBYE</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EALREADY</constant></term>
+ <listitem><para>
+ The connection has already been shut down.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EBUSY</constant></term>
+ <listitem><para>
+ There are still messages queued up in the connection's pool.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_CONN_INFO</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Invalid flags, or neither an ID nor a name was provided, or the
+ name is invalid.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ESRCH</constant></term>
+ <listitem><para>
+ Connection lookup by name failed.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ENXIO</constant></term>
+ <listitem><para>
+ No connection with the provided connection ID found.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_CONN_UPDATE</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Illegal flags or items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Wildcards submitted in policy entries, or illegal sequence
+ of policy items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EOPNOTSUPP</constant></term>
+ <listitem><para>
+ Operation not supported by connection.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>E2BIG</constant></term>
+ <listitem><para>
+ Too many policy items attached.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.policy</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ </simplelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/kdbus/kdbus.endpoint.xml b/Documentation/kdbus/kdbus.endpoint.xml
new file mode 100644
index 000000000..6632485f3
--- /dev/null
+++ b/Documentation/kdbus/kdbus.endpoint.xml
@@ -0,0 +1,429 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.endpoint">
+
+ <refentryinfo>
+ <title>kdbus.endpoint</title>
+ <productname>kdbus.endpoint</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus.endpoint</refname>
+ <refpurpose>kdbus endpoint</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>
+ Endpoints are entry points to a bus (see
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>).
+ By default, each bus has a default
+ endpoint called 'bus'. The bus owner has the ability to create custom
+ endpoints with specific names, permissions, and policy databases
+ (see below). An endpoint is presented as file underneath the directory
+ of the parent bus.
+ </para>
+ <para>
+ To create a custom endpoint, open the default endpoint
+ (<literal>bus</literal>) and use the
+ <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> ioctl with
+ <type>struct kdbus_cmd</type>. Custom endpoints always have a policy
+ database that, by default, forbids any operation. You have to explicitly
+ install policy entries to allow any operation on this endpoint.
+ </para>
+ <para>
+ Once <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> succeeded, the new
+ endpoint will appear in the filesystem
+ (<citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>), and the used file descriptor will manage the
+ newly created endpoint resource. It cannot be used to manage further
+ resources and must be kept open as long as the endpoint is needed. The
+ endpoint will be terminated as soon as the file descriptor is closed.
+ </para>
+ <para>
+ Endpoint names may be chosen freely except for one restriction: the name
+ must be prefixed with the numeric effective UID of the creator and a dash.
+ This is required to avoid namespace clashes between different users. When
+ creating an endpoint, the name that is passed in must be properly
+ formatted or the kernel will refuse creation of the endpoint. Example:
+ <literal>1047-my-endpoint</literal> is an acceptable name for an
+ endpoint registered by a user with UID 1047. However,
+ <literal>1024-my-endpoint</literal> is not, and neither is
+ <literal>my-endpoint</literal>. The UID must be provided in the
+ user-namespace of the bus.
+ </para>
+ <para>
+ To create connections to a bus, use <constant>KDBUS_CMD_HELLO</constant>
+ on a file descriptor returned by <function>open()</function> on an
+ endpoint node. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for further details.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Creating custom endpoints</title>
+ <para>
+ To create a new endpoint, the
+ <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> command is used. Along with
+ the endpoint's name, which will be used to expose the endpoint in the
+ <citerefentry>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>,
+ the command also optionally takes items to set up the endpoint's
+ <citerefentry>
+ <refentrytitle>kdbus.policy</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> takes a
+ <type>struct kdbus_cmd</type> argument.
+ </para>
+ <programlisting>
+struct kdbus_cmd {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>The flags for creation.</para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_MAKE_ACCESS_GROUP</constant></term>
+ <listitem>
+ <para>Make the endpoint file group-accessible.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_MAKE_ACCESS_WORLD</constant></term>
+ <listitem>
+ <para>Make the endpoint file world-accessible.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+ <listitem>
+ <para>
+ Requests a set of valid flags for this ioctl. When this bit is
+ set, no action is taken; the ioctl will return
+ <errorcode>0</errorcode>, and the <varname>flags</varname>
+ field will have all bits set that are valid for this command.
+ The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+ cleared by the operation.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ The following items are expected for
+ <constant>KDBUS_CMD_ENDPOINT_MAKE</constant>.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_MAKE_NAME</constant></term>
+ <listitem>
+ <para>Contains a string to identify the endpoint name.</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NAME</constant></term>
+ <term><constant>KDBUS_ITEM_POLICY_ACCESS</constant></term>
+ <listitem>
+ <para>
+ These items are used to set the policy attached to the
+ endpoint. For more details on bus and endpoint policies, see
+ <citerefentry>
+ <refentrytitle>kdbus.policy</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <varname>EINVAL</varname>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Updating endpoints</title>
+ <para>
+ To update an existing endpoint, the
+ <constant>KDBUS_CMD_ENDPOINT_UPDATE</constant> command is used on the file
+ descriptor that was used to create the endpoint, using
+ <constant>KDBUS_CMD_ENDPOINT_MAKE</constant>. The only relevant detail of
+ the endpoint that can be updated is the policy. When the command is
+ employed, the policy of the endpoint is <emphasis>replaced</emphasis>
+ atomically with the new set of rules.
+ The command takes a <type>struct kdbus_cmd</type> argument.
+ </para>
+ <programlisting>
+struct kdbus_cmd {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>
+ Unused for this command.
+ <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+ valid flags. If set, the ioctl will return <errorcode>0</errorcode>,
+ and the <varname>flags</varname> field is set to
+ <constant>0</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ The following items are expected for
+ <constant>KDBUS_CMD_ENDPOINT_UPDATE</constant>.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NAME</constant></term>
+ <term><constant>KDBUS_ITEM_POLICY_ACCESS</constant></term>
+ <listitem>
+ <para>
+ These items are used to set the policy attached to the
+ endpoint. For more details on bus and endpoint policies, see
+ <citerefentry>
+ <refentrytitle>kdbus.policy</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ Existing policy is atomically replaced with the new rules
+ provided.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+ <listitem><para>
+ With this item, programs can <emphasis>probe</emphasis> the
+ kernel for known item types. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Return value</title>
+ <para>
+ On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+ on error, <errorcode>-1</errorcode> is returned, and
+ <varname>errno</varname> is set to indicate the error.
+ If the issued ioctl is illegal for the file descriptor used,
+ <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+ </para>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> may fail with the
+ following errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ The flags supplied in the <type>struct kdbus_cmd</type>
+ are invalid.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Illegal combination of <constant>KDBUS_ITEM_NAME</constant> and
+ <constant>KDBUS_ITEM_POLICY_ACCESS</constant> was provided.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EEXIST</constant></term>
+ <listitem><para>
+ An endpoint of that name already exists.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EPERM</constant></term>
+ <listitem><para>
+ The calling user is not privileged. See
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for information about privileged users.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_ENDPOINT_UPDATE</constant> may fail with the
+ following errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ The flags supplied in <type>struct kdbus_cmd</type>
+ are invalid.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Illegal combination of <constant>KDBUS_ITEM_NAME</constant> and
+ <constant>KDBUS_ITEM_POLICY_ACCESS</constant> was provided.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ </simplelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/kdbus/kdbus.fs.xml b/Documentation/kdbus/kdbus.fs.xml
new file mode 100644
index 000000000..8c2a90e10
--- /dev/null
+++ b/Documentation/kdbus/kdbus.fs.xml
@@ -0,0 +1,124 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus_fs">
+
+ <refentryinfo>
+ <title>kdbus.fs</title>
+ <productname>kdbus.fs</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus.fs</refname>
+ <refpurpose>kdbus file system</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>File-system Layout</title>
+
+ <para>
+ The <emphasis>kdbusfs</emphasis> pseudo filesystem provides access to
+ kdbus entities, such as <emphasis>buses</emphasis> and
+ <emphasis>endpoints</emphasis>. Each time the filesystem is mounted,
+ a new, isolated kdbus instance is created, which is independent from the
+ other instances.
+ </para>
+ <para>
+ The system-wide standard mount point for <emphasis>kdbusfs</emphasis> is
+ <constant>/sys/fs/kdbus</constant>.
+ </para>
+
+ <para>
+ Buses are represented as directories in the file system layout, whereas
+ endpoints are exposed as files inside these directories. At the top-level,
+ a <emphasis>control</emphasis> node is present, which can be opened to
+ create new buses via the <constant>KDBUS_CMD_BUS_MAKE</constant> ioctl.
+ Each <emphasis>bus</emphasis> shows a default endpoint called
+ <varname>bus</varname>, which can be opened to either create a connection
+ with the <constant>KDBUS_CMD_HELLO</constant> ioctl, or to create new
+ custom endpoints for the bus with
+ <constant>KDBUS_CMD_ENDPOINT_MAKE</constant>. See
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>,
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry> and
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+
+ <para>Following, you can see an example layout of the
+ <emphasis>kdbusfs</emphasis> filesystem:</para>
+
+<programlisting>
+ /sys/fs/kdbus/ ; mount-point
+ |-- 0-system ; bus directory
+ | |-- bus ; default endpoint
+ | `-- 1017-custom ; custom endpoint
+ |-- 1000-user ; bus directory
+ | |-- bus ; default endpoint
+ | |-- 1000-service-A ; custom endpoint
+ | `-- 1000-service-B ; custom endpoint
+ `-- control ; control file
+</programlisting>
+ </refsect1>
+
+ <refsect1>
+ <title>Mounting instances</title>
+ <para>
+ In order to get a new and separate kdbus environment, a new instance
+ of <emphasis>kdbusfs</emphasis> can be mounted like this:
+ </para>
+<programlisting>
+ # mount -t kdbusfs kdbusfs /tmp/new_kdbus/
+</programlisting>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>mount</refentrytitle>
+ <manvolnum>8</manvolnum>
+ </citerefentry>
+ </member>
+ </simplelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/kdbus/kdbus.item.xml b/Documentation/kdbus/kdbus.item.xml
new file mode 100644
index 000000000..ee09dfa44
--- /dev/null
+++ b/Documentation/kdbus/kdbus.item.xml
@@ -0,0 +1,839 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus">
+
+ <refentryinfo>
+ <title>kdbus.item</title>
+ <productname>kdbus item</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus.item</refname>
+ <refpurpose>kdbus item structure, layout and usage</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>
+ To flexibly augment transport structures, data blobs of type
+ <type>struct kdbus_item</type> can be attached to the structs passed
+ into the ioctls. Some ioctls make items of certain types mandatory,
+ others are optional. Items that are unsupported by ioctls they are
+ attached to will cause the ioctl to fail with <varname>errno</varname>
+ set to <constant>EINVAL</constant>.
+ Items are also used for information stored in a connection's
+ <emphasis>pool</emphasis>, such as received messages, name lists or
+ requested connection or bus owner information. Depending on the type of
+ an item, its total size is either fixed or variable.
+ </para>
+
+ <refsect2>
+ <title>Chaining items</title>
+ <para>
+ Whenever items are used as part of the kdbus kernel API, they are
+ embedded in structs that are embedded inside structs that themselves
+ include a size field containing the overall size of the structure.
+ This allows multiple items to be chained up, and an item iterator
+ (see below) is capable of detecting the end of an item chain.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Alignment</title>
+ <para>
+ The kernel expects all items to be aligned to 8-byte boundaries.
+ Unaligned items will cause the ioctl they are used with to fail
+ with <varname>errno</varname> set to <constant>EINVAL</constant>.
+ An item that has an unaligned size itself hence needs to be padded
+ if it is followed by another item.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Iterating items</title>
+ <para>
+ A simple iterator would iterate over the items until the items have
+ reached the embedding structure's overall size. An example
+ implementation is shown below.
+ </para>
+
+ <programlisting><![CDATA[
+#define KDBUS_ALIGN8(val) (((val) + 7) & ~7)
+
+#define KDBUS_ITEM_NEXT(item) \
+ (typeof(item))((uint8_t *)(item) + KDBUS_ALIGN8((item)->size))
+
+#define KDBUS_ITEM_FOREACH(item, head, first) \
+ for ((item) = (head)->first; \
+ ((uint8_t *)(item) < (uint8_t *)(head) + (head)->size) && \
+ ((uint8_t *)(item) >= (uint8_t *)(head)); \
+ (item) = KDBUS_ITEM_NEXT(item))
+ ]]></programlisting>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>Item layout</title>
+ <para>
+ A <type>struct kdbus_item</type> consists of a
+ <varname>size</varname> field, describing its overall size, and a
+ <varname>type</varname> field, both 64 bit wide. They are followed by
+ a union to store information that is specific to the item's type.
+ The struct layout is shown below.
+ </para>
+
+ <programlisting>
+struct kdbus_item {
+ __u64 size;
+ __u64 type;
+ /* item payload - see below */
+ union {
+ __u8 data[0];
+ __u32 data32[0];
+ __u64 data64[0];
+ char str[0];
+
+ __u64 id;
+ struct kdbus_vec vec;
+ struct kdbus_creds creds;
+ struct kdbus_pids pids;
+ struct kdbus_audit audit;
+ struct kdbus_caps caps;
+ struct kdbus_timestamp timestamp;
+ struct kdbus_name name;
+ struct kdbus_bloom_parameter bloom_parameter;
+ struct kdbus_bloom_filter bloom_filter;
+ struct kdbus_memfd memfd;
+ int fds[0];
+ struct kdbus_notify_name_change name_change;
+ struct kdbus_notify_id_change id_change;
+ struct kdbus_policy_access policy_access;
+ };
+};
+ </programlisting>
+
+ <para>
+ <type>struct kdbus_item</type> should never be used to allocate
+ an item instance, as its size may grow in future releases of the API.
+ Instead, it should be manually assembled by storing the
+ <varname>size</varname>, <varname>type</varname> and payload to a
+ struct of its own.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Item types</title>
+
+ <refsect2>
+ <title>Negotiation item</title>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+ <listitem><para>
+ With this item is attached to any ioctl, programs can
+ <emphasis>probe</emphasis> the kernel for known item types.
+ The item carries an array of <type>uint64_t</type> values in
+ <varname>item.data64</varname>, each set to an item type to
+ probe. The kernel will reset each member of this array that is
+ not recognized as valid item type to <constant>0</constant>.
+ This way, users can negotiate kernel features at start-up to
+ keep newer userspace compatible with older kernels. This item
+ is never attached by the kernel in response to any command.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>Command specific items</title>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_PAYLOAD_VEC</constant></term>
+ <term><constant>KDBUS_ITEM_PAYLOAD_OFF</constant></term>
+ <listitem><para>
+ Messages are directly copied by the sending process into the
+ receiver's
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ This way, two peers can exchange data by effectively doing a
+ single-copy from one process to another; the kernel will not buffer
+ the data anywhere else. <constant>KDBUS_ITEM_PAYLOAD_VEC</constant>
+ is used when <emphasis>sending</emphasis> message. The item
+ references a memory address when the payload data can be found.
+ <constant>KDBUS_ITEM_PAYLOAD_OFF</constant> is used when messages
+ are <emphasis>received</emphasis>, and the
+ <constant>offset</constant> value describes the offset inside the
+ receiving connection's
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ where the message payload can be found. See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on passing of payload data along with a
+ message.
+ <programlisting>
+struct kdbus_vec {
+ __u64 size;
+ union {
+ __u64 address;
+ __u64 offset;
+ };
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant></term>
+ <listitem><para>
+ Transports a file descriptor of a <emphasis>memfd</emphasis> in
+ <type>struct kdbus_memfd</type> in <varname>item.memfd</varname>.
+ The <varname>size</varname> field has to match the actual size of
+ the memfd that was specified when it was created. The
+ <varname>start</varname> parameter denotes the offset inside the
+ memfd at which the referenced payload starts. See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on passing of payload data along with a
+ message.
+ <programlisting>
+struct kdbus_memfd {
+ __u64 start;
+ __u64 size;
+ int fd;
+ __u32 __pad;
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_FDS</constant></term>
+ <listitem><para>
+ Contains an array of <emphasis>file descriptors</emphasis>.
+ When used with <constant>KDBUS_CMD_SEND</constant>, the values of
+ this array must be filled with valid file descriptor numbers.
+ When received as item attached to a message, the array will
+ contain the numbers of the installed file descriptors, or
+ <constant>-1</constant> in case an error occurred.
+ In either case, the number of entries in the array is derived from
+ the item's total size. See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>Items specific to some commands</title>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_CANCEL_FD</constant></term>
+ <listitem><para>
+ Transports a file descriptor that can be used to cancel a
+ synchronous <constant>KDBUS_CMD_SEND</constant> operation by
+ writing to it. The file descriptor is stored in
+ <varname>item.fd[0]</varname>. The item may only contain one
+ file descriptor. See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on this item and how to use it.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_BLOOM_PARAMETER</constant></term>
+ <listitem><para>
+ Contains a set of <emphasis>bloom parameters</emphasis> as
+ <type>struct kdbus_bloom_parameter</type> in
+ <varname>item.bloom_parameter</varname>.
+ The item is passed from userspace to kernel during the
+ <constant>KDBUS_CMD_BUS_MAKE</constant> ioctl, and returned
+ verbatim when <constant>KDBUS_CMD_HELLO</constant> is called.
+ The kernel does not use the bloom parameters, but they need to
+ be known by each connection on the bus in order to define the
+ bloom filter hash details. See
+ <citerefentry>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on matching and bloom filters.
+ <programlisting>
+struct kdbus_bloom_parameter {
+ __u64 size;
+ __u64 n_hash;
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_BLOOM_FILTER</constant></term>
+ <listitem><para>
+ Carries a <emphasis>bloom filter</emphasis> as
+ <type>struct kdbus_bloom_filter</type> in
+ <varname>item.bloom_filter</varname>. It is mandatory to send this
+ item attached to a <type>struct kdbus_msg</type>, in case the
+ message is a signal. This item is never transported from kernel to
+ userspace. See
+ <citerefentry>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on matching and bloom filters.
+ <programlisting>
+struct kdbus_bloom_filter {
+ __u64 generation;
+ __u64 data[0];
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_BLOOM_MASK</constant></term>
+ <listitem><para>
+ Transports a <emphasis>bloom mask</emphasis> as binary data blob
+ stored in <varname>item.data</varname>. This item is used to
+ describe a match into a connection's match database. See
+ <citerefentry>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on matching and bloom filters.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_DST_NAME</constant></term>
+ <listitem><para>
+ Contains a <emphasis>well-known name</emphasis> to send a
+ message to, as null-terminated string in
+ <varname>item.str</varname>. This item is used with
+ <constant>KDBUS_CMD_SEND</constant>. See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on how to send a message.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_MAKE_NAME</constant></term>
+ <listitem><para>
+ Contains a <emphasis>bus name</emphasis> or
+ <emphasis>endpoint name</emphasis>, stored as null-terminated
+ string in <varname>item.str</varname>. This item is sent from
+ userspace to kernel when buses or endpoints are created, and
+ returned back to userspace when the bus creator information is
+ queried. See
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ and
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_ATTACH_FLAGS_SEND</constant></term>
+ <term><constant>KDBUS_ITEM_ATTACH_FLAGS_RECV</constant></term>
+ <listitem><para>
+ Contains a set of <emphasis>attach flags</emphasis> at
+ <emphasis>send</emphasis> or <emphasis>receive</emphasis> time. See
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>,
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry> and
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on attach flags.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_ID</constant></term>
+ <listitem><para>
+ Transports a connection's <emphasis>numerical ID</emphasis> of
+ a connection as <type>uint64_t</type> value in
+ <varname>item.id</varname>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NAME</constant></term>
+ <listitem><para>
+ Transports a name associated with the
+ <emphasis>name registry</emphasis> as null-terminated string as
+ <type>struct kdbus_name</type> in
+ <varname>item.name</varname>. The <varname>flags</varname>
+ contains the flags of the name. See
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on how to access the name registry of a bus.
+ <programlisting>
+struct kdbus_name {
+ __u64 flags;
+ char name[0];
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>Items attached by the kernel as metadata</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_TIMESTAMP</constant></term>
+ <listitem><para>
+ Contains both the <emphasis>monotonic</emphasis> and the
+ <emphasis>realtime</emphasis> timestamp, taken when the message
+ was processed on the kernel side.
+ Stored as <type>struct kdbus_timestamp</type> in
+ <varname>item.timestamp</varname>.
+ <programlisting>
+struct kdbus_timestamp {
+ __u64 seqnum;
+ __u64 monotonic_ns;
+ __u64 realtime_ns;
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_CREDS</constant></term>
+ <listitem><para>
+ Contains a set of <emphasis>user</emphasis> and
+ <emphasis>group</emphasis> information as 32-bit values, in the
+ usual four flavors: real, effective, saved and filesystem related.
+ Stored as <type>struct kdbus_creds</type> in
+ <varname>item.creds</varname>.
+ <programlisting>
+struct kdbus_creds {
+ __u32 uid;
+ __u32 euid;
+ __u32 suid;
+ __u32 fsuid;
+ __u32 gid;
+ __u32 egid;
+ __u32 sgid;
+ __u32 fsgid;
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_PIDS</constant></term>
+ <listitem><para>
+ Contains the <emphasis>PID</emphasis>, <emphasis>TID</emphasis>
+ and <emphasis>parent PID (PPID)</emphasis> of a remote peer.
+ Stored as <type>struct kdbus_pids</type> in
+ <varname>item.pids</varname>.
+ <programlisting>
+struct kdbus_pids {
+ __u64 pid;
+ __u64 tid;
+ __u64 ppid;
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_AUXGROUPS</constant></term>
+ <listitem><para>
+ Contains the <emphasis>auxiliary (supplementary) groups</emphasis>
+ a remote peer is a member of, stored as array of
+ <type>uint32_t</type> values in <varname>item.data32</varname>.
+ The array length can be determined by looking at the item's total
+ size, subtracting the size of the header and dividing the
+ remainder by <constant>sizeof(uint32_t)</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_OWNED_NAME</constant></term>
+ <listitem><para>
+ Contains a <emphasis>well-known name</emphasis> currently owned
+ by a connection. The name is stored as null-terminated string in
+ <varname>item.str</varname>. Its length can also be derived from
+ the item's total size.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_TID_COMM</constant> [*]</term>
+ <listitem><para>
+ Contains the <emphasis>comm</emphasis> string of a task's
+ <emphasis>TID</emphasis> (thread ID), stored as null-terminated
+ string in <varname>item.str</varname>. Its length can also be
+ derived from the item's total size. Receivers of this item should
+ not use its contents for any kind of security measures. See below.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_PID_COMM</constant> [*]</term>
+ <listitem><para>
+ Contains the <emphasis>comm</emphasis> string of a task's
+ <emphasis>PID</emphasis> (process ID), stored as null-terminated
+ string in <varname>item.str</varname>. Its length can also be
+ derived from the item's total size. Receivers of this item should
+ not use its contents for any kind of security measures. See below.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_EXE</constant> [*]</term>
+ <listitem><para>
+ Contains the <emphasis>path to the executable</emphasis> of a task,
+ stored as null-terminated string in <varname>item.str</varname>. Its
+ length can also be derived from the item's total size. Receivers of
+ this item should not use its contents for any kind of security
+ measures. See below.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_CMDLINE</constant> [*]</term>
+ <listitem><para>
+ Contains the <emphasis>command line arguments</emphasis> of a
+ task, stored as an <emphasis>array</emphasis> of null-terminated
+ strings in <varname>item.str</varname>. The total length of all
+ strings in the array can be derived from the item's total size.
+ Receivers of this item should not use its contents for any kind
+ of security measures. See below.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_CGROUP</constant></term>
+ <listitem><para>
+ Contains the <emphasis>cgroup path</emphasis> of a task, stored
+ as null-terminated string in <varname>item.str</varname>. Its
+ length can also be derived from the item's total size.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_CAPS</constant></term>
+ <listitem><para>
+ Contains sets of <emphasis>capabilities</emphasis>, stored as
+ <type>struct kdbus_caps</type> in <varname>item.caps</varname>.
+ As the item size may increase in the future, programs should be
+ written in a way that it takes
+ <varname>item.caps.last_cap</varname> into account, and derive
+ the number of sets and rows from the item size and the reported
+ number of valid capability bits.
+ <programlisting>
+struct kdbus_caps {
+ __u32 last_cap;
+ __u32 caps[0];
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_SECLABEL</constant></term>
+ <listitem><para>
+ Contains the <emphasis>LSM label</emphasis> of a task, stored as
+ null-terminated string in <varname>item.str</varname>. Its length
+ can also be derived from the item's total size.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_AUDIT</constant></term>
+ <listitem><para>
+ Contains the audit <emphasis>sessionid</emphasis> and
+ <emphasis>loginuid</emphasis> of a task, stored as
+ <type>struct kdbus_audit</type> in
+ <varname>item.audit</varname>.
+ <programlisting>
+struct kdbus_audit {
+ __u32 sessionid;
+ __u32 loginuid;
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_CONN_DESCRIPTION</constant></term>
+ <listitem><para>
+ Contains the <emphasis>connection description</emphasis>, as set
+ by <constant>KDBUS_CMD_HELLO</constant> or
+ <constant>KDBUS_CMD_CONN_UPDATE</constant>, stored as
+ null-terminated string in <varname>item.str</varname>. Its length
+ can also be derived from the item's total size.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ All metadata is automatically translated into the
+ <emphasis>namespaces</emphasis> of the task that receives them. See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information.
+ </para>
+
+ <para>
+ [*] Note that the content stored in metadata items of type
+ <constant>KDBUS_ITEM_TID_COMM</constant>,
+ <constant>KDBUS_ITEM_PID_COMM</constant>,
+ <constant>KDBUS_ITEM_EXE</constant> and
+ <constant>KDBUS_ITEM_CMDLINE</constant>
+ can easily be tampered by the sending tasks. Therefore, they should
+ <emphasis>not</emphasis> be used for any sort of security relevant
+ assumptions. The only reason they are transmitted is to let
+ receivers know about details that were set when metadata was
+ collected, even though the task they were collected from is not
+ active any longer when the items are received.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Items used for policy entries, matches and notifications</title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_POLICY_ACCESS</constant></term>
+ <listitem><para>
+ This item describes a <emphasis>policy access</emphasis> entry to
+ access the policy database of a
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry> or
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ Please refer to
+ <citerefentry>
+ <refentrytitle>kdbus.policy</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on the policy database and how to access it.
+ <programlisting>
+struct kdbus_policy_access {
+ __u64 type;
+ __u64 access;
+ __u64 id;
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_ID_ADD</constant></term>
+ <term><constant>KDBUS_ITEM_ID_REMOVE</constant></term>
+ <listitem><para>
+ This item is sent as attachment to a
+ <emphasis>kernel notification</emphasis> and indicates that a
+ new connection was created on the bus, or that a connection was
+ disconnected, respectively. It stores a
+ <type>struct kdbus_notify_id_change</type> in
+ <varname>item.id_change</varname>.
+ The <varname>id</varname> field contains the numeric ID of the
+ connection that was added or removed, and <varname>flags</varname>
+ is set to the connection flags, as passed by
+ <constant>KDBUS_CMD_HELLO</constant>. See
+ <citerefentry>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ and
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on matches and notification messages.
+ <programlisting>
+struct kdbus_notify_id_change {
+ __u64 id;
+ __u64 flags;
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NAME_ADD</constant></term>
+ <term><constant>KDBUS_ITEM_NAME_REMOVE</constant></term>
+ <term><constant>KDBUS_ITEM_NAME_CHANGE</constant></term>
+ <listitem><para>
+ This item is sent as attachment to a
+ <emphasis>kernel notification</emphasis> and indicates that a
+ <emphasis>well-known name</emphasis> appeared, disappeared or
+ transferred to another owner on the bus. It stores a
+ <type>struct kdbus_notify_name_change</type> in
+ <varname>item.name_change</varname>.
+ <varname>old_id</varname> describes the former owner of the name
+ and is set to <constant>0</constant> values in case of
+ <constant>KDBUS_ITEM_NAME_ADD</constant>.
+ <varname>new_id</varname> describes the new owner of the name and
+ is set to <constant>0</constant> values in case of
+ <constant>KDBUS_ITEM_NAME_REMOVE</constant>.
+ The <varname>name</varname> field contains the well-known name the
+ notification is about, as null-terminated string. See
+ <citerefentry>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ and
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on matches and notification messages.
+ <programlisting>
+struct kdbus_notify_name_change {
+ struct kdbus_notify_id_change old_id;
+ struct kdbus_notify_id_change new_id;
+ char name[0];
+};
+ </programlisting>
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_REPLY_TIMEOUT</constant></term>
+ <listitem><para>
+ This item is sent as attachment to a
+ <emphasis>kernel notification</emphasis>. It informs the receiver
+ that an expected reply to a message was not received in time.
+ The remote peer ID and the message cookie are stored in the message
+ header. See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information about messages, timeouts and notifications.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_REPLY_DEAD</constant></term>
+ <listitem><para>
+ This item is sent as attachment to a
+ <emphasis>kernel notification</emphasis>. It informs the receiver
+ that a remote connection a reply is expected from was disconnected
+ before that reply was sent. The remote peer ID and the message
+ cookie are stored in the message header. See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information about messages, timeouts and notifications.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>memfd_create</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ </simplelist>
+ </refsect1>
+
+</refentry>
diff --git a/Documentation/kdbus/kdbus.match.xml b/Documentation/kdbus/kdbus.match.xml
new file mode 100644
index 000000000..ae38e04ab
--- /dev/null
+++ b/Documentation/kdbus/kdbus.match.xml
@@ -0,0 +1,555 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.match">
+
+ <refentryinfo>
+ <title>kdbus.match</title>
+ <productname>kdbus.match</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus.match</refname>
+ <refpurpose>kdbus match</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>
+ kdbus connections can install matches in order to subscribe to signal
+ messages sent on the bus. Such signal messages can be either directed
+ to a single connection (by setting a specific connection ID in
+ <varname>struct kdbus_msg.dst_id</varname> or by sending it to a
+ well-known name), or to potentially <emphasis>all</emphasis> currently
+ active connections on the bus (by setting
+ <varname>struct kdbus_msg.dst_id</varname> to
+ <constant>KDBUS_DST_ID_BROADCAST</constant>).
+ A signal message always has the <constant>KDBUS_MSG_SIGNAL</constant>
+ bit set in the <varname>flags</varname> bitfield.
+ Also, signal messages can originate from either the kernel (called
+ <emphasis>notifications</emphasis>), or from other bus connections.
+ In either case, a bus connection needs to have a suitable
+ <emphasis>match</emphasis> installed in order to receive any signal
+ message. Without any rules installed in the connection, no signal message
+ will be received.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Matches for signal messages from other connections</title>
+ <para>
+ Matches for messages from other connections (not kernel notifications)
+ are implemented as bloom filters (see below). The sender adds certain
+ properties of the message as elements to a bloom filter bit field, and
+ sends that along with the signal message.
+
+ The receiving connection adds the message properties it is interested in
+ as elements to a bloom mask bit field, and uploads the mask as match rule,
+ possibly along with some other rules to further limit the match.
+
+ The kernel will match the signal message's bloom filter against the
+ connection's bloom mask (simply by &amp;-ing it), and will decide whether
+ the message should be delivered to a connection.
+ </para>
+ <para>
+ The kernel has no notion of any specific properties of the signal message,
+ all it sees are the bit fields of the bloom filter and the mask to match
+ against. The use of bloom filters allows simple and efficient matching,
+ without exposing any message properties or internals to the kernel side.
+ Clients need to deal with the fact that they might receive signal messages
+ which they did not subscribe to, as the bloom filter might allow
+ false-positives to pass the filter.
+
+ To allow the future extension of the set of elements in the bloom filter,
+ the filter specifies a <emphasis>generation</emphasis> number. A later
+ generation must always contain all elements of the set of the previous
+ generation, but can add new elements to the set. The match rules mask can
+ carry an array with all previous generations of masks individually stored.
+ When the filter and mask are matched by the kernel, the mask with the
+ closest matching generation is selected as the index into the mask array.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Bloom filters</title>
+ <para>
+ Bloom filters allow checking whether a given word is present in a
+ dictionary. This allows connections to set up a mask for information it
+ is interested in, and will be delivered signal messages that have a
+ matching filter.
+
+ For general information, see
+ <ulink url="https://en.wikipedia.org/wiki/Bloom_filter">the Wikipedia
+ article on bloom filters</ulink>.
+ </para>
+ <para>
+ The size of the bloom filter is defined per bus when it is created, in
+ <varname>kdbus_bloom_parameter.size</varname>. All bloom filters attached
+ to signal messages on the bus must match this size, and all bloom filter
+ matches uploaded by connections must also match the size, or a multiple
+ thereof (see below).
+
+ The calculation of the mask has to be done in userspace applications. The
+ kernel just checks the bitmasks to decide whether or not to let the
+ message pass. All bits in the mask must match the filter in and bit-wise
+ <emphasis>AND</emphasis> logic, but the mask may have more bits set than
+ the filter. Consequently, false positive matches are expected to happen,
+ and programs must deal with that fact by checking the contents of the
+ payload again at receive time.
+ </para>
+ <para>
+ Masks are entities that are always passed to the kernel as part of a
+ match (with an item of type <constant>KDBUS_ITEM_BLOOM_MASK</constant>),
+ and filters can be attached to signals, with an item of type
+ <constant>KDBUS_ITEM_BLOOM_FILTER</constant>. For a filter to match, all
+ its bits have to be set in the match mask as well.
+ </para>
+ <para>
+ For example, consider a bus that has a bloom size of 8 bytes, and the
+ following mask/filter combinations:
+ </para>
+ <programlisting><![CDATA[
+ filter 0x0101010101010101
+ mask 0x0101010101010101
+ -> matches
+
+ filter 0x0303030303030303
+ mask 0x0101010101010101
+ -> doesn't match
+
+ filter 0x0101010101010101
+ mask 0x0303030303030303
+ -> matches
+ ]]></programlisting>
+
+ <para>
+ Hence, in order to catch all messages, a mask filled with
+ <constant>0xff</constant> bytes can be installed as a wildcard match rule.
+ </para>
+
+ <refsect2>
+ <title>Generations</title>
+
+ <para>
+ Uploaded matches may contain multiple masks, which have to be as large
+ as the bloom filter size defined by the bus. Each block of a mask is
+ called a <emphasis>generation</emphasis>, starting at index 0.
+
+ At match time, when a signal is about to be delivered, a bloom mask
+ generation is passed, which denotes which of the bloom masks the filter
+ should be matched against. This allows programs to provide backward
+ compatible masks at upload time, while older clients can still match
+ against older versions of filters.
+ </para>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>Matches for kernel notifications</title>
+ <para>
+ To receive kernel generated notifications (see
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>),
+ a connection must install match rules that are different from
+ the bloom filter matches described in the section above. They can be
+ filtered by the connection ID that caused the notification to be sent, by
+ one of the names it currently owns, or by the type of the notification
+ (ID/name add/remove/change).
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Adding a match</title>
+ <para>
+ To add a match, the <constant>KDBUS_CMD_MATCH_ADD</constant> ioctl is
+ used, which takes a <type>struct kdbus_cmd_match</type> as an argument
+ described below.
+
+ Note that each of the items attached to this command will internally
+ create one match <emphasis>rule</emphasis>, and the collection of them,
+ which is submitted as one block via the ioctl, is called a
+ <emphasis>match</emphasis>. To allow a message to pass, all rules of a
+ match have to be satisfied. Hence, adding more items to the command will
+ only narrow the possibility of a match to effectively let the message
+ pass, and will decrease the chance that the connection's process will be
+ woken up needlessly.
+
+ Multiple matches can be installed per connection. As long as one of it has
+ a set of rules which allows the message to pass, this one will be
+ decisive.
+ </para>
+
+ <programlisting>
+struct kdbus_cmd_match {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ __u64 cookie;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>Flags to control the behavior of the ioctl.</para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_MATCH_REPLACE</constant></term>
+ <listitem>
+ <para>Make the endpoint file group-accessible</para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+ <listitem>
+ <para>
+ Requests a set of valid flags for this ioctl. When this bit is
+ set, no action is taken; the ioctl will return
+ <errorcode>0</errorcode>, and the <varname>flags</varname>
+ field will have all bits set that are valid for this command.
+ The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+ cleared by the operation.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>cookie</varname></term>
+ <listitem><para>
+ A cookie which identifies the match, so it can be referred to when
+ removing it.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ Items to define the actual rules of the matches. The following item
+ types are expected. Each item will create one new match rule.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_BLOOM_MASK</constant></term>
+ <listitem>
+ <para>
+ An item that carries the bloom filter mask to match against
+ in its data field. The payload size must match the bloom
+ filter size that was specified when the bus was created.
+ See the "Bloom filters" section above for more information on
+ bloom filters.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NAME</constant></term>
+ <listitem>
+ <para>
+ When used as part of kernel notifications, this item specifies
+ a name that is acquired, lost or that changed its owner (see
+ below). When used as part of a match for user-generated signal
+ messages, it specifies a name that the sending connection must
+ own at the time of sending the signal.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_ID</constant></term>
+ <listitem>
+ <para>
+ Specify a sender connection's ID that will match this rule.
+ For kernel notifications, this specifies the ID of a
+ connection that was added to or removed from the bus.
+ For used-generated signals, it specifies the ID of the
+ connection that sent the signal message.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NAME_ADD</constant></term>
+ <term><constant>KDBUS_ITEM_NAME_REMOVE</constant></term>
+ <term><constant>KDBUS_ITEM_NAME_CHANGE</constant></term>
+ <listitem>
+ <para>
+ These items request delivery of kernel notifications that
+ describe a name acquisition, loss, or change. The details
+ are stored in the item's
+ <varname>kdbus_notify_name_change</varname> member.
+ All information specified must be matched in order to make
+ the message pass. Use
+ <constant>KDBUS_MATCH_ID_ANY</constant> to
+ match against any unique connection ID.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_ID_ADD</constant></term>
+ <term><constant>KDBUS_ITEM_ID_REMOVE</constant></term>
+ <listitem>
+ <para>
+ These items request delivery of kernel notifications that are
+ generated when a connection is created or terminated.
+ <type>struct kdbus_notify_id_change</type> is used to
+ store the actual match information. This item can be used to
+ monitor one particular connection ID, or, when the ID field
+ is set to <constant>KDBUS_MATCH_ID_ANY</constant>,
+ all of them.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_NEGOTIATE</constant></term>
+ <listitem><para>
+ With this item, programs can <emphasis>probe</emphasis> the
+ kernel for known item types. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Refer to
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on message types.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Removing a match</title>
+ <para>
+ Matches can be removed with the
+ <constant>KDBUS_CMD_MATCH_REMOVE</constant> ioctl, which takes
+ <type>struct kdbus_cmd_match</type> as argument, but its fields
+ usage slightly differs compared to that of
+ <constant>KDBUS_CMD_MATCH_ADD</constant>.
+ </para>
+
+ <programlisting>
+struct kdbus_cmd_match {
+ __u64 size;
+ __u64 cookie;
+ __u64 flags;
+ __u64 return_flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>cookie</varname></term>
+ <listitem><para>
+ The cookie of the match, as it was passed when the match was added.
+ All matches that have this cookie will be removed.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>
+ No flags are supported for this use case.
+ <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+ valid flags. If set, the ioctl will fail with
+ <errorcode>-1</errorcode>, <varname>errno</varname> is set to
+ <constant>EPROTO</constant>, and the <varname>flags</varname> field
+ is set to <constant>0</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ No items are supported for this use case, but
+ <constant>KDBUS_ITEM_NEGOTIATE</constant> is allowed nevertheless.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Return value</title>
+ <para>
+ On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+ on error, <errorcode>-1</errorcode> is returned, and
+ <varname>errno</varname> is set to indicate the error.
+ If the issued ioctl is illegal for the file descriptor used,
+ <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+ </para>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_MATCH_ADD</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Illegal flags or items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EDOM</constant></term>
+ <listitem><para>
+ Illegal bloom filter size.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EMFILE</constant></term>
+ <listitem><para>
+ Too many matches for this connection.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_MATCH_REMOVE</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Illegal flags.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EBADSLT</constant></term>
+ <listitem><para>
+ A match entry with the given cookie could not be found.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ </simplelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/kdbus/kdbus.message.xml b/Documentation/kdbus/kdbus.message.xml
new file mode 100644
index 000000000..0115d9d50
--- /dev/null
+++ b/Documentation/kdbus/kdbus.message.xml
@@ -0,0 +1,1276 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.message">
+
+ <refentryinfo>
+ <title>kdbus.message</title>
+ <productname>kdbus.message</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus.message</refname>
+ <refpurpose>kdbus message</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>
+ A kdbus message is used to exchange information between two connections
+ on a bus, or to transport notifications from the kernel to one or many
+ connections. This document describes the layout of messages, how payload
+ is added to them and how they are sent and received.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Message layout</title>
+
+ <para>The layout of a message is shown below.</para>
+
+ <programlisting>
+ +-------------------------------------------------------------------------+
+ | Message |
+ | +---------------------------------------------------------------------+ |
+ | | Header | |
+ | | size: overall message size, including the data records | |
+ | | destination: connection ID of the receiver | |
+ | | source: connection ID of the sender (set by kernel) | |
+ | | payload_type: "DBusDBus" textual identifier stored as uint64_t | |
+ | +---------------------------------------------------------------------+ |
+ | +---------------------------------------------------------------------+ |
+ | | Data Record | |
+ | | size: overall record size (without padding) | |
+ | | type: type of data | |
+ | | data: reference to data (address or file descriptor) | |
+ | +---------------------------------------------------------------------+ |
+ | +---------------------------------------------------------------------+ |
+ | | padding bytes to the next 8 byte alignment | |
+ | +---------------------------------------------------------------------+ |
+ | +---------------------------------------------------------------------+ |
+ | | Data Record | |
+ | | size: overall record size (without padding) | |
+ | | ... | |
+ | +---------------------------------------------------------------------+ |
+ | +---------------------------------------------------------------------+ |
+ | | padding bytes to the next 8 byte alignment | |
+ | +---------------------------------------------------------------------+ |
+ | +---------------------------------------------------------------------+ |
+ | | Data Record | |
+ | | size: overall record size | |
+ | | ... | |
+ | +---------------------------------------------------------------------+ |
+ | ... further data records ... |
+ +-------------------------------------------------------------------------+
+ </programlisting>
+ </refsect1>
+
+ <refsect1>
+ <title>Message payload</title>
+
+ <para>
+ When connecting to the bus, receivers request a memory pool of a given
+ size, large enough to carry all backlog of data enqueued for the
+ connection. The pool is internally backed by a shared memory file which
+ can be <function>mmap()</function>ed by the receiver. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information.
+ </para>
+
+ <para>
+ Message payload must be described in items attached to a message when
+ it is sent. A receiver can access the payload by looking at the items
+ that are attached to a message in its pool. The following items are used.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_PAYLOAD_VEC</constant></term>
+ <listitem>
+ <para>
+ This item references a piece of memory on the sender side which is
+ directly copied into the receiver's pool. This way, two peers can
+ exchange data by effectively doing a single-copy from one process
+ to another; the kernel will not buffer the data anywhere else.
+ This item is never found in a message received by a connection.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_PAYLOAD_OFF</constant></term>
+ <listitem>
+ <para>
+ This item is attached to messages on the receiving side and points
+ to a memory area inside the receiver's pool. The
+ <varname>offset</varname> variable in the item denotes the memory
+ location relative to the message itself.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant></term>
+ <listitem>
+ <para>
+ Messages can reference <emphasis>memfd</emphasis> files which
+ contain the data. memfd files are tmpfs-backed files that allow
+ sealing of the content of the file, which prevents all writable
+ access to the file content.
+ </para>
+ <para>
+ Only memfds that have
+ <constant>(F_SEAL_SHRINK|F_SEAL_GROW|F_SEAL_WRITE|F_SEAL_SEAL)
+ </constant>
+ set are accepted as payload data, which enforces reliable passing of
+ data. The receiver can assume that neither the sender nor anyone
+ else can alter the content after the message is sent. If those
+ seals are not set on the memfd, the ioctl will fail with
+ <errorcode>-1</errorcode>, and <varname>errno</varname> will be
+ set to <constant>ETXTBUSY</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_FDS</constant></term>
+ <listitem>
+ <para>
+ Messages can transport regular file descriptors via
+ <constant>KDBUS_ITEM_FDS</constant>. This item carries an array
+ of <type>int</type> values in <varname>item.fd</varname>. The
+ maximum number of file descriptors in the item is
+ <constant>253</constant>, and only one item of this type is
+ accepted per message. All passed values must be valid file
+ descriptors; the open count of each file descriptors is increased
+ by installing it to the receiver's task. This item can only be
+ used for directed messages, not for broadcasts, and only to
+ remote peers that have opted-in for receiving file descriptors
+ at connection time (<constant>KDBUS_HELLO_ACCEPT_FD</constant>).
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ The sender must not make any assumptions on the type in which data is
+ received by the remote peer. The kernel is free to re-pack multiple
+ <constant>KDBUS_ITEM_PAYLOAD_VEC</constant> and
+ <constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant> payloads. For instance, the
+ kernel may decide to merge multiple <constant>VECs</constant> into a
+ single <constant>VEC</constant>, inline <constant>MEMFD</constant>
+ payloads into memory, or merge all passed <constant>VECs</constant> into a
+ single <constant>MEMFD</constant>. However, the kernel preserves the order
+ of passed data. This means that the order of all <constant>VEC</constant>
+ and <constant>MEMFD</constant> items is not changed in respect to each
+ other. In other words: All passed <constant>VEC</constant> and
+ <constant>MEMFD</constant> data payloads are treated as a single stream
+ of data that may be received by the remote peer in a different set of
+ chunks than it was sent as.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Sending messages</title>
+
+ <para>
+ Messages are passed to the kernel with the
+ <constant>KDBUS_CMD_SEND</constant> ioctl. Depending on the destination
+ address of the message, the kernel delivers the message to the specific
+ destination connection, or to some subset of all connections on the same
+ bus. Sending messages across buses is not possible. Messages are always
+ queued in the memory pool of the destination connection (see above).
+ </para>
+
+ <para>
+ The <constant>KDBUS_CMD_SEND</constant> ioctl uses a
+ <type>struct kdbus_cmd_send</type> to describe the message
+ transfer.
+ </para>
+ <programlisting>
+struct kdbus_cmd_send {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ __u64 msg_address;
+ struct kdbus_msg_info reply;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>Flags for message delivery</para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_SEND_SYNC_REPLY</constant></term>
+ <listitem>
+ <para>
+ By default, all calls to kdbus are considered asynchronous,
+ non-blocking. However, as there are many use cases that need
+ to wait for a remote peer to answer a method call, there's a
+ way to send a message and wait for a reply in a synchronous
+ fashion. This is what the
+ <constant>KDBUS_SEND_SYNC_REPLY</constant> controls. The
+ <constant>KDBUS_CMD_SEND</constant> ioctl will block until the
+ reply has arrived, the timeout limit is reached, in case the
+ remote connection was shut down, or if interrupted by a signal
+ before any reply; see
+ <citerefentry>
+ <refentrytitle>signal</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+
+ The offset of the reply message in the sender's pool is stored
+ in <varname>reply</varname> when the ioctl has returned without
+ error. Hence, there is no need for another
+ <constant>KDBUS_CMD_RECV</constant> ioctl or anything else to
+ receive the reply.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+ <listitem>
+ <para>
+ Request a set of valid flags for this ioctl. When this bit is
+ set, no action is taken; the ioctl will fail with
+ <errorcode>-1</errorcode>, <varname>errno</varname>
+ is set to <constant>EPROTO</constant>.
+ Once the ioctl returned, the <varname>flags</varname>
+ field will have all bits set that the kernel recognizes as
+ valid for this command.
+ The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+ cleared by the operation.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>msg_address</varname></term>
+ <listitem><para>
+ In this field, users have to provide a pointer to a message
+ (<type>struct kdbus_msg</type>) to send. See below for a
+ detailed description.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>reply</varname></term>
+ <listitem><para>
+ Only used for synchronous replies. See description of
+ <type>struct kdbus_cmd_recv</type> for more details.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ The following items are currently recognized.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_CANCEL_FD</constant></term>
+ <listitem>
+ <para>
+ When this optional item is passed in, and the call is
+ executed as SYNC call, the passed in file descriptor can be
+ used as alternative cancellation point. The kernel will call
+ <citerefentry>
+ <refentrytitle>poll</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ on this file descriptor, and once it reports any incoming
+ bytes, the blocking send operation will be canceled; the
+ blocking, synchronous ioctl call will return
+ <errorcode>-1</errorcode>, and <varname>errno</varname> will
+ be set to <errorname>ECANCELED</errorname>.
+ Any type of file descriptor on which
+ <citerefentry>
+ <refentrytitle>poll</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ can be called on can be used as payload to this item; for
+ example, an eventfd can be used for this purpose, see
+ <citerefentry>
+ <refentrytitle>eventfd</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>.
+ For asynchronous message sending, this item is allowed but
+ ignored.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ The message referenced by the <varname>msg_address</varname> above has
+ the following layout.
+ </para>
+
+ <programlisting>
+struct kdbus_msg {
+ __u64 size;
+ __u64 flags;
+ __s64 priority;
+ __u64 dst_id;
+ __u64 src_id;
+ __u64 payload_type;
+ __u64 cookie;
+ __u64 timeout_ns;
+ __u64 cookie_reply;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>Flags to describe message details.</para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_MSG_EXPECT_REPLY</constant></term>
+ <listitem>
+ <para>
+ Expect a reply to this message from the remote peer. With
+ this bit set, the timeout_ns field must be set to a non-zero
+ number of nanoseconds in which the receiving peer is expected
+ to reply. If such a reply is not received in time, the sender
+ will be notified with a timeout message (see below). The
+ value must be an absolute value, in nanoseconds and based on
+ <constant>CLOCK_MONOTONIC</constant>.
+ </para><para>
+ For a message to be accepted as reply, it must be a direct
+ message to the original sender (not a broadcast and not a
+ signal message), and its
+ <varname>kdbus_msg.cookie_reply</varname> must match the
+ previous message's <varname>kdbus_msg.cookie</varname>.
+ </para><para>
+ Expected replies also temporarily open the policy of the
+ sending connection, so the other peer is allowed to respond
+ within the given time window.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_MSG_NO_AUTO_START</constant></term>
+ <listitem>
+ <para>
+ By default, when a message is sent to an activator
+ connection, the activator is notified and will start an
+ implementer. This flag inhibits that behavior. With this bit
+ set, and the remote being an activator, the ioctl will fail
+ with <varname>errno</varname> set to
+ <constant>EADDRNOTAVAIL</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+ <listitem>
+ <para>
+ Requests a set of valid flags for this ioctl. When this bit is
+ set, no action is taken; the ioctl will return
+ <errorcode>0</errorcode>, and the <varname>flags</varname>
+ field will have all bits set that are valid for this command.
+ The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+ cleared by the operation.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>priority</varname></term>
+ <listitem><para>
+ The priority of this message. Receiving messages (see below) may
+ optionally be constrained to messages of a minimal priority. This
+ allows for use cases where timing critical data is interleaved with
+ control data on the same connection. If unused, the priority field
+ should be set to <constant>0</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>dst_id</varname></term>
+ <listitem><para>
+ The numeric ID of the destination connection, or
+ <constant>KDBUS_DST_ID_BROADCAST</constant>
+ (~0ULL) to address every peer on the bus, or
+ <constant>KDBUS_DST_ID_NAME</constant> (0) to look
+ it up dynamically from the bus' name registry.
+ In the latter case, an item of type
+ <constant>KDBUS_ITEM_DST_NAME</constant> is mandatory.
+ Also see
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ .
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>src_id</varname></term>
+ <listitem><para>
+ Upon return of the ioctl, this member will contain the sending
+ connection's numerical ID. Should be 0 at send time.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>payload_type</varname></term>
+ <listitem><para>
+ Type of the payload in the actual data records. Currently, only
+ <constant>KDBUS_PAYLOAD_DBUS</constant> is accepted as input value
+ of this field. When receiving messages that are generated by the
+ kernel (notifications), this field will contain
+ <constant>KDBUS_PAYLOAD_KERNEL</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>cookie</varname></term>
+ <listitem><para>
+ Cookie of this message, for later recognition. Also, when replying
+ to a message (see above), the <varname>cookie_reply</varname>
+ field must match this value.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>timeout_ns</varname></term>
+ <listitem><para>
+ If the message sent requires a reply from the remote peer (see above),
+ this field contains the timeout in absolute nanoseconds based on
+ <constant>CLOCK_MONOTONIC</constant>. Also see
+ <citerefentry>
+ <refentrytitle>clock_gettime</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>cookie_reply</varname></term>
+ <listitem><para>
+ If the message sent is a reply to another message, this field must
+ match the cookie of the formerly received message.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ A dynamically sized list of items to contain additional information.
+ The following items are expected/valid:
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_PAYLOAD_VEC</constant></term>
+ <term><constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant></term>
+ <term><constant>KDBUS_ITEM_FDS</constant></term>
+ <listitem>
+ <para>
+ Actual data records containing the payload. See section
+ "Message payload".
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_BLOOM_FILTER</constant></term>
+ <listitem>
+ <para>
+ Bloom filter for matches (see below).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ITEM_DST_NAME</constant></term>
+ <listitem>
+ <para>
+ Well-known name to send this message to. Required if
+ <varname>dst_id</varname> is set to
+ <constant>KDBUS_DST_ID_NAME</constant>.
+ If a connection holding the given name can't be found,
+ the ioctl will fail with <varname>errno</varname> set to
+ <constant>ESRCH</constant> is returned.
+ </para>
+ <para>
+ For messages to a unique name (ID), this item is optional. If
+ present, the kernel will make sure the name owner matches the
+ given unique name. This allows programs to tie the message
+ sending to the condition that a name is currently owned by a
+ certain unique name.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ The message will be augmented by the requested metadata items when
+ queued into the receiver's pool. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ and
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on metadata.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Receiving messages</title>
+
+ <para>
+ Messages are received by the client with the
+ <constant>KDBUS_CMD_RECV</constant> ioctl. The endpoint file of the bus
+ supports <function>poll()/epoll()/select()</function>; when new messages
+ are available on the connection's file descriptor,
+ <constant>POLLIN</constant> is reported. For compatibility reasons,
+ <constant>POLLOUT</constant> is always reported as well. Note, however,
+ that the latter does not guarantee that a message can in fact be sent, as
+ this depends on how many pending messages the receiver has in its pool.
+ </para>
+
+ <para>
+ With the <constant>KDBUS_CMD_RECV</constant> ioctl, a
+ <type>struct kdbus_cmd_recv</type> is used.
+ </para>
+
+ <programlisting>
+struct kdbus_cmd_recv {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ __s64 priority;
+ __u64 dropped_msgs;
+ struct kdbus_msg_info msg;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>Flags to control the receive command.</para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_RECV_PEEK</constant></term>
+ <listitem>
+ <para>
+ Just return the location of the next message. Do not install
+ file descriptors or anything else. This is usually used to
+ determine the sender of the next queued message.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_RECV_DROP</constant></term>
+ <listitem>
+ <para>
+ Drop the next message without doing anything else with it,
+ and free the pool slice. This a short-cut for
+ <constant>KDBUS_RECV_PEEK</constant> and
+ <constant>KDBUS_CMD_FREE</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_RECV_USE_PRIORITY</constant></term>
+ <listitem>
+ <para>
+ Dequeue the messages ordered by their priority, and filtering
+ them with the priority field (see below).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+ <listitem>
+ <para>
+ Request a set of valid flags for this ioctl. When this bit is
+ set, no action is taken; the ioctl will fail with
+ <errorcode>-1</errorcode>, <varname>errno</varname>
+ is set to <constant>EPROTO</constant>.
+ Once the ioctl returned, the <varname>flags</varname>
+ field will have all bits set that the kernel recognizes as
+ valid for this command.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. If the <varname>dropped_msgs</varname>
+ field is non-zero, <constant>KDBUS_RECV_RETURN_DROPPED_MSGS</constant>
+ is set. If a file descriptor could not be installed, the
+ <constant>KDBUS_RECV_RETURN_INCOMPLETE_FDS</constant> flag is set.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>priority</varname></term>
+ <listitem><para>
+ With <constant>KDBUS_RECV_USE_PRIORITY</constant> set in
+ <varname>flags</varname>, messages will be dequeued ordered by their
+ priority, starting with the highest value. Also, messages will be
+ filtered by the value given in this field, so the returned message
+ will at least have the requested priority. If no such message is
+ waiting in the queue, the ioctl will fail, and
+ <varname>errno</varname> will be set to <constant>EAGAIN</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>dropped_msgs</varname></term>
+ <listitem><para>
+ Whenever a message with <constant>KDBUS_MSG_SIGNAL</constant> is sent
+ but cannot be queued on a peer (e.g., as it contains FDs but the peer
+ does not support FDs, or there is no space left in the peer's pool)
+ the 'dropped_msgs' counter of the peer is incremented. On the next
+ RECV ioctl, the 'dropped_msgs' field is copied into the ioctl struct
+ and cleared on the peer. If it was non-zero, the
+ <constant>KDBUS_RECV_RETURN_DROPPED_MSGS</constant> flag will be set
+ in <varname>return_flags</varname>. Note that this will only happen
+ if the ioctl succeeded or failed with <constant>EAGAIN</constant>. In
+ other error cases, the 'dropped_msgs' field of the peer is left
+ untouched.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>msg</varname></term>
+ <listitem><para>
+ Embedded struct containing information on the received message when
+ this command succeeded (see below).
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem><para>
+ Items to specify further details for the receive command.
+ Currently unused, and all items will be rejected with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Both <type>struct kdbus_cmd_recv</type> and
+ <type>struct kdbus_cmd_send</type> embed
+ <type>struct kdbus_msg_info</type>.
+ For the <constant>KDBUS_CMD_SEND</constant> ioctl, it is used to catch
+ synchronous replies, if one was requested, and is unused otherwise.
+ </para>
+
+ <programlisting>
+struct kdbus_msg_info {
+ __u64 offset;
+ __u64 msg_size;
+ __u64 return_flags;
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>offset</varname></term>
+ <listitem><para>
+ Upon return of the ioctl, this field contains the offset in the
+ receiver's memory pool. The memory must be freed with
+ <constant>KDBUS_CMD_FREE</constant>. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for further details.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>msg_size</varname></term>
+ <listitem><para>
+ Upon successful return of the ioctl, this field contains the size of
+ the allocated slice at offset <varname>offset</varname>.
+ It is the combination of the size of the stored
+ <type>struct kdbus_msg</type> object plus all appended VECs.
+ You can use it in combination with <varname>offset</varname> to map
+ a single message, instead of mapping the entire pool. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for further details.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem>
+ <para>
+ Kernel-provided return flags. Currently, the following flags are
+ defined.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_RECV_RETURN_INCOMPLETE_FDS</constant></term>
+ <listitem>
+ <para>
+ The message contained memfds or file descriptors, and the
+ kernel failed to install one or more of them at receive time.
+ Most probably that happened because the maximum number of
+ file descriptors for the receiver's task were exceeded.
+ In such cases, the message is still delivered, so this is not
+ a fatal condition. File descriptors numbers inside the
+ <constant>KDBUS_ITEM_FDS</constant> item or memfd files
+ referenced by <constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant>
+ items which could not be installed will be set to
+ <constant>-1</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Unless <constant>KDBUS_RECV_DROP</constant> was passed, the
+ <varname>offset</varname> field contains the location of the new message
+ inside the receiver's pool after the <constant>KDBUS_CMD_RECV</constant>
+ ioctl was employed. The message is stored as <type>struct kdbus_msg</type>
+ at this offset, and can be interpreted with the semantics described above.
+ </para>
+ <para>
+ Also, if the connection allowed for file descriptor to be passed
+ (<constant>KDBUS_HELLO_ACCEPT_FD</constant>), and if the message contained
+ any, they will be installed into the receiving process when the
+ <constant>KDBUS_CMD_RECV</constant> ioctl is called.
+ <emphasis>memfds</emphasis> may always be part of the message payload.
+ The receiving task is obliged to close all file descriptors appropriately
+ once no longer needed. If <constant>KDBUS_RECV_PEEK</constant> is set, no
+ file descriptors are installed. This allows for peeking at a message,
+ looking at its metadata only and dropping it via
+ <constant>KDBUS_RECV_DROP</constant>, without installing any of the file
+ descriptors into the receiving process.
+ </para>
+ <para>
+ The caller is obliged to call the <constant>KDBUS_CMD_FREE</constant>
+ ioctl with the returned offset when the memory is no longer needed.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Notifications</title>
+ <para>
+ A kernel notification is a regular kdbus message with the following
+ details.
+ </para>
+
+ <itemizedlist>
+ <listitem><para>
+ kdbus_msg.src_id == <constant>KDBUS_SRC_ID_KERNEL</constant>
+ </para></listitem>
+ <listitem><para>
+ kdbus_msg.dst_id == <constant>KDBUS_DST_ID_BROADCAST</constant>
+ </para></listitem>
+ <listitem><para>
+ kdbus_msg.payload_type == <constant>KDBUS_PAYLOAD_KERNEL</constant>
+ </para></listitem>
+ <listitem><para>
+ Has exactly one of the items attached that are described below.
+ </para></listitem>
+ <listitem><para>
+ Always has a timestamp item (<constant>KDBUS_ITEM_TIMESTAMP</constant>)
+ attached.
+ </para></listitem>
+ </itemizedlist>
+
+ <para>
+ The kernel will notify its users of the following events.
+ </para>
+
+ <itemizedlist>
+ <listitem><para>
+ When connection <emphasis>A</emphasis> is terminated while connection
+ <emphasis>B</emphasis> is waiting for a reply from it, connection
+ <emphasis>B</emphasis> is notified with a message with an item of
+ type <constant>KDBUS_ITEM_REPLY_DEAD</constant>.
+ </para></listitem>
+
+ <listitem><para>
+ When connection <emphasis>A</emphasis> does not receive a reply from
+ connection <emphasis>B</emphasis> within the specified timeout window,
+ connection <emphasis>A</emphasis> will receive a message with an
+ item of type <constant>KDBUS_ITEM_REPLY_TIMEOUT</constant>.
+ </para></listitem>
+
+ <listitem><para>
+ When an ordinary connection (not a monitor) is created on or removed
+ from a bus, messages with an item of type
+ <constant>KDBUS_ITEM_ID_ADD</constant> or
+ <constant>KDBUS_ITEM_ID_REMOVE</constant>, respectively, are delivered
+ to all bus members that match these messages through their match
+ database. Eavesdroppers (monitor connections) do not cause such
+ notifications to be sent. They are invisible on the bus.
+ </para></listitem>
+
+ <listitem><para>
+ When a connection gains or loses ownership of a name, messages with an
+ item of type <constant>KDBUS_ITEM_NAME_ADD</constant>,
+ <constant>KDBUS_ITEM_NAME_REMOVE</constant> or
+ <constant>KDBUS_ITEM_NAME_CHANGE</constant> are delivered to all bus
+ members that match these messages through their match database.
+ </para></listitem>
+ </itemizedlist>
+ </refsect1>
+
+ <refsect1>
+ <title>Return value</title>
+ <para>
+ On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+ on error, <errorcode>-1</errorcode> is returned, and
+ <varname>errno</varname> is set to indicate the error.
+ If the issued ioctl is illegal for the file descriptor used,
+ <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+ </para>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_SEND</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EOPNOTSUPP</constant></term>
+ <listitem><para>
+ The connection is not an ordinary connection, or the passed
+ file descriptors in <constant>KDBUS_ITEM_FDS</constant> item are
+ either kdbus handles or unix domain sockets. Both are currently
+ unsupported.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ The submitted payload type is
+ <constant>KDBUS_PAYLOAD_KERNEL</constant>,
+ <constant>KDBUS_MSG_EXPECT_REPLY</constant> was set without timeout
+ or cookie values, <constant>KDBUS_SEND_SYNC_REPLY</constant> was
+ set without <constant>KDBUS_MSG_EXPECT_REPLY</constant>, an invalid
+ item was supplied, <constant>src_id</constant> was non-zero and was
+ different from the current connection's ID, a supplied memfd had a
+ size of 0, or a string was not properly null-terminated.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ENOTUNIQ</constant></term>
+ <listitem><para>
+ The supplied destination is
+ <constant>KDBUS_DST_ID_BROADCAST</constant> and either
+ file descriptors were passed, or
+ <constant>KDBUS_MSG_EXPECT_REPLY</constant> was set,
+ or a timeout was given.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>E2BIG</constant></term>
+ <listitem><para>
+ Too many items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EMSGSIZE</constant></term>
+ <listitem><para>
+ The size of the message header and items or the payload vector
+ is excessive.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EEXIST</constant></term>
+ <listitem><para>
+ Multiple <constant>KDBUS_ITEM_FDS</constant>,
+ <constant>KDBUS_ITEM_BLOOM_FILTER</constant> or
+ <constant>KDBUS_ITEM_DST_NAME</constant> items were supplied.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EBADF</constant></term>
+ <listitem><para>
+ The supplied <constant>KDBUS_ITEM_FDS</constant> or
+ <constant>KDBUS_ITEM_PAYLOAD_MEMFD</constant> items
+ contained an illegal file descriptor.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EMEDIUMTYPE</constant></term>
+ <listitem><para>
+ The supplied memfd is not a sealed kdbus memfd.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EMFILE</constant></term>
+ <listitem><para>
+ Too many file descriptors inside a
+ <constant>KDBUS_ITEM_FDS</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EBADMSG</constant></term>
+ <listitem><para>
+ An item had illegal size, both a <constant>dst_id</constant> and a
+ <constant>KDBUS_ITEM_DST_NAME</constant> was given, or both a name
+ and a bloom filter was given.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ETXTBSY</constant></term>
+ <listitem><para>
+ The supplied kdbus memfd file cannot be sealed or the seal
+ was removed, because it is shared with other processes or
+ still mapped with
+ <citerefentry>
+ <refentrytitle>mmap</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ECOMM</constant></term>
+ <listitem><para>
+ A peer does not accept the file descriptors addressed to it.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EFAULT</constant></term>
+ <listitem><para>
+ The supplied bloom filter size was not 64-bit aligned, or supplied
+ memory could not be accessed by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EDOM</constant></term>
+ <listitem><para>
+ The supplied bloom filter size did not match the bloom filter
+ size of the bus.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EDESTADDRREQ</constant></term>
+ <listitem><para>
+ <constant>dst_id</constant> was set to
+ <constant>KDBUS_DST_ID_NAME</constant>, but no
+ <constant>KDBUS_ITEM_DST_NAME</constant> was attached.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ESRCH</constant></term>
+ <listitem><para>
+ The name to look up was not found in the name registry.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EADDRNOTAVAIL</constant></term>
+ <listitem><para>
+ <constant>KDBUS_MSG_NO_AUTO_START</constant> was given but the
+ destination connection is an activator.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ENXIO</constant></term>
+ <listitem><para>
+ The passed numeric destination connection ID couldn't be found,
+ or is not connected.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ECONNRESET</constant></term>
+ <listitem><para>
+ The destination connection is no longer active.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ETIMEDOUT</constant></term>
+ <listitem><para>
+ Timeout while synchronously waiting for a reply.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINTR</constant></term>
+ <listitem><para>
+ Interrupted system call while synchronously waiting for a reply.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EPIPE</constant></term>
+ <listitem><para>
+ When sending a message, a synchronous reply from the receiving
+ connection was expected but the connection died before answering.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ENOBUFS</constant></term>
+ <listitem><para>
+ Too many pending messages on the receiver side.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EREMCHG</constant></term>
+ <listitem><para>
+ Both a well-known name and a unique name (ID) was given, but
+ the name is not currently owned by that connection.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EXFULL</constant></term>
+ <listitem><para>
+ The memory pool of the receiver is full.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EREMOTEIO</constant></term>
+ <listitem><para>
+ While synchronously waiting for a reply, the remote peer
+ failed with an I/O error.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_RECV</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EOPNOTSUPP</constant></term>
+ <listitem><para>
+ The connection is not an ordinary connection, or the passed
+ file descriptors are either kdbus handles or unix domain
+ sockets. Both are currently unsupported.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Invalid flags or offset.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EAGAIN</constant></term>
+ <listitem><para>
+ No message found in the queue.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>clock_gettime</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>ioctl</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>poll</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>select</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>epoll</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>eventfd</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>memfd_create</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ </simplelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/kdbus/kdbus.name.xml b/Documentation/kdbus/kdbus.name.xml
new file mode 100644
index 000000000..3f5f6a6c5
--- /dev/null
+++ b/Documentation/kdbus/kdbus.name.xml
@@ -0,0 +1,711 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.name">
+
+ <refentryinfo>
+ <title>kdbus.name</title>
+ <productname>kdbus.name</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus.name</refname>
+ <refpurpose>kdbus.name</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+ <para>
+ Each
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ instantiates a name registry to resolve well-known names into unique
+ connection IDs for message delivery. The registry will be queried when a
+ message is sent with <varname>kdbus_msg.dst_id</varname> set to
+ <constant>KDBUS_DST_ID_NAME</constant>, or when a registry dump is
+ requested with <constant>KDBUS_CMD_NAME_LIST</constant>.
+ </para>
+
+ <para>
+ All of the below is subject to policy rules for <emphasis>SEE</emphasis>
+ and <emphasis>OWN</emphasis> permissions. See
+ <citerefentry>
+ <refentrytitle>kdbus.policy</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Name validity</title>
+ <para>
+ A name has to comply with the following rules in order to be considered
+ valid.
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ The name has two or more elements separated by a
+ '<literal>.</literal>' (period) character.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ All elements must contain at least one character.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Each element must only contain the ASCII characters
+ <literal>[A-Z][a-z][0-9]_</literal> and must not begin with a
+ digit.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ The name must contain at least one '<literal>.</literal>' (period)
+ character (and thus at least two elements).
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ The name must not begin with a '<literal>.</literal>' (period)
+ character.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ The name must not exceed <constant>255</constant> characters in
+ length.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </refsect1>
+
+ <refsect1>
+ <title>Acquiring a name</title>
+ <para>
+ To acquire a name, a client uses the
+ <constant>KDBUS_CMD_NAME_ACQUIRE</constant> ioctl with
+ <type>struct kdbus_cmd</type> as argument.
+ </para>
+
+ <programlisting>
+struct kdbus_cmd {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>Flags to control details in the name acquisition.</para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_NAME_REPLACE_EXISTING</constant></term>
+ <listitem>
+ <para>
+ Acquiring a name that is already present usually fails,
+ unless this flag is set in the call, and
+ <constant>KDBUS_NAME_ALLOW_REPLACEMENT</constant> (see below)
+ was set when the current owner of the name acquired it, or
+ if the current owner is an activator connection (see
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_NAME_ALLOW_REPLACEMENT</constant></term>
+ <listitem>
+ <para>
+ Allow other connections to take over this name. When this
+ happens, the former owner of the connection will be notified
+ of the name loss.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_NAME_QUEUE</constant></term>
+ <listitem>
+ <para>
+ A name that is already acquired by a connection can not be
+ acquired again (unless the
+ <constant>KDBUS_NAME_ALLOW_REPLACEMENT</constant> flag was
+ set during acquisition; see above).
+ However, a connection can put itself in a queue of
+ connections waiting for the name to be released. Once that
+ happens, the first connection in that queue becomes the new
+ owner and is notified accordingly.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+ <listitem>
+ <para>
+ Request a set of valid flags for this ioctl. When this bit is
+ set, no action is taken; the ioctl will fail with
+ <errorcode>-1</errorcode>, and <varname>errno</varname>
+ is set to <constant>EPROTO</constant>.
+ Once the ioctl returned, the <varname>flags</varname>
+ field will have all bits set that the kernel recognizes as
+ valid for this command.
+ The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+ cleared by the operation.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem>
+ <para>
+ Flags returned by the kernel. Currently, the following may be
+ returned by the kernel.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_NAME_IN_QUEUE</constant></term>
+ <listitem>
+ <para>
+ The name was not acquired yet, but the connection was
+ placed in the queue of peers waiting for the name.
+ This can only happen if <constant>KDBUS_NAME_QUEUE</constant>
+ was set in the <varname>flags</varname> member (see above).
+ The connection will receive a name owner change notification
+ once the current owner has given up the name and its
+ ownership was transferred.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ Items to submit the name. Currently, one item of type
+ <constant>KDBUS_ITEM_NAME</constant> is expected and allowed, and
+ the contained string must be a valid bus name.
+ <constant>KDBUS_ITEM_NEGOTIATE</constant> may be used to probe for
+ valid item types. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for a detailed description of how this item is used.
+ </para>
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <errorname>>EINVAL</errorname>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Releasing a name</title>
+ <para>
+ A connection may release a name explicitly with the
+ <constant>KDBUS_CMD_NAME_RELEASE</constant> ioctl. If the connection was
+ an implementer of an activatable name, its pending messages are moved
+ back to the activator. If there are any connections queued up as waiters
+ for the name, the first one in the queue (the oldest entry) will become
+ the new owner. The same happens implicitly for all names once a
+ connection terminates. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on connections.
+ </para>
+ <para>
+ The <constant>KDBUS_CMD_NAME_RELEASE</constant> ioctl uses the same data
+ structure as the acquisition call
+ (<constant>KDBUS_CMD_NAME_ACQUIRE</constant>),
+ but with slightly different field usage.
+ </para>
+
+ <programlisting>
+struct kdbus_cmd {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>
+ Flags to the command. Currently unused.
+ <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+ valid flags. If set, the ioctl will return <errorcode>0</errorcode>,
+ and the <varname>flags</varname> field is set to
+ <constant>0</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ Items to submit the name. Currently, one item of type
+ <constant>KDBUS_ITEM_NAME</constant> is expected and allowed, and
+ the contained string must be a valid bus name.
+ <constant>KDBUS_ITEM_NEGOTIATE</constant> may be used to probe for
+ valid item types. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for a detailed description of how this item is used.
+ </para>
+ <para>
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Dumping the name registry</title>
+ <para>
+ A connection may request a complete or filtered dump of currently active
+ bus names with the <constant>KDBUS_CMD_LIST</constant> ioctl, which
+ takes a <type>struct kdbus_cmd_list</type> as argument.
+ </para>
+
+ <programlisting>
+struct kdbus_cmd_list {
+ __u64 flags;
+ __u64 return_flags;
+ __u64 offset;
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem>
+ <para>
+ Any combination of flags to specify which names should be dumped.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_LIST_UNIQUE</constant></term>
+ <listitem>
+ <para>
+ List the unique (numeric) IDs of the connection, whether it
+ owns a name or not.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_LIST_NAMES</constant></term>
+ <listitem>
+ <para>
+ List well-known names stored in the database which are
+ actively owned by a real connection (not an activator).
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_LIST_ACTIVATORS</constant></term>
+ <listitem>
+ <para>
+ List names that are owned by an activator.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_LIST_QUEUED</constant></term>
+ <listitem>
+ <para>
+ List connections that are not yet owning a name but are
+ waiting for it to become available.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+ <listitem>
+ <para>
+ Request a set of valid flags for this ioctl. When this bit is
+ set, no action is taken; the ioctl will fail with
+ <errorcode>-1</errorcode>, and <varname>errno</varname>
+ is set to <constant>EPROTO</constant>.
+ Once the ioctl returned, the <varname>flags</varname>
+ field will have all bits set that the kernel recognizes as
+ valid for this command.
+ The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+ cleared by the operation.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>offset</varname></term>
+ <listitem><para>
+ When the ioctl returns successfully, the offset to the name registry
+ dump inside the connection's pool will be stored in this field.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ The returned list of names is stored in a <type>struct kdbus_list</type>
+ that in turn contains an array of type <type>struct kdbus_info</type>,
+ The array-size in bytes is given as <varname>list_size</varname>.
+ The fields inside <type>struct kdbus_info</type> is described next.
+ </para>
+
+ <programlisting>
+struct kdbus_info {
+ __u64 size;
+ __u64 id;
+ __u64 flags;
+ struct kdbus_item items[0];
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>id</varname></term>
+ <listitem><para>
+ The owning connection's unique ID.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>
+ The flags of the owning connection.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem>
+ <para>
+ Items containing the actual name. Currently, one item of type
+ <constant>KDBUS_ITEM_OWNED_NAME</constant> will be attached,
+ including the name's flags. In that item, the flags field of the
+ name may carry the following bits:
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_NAME_ALLOW_REPLACEMENT</constant></term>
+ <listitem>
+ <para>
+ Other connections are allowed to take over this name from the
+ connection that owns it.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_NAME_IN_QUEUE</constant></term>
+ <listitem>
+ <para>
+ When retrieving a list of currently acquired names in the
+ registry, this flag indicates whether the connection
+ actually owns the name or is currently waiting for it to
+ become available.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_NAME_ACTIVATOR</constant></term>
+ <listitem>
+ <para>
+ An activator connection owns a name as a placeholder for an
+ implementer, which is started on demand by programs as soon
+ as the first message arrives. There's some more information
+ on this topic in
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ .
+ </para>
+ <para>
+ In contrast to
+ <constant>KDBUS_NAME_REPLACE_EXISTING</constant>,
+ when a name is taken over from an activator connection, all
+ the messages that have been queued in the activator
+ connection will be moved over to the new owner. The activator
+ connection will still be tracked for the name and will take
+ control again if the implementer connection terminates.
+ </para>
+ <para>
+ This flag can not be used when acquiring a name, but is
+ implicitly set through <constant>KDBUS_CMD_HELLO</constant>
+ with <constant>KDBUS_HELLO_ACTIVATOR</constant> set in
+ <varname>kdbus_cmd_hello.conn_flags</varname>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_FLAG_NEGOTIATE</constant></term>
+ <listitem>
+ <para>
+ Requests a set of valid flags for this ioctl. When this bit is
+ set, no action is taken; the ioctl will return
+ <errorcode>0</errorcode>, and the <varname>flags</varname>
+ field will have all bits set that are valid for this command.
+ The <constant>KDBUS_FLAG_NEGOTIATE</constant> bit will be
+ cleared by the operation.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ The returned buffer must be freed with the
+ <constant>KDBUS_CMD_FREE</constant> ioctl when the user is finished with
+ it. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Return value</title>
+ <para>
+ On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+ on error, <errorcode>-1</errorcode> is returned, and
+ <varname>errno</varname> is set to indicate the error.
+ If the issued ioctl is illegal for the file descriptor used,
+ <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+ </para>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_NAME_ACQUIRE</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Illegal command flags, illegal name provided, or an activator
+ tried to acquire a second name.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EPERM</constant></term>
+ <listitem><para>
+ Policy prohibited name ownership.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EALREADY</constant></term>
+ <listitem><para>
+ Connection already owns that name.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EEXIST</constant></term>
+ <listitem><para>
+ The name already exists and can not be taken over.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>E2BIG</constant></term>
+ <listitem><para>
+ The maximum number of well-known names per connection is exhausted.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_NAME_RELEASE</constant>
+ may fail with the following errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Invalid command flags, or invalid name provided.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ESRCH</constant></term>
+ <listitem><para>
+ Name is not found in the registry.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EADDRINUSE</constant></term>
+ <listitem><para>
+ Name is owned by a different connection and can't be released.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_LIST</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Invalid command flags
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>ENOBUFS</constant></term>
+ <listitem><para>
+ No available memory in the connection's pool.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.policy</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ </simplelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/kdbus/kdbus.policy.xml b/Documentation/kdbus/kdbus.policy.xml
new file mode 100644
index 000000000..673241638
--- /dev/null
+++ b/Documentation/kdbus/kdbus.policy.xml
@@ -0,0 +1,406 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.policy">
+
+ <refentryinfo>
+ <title>kdbus.policy</title>
+ <productname>kdbus.policy</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus.policy</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus.policy</refname>
+ <refpurpose>kdbus policy</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+
+ <para>
+ A kdbus policy restricts the possibilities of connections to own, see and
+ talk to well-known names. A policy can be associated with a bus (through a
+ policy holder connection) or a custom endpoint. kdbus stores its policy
+ information in a database that can be accessed through the following
+ ioctl commands:
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_CMD_HELLO</constant></term>
+ <listitem><para>
+ When creating, or updating, a policy holder connection. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_CMD_ENDPOINT_MAKE</constant></term>
+ <term><constant>KDBUS_CMD_ENDPOINT_UPDATE</constant></term>
+ <listitem><para>
+ When creating, or updating, a bus custom endpoint. See
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ In all cases, the name and policy access information is stored in items
+ of type <constant>KDBUS_ITEM_NAME</constant> and
+ <constant>KDBUS_ITEM_POLICY_ACCESS</constant>. For this transport, the
+ following rules apply.
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ An item of type <constant>KDBUS_ITEM_NAME</constant> must be followed
+ by at least one <constant>KDBUS_ITEM_POLICY_ACCESS</constant> item.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ An item of type <constant>KDBUS_ITEM_NAME</constant> can be followed
+ by an arbitrary number of
+ <constant>KDBUS_ITEM_POLICY_ACCESS</constant> items.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ An arbitrary number of groups of names and access levels can be given.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Names passed in items of type <constant>KDBUS_ITEM_NAME</constant> must
+ comply to the rules of valid kdbus.name. See
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information.
+
+ The payload of an item of type
+ <constant>KDBUS_ITEM_POLICY_ACCESS</constant> is defined by the following
+ struct. For more information on the layout of items, please refer to
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para>
+
+ <programlisting>
+struct kdbus_policy_access {
+ __u64 type;
+ __u64 access;
+ __u64 id;
+};
+ </programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>type</varname></term>
+ <listitem>
+ <para>
+ One of the following.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_POLICY_ACCESS_USER</constant></term>
+ <listitem><para>
+ Grant access to a user with the UID stored in the
+ <varname>id</varname> field.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_POLICY_ACCESS_GROUP</constant></term>
+ <listitem><para>
+ Grant access to a user with the GID stored in the
+ <varname>id</varname> field.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_POLICY_ACCESS_WORLD</constant></term>
+ <listitem><para>
+ Grant access to everyone. The <varname>id</varname> field
+ is ignored.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>access</varname></term>
+ <listitem>
+ <para>
+ The access to grant. One of the following.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_POLICY_SEE</constant></term>
+ <listitem><para>
+ Allow the name to be seen.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_POLICY_TALK</constant></term>
+ <listitem><para>
+ Allow the name to be talked to.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_POLICY_OWN</constant></term>
+ <listitem><para>
+ Allow the name to be owned.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>id</varname></term>
+ <listitem><para>
+ For <constant>KDBUS_POLICY_ACCESS_USER</constant>, stores the UID.
+ For <constant>KDBUS_POLICY_ACCESS_GROUP</constant>, stores the GID.
+ </para></listitem>
+ </varlistentry>
+
+ </variablelist>
+
+ <para>
+ All endpoints of buses have an empty policy database by default.
+ Therefore, unless policy rules are added, all operations will also be
+ denied by default. Also see
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Wildcard names</title>
+ <para>
+ Policy holder connections may upload names that contain the wildcard
+ suffix (<literal>".*"</literal>). Such a policy entry is effective for
+ every well-known name that extends the provided name by exactly one more
+ level.
+
+ For example, the name <literal>foo.bar.*</literal> matches both
+ <literal>"foo.bar.baz"</literal> and
+ <literal>"foo.bar.bazbaz"</literal> are, but not
+ <literal>"foo.bar.baz.baz"</literal>.
+
+ This allows connections to take control over multiple names that the
+ policy holder doesn't need to know about when uploading the policy.
+
+ Such wildcard entries are not allowed for custom endpoints.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Privileged connections</title>
+ <para>
+ The policy database is overruled when action is taken by a privileged
+ connection. Please refer to
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information on what makes a connection privileged.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Examples</title>
+ <para>
+ For instance, a set of policy rules may look like this:
+ </para>
+
+ <programlisting>
+KDBUS_ITEM_NAME: str='org.foo.bar'
+KDBUS_ITEM_POLICY_ACCESS: type=USER, access=OWN, ID=1000
+KDBUS_ITEM_POLICY_ACCESS: type=USER, access=TALK, ID=1001
+KDBUS_ITEM_POLICY_ACCESS: type=WORLD, access=SEE
+
+KDBUS_ITEM_NAME: str='org.blah.baz'
+KDBUS_ITEM_POLICY_ACCESS: type=USER, access=OWN, ID=0
+KDBUS_ITEM_POLICY_ACCESS: type=WORLD, access=TALK
+ </programlisting>
+
+ <para>
+ That means that 'org.foo.bar' may only be owned by UID 1000, but every
+ user on the bus is allowed to see the name. However, only UID 1001 may
+ actually send a message to the connection and receive a reply from it.
+
+ The second rule allows 'org.blah.baz' to be owned by UID 0 only, but
+ every user may talk to it.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>TALK access and multiple well-known names per connection</title>
+ <para>
+ Note that TALK access is checked against all names of a connection. For
+ example, if a connection owns both <constant>'org.foo.bar'</constant> and
+ <constant>'org.blah.baz'</constant>, and the policy database allows
+ <constant>'org.blah.baz'</constant> to be talked to by WORLD, then this
+ permission is also granted to <constant>'org.foo.bar'</constant>. That
+ might sound illogical, but after all, we allow messages to be directed to
+ either the ID or a well-known name, and policy is applied to the
+ connection, not the name. In other words, the effective TALK policy for a
+ connection is the most permissive of all names the connection owns.
+
+ For broadcast messages, the receiver needs TALK permissions to the sender
+ to receive the broadcast.
+ </para>
+ <para>
+ Both the endpoint and the bus policy databases are consulted to allow
+ name registry listing, owning a well-known name and message delivery.
+ If either one fails, the operation is failed with
+ <varname>errno</varname> set to <constant>EPERM</constant>.
+
+ For best practices, connections that own names with a restricted TALK
+ access should not install matches. This avoids cases where the sent
+ message may pass the bloom filter due to false-positives and may also
+ satisfy the policy rules.
+
+ Also see
+ <citerefentry>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Implicit policies</title>
+ <para>
+ Depending on the type of the endpoint, a set of implicit rules that
+ override installed policies might be enforced.
+
+ On default endpoints, the following set is enforced and checked before
+ any user-supplied policy is checked.
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ Privileged connections always override any installed policy. Those
+ connections could easily install their own policies, so there is no
+ reason to enforce installed policies.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Connections can always talk to connections of the same user. This
+ includes broadcast messages.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Custom endpoints have stricter policies. The following rules apply:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ Policy rules are always enforced, even if the connection is a
+ privileged connection.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ Policy rules are always enforced for <constant>TALK</constant> access,
+ even if both ends are running under the same user. This includes
+ broadcast messages.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ To restrict the set of names that can be seen, endpoint policies can
+ install <constant>SEE</constant> policies.
+ </para>
+ </listitem>
+ </itemizedlist>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ </simplelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/kdbus/kdbus.pool.xml b/Documentation/kdbus/kdbus.pool.xml
new file mode 100644
index 000000000..a9e16f196
--- /dev/null
+++ b/Documentation/kdbus/kdbus.pool.xml
@@ -0,0 +1,326 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus.pool">
+
+ <refentryinfo>
+ <title>kdbus.pool</title>
+ <productname>kdbus.pool</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus.pool</refname>
+ <refpurpose>kdbus pool</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Description</title>
+ <para>
+ A pool for data received from the kernel is installed for every
+ <emphasis>connection</emphasis> of the <emphasis>bus</emphasis>, and
+ is sized according to the information stored in the
+ <varname>pool_size</varname> member of <type>struct kdbus_cmd_hello</type>
+ when <constant>KDBUS_CMD_HELLO</constant> is employed. Internally, the
+ pool is segmented into <emphasis>slices</emphasis>, each referenced by its
+ <emphasis>offset</emphasis> in the pool, expressed in <type>bytes</type>.
+ See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more information about <constant>KDBUS_CMD_HELLO</constant>.
+ </para>
+
+ <para>
+ The pool is written to by the kernel when one of the following
+ <emphasis>ioctls</emphasis> is issued:
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_CMD_HELLO</constant></term>
+ <listitem><para>
+ ... to receive details about the bus the connection was made to
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><constant>KDBUS_CMD_RECV</constant></term>
+ <listitem><para>
+ ... to receive a message
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><constant>KDBUS_CMD_LIST</constant></term>
+ <listitem><para>
+ ... to dump the name registry
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><constant>KDBUS_CMD_CONN_INFO</constant></term>
+ <listitem><para>
+ ... to retrieve information on a connection
+ </para></listitem>
+ </varlistentry>
+ <varlistentry>
+ <term><constant>KDBUS_CMD_BUS_CREATOR_INFO</constant></term>
+ <listitem><para>
+ ... to retrieve information about a connection's bus creator
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+
+ </para>
+ <para>
+ The <varname>offset</varname> fields returned by either one of the
+ aforementioned ioctls describe offsets inside the pool. In order to make
+ the slice available for subsequent calls,
+ <constant>KDBUS_CMD_FREE</constant> has to be called on that offset
+ (see below). Otherwise, the pool will fill up, and the connection won't
+ be able to receive any more information through its pool.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Pool slice allocation</title>
+ <para>
+ Pool slices are allocated by the kernel in order to report information
+ back to a task, such as messages, returned name list etc.
+ Allocation of pool slices cannot be initiated by userspace. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ and
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for examples of commands that use the <emphasis>pool</emphasis> to
+ return data.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Accessing the pool memory</title>
+ <para>
+ Memory in the pool is read-only for userspace and may only be written
+ to by the kernel. To read from the pool memory, the caller is expected to
+ <citerefentry>
+ <refentrytitle>mmap</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ the buffer into its task, like this:
+ </para>
+ <programlisting>
+uint8_t *buf = mmap(NULL, size, PROT_READ, MAP_SHARED, conn_fd, 0);
+ </programlisting>
+
+ <para>
+ In order to map the entire pool, the <varname>size</varname> parameter in
+ the example above should be set to the value of the
+ <varname>pool_size</varname> member of
+ <type>struct kdbus_cmd_hello</type> when
+ <constant>KDBUS_CMD_HELLO</constant> was employed to create the
+ connection (see above).
+ </para>
+
+ <para>
+ The <emphasis>file descriptor</emphasis> used to map the memory must be
+ the one that was used to create the <emphasis>connection</emphasis>.
+ In other words, the one that was used to call
+ <constant>KDBUS_CMD_HELLO</constant>. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+
+ <para>
+ Alternatively, instead of mapping the entire pool buffer, only parts
+ of it can be mapped. Every kdbus command that returns an
+ <emphasis>offset</emphasis> (see above) also reports a
+ <emphasis>size</emphasis> along with it, so programs can be written
+ in a way that it only maps portions of the pool to access a specific
+ <emphasis>slice</emphasis>.
+ </para>
+
+ <para>
+ When access to the pool memory is no longer needed, programs should
+ call <function>munmap()</function> on the pointer returned by
+ <function>mmap()</function>.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Freeing pool slices</title>
+ <para>
+ The <constant>KDBUS_CMD_FREE</constant> ioctl is used to free a slice
+ inside the pool, describing an offset that was returned in an
+ <varname>offset</varname> field of another ioctl struct.
+ The <constant>KDBUS_CMD_FREE</constant> command takes a
+ <type>struct kdbus_cmd_free</type> as argument.
+ </para>
+
+<programlisting>
+struct kdbus_cmd_free {
+ __u64 size;
+ __u64 flags;
+ __u64 return_flags;
+ __u64 offset;
+ struct kdbus_item items[0];
+};
+</programlisting>
+
+ <para>The fields in this struct are described below.</para>
+
+ <variablelist>
+ <varlistentry>
+ <term><varname>size</varname></term>
+ <listitem><para>
+ The overall size of the struct, including its items.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>flags</varname></term>
+ <listitem><para>
+ Currently unused.
+ <constant>KDBUS_FLAG_NEGOTIATE</constant> is accepted to probe for
+ valid flags. If set, the ioctl will return <errorcode>0</errorcode>,
+ and the <varname>flags</varname> field is set to
+ <constant>0</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>return_flags</varname></term>
+ <listitem><para>
+ Flags returned by the kernel. Currently unused and always set to
+ <constant>0</constant> by the kernel.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>offset</varname></term>
+ <listitem><para>
+ The offset to free, as returned by other ioctls that allocated
+ memory for returned information.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><varname>items</varname></term>
+ <listitem><para>
+ Items to specify further details for the receive command.
+ Currently unused.
+ Unrecognized items are rejected, and the ioctl will fail with
+ <varname>errno</varname> set to <constant>EINVAL</constant>.
+ All items except for
+ <constant>KDBUS_ITEM_NEGOTIATE</constant> (see
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ ) will be rejected.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect1>
+
+ <refsect1>
+ <title>Return value</title>
+ <para>
+ On success, all mentioned ioctl commands return <errorcode>0</errorcode>;
+ on error, <errorcode>-1</errorcode> is returned, and
+ <varname>errno</varname> is set to indicate the error.
+ If the issued ioctl is illegal for the file descriptor used,
+ <varname>errno</varname> will be set to <constant>ENOTTY</constant>.
+ </para>
+
+ <refsect2>
+ <title>
+ <constant>KDBUS_CMD_FREE</constant> may fail with the following
+ errors
+ </title>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>ENXIO</constant></term>
+ <listitem><para>
+ No pool slice found at given offset.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ Invalid flags provided.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>EINVAL</constant></term>
+ <listitem><para>
+ The offset is valid, but the user is not allowed to free the slice.
+ This happens, for example, if the offset was retrieved with
+ <constant>KDBUS_RECV_PEEK</constant>.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>mmap</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>munmap</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ </simplelist>
+ </refsect1>
+</refentry>
diff --git a/Documentation/kdbus/kdbus.xml b/Documentation/kdbus/kdbus.xml
new file mode 100644
index 000000000..d8e7400df
--- /dev/null
+++ b/Documentation/kdbus/kdbus.xml
@@ -0,0 +1,1012 @@
+<?xml version='1.0'?> <!--*-nxml-*-->
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
+
+<refentry id="kdbus">
+
+ <refentryinfo>
+ <title>kdbus</title>
+ <productname>kdbus</productname>
+ </refentryinfo>
+
+ <refmeta>
+ <refentrytitle>kdbus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </refmeta>
+
+ <refnamediv>
+ <refname>kdbus</refname>
+ <refpurpose>Kernel Message Bus</refpurpose>
+ </refnamediv>
+
+ <refsect1>
+ <title>Synopsis</title>
+ <para>
+ kdbus is an inter-process communication bus system controlled by the
+ kernel. It provides user-space with an API to create buses and send
+ unicast and multicast messages to one, or many, peers connected to the
+ same bus. It does not enforce any layout on the transmitted data, but
+ only provides the transport layer used for message interchange between
+ peers.
+ </para>
+ <para>
+ This set of man-pages gives a comprehensive overview of the kernel-level
+ API, with all ioctl commands, associated structs and bit masks. However,
+ most people will not use this API level directly, but rather let one of
+ the high-level abstraction libraries help them integrate D-Bus
+ functionality into their applications.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>Description</title>
+ <para>
+ kdbus provides a pseudo filesystem called <emphasis>kdbusfs</emphasis>,
+ which is usually mounted on <filename>/sys/fs/kdbus</filename>. Bus
+ primitives can be accessed as files and sub-directories underneath this
+ mount-point. Any advanced operations are done via
+ <function>ioctl()</function> on files created by
+ <emphasis>kdbusfs</emphasis>. Multiple mount-points of
+ <emphasis>kdbusfs</emphasis> are independent of each other. This allows
+ namespacing of kdbus by mounting a new instance of
+ <emphasis>kdbusfs</emphasis> in a new mount-namespace. kdbus calls these
+ mount instances domains and each bus belongs to exactly one domain.
+ </para>
+
+ <para>
+ kdbus was designed as a transport layer for D-Bus, but is in no way
+ limited, nor controlled by the D-Bus protocol specification. The D-Bus
+ protocol is one possible application layer on top of kdbus.
+ </para>
+
+ <para>
+ For the general D-Bus protocol specification, its payload format, its
+ marshaling, and its communication semantics, please refer to the
+ <ulink url="http://dbus.freedesktop.org/doc/dbus-specification.html">
+ D-Bus specification</ulink>.
+ </para>
+
+ </refsect1>
+
+ <refsect1>
+ <title>Terminology</title>
+
+ <refsect2>
+ <title>Domain</title>
+ <para>
+ A domain is a <emphasis>kdbusfs</emphasis> mount-point containing all
+ the bus primitives. Each domain is independent, and separate domains
+ do not affect each other.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Bus</title>
+ <para>
+ A bus is a named object inside a domain. Clients exchange messages
+ over a bus. Multiple buses themselves have no connection to each other;
+ messages can only be exchanged on the same bus. The default endpoint of
+ a bus, to which clients establish connections, is the "bus" file
+ /sys/fs/kdbus/&lt;bus name&gt;/bus.
+ Common operating system setups create one "system bus" per system,
+ and one "user bus" for every logged-in user. Applications or services
+ may create their own private buses. The kernel driver does not
+ distinguish between different bus types, they are all handled the same
+ way. See
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Endpoint</title>
+ <para>
+ An endpoint provides a file to talk to a bus. Opening an endpoint
+ creates a new connection to the bus to which the endpoint belongs. All
+ endpoints have unique names and are accessible as files underneath the
+ directory of a bus, e.g., /sys/fs/kdbus/&lt;bus&gt;/&lt;endpoint&gt;
+ Every bus has a default endpoint called "bus".
+ A bus can optionally offer additional endpoints with custom names
+ to provide restricted access to the bus. Custom endpoints carry
+ additional policy which can be used to create sandboxes with
+ locked-down, limited, filtered access to a bus. See
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Connection</title>
+ <para>
+ A connection to a bus is created by opening an endpoint file of a
+ bus. Every ordinary client connection has a unique identifier on the
+ bus and can address messages to every other connection on the same
+ bus by using the peer's connection ID as the destination. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Pool</title>
+ <para>
+ Each connection allocates a piece of shmem-backed memory that is
+ used to receive messages and answers to ioctl commands from the kernel.
+ It is never used to send anything to the kernel. In order to access that
+ memory, an application must mmap() it into its address space. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Well-known Name</title>
+ <para>
+ A connection can, in addition to its implicit unique connection ID,
+ request the ownership of a textual well-known name. Well-known names are
+ noted in reverse-domain notation, such as com.example.service1. A
+ connection that offers a service on a bus is usually reached by its
+ well-known name. An analogy of connection ID and well-known name is an
+ IP address and a DNS name associated with that address. See
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Message</title>
+ <para>
+ Connections can exchange messages with other connections by addressing
+ the peers with their connection ID or well-known name. A message
+ consists of a message header with information on how to route the
+ message, and the message payload, which is a logical byte stream of
+ arbitrary size. Messages can carry additional file descriptors to be
+ passed from one connection to another, just like passing file
+ descriptors over UNIX domain sockets. Every connection can specify which
+ set of metadata the kernel should attach to the message when it is
+ delivered to the receiving connection. Metadata contains information
+ like: system time stamps, UID, GID, TID, proc-starttime, well-known
+ names, process comm, process exe, process argv, cgroup, capabilities,
+ seclabel, audit session, loginuid and the connection's human-readable
+ name. See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Item</title>
+ <para>
+ The API of kdbus implements the notion of items, submitted through and
+ returned by most ioctls, and stored inside data structures in the
+ connection's pool. See
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Broadcast, signal, filter, match</title>
+ <para>
+ Signals are messages that a receiver opts in for by installing a blob of
+ bytes, called a 'match'. Signal messages must always carry a
+ counter-part blob, called a 'filter', and signals are only delivered to
+ peers which have a match that white-lists the message's filter. Senders
+ of signal messages can use either a single connection ID as receiver,
+ or the special connection ID
+ <constant>KDBUS_DST_ID_BROADCAST</constant> to potentially send it to
+ all connections of a bus, following the logic described above. See
+ <citerefentry>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ and
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Policy</title>
+ <para>
+ A policy is a set of rules that define which connections can see, talk
+ to, or register a well-known name on the bus. A policy is attached to
+ buses and custom endpoints, and modified by policy holder connections or
+ owners of custom endpoints. See
+ <citerefentry>
+ <refentrytitle>kdbus.policy</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Privileged bus users</title>
+ <para>
+ A user connecting to the bus is considered privileged if it is either
+ the creator of the bus, or if it has the CAP_IPC_OWNER capability flag
+ set. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>Bus Layout</title>
+
+ <para>
+ A <emphasis>bus</emphasis> provides and defines an environment that peers
+ can connect to for message interchange. A bus is created via the kdbus
+ control interface and can be modified by the bus creator. It applies the
+ policy that control all bus operations. The bus creator itself does not
+ participate as a peer. To establish a peer
+ <emphasis>connection</emphasis>, you have to open one of the
+ <emphasis>endpoints</emphasis> of a bus. Each bus provides a default
+ endpoint, but further endpoints can be created on-demand. Endpoints are
+ used to apply additional policies for all connections on this endpoint.
+ Thus, they provide additional filters to further restrict access of
+ specific connections to the bus.
+ </para>
+
+ <para>
+ Following, you can see an example bus layout:
+ </para>
+
+ <programlisting><![CDATA[
+ Bus Creator
+ |
+ |
+ +-----+
+ | Bus |
+ +-----+
+ |
+ __________________/ \__________________
+ / \
+ | |
+ +----------+ +----------+
+ | Endpoint | | Endpoint |
+ +----------+ +----------+
+ _________/|\_________ _________/|\_________
+ / | \ / | \
+ | | | | | |
+ | | | | | |
+ Connection Connection Connection Connection Connection Connection
+ ]]></programlisting>
+
+ </refsect1>
+
+ <refsect1>
+ <title>Data structures and interconnections</title>
+ <programlisting><![CDATA[
+ +--------------------------------------------------------------------------+
+ | Domain (Mount Point) |
+ | /sys/fs/kdbus/control |
+ | +----------------------------------------------------------------------+ |
+ | | Bus (System Bus) | |
+ | | /sys/fs/kdbus/0-system/ | |
+ | | +-------------------------------+ +--------------------------------+ | |
+ | | | Endpoint | | Endpoint | | |
+ | | | /sys/fs/kdbus/0-system/bus | | /sys/fs/kdbus/0-system/ep.app | | |
+ | | +-------------------------------+ +--------------------------------+ | |
+ | | +--------------+ +--------------+ +--------------+ +---------------+ | |
+ | | | Connection | | Connection | | Connection | | Connection | | |
+ | | | :1.22 | | :1.25 | | :1.55 | | :1.81 | | |
+ | | +--------------+ +--------------+ +--------------+ +---------------+ | |
+ | +----------------------------------------------------------------------+ |
+ | |
+ | +----------------------------------------------------------------------+ |
+ | | Bus (User Bus for UID 2702) | |
+ | | /sys/fs/kdbus/2702-user/ | |
+ | | +-------------------------------+ +--------------------------------+ | |
+ | | | Endpoint | | Endpoint | | |
+ | | | /sys/fs/kdbus/2702-user/bus | | /sys/fs/kdbus/2702-user/ep.app | | |
+ | | +-------------------------------+ +--------------------------------+ | |
+ | | +--------------+ +--------------+ +--------------+ +---------------+ | |
+ | | | Connection | | Connection | | Connection | | Connection | | |
+ | | | :1.22 | | :1.25 | | :1.55 | | :1.81 | | |
+ | | +--------------+ +--------------+ +--------------------------------+ | |
+ | +----------------------------------------------------------------------+ |
+ +--------------------------------------------------------------------------+
+ ]]></programlisting>
+ </refsect1>
+
+ <refsect1>
+ <title>Metadata</title>
+
+ <refsect2>
+ <title>When metadata is collected</title>
+ <para>
+ kdbus records data about the system in certain situations. Such metadata
+ can refer to the currently active process (creds, PIDs, current user
+ groups, process names and its executable path, cgroup membership,
+ capabilities, security label and audit information), connection
+ information (description string, currently owned names) and time stamps.
+ </para>
+ <para>
+ Metadata is collected at the following times.
+ </para>
+
+ <itemizedlist>
+ <listitem><para>
+ When a bus is created (<constant>KDBUS_CMD_MAKE</constant>),
+ information about the calling task is collected. This data is returned
+ by the kernel via the <constant>KDBUS_CMD_BUS_CREATOR_INFO</constant>
+ call.
+ </para></listitem>
+
+ <listitem>
+ <para>
+ When a connection is created (<constant>KDBUS_CMD_HELLO</constant>),
+ information about the calling task is collected. Alternatively, a
+ privileged connection may provide 'faked' information about
+ credentials, PIDs and security labels which will be stored instead.
+ This data is returned by the kernel as information on a connection
+ (<constant>KDBUS_CMD_CONN_INFO</constant>). Only metadata that a
+ connection allowed to be sent (by setting its bit in
+ <varname>attach_flags_send</varname>) will be exported in this way.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ When a message is sent (<constant>KDBUS_CMD_SEND</constant>),
+ information about the sending task and the sending connection is
+ collected. This metadata will be attached to the message when it
+ arrives in the receiver's pool. If the connection sending the
+ message installed faked credentials (see
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>),
+ the message will not be augmented by any information about the
+ currently sending task. Note that only metadata that was requested
+ by the receiving connection will be collected and attached to
+ messages.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Which metadata items are actually delivered depends on the following
+ sets and masks:
+ </para>
+
+ <itemizedlist>
+ <listitem><para>
+ (a) the system-wide kmod creds mask
+ (module parameter <varname>attach_flags_mask</varname>)
+ </para></listitem>
+
+ <listitem><para>
+ (b) the per-connection send creds mask, set by the connecting client
+ </para></listitem>
+
+ <listitem><para>
+ (c) the per-connection receive creds mask, set by the connecting
+ client
+ </para></listitem>
+
+ <listitem><para>
+ (d) the per-bus minimal creds mask, set by the bus creator
+ </para></listitem>
+
+ <listitem><para>
+ (e) the per-bus owner creds mask, set by the bus creator
+ </para></listitem>
+
+ <listitem><para>
+ (f) the mask specified when querying creds of a bus peer
+ </para></listitem>
+
+ <listitem><para>
+ (g) the mask specified when querying creds of a bus owner
+ </para></listitem>
+ </itemizedlist>
+
+ <para>
+ With the following rules:
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ [1] The creds attached to messages are determined as
+ <constant>a &amp; b &amp; c</constant>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ [2] When connecting to a bus (<constant>KDBUS_CMD_HELLO</constant>),
+ and <constant>~b &amp; d != 0</constant>, the call will fail with,
+ <errorcode>-1</errorcode>, and <varname>errno</varname> is set to
+ <constant>ECONNREFUSED</constant>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ [3] When querying creds of a bus peer, the creds returned are
+ <constant>a &amp; b &amp; f</constant>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ [4] When querying creds of a bus owner, the creds returned are
+ <constant>a &amp; e &amp; g</constant>.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ Hence, programs might not always get all requested metadata items that
+ it requested. Code must be written so that it can cope with this fact.
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Benefits and heads-up</title>
+ <para>
+ Attaching metadata to messages has two major benefits.
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ Metadata attached to messages is gathered at the moment when the
+ other side calls <constant>KDBUS_CMD_SEND</constant>, or,
+ respectively, then the kernel notification is generated. There is
+ no need for the receiving peer to retrieve information about the
+ task in a second step. This closes a race gap that would otherwise
+ be inherent.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ As metadata is delivered along with messages in the same data
+ blob, no extra calls to kernel functions etc. are needed to gather
+ them.
+ </para>
+ </listitem>
+ </itemizedlist>
+
+ Note, however, that collecting metadata does come at a price for
+ performance, so developers should carefully assess which metadata to
+ really opt-in for. For best practice, data that is not needed as part
+ of a message should not be requested by the connection in the first
+ place (see <varname>attach_flags_recv</varname> in
+ <constant>KDBUS_CMD_HELLO</constant>).
+ </para>
+ </refsect2>
+
+ <refsect2>
+ <title>Attach flags for metadata items</title>
+ <para>
+ To let the kernel know which metadata information to attach as items
+ to the aforementioned commands, it uses a bitmask. In those, the
+ following <emphasis>attach flags</emphasis> are currently supported.
+ Both the <varname>attach_flags_recv</varname> and
+ <varname>attach_flags_send</varname> fields of
+ <type>struct kdbus_cmd_hello</type>, as well as the payload of the
+ <constant>KDBUS_ITEM_ATTACH_FLAGS_SEND</constant> and
+ <constant>KDBUS_ITEM_ATTACH_FLAGS_RECV</constant> items follow this
+ scheme.
+ </para>
+
+ <variablelist>
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_TIMESTAMP</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_TIMESTAMP</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_CREDS</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_CREDS</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_PIDS</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_PIDS</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_AUXGROUPS</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_AUXGROUPS</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_NAMES</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_OWNED_NAME</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_TID_COMM</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_TID_COMM</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_PID_COMM</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_PID_COMM</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_EXE</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_EXE</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_CMDLINE</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_CMDLINE</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_CGROUP</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_CGROUP</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_CAPS</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_CAPS</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_SECLABEL</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_SECLABEL</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_AUDIT</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_AUDIT</constant>.
+ </para></listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term><constant>KDBUS_ATTACH_CONN_DESCRIPTION</constant></term>
+ <listitem><para>
+ Requests the attachment of an item of type
+ <constant>KDBUS_ITEM_CONN_DESCRIPTION</constant>.
+ </para></listitem>
+ </varlistentry>
+ </variablelist>
+
+ <para>
+ Please refer to
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for detailed information about the layout and payload of items and
+ what metadata should be used to.
+ </para>
+ </refsect2>
+ </refsect1>
+
+ <refsect1>
+ <title>The ioctl interface</title>
+
+ <para>
+ As stated in the 'synopsis' section above, application developers are
+ strongly encouraged to use kdbus through one of the high-level D-Bus
+ abstraction libraries, rather than using the low-level API directly.
+ </para>
+
+ <para>
+ kdbus on the kernel level exposes its functions exclusively through
+ <citerefentry>
+ <refentrytitle>ioctl</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>,
+ employed on file descriptors returned by
+ <citerefentry>
+ <refentrytitle>open</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ on pseudo files exposed by
+ <citerefentry>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para>
+ <para>
+ Following is a list of all the ioctls, along with the command structs
+ they must be used with.
+ </para>
+
+ <informaltable frame="none">
+ <tgroup cols="3" colsep="1">
+ <thead>
+ <row>
+ <entry>ioctl signature</entry>
+ <entry>command</entry>
+ <entry>transported struct</entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry><constant>0x40189500</constant></entry>
+ <entry><constant>KDBUS_CMD_BUS_MAKE</constant></entry>
+ <entry><type>struct kdbus_cmd *</type></entry>
+ </row><row>
+ <entry><constant>0x40189510</constant></entry>
+ <entry><constant>KDBUS_CMD_ENDPOINT_MAKE</constant></entry>
+ <entry><type>struct kdbus_cmd *</type></entry>
+ </row><row>
+ <entry><constant>0xc0609580</constant></entry>
+ <entry><constant>KDBUS_CMD_HELLO</constant></entry>
+ <entry><type>struct kdbus_cmd_hello *</type></entry>
+ </row><row>
+ <entry><constant>0x40189582</constant></entry>
+ <entry><constant>KDBUS_CMD_BYEBYE</constant></entry>
+ <entry><type>struct kdbus_cmd *</type></entry>
+ </row><row>
+ <entry><constant>0x40389590</constant></entry>
+ <entry><constant>KDBUS_CMD_SEND</constant></entry>
+ <entry><type>struct kdbus_cmd_send *</type></entry>
+ </row><row>
+ <entry><constant>0x80409591</constant></entry>
+ <entry><constant>KDBUS_CMD_RECV</constant></entry>
+ <entry><type>struct kdbus_cmd_recv *</type></entry>
+ </row><row>
+ <entry><constant>0x40209583</constant></entry>
+ <entry><constant>KDBUS_CMD_FREE</constant></entry>
+ <entry><type>struct kdbus_cmd_free *</type></entry>
+ </row><row>
+ <entry><constant>0x401895a0</constant></entry>
+ <entry><constant>KDBUS_CMD_NAME_ACQUIRE</constant></entry>
+ <entry><type>struct kdbus_cmd *</type></entry>
+ </row><row>
+ <entry><constant>0x401895a1</constant></entry>
+ <entry><constant>KDBUS_CMD_NAME_RELEASE</constant></entry>
+ <entry><type>struct kdbus_cmd *</type></entry>
+ </row><row>
+ <entry><constant>0x80289586</constant></entry>
+ <entry><constant>KDBUS_CMD_LIST</constant></entry>
+ <entry><type>struct kdbus_cmd_list *</type></entry>
+ </row><row>
+ <entry><constant>0x80309584</constant></entry>
+ <entry><constant>KDBUS_CMD_CONN_INFO</constant></entry>
+ <entry><type>struct kdbus_cmd_info *</type></entry>
+ </row><row>
+ <entry><constant>0x40209551</constant></entry>
+ <entry><constant>KDBUS_CMD_UPDATE</constant></entry>
+ <entry><type>struct kdbus_cmd *</type></entry>
+ </row><row>
+ <entry><constant>0x80309585</constant></entry>
+ <entry><constant>KDBUS_CMD_BUS_CREATOR_INFO</constant></entry>
+ <entry><type>struct kdbus_cmd_info *</type></entry>
+ </row><row>
+ <entry><constant>0x40189511</constant></entry>
+ <entry><constant>KDBUS_CMD_ENDPOINT_UPDATE</constant></entry>
+ <entry><type>struct kdbus_cmd *</type></entry>
+ </row><row>
+ <entry><constant>0x402095b0</constant></entry>
+ <entry><constant>KDBUS_CMD_MATCH_ADD</constant></entry>
+ <entry><type>struct kdbus_cmd_match *</type></entry>
+ </row><row>
+ <entry><constant>0x402095b1</constant></entry>
+ <entry><constant>KDBUS_CMD_MATCH_REMOVE</constant></entry>
+ <entry><type>struct kdbus_cmd_match *</type></entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+
+ <para>
+ Depending on the type of <emphasis>kdbusfs</emphasis> node that was
+ opened and what ioctls have been executed on a file descriptor before,
+ a different sub-set of ioctl commands is allowed.
+ </para>
+
+ <itemizedlist>
+ <listitem>
+ <para>
+ On a file descriptor resulting from opening a
+ <emphasis>control node</emphasis>, only the
+ <constant>KDBUS_CMD_BUS_MAKE</constant> ioctl may be executed.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ On a file descriptor resulting from opening a
+ <emphasis>bus endpoint node</emphasis>, only the
+ <constant>KDBUS_CMD_ENDPOINT_MAKE</constant> and
+ <constant>KDBUS_CMD_HELLO</constant> ioctls may be executed.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ A file descriptor that was used to create a bus
+ (via <constant>KDBUS_CMD_BUS_MAKE</constant>) is called a
+ <emphasis>bus owner</emphasis> file descriptor. The bus will be
+ active as long as the file descriptor is kept open.
+ A bus owner file descriptor can not be used to
+ employ any further ioctls. As soon as
+ <citerefentry>
+ <refentrytitle>close</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ is called on it, the bus will be shut down, along will all associated
+ endpoints and connections. See
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ A file descriptor that was used to create an endpoint
+ (via <constant>KDBUS_CMD_ENDPOINT_MAKE</constant>) is called an
+ <emphasis>endpoint owner</emphasis> file descriptor. The endpoint
+ will be active as long as the file descriptor is kept open.
+ An endpoint owner file descriptor can only be used
+ to update details of an endpoint through the
+ <constant>KDBUS_CMD_ENDPOINT_UPDATE</constant> ioctl. As soon as
+ <citerefentry>
+ <refentrytitle>close</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ is called on it, the endpoint will be removed from the bus, and all
+ connections that are connected to the bus through it are shut down.
+ See
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ for more details.
+ </para>
+ </listitem>
+ <listitem>
+ <para>
+ A file descriptor that was used to create a connection
+ (via <constant>KDBUS_CMD_HELLO</constant>) is called a
+ <emphasis>connection owner</emphasis> file descriptor. The connection
+ will be active as long as the file descriptor is kept open.
+ A connection owner file descriptor may be used to
+ issue any of the following ioctls.
+ </para>
+
+ <itemizedlist>
+ <listitem><para>
+ <constant>KDBUS_CMD_UPDATE</constant> to tweak details of the
+ connection. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+
+ <listitem><para>
+ <constant>KDBUS_CMD_BYEBYE</constant> to shut down a connection
+ without losing messages. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+
+ <listitem><para>
+ <constant>KDBUS_CMD_FREE</constant> to free a slice of memory in
+ the pool. See
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+
+ <listitem><para>
+ <constant>KDBUS_CMD_CONN_INFO</constant> to retrieve information
+ on other connections on the bus. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+
+ <listitem><para>
+ <constant>KDBUS_CMD_BUS_CREATOR_INFO</constant> to retrieve
+ information on the bus creator. See
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+
+ <listitem><para>
+ <constant>KDBUS_CMD_LIST</constant> to retrieve a list of
+ currently active well-known names and unique IDs on the bus. See
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+
+ <listitem><para>
+ <constant>KDBUS_CMD_SEND</constant> and
+ <constant>KDBUS_CMD_RECV</constant> to send or receive a message.
+ See
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+
+ <listitem><para>
+ <constant>KDBUS_CMD_NAME_ACQUIRE</constant> and
+ <constant>KDBUS_CMD_NAME_RELEASE</constant> to acquire or release
+ a well-known name on the bus. See
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+
+ <listitem><para>
+ <constant>KDBUS_CMD_MATCH_ADD</constant> and
+ <constant>KDBUS_CMD_MATCH_REMOVE</constant> to add or remove
+ a match for signal messages. See
+ <citerefentry>
+ <refentrytitle>kdbus.match</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>.
+ </para></listitem>
+ </itemizedlist>
+ </listitem>
+ </itemizedlist>
+
+ <para>
+ These ioctls, along with the structs they transport, are explained in
+ detail in the other documents linked to in the "See Also" section below.
+ </para>
+ </refsect1>
+
+ <refsect1>
+ <title>See Also</title>
+ <simplelist type="inline">
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.bus</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.connection</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.endpoint</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.fs</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.item</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.message</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.name</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>kdbus.pool</refentrytitle>
+ <manvolnum>7</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>ioctl</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>mmap</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>open</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <citerefentry>
+ <refentrytitle>close</refentrytitle>
+ <manvolnum>2</manvolnum>
+ </citerefentry>
+ </member>
+ <member>
+ <ulink url="http://freedesktop.org/wiki/Software/dbus">D-Bus</ulink>
+ </member>
+ </simplelist>
+ </refsect1>
+
+</refentry>
diff --git a/Documentation/kdbus/stylesheet.xsl b/Documentation/kdbus/stylesheet.xsl
new file mode 100644
index 000000000..52565eac7
--- /dev/null
+++ b/Documentation/kdbus/stylesheet.xsl
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<stylesheet xmlns="http://www.w3.org/1999/XSL/Transform" version="1.0">
+ <param name="chunk.quietly">1</param>
+ <param name="funcsynopsis.style">ansi</param>
+ <param name="funcsynopsis.tabular.threshold">80</param>
+ <param name="callout.graphics">0</param>
+ <param name="paper.type">A4</param>
+ <param name="generate.section.toc.level">2</param>
+ <param name="use.id.as.filename">1</param>
+ <param name="citerefentry.link">1</param>
+ <strip-space elements="*"/>
+ <template name="generate.citerefentry.link">
+ <value-of select="refentrytitle"/>
+ <text>.html</text>
+ </template>
+</stylesheet>
diff --git a/Documentation/kdump/gdbmacros.txt b/Documentation/kdump/gdbmacros.txt
new file mode 100644
index 000000000..9b9b454b0
--- /dev/null
+++ b/Documentation/kdump/gdbmacros.txt
@@ -0,0 +1,201 @@
+#
+# This file contains a few gdb macros (user defined commands) to extract
+# useful information from kernel crashdump (kdump) like stack traces of
+# all the processes or a particular process and trapinfo.
+#
+# These macros can be used by copying this file in .gdbinit (put in home
+# directory or current directory) or by invoking gdb command with
+# --command=<command-file-name> option
+#
+# Credits:
+# Alexander Nyberg <alexn@telia.com>
+# V Srivatsa <vatsa@in.ibm.com>
+# Maneesh Soni <maneesh@in.ibm.com>
+#
+
+define bttnobp
+ set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+ set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next)
+ set $init_t=&init_task
+ set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+ while ($next_t != $init_t)
+ set $next_t=(struct task_struct *)$next_t
+ printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
+ printf "===================\n"
+ set var $stackp = $next_t.thread.esp
+ set var $stack_top = ($stackp & ~4095) + 4096
+
+ while ($stackp < $stack_top)
+ if (*($stackp) > _stext && *($stackp) < _sinittext)
+ info symbol *($stackp)
+ end
+ set $stackp += 4
+ end
+ set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off)
+ while ($next_th != $next_t)
+ set $next_th=(struct task_struct *)$next_th
+ printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
+ printf "===================\n"
+ set var $stackp = $next_t.thread.esp
+ set var $stack_top = ($stackp & ~4095) + 4096
+
+ while ($stackp < $stack_top)
+ if (*($stackp) > _stext && *($stackp) < _sinittext)
+ info symbol *($stackp)
+ end
+ set $stackp += 4
+ end
+ set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off)
+ end
+ set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+ end
+end
+document bttnobp
+ dump all thread stack traces on a kernel compiled with !CONFIG_FRAME_POINTER
+end
+
+define btt
+ set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+ set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next)
+ set $init_t=&init_task
+ set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+ while ($next_t != $init_t)
+ set $next_t=(struct task_struct *)$next_t
+ printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
+ printf "===================\n"
+ set var $stackp = $next_t.thread.esp
+ set var $stack_top = ($stackp & ~4095) + 4096
+ set var $stack_bot = ($stackp & ~4095)
+
+ set $stackp = *($stackp)
+ while (($stackp < $stack_top) && ($stackp > $stack_bot))
+ set var $addr = *($stackp + 4)
+ info symbol $addr
+ set $stackp = *($stackp)
+ end
+
+ set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off)
+ while ($next_th != $next_t)
+ set $next_th=(struct task_struct *)$next_th
+ printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
+ printf "===================\n"
+ set var $stackp = $next_t.thread.esp
+ set var $stack_top = ($stackp & ~4095) + 4096
+ set var $stack_bot = ($stackp & ~4095)
+
+ set $stackp = *($stackp)
+ while (($stackp < $stack_top) && ($stackp > $stack_bot))
+ set var $addr = *($stackp + 4)
+ info symbol $addr
+ set $stackp = *($stackp)
+ end
+ set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off)
+ end
+ set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+ end
+end
+document btt
+ dump all thread stack traces on a kernel compiled with CONFIG_FRAME_POINTER
+end
+
+define btpid
+ set var $pid = $arg0
+ set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+ set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next)
+ set $init_t=&init_task
+ set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+ set var $pid_task = 0
+
+ while ($next_t != $init_t)
+ set $next_t=(struct task_struct *)$next_t
+
+ if ($next_t.pid == $pid)
+ set $pid_task = $next_t
+ end
+
+ set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off)
+ while ($next_th != $next_t)
+ set $next_th=(struct task_struct *)$next_th
+ if ($next_th.pid == $pid)
+ set $pid_task = $next_th
+ end
+ set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off)
+ end
+ set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+ end
+
+ printf "\npid %d; comm %s:\n", $pid_task.pid, $pid_task.comm
+ printf "===================\n"
+ set var $stackp = $pid_task.thread.esp
+ set var $stack_top = ($stackp & ~4095) + 4096
+ set var $stack_bot = ($stackp & ~4095)
+
+ set $stackp = *($stackp)
+ while (($stackp < $stack_top) && ($stackp > $stack_bot))
+ set var $addr = *($stackp + 4)
+ info symbol $addr
+ set $stackp = *($stackp)
+ end
+end
+document btpid
+ backtrace of pid
+end
+
+
+define trapinfo
+ set var $pid = $arg0
+ set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
+ set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next)
+ set $init_t=&init_task
+ set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
+ set var $pid_task = 0
+
+ while ($next_t != $init_t)
+ set $next_t=(struct task_struct *)$next_t
+
+ if ($next_t.pid == $pid)
+ set $pid_task = $next_t
+ end
+
+ set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off)
+ while ($next_th != $next_t)
+ set $next_th=(struct task_struct *)$next_th
+ if ($next_th.pid == $pid)
+ set $pid_task = $next_th
+ end
+ set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off)
+ end
+ set $next_t=(char *)($next_t->tasks.next) - $tasks_off
+ end
+
+ printf "Trapno %ld, cr2 0x%lx, error_code %ld\n", $pid_task.thread.trap_no, \
+ $pid_task.thread.cr2, $pid_task.thread.error_code
+
+end
+document trapinfo
+ Run info threads and lookup pid of thread #1
+ 'trapinfo <pid>' will tell you by which trap & possibly
+ address the kernel panicked.
+end
+
+
+define dmesg
+ set $i = 0
+ set $end_idx = (log_end - 1) & (log_buf_len - 1)
+
+ while ($i < logged_chars)
+ set $idx = (log_end - 1 - logged_chars + $i) & (log_buf_len - 1)
+
+ if ($idx + 100 <= $end_idx) || \
+ ($end_idx <= $idx && $idx + 100 < log_buf_len)
+ printf "%.100s", &log_buf[$idx]
+ set $i = $i + 100
+ else
+ printf "%c", log_buf[$idx]
+ set $i = $i + 1
+ end
+ end
+end
+document dmesg
+ print the kernel ring buffer
+end
diff --git a/Documentation/kdump/kdump.txt b/Documentation/kdump/kdump.txt
new file mode 100644
index 000000000..bc4bd5a44
--- /dev/null
+++ b/Documentation/kdump/kdump.txt
@@ -0,0 +1,487 @@
+================================================================
+Documentation for Kdump - The kexec-based Crash Dumping Solution
+================================================================
+
+This document includes overview, setup and installation, and analysis
+information.
+
+Overview
+========
+
+Kdump uses kexec to quickly boot to a dump-capture kernel whenever a
+dump of the system kernel's memory needs to be taken (for example, when
+the system panics). The system kernel's memory image is preserved across
+the reboot and is accessible to the dump-capture kernel.
+
+You can use common commands, such as cp and scp, to copy the
+memory image to a dump file on the local disk, or across the network to
+a remote system.
+
+Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64,
+s390x and arm architectures.
+
+When the system kernel boots, it reserves a small section of memory for
+the dump-capture kernel. This ensures that ongoing Direct Memory Access
+(DMA) from the system kernel does not corrupt the dump-capture kernel.
+The kexec -p command loads the dump-capture kernel into this reserved
+memory.
+
+On x86 machines, the first 640 KB of physical memory is needed to boot,
+regardless of where the kernel loads. Therefore, kexec backs up this
+region just before rebooting into the dump-capture kernel.
+
+Similarly on PPC64 machines first 32KB of physical memory is needed for
+booting regardless of where the kernel is loaded and to support 64K page
+size kexec backs up the first 64KB memory.
+
+For s390x, when kdump is triggered, the crashkernel region is exchanged
+with the region [0, crashkernel region size] and then the kdump kernel
+runs in [0, crashkernel region size]. Therefore no relocatable kernel is
+needed for s390x.
+
+All of the necessary information about the system kernel's core image is
+encoded in the ELF format, and stored in a reserved area of memory
+before a crash. The physical address of the start of the ELF header is
+passed to the dump-capture kernel through the elfcorehdr= boot
+parameter. Optionally the size of the ELF header can also be passed
+when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax.
+
+
+With the dump-capture kernel, you can access the memory image through
+/proc/vmcore. This exports the dump as an ELF-format file that you can
+write out using file copy commands such as cp or scp. Further, you can
+use analysis tools such as the GNU Debugger (GDB) and the Crash tool to
+debug the dump file. This method ensures that the dump pages are correctly
+ordered.
+
+
+Setup and Installation
+======================
+
+Install kexec-tools
+-------------------
+
+1) Login as the root user.
+
+2) Download the kexec-tools user-space package from the following URL:
+
+http://kernel.org/pub/linux/utils/kernel/kexec/kexec-tools.tar.gz
+
+This is a symlink to the latest version.
+
+The latest kexec-tools git tree is available at:
+
+git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
+and
+http://www.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git
+
+There is also a gitweb interface available at
+http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git
+
+More information about kexec-tools can be found at
+http://horms.net/projects/kexec/
+
+3) Unpack the tarball with the tar command, as follows:
+
+ tar xvpzf kexec-tools.tar.gz
+
+4) Change to the kexec-tools directory, as follows:
+
+ cd kexec-tools-VERSION
+
+5) Configure the package, as follows:
+
+ ./configure
+
+6) Compile the package, as follows:
+
+ make
+
+7) Install the package, as follows:
+
+ make install
+
+
+Build the system and dump-capture kernels
+-----------------------------------------
+There are two possible methods of using Kdump.
+
+1) Build a separate custom dump-capture kernel for capturing the
+ kernel core dump.
+
+2) Or use the system kernel binary itself as dump-capture kernel and there is
+ no need to build a separate dump-capture kernel. This is possible
+ only with the architectures which support a relocatable kernel. As
+ of today, i386, x86_64, ppc64, ia64 and arm architectures support relocatable
+ kernel.
+
+Building a relocatable kernel is advantageous from the point of view that
+one does not have to build a second kernel for capturing the dump. But
+at the same time one might want to build a custom dump capture kernel
+suitable to his needs.
+
+Following are the configuration setting required for system and
+dump-capture kernels for enabling kdump support.
+
+System kernel config options
+----------------------------
+
+1) Enable "kexec system call" in "Processor type and features."
+
+ CONFIG_KEXEC=y
+
+2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo
+ filesystems." This is usually enabled by default.
+
+ CONFIG_SYSFS=y
+
+ Note that "sysfs file system support" might not appear in the "Pseudo
+ filesystems" menu if "Configure standard kernel features (for small
+ systems)" is not enabled in "General Setup." In this case, check the
+ .config file itself to ensure that sysfs is turned on, as follows:
+
+ grep 'CONFIG_SYSFS' .config
+
+3) Enable "Compile the kernel with debug info" in "Kernel hacking."
+
+ CONFIG_DEBUG_INFO=Y
+
+ This causes the kernel to be built with debug symbols. The dump
+ analysis tools require a vmlinux with debug symbols in order to read
+ and analyze a dump file.
+
+Dump-capture kernel config options (Arch Independent)
+-----------------------------------------------------
+
+1) Enable "kernel crash dumps" support under "Processor type and
+ features":
+
+ CONFIG_CRASH_DUMP=y
+
+2) Enable "/proc/vmcore support" under "Filesystems" -> "Pseudo filesystems".
+
+ CONFIG_PROC_VMCORE=y
+ (CONFIG_PROC_VMCORE is set by default when CONFIG_CRASH_DUMP is selected.)
+
+Dump-capture kernel config options (Arch Dependent, i386 and x86_64)
+--------------------------------------------------------------------
+
+1) On i386, enable high memory support under "Processor type and
+ features":
+
+ CONFIG_HIGHMEM64G=y
+ or
+ CONFIG_HIGHMEM4G
+
+2) On i386 and x86_64, disable symmetric multi-processing support
+ under "Processor type and features":
+
+ CONFIG_SMP=n
+
+ (If CONFIG_SMP=y, then specify maxcpus=1 on the kernel command line
+ when loading the dump-capture kernel, see section "Load the Dump-capture
+ Kernel".)
+
+3) If one wants to build and use a relocatable kernel,
+ Enable "Build a relocatable kernel" support under "Processor type and
+ features"
+
+ CONFIG_RELOCATABLE=y
+
+4) Use a suitable value for "Physical address where the kernel is
+ loaded" (under "Processor type and features"). This only appears when
+ "kernel crash dumps" is enabled. A suitable value depends upon
+ whether kernel is relocatable or not.
+
+ If you are using a relocatable kernel use CONFIG_PHYSICAL_START=0x100000
+ This will compile the kernel for physical address 1MB, but given the fact
+ kernel is relocatable, it can be run from any physical address hence
+ kexec boot loader will load it in memory region reserved for dump-capture
+ kernel.
+
+ Otherwise it should be the start of memory region reserved for
+ second kernel using boot parameter "crashkernel=Y@X". Here X is
+ start of memory region reserved for dump-capture kernel.
+ Generally X is 16MB (0x1000000). So you can set
+ CONFIG_PHYSICAL_START=0x1000000
+
+5) Make and install the kernel and its modules. DO NOT add this kernel
+ to the boot loader configuration files.
+
+Dump-capture kernel config options (Arch Dependent, ppc64)
+----------------------------------------------------------
+
+1) Enable "Build a kdump crash kernel" support under "Kernel" options:
+
+ CONFIG_CRASH_DUMP=y
+
+2) Enable "Build a relocatable kernel" support
+
+ CONFIG_RELOCATABLE=y
+
+ Make and install the kernel and its modules.
+
+Dump-capture kernel config options (Arch Dependent, ia64)
+----------------------------------------------------------
+
+- No specific options are required to create a dump-capture kernel
+ for ia64, other than those specified in the arch independent section
+ above. This means that it is possible to use the system kernel
+ as a dump-capture kernel if desired.
+
+ The crashkernel region can be automatically placed by the system
+ kernel at run time. This is done by specifying the base address as 0,
+ or omitting it all together.
+
+ crashkernel=256M@0
+ or
+ crashkernel=256M
+
+ If the start address is specified, note that the start address of the
+ kernel will be aligned to 64Mb, so if the start address is not then
+ any space below the alignment point will be wasted.
+
+Dump-capture kernel config options (Arch Dependent, arm)
+----------------------------------------------------------
+
+- To use a relocatable kernel,
+ Enable "AUTO_ZRELADDR" support under "Boot" options:
+
+ AUTO_ZRELADDR=y
+
+Extended crashkernel syntax
+===========================
+
+While the "crashkernel=size[@offset]" syntax is sufficient for most
+configurations, sometimes it's handy to have the reserved memory dependent
+on the value of System RAM -- that's mostly for distributors that pre-setup
+the kernel command line to avoid a unbootable system after some memory has
+been removed from the machine.
+
+The syntax is:
+
+ crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset]
+ range=start-[end]
+
+Please note, on arm, the offset is required.
+ crashkernel=<range1>:<size1>[,<range2>:<size2>,...]@offset
+ range=start-[end]
+
+ 'start' is inclusive and 'end' is exclusive.
+
+For example:
+
+ crashkernel=512M-2G:64M,2G-:128M
+
+This would mean:
+
+ 1) if the RAM is smaller than 512M, then don't reserve anything
+ (this is the "rescue" case)
+ 2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M
+ 3) if the RAM size is larger than 2G, then reserve 128M
+
+
+
+Boot into System Kernel
+=======================
+
+1) Update the boot loader (such as grub, yaboot, or lilo) configuration
+ files as necessary.
+
+2) Boot the system kernel with the boot parameter "crashkernel=Y@X",
+ where Y specifies how much memory to reserve for the dump-capture kernel
+ and X specifies the beginning of this reserved memory. For example,
+ "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory
+ starting at physical address 0x01000000 (16MB) for the dump-capture kernel.
+
+ On x86 and x86_64, use "crashkernel=64M@16M".
+
+ On ppc64, use "crashkernel=128M@32M".
+
+ On ia64, 256M@256M is a generous value that typically works.
+ The region may be automatically placed on ia64, see the
+ dump-capture kernel config option notes above.
+ If use sparse memory, the size should be rounded to GRANULE boundaries.
+
+ On s390x, typically use "crashkernel=xxM". The value of xx is dependent
+ on the memory consumption of the kdump system. In general this is not
+ dependent on the memory size of the production system.
+
+ On arm, use "crashkernel=Y@X". Note that the start address of the kernel
+ will be aligned to 128MiB (0x08000000), so if the start address is not then
+ any space below the alignment point may be overwritten by the dump-capture kernel,
+ which means it is possible that the vmcore is not that precise as expected.
+
+
+Load the Dump-capture Kernel
+============================
+
+After booting to the system kernel, dump-capture kernel needs to be
+loaded.
+
+Based on the architecture and type of image (relocatable or not), one
+can choose to load the uncompressed vmlinux or compressed bzImage/vmlinuz
+of dump-capture kernel. Following is the summary.
+
+For i386 and x86_64:
+ - Use vmlinux if kernel is not relocatable.
+ - Use bzImage/vmlinuz if kernel is relocatable.
+For ppc64:
+ - Use vmlinux
+For ia64:
+ - Use vmlinux or vmlinuz.gz
+For s390x:
+ - Use image or bzImage
+For arm:
+ - Use zImage
+
+If you are using a uncompressed vmlinux image then use following command
+to load dump-capture kernel.
+
+ kexec -p <dump-capture-kernel-vmlinux-image> \
+ --initrd=<initrd-for-dump-capture-kernel> --args-linux \
+ --append="root=<root-dev> <arch-specific-options>"
+
+If you are using a compressed bzImage/vmlinuz, then use following command
+to load dump-capture kernel.
+
+ kexec -p <dump-capture-kernel-bzImage> \
+ --initrd=<initrd-for-dump-capture-kernel> \
+ --append="root=<root-dev> <arch-specific-options>"
+
+If you are using a compressed zImage, then use following command
+to load dump-capture kernel.
+
+ kexec --type zImage -p <dump-capture-kernel-bzImage> \
+ --initrd=<initrd-for-dump-capture-kernel> \
+ --dtb=<dtb-for-dump-capture-kernel> \
+ --append="root=<root-dev> <arch-specific-options>"
+
+
+Please note, that --args-linux does not need to be specified for ia64.
+It is planned to make this a no-op on that architecture, but for now
+it should be omitted
+
+Following are the arch specific command line options to be used while
+loading dump-capture kernel.
+
+For i386, x86_64 and ia64:
+ "1 irqpoll maxcpus=1 reset_devices"
+
+For ppc64:
+ "1 maxcpus=1 noirqdistrib reset_devices"
+
+For s390x:
+ "1 maxcpus=1 cgroup_disable=memory"
+
+For arm:
+ "1 maxcpus=1 reset_devices"
+
+Notes on loading the dump-capture kernel:
+
+* By default, the ELF headers are stored in ELF64 format to support
+ systems with more than 4GB memory. On i386, kexec automatically checks if
+ the physical RAM size exceeds the 4 GB limit and if not, uses ELF32.
+ So, on non-PAE systems, ELF32 is always used.
+
+ The --elf32-core-headers option can be used to force the generation of ELF32
+ headers. This is necessary because GDB currently cannot open vmcore files
+ with ELF64 headers on 32-bit systems.
+
+* The "irqpoll" boot parameter reduces driver initialization failures
+ due to shared interrupts in the dump-capture kernel.
+
+* You must specify <root-dev> in the format corresponding to the root
+ device name in the output of mount command.
+
+* Boot parameter "1" boots the dump-capture kernel into single-user
+ mode without networking. If you want networking, use "3".
+
+* We generally don' have to bring up a SMP kernel just to capture the
+ dump. Hence generally it is useful either to build a UP dump-capture
+ kernel or specify maxcpus=1 option while loading dump-capture kernel.
+
+* For s390x there are two kdump modes: If a ELF header is specified with
+ the elfcorehdr= kernel parameter, it is used by the kdump kernel as it
+ is done on all other architectures. If no elfcorehdr= kernel parameter is
+ specified, the s390x kdump kernel dynamically creates the header. The
+ second mode has the advantage that for CPU and memory hotplug, kdump has
+ not to be reloaded with kexec_load().
+
+* For s390x systems with many attached devices the "cio_ignore" kernel
+ parameter should be used for the kdump kernel in order to prevent allocation
+ of kernel memory for devices that are not relevant for kdump. The same
+ applies to systems that use SCSI/FCP devices. In that case the
+ "allow_lun_scan" zfcp module parameter should be set to zero before
+ setting FCP devices online.
+
+Kernel Panic
+============
+
+After successfully loading the dump-capture kernel as previously
+described, the system will reboot into the dump-capture kernel if a
+system crash is triggered. Trigger points are located in panic(),
+die(), die_nmi() and in the sysrq handler (ALT-SysRq-c).
+
+The following conditions will execute a crash trigger point:
+
+If a hard lockup is detected and "NMI watchdog" is configured, the system
+will boot into the dump-capture kernel ( die_nmi() ).
+
+If die() is called, and it happens to be a thread with pid 0 or 1, or die()
+is called inside interrupt context or die() is called and panic_on_oops is set,
+the system will boot into the dump-capture kernel.
+
+On powerpc systems when a soft-reset is generated, die() is called by all cpus
+and the system will boot into the dump-capture kernel.
+
+For testing purposes, you can trigger a crash by using "ALT-SysRq-c",
+"echo c > /proc/sysrq-trigger" or write a module to force the panic.
+
+Write Out the Dump File
+=======================
+
+After the dump-capture kernel is booted, write out the dump file with
+the following command:
+
+ cp /proc/vmcore <dump-file>
+
+
+Analysis
+========
+
+Before analyzing the dump image, you should reboot into a stable kernel.
+
+You can do limited analysis using GDB on the dump file copied out of
+/proc/vmcore. Use the debug vmlinux built with -g and run the following
+command:
+
+ gdb vmlinux <dump-file>
+
+Stack trace for the task on processor 0, register display, and memory
+display work fine.
+
+Note: GDB cannot analyze core files generated in ELF64 format for x86.
+On systems with a maximum of 4GB of memory, you can generate
+ELF32-format headers using the --elf32-core-headers kernel option on the
+dump kernel.
+
+You can also use the Crash utility to analyze dump files in Kdump
+format. Crash is available on Dave Anderson's site at the following URL:
+
+ http://people.redhat.com/~anderson/
+
+Trigger Kdump on WARN()
+=======================
+
+The kernel parameter, panic_on_warn, calls panic() in all WARN() paths. This
+will cause a kdump to occur at the panic() call. In cases where a user wants
+to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1
+to achieve the same behaviour.
+
+Contact
+=======
+
+Vivek Goyal (vgoyal@redhat.com)
+Maneesh Soni (maneesh@in.ibm.com)
+
diff --git a/Documentation/kernel-doc-nano-HOWTO.txt b/Documentation/kernel-doc-nano-HOWTO.txt
new file mode 100644
index 000000000..acbc1a3d0
--- /dev/null
+++ b/Documentation/kernel-doc-nano-HOWTO.txt
@@ -0,0 +1,362 @@
+kernel-doc nano-HOWTO
+=====================
+
+How to format kernel-doc comments
+---------------------------------
+
+In order to provide embedded, 'C' friendly, easy to maintain,
+but consistent and extractable documentation of the functions and
+data structures in the Linux kernel, the Linux kernel has adopted
+a consistent style for documenting functions and their parameters,
+and structures and their members.
+
+The format for this documentation is called the kernel-doc format.
+It is documented in this Documentation/kernel-doc-nano-HOWTO.txt file.
+
+This style embeds the documentation within the source files, using
+a few simple conventions. The scripts/kernel-doc perl script, some
+SGML templates in Documentation/DocBook, and other tools understand
+these conventions, and are used to extract this embedded documentation
+into various documents.
+
+In order to provide good documentation of kernel functions and data
+structures, please use the following conventions to format your
+kernel-doc comments in Linux kernel source.
+
+We definitely need kernel-doc formatted documentation for functions
+that are exported to loadable modules using EXPORT_SYMBOL.
+
+We also look to provide kernel-doc formatted documentation for
+functions externally visible to other kernel files (not marked
+"static").
+
+We also recommend providing kernel-doc formatted documentation
+for private (file "static") routines, for consistency of kernel
+source code layout. But this is lower priority and at the
+discretion of the MAINTAINER of that kernel source file.
+
+Data structures visible in kernel include files should also be
+documented using kernel-doc formatted comments.
+
+The opening comment mark "/**" is reserved for kernel-doc comments.
+Only comments so marked will be considered by the kernel-doc scripts,
+and any comment so marked must be in kernel-doc format. Do not use
+"/**" to be begin a comment block unless the comment block contains
+kernel-doc formatted comments. The closing comment marker for
+kernel-doc comments can be either "*/" or "**/", but "*/" is
+preferred in the Linux kernel tree.
+
+Kernel-doc comments should be placed just before the function
+or data structure being described.
+
+Example kernel-doc function comment:
+
+/**
+ * foobar() - short function description of foobar
+ * @arg1: Describe the first argument to foobar.
+ * @arg2: Describe the second argument to foobar.
+ * One can provide multiple line descriptions
+ * for arguments.
+ *
+ * A longer description, with more discussion of the function foobar()
+ * that might be useful to those using or modifying it. Begins with
+ * empty comment line, and may include additional embedded empty
+ * comment lines.
+ *
+ * The longer description can have multiple paragraphs.
+ *
+ * Return: Describe the return value of foobar.
+ */
+
+The short description following the subject can span multiple lines
+and ends with an @argument description, an empty line or the end of
+the comment block.
+
+The @argument descriptions must begin on the very next line following
+this opening short function description line, with no intervening
+empty comment lines.
+
+If a function parameter is "..." (varargs), it should be listed in
+kernel-doc notation as:
+ * @...: description
+
+The return value, if any, should be described in a dedicated section
+named "Return".
+
+Example kernel-doc data structure comment.
+
+/**
+ * struct blah - the basic blah structure
+ * @mem1: describe the first member of struct blah
+ * @mem2: describe the second member of struct blah,
+ * perhaps with more lines and words.
+ *
+ * Longer description of this structure.
+ */
+
+The kernel-doc function comments describe each parameter to the
+function, in order, with the @name lines.
+
+The kernel-doc data structure comments describe each structure member
+in the data structure, with the @name lines.
+
+The longer description formatting is "reflowed", losing your line
+breaks. So presenting carefully formatted lists within these
+descriptions won't work so well; derived documentation will lose
+the formatting.
+
+See the section below "How to add extractable documentation to your
+source files" for more details and notes on how to format kernel-doc
+comments.
+
+Components of the kernel-doc system
+-----------------------------------
+
+Many places in the source tree have extractable documentation in the
+form of block comments above functions. The components of this system
+are:
+
+- scripts/kernel-doc
+
+ This is a perl script that hunts for the block comments and can mark
+ them up directly into DocBook, man, text, and HTML. (No, not
+ texinfo.)
+
+- Documentation/DocBook/*.tmpl
+
+ These are SGML template files, which are normal SGML files with
+ special place-holders for where the extracted documentation should
+ go.
+
+- scripts/basic/docproc.c
+
+ This is a program for converting SGML template files into SGML
+ files. When a file is referenced it is searched for symbols
+ exported (EXPORT_SYMBOL), to be able to distinguish between internal
+ and external functions.
+ It invokes kernel-doc, giving it the list of functions that
+ are to be documented.
+ Additionally it is used to scan the SGML template files to locate
+ all the files referenced herein. This is used to generate dependency
+ information as used by make.
+
+- Makefile
+
+ The targets 'xmldocs', 'psdocs', 'pdfdocs', and 'htmldocs' are used
+ to build XML DocBook files, PostScript files, PDF files, and html files
+ in Documentation/DocBook. The older target 'sgmldocs' is equivalent
+ to 'xmldocs'.
+
+- Documentation/DocBook/Makefile
+
+ This is where C files are associated with SGML templates.
+
+
+How to extract the documentation
+--------------------------------
+
+If you just want to read the ready-made books on the various
+subsystems (see Documentation/DocBook/*.tmpl), just type 'make
+psdocs', or 'make pdfdocs', or 'make htmldocs', depending on your
+preference. If you would rather read a different format, you can type
+'make xmldocs' and then use DocBook tools to convert
+Documentation/DocBook/*.xml to a format of your choice (for example,
+'db2html ...' if 'make htmldocs' was not defined).
+
+If you want to see man pages instead, you can do this:
+
+$ cd linux
+$ scripts/kernel-doc -man $(find -name '*.c') | split-man.pl /tmp/man
+$ scripts/kernel-doc -man $(find -name '*.h') | split-man.pl /tmp/man
+
+Here is split-man.pl:
+
+-->
+#!/usr/bin/perl
+
+if ($#ARGV < 0) {
+ die "where do I put the results?\n";
+}
+
+mkdir $ARGV[0],0777;
+$state = 0;
+while (<STDIN>) {
+ if (/^\.TH \"[^\"]*\" 9 \"([^\"]*)\"/) {
+ if ($state == 1) { close OUT }
+ $state = 1;
+ $fn = "$ARGV[0]/$1.9";
+ print STDERR "Creating $fn\n";
+ open OUT, ">$fn" or die "can't open $fn: $!\n";
+ print OUT $_;
+ } elsif ($state != 0) {
+ print OUT $_;
+ }
+}
+
+close OUT;
+<--
+
+If you just want to view the documentation for one function in one
+file, you can do this:
+
+$ scripts/kernel-doc -man -function fn file | nroff -man | less
+
+or this:
+
+$ scripts/kernel-doc -text -function fn file
+
+
+How to add extractable documentation to your source files
+---------------------------------------------------------
+
+The format of the block comment is like this:
+
+/**
+ * function_name(:)? (- short description)?
+(* @parameterx(space)*: (description of parameter x)?)*
+(* a blank line)?
+ * (Description:)? (Description of function)?
+ * (section header: (section description)? )*
+(*)?*/
+
+All "description" text can span multiple lines, although the
+function_name & its short description are traditionally on a single line.
+Description text may also contain blank lines (i.e., lines that contain
+only a "*").
+
+"section header:" names must be unique per function (or struct,
+union, typedef, enum).
+
+Use the section header "Return" for sections describing the return value
+of a function.
+
+Avoid putting a spurious blank line after the function name, or else the
+description will be repeated!
+
+All descriptive text is further processed, scanning for the following special
+patterns, which are highlighted appropriately.
+
+'funcname()' - function
+'$ENVVAR' - environment variable
+'&struct_name' - name of a structure (up to two words including 'struct')
+'@parameter' - name of a parameter
+'%CONST' - name of a constant.
+
+NOTE 1: The multi-line descriptive text you provide does *not* recognize
+line breaks, so if you try to format some text nicely, as in:
+
+ Return:
+ 0 - cool
+ 1 - invalid arg
+ 2 - out of memory
+
+this will all run together and produce:
+
+ Return: 0 - cool 1 - invalid arg 2 - out of memory
+
+NOTE 2: If the descriptive text you provide has lines that begin with
+some phrase followed by a colon, each of those phrases will be taken as
+a new section heading, which means you should similarly try to avoid text
+like:
+
+ Return:
+ 0: cool
+ 1: invalid arg
+ 2: out of memory
+
+every line of which would start a new section. Again, probably not
+what you were after.
+
+Take a look around the source tree for examples.
+
+
+kernel-doc for structs, unions, enums, and typedefs
+---------------------------------------------------
+
+Beside functions you can also write documentation for structs, unions,
+enums and typedefs. Instead of the function name you must write the name
+of the declaration; the struct/union/enum/typedef must always precede
+the name. Nesting of declarations is not supported.
+Use the argument mechanism to document members or constants.
+
+Inside a struct description, you can use the "private:" and "public:"
+comment tags. Structure fields that are inside a "private:" area
+are not listed in the generated output documentation. The "private:"
+and "public:" tags must begin immediately following a "/*" comment
+marker. They may optionally include comments between the ":" and the
+ending "*/" marker.
+
+Example:
+
+/**
+ * struct my_struct - short description
+ * @a: first member
+ * @b: second member
+ *
+ * Longer description
+ */
+struct my_struct {
+ int a;
+ int b;
+/* private: internal use only */
+ int c;
+};
+
+
+Including documentation blocks in source files
+----------------------------------------------
+
+To facilitate having source code and comments close together, you can
+include kernel-doc documentation blocks that are free-form comments
+instead of being kernel-doc for functions, structures, unions,
+enums, or typedefs. This could be used for something like a
+theory of operation for a driver or library code, for example.
+
+This is done by using a DOC: section keyword with a section title. E.g.:
+
+/**
+ * DOC: Theory of Operation
+ *
+ * The whizbang foobar is a dilly of a gizmo. It can do whatever you
+ * want it to do, at any time. It reads your mind. Here's how it works.
+ *
+ * foo bar splat
+ *
+ * The only drawback to this gizmo is that is can sometimes damage
+ * hardware, software, or its subject(s).
+ */
+
+DOC: sections are used in SGML templates files as indicated below.
+
+
+How to make new SGML template files
+-----------------------------------
+
+SGML template files (*.tmpl) are like normal SGML files, except that
+they can contain escape sequences where extracted documentation should
+be inserted.
+
+!E<filename> is replaced by the documentation, in <filename>, for
+functions that are exported using EXPORT_SYMBOL: the function list is
+collected from files listed in Documentation/DocBook/Makefile.
+
+!I<filename> is replaced by the documentation for functions that are
+_not_ exported using EXPORT_SYMBOL.
+
+!D<filename> is used to name additional files to search for functions
+exported using EXPORT_SYMBOL.
+
+!F<filename> <function [functions...]> is replaced by the
+documentation, in <filename>, for the functions listed.
+
+!P<filename> <section title> is replaced by the contents of the DOC:
+section titled <section title> from <filename>.
+Spaces are allowed in <section title>; do not quote the <section title>.
+
+!C<filename> is replaced by nothing, but makes the tools check that
+all DOC: sections and documented functions, symbols, etc. are used.
+This makes sense to use when you use !F/!P only and want to verify
+that all documentation is included.
+
+Tim.
+*/ <twaugh@redhat.com>
diff --git a/Documentation/kernel-docs.txt b/Documentation/kernel-docs.txt
new file mode 100644
index 000000000..eda1eb145
--- /dev/null
+++ b/Documentation/kernel-docs.txt
@@ -0,0 +1,724 @@
+
+ Index of Documentation for People Interested in Writing and/or
+
+ Understanding the Linux Kernel.
+
+ Juan-Mariano de Goyeneche <jmseyas@dit.upm.es>
+
+/*
+ * The latest version of this document may be found at:
+ * http://www.dit.upm.es/~jmseyas/linux/kernel/hackers-docs.html
+ */
+
+ The need for a document like this one became apparent in the
+ linux-kernel mailing list as the same questions, asking for pointers
+ to information, appeared again and again.
+
+ Fortunately, as more and more people get to GNU/Linux, more and more
+ get interested in the Kernel. But reading the sources is not always
+ enough. It is easy to understand the code, but miss the concepts, the
+ philosophy and design decisions behind this code.
+
+ Unfortunately, not many documents are available for beginners to
+ start. And, even if they exist, there was no "well-known" place which
+ kept track of them. These lines try to cover this lack. All documents
+ available on line known by the author are listed, while some reference
+ books are also mentioned.
+
+ PLEASE, if you know any paper not listed here or write a new document,
+ send me an e-mail, and I'll include a reference to it here. Any
+ corrections, ideas or comments are also welcomed.
+
+ The papers that follow are listed in no particular order. All are
+ cataloged with the following fields: the document's "Title", the
+ "Author"/s, the "URL" where they can be found, some "Keywords" helpful
+ when searching for specific topics, and a brief "Description" of the
+ Document.
+
+ Enjoy!
+
+ ON-LINE DOCS:
+
+ * Title: "Linux Device Drivers, Third Edition"
+ Author: Jonathan Corbet, Alessandro Rubini, Greg Kroah-Hartman
+ URL: http://lwn.net/Kernel/LDD3/
+ Description: A 600-page book covering the (2.6.10) driver
+ programming API and kernel hacking in general. Available under the
+ Creative Commons Attribution-ShareAlike 2.0 license.
+
+ * Title: "The Linux Kernel"
+ Author: David A. Rusling.
+ URL: http://www.tldp.org/LDP/tlk/tlk.html
+ Keywords: everything!, book.
+ Description: On line, 200 pages book describing most aspects of
+ the Linux Kernel. Probably, the first reference for beginners.
+ Lots of illustrations explaining data structures use and
+ relationships in the purest Richard W. Stevens' style. Contents:
+ "1.-Hardware Basics, 2.-Software Basics, 3.-Memory Management,
+ 4.-Processes, 5.-Interprocess Communication Mechanisms, 6.-PCI,
+ 7.-Interrupts and Interrupt Handling, 8.-Device Drivers, 9.-The
+ File system, 10.-Networks, 11.-Kernel Mechanisms, 12.-Modules,
+ 13.-The Linux Kernel Sources, A.-Linux Data Structures, B.-The
+ Alpha AXP Processor, C.-Useful Web and FTP Sites, D.-The GNU
+ General Public License, Glossary". In short: a must have.
+
+ * Title: "Linux Device Drivers, 2nd Edition"
+ Author: Alessandro Rubini and Jonathan Corbet.
+ URL: http://www.xml.com/ldd/chapter/book/index.html
+ Keywords: device drivers, modules, debugging, memory, hardware,
+ interrupt handling, char drivers, block drivers, kmod, mmap, DMA,
+ buses.
+ Description: O'Reilly's popular book, now also on-line under the
+ GNU Free Documentation License.
+ Notes: You can also buy it in paper-form from O'Reilly. See below
+ under BOOKS (Not on-line).
+
+ * Title: "Conceptual Architecture of the Linux Kernel"
+ Author: Ivan T. Bowman.
+ URL: http://plg.uwaterloo.ca/
+ Keywords: conceptual software architecture, extracted design,
+ reverse engineering, system structure.
+ Description: Conceptual software architecture of the Linux kernel,
+ automatically extracted from the source code. Very detailed. Good
+ figures. Gives good overall kernel understanding.
+
+ * Title: "Concrete Architecture of the Linux Kernel"
+ Author: Ivan T. Bowman, Saheem Siddiqi, and Meyer C. Tanuan.
+ URL: http://plg.uwaterloo.ca/
+ Keywords: concrete architecture, extracted design, reverse
+ engineering, system structure, dependencies.
+ Description: Concrete architecture of the Linux kernel,
+ automatically extracted from the source code. Very detailed. Good
+ figures. Gives good overall kernel understanding. This papers
+ focus on lower details than its predecessor (files, variables...).
+
+ * Title: "Linux as a Case Study: Its Extracted Software
+ Architecture"
+ Author: Ivan T. Bowman, Richard C. Holt and Neil V. Brewster.
+ URL: http://plg.uwaterloo.ca/
+ Keywords: software architecture, architecture recovery,
+ redocumentation.
+ Description: Paper appeared at ICSE'99, Los Angeles, May 16-22,
+ 1999. A mixture of the previous two documents from the same
+ author.
+
+ * Title: "Overview of the Virtual File System"
+ Author: Richard Gooch.
+ URL: http://www.mjmwired.net/kernel/Documentation/filesystems/vfs.txt
+ Keywords: VFS, File System, mounting filesystems, opening files,
+ dentries, dcache.
+ Description: Brief introduction to the Linux Virtual File System.
+ What is it, how it works, operations taken when opening a file or
+ mounting a file system and description of important data
+ structures explaining the purpose of each of their entries.
+
+ * Title: "The Linux RAID-1, 4, 5 Code"
+ Author: Ingo Molnar, Gadi Oxman and Miguel de Icaza.
+ URL: http://www.linuxjournal.com/article.php?sid=2391
+ Keywords: RAID, MD driver.
+ Description: Linux Journal Kernel Korner article. Here is its
+ abstract: "A description of the implementation of the RAID-1,
+ RAID-4 and RAID-5 personalities of the MD device driver in the
+ Linux kernel, providing users with high performance and reliable,
+ secondary-storage capability using software".
+
+ * Title: "Dynamic Kernels: Modularized Device Drivers"
+ Author: Alessandro Rubini.
+ URL: http://www.linuxjournal.com/article.php?sid=1219
+ Keywords: device driver, module, loading/unloading modules,
+ allocating resources.
+ Description: Linux Journal Kernel Korner article. Here is its
+ abstract: "This is the first of a series of four articles
+ co-authored by Alessandro Rubini and Georg Zezchwitz which present
+ a practical approach to writing Linux device drivers as kernel
+ loadable modules. This installment presents an introduction to the
+ topic, preparing the reader to understand next month's
+ installment".
+
+ * Title: "Dynamic Kernels: Discovery"
+ Author: Alessandro Rubini.
+ URL: http://www.linuxjournal.com/article.php?sid=1220
+ Keywords: character driver, init_module, clean_up module,
+ autodetection, mayor number, minor number, file operations,
+ open(), close().
+ Description: Linux Journal Kernel Korner article. Here is its
+ abstract: "This article, the second of four, introduces part of
+ the actual code to create custom module implementing a character
+ device driver. It describes the code for module initialization and
+ cleanup, as well as the open() and close() system calls".
+
+ * Title: "The Devil's in the Details"
+ Author: Georg v. Zezschwitz and Alessandro Rubini.
+ URL: http://www.linuxjournal.com/article.php?sid=1221
+ Keywords: read(), write(), select(), ioctl(), blocking/non
+ blocking mode, interrupt handler.
+ Description: Linux Journal Kernel Korner article. Here is its
+ abstract: "This article, the third of four on writing character
+ device drivers, introduces concepts of reading, writing, and using
+ ioctl-calls".
+
+ * Title: "Dissecting Interrupts and Browsing DMA"
+ Author: Alessandro Rubini and Georg v. Zezschwitz.
+ URL: http://www.linuxjournal.com/article.php?sid=1222
+ Keywords: interrupts, irqs, DMA, bottom halves, task queues.
+ Description: Linux Journal Kernel Korner article. Here is its
+ abstract: "This is the fourth in a series of articles about
+ writing character device drivers as loadable kernel modules. This
+ month, we further investigate the field of interrupt handling.
+ Though it is conceptually simple, practical limitations and
+ constraints make this an ``interesting'' part of device driver
+ writing, and several different facilities have been provided for
+ different situations. We also investigate the complex topic of
+ DMA".
+
+ * Title: "Device Drivers Concluded"
+ Author: Georg v. Zezschwitz.
+ URL: http://www.linuxjournal.com/article.php?sid=1287
+ Keywords: address spaces, pages, pagination, page management,
+ demand loading, swapping, memory protection, memory mapping, mmap,
+ virtual memory areas (VMAs), vremap, PCI.
+ Description: Finally, the above turned out into a five articles
+ series. This latest one's introduction reads: "This is the last of
+ five articles about character device drivers. In this final
+ section, Georg deals with memory mapping devices, beginning with
+ an overall description of the Linux memory management concepts".
+
+ * Title: "Network Buffers And Memory Management"
+ Author: Alan Cox.
+ URL: http://www.linuxjournal.com/article.php?sid=1312
+ Keywords: sk_buffs, network devices, protocol/link layer
+ variables, network devices flags, transmit, receive,
+ configuration, multicast.
+ Description: Linux Journal Kernel Korner. Here is the abstract:
+ "Writing a network device driver for Linux is fundamentally
+ simple---most of the complexity (other than talking to the
+ hardware) involves managing network packets in memory".
+
+ * Title: "Writing Linux Device Drivers"
+ Author: Michael K. Johnson.
+ URL: http://users.evitech.fi/~tk/rtos/writing_linux_device_d.html
+ Keywords: files, VFS, file operations, kernel interface, character
+ vs block devices, I/O access, hardware interrupts, DMA, access to
+ user memory, memory allocation, timers.
+ Description: Introductory 50-minutes (sic) tutorial on writing
+ device drivers. 12 pages written by the same author of the "Kernel
+ Hackers' Guide" which give a very good overview of the topic.
+
+ * Title: "The Venus kernel interface"
+ Author: Peter J. Braam.
+ URL:
+ http://www.coda.cs.cmu.edu/doc/html/kernel-venus-protocol.html
+ Keywords: coda, filesystem, venus, cache manager.
+ Description: "This document describes the communication between
+ Venus and kernel level file system code needed for the operation
+ of the Coda filesystem. This version document is meant to describe
+ the current interface (version 1.0) as well as improvements we
+ envisage".
+
+ * Title: "Programming PCI-Devices under Linux"
+ Author: Claus Schroeter.
+ URL:
+ ftp://ftp.llp.fu-berlin.de/pub/linux/LINUX-LAB/whitepapers/pcip.ps.gz
+ Keywords: PCI, device, busmastering.
+ Description: 6 pages tutorial on PCI programming under Linux.
+ Gives the basic concepts on the architecture of the PCI subsystem,
+ as long as basic functions and macros to read/write the devices
+ and perform busmastering.
+
+ * Title: "Writing Character Device Driver for Linux"
+ Author: R. Baruch and C. Schroeter.
+ URL:
+ ftp://ftp.llp.fu-berlin.de/pub/linux/LINUX-LAB/whitepapers/drivers.ps.gz
+ Keywords: character device drivers, I/O, signals, DMA, accessing
+ ports in user space, kernel environment.
+ Description: 68 pages paper on writing character drivers. A little
+ bit old (1.993, 1.994) although still useful.
+
+ * Title: "Design and Implementation of the Second Extended
+ Filesystem"
+ Author: Rémy Card, Theodore Ts'o, Stephen Tweedie.
+ URL: http://web.mit.edu/tytso/www/linux/ext2intro.html
+ Keywords: ext2, linux fs history, inode, directory, link, devices,
+ VFS, physical structure, performance, benchmarks, ext2fs library,
+ ext2fs tools, e2fsck.
+ Description: Paper written by three of the top ext2 hackers.
+ Covers Linux filesystems history, ext2 motivation, ext2 features,
+ design, physical structure on disk, performance, benchmarks,
+ e2fsck's passes description... A must read!
+ Notes: This paper was first published in the Proceedings of the
+ First Dutch International Symposium on Linux, ISBN 90-367-0385-9.
+
+ * Title: "Analysis of the Ext2fs structure"
+ Author: Louis-Dominique Dubeau.
+ URL: http://www.nondot.org/sabre/os/files/FileSystems/ext2fs/
+ Keywords: ext2, filesystem, ext2fs.
+ Description: Description of ext2's blocks, directories, inodes,
+ bitmaps, invariants...
+
+ * Title: "Journaling the Linux ext2fs Filesystem"
+ Author: Stephen C. Tweedie.
+ URL:
+ ftp://ftp.uk.linux.org/pub/linux/sct/fs/jfs/journal-design.ps.gz
+ Keywords: ext3, journaling.
+ Description: Excellent 8-pages paper explaining the journaling
+ capabilities added to ext2 by the author, showing different
+ problems faced and the alternatives chosen.
+
+ * Title: "Kernel API changes from 2.0 to 2.2"
+ Author: Richard Gooch.
+ URL:
+ http://www.linuxhq.com/guides/LKMPG/node28.html
+ Keywords: 2.2, changes.
+ Description: Kernel functions/structures/variables which changed
+ from 2.0.x to 2.2.x.
+
+ * Title: "Kernel API changes from 2.2 to 2.4"
+ Author: Richard Gooch.
+ Keywords: 2.4, changes.
+ Description: Kernel functions/structures/variables which changed
+ from 2.2.x to 2.4.x.
+
+ * Title: "Linux Kernel Module Programming Guide"
+ Author: Ori Pomerantz.
+ URL: http://tldp.org/LDP/lkmpg/2.6/html/index.html
+ Keywords: modules, GPL book, /proc, ioctls, system calls,
+ interrupt handlers .
+ Description: Very nice 92 pages GPL book on the topic of modules
+ programming. Lots of examples.
+
+ * Title: "I/O Event Handling Under Linux"
+ Author: Richard Gooch.
+ Keywords: IO, I/O, select(2), poll(2), FDs, aio_read(2), readiness
+ event queues.
+ Description: From the Introduction: "I/O Event handling is about
+ how your Operating System allows you to manage a large number of
+ open files (file descriptors in UNIX/POSIX, or FDs) in your
+ application. You want the OS to notify you when FDs become active
+ (have data ready to be read or are ready for writing). Ideally you
+ want a mechanism that is scalable. This means a large number of
+ inactive FDs cost very little in memory and CPU time to manage".
+
+ * Title: "The Kernel Hacking HOWTO"
+ Author: Various Talented People, and Rusty.
+ Location: in kernel tree, Documentation/DocBook/kernel-hacking.tmpl
+ (must be built as "make {htmldocs | psdocs | pdfdocs})
+ Keywords: HOWTO, kernel contexts, deadlock, locking, modules,
+ symbols, return conventions.
+ Description: From the Introduction: "Please understand that I
+ never wanted to write this document, being grossly underqualified,
+ but I always wanted to read it, and this was the only way. I
+ simply explain some best practices, and give reading entry-points
+ into the kernel sources. I avoid implementation details: that's
+ what the code is for, and I ignore whole tracts of useful
+ routines. This document assumes familiarity with C, and an
+ understanding of what the kernel is, and how it is used. It was
+ originally written for the 2.3 kernels, but nearly all of it
+ applies to 2.2 too; 2.0 is slightly different".
+
+ * Title: "Writing an ALSA Driver"
+ Author: Takashi Iwai <tiwai@suse.de>
+ URL: http://www.alsa-project.org/~iwai/writing-an-alsa-driver/index.html
+ Keywords: ALSA, sound, soundcard, driver, lowlevel, hardware.
+ Description: Advanced Linux Sound Architecture for developers,
+ both at kernel and user-level sides. ALSA is the Linux kernel
+ sound architecture in the 2.6 kernel version.
+
+ * Title: "Programming Guide for Linux USB Device Drivers"
+ Author: Detlef Fliegl.
+ URL: http://usb.in.tum.de/usbdoc/
+ Keywords: USB, universal serial bus.
+ Description: A must-read. From the Preface: "This document should
+ give detailed information about the current state of the USB
+ subsystem and its API for USB device drivers. The first section
+ will deal with the basics of USB devices. You will learn about
+ different types of devices and their properties. Going into detail
+ you will see how USB devices communicate on the bus. The second
+ section gives an overview of the Linux USB subsystem [2] and the
+ device driver framework. Then the API and its data structures will
+ be explained step by step. The last section of this document
+ contains a reference of all API calls and their return codes".
+ Notes: Beware: the main page states: "This document may not be
+ published, printed or used in excerpts without explicit permission
+ of the author". Fortunately, it may still be read...
+
+ * Title: "Linux Kernel Mailing List Glossary"
+ Author: various
+ URL: http://kernelnewbies.org/glossary/
+ Keywords: glossary, terms, linux-kernel.
+ Description: From the introduction: "This glossary is intended as
+ a brief description of some of the acronyms and terms you may hear
+ during discussion of the Linux kernel".
+
+ * Title: "Linux Kernel Locking HOWTO"
+ Author: Various Talented People, and Rusty.
+ Location: in kernel tree, Documentation/DocBook/kernel-locking.tmpl
+ (must be built as "make {htmldocs | psdocs | pdfdocs})
+ Keywords: locks, locking, spinlock, semaphore, atomic, race
+ condition, bottom halves, tasklets, softirqs.
+ Description: The title says it all: document describing the
+ locking system in the Linux Kernel either in uniprocessor or SMP
+ systems.
+ Notes: "It was originally written for the later (>2.3.47) 2.3
+ kernels, but most of it applies to 2.2 too; 2.0 is slightly
+ different". Freely redistributable under the conditions of the GNU
+ General Public License.
+
+ * Title: "Global spinlock list and usage"
+ Author: Rick Lindsley.
+ URL: http://lse.sourceforge.net/lockhier/global-spin-lock
+ Keywords: spinlock.
+ Description: This is an attempt to document both the existence and
+ usage of the spinlocks in the Linux 2.4.5 kernel. Comprehensive
+ list of spinlocks showing when they are used, which functions
+ access them, how each lock is acquired, under what conditions it
+ is held, whether interrupts can occur or not while it is held...
+
+ * Title: "Porting Linux 2.0 Drivers To Linux 2.2: Changes and New
+ Features "
+ Author: Alan Cox.
+ URL: http://www.linux-mag.com/1999-05/gear_01.html
+ Keywords: ports, porting.
+ Description: Article from Linux Magazine on porting from 2.0 to
+ 2.2 kernels.
+
+ * Title: "Porting Device Drivers To Linux 2.2: part II"
+ Author: Alan Cox.
+ URL: http://www.linux-mag.com/id/238
+ Keywords: ports, porting.
+ Description: Second part on porting from 2.0 to 2.2 kernels.
+
+ * Title: "How To Make Sure Your Driver Will Work On The Power
+ Macintosh"
+ Author: Paul Mackerras.
+ URL: http://www.linux-mag.com/id/261
+ Keywords: Mac, Power Macintosh, porting, drivers, compatibility.
+ Description: The title says it all.
+
+ * Title: "An Introduction to SCSI Drivers"
+ Author: Alan Cox.
+ URL: http://www.linux-mag.com/id/284
+ Keywords: SCSI, device, driver.
+ Description: The title says it all.
+
+ * Title: "Advanced SCSI Drivers And Other Tales"
+ Author: Alan Cox.
+ URL: http://www.linux-mag.com/id/307
+ Keywords: SCSI, device, driver, advanced.
+ Description: The title says it all.
+
+ * Title: "Writing Linux Mouse Drivers"
+ Author: Alan Cox.
+ URL: http://www.linux-mag.com/id/330
+ Keywords: mouse, driver, gpm.
+ Description: The title says it all.
+
+ * Title: "More on Mouse Drivers"
+ Author: Alan Cox.
+ URL: http://www.linux-mag.com/id/356
+ Keywords: mouse, driver, gpm, races, asynchronous I/O.
+ Description: The title still says it all.
+
+ * Title: "Writing Video4linux Radio Driver"
+ Author: Alan Cox.
+ URL: http://www.linux-mag.com/id/381
+ Keywords: video4linux, driver, radio, radio devices.
+ Description: The title says it all.
+
+ * Title: "Video4linux Drivers, Part 1: Video-Capture Device"
+ Author: Alan Cox.
+ URL: http://www.linux-mag.com/id/406
+ Keywords: video4linux, driver, video capture, capture devices,
+ camera driver.
+ Description: The title says it all.
+
+ * Title: "Video4linux Drivers, Part 2: Video-capture Devices"
+ Author: Alan Cox.
+ URL: http://www.linux-mag.com/id/429
+ Keywords: video4linux, driver, video capture, capture devices,
+ camera driver, control, query capabilities, capability, facility.
+ Description: The title says it all.
+
+ * Title: "PCI Management in Linux 2.2"
+ Author: Alan Cox.
+ URL: http://www.linux-mag.com/id/452
+ Keywords: PCI, bus, bus-mastering.
+ Description: The title says it all.
+
+ * Title: "Linux 2.4 Kernel Internals"
+ Author: Tigran Aivazian and Christoph Hellwig.
+ URL: http://www.moses.uklinux.net/patches/lki.html
+ Keywords: Linux, kernel, booting, SMB boot, VFS, page cache.
+ Description: A little book used for a short training course.
+ Covers building the kernel image, booting (including SMP bootup),
+ process management, VFS and more.
+
+ * Title: "Linux IP Networking. A Guide to the Implementation and
+ Modification of the Linux Protocol Stack."
+ Author: Glenn Herrin.
+ URL: http://www.cs.unh.edu/cnrg/gherrin
+ Keywords: network, networking, protocol, IP, UDP, TCP, connection,
+ socket, receiving, transmitting, forwarding, routing, packets,
+ modules, /proc, sk_buff, FIB, tags.
+ Description: Excellent paper devoted to the Linux IP Networking,
+ explaining anything from the kernel's to the user space
+ configuration tools' code. Very good to get a general overview of
+ the kernel networking implementation and understand all steps
+ packets follow from the time they are received at the network
+ device till they are delivered to applications. The studied kernel
+ code is from 2.2.14 version. Provides code for a working packet
+ dropper example.
+
+ * Title: "Get those boards talking under Linux."
+ Author: Alex Ivchenko.
+ URL: http://www.edn.com/article/CA46968.html
+ Keywords: data-acquisition boards, drivers, modules, interrupts,
+ memory allocation.
+ Description: Article written for people wishing to make their data
+ acquisition boards work on their GNU/Linux machines. Gives a basic
+ overview on writing drivers, from the naming of functions to
+ interrupt handling.
+ Notes: Two-parts article. Part II is at
+ URL: http://www.edn.com/article/CA46998.html
+
+ * Title: "Linux PCMCIA Programmer's Guide"
+ Author: David Hinds.
+ URL: http://pcmcia-cs.sourceforge.net/ftp/doc/PCMCIA-PROG.html
+ Keywords: PCMCIA.
+ Description: "This document describes how to write kernel device
+ drivers for the Linux PCMCIA Card Services interface. It also
+ describes how to write user-mode utilities for communicating with
+ Card Services.
+
+ * Title: "The Linux Kernel NFSD Implementation"
+ Author: Neil Brown.
+ URL:
+ http://www.cse.unsw.edu.au/~neilb/oss/linux-commentary/nfsd.html
+ Keywords: knfsd, nfsd, NFS, RPC, lockd, mountd, statd.
+ Description: The title says it all.
+ Notes: Covers knfsd's version 1.4.7 (patch against 2.2.7 kernel).
+
+ * Title: "A Linux vm README"
+ Author: Kanoj Sarcar.
+ URL: http://kos.enix.org/pub/linux-vmm.html
+ Keywords: virtual memory, mm, pgd, vma, page, page flags, page
+ cache, swap cache, kswapd.
+ Description: Telegraphic, short descriptions and definitions
+ relating the Linux virtual memory implementation.
+
+ * Title: "(nearly) Complete Linux Loadable Kernel Modules. The
+ definitive guide for hackers, virus coders and system
+ administrators."
+ Author: pragmatic/THC.
+ URL: http://packetstormsecurity.org/docs/hack/LKM_HACKING.html
+ Keywords: syscalls, intercept, hide, abuse, symbol table.
+ Description: Interesting paper on how to abuse the Linux kernel in
+ order to intercept and modify syscalls, make
+ files/directories/processes invisible, become root, hijack ttys,
+ write kernel modules based virus... and solutions for admins to
+ avoid all those abuses.
+ Notes: For 2.0.x kernels. Gives guidances to port it to 2.2.x
+ kernels.
+
+ BOOKS: (Not on-line)
+
+ * Title: "Linux Device Drivers"
+ Author: Alessandro Rubini.
+ Publisher: O'Reilly & Associates.
+ Date: 1998.
+ Pages: 439.
+ ISBN: 1-56592-292-1
+
+ * Title: "Linux Device Drivers, 2nd Edition"
+ Author: Alessandro Rubini and Jonathan Corbet.
+ Publisher: O'Reilly & Associates.
+ Date: 2001.
+ Pages: 586.
+ ISBN: 0-59600-008-1
+ Notes: Further information in
+ http://www.oreilly.com/catalog/linuxdrive2/
+
+ * Title: "Linux Device Drivers, 3rd Edition"
+ Authors: Jonathan Corbet, Alessandro Rubini, and Greg Kroah-Hartman
+ Publisher: O'Reilly & Associates.
+ Date: 2005.
+ Pages: 636.
+ ISBN: 0-596-00590-3
+ Notes: Further information in
+ http://www.oreilly.com/catalog/linuxdrive3/
+ PDF format, URL: http://lwn.net/Kernel/LDD3/
+
+ * Title: "Linux Kernel Internals"
+ Author: Michael Beck.
+ Publisher: Addison-Wesley.
+ Date: 1997.
+ ISBN: 0-201-33143-8 (second edition)
+
+ * Title: "The Design of the UNIX Operating System"
+ Author: Maurice J. Bach.
+ Publisher: Prentice Hall.
+ Date: 1986.
+ Pages: 471.
+ ISBN: 0-13-201757-1
+
+ * Title: "The Design and Implementation of the 4.3 BSD UNIX
+ Operating System"
+ Author: Samuel J. Leffler, Marshall Kirk McKusick, Michael J.
+ Karels, John S. Quarterman.
+ Publisher: Addison-Wesley.
+ Date: 1989 (reprinted with corrections on October, 1990).
+ ISBN: 0-201-06196-1
+
+ * Title: "The Design and Implementation of the 4.4 BSD UNIX
+ Operating System"
+ Author: Marshall Kirk McKusick, Keith Bostic, Michael J. Karels,
+ John S. Quarterman.
+ Publisher: Addison-Wesley.
+ Date: 1996.
+ ISBN: 0-201-54979-4
+
+ * Title: "Programmation Linux 2.0 API systeme et fonctionnement du
+ noyau"
+ Author: Remy Card, Eric Dumas, Franck Mevel.
+ Publisher: Eyrolles.
+ Date: 1997.
+ Pages: 520.
+ ISBN: 2-212-08932-5
+ Notes: French.
+
+ * Title: "Unix internals -- the new frontiers"
+ Author: Uresh Vahalia.
+ Publisher: Prentice Hall.
+ Date: 1996.
+ Pages: 600.
+ ISBN: 0-13-101908-2
+
+ * Title: "Programming for the real world - POSIX.4"
+ Author: Bill O. Gallmeister.
+ Publisher: O'Reilly & Associates, Inc..
+ Date: 1995.
+ Pages: ???.
+ ISBN: I-56592-074-0
+ Notes: Though not being directly about Linux, Linux aims to be
+ POSIX. Good reference.
+
+ * Title: "UNIX Systems for Modern Architectures: Symmetric
+ Multiprocessing and Caching for Kernel Programmers"
+ Author: Curt Schimmel.
+ Publisher: Addison Wesley.
+ Date: June, 1994.
+ Pages: 432.
+ ISBN: 0-201-63338-8
+
+ MISCELLANEOUS:
+
+ * Name: linux/Documentation
+ Author: Many.
+ URL: Just look inside your kernel sources.
+ Keywords: anything, DocBook.
+ Description: Documentation that comes with the kernel sources,
+ inside the Documentation directory. Some pages from this document
+ (including this document itself) have been moved there, and might
+ be more up to date than the web version.
+
+ * Name: "Linux Kernel Source Reference"
+ Author: Thomas Graichen.
+ URL: http://marc.info/?l=linux-kernel&m=96446640102205&w=4
+ Keywords: CVS, web, cvsweb, browsing source code.
+ Description: Web interface to a CVS server with the kernel
+ sources. "Here you can have a look at any file of the Linux kernel
+ sources of any version starting from 1.0 up to the (daily updated)
+ current version available. Also you can check the differences
+ between two versions of a file".
+
+ * Name: "Cross-Referencing Linux"
+ URL: http://lxr.linux.no/source/
+ Keywords: Browsing source code.
+ Description: Another web-based Linux kernel source code browser.
+ Lots of cross references to variables and functions. You can see
+ where they are defined and where they are used.
+
+ * Name: "Linux Weekly News"
+ URL: http://lwn.net
+ Keywords: latest kernel news.
+ Description: The title says it all. There's a fixed kernel section
+ summarizing developers' work, bug fixes, new features and versions
+ produced during the week. Published every Thursday.
+
+ * Name: "Kernel Traffic"
+ URL: http://kt.earth.li/kernel-traffic/index.html
+ Keywords: linux-kernel mailing list, weekly kernel news.
+ Description: Weekly newsletter covering the most relevant
+ discussions of the linux-kernel mailing list.
+
+ * Name: "CuTTiNG.eDGe.LiNuX"
+ URL: http://edge.kernelnotes.org
+ Keywords: changelist.
+ Description: Site which provides the changelist for every kernel
+ release. What's new, what's better, what's changed. Myrdraal reads
+ the patches and describes them. Pointers to the patches are there,
+ too.
+
+ * Name: "New linux-kernel Mailing List FAQ"
+ URL: http://www.tux.org/lkml/
+ Keywords: linux-kernel mailing list FAQ.
+ Description: linux-kernel is a mailing list for developers to
+ communicate. This FAQ builds on the previous linux-kernel mailing
+ list FAQ maintained by Frohwalt Egerer, who no longer maintains
+ it. Read it to see how to join the mailing list. Dozens of
+ interesting questions regarding the list, Linux, developers (who
+ is ...?), terms (what is...?) are answered here too. Just read it.
+
+ * Name: "Linux Virtual File System"
+ Author: Peter J. Braam.
+ URL: http://www.coda.cs.cmu.edu/doc/talks/linuxvfs/
+ Keywords: slides, VFS, inode, superblock, dentry, dcache.
+ Description: Set of slides, presumably from a presentation on the
+ Linux VFS layer. Covers version 2.1.x, with dentries and the
+ dcache.
+
+ * Name: "Gary's Encyclopedia - The Linux Kernel"
+ Author: Gary (I suppose...).
+ URL: http://slencyclopedia.berlios.de/index.html
+ Keywords: linux, community, everything!
+ Description: Gary's Encyclopedia exists to allow the rapid finding
+ of documentation and other information of interest to GNU/Linux
+ users. It has about 4000 links to external pages in 150 major
+ categories. This link is for kernel-specific links, documents,
+ sites... This list is now hosted by developer.Berlios.de,
+ but seems not to have been updated since sometime in 1999.
+
+ * Name: "The home page of Linux-MM"
+ Author: The Linux-MM team.
+ URL: http://linux-mm.org/
+ Keywords: memory management, Linux-MM, mm patches, TODO, docs,
+ mailing list.
+ Description: Site devoted to Linux Memory Management development.
+ Memory related patches, HOWTOs, links, mm developers... Don't miss
+ it if you are interested in memory management development!
+
+ * Name: "Kernel Newbies IRC Channel"
+ URL: http://www.kernelnewbies.org
+ Keywords: IRC, newbies, channel, asking doubts.
+ Description: #kernelnewbies on irc.openprojects.net. From the web
+ page: "#kernelnewbies is an IRC network dedicated to the 'newbie'
+ kernel hacker. The audience mostly consists of people who are
+ learning about the kernel, working on kernel projects or
+ professional kernel hackers that want to help less seasoned kernel
+ people. [...] #kernelnewbies is on the Open Projects IRC Network,
+ try irc.openprojects.net or irc.<country>.openprojects.net as your
+ server and then /join #kernelnewbies". It also hosts articles,
+ documents, FAQs...
+
+ * Name: "linux-kernel mailing list archives and search engines"
+ URL: http://vger.kernel.org/vger-lists.html
+ URL: http://www.uwsg.indiana.edu/hypermail/linux/kernel/index.html
+ URL: http://marc.theaimsgroup.com/?l=linux-kernel
+ URL: http://groups.google.com/group/mlist.linux.kernel
+ URL: http://www.cs.helsinki.fi/linux/linux-kernel/
+ URL: http://www.lib.uaa.alaska.edu/linux-kernel/
+ Keywords: linux-kernel, archives, search.
+ Description: Some of the linux-kernel mailing list archivers. If
+ you have a better/another one, please let me know.
+ _________________________________________________________________
+
+ Document last updated on Sat 2005-NOV-19
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
new file mode 100644
index 000000000..d6522d892
--- /dev/null
+++ b/Documentation/kernel-parameters.txt
@@ -0,0 +1,4062 @@
+ Kernel Parameters
+ ~~~~~~~~~~~~~~~~~
+
+The following is a consolidated list of the kernel parameters as
+implemented by the __setup(), core_param() and module_param() macros
+and sorted into English Dictionary order (defined as ignoring all
+punctuation and sorting digits before letters in a case insensitive
+manner), and with descriptions where known.
+
+The kernel parses parameters from the kernel command line up to "--";
+if it doesn't recognize a parameter and it doesn't contain a '.', the
+parameter gets passed to init: parameters with '=' go into init's
+environment, others are passed as command line arguments to init.
+Everything after "--" is passed as an argument to init.
+
+Module parameters can be specified in two ways: via the kernel command
+line with a module name prefix, or via modprobe, e.g.:
+
+ (kernel command line) usbcore.blinkenlights=1
+ (modprobe command line) modprobe usbcore blinkenlights=1
+
+Parameters for modules which are built into the kernel need to be
+specified on the kernel command line. modprobe looks through the
+kernel command line (/proc/cmdline) and collects module parameters
+when it loads a module, so the kernel command line can be used for
+loadable modules too.
+
+Hyphens (dashes) and underscores are equivalent in parameter names, so
+ log_buf_len=1M print-fatal-signals=1
+can also be entered as
+ log-buf-len=1M print_fatal_signals=1
+
+Double-quotes can be used to protect spaces in values, e.g.:
+ param="spaces in here"
+
+This document may not be entirely up to date and comprehensive. The command
+"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
+module. Loadable modules, after being loaded into the running kernel, also
+reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
+parameters may be changed at runtime by the command
+"echo -n ${value} > /sys/module/${modulename}/parameters/${parm}".
+
+The parameters listed below are only valid if certain kernel build options were
+enabled and if respective hardware is present. The text in square brackets at
+the beginning of each description states the restrictions within which a
+parameter is applicable:
+
+ ACPI ACPI support is enabled.
+ AGP AGP (Accelerated Graphics Port) is enabled.
+ ALSA ALSA sound support is enabled.
+ APIC APIC support is enabled.
+ APM Advanced Power Management support is enabled.
+ ARM ARM architecture is enabled.
+ AVR32 AVR32 architecture is enabled.
+ AX25 Appropriate AX.25 support is enabled.
+ BLACKFIN Blackfin architecture is enabled.
+ CLK Common clock infrastructure is enabled.
+ CMA Contiguous Memory Area support is enabled.
+ DRM Direct Rendering Management support is enabled.
+ DYNAMIC_DEBUG Build in debug messages and enable them at runtime
+ EDD BIOS Enhanced Disk Drive Services (EDD) is enabled
+ EFI EFI Partitioning (GPT) is enabled
+ EIDE EIDE/ATAPI support is enabled.
+ EVM Extended Verification Module
+ FB The frame buffer device is enabled.
+ FTRACE Function tracing enabled.
+ GCOV GCOV profiling is enabled.
+ HW Appropriate hardware is enabled.
+ IA-64 IA-64 architecture is enabled.
+ IMA Integrity measurement architecture is enabled.
+ IOSCHED More than one I/O scheduler is enabled.
+ IP_PNP IP DHCP, BOOTP, or RARP is enabled.
+ IPV6 IPv6 support is enabled.
+ ISAPNP ISA PnP code is enabled.
+ ISDN Appropriate ISDN support is enabled.
+ JOY Appropriate joystick support is enabled.
+ KGDB Kernel debugger support is enabled.
+ KVM Kernel Virtual Machine support is enabled.
+ LIBATA Libata driver is enabled
+ LP Printer support is enabled.
+ LOOP Loopback device support is enabled.
+ M68k M68k architecture is enabled.
+ These options have more detailed description inside of
+ Documentation/m68k/kernel-options.txt.
+ MDA MDA console support is enabled.
+ MIPS MIPS architecture is enabled.
+ MOUSE Appropriate mouse support is enabled.
+ MSI Message Signaled Interrupts (PCI).
+ MTD MTD (Memory Technology Device) support is enabled.
+ NET Appropriate network support is enabled.
+ NUMA NUMA support is enabled.
+ NFS Appropriate NFS support is enabled.
+ OSS OSS sound support is enabled.
+ PV_OPS A paravirtualized kernel is enabled.
+ PARIDE The ParIDE (parallel port IDE) subsystem is enabled.
+ PARISC The PA-RISC architecture is enabled.
+ PCI PCI bus support is enabled.
+ PCIE PCI Express support is enabled.
+ PCMCIA The PCMCIA subsystem is enabled.
+ PNP Plug & Play support is enabled.
+ PPC PowerPC architecture is enabled.
+ PPT Parallel port support is enabled.
+ PS2 Appropriate PS/2 support is enabled.
+ RAM RAM disk support is enabled.
+ S390 S390 architecture is enabled.
+ SCSI Appropriate SCSI support is enabled.
+ A lot of drivers have their options described inside
+ the Documentation/scsi/ sub-directory.
+ SECURITY Different security models are enabled.
+ SELINUX SELinux support is enabled.
+ APPARMOR AppArmor support is enabled.
+ SERIAL Serial support is enabled.
+ SH SuperH architecture is enabled.
+ SMP The kernel is an SMP kernel.
+ SPARC Sparc architecture is enabled.
+ SWSUSP Software suspend (hibernation) is enabled.
+ SUSPEND System suspend states are enabled.
+ TPM TPM drivers are enabled.
+ TS Appropriate touchscreen support is enabled.
+ UMS USB Mass Storage support is enabled.
+ USB USB support is enabled.
+ USBHID USB Human Interface Device support is enabled.
+ V4L Video For Linux support is enabled.
+ VMMIO Driver for memory mapped virtio devices is enabled.
+ VGA The VGA console has been enabled.
+ VT Virtual terminal support is enabled.
+ WDT Watchdog support is enabled.
+ XT IBM PC/XT MFM hard disk support is enabled.
+ X86-32 X86-32, aka i386 architecture is enabled.
+ X86-64 X86-64 architecture is enabled.
+ More X86-64 boot options can be found in
+ Documentation/x86/x86_64/boot-options.txt .
+ X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64)
+ XEN Xen support is enabled
+
+In addition, the following text indicates that the option:
+
+ BUGS= Relates to possible processor bugs on the said processor.
+ KNL Is a kernel start-up parameter.
+ BOOT Is a boot loader parameter.
+
+Parameters denoted with BOOT are actually interpreted by the boot
+loader, and have no meaning to the kernel directly.
+Do not modify the syntax of boot loader parameters without extreme
+need or coordination with <Documentation/x86/boot.txt>.
+
+There are also arch-specific kernel-parameters not documented here.
+See for example <Documentation/x86/x86_64/boot-options.txt>.
+
+Note that ALL kernel parameters listed below are CASE SENSITIVE, and that
+a trailing = on the name of any parameter states that that parameter will
+be entered as an environment variable, whereas its absence indicates that
+it will appear as a kernel argument readable via /proc/cmdline by programs
+running once the system is up.
+
+The number of kernel parameters is not limited, but the length of the
+complete command line (parameters including spaces etc.) is limited to
+a fixed number of characters. This limit depends on the architecture
+and is between 256 and 4096 characters. It is defined in the file
+./include/asm/setup.h as COMMAND_LINE_SIZE.
+
+Finally, the [KMG] suffix is commonly described after a number of kernel
+parameter values. These 'K', 'M', and 'G' letters represent the _binary_
+multipliers 'Kilo', 'Mega', and 'Giga', equalling 2^10, 2^20, and 2^30
+bytes respectively. Such letter suffixes can also be entirely omitted.
+
+
+ acpi= [HW,ACPI,X86,ARM64]
+ Advanced Configuration and Power Interface
+ Format: { force | off | strict | noirq | rsdt }
+ force -- enable ACPI if default was off
+ off -- disable ACPI if default was on
+ noirq -- do not use ACPI for IRQ routing
+ strict -- Be less tolerant of platforms that are not
+ strictly ACPI specification compliant.
+ rsdt -- prefer RSDT over (default) XSDT
+ copy_dsdt -- copy DSDT to memory
+ For ARM64, ONLY "acpi=off" or "acpi=force" are available
+
+ See also Documentation/power/runtime_pm.txt, pci=noacpi
+
+ acpi_rsdp= [ACPI,EFI,KEXEC]
+ Pass the RSDP address to the kernel, mostly used
+ on machines running EFI runtime service to boot the
+ second kernel for kdump.
+
+ acpi_apic_instance= [ACPI, IOAPIC]
+ Format: <int>
+ 2: use 2nd APIC table, if available
+ 1,0: use 1st APIC table
+ default: 0
+
+ acpi_backlight= [HW,ACPI]
+ acpi_backlight=vendor
+ acpi_backlight=video
+ If set to vendor, prefer vendor specific driver
+ (e.g. thinkpad_acpi, sony_acpi, etc.) instead
+ of the ACPI video.ko driver.
+
+ acpi.debug_layer= [HW,ACPI,ACPI_DEBUG]
+ acpi.debug_level= [HW,ACPI,ACPI_DEBUG]
+ Format: <int>
+ CONFIG_ACPI_DEBUG must be enabled to produce any ACPI
+ debug output. Bits in debug_layer correspond to a
+ _COMPONENT in an ACPI source file, e.g.,
+ #define _COMPONENT ACPI_PCI_COMPONENT
+ Bits in debug_level correspond to a level in
+ ACPI_DEBUG_PRINT statements, e.g.,
+ ACPI_DEBUG_PRINT((ACPI_DB_INFO, ...
+ The debug_level mask defaults to "info". See
+ Documentation/acpi/debug.txt for more information about
+ debug layers and levels.
+
+ Enable processor driver info messages:
+ acpi.debug_layer=0x20000000
+ Enable PCI/PCI interrupt routing info messages:
+ acpi.debug_layer=0x400000
+ Enable AML "Debug" output, i.e., stores to the Debug
+ object while interpreting AML:
+ acpi.debug_layer=0xffffffff acpi.debug_level=0x2
+ Enable all messages related to ACPI hardware:
+ acpi.debug_layer=0x2 acpi.debug_level=0xffffffff
+
+ Some values produce so much output that the system is
+ unusable. The "log_buf_len" parameter may be useful
+ if you need to capture more output.
+
+ acpi_force_table_verification [HW,ACPI]
+ Enable table checksum verification during early stage.
+ By default, this is disabled due to x86 early mapping
+ size limitation.
+
+ acpi_irq_balance [HW,ACPI]
+ ACPI will balance active IRQs
+ default in APIC mode
+
+ acpi_irq_nobalance [HW,ACPI]
+ ACPI will not move active IRQs (default)
+ default in PIC mode
+
+ acpi_irq_isa= [HW,ACPI] If irq_balance, mark listed IRQs used by ISA
+ Format: <irq>,<irq>...
+
+ acpi_irq_pci= [HW,ACPI] If irq_balance, clear listed IRQs for
+ use by PCI
+ Format: <irq>,<irq>...
+
+ acpi_no_auto_serialize [HW,ACPI]
+ Disable auto-serialization of AML methods
+ AML control methods that contain the opcodes to create
+ named objects will be marked as "Serialized" by the
+ auto-serialization feature.
+ This feature is enabled by default.
+ This option allows to turn off the feature.
+
+ acpi_no_static_ssdt [HW,ACPI]
+ Disable installation of static SSDTs at early boot time
+ By default, SSDTs contained in the RSDT/XSDT will be
+ installed automatically and they will appear under
+ /sys/firmware/acpi/tables.
+ This option turns off this feature.
+ Note that specifying this option does not affect
+ dynamic table installation which will install SSDT
+ tables to /sys/firmware/acpi/tables/dynamic.
+
+ acpica_no_return_repair [HW, ACPI]
+ Disable AML predefined validation mechanism
+ This mechanism can repair the evaluation result to make
+ the return objects more ACPI specification compliant.
+ This option is useful for developers to identify the
+ root cause of an AML interpreter issue when the issue
+ has something to do with the repair mechanism.
+
+ acpi_os_name= [HW,ACPI] Tell ACPI BIOS the name of the OS
+ Format: To spoof as Windows 98: ="Microsoft Windows"
+
+ acpi_osi= [HW,ACPI] Modify list of supported OS interface strings
+ acpi_osi="string1" # add string1
+ acpi_osi="!string2" # remove string2
+ acpi_osi=!* # remove all strings
+ acpi_osi=! # disable all built-in OS vendor
+ strings
+ acpi_osi= # disable all strings
+
+ 'acpi_osi=!' can be used in combination with single or
+ multiple 'acpi_osi="string1"' to support specific OS
+ vendor string(s). Note that such command can only
+ affect the default state of the OS vendor strings, thus
+ it cannot affect the default state of the feature group
+ strings and the current state of the OS vendor strings,
+ specifying it multiple times through kernel command line
+ is meaningless. This command is useful when one do not
+ care about the state of the feature group strings which
+ should be controlled by the OSPM.
+ Examples:
+ 1. 'acpi_osi=! acpi_osi="Windows 2000"' is equivalent
+ to 'acpi_osi="Windows 2000" acpi_osi=!', they all
+ can make '_OSI("Windows 2000")' TRUE.
+
+ 'acpi_osi=' cannot be used in combination with other
+ 'acpi_osi=' command lines, the _OSI method will not
+ exist in the ACPI namespace. NOTE that such command can
+ only affect the _OSI support state, thus specifying it
+ multiple times through kernel command line is also
+ meaningless.
+ Examples:
+ 1. 'acpi_osi=' can make 'CondRefOf(_OSI, Local1)'
+ FALSE.
+
+ 'acpi_osi=!*' can be used in combination with single or
+ multiple 'acpi_osi="string1"' to support specific
+ string(s). Note that such command can affect the
+ current state of both the OS vendor strings and the
+ feature group strings, thus specifying it multiple times
+ through kernel command line is meaningful. But it may
+ still not able to affect the final state of a string if
+ there are quirks related to this string. This command
+ is useful when one want to control the state of the
+ feature group strings to debug BIOS issues related to
+ the OSPM features.
+ Examples:
+ 1. 'acpi_osi="Module Device" acpi_osi=!*' can make
+ '_OSI("Module Device")' FALSE.
+ 2. 'acpi_osi=!* acpi_osi="Module Device"' can make
+ '_OSI("Module Device")' TRUE.
+ 3. 'acpi_osi=! acpi_osi=!* acpi_osi="Windows 2000"' is
+ equivalent to
+ 'acpi_osi=!* acpi_osi=! acpi_osi="Windows 2000"'
+ and
+ 'acpi_osi=!* acpi_osi="Windows 2000" acpi_osi=!',
+ they all will make '_OSI("Windows 2000")' TRUE.
+
+ acpi_pm_good [X86]
+ Override the pmtimer bug detection: force the kernel
+ to assume that this machine's pmtimer latches its value
+ and always returns good values.
+
+ acpi_sci= [HW,ACPI] ACPI System Control Interrupt trigger mode
+ Format: { level | edge | high | low }
+
+ acpi_skip_timer_override [HW,ACPI]
+ Recognize and ignore IRQ0/pin2 Interrupt Override.
+ For broken nForce2 BIOS resulting in XT-PIC timer.
+
+ acpi_sleep= [HW,ACPI] Sleep options
+ Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig,
+ old_ordering, nonvs, sci_force_enable }
+ See Documentation/power/video.txt for information on
+ s3_bios and s3_mode.
+ s3_beep is for debugging; it makes the PC's speaker beep
+ as soon as the kernel's real-mode entry point is called.
+ s4_nohwsig prevents ACPI hardware signature from being
+ used during resume from hibernation.
+ old_ordering causes the ACPI 1.0 ordering of the _PTS
+ control method, with respect to putting devices into
+ low power states, to be enforced (the ACPI 2.0 ordering
+ of _PTS is used by default).
+ nonvs prevents the kernel from saving/restoring the
+ ACPI NVS memory during suspend/hibernation and resume.
+ sci_force_enable causes the kernel to set SCI_EN directly
+ on resume from S1/S3 (which is against the ACPI spec,
+ but some broken systems don't work without it).
+
+ acpi_use_timer_override [HW,ACPI]
+ Use timer override. For some broken Nvidia NF5 boards
+ that require a timer override, but don't have HPET
+
+ acpi_enforce_resources= [ACPI]
+ { strict | lax | no }
+ Check for resource conflicts between native drivers
+ and ACPI OperationRegions (SystemIO and SystemMemory
+ only). IO ports and memory declared in ACPI might be
+ used by the ACPI subsystem in arbitrary AML code and
+ can interfere with legacy drivers.
+ strict (default): access to resources claimed by ACPI
+ is denied; legacy drivers trying to access reserved
+ resources will fail to bind to device using them.
+ lax: access to resources claimed by ACPI is allowed;
+ legacy drivers trying to access reserved resources
+ will bind successfully but a warning message is logged.
+ no: ACPI OperationRegions are not marked as reserved,
+ no further checks are performed.
+
+ acpi_no_memhotplug [ACPI] Disable memory hotplug. Useful for kdump
+ kernels.
+
+ add_efi_memmap [EFI; X86] Include EFI memory map in
+ kernel's map of available physical RAM.
+
+ agp= [AGP]
+ { off | try_unsupported }
+ off: disable AGP support
+ try_unsupported: try to drive unsupported chipsets
+ (may crash computer or cause data corruption)
+
+ ALSA [HW,ALSA]
+ See Documentation/sound/alsa/alsa-parameters.txt
+
+ alignment= [KNL,ARM]
+ Allow the default userspace alignment fault handler
+ behaviour to be specified. Bit 0 enables warnings,
+ bit 1 enables fixups, and bit 2 sends a segfault.
+
+ align_va_addr= [X86-64]
+ Align virtual addresses by clearing slice [14:12] when
+ allocating a VMA at process creation time. This option
+ gives you up to 3% performance improvement on AMD F15h
+ machines (where it is enabled by default) for a
+ CPU-intensive style benchmark, and it can vary highly in
+ a microbenchmark depending on workload and compiler.
+
+ 32: only for 32-bit processes
+ 64: only for 64-bit processes
+ on: enable for both 32- and 64-bit processes
+ off: disable for both 32- and 64-bit processes
+
+ alloc_snapshot [FTRACE]
+ Allocate the ftrace snapshot buffer on boot up when the
+ main buffer is allocated. This is handy if debugging
+ and you need to use tracing_snapshot() on boot up, and
+ do not want to use tracing_snapshot_alloc() as it needs
+ to be done where GFP_KERNEL allocations are allowed.
+
+ amd_iommu= [HW,X86-64]
+ Pass parameters to the AMD IOMMU driver in the system.
+ Possible values are:
+ fullflush - enable flushing of IO/TLB entries when
+ they are unmapped. Otherwise they are
+ flushed before they will be reused, which
+ is a lot of faster
+ off - do not initialize any AMD IOMMU found in
+ the system
+ force_isolation - Force device isolation for all
+ devices. The IOMMU driver is not
+ allowed anymore to lift isolation
+ requirements as needed. This option
+ does not override iommu=pt
+
+ amd_iommu_dump= [HW,X86-64]
+ Enable AMD IOMMU driver option to dump the ACPI table
+ for AMD IOMMU. With this option enabled, AMD IOMMU
+ driver will print ACPI tables for AMD IOMMU during
+ IOMMU initialization.
+
+ amijoy.map= [HW,JOY] Amiga joystick support
+ Map of devices attached to JOY0DAT and JOY1DAT
+ Format: <a>,<b>
+ See also Documentation/input/joystick.txt
+
+ analog.map= [HW,JOY] Analog joystick and gamepad support
+ Specifies type or capabilities of an analog joystick
+ connected to one of 16 gameports
+ Format: <type1>,<type2>,..<type16>
+
+ apc= [HW,SPARC]
+ Power management functions (SPARCstation-4/5 + deriv.)
+ Format: noidle
+ Disable APC CPU standby support. SPARCstation-Fox does
+ not play well with APC CPU idle - disable it if you have
+ APC and your system crashes randomly.
+
+ apic= [APIC,X86-32] Advanced Programmable Interrupt Controller
+ Change the output verbosity whilst booting
+ Format: { quiet (default) | verbose | debug }
+ Change the amount of debugging information output
+ when initialising the APIC and IO-APIC components.
+
+ autoconf= [IPV6]
+ See Documentation/networking/ipv6.txt.
+
+ show_lapic= [APIC,X86] Advanced Programmable Interrupt Controller
+ Limit apic dumping. The parameter defines the maximal
+ number of local apics being dumped. Also it is possible
+ to set it to "all" by meaning -- no limit here.
+ Format: { 1 (default) | 2 | ... | all }.
+ The parameter valid if only apic=debug or
+ apic=verbose is specified.
+ Example: apic=debug show_lapic=all
+
+ apm= [APM] Advanced Power Management
+ See header of arch/x86/kernel/apm_32.c.
+
+ arcrimi= [HW,NET] ARCnet - "RIM I" (entirely mem-mapped) cards
+ Format: <io>,<irq>,<nodeID>
+
+ ataflop= [HW,M68k]
+
+ atarimouse= [HW,MOUSE] Atari Mouse
+
+ atkbd.extra= [HW] Enable extra LEDs and keys on IBM RapidAccess,
+ EzKey and similar keyboards
+
+ atkbd.reset= [HW] Reset keyboard during initialization
+
+ atkbd.set= [HW] Select keyboard code set
+ Format: <int> (2 = AT (default), 3 = PS/2)
+
+ atkbd.scroll= [HW] Enable scroll wheel on MS Office and similar
+ keyboards
+
+ atkbd.softraw= [HW] Choose between synthetic and real raw mode
+ Format: <bool> (0 = real, 1 = synthetic (default))
+
+ atkbd.softrepeat= [HW]
+ Use software keyboard repeat
+
+ audit= [KNL] Enable the audit sub-system
+ Format: { "0" | "1" } (0 = disabled, 1 = enabled)
+ 0 - kernel audit is disabled and can not be enabled
+ until the next reboot
+ unset - kernel audit is initialized but disabled and
+ will be fully enabled by the userspace auditd.
+ 1 - kernel audit is initialized and partially enabled,
+ storing at most audit_backlog_limit messages in
+ RAM until it is fully enabled by the userspace
+ auditd.
+ Default: unset
+
+ audit_backlog_limit= [KNL] Set the audit queue size limit.
+ Format: <int> (must be >=0)
+ Default: 64
+
+ baycom_epp= [HW,AX25]
+ Format: <io>,<mode>
+
+ baycom_par= [HW,AX25] BayCom Parallel Port AX.25 Modem
+ Format: <io>,<mode>
+ See header of drivers/net/hamradio/baycom_par.c.
+
+ baycom_ser_fdx= [HW,AX25]
+ BayCom Serial Port AX.25 Modem (Full Duplex Mode)
+ Format: <io>,<irq>,<mode>[,<baud>]
+ See header of drivers/net/hamradio/baycom_ser_fdx.c.
+
+ baycom_ser_hdx= [HW,AX25]
+ BayCom Serial Port AX.25 Modem (Half Duplex Mode)
+ Format: <io>,<irq>,<mode>
+ See header of drivers/net/hamradio/baycom_ser_hdx.c.
+
+ blkdevparts= Manual partition parsing of block device(s) for
+ embedded devices based on command line input.
+ See Documentation/block/cmdline-partition.txt
+
+ boot_delay= Milliseconds to delay each printk during boot.
+ Values larger than 10 seconds (10000) are changed to
+ no delay (0).
+ Format: integer
+
+ bootmem_debug [KNL] Enable bootmem allocator debug messages.
+
+ bttv.card= [HW,V4L] bttv (bt848 + bt878 based grabber cards)
+ bttv.radio= Most important insmod options are available as
+ kernel args too.
+ bttv.pll= See Documentation/video4linux/bttv/Insmod-options
+ bttv.tuner=
+
+ bulk_remove=off [PPC] This parameter disables the use of the pSeries
+ firmware feature for flushing multiple hpte entries
+ at a time.
+
+ c101= [NET] Moxa C101 synchronous serial card
+
+ cachesize= [BUGS=X86-32] Override level 2 CPU cache size detection.
+ Sometimes CPU hardware bugs make them report the cache
+ size incorrectly. The kernel will attempt work arounds
+ to fix known problems, but for some CPUs it is not
+ possible to determine what the correct size should be.
+ This option provides an override for these situations.
+
+ ca_keys= [KEYS] This parameter identifies a specific key(s) on
+ the system trusted keyring to be used for certificate
+ trust validation.
+ format: { id:<keyid> | builtin }
+
+ cca= [MIPS] Override the kernel pages' cache coherency
+ algorithm. Accepted values range from 0 to 7
+ inclusive. See arch/mips/include/asm/pgtable-bits.h
+ for platform specific values (SB1, Loongson3 and
+ others).
+
+ ccw_timeout_log [S390]
+ See Documentation/s390/CommonIO for details.
+
+ cgroup_disable= [KNL] Disable a particular controller
+ Format: {name of the controller(s) to disable}
+ The effects of cgroup_disable=foo are:
+ - foo isn't auto-mounted if you mount all cgroups in
+ a single hierarchy
+ - foo isn't visible as an individually mountable
+ subsystem
+ {Currently only "memory" controller deal with this and
+ cut the overhead, others just disable the usage. So
+ only cgroup_disable=memory is actually worthy}
+
+ checkreqprot [SELINUX] Set initial checkreqprot flag value.
+ Format: { "0" | "1" }
+ See security/selinux/Kconfig help text.
+ 0 -- check protection applied by kernel (includes
+ any implied execute protection).
+ 1 -- check protection requested by application.
+ Default value is set via a kernel config option.
+ Value can be changed at runtime via
+ /selinux/checkreqprot.
+
+ cio_ignore= [S390]
+ See Documentation/s390/CommonIO for details.
+ clk_ignore_unused
+ [CLK]
+ Prevents the clock framework from automatically gating
+ clocks that have not been explicitly enabled by a Linux
+ device driver but are enabled in hardware at reset or
+ by the bootloader/firmware. Note that this does not
+ force such clocks to be always-on nor does it reserve
+ those clocks in any way. This parameter is useful for
+ debug and development, but should not be needed on a
+ platform with proper driver support. For more
+ information, see Documentation/clk.txt.
+
+ clock= [BUGS=X86-32, HW] gettimeofday clocksource override.
+ [Deprecated]
+ Forces specified clocksource (if available) to be used
+ when calculating gettimeofday(). If specified
+ clocksource is not available, it defaults to PIT.
+ Format: { pit | tsc | cyclone | pmtmr }
+
+ clocksource= Override the default clocksource
+ Format: <string>
+ Override the default clocksource and use the clocksource
+ with the name specified.
+ Some clocksource names to choose from, depending on
+ the platform:
+ [all] jiffies (this is the base, fallback clocksource)
+ [ACPI] acpi_pm
+ [ARM] imx_timer1,OSTS,netx_timer,mpu_timer2,
+ pxa_timer,timer3,32k_counter,timer0_1
+ [AVR32] avr32
+ [X86-32] pit,hpet,tsc;
+ scx200_hrt on Geode; cyclone on IBM x440
+ [MIPS] MIPS
+ [PARISC] cr16
+ [S390] tod
+ [SH] SuperH
+ [SPARC64] tick
+ [X86-64] hpet,tsc
+
+ clearcpuid=BITNUM [X86]
+ Disable CPUID feature X for the kernel. See
+ arch/x86/include/asm/cpufeature.h for the valid bit
+ numbers. Note the Linux specific bits are not necessarily
+ stable over kernel options, but the vendor specific
+ ones should be.
+ Also note that user programs calling CPUID directly
+ or using the feature without checking anything
+ will still see it. This just prevents it from
+ being used by the kernel or shown in /proc/cpuinfo.
+ Also note the kernel might malfunction if you disable
+ some critical bits.
+
+ cma=nn[MG]@[start[MG][-end[MG]]]
+ [ARM,X86,KNL]
+ Sets the size of kernel global memory area for
+ contiguous memory allocations and optionally the
+ placement constraint by the physical address range of
+ memory allocations. A value of 0 disables CMA
+ altogether. For more information, see
+ include/linux/dma-contiguous.h
+
+ cmo_free_hint= [PPC] Format: { yes | no }
+ Specify whether pages are marked as being inactive
+ when they are freed. This is used in CMO environments
+ to determine OS memory pressure for page stealing by
+ a hypervisor.
+ Default: yes
+
+ coherent_pool=nn[KMG] [ARM,KNL]
+ Sets the size of memory pool for coherent, atomic dma
+ allocations, by default set to 256K.
+
+ code_bytes [X86] How many bytes of object code to print
+ in an oops report.
+ Range: 0 - 8192
+ Default: 64
+
+ com20020= [HW,NET] ARCnet - COM20020 chipset
+ Format:
+ <io>[,<irq>[,<nodeID>[,<backplane>[,<ckp>[,<timeout>]]]]]
+
+ com90io= [HW,NET] ARCnet - COM90xx chipset (IO-mapped buffers)
+ Format: <io>[,<irq>]
+
+ com90xx= [HW,NET]
+ ARCnet - COM90xx chipset (memory-mapped buffers)
+ Format: <io>[,<irq>[,<memstart>]]
+
+ condev= [HW,S390] console device
+ conmode=
+
+ console= [KNL] Output console device and options.
+
+ tty<n> Use the virtual console device <n>.
+
+ ttyS<n>[,options]
+ ttyUSB0[,options]
+ Use the specified serial port. The options are of
+ the form "bbbbpnf", where "bbbb" is the baud rate,
+ "p" is parity ("n", "o", or "e"), "n" is number of
+ bits, and "f" is flow control ("r" for RTS or
+ omit it). Default is "9600n8".
+
+ See Documentation/serial-console.txt for more
+ information. See
+ Documentation/networking/netconsole.txt for an
+ alternative.
+
+ uart[8250],io,<addr>[,options]
+ uart[8250],mmio,<addr>[,options]
+ uart[8250],mmio32,<addr>[,options]
+ uart[8250],0x<addr>[,options]
+ Start an early, polled-mode console on the 8250/16550
+ UART at the specified I/O port or MMIO address,
+ switching to the matching ttyS device later.
+ MMIO inter-register address stride is either 8-bit
+ (mmio) or 32-bit (mmio32).
+ If none of [io|mmio|mmio32], <addr> is assumed to be
+ equivalent to 'mmio'. 'options' are specified in the
+ same format described for ttyS above; if unspecified,
+ the h/w is not re-initialized.
+
+ hvc<n> Use the hypervisor console device <n>. This is for
+ both Xen and PowerPC hypervisors.
+
+ If the device connected to the port is not a TTY but a braille
+ device, prepend "brl," before the device type, for instance
+ console=brl,ttyS0
+ For now, only VisioBraille is supported.
+
+ consoleblank= [KNL] The console blank (screen saver) timeout in
+ seconds. Defaults to 10*60 = 10mins. A value of 0
+ disables the blank timer.
+
+ coredump_filter=
+ [KNL] Change the default value for
+ /proc/<pid>/coredump_filter.
+ See also Documentation/filesystems/proc.txt.
+
+ cpuidle.off=1 [CPU_IDLE]
+ disable the cpuidle sub-system
+
+ cpcihp_generic= [HW,PCI] Generic port I/O CompactPCI driver
+ Format:
+ <first_slot>,<last_slot>,<port>,<enum_bit>[,<debug>]
+
+ crashkernel=size[KMG][@offset[KMG]]
+ [KNL] Using kexec, Linux can switch to a 'crash kernel'
+ upon panic. This parameter reserves the physical
+ memory region [offset, offset + size] for that kernel
+ image. If '@offset' is omitted, then a suitable offset
+ is selected automatically. Check
+ Documentation/kdump/kdump.txt for further details.
+
+ crashkernel=range1:size1[,range2:size2,...][@offset]
+ [KNL] Same as above, but depends on the memory
+ in the running system. The syntax of range is
+ start-[end] where start and end are both
+ a memory unit (amount[KMG]). See also
+ Documentation/kdump/kdump.txt for an example.
+
+ crashkernel=size[KMG],high
+ [KNL, x86_64] range could be above 4G. Allow kernel
+ to allocate physical memory region from top, so could
+ be above 4G if system have more than 4G ram installed.
+ Otherwise memory region will be allocated below 4G, if
+ available.
+ It will be ignored if crashkernel=X is specified.
+ crashkernel=size[KMG],low
+ [KNL, x86_64] range under 4G. When crashkernel=X,high
+ is passed, kernel could allocate physical memory region
+ above 4G, that cause second kernel crash on system
+ that require some amount of low memory, e.g. swiotlb
+ requires at least 64M+32K low memory. Kernel would
+ try to allocate 72M below 4G automatically.
+ This one let user to specify own low range under 4G
+ for second kernel instead.
+ 0: to disable low allocation.
+ It will be ignored when crashkernel=X,high is not used
+ or memory reserved is below 4G.
+
+ cs89x0_dma= [HW,NET]
+ Format: <dma>
+
+ cs89x0_media= [HW,NET]
+ Format: { rj45 | aui | bnc }
+
+ dasd= [HW,NET]
+ See header of drivers/s390/block/dasd_devmap.c.
+
+ db9.dev[2|3]= [HW,JOY] Multisystem joystick support via parallel port
+ (one device per port)
+ Format: <port#>,<type>
+ See also Documentation/input/joystick-parport.txt
+
+ ddebug_query= [KNL,DYNAMIC_DEBUG] Enable debug messages at early boot
+ time. See Documentation/dynamic-debug-howto.txt for
+ details. Deprecated, see dyndbg.
+
+ debug [KNL] Enable kernel debugging (events log level).
+
+ debug_locks_verbose=
+ [KNL] verbose self-tests
+ Format=<0|1>
+ Print debugging info while doing the locking API
+ self-tests.
+ We default to 0 (no extra messages), setting it to
+ 1 will print _a lot_ more information - normally
+ only useful to kernel developers.
+
+ debug_objects [KNL] Enable object debugging
+
+ no_debug_objects
+ [KNL] Disable object debugging
+
+ debug_guardpage_minorder=
+ [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
+ parameter allows control of the order of pages that will
+ be intentionally kept free (and hence protected) by the
+ buddy allocator. Bigger value increase the probability
+ of catching random memory corruption, but reduce the
+ amount of memory for normal system use. The maximum
+ possible value is MAX_ORDER/2. Setting this parameter
+ to 1 or 2 should be enough to identify most random
+ memory corruption problems caused by bugs in kernel or
+ driver code when a CPU writes to (or reads from) a
+ random memory location. Note that there exists a class
+ of memory corruptions problems caused by buggy H/W or
+ F/W or by drivers badly programing DMA (basically when
+ memory is written at bus level and the CPU MMU is
+ bypassed) which are not detectable by
+ CONFIG_DEBUG_PAGEALLOC, hence this option will not help
+ tracking down these problems.
+
+ debug_pagealloc=
+ [KNL] When CONFIG_DEBUG_PAGEALLOC is set, this
+ parameter enables the feature at boot time. In
+ default, it is disabled. We can avoid allocating huge
+ chunk of memory for debug pagealloc if we don't enable
+ it at boot time and the system will work mostly same
+ with the kernel built without CONFIG_DEBUG_PAGEALLOC.
+ on: enable the feature
+
+ debugpat [X86] Enable PAT debugging
+
+ decnet.addr= [HW,NET]
+ Format: <area>[,<node>]
+ See also Documentation/networking/decnet.txt.
+
+ default_hugepagesz=
+ [same as hugepagesz=] The size of the default
+ HugeTLB page size. This is the size represented by
+ the legacy /proc/ hugepages APIs, used for SHM, and
+ default size when mounting hugetlbfs filesystems.
+ Defaults to the default architecture's huge page size
+ if not specified.
+
+ dhash_entries= [KNL]
+ Set number of hash buckets for dentry cache.
+
+ disable= [IPV6]
+ See Documentation/networking/ipv6.txt.
+
+ disable_cpu_apicid= [X86,APIC,SMP]
+ Format: <int>
+ The number of initial APIC ID for the
+ corresponding CPU to be disabled at boot,
+ mostly used for the kdump 2nd kernel to
+ disable BSP to wake up multiple CPUs without
+ causing system reset or hang due to sending
+ INIT from AP to BSP.
+
+ disable_ddw [PPC/PSERIES]
+ Disable Dynamic DMA Window support. Use this if
+ to workaround buggy firmware.
+
+ disable_ipv6= [IPV6]
+ See Documentation/networking/ipv6.txt.
+
+ disable_mtrr_cleanup [X86]
+ The kernel tries to adjust MTRR layout from continuous
+ to discrete, to make X server driver able to add WB
+ entry later. This parameter disables that.
+
+ disable_mtrr_trim [X86, Intel and AMD only]
+ By default the kernel will trim any uncacheable
+ memory out of your available memory pool based on
+ MTRR settings. This parameter disables that behavior,
+ possibly causing your machine to run very slowly.
+
+ disable_timer_pin_1 [X86]
+ Disable PIN 1 of APIC timer
+ Can be useful to work around chipset bugs.
+
+ dma_debug=off If the kernel is compiled with DMA_API_DEBUG support,
+ this option disables the debugging code at boot.
+
+ dma_debug_entries=<number>
+ This option allows to tune the number of preallocated
+ entries for DMA-API debugging code. One entry is
+ required per DMA-API allocation. Use this if the
+ DMA-API debugging code disables itself because the
+ architectural default is too low.
+
+ dma_debug_driver=<driver_name>
+ With this option the DMA-API debugging driver
+ filter feature can be enabled at boot time. Just
+ pass the driver to filter for as the parameter.
+ The filter can be disabled or changed to another
+ driver later using sysfs.
+
+ drm_kms_helper.edid_firmware=[<connector>:]<file>
+ Broken monitors, graphic adapters and KVMs may
+ send no or incorrect EDID data sets. This parameter
+ allows to specify an EDID data set in the
+ /lib/firmware directory that is used instead.
+ Generic built-in EDID data sets are used, if one of
+ edid/1024x768.bin, edid/1280x1024.bin,
+ edid/1680x1050.bin, or edid/1920x1080.bin is given
+ and no file with the same name exists. Details and
+ instructions how to build your own EDID data are
+ available in Documentation/EDID/HOWTO.txt. An EDID
+ data set will only be used for a particular connector,
+ if its name and a colon are prepended to the EDID
+ name.
+
+ dscc4.setup= [NET]
+
+ dyndbg[="val"] [KNL,DYNAMIC_DEBUG]
+ module.dyndbg[="val"]
+ Enable debug messages at boot time. See
+ Documentation/dynamic-debug-howto.txt for details.
+
+ eagerfpu= [X86]
+ on enable eager fpu restore
+ off disable eager fpu restore
+ auto selects the default scheme, which automatically
+ enables eagerfpu restore for xsaveopt.
+
+ early_ioremap_debug [KNL]
+ Enable debug messages in early_ioremap support. This
+ is useful for tracking down temporary early mappings
+ which are not unmapped.
+
+ earlycon= [KNL] Output early console device and options.
+
+ cdns,<addr>
+ Start an early, polled-mode console on a cadence serial
+ port at the specified address. The cadence serial port
+ must already be setup and configured. Options are not
+ yet supported.
+
+ uart[8250],io,<addr>[,options]
+ uart[8250],mmio,<addr>[,options]
+ uart[8250],mmio32,<addr>[,options]
+ uart[8250],0x<addr>[,options]
+ Start an early, polled-mode console on the 8250/16550
+ UART at the specified I/O port or MMIO address.
+ MMIO inter-register address stride is either 8-bit
+ (mmio) or 32-bit (mmio32).
+ If none of [io|mmio|mmio32], <addr> is assumed to be
+ equivalent to 'mmio'. 'options' are specified in the
+ same format described for "console=ttyS<n>"; if
+ unspecified, the h/w is not initialized.
+
+ pl011,<addr>
+ Start an early, polled-mode console on a pl011 serial
+ port at the specified address. The pl011 serial port
+ must already be setup and configured. Options are not
+ yet supported.
+
+ msm_serial,<addr>
+ Start an early, polled-mode console on an msm serial
+ port at the specified address. The serial port
+ must already be setup and configured. Options are not
+ yet supported.
+
+ msm_serial_dm,<addr>
+ Start an early, polled-mode console on an msm serial
+ dm port at the specified address. The serial port
+ must already be setup and configured. Options are not
+ yet supported.
+
+ smh Use ARM semihosting calls for early console.
+
+ s3c2410,<addr>
+ s3c2412,<addr>
+ s3c2440,<addr>
+ s3c6400,<addr>
+ s5pv210,<addr>
+ exynos4210,<addr>
+ Use early console provided by serial driver available
+ on Samsung SoCs, requires selecting proper type and
+ a correct base address of the selected UART port. The
+ serial port must already be setup and configured.
+ Options are not yet supported.
+
+ earlyprintk= [X86,SH,BLACKFIN,ARM,M68k]
+ earlyprintk=vga
+ earlyprintk=efi
+ earlyprintk=xen
+ earlyprintk=serial[,ttySn[,baudrate]]
+ earlyprintk=serial[,0x...[,baudrate]]
+ earlyprintk=ttySn[,baudrate]
+ earlyprintk=dbgp[debugController#]
+
+ earlyprintk is useful when the kernel crashes before
+ the normal console is initialized. It is not enabled by
+ default because it has some cosmetic problems.
+
+ Append ",keep" to not disable it when the real console
+ takes over.
+
+ Only one of vga, efi, serial, or usb debug port can
+ be used at a time.
+
+ Currently only ttyS0 and ttyS1 may be specified by
+ name. Other I/O ports may be explicitly specified
+ on some architectures (x86 and arm at least) by
+ replacing ttySn with an I/O port address, like this:
+ earlyprintk=serial,0x1008,115200
+ You can find the port for a given device in
+ /proc/tty/driver/serial:
+ 2: uart:ST16650V2 port:00001008 irq:18 ...
+
+ Interaction with the standard serial driver is not
+ very good.
+
+ The VGA and EFI output is eventually overwritten by
+ the real console.
+
+ The xen output can only be used by Xen PV guests.
+
+ edac_report= [HW,EDAC] Control how to report EDAC event
+ Format: {"on" | "off" | "force"}
+ on: enable EDAC to report H/W event. May be overridden
+ by other higher priority error reporting module.
+ off: disable H/W event reporting through EDAC.
+ force: enforce the use of EDAC to report H/W event.
+ default: on.
+
+ ekgdboc= [X86,KGDB] Allow early kernel console debugging
+ ekgdboc=kbd
+
+ This is designed to be used in conjunction with
+ the boot argument: earlyprintk=vga
+
+ edd= [EDD]
+ Format: {"off" | "on" | "skip[mbr]"}
+
+ efi= [EFI]
+ Format: { "old_map", "nochunk", "noruntime", "debug" }
+ old_map [X86-64]: switch to the old ioremap-based EFI
+ runtime services mapping. 32-bit still uses this one by
+ default.
+ nochunk: disable reading files in "chunks" in the EFI
+ boot stub, as chunking can cause problems with some
+ firmware implementations.
+ noruntime : disable EFI runtime services support
+ debug: enable misc debug output
+
+ efi_no_storage_paranoia [EFI; X86]
+ Using this parameter you can use more than 50% of
+ your efi variable storage. Use this parameter only if
+ you are really sure that your UEFI does sane gc and
+ fulfills the spec otherwise your board may brick.
+
+ eisa_irq_edge= [PARISC,HW]
+ See header of drivers/parisc/eisa.c.
+
+ elanfreq= [X86-32]
+ See comment before function elanfreq_setup() in
+ arch/x86/kernel/cpu/cpufreq/elanfreq.c.
+
+ elevator= [IOSCHED]
+ Format: {"cfq" | "deadline" | "noop"}
+ See Documentation/block/cfq-iosched.txt and
+ Documentation/block/deadline-iosched.txt for details.
+
+ elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390]
+ Specifies physical address of start of kernel core
+ image elf header and optionally the size. Generally
+ kexec loader will pass this option to capture kernel.
+ See Documentation/kdump/kdump.txt for details.
+
+ enable_mtrr_cleanup [X86]
+ The kernel tries to adjust MTRR layout from continuous
+ to discrete, to make X server driver able to add WB
+ entry later. This parameter enables that.
+
+ enable_timer_pin_1 [X86]
+ Enable PIN 1 of APIC timer
+ Can be useful to work around chipset bugs
+ (in particular on some ATI chipsets).
+ The kernel tries to set a reasonable default.
+
+ enforcing [SELINUX] Set initial enforcing status.
+ Format: {"0" | "1"}
+ See security/selinux/Kconfig help text.
+ 0 -- permissive (log only, no denials).
+ 1 -- enforcing (deny and log).
+ Default value is 0.
+ Value can be changed at runtime via /selinux/enforce.
+
+ erst_disable [ACPI]
+ Disable Error Record Serialization Table (ERST)
+ support.
+
+ ether= [HW,NET] Ethernet cards parameters
+ This option is obsoleted by the "netdev=" option, which
+ has equivalent usage. See its documentation for details.
+
+ evm= [EVM]
+ Format: { "fix" }
+ Permit 'security.evm' to be updated regardless of
+ current integrity status.
+
+ failslab=
+ fail_page_alloc=
+ fail_make_request=[KNL]
+ General fault injection mechanism.
+ Format: <interval>,<probability>,<space>,<times>
+ See also Documentation/fault-injection/.
+
+ floppy= [HW]
+ See Documentation/blockdev/floppy.txt.
+
+ force_pal_cache_flush
+ [IA-64] Avoid check_sal_cache_flush which may hang on
+ buggy SAL_CACHE_FLUSH implementations. Using this
+ parameter will force ia64_sal_cache_flush to call
+ ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
+
+ forcepae [X86-32]
+ Forcefully enable Physical Address Extension (PAE).
+ Many Pentium M systems disable PAE but may have a
+ functionally usable PAE implementation.
+ Warning: use of this parameter will taint the kernel
+ and may cause unknown problems.
+
+ ftrace=[tracer]
+ [FTRACE] will set and start the specified tracer
+ as early as possible in order to facilitate early
+ boot debugging.
+
+ ftrace_dump_on_oops[=orig_cpu]
+ [FTRACE] will dump the trace buffers on oops.
+ If no parameter is passed, ftrace will dump
+ buffers of all CPUs, but if you pass orig_cpu, it will
+ dump only the buffer of the CPU that triggered the
+ oops.
+
+ ftrace_filter=[function-list]
+ [FTRACE] Limit the functions traced by the function
+ tracer at boot up. function-list is a comma separated
+ list of functions. This list can be changed at run
+ time by the set_ftrace_filter file in the debugfs
+ tracing directory.
+
+ ftrace_notrace=[function-list]
+ [FTRACE] Do not trace the functions specified in
+ function-list. This list can be changed at run time
+ by the set_ftrace_notrace file in the debugfs
+ tracing directory.
+
+ ftrace_graph_filter=[function-list]
+ [FTRACE] Limit the top level callers functions traced
+ by the function graph tracer at boot up.
+ function-list is a comma separated list of functions
+ that can be changed at run time by the
+ set_graph_function file in the debugfs tracing directory.
+
+ ftrace_graph_notrace=[function-list]
+ [FTRACE] Do not trace from the functions specified in
+ function-list. This list is a comma separated list of
+ functions that can be changed at run time by the
+ set_graph_notrace file in the debugfs tracing directory.
+
+ gamecon.map[2|3]=
+ [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
+ support via parallel port (up to 5 devices per port)
+ Format: <port#>,<pad1>,<pad2>,<pad3>,<pad4>,<pad5>
+ See also Documentation/input/joystick-parport.txt
+
+ gamma= [HW,DRM]
+
+ gart_fix_e820= [X86_64] disable the fix e820 for K8 GART
+ Format: off | on
+ default: on
+
+ gcov_persist= [GCOV] When non-zero (default), profiling data for
+ kernel modules is saved and remains accessible via
+ debugfs, even when the module is unloaded/reloaded.
+ When zero, profiling data is discarded and associated
+ debugfs files are removed at module unload time.
+
+ gpt [EFI] Forces disk with valid GPT signature but
+ invalid Protective MBR to be treated as GPT. If the
+ primary GPT is corrupted, it enables the backup/alternate
+ GPT to be used instead.
+
+ grcan.enable0= [HW] Configuration of physical interface 0. Determines
+ the "Enable 0" bit of the configuration register.
+ Format: 0 | 1
+ Default: 0
+ grcan.enable1= [HW] Configuration of physical interface 1. Determines
+ the "Enable 0" bit of the configuration register.
+ Format: 0 | 1
+ Default: 0
+ grcan.select= [HW] Select which physical interface to use.
+ Format: 0 | 1
+ Default: 0
+ grcan.txsize= [HW] Sets the size of the tx buffer.
+ Format: <unsigned int> such that (txsize & ~0x1fffc0) == 0.
+ Default: 1024
+ grcan.rxsize= [HW] Sets the size of the rx buffer.
+ Format: <unsigned int> such that (rxsize & ~0x1fffc0) == 0.
+ Default: 1024
+
+ hashdist= [KNL,NUMA] Large hashes allocated during boot
+ are distributed across NUMA nodes. Defaults on
+ for 64-bit NUMA, off otherwise.
+ Format: 0 | 1 (for off | on)
+
+ hcl= [IA-64] SGI's Hardware Graph compatibility layer
+
+ hd= [EIDE] (E)IDE hard drive subsystem geometry
+ Format: <cyl>,<head>,<sect>
+
+ hest_disable [ACPI]
+ Disable Hardware Error Source Table (HEST) support;
+ corresponding firmware-first mode error processing
+ logic will be disabled.
+
+ highmem=nn[KMG] [KNL,BOOT] forces the highmem zone to have an exact
+ size of <nn>. This works even on boxes that have no
+ highmem otherwise. This also works to reduce highmem
+ size on bigger boxes.
+
+ highres= [KNL] Enable/disable high resolution timer mode.
+ Valid parameters: "on", "off"
+ Default: "on"
+
+ hisax= [HW,ISDN]
+ See Documentation/isdn/README.HiSax.
+
+ hlt [BUGS=ARM,SH]
+
+ hpet= [X86-32,HPET] option to control HPET usage
+ Format: { enable (default) | disable | force |
+ verbose }
+ disable: disable HPET and use PIT instead
+ force: allow force enabled of undocumented chips (ICH4,
+ VIA, nVidia)
+ verbose: show contents of HPET registers during setup
+
+ hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET
+ registers. Default set by CONFIG_HPET_MMAP_DEFAULT.
+
+ hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot.
+ hugepagesz= [HW,IA-64,PPC,X86-64] The size of the HugeTLB pages.
+ On x86-64 and powerpc, this option can be specified
+ multiple times interleaved with hugepages= to reserve
+ huge pages of different sizes. Valid pages sizes on
+ x86-64 are 2M (when the CPU supports "pse") and 1G
+ (when the CPU supports the "pdpe1gb" cpuinfo flag).
+
+ hvc_iucv= [S390] Number of z/VM IUCV hypervisor console (HVC)
+ terminal devices. Valid values: 0..8
+ hvc_iucv_allow= [S390] Comma-separated list of z/VM user IDs.
+ If specified, z/VM IUCV HVC accepts connections
+ from listed z/VM user IDs only.
+
+ hwthread_map= [METAG] Comma-separated list of Linux cpu id to
+ hardware thread id mappings.
+ Format: <cpu>:<hwthread>
+
+ keep_bootcon [KNL]
+ Do not unregister boot console at start. This is only
+ useful for debugging when something happens in the window
+ between unregistering the boot console and initializing
+ the real console.
+
+ i2c_bus= [HW] Override the default board specific I2C bus speed
+ or register an additional I2C bus that is not
+ registered from board initialization code.
+ Format:
+ <bus_id>,<clkrate>
+
+ i8042.debug [HW] Toggle i8042 debug mode
+ i8042.direct [HW] Put keyboard port into non-translated mode
+ i8042.dumbkbd [HW] Pretend that controller can only read data from
+ keyboard and cannot control its state
+ (Don't attempt to blink the leds)
+ i8042.noaux [HW] Don't check for auxiliary (== mouse) port
+ i8042.nokbd [HW] Don't check/create keyboard port
+ i8042.noloop [HW] Disable the AUX Loopback command while probing
+ for the AUX port
+ i8042.nomux [HW] Don't check presence of an active multiplexing
+ controller
+ i8042.nopnp [HW] Don't use ACPIPnP / PnPBIOS to discover KBD/AUX
+ controllers
+ i8042.notimeout [HW] Ignore timeout condition signalled by controller
+ i8042.reset [HW] Reset the controller during init and cleanup
+ i8042.unlock [HW] Unlock (ignore) the keylock
+ i8042.kbdreset [HW] Reset device connected to KBD port
+
+ i810= [HW,DRM]
+
+ i8k.ignore_dmi [HW] Continue probing hardware even if DMI data
+ indicates that the driver is running on unsupported
+ hardware.
+ i8k.force [HW] Activate i8k driver even if SMM BIOS signature
+ does not match list of supported models.
+ i8k.power_status
+ [HW] Report power status in /proc/i8k
+ (disabled by default)
+ i8k.restricted [HW] Allow controlling fans only if SYS_ADMIN
+ capability is set.
+
+ i915.invert_brightness=
+ [DRM] Invert the sense of the variable that is used to
+ set the brightness of the panel backlight. Normally a
+ brightness value of 0 indicates backlight switched off,
+ and the maximum of the brightness value sets the backlight
+ to maximum brightness. If this parameter is set to 0
+ (default) and the machine requires it, or this parameter
+ is set to 1, a brightness value of 0 sets the backlight
+ to maximum brightness, and the maximum of the brightness
+ value switches the backlight off.
+ -1 -- never invert brightness
+ 0 -- machine default
+ 1 -- force brightness inversion
+
+ icn= [HW,ISDN]
+ Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]]
+
+ ide-core.nodma= [HW] (E)IDE subsystem
+ Format: =0.0 to prevent dma on hda, =0.1 hdb =1.0 hdc
+ .vlb_clock .pci_clock .noflush .nohpa .noprobe .nowerr
+ .cdrom .chs .ignore_cable are additional options
+ See Documentation/ide/ide.txt.
+
+ ide-generic.probe-mask= [HW] (E)IDE subsystem
+ Format: <int>
+ Probe mask for legacy ISA IDE ports. Depending on
+ platform up to 6 ports are supported, enabled by
+ setting corresponding bits in the mask to 1. The
+ default value is 0x0, which has a special meaning.
+ On systems that have PCI, it triggers scanning the
+ PCI bus for the first and the second port, which
+ are then probed. On systems without PCI the value
+ of 0x0 enables probing the two first ports as if it
+ was 0x3.
+
+ ide-pci-generic.all-generic-ide [HW] (E)IDE subsystem
+ Claim all unknown PCI IDE storage controllers.
+
+ idle= [X86]
+ Format: idle=poll, idle=halt, idle=nomwait
+ Poll forces a polling idle loop that can slightly
+ improve the performance of waking up a idle CPU, but
+ will use a lot of power and make the system run hot.
+ Not recommended.
+ idle=halt: Halt is forced to be used for CPU idle.
+ In such case C2/C3 won't be used again.
+ idle=nomwait: Disable mwait for CPU C-states
+
+ ignore_loglevel [KNL]
+ Ignore loglevel setting - this will print /all/
+ kernel messages to the console. Useful for debugging.
+ We also add it as printk module parameter, so users
+ could change it dynamically, usually by
+ /sys/module/printk/parameters/ignore_loglevel.
+
+ ihash_entries= [KNL]
+ Set number of hash buckets for inode cache.
+
+ ima_appraise= [IMA] appraise integrity measurements
+ Format: { "off" | "enforce" | "fix" | "log" }
+ default: "enforce"
+
+ ima_appraise_tcb [IMA]
+ The builtin appraise policy appraises all files
+ owned by uid=0.
+
+ ima_hash= [IMA]
+ Format: { md5 | sha1 | rmd160 | sha256 | sha384
+ | sha512 | ... }
+ default: "sha1"
+
+ The list of supported hash algorithms is defined
+ in crypto/hash_info.h.
+
+ ima_policy= [IMA]
+ The builtin measurement policy to load during IMA
+ setup. Specyfing "tcb" as the value, measures all
+ programs exec'd, files mmap'd for exec, and all files
+ opened with the read mode bit set by either the
+ effective uid (euid=0) or uid=0.
+ Format: "tcb"
+
+ ima_tcb [IMA] Deprecated. Use ima_policy= instead.
+ Load a policy which meets the needs of the Trusted
+ Computing Base. This means IMA will measure all
+ programs exec'd, files mmap'd for exec, and all files
+ opened for read by uid=0.
+
+ ima_template= [IMA]
+ Select one of defined IMA measurements template formats.
+ Formats: { "ima" | "ima-ng" }
+ Default: "ima-ng"
+
+ ima_template_fmt=
+ [IMA] Define a custom template format.
+ Format: { "field1|...|fieldN" }
+
+ ima.ahash_minsize= [IMA] Minimum file size for asynchronous hash usage
+ Format: <min_file_size>
+ Set the minimal file size for using asynchronous hash.
+ If left unspecified, ahash usage is disabled.
+
+ ahash performance varies for different data sizes on
+ different crypto accelerators. This option can be used
+ to achieve the best performance for a particular HW.
+
+ ima.ahash_bufsize= [IMA] Asynchronous hash buffer size
+ Format: <bufsize>
+ Set hashing buffer size. Default: 4k.
+
+ ahash performance varies for different chunk sizes on
+ different crypto accelerators. This option can be used
+ to achieve best performance for particular HW.
+
+ init= [KNL]
+ Format: <full_path>
+ Run specified binary instead of /sbin/init as init
+ process.
+
+ initcall_debug [KNL] Trace initcalls as they are executed. Useful
+ for working out where the kernel is dying during
+ startup.
+
+ initcall_blacklist= [KNL] Do not execute a comma-separated list of
+ initcall functions. Useful for debugging built-in
+ modules and initcalls.
+
+ initrd= [BOOT] Specify the location of the initial ramdisk
+
+ inport.irq= [HW] Inport (ATI XL and Microsoft) busmouse driver
+ Format: <irq>
+
+ int_pln_enable [x86] Enable power limit notification interrupt
+
+ integrity_audit=[IMA]
+ Format: { "0" | "1" }
+ 0 -- basic integrity auditing messages. (Default)
+ 1 -- additional integrity auditing messages.
+
+ intel_iommu= [DMAR] Intel IOMMU driver (DMAR) option
+ on
+ Enable intel iommu driver.
+ off
+ Disable intel iommu driver.
+ igfx_off [Default Off]
+ By default, gfx is mapped as normal device. If a gfx
+ device has a dedicated DMAR unit, the DMAR unit is
+ bypassed by not enabling DMAR with this option. In
+ this case, gfx device will use physical address for
+ DMA.
+ forcedac [x86_64]
+ With this option iommu will not optimize to look
+ for io virtual address below 32-bit forcing dual
+ address cycle on pci bus for cards supporting greater
+ than 32-bit addressing. The default is to look
+ for translation below 32-bit and if not available
+ then look in the higher range.
+ strict [Default Off]
+ With this option on every unmap_single operation will
+ result in a hardware IOTLB flush operation as opposed
+ to batching them for performance.
+ sp_off [Default Off]
+ By default, super page will be supported if Intel IOMMU
+ has the capability. With this option, super page will
+ not be supported.
+ ecs_off [Default Off]
+ By default, extended context tables will be supported if
+ the hardware advertises that it has support both for the
+ extended tables themselves, and also PASID support. With
+ this option set, extended tables will not be used even
+ on hardware which claims to support them.
+
+ intel_idle.max_cstate= [KNL,HW,ACPI,X86]
+ 0 disables intel_idle and fall back on acpi_idle.
+ 1 to 6 specify maximum depth of C-state.
+
+ intel_pstate= [X86]
+ disable
+ Do not enable intel_pstate as the default
+ scaling driver for the supported processors
+ force
+ Enable intel_pstate on systems that prohibit it by default
+ in favor of acpi-cpufreq. Forcing the intel_pstate driver
+ instead of acpi-cpufreq may disable platform features, such
+ as thermal controls and power capping, that rely on ACPI
+ P-States information being indicated to OSPM and therefore
+ should be used with caution. This option does not work with
+ processors that aren't supported by the intel_pstate driver
+ or on platforms that use pcc-cpufreq instead of acpi-cpufreq.
+ no_hwp
+ Do not enable hardware P state control (HWP)
+ if available.
+ hwp_only
+ Only load intel_pstate on systems which support
+ hardware P state control (HWP) if available.
+
+ intremap= [X86-64, Intel-IOMMU]
+ on enable Interrupt Remapping (default)
+ off disable Interrupt Remapping
+ nosid disable Source ID checking
+ no_x2apic_optout
+ BIOS x2APIC opt-out request will be ignored
+
+ iomem= Disable strict checking of access to MMIO memory
+ strict regions from userspace.
+ relaxed
+
+ iommu= [x86]
+ off
+ force
+ noforce
+ biomerge
+ panic
+ nopanic
+ merge
+ nomerge
+ forcesac
+ soft
+ pt [x86, IA-64]
+ nobypass [PPC/POWERNV]
+ Disable IOMMU bypass, using IOMMU for PCI devices.
+
+
+ io7= [HW] IO7 for Marvel based alpha systems
+ See comment before marvel_specify_io7 in
+ arch/alpha/kernel/core_marvel.c.
+
+ io_delay= [X86] I/O delay method
+ 0x80
+ Standard port 0x80 based delay
+ 0xed
+ Alternate port 0xed based delay (needed on some systems)
+ udelay
+ Simple two microseconds delay
+ none
+ No delay
+
+ ip= [IP_PNP]
+ See Documentation/filesystems/nfs/nfsroot.txt.
+
+ irqfixup [HW]
+ When an interrupt is not handled search all handlers
+ for it. Intended to get systems with badly broken
+ firmware running.
+
+ irqpoll [HW]
+ When an interrupt is not handled search all handlers
+ for it. Also check all handlers each timer
+ interrupt. Intended to get systems with badly broken
+ firmware running.
+
+ isapnp= [ISAPNP]
+ Format: <RDP>,<reset>,<pci_scan>,<verbosity>
+
+ isolcpus= [KNL,SMP] Isolate CPUs from the general scheduler.
+ Format:
+ <cpu number>,...,<cpu number>
+ or
+ <cpu number>-<cpu number>
+ (must be a positive range in ascending order)
+ or a mixture
+ <cpu number>,...,<cpu number>-<cpu number>
+
+ This option can be used to specify one or more CPUs
+ to isolate from the general SMP balancing and scheduling
+ algorithms. You can move a process onto or off an
+ "isolated" CPU via the CPU affinity syscalls or cpuset.
+ <cpu number> begins at 0 and the maximum value is
+ "number of CPUs in system - 1".
+
+ This option is the preferred way to isolate CPUs. The
+ alternative -- manually setting the CPU mask of all
+ tasks in the system -- can cause problems and
+ suboptimal load balancer performance.
+
+ iucv= [HW,NET]
+
+ ivrs_ioapic [HW,X86_64]
+ Provide an override to the IOAPIC-ID<->DEVICE-ID
+ mapping provided in the IVRS ACPI table. For
+ example, to map IOAPIC-ID decimal 10 to
+ PCI device 00:14.0 write the parameter as:
+ ivrs_ioapic[10]=00:14.0
+
+ ivrs_hpet [HW,X86_64]
+ Provide an override to the HPET-ID<->DEVICE-ID
+ mapping provided in the IVRS ACPI table. For
+ example, to map HPET-ID decimal 0 to
+ PCI device 00:14.0 write the parameter as:
+ ivrs_hpet[0]=00:14.0
+
+ js= [HW,JOY] Analog joystick
+ See Documentation/input/joystick.txt.
+
+ kaslr/nokaslr [X86]
+ Enable/disable kernel and module base offset ASLR
+ (Address Space Layout Randomization) if built into
+ the kernel. When CONFIG_HIBERNATION is selected,
+ kASLR is disabled by default. When kASLR is enabled,
+ hibernation will be disabled.
+
+ keepinitrd [HW,ARM]
+
+ kernelcore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter
+ specifies the amount of memory usable by the kernel
+ for non-movable allocations. The requested amount is
+ spread evenly throughout all nodes in the system. The
+ remaining memory in each node is used for Movable
+ pages. In the event, a node is too small to have both
+ kernelcore and Movable pages, kernelcore pages will
+ take priority and other nodes will have a larger number
+ of Movable pages. The Movable zone is used for the
+ allocation of pages that may be reclaimed or moved
+ by the page migration subsystem. This means that
+ HugeTLB pages may not be allocated from this zone.
+ Note that allocations like PTEs-from-HighMem still
+ use the HighMem zone if it exists, and the Normal
+ zone if it does not.
+
+ kgdbdbgp= [KGDB,HW] kgdb over EHCI usb debug port.
+ Format: <Controller#>[,poll interval]
+ The controller # is the number of the ehci usb debug
+ port as it is probed via PCI. The poll interval is
+ optional and is the number seconds in between
+ each poll cycle to the debug port in case you need
+ the functionality for interrupting the kernel with
+ gdb or control-c on the dbgp connection. When
+ not using this parameter you use sysrq-g to break into
+ the kernel debugger.
+
+ kgdboc= [KGDB,HW] kgdb over consoles.
+ Requires a tty driver that supports console polling,
+ or a supported polling keyboard driver (non-usb).
+ Serial only format: <serial_device>[,baud]
+ keyboard only format: kbd
+ keyboard and serial format: kbd,<serial_device>[,baud]
+ Optional Kernel mode setting:
+ kms, kbd format: kms,kbd
+ kms, kbd and serial format: kms,kbd,<ser_dev>[,baud]
+
+ kgdbwait [KGDB] Stop kernel execution and enter the
+ kernel debugger at the earliest opportunity.
+
+ kmac= [MIPS] korina ethernet MAC address.
+ Configure the RouterBoard 532 series on-chip
+ Ethernet adapter MAC address.
+
+ kmemleak= [KNL] Boot-time kmemleak enable/disable
+ Valid arguments: on, off
+ Default: on
+ Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
+ the default is off.
+
+ kmemcheck= [X86] Boot-time kmemcheck enable/disable/one-shot mode
+ Valid arguments: 0, 1, 2
+ kmemcheck=0 (disabled)
+ kmemcheck=1 (enabled)
+ kmemcheck=2 (one-shot mode)
+ Default: 2 (one-shot mode)
+
+ kstack=N [X86] Print N words from the kernel stack
+ in oops dumps.
+
+ kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
+ Default is 0 (don't ignore, but inject #GP)
+
+ kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit
+ KVM MMU at runtime.
+ Default is 0 (off)
+
+ kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM.
+ Default is 1 (enabled)
+
+ kvm-amd.npt= [KVM,AMD] Disable nested paging (virtualized MMU)
+ for all guests.
+ Default is 1 (enabled) if in 64-bit or 32-bit PAE mode.
+
+ kvm-intel.ept= [KVM,Intel] Disable extended page tables
+ (virtualized MMU) support on capable Intel chips.
+ Default is 1 (enabled)
+
+ kvm-intel.emulate_invalid_guest_state=
+ [KVM,Intel] Enable emulation of invalid guest states
+ Default is 0 (disabled)
+
+ kvm-intel.flexpriority=
+ [KVM,Intel] Disable FlexPriority feature (TPR shadow).
+ Default is 1 (enabled)
+
+ kvm-intel.nested=
+ [KVM,Intel] Enable VMX nesting (nVMX).
+ Default is 0 (disabled)
+
+ kvm-intel.unrestricted_guest=
+ [KVM,Intel] Disable unrestricted guest feature
+ (virtualized real and unpaged mode) on capable
+ Intel chips. Default is 1 (enabled)
+
+ kvm-intel.vpid= [KVM,Intel] Disable Virtual Processor Identification
+ feature (tagged TLBs) on capable Intel chips.
+ Default is 1 (enabled)
+
+ l2cr= [PPC]
+
+ l3cr= [PPC]
+
+ lapic [X86-32,APIC] Enable the local APIC even if BIOS
+ disabled it.
+
+ lapic= [x86,APIC] "notscdeadline" Do not use TSC deadline
+ value for LAPIC timer one-shot implementation. Default
+ back to the programmable timer unit in the LAPIC.
+
+ lapic_timer_c2_ok [X86,APIC] trust the local apic timer
+ in C2 power state.
+
+ libata.dma= [LIBATA] DMA control
+ libata.dma=0 Disable all PATA and SATA DMA
+ libata.dma=1 PATA and SATA Disk DMA only
+ libata.dma=2 ATAPI (CDROM) DMA only
+ libata.dma=4 Compact Flash DMA only
+ Combinations also work, so libata.dma=3 enables DMA
+ for disks and CDROMs, but not CFs.
+
+ libata.ignore_hpa= [LIBATA] Ignore HPA limit
+ libata.ignore_hpa=0 keep BIOS limits (default)
+ libata.ignore_hpa=1 ignore limits, using full disk
+
+ libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
+ when set.
+ Format: <int>
+
+ libata.force= [LIBATA] Force configurations. The format is comma
+ separated list of "[ID:]VAL" where ID is
+ PORT[.DEVICE]. PORT and DEVICE are decimal numbers
+ matching port, link or device. Basically, it matches
+ the ATA ID string printed on console by libata. If
+ the whole ID part is omitted, the last PORT and DEVICE
+ values are used. If ID hasn't been specified yet, the
+ configuration applies to all ports, links and devices.
+
+ If only DEVICE is omitted, the parameter applies to
+ the port and all links and devices behind it. DEVICE
+ number of 0 either selects the first device or the
+ first fan-out link behind PMP device. It does not
+ select the host link. DEVICE number of 15 selects the
+ host link and device attached to it.
+
+ The VAL specifies the configuration to force. As long
+ as there's no ambiguity shortcut notation is allowed.
+ For example, both 1.5 and 1.5G would work for 1.5Gbps.
+ The following configurations can be forced.
+
+ * Cable type: 40c, 80c, short40c, unk, ign or sata.
+ Any ID with matching PORT is used.
+
+ * SATA link speed limit: 1.5Gbps or 3.0Gbps.
+
+ * Transfer mode: pio[0-7], mwdma[0-4] and udma[0-7].
+ udma[/][16,25,33,44,66,100,133] notation is also
+ allowed.
+
+ * [no]ncq: Turn on or off NCQ.
+
+ * nohrst, nosrst, norst: suppress hard, soft
+ and both resets.
+
+ * rstonce: only attempt one reset during
+ hot-unplug link recovery
+
+ * dump_id: dump IDENTIFY data.
+
+ * atapi_dmadir: Enable ATAPI DMADIR bridge support
+
+ * disable: Disable this device.
+
+ If there are multiple matching configurations changing
+ the same attribute, the last one is used.
+
+ memblock=debug [KNL] Enable memblock debug messages.
+
+ load_ramdisk= [RAM] List of ramdisks to load from floppy
+ See Documentation/blockdev/ramdisk.txt.
+
+ lockd.nlm_grace_period=P [NFS] Assign grace period.
+ Format: <integer>
+
+ lockd.nlm_tcpport=N [NFS] Assign TCP port.
+ Format: <integer>
+
+ lockd.nlm_timeout=T [NFS] Assign timeout value.
+ Format: <integer>
+
+ lockd.nlm_udpport=M [NFS] Assign UDP port.
+ Format: <integer>
+
+ locktorture.nreaders_stress= [KNL]
+ Set the number of locking read-acquisition kthreads.
+ Defaults to being automatically set based on the
+ number of online CPUs.
+
+ locktorture.nwriters_stress= [KNL]
+ Set the number of locking write-acquisition kthreads.
+
+ locktorture.onoff_holdoff= [KNL]
+ Set time (s) after boot for CPU-hotplug testing.
+
+ locktorture.onoff_interval= [KNL]
+ Set time (s) between CPU-hotplug operations, or
+ zero to disable CPU-hotplug testing.
+
+ locktorture.shuffle_interval= [KNL]
+ Set task-shuffle interval (jiffies). Shuffling
+ tasks allows some CPUs to go into dyntick-idle
+ mode during the locktorture test.
+
+ locktorture.shutdown_secs= [KNL]
+ Set time (s) after boot system shutdown. This
+ is useful for hands-off automated testing.
+
+ locktorture.stat_interval= [KNL]
+ Time (s) between statistics printk()s.
+
+ locktorture.stutter= [KNL]
+ Time (s) to stutter testing, for example,
+ specifying five seconds causes the test to run for
+ five seconds, wait for five seconds, and so on.
+ This tests the locking primitive's ability to
+ transition abruptly to and from idle.
+
+ locktorture.torture_runnable= [BOOT]
+ Start locktorture running at boot time.
+
+ locktorture.torture_type= [KNL]
+ Specify the locking implementation to test.
+
+ locktorture.verbose= [KNL]
+ Enable additional printk() statements.
+
+ logibm.irq= [HW,MOUSE] Logitech Bus Mouse Driver
+ Format: <irq>
+
+ loglevel= All Kernel Messages with a loglevel smaller than the
+ console loglevel will be printed to the console. It can
+ also be changed with klogd or other programs. The
+ loglevels are defined as follows:
+
+ 0 (KERN_EMERG) system is unusable
+ 1 (KERN_ALERT) action must be taken immediately
+ 2 (KERN_CRIT) critical conditions
+ 3 (KERN_ERR) error conditions
+ 4 (KERN_WARNING) warning conditions
+ 5 (KERN_NOTICE) normal but significant condition
+ 6 (KERN_INFO) informational
+ 7 (KERN_DEBUG) debug-level messages
+
+ log_buf_len=n[KMG] Sets the size of the printk ring buffer,
+ in bytes. n must be a power of two and greater
+ than the minimal size. The minimal size is defined
+ by LOG_BUF_SHIFT kernel config parameter. There is
+ also CONFIG_LOG_CPU_MAX_BUF_SHIFT config parameter
+ that allows to increase the default size depending on
+ the number of CPUs. See init/Kconfig for more details.
+
+ logo.nologo [FB] Disables display of the built-in Linux logo.
+ This may be used to provide more screen space for
+ kernel log messages and is useful when debugging
+ kernel boot problems.
+
+ lp=0 [LP] Specify parallel ports to use, e.g,
+ lp=port[,port...] lp=none,parport0 (lp0 not configured, lp1 uses
+ lp=reset first parallel port). 'lp=0' disables the
+ lp=auto printer driver. 'lp=reset' (which can be
+ specified in addition to the ports) causes
+ attached printers to be reset. Using
+ lp=port1,port2,... specifies the parallel ports
+ to associate lp devices with, starting with
+ lp0. A port specification may be 'none' to skip
+ that lp device, or a parport name such as
+ 'parport0'. Specifying 'lp=auto' instead of a
+ port specification list means that device IDs
+ from each port should be examined, to see if
+ an IEEE 1284-compliant printer is attached; if
+ so, the driver will manage that printer.
+ See also header of drivers/char/lp.c.
+
+ lpj=n [KNL]
+ Sets loops_per_jiffy to given constant, thus avoiding
+ time-consuming boot-time autodetection (up to 250 ms per
+ CPU). 0 enables autodetection (default). To determine
+ the correct value for your kernel, boot with normal
+ autodetection and see what value is printed. Note that
+ on SMP systems the preset will be applied to all CPUs,
+ which is likely to cause problems if your CPUs need
+ significantly divergent settings. An incorrect value
+ will cause delays in the kernel to be wrong, leading to
+ unpredictable I/O errors and other breakage. Although
+ unlikely, in the extreme case this might damage your
+ hardware.
+
+ ltpc= [NET]
+ Format: <io>,<irq>,<dma>
+
+ machvec= [IA-64] Force the use of a particular machine-vector
+ (machvec) in a generic kernel.
+ Example: machvec=hpzx1_swiotlb
+
+ machtype= [Loongson] Share the same kernel image file between different
+ yeeloong laptop.
+ Example: machtype=lemote-yeeloong-2f-7inch
+
+ max_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory greater
+ than or equal to this physical address is ignored.
+
+ maxcpus= [SMP] Maximum number of processors that an SMP kernel
+ should make use of. maxcpus=n : n >= 0 limits the
+ kernel to using 'n' processors. n=0 is a special case,
+ it is equivalent to "nosmp", which also disables
+ the IO APIC.
+
+ max_loop= [LOOP] The number of loop block devices that get
+ (loop.max_loop) unconditionally pre-created at init time. The default
+ number is configured by BLK_DEV_LOOP_MIN_COUNT. Instead
+ of statically allocating a predefined number, loop
+ devices can be requested on-demand with the
+ /dev/loop-control interface.
+
+ mce [X86-32] Machine Check Exception
+
+ mce=option [X86-64] See Documentation/x86/x86_64/boot-options.txt
+
+ md= [HW] RAID subsystems devices and level
+ See Documentation/md.txt.
+
+ mdacon= [MDA]
+ Format: <first>,<last>
+ Specifies range of consoles to be captured by the MDA.
+
+ mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
+ Amount of memory to be used when the kernel is not able
+ to see the whole system memory or for test.
+ [X86] Work as limiting max address. Use together
+ with memmap= to avoid physical address space collisions.
+ Without memmap= PCI devices could be placed at addresses
+ belonging to unused RAM.
+
+ mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
+ memory.
+
+ memchunk=nn[KMG]
+ [KNL,SH] Allow user to override the default size for
+ per-device physically contiguous DMA buffers.
+
+ memmap=exactmap [KNL,X86] Enable setting of an exact
+ E820 memory map, as specified by the user.
+ Such memmap=exactmap lines can be constructed based on
+ BIOS output or other requirements. See the memmap=nn@ss
+ option description.
+
+ memmap=nn[KMG]@ss[KMG]
+ [KNL] Force usage of a specific region of memory.
+ Region of memory to be used is from ss to ss+nn.
+
+ memmap=nn[KMG]#ss[KMG]
+ [KNL,ACPI] Mark specific memory as ACPI data.
+ Region of memory to be marked is from ss to ss+nn.
+
+ memmap=nn[KMG]$ss[KMG]
+ [KNL,ACPI] Mark specific memory as reserved.
+ Region of memory to be reserved is from ss to ss+nn.
+ Example: Exclude memory from 0x18690000-0x1869ffff
+ memmap=64K$0x18690000
+ or
+ memmap=0x10000$0x18690000
+
+ memmap=nn[KMG]!ss[KMG]
+ [KNL,X86] Mark specific memory as protected.
+ Region of memory to be used, from ss to ss+nn.
+ The memory region may be marked as e820 type 12 (0xc)
+ and is NVDIMM or ADR memory.
+
+ memory_corruption_check=0/1 [X86]
+ Some BIOSes seem to corrupt the first 64k of
+ memory when doing things like suspend/resume.
+ Setting this option will scan the memory
+ looking for corruption. Enabling this will
+ both detect corruption and prevent the kernel
+ from using the memory being corrupted.
+ However, its intended as a diagnostic tool; if
+ repeatable BIOS-originated corruption always
+ affects the same memory, you can use memmap=
+ to prevent the kernel from using that memory.
+
+ memory_corruption_check_size=size [X86]
+ By default it checks for corruption in the low
+ 64k, making this memory unavailable for normal
+ use. Use this parameter to scan for
+ corruption in more or less memory.
+
+ memory_corruption_check_period=seconds [X86]
+ By default it checks for corruption every 60
+ seconds. Use this parameter to check at some
+ other rate. 0 disables periodic checking.
+
+ memtest= [KNL,X86,ARM] Enable memtest
+ Format: <integer>
+ default : 0 <disable>
+ Specifies the number of memtest passes to be
+ performed. Each pass selects another test
+ pattern from a given set of patterns. Memtest
+ fills the memory with this pattern, validates
+ memory contents and reserves bad memory
+ regions that are detected.
+
+ meye.*= [HW] Set MotionEye Camera parameters
+ See Documentation/video4linux/meye.txt.
+
+ mfgpt_irq= [IA-32] Specify the IRQ to use for the
+ Multi-Function General Purpose Timers on AMD Geode
+ platforms.
+
+ mfgptfix [X86-32] Fix MFGPT timers on AMD Geode platforms when
+ the BIOS has incorrectly applied a workaround. TinyBIOS
+ version 0.98 is known to be affected, 0.99 fixes the
+ problem by letting the user disable the workaround.
+
+ mga= [HW,DRM]
+
+ min_addr=nn[KMG] [KNL,BOOT,ia64] All physical memory below this
+ physical address is ignored.
+
+ mini2440= [ARM,HW,KNL]
+ Format:[0..2][b][c][t]
+ Default: "0tb"
+ MINI2440 configuration specification:
+ 0 - The attached screen is the 3.5" TFT
+ 1 - The attached screen is the 7" TFT
+ 2 - The VGA Shield is attached (1024x768)
+ Leaving out the screen size parameter will not load
+ the TFT driver, and the framebuffer will be left
+ unconfigured.
+ b - Enable backlight. The TFT backlight pin will be
+ linked to the kernel VESA blanking code and a GPIO
+ LED. This parameter is not necessary when using the
+ VGA shield.
+ c - Enable the s3c camera interface.
+ t - Reserved for enabling touchscreen support. The
+ touchscreen support is not enabled in the mainstream
+ kernel as of 2.6.30, a preliminary port can be found
+ in the "bleeding edge" mini2440 support kernel at
+ http://repo.or.cz/w/linux-2.6/mini2440.git
+
+ mminit_loglevel=
+ [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
+ parameter allows control of the logging verbosity for
+ the additional memory initialisation checks. A value
+ of 0 disables mminit logging and a level of 4 will
+ log everything. Information is printed at KERN_DEBUG
+ so loglevel=8 may also need to be specified.
+
+ module.sig_enforce
+ [KNL] When CONFIG_MODULE_SIG is set, this means that
+ modules without (valid) signatures will fail to load.
+ Note that if CONFIG_MODULE_SIG_FORCE is set, that
+ is always true, so this option does nothing.
+
+ mousedev.tap_time=
+ [MOUSE] Maximum time between finger touching and
+ leaving touchpad surface for touch to be considered
+ a tap and be reported as a left button click (for
+ touchpads working in absolute mode only).
+ Format: <msecs>
+ mousedev.xres= [MOUSE] Horizontal screen resolution, used for devices
+ reporting absolute coordinates, such as tablets
+ mousedev.yres= [MOUSE] Vertical screen resolution, used for devices
+ reporting absolute coordinates, such as tablets
+
+ movablecore=nn[KMG] [KNL,X86,IA-64,PPC] This parameter
+ is similar to kernelcore except it specifies the
+ amount of memory used for migratable allocations.
+ If both kernelcore and movablecore is specified,
+ then kernelcore will be at *least* the specified
+ value but may be more. If movablecore on its own
+ is specified, the administrator must be careful
+ that the amount of memory usable for all allocations
+ is not too small.
+
+ movable_node [KNL,X86] Boot-time switch to enable the effects
+ of CONFIG_MOVABLE_NODE=y. See mm/Kconfig for details.
+
+ MTD_Partition= [MTD]
+ Format: <name>,<region-number>,<size>,<offset>
+
+ MTD_Region= [MTD] Format:
+ <name>,<region-number>[,<base>,<size>,<buswidth>,<altbuswidth>]
+
+ mtdparts= [MTD]
+ See drivers/mtd/cmdlinepart.c.
+
+ multitce=off [PPC] This parameter disables the use of the pSeries
+ firmware feature for updating multiple TCE entries
+ at a time.
+
+ onenand.bdry= [HW,MTD] Flex-OneNAND Boundary Configuration
+
+ Format: [die0_boundary][,die0_lock][,die1_boundary][,die1_lock]
+
+ boundary - index of last SLC block on Flex-OneNAND.
+ The remaining blocks are configured as MLC blocks.
+ lock - Configure if Flex-OneNAND boundary should be locked.
+ Once locked, the boundary cannot be changed.
+ 1 indicates lock status, 0 indicates unlock status.
+
+ mtdset= [ARM]
+ ARM/S3C2412 JIVE boot control
+
+ See arch/arm/mach-s3c2412/mach-jive.c
+
+ mtouchusb.raw_coordinates=
+ [HW] Make the MicroTouch USB driver use raw coordinates
+ ('y', default) or cooked coordinates ('n')
+
+ mtrr_chunk_size=nn[KMG] [X86]
+ used for mtrr cleanup. It is largest continuous chunk
+ that could hold holes aka. UC entries.
+
+ mtrr_gran_size=nn[KMG] [X86]
+ Used for mtrr cleanup. It is granularity of mtrr block.
+ Default is 1.
+ Large value could prevent small alignment from
+ using up MTRRs.
+
+ mtrr_spare_reg_nr=n [X86]
+ Format: <integer>
+ Range: 0,7 : spare reg number
+ Default : 1
+ Used for mtrr cleanup. It is spare mtrr entries number.
+ Set to 2 or more if your graphical card needs more.
+
+ n2= [NET] SDL Inc. RISCom/N2 synchronous serial card
+
+ netdev= [NET] Network devices parameters
+ Format: <irq>,<io>,<mem_start>,<mem_end>,<name>
+ Note that mem_start is often overloaded to mean
+ something different and driver-specific.
+ This usage is only documented in each driver source
+ file if at all.
+
+ nf_conntrack.acct=
+ [NETFILTER] Enable connection tracking flow accounting
+ 0 to disable accounting
+ 1 to enable accounting
+ Default value is 0.
+
+ nfsaddrs= [NFS] Deprecated. Use ip= instead.
+ See Documentation/filesystems/nfs/nfsroot.txt.
+
+ nfsroot= [NFS] nfs root filesystem for disk-less boxes.
+ See Documentation/filesystems/nfs/nfsroot.txt.
+
+ nfsrootdebug [NFS] enable nfsroot debugging messages.
+ See Documentation/filesystems/nfs/nfsroot.txt.
+
+ nfs.callback_tcpport=
+ [NFS] set the TCP port on which the NFSv4 callback
+ channel should listen.
+
+ nfs.cache_getent=
+ [NFS] sets the pathname to the program which is used
+ to update the NFS client cache entries.
+
+ nfs.cache_getent_timeout=
+ [NFS] sets the timeout after which an attempt to
+ update a cache entry is deemed to have failed.
+
+ nfs.idmap_cache_timeout=
+ [NFS] set the maximum lifetime for idmapper cache
+ entries.
+
+ nfs.enable_ino64=
+ [NFS] enable 64-bit inode numbers.
+ If zero, the NFS client will fake up a 32-bit inode
+ number for the readdir() and stat() syscalls instead
+ of returning the full 64-bit number.
+ The default is to return 64-bit inode numbers.
+
+ nfs.max_session_slots=
+ [NFSv4.1] Sets the maximum number of session slots
+ the client will attempt to negotiate with the server.
+ This limits the number of simultaneous RPC requests
+ that the client can send to the NFSv4.1 server.
+ Note that there is little point in setting this
+ value higher than the max_tcp_slot_table_limit.
+
+ nfs.nfs4_disable_idmapping=
+ [NFSv4] When set to the default of '1', this option
+ ensures that both the RPC level authentication
+ scheme and the NFS level operations agree to use
+ numeric uids/gids if the mount is using the
+ 'sec=sys' security flavour. In effect it is
+ disabling idmapping, which can make migration from
+ legacy NFSv2/v3 systems to NFSv4 easier.
+ Servers that do not support this mode of operation
+ will be autodetected by the client, and it will fall
+ back to using the idmapper.
+ To turn off this behaviour, set the value to '0'.
+ nfs.nfs4_unique_id=
+ [NFS4] Specify an additional fixed unique ident-
+ ification string that NFSv4 clients can insert into
+ their nfs_client_id4 string. This is typically a
+ UUID that is generated at system install time.
+
+ nfs.send_implementation_id =
+ [NFSv4.1] Send client implementation identification
+ information in exchange_id requests.
+ If zero, no implementation identification information
+ will be sent.
+ The default is to send the implementation identification
+ information.
+
+ nfs.recover_lost_locks =
+ [NFSv4] Attempt to recover locks that were lost due
+ to a lease timeout on the server. Please note that
+ doing this risks data corruption, since there are
+ no guarantees that the file will remain unchanged
+ after the locks are lost.
+ If you want to enable the kernel legacy behaviour of
+ attempting to recover these locks, then set this
+ parameter to '1'.
+ The default parameter value of '0' causes the kernel
+ not to attempt recovery of lost locks.
+
+ nfsd.nfs4_disable_idmapping=
+ [NFSv4] When set to the default of '1', the NFSv4
+ server will return only numeric uids and gids to
+ clients using auth_sys, and will accept numeric uids
+ and gids from such clients. This is intended to ease
+ migration from NFSv2/v3.
+
+ objlayoutdriver.osd_login_prog=
+ [NFS] [OBJLAYOUT] sets the pathname to the program which
+ is used to automatically discover and login into new
+ osd-targets. Please see:
+ Documentation/filesystems/pnfs.txt for more explanations
+
+ nmi_debug= [KNL,AVR32,SH] Specify one or more actions to take
+ when a NMI is triggered.
+ Format: [state][,regs][,debounce][,die]
+
+ nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels
+ Format: [panic,][nopanic,][num]
+ Valid num: 0 or 1
+ 0 - turn nmi_watchdog off
+ 1 - turn nmi_watchdog on
+ When panic is specified, panic when an NMI watchdog
+ timeout occurs (or 'nopanic' to override the opposite
+ default).
+ This is useful when you use a panic=... timeout and
+ need the box quickly up again.
+
+ netpoll.carrier_timeout=
+ [NET] Specifies amount of time (in seconds) that
+ netpoll should wait for a carrier. By default netpoll
+ waits 4 seconds.
+
+ no387 [BUGS=X86-32] Tells the kernel to use the 387 maths
+ emulation library even if a 387 maths coprocessor
+ is present.
+
+ no_console_suspend
+ [HW] Never suspend the console
+ Disable suspending of consoles during suspend and
+ hibernate operations. Once disabled, debugging
+ messages can reach various consoles while the rest
+ of the system is being put to sleep (ie, while
+ debugging driver suspend/resume hooks). This may
+ not work reliably with all consoles, but is known
+ to work with serial and VGA consoles.
+ To facilitate more flexible debugging, we also add
+ console_suspend, a printk module parameter to control
+ it. Users could use console_suspend (usually
+ /sys/module/printk/parameters/console_suspend) to
+ turn on/off it dynamically.
+
+ noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
+ caches in the slab allocator. Saves per-node memory,
+ but will impact performance.
+
+ noalign [KNL,ARM]
+
+ noapic [SMP,APIC] Tells the kernel to not make use of any
+ IOAPICs that may be present in the system.
+
+ noautogroup Disable scheduler automatic task group creation.
+
+ nobats [PPC] Do not use BATs for mapping kernel lowmem
+ on "Classic" PPC cores.
+
+ nocache [ARM]
+
+ noclflush [BUGS=X86] Don't use the CLFLUSH instruction
+
+ nodelayacct [KNL] Disable per-task delay accounting
+
+ nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects.
+
+ nodsp [SH] Disable hardware DSP at boot time.
+
+ noefi Disable EFI runtime services support.
+
+ noexec [IA-64]
+
+ noexec [X86]
+ On X86-32 available only on PAE configured kernels.
+ noexec=on: enable non-executable mappings (default)
+ noexec=off: disable non-executable mappings
+
+ nosmap [X86]
+ Disable SMAP (Supervisor Mode Access Prevention)
+ even if it is supported by processor.
+
+ nosmep [X86]
+ Disable SMEP (Supervisor Mode Execution Prevention)
+ even if it is supported by processor.
+
+ noexec32 [X86-64]
+ This affects only 32-bit executables.
+ noexec32=on: enable non-executable mappings (default)
+ read doesn't imply executable mappings
+ noexec32=off: disable non-executable mappings
+ read implies executable mappings
+
+ nofpu [MIPS,SH] Disable hardware FPU at boot time.
+
+ nofxsr [BUGS=X86-32] Disables x86 floating point extended
+ register save and restore. The kernel will only save
+ legacy floating-point registers on task switch.
+
+ nohugeiomap [KNL,x86] Disable kernel huge I/O mappings.
+
+ noxsave [BUGS=X86] Disables x86 extended register state save
+ and restore using xsave. The kernel will fallback to
+ enabling legacy floating-point and sse state.
+
+ noxsaveopt [X86] Disables xsaveopt used in saving x86 extended
+ register states. The kernel will fall back to use
+ xsave to save the states. By using this parameter,
+ performance of saving the states is degraded because
+ xsave doesn't support modified optimization while
+ xsaveopt supports it on xsaveopt enabled systems.
+
+ noxsaves [X86] Disables xsaves and xrstors used in saving and
+ restoring x86 extended register state in compacted
+ form of xsave area. The kernel will fall back to use
+ xsaveopt and xrstor to save and restore the states
+ in standard form of xsave area. By using this
+ parameter, xsave area per process might occupy more
+ memory on xsaves enabled systems.
+
+ nohlt [BUGS=ARM,SH] Tells the kernel that the sleep(SH) or
+ wfi(ARM) instruction doesn't work correctly and not to
+ use it. This is also useful when using JTAG debugger.
+
+ no_file_caps Tells the kernel not to honor file capabilities. The
+ only way then for a file to be executed with privilege
+ is to be setuid root or executed by root.
+
+ nohalt [IA-64] Tells the kernel not to use the power saving
+ function PAL_HALT_LIGHT when idle. This increases
+ power-consumption. On the positive side, it reduces
+ interrupt wake-up latency, which may improve performance
+ in certain environments such as networked servers or
+ real-time systems.
+
+ nohibernate [HIBERNATION] Disable hibernation and resume.
+
+ nohz= [KNL] Boottime enable/disable dynamic ticks
+ Valid arguments: on, off
+ Default: on
+
+ nohz_full= [KNL,BOOT]
+ In kernels built with CONFIG_NO_HZ_FULL=y, set
+ the specified list of CPUs whose tick will be stopped
+ whenever possible. The boot CPU will be forced outside
+ the range to maintain the timekeeping.
+ The CPUs in this range must also be included in the
+ rcu_nocbs= set.
+
+ noiotrap [SH] Disables trapped I/O port accesses.
+
+ noirqdebug [X86-32] Disables the code which attempts to detect and
+ disable unhandled interrupt sources.
+
+ no_timer_check [X86,APIC] Disables the code which tests for
+ broken timer IRQ sources.
+
+ noisapnp [ISAPNP] Disables ISA PnP code.
+
+ noinitrd [RAM] Tells the kernel not to load any configured
+ initial RAM disk.
+
+ nointremap [X86-64, Intel-IOMMU] Do not enable interrupt
+ remapping.
+ [Deprecated - use intremap=off]
+
+ nointroute [IA-64]
+
+ nojitter [IA-64] Disables jitter checking for ITC timers.
+
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+
+ no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
+ fault handling.
+
+ no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting.
+ steal time is computed, but won't influence scheduler
+ behaviour
+
+ nolapic [X86-32,APIC] Do not enable or use the local APIC.
+
+ nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
+
+ noltlbs [PPC] Do not use large page/tlb entries for kernel
+ lowmem mapping on PPC40x.
+
+ nomca [IA-64] Disable machine check abort handling
+
+ nomce [X86-32] Machine Check Exception
+
+ nomfgpt [X86-32] Disable Multi-Function General Purpose
+ Timer usage (for AMD Geode machines).
+
+ nonmi_ipi [X86] Disable using NMI IPIs during panic/reboot to
+ shutdown the other cpus. Instead use the REBOOT_VECTOR
+ irq.
+
+ nomodule Disable module load
+
+ nopat [X86] Disable PAT (page attribute table extension of
+ pagetables) support.
+
+ norandmaps Don't use address space randomization. Equivalent to
+ echo 0 > /proc/sys/kernel/randomize_va_space
+
+ noreplace-paravirt [X86,IA-64,PV_OPS] Don't patch paravirt_ops
+
+ noreplace-smp [X86-32,SMP] Don't replace SMP instructions
+ with UP alternatives
+
+ nordrand [X86] Disable kernel use of the RDRAND and
+ RDSEED instructions even if they are supported
+ by the processor. RDRAND and RDSEED are still
+ available to user space applications.
+
+ noresume [SWSUSP] Disables resume and restores original swap
+ space.
+
+ no-scroll [VGA] Disables scrollback.
+ This is required for the Braillex ib80-piezo Braille
+ reader made by F.H. Papenmeier (Germany).
+
+ nosbagart [IA-64]
+
+ nosep [BUGS=X86-32] Disables x86 SYSENTER/SYSEXIT support.
+
+ nosmp [SMP] Tells an SMP kernel to act as a UP kernel,
+ and disable the IO APIC. legacy for "maxcpus=0".
+
+ nosoftlockup [KNL] Disable the soft-lockup detector.
+
+ nosync [HW,M68K] Disables sync negotiation for all devices.
+
+ notsc [BUGS=X86-32] Disable Time Stamp Counter
+
+ nousb [USB] Disable the USB subsystem
+
+ nowatchdog [KNL] Disable both lockup detectors, i.e.
+ soft-lockup and NMI watchdog (hard-lockup).
+
+ nowb [ARM]
+
+ nox2apic [X86-64,APIC] Do not enable x2APIC mode.
+
+ cpu0_hotplug [X86] Turn on CPU0 hotplug feature when
+ CONFIG_BOOTPARAM_HOTPLUG_CPU0 is off.
+ Some features depend on CPU0. Known dependencies are:
+ 1. Resume from suspend/hibernate depends on CPU0.
+ Suspend/hibernate will fail if CPU0 is offline and you
+ need to online CPU0 before suspend/hibernate.
+ 2. PIC interrupts also depend on CPU0. CPU0 can't be
+ removed if a PIC interrupt is detected.
+ It's said poweroff/reboot may depend on CPU0 on some
+ machines although I haven't seen such issues so far
+ after CPU0 is offline on a few tested machines.
+ If the dependencies are under your control, you can
+ turn on cpu0_hotplug.
+
+ nptcg= [IA-64] Override max number of concurrent global TLB
+ purges which is reported from either PAL_VM_SUMMARY or
+ SAL PALO.
+
+ nr_cpus= [SMP] Maximum number of processors that an SMP kernel
+ could support. nr_cpus=n : n >= 1 limits the kernel to
+ supporting 'n' processors. Later in runtime you can not
+ use hotplug cpu feature to put more cpu back to online.
+ just like you compile the kernel NR_CPUS=n
+
+ nr_uarts= [SERIAL] maximum number of UARTs to be registered.
+
+ numa_balancing= [KNL,X86] Enable or disable automatic NUMA balancing.
+ Allowed values are enable and disable
+
+ numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA.
+ one of ['zone', 'node', 'default'] can be specified
+ This can be set from sysctl after boot.
+ See Documentation/sysctl/vm.txt for details.
+
+ ohci1394_dma=early [HW] enable debugging via the ohci1394 driver.
+ See Documentation/debugging-via-ohci1394.txt for more
+ info.
+
+ olpc_ec_timeout= [OLPC] ms delay when issuing EC commands
+ Rather than timing out after 20 ms if an EC
+ command is not properly ACKed, override the length
+ of the timeout. We have interrupts disabled while
+ waiting for the ACK, so if this is set too high
+ interrupts *may* be lost!
+
+ omap_mux= [OMAP] Override bootloader pin multiplexing.
+ Format: <mux_mode0.mode_name=value>...
+ For example, to override I2C bus2:
+ omap_mux=i2c2_scl.i2c2_scl=0x100,i2c2_sda.i2c2_sda=0x100
+
+ oprofile.timer= [HW]
+ Use timer interrupt instead of performance counters
+
+ oprofile.cpu_type= Force an oprofile cpu type
+ This might be useful if you have an older oprofile
+ userland or if you want common events.
+ Format: { arch_perfmon }
+ arch_perfmon: [X86] Force use of architectural
+ perfmon on Intel CPUs instead of the
+ CPU specific event set.
+ timer: [X86] Force use of architectural NMI
+ timer mode (see also oprofile.timer
+ for generic hr timer mode)
+ [s390] Force legacy basic mode sampling
+ (report cpu_type "timer")
+
+ oops=panic Always panic on oopses. Default is to just kill the
+ process, but there is a small probability of
+ deadlocking the machine.
+ This will also cause panics on machine check exceptions.
+ Useful together with panic=30 to trigger a reboot.
+
+ OSS [HW,OSS]
+ See Documentation/sound/oss/oss-parameters.txt
+
+ page_owner= [KNL] Boot-time page_owner enabling option.
+ Storage of the information about who allocated
+ each page is disabled in default. With this switch,
+ we can turn it on.
+ on: enable the feature
+
+ panic= [KNL] Kernel behaviour on panic: delay <timeout>
+ timeout > 0: seconds before rebooting
+ timeout = 0: wait forever
+ timeout < 0: reboot immediately
+ Format: <timeout>
+
+ panic_on_warn panic() instead of WARN(). Useful to cause kdump
+ on a WARN().
+
+ crash_kexec_post_notifiers
+ Run kdump after running panic-notifiers and dumping
+ kmsg. This only for the users who doubt kdump always
+ succeeds in any situation.
+ Note that this also increases risks of kdump failure,
+ because some panic notifiers can make the crashed
+ kernel more unstable.
+
+ parkbd.port= [HW] Parallel port number the keyboard adapter is
+ connected to, default is 0.
+ Format: <parport#>
+ parkbd.mode= [HW] Parallel port keyboard adapter mode of operation,
+ 0 for XT, 1 for AT (default is AT).
+ Format: <mode>
+
+ parport= [HW,PPT] Specify parallel ports. 0 disables.
+ Format: { 0 | auto | 0xBBB[,IRQ[,DMA]] }
+ Use 'auto' to force the driver to use any
+ IRQ/DMA settings detected (the default is to
+ ignore detected IRQ/DMA settings because of
+ possible conflicts). You can specify the base
+ address, IRQ, and DMA settings; IRQ and DMA
+ should be numbers, or 'auto' (for using detected
+ settings on that particular port), or 'nofifo'
+ (to avoid using a FIFO even if it is detected).
+ Parallel ports are assigned in the order they
+ are specified on the command line, starting
+ with parport0.
+
+ parport_init_mode= [HW,PPT]
+ Configure VIA parallel port to operate in
+ a specific mode. This is necessary on Pegasos
+ computer where firmware has no options for setting
+ up parallel port mode and sets it to spp.
+ Currently this function knows 686a and 8231 chips.
+ Format: [spp|ps2|epp|ecp|ecpepp]
+
+ pause_on_oops=
+ Halt all CPUs after the first oops has been printed for
+ the specified number of seconds. This is to be used if
+ your oopses keep scrolling off the screen.
+
+ pcbit= [HW,ISDN]
+
+ pcd. [PARIDE]
+ See header of drivers/block/paride/pcd.c.
+ See also Documentation/blockdev/paride.txt.
+
+ pci=option[,option...] [PCI] various PCI subsystem options:
+ earlydump [X86] dump PCI config space before the kernel
+ changes anything
+ off [X86] don't probe for the PCI bus
+ bios [X86-32] force use of PCI BIOS, don't access
+ the hardware directly. Use this if your machine
+ has a non-standard PCI host bridge.
+ nobios [X86-32] disallow use of PCI BIOS, only direct
+ hardware access methods are allowed. Use this
+ if you experience crashes upon bootup and you
+ suspect they are caused by the BIOS.
+ conf1 [X86] Force use of PCI Configuration
+ Mechanism 1.
+ conf2 [X86] Force use of PCI Configuration
+ Mechanism 2.
+ noaer [PCIE] If the PCIEAER kernel config parameter is
+ enabled, this kernel boot option can be used to
+ disable the use of PCIE advanced error reporting.
+ nodomains [PCI] Disable support for multiple PCI
+ root domains (aka PCI segments, in ACPI-speak).
+ nommconf [X86] Disable use of MMCONFIG for PCI
+ Configuration
+ check_enable_amd_mmconf [X86] check for and enable
+ properly configured MMIO access to PCI
+ config space on AMD family 10h CPU
+ nomsi [MSI] If the PCI_MSI kernel config parameter is
+ enabled, this kernel boot option can be used to
+ disable the use of MSI interrupts system-wide.
+ noioapicquirk [APIC] Disable all boot interrupt quirks.
+ Safety option to keep boot IRQs enabled. This
+ should never be necessary.
+ ioapicreroute [APIC] Enable rerouting of boot IRQs to the
+ primary IO-APIC for bridges that cannot disable
+ boot IRQs. This fixes a source of spurious IRQs
+ when the system masks IRQs.
+ noioapicreroute [APIC] Disable workaround that uses the
+ boot IRQ equivalent of an IRQ that connects to
+ a chipset where boot IRQs cannot be disabled.
+ The opposite of ioapicreroute.
+ biosirq [X86-32] Use PCI BIOS calls to get the interrupt
+ routing table. These calls are known to be buggy
+ on several machines and they hang the machine
+ when used, but on other computers it's the only
+ way to get the interrupt routing table. Try
+ this option if the kernel is unable to allocate
+ IRQs or discover secondary PCI buses on your
+ motherboard.
+ rom [X86] Assign address space to expansion ROMs.
+ Use with caution as certain devices share
+ address decoders between ROMs and other
+ resources.
+ norom [X86] Do not assign address space to
+ expansion ROMs that do not already have
+ BIOS assigned address ranges.
+ nobar [X86] Do not assign address space to the
+ BARs that weren't assigned by the BIOS.
+ irqmask=0xMMMM [X86] Set a bit mask of IRQs allowed to be
+ assigned automatically to PCI devices. You can
+ make the kernel exclude IRQs of your ISA cards
+ this way.
+ pirqaddr=0xAAAAA [X86] Specify the physical address
+ of the PIRQ table (normally generated
+ by the BIOS) if it is outside the
+ F0000h-100000h range.
+ lastbus=N [X86] Scan all buses thru bus #N. Can be
+ useful if the kernel is unable to find your
+ secondary buses and you want to tell it
+ explicitly which ones they are.
+ assign-busses [X86] Always assign all PCI bus
+ numbers ourselves, overriding
+ whatever the firmware may have done.
+ usepirqmask [X86] Honor the possible IRQ mask stored
+ in the BIOS $PIR table. This is needed on
+ some systems with broken BIOSes, notably
+ some HP Pavilion N5400 and Omnibook XE3
+ notebooks. This will have no effect if ACPI
+ IRQ routing is enabled.
+ noacpi [X86] Do not use ACPI for IRQ routing
+ or for PCI scanning.
+ use_crs [X86] Use PCI host bridge window information
+ from ACPI. On BIOSes from 2008 or later, this
+ is enabled by default. If you need to use this,
+ please report a bug.
+ nocrs [X86] Ignore PCI host bridge windows from ACPI.
+ If you need to use this, please report a bug.
+ routeirq Do IRQ routing for all PCI devices.
+ This is normally done in pci_enable_device(),
+ so this option is a temporary workaround
+ for broken drivers that don't call it.
+ skip_isa_align [X86] do not align io start addr, so can
+ handle more pci cards
+ firmware [ARM] Do not re-enumerate the bus but instead
+ just use the configuration from the
+ bootloader. This is currently used on
+ IXP2000 systems where the bus has to be
+ configured a certain way for adjunct CPUs.
+ noearly [X86] Don't do any early type 1 scanning.
+ This might help on some broken boards which
+ machine check when some devices' config space
+ is read. But various workarounds are disabled
+ and some IOMMU drivers will not work.
+ bfsort Sort PCI devices into breadth-first order.
+ This sorting is done to get a device
+ order compatible with older (<= 2.4) kernels.
+ nobfsort Don't sort PCI devices into breadth-first order.
+ pcie_bus_tune_off Disable PCIe MPS (Max Payload Size)
+ tuning and use the BIOS-configured MPS defaults.
+ pcie_bus_safe Set every device's MPS to the largest value
+ supported by all devices below the root complex.
+ pcie_bus_perf Set device MPS to the largest allowable MPS
+ based on its parent bus. Also set MRRS (Max
+ Read Request Size) to the largest supported
+ value (no larger than the MPS that the device
+ or bus can support) for best performance.
+ pcie_bus_peer2peer Set every device's MPS to 128B, which
+ every device is guaranteed to support. This
+ configuration allows peer-to-peer DMA between
+ any pair of devices, possibly at the cost of
+ reduced performance. This also guarantees
+ that hot-added devices will work.
+ cbiosize=nn[KMG] The fixed amount of bus space which is
+ reserved for the CardBus bridge's IO window.
+ The default value is 256 bytes.
+ cbmemsize=nn[KMG] The fixed amount of bus space which is
+ reserved for the CardBus bridge's memory
+ window. The default value is 64 megabytes.
+ resource_alignment=
+ Format:
+ [<order of align>@][<domain>:]<bus>:<slot>.<func>[; ...]
+ Specifies alignment and device to reassign
+ aligned memory resources.
+ If <order of align> is not specified,
+ PAGE_SIZE is used as alignment.
+ PCI-PCI bridge can be specified, if resource
+ windows need to be expanded.
+ ecrc= Enable/disable PCIe ECRC (transaction layer
+ end-to-end CRC checking).
+ bios: Use BIOS/firmware settings. This is the
+ the default.
+ off: Turn ECRC off
+ on: Turn ECRC on.
+ hpiosize=nn[KMG] The fixed amount of bus space which is
+ reserved for hotplug bridge's IO window.
+ Default size is 256 bytes.
+ hpmemsize=nn[KMG] The fixed amount of bus space which is
+ reserved for hotplug bridge's memory window.
+ Default size is 2 megabytes.
+ realloc= Enable/disable reallocating PCI bridge resources
+ if allocations done by BIOS are too small to
+ accommodate resources required by all child
+ devices.
+ off: Turn realloc off
+ on: Turn realloc on
+ realloc same as realloc=on
+ noari do not use PCIe ARI.
+ pcie_scan_all Scan all possible PCIe devices. Otherwise we
+ only look for one device below a PCIe downstream
+ port.
+
+ pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
+ Management.
+ off Disable ASPM.
+ force Enable ASPM even on devices that claim not to support it.
+ WARNING: Forcing ASPM on may cause system lockups.
+
+ pcie_hp= [PCIE] PCI Express Hotplug driver options:
+ nomsi Do not use MSI for PCI Express Native Hotplug (this
+ makes all PCIe ports use INTx for hotplug services).
+
+ pcie_ports= [PCIE] PCIe ports handling:
+ auto Ask the BIOS whether or not to use native PCIe services
+ associated with PCIe ports (PME, hot-plug, AER). Use
+ them only if that is allowed by the BIOS.
+ native Use native PCIe services associated with PCIe ports
+ unconditionally.
+ compat Treat PCIe ports as PCI-to-PCI bridges, disable the PCIe
+ ports driver.
+
+ pcie_pme= [PCIE,PM] Native PCIe PME signaling options:
+ nomsi Do not use MSI for native PCIe PME signaling (this makes
+ all PCIe root ports use INTx for all services).
+
+ pcmv= [HW,PCMCIA] BadgePAD 4
+
+ pd_ignore_unused
+ [PM]
+ Keep all power-domains already enabled by bootloader on,
+ even if no driver has claimed them. This is useful
+ for debug and development, but should not be
+ needed on a platform with proper driver support.
+
+ pd. [PARIDE]
+ See Documentation/blockdev/paride.txt.
+
+ pdcchassis= [PARISC,HW] Disable/Enable PDC Chassis Status codes at
+ boot time.
+ Format: { 0 | 1 }
+ See arch/parisc/kernel/pdc_chassis.c
+
+ percpu_alloc= Select which percpu first chunk allocator to use.
+ Currently supported values are "embed" and "page".
+ Archs may support subset or none of the selections.
+ See comments in mm/percpu.c for details on each
+ allocator. This parameter is primarily for debugging
+ and performance comparison.
+
+ pf. [PARIDE]
+ See Documentation/blockdev/paride.txt.
+
+ pg. [PARIDE]
+ See Documentation/blockdev/paride.txt.
+
+ pirq= [SMP,APIC] Manual mp-table setup
+ See Documentation/x86/i386/IO-APIC.txt.
+
+ plip= [PPT,NET] Parallel port network link
+ Format: { parport<nr> | timid | 0 }
+ See also Documentation/parport.txt.
+
+ pmtmr= [X86] Manual setup of pmtmr I/O Port.
+ Override pmtimer IOPort with a hex value.
+ e.g. pmtmr=0x508
+
+ pnp.debug=1 [PNP]
+ Enable PNP debug messages (depends on the
+ CONFIG_PNP_DEBUG_MESSAGES option). Change at run-time
+ via /sys/module/pnp/parameters/debug. We always show
+ current resource usage; turning this on also shows
+ possible settings and some assignment information.
+
+ pnpacpi= [ACPI]
+ { off }
+
+ pnpbios= [ISAPNP]
+ { on | off | curr | res | no-curr | no-res }
+
+ pnp_reserve_irq=
+ [ISAPNP] Exclude IRQs for the autoconfiguration
+
+ pnp_reserve_dma=
+ [ISAPNP] Exclude DMAs for the autoconfiguration
+
+ pnp_reserve_io= [ISAPNP] Exclude I/O ports for the autoconfiguration
+ Ranges are in pairs (I/O port base and size).
+
+ pnp_reserve_mem=
+ [ISAPNP] Exclude memory regions for the
+ autoconfiguration.
+ Ranges are in pairs (memory base and size).
+
+ ports= [IP_VS_FTP] IPVS ftp helper module
+ Default is 21.
+ Up to 8 (IP_VS_APP_MAX_PORTS) ports
+ may be specified.
+ Format: <port>,<port>....
+
+ print-fatal-signals=
+ [KNL] debug: print fatal signals
+
+ If enabled, warn about various signal handling
+ related application anomalies: too many signals,
+ too many POSIX.1 timers, fatal signals causing a
+ coredump - etc.
+
+ If you hit the warning due to signal overflow,
+ you might want to try "ulimit -i unlimited".
+
+ default: off.
+
+ printk.always_kmsg_dump=
+ Trigger kmsg_dump for cases other than kernel oops or
+ panics
+ Format: <bool> (1/Y/y=enable, 0/N/n=disable)
+ default: disabled
+
+ printk.time= Show timing data prefixed to each printk message line
+ Format: <bool> (1/Y/y=enable, 0/N/n=disable)
+
+ processor.max_cstate= [HW,ACPI]
+ Limit processor to maximum C-state
+ max_cstate=9 overrides any DMI blacklist limit.
+
+ processor.nocst [HW,ACPI]
+ Ignore the _CST method to determine C-states,
+ instead using the legacy FADT method
+
+ profile= [KNL] Enable kernel profiling via /proc/profile
+ Format: [schedule,]<number>
+ Param: "schedule" - profile schedule points.
+ Param: <number> - step/bucket size as a power of 2 for
+ statistical time based profiling.
+ Param: "sleep" - profile D-state sleeping (millisecs).
+ Requires CONFIG_SCHEDSTATS
+ Param: "kvm" - profile VM exits.
+
+ prompt_ramdisk= [RAM] List of RAM disks to prompt for floppy disk
+ before loading.
+ See Documentation/blockdev/ramdisk.txt.
+
+ psmouse.proto= [HW,MOUSE] Highest PS2 mouse protocol extension to
+ probe for; one of (bare|imps|exps|lifebook|any).
+ psmouse.rate= [HW,MOUSE] Set desired mouse report rate, in reports
+ per second.
+ psmouse.resetafter= [HW,MOUSE]
+ Try to reset the device after so many bad packets
+ (0 = never).
+ psmouse.resolution=
+ [HW,MOUSE] Set desired mouse resolution, in dpi.
+ psmouse.smartscroll=
+ [HW,MOUSE] Controls Logitech smartscroll autorepeat.
+ 0 = disabled, 1 = enabled (default).
+
+ pstore.backend= Specify the name of the pstore backend to use
+
+ pt. [PARIDE]
+ See Documentation/blockdev/paride.txt.
+
+ pty.legacy_count=
+ [KNL] Number of legacy pty's. Overwrites compiled-in
+ default number.
+
+ quiet [KNL] Disable most log messages
+
+ r128= [HW,DRM]
+
+ raid= [HW,RAID]
+ See Documentation/md.txt.
+
+ ramdisk_blocksize= [RAM]
+ See Documentation/blockdev/ramdisk.txt.
+
+ ramdisk_size= [RAM] Sizes of RAM disks in kilobytes
+ See Documentation/blockdev/ramdisk.txt.
+
+ rcu_nocbs= [KNL]
+ In kernels built with CONFIG_RCU_NOCB_CPU=y, set
+ the specified list of CPUs to be no-callback CPUs.
+ Invocation of these CPUs' RCU callbacks will
+ be offloaded to "rcuox/N" kthreads created for
+ that purpose, where "x" is "b" for RCU-bh, "p"
+ for RCU-preempt, and "s" for RCU-sched, and "N"
+ is the CPU number. This reduces OS jitter on the
+ offloaded CPUs, which can be useful for HPC and
+ real-time workloads. It can also improve energy
+ efficiency for asymmetric multiprocessors.
+
+ rcu_nocb_poll [KNL]
+ Rather than requiring that offloaded CPUs
+ (specified by rcu_nocbs= above) explicitly
+ awaken the corresponding "rcuoN" kthreads,
+ make these kthreads poll for callbacks.
+ This improves the real-time response for the
+ offloaded CPUs by relieving them of the need to
+ wake up the corresponding kthread, but degrades
+ energy efficiency by requiring that the kthreads
+ periodically wake up to do the polling.
+
+ rcutree.blimit= [KNL]
+ Set maximum number of finished RCU callbacks to
+ process in one batch.
+
+ rcutree.gp_init_delay= [KNL]
+ Set the number of jiffies to delay each step of
+ RCU grace-period initialization. This only has
+ effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT is
+ set.
+
+ rcutree.rcu_fanout_leaf= [KNL]
+ Increase the number of CPUs assigned to each
+ leaf rcu_node structure. Useful for very large
+ systems.
+
+ rcutree.jiffies_till_sched_qs= [KNL]
+ Set required age in jiffies for a
+ given grace period before RCU starts
+ soliciting quiescent-state help from
+ rcu_note_context_switch().
+
+ rcutree.jiffies_till_first_fqs= [KNL]
+ Set delay from grace-period initialization to
+ first attempt to force quiescent states.
+ Units are jiffies, minimum value is zero,
+ and maximum value is HZ.
+
+ rcutree.jiffies_till_next_fqs= [KNL]
+ Set delay between subsequent attempts to force
+ quiescent states. Units are jiffies, minimum
+ value is one, and maximum value is HZ.
+
+ rcutree.kthread_prio= [KNL,BOOT]
+ Set the SCHED_FIFO priority of the RCU per-CPU
+ kthreads (rcuc/N). This value is also used for
+ the priority of the RCU boost threads (rcub/N)
+ and for the RCU grace-period kthreads (rcu_bh,
+ rcu_preempt, and rcu_sched). If RCU_BOOST is
+ set, valid values are 1-99 and the default is 1
+ (the least-favored priority). Otherwise, when
+ RCU_BOOST is not set, valid values are 0-99 and
+ the default is zero (non-realtime operation).
+
+ rcutree.rcu_nocb_leader_stride= [KNL]
+ Set the number of NOCB kthread groups, which
+ defaults to the square root of the number of
+ CPUs. Larger numbers reduces the wakeup overhead
+ on the per-CPU grace-period kthreads, but increases
+ that same overhead on each group's leader.
+
+ rcutree.qhimark= [KNL]
+ Set threshold of queued RCU callbacks beyond which
+ batch limiting is disabled.
+
+ rcutree.qlowmark= [KNL]
+ Set threshold of queued RCU callbacks below which
+ batch limiting is re-enabled.
+
+ rcutree.rcu_idle_gp_delay= [KNL]
+ Set wakeup interval for idle CPUs that have
+ RCU callbacks (RCU_FAST_NO_HZ=y).
+
+ rcutree.rcu_idle_lazy_gp_delay= [KNL]
+ Set wakeup interval for idle CPUs that have
+ only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y).
+ Lazy RCU callbacks are those which RCU can
+ prove do nothing more than free memory.
+
+ rcutorture.cbflood_inter_holdoff= [KNL]
+ Set holdoff time (jiffies) between successive
+ callback-flood tests.
+
+ rcutorture.cbflood_intra_holdoff= [KNL]
+ Set holdoff time (jiffies) between successive
+ bursts of callbacks within a given callback-flood
+ test.
+
+ rcutorture.cbflood_n_burst= [KNL]
+ Set the number of bursts making up a given
+ callback-flood test. Set this to zero to
+ disable callback-flood testing.
+
+ rcutorture.cbflood_n_per_burst= [KNL]
+ Set the number of callbacks to be registered
+ in a given burst of a callback-flood test.
+
+ rcutorture.fqs_duration= [KNL]
+ Set duration of force_quiescent_state bursts.
+
+ rcutorture.fqs_holdoff= [KNL]
+ Set holdoff time within force_quiescent_state bursts.
+
+ rcutorture.fqs_stutter= [KNL]
+ Set wait time between force_quiescent_state bursts.
+
+ rcutorture.gp_exp= [KNL]
+ Use expedited update-side primitives.
+
+ rcutorture.gp_normal= [KNL]
+ Use normal (non-expedited) update-side primitives.
+ If both gp_exp and gp_normal are set, do both.
+ If neither gp_exp nor gp_normal are set, still
+ do both.
+
+ rcutorture.n_barrier_cbs= [KNL]
+ Set callbacks/threads for rcu_barrier() testing.
+
+ rcutorture.nfakewriters= [KNL]
+ Set number of concurrent RCU writers. These just
+ stress RCU, they don't participate in the actual
+ test, hence the "fake".
+
+ rcutorture.nreaders= [KNL]
+ Set number of RCU readers.
+
+ rcutorture.object_debug= [KNL]
+ Enable debug-object double-call_rcu() testing.
+
+ rcutorture.onoff_holdoff= [KNL]
+ Set time (s) after boot for CPU-hotplug testing.
+
+ rcutorture.onoff_interval= [KNL]
+ Set time (s) between CPU-hotplug operations, or
+ zero to disable CPU-hotplug testing.
+
+ rcutorture.torture_runnable= [BOOT]
+ Start rcutorture running at boot time.
+
+ rcutorture.shuffle_interval= [KNL]
+ Set task-shuffle interval (s). Shuffling tasks
+ allows some CPUs to go into dyntick-idle mode
+ during the rcutorture test.
+
+ rcutorture.shutdown_secs= [KNL]
+ Set time (s) after boot system shutdown. This
+ is useful for hands-off automated testing.
+
+ rcutorture.stall_cpu= [KNL]
+ Duration of CPU stall (s) to test RCU CPU stall
+ warnings, zero to disable.
+
+ rcutorture.stall_cpu_holdoff= [KNL]
+ Time to wait (s) after boot before inducing stall.
+
+ rcutorture.stat_interval= [KNL]
+ Time (s) between statistics printk()s.
+
+ rcutorture.stutter= [KNL]
+ Time (s) to stutter testing, for example, specifying
+ five seconds causes the test to run for five seconds,
+ wait for five seconds, and so on. This tests RCU's
+ ability to transition abruptly to and from idle.
+
+ rcutorture.test_boost= [KNL]
+ Test RCU priority boosting? 0=no, 1=maybe, 2=yes.
+ "Maybe" means test if the RCU implementation
+ under test support RCU priority boosting.
+
+ rcutorture.test_boost_duration= [KNL]
+ Duration (s) of each individual boost test.
+
+ rcutorture.test_boost_interval= [KNL]
+ Interval (s) between each boost test.
+
+ rcutorture.test_no_idle_hz= [KNL]
+ Test RCU's dyntick-idle handling. See also the
+ rcutorture.shuffle_interval parameter.
+
+ rcutorture.torture_type= [KNL]
+ Specify the RCU implementation to test.
+
+ rcutorture.verbose= [KNL]
+ Enable additional printk() statements.
+
+ rcupdate.rcu_expedited= [KNL]
+ Use expedited grace-period primitives, for
+ example, synchronize_rcu_expedited() instead
+ of synchronize_rcu(). This reduces latency,
+ but can increase CPU utilization, degrade
+ real-time latency, and degrade energy efficiency.
+
+ rcupdate.rcu_cpu_stall_suppress= [KNL]
+ Suppress RCU CPU stall warning messages.
+
+ rcupdate.rcu_cpu_stall_timeout= [KNL]
+ Set timeout for RCU CPU stall warning messages.
+
+ rcupdate.rcu_task_stall_timeout= [KNL]
+ Set timeout in jiffies for RCU task stall warning
+ messages. Disable with a value less than or equal
+ to zero.
+
+ rcupdate.rcu_self_test= [KNL]
+ Run the RCU early boot self tests
+
+ rcupdate.rcu_self_test_bh= [KNL]
+ Run the RCU bh early boot self tests
+
+ rcupdate.rcu_self_test_sched= [KNL]
+ Run the RCU sched early boot self tests
+
+ rdinit= [KNL]
+ Format: <full_path>
+ Run specified binary instead of /init from the ramdisk,
+ used for early userspace startup. See initrd.
+
+ reboot= [KNL]
+ Format (x86 or x86_64):
+ [w[arm] | c[old] | h[ard] | s[oft] | g[pio]] \
+ [[,]s[mp]#### \
+ [[,]b[ios] | a[cpi] | k[bd] | t[riple] | e[fi] | p[ci]] \
+ [[,]f[orce]
+ Where reboot_mode is one of warm (soft) or cold (hard) or gpio,
+ reboot_type is one of bios, acpi, kbd, triple, efi, or pci,
+ reboot_force is either force or not specified,
+ reboot_cpu is s[mp]#### with #### being the processor
+ to be used for rebooting.
+
+ relax_domain_level=
+ [KNL, SMP] Set scheduler's default relax_domain_level.
+ See Documentation/cgroups/cpusets.txt.
+
+ relative_sleep_states=
+ [SUSPEND] Use sleep state labeling where the deepest
+ state available other than hibernation is always "mem".
+ Format: { "0" | "1" }
+ 0 -- Traditional sleep state labels.
+ 1 -- Relative sleep state labels.
+
+ reserve= [KNL,BUGS] Force the kernel to ignore some iomem area
+
+ reservetop= [X86-32]
+ Format: nn[KMG]
+ Reserves a hole at the top of the kernel virtual
+ address space.
+
+ reservelow= [X86]
+ Format: nn[K]
+ Set the amount of memory to reserve for BIOS at
+ the bottom of the address space.
+
+ reset_devices [KNL] Force drivers to reset the underlying device
+ during initialization.
+
+ resume= [SWSUSP]
+ Specify the partition device for software suspend
+ Format:
+ {/dev/<dev> | PARTUUID=<uuid> | <int>:<int> | <hex>}
+
+ resume_offset= [SWSUSP]
+ Specify the offset from the beginning of the partition
+ given by "resume=" at which the swap header is located,
+ in <PAGE_SIZE> units (needed only for swap files).
+ See Documentation/power/swsusp-and-swap-files.txt
+
+ resumedelay= [HIBERNATION] Delay (in seconds) to pause before attempting to
+ read the resume files
+
+ resumewait [HIBERNATION] Wait (indefinitely) for resume device to show up.
+ Useful for devices that are detected asynchronously
+ (e.g. USB and MMC devices).
+
+ hibernate= [HIBERNATION]
+ noresume Don't check if there's a hibernation image
+ present during boot.
+ nocompress Don't compress/decompress hibernation images.
+ no Disable hibernation and resume.
+
+ retain_initrd [RAM] Keep initrd memory after extraction
+
+ rfkill.default_state=
+ 0 "airplane mode". All wifi, bluetooth, wimax, gps, fm,
+ etc. communication is blocked by default.
+ 1 Unblocked.
+
+ rfkill.master_switch_mode=
+ 0 The "airplane mode" button does nothing.
+ 1 The "airplane mode" button toggles between everything
+ blocked and the previous configuration.
+ 2 The "airplane mode" button toggles between everything
+ blocked and everything unblocked.
+
+ rhash_entries= [KNL,NET]
+ Set number of hash buckets for route cache
+
+ ro [KNL] Mount root device read-only on boot
+
+ root= [KNL] Root filesystem
+ See name_to_dev_t comment in init/do_mounts.c.
+
+ rootdelay= [KNL] Delay (in seconds) to pause before attempting to
+ mount the root filesystem
+
+ rootflags= [KNL] Set root filesystem mount option string
+
+ rootfstype= [KNL] Set root filesystem type
+
+ rootwait [KNL] Wait (indefinitely) for root device to show up.
+ Useful for devices that are detected asynchronously
+ (e.g. USB and MMC devices).
+
+ rproc_mem=nn[KMG][@address]
+ [KNL,ARM,CMA] Remoteproc physical memory block.
+ Memory area to be used by remote processor image,
+ managed by CMA.
+
+ rw [KNL] Mount root device read-write on boot
+
+ S [KNL] Run init in single mode
+
+ s390_iommu= [HW,S390]
+ Set s390 IOTLB flushing mode
+ strict
+ With strict flushing every unmap operation will result in
+ an IOTLB flush. Default is lazy flushing before reuse,
+ which is faster.
+
+ sa1100ir [NET]
+ See drivers/net/irda/sa1100_ir.c.
+
+ sbni= [NET] Granch SBNI12 leased line adapter
+
+ sched_debug [KNL] Enables verbose scheduler debug messages.
+
+ skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate
+ xtime_lock contention on larger systems, and/or RCU lock
+ contention on all systems with CONFIG_MAXSMP set.
+ Format: { "0" | "1" }
+ 0 -- disable. (may be 1 via CONFIG_CMDLINE="skew_tick=1"
+ 1 -- enable.
+ Note: increases power consumption, thus should only be
+ enabled if running jitter sensitive (HPC/RT) workloads.
+
+ security= [SECURITY] Choose a security module to enable at boot.
+ If this boot parameter is not specified, only the first
+ security module asking for security registration will be
+ loaded. An invalid security module name will be treated
+ as if no module has been chosen.
+
+ selinux= [SELINUX] Disable or enable SELinux at boot time.
+ Format: { "0" | "1" }
+ See security/selinux/Kconfig help text.
+ 0 -- disable.
+ 1 -- enable.
+ Default value is set via kernel config option.
+ If enabled at boot time, /selinux/disable can be used
+ later to disable prior to initial policy load.
+
+ apparmor= [APPARMOR] Disable or enable AppArmor at boot time
+ Format: { "0" | "1" }
+ See security/apparmor/Kconfig help text
+ 0 -- disable.
+ 1 -- enable.
+ Default value is set via kernel config option.
+
+ serialnumber [BUGS=X86-32]
+
+ shapers= [NET]
+ Maximal number of shapers.
+
+ show_msr= [x86] show boot-time MSR settings
+ Format: { <integer> }
+ Show boot-time (BIOS-initialized) MSR settings.
+ The parameter means the number of CPUs to show,
+ for example 1 means boot CPU only.
+
+ simeth= [IA-64]
+ simscsi=
+
+ slram= [HW,MTD]
+
+ slab_nomerge [MM]
+ Disable merging of slabs with similar size. May be
+ necessary if there is some reason to distinguish
+ allocs to different slabs. Debug options disable
+ merging on their own.
+ For more information see Documentation/vm/slub.txt.
+
+ slab_max_order= [MM, SLAB]
+ Determines the maximum allowed order for slabs.
+ A high setting may cause OOMs due to memory
+ fragmentation. Defaults to 1 for systems with
+ more than 32MB of RAM, 0 otherwise.
+
+ slub_debug[=options[,slabs]] [MM, SLUB]
+ Enabling slub_debug allows one to determine the
+ culprit if slab objects become corrupted. Enabling
+ slub_debug can create guard zones around objects and
+ may poison objects when not in use. Also tracks the
+ last alloc / free. For more information see
+ Documentation/vm/slub.txt.
+
+ slub_max_order= [MM, SLUB]
+ Determines the maximum allowed order for slabs.
+ A high setting may cause OOMs due to memory
+ fragmentation. For more information see
+ Documentation/vm/slub.txt.
+
+ slub_min_objects= [MM, SLUB]
+ The minimum number of objects per slab. SLUB will
+ increase the slab order up to slub_max_order to
+ generate a sufficiently large slab able to contain
+ the number of objects indicated. The higher the number
+ of objects the smaller the overhead of tracking slabs
+ and the less frequently locks need to be acquired.
+ For more information see Documentation/vm/slub.txt.
+
+ slub_min_order= [MM, SLUB]
+ Determines the minimum page order for slabs. Must be
+ lower than slub_max_order.
+ For more information see Documentation/vm/slub.txt.
+
+ slub_nomerge [MM, SLUB]
+ Same with slab_nomerge. This is supported for legacy.
+ See slab_nomerge for more information.
+
+ smart2= [HW]
+ Format: <io1>[,<io2>[,...,<io8>]]
+
+ smsc-ircc2.nopnp [HW] Don't use PNP to discover SMC devices
+ smsc-ircc2.ircc_cfg= [HW] Device configuration I/O port
+ smsc-ircc2.ircc_sir= [HW] SIR base I/O port
+ smsc-ircc2.ircc_fir= [HW] FIR base I/O port
+ smsc-ircc2.ircc_irq= [HW] IRQ line
+ smsc-ircc2.ircc_dma= [HW] DMA channel
+ smsc-ircc2.ircc_transceiver= [HW] Transceiver type:
+ 0: Toshiba Satellite 1800 (GP data pin select)
+ 1: Fast pin select (default)
+ 2: ATC IRMode
+
+ softlockup_panic=
+ [KNL] Should the soft-lockup detector generate panics.
+ Format: <integer>
+
+ softlockup_all_cpu_backtrace=
+ [KNL] Should the soft-lockup detector generate
+ backtraces on all cpus.
+ Format: <integer>
+
+ sonypi.*= [HW] Sony Programmable I/O Control Device driver
+ See Documentation/laptops/sonypi.txt
+
+ spia_io_base= [HW,MTD]
+ spia_fio_base=
+ spia_pedr=
+ spia_peddr=
+
+ stacktrace [FTRACE]
+ Enabled the stack tracer on boot up.
+
+ stacktrace_filter=[function-list]
+ [FTRACE] Limit the functions that the stack tracer
+ will trace at boot up. function-list is a comma separated
+ list of functions. This list can be changed at run
+ time by the stack_trace_filter file in the debugfs
+ tracing directory. Note, this enables stack tracing
+ and the stacktrace above is not needed.
+
+ sti= [PARISC,HW]
+ Format: <num>
+ Set the STI (builtin display/keyboard on the HP-PARISC
+ machines) console (graphic card) which should be used
+ as the initial boot-console.
+ See also comment in drivers/video/console/sticore.c.
+
+ sti_font= [HW]
+ See comment in drivers/video/console/sticore.c.
+
+ stifb= [HW]
+ Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
+
+ sunrpc.min_resvport=
+ sunrpc.max_resvport=
+ [NFS,SUNRPC]
+ SunRPC servers often require that client requests
+ originate from a privileged port (i.e. a port in the
+ range 0 < portnr < 1024).
+ An administrator who wishes to reserve some of these
+ ports for other uses may adjust the range that the
+ kernel's sunrpc client considers to be privileged
+ using these two parameters to set the minimum and
+ maximum port values.
+
+ sunrpc.pool_mode=
+ [NFS]
+ Control how the NFS server code allocates CPUs to
+ service thread pools. Depending on how many NICs
+ you have and where their interrupts are bound, this
+ option will affect which CPUs will do NFS serving.
+ Note: this parameter cannot be changed while the
+ NFS server is running.
+
+ auto the server chooses an appropriate mode
+ automatically using heuristics
+ global a single global pool contains all CPUs
+ percpu one pool for each CPU
+ pernode one pool for each NUMA node (equivalent
+ to global on non-NUMA machines)
+
+ sunrpc.tcp_slot_table_entries=
+ sunrpc.udp_slot_table_entries=
+ [NFS,SUNRPC]
+ Sets the upper limit on the number of simultaneous
+ RPC calls that can be sent from the client to a
+ server. Increasing these values may allow you to
+ improve throughput, but will also increase the
+ amount of memory reserved for use by the client.
+
+ suspend.pm_test_delay=
+ [SUSPEND]
+ Sets the number of seconds to remain in a suspend test
+ mode before resuming the system (see
+ /sys/power/pm_test). Only available when CONFIG_PM_DEBUG
+ is set. Default value is 5.
+
+ swapaccount=[0|1]
+ [KNL] Enable accounting of swap in memory resource
+ controller if no parameter or 1 is given or disable
+ it if 0 is given (See Documentation/cgroups/memory.txt)
+
+ swiotlb= [ARM,IA-64,PPC,MIPS,X86]
+ Format: { <int> | force }
+ <int> -- Number of I/O TLB slabs
+ force -- force using of bounce buffers even if they
+ wouldn't be automatically used by the kernel
+
+ switches= [HW,M68k]
+
+ sysfs.deprecated=0|1 [KNL]
+ Enable/disable old style sysfs layout for old udev
+ on older distributions. When this option is enabled
+ very new udev will not work anymore. When this option
+ is disabled (or CONFIG_SYSFS_DEPRECATED not compiled)
+ in older udev will not work anymore.
+ Default depends on CONFIG_SYSFS_DEPRECATED_V2 set in
+ the kernel configuration.
+
+ sysrq_always_enabled
+ [KNL]
+ Ignore sysrq setting - this boot parameter will
+ neutralize any effect of /proc/sys/kernel/sysrq.
+ Useful for debugging.
+
+ tcpmhash_entries= [KNL,NET]
+ Set the number of tcp_metrics_hash slots.
+ Default value is 8192 or 16384 depending on total
+ ram pages. This is used to specify the TCP metrics
+ cache size. See Documentation/networking/ip-sysctl.txt
+ "tcp_no_metrics_save" section for more details.
+
+ tdfx= [HW,DRM]
+
+ test_suspend= [SUSPEND][,N]
+ Specify "mem" (for Suspend-to-RAM) or "standby" (for
+ standby suspend) or "freeze" (for suspend type freeze)
+ as the system sleep state during system startup with
+ the optional capability to repeat N number of times.
+ The system is woken from this state using a
+ wakeup-capable RTC alarm.
+
+ thash_entries= [KNL,NET]
+ Set number of hash buckets for TCP connection
+
+ thermal.act= [HW,ACPI]
+ -1: disable all active trip points in all thermal zones
+ <degrees C>: override all lowest active trip points
+
+ thermal.crt= [HW,ACPI]
+ -1: disable all critical trip points in all thermal zones
+ <degrees C>: override all critical trip points
+
+ thermal.nocrt= [HW,ACPI]
+ Set to disable actions on ACPI thermal zone
+ critical and hot trip points.
+
+ thermal.off= [HW,ACPI]
+ 1: disable ACPI thermal control
+
+ thermal.psv= [HW,ACPI]
+ -1: disable all passive trip points
+ <degrees C>: override all passive trip points to this
+ value
+
+ thermal.tzp= [HW,ACPI]
+ Specify global default ACPI thermal zone polling rate
+ <deci-seconds>: poll all this frequency
+ 0: no polling (default)
+
+ threadirqs [KNL]
+ Force threading of all interrupt handlers except those
+ marked explicitly IRQF_NO_THREAD.
+
+ tmem [KNL,XEN]
+ Enable the Transcendent memory driver if built-in.
+
+ tmem.cleancache=0|1 [KNL, XEN]
+ Default is on (1). Disable the usage of the cleancache
+ API to send anonymous pages to the hypervisor.
+
+ tmem.frontswap=0|1 [KNL, XEN]
+ Default is on (1). Disable the usage of the frontswap
+ API to send swap pages to the hypervisor. If disabled
+ the selfballooning and selfshrinking are force disabled.
+
+ tmem.selfballooning=0|1 [KNL, XEN]
+ Default is on (1). Disable the driving of swap pages
+ to the hypervisor.
+
+ tmem.selfshrinking=0|1 [KNL, XEN]
+ Default is on (1). Partial swapoff that immediately
+ transfers pages from Xen hypervisor back to the
+ kernel based on different criteria.
+
+ topology= [S390]
+ Format: {off | on}
+ Specify if the kernel should make use of the cpu
+ topology information if the hardware supports this.
+ The scheduler will make use of this information and
+ e.g. base its process migration decisions on it.
+ Default is on.
+
+ topology_updates= [KNL, PPC, NUMA]
+ Format: {off}
+ Specify if the kernel should ignore (off)
+ topology updates sent by the hypervisor to this
+ LPAR.
+
+ tp720= [HW,PS2]
+
+ tpm_suspend_pcr=[HW,TPM]
+ Format: integer pcr id
+ Specify that at suspend time, the tpm driver
+ should extend the specified pcr with zeros,
+ as a workaround for some chips which fail to
+ flush the last written pcr on TPM_SaveState.
+ This will guarantee that all the other pcrs
+ are saved.
+
+ trace_buf_size=nn[KMG]
+ [FTRACE] will set tracing buffer size on each cpu.
+
+ trace_event=[event-list]
+ [FTRACE] Set and start specified trace events in order
+ to facilitate early boot debugging.
+ See also Documentation/trace/events.txt
+
+ trace_options=[option-list]
+ [FTRACE] Enable or disable tracer options at boot.
+ The option-list is a comma delimited list of options
+ that can be enabled or disabled just as if you were
+ to echo the option name into
+
+ /sys/kernel/debug/tracing/trace_options
+
+ For example, to enable stacktrace option (to dump the
+ stack trace of each event), add to the command line:
+
+ trace_options=stacktrace
+
+ See also Documentation/trace/ftrace.txt "trace options"
+ section.
+
+ tp_printk[FTRACE]
+ Have the tracepoints sent to printk as well as the
+ tracing ring buffer. This is useful for early boot up
+ where the system hangs or reboots and does not give the
+ option for reading the tracing buffer or performing a
+ ftrace_dump_on_oops.
+
+ To turn off having tracepoints sent to printk,
+ echo 0 > /proc/sys/kernel/tracepoint_printk
+ Note, echoing 1 into this file without the
+ tracepoint_printk kernel cmdline option has no effect.
+
+ ** CAUTION **
+
+ Having tracepoints sent to printk() and activating high
+ frequency tracepoints such as irq or sched, can cause
+ the system to live lock.
+
+ traceoff_on_warning
+ [FTRACE] enable this option to disable tracing when a
+ warning is hit. This turns off "tracing_on". Tracing can
+ be enabled again by echoing '1' into the "tracing_on"
+ file located in /sys/kernel/debug/tracing/
+
+ This option is useful, as it disables the trace before
+ the WARNING dump is called, which prevents the trace to
+ be filled with content caused by the warning output.
+
+ This option can also be set at run time via the sysctl
+ option: kernel/traceoff_on_warning
+
+ transparent_hugepage=
+ [KNL]
+ Format: [always|madvise|never]
+ Can be used to control the default behavior of the system
+ with respect to transparent hugepages.
+ See Documentation/vm/transhuge.txt for more details.
+
+ tsc= Disable clocksource stability checks for TSC.
+ Format: <string>
+ [x86] reliable: mark tsc clocksource as reliable, this
+ disables clocksource verification at runtime, as well
+ as the stability checks done at bootup. Used to enable
+ high-resolution timer mode on older hardware, and in
+ virtualized environment.
+ [x86] noirqtime: Do not use TSC to do irq accounting.
+ Used to run time disable IRQ_TIME_ACCOUNTING on any
+ platforms where RDTSC is slow and this accounting
+ can add overhead.
+
+ turbografx.map[2|3]= [HW,JOY]
+ TurboGraFX parallel port interface
+ Format:
+ <port#>,<js1>,<js2>,<js3>,<js4>,<js5>,<js6>,<js7>
+ See also Documentation/input/joystick-parport.txt
+
+ udbg-immortal [PPC] When debugging early kernel crashes that
+ happen after console_init() and before a proper
+ console driver takes over, this boot options might
+ help "seeing" what's going on.
+
+ uhash_entries= [KNL,NET]
+ Set number of hash buckets for UDP/UDP-Lite connections
+
+ uhci-hcd.ignore_oc=
+ [USB] Ignore overcurrent events (default N).
+ Some badly-designed motherboards generate lots of
+ bogus events, for ports that aren't wired to
+ anything. Set this parameter to avoid log spamming.
+ Note that genuine overcurrent events won't be
+ reported either.
+
+ unknown_nmi_panic
+ [X86] Cause panic on unknown NMI.
+
+ usbcore.authorized_default=
+ [USB] Default USB device authorization:
+ (default -1 = authorized except for wireless USB,
+ 0 = not authorized, 1 = authorized)
+
+ usbcore.autosuspend=
+ [USB] The autosuspend time delay (in seconds) used
+ for newly-detected USB devices (default 2). This
+ is the time required before an idle device will be
+ autosuspended. Devices for which the delay is set
+ to a negative value won't be autosuspended at all.
+
+ usbcore.usbfs_snoop=
+ [USB] Set to log all usbfs traffic (default 0 = off).
+
+ usbcore.blinkenlights=
+ [USB] Set to cycle leds on hubs (default 0 = off).
+
+ usbcore.old_scheme_first=
+ [USB] Start with the old device initialization
+ scheme (default 0 = off).
+
+ usbcore.usbfs_memory_mb=
+ [USB] Memory limit (in MB) for buffers allocated by
+ usbfs (default = 16, 0 = max = 2047).
+
+ usbcore.use_both_schemes=
+ [USB] Try the other device initialization scheme
+ if the first one fails (default 1 = enabled).
+
+ usbcore.initial_descriptor_timeout=
+ [USB] Specifies timeout for the initial 64-byte
+ USB_REQ_GET_DESCRIPTOR request in milliseconds
+ (default 5000 = 5.0 seconds).
+
+ usbhid.mousepoll=
+ [USBHID] The interval which mice are to be polled at.
+
+ usb-storage.delay_use=
+ [UMS] The delay in seconds before a new device is
+ scanned for Logical Units (default 1).
+
+ usb-storage.quirks=
+ [UMS] A list of quirks entries to supplement or
+ override the built-in unusual_devs list. List
+ entries are separated by commas. Each entry has
+ the form VID:PID:Flags where VID and PID are Vendor
+ and Product ID values (4-digit hex numbers) and
+ Flags is a set of characters, each corresponding
+ to a common usb-storage quirk flag as follows:
+ a = SANE_SENSE (collect more than 18 bytes
+ of sense data);
+ b = BAD_SENSE (don't collect more than 18
+ bytes of sense data);
+ c = FIX_CAPACITY (decrease the reported
+ device capacity by one sector);
+ d = NO_READ_DISC_INFO (don't use
+ READ_DISC_INFO command);
+ e = NO_READ_CAPACITY_16 (don't use
+ READ_CAPACITY_16 command);
+ f = NO_REPORT_OPCODES (don't use report opcodes
+ command, uas only);
+ g = MAX_SECTORS_240 (don't transfer more than
+ 240 sectors at a time, uas only);
+ h = CAPACITY_HEURISTICS (decrease the
+ reported device capacity by one
+ sector if the number is odd);
+ i = IGNORE_DEVICE (don't bind to this
+ device);
+ l = NOT_LOCKABLE (don't try to lock and
+ unlock ejectable media);
+ m = MAX_SECTORS_64 (don't transfer more
+ than 64 sectors = 32 KB at a time);
+ n = INITIAL_READ10 (force a retry of the
+ initial READ(10) command);
+ o = CAPACITY_OK (accept the capacity
+ reported by the device);
+ p = WRITE_CACHE (the device cache is ON
+ by default);
+ r = IGNORE_RESIDUE (the device reports
+ bogus residue values);
+ s = SINGLE_LUN (the device has only one
+ Logical Unit);
+ t = NO_ATA_1X (don't allow ATA(12) and ATA(16)
+ commands, uas only);
+ u = IGNORE_UAS (don't bind to the uas driver);
+ w = NO_WP_DETECT (don't test whether the
+ medium is write-protected).
+ Example: quirks=0419:aaf5:rl,0421:0433:rc
+
+ user_debug= [KNL,ARM]
+ Format: <int>
+ See arch/arm/Kconfig.debug help text.
+ 1 - undefined instruction events
+ 2 - system calls
+ 4 - invalid data aborts
+ 8 - SIGSEGV faults
+ 16 - SIGBUS faults
+ Example: user_debug=31
+
+ userpte=
+ [X86] Flags controlling user PTE allocations.
+
+ nohigh = do not allocate PTE pages in
+ HIGHMEM regardless of setting
+ of CONFIG_HIGHPTE.
+
+ uuid_debug= (Boolean) whether to enable debugging of TuxOnIce's
+ uuid support.
+
+ vdso= [X86,SH]
+ On X86_32, this is an alias for vdso32=. Otherwise:
+
+ vdso=1: enable VDSO (the default)
+ vdso=0: disable VDSO mapping
+
+ vdso32= [X86] Control the 32-bit vDSO
+ vdso32=1: enable 32-bit VDSO
+ vdso32=0 or vdso32=2: disable 32-bit VDSO
+
+ See the help text for CONFIG_COMPAT_VDSO for more
+ details. If CONFIG_COMPAT_VDSO is set, the default is
+ vdso32=0; otherwise, the default is vdso32=1.
+
+ For compatibility with older kernels, vdso32=2 is an
+ alias for vdso32=0.
+
+ Try vdso32=0 if you encounter an error that says:
+ dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
+
+ vector= [IA-64,SMP]
+ vector=percpu: enable percpu vector domain
+
+ video= [FB] Frame buffer configuration
+ See Documentation/fb/modedb.txt.
+
+ video.brightness_switch_enabled= [0,1]
+ If set to 1, on receiving an ACPI notify event
+ generated by hotkey, video driver will adjust brightness
+ level and then send out the event to user space through
+ the allocated input device; If set to 0, video driver
+ will only send out the event without touching backlight
+ brightness level.
+ default: 1
+
+ virtio_mmio.device=
+ [VMMIO] Memory mapped virtio (platform) device.
+
+ <size>@<baseaddr>:<irq>[:<id>]
+ where:
+ <size> := size (can use standard suffixes
+ like K, M and G)
+ <baseaddr> := physical base address
+ <irq> := interrupt number (as passed to
+ request_irq())
+ <id> := (optional) platform device id
+ example:
+ virtio_mmio.device=1K@0x100b0000:48:7
+
+ Can be used multiple times for multiple devices.
+
+ vga= [BOOT,X86-32] Select a particular video mode
+ See Documentation/x86/boot.txt and
+ Documentation/svga.txt.
+ Use vga=ask for menu.
+ This is actually a boot loader parameter; the value is
+ passed to the kernel using a special protocol.
+
+ vmalloc=nn[KMG] [KNL,BOOT] Forces the vmalloc area to have an exact
+ size of <nn>. This can be used to increase the
+ minimum size (128MB on x86). It can also be used to
+ decrease the size and leave more room for directly
+ mapped kernel RAM.
+
+ vmhalt= [KNL,S390] Perform z/VM CP command after system halt.
+ Format: <command>
+
+ vmpanic= [KNL,S390] Perform z/VM CP command after kernel panic.
+ Format: <command>
+
+ vmpoff= [KNL,S390] Perform z/VM CP command after power off.
+ Format: <command>
+
+ vsyscall= [X86-64]
+ Controls the behavior of vsyscalls (i.e. calls to
+ fixed addresses of 0xffffffffff600x00 from legacy
+ code). Most statically-linked binaries and older
+ versions of glibc use these calls. Because these
+ functions are at fixed addresses, they make nice
+ targets for exploits that can control RIP.
+
+ emulate [default] Vsyscalls turn into traps and are
+ emulated reasonably safely.
+
+ native Vsyscalls are native syscall instructions.
+ This is a little bit faster than trapping
+ and makes a few dynamic recompilers work
+ better than they would in emulation mode.
+ It also makes exploits much easier to write.
+
+ none Vsyscalls don't work at all. This makes
+ them quite hard to use for exploits but
+ might break your system.
+
+ vt.color= [VT] Default text color.
+ Format: 0xYX, X = foreground, Y = background.
+ Default: 0x07 = light gray on black.
+
+ vt.cur_default= [VT] Default cursor shape.
+ Format: 0xCCBBAA, where AA, BB, and CC are the same as
+ the parameters of the <Esc>[?A;B;Cc escape sequence;
+ see VGA-softcursor.txt. Default: 2 = underline.
+
+ vt.default_blu= [VT]
+ Format: <blue0>,<blue1>,<blue2>,...,<blue15>
+ Change the default blue palette of the console.
+ This is a 16-member array composed of values
+ ranging from 0-255.
+
+ vt.default_grn= [VT]
+ Format: <green0>,<green1>,<green2>,...,<green15>
+ Change the default green palette of the console.
+ This is a 16-member array composed of values
+ ranging from 0-255.
+
+ vt.default_red= [VT]
+ Format: <red0>,<red1>,<red2>,...,<red15>
+ Change the default red palette of the console.
+ This is a 16-member array composed of values
+ ranging from 0-255.
+
+ vt.default_utf8=
+ [VT]
+ Format=<0|1>
+ Set system-wide default UTF-8 mode for all tty's.
+ Default is 1, i.e. UTF-8 mode is enabled for all
+ newly opened terminals.
+
+ vt.global_cursor_default=
+ [VT]
+ Format=<-1|0|1>
+ Set system-wide default for whether a cursor
+ is shown on new VTs. Default is -1,
+ i.e. cursors will be created by default unless
+ overridden by individual drivers. 0 will hide
+ cursors, 1 will display them.
+
+ vt.italic= [VT] Default color for italic text; 0-15.
+ Default: 2 = green.
+
+ vt.underline= [VT] Default color for underlined text; 0-15.
+ Default: 3 = cyan.
+
+ watchdog timers [HW,WDT] For information on watchdog timers,
+ see Documentation/watchdog/watchdog-parameters.txt
+ or other driver-specific files in the
+ Documentation/watchdog/ directory.
+
+ workqueue.disable_numa
+ By default, all work items queued to unbound
+ workqueues are affine to the NUMA nodes they're
+ issued on, which results in better behavior in
+ general. If NUMA affinity needs to be disabled for
+ whatever reason, this option can be used. Note
+ that this also can be controlled per-workqueue for
+ workqueues visible under /sys/bus/workqueue/.
+
+ workqueue.power_efficient
+ Per-cpu workqueues are generally preferred because
+ they show better performance thanks to cache
+ locality; unfortunately, per-cpu workqueues tend to
+ be more power hungry than unbound workqueues.
+
+ Enabling this makes the per-cpu workqueues which
+ were observed to contribute significantly to power
+ consumption unbound, leading to measurably lower
+ power usage at the cost of small performance
+ overhead.
+
+ The default value of this parameter is determined by
+ the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
+
+ x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
+ default x2apic cluster mode on platforms
+ supporting x2apic.
+
+ x86_intel_mid_timer= [X86-32,APBT]
+ Choose timer option for x86 Intel MID platform.
+ Two valid options are apbt timer only and lapic timer
+ plus one apbt timer for broadcast timer.
+ x86_intel_mid_timer=apbt_only | lapic_and_apbt
+
+ xen_emul_unplug= [HW,X86,XEN]
+ Unplug Xen emulated devices
+ Format: [unplug0,][unplug1]
+ ide-disks -- unplug primary master IDE devices
+ aux-ide-disks -- unplug non-primary-master IDE devices
+ nics -- unplug network devices
+ all -- unplug all emulated devices (NICs and IDE disks)
+ unnecessary -- unplugging emulated devices is
+ unnecessary even if the host did not respond to
+ the unplug protocol
+ never -- do not unplug even if version check succeeds
+
+ xen_nopvspin [X86,XEN]
+ Disables the ticketlock slowpath using Xen PV
+ optimizations.
+
+ xen_nopv [X86]
+ Disables the PV optimizations forcing the HVM guest to
+ run as generic HVM guest with no PV drivers.
+
+ xirc2ps_cs= [NET,PCMCIA]
+ Format:
+ <irq>,<irq_mask>,<io>,<full_duplex>,<do_sound>,<lockup_hack>[,<irq2>[,<irq3>[,<irq4>]]]
+
+______________________________________________________________________
+
+TODO:
+
+ Add more DRM drivers.
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
new file mode 100644
index 000000000..f4cbfe0ba
--- /dev/null
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -0,0 +1,273 @@
+REDUCING OS JITTER DUE TO PER-CPU KTHREADS
+
+This document lists per-CPU kthreads in the Linux kernel and presents
+options to control their OS jitter. Note that non-per-CPU kthreads are
+not listed here. To reduce OS jitter from non-per-CPU kthreads, bind
+them to a "housekeeping" CPU dedicated to such work.
+
+
+REFERENCES
+
+o Documentation/IRQ-affinity.txt: Binding interrupts to sets of CPUs.
+
+o Documentation/cgroups: Using cgroups to bind tasks to sets of CPUs.
+
+o man taskset: Using the taskset command to bind tasks to sets
+ of CPUs.
+
+o man sched_setaffinity: Using the sched_setaffinity() system
+ call to bind tasks to sets of CPUs.
+
+o /sys/devices/system/cpu/cpuN/online: Control CPU N's hotplug state,
+ writing "0" to offline and "1" to online.
+
+o In order to locate kernel-generated OS jitter on CPU N:
+
+ cd /sys/kernel/debug/tracing
+ echo 1 > max_graph_depth # Increase the "1" for more detail
+ echo function_graph > current_tracer
+ # run workload
+ cat per_cpu/cpuN/trace
+
+
+KTHREADS
+
+Name: ehca_comp/%u
+Purpose: Periodically process Infiniband-related work.
+To reduce its OS jitter, do any of the following:
+1. Don't use eHCA Infiniband hardware, instead choosing hardware
+ that does not require per-CPU kthreads. This will prevent these
+ kthreads from being created in the first place. (This will
+ work for most people, as this hardware, though important, is
+ relatively old and is produced in relatively low unit volumes.)
+2. Do all eHCA-Infiniband-related work on other CPUs, including
+ interrupts.
+3. Rework the eHCA driver so that its per-CPU kthreads are
+ provisioned only on selected CPUs.
+
+
+Name: irq/%d-%s
+Purpose: Handle threaded interrupts.
+To reduce its OS jitter, do the following:
+1. Use irq affinity to force the irq threads to execute on
+ some other CPU.
+
+Name: kcmtpd_ctr_%d
+Purpose: Handle Bluetooth work.
+To reduce its OS jitter, do one of the following:
+1. Don't use Bluetooth, in which case these kthreads won't be
+ created in the first place.
+2. Use irq affinity to force Bluetooth-related interrupts to
+ occur on some other CPU and furthermore initiate all
+ Bluetooth activity on some other CPU.
+
+Name: ksoftirqd/%u
+Purpose: Execute softirq handlers when threaded or when under heavy load.
+To reduce its OS jitter, each softirq vector must be handled
+separately as follows:
+TIMER_SOFTIRQ: Do all of the following:
+1. To the extent possible, keep the CPU out of the kernel when it
+ is non-idle, for example, by avoiding system calls and by forcing
+ both kernel threads and interrupts to execute elsewhere.
+2. Build with CONFIG_HOTPLUG_CPU=y. After boot completes, force
+ the CPU offline, then bring it back online. This forces
+ recurring timers to migrate elsewhere. If you are concerned
+ with multiple CPUs, force them all offline before bringing the
+ first one back online. Once you have onlined the CPUs in question,
+ do not offline any other CPUs, because doing so could force the
+ timer back onto one of the CPUs in question.
+NET_TX_SOFTIRQ and NET_RX_SOFTIRQ: Do all of the following:
+1. Force networking interrupts onto other CPUs.
+2. Initiate any network I/O on other CPUs.
+3. Once your application has started, prevent CPU-hotplug operations
+ from being initiated from tasks that might run on the CPU to
+ be de-jittered. (It is OK to force this CPU offline and then
+ bring it back online before you start your application.)
+BLOCK_SOFTIRQ: Do all of the following:
+1. Force block-device interrupts onto some other CPU.
+2. Initiate any block I/O on other CPUs.
+3. Once your application has started, prevent CPU-hotplug operations
+ from being initiated from tasks that might run on the CPU to
+ be de-jittered. (It is OK to force this CPU offline and then
+ bring it back online before you start your application.)
+BLOCK_IOPOLL_SOFTIRQ: Do all of the following:
+1. Force block-device interrupts onto some other CPU.
+2. Initiate any block I/O and block-I/O polling on other CPUs.
+3. Once your application has started, prevent CPU-hotplug operations
+ from being initiated from tasks that might run on the CPU to
+ be de-jittered. (It is OK to force this CPU offline and then
+ bring it back online before you start your application.)
+TASKLET_SOFTIRQ: Do one or more of the following:
+1. Avoid use of drivers that use tasklets. (Such drivers will contain
+ calls to things like tasklet_schedule().)
+2. Convert all drivers that you must use from tasklets to workqueues.
+3. Force interrupts for drivers using tasklets onto other CPUs,
+ and also do I/O involving these drivers on other CPUs.
+SCHED_SOFTIRQ: Do all of the following:
+1. Avoid sending scheduler IPIs to the CPU to be de-jittered,
+ for example, ensure that at most one runnable kthread is present
+ on that CPU. If a thread that expects to run on the de-jittered
+ CPU awakens, the scheduler will send an IPI that can result in
+ a subsequent SCHED_SOFTIRQ.
+2. Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y,
+ CONFIG_NO_HZ_FULL=y, and, in addition, ensure that the CPU
+ to be de-jittered is marked as an adaptive-ticks CPU using the
+ "nohz_full=" boot parameter. This reduces the number of
+ scheduler-clock interrupts that the de-jittered CPU receives,
+ minimizing its chances of being selected to do the load balancing
+ work that runs in SCHED_SOFTIRQ context.
+3. To the extent possible, keep the CPU out of the kernel when it
+ is non-idle, for example, by avoiding system calls and by
+ forcing both kernel threads and interrupts to execute elsewhere.
+ This further reduces the number of scheduler-clock interrupts
+ received by the de-jittered CPU.
+HRTIMER_SOFTIRQ: Do all of the following:
+1. To the extent possible, keep the CPU out of the kernel when it
+ is non-idle. For example, avoid system calls and force both
+ kernel threads and interrupts to execute elsewhere.
+2. Build with CONFIG_HOTPLUG_CPU=y. Once boot completes, force the
+ CPU offline, then bring it back online. This forces recurring
+ timers to migrate elsewhere. If you are concerned with multiple
+ CPUs, force them all offline before bringing the first one
+ back online. Once you have onlined the CPUs in question, do not
+ offline any other CPUs, because doing so could force the timer
+ back onto one of the CPUs in question.
+RCU_SOFTIRQ: Do at least one of the following:
+1. Offload callbacks and keep the CPU in either dyntick-idle or
+ adaptive-ticks state by doing all of the following:
+ a. Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y,
+ CONFIG_NO_HZ_FULL=y, and, in addition ensure that the CPU
+ to be de-jittered is marked as an adaptive-ticks CPU using
+ the "nohz_full=" boot parameter. Bind the rcuo kthreads
+ to housekeeping CPUs, which can tolerate OS jitter.
+ b. To the extent possible, keep the CPU out of the kernel
+ when it is non-idle, for example, by avoiding system
+ calls and by forcing both kernel threads and interrupts
+ to execute elsewhere.
+2. Enable RCU to do its processing remotely via dyntick-idle by
+ doing all of the following:
+ a. Build with CONFIG_NO_HZ=y and CONFIG_RCU_FAST_NO_HZ=y.
+ b. Ensure that the CPU goes idle frequently, allowing other
+ CPUs to detect that it has passed through an RCU quiescent
+ state. If the kernel is built with CONFIG_NO_HZ_FULL=y,
+ userspace execution also allows other CPUs to detect that
+ the CPU in question has passed through a quiescent state.
+ c. To the extent possible, keep the CPU out of the kernel
+ when it is non-idle, for example, by avoiding system
+ calls and by forcing both kernel threads and interrupts
+ to execute elsewhere.
+
+Name: kworker/%u:%d%s (cpu, id, priority)
+Purpose: Execute workqueue requests
+To reduce its OS jitter, do any of the following:
+1. Run your workload at a real-time priority, which will allow
+ preempting the kworker daemons.
+2. A given workqueue can be made visible in the sysfs filesystem
+ by passing the WQ_SYSFS to that workqueue's alloc_workqueue().
+ Such a workqueue can be confined to a given subset of the
+ CPUs using the /sys/devices/virtual/workqueue/*/cpumask sysfs
+ files. The set of WQ_SYSFS workqueues can be displayed using
+ "ls sys/devices/virtual/workqueue". That said, the workqueues
+ maintainer would like to caution people against indiscriminately
+ sprinkling WQ_SYSFS across all the workqueues. The reason for
+ caution is that it is easy to add WQ_SYSFS, but because sysfs is
+ part of the formal user/kernel API, it can be nearly impossible
+ to remove it, even if its addition was a mistake.
+3. Do any of the following needed to avoid jitter that your
+ application cannot tolerate:
+ a. Build your kernel with CONFIG_SLUB=y rather than
+ CONFIG_SLAB=y, thus avoiding the slab allocator's periodic
+ use of each CPU's workqueues to run its cache_reap()
+ function.
+ b. Avoid using oprofile, thus avoiding OS jitter from
+ wq_sync_buffer().
+ c. Limit your CPU frequency so that a CPU-frequency
+ governor is not required, possibly enlisting the aid of
+ special heatsinks or other cooling technologies. If done
+ correctly, and if you CPU architecture permits, you should
+ be able to build your kernel with CONFIG_CPU_FREQ=n to
+ avoid the CPU-frequency governor periodically running
+ on each CPU, including cs_dbs_timer() and od_dbs_timer().
+ WARNING: Please check your CPU specifications to
+ make sure that this is safe on your particular system.
+ d. As of v3.18, Christoph Lameter's on-demand vmstat workers
+ commit prevents OS jitter due to vmstat_update() on
+ CONFIG_SMP=y systems. Before v3.18, is not possible
+ to entirely get rid of the OS jitter, but you can
+ decrease its frequency by writing a large value to
+ /proc/sys/vm/stat_interval. The default value is HZ,
+ for an interval of one second. Of course, larger values
+ will make your virtual-memory statistics update more
+ slowly. Of course, you can also run your workload at
+ a real-time priority, thus preempting vmstat_update(),
+ but if your workload is CPU-bound, this is a bad idea.
+ However, there is an RFC patch from Christoph Lameter
+ (based on an earlier one from Gilad Ben-Yossef) that
+ reduces or even eliminates vmstat overhead for some
+ workloads at https://lkml.org/lkml/2013/9/4/379.
+ e. Boot with "elevator=noop" to avoid workqueue use by
+ the block layer.
+ f. If running on high-end powerpc servers, build with
+ CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS
+ daemon from running on each CPU every second or so.
+ (This will require editing Kconfig files and will defeat
+ this platform's RAS functionality.) This avoids jitter
+ due to the rtas_event_scan() function.
+ WARNING: Please check your CPU specifications to
+ make sure that this is safe on your particular system.
+ g. If running on Cell Processor, build your kernel with
+ CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from
+ spu_gov_work().
+ WARNING: Please check your CPU specifications to
+ make sure that this is safe on your particular system.
+ h. If running on PowerMAC, build your kernel with
+ CONFIG_PMAC_RACKMETER=n to disable the CPU-meter,
+ avoiding OS jitter from rackmeter_do_timer().
+
+Name: rcuc/%u
+Purpose: Execute RCU callbacks in CONFIG_RCU_BOOST=y kernels.
+To reduce its OS jitter, do at least one of the following:
+1. Build the kernel with CONFIG_PREEMPT=n. This prevents these
+ kthreads from being created in the first place, and also obviates
+ the need for RCU priority boosting. This approach is feasible
+ for workloads that do not require high degrees of responsiveness.
+2. Build the kernel with CONFIG_RCU_BOOST=n. This prevents these
+ kthreads from being created in the first place. This approach
+ is feasible only if your workload never requires RCU priority
+ boosting, for example, if you ensure frequent idle time on all
+ CPUs that might execute within the kernel.
+3. Build with CONFIG_RCU_NOCB_CPU=y and CONFIG_RCU_NOCB_CPU_ALL=y,
+ which offloads all RCU callbacks to kthreads that can be moved
+ off of CPUs susceptible to OS jitter. This approach prevents the
+ rcuc/%u kthreads from having any work to do, so that they are
+ never awakened.
+4. Ensure that the CPU never enters the kernel, and, in particular,
+ avoid initiating any CPU hotplug operations on this CPU. This is
+ another way of preventing any callbacks from being queued on the
+ CPU, again preventing the rcuc/%u kthreads from having any work
+ to do.
+
+Name: rcuob/%d, rcuop/%d, and rcuos/%d
+Purpose: Offload RCU callbacks from the corresponding CPU.
+To reduce its OS jitter, do at least one of the following:
+1. Use affinity, cgroups, or other mechanism to force these kthreads
+ to execute on some other CPU.
+2. Build with CONFIG_RCU_NOCB_CPU=n, which will prevent these
+ kthreads from being created in the first place. However, please
+ note that this will not eliminate OS jitter, but will instead
+ shift it to RCU_SOFTIRQ.
+
+Name: watchdog/%u
+Purpose: Detect software lockups on each CPU.
+To reduce its OS jitter, do at least one of the following:
+1. Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these
+ kthreads from being created in the first place.
+2. Boot with "nosoftlockup=0", which will also prevent these kthreads
+ from being created. Other related watchdog and softlockup boot
+ parameters may be found in Documentation/kernel-parameters.txt
+ and Documentation/watchdog/watchdog-parameters.txt.
+3. Echo a zero to /proc/sys/kernel/watchdog to disable the
+ watchdog timer.
+4. Echo a large number of /proc/sys/kernel/watchdog_thresh in
+ order to reduce the frequency of OS jitter due to the watchdog
+ timer down to a level that is acceptable for your workload.
diff --git a/Documentation/kmemcheck.txt b/Documentation/kmemcheck.txt
new file mode 100644
index 000000000..80aae85d8
--- /dev/null
+++ b/Documentation/kmemcheck.txt
@@ -0,0 +1,754 @@
+GETTING STARTED WITH KMEMCHECK
+==============================
+
+Vegard Nossum <vegardno@ifi.uio.no>
+
+
+Contents
+========
+0. Introduction
+1. Downloading
+2. Configuring and compiling
+3. How to use
+3.1. Booting
+3.2. Run-time enable/disable
+3.3. Debugging
+3.4. Annotating false positives
+4. Reporting errors
+5. Technical description
+
+
+0. Introduction
+===============
+
+kmemcheck is a debugging feature for the Linux Kernel. More specifically, it
+is a dynamic checker that detects and warns about some uses of uninitialized
+memory.
+
+Userspace programmers might be familiar with Valgrind's memcheck. The main
+difference between memcheck and kmemcheck is that memcheck works for userspace
+programs only, and kmemcheck works for the kernel only. The implementations
+are of course vastly different. Because of this, kmemcheck is not as accurate
+as memcheck, but it turns out to be good enough in practice to discover real
+programmer errors that the compiler is not able to find through static
+analysis.
+
+Enabling kmemcheck on a kernel will probably slow it down to the extent that
+the machine will not be usable for normal workloads such as e.g. an
+interactive desktop. kmemcheck will also cause the kernel to use about twice
+as much memory as normal. For this reason, kmemcheck is strictly a debugging
+feature.
+
+
+1. Downloading
+==============
+
+As of version 2.6.31-rc1, kmemcheck is included in the mainline kernel.
+
+
+2. Configuring and compiling
+============================
+
+kmemcheck only works for the x86 (both 32- and 64-bit) platform. A number of
+configuration variables must have specific settings in order for the kmemcheck
+menu to even appear in "menuconfig". These are:
+
+ o CONFIG_CC_OPTIMIZE_FOR_SIZE=n
+
+ This option is located under "General setup" / "Optimize for size".
+
+ Without this, gcc will use certain optimizations that usually lead to
+ false positive warnings from kmemcheck. An example of this is a 16-bit
+ field in a struct, where gcc may load 32 bits, then discard the upper
+ 16 bits. kmemcheck sees only the 32-bit load, and may trigger a
+ warning for the upper 16 bits (if they're uninitialized).
+
+ o CONFIG_SLAB=y or CONFIG_SLUB=y
+
+ This option is located under "General setup" / "Choose SLAB
+ allocator".
+
+ o CONFIG_FUNCTION_TRACER=n
+
+ This option is located under "Kernel hacking" / "Tracers" / "Kernel
+ Function Tracer"
+
+ When function tracing is compiled in, gcc emits a call to another
+ function at the beginning of every function. This means that when the
+ page fault handler is called, the ftrace framework will be called
+ before kmemcheck has had a chance to handle the fault. If ftrace then
+ modifies memory that was tracked by kmemcheck, the result is an
+ endless recursive page fault.
+
+ o CONFIG_DEBUG_PAGEALLOC=n
+
+ This option is located under "Kernel hacking" / "Memory Debugging"
+ / "Debug page memory allocations".
+
+In addition, I highly recommend turning on CONFIG_DEBUG_INFO=y. This is also
+located under "Kernel hacking". With this, you will be able to get line number
+information from the kmemcheck warnings, which is extremely valuable in
+debugging a problem. This option is not mandatory, however, because it slows
+down the compilation process and produces a much bigger kernel image.
+
+Now the kmemcheck menu should be visible (under "Kernel hacking" / "Memory
+Debugging" / "kmemcheck: trap use of uninitialized memory"). Here follows
+a description of the kmemcheck configuration variables:
+
+ o CONFIG_KMEMCHECK
+
+ This must be enabled in order to use kmemcheck at all...
+
+ o CONFIG_KMEMCHECK_[DISABLED | ENABLED | ONESHOT]_BY_DEFAULT
+
+ This option controls the status of kmemcheck at boot-time. "Enabled"
+ will enable kmemcheck right from the start, "disabled" will boot the
+ kernel as normal (but with the kmemcheck code compiled in, so it can
+ be enabled at run-time after the kernel has booted), and "one-shot" is
+ a special mode which will turn kmemcheck off automatically after
+ detecting the first use of uninitialized memory.
+
+ If you are using kmemcheck to actively debug a problem, then you
+ probably want to choose "enabled" here.
+
+ The one-shot mode is mostly useful in automated test setups because it
+ can prevent floods of warnings and increase the chances of the machine
+ surviving in case something is really wrong. In other cases, the one-
+ shot mode could actually be counter-productive because it would turn
+ itself off at the very first error -- in the case of a false positive
+ too -- and this would come in the way of debugging the specific
+ problem you were interested in.
+
+ If you would like to use your kernel as normal, but with a chance to
+ enable kmemcheck in case of some problem, it might be a good idea to
+ choose "disabled" here. When kmemcheck is disabled, most of the run-
+ time overhead is not incurred, and the kernel will be almost as fast
+ as normal.
+
+ o CONFIG_KMEMCHECK_QUEUE_SIZE
+
+ Select the maximum number of error reports to store in an internal
+ (fixed-size) buffer. Since errors can occur virtually anywhere and in
+ any context, we need a temporary storage area which is guaranteed not
+ to generate any other page faults when accessed. The queue will be
+ emptied as soon as a tasklet may be scheduled. If the queue is full,
+ new error reports will be lost.
+
+ The default value of 64 is probably fine. If some code produces more
+ than 64 errors within an irqs-off section, then the code is likely to
+ produce many, many more, too, and these additional reports seldom give
+ any more information (the first report is usually the most valuable
+ anyway).
+
+ This number might have to be adjusted if you are not using serial
+ console or similar to capture the kernel log. If you are using the
+ "dmesg" command to save the log, then getting a lot of kmemcheck
+ warnings might overflow the kernel log itself, and the earlier reports
+ will get lost in that way instead. Try setting this to 10 or so on
+ such a setup.
+
+ o CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT
+
+ Select the number of shadow bytes to save along with each entry of the
+ error-report queue. These bytes indicate what parts of an allocation
+ are initialized, uninitialized, etc. and will be displayed when an
+ error is detected to help the debugging of a particular problem.
+
+ The number entered here is actually the logarithm of the number of
+ bytes that will be saved. So if you pick for example 5 here, kmemcheck
+ will save 2^5 = 32 bytes.
+
+ The default value should be fine for debugging most problems. It also
+ fits nicely within 80 columns.
+
+ o CONFIG_KMEMCHECK_PARTIAL_OK
+
+ This option (when enabled) works around certain GCC optimizations that
+ produce 32-bit reads from 16-bit variables where the upper 16 bits are
+ thrown away afterwards.
+
+ The default value (enabled) is recommended. This may of course hide
+ some real errors, but disabling it would probably produce a lot of
+ false positives.
+
+ o CONFIG_KMEMCHECK_BITOPS_OK
+
+ This option silences warnings that would be generated for bit-field
+ accesses where not all the bits are initialized at the same time. This
+ may also hide some real bugs.
+
+ This option is probably obsolete, or it should be replaced with
+ the kmemcheck-/bitfield-annotations for the code in question. The
+ default value is therefore fine.
+
+Now compile the kernel as usual.
+
+
+3. How to use
+=============
+
+3.1. Booting
+============
+
+First some information about the command-line options. There is only one
+option specific to kmemcheck, and this is called "kmemcheck". It can be used
+to override the default mode as chosen by the CONFIG_KMEMCHECK_*_BY_DEFAULT
+option. Its possible settings are:
+
+ o kmemcheck=0 (disabled)
+ o kmemcheck=1 (enabled)
+ o kmemcheck=2 (one-shot mode)
+
+If SLUB debugging has been enabled in the kernel, it may take precedence over
+kmemcheck in such a way that the slab caches which are under SLUB debugging
+will not be tracked by kmemcheck. In order to ensure that this doesn't happen
+(even though it shouldn't by default), use SLUB's boot option "slub_debug",
+like this: slub_debug=-
+
+In fact, this option may also be used for fine-grained control over SLUB vs.
+kmemcheck. For example, if the command line includes "kmemcheck=1
+slub_debug=,dentry", then SLUB debugging will be used only for the "dentry"
+slab cache, and with kmemcheck tracking all the other caches. This is advanced
+usage, however, and is not generally recommended.
+
+
+3.2. Run-time enable/disable
+============================
+
+When the kernel has booted, it is possible to enable or disable kmemcheck at
+run-time. WARNING: This feature is still experimental and may cause false
+positive warnings to appear. Therefore, try not to use this. If you find that
+it doesn't work properly (e.g. you see an unreasonable amount of warnings), I
+will be happy to take bug reports.
+
+Use the file /proc/sys/kernel/kmemcheck for this purpose, e.g.:
+
+ $ echo 0 > /proc/sys/kernel/kmemcheck # disables kmemcheck
+
+The numbers are the same as for the kmemcheck= command-line option.
+
+
+3.3. Debugging
+==============
+
+A typical report will look something like this:
+
+WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
+80000000000000000000000000000000000000000088ffff0000000000000000
+ i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
+ ^
+
+Pid: 1856, comm: ntpdate Not tainted 2.6.29-rc5 #264 945P-A
+RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
+RSP: 0018:ffff88003cdf7d98 EFLAGS: 00210002
+RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
+RDX: ffff88003e5d6018 RSI: ffff88003e5d6024 RDI: ffff88003cdf7e84
+RBP: ffff88003cdf7db8 R08: ffff88003e5d6000 R09: 0000000000000000
+R10: 0000000000000080 R11: 0000000000000000 R12: 000000000000000e
+R13: ffff88003cdf7e78 R14: ffff88003d530710 R15: ffff88003d5a98c8
+FS: 0000000000000000(0000) GS:ffff880001982000(0063) knlGS:00000
+CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033
+CR2: ffff88003f806ea0 CR3: 000000003c036000 CR4: 00000000000006a0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000ffff4ff0 DR7: 0000000000000400
+ [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
+ [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
+ [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
+ [<ffffffff8100c7b5>] int_signal+0x12/0x17
+ [<ffffffffffffffff>] 0xffffffffffffffff
+
+The single most valuable information in this report is the RIP (or EIP on 32-
+bit) value. This will help us pinpoint exactly which instruction that caused
+the warning.
+
+If your kernel was compiled with CONFIG_DEBUG_INFO=y, then all we have to do
+is give this address to the addr2line program, like this:
+
+ $ addr2line -e vmlinux -i ffffffff8104ede8
+ arch/x86/include/asm/string_64.h:12
+ include/asm-generic/siginfo.h:287
+ kernel/signal.c:380
+ kernel/signal.c:410
+
+The "-e vmlinux" tells addr2line which file to look in. IMPORTANT: This must
+be the vmlinux of the kernel that produced the warning in the first place! If
+not, the line number information will almost certainly be wrong.
+
+The "-i" tells addr2line to also print the line numbers of inlined functions.
+In this case, the flag was very important, because otherwise, it would only
+have printed the first line, which is just a call to memcpy(), which could be
+called from a thousand places in the kernel, and is therefore not very useful.
+These inlined functions would not show up in the stack trace above, simply
+because the kernel doesn't load the extra debugging information. This
+technique can of course be used with ordinary kernel oopses as well.
+
+In this case, it's the caller of memcpy() that is interesting, and it can be
+found in include/asm-generic/siginfo.h, line 287:
+
+281 static inline void copy_siginfo(struct siginfo *to, struct siginfo *from)
+282 {
+283 if (from->si_code < 0)
+284 memcpy(to, from, sizeof(*to));
+285 else
+286 /* _sigchld is currently the largest know union member */
+287 memcpy(to, from, __ARCH_SI_PREAMBLE_SIZE + sizeof(from->_sifields._sigchld));
+288 }
+
+Since this was a read (kmemcheck usually warns about reads only, though it can
+warn about writes to unallocated or freed memory as well), it was probably the
+"from" argument which contained some uninitialized bytes. Following the chain
+of calls, we move upwards to see where "from" was allocated or initialized,
+kernel/signal.c, line 380:
+
+359 static void collect_signal(int sig, struct sigpending *list, siginfo_t *info)
+360 {
+...
+367 list_for_each_entry(q, &list->list, list) {
+368 if (q->info.si_signo == sig) {
+369 if (first)
+370 goto still_pending;
+371 first = q;
+...
+377 if (first) {
+378 still_pending:
+379 list_del_init(&first->list);
+380 copy_siginfo(info, &first->info);
+381 __sigqueue_free(first);
+...
+392 }
+393 }
+
+Here, it is &first->info that is being passed on to copy_siginfo(). The
+variable "first" was found on a list -- passed in as the second argument to
+collect_signal(). We continue our journey through the stack, to figure out
+where the item on "list" was allocated or initialized. We move to line 410:
+
+395 static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
+396 siginfo_t *info)
+397 {
+...
+410 collect_signal(sig, pending, info);
+...
+414 }
+
+Now we need to follow the "pending" pointer, since that is being passed on to
+collect_signal() as "list". At this point, we've run out of lines from the
+"addr2line" output. Not to worry, we just paste the next addresses from the
+kmemcheck stack dump, i.e.:
+
+ [<ffffffff8104f04e>] dequeue_signal+0x8e/0x170
+ [<ffffffff81050bd8>] get_signal_to_deliver+0x98/0x390
+ [<ffffffff8100b87d>] do_notify_resume+0xad/0x7d0
+ [<ffffffff8100c7b5>] int_signal+0x12/0x17
+
+ $ addr2line -e vmlinux -i ffffffff8104f04e ffffffff81050bd8 \
+ ffffffff8100b87d ffffffff8100c7b5
+ kernel/signal.c:446
+ kernel/signal.c:1806
+ arch/x86/kernel/signal.c:805
+ arch/x86/kernel/signal.c:871
+ arch/x86/kernel/entry_64.S:694
+
+Remember that since these addresses were found on the stack and not as the
+RIP value, they actually point to the _next_ instruction (they are return
+addresses). This becomes obvious when we look at the code for line 446:
+
+422 int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
+423 {
+...
+431 signr = __dequeue_signal(&tsk->signal->shared_pending,
+432 mask, info);
+433 /*
+434 * itimer signal ?
+435 *
+436 * itimers are process shared and we restart periodic
+437 * itimers in the signal delivery path to prevent DoS
+438 * attacks in the high resolution timer case. This is
+439 * compliant with the old way of self restarting
+440 * itimers, as the SIGALRM is a legacy signal and only
+441 * queued once. Changing the restart behaviour to
+442 * restart the timer in the signal dequeue path is
+443 * reducing the timer noise on heavy loaded !highres
+444 * systems too.
+445 */
+446 if (unlikely(signr == SIGALRM)) {
+...
+489 }
+
+So instead of looking at 446, we should be looking at 431, which is the line
+that executes just before 446. Here we see that what we are looking for is
+&tsk->signal->shared_pending.
+
+Our next task is now to figure out which function that puts items on this
+"shared_pending" list. A crude, but efficient tool, is git grep:
+
+ $ git grep -n 'shared_pending' kernel/
+ ...
+ kernel/signal.c:828: pending = group ? &t->signal->shared_pending : &t->pending;
+ kernel/signal.c:1339: pending = group ? &t->signal->shared_pending : &t->pending;
+ ...
+
+There were more results, but none of them were related to list operations,
+and these were the only assignments. We inspect the line numbers more closely
+and find that this is indeed where items are being added to the list:
+
+816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
+817 int group)
+818 {
+...
+828 pending = group ? &t->signal->shared_pending : &t->pending;
+...
+851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
+852 (is_si_special(info) ||
+853 info->si_code >= 0)));
+854 if (q) {
+855 list_add_tail(&q->list, &pending->list);
+...
+890 }
+
+and:
+
+1309 int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
+1310 {
+....
+1339 pending = group ? &t->signal->shared_pending : &t->pending;
+1340 list_add_tail(&q->list, &pending->list);
+....
+1347 }
+
+In the first case, the list element we are looking for, "q", is being returned
+from the function __sigqueue_alloc(), which looks like an allocation function.
+Let's take a look at it:
+
+187 static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags,
+188 int override_rlimit)
+189 {
+190 struct sigqueue *q = NULL;
+191 struct user_struct *user;
+192
+193 /*
+194 * We won't get problems with the target's UID changing under us
+195 * because changing it requires RCU be used, and if t != current, the
+196 * caller must be holding the RCU readlock (by way of a spinlock) and
+197 * we use RCU protection here
+198 */
+199 user = get_uid(__task_cred(t)->user);
+200 atomic_inc(&user->sigpending);
+201 if (override_rlimit ||
+202 atomic_read(&user->sigpending) <=
+203 t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur)
+204 q = kmem_cache_alloc(sigqueue_cachep, flags);
+205 if (unlikely(q == NULL)) {
+206 atomic_dec(&user->sigpending);
+207 free_uid(user);
+208 } else {
+209 INIT_LIST_HEAD(&q->list);
+210 q->flags = 0;
+211 q->user = user;
+212 }
+213
+214 return q;
+215 }
+
+We see that this function initializes q->list, q->flags, and q->user. It seems
+that now is the time to look at the definition of "struct sigqueue", e.g.:
+
+14 struct sigqueue {
+15 struct list_head list;
+16 int flags;
+17 siginfo_t info;
+18 struct user_struct *user;
+19 };
+
+And, you might remember, it was a memcpy() on &first->info that caused the
+warning, so this makes perfect sense. It also seems reasonable to assume that
+it is the caller of __sigqueue_alloc() that has the responsibility of filling
+out (initializing) this member.
+
+But just which fields of the struct were uninitialized? Let's look at
+kmemcheck's report again:
+
+WARNING: kmemcheck: Caught 32-bit read from uninitialized memory (ffff88003e4a2024)
+80000000000000000000000000000000000000000088ffff0000000000000000
+ i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
+ ^
+
+These first two lines are the memory dump of the memory object itself, and the
+shadow bytemap, respectively. The memory object itself is in this case
+&first->info. Just beware that the start of this dump is NOT the start of the
+object itself! The position of the caret (^) corresponds with the address of
+the read (ffff88003e4a2024).
+
+The shadow bytemap dump legend is as follows:
+
+ i - initialized
+ u - uninitialized
+ a - unallocated (memory has been allocated by the slab layer, but has not
+ yet been handed off to anybody)
+ f - freed (memory has been allocated by the slab layer, but has been freed
+ by the previous owner)
+
+In order to figure out where (relative to the start of the object) the
+uninitialized memory was located, we have to look at the disassembly. For
+that, we'll need the RIP address again:
+
+RIP: 0010:[<ffffffff8104ede8>] [<ffffffff8104ede8>] __dequeue_signal+0xc8/0x190
+
+ $ objdump -d --no-show-raw-insn vmlinux | grep -C 8 ffffffff8104ede8:
+ ffffffff8104edc8: mov %r8,0x8(%r8)
+ ffffffff8104edcc: test %r10d,%r10d
+ ffffffff8104edcf: js ffffffff8104ee88 <__dequeue_signal+0x168>
+ ffffffff8104edd5: mov %rax,%rdx
+ ffffffff8104edd8: mov $0xc,%ecx
+ ffffffff8104eddd: mov %r13,%rdi
+ ffffffff8104ede0: mov $0x30,%eax
+ ffffffff8104ede5: mov %rdx,%rsi
+ ffffffff8104ede8: rep movsl %ds:(%rsi),%es:(%rdi)
+ ffffffff8104edea: test $0x2,%al
+ ffffffff8104edec: je ffffffff8104edf0 <__dequeue_signal+0xd0>
+ ffffffff8104edee: movsw %ds:(%rsi),%es:(%rdi)
+ ffffffff8104edf0: test $0x1,%al
+ ffffffff8104edf2: je ffffffff8104edf5 <__dequeue_signal+0xd5>
+ ffffffff8104edf4: movsb %ds:(%rsi),%es:(%rdi)
+ ffffffff8104edf5: mov %r8,%rdi
+ ffffffff8104edf8: callq ffffffff8104de60 <__sigqueue_free>
+
+As expected, it's the "rep movsl" instruction from the memcpy() that causes
+the warning. We know about REP MOVSL that it uses the register RCX to count
+the number of remaining iterations. By taking a look at the register dump
+again (from the kmemcheck report), we can figure out how many bytes were left
+to copy:
+
+RAX: 0000000000000030 RBX: ffff88003d4ea968 RCX: 0000000000000009
+
+By looking at the disassembly, we also see that %ecx is being loaded with the
+value $0xc just before (ffffffff8104edd8), so we are very lucky. Keep in mind
+that this is the number of iterations, not bytes. And since this is a "long"
+operation, we need to multiply by 4 to get the number of bytes. So this means
+that the uninitialized value was encountered at 4 * (0xc - 0x9) = 12 bytes
+from the start of the object.
+
+We can now try to figure out which field of the "struct siginfo" that was not
+initialized. This is the beginning of the struct:
+
+40 typedef struct siginfo {
+41 int si_signo;
+42 int si_errno;
+43 int si_code;
+44
+45 union {
+..
+92 } _sifields;
+93 } siginfo_t;
+
+On 64-bit, the int is 4 bytes long, so it must the union member that has
+not been initialized. We can verify this using gdb:
+
+ $ gdb vmlinux
+ ...
+ (gdb) p &((struct siginfo *) 0)->_sifields
+ $1 = (union {...} *) 0x10
+
+Actually, it seems that the union member is located at offset 0x10 -- which
+means that gcc has inserted 4 bytes of padding between the members si_code
+and _sifields. We can now get a fuller picture of the memory dump:
+
+ _----------------------------=> si_code
+ / _--------------------=> (padding)
+ | / _------------=> _sifields(._kill._pid)
+ | | / _----=> _sifields(._kill._uid)
+ | | | /
+-------|-------|-------|-------|
+80000000000000000000000000000000000000000088ffff0000000000000000
+ i i i i u u u u i i i i i i i i u u u u u u u u u u u u u u u u
+
+This allows us to realize another important fact: si_code contains the value
+0x80. Remember that x86 is little endian, so the first 4 bytes "80000000" are
+really the number 0x00000080. With a bit of research, we find that this is
+actually the constant SI_KERNEL defined in include/asm-generic/siginfo.h:
+
+144 #define SI_KERNEL 0x80 /* sent by the kernel from somewhere */
+
+This macro is used in exactly one place in the x86 kernel: In send_signal()
+in kernel/signal.c:
+
+816 static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
+817 int group)
+818 {
+...
+828 pending = group ? &t->signal->shared_pending : &t->pending;
+...
+851 q = __sigqueue_alloc(t, GFP_ATOMIC, (sig < SIGRTMIN &&
+852 (is_si_special(info) ||
+853 info->si_code >= 0)));
+854 if (q) {
+855 list_add_tail(&q->list, &pending->list);
+856 switch ((unsigned long) info) {
+...
+865 case (unsigned long) SEND_SIG_PRIV:
+866 q->info.si_signo = sig;
+867 q->info.si_errno = 0;
+868 q->info.si_code = SI_KERNEL;
+869 q->info.si_pid = 0;
+870 q->info.si_uid = 0;
+871 break;
+...
+890 }
+
+Not only does this match with the .si_code member, it also matches the place
+we found earlier when looking for where siginfo_t objects are enqueued on the
+"shared_pending" list.
+
+So to sum up: It seems that it is the padding introduced by the compiler
+between two struct fields that is uninitialized, and this gets reported when
+we do a memcpy() on the struct. This means that we have identified a false
+positive warning.
+
+Normally, kmemcheck will not report uninitialized accesses in memcpy() calls
+when both the source and destination addresses are tracked. (Instead, we copy
+the shadow bytemap as well). In this case, the destination address clearly
+was not tracked. We can dig a little deeper into the stack trace from above:
+
+ arch/x86/kernel/signal.c:805
+ arch/x86/kernel/signal.c:871
+ arch/x86/kernel/entry_64.S:694
+
+And we clearly see that the destination siginfo object is located on the
+stack:
+
+782 static void do_signal(struct pt_regs *regs)
+783 {
+784 struct k_sigaction ka;
+785 siginfo_t info;
+...
+804 signr = get_signal_to_deliver(&info, &ka, regs, NULL);
+...
+854 }
+
+And this &info is what eventually gets passed to copy_siginfo() as the
+destination argument.
+
+Now, even though we didn't find an actual error here, the example is still a
+good one, because it shows how one would go about to find out what the report
+was all about.
+
+
+3.4. Annotating false positives
+===============================
+
+There are a few different ways to make annotations in the source code that
+will keep kmemcheck from checking and reporting certain allocations. Here
+they are:
+
+ o __GFP_NOTRACK_FALSE_POSITIVE
+
+ This flag can be passed to kmalloc() or kmem_cache_alloc() (therefore
+ also to other functions that end up calling one of these) to indicate
+ that the allocation should not be tracked because it would lead to
+ a false positive report. This is a "big hammer" way of silencing
+ kmemcheck; after all, even if the false positive pertains to
+ particular field in a struct, for example, we will now lose the
+ ability to find (real) errors in other parts of the same struct.
+
+ Example:
+
+ /* No warnings will ever trigger on accessing any part of x */
+ x = kmalloc(sizeof *x, GFP_KERNEL | __GFP_NOTRACK_FALSE_POSITIVE);
+
+ o kmemcheck_bitfield_begin(name)/kmemcheck_bitfield_end(name) and
+ kmemcheck_annotate_bitfield(ptr, name)
+
+ The first two of these three macros can be used inside struct
+ definitions to signal, respectively, the beginning and end of a
+ bitfield. Additionally, this will assign the bitfield a name, which
+ is given as an argument to the macros.
+
+ Having used these markers, one can later use
+ kmemcheck_annotate_bitfield() at the point of allocation, to indicate
+ which parts of the allocation is part of a bitfield.
+
+ Example:
+
+ struct foo {
+ int x;
+
+ kmemcheck_bitfield_begin(flags);
+ int flag_a:1;
+ int flag_b:1;
+ kmemcheck_bitfield_end(flags);
+
+ int y;
+ };
+
+ struct foo *x = kmalloc(sizeof *x);
+
+ /* No warnings will trigger on accessing the bitfield of x */
+ kmemcheck_annotate_bitfield(x, flags);
+
+ Note that kmemcheck_annotate_bitfield() can be used even before the
+ return value of kmalloc() is checked -- in other words, passing NULL
+ as the first argument is legal (and will do nothing).
+
+
+4. Reporting errors
+===================
+
+As we have seen, kmemcheck will produce false positive reports. Therefore, it
+is not very wise to blindly post kmemcheck warnings to mailing lists and
+maintainers. Instead, I encourage maintainers and developers to find errors
+in their own code. If you get a warning, you can try to work around it, try
+to figure out if it's a real error or not, or simply ignore it. Most
+developers know their own code and will quickly and efficiently determine the
+root cause of a kmemcheck report. This is therefore also the most efficient
+way to work with kmemcheck.
+
+That said, we (the kmemcheck maintainers) will always be on the lookout for
+false positives that we can annotate and silence. So whatever you find,
+please drop us a note privately! Kernel configs and steps to reproduce (if
+available) are of course a great help too.
+
+Happy hacking!
+
+
+5. Technical description
+========================
+
+kmemcheck works by marking memory pages non-present. This means that whenever
+somebody attempts to access the page, a page fault is generated. The page
+fault handler notices that the page was in fact only hidden, and so it calls
+on the kmemcheck code to make further investigations.
+
+When the investigations are completed, kmemcheck "shows" the page by marking
+it present (as it would be under normal circumstances). This way, the
+interrupted code can continue as usual.
+
+But after the instruction has been executed, we should hide the page again, so
+that we can catch the next access too! Now kmemcheck makes use of a debugging
+feature of the processor, namely single-stepping. When the processor has
+finished the one instruction that generated the memory access, a debug
+exception is raised. From here, we simply hide the page again and continue
+execution, this time with the single-stepping feature turned off.
+
+kmemcheck requires some assistance from the memory allocator in order to work.
+The memory allocator needs to
+
+ 1. Tell kmemcheck about newly allocated pages and pages that are about to
+ be freed. This allows kmemcheck to set up and tear down the shadow memory
+ for the pages in question. The shadow memory stores the status of each
+ byte in the allocation proper, e.g. whether it is initialized or
+ uninitialized.
+
+ 2. Tell kmemcheck which parts of memory should be marked uninitialized.
+ There are actually a few more states, such as "not yet allocated" and
+ "recently freed".
+
+If a slab cache is set up using the SLAB_NOTRACK flag, it will never return
+memory that can take page faults because of kmemcheck.
+
+If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still
+request memory with the __GFP_NOTRACK or __GFP_NOTRACK_FALSE_POSITIVE flags.
+This does not prevent the page faults from occurring, however, but marks the
+object in question as being initialized so that no warnings will ever be
+produced for this object.
+
+Currently, the SLAB and SLUB allocators are supported by kmemcheck.
diff --git a/Documentation/kmemleak.txt b/Documentation/kmemleak.txt
new file mode 100644
index 000000000..45e777f4e
--- /dev/null
+++ b/Documentation/kmemleak.txt
@@ -0,0 +1,203 @@
+Kernel Memory Leak Detector
+===========================
+
+Introduction
+------------
+
+Kmemleak provides a way of detecting possible kernel memory leaks in a
+way similar to a tracing garbage collector
+(http://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors),
+with the difference that the orphan objects are not freed but only
+reported via /sys/kernel/debug/kmemleak. A similar method is used by the
+Valgrind tool (memcheck --leak-check) to detect the memory leaks in
+user-space applications.
+Kmemleak is supported on x86, arm, powerpc, sparc, sh, microblaze, ppc, mips, s390, metag and tile.
+
+Usage
+-----
+
+CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel
+thread scans the memory every 10 minutes (by default) and prints the
+number of new unreferenced objects found. To display the details of all
+the possible memory leaks:
+
+ # mount -t debugfs nodev /sys/kernel/debug/
+ # cat /sys/kernel/debug/kmemleak
+
+To trigger an intermediate memory scan:
+
+ # echo scan > /sys/kernel/debug/kmemleak
+
+To clear the list of all current possible memory leaks:
+
+ # echo clear > /sys/kernel/debug/kmemleak
+
+New leaks will then come up upon reading /sys/kernel/debug/kmemleak
+again.
+
+Note that the orphan objects are listed in the order they were allocated
+and one object at the beginning of the list may cause other subsequent
+objects to be reported as orphan.
+
+Memory scanning parameters can be modified at run-time by writing to the
+/sys/kernel/debug/kmemleak file. The following parameters are supported:
+
+ off - disable kmemleak (irreversible)
+ stack=on - enable the task stacks scanning (default)
+ stack=off - disable the tasks stacks scanning
+ scan=on - start the automatic memory scanning thread (default)
+ scan=off - stop the automatic memory scanning thread
+ scan=<secs> - set the automatic memory scanning period in seconds
+ (default 600, 0 to stop the automatic scanning)
+ scan - trigger a memory scan
+ clear - clear list of current memory leak suspects, done by
+ marking all current reported unreferenced objects grey,
+ or free all kmemleak objects if kmemleak has been disabled.
+ dump=<addr> - dump information about the object found at <addr>
+
+Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on
+the kernel command line.
+
+Memory may be allocated or freed before kmemleak is initialised and
+these actions are stored in an early log buffer. The size of this buffer
+is configured via the CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE option.
+
+If CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF are enabled, the kmemleak is
+disabled by default. Passing "kmemleak=on" on the kernel command
+line enables the function.
+
+Basic Algorithm
+---------------
+
+The memory allocations via kmalloc, vmalloc, kmem_cache_alloc and
+friends are traced and the pointers, together with additional
+information like size and stack trace, are stored in a rbtree.
+The corresponding freeing function calls are tracked and the pointers
+removed from the kmemleak data structures.
+
+An allocated block of memory is considered orphan if no pointer to its
+start address or to any location inside the block can be found by
+scanning the memory (including saved registers). This means that there
+might be no way for the kernel to pass the address of the allocated
+block to a freeing function and therefore the block is considered a
+memory leak.
+
+The scanning algorithm steps:
+
+ 1. mark all objects as white (remaining white objects will later be
+ considered orphan)
+ 2. scan the memory starting with the data section and stacks, checking
+ the values against the addresses stored in the rbtree. If
+ a pointer to a white object is found, the object is added to the
+ gray list
+ 3. scan the gray objects for matching addresses (some white objects
+ can become gray and added at the end of the gray list) until the
+ gray set is finished
+ 4. the remaining white objects are considered orphan and reported via
+ /sys/kernel/debug/kmemleak
+
+Some allocated memory blocks have pointers stored in the kernel's
+internal data structures and they cannot be detected as orphans. To
+avoid this, kmemleak can also store the number of values pointing to an
+address inside the block address range that need to be found so that the
+block is not considered a leak. One example is __vmalloc().
+
+Testing specific sections with kmemleak
+---------------------------------------
+
+Upon initial bootup your /sys/kernel/debug/kmemleak output page may be
+quite extensive. This can also be the case if you have very buggy code
+when doing development. To work around these situations you can use the
+'clear' command to clear all reported unreferenced objects from the
+/sys/kernel/debug/kmemleak output. By issuing a 'scan' after a 'clear'
+you can find new unreferenced objects; this should help with testing
+specific sections of code.
+
+To test a critical section on demand with a clean kmemleak do:
+
+ # echo clear > /sys/kernel/debug/kmemleak
+ ... test your kernel or modules ...
+ # echo scan > /sys/kernel/debug/kmemleak
+
+Then as usual to get your report with:
+
+ # cat /sys/kernel/debug/kmemleak
+
+Freeing kmemleak internal objects
+---------------------------------
+
+To allow access to previously found memory leaks after kmemleak has been
+disabled by the user or due to an fatal error, internal kmemleak objects
+won't be freed when kmemleak is disabled, and those objects may occupy
+a large part of physical memory.
+
+In this situation, you may reclaim memory with:
+
+ # echo clear > /sys/kernel/debug/kmemleak
+
+Kmemleak API
+------------
+
+See the include/linux/kmemleak.h header for the functions prototype.
+
+kmemleak_init - initialize kmemleak
+kmemleak_alloc - notify of a memory block allocation
+kmemleak_alloc_percpu - notify of a percpu memory block allocation
+kmemleak_free - notify of a memory block freeing
+kmemleak_free_part - notify of a partial memory block freeing
+kmemleak_free_percpu - notify of a percpu memory block freeing
+kmemleak_update_trace - update object allocation stack trace
+kmemleak_not_leak - mark an object as not a leak
+kmemleak_ignore - do not scan or report an object as leak
+kmemleak_scan_area - add scan areas inside a memory block
+kmemleak_no_scan - do not scan a memory block
+kmemleak_erase - erase an old value in a pointer variable
+kmemleak_alloc_recursive - as kmemleak_alloc but checks the recursiveness
+kmemleak_free_recursive - as kmemleak_free but checks the recursiveness
+
+Dealing with false positives/negatives
+--------------------------------------
+
+The false negatives are real memory leaks (orphan objects) but not
+reported by kmemleak because values found during the memory scanning
+point to such objects. To reduce the number of false negatives, kmemleak
+provides the kmemleak_ignore, kmemleak_scan_area, kmemleak_no_scan and
+kmemleak_erase functions (see above). The task stacks also increase the
+amount of false negatives and their scanning is not enabled by default.
+
+The false positives are objects wrongly reported as being memory leaks
+(orphan). For objects known not to be leaks, kmemleak provides the
+kmemleak_not_leak function. The kmemleak_ignore could also be used if
+the memory block is known not to contain other pointers and it will no
+longer be scanned.
+
+Some of the reported leaks are only transient, especially on SMP
+systems, because of pointers temporarily stored in CPU registers or
+stacks. Kmemleak defines MSECS_MIN_AGE (defaulting to 1000) representing
+the minimum age of an object to be reported as a memory leak.
+
+Limitations and Drawbacks
+-------------------------
+
+The main drawback is the reduced performance of memory allocation and
+freeing. To avoid other penalties, the memory scanning is only performed
+when the /sys/kernel/debug/kmemleak file is read. Anyway, this tool is
+intended for debugging purposes where the performance might not be the
+most important requirement.
+
+To keep the algorithm simple, kmemleak scans for values pointing to any
+address inside a block's address range. This may lead to an increased
+number of false negatives. However, it is likely that a real memory leak
+will eventually become visible.
+
+Another source of false negatives is the data stored in non-pointer
+values. In a future version, kmemleak could only scan the pointer
+members in the allocated structures. This feature would solve many of
+the false negative cases described above.
+
+The tool can report false positives. These are cases where an allocated
+block doesn't need to be freed (some cases in the init_call functions),
+the pointer is calculated by other methods than the usual container_of
+macro or the pointer is stored in a location not scanned by kmemleak.
+
+Page allocations and ioremap are not tracked.
diff --git a/Documentation/ko_KR/HOWTO b/Documentation/ko_KR/HOWTO
new file mode 100644
index 000000000..dc2ff8f61
--- /dev/null
+++ b/Documentation/ko_KR/HOWTO
@@ -0,0 +1,588 @@
+NOTE:
+This is a version of Documentation/HOWTO translated into korean
+This document is maintained by minchan Kim <minchan.kim@gmail.com>
+If you find any difference between this document and the original file or
+a problem with the translation, please contact the maintainer of this file.
+
+Please also note that the purpose of this file is to be easier to
+read for non English (read: korean) speakers and is not intended as
+a fork. So if you have any comments or updates for this file please
+try to update the original English file first.
+
+==================================
+이 문서는
+Documentation/HOWTO
+의 한글 번역입니다.
+
+역자: 김민찬 <minchan.kim@gmail.com>
+감수: 이제이미 <jamee.lee@samsung.com>
+==================================
+
+어떻게 리눅스 커널 개발을 하는가
+---------------------------------
+
+이 문서는 커널 개발에 있어 가장 중요한 문서이다. 이 문서는
+리눅스 커널 개발자가 되는 법과 리눅스 커널 개발 커뮤니티와 일하는
+법을 담고있다. 커널 프로그래밍의 기술적인 측면과 관련된 내용들은
+포함하지 않으려고 하였지만 올바른 길로 여러분을 안내하는 데는 도움이
+될 것이다.
+
+이 문서에서 오래된 것을 발견하면 문서의 아래쪽에 나열된 메인테이너에게
+패치를 보내달라.
+
+
+소개
+----
+
+자, 여러분은 리눅스 커널 개발자가 되는 법을 배우고 싶은가? 아니면
+상사로부터"이 장치를 위한 리눅스 드라이버를 작성하시오"라는 말을
+들었는가? 이 문서의 목적은 여러분이 겪게 될 과정과 커뮤니티와 협력하는
+법을 조언하여 여러분의 목적을 달성하기 위해 필요한 것 모두를 알려주기
+위함이다.
+
+커널은 대부분은 C로 작성되어 있고 몇몇 아키텍쳐의 의존적인 부분은
+어셈블리로 작성되어 있다. 커널 개발을 위해 C를 잘 이해하고 있어야 한다.
+여러분이 특정 아키텍쳐의 low-level 개발을 할 것이 아니라면
+어셈블리(특정 아키텍쳐)는 잘 알아야 할 필요는 없다.
+다음의 참고서적들은 기본에 충실한 C 교육이나 수년간의 경험에 견주지는
+못하지만 적어도 참고 용도로는 좋을 것이다
+ - "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
+ - "Practical C Programming" by Steve Oualline [O'Reilly]
+ - "C: A Reference Manual" by Harbison and Steele [Prentice Hall]
+
+커널은 GNU C와 GNU 툴체인을 사용하여 작성되었다. 이 툴들은 ISO C89 표준을
+따르는 반면 표준에 있지 않은 많은 확장기능도 가지고 있다. 커널은 표준 C
+라이브러리와는 관계없이 freestanding C 환경이어서 C 표준의 일부는
+지원되지 않는다. 임의의 long long 나누기나 floating point는 지원되지 않는다.
+때론 이런 이유로 커널이 그런 확장 기능을 가진 툴체인을 가지고 만들어졌다는
+것이 이해하기 어려울 수도 있고 게다가 불행하게도 그런 것을 정확하게 설명하는
+어떤 참고문서도 있지 않다. 정보를 얻기 위해서는 gcc info (`info gcc`)페이지를
+살펴보라.
+
+여러분은 기존의 개발 커뮤니티와 협력하는 법을 배우려고 하고 있다는 것을
+기억하라. 코딩, 스타일, 함수에 관한 훌륭한 표준을 가진 사람들이 모인
+다양한 그룹이 있다. 이 표준들은 오랜동안 크고 지역적으로 분산된 팀들에
+의해 가장 좋은 방법으로 일하기 위하여 찾은 것을 기초로 만들어져 왔다.
+그 표준들은 문서화가 잘 되어있기 때문에 가능한한 미리 많은 표준들에
+관하여 배우려고 시도하라. 다른 사람들은 여러분이나 여러분의 회사가
+일하는 방식에 적응하는 것을 원하지는 않는다.
+
+
+법적 문제
+---------
+
+리눅스 커널 소스 코드는 GPL로 배포(release)되었다. 소스트리의 메인
+디렉토리에 있는 라이센스에 관하여 상세하게 쓰여 있는 COPYING이라는
+파일을 봐라. 여러분이 라이센스에 관한 더 깊은 문제를 가지고 있다면
+리눅스 커널 메일링 리스트에 묻지말고 변호사와 연락하라. 메일링
+리스트들에 있는 사람들은 변호사가 아니기 때문에 법적 문제에 관하여
+그들의 말에 의지해서는 안된다.
+
+GPL에 관한 잦은 질문들과 답변들은 다음을 참조하라.
+ http://www.gnu.org/licenses/gpl-faq.html
+
+
+문서
+----
+
+리눅스 커널 소스 트리는 커널 커뮤니티와 협력하는 법을 배우기위해 훌륭한
+다양한 문서들을 가지고 있다. 새로운 기능들이 커널에 들어가게 될 때,
+그 기능을 어떻게 사용하는지에 관한 설명을 위하여 새로운 문서 파일을
+추가하는 것을 권장한다. 커널이 유저스페이스로 노출하는 인터페이스를
+변경하게 되면 변경을 설명하는 메뉴얼 페이지들에 대한 패치나 정보를
+mtk.manpages@gmail.com의 메인테이너에게 보낼 것을 권장한다.
+
+다음은 커널 소스 트리에 있는 읽어야 할 파일들의 리스트이다.
+ README
+ 이 파일은 리눅스 커널에 관하여 간단한 배경 설명과 커널을 설정하고
+ 빌드하기 위해 필요한 것을 설명한다. 커널에 입문하는 사람들은 여기서
+ 시작해야 한다.
+
+ Documentation/Changes
+ 이 파일은 커널을 성공적으로 빌드하고 실행시키기 위해 필요한 다양한
+ 소프트웨어 패키지들의 최소 버젼을 나열한다.
+
+ Documentation/CodingStyle
+ 이 문서는 리눅스 커널 코딩 스타일과 그렇게 한 몇몇 이유를 설명한다.
+ 모든 새로운 코드는 이 문서에 가이드라인들을 따라야 한다. 대부분의
+ 메인테이너들은 이 규칙을 따르는 패치들만을 받아들일 것이고 많은 사람들이
+ 그 패치가 올바른 스타일일 경우만 코드를 검토할 것이다.
+
+ Documentation/SubmittingPatches
+ Documentation/SubmittingDrivers
+ 이 파일들은 성공적으로 패치를 만들고 보내는 법을 다음의 내용들로
+ 굉장히 상세히 설명하고 있다(그러나 다음으로 한정되진 않는다).
+ - Email 내용들
+ - Email 양식
+ - 그것을 누구에게 보낼지
+ 이러한 규칙들을 따르는 것이 성공(역자주: 패치가 받아들여 지는 것)을
+ 보장하진 않는다(왜냐하면 모든 패치들은 내용과 스타일에 관하여
+ 면밀히 검토되기 때문이다). 그러나 규칙을 따르지 않는다면 거의
+ 성공하지도 못할 것이다.
+
+ 올바른 패치들을 만드는 법에 관한 훌륭한 다른 문서들이 있다.
+ "The Perfect Patch"
+ http://www.ozlabs.org/~akpm/stuff/tpp.txt
+ "Linux kernel patch submission format"
+ http://linux.yyz.us/patch-format.html
+
+ Documentation/stable_api_nonsense.txt
+ 이 문서는 의도적으로 커널이 불변하는 API를 갖지 않도록 결정한
+ 이유를 설명하며 다음과 같은 것들을 포함한다.
+ - 서브시스템 shim-layer(호환성을 위해?)
+ - 운영체제들간의 드라이버 이식성
+ - 커널 소스 트리내에 빠른 변화를 늦추는 것(또는 빠른 변화를 막는 것)
+ 이 문서는 리눅스 개발 철학을 이해하는데 필수적이며 다른 운영체제에서
+ 리눅스로 전향하는 사람들에게는 매우 중요하다.
+
+
+ Documentation/SecurityBugs
+ 여러분들이 리눅스 커널의 보안 문제를 발견했다고 생각한다면 이 문서에
+ 나온 단계에 따라서 커널 개발자들에게 알리고 그 문제를 해결할 수 있도록
+ 도와 달라.
+
+ Documentation/ManagementStyle
+ 이 문서는 리눅스 커널 메인테이너들이 그들의 방법론에 녹아 있는
+ 정신을 어떻게 공유하고 운영하는지를 설명한다. 이것은 커널 개발에 입문하는
+ 모든 사람들(또는 커널 개발에 작은 호기심이라도 있는 사람들)이
+ 읽어야 할 중요한 문서이다. 왜냐하면 이 문서는 커널 메인테이너들의
+ 독특한 행동에 관하여 흔히 있는 오해들과 혼란들을 해소하고 있기
+ 때문이다.
+
+ Documentation/stable_kernel_rules.txt
+ 이 문서는 안정적인 커널 배포가 이루어지는 규칙을 설명하고 있으며
+ 여러분들이 이러한 배포들 중 하나에 변경을 하길 원한다면
+ 무엇을 해야 하는지를 설명한다.
+
+ Documentation/kernel-docs.txt
+ 커널 개발에 관계된 외부 문서의 리스트이다. 커널 내의 포함된 문서들
+ 중에 여러분이 찾고 싶은 문서를 발견하지 못할 경우 이 리스트를
+ 살펴보라.
+
+ Documentation/applying-patches.txt
+ 패치가 무엇이며 그것을 커널의 다른 개발 브랜치들에 어떻게
+ 적용하는지에 관하여 자세히 설명하고 있는 좋은 입문서이다.
+
+커널은 소스 코드 그 자체에서 자동적으로 만들어질 수 있는 많은 문서들을
+가지고 있다. 이것은 커널 내의 API에 대한 모든 설명, 그리고 락킹을
+올바르게 처리하는 법에 관한 규칙을 포함하고 있다. 이 문서는
+Documentation/DocBook/ 디렉토리 내에서 만들어지며 PDF, Postscript, HTML,
+그리고 man 페이지들로 다음과 같이 실행하여 만들어 진다.
+ make pdfdocs
+ make psdocs
+ make htmldocs
+ make mandocs
+각각의 명령을 메인 커널 소스 디렉토리로부터 실행한다.
+
+
+커널 개발자가 되는 것
+---------------------
+
+여러분이 리눅스 커널 개발에 관하여 아무것도 모른다면 Linux KernelNewbies
+프로젝트를 봐야 한다.
+ http://kernelnewbies.org
+그곳은 거의 모든 종류의 기본적인 커널 개발 질문들(질문하기 전에 먼저
+아카이브를 찾아봐라. 과거에 이미 답변되었을 수도 있다)을 할 수 있는 도움이
+될만한 메일링 리스트가 있다. 또한 실시간으로 질문 할 수 있는 IRC 채널도
+가지고 있으며 리눅스 커널 개발을 배우는 데 유용한 문서들을 보유하고 있다.
+
+웹사이트는 코드구성, 서브시스템들, 그리고 현재 프로젝트들
+(트리 내, 외부에 존재하는)에 관한 기본적인 정보들을 가지고 있다. 또한
+그곳은 커널 컴파일이나 패치를 하는 법과 같은 기본적인 것들을 설명한다.
+
+여러분이 어디서 시작해야 할진 모르지만 커널 개발 커뮤니티에 참여할 수
+있는 일들을 찾길 원한다면 리눅스 커널 Janitor 프로젝트를 살펴봐라.
+ http://kernelnewbies.org/KernelJanitors
+그곳은 시작하기에 훌륭한 장소이다. 그곳은 리눅스 커널 소스 트리내에
+간단히 정리되고 수정될 수 있는 문제들에 관하여 설명한다. 여러분은 이
+프로젝트를 대표하는 개발자들과 일하면서 자신의 패치를 리눅스 커널 트리에
+반영하기 위한 기본적인 것들을 배우게 될것이며 여러분이 아직 아이디어를
+가지고 있지 않다면 다음에 무엇을 해야할지에 관한 방향을 배울 수 있을
+것이다.
+
+여러분들이 이미 커널 트리에 반영하길 원하는 코드 묶음을 가지고 있지만
+올바른 포맷으로 포장하는데 도움이 필요하다면 그러한 문제를 돕기 위해
+만들어진 kernel-mentors 프로젝트가 있다. 그곳은 메일링 리스트이며
+다음에서 참조할 수 있다.
+ http://selenic.com/mailman/listinfo/kernel-mentors
+
+리눅스 커널 코드에 실제 변경을 하기 전에 반드시 그 코드가 어떻게
+동작하는지 이해하고 있어야 한다. 코드를 분석하기 위하여 특정한 툴의
+도움을 빌려서라도 코드를 직접 읽는 것보다 좋은 것은 없다(대부분의
+자잘한 부분들은 잘 코멘트되어 있다). 그런 툴들 중에 특히 추천할만한
+것은 Linux Cross-Reference project이며 그것은 자기 참조 방식이며
+소스코드를 인덱스된 웹 페이지들의 형태로 보여준다. 최신의 멋진 커널
+코드 저장소는 다음을 통하여 참조할 수 있다.
+ http://lxr.linux.no/+trees
+
+
+개발 프로세스
+-------------
+
+리눅스 커널 개발 프로세스는 현재 몇몇 다른 메인 커널 "브랜치들"과
+서브시스템에 특화된 커널 브랜치들로 구성된다. 몇몇 다른 메인
+브랜치들은 다음과 같다.
+ - main 3.x 커널 트리
+ - 3.x.y - 안정된 커널 트리
+ - 3.x -git 커널 패치들
+ - 서브시스템을 위한 커널 트리들과 패치들
+ - 3.x - 통합 테스트를 위한 next 커널 트리
+
+3.x 커널 트리
+---------------
+
+3.x 커널들은 Linux Torvalds가 관리하며 kernel.org의 pub/linux/kernel/v3.x/
+디렉토리에서 참조될 수 있다.개발 프로세스는 다음과 같다.
+ - 새로운 커널이 배포되자마자 2주의 시간이 주어진다. 이 기간동은
+ 메인테이너들은 큰 diff들을 Linus에게 제출할 수 있다. 대개 이 패치들은
+ 몇 주 동안 -next 커널내에 이미 있었던 것들이다. 큰 변경들을 제출하는 데
+ 선호되는 방법은 git(커널의 소스 관리 툴, 더 많은 정보들은 http://git.or.cz/
+ 에서 참조할 수 있다)를 사용하는 것이지만 순수한 패치파일의 형식으로 보내는
+ 것도 무관하다.
+ - 2주 후에 -rc1 커널이 배포되며 지금부터는 전체 커널의 안정성에 영향을
+ 미칠수 있는 새로운 기능들을 포함하지 않는 패치들만이 추가될 수 있다.
+ 완전히 새로운 드라이버(혹은 파일시스템)는 -rc1 이후에만 받아들여진다는
+ 것을 기억해라. 왜냐하면 변경이 자체내에서만 발생하고 추가된 코드가
+ 드라이버 외부의 다른 부분에는 영향을 주지 않으므로 그런 변경은
+ 회귀(역자주: 이전에는 존재하지 않았지만 새로운 기능추가나 변경으로 인해
+ 생겨난 버그)를 일으킬 만한 위험을 가지고 있지 않기 때문이다. -rc1이
+ 배포된 이후에 git를 사용하여 패치들을 Linus에게 보낼수 있지만 패치들은
+ 공식적인 메일링 리스트로 보내서 검토를 받을 필요가 있다.
+ - 새로운 -rc는 Linus가 현재 git tree가 테스트 하기에 충분히 안정된 상태에
+ 있다고 판단될 때마다 배포된다. 목표는 새로운 -rc 커널을 매주 배포하는
+ 것이다.
+ - 이러한 프로세스는 커널이 "준비(ready)"되었다고 여겨질때까지 계속된다.
+ 프로세스는 대체로 6주간 지속된다.
+ - 각 -rc 배포에 있는 알려진 회귀의 목록들은 다음 URI에 남겨진다.
+ http://kernelnewbies.org/known_regressions
+
+커널 배포에 있어서 언급할만한 가치가 있는 리눅스 커널 메일링 리스트의
+Andrew Morton의 글이 있다.
+ "커널이 언제 배포될지는 아무도 모른다. 왜냐하면 배포는 알려진
+ 버그의 상황에 따라 배포되는 것이지 미리정해 놓은 시간에 따라
+ 배포되는 것은 아니기 때문이다."
+
+3.x.y - 안정 커널 트리
+------------------------
+
+3 자리 숫자로 이루어진 버젼의 커널들은 -stable 커널들이다. 그것들은 3.x
+커널에서 발견된 큰 회귀들이나 보안 문제들 중 비교적 작고 중요한 수정들을
+포함한다.
+
+이것은 가장 최근의 안정적인 커널을 원하는 사용자에게 추천되는 브랜치이며,
+개발/실험적 버젼을 테스트하는 것을 돕고자 하는 사용자들과는 별로 관련이 없다.
+
+어떤 3.x.y 커널도 사용할 수 없다면 그때는 가장 높은 숫자의 3.x
+커널이 현재의 안정 커널이다.
+
+3.x.y는 "stable" 팀<stable@vger.kernel.org>에 의해 관리되며 거의 매번 격주로
+배포된다.
+
+커널 트리 문서들 내에 Documentation/stable_kernel_rules.txt 파일은 어떤
+종류의 변경들이 -stable 트리로 들어왔는지와 배포 프로세스가 어떻게
+진행되는지를 설명한다.
+
+
+3.x -git 패치들
+------------------
+git 저장소(그러므로 -git이라는 이름이 붙음)에는 날마다 관리되는 Linus의
+커널 트리의 snapshot 들이 있다. 이 패치들은 일반적으로 날마다 배포되며
+Linus의 트리의 현재 상태를 나타낸다. 이 패치들은 정상적인지 조금도
+살펴보지 않고 자동적으로 생성된 것이므로 -rc 커널들 보다도 더 실험적이다.
+
+서브시스템 커널 트리들과 패치들
+-------------------------------
+다양한 커널 서브시스템의 메인테이너들 --- 그리고 많은 커널 서브시스템 개발자들
+--- 은 그들의 현재 개발 상태를 소스 저장소로 노출한다. 이를 통해 다른 사람들도
+커널의 다른 영역에 어떤 변화가 이루어지고 있는지 알 수 있다. 급속히 개발이
+진행되는 영역이 있고 그렇지 않은 영역이 있으므로, 개발자는 다른 개발자가 제출한
+수정 사항과 자신의 수정사항의 충돌이나 동일한 일을 동시에 두사람이 따로
+진행하는 사태를 방지하기 위해 급속히 개발이 진행되고 있는 영역에 작업의
+베이스를 맞춰줄 것이 요구된다.
+
+대부분의 이러한 저장소는 git 트리지만, git이 아닌 SCM으로 관리되거나, quilt
+시리즈로 제공되는 패치들도 존재한다. 이러한 서브시스템 저장소들은 MAINTAINERS
+파일에 나열되어 있다. 대부분은 http://git.kernel.org 에서 볼 수 있다.
+
+제안된 패치는 서브시스템 트리에 커밋되기 전에 메일링 리스트를 통해
+리뷰된다(아래의 관련 섹션을 참고하기 바란다). 일부 커널 서브시스템의 경우, 이
+리뷰 프로세스는 patchwork라는 도구를 통해 추적된다. patchwork은 등록된 패치와
+패치에 대한 코멘트, 패치의 버전을 볼 수 있는 웹 인터페이스를 제공하고,
+메인테이너는 패치를 리뷰 중, 리뷰 통과, 또는 반려됨으로 표시할 수 있다.
+대부분의 이러한 patchwork 사이트는 http://patchwork.kernel.org/ 또는
+http://patchwork.ozlabs.org/ 에 나열되어 있다.
+
+3.x - 통합 테스트를 위한 next 커널 트리
+-----------------------------------------
+서브시스템 트리들의 변경사항들은 mainline 3.x 트리로 들어오기 전에 통합
+테스트를 거쳐야 한다. 이런 목적으로, 모든 서브시스템 트리의 변경사항을 거의
+매일 받아가는 특수한 테스트 저장소가 존재한다:
+ http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git
+ http://linux.f-seidel.de/linux-next/pmwiki/
+
+이런 식으로, -next 커널을 통해 다음 머지 기간에 메인라인 커널에 어떤 변경이
+가해질 것인지 간략히 알 수 있다. 모험심 강한 테스터라면 -next 커널에서 테스트를
+수행하는 것도 좋을 것이다.
+
+버그 보고
+---------
+bugzilla.kernel.org는 리눅스 커널 개발자들이 커널의 버그를 추적하는 곳이다.
+사용자들은 발견한 모든 버그들을 보고하기 위하여 이 툴을 사용할 것을 권장한다.
+kernel bugzilla를 사용하는 자세한 방법은 다음을 참조하라.
+ http://test.kernel.org/bugzilla/faq.html
+
+메인 커널 소스 디렉토리에 있는 REPORTING-BUGS 파일은 커널 버그라고 생각되는
+것을 보고하는 방법에 관한 좋은 템플릿이며 문제를 추적하기 위해서 커널
+개발자들이 필요로 하는 정보가 무엇들인지를 상세히 설명하고 있다.
+
+
+버그 리포트들의 관리
+--------------------
+
+여러분의 해킹 기술을 연습하는 가장 좋은 방법 중의 하는 다른 사람들이
+보고한 버그들을 수정하는 것이다. 여러분은 커널을 더욱 안정화시키는데
+도움을 줄 뿐만이 아니라 실제있는 문제들을 수정하는 법을 배우게 되고
+그와 함께 여러분들의 기술은 향상될 것이며 다른 개발자들이 여러분의
+존재에 대해 알게 될 것이다. 버그를 수정하는 것은 개발자들 사이에서
+점수를 얻을 수 있는 가장 좋은 방법중의 하나이다. 왜냐하면 많은 사람들은
+다른 사람들의 버그들을 수정하기 위하여 시간을 낭비하지 않기 때문이다.
+
+이미 보고된 버그 리포트들을 가지고 작업하기 위해서 http://bugzilla.kernel.org를
+참조하라. 여러분이 앞으로 생겨날 버그 리포트들의 조언자가 되길 원한다면
+bugme-new 메일링 리스트나(새로운 버그 리포트들만이 이곳에서 메일로 전해진다)
+bugme-janitor 메일링 리스트(bugzilla에 모든 변화들이 여기서 메일로 전해진다)
+에 등록하면 된다.
+
+ https://lists.linux-foundation.org/mailman/listinfo/bugme-new
+ https://lists.linux-foundation.org/mailman/listinfo/bugme-janitors
+
+
+
+메일링 리스트들
+---------------
+
+위의 몇몇 문서들이 설명하였지만 핵심 커널 개발자들의 대다수는
+리눅스 커널 메일링 리스트에 참여하고 있다. 리스트에 등록하고 해지하는
+방법에 관한 자세한 사항은 다음에서 참조할 수 있다.
+ http://vger.kernel.org/vger-lists.html#linux-kernel
+웹상의 많은 다른 곳에도 메일링 리스트의 아카이브들이 있다.
+이러한 아카이브들을 찾으려면 검색 엔진을 사용하라. 예를 들어:
+ http://dir.gmane.org/gmane.linux.kernel
+여러분이 새로운 문제에 관해 리스트에 올리기 전에 말하고 싶은 주제에 관한
+것을 아카이브에서 먼저 찾아보기를 강력히 권장한다. 이미 상세하게 토론된 많은
+것들이 메일링 리스트의 아카이브에 기록되어 있다.
+
+각각의 커널 서브시스템들의 대부분은 자신들의 개발에 관한 노력들로 이루어진
+분리된 메일링 리스트를 따로 가지고 있다. 다른 그룹들이 무슨 리스트를 가지고
+있는지는 MAINTAINERS 파일을 참조하라.
+
+많은 리스트들은 kernel.org에서 호스트되고 있다. 그 정보들은 다음에서 참조될 수 있다.
+ http://vger.kernel.org/vger-lists.html
+
+리스트들을 사용할 때는 올바른 예절을 따를 것을 유념해라.
+대단하진 않지만 다음 URL은 리스트(혹은 모든 리스트)와 대화하는 몇몇 간단한
+가이드라인을 가지고 있다.
+ http://www.albion.com/netiquette/
+
+여러 사람들이 여러분의 메일에 응답한다면 CC: 즉 수신 리스트는 꽤 커지게
+될 것이다. 아무 이유없이 CC에서 어떤 사람도 제거하거나 리스트 주소로만
+회신하지 마라. 메일을 보낸 사람으로서 하나를 받고 리스트로부터 또
+하나를 받아 두번 받는 것에 익숙하여 있으니 mail-header를 조작하려고 하지
+말아라. 사람들은 그런 것을 좋아하지 않을 것이다.
+
+여러분의 회신의 문맥을 원래대로 유지해야 한다. 여러분들의 회신의 윗부분에
+"John 커널해커는 작성했다...."를 유지하며 여러분들의 의견을 그 메일의 윗부분에
+작성하지 말고 각 인용한 단락들 사이에 넣어라.
+
+여러분들이 패치들을 메일에 넣는다면 그것들은 Documentation/SubmittingPatches에
+나와있는데로 명백히(plain) 읽을 수 있는 텍스트여야 한다. 커널 개발자들은
+첨부파일이나 압축된 패치들을 원하지 않는다. 그들은 여러분들의 패치의
+각 라인 단위로 코멘트를 하길 원하며 압축하거나 첨부하지 않고 보내는 것이
+그렇게 할 수 있는 유일한 방법이다. 여러분들이 사용하는 메일 프로그램이
+스페이스나 탭 문자들을 조작하지 않는지 확인하라. 가장 좋은 첫 테스트는
+메일을 자신에게 보내보고 스스로 그 패치를 적용해보라. 그것이 동작하지
+않는다면 여러분의 메일 프로그램을 고치던가 제대로 동작하는 프로그램으로
+바꾸어라.
+
+무엇보다도 메일링 리스트의 다른 구독자들에게 보여주려 한다는 것을 기억하라.
+
+
+커뮤니티와 협력하는 법
+--------------------
+
+커널 커뮤니티의 목적은 가능한한 가장 좋은 커널을 제공하는 것이다. 여러분이
+받아들여질 패치를 제출하게 되면 그 패치의 기술적인 이점으로 검토될 것이다.
+그럼 여러분들은 무엇을 기대하고 있어야 하는가?
+ - 비판
+ - 의견
+ - 변경을 위한 요구
+ - 당위성을 위한 요구
+ - 침묵
+
+기억하라. 이것들은 여러분의 패치가 커널로 들어가기 위한 과정이다. 여러분의
+패치들은 비판과 다른 의견을 받을 수 있고 그것들을 기술적인 레벨로 평가하고
+재작업하거나 또는 왜 수정하면 안되는지에 관하여 명료하고 간결한 이유를
+말할 수 있어야 한다. 여러분이 제출한 것에 어떤 응답도 있지 않다면 몇 일을
+기다려보고 다시 시도해라. 때론 너무 많은 메일들 속에 묻혀버리기도 한다.
+
+여러분은 무엇을 해서는 안되는가?
+ - 여러분의 패치가 아무 질문 없이 받아들여지기를 기대하는 것
+ - 방어적이 되는 것
+ - 의견을 무시하는 것
+ - 요청된 변경을 하지 않고 패치를 다시 제출하는 것
+
+가능한한 가장 좋은 기술적인 해답을 찾고 있는 커뮤니티에서는 항상
+어떤 패치가 얼마나 좋은지에 관하여 다른 의견들이 있을 수 있다. 여러분은
+협조적이어야 하고 기꺼이 여러분의 생각을 커널 내에 맞추어야 한다. 아니면
+적어도 여러분의 것이 가치있다는 것을 증명하여야 한다. 잘못된 것도 여러분이
+올바른 방향의 해결책으로 이끌어갈 의지가 있다면 받아들여질 것이라는 점을
+기억하라.
+
+여러분의 첫 패치에 여러분이 수정해야하는 십여개 정도의 회신이 오는
+경우도 흔하다. 이것은 여러분의 패치가 받아들여지지 않을 것이라는 것을
+의미하는 것이 아니고 개인적으로 여러분에게 감정이 있어서 그러는 것도
+아니다. 간단히 여러분의 패치에 제기된 문제들을 수정하고 그것을 다시
+보내라.
+
+
+커널 커뮤니티와 기업 조직간의 차이점
+-----------------------------------------------------------------
+커널 커뮤니티는 가장 전통적인 회사의 개발 환경과는 다르다. 여기에 여러분들의
+문제를 피하기 위한 목록이 있다.
+ 여러분들이 제안한 변경들에 관하여 말할 때 좋은 것들 :
+ - "이것은 여러 문제들을 해결합니다."
+ - "이것은 2000 라인의 코드를 줄입니다."
+ - "이것은 내가 말하려는 것에 관해 설명하는 패치입니다."
+ - "나는 5개의 다른 아키텍쳐에서 그것을 테스트 했으므로..."
+ - "여기에 일련의 작은 패치들이 있으므로..."
+ - "이것은 일반적인 머신에서 성능을 향상함으로..."
+
+ 여러분들이 말할 때 피해야 할 좋지 않은 것들 :
+ - "우리는 그것을 AIX/ptx/Solaris에서 이러한 방법으로 했다. 그러므로 그것은 좋은 것임에 틀림없다..."
+ - "나는 20년동안 이것을 해왔다. 그러므로..."
+ - "이것은 돈을 벌기위해 나의 회사가 필요로 하는 것이다."
+ - "이것은 우리의 엔터프라이즈 상품 라인을 위한 것이다."
+ - "여기에 나의 생각을 말하고 있는 1000 페이지 설계 문서가 있다."
+ - "나는 6달동안 이것을 했으니..."
+ - "여기에 5000 라인 짜리 패치가 있으니..."
+ - "나는 현재 뒤죽박죽인 것을 재작성했다. 그리고 여기에..."
+ - "나는 마감시한을 가지고 있으므로 이 패치는 지금 적용될 필요가 있다."
+
+커널 커뮤니티가 전통적인 소프트웨어 엔지니어링 개발 환경들과
+또 다른 점은 얼굴을 보지 않고 일한다는 점이다. 이메일과 irc를 대화의
+주요수단으로 사용하는 것의 한가지 장점은 성별이나 인종의 차별이
+없다는 것이다. 리눅스 커널의 작업 환경에서는 단지 이메일 주소만
+알수 있기 때문에 여성과 소수 민족들도 모두 받아들여진다. 국제적으로
+일하게 되는 측면은 사람의 이름에 근거하여 성별을 추측할 수 없게
+하기때문에 차별을 없애는 데 도움을 준다. Andrea라는 이름을 가진 남자와
+Pat이라는 이름을 가진 여자가 있을 수도 있는 것이다. 리눅스 커널에서
+작업하며 생각을 표현해왔던 대부분의 여성들은 긍정적인 경험을 가지고
+있다.
+
+언어 장벽은 영어에 익숙하지 않은 몇몇 사람들에게 문제가 될 수도 있다.
+언어의 훌륭한 구사는 메일링 리스트에서 올바르게 자신의 생각을
+표현하기 위하여 필요하다. 그래서 여러분은 이메일을 보내기 전에
+영어를 올바르게 사용하고 있는지를 체크하는 것이 바람직하다.
+
+
+여러분의 변경을 나누어라
+------------------------
+
+리눅스 커널 커뮤니티는 한꺼번에 굉장히 큰 코드의 묶음(chunk)을 쉽게
+받아들이지 않는다. 변경은 적절하게 소개되고, 검토되고, 각각의
+부분으로 작게 나누어져야 한다. 이것은 회사에서 하는 것과는 정확히
+반대되는 것이다. 여러분들의 제안은 개발 초기에 일찍이 소개되야 한다.
+그래서 여러분들은 자신이 하고 있는 것에 관하여 피드백을 받을 수 있게
+된다. 커뮤니티가 여러분들이 커뮤니티와 함께 일하고 있다는 것을
+느끼도록 만들고 커뮤니티가 여러분의 기능을 위한 쓰레기 장으로써
+사용되지 않고 있다는 것을 느끼게 하자. 그러나 메일링 리스트에 한번에
+50개의 이메일을 보내지는 말아라. 여러분들의 일련의 패치들은 항상
+더 작아야 한다.
+
+패치를 나누는 이유는 다음과 같다.
+
+1) 작은 패치들은 여러분의 패치들이 적용될 수 있는 확률을 높여준다.
+ 왜냐하면 다른 사람들은 정확성을 검증하기 위하여 많은 시간과 노력을
+ 들이기를 원하지 않는다. 5줄의 패치는 메인테이너가 거의 몇 초간 힐끗
+ 보면 적용될 수 있다. 그러나 500 줄의 패치는 정확성을 검토하기 위하여
+ 몇시간이 걸릴 수도 있다(걸리는 시간은 패치의 크기 혹은 다른 것에
+ 비례하여 기하급수적으로 늘어난다).
+
+ 패치를 작게 만드는 것은 무엇인가 잘못되었을 때 디버그하는 것을
+ 쉽게 만든다. 즉, 그렇게 만드는 것은 매우 큰 패치를 적용한 후에
+ 조사하는 것 보다 작은 패치를 적용한 후에 (그리고 몇몇의 것이
+ 깨졌을 때) 하나씩 패치들을 제거해가며 디버그 하기 쉽도록 만들어 준다.
+
+2) 작은 패치들을 보내는 것뿐만 아니라 패치들을 제출하기전에 재작성하고
+ 간단하게(혹은 간단한게 재배치하여) 하는 것도 중요하다.
+
+여기에 커널 개발자 Al Viro의 이야기가 있다.
+ "학생의 수학 숙제를 채점하는 선생님을 생각해보라. 선생님은 학생들이
+ 답을 얻을때까지 겪은 시행착오를 보길 원하지 않는다. 선생님들은
+ 간결하고 가장 뛰어난 답을 보길 원한다. 훌륭한 학생은 이것을 알고
+ 마지막으로 답을 얻기 전 중간 과정들을 제출하진 않는다.
+
+ 커널 개발도 마찬가지이다. 메인테이너들과 검토하는 사람들은 문제를
+ 풀어나가는 과정속에 숨겨진 과정을 보길 원하진 않는다. 그들은
+ 간결하고 멋진 답을 보길 원한다."
+
+커뮤니티와 협력하며 뛰어난 답을 찾는 것과 여러분들의 끝마치지 못한 작업들
+사이에 균형을 유지해야 하는 것은 어려울지도 모른다. 그러므로 프로세스의
+초반에 여러분의 작업을 향상시키기위한 피드백을 얻는 것 뿐만 아니라
+여러분들의 변경들을 작은 묶음으로 유지해서 심지어는 여러분의 작업의
+모든 부분이 지금은 포함될 준비가 되어있지 않지만 작은 부분은 벌써
+받아들여질 수 있도록 유지하는 것이 바람직하다.
+
+또한 완성되지 않았고 "나중에 수정될 것이다." 와 같은 것들을 포함하는
+패치들은 받아들여지지 않을 것이라는 점을 유념하라.
+
+
+변경을 정당화해라
+-----------------
+
+여러분들의 나누어진 패치들을 리눅스 커뮤니티가 왜 반영해야 하는지를
+알도록 하는 것은 매우 중요하다. 새로운 기능들이 필요하고 유용하다는
+것은 반드시 그에 합당한 이유가 있어야 한다.
+
+
+변경을 문서화해라
+-----------------
+
+여러분이 패치를 보내려 할때는 여러분이 무엇을 말하려고 하는지를 충분히
+생각하여 이메일을 작성해야 한다. 이 정보는 패치를 위한 ChangeLog가 될
+것이다. 그리고 항상 그 내용을 보길 원하는 모든 사람들을 위해 보존될
+것이다. 패치는 완벽하게 다음과 같은 내용들을 포함하여 설명해야 한다.
+ - 변경이 왜 필요한지
+ - 패치에 관한 전체 설계 접근(approach)
+ - 구현 상세들
+ - 테스트 결과들
+
+이것이 무엇인지 더 자세한 것을 알고 싶다면 다음 문서의 ChageLog 항을 봐라.
+ "The Perfect Patch"
+ http://www.ozlabs.org/~akpm/stuff/tpp.txt
+
+
+
+
+이 모든 것을 하는 것은 매우 어려운 일이다. 완벽히 소화하는 데는 적어도 몇년이
+걸릴 수도 있다. 많은 인내와 결심이 필요한 계속되는 개선의 과정이다. 그러나
+가능한한 포기하지 말라. 많은 사람들은 이전부터 해왔던 것이고 그 사람들도
+정확하게 여러분들이 지금 서 있는 그 곳부터 시작했었다.
+
+
+
+
+----------
+"개발 프로세스"(http://lwn.net/Articles/94386/) 섹션을
+작성하는데 있어 참고할 문서를 사용하도록 허락해준 Paolo Ciarrocchi에게
+감사한다. 여러분들이 말해야 할 것과 말해서는 안되는 것의 목록 중 일부를 제공해준
+Randy Dunlap과 Gerrit Huizenga에게 감사한다. 또한 검토와 의견 그리고
+공헌을 아끼지 않은 Pat Mochel, Hanna Linder, Randy Dunlap, Kay Sievers,
+Vojtech Pavlik, Jan Kara, Josh Boyer, Kees Cook, Andrew Morton, Andi Kleen,
+Vadim Lobanov, Jesper Juhl, Adrian Bunk, Keri Harris, Frans Pop,
+David A. Wheeler, Junio Hamano, Michael Kerrisk, and Alex Shepard에게도 감사를 전한다.
+그들의 도움이 없었다면 이 문서는 존재하지 않았을 것이다.
+
+
+
+메인테이너: Greg Kroah-Hartman <greg@kroah.com>
diff --git a/Documentation/ko_KR/stable_api_nonsense.txt b/Documentation/ko_KR/stable_api_nonsense.txt
new file mode 100644
index 000000000..51f85ade4
--- /dev/null
+++ b/Documentation/ko_KR/stable_api_nonsense.txt
@@ -0,0 +1,195 @@
+NOTE:
+This is a version of Documentation/stable_api_nonsense.txt translated
+into korean
+This document is maintained by barrios <minchan.kim@gmail.com>
+If you find any difference between this document and the original file or
+a problem with the translation, please contact the maintainer of this file.
+
+Please also note that the purpose of this file is to be easier to
+read for non English (read: korean) speakers and is not intended as
+a fork. So if you have any comments or updates for this file please
+try to update the original English file first.
+
+==================================
+이 문서는
+Documentation/stable_api_nonsense.txt
+의 한글 번역입니다.
+
+역자: 김민찬 <minchan.kim@gmail.com>
+감수: 이제이미 <jamee.lee@samsung.com>
+==================================
+
+리눅스 커널 드라이버 인터페이스
+(여러분들의 모든 질문에 대한 답 그리고 다른 몇가지)
+
+Greg Kroah-Hartman <greg@kroah.com>
+
+이 문서는 리눅스가 왜 바이너리 커널 인터페이스를 갖지 않는지, 왜 변하지
+않는(stable) 커널 인터페이스를 갖지 않는지를 설명하기 위해 쓰여졌다.
+이 문서는 커널과 유저공간 사이의 인터페이스가 아니라 커널 내부의
+인터페이스들을 설명하고 있다는 것을 유념하라. 커널과 유저공간 사이의
+인터페이스는 응용프로그램이 사용하는 syscall 인터페이스이다. 그 인터페이스는
+오랫동안 거의 변하지 않았고 앞으로도 변하지 않을 것이다. 나는 pre 0.9에서
+만들어졌지만 최신의 2.6 커널 배포에서도 잘 동작하는 프로그램을 가지고
+있다. 이 인터페이스는 사용자와 응용프로그램 개발자들이 변하지 않을 것이라고
+여길수 있는 것이다.
+
+
+초록
+----
+여러분은 변하지 않는 커널 인터페이스를 원한다고 생각하지만 실제로는
+그렇지 않으며 심지어는 그것을 알아채지 못한다. 여러분이 원하는 것은
+안정되게 실행되는 드라이버이며 드라이버가 메인 커널 트리에 있을 때
+그런 안정적인 드라이버를 얻을 수 있게 된다. 또한 여러분의 드라이버가
+메인 커널 트리에 있다면 다른 많은 좋은 이점들을 얻게 된다. 그러한 것들이
+리눅스를 강건하고, 안정적이며, 성숙한 운영체제로 만들어 놓음으로써
+여러분들로 하여금 바로 리눅스를 사용하게 만드는 이유이다.
+
+
+소개
+----
+
+커널 내부의 인터페이스가 바뀌는 것을 걱정하며 커널 드라이버를 작성하고
+싶어하는 사람은 정말 이상한 사람이다. 세상의 대다수의 사람들은 이 인터페이스를
+보지못할 것이며 전혀 걱정하지도 않는다.
+
+먼저, 나는 closed 소스, hidden 소스, binary blobs, 소스 wrappers, 또는 GPL로
+배포되었지만 소스 코드를 갖고 있지 않은 커널 드라이버들을 설명하는 어떤 다른
+용어들에 관한 어떤 법적인 문제에 관해서는 언급하지 않을 것이다. 어떤 법적인
+질문들을 가지고 있다면 변호사와 연락하라. 나는 프로그래머이므로 여기서 기술적인
+문제들만을 설명하려고 한다. (법적인 문제를 경시하는 것은 아니다. 그런 문제들은
+엄연히 현실에 있고 여러분들은 항상 그 문제들을 인식하고 있을 필요는 있다.)
+
+자, 두가지의 주요 주제가 있다. 바이너리 커널 인터페이스들과 변하지 않는
+커널 소스 인터페이들. 그것들은 서로 의존성을 가지고 있지만 바이너리
+문제를 먼저 풀고 넘어갈 것이다.
+
+
+
+바이너리 커널 인터페이스
+------------------------
+우리가 변하지 않는 커널 소스 인터페이스를 가지고 있다고 가정하자. 그러면
+바이너리 인터페이스 또한 자연적으로 변하지 않을까? 틀렸다. 리눅스 커널에
+관한 다음 사실들을 생각해보라.
+ - 여러분들이 사용하는 C 컴파일러의 버젼에 따라 다른 커널 자료 구조들은
+ 다른 alignmnet들을 갖게 될것이고 다른 방법으로(함수들을 inline으로
+ 했느냐, 아니냐) 다른 함수들을 포함하는 것도 가능한다. 중요한 것은
+ 개별적인 함수 구성이 아니라 자료 구조 패딩이 달라진다는 점이다.
+ - 여러분이 선택한 커널 빌드 옵션에 따라서 커널은 다양한 것들을 가정할
+ 수 있다.
+ - 다른 구조체들은 다른 필드들을 포함할 수 있다.
+ - 몇몇 함수들은 전혀 구현되지 않을 수도 있다(즉, 몇몇 lock들은
+ non-SMP 빌드에서는 사라져 버릴수도 있다).
+ - 커널내에 메모리는 build optoin들에 따라 다른 방법으로 align될수
+ 있다.
+ - 리눅스는 많은 다양한 프로세서 아키텍쳐에서 실행된다. 한 아키텍쳐의
+ 바이너리 드라이버를 다른 아키텍쳐에서 정상적으로 실행시킬 방법은
+ 없다.
+
+커널을 빌드했던 C 컴파일러와 정확하게 같은 것을 사용하고 정확하게 같은
+커널 구성(configuration)을 사용하여 여러분들의 모듈을 빌드하면 간단히
+많은 문제들을 해결할 수 있다. 이렇게 하는 것은 여러분들이 하나의 리눅스
+배포판의 하나의 배포 버젼을 위한 모듈만을 제공한다면 별일 아닐 것이다.
+그러나 각기 다른 리눅스 배포판마다 한번씩 빌드하는 수를 각 리눅스 배포판마다
+제공하는 다른 릴리즈의 수와 곱하게 되면 이번에는 각 릴리즈들의 다른 빌드
+옵션의 악몽과 마주하게 것이다. 또한 각 리눅스 배포판들은 다른 하드웨어
+종류에(다른 프로세서 타입과 다른 옵션들) 맞춰져 있는 많은 다른 커널들을
+배포한다. 그러므로 한번의 배포에서조차 여러분들의 모듈은 여러 버젼을
+만들 필요가 있다.
+
+나를 믿어라. 여러분들은 이러한 종류의 배포를 지원하려고 시도한다면 시간이
+지나면 미칠지경이 될 것이다. 난 이러한 것을 오래전에 아주 어렵게 배웠다...
+
+
+
+변하지않는 커널 소스 인터페이스들
+---------------------------------
+
+리눅스 커널 드라이버를 계속해서 메인 커널 트리에 반영하지 않고
+유지보수하려고 하는 사람들과 이 문제를 논의하게 되면 훨씬 더
+"논란의 여지가 많은" 주제가 될 것이다.
+
+리눅스 커널 개발은 끊임없이 빠른 속도로 이루어지고 있으며 결코
+느슨해진 적이 없다. 커널 개발자들이 현재 인터페이스들에서 버그를
+발견하거나 무엇인가 할 수 있는 더 좋은 방법을 찾게 되었다고 하자.
+그들이 발견한 것을 실행한다면 아마도 더 잘 동작하도록 현재 인터페이스들을
+수정하게 될 것이다. 그들이 그런 일을 하게되면 함수 이름들은 변하게 되고,
+구조체들은 늘어나거나 줄어들게 되고, 함수 파라미터들은 재작업될 것이다.
+이러한 일이 발생되면 커널 내에 이 인터페이스를 사용했던 인스턴스들이 동시에
+수정될 것이며 이러한 과정은 모든 것이 계속해서 올바르게 동작할 것이라는
+것을 보장한다.
+
+이러한 것의 한 예로써, 커널 내부의 USB 인터페이스들은 이 서브시스템이
+생긴 이후로 적어도 3번의 다른 재작업을 겪었다. 이 재작업들은 많은 다른
+문제들을 풀었다.
+ - 데이터 스트림들의 동기적인 모델에서 비동기적인 모델로의 변화. 이것은
+ 많은 드라이버들의 복잡성을 줄이고 처리량을 향상시켜 현재는 거의 모든
+ USB 장치들의 거의 최대 속도로 실행되고 있다.
+ - USB 드라이버가 USB 코어로부터 데이터 패킷들을 할당받로록 한 변경으로
+ 인해서 지금의 모든 드라이버들은 많은 문서화된 데드락을 수정하기 위하여
+ USB 코어에게 더 많은 정보를 제공해야만 한다.
+
+이것은 오랫동안 자신의 오래된 USB 인터페이스들을 유지해야 하는 closed 운영체제들과는
+완전히 반대되는 것이다. closed된 운영체제들은 새로운 개발자들에게 우연히 낡은
+인터페이스를 사용하게 할 기회를 주게되며, 적절하지 못한 방법으로 처리하게 되어
+운영체제의 안정성을 해치는 문제를 야기하게 된다.
+
+이 두가지의 예들 모두, 모든 개발자들은 꼭 이루어져야 하는 중요한 변화들이라고
+동의를 하였고 비교적 적은 고통으로 변경되어졌다. 리눅스가 변하지 않는 소스
+인터페이스를 고집한다면, 새로운 인터페이스가 만들어지게 되며 반면 기존의 오래된
+것들, 그리고 깨진 것들은 계속해서 유지되어야 하며 이러한 일들은 USB 개발자들에게
+또 다른 일거리를 주게 된다. 모든 리눅스 USB 개발자들에게 자신의 그들의 업무를
+마친 후 시간을 투자하여 아무 득도 없는 무료 봉사를 해달라고 하는 것은 가능성이
+희박한 일이다.
+
+보안 문제 역시 리눅스에게는 매우 중요하다. 보안 문제가 발견되면 그것은
+매우 짧은 시간 안에 수정된다. 보안 문제는 그 문제를 해결하기 위하여
+여러번 내부 커널 인터페이스들을 재작업하게 만들었다. 이러한 문제가
+발생하였을 때 그 인터페이스들을 사용하는 모든 드라이버들도 동시에
+수정되어 보안 문제가 앞으로 갑작스럽게 생기지는 않을 것이라는 것을
+보장한다. 내부 인터페이스들의 변경이 허락되지 않으면 이러한 종류의 보안
+문제를 수정하고 그것이 다시 발생하지 않을 것이라고 보장하는 것은 가능하지
+않을 것이다.
+
+커널 인터페이스들은 계속해서 정리되고 있다. 현재 인터페이스를 사용하는
+사람이 한명도 없다면 그것은 삭제된다. 이것은 커널이 가능한한 가장 작게
+유지되며 존재하는 모든 가능성이 있는 인터페이스들이 테스트된다는 것을
+보장한다(사용되지 않는 인터페이스들은 유효성 검증을 하기가 거의 불가능하다).
+
+
+무엇을 해야 하나
+---------------
+자, 여러분이 메인 커널 트리에 있지 않은 리눅스 커널 드라이버를 가지고
+있다면 여러분은 즉, 개발자는 무엇을 해야 하나? 모든 배포판마다 다른
+커널 버젼을 위한 바이너리 드라이버를 배포하는 것은 악몽이며 계속해서
+변하고 있는 커널 인터페이스들의 맞처 유지보수하려고 시도하는 것은 힘든
+일이다.
+
+간단하다. 여러분의 커널 드라이버를 메인 커널 트리에 반영하라(우리는 여기서
+GPL을 따르는 배포 드라이버에 관해 얘기하고 있다는 것을 상기하라. 여러분의
+코드가 이러한 분류에 해당되지 않는다면 행운을 빈다. 여러분 스스로 어떻게든
+해야만 한다). 여러분의 드라이버가 트리에 있게되면 커널 인터페이스가
+변경되더라도 가장 먼저 커널에 변경을 가했던 사람에 의해서 수정될 것이다.
+이것은 여러분의 드라이버가 여러분의 별다른 노력없이 항상 빌드가 가능하며
+동작하는 것을 보장한다.
+
+메인 커널 트리에 여러분의 드라이버를 반영하면 얻게 되는 장점들은 다음과 같다.
+ - 관리에 드는 비용(원래 개발자의)은 줄어줄면서 드라이버의 질은 향상될 것이다.
+ - 다른 개발자들이 여러분의 드라이버에 기능들을 추가 할 것이다.
+ - 다른 사람들은 여러분의 드라이버에 버그를 발견하고 수정할 것이다.
+ - 다른 사람들은 여러분의 드라이버의 개선점을 찾을 줄 것이다.
+ - 외부 인터페이스 변경으로 인해 여러분의 드라이버의 수정이 필요하다면 다른
+ 사람들이 드라이버를 업데이트할 것이다.
+ - 여러분의 드라이버는 별다른 노력 없이 모든 리눅스 배포판에 자동적으로
+ 추가될 것이다.
+
+리눅스는 다른 운영 체제보다 "쉽게 쓸수 있는(out of the box)" 많은 다른 장치들을
+지원하고 어떤 다른 운영 체제보다 다양한 아키텍쳐위에서 이러한 장치들을 지원하기 때문에
+이러한 증명된 개발 모델은 틀림없이 바로 가고 있는 것이다.
+
+
+
+------
+
+이 문서의 초안을 검토해주고 코멘트 해준 Randy Dunlap, Andrew Morton, David Brownell,
+Hanna Linder, Robert Love, 그리고 Nishanth Aravamudan에게 감사한다.
diff --git a/Documentation/kobject.txt b/Documentation/kobject.txt
new file mode 100644
index 000000000..1be59a3a5
--- /dev/null
+++ b/Documentation/kobject.txt
@@ -0,0 +1,415 @@
+Everything you never wanted to know about kobjects, ksets, and ktypes
+
+Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+Based on an original article by Jon Corbet for lwn.net written October 1,
+2003 and located at http://lwn.net/Articles/51437/
+
+Last updated December 19, 2007
+
+
+Part of the difficulty in understanding the driver model - and the kobject
+abstraction upon which it is built - is that there is no obvious starting
+place. Dealing with kobjects requires understanding a few different types,
+all of which make reference to each other. In an attempt to make things
+easier, we'll take a multi-pass approach, starting with vague terms and
+adding detail as we go. To that end, here are some quick definitions of
+some terms we will be working with.
+
+ - A kobject is an object of type struct kobject. Kobjects have a name
+ and a reference count. A kobject also has a parent pointer (allowing
+ objects to be arranged into hierarchies), a specific type, and,
+ usually, a representation in the sysfs virtual filesystem.
+
+ Kobjects are generally not interesting on their own; instead, they are
+ usually embedded within some other structure which contains the stuff
+ the code is really interested in.
+
+ No structure should EVER have more than one kobject embedded within it.
+ If it does, the reference counting for the object is sure to be messed
+ up and incorrect, and your code will be buggy. So do not do this.
+
+ - A ktype is the type of object that embeds a kobject. Every structure
+ that embeds a kobject needs a corresponding ktype. The ktype controls
+ what happens to the kobject when it is created and destroyed.
+
+ - A kset is a group of kobjects. These kobjects can be of the same ktype
+ or belong to different ktypes. The kset is the basic container type for
+ collections of kobjects. Ksets contain their own kobjects, but you can
+ safely ignore that implementation detail as the kset core code handles
+ this kobject automatically.
+
+ When you see a sysfs directory full of other directories, generally each
+ of those directories corresponds to a kobject in the same kset.
+
+We'll look at how to create and manipulate all of these types. A bottom-up
+approach will be taken, so we'll go back to kobjects.
+
+
+Embedding kobjects
+
+It is rare for kernel code to create a standalone kobject, with one major
+exception explained below. Instead, kobjects are used to control access to
+a larger, domain-specific object. To this end, kobjects will be found
+embedded in other structures. If you are used to thinking of things in
+object-oriented terms, kobjects can be seen as a top-level, abstract class
+from which other classes are derived. A kobject implements a set of
+capabilities which are not particularly useful by themselves, but which are
+nice to have in other objects. The C language does not allow for the
+direct expression of inheritance, so other techniques - such as structure
+embedding - must be used.
+
+(As an aside, for those familiar with the kernel linked list implementation,
+this is analogous as to how "list_head" structs are rarely useful on
+their own, but are invariably found embedded in the larger objects of
+interest.)
+
+So, for example, the UIO code in drivers/uio/uio.c has a structure that
+defines the memory region associated with a uio device:
+
+ struct uio_map {
+ struct kobject kobj;
+ struct uio_mem *mem;
+ };
+
+If you have a struct uio_map structure, finding its embedded kobject is
+just a matter of using the kobj member. Code that works with kobjects will
+often have the opposite problem, however: given a struct kobject pointer,
+what is the pointer to the containing structure? You must avoid tricks
+(such as assuming that the kobject is at the beginning of the structure)
+and, instead, use the container_of() macro, found in <linux/kernel.h>:
+
+ container_of(pointer, type, member)
+
+where:
+
+ * "pointer" is the pointer to the embedded kobject,
+ * "type" is the type of the containing structure, and
+ * "member" is the name of the structure field to which "pointer" points.
+
+The return value from container_of() is a pointer to the corresponding
+container type. So, for example, a pointer "kp" to a struct kobject
+embedded *within* a struct uio_map could be converted to a pointer to the
+*containing* uio_map structure with:
+
+ struct uio_map *u_map = container_of(kp, struct uio_map, kobj);
+
+For convenience, programmers often define a simple macro for "back-casting"
+kobject pointers to the containing type. Exactly this happens in the
+earlier drivers/uio/uio.c, as you can see here:
+
+ struct uio_map {
+ struct kobject kobj;
+ struct uio_mem *mem;
+ };
+
+ #define to_map(map) container_of(map, struct uio_map, kobj)
+
+where the macro argument "map" is a pointer to the struct kobject in
+question. That macro is subsequently invoked with:
+
+ struct uio_map *map = to_map(kobj);
+
+
+Initialization of kobjects
+
+Code which creates a kobject must, of course, initialize that object. Some
+of the internal fields are setup with a (mandatory) call to kobject_init():
+
+ void kobject_init(struct kobject *kobj, struct kobj_type *ktype);
+
+The ktype is required for a kobject to be created properly, as every kobject
+must have an associated kobj_type. After calling kobject_init(), to
+register the kobject with sysfs, the function kobject_add() must be called:
+
+ int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...);
+
+This sets up the parent of the kobject and the name for the kobject
+properly. If the kobject is to be associated with a specific kset,
+kobj->kset must be assigned before calling kobject_add(). If a kset is
+associated with a kobject, then the parent for the kobject can be set to
+NULL in the call to kobject_add() and then the kobject's parent will be the
+kset itself.
+
+As the name of the kobject is set when it is added to the kernel, the name
+of the kobject should never be manipulated directly. If you must change
+the name of the kobject, call kobject_rename():
+
+ int kobject_rename(struct kobject *kobj, const char *new_name);
+
+kobject_rename does not perform any locking or have a solid notion of
+what names are valid so the caller must provide their own sanity checking
+and serialization.
+
+There is a function called kobject_set_name() but that is legacy cruft and
+is being removed. If your code needs to call this function, it is
+incorrect and needs to be fixed.
+
+To properly access the name of the kobject, use the function
+kobject_name():
+
+ const char *kobject_name(const struct kobject * kobj);
+
+There is a helper function to both initialize and add the kobject to the
+kernel at the same time, called surprisingly enough kobject_init_and_add():
+
+ int kobject_init_and_add(struct kobject *kobj, struct kobj_type *ktype,
+ struct kobject *parent, const char *fmt, ...);
+
+The arguments are the same as the individual kobject_init() and
+kobject_add() functions described above.
+
+
+Uevents
+
+After a kobject has been registered with the kobject core, you need to
+announce to the world that it has been created. This can be done with a
+call to kobject_uevent():
+
+ int kobject_uevent(struct kobject *kobj, enum kobject_action action);
+
+Use the KOBJ_ADD action for when the kobject is first added to the kernel.
+This should be done only after any attributes or children of the kobject
+have been initialized properly, as userspace will instantly start to look
+for them when this call happens.
+
+When the kobject is removed from the kernel (details on how to do that are
+below), the uevent for KOBJ_REMOVE will be automatically created by the
+kobject core, so the caller does not have to worry about doing that by
+hand.
+
+
+Reference counts
+
+One of the key functions of a kobject is to serve as a reference counter
+for the object in which it is embedded. As long as references to the object
+exist, the object (and the code which supports it) must continue to exist.
+The low-level functions for manipulating a kobject's reference counts are:
+
+ struct kobject *kobject_get(struct kobject *kobj);
+ void kobject_put(struct kobject *kobj);
+
+A successful call to kobject_get() will increment the kobject's reference
+counter and return the pointer to the kobject.
+
+When a reference is released, the call to kobject_put() will decrement the
+reference count and, possibly, free the object. Note that kobject_init()
+sets the reference count to one, so the code which sets up the kobject will
+need to do a kobject_put() eventually to release that reference.
+
+Because kobjects are dynamic, they must not be declared statically or on
+the stack, but instead, always allocated dynamically. Future versions of
+the kernel will contain a run-time check for kobjects that are created
+statically and will warn the developer of this improper usage.
+
+If all that you want to use a kobject for is to provide a reference counter
+for your structure, please use the struct kref instead; a kobject would be
+overkill. For more information on how to use struct kref, please see the
+file Documentation/kref.txt in the Linux kernel source tree.
+
+
+Creating "simple" kobjects
+
+Sometimes all that a developer wants is a way to create a simple directory
+in the sysfs hierarchy, and not have to mess with the whole complication of
+ksets, show and store functions, and other details. This is the one
+exception where a single kobject should be created. To create such an
+entry, use the function:
+
+ struct kobject *kobject_create_and_add(char *name, struct kobject *parent);
+
+This function will create a kobject and place it in sysfs in the location
+underneath the specified parent kobject. To create simple attributes
+associated with this kobject, use:
+
+ int sysfs_create_file(struct kobject *kobj, struct attribute *attr);
+or
+ int sysfs_create_group(struct kobject *kobj, struct attribute_group *grp);
+
+Both types of attributes used here, with a kobject that has been created
+with the kobject_create_and_add(), can be of type kobj_attribute, so no
+special custom attribute is needed to be created.
+
+See the example module, samples/kobject/kobject-example.c for an
+implementation of a simple kobject and attributes.
+
+
+
+ktypes and release methods
+
+One important thing still missing from the discussion is what happens to a
+kobject when its reference count reaches zero. The code which created the
+kobject generally does not know when that will happen; if it did, there
+would be little point in using a kobject in the first place. Even
+predictable object lifecycles become more complicated when sysfs is brought
+in as other portions of the kernel can get a reference on any kobject that
+is registered in the system.
+
+The end result is that a structure protected by a kobject cannot be freed
+before its reference count goes to zero. The reference count is not under
+the direct control of the code which created the kobject. So that code must
+be notified asynchronously whenever the last reference to one of its
+kobjects goes away.
+
+Once you registered your kobject via kobject_add(), you must never use
+kfree() to free it directly. The only safe way is to use kobject_put(). It
+is good practice to always use kobject_put() after kobject_init() to avoid
+errors creeping in.
+
+This notification is done through a kobject's release() method. Usually
+such a method has a form like:
+
+ void my_object_release(struct kobject *kobj)
+ {
+ struct my_object *mine = container_of(kobj, struct my_object, kobj);
+
+ /* Perform any additional cleanup on this object, then... */
+ kfree(mine);
+ }
+
+One important point cannot be overstated: every kobject must have a
+release() method, and the kobject must persist (in a consistent state)
+until that method is called. If these constraints are not met, the code is
+flawed. Note that the kernel will warn you if you forget to provide a
+release() method. Do not try to get rid of this warning by providing an
+"empty" release function; you will be mocked mercilessly by the kobject
+maintainer if you attempt this.
+
+Note, the name of the kobject is available in the release function, but it
+must NOT be changed within this callback. Otherwise there will be a memory
+leak in the kobject core, which makes people unhappy.
+
+Interestingly, the release() method is not stored in the kobject itself;
+instead, it is associated with the ktype. So let us introduce struct
+kobj_type:
+
+ struct kobj_type {
+ void (*release)(struct kobject *kobj);
+ const struct sysfs_ops *sysfs_ops;
+ struct attribute **default_attrs;
+ const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
+ const void *(*namespace)(struct kobject *kobj);
+ };
+
+This structure is used to describe a particular type of kobject (or, more
+correctly, of containing object). Every kobject needs to have an associated
+kobj_type structure; a pointer to that structure must be specified when you
+call kobject_init() or kobject_init_and_add().
+
+The release field in struct kobj_type is, of course, a pointer to the
+release() method for this type of kobject. The other two fields (sysfs_ops
+and default_attrs) control how objects of this type are represented in
+sysfs; they are beyond the scope of this document.
+
+The default_attrs pointer is a list of default attributes that will be
+automatically created for any kobject that is registered with this ktype.
+
+
+ksets
+
+A kset is merely a collection of kobjects that want to be associated with
+each other. There is no restriction that they be of the same ktype, but be
+very careful if they are not.
+
+A kset serves these functions:
+
+ - It serves as a bag containing a group of objects. A kset can be used by
+ the kernel to track "all block devices" or "all PCI device drivers."
+
+ - A kset is also a subdirectory in sysfs, where the associated kobjects
+ with the kset can show up. Every kset contains a kobject which can be
+ set up to be the parent of other kobjects; the top-level directories of
+ the sysfs hierarchy are constructed in this way.
+
+ - Ksets can support the "hotplugging" of kobjects and influence how
+ uevent events are reported to user space.
+
+In object-oriented terms, "kset" is the top-level container class; ksets
+contain their own kobject, but that kobject is managed by the kset code and
+should not be manipulated by any other user.
+
+A kset keeps its children in a standard kernel linked list. Kobjects point
+back to their containing kset via their kset field. In almost all cases,
+the kobjects belonging to a kset have that kset (or, strictly, its embedded
+kobject) in their parent.
+
+As a kset contains a kobject within it, it should always be dynamically
+created and never declared statically or on the stack. To create a new
+kset use:
+ struct kset *kset_create_and_add(const char *name,
+ struct kset_uevent_ops *u,
+ struct kobject *parent);
+
+When you are finished with the kset, call:
+ void kset_unregister(struct kset *kset);
+to destroy it. This removes the kset from sysfs and decrements its reference
+count. When the reference count goes to zero, the kset will be released.
+Because other references to the kset may still exist, the release may happen
+after kset_unregister() returns.
+
+An example of using a kset can be seen in the
+samples/kobject/kset-example.c file in the kernel tree.
+
+If a kset wishes to control the uevent operations of the kobjects
+associated with it, it can use the struct kset_uevent_ops to handle it:
+
+struct kset_uevent_ops {
+ int (*filter)(struct kset *kset, struct kobject *kobj);
+ const char *(*name)(struct kset *kset, struct kobject *kobj);
+ int (*uevent)(struct kset *kset, struct kobject *kobj,
+ struct kobj_uevent_env *env);
+};
+
+
+The filter function allows a kset to prevent a uevent from being emitted to
+userspace for a specific kobject. If the function returns 0, the uevent
+will not be emitted.
+
+The name function will be called to override the default name of the kset
+that the uevent sends to userspace. By default, the name will be the same
+as the kset itself, but this function, if present, can override that name.
+
+The uevent function will be called when the uevent is about to be sent to
+userspace to allow more environment variables to be added to the uevent.
+
+One might ask how, exactly, a kobject is added to a kset, given that no
+functions which perform that function have been presented. The answer is
+that this task is handled by kobject_add(). When a kobject is passed to
+kobject_add(), its kset member should point to the kset to which the
+kobject will belong. kobject_add() will handle the rest.
+
+If the kobject belonging to a kset has no parent kobject set, it will be
+added to the kset's directory. Not all members of a kset do necessarily
+live in the kset directory. If an explicit parent kobject is assigned
+before the kobject is added, the kobject is registered with the kset, but
+added below the parent kobject.
+
+
+Kobject removal
+
+After a kobject has been registered with the kobject core successfully, it
+must be cleaned up when the code is finished with it. To do that, call
+kobject_put(). By doing this, the kobject core will automatically clean up
+all of the memory allocated by this kobject. If a KOBJ_ADD uevent has been
+sent for the object, a corresponding KOBJ_REMOVE uevent will be sent, and
+any other sysfs housekeeping will be handled for the caller properly.
+
+If you need to do a two-stage delete of the kobject (say you are not
+allowed to sleep when you need to destroy the object), then call
+kobject_del() which will unregister the kobject from sysfs. This makes the
+kobject "invisible", but it is not cleaned up, and the reference count of
+the object is still the same. At a later time call kobject_put() to finish
+the cleanup of the memory associated with the kobject.
+
+kobject_del() can be used to drop the reference to the parent object, if
+circular references are constructed. It is valid in some cases, that a
+parent objects references a child. Circular references _must_ be broken
+with an explicit call to kobject_del(), so that a release functions will be
+called, and the objects in the former circle release each other.
+
+
+Example code to copy from
+
+For a more complete example of using ksets and kobjects properly, see the
+example programs samples/kobject/{kobject-example.c,kset-example.c},
+which will be built as loadable modules if you select CONFIG_SAMPLE_KOBJECT.
diff --git a/Documentation/kprobes.txt b/Documentation/kprobes.txt
new file mode 100644
index 000000000..1f9b3e2b9
--- /dev/null
+++ b/Documentation/kprobes.txt
@@ -0,0 +1,731 @@
+Title : Kernel Probes (Kprobes)
+Authors : Jim Keniston <jkenisto@us.ibm.com>
+ : Prasanna S Panchamukhi <prasanna.panchamukhi@gmail.com>
+ : Masami Hiramatsu <mhiramat@redhat.com>
+
+CONTENTS
+
+1. Concepts: Kprobes, Jprobes, Return Probes
+2. Architectures Supported
+3. Configuring Kprobes
+4. API Reference
+5. Kprobes Features and Limitations
+6. Probe Overhead
+7. TODO
+8. Kprobes Example
+9. Jprobes Example
+10. Kretprobes Example
+Appendix A: The kprobes debugfs interface
+Appendix B: The kprobes sysctl interface
+
+1. Concepts: Kprobes, Jprobes, Return Probes
+
+Kprobes enables you to dynamically break into any kernel routine and
+collect debugging and performance information non-disruptively. You
+can trap at almost any kernel code address(*), specifying a handler
+routine to be invoked when the breakpoint is hit.
+(*: some parts of the kernel code can not be trapped, see 1.5 Blacklist)
+
+There are currently three types of probes: kprobes, jprobes, and
+kretprobes (also called return probes). A kprobe can be inserted
+on virtually any instruction in the kernel. A jprobe is inserted at
+the entry to a kernel function, and provides convenient access to the
+function's arguments. A return probe fires when a specified function
+returns.
+
+In the typical case, Kprobes-based instrumentation is packaged as
+a kernel module. The module's init function installs ("registers")
+one or more probes, and the exit function unregisters them. A
+registration function such as register_kprobe() specifies where
+the probe is to be inserted and what handler is to be called when
+the probe is hit.
+
+There are also register_/unregister_*probes() functions for batch
+registration/unregistration of a group of *probes. These functions
+can speed up unregistration process when you have to unregister
+a lot of probes at once.
+
+The next four subsections explain how the different types of
+probes work and how jump optimization works. They explain certain
+things that you'll need to know in order to make the best use of
+Kprobes -- e.g., the difference between a pre_handler and
+a post_handler, and how to use the maxactive and nmissed fields of
+a kretprobe. But if you're in a hurry to start using Kprobes, you
+can skip ahead to section 2.
+
+1.1 How Does a Kprobe Work?
+
+When a kprobe is registered, Kprobes makes a copy of the probed
+instruction and replaces the first byte(s) of the probed instruction
+with a breakpoint instruction (e.g., int3 on i386 and x86_64).
+
+When a CPU hits the breakpoint instruction, a trap occurs, the CPU's
+registers are saved, and control passes to Kprobes via the
+notifier_call_chain mechanism. Kprobes executes the "pre_handler"
+associated with the kprobe, passing the handler the addresses of the
+kprobe struct and the saved registers.
+
+Next, Kprobes single-steps its copy of the probed instruction.
+(It would be simpler to single-step the actual instruction in place,
+but then Kprobes would have to temporarily remove the breakpoint
+instruction. This would open a small time window when another CPU
+could sail right past the probepoint.)
+
+After the instruction is single-stepped, Kprobes executes the
+"post_handler," if any, that is associated with the kprobe.
+Execution then continues with the instruction following the probepoint.
+
+1.2 How Does a Jprobe Work?
+
+A jprobe is implemented using a kprobe that is placed on a function's
+entry point. It employs a simple mirroring principle to allow
+seamless access to the probed function's arguments. The jprobe
+handler routine should have the same signature (arg list and return
+type) as the function being probed, and must always end by calling
+the Kprobes function jprobe_return().
+
+Here's how it works. When the probe is hit, Kprobes makes a copy of
+the saved registers and a generous portion of the stack (see below).
+Kprobes then points the saved instruction pointer at the jprobe's
+handler routine, and returns from the trap. As a result, control
+passes to the handler, which is presented with the same register and
+stack contents as the probed function. When it is done, the handler
+calls jprobe_return(), which traps again to restore the original stack
+contents and processor state and switch to the probed function.
+
+By convention, the callee owns its arguments, so gcc may produce code
+that unexpectedly modifies that portion of the stack. This is why
+Kprobes saves a copy of the stack and restores it after the jprobe
+handler has run. Up to MAX_STACK_SIZE bytes are copied -- e.g.,
+64 bytes on i386.
+
+Note that the probed function's args may be passed on the stack
+or in registers. The jprobe will work in either case, so long as the
+handler's prototype matches that of the probed function.
+
+1.3 Return Probes
+
+1.3.1 How Does a Return Probe Work?
+
+When you call register_kretprobe(), Kprobes establishes a kprobe at
+the entry to the function. When the probed function is called and this
+probe is hit, Kprobes saves a copy of the return address, and replaces
+the return address with the address of a "trampoline." The trampoline
+is an arbitrary piece of code -- typically just a nop instruction.
+At boot time, Kprobes registers a kprobe at the trampoline.
+
+When the probed function executes its return instruction, control
+passes to the trampoline and that probe is hit. Kprobes' trampoline
+handler calls the user-specified return handler associated with the
+kretprobe, then sets the saved instruction pointer to the saved return
+address, and that's where execution resumes upon return from the trap.
+
+While the probed function is executing, its return address is
+stored in an object of type kretprobe_instance. Before calling
+register_kretprobe(), the user sets the maxactive field of the
+kretprobe struct to specify how many instances of the specified
+function can be probed simultaneously. register_kretprobe()
+pre-allocates the indicated number of kretprobe_instance objects.
+
+For example, if the function is non-recursive and is called with a
+spinlock held, maxactive = 1 should be enough. If the function is
+non-recursive and can never relinquish the CPU (e.g., via a semaphore
+or preemption), NR_CPUS should be enough. If maxactive <= 0, it is
+set to a default value. If CONFIG_PREEMPT is enabled, the default
+is max(10, 2*NR_CPUS). Otherwise, the default is NR_CPUS.
+
+It's not a disaster if you set maxactive too low; you'll just miss
+some probes. In the kretprobe struct, the nmissed field is set to
+zero when the return probe is registered, and is incremented every
+time the probed function is entered but there is no kretprobe_instance
+object available for establishing the return probe.
+
+1.3.2 Kretprobe entry-handler
+
+Kretprobes also provides an optional user-specified handler which runs
+on function entry. This handler is specified by setting the entry_handler
+field of the kretprobe struct. Whenever the kprobe placed by kretprobe at the
+function entry is hit, the user-defined entry_handler, if any, is invoked.
+If the entry_handler returns 0 (success) then a corresponding return handler
+is guaranteed to be called upon function return. If the entry_handler
+returns a non-zero error then Kprobes leaves the return address as is, and
+the kretprobe has no further effect for that particular function instance.
+
+Multiple entry and return handler invocations are matched using the unique
+kretprobe_instance object associated with them. Additionally, a user
+may also specify per return-instance private data to be part of each
+kretprobe_instance object. This is especially useful when sharing private
+data between corresponding user entry and return handlers. The size of each
+private data object can be specified at kretprobe registration time by
+setting the data_size field of the kretprobe struct. This data can be
+accessed through the data field of each kretprobe_instance object.
+
+In case probed function is entered but there is no kretprobe_instance
+object available, then in addition to incrementing the nmissed count,
+the user entry_handler invocation is also skipped.
+
+1.4 How Does Jump Optimization Work?
+
+If your kernel is built with CONFIG_OPTPROBES=y (currently this flag
+is automatically set 'y' on x86/x86-64, non-preemptive kernel) and
+the "debug.kprobes_optimization" kernel parameter is set to 1 (see
+sysctl(8)), Kprobes tries to reduce probe-hit overhead by using a jump
+instruction instead of a breakpoint instruction at each probepoint.
+
+1.4.1 Init a Kprobe
+
+When a probe is registered, before attempting this optimization,
+Kprobes inserts an ordinary, breakpoint-based kprobe at the specified
+address. So, even if it's not possible to optimize this particular
+probepoint, there'll be a probe there.
+
+1.4.2 Safety Check
+
+Before optimizing a probe, Kprobes performs the following safety checks:
+
+- Kprobes verifies that the region that will be replaced by the jump
+instruction (the "optimized region") lies entirely within one function.
+(A jump instruction is multiple bytes, and so may overlay multiple
+instructions.)
+
+- Kprobes analyzes the entire function and verifies that there is no
+jump into the optimized region. Specifically:
+ - the function contains no indirect jump;
+ - the function contains no instruction that causes an exception (since
+ the fixup code triggered by the exception could jump back into the
+ optimized region -- Kprobes checks the exception tables to verify this);
+ and
+ - there is no near jump to the optimized region (other than to the first
+ byte).
+
+- For each instruction in the optimized region, Kprobes verifies that
+the instruction can be executed out of line.
+
+1.4.3 Preparing Detour Buffer
+
+Next, Kprobes prepares a "detour" buffer, which contains the following
+instruction sequence:
+- code to push the CPU's registers (emulating a breakpoint trap)
+- a call to the trampoline code which calls user's probe handlers.
+- code to restore registers
+- the instructions from the optimized region
+- a jump back to the original execution path.
+
+1.4.4 Pre-optimization
+
+After preparing the detour buffer, Kprobes verifies that none of the
+following situations exist:
+- The probe has either a break_handler (i.e., it's a jprobe) or a
+post_handler.
+- Other instructions in the optimized region are probed.
+- The probe is disabled.
+In any of the above cases, Kprobes won't start optimizing the probe.
+Since these are temporary situations, Kprobes tries to start
+optimizing it again if the situation is changed.
+
+If the kprobe can be optimized, Kprobes enqueues the kprobe to an
+optimizing list, and kicks the kprobe-optimizer workqueue to optimize
+it. If the to-be-optimized probepoint is hit before being optimized,
+Kprobes returns control to the original instruction path by setting
+the CPU's instruction pointer to the copied code in the detour buffer
+-- thus at least avoiding the single-step.
+
+1.4.5 Optimization
+
+The Kprobe-optimizer doesn't insert the jump instruction immediately;
+rather, it calls synchronize_sched() for safety first, because it's
+possible for a CPU to be interrupted in the middle of executing the
+optimized region(*). As you know, synchronize_sched() can ensure
+that all interruptions that were active when synchronize_sched()
+was called are done, but only if CONFIG_PREEMPT=n. So, this version
+of kprobe optimization supports only kernels with CONFIG_PREEMPT=n.(**)
+
+After that, the Kprobe-optimizer calls stop_machine() to replace
+the optimized region with a jump instruction to the detour buffer,
+using text_poke_smp().
+
+1.4.6 Unoptimization
+
+When an optimized kprobe is unregistered, disabled, or blocked by
+another kprobe, it will be unoptimized. If this happens before
+the optimization is complete, the kprobe is just dequeued from the
+optimized list. If the optimization has been done, the jump is
+replaced with the original code (except for an int3 breakpoint in
+the first byte) by using text_poke_smp().
+
+(*)Please imagine that the 2nd instruction is interrupted and then
+the optimizer replaces the 2nd instruction with the jump *address*
+while the interrupt handler is running. When the interrupt
+returns to original address, there is no valid instruction,
+and it causes an unexpected result.
+
+(**)This optimization-safety checking may be replaced with the
+stop-machine method that ksplice uses for supporting a CONFIG_PREEMPT=y
+kernel.
+
+NOTE for geeks:
+The jump optimization changes the kprobe's pre_handler behavior.
+Without optimization, the pre_handler can change the kernel's execution
+path by changing regs->ip and returning 1. However, when the probe
+is optimized, that modification is ignored. Thus, if you want to
+tweak the kernel's execution path, you need to suppress optimization,
+using one of the following techniques:
+- Specify an empty function for the kprobe's post_handler or break_handler.
+ or
+- Execute 'sysctl -w debug.kprobes_optimization=n'
+
+1.5 Blacklist
+
+Kprobes can probe most of the kernel except itself. This means
+that there are some functions where kprobes cannot probe. Probing
+(trapping) such functions can cause a recursive trap (e.g. double
+fault) or the nested probe handler may never be called.
+Kprobes manages such functions as a blacklist.
+If you want to add a function into the blacklist, you just need
+to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro
+to specify a blacklisted function.
+Kprobes checks the given probe address against the blacklist and
+rejects registering it, if the given address is in the blacklist.
+
+2. Architectures Supported
+
+Kprobes, jprobes, and return probes are implemented on the following
+architectures:
+
+- i386 (Supports jump optimization)
+- x86_64 (AMD-64, EM64T) (Supports jump optimization)
+- ppc64
+- ia64 (Does not support probes on instruction slot1.)
+- sparc64 (Return probes not yet implemented.)
+- arm
+- ppc
+- mips
+- s390
+
+3. Configuring Kprobes
+
+When configuring the kernel using make menuconfig/xconfig/oldconfig,
+ensure that CONFIG_KPROBES is set to "y". Under "General setup", look
+for "Kprobes".
+
+So that you can load and unload Kprobes-based instrumentation modules,
+make sure "Loadable module support" (CONFIG_MODULES) and "Module
+unloading" (CONFIG_MODULE_UNLOAD) are set to "y".
+
+Also make sure that CONFIG_KALLSYMS and perhaps even CONFIG_KALLSYMS_ALL
+are set to "y", since kallsyms_lookup_name() is used by the in-kernel
+kprobe address resolution code.
+
+If you need to insert a probe in the middle of a function, you may find
+it useful to "Compile the kernel with debug info" (CONFIG_DEBUG_INFO),
+so you can use "objdump -d -l vmlinux" to see the source-to-object
+code mapping.
+
+4. API Reference
+
+The Kprobes API includes a "register" function and an "unregister"
+function for each type of probe. The API also includes "register_*probes"
+and "unregister_*probes" functions for (un)registering arrays of probes.
+Here are terse, mini-man-page specifications for these functions and
+the associated probe handlers that you'll write. See the files in the
+samples/kprobes/ sub-directory for examples.
+
+4.1 register_kprobe
+
+#include <linux/kprobes.h>
+int register_kprobe(struct kprobe *kp);
+
+Sets a breakpoint at the address kp->addr. When the breakpoint is
+hit, Kprobes calls kp->pre_handler. After the probed instruction
+is single-stepped, Kprobe calls kp->post_handler. If a fault
+occurs during execution of kp->pre_handler or kp->post_handler,
+or during single-stepping of the probed instruction, Kprobes calls
+kp->fault_handler. Any or all handlers can be NULL. If kp->flags
+is set KPROBE_FLAG_DISABLED, that kp will be registered but disabled,
+so, its handlers aren't hit until calling enable_kprobe(kp).
+
+NOTE:
+1. With the introduction of the "symbol_name" field to struct kprobe,
+the probepoint address resolution will now be taken care of by the kernel.
+The following will now work:
+
+ kp.symbol_name = "symbol_name";
+
+(64-bit powerpc intricacies such as function descriptors are handled
+transparently)
+
+2. Use the "offset" field of struct kprobe if the offset into the symbol
+to install a probepoint is known. This field is used to calculate the
+probepoint.
+
+3. Specify either the kprobe "symbol_name" OR the "addr". If both are
+specified, kprobe registration will fail with -EINVAL.
+
+4. With CISC architectures (such as i386 and x86_64), the kprobes code
+does not validate if the kprobe.addr is at an instruction boundary.
+Use "offset" with caution.
+
+register_kprobe() returns 0 on success, or a negative errno otherwise.
+
+User's pre-handler (kp->pre_handler):
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+int pre_handler(struct kprobe *p, struct pt_regs *regs);
+
+Called with p pointing to the kprobe associated with the breakpoint,
+and regs pointing to the struct containing the registers saved when
+the breakpoint was hit. Return 0 here unless you're a Kprobes geek.
+
+User's post-handler (kp->post_handler):
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+void post_handler(struct kprobe *p, struct pt_regs *regs,
+ unsigned long flags);
+
+p and regs are as described for the pre_handler. flags always seems
+to be zero.
+
+User's fault-handler (kp->fault_handler):
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+int fault_handler(struct kprobe *p, struct pt_regs *regs, int trapnr);
+
+p and regs are as described for the pre_handler. trapnr is the
+architecture-specific trap number associated with the fault (e.g.,
+on i386, 13 for a general protection fault or 14 for a page fault).
+Returns 1 if it successfully handled the exception.
+
+4.2 register_jprobe
+
+#include <linux/kprobes.h>
+int register_jprobe(struct jprobe *jp)
+
+Sets a breakpoint at the address jp->kp.addr, which must be the address
+of the first instruction of a function. When the breakpoint is hit,
+Kprobes runs the handler whose address is jp->entry.
+
+The handler should have the same arg list and return type as the probed
+function; and just before it returns, it must call jprobe_return().
+(The handler never actually returns, since jprobe_return() returns
+control to Kprobes.) If the probed function is declared asmlinkage
+or anything else that affects how args are passed, the handler's
+declaration must match.
+
+register_jprobe() returns 0 on success, or a negative errno otherwise.
+
+4.3 register_kretprobe
+
+#include <linux/kprobes.h>
+int register_kretprobe(struct kretprobe *rp);
+
+Establishes a return probe for the function whose address is
+rp->kp.addr. When that function returns, Kprobes calls rp->handler.
+You must set rp->maxactive appropriately before you call
+register_kretprobe(); see "How Does a Return Probe Work?" for details.
+
+register_kretprobe() returns 0 on success, or a negative errno
+otherwise.
+
+User's return-probe handler (rp->handler):
+#include <linux/kprobes.h>
+#include <linux/ptrace.h>
+int kretprobe_handler(struct kretprobe_instance *ri, struct pt_regs *regs);
+
+regs is as described for kprobe.pre_handler. ri points to the
+kretprobe_instance object, of which the following fields may be
+of interest:
+- ret_addr: the return address
+- rp: points to the corresponding kretprobe object
+- task: points to the corresponding task struct
+- data: points to per return-instance private data; see "Kretprobe
+ entry-handler" for details.
+
+The regs_return_value(regs) macro provides a simple abstraction to
+extract the return value from the appropriate register as defined by
+the architecture's ABI.
+
+The handler's return value is currently ignored.
+
+4.4 unregister_*probe
+
+#include <linux/kprobes.h>
+void unregister_kprobe(struct kprobe *kp);
+void unregister_jprobe(struct jprobe *jp);
+void unregister_kretprobe(struct kretprobe *rp);
+
+Removes the specified probe. The unregister function can be called
+at any time after the probe has been registered.
+
+NOTE:
+If the functions find an incorrect probe (ex. an unregistered probe),
+they clear the addr field of the probe.
+
+4.5 register_*probes
+
+#include <linux/kprobes.h>
+int register_kprobes(struct kprobe **kps, int num);
+int register_kretprobes(struct kretprobe **rps, int num);
+int register_jprobes(struct jprobe **jps, int num);
+
+Registers each of the num probes in the specified array. If any
+error occurs during registration, all probes in the array, up to
+the bad probe, are safely unregistered before the register_*probes
+function returns.
+- kps/rps/jps: an array of pointers to *probe data structures
+- num: the number of the array entries.
+
+NOTE:
+You have to allocate(or define) an array of pointers and set all
+of the array entries before using these functions.
+
+4.6 unregister_*probes
+
+#include <linux/kprobes.h>
+void unregister_kprobes(struct kprobe **kps, int num);
+void unregister_kretprobes(struct kretprobe **rps, int num);
+void unregister_jprobes(struct jprobe **jps, int num);
+
+Removes each of the num probes in the specified array at once.
+
+NOTE:
+If the functions find some incorrect probes (ex. unregistered
+probes) in the specified array, they clear the addr field of those
+incorrect probes. However, other probes in the array are
+unregistered correctly.
+
+4.7 disable_*probe
+
+#include <linux/kprobes.h>
+int disable_kprobe(struct kprobe *kp);
+int disable_kretprobe(struct kretprobe *rp);
+int disable_jprobe(struct jprobe *jp);
+
+Temporarily disables the specified *probe. You can enable it again by using
+enable_*probe(). You must specify the probe which has been registered.
+
+4.8 enable_*probe
+
+#include <linux/kprobes.h>
+int enable_kprobe(struct kprobe *kp);
+int enable_kretprobe(struct kretprobe *rp);
+int enable_jprobe(struct jprobe *jp);
+
+Enables *probe which has been disabled by disable_*probe(). You must specify
+the probe which has been registered.
+
+5. Kprobes Features and Limitations
+
+Kprobes allows multiple probes at the same address. Currently,
+however, there cannot be multiple jprobes on the same function at
+the same time. Also, a probepoint for which there is a jprobe or
+a post_handler cannot be optimized. So if you install a jprobe,
+or a kprobe with a post_handler, at an optimized probepoint, the
+probepoint will be unoptimized automatically.
+
+In general, you can install a probe anywhere in the kernel.
+In particular, you can probe interrupt handlers. Known exceptions
+are discussed in this section.
+
+The register_*probe functions will return -EINVAL if you attempt
+to install a probe in the code that implements Kprobes (mostly
+kernel/kprobes.c and arch/*/kernel/kprobes.c, but also functions such
+as do_page_fault and notifier_call_chain).
+
+If you install a probe in an inline-able function, Kprobes makes
+no attempt to chase down all inline instances of the function and
+install probes there. gcc may inline a function without being asked,
+so keep this in mind if you're not seeing the probe hits you expect.
+
+A probe handler can modify the environment of the probed function
+-- e.g., by modifying kernel data structures, or by modifying the
+contents of the pt_regs struct (which are restored to the registers
+upon return from the breakpoint). So Kprobes can be used, for example,
+to install a bug fix or to inject faults for testing. Kprobes, of
+course, has no way to distinguish the deliberately injected faults
+from the accidental ones. Don't drink and probe.
+
+Kprobes makes no attempt to prevent probe handlers from stepping on
+each other -- e.g., probing printk() and then calling printk() from a
+probe handler. If a probe handler hits a probe, that second probe's
+handlers won't be run in that instance, and the kprobe.nmissed member
+of the second probe will be incremented.
+
+As of Linux v2.6.15-rc1, multiple handlers (or multiple instances of
+the same handler) may run concurrently on different CPUs.
+
+Kprobes does not use mutexes or allocate memory except during
+registration and unregistration.
+
+Probe handlers are run with preemption disabled. Depending on the
+architecture and optimization state, handlers may also run with
+interrupts disabled (e.g., kretprobe handlers and optimized kprobe
+handlers run without interrupt disabled on x86/x86-64). In any case,
+your handler should not yield the CPU (e.g., by attempting to acquire
+a semaphore).
+
+Since a return probe is implemented by replacing the return
+address with the trampoline's address, stack backtraces and calls
+to __builtin_return_address() will typically yield the trampoline's
+address instead of the real return address for kretprobed functions.
+(As far as we can tell, __builtin_return_address() is used only
+for instrumentation and error reporting.)
+
+If the number of times a function is called does not match the number
+of times it returns, registering a return probe on that function may
+produce undesirable results. In such a case, a line:
+kretprobe BUG!: Processing kretprobe d000000000041aa8 @ c00000000004f48c
+gets printed. With this information, one will be able to correlate the
+exact instance of the kretprobe that caused the problem. We have the
+do_exit() case covered. do_execve() and do_fork() are not an issue.
+We're unaware of other specific cases where this could be a problem.
+
+If, upon entry to or exit from a function, the CPU is running on
+a stack other than that of the current task, registering a return
+probe on that function may produce undesirable results. For this
+reason, Kprobes doesn't support return probes (or kprobes or jprobes)
+on the x86_64 version of __switch_to(); the registration functions
+return -EINVAL.
+
+On x86/x86-64, since the Jump Optimization of Kprobes modifies
+instructions widely, there are some limitations to optimization. To
+explain it, we introduce some terminology. Imagine a 3-instruction
+sequence consisting of a two 2-byte instructions and one 3-byte
+instruction.
+
+ IA
+ |
+[-2][-1][0][1][2][3][4][5][6][7]
+ [ins1][ins2][ ins3 ]
+ [<- DCR ->]
+ [<- JTPR ->]
+
+ins1: 1st Instruction
+ins2: 2nd Instruction
+ins3: 3rd Instruction
+IA: Insertion Address
+JTPR: Jump Target Prohibition Region
+DCR: Detoured Code Region
+
+The instructions in DCR are copied to the out-of-line buffer
+of the kprobe, because the bytes in DCR are replaced by
+a 5-byte jump instruction. So there are several limitations.
+
+a) The instructions in DCR must be relocatable.
+b) The instructions in DCR must not include a call instruction.
+c) JTPR must not be targeted by any jump or call instruction.
+d) DCR must not straddle the border between functions.
+
+Anyway, these limitations are checked by the in-kernel instruction
+decoder, so you don't need to worry about that.
+
+6. Probe Overhead
+
+On a typical CPU in use in 2005, a kprobe hit takes 0.5 to 1.0
+microseconds to process. Specifically, a benchmark that hits the same
+probepoint repeatedly, firing a simple handler each time, reports 1-2
+million hits per second, depending on the architecture. A jprobe or
+return-probe hit typically takes 50-75% longer than a kprobe hit.
+When you have a return probe set on a function, adding a kprobe at
+the entry to that function adds essentially no overhead.
+
+Here are sample overhead figures (in usec) for different architectures.
+k = kprobe; j = jprobe; r = return probe; kr = kprobe + return probe
+on same function; jr = jprobe + return probe on same function
+
+i386: Intel Pentium M, 1495 MHz, 2957.31 bogomips
+k = 0.57 usec; j = 1.00; r = 0.92; kr = 0.99; jr = 1.40
+
+x86_64: AMD Opteron 246, 1994 MHz, 3971.48 bogomips
+k = 0.49 usec; j = 0.76; r = 0.80; kr = 0.82; jr = 1.07
+
+ppc64: POWER5 (gr), 1656 MHz (SMT disabled, 1 virtual CPU per physical CPU)
+k = 0.77 usec; j = 1.31; r = 1.26; kr = 1.45; jr = 1.99
+
+6.1 Optimized Probe Overhead
+
+Typically, an optimized kprobe hit takes 0.07 to 0.1 microseconds to
+process. Here are sample overhead figures (in usec) for x86 architectures.
+k = unoptimized kprobe, b = boosted (single-step skipped), o = optimized kprobe,
+r = unoptimized kretprobe, rb = boosted kretprobe, ro = optimized kretprobe.
+
+i386: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips
+k = 0.80 usec; b = 0.33; o = 0.05; r = 1.10; rb = 0.61; ro = 0.33
+
+x86-64: Intel(R) Xeon(R) E5410, 2.33GHz, 4656.90 bogomips
+k = 0.99 usec; b = 0.43; o = 0.06; r = 1.24; rb = 0.68; ro = 0.30
+
+7. TODO
+
+a. SystemTap (http://sourceware.org/systemtap): Provides a simplified
+programming interface for probe-based instrumentation. Try it out.
+b. Kernel return probes for sparc64.
+c. Support for other architectures.
+d. User-space probes.
+e. Watchpoint probes (which fire on data references).
+
+8. Kprobes Example
+
+See samples/kprobes/kprobe_example.c
+
+9. Jprobes Example
+
+See samples/kprobes/jprobe_example.c
+
+10. Kretprobes Example
+
+See samples/kprobes/kretprobe_example.c
+
+For additional information on Kprobes, refer to the following URLs:
+http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe
+http://www.redhat.com/magazine/005mar05/features/kprobes/
+http://www-users.cs.umn.edu/~boutcher/kprobes/
+http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115)
+
+
+Appendix A: The kprobes debugfs interface
+
+With recent kernels (> 2.6.20) the list of registered kprobes is visible
+under the /sys/kernel/debug/kprobes/ directory (assuming debugfs is mounted at //sys/kernel/debug).
+
+/sys/kernel/debug/kprobes/list: Lists all registered probes on the system
+
+c015d71a k vfs_read+0x0
+c011a316 j do_fork+0x0
+c03dedc5 r tcp_v4_rcv+0x0
+
+The first column provides the kernel address where the probe is inserted.
+The second column identifies the type of probe (k - kprobe, r - kretprobe
+and j - jprobe), while the third column specifies the symbol+offset of
+the probe. If the probed function belongs to a module, the module name
+is also specified. Following columns show probe status. If the probe is on
+a virtual address that is no longer valid (module init sections, module
+virtual addresses that correspond to modules that've been unloaded),
+such probes are marked with [GONE]. If the probe is temporarily disabled,
+such probes are marked with [DISABLED]. If the probe is optimized, it is
+marked with [OPTIMIZED]. If the probe is ftrace-based, it is marked with
+[FTRACE].
+
+/sys/kernel/debug/kprobes/enabled: Turn kprobes ON/OFF forcibly.
+
+Provides a knob to globally and forcibly turn registered kprobes ON or OFF.
+By default, all kprobes are enabled. By echoing "0" to this file, all
+registered probes will be disarmed, till such time a "1" is echoed to this
+file. Note that this knob just disarms and arms all kprobes and doesn't
+change each probe's disabling state. This means that disabled kprobes (marked
+[DISABLED]) will be not enabled if you turn ON all kprobes by this knob.
+
+
+Appendix B: The kprobes sysctl interface
+
+/proc/sys/debug/kprobes-optimization: Turn kprobes optimization ON/OFF.
+
+When CONFIG_OPTPROBES=y, this sysctl interface appears and it provides
+a knob to globally and forcibly turn jump optimization (see section
+1.4) ON or OFF. By default, jump optimization is allowed (ON).
+If you echo "0" to this file or set "debug.kprobes_optimization" to
+0 via sysctl, all optimized probes will be unoptimized, and any new
+probes registered after that will not be optimized. Note that this
+knob *changes* the optimized state. This means that optimized probes
+(marked [OPTIMIZED]) will be unoptimized ([OPTIMIZED] tag will be
+removed). If the knob is turned on, they will be optimized again.
+
diff --git a/Documentation/kref.txt b/Documentation/kref.txt
new file mode 100644
index 000000000..ddf85a5dd
--- /dev/null
+++ b/Documentation/kref.txt
@@ -0,0 +1,303 @@
+
+krefs allow you to add reference counters to your objects. If you
+have objects that are used in multiple places and passed around, and
+you don't have refcounts, your code is almost certainly broken. If
+you want refcounts, krefs are the way to go.
+
+To use a kref, add one to your data structures like:
+
+struct my_data
+{
+ .
+ .
+ struct kref refcount;
+ .
+ .
+};
+
+The kref can occur anywhere within the data structure.
+
+You must initialize the kref after you allocate it. To do this, call
+kref_init as so:
+
+ struct my_data *data;
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ kref_init(&data->refcount);
+
+This sets the refcount in the kref to 1.
+
+Once you have an initialized kref, you must follow the following
+rules:
+
+1) If you make a non-temporary copy of a pointer, especially if
+ it can be passed to another thread of execution, you must
+ increment the refcount with kref_get() before passing it off:
+ kref_get(&data->refcount);
+ If you already have a valid pointer to a kref-ed structure (the
+ refcount cannot go to zero) you may do this without a lock.
+
+2) When you are done with a pointer, you must call kref_put():
+ kref_put(&data->refcount, data_release);
+ If this is the last reference to the pointer, the release
+ routine will be called. If the code never tries to get
+ a valid pointer to a kref-ed structure without already
+ holding a valid pointer, it is safe to do this without
+ a lock.
+
+3) If the code attempts to gain a reference to a kref-ed structure
+ without already holding a valid pointer, it must serialize access
+ where a kref_put() cannot occur during the kref_get(), and the
+ structure must remain valid during the kref_get().
+
+For example, if you allocate some data and then pass it to another
+thread to process:
+
+void data_release(struct kref *ref)
+{
+ struct my_data *data = container_of(ref, struct my_data, refcount);
+ kfree(data);
+}
+
+void more_data_handling(void *cb_data)
+{
+ struct my_data *data = cb_data;
+ .
+ . do stuff with data here
+ .
+ kref_put(&data->refcount, data_release);
+}
+
+int my_data_handler(void)
+{
+ int rv = 0;
+ struct my_data *data;
+ struct task_struct *task;
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+ kref_init(&data->refcount);
+
+ kref_get(&data->refcount);
+ task = kthread_run(more_data_handling, data, "more_data_handling");
+ if (task == ERR_PTR(-ENOMEM)) {
+ rv = -ENOMEM;
+ goto out;
+ }
+
+ .
+ . do stuff with data here
+ .
+ out:
+ kref_put(&data->refcount, data_release);
+ return rv;
+}
+
+This way, it doesn't matter what order the two threads handle the
+data, the kref_put() handles knowing when the data is not referenced
+any more and releasing it. The kref_get() does not require a lock,
+since we already have a valid pointer that we own a refcount for. The
+put needs no lock because nothing tries to get the data without
+already holding a pointer.
+
+Note that the "before" in rule 1 is very important. You should never
+do something like:
+
+ task = kthread_run(more_data_handling, data, "more_data_handling");
+ if (task == ERR_PTR(-ENOMEM)) {
+ rv = -ENOMEM;
+ goto out;
+ } else
+ /* BAD BAD BAD - get is after the handoff */
+ kref_get(&data->refcount);
+
+Don't assume you know what you are doing and use the above construct.
+First of all, you may not know what you are doing. Second, you may
+know what you are doing (there are some situations where locking is
+involved where the above may be legal) but someone else who doesn't
+know what they are doing may change the code or copy the code. It's
+bad style. Don't do it.
+
+There are some situations where you can optimize the gets and puts.
+For instance, if you are done with an object and enqueuing it for
+something else or passing it off to something else, there is no reason
+to do a get then a put:
+
+ /* Silly extra get and put */
+ kref_get(&obj->ref);
+ enqueue(obj);
+ kref_put(&obj->ref, obj_cleanup);
+
+Just do the enqueue. A comment about this is always welcome:
+
+ enqueue(obj);
+ /* We are done with obj, so we pass our refcount off
+ to the queue. DON'T TOUCH obj AFTER HERE! */
+
+The last rule (rule 3) is the nastiest one to handle. Say, for
+instance, you have a list of items that are each kref-ed, and you wish
+to get the first one. You can't just pull the first item off the list
+and kref_get() it. That violates rule 3 because you are not already
+holding a valid pointer. You must add a mutex (or some other lock).
+For instance:
+
+static DEFINE_MUTEX(mutex);
+static LIST_HEAD(q);
+struct my_data
+{
+ struct kref refcount;
+ struct list_head link;
+};
+
+static struct my_data *get_entry()
+{
+ struct my_data *entry = NULL;
+ mutex_lock(&mutex);
+ if (!list_empty(&q)) {
+ entry = container_of(q.next, struct my_data, link);
+ kref_get(&entry->refcount);
+ }
+ mutex_unlock(&mutex);
+ return entry;
+}
+
+static void release_entry(struct kref *ref)
+{
+ struct my_data *entry = container_of(ref, struct my_data, refcount);
+
+ list_del(&entry->link);
+ kfree(entry);
+}
+
+static void put_entry(struct my_data *entry)
+{
+ mutex_lock(&mutex);
+ kref_put(&entry->refcount, release_entry);
+ mutex_unlock(&mutex);
+}
+
+The kref_put() return value is useful if you do not want to hold the
+lock during the whole release operation. Say you didn't want to call
+kfree() with the lock held in the example above (since it is kind of
+pointless to do so). You could use kref_put() as follows:
+
+static void release_entry(struct kref *ref)
+{
+ /* All work is done after the return from kref_put(). */
+}
+
+static void put_entry(struct my_data *entry)
+{
+ mutex_lock(&mutex);
+ if (kref_put(&entry->refcount, release_entry)) {
+ list_del(&entry->link);
+ mutex_unlock(&mutex);
+ kfree(entry);
+ } else
+ mutex_unlock(&mutex);
+}
+
+This is really more useful if you have to call other routines as part
+of the free operations that could take a long time or might claim the
+same lock. Note that doing everything in the release routine is still
+preferred as it is a little neater.
+
+
+Corey Minyard <minyard@acm.org>
+
+A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and
+presentation on krefs, which can be found at:
+ http://www.kroah.com/linux/talks/ols_2004_kref_paper/Reprint-Kroah-Hartman-OLS2004.pdf
+and:
+ http://www.kroah.com/linux/talks/ols_2004_kref_talk/
+
+
+The above example could also be optimized using kref_get_unless_zero() in
+the following way:
+
+static struct my_data *get_entry()
+{
+ struct my_data *entry = NULL;
+ mutex_lock(&mutex);
+ if (!list_empty(&q)) {
+ entry = container_of(q.next, struct my_data, link);
+ if (!kref_get_unless_zero(&entry->refcount))
+ entry = NULL;
+ }
+ mutex_unlock(&mutex);
+ return entry;
+}
+
+static void release_entry(struct kref *ref)
+{
+ struct my_data *entry = container_of(ref, struct my_data, refcount);
+
+ mutex_lock(&mutex);
+ list_del(&entry->link);
+ mutex_unlock(&mutex);
+ kfree(entry);
+}
+
+static void put_entry(struct my_data *entry)
+{
+ kref_put(&entry->refcount, release_entry);
+}
+
+Which is useful to remove the mutex lock around kref_put() in put_entry(), but
+it's important that kref_get_unless_zero is enclosed in the same critical
+section that finds the entry in the lookup table,
+otherwise kref_get_unless_zero may reference already freed memory.
+Note that it is illegal to use kref_get_unless_zero without checking its
+return value. If you are sure (by already having a valid pointer) that
+kref_get_unless_zero() will return true, then use kref_get() instead.
+
+The function kref_get_unless_zero also makes it possible to use rcu
+locking for lookups in the above example:
+
+struct my_data
+{
+ struct rcu_head rhead;
+ .
+ struct kref refcount;
+ .
+ .
+};
+
+static struct my_data *get_entry_rcu()
+{
+ struct my_data *entry = NULL;
+ rcu_read_lock();
+ if (!list_empty(&q)) {
+ entry = container_of(q.next, struct my_data, link);
+ if (!kref_get_unless_zero(&entry->refcount))
+ entry = NULL;
+ }
+ rcu_read_unlock();
+ return entry;
+}
+
+static void release_entry_rcu(struct kref *ref)
+{
+ struct my_data *entry = container_of(ref, struct my_data, refcount);
+
+ mutex_lock(&mutex);
+ list_del_rcu(&entry->link);
+ mutex_unlock(&mutex);
+ kfree_rcu(entry, rhead);
+}
+
+static void put_entry(struct my_data *entry)
+{
+ kref_put(&entry->refcount, release_entry_rcu);
+}
+
+But note that the struct kref member needs to remain in valid memory for a
+rcu grace period after release_entry_rcu was called. That can be accomplished
+by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu()
+before using kfree, but note that synchronize_rcu() may sleep for a
+substantial amount of time.
+
+
+Thomas Hellstrom <thellstrom@vmware.com>
diff --git a/Documentation/kselftest.txt b/Documentation/kselftest.txt
new file mode 100644
index 000000000..a87d840ba
--- /dev/null
+++ b/Documentation/kselftest.txt
@@ -0,0 +1,69 @@
+Linux Kernel Selftests
+
+The kernel contains a set of "self tests" under the tools/testing/selftests/
+directory. These are intended to be small unit tests to exercise individual
+code paths in the kernel.
+
+On some systems, hot-plug tests could hang forever waiting for cpu and
+memory to be ready to be offlined. A special hot-plug target is created
+to run full range of hot-plug tests. In default mode, hot-plug tests run
+in safe mode with a limited scope. In limited mode, cpu-hotplug test is
+run on a single cpu as opposed to all hotplug capable cpus, and memory
+hotplug test is run on 2% of hotplug capable memory instead of 10%.
+
+Running the selftests (hotplug tests are run in limited mode)
+=============================================================
+
+To build the tests:
+ $ make -C tools/testing/selftests
+
+
+To run the tests:
+ $ make -C tools/testing/selftests run_tests
+
+To build and run the tests with a single command, use:
+ $ make kselftest
+
+- note that some tests will require root privileges.
+
+
+Running a subset of selftests
+========================================
+You can use the "TARGETS" variable on the make command line to specify
+single test to run, or a list of tests to run.
+
+To run only tests targeted for a single subsystem:
+ $ make -C tools/testing/selftests TARGETS=ptrace run_tests
+
+You can specify multiple tests to build and run:
+ $ make TARGETS="size timers" kselftest
+
+See the top-level tools/testing/selftests/Makefile for the list of all
+possible targets.
+
+
+Running the full range hotplug selftests
+========================================
+
+To build the hotplug tests:
+ $ make -C tools/testing/selftests hotplug
+
+To run the hotplug tests:
+ $ make -C tools/testing/selftests run_hotplug
+
+- note that some tests will require root privileges.
+
+
+Contributing new tests
+======================
+
+In general, the rules for for selftests are
+
+ * Do as much as you can if you're not root;
+
+ * Don't take too long;
+
+ * Don't break the build on any architecture, and
+
+ * Don't cause the top-level "make run_tests" to fail if your feature is
+ unconfigured.
diff --git a/Documentation/laptops/.gitignore b/Documentation/laptops/.gitignore
new file mode 100644
index 000000000..da2bd065f
--- /dev/null
+++ b/Documentation/laptops/.gitignore
@@ -0,0 +1,2 @@
+dslm
+freefall
diff --git a/Documentation/laptops/00-INDEX b/Documentation/laptops/00-INDEX
new file mode 100644
index 000000000..a3b4f209e
--- /dev/null
+++ b/Documentation/laptops/00-INDEX
@@ -0,0 +1,22 @@
+00-INDEX
+ - This file
+Makefile
+ - Makefile for building dslm example program.
+asus-laptop.txt
+ - information on the Asus Laptop Extras driver.
+disk-shock-protection.txt
+ - information on hard disk shock protection.
+dslm.c
+ - Simple Disk Sleep Monitor program
+freefall.c
+ - (HP/DELL) laptop accelerometer program for disk protection.
+laptop-mode.txt
+ - how to conserve battery power using laptop-mode.
+sony-laptop.txt
+ - Sony Notebook Control Driver (SNC) Readme.
+sonypi.txt
+ - info on Linux Sony Programmable I/O Device support.
+thinkpad-acpi.txt
+ - information on the (IBM and Lenovo) ThinkPad ACPI Extras driver.
+toshiba_haps.txt
+ - information on the Toshiba HDD Active Protection Sensor driver.
diff --git a/Documentation/laptops/Makefile b/Documentation/laptops/Makefile
new file mode 100644
index 000000000..2b0fa5edf
--- /dev/null
+++ b/Documentation/laptops/Makefile
@@ -0,0 +1,5 @@
+# List of programs to build
+hostprogs-y := dslm freefall
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
diff --git a/Documentation/laptops/asus-laptop.txt b/Documentation/laptops/asus-laptop.txt
new file mode 100644
index 000000000..79a1bc675
--- /dev/null
+++ b/Documentation/laptops/asus-laptop.txt
@@ -0,0 +1,257 @@
+Asus Laptop Extras
+
+Version 0.1
+August 6, 2009
+
+Corentin Chary <corentincj@iksaif.net>
+http://acpi4asus.sf.net/
+
+ This driver provides support for extra features of ACPI-compatible ASUS laptops.
+ It may also support some MEDION, JVC or VICTOR laptops (such as MEDION 9675 or
+ VICTOR XP7210 for example). It makes all the extra buttons generate input
+ events (like keyboards).
+ On some models adds support for changing the display brightness and output,
+ switching the LCD backlight on and off, and most importantly, allows you to
+ blink those fancy LEDs intended for reporting mail and wireless status.
+
+This driver supercedes the old asus_acpi driver.
+
+Requirements
+------------
+
+ Kernel 2.6.X sources, configured for your computer, with ACPI support.
+ You also need CONFIG_INPUT and CONFIG_ACPI.
+
+Status
+------
+
+ The features currently supported are the following (see below for
+ detailed description):
+
+ - Fn key combinations
+ - Bluetooth enable and disable
+ - Wlan enable and disable
+ - GPS enable and disable
+ - Video output switching
+ - Ambient Light Sensor on and off
+ - LED control
+ - LED Display control
+ - LCD brightness control
+ - LCD on and off
+
+ A compatibility table by model and feature is maintained on the web
+ site, http://acpi4asus.sf.net/.
+
+Usage
+-----
+
+ Try "modprobe asus-laptop". Check your dmesg (simply type dmesg). You should
+ see some lines like this :
+
+ Asus Laptop Extras version 0.42
+ L2D model detected.
+
+ If it is not the output you have on your laptop, send it (and the laptop's
+ DSDT) to me.
+
+ That's all, now, all the events generated by the hotkeys of your laptop
+ should be reported via netlink events. You can check with
+ "acpi_genl monitor" (part of the acpica project).
+
+ Hotkeys are also reported as input keys (like keyboards) you can check
+ which key are supported using "xev" under X11.
+
+ You can get information on the version of your DSDT table by reading the
+ /sys/devices/platform/asus-laptop/infos entry. If you have a question or a
+ bug report to do, please include the output of this entry.
+
+LEDs
+----
+
+ You can modify LEDs be echoing values to /sys/class/leds/asus::*/brightness :
+ echo 1 > /sys/class/leds/asus::mail/brightness
+ will switch the mail LED on.
+ You can also know if they are on/off by reading their content and use
+ kernel triggers like ide-disk or heartbeat.
+
+Backlight
+---------
+
+ You can control lcd backlight power and brightness with
+ /sys/class/backlight/asus-laptop/. Brightness Values are between 0 and 15.
+
+Wireless devices
+---------------
+
+ You can turn the internal Bluetooth adapter on/off with the bluetooth entry
+ (only on models with Bluetooth). This usually controls the associated LED.
+ Same for Wlan adapter.
+
+Display switching
+-----------------
+
+ Note: the display switching code is currently considered EXPERIMENTAL.
+
+ Switching works for the following models:
+ L3800C
+ A2500H
+ L5800C
+ M5200N
+ W1000N (albeit with some glitches)
+ M6700R
+ A6JC
+ F3J
+
+ Switching doesn't work for the following:
+ M3700N
+ L2X00D (locks the laptop under certain conditions)
+
+ To switch the displays, echo values from 0 to 15 to
+ /sys/devices/platform/asus-laptop/display. The significance of those values
+ is as follows:
+
+ +-------+-----+-----+-----+-----+-----+
+ | Bin | Val | DVI | TV | CRT | LCD |
+ +-------+-----+-----+-----+-----+-----+
+ + 0000 + 0 + + + + +
+ +-------+-----+-----+-----+-----+-----+
+ + 0001 + 1 + + + + X +
+ +-------+-----+-----+-----+-----+-----+
+ + 0010 + 2 + + + X + +
+ +-------+-----+-----+-----+-----+-----+
+ + 0011 + 3 + + + X + X +
+ +-------+-----+-----+-----+-----+-----+
+ + 0100 + 4 + + X + + +
+ +-------+-----+-----+-----+-----+-----+
+ + 0101 + 5 + + X + + X +
+ +-------+-----+-----+-----+-----+-----+
+ + 0110 + 6 + + X + X + +
+ +-------+-----+-----+-----+-----+-----+
+ + 0111 + 7 + + X + X + X +
+ +-------+-----+-----+-----+-----+-----+
+ + 1000 + 8 + X + + + +
+ +-------+-----+-----+-----+-----+-----+
+ + 1001 + 9 + X + + + X +
+ +-------+-----+-----+-----+-----+-----+
+ + 1010 + 10 + X + + X + +
+ +-------+-----+-----+-----+-----+-----+
+ + 1011 + 11 + X + + X + X +
+ +-------+-----+-----+-----+-----+-----+
+ + 1100 + 12 + X + X + + +
+ +-------+-----+-----+-----+-----+-----+
+ + 1101 + 13 + X + X + + X +
+ +-------+-----+-----+-----+-----+-----+
+ + 1110 + 14 + X + X + X + +
+ +-------+-----+-----+-----+-----+-----+
+ + 1111 + 15 + X + X + X + X +
+ +-------+-----+-----+-----+-----+-----+
+
+ In most cases, the appropriate displays must be plugged in for the above
+ combinations to work. TV-Out may need to be initialized at boot time.
+
+ Debugging:
+ 1) Check whether the Fn+F8 key:
+ a) does not lock the laptop (try a boot with noapic / nolapic if it does)
+ b) generates events (0x6n, where n is the value corresponding to the
+ configuration above)
+ c) actually works
+ Record the disp value at every configuration.
+ 2) Echo values from 0 to 15 to /sys/devices/platform/asus-laptop/display.
+ Record its value, note any change. If nothing changes, try a broader range,
+ up to 65535.
+ 3) Send ANY output (both positive and negative reports are needed, unless your
+ machine is already listed above) to the acpi4asus-user mailing list.
+
+ Note: on some machines (e.g. L3C), after the module has been loaded, only 0x6n
+ events are generated and no actual switching occurs. In such a case, a line
+ like:
+
+ echo $((10#$arg-60)) > /sys/devices/platform/asus-laptop/display
+
+ will usually do the trick ($arg is the 0000006n-like event passed to acpid).
+
+ Note: there is currently no reliable way to read display status on xxN
+ (Centrino) models.
+
+LED display
+-----------
+
+ Some models like the W1N have a LED display that can be used to display
+ several items of information.
+
+ LED display works for the following models:
+ W1000N
+ W1J
+
+ To control the LED display, use the following :
+
+ echo 0x0T000DDD > /sys/devices/platform/asus-laptop/
+
+ where T control the 3 letters display, and DDD the 3 digits display,
+ according to the tables below.
+
+ DDD (digits)
+ 000 to 999 = display digits
+ AAA = ---
+ BBB to FFF = turn-off
+
+ T (type)
+ 0 = off
+ 1 = dvd
+ 2 = vcd
+ 3 = mp3
+ 4 = cd
+ 5 = tv
+ 6 = cpu
+ 7 = vol
+
+ For example "echo 0x01000001 >/sys/devices/platform/asus-laptop/ledd"
+ would display "DVD001".
+
+Driver options:
+---------------
+
+ Options can be passed to the asus-laptop driver using the standard
+ module argument syntax (<param>=<value> when passing the option to the
+ module or asus-laptop.<param>=<value> on the kernel boot line when
+ asus-laptop is statically linked into the kernel).
+
+ wapf: WAPF defines the behavior of the Fn+Fx wlan key
+ The significance of values is yet to be found, but
+ most of the time:
+ - 0x0 should do nothing
+ - 0x1 should allow to control the device with Fn+Fx key.
+ - 0x4 should send an ACPI event (0x88) while pressing the Fn+Fx key
+ - 0x5 like 0x1 or 0x4
+
+ The default value is 0x1.
+
+Unsupported models
+------------------
+
+ These models will never be supported by this module, as they use a completely
+ different mechanism to handle LEDs and extra stuff (meaning we have no clue
+ how it works):
+
+ - ASUS A1300 (A1B), A1370D
+ - ASUS L7300G
+ - ASUS L8400
+
+Patches, Errors, Questions:
+--------------------------
+
+ I appreciate any success or failure
+ reports, especially if they add to or correct the compatibility table.
+ Please include the following information in your report:
+
+ - Asus model name
+ - a copy of your ACPI tables, using the "acpidump" utility
+ - a copy of /sys/devices/platform/asus-laptop/infos
+ - which driver features work and which don't
+ - the observed behavior of non-working features
+
+ Any other comments or patches are also more than welcome.
+
+ acpi4asus-user@lists.sourceforge.net
+ http://sourceforge.net/projects/acpi4asus
+
diff --git a/Documentation/laptops/disk-shock-protection.txt b/Documentation/laptops/disk-shock-protection.txt
new file mode 100644
index 000000000..0e6ba2663
--- /dev/null
+++ b/Documentation/laptops/disk-shock-protection.txt
@@ -0,0 +1,149 @@
+Hard disk shock protection
+==========================
+
+Author: Elias Oltmanns <eo@nebensachen.de>
+Last modified: 2008-10-03
+
+
+0. Contents
+-----------
+
+1. Intro
+2. The interface
+3. References
+4. CREDITS
+
+
+1. Intro
+--------
+
+ATA/ATAPI-7 specifies the IDLE IMMEDIATE command with unload feature.
+Issuing this command should cause the drive to switch to idle mode and
+unload disk heads. This feature is being used in modern laptops in
+conjunction with accelerometers and appropriate software to implement
+a shock protection facility. The idea is to stop all I/O operations on
+the internal hard drive and park its heads on the ramp when critical
+situations are anticipated. The desire to have such a feature
+available on GNU/Linux systems has been the original motivation to
+implement a generic disk head parking interface in the Linux kernel.
+Please note, however, that other components have to be set up on your
+system in order to get disk shock protection working (see
+section 3. References below for pointers to more information about
+that).
+
+
+2. The interface
+----------------
+
+For each ATA device, the kernel exports the file
+block/*/device/unload_heads in sysfs (here assumed to be mounted under
+/sys). Access to /sys/block/*/device/unload_heads is denied with
+-EOPNOTSUPP if the device does not support the unload feature.
+Otherwise, writing an integer value to this file will take the heads
+of the respective drive off the platter and block all I/O operations
+for the specified number of milliseconds. When the timeout expires and
+no further disk head park request has been issued in the meantime,
+normal operation will be resumed. The maximal value accepted for a
+timeout is 30000 milliseconds. Exceeding this limit will return
+-EOVERFLOW, but heads will be parked anyway and the timeout will be
+set to 30 seconds. However, you can always change a timeout to any
+value between 0 and 30000 by issuing a subsequent head park request
+before the timeout of the previous one has expired. In particular, the
+total timeout can exceed 30 seconds and, more importantly, you can
+cancel a previously set timeout and resume normal operation
+immediately by specifying a timeout of 0. Values below -2 are rejected
+with -EINVAL (see below for the special meaning of -1 and -2). If the
+timeout specified for a recent head park request has not yet expired,
+reading from /sys/block/*/device/unload_heads will report the number
+of milliseconds remaining until normal operation will be resumed;
+otherwise, reading the unload_heads attribute will return 0.
+
+For example, do the following in order to park the heads of drive
+/dev/sda and stop all I/O operations for five seconds:
+
+# echo 5000 > /sys/block/sda/device/unload_heads
+
+A simple
+
+# cat /sys/block/sda/device/unload_heads
+
+will show you how many milliseconds are left before normal operation
+will be resumed.
+
+A word of caution: The fact that the interface operates on a basis of
+milliseconds may raise expectations that cannot be satisfied in
+reality. In fact, the ATA specs clearly state that the time for an
+unload operation to complete is vendor specific. The hint in ATA-7
+that this will typically be within 500 milliseconds apparently has
+been dropped in ATA-8.
+
+There is a technical detail of this implementation that may cause some
+confusion and should be discussed here. When a head park request has
+been issued to a device successfully, all I/O operations on the
+controller port this device is attached to will be deferred. That is
+to say, any other device that may be connected to the same port will
+be affected too. The only exception is that a subsequent head unload
+request to that other device will be executed immediately. Further
+operations on that port will be deferred until the timeout specified
+for either device on the port has expired. As far as PATA (old style
+IDE) configurations are concerned, there can only be two devices
+attached to any single port. In SATA world we have port multipliers
+which means that a user-issued head parking request to one device may
+actually result in stopping I/O to a whole bunch of devices. However,
+since this feature is supposed to be used on laptops and does not seem
+to be very useful in any other environment, there will be mostly one
+device per port. Even if the CD/DVD writer happens to be connected to
+the same port as the hard drive, it generally *should* recover just
+fine from the occasional buffer under-run incurred by a head park
+request to the HD. Actually, when you are using an ide driver rather
+than its libata counterpart (i.e. your disk is called /dev/hda
+instead of /dev/sda), then parking the heads of one drive (drive X)
+will generally not affect the mode of operation of another drive
+(drive Y) on the same port as described above. It is only when a port
+reset is required to recover from an exception on drive Y that further
+I/O operations on that drive (and the reset itself) will be delayed
+until drive X is no longer in the parked state.
+
+Finally, there are some hard drives that only comply with an earlier
+version of the ATA standard than ATA-7, but do support the unload
+feature nonetheless. Unfortunately, there is no safe way Linux can
+detect these devices, so you won't be able to write to the
+unload_heads attribute. If you know that your device really does
+support the unload feature (for instance, because the vendor of your
+laptop or the hard drive itself told you so), then you can tell the
+kernel to enable the usage of this feature for that drive by writing
+the special value -1 to the unload_heads attribute:
+
+# echo -1 > /sys/block/sda/device/unload_heads
+
+will enable the feature for /dev/sda, and giving -2 instead of -1 will
+disable it again.
+
+
+3. References
+-------------
+
+There are several laptops from different vendors featuring shock
+protection capabilities. As manufacturers have refused to support open
+source development of the required software components so far, Linux
+support for shock protection varies considerably between different
+hardware implementations. Ideally, this section should contain a list
+of pointers at different projects aiming at an implementation of shock
+protection on different systems. Unfortunately, I only know of a
+single project which, although still considered experimental, is fit
+for use. Please feel free to add projects that have been the victims
+of my ignorance.
+
+- http://www.thinkwiki.org/wiki/HDAPS
+ See this page for information about Linux support of the hard disk
+ active protection system as implemented in IBM/Lenovo Thinkpads.
+
+
+4. CREDITS
+----------
+
+This implementation of disk head parking has been inspired by a patch
+originally published by Jon Escombe <lists@dresco.co.uk>. My efforts
+to develop an implementation of this feature that is fit to be merged
+into mainline have been aided by various kernel developers, in
+particular by Tejun Heo and Bartlomiej Zolnierkiewicz.
diff --git a/Documentation/laptops/dslm.c b/Documentation/laptops/dslm.c
new file mode 100644
index 000000000..d5dd2d4b0
--- /dev/null
+++ b/Documentation/laptops/dslm.c
@@ -0,0 +1,166 @@
+/*
+ * dslm.c
+ * Simple Disk Sleep Monitor
+ * by Bartek Kania
+ * Licensed under the GPL
+ */
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/hdreg.h>
+
+#ifdef DEBUG
+#define D(x) x
+#else
+#define D(x)
+#endif
+
+int endit = 0;
+
+/* Check if the disk is in powersave-mode
+ * Most of the code is stolen from hdparm.
+ * 1 = active, 0 = standby/sleep, -1 = unknown */
+static int check_powermode(int fd)
+{
+ unsigned char args[4] = {WIN_CHECKPOWERMODE1,0,0,0};
+ int state;
+
+ if (ioctl(fd, HDIO_DRIVE_CMD, &args)
+ && (args[0] = WIN_CHECKPOWERMODE2) /* try again with 0x98 */
+ && ioctl(fd, HDIO_DRIVE_CMD, &args)) {
+ if (errno != EIO || args[0] != 0 || args[1] != 0) {
+ state = -1; /* "unknown"; */
+ } else
+ state = 0; /* "sleeping"; */
+ } else {
+ state = (args[2] == 255) ? 1 : 0;
+ }
+ D(printf(" drive state is: %d\n", state));
+
+ return state;
+}
+
+static char *state_name(int i)
+{
+ if (i == -1) return "unknown";
+ if (i == 0) return "sleeping";
+ if (i == 1) return "active";
+
+ return "internal error";
+}
+
+static char *myctime(time_t time)
+{
+ char *ts = ctime(&time);
+ ts[strlen(ts) - 1] = 0;
+
+ return ts;
+}
+
+static void measure(int fd)
+{
+ time_t start_time;
+ int last_state;
+ time_t last_time;
+ int curr_state;
+ time_t curr_time = 0;
+ time_t time_diff;
+ time_t active_time = 0;
+ time_t sleep_time = 0;
+ time_t unknown_time = 0;
+ time_t total_time = 0;
+ int changes = 0;
+ float tmp;
+
+ printf("Starting measurements\n");
+
+ last_state = check_powermode(fd);
+ start_time = last_time = time(0);
+ printf(" System is in state %s\n\n", state_name(last_state));
+
+ while(!endit) {
+ sleep(1);
+ curr_state = check_powermode(fd);
+
+ if (curr_state != last_state || endit) {
+ changes++;
+ curr_time = time(0);
+ time_diff = curr_time - last_time;
+
+ if (last_state == 1) active_time += time_diff;
+ else if (last_state == 0) sleep_time += time_diff;
+ else unknown_time += time_diff;
+
+ last_state = curr_state;
+ last_time = curr_time;
+
+ printf("%s: State-change to %s\n", myctime(curr_time),
+ state_name(curr_state));
+ }
+ }
+ changes--; /* Compensate for SIGINT */
+
+ total_time = time(0) - start_time;
+ printf("\nTotal running time: %lus\n", curr_time - start_time);
+ printf(" State changed %d times\n", changes);
+
+ tmp = (float)sleep_time / (float)total_time * 100;
+ printf(" Time in sleep state: %lus (%.2f%%)\n", sleep_time, tmp);
+ tmp = (float)active_time / (float)total_time * 100;
+ printf(" Time in active state: %lus (%.2f%%)\n", active_time, tmp);
+ tmp = (float)unknown_time / (float)total_time * 100;
+ printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp);
+}
+
+static void ender(int s)
+{
+ endit = 1;
+}
+
+static void usage(void)
+{
+ puts("usage: dslm [-w <time>] <disk>");
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ int fd;
+ char *disk = 0;
+ int settle_time = 60;
+
+ /* Parse the simple command-line */
+ if (argc == 2)
+ disk = argv[1];
+ else if (argc == 4) {
+ settle_time = atoi(argv[2]);
+ disk = argv[3];
+ } else
+ usage();
+
+ if (!(fd = open(disk, O_RDONLY|O_NONBLOCK))) {
+ printf("Can't open %s, because: %s\n", disk, strerror(errno));
+ exit(-1);
+ }
+
+ if (settle_time) {
+ printf("Waiting %d seconds for the system to settle down to "
+ "'normal'\n", settle_time);
+ sleep(settle_time);
+ } else
+ puts("Not waiting for system to settle down");
+
+ signal(SIGINT, ender);
+
+ measure(fd);
+
+ close(fd);
+
+ return 0;
+}
diff --git a/Documentation/laptops/freefall.c b/Documentation/laptops/freefall.c
new file mode 100644
index 000000000..5e44b20b1
--- /dev/null
+++ b/Documentation/laptops/freefall.c
@@ -0,0 +1,174 @@
+/* Disk protection for HP/DELL machines.
+ *
+ * Copyright 2008 Eric Piel
+ * Copyright 2009 Pavel Machek <pavel@ucw.cz>
+ * Copyright 2012 Sonal Santan
+ * Copyright 2014 Pali Rohár <pali.rohar@gmail.com>
+ *
+ * GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sched.h>
+#include <syslog.h>
+
+static int noled;
+static char unload_heads_path[64];
+static char device_path[32];
+static const char app_name[] = "FREE FALL";
+
+static int set_unload_heads_path(char *device)
+{
+ if (strlen(device) <= 5 || strncmp(device, "/dev/", 5) != 0)
+ return -EINVAL;
+ strncpy(device_path, device, sizeof(device_path) - 1);
+
+ snprintf(unload_heads_path, sizeof(unload_heads_path) - 1,
+ "/sys/block/%s/device/unload_heads", device+5);
+ return 0;
+}
+
+static int valid_disk(void)
+{
+ int fd = open(unload_heads_path, O_RDONLY);
+
+ if (fd < 0) {
+ perror(unload_heads_path);
+ return 0;
+ }
+
+ close(fd);
+ return 1;
+}
+
+static void write_int(char *path, int i)
+{
+ char buf[1024];
+ int fd = open(path, O_RDWR);
+
+ if (fd < 0) {
+ perror("open");
+ exit(1);
+ }
+
+ sprintf(buf, "%d", i);
+
+ if (write(fd, buf, strlen(buf)) != strlen(buf)) {
+ perror("write");
+ exit(1);
+ }
+
+ close(fd);
+}
+
+static void set_led(int on)
+{
+ if (noled)
+ return;
+ write_int("/sys/class/leds/hp::hddprotect/brightness", on);
+}
+
+static void protect(int seconds)
+{
+ const char *str = (seconds == 0) ? "Unparked" : "Parked";
+
+ write_int(unload_heads_path, seconds*1000);
+ syslog(LOG_INFO, "%s %s disk head\n", str, device_path);
+}
+
+static int on_ac(void)
+{
+ /* /sys/class/power_supply/AC0/online */
+ return 1;
+}
+
+static int lid_open(void)
+{
+ /* /proc/acpi/button/lid/LID/state */
+ return 1;
+}
+
+static void ignore_me(int signum)
+{
+ protect(0);
+ set_led(0);
+}
+
+int main(int argc, char **argv)
+{
+ int fd, ret;
+ struct stat st;
+ struct sched_param param;
+
+ if (argc == 1)
+ ret = set_unload_heads_path("/dev/sda");
+ else if (argc == 2)
+ ret = set_unload_heads_path(argv[1]);
+ else
+ ret = -EINVAL;
+
+ if (ret || !valid_disk()) {
+ fprintf(stderr, "usage: %s <device> (default: /dev/sda)\n",
+ argv[0]);
+ exit(1);
+ }
+
+ fd = open("/dev/freefall", O_RDONLY);
+ if (fd < 0) {
+ perror("/dev/freefall");
+ return EXIT_FAILURE;
+ }
+
+ if (stat("/sys/class/leds/hp::hddprotect/brightness", &st))
+ noled = 1;
+
+ if (daemon(0, 0) != 0) {
+ perror("daemon");
+ return EXIT_FAILURE;
+ }
+
+ openlog(app_name, LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1);
+
+ param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+ sched_setscheduler(0, SCHED_FIFO, &param);
+ mlockall(MCL_CURRENT|MCL_FUTURE);
+
+ signal(SIGALRM, ignore_me);
+
+ for (;;) {
+ unsigned char count;
+
+ ret = read(fd, &count, sizeof(count));
+ alarm(0);
+ if ((ret == -1) && (errno == EINTR)) {
+ /* Alarm expired, time to unpark the heads */
+ continue;
+ }
+
+ if (ret != sizeof(count)) {
+ perror("read");
+ break;
+ }
+
+ protect(21);
+ set_led(1);
+ if (1 || on_ac() || lid_open())
+ alarm(2);
+ else
+ alarm(20);
+ }
+
+ closelog();
+ close(fd);
+ return EXIT_SUCCESS;
+}
diff --git a/Documentation/laptops/laptop-mode.txt b/Documentation/laptops/laptop-mode.txt
new file mode 100644
index 000000000..4ebbfc3f1
--- /dev/null
+++ b/Documentation/laptops/laptop-mode.txt
@@ -0,0 +1,782 @@
+How to conserve battery power using laptop-mode
+-----------------------------------------------
+
+Document Author: Bart Samwel (bart@samwel.tk)
+Date created: January 2, 2004
+Last modified: December 06, 2004
+
+Introduction
+------------
+
+Laptop mode is used to minimize the time that the hard disk needs to be spun up,
+to conserve battery power on laptops. It has been reported to cause significant
+power savings.
+
+Contents
+--------
+
+* Introduction
+* Installation
+* Caveats
+* The Details
+* Tips & Tricks
+* Control script
+* ACPI integration
+* Monitoring tool
+
+
+Installation
+------------
+
+To use laptop mode, you don't need to set any kernel configuration options
+or anything. Simply install all the files included in this document, and
+laptop mode will automatically be started when you're on battery. For
+your convenience, a tarball containing an installer can be downloaded at:
+
+http://www.samwel.tk/laptop_mode/laptop_mode/
+
+To configure laptop mode, you need to edit the configuration file, which is
+located in /etc/default/laptop-mode on Debian-based systems, or in
+/etc/sysconfig/laptop-mode on other systems.
+
+Unfortunately, automatic enabling of laptop mode does not work for
+laptops that don't have ACPI. On those laptops, you need to start laptop
+mode manually. To start laptop mode, run "laptop_mode start", and to
+stop it, run "laptop_mode stop". (Note: The laptop mode tools package now
+has experimental support for APM, you might want to try that first.)
+
+
+Caveats
+-------
+
+* The downside of laptop mode is that you have a chance of losing up to 10
+ minutes of work. If you cannot afford this, don't use it! The supplied ACPI
+ scripts automatically turn off laptop mode when the battery almost runs out,
+ so that you won't lose any data at the end of your battery life.
+
+* Most desktop hard drives have a very limited lifetime measured in spindown
+ cycles, typically about 50.000 times (it's usually listed on the spec sheet).
+ Check your drive's rating, and don't wear down your drive's lifetime if you
+ don't need to.
+
+* If you mount some of your ext3/reiserfs filesystems with the -n option, then
+ the control script will not be able to remount them correctly. You must set
+ DO_REMOUNTS=0 in the control script, otherwise it will remount them with the
+ wrong options -- or it will fail because it cannot write to /etc/mtab.
+
+* If you have your filesystems listed as type "auto" in fstab, like I did, then
+ the control script will not recognize them as filesystems that need remounting.
+ You must list the filesystems with their true type instead.
+
+* It has been reported that some versions of the mutt mail client use file access
+ times to determine whether a folder contains new mail. If you use mutt and
+ experience this, you must disable the noatime remounting by setting the option
+ DO_REMOUNT_NOATIME to 0 in the configuration file.
+
+
+The Details
+-----------
+
+Laptop mode is controlled by the knob /proc/sys/vm/laptop_mode. This knob is
+present for all kernels that have the laptop mode patch, regardless of any
+configuration options. When the knob is set, any physical disk I/O (that might
+have caused the hard disk to spin up) causes Linux to flush all dirty blocks. The
+result of this is that after a disk has spun down, it will not be spun up
+anymore to write dirty blocks, because those blocks had already been written
+immediately after the most recent read operation. The value of the laptop_mode
+knob determines the time between the occurrence of disk I/O and when the flush
+is triggered. A sensible value for the knob is 5 seconds. Setting the knob to
+0 disables laptop mode.
+
+To increase the effectiveness of the laptop_mode strategy, the laptop_mode
+control script increases dirty_expire_centisecs and dirty_writeback_centisecs in
+/proc/sys/vm to about 10 minutes (by default), which means that pages that are
+dirtied are not forced to be written to disk as often. The control script also
+changes the dirty background ratio, so that background writeback of dirty pages
+is not done anymore. Combined with a higher commit value (also 10 minutes) for
+ext3 or ReiserFS filesystems (also done automatically by the control script),
+this results in concentration of disk activity in a small time interval which
+occurs only once every 10 minutes, or whenever the disk is forced to spin up by
+a cache miss. The disk can then be spun down in the periods of inactivity.
+
+If you want to find out which process caused the disk to spin up, you can
+gather information by setting the flag /proc/sys/vm/block_dump. When this flag
+is set, Linux reports all disk read and write operations that take place, and
+all block dirtyings done to files. This makes it possible to debug why a disk
+needs to spin up, and to increase battery life even more. The output of
+block_dump is written to the kernel output, and it can be retrieved using
+"dmesg". When you use block_dump and your kernel logging level also includes
+kernel debugging messages, you probably want to turn off klogd, otherwise
+the output of block_dump will be logged, causing disk activity that is not
+normally there.
+
+
+Configuration
+-------------
+
+The laptop mode configuration file is located in /etc/default/laptop-mode on
+Debian-based systems, or in /etc/sysconfig/laptop-mode on other systems. It
+contains the following options:
+
+MAX_AGE:
+
+Maximum time, in seconds, of hard drive spindown time that you are
+comfortable with. Worst case, it's possible that you could lose this
+amount of work if your battery fails while you're in laptop mode.
+
+MINIMUM_BATTERY_MINUTES:
+
+Automatically disable laptop mode if the remaining number of minutes of
+battery power is less than this value. Default is 10 minutes.
+
+AC_HD/BATT_HD:
+
+The idle timeout that should be set on your hard drive when laptop mode
+is active (BATT_HD) and when it is not active (AC_HD). The defaults are
+20 seconds (value 4) for BATT_HD and 2 hours (value 244) for AC_HD. The
+possible values are those listed in the manual page for "hdparm" for the
+"-S" option.
+
+HD:
+
+The devices for which the spindown timeout should be adjusted by laptop mode.
+Default is /dev/hda. If you specify multiple devices, separate them by a space.
+
+READAHEAD:
+
+Disk readahead, in 512-byte sectors, while laptop mode is active. A large
+readahead can prevent disk accesses for things like executable pages (which are
+loaded on demand while the application executes) and sequentially accessed data
+(MP3s).
+
+DO_REMOUNTS:
+
+The control script automatically remounts any mounted journaled filesystems
+with appropriate commit interval options. When this option is set to 0, this
+feature is disabled.
+
+DO_REMOUNT_NOATIME:
+
+When remounting, should the filesystems be remounted with the noatime option?
+Normally, this is set to "1" (enabled), but there may be programs that require
+access time recording.
+
+DIRTY_RATIO:
+
+The percentage of memory that is allowed to contain "dirty" or unsaved data
+before a writeback is forced, while laptop mode is active. Corresponds to
+the /proc/sys/vm/dirty_ratio sysctl.
+
+DIRTY_BACKGROUND_RATIO:
+
+The percentage of memory that is allowed to contain "dirty" or unsaved data
+after a forced writeback is done due to an exceeding of DIRTY_RATIO. Set
+this nice and low. This corresponds to the /proc/sys/vm/dirty_background_ratio
+sysctl.
+
+Note that the behaviour of dirty_background_ratio is quite different
+when laptop mode is active and when it isn't. When laptop mode is inactive,
+dirty_background_ratio is the threshold percentage at which background writeouts
+start taking place. When laptop mode is active, however, background writeouts
+are disabled, and the dirty_background_ratio only determines how much writeback
+is done when dirty_ratio is reached.
+
+DO_CPU:
+
+Enable CPU frequency scaling when in laptop mode. (Requires CPUFreq to be setup.
+See Documentation/cpu-freq/user-guide.txt for more info. Disabled by default.)
+
+CPU_MAXFREQ:
+
+When on battery, what is the maximum CPU speed that the system should use? Legal
+values are "slowest" for the slowest speed that your CPU is able to operate at,
+or a value listed in /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies.
+
+
+Tips & Tricks
+-------------
+
+* Bartek Kania reports getting up to 50 minutes of extra battery life (on top
+ of his regular 3 to 3.5 hours) using a spindown time of 5 seconds (BATT_HD=1).
+
+* You can spin down the disk while playing MP3, by setting disk readahead
+ to 8MB (READAHEAD=16384). Effectively, the disk will read a complete MP3 at
+ once, and will then spin down while the MP3 is playing. (Thanks to Bartek
+ Kania.)
+
+* Drew Scott Daniels observed: "I don't know why, but when I decrease the number
+ of colours that my display uses it consumes less battery power. I've seen
+ this on powerbooks too. I hope that this is a piece of information that
+ might be useful to the Laptop Mode patch or its users."
+
+* In syslog.conf, you can prefix entries with a dash ``-'' to omit syncing the
+ file after every logging. When you're using laptop-mode and your disk doesn't
+ spin down, this is a likely culprit.
+
+* Richard Atterer observed that laptop mode does not work well with noflushd
+ (http://noflushd.sourceforge.net/), it seems that noflushd prevents laptop-mode
+ from doing its thing.
+
+* If you're worried about your data, you might want to consider using a USB
+ memory stick or something like that as a "working area". (Be aware though
+ that flash memory can only handle a limited number of writes, and overuse
+ may wear out your memory stick pretty quickly. Do _not_ use journalling
+ filesystems on flash memory sticks.)
+
+
+Configuration file for control and ACPI battery scripts
+-------------------------------------------------------
+
+This allows the tunables to be changed for the scripts via an external
+configuration file
+
+It should be installed as /etc/default/laptop-mode on Debian, and as
+/etc/sysconfig/laptop-mode on Red Hat, SUSE, Mandrake, and other work-alikes.
+
+--------------------CONFIG FILE BEGIN-------------------------------------------
+# Maximum time, in seconds, of hard drive spindown time that you are
+# comfortable with. Worst case, it's possible that you could lose this
+# amount of work if your battery fails you while in laptop mode.
+#MAX_AGE=600
+
+# Automatically disable laptop mode when the number of minutes of battery
+# that you have left goes below this threshold.
+MINIMUM_BATTERY_MINUTES=10
+
+# Read-ahead, in 512-byte sectors. You can spin down the disk while playing MP3/OGG
+# by setting the disk readahead to 8MB (READAHEAD=16384). Effectively, the disk
+# will read a complete MP3 at once, and will then spin down while the MP3/OGG is
+# playing.
+#READAHEAD=4096
+
+# Shall we remount journaled fs. with appropriate commit interval? (1=yes)
+#DO_REMOUNTS=1
+
+# And shall we add the "noatime" option to that as well? (1=yes)
+#DO_REMOUNT_NOATIME=1
+
+# Dirty synchronous ratio. At this percentage of dirty pages the process
+# which
+# calls write() does its own writeback
+#DIRTY_RATIO=40
+
+#
+# Allowed dirty background ratio, in percent. Once DIRTY_RATIO has been
+# exceeded, the kernel will wake flusher threads which will then reduce the
+# amount of dirty memory to dirty_background_ratio. Set this nice and low,
+# so once some writeout has commenced, we do a lot of it.
+#
+#DIRTY_BACKGROUND_RATIO=5
+
+# kernel default dirty buffer age
+#DEF_AGE=30
+#DEF_UPDATE=5
+#DEF_DIRTY_BACKGROUND_RATIO=10
+#DEF_DIRTY_RATIO=40
+#DEF_XFS_AGE_BUFFER=15
+#DEF_XFS_SYNC_INTERVAL=30
+#DEF_XFS_BUFD_INTERVAL=1
+
+# This must be adjusted manually to the value of HZ in the running kernel
+# on 2.4, until the XFS people change their 2.4 external interfaces to work in
+# centisecs. This can be automated, but it's a work in progress that still
+# needs# some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for
+# external interfaces, and that is currently always set to 100. So you don't
+# need to change this on 2.6.
+#XFS_HZ=100
+
+# Should the maximum CPU frequency be adjusted down while on battery?
+# Requires CPUFreq to be setup.
+# See Documentation/cpu-freq/user-guide.txt for more info
+#DO_CPU=0
+
+# When on battery what is the maximum CPU speed that the system should
+# use? Legal values are "slowest" for the slowest speed that your
+# CPU is able to operate at, or a value listed in:
+# /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies
+# Only applicable if DO_CPU=1.
+#CPU_MAXFREQ=slowest
+
+# Idle timeout for your hard drive (man hdparm for valid values, -S option)
+# Default is 2 hours on AC (AC_HD=244) and 20 seconds for battery (BATT_HD=4).
+#AC_HD=244
+#BATT_HD=4
+
+# The drives for which to adjust the idle timeout. Separate them by a space,
+# e.g. HD="/dev/hda /dev/hdb".
+#HD="/dev/hda"
+
+# Set the spindown timeout on a hard drive?
+#DO_HD=1
+
+--------------------CONFIG FILE END---------------------------------------------
+
+
+Control script
+--------------
+
+Please note that this control script works for the Linux 2.4 and 2.6 series (thanks
+to Kiko Piris).
+
+--------------------CONTROL SCRIPT BEGIN----------------------------------------
+#!/bin/bash
+
+# start or stop laptop_mode, best run by a power management daemon when
+# ac gets connected/disconnected from a laptop
+#
+# install as /sbin/laptop_mode
+#
+# Contributors to this script: Kiko Piris
+# Bart Samwel
+# Micha Feigin
+# Andrew Morton
+# Herve Eychenne
+# Dax Kelson
+#
+# Original Linux 2.4 version by: Jens Axboe
+
+#############################################################################
+
+# Source config
+if [ -f /etc/default/laptop-mode ] ; then
+ # Debian
+ . /etc/default/laptop-mode
+elif [ -f /etc/sysconfig/laptop-mode ] ; then
+ # Others
+ . /etc/sysconfig/laptop-mode
+fi
+
+# Don't raise an error if the config file is incomplete
+# set defaults instead:
+
+# Maximum time, in seconds, of hard drive spindown time that you are
+# comfortable with. Worst case, it's possible that you could lose this
+# amount of work if your battery fails you while in laptop mode.
+MAX_AGE=${MAX_AGE:-'600'}
+
+# Read-ahead, in kilobytes
+READAHEAD=${READAHEAD:-'4096'}
+
+# Shall we remount journaled fs. with appropriate commit interval? (1=yes)
+DO_REMOUNTS=${DO_REMOUNTS:-'1'}
+
+# And shall we add the "noatime" option to that as well? (1=yes)
+DO_REMOUNT_NOATIME=${DO_REMOUNT_NOATIME:-'1'}
+
+# Shall we adjust the idle timeout on a hard drive?
+DO_HD=${DO_HD:-'1'}
+
+# Adjust idle timeout on which hard drive?
+HD="${HD:-'/dev/hda'}"
+
+# spindown time for HD (hdparm -S values)
+AC_HD=${AC_HD:-'244'}
+BATT_HD=${BATT_HD:-'4'}
+
+# Dirty synchronous ratio. At this percentage of dirty pages the process which
+# calls write() does its own writeback
+DIRTY_RATIO=${DIRTY_RATIO:-'40'}
+
+# cpu frequency scaling
+# See Documentation/cpu-freq/user-guide.txt for more info
+DO_CPU=${CPU_MANAGE:-'0'}
+CPU_MAXFREQ=${CPU_MAXFREQ:-'slowest'}
+
+#
+# Allowed dirty background ratio, in percent. Once DIRTY_RATIO has been
+# exceeded, the kernel will wake flusher threads which will then reduce the
+# amount of dirty memory to dirty_background_ratio. Set this nice and low,
+# so once some writeout has commenced, we do a lot of it.
+#
+DIRTY_BACKGROUND_RATIO=${DIRTY_BACKGROUND_RATIO:-'5'}
+
+# kernel default dirty buffer age
+DEF_AGE=${DEF_AGE:-'30'}
+DEF_UPDATE=${DEF_UPDATE:-'5'}
+DEF_DIRTY_BACKGROUND_RATIO=${DEF_DIRTY_BACKGROUND_RATIO:-'10'}
+DEF_DIRTY_RATIO=${DEF_DIRTY_RATIO:-'40'}
+DEF_XFS_AGE_BUFFER=${DEF_XFS_AGE_BUFFER:-'15'}
+DEF_XFS_SYNC_INTERVAL=${DEF_XFS_SYNC_INTERVAL:-'30'}
+DEF_XFS_BUFD_INTERVAL=${DEF_XFS_BUFD_INTERVAL:-'1'}
+
+# This must be adjusted manually to the value of HZ in the running kernel
+# on 2.4, until the XFS people change their 2.4 external interfaces to work in
+# centisecs. This can be automated, but it's a work in progress that still needs
+# some fixes. On 2.6 kernels, XFS uses USER_HZ instead of HZ for external
+# interfaces, and that is currently always set to 100. So you don't need to
+# change this on 2.6.
+XFS_HZ=${XFS_HZ:-'100'}
+
+#############################################################################
+
+KLEVEL="$(uname -r |
+ {
+ IFS='.' read a b c
+ echo $a.$b
+ }
+)"
+case "$KLEVEL" in
+ "2.4"|"2.6")
+ ;;
+ *)
+ echo "Unhandled kernel version: $KLEVEL ('uname -r' = '$(uname -r)')" >&2
+ exit 1
+ ;;
+esac
+
+if [ ! -e /proc/sys/vm/laptop_mode ] ; then
+ echo "Kernel is not patched with laptop_mode patch." >&2
+ exit 1
+fi
+
+if [ ! -w /proc/sys/vm/laptop_mode ] ; then
+ echo "You do not have enough privileges to enable laptop_mode." >&2
+ exit 1
+fi
+
+# Remove an option (the first parameter) of the form option=<number> from
+# a mount options string (the rest of the parameters).
+parse_mount_opts () {
+ OPT="$1"
+ shift
+ echo ",$*," | sed \
+ -e 's/,'"$OPT"'=[0-9]*,/,/g' \
+ -e 's/,,*/,/g' \
+ -e 's/^,//' \
+ -e 's/,$//'
+}
+
+# Remove an option (the first parameter) without any arguments from
+# a mount option string (the rest of the parameters).
+parse_nonumber_mount_opts () {
+ OPT="$1"
+ shift
+ echo ",$*," | sed \
+ -e 's/,'"$OPT"',/,/g' \
+ -e 's/,,*/,/g' \
+ -e 's/^,//' \
+ -e 's/,$//'
+}
+
+# Find out the state of a yes/no option (e.g. "atime"/"noatime") in
+# fstab for a given filesystem, and use this state to replace the
+# value of the option in another mount options string. The device
+# is the first argument, the option name the second, and the default
+# value the third. The remainder is the mount options string.
+#
+# Example:
+# parse_yesno_opts_wfstab /dev/hda1 atime atime defaults,noatime
+#
+# If fstab contains, say, "rw" for this filesystem, then the result
+# will be "defaults,atime".
+parse_yesno_opts_wfstab () {
+ L_DEV="$1"
+ OPT="$2"
+ DEF_OPT="$3"
+ shift 3
+ L_OPTS="$*"
+ PARSEDOPTS1="$(parse_nonumber_mount_opts $OPT $L_OPTS)"
+ PARSEDOPTS1="$(parse_nonumber_mount_opts no$OPT $PARSEDOPTS1)"
+ # Watch for a default atime in fstab
+ FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
+ if echo "$FSTAB_OPTS" | grep "$OPT" > /dev/null ; then
+ # option specified in fstab: extract the value and use it
+ if echo "$FSTAB_OPTS" | grep "no$OPT" > /dev/null ; then
+ echo "$PARSEDOPTS1,no$OPT"
+ else
+ # no$OPT not found -- so we must have $OPT.
+ echo "$PARSEDOPTS1,$OPT"
+ fi
+ else
+ # option not specified in fstab -- choose the default.
+ echo "$PARSEDOPTS1,$DEF_OPT"
+ fi
+}
+
+# Find out the state of a numbered option (e.g. "commit=NNN") in
+# fstab for a given filesystem, and use this state to replace the
+# value of the option in another mount options string. The device
+# is the first argument, and the option name the second. The
+# remainder is the mount options string in which the replacement
+# must be done.
+#
+# Example:
+# parse_mount_opts_wfstab /dev/hda1 commit defaults,commit=7
+#
+# If fstab contains, say, "commit=3,rw" for this filesystem, then the
+# result will be "rw,commit=3".
+parse_mount_opts_wfstab () {
+ L_DEV="$1"
+ OPT="$2"
+ shift 2
+ L_OPTS="$*"
+ PARSEDOPTS1="$(parse_mount_opts $OPT $L_OPTS)"
+ # Watch for a default commit in fstab
+ FSTAB_OPTS="$(awk '$1 == "'$L_DEV'" { print $4 }' /etc/fstab)"
+ if echo "$FSTAB_OPTS" | grep "$OPT=" > /dev/null ; then
+ # option specified in fstab: extract the value, and use it
+ echo -n "$PARSEDOPTS1,$OPT="
+ echo ",$FSTAB_OPTS," | sed \
+ -e 's/.*,'"$OPT"'=//' \
+ -e 's/,.*//'
+ else
+ # option not specified in fstab: set it to 0
+ echo "$PARSEDOPTS1,$OPT=0"
+ fi
+}
+
+deduce_fstype () {
+ MP="$1"
+ # My root filesystem unfortunately has
+ # type "unknown" in /etc/mtab. If we encounter
+ # "unknown", we try to get the type from fstab.
+ cat /etc/fstab |
+ grep -v '^#' |
+ while read FSTAB_DEV FSTAB_MP FSTAB_FST FSTAB_OPTS FSTAB_DUMP FSTAB_DUMP ; do
+ if [ "$FSTAB_MP" = "$MP" ]; then
+ echo $FSTAB_FST
+ exit 0
+ fi
+ done
+}
+
+if [ $DO_REMOUNT_NOATIME -eq 1 ] ; then
+ NOATIME_OPT=",noatime"
+fi
+
+case "$1" in
+ start)
+ AGE=$((100*$MAX_AGE))
+ XFS_AGE=$(($XFS_HZ*$MAX_AGE))
+ echo -n "Starting laptop_mode"
+
+ if [ -d /proc/sys/vm/pagebuf ] ; then
+ # (For 2.4 and early 2.6.)
+ # This only needs to be set, not reset -- it is only used when
+ # laptop mode is enabled.
+ echo $XFS_AGE > /proc/sys/vm/pagebuf/lm_flush_age
+ echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
+ elif [ -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
+ # (A couple of early 2.6 laptop mode patches had these.)
+ # The same goes for these.
+ echo $XFS_AGE > /proc/sys/fs/xfs/lm_age_buffer
+ echo $XFS_AGE > /proc/sys/fs/xfs/lm_sync_interval
+ elif [ -f /proc/sys/fs/xfs/age_buffer ] ; then
+ # (2.6.6)
+ # But not for these -- they are also used in normal
+ # operation.
+ echo $XFS_AGE > /proc/sys/fs/xfs/age_buffer
+ echo $XFS_AGE > /proc/sys/fs/xfs/sync_interval
+ elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
+ # (2.6.7 upwards)
+ # And not for these either. These are in centisecs,
+ # not USER_HZ, so we have to use $AGE, not $XFS_AGE.
+ echo $AGE > /proc/sys/fs/xfs/age_buffer_centisecs
+ echo $AGE > /proc/sys/fs/xfs/xfssyncd_centisecs
+ echo 3000 > /proc/sys/fs/xfs/xfsbufd_centisecs
+ fi
+
+ case "$KLEVEL" in
+ "2.4")
+ echo 1 > /proc/sys/vm/laptop_mode
+ echo "30 500 0 0 $AGE $AGE 60 20 0" > /proc/sys/vm/bdflush
+ ;;
+ "2.6")
+ echo 5 > /proc/sys/vm/laptop_mode
+ echo "$AGE" > /proc/sys/vm/dirty_writeback_centisecs
+ echo "$AGE" > /proc/sys/vm/dirty_expire_centisecs
+ echo "$DIRTY_RATIO" > /proc/sys/vm/dirty_ratio
+ echo "$DIRTY_BACKGROUND_RATIO" > /proc/sys/vm/dirty_background_ratio
+ ;;
+ esac
+ if [ $DO_REMOUNTS -eq 1 ]; then
+ cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
+ PARSEDOPTS="$(parse_mount_opts "$OPTS")"
+ if [ "$FST" = 'unknown' ]; then
+ FST=$(deduce_fstype $MP)
+ fi
+ case "$FST" in
+ "ext3"|"reiserfs")
+ PARSEDOPTS="$(parse_mount_opts commit "$OPTS")"
+ mount $DEV -t $FST $MP -o remount,$PARSEDOPTS,commit=$MAX_AGE$NOATIME_OPT
+ ;;
+ "xfs")
+ mount $DEV -t $FST $MP -o remount,$OPTS$NOATIME_OPT
+ ;;
+ esac
+ if [ -b $DEV ] ; then
+ blockdev --setra $(($READAHEAD * 2)) $DEV
+ fi
+ done
+ fi
+ if [ $DO_HD -eq 1 ] ; then
+ for THISHD in $HD ; do
+ /sbin/hdparm -S $BATT_HD $THISHD > /dev/null 2>&1
+ /sbin/hdparm -B 1 $THISHD > /dev/null 2>&1
+ done
+ fi
+ if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
+ if [ $CPU_MAXFREQ = 'slowest' ]; then
+ CPU_MAXFREQ=`cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq`
+ fi
+ echo $CPU_MAXFREQ > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
+ fi
+ echo "."
+ ;;
+ stop)
+ U_AGE=$((100*$DEF_UPDATE))
+ B_AGE=$((100*$DEF_AGE))
+ echo -n "Stopping laptop_mode"
+ echo 0 > /proc/sys/vm/laptop_mode
+ if [ -f /proc/sys/fs/xfs/age_buffer -a ! -f /proc/sys/fs/xfs/lm_age_buffer ] ; then
+ # These need to be restored, if there are no lm_*.
+ echo $(($XFS_HZ*$DEF_XFS_AGE_BUFFER)) > /proc/sys/fs/xfs/age_buffer
+ echo $(($XFS_HZ*$DEF_XFS_SYNC_INTERVAL)) > /proc/sys/fs/xfs/sync_interval
+ elif [ -f /proc/sys/fs/xfs/age_buffer_centisecs ] ; then
+ # These need to be restored as well.
+ echo $((100*$DEF_XFS_AGE_BUFFER)) > /proc/sys/fs/xfs/age_buffer_centisecs
+ echo $((100*$DEF_XFS_SYNC_INTERVAL)) > /proc/sys/fs/xfs/xfssyncd_centisecs
+ echo $((100*$DEF_XFS_BUFD_INTERVAL)) > /proc/sys/fs/xfs/xfsbufd_centisecs
+ fi
+ case "$KLEVEL" in
+ "2.4")
+ echo "30 500 0 0 $U_AGE $B_AGE 60 20 0" > /proc/sys/vm/bdflush
+ ;;
+ "2.6")
+ echo "$U_AGE" > /proc/sys/vm/dirty_writeback_centisecs
+ echo "$B_AGE" > /proc/sys/vm/dirty_expire_centisecs
+ echo "$DEF_DIRTY_RATIO" > /proc/sys/vm/dirty_ratio
+ echo "$DEF_DIRTY_BACKGROUND_RATIO" > /proc/sys/vm/dirty_background_ratio
+ ;;
+ esac
+ if [ $DO_REMOUNTS -eq 1 ] ; then
+ cat /etc/mtab | while read DEV MP FST OPTS DUMP PASS ; do
+ # Reset commit and atime options to defaults.
+ if [ "$FST" = 'unknown' ]; then
+ FST=$(deduce_fstype $MP)
+ fi
+ case "$FST" in
+ "ext3"|"reiserfs")
+ PARSEDOPTS="$(parse_mount_opts_wfstab $DEV commit $OPTS)"
+ PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $PARSEDOPTS)"
+ mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
+ ;;
+ "xfs")
+ PARSEDOPTS="$(parse_yesno_opts_wfstab $DEV atime atime $OPTS)"
+ mount $DEV -t $FST $MP -o remount,$PARSEDOPTS
+ ;;
+ esac
+ if [ -b $DEV ] ; then
+ blockdev --setra 256 $DEV
+ fi
+ done
+ fi
+ if [ $DO_HD -eq 1 ] ; then
+ for THISHD in $HD ; do
+ /sbin/hdparm -S $AC_HD $THISHD > /dev/null 2>&1
+ /sbin/hdparm -B 255 $THISHD > /dev/null 2>&1
+ done
+ fi
+ if [ $DO_CPU -eq 1 -a -e /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_min_freq ]; then
+ echo `cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq` > /sys/devices/system/cpu/cpu0/cpufreq/scaling_max_freq
+ fi
+ echo "."
+ ;;
+ *)
+ echo "Usage: $0 {start|stop}" 2>&1
+ exit 1
+ ;;
+
+esac
+
+exit 0
+--------------------CONTROL SCRIPT END------------------------------------------
+
+
+ACPI integration
+----------------
+
+Dax Kelson submitted this so that the ACPI acpid daemon will
+kick off the laptop_mode script and run hdparm. The part that
+automatically disables laptop mode when the battery is low was
+written by Jan Topinski.
+
+-----------------/etc/acpi/events/ac_adapter BEGIN------------------------------
+event=ac_adapter
+action=/etc/acpi/actions/ac.sh %e
+----------------/etc/acpi/events/ac_adapter END---------------------------------
+
+
+-----------------/etc/acpi/events/battery BEGIN---------------------------------
+event=battery.*
+action=/etc/acpi/actions/battery.sh %e
+----------------/etc/acpi/events/battery END------------------------------------
+
+
+----------------/etc/acpi/actions/ac.sh BEGIN-----------------------------------
+#!/bin/bash
+
+# ac on/offline event handler
+
+status=`awk '/^state: / { print $2 }' /proc/acpi/ac_adapter/$2/state`
+
+case $status in
+ "on-line")
+ /sbin/laptop_mode stop
+ exit 0
+ ;;
+ "off-line")
+ /sbin/laptop_mode start
+ exit 0
+ ;;
+esac
+---------------------------/etc/acpi/actions/ac.sh END--------------------------
+
+
+---------------------------/etc/acpi/actions/battery.sh BEGIN-------------------
+#! /bin/bash
+
+# Automatically disable laptop mode when the battery almost runs out.
+
+BATT_INFO=/proc/acpi/battery/$2/state
+
+if [[ -f /proc/sys/vm/laptop_mode ]]
+then
+ LM=`cat /proc/sys/vm/laptop_mode`
+ if [[ $LM -gt 0 ]]
+ then
+ if [[ -f $BATT_INFO ]]
+ then
+ # Source the config file only now that we know we need
+ if [ -f /etc/default/laptop-mode ] ; then
+ # Debian
+ . /etc/default/laptop-mode
+ elif [ -f /etc/sysconfig/laptop-mode ] ; then
+ # Others
+ . /etc/sysconfig/laptop-mode
+ fi
+ MINIMUM_BATTERY_MINUTES=${MINIMUM_BATTERY_MINUTES:-'10'}
+
+ ACTION="`cat $BATT_INFO | grep charging | cut -c 26-`"
+ if [[ ACTION -eq "discharging" ]]
+ then
+ PRESENT_RATE=`cat $BATT_INFO | grep "present rate:" | sed "s/.* \([0-9][0-9]* \).*/\1/" `
+ REMAINING=`cat $BATT_INFO | grep "remaining capacity:" | sed "s/.* \([0-9][0-9]* \).*/\1/" `
+ fi
+ if (($REMAINING * 60 / $PRESENT_RATE < $MINIMUM_BATTERY_MINUTES))
+ then
+ /sbin/laptop_mode stop
+ fi
+ else
+ logger -p daemon.warning "You are using laptop mode and your battery interface $BATT_INFO is missing. This may lead to loss of data when the battery runs out. Check kernel ACPI support and /proc/acpi/battery folder, and edit /etc/acpi/battery.sh to set BATT_INFO to the correct path."
+ fi
+ fi
+fi
+---------------------------/etc/acpi/actions/battery.sh END--------------------
+
+
+Monitoring tool
+---------------
+
+Bartek Kania submitted this, it can be used to measure how much time your disk
+spends spun up/down. See Documentation/laptops/dslm.c
diff --git a/Documentation/laptops/sony-laptop.txt b/Documentation/laptops/sony-laptop.txt
new file mode 100644
index 000000000..978b1e615
--- /dev/null
+++ b/Documentation/laptops/sony-laptop.txt
@@ -0,0 +1,144 @@
+Sony Notebook Control Driver (SNC) Readme
+-----------------------------------------
+ Copyright (C) 2004- 2005 Stelian Pop <stelian@popies.net>
+ Copyright (C) 2007 Mattia Dongili <malattia@linux.it>
+
+This mini-driver drives the SNC and SPIC device present in the ACPI BIOS of the
+Sony Vaio laptops. This driver mixes both devices functions under the same
+(hopefully consistent) interface. This also means that the sonypi driver is
+obsoleted by sony-laptop now.
+
+Fn keys (hotkeys):
+------------------
+Some models report hotkeys through the SNC or SPIC devices, such events are
+reported both through the ACPI subsystem as acpi events and through the INPUT
+subsystem. See the logs of /proc/bus/input/devices to find out what those
+events are and which input devices are created by the driver.
+Additionally, loading the driver with the debug option will report all events
+in the kernel log.
+
+The "scancodes" passed to the input system (that can be remapped with udev)
+are indexes to the table "sony_laptop_input_keycode_map" in the sony-laptop.c
+module. For example the "FN/E" key combination (EJECTCD on some models)
+generates the scancode 20 (0x14).
+
+Backlight control:
+------------------
+If your laptop model supports it, you will find sysfs files in the
+/sys/class/backlight/sony/
+directory. You will be able to query and set the current screen
+brightness:
+ brightness get/set screen brightness (an integer
+ between 0 and 7)
+ actual_brightness reading from this file will query the HW
+ to get real brightness value
+ max_brightness the maximum brightness value
+
+
+Platform specific:
+------------------
+Loading the sony-laptop module will create a
+/sys/devices/platform/sony-laptop/
+directory populated with some files.
+
+You then read/write integer values from/to those files by using
+standard UNIX tools.
+
+The files are:
+ brightness_default screen brightness which will be set
+ when the laptop will be rebooted
+ cdpower power on/off the internal CD drive
+ audiopower power on/off the internal sound card
+ lanpower power on/off the internal ethernet card
+ (only in debug mode)
+ bluetoothpower power on/off the internal bluetooth device
+ fanspeed get/set the fan speed
+
+Note that some files may be missing if they are not supported
+by your particular laptop model.
+
+Example usage:
+ # echo "1" > /sys/devices/platform/sony-laptop/brightness_default
+sets the lowest screen brightness for the next and later reboots,
+ # echo "8" > /sys/devices/platform/sony-laptop/brightness_default
+sets the highest screen brightness for the next and later reboots,
+ # cat /sys/devices/platform/sony-laptop/brightness_default
+retrieves the value.
+
+ # echo "0" > /sys/devices/platform/sony-laptop/audiopower
+powers off the sound card,
+ # echo "1" > /sys/devices/platform/sony-laptop/audiopower
+powers on the sound card.
+
+
+RFkill control:
+---------------
+More recent Vaio models expose a consistent set of ACPI methods to
+control radio frequency emitting devices. If you are a lucky owner of
+such a laptop you will find the necessary rfkill devices under
+/sys/class/rfkill. Check those starting with sony-* in
+ # grep . /sys/class/rfkill/*/{state,name}
+
+
+Development:
+------------
+
+If you want to help with the development of this driver (and
+you are not afraid of any side effects doing strange things with
+your ACPI BIOS could have on your laptop), load the driver and
+pass the option 'debug=1'.
+
+REPEAT: DON'T DO THIS IF YOU DON'T LIKE RISKY BUSINESS.
+
+In your kernel logs you will find the list of all ACPI methods
+the SNC device has on your laptop.
+
+* For new models you will see a long list of meaningless method names,
+reading the DSDT table source should reveal that:
+(1) the SNC device uses an internal capability lookup table
+(2) SN00 is used to find values in the lookup table
+(3) SN06 and SN07 are used to call into the real methods based on
+ offsets you can obtain iterating the table using SN00
+(4) SN02 used to enable events.
+Some values in the capability lookup table are more or less known, see
+the code for all sony_call_snc_handle calls, others are more obscure.
+
+* For old models you can see the GCDP/GCDP methods used to pwer on/off
+the CD drive, but there are others and they are usually different from
+model to model.
+
+I HAVE NO IDEA WHAT THOSE METHODS DO.
+
+The sony-laptop driver creates, for some of those methods (the most
+current ones found on several Vaio models), an entry under
+/sys/devices/platform/sony-laptop, just like the 'cdpower' one.
+You can create other entries corresponding to your own laptop methods by
+further editing the source (see the 'sony_nc_values' table, and add a new
+entry to this table with your get/set method names using the
+SNC_HANDLE_NAMES macro).
+
+Your mission, should you accept it, is to try finding out what
+those entries are for, by reading/writing random values from/to those
+files and find out what is the impact on your laptop.
+
+Should you find anything interesting, please report it back to me,
+I will not disavow all knowledge of your actions :)
+
+See also http://www.linux.it/~malattia/wiki/index.php/Sony_drivers for other
+useful info.
+
+Bugs/Limitations:
+-----------------
+
+* This driver is not based on official documentation from Sony
+ (because there is none), so there is no guarantee this driver
+ will work at all, or do the right thing. Although this hasn't
+ happened to me, this driver could do very bad things to your
+ laptop, including permanent damage.
+
+* The sony-laptop and sonypi drivers do not interact at all. In the
+ future, sonypi will be removed and replaced by sony-laptop.
+
+* spicctrl, which is the userspace tool used to communicate with the
+ sonypi driver (through /dev/sonypi) is deprecated as well since all
+ its features are now available under the sysfs tree via sony-laptop.
diff --git a/Documentation/laptops/sonypi.txt b/Documentation/laptops/sonypi.txt
new file mode 100644
index 000000000..606bdb9ce
--- /dev/null
+++ b/Documentation/laptops/sonypi.txt
@@ -0,0 +1,152 @@
+Sony Programmable I/O Control Device Driver Readme
+--------------------------------------------------
+ Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net>
+ Copyright (C) 2001-2002 Alcôve <www.alcove.com>
+ Copyright (C) 2001 Michael Ashley <m.ashley@unsw.edu.au>
+ Copyright (C) 2001 Junichi Morita <jun1m@mars.dti.ne.jp>
+ Copyright (C) 2000 Takaya Kinjo <t-kinjo@tc4.so-net.ne.jp>
+ Copyright (C) 2000 Andrew Tridgell <tridge@samba.org>
+
+This driver enables access to the Sony Programmable I/O Control Device which
+can be found in many Sony Vaio laptops. Some newer Sony laptops (seems to be
+limited to new FX series laptops, at least the FX501 and the FX702) lack a
+sonypi device and are not supported at all by this driver.
+
+It will give access (through a user space utility) to some events those laptops
+generate, like:
+ - jogdial events (the small wheel on the side of Vaios)
+ - capture button events (only on Vaio Picturebook series)
+ - Fn keys
+ - bluetooth button (only on C1VR model)
+ - programmable keys, back, help, zoom, thumbphrase buttons, etc.
+ (when available)
+
+Those events (see linux/sonypi.h) can be polled using the character device node
+/dev/sonypi (major 10, minor auto allocated or specified as a option).
+A simple daemon which translates the jogdial movements into mouse wheel events
+can be downloaded at: <http://popies.net/sonypi/>
+
+Another option to intercept the events is to get them directly through the
+input layer.
+
+This driver supports also some ioctl commands for setting the LCD screen
+brightness and querying the batteries charge information (some more
+commands may be added in the future).
+
+This driver can also be used to set the camera controls on Picturebook series
+(brightness, contrast etc), and is used by the video4linux driver for the
+Motion Eye camera.
+
+Please note that this driver was created by reverse engineering the Windows
+driver and the ACPI BIOS, because Sony doesn't agree to release any programming
+specs for its laptops. If someone convinces them to do so, drop me a note.
+
+Driver options:
+---------------
+
+Several options can be passed to the sonypi driver using the standard
+module argument syntax (<param>=<value> when passing the option to the
+module or sonypi.<param>=<value> on the kernel boot line when sonypi is
+statically linked into the kernel). Those options are:
+
+ minor: minor number of the misc device /dev/sonypi,
+ default is -1 (automatic allocation, see /proc/misc
+ or kernel logs)
+
+ camera: if you have a PictureBook series Vaio (with the
+ integrated MotionEye camera), set this parameter to 1
+ in order to let the driver access to the camera
+
+ fnkeyinit: on some Vaios (C1VE, C1VR etc), the Fn key events don't
+ get enabled unless you set this parameter to 1.
+ Do not use this option unless it's actually necessary,
+ some Vaio models don't deal well with this option.
+ This option is available only if the kernel is
+ compiled without ACPI support (since it conflicts
+ with it and it shouldn't be required anyway if
+ ACPI is already enabled).
+
+ verbose: set to 1 to print unknown events received from the
+ sonypi device.
+ set to 2 to print all events received from the
+ sonypi device.
+
+ compat: uses some compatibility code for enabling the sonypi
+ events. If the driver worked for you in the past
+ (prior to version 1.5) and does not work anymore,
+ add this option and report to the author.
+
+ mask: event mask telling the driver what events will be
+ reported to the user. This parameter is required for
+ some Vaio models where the hardware reuses values
+ used in other Vaio models (like the FX series who does
+ not have a jogdial but reuses the jogdial events for
+ programmable keys events). The default event mask is
+ set to 0xffffffff, meaning that all possible events
+ will be tried. You can use the following bits to
+ construct your own event mask (from
+ drivers/char/sonypi.h):
+ SONYPI_JOGGER_MASK 0x0001
+ SONYPI_CAPTURE_MASK 0x0002
+ SONYPI_FNKEY_MASK 0x0004
+ SONYPI_BLUETOOTH_MASK 0x0008
+ SONYPI_PKEY_MASK 0x0010
+ SONYPI_BACK_MASK 0x0020
+ SONYPI_HELP_MASK 0x0040
+ SONYPI_LID_MASK 0x0080
+ SONYPI_ZOOM_MASK 0x0100
+ SONYPI_THUMBPHRASE_MASK 0x0200
+ SONYPI_MEYE_MASK 0x0400
+ SONYPI_MEMORYSTICK_MASK 0x0800
+ SONYPI_BATTERY_MASK 0x1000
+ SONYPI_WIRELESS_MASK 0x2000
+
+ useinput: if set (which is the default) two input devices are
+ created, one which interprets the jogdial events as
+ mouse events, the other one which acts like a
+ keyboard reporting the pressing of the special keys.
+
+Module use:
+-----------
+
+In order to automatically load the sonypi module on use, you can put those
+lines a configuration file in /etc/modprobe.d/:
+
+ alias char-major-10-250 sonypi
+ options sonypi minor=250
+
+This supposes the use of minor 250 for the sonypi device:
+
+ # mknod /dev/sonypi c 10 250
+
+Bugs:
+-----
+
+ - several users reported that this driver disables the BIOS-managed
+ Fn-keys which put the laptop in sleeping state, or switch the
+ external monitor on/off. There is no workaround yet, since this
+ driver disables all APM management for those keys, by enabling the
+ ACPI management (and the ACPI core stuff is not complete yet). If
+ you have one of those laptops with working Fn keys and want to
+ continue to use them, don't use this driver.
+
+ - some users reported that the laptop speed is lower (dhrystone
+ tested) when using the driver with the fnkeyinit parameter. I cannot
+ reproduce it on my laptop and not all users have this problem.
+ This happens because the fnkeyinit parameter enables the ACPI
+ mode (but without additional ACPI control, like processor
+ speed handling etc). Use ACPI instead of APM if it works on your
+ laptop.
+
+ - sonypi lacks the ability to distinguish between certain key
+ events on some models.
+
+ - some models with the nvidia card (geforce go 6200 tc) uses a
+ different way to adjust the backlighting of the screen. There
+ is a userspace utility to adjust the brightness on those models,
+ which can be downloaded from
+ http://www.acc.umu.se/~erikw/program/smartdimmer-0.1.tar.bz2
+
+ - since all development was done by reverse engineering, there is
+ _absolutely no guarantee_ that this driver will not crash your
+ laptop. Permanently.
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
new file mode 100644
index 000000000..72a150d8f
--- /dev/null
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -0,0 +1,1479 @@
+ ThinkPad ACPI Extras Driver
+
+ Version 0.25
+ October 16th, 2013
+
+ Borislav Deianov <borislav@users.sf.net>
+ Henrique de Moraes Holschuh <hmh@hmh.eng.br>
+ http://ibm-acpi.sf.net/
+
+
+This is a Linux driver for the IBM and Lenovo ThinkPad laptops. It
+supports various features of these laptops which are accessible
+through the ACPI and ACPI EC framework, but not otherwise fully
+supported by the generic Linux ACPI drivers.
+
+This driver used to be named ibm-acpi until kernel 2.6.21 and release
+0.13-20070314. It used to be in the drivers/acpi tree, but it was
+moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel
+2.6.22, and release 0.14. It was moved to drivers/platform/x86 for
+kernel 2.6.29 and release 0.22.
+
+The driver is named "thinkpad-acpi". In some places, like module
+names and log messages, "thinkpad_acpi" is used because of userspace
+issues.
+
+"tpacpi" is used as a shorthand where "thinkpad-acpi" would be too
+long due to length limitations on some Linux kernel versions.
+
+Status
+------
+
+The features currently supported are the following (see below for
+detailed description):
+
+ - Fn key combinations
+ - Bluetooth enable and disable
+ - video output switching, expansion control
+ - ThinkLight on and off
+ - CMOS/UCMS control
+ - LED control
+ - ACPI sounds
+ - temperature sensors
+ - Experimental: embedded controller register dump
+ - LCD brightness control
+ - Volume control
+ - Fan control and monitoring: fan speed, fan enable/disable
+ - WAN enable and disable
+ - UWB enable and disable
+
+A compatibility table by model and feature is maintained on the web
+site, http://ibm-acpi.sf.net/. I appreciate any success or failure
+reports, especially if they add to or correct the compatibility table.
+Please include the following information in your report:
+
+ - ThinkPad model name
+ - a copy of your ACPI tables, using the "acpidump" utility
+ - a copy of the output of dmidecode, with serial numbers
+ and UUIDs masked off
+ - which driver features work and which don't
+ - the observed behavior of non-working features
+
+Any other comments or patches are also more than welcome.
+
+
+Installation
+------------
+
+If you are compiling this driver as included in the Linux kernel
+sources, look for the CONFIG_THINKPAD_ACPI Kconfig option.
+It is located on the menu path: "Device Drivers" -> "X86 Platform
+Specific Device Drivers" -> "ThinkPad ACPI Laptop Extras".
+
+
+Features
+--------
+
+The driver exports two different interfaces to userspace, which can be
+used to access the features it provides. One is a legacy procfs-based
+interface, which will be removed at some time in the future. The other
+is a new sysfs-based interface which is not complete yet.
+
+The procfs interface creates the /proc/acpi/ibm directory. There is a
+file under that directory for each feature it supports. The procfs
+interface is mostly frozen, and will change very little if at all: it
+will not be extended to add any new functionality in the driver, instead
+all new functionality will be implemented on the sysfs interface.
+
+The sysfs interface tries to blend in the generic Linux sysfs subsystems
+and classes as much as possible. Since some of these subsystems are not
+yet ready or stabilized, it is expected that this interface will change,
+and any and all userspace programs must deal with it.
+
+
+Notes about the sysfs interface:
+
+Unlike what was done with the procfs interface, correctness when talking
+to the sysfs interfaces will be enforced, as will correctness in the
+thinkpad-acpi's implementation of sysfs interfaces.
+
+Also, any bugs in the thinkpad-acpi sysfs driver code or in the
+thinkpad-acpi's implementation of the sysfs interfaces will be fixed for
+maximum correctness, even if that means changing an interface in
+non-compatible ways. As these interfaces mature both in the kernel and
+in thinkpad-acpi, such changes should become quite rare.
+
+Applications interfacing to the thinkpad-acpi sysfs interfaces must
+follow all sysfs guidelines and correctly process all errors (the sysfs
+interface makes extensive use of errors). File descriptors and open /
+close operations to the sysfs inodes must also be properly implemented.
+
+The version of thinkpad-acpi's sysfs interface is exported by the driver
+as a driver attribute (see below).
+
+Sysfs driver attributes are on the driver's sysfs attribute space,
+for 2.6.23+ this is /sys/bus/platform/drivers/thinkpad_acpi/ and
+/sys/bus/platform/drivers/thinkpad_hwmon/
+
+Sysfs device attributes are on the thinkpad_acpi device sysfs attribute
+space, for 2.6.23+ this is /sys/devices/platform/thinkpad_acpi/.
+
+Sysfs device attributes for the sensors and fan are on the
+thinkpad_hwmon device's sysfs attribute space, but you should locate it
+looking for a hwmon device with the name attribute of "thinkpad", or
+better yet, through libsensors.
+
+
+Driver version
+--------------
+
+procfs: /proc/acpi/ibm/driver
+sysfs driver attribute: version
+
+The driver name and version. No commands can be written to this file.
+
+
+Sysfs interface version
+-----------------------
+
+sysfs driver attribute: interface_version
+
+Version of the thinkpad-acpi sysfs interface, as an unsigned long
+(output in hex format: 0xAAAABBCC), where:
+ AAAA - major revision
+ BB - minor revision
+ CC - bugfix revision
+
+The sysfs interface version changelog for the driver can be found at the
+end of this document. Changes to the sysfs interface done by the kernel
+subsystems are not documented here, nor are they tracked by this
+attribute.
+
+Changes to the thinkpad-acpi sysfs interface are only considered
+non-experimental when they are submitted to Linux mainline, at which
+point the changes in this interface are documented and interface_version
+may be updated. If you are using any thinkpad-acpi features not yet
+sent to mainline for merging, you do so on your own risk: these features
+may disappear, or be implemented in a different and incompatible way by
+the time they are merged in Linux mainline.
+
+Changes that are backwards-compatible by nature (e.g. the addition of
+attributes that do not change the way the other attributes work) do not
+always warrant an update of interface_version. Therefore, one must
+expect that an attribute might not be there, and deal with it properly
+(an attribute not being there *is* a valid way to make it clear that a
+feature is not available in sysfs).
+
+
+Hot keys
+--------
+
+procfs: /proc/acpi/ibm/hotkey
+sysfs device attribute: hotkey_*
+
+In a ThinkPad, the ACPI HKEY handler is responsible for communicating
+some important events and also keyboard hot key presses to the operating
+system. Enabling the hotkey functionality of thinkpad-acpi signals the
+firmware that such a driver is present, and modifies how the ThinkPad
+firmware will behave in many situations.
+
+The driver enables the HKEY ("hot key") event reporting automatically
+when loaded, and disables it when it is removed.
+
+The driver will report HKEY events in the following format:
+
+ ibm/hotkey HKEY 00000080 0000xxxx
+
+Some of these events refer to hot key presses, but not all of them.
+
+The driver will generate events over the input layer for hot keys and
+radio switches, and over the ACPI netlink layer for other events. The
+input layer support accepts the standard IOCTLs to remap the keycodes
+assigned to each hot key.
+
+The hot key bit mask allows some control over which hot keys generate
+events. If a key is "masked" (bit set to 0 in the mask), the firmware
+will handle it. If it is "unmasked", it signals the firmware that
+thinkpad-acpi would prefer to handle it, if the firmware would be so
+kind to allow it (and it often doesn't!).
+
+Not all bits in the mask can be modified. Not all bits that can be
+modified do anything. Not all hot keys can be individually controlled
+by the mask. Some models do not support the mask at all. The behaviour
+of the mask is, therefore, highly dependent on the ThinkPad model.
+
+The driver will filter out any unmasked hotkeys, so even if the firmware
+doesn't allow disabling an specific hotkey, the driver will not report
+events for unmasked hotkeys.
+
+Note that unmasking some keys prevents their default behavior. For
+example, if Fn+F5 is unmasked, that key will no longer enable/disable
+Bluetooth by itself in firmware.
+
+Note also that not all Fn key combinations are supported through ACPI
+depending on the ThinkPad model and firmware version. On those
+ThinkPads, it is still possible to support some extra hotkeys by
+polling the "CMOS NVRAM" at least 10 times per second. The driver
+attempts to enables this functionality automatically when required.
+
+procfs notes:
+
+The following commands can be written to the /proc/acpi/ibm/hotkey file:
+
+ echo 0xffffffff > /proc/acpi/ibm/hotkey -- enable all hot keys
+ echo 0 > /proc/acpi/ibm/hotkey -- disable all possible hot keys
+ ... any other 8-hex-digit mask ...
+ echo reset > /proc/acpi/ibm/hotkey -- restore the recommended mask
+
+The following commands have been deprecated and will cause the kernel
+to log a warning:
+
+ echo enable > /proc/acpi/ibm/hotkey -- does nothing
+ echo disable > /proc/acpi/ibm/hotkey -- returns an error
+
+The procfs interface does not support NVRAM polling control. So as to
+maintain maximum bug-to-bug compatibility, it does not report any masks,
+nor does it allow one to manipulate the hot key mask when the firmware
+does not support masks at all, even if NVRAM polling is in use.
+
+sysfs notes:
+
+ hotkey_bios_enabled:
+ DEPRECATED, WILL BE REMOVED SOON.
+
+ Returns 0.
+
+ hotkey_bios_mask:
+ DEPRECATED, DON'T USE, WILL BE REMOVED IN THE FUTURE.
+
+ Returns the hot keys mask when thinkpad-acpi was loaded.
+ Upon module unload, the hot keys mask will be restored
+ to this value. This is always 0x80c, because those are
+ the hotkeys that were supported by ancient firmware
+ without mask support.
+
+ hotkey_enable:
+ DEPRECATED, WILL BE REMOVED SOON.
+
+ 0: returns -EPERM
+ 1: does nothing
+
+ hotkey_mask:
+ bit mask to enable reporting (and depending on
+ the firmware, ACPI event generation) for each hot key
+ (see above). Returns the current status of the hot keys
+ mask, and allows one to modify it.
+
+ hotkey_all_mask:
+ bit mask that should enable event reporting for all
+ supported hot keys, when echoed to hotkey_mask above.
+ Unless you know which events need to be handled
+ passively (because the firmware *will* handle them
+ anyway), do *not* use hotkey_all_mask. Use
+ hotkey_recommended_mask, instead. You have been warned.
+
+ hotkey_recommended_mask:
+ bit mask that should enable event reporting for all
+ supported hot keys, except those which are always
+ handled by the firmware anyway. Echo it to
+ hotkey_mask above, to use. This is the default mask
+ used by the driver.
+
+ hotkey_source_mask:
+ bit mask that selects which hot keys will the driver
+ poll the NVRAM for. This is auto-detected by the driver
+ based on the capabilities reported by the ACPI firmware,
+ but it can be overridden at runtime.
+
+ Hot keys whose bits are set in hotkey_source_mask are
+ polled for in NVRAM, and reported as hotkey events if
+ enabled in hotkey_mask. Only a few hot keys are
+ available through CMOS NVRAM polling.
+
+ Warning: when in NVRAM mode, the volume up/down/mute
+ keys are synthesized according to changes in the mixer,
+ which uses a single volume up or volume down hotkey
+ press to unmute, as per the ThinkPad volume mixer user
+ interface. When in ACPI event mode, volume up/down/mute
+ events are reported by the firmware and can behave
+ differently (and that behaviour changes with firmware
+ version -- not just with firmware models -- as well as
+ OSI(Linux) state).
+
+ hotkey_poll_freq:
+ frequency in Hz for hot key polling. It must be between
+ 0 and 25 Hz. Polling is only carried out when strictly
+ needed.
+
+ Setting hotkey_poll_freq to zero disables polling, and
+ will cause hot key presses that require NVRAM polling
+ to never be reported.
+
+ Setting hotkey_poll_freq too low may cause repeated
+ pressings of the same hot key to be misreported as a
+ single key press, or to not even be detected at all.
+ The recommended polling frequency is 10Hz.
+
+ hotkey_radio_sw:
+ If the ThinkPad has a hardware radio switch, this
+ attribute will read 0 if the switch is in the "radios
+ disabled" position, and 1 if the switch is in the
+ "radios enabled" position.
+
+ This attribute has poll()/select() support.
+
+ hotkey_tablet_mode:
+ If the ThinkPad has tablet capabilities, this attribute
+ will read 0 if the ThinkPad is in normal mode, and
+ 1 if the ThinkPad is in tablet mode.
+
+ This attribute has poll()/select() support.
+
+ wakeup_reason:
+ Set to 1 if the system is waking up because the user
+ requested a bay ejection. Set to 2 if the system is
+ waking up because the user requested the system to
+ undock. Set to zero for normal wake-ups or wake-ups
+ due to unknown reasons.
+
+ This attribute has poll()/select() support.
+
+ wakeup_hotunplug_complete:
+ Set to 1 if the system was waken up because of an
+ undock or bay ejection request, and that request
+ was successfully completed. At this point, it might
+ be useful to send the system back to sleep, at the
+ user's choice. Refer to HKEY events 0x4003 and
+ 0x3003, below.
+
+ This attribute has poll()/select() support.
+
+input layer notes:
+
+A Hot key is mapped to a single input layer EV_KEY event, possibly
+followed by an EV_MSC MSC_SCAN event that shall contain that key's scan
+code. An EV_SYN event will always be generated to mark the end of the
+event block.
+
+Do not use the EV_MSC MSC_SCAN events to process keys. They are to be
+used as a helper to remap keys, only. They are particularly useful when
+remapping KEY_UNKNOWN keys.
+
+The events are available in an input device, with the following id:
+
+ Bus: BUS_HOST
+ vendor: 0x1014 (PCI_VENDOR_ID_IBM) or
+ 0x17aa (PCI_VENDOR_ID_LENOVO)
+ product: 0x5054 ("TP")
+ version: 0x4101
+
+The version will have its LSB incremented if the keymap changes in a
+backwards-compatible way. The MSB shall always be 0x41 for this input
+device. If the MSB is not 0x41, do not use the device as described in
+this section, as it is either something else (e.g. another input device
+exported by a thinkpad driver, such as HDAPS) or its functionality has
+been changed in a non-backwards compatible way.
+
+Adding other event types for other functionalities shall be considered a
+backwards-compatible change for this input device.
+
+Thinkpad-acpi Hot Key event map (version 0x4101):
+
+ACPI Scan
+event code Key Notes
+
+0x1001 0x00 FN+F1 -
+
+0x1002 0x01 FN+F2 IBM: battery (rare)
+ Lenovo: Screen lock
+
+0x1003 0x02 FN+F3 Many IBM models always report
+ this hot key, even with hot keys
+ disabled or with Fn+F3 masked
+ off
+ IBM: screen lock, often turns
+ off the ThinkLight as side-effect
+ Lenovo: battery
+
+0x1004 0x03 FN+F4 Sleep button (ACPI sleep button
+ semantics, i.e. sleep-to-RAM).
+ It always generates some kind
+ of event, either the hot key
+ event or an ACPI sleep button
+ event. The firmware may
+ refuse to generate further FN+F4
+ key presses until a S3 or S4 ACPI
+ sleep cycle is performed or some
+ time passes.
+
+0x1005 0x04 FN+F5 Radio. Enables/disables
+ the internal Bluetooth hardware
+ and W-WAN card if left in control
+ of the firmware. Does not affect
+ the WLAN card.
+ Should be used to turn on/off all
+ radios (Bluetooth+W-WAN+WLAN),
+ really.
+
+0x1006 0x05 FN+F6 -
+
+0x1007 0x06 FN+F7 Video output cycle.
+ Do you feel lucky today?
+
+0x1008 0x07 FN+F8 IBM: toggle screen expand
+ Lenovo: configure UltraNav,
+ or toggle screen expand
+
+0x1009 0x08 FN+F9 -
+ .. .. ..
+0x100B 0x0A FN+F11 -
+
+0x100C 0x0B FN+F12 Sleep to disk. You are always
+ supposed to handle it yourself,
+ either through the ACPI event,
+ or through a hotkey event.
+ The firmware may refuse to
+ generate further FN+F12 key
+ press events until a S3 or S4
+ ACPI sleep cycle is performed,
+ or some time passes.
+
+0x100D 0x0C FN+BACKSPACE -
+0x100E 0x0D FN+INSERT -
+0x100F 0x0E FN+DELETE -
+
+0x1010 0x0F FN+HOME Brightness up. This key is
+ always handled by the firmware
+ in IBM ThinkPads, even when
+ unmasked. Just leave it alone.
+ For Lenovo ThinkPads with a new
+ BIOS, it has to be handled either
+ by the ACPI OSI, or by userspace.
+ The driver does the right thing,
+ never mess with this.
+0x1011 0x10 FN+END Brightness down. See brightness
+ up for details.
+
+0x1012 0x11 FN+PGUP ThinkLight toggle. This key is
+ always handled by the firmware,
+ even when unmasked.
+
+0x1013 0x12 FN+PGDOWN -
+
+0x1014 0x13 FN+SPACE Zoom key
+
+0x1015 0x14 VOLUME UP Internal mixer volume up. This
+ key is always handled by the
+ firmware, even when unmasked.
+ NOTE: Lenovo seems to be changing
+ this.
+0x1016 0x15 VOLUME DOWN Internal mixer volume up. This
+ key is always handled by the
+ firmware, even when unmasked.
+ NOTE: Lenovo seems to be changing
+ this.
+0x1017 0x16 MUTE Mute internal mixer. This
+ key is always handled by the
+ firmware, even when unmasked.
+
+0x1018 0x17 THINKPAD ThinkPad/Access IBM/Lenovo key
+
+0x1019 0x18 unknown
+.. .. ..
+0x1020 0x1F unknown
+
+The ThinkPad firmware does not allow one to differentiate when most hot
+keys are pressed or released (either that, or we don't know how to, yet).
+For these keys, the driver generates a set of events for a key press and
+immediately issues the same set of events for a key release. It is
+unknown by the driver if the ThinkPad firmware triggered these events on
+hot key press or release, but the firmware will do it for either one, not
+both.
+
+If a key is mapped to KEY_RESERVED, it generates no input events at all.
+If a key is mapped to KEY_UNKNOWN, it generates an input event that
+includes an scan code. If a key is mapped to anything else, it will
+generate input device EV_KEY events.
+
+In addition to the EV_KEY events, thinkpad-acpi may also issue EV_SW
+events for switches:
+
+SW_RFKILL_ALL T60 and later hardware rfkill rocker switch
+SW_TABLET_MODE Tablet ThinkPads HKEY events 0x5009 and 0x500A
+
+Non hotkey ACPI HKEY event map:
+-------------------------------
+
+Events that are never propagated by the driver:
+
+0x2304 System is waking up from suspend to undock
+0x2305 System is waking up from suspend to eject bay
+0x2404 System is waking up from hibernation to undock
+0x2405 System is waking up from hibernation to eject bay
+0x5001 Lid closed
+0x5002 Lid opened
+0x5009 Tablet swivel: switched to tablet mode
+0x500A Tablet swivel: switched to normal mode
+0x5010 Brightness level changed/control event
+0x6000 KEYBOARD: Numlock key pressed
+0x6005 KEYBOARD: Fn key pressed (TO BE VERIFIED)
+0x7000 Radio Switch may have changed state
+
+
+Events that are propagated by the driver to userspace:
+
+0x2313 ALARM: System is waking up from suspend because
+ the battery is nearly empty
+0x2413 ALARM: System is waking up from hibernation because
+ the battery is nearly empty
+0x3003 Bay ejection (see 0x2x05) complete, can sleep again
+0x3006 Bay hotplug request (hint to power up SATA link when
+ the optical drive tray is ejected)
+0x4003 Undocked (see 0x2x04), can sleep again
+0x4010 Docked into hotplug port replicator (non-ACPI dock)
+0x4011 Undocked from hotplug port replicator (non-ACPI dock)
+0x500B Tablet pen inserted into its storage bay
+0x500C Tablet pen removed from its storage bay
+0x6011 ALARM: battery is too hot
+0x6012 ALARM: battery is extremely hot
+0x6021 ALARM: a sensor is too hot
+0x6022 ALARM: a sensor is extremely hot
+0x6030 System thermal table changed
+0x6040 Nvidia Optimus/AC adapter related (TO BE VERIFIED)
+
+Battery nearly empty alarms are a last resort attempt to get the
+operating system to hibernate or shutdown cleanly (0x2313), or shutdown
+cleanly (0x2413) before power is lost. They must be acted upon, as the
+wake up caused by the firmware will have negated most safety nets...
+
+When any of the "too hot" alarms happen, according to Lenovo the user
+should suspend or hibernate the laptop (and in the case of battery
+alarms, unplug the AC adapter) to let it cool down. These alarms do
+signal that something is wrong, they should never happen on normal
+operating conditions.
+
+The "extremely hot" alarms are emergencies. According to Lenovo, the
+operating system is to force either an immediate suspend or hibernate
+cycle, or a system shutdown. Obviously, something is very wrong if this
+happens.
+
+
+Brightness hotkey notes:
+
+Don't mess with the brightness hotkeys in a Thinkpad. If you want
+notifications for OSD, use the sysfs backlight class event support.
+
+The driver will issue KEY_BRIGHTNESS_UP and KEY_BRIGHTNESS_DOWN events
+automatically for the cases were userspace has to do something to
+implement brightness changes. When you override these events, you will
+either fail to handle properly the ThinkPads that require explicit
+action to change backlight brightness, or the ThinkPads that require
+that no action be taken to work properly.
+
+
+Bluetooth
+---------
+
+procfs: /proc/acpi/ibm/bluetooth
+sysfs device attribute: bluetooth_enable (deprecated)
+sysfs rfkill class: switch "tpacpi_bluetooth_sw"
+
+This feature shows the presence and current state of a ThinkPad
+Bluetooth device in the internal ThinkPad CDC slot.
+
+If the ThinkPad supports it, the Bluetooth state is stored in NVRAM,
+so it is kept across reboots and power-off.
+
+Procfs notes:
+
+If Bluetooth is installed, the following commands can be used:
+
+ echo enable > /proc/acpi/ibm/bluetooth
+ echo disable > /proc/acpi/ibm/bluetooth
+
+Sysfs notes:
+
+ If the Bluetooth CDC card is installed, it can be enabled /
+ disabled through the "bluetooth_enable" thinkpad-acpi device
+ attribute, and its current status can also be queried.
+
+ enable:
+ 0: disables Bluetooth / Bluetooth is disabled
+ 1: enables Bluetooth / Bluetooth is enabled.
+
+ Note: this interface has been superseded by the generic rfkill
+ class. It has been deprecated, and it will be removed in year
+ 2010.
+
+ rfkill controller switch "tpacpi_bluetooth_sw": refer to
+ Documentation/rfkill.txt for details.
+
+
+Video output control -- /proc/acpi/ibm/video
+--------------------------------------------
+
+This feature allows control over the devices used for video output -
+LCD, CRT or DVI (if available). The following commands are available:
+
+ echo lcd_enable > /proc/acpi/ibm/video
+ echo lcd_disable > /proc/acpi/ibm/video
+ echo crt_enable > /proc/acpi/ibm/video
+ echo crt_disable > /proc/acpi/ibm/video
+ echo dvi_enable > /proc/acpi/ibm/video
+ echo dvi_disable > /proc/acpi/ibm/video
+ echo auto_enable > /proc/acpi/ibm/video
+ echo auto_disable > /proc/acpi/ibm/video
+ echo expand_toggle > /proc/acpi/ibm/video
+ echo video_switch > /proc/acpi/ibm/video
+
+NOTE: Access to this feature is restricted to processes owning the
+CAP_SYS_ADMIN capability for safety reasons, as it can interact badly
+enough with some versions of X.org to crash it.
+
+Each video output device can be enabled or disabled individually.
+Reading /proc/acpi/ibm/video shows the status of each device.
+
+Automatic video switching can be enabled or disabled. When automatic
+video switching is enabled, certain events (e.g. opening the lid,
+docking or undocking) cause the video output device to change
+automatically. While this can be useful, it also causes flickering
+and, on the X40, video corruption. By disabling automatic switching,
+the flickering or video corruption can be avoided.
+
+The video_switch command cycles through the available video outputs
+(it simulates the behavior of Fn-F7).
+
+Video expansion can be toggled through this feature. This controls
+whether the display is expanded to fill the entire LCD screen when a
+mode with less than full resolution is used. Note that the current
+video expansion status cannot be determined through this feature.
+
+Note that on many models (particularly those using Radeon graphics
+chips) the X driver configures the video card in a way which prevents
+Fn-F7 from working. This also disables the video output switching
+features of this driver, as it uses the same ACPI methods as
+Fn-F7. Video switching on the console should still work.
+
+UPDATE: refer to https://bugs.freedesktop.org/show_bug.cgi?id=2000
+
+
+ThinkLight control
+------------------
+
+procfs: /proc/acpi/ibm/light
+sysfs attributes: as per LED class, for the "tpacpi::thinklight" LED
+
+procfs notes:
+
+The ThinkLight status can be read and set through the procfs interface. A
+few models which do not make the status available will show the ThinkLight
+status as "unknown". The available commands are:
+
+ echo on > /proc/acpi/ibm/light
+ echo off > /proc/acpi/ibm/light
+
+sysfs notes:
+
+The ThinkLight sysfs interface is documented by the LED class
+documentation, in Documentation/leds/leds-class.txt. The ThinkLight LED name
+is "tpacpi::thinklight".
+
+Due to limitations in the sysfs LED class, if the status of the ThinkLight
+cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
+It is impossible to know if the status returned through sysfs is valid.
+
+
+CMOS/UCMS control
+-----------------
+
+procfs: /proc/acpi/ibm/cmos
+sysfs device attribute: cmos_command
+
+This feature is mostly used internally by the ACPI firmware to keep the legacy
+CMOS NVRAM bits in sync with the current machine state, and to record this
+state so that the ThinkPad will retain such settings across reboots.
+
+Some of these commands actually perform actions in some ThinkPad models, but
+this is expected to disappear more and more in newer models. As an example, in
+a T43 and in a X40, commands 12 and 13 still control the ThinkLight state for
+real, but commands 0 to 2 don't control the mixer anymore (they have been
+phased out) and just update the NVRAM.
+
+The range of valid cmos command numbers is 0 to 21, but not all have an
+effect and the behavior varies from model to model. Here is the behavior
+on the X40 (tpb is the ThinkPad Buttons utility):
+
+ 0 - Related to "Volume down" key press
+ 1 - Related to "Volume up" key press
+ 2 - Related to "Mute on" key press
+ 3 - Related to "Access IBM" key press
+ 4 - Related to "LCD brightness up" key press
+ 5 - Related to "LCD brightness down" key press
+ 11 - Related to "toggle screen expansion" key press/function
+ 12 - Related to "ThinkLight on"
+ 13 - Related to "ThinkLight off"
+ 14 - Related to "ThinkLight" key press (toggle ThinkLight)
+
+The cmos command interface is prone to firmware split-brain problems, as
+in newer ThinkPads it is just a compatibility layer. Do not use it, it is
+exported just as a debug tool.
+
+
+LED control
+-----------
+
+procfs: /proc/acpi/ibm/led
+sysfs attributes: as per LED class, see below for names
+
+Some of the LED indicators can be controlled through this feature. On
+some older ThinkPad models, it is possible to query the status of the
+LED indicators as well. Newer ThinkPads cannot query the real status
+of the LED indicators.
+
+Because misuse of the LEDs could induce an unaware user to perform
+dangerous actions (like undocking or ejecting a bay device while the
+buses are still active), or mask an important alarm (such as a nearly
+empty battery, or a broken battery), access to most LEDs is
+restricted.
+
+Unrestricted access to all LEDs requires that thinkpad-acpi be
+compiled with the CONFIG_THINKPAD_ACPI_UNSAFE_LEDS option enabled.
+Distributions must never enable this option. Individual users that
+are aware of the consequences are welcome to enabling it.
+
+Audio mute and microphone mute LEDs are supported, but currently not
+visible to userspace. They are used by the snd-hda-intel audio driver.
+
+procfs notes:
+
+The available commands are:
+
+ echo '<LED number> on' >/proc/acpi/ibm/led
+ echo '<LED number> off' >/proc/acpi/ibm/led
+ echo '<LED number> blink' >/proc/acpi/ibm/led
+
+The <LED number> range is 0 to 15. The set of LEDs that can be
+controlled varies from model to model. Here is the common ThinkPad
+mapping:
+
+ 0 - power
+ 1 - battery (orange)
+ 2 - battery (green)
+ 3 - UltraBase/dock
+ 4 - UltraBay
+ 5 - UltraBase battery slot
+ 6 - (unknown)
+ 7 - standby
+ 8 - dock status 1
+ 9 - dock status 2
+ 10, 11 - (unknown)
+ 12 - thinkvantage
+ 13, 14, 15 - (unknown)
+
+All of the above can be turned on and off and can be made to blink.
+
+sysfs notes:
+
+The ThinkPad LED sysfs interface is described in detail by the LED class
+documentation, in Documentation/leds/leds-class.txt.
+
+The LEDs are named (in LED ID order, from 0 to 12):
+"tpacpi::power", "tpacpi:orange:batt", "tpacpi:green:batt",
+"tpacpi::dock_active", "tpacpi::bay_active", "tpacpi::dock_batt",
+"tpacpi::unknown_led", "tpacpi::standby", "tpacpi::dock_status1",
+"tpacpi::dock_status2", "tpacpi::unknown_led2", "tpacpi::unknown_led3",
+"tpacpi::thinkvantage".
+
+Due to limitations in the sysfs LED class, if the status of the LED
+indicators cannot be read due to an error, thinkpad-acpi will report it as
+a brightness of zero (same as LED off).
+
+If the thinkpad firmware doesn't support reading the current status,
+trying to read the current LED brightness will just return whatever
+brightness was last written to that attribute.
+
+These LEDs can blink using hardware acceleration. To request that a
+ThinkPad indicator LED should blink in hardware accelerated mode, use the
+"timer" trigger, and leave the delay_on and delay_off parameters set to
+zero (to request hardware acceleration autodetection).
+
+LEDs that are known not to exist in a given ThinkPad model are not
+made available through the sysfs interface. If you have a dock and you
+notice there are LEDs listed for your ThinkPad that do not exist (and
+are not in the dock), or if you notice that there are missing LEDs,
+a report to ibm-acpi-devel@lists.sourceforge.net is appreciated.
+
+
+ACPI sounds -- /proc/acpi/ibm/beep
+----------------------------------
+
+The BEEP method is used internally by the ACPI firmware to provide
+audible alerts in various situations. This feature allows the same
+sounds to be triggered manually.
+
+The commands are non-negative integer numbers:
+
+ echo <number> >/proc/acpi/ibm/beep
+
+The valid <number> range is 0 to 17. Not all numbers trigger sounds
+and the sounds vary from model to model. Here is the behavior on the
+X40:
+
+ 0 - stop a sound in progress (but use 17 to stop 16)
+ 2 - two beeps, pause, third beep ("low battery")
+ 3 - single beep
+ 4 - high, followed by low-pitched beep ("unable")
+ 5 - single beep
+ 6 - very high, followed by high-pitched beep ("AC/DC")
+ 7 - high-pitched beep
+ 9 - three short beeps
+ 10 - very long beep
+ 12 - low-pitched beep
+ 15 - three high-pitched beeps repeating constantly, stop with 0
+ 16 - one medium-pitched beep repeating constantly, stop with 17
+ 17 - stop 16
+
+
+Temperature sensors
+-------------------
+
+procfs: /proc/acpi/ibm/thermal
+sysfs device attributes: (hwmon "thinkpad") temp*_input
+
+Most ThinkPads include six or more separate temperature sensors but only
+expose the CPU temperature through the standard ACPI methods. This
+feature shows readings from up to eight different sensors on older
+ThinkPads, and up to sixteen different sensors on newer ThinkPads.
+
+For example, on the X40, a typical output may be:
+temperatures: 42 42 45 41 36 -128 33 -128
+
+On the T43/p, a typical output may be:
+temperatures: 48 48 36 52 38 -128 31 -128 48 52 48 -128 -128 -128 -128 -128
+
+The mapping of thermal sensors to physical locations varies depending on
+system-board model (and thus, on ThinkPad model).
+
+http://thinkwiki.org/wiki/Thermal_Sensors is a public wiki page that
+tries to track down these locations for various models.
+
+Most (newer?) models seem to follow this pattern:
+
+1: CPU
+2: (depends on model)
+3: (depends on model)
+4: GPU
+5: Main battery: main sensor
+6: Bay battery: main sensor
+7: Main battery: secondary sensor
+8: Bay battery: secondary sensor
+9-15: (depends on model)
+
+For the R51 (source: Thomas Gruber):
+2: Mini-PCI
+3: Internal HDD
+
+For the T43, T43/p (source: Shmidoax/Thinkwiki.org)
+http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_T43.2C_T43p
+2: System board, left side (near PCMCIA slot), reported as HDAPS temp
+3: PCMCIA slot
+9: MCH (northbridge) to DRAM Bus
+10: Clock-generator, mini-pci card and ICH (southbridge), under Mini-PCI
+ card, under touchpad
+11: Power regulator, underside of system board, below F2 key
+
+The A31 has a very atypical layout for the thermal sensors
+(source: Milos Popovic, http://thinkwiki.org/wiki/Thermal_Sensors#ThinkPad_A31)
+1: CPU
+2: Main Battery: main sensor
+3: Power Converter
+4: Bay Battery: main sensor
+5: MCH (northbridge)
+6: PCMCIA/ambient
+7: Main Battery: secondary sensor
+8: Bay Battery: secondary sensor
+
+
+Procfs notes:
+ Readings from sensors that are not available return -128.
+ No commands can be written to this file.
+
+Sysfs notes:
+ Sensors that are not available return the ENXIO error. This
+ status may change at runtime, as there are hotplug thermal
+ sensors, like those inside the batteries and docks.
+
+ thinkpad-acpi thermal sensors are reported through the hwmon
+ subsystem, and follow all of the hwmon guidelines at
+ Documentation/hwmon.
+
+EXPERIMENTAL: Embedded controller register dump
+-----------------------------------------------
+
+This feature is not included in the thinkpad driver anymore.
+Instead the EC can be accessed through /sys/kernel/debug/ec with
+a userspace tool which can be found here:
+ftp://ftp.suse.com/pub/people/trenn/sources/ec
+
+Use it to determine the register holding the fan
+speed on some models. To do that, do the following:
+ - make sure the battery is fully charged
+ - make sure the fan is running
+ - use above mentioned tool to read out the EC
+
+Often fan and temperature values vary between
+readings. Since temperatures don't change vary fast, you can take
+several quick dumps to eliminate them.
+
+You can use a similar method to figure out the meaning of other
+embedded controller registers - e.g. make sure nothing else changes
+except the charging or discharging battery to determine which
+registers contain the current battery capacity, etc. If you experiment
+with this, do send me your results (including some complete dumps with
+a description of the conditions when they were taken.)
+
+
+LCD brightness control
+----------------------
+
+procfs: /proc/acpi/ibm/brightness
+sysfs backlight device "thinkpad_screen"
+
+This feature allows software control of the LCD brightness on ThinkPad
+models which don't have a hardware brightness slider.
+
+It has some limitations: the LCD backlight cannot be actually turned
+on or off by this interface, it just controls the backlight brightness
+level.
+
+On IBM (and some of the earlier Lenovo) ThinkPads, the backlight control
+has eight brightness levels, ranging from 0 to 7. Some of the levels
+may not be distinct. Later Lenovo models that implement the ACPI
+display backlight brightness control methods have 16 levels, ranging
+from 0 to 15.
+
+For IBM ThinkPads, there are two interfaces to the firmware for direct
+brightness control, EC and UCMS (or CMOS). To select which one should be
+used, use the brightness_mode module parameter: brightness_mode=1 selects
+EC mode, brightness_mode=2 selects UCMS mode, brightness_mode=3 selects EC
+mode with NVRAM backing (so that brightness changes are remembered across
+shutdown/reboot).
+
+The driver tries to select which interface to use from a table of
+defaults for each ThinkPad model. If it makes a wrong choice, please
+report this as a bug, so that we can fix it.
+
+Lenovo ThinkPads only support brightness_mode=2 (UCMS).
+
+When display backlight brightness controls are available through the
+standard ACPI interface, it is best to use it instead of this direct
+ThinkPad-specific interface. The driver will disable its native
+backlight brightness control interface if it detects that the standard
+ACPI interface is available in the ThinkPad.
+
+If you want to use the thinkpad-acpi backlight brightness control
+instead of the generic ACPI video backlight brightness control for some
+reason, you should use the acpi_backlight=vendor kernel parameter.
+
+The brightness_enable module parameter can be used to control whether
+the LCD brightness control feature will be enabled when available.
+brightness_enable=0 forces it to be disabled. brightness_enable=1
+forces it to be enabled when available, even if the standard ACPI
+interface is also available.
+
+Procfs notes:
+
+ The available commands are:
+
+ echo up >/proc/acpi/ibm/brightness
+ echo down >/proc/acpi/ibm/brightness
+ echo 'level <level>' >/proc/acpi/ibm/brightness
+
+Sysfs notes:
+
+The interface is implemented through the backlight sysfs class, which is
+poorly documented at this time.
+
+Locate the thinkpad_screen device under /sys/class/backlight, and inside
+it there will be the following attributes:
+
+ max_brightness:
+ Reads the maximum brightness the hardware can be set to.
+ The minimum is always zero.
+
+ actual_brightness:
+ Reads what brightness the screen is set to at this instant.
+
+ brightness:
+ Writes request the driver to change brightness to the
+ given value. Reads will tell you what brightness the
+ driver is trying to set the display to when "power" is set
+ to zero and the display has not been dimmed by a kernel
+ power management event.
+
+ power:
+ power management mode, where 0 is "display on", and 1 to 3
+ will dim the display backlight to brightness level 0
+ because thinkpad-acpi cannot really turn the backlight
+ off. Kernel power management events can temporarily
+ increase the current power management level, i.e. they can
+ dim the display.
+
+
+WARNING:
+
+ Whatever you do, do NOT ever call thinkpad-acpi backlight-level change
+ interface and the ACPI-based backlight level change interface
+ (available on newer BIOSes, and driven by the Linux ACPI video driver)
+ at the same time. The two will interact in bad ways, do funny things,
+ and maybe reduce the life of the backlight lamps by needlessly kicking
+ its level up and down at every change.
+
+
+Volume control (Console Audio control)
+--------------------------------------
+
+procfs: /proc/acpi/ibm/volume
+ALSA: "ThinkPad Console Audio Control", default ID: "ThinkPadEC"
+
+NOTE: by default, the volume control interface operates in read-only
+mode, as it is supposed to be used for on-screen-display purposes.
+The read/write mode can be enabled through the use of the
+"volume_control=1" module parameter.
+
+NOTE: distros are urged to not enable volume_control by default, this
+should be done by the local admin only. The ThinkPad UI is for the
+console audio control to be done through the volume keys only, and for
+the desktop environment to just provide on-screen-display feedback.
+Software volume control should be done only in the main AC97/HDA
+mixer.
+
+
+About the ThinkPad Console Audio control:
+
+ThinkPads have a built-in amplifier and muting circuit that drives the
+console headphone and speakers. This circuit is after the main AC97
+or HDA mixer in the audio path, and under exclusive control of the
+firmware.
+
+ThinkPads have three special hotkeys to interact with the console
+audio control: volume up, volume down and mute.
+
+It is worth noting that the normal way the mute function works (on
+ThinkPads that do not have a "mute LED") is:
+
+1. Press mute to mute. It will *always* mute, you can press it as
+ many times as you want, and the sound will remain mute.
+
+2. Press either volume key to unmute the ThinkPad (it will _not_
+ change the volume, it will just unmute).
+
+This is a very superior design when compared to the cheap software-only
+mute-toggle solution found on normal consumer laptops: you can be
+absolutely sure the ThinkPad will not make noise if you press the mute
+button, no matter the previous state.
+
+The IBM ThinkPads, and the earlier Lenovo ThinkPads have variable-gain
+amplifiers driving the speakers and headphone output, and the firmware
+also handles volume control for the headphone and speakers on these
+ThinkPads without any help from the operating system (this volume
+control stage exists after the main AC97 or HDA mixer in the audio
+path).
+
+The newer Lenovo models only have firmware mute control, and depend on
+the main HDA mixer to do volume control (which is done by the operating
+system). In this case, the volume keys are filtered out for unmute
+key press (there are some firmware bugs in this area) and delivered as
+normal key presses to the operating system (thinkpad-acpi is not
+involved).
+
+
+The ThinkPad-ACPI volume control:
+
+The preferred way to interact with the Console Audio control is the
+ALSA interface.
+
+The legacy procfs interface allows one to read the current state,
+and if volume control is enabled, accepts the following commands:
+
+ echo up >/proc/acpi/ibm/volume
+ echo down >/proc/acpi/ibm/volume
+ echo mute >/proc/acpi/ibm/volume
+ echo unmute >/proc/acpi/ibm/volume
+ echo 'level <level>' >/proc/acpi/ibm/volume
+
+The <level> number range is 0 to 14 although not all of them may be
+distinct. To unmute the volume after the mute command, use either the
+up or down command (the level command will not unmute the volume), or
+the unmute command.
+
+You can use the volume_capabilities parameter to tell the driver
+whether your thinkpad has volume control or mute-only control:
+volume_capabilities=1 for mixers with mute and volume control,
+volume_capabilities=2 for mixers with only mute control.
+
+If the driver misdetects the capabilities for your ThinkPad model,
+please report this to ibm-acpi-devel@lists.sourceforge.net, so that we
+can update the driver.
+
+There are two strategies for volume control. To select which one
+should be used, use the volume_mode module parameter: volume_mode=1
+selects EC mode, and volume_mode=3 selects EC mode with NVRAM backing
+(so that volume/mute changes are remembered across shutdown/reboot).
+
+The driver will operate in volume_mode=3 by default. If that does not
+work well on your ThinkPad model, please report this to
+ibm-acpi-devel@lists.sourceforge.net.
+
+The driver supports the standard ALSA module parameters. If the ALSA
+mixer is disabled, the driver will disable all volume functionality.
+
+
+Fan control and monitoring: fan speed, fan enable/disable
+---------------------------------------------------------
+
+procfs: /proc/acpi/ibm/fan
+sysfs device attributes: (hwmon "thinkpad") fan1_input, pwm1,
+ pwm1_enable, fan2_input
+sysfs hwmon driver attributes: fan_watchdog
+
+NOTE NOTE NOTE: fan control operations are disabled by default for
+safety reasons. To enable them, the module parameter "fan_control=1"
+must be given to thinkpad-acpi.
+
+This feature attempts to show the current fan speed, control mode and
+other fan data that might be available. The speed is read directly
+from the hardware registers of the embedded controller. This is known
+to work on later R, T, X and Z series ThinkPads but may show a bogus
+value on other models.
+
+Some Lenovo ThinkPads support a secondary fan. This fan cannot be
+controlled separately, it shares the main fan control.
+
+Fan levels:
+
+Most ThinkPad fans work in "levels" at the firmware interface. Level 0
+stops the fan. The higher the level, the higher the fan speed, although
+adjacent levels often map to the same fan speed. 7 is the highest
+level, where the fan reaches the maximum recommended speed.
+
+Level "auto" means the EC changes the fan level according to some
+internal algorithm, usually based on readings from the thermal sensors.
+
+There is also a "full-speed" level, also known as "disengaged" level.
+In this level, the EC disables the speed-locked closed-loop fan control,
+and drives the fan as fast as it can go, which might exceed hardware
+limits, so use this level with caution.
+
+The fan usually ramps up or down slowly from one speed to another, and
+it is normal for the EC to take several seconds to react to fan
+commands. The full-speed level may take up to two minutes to ramp up to
+maximum speed, and in some ThinkPads, the tachometer readings go stale
+while the EC is transitioning to the full-speed level.
+
+WARNING WARNING WARNING: do not leave the fan disabled unless you are
+monitoring all of the temperature sensor readings and you are ready to
+enable it if necessary to avoid overheating.
+
+An enabled fan in level "auto" may stop spinning if the EC decides the
+ThinkPad is cool enough and doesn't need the extra airflow. This is
+normal, and the EC will spin the fan up if the various thermal readings
+rise too much.
+
+On the X40, this seems to depend on the CPU and HDD temperatures.
+Specifically, the fan is turned on when either the CPU temperature
+climbs to 56 degrees or the HDD temperature climbs to 46 degrees. The
+fan is turned off when the CPU temperature drops to 49 degrees and the
+HDD temperature drops to 41 degrees. These thresholds cannot
+currently be controlled.
+
+The ThinkPad's ACPI DSDT code will reprogram the fan on its own when
+certain conditions are met. It will override any fan programming done
+through thinkpad-acpi.
+
+The thinkpad-acpi kernel driver can be programmed to revert the fan
+level to a safe setting if userspace does not issue one of the procfs
+fan commands: "enable", "disable", "level" or "watchdog", or if there
+are no writes to pwm1_enable (or to pwm1 *if and only if* pwm1_enable is
+set to 1, manual mode) within a configurable amount of time of up to
+120 seconds. This functionality is called fan safety watchdog.
+
+Note that the watchdog timer stops after it enables the fan. It will be
+rearmed again automatically (using the same interval) when one of the
+above mentioned fan commands is received. The fan watchdog is,
+therefore, not suitable to protect against fan mode changes made through
+means other than the "enable", "disable", and "level" procfs fan
+commands, or the hwmon fan control sysfs interface.
+
+Procfs notes:
+
+The fan may be enabled or disabled with the following commands:
+
+ echo enable >/proc/acpi/ibm/fan
+ echo disable >/proc/acpi/ibm/fan
+
+Placing a fan on level 0 is the same as disabling it. Enabling a fan
+will try to place it in a safe level if it is too slow or disabled.
+
+The fan level can be controlled with the command:
+
+ echo 'level <level>' > /proc/acpi/ibm/fan
+
+Where <level> is an integer from 0 to 7, or one of the words "auto" or
+"full-speed" (without the quotes). Not all ThinkPads support the "auto"
+and "full-speed" levels. The driver accepts "disengaged" as an alias for
+"full-speed", and reports it as "disengaged" for backwards
+compatibility.
+
+On the X31 and X40 (and ONLY on those models), the fan speed can be
+controlled to a certain degree. Once the fan is running, it can be
+forced to run faster or slower with the following command:
+
+ echo 'speed <speed>' > /proc/acpi/ibm/fan
+
+The sustainable range of fan speeds on the X40 appears to be from about
+3700 to about 7350. Values outside this range either do not have any
+effect or the fan speed eventually settles somewhere in that range. The
+fan cannot be stopped or started with this command. This functionality
+is incomplete, and not available through the sysfs interface.
+
+To program the safety watchdog, use the "watchdog" command.
+
+ echo 'watchdog <interval in seconds>' > /proc/acpi/ibm/fan
+
+If you want to disable the watchdog, use 0 as the interval.
+
+Sysfs notes:
+
+The sysfs interface follows the hwmon subsystem guidelines for the most
+part, and the exception is the fan safety watchdog.
+
+Writes to any of the sysfs attributes may return the EINVAL error if
+that operation is not supported in a given ThinkPad or if the parameter
+is out-of-bounds, and EPERM if it is forbidden. They may also return
+EINTR (interrupted system call), and EIO (I/O error while trying to talk
+to the firmware).
+
+Features not yet implemented by the driver return ENOSYS.
+
+hwmon device attribute pwm1_enable:
+ 0: PWM offline (fan is set to full-speed mode)
+ 1: Manual PWM control (use pwm1 to set fan level)
+ 2: Hardware PWM control (EC "auto" mode)
+ 3: reserved (Software PWM control, not implemented yet)
+
+ Modes 0 and 2 are not supported by all ThinkPads, and the
+ driver is not always able to detect this. If it does know a
+ mode is unsupported, it will return -EINVAL.
+
+hwmon device attribute pwm1:
+ Fan level, scaled from the firmware values of 0-7 to the hwmon
+ scale of 0-255. 0 means fan stopped, 255 means highest normal
+ speed (level 7).
+
+ This attribute only commands the fan if pmw1_enable is set to 1
+ (manual PWM control).
+
+hwmon device attribute fan1_input:
+ Fan tachometer reading, in RPM. May go stale on certain
+ ThinkPads while the EC transitions the PWM to offline mode,
+ which can take up to two minutes. May return rubbish on older
+ ThinkPads.
+
+hwmon device attribute fan2_input:
+ Fan tachometer reading, in RPM, for the secondary fan.
+ Available only on some ThinkPads. If the secondary fan is
+ not installed, will always read 0.
+
+hwmon driver attribute fan_watchdog:
+ Fan safety watchdog timer interval, in seconds. Minimum is
+ 1 second, maximum is 120 seconds. 0 disables the watchdog.
+
+To stop the fan: set pwm1 to zero, and pwm1_enable to 1.
+
+To start the fan in a safe mode: set pwm1_enable to 2. If that fails
+with EINVAL, try to set pwm1_enable to 1 and pwm1 to at least 128 (255
+would be the safest choice, though).
+
+
+WAN
+---
+
+procfs: /proc/acpi/ibm/wan
+sysfs device attribute: wwan_enable (deprecated)
+sysfs rfkill class: switch "tpacpi_wwan_sw"
+
+This feature shows the presence and current state of the built-in
+Wireless WAN device.
+
+If the ThinkPad supports it, the WWAN state is stored in NVRAM,
+so it is kept across reboots and power-off.
+
+It was tested on a Lenovo ThinkPad X60. It should probably work on other
+ThinkPad models which come with this module installed.
+
+Procfs notes:
+
+If the W-WAN card is installed, the following commands can be used:
+
+ echo enable > /proc/acpi/ibm/wan
+ echo disable > /proc/acpi/ibm/wan
+
+Sysfs notes:
+
+ If the W-WAN card is installed, it can be enabled /
+ disabled through the "wwan_enable" thinkpad-acpi device
+ attribute, and its current status can also be queried.
+
+ enable:
+ 0: disables WWAN card / WWAN card is disabled
+ 1: enables WWAN card / WWAN card is enabled.
+
+ Note: this interface has been superseded by the generic rfkill
+ class. It has been deprecated, and it will be removed in year
+ 2010.
+
+ rfkill controller switch "tpacpi_wwan_sw": refer to
+ Documentation/rfkill.txt for details.
+
+
+EXPERIMENTAL: UWB
+-----------------
+
+This feature is considered EXPERIMENTAL because it has not been extensively
+tested and validated in various ThinkPad models yet. The feature may not
+work as expected. USE WITH CAUTION! To use this feature, you need to supply
+the experimental=1 parameter when loading the module.
+
+sysfs rfkill class: switch "tpacpi_uwb_sw"
+
+This feature exports an rfkill controller for the UWB device, if one is
+present and enabled in the BIOS.
+
+Sysfs notes:
+
+ rfkill controller switch "tpacpi_uwb_sw": refer to
+ Documentation/rfkill.txt for details.
+
+Adaptive keyboard
+-----------------
+
+sysfs device attribute: adaptive_kbd_mode
+
+This sysfs attribute controls the keyboard "face" that will be shown on the
+Lenovo X1 Carbon 2nd gen (2014)'s adaptive keyboard. The value can be read
+and set.
+
+1 = Home mode
+2 = Web-browser mode
+3 = Web-conference mode
+4 = Function mode
+5 = Layflat mode
+
+For more details about which buttons will appear depending on the mode, please
+review the laptop's user guide:
+http://www.lenovo.com/shop/americas/content/user_guides/x1carbon_2_ug_en.pdf
+
+Multiple Commands, Module Parameters
+------------------------------------
+
+Multiple commands can be written to the proc files in one shot by
+separating them with commas, for example:
+
+ echo enable,0xffff > /proc/acpi/ibm/hotkey
+ echo lcd_disable,crt_enable > /proc/acpi/ibm/video
+
+Commands can also be specified when loading the thinkpad-acpi module,
+for example:
+
+ modprobe thinkpad_acpi hotkey=enable,0xffff video=auto_disable
+
+
+Enabling debugging output
+-------------------------
+
+The module takes a debug parameter which can be used to selectively
+enable various classes of debugging output, for example:
+
+ modprobe thinkpad_acpi debug=0xffff
+
+will enable all debugging output classes. It takes a bitmask, so
+to enable more than one output class, just add their values.
+
+ Debug bitmask Description
+ 0x8000 Disclose PID of userspace programs
+ accessing some functions of the driver
+ 0x0001 Initialization and probing
+ 0x0002 Removal
+ 0x0004 RF Transmitter control (RFKILL)
+ (bluetooth, WWAN, UWB...)
+ 0x0008 HKEY event interface, hotkeys
+ 0x0010 Fan control
+ 0x0020 Backlight brightness
+ 0x0040 Audio mixer/volume control
+
+There is also a kernel build option to enable more debugging
+information, which may be necessary to debug driver problems.
+
+The level of debugging information output by the driver can be changed
+at runtime through sysfs, using the driver attribute debug_level. The
+attribute takes the same bitmask as the debug module parameter above.
+
+
+Force loading of module
+-----------------------
+
+If thinkpad-acpi refuses to detect your ThinkPad, you can try to specify
+the module parameter force_load=1. Regardless of whether this works or
+not, please contact ibm-acpi-devel@lists.sourceforge.net with a report.
+
+
+Sysfs interface changelog:
+
+0x000100: Initial sysfs support, as a single platform driver and
+ device.
+0x000200: Hot key support for 32 hot keys, and radio slider switch
+ support.
+0x010000: Hot keys are now handled by default over the input
+ layer, the radio switch generates input event EV_RADIO,
+ and the driver enables hot key handling by default in
+ the firmware.
+
+0x020000: ABI fix: added a separate hwmon platform device and
+ driver, which must be located by name (thinkpad)
+ and the hwmon class for libsensors4 (lm-sensors 3)
+ compatibility. Moved all hwmon attributes to this
+ new platform device.
+
+0x020100: Marker for thinkpad-acpi with hot key NVRAM polling
+ support. If you must, use it to know you should not
+ start a userspace NVRAM poller (allows to detect when
+ NVRAM is compiled out by the user because it is
+ unneeded/undesired in the first place).
+0x020101: Marker for thinkpad-acpi with hot key NVRAM polling
+ and proper hotkey_mask semantics (version 8 of the
+ NVRAM polling patch). Some development snapshots of
+ 0.18 had an earlier version that did strange things
+ to hotkey_mask.
+
+0x020200: Add poll()/select() support to the following attributes:
+ hotkey_radio_sw, wakeup_hotunplug_complete, wakeup_reason
+
+0x020300: hotkey enable/disable support removed, attributes
+ hotkey_bios_enabled and hotkey_enable deprecated and
+ marked for removal.
+
+0x020400: Marker for 16 LEDs support. Also, LEDs that are known
+ to not exist in a given model are not registered with
+ the LED sysfs class anymore.
+
+0x020500: Updated hotkey driver, hotkey_mask is always available
+ and it is always able to disable hot keys. Very old
+ thinkpads are properly supported. hotkey_bios_mask
+ is deprecated and marked for removal.
+
+0x020600: Marker for backlight change event support.
+
+0x020700: Support for mute-only mixers.
+ Volume control in read-only mode by default.
+ Marker for ALSA mixer support.
diff --git a/Documentation/laptops/toshiba_haps.txt b/Documentation/laptops/toshiba_haps.txt
new file mode 100644
index 000000000..11dbcfdc9
--- /dev/null
+++ b/Documentation/laptops/toshiba_haps.txt
@@ -0,0 +1,76 @@
+Kernel driver toshiba_haps
+Toshiba HDD Active Protection Sensor
+====================================
+
+Author: Azael Avalos <coproscefalo@gmail.com>
+
+
+0. Contents
+-----------
+
+1. Description
+2. Interface
+3. Accelerometer axes
+4. Supported devices
+5. Usage
+
+
+1. Description
+--------------
+
+This driver provides support for the accelerometer found in various Toshiba
+laptops, being called "Toshiba HDD Protection - Shock Sensor" officialy,
+and detects laptops automatically with this device.
+On Windows, Toshiba provided software monitors this device and provides
+automatic HDD protection (head unload) on sudden moves or harsh vibrations,
+however, this driver only provides a notification via a sysfs file to let
+userspace tools or daemons act accordingly, as well as providing a sysfs
+file to set the desired protection level or sensor sensibility.
+
+
+2. Interface
+------------
+
+This device comes with 3 methods:
+_STA - Checks existence of the device, returning Zero if the device does not
+ exists or is not supported.
+PTLV - Sets the desired protection level.
+RSSS - Shuts down the HDD protection interface for a few seconds,
+ then restores normal operation.
+
+Note:
+The presence of Solid State Drives (SSD) can make this driver to fail loading,
+given the fact that such drives have no movable parts, and thus, not requiring
+any "protection" as well as failing during the evaluation of the _STA method
+found under this device.
+
+
+3. Accelerometer axes
+---------------------
+
+This device does not report any axes, however, to query the sensor position
+a couple HCI (Hardware Configuration Interface) calls (0x6D and 0xA6) are
+provided to query such information, handled by the kernel module toshiba_acpi
+since kernel version 3.15.
+
+
+4. Supported devices
+--------------------
+
+This driver binds itself to the ACPI device TOS620A, and any Toshiba laptop
+with this device is supported, given the fact that they have the presence of
+conventional HDD and not only SSD, or a combination of both HDD and SSD.
+
+
+5. Usage
+--------
+
+The sysfs files under /sys/devices/LNXSYSTM:00/LNXSYBUS:00/TOS620A:00/ are:
+protection_level - The protection_level is readable and writeable, and
+ provides a way to let userspace query the current protection
+ level, as well as set the desired protection level, the
+ available protection levels are:
+ 0 - Disabled | 1 - Low | 2 - Medium | 3 - High
+reset_protection - The reset_protection entry is writeable only, being "1"
+ the only parameter it accepts, it is used to trigger
+ a reset of the protection interface.
diff --git a/Documentation/ldm.txt b/Documentation/ldm.txt
new file mode 100644
index 000000000..4f80edd14
--- /dev/null
+++ b/Documentation/ldm.txt
@@ -0,0 +1,109 @@
+
+ LDM - Logical Disk Manager (Dynamic Disks)
+ ------------------------------------------
+
+Originally Written by FlatCap - Richard Russon <ldm@flatcap.org>.
+Last Updated by Anton Altaparmakov on 30 March 2007 for Windows Vista.
+
+Overview
+--------
+
+Windows 2000, XP, and Vista use a new partitioning scheme. It is a complete
+replacement for the MSDOS style partitions. It stores its information in a
+1MiB journalled database at the end of the physical disk. The size of
+partitions is limited only by disk space. The maximum number of partitions is
+nearly 2000.
+
+Any partitions created under the LDM are called "Dynamic Disks". There are no
+longer any primary or extended partitions. Normal MSDOS style partitions are
+now known as Basic Disks.
+
+If you wish to use Spanned, Striped, Mirrored or RAID 5 Volumes, you must use
+Dynamic Disks. The journalling allows Windows to make changes to these
+partitions and filesystems without the need to reboot.
+
+Once the LDM driver has divided up the disk, you can use the MD driver to
+assemble any multi-partition volumes, e.g. Stripes, RAID5.
+
+To prevent legacy applications from repartitioning the disk, the LDM creates a
+dummy MSDOS partition containing one disk-sized partition. This is what is
+supported with the Linux LDM driver.
+
+A newer approach that has been implemented with Vista is to put LDM on top of a
+GPT label disk. This is not supported by the Linux LDM driver yet.
+
+
+Example
+-------
+
+Below we have a 50MiB disk, divided into seven partitions.
+N.B. The missing 1MiB at the end of the disk is where the LDM database is
+ stored.
+
+ Device | Offset Bytes Sectors MiB | Size Bytes Sectors MiB
+ -------+----------------------------+---------------------------
+ hda | 0 0 0 | 52428800 102400 50
+ hda1 | 51380224 100352 49 | 1048576 2048 1
+ hda2 | 16384 32 0 | 6979584 13632 6
+ hda3 | 6995968 13664 6 | 10485760 20480 10
+ hda4 | 17481728 34144 16 | 4194304 8192 4
+ hda5 | 21676032 42336 20 | 5242880 10240 5
+ hda6 | 26918912 52576 25 | 10485760 20480 10
+ hda7 | 37404672 73056 35 | 13959168 27264 13
+
+The LDM Database may not store the partitions in the order that they appear on
+disk, but the driver will sort them.
+
+When Linux boots, you will see something like:
+
+ hda: 102400 sectors w/32KiB Cache, CHS=50/64/32
+ hda: [LDM] hda1 hda2 hda3 hda4 hda5 hda6 hda7
+
+
+Compiling LDM Support
+---------------------
+
+To enable LDM, choose the following two options:
+
+ "Advanced partition selection" CONFIG_PARTITION_ADVANCED
+ "Windows Logical Disk Manager (Dynamic Disk) support" CONFIG_LDM_PARTITION
+
+If you believe the driver isn't working as it should, you can enable the extra
+debugging code. This will produce a LOT of output. The option is:
+
+ "Windows LDM extra logging" CONFIG_LDM_DEBUG
+
+N.B. The partition code cannot be compiled as a module.
+
+As with all the partition code, if the driver doesn't see signs of its type of
+partition, it will pass control to another driver, so there is no harm in
+enabling it.
+
+If you have Dynamic Disks but don't enable the driver, then all you will see
+is a dummy MSDOS partition filling the whole disk. You won't be able to mount
+any of the volumes on the disk.
+
+
+Booting
+-------
+
+If you enable LDM support, then lilo is capable of booting from any of the
+discovered partitions. However, grub does not understand the LDM partitioning
+and cannot boot from a Dynamic Disk.
+
+
+More Documentation
+------------------
+
+There is an Overview of the LDM together with complete Technical Documentation.
+It is available for download.
+
+ http://www.linux-ntfs.org/
+
+If you have any LDM questions that aren't answered in the documentation, email
+me.
+
+Cheers,
+ FlatCap - Richard Russon
+ ldm@flatcap.org
+
diff --git a/Documentation/leds/00-INDEX b/Documentation/leds/00-INDEX
new file mode 100644
index 000000000..b4ef1f34e
--- /dev/null
+++ b/Documentation/leds/00-INDEX
@@ -0,0 +1,22 @@
+00-INDEX
+ - This file
+leds-blinkm.txt
+ - Driver for BlinkM LED-devices.
+leds-class.txt
+ - documents LED handling under Linux.
+leds-lp3944.txt
+ - notes on how to use the leds-lp3944 driver.
+leds-lp5521.txt
+ - notes on how to use the leds-lp5521 driver.
+leds-lp5523.txt
+ - notes on how to use the leds-lp5523 driver.
+leds-lp5562.txt
+ - notes on how to use the leds-lp5562 driver.
+leds-lp55xx.txt
+ - description about lp55xx common driver.
+leds-lm3556.txt
+ - notes on how to use the leds-lm3556 driver.
+ledtrig-oneshot.txt
+ - One-shot LED trigger for both sporadic and dense events.
+ledtrig-transient.txt
+ - LED Transient Trigger, one shot timer activation.
diff --git a/Documentation/leds/leds-blinkm.txt b/Documentation/leds/leds-blinkm.txt
new file mode 100644
index 000000000..9dd92f4cf
--- /dev/null
+++ b/Documentation/leds/leds-blinkm.txt
@@ -0,0 +1,80 @@
+The leds-blinkm driver supports the devices of the BlinkM family.
+
+They are RGB-LED modules driven by a (AT)tiny microcontroller and
+communicate through I2C. The default address of these modules is
+0x09 but this can be changed through a command. By this you could
+dasy-chain up to 127 BlinkMs on an I2C bus.
+
+The device accepts RGB and HSB color values through separate commands.
+Also you can store blinking sequences as "scripts" in
+the controller and run them. Also fading is an option.
+
+The interface this driver provides is 2-fold:
+
+a) LED class interface for use with triggers
+############################################
+
+The registration follows the scheme:
+blinkm-<i2c-bus-nr>-<i2c-device-nr>-<color>
+
+$ ls -h /sys/class/leds/blinkm-6-*
+/sys/class/leds/blinkm-6-9-blue:
+brightness device max_brightness power subsystem trigger uevent
+
+/sys/class/leds/blinkm-6-9-green:
+brightness device max_brightness power subsystem trigger uevent
+
+/sys/class/leds/blinkm-6-9-red:
+brightness device max_brightness power subsystem trigger uevent
+
+(same is /sys/bus/i2c/devices/6-0009/leds)
+
+We can control the colors separated into red, green and blue and
+assign triggers on each color.
+
+E.g.:
+
+$ cat blinkm-6-9-blue/brightness
+05
+
+$ echo 200 > blinkm-6-9-blue/brightness
+$
+
+$ modprobe ledtrig-heartbeat
+$ echo heartbeat > blinkm-6-9-green/trigger
+$
+
+
+b) Sysfs group to control rgb, fade, hsb, scripts ...
+#####################################################
+
+This extended interface is available as folder blinkm
+in the sysfs folder of the I2C device.
+E.g. below /sys/bus/i2c/devices/6-0009/blinkm
+
+$ ls -h /sys/bus/i2c/devices/6-0009/blinkm/
+blue green red test
+
+Currently supported is just setting red, green, blue
+and a test sequence.
+
+E.g.:
+
+$ cat *
+00
+00
+00
+#Write into test to start test sequence!#
+
+$ echo 1 > test
+$
+
+$ echo 255 > red
+$
+
+
+
+as of 6/2012
+
+dl9pf <at> gmx <dot> de
+
diff --git a/Documentation/leds/leds-class-flash.txt b/Documentation/leds/leds-class-flash.txt
new file mode 100644
index 000000000..19bb67355
--- /dev/null
+++ b/Documentation/leds/leds-class-flash.txt
@@ -0,0 +1,22 @@
+
+Flash LED handling under Linux
+==============================
+
+Some LED devices provide two modes - torch and flash. In the LED subsystem
+those modes are supported by LED class (see Documentation/leds/leds-class.txt)
+and LED Flash class respectively. The torch mode related features are enabled
+by default and the flash ones only if a driver declares it by setting
+LED_DEV_CAP_FLASH flag.
+
+In order to enable the support for flash LEDs CONFIG_LEDS_CLASS_FLASH symbol
+must be defined in the kernel config. A LED Flash class driver must be
+registered in the LED subsystem with led_classdev_flash_register function.
+
+Following sysfs attributes are exposed for controlling flash LED devices:
+(see Documentation/ABI/testing/sysfs-class-led-flash)
+ - flash_brightness
+ - max_flash_brightness
+ - flash_timeout
+ - max_flash_timeout
+ - flash_strobe
+ - flash_fault
diff --git a/Documentation/leds/leds-class.txt b/Documentation/leds/leds-class.txt
new file mode 100644
index 000000000..79699c200
--- /dev/null
+++ b/Documentation/leds/leds-class.txt
@@ -0,0 +1,97 @@
+
+LED handling under Linux
+========================
+
+If you're reading this and thinking about keyboard leds, these are
+handled by the input subsystem and the led class is *not* needed.
+
+In its simplest form, the LED class just allows control of LEDs from
+userspace. LEDs appear in /sys/class/leds/. The maximum brightness of the
+LED is defined in max_brightness file. The brightness file will set the brightness
+of the LED (taking a value 0-max_brightness). Most LEDs don't have hardware
+brightness support so will just be turned on for non-zero brightness settings.
+
+The class also introduces the optional concept of an LED trigger. A trigger
+is a kernel based source of led events. Triggers can either be simple or
+complex. A simple trigger isn't configurable and is designed to slot into
+existing subsystems with minimal additional code. Examples are the ide-disk,
+nand-disk and sharpsl-charge triggers. With led triggers disabled, the code
+optimises away.
+
+Complex triggers whilst available to all LEDs have LED specific
+parameters and work on a per LED basis. The timer trigger is an example.
+The timer trigger will periodically change the LED brightness between
+LED_OFF and the current brightness setting. The "on" and "off" time can
+be specified via /sys/class/leds/<device>/delay_{on,off} in milliseconds.
+You can change the brightness value of a LED independently of the timer
+trigger. However, if you set the brightness value to LED_OFF it will
+also disable the timer trigger.
+
+You can change triggers in a similar manner to the way an IO scheduler
+is chosen (via /sys/class/leds/<device>/trigger). Trigger specific
+parameters can appear in /sys/class/leds/<device> once a given trigger is
+selected.
+
+
+Design Philosophy
+=================
+
+The underlying design philosophy is simplicity. LEDs are simple devices
+and the aim is to keep a small amount of code giving as much functionality
+as possible. Please keep this in mind when suggesting enhancements.
+
+
+LED Device Naming
+=================
+
+Is currently of the form:
+
+"devicename:colour:function"
+
+There have been calls for LED properties such as colour to be exported as
+individual led class attributes. As a solution which doesn't incur as much
+overhead, I suggest these become part of the device name. The naming scheme
+above leaves scope for further attributes should they be needed. If sections
+of the name don't apply, just leave that section blank.
+
+
+Hardware accelerated blink of LEDs
+==================================
+
+Some LEDs can be programmed to blink without any CPU interaction. To
+support this feature, a LED driver can optionally implement the
+blink_set() function (see <linux/leds.h>). To set an LED to blinking,
+however, it is better to use the API function led_blink_set(), as it
+will check and implement software fallback if necessary.
+
+To turn off blinking again, use the API function led_brightness_set()
+as that will not just set the LED brightness but also stop any software
+timers that may have been required for blinking.
+
+The blink_set() function should choose a user friendly blinking value
+if it is called with *delay_on==0 && *delay_off==0 parameters. In this
+case the driver should give back the chosen value through delay_on and
+delay_off parameters to the leds subsystem.
+
+Setting the brightness to zero with brightness_set() callback function
+should completely turn off the LED and cancel the previously programmed
+hardware blinking function, if any.
+
+
+Known Issues
+============
+
+The LED Trigger core cannot be a module as the simple trigger functions
+would cause nightmare dependency issues. I see this as a minor issue
+compared to the benefits the simple trigger functionality brings. The
+rest of the LED subsystem can be modular.
+
+
+Future Development
+==================
+
+At the moment, a trigger can't be created specifically for a single LED.
+There are a number of cases where a trigger might only be mappable to a
+particular LED (ACPI?). The addition of triggers provided by the LED driver
+should cover this option and be possible to add without breaking the
+current interface.
diff --git a/Documentation/leds/leds-lm3556.txt b/Documentation/leds/leds-lm3556.txt
new file mode 100644
index 000000000..62278e871
--- /dev/null
+++ b/Documentation/leds/leds-lm3556.txt
@@ -0,0 +1,85 @@
+Kernel driver for lm3556
+========================
+
+*Texas Instrument:
+ 1.5 A Synchronous Boost LED Flash Driver w/ High-Side Current Source
+* Datasheet: http://www.national.com/ds/LM/LM3556.pdf
+
+Authors:
+ Daniel Jeong
+ Contact:Daniel Jeong(daniel.jeong-at-ti.com, gshark.jeong-at-gmail.com)
+
+Description
+-----------
+There are 3 functions in LM3556, Flash, Torch and Indicator.
+
+FLASH MODE
+In Flash Mode, the LED current source(LED) provides 16 target current levels
+from 93.75 mA to 1500 mA.The Flash currents are adjusted via the CURRENT
+CONTROL REGISTER(0x09).Flash mode is activated by the ENABLE REGISTER(0x0A),
+or by pulling the STROBE pin HIGH.
+LM3556 Flash can be controlled through sys/class/leds/flash/brightness file
+* if STROBE pin is enabled, below example control brightness only, and
+ON / OFF will be controlled by STROBE pin.
+
+Flash Example:
+OFF : #echo 0 > sys/class/leds/flash/brightness
+93.75 mA: #echo 1 > sys/class/leds/flash/brightness
+... .....
+1500 mA: #echo 16 > sys/class/leds/flash/brightness
+
+TORCH MODE
+In Torch Mode, the current source(LED) is programmed via the CURRENT CONTROL
+REGISTER(0x09).Torch Mode is activated by the ENABLE REGISTER(0x0A) or by the
+hardware TORCH input.
+LM3556 torch can be controlled through sys/class/leds/torch/brightness file.
+* if TORCH pin is enabled, below example control brightness only,
+and ON / OFF will be controlled by TORCH pin.
+
+Torch Example:
+OFF : #echo 0 > sys/class/leds/torch/brightness
+46.88 mA: #echo 1 > sys/class/leds/torch/brightness
+... .....
+375 mA : #echo 8 > sys/class/leds/torch/brightness
+
+INDICATOR MODE
+Indicator pattern can be set through sys/class/leds/indicator/pattern file,
+and 4 patterns are pre-defined in indicator_pattern array.
+According to N-lank, Pulse time and N Period values, different pattern wiill
+be generated.If you want new patterns for your own device, change
+indicator_pattern array with your own values and INDIC_PATTERN_SIZE.
+Please refer datasheet for more detail about N-Blank, Pulse time and N Period.
+
+Indicator pattern example:
+pattern 0: #echo 0 > sys/class/leds/indicator/pattern
+....
+pattern 3: #echo 3 > sys/class/leds/indicator/pattern
+
+Indicator brightness can be controlled through
+sys/class/leds/indicator/brightness file.
+
+Example:
+OFF : #echo 0 > sys/class/leds/indicator/brightness
+5.86 mA : #echo 1 > sys/class/leds/indicator/brightness
+........
+46.875mA : #echo 8 > sys/class/leds/indicator/brightness
+
+Notes
+-----
+Driver expects it is registered using the i2c_board_info mechanism.
+To register the chip at address 0x63 on specific adapter, set the platform data
+according to include/linux/platform_data/leds-lm3556.h, set the i2c board info
+
+Example:
+ static struct i2c_board_info board_i2c_ch4[] __initdata = {
+ {
+ I2C_BOARD_INFO(LM3556_NAME, 0x63),
+ .platform_data = &lm3556_pdata,
+ },
+ };
+
+and register it in the platform init function
+
+Example:
+ board_register_i2c_bus(4, 400,
+ board_i2c_ch4, ARRAY_SIZE(board_i2c_ch4));
diff --git a/Documentation/leds/leds-lp3944.txt b/Documentation/leds/leds-lp3944.txt
new file mode 100644
index 000000000..e88ac3b60
--- /dev/null
+++ b/Documentation/leds/leds-lp3944.txt
@@ -0,0 +1,50 @@
+Kernel driver lp3944
+====================
+
+ * National Semiconductor LP3944 Fun-light Chip
+ Prefix: 'lp3944'
+ Addresses scanned: None (see the Notes section below)
+ Datasheet: Publicly available at the National Semiconductor website
+ http://www.national.com/pf/LP/LP3944.html
+
+Authors:
+ Antonio Ospite <ospite@studenti.unina.it>
+
+
+Description
+-----------
+The LP3944 is a helper chip that can drive up to 8 leds, with two programmable
+DIM modes; it could even be used as a gpio expander but this driver assumes it
+is used as a led controller.
+
+The DIM modes are used to set _blink_ patterns for leds, the pattern is
+specified supplying two parameters:
+ - period: from 0s to 1.6s
+ - duty cycle: percentage of the period the led is on, from 0 to 100
+
+Setting a led in DIM0 or DIM1 mode makes it blink according to the pattern.
+See the datasheet for details.
+
+LP3944 can be found on Motorola A910 smartphone, where it drives the rgb
+leds, the camera flash light and the lcds power.
+
+
+Notes
+-----
+The chip is used mainly in embedded contexts, so this driver expects it is
+registered using the i2c_board_info mechanism.
+
+To register the chip at address 0x60 on adapter 0, set the platform data
+according to include/linux/leds-lp3944.h, set the i2c board info:
+
+ static struct i2c_board_info a910_i2c_board_info[] __initdata = {
+ {
+ I2C_BOARD_INFO("lp3944", 0x60),
+ .platform_data = &a910_lp3944_leds,
+ },
+ };
+
+and register it in the platform init function
+
+ i2c_register_board_info(0, a910_i2c_board_info,
+ ARRAY_SIZE(a910_i2c_board_info));
diff --git a/Documentation/leds/leds-lp5521.txt b/Documentation/leds/leds-lp5521.txt
new file mode 100644
index 000000000..d08d8c179
--- /dev/null
+++ b/Documentation/leds/leds-lp5521.txt
@@ -0,0 +1,101 @@
+Kernel driver for lp5521
+========================
+
+* National Semiconductor LP5521 led driver chip
+* Datasheet: http://www.national.com/pf/LP/LP5521.html
+
+Authors: Mathias Nyman, Yuri Zaporozhets, Samu Onkalo
+Contact: Samu Onkalo (samu.p.onkalo-at-nokia.com)
+
+Description
+-----------
+
+LP5521 can drive up to 3 channels. Leds can be controlled directly via
+the led class control interface. Channels have generic names:
+lp5521:channelx, where x is 0 .. 2
+
+All three channels can be also controlled using the engine micro programs.
+More details of the instructions can be found from the public data sheet.
+
+LP5521 has the internal program memory for running various LED patterns.
+There are two ways to run LED patterns.
+
+1) Legacy interface - enginex_mode and enginex_load
+ Control interface for the engines:
+ x is 1 .. 3
+ enginex_mode : disabled, load, run
+ enginex_load : store program (visible only in engine load mode)
+
+ Example (start to blink the channel 2 led):
+ cd /sys/class/leds/lp5521:channel2/device
+ echo "load" > engine3_mode
+ echo "037f4d0003ff6000" > engine3_load
+ echo "run" > engine3_mode
+
+ To stop the engine:
+ echo "disabled" > engine3_mode
+
+2) Firmware interface - LP55xx common interface
+ For the details, please refer to 'firmware' section in leds-lp55xx.txt
+
+sysfs contains a selftest entry.
+The test communicates with the chip and checks that
+the clock mode is automatically set to the requested one.
+
+Each channel has its own led current settings.
+/sys/class/leds/lp5521:channel0/led_current - RW
+/sys/class/leds/lp5521:channel0/max_current - RO
+Format: 10x mA i.e 10 means 1.0 mA
+
+example platform data:
+
+Note: chan_nr can have values between 0 and 2.
+The name of each channel can be configurable.
+If the name field is not defined, the default name will be set to 'xxxx:channelN'
+(XXXX : pdata->label or i2c client name, N : channel number)
+
+static struct lp55xx_led_config lp5521_led_config[] = {
+ {
+ .name = "red",
+ .chan_nr = 0,
+ .led_current = 50,
+ .max_current = 130,
+ }, {
+ .name = "green",
+ .chan_nr = 1,
+ .led_current = 0,
+ .max_current = 130,
+ }, {
+ .name = "blue",
+ .chan_nr = 2,
+ .led_current = 0,
+ .max_current = 130,
+ }
+};
+
+static int lp5521_setup(void)
+{
+ /* setup HW resources */
+}
+
+static void lp5521_release(void)
+{
+ /* Release HW resources */
+}
+
+static void lp5521_enable(bool state)
+{
+ /* Control of chip enable signal */
+}
+
+static struct lp55xx_platform_data lp5521_platform_data = {
+ .led_config = lp5521_led_config,
+ .num_channels = ARRAY_SIZE(lp5521_led_config),
+ .clock_mode = LP55XX_CLOCK_EXT,
+ .setup_resources = lp5521_setup,
+ .release_resources = lp5521_release,
+ .enable = lp5521_enable,
+};
+
+If the current is set to 0 in the platform data, that channel is
+disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp5523.txt b/Documentation/leds/leds-lp5523.txt
new file mode 100644
index 000000000..5b3e91d4a
--- /dev/null
+++ b/Documentation/leds/leds-lp5523.txt
@@ -0,0 +1,100 @@
+Kernel driver for lp5523
+========================
+
+* National Semiconductor LP5523 led driver chip
+* Datasheet: http://www.national.com/pf/LP/LP5523.html
+
+Authors: Mathias Nyman, Yuri Zaporozhets, Samu Onkalo
+Contact: Samu Onkalo (samu.p.onkalo-at-nokia.com)
+
+Description
+-----------
+LP5523 can drive up to 9 channels. Leds can be controlled directly via
+the led class control interface.
+The name of each channel is configurable in the platform data - name and label.
+There are three options to make the channel name.
+
+a) Define the 'name' in the platform data
+To make specific channel name, then use 'name' platform data.
+/sys/class/leds/R1 (name: 'R1')
+/sys/class/leds/B1 (name: 'B1')
+
+b) Use the 'label' with no 'name' field
+For one device name with channel number, then use 'label'.
+/sys/class/leds/RGB:channelN (label: 'RGB', N: 0 ~ 8)
+
+c) Default
+If both fields are NULL, 'lp5523' is used by default.
+/sys/class/leds/lp5523:channelN (N: 0 ~ 8)
+
+LP5523 has the internal program memory for running various LED patterns.
+There are two ways to run LED patterns.
+
+1) Legacy interface - enginex_mode, enginex_load and enginex_leds
+ Control interface for the engines:
+ x is 1 .. 3
+ enginex_mode : disabled, load, run
+ enginex_load : microcode load (visible only in load mode)
+ enginex_leds : led mux control (visible only in load mode)
+
+ cd /sys/class/leds/lp5523:channel2/device
+ echo "load" > engine3_mode
+ echo "9d80400004ff05ff437f0000" > engine3_load
+ echo "111111111" > engine3_leds
+ echo "run" > engine3_mode
+
+ To stop the engine:
+ echo "disabled" > engine3_mode
+
+2) Firmware interface - LP55xx common interface
+ For the details, please refer to 'firmware' section in leds-lp55xx.txt
+
+Selftest uses always the current from the platform data.
+
+Each channel contains led current settings.
+/sys/class/leds/lp5523:channel2/led_current - RW
+/sys/class/leds/lp5523:channel2/max_current - RO
+Format: 10x mA i.e 10 means 1.0 mA
+
+Example platform data:
+
+Note - chan_nr can have values between 0 and 8.
+
+static struct lp55xx_led_config lp5523_led_config[] = {
+ {
+ .name = "D1",
+ .chan_nr = 0,
+ .led_current = 50,
+ .max_current = 130,
+ },
+...
+ {
+ .chan_nr = 8,
+ .led_current = 50,
+ .max_current = 130,
+ }
+};
+
+static int lp5523_setup(void)
+{
+ /* Setup HW resources */
+}
+
+static void lp5523_release(void)
+{
+ /* Release HW resources */
+}
+
+static void lp5523_enable(bool state)
+{
+ /* Control chip enable signal */
+}
+
+static struct lp55xx_platform_data lp5523_platform_data = {
+ .led_config = lp5523_led_config,
+ .num_channels = ARRAY_SIZE(lp5523_led_config),
+ .clock_mode = LP55XX_CLOCK_EXT,
+ .setup_resources = lp5523_setup,
+ .release_resources = lp5523_release,
+ .enable = lp5523_enable,
+};
diff --git a/Documentation/leds/leds-lp5562.txt b/Documentation/leds/leds-lp5562.txt
new file mode 100644
index 000000000..5a823ff6b
--- /dev/null
+++ b/Documentation/leds/leds-lp5562.txt
@@ -0,0 +1,120 @@
+Kernel driver for LP5562
+========================
+
+* TI LP5562 LED Driver
+
+Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+
+ LP5562 can drive up to 4 channels. R/G/B and White.
+ LEDs can be controlled directly via the led class control interface.
+
+ All four channels can be also controlled using the engine micro programs.
+ LP5562 has the internal program memory for running various LED patterns.
+ For the details, please refer to 'firmware' section in leds-lp55xx.txt
+
+Device attribute: engine_mux
+
+ 3 Engines are allocated in LP5562, but the number of channel is 4.
+ Therefore each channel should be mapped to the engine number.
+ Value : RGB or W
+
+ This attribute is used for programming LED data with the firmware interface.
+ Unlike the LP5521/LP5523/55231, LP5562 has unique feature for the engine mux,
+ so additional sysfs is required.
+
+ LED Map
+ Red ... Engine 1 (fixed)
+ Green ... Engine 2 (fixed)
+ Blue ... Engine 3 (fixed)
+ White ... Engine 1 or 2 or 3 (selective)
+
+How to load the program data using engine_mux
+
+ Before loading the LP5562 program data, engine_mux should be written between
+ the engine selection and loading the firmware.
+ Engine mux has two different mode, RGB and W.
+ RGB is used for loading RGB program data, W is used for W program data.
+
+ For example, run blinking green channel pattern,
+ echo 2 > /sys/bus/i2c/devices/xxxx/select_engine # 2 is for green channel
+ echo "RGB" > /sys/bus/i2c/devices/xxxx/engine_mux # engine mux for RGB
+ echo 1 > /sys/class/firmware/lp5562/loading
+ echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
+ echo 0 > /sys/class/firmware/lp5562/loading
+ echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+ To run a blinking white pattern,
+ echo 1 or 2 or 3 > /sys/bus/i2c/devices/xxxx/select_engine
+ echo "W" > /sys/bus/i2c/devices/xxxx/engine_mux
+ echo 1 > /sys/class/firmware/lp5562/loading
+ echo "4000600040FF6000" > /sys/class/firmware/lp5562/data
+ echo 0 > /sys/class/firmware/lp5562/loading
+ echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+How to load the predefined patterns
+
+ Please refer to 'leds-lp55xx.txt"
+
+Setting Current of Each Channel
+
+ Like LP5521 and LP5523/55231, LP5562 provides LED current settings.
+ The 'led_current' and 'max_current' are used.
+
+(Example of Platform data)
+
+To configure the platform specific data, lp55xx_platform_data structure is used.
+
+static struct lp55xx_led_config lp5562_led_config[] = {
+ {
+ .name = "R",
+ .chan_nr = 0,
+ .led_current = 20,
+ .max_current = 40,
+ },
+ {
+ .name = "G",
+ .chan_nr = 1,
+ .led_current = 20,
+ .max_current = 40,
+ },
+ {
+ .name = "B",
+ .chan_nr = 2,
+ .led_current = 20,
+ .max_current = 40,
+ },
+ {
+ .name = "W",
+ .chan_nr = 3,
+ .led_current = 20,
+ .max_current = 40,
+ },
+};
+
+static int lp5562_setup(void)
+{
+ /* setup HW resources */
+}
+
+static void lp5562_release(void)
+{
+ /* Release HW resources */
+}
+
+static void lp5562_enable(bool state)
+{
+ /* Control of chip enable signal */
+}
+
+static struct lp55xx_platform_data lp5562_platform_data = {
+ .led_config = lp5562_led_config,
+ .num_channels = ARRAY_SIZE(lp5562_led_config),
+ .setup_resources = lp5562_setup,
+ .release_resources = lp5562_release,
+ .enable = lp5562_enable,
+};
+
+If the current is set to 0 in the platform data, that channel is
+disabled and it is not visible in the sysfs.
diff --git a/Documentation/leds/leds-lp55xx.txt b/Documentation/leds/leds-lp55xx.txt
new file mode 100644
index 000000000..bcea12a0c
--- /dev/null
+++ b/Documentation/leds/leds-lp55xx.txt
@@ -0,0 +1,194 @@
+LP5521/LP5523/LP55231/LP5562/LP8501 Common Driver
+=================================================
+
+Authors: Milo(Woogyom) Kim <milo.kim@ti.com>
+
+Description
+-----------
+LP5521, LP5523/55231, LP5562 and LP8501 have common features as below.
+
+ Register access via the I2C
+ Device initialization/deinitialization
+ Create LED class devices for multiple output channels
+ Device attributes for user-space interface
+ Program memory for running LED patterns
+
+The LP55xx common driver provides these features using exported functions.
+ lp55xx_init_device() / lp55xx_deinit_device()
+ lp55xx_register_leds() / lp55xx_unregister_leds()
+ lp55xx_regsister_sysfs() / lp55xx_unregister_sysfs()
+
+( Driver Structure Data )
+
+In lp55xx common driver, two different data structure is used.
+
+o lp55xx_led
+ control multi output LED channels such as led current, channel index.
+o lp55xx_chip
+ general chip control such like the I2C and platform data.
+
+For example, LP5521 has maximum 3 LED channels.
+LP5523/55231 has 9 output channels.
+
+lp55xx_chip for LP5521 ... lp55xx_led #1
+ lp55xx_led #2
+ lp55xx_led #3
+
+lp55xx_chip for LP5523 ... lp55xx_led #1
+ lp55xx_led #2
+ .
+ .
+ lp55xx_led #9
+
+( Chip Dependent Code )
+
+To support device specific configurations, special structure
+'lpxx_device_config' is used.
+
+ Maximum number of channels
+ Reset command, chip enable command
+ Chip specific initialization
+ Brightness control register access
+ Setting LED output current
+ Program memory address access for running patterns
+ Additional device specific attributes
+
+( Firmware Interface )
+
+LP55xx family devices have the internal program memory for running
+various LED patterns.
+This pattern data is saved as a file in the user-land or
+hex byte string is written into the memory through the I2C.
+LP55xx common driver supports the firmware interface.
+
+LP55xx chips have three program engines.
+To load and run the pattern, the programming sequence is following.
+ (1) Select an engine number (1/2/3)
+ (2) Mode change to load
+ (3) Write pattern data into selected area
+ (4) Mode change to run
+
+The LP55xx common driver provides simple interfaces as below.
+select_engine : Select which engine is used for running program
+run_engine : Start program which is loaded via the firmware interface
+firmware : Load program data
+
+In case of LP5523, one more command is required, 'enginex_leds'.
+It is used for selecting LED output(s) at each engine number.
+In more details, please refer to 'leds-lp5523.txt'.
+
+For example, run blinking pattern in engine #1 of LP5521
+echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
+echo 1 > /sys/class/firmware/lp5521/loading
+echo "4000600040FF6000" > /sys/class/firmware/lp5521/data
+echo 0 > /sys/class/firmware/lp5521/loading
+echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+For example, run blinking pattern in engine #3 of LP55231
+Two LEDs are configured as pattern output channels.
+echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
+echo 1 > /sys/class/firmware/lp55231/loading
+echo "9d0740ff7e0040007e00a0010000" > /sys/class/firmware/lp55231/data
+echo 0 > /sys/class/firmware/lp55231/loading
+echo "000001100" > /sys/bus/i2c/devices/xxxx/engine3_leds
+echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+To start blinking patterns in engine #2 and #3 simultaneously,
+for idx in 2 3
+do
+ echo $idx > /sys/class/leds/red/device/select_engine
+ sleep 0.1
+ echo 1 > /sys/class/firmware/lp5521/loading
+ echo "4000600040FF6000" > /sys/class/firmware/lp5521/data
+ echo 0 > /sys/class/firmware/lp5521/loading
+done
+echo 1 > /sys/class/leds/red/device/run_engine
+
+Here is another example for LP5523.
+Full LED strings are selected by 'engine2_leds'.
+echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
+echo 1 > /sys/class/firmware/lp5523/loading
+echo "9d80400004ff05ff437f0000" > /sys/class/firmware/lp5523/data
+echo 0 > /sys/class/firmware/lp5523/loading
+echo "111111111" > /sys/bus/i2c/devices/xxxx/engine2_leds
+echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+As soon as 'loading' is set to 0, registered callback is called.
+Inside the callback, the selected engine is loaded and memory is updated.
+To run programmed pattern, 'run_engine' attribute should be enabled.
+
+The pattern sqeuence of LP8501 is similar to LP5523.
+However pattern data is specific.
+Ex 1) Engine 1 is used
+echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
+echo 1 > /sys/class/firmware/lp8501/loading
+echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+echo 0 > /sys/class/firmware/lp8501/loading
+echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+Ex 2) Engine 2 and 3 are used at the same time
+echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
+sleep 1
+echo 1 > /sys/class/firmware/lp8501/loading
+echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+echo 0 > /sys/class/firmware/lp8501/loading
+sleep 1
+echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
+sleep 1
+echo 1 > /sys/class/firmware/lp8501/loading
+echo "9d0340ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+echo 0 > /sys/class/firmware/lp8501/loading
+sleep 1
+echo 1 > /sys/class/leds/d1/device/run_engine
+
+( 'run_engine' and 'firmware_cb' )
+The sequence of running the program data is common.
+But each device has own specific register addresses for commands.
+To support this, 'run_engine' and 'firmware_cb' are configurable in each driver.
+run_engine : Control the selected engine
+firmware_cb : The callback function after loading the firmware is done.
+ Chip specific commands for loading and updating program memory.
+
+( Predefined pattern data )
+
+Without the firmware interface, LP55xx driver provides another method for
+loading a LED pattern. That is 'predefined' pattern.
+A predefined pattern is defined in the platform data and load it(or them)
+via the sysfs if needed.
+To use the predefined pattern concept, 'patterns' and 'num_patterns' should be
+configured.
+
+ Example of predefined pattern data:
+
+ /* mode_1: blinking data */
+ static const u8 mode_1[] = {
+ 0x40, 0x00, 0x60, 0x00, 0x40, 0xFF, 0x60, 0x00,
+ };
+
+ /* mode_2: always on */
+ static const u8 mode_2[] = { 0x40, 0xFF, };
+
+ struct lp55xx_predef_pattern board_led_patterns[] = {
+ {
+ .r = mode_1,
+ .size_r = ARRAY_SIZE(mode_1),
+ },
+ {
+ .b = mode_2,
+ .size_b = ARRAY_SIZE(mode_2),
+ },
+ }
+
+ struct lp55xx_platform_data lp5562_pdata = {
+ ...
+ .patterns = board_led_patterns,
+ .num_patterns = ARRAY_SIZE(board_led_patterns),
+ };
+
+Then, mode_1 and mode_2 can be run via through the sysfs.
+
+ echo 1 > /sys/bus/i2c/devices/xxxx/led_pattern # red blinking LED pattern
+ echo 2 > /sys/bus/i2c/devices/xxxx/led_pattern # blue LED always on
+
+To stop running pattern,
+ echo 0 > /sys/bus/i2c/devices/xxxx/led_pattern
diff --git a/Documentation/leds/ledtrig-oneshot.txt b/Documentation/leds/ledtrig-oneshot.txt
new file mode 100644
index 000000000..07cd1fa41
--- /dev/null
+++ b/Documentation/leds/ledtrig-oneshot.txt
@@ -0,0 +1,59 @@
+One-shot LED Trigger
+====================
+
+This is a LED trigger useful for signaling the user of an event where there are
+no clear trap points to put standard led-on and led-off settings. Using this
+trigger, the application needs only to signal the trigger when an event has
+happened, than the trigger turns the LED on and than keeps it off for a
+specified amount of time.
+
+This trigger is meant to be usable both for sporadic and dense events. In the
+first case, the trigger produces a clear single controlled blink for each
+event, while in the latter it keeps blinking at constant rate, as to signal
+that the events are arriving continuously.
+
+A one-shot LED only stays in a constant state when there are no events. An
+additional "invert" property specifies if the LED has to stay off (normal) or
+on (inverted) when not rearmed.
+
+The trigger can be activated from user space on led class devices as shown
+below:
+
+ echo oneshot > trigger
+
+This adds the following sysfs attributes to the LED:
+
+ delay_on - specifies for how many milliseconds the LED has to stay at
+ LED_FULL brightness after it has been armed.
+ Default to 100 ms.
+
+ delay_off - specifies for how many milliseconds the LED has to stay at
+ LED_OFF brightness after it has been armed.
+ Default to 100 ms.
+
+ invert - reverse the blink logic. If set to 0 (default) blink on for delay_on
+ ms, then blink off for delay_off ms, leaving the LED normally off. If
+ set to 1, blink off for delay_off ms, then blink on for delay_on ms,
+ leaving the LED normally on.
+ Setting this value also immediately change the LED state.
+
+ shot - write any non-empty string to signal an events, this starts a blink
+ sequence if not already running.
+
+Example use-case: network devices, initialization:
+
+ echo oneshot > trigger # set trigger for this led
+ echo 33 > delay_on # blink at 1 / (33 + 33) Hz on continuous traffic
+ echo 33 > delay_off
+
+interface goes up:
+
+ echo 1 > invert # set led as normally-on, turn the led on
+
+packet received/transmitted:
+
+ echo 1 > shot # led starts blinking, ignored if already blinking
+
+interface goes down
+
+ echo 0 > invert # set led as normally-off, turn the led off
diff --git a/Documentation/leds/ledtrig-transient.txt b/Documentation/leds/ledtrig-transient.txt
new file mode 100644
index 000000000..3bd38b487
--- /dev/null
+++ b/Documentation/leds/ledtrig-transient.txt
@@ -0,0 +1,152 @@
+LED Transient Trigger
+=====================
+
+The leds timer trigger does not currently have an interface to activate
+a one shot timer. The current support allows for setting two timers, one for
+specifying how long a state to be on, and the second for how long the state
+to be off. The delay_on value specifies the time period an LED should stay
+in on state, followed by a delay_off value that specifies how long the LED
+should stay in off state. The on and off cycle repeats until the trigger
+gets deactivated. There is no provision for one time activation to implement
+features that require an on or off state to be held just once and then stay in
+the original state forever.
+
+Without one shot timer interface, user space can still use timer trigger to
+set a timer to hold a state, however when user space application crashes or
+goes away without deactivating the timer, the hardware will be left in that
+state permanently.
+
+As a specific example of this use-case, let's look at vibrate feature on
+phones. Vibrate function on phones is implemented using PWM pins on SoC or
+PMIC. There is a need to activate one shot timer to control the vibrate
+feature, to prevent user space crashes leaving the phone in vibrate mode
+permanently causing the battery to drain.
+
+Transient trigger addresses the need for one shot timer activation. The
+transient trigger can be enabled and disabled just like the other leds
+triggers.
+
+When an led class device driver registers itself, it can specify all leds
+triggers it supports and a default trigger. During registration, activation
+routine for the default trigger gets called. During registration of an led
+class device, the LED state does not change.
+
+When the driver unregisters, deactivation routine for the currently active
+trigger will be called, and LED state is changed to LED_OFF.
+
+Driver suspend changes the LED state to LED_OFF and resume doesn't change
+the state. Please note that there is no explicit interaction between the
+suspend and resume actions and the currently enabled trigger. LED state
+changes are suspended while the driver is in suspend state. Any timers
+that are active at the time driver gets suspended, continue to run, without
+being able to actually change the LED state. Once driver is resumed, triggers
+start functioning again.
+
+LED state changes are controlled using brightness which is a common led
+class device property. When brightness is set to 0 from user space via
+echo 0 > brightness, it will result in deactivating the current trigger.
+
+Transient trigger uses standard register and unregister interfaces. During
+trigger registration, for each led class device that specifies this trigger
+as its default trigger, trigger activation routine will get called. During
+registration, the LED state does not change, unless there is another trigger
+active, in which case LED state changes to LED_OFF.
+
+During trigger unregistration, LED state gets changed to LED_OFF.
+
+Transient trigger activation routine doesn't change the LED state. It
+creates its properties and does its initialization. Transient trigger
+deactivation routine, will cancel any timer that is active before it cleans
+up and removes the properties it created. It will restore the LED state to
+non-transient state. When driver gets suspended, irrespective of the transient
+state, the LED state changes to LED_OFF.
+
+Transient trigger can be enabled and disabled from user space on led class
+devices, that support this trigger as shown below:
+
+echo transient > trigger
+echo none > trigger
+
+NOTE: Add a new property trigger state to control the state.
+
+This trigger exports three properties, activate, state, and duration. When
+transient trigger is activated these properties are set to default values.
+
+- duration allows setting timer value in msecs. The initial value is 0.
+- activate allows activating and deactivating the timer specified by
+ duration as needed. The initial and default value is 0. This will allow
+ duration to be set after trigger activation.
+- state allows user to specify a transient state to be held for the specified
+ duration.
+
+ activate - one shot timer activate mechanism.
+ 1 when activated, 0 when deactivated.
+ default value is zero when transient trigger is enabled,
+ to allow duration to be set.
+
+ activate state indicates a timer with a value of specified
+ duration running.
+ deactivated state indicates that there is no active timer
+ running.
+
+ duration - one shot timer value. When activate is set, duration value
+ is used to start a timer that runs once. This value doesn't
+ get changed by the trigger unless user does a set via
+ echo new_value > duration
+
+ state - transient state to be held. It has two values 0 or 1. 0 maps
+ to LED_OFF and 1 maps to LED_FULL. The specified state is
+ held for the duration of the one shot timer and then the
+ state gets changed to the non-transient state which is the
+ inverse of transient state.
+ If state = LED_FULL, when the timer runs out the state will
+ go back to LED_OFF.
+ If state = LED_OFF, when the timer runs out the state will
+ go back to LED_FULL.
+ Please note that current LED state is not checked prior to
+ changing the state to the specified state.
+ Driver could map these values to inverted depending on the
+ default states it defines for the LED in its brightness_set()
+ interface which is called from the led brightness_set()
+ interfaces to control the LED state.
+
+When timer expires activate goes back to deactivated state, duration is left
+at the set value to be used when activate is set at a future time. This will
+allow user app to set the time once and activate it to run it once for the
+specified value as needed. When timer expires, state is restored to the
+non-transient state which is the inverse of the transient state.
+
+ echo 1 > activate - starts timer = duration when duration is not 0.
+ echo 0 > activate - cancels currently running timer.
+ echo n > duration - stores timer value to be used upon next
+ activate. Currently active timer if
+ any, continues to run for the specified time.
+ echo 0 > duration - stores timer value to be used upon next
+ activate. Currently active timer if any,
+ continues to run for the specified time.
+ echo 1 > state - stores desired transient state LED_FULL to be
+ held for the specified duration.
+ echo 0 > state - stores desired transient state LED_OFF to be
+ held for the specified duration.
+
+What is not supported:
+======================
+- Timer activation is one shot and extending and/or shortening the timer
+ is not supported.
+
+Example use-case 1:
+ echo transient > trigger
+ echo n > duration
+ echo 1 > state
+repeat the following step as needed:
+ echo 1 > activate - start timer = duration to run once
+ echo 1 > activate - start timer = duration to run once
+ echo none > trigger
+
+This trigger is intended to be used for for the following example use cases:
+ - Control of vibrate (phones, tablets etc.) hardware by user space app.
+ - Use of LED by user space app as activity indicator.
+ - Use of LED by user space app as a kind of watchdog indicator -- as
+ long as the app is alive, it can keep the LED illuminated, if it dies
+ the LED will be extinguished automatically.
+ - Use by any user space app that needs a transient GPIO output.
diff --git a/Documentation/local_ops.txt b/Documentation/local_ops.txt
new file mode 100644
index 000000000..407576a23
--- /dev/null
+++ b/Documentation/local_ops.txt
@@ -0,0 +1,191 @@
+ Semantics and Behavior of Local Atomic Operations
+
+ Mathieu Desnoyers
+
+
+ This document explains the purpose of the local atomic operations, how
+to implement them for any given architecture and shows how they can be used
+properly. It also stresses on the precautions that must be taken when reading
+those local variables across CPUs when the order of memory writes matters.
+
+Note that local_t based operations are not recommended for general kernel use.
+Please use the this_cpu operations instead unless there is really a special purpose.
+Most uses of local_t in the kernel have been replaced by this_cpu operations.
+this_cpu operations combine the relocation with the local_t like semantics in
+a single instruction and yield more compact and faster executing code.
+
+
+* Purpose of local atomic operations
+
+Local atomic operations are meant to provide fast and highly reentrant per CPU
+counters. They minimize the performance cost of standard atomic operations by
+removing the LOCK prefix and memory barriers normally required to synchronize
+across CPUs.
+
+Having fast per CPU atomic counters is interesting in many cases : it does not
+require disabling interrupts to protect from interrupt handlers and it permits
+coherent counters in NMI handlers. It is especially useful for tracing purposes
+and for various performance monitoring counters.
+
+Local atomic operations only guarantee variable modification atomicity wrt the
+CPU which owns the data. Therefore, care must taken to make sure that only one
+CPU writes to the local_t data. This is done by using per cpu data and making
+sure that we modify it from within a preemption safe context. It is however
+permitted to read local_t data from any CPU : it will then appear to be written
+out of order wrt other memory writes by the owner CPU.
+
+
+* Implementation for a given architecture
+
+It can be done by slightly modifying the standard atomic operations : only
+their UP variant must be kept. It typically means removing LOCK prefix (on
+i386 and x86_64) and any SMP synchronization barrier. If the architecture does
+not have a different behavior between SMP and UP, including asm-generic/local.h
+in your architecture's local.h is sufficient.
+
+The local_t type is defined as an opaque signed long by embedding an
+atomic_long_t inside a structure. This is made so a cast from this type to a
+long fails. The definition looks like :
+
+typedef struct { atomic_long_t a; } local_t;
+
+
+* Rules to follow when using local atomic operations
+
+- Variables touched by local ops must be per cpu variables.
+- _Only_ the CPU owner of these variables must write to them.
+- This CPU can use local ops from any context (process, irq, softirq, nmi, ...)
+ to update its local_t variables.
+- Preemption (or interrupts) must be disabled when using local ops in
+ process context to make sure the process won't be migrated to a
+ different CPU between getting the per-cpu variable and doing the
+ actual local op.
+- When using local ops in interrupt context, no special care must be
+ taken on a mainline kernel, since they will run on the local CPU with
+ preemption already disabled. I suggest, however, to explicitly
+ disable preemption anyway to make sure it will still work correctly on
+ -rt kernels.
+- Reading the local cpu variable will provide the current copy of the
+ variable.
+- Reads of these variables can be done from any CPU, because updates to
+ "long", aligned, variables are always atomic. Since no memory
+ synchronization is done by the writer CPU, an outdated copy of the
+ variable can be read when reading some _other_ cpu's variables.
+
+
+* How to use local atomic operations
+
+#include <linux/percpu.h>
+#include <asm/local.h>
+
+static DEFINE_PER_CPU(local_t, counters) = LOCAL_INIT(0);
+
+
+* Counting
+
+Counting is done on all the bits of a signed long.
+
+In preemptible context, use get_cpu_var() and put_cpu_var() around local atomic
+operations : it makes sure that preemption is disabled around write access to
+the per cpu variable. For instance :
+
+ local_inc(&get_cpu_var(counters));
+ put_cpu_var(counters);
+
+If you are already in a preemption-safe context, you can use
+this_cpu_ptr() instead.
+
+ local_inc(this_cpu_ptr(&counters));
+
+
+
+* Reading the counters
+
+Those local counters can be read from foreign CPUs to sum the count. Note that
+the data seen by local_read across CPUs must be considered to be out of order
+relatively to other memory writes happening on the CPU that owns the data.
+
+ long sum = 0;
+ for_each_online_cpu(cpu)
+ sum += local_read(&per_cpu(counters, cpu));
+
+If you want to use a remote local_read to synchronize access to a resource
+between CPUs, explicit smp_wmb() and smp_rmb() memory barriers must be used
+respectively on the writer and the reader CPUs. It would be the case if you use
+the local_t variable as a counter of bytes written in a buffer : there should
+be a smp_wmb() between the buffer write and the counter increment and also a
+smp_rmb() between the counter read and the buffer read.
+
+
+Here is a sample module which implements a basic per cpu counter using local.h.
+
+--- BEGIN ---
+/* test-local.c
+ *
+ * Sample module for local.h usage.
+ */
+
+
+#include <asm/local.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+
+static DEFINE_PER_CPU(local_t, counters) = LOCAL_INIT(0);
+
+static struct timer_list test_timer;
+
+/* IPI called on each CPU. */
+static void test_each(void *info)
+{
+ /* Increment the counter from a non preemptible context */
+ printk("Increment on cpu %d\n", smp_processor_id());
+ local_inc(this_cpu_ptr(&counters));
+
+ /* This is what incrementing the variable would look like within a
+ * preemptible context (it disables preemption) :
+ *
+ * local_inc(&get_cpu_var(counters));
+ * put_cpu_var(counters);
+ */
+}
+
+static void do_test_timer(unsigned long data)
+{
+ int cpu;
+
+ /* Increment the counters */
+ on_each_cpu(test_each, NULL, 1);
+ /* Read all the counters */
+ printk("Counters read from CPU %d\n", smp_processor_id());
+ for_each_online_cpu(cpu) {
+ printk("Read : CPU %d, count %ld\n", cpu,
+ local_read(&per_cpu(counters, cpu)));
+ }
+ del_timer(&test_timer);
+ test_timer.expires = jiffies + 1000;
+ add_timer(&test_timer);
+}
+
+static int __init test_init(void)
+{
+ /* initialize the timer that will increment the counter */
+ init_timer(&test_timer);
+ test_timer.function = do_test_timer;
+ test_timer.expires = jiffies + 1;
+ add_timer(&test_timer);
+
+ return 0;
+}
+
+static void __exit test_exit(void)
+{
+ del_timer_sync(&test_timer);
+}
+
+module_init(test_init);
+module_exit(test_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Local Atomic Ops");
+--- END ---
diff --git a/Documentation/locking/00-INDEX b/Documentation/locking/00-INDEX
new file mode 100644
index 000000000..c256c9bee
--- /dev/null
+++ b/Documentation/locking/00-INDEX
@@ -0,0 +1,16 @@
+00-INDEX
+ - this file.
+lockdep-design.txt
+ - documentation on the runtime locking correctness validator.
+lockstat.txt
+ - info on collecting statistics on locks (and contention).
+mutex-design.txt
+ - info on the generic mutex subsystem.
+rt-mutex-design.txt
+ - description of the RealTime mutex implementation design.
+rt-mutex.txt
+ - desc. of RT-mutex subsystem with PI (Priority Inheritance) support.
+spinlocks.txt
+ - info on using spinlocks to provide exclusive access in kernel.
+ww-mutex-design.txt
+ - Intro to Mutex wait/would deadlock handling.s
diff --git a/Documentation/locking/lglock.txt b/Documentation/locking/lglock.txt
new file mode 100644
index 000000000..a6971e34f
--- /dev/null
+++ b/Documentation/locking/lglock.txt
@@ -0,0 +1,166 @@
+lglock - local/global locks for mostly local access patterns
+------------------------------------------------------------
+
+Origin: Nick Piggin's VFS scalability series introduced during
+ 2.6.35++ [1] [2]
+Location: kernel/locking/lglock.c
+ include/linux/lglock.h
+Users: currently only the VFS and stop_machine related code
+
+Design Goal:
+------------
+
+Improve scalability of globally used large data sets that are
+distributed over all CPUs as per_cpu elements.
+
+To manage global data structures that are partitioned over all CPUs
+as per_cpu elements but can be mostly handled by CPU local actions
+lglock will be used where the majority of accesses are cpu local
+reading and occasional cpu local writing with very infrequent
+global write access.
+
+
+* deal with things locally whenever possible
+ - very fast access to the local per_cpu data
+ - reasonably fast access to specific per_cpu data on a different
+ CPU
+* while making global action possible when needed
+ - by expensive access to all CPUs locks - effectively
+ resulting in a globally visible critical section.
+
+Design:
+-------
+
+Basically it is an array of per_cpu spinlocks with the
+lg_local_lock/unlock accessing the local CPUs lock object and the
+lg_local_lock_cpu/unlock_cpu accessing a remote CPUs lock object
+the lg_local_lock has to disable preemption as migration protection so
+that the reference to the local CPUs lock does not go out of scope.
+Due to the lg_local_lock/unlock only touching cpu-local resources it
+is fast. Taking the local lock on a different CPU will be more
+expensive but still relatively cheap.
+
+One can relax the migration constraints by acquiring the current
+CPUs lock with lg_local_lock_cpu, remember the cpu, and release that
+lock at the end of the critical section even if migrated. This should
+give most of the performance benefits without inhibiting migration
+though needs careful considerations for nesting of lglocks and
+consideration of deadlocks with lg_global_lock.
+
+The lg_global_lock/unlock locks all underlying spinlocks of all
+possible CPUs (including those off-line). The preemption disable/enable
+are needed in the non-RT kernels to prevent deadlocks like:
+
+ on cpu 1
+
+ task A task B
+ lg_global_lock
+ got cpu 0 lock
+ <<<< preempt <<<<
+ lg_local_lock_cpu for cpu 0
+ spin on cpu 0 lock
+
+On -RT this deadlock scenario is resolved by the arch_spin_locks in the
+lglocks being replaced by rt_mutexes which resolve the above deadlock
+by boosting the lock-holder.
+
+
+Implementation:
+---------------
+
+The initial lglock implementation from Nick Piggin used some complex
+macros to generate the lglock/brlock in lglock.h - they were later
+turned into a set of functions by Andi Kleen [7]. The change to functions
+was motivated by the presence of multiple lock users and also by them
+being easier to maintain than the generating macros. This change to
+functions is also the basis to eliminated the restriction of not
+being initializeable in kernel modules (the remaining problem is that
+locks are not explicitly initialized - see lockdep-design.txt)
+
+Declaration and initialization:
+-------------------------------
+
+ #include <linux/lglock.h>
+
+ DEFINE_LGLOCK(name)
+ or:
+ DEFINE_STATIC_LGLOCK(name);
+
+ lg_lock_init(&name, "lockdep_name_string");
+
+ on UP this is mapped to DEFINE_SPINLOCK(name) in both cases, note
+ also that as of 3.18-rc6 all declaration in use are of the _STATIC_
+ variant (and it seems that the non-static was never in use).
+ lg_lock_init is initializing the lockdep map only.
+
+Usage:
+------
+
+From the locking semantics it is a spinlock. It could be called a
+locality aware spinlock. lg_local_* behaves like a per_cpu
+spinlock and lg_global_* like a global spinlock.
+No surprises in the API.
+
+ lg_local_lock(*lglock);
+ access to protected per_cpu object on this CPU
+ lg_local_unlock(*lglock);
+
+ lg_local_lock_cpu(*lglock, cpu);
+ access to protected per_cpu object on other CPU cpu
+ lg_local_unlock_cpu(*lglock, cpu);
+
+ lg_global_lock(*lglock);
+ access all protected per_cpu objects on all CPUs
+ lg_global_unlock(*lglock);
+
+ There are no _trylock variants of the lglocks.
+
+Note that the lg_global_lock/unlock has to iterate over all possible
+CPUs rather than the actually present CPUs or a CPU could go off-line
+with a held lock [4] and that makes it very expensive. A discussion on
+these issues can be found at [5]
+
+Constraints:
+------------
+
+ * currently the declaration of lglocks in kernel modules is not
+ possible, though this should be doable with little change.
+ * lglocks are not recursive.
+ * suitable for code that can do most operations on the CPU local
+ data and will very rarely need the global lock
+ * lg_global_lock/unlock is *very* expensive and does not scale
+ * on UP systems all lg_* primitives are simply spinlocks
+ * in PREEMPT_RT the spinlock becomes an rt-mutex and can sleep but
+ does not change the tasks state while sleeping [6].
+ * in PREEMPT_RT the preempt_disable/enable in lg_local_lock/unlock
+ is downgraded to a migrate_disable/enable, the other
+ preempt_disable/enable are downgraded to barriers [6].
+ The deadlock noted for non-RT above is resolved due to rt_mutexes
+ boosting the lock-holder in this case which arch_spin_locks do
+ not do.
+
+lglocks were designed for very specific problems in the VFS and probably
+only are the right answer in these corner cases. Any new user that looks
+at lglocks probably wants to look at the seqlock and RCU alternatives as
+her first choice. There are also efforts to resolve the RCU issues that
+currently prevent using RCU in place of view remaining lglocks.
+
+Note on brlock history:
+-----------------------
+
+The 'Big Reader' read-write spinlocks were originally introduced by
+Ingo Molnar in 2000 (2.4/2.5 kernel series) and removed in 2003. They
+later were introduced by the VFS scalability patch set in 2.6 series
+again as the "big reader lock" brlock [2] variant of lglock which has
+been replaced by seqlock primitives or by RCU based primitives in the
+3.13 kernel series as was suggested in [3] in 2003. The brlock was
+entirely removed in the 3.13 kernel series.
+
+Link: 1 http://lkml.org/lkml/2010/8/2/81
+Link: 2 http://lwn.net/Articles/401738/
+Link: 3 http://lkml.org/lkml/2003/3/9/205
+Link: 4 https://lkml.org/lkml/2011/8/24/185
+Link: 5 http://lkml.org/lkml/2011/12/18/189
+Link: 6 https://www.kernel.org/pub/linux/kernel/projects/rt/
+ patch series - lglocks-rt.patch.patch
+Link: 7 http://lkml.org/lkml/2012/3/5/26
diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt
new file mode 100644
index 000000000..5001280e9
--- /dev/null
+++ b/Documentation/locking/lockdep-design.txt
@@ -0,0 +1,286 @@
+Runtime locking correctness validator
+=====================================
+
+started by Ingo Molnar <mingo@redhat.com>
+additions by Arjan van de Ven <arjan@linux.intel.com>
+
+Lock-class
+----------
+
+The basic object the validator operates upon is a 'class' of locks.
+
+A class of locks is a group of locks that are logically the same with
+respect to locking rules, even if the locks may have multiple (possibly
+tens of thousands of) instantiations. For example a lock in the inode
+struct is one class, while each inode has its own instantiation of that
+lock class.
+
+The validator tracks the 'state' of lock-classes, and it tracks
+dependencies between different lock-classes. The validator maintains a
+rolling proof that the state and the dependencies are correct.
+
+Unlike an lock instantiation, the lock-class itself never goes away: when
+a lock-class is used for the first time after bootup it gets registered,
+and all subsequent uses of that lock-class will be attached to this
+lock-class.
+
+State
+-----
+
+The validator tracks lock-class usage history into 4n + 1 separate state bits:
+
+- 'ever held in STATE context'
+- 'ever held as readlock in STATE context'
+- 'ever held with STATE enabled'
+- 'ever held as readlock with STATE enabled'
+
+Where STATE can be either one of (kernel/locking/lockdep_states.h)
+ - hardirq
+ - softirq
+ - reclaim_fs
+
+- 'ever used' [ == !unused ]
+
+When locking rules are violated, these state bits are presented in the
+locking error messages, inside curlies. A contrived example:
+
+ modprobe/2287 is trying to acquire lock:
+ (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
+
+ but task is already holding lock:
+ (&sio_locks[i].lock){-.-...}, at: [<c02867fd>] mutex_lock+0x21/0x24
+
+
+The bit position indicates STATE, STATE-read, for each of the states listed
+above, and the character displayed in each indicates:
+
+ '.' acquired while irqs disabled and not in irq context
+ '-' acquired in irq context
+ '+' acquired with irqs enabled
+ '?' acquired in irq context with irqs enabled.
+
+Unused mutexes cannot be part of the cause of an error.
+
+
+Single-lock state rules:
+------------------------
+
+A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The
+following states are exclusive, and only one of them is allowed to be
+set for any lock-class:
+
+ <hardirq-safe> and <hardirq-unsafe>
+ <softirq-safe> and <softirq-unsafe>
+
+The validator detects and reports lock usage that violate these
+single-lock state rules.
+
+Multi-lock dependency rules:
+----------------------------
+
+The same lock-class must not be acquired twice, because this could lead
+to lock recursion deadlocks.
+
+Furthermore, two locks may not be taken in different order:
+
+ <L1> -> <L2>
+ <L2> -> <L1>
+
+because this could lead to lock inversion deadlocks. (The validator
+finds such dependencies in arbitrary complexity, i.e. there can be any
+other locking sequence between the acquire-lock operations, the
+validator will still track all dependencies between locks.)
+
+Furthermore, the following usage based lock dependencies are not allowed
+between any two lock-classes:
+
+ <hardirq-safe> -> <hardirq-unsafe>
+ <softirq-safe> -> <softirq-unsafe>
+
+The first rule comes from the fact the a hardirq-safe lock could be
+taken by a hardirq context, interrupting a hardirq-unsafe lock - and
+thus could result in a lock inversion deadlock. Likewise, a softirq-safe
+lock could be taken by an softirq context, interrupting a softirq-unsafe
+lock.
+
+The above rules are enforced for any locking sequence that occurs in the
+kernel: when acquiring a new lock, the validator checks whether there is
+any rule violation between the new lock and any of the held locks.
+
+When a lock-class changes its state, the following aspects of the above
+dependency rules are enforced:
+
+- if a new hardirq-safe lock is discovered, we check whether it
+ took any hardirq-unsafe lock in the past.
+
+- if a new softirq-safe lock is discovered, we check whether it took
+ any softirq-unsafe lock in the past.
+
+- if a new hardirq-unsafe lock is discovered, we check whether any
+ hardirq-safe lock took it in the past.
+
+- if a new softirq-unsafe lock is discovered, we check whether any
+ softirq-safe lock took it in the past.
+
+(Again, we do these checks too on the basis that an interrupt context
+could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which
+could lead to a lock inversion deadlock - even if that lock scenario did
+not trigger in practice yet.)
+
+Exception: Nested data dependencies leading to nested locking
+-------------------------------------------------------------
+
+There are a few cases where the Linux kernel acquires more than one
+instance of the same lock-class. Such cases typically happen when there
+is some sort of hierarchy within objects of the same type. In these
+cases there is an inherent "natural" ordering between the two objects
+(defined by the properties of the hierarchy), and the kernel grabs the
+locks in this fixed order on each of the objects.
+
+An example of such an object hierarchy that results in "nested locking"
+is that of a "whole disk" block-dev object and a "partition" block-dev
+object; the partition is "part of" the whole device and as long as one
+always takes the whole disk lock as a higher lock than the partition
+lock, the lock ordering is fully correct. The validator does not
+automatically detect this natural ordering, as the locking rule behind
+the ordering is not static.
+
+In order to teach the validator about this correct usage model, new
+versions of the various locking primitives were added that allow you to
+specify a "nesting level". An example call, for the block device mutex,
+looks like this:
+
+enum bdev_bd_mutex_lock_class
+{
+ BD_MUTEX_NORMAL,
+ BD_MUTEX_WHOLE,
+ BD_MUTEX_PARTITION
+};
+
+ mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
+
+In this case the locking is done on a bdev object that is known to be a
+partition.
+
+The validator treats a lock that is taken in such a nested fashion as a
+separate (sub)class for the purposes of validation.
+
+Note: When changing code to use the _nested() primitives, be careful and
+check really thoroughly that the hierarchy is correctly mapped; otherwise
+you can get false positives or false negatives.
+
+Proof of 100% correctness:
+--------------------------
+
+The validator achieves perfect, mathematical 'closure' (proof of locking
+correctness) in the sense that for every simple, standalone single-task
+locking sequence that occurred at least once during the lifetime of the
+kernel, the validator proves it with a 100% certainty that no
+combination and timing of these locking sequences can cause any class of
+lock related deadlock. [*]
+
+I.e. complex multi-CPU and multi-task locking scenarios do not have to
+occur in practice to prove a deadlock: only the simple 'component'
+locking chains have to occur at least once (anytime, in any
+task/context) for the validator to be able to prove correctness. (For
+example, complex deadlocks that would normally need more than 3 CPUs and
+a very unlikely constellation of tasks, irq-contexts and timings to
+occur, can be detected on a plain, lightly loaded single-CPU system as
+well!)
+
+This radically decreases the complexity of locking related QA of the
+kernel: what has to be done during QA is to trigger as many "simple"
+single-task locking dependencies in the kernel as possible, at least
+once, to prove locking correctness - instead of having to trigger every
+possible combination of locking interaction between CPUs, combined with
+every possible hardirq and softirq nesting scenario (which is impossible
+to do in practice).
+
+[*] assuming that the validator itself is 100% correct, and no other
+ part of the system corrupts the state of the validator in any way.
+ We also assume that all NMI/SMM paths [which could interrupt
+ even hardirq-disabled codepaths] are correct and do not interfere
+ with the validator. We also assume that the 64-bit 'chain hash'
+ value is unique for every lock-chain in the system. Also, lock
+ recursion must not be higher than 20.
+
+Performance:
+------------
+
+The above rules require _massive_ amounts of runtime checking. If we did
+that for every lock taken and for every irqs-enable event, it would
+render the system practically unusably slow. The complexity of checking
+is O(N^2), so even with just a few hundred lock-classes we'd have to do
+tens of thousands of checks for every event.
+
+This problem is solved by checking any given 'locking scenario' (unique
+sequence of locks taken after each other) only once. A simple stack of
+held locks is maintained, and a lightweight 64-bit hash value is
+calculated, which hash is unique for every lock chain. The hash value,
+when the chain is validated for the first time, is then put into a hash
+table, which hash-table can be checked in a lockfree manner. If the
+locking chain occurs again later on, the hash table tells us that we
+dont have to validate the chain again.
+
+Troubleshooting:
+----------------
+
+The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes.
+Exceeding this number will trigger the following lockdep warning:
+
+ (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+
+By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical
+desktop systems have less than 1,000 lock classes, so this warning
+normally results from lock-class leakage or failure to properly
+initialize locks. These two problems are illustrated below:
+
+1. Repeated module loading and unloading while running the validator
+ will result in lock-class leakage. The issue here is that each
+ load of the module will create a new set of lock classes for
+ that module's locks, but module unloading does not remove old
+ classes (see below discussion of reuse of lock classes for why).
+ Therefore, if that module is loaded and unloaded repeatedly,
+ the number of lock classes will eventually reach the maximum.
+
+2. Using structures such as arrays that have large numbers of
+ locks that are not explicitly initialized. For example,
+ a hash table with 8192 buckets where each bucket has its own
+ spinlock_t will consume 8192 lock classes -unless- each spinlock
+ is explicitly initialized at runtime, for example, using the
+ run-time spin_lock_init() as opposed to compile-time initializers
+ such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize
+ the per-bucket spinlocks would guarantee lock-class overflow.
+ In contrast, a loop that called spin_lock_init() on each lock
+ would place all 8192 locks into a single lock class.
+
+ The moral of this story is that you should always explicitly
+ initialize your locks.
+
+One might argue that the validator should be modified to allow
+lock classes to be reused. However, if you are tempted to make this
+argument, first review the code and think through the changes that would
+be required, keeping in mind that the lock classes to be removed are
+likely to be linked into the lock-dependency graph. This turns out to
+be harder to do than to say.
+
+Of course, if you do run out of lock classes, the next thing to do is
+to find the offending lock classes. First, the following command gives
+you the number of lock classes currently in use along with the maximum:
+
+ grep "lock-classes" /proc/lockdep_stats
+
+This command produces the following output on a modest system:
+
+ lock-classes: 748 [max: 8191]
+
+If the number allocated (748 above) increases continually over time,
+then there is likely a leak. The following command can be used to
+identify the leaking lock classes:
+
+ grep "BD" /proc/lockdep
+
+Run the command and save the output, then compare against the output from
+a later run of this command to identify the leakers. This same output
+can also help you find situations where runtime lock initialization has
+been omitted.
diff --git a/Documentation/locking/lockstat.txt b/Documentation/locking/lockstat.txt
new file mode 100644
index 000000000..568bbbace
--- /dev/null
+++ b/Documentation/locking/lockstat.txt
@@ -0,0 +1,183 @@
+
+LOCK STATISTICS
+
+- WHAT
+
+As the name suggests, it provides statistics on locks.
+
+- WHY
+
+Because things like lock contention can severely impact performance.
+
+- HOW
+
+Lockdep already has hooks in the lock functions and maps lock instances to
+lock classes. We build on that (see Documentation/lokcing/lockdep-design.txt).
+The graph below shows the relation between the lock functions and the various
+hooks therein.
+
+ __acquire
+ |
+ lock _____
+ | \
+ | __contended
+ | |
+ | <wait>
+ | _______/
+ |/
+ |
+ __acquired
+ |
+ .
+ <hold>
+ .
+ |
+ __release
+ |
+ unlock
+
+lock, unlock - the regular lock functions
+__* - the hooks
+<> - states
+
+With these hooks we provide the following statistics:
+
+ con-bounces - number of lock contention that involved x-cpu data
+ contentions - number of lock acquisitions that had to wait
+ wait time min - shortest (non-0) time we ever had to wait for a lock
+ max - longest time we ever had to wait for a lock
+ total - total time we spend waiting on this lock
+ avg - average time spent waiting on this lock
+ acq-bounces - number of lock acquisitions that involved x-cpu data
+ acquisitions - number of times we took the lock
+ hold time min - shortest (non-0) time we ever held the lock
+ max - longest time we ever held the lock
+ total - total time this lock was held
+ avg - average time this lock was held
+
+These numbers are gathered per lock class, per read/write state (when
+applicable).
+
+It also tracks 4 contention points per class. A contention point is a call site
+that had to wait on lock acquisition.
+
+ - CONFIGURATION
+
+Lock statistics are enabled via CONFIG_LOCK_STAT.
+
+ - USAGE
+
+Enable collection of statistics:
+
+# echo 1 >/proc/sys/kernel/lock_stat
+
+Disable collection of statistics:
+
+# echo 0 >/proc/sys/kernel/lock_stat
+
+Look at the current lock statistics:
+
+( line numbers not part of actual output, done for clarity in the explanation
+ below )
+
+# less /proc/lock_stat
+
+01 lock_stat version 0.4
+02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg
+04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+05
+06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99
+07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77
+08 ---------------
+09 &mm->mmap_sem 1 [<ffffffff811502a7>] khugepaged_scan_mm_slot+0x57/0x280
+19 &mm->mmap_sem 96 [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+11 &mm->mmap_sem 34 [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+12 &mm->mmap_sem 17 [<ffffffff81127e71>] vm_munmap+0x41/0x80
+13 ---------------
+14 &mm->mmap_sem 1 [<ffffffff81046fda>] dup_mmap+0x2a/0x3f0
+15 &mm->mmap_sem 60 [<ffffffff81129e29>] SyS_mprotect+0xe9/0x250
+16 &mm->mmap_sem 41 [<ffffffff815351c4>] __do_page_fault+0x1d4/0x510
+17 &mm->mmap_sem 68 [<ffffffff81113d77>] vm_mmap_pgoff+0x87/0xd0
+18
+19.............................................................................................................................................................................................................................
+20
+21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48
+22 ---------------
+23 unix_table_lock 45 [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+24 unix_table_lock 47 [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+25 unix_table_lock 15 [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+26 unix_table_lock 5 [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+27 ---------------
+28 unix_table_lock 39 [<ffffffff8150b111>] unix_release_sock+0x31/0x250
+29 unix_table_lock 49 [<ffffffff8150ad8e>] unix_create1+0x16e/0x1b0
+30 unix_table_lock 20 [<ffffffff8150ca37>] unix_find_other+0x117/0x230
+31 unix_table_lock 4 [<ffffffff8150a09f>] unix_autobind+0x11f/0x1b0
+
+
+This excerpt shows the first two lock class statistics. Line 01 shows the
+output version - each time the format changes this will be updated. Line 02-04
+show the header with column descriptions. Lines 05-18 and 20-31 show the actual
+statistics. These statistics come in two parts; the actual stats separated by a
+short separator (line 08, 13) from the contention points.
+
+Lines 09-12 show the first 4 recorded contention points (the code
+which tries to get the lock) and lines 14-17 show the first 4 recorded
+contended points (the lock holder). It is possible that the max
+con-bounces point is missing in the statistics.
+
+The first lock (05-18) is a read/write lock, and shows two lines above the
+short separator. The contention points don't match the column descriptors,
+they have two: contentions and [<IP>] symbol. The second set of contention
+points are the points we're contending with.
+
+The integer part of the time values is in us.
+
+Dealing with nested locks, subclasses may appear:
+
+32...........................................................................................................................................................................................................................
+33
+34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82
+35 ---------
+36 &rq->lock 645 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+37 &rq->lock 297 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+38 &rq->lock 360 [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a
+39 &rq->lock 428 [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb
+40 ---------
+41 &rq->lock 77 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75
+42 &rq->lock 174 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+43 &rq->lock 4715 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+44 &rq->lock 893 [<ffffffff81340524>] schedule+0x157/0x7b8
+45
+46...........................................................................................................................................................................................................................
+47
+48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84
+49 -----------
+50 &rq->lock/1 11526 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+51 -----------
+52 &rq->lock/1 5645 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54
+53 &rq->lock/1 1224 [<ffffffff81340524>] schedule+0x157/0x7b8
+54 &rq->lock/1 4336 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54
+55 &rq->lock/1 181 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a
+
+Line 48 shows statistics for the second subclass (/1) of &rq->lock class
+(subclass starts from 0), since in this case, as line 50 suggests,
+double_rq_lock actually acquires a nested lock of two spinlocks.
+
+View the top contending locks:
+
+# grep : /proc/lock_stat | head
+ clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71
+ tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59
+ &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59
+ &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69
+ &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93
+ tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72
+ tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89
+ rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89
+ &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27
+ rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76
+
+Clear the statistics:
+
+# echo 0 > /proc/lock_stat
diff --git a/Documentation/locking/locktorture.txt b/Documentation/locking/locktorture.txt
new file mode 100644
index 000000000..619f2bb13
--- /dev/null
+++ b/Documentation/locking/locktorture.txt
@@ -0,0 +1,147 @@
+Kernel Lock Torture Test Operation
+
+CONFIG_LOCK_TORTURE_TEST
+
+The CONFIG LOCK_TORTURE_TEST config option provides a kernel module
+that runs torture tests on core kernel locking primitives. The kernel
+module, 'locktorture', may be built after the fact on the running
+kernel to be tested, if desired. The tests periodically output status
+messages via printk(), which can be examined via the dmesg (perhaps
+grepping for "torture"). The test is started when the module is loaded,
+and stops when the module is unloaded. This program is based on how RCU
+is tortured, via rcutorture.
+
+This torture test consists of creating a number of kernel threads which
+acquire the lock and hold it for specific amount of time, thus simulating
+different critical region behaviors. The amount of contention on the lock
+can be simulated by either enlarging this critical region hold time and/or
+creating more kthreads.
+
+
+MODULE PARAMETERS
+
+This module has the following parameters:
+
+
+ ** Locktorture-specific **
+
+nwriters_stress Number of kernel threads that will stress exclusive lock
+ ownership (writers). The default value is twice the number
+ of online CPUs.
+
+nreaders_stress Number of kernel threads that will stress shared lock
+ ownership (readers). The default is the same amount of writer
+ locks. If the user did not specify nwriters_stress, then
+ both readers and writers be the amount of online CPUs.
+
+torture_type Type of lock to torture. By default, only spinlocks will
+ be tortured. This module can torture the following locks,
+ with string values as follows:
+
+ o "lock_busted": Simulates a buggy lock implementation.
+
+ o "spin_lock": spin_lock() and spin_unlock() pairs.
+
+ o "spin_lock_irq": spin_lock_irq() and spin_unlock_irq()
+ pairs.
+
+ o "rw_lock": read/write lock() and unlock() rwlock pairs.
+
+ o "rw_lock_irq": read/write lock_irq() and unlock_irq()
+ rwlock pairs.
+
+ o "mutex_lock": mutex_lock() and mutex_unlock() pairs.
+
+ o "rwsem_lock": read/write down() and up() semaphore pairs.
+
+torture_runnable Start locktorture at boot time in the case where the
+ module is built into the kernel, otherwise wait for
+ torture_runnable to be set via sysfs before starting.
+ By default it will begin once the module is loaded.
+
+
+ ** Torture-framework (RCU + locking) **
+
+shutdown_secs The number of seconds to run the test before terminating
+ the test and powering off the system. The default is
+ zero, which disables test termination and system shutdown.
+ This capability is useful for automated testing.
+
+onoff_interval The number of seconds between each attempt to execute a
+ randomly selected CPU-hotplug operation. Defaults
+ to zero, which disables CPU hotplugging. In
+ CONFIG_HOTPLUG_CPU=n kernels, locktorture will silently
+ refuse to do any CPU-hotplug operations regardless of
+ what value is specified for onoff_interval.
+
+onoff_holdoff The number of seconds to wait until starting CPU-hotplug
+ operations. This would normally only be used when
+ locktorture was built into the kernel and started
+ automatically at boot time, in which case it is useful
+ in order to avoid confusing boot-time code with CPUs
+ coming and going. This parameter is only useful if
+ CONFIG_HOTPLUG_CPU is enabled.
+
+stat_interval Number of seconds between statistics-related printk()s.
+ By default, locktorture will report stats every 60 seconds.
+ Setting the interval to zero causes the statistics to
+ be printed -only- when the module is unloaded, and this
+ is the default.
+
+stutter The length of time to run the test before pausing for this
+ same period of time. Defaults to "stutter=5", so as
+ to run and pause for (roughly) five-second intervals.
+ Specifying "stutter=0" causes the test to run continuously
+ without pausing, which is the old default behavior.
+
+shuffle_interval The number of seconds to keep the test threads affinitied
+ to a particular subset of the CPUs, defaults to 3 seconds.
+ Used in conjunction with test_no_idle_hz.
+
+verbose Enable verbose debugging printing, via printk(). Enabled
+ by default. This extra information is mostly related to
+ high-level errors and reports from the main 'torture'
+ framework.
+
+
+STATISTICS
+
+Statistics are printed in the following format:
+
+spin_lock-torture: Writes: Total: 93746064 Max/Min: 0/0 Fail: 0
+ (A) (B) (C) (D) (E)
+
+(A): Lock type that is being tortured -- torture_type parameter.
+
+(B): Number of writer lock acquisitions. If dealing with a read/write primitive
+ a second "Reads" statistics line is printed.
+
+(C): Number of times the lock was acquired.
+
+(D): Min and max number of times threads failed to acquire the lock.
+
+(E): true/false values if there were errors acquiring the lock. This should
+ -only- be positive if there is a bug in the locking primitive's
+ implementation. Otherwise a lock should never fail (i.e., spin_lock()).
+ Of course, the same applies for (C), above. A dummy example of this is
+ the "lock_busted" type.
+
+USAGE
+
+The following script may be used to torture locks:
+
+ #!/bin/sh
+
+ modprobe locktorture
+ sleep 3600
+ rmmod locktorture
+ dmesg | grep torture:
+
+The output can be manually inspected for the error flag of "!!!".
+One could of course create a more elaborate script that automatically
+checked for such errors. The "rmmod" command forces a "SUCCESS",
+"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed. The first
+two are self-explanatory, while the last indicates that while there
+were no locking failures, CPU-hotplug problems were detected.
+
+Also see: Documentation/RCU/torture.txt
diff --git a/Documentation/locking/mutex-design.txt b/Documentation/locking/mutex-design.txt
new file mode 100644
index 000000000..60c482df1
--- /dev/null
+++ b/Documentation/locking/mutex-design.txt
@@ -0,0 +1,157 @@
+Generic Mutex Subsystem
+
+started by Ingo Molnar <mingo@redhat.com>
+updated by Davidlohr Bueso <davidlohr@hp.com>
+
+What are mutexes?
+-----------------
+
+In the Linux kernel, mutexes refer to a particular locking primitive
+that enforces serialization on shared memory systems, and not only to
+the generic term referring to 'mutual exclusion' found in academia
+or similar theoretical text books. Mutexes are sleeping locks which
+behave similarly to binary semaphores, and were introduced in 2006[1]
+as an alternative to these. This new data structure provided a number
+of advantages, including simpler interfaces, and at that time smaller
+code (see Disadvantages).
+
+[1] http://lwn.net/Articles/164802/
+
+Implementation
+--------------
+
+Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
+and implemented in kernel/locking/mutex.c. These locks use a three
+state atomic counter (->count) to represent the different possible
+transitions that can occur during the lifetime of a lock:
+
+ 1: unlocked
+ 0: locked, no waiters
+ negative: locked, with potential waiters
+
+In its most basic form it also includes a wait-queue and a spinlock
+that serializes access to it. CONFIG_SMP systems can also include
+a pointer to the lock task owner (->owner) as well as a spinner MCS
+lock (->osq), both described below in (ii).
+
+When acquiring a mutex, there are three possible paths that can be
+taken, depending on the state of the lock:
+
+(i) fastpath: tries to atomically acquire the lock by decrementing the
+ counter. If it was already taken by another task it goes to the next
+ possible path. This logic is architecture specific. On x86-64, the
+ locking fastpath is 2 instructions:
+
+ 0000000000000e10 <mutex_lock>:
+ e21: f0 ff 0b lock decl (%rbx)
+ e24: 79 08 jns e2e <mutex_lock+0x1e>
+
+ the unlocking fastpath is equally tight:
+
+ 0000000000000bc0 <mutex_unlock>:
+ bc8: f0 ff 07 lock incl (%rdi)
+ bcb: 7f 0a jg bd7 <mutex_unlock+0x17>
+
+
+(ii) midpath: aka optimistic spinning, tries to spin for acquisition
+ while the lock owner is running and there are no other tasks ready
+ to run that have higher priority (need_resched). The rationale is
+ that if the lock owner is running, it is likely to release the lock
+ soon. The mutex spinners are queued up using MCS lock so that only
+ one spinner can compete for the mutex.
+
+ The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
+ with the desirable properties of being fair and with each cpu trying
+ to acquire the lock spinning on a local variable. It avoids expensive
+ cacheline bouncing that common test-and-set spinlock implementations
+ incur. An MCS-like lock is specially tailored for optimistic spinning
+ for sleeping lock implementation. An important feature of the customized
+ MCS lock is that it has the extra property that spinners are able to exit
+ the MCS spinlock queue when they need to reschedule. This further helps
+ avoid situations where MCS spinners that need to reschedule would continue
+ waiting to spin on mutex owner, only to go directly to slowpath upon
+ obtaining the MCS lock.
+
+
+(iii) slowpath: last resort, if the lock is still unable to be acquired,
+ the task is added to the wait-queue and sleeps until woken up by the
+ unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
+
+While formally kernel mutexes are sleepable locks, it is path (ii) that
+makes them more practically a hybrid type. By simply not interrupting a
+task and busy-waiting for a few cycles instead of immediately sleeping,
+the performance of this lock has been seen to significantly improve a
+number of workloads. Note that this technique is also used for rw-semaphores.
+
+Semantics
+---------
+
+The mutex subsystem checks and enforces the following rules:
+
+ - Only one task can hold the mutex at a time.
+ - Only the owner can unlock the mutex.
+ - Multiple unlocks are not permitted.
+ - Recursive locking/unlocking is not permitted.
+ - A mutex must only be initialized via the API (see below).
+ - A task may not exit with a mutex held.
+ - Memory areas where held locks reside must not be freed.
+ - Held mutexes must not be reinitialized.
+ - Mutexes may not be used in hardware or software interrupt
+ contexts such as tasklets and timers.
+
+These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled.
+In addition, the mutex debugging code also implements a number of other
+features that make lock debugging easier and faster:
+
+ - Uses symbolic names of mutexes, whenever they are printed
+ in debug output.
+ - Point-of-acquire tracking, symbolic lookup of function names,
+ list of all locks held in the system, printout of them.
+ - Owner tracking.
+ - Detects self-recursing locks and prints out all relevant info.
+ - Detects multi-task circular deadlocks and prints out all affected
+ locks and tasks (and only those tasks).
+
+
+Interfaces
+----------
+Statically define the mutex:
+ DEFINE_MUTEX(name);
+
+Dynamically initialize the mutex:
+ mutex_init(mutex);
+
+Acquire the mutex, uninterruptible:
+ void mutex_lock(struct mutex *lock);
+ void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+ int mutex_trylock(struct mutex *lock);
+
+Acquire the mutex, interruptible:
+ int mutex_lock_interruptible_nested(struct mutex *lock,
+ unsigned int subclass);
+ int mutex_lock_interruptible(struct mutex *lock);
+
+Acquire the mutex, interruptible, if dec to 0:
+ int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+
+Unlock the mutex:
+ void mutex_unlock(struct mutex *lock);
+
+Test if the mutex is taken:
+ int mutex_is_locked(struct mutex *lock);
+
+Disadvantages
+-------------
+
+Unlike its original design and purpose, 'struct mutex' is larger than
+most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
+as large as 'struct semaphore' (24 bytes) and tied, along with rwsems,
+for the largest lock in the kernel. Larger structure sizes mean more
+CPU cache and memory footprint.
+
+When to use mutexes
+-------------------
+
+Unless the strict semantics of mutexes are unsuitable and/or the critical
+region prevents the lock from being shared, always prefer them to any other
+locking primitive.
diff --git a/Documentation/locking/rt-mutex-design.txt b/Documentation/locking/rt-mutex-design.txt
new file mode 100644
index 000000000..8666070d3
--- /dev/null
+++ b/Documentation/locking/rt-mutex-design.txt
@@ -0,0 +1,781 @@
+#
+# Copyright (c) 2006 Steven Rostedt
+# Licensed under the GNU Free Documentation License, Version 1.2
+#
+
+RT-mutex implementation design
+------------------------------
+
+This document tries to describe the design of the rtmutex.c implementation.
+It doesn't describe the reasons why rtmutex.c exists. For that please see
+Documentation/rt-mutex.txt. Although this document does explain problems
+that happen without this code, but that is in the concept to understand
+what the code actually is doing.
+
+The goal of this document is to help others understand the priority
+inheritance (PI) algorithm that is used, as well as reasons for the
+decisions that were made to implement PI in the manner that was done.
+
+
+Unbounded Priority Inversion
+----------------------------
+
+Priority inversion is when a lower priority process executes while a higher
+priority process wants to run. This happens for several reasons, and
+most of the time it can't be helped. Anytime a high priority process wants
+to use a resource that a lower priority process has (a mutex for example),
+the high priority process must wait until the lower priority process is done
+with the resource. This is a priority inversion. What we want to prevent
+is something called unbounded priority inversion. That is when the high
+priority process is prevented from running by a lower priority process for
+an undetermined amount of time.
+
+The classic example of unbounded priority inversion is where you have three
+processes, let's call them processes A, B, and C, where A is the highest
+priority process, C is the lowest, and B is in between. A tries to grab a lock
+that C owns and must wait and lets C run to release the lock. But in the
+meantime, B executes, and since B is of a higher priority than C, it preempts C,
+but by doing so, it is in fact preempting A which is a higher priority process.
+Now there's no way of knowing how long A will be sleeping waiting for C
+to release the lock, because for all we know, B is a CPU hog and will
+never give C a chance to release the lock. This is called unbounded priority
+inversion.
+
+Here's a little ASCII art to show the problem.
+
+ grab lock L1 (owned by C)
+ |
+A ---+
+ C preempted by B
+ |
+C +----+
+
+B +-------->
+ B now keeps A from running.
+
+
+Priority Inheritance (PI)
+-------------------------
+
+There are several ways to solve this issue, but other ways are out of scope
+for this document. Here we only discuss PI.
+
+PI is where a process inherits the priority of another process if the other
+process blocks on a lock owned by the current process. To make this easier
+to understand, let's use the previous example, with processes A, B, and C again.
+
+This time, when A blocks on the lock owned by C, C would inherit the priority
+of A. So now if B becomes runnable, it would not preempt C, since C now has
+the high priority of A. As soon as C releases the lock, it loses its
+inherited priority, and A then can continue with the resource that C had.
+
+Terminology
+-----------
+
+Here I explain some terminology that is used in this document to help describe
+the design that is used to implement PI.
+
+PI chain - The PI chain is an ordered series of locks and processes that cause
+ processes to inherit priorities from a previous process that is
+ blocked on one of its locks. This is described in more detail
+ later in this document.
+
+mutex - In this document, to differentiate from locks that implement
+ PI and spin locks that are used in the PI code, from now on
+ the PI locks will be called a mutex.
+
+lock - In this document from now on, I will use the term lock when
+ referring to spin locks that are used to protect parts of the PI
+ algorithm. These locks disable preemption for UP (when
+ CONFIG_PREEMPT is enabled) and on SMP prevents multiple CPUs from
+ entering critical sections simultaneously.
+
+spin lock - Same as lock above.
+
+waiter - A waiter is a struct that is stored on the stack of a blocked
+ process. Since the scope of the waiter is within the code for
+ a process being blocked on the mutex, it is fine to allocate
+ the waiter on the process's stack (local variable). This
+ structure holds a pointer to the task, as well as the mutex that
+ the task is blocked on. It also has the plist node structures to
+ place the task in the waiter_list of a mutex as well as the
+ pi_list of a mutex owner task (described below).
+
+ waiter is sometimes used in reference to the task that is waiting
+ on a mutex. This is the same as waiter->task.
+
+waiters - A list of processes that are blocked on a mutex.
+
+top waiter - The highest priority process waiting on a specific mutex.
+
+top pi waiter - The highest priority process waiting on one of the mutexes
+ that a specific process owns.
+
+Note: task and process are used interchangeably in this document, mostly to
+ differentiate between two processes that are being described together.
+
+
+PI chain
+--------
+
+The PI chain is a list of processes and mutexes that may cause priority
+inheritance to take place. Multiple chains may converge, but a chain
+would never diverge, since a process can't be blocked on more than one
+mutex at a time.
+
+Example:
+
+ Process: A, B, C, D, E
+ Mutexes: L1, L2, L3, L4
+
+ A owns: L1
+ B blocked on L1
+ B owns L2
+ C blocked on L2
+ C owns L3
+ D blocked on L3
+ D owns L4
+ E blocked on L4
+
+The chain would be:
+
+ E->L4->D->L3->C->L2->B->L1->A
+
+To show where two chains merge, we could add another process F and
+another mutex L5 where B owns L5 and F is blocked on mutex L5.
+
+The chain for F would be:
+
+ F->L5->B->L1->A
+
+Since a process may own more than one mutex, but never be blocked on more than
+one, the chains merge.
+
+Here we show both chains:
+
+ E->L4->D->L3->C->L2-+
+ |
+ +->B->L1->A
+ |
+ F->L5-+
+
+For PI to work, the processes at the right end of these chains (or we may
+also call it the Top of the chain) must be equal to or higher in priority
+than the processes to the left or below in the chain.
+
+Also since a mutex may have more than one process blocked on it, we can
+have multiple chains merge at mutexes. If we add another process G that is
+blocked on mutex L2:
+
+ G->L2->B->L1->A
+
+And once again, to show how this can grow I will show the merging chains
+again.
+
+ E->L4->D->L3->C-+
+ +->L2-+
+ | |
+ G-+ +->B->L1->A
+ |
+ F->L5-+
+
+
+Plist
+-----
+
+Before I go further and talk about how the PI chain is stored through lists
+on both mutexes and processes, I'll explain the plist. This is similar to
+the struct list_head functionality that is already in the kernel.
+The implementation of plist is out of scope for this document, but it is
+very important to understand what it does.
+
+There are a few differences between plist and list, the most important one
+being that plist is a priority sorted linked list. This means that the
+priorities of the plist are sorted, such that it takes O(1) to retrieve the
+highest priority item in the list. Obviously this is useful to store processes
+based on their priorities.
+
+Another difference, which is important for implementation, is that, unlike
+list, the head of the list is a different element than the nodes of a list.
+So the head of the list is declared as struct plist_head and nodes that will
+be added to the list are declared as struct plist_node.
+
+
+Mutex Waiter List
+-----------------
+
+Every mutex keeps track of all the waiters that are blocked on itself. The mutex
+has a plist to store these waiters by priority. This list is protected by
+a spin lock that is located in the struct of the mutex. This lock is called
+wait_lock. Since the modification of the waiter list is never done in
+interrupt context, the wait_lock can be taken without disabling interrupts.
+
+
+Task PI List
+------------
+
+To keep track of the PI chains, each process has its own PI list. This is
+a list of all top waiters of the mutexes that are owned by the process.
+Note that this list only holds the top waiters and not all waiters that are
+blocked on mutexes owned by the process.
+
+The top of the task's PI list is always the highest priority task that
+is waiting on a mutex that is owned by the task. So if the task has
+inherited a priority, it will always be the priority of the task that is
+at the top of this list.
+
+This list is stored in the task structure of a process as a plist called
+pi_list. This list is protected by a spin lock also in the task structure,
+called pi_lock. This lock may also be taken in interrupt context, so when
+locking the pi_lock, interrupts must be disabled.
+
+
+Depth of the PI Chain
+---------------------
+
+The maximum depth of the PI chain is not dynamic, and could actually be
+defined. But is very complex to figure it out, since it depends on all
+the nesting of mutexes. Let's look at the example where we have 3 mutexes,
+L1, L2, and L3, and four separate functions func1, func2, func3 and func4.
+The following shows a locking order of L1->L2->L3, but may not actually
+be directly nested that way.
+
+void func1(void)
+{
+ mutex_lock(L1);
+
+ /* do anything */
+
+ mutex_unlock(L1);
+}
+
+void func2(void)
+{
+ mutex_lock(L1);
+ mutex_lock(L2);
+
+ /* do something */
+
+ mutex_unlock(L2);
+ mutex_unlock(L1);
+}
+
+void func3(void)
+{
+ mutex_lock(L2);
+ mutex_lock(L3);
+
+ /* do something else */
+
+ mutex_unlock(L3);
+ mutex_unlock(L2);
+}
+
+void func4(void)
+{
+ mutex_lock(L3);
+
+ /* do something again */
+
+ mutex_unlock(L3);
+}
+
+Now we add 4 processes that run each of these functions separately.
+Processes A, B, C, and D which run functions func1, func2, func3 and func4
+respectively, and such that D runs first and A last. With D being preempted
+in func4 in the "do something again" area, we have a locking that follows:
+
+D owns L3
+ C blocked on L3
+ C owns L2
+ B blocked on L2
+ B owns L1
+ A blocked on L1
+
+And thus we have the chain A->L1->B->L2->C->L3->D.
+
+This gives us a PI depth of 4 (four processes), but looking at any of the
+functions individually, it seems as though they only have at most a locking
+depth of two. So, although the locking depth is defined at compile time,
+it still is very difficult to find the possibilities of that depth.
+
+Now since mutexes can be defined by user-land applications, we don't want a DOS
+type of application that nests large amounts of mutexes to create a large
+PI chain, and have the code holding spin locks while looking at a large
+amount of data. So to prevent this, the implementation not only implements
+a maximum lock depth, but also only holds at most two different locks at a
+time, as it walks the PI chain. More about this below.
+
+
+Mutex owner and flags
+---------------------
+
+The mutex structure contains a pointer to the owner of the mutex. If the
+mutex is not owned, this owner is set to NULL. Since all architectures
+have the task structure on at least a four byte alignment (and if this is
+not true, the rtmutex.c code will be broken!), this allows for the two
+least significant bits to be used as flags. This part is also described
+in Documentation/rt-mutex.txt, but will also be briefly described here.
+
+Bit 0 is used as the "Pending Owner" flag. This is described later.
+Bit 1 is used as the "Has Waiters" flags. This is also described later
+ in more detail, but is set whenever there are waiters on a mutex.
+
+
+cmpxchg Tricks
+--------------
+
+Some architectures implement an atomic cmpxchg (Compare and Exchange). This
+is used (when applicable) to keep the fast path of grabbing and releasing
+mutexes short.
+
+cmpxchg is basically the following function performed atomically:
+
+unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C)
+{
+ unsigned long T = *A;
+ if (*A == *B) {
+ *A = *C;
+ }
+ return T;
+}
+#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c)
+
+This is really nice to have, since it allows you to only update a variable
+if the variable is what you expect it to be. You know if it succeeded if
+the return value (the old value of A) is equal to B.
+
+The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If
+the architecture does not support CMPXCHG, then this macro is simply set
+to fail every time. But if CMPXCHG is supported, then this will
+help out extremely to keep the fast path short.
+
+The use of rt_mutex_cmpxchg with the flags in the owner field help optimize
+the system for architectures that support it. This will also be explained
+later in this document.
+
+
+Priority adjustments
+--------------------
+
+The implementation of the PI code in rtmutex.c has several places that a
+process must adjust its priority. With the help of the pi_list of a
+process this is rather easy to know what needs to be adjusted.
+
+The functions implementing the task adjustments are rt_mutex_adjust_prio,
+__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock
+to already be taken), rt_mutex_getprio, and rt_mutex_setprio.
+
+rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio.
+
+rt_mutex_getprio returns the priority that the task should have. Either the
+task's own normal priority, or if a process of a higher priority is waiting on
+a mutex owned by the task, then that higher priority should be returned.
+Since the pi_list of a task holds an order by priority list of all the top
+waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs
+to compare the top pi waiter to its own normal priority, and return the higher
+priority back.
+
+(Note: if looking at the code, you will notice that the lower number of
+ prio is returned. This is because the prio field in the task structure
+ is an inverse order of the actual priority. So a "prio" of 5 is
+ of higher priority than a "prio" of 10.)
+
+__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the
+result does not equal the task's current priority, then rt_mutex_setprio
+is called to adjust the priority of the task to the new priority.
+Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the
+actual change in priority.
+
+It is interesting to note that __rt_mutex_adjust_prio can either increase
+or decrease the priority of the task. In the case that a higher priority
+process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio
+would increase/boost the task's priority. But if a higher priority task
+were for some reason to leave the mutex (timeout or signal), this same function
+would decrease/unboost the priority of the task. That is because the pi_list
+always contains the highest priority task that is waiting on a mutex owned
+by the task, so we only need to compare the priority of that top pi waiter
+to the normal priority of the given task.
+
+
+High level overview of the PI chain walk
+----------------------------------------
+
+The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain.
+
+The implementation has gone through several iterations, and has ended up
+with what we believe is the best. It walks the PI chain by only grabbing
+at most two locks at a time, and is very efficient.
+
+The rt_mutex_adjust_prio_chain can be used either to boost or lower process
+priorities.
+
+rt_mutex_adjust_prio_chain is called with a task to be checked for PI
+(de)boosting (the owner of a mutex that a process is blocking on), a flag to
+check for deadlocking, the mutex that the task owns, and a pointer to a waiter
+that is the process's waiter struct that is blocked on the mutex (although this
+parameter may be NULL for deboosting).
+
+For this explanation, I will not mention deadlock detection. This explanation
+will try to stay at a high level.
+
+When this function is called, there are no locks held. That also means
+that the state of the owner and lock can change when entered into this function.
+
+Before this function is called, the task has already had rt_mutex_adjust_prio
+performed on it. This means that the task is set to the priority that it
+should be at, but the plist nodes of the task's waiter have not been updated
+with the new priorities, and that this task may not be in the proper locations
+in the pi_lists and wait_lists that the task is blocked on. This function
+solves all that.
+
+A loop is entered, where task is the owner to be checked for PI changes that
+was passed by parameter (for the first iteration). The pi_lock of this task is
+taken to prevent any more changes to the pi_list of the task. This also
+prevents new tasks from completing the blocking on a mutex that is owned by this
+task.
+
+If the task is not blocked on a mutex then the loop is exited. We are at
+the top of the PI chain.
+
+A check is now done to see if the original waiter (the process that is blocked
+on the current mutex) is the top pi waiter of the task. That is, is this
+waiter on the top of the task's pi_list. If it is not, it either means that
+there is another process higher in priority that is blocked on one of the
+mutexes that the task owns, or that the waiter has just woken up via a signal
+or timeout and has left the PI chain. In either case, the loop is exited, since
+we don't need to do any more changes to the priority of the current task, or any
+task that owns a mutex that this current task is waiting on. A priority chain
+walk is only needed when a new top pi waiter is made to a task.
+
+The next check sees if the task's waiter plist node has the priority equal to
+the priority the task is set at. If they are equal, then we are done with
+the loop. Remember that the function started with the priority of the
+task adjusted, but the plist nodes that hold the task in other processes
+pi_lists have not been adjusted.
+
+Next, we look at the mutex that the task is blocked on. The mutex's wait_lock
+is taken. This is done by a spin_trylock, because the locking order of the
+pi_lock and wait_lock goes in the opposite direction. If we fail to grab the
+lock, the pi_lock is released, and we restart the loop.
+
+Now that we have both the pi_lock of the task as well as the wait_lock of
+the mutex the task is blocked on, we update the task's waiter's plist node
+that is located on the mutex's wait_list.
+
+Now we release the pi_lock of the task.
+
+Next the owner of the mutex has its pi_lock taken, so we can update the
+task's entry in the owner's pi_list. If the task is the highest priority
+process on the mutex's wait_list, then we remove the previous top waiter
+from the owner's pi_list, and replace it with the task.
+
+Note: It is possible that the task was the current top waiter on the mutex,
+ in which case the task is not yet on the pi_list of the waiter. This
+ is OK, since plist_del does nothing if the plist node is not on any
+ list.
+
+If the task was not the top waiter of the mutex, but it was before we
+did the priority updates, that means we are deboosting/lowering the
+task. In this case, the task is removed from the pi_list of the owner,
+and the new top waiter is added.
+
+Lastly, we unlock both the pi_lock of the task, as well as the mutex's
+wait_lock, and continue the loop again. On the next iteration of the
+loop, the previous owner of the mutex will be the task that will be
+processed.
+
+Note: One might think that the owner of this mutex might have changed
+ since we just grab the mutex's wait_lock. And one could be right.
+ The important thing to remember is that the owner could not have
+ become the task that is being processed in the PI chain, since
+ we have taken that task's pi_lock at the beginning of the loop.
+ So as long as there is an owner of this mutex that is not the same
+ process as the tasked being worked on, we are OK.
+
+ Looking closely at the code, one might be confused. The check for the
+ end of the PI chain is when the task isn't blocked on anything or the
+ task's waiter structure "task" element is NULL. This check is
+ protected only by the task's pi_lock. But the code to unlock the mutex
+ sets the task's waiter structure "task" element to NULL with only
+ the protection of the mutex's wait_lock, which was not taken yet.
+ Isn't this a race condition if the task becomes the new owner?
+
+ The answer is No! The trick is the spin_trylock of the mutex's
+ wait_lock. If we fail that lock, we release the pi_lock of the
+ task and continue the loop, doing the end of PI chain check again.
+
+ In the code to release the lock, the wait_lock of the mutex is held
+ the entire time, and it is not let go when we grab the pi_lock of the
+ new owner of the mutex. So if the switch of a new owner were to happen
+ after the check for end of the PI chain and the grabbing of the
+ wait_lock, the unlocking code would spin on the new owner's pi_lock
+ but never give up the wait_lock. So the PI chain loop is guaranteed to
+ fail the spin_trylock on the wait_lock, release the pi_lock, and
+ try again.
+
+ If you don't quite understand the above, that's OK. You don't have to,
+ unless you really want to make a proof out of it ;)
+
+
+Pending Owners and Lock stealing
+--------------------------------
+
+One of the flags in the owner field of the mutex structure is "Pending Owner".
+What this means is that an owner was chosen by the process releasing the
+mutex, but that owner has yet to wake up and actually take the mutex.
+
+Why is this important? Why can't we just give the mutex to another process
+and be done with it?
+
+The PI code is to help with real-time processes, and to let the highest
+priority process run as long as possible with little latencies and delays.
+If a high priority process owns a mutex that a lower priority process is
+blocked on, when the mutex is released it would be given to the lower priority
+process. What if the higher priority process wants to take that mutex again.
+The high priority process would fail to take that mutex that it just gave up
+and it would need to boost the lower priority process to run with full
+latency of that critical section (since the low priority process just entered
+it).
+
+There's no reason a high priority process that gives up a mutex should be
+penalized if it tries to take that mutex again. If the new owner of the
+mutex has not woken up yet, there's no reason that the higher priority process
+could not take that mutex away.
+
+To solve this, we introduced Pending Ownership and Lock Stealing. When a
+new process is given a mutex that it was blocked on, it is only given
+pending ownership. This means that it's the new owner, unless a higher
+priority process comes in and tries to grab that mutex. If a higher priority
+process does come along and wants that mutex, we let the higher priority
+process "steal" the mutex from the pending owner (only if it is still pending)
+and continue with the mutex.
+
+
+Taking of a mutex (The walk through)
+------------------------------------
+
+OK, now let's take a look at the detailed walk through of what happens when
+taking a mutex.
+
+The first thing that is tried is the fast taking of the mutex. This is
+done when we have CMPXCHG enabled (otherwise the fast taking automatically
+fails). Only when the owner field of the mutex is NULL can the lock be
+taken with the CMPXCHG and nothing else needs to be done.
+
+If there is contention on the lock, whether it is owned or pending owner
+we go about the slow path (rt_mutex_slowlock).
+
+The slow path function is where the task's waiter structure is created on
+the stack. This is because the waiter structure is only needed for the
+scope of this function. The waiter structure holds the nodes to store
+the task on the wait_list of the mutex, and if need be, the pi_list of
+the owner.
+
+The wait_lock of the mutex is taken since the slow path of unlocking the
+mutex also takes this lock.
+
+We then call try_to_take_rt_mutex. This is where the architecture that
+does not implement CMPXCHG would always grab the lock (if there's no
+contention).
+
+try_to_take_rt_mutex is used every time the task tries to grab a mutex in the
+slow path. The first thing that is done here is an atomic setting of
+the "Has Waiters" flag of the mutex's owner field. Yes, this could really
+be false, because if the mutex has no owner, there are no waiters and
+the current task also won't have any waiters. But we don't have the lock
+yet, so we assume we are going to be a waiter. The reason for this is to
+play nice for those architectures that do have CMPXCHG. By setting this flag
+now, the owner of the mutex can't release the mutex without going into the
+slow unlock path, and it would then need to grab the wait_lock, which this
+code currently holds. So setting the "Has Waiters" flag forces the owner
+to synchronize with this code.
+
+Now that we know that we can't have any races with the owner releasing the
+mutex, we check to see if we can take the ownership. This is done if the
+mutex doesn't have a owner, or if we can steal the mutex from a pending
+owner. Let's look at the situations we have here.
+
+ 1) Has owner that is pending
+ ----------------------------
+
+ The mutex has a owner, but it hasn't woken up and the mutex flag
+ "Pending Owner" is set. The first check is to see if the owner isn't the
+ current task. This is because this function is also used for the pending
+ owner to grab the mutex. When a pending owner wakes up, it checks to see
+ if it can take the mutex, and this is done if the owner is already set to
+ itself. If so, we succeed and leave the function, clearing the "Pending
+ Owner" bit.
+
+ If the pending owner is not current, we check to see if the current priority is
+ higher than the pending owner. If not, we fail the function and return.
+
+ There's also something special about a pending owner. That is a pending owner
+ is never blocked on a mutex. So there is no PI chain to worry about. It also
+ means that if the mutex doesn't have any waiters, there's no accounting needed
+ to update the pending owner's pi_list, since we only worry about processes
+ blocked on the current mutex.
+
+ If there are waiters on this mutex, and we just stole the ownership, we need
+ to take the top waiter, remove it from the pi_list of the pending owner, and
+ add it to the current pi_list. Note that at this moment, the pending owner
+ is no longer on the list of waiters. This is fine, since the pending owner
+ would add itself back when it realizes that it had the ownership stolen
+ from itself. When the pending owner tries to grab the mutex, it will fail
+ in try_to_take_rt_mutex if the owner field points to another process.
+
+ 2) No owner
+ -----------
+
+ If there is no owner (or we successfully stole the lock), we set the owner
+ of the mutex to current, and set the flag of "Has Waiters" if the current
+ mutex actually has waiters, or we clear the flag if it doesn't. See, it was
+ OK that we set that flag early, since now it is cleared.
+
+ 3) Failed to grab ownership
+ ---------------------------
+
+ The most interesting case is when we fail to take ownership. This means that
+ there exists an owner, or there's a pending owner with equal or higher
+ priority than the current task.
+
+We'll continue on the failed case.
+
+If the mutex has a timeout, we set up a timer to go off to break us out
+of this mutex if we failed to get it after a specified amount of time.
+
+Now we enter a loop that will continue to try to take ownership of the mutex, or
+fail from a timeout or signal.
+
+Once again we try to take the mutex. This will usually fail the first time
+in the loop, since it had just failed to get the mutex. But the second time
+in the loop, this would likely succeed, since the task would likely be
+the pending owner.
+
+If the mutex is TASK_INTERRUPTIBLE a check for signals and timeout is done
+here.
+
+The waiter structure has a "task" field that points to the task that is blocked
+on the mutex. This field can be NULL the first time it goes through the loop
+or if the task is a pending owner and had its mutex stolen. If the "task"
+field is NULL then we need to set up the accounting for it.
+
+Task blocks on mutex
+--------------------
+
+The accounting of a mutex and process is done with the waiter structure of
+the process. The "task" field is set to the process, and the "lock" field
+to the mutex. The plist nodes are initialized to the processes current
+priority.
+
+Since the wait_lock was taken at the entry of the slow lock, we can safely
+add the waiter to the wait_list. If the current process is the highest
+priority process currently waiting on this mutex, then we remove the
+previous top waiter process (if it exists) from the pi_list of the owner,
+and add the current process to that list. Since the pi_list of the owner
+has changed, we call rt_mutex_adjust_prio on the owner to see if the owner
+should adjust its priority accordingly.
+
+If the owner is also blocked on a lock, and had its pi_list changed
+(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead
+and run rt_mutex_adjust_prio_chain on the owner, as described earlier.
+
+Now all locks are released, and if the current process is still blocked on a
+mutex (waiter "task" field is not NULL), then we go to sleep (call schedule).
+
+Waking up in the loop
+---------------------
+
+The schedule can then wake up for a few reasons.
+ 1) we were given pending ownership of the mutex.
+ 2) we received a signal and was TASK_INTERRUPTIBLE
+ 3) we had a timeout and was TASK_INTERRUPTIBLE
+
+In any of these cases, we continue the loop and once again try to grab the
+ownership of the mutex. If we succeed, we exit the loop, otherwise we continue
+and on signal and timeout, will exit the loop, or if we had the mutex stolen
+we just simply add ourselves back on the lists and go back to sleep.
+
+Note: For various reasons, because of timeout and signals, the steal mutex
+ algorithm needs to be careful. This is because the current process is
+ still on the wait_list. And because of dynamic changing of priorities,
+ especially on SCHED_OTHER tasks, the current process can be the
+ highest priority task on the wait_list.
+
+Failed to get mutex on Timeout or Signal
+----------------------------------------
+
+If a timeout or signal occurred, the waiter's "task" field would not be
+NULL and the task needs to be taken off the wait_list of the mutex and perhaps
+pi_list of the owner. If this process was a high priority process, then
+the rt_mutex_adjust_prio_chain needs to be executed again on the owner,
+but this time it will be lowering the priorities.
+
+
+Unlocking the Mutex
+-------------------
+
+The unlocking of a mutex also has a fast path for those architectures with
+CMPXCHG. Since the taking of a mutex on contention always sets the
+"Has Waiters" flag of the mutex's owner, we use this to know if we need to
+take the slow path when unlocking the mutex. If the mutex doesn't have any
+waiters, the owner field of the mutex would equal the current process and
+the mutex can be unlocked by just replacing the owner field with NULL.
+
+If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available),
+the slow unlock path is taken.
+
+The first thing done in the slow unlock path is to take the wait_lock of the
+mutex. This synchronizes the locking and unlocking of the mutex.
+
+A check is made to see if the mutex has waiters or not. On architectures that
+do not have CMPXCHG, this is the location that the owner of the mutex will
+determine if a waiter needs to be awoken or not. On architectures that
+do have CMPXCHG, that check is done in the fast path, but it is still needed
+in the slow path too. If a waiter of a mutex woke up because of a signal
+or timeout between the time the owner failed the fast path CMPXCHG check and
+the grabbing of the wait_lock, the mutex may not have any waiters, thus the
+owner still needs to make this check. If there are no waiters then the mutex
+owner field is set to NULL, the wait_lock is released and nothing more is
+needed.
+
+If there are waiters, then we need to wake one up and give that waiter
+pending ownership.
+
+On the wake up code, the pi_lock of the current owner is taken. The top
+waiter of the lock is found and removed from the wait_list of the mutex
+as well as the pi_list of the current owner. The task field of the new
+pending owner's waiter structure is set to NULL, and the owner field of the
+mutex is set to the new owner with the "Pending Owner" bit set, as well
+as the "Has Waiters" bit if there still are other processes blocked on the
+mutex.
+
+The pi_lock of the previous owner is released, and the new pending owner's
+pi_lock is taken. Remember that this is the trick to prevent the race
+condition in rt_mutex_adjust_prio_chain from adding itself as a waiter
+on the mutex.
+
+We now clear the "pi_blocked_on" field of the new pending owner, and if
+the mutex still has waiters pending, we add the new top waiter to the pi_list
+of the pending owner.
+
+Finally we unlock the pi_lock of the pending owner and wake it up.
+
+
+Contact
+-------
+
+For updates on this document, please email Steven Rostedt <rostedt@goodmis.org>
+
+
+Credits
+-------
+
+Author: Steven Rostedt <rostedt@goodmis.org>
+
+Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap
+
+Updates
+-------
+
+This document was originally written for 2.6.17-rc3-mm1
diff --git a/Documentation/locking/rt-mutex.txt b/Documentation/locking/rt-mutex.txt
new file mode 100644
index 000000000..243393d88
--- /dev/null
+++ b/Documentation/locking/rt-mutex.txt
@@ -0,0 +1,79 @@
+RT-mutex subsystem with PI support
+----------------------------------
+
+RT-mutexes with priority inheritance are used to support PI-futexes,
+which enable pthread_mutex_t priority inheritance attributes
+(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
+about PI-futexes.]
+
+This technology was developed in the -rt tree and streamlined for
+pthread_mutex support.
+
+Basic principles:
+-----------------
+
+RT-mutexes extend the semantics of simple mutexes by the priority
+inheritance protocol.
+
+A low priority owner of a rt-mutex inherits the priority of a higher
+priority waiter until the rt-mutex is released. If the temporarily
+boosted owner blocks on a rt-mutex itself it propagates the priority
+boosting to the owner of the other rt_mutex it gets blocked on. The
+priority boosting is immediately removed once the rt_mutex has been
+unlocked.
+
+This approach allows us to shorten the block of high-prio tasks on
+mutexes which protect shared resources. Priority inheritance is not a
+magic bullet for poorly designed applications, but it allows
+well-designed applications to use userspace locks in critical parts of
+an high priority thread, without losing determinism.
+
+The enqueueing of the waiters into the rtmutex waiter list is done in
+priority order. For same priorities FIFO order is chosen. For each
+rtmutex, only the top priority waiter is enqueued into the owner's
+priority waiters list. This list too queues in priority order. Whenever
+the top priority waiter of a task changes (for example it timed out or
+got a signal), the priority of the owner task is readjusted. [The
+priority enqueueing is handled by "plists", see include/linux/plist.h
+for more details.]
+
+RT-mutexes are optimized for fastpath operations and have no internal
+locking overhead when locking an uncontended mutex or unlocking a mutex
+without waiters. The optimized fastpath operations require cmpxchg
+support. [If that is not available then the rt-mutex internal spinlock
+is used]
+
+The state of the rt-mutex is tracked via the owner field of the rt-mutex
+structure:
+
+rt_mutex->owner holds the task_struct pointer of the owner. Bit 0 and 1
+are used to keep track of the "owner is pending" and "rtmutex has
+waiters" state.
+
+ owner bit1 bit0
+ NULL 0 0 mutex is free (fast acquire possible)
+ NULL 0 1 invalid state
+ NULL 1 0 Transitional state*
+ NULL 1 1 invalid state
+ taskpointer 0 0 mutex is held (fast release possible)
+ taskpointer 0 1 task is pending owner
+ taskpointer 1 0 mutex is held and has waiters
+ taskpointer 1 1 task is pending owner and mutex has waiters
+
+Pending-ownership handling is a performance optimization:
+pending-ownership is assigned to the first (highest priority) waiter of
+the mutex, when the mutex is released. The thread is woken up and once
+it starts executing it can acquire the mutex. Until the mutex is taken
+by it (bit 0 is cleared) a competing higher priority thread can "steal"
+the mutex which puts the woken up thread back on the waiters list.
+
+The pending-ownership optimization is especially important for the
+uninterrupted workflow of high-prio tasks which repeatedly
+takes/releases locks that have lower-prio waiters. Without this
+optimization the higher-prio thread would ping-pong to the lower-prio
+task [because at unlock time we always assign a new owner].
+
+(*) The "mutex has waiters" bit gets set to take the lock. If the lock
+doesn't already have an owner, this bit is quickly cleared if there are
+no waiters. So this is a transitional state to synchronize with looking
+at the owner field of the mutex and the mutex owner releasing the lock.
diff --git a/Documentation/locking/spinlocks.txt b/Documentation/locking/spinlocks.txt
new file mode 100644
index 000000000..ff35e40bd
--- /dev/null
+++ b/Documentation/locking/spinlocks.txt
@@ -0,0 +1,167 @@
+Lesson 1: Spin locks
+
+The most basic primitive for locking is spinlock.
+
+static DEFINE_SPINLOCK(xxx_lock);
+
+ unsigned long flags;
+
+ spin_lock_irqsave(&xxx_lock, flags);
+ ... critical section here ..
+ spin_unlock_irqrestore(&xxx_lock, flags);
+
+The above is always safe. It will disable interrupts _locally_, but the
+spinlock itself will guarantee the global lock, so it will guarantee that
+there is only one thread-of-control within the region(s) protected by that
+lock. This works well even under UP also, so the code does _not_ need to
+worry about UP vs SMP issues: the spinlocks work correctly under both.
+
+ NOTE! Implications of spin_locks for memory are further described in:
+
+ Documentation/memory-barriers.txt
+ (5) LOCK operations.
+ (6) UNLOCK operations.
+
+The above is usually pretty simple (you usually need and want only one
+spinlock for most things - using more than one spinlock can make things a
+lot more complex and even slower and is usually worth it only for
+sequences that you _know_ need to be split up: avoid it at all cost if you
+aren't sure).
+
+This is really the only really hard part about spinlocks: once you start
+using spinlocks they tend to expand to areas you might not have noticed
+before, because you have to make sure the spinlocks correctly protect the
+shared data structures _everywhere_ they are used. The spinlocks are most
+easily added to places that are completely independent of other code (for
+example, internal driver data structures that nobody else ever touches).
+
+ NOTE! The spin-lock is safe only when you _also_ use the lock itself
+ to do locking across CPU's, which implies that EVERYTHING that
+ touches a shared variable has to agree about the spinlock they want
+ to use.
+
+----
+
+Lesson 2: reader-writer spinlocks.
+
+If your data accesses have a very natural pattern where you usually tend
+to mostly read from the shared variables, the reader-writer locks
+(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple
+readers to be in the same critical region at once, but if somebody wants
+to change the variables it has to get an exclusive write lock.
+
+ NOTE! reader-writer locks require more atomic memory operations than
+ simple spinlocks. Unless the reader critical section is long, you
+ are better off just using spinlocks.
+
+The routines look the same as above:
+
+ rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
+
+ unsigned long flags;
+
+ read_lock_irqsave(&xxx_lock, flags);
+ .. critical section that only reads the info ...
+ read_unlock_irqrestore(&xxx_lock, flags);
+
+ write_lock_irqsave(&xxx_lock, flags);
+ .. read and write exclusive access to the info ...
+ write_unlock_irqrestore(&xxx_lock, flags);
+
+The above kind of lock may be useful for complex data structures like
+linked lists, especially searching for entries without changing the list
+itself. The read lock allows many concurrent readers. Anything that
+_changes_ the list will have to get the write lock.
+
+ NOTE! RCU is better for list traversal, but requires careful
+ attention to design detail (see Documentation/RCU/listRCU.txt).
+
+Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
+time need to do any changes (even if you don't do it every time), you have
+to get the write-lock at the very beginning.
+
+ NOTE! We are working hard to remove reader-writer spinlocks in most
+ cases, so please don't add a new one without consensus. (Instead, see
+ Documentation/RCU/rcu.txt for complete information.)
+
+----
+
+Lesson 3: spinlocks revisited.
+
+The single spin-lock primitives above are by no means the only ones. They
+are the most safe ones, and the ones that work under all circumstances,
+but partly _because_ they are safe they are also fairly slow. They are slower
+than they'd need to be, because they do have to disable interrupts
+(which is just a single instruction on a x86, but it's an expensive one -
+and on other architectures it can be worse).
+
+If you have a case where you have to protect a data structure across
+several CPU's and you want to use spinlocks you can potentially use
+cheaper versions of the spinlocks. IFF you know that the spinlocks are
+never used in interrupt handlers, you can use the non-irq versions:
+
+ spin_lock(&lock);
+ ...
+ spin_unlock(&lock);
+
+(and the equivalent read-write versions too, of course). The spinlock will
+guarantee the same kind of exclusive access, and it will be much faster.
+This is useful if you know that the data in question is only ever
+manipulated from a "process context", ie no interrupts involved.
+
+The reasons you mustn't use these versions if you have interrupts that
+play with the spinlock is that you can get deadlocks:
+
+ spin_lock(&lock);
+ ...
+ <- interrupt comes in:
+ spin_lock(&lock);
+
+where an interrupt tries to lock an already locked variable. This is ok if
+the other interrupt happens on another CPU, but it is _not_ ok if the
+interrupt happens on the same CPU that already holds the lock, because the
+lock will obviously never be released (because the interrupt is waiting
+for the lock, and the lock-holder is interrupted by the interrupt and will
+not continue until the interrupt has been processed).
+
+(This is also the reason why the irq-versions of the spinlocks only need
+to disable the _local_ interrupts - it's ok to use spinlocks in interrupts
+on other CPU's, because an interrupt on another CPU doesn't interrupt the
+CPU that holds the lock, so the lock-holder can continue and eventually
+releases the lock).
+
+Note that you can be clever with read-write locks and interrupts. For
+example, if you know that the interrupt only ever gets a read-lock, then
+you can use a non-irq version of read locks everywhere - because they
+don't block on each other (and thus there is no dead-lock wrt interrupts.
+But when you do the write-lock, you have to use the irq-safe version.
+
+For an example of being clever with rw-locks, see the "waitqueue_lock"
+handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
+within an interrupt, they only read the queue in order to know whom to
+wake up. So read-locks are safe (which is good: they are very common
+indeed), while write-locks need to protect themselves against interrupts.
+
+ Linus
+
+----
+
+Reference information:
+
+For dynamic initialization, use spin_lock_init() or rwlock_init() as
+appropriate:
+
+ spinlock_t xxx_lock;
+ rwlock_t xxx_rw_lock;
+
+ static int __init xxx_init(void)
+ {
+ spin_lock_init(&xxx_lock);
+ rwlock_init(&xxx_rw_lock);
+ ...
+ }
+
+ module_init(xxx_init);
+
+For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
+__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt
new file mode 100644
index 000000000..8a112dc30
--- /dev/null
+++ b/Documentation/locking/ww-mutex-design.txt
@@ -0,0 +1,344 @@
+Wait/Wound Deadlock-Proof Mutex Design
+======================================
+
+Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
+
+Motivation for WW-Mutexes
+-------------------------
+
+GPU's do operations that commonly involve many buffers. Those buffers
+can be shared across contexts/processes, exist in different memory
+domains (for example VRAM vs system memory), and so on. And with
+PRIME / dmabuf, they can even be shared across devices. So there are
+a handful of situations where the driver needs to wait for buffers to
+become ready. If you think about this in terms of waiting on a buffer
+mutex for it to become available, this presents a problem because
+there is no way to guarantee that buffers appear in a execbuf/batch in
+the same order in all contexts. That is directly under control of
+userspace, and a result of the sequence of GL calls that an application
+makes. Which results in the potential for deadlock. The problem gets
+more complex when you consider that the kernel may need to migrate the
+buffer(s) into VRAM before the GPU operates on the buffer(s), which
+may in turn require evicting some other buffers (and you don't want to
+evict other buffers which are already queued up to the GPU), but for a
+simplified understanding of the problem you can ignore this.
+
+The algorithm that the TTM graphics subsystem came up with for dealing with
+this problem is quite simple. For each group of buffers (execbuf) that need
+to be locked, the caller would be assigned a unique reservation id/ticket,
+from a global counter. In case of deadlock while locking all the buffers
+associated with a execbuf, the one with the lowest reservation ticket (i.e.
+the oldest task) wins, and the one with the higher reservation id (i.e. the
+younger task) unlocks all of the buffers that it has already locked, and then
+tries again.
+
+In the RDBMS literature this deadlock handling approach is called wait/wound:
+The older tasks waits until it can acquire the contended lock. The younger tasks
+needs to back off and drop all the locks it is currently holding, i.e. the
+younger task is wounded.
+
+Concepts
+--------
+
+Compared to normal mutexes two additional concepts/objects show up in the lock
+interface for w/w mutexes:
+
+Acquire context: To ensure eventual forward progress it is important the a task
+trying to acquire locks doesn't grab a new reservation id, but keeps the one it
+acquired when starting the lock acquisition. This ticket is stored in the
+acquire context. Furthermore the acquire context keeps track of debugging state
+to catch w/w mutex interface abuse.
+
+W/w class: In contrast to normal mutexes the lock class needs to be explicit for
+w/w mutexes, since it is required to initialize the acquire context.
+
+Furthermore there are three different class of w/w lock acquire functions:
+
+* Normal lock acquisition with a context, using ww_mutex_lock.
+
+* Slowpath lock acquisition on the contending lock, used by the wounded task
+ after having dropped all already acquired locks. These functions have the
+ _slow postfix.
+
+ From a simple semantics point-of-view the _slow functions are not strictly
+ required, since simply calling the normal ww_mutex_lock functions on the
+ contending lock (after having dropped all other already acquired locks) will
+ work correctly. After all if no other ww mutex has been acquired yet there's
+ no deadlock potential and hence the ww_mutex_lock call will block and not
+ prematurely return -EDEADLK. The advantage of the _slow functions is in
+ interface safety:
+ - ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow
+ has a void return type. Note that since ww mutex code needs loops/retries
+ anyway the __must_check doesn't result in spurious warnings, even though the
+ very first lock operation can never fail.
+ - When full debugging is enabled ww_mutex_lock_slow checks that all acquired
+ ww mutex have been released (preventing deadlocks) and makes sure that we
+ block on the contending lock (preventing spinning through the -EDEADLK
+ slowpath until the contended lock can be acquired).
+
+* Functions to only acquire a single w/w mutex, which results in the exact same
+ semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL
+ context.
+
+ Again this is not strictly required. But often you only want to acquire a
+ single lock in which case it's pointless to set up an acquire context (and so
+ better to avoid grabbing a deadlock avoidance ticket).
+
+Of course, all the usual variants for handling wake-ups due to signals are also
+provided.
+
+Usage
+-----
+
+Three different ways to acquire locks within the same w/w class. Common
+definitions for methods #1 and #2:
+
+static DEFINE_WW_CLASS(ww_class);
+
+struct obj {
+ struct ww_mutex lock;
+ /* obj data */
+};
+
+struct obj_entry {
+ struct list_head head;
+ struct obj *obj;
+};
+
+Method 1, using a list in execbuf->buffers that's not allowed to be reordered.
+This is useful if a list of required objects is already tracked somewhere.
+Furthermore the lock helper can use propagate the -EALREADY return code back to
+the caller as a signal that an object is twice on the list. This is useful if
+the list is constructed from userspace input and the ABI requires userspace to
+not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl).
+
+int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+ struct obj *res_obj = NULL;
+ struct obj_entry *contended_entry = NULL;
+ struct obj_entry *entry;
+
+ ww_acquire_init(ctx, &ww_class);
+
+retry:
+ list_for_each_entry (entry, list, head) {
+ if (entry->obj == res_obj) {
+ res_obj = NULL;
+ continue;
+ }
+ ret = ww_mutex_lock(&entry->obj->lock, ctx);
+ if (ret < 0) {
+ contended_entry = entry;
+ goto err;
+ }
+ }
+
+ ww_acquire_done(ctx);
+ return 0;
+
+err:
+ list_for_each_entry_continue_reverse (entry, list, head)
+ ww_mutex_unlock(&entry->obj->lock);
+
+ if (res_obj)
+ ww_mutex_unlock(&res_obj->lock);
+
+ if (ret == -EDEADLK) {
+ /* we lost out in a seqno race, lock and retry.. */
+ ww_mutex_lock_slow(&contended_entry->obj->lock, ctx);
+ res_obj = contended_entry->obj;
+ goto retry;
+ }
+ ww_acquire_fini(ctx);
+
+ return ret;
+}
+
+Method 2, using a list in execbuf->buffers that can be reordered. Same semantics
+of duplicate entry detection using -EALREADY as method 1 above. But the
+list-reordering allows for a bit more idiomatic code.
+
+int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+ struct obj_entry *entry, *entry2;
+
+ ww_acquire_init(ctx, &ww_class);
+
+ list_for_each_entry (entry, list, head) {
+ ret = ww_mutex_lock(&entry->obj->lock, ctx);
+ if (ret < 0) {
+ entry2 = entry;
+
+ list_for_each_entry_continue_reverse (entry2, list, head)
+ ww_mutex_unlock(&entry2->obj->lock);
+
+ if (ret != -EDEADLK) {
+ ww_acquire_fini(ctx);
+ return ret;
+ }
+
+ /* we lost out in a seqno race, lock and retry.. */
+ ww_mutex_lock_slow(&entry->obj->lock, ctx);
+
+ /*
+ * Move buf to head of the list, this will point
+ * buf->next to the first unlocked entry,
+ * restarting the for loop.
+ */
+ list_del(&entry->head);
+ list_add(&entry->head, list);
+ }
+ }
+
+ ww_acquire_done(ctx);
+ return 0;
+}
+
+Unlocking works the same way for both methods #1 and #2:
+
+void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+ struct obj_entry *entry;
+
+ list_for_each_entry (entry, list, head)
+ ww_mutex_unlock(&entry->obj->lock);
+
+ ww_acquire_fini(ctx);
+}
+
+Method 3 is useful if the list of objects is constructed ad-hoc and not upfront,
+e.g. when adjusting edges in a graph where each node has its own ww_mutex lock,
+and edges can only be changed when holding the locks of all involved nodes. w/w
+mutexes are a natural fit for such a case for two reasons:
+- They can handle lock-acquisition in any order which allows us to start walking
+ a graph from a starting point and then iteratively discovering new edges and
+ locking down the nodes those edges connect to.
+- Due to the -EALREADY return code signalling that a given objects is already
+ held there's no need for additional book-keeping to break cycles in the graph
+ or keep track off which looks are already held (when using more than one node
+ as a starting point).
+
+Note that this approach differs in two important ways from the above methods:
+- Since the list of objects is dynamically constructed (and might very well be
+ different when retrying due to hitting the -EDEADLK wound condition) there's
+ no need to keep any object on a persistent list when it's not locked. We can
+ therefore move the list_head into the object itself.
+- On the other hand the dynamic object list construction also means that the -EALREADY return
+ code can't be propagated.
+
+Note also that methods #1 and #2 and method #3 can be combined, e.g. to first lock a
+list of starting nodes (passed in from userspace) using one of the above
+methods. And then lock any additional objects affected by the operations using
+method #3 below. The backoff/retry procedure will be a bit more involved, since
+when the dynamic locking step hits -EDEADLK we also need to unlock all the
+objects acquired with the fixed list. But the w/w mutex debug checks will catch
+any interface misuse for these cases.
+
+Also, method 3 can't fail the lock acquisition step since it doesn't return
+-EALREADY. Of course this would be different when using the _interruptible
+variants, but that's outside of the scope of these examples here.
+
+struct obj {
+ struct ww_mutex ww_mutex;
+ struct list_head locked_list;
+};
+
+static DEFINE_WW_CLASS(ww_class);
+
+void __unlock_objs(struct list_head *list)
+{
+ struct obj *entry, *temp;
+
+ list_for_each_entry_safe (entry, temp, list, locked_list) {
+ /* need to do that before unlocking, since only the current lock holder is
+ allowed to use object */
+ list_del(&entry->locked_list);
+ ww_mutex_unlock(entry->ww_mutex)
+ }
+}
+
+void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+ struct obj *obj;
+
+ ww_acquire_init(ctx, &ww_class);
+
+retry:
+ /* re-init loop start state */
+ loop {
+ /* magic code which walks over a graph and decides which objects
+ * to lock */
+
+ ret = ww_mutex_lock(obj->ww_mutex, ctx);
+ if (ret == -EALREADY) {
+ /* we have that one already, get to the next object */
+ continue;
+ }
+ if (ret == -EDEADLK) {
+ __unlock_objs(list);
+
+ ww_mutex_lock_slow(obj, ctx);
+ list_add(&entry->locked_list, list);
+ goto retry;
+ }
+
+ /* locked a new object, add it to the list */
+ list_add_tail(&entry->locked_list, list);
+ }
+
+ ww_acquire_done(ctx);
+ return 0;
+}
+
+void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+ __unlock_objs(list);
+ ww_acquire_fini(ctx);
+}
+
+Method 4: Only lock one single objects. In that case deadlock detection and
+prevention is obviously overkill, since with grabbing just one lock you can't
+produce a deadlock within just one class. To simplify this case the w/w mutex
+api can be used with a NULL context.
+
+Implementation Details
+----------------------
+
+Design:
+ ww_mutex currently encapsulates a struct mutex, this means no extra overhead for
+ normal mutex locks, which are far more common. As such there is only a small
+ increase in code size if wait/wound mutexes are not used.
+
+ In general, not much contention is expected. The locks are typically used to
+ serialize access to resources for devices. The only way to make wakeups
+ smarter would be at the cost of adding a field to struct mutex_waiter. This
+ would add overhead to all cases where normal mutexes are used, and
+ ww_mutexes are generally less performance sensitive.
+
+Lockdep:
+ Special care has been taken to warn for as many cases of api abuse
+ as possible. Some common api abuses will be caught with
+ CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended.
+
+ Some of the errors which will be warned about:
+ - Forgetting to call ww_acquire_fini or ww_acquire_init.
+ - Attempting to lock more mutexes after ww_acquire_done.
+ - Attempting to lock the wrong mutex after -EDEADLK and
+ unlocking all mutexes.
+ - Attempting to lock the right mutex after -EDEADLK,
+ before unlocking all mutexes.
+
+ - Calling ww_mutex_lock_slow before -EDEADLK was returned.
+
+ - Unlocking mutexes with the wrong unlock function.
+ - Calling one of the ww_acquire_* twice on the same context.
+ - Using a different ww_class for the mutex than for the ww_acquire_ctx.
+ - Normal lockdep errors that can result in deadlocks.
+
+ Some of the lockdep errors that can result in deadlocks:
+ - Calling ww_acquire_init to initialize a second ww_acquire_ctx before
+ having called ww_acquire_fini on the first.
+ - 'normal' deadlocks that can occur.
+
+FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic
+implemented.
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt
new file mode 100644
index 000000000..ab0baa692
--- /dev/null
+++ b/Documentation/lockup-watchdogs.txt
@@ -0,0 +1,63 @@
+===============================================================
+Softlockup detector and hardlockup detector (aka nmi_watchdog)
+===============================================================
+
+The Linux kernel can act as a watchdog to detect both soft and hard
+lockups.
+
+A 'softlockup' is defined as a bug that causes the kernel to loop in
+kernel mode for more than 20 seconds (see "Implementation" below for
+details), without giving other tasks a chance to run. The current
+stack trace is displayed upon detection and, by default, the system
+will stay locked up. Alternatively, the kernel can be configured to
+panic; a sysctl, "kernel.softlockup_panic", a kernel parameter,
+"softlockup_panic" (see "Documentation/kernel-parameters.txt" for
+details), and a compile option, "BOOTPARAM_SOFTLOCKUP_PANIC", are
+provided for this.
+
+A 'hardlockup' is defined as a bug that causes the CPU to loop in
+kernel mode for more than 10 seconds (see "Implementation" below for
+details), without letting other interrupts have a chance to run.
+Similarly to the softlockup case, the current stack trace is displayed
+upon detection and the system will stay locked up unless the default
+behavior is changed, which can be done through a compile time knob,
+"BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog"
+(see "Documentation/kernel-parameters.txt" for details).
+
+The panic option can be used in combination with panic_timeout (this
+timeout is set through the confusingly named "kernel.panic" sysctl),
+to cause the system to reboot automatically after a specified amount
+of time.
+
+=== Implementation ===
+
+The soft and hard lockup detectors are built on top of the hrtimer and
+perf subsystems, respectively. A direct consequence of this is that,
+in principle, they should work in any architecture where these
+subsystems are present.
+
+A periodic hrtimer runs to generate interrupts and kick the watchdog
+task. An NMI perf event is generated every "watchdog_thresh"
+(compile-time initialized to 10 and configurable through sysctl of the
+same name) seconds to check for hardlockups. If any CPU in the system
+does not receive any hrtimer interrupt during that time the
+'hardlockup detector' (the handler for the NMI perf event) will
+generate a kernel warning or call panic, depending on the
+configuration.
+
+The watchdog task is a high priority kernel thread that updates a
+timestamp every time it is scheduled. If that timestamp is not updated
+for 2*watchdog_thresh seconds (the softlockup threshold) the
+'softlockup detector' (coded inside the hrtimer callback function)
+will dump useful debug information to the system log, after which it
+will call panic if it was instructed to do so or resume execution of
+other kernel code.
+
+The period of the hrtimer is 2*watchdog_thresh/5, which means it has
+two or three chances to generate an interrupt before the hardlockup
+detector kicks in.
+
+As explained above, a kernel knob is provided that allows
+administrators to configure the period of the hrtimer and the perf
+event. The right value for a particular environment is a trade-off
+between fast response to lockups and detection overhead.
diff --git a/Documentation/logo.gif b/Documentation/logo.gif
new file mode 100644
index 000000000..2eae75fec
--- /dev/null
+++ b/Documentation/logo.gif
Binary files differ
diff --git a/Documentation/logo.txt b/Documentation/logo.txt
new file mode 100644
index 000000000..296f0f7f6
--- /dev/null
+++ b/Documentation/logo.txt
@@ -0,0 +1,13 @@
+This is the full-colour version of the currently unofficial Linux logo
+("currently unofficial" just means that there has been no paperwork and
+that I have not really announced it yet). It was created by Larry Ewing,
+and is freely usable as long as you acknowledge Larry as the original
+artist.
+
+Note that there are black-and-white versions of this available that
+scale down to smaller sizes and are better for letterheads or whatever
+you want to use it for: for the full range of logos take a look at
+Larry's web-page:
+
+ http://www.isc.tamu.edu/~lewing/linux/
+
diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt
new file mode 100644
index 000000000..ea45dd390
--- /dev/null
+++ b/Documentation/lzo.txt
@@ -0,0 +1,164 @@
+
+LZO stream format as understood by Linux's LZO decompressor
+===========================================================
+
+Introduction
+
+ This is not a specification. No specification seems to be publicly available
+ for the LZO stream format. This document describes what input format the LZO
+ decompressor as implemented in the Linux kernel understands. The file subject
+ of this analysis is lib/lzo/lzo1x_decompress_safe.c. No analysis was made on
+ the compressor nor on any other implementations though it seems likely that
+ the format matches the standard one. The purpose of this document is to
+ better understand what the code does in order to propose more efficient fixes
+ for future bug reports.
+
+Description
+
+ The stream is composed of a series of instructions, operands, and data. The
+ instructions consist in a few bits representing an opcode, and bits forming
+ the operands for the instruction, whose size and position depend on the
+ opcode and on the number of literals copied by previous instruction. The
+ operands are used to indicate :
+
+ - a distance when copying data from the dictionary (past output buffer)
+ - a length (number of bytes to copy from dictionary)
+ - the number of literals to copy, which is retained in variable "state"
+ as a piece of information for next instructions.
+
+ Optionally depending on the opcode and operands, extra data may follow. These
+ extra data can be a complement for the operand (eg: a length or a distance
+ encoded on larger values), or a literal to be copied to the output buffer.
+
+ The first byte of the block follows a different encoding from other bytes, it
+ seems to be optimized for literal use only, since there is no dictionary yet
+ prior to that byte.
+
+ Lengths are always encoded on a variable size starting with a small number
+ of bits in the operand. If the number of bits isn't enough to represent the
+ length, up to 255 may be added in increments by consuming more bytes with a
+ rate of at most 255 per extra byte (thus the compression ratio cannot exceed
+ around 255:1). The variable length encoding using #bits is always the same :
+
+ length = byte & ((1 << #bits) - 1)
+ if (!length) {
+ length = ((1 << #bits) - 1)
+ length += 255*(number of zero bytes)
+ length += first-non-zero-byte
+ }
+ length += constant (generally 2 or 3)
+
+ For references to the dictionary, distances are relative to the output
+ pointer. Distances are encoded using very few bits belonging to certain
+ ranges, resulting in multiple copy instructions using different encodings.
+ Certain encodings involve one extra byte, others involve two extra bytes
+ forming a little-endian 16-bit quantity (marked LE16 below).
+
+ After any instruction except the large literal copy, 0, 1, 2 or 3 literals
+ are copied before starting the next instruction. The number of literals that
+ were copied may change the meaning and behaviour of the next instruction. In
+ practice, only one instruction needs to know whether 0, less than 4, or more
+ literals were copied. This is the information stored in the <state> variable
+ in this implementation. This number of immediate literals to be copied is
+ generally encoded in the last two bits of the instruction but may also be
+ taken from the last two bits of an extra operand (eg: distance).
+
+ End of stream is declared when a block copy of distance 0 is seen. Only one
+ instruction may encode this distance (0001HLLL), it takes one LE16 operand
+ for the distance, thus requiring 3 bytes.
+
+ IMPORTANT NOTE : in the code some length checks are missing because certain
+ instructions are called under the assumption that a certain number of bytes
+ follow because it has already been garanteed before parsing the instructions.
+ They just have to "refill" this credit if they consume extra bytes. This is
+ an implementation design choice independant on the algorithm or encoding.
+
+Byte sequences
+
+ First byte encoding :
+
+ 0..17 : follow regular instruction encoding, see below. It is worth
+ noting that codes 16 and 17 will represent a block copy from
+ the dictionary which is empty, and that they will always be
+ invalid at this place.
+
+ 18..21 : copy 0..3 literals
+ state = (byte - 17) = 0..3 [ copy <state> literals ]
+ skip byte
+
+ 22..255 : copy literal string
+ length = (byte - 17) = 4..238
+ state = 4 [ don't copy extra literals ]
+ skip byte
+
+ Instruction encoding :
+
+ 0 0 0 0 X X X X (0..15)
+ Depends on the number of literals copied by the last instruction.
+ If last instruction did not copy any literal (state == 0), this
+ encoding will be a copy of 4 or more literal, and must be interpreted
+ like this :
+
+ 0 0 0 0 L L L L (0..15) : copy long literal string
+ length = 3 + (L ?: 15 + (zero_bytes * 255) + non_zero_byte)
+ state = 4 (no extra literals are copied)
+
+ If last instruction used to copy between 1 to 3 literals (encoded in
+ the instruction's opcode or distance), the instruction is a copy of a
+ 2-byte block from the dictionary within a 1kB distance. It is worth
+ noting that this instruction provides little savings since it uses 2
+ bytes to encode a copy of 2 other bytes but it encodes the number of
+ following literals for free. It must be interpreted like this :
+
+ 0 0 0 0 D D S S (0..15) : copy 2 bytes from <= 1kB distance
+ length = 2
+ state = S (copy S literals after this block)
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 2) + D + 1
+
+ If last instruction used to copy 4 or more literals (as detected by
+ state == 4), the instruction becomes a copy of a 3-byte block from the
+ dictionary from a 2..3kB distance, and must be interpreted like this :
+
+ 0 0 0 0 D D S S (0..15) : copy 3 bytes from 2..3 kB distance
+ length = 3
+ state = S (copy S literals after this block)
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 2) + D + 2049
+
+ 0 0 0 1 H L L L (16..31)
+ Copy of a block within 16..48kB distance (preferably less than 10B)
+ length = 2 + (L ?: 7 + (zero_bytes * 255) + non_zero_byte)
+ Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S
+ distance = 16384 + (H << 14) + D
+ state = S (copy S literals after this block)
+ End of stream is reached if distance == 16384
+
+ 0 0 1 L L L L L (32..63)
+ Copy of small block within 16kB distance (preferably less than 34B)
+ length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
+ Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S
+ distance = D + 1
+ state = S (copy S literals after this block)
+
+ 0 1 L D D D S S (64..127)
+ Copy 3-4 bytes from block within 2kB distance
+ state = S (copy S literals after this block)
+ length = 3 + L
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 3) + D + 1
+
+ 1 L L D D D S S (128..255)
+ Copy 5-8 bytes from block within 2kB distance
+ state = S (copy S literals after this block)
+ length = 5 + L
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 3) + D + 1
+
+Authors
+
+ This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an
+ analysis of the decompression code available in Linux 3.16-rc5. The code is
+ tricky, it is possible that this document contains mistakes or that a few
+ corner cases were overlooked. In any case, please report any doubt, fix, or
+ proposed updates to the author(s) so that the document can be updated.
diff --git a/Documentation/m68k/00-INDEX b/Documentation/m68k/00-INDEX
new file mode 100644
index 000000000..2be8c6b00
--- /dev/null
+++ b/Documentation/m68k/00-INDEX
@@ -0,0 +1,7 @@
+00-INDEX
+ - this file
+README.buddha
+ - Amiga Buddha and Catweasel IDE Driver
+kernel-options.txt
+ - command line options for Linux/m68k
+
diff --git a/Documentation/m68k/README.buddha b/Documentation/m68k/README.buddha
new file mode 100644
index 000000000..3ea9827ba
--- /dev/null
+++ b/Documentation/m68k/README.buddha
@@ -0,0 +1,210 @@
+
+The Amiga Buddha and Catweasel IDE Driver (part of ide.c) was written by
+Geert Uytterhoeven based on the following specifications:
+
+------------------------------------------------------------------------
+
+Register map of the Buddha IDE controller and the
+Buddha-part of the Catweasel Zorro-II version
+
+The Autoconfiguration has been implemented just as Commodore
+described in their manuals, no tricks have been used (for
+example leaving some address lines out of the equations...).
+If you want to configure the board yourself (for example let
+a Linux kernel configure the card), look at the Commodore
+Docs. Reading the nibbles should give this information:
+
+Vendor number: 4626 ($1212)
+product number: 0 (42 for Catweasel Z-II)
+Serial number: 0
+Rom-vector: $1000
+
+The card should be a Z-II board, size 64K, not for freemem
+list, Rom-Vektor is valid, no second Autoconfig-board on the
+same card, no space preference, supports "Shutup_forever".
+
+Setting the base address should be done in two steps, just
+as the Amiga Kickstart does: The lower nibble of the 8-Bit
+address is written to $4a, then the whole Byte is written to
+$48, while it doesn't matter how often you're writing to $4a
+as long as $48 is not touched. After $48 has been written,
+the whole card disappears from $e8 and is mapped to the new
+address just written. Make sure $4a is written before $48,
+otherwise your chance is only 1:16 to find the board :-).
+
+The local memory-map is even active when mapped to $e8:
+
+$0-$7e Autokonfig-space, see Z-II docs.
+
+$80-$7fd reserved
+
+$7fe Speed-select Register: Read & Write
+ (description see further down)
+
+$800-$8ff IDE-Select 0 (Port 0, Register set 0)
+
+$900-$9ff IDE-Select 1 (Port 0, Register set 1)
+
+$a00-$aff IDE-Select 2 (Port 1, Register set 0)
+
+$b00-$bff IDE-Select 3 (Port 1, Register set 1)
+
+$c00-$cff IDE-Select 4 (Port 2, Register set 0,
+ Catweasel only!)
+
+$d00-$dff IDE-Select 5 (Port 3, Register set 1,
+ Catweasel only!)
+
+$e00-$eff local expansion port, on Catweasel Z-II the
+ Catweasel registers are also mapped here.
+ Never touch, use multidisk.device!
+
+$f00 read only, Byte-access: Bit 7 shows the
+ level of the IRQ-line of IDE port 0.
+
+$f01-$f3f mirror of $f00
+
+$f40 read only, Byte-access: Bit 7 shows the
+ level of the IRQ-line of IDE port 1.
+
+$f41-$f7f mirror of $f40
+
+$f80 read only, Byte-access: Bit 7 shows the
+ level of the IRQ-line of IDE port 2.
+ (Catweasel only!)
+
+$f81-$fbf mirror of $f80
+
+$fc0 write-only: Writing any value to this
+ register enables IRQs to be passed from the
+ IDE ports to the Zorro bus. This mechanism
+ has been implemented to be compatible with
+ harddisks that are either defective or have
+ a buggy firmware and pull the IRQ line up
+ while starting up. If interrupts would
+ always be passed to the bus, the computer
+ might not start up. Once enabled, this flag
+ can not be disabled again. The level of the
+ flag can not be determined by software
+ (what for? Write to me if it's necessary!).
+
+$fc1-$fff mirror of $fc0
+
+$1000-$ffff Buddha-Rom with offset $1000 in the rom
+ chip. The addresses $0 to $fff of the rom
+ chip cannot be read. Rom is Byte-wide and
+ mapped to even addresses.
+
+The IDE ports issue an INT2. You can read the level of the
+IRQ-lines of the IDE-ports by reading from the three (two
+for Buddha-only) registers $f00, $f40 and $f80. This way
+more than one I/O request can be handled and you can easily
+determine what driver has to serve the INT2. Buddha and
+Catweasel expansion boards can issue an INT6. A separate
+memory map is available for the I/O module and the sysop's
+I/O module.
+
+The IDE ports are fed by the address lines A2 to A4, just as
+the Amiga 1200 and Amiga 4000 IDE ports are. This way
+existing drivers can be easily ported to Buddha. A move.l
+polls two words out of the same address of IDE port since
+every word is mirrored once. movem is not possible, but
+it's not necessary either, because you can only speedup
+68000 systems with this technique. A 68020 system with
+fastmem is faster with move.l.
+
+If you're using the mirrored registers of the IDE-ports with
+A6=1, the Buddha doesn't care about the speed that you have
+selected in the speed register (see further down). With
+A6=1 (for example $840 for port 0, register set 0), a 780ns
+access is being made. These registers should be used for a
+command access to the harddisk/CD-Rom, since command
+accesses are Byte-wide and have to be made slower according
+to the ATA-X3T9 manual.
+
+Now for the speed-register: The register is byte-wide, and
+only the upper three bits are used (Bits 7 to 5). Bit 4
+must always be set to 1 to be compatible with later Buddha
+versions (if I'll ever update this one). I presume that
+I'll never use the lower four bits, but they have to be set
+to 1 by definition.
+ The values in this table have to be shifted 5 bits to the
+left and or'd with $1f (this sets the lower 5 bits).
+
+All the timings have in common: Select and IOR/IOW rise at
+the same time. IOR and IOW have a propagation delay of
+about 30ns to the clocks on the Zorro bus, that's why the
+values are no multiple of 71. One clock-cycle is 71ns long
+(exactly 70,5 at 14,18 Mhz on PAL systems).
+
+value 0 (Default after reset)
+
+497ns Select (7 clock cycles) , IOR/IOW after 172ns (2 clock cycles)
+(same timing as the Amiga 1200 does on it's IDE port without
+accelerator card)
+
+value 1
+
+639ns Select (9 clock cycles), IOR/IOW after 243ns (3 clock cycles)
+
+value 2
+
+781ns Select (11 clock cycles), IOR/IOW after 314ns (4 clock cycles)
+
+value 3
+
+355ns Select (5 clock cycles), IOR/IOW after 101ns (1 clock cycle)
+
+value 4
+
+355ns Select (5 clock cycles), IOR/IOW after 172ns (2 clock cycles)
+
+value 5
+
+355ns Select (5 clock cycles), IOR/IOW after 243ns (3 clock cycles)
+
+value 6
+
+1065ns Select (15 clock cycles), IOR/IOW after 314ns (4 clock cycles)
+
+value 7
+
+355ns Select, (5 clock cycles), IOR/IOW after 101ns (1 clock cycle)
+
+When accessing IDE registers with A6=1 (for example $84x),
+the timing will always be mode 0 8-bit compatible, no matter
+what you have selected in the speed register:
+
+781ns select, IOR/IOW after 4 clock cycles (=314ns) aktive.
+
+All the timings with a very short select-signal (the 355ns
+fast accesses) depend on the accelerator card used in the
+system: Sometimes two more clock cycles are inserted by the
+bus interface, making the whole access 497ns long. This
+doesn't affect the reliability of the controller nor the
+performance of the card, since this doesn't happen very
+often.
+
+All the timings are calculated and only confirmed by
+measurements that allowed me to count the clock cycles. If
+the system is clocked by an oscillator other than 28,37516
+Mhz (for example the NTSC-frequency 28,63636 Mhz), each
+clock cycle is shortened to a bit less than 70ns (not worth
+mentioning). You could think of a small performance boost
+by overclocking the system, but you would either need a
+multisync monitor, or a graphics card, and your internal
+diskdrive would go crazy, that's why you shouldn't tune your
+Amiga this way.
+
+Giving you the possibility to write software that is
+compatible with both the Buddha and the Catweasel Z-II, The
+Buddha acts just like a Catweasel Z-II with no device
+connected to the third IDE-port. The IRQ-register $f80
+always shows a "no IRQ here" on the Buddha, and accesses to
+the third IDE port are going into data's Nirwana on the
+Buddha.
+
+ Jens Schönfeld february 19th, 1997
+ updated may 27th, 1997
+ eMail: sysop@nostlgic.tng.oche.de
+
diff --git a/Documentation/m68k/kernel-options.txt b/Documentation/m68k/kernel-options.txt
new file mode 100644
index 000000000..eaf32a1fd
--- /dev/null
+++ b/Documentation/m68k/kernel-options.txt
@@ -0,0 +1,884 @@
+
+
+ Command Line Options for Linux/m68k
+ ===================================
+
+Last Update: 2 May 1999
+Linux/m68k version: 2.2.6
+Author: Roman.Hodek@informatik.uni-erlangen.de (Roman Hodek)
+Update: jds@kom.auc.dk (Jes Sorensen) and faq@linux-m68k.org (Chris Lawrence)
+
+0) Introduction
+===============
+
+ Often I've been asked which command line options the Linux/m68k
+kernel understands, or how the exact syntax for the ... option is, or
+... about the option ... . I hope, this document supplies all the
+answers...
+
+ Note that some options might be outdated, their descriptions being
+incomplete or missing. Please update the information and send in the
+patches.
+
+
+1) Overview of the Kernel's Option Processing
+=============================================
+
+The kernel knows three kinds of options on its command line:
+
+ 1) kernel options
+ 2) environment settings
+ 3) arguments for init
+
+To which of these classes an argument belongs is determined as
+follows: If the option is known to the kernel itself, i.e. if the name
+(the part before the '=') or, in some cases, the whole argument string
+is known to the kernel, it belongs to class 1. Otherwise, if the
+argument contains an '=', it is of class 2, and the definition is put
+into init's environment. All other arguments are passed to init as
+command line options.
+
+ This document describes the valid kernel options for Linux/m68k in
+the version mentioned at the start of this file. Later revisions may
+add new such options, and some may be missing in older versions.
+
+ In general, the value (the part after the '=') of an option is a
+list of values separated by commas. The interpretation of these values
+is up to the driver that "owns" the option. This association of
+options with drivers is also the reason that some are further
+subdivided.
+
+
+2) General Kernel Options
+=========================
+
+2.1) root=
+----------
+
+Syntax: root=/dev/<device>
+ or: root=<hex_number>
+
+This tells the kernel which device it should mount as the root
+filesystem. The device must be a block device with a valid filesystem
+on it.
+
+ The first syntax gives the device by name. These names are converted
+into a major/minor number internally in the kernel in an unusual way.
+Normally, this "conversion" is done by the device files in /dev, but
+this isn't possible here, because the root filesystem (with /dev)
+isn't mounted yet... So the kernel parses the name itself, with some
+hardcoded name to number mappings. The name must always be a
+combination of two or three letters, followed by a decimal number.
+Valid names are:
+
+ /dev/ram: -> 0x0100 (initial ramdisk)
+ /dev/hda: -> 0x0300 (first IDE disk)
+ /dev/hdb: -> 0x0340 (second IDE disk)
+ /dev/sda: -> 0x0800 (first SCSI disk)
+ /dev/sdb: -> 0x0810 (second SCSI disk)
+ /dev/sdc: -> 0x0820 (third SCSI disk)
+ /dev/sdd: -> 0x0830 (forth SCSI disk)
+ /dev/sde: -> 0x0840 (fifth SCSI disk)
+ /dev/fd : -> 0x0200 (floppy disk)
+
+ The name must be followed by a decimal number, that stands for the
+partition number. Internally, the value of the number is just
+added to the device number mentioned in the table above. The
+exceptions are /dev/ram and /dev/fd, where /dev/ram refers to an
+initial ramdisk loaded by your bootstrap program (please consult the
+instructions for your bootstrap program to find out how to load an
+initial ramdisk). As of kernel version 2.0.18 you must specify
+/dev/ram as the root device if you want to boot from an initial
+ramdisk. For the floppy devices, /dev/fd, the number stands for the
+floppy drive number (there are no partitions on floppy disks). I.e.,
+/dev/fd0 stands for the first drive, /dev/fd1 for the second, and so
+on. Since the number is just added, you can also force the disk format
+by adding a number greater than 3. If you look into your /dev
+directory, use can see the /dev/fd0D720 has major 2 and minor 16. You
+can specify this device for the root FS by writing "root=/dev/fd16" on
+the kernel command line.
+
+[Strange and maybe uninteresting stuff ON]
+
+ This unusual translation of device names has some strange
+consequences: If, for example, you have a symbolic link from /dev/fd
+to /dev/fd0D720 as an abbreviation for floppy driver #0 in DD format,
+you cannot use this name for specifying the root device, because the
+kernel cannot see this symlink before mounting the root FS and it
+isn't in the table above. If you use it, the root device will not be
+set at all, without an error message. Another example: You cannot use a
+partition on e.g. the sixth SCSI disk as the root filesystem, if you
+want to specify it by name. This is, because only the devices up to
+/dev/sde are in the table above, but not /dev/sdf. Although, you can
+use the sixth SCSI disk for the root FS, but you have to specify the
+device by number... (see below). Or, even more strange, you can use the
+fact that there is no range checking of the partition number, and your
+knowledge that each disk uses 16 minors, and write "root=/dev/sde17"
+(for /dev/sdf1).
+
+[Strange and maybe uninteresting stuff OFF]
+
+ If the device containing your root partition isn't in the table
+above, you can also specify it by major and minor numbers. These are
+written in hex, with no prefix and no separator between. E.g., if you
+have a CD with contents appropriate as a root filesystem in the first
+SCSI CD-ROM drive, you boot from it by "root=0b00". Here, hex "0b" =
+decimal 11 is the major of SCSI CD-ROMs, and the minor 0 stands for
+the first of these. You can find out all valid major numbers by
+looking into include/linux/major.h.
+
+In addition to major and minor numbers, if the device containing your
+root partition uses a partition table format with unique partition
+identifiers, then you may use them. For instance,
+"root=PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF". It is also
+possible to reference another partition on the same device using a
+known partition UUID as the starting point. For example,
+if partition 5 of the device has the UUID of
+00112233-4455-6677-8899-AABBCCDDEEFF then partition 3 may be found as
+follows:
+ PARTUUID=00112233-4455-6677-8899-AABBCCDDEEFF/PARTNROFF=-2
+
+Authoritative information can be found in
+"Documentation/kernel-parameters.txt".
+
+
+2.2) ro, rw
+-----------
+
+Syntax: ro
+ or: rw
+
+These two options tell the kernel whether it should mount the root
+filesystem read-only or read-write. The default is read-only, except
+for ramdisks, which default to read-write.
+
+
+2.3) debug
+----------
+
+Syntax: debug
+
+This raises the kernel log level to 10 (the default is 7). This is the
+same level as set by the "dmesg" command, just that the maximum level
+selectable by dmesg is 8.
+
+
+2.4) debug=
+-----------
+
+Syntax: debug=<device>
+
+This option causes certain kernel messages be printed to the selected
+debugging device. This can aid debugging the kernel, since the
+messages can be captured and analyzed on some other machine. Which
+devices are possible depends on the machine type. There are no checks
+for the validity of the device name. If the device isn't implemented,
+nothing happens.
+
+ Messages logged this way are in general stack dumps after kernel
+memory faults or bad kernel traps, and kernel panics. To be exact: all
+messages of level 0 (panic messages) and all messages printed while
+the log level is 8 or more (their level doesn't matter). Before stack
+dumps, the kernel sets the log level to 10 automatically. A level of
+at least 8 can also be set by the "debug" command line option (see
+2.3) and at run time with "dmesg -n 8".
+
+Devices possible for Amiga:
+
+ - "ser": built-in serial port; parameters: 9600bps, 8N1
+ - "mem": Save the messages to a reserved area in chip mem. After
+ rebooting, they can be read under AmigaOS with the tool
+ 'dmesg'.
+
+Devices possible for Atari:
+
+ - "ser1": ST-MFP serial port ("Modem1"); parameters: 9600bps, 8N1
+ - "ser2": SCC channel B serial port ("Modem2"); parameters: 9600bps, 8N1
+ - "ser" : default serial port
+ This is "ser2" for a Falcon, and "ser1" for any other machine
+ - "midi": The MIDI port; parameters: 31250bps, 8N1
+ - "par" : parallel port
+ The printing routine for this implements a timeout for the
+ case there's no printer connected (else the kernel would
+ lock up). The timeout is not exact, but usually a few
+ seconds.
+
+
+2.6) ramdisk_size=
+-------------
+
+Syntax: ramdisk_size=<size>
+
+ This option instructs the kernel to set up a ramdisk of the given
+size in KBytes. Do not use this option if the ramdisk contents are
+passed by bootstrap! In this case, the size is selected automatically
+and should not be overwritten.
+
+ The only application is for root filesystems on floppy disks, that
+should be loaded into memory. To do that, select the corresponding
+size of the disk as ramdisk size, and set the root device to the disk
+drive (with "root=").
+
+
+2.7) swap=
+2.8) buff=
+-----------
+
+ I can't find any sign of these options in 2.2.6.
+
+
+3) General Device Options (Amiga and Atari)
+===========================================
+
+3.1) ether=
+-----------
+
+Syntax: ether=[<irq>[,<base_addr>[,<mem_start>[,<mem_end>]]]],<dev-name>
+
+ <dev-name> is the name of a net driver, as specified in
+drivers/net/Space.c in the Linux source. Most prominent are eth0, ...
+eth3, sl0, ... sl3, ppp0, ..., ppp3, dummy, and lo.
+
+ The non-ethernet drivers (sl, ppp, dummy, lo) obviously ignore the
+settings by this options. Also, the existing ethernet drivers for
+Linux/m68k (ariadne, a2065, hydra) don't use them because Zorro boards
+are really Plug-'n-Play, so the "ether=" option is useless altogether
+for Linux/m68k.
+
+
+3.2) hd=
+--------
+
+Syntax: hd=<cylinders>,<heads>,<sectors>
+
+ This option sets the disk geometry of an IDE disk. The first hd=
+option is for the first IDE disk, the second for the second one.
+(I.e., you can give this option twice.) In most cases, you won't have
+to use this option, since the kernel can obtain the geometry data
+itself. It exists just for the case that this fails for one of your
+disks.
+
+
+3.3) max_scsi_luns=
+-------------------
+
+Syntax: max_scsi_luns=<n>
+
+ Sets the maximum number of LUNs (logical units) of SCSI devices to
+be scanned. Valid values for <n> are between 1 and 8. Default is 8 if
+"Probe all LUNs on each SCSI device" was selected during the kernel
+configuration, else 1.
+
+
+3.4) st=
+--------
+
+Syntax: st=<buffer_size>,[<write_thres>,[<max_buffers>]]
+
+ Sets several parameters of the SCSI tape driver. <buffer_size> is
+the number of 512-byte buffers reserved for tape operations for each
+device. <write_thres> sets the number of blocks which must be filled
+to start an actual write operation to the tape. Maximum value is the
+total number of buffers. <max_buffer> limits the total number of
+buffers allocated for all tape devices.
+
+
+3.5) dmasound=
+--------------
+
+Syntax: dmasound=[<buffers>,<buffer-size>[,<catch-radius>]]
+
+ This option controls some configurations of the Linux/m68k DMA sound
+driver (Amiga and Atari): <buffers> is the number of buffers you want
+to use (minimum 4, default 4), <buffer-size> is the size of each
+buffer in kilobytes (minimum 4, default 32) and <catch-radius> says
+how much percent of error will be tolerated when setting a frequency
+(maximum 10, default 0). For example with 3% you can play 8000Hz
+AU-Files on the Falcon with its hardware frequency of 8195Hz and thus
+don't need to expand the sound.
+
+
+
+4) Options for Atari Only
+=========================
+
+4.1) video=
+-----------
+
+Syntax: video=<fbname>:<sub-options...>
+
+The <fbname> parameter specifies the name of the frame buffer,
+eg. most atari users will want to specify `atafb' here. The
+<sub-options> is a comma-separated list of the sub-options listed
+below.
+
+NB: Please notice that this option was renamed from `atavideo' to
+ `video' during the development of the 1.3.x kernels, thus you
+ might need to update your boot-scripts if upgrading to 2.x from
+ an 1.2.x kernel.
+
+NBB: The behavior of video= was changed in 2.1.57 so the recommended
+option is to specify the name of the frame buffer.
+
+4.1.1) Video Mode
+-----------------
+
+This sub-option may be any of the predefined video modes, as listed
+in atari/atafb.c in the Linux/m68k source tree. The kernel will
+activate the given video mode at boot time and make it the default
+mode, if the hardware allows. Currently defined names are:
+
+ - stlow : 320x200x4
+ - stmid, default5 : 640x200x2
+ - sthigh, default4: 640x400x1
+ - ttlow : 320x480x8, TT only
+ - ttmid, default1 : 640x480x4, TT only
+ - tthigh, default2: 1280x960x1, TT only
+ - vga2 : 640x480x1, Falcon only
+ - vga4 : 640x480x2, Falcon only
+ - vga16, default3 : 640x480x4, Falcon only
+ - vga256 : 640x480x8, Falcon only
+ - falh2 : 896x608x1, Falcon only
+ - falh16 : 896x608x4, Falcon only
+
+ If no video mode is given on the command line, the kernel tries the
+modes names "default<n>" in turn, until one is possible with the
+hardware in use.
+
+ A video mode setting doesn't make sense, if the external driver is
+activated by a "external:" sub-option.
+
+4.1.2) inverse
+--------------
+
+Invert the display. This affects both, text (consoles) and graphics
+(X) display. Usually, the background is chosen to be black. With this
+option, you can make the background white.
+
+4.1.3) font
+-----------
+
+Syntax: font:<fontname>
+
+Specify the font to use in text modes. Currently you can choose only
+between `VGA8x8', `VGA8x16' and `PEARL8x8'. `VGA8x8' is default, if the
+vertical size of the display is less than 400 pixel rows. Otherwise, the
+`VGA8x16' font is the default.
+
+4.1.4) hwscroll_
+----------------
+
+Syntax: hwscroll_<n>
+
+The number of additional lines of video memory to reserve for
+speeding up the scrolling ("hardware scrolling"). Hardware scrolling
+is possible only if the kernel can set the video base address in steps
+fine enough. This is true for STE, MegaSTE, TT, and Falcon. It is not
+possible with plain STs and graphics cards (The former because the
+base address must be on a 256 byte boundary there, the latter because
+the kernel doesn't know how to set the base address at all.)
+
+ By default, <n> is set to the number of visible text lines on the
+display. Thus, the amount of video memory is doubled, compared to no
+hardware scrolling. You can turn off the hardware scrolling altogether
+by setting <n> to 0.
+
+4.1.5) internal:
+----------------
+
+Syntax: internal:<xres>;<yres>[;<xres_max>;<yres_max>;<offset>]
+
+This option specifies the capabilities of some extended internal video
+hardware, like e.g. OverScan. <xres> and <yres> give the (extended)
+dimensions of the screen.
+
+ If your OverScan needs a black border, you have to write the last
+three arguments of the "internal:". <xres_max> is the maximum line
+length the hardware allows, <yres_max> the maximum number of lines.
+<offset> is the offset of the visible part of the screen memory to its
+physical start, in bytes.
+
+ Often, extended interval video hardware has to be activated somehow.
+For this, see the "sw_*" options below.
+
+4.1.6) external:
+----------------
+
+Syntax:
+ external:<xres>;<yres>;<depth>;<org>;<scrmem>[;<scrlen>[;<vgabase>\
+ [;<colw>[;<coltype>[;<xres_virtual>]]]]]
+
+[I had to break this line...]
+
+ This is probably the most complicated parameter... It specifies that
+you have some external video hardware (a graphics board), and how to
+use it under Linux/m68k. The kernel cannot know more about the hardware
+than you tell it here! The kernel also is unable to set or change any
+video modes, since it doesn't know about any board internal. So, you
+have to switch to that video mode before you start Linux, and cannot
+switch to another mode once Linux has started.
+
+ The first 3 parameters of this sub-option should be obvious: <xres>,
+<yres> and <depth> give the dimensions of the screen and the number of
+planes (depth). The depth is the logarithm to base 2 of the number
+of colors possible. (Or, the other way round: The number of colors is
+2^depth).
+
+ You have to tell the kernel furthermore how the video memory is
+organized. This is done by a letter as <org> parameter:
+
+ 'n': "normal planes", i.e. one whole plane after another
+ 'i': "interleaved planes", i.e. 16 bit of the first plane, than 16 bit
+ of the next, and so on... This mode is used only with the
+ built-in Atari video modes, I think there is no card that
+ supports this mode.
+ 'p': "packed pixels", i.e. <depth> consecutive bits stand for all
+ planes of one pixel; this is the most common mode for 8 planes
+ (256 colors) on graphic cards
+ 't': "true color" (more or less packed pixels, but without a color
+ lookup table); usually depth is 24
+
+For monochrome modes (i.e., <depth> is 1), the <org> letter has a
+different meaning:
+
+ 'n': normal colors, i.e. 0=white, 1=black
+ 'i': inverted colors, i.e. 0=black, 1=white
+
+ The next important information about the video hardware is the base
+address of the video memory. That is given in the <scrmem> parameter,
+as a hexadecimal number with a "0x" prefix. You have to find out this
+address in the documentation of your hardware.
+
+ The next parameter, <scrlen>, tells the kernel about the size of the
+video memory. If it's missing, the size is calculated from <xres>,
+<yres>, and <depth>. For now, it is not useful to write a value here.
+It would be used only for hardware scrolling (which isn't possible
+with the external driver, because the kernel cannot set the video base
+address), or for virtual resolutions under X (which the X server
+doesn't support yet). So, it's currently best to leave this field
+empty, either by ending the "external:" after the video address or by
+writing two consecutive semicolons, if you want to give a <vgabase>
+(it is allowed to leave this parameter empty).
+
+ The <vgabase> parameter is optional. If it is not given, the kernel
+cannot read or write any color registers of the video hardware, and
+thus you have to set appropriate colors before you start Linux. But if
+your card is somehow VGA compatible, you can tell the kernel the base
+address of the VGA register set, so it can change the color lookup
+table. You have to look up this address in your board's documentation.
+To avoid misunderstandings: <vgabase> is the _base_ address, i.e. a 4k
+aligned address. For read/writing the color registers, the kernel
+uses the addresses vgabase+0x3c7...vgabase+0x3c9. The <vgabase>
+parameter is written in hexadecimal with a "0x" prefix, just as
+<scrmem>.
+
+ <colw> is meaningful only if <vgabase> is specified. It tells the
+kernel how wide each of the color register is, i.e. the number of bits
+per single color (red/green/blue). Default is 6, another quite usual
+value is 8.
+
+ Also <coltype> is used together with <vgabase>. It tells the kernel
+about the color register model of your gfx board. Currently, the types
+"vga" (which is also the default) and "mv300" (SANG MV300) are
+implemented.
+
+ Parameter <xres_virtual> is required for ProMST or ET4000 cards where
+the physical linelength differs from the visible length. With ProMST,
+xres_virtual must be set to 2048. For ET4000, xres_virtual depends on the
+initialisation of the video-card.
+If you're missing a corresponding yres_virtual: the external part is legacy,
+therefore we don't support hardware-dependent functions like hardware-scroll,
+panning or blanking.
+
+4.1.7) eclock:
+--------------
+
+The external pixel clock attached to the Falcon VIDEL shifter. This
+currently works only with the ScreenWonder!
+
+4.1.8) monitorcap:
+-------------------
+
+Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
+
+This describes the capabilities of a multisync monitor. Don't use it
+with a fixed-frequency monitor! For now, only the Falcon frame buffer
+uses the settings of "monitorcap:".
+
+ <vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
+your monitor can work with, in Hz. <hmin> and <hmax> are the same for
+the horizontal frequency, in kHz.
+
+ The defaults are 58;62;31;32 (VGA compatible).
+
+ The defaults for TV/SC1224/SC1435 cover both PAL and NTSC standards.
+
+4.1.9) keep
+------------
+
+If this option is given, the framebuffer device doesn't do any video
+mode calculations and settings on its own. The only Atari fb device
+that does this currently is the Falcon.
+
+ What you reach with this: Settings for unknown video extensions
+aren't overridden by the driver, so you can still use the mode found
+when booting, when the driver doesn't know to set this mode itself.
+But this also means, that you can't switch video modes anymore...
+
+ An example where you may want to use "keep" is the ScreenBlaster for
+the Falcon.
+
+
+4.2) atamouse=
+--------------
+
+Syntax: atamouse=<x-threshold>,[<y-threshold>]
+
+ With this option, you can set the mouse movement reporting threshold.
+This is the number of pixels of mouse movement that have to accumulate
+before the IKBD sends a new mouse packet to the kernel. Higher values
+reduce the mouse interrupt load and thus reduce the chance of keyboard
+overruns. Lower values give a slightly faster mouse responses and
+slightly better mouse tracking.
+
+ You can set the threshold in x and y separately, but usually this is
+of little practical use. If there's just one number in the option, it
+is used for both dimensions. The default value is 2 for both
+thresholds.
+
+
+4.3) ataflop=
+-------------
+
+Syntax: ataflop=<drive type>[,<trackbuffering>[,<steprateA>[,<steprateB>]]]
+
+ The drive type may be 0, 1, or 2, for DD, HD, and ED, resp. This
+ setting affects how many buffers are reserved and which formats are
+ probed (see also below). The default is 1 (HD). Only one drive type
+ can be selected. If you have two disk drives, select the "better"
+ type.
+
+ The second parameter <trackbuffer> tells the kernel whether to use
+ track buffering (1) or not (0). The default is machine-dependent:
+ no for the Medusa and yes for all others.
+
+ With the two following parameters, you can change the default
+ steprate used for drive A and B, resp.
+
+
+4.4) atascsi=
+-------------
+
+Syntax: atascsi=<can_queue>[,<cmd_per_lun>[,<scat-gat>[,<host-id>[,<tagged>]]]]
+
+ This option sets some parameters for the Atari native SCSI driver.
+Generally, any number of arguments can be omitted from the end. And
+for each of the numbers, a negative value means "use default". The
+defaults depend on whether TT-style or Falcon-style SCSI is used.
+Below, defaults are noted as n/m, where the first value refers to
+TT-SCSI and the latter to Falcon-SCSI. If an illegal value is given
+for one parameter, an error message is printed and that one setting is
+ignored (others aren't affected).
+
+ <can_queue>:
+ This is the maximum number of SCSI commands queued internally to the
+ Atari SCSI driver. A value of 1 effectively turns off the driver
+ internal multitasking (if it causes problems). Legal values are >=
+ 1. <can_queue> can be as high as you like, but values greater than
+ <cmd_per_lun> times the number of SCSI targets (LUNs) you have
+ don't make sense. Default: 16/8.
+
+ <cmd_per_lun>:
+ Maximum number of SCSI commands issued to the driver for one
+ logical unit (LUN, usually one SCSI target). Legal values start
+ from 1. If tagged queuing (see below) is not used, values greater
+ than 2 don't make sense, but waste memory. Otherwise, the maximum
+ is the number of command tags available to the driver (currently
+ 32). Default: 8/1. (Note: Values > 1 seem to cause problems on a
+ Falcon, cause not yet known.)
+
+ The <cmd_per_lun> value at a great part determines the amount of
+ memory SCSI reserves for itself. The formula is rather
+ complicated, but I can give you some hints:
+ no scatter-gather : cmd_per_lun * 232 bytes
+ full scatter-gather: cmd_per_lun * approx. 17 Kbytes
+
+ <scat-gat>:
+ Size of the scatter-gather table, i.e. the number of requests
+ consecutive on the disk that can be merged into one SCSI command.
+ Legal values are between 0 and 255. Default: 255/0. Note: This
+ value is forced to 0 on a Falcon, since scatter-gather isn't
+ possible with the ST-DMA. Not using scatter-gather hurts
+ performance significantly.
+
+ <host-id>:
+ The SCSI ID to be used by the initiator (your Atari). This is
+ usually 7, the highest possible ID. Every ID on the SCSI bus must
+ be unique. Default: determined at run time: If the NV-RAM checksum
+ is valid, and bit 7 in byte 30 of the NV-RAM is set, the lower 3
+ bits of this byte are used as the host ID. (This method is defined
+ by Atari and also used by some TOS HD drivers.) If the above
+ isn't given, the default ID is 7. (both, TT and Falcon).
+
+ <tagged>:
+ 0 means turn off tagged queuing support, all other values > 0 mean
+ use tagged queuing for targets that support it. Default: currently
+ off, but this may change when tagged queuing handling has been
+ proved to be reliable.
+
+ Tagged queuing means that more than one command can be issued to
+ one LUN, and the SCSI device itself orders the requests so they
+ can be performed in optimal order. Not all SCSI devices support
+ tagged queuing (:-().
+
+4.5 switches=
+-------------
+
+Syntax: switches=<list of switches>
+
+ With this option you can switch some hardware lines that are often
+used to enable/disable certain hardware extensions. Examples are
+OverScan, overclocking, ...
+
+ The <list of switches> is a comma-separated list of the following
+items:
+
+ ikbd: set RTS of the keyboard ACIA high
+ midi: set RTS of the MIDI ACIA high
+ snd6: set bit 6 of the PSG port A
+ snd7: set bit 6 of the PSG port A
+
+It doesn't make sense to mention a switch more than once (no
+difference to only once), but you can give as many switches as you
+want to enable different features. The switch lines are set as early
+as possible during kernel initialization (even before determining the
+present hardware.)
+
+ All of the items can also be prefixed with "ov_", i.e. "ov_ikbd",
+"ov_midi", ... These options are meant for switching on an OverScan
+video extension. The difference to the bare option is that the
+switch-on is done after video initialization, and somehow synchronized
+to the HBLANK. A speciality is that ov_ikbd and ov_midi are switched
+off before rebooting, so that OverScan is disabled and TOS boots
+correctly.
+
+ If you give an option both, with and without the "ov_" prefix, the
+earlier initialization ("ov_"-less) takes precedence. But the
+switching-off on reset still happens in this case.
+
+5) Options for Amiga Only:
+==========================
+
+5.1) video=
+-----------
+
+Syntax: video=<fbname>:<sub-options...>
+
+The <fbname> parameter specifies the name of the frame buffer, valid
+options are `amifb', `cyber', 'virge', `retz3' and `clgen', provided
+that the respective frame buffer devices have been compiled into the
+kernel (or compiled as loadable modules). The behavior of the <fbname>
+option was changed in 2.1.57 so it is now recommended to specify this
+option.
+
+The <sub-options> is a comma-separated list of the sub-options listed
+below. This option is organized similar to the Atari version of the
+"video"-option (4.1), but knows fewer sub-options.
+
+5.1.1) video mode
+-----------------
+
+Again, similar to the video mode for the Atari (see 4.1.1). Predefined
+modes depend on the used frame buffer device.
+
+OCS, ECS and AGA machines all use the color frame buffer. The following
+predefined video modes are available:
+
+NTSC modes:
+ - ntsc : 640x200, 15 kHz, 60 Hz
+ - ntsc-lace : 640x400, 15 kHz, 60 Hz interlaced
+PAL modes:
+ - pal : 640x256, 15 kHz, 50 Hz
+ - pal-lace : 640x512, 15 kHz, 50 Hz interlaced
+ECS modes:
+ - multiscan : 640x480, 29 kHz, 57 Hz
+ - multiscan-lace : 640x960, 29 kHz, 57 Hz interlaced
+ - euro36 : 640x200, 15 kHz, 72 Hz
+ - euro36-lace : 640x400, 15 kHz, 72 Hz interlaced
+ - euro72 : 640x400, 29 kHz, 68 Hz
+ - euro72-lace : 640x800, 29 kHz, 68 Hz interlaced
+ - super72 : 800x300, 23 kHz, 70 Hz
+ - super72-lace : 800x600, 23 kHz, 70 Hz interlaced
+ - dblntsc-ff : 640x400, 27 kHz, 57 Hz
+ - dblntsc-lace : 640x800, 27 kHz, 57 Hz interlaced
+ - dblpal-ff : 640x512, 27 kHz, 47 Hz
+ - dblpal-lace : 640x1024, 27 kHz, 47 Hz interlaced
+ - dblntsc : 640x200, 27 kHz, 57 Hz doublescan
+ - dblpal : 640x256, 27 kHz, 47 Hz doublescan
+VGA modes:
+ - vga : 640x480, 31 kHz, 60 Hz
+ - vga70 : 640x400, 31 kHz, 70 Hz
+
+Please notice that the ECS and VGA modes require either an ECS or AGA
+chipset, and that these modes are limited to 2-bit color for the ECS
+chipset and 8-bit color for the AGA chipset.
+
+5.1.2) depth
+------------
+
+Syntax: depth:<nr. of bit-planes>
+
+Specify the number of bit-planes for the selected video-mode.
+
+5.1.3) inverse
+--------------
+
+Use inverted display (black on white). Functionally the same as the
+"inverse" sub-option for the Atari.
+
+5.1.4) font
+-----------
+
+Syntax: font:<fontname>
+
+Specify the font to use in text modes. Functionally the same as the
+"font" sub-option for the Atari, except that `PEARL8x8' is used instead
+of `VGA8x8' if the vertical size of the display is less than 400 pixel
+rows.
+
+5.1.5) monitorcap:
+-------------------
+
+Syntax: monitorcap:<vmin>;<vmax>;<hmin>;<hmax>
+
+This describes the capabilities of a multisync monitor. For now, only
+the color frame buffer uses the settings of "monitorcap:".
+
+ <vmin> and <vmax> are the minimum and maximum, resp., vertical frequencies
+your monitor can work with, in Hz. <hmin> and <hmax> are the same for
+the horizontal frequency, in kHz.
+
+ The defaults are 50;90;15;38 (Generic Amiga multisync monitor).
+
+
+5.2) fd_def_df0=
+----------------
+
+Syntax: fd_def_df0=<value>
+
+Sets the df0 value for "silent" floppy drives. The value should be in
+hexadecimal with "0x" prefix.
+
+
+5.3) wd33c93=
+-------------
+
+Syntax: wd33c93=<sub-options...>
+
+These options affect the A590/A2091, A3000 and GVP Series II SCSI
+controllers.
+
+The <sub-options> is a comma-separated list of the sub-options listed
+below.
+
+5.3.1) nosync
+-------------
+
+Syntax: nosync:bitmask
+
+ bitmask is a byte where the 1st 7 bits correspond with the 7
+possible SCSI devices. Set a bit to prevent sync negotiation on that
+device. To maintain backwards compatibility, a command-line such as
+"wd33c93=255" will be automatically translated to
+"wd33c93=nosync:0xff". The default is to disable sync negotiation for
+all devices, eg. nosync:0xff.
+
+5.3.2) period
+-------------
+
+Syntax: period:ns
+
+ `ns' is the minimum # of nanoseconds in a SCSI data transfer
+period. Default is 500; acceptable values are 250 - 1000.
+
+5.3.3) disconnect
+-----------------
+
+Syntax: disconnect:x
+
+ Specify x = 0 to never allow disconnects, 2 to always allow them.
+x = 1 does 'adaptive' disconnects, which is the default and generally
+the best choice.
+
+5.3.4) debug
+------------
+
+Syntax: debug:x
+
+ If `DEBUGGING_ON' is defined, x is a bit mask that causes various
+types of debug output to printed - see the DB_xxx defines in
+wd33c93.h.
+
+5.3.5) clock
+------------
+
+Syntax: clock:x
+
+ x = clock input in MHz for WD33c93 chip. Normal values would be from
+8 through 20. The default value depends on your hostadapter(s),
+default for the A3000 internal controller is 14, for the A2091 it's 8
+and for the GVP hostadapters it's either 8 or 14, depending on the
+hostadapter and the SCSI-clock jumper present on some GVP
+hostadapters.
+
+5.3.6) next
+-----------
+
+ No argument. Used to separate blocks of keywords when there's more
+than one wd33c93-based host adapter in the system.
+
+5.3.7) nodma
+------------
+
+Syntax: nodma:x
+
+ If x is 1 (or if the option is just written as "nodma"), the WD33c93
+controller will not use DMA (= direct memory access) to access the
+Amiga's memory. This is useful for some systems (like A3000's and
+A4000's with the A3640 accelerator, revision 3.0) that have problems
+using DMA to chip memory. The default is 0, i.e. to use DMA if
+possible.
+
+
+5.4) gvp11=
+-----------
+
+Syntax: gvp11=<addr-mask>
+
+ The earlier versions of the GVP driver did not handle DMA
+address-mask settings correctly which made it necessary for some
+people to use this option, in order to get their GVP controller
+running under Linux. These problems have hopefully been solved and the
+use of this option is now highly unrecommended!
+
+ Incorrect use can lead to unpredictable behavior, so please only use
+this option if you *know* what you are doing and have a reason to do
+so. In any case if you experience problems and need to use this
+option, please inform us about it by mailing to the Linux/68k kernel
+mailing list.
+
+ The address mask set by this option specifies which addresses are
+valid for DMA with the GVP Series II SCSI controller. An address is
+valid, if no bits are set except the bits that are set in the mask,
+too.
+
+ Some versions of the GVP can only DMA into a 24 bit address range,
+some can address a 25 bit address range while others can use the whole
+32 bit address range for DMA. The correct setting depends on your
+controller and should be autodetected by the driver. An example is the
+24 bit region which is specified by a mask of 0x00fffffe.
+
+
+/* Local Variables: */
+/* mode: text */
+/* End: */
diff --git a/Documentation/magic-number.txt b/Documentation/magic-number.txt
new file mode 100644
index 000000000..4c8e142db
--- /dev/null
+++ b/Documentation/magic-number.txt
@@ -0,0 +1,160 @@
+This file is a registry of magic numbers which are in use. When you
+add a magic number to a structure, you should also add it to this
+file, since it is best if the magic numbers used by various structures
+are unique.
+
+It is a *very* good idea to protect kernel data structures with magic
+numbers. This allows you to check at run time whether (a) a structure
+has been clobbered, or (b) you've passed the wrong structure to a
+routine. This last is especially useful --- particularly when you are
+passing pointers to structures via a void * pointer. The tty code,
+for example, does this frequently to pass driver-specific and line
+discipline-specific structures back and forth.
+
+The way to use magic numbers is to declare then at the beginning of
+the structure, like so:
+
+struct tty_ldisc {
+ int magic;
+ ...
+};
+
+Please follow this discipline when you are adding future enhancements
+to the kernel! It has saved me countless hours of debugging,
+especially in the screwy cases where an array has been overrun and
+structures following the array have been overwritten. Using this
+discipline, these cases get detected quickly and safely.
+
+ Theodore Ts'o
+ 31 Mar 94
+
+The magic table is current to Linux 2.1.55.
+
+ Michael Chastain
+ <mailto:mec@shout.net>
+ 22 Sep 1997
+
+Now it should be up to date with Linux 2.1.112. Because
+we are in feature freeze time it is very unlikely that
+something will change before 2.2.x. The entries are
+sorted by number field.
+
+ Krzysztof G. Baranowski
+ <mailto: kgb@knm.org.pl>
+ 29 Jul 1998
+
+Updated the magic table to Linux 2.5.45. Right over the feature freeze,
+but it is possible that some new magic numbers will sneak into the
+kernel before 2.6.x yet.
+
+ Petr Baudis
+ <pasky@ucw.cz>
+ 03 Nov 2002
+
+Updated the magic table to Linux 2.5.74.
+
+ Fabian Frederick
+ <ffrederick@users.sourceforge.net>
+ 09 Jul 2003
+
+
+Magic Name Number Structure File
+===========================================================================
+PG_MAGIC 'P' pg_{read,write}_hdr include/linux/pg.h
+CMAGIC 0x0111 user include/linux/a.out.h
+MKISS_DRIVER_MAGIC 0x04bf mkiss_channel drivers/net/mkiss.h
+HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c
+APM_BIOS_MAGIC 0x4101 apm_user arch/x86/kernel/apm_32.c
+CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h
+DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c
+DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c
+FASYNC_MAGIC 0x4601 fasync_struct include/linux/fs.h
+FF_MAGIC 0x4646 fc_info drivers/net/iph5526_novram.c
+ISICOM_MAGIC 0x4d54 isi_port include/linux/isicom.h
+PTY_MAGIC 0x5001 drivers/char/pty.c
+PPP_MAGIC 0x5002 ppp include/linux/if_pppvar.h
+SERIAL_MAGIC 0x5301 async_struct include/linux/serial.h
+SSTATE_MAGIC 0x5302 serial_state include/linux/serial.h
+SLIP_MAGIC 0x5302 slip drivers/net/slip.h
+STRIP_MAGIC 0x5303 strip drivers/net/strip.c
+X25_ASY_MAGIC 0x5303 x25_asy drivers/net/x25_asy.h
+SIXPACK_MAGIC 0x5304 sixpack drivers/net/hamradio/6pack.h
+AX25_MAGIC 0x5316 ax_disp drivers/net/mkiss.h
+TTY_MAGIC 0x5401 tty_struct include/linux/tty.h
+MGSL_MAGIC 0x5401 mgsl_info drivers/char/synclink.c
+TTY_DRIVER_MAGIC 0x5402 tty_driver include/linux/tty_driver.h
+MGSLPC_MAGIC 0x5402 mgslpc_info drivers/char/pcmcia/synclink_cs.c
+TTY_LDISC_MAGIC 0x5403 tty_ldisc include/linux/tty_ldisc.h
+USB_SERIAL_MAGIC 0x6702 usb_serial drivers/usb/serial/usb-serial.h
+FULL_DUPLEX_MAGIC 0x6969 drivers/net/ethernet/dec/tulip/de2104x.c
+USB_BLUETOOTH_MAGIC 0x6d02 usb_bluetooth drivers/usb/class/bluetty.c
+RFCOMM_TTY_MAGIC 0x6d02 net/bluetooth/rfcomm/tty.c
+USB_SERIAL_PORT_MAGIC 0x7301 usb_serial_port drivers/usb/serial/usb-serial.h
+CG_MAGIC 0x00090255 ufs_cylinder_group include/linux/ufs_fs.h
+RPORT_MAGIC 0x00525001 r_port drivers/char/rocket_int.h
+LSEMAGIC 0x05091998 lse drivers/fc4/fc.c
+GDTIOCTL_MAGIC 0x06030f07 gdth_iowr_str drivers/scsi/gdth_ioctl.h
+RIEBL_MAGIC 0x09051990 drivers/net/atarilance.c
+NBD_REQUEST_MAGIC 0x12560953 nbd_request include/linux/nbd.h
+RED_MAGIC2 0x170fc2a5 (any) mm/slab.c
+BAYCOM_MAGIC 0x19730510 baycom_state drivers/net/baycom_epp.c
+ISDN_X25IFACE_MAGIC 0x1e75a2b9 isdn_x25iface_proto_data
+ drivers/isdn/isdn_x25iface.h
+ECP_MAGIC 0x21504345 cdkecpsig include/linux/cdk.h
+LSOMAGIC 0x27091997 lso drivers/fc4/fc.c
+LSMAGIC 0x2a3b4d2a ls drivers/fc4/fc.c
+WANPIPE_MAGIC 0x414C4453 sdla_{dump,exec} include/linux/wanpipe.h
+CS_CARD_MAGIC 0x43525553 cs_card sound/oss/cs46xx.c
+LABELCL_MAGIC 0x4857434c labelcl_info_s include/asm/ia64/sn/labelcl.h
+ISDN_ASYNC_MAGIC 0x49344C01 modem_info include/linux/isdn.h
+CTC_ASYNC_MAGIC 0x49344C01 ctc_tty_info drivers/s390/net/ctctty.c
+ISDN_NET_MAGIC 0x49344C02 isdn_net_local_s drivers/isdn/i4l/isdn_net_lib.h
+SAVEKMSG_MAGIC2 0x4B4D5347 savekmsg arch/*/amiga/config.c
+CS_STATE_MAGIC 0x4c4f4749 cs_state sound/oss/cs46xx.c
+SLAB_C_MAGIC 0x4f17a36d kmem_cache mm/slab.c
+COW_MAGIC 0x4f4f4f4d cow_header_v1 arch/um/drivers/ubd_user.c
+I810_CARD_MAGIC 0x5072696E i810_card sound/oss/i810_audio.c
+TRIDENT_CARD_MAGIC 0x5072696E trident_card sound/oss/trident.c
+ROUTER_MAGIC 0x524d4157 wan_device [in wanrouter.h pre 3.9]
+SCC_MAGIC 0x52696368 gs_port drivers/char/scc.h
+SAVEKMSG_MAGIC1 0x53415645 savekmsg arch/*/amiga/config.c
+GDA_MAGIC 0x58464552 gda arch/mips/include/asm/sn/gda.h
+RED_MAGIC1 0x5a2cf071 (any) mm/slab.c
+EEPROM_MAGIC_VALUE 0x5ab478d2 lanai_dev drivers/atm/lanai.c
+HDLCDRV_MAGIC 0x5ac6e778 hdlcdrv_state include/linux/hdlcdrv.h
+PCXX_MAGIC 0x5c6df104 channel drivers/char/pcxx.h
+KV_MAGIC 0x5f4b565f kernel_vars_s arch/mips/include/asm/sn/klkernvars.h
+I810_STATE_MAGIC 0x63657373 i810_state sound/oss/i810_audio.c
+TRIDENT_STATE_MAGIC 0x63657373 trient_state sound/oss/trident.c
+M3_CARD_MAGIC 0x646e6f50 m3_card sound/oss/maestro3.c
+FW_HEADER_MAGIC 0x65726F66 fw_header drivers/atm/fore200e.h
+SLOT_MAGIC 0x67267321 slot drivers/hotplug/cpqphp.h
+SLOT_MAGIC 0x67267322 slot drivers/hotplug/acpiphp.h
+LO_MAGIC 0x68797548 nbd_device include/linux/nbd.h
+OPROFILE_MAGIC 0x6f70726f super_block drivers/oprofile/oprofilefs.h
+M3_STATE_MAGIC 0x734d724d m3_state sound/oss/maestro3.c
+VMALLOC_MAGIC 0x87654320 snd_alloc_track sound/core/memory.c
+KMALLOC_MAGIC 0x87654321 snd_alloc_track sound/core/memory.c
+PWC_MAGIC 0x89DC10AB pwc_device drivers/usb/media/pwc.h
+NBD_REPLY_MAGIC 0x96744668 nbd_reply include/linux/nbd.h
+ENI155_MAGIC 0xa54b872d midway_eprom drivers/atm/eni.h
+SCI_MAGIC 0xbabeface gs_port drivers/char/sh-sci.h
+CODA_MAGIC 0xC0DAC0DA coda_file_info fs/coda/coda_fs_i.h
+DPMEM_MAGIC 0xc0ffee11 gdt_pci_sram drivers/scsi/gdth.h
+YAM_MAGIC 0xF10A7654 yam_port drivers/net/hamradio/yam.c
+CCB_MAGIC 0xf2691ad2 ccb drivers/scsi/ncr53c8xx.c
+QUEUE_MAGIC_FREE 0xf7e1c9a3 queue_entry drivers/scsi/arm/queue.c
+QUEUE_MAGIC_USED 0xf7e1cc33 queue_entry drivers/scsi/arm/queue.c
+HTB_CMAGIC 0xFEFAFEF1 htb_class net/sched/sch_htb.c
+NMI_MAGIC 0x48414d4d455201 nmi_s arch/mips/include/asm/sn/nmi.h
+
+Note that there are also defined special per-driver magic numbers in sound
+memory management. See include/sound/sndmagic.h for complete list of them. Many
+OSS sound drivers have their magic numbers constructed from the soundcard PCI
+ID - these are not listed here as well.
+
+IrDA subsystem also uses large number of own magic numbers, see
+include/net/irda/irda.h for a complete list of them.
+
+HFS is another larger user of magic numbers - you can find them in
+fs/hfs/hfs.h.
diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt
new file mode 100644
index 000000000..1092ad957
--- /dev/null
+++ b/Documentation/mailbox.txt
@@ -0,0 +1,122 @@
+ The Common Mailbox Framework
+ Jassi Brar <jaswinder.singh@linaro.org>
+
+ This document aims to help developers write client and controller
+drivers for the API. But before we start, let us note that the
+client (especially) and controller drivers are likely going to be
+very platform specific because the remote firmware is likely to be
+proprietary and implement non-standard protocol. So even if two
+platforms employ, say, PL320 controller, the client drivers can't
+be shared across them. Even the PL320 driver might need to accommodate
+some platform specific quirks. So the API is meant mainly to avoid
+similar copies of code written for each platform. Having said that,
+nothing prevents the remote f/w to also be Linux based and use the
+same api there. However none of that helps us locally because we only
+ever deal at client's protocol level.
+ Some of the choices made during implementation are the result of this
+peculiarity of this "common" framework.
+
+
+
+ Part 1 - Controller Driver (See include/linux/mailbox_controller.h)
+
+ Allocate mbox_controller and the array of mbox_chan.
+Populate mbox_chan_ops, except peek_data() all are mandatory.
+The controller driver might know a message has been consumed
+by the remote by getting an IRQ or polling some hardware flag
+or it can never know (the client knows by way of the protocol).
+The method in order of preference is IRQ -> Poll -> None, which
+the controller driver should set via 'txdone_irq' or 'txdone_poll'
+or neither.
+
+
+ Part 2 - Client Driver (See include/linux/mailbox_client.h)
+
+ The client might want to operate in blocking mode (synchronously
+send a message through before returning) or non-blocking/async mode (submit
+a message and a callback function to the API and return immediately).
+
+
+struct demo_client {
+ struct mbox_client cl;
+ struct mbox_chan *mbox;
+ struct completion c;
+ bool async;
+ /* ... */
+};
+
+/*
+ * This is the handler for data received from remote. The behaviour is purely
+ * dependent upon the protocol. This is just an example.
+ */
+static void message_from_remote(struct mbox_client *cl, void *mssg)
+{
+ struct demo_client *dc = container_of(mbox_client,
+ struct demo_client, cl);
+ if (dc->async) {
+ if (is_an_ack(mssg)) {
+ /* An ACK to our last sample sent */
+ return; /* Or do something else here */
+ } else { /* A new message from remote */
+ queue_req(mssg);
+ }
+ } else {
+ /* Remote f/w sends only ACK packets on this channel */
+ return;
+ }
+}
+
+static void sample_sent(struct mbox_client *cl, void *mssg, int r)
+{
+ struct demo_client *dc = container_of(mbox_client,
+ struct demo_client, cl);
+ complete(&dc->c);
+}
+
+static void client_demo(struct platform_device *pdev)
+{
+ struct demo_client *dc_sync, *dc_async;
+ /* The controller already knows async_pkt and sync_pkt */
+ struct async_pkt ap;
+ struct sync_pkt sp;
+
+ dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL);
+ dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL);
+
+ /* Populate non-blocking mode client */
+ dc_async->cl.dev = &pdev->dev;
+ dc_async->cl.rx_callback = message_from_remote;
+ dc_async->cl.tx_done = sample_sent;
+ dc_async->cl.tx_block = false;
+ dc_async->cl.tx_tout = 0; /* doesn't matter here */
+ dc_async->cl.knows_txdone = false; /* depending upon protocol */
+ dc_async->async = true;
+ init_completion(&dc_async->c);
+
+ /* Populate blocking mode client */
+ dc_sync->cl.dev = &pdev->dev;
+ dc_sync->cl.rx_callback = message_from_remote;
+ dc_sync->cl.tx_done = NULL; /* operate in blocking mode */
+ dc_sync->cl.tx_block = true;
+ dc_sync->cl.tx_tout = 500; /* by half a second */
+ dc_sync->cl.knows_txdone = false; /* depending upon protocol */
+ dc_sync->async = false;
+
+ /* ASync mailbox is listed second in 'mboxes' property */
+ dc_async->mbox = mbox_request_channel(&dc_async->cl, 1);
+ /* Populate data packet */
+ /* ap.xxx = 123; etc */
+ /* Send async message to remote */
+ mbox_send_message(dc_async->mbox, &ap);
+
+ /* Sync mailbox is listed first in 'mboxes' property */
+ dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0);
+ /* Populate data packet */
+ /* sp.abc = 123; etc */
+ /* Send message to remote in blocking mode */
+ mbox_send_message(dc_sync->mbox, &sp);
+ /* At this point 'sp' has been sent */
+
+ /* Now wait for async chan to be done */
+ wait_for_completion(&dc_async->c);
+}
diff --git a/Documentation/md-cluster.txt b/Documentation/md-cluster.txt
new file mode 100644
index 000000000..de1af7db3
--- /dev/null
+++ b/Documentation/md-cluster.txt
@@ -0,0 +1,176 @@
+The cluster MD is a shared-device RAID for a cluster.
+
+
+1. On-disk format
+
+Separate write-intent-bitmap are used for each cluster node.
+The bitmaps record all writes that may have been started on that node,
+and may not yet have finished. The on-disk layout is:
+
+0 4k 8k 12k
+-------------------------------------------------------------------
+| idle | md super | bm super [0] + bits |
+| bm bits[0, contd] | bm super[1] + bits | bm bits[1, contd] |
+| bm super[2] + bits | bm bits [2, contd] | bm super[3] + bits |
+| bm bits [3, contd] | | |
+
+During "normal" functioning we assume the filesystem ensures that only one
+node writes to any given block at a time, so a write
+request will
+ - set the appropriate bit (if not already set)
+ - commit the write to all mirrors
+ - schedule the bit to be cleared after a timeout.
+
+Reads are just handled normally. It is up to the filesystem to
+ensure one node doesn't read from a location where another node (or the same
+node) is writing.
+
+
+2. DLM Locks for management
+
+There are two locks for managing the device:
+
+2.1 Bitmap lock resource (bm_lockres)
+
+ The bm_lockres protects individual node bitmaps. They are named in the
+ form bitmap001 for node 1, bitmap002 for node and so on. When a node
+ joins the cluster, it acquires the lock in PW mode and it stays so
+ during the lifetime the node is part of the cluster. The lock resource
+ number is based on the slot number returned by the DLM subsystem. Since
+ DLM starts node count from one and bitmap slots start from zero, one is
+ subtracted from the DLM slot number to arrive at the bitmap slot number.
+
+3. Communication
+
+Each node has to communicate with other nodes when starting or ending
+resync, and metadata superblock updates.
+
+3.1 Message Types
+
+ There are 3 types, of messages which are passed
+
+ 3.1.1 METADATA_UPDATED: informs other nodes that the metadata has been
+ updated, and the node must re-read the md superblock. This is performed
+ synchronously.
+
+ 3.1.2 RESYNC: informs other nodes that a resync is initiated or ended
+ so that each node may suspend or resume the region.
+
+3.2 Communication mechanism
+
+ The DLM LVB is used to communicate within nodes of the cluster. There
+ are three resources used for the purpose:
+
+ 3.2.1 Token: The resource which protects the entire communication
+ system. The node having the token resource is allowed to
+ communicate.
+
+ 3.2.2 Message: The lock resource which carries the data to
+ communicate.
+
+ 3.2.3 Ack: The resource, acquiring which means the message has been
+ acknowledged by all nodes in the cluster. The BAST of the resource
+ is used to inform the receive node that a node wants to communicate.
+
+The algorithm is:
+
+ 1. receive status
+
+ sender receiver receiver
+ ACK:CR ACK:CR ACK:CR
+
+ 2. sender get EX of TOKEN
+ sender get EX of MESSAGE
+ sender receiver receiver
+ TOKEN:EX ACK:CR ACK:CR
+ MESSAGE:EX
+ ACK:CR
+
+ Sender checks that it still needs to send a message. Messages received
+ or other events that happened while waiting for the TOKEN may have made
+ this message inappropriate or redundant.
+
+ 3. sender write LVB.
+ sender down-convert MESSAGE from EX to CR
+ sender try to get EX of ACK
+ [ wait until all receiver has *processed* the MESSAGE ]
+
+ [ triggered by bast of ACK ]
+ receiver get CR of MESSAGE
+ receiver read LVB
+ receiver processes the message
+ [ wait finish ]
+ receiver release ACK
+
+ sender receiver receiver
+ TOKEN:EX MESSAGE:CR MESSAGE:CR
+ MESSAGE:CR
+ ACK:EX
+
+ 4. triggered by grant of EX on ACK (indicating all receivers have processed
+ message)
+ sender down-convert ACK from EX to CR
+ sender release MESSAGE
+ sender release TOKEN
+ receiver upconvert to EX of MESSAGE
+ receiver get CR of ACK
+ receiver release MESSAGE
+
+ sender receiver receiver
+ ACK:CR ACK:CR ACK:CR
+
+
+4. Handling Failures
+
+4.1 Node Failure
+ When a node fails, the DLM informs the cluster with the slot. The node
+ starts a cluster recovery thread. The cluster recovery thread:
+ - acquires the bitmap<number> lock of the failed node
+ - opens the bitmap
+ - reads the bitmap of the failed node
+ - copies the set bitmap to local node
+ - cleans the bitmap of the failed node
+ - releases bitmap<number> lock of the failed node
+ - initiates resync of the bitmap on the current node
+
+ The resync process, is the regular md resync. However, in a clustered
+ environment when a resync is performed, it needs to tell other nodes
+ of the areas which are suspended. Before a resync starts, the node
+ send out RESYNC_START with the (lo,hi) range of the area which needs
+ to be suspended. Each node maintains a suspend_list, which contains
+ the list of ranges which are currently suspended. On receiving
+ RESYNC_START, the node adds the range to the suspend_list. Similarly,
+ when the node performing resync finishes, it send RESYNC_FINISHED
+ to other nodes and other nodes remove the corresponding entry from
+ the suspend_list.
+
+ A helper function, should_suspend() can be used to check if a particular
+ I/O range should be suspended or not.
+
+4.2 Device Failure
+ Device failures are handled and communicated with the metadata update
+ routine.
+
+5. Adding a new Device
+For adding a new device, it is necessary that all nodes "see" the new device
+to be added. For this, the following algorithm is used:
+
+ 1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
+ ioctl(ADD_NEW_DISC with disc.state set to MD_DISK_CLUSTER_ADD)
+ 2. Node 1 sends NEWDISK with uuid and slot number
+ 3. Other nodes issue kobject_uevent_env with uuid and slot number
+ (Steps 4,5 could be a udev rule)
+ 4. In userspace, the node searches for the disk, perhaps
+ using blkid -t SUB_UUID=""
+ 5. Other nodes issue either of the following depending on whether the disk
+ was found:
+ ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
+ disc.number set to slot number)
+ ioctl(CLUSTERED_DISK_NACK)
+ 6. Other nodes drop lock on no-new-devs (CR) if device is found
+ 7. Node 1 attempts EX lock on no-new-devs
+ 8. If node 1 gets the lock, it sends METADATA_UPDATED after unmarking the disk
+ as SpareLocal
+ 9. If not (get no-new-dev lock), it fails the operation and sends METADATA_UPDATED
+ 10. Other nodes get the information whether a disk is added or not
+ by the following METADATA_UPDATED.
diff --git a/Documentation/md.txt b/Documentation/md.txt
new file mode 100644
index 000000000..f925666e4
--- /dev/null
+++ b/Documentation/md.txt
@@ -0,0 +1,613 @@
+Tools that manage md devices can be found at
+ http://www.kernel.org/pub/linux/utils/raid/
+
+
+Boot time assembly of RAID arrays
+---------------------------------
+
+You can boot with your md device with the following kernel command
+lines:
+
+for old raid arrays without persistent superblocks:
+ md=<md device no.>,<raid level>,<chunk size factor>,<fault level>,dev0,dev1,...,devn
+
+for raid arrays with persistent superblocks
+ md=<md device no.>,dev0,dev1,...,devn
+or, to assemble a partitionable array:
+ md=d<md device no.>,dev0,dev1,...,devn
+
+md device no. = the number of the md device ...
+ 0 means md0,
+ 1 md1,
+ 2 md2,
+ 3 md3,
+ 4 md4
+
+raid level = -1 linear mode
+ 0 striped mode
+ other modes are only supported with persistent super blocks
+
+chunk size factor = (raid-0 and raid-1 only)
+ Set the chunk size as 4k << n.
+
+fault level = totally ignored
+
+dev0-devn: e.g. /dev/hda1,/dev/hdc1,/dev/sda1,/dev/sdb1
+
+A possible loadlin line (Harald Hoyer <HarryH@Royal.Net>) looks like this:
+
+e:\loadlin\loadlin e:\zimage root=/dev/md0 md=0,0,4,0,/dev/hdb2,/dev/hdc3 ro
+
+
+Boot time autodetection of RAID arrays
+--------------------------------------
+
+When md is compiled into the kernel (not as module), partitions of
+type 0xfd are scanned and automatically assembled into RAID arrays.
+This autodetection may be suppressed with the kernel parameter
+"raid=noautodetect". As of kernel 2.6.9, only drives with a type 0
+superblock can be autodetected and run at boot time.
+
+The kernel parameter "raid=partitionable" (or "raid=part") means
+that all auto-detected arrays are assembled as partitionable.
+
+Boot time assembly of degraded/dirty arrays
+-------------------------------------------
+
+If a raid5 or raid6 array is both dirty and degraded, it could have
+undetectable data corruption. This is because the fact that it is
+'dirty' means that the parity cannot be trusted, and the fact that it
+is degraded means that some datablocks are missing and cannot reliably
+be reconstructed (due to no parity).
+
+For this reason, md will normally refuse to start such an array. This
+requires the sysadmin to take action to explicitly start the array
+despite possible corruption. This is normally done with
+ mdadm --assemble --force ....
+
+This option is not really available if the array has the root
+filesystem on it. In order to support this booting from such an
+array, md supports a module parameter "start_dirty_degraded" which,
+when set to 1, bypassed the checks and will allows dirty degraded
+arrays to be started.
+
+So, to boot with a root filesystem of a dirty degraded raid[56], use
+
+ md-mod.start_dirty_degraded=1
+
+
+Superblock formats
+------------------
+
+The md driver can support a variety of different superblock formats.
+Currently, it supports superblock formats "0.90.0" and the "md-1" format
+introduced in the 2.5 development series.
+
+The kernel will autodetect which format superblock is being used.
+
+Superblock format '0' is treated differently to others for legacy
+reasons - it is the original superblock format.
+
+
+General Rules - apply for all superblock formats
+------------------------------------------------
+
+An array is 'created' by writing appropriate superblocks to all
+devices.
+
+It is 'assembled' by associating each of these devices with an
+particular md virtual device. Once it is completely assembled, it can
+be accessed.
+
+An array should be created by a user-space tool. This will write
+superblocks to all devices. It will usually mark the array as
+'unclean', or with some devices missing so that the kernel md driver
+can create appropriate redundancy (copying in raid1, parity
+calculation in raid4/5).
+
+When an array is assembled, it is first initialized with the
+SET_ARRAY_INFO ioctl. This contains, in particular, a major and minor
+version number. The major version number selects which superblock
+format is to be used. The minor number might be used to tune handling
+of the format, such as suggesting where on each device to look for the
+superblock.
+
+Then each device is added using the ADD_NEW_DISK ioctl. This
+provides, in particular, a major and minor number identifying the
+device to add.
+
+The array is started with the RUN_ARRAY ioctl.
+
+Once started, new devices can be added. They should have an
+appropriate superblock written to them, and then be passed in with
+ADD_NEW_DISK.
+
+Devices that have failed or are not yet active can be detached from an
+array using HOT_REMOVE_DISK.
+
+
+Specific Rules that apply to format-0 super block arrays, and
+ arrays with no superblock (non-persistent).
+-------------------------------------------------------------
+
+An array can be 'created' by describing the array (level, chunksize
+etc) in a SET_ARRAY_INFO ioctl. This must have major_version==0 and
+raid_disks != 0.
+
+Then uninitialized devices can be added with ADD_NEW_DISK. The
+structure passed to ADD_NEW_DISK must specify the state of the device
+and its role in the array.
+
+Once started with RUN_ARRAY, uninitialized spares can be added with
+HOT_ADD_DISK.
+
+
+
+MD devices in sysfs
+-------------------
+md devices appear in sysfs (/sys) as regular block devices,
+e.g.
+ /sys/block/md0
+
+Each 'md' device will contain a subdirectory called 'md' which
+contains further md-specific information about the device.
+
+All md devices contain:
+ level
+ a text file indicating the 'raid level'. e.g. raid0, raid1,
+ raid5, linear, multipath, faulty.
+ If no raid level has been set yet (array is still being
+ assembled), the value will reflect whatever has been written
+ to it, which may be a name like the above, or may be a number
+ such as '0', '5', etc.
+
+ raid_disks
+ a text file with a simple number indicating the number of devices
+ in a fully functional array. If this is not yet known, the file
+ will be empty. If an array is being resized this will contain
+ the new number of devices.
+ Some raid levels allow this value to be set while the array is
+ active. This will reconfigure the array. Otherwise it can only
+ be set while assembling an array.
+ A change to this attribute will not be permitted if it would
+ reduce the size of the array. To reduce the number of drives
+ in an e.g. raid5, the array size must first be reduced by
+ setting the 'array_size' attribute.
+
+ chunk_size
+ This is the size in bytes for 'chunks' and is only relevant to
+ raid levels that involve striping (0,4,5,6,10). The address space
+ of the array is conceptually divided into chunks and consecutive
+ chunks are striped onto neighbouring devices.
+ The size should be at least PAGE_SIZE (4k) and should be a power
+ of 2. This can only be set while assembling an array
+
+ layout
+ The "layout" for the array for the particular level. This is
+ simply a number that is interpretted differently by different
+ levels. It can be written while assembling an array.
+
+ array_size
+ This can be used to artificially constrain the available space in
+ the array to be less than is actually available on the combined
+ devices. Writing a number (in Kilobytes) which is less than
+ the available size will set the size. Any reconfiguration of the
+ array (e.g. adding devices) will not cause the size to change.
+ Writing the word 'default' will cause the effective size of the
+ array to be whatever size is actually available based on
+ 'level', 'chunk_size' and 'component_size'.
+
+ This can be used to reduce the size of the array before reducing
+ the number of devices in a raid4/5/6, or to support external
+ metadata formats which mandate such clipping.
+
+ reshape_position
+ This is either "none" or a sector number within the devices of
+ the array where "reshape" is up to. If this is set, the three
+ attributes mentioned above (raid_disks, chunk_size, layout) can
+ potentially have 2 values, an old and a new value. If these
+ values differ, reading the attribute returns
+ new (old)
+ and writing will effect the 'new' value, leaving the 'old'
+ unchanged.
+
+ component_size
+ For arrays with data redundancy (i.e. not raid0, linear, faulty,
+ multipath), all components must be the same size - or at least
+ there must a size that they all provide space for. This is a key
+ part or the geometry of the array. It is measured in sectors
+ and can be read from here. Writing to this value may resize
+ the array if the personality supports it (raid1, raid5, raid6),
+ and if the component drives are large enough.
+
+ metadata_version
+ This indicates the format that is being used to record metadata
+ about the array. It can be 0.90 (traditional format), 1.0, 1.1,
+ 1.2 (newer format in varying locations) or "none" indicating that
+ the kernel isn't managing metadata at all.
+ Alternately it can be "external:" followed by a string which
+ is set by user-space. This indicates that metadata is managed
+ by a user-space program. Any device failure or other event that
+ requires a metadata update will cause array activity to be
+ suspended until the event is acknowledged.
+
+ resync_start
+ The point at which resync should start. If no resync is needed,
+ this will be a very large number (or 'none' since 2.6.30-rc1). At
+ array creation it will default to 0, though starting the array as
+ 'clean' will set it much larger.
+
+ new_dev
+ This file can be written but not read. The value written should
+ be a block device number as major:minor. e.g. 8:0
+ This will cause that device to be attached to the array, if it is
+ available. It will then appear at md/dev-XXX (depending on the
+ name of the device) and further configuration is then possible.
+
+ safe_mode_delay
+ When an md array has seen no write requests for a certain period
+ of time, it will be marked as 'clean'. When another write
+ request arrives, the array is marked as 'dirty' before the write
+ commences. This is known as 'safe_mode'.
+ The 'certain period' is controlled by this file which stores the
+ period as a number of seconds. The default is 200msec (0.200).
+ Writing a value of 0 disables safemode.
+
+ array_state
+ This file contains a single word which describes the current
+ state of the array. In many cases, the state can be set by
+ writing the word for the desired state, however some states
+ cannot be explicitly set, and some transitions are not allowed.
+
+ Select/poll works on this file. All changes except between
+ active_idle and active (which can be frequent and are not
+ very interesting) are notified. active->active_idle is
+ reported if the metadata is externally managed.
+
+ clear
+ No devices, no size, no level
+ Writing is equivalent to STOP_ARRAY ioctl
+ inactive
+ May have some settings, but array is not active
+ all IO results in error
+ When written, doesn't tear down array, but just stops it
+ suspended (not supported yet)
+ All IO requests will block. The array can be reconfigured.
+ Writing this, if accepted, will block until array is quiessent
+ readonly
+ no resync can happen. no superblocks get written.
+ write requests fail
+ read-auto
+ like readonly, but behaves like 'clean' on a write request.
+
+ clean - no pending writes, but otherwise active.
+ When written to inactive array, starts without resync
+ If a write request arrives then
+ if metadata is known, mark 'dirty' and switch to 'active'.
+ if not known, block and switch to write-pending
+ If written to an active array that has pending writes, then fails.
+ active
+ fully active: IO and resync can be happening.
+ When written to inactive array, starts with resync
+
+ write-pending
+ clean, but writes are blocked waiting for 'active' to be written.
+
+ active-idle
+ like active, but no writes have been seen for a while (safe_mode_delay).
+
+ bitmap/location
+ This indicates where the write-intent bitmap for the array is
+ stored.
+ It can be one of "none", "file" or "[+-]N".
+ "file" may later be extended to "file:/file/name"
+ "[+-]N" means that many sectors from the start of the metadata.
+ This is replicated on all devices. For arrays with externally
+ managed metadata, the offset is from the beginning of the
+ device.
+ bitmap/chunksize
+ The size, in bytes, of the chunk which will be represented by a
+ single bit. For RAID456, it is a portion of an individual
+ device. For RAID10, it is a portion of the array. For RAID1, it
+ is both (they come to the same thing).
+ bitmap/time_base
+ The time, in seconds, between looking for bits in the bitmap to
+ be cleared. In the current implementation, a bit will be cleared
+ between 2 and 3 times "time_base" after all the covered blocks
+ are known to be in-sync.
+ bitmap/backlog
+ When write-mostly devices are active in a RAID1, write requests
+ to those devices proceed in the background - the filesystem (or
+ other user of the device) does not have to wait for them.
+ 'backlog' sets a limit on the number of concurrent background
+ writes. If there are more than this, new writes will by
+ synchronous.
+ bitmap/metadata
+ This can be either 'internal' or 'external'.
+ 'internal' is the default and means the metadata for the bitmap
+ is stored in the first 256 bytes of the allocated space and is
+ managed by the md module.
+ 'external' means that bitmap metadata is managed externally to
+ the kernel (i.e. by some userspace program)
+ bitmap/can_clear
+ This is either 'true' or 'false'. If 'true', then bits in the
+ bitmap will be cleared when the corresponding blocks are thought
+ to be in-sync. If 'false', bits will never be cleared.
+ This is automatically set to 'false' if a write happens on a
+ degraded array, or if the array becomes degraded during a write.
+ When metadata is managed externally, it should be set to true
+ once the array becomes non-degraded, and this fact has been
+ recorded in the metadata.
+
+
+
+
+As component devices are added to an md array, they appear in the 'md'
+directory as new directories named
+ dev-XXX
+where XXX is a name that the kernel knows for the device, e.g. hdb1.
+Each directory contains:
+
+ block
+ a symlink to the block device in /sys/block, e.g.
+ /sys/block/md0/md/dev-hdb1/block -> ../../../../block/hdb/hdb1
+
+ super
+ A file containing an image of the superblock read from, or
+ written to, that device.
+
+ state
+ A file recording the current state of the device in the array
+ which can be a comma separated list of
+ faulty - device has been kicked from active use due to
+ a detected fault, or it has unacknowledged bad
+ blocks
+ in_sync - device is a fully in-sync member of the array
+ writemostly - device will only be subject to read
+ requests if there are no other options.
+ This applies only to raid1 arrays.
+ blocked - device has failed, and the failure hasn't been
+ acknowledged yet by the metadata handler.
+ Writes that would write to this device if
+ it were not faulty are blocked.
+ spare - device is working, but not a full member.
+ This includes spares that are in the process
+ of being recovered to
+ write_error - device has ever seen a write error.
+ want_replacement - device is (mostly) working but probably
+ should be replaced, either due to errors or
+ due to user request.
+ replacement - device is a replacement for another active
+ device with same raid_disk.
+
+
+ This list may grow in future.
+ This can be written to.
+ Writing "faulty" simulates a failure on the device.
+ Writing "remove" removes the device from the array.
+ Writing "writemostly" sets the writemostly flag.
+ Writing "-writemostly" clears the writemostly flag.
+ Writing "blocked" sets the "blocked" flag.
+ Writing "-blocked" clears the "blocked" flags and allows writes
+ to complete and possibly simulates an error.
+ Writing "in_sync" sets the in_sync flag.
+ Writing "write_error" sets writeerrorseen flag.
+ Writing "-write_error" clears writeerrorseen flag.
+ Writing "want_replacement" is allowed at any time except to a
+ replacement device or a spare. It sets the flag.
+ Writing "-want_replacement" is allowed at any time. It clears
+ the flag.
+ Writing "replacement" or "-replacement" is only allowed before
+ starting the array. It sets or clears the flag.
+
+
+ This file responds to select/poll. Any change to 'faulty'
+ or 'blocked' causes an event.
+
+ errors
+ An approximate count of read errors that have been detected on
+ this device but have not caused the device to be evicted from
+ the array (either because they were corrected or because they
+ happened while the array was read-only). When using version-1
+ metadata, this value persists across restarts of the array.
+
+ This value can be written while assembling an array thus
+ providing an ongoing count for arrays with metadata managed by
+ userspace.
+
+ slot
+ This gives the role that the device has in the array. It will
+ either be 'none' if the device is not active in the array
+ (i.e. is a spare or has failed) or an integer less than the
+ 'raid_disks' number for the array indicating which position
+ it currently fills. This can only be set while assembling an
+ array. A device for which this is set is assumed to be working.
+
+ offset
+ This gives the location in the device (in sectors from the
+ start) where data from the array will be stored. Any part of
+ the device before this offset is not touched, unless it is
+ used for storing metadata (Formats 1.1 and 1.2).
+
+ size
+ The amount of the device, after the offset, that can be used
+ for storage of data. This will normally be the same as the
+ component_size. This can be written while assembling an
+ array. If a value less than the current component_size is
+ written, it will be rejected.
+
+ recovery_start
+ When the device is not 'in_sync', this records the number of
+ sectors from the start of the device which are known to be
+ correct. This is normally zero, but during a recovery
+ operation it will steadily increase, and if the recovery is
+ interrupted, restoring this value can cause recovery to
+ avoid repeating the earlier blocks. With v1.x metadata, this
+ value is saved and restored automatically.
+
+ This can be set whenever the device is not an active member of
+ the array, either before the array is activated, or before
+ the 'slot' is set.
+
+ Setting this to 'none' is equivalent to setting 'in_sync'.
+ Setting to any other value also clears the 'in_sync' flag.
+
+ bad_blocks
+ This gives the list of all known bad blocks in the form of
+ start address and length (in sectors respectively). If output
+ is too big to fit in a page, it will be truncated. Writing
+ "sector length" to this file adds new acknowledged (i.e.
+ recorded to disk safely) bad blocks.
+
+ unacknowledged_bad_blocks
+ This gives the list of known-but-not-yet-saved-to-disk bad
+ blocks in the same form of 'bad_blocks'. If output is too big
+ to fit in a page, it will be truncated. Writing to this file
+ adds bad blocks without acknowledging them. This is largely
+ for testing.
+
+
+
+An active md device will also contain an entry for each active device
+in the array. These are named
+
+ rdNN
+
+where 'NN' is the position in the array, starting from 0.
+So for a 3 drive array there will be rd0, rd1, rd2.
+These are symbolic links to the appropriate 'dev-XXX' entry.
+Thus, for example,
+ cat /sys/block/md*/md/rd*/state
+will show 'in_sync' on every line.
+
+
+
+Active md devices for levels that support data redundancy (1,4,5,6,10)
+also have
+
+ sync_action
+ a text file that can be used to monitor and control the rebuild
+ process. It contains one word which can be one of:
+ resync - redundancy is being recalculated after unclean
+ shutdown or creation
+ recover - a hot spare is being built to replace a
+ failed/missing device
+ idle - nothing is happening
+ check - A full check of redundancy was requested and is
+ happening. This reads all blocks and checks
+ them. A repair may also happen for some raid
+ levels.
+ repair - A full check and repair is happening. This is
+ similar to 'resync', but was requested by the
+ user, and the write-intent bitmap is NOT used to
+ optimise the process.
+
+ This file is writable, and each of the strings that could be
+ read are meaningful for writing.
+
+ 'idle' will stop an active resync/recovery etc. There is no
+ guarantee that another resync/recovery may not be automatically
+ started again, though some event will be needed to trigger
+ this.
+ 'resync' or 'recovery' can be used to restart the
+ corresponding operation if it was stopped with 'idle'.
+ 'check' and 'repair' will start the appropriate process
+ providing the current state is 'idle'.
+
+ This file responds to select/poll. Any important change in the value
+ triggers a poll event. Sometimes the value will briefly be
+ "recover" if a recovery seems to be needed, but cannot be
+ achieved. In that case, the transition to "recover" isn't
+ notified, but the transition away is.
+
+ degraded
+ This contains a count of the number of devices by which the
+ arrays is degraded. So an optimal array will show '0'. A
+ single failed/missing drive will show '1', etc.
+ This file responds to select/poll, any increase or decrease
+ in the count of missing devices will trigger an event.
+
+ mismatch_count
+ When performing 'check' and 'repair', and possibly when
+ performing 'resync', md will count the number of errors that are
+ found. The count in 'mismatch_cnt' is the number of sectors
+ that were re-written, or (for 'check') would have been
+ re-written. As most raid levels work in units of pages rather
+ than sectors, this may be larger than the number of actual errors
+ by a factor of the number of sectors in a page.
+
+ bitmap_set_bits
+ If the array has a write-intent bitmap, then writing to this
+ attribute can set bits in the bitmap, indicating that a resync
+ would need to check the corresponding blocks. Either individual
+ numbers or start-end pairs can be written. Multiple numbers
+ can be separated by a space.
+ Note that the numbers are 'bit' numbers, not 'block' numbers.
+ They should be scaled by the bitmap_chunksize.
+
+ sync_speed_min
+ sync_speed_max
+ This are similar to /proc/sys/dev/raid/speed_limit_{min,max}
+ however they only apply to the particular array.
+ If no value has been written to these, of if the word 'system'
+ is written, then the system-wide value is used. If a value,
+ in kibibytes-per-second is written, then it is used.
+ When the files are read, they show the currently active value
+ followed by "(local)" or "(system)" depending on whether it is
+ a locally set or system-wide value.
+
+ sync_completed
+ This shows the number of sectors that have been completed of
+ whatever the current sync_action is, followed by the number of
+ sectors in total that could need to be processed. The two
+ numbers are separated by a '/' thus effectively showing one
+ value, a fraction of the process that is complete.
+ A 'select' on this attribute will return when resync completes,
+ when it reaches the current sync_max (below) and possibly at
+ other times.
+
+ sync_speed
+ This shows the current actual speed, in K/sec, of the current
+ sync_action. It is averaged over the last 30 seconds.
+
+ suspend_lo
+ suspend_hi
+ The two values, given as numbers of sectors, indicate a range
+ within the array where IO will be blocked. This is currently
+ only supported for raid4/5/6.
+
+ sync_min
+ sync_max
+ The two values, given as numbers of sectors, indicate a range
+ within the array where 'check'/'repair' will operate. Must be
+ a multiple of chunk_size. When it reaches "sync_max" it will
+ pause, rather than complete.
+ You can use 'select' or 'poll' on "sync_completed" to wait for
+ that number to reach sync_max. Then you can either increase
+ "sync_max", or can write 'idle' to "sync_action".
+
+ The value of 'max' for "sync_max" effectively disables the limit.
+ When a resync is active, the value can only ever be increased,
+ never decreased.
+ The value of '0' is the minimum for "sync_min".
+
+
+
+Each active md device may also have attributes specific to the
+personality module that manages it.
+These are specific to the implementation of the module and could
+change substantially if the implementation changes.
+
+These currently include
+
+ stripe_cache_size (currently raid5 only)
+ number of entries in the stripe cache. This is writable, but
+ there are upper and lower limits (32768, 16). Default is 128.
+ strip_cache_active (currently raid5 only)
+ number of active entries in the stripe cache
+ preread_bypass_threshold (currently raid5 only)
+ number of times a stripe requiring preread will be bypassed by
+ a stripe that does not require preread. For fairness defaults
+ to 1. Setting this to 0 disables bypass accounting and
+ requires preread stripes to wait until all full-width stripe-
+ writes are complete. Valid values are 0 to stripe_cache_size.
diff --git a/Documentation/media-framework.txt b/Documentation/media-framework.txt
new file mode 100644
index 000000000..f552a75c0
--- /dev/null
+++ b/Documentation/media-framework.txt
@@ -0,0 +1,372 @@
+Linux kernel media framework
+============================
+
+This document describes the Linux kernel media framework, its data structures,
+functions and their usage.
+
+
+Introduction
+------------
+
+The media controller API is documented in DocBook format in
+Documentation/DocBook/media/v4l/media-controller.xml. This document will focus
+on the kernel-side implementation of the media framework.
+
+
+Abstract media device model
+---------------------------
+
+Discovering a device internal topology, and configuring it at runtime, is one
+of the goals of the media framework. To achieve this, hardware devices are
+modelled as an oriented graph of building blocks called entities connected
+through pads.
+
+An entity is a basic media hardware building block. It can correspond to
+a large variety of logical blocks such as physical hardware devices
+(CMOS sensor for instance), logical hardware devices (a building block
+in a System-on-Chip image processing pipeline), DMA channels or physical
+connectors.
+
+A pad is a connection endpoint through which an entity can interact with
+other entities. Data (not restricted to video) produced by an entity
+flows from the entity's output to one or more entity inputs. Pads should
+not be confused with physical pins at chip boundaries.
+
+A link is a point-to-point oriented connection between two pads, either
+on the same entity or on different entities. Data flows from a source
+pad to a sink pad.
+
+
+Media device
+------------
+
+A media device is represented by a struct media_device instance, defined in
+include/media/media-device.h. Allocation of the structure is handled by the
+media device driver, usually by embedding the media_device instance in a
+larger driver-specific structure.
+
+Drivers register media device instances by calling
+
+ media_device_register(struct media_device *mdev);
+
+The caller is responsible for initializing the media_device structure before
+registration. The following fields must be set:
+
+ - dev must point to the parent device (usually a pci_dev, usb_interface or
+ platform_device instance).
+
+ - model must be filled with the device model name as a NUL-terminated UTF-8
+ string. The device/model revision must not be stored in this field.
+
+The following fields are optional:
+
+ - serial is a unique serial number stored as a NUL-terminated ASCII string.
+ The field is big enough to store a GUID in text form. If the hardware
+ doesn't provide a unique serial number this field must be left empty.
+
+ - bus_info represents the location of the device in the system as a
+ NUL-terminated ASCII string. For PCI/PCIe devices bus_info must be set to
+ "PCI:" (or "PCIe:") followed by the value of pci_name(). For USB devices,
+ the usb_make_path() function must be used. This field is used by
+ applications to distinguish between otherwise identical devices that don't
+ provide a serial number.
+
+ - hw_revision is the hardware device revision in a driver-specific format.
+ When possible the revision should be formatted with the KERNEL_VERSION
+ macro.
+
+ - driver_version is formatted with the KERNEL_VERSION macro. The version
+ minor must be incremented when new features are added to the userspace API
+ without breaking binary compatibility. The version major must be
+ incremented when binary compatibility is broken.
+
+Upon successful registration a character device named media[0-9]+ is created.
+The device major and minor numbers are dynamic. The model name is exported as
+a sysfs attribute.
+
+Drivers unregister media device instances by calling
+
+ media_device_unregister(struct media_device *mdev);
+
+Unregistering a media device that hasn't been registered is *NOT* safe.
+
+
+Entities, pads and links
+------------------------
+
+- Entities
+
+Entities are represented by a struct media_entity instance, defined in
+include/media/media-entity.h. The structure is usually embedded into a
+higher-level structure, such as a v4l2_subdev or video_device instance,
+although drivers can allocate entities directly.
+
+Drivers initialize entities by calling
+
+ media_entity_init(struct media_entity *entity, u16 num_pads,
+ struct media_pad *pads, u16 extra_links);
+
+The media_entity name, type, flags, revision and group_id fields can be
+initialized before or after calling media_entity_init. Entities embedded in
+higher-level standard structures can have some of those fields set by the
+higher-level framework.
+
+As the number of pads is known in advance, the pads array is not allocated
+dynamically but is managed by the entity driver. Most drivers will embed the
+pads array in a driver-specific structure, avoiding dynamic allocation.
+
+Drivers must set the direction of every pad in the pads array before calling
+media_entity_init. The function will initialize the other pads fields.
+
+Unlike the number of pads, the total number of links isn't always known in
+advance by the entity driver. As an initial estimate, media_entity_init
+pre-allocates a number of links equal to the number of pads plus an optional
+number of extra links. The links array will be reallocated if it grows beyond
+the initial estimate.
+
+Drivers register entities with a media device by calling
+
+ media_device_register_entity(struct media_device *mdev,
+ struct media_entity *entity);
+
+Entities are identified by a unique positive integer ID. Drivers can provide an
+ID by filling the media_entity id field prior to registration, or request the
+media controller framework to assign an ID automatically. Drivers that provide
+IDs manually must ensure that all IDs are unique. IDs are not guaranteed to be
+contiguous even when they are all assigned automatically by the framework.
+
+Drivers unregister entities by calling
+
+ media_device_unregister_entity(struct media_entity *entity);
+
+Unregistering an entity will not change the IDs of the other entities, and the
+ID will never be reused for a newly registered entity.
+
+When a media device is unregistered, all its entities are unregistered
+automatically. No manual entities unregistration is then required.
+
+Drivers free resources associated with an entity by calling
+
+ media_entity_cleanup(struct media_entity *entity);
+
+This function must be called during the cleanup phase after unregistering the
+entity. Note that the media_entity instance itself must be freed explicitly by
+the driver if required.
+
+Entities have flags that describe the entity capabilities and state.
+
+ MEDIA_ENT_FL_DEFAULT indicates the default entity for a given type.
+ This can be used to report the default audio and video devices or the
+ default camera sensor.
+
+Logical entity groups can be defined by setting the group ID of all member
+entities to the same non-zero value. An entity group serves no purpose in the
+kernel, but is reported to userspace during entities enumeration. The group_id
+field belongs to the media device driver and must not by touched by entity
+drivers.
+
+Media device drivers should define groups if several entities are logically
+bound together. Example usages include reporting
+
+ - ALSA, VBI and video nodes that carry the same media stream
+ - lens and flash controllers associated with a sensor
+
+- Pads
+
+Pads are represented by a struct media_pad instance, defined in
+include/media/media-entity.h. Each entity stores its pads in a pads array
+managed by the entity driver. Drivers usually embed the array in a
+driver-specific structure.
+
+Pads are identified by their entity and their 0-based index in the pads array.
+Both information are stored in the media_pad structure, making the media_pad
+pointer the canonical way to store and pass link references.
+
+Pads have flags that describe the pad capabilities and state.
+
+ MEDIA_PAD_FL_SINK indicates that the pad supports sinking data.
+ MEDIA_PAD_FL_SOURCE indicates that the pad supports sourcing data.
+
+One and only one of MEDIA_PAD_FL_SINK and MEDIA_PAD_FL_SOURCE must be set for
+each pad.
+
+- Links
+
+Links are represented by a struct media_link instance, defined in
+include/media/media-entity.h. Each entity stores all links originating at or
+targeting any of its pads in a links array. A given link is thus stored
+twice, once in the source entity and once in the target entity. The array is
+pre-allocated and grows dynamically as needed.
+
+Drivers create links by calling
+
+ media_entity_create_link(struct media_entity *source, u16 source_pad,
+ struct media_entity *sink, u16 sink_pad,
+ u32 flags);
+
+An entry in the link array of each entity is allocated and stores pointers
+to source and sink pads.
+
+Links have flags that describe the link capabilities and state.
+
+ MEDIA_LNK_FL_ENABLED indicates that the link is enabled and can be used
+ to transfer media data. When two or more links target a sink pad, only
+ one of them can be enabled at a time.
+ MEDIA_LNK_FL_IMMUTABLE indicates that the link enabled state can't be
+ modified at runtime. If MEDIA_LNK_FL_IMMUTABLE is set, then
+ MEDIA_LNK_FL_ENABLED must also be set since an immutable link is always
+ enabled.
+
+
+Graph traversal
+---------------
+
+The media framework provides APIs to iterate over entities in a graph.
+
+To iterate over all entities belonging to a media device, drivers can use the
+media_device_for_each_entity macro, defined in include/media/media-device.h.
+
+ struct media_entity *entity;
+
+ media_device_for_each_entity(entity, mdev) {
+ /* entity will point to each entity in turn */
+ ...
+ }
+
+Drivers might also need to iterate over all entities in a graph that can be
+reached only through enabled links starting at a given entity. The media
+framework provides a depth-first graph traversal API for that purpose.
+
+Note that graphs with cycles (whether directed or undirected) are *NOT*
+supported by the graph traversal API. To prevent infinite loops, the graph
+traversal code limits the maximum depth to MEDIA_ENTITY_ENUM_MAX_DEPTH,
+currently defined as 16.
+
+Drivers initiate a graph traversal by calling
+
+ media_entity_graph_walk_start(struct media_entity_graph *graph,
+ struct media_entity *entity);
+
+The graph structure, provided by the caller, is initialized to start graph
+traversal at the given entity.
+
+Drivers can then retrieve the next entity by calling
+
+ media_entity_graph_walk_next(struct media_entity_graph *graph);
+
+When the graph traversal is complete the function will return NULL.
+
+Graph traversal can be interrupted at any moment. No cleanup function call is
+required and the graph structure can be freed normally.
+
+Helper functions can be used to find a link between two given pads, or a pad
+connected to another pad through an enabled link
+
+ media_entity_find_link(struct media_pad *source,
+ struct media_pad *sink);
+
+ media_entity_remote_pad(struct media_pad *pad);
+
+Refer to the kerneldoc documentation for more information.
+
+
+Use count and power handling
+----------------------------
+
+Due to the wide differences between drivers regarding power management needs,
+the media controller does not implement power management. However, the
+media_entity structure includes a use_count field that media drivers can use to
+track the number of users of every entity for power management needs.
+
+The use_count field is owned by media drivers and must not be touched by entity
+drivers. Access to the field must be protected by the media device graph_mutex
+lock.
+
+
+Links setup
+-----------
+
+Link properties can be modified at runtime by calling
+
+ media_entity_setup_link(struct media_link *link, u32 flags);
+
+The flags argument contains the requested new link flags.
+
+The only configurable property is the ENABLED link flag to enable/disable a
+link. Links marked with the IMMUTABLE link flag can not be enabled or disabled.
+
+When a link is enabled or disabled, the media framework calls the
+link_setup operation for the two entities at the source and sink of the link,
+in that order. If the second link_setup call fails, another link_setup call is
+made on the first entity to restore the original link flags.
+
+Media device drivers can be notified of link setup operations by setting the
+media_device::link_notify pointer to a callback function. If provided, the
+notification callback will be called before enabling and after disabling
+links.
+
+Entity drivers must implement the link_setup operation if any of their links
+is non-immutable. The operation must either configure the hardware or store
+the configuration information to be applied later.
+
+Link configuration must not have any side effect on other links. If an enabled
+link at a sink pad prevents another link at the same pad from being enabled,
+the link_setup operation must return -EBUSY and can't implicitly disable the
+first enabled link.
+
+
+Pipelines and media streams
+---------------------------
+
+When starting streaming, drivers must notify all entities in the pipeline to
+prevent link states from being modified during streaming by calling
+
+ media_entity_pipeline_start(struct media_entity *entity,
+ struct media_pipeline *pipe);
+
+The function will mark all entities connected to the given entity through
+enabled links, either directly or indirectly, as streaming.
+
+The media_pipeline instance pointed to by the pipe argument will be stored in
+every entity in the pipeline. Drivers should embed the media_pipeline structure
+in higher-level pipeline structures and can then access the pipeline through
+the media_entity pipe field.
+
+Calls to media_entity_pipeline_start() can be nested. The pipeline pointer must
+be identical for all nested calls to the function.
+
+media_entity_pipeline_start() may return an error. In that case, it will
+clean up any of the changes it did by itself.
+
+When stopping the stream, drivers must notify the entities with
+
+ media_entity_pipeline_stop(struct media_entity *entity);
+
+If multiple calls to media_entity_pipeline_start() have been made the same
+number of media_entity_pipeline_stop() calls are required to stop streaming. The
+media_entity pipe field is reset to NULL on the last nested stop call.
+
+Link configuration will fail with -EBUSY by default if either end of the link is
+a streaming entity. Links that can be modified while streaming must be marked
+with the MEDIA_LNK_FL_DYNAMIC flag.
+
+If other operations need to be disallowed on streaming entities (such as
+changing entities configuration parameters) drivers can explicitly check the
+media_entity stream_count field to find out if an entity is streaming. This
+operation must be done with the media_device graph_mutex held.
+
+
+Link validation
+---------------
+
+Link validation is performed by media_entity_pipeline_start() for any
+entity which has sink pads in the pipeline. The
+media_entity::link_validate() callback is used for that purpose. In
+link_validate() callback, entity driver should check that the properties of
+the source pad of the connected entity and its own sink pad match. It is up
+to the type of the entity (and in the end, the properties of the hardware)
+what matching actually means.
+
+Subsystems should facilitate link validation by providing subsystem specific
+helper functions to provide easy access for commonly needed information, and
+in the end provide a way to use driver-specific callbacks.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
new file mode 100644
index 000000000..f95746189
--- /dev/null
+++ b/Documentation/memory-barriers.txt
@@ -0,0 +1,3064 @@
+ ============================
+ LINUX KERNEL MEMORY BARRIERS
+ ============================
+
+By: David Howells <dhowells@redhat.com>
+ Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+Contents:
+
+ (*) Abstract memory access model.
+
+ - Device operations.
+ - Guarantees.
+
+ (*) What are memory barriers?
+
+ - Varieties of memory barrier.
+ - What may not be assumed about memory barriers?
+ - Data dependency barriers.
+ - Control dependencies.
+ - SMP barrier pairing.
+ - Examples of memory barrier sequences.
+ - Read memory barriers vs load speculation.
+ - Transitivity
+
+ (*) Explicit kernel barriers.
+
+ - Compiler barrier.
+ - CPU memory barriers.
+ - MMIO write barrier.
+
+ (*) Implicit kernel memory barriers.
+
+ - Locking functions.
+ - Interrupt disabling functions.
+ - Sleep and wake-up functions.
+ - Miscellaneous functions.
+
+ (*) Inter-CPU locking barrier effects.
+
+ - Locks vs memory accesses.
+ - Locks vs I/O accesses.
+
+ (*) Where are memory barriers needed?
+
+ - Interprocessor interaction.
+ - Atomic operations.
+ - Accessing devices.
+ - Interrupts.
+
+ (*) Kernel I/O barrier effects.
+
+ (*) Assumed minimum execution ordering model.
+
+ (*) The effects of the cpu cache.
+
+ - Cache coherency.
+ - Cache coherency vs DMA.
+ - Cache coherency vs MMIO.
+
+ (*) The things CPUs get up to.
+
+ - And then there's the Alpha.
+
+ (*) Example uses.
+
+ - Circular buffers.
+
+ (*) References.
+
+
+============================
+ABSTRACT MEMORY ACCESS MODEL
+============================
+
+Consider the following abstract model of the system:
+
+ : :
+ : :
+ : :
+ +-------+ : +--------+ : +-------+
+ | | : | | : | |
+ | | : | | : | |
+ | CPU 1 |<----->| Memory |<----->| CPU 2 |
+ | | : | | : | |
+ | | : | | : | |
+ +-------+ : +--------+ : +-------+
+ ^ : ^ : ^
+ | : | : |
+ | : | : |
+ | : v : |
+ | : +--------+ : |
+ | : | | : |
+ | : | | : |
+ +---------->| Device |<----------+
+ : | | :
+ : | | :
+ : +--------+ :
+ : :
+
+Each CPU executes a program that generates memory access operations. In the
+abstract CPU, memory operation ordering is very relaxed, and a CPU may actually
+perform the memory operations in any order it likes, provided program causality
+appears to be maintained. Similarly, the compiler may also arrange the
+instructions it emits in any order it likes, provided it doesn't affect the
+apparent operation of the program.
+
+So in the above diagram, the effects of the memory operations performed by a
+CPU are perceived by the rest of the system as the operations cross the
+interface between the CPU and rest of the system (the dotted lines).
+
+
+For example, consider the following sequence of events:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1; B == 2 }
+ A = 3; x = B;
+ B = 4; y = A;
+
+The set of accesses as seen by the memory system in the middle can be arranged
+in 24 different combinations:
+
+ STORE A=3, STORE B=4, y=LOAD A->3, x=LOAD B->4
+ STORE A=3, STORE B=4, x=LOAD B->4, y=LOAD A->3
+ STORE A=3, y=LOAD A->3, STORE B=4, x=LOAD B->4
+ STORE A=3, y=LOAD A->3, x=LOAD B->2, STORE B=4
+ STORE A=3, x=LOAD B->2, STORE B=4, y=LOAD A->3
+ STORE A=3, x=LOAD B->2, y=LOAD A->3, STORE B=4
+ STORE B=4, STORE A=3, y=LOAD A->3, x=LOAD B->4
+ STORE B=4, ...
+ ...
+
+and can thus result in four different combinations of values:
+
+ x == 2, y == 1
+ x == 2, y == 3
+ x == 4, y == 1
+ x == 4, y == 3
+
+
+Furthermore, the stores committed by a CPU to the memory system may not be
+perceived by the loads made by another CPU in the same order as the stores were
+committed.
+
+
+As a further example, consider this sequence of events:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ B = 4; Q = P;
+ P = &B D = *Q;
+
+There is an obvious data dependency here, as the value loaded into D depends on
+the address retrieved from P by CPU 2. At the end of the sequence, any of the
+following results are possible:
+
+ (Q == &A) and (D == 1)
+ (Q == &B) and (D == 2)
+ (Q == &B) and (D == 4)
+
+Note that CPU 2 will never try and load C into D because the CPU will load P
+into Q before issuing the load of *Q.
+
+
+DEVICE OPERATIONS
+-----------------
+
+Some devices present their control interfaces as collections of memory
+locations, but the order in which the control registers are accessed is very
+important. For instance, imagine an ethernet card with a set of internal
+registers that are accessed through an address port register (A) and a data
+port register (D). To read internal register 5, the following code might then
+be used:
+
+ *A = 5;
+ x = *D;
+
+but this might show up as either of the following two sequences:
+
+ STORE *A = 5, x = LOAD *D
+ x = LOAD *D, STORE *A = 5
+
+the second of which will almost certainly result in a malfunction, since it set
+the address _after_ attempting to read the register.
+
+
+GUARANTEES
+----------
+
+There are some minimal guarantees that may be expected of a CPU:
+
+ (*) On any given CPU, dependent memory accesses will be issued in order, with
+ respect to itself. This means that for:
+
+ ACCESS_ONCE(Q) = P; smp_read_barrier_depends(); D = ACCESS_ONCE(*Q);
+
+ the CPU will issue the following memory operations:
+
+ Q = LOAD P, D = LOAD *Q
+
+ and always in that order. On most systems, smp_read_barrier_depends()
+ does nothing, but it is required for DEC Alpha. The ACCESS_ONCE()
+ is required to prevent compiler mischief. Please note that you
+ should normally use something like rcu_dereference() instead of
+ open-coding smp_read_barrier_depends().
+
+ (*) Overlapping loads and stores within a particular CPU will appear to be
+ ordered within that CPU. This means that for:
+
+ a = ACCESS_ONCE(*X); ACCESS_ONCE(*X) = b;
+
+ the CPU will only issue the following sequence of memory operations:
+
+ a = LOAD *X, STORE *X = b
+
+ And for:
+
+ ACCESS_ONCE(*X) = c; d = ACCESS_ONCE(*X);
+
+ the CPU will only issue:
+
+ STORE *X = c, d = LOAD *X
+
+ (Loads and stores overlap if they are targeted at overlapping pieces of
+ memory).
+
+And there are a number of things that _must_ or _must_not_ be assumed:
+
+ (*) It _must_not_ be assumed that the compiler will do what you want with
+ memory references that are not protected by ACCESS_ONCE(). Without
+ ACCESS_ONCE(), the compiler is within its rights to do all sorts
+ of "creative" transformations, which are covered in the Compiler
+ Barrier section.
+
+ (*) It _must_not_ be assumed that independent loads and stores will be issued
+ in the order given. This means that for:
+
+ X = *A; Y = *B; *D = Z;
+
+ we may get any of the following sequences:
+
+ X = LOAD *A, Y = LOAD *B, STORE *D = Z
+ X = LOAD *A, STORE *D = Z, Y = LOAD *B
+ Y = LOAD *B, X = LOAD *A, STORE *D = Z
+ Y = LOAD *B, STORE *D = Z, X = LOAD *A
+ STORE *D = Z, X = LOAD *A, Y = LOAD *B
+ STORE *D = Z, Y = LOAD *B, X = LOAD *A
+
+ (*) It _must_ be assumed that overlapping memory accesses may be merged or
+ discarded. This means that for:
+
+ X = *A; Y = *(A + 4);
+
+ we may get any one of the following sequences:
+
+ X = LOAD *A; Y = LOAD *(A + 4);
+ Y = LOAD *(A + 4); X = LOAD *A;
+ {X, Y} = LOAD {*A, *(A + 4) };
+
+ And for:
+
+ *A = X; *(A + 4) = Y;
+
+ we may get any of:
+
+ STORE *A = X; STORE *(A + 4) = Y;
+ STORE *(A + 4) = Y; STORE *A = X;
+ STORE {*A, *(A + 4) } = {X, Y};
+
+And there are anti-guarantees:
+
+ (*) These guarantees do not apply to bitfields, because compilers often
+ generate code to modify these using non-atomic read-modify-write
+ sequences. Do not attempt to use bitfields to synchronize parallel
+ algorithms.
+
+ (*) Even in cases where bitfields are protected by locks, all fields
+ in a given bitfield must be protected by one lock. If two fields
+ in a given bitfield are protected by different locks, the compiler's
+ non-atomic read-modify-write sequences can cause an update to one
+ field to corrupt the value of an adjacent field.
+
+ (*) These guarantees apply only to properly aligned and sized scalar
+ variables. "Properly sized" currently means variables that are
+ the same size as "char", "short", "int" and "long". "Properly
+ aligned" means the natural alignment, thus no constraints for
+ "char", two-byte alignment for "short", four-byte alignment for
+ "int", and either four-byte or eight-byte alignment for "long",
+ on 32-bit and 64-bit systems, respectively. Note that these
+ guarantees were introduced into the C11 standard, so beware when
+ using older pre-C11 compilers (for example, gcc 4.6). The portion
+ of the standard containing this guarantee is Section 3.14, which
+ defines "memory location" as follows:
+
+ memory location
+ either an object of scalar type, or a maximal sequence
+ of adjacent bit-fields all having nonzero width
+
+ NOTE 1: Two threads of execution can update and access
+ separate memory locations without interfering with
+ each other.
+
+ NOTE 2: A bit-field and an adjacent non-bit-field member
+ are in separate memory locations. The same applies
+ to two bit-fields, if one is declared inside a nested
+ structure declaration and the other is not, or if the two
+ are separated by a zero-length bit-field declaration,
+ or if they are separated by a non-bit-field member
+ declaration. It is not safe to concurrently update two
+ bit-fields in the same structure if all members declared
+ between them are also bit-fields, no matter what the
+ sizes of those intervening bit-fields happen to be.
+
+
+=========================
+WHAT ARE MEMORY BARRIERS?
+=========================
+
+As can be seen above, independent memory operations are effectively performed
+in random order, but this can be a problem for CPU-CPU interaction and for I/O.
+What is required is some way of intervening to instruct the compiler and the
+CPU to restrict the order.
+
+Memory barriers are such interventions. They impose a perceived partial
+ordering over the memory operations on either side of the barrier.
+
+Such enforcement is important because the CPUs and other devices in a system
+can use a variety of tricks to improve performance, including reordering,
+deferral and combination of memory operations; speculative loads; speculative
+branch prediction and various types of caching. Memory barriers are used to
+override or suppress these tricks, allowing the code to sanely control the
+interaction of multiple CPUs and/or devices.
+
+
+VARIETIES OF MEMORY BARRIER
+---------------------------
+
+Memory barriers come in four basic varieties:
+
+ (1) Write (or store) memory barriers.
+
+ A write memory barrier gives a guarantee that all the STORE operations
+ specified before the barrier will appear to happen before all the STORE
+ operations specified after the barrier with respect to the other
+ components of the system.
+
+ A write barrier is a partial ordering on stores only; it is not required
+ to have any effect on loads.
+
+ A CPU can be viewed as committing a sequence of store operations to the
+ memory system as time progresses. All stores before a write barrier will
+ occur in the sequence _before_ all the stores after the write barrier.
+
+ [!] Note that write barriers should normally be paired with read or data
+ dependency barriers; see the "SMP barrier pairing" subsection.
+
+
+ (2) Data dependency barriers.
+
+ A data dependency barrier is a weaker form of read barrier. In the case
+ where two loads are performed such that the second depends on the result
+ of the first (eg: the first load retrieves the address to which the second
+ load will be directed), a data dependency barrier would be required to
+ make sure that the target of the second load is updated before the address
+ obtained by the first load is accessed.
+
+ A data dependency barrier is a partial ordering on interdependent loads
+ only; it is not required to have any effect on stores, independent loads
+ or overlapping loads.
+
+ As mentioned in (1), the other CPUs in the system can be viewed as
+ committing sequences of stores to the memory system that the CPU being
+ considered can then perceive. A data dependency barrier issued by the CPU
+ under consideration guarantees that for any load preceding it, if that
+ load touches one of a sequence of stores from another CPU, then by the
+ time the barrier completes, the effects of all the stores prior to that
+ touched by the load will be perceptible to any loads issued after the data
+ dependency barrier.
+
+ See the "Examples of memory barrier sequences" subsection for diagrams
+ showing the ordering constraints.
+
+ [!] Note that the first load really has to have a _data_ dependency and
+ not a control dependency. If the address for the second load is dependent
+ on the first load, but the dependency is through a conditional rather than
+ actually loading the address itself, then it's a _control_ dependency and
+ a full read barrier or better is required. See the "Control dependencies"
+ subsection for more information.
+
+ [!] Note that data dependency barriers should normally be paired with
+ write barriers; see the "SMP barrier pairing" subsection.
+
+
+ (3) Read (or load) memory barriers.
+
+ A read barrier is a data dependency barrier plus a guarantee that all the
+ LOAD operations specified before the barrier will appear to happen before
+ all the LOAD operations specified after the barrier with respect to the
+ other components of the system.
+
+ A read barrier is a partial ordering on loads only; it is not required to
+ have any effect on stores.
+
+ Read memory barriers imply data dependency barriers, and so can substitute
+ for them.
+
+ [!] Note that read barriers should normally be paired with write barriers;
+ see the "SMP barrier pairing" subsection.
+
+
+ (4) General memory barriers.
+
+ A general memory barrier gives a guarantee that all the LOAD and STORE
+ operations specified before the barrier will appear to happen before all
+ the LOAD and STORE operations specified after the barrier with respect to
+ the other components of the system.
+
+ A general memory barrier is a partial ordering over both loads and stores.
+
+ General memory barriers imply both read and write memory barriers, and so
+ can substitute for either.
+
+
+And a couple of implicit varieties:
+
+ (5) ACQUIRE operations.
+
+ This acts as a one-way permeable barrier. It guarantees that all memory
+ operations after the ACQUIRE operation will appear to happen after the
+ ACQUIRE operation with respect to the other components of the system.
+ ACQUIRE operations include LOCK operations and smp_load_acquire()
+ operations.
+
+ Memory operations that occur before an ACQUIRE operation may appear to
+ happen after it completes.
+
+ An ACQUIRE operation should almost always be paired with a RELEASE
+ operation.
+
+
+ (6) RELEASE operations.
+
+ This also acts as a one-way permeable barrier. It guarantees that all
+ memory operations before the RELEASE operation will appear to happen
+ before the RELEASE operation with respect to the other components of the
+ system. RELEASE operations include UNLOCK operations and
+ smp_store_release() operations.
+
+ Memory operations that occur after a RELEASE operation may appear to
+ happen before it completes.
+
+ The use of ACQUIRE and RELEASE operations generally precludes the need
+ for other sorts of memory barrier (but note the exceptions mentioned in
+ the subsection "MMIO write barrier"). In addition, a RELEASE+ACQUIRE
+ pair is -not- guaranteed to act as a full memory barrier. However, after
+ an ACQUIRE on a given variable, all memory accesses preceding any prior
+ RELEASE on that same variable are guaranteed to be visible. In other
+ words, within a given variable's critical section, all accesses of all
+ previous critical sections for that variable are guaranteed to have
+ completed.
+
+ This means that ACQUIRE acts as a minimal "acquire" operation and
+ RELEASE acts as a minimal "release" operation.
+
+
+Memory barriers are only required where there's a possibility of interaction
+between two CPUs or between a CPU and a device. If it can be guaranteed that
+there won't be any such interaction in any particular piece of code, then
+memory barriers are unnecessary in that piece of code.
+
+
+Note that these are the _minimum_ guarantees. Different architectures may give
+more substantial guarantees, but they may _not_ be relied upon outside of arch
+specific code.
+
+
+WHAT MAY NOT BE ASSUMED ABOUT MEMORY BARRIERS?
+----------------------------------------------
+
+There are certain things that the Linux kernel memory barriers do not guarantee:
+
+ (*) There is no guarantee that any of the memory accesses specified before a
+ memory barrier will be _complete_ by the completion of a memory barrier
+ instruction; the barrier can be considered to draw a line in that CPU's
+ access queue that accesses of the appropriate type may not cross.
+
+ (*) There is no guarantee that issuing a memory barrier on one CPU will have
+ any direct effect on another CPU or any other hardware in the system. The
+ indirect effect will be the order in which the second CPU sees the effects
+ of the first CPU's accesses occur, but see the next point:
+
+ (*) There is no guarantee that a CPU will see the correct order of effects
+ from a second CPU's accesses, even _if_ the second CPU uses a memory
+ barrier, unless the first CPU _also_ uses a matching memory barrier (see
+ the subsection on "SMP Barrier Pairing").
+
+ (*) There is no guarantee that some intervening piece of off-the-CPU
+ hardware[*] will not reorder the memory accesses. CPU cache coherency
+ mechanisms should propagate the indirect effects of a memory barrier
+ between CPUs, but might not do so in order.
+
+ [*] For information on bus mastering DMA and coherency please read:
+
+ Documentation/PCI/pci.txt
+ Documentation/DMA-API-HOWTO.txt
+ Documentation/DMA-API.txt
+
+
+DATA DEPENDENCY BARRIERS
+------------------------
+
+The usage requirements of data dependency barriers are a little subtle, and
+it's not always obvious that they're needed. To illustrate, consider the
+following sequence of events:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ B = 4;
+ <write barrier>
+ ACCESS_ONCE(P) = &B
+ Q = ACCESS_ONCE(P);
+ D = *Q;
+
+There's a clear data dependency here, and it would seem that by the end of the
+sequence, Q must be either &A or &B, and that:
+
+ (Q == &A) implies (D == 1)
+ (Q == &B) implies (D == 4)
+
+But! CPU 2's perception of P may be updated _before_ its perception of B, thus
+leading to the following situation:
+
+ (Q == &B) and (D == 2) ????
+
+Whilst this may seem like a failure of coherency or causality maintenance, it
+isn't, and this behaviour can be observed on certain real CPUs (such as the DEC
+Alpha).
+
+To deal with this, a data dependency barrier or better must be inserted
+between the address load and the data load:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { A == 1, B == 2, C = 3, P == &A, Q == &C }
+ B = 4;
+ <write barrier>
+ ACCESS_ONCE(P) = &B
+ Q = ACCESS_ONCE(P);
+ <data dependency barrier>
+ D = *Q;
+
+This enforces the occurrence of one of the two implications, and prevents the
+third possibility from arising.
+
+[!] Note that this extremely counterintuitive situation arises most easily on
+machines with split caches, so that, for example, one cache bank processes
+even-numbered cache lines and the other bank processes odd-numbered cache
+lines. The pointer P might be stored in an odd-numbered cache line, and the
+variable B might be stored in an even-numbered cache line. Then, if the
+even-numbered bank of the reading CPU's cache is extremely busy while the
+odd-numbered bank is idle, one can see the new value of the pointer P (&B),
+but the old value of the variable B (2).
+
+
+Another example of where data dependency barriers might be required is where a
+number is read from memory and then used to calculate the index for an array
+access:
+
+ CPU 1 CPU 2
+ =============== ===============
+ { M[0] == 1, M[1] == 2, M[3] = 3, P == 0, Q == 3 }
+ M[1] = 4;
+ <write barrier>
+ ACCESS_ONCE(P) = 1
+ Q = ACCESS_ONCE(P);
+ <data dependency barrier>
+ D = M[Q];
+
+
+The data dependency barrier is very important to the RCU system,
+for example. See rcu_assign_pointer() and rcu_dereference() in
+include/linux/rcupdate.h. This permits the current target of an RCU'd
+pointer to be replaced with a new modified target, without the replacement
+target appearing to be incompletely initialised.
+
+See also the subsection on "Cache Coherency" for a more thorough example.
+
+
+CONTROL DEPENDENCIES
+--------------------
+
+A load-load control dependency requires a full read memory barrier, not
+simply a data dependency barrier to make it work correctly. Consider the
+following bit of code:
+
+ q = ACCESS_ONCE(a);
+ if (q) {
+ <data dependency barrier> /* BUG: No data dependency!!! */
+ p = ACCESS_ONCE(b);
+ }
+
+This will not have the desired effect because there is no actual data
+dependency, but rather a control dependency that the CPU may short-circuit
+by attempting to predict the outcome in advance, so that other CPUs see
+the load from b as having happened before the load from a. In such a
+case what's actually required is:
+
+ q = ACCESS_ONCE(a);
+ if (q) {
+ <read barrier>
+ p = ACCESS_ONCE(b);
+ }
+
+However, stores are not speculated. This means that ordering -is- provided
+for load-store control dependencies, as in the following example:
+
+ q = ACCESS_ONCE(a);
+ if (q) {
+ ACCESS_ONCE(b) = p;
+ }
+
+Control dependencies pair normally with other types of barriers.
+That said, please note that ACCESS_ONCE() is not optional! Without the
+ACCESS_ONCE(), might combine the load from 'a' with other loads from
+'a', and the store to 'b' with other stores to 'b', with possible highly
+counterintuitive effects on ordering.
+
+Worse yet, if the compiler is able to prove (say) that the value of
+variable 'a' is always non-zero, it would be well within its rights
+to optimize the original example by eliminating the "if" statement
+as follows:
+
+ q = a;
+ b = p; /* BUG: Compiler and CPU can both reorder!!! */
+
+So don't leave out the ACCESS_ONCE().
+
+It is tempting to try to enforce ordering on identical stores on both
+branches of the "if" statement as follows:
+
+ q = ACCESS_ONCE(a);
+ if (q) {
+ barrier();
+ ACCESS_ONCE(b) = p;
+ do_something();
+ } else {
+ barrier();
+ ACCESS_ONCE(b) = p;
+ do_something_else();
+ }
+
+Unfortunately, current compilers will transform this as follows at high
+optimization levels:
+
+ q = ACCESS_ONCE(a);
+ barrier();
+ ACCESS_ONCE(b) = p; /* BUG: No ordering vs. load from a!!! */
+ if (q) {
+ /* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
+ do_something();
+ } else {
+ /* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
+ do_something_else();
+ }
+
+Now there is no conditional between the load from 'a' and the store to
+'b', which means that the CPU is within its rights to reorder them:
+The conditional is absolutely required, and must be present in the
+assembly code even after all compiler optimizations have been applied.
+Therefore, if you need ordering in this example, you need explicit
+memory barriers, for example, smp_store_release():
+
+ q = ACCESS_ONCE(a);
+ if (q) {
+ smp_store_release(&b, p);
+ do_something();
+ } else {
+ smp_store_release(&b, p);
+ do_something_else();
+ }
+
+In contrast, without explicit memory barriers, two-legged-if control
+ordering is guaranteed only when the stores differ, for example:
+
+ q = ACCESS_ONCE(a);
+ if (q) {
+ ACCESS_ONCE(b) = p;
+ do_something();
+ } else {
+ ACCESS_ONCE(b) = r;
+ do_something_else();
+ }
+
+The initial ACCESS_ONCE() is still required to prevent the compiler from
+proving the value of 'a'.
+
+In addition, you need to be careful what you do with the local variable 'q',
+otherwise the compiler might be able to guess the value and again remove
+the needed conditional. For example:
+
+ q = ACCESS_ONCE(a);
+ if (q % MAX) {
+ ACCESS_ONCE(b) = p;
+ do_something();
+ } else {
+ ACCESS_ONCE(b) = r;
+ do_something_else();
+ }
+
+If MAX is defined to be 1, then the compiler knows that (q % MAX) is
+equal to zero, in which case the compiler is within its rights to
+transform the above code into the following:
+
+ q = ACCESS_ONCE(a);
+ ACCESS_ONCE(b) = p;
+ do_something_else();
+
+Given this transformation, the CPU is not required to respect the ordering
+between the load from variable 'a' and the store to variable 'b'. It is
+tempting to add a barrier(), but this does not help. The conditional
+is gone, and the barrier won't bring it back. Therefore, if you are
+relying on this ordering, you should make sure that MAX is greater than
+one, perhaps as follows:
+
+ q = ACCESS_ONCE(a);
+ BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
+ if (q % MAX) {
+ ACCESS_ONCE(b) = p;
+ do_something();
+ } else {
+ ACCESS_ONCE(b) = r;
+ do_something_else();
+ }
+
+Please note once again that the stores to 'b' differ. If they were
+identical, as noted earlier, the compiler could pull this store outside
+of the 'if' statement.
+
+You must also be careful not to rely too much on boolean short-circuit
+evaluation. Consider this example:
+
+ q = ACCESS_ONCE(a);
+ if (a || 1 > 0)
+ ACCESS_ONCE(b) = 1;
+
+Because the second condition is always true, the compiler can transform
+this example as following, defeating control dependency:
+
+ q = ACCESS_ONCE(a);
+ ACCESS_ONCE(b) = 1;
+
+This example underscores the need to ensure that the compiler cannot
+out-guess your code. More generally, although ACCESS_ONCE() does force
+the compiler to actually emit code for a given load, it does not force
+the compiler to use the results.
+
+Finally, control dependencies do -not- provide transitivity. This is
+demonstrated by two related examples, with the initial values of
+x and y both being zero:
+
+ CPU 0 CPU 1
+ ===================== =====================
+ r1 = ACCESS_ONCE(x); r2 = ACCESS_ONCE(y);
+ if (r1 > 0) if (r2 > 0)
+ ACCESS_ONCE(y) = 1; ACCESS_ONCE(x) = 1;
+
+ assert(!(r1 == 1 && r2 == 1));
+
+The above two-CPU example will never trigger the assert(). However,
+if control dependencies guaranteed transitivity (which they do not),
+then adding the following CPU would guarantee a related assertion:
+
+ CPU 2
+ =====================
+ ACCESS_ONCE(x) = 2;
+
+ assert(!(r1 == 2 && r2 == 1 && x == 2)); /* FAILS!!! */
+
+But because control dependencies do -not- provide transitivity, the above
+assertion can fail after the combined three-CPU example completes. If you
+need the three-CPU example to provide ordering, you will need smp_mb()
+between the loads and stores in the CPU 0 and CPU 1 code fragments,
+that is, just before or just after the "if" statements.
+
+These two examples are the LB and WWC litmus tests from this paper:
+http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf and this
+site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html.
+
+In summary:
+
+ (*) Control dependencies can order prior loads against later stores.
+ However, they do -not- guarantee any other sort of ordering:
+ Not prior loads against later loads, nor prior stores against
+ later anything. If you need these other forms of ordering,
+ use smp_rmb(), smp_wmb(), or, in the case of prior stores and
+ later loads, smp_mb().
+
+ (*) If both legs of the "if" statement begin with identical stores
+ to the same variable, a barrier() statement is required at the
+ beginning of each leg of the "if" statement.
+
+ (*) Control dependencies require at least one run-time conditional
+ between the prior load and the subsequent store, and this
+ conditional must involve the prior load. If the compiler
+ is able to optimize the conditional away, it will have also
+ optimized away the ordering. Careful use of ACCESS_ONCE() can
+ help to preserve the needed conditional.
+
+ (*) Control dependencies require that the compiler avoid reordering the
+ dependency into nonexistence. Careful use of ACCESS_ONCE() or
+ barrier() can help to preserve your control dependency. Please
+ see the Compiler Barrier section for more information.
+
+ (*) Control dependencies pair normally with other types of barriers.
+
+ (*) Control dependencies do -not- provide transitivity. If you
+ need transitivity, use smp_mb().
+
+
+SMP BARRIER PAIRING
+-------------------
+
+When dealing with CPU-CPU interactions, certain types of memory barrier should
+always be paired. A lack of appropriate pairing is almost certainly an error.
+
+General barriers pair with each other, though they also pair with most
+other types of barriers, albeit without transitivity. An acquire barrier
+pairs with a release barrier, but both may also pair with other barriers,
+including of course general barriers. A write barrier pairs with a data
+dependency barrier, a control dependency, an acquire barrier, a release
+barrier, a read barrier, or a general barrier. Similarly a read barrier,
+control dependency, or a data dependency barrier pairs with a write
+barrier, an acquire barrier, a release barrier, or a general barrier:
+
+ CPU 1 CPU 2
+ =============== ===============
+ ACCESS_ONCE(a) = 1;
+ <write barrier>
+ ACCESS_ONCE(b) = 2; x = ACCESS_ONCE(b);
+ <read barrier>
+ y = ACCESS_ONCE(a);
+
+Or:
+
+ CPU 1 CPU 2
+ =============== ===============================
+ a = 1;
+ <write barrier>
+ ACCESS_ONCE(b) = &a; x = ACCESS_ONCE(b);
+ <data dependency barrier>
+ y = *x;
+
+Or even:
+
+ CPU 1 CPU 2
+ =============== ===============================
+ r1 = ACCESS_ONCE(y);
+ <general barrier>
+ ACCESS_ONCE(y) = 1; if (r2 = ACCESS_ONCE(x)) {
+ <implicit control dependency>
+ ACCESS_ONCE(y) = 1;
+ }
+
+ assert(r1 == 0 || r2 == 0);
+
+Basically, the read barrier always has to be there, even though it can be of
+the "weaker" type.
+
+[!] Note that the stores before the write barrier would normally be expected to
+match the loads after the read barrier or the data dependency barrier, and vice
+versa:
+
+ CPU 1 CPU 2
+ =================== ===================
+ ACCESS_ONCE(a) = 1; }---- --->{ v = ACCESS_ONCE(c);
+ ACCESS_ONCE(b) = 2; } \ / { w = ACCESS_ONCE(d);
+ <write barrier> \ <read barrier>
+ ACCESS_ONCE(c) = 3; } / \ { x = ACCESS_ONCE(a);
+ ACCESS_ONCE(d) = 4; }---- --->{ y = ACCESS_ONCE(b);
+
+
+EXAMPLES OF MEMORY BARRIER SEQUENCES
+------------------------------------
+
+Firstly, write barriers act as partial orderings on store operations.
+Consider the following sequence of events:
+
+ CPU 1
+ =======================
+ STORE A = 1
+ STORE B = 2
+ STORE C = 3
+ <write barrier>
+ STORE D = 4
+ STORE E = 5
+
+This sequence of events is committed to the memory coherence system in an order
+that the rest of the system might perceive as the unordered set of { STORE A,
+STORE B, STORE C } all occurring before the unordered set of { STORE D, STORE E
+}:
+
+ +-------+ : :
+ | | +------+
+ | |------>| C=3 | } /\
+ | | : +------+ }----- \ -----> Events perceptible to
+ | | : | A=1 | } \/ the rest of the system
+ | | : +------+ }
+ | CPU 1 | : | B=2 | }
+ | | +------+ }
+ | | wwwwwwwwwwwwwwww } <--- At this point the write barrier
+ | | +------+ } requires all stores prior to the
+ | | : | E=5 | } barrier to be committed before
+ | | : +------+ } further stores may take place
+ | |------>| D=4 | }
+ | | +------+
+ +-------+ : :
+ |
+ | Sequence in which stores are committed to the
+ | memory system by CPU 1
+ V
+
+
+Secondly, data dependency barriers act as partial orderings on data-dependent
+loads. Consider the following sequence of events:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { B = 7; X = 9; Y = 8; C = &Y }
+ STORE A = 1
+ STORE B = 2
+ <write barrier>
+ STORE C = &B LOAD X
+ STORE D = 4 LOAD C (gets &B)
+ LOAD *C (reads B)
+
+Without intervention, CPU 2 may perceive the events on CPU 1 in some
+effectively random order, despite the write barrier issued by CPU 1:
+
+ +-------+ : : : :
+ | | +------+ +-------+ | Sequence of update
+ | |------>| B=2 |----- --->| Y->8 | | of perception on
+ | | : +------+ \ +-------+ | CPU 2
+ | CPU 1 | : | A=1 | \ --->| C->&Y | V
+ | | +------+ | +-------+
+ | | wwwwwwwwwwwwwwww | : :
+ | | +------+ | : :
+ | | : | C=&B |--- | : : +-------+
+ | | : +------+ \ | +-------+ | |
+ | |------>| D=4 | ----------->| C->&B |------>| |
+ | | +------+ | +-------+ | |
+ +-------+ : : | : : | |
+ | : : | |
+ | : : | CPU 2 |
+ | +-------+ | |
+ Apparently incorrect ---> | | B->7 |------>| |
+ perception of B (!) | +-------+ | |
+ | : : | |
+ | +-------+ | |
+ The load of X holds ---> \ | X->9 |------>| |
+ up the maintenance \ +-------+ | |
+ of coherence of B ----->| B->2 | +-------+
+ +-------+
+ : :
+
+
+In the above example, CPU 2 perceives that B is 7, despite the load of *C
+(which would be B) coming after the LOAD of C.
+
+If, however, a data dependency barrier were to be placed between the load of C
+and the load of *C (ie: B) on CPU 2:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { B = 7; X = 9; Y = 8; C = &Y }
+ STORE A = 1
+ STORE B = 2
+ <write barrier>
+ STORE C = &B LOAD X
+ STORE D = 4 LOAD C (gets &B)
+ <data dependency barrier>
+ LOAD *C (reads B)
+
+then the following will occur:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| B=2 |----- --->| Y->8 |
+ | | : +------+ \ +-------+
+ | CPU 1 | : | A=1 | \ --->| C->&Y |
+ | | +------+ | +-------+
+ | | wwwwwwwwwwwwwwww | : :
+ | | +------+ | : :
+ | | : | C=&B |--- | : : +-------+
+ | | : +------+ \ | +-------+ | |
+ | |------>| D=4 | ----------->| C->&B |------>| |
+ | | +------+ | +-------+ | |
+ +-------+ : : | : : | |
+ | : : | |
+ | : : | CPU 2 |
+ | +-------+ | |
+ | | X->9 |------>| |
+ | +-------+ | |
+ Makes sure all effects ---> \ ddddddddddddddddd | |
+ prior to the store of C \ +-------+ | |
+ are perceptible to ----->| B->2 |------>| |
+ subsequent loads +-------+ | |
+ : : +-------+
+
+
+And thirdly, a read barrier acts as a partial order on loads. Consider the
+following sequence of events:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { A = 0, B = 9 }
+ STORE A=1
+ <write barrier>
+ STORE B=2
+ LOAD B
+ LOAD A
+
+Without intervention, CPU 2 may then choose to perceive the events on CPU 1 in
+some effectively random order, despite the write barrier issued by CPU 1:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | | A->0 |------>| |
+ | +-------+ | |
+ | : : +-------+
+ \ : :
+ \ +-------+
+ ---->| A->1 |
+ +-------+
+ : :
+
+
+If, however, a read barrier were to be placed between the load of B and the
+load of A on CPU 2:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { A = 0, B = 9 }
+ STORE A=1
+ <write barrier>
+ STORE B=2
+ LOAD B
+ <read barrier>
+ LOAD A
+
+then the partial ordering imposed by CPU 1 will be perceived correctly by CPU
+2:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | : : | |
+ | : : | |
+ At this point the read ----> \ rrrrrrrrrrrrrrrrr | |
+ barrier causes all effects \ +-------+ | |
+ prior to the storage of B ---->| A->1 |------>| |
+ to be perceptible to CPU 2 +-------+ | |
+ : : +-------+
+
+
+To illustrate this more completely, consider what could happen if the code
+contained a load of A either side of the read barrier:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { A = 0, B = 9 }
+ STORE A=1
+ <write barrier>
+ STORE B=2
+ LOAD B
+ LOAD A [first load of A]
+ <read barrier>
+ LOAD A [second load of A]
+
+Even though the two loads of A both occur after the load of B, they may both
+come up with different values:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | : : | |
+ | : : | |
+ | +-------+ | |
+ | | A->0 |------>| 1st |
+ | +-------+ | |
+ At this point the read ----> \ rrrrrrrrrrrrrrrrr | |
+ barrier causes all effects \ +-------+ | |
+ prior to the storage of B ---->| A->1 |------>| 2nd |
+ to be perceptible to CPU 2 +-------+ | |
+ : : +-------+
+
+
+But it may be that the update to A from CPU 1 becomes perceptible to CPU 2
+before the read barrier completes anyway:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | : : | |
+ \ : : | |
+ \ +-------+ | |
+ ---->| A->1 |------>| 1st |
+ +-------+ | |
+ rrrrrrrrrrrrrrrrr | |
+ +-------+ | |
+ | A->1 |------>| 2nd |
+ +-------+ | |
+ : : +-------+
+
+
+The guarantee is that the second load will always come up with A == 1 if the
+load of B came up with B == 2. No such guarantee exists for the first load of
+A; that may come up with either A == 0 or A == 1.
+
+
+READ MEMORY BARRIERS VS LOAD SPECULATION
+----------------------------------------
+
+Many CPUs speculate with loads: that is they see that they will need to load an
+item from memory, and they find a time where they're not using the bus for any
+other loads, and so do the load in advance - even though they haven't actually
+got to that point in the instruction execution flow yet. This permits the
+actual load instruction to potentially complete immediately because the CPU
+already has the value to hand.
+
+It may turn out that the CPU didn't actually need the value - perhaps because a
+branch circumvented the load - in which case it can discard the value or just
+cache it for later use.
+
+Consider:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ LOAD B
+ DIVIDE } Divide instructions generally
+ DIVIDE } take a long time to perform
+ LOAD A
+
+Which might appear as this:
+
+ : : +-------+
+ +-------+ | |
+ --->| B->2 |------>| |
+ +-------+ | CPU 2 |
+ : :DIVIDE | |
+ +-------+ | |
+ The CPU being busy doing a ---> --->| A->0 |~~~~ | |
+ division speculates on the +-------+ ~ | |
+ LOAD of A : : ~ | |
+ : :DIVIDE | |
+ : : ~ | |
+ Once the divisions are complete --> : : ~-->| |
+ the CPU can then perform the : : | |
+ LOAD with immediate effect : : +-------+
+
+
+Placing a read barrier or a data dependency barrier just before the second
+load:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ LOAD B
+ DIVIDE
+ DIVIDE
+ <read barrier>
+ LOAD A
+
+will force any value speculatively obtained to be reconsidered to an extent
+dependent on the type of barrier used. If there was no change made to the
+speculated memory location, then the speculated value will just be used:
+
+ : : +-------+
+ +-------+ | |
+ --->| B->2 |------>| |
+ +-------+ | CPU 2 |
+ : :DIVIDE | |
+ +-------+ | |
+ The CPU being busy doing a ---> --->| A->0 |~~~~ | |
+ division speculates on the +-------+ ~ | |
+ LOAD of A : : ~ | |
+ : :DIVIDE | |
+ : : ~ | |
+ : : ~ | |
+ rrrrrrrrrrrrrrrr~ | |
+ : : ~ | |
+ : : ~-->| |
+ : : | |
+ : : +-------+
+
+
+but if there was an update or an invalidation from another CPU pending, then
+the speculation will be cancelled and the value reloaded:
+
+ : : +-------+
+ +-------+ | |
+ --->| B->2 |------>| |
+ +-------+ | CPU 2 |
+ : :DIVIDE | |
+ +-------+ | |
+ The CPU being busy doing a ---> --->| A->0 |~~~~ | |
+ division speculates on the +-------+ ~ | |
+ LOAD of A : : ~ | |
+ : :DIVIDE | |
+ : : ~ | |
+ : : ~ | |
+ rrrrrrrrrrrrrrrrr | |
+ +-------+ | |
+ The speculation is discarded ---> --->| A->1 |------>| |
+ and an updated value is +-------+ | |
+ retrieved : : +-------+
+
+
+TRANSITIVITY
+------------
+
+Transitivity is a deeply intuitive notion about ordering that is not
+always provided by real computer systems. The following example
+demonstrates transitivity (also called "cumulativity"):
+
+ CPU 1 CPU 2 CPU 3
+ ======================= ======================= =======================
+ { X = 0, Y = 0 }
+ STORE X=1 LOAD X STORE Y=1
+ <general barrier> <general barrier>
+ LOAD Y LOAD X
+
+Suppose that CPU 2's load from X returns 1 and its load from Y returns 0.
+This indicates that CPU 2's load from X in some sense follows CPU 1's
+store to X and that CPU 2's load from Y in some sense preceded CPU 3's
+store to Y. The question is then "Can CPU 3's load from X return 0?"
+
+Because CPU 2's load from X in some sense came after CPU 1's store, it
+is natural to expect that CPU 3's load from X must therefore return 1.
+This expectation is an example of transitivity: if a load executing on
+CPU A follows a load from the same variable executing on CPU B, then
+CPU A's load must either return the same value that CPU B's load did,
+or must return some later value.
+
+In the Linux kernel, use of general memory barriers guarantees
+transitivity. Therefore, in the above example, if CPU 2's load from X
+returns 1 and its load from Y returns 0, then CPU 3's load from X must
+also return 1.
+
+However, transitivity is -not- guaranteed for read or write barriers.
+For example, suppose that CPU 2's general barrier in the above example
+is changed to a read barrier as shown below:
+
+ CPU 1 CPU 2 CPU 3
+ ======================= ======================= =======================
+ { X = 0, Y = 0 }
+ STORE X=1 LOAD X STORE Y=1
+ <read barrier> <general barrier>
+ LOAD Y LOAD X
+
+This substitution destroys transitivity: in this example, it is perfectly
+legal for CPU 2's load from X to return 1, its load from Y to return 0,
+and CPU 3's load from X to return 0.
+
+The key point is that although CPU 2's read barrier orders its pair
+of loads, it does not guarantee to order CPU 1's store. Therefore, if
+this example runs on a system where CPUs 1 and 2 share a store buffer
+or a level of cache, CPU 2 might have early access to CPU 1's writes.
+General barriers are therefore required to ensure that all CPUs agree
+on the combined order of CPU 1's and CPU 2's accesses.
+
+To reiterate, if your code requires transitivity, use general barriers
+throughout.
+
+
+========================
+EXPLICIT KERNEL BARRIERS
+========================
+
+The Linux kernel has a variety of different barriers that act at different
+levels:
+
+ (*) Compiler barrier.
+
+ (*) CPU memory barriers.
+
+ (*) MMIO write barrier.
+
+
+COMPILER BARRIER
+----------------
+
+The Linux kernel has an explicit compiler barrier function that prevents the
+compiler from moving the memory accesses either side of it to the other side:
+
+ barrier();
+
+This is a general barrier -- there are no read-read or write-write variants
+of barrier(). However, ACCESS_ONCE() can be thought of as a weak form
+for barrier() that affects only the specific accesses flagged by the
+ACCESS_ONCE().
+
+The barrier() function has the following effects:
+
+ (*) Prevents the compiler from reordering accesses following the
+ barrier() to precede any accesses preceding the barrier().
+ One example use for this property is to ease communication between
+ interrupt-handler code and the code that was interrupted.
+
+ (*) Within a loop, forces the compiler to load the variables used
+ in that loop's conditional on each pass through that loop.
+
+The ACCESS_ONCE() function can prevent any number of optimizations that,
+while perfectly safe in single-threaded code, can be fatal in concurrent
+code. Here are some examples of these sorts of optimizations:
+
+ (*) The compiler is within its rights to reorder loads and stores
+ to the same variable, and in some cases, the CPU is within its
+ rights to reorder loads to the same variable. This means that
+ the following code:
+
+ a[0] = x;
+ a[1] = x;
+
+ Might result in an older value of x stored in a[1] than in a[0].
+ Prevent both the compiler and the CPU from doing this as follows:
+
+ a[0] = ACCESS_ONCE(x);
+ a[1] = ACCESS_ONCE(x);
+
+ In short, ACCESS_ONCE() provides cache coherence for accesses from
+ multiple CPUs to a single variable.
+
+ (*) The compiler is within its rights to merge successive loads from
+ the same variable. Such merging can cause the compiler to "optimize"
+ the following code:
+
+ while (tmp = a)
+ do_something_with(tmp);
+
+ into the following code, which, although in some sense legitimate
+ for single-threaded code, is almost certainly not what the developer
+ intended:
+
+ if (tmp = a)
+ for (;;)
+ do_something_with(tmp);
+
+ Use ACCESS_ONCE() to prevent the compiler from doing this to you:
+
+ while (tmp = ACCESS_ONCE(a))
+ do_something_with(tmp);
+
+ (*) The compiler is within its rights to reload a variable, for example,
+ in cases where high register pressure prevents the compiler from
+ keeping all data of interest in registers. The compiler might
+ therefore optimize the variable 'tmp' out of our previous example:
+
+ while (tmp = a)
+ do_something_with(tmp);
+
+ This could result in the following code, which is perfectly safe in
+ single-threaded code, but can be fatal in concurrent code:
+
+ while (a)
+ do_something_with(a);
+
+ For example, the optimized version of this code could result in
+ passing a zero to do_something_with() in the case where the variable
+ a was modified by some other CPU between the "while" statement and
+ the call to do_something_with().
+
+ Again, use ACCESS_ONCE() to prevent the compiler from doing this:
+
+ while (tmp = ACCESS_ONCE(a))
+ do_something_with(tmp);
+
+ Note that if the compiler runs short of registers, it might save
+ tmp onto the stack. The overhead of this saving and later restoring
+ is why compilers reload variables. Doing so is perfectly safe for
+ single-threaded code, so you need to tell the compiler about cases
+ where it is not safe.
+
+ (*) The compiler is within its rights to omit a load entirely if it knows
+ what the value will be. For example, if the compiler can prove that
+ the value of variable 'a' is always zero, it can optimize this code:
+
+ while (tmp = a)
+ do_something_with(tmp);
+
+ Into this:
+
+ do { } while (0);
+
+ This transformation is a win for single-threaded code because it gets
+ rid of a load and a branch. The problem is that the compiler will
+ carry out its proof assuming that the current CPU is the only one
+ updating variable 'a'. If variable 'a' is shared, then the compiler's
+ proof will be erroneous. Use ACCESS_ONCE() to tell the compiler
+ that it doesn't know as much as it thinks it does:
+
+ while (tmp = ACCESS_ONCE(a))
+ do_something_with(tmp);
+
+ But please note that the compiler is also closely watching what you
+ do with the value after the ACCESS_ONCE(). For example, suppose you
+ do the following and MAX is a preprocessor macro with the value 1:
+
+ while ((tmp = ACCESS_ONCE(a)) % MAX)
+ do_something_with(tmp);
+
+ Then the compiler knows that the result of the "%" operator applied
+ to MAX will always be zero, again allowing the compiler to optimize
+ the code into near-nonexistence. (It will still load from the
+ variable 'a'.)
+
+ (*) Similarly, the compiler is within its rights to omit a store entirely
+ if it knows that the variable already has the value being stored.
+ Again, the compiler assumes that the current CPU is the only one
+ storing into the variable, which can cause the compiler to do the
+ wrong thing for shared variables. For example, suppose you have
+ the following:
+
+ a = 0;
+ /* Code that does not store to variable a. */
+ a = 0;
+
+ The compiler sees that the value of variable 'a' is already zero, so
+ it might well omit the second store. This would come as a fatal
+ surprise if some other CPU might have stored to variable 'a' in the
+ meantime.
+
+ Use ACCESS_ONCE() to prevent the compiler from making this sort of
+ wrong guess:
+
+ ACCESS_ONCE(a) = 0;
+ /* Code that does not store to variable a. */
+ ACCESS_ONCE(a) = 0;
+
+ (*) The compiler is within its rights to reorder memory accesses unless
+ you tell it not to. For example, consider the following interaction
+ between process-level code and an interrupt handler:
+
+ void process_level(void)
+ {
+ msg = get_message();
+ flag = true;
+ }
+
+ void interrupt_handler(void)
+ {
+ if (flag)
+ process_message(msg);
+ }
+
+ There is nothing to prevent the compiler from transforming
+ process_level() to the following, in fact, this might well be a
+ win for single-threaded code:
+
+ void process_level(void)
+ {
+ flag = true;
+ msg = get_message();
+ }
+
+ If the interrupt occurs between these two statement, then
+ interrupt_handler() might be passed a garbled msg. Use ACCESS_ONCE()
+ to prevent this as follows:
+
+ void process_level(void)
+ {
+ ACCESS_ONCE(msg) = get_message();
+ ACCESS_ONCE(flag) = true;
+ }
+
+ void interrupt_handler(void)
+ {
+ if (ACCESS_ONCE(flag))
+ process_message(ACCESS_ONCE(msg));
+ }
+
+ Note that the ACCESS_ONCE() wrappers in interrupt_handler()
+ are needed if this interrupt handler can itself be interrupted
+ by something that also accesses 'flag' and 'msg', for example,
+ a nested interrupt or an NMI. Otherwise, ACCESS_ONCE() is not
+ needed in interrupt_handler() other than for documentation purposes.
+ (Note also that nested interrupts do not typically occur in modern
+ Linux kernels, in fact, if an interrupt handler returns with
+ interrupts enabled, you will get a WARN_ONCE() splat.)
+
+ You should assume that the compiler can move ACCESS_ONCE() past
+ code not containing ACCESS_ONCE(), barrier(), or similar primitives.
+
+ This effect could also be achieved using barrier(), but ACCESS_ONCE()
+ is more selective: With ACCESS_ONCE(), the compiler need only forget
+ the contents of the indicated memory locations, while with barrier()
+ the compiler must discard the value of all memory locations that
+ it has currented cached in any machine registers. Of course,
+ the compiler must also respect the order in which the ACCESS_ONCE()s
+ occur, though the CPU of course need not do so.
+
+ (*) The compiler is within its rights to invent stores to a variable,
+ as in the following example:
+
+ if (a)
+ b = a;
+ else
+ b = 42;
+
+ The compiler might save a branch by optimizing this as follows:
+
+ b = 42;
+ if (a)
+ b = a;
+
+ In single-threaded code, this is not only safe, but also saves
+ a branch. Unfortunately, in concurrent code, this optimization
+ could cause some other CPU to see a spurious value of 42 -- even
+ if variable 'a' was never zero -- when loading variable 'b'.
+ Use ACCESS_ONCE() to prevent this as follows:
+
+ if (a)
+ ACCESS_ONCE(b) = a;
+ else
+ ACCESS_ONCE(b) = 42;
+
+ The compiler can also invent loads. These are usually less
+ damaging, but they can result in cache-line bouncing and thus in
+ poor performance and scalability. Use ACCESS_ONCE() to prevent
+ invented loads.
+
+ (*) For aligned memory locations whose size allows them to be accessed
+ with a single memory-reference instruction, prevents "load tearing"
+ and "store tearing," in which a single large access is replaced by
+ multiple smaller accesses. For example, given an architecture having
+ 16-bit store instructions with 7-bit immediate fields, the compiler
+ might be tempted to use two 16-bit store-immediate instructions to
+ implement the following 32-bit store:
+
+ p = 0x00010002;
+
+ Please note that GCC really does use this sort of optimization,
+ which is not surprising given that it would likely take more
+ than two instructions to build the constant and then store it.
+ This optimization can therefore be a win in single-threaded code.
+ In fact, a recent bug (since fixed) caused GCC to incorrectly use
+ this optimization in a volatile store. In the absence of such bugs,
+ use of ACCESS_ONCE() prevents store tearing in the following example:
+
+ ACCESS_ONCE(p) = 0x00010002;
+
+ Use of packed structures can also result in load and store tearing,
+ as in this example:
+
+ struct __attribute__((__packed__)) foo {
+ short a;
+ int b;
+ short c;
+ };
+ struct foo foo1, foo2;
+ ...
+
+ foo2.a = foo1.a;
+ foo2.b = foo1.b;
+ foo2.c = foo1.c;
+
+ Because there are no ACCESS_ONCE() wrappers and no volatile markings,
+ the compiler would be well within its rights to implement these three
+ assignment statements as a pair of 32-bit loads followed by a pair
+ of 32-bit stores. This would result in load tearing on 'foo1.b'
+ and store tearing on 'foo2.b'. ACCESS_ONCE() again prevents tearing
+ in this example:
+
+ foo2.a = foo1.a;
+ ACCESS_ONCE(foo2.b) = ACCESS_ONCE(foo1.b);
+ foo2.c = foo1.c;
+
+All that aside, it is never necessary to use ACCESS_ONCE() on a variable
+that has been marked volatile. For example, because 'jiffies' is marked
+volatile, it is never necessary to say ACCESS_ONCE(jiffies). The reason
+for this is that ACCESS_ONCE() is implemented as a volatile cast, which
+has no effect when its argument is already marked volatile.
+
+Please note that these compiler barriers have no direct effect on the CPU,
+which may then reorder things however it wishes.
+
+
+CPU MEMORY BARRIERS
+-------------------
+
+The Linux kernel has eight basic CPU memory barriers:
+
+ TYPE MANDATORY SMP CONDITIONAL
+ =============== ======================= ===========================
+ GENERAL mb() smp_mb()
+ WRITE wmb() smp_wmb()
+ READ rmb() smp_rmb()
+ DATA DEPENDENCY read_barrier_depends() smp_read_barrier_depends()
+
+
+All memory barriers except the data dependency barriers imply a compiler
+barrier. Data dependencies do not impose any additional compiler ordering.
+
+Aside: In the case of data dependencies, the compiler would be expected to
+issue the loads in the correct order (eg. `a[b]` would have to load the value
+of b before loading a[b]), however there is no guarantee in the C specification
+that the compiler may not speculate the value of b (eg. is equal to 1) and load
+a before b (eg. tmp = a[1]; if (b != 1) tmp = a[b]; ). There is also the
+problem of a compiler reloading b after having loaded a[b], thus having a newer
+copy of b than a[b]. A consensus has not yet been reached about these problems,
+however the ACCESS_ONCE macro is a good place to start looking.
+
+SMP memory barriers are reduced to compiler barriers on uniprocessor compiled
+systems because it is assumed that a CPU will appear to be self-consistent,
+and will order overlapping accesses correctly with respect to itself.
+
+[!] Note that SMP memory barriers _must_ be used to control the ordering of
+references to shared memory on SMP systems, though the use of locking instead
+is sufficient.
+
+Mandatory barriers should not be used to control SMP effects, since mandatory
+barriers unnecessarily impose overhead on UP systems. They may, however, be
+used to control MMIO effects on accesses through relaxed memory I/O windows.
+These are required even on non-SMP systems as they affect the order in which
+memory operations appear to a device by prohibiting both the compiler and the
+CPU from reordering them.
+
+
+There are some more advanced barrier functions:
+
+ (*) set_mb(var, value)
+
+ This assigns the value to the variable and then inserts a full memory
+ barrier after it, depending on the function. It isn't guaranteed to
+ insert anything more than a compiler barrier in a UP compilation.
+
+
+ (*) smp_mb__before_atomic();
+ (*) smp_mb__after_atomic();
+
+ These are for use with atomic (such as add, subtract, increment and
+ decrement) functions that don't return a value, especially when used for
+ reference counting. These functions do not imply memory barriers.
+
+ These are also used for atomic bitop functions that do not return a
+ value (such as set_bit and clear_bit).
+
+ As an example, consider a piece of code that marks an object as being dead
+ and then decrements the object's reference count:
+
+ obj->dead = 1;
+ smp_mb__before_atomic();
+ atomic_dec(&obj->ref_count);
+
+ This makes sure that the death mark on the object is perceived to be set
+ *before* the reference counter is decremented.
+
+ See Documentation/atomic_ops.txt for more information. See the "Atomic
+ operations" subsection for information on where to use these.
+
+
+ (*) dma_wmb();
+ (*) dma_rmb();
+
+ These are for use with consistent memory to guarantee the ordering
+ of writes or reads of shared memory accessible to both the CPU and a
+ DMA capable device.
+
+ For example, consider a device driver that shares memory with a device
+ and uses a descriptor status value to indicate if the descriptor belongs
+ to the device or the CPU, and a doorbell to notify it when new
+ descriptors are available:
+
+ if (desc->status != DEVICE_OWN) {
+ /* do not read data until we own descriptor */
+ dma_rmb();
+
+ /* read/modify data */
+ read_data = desc->data;
+ desc->data = write_data;
+
+ /* flush modifications before status update */
+ dma_wmb();
+
+ /* assign ownership */
+ desc->status = DEVICE_OWN;
+
+ /* force memory to sync before notifying device via MMIO */
+ wmb();
+
+ /* notify device of new descriptors */
+ writel(DESC_NOTIFY, doorbell);
+ }
+
+ The dma_rmb() allows us guarantee the device has released ownership
+ before we read the data from the descriptor, and the dma_wmb() allows
+ us to guarantee the data is written to the descriptor before the device
+ can see it now has ownership. The wmb() is needed to guarantee that the
+ cache coherent memory writes have completed before attempting a write to
+ the cache incoherent MMIO region.
+
+ See Documentation/DMA-API.txt for more information on consistent memory.
+
+MMIO WRITE BARRIER
+------------------
+
+The Linux kernel also has a special barrier for use with memory-mapped I/O
+writes:
+
+ mmiowb();
+
+This is a variation on the mandatory write barrier that causes writes to weakly
+ordered I/O regions to be partially ordered. Its effects may go beyond the
+CPU->Hardware interface and actually affect the hardware at some level.
+
+See the subsection "Locks vs I/O accesses" for more information.
+
+
+===============================
+IMPLICIT KERNEL MEMORY BARRIERS
+===============================
+
+Some of the other functions in the linux kernel imply memory barriers, amongst
+which are locking and scheduling functions.
+
+This specification is a _minimum_ guarantee; any particular architecture may
+provide more substantial guarantees, but these may not be relied upon outside
+of arch specific code.
+
+
+ACQUIRING FUNCTIONS
+-------------------
+
+The Linux kernel has a number of locking constructs:
+
+ (*) spin locks
+ (*) R/W spin locks
+ (*) mutexes
+ (*) semaphores
+ (*) R/W semaphores
+ (*) RCU
+
+In all cases there are variants on "ACQUIRE" operations and "RELEASE" operations
+for each construct. These operations all imply certain barriers:
+
+ (1) ACQUIRE operation implication:
+
+ Memory operations issued after the ACQUIRE will be completed after the
+ ACQUIRE operation has completed.
+
+ Memory operations issued before the ACQUIRE may be completed after
+ the ACQUIRE operation has completed. An smp_mb__before_spinlock(),
+ combined with a following ACQUIRE, orders prior loads against
+ subsequent loads and stores and also orders prior stores against
+ subsequent stores. Note that this is weaker than smp_mb()! The
+ smp_mb__before_spinlock() primitive is free on many architectures.
+
+ (2) RELEASE operation implication:
+
+ Memory operations issued before the RELEASE will be completed before the
+ RELEASE operation has completed.
+
+ Memory operations issued after the RELEASE may be completed before the
+ RELEASE operation has completed.
+
+ (3) ACQUIRE vs ACQUIRE implication:
+
+ All ACQUIRE operations issued before another ACQUIRE operation will be
+ completed before that ACQUIRE operation.
+
+ (4) ACQUIRE vs RELEASE implication:
+
+ All ACQUIRE operations issued before a RELEASE operation will be
+ completed before the RELEASE operation.
+
+ (5) Failed conditional ACQUIRE implication:
+
+ Certain locking variants of the ACQUIRE operation may fail, either due to
+ being unable to get the lock immediately, or due to receiving an unblocked
+ signal whilst asleep waiting for the lock to become available. Failed
+ locks do not imply any sort of barrier.
+
+[!] Note: one of the consequences of lock ACQUIREs and RELEASEs being only
+one-way barriers is that the effects of instructions outside of a critical
+section may seep into the inside of the critical section.
+
+An ACQUIRE followed by a RELEASE may not be assumed to be full memory barrier
+because it is possible for an access preceding the ACQUIRE to happen after the
+ACQUIRE, and an access following the RELEASE to happen before the RELEASE, and
+the two accesses can themselves then cross:
+
+ *A = a;
+ ACQUIRE M
+ RELEASE M
+ *B = b;
+
+may occur as:
+
+ ACQUIRE M, STORE *B, STORE *A, RELEASE M
+
+When the ACQUIRE and RELEASE are a lock acquisition and release,
+respectively, this same reordering can occur if the lock's ACQUIRE and
+RELEASE are to the same lock variable, but only from the perspective of
+another CPU not holding that lock. In short, a ACQUIRE followed by an
+RELEASE may -not- be assumed to be a full memory barrier.
+
+Similarly, the reverse case of a RELEASE followed by an ACQUIRE does not
+imply a full memory barrier. If it is necessary for a RELEASE-ACQUIRE
+pair to produce a full barrier, the ACQUIRE can be followed by an
+smp_mb__after_unlock_lock() invocation. This will produce a full barrier
+if either (a) the RELEASE and the ACQUIRE are executed by the same
+CPU or task, or (b) the RELEASE and ACQUIRE act on the same variable.
+The smp_mb__after_unlock_lock() primitive is free on many architectures.
+Without smp_mb__after_unlock_lock(), the CPU's execution of the critical
+sections corresponding to the RELEASE and the ACQUIRE can cross, so that:
+
+ *A = a;
+ RELEASE M
+ ACQUIRE N
+ *B = b;
+
+could occur as:
+
+ ACQUIRE N, STORE *B, STORE *A, RELEASE M
+
+It might appear that this reordering could introduce a deadlock.
+However, this cannot happen because if such a deadlock threatened,
+the RELEASE would simply complete, thereby avoiding the deadlock.
+
+ Why does this work?
+
+ One key point is that we are only talking about the CPU doing
+ the reordering, not the compiler. If the compiler (or, for
+ that matter, the developer) switched the operations, deadlock
+ -could- occur.
+
+ But suppose the CPU reordered the operations. In this case,
+ the unlock precedes the lock in the assembly code. The CPU
+ simply elected to try executing the later lock operation first.
+ If there is a deadlock, this lock operation will simply spin (or
+ try to sleep, but more on that later). The CPU will eventually
+ execute the unlock operation (which preceded the lock operation
+ in the assembly code), which will unravel the potential deadlock,
+ allowing the lock operation to succeed.
+
+ But what if the lock is a sleeplock? In that case, the code will
+ try to enter the scheduler, where it will eventually encounter
+ a memory barrier, which will force the earlier unlock operation
+ to complete, again unraveling the deadlock. There might be
+ a sleep-unlock race, but the locking primitive needs to resolve
+ such races properly in any case.
+
+With smp_mb__after_unlock_lock(), the two critical sections cannot overlap.
+For example, with the following code, the store to *A will always be
+seen by other CPUs before the store to *B:
+
+ *A = a;
+ RELEASE M
+ ACQUIRE N
+ smp_mb__after_unlock_lock();
+ *B = b;
+
+The operations will always occur in one of the following orders:
+
+ STORE *A, RELEASE, ACQUIRE, smp_mb__after_unlock_lock(), STORE *B
+ STORE *A, ACQUIRE, RELEASE, smp_mb__after_unlock_lock(), STORE *B
+ ACQUIRE, STORE *A, RELEASE, smp_mb__after_unlock_lock(), STORE *B
+
+If the RELEASE and ACQUIRE were instead both operating on the same lock
+variable, only the first of these alternatives can occur. In addition,
+the more strongly ordered systems may rule out some of the above orders.
+But in any case, as noted earlier, the smp_mb__after_unlock_lock()
+ensures that the store to *A will always be seen as happening before
+the store to *B.
+
+Locks and semaphores may not provide any guarantee of ordering on UP compiled
+systems, and so cannot be counted on in such a situation to actually achieve
+anything at all - especially with respect to I/O accesses - unless combined
+with interrupt disabling operations.
+
+See also the section on "Inter-CPU locking barrier effects".
+
+
+As an example, consider the following:
+
+ *A = a;
+ *B = b;
+ ACQUIRE
+ *C = c;
+ *D = d;
+ RELEASE
+ *E = e;
+ *F = f;
+
+The following sequence of events is acceptable:
+
+ ACQUIRE, {*F,*A}, *E, {*C,*D}, *B, RELEASE
+
+ [+] Note that {*F,*A} indicates a combined access.
+
+But none of the following are:
+
+ {*F,*A}, *B, ACQUIRE, *C, *D, RELEASE, *E
+ *A, *B, *C, ACQUIRE, *D, RELEASE, *E, *F
+ *A, *B, ACQUIRE, *C, RELEASE, *D, *E, *F
+ *B, ACQUIRE, *C, *D, RELEASE, {*F,*A}, *E
+
+
+
+INTERRUPT DISABLING FUNCTIONS
+-----------------------------
+
+Functions that disable interrupts (ACQUIRE equivalent) and enable interrupts
+(RELEASE equivalent) will act as compiler barriers only. So if memory or I/O
+barriers are required in such a situation, they must be provided from some
+other means.
+
+
+SLEEP AND WAKE-UP FUNCTIONS
+---------------------------
+
+Sleeping and waking on an event flagged in global data can be viewed as an
+interaction between two pieces of data: the task state of the task waiting for
+the event and the global data used to indicate the event. To make sure that
+these appear to happen in the right order, the primitives to begin the process
+of going to sleep, and the primitives to initiate a wake up imply certain
+barriers.
+
+Firstly, the sleeper normally follows something like this sequence of events:
+
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (event_indicated)
+ break;
+ schedule();
+ }
+
+A general memory barrier is interpolated automatically by set_current_state()
+after it has altered the task state:
+
+ CPU 1
+ ===============================
+ set_current_state();
+ set_mb();
+ STORE current->state
+ <general barrier>
+ LOAD event_indicated
+
+set_current_state() may be wrapped by:
+
+ prepare_to_wait();
+ prepare_to_wait_exclusive();
+
+which therefore also imply a general memory barrier after setting the state.
+The whole sequence above is available in various canned forms, all of which
+interpolate the memory barrier in the right place:
+
+ wait_event();
+ wait_event_interruptible();
+ wait_event_interruptible_exclusive();
+ wait_event_interruptible_timeout();
+ wait_event_killable();
+ wait_event_timeout();
+ wait_on_bit();
+ wait_on_bit_lock();
+
+
+Secondly, code that performs a wake up normally follows something like this:
+
+ event_indicated = 1;
+ wake_up(&event_wait_queue);
+
+or:
+
+ event_indicated = 1;
+ wake_up_process(event_daemon);
+
+A write memory barrier is implied by wake_up() and co. if and only if they wake
+something up. The barrier occurs before the task state is cleared, and so sits
+between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ set_current_state(); STORE event_indicated
+ set_mb(); wake_up();
+ STORE current->state <write barrier>
+ <general barrier> STORE current->state
+ LOAD event_indicated
+
+To repeat, this write memory barrier is present if and only if something
+is actually awakened. To see this, consider the following sequence of
+events, where X and Y are both initially zero:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ X = 1; STORE event_indicated
+ smp_mb(); wake_up();
+ Y = 1; wait_event(wq, Y == 1);
+ wake_up(); load from Y sees 1, no memory barrier
+ load from X might see 0
+
+In contrast, if a wakeup does occur, CPU 2's load from X would be guaranteed
+to see 1.
+
+The available waker functions include:
+
+ complete();
+ wake_up();
+ wake_up_all();
+ wake_up_bit();
+ wake_up_interruptible();
+ wake_up_interruptible_all();
+ wake_up_interruptible_nr();
+ wake_up_interruptible_poll();
+ wake_up_interruptible_sync();
+ wake_up_interruptible_sync_poll();
+ wake_up_locked();
+ wake_up_locked_poll();
+ wake_up_nr();
+ wake_up_poll();
+ wake_up_process();
+
+
+[!] Note that the memory barriers implied by the sleeper and the waker do _not_
+order multiple stores before the wake-up with respect to loads of those stored
+values after the sleeper has called set_current_state(). For instance, if the
+sleeper does:
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (event_indicated)
+ break;
+ __set_current_state(TASK_RUNNING);
+ do_something(my_data);
+
+and the waker does:
+
+ my_data = value;
+ event_indicated = 1;
+ wake_up(&event_wait_queue);
+
+there's no guarantee that the change to event_indicated will be perceived by
+the sleeper as coming after the change to my_data. In such a circumstance, the
+code on both sides must interpolate its own memory barriers between the
+separate data accesses. Thus the above sleeper ought to do:
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (event_indicated) {
+ smp_rmb();
+ do_something(my_data);
+ }
+
+and the waker should do:
+
+ my_data = value;
+ smp_wmb();
+ event_indicated = 1;
+ wake_up(&event_wait_queue);
+
+
+MISCELLANEOUS FUNCTIONS
+-----------------------
+
+Other functions that imply barriers:
+
+ (*) schedule() and similar imply full memory barriers.
+
+
+===================================
+INTER-CPU ACQUIRING BARRIER EFFECTS
+===================================
+
+On SMP systems locking primitives give a more substantial form of barrier: one
+that does affect memory access ordering on other CPUs, within the context of
+conflict on any particular lock.
+
+
+ACQUIRES VS MEMORY ACCESSES
+---------------------------
+
+Consider the following: the system has a pair of spinlocks (M) and (Q), and
+three CPUs; then should the following sequence of events occur:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ ACCESS_ONCE(*A) = a; ACCESS_ONCE(*E) = e;
+ ACQUIRE M ACQUIRE Q
+ ACCESS_ONCE(*B) = b; ACCESS_ONCE(*F) = f;
+ ACCESS_ONCE(*C) = c; ACCESS_ONCE(*G) = g;
+ RELEASE M RELEASE Q
+ ACCESS_ONCE(*D) = d; ACCESS_ONCE(*H) = h;
+
+Then there is no guarantee as to what order CPU 3 will see the accesses to *A
+through *H occur in, other than the constraints imposed by the separate locks
+on the separate CPUs. It might, for example, see:
+
+ *E, ACQUIRE M, ACQUIRE Q, *G, *C, *F, *A, *B, RELEASE Q, *D, *H, RELEASE M
+
+But it won't see any of:
+
+ *B, *C or *D preceding ACQUIRE M
+ *A, *B or *C following RELEASE M
+ *F, *G or *H preceding ACQUIRE Q
+ *E, *F or *G following RELEASE Q
+
+
+However, if the following occurs:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ ACCESS_ONCE(*A) = a;
+ ACQUIRE M [1]
+ ACCESS_ONCE(*B) = b;
+ ACCESS_ONCE(*C) = c;
+ RELEASE M [1]
+ ACCESS_ONCE(*D) = d; ACCESS_ONCE(*E) = e;
+ ACQUIRE M [2]
+ smp_mb__after_unlock_lock();
+ ACCESS_ONCE(*F) = f;
+ ACCESS_ONCE(*G) = g;
+ RELEASE M [2]
+ ACCESS_ONCE(*H) = h;
+
+CPU 3 might see:
+
+ *E, ACQUIRE M [1], *C, *B, *A, RELEASE M [1],
+ ACQUIRE M [2], *H, *F, *G, RELEASE M [2], *D
+
+But assuming CPU 1 gets the lock first, CPU 3 won't see any of:
+
+ *B, *C, *D, *F, *G or *H preceding ACQUIRE M [1]
+ *A, *B or *C following RELEASE M [1]
+ *F, *G or *H preceding ACQUIRE M [2]
+ *A, *B, *C, *E, *F or *G following RELEASE M [2]
+
+Note that the smp_mb__after_unlock_lock() is critically important
+here: Without it CPU 3 might see some of the above orderings.
+Without smp_mb__after_unlock_lock(), the accesses are not guaranteed
+to be seen in order unless CPU 3 holds lock M.
+
+
+ACQUIRES VS I/O ACCESSES
+------------------------
+
+Under certain circumstances (especially involving NUMA), I/O accesses within
+two spinlocked sections on two different CPUs may be seen as interleaved by the
+PCI bridge, because the PCI bridge does not necessarily participate in the
+cache-coherence protocol, and is therefore incapable of issuing the required
+read memory barriers.
+
+For example:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ spin_lock(Q)
+ writel(0, ADDR)
+ writel(1, DATA);
+ spin_unlock(Q);
+ spin_lock(Q);
+ writel(4, ADDR);
+ writel(5, DATA);
+ spin_unlock(Q);
+
+may be seen by the PCI bridge as follows:
+
+ STORE *ADDR = 0, STORE *ADDR = 4, STORE *DATA = 1, STORE *DATA = 5
+
+which would probably cause the hardware to malfunction.
+
+
+What is necessary here is to intervene with an mmiowb() before dropping the
+spinlock, for example:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ spin_lock(Q)
+ writel(0, ADDR)
+ writel(1, DATA);
+ mmiowb();
+ spin_unlock(Q);
+ spin_lock(Q);
+ writel(4, ADDR);
+ writel(5, DATA);
+ mmiowb();
+ spin_unlock(Q);
+
+this will ensure that the two stores issued on CPU 1 appear at the PCI bridge
+before either of the stores issued on CPU 2.
+
+
+Furthermore, following a store by a load from the same device obviates the need
+for the mmiowb(), because the load forces the store to complete before the load
+is performed:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ spin_lock(Q)
+ writel(0, ADDR)
+ a = readl(DATA);
+ spin_unlock(Q);
+ spin_lock(Q);
+ writel(4, ADDR);
+ b = readl(DATA);
+ spin_unlock(Q);
+
+
+See Documentation/DocBook/deviceiobook.tmpl for more information.
+
+
+=================================
+WHERE ARE MEMORY BARRIERS NEEDED?
+=================================
+
+Under normal operation, memory operation reordering is generally not going to
+be a problem as a single-threaded linear piece of code will still appear to
+work correctly, even if it's in an SMP kernel. There are, however, four
+circumstances in which reordering definitely _could_ be a problem:
+
+ (*) Interprocessor interaction.
+
+ (*) Atomic operations.
+
+ (*) Accessing devices.
+
+ (*) Interrupts.
+
+
+INTERPROCESSOR INTERACTION
+--------------------------
+
+When there's a system with more than one processor, more than one CPU in the
+system may be working on the same data set at the same time. This can cause
+synchronisation problems, and the usual way of dealing with them is to use
+locks. Locks, however, are quite expensive, and so it may be preferable to
+operate without the use of a lock if at all possible. In such a case
+operations that affect both CPUs may have to be carefully ordered to prevent
+a malfunction.
+
+Consider, for example, the R/W semaphore slow path. Here a waiting process is
+queued on the semaphore, by virtue of it having a piece of its stack linked to
+the semaphore's list of waiting processes:
+
+ struct rw_semaphore {
+ ...
+ spinlock_t lock;
+ struct list_head waiters;
+ };
+
+ struct rwsem_waiter {
+ struct list_head list;
+ struct task_struct *task;
+ };
+
+To wake up a particular waiter, the up_read() or up_write() functions have to:
+
+ (1) read the next pointer from this waiter's record to know as to where the
+ next waiter record is;
+
+ (2) read the pointer to the waiter's task structure;
+
+ (3) clear the task pointer to tell the waiter it has been given the semaphore;
+
+ (4) call wake_up_process() on the task; and
+
+ (5) release the reference held on the waiter's task struct.
+
+In other words, it has to perform this sequence of events:
+
+ LOAD waiter->list.next;
+ LOAD waiter->task;
+ STORE waiter->task;
+ CALL wakeup
+ RELEASE task
+
+and if any of these steps occur out of order, then the whole thing may
+malfunction.
+
+Once it has queued itself and dropped the semaphore lock, the waiter does not
+get the lock again; it instead just waits for its task pointer to be cleared
+before proceeding. Since the record is on the waiter's stack, this means that
+if the task pointer is cleared _before_ the next pointer in the list is read,
+another CPU might start processing the waiter and might clobber the waiter's
+stack before the up*() function has a chance to read the next pointer.
+
+Consider then what might happen to the above sequence of events:
+
+ CPU 1 CPU 2
+ =============================== ===============================
+ down_xxx()
+ Queue waiter
+ Sleep
+ up_yyy()
+ LOAD waiter->task;
+ STORE waiter->task;
+ Woken up by other event
+ <preempt>
+ Resume processing
+ down_xxx() returns
+ call foo()
+ foo() clobbers *waiter
+ </preempt>
+ LOAD waiter->list.next;
+ --- OOPS ---
+
+This could be dealt with using the semaphore lock, but then the down_xxx()
+function has to needlessly get the spinlock again after being woken up.
+
+The way to deal with this is to insert a general SMP memory barrier:
+
+ LOAD waiter->list.next;
+ LOAD waiter->task;
+ smp_mb();
+ STORE waiter->task;
+ CALL wakeup
+ RELEASE task
+
+In this case, the barrier makes a guarantee that all memory accesses before the
+barrier will appear to happen before all the memory accesses after the barrier
+with respect to the other CPUs on the system. It does _not_ guarantee that all
+the memory accesses before the barrier will be complete by the time the barrier
+instruction itself is complete.
+
+On a UP system - where this wouldn't be a problem - the smp_mb() is just a
+compiler barrier, thus making sure the compiler emits the instructions in the
+right order without actually intervening in the CPU. Since there's only one
+CPU, that CPU's dependency ordering logic will take care of everything else.
+
+
+ATOMIC OPERATIONS
+-----------------
+
+Whilst they are technically interprocessor interaction considerations, atomic
+operations are noted specially as some of them imply full memory barriers and
+some don't, but they're very heavily relied on as a group throughout the
+kernel.
+
+Any atomic operation that modifies some state in memory and returns information
+about the state (old or new) implies an SMP-conditional general memory barrier
+(smp_mb()) on each side of the actual operation (with the exception of
+explicit lock operations, described later). These include:
+
+ xchg();
+ cmpxchg();
+ atomic_xchg(); atomic_long_xchg();
+ atomic_cmpxchg(); atomic_long_cmpxchg();
+ atomic_inc_return(); atomic_long_inc_return();
+ atomic_dec_return(); atomic_long_dec_return();
+ atomic_add_return(); atomic_long_add_return();
+ atomic_sub_return(); atomic_long_sub_return();
+ atomic_inc_and_test(); atomic_long_inc_and_test();
+ atomic_dec_and_test(); atomic_long_dec_and_test();
+ atomic_sub_and_test(); atomic_long_sub_and_test();
+ atomic_add_negative(); atomic_long_add_negative();
+ test_and_set_bit();
+ test_and_clear_bit();
+ test_and_change_bit();
+
+ /* when succeeds (returns 1) */
+ atomic_add_unless(); atomic_long_add_unless();
+
+These are used for such things as implementing ACQUIRE-class and RELEASE-class
+operations and adjusting reference counters towards object destruction, and as
+such the implicit memory barrier effects are necessary.
+
+
+The following operations are potential problems as they do _not_ imply memory
+barriers, but might be used for implementing such things as RELEASE-class
+operations:
+
+ atomic_set();
+ set_bit();
+ clear_bit();
+ change_bit();
+
+With these the appropriate explicit memory barrier should be used if necessary
+(smp_mb__before_atomic() for instance).
+
+
+The following also do _not_ imply memory barriers, and so may require explicit
+memory barriers under some circumstances (smp_mb__before_atomic() for
+instance):
+
+ atomic_add();
+ atomic_sub();
+ atomic_inc();
+ atomic_dec();
+
+If they're used for statistics generation, then they probably don't need memory
+barriers, unless there's a coupling between statistical data.
+
+If they're used for reference counting on an object to control its lifetime,
+they probably don't need memory barriers because either the reference count
+will be adjusted inside a locked section, or the caller will already hold
+sufficient references to make the lock, and thus a memory barrier unnecessary.
+
+If they're used for constructing a lock of some description, then they probably
+do need memory barriers as a lock primitive generally has to do things in a
+specific order.
+
+Basically, each usage case has to be carefully considered as to whether memory
+barriers are needed or not.
+
+The following operations are special locking primitives:
+
+ test_and_set_bit_lock();
+ clear_bit_unlock();
+ __clear_bit_unlock();
+
+These implement ACQUIRE-class and RELEASE-class operations. These should be used in
+preference to other operations when implementing locking primitives, because
+their implementations can be optimised on many architectures.
+
+[!] Note that special memory barrier primitives are available for these
+situations because on some CPUs the atomic instructions used imply full memory
+barriers, and so barrier instructions are superfluous in conjunction with them,
+and in such cases the special barrier primitives will be no-ops.
+
+See Documentation/atomic_ops.txt for more information.
+
+
+ACCESSING DEVICES
+-----------------
+
+Many devices can be memory mapped, and so appear to the CPU as if they're just
+a set of memory locations. To control such a device, the driver usually has to
+make the right memory accesses in exactly the right order.
+
+However, having a clever CPU or a clever compiler creates a potential problem
+in that the carefully sequenced accesses in the driver code won't reach the
+device in the requisite order if the CPU or the compiler thinks it is more
+efficient to reorder, combine or merge accesses - something that would cause
+the device to malfunction.
+
+Inside of the Linux kernel, I/O should be done through the appropriate accessor
+routines - such as inb() or writel() - which know how to make such accesses
+appropriately sequential. Whilst this, for the most part, renders the explicit
+use of memory barriers unnecessary, there are a couple of situations where they
+might be needed:
+
+ (1) On some systems, I/O stores are not strongly ordered across all CPUs, and
+ so for _all_ general drivers locks should be used and mmiowb() must be
+ issued prior to unlocking the critical section.
+
+ (2) If the accessor functions are used to refer to an I/O memory window with
+ relaxed memory access properties, then _mandatory_ memory barriers are
+ required to enforce ordering.
+
+See Documentation/DocBook/deviceiobook.tmpl for more information.
+
+
+INTERRUPTS
+----------
+
+A driver may be interrupted by its own interrupt service routine, and thus the
+two parts of the driver may interfere with each other's attempts to control or
+access the device.
+
+This may be alleviated - at least in part - by disabling local interrupts (a
+form of locking), such that the critical operations are all contained within
+the interrupt-disabled section in the driver. Whilst the driver's interrupt
+routine is executing, the driver's core may not run on the same CPU, and its
+interrupt is not permitted to happen again until the current interrupt has been
+handled, thus the interrupt handler does not need to lock against that.
+
+However, consider a driver that was talking to an ethernet card that sports an
+address register and a data register. If that driver's core talks to the card
+under interrupt-disablement and then the driver's interrupt handler is invoked:
+
+ LOCAL IRQ DISABLE
+ writew(ADDR, 3);
+ writew(DATA, y);
+ LOCAL IRQ ENABLE
+ <interrupt>
+ writew(ADDR, 4);
+ q = readw(DATA);
+ </interrupt>
+
+The store to the data register might happen after the second store to the
+address register if ordering rules are sufficiently relaxed:
+
+ STORE *ADDR = 3, STORE *ADDR = 4, STORE *DATA = y, q = LOAD *DATA
+
+
+If ordering rules are relaxed, it must be assumed that accesses done inside an
+interrupt disabled section may leak outside of it and may interleave with
+accesses performed in an interrupt - and vice versa - unless implicit or
+explicit barriers are used.
+
+Normally this won't be a problem because the I/O accesses done inside such
+sections will include synchronous load operations on strictly ordered I/O
+registers that form implicit I/O barriers. If this isn't sufficient then an
+mmiowb() may need to be used explicitly.
+
+
+A similar situation may occur between an interrupt routine and two routines
+running on separate CPUs that communicate with each other. If such a case is
+likely, then interrupt-disabling locks should be used to guarantee ordering.
+
+
+==========================
+KERNEL I/O BARRIER EFFECTS
+==========================
+
+When accessing I/O memory, drivers should use the appropriate accessor
+functions:
+
+ (*) inX(), outX():
+
+ These are intended to talk to I/O space rather than memory space, but
+ that's primarily a CPU-specific concept. The i386 and x86_64 processors do
+ indeed have special I/O space access cycles and instructions, but many
+ CPUs don't have such a concept.
+
+ The PCI bus, amongst others, defines an I/O space concept which - on such
+ CPUs as i386 and x86_64 - readily maps to the CPU's concept of I/O
+ space. However, it may also be mapped as a virtual I/O space in the CPU's
+ memory map, particularly on those CPUs that don't support alternate I/O
+ spaces.
+
+ Accesses to this space may be fully synchronous (as on i386), but
+ intermediary bridges (such as the PCI host bridge) may not fully honour
+ that.
+
+ They are guaranteed to be fully ordered with respect to each other.
+
+ They are not guaranteed to be fully ordered with respect to other types of
+ memory and I/O operation.
+
+ (*) readX(), writeX():
+
+ Whether these are guaranteed to be fully ordered and uncombined with
+ respect to each other on the issuing CPU depends on the characteristics
+ defined for the memory window through which they're accessing. On later
+ i386 architecture machines, for example, this is controlled by way of the
+ MTRR registers.
+
+ Ordinarily, these will be guaranteed to be fully ordered and uncombined,
+ provided they're not accessing a prefetchable device.
+
+ However, intermediary hardware (such as a PCI bridge) may indulge in
+ deferral if it so wishes; to flush a store, a load from the same location
+ is preferred[*], but a load from the same device or from configuration
+ space should suffice for PCI.
+
+ [*] NOTE! attempting to load from the same location as was written to may
+ cause a malfunction - consider the 16550 Rx/Tx serial registers for
+ example.
+
+ Used with prefetchable I/O memory, an mmiowb() barrier may be required to
+ force stores to be ordered.
+
+ Please refer to the PCI specification for more information on interactions
+ between PCI transactions.
+
+ (*) readX_relaxed(), writeX_relaxed()
+
+ These are similar to readX() and writeX(), but provide weaker memory
+ ordering guarantees. Specifically, they do not guarantee ordering with
+ respect to normal memory accesses (e.g. DMA buffers) nor do they guarantee
+ ordering with respect to LOCK or UNLOCK operations. If the latter is
+ required, an mmiowb() barrier can be used. Note that relaxed accesses to
+ the same peripheral are guaranteed to be ordered with respect to each
+ other.
+
+ (*) ioreadX(), iowriteX()
+
+ These will perform appropriately for the type of access they're actually
+ doing, be it inX()/outX() or readX()/writeX().
+
+
+========================================
+ASSUMED MINIMUM EXECUTION ORDERING MODEL
+========================================
+
+It has to be assumed that the conceptual CPU is weakly-ordered but that it will
+maintain the appearance of program causality with respect to itself. Some CPUs
+(such as i386 or x86_64) are more constrained than others (such as powerpc or
+frv), and so the most relaxed case (namely DEC Alpha) must be assumed outside
+of arch-specific code.
+
+This means that it must be considered that the CPU will execute its instruction
+stream in any order it feels like - or even in parallel - provided that if an
+instruction in the stream depends on an earlier instruction, then that
+earlier instruction must be sufficiently complete[*] before the later
+instruction may proceed; in other words: provided that the appearance of
+causality is maintained.
+
+ [*] Some instructions have more than one effect - such as changing the
+ condition codes, changing registers or changing memory - and different
+ instructions may depend on different effects.
+
+A CPU may also discard any instruction sequence that winds up having no
+ultimate effect. For example, if two adjacent instructions both load an
+immediate value into the same register, the first may be discarded.
+
+
+Similarly, it has to be assumed that compiler might reorder the instruction
+stream in any way it sees fit, again provided the appearance of causality is
+maintained.
+
+
+============================
+THE EFFECTS OF THE CPU CACHE
+============================
+
+The way cached memory operations are perceived across the system is affected to
+a certain extent by the caches that lie between CPUs and memory, and by the
+memory coherence system that maintains the consistency of state in the system.
+
+As far as the way a CPU interacts with another part of the system through the
+caches goes, the memory system has to include the CPU's caches, and memory
+barriers for the most part act at the interface between the CPU and its cache
+(memory barriers logically act on the dotted line in the following diagram):
+
+ <--- CPU ---> : <----------- Memory ----------->
+ :
+ +--------+ +--------+ : +--------+ +-----------+
+ | | | | : | | | | +--------+
+ | CPU | | Memory | : | CPU | | | | |
+ | Core |--->| Access |----->| Cache |<-->| | | |
+ | | | Queue | : | | | |--->| Memory |
+ | | | | : | | | | | |
+ +--------+ +--------+ : +--------+ | | | |
+ : | Cache | +--------+
+ : | Coherency |
+ : | Mechanism | +--------+
+ +--------+ +--------+ : +--------+ | | | |
+ | | | | : | | | | | |
+ | CPU | | Memory | : | CPU | | |--->| Device |
+ | Core |--->| Access |----->| Cache |<-->| | | |
+ | | | Queue | : | | | | | |
+ | | | | : | | | | +--------+
+ +--------+ +--------+ : +--------+ +-----------+
+ :
+ :
+
+Although any particular load or store may not actually appear outside of the
+CPU that issued it since it may have been satisfied within the CPU's own cache,
+it will still appear as if the full memory access had taken place as far as the
+other CPUs are concerned since the cache coherency mechanisms will migrate the
+cacheline over to the accessing CPU and propagate the effects upon conflict.
+
+The CPU core may execute instructions in any order it deems fit, provided the
+expected program causality appears to be maintained. Some of the instructions
+generate load and store operations which then go into the queue of memory
+accesses to be performed. The core may place these in the queue in any order
+it wishes, and continue execution until it is forced to wait for an instruction
+to complete.
+
+What memory barriers are concerned with is controlling the order in which
+accesses cross from the CPU side of things to the memory side of things, and
+the order in which the effects are perceived to happen by the other observers
+in the system.
+
+[!] Memory barriers are _not_ needed within a given CPU, as CPUs always see
+their own loads and stores as if they had happened in program order.
+
+[!] MMIO or other device accesses may bypass the cache system. This depends on
+the properties of the memory window through which devices are accessed and/or
+the use of any special device communication instructions the CPU may have.
+
+
+CACHE COHERENCY
+---------------
+
+Life isn't quite as simple as it may appear above, however: for while the
+caches are expected to be coherent, there's no guarantee that that coherency
+will be ordered. This means that whilst changes made on one CPU will
+eventually become visible on all CPUs, there's no guarantee that they will
+become apparent in the same order on those other CPUs.
+
+
+Consider dealing with a system that has a pair of CPUs (1 & 2), each of which
+has a pair of parallel data caches (CPU 1 has A/B, and CPU 2 has C/D):
+
+ :
+ : +--------+
+ : +---------+ | |
+ +--------+ : +--->| Cache A |<------->| |
+ | | : | +---------+ | |
+ | CPU 1 |<---+ | |
+ | | : | +---------+ | |
+ +--------+ : +--->| Cache B |<------->| |
+ : +---------+ | |
+ : | Memory |
+ : +---------+ | System |
+ +--------+ : +--->| Cache C |<------->| |
+ | | : | +---------+ | |
+ | CPU 2 |<---+ | |
+ | | : | +---------+ | |
+ +--------+ : +--->| Cache D |<------->| |
+ : +---------+ | |
+ : +--------+
+ :
+
+Imagine the system has the following properties:
+
+ (*) an odd-numbered cache line may be in cache A, cache C or it may still be
+ resident in memory;
+
+ (*) an even-numbered cache line may be in cache B, cache D or it may still be
+ resident in memory;
+
+ (*) whilst the CPU core is interrogating one cache, the other cache may be
+ making use of the bus to access the rest of the system - perhaps to
+ displace a dirty cacheline or to do a speculative load;
+
+ (*) each cache has a queue of operations that need to be applied to that cache
+ to maintain coherency with the rest of the system;
+
+ (*) the coherency queue is not flushed by normal loads to lines already
+ present in the cache, even though the contents of the queue may
+ potentially affect those loads.
+
+Imagine, then, that two writes are made on the first CPU, with a write barrier
+between them to guarantee that they will appear to reach that CPU's caches in
+the requisite order:
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ u == 0, v == 1 and p == &u, q == &u
+ v = 2;
+ smp_wmb(); Make sure change to v is visible before
+ change to p
+ <A:modify v=2> v is now in cache A exclusively
+ p = &v;
+ <B:modify p=&v> p is now in cache B exclusively
+
+The write memory barrier forces the other CPUs in the system to perceive that
+the local CPU's caches have apparently been updated in the correct order. But
+now imagine that the second CPU wants to read those values:
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ ...
+ q = p;
+ x = *q;
+
+The above pair of reads may then fail to happen in the expected order, as the
+cacheline holding p may get updated in one of the second CPU's caches whilst
+the update to the cacheline holding v is delayed in the other of the second
+CPU's caches by some other cache event:
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ u == 0, v == 1 and p == &u, q == &u
+ v = 2;
+ smp_wmb();
+ <A:modify v=2> <C:busy>
+ <C:queue v=2>
+ p = &v; q = p;
+ <D:request p>
+ <B:modify p=&v> <D:commit p=&v>
+ <D:read p>
+ x = *q;
+ <C:read *q> Reads from v before v updated in cache
+ <C:unbusy>
+ <C:commit v=2>
+
+Basically, whilst both cachelines will be updated on CPU 2 eventually, there's
+no guarantee that, without intervention, the order of update will be the same
+as that committed on CPU 1.
+
+
+To intervene, we need to interpolate a data dependency barrier or a read
+barrier between the loads. This will force the cache to commit its coherency
+queue before processing any further requests:
+
+ CPU 1 CPU 2 COMMENT
+ =============== =============== =======================================
+ u == 0, v == 1 and p == &u, q == &u
+ v = 2;
+ smp_wmb();
+ <A:modify v=2> <C:busy>
+ <C:queue v=2>
+ p = &v; q = p;
+ <D:request p>
+ <B:modify p=&v> <D:commit p=&v>
+ <D:read p>
+ smp_read_barrier_depends()
+ <C:unbusy>
+ <C:commit v=2>
+ x = *q;
+ <C:read *q> Reads from v after v updated in cache
+
+
+This sort of problem can be encountered on DEC Alpha processors as they have a
+split cache that improves performance by making better use of the data bus.
+Whilst most CPUs do imply a data dependency barrier on the read when a memory
+access depends on a read, not all do, so it may not be relied on.
+
+Other CPUs may also have split caches, but must coordinate between the various
+cachelets for normal memory accesses. The semantics of the Alpha removes the
+need for coordination in the absence of memory barriers.
+
+
+CACHE COHERENCY VS DMA
+----------------------
+
+Not all systems maintain cache coherency with respect to devices doing DMA. In
+such cases, a device attempting DMA may obtain stale data from RAM because
+dirty cache lines may be resident in the caches of various CPUs, and may not
+have been written back to RAM yet. To deal with this, the appropriate part of
+the kernel must flush the overlapping bits of cache on each CPU (and maybe
+invalidate them as well).
+
+In addition, the data DMA'd to RAM by a device may be overwritten by dirty
+cache lines being written back to RAM from a CPU's cache after the device has
+installed its own data, or cache lines present in the CPU's cache may simply
+obscure the fact that RAM has been updated, until at such time as the cacheline
+is discarded from the CPU's cache and reloaded. To deal with this, the
+appropriate part of the kernel must invalidate the overlapping bits of the
+cache on each CPU.
+
+See Documentation/cachetlb.txt for more information on cache management.
+
+
+CACHE COHERENCY VS MMIO
+-----------------------
+
+Memory mapped I/O usually takes place through memory locations that are part of
+a window in the CPU's memory space that has different properties assigned than
+the usual RAM directed window.
+
+Amongst these properties is usually the fact that such accesses bypass the
+caching entirely and go directly to the device buses. This means MMIO accesses
+may, in effect, overtake accesses to cached memory that were emitted earlier.
+A memory barrier isn't sufficient in such a case, but rather the cache must be
+flushed between the cached memory write and the MMIO access if the two are in
+any way dependent.
+
+
+=========================
+THE THINGS CPUS GET UP TO
+=========================
+
+A programmer might take it for granted that the CPU will perform memory
+operations in exactly the order specified, so that if the CPU is, for example,
+given the following piece of code to execute:
+
+ a = ACCESS_ONCE(*A);
+ ACCESS_ONCE(*B) = b;
+ c = ACCESS_ONCE(*C);
+ d = ACCESS_ONCE(*D);
+ ACCESS_ONCE(*E) = e;
+
+they would then expect that the CPU will complete the memory operation for each
+instruction before moving on to the next one, leading to a definite sequence of
+operations as seen by external observers in the system:
+
+ LOAD *A, STORE *B, LOAD *C, LOAD *D, STORE *E.
+
+
+Reality is, of course, much messier. With many CPUs and compilers, the above
+assumption doesn't hold because:
+
+ (*) loads are more likely to need to be completed immediately to permit
+ execution progress, whereas stores can often be deferred without a
+ problem;
+
+ (*) loads may be done speculatively, and the result discarded should it prove
+ to have been unnecessary;
+
+ (*) loads may be done speculatively, leading to the result having been fetched
+ at the wrong time in the expected sequence of events;
+
+ (*) the order of the memory accesses may be rearranged to promote better use
+ of the CPU buses and caches;
+
+ (*) loads and stores may be combined to improve performance when talking to
+ memory or I/O hardware that can do batched accesses of adjacent locations,
+ thus cutting down on transaction setup costs (memory and PCI devices may
+ both be able to do this); and
+
+ (*) the CPU's data cache may affect the ordering, and whilst cache-coherency
+ mechanisms may alleviate this - once the store has actually hit the cache
+ - there's no guarantee that the coherency management will be propagated in
+ order to other CPUs.
+
+So what another CPU, say, might actually observe from the above piece of code
+is:
+
+ LOAD *A, ..., LOAD {*C,*D}, STORE *E, STORE *B
+
+ (Where "LOAD {*C,*D}" is a combined load)
+
+
+However, it is guaranteed that a CPU will be self-consistent: it will see its
+_own_ accesses appear to be correctly ordered, without the need for a memory
+barrier. For instance with the following code:
+
+ U = ACCESS_ONCE(*A);
+ ACCESS_ONCE(*A) = V;
+ ACCESS_ONCE(*A) = W;
+ X = ACCESS_ONCE(*A);
+ ACCESS_ONCE(*A) = Y;
+ Z = ACCESS_ONCE(*A);
+
+and assuming no intervention by an external influence, it can be assumed that
+the final result will appear to be:
+
+ U == the original value of *A
+ X == W
+ Z == Y
+ *A == Y
+
+The code above may cause the CPU to generate the full sequence of memory
+accesses:
+
+ U=LOAD *A, STORE *A=V, STORE *A=W, X=LOAD *A, STORE *A=Y, Z=LOAD *A
+
+in that order, but, without intervention, the sequence may have almost any
+combination of elements combined or discarded, provided the program's view of
+the world remains consistent. Note that ACCESS_ONCE() is -not- optional
+in the above example, as there are architectures where a given CPU might
+reorder successive loads to the same location. On such architectures,
+ACCESS_ONCE() does whatever is necessary to prevent this, for example, on
+Itanium the volatile casts used by ACCESS_ONCE() cause GCC to emit the
+special ld.acq and st.rel instructions that prevent such reordering.
+
+The compiler may also combine, discard or defer elements of the sequence before
+the CPU even sees them.
+
+For instance:
+
+ *A = V;
+ *A = W;
+
+may be reduced to:
+
+ *A = W;
+
+since, without either a write barrier or an ACCESS_ONCE(), it can be
+assumed that the effect of the storage of V to *A is lost. Similarly:
+
+ *A = Y;
+ Z = *A;
+
+may, without a memory barrier or an ACCESS_ONCE(), be reduced to:
+
+ *A = Y;
+ Z = Y;
+
+and the LOAD operation never appear outside of the CPU.
+
+
+AND THEN THERE'S THE ALPHA
+--------------------------
+
+The DEC Alpha CPU is one of the most relaxed CPUs there is. Not only that,
+some versions of the Alpha CPU have a split data cache, permitting them to have
+two semantically-related cache lines updated at separate times. This is where
+the data dependency barrier really becomes necessary as this synchronises both
+caches with the memory coherence system, thus making it seem like pointer
+changes vs new data occur in the right order.
+
+The Alpha defines the Linux kernel's memory barrier model.
+
+See the subsection on "Cache Coherency" above.
+
+
+============
+EXAMPLE USES
+============
+
+CIRCULAR BUFFERS
+----------------
+
+Memory barriers can be used to implement circular buffering without the need
+of a lock to serialise the producer with the consumer. See:
+
+ Documentation/circular-buffers.txt
+
+for details.
+
+
+==========
+REFERENCES
+==========
+
+Alpha AXP Architecture Reference Manual, Second Edition (Sites & Witek,
+Digital Press)
+ Chapter 5.2: Physical Address Space Characteristics
+ Chapter 5.4: Caches and Write Buffers
+ Chapter 5.5: Data Sharing
+ Chapter 5.6: Read/Write Ordering
+
+AMD64 Architecture Programmer's Manual Volume 2: System Programming
+ Chapter 7.1: Memory-Access Ordering
+ Chapter 7.4: Buffering and Combining Memory Writes
+
+IA-32 Intel Architecture Software Developer's Manual, Volume 3:
+System Programming Guide
+ Chapter 7.1: Locked Atomic Operations
+ Chapter 7.2: Memory Ordering
+ Chapter 7.4: Serializing Instructions
+
+The SPARC Architecture Manual, Version 9
+ Chapter 8: Memory Models
+ Appendix D: Formal Specification of the Memory Models
+ Appendix J: Programming with the Memory Models
+
+UltraSPARC Programmer Reference Manual
+ Chapter 5: Memory Accesses and Cacheability
+ Chapter 15: Sparc-V9 Memory Models
+
+UltraSPARC III Cu User's Manual
+ Chapter 9: Memory Models
+
+UltraSPARC IIIi Processor User's Manual
+ Chapter 8: Memory Models
+
+UltraSPARC Architecture 2005
+ Chapter 9: Memory
+ Appendix D: Formal Specifications of the Memory Models
+
+UltraSPARC T1 Supplement to the UltraSPARC Architecture 2005
+ Chapter 8: Memory Models
+ Appendix F: Caches and Cache Coherency
+
+Solaris Internals, Core Kernel Architecture, p63-68:
+ Chapter 3.3: Hardware Considerations for Locks and
+ Synchronization
+
+Unix Systems for Modern Architectures, Symmetric Multiprocessing and Caching
+for Kernel Programmers:
+ Chapter 13: Other Memory Models
+
+Intel Itanium Architecture Software Developer's Manual: Volume 1:
+ Section 2.6: Speculation
+ Section 4.4: Memory Access
diff --git a/Documentation/memory-devices/ti-emif.txt b/Documentation/memory-devices/ti-emif.txt
new file mode 100644
index 000000000..f4ad9a7d0
--- /dev/null
+++ b/Documentation/memory-devices/ti-emif.txt
@@ -0,0 +1,57 @@
+TI EMIF SDRAM Controller Driver:
+
+Author
+========
+Aneesh V <aneesh@ti.com>
+
+Location
+============
+driver/memory/emif.c
+
+Supported SoCs:
+===================
+TI OMAP44xx
+TI OMAP54xx
+
+Menuconfig option:
+==========================
+Device Drivers
+ Memory devices
+ Texas Instruments EMIF driver
+
+Description
+===========
+This driver is for the EMIF module available in Texas Instruments
+SoCs. EMIF is an SDRAM controller that, based on its revision,
+supports one or more of DDR2, DDR3, and LPDDR2 SDRAM protocols.
+This driver takes care of only LPDDR2 memories presently. The
+functions of the driver includes re-configuring AC timing
+parameters and other settings during frequency, voltage and
+temperature changes
+
+Platform Data (see include/linux/platform_data/emif_plat.h):
+=====================================================================
+DDR device details and other board dependent and SoC dependent
+information can be passed through platform data (struct emif_platform_data)
+- DDR device details: 'struct ddr_device_info'
+- Device AC timings: 'struct lpddr2_timings' and 'struct lpddr2_min_tck'
+- Custom configurations: customizable policy options through
+ 'struct emif_custom_configs'
+- IP revision
+- PHY type
+
+Interface to the external world:
+================================
+EMIF driver registers notifiers for voltage and frequency changes
+affecting EMIF and takes appropriate actions when these are invoked.
+- freq_pre_notify_handling()
+- freq_post_notify_handling()
+- volt_notify_handling()
+
+Debugfs
+========
+The driver creates two debugfs entries per device.
+- regcache_dump : dump of register values calculated and saved for all
+ frequencies used so far.
+- mr4 : last polled value of MR4 register in the LPDDR2 device. MR4
+ indicates the current temperature level of the device.
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
new file mode 100644
index 000000000..ce2cfcf35
--- /dev/null
+++ b/Documentation/memory-hotplug.txt
@@ -0,0 +1,450 @@
+==============
+Memory Hotplug
+==============
+
+Created: Jul 28 2007
+Add description of notifier of memory hotplug Oct 11 2007
+
+This document is about memory hotplug including how-to-use and current status.
+Because Memory Hotplug is still under development, contents of this text will
+be changed often.
+
+1. Introduction
+ 1.1 purpose of memory hotplug
+ 1.2. Phases of memory hotplug
+ 1.3. Unit of Memory online/offline operation
+2. Kernel Configuration
+3. sysfs files for memory hotplug
+4. Physical memory hot-add phase
+ 4.1 Hardware(Firmware) Support
+ 4.2 Notify memory hot-add event by hand
+5. Logical Memory hot-add phase
+ 5.1. State of memory
+ 5.2. How to online memory
+6. Logical memory remove
+ 6.1 Memory offline and ZONE_MOVABLE
+ 6.2. How to offline memory
+7. Physical memory remove
+8. Memory hotplug event notifier
+9. Future Work List
+
+Note(1): x86_64's has special implementation for memory hotplug.
+ This text does not describe it.
+Note(2): This text assumes that sysfs is mounted at /sys.
+
+
+---------------
+1. Introduction
+---------------
+
+1.1 purpose of memory hotplug
+------------
+Memory Hotplug allows users to increase/decrease the amount of memory.
+Generally, there are two purposes.
+
+(A) For changing the amount of memory.
+ This is to allow a feature like capacity on demand.
+(B) For installing/removing DIMMs or NUMA-nodes physically.
+ This is to exchange DIMMs/NUMA-nodes, reduce power consumption, etc.
+
+(A) is required by highly virtualized environments and (B) is required by
+hardware which supports memory power management.
+
+Linux memory hotplug is designed for both purpose.
+
+
+1.2. Phases of memory hotplug
+---------------
+There are 2 phases in Memory Hotplug.
+ 1) Physical Memory Hotplug phase
+ 2) Logical Memory Hotplug phase.
+
+The First phase is to communicate hardware/firmware and make/erase
+environment for hotplugged memory. Basically, this phase is necessary
+for the purpose (B), but this is good phase for communication between
+highly virtualized environments too.
+
+When memory is hotplugged, the kernel recognizes new memory, makes new memory
+management tables, and makes sysfs files for new memory's operation.
+
+If firmware supports notification of connection of new memory to OS,
+this phase is triggered automatically. ACPI can notify this event. If not,
+"probe" operation by system administration is used instead.
+(see Section 4.).
+
+Logical Memory Hotplug phase is to change memory state into
+available/unavailable for users. Amount of memory from user's view is
+changed by this phase. The kernel makes all memory in it as free pages
+when a memory range is available.
+
+In this document, this phase is described as online/offline.
+
+Logical Memory Hotplug phase is triggered by write of sysfs file by system
+administrator. For the hot-add case, it must be executed after Physical Hotplug
+phase by hand.
+(However, if you writes udev's hotplug scripts for memory hotplug, these
+ phases can be execute in seamless way.)
+
+
+1.3. Unit of Memory online/offline operation
+------------
+Memory hotplug uses SPARSEMEM memory model which allows memory to be divided
+into chunks of the same size. These chunks are called "sections". The size of
+a memory section is architecture dependent. For example, power uses 16MiB, ia64
+uses 1GiB.
+
+Memory sections are combined into chunks referred to as "memory blocks". The
+size of a memory block is architecture dependent and represents the logical
+unit upon which memory online/offline operations are to be performed. The
+default size of a memory block is the same as memory section size unless an
+architecture specifies otherwise. (see Section 3.)
+
+To determine the size (in bytes) of a memory block please read this file:
+
+/sys/devices/system/memory/block_size_bytes
+
+
+-----------------------
+2. Kernel Configuration
+-----------------------
+To use memory hotplug feature, kernel must be compiled with following
+config options.
+
+- For all memory hotplug
+ Memory model -> Sparse Memory (CONFIG_SPARSEMEM)
+ Allow for memory hot-add (CONFIG_MEMORY_HOTPLUG)
+
+- To enable memory removal, the followings are also necessary
+ Allow for memory hot remove (CONFIG_MEMORY_HOTREMOVE)
+ Page Migration (CONFIG_MIGRATION)
+
+- For ACPI memory hotplug, the followings are also necessary
+ Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
+ This option can be kernel module.
+
+- As a related configuration, if your box has a feature of NUMA-node hotplug
+ via ACPI, then this option is necessary too.
+ ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
+ (CONFIG_ACPI_CONTAINER).
+ This option can be kernel module too.
+
+
+--------------------------------
+3 sysfs files for memory hotplug
+--------------------------------
+All memory blocks have their device information in sysfs. Each memory block
+is described under /sys/devices/system/memory as
+
+/sys/devices/system/memory/memoryXXX
+(XXX is the memory block id.)
+
+For the memory block covered by the sysfs directory. It is expected that all
+memory sections in this range are present and no memory holes exist in the
+range. Currently there is no way to determine if there is a memory hole, but
+the existence of one should not affect the hotplug capabilities of the memory
+block.
+
+For example, assume 1GiB memory block size. A device for a memory starting at
+0x100000000 is /sys/device/system/memory/memory4
+(0x100000000 / 1Gib = 4)
+This device covers address range [0x100000000 ... 0x140000000)
+
+Under each memory block, you can see 5 files:
+
+/sys/devices/system/memory/memoryXXX/phys_index
+/sys/devices/system/memory/memoryXXX/phys_device
+/sys/devices/system/memory/memoryXXX/state
+/sys/devices/system/memory/memoryXXX/removable
+/sys/devices/system/memory/memoryXXX/valid_zones
+
+'phys_index' : read-only and contains memory block id, same as XXX.
+'state' : read-write
+ at read: contains online/offline state of memory.
+ at write: user can specify "online_kernel",
+ "online_movable", "online", "offline" command
+ which will be performed on all sections in the block.
+'phys_device' : read-only: designed to show the name of physical memory
+ device. This is not well implemented now.
+'removable' : read-only: contains an integer value indicating
+ whether the memory block is removable or not
+ removable. A value of 1 indicates that the memory
+ block is removable and a value of 0 indicates that
+ it is not removable. A memory block is removable only if
+ every section in the block is removable.
+'valid_zones' : read-only: designed to show which zones this memory block
+ can be onlined to.
+ The first column shows it's default zone.
+ "memory6/valid_zones: Normal Movable" shows this memoryblock
+ can be onlined to ZONE_NORMAL by default and to ZONE_MOVABLE
+ by online_movable.
+ "memory7/valid_zones: Movable Normal" shows this memoryblock
+ can be onlined to ZONE_MOVABLE by default and to ZONE_NORMAL
+ by online_kernel.
+
+NOTE:
+ These directories/files appear after physical memory hotplug phase.
+
+If CONFIG_NUMA is enabled the memoryXXX/ directories can also be accessed
+via symbolic links located in the /sys/devices/system/node/node* directories.
+
+For example:
+/sys/devices/system/node/node0/memory9 -> ../../memory/memory9
+
+A backlink will also be created:
+/sys/devices/system/memory/memory9/node0 -> ../../node/node0
+
+
+--------------------------------
+4. Physical memory hot-add phase
+--------------------------------
+
+4.1 Hardware(Firmware) Support
+------------
+On x86_64/ia64 platform, memory hotplug by ACPI is supported.
+
+In general, the firmware (ACPI) which supports memory hotplug defines
+memory class object of _HID "PNP0C80". When a notify is asserted to PNP0C80,
+Linux's ACPI handler does hot-add memory to the system and calls a hotplug udev
+script. This will be done automatically.
+
+But scripts for memory hotplug are not contained in generic udev package(now).
+You may have to write it by yourself or online/offline memory by hand.
+Please see "How to online memory", "How to offline memory" in this text.
+
+If firmware supports NUMA-node hotplug, and defines an object _HID "ACPI0004",
+"PNP0A05", or "PNP0A06", notification is asserted to it, and ACPI handler
+calls hotplug code for all of objects which are defined in it.
+If memory device is found, memory hotplug code will be called.
+
+
+4.2 Notify memory hot-add event by hand
+------------
+On some architectures, the firmware may not notify the kernel of a memory
+hotplug event. Therefore, the memory "probe" interface is supported to
+explicitly notify the kernel. This interface depends on
+CONFIG_ARCH_MEMORY_PROBE and can be configured on powerpc, sh, and x86
+if hotplug is supported, although for x86 this should be handled by ACPI
+notification.
+
+Probe interface is located at
+/sys/devices/system/memory/probe
+
+You can tell the physical address of new memory to the kernel by
+
+% echo start_address_of_new_memory > /sys/devices/system/memory/probe
+
+Then, [start_address_of_new_memory, start_address_of_new_memory +
+memory_block_size] memory range is hot-added. In this case, hotplug script is
+not called (in current implementation). You'll have to online memory by
+yourself. Please see "How to online memory" in this text.
+
+
+------------------------------
+5. Logical Memory hot-add phase
+------------------------------
+
+5.1. State of memory
+------------
+To see (online/offline) state of a memory block, read 'state' file.
+
+% cat /sys/device/system/memory/memoryXXX/state
+
+
+If the memory block is online, you'll read "online".
+If the memory block is offline, you'll read "offline".
+
+
+5.2. How to online memory
+------------
+Even if the memory is hot-added, it is not at ready-to-use state.
+For using newly added memory, you have to "online" the memory block.
+
+For onlining, you have to write "online" to the memory block's state file as:
+
+% echo online > /sys/devices/system/memory/memoryXXX/state
+
+This onlining will not change the ZONE type of the target memory block,
+If the memory block is in ZONE_NORMAL, you can change it to ZONE_MOVABLE:
+
+% echo online_movable > /sys/devices/system/memory/memoryXXX/state
+(NOTE: current limit: this memory block must be adjacent to ZONE_MOVABLE)
+
+And if the memory block is in ZONE_MOVABLE, you can change it to ZONE_NORMAL:
+
+% echo online_kernel > /sys/devices/system/memory/memoryXXX/state
+(NOTE: current limit: this memory block must be adjacent to ZONE_NORMAL)
+
+After this, memory block XXX's state will be 'online' and the amount of
+available memory will be increased.
+
+Currently, newly added memory is added as ZONE_NORMAL (for powerpc, ZONE_DMA).
+This may be changed in future.
+
+
+
+------------------------
+6. Logical memory remove
+------------------------
+
+6.1 Memory offline and ZONE_MOVABLE
+------------
+Memory offlining is more complicated than memory online. Because memory offline
+has to make the whole memory block be unused, memory offline can fail if
+the memory block includes memory which cannot be freed.
+
+In general, memory offline can use 2 techniques.
+
+(1) reclaim and free all memory in the memory block.
+(2) migrate all pages in the memory block.
+
+In the current implementation, Linux's memory offline uses method (2), freeing
+all pages in the memory block by page migration. But not all pages are
+migratable. Under current Linux, migratable pages are anonymous pages and
+page caches. For offlining a memory block by migration, the kernel has to
+guarantee that the memory block contains only migratable pages.
+
+Now, a boot option for making a memory block which consists of migratable pages
+is supported. By specifying "kernelcore=" or "movablecore=" boot option, you can
+create ZONE_MOVABLE...a zone which is just used for movable pages.
+(See also Documentation/kernel-parameters.txt)
+
+Assume the system has "TOTAL" amount of memory at boot time, this boot option
+creates ZONE_MOVABLE as following.
+
+1) When kernelcore=YYYY boot option is used,
+ Size of memory not for movable pages (not for offline) is YYYY.
+ Size of memory for movable pages (for offline) is TOTAL-YYYY.
+
+2) When movablecore=ZZZZ boot option is used,
+ Size of memory not for movable pages (not for offline) is TOTAL - ZZZZ.
+ Size of memory for movable pages (for offline) is ZZZZ.
+
+
+Note: Unfortunately, there is no information to show which memory block belongs
+to ZONE_MOVABLE. This is TBD.
+
+
+6.2. How to offline memory
+------------
+You can offline a memory block by using the same sysfs interface that was used
+in memory onlining.
+
+% echo offline > /sys/devices/system/memory/memoryXXX/state
+
+If offline succeeds, the state of the memory block is changed to be "offline".
+If it fails, some error core (like -EBUSY) will be returned by the kernel.
+Even if a memory block does not belong to ZONE_MOVABLE, you can try to offline
+it. If it doesn't contain 'unmovable' memory, you'll get success.
+
+A memory block under ZONE_MOVABLE is considered to be able to be offlined
+easily. But under some busy state, it may return -EBUSY. Even if a memory
+block cannot be offlined due to -EBUSY, you can retry offlining it and may be
+able to offline it (or not). (For example, a page is referred to by some kernel
+internal call and released soon.)
+
+Consideration:
+Memory hotplug's design direction is to make the possibility of memory offlining
+higher and to guarantee unplugging memory under any situation. But it needs
+more work. Returning -EBUSY under some situation may be good because the user
+can decide to retry more or not by himself. Currently, memory offlining code
+does some amount of retry with 120 seconds timeout.
+
+-------------------------
+7. Physical memory remove
+-------------------------
+Need more implementation yet....
+ - Notification completion of remove works by OS to firmware.
+ - Guard from remove if not yet.
+
+--------------------------------
+8. Memory hotplug event notifier
+--------------------------------
+Hotplugging events are sent to a notification queue.
+
+There are six types of notification defined in include/linux/memory.h:
+
+MEM_GOING_ONLINE
+ Generated before new memory becomes available in order to be able to
+ prepare subsystems to handle memory. The page allocator is still unable
+ to allocate from the new memory.
+
+MEM_CANCEL_ONLINE
+ Generated if MEMORY_GOING_ONLINE fails.
+
+MEM_ONLINE
+ Generated when memory has successfully brought online. The callback may
+ allocate pages from the new memory.
+
+MEM_GOING_OFFLINE
+ Generated to begin the process of offlining memory. Allocations are no
+ longer possible from the memory but some of the memory to be offlined
+ is still in use. The callback can be used to free memory known to a
+ subsystem from the indicated memory block.
+
+MEM_CANCEL_OFFLINE
+ Generated if MEMORY_GOING_OFFLINE fails. Memory is available again from
+ the memory block that we attempted to offline.
+
+MEM_OFFLINE
+ Generated after offlining memory is complete.
+
+A callback routine can be registered by calling
+
+ hotplug_memory_notifier(callback_func, priority)
+
+Callback functions with higher values of priority are called before callback
+functions with lower values.
+
+A callback function must have the following prototype:
+
+ int callback_func(
+ struct notifier_block *self, unsigned long action, void *arg);
+
+The first argument of the callback function (self) is a pointer to the block
+of the notifier chain that points to the callback function itself.
+The second argument (action) is one of the event types described above.
+The third argument (arg) passes a pointer of struct memory_notify.
+
+struct memory_notify {
+ unsigned long start_pfn;
+ unsigned long nr_pages;
+ int status_change_nid_normal;
+ int status_change_nid_high;
+ int status_change_nid;
+}
+
+start_pfn is start_pfn of online/offline memory.
+nr_pages is # of pages of online/offline memory.
+status_change_nid_normal is set node id when N_NORMAL_MEMORY of nodemask
+is (will be) set/clear, if this is -1, then nodemask status is not changed.
+status_change_nid_high is set node id when N_HIGH_MEMORY of nodemask
+is (will be) set/clear, if this is -1, then nodemask status is not changed.
+status_change_nid is set node id when N_MEMORY of nodemask is (will be)
+set/clear. It means a new(memoryless) node gets new memory by online and a
+node loses all memory. If this is -1, then nodemask status is not changed.
+If status_changed_nid* >= 0, callback should create/discard structures for the
+node if necessary.
+
+The callback routine shall return one of the values
+NOTIFY_DONE, NOTIFY_OK, NOTIFY_BAD, NOTIFY_STOP
+defined in include/linux/notifier.h
+
+NOTIFY_DONE and NOTIFY_OK have no effect on the further processing.
+
+NOTIFY_BAD is used as response to the MEM_GOING_ONLINE, MEM_GOING_OFFLINE,
+MEM_ONLINE, or MEM_OFFLINE action to cancel hotplugging. It stops
+further processing of the notification queue.
+
+NOTIFY_STOP stops further processing of the notification queue.
+
+--------------
+9. Future Work
+--------------
+ - allowing memory hot-add to ZONE_MOVABLE. maybe we need some switch like
+ sysctl or new control file.
+ - showing memory block and physical device relationship.
+ - test and make it better memory offlining.
+ - support HugeTLB page migration and offlining.
+ - memmap removing at memory offline.
+ - physical remove memory.
+
diff --git a/Documentation/metag/00-INDEX b/Documentation/metag/00-INDEX
new file mode 100644
index 000000000..db11c513b
--- /dev/null
+++ b/Documentation/metag/00-INDEX
@@ -0,0 +1,4 @@
+00-INDEX
+ - this file
+kernel-ABI.txt
+ - Documents metag ABI details
diff --git a/Documentation/metag/kernel-ABI.txt b/Documentation/metag/kernel-ABI.txt
new file mode 100644
index 000000000..628216603
--- /dev/null
+++ b/Documentation/metag/kernel-ABI.txt
@@ -0,0 +1,256 @@
+ ==========================
+ KERNEL ABIS FOR METAG ARCH
+ ==========================
+
+This document describes the Linux ABIs for the metag architecture, and has the
+following sections:
+
+ (*) Outline of registers
+ (*) Userland registers
+ (*) Kernel registers
+ (*) System call ABI
+ (*) Calling conventions
+
+
+====================
+OUTLINE OF REGISTERS
+====================
+
+The main Meta core registers are arranged in units:
+
+ UNIT Type DESCRIPTION GP EXT PRIV GLOBAL
+ ======= ======= =============== ======= ======= ======= =======
+ CT Special Control unit
+ D0 General Data unit 0 0-7 8-15 16-31 16-31
+ D1 General Data unit 1 0-7 8-15 16-31 16-31
+ A0 General Address unit 0 0-3 4-7 8-15 8-15
+ A1 General Address unit 1 0-3 4-7 8-15 8-15
+ PC Special PC unit 0 1
+ PORT Special Ports
+ TR Special Trigger unit 0-7
+ TT Special Trace unit 0-5
+ FX General FP unit 0-15
+
+GP registers form part of the main context.
+
+Extended context registers (EXT) may not be present on all hardware threads and
+can be context switched if support is enabled and the appropriate bits are set
+in e.g. the D0.8 register to indicate what extended state to preserve.
+
+Global registers are shared between threads and are privilege protected.
+
+See arch/metag/include/asm/metag_regs.h for definitions relating to core
+registers and the fields and bits they contain. See the TRMs for further details
+about special registers.
+
+Several special registers are preserved in the main context, these are the
+interesting ones:
+
+ REG (ALIAS) PURPOSE
+ ======================= ===============================================
+ CT.1 (TXMODE) Processor mode bits (particularly for DSP)
+ CT.2 (TXSTATUS) Condition flags and LSM_STEP (MGET/MSET step)
+ CT.3 (TXRPT) Branch repeat counter
+ PC.0 (PC) Program counter
+
+Some of the general registers have special purposes in the ABI and therefore
+have aliases:
+
+ D0 REG (ALIAS) PURPOSE D1 REG (ALIAS) PURPOSE
+ =============== =============== =============== =======================
+ D0.0 (D0Re0) 32bit result D1.0 (D1Re0) Top half of 64bit result
+ D0.1 (D0Ar6) Argument 6 D1.1 (D1Ar5) Argument 5
+ D0.2 (D0Ar4) Argument 4 D1.2 (D1Ar3) Argument 3
+ D0.3 (D0Ar2) Argument 2 D1.3 (D1Ar1) Argument 1
+ D0.4 (D0FrT) Frame temp D1.4 (D1RtP) Return pointer
+ D0.5 Call preserved D1.5 Call preserved
+ D0.6 Call preserved D1.6 Call preserved
+ D0.7 Call preserved D1.7 Call preserved
+
+ A0 REG (ALIAS) PURPOSE A1 REG (ALIAS) PURPOSE
+ =============== =============== =============== =======================
+ A0.0 (A0StP) Stack pointer A1.0 (A1GbP) Global base pointer
+ A0.1 (A0FrP) Frame pointer A1.1 (A1LbP) Local base pointer
+ A0.2 A1.2
+ A0.3 A1.3
+
+
+==================
+USERLAND REGISTERS
+==================
+
+All the general purpose D0, D1, A0, A1 registers are preserved when entering the
+kernel (including asynchronous events such as interrupts and timer ticks) except
+the following which have special purposes in the ABI:
+
+ REGISTERS WHEN STATUS PURPOSE
+ =============== ======= =============== ===============================
+ D0.8 DSP Preserved ECH, determines what extended
+ DSP state to preserve.
+ A0.0 (A0StP) ALWAYS Preserved Stack >= A0StP may be clobbered
+ at any time by the creation of a
+ signal frame.
+ A1.0 (A1GbP) SMP Clobbered Used as temporary for loading
+ kernel stack pointer and saving
+ core context.
+ A0.15 !SMP Protected Stores kernel stack pointer.
+ A1.15 ALWAYS Protected Stores kernel base pointer.
+
+On UP A0.15 is used to store the kernel stack pointer for storing the userland
+context. A0.15 is global between hardware threads though which means it cannot
+be used on SMP for this purpose. Since no protected local registers are
+available A1GbP is reserved for use as a temporary to allow a percpu stack
+pointer to be loaded for storing the rest of the context.
+
+
+================
+KERNEL REGISTERS
+================
+
+When in the kernel the following registers have special purposes in the ABI:
+
+ REGISTERS WHEN STATUS PURPOSE
+ =============== ======= =============== ===============================
+ A0.0 (A0StP) ALWAYS Preserved Stack >= A0StP may be clobbered
+ at any time by the creation of
+ an irq signal frame.
+ A1.0 (A1GbP) ALWAYS Preserved Reserved (kernel base pointer).
+
+
+===============
+SYSTEM CALL ABI
+===============
+
+When a system call is made, the following registers are effective:
+
+ REGISTERS CALL RETURN
+ =============== ======================= ===============================
+ D0.0 (D0Re0) Return value (or -errno)
+ D1.0 (D1Re0) System call number Clobbered
+ D0.1 (D0Ar6) Syscall arg #6 Preserved
+ D1.1 (D1Ar5) Syscall arg #5 Preserved
+ D0.2 (D0Ar4) Syscall arg #4 Preserved
+ D1.2 (D1Ar3) Syscall arg #3 Preserved
+ D0.3 (D0Ar2) Syscall arg #2 Preserved
+ D1.3 (D1Ar1) Syscall arg #1 Preserved
+
+Due to the limited number of argument registers and some system calls with badly
+aligned 64-bit arguments, 64-bit values are always packed in consecutive
+arguments, even if this is contrary to the normal calling conventions (where the
+two halves would go in a matching pair of data registers).
+
+For example fadvise64_64 usually has the signature:
+
+ long sys_fadvise64_64(i32 fd, i64 offs, i64 len, i32 advice);
+
+But for metag fadvise64_64 is wrapped so that the 64-bit arguments are packed:
+
+ long sys_fadvise64_64_metag(i32 fd, i32 offs_lo,
+ i32 offs_hi, i32 len_lo,
+ i32 len_hi, i32 advice)
+
+So the arguments are packed in the registers like this:
+
+ D0 REG (ALIAS) VALUE D1 REG (ALIAS) VALUE
+ =============== =============== =============== =======================
+ D0.1 (D0Ar6) advice D1.1 (D1Ar5) hi(len)
+ D0.2 (D0Ar4) lo(len) D1.2 (D1Ar3) hi(offs)
+ D0.3 (D0Ar2) lo(offs) D1.3 (D1Ar1) fd
+
+
+===================
+CALLING CONVENTIONS
+===================
+
+These calling conventions apply to both user and kernel code. The stack grows
+from low addresses to high addresses in the metag ABI. The stack pointer (A0StP)
+should always point to the next free address on the stack and should at all
+times be 64-bit aligned. The following registers are effective at the point of a
+call:
+
+ REGISTERS CALL RETURN
+ =============== ======================= ===============================
+ D0.0 (D0Re0) 32bit return value
+ D1.0 (D1Re0) Upper half of 64bit return value
+ D0.1 (D0Ar6) 32bit argument #6 Clobbered
+ D1.1 (D1Ar5) 32bit argument #5 Clobbered
+ D0.2 (D0Ar4) 32bit argument #4 Clobbered
+ D1.2 (D1Ar3) 32bit argument #3 Clobbered
+ D0.3 (D0Ar2) 32bit argument #2 Clobbered
+ D1.3 (D1Ar1) 32bit argument #1 Clobbered
+ D0.4 (D0FrT) Clobbered
+ D1.4 (D1RtP) Return pointer Clobbered
+ D{0-1}.{5-7} Preserved
+ A0.0 (A0StP) Stack pointer Preserved
+ A1.0 (A0GbP) Preserved
+ A0.1 (A0FrP) Frame pointer Preserved
+ A1.1 (A0LbP) Preserved
+ A{0-1},{2-3} Clobbered
+
+64-bit arguments are placed in matching pairs of registers (i.e. the same
+register number in both D0 and D1 units), with the least significant half in D0
+and the most significant half in D1, leaving a gap where necessary. Further
+arguments are stored on the stack in reverse order (earlier arguments at higher
+addresses):
+
+ ADDRESS 0 1 2 3 4 5 6 7
+ =============== ===== ===== ===== ===== ===== ===== ===== =====
+ A0StP -->
+ A0StP-0x08 32bit argument #8 32bit argument #7
+ A0StP-0x10 32bit argument #10 32bit argument #9
+
+Function prologues tend to look a bit like this:
+
+ /* If frame pointer in use, move it to frame temp register so it can be
+ easily pushed onto stack */
+ MOV D0FrT,A0FrP
+
+ /* If frame pointer in use, set it to stack pointer */
+ ADD A0FrP,A0StP,#0
+
+ /* Preserve D0FrT, D1RtP, D{0-1}.{5-7} on stack, incrementing A0StP */
+ MSETL [A0StP++],D0FrT,D0.5,D0.6,D0.7
+
+ /* Allocate some stack space for local variables */
+ ADD A0StP,A0StP,#0x10
+
+At this point the stack would look like this:
+
+ ADDRESS 0 1 2 3 4 5 6 7
+ =============== ===== ===== ===== ===== ===== ===== ===== =====
+ A0StP -->
+ A0StP-0x08
+ A0StP-0x10
+ A0StP-0x18 Old D0.7 Old D1.7
+ A0StP-0x20 Old D0.6 Old D1.6
+ A0StP-0x28 Old D0.5 Old D1.5
+ A0FrP --> Old A0FrP (frame ptr) Old D1RtP (return ptr)
+ A0FrP-0x08 32bit argument #8 32bit argument #7
+ A0FrP-0x10 32bit argument #10 32bit argument #9
+
+Function epilogues tend to differ depending on the use of a frame pointer. An
+example of a frame pointer epilogue:
+
+ /* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack, incrementing A0FrP */
+ MGETL D0FrT,D0.5,D0.6,D0.7,[A0FrP++]
+ /* Restore stack pointer to where frame pointer was before increment */
+ SUB A0StP,A0FrP,#0x20
+ /* Restore frame pointer from frame temp */
+ MOV A0FrP,D0FrT
+ /* Return to caller via restored return pointer */
+ MOV PC,D1RtP
+
+If the function hasn't touched the frame pointer, MGETL cannot be safely used
+with A0StP as it always increments and that would expose the stack to clobbering
+by interrupts (kernel) or signals (user). Therefore it's common to see the MGETL
+split into separate GETL instructions:
+
+ /* Restore D0FrT, D1RtP, D{0-1}.{5-7} from stack */
+ GETL D0FrT,D1RtP,[A0StP+#-0x30]
+ GETL D0.5,D1.5,[A0StP+#-0x28]
+ GETL D0.6,D1.6,[A0StP+#-0x20]
+ GETL D0.7,D1.7,[A0StP+#-0x18]
+ /* Restore stack pointer */
+ SUB A0StP,A0StP,#0x30
+ /* Return to caller via restored return pointer */
+ MOV PC,D1RtP
diff --git a/Documentation/mic/Makefile b/Documentation/mic/Makefile
new file mode 100644
index 000000000..a191d453b
--- /dev/null
+++ b/Documentation/mic/Makefile
@@ -0,0 +1 @@
+subdir-y := mpssd
diff --git a/Documentation/mic/mic_overview.txt b/Documentation/mic/mic_overview.txt
new file mode 100644
index 000000000..77c541802
--- /dev/null
+++ b/Documentation/mic/mic_overview.txt
@@ -0,0 +1,66 @@
+An Intel MIC X100 device is a PCIe form factor add-in coprocessor
+card based on the Intel Many Integrated Core (MIC) architecture
+that runs a Linux OS. It is a PCIe endpoint in a platform and therefore
+implements the three required standard address spaces i.e. configuration,
+memory and I/O. The host OS loads a device driver as is typical for
+PCIe devices. The card itself runs a bootstrap after reset that
+transfers control to the card OS downloaded from the host driver. The
+host driver supports OSPM suspend and resume operations. It shuts down
+the card during suspend and reboots the card OS during resume.
+The card OS as shipped by Intel is a Linux kernel with modifications
+for the X100 devices.
+
+Since it is a PCIe card, it does not have the ability to host hardware
+devices for networking, storage and console. We provide these devices
+on X100 coprocessors thus enabling a self-bootable equivalent environment
+for applications. A key benefit of our solution is that it leverages
+the standard virtio framework for network, disk and console devices,
+though in our case the virtio framework is used across a PCIe bus.
+
+MIC PCIe card has a dma controller with 8 channels. These channels are
+shared between the host s/w and the card s/w. 0 to 3 are used by host
+and 4 to 7 by card. As the dma device doesn't show up as PCIe device,
+a virtual bus called mic bus is created and virtual dma devices are
+created on it by the host/card drivers. On host the channels are private
+and used only by the host driver to transfer data for the virtio devices.
+
+Here is a block diagram of the various components described above. The
+virtio backends are situated on the host rather than the card given better
+single threaded performance for the host compared to MIC, the ability of
+the host to initiate DMA's to/from the card using the MIC DMA engine and
+the fact that the virtio block storage backend can only be on the host.
+
+ |
+ +----------+ | +----------+
+ | Card OS | | | Host OS |
+ +----------+ | +----------+
+ |
+ +-------+ +--------+ +------+ | +---------+ +--------+ +--------+
+ | Virtio| |Virtio | |Virtio| | |Virtio | |Virtio | |Virtio |
+ | Net | |Console | |Block | | |Net | |Console | |Block |
+ | Driver| |Driver | |Driver| | |backend | |backend | |backend |
+ +-------+ +--------+ +------+ | +---------+ +--------+ +--------+
+ | | | | | | |
+ | | | |User | | |
+ | | | |------|------------|---------|-------
+ +-------------------+ |Kernel +--------------------------+
+ | | | Virtio over PCIe IOCTLs |
+ | | +--------------------------+
++-----------+ | | | +-----------+
+| MIC DMA | | | | | MIC DMA |
+| Driver | | | | | Driver |
++-----------+ | | | +-----------+
+ | | | | |
++---------------+ | | | +----------------+
+|MIC virtual Bus| | | | |MIC virtual Bus |
++---------------+ | | | +----------------+
+ | | | | |
+ | +--------------+ | +---------------+ |
+ | |Intel MIC | | |Intel MIC | |
+ +---|Card Driver | | |Host Driver | |
+ +--------------+ | +---------------+-----+
+ | | |
+ +-------------------------------------------------------------+
+ | |
+ | PCIe Bus |
+ +-------------------------------------------------------------+
diff --git a/Documentation/mic/mpssd/.gitignore b/Documentation/mic/mpssd/.gitignore
new file mode 100644
index 000000000..8b7c72f07
--- /dev/null
+++ b/Documentation/mic/mpssd/.gitignore
@@ -0,0 +1 @@
+mpssd
diff --git a/Documentation/mic/mpssd/Makefile b/Documentation/mic/mpssd/Makefile
new file mode 100644
index 000000000..f47fe6ba7
--- /dev/null
+++ b/Documentation/mic/mpssd/Makefile
@@ -0,0 +1,19 @@
+# List of programs to build
+hostprogs-$(CONFIG_X86_64) := mpssd
+
+mpssd-objs := mpssd.o sysfs.o
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS += -I$(objtree)/usr/include -I$(srctree)/tools/include
+
+ifdef DEBUG
+HOSTCFLAGS += -DDEBUG=$(DEBUG)
+endif
+
+HOSTLOADLIBES_mpssd := -lpthread
+
+install:
+ install mpssd /usr/sbin/mpssd
+ install micctrl /usr/sbin/micctrl
diff --git a/Documentation/mic/mpssd/micctrl b/Documentation/mic/mpssd/micctrl
new file mode 100755
index 000000000..8f2629b41
--- /dev/null
+++ b/Documentation/mic/mpssd/micctrl
@@ -0,0 +1,173 @@
+#!/bin/bash
+# Intel MIC Platform Software Stack (MPSS)
+#
+# Copyright(c) 2013 Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License, version 2, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# The full GNU General Public License is included in this distribution in
+# the file called "COPYING".
+#
+# Intel MIC User Space Tools.
+#
+# micctrl - Controls MIC boot/start/stop.
+#
+# chkconfig: 2345 95 05
+# description: start MPSS stack processing.
+#
+### BEGIN INIT INFO
+# Provides: micctrl
+### END INIT INFO
+
+# Source function library.
+. /etc/init.d/functions
+
+sysfs="/sys/class/mic"
+
+_status()
+{
+ f=$sysfs/$1
+ echo -e $1 state: "`cat $f/state`" shutdown_status: "`cat $f/shutdown_status`"
+}
+
+status()
+{
+ if [ "`echo $1 | head -c3`" == "mic" ]; then
+ _status $1
+ return $?
+ fi
+ for f in $sysfs/*
+ do
+ _status `basename $f`
+ RETVAL=$?
+ [ $RETVAL -ne 0 ] && return $RETVAL
+ done
+ return 0
+}
+
+_reset()
+{
+ f=$sysfs/$1
+ echo reset > $f/state
+}
+
+reset()
+{
+ if [ "`echo $1 | head -c3`" == "mic" ]; then
+ _reset $1
+ return $?
+ fi
+ for f in $sysfs/*
+ do
+ _reset `basename $f`
+ RETVAL=$?
+ [ $RETVAL -ne 0 ] && return $RETVAL
+ done
+ return 0
+}
+
+_boot()
+{
+ f=$sysfs/$1
+ echo "linux" > $f/bootmode
+ echo "mic/uos.img" > $f/firmware
+ echo "mic/$1.image" > $f/ramdisk
+ echo "boot" > $f/state
+}
+
+boot()
+{
+ if [ "`echo $1 | head -c3`" == "mic" ]; then
+ _boot $1
+ return $?
+ fi
+ for f in $sysfs/*
+ do
+ _boot `basename $f`
+ RETVAL=$?
+ [ $RETVAL -ne 0 ] && return $RETVAL
+ done
+ return 0
+}
+
+_shutdown()
+{
+ f=$sysfs/$1
+ echo shutdown > $f/state
+}
+
+shutdown()
+{
+ if [ "`echo $1 | head -c3`" == "mic" ]; then
+ _shutdown $1
+ return $?
+ fi
+ for f in $sysfs/*
+ do
+ _shutdown `basename $f`
+ RETVAL=$?
+ [ $RETVAL -ne 0 ] && return $RETVAL
+ done
+ return 0
+}
+
+_wait()
+{
+ f=$sysfs/$1
+ while [ "`cat $f/state`" != "offline" -a "`cat $f/state`" != "online" ]
+ do
+ sleep 1
+ echo -e "Waiting for $1 to go offline"
+ done
+}
+
+wait()
+{
+ if [ "`echo $1 | head -c3`" == "mic" ]; then
+ _wait $1
+ return $?
+ fi
+ # Wait for the cards to go offline
+ for f in $sysfs/*
+ do
+ _wait `basename $f`
+ RETVAL=$?
+ [ $RETVAL -ne 0 ] && return $RETVAL
+ done
+ return 0
+}
+
+if [ ! -d "$sysfs" ]; then
+ echo -e $"Module unloaded "
+ exit 3
+fi
+
+case $1 in
+ -s)
+ status $2
+ ;;
+ -r)
+ reset $2
+ ;;
+ -b)
+ boot $2
+ ;;
+ -S)
+ shutdown $2
+ ;;
+ -w)
+ wait $2
+ ;;
+ *)
+ echo $"Usage: $0 {-s (status) |-r (reset) |-b (boot) |-S (shutdown) |-w (wait)}"
+ exit 2
+esac
+
+exit $?
diff --git a/Documentation/mic/mpssd/mpss b/Documentation/mic/mpssd/mpss
new file mode 100755
index 000000000..cacbdb0ae
--- /dev/null
+++ b/Documentation/mic/mpssd/mpss
@@ -0,0 +1,202 @@
+#!/bin/bash
+# Intel MIC Platform Software Stack (MPSS)
+#
+# Copyright(c) 2013 Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License, version 2, as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# The full GNU General Public License is included in this distribution in
+# the file called "COPYING".
+#
+# Intel MIC User Space Tools.
+#
+# mpss Start mpssd.
+#
+# chkconfig: 2345 95 05
+# description: start MPSS stack processing.
+#
+### BEGIN INIT INFO
+# Provides: mpss
+# Required-Start:
+# Required-Stop:
+# Short-Description: MPSS stack control
+# Description: MPSS stack control
+### END INIT INFO
+
+# Source function library.
+. /etc/init.d/functions
+
+exec=/usr/sbin/mpssd
+sysfs="/sys/class/mic"
+
+start()
+{
+ [ -x $exec ] || exit 5
+
+ if [ "`ps -e | awk '{print $4}' | grep mpssd | head -1`" = "mpssd" ]; then
+ echo -e $"MPSSD already running! "
+ success
+ echo
+ return 0
+ fi
+
+ echo -e $"Starting MPSS Stack"
+ echo -e $"Loading MIC_X100_DMA & MIC_HOST Modules"
+
+ for f in "mic_host" "mic_x100_dma"
+ do
+ modprobe $f
+ RETVAL=$?
+ if [ $RETVAL -ne 0 ]; then
+ failure
+ echo
+ return $RETVAL
+ fi
+ done
+
+ # Start the daemon
+ echo -n $"Starting MPSSD "
+ $exec
+ RETVAL=$?
+ if [ $RETVAL -ne 0 ]; then
+ failure
+ echo
+ return $RETVAL
+ fi
+ success
+ echo
+
+ sleep 5
+
+ # Boot the cards
+ micctrl -b
+
+ # Wait till ping works
+ for f in $sysfs/*
+ do
+ count=100
+ ipaddr=`cat $f/cmdline`
+ ipaddr=${ipaddr#*address,}
+ ipaddr=`echo $ipaddr | cut -d, -f1 | cut -d\; -f1`
+ while [ $count -ge 0 ]
+ do
+ echo -e "Pinging "`basename $f`" "
+ ping -c 1 $ipaddr &> /dev/null
+ RETVAL=$?
+ if [ $RETVAL -eq 0 ]; then
+ success
+ break
+ fi
+ sleep 1
+ count=`expr $count - 1`
+ done
+ [ $RETVAL -ne 0 ] && failure || success
+ echo
+ done
+ return $RETVAL
+}
+
+stop()
+{
+ echo -e $"Shutting down MPSS Stack: "
+
+ # Bail out if module is unloaded
+ if [ ! -d "$sysfs" ]; then
+ echo -n $"Module unloaded "
+ success
+ echo
+ return 0
+ fi
+
+ # Shut down the cards.
+ micctrl -S
+
+ # Wait for the cards to go offline
+ for f in $sysfs/*
+ do
+ while [ "`cat $f/state`" != "offline" ]
+ do
+ sleep 1
+ echo -e "Waiting for "`basename $f`" to go offline"
+ done
+ done
+
+ # Display the status of the cards
+ micctrl -s
+
+ # Kill MPSSD now
+ echo -n $"Killing MPSSD"
+ killall -9 mpssd 2>/dev/null
+ RETVAL=$?
+ [ $RETVAL -ne 0 ] && failure || success
+ echo
+ return $RETVAL
+}
+
+restart()
+{
+ stop
+ sleep 5
+ start
+}
+
+status()
+{
+ micctrl -s
+ if [ "`ps -e | awk '{print $4}' | grep mpssd | head -n 1`" = "mpssd" ]; then
+ echo "mpssd is running"
+ else
+ echo "mpssd is stopped"
+ fi
+ return 0
+}
+
+unload()
+{
+ if [ ! -d "$sysfs" ]; then
+ echo -n $"No MIC_HOST Module: "
+ success
+ echo
+ return
+ fi
+
+ stop
+
+ sleep 5
+ echo -n $"Removing MIC_HOST & MIC_X100_DMA Modules: "
+ modprobe -r mic_host mic_x100_dma
+ RETVAL=$?
+ [ $RETVAL -ne 0 ] && failure || success
+ echo
+ return $RETVAL
+}
+
+case $1 in
+ start)
+ start
+ ;;
+ stop)
+ stop
+ ;;
+ restart)
+ restart
+ ;;
+ status)
+ status
+ ;;
+ unload)
+ unload
+ ;;
+ *)
+ echo $"Usage: $0 {start|stop|restart|status|unload}"
+ exit 2
+esac
+
+exit $?
diff --git a/Documentation/mic/mpssd/mpssd.c b/Documentation/mic/mpssd/mpssd.c
new file mode 100644
index 000000000..3c5c379fc
--- /dev/null
+++ b/Documentation/mic/mpssd/mpssd.c
@@ -0,0 +1,1728 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC User Space Tools.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <poll.h>
+#include <features.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_blk.h>
+#include <linux/version.h>
+#include "mpssd.h"
+#include <linux/mic_ioctl.h>
+#include <linux/mic_common.h>
+#include <tools/endian.h>
+
+static void init_mic(struct mic_info *mic);
+
+static FILE *logfp;
+static struct mic_info mic_list;
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define min_t(type, x, y) ({ \
+ type __min1 = (x); \
+ type __min2 = (y); \
+ __min1 < __min2 ? __min1 : __min2; })
+
+/* align addr on a size boundary - adjust address up/down if needed */
+#define _ALIGN_DOWN(addr, size) ((addr)&(~((size)-1)))
+#define _ALIGN_UP(addr, size) _ALIGN_DOWN(addr + size - 1, size)
+
+/* align addr on a size boundary - adjust address up if needed */
+#define _ALIGN(addr, size) _ALIGN_UP(addr, size)
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE)
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#define GSO_ENABLED 1
+#define MAX_GSO_SIZE (64 * 1024)
+#define ETH_H_LEN 14
+#define MAX_NET_PKT_SIZE (_ALIGN_UP(MAX_GSO_SIZE + ETH_H_LEN, 64))
+#define MIC_DEVICE_PAGE_END 0x1000
+
+#ifndef VIRTIO_NET_HDR_F_DATA_VALID
+#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
+#endif
+
+static struct {
+ struct mic_device_desc dd;
+ struct mic_vqconfig vqconfig[2];
+ __u32 host_features, guest_acknowledgements;
+ struct virtio_console_config cons_config;
+} virtcons_dev_page = {
+ .dd = {
+ .type = VIRTIO_ID_CONSOLE,
+ .num_vq = ARRAY_SIZE(virtcons_dev_page.vqconfig),
+ .feature_len = sizeof(virtcons_dev_page.host_features),
+ .config_len = sizeof(virtcons_dev_page.cons_config),
+ },
+ .vqconfig[0] = {
+ .num = htole16(MIC_VRING_ENTRIES),
+ },
+ .vqconfig[1] = {
+ .num = htole16(MIC_VRING_ENTRIES),
+ },
+};
+
+static struct {
+ struct mic_device_desc dd;
+ struct mic_vqconfig vqconfig[2];
+ __u32 host_features, guest_acknowledgements;
+ struct virtio_net_config net_config;
+} virtnet_dev_page = {
+ .dd = {
+ .type = VIRTIO_ID_NET,
+ .num_vq = ARRAY_SIZE(virtnet_dev_page.vqconfig),
+ .feature_len = sizeof(virtnet_dev_page.host_features),
+ .config_len = sizeof(virtnet_dev_page.net_config),
+ },
+ .vqconfig[0] = {
+ .num = htole16(MIC_VRING_ENTRIES),
+ },
+ .vqconfig[1] = {
+ .num = htole16(MIC_VRING_ENTRIES),
+ },
+#if GSO_ENABLED
+ .host_features = htole32(
+ 1 << VIRTIO_NET_F_CSUM |
+ 1 << VIRTIO_NET_F_GSO |
+ 1 << VIRTIO_NET_F_GUEST_TSO4 |
+ 1 << VIRTIO_NET_F_GUEST_TSO6 |
+ 1 << VIRTIO_NET_F_GUEST_ECN |
+ 1 << VIRTIO_NET_F_GUEST_UFO),
+#else
+ .host_features = 0,
+#endif
+};
+
+static const char *mic_config_dir = "/etc/sysconfig/mic";
+static const char *virtblk_backend = "VIRTBLK_BACKEND";
+static struct {
+ struct mic_device_desc dd;
+ struct mic_vqconfig vqconfig[1];
+ __u32 host_features, guest_acknowledgements;
+ struct virtio_blk_config blk_config;
+} virtblk_dev_page = {
+ .dd = {
+ .type = VIRTIO_ID_BLOCK,
+ .num_vq = ARRAY_SIZE(virtblk_dev_page.vqconfig),
+ .feature_len = sizeof(virtblk_dev_page.host_features),
+ .config_len = sizeof(virtblk_dev_page.blk_config),
+ },
+ .vqconfig[0] = {
+ .num = htole16(MIC_VRING_ENTRIES),
+ },
+ .host_features =
+ htole32(1<<VIRTIO_BLK_F_SEG_MAX),
+ .blk_config = {
+ .seg_max = htole32(MIC_VRING_ENTRIES - 2),
+ .capacity = htole64(0),
+ }
+};
+
+static char *myname;
+
+static int
+tap_configure(struct mic_info *mic, char *dev)
+{
+ pid_t pid;
+ char *ifargv[7];
+ char ipaddr[IFNAMSIZ];
+ int ret = 0;
+
+ pid = fork();
+ if (pid == 0) {
+ ifargv[0] = "ip";
+ ifargv[1] = "link";
+ ifargv[2] = "set";
+ ifargv[3] = dev;
+ ifargv[4] = "up";
+ ifargv[5] = NULL;
+ mpsslog("Configuring %s\n", dev);
+ ret = execvp("ip", ifargv);
+ if (ret < 0) {
+ mpsslog("%s execvp failed errno %s\n",
+ mic->name, strerror(errno));
+ return ret;
+ }
+ }
+ if (pid < 0) {
+ mpsslog("%s fork failed errno %s\n",
+ mic->name, strerror(errno));
+ return ret;
+ }
+
+ ret = waitpid(pid, NULL, 0);
+ if (ret < 0) {
+ mpsslog("%s waitpid failed errno %s\n",
+ mic->name, strerror(errno));
+ return ret;
+ }
+
+ snprintf(ipaddr, IFNAMSIZ, "172.31.%d.254/24", mic->id);
+
+ pid = fork();
+ if (pid == 0) {
+ ifargv[0] = "ip";
+ ifargv[1] = "addr";
+ ifargv[2] = "add";
+ ifargv[3] = ipaddr;
+ ifargv[4] = "dev";
+ ifargv[5] = dev;
+ ifargv[6] = NULL;
+ mpsslog("Configuring %s ipaddr %s\n", dev, ipaddr);
+ ret = execvp("ip", ifargv);
+ if (ret < 0) {
+ mpsslog("%s execvp failed errno %s\n",
+ mic->name, strerror(errno));
+ return ret;
+ }
+ }
+ if (pid < 0) {
+ mpsslog("%s fork failed errno %s\n",
+ mic->name, strerror(errno));
+ return ret;
+ }
+
+ ret = waitpid(pid, NULL, 0);
+ if (ret < 0) {
+ mpsslog("%s waitpid failed errno %s\n",
+ mic->name, strerror(errno));
+ return ret;
+ }
+ mpsslog("MIC name %s %s %d DONE!\n",
+ mic->name, __func__, __LINE__);
+ return 0;
+}
+
+static int tun_alloc(struct mic_info *mic, char *dev)
+{
+ struct ifreq ifr;
+ int fd, err;
+#if GSO_ENABLED
+ unsigned offload;
+#endif
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0) {
+ mpsslog("Could not open /dev/net/tun %s\n", strerror(errno));
+ goto done;
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+ if (*dev)
+ strncpy(ifr.ifr_name, dev, IFNAMSIZ);
+
+ err = ioctl(fd, TUNSETIFF, (void *)&ifr);
+ if (err < 0) {
+ mpsslog("%s %s %d TUNSETIFF failed %s\n",
+ mic->name, __func__, __LINE__, strerror(errno));
+ close(fd);
+ return err;
+ }
+#if GSO_ENABLED
+ offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
+ TUN_F_TSO_ECN | TUN_F_UFO;
+
+ err = ioctl(fd, TUNSETOFFLOAD, offload);
+ if (err < 0) {
+ mpsslog("%s %s %d TUNSETOFFLOAD failed %s\n",
+ mic->name, __func__, __LINE__, strerror(errno));
+ close(fd);
+ return err;
+ }
+#endif
+ strcpy(dev, ifr.ifr_name);
+ mpsslog("Created TAP %s\n", dev);
+done:
+ return fd;
+}
+
+#define NET_FD_VIRTIO_NET 0
+#define NET_FD_TUN 1
+#define MAX_NET_FD 2
+
+static void set_dp(struct mic_info *mic, int type, void *dp)
+{
+ switch (type) {
+ case VIRTIO_ID_CONSOLE:
+ mic->mic_console.console_dp = dp;
+ return;
+ case VIRTIO_ID_NET:
+ mic->mic_net.net_dp = dp;
+ return;
+ case VIRTIO_ID_BLOCK:
+ mic->mic_virtblk.block_dp = dp;
+ return;
+ }
+ mpsslog("%s %s %d not found\n", mic->name, __func__, type);
+ assert(0);
+}
+
+static void *get_dp(struct mic_info *mic, int type)
+{
+ switch (type) {
+ case VIRTIO_ID_CONSOLE:
+ return mic->mic_console.console_dp;
+ case VIRTIO_ID_NET:
+ return mic->mic_net.net_dp;
+ case VIRTIO_ID_BLOCK:
+ return mic->mic_virtblk.block_dp;
+ }
+ mpsslog("%s %s %d not found\n", mic->name, __func__, type);
+ assert(0);
+ return NULL;
+}
+
+static struct mic_device_desc *get_device_desc(struct mic_info *mic, int type)
+{
+ struct mic_device_desc *d;
+ int i;
+ void *dp = get_dp(mic, type);
+
+ for (i = sizeof(struct mic_bootparam); i < PAGE_SIZE;
+ i += mic_total_desc_size(d)) {
+ d = dp + i;
+
+ /* End of list */
+ if (d->type == 0)
+ break;
+
+ if (d->type == -1)
+ continue;
+
+ mpsslog("%s %s d-> type %d d %p\n",
+ mic->name, __func__, d->type, d);
+
+ if (d->type == (__u8)type)
+ return d;
+ }
+ mpsslog("%s %s %d not found\n", mic->name, __func__, type);
+ assert(0);
+ return NULL;
+}
+
+/* See comments in vhost.c for explanation of next_desc() */
+static unsigned next_desc(struct vring_desc *desc)
+{
+ unsigned int next;
+
+ if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT))
+ return -1U;
+ next = le16toh(desc->next);
+ return next;
+}
+
+/* Sum up all the IOVEC length */
+static ssize_t
+sum_iovec_len(struct mic_copy_desc *copy)
+{
+ ssize_t sum = 0;
+ int i;
+
+ for (i = 0; i < copy->iovcnt; i++)
+ sum += copy->iov[i].iov_len;
+ return sum;
+}
+
+static inline void verify_out_len(struct mic_info *mic,
+ struct mic_copy_desc *copy)
+{
+ if (copy->out_len != sum_iovec_len(copy)) {
+ mpsslog("%s %s %d BUG copy->out_len 0x%x len 0x%zx\n",
+ mic->name, __func__, __LINE__,
+ copy->out_len, sum_iovec_len(copy));
+ assert(copy->out_len == sum_iovec_len(copy));
+ }
+}
+
+/* Display an iovec */
+static void
+disp_iovec(struct mic_info *mic, struct mic_copy_desc *copy,
+ const char *s, int line)
+{
+ int i;
+
+ for (i = 0; i < copy->iovcnt; i++)
+ mpsslog("%s %s %d copy->iov[%d] addr %p len 0x%zx\n",
+ mic->name, s, line, i,
+ copy->iov[i].iov_base, copy->iov[i].iov_len);
+}
+
+static inline __u16 read_avail_idx(struct mic_vring *vr)
+{
+ return ACCESS_ONCE(vr->info->avail_idx);
+}
+
+static inline void txrx_prepare(int type, bool tx, struct mic_vring *vr,
+ struct mic_copy_desc *copy, ssize_t len)
+{
+ copy->vr_idx = tx ? 0 : 1;
+ copy->update_used = true;
+ if (type == VIRTIO_ID_NET)
+ copy->iov[1].iov_len = len - sizeof(struct virtio_net_hdr);
+ else
+ copy->iov[0].iov_len = len;
+}
+
+/* Central API which triggers the copies */
+static int
+mic_virtio_copy(struct mic_info *mic, int fd,
+ struct mic_vring *vr, struct mic_copy_desc *copy)
+{
+ int ret;
+
+ ret = ioctl(fd, MIC_VIRTIO_COPY_DESC, copy);
+ if (ret) {
+ mpsslog("%s %s %d errno %s ret %d\n",
+ mic->name, __func__, __LINE__,
+ strerror(errno), ret);
+ }
+ return ret;
+}
+
+/*
+ * This initialization routine requires at least one
+ * vring i.e. vr0. vr1 is optional.
+ */
+static void *
+init_vr(struct mic_info *mic, int fd, int type,
+ struct mic_vring *vr0, struct mic_vring *vr1, int num_vq)
+{
+ int vr_size;
+ char *va;
+
+ vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
+ MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
+ va = mmap(NULL, MIC_DEVICE_PAGE_END + vr_size * num_vq,
+ PROT_READ, MAP_SHARED, fd, 0);
+ if (MAP_FAILED == va) {
+ mpsslog("%s %s %d mmap failed errno %s\n",
+ mic->name, __func__, __LINE__,
+ strerror(errno));
+ goto done;
+ }
+ set_dp(mic, type, va);
+ vr0->va = (struct mic_vring *)&va[MIC_DEVICE_PAGE_END];
+ vr0->info = vr0->va +
+ vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN);
+ vring_init(&vr0->vr,
+ MIC_VRING_ENTRIES, vr0->va, MIC_VIRTIO_RING_ALIGN);
+ mpsslog("%s %s vr0 %p vr0->info %p vr_size 0x%x vring 0x%x ",
+ __func__, mic->name, vr0->va, vr0->info, vr_size,
+ vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
+ mpsslog("magic 0x%x expected 0x%x\n",
+ le32toh(vr0->info->magic), MIC_MAGIC + type);
+ assert(le32toh(vr0->info->magic) == MIC_MAGIC + type);
+ if (vr1) {
+ vr1->va = (struct mic_vring *)
+ &va[MIC_DEVICE_PAGE_END + vr_size];
+ vr1->info = vr1->va + vring_size(MIC_VRING_ENTRIES,
+ MIC_VIRTIO_RING_ALIGN);
+ vring_init(&vr1->vr,
+ MIC_VRING_ENTRIES, vr1->va, MIC_VIRTIO_RING_ALIGN);
+ mpsslog("%s %s vr1 %p vr1->info %p vr_size 0x%x vring 0x%x ",
+ __func__, mic->name, vr1->va, vr1->info, vr_size,
+ vring_size(MIC_VRING_ENTRIES, MIC_VIRTIO_RING_ALIGN));
+ mpsslog("magic 0x%x expected 0x%x\n",
+ le32toh(vr1->info->magic), MIC_MAGIC + type + 1);
+ assert(le32toh(vr1->info->magic) == MIC_MAGIC + type + 1);
+ }
+done:
+ return va;
+}
+
+static void
+wait_for_card_driver(struct mic_info *mic, int fd, int type)
+{
+ struct pollfd pollfd;
+ int err;
+ struct mic_device_desc *desc = get_device_desc(mic, type);
+
+ pollfd.fd = fd;
+ mpsslog("%s %s Waiting .... desc-> type %d status 0x%x\n",
+ mic->name, __func__, type, desc->status);
+ while (1) {
+ pollfd.events = POLLIN;
+ pollfd.revents = 0;
+ err = poll(&pollfd, 1, -1);
+ if (err < 0) {
+ mpsslog("%s %s poll failed %s\n",
+ mic->name, __func__, strerror(errno));
+ continue;
+ }
+
+ if (pollfd.revents) {
+ mpsslog("%s %s Waiting... desc-> type %d status 0x%x\n",
+ mic->name, __func__, type, desc->status);
+ if (desc->status & VIRTIO_CONFIG_S_DRIVER_OK) {
+ mpsslog("%s %s poll.revents %d\n",
+ mic->name, __func__, pollfd.revents);
+ mpsslog("%s %s desc-> type %d status 0x%x\n",
+ mic->name, __func__, type,
+ desc->status);
+ break;
+ }
+ }
+ }
+}
+
+/* Spin till we have some descriptors */
+static void
+spin_for_descriptors(struct mic_info *mic, struct mic_vring *vr)
+{
+ __u16 avail_idx = read_avail_idx(vr);
+
+ while (avail_idx == le16toh(ACCESS_ONCE(vr->vr.avail->idx))) {
+#ifdef DEBUG
+ mpsslog("%s %s waiting for desc avail %d info_avail %d\n",
+ mic->name, __func__,
+ le16toh(vr->vr.avail->idx), vr->info->avail_idx);
+#endif
+ sched_yield();
+ }
+}
+
+static void *
+virtio_net(void *arg)
+{
+ static __u8 vnet_hdr[2][sizeof(struct virtio_net_hdr)];
+ static __u8 vnet_buf[2][MAX_NET_PKT_SIZE] __attribute__ ((aligned(64)));
+ struct iovec vnet_iov[2][2] = {
+ { { .iov_base = vnet_hdr[0], .iov_len = sizeof(vnet_hdr[0]) },
+ { .iov_base = vnet_buf[0], .iov_len = sizeof(vnet_buf[0]) } },
+ { { .iov_base = vnet_hdr[1], .iov_len = sizeof(vnet_hdr[1]) },
+ { .iov_base = vnet_buf[1], .iov_len = sizeof(vnet_buf[1]) } },
+ };
+ struct iovec *iov0 = vnet_iov[0], *iov1 = vnet_iov[1];
+ struct mic_info *mic = (struct mic_info *)arg;
+ char if_name[IFNAMSIZ];
+ struct pollfd net_poll[MAX_NET_FD];
+ struct mic_vring tx_vr, rx_vr;
+ struct mic_copy_desc copy;
+ struct mic_device_desc *desc;
+ int err;
+
+ snprintf(if_name, IFNAMSIZ, "mic%d", mic->id);
+ mic->mic_net.tap_fd = tun_alloc(mic, if_name);
+ if (mic->mic_net.tap_fd < 0)
+ goto done;
+
+ if (tap_configure(mic, if_name))
+ goto done;
+ mpsslog("MIC name %s id %d\n", mic->name, mic->id);
+
+ net_poll[NET_FD_VIRTIO_NET].fd = mic->mic_net.virtio_net_fd;
+ net_poll[NET_FD_VIRTIO_NET].events = POLLIN;
+ net_poll[NET_FD_TUN].fd = mic->mic_net.tap_fd;
+ net_poll[NET_FD_TUN].events = POLLIN;
+
+ if (MAP_FAILED == init_vr(mic, mic->mic_net.virtio_net_fd,
+ VIRTIO_ID_NET, &tx_vr, &rx_vr,
+ virtnet_dev_page.dd.num_vq)) {
+ mpsslog("%s init_vr failed %s\n",
+ mic->name, strerror(errno));
+ goto done;
+ }
+
+ copy.iovcnt = 2;
+ desc = get_device_desc(mic, VIRTIO_ID_NET);
+
+ while (1) {
+ ssize_t len;
+
+ net_poll[NET_FD_VIRTIO_NET].revents = 0;
+ net_poll[NET_FD_TUN].revents = 0;
+
+ /* Start polling for data from tap and virtio net */
+ err = poll(net_poll, 2, -1);
+ if (err < 0) {
+ mpsslog("%s poll failed %s\n",
+ __func__, strerror(errno));
+ continue;
+ }
+ if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
+ wait_for_card_driver(mic, mic->mic_net.virtio_net_fd,
+ VIRTIO_ID_NET);
+ /*
+ * Check if there is data to be read from TUN and write to
+ * virtio net fd if there is.
+ */
+ if (net_poll[NET_FD_TUN].revents & POLLIN) {
+ copy.iov = iov0;
+ len = readv(net_poll[NET_FD_TUN].fd,
+ copy.iov, copy.iovcnt);
+ if (len > 0) {
+ struct virtio_net_hdr *hdr
+ = (struct virtio_net_hdr *)vnet_hdr[0];
+
+ /* Disable checksums on the card since we are on
+ a reliable PCIe link */
+ hdr->flags |= VIRTIO_NET_HDR_F_DATA_VALID;
+#ifdef DEBUG
+ mpsslog("%s %s %d hdr->flags 0x%x ", mic->name,
+ __func__, __LINE__, hdr->flags);
+ mpsslog("copy.out_len %d hdr->gso_type 0x%x\n",
+ copy.out_len, hdr->gso_type);
+#endif
+#ifdef DEBUG
+ disp_iovec(mic, copy, __func__, __LINE__);
+ mpsslog("%s %s %d read from tap 0x%lx\n",
+ mic->name, __func__, __LINE__,
+ len);
+#endif
+ spin_for_descriptors(mic, &tx_vr);
+ txrx_prepare(VIRTIO_ID_NET, 1, &tx_vr, &copy,
+ len);
+
+ err = mic_virtio_copy(mic,
+ mic->mic_net.virtio_net_fd, &tx_vr,
+ &copy);
+ if (err < 0) {
+ mpsslog("%s %s %d mic_virtio_copy %s\n",
+ mic->name, __func__, __LINE__,
+ strerror(errno));
+ }
+ if (!err)
+ verify_out_len(mic, &copy);
+#ifdef DEBUG
+ disp_iovec(mic, copy, __func__, __LINE__);
+ mpsslog("%s %s %d wrote to net 0x%lx\n",
+ mic->name, __func__, __LINE__,
+ sum_iovec_len(&copy));
+#endif
+ /* Reinitialize IOV for next run */
+ iov0[1].iov_len = MAX_NET_PKT_SIZE;
+ } else if (len < 0) {
+ disp_iovec(mic, &copy, __func__, __LINE__);
+ mpsslog("%s %s %d read failed %s ", mic->name,
+ __func__, __LINE__, strerror(errno));
+ mpsslog("cnt %d sum %zd\n",
+ copy.iovcnt, sum_iovec_len(&copy));
+ }
+ }
+
+ /*
+ * Check if there is data to be read from virtio net and
+ * write to TUN if there is.
+ */
+ if (net_poll[NET_FD_VIRTIO_NET].revents & POLLIN) {
+ while (rx_vr.info->avail_idx !=
+ le16toh(rx_vr.vr.avail->idx)) {
+ copy.iov = iov1;
+ txrx_prepare(VIRTIO_ID_NET, 0, &rx_vr, &copy,
+ MAX_NET_PKT_SIZE
+ + sizeof(struct virtio_net_hdr));
+
+ err = mic_virtio_copy(mic,
+ mic->mic_net.virtio_net_fd, &rx_vr,
+ &copy);
+ if (!err) {
+#ifdef DEBUG
+ struct virtio_net_hdr *hdr
+ = (struct virtio_net_hdr *)
+ vnet_hdr[1];
+
+ mpsslog("%s %s %d hdr->flags 0x%x, ",
+ mic->name, __func__, __LINE__,
+ hdr->flags);
+ mpsslog("out_len %d gso_type 0x%x\n",
+ copy.out_len,
+ hdr->gso_type);
+#endif
+ /* Set the correct output iov_len */
+ iov1[1].iov_len = copy.out_len -
+ sizeof(struct virtio_net_hdr);
+ verify_out_len(mic, &copy);
+#ifdef DEBUG
+ disp_iovec(mic, copy, __func__,
+ __LINE__);
+ mpsslog("%s %s %d ",
+ mic->name, __func__, __LINE__);
+ mpsslog("read from net 0x%lx\n",
+ sum_iovec_len(copy));
+#endif
+ len = writev(net_poll[NET_FD_TUN].fd,
+ copy.iov, copy.iovcnt);
+ if (len != sum_iovec_len(&copy)) {
+ mpsslog("Tun write failed %s ",
+ strerror(errno));
+ mpsslog("len 0x%zx ", len);
+ mpsslog("read_len 0x%zx\n",
+ sum_iovec_len(&copy));
+ } else {
+#ifdef DEBUG
+ disp_iovec(mic, &copy, __func__,
+ __LINE__);
+ mpsslog("%s %s %d ",
+ mic->name, __func__,
+ __LINE__);
+ mpsslog("wrote to tap 0x%lx\n",
+ len);
+#endif
+ }
+ } else {
+ mpsslog("%s %s %d mic_virtio_copy %s\n",
+ mic->name, __func__, __LINE__,
+ strerror(errno));
+ break;
+ }
+ }
+ }
+ if (net_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
+ mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
+ }
+done:
+ pthread_exit(NULL);
+}
+
+/* virtio_console */
+#define VIRTIO_CONSOLE_FD 0
+#define MONITOR_FD (VIRTIO_CONSOLE_FD + 1)
+#define MAX_CONSOLE_FD (MONITOR_FD + 1) /* must be the last one + 1 */
+#define MAX_BUFFER_SIZE PAGE_SIZE
+
+static void *
+virtio_console(void *arg)
+{
+ static __u8 vcons_buf[2][PAGE_SIZE];
+ struct iovec vcons_iov[2] = {
+ { .iov_base = vcons_buf[0], .iov_len = sizeof(vcons_buf[0]) },
+ { .iov_base = vcons_buf[1], .iov_len = sizeof(vcons_buf[1]) },
+ };
+ struct iovec *iov0 = &vcons_iov[0], *iov1 = &vcons_iov[1];
+ struct mic_info *mic = (struct mic_info *)arg;
+ int err;
+ struct pollfd console_poll[MAX_CONSOLE_FD];
+ int pty_fd;
+ char *pts_name;
+ ssize_t len;
+ struct mic_vring tx_vr, rx_vr;
+ struct mic_copy_desc copy;
+ struct mic_device_desc *desc;
+
+ pty_fd = posix_openpt(O_RDWR);
+ if (pty_fd < 0) {
+ mpsslog("can't open a pseudoterminal master device: %s\n",
+ strerror(errno));
+ goto _return;
+ }
+ pts_name = ptsname(pty_fd);
+ if (pts_name == NULL) {
+ mpsslog("can't get pts name\n");
+ goto _close_pty;
+ }
+ printf("%s console message goes to %s\n", mic->name, pts_name);
+ mpsslog("%s console message goes to %s\n", mic->name, pts_name);
+ err = grantpt(pty_fd);
+ if (err < 0) {
+ mpsslog("can't grant access: %s %s\n",
+ pts_name, strerror(errno));
+ goto _close_pty;
+ }
+ err = unlockpt(pty_fd);
+ if (err < 0) {
+ mpsslog("can't unlock a pseudoterminal: %s %s\n",
+ pts_name, strerror(errno));
+ goto _close_pty;
+ }
+ console_poll[MONITOR_FD].fd = pty_fd;
+ console_poll[MONITOR_FD].events = POLLIN;
+
+ console_poll[VIRTIO_CONSOLE_FD].fd = mic->mic_console.virtio_console_fd;
+ console_poll[VIRTIO_CONSOLE_FD].events = POLLIN;
+
+ if (MAP_FAILED == init_vr(mic, mic->mic_console.virtio_console_fd,
+ VIRTIO_ID_CONSOLE, &tx_vr, &rx_vr,
+ virtcons_dev_page.dd.num_vq)) {
+ mpsslog("%s init_vr failed %s\n",
+ mic->name, strerror(errno));
+ goto _close_pty;
+ }
+
+ copy.iovcnt = 1;
+ desc = get_device_desc(mic, VIRTIO_ID_CONSOLE);
+
+ for (;;) {
+ console_poll[MONITOR_FD].revents = 0;
+ console_poll[VIRTIO_CONSOLE_FD].revents = 0;
+ err = poll(console_poll, MAX_CONSOLE_FD, -1);
+ if (err < 0) {
+ mpsslog("%s %d: poll failed: %s\n", __func__, __LINE__,
+ strerror(errno));
+ continue;
+ }
+ if (!(desc->status & VIRTIO_CONFIG_S_DRIVER_OK))
+ wait_for_card_driver(mic,
+ mic->mic_console.virtio_console_fd,
+ VIRTIO_ID_CONSOLE);
+
+ if (console_poll[MONITOR_FD].revents & POLLIN) {
+ copy.iov = iov0;
+ len = readv(pty_fd, copy.iov, copy.iovcnt);
+ if (len > 0) {
+#ifdef DEBUG
+ disp_iovec(mic, copy, __func__, __LINE__);
+ mpsslog("%s %s %d read from tap 0x%lx\n",
+ mic->name, __func__, __LINE__,
+ len);
+#endif
+ spin_for_descriptors(mic, &tx_vr);
+ txrx_prepare(VIRTIO_ID_CONSOLE, 1, &tx_vr,
+ &copy, len);
+
+ err = mic_virtio_copy(mic,
+ mic->mic_console.virtio_console_fd,
+ &tx_vr, &copy);
+ if (err < 0) {
+ mpsslog("%s %s %d mic_virtio_copy %s\n",
+ mic->name, __func__, __LINE__,
+ strerror(errno));
+ }
+ if (!err)
+ verify_out_len(mic, &copy);
+#ifdef DEBUG
+ disp_iovec(mic, copy, __func__, __LINE__);
+ mpsslog("%s %s %d wrote to net 0x%lx\n",
+ mic->name, __func__, __LINE__,
+ sum_iovec_len(copy));
+#endif
+ /* Reinitialize IOV for next run */
+ iov0->iov_len = PAGE_SIZE;
+ } else if (len < 0) {
+ disp_iovec(mic, &copy, __func__, __LINE__);
+ mpsslog("%s %s %d read failed %s ",
+ mic->name, __func__, __LINE__,
+ strerror(errno));
+ mpsslog("cnt %d sum %zd\n",
+ copy.iovcnt, sum_iovec_len(&copy));
+ }
+ }
+
+ if (console_poll[VIRTIO_CONSOLE_FD].revents & POLLIN) {
+ while (rx_vr.info->avail_idx !=
+ le16toh(rx_vr.vr.avail->idx)) {
+ copy.iov = iov1;
+ txrx_prepare(VIRTIO_ID_CONSOLE, 0, &rx_vr,
+ &copy, PAGE_SIZE);
+
+ err = mic_virtio_copy(mic,
+ mic->mic_console.virtio_console_fd,
+ &rx_vr, &copy);
+ if (!err) {
+ /* Set the correct output iov_len */
+ iov1->iov_len = copy.out_len;
+ verify_out_len(mic, &copy);
+#ifdef DEBUG
+ disp_iovec(mic, copy, __func__,
+ __LINE__);
+ mpsslog("%s %s %d ",
+ mic->name, __func__, __LINE__);
+ mpsslog("read from net 0x%lx\n",
+ sum_iovec_len(copy));
+#endif
+ len = writev(pty_fd,
+ copy.iov, copy.iovcnt);
+ if (len != sum_iovec_len(&copy)) {
+ mpsslog("Tun write failed %s ",
+ strerror(errno));
+ mpsslog("len 0x%zx ", len);
+ mpsslog("read_len 0x%zx\n",
+ sum_iovec_len(&copy));
+ } else {
+#ifdef DEBUG
+ disp_iovec(mic, copy, __func__,
+ __LINE__);
+ mpsslog("%s %s %d ",
+ mic->name, __func__,
+ __LINE__);
+ mpsslog("wrote to tap 0x%lx\n",
+ len);
+#endif
+ }
+ } else {
+ mpsslog("%s %s %d mic_virtio_copy %s\n",
+ mic->name, __func__, __LINE__,
+ strerror(errno));
+ break;
+ }
+ }
+ }
+ if (console_poll[NET_FD_VIRTIO_NET].revents & POLLERR)
+ mpsslog("%s: %s: POLLERR\n", __func__, mic->name);
+ }
+_close_pty:
+ close(pty_fd);
+_return:
+ pthread_exit(NULL);
+}
+
+static void
+add_virtio_device(struct mic_info *mic, struct mic_device_desc *dd)
+{
+ char path[PATH_MAX];
+ int fd, err;
+
+ snprintf(path, PATH_MAX, "/dev/mic%d", mic->id);
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ mpsslog("Could not open %s %s\n", path, strerror(errno));
+ return;
+ }
+
+ err = ioctl(fd, MIC_VIRTIO_ADD_DEVICE, dd);
+ if (err < 0) {
+ mpsslog("Could not add %d %s\n", dd->type, strerror(errno));
+ close(fd);
+ return;
+ }
+ switch (dd->type) {
+ case VIRTIO_ID_NET:
+ mic->mic_net.virtio_net_fd = fd;
+ mpsslog("Added VIRTIO_ID_NET for %s\n", mic->name);
+ break;
+ case VIRTIO_ID_CONSOLE:
+ mic->mic_console.virtio_console_fd = fd;
+ mpsslog("Added VIRTIO_ID_CONSOLE for %s\n", mic->name);
+ break;
+ case VIRTIO_ID_BLOCK:
+ mic->mic_virtblk.virtio_block_fd = fd;
+ mpsslog("Added VIRTIO_ID_BLOCK for %s\n", mic->name);
+ break;
+ }
+}
+
+static bool
+set_backend_file(struct mic_info *mic)
+{
+ FILE *config;
+ char buff[PATH_MAX], *line, *evv, *p;
+
+ snprintf(buff, PATH_MAX, "%s/mpssd%03d.conf", mic_config_dir, mic->id);
+ config = fopen(buff, "r");
+ if (config == NULL)
+ return false;
+ do { /* look for "virtblk_backend=XXXX" */
+ line = fgets(buff, PATH_MAX, config);
+ if (line == NULL)
+ break;
+ if (*line == '#')
+ continue;
+ p = strchr(line, '\n');
+ if (p)
+ *p = '\0';
+ } while (strncmp(line, virtblk_backend, strlen(virtblk_backend)) != 0);
+ fclose(config);
+ if (line == NULL)
+ return false;
+ evv = strchr(line, '=');
+ if (evv == NULL)
+ return false;
+ mic->mic_virtblk.backend_file = malloc(strlen(evv) + 1);
+ if (mic->mic_virtblk.backend_file == NULL) {
+ mpsslog("%s %d can't allocate memory\n", mic->name, mic->id);
+ return false;
+ }
+ strcpy(mic->mic_virtblk.backend_file, evv + 1);
+ return true;
+}
+
+#define SECTOR_SIZE 512
+static bool
+set_backend_size(struct mic_info *mic)
+{
+ mic->mic_virtblk.backend_size = lseek(mic->mic_virtblk.backend, 0,
+ SEEK_END);
+ if (mic->mic_virtblk.backend_size < 0) {
+ mpsslog("%s: can't seek: %s\n",
+ mic->name, mic->mic_virtblk.backend_file);
+ return false;
+ }
+ virtblk_dev_page.blk_config.capacity =
+ mic->mic_virtblk.backend_size / SECTOR_SIZE;
+ if ((mic->mic_virtblk.backend_size % SECTOR_SIZE) != 0)
+ virtblk_dev_page.blk_config.capacity++;
+
+ virtblk_dev_page.blk_config.capacity =
+ htole64(virtblk_dev_page.blk_config.capacity);
+
+ return true;
+}
+
+static bool
+open_backend(struct mic_info *mic)
+{
+ if (!set_backend_file(mic))
+ goto _error_exit;
+ mic->mic_virtblk.backend = open(mic->mic_virtblk.backend_file, O_RDWR);
+ if (mic->mic_virtblk.backend < 0) {
+ mpsslog("%s: can't open: %s\n", mic->name,
+ mic->mic_virtblk.backend_file);
+ goto _error_free;
+ }
+ if (!set_backend_size(mic))
+ goto _error_close;
+ mic->mic_virtblk.backend_addr = mmap(NULL,
+ mic->mic_virtblk.backend_size,
+ PROT_READ|PROT_WRITE, MAP_SHARED,
+ mic->mic_virtblk.backend, 0L);
+ if (mic->mic_virtblk.backend_addr == MAP_FAILED) {
+ mpsslog("%s: can't map: %s %s\n",
+ mic->name, mic->mic_virtblk.backend_file,
+ strerror(errno));
+ goto _error_close;
+ }
+ return true;
+
+ _error_close:
+ close(mic->mic_virtblk.backend);
+ _error_free:
+ free(mic->mic_virtblk.backend_file);
+ _error_exit:
+ return false;
+}
+
+static void
+close_backend(struct mic_info *mic)
+{
+ munmap(mic->mic_virtblk.backend_addr, mic->mic_virtblk.backend_size);
+ close(mic->mic_virtblk.backend);
+ free(mic->mic_virtblk.backend_file);
+}
+
+static bool
+start_virtblk(struct mic_info *mic, struct mic_vring *vring)
+{
+ if (((unsigned long)&virtblk_dev_page.blk_config % 8) != 0) {
+ mpsslog("%s: blk_config is not 8 byte aligned.\n",
+ mic->name);
+ return false;
+ }
+ add_virtio_device(mic, &virtblk_dev_page.dd);
+ if (MAP_FAILED == init_vr(mic, mic->mic_virtblk.virtio_block_fd,
+ VIRTIO_ID_BLOCK, vring, NULL,
+ virtblk_dev_page.dd.num_vq)) {
+ mpsslog("%s init_vr failed %s\n",
+ mic->name, strerror(errno));
+ return false;
+ }
+ return true;
+}
+
+static void
+stop_virtblk(struct mic_info *mic)
+{
+ int vr_size, ret;
+
+ vr_size = PAGE_ALIGN(vring_size(MIC_VRING_ENTRIES,
+ MIC_VIRTIO_RING_ALIGN) + sizeof(struct _mic_vring_info));
+ ret = munmap(mic->mic_virtblk.block_dp,
+ MIC_DEVICE_PAGE_END + vr_size * virtblk_dev_page.dd.num_vq);
+ if (ret < 0)
+ mpsslog("%s munmap errno %d\n", mic->name, errno);
+ close(mic->mic_virtblk.virtio_block_fd);
+}
+
+static __u8
+header_error_check(struct vring_desc *desc)
+{
+ if (le32toh(desc->len) != sizeof(struct virtio_blk_outhdr)) {
+ mpsslog("%s() %d: length is not sizeof(virtio_blk_outhd)\n",
+ __func__, __LINE__);
+ return -EIO;
+ }
+ if (!(le16toh(desc->flags) & VRING_DESC_F_NEXT)) {
+ mpsslog("%s() %d: alone\n",
+ __func__, __LINE__);
+ return -EIO;
+ }
+ if (le16toh(desc->flags) & VRING_DESC_F_WRITE) {
+ mpsslog("%s() %d: not read\n",
+ __func__, __LINE__);
+ return -EIO;
+ }
+ return 0;
+}
+
+static int
+read_header(int fd, struct virtio_blk_outhdr *hdr, __u32 desc_idx)
+{
+ struct iovec iovec;
+ struct mic_copy_desc copy;
+
+ iovec.iov_len = sizeof(*hdr);
+ iovec.iov_base = hdr;
+ copy.iov = &iovec;
+ copy.iovcnt = 1;
+ copy.vr_idx = 0; /* only one vring on virtio_block */
+ copy.update_used = false; /* do not update used index */
+ return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
+}
+
+static int
+transfer_blocks(int fd, struct iovec *iovec, __u32 iovcnt)
+{
+ struct mic_copy_desc copy;
+
+ copy.iov = iovec;
+ copy.iovcnt = iovcnt;
+ copy.vr_idx = 0; /* only one vring on virtio_block */
+ copy.update_used = false; /* do not update used index */
+ return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
+}
+
+static __u8
+status_error_check(struct vring_desc *desc)
+{
+ if (le32toh(desc->len) != sizeof(__u8)) {
+ mpsslog("%s() %d: length is not sizeof(status)\n",
+ __func__, __LINE__);
+ return -EIO;
+ }
+ return 0;
+}
+
+static int
+write_status(int fd, __u8 *status)
+{
+ struct iovec iovec;
+ struct mic_copy_desc copy;
+
+ iovec.iov_base = status;
+ iovec.iov_len = sizeof(*status);
+ copy.iov = &iovec;
+ copy.iovcnt = 1;
+ copy.vr_idx = 0; /* only one vring on virtio_block */
+ copy.update_used = true; /* Update used index */
+ return ioctl(fd, MIC_VIRTIO_COPY_DESC, &copy);
+}
+
+static void *
+virtio_block(void *arg)
+{
+ struct mic_info *mic = (struct mic_info *)arg;
+ int ret;
+ struct pollfd block_poll;
+ struct mic_vring vring;
+ __u16 avail_idx;
+ __u32 desc_idx;
+ struct vring_desc *desc;
+ struct iovec *iovec, *piov;
+ __u8 status;
+ __u32 buffer_desc_idx;
+ struct virtio_blk_outhdr hdr;
+ void *fos;
+
+ for (;;) { /* forever */
+ if (!open_backend(mic)) { /* No virtblk */
+ for (mic->mic_virtblk.signaled = 0;
+ !mic->mic_virtblk.signaled;)
+ sleep(1);
+ continue;
+ }
+
+ /* backend file is specified. */
+ if (!start_virtblk(mic, &vring))
+ goto _close_backend;
+ iovec = malloc(sizeof(*iovec) *
+ le32toh(virtblk_dev_page.blk_config.seg_max));
+ if (!iovec) {
+ mpsslog("%s: can't alloc iovec: %s\n",
+ mic->name, strerror(ENOMEM));
+ goto _stop_virtblk;
+ }
+
+ block_poll.fd = mic->mic_virtblk.virtio_block_fd;
+ block_poll.events = POLLIN;
+ for (mic->mic_virtblk.signaled = 0;
+ !mic->mic_virtblk.signaled;) {
+ block_poll.revents = 0;
+ /* timeout in 1 sec to see signaled */
+ ret = poll(&block_poll, 1, 1000);
+ if (ret < 0) {
+ mpsslog("%s %d: poll failed: %s\n",
+ __func__, __LINE__,
+ strerror(errno));
+ continue;
+ }
+
+ if (!(block_poll.revents & POLLIN)) {
+#ifdef DEBUG
+ mpsslog("%s %d: block_poll.revents=0x%x\n",
+ __func__, __LINE__, block_poll.revents);
+#endif
+ continue;
+ }
+
+ /* POLLIN */
+ while (vring.info->avail_idx !=
+ le16toh(vring.vr.avail->idx)) {
+ /* read header element */
+ avail_idx =
+ vring.info->avail_idx &
+ (vring.vr.num - 1);
+ desc_idx = le16toh(
+ vring.vr.avail->ring[avail_idx]);
+ desc = &vring.vr.desc[desc_idx];
+#ifdef DEBUG
+ mpsslog("%s() %d: avail_idx=%d ",
+ __func__, __LINE__,
+ vring.info->avail_idx);
+ mpsslog("vring.vr.num=%d desc=%p\n",
+ vring.vr.num, desc);
+#endif
+ status = header_error_check(desc);
+ ret = read_header(
+ mic->mic_virtblk.virtio_block_fd,
+ &hdr, desc_idx);
+ if (ret < 0) {
+ mpsslog("%s() %d %s: ret=%d %s\n",
+ __func__, __LINE__,
+ mic->name, ret,
+ strerror(errno));
+ break;
+ }
+ /* buffer element */
+ piov = iovec;
+ status = 0;
+ fos = mic->mic_virtblk.backend_addr +
+ (hdr.sector * SECTOR_SIZE);
+ buffer_desc_idx = next_desc(desc);
+ desc_idx = buffer_desc_idx;
+ for (desc = &vring.vr.desc[buffer_desc_idx];
+ desc->flags & VRING_DESC_F_NEXT;
+ desc_idx = next_desc(desc),
+ desc = &vring.vr.desc[desc_idx]) {
+ piov->iov_len = desc->len;
+ piov->iov_base = fos;
+ piov++;
+ fos += desc->len;
+ }
+ /* Returning NULLs for VIRTIO_BLK_T_GET_ID. */
+ if (hdr.type & ~(VIRTIO_BLK_T_OUT |
+ VIRTIO_BLK_T_GET_ID)) {
+ /*
+ VIRTIO_BLK_T_IN - does not do
+ anything. Probably for documenting.
+ VIRTIO_BLK_T_SCSI_CMD - for
+ virtio_scsi.
+ VIRTIO_BLK_T_FLUSH - turned off in
+ config space.
+ VIRTIO_BLK_T_BARRIER - defined but not
+ used in anywhere.
+ */
+ mpsslog("%s() %d: type %x ",
+ __func__, __LINE__,
+ hdr.type);
+ mpsslog("is not supported\n");
+ status = -ENOTSUP;
+
+ } else {
+ ret = transfer_blocks(
+ mic->mic_virtblk.virtio_block_fd,
+ iovec,
+ piov - iovec);
+ if (ret < 0 &&
+ status != 0)
+ status = ret;
+ }
+ /* write status and update used pointer */
+ if (status != 0)
+ status = status_error_check(desc);
+ ret = write_status(
+ mic->mic_virtblk.virtio_block_fd,
+ &status);
+#ifdef DEBUG
+ mpsslog("%s() %d: write status=%d on desc=%p\n",
+ __func__, __LINE__,
+ status, desc);
+#endif
+ }
+ }
+ free(iovec);
+_stop_virtblk:
+ stop_virtblk(mic);
+_close_backend:
+ close_backend(mic);
+ } /* forever */
+
+ pthread_exit(NULL);
+}
+
+static void
+reset(struct mic_info *mic)
+{
+#define RESET_TIMEOUT 120
+ int i = RESET_TIMEOUT;
+ setsysfs(mic->name, "state", "reset");
+ while (i) {
+ char *state;
+ state = readsysfs(mic->name, "state");
+ if (!state)
+ goto retry;
+ mpsslog("%s: %s %d state %s\n",
+ mic->name, __func__, __LINE__, state);
+
+ /*
+ * If the shutdown was initiated by OSPM, the state stays
+ * in "suspended" which is also a valid condition for reset.
+ */
+ if ((!strcmp(state, "offline")) ||
+ (!strcmp(state, "suspended"))) {
+ free(state);
+ break;
+ }
+ free(state);
+retry:
+ sleep(1);
+ i--;
+ }
+}
+
+static int
+get_mic_shutdown_status(struct mic_info *mic, char *shutdown_status)
+{
+ if (!strcmp(shutdown_status, "nop"))
+ return MIC_NOP;
+ if (!strcmp(shutdown_status, "crashed"))
+ return MIC_CRASHED;
+ if (!strcmp(shutdown_status, "halted"))
+ return MIC_HALTED;
+ if (!strcmp(shutdown_status, "poweroff"))
+ return MIC_POWER_OFF;
+ if (!strcmp(shutdown_status, "restart"))
+ return MIC_RESTART;
+ mpsslog("%s: BUG invalid status %s\n", mic->name, shutdown_status);
+ /* Invalid state */
+ assert(0);
+};
+
+static int get_mic_state(struct mic_info *mic, char *state)
+{
+ if (!strcmp(state, "offline"))
+ return MIC_OFFLINE;
+ if (!strcmp(state, "online"))
+ return MIC_ONLINE;
+ if (!strcmp(state, "shutting_down"))
+ return MIC_SHUTTING_DOWN;
+ if (!strcmp(state, "reset_failed"))
+ return MIC_RESET_FAILED;
+ if (!strcmp(state, "suspending"))
+ return MIC_SUSPENDING;
+ if (!strcmp(state, "suspended"))
+ return MIC_SUSPENDED;
+ mpsslog("%s: BUG invalid state %s\n", mic->name, state);
+ /* Invalid state */
+ assert(0);
+};
+
+static void mic_handle_shutdown(struct mic_info *mic)
+{
+#define SHUTDOWN_TIMEOUT 60
+ int i = SHUTDOWN_TIMEOUT, ret, stat = 0;
+ char *shutdown_status;
+ while (i) {
+ shutdown_status = readsysfs(mic->name, "shutdown_status");
+ if (!shutdown_status)
+ continue;
+ mpsslog("%s: %s %d shutdown_status %s\n",
+ mic->name, __func__, __LINE__, shutdown_status);
+ switch (get_mic_shutdown_status(mic, shutdown_status)) {
+ case MIC_RESTART:
+ mic->restart = 1;
+ case MIC_HALTED:
+ case MIC_POWER_OFF:
+ case MIC_CRASHED:
+ free(shutdown_status);
+ goto reset;
+ default:
+ break;
+ }
+ free(shutdown_status);
+ sleep(1);
+ i--;
+ }
+reset:
+ ret = kill(mic->pid, SIGTERM);
+ mpsslog("%s: %s %d kill pid %d ret %d\n",
+ mic->name, __func__, __LINE__,
+ mic->pid, ret);
+ if (!ret) {
+ ret = waitpid(mic->pid, &stat,
+ WIFSIGNALED(stat));
+ mpsslog("%s: %s %d waitpid ret %d pid %d\n",
+ mic->name, __func__, __LINE__,
+ ret, mic->pid);
+ }
+ if (ret == mic->pid)
+ reset(mic);
+}
+
+static void *
+mic_config(void *arg)
+{
+ struct mic_info *mic = (struct mic_info *)arg;
+ char *state = NULL;
+ char pathname[PATH_MAX];
+ int fd, ret;
+ struct pollfd ufds[1];
+ char value[4096];
+
+ snprintf(pathname, PATH_MAX - 1, "%s/%s/%s",
+ MICSYSFSDIR, mic->name, "state");
+
+ fd = open(pathname, O_RDONLY);
+ if (fd < 0) {
+ mpsslog("%s: opening file %s failed %s\n",
+ mic->name, pathname, strerror(errno));
+ goto error;
+ }
+
+ do {
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ mpsslog("%s: Failed to seek to file start '%s': %s\n",
+ mic->name, pathname, strerror(errno));
+ goto close_error1;
+ }
+ ret = read(fd, value, sizeof(value));
+ if (ret < 0) {
+ mpsslog("%s: Failed to read sysfs entry '%s': %s\n",
+ mic->name, pathname, strerror(errno));
+ goto close_error1;
+ }
+retry:
+ state = readsysfs(mic->name, "state");
+ if (!state)
+ goto retry;
+ mpsslog("%s: %s %d state %s\n",
+ mic->name, __func__, __LINE__, state);
+ switch (get_mic_state(mic, state)) {
+ case MIC_SHUTTING_DOWN:
+ mic_handle_shutdown(mic);
+ goto close_error;
+ case MIC_SUSPENDING:
+ mic->boot_on_resume = 1;
+ setsysfs(mic->name, "state", "suspend");
+ mic_handle_shutdown(mic);
+ goto close_error;
+ case MIC_OFFLINE:
+ if (mic->boot_on_resume) {
+ setsysfs(mic->name, "state", "boot");
+ mic->boot_on_resume = 0;
+ }
+ break;
+ default:
+ break;
+ }
+ free(state);
+
+ ufds[0].fd = fd;
+ ufds[0].events = POLLERR | POLLPRI;
+ ret = poll(ufds, 1, -1);
+ if (ret < 0) {
+ mpsslog("%s: poll failed %s\n",
+ mic->name, strerror(errno));
+ goto close_error1;
+ }
+ } while (1);
+close_error:
+ free(state);
+close_error1:
+ close(fd);
+error:
+ init_mic(mic);
+ pthread_exit(NULL);
+}
+
+static void
+set_cmdline(struct mic_info *mic)
+{
+ char buffer[PATH_MAX];
+ int len;
+
+ len = snprintf(buffer, PATH_MAX,
+ "clocksource=tsc highres=off nohz=off ");
+ len += snprintf(buffer + len, PATH_MAX - len,
+ "cpufreq_on;corec6_off;pc3_off;pc6_off ");
+ len += snprintf(buffer + len, PATH_MAX - len,
+ "ifcfg=static;address,172.31.%d.1;netmask,255.255.255.0",
+ mic->id);
+
+ setsysfs(mic->name, "cmdline", buffer);
+ mpsslog("%s: Command line: \"%s\"\n", mic->name, buffer);
+ snprintf(buffer, PATH_MAX, "172.31.%d.1", mic->id);
+ mpsslog("%s: IPADDR: \"%s\"\n", mic->name, buffer);
+}
+
+static void
+set_log_buf_info(struct mic_info *mic)
+{
+ int fd;
+ off_t len;
+ char system_map[] = "/lib/firmware/mic/System.map";
+ char *map, *temp, log_buf[17] = {'\0'};
+
+ fd = open(system_map, O_RDONLY);
+ if (fd < 0) {
+ mpsslog("%s: Opening System.map failed: %d\n",
+ mic->name, errno);
+ return;
+ }
+ len = lseek(fd, 0, SEEK_END);
+ if (len < 0) {
+ mpsslog("%s: Reading System.map size failed: %d\n",
+ mic->name, errno);
+ close(fd);
+ return;
+ }
+ map = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (map == MAP_FAILED) {
+ mpsslog("%s: mmap of System.map failed: %d\n",
+ mic->name, errno);
+ close(fd);
+ return;
+ }
+ temp = strstr(map, "__log_buf");
+ if (!temp) {
+ mpsslog("%s: __log_buf not found: %d\n", mic->name, errno);
+ munmap(map, len);
+ close(fd);
+ return;
+ }
+ strncpy(log_buf, temp - 19, 16);
+ setsysfs(mic->name, "log_buf_addr", log_buf);
+ mpsslog("%s: log_buf_addr: %s\n", mic->name, log_buf);
+ temp = strstr(map, "log_buf_len");
+ if (!temp) {
+ mpsslog("%s: log_buf_len not found: %d\n", mic->name, errno);
+ munmap(map, len);
+ close(fd);
+ return;
+ }
+ strncpy(log_buf, temp - 19, 16);
+ setsysfs(mic->name, "log_buf_len", log_buf);
+ mpsslog("%s: log_buf_len: %s\n", mic->name, log_buf);
+ munmap(map, len);
+ close(fd);
+}
+
+static void init_mic(struct mic_info *mic);
+
+static void
+change_virtblk_backend(int x, siginfo_t *siginfo, void *p)
+{
+ struct mic_info *mic;
+
+ for (mic = mic_list.next; mic != NULL; mic = mic->next)
+ mic->mic_virtblk.signaled = 1/* true */;
+}
+
+static void
+init_mic(struct mic_info *mic)
+{
+ struct sigaction ignore = {
+ .sa_flags = 0,
+ .sa_handler = SIG_IGN
+ };
+ struct sigaction act = {
+ .sa_flags = SA_SIGINFO,
+ .sa_sigaction = change_virtblk_backend,
+ };
+ char buffer[PATH_MAX];
+ int err;
+
+ /*
+ * Currently, one virtio block device is supported for each MIC card
+ * at a time. Any user (or test) can send a SIGUSR1 to the MIC daemon.
+ * The signal informs the virtio block backend about a change in the
+ * configuration file which specifies the virtio backend file name on
+ * the host. Virtio block backend then re-reads the configuration file
+ * and switches to the new block device. This signalling mechanism may
+ * not be required once multiple virtio block devices are supported by
+ * the MIC daemon.
+ */
+ sigaction(SIGUSR1, &ignore, NULL);
+
+ mic->pid = fork();
+ switch (mic->pid) {
+ case 0:
+ set_log_buf_info(mic);
+ set_cmdline(mic);
+ add_virtio_device(mic, &virtcons_dev_page.dd);
+ add_virtio_device(mic, &virtnet_dev_page.dd);
+ err = pthread_create(&mic->mic_console.console_thread, NULL,
+ virtio_console, mic);
+ if (err)
+ mpsslog("%s virtcons pthread_create failed %s\n",
+ mic->name, strerror(err));
+ err = pthread_create(&mic->mic_net.net_thread, NULL,
+ virtio_net, mic);
+ if (err)
+ mpsslog("%s virtnet pthread_create failed %s\n",
+ mic->name, strerror(err));
+ err = pthread_create(&mic->mic_virtblk.block_thread, NULL,
+ virtio_block, mic);
+ if (err)
+ mpsslog("%s virtblk pthread_create failed %s\n",
+ mic->name, strerror(err));
+ sigemptyset(&act.sa_mask);
+ err = sigaction(SIGUSR1, &act, NULL);
+ if (err)
+ mpsslog("%s sigaction SIGUSR1 failed %s\n",
+ mic->name, strerror(errno));
+ while (1)
+ sleep(60);
+ case -1:
+ mpsslog("fork failed MIC name %s id %d errno %d\n",
+ mic->name, mic->id, errno);
+ break;
+ default:
+ if (mic->restart) {
+ snprintf(buffer, PATH_MAX, "boot");
+ setsysfs(mic->name, "state", buffer);
+ mpsslog("%s restarting mic %d\n",
+ mic->name, mic->restart);
+ mic->restart = 0;
+ }
+ pthread_create(&mic->config_thread, NULL, mic_config, mic);
+ }
+}
+
+static void
+start_daemon(void)
+{
+ struct mic_info *mic;
+
+ for (mic = mic_list.next; mic != NULL; mic = mic->next)
+ init_mic(mic);
+
+ while (1)
+ sleep(60);
+}
+
+static int
+init_mic_list(void)
+{
+ struct mic_info *mic = &mic_list;
+ struct dirent *file;
+ DIR *dp;
+ int cnt = 0;
+
+ dp = opendir(MICSYSFSDIR);
+ if (!dp)
+ return 0;
+
+ while ((file = readdir(dp)) != NULL) {
+ if (!strncmp(file->d_name, "mic", 3)) {
+ mic->next = calloc(1, sizeof(struct mic_info));
+ if (mic->next) {
+ mic = mic->next;
+ mic->id = atoi(&file->d_name[3]);
+ mic->name = malloc(strlen(file->d_name) + 16);
+ if (mic->name)
+ strcpy(mic->name, file->d_name);
+ mpsslog("MIC name %s id %d\n", mic->name,
+ mic->id);
+ cnt++;
+ }
+ }
+ }
+
+ closedir(dp);
+ return cnt;
+}
+
+void
+mpsslog(char *format, ...)
+{
+ va_list args;
+ char buffer[4096];
+ char ts[52], *ts1;
+ time_t t;
+
+ if (logfp == NULL)
+ return;
+
+ va_start(args, format);
+ vsprintf(buffer, format, args);
+ va_end(args);
+
+ time(&t);
+ ts1 = ctime_r(&t, ts);
+ ts1[strlen(ts1) - 1] = '\0';
+ fprintf(logfp, "%s: %s", ts1, buffer);
+
+ fflush(logfp);
+}
+
+int
+main(int argc, char *argv[])
+{
+ int cnt;
+ pid_t pid;
+
+ myname = argv[0];
+
+ logfp = fopen(LOGFILE_NAME, "a+");
+ if (!logfp) {
+ fprintf(stderr, "cannot open logfile '%s'\n", LOGFILE_NAME);
+ exit(1);
+ }
+ pid = fork();
+ switch (pid) {
+ case 0:
+ break;
+ case -1:
+ exit(2);
+ default:
+ exit(0);
+ }
+
+ mpsslog("MIC Daemon start\n");
+
+ cnt = init_mic_list();
+ if (cnt == 0) {
+ mpsslog("MIC module not loaded\n");
+ exit(3);
+ }
+ mpsslog("MIC found %d devices\n", cnt);
+
+ start_daemon();
+
+ exit(0);
+}
diff --git a/Documentation/mic/mpssd/mpssd.h b/Documentation/mic/mpssd/mpssd.h
new file mode 100644
index 000000000..f5f18b15d
--- /dev/null
+++ b/Documentation/mic/mpssd/mpssd.h
@@ -0,0 +1,102 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC User Space Tools.
+ */
+#ifndef _MPSSD_H_
+#define _MPSSD_H_
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <time.h>
+#include <errno.h>
+#include <sys/dir.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <pthread.h>
+#include <signal.h>
+#include <limits.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <linux/if_tun.h>
+#include <linux/virtio_ids.h>
+
+#define MICSYSFSDIR "/sys/class/mic"
+#define LOGFILE_NAME "/var/log/mpssd"
+#define PAGE_SIZE 4096
+
+struct mic_console_info {
+ pthread_t console_thread;
+ int virtio_console_fd;
+ void *console_dp;
+};
+
+struct mic_net_info {
+ pthread_t net_thread;
+ int virtio_net_fd;
+ int tap_fd;
+ void *net_dp;
+};
+
+struct mic_virtblk_info {
+ pthread_t block_thread;
+ int virtio_block_fd;
+ void *block_dp;
+ volatile sig_atomic_t signaled;
+ char *backend_file;
+ int backend;
+ void *backend_addr;
+ long backend_size;
+};
+
+struct mic_info {
+ int id;
+ char *name;
+ pthread_t config_thread;
+ pid_t pid;
+ struct mic_console_info mic_console;
+ struct mic_net_info mic_net;
+ struct mic_virtblk_info mic_virtblk;
+ int restart;
+ int boot_on_resume;
+ struct mic_info *next;
+};
+
+__attribute__((format(printf, 1, 2)))
+void mpsslog(char *format, ...);
+char *readsysfs(char *dir, char *entry);
+int setsysfs(char *dir, char *entry, char *value);
+#endif
diff --git a/Documentation/mic/mpssd/sysfs.c b/Documentation/mic/mpssd/sysfs.c
new file mode 100644
index 000000000..8dd326936
--- /dev/null
+++ b/Documentation/mic/mpssd/sysfs.c
@@ -0,0 +1,102 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Intel MIC User Space Tools.
+ */
+
+#include "mpssd.h"
+
+#define PAGE_SIZE 4096
+
+char *
+readsysfs(char *dir, char *entry)
+{
+ char filename[PATH_MAX];
+ char value[PAGE_SIZE];
+ char *string = NULL;
+ int fd;
+ int len;
+
+ if (dir == NULL)
+ snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
+ else
+ snprintf(filename, PATH_MAX,
+ "%s/%s/%s", MICSYSFSDIR, dir, entry);
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ mpsslog("Failed to open sysfs entry '%s': %s\n",
+ filename, strerror(errno));
+ return NULL;
+ }
+
+ len = read(fd, value, sizeof(value));
+ if (len < 0) {
+ mpsslog("Failed to read sysfs entry '%s': %s\n",
+ filename, strerror(errno));
+ goto readsys_ret;
+ }
+ if (len == 0)
+ goto readsys_ret;
+
+ value[len - 1] = '\0';
+
+ string = malloc(strlen(value) + 1);
+ if (string)
+ strcpy(string, value);
+
+readsys_ret:
+ close(fd);
+ return string;
+}
+
+int
+setsysfs(char *dir, char *entry, char *value)
+{
+ char filename[PATH_MAX];
+ char *oldvalue;
+ int fd, ret = 0;
+
+ if (dir == NULL)
+ snprintf(filename, PATH_MAX, "%s/%s", MICSYSFSDIR, entry);
+ else
+ snprintf(filename, PATH_MAX, "%s/%s/%s",
+ MICSYSFSDIR, dir, entry);
+
+ oldvalue = readsysfs(dir, entry);
+
+ fd = open(filename, O_RDWR);
+ if (fd < 0) {
+ ret = errno;
+ mpsslog("Failed to open sysfs entry '%s': %s\n",
+ filename, strerror(errno));
+ goto done;
+ }
+
+ if (!oldvalue || strcmp(value, oldvalue)) {
+ if (write(fd, value, strlen(value)) < 0) {
+ ret = errno;
+ mpsslog("Failed to write new sysfs entry '%s': %s\n",
+ filename, strerror(errno));
+ }
+ }
+ close(fd);
+done:
+ if (oldvalue)
+ free(oldvalue);
+ return ret;
+}
diff --git a/Documentation/mips/00-INDEX b/Documentation/mips/00-INDEX
new file mode 100644
index 000000000..8ae9cffc2
--- /dev/null
+++ b/Documentation/mips/00-INDEX
@@ -0,0 +1,4 @@
+00-INDEX
+ - this file.
+AU1xxx_IDE.README
+ - README for MIPS AU1XXX IDE driver.
diff --git a/Documentation/mips/AU1xxx_IDE.README b/Documentation/mips/AU1xxx_IDE.README
new file mode 100644
index 000000000..52844a58c
--- /dev/null
+++ b/Documentation/mips/AU1xxx_IDE.README
@@ -0,0 +1,122 @@
+README for MIPS AU1XXX IDE driver - Released 2005-07-15
+
+ABOUT
+-----
+This file describes the 'drivers/ide/au1xxx-ide.c', related files and the
+services they provide.
+
+If you are short in patience and just want to know how to add your hard disc to
+the white or black list, go to the 'ADD NEW HARD DISC TO WHITE OR BLACK LIST'
+section.
+
+
+LICENSE
+-------
+
+Copyright (c) 2003-2005 AMD, Personal Connectivity Solutions
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 2 of the License, or (at your option) any later
+version.
+
+THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+675 Mass Ave, Cambridge, MA 02139, USA.
+
+Note: for more information, please refer "AMD Alchemy Au1200/Au1550 IDE
+ Interface and Linux Device Driver" Application Note.
+
+
+FILES, CONFIGS AND COMPATIBILITY
+--------------------------------
+
+Two files are introduced:
+
+ a) 'arch/mips/include/asm/mach-au1x00/au1xxx_ide.h'
+ contains : struct _auide_hwif
+ timing parameters for PIO mode 0/1/2/3/4
+ timing parameters for MWDMA 0/1/2
+
+ b) 'drivers/ide/mips/au1xxx-ide.c'
+ contains the functionality of the AU1XXX IDE driver
+
+Following extra configs variables are introduced:
+
+ CONFIG_BLK_DEV_IDE_AU1XXX_PIO_DBDMA - enable the PIO+DBDMA mode
+ CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA - enable the MWDMA mode
+ CONFIG_BLK_DEV_IDE_AU1XXX_BURSTABLE_ON - set Burstable FIFO in DBDMA
+ controller
+
+
+SUPPORTED IDE MODES
+-------------------
+
+The AU1XXX IDE driver supported all PIO modes - PIO mode 0/1/2/3/4 - and all
+MWDMA modes - MWDMA 0/1/2 -. There is no support for SWDMA and UDMA mode.
+
+To change the PIO mode use the program hdparm with option -p, e.g.
+'hdparm -p0 [device]' for PIO mode 0. To enable the MWDMA mode use the option
+-X, e.g. 'hdparm -X32 [device]' for MWDMA mode 0.
+
+
+PERFORMANCE CONFIGURATIONS
+--------------------------
+
+If the used system doesn't need USB support enable the following kernel configs:
+
+CONFIG_IDE=y
+CONFIG_BLK_DEV_IDE=y
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+CONFIG_IDEDMA_PCI_AUTO=y
+CONFIG_BLK_DEV_IDE_AU1XXX=y
+CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA=y
+CONFIG_BLK_DEV_IDEDMA=y
+CONFIG_IDEDMA_AUTO=y
+
+Also define 'IDE_AU1XXX_BURSTMODE' in 'drivers/ide/mips/au1xxx-ide.c' to enable
+the burst support on DBDMA controller.
+
+If the used system need the USB support enable the following kernel configs for
+high IDE to USB throughput.
+
+CONFIG_BLK_DEV_IDEDISK=y
+CONFIG_IDE_GENERIC=y
+CONFIG_BLK_DEV_IDEPCI=y
+CONFIG_BLK_DEV_GENERIC=y
+CONFIG_BLK_DEV_IDEDMA_PCI=y
+CONFIG_IDEDMA_PCI_AUTO=y
+CONFIG_BLK_DEV_IDE_AU1XXX=y
+CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA=y
+CONFIG_BLK_DEV_IDEDMA=y
+CONFIG_IDEDMA_AUTO=y
+
+Also undefine 'IDE_AU1XXX_BURSTMODE' in 'drivers/ide/mips/au1xxx-ide.c' to
+disable the burst support on DBDMA controller.
+
+
+ACKNOWLEDGMENTS
+---------------
+
+These drivers wouldn't have been done without the base of kernel 2.4.x AU1XXX
+IDE driver from AMD.
+
+Additional input also from:
+Matthias Lenk <matthias.lenk@amd.com>
+
+Happy hacking!
+Enrico Walther <enrico.walther@amd.com>
diff --git a/Documentation/misc-devices/Makefile b/Documentation/misc-devices/Makefile
new file mode 100644
index 000000000..e2b7aa4c9
--- /dev/null
+++ b/Documentation/misc-devices/Makefile
@@ -0,0 +1 @@
+subdir-y := mei
diff --git a/Documentation/misc-devices/ad525x_dpot.txt b/Documentation/misc-devices/ad525x_dpot.txt
new file mode 100644
index 000000000..0c9413b1c
--- /dev/null
+++ b/Documentation/misc-devices/ad525x_dpot.txt
@@ -0,0 +1,57 @@
+---------------------------------
+ AD525x Digital Potentiometers
+---------------------------------
+
+The ad525x_dpot driver exports a simple sysfs interface. This allows you to
+work with the immediate resistance settings as well as update the saved startup
+settings. Access to the factory programmed tolerance is also provided, but
+interpretation of this settings is required by the end application according to
+the specific part in use.
+
+---------
+ Files
+---------
+
+Each dpot device will have a set of eeprom, rdac, and tolerance files. How
+many depends on the actual part you have, as will the range of allowed values.
+
+The eeprom files are used to program the startup value of the device.
+
+The rdac files are used to program the immediate value of the device.
+
+The tolerance files are the read-only factory programmed tolerance settings
+and may vary greatly on a part-by-part basis. For exact interpretation of
+this field, please consult the datasheet for your part. This is presented
+as a hex file for easier parsing.
+
+-----------
+ Example
+-----------
+
+Locate the device in your sysfs tree. This is probably easiest by going into
+the common i2c directory and locating the device by the i2c slave address.
+
+ # ls /sys/bus/i2c/devices/
+ 0-0022 0-0027 0-002f
+
+So assuming the device in question is on the first i2c bus and has the slave
+address of 0x2f, we descend (unrelated sysfs entries have been trimmed).
+
+ # ls /sys/bus/i2c/devices/0-002f/
+ eeprom0 rdac0 tolerance0
+
+You can use simple reads/writes to access these files:
+
+ # cd /sys/bus/i2c/devices/0-002f/
+
+ # cat eeprom0
+ 0
+ # echo 10 > eeprom0
+ # cat eeprom0
+ 10
+
+ # cat rdac0
+ 5
+ # echo 3 > rdac0
+ # cat rdac0
+ 3
diff --git a/Documentation/misc-devices/apds990x.txt b/Documentation/misc-devices/apds990x.txt
new file mode 100644
index 000000000..d5408cade
--- /dev/null
+++ b/Documentation/misc-devices/apds990x.txt
@@ -0,0 +1,111 @@
+Kernel driver apds990x
+======================
+
+Supported chips:
+Avago APDS990X
+
+Data sheet:
+Not freely available
+
+Author:
+Samu Onkalo <samu.p.onkalo@nokia.com>
+
+Description
+-----------
+
+APDS990x is a combined ambient light and proximity sensor. ALS and proximity
+functionality are highly connected. ALS measurement path must be running
+while the proximity functionality is enabled.
+
+ALS produces raw measurement values for two channels: Clear channel
+(infrared + visible light) and IR only. However, threshold comparisons happen
+using clear channel only. Lux value and the threshold level on the HW
+might vary quite much depending the spectrum of the light source.
+
+Driver makes necessary conversions to both directions so that user handles
+only lux values. Lux value is calculated using information from the both
+channels. HW threshold level is calculated from the given lux value to match
+with current type of the lightning. Sometimes inaccuracy of the estimations
+lead to false interrupt, but that doesn't harm.
+
+ALS contains 4 different gain steps. Driver automatically
+selects suitable gain step. After each measurement, reliability of the results
+is estimated and new measurement is trigged if necessary.
+
+Platform data can provide tuned values to the conversion formulas if
+values are known. Otherwise plain sensor default values are used.
+
+Proximity side is little bit simpler. There is no need for complex conversions.
+It produces directly usable values.
+
+Driver controls chip operational state using pm_runtime framework.
+Voltage regulators are controlled based on chip operational state.
+
+SYSFS
+-----
+
+
+chip_id
+ RO - shows detected chip type and version
+
+power_state
+ RW - enable / disable chip. Uses counting logic
+ 1 enables the chip
+ 0 disables the chip
+lux0_input
+ RO - measured lux value
+ sysfs_notify called when threshold interrupt occurs
+
+lux0_sensor_range
+ RO - lux0_input max value. Actually never reaches since sensor tends
+ to saturate much before that. Real max value varies depending
+ on the light spectrum etc.
+
+lux0_rate
+ RW - measurement rate in Hz
+
+lux0_rate_avail
+ RO - supported measurement rates
+
+lux0_calibscale
+ RW - calibration value. Set to neutral value by default.
+ Output results are multiplied with calibscale / calibscale_default
+ value.
+
+lux0_calibscale_default
+ RO - neutral calibration value
+
+lux0_thresh_above_value
+ RW - HI level threshold value. All results above the value
+ trigs an interrupt. 65535 (i.e. sensor_range) disables the above
+ interrupt.
+
+lux0_thresh_below_value
+ RW - LO level threshold value. All results below the value
+ trigs an interrupt. 0 disables the below interrupt.
+
+prox0_raw
+ RO - measured proximity value
+ sysfs_notify called when threshold interrupt occurs
+
+prox0_sensor_range
+ RO - prox0_raw max value (1023)
+
+prox0_raw_en
+ RW - enable / disable proximity - uses counting logic
+ 1 enables the proximity
+ 0 disables the proximity
+
+prox0_reporting_mode
+ RW - trigger / periodic. In "trigger" mode the driver tells two possible
+ values: 0 or prox0_sensor_range value. 0 means no proximity,
+ 1023 means proximity. This causes minimal number of interrupts.
+ In "periodic" mode the driver reports all values above
+ prox0_thresh_above. This causes more interrupts, but it can give
+ _rough_ estimate about the distance.
+
+prox0_reporting_mode_avail
+ RO - accepted values to prox0_reporting_mode (trigger, periodic)
+
+prox0_thresh_above_value
+ RW - threshold level which trigs proximity events.
diff --git a/Documentation/misc-devices/bh1770glc.txt b/Documentation/misc-devices/bh1770glc.txt
new file mode 100644
index 000000000..7d64c014d
--- /dev/null
+++ b/Documentation/misc-devices/bh1770glc.txt
@@ -0,0 +1,116 @@
+Kernel driver bh1770glc
+=======================
+
+Supported chips:
+ROHM BH1770GLC
+OSRAM SFH7770
+
+Data sheet:
+Not freely available
+
+Author:
+Samu Onkalo <samu.p.onkalo@nokia.com>
+
+Description
+-----------
+BH1770GLC and SFH7770 are combined ambient light and proximity sensors.
+ALS and proximity parts operates on their own, but they shares common I2C
+interface and interrupt logic. In principle they can run on their own,
+but ALS side results are used to estimate reliability of the proximity sensor.
+
+ALS produces 16 bit lux values. The chip contains interrupt logic to produce
+low and high threshold interrupts.
+
+Proximity part contains IR-led driver up to 3 IR leds. The chip measures
+amount of reflected IR light and produces proximity result. Resolution is
+8 bit. Driver supports only one channel. Driver uses ALS results to estimate
+reliability of the proximity results. Thus ALS is always running while
+proximity detection is needed.
+
+Driver uses threshold interrupts to avoid need for polling the values.
+Proximity low interrupt doesn't exists in the chip. This is simulated
+by using a delayed work. As long as there is proximity threshold above
+interrupts the delayed work is pushed forward. So, when proximity level goes
+below the threshold value, there is no interrupt and the delayed work will
+finally run. This is handled as no proximity indication.
+
+Chip state is controlled via runtime pm framework when enabled in config.
+
+Calibscale factor is used to hide differences between the chips. By default
+value set to neutral state meaning factor of 1.00. To get proper values,
+calibrated source of light is needed as a reference. Calibscale factor is set
+so that measurement produces about the expected lux value.
+
+SYSFS
+-----
+
+chip_id
+ RO - shows detected chip type and version
+
+power_state
+ RW - enable / disable chip. Uses counting logic
+ 1 enables the chip
+ 0 disables the chip
+
+lux0_input
+ RO - measured lux value
+ sysfs_notify called when threshold interrupt occurs
+
+lux0_sensor_range
+ RO - lux0_input max value
+
+lux0_rate
+ RW - measurement rate in Hz
+
+lux0_rate_avail
+ RO - supported measurement rates
+
+lux0_thresh_above_value
+ RW - HI level threshold value. All results above the value
+ trigs an interrupt. 65535 (i.e. sensor_range) disables the above
+ interrupt.
+
+lux0_thresh_below_value
+ RW - LO level threshold value. All results below the value
+ trigs an interrupt. 0 disables the below interrupt.
+
+lux0_calibscale
+ RW - calibration value. Set to neutral value by default.
+ Output results are multiplied with calibscale / calibscale_default
+ value.
+
+lux0_calibscale_default
+ RO - neutral calibration value
+
+prox0_raw
+ RO - measured proximity value
+ sysfs_notify called when threshold interrupt occurs
+
+prox0_sensor_range
+ RO - prox0_raw max value
+
+prox0_raw_en
+ RW - enable / disable proximity - uses counting logic
+ 1 enables the proximity
+ 0 disables the proximity
+
+prox0_thresh_above_count
+ RW - number of proximity interrupts needed before triggering the event
+
+prox0_rate_above
+ RW - Measurement rate (in Hz) when the level is above threshold
+ i.e. when proximity on has been reported.
+
+prox0_rate_below
+ RW - Measurement rate (in Hz) when the level is below threshold
+ i.e. when proximity off has been reported.
+
+prox0_rate_avail
+ RO - Supported proximity measurement rates in Hz
+
+prox0_thresh_above0_value
+ RW - threshold level which trigs proximity events.
+ Filtered by persistence filter (prox0_thresh_above_count)
+
+prox0_thresh_above1_value
+ RW - threshold level which trigs event immediately
diff --git a/Documentation/misc-devices/c2port.txt b/Documentation/misc-devices/c2port.txt
new file mode 100644
index 000000000..ea7344465
--- /dev/null
+++ b/Documentation/misc-devices/c2port.txt
@@ -0,0 +1,90 @@
+ C2 port support
+ ---------------
+
+(C) Copyright 2007 Rodolfo Giometti <giometti@enneenne.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+
+
+Overview
+--------
+
+This driver implements the support for Linux of Silicon Labs (Silabs)
+C2 Interface used for in-system programming of micro controllers.
+
+By using this driver you can reprogram the in-system flash without EC2
+or EC3 debug adapter. This solution is also useful in those systems
+where the micro controller is connected via special GPIOs pins.
+
+References
+----------
+
+The C2 Interface main references are at (http://www.silabs.com)
+Silicon Laboratories site], see:
+
+- AN127: FLASH Programming via the C2 Interface at
+http://www.silabs.com/Support Documents/TechnicalDocs/an127.pdf
+
+- C2 Specification at
+http://www.silabs.com/pages/DownloadDoc.aspx?FILEURL=Support%20Documents/TechnicalDocs/an127.pdf&src=SearchResults
+
+however it implements a two wire serial communication protocol (bit
+banging) designed to enable in-system programming, debugging, and
+boundary-scan testing on low pin-count Silicon Labs devices. Currently
+this code supports only flash programming but extensions are easy to
+add.
+
+Using the driver
+----------------
+
+Once the driver is loaded you can use sysfs support to get C2port's
+info or read/write in-system flash.
+
+# ls /sys/class/c2port/c2port0/
+access flash_block_size flash_erase rev_id
+dev_id flash_blocks_num flash_size subsystem/
+flash_access flash_data reset uevent
+
+Initially the C2port access is disabled since you hardware may have
+such lines multiplexed with other devices so, to get access to the
+C2port, you need the command:
+
+# echo 1 > /sys/class/c2port/c2port0/access
+
+after that you should read the device ID and revision ID of the
+connected micro controller:
+
+# cat /sys/class/c2port/c2port0/dev_id
+8
+# cat /sys/class/c2port/c2port0/rev_id
+1
+
+However, for security reasons, the in-system flash access in not
+enabled yet, to do so you need the command:
+
+# echo 1 > /sys/class/c2port/c2port0/flash_access
+
+After that you can read the whole flash:
+
+# cat /sys/class/c2port/c2port0/flash_data > image
+
+erase it:
+
+# echo 1 > /sys/class/c2port/c2port0/flash_erase
+
+and write it:
+
+# cat image > /sys/class/c2port/c2port0/flash_data
+
+after writing you have to reset the device to execute the new code:
+
+# echo 1 > /sys/class/c2port/c2port0/reset
diff --git a/Documentation/misc-devices/eeprom b/Documentation/misc-devices/eeprom
new file mode 100644
index 000000000..ba692011f
--- /dev/null
+++ b/Documentation/misc-devices/eeprom
@@ -0,0 +1,96 @@
+Kernel driver eeprom
+====================
+
+Supported chips:
+ * Any EEPROM chip in the designated address range
+ Prefix: 'eeprom'
+ Addresses scanned: I2C 0x50 - 0x57
+ Datasheets: Publicly available from:
+ Atmel (www.atmel.com),
+ Catalyst (www.catsemi.com),
+ Fairchild (www.fairchildsemi.com),
+ Microchip (www.microchip.com),
+ Philips (www.semiconductor.philips.com),
+ Rohm (www.rohm.com),
+ ST (www.st.com),
+ Xicor (www.xicor.com),
+ and others.
+
+ Chip Size (bits) Address
+ 24C01 1K 0x50 (shadows at 0x51 - 0x57)
+ 24C01A 1K 0x50 - 0x57 (Typical device on DIMMs)
+ 24C02 2K 0x50 - 0x57
+ 24C04 4K 0x50, 0x52, 0x54, 0x56
+ (additional data at 0x51, 0x53, 0x55, 0x57)
+ 24C08 8K 0x50, 0x54 (additional data at 0x51, 0x52,
+ 0x53, 0x55, 0x56, 0x57)
+ 24C16 16K 0x50 (additional data at 0x51 - 0x57)
+ Sony 2K 0x57
+
+ Atmel 34C02B 2K 0x50 - 0x57, SW write protect at 0x30-37
+ Catalyst 34FC02 2K 0x50 - 0x57, SW write protect at 0x30-37
+ Catalyst 34RC02 2K 0x50 - 0x57, SW write protect at 0x30-37
+ Fairchild 34W02 2K 0x50 - 0x57, SW write protect at 0x30-37
+ Microchip 24AA52 2K 0x50 - 0x57, SW write protect at 0x30-37
+ ST M34C02 2K 0x50 - 0x57, SW write protect at 0x30-37
+
+
+Authors:
+ Frodo Looijaard <frodol@dds.nl>,
+ Philip Edelbrock <phil@netroedge.com>,
+ Jean Delvare <jdelvare@suse.de>,
+ Greg Kroah-Hartman <greg@kroah.com>,
+ IBM Corp.
+
+Description
+-----------
+
+This is a simple EEPROM module meant to enable reading the first 256 bytes
+of an EEPROM (on a SDRAM DIMM for example). However, it will access serial
+EEPROMs on any I2C adapter. The supported devices are generically called
+24Cxx, and are listed above; however the numbering for these
+industry-standard devices may vary by manufacturer.
+
+This module was a programming exercise to get used to the new project
+organization laid out by Frodo, but it should be at least completely
+effective for decoding the contents of EEPROMs on DIMMs.
+
+DIMMS will typically contain a 24C01A or 24C02, or the 34C02 variants.
+The other devices will not be found on a DIMM because they respond to more
+than one address.
+
+DDC Monitors may contain any device. Often a 24C01, which responds to all 8
+addresses, is found.
+
+Recent Sony Vaio laptops have an EEPROM at 0x57. We couldn't get the
+specification, so it is guess work and far from being complete.
+
+The Microchip 24AA52/24LCS52, ST M34C02, and others support an additional
+software write protect register at 0x30 - 0x37 (0x20 less than the memory
+location). The chip responds to "write quick" detection at this address but
+does not respond to byte reads. If this register is present, the lower 128
+bytes of the memory array are not write protected. Any byte data write to
+this address will write protect the memory array permanently, and the
+device will no longer respond at the 0x30-37 address. The eeprom driver
+does not support this register.
+
+Lacking functionality:
+
+* Full support for larger devices (24C04, 24C08, 24C16). These are not
+typically found on a PC. These devices will appear as separate devices at
+multiple addresses.
+
+* Support for really large devices (24C32, 24C64, 24C128, 24C256, 24C512).
+These devices require two-byte address fields and are not supported.
+
+* Enable Writing. Again, no technical reason why not, but making it easy
+to change the contents of the EEPROMs (on DIMMs anyway) also makes it easy
+to disable the DIMMs (potentially preventing the computer from booting)
+until the values are restored somehow.
+
+Use:
+
+After inserting the module (and any other required SMBus/i2c modules), you
+should have some EEPROM directories in /sys/bus/i2c/devices/* of names such
+as "0-0050". Inside each of these is a series of files, the eeprom file
+contains the binary data from EEPROM.
diff --git a/Documentation/misc-devices/ics932s401 b/Documentation/misc-devices/ics932s401
new file mode 100644
index 000000000..bdac67ff6
--- /dev/null
+++ b/Documentation/misc-devices/ics932s401
@@ -0,0 +1,31 @@
+Kernel driver ics932s401
+======================
+
+Supported chips:
+ * IDT ICS932S401
+ Prefix: 'ics932s401'
+ Addresses scanned: I2C 0x69
+ Datasheet: Publicly available at the IDT website
+
+Author: Darrick J. Wong
+
+Description
+-----------
+
+This driver implements support for the IDT ICS932S401 chip family.
+
+This chip has 4 clock outputs--a base clock for the CPU (which is likely
+multiplied to get the real CPU clock), a system clock, a PCI clock, a USB
+clock, and a reference clock. The driver reports selected and actual
+frequency. If spread spectrum mode is enabled, the driver also reports by what
+percent the clock signal is being spread, which should be between 0 and -0.5%.
+All frequencies are reported in KHz.
+
+The ICS932S401 monitors all inputs continuously. The driver will not read
+the registers more often than once every other second.
+
+Special Features
+----------------
+
+The clocks could be reprogrammed to increase system speed. I will not help you
+do this, as you risk damaging your system!
diff --git a/Documentation/misc-devices/isl29003 b/Documentation/misc-devices/isl29003
new file mode 100644
index 000000000..c4ff5f38e
--- /dev/null
+++ b/Documentation/misc-devices/isl29003
@@ -0,0 +1,62 @@
+Kernel driver isl29003
+=====================
+
+Supported chips:
+* Intersil ISL29003
+Prefix: 'isl29003'
+Addresses scanned: none
+Datasheet:
+http://www.intersil.com/data/fn/fn7464.pdf
+
+Author: Daniel Mack <daniel@caiaq.de>
+
+
+Description
+-----------
+The ISL29003 is an integrated light sensor with a 16-bit integrating type
+ADC, I2C user programmable lux range select for optimized counts/lux, and
+I2C multi-function control and monitoring capabilities. The internal ADC
+provides 16-bit resolution while rejecting 50Hz and 60Hz flicker caused by
+artificial light sources.
+
+The driver allows to set the lux range, the bit resolution, the operational
+mode (see below) and the power state of device and can read the current lux
+value, of course.
+
+
+Detection
+---------
+
+The ISL29003 does not have an ID register which could be used to identify
+it, so the detection routine will just try to read from the configured I2C
+addess and consider the device to be present as soon as it ACKs the
+transfer.
+
+
+Sysfs entries
+-------------
+
+range:
+ 0: 0 lux to 1000 lux (default)
+ 1: 0 lux to 4000 lux
+ 2: 0 lux to 16,000 lux
+ 3: 0 lux to 64,000 lux
+
+resolution:
+ 0: 2^16 cycles (default)
+ 1: 2^12 cycles
+ 2: 2^8 cycles
+ 3: 2^4 cycles
+
+mode:
+ 0: diode1's current (unsigned 16bit) (default)
+ 1: diode1's current (unsigned 16bit)
+ 2: difference between diodes (l1 - l2, signed 15bit)
+
+power_state:
+ 0: device is disabled (default)
+ 1: device is enabled
+
+lux (read only):
+ returns the value from the last sensor reading
+
diff --git a/Documentation/misc-devices/lis3lv02d b/Documentation/misc-devices/lis3lv02d
new file mode 100644
index 000000000..f89960a0f
--- /dev/null
+++ b/Documentation/misc-devices/lis3lv02d
@@ -0,0 +1,93 @@
+Kernel driver lis3lv02d
+=======================
+
+Supported chips:
+
+ * STMicroelectronics LIS3LV02DL, LIS3LV02DQ (12 bits precision)
+ * STMicroelectronics LIS302DL, LIS3L02DQ, LIS331DL (8 bits) and
+ LIS331DLH (16 bits)
+
+Authors:
+ Yan Burman <burman.yan@gmail.com>
+ Eric Piel <eric.piel@tremplin-utc.net>
+
+
+Description
+-----------
+
+This driver provides support for the accelerometer found in various HP laptops
+sporting the feature officially called "HP Mobile Data Protection System 3D" or
+"HP 3D DriveGuard". It detects automatically laptops with this sensor. Known
+models (full list can be found in drivers/platform/x86/hp_accel.c) will have
+their axis automatically oriented on standard way (eg: you can directly play
+neverball). The accelerometer data is readable via
+/sys/devices/platform/lis3lv02d. Reported values are scaled
+to mg values (1/1000th of earth gravity).
+
+Sysfs attributes under /sys/devices/platform/lis3lv02d/:
+position - 3D position that the accelerometer reports. Format: "(x,y,z)"
+rate - read reports the sampling rate of the accelerometer device in HZ.
+ write changes sampling rate of the accelerometer device.
+ Only values which are supported by HW are accepted.
+selftest - performs selftest for the chip as specified by chip manufacturer.
+
+This driver also provides an absolute input class device, allowing
+the laptop to act as a pinball machine-esque joystick. Joystick device can be
+calibrated. Joystick device can be in two different modes.
+By default output values are scaled between -32768 .. 32767. In joystick raw
+mode, joystick and sysfs position entry have the same scale. There can be
+small difference due to input system fuzziness feature.
+Events are also available as input event device.
+
+Selftest is meant only for hardware diagnostic purposes. It is not meant to be
+used during normal operations. Position data is not corrupted during selftest
+but interrupt behaviour is not guaranteed to work reliably. In test mode, the
+sensing element is internally moved little bit. Selftest measures difference
+between normal mode and test mode. Chip specifications tell the acceptance
+limit for each type of the chip. Limits are provided via platform data
+to allow adjustment of the limits without a change to the actual driver.
+Seltest returns either "OK x y z" or "FAIL x y z" where x, y and z are
+measured difference between modes. Axes are not remapped in selftest mode.
+Measurement values are provided to help HW diagnostic applications to make
+final decision.
+
+On HP laptops, if the led infrastructure is activated, support for a led
+indicating disk protection will be provided as /sys/class/leds/hp::hddprotect.
+
+Another feature of the driver is misc device called "freefall" that
+acts similar to /dev/rtc and reacts on free-fall interrupts received
+from the device. It supports blocking operations, poll/select and
+fasync operation modes. You must read 1 bytes from the device. The
+result is number of free-fall interrupts since the last successful
+read (or 255 if number of interrupts would not fit). See the freefall.c
+file for an example on using the device.
+
+
+Axes orientation
+----------------
+
+For better compatibility between the various laptops. The values reported by
+the accelerometer are converted into a "standard" organisation of the axes
+(aka "can play neverball out of the box"):
+ * When the laptop is horizontal the position reported is about 0 for X and Y
+ and a positive value for Z
+ * If the left side is elevated, X increases (becomes positive)
+ * If the front side (where the touchpad is) is elevated, Y decreases
+ (becomes negative)
+ * If the laptop is put upside-down, Z becomes negative
+
+If your laptop model is not recognized (cf "dmesg"), you can send an
+email to the maintainer to add it to the database. When reporting a new
+laptop, please include the output of "dmidecode" plus the value of
+/sys/devices/platform/lis3lv02d/position in these four cases.
+
+Q&A
+---
+
+Q: How do I safely simulate freefall? I have an HP "portable
+workstation" which has about 3.5kg and a plastic case, so letting it
+fall to the ground is out of question...
+
+A: The sensor is pretty sensitive, so your hands can do it. Lift it
+into free space, follow the fall with your hands for like 10
+centimeters. That should be enough to trigger the detection.
diff --git a/Documentation/misc-devices/max6875 b/Documentation/misc-devices/max6875
new file mode 100644
index 000000000..1e89ee3cc
--- /dev/null
+++ b/Documentation/misc-devices/max6875
@@ -0,0 +1,110 @@
+Kernel driver max6875
+=====================
+
+Supported chips:
+ * Maxim MAX6874, MAX6875
+ Prefix: 'max6875'
+ Addresses scanned: None (see below)
+ Datasheet:
+ http://pdfserv.maxim-ic.com/en/ds/MAX6874-MAX6875.pdf
+
+Author: Ben Gardner <bgardner@wabtec.com>
+
+
+Description
+-----------
+
+The Maxim MAX6875 is an EEPROM-programmable power-supply sequencer/supervisor.
+It provides timed outputs that can be used as a watchdog, if properly wired.
+It also provides 512 bytes of user EEPROM.
+
+At reset, the MAX6875 reads the configuration EEPROM into its configuration
+registers. The chip then begins to operate according to the values in the
+registers.
+
+The Maxim MAX6874 is a similar, mostly compatible device, with more intputs
+and outputs:
+ vin gpi vout
+MAX6874 6 4 8
+MAX6875 4 3 5
+
+See the datasheet for more information.
+
+
+Sysfs entries
+-------------
+
+eeprom - 512 bytes of user-defined EEPROM space.
+
+
+General Remarks
+---------------
+
+Valid addresses for the MAX6875 are 0x50 and 0x52.
+Valid addresses for the MAX6874 are 0x50, 0x52, 0x54 and 0x56.
+The driver does not probe any address, so you explicitly instantiate the
+devices.
+
+Example:
+$ modprobe max6875
+$ echo max6875 0x50 > /sys/bus/i2c/devices/i2c-0/new_device
+
+The MAX6874/MAX6875 ignores address bit 0, so this driver attaches to multiple
+addresses. For example, for address 0x50, it also reserves 0x51.
+The even-address instance is called 'max6875', the odd one is 'dummy'.
+
+
+Programming the chip using i2c-dev
+----------------------------------
+
+Use the i2c-dev interface to access and program the chips.
+Reads and writes are performed differently depending on the address range.
+
+The configuration registers are at addresses 0x00 - 0x45.
+Use i2c_smbus_write_byte_data() to write a register and
+i2c_smbus_read_byte_data() to read a register.
+The command is the register number.
+
+Examples:
+To write a 1 to register 0x45:
+ i2c_smbus_write_byte_data(fd, 0x45, 1);
+
+To read register 0x45:
+ value = i2c_smbus_read_byte_data(fd, 0x45);
+
+
+The configuration EEPROM is at addresses 0x8000 - 0x8045.
+The user EEPROM is at addresses 0x8100 - 0x82ff.
+
+Use i2c_smbus_write_word_data() to write a byte to EEPROM.
+
+The command is the upper byte of the address: 0x80, 0x81, or 0x82.
+The data word is the lower part of the address or'd with data << 8.
+ cmd = address >> 8;
+ val = (address & 0xff) | (data << 8);
+
+Example:
+To write 0x5a to address 0x8003:
+ i2c_smbus_write_word_data(fd, 0x80, 0x5a03);
+
+
+Reading data from the EEPROM is a little more complicated.
+Use i2c_smbus_write_byte_data() to set the read address and then
+i2c_smbus_read_byte() or i2c_smbus_read_i2c_block_data() to read the data.
+
+Example:
+To read data starting at offset 0x8100, first set the address:
+ i2c_smbus_write_byte_data(fd, 0x81, 0x00);
+
+And then read the data
+ value = i2c_smbus_read_byte(fd);
+
+ or
+
+ count = i2c_smbus_read_i2c_block_data(fd, 0x84, 16, buffer);
+
+The block read should read 16 bytes.
+0x84 is the block read command.
+
+See the datasheet for more details.
+
diff --git a/Documentation/misc-devices/mei/.gitignore b/Documentation/misc-devices/mei/.gitignore
new file mode 100644
index 000000000..f356b81ca
--- /dev/null
+++ b/Documentation/misc-devices/mei/.gitignore
@@ -0,0 +1 @@
+mei-amt-version
diff --git a/Documentation/misc-devices/mei/Makefile b/Documentation/misc-devices/mei/Makefile
new file mode 100644
index 000000000..d758047d1
--- /dev/null
+++ b/Documentation/misc-devices/mei/Makefile
@@ -0,0 +1,5 @@
+# List of programs to build
+hostprogs-y := mei-amt-version
+HOSTCFLAGS_mei-amt-version.o += -I$(objtree)/usr/include
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
diff --git a/Documentation/misc-devices/mei/TODO b/Documentation/misc-devices/mei/TODO
new file mode 100644
index 000000000..6b3625d30
--- /dev/null
+++ b/Documentation/misc-devices/mei/TODO
@@ -0,0 +1,2 @@
+TODO:
+ - Cleanup and split the timer function
diff --git a/Documentation/misc-devices/mei/mei-amt-version.c b/Documentation/misc-devices/mei/mei-amt-version.c
new file mode 100644
index 000000000..57d0d871d
--- /dev/null
+++ b/Documentation/misc-devices/mei/mei-amt-version.c
@@ -0,0 +1,479 @@
+/******************************************************************************
+ * Intel Management Engine Interface (Intel MEI) Linux driver
+ * Intel MEI Interface Header
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110,
+ * USA
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * Contact Information:
+ * Intel Corporation.
+ * linux-mei@linux.intel.com
+ * http://www.intel.com
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2003 - 2012 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bits/wordsize.h>
+#include <linux/mei.h>
+
+/*****************************************************************************
+ * Intel Management Engine Interface
+ *****************************************************************************/
+
+#define mei_msg(_me, fmt, ARGS...) do { \
+ if (_me->verbose) \
+ fprintf(stderr, fmt, ##ARGS); \
+} while (0)
+
+#define mei_err(_me, fmt, ARGS...) do { \
+ fprintf(stderr, "Error: " fmt, ##ARGS); \
+} while (0)
+
+struct mei {
+ uuid_le guid;
+ bool initialized;
+ bool verbose;
+ unsigned int buf_size;
+ unsigned char prot_ver;
+ int fd;
+};
+
+static void mei_deinit(struct mei *cl)
+{
+ if (cl->fd != -1)
+ close(cl->fd);
+ cl->fd = -1;
+ cl->buf_size = 0;
+ cl->prot_ver = 0;
+ cl->initialized = false;
+}
+
+static bool mei_init(struct mei *me, const uuid_le *guid,
+ unsigned char req_protocol_version, bool verbose)
+{
+ int result;
+ struct mei_client *cl;
+ struct mei_connect_client_data data;
+
+ me->verbose = verbose;
+
+ me->fd = open("/dev/mei", O_RDWR);
+ if (me->fd == -1) {
+ mei_err(me, "Cannot establish a handle to the Intel MEI driver\n");
+ goto err;
+ }
+ memcpy(&me->guid, guid, sizeof(*guid));
+ memset(&data, 0, sizeof(data));
+ me->initialized = true;
+
+ memcpy(&data.in_client_uuid, &me->guid, sizeof(me->guid));
+ result = ioctl(me->fd, IOCTL_MEI_CONNECT_CLIENT, &data);
+ if (result) {
+ mei_err(me, "IOCTL_MEI_CONNECT_CLIENT receive message. err=%d\n", result);
+ goto err;
+ }
+ cl = &data.out_client_properties;
+ mei_msg(me, "max_message_length %d\n", cl->max_msg_length);
+ mei_msg(me, "protocol_version %d\n", cl->protocol_version);
+
+ if ((req_protocol_version > 0) &&
+ (cl->protocol_version != req_protocol_version)) {
+ mei_err(me, "Intel MEI protocol version not supported\n");
+ goto err;
+ }
+
+ me->buf_size = cl->max_msg_length;
+ me->prot_ver = cl->protocol_version;
+
+ return true;
+err:
+ mei_deinit(me);
+ return false;
+}
+
+static ssize_t mei_recv_msg(struct mei *me, unsigned char *buffer,
+ ssize_t len, unsigned long timeout)
+{
+ ssize_t rc;
+
+ mei_msg(me, "call read length = %zd\n", len);
+
+ rc = read(me->fd, buffer, len);
+ if (rc < 0) {
+ mei_err(me, "read failed with status %zd %s\n",
+ rc, strerror(errno));
+ mei_deinit(me);
+ } else {
+ mei_msg(me, "read succeeded with result %zd\n", rc);
+ }
+ return rc;
+}
+
+static ssize_t mei_send_msg(struct mei *me, const unsigned char *buffer,
+ ssize_t len, unsigned long timeout)
+{
+ struct timeval tv;
+ ssize_t written;
+ ssize_t rc;
+ fd_set set;
+
+ tv.tv_sec = timeout / 1000;
+ tv.tv_usec = (timeout % 1000) * 1000000;
+
+ mei_msg(me, "call write length = %zd\n", len);
+
+ written = write(me->fd, buffer, len);
+ if (written < 0) {
+ rc = -errno;
+ mei_err(me, "write failed with status %zd %s\n",
+ written, strerror(errno));
+ goto out;
+ }
+
+ FD_ZERO(&set);
+ FD_SET(me->fd, &set);
+ rc = select(me->fd + 1 , &set, NULL, NULL, &tv);
+ if (rc > 0 && FD_ISSET(me->fd, &set)) {
+ mei_msg(me, "write success\n");
+ } else if (rc == 0) {
+ mei_err(me, "write failed on timeout with status\n");
+ goto out;
+ } else { /* rc < 0 */
+ mei_err(me, "write failed on select with status %zd\n", rc);
+ goto out;
+ }
+
+ rc = written;
+out:
+ if (rc < 0)
+ mei_deinit(me);
+
+ return rc;
+}
+
+/***************************************************************************
+ * Intel Advanced Management Technology ME Client
+ ***************************************************************************/
+
+#define AMT_MAJOR_VERSION 1
+#define AMT_MINOR_VERSION 1
+
+#define AMT_STATUS_SUCCESS 0x0
+#define AMT_STATUS_INTERNAL_ERROR 0x1
+#define AMT_STATUS_NOT_READY 0x2
+#define AMT_STATUS_INVALID_AMT_MODE 0x3
+#define AMT_STATUS_INVALID_MESSAGE_LENGTH 0x4
+
+#define AMT_STATUS_HOST_IF_EMPTY_RESPONSE 0x4000
+#define AMT_STATUS_SDK_RESOURCES 0x1004
+
+
+#define AMT_BIOS_VERSION_LEN 65
+#define AMT_VERSIONS_NUMBER 50
+#define AMT_UNICODE_STRING_LEN 20
+
+struct amt_unicode_string {
+ uint16_t length;
+ char string[AMT_UNICODE_STRING_LEN];
+} __attribute__((packed));
+
+struct amt_version_type {
+ struct amt_unicode_string description;
+ struct amt_unicode_string version;
+} __attribute__((packed));
+
+struct amt_version {
+ uint8_t major;
+ uint8_t minor;
+} __attribute__((packed));
+
+struct amt_code_versions {
+ uint8_t bios[AMT_BIOS_VERSION_LEN];
+ uint32_t count;
+ struct amt_version_type versions[AMT_VERSIONS_NUMBER];
+} __attribute__((packed));
+
+/***************************************************************************
+ * Intel Advanced Management Technology Host Interface
+ ***************************************************************************/
+
+struct amt_host_if_msg_header {
+ struct amt_version version;
+ uint16_t _reserved;
+ uint32_t command;
+ uint32_t length;
+} __attribute__((packed));
+
+struct amt_host_if_resp_header {
+ struct amt_host_if_msg_header header;
+ uint32_t status;
+ unsigned char data[0];
+} __attribute__((packed));
+
+const uuid_le MEI_IAMTHIF = UUID_LE(0x12f80028, 0xb4b7, 0x4b2d, \
+ 0xac, 0xa8, 0x46, 0xe0, 0xff, 0x65, 0x81, 0x4c);
+
+#define AMT_HOST_IF_CODE_VERSIONS_REQUEST 0x0400001A
+#define AMT_HOST_IF_CODE_VERSIONS_RESPONSE 0x0480001A
+
+const struct amt_host_if_msg_header CODE_VERSION_REQ = {
+ .version = {AMT_MAJOR_VERSION, AMT_MINOR_VERSION},
+ ._reserved = 0,
+ .command = AMT_HOST_IF_CODE_VERSIONS_REQUEST,
+ .length = 0
+};
+
+
+struct amt_host_if {
+ struct mei mei_cl;
+ unsigned long send_timeout;
+ bool initialized;
+};
+
+
+static bool amt_host_if_init(struct amt_host_if *acmd,
+ unsigned long send_timeout, bool verbose)
+{
+ acmd->send_timeout = (send_timeout) ? send_timeout : 20000;
+ acmd->initialized = mei_init(&acmd->mei_cl, &MEI_IAMTHIF, 0, verbose);
+ return acmd->initialized;
+}
+
+static void amt_host_if_deinit(struct amt_host_if *acmd)
+{
+ mei_deinit(&acmd->mei_cl);
+ acmd->initialized = false;
+}
+
+static uint32_t amt_verify_code_versions(const struct amt_host_if_resp_header *resp)
+{
+ uint32_t status = AMT_STATUS_SUCCESS;
+ struct amt_code_versions *code_ver;
+ size_t code_ver_len;
+ uint32_t ver_type_cnt;
+ uint32_t len;
+ uint32_t i;
+
+ code_ver = (struct amt_code_versions *)resp->data;
+ /* length - sizeof(status) */
+ code_ver_len = resp->header.length - sizeof(uint32_t);
+ ver_type_cnt = code_ver_len -
+ sizeof(code_ver->bios) -
+ sizeof(code_ver->count);
+ if (code_ver->count != ver_type_cnt / sizeof(struct amt_version_type)) {
+ status = AMT_STATUS_INTERNAL_ERROR;
+ goto out;
+ }
+
+ for (i = 0; i < code_ver->count; i++) {
+ len = code_ver->versions[i].description.length;
+
+ if (len > AMT_UNICODE_STRING_LEN) {
+ status = AMT_STATUS_INTERNAL_ERROR;
+ goto out;
+ }
+
+ len = code_ver->versions[i].version.length;
+ if (code_ver->versions[i].version.string[len] != '\0' ||
+ len != strlen(code_ver->versions[i].version.string)) {
+ status = AMT_STATUS_INTERNAL_ERROR;
+ goto out;
+ }
+ }
+out:
+ return status;
+}
+
+static uint32_t amt_verify_response_header(uint32_t command,
+ const struct amt_host_if_msg_header *resp_hdr,
+ uint32_t response_size)
+{
+ if (response_size < sizeof(struct amt_host_if_resp_header)) {
+ return AMT_STATUS_INTERNAL_ERROR;
+ } else if (response_size != (resp_hdr->length +
+ sizeof(struct amt_host_if_msg_header))) {
+ return AMT_STATUS_INTERNAL_ERROR;
+ } else if (resp_hdr->command != command) {
+ return AMT_STATUS_INTERNAL_ERROR;
+ } else if (resp_hdr->_reserved != 0) {
+ return AMT_STATUS_INTERNAL_ERROR;
+ } else if (resp_hdr->version.major != AMT_MAJOR_VERSION ||
+ resp_hdr->version.minor < AMT_MINOR_VERSION) {
+ return AMT_STATUS_INTERNAL_ERROR;
+ }
+ return AMT_STATUS_SUCCESS;
+}
+
+static uint32_t amt_host_if_call(struct amt_host_if *acmd,
+ const unsigned char *command, ssize_t command_sz,
+ uint8_t **read_buf, uint32_t rcmd,
+ unsigned int expected_sz)
+{
+ uint32_t in_buf_sz;
+ uint32_t out_buf_sz;
+ ssize_t written;
+ uint32_t status;
+ struct amt_host_if_resp_header *msg_hdr;
+
+ in_buf_sz = acmd->mei_cl.buf_size;
+ *read_buf = (uint8_t *)malloc(sizeof(uint8_t) * in_buf_sz);
+ if (*read_buf == NULL)
+ return AMT_STATUS_SDK_RESOURCES;
+ memset(*read_buf, 0, in_buf_sz);
+ msg_hdr = (struct amt_host_if_resp_header *)*read_buf;
+
+ written = mei_send_msg(&acmd->mei_cl,
+ command, command_sz, acmd->send_timeout);
+ if (written != command_sz)
+ return AMT_STATUS_INTERNAL_ERROR;
+
+ out_buf_sz = mei_recv_msg(&acmd->mei_cl, *read_buf, in_buf_sz, 2000);
+ if (out_buf_sz <= 0)
+ return AMT_STATUS_HOST_IF_EMPTY_RESPONSE;
+
+ status = msg_hdr->status;
+ if (status != AMT_STATUS_SUCCESS)
+ return status;
+
+ status = amt_verify_response_header(rcmd,
+ &msg_hdr->header, out_buf_sz);
+ if (status != AMT_STATUS_SUCCESS)
+ return status;
+
+ if (expected_sz && expected_sz != out_buf_sz)
+ return AMT_STATUS_INTERNAL_ERROR;
+
+ return AMT_STATUS_SUCCESS;
+}
+
+
+static uint32_t amt_get_code_versions(struct amt_host_if *cmd,
+ struct amt_code_versions *versions)
+{
+ struct amt_host_if_resp_header *response = NULL;
+ uint32_t status;
+
+ status = amt_host_if_call(cmd,
+ (const unsigned char *)&CODE_VERSION_REQ,
+ sizeof(CODE_VERSION_REQ),
+ (uint8_t **)&response,
+ AMT_HOST_IF_CODE_VERSIONS_RESPONSE, 0);
+
+ if (status != AMT_STATUS_SUCCESS)
+ goto out;
+
+ status = amt_verify_code_versions(response);
+ if (status != AMT_STATUS_SUCCESS)
+ goto out;
+
+ memcpy(versions, response->data, sizeof(struct amt_code_versions));
+out:
+ if (response != NULL)
+ free(response);
+
+ return status;
+}
+
+/************************** end of amt_host_if_command ***********************/
+int main(int argc, char **argv)
+{
+ struct amt_code_versions ver;
+ struct amt_host_if acmd;
+ unsigned int i;
+ uint32_t status;
+ int ret;
+ bool verbose;
+
+ verbose = (argc > 1 && strcmp(argv[1], "-v") == 0);
+
+ if (!amt_host_if_init(&acmd, 5000, verbose)) {
+ ret = 1;
+ goto out;
+ }
+
+ status = amt_get_code_versions(&acmd, &ver);
+
+ amt_host_if_deinit(&acmd);
+
+ switch (status) {
+ case AMT_STATUS_HOST_IF_EMPTY_RESPONSE:
+ printf("Intel AMT: DISABLED\n");
+ ret = 0;
+ break;
+ case AMT_STATUS_SUCCESS:
+ printf("Intel AMT: ENABLED\n");
+ for (i = 0; i < ver.count; i++) {
+ printf("%s:\t%s\n", ver.versions[i].description.string,
+ ver.versions[i].version.string);
+ }
+ ret = 0;
+ break;
+ default:
+ printf("An error has occurred\n");
+ ret = 1;
+ break;
+ }
+
+out:
+ return ret;
+}
diff --git a/Documentation/misc-devices/mei/mei-client-bus.txt b/Documentation/misc-devices/mei/mei-client-bus.txt
new file mode 100644
index 000000000..743be4ec8
--- /dev/null
+++ b/Documentation/misc-devices/mei/mei-client-bus.txt
@@ -0,0 +1,141 @@
+Intel(R) Management Engine (ME) Client bus API
+==============================================
+
+
+Rationale
+=========
+
+MEI misc character device is useful for dedicated applications to send and receive
+data to the many FW appliance found in Intel's ME from the user space.
+However for some of the ME functionalities it make sense to leverage existing software
+stack and expose them through existing kernel subsystems.
+
+In order to plug seamlessly into the kernel device driver model we add kernel virtual
+bus abstraction on top of the MEI driver. This allows implementing linux kernel drivers
+for the various MEI features as a stand alone entities found in their respective subsystem.
+Existing device drivers can even potentially be re-used by adding an MEI CL bus layer to
+the existing code.
+
+
+MEI CL bus API
+==============
+
+A driver implementation for an MEI Client is very similar to existing bus
+based device drivers. The driver registers itself as an MEI CL bus driver through
+the mei_cl_driver structure:
+
+struct mei_cl_driver {
+ struct device_driver driver;
+ const char *name;
+
+ const struct mei_cl_device_id *id_table;
+
+ int (*probe)(struct mei_cl_device *dev, const struct mei_cl_id *id);
+ int (*remove)(struct mei_cl_device *dev);
+};
+
+struct mei_cl_id {
+ char name[MEI_NAME_SIZE];
+ kernel_ulong_t driver_info;
+};
+
+The mei_cl_id structure allows the driver to bind itself against a device name.
+
+To actually register a driver on the ME Client bus one must call the mei_cl_add_driver()
+API. This is typically called at module init time.
+
+Once registered on the ME Client bus, a driver will typically try to do some I/O on
+this bus and this should be done through the mei_cl_send() and mei_cl_recv()
+routines. The latter is synchronous (blocks and sleeps until data shows up).
+In order for drivers to be notified of pending events waiting for them (e.g.
+an Rx event) they can register an event handler through the
+mei_cl_register_event_cb() routine. Currently only the MEI_EVENT_RX event
+will trigger an event handler call and the driver implementation is supposed
+to call mei_recv() from the event handler in order to fetch the pending
+received buffers.
+
+
+Example
+=======
+
+As a theoretical example let's pretend the ME comes with a "contact" NFC IP.
+The driver init and exit routines for this device would look like:
+
+#define CONTACT_DRIVER_NAME "contact"
+
+static struct mei_cl_device_id contact_mei_cl_tbl[] = {
+ { CONTACT_DRIVER_NAME, },
+
+ /* required last entry */
+ { }
+};
+MODULE_DEVICE_TABLE(mei_cl, contact_mei_cl_tbl);
+
+static struct mei_cl_driver contact_driver = {
+ .id_table = contact_mei_tbl,
+ .name = CONTACT_DRIVER_NAME,
+
+ .probe = contact_probe,
+ .remove = contact_remove,
+};
+
+static int contact_init(void)
+{
+ int r;
+
+ r = mei_cl_driver_register(&contact_driver);
+ if (r) {
+ pr_err(CONTACT_DRIVER_NAME ": driver registration failed\n");
+ return r;
+ }
+
+ return 0;
+}
+
+static void __exit contact_exit(void)
+{
+ mei_cl_driver_unregister(&contact_driver);
+}
+
+module_init(contact_init);
+module_exit(contact_exit);
+
+And the driver's simplified probe routine would look like that:
+
+int contact_probe(struct mei_cl_device *dev, struct mei_cl_device_id *id)
+{
+ struct contact_driver *contact;
+
+ [...]
+ mei_cl_enable_device(dev);
+
+ mei_cl_register_event_cb(dev, contact_event_cb, contact);
+
+ return 0;
+}
+
+In the probe routine the driver first enable the MEI device and then registers
+an ME bus event handler which is as close as it can get to registering a
+threaded IRQ handler.
+The handler implementation will typically call some I/O routine depending on
+the pending events:
+
+#define MAX_NFC_PAYLOAD 128
+
+static void contact_event_cb(struct mei_cl_device *dev, u32 events,
+ void *context)
+{
+ struct contact_driver *contact = context;
+
+ if (events & BIT(MEI_EVENT_RX)) {
+ u8 payload[MAX_NFC_PAYLOAD];
+ int payload_size;
+
+ payload_size = mei_recv(dev, payload, MAX_NFC_PAYLOAD);
+ if (payload_size <= 0)
+ return;
+
+ /* Hook to the NFC subsystem */
+ nfc_hci_recv_frame(contact->hdev, payload, payload_size);
+ }
+}
diff --git a/Documentation/misc-devices/mei/mei.txt b/Documentation/misc-devices/mei/mei.txt
new file mode 100644
index 000000000..8d47501bb
--- /dev/null
+++ b/Documentation/misc-devices/mei/mei.txt
@@ -0,0 +1,223 @@
+Intel(R) Management Engine Interface (Intel(R) MEI)
+===================================================
+
+Introduction
+============
+
+The Intel Management Engine (Intel ME) is an isolated and protected computing
+resource (Co-processor) residing inside certain Intel chipsets. The Intel ME
+provides support for computer/IT management features. The feature set
+depends on the Intel chipset SKU.
+
+The Intel Management Engine Interface (Intel MEI, previously known as HECI)
+is the interface between the Host and Intel ME. This interface is exposed
+to the host as a PCI device. The Intel MEI Driver is in charge of the
+communication channel between a host application and the Intel ME feature.
+
+Each Intel ME feature (Intel ME Client) is addressed by a GUID/UUID and
+each client has its own protocol. The protocol is message-based with a
+header and payload up to 512 bytes.
+
+Prominent usage of the Intel ME Interface is to communicate with Intel(R)
+Active Management Technology (Intel AMT) implemented in firmware running on
+the Intel ME.
+
+Intel AMT provides the ability to manage a host remotely out-of-band (OOB)
+even when the operating system running on the host processor has crashed or
+is in a sleep state.
+
+Some examples of Intel AMT usage are:
+ - Monitoring hardware state and platform components
+ - Remote power off/on (useful for green computing or overnight IT
+ maintenance)
+ - OS updates
+ - Storage of useful platform information such as software assets
+ - Built-in hardware KVM
+ - Selective network isolation of Ethernet and IP protocol flows based
+ on policies set by a remote management console
+ - IDE device redirection from remote management console
+
+Intel AMT (OOB) communication is based on SOAP (deprecated
+starting with Release 6.0) over HTTP/S or WS-Management protocol over
+HTTP/S that are received from a remote management console application.
+
+For more information about Intel AMT:
+http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+
+
+Intel MEI Driver
+================
+
+The driver exposes a misc device called /dev/mei.
+
+An application maintains communication with an Intel ME feature while
+/dev/mei is open. The binding to a specific feature is performed by calling
+MEI_CONNECT_CLIENT_IOCTL, which passes the desired UUID.
+The number of instances of an Intel ME feature that can be opened
+at the same time depends on the Intel ME feature, but most of the
+features allow only a single instance.
+
+The Intel AMT Host Interface (Intel AMTHI) feature supports multiple
+simultaneous user connected applications. The Intel MEI driver
+handles this internally by maintaining request queues for the applications.
+
+The driver is transparent to data that are passed between firmware feature
+and host application.
+
+Because some of the Intel ME features can change the system
+configuration, the driver by default allows only a privileged
+user to access it.
+
+A code snippet for an application communicating with Intel AMTHI client:
+
+ struct mei_connect_client_data data;
+ fd = open(MEI_DEVICE);
+
+ data.d.in_client_uuid = AMTHI_UUID;
+
+ ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &data);
+
+ printf("Ver=%d, MaxLen=%ld\n",
+ data.d.in_client_uuid.protocol_version,
+ data.d.in_client_uuid.max_msg_length);
+
+ [...]
+
+ write(fd, amthi_req_data, amthi_req_data_len);
+
+ [...]
+
+ read(fd, &amthi_res_data, amthi_res_data_len);
+
+ [...]
+ close(fd);
+
+
+IOCTL
+=====
+
+The Intel MEI Driver supports the following IOCTL command:
+ IOCTL_MEI_CONNECT_CLIENT Connect to firmware Feature (client).
+
+ usage:
+ struct mei_connect_client_data clientData;
+ ioctl(fd, IOCTL_MEI_CONNECT_CLIENT, &clientData);
+
+ inputs:
+ mei_connect_client_data struct contain the following
+ input field:
+
+ in_client_uuid - UUID of the FW Feature that needs
+ to connect to.
+ outputs:
+ out_client_properties - Client Properties: MTU and Protocol Version.
+
+ error returns:
+ EINVAL Wrong IOCTL Number
+ ENODEV Device or Connection is not initialized or ready.
+ (e.g. Wrong UUID)
+ ENOMEM Unable to allocate memory to client internal data.
+ EFAULT Fatal Error (e.g. Unable to access user input data)
+ EBUSY Connection Already Open
+
+ Notes:
+ max_msg_length (MTU) in client properties describes the maximum
+ data that can be sent or received. (e.g. if MTU=2K, can send
+ requests up to bytes 2k and received responses up to 2k bytes).
+
+
+Intel ME Applications
+=====================
+
+ 1) Intel Local Management Service (Intel LMS)
+
+ Applications running locally on the platform communicate with Intel AMT Release
+ 2.0 and later releases in the same way that network applications do via SOAP
+ over HTTP (deprecated starting with Release 6.0) or with WS-Management over
+ SOAP over HTTP. This means that some Intel AMT features can be accessed from a
+ local application using the same network interface as a remote application
+ communicating with Intel AMT over the network.
+
+ When a local application sends a message addressed to the local Intel AMT host
+ name, the Intel LMS, which listens for traffic directed to the host name,
+ intercepts the message and routes it to the Intel MEI.
+ For more information:
+ http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+ Under "About Intel AMT" => "Local Access"
+
+ For downloading Intel LMS:
+ http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
+
+ The Intel LMS opens a connection using the Intel MEI driver to the Intel LMS
+ firmware feature using a defined UUID and then communicates with the feature
+ using a protocol called Intel AMT Port Forwarding Protocol (Intel APF protocol).
+ The protocol is used to maintain multiple sessions with Intel AMT from a
+ single application.
+
+ See the protocol specification in the Intel AMT Software Development Kit (SDK)
+ http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+ Under "SDK Resources" => "Intel(R) vPro(TM) Gateway (MPS)"
+ => "Information for Intel(R) vPro(TM) Gateway Developers"
+ => "Description of the Intel AMT Port Forwarding (APF) Protocol"
+
+ 2) Intel AMT Remote configuration using a Local Agent
+
+ A Local Agent enables IT personnel to configure Intel AMT out-of-the-box
+ without requiring installing additional data to enable setup. The remote
+ configuration process may involve an ISV-developed remote configuration
+ agent that runs on the host.
+ For more information:
+ http://software.intel.com/sites/manageability/AMT_Implementation_and_Reference_Guide
+ Under "Setup and Configuration of Intel AMT" =>
+ "SDK Tools Supporting Setup and Configuration" =>
+ "Using the Local Agent Sample"
+
+ An open source Intel AMT configuration utility, implementing a local agent
+ that accesses the Intel MEI driver, can be found here:
+ http://software.intel.com/en-us/articles/download-the-latest-intel-amt-open-source-drivers/
+
+
+Intel AMT OS Health Watchdog
+============================
+
+The Intel AMT Watchdog is an OS Health (Hang/Crash) watchdog.
+Whenever the OS hangs or crashes, Intel AMT will send an event
+to any subscriber to this event. This mechanism means that
+IT knows when a platform crashes even when there is a hard failure on the host.
+
+The Intel AMT Watchdog is composed of two parts:
+ 1) Firmware feature - receives the heartbeats
+ and sends an event when the heartbeats stop.
+ 2) Intel MEI driver - connects to the watchdog feature, configures the
+ watchdog and sends the heartbeats.
+
+The Intel MEI driver uses the kernel watchdog API to configure the Intel AMT
+Watchdog and to send heartbeats to it. The default timeout of the
+watchdog is 120 seconds.
+
+If the Intel AMT Watchdog feature does not exist (i.e. the connection failed),
+the Intel MEI driver will disable the sending of heartbeats.
+
+
+Supported Chipsets
+==================
+
+7 Series Chipset Family
+6 Series Chipset Family
+5 Series Chipset Family
+4 Series Chipset Family
+Mobile 4 Series Chipset Family
+ICH9
+82946GZ/GL
+82G35 Express
+82Q963/Q965
+82P965/G965
+Mobile PM965/GM965
+Mobile GME965/GLE960
+82Q35 Express
+82G33/G31/P35/P31 Express
+82Q33 Express
+82X38/X48 Express
+
+---
+linux-mei@linux.intel.com
diff --git a/Documentation/misc-devices/spear-pcie-gadget.txt b/Documentation/misc-devices/spear-pcie-gadget.txt
new file mode 100644
index 000000000..02c13ef5e
--- /dev/null
+++ b/Documentation/misc-devices/spear-pcie-gadget.txt
@@ -0,0 +1,130 @@
+Spear PCIe Gadget Driver:
+
+Author
+=============
+Pratyush Anand (pratyush.anand@st.com)
+
+Location
+============
+driver/misc/spear13xx_pcie_gadget.c
+
+Supported Chip:
+===================
+SPEAr1300
+SPEAr1310
+
+Menuconfig option:
+==========================
+Device Drivers
+ Misc devices
+ PCIe gadget support for SPEAr13XX platform
+purpose
+===========
+This driver has several nodes which can be read/written by configfs interface.
+Its main purpose is to configure selected dual mode PCIe controller as device
+and then program its various registers to configure it as a particular device
+type. This driver can be used to show spear's PCIe device capability.
+
+Description of different nodes:
+=================================
+
+read behavior of nodes:
+------------------------------
+link :gives ltssm status.
+int_type :type of supported interrupt
+no_of_msi :zero if MSI is not enabled by host. A positive value is the
+ number of MSI vector granted.
+vendor_id :returns programmed vendor id (hex)
+device_id :returns programmed device id(hex)
+bar0_size: :returns size of bar0 in hex.
+bar0_address :returns address of bar0 mapped area in hex.
+bar0_rw_offset :returns offset of bar0 for which bar0_data will return value.
+bar0_data :returns data at bar0_rw_offset.
+
+write behavior of nodes:
+------------------------------
+link :write UP to enable ltsmm DOWN to disable
+int_type :write interrupt type to be configured and (int_type could be
+ INTA, MSI or NO_INT). Select MSI only when you have programmed
+ no_of_msi node.
+no_of_msi :number of MSI vector needed.
+inta :write 1 to assert INTA and 0 to de-assert.
+send_msi :write MSI vector to be sent.
+vendor_id :write vendor id(hex) to be programmed.
+device_id :write device id(hex) to be programmed.
+bar0_size :write size of bar0 in hex. default bar0 size is 1000 (hex)
+ bytes.
+bar0_address :write address of bar0 mapped area in hex. (default mapping of
+ bar0 is SYSRAM1(E0800000). Always program bar size before bar
+ address. Kernel might modify bar size and address for alignment, so
+ read back bar size and address after writing to cross check.
+bar0_rw_offset :write offset of bar0 for which bar0_data will write value.
+bar0_data :write data to be written at bar0_rw_offset.
+
+Node programming example
+===========================
+Program all PCIe registers in such a way that when this device is connected
+to the PCIe host, then host sees this device as 1MB RAM.
+#mount -t configfs none /Config
+For nth PCIe Device Controller
+# cd /config/pcie_gadget.n/
+Now you have all the nodes in this directory.
+program vendor id as 0x104a
+# echo 104A >> vendor_id
+
+program device id as 0xCD80
+# echo CD80 >> device_id
+
+program BAR0 size as 1MB
+# echo 100000 >> bar0_size
+
+check for programmed bar0 size
+# cat bar0_size
+
+Program BAR0 Address as DDR (0x2100000). This is the physical address of
+memory, which is to be made visible to PCIe host. Similarly any other peripheral
+can also be made visible to PCIe host. E.g., if you program base address of UART
+as BAR0 address then when this device will be connected to a host, it will be
+visible as UART.
+# echo 2100000 >> bar0_address
+
+program interrupt type : INTA
+# echo INTA >> int_type
+
+go for link up now.
+# echo UP >> link
+
+It will have to be insured that, once link up is done on gadget, then only host
+is initialized and start to search PCIe devices on its port.
+
+/*wait till link is up*/
+# cat link
+wait till it returns UP.
+
+To assert INTA
+# echo 1 >> inta
+
+To de-assert INTA
+# echo 0 >> inta
+
+if MSI is to be used as interrupt, program no of msi vector needed (say4)
+# echo 4 >> no_of_msi
+
+select MSI as interrupt type
+# echo MSI >> int_type
+
+go for link up now
+# echo UP >> link
+
+wait till link is up
+# cat link
+An application can repetitively read this node till link is found UP. It can
+sleep between two read.
+
+wait till msi is enabled
+# cat no_of_msi
+Should return 4 (number of requested MSI vector)
+
+to send msi vector 2
+# echo 2 >> send_msi
+#cd -
diff --git a/Documentation/mmc/00-INDEX b/Documentation/mmc/00-INDEX
new file mode 100644
index 000000000..a9ba6720f
--- /dev/null
+++ b/Documentation/mmc/00-INDEX
@@ -0,0 +1,8 @@
+00-INDEX
+ - this file
+mmc-dev-attrs.txt
+ - info on SD and MMC device attributes
+mmc-dev-parts.txt
+ - info on SD and MMC device partitions
+mmc-async-req.txt
+ - info on mmc asynchronous requests
diff --git a/Documentation/mmc/mmc-async-req.txt b/Documentation/mmc/mmc-async-req.txt
new file mode 100644
index 000000000..ae1907b10
--- /dev/null
+++ b/Documentation/mmc/mmc-async-req.txt
@@ -0,0 +1,87 @@
+Rationale
+=========
+
+How significant is the cache maintenance overhead?
+It depends. Fast eMMC and multiple cache levels with speculative cache
+pre-fetch makes the cache overhead relatively significant. If the DMA
+preparations for the next request are done in parallel with the current
+transfer, the DMA preparation overhead would not affect the MMC performance.
+The intention of non-blocking (asynchronous) MMC requests is to minimize the
+time between when an MMC request ends and another MMC request begins.
+Using mmc_wait_for_req(), the MMC controller is idle while dma_map_sg and
+dma_unmap_sg are processing. Using non-blocking MMC requests makes it
+possible to prepare the caches for next job in parallel with an active
+MMC request.
+
+MMC block driver
+================
+
+The mmc_blk_issue_rw_rq() in the MMC block driver is made non-blocking.
+The increase in throughput is proportional to the time it takes to
+prepare (major part of preparations are dma_map_sg() and dma_unmap_sg())
+a request and how fast the memory is. The faster the MMC/SD is the
+more significant the prepare request time becomes. Roughly the expected
+performance gain is 5% for large writes and 10% on large reads on a L2 cache
+platform. In power save mode, when clocks run on a lower frequency, the DMA
+preparation may cost even more. As long as these slower preparations are run
+in parallel with the transfer performance won't be affected.
+
+Details on measurements from IOZone and mmc_test
+================================================
+
+https://wiki.linaro.org/WorkingGroups/Kernel/Specs/StoragePerfMMC-async-req
+
+MMC core API extension
+======================
+
+There is one new public function mmc_start_req().
+It starts a new MMC command request for a host. The function isn't
+truly non-blocking. If there is an ongoing async request it waits
+for completion of that request and starts the new one and returns. It
+doesn't wait for the new request to complete. If there is no ongoing
+request it starts the new request and returns immediately.
+
+MMC host extensions
+===================
+
+There are two optional members in the mmc_host_ops -- pre_req() and
+post_req() -- that the host driver may implement in order to move work
+to before and after the actual mmc_host_ops.request() function is called.
+In the DMA case pre_req() may do dma_map_sg() and prepare the DMA
+descriptor, and post_req() runs the dma_unmap_sg().
+
+Optimize for the first request
+==============================
+
+The first request in a series of requests can't be prepared in parallel
+with the previous transfer, since there is no previous request.
+The argument is_first_req in pre_req() indicates that there is no previous
+request. The host driver may optimize for this scenario to minimize
+the performance loss. A way to optimize for this is to split the current
+request in two chunks, prepare the first chunk and start the request,
+and finally prepare the second chunk and start the transfer.
+
+Pseudocode to handle is_first_req scenario with minimal prepare overhead:
+
+if (is_first_req && req->size > threshold)
+ /* start MMC transfer for the complete transfer size */
+ mmc_start_command(MMC_CMD_TRANSFER_FULL_SIZE);
+
+ /*
+ * Begin to prepare DMA while cmd is being processed by MMC.
+ * The first chunk of the request should take the same time
+ * to prepare as the "MMC process command time".
+ * If prepare time exceeds MMC cmd time
+ * the transfer is delayed, guesstimate max 4k as first chunk size.
+ */
+ prepare_1st_chunk_for_dma(req);
+ /* flush pending desc to the DMAC (dmaengine.h) */
+ dma_issue_pending(req->dma_desc);
+
+ prepare_2nd_chunk_for_dma(req);
+ /*
+ * The second issue_pending should be called before MMC runs out
+ * of the first chunk. If the MMC runs out of the first data chunk
+ * before this call, the transfer is delayed.
+ */
+ dma_issue_pending(req->dma_desc);
diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt
new file mode 100644
index 000000000..189bab092
--- /dev/null
+++ b/Documentation/mmc/mmc-dev-attrs.txt
@@ -0,0 +1,84 @@
+SD and MMC Block Device Attributes
+==================================
+
+These attributes are defined for the block devices associated with the
+SD or MMC device.
+
+The following attributes are read/write.
+
+ force_ro Enforce read-only access even if write protect switch is off.
+
+SD and MMC Device Attributes
+============================
+
+All attributes are read-only.
+
+ cid Card Identifaction Register
+ csd Card Specific Data Register
+ scr SD Card Configuration Register (SD only)
+ date Manufacturing Date (from CID Register)
+ fwrev Firmware/Product Revision (from CID Register) (SD and MMCv1 only)
+ hwrev Hardware/Product Revision (from CID Register) (SD and MMCv1 only)
+ manfid Manufacturer ID (from CID Register)
+ name Product Name (from CID Register)
+ oemid OEM/Application ID (from CID Register)
+ prv Product Revision (from CID Register) (SD and MMCv4 only)
+ serial Product Serial Number (from CID Register)
+ erase_size Erase group size
+ preferred_erase_size Preferred erase size
+ raw_rpmb_size_mult RPMB partition size
+ rel_sectors Reliable write sector count
+
+Note on Erase Size and Preferred Erase Size:
+
+ "erase_size" is the minimum size, in bytes, of an erase
+ operation. For MMC, "erase_size" is the erase group size
+ reported by the card. Note that "erase_size" does not apply
+ to trim or secure trim operations where the minimum size is
+ always one 512 byte sector. For SD, "erase_size" is 512
+ if the card is block-addressed, 0 otherwise.
+
+ SD/MMC cards can erase an arbitrarily large area up to and
+ including the whole card. When erasing a large area it may
+ be desirable to do it in smaller chunks for three reasons:
+ 1. A single erase command will make all other I/O on
+ the card wait. This is not a problem if the whole card
+ is being erased, but erasing one partition will make
+ I/O for another partition on the same card wait for the
+ duration of the erase - which could be a several
+ minutes.
+ 2. To be able to inform the user of erase progress.
+ 3. The erase timeout becomes too large to be very
+ useful. Because the erase timeout contains a margin
+ which is multiplied by the size of the erase area,
+ the value can end up being several minutes for large
+ areas.
+
+ "erase_size" is not the most efficient unit to erase
+ (especially for SD where it is just one sector),
+ hence "preferred_erase_size" provides a good chunk
+ size for erasing large areas.
+
+ For MMC, "preferred_erase_size" is the high-capacity
+ erase size if a card specifies one, otherwise it is
+ based on the capacity of the card.
+
+ For SD, "preferred_erase_size" is the allocation unit
+ size specified by the card.
+
+ "preferred_erase_size" is in bytes.
+
+Note on raw_rpmb_size_mult:
+ "raw_rpmb_size_mult" is a mutliple of 128kB block.
+ RPMB size in byte is calculated by using the following equation:
+ RPMB partition size = 128kB x raw_rpmb_size_mult
+
+SD/MMC/SDIO Clock Gating Attribute
+==================================
+
+Read and write access is provided to following attribute.
+This attribute appears only if CONFIG_MMC_CLKGATE is enabled.
+
+ clkgate_delay Tune the clock gating delay with desired value in milliseconds.
+
+echo <desired delay> > /sys/class/mmc_host/mmcX/clkgate_delay
diff --git a/Documentation/mmc/mmc-dev-parts.txt b/Documentation/mmc/mmc-dev-parts.txt
new file mode 100644
index 000000000..f08d078d4
--- /dev/null
+++ b/Documentation/mmc/mmc-dev-parts.txt
@@ -0,0 +1,40 @@
+SD and MMC Device Partitions
+============================
+
+Device partitions are additional logical block devices present on the
+SD/MMC device.
+
+As of this writing, MMC boot partitions as supported and exposed as
+/dev/mmcblkXboot0 and /dev/mmcblkXboot1, where X is the index of the
+parent /dev/mmcblkX.
+
+MMC Boot Partitions
+===================
+
+Read and write access is provided to the two MMC boot partitions. Due to
+the sensitive nature of the boot partition contents, which often store
+a bootloader or bootloader configuration tables crucial to booting the
+platform, write access is disabled by default to reduce the chance of
+accidental bricking.
+
+To enable write access to /dev/mmcblkXbootY, disable the forced read-only
+access with:
+
+echo 0 > /sys/block/mmcblkXbootY/force_ro
+
+To re-enable read-only access:
+
+echo 1 > /sys/block/mmcblkXbootY/force_ro
+
+The boot partitions can also be locked read only until the next power on,
+with:
+
+echo 1 > /sys/block/mmcblkXbootY/ro_lock_until_next_power_on
+
+This is a feature of the card and not of the kernel. If the card does
+not support boot partition locking, the file will not exist. If the
+feature has been disabled on the card, the file will be read-only.
+
+The boot partitions can also be locked permanently, but this feature is
+not accessible through sysfs in order to avoid accidental or malicious
+bricking.
diff --git a/Documentation/mn10300/ABI.txt b/Documentation/mn10300/ABI.txt
new file mode 100644
index 000000000..d3507bad4
--- /dev/null
+++ b/Documentation/mn10300/ABI.txt
@@ -0,0 +1,149 @@
+ =========================
+ MN10300 FUNCTION CALL ABI
+ =========================
+
+=======
+GENERAL
+=======
+
+The MN10300/AM33 kernel runs in little-endian mode; big-endian mode is not
+supported.
+
+The stack grows downwards, and should always be 32-bit aligned. There are
+separate stack pointer registers for userspace and the kernel.
+
+
+================
+ARGUMENT PASSING
+================
+
+The first two arguments (assuming up to 32-bits per argument) to a function are
+passed in the D0 and D1 registers respectively; all other arguments are passed
+on the stack.
+
+If 64-bit arguments are being passed, then they are never split between
+registers and the stack. If the first argument is a 64-bit value, it will be
+passed in D0:D1. If the first argument is not a 64-bit value, but the second
+is, the second will be passed entirely on the stack and D1 will be unused.
+
+Arguments smaller than 32-bits are not coalesced within a register or a stack
+word. For example, two byte-sized arguments will always be passed in separate
+registers or word-sized stack slots.
+
+
+=================
+CALLING FUNCTIONS
+=================
+
+The caller must allocate twelve bytes on the stack for the callee's use before
+it inserts a CALL instruction. The CALL instruction will write into the TOS
+word, but won't actually modify the stack pointer; similarly, the RET
+instruction reads from the TOS word of the stack, but doesn't move the stack
+pointer beyond it.
+
+
+ Stack:
+ | |
+ | |
+ |---------------| SP+20
+ | 4th Arg |
+ |---------------| SP+16
+ | 3rd Arg |
+ |---------------| SP+12
+ | D1 Save Slot |
+ |---------------| SP+8
+ | D0 Save Slot |
+ |---------------| SP+4
+ | Return Addr |
+ |---------------| SP
+ | |
+ | |
+
+
+The caller must leave space on the stack (hence an allocation of twelve bytes)
+in which the callee may store the first two arguments.
+
+
+============
+RETURN VALUE
+============
+
+The return value is passed in D0 for an integer (or D0:D1 for a 64-bit value),
+or A0 for a pointer.
+
+If the return value is a value larger than 64-bits, or is a structure or an
+array, then a hidden first argument will be passed to the callee by the caller:
+this will point to a piece of memory large enough to hold the result of the
+function. In this case, the callee will return the value in that piece of
+memory, and no value will be returned in D0 or A0.
+
+
+===================
+REGISTER CLOBBERING
+===================
+
+The values in certain registers may be clobbered by the callee, and other
+values must be saved:
+
+ Clobber: D0-D1, A0-A1, E0-E3
+ Save: D2-D3, A2-A3, E4-E7, SP
+
+All other non-supervisor-only registers are clobberable (such as MDR, MCRL,
+MCRH).
+
+
+=================
+SPECIAL REGISTERS
+=================
+
+Certain ordinary registers may carry special usage for the compiler:
+
+ A3: Frame pointer
+ E2: TLS pointer
+
+
+==========
+KERNEL ABI
+==========
+
+The kernel may use a slightly different ABI internally.
+
+ (*) E2
+
+ If CONFIG_MN10300_CURRENT_IN_E2 is defined, then the current task pointer
+ will be kept in the E2 register, and that register will be marked
+ unavailable for the compiler to use as a scratch register.
+
+ Normally the kernel uses something like:
+
+ MOV SP,An
+ AND 0xFFFFE000,An
+ MOV (An),Rm // Rm holds current
+ MOV (yyy,Rm) // Access current->yyy
+
+ To find the address of current; but since this option permits current to
+ be carried globally in an register, it can use:
+
+ MOV (yyy,E2) // Access current->yyy
+
+ instead.
+
+
+===============
+SYSTEM CALL ABI
+===============
+
+System calls are called with the following convention:
+
+ REGISTER ENTRY EXIT
+ =============== ======================= =======================
+ D0 Syscall number Return value
+ A0 1st syscall argument Saved
+ D1 2nd syscall argument Saved
+ A3 3rd syscall argument Saved
+ A2 4th syscall argument Saved
+ D3 5th syscall argument Saved
+ D2 6th syscall argument Saved
+
+All other registers are saved. The layout is a consequence of the way the MOVM
+instruction stores registers onto the stack.
diff --git a/Documentation/mn10300/compartmentalisation.txt b/Documentation/mn10300/compartmentalisation.txt
new file mode 100644
index 000000000..8958b51da
--- /dev/null
+++ b/Documentation/mn10300/compartmentalisation.txt
@@ -0,0 +1,60 @@
+ =========================================
+ PART-SPECIFIC SOURCE COMPARTMENTALISATION
+ =========================================
+
+The sources for various parts are compartmentalised at two different levels:
+
+ (1) Processor level
+
+ The "processor level" is a CPU core plus the other on-silicon
+ peripherals.
+
+ Processor-specific header files are divided among directories in a similar
+ way to the CPU level:
+
+ (*) include/asm-mn10300/proc-mn103e010/
+
+ Support for the AM33v2 CPU core.
+
+ The appropriate processor is selected by a CONFIG_MN10300_PROC_YYYY option
+ from the "Processor support" choice menu in the arch/mn10300/Kconfig file.
+
+
+ (2) Unit level
+
+ The "unit level" is a processor plus all the external peripherals
+ controlled by that processor.
+
+ Unit-specific header files are divided among directories in a similar way
+ to the CPU level; not only that, but specific sources may also be
+ segregated into separate directories under the arch directory:
+
+ (*) include/asm-mn10300/unit-asb2303/
+ (*) arch/mn10300/unit-asb2303/
+
+ Support for the ASB2303 board with an ASB2308 daughter board.
+
+ (*) include/asm-mn10300/unit-asb2305/
+ (*) arch/mn10300/unit-asb2305/
+
+ Support for the ASB2305 board.
+
+ The appropriate processor is selected by a CONFIG_MN10300_UNIT_ZZZZ option
+ from the "Unit type" choice menu in the arch/mn10300/Kconfig file.
+
+
+============
+COMPILE TIME
+============
+
+When the kernel is compiled, symbolic links will be made in the asm header file
+directory for this arch:
+
+ include/asm-mn10300/proc => include/asm-mn10300/proc-YYYY/
+ include/asm-mn10300/unit => include/asm-mn10300/unit-ZZZZ/
+
+So that the header files contained in those directories can be accessed without
+lots of #ifdef-age.
+
+The appropriate arch/mn10300/unit-ZZZZ directory will also be entered by the
+compilation process; all other unit-specific directories will be ignored.
diff --git a/Documentation/module-signing.txt b/Documentation/module-signing.txt
new file mode 100644
index 000000000..c72702ec1
--- /dev/null
+++ b/Documentation/module-signing.txt
@@ -0,0 +1,241 @@
+ ==============================
+ KERNEL MODULE SIGNING FACILITY
+ ==============================
+
+CONTENTS
+
+ - Overview.
+ - Configuring module signing.
+ - Generating signing keys.
+ - Public keys in the kernel.
+ - Manually signing modules.
+ - Signed modules and stripping.
+ - Loading signed modules.
+ - Non-valid signatures and unsigned modules.
+ - Administering/protecting the private key.
+
+
+========
+OVERVIEW
+========
+
+The kernel module signing facility cryptographically signs modules during
+installation and then checks the signature upon loading the module. This
+allows increased kernel security by disallowing the loading of unsigned modules
+or modules signed with an invalid key. Module signing increases security by
+making it harder to load a malicious module into the kernel. The module
+signature checking is done by the kernel so that it is not necessary to have
+trusted userspace bits.
+
+This facility uses X.509 ITU-T standard certificates to encode the public keys
+involved. The signatures are not themselves encoded in any industrial standard
+type. The facility currently only supports the RSA public key encryption
+standard (though it is pluggable and permits others to be used). The possible
+hash algorithms that can be used are SHA-1, SHA-224, SHA-256, SHA-384, and
+SHA-512 (the algorithm is selected by data in the signature).
+
+
+==========================
+CONFIGURING MODULE SIGNING
+==========================
+
+The module signing facility is enabled by going to the "Enable Loadable Module
+Support" section of the kernel configuration and turning on
+
+ CONFIG_MODULE_SIG "Module signature verification"
+
+This has a number of options available:
+
+ (1) "Require modules to be validly signed" (CONFIG_MODULE_SIG_FORCE)
+
+ This specifies how the kernel should deal with a module that has a
+ signature for which the key is not known or a module that is unsigned.
+
+ If this is off (ie. "permissive"), then modules for which the key is not
+ available and modules that are unsigned are permitted, but the kernel will
+ be marked as being tainted, and the concerned modules will be marked as
+ tainted, shown with the character 'E'.
+
+ If this is on (ie. "restrictive"), only modules that have a valid
+ signature that can be verified by a public key in the kernel's possession
+ will be loaded. All other modules will generate an error.
+
+ Irrespective of the setting here, if the module has a signature block that
+ cannot be parsed, it will be rejected out of hand.
+
+
+ (2) "Automatically sign all modules" (CONFIG_MODULE_SIG_ALL)
+
+ If this is on then modules will be automatically signed during the
+ modules_install phase of a build. If this is off, then the modules must
+ be signed manually using:
+
+ scripts/sign-file
+
+
+ (3) "Which hash algorithm should modules be signed with?"
+
+ This presents a choice of which hash algorithm the installation phase will
+ sign the modules with:
+
+ CONFIG_MODULE_SIG_SHA1 "Sign modules with SHA-1"
+ CONFIG_MODULE_SIG_SHA224 "Sign modules with SHA-224"
+ CONFIG_MODULE_SIG_SHA256 "Sign modules with SHA-256"
+ CONFIG_MODULE_SIG_SHA384 "Sign modules with SHA-384"
+ CONFIG_MODULE_SIG_SHA512 "Sign modules with SHA-512"
+
+ The algorithm selected here will also be built into the kernel (rather
+ than being a module) so that modules signed with that algorithm can have
+ their signatures checked without causing a dependency loop.
+
+
+=======================
+GENERATING SIGNING KEYS
+=======================
+
+Cryptographic keypairs are required to generate and check signatures. A
+private key is used to generate a signature and the corresponding public key is
+used to check it. The private key is only needed during the build, after which
+it can be deleted or stored securely. The public key gets built into the
+kernel so that it can be used to check the signatures as the modules are
+loaded.
+
+Under normal conditions, the kernel build will automatically generate a new
+keypair using openssl if one does not exist in the files:
+
+ signing_key.priv
+ signing_key.x509
+
+during the building of vmlinux (the public part of the key needs to be built
+into vmlinux) using parameters in the:
+
+ x509.genkey
+
+file (which is also generated if it does not already exist).
+
+It is strongly recommended that you provide your own x509.genkey file.
+
+Most notably, in the x509.genkey file, the req_distinguished_name section
+should be altered from the default:
+
+ [ req_distinguished_name ]
+ #O = Unspecified company
+ CN = Build time autogenerated kernel key
+ #emailAddress = unspecified.user@unspecified.company
+
+The generated RSA key size can also be set with:
+
+ [ req ]
+ default_bits = 4096
+
+
+It is also possible to manually generate the key private/public files using the
+x509.genkey key generation configuration file in the root node of the Linux
+kernel sources tree and the openssl command. The following is an example to
+generate the public/private key files:
+
+ openssl req -new -nodes -utf8 -sha256 -days 36500 -batch -x509 \
+ -config x509.genkey -outform DER -out signing_key.x509 \
+ -keyout signing_key.priv
+
+
+=========================
+PUBLIC KEYS IN THE KERNEL
+=========================
+
+The kernel contains a ring of public keys that can be viewed by root. They're
+in a keyring called ".system_keyring" that can be seen by:
+
+ [root@deneb ~]# cat /proc/keys
+ ...
+ 223c7853 I------ 1 perm 1f030000 0 0 keyring .system_keyring: 1
+ 302d2d52 I------ 1 perm 1f010000 0 0 asymmetri Fedora kernel signing key: d69a84e6bce3d216b979e9505b3e3ef9a7118079: X509.RSA a7118079 []
+ ...
+
+Beyond the public key generated specifically for module signing, any file
+placed in the kernel source root directory or the kernel build root directory
+whose name is suffixed with ".x509" will be assumed to be an X.509 public key
+and will be added to the keyring.
+
+Further, the architecture code may take public keys from a hardware store and
+add those in also (e.g. from the UEFI key database).
+
+Finally, it is possible to add additional public keys by doing:
+
+ keyctl padd asymmetric "" [.system_keyring-ID] <[key-file]
+
+e.g.:
+
+ keyctl padd asymmetric "" 0x223c7853 <my_public_key.x509
+
+Note, however, that the kernel will only permit keys to be added to
+.system_keyring _if_ the new key's X.509 wrapper is validly signed by a key
+that is already resident in the .system_keyring at the time the key was added.
+
+
+=========================
+MANUALLY SIGNING MODULES
+=========================
+
+To manually sign a module, use the scripts/sign-file tool available in
+the Linux kernel source tree. The script requires 4 arguments:
+
+ 1. The hash algorithm (e.g., sha256)
+ 2. The private key filename
+ 3. The public key filename
+ 4. The kernel module to be signed
+
+The following is an example to sign a kernel module:
+
+ scripts/sign-file sha512 kernel-signkey.priv \
+ kernel-signkey.x509 module.ko
+
+The hash algorithm used does not have to match the one configured, but if it
+doesn't, you should make sure that hash algorithm is either built into the
+kernel or can be loaded without requiring itself.
+
+
+============================
+SIGNED MODULES AND STRIPPING
+============================
+
+A signed module has a digital signature simply appended at the end. The string
+"~Module signature appended~." at the end of the module's file confirms that a
+signature is present but it does not confirm that the signature is valid!
+
+Signed modules are BRITTLE as the signature is outside of the defined ELF
+container. Thus they MAY NOT be stripped once the signature is computed and
+attached. Note the entire module is the signed payload, including any and all
+debug information present at the time of signing.
+
+
+======================
+LOADING SIGNED MODULES
+======================
+
+Modules are loaded with insmod, modprobe, init_module() or finit_module(),
+exactly as for unsigned modules as no processing is done in userspace. The
+signature checking is all done within the kernel.
+
+
+=========================================
+NON-VALID SIGNATURES AND UNSIGNED MODULES
+=========================================
+
+If CONFIG_MODULE_SIG_FORCE is enabled or enforcemodulesig=1 is supplied on
+the kernel command line, the kernel will only load validly signed modules
+for which it has a public key. Otherwise, it will also load modules that are
+unsigned. Any module for which the kernel has a key, but which proves to have
+a signature mismatch will not be permitted to load.
+
+Any module that has an unparseable signature will be rejected.
+
+
+=========================================
+ADMINISTERING/PROTECTING THE PRIVATE KEY
+=========================================
+
+Since the private key is used to sign modules, viruses and malware could use
+the private key to sign modules and compromise the operating system. The
+private key must be either destroyed or moved to a secure location and not kept
+in the root node of the kernel source tree.
diff --git a/Documentation/mono.txt b/Documentation/mono.txt
new file mode 100644
index 000000000..d01ac6052
--- /dev/null
+++ b/Documentation/mono.txt
@@ -0,0 +1,66 @@
+ Mono(tm) Binary Kernel Support for Linux
+ -----------------------------------------
+
+To configure Linux to automatically execute Mono-based .NET binaries
+(in the form of .exe files) without the need to use the mono CLR
+wrapper, you can use the BINFMT_MISC kernel support.
+
+This will allow you to execute Mono-based .NET binaries just like any
+other program after you have done the following:
+
+1) You MUST FIRST install the Mono CLR support, either by downloading
+ a binary package, a source tarball or by installing from CVS. Binary
+ packages for several distributions can be found at:
+
+ http://go-mono.com/download.html
+
+ Instructions for compiling Mono can be found at:
+
+ http://www.go-mono.com/compiling.html
+
+ Once the Mono CLR support has been installed, just check that
+ /usr/bin/mono (which could be located elsewhere, for example
+ /usr/local/bin/mono) is working.
+
+2) You have to compile BINFMT_MISC either as a module or into
+ the kernel (CONFIG_BINFMT_MISC) and set it up properly.
+ If you choose to compile it as a module, you will have
+ to insert it manually with modprobe/insmod, as kmod
+ cannot be easily supported with binfmt_misc.
+ Read the file 'binfmt_misc.txt' in this directory to know
+ more about the configuration process.
+
+3) Add the following entries to /etc/rc.local or similar script
+ to be run at system startup:
+
+# Insert BINFMT_MISC module into the kernel
+if [ ! -e /proc/sys/fs/binfmt_misc/register ]; then
+ /sbin/modprobe binfmt_misc
+ # Some distributions, like Fedora Core, perform
+ # the following command automatically when the
+ # binfmt_misc module is loaded into the kernel
+ # or during normal boot up (systemd-based systems).
+ # Thus, it is possible that the following line
+ # is not needed at all.
+ mount -t binfmt_misc none /proc/sys/fs/binfmt_misc
+fi
+
+# Register support for .NET CLR binaries
+if [ -e /proc/sys/fs/binfmt_misc/register ]; then
+ # Replace /usr/bin/mono with the correct pathname to
+ # the Mono CLR runtime (usually /usr/local/bin/mono
+ # when compiling from sources or CVS).
+ echo ':CLR:M::MZ::/usr/bin/mono:' > /proc/sys/fs/binfmt_misc/register
+else
+ echo "No binfmt_misc support"
+ exit 1
+fi
+
+4) Check that .exe binaries can be ran without the need of a
+ wrapper script, simply by launching the .exe file directly
+ from a command prompt, for example:
+
+ /usr/bin/xsd.exe
+
+ NOTE: If this fails with a permission denied error, check
+ that the .exe file has execute permissions.
diff --git a/Documentation/mtd/nand/pxa3xx-nand.txt b/Documentation/mtd/nand/pxa3xx-nand.txt
new file mode 100644
index 000000000..1074cbc67
--- /dev/null
+++ b/Documentation/mtd/nand/pxa3xx-nand.txt
@@ -0,0 +1,113 @@
+
+About this document
+===================
+
+Some notes about Marvell's NAND controller available in PXA and Armada 370/XP
+SoC (aka NFCv1 and NFCv2), with an emphasis on the latter.
+
+NFCv2 controller background
+===========================
+
+The controller has a 2176 bytes FIFO buffer. Therefore, in order to support
+larger pages, I/O operations on 4 KiB and 8 KiB pages is done with a set of
+chunked transfers.
+
+For instance, if we choose a 2048 data chunk and set "BCH" ECC (see below)
+we'll have this layout in the pages:
+
+ ------------------------------------------------------------------------------
+ | 2048B data | 32B spare | 30B ECC || 2048B data | 32B spare | 30B ECC | ... |
+ ------------------------------------------------------------------------------
+
+The driver reads the data and spare portions independently and builds an internal
+buffer with this layout (in the 4 KiB page case):
+
+ ------------------------------------------
+ | 4096B data | 64B spare |
+ ------------------------------------------
+
+Also, for the READOOB command the driver disables the ECC and reads a 'spare + ECC'
+OOB, one per chunk read.
+
+ -------------------------------------------------------------------
+ | 4096B data | 32B spare | 30B ECC | 32B spare | 30B ECC |
+ -------------------------------------------------------------------
+
+So, in order to achieve reading (for instance), we issue several READ0 commands
+(with some additional controller-specific magic) and read two chunks of 2080B
+(2048 data + 32 spare) each.
+The driver accommodates this data to expose the NAND core a contiguous buffer
+(4096 data + spare) or (4096 + spare + ECC + spare + ECC).
+
+ECC
+===
+
+The controller has built-in hardware ECC capabilities. In addition it is
+configurable between two modes: 1) Hamming, 2) BCH.
+
+Note that the actual BCH mode: BCH-4 or BCH-8 will depend on the way
+the controller is configured to transfer the data.
+
+In the BCH mode the ECC code will be calculated for each transferred chunk
+and expected to be located (when reading/programming) right after the spare
+bytes as the figure above shows.
+
+So, repeating the above scheme, a 2048B data chunk will be followed by 32B
+spare, and then the ECC controller will read/write the ECC code (30B in
+this case):
+
+ ------------------------------------
+ | 2048B data | 32B spare | 30B ECC |
+ ------------------------------------
+
+If the ECC mode is 'BCH' then the ECC is *always* 30 bytes long.
+If the ECC mode is 'Hamming' the ECC is 6 bytes long, for each 512B block.
+So in Hamming mode, a 2048B page will have a 24B ECC.
+
+Despite all of the above, the controller requires the driver to only read or
+write in multiples of 8-bytes, because the data buffer is 64-bits.
+
+OOB
+===
+
+Because of the above scheme, and because the "spare" OOB is really located in
+the middle of a page, spare OOB cannot be read or write independently of the
+data area. In other words, in order to read the OOB (aka READOOB), the entire
+page (aka READ0) has to be read.
+
+In the same sense, in order to write to the spare OOB the driver has to write
+an *entire* page.
+
+Factory bad blocks handling
+===========================
+
+Given the ECC BCH requires to layout the device's pages in a split
+data/OOB/data/OOB way, the controller has a view of the flash page that's
+different from the specified (aka the manufacturer's) view. In other words,
+
+Factory view:
+
+ -----------------------------------------------
+ | Data |x OOB |
+ -----------------------------------------------
+
+Driver's view:
+
+ -----------------------------------------------
+ | Data | OOB | Data x | OOB |
+ -----------------------------------------------
+
+It can be seen from the above, that the factory bad block marker must be
+searched within the 'data' region, and not in the usual OOB region.
+
+In addition, this means under regular usage the driver will write such
+position (since it belongs to the data region) and every used block is
+likely to be marked as bad.
+
+For this reason, marking the block as bad in the OOB is explicitly
+disabled by using the NAND_BBT_NO_OOB_BBM option in the driver. The rationale
+for this is that there's no point in marking a block as bad, because good
+blocks are also 'marked as bad' (in the OOB BBM sense) under normal usage.
+
+Instead, the driver relies on the bad block table alone, and should only perform
+the bad block scan on the very first time (when the device hasn't been used).
diff --git a/Documentation/mtd/nand_ecc.txt b/Documentation/mtd/nand_ecc.txt
new file mode 100644
index 000000000..e129b2479
--- /dev/null
+++ b/Documentation/mtd/nand_ecc.txt
@@ -0,0 +1,714 @@
+Introduction
+============
+
+Having looked at the linux mtd/nand driver and more specific at nand_ecc.c
+I felt there was room for optimisation. I bashed the code for a few hours
+performing tricks like table lookup removing superfluous code etc.
+After that the speed was increased by 35-40%.
+Still I was not too happy as I felt there was additional room for improvement.
+
+Bad! I was hooked.
+I decided to annotate my steps in this file. Perhaps it is useful to someone
+or someone learns something from it.
+
+
+The problem
+===========
+
+NAND flash (at least SLC one) typically has sectors of 256 bytes.
+However NAND flash is not extremely reliable so some error detection
+(and sometimes correction) is needed.
+
+This is done by means of a Hamming code. I'll try to explain it in
+laymans terms (and apologies to all the pro's in the field in case I do
+not use the right terminology, my coding theory class was almost 30
+years ago, and I must admit it was not one of my favourites).
+
+As I said before the ecc calculation is performed on sectors of 256
+bytes. This is done by calculating several parity bits over the rows and
+columns. The parity used is even parity which means that the parity bit = 1
+if the data over which the parity is calculated is 1 and the parity bit = 0
+if the data over which the parity is calculated is 0. So the total
+number of bits over the data over which the parity is calculated + the
+parity bit is even. (see wikipedia if you can't follow this).
+Parity is often calculated by means of an exclusive or operation,
+sometimes also referred to as xor. In C the operator for xor is ^
+
+Back to ecc.
+Let's give a small figure:
+
+byte 0: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp4 ... rp14
+byte 1: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp2 rp4 ... rp14
+byte 2: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp4 ... rp14
+byte 3: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp4 ... rp14
+byte 4: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp2 rp5 ... rp14
+....
+byte 254: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp0 rp3 rp5 ... rp15
+byte 255: bit7 bit6 bit5 bit4 bit3 bit2 bit1 bit0 rp1 rp3 rp5 ... rp15
+ cp1 cp0 cp1 cp0 cp1 cp0 cp1 cp0
+ cp3 cp3 cp2 cp2 cp3 cp3 cp2 cp2
+ cp5 cp5 cp5 cp5 cp4 cp4 cp4 cp4
+
+This figure represents a sector of 256 bytes.
+cp is my abbreviation for column parity, rp for row parity.
+
+Let's start to explain column parity.
+cp0 is the parity that belongs to all bit0, bit2, bit4, bit6.
+so the sum of all bit0, bit2, bit4 and bit6 values + cp0 itself is even.
+Similarly cp1 is the sum of all bit1, bit3, bit5 and bit7.
+cp2 is the parity over bit0, bit1, bit4 and bit5
+cp3 is the parity over bit2, bit3, bit6 and bit7.
+cp4 is the parity over bit0, bit1, bit2 and bit3.
+cp5 is the parity over bit4, bit5, bit6 and bit7.
+Note that each of cp0 .. cp5 is exactly one bit.
+
+Row parity actually works almost the same.
+rp0 is the parity of all even bytes (0, 2, 4, 6, ... 252, 254)
+rp1 is the parity of all odd bytes (1, 3, 5, 7, ..., 253, 255)
+rp2 is the parity of all bytes 0, 1, 4, 5, 8, 9, ...
+(so handle two bytes, then skip 2 bytes).
+rp3 is covers the half rp2 does not cover (bytes 2, 3, 6, 7, 10, 11, ...)
+for rp4 the rule is cover 4 bytes, skip 4 bytes, cover 4 bytes, skip 4 etc.
+so rp4 calculates parity over bytes 0, 1, 2, 3, 8, 9, 10, 11, 16, ...)
+and rp5 covers the other half, so bytes 4, 5, 6, 7, 12, 13, 14, 15, 20, ..
+The story now becomes quite boring. I guess you get the idea.
+rp6 covers 8 bytes then skips 8 etc
+rp7 skips 8 bytes then covers 8 etc
+rp8 covers 16 bytes then skips 16 etc
+rp9 skips 16 bytes then covers 16 etc
+rp10 covers 32 bytes then skips 32 etc
+rp11 skips 32 bytes then covers 32 etc
+rp12 covers 64 bytes then skips 64 etc
+rp13 skips 64 bytes then covers 64 etc
+rp14 covers 128 bytes then skips 128
+rp15 skips 128 bytes then covers 128
+
+In the end the parity bits are grouped together in three bytes as
+follows:
+ECC Bit 7 Bit 6 Bit 5 Bit 4 Bit 3 Bit 2 Bit 1 Bit 0
+ECC 0 rp07 rp06 rp05 rp04 rp03 rp02 rp01 rp00
+ECC 1 rp15 rp14 rp13 rp12 rp11 rp10 rp09 rp08
+ECC 2 cp5 cp4 cp3 cp2 cp1 cp0 1 1
+
+I detected after writing this that ST application note AN1823
+(http://www.st.com/stonline/) gives a much
+nicer picture.(but they use line parity as term where I use row parity)
+Oh well, I'm graphically challenged, so suffer with me for a moment :-)
+And I could not reuse the ST picture anyway for copyright reasons.
+
+
+Attempt 0
+=========
+
+Implementing the parity calculation is pretty simple.
+In C pseudocode:
+for (i = 0; i < 256; i++)
+{
+ if (i & 0x01)
+ rp1 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+ else
+ rp0 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp1;
+ if (i & 0x02)
+ rp3 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp3;
+ else
+ rp2 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp2;
+ if (i & 0x04)
+ rp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp5;
+ else
+ rp4 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp4;
+ if (i & 0x08)
+ rp7 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp7;
+ else
+ rp6 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp6;
+ if (i & 0x10)
+ rp9 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp9;
+ else
+ rp8 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp8;
+ if (i & 0x20)
+ rp11 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp11;
+ else
+ rp10 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp10;
+ if (i & 0x40)
+ rp13 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp13;
+ else
+ rp12 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp12;
+ if (i & 0x80)
+ rp15 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp15;
+ else
+ rp14 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ bit3 ^ bit2 ^ bit1 ^ bit0 ^ rp14;
+ cp0 = bit6 ^ bit4 ^ bit2 ^ bit0 ^ cp0;
+ cp1 = bit7 ^ bit5 ^ bit3 ^ bit1 ^ cp1;
+ cp2 = bit5 ^ bit4 ^ bit1 ^ bit0 ^ cp2;
+ cp3 = bit7 ^ bit6 ^ bit3 ^ bit2 ^ cp3
+ cp4 = bit3 ^ bit2 ^ bit1 ^ bit0 ^ cp4
+ cp5 = bit7 ^ bit6 ^ bit5 ^ bit4 ^ cp5
+}
+
+
+Analysis 0
+==========
+
+C does have bitwise operators but not really operators to do the above
+efficiently (and most hardware has no such instructions either).
+Therefore without implementing this it was clear that the code above was
+not going to bring me a Nobel prize :-)
+
+Fortunately the exclusive or operation is commutative, so we can combine
+the values in any order. So instead of calculating all the bits
+individually, let us try to rearrange things.
+For the column parity this is easy. We can just xor the bytes and in the
+end filter out the relevant bits. This is pretty nice as it will bring
+all cp calculation out of the if loop.
+
+Similarly we can first xor the bytes for the various rows.
+This leads to:
+
+
+Attempt 1
+=========
+
+const char parity[256] = {
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+ 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0
+};
+
+void ecc1(const unsigned char *buf, unsigned char *code)
+{
+ int i;
+ const unsigned char *bp = buf;
+ unsigned char cur;
+ unsigned char rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+ unsigned char rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+ unsigned char par;
+
+ par = 0;
+ rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+ rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+ rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+ rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+ for (i = 0; i < 256; i++)
+ {
+ cur = *bp++;
+ par ^= cur;
+ if (i & 0x01) rp1 ^= cur; else rp0 ^= cur;
+ if (i & 0x02) rp3 ^= cur; else rp2 ^= cur;
+ if (i & 0x04) rp5 ^= cur; else rp4 ^= cur;
+ if (i & 0x08) rp7 ^= cur; else rp6 ^= cur;
+ if (i & 0x10) rp9 ^= cur; else rp8 ^= cur;
+ if (i & 0x20) rp11 ^= cur; else rp10 ^= cur;
+ if (i & 0x40) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x80) rp15 ^= cur; else rp14 ^= cur;
+ }
+ code[0] =
+ (parity[rp7] << 7) |
+ (parity[rp6] << 6) |
+ (parity[rp5] << 5) |
+ (parity[rp4] << 4) |
+ (parity[rp3] << 3) |
+ (parity[rp2] << 2) |
+ (parity[rp1] << 1) |
+ (parity[rp0]);
+ code[1] =
+ (parity[rp15] << 7) |
+ (parity[rp14] << 6) |
+ (parity[rp13] << 5) |
+ (parity[rp12] << 4) |
+ (parity[rp11] << 3) |
+ (parity[rp10] << 2) |
+ (parity[rp9] << 1) |
+ (parity[rp8]);
+ code[2] =
+ (parity[par & 0xf0] << 7) |
+ (parity[par & 0x0f] << 6) |
+ (parity[par & 0xcc] << 5) |
+ (parity[par & 0x33] << 4) |
+ (parity[par & 0xaa] << 3) |
+ (parity[par & 0x55] << 2);
+ code[0] = ~code[0];
+ code[1] = ~code[1];
+ code[2] = ~code[2];
+}
+
+Still pretty straightforward. The last three invert statements are there to
+give a checksum of 0xff 0xff 0xff for an empty flash. In an empty flash
+all data is 0xff, so the checksum then matches.
+
+I also introduced the parity lookup. I expected this to be the fastest
+way to calculate the parity, but I will investigate alternatives later
+on.
+
+
+Analysis 1
+==========
+
+The code works, but is not terribly efficient. On my system it took
+almost 4 times as much time as the linux driver code. But hey, if it was
+*that* easy this would have been done long before.
+No pain. no gain.
+
+Fortunately there is plenty of room for improvement.
+
+In step 1 we moved from bit-wise calculation to byte-wise calculation.
+However in C we can also use the unsigned long data type and virtually
+every modern microprocessor supports 32 bit operations, so why not try
+to write our code in such a way that we process data in 32 bit chunks.
+
+Of course this means some modification as the row parity is byte by
+byte. A quick analysis:
+for the column parity we use the par variable. When extending to 32 bits
+we can in the end easily calculate p0 and p1 from it.
+(because par now consists of 4 bytes, contributing to rp1, rp0, rp1, rp0
+respectively)
+also rp2 and rp3 can be easily retrieved from par as rp3 covers the
+first two bytes and rp2 the last two bytes.
+
+Note that of course now the loop is executed only 64 times (256/4).
+And note that care must taken wrt byte ordering. The way bytes are
+ordered in a long is machine dependent, and might affect us.
+Anyway, if there is an issue: this code is developed on x86 (to be
+precise: a DELL PC with a D920 Intel CPU)
+
+And of course the performance might depend on alignment, but I expect
+that the I/O buffers in the nand driver are aligned properly (and
+otherwise that should be fixed to get maximum performance).
+
+Let's give it a try...
+
+
+Attempt 2
+=========
+
+extern const char parity[256];
+
+void ecc2(const unsigned char *buf, unsigned char *code)
+{
+ int i;
+ const unsigned long *bp = (unsigned long *)buf;
+ unsigned long cur;
+ unsigned long rp0, rp1, rp2, rp3, rp4, rp5, rp6, rp7;
+ unsigned long rp8, rp9, rp10, rp11, rp12, rp13, rp14, rp15;
+ unsigned long par;
+
+ par = 0;
+ rp0 = 0; rp1 = 0; rp2 = 0; rp3 = 0;
+ rp4 = 0; rp5 = 0; rp6 = 0; rp7 = 0;
+ rp8 = 0; rp9 = 0; rp10 = 0; rp11 = 0;
+ rp12 = 0; rp13 = 0; rp14 = 0; rp15 = 0;
+
+ for (i = 0; i < 64; i++)
+ {
+ cur = *bp++;
+ par ^= cur;
+ if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+ if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+ if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+ if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+ if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+ }
+ /*
+ we need to adapt the code generation for the fact that rp vars are now
+ long; also the column parity calculation needs to be changed.
+ we'll bring rp4 to 15 back to single byte entities by shifting and
+ xoring
+ */
+ rp4 ^= (rp4 >> 16); rp4 ^= (rp4 >> 8); rp4 &= 0xff;
+ rp5 ^= (rp5 >> 16); rp5 ^= (rp5 >> 8); rp5 &= 0xff;
+ rp6 ^= (rp6 >> 16); rp6 ^= (rp6 >> 8); rp6 &= 0xff;
+ rp7 ^= (rp7 >> 16); rp7 ^= (rp7 >> 8); rp7 &= 0xff;
+ rp8 ^= (rp8 >> 16); rp8 ^= (rp8 >> 8); rp8 &= 0xff;
+ rp9 ^= (rp9 >> 16); rp9 ^= (rp9 >> 8); rp9 &= 0xff;
+ rp10 ^= (rp10 >> 16); rp10 ^= (rp10 >> 8); rp10 &= 0xff;
+ rp11 ^= (rp11 >> 16); rp11 ^= (rp11 >> 8); rp11 &= 0xff;
+ rp12 ^= (rp12 >> 16); rp12 ^= (rp12 >> 8); rp12 &= 0xff;
+ rp13 ^= (rp13 >> 16); rp13 ^= (rp13 >> 8); rp13 &= 0xff;
+ rp14 ^= (rp14 >> 16); rp14 ^= (rp14 >> 8); rp14 &= 0xff;
+ rp15 ^= (rp15 >> 16); rp15 ^= (rp15 >> 8); rp15 &= 0xff;
+ rp3 = (par >> 16); rp3 ^= (rp3 >> 8); rp3 &= 0xff;
+ rp2 = par & 0xffff; rp2 ^= (rp2 >> 8); rp2 &= 0xff;
+ par ^= (par >> 16);
+ rp1 = (par >> 8); rp1 &= 0xff;
+ rp0 = (par & 0xff);
+ par ^= (par >> 8); par &= 0xff;
+
+ code[0] =
+ (parity[rp7] << 7) |
+ (parity[rp6] << 6) |
+ (parity[rp5] << 5) |
+ (parity[rp4] << 4) |
+ (parity[rp3] << 3) |
+ (parity[rp2] << 2) |
+ (parity[rp1] << 1) |
+ (parity[rp0]);
+ code[1] =
+ (parity[rp15] << 7) |
+ (parity[rp14] << 6) |
+ (parity[rp13] << 5) |
+ (parity[rp12] << 4) |
+ (parity[rp11] << 3) |
+ (parity[rp10] << 2) |
+ (parity[rp9] << 1) |
+ (parity[rp8]);
+ code[2] =
+ (parity[par & 0xf0] << 7) |
+ (parity[par & 0x0f] << 6) |
+ (parity[par & 0xcc] << 5) |
+ (parity[par & 0x33] << 4) |
+ (parity[par & 0xaa] << 3) |
+ (parity[par & 0x55] << 2);
+ code[0] = ~code[0];
+ code[1] = ~code[1];
+ code[2] = ~code[2];
+}
+
+The parity array is not shown any more. Note also that for these
+examples I kinda deviated from my regular programming style by allowing
+multiple statements on a line, not using { } in then and else blocks
+with only a single statement and by using operators like ^=
+
+
+Analysis 2
+==========
+
+The code (of course) works, and hurray: we are a little bit faster than
+the linux driver code (about 15%). But wait, don't cheer too quickly.
+THere is more to be gained.
+If we look at e.g. rp14 and rp15 we see that we either xor our data with
+rp14 or with rp15. However we also have par which goes over all data.
+This means there is no need to calculate rp14 as it can be calculated from
+rp15 through rp14 = par ^ rp15;
+(or if desired we can avoid calculating rp15 and calculate it from
+rp14). That is why some places refer to inverse parity.
+Of course the same thing holds for rp4/5, rp6/7, rp8/9, rp10/11 and rp12/13.
+Effectively this means we can eliminate the else clause from the if
+statements. Also we can optimise the calculation in the end a little bit
+by going from long to byte first. Actually we can even avoid the table
+lookups
+
+Attempt 3
+=========
+
+Odd replaced:
+ if (i & 0x01) rp5 ^= cur; else rp4 ^= cur;
+ if (i & 0x02) rp7 ^= cur; else rp6 ^= cur;
+ if (i & 0x04) rp9 ^= cur; else rp8 ^= cur;
+ if (i & 0x08) rp11 ^= cur; else rp10 ^= cur;
+ if (i & 0x10) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x20) rp15 ^= cur; else rp14 ^= cur;
+with
+ if (i & 0x01) rp5 ^= cur;
+ if (i & 0x02) rp7 ^= cur;
+ if (i & 0x04) rp9 ^= cur;
+ if (i & 0x08) rp11 ^= cur;
+ if (i & 0x10) rp13 ^= cur;
+ if (i & 0x20) rp15 ^= cur;
+
+ and outside the loop added:
+ rp4 = par ^ rp5;
+ rp6 = par ^ rp7;
+ rp8 = par ^ rp9;
+ rp10 = par ^ rp11;
+ rp12 = par ^ rp13;
+ rp14 = par ^ rp15;
+
+And after that the code takes about 30% more time, although the number of
+statements is reduced. This is also reflected in the assembly code.
+
+
+Analysis 3
+==========
+
+Very weird. Guess it has to do with caching or instruction parallellism
+or so. I also tried on an eeePC (Celeron, clocked at 900 Mhz). Interesting
+observation was that this one is only 30% slower (according to time)
+executing the code as my 3Ghz D920 processor.
+
+Well, it was expected not to be easy so maybe instead move to a
+different track: let's move back to the code from attempt2 and do some
+loop unrolling. This will eliminate a few if statements. I'll try
+different amounts of unrolling to see what works best.
+
+
+Attempt 4
+=========
+
+Unrolled the loop 1, 2, 3 and 4 times.
+For 4 the code starts with:
+
+ for (i = 0; i < 4; i++)
+ {
+ cur = *bp++;
+ par ^= cur;
+ rp4 ^= cur;
+ rp6 ^= cur;
+ rp8 ^= cur;
+ rp10 ^= cur;
+ if (i & 0x1) rp13 ^= cur; else rp12 ^= cur;
+ if (i & 0x2) rp15 ^= cur; else rp14 ^= cur;
+ cur = *bp++;
+ par ^= cur;
+ rp5 ^= cur;
+ rp6 ^= cur;
+ ...
+
+
+Analysis 4
+==========
+
+Unrolling once gains about 15%
+Unrolling twice keeps the gain at about 15%
+Unrolling three times gives a gain of 30% compared to attempt 2.
+Unrolling four times gives a marginal improvement compared to unrolling
+three times.
+
+I decided to proceed with a four time unrolled loop anyway. It was my gut
+feeling that in the next steps I would obtain additional gain from it.
+
+The next step was triggered by the fact that par contains the xor of all
+bytes and rp4 and rp5 each contain the xor of half of the bytes.
+So in effect par = rp4 ^ rp5. But as xor is commutative we can also say
+that rp5 = par ^ rp4. So no need to keep both rp4 and rp5 around. We can
+eliminate rp5 (or rp4, but I already foresaw another optimisation).
+The same holds for rp6/7, rp8/9, rp10/11 rp12/13 and rp14/15.
+
+
+Attempt 5
+=========
+
+Effectively so all odd digit rp assignments in the loop were removed.
+This included the else clause of the if statements.
+Of course after the loop we need to correct things by adding code like:
+ rp5 = par ^ rp4;
+Also the initial assignments (rp5 = 0; etc) could be removed.
+Along the line I also removed the initialisation of rp0/1/2/3.
+
+
+Analysis 5
+==========
+
+Measurements showed this was a good move. The run-time roughly halved
+compared with attempt 4 with 4 times unrolled, and we only require 1/3rd
+of the processor time compared to the current code in the linux kernel.
+
+However, still I thought there was more. I didn't like all the if
+statements. Why not keep a running parity and only keep the last if
+statement. Time for yet another version!
+
+
+Attempt 6
+=========
+
+THe code within the for loop was changed to:
+
+ for (i = 0; i < 4; i++)
+ {
+ cur = *bp++; tmppar = cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur; rp8 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur; rp8 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp8 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp8 ^= cur;
+
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur;
+
+ par ^= tmppar;
+ if ((i & 0x1) == 0) rp12 ^= tmppar;
+ if ((i & 0x2) == 0) rp14 ^= tmppar;
+ }
+
+As you can see tmppar is used to accumulate the parity within a for
+iteration. In the last 3 statements is added to par and, if needed,
+to rp12 and rp14.
+
+While making the changes I also found that I could exploit that tmppar
+contains the running parity for this iteration. So instead of having:
+rp4 ^= cur; rp6 = cur;
+I removed the rp6 = cur; statement and did rp6 ^= tmppar; on next
+statement. A similar change was done for rp8 and rp10
+
+
+Analysis 6
+==========
+
+Measuring this code again showed big gain. When executing the original
+linux code 1 million times, this took about 1 second on my system.
+(using time to measure the performance). After this iteration I was back
+to 0.075 sec. Actually I had to decide to start measuring over 10
+million iterations in order not to lose too much accuracy. This one
+definitely seemed to be the jackpot!
+
+There is a little bit more room for improvement though. There are three
+places with statements:
+rp4 ^= cur; rp6 ^= cur;
+It seems more efficient to also maintain a variable rp4_6 in the while
+loop; This eliminates 3 statements per loop. Of course after the loop we
+need to correct by adding:
+ rp4 ^= rp4_6;
+ rp6 ^= rp4_6
+Furthermore there are 4 sequential assignments to rp8. This can be
+encoded slightly more efficiently by saving tmppar before those 4 lines
+and later do rp8 = rp8 ^ tmppar ^ notrp8;
+(where notrp8 is the value of rp8 before those 4 lines).
+Again a use of the commutative property of xor.
+Time for a new test!
+
+
+Attempt 7
+=========
+
+The new code now looks like:
+
+ for (i = 0; i < 4; i++)
+ {
+ cur = *bp++; tmppar = cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= tmppar;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp8 ^= tmppar;
+
+ cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp10 ^= tmppar;
+
+ notrp8 = tmppar;
+ cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur;
+ rp8 = rp8 ^ tmppar ^ notrp8;
+
+ cur = *bp++; tmppar ^= cur; rp4_6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp6 ^= cur;
+ cur = *bp++; tmppar ^= cur; rp4 ^= cur;
+ cur = *bp++; tmppar ^= cur;
+
+ par ^= tmppar;
+ if ((i & 0x1) == 0) rp12 ^= tmppar;
+ if ((i & 0x2) == 0) rp14 ^= tmppar;
+ }
+ rp4 ^= rp4_6;
+ rp6 ^= rp4_6;
+
+
+Not a big change, but every penny counts :-)
+
+
+Analysis 7
+==========
+
+Actually this made things worse. Not very much, but I don't want to move
+into the wrong direction. Maybe something to investigate later. Could
+have to do with caching again.
+
+Guess that is what there is to win within the loop. Maybe unrolling one
+more time will help. I'll keep the optimisations from 7 for now.
+
+
+Attempt 8
+=========
+
+Unrolled the loop one more time.
+
+
+Analysis 8
+==========
+
+This makes things worse. Let's stick with attempt 6 and continue from there.
+Although it seems that the code within the loop cannot be optimised
+further there is still room to optimize the generation of the ecc codes.
+We can simply calculate the total parity. If this is 0 then rp4 = rp5
+etc. If the parity is 1, then rp4 = !rp5;
+But if rp4 = rp5 we do not need rp5 etc. We can just write the even bits
+in the result byte and then do something like
+ code[0] |= (code[0] << 1);
+Lets test this.
+
+
+Attempt 9
+=========
+
+Changed the code but again this slightly degrades performance. Tried all
+kind of other things, like having dedicated parity arrays to avoid the
+shift after parity[rp7] << 7; No gain.
+Change the lookup using the parity array by using shift operators (e.g.
+replace parity[rp7] << 7 with:
+rp7 ^= (rp7 << 4);
+rp7 ^= (rp7 << 2);
+rp7 ^= (rp7 << 1);
+rp7 &= 0x80;
+No gain.
+
+The only marginal change was inverting the parity bits, so we can remove
+the last three invert statements.
+
+Ah well, pity this does not deliver more. Then again 10 million
+iterations using the linux driver code takes between 13 and 13.5
+seconds, whereas my code now takes about 0.73 seconds for those 10
+million iterations. So basically I've improved the performance by a
+factor 18 on my system. Not that bad. Of course on different hardware
+you will get different results. No warranties!
+
+But of course there is no such thing as a free lunch. The codesize almost
+tripled (from 562 bytes to 1434 bytes). Then again, it is not that much.
+
+
+Correcting errors
+=================
+
+For correcting errors I again used the ST application note as a starter,
+but I also peeked at the existing code.
+The algorithm itself is pretty straightforward. Just xor the given and
+the calculated ecc. If all bytes are 0 there is no problem. If 11 bits
+are 1 we have one correctable bit error. If there is 1 bit 1, we have an
+error in the given ecc code.
+It proved to be fastest to do some table lookups. Performance gain
+introduced by this is about a factor 2 on my system when a repair had to
+be done, and 1% or so if no repair had to be done.
+Code size increased from 330 bytes to 686 bytes for this function.
+(gcc 4.2, -O3)
+
+
+Conclusion
+==========
+
+The gain when calculating the ecc is tremendous. Om my development hardware
+a speedup of a factor of 18 for ecc calculation was achieved. On a test on an
+embedded system with a MIPS core a factor 7 was obtained.
+On a test with a Linksys NSLU2 (ARMv5TE processor) the speedup was a factor
+5 (big endian mode, gcc 4.1.2, -O3)
+For correction not much gain could be obtained (as bitflips are rare). Then
+again there are also much less cycles spent there.
+
+It seems there is not much more gain possible in this, at least when
+programmed in C. Of course it might be possible to squeeze something more
+out of it with an assembler program, but due to pipeline behaviour etc
+this is very tricky (at least for intel hw).
+
+Author: Frans Meulenbroeks
+Copyright (C) 2008 Koninklijke Philips Electronics NV.
diff --git a/Documentation/mtd/spi-nor.txt b/Documentation/mtd/spi-nor.txt
new file mode 100644
index 000000000..548d6306e
--- /dev/null
+++ b/Documentation/mtd/spi-nor.txt
@@ -0,0 +1,62 @@
+ SPI NOR framework
+ ============================================
+
+Part I - Why do we need this framework?
+---------------------------------------
+
+SPI bus controllers (drivers/spi/) only deal with streams of bytes; the bus
+controller operates agnostic of the specific device attached. However, some
+controllers (such as Freescale's QuadSPI controller) cannot easily handle
+arbitrary streams of bytes, but rather are designed specifically for SPI NOR.
+
+In particular, Freescale's QuadSPI controller must know the NOR commands to
+find the right LUT sequence. Unfortunately, the SPI subsystem has no notion of
+opcodes, addresses, or data payloads; a SPI controller simply knows to send or
+receive bytes (Tx and Rx). Therefore, we must define a new layering scheme under
+which the controller driver is aware of the opcodes, addressing, and other
+details of the SPI NOR protocol.
+
+Part II - How does the framework work?
+--------------------------------------
+
+This framework just adds a new layer between the MTD and the SPI bus driver.
+With this new layer, the SPI NOR controller driver does not depend on the
+m25p80 code anymore.
+
+ Before this framework, the layer is like:
+
+ MTD
+ ------------------------
+ m25p80
+ ------------------------
+ SPI bus driver
+ ------------------------
+ SPI NOR chip
+
+ After this framework, the layer is like:
+ MTD
+ ------------------------
+ SPI NOR framework
+ ------------------------
+ m25p80
+ ------------------------
+ SPI bus driver
+ ------------------------
+ SPI NOR chip
+
+ With the SPI NOR controller driver (Freescale QuadSPI), it looks like:
+ MTD
+ ------------------------
+ SPI NOR framework
+ ------------------------
+ fsl-quadSPI
+ ------------------------
+ SPI NOR chip
+
+Part III - How can drivers use the framework?
+---------------------------------------------
+
+The main API is spi_nor_scan(). Before you call the hook, a driver should
+initialize the necessary fields for spi_nor{}. Please see
+drivers/mtd/spi-nor/spi-nor.c for detail. Please also refer to fsl-quadspi.c
+when you want to write a new driver for a SPI NOR controller.
diff --git a/Documentation/namespaces/compatibility-list.txt b/Documentation/namespaces/compatibility-list.txt
new file mode 100644
index 000000000..defc5589b
--- /dev/null
+++ b/Documentation/namespaces/compatibility-list.txt
@@ -0,0 +1,39 @@
+ Namespaces compatibility list
+
+This document contains the information about the problems user
+may have when creating tasks living in different namespaces.
+
+Here's the summary. This matrix shows the known problems, that
+occur when tasks share some namespace (the columns) while living
+in different other namespaces (the rows):
+
+ UTS IPC VFS PID User Net
+UTS X
+IPC X 1
+VFS X
+PID 1 1 X
+User 2 2 X
+Net X
+
+1. Both the IPC and the PID namespaces provide IDs to address
+ object inside the kernel. E.g. semaphore with IPCID or
+ process group with pid.
+
+ In both cases, tasks shouldn't try exposing this ID to some
+ other task living in a different namespace via a shared filesystem
+ or IPC shmem/message. The fact is that this ID is only valid
+ within the namespace it was obtained in and may refer to some
+ other object in another namespace.
+
+2. Intentionally, two equal user IDs in different user namespaces
+ should not be equal from the VFS point of view. In other
+ words, user 10 in one user namespace shouldn't have the same
+ access permissions to files, belonging to user 10 in another
+ namespace.
+
+ The same is true for the IPC namespaces being shared - two users
+ from different user namespaces should not access the same IPC objects
+ even having equal UIDs.
+
+ But currently this is not so.
+
diff --git a/Documentation/namespaces/resource-control.txt b/Documentation/namespaces/resource-control.txt
new file mode 100644
index 000000000..abc13c394
--- /dev/null
+++ b/Documentation/namespaces/resource-control.txt
@@ -0,0 +1,14 @@
+There are a lot of kinds of objects in the kernel that don't have
+individual limits or that have limits that are ineffective when a set
+of processes is allowed to switch user ids. With user namespaces
+enabled in a kernel for people who don't trust their users or their
+users programs to play nice this problems becomes more acute.
+
+Therefore it is recommended that memory control groups be enabled in
+kernels that enable user namespaces, and it is further recommended
+that userspace configure memory control groups to limit how much
+memory user's they don't trust to play nice can use.
+
+Memory control groups can be configured by installing the libcgroup
+package present on most distros editing /etc/cgrules.conf,
+/etc/cgconfig.conf and setting up libpam-cgroup.
diff --git a/Documentation/netlabel/00-INDEX b/Documentation/netlabel/00-INDEX
new file mode 100644
index 000000000..837bf3599
--- /dev/null
+++ b/Documentation/netlabel/00-INDEX
@@ -0,0 +1,10 @@
+00-INDEX
+ - this file.
+cipso_ipv4.txt
+ - documentation on the IPv4 CIPSO protocol engine.
+draft-ietf-cipso-ipsecurity-01.txt
+ - IETF draft of the CIPSO protocol, dated 16 July 1992.
+introduction.txt
+ - NetLabel introduction, READ THIS FIRST.
+lsm_interface.txt
+ - documentation on the NetLabel kernel security module API.
diff --git a/Documentation/netlabel/cipso_ipv4.txt b/Documentation/netlabel/cipso_ipv4.txt
new file mode 100644
index 000000000..93dacb132
--- /dev/null
+++ b/Documentation/netlabel/cipso_ipv4.txt
@@ -0,0 +1,48 @@
+NetLabel CIPSO/IPv4 Protocol Engine
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+May 17, 2006
+
+ * Overview
+
+The NetLabel CIPSO/IPv4 protocol engine is based on the IETF Commercial IP
+Security Option (CIPSO) draft from July 16, 1992. A copy of this draft can be
+found in this directory, consult '00-INDEX' for the filename. While the IETF
+draft never made it to an RFC standard it has become a de-facto standard for
+labeled networking and is used in many trusted operating systems.
+
+ * Outbound Packet Processing
+
+The CIPSO/IPv4 protocol engine applies the CIPSO IP option to packets by
+adding the CIPSO label to the socket. This causes all packets leaving the
+system through the socket to have the CIPSO IP option applied. The socket's
+CIPSO label can be changed at any point in time, however, it is recommended
+that it is set upon the socket's creation. The LSM can set the socket's CIPSO
+label by using the NetLabel security module API; if the NetLabel "domain" is
+configured to use CIPSO for packet labeling then a CIPSO IP option will be
+generated and attached to the socket.
+
+ * Inbound Packet Processing
+
+The CIPSO/IPv4 protocol engine validates every CIPSO IP option it finds at the
+IP layer without any special handling required by the LSM. However, in order
+to decode and translate the CIPSO label on the packet the LSM must use the
+NetLabel security module API to extract the security attributes of the packet.
+This is typically done at the socket layer using the 'socket_sock_rcv_skb()'
+LSM hook.
+
+ * Label Translation
+
+The CIPSO/IPv4 protocol engine contains a mechanism to translate CIPSO security
+attributes such as sensitivity level and category to values which are
+appropriate for the host. These mappings are defined as part of a CIPSO
+Domain Of Interpretation (DOI) definition and are configured through the
+NetLabel user space communication layer. Each DOI definition can have a
+different security attribute mapping table.
+
+ * Label Translation Cache
+
+The NetLabel system provides a framework for caching security attribute
+mappings from the network labels to the corresponding LSM identifiers. The
+CIPSO/IPv4 protocol engine supports this caching mechanism.
diff --git a/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt b/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt
new file mode 100644
index 000000000..256c2c9d4
--- /dev/null
+++ b/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt
@@ -0,0 +1,791 @@
+IETF CIPSO Working Group
+16 July, 1992
+
+
+
+ COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
+
+
+
+1. Status
+
+This Internet Draft provides the high level specification for a Commercial
+IP Security Option (CIPSO). This draft reflects the version as approved by
+the CIPSO IETF Working Group. Distribution of this memo is unlimited.
+
+This document is an Internet Draft. Internet Drafts are working documents
+of the Internet Engineering Task Force (IETF), its Areas, and its Working
+Groups. Note that other groups may also distribute working documents as
+Internet Drafts.
+
+Internet Drafts are draft documents valid for a maximum of six months.
+Internet Drafts may be updated, replaced, or obsoleted by other documents
+at any time. It is not appropriate to use Internet Drafts as reference
+material or to cite them other than as a "working draft" or "work in
+progress."
+
+Please check the I-D abstract listing contained in each Internet Draft
+directory to learn the current status of this or any other Internet Draft.
+
+
+
+
+2. Background
+
+Currently the Internet Protocol includes two security options. One of
+these options is the DoD Basic Security Option (BSO) (Type 130) which allows
+IP datagrams to be labeled with security classifications. This option
+provides sixteen security classifications and a variable number of handling
+restrictions. To handle additional security information, such as security
+categories or compartments, another security option (Type 133) exists and
+is referred to as the DoD Extended Security Option (ESO). The values for
+the fixed fields within these two options are administered by the Defense
+Information Systems Agency (DISA).
+
+Computer vendors are now building commercial operating systems with
+mandatory access controls and multi-level security. These systems are
+no longer built specifically for a particular group in the defense or
+intelligence communities. They are generally available commercial systems
+for use in a variety of government and civil sector environments.
+
+The small number of ESO format codes can not support all the possible
+applications of a commercial security option. The BSO and ESO were
+designed to only support the United States DoD. CIPSO has been designed
+to support multiple security policies. This Internet Draft provides the
+format and procedures required to support a Mandatory Access Control
+security policy. Support for additional security policies shall be
+defined in future RFCs.
+
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 1]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+
+3. CIPSO Format
+
+Option type: 134 (Class 0, Number 6, Copy on Fragmentation)
+Option length: Variable
+
+This option permits security related information to be passed between
+systems within a single Domain of Interpretation (DOI). A DOI is a
+collection of systems which agree on the meaning of particular values
+in the security option. An authority that has been assigned a DOI
+identifier will define a mapping between appropriate CIPSO field values
+and their human readable equivalent. This authority will distribute that
+mapping to hosts within the authority's domain. These mappings may be
+sensitive, therefore a DOI authority is not required to make these
+mappings available to anyone other than the systems that are included in
+the DOI.
+
+This option MUST be copied on fragmentation. This option appears at most
+once in a datagram. All multi-octet fields in the option are defined to be
+transmitted in network byte order. The format of this option is as follows:
+
++----------+----------+------//------+-----------//---------+
+| 10000110 | LLLLLLLL | DDDDDDDDDDDD | TTTTTTTTTTTTTTTTTTTT |
++----------+----------+------//------+-----------//---------+
+
+ TYPE=134 OPTION DOMAIN OF TAGS
+ LENGTH INTERPRETATION
+
+
+ Figure 1. CIPSO Format
+
+
+3.1 Type
+
+This field is 1 octet in length. Its value is 134.
+
+
+3.2 Length
+
+This field is 1 octet in length. It is the total length of the option
+including the type and length fields. With the current IP header length
+restriction of 40 octets the value of this field MUST not exceed 40.
+
+
+3.3 Domain of Interpretation Identifier
+
+This field is an unsigned 32 bit integer. The value 0 is reserved and MUST
+not appear as the DOI identifier in any CIPSO option. Implementations
+should assume that the DOI identifier field is not aligned on any particular
+byte boundary.
+
+To conserve space in the protocol, security levels and categories are
+represented by numbers rather than their ASCII equivalent. This requires
+a mapping table within CIPSO hosts to map these numbers to their
+corresponding ASCII representations. Non-related groups of systems may
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 2]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+have their own unique mappings. For example, one group of systems may
+use the number 5 to represent Unclassified while another group may use the
+number 1 to represent that same security level. The DOI identifier is used
+to identify which mapping was used for the values within the option.
+
+
+3.4 Tag Types
+
+A common format for passing security related information is necessary
+for interoperability. CIPSO uses sets of "tags" to contain the security
+information relevant to the data in the IP packet. Each tag begins with
+a tag type identifier followed by the length of the tag and ends with the
+actual security information to be passed. All multi-octet fields in a tag
+are defined to be transmitted in network byte order. Like the DOI
+identifier field in the CIPSO header, implementations should assume that
+all tags, as well as fields within a tag, are not aligned on any particular
+octet boundary. The tag types defined in this document contain alignment
+bytes to assist alignment of some information, however alignment can not
+be guaranteed if CIPSO is not the first IP option.
+
+CIPSO tag types 0 through 127 are reserved for defining standard tag
+formats. Their definitions will be published in RFCs. Tag types whose
+identifiers are greater than 127 are defined by the DOI authority and may
+only be meaningful in certain Domains of Interpretation. For these tag
+types, implementations will require the DOI identifier as well as the tag
+number to determine the security policy and the format associated with the
+tag. Use of tag types above 127 are restricted to closed networks where
+interoperability with other networks will not be an issue. Implementations
+that support a tag type greater than 127 MUST support at least one DOI that
+requires only tag types 1 to 127.
+
+Tag type 0 is reserved. Tag types 1, 2, and 5 are defined in this
+Internet Draft. Types 3 and 4 are reserved for work in progress.
+The standard format for all current and future CIPSO tags is shown below:
+
++----------+----------+--------//--------+
+| TTTTTTTT | LLLLLLLL | IIIIIIIIIIIIIIII |
++----------+----------+--------//--------+
+ TAG TAG TAG
+ TYPE LENGTH INFORMATION
+
+ Figure 2: Standard Tag Format
+
+In the three tag types described in this document, the length and count
+restrictions are based on the current IP limitation of 40 octets for all
+IP options. If the IP header is later expanded, then the length and count
+restrictions specified in this document may increase to use the full area
+provided for IP options.
+
+
+3.4.1 Tag Type Classes
+
+Tag classes consist of tag types that have common processing requirements
+and support the same security policy. The three tags defined in this
+Internet Draft belong to the Mandatory Access Control (MAC) Sensitivity
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 3]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+class and support the MAC Sensitivity security policy.
+
+
+3.4.2 Tag Type 1
+
+This is referred to as the "bit-mapped" tag type. Tag type 1 is included
+in the MAC Sensitivity tag type class. The format of this tag type is as
+follows:
+
++----------+----------+----------+----------+--------//---------+
+| 00000001 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCC |
++----------+----------+----------+----------+--------//---------+
+
+ TAG TAG ALIGNMENT SENSITIVITY BIT MAP OF
+ TYPE LENGTH OCTET LEVEL CATEGORIES
+
+ Figure 3. Tag Type 1 Format
+
+
+3.4.2.1 Tag Type
+
+This field is 1 octet in length and has a value of 1.
+
+
+3.4.2.2 Tag Length
+
+This field is 1 octet in length. It is the total length of the tag type
+including the type and length fields. With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.2.3 Alignment Octet
+
+This field is 1 octet in length and always has the value of 0. Its purpose
+is to align the category bitmap field on an even octet boundary. This will
+speed many implementations including router implementations.
+
+
+3.4.2.4 Sensitivity Level
+
+This field is 1 octet in length. Its value is from 0 to 255. The values
+are ordered with 0 being the minimum value and 255 representing the maximum
+value.
+
+
+3.4.2.5 Bit Map of Categories
+
+The length of this field is variable and ranges from 0 to 30 octets. This
+provides representation of categories 0 to 239. The ordering of the bits
+is left to right or MSB to LSB. For example category 0 is represented by
+the most significant bit of the first byte and category 15 is represented
+by the least significant bit of the second byte. Figure 4 graphically
+shows this ordering. Bit N is binary 1 if category N is part of the label
+for the datagram, and bit N is binary 0 if category N is not part of the
+label. Except for the optimized tag 1 format described in the next section,
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 4]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+minimal encoding SHOULD be used resulting in no trailing zero octets in the
+category bitmap.
+
+ octet 0 octet 1 octet 2 octet 3 octet 4 octet 5
+ XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX . . .
+bit 01234567 89111111 11112222 22222233 33333333 44444444
+number 012345 67890123 45678901 23456789 01234567
+
+ Figure 4. Ordering of Bits in Tag 1 Bit Map
+
+
+3.4.2.6 Optimized Tag 1 Format
+
+Routers work most efficiently when processing fixed length fields. To
+support these routers there is an optimized form of tag type 1. The format
+does not change. The only change is to the category bitmap which is set to
+a constant length of 10 octets. Trailing octets required to fill out the 10
+octets are zero filled. Ten octets, allowing for 80 categories, was chosen
+because it makes the total length of the CIPSO option 20 octets. If CIPSO
+is the only option then the option will be full word aligned and additional
+filler octets will not be required.
+
+
+3.4.3 Tag Type 2
+
+This is referred to as the "enumerated" tag type. It is used to describe
+large but sparsely populated sets of categories. Tag type 2 is in the MAC
+Sensitivity tag type class. The format of this tag type is as follows:
+
++----------+----------+----------+----------+-------------//-------------+
+| 00000010 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCCCCCCCCCCC |
++----------+----------+----------+----------+-------------//-------------+
+
+ TAG TAG ALIGNMENT SENSITIVITY ENUMERATED
+ TYPE LENGTH OCTET LEVEL CATEGORIES
+
+ Figure 5. Tag Type 2 Format
+
+
+3.4.3.1 Tag Type
+
+This field is one octet in length and has a value of 2.
+
+
+3.4.3.2 Tag Length
+
+This field is 1 octet in length. It is the total length of the tag type
+including the type and length fields. With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.3.3 Alignment Octet
+
+This field is 1 octet in length and always has the value of 0. Its purpose
+is to align the category field on an even octet boundary. This will
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 5]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+speed many implementations including router implementations.
+
+
+3.4.3.4 Sensitivity Level
+
+This field is 1 octet in length. Its value is from 0 to 255. The values
+are ordered with 0 being the minimum value and 255 representing the
+maximum value.
+
+
+3.4.3.5 Enumerated Categories
+
+In this tag, categories are represented by their actual value rather than
+by their position within a bit field. The length of each category is 2
+octets. Up to 15 categories may be represented by this tag. Valid values
+for categories are 0 to 65534. Category 65535 is not a valid category
+value. The categories MUST be listed in ascending order within the tag.
+
+
+3.4.4 Tag Type 5
+
+This is referred to as the "range" tag type. It is used to represent
+labels where all categories in a range, or set of ranges, are included
+in the sensitivity label. Tag type 5 is in the MAC Sensitivity tag type
+class. The format of this tag type is as follows:
+
++----------+----------+----------+----------+------------//-------------+
+| 00000101 | LLLLLLLL | 00000000 | LLLLLLLL | Top/Bottom | Top/Bottom |
++----------+----------+----------+----------+------------//-------------+
+
+ TAG TAG ALIGNMENT SENSITIVITY CATEGORY RANGES
+ TYPE LENGTH OCTET LEVEL
+
+ Figure 6. Tag Type 5 Format
+
+
+3.4.4.1 Tag Type
+
+This field is one octet in length and has a value of 5.
+
+
+3.4.4.2 Tag Length
+
+This field is 1 octet in length. It is the total length of the tag type
+including the type and length fields. With the current IP header length
+restriction of 40 bytes the value within this field is between 4 and 34.
+
+
+3.4.4.3 Alignment Octet
+
+This field is 1 octet in length and always has the value of 0. Its purpose
+is to align the category range field on an even octet boundary. This will
+speed many implementations including router implementations.
+
+
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 6]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+3.4.4.4 Sensitivity Level
+
+This field is 1 octet in length. Its value is from 0 to 255. The values
+are ordered with 0 being the minimum value and 255 representing the maximum
+value.
+
+
+3.4.4.5 Category Ranges
+
+A category range is a 4 octet field comprised of the 2 octet index of the
+highest numbered category followed by the 2 octet index of the lowest
+numbered category. These range endpoints are inclusive within the range of
+categories. All categories within a range are included in the sensitivity
+label. This tag may contain a maximum of 7 category pairs. The bottom
+category endpoint for the last pair in the tag MAY be omitted and SHOULD be
+assumed to be 0. The ranges MUST be non-overlapping and be listed in
+descending order. Valid values for categories are 0 to 65534. Category
+65535 is not a valid category value.
+
+
+3.4.5 Minimum Requirements
+
+A CIPSO implementation MUST be capable of generating at least tag type 1 in
+the non-optimized form. In addition, a CIPSO implementation MUST be able
+to receive any valid tag type 1 even those using the optimized tag type 1
+format.
+
+
+4. Configuration Parameters
+
+The configuration parameters defined below are required for all CIPSO hosts,
+gateways, and routers that support multiple sensitivity labels. A CIPSO
+host is defined to be the origination or destination system for an IP
+datagram. A CIPSO gateway provides IP routing services between two or more
+IP networks and may be required to perform label translations between
+networks. A CIPSO gateway may be an enhanced CIPSO host or it may just
+provide gateway services with no end system CIPSO capabilities. A CIPSO
+router is a dedicated IP router that routes IP datagrams between two or more
+IP networks.
+
+An implementation of CIPSO on a host MUST have the capability to reject a
+datagram for reasons that the information contained can not be adequately
+protected by the receiving host or if acceptance may result in violation of
+the host or network security policy. In addition, a CIPSO gateway or router
+MUST be able to reject datagrams going to networks that can not provide
+adequate protection or may violate the network's security policy. To
+provide this capability the following minimal set of configuration
+parameters are required for CIPSO implementations:
+
+HOST_LABEL_MAX - This parameter contains the maximum sensitivity label that
+a CIPSO host is authorized to handle. All datagrams that have a label
+greater than this maximum MUST be rejected by the CIPSO host. This
+parameter does not apply to CIPSO gateways or routers. This parameter need
+not be defined explicitly as it can be implicitly derived from the
+PORT_LABEL_MAX parameters for the associated interfaces.
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 7]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+
+HOST_LABEL_MIN - This parameter contains the minimum sensitivity label that
+a CIPSO host is authorized to handle. All datagrams that have a label less
+than this minimum MUST be rejected by the CIPSO host. This parameter does
+not apply to CIPSO gateways or routers. This parameter need not be defined
+explicitly as it can be implicitly derived from the PORT_LABEL_MIN
+parameters for the associated interfaces.
+
+PORT_LABEL_MAX - This parameter contains the maximum sensitivity label for
+all datagrams that may exit a particular network interface port. All
+outgoing datagrams that have a label greater than this maximum MUST be
+rejected by the CIPSO system. The label within this parameter MUST be
+less than or equal to the label within the HOST_LABEL_MAX parameter. This
+parameter does not apply to CIPSO hosts that support only one network port.
+
+PORT_LABEL_MIN - This parameter contains the minimum sensitivity label for
+all datagrams that may exit a particular network interface port. All
+outgoing datagrams that have a label less than this minimum MUST be
+rejected by the CIPSO system. The label within this parameter MUST be
+greater than or equal to the label within the HOST_LABEL_MIN parameter.
+This parameter does not apply to CIPSO hosts that support only one network
+port.
+
+PORT_DOI - This parameter is used to assign a DOI identifier value to a
+particular network interface port. All CIPSO labels within datagrams
+going out this port MUST use the specified DOI identifier. All CIPSO
+hosts and gateways MUST support either this parameter, the NET_DOI
+parameter, or the HOST_DOI parameter.
+
+NET_DOI - This parameter is used to assign a DOI identifier value to a
+particular IP network address. All CIPSO labels within datagrams destined
+for the particular IP network MUST use the specified DOI identifier. All
+CIPSO hosts and gateways MUST support either this parameter, the PORT_DOI
+parameter, or the HOST_DOI parameter.
+
+HOST_DOI - This parameter is used to assign a DOI identifier value to a
+particular IP host address. All CIPSO labels within datagrams destined for
+the particular IP host will use the specified DOI identifier. All CIPSO
+hosts and gateways MUST support either this parameter, the PORT_DOI
+parameter, or the NET_DOI parameter.
+
+This list represents the minimal set of configuration parameters required
+to be compliant. Implementors are encouraged to add to this list to
+provide enhanced functionality and control. For example, many security
+policies may require both incoming and outgoing datagrams be checked against
+the port and host label ranges.
+
+
+4.1 Port Range Parameters
+
+The labels represented by the PORT_LABEL_MAX and PORT_LABEL_MIN parameters
+MAY be in CIPSO or local format. Some CIPSO systems, such as routers, may
+want to have the range parameters expressed in CIPSO format so that incoming
+labels do not have to be converted to a local format before being compared
+against the range. If multiple DOIs are supported by one of these CIPSO
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 8]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+systems then multiple port range parameters would be needed, one set for
+each DOI supported on a particular port.
+
+The port range will usually represent the total set of labels that may
+exist on the logical network accessed through the corresponding network
+interface. It may, however, represent a subset of these labels that are
+allowed to enter the CIPSO system.
+
+
+4.2 Single Label CIPSO Hosts
+
+CIPSO implementations that support only one label are not required to
+support the parameters described above. These limited implementations are
+only required to support a NET_LABEL parameter. This parameter contains
+the CIPSO label that may be inserted in datagrams that exit the host. In
+addition, the host MUST reject any incoming datagram that has a label which
+is not equivalent to the NET_LABEL parameter.
+
+
+5. Handling Procedures
+
+This section describes the processing requirements for incoming and
+outgoing IP datagrams. Just providing the correct CIPSO label format
+is not enough. Assumptions will be made by one system on how a
+receiving system will handle the CIPSO label. Wrong assumptions may
+lead to non-interoperability or even a security incident. The
+requirements described below represent the minimal set needed for
+interoperability and that provide users some level of confidence.
+Many other requirements could be added to increase user confidence,
+however at the risk of restricting creativity and limiting vendor
+participation.
+
+
+5.1 Input Procedures
+
+All datagrams received through a network port MUST have a security label
+associated with them, either contained in the datagram or assigned to the
+receiving port. Without this label the host, gateway, or router will not
+have the information it needs to make security decisions. This security
+label will be obtained from the CIPSO if the option is present in the
+datagram. See section 4.1.2 for handling procedures for unlabeled
+datagrams. This label will be compared against the PORT (if appropriate)
+and HOST configuration parameters defined in section 3.
+
+If any field within the CIPSO option, such as the DOI identifier, is not
+recognized the IP datagram is discarded and an ICMP "parameter problem"
+(type 12) is generated and returned. The ICMP code field is set to "bad
+parameter" (code 0) and the pointer is set to the start of the CIPSO field
+that is unrecognized.
+
+If the contents of the CIPSO are valid but the security label is
+outside of the configured host or port label range, the datagram is
+discarded and an ICMP "destination unreachable" (type 3) is generated
+and returned. The code field of the ICMP is set to "communication with
+destination network administratively prohibited" (code 9) or to
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 9]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+"communication with destination host administratively prohibited"
+(code 10). The value of the code field used is dependent upon whether
+the originator of the ICMP message is acting as a CIPSO host or a CIPSO
+gateway. The recipient of the ICMP message MUST be able to handle either
+value. The same procedure is performed if a CIPSO can not be added to an
+IP packet because it is too large to fit in the IP options area.
+
+If the error is triggered by receipt of an ICMP message, the message
+is discarded and no response is permitted (consistent with general ICMP
+processing rules).
+
+
+5.1.1 Unrecognized tag types
+
+The default condition for any CIPSO implementation is that an
+unrecognized tag type MUST be treated as a "parameter problem" and
+handled as described in section 4.1. A CIPSO implementation MAY allow
+the system administrator to identify tag types that may safely be
+ignored. This capability is an allowable enhancement, not a
+requirement.
+
+
+5.1.2 Unlabeled Packets
+
+A network port may be configured to not require a CIPSO label for all
+incoming datagrams. For this configuration a CIPSO label must be
+assigned to that network port and associated with all unlabeled IP
+datagrams. This capability might be used for single level networks or
+networks that have CIPSO and non-CIPSO hosts and the non-CIPSO hosts
+all operate at the same label.
+
+If a CIPSO option is required and none is found, the datagram is
+discarded and an ICMP "parameter problem" (type 12) is generated and
+returned to the originator of the datagram. The code field of the ICMP
+is set to "option missing" (code 1) and the ICMP pointer is set to 134
+(the value of the option type for the missing CIPSO option).
+
+
+5.2 Output Procedures
+
+A CIPSO option MUST appear only once in a datagram. Only one tag type
+from the MAC Sensitivity class MAY be included in a CIPSO option. Given
+the current set of defined tag types, this means that CIPSO labels at
+first will contain only one tag.
+
+All datagrams leaving a CIPSO system MUST meet the following condition:
+
+ PORT_LABEL_MIN <= CIPSO label <= PORT_LABEL_MAX
+
+If this condition is not satisfied the datagram MUST be discarded.
+If the CIPSO system only supports one port, the HOST_LABEL_MIN and the
+HOST_LABEL_MAX parameters MAY be substituted for the PORT parameters in
+the above condition.
+
+The DOI identifier to be used for all outgoing datagrams is configured by
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 10]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+the administrator. If port level DOI identifier assignment is used, then
+the PORT_DOI configuration parameter MUST contain the DOI identifier to
+use. If network level DOI assignment is used, then the NET_DOI parameter
+MUST contain the DOI identifier to use. And if host level DOI assignment
+is employed, then the HOST_DOI parameter MUST contain the DOI identifier
+to use. A CIPSO implementation need only support one level of DOI
+assignment.
+
+
+5.3 DOI Processing Requirements
+
+A CIPSO implementation MUST support at least one DOI and SHOULD support
+multiple DOIs. System and network administrators are cautioned to
+ensure that at least one DOI is common within an IP network to allow for
+broadcasting of IP datagrams.
+
+CIPSO gateways MUST be capable of translating a CIPSO option from one
+DOI to another when forwarding datagrams between networks. For
+efficiency purposes this capability is only a desired feature for CIPSO
+routers.
+
+
+5.4 Label of ICMP Messages
+
+The CIPSO label to be used on all outgoing ICMP messages MUST be equivalent
+to the label of the datagram that caused the ICMP message. If the ICMP was
+generated due to a problem associated with the original CIPSO label then the
+following responses are allowed:
+
+ a. Use the CIPSO label of the original IP datagram
+ b. Drop the original datagram with no return message generated
+
+In most cases these options will have the same effect. If you can not
+interpret the label or if it is outside the label range of your host or
+interface then an ICMP message with the same label will probably not be
+able to exit the system.
+
+
+6. Assignment of DOI Identifier Numbers =
+
+Requests for assignment of a DOI identifier number should be addressed to
+the Internet Assigned Numbers Authority (IANA).
+
+
+7. Acknowledgements
+
+Much of the material in this RFC is based on (and copied from) work
+done by Gary Winiger of Sun Microsystems and published as Commercial
+IP Security Option at the INTEROP 89, Commercial IPSO Workshop.
+
+
+8. Author's Address
+
+To submit mail for distribution to members of the IETF CIPSO Working
+Group, send mail to: cipso@wdl1.wdl.loral.com.
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 11]
+
+
+
+CIPSO INTERNET DRAFT 16 July, 1992
+
+
+
+
+To be added to or deleted from this distribution, send mail to:
+cipso-request@wdl1.wdl.loral.com.
+
+
+9. References
+
+RFC 1038, "Draft Revised IP Security Option", M. St. Johns, IETF, January
+1988.
+
+RFC 1108, "U.S. Department of Defense Security Options
+for the Internet Protocol", Stephen Kent, IAB, 1 March, 1991.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Internet Draft, Expires 15 Jan 93 [PAGE 12]
+
+
+
diff --git a/Documentation/netlabel/introduction.txt b/Documentation/netlabel/introduction.txt
new file mode 100644
index 000000000..5ecd8d1dc
--- /dev/null
+++ b/Documentation/netlabel/introduction.txt
@@ -0,0 +1,46 @@
+NetLabel Introduction
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+August 2, 2006
+
+ * Overview
+
+NetLabel is a mechanism which can be used by kernel security modules to attach
+security attributes to outgoing network packets generated from user space
+applications and read security attributes from incoming network packets. It
+is composed of three main components, the protocol engines, the communication
+layer, and the kernel security module API.
+
+ * Protocol Engines
+
+The protocol engines are responsible for both applying and retrieving the
+network packet's security attributes. If any translation between the network
+security attributes and those on the host are required then the protocol
+engine will handle those tasks as well. Other kernel subsystems should
+refrain from calling the protocol engines directly, instead they should use
+the NetLabel kernel security module API described below.
+
+Detailed information about each NetLabel protocol engine can be found in this
+directory, consult '00-INDEX' for filenames.
+
+ * Communication Layer
+
+The communication layer exists to allow NetLabel configuration and monitoring
+from user space. The NetLabel communication layer uses a message based
+protocol built on top of the Generic NETLINK transport mechanism. The exact
+formatting of these NetLabel messages as well as the Generic NETLINK family
+names can be found in the 'net/netlabel/' directory as comments in the
+header files as well as in 'include/net/netlabel.h'.
+
+ * Security Module API
+
+The purpose of the NetLabel security module API is to provide a protocol
+independent interface to the underlying NetLabel protocol engines. In addition
+to protocol independence, the security module API is designed to be completely
+LSM independent which should allow multiple LSMs to leverage the same code
+base.
+
+Detailed information about the NetLabel security module API can be found in the
+'include/net/netlabel.h' header file as well as the 'lsm_interface.txt' file
+found in this directory.
diff --git a/Documentation/netlabel/lsm_interface.txt b/Documentation/netlabel/lsm_interface.txt
new file mode 100644
index 000000000..638c74f7d
--- /dev/null
+++ b/Documentation/netlabel/lsm_interface.txt
@@ -0,0 +1,47 @@
+NetLabel Linux Security Module Interface
+==============================================================================
+Paul Moore, paul.moore@hp.com
+
+May 17, 2006
+
+ * Overview
+
+NetLabel is a mechanism which can set and retrieve security attributes from
+network packets. It is intended to be used by LSM developers who want to make
+use of a common code base for several different packet labeling protocols.
+The NetLabel security module API is defined in 'include/net/netlabel.h' but a
+brief overview is given below.
+
+ * NetLabel Security Attributes
+
+Since NetLabel supports multiple different packet labeling protocols and LSMs
+it uses the concept of security attributes to refer to the packet's security
+labels. The NetLabel security attributes are defined by the
+'netlbl_lsm_secattr' structure in the NetLabel header file. Internally the
+NetLabel subsystem converts the security attributes to and from the correct
+low-level packet label depending on the NetLabel build time and run time
+configuration. It is up to the LSM developer to translate the NetLabel
+security attributes into whatever security identifiers are in use for their
+particular LSM.
+
+ * NetLabel LSM Protocol Operations
+
+These are the functions which allow the LSM developer to manipulate the labels
+on outgoing packets as well as read the labels on incoming packets. Functions
+exist to operate both on sockets as well as the sk_buffs directly. These high
+level functions are translated into low level protocol operations based on how
+the administrator has configured the NetLabel subsystem.
+
+ * NetLabel Label Mapping Cache Operations
+
+Depending on the exact configuration, translation between the network packet
+label and the internal LSM security identifier can be time consuming. The
+NetLabel label mapping cache is a caching mechanism which can be used to
+sidestep much of this overhead once a mapping has been established. Once the
+LSM has received a packet, used NetLabel to decode its security attributes,
+and translated the security attributes into a LSM internal identifier the LSM
+can use the NetLabel caching functions to associate the LSM internal
+identifier with the network packet's label. This means that in the future
+when a incoming packet matches a cached value not only are the internal
+NetLabel translation mechanisms bypassed but the LSM translation mechanisms are
+bypassed as well which should result in a significant reduction in overhead.
diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
new file mode 100644
index 000000000..df27a1a50
--- /dev/null
+++ b/Documentation/networking/00-INDEX
@@ -0,0 +1,238 @@
+00-INDEX
+ - this file
+3c509.txt
+ - information on the 3Com Etherlink III Series Ethernet cards.
+6pack.txt
+ - info on the 6pack protocol, an alternative to KISS for AX.25
+LICENSE.qla3xxx
+ - GPLv2 for QLogic Linux Networking HBA Driver
+LICENSE.qlge
+ - GPLv2 for QLogic Linux qlge NIC Driver
+LICENSE.qlcnic
+ - GPLv2 for QLogic Linux qlcnic NIC Driver
+Makefile
+ - Makefile for docsrc.
+PLIP.txt
+ - PLIP: The Parallel Line Internet Protocol device driver
+README.ipw2100
+ - README for the Intel PRO/Wireless 2100 driver.
+README.ipw2200
+ - README for the Intel PRO/Wireless 2915ABG and 2200BG driver.
+README.sb1000
+ - info on General Instrument/NextLevel SURFboard1000 cable modem.
+alias.txt
+ - info on using alias network devices.
+altera_tse.txt
+ - Altera Triple-Speed Ethernet controller.
+arcnet-hardware.txt
+ - tons of info on ARCnet, hubs, jumper settings for ARCnet cards, etc.
+arcnet.txt
+ - info on the using the ARCnet driver itself.
+atm.txt
+ - info on where to get ATM programs and support for Linux.
+ax25.txt
+ - info on using AX.25 and NET/ROM code for Linux
+batman-adv.txt
+ - B.A.T.M.A.N routing protocol on top of layer 2 Ethernet Frames.
+baycom.txt
+ - info on the driver for Baycom style amateur radio modems
+bonding.txt
+ - Linux Ethernet Bonding Driver HOWTO: link aggregation in Linux.
+bridge.txt
+ - where to get user space programs for ethernet bridging with Linux.
+can.txt
+ - documentation on CAN protocol family.
+cdc_mbim.txt
+ - 3G/LTE USB modem (Mobile Broadband Interface Model)
+cops.txt
+ - info on the COPS LocalTalk Linux driver
+cs89x0.txt
+ - the Crystal LAN (CS8900/20-based) Ethernet ISA adapter driver
+cxacru.txt
+ - Conexant AccessRunner USB ADSL Modem
+cxacru-cf.py
+ - Conexant AccessRunner USB ADSL Modem configuration file parser
+cxgb.txt
+ - Release Notes for the Chelsio N210 Linux device driver.
+dccp.txt
+ - the Datagram Congestion Control Protocol (DCCP) (RFC 4340..42).
+dctcp.txt
+ - DataCenter TCP congestion control
+de4x5.txt
+ - the Digital EtherWORKS DE4?? and DE5?? PCI Ethernet driver
+decnet.txt
+ - info on using the DECnet networking layer in Linux.
+dl2k.txt
+ - README for D-Link DL2000-based Gigabit Ethernet Adapters (dl2k.ko).
+dm9000.txt
+ - README for the Simtec DM9000 Network driver.
+dmfe.txt
+ - info on the Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver.
+dns_resolver.txt
+ - The DNS resolver module allows kernel servies to make DNS queries.
+driver.txt
+ - Softnet driver issues.
+e100.txt
+ - info on Intel's EtherExpress PRO/100 line of 10/100 boards
+e1000.txt
+ - info on Intel's E1000 line of gigabit ethernet boards
+e1000e.txt
+ - README for the Intel Gigabit Ethernet Driver (e1000e).
+eql.txt
+ - serial IP load balancing
+fib_trie.txt
+ - Level Compressed Trie (LC-trie) notes: a structure for routing.
+filter.txt
+ - Linux Socket Filtering
+fore200e.txt
+ - FORE Systems PCA-200E/SBA-200E ATM NIC driver info.
+framerelay.txt
+ - info on using Frame Relay/Data Link Connection Identifier (DLCI).
+gen_stats.txt
+ - Generic networking statistics for netlink users.
+generic-hdlc.txt
+ - The generic High Level Data Link Control (HDLC) layer.
+generic_netlink.txt
+ - info on Generic Netlink
+gianfar.txt
+ - Gianfar Ethernet Driver.
+i40e.txt
+ - README for the Intel Ethernet Controller XL710 Driver (i40e).
+i40evf.txt
+ - Short note on the Driver for the Intel(R) XL710 X710 Virtual Function
+ieee802154.txt
+ - Linux IEEE 802.15.4 implementation, API and drivers
+igb.txt
+ - README for the Intel Gigabit Ethernet Driver (igb).
+igbvf.txt
+ - README for the Intel Gigabit Ethernet Driver (igbvf).
+ip-sysctl.txt
+ - /proc/sys/net/ipv4/* variables
+ip_dynaddr.txt
+ - IP dynamic address hack e.g. for auto-dialup links
+ipddp.txt
+ - AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation
+iphase.txt
+ - Interphase PCI ATM (i)Chip IA Linux driver info.
+ipsec.txt
+ - Note on not compressing IPSec payload and resulting failed policy check.
+ipv6.txt
+ - Options to the ipv6 kernel module.
+ipvs-sysctl.txt
+ - Per-inode explanation of the /proc/sys/net/ipv4/vs interface.
+irda.txt
+ - where to get IrDA (infrared) utilities and info for Linux.
+ixgb.txt
+ - README for the Intel 10 Gigabit Ethernet Driver (ixgb).
+ixgbe.txt
+ - README for the Intel 10 Gigabit Ethernet Driver (ixgbe).
+ixgbevf.txt
+ - README for the Intel Virtual Function (VF) Driver (ixgbevf).
+l2tp.txt
+ - User guide to the L2TP tunnel protocol.
+lapb-module.txt
+ - programming information of the LAPB module.
+ltpc.txt
+ - the Apple or Farallon LocalTalk PC card driver
+mac80211-auth-assoc-deauth.txt
+ - authentication and association / deauth-disassoc with max80211
+mac80211-injection.txt
+ - HOWTO use packet injection with mac80211
+multiqueue.txt
+ - HOWTO for multiqueue network device support.
+netconsole.txt
+ - The network console module netconsole.ko: configuration and notes.
+netdev-FAQ.txt
+ - FAQ describing how to submit net changes to netdev mailing list.
+netdev-features.txt
+ - Network interface features API description.
+netdevices.txt
+ - info on network device driver functions exported to the kernel.
+netif-msg.txt
+ - Design of the network interface message level setting (NETIF_MSG_*).
+netlink_mmap.txt
+ - memory mapped I/O with netlink
+nf_conntrack-sysctl.txt
+ - list of netfilter-sysctl knobs.
+nfc.txt
+ - The Linux Near Field Communication (NFS) subsystem.
+openvswitch.txt
+ - Open vSwitch developer documentation.
+operstates.txt
+ - Overview of network interface operational states.
+packet_mmap.txt
+ - User guide to memory mapped packet socket rings (PACKET_[RT]X_RING).
+phonet.txt
+ - The Phonet packet protocol used in Nokia cellular modems.
+phy.txt
+ - The PHY abstraction layer.
+pktgen.txt
+ - User guide to the kernel packet generator (pktgen.ko).
+policy-routing.txt
+ - IP policy-based routing
+ppp_generic.txt
+ - Information about the generic PPP driver.
+proc_net_tcp.txt
+ - Per inode overview of the /proc/net/tcp and /proc/net/tcp6 interfaces.
+radiotap-headers.txt
+ - Background on radiotap headers.
+ray_cs.txt
+ - Raylink Wireless LAN card driver info.
+rds.txt
+ - Background on the reliable, ordered datagram delivery method RDS.
+regulatory.txt
+ - Overview of the Linux wireless regulatory infrastructure.
+rxrpc.txt
+ - Guide to the RxRPC protocol.
+s2io.txt
+ - Release notes for Neterion Xframe I/II 10GbE driver.
+scaling.txt
+ - Explanation of network scaling techniques: RSS, RPS, RFS, aRFS, XPS.
+sctp.txt
+ - Notes on the Linux kernel implementation of the SCTP protocol.
+secid.txt
+ - Explanation of the secid member in flow structures.
+skfp.txt
+ - SysKonnect FDDI (SK-5xxx, Compaq Netelligent) driver info.
+smc9.txt
+ - the driver for SMC's 9000 series of Ethernet cards
+spider_net.txt
+ - README for the Spidernet Driver (as found in PS3 / Cell BE).
+stmmac.txt
+ - README for the STMicro Synopsys Ethernet driver.
+tc-actions-env-rules.txt
+ - rules for traffic control (tc) actions.
+timestamping.txt
+ - overview of network packet timestamping variants.
+tcp.txt
+ - short blurb on how TCP output takes place.
+tcp-thin.txt
+ - kernel tuning options for low rate 'thin' TCP streams.
+team.txt
+ - pointer to information for ethernet teaming devices.
+tlan.txt
+ - ThunderLAN (Compaq Netelligent 10/100, Olicom OC-2xxx) driver info.
+tproxy.txt
+ - Transparent proxy support user guide.
+tuntap.txt
+ - TUN/TAP device driver, allowing user space Rx/Tx of packets.
+udplite.txt
+ - UDP-Lite protocol (RFC 3828) introduction.
+vortex.txt
+ - info on using 3Com Vortex (3c590, 3c592, 3c595, 3c597) Ethernet cards.
+vxge.txt
+ - README for the Neterion X3100 PCIe Server Adapter.
+vxlan.txt
+ - Virtual extensible LAN overview
+x25.txt
+ - general info on X.25 development.
+x25-iface.txt
+ - description of the X.25 Packet Layer to LAPB device interface.
+xfrm_proc.txt
+ - description of the statistics package for XFRM.
+xfrm_sync.txt
+ - sync patches for XFRM enable migration of an SA between hosts.
+xfrm_sysctl.txt
+ - description of the XFRM configuration options.
+z8530drv.txt
+ - info about Linux driver for Z8530 based HDLC cards for AX.25
diff --git a/Documentation/networking/3c509.txt b/Documentation/networking/3c509.txt
new file mode 100644
index 000000000..fbf722e15
--- /dev/null
+++ b/Documentation/networking/3c509.txt
@@ -0,0 +1,213 @@
+Linux and the 3Com EtherLink III Series Ethercards (driver v1.18c and higher)
+----------------------------------------------------------------------------
+
+This file contains the instructions and caveats for v1.18c and higher versions
+of the 3c509 driver. You should not use the driver without reading this file.
+
+release 1.0
+28 February 2002
+Current maintainer (corrections to):
+ David Ruggiero <jdr@farfalle.com>
+
+----------------------------------------------------------------------------
+
+(0) Introduction
+
+The following are notes and information on using the 3Com EtherLink III series
+ethercards in Linux. These cards are commonly known by the most widely-used
+card's 3Com model number, 3c509. They are all 10mb/s ISA-bus cards and shouldn't
+be (but sometimes are) confused with the similarly-numbered PCI-bus "3c905"
+(aka "Vortex" or "Boomerang") series. Kernel support for the 3c509 family is
+provided by the module 3c509.c, which has code to support all of the following
+models:
+
+ 3c509 (original ISA card)
+ 3c509B (later revision of the ISA card; supports full-duplex)
+ 3c589 (PCMCIA)
+ 3c589B (later revision of the 3c589; supports full-duplex)
+ 3c579 (EISA)
+
+Large portions of this documentation were heavily borrowed from the guide
+written the original author of the 3c509 driver, Donald Becker. The master
+copy of that document, which contains notes on older versions of the driver,
+currently resides on Scyld web server: http://www.scyld.com/.
+
+
+(1) Special Driver Features
+
+Overriding card settings
+
+The driver allows boot- or load-time overriding of the card's detected IOADDR,
+IRQ, and transceiver settings, although this capability shouldn't generally be
+needed except to enable full-duplex mode (see below). An example of the syntax
+for LILO parameters for doing this:
+
+ ether=10,0x310,3,0x3c509,eth0
+
+This configures the first found 3c509 card for IRQ 10, base I/O 0x310, and
+transceiver type 3 (10base2). The flag "0x3c509" must be set to avoid conflicts
+with other card types when overriding the I/O address. When the driver is
+loaded as a module, only the IRQ may be overridden. For example,
+setting two cards to IRQ10 and IRQ11 is done by using the irq module
+option:
+
+ options 3c509 irq=10,11
+
+
+(2) Full-duplex mode
+
+The v1.18c driver added support for the 3c509B's full-duplex capabilities.
+In order to enable and successfully use full-duplex mode, three conditions
+must be met:
+
+(a) You must have a Etherlink III card model whose hardware supports full-
+duplex operations. Currently, the only members of the 3c509 family that are
+positively known to support full-duplex are the 3c509B (ISA bus) and 3c589B
+(PCMCIA) cards. Cards without the "B" model designation do *not* support
+full-duplex mode; these include the original 3c509 (no "B"), the original
+3c589, the 3c529 (MCA bus), and the 3c579 (EISA bus).
+
+(b) You must be using your card's 10baseT transceiver (i.e., the RJ-45
+connector), not its AUI (thick-net) or 10base2 (thin-net/coax) interfaces.
+AUI and 10base2 network cabling is physically incapable of full-duplex
+operation.
+
+(c) Most importantly, your 3c509B must be connected to a link partner that is
+itself full-duplex capable. This is almost certainly one of two things: a full-
+duplex-capable Ethernet switch (*not* a hub), or a full-duplex-capable NIC on
+another system that's connected directly to the 3c509B via a crossover cable.
+
+Full-duplex mode can be enabled using 'ethtool'.
+
+/////Extremely important caution concerning full-duplex mode/////
+Understand that the 3c509B's hardware's full-duplex support is much more
+limited than that provide by more modern network interface cards. Although
+at the physical layer of the network it fully supports full-duplex operation,
+the card was designed before the current Ethernet auto-negotiation (N-way)
+spec was written. This means that the 3c509B family ***cannot and will not
+auto-negotiate a full-duplex connection with its link partner under any
+circumstances, no matter how it is initialized***. If the full-duplex mode
+of the 3c509B is enabled, its link partner will very likely need to be
+independently _forced_ into full-duplex mode as well; otherwise various nasty
+failures will occur - at the very least, you'll see massive numbers of packet
+collisions. This is one of very rare circumstances where disabling auto-
+negotiation and forcing the duplex mode of a network interface card or switch
+would ever be necessary or desirable.
+
+
+(3) Available Transceiver Types
+
+For versions of the driver v1.18c and above, the available transceiver types are:
+
+0 transceiver type from EEPROM config (normally 10baseT); force half-duplex
+1 AUI (thick-net / DB15 connector)
+2 (undefined)
+3 10base2 (thin-net == coax / BNC connector)
+4 10baseT (RJ-45 connector); force half-duplex mode
+8 transceiver type and duplex mode taken from card's EEPROM config settings
+12 10baseT (RJ-45 connector); force full-duplex mode
+
+Prior to driver version 1.18c, only transceiver codes 0-4 were supported. Note
+that the new transceiver codes 8 and 12 are the *only* ones that will enable
+full-duplex mode, no matter what the card's detected EEPROM settings might be.
+This insured that merely upgrading the driver from an earlier version would
+never automatically enable full-duplex mode in an existing installation;
+it must always be explicitly enabled via one of these code in order to be
+activated.
+
+The transceiver type can be changed using 'ethtool'.
+
+
+(4a) Interpretation of error messages and common problems
+
+Error Messages
+
+eth0: Infinite loop in interrupt, status 2011.
+These are "mostly harmless" message indicating that the driver had too much
+work during that interrupt cycle. With a status of 0x2011 you are receiving
+packets faster than they can be removed from the card. This should be rare
+or impossible in normal operation. Possible causes of this error report are:
+
+ - a "green" mode enabled that slows the processor down when there is no
+ keyboard activity.
+
+ - some other device or device driver hogging the bus or disabling interrupts.
+ Check /proc/interrupts for excessive interrupt counts. The timer tick
+ interrupt should always be incrementing faster than the others.
+
+No received packets
+If a 3c509, 3c562 or 3c589 can successfully transmit packets, but never
+receives packets (as reported by /proc/net/dev or 'ifconfig') you likely
+have an interrupt line problem. Check /proc/interrupts to verify that the
+card is actually generating interrupts. If the interrupt count is not
+increasing you likely have a physical conflict with two devices trying to
+use the same ISA IRQ line. The common conflict is with a sound card on IRQ10
+or IRQ5, and the easiest solution is to move the 3c509 to a different
+interrupt line. If the device is receiving packets but 'ping' doesn't work,
+you have a routing problem.
+
+Tx Carrier Errors Reported in /proc/net/dev
+If an EtherLink III appears to transmit packets, but the "Tx carrier errors"
+field in /proc/net/dev increments as quickly as the Tx packet count, you
+likely have an unterminated network or the incorrect media transceiver selected.
+
+3c509B card is not detected on machines with an ISA PnP BIOS.
+While the updated driver works with most PnP BIOS programs, it does not work
+with all. This can be fixed by disabling PnP support using the 3Com-supplied
+setup program.
+
+3c509 card is not detected on overclocked machines
+Increase the delay time in id_read_eeprom() from the current value, 500,
+to an absurdly high value, such as 5000.
+
+
+(4b) Decoding Status and Error Messages
+
+The bits in the main status register are:
+
+value description
+0x01 Interrupt latch
+0x02 Tx overrun, or Rx underrun
+0x04 Tx complete
+0x08 Tx FIFO room available
+0x10 A complete Rx packet has arrived
+0x20 A Rx packet has started to arrive
+0x40 The driver has requested an interrupt
+0x80 Statistics counter nearly full
+
+The bits in the transmit (Tx) status word are:
+
+value description
+0x02 Out-of-window collision.
+0x04 Status stack overflow (normally impossible).
+0x08 16 collisions.
+0x10 Tx underrun (not enough PCI bus bandwidth).
+0x20 Tx jabber.
+0x40 Tx interrupt requested.
+0x80 Status is valid (this should always be set).
+
+
+When a transmit error occurs the driver produces a status message such as
+
+ eth0: Transmit error, Tx status register 82
+
+The two values typically seen here are:
+
+0x82
+Out of window collision. This typically occurs when some other Ethernet
+host is incorrectly set to full duplex on a half duplex network.
+
+0x88
+16 collisions. This typically occurs when the network is exceptionally busy
+or when another host doesn't correctly back off after a collision. If this
+error is mixed with 0x82 errors it is the result of a host incorrectly set
+to full duplex (see above).
+
+Both of these errors are the result of network problems that should be
+corrected. They do not represent driver malfunction.
+
+
+(5) Revision history (this file)
+
+28Feb02 v1.0 DR New; major portions based on Becker original 3c509 docs
+
diff --git a/Documentation/networking/6pack.txt b/Documentation/networking/6pack.txt
new file mode 100644
index 000000000..8f339428f
--- /dev/null
+++ b/Documentation/networking/6pack.txt
@@ -0,0 +1,175 @@
+This is the 6pack-mini-HOWTO, written by
+
+Andreas Könsgen DG3KQ
+Internet: ajk@comnets.uni-bremen.de
+AMPR-net: dg3kq@db0pra.ampr.org
+AX.25: dg3kq@db0ach.#nrw.deu.eu
+
+Last update: April 7, 1998
+
+1. What is 6pack, and what are the advantages to KISS?
+
+6pack is a transmission protocol for data exchange between the PC and
+the TNC over a serial line. It can be used as an alternative to KISS.
+
+6pack has two major advantages:
+- The PC is given full control over the radio
+ channel. Special control data is exchanged between the PC and the TNC so
+ that the PC knows at any time if the TNC is receiving data, if a TNC
+ buffer underrun or overrun has occurred, if the PTT is
+ set and so on. This control data is processed at a higher priority than
+ normal data, so a data stream can be interrupted at any time to issue an
+ important event. This helps to improve the channel access and timing
+ algorithms as everything is computed in the PC. It would even be possible
+ to experiment with something completely different from the known CSMA and
+ DAMA channel access methods.
+ This kind of real-time control is especially important to supply several
+ TNCs that are connected between each other and the PC by a daisy chain
+ (however, this feature is not supported yet by the Linux 6pack driver).
+
+- Each packet transferred over the serial line is supplied with a checksum,
+ so it is easy to detect errors due to problems on the serial line.
+ Received packets that are corrupt are not passed on to the AX.25 layer.
+ Damaged packets that the TNC has received from the PC are not transmitted.
+
+More details about 6pack are described in the file 6pack.ps that is located
+in the doc directory of the AX.25 utilities package.
+
+2. Who has developed the 6pack protocol?
+
+The 6pack protocol has been developed by Ekki Plicht DF4OR, Henning Rech
+DF9IC and Gunter Jost DK7WJ. A driver for 6pack, written by Gunter Jost and
+Matthias Welwarsky DG2FEF, comes along with the PC version of FlexNet.
+They have also written a firmware for TNCs to perform the 6pack
+protocol (see section 4 below).
+
+3. Where can I get the latest version of 6pack for LinuX?
+
+At the moment, the 6pack stuff can obtained via anonymous ftp from
+db0bm.automation.fh-aachen.de. In the directory /incoming/dg3kq,
+there is a file named 6pack.tgz.
+
+4. Preparing the TNC for 6pack operation
+
+To be able to use 6pack, a special firmware for the TNC is needed. The EPROM
+of a newly bought TNC does not contain 6pack, so you will have to
+program an EPROM yourself. The image file for 6pack EPROMs should be
+available on any packet radio box where PC/FlexNet can be found. The name of
+the file is 6pack.bin. This file is copyrighted and maintained by the FlexNet
+team. It can be used under the terms of the license that comes along
+with PC/FlexNet. Please do not ask me about the internals of this file as I
+don't know anything about it. I used a textual description of the 6pack
+protocol to program the Linux driver.
+
+TNCs contain a 64kByte EPROM, the lower half of which is used for
+the firmware/KISS. The upper half is either empty or is sometimes
+programmed with software called TAPR. In the latter case, the TNC
+is supplied with a DIP switch so you can easily change between the
+two systems. When programming a new EPROM, one of the systems is replaced
+by 6pack. It is useful to replace TAPR, as this software is rarely used
+nowadays. If your TNC is not equipped with the switch mentioned above, you
+can build in one yourself that switches over the highest address pin
+of the EPROM between HIGH and LOW level. After having inserted the new EPROM
+and switched to 6pack, apply power to the TNC for a first test. The connect
+and the status LED are lit for about a second if the firmware initialises
+the TNC correctly.
+
+5. Building and installing the 6pack driver
+
+The driver has been tested with kernel version 2.1.90. Use with older
+kernels may lead to a compilation error because the interface to a kernel
+function has been changed in the 2.1.8x kernels.
+
+How to turn on 6pack support:
+
+- In the linux kernel configuration program, select the code maturity level
+ options menu and turn on the prompting for development drivers.
+
+- Select the amateur radio support menu and turn on the serial port 6pack
+ driver.
+
+- Compile and install the kernel and the modules.
+
+To use the driver, the kissattach program delivered with the AX.25 utilities
+has to be modified.
+
+- Do a cd to the directory that holds the kissattach sources. Edit the
+ kissattach.c file. At the top, insert the following lines:
+
+ #ifndef N_6PACK
+ #define N_6PACK (N_AX25+1)
+ #endif
+
+ Then find the line
+
+ int disc = N_AX25;
+
+ and replace N_AX25 by N_6PACK.
+
+- Recompile kissattach. Rename it to spattach to avoid confusions.
+
+Installing the driver:
+
+- Do an insmod 6pack. Look at your /var/log/messages file to check if the
+ module has printed its initialization message.
+
+- Do a spattach as you would launch kissattach when starting a KISS port.
+ Check if the kernel prints the message '6pack: TNC found'.
+
+- From here, everything should work as if you were setting up a KISS port.
+ The only difference is that the network device that represents
+ the 6pack port is called sp instead of sl or ax. So, sp0 would be the
+ first 6pack port.
+
+Although the driver has been tested on various platforms, I still declare it
+ALPHA. BE CAREFUL! Sync your disks before insmoding the 6pack module
+and spattaching. Watch out if your computer behaves strangely. Read section
+6 of this file about known problems.
+
+Note that the connect and status LEDs of the TNC are controlled in a
+different way than they are when the TNC is used with PC/FlexNet. When using
+FlexNet, the connect LED is on if there is a connection; the status LED is
+on if there is data in the buffer of the PC's AX.25 engine that has to be
+transmitted. Under Linux, the 6pack layer is beyond the AX.25 layer,
+so the 6pack driver doesn't know anything about connects or data that
+has not yet been transmitted. Therefore the LEDs are controlled
+as they are in KISS mode: The connect LED is turned on if data is transferred
+from the PC to the TNC over the serial line, the status LED if data is
+sent to the PC.
+
+6. Known problems
+
+When testing the driver with 2.0.3x kernels and
+operating with data rates on the radio channel of 9600 Baud or higher,
+the driver may, on certain systems, sometimes print the message '6pack:
+bad checksum', which is due to data loss if the other station sends two
+or more subsequent packets. I have been told that this is due to a problem
+with the serial driver of 2.0.3x kernels. I don't know yet if the problem
+still exists with 2.1.x kernels, as I have heard that the serial driver
+code has been changed with 2.1.x.
+
+When shutting down the sp interface with ifconfig, the kernel crashes if
+there is still an AX.25 connection left over which an IP connection was
+running, even if that IP connection is already closed. The problem does not
+occur when there is a bare AX.25 connection still running. I don't know if
+this is a problem of the 6pack driver or something else in the kernel.
+
+The driver has been tested as a module, not yet as a kernel-builtin driver.
+
+The 6pack protocol supports daisy-chaining of TNCs in a token ring, which is
+connected to one serial port of the PC. This feature is not implemented
+and at least at the moment I won't be able to do it because I do not have
+the opportunity to build a TNC daisy-chain and test it.
+
+Some of the comments in the source code are inaccurate. They are left from
+the SLIP/KISS driver, from which the 6pack driver has been derived.
+I haven't modified or removed them yet -- sorry! The code itself needs
+some cleaning and optimizing. This will be done in a later release.
+
+If you encounter a bug or if you have a question or suggestion concerning the
+driver, feel free to mail me, using the addresses given at the beginning of
+this file.
+
+Have fun!
+
+Andreas
diff --git a/Documentation/networking/LICENSE.qla3xxx b/Documentation/networking/LICENSE.qla3xxx
new file mode 100644
index 000000000..2f2077e34
--- /dev/null
+++ b/Documentation/networking/LICENSE.qla3xxx
@@ -0,0 +1,46 @@
+Copyright (c) 2003-2006 QLogic Corporation
+QLogic Linux Networking HBA Driver
+
+This program includes a device driver for Linux 2.6 that may be
+distributed with QLogic hardware specific firmware binary file.
+You may modify and redistribute the device driver code under the
+GNU General Public License as published by the Free Software
+Foundation (version 2 or a later version).
+
+You may redistribute the hardware specific firmware binary file
+under the following terms:
+
+ 1. Redistribution of source code (only if applicable),
+ must retain the above copyright notice, this list of
+ conditions and the following disclaimer.
+
+ 2. Redistribution in binary form must reproduce the above
+ copyright notice, this list of conditions and the
+ following disclaimer in the documentation and/or other
+ materials provided with the distribution.
+
+ 3. The name of QLogic Corporation may not be used to
+ endorse or promote products derived from this software
+ without specific prior written permission
+
+REGARDLESS OF WHAT LICENSING MECHANISM IS USED OR APPLICABLE,
+THIS PROGRAM IS PROVIDED BY QLOGIC CORPORATION "AS IS'' AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+USER ACKNOWLEDGES AND AGREES THAT USE OF THIS PROGRAM WILL NOT
+CREATE OR GIVE GROUNDS FOR A LICENSE BY IMPLICATION, ESTOPPEL, OR
+OTHERWISE IN ANY INTELLECTUAL PROPERTY RIGHTS (PATENT, COPYRIGHT,
+TRADE SECRET, MASK WORK, OR OTHER PROPRIETARY RIGHT) EMBODIED IN
+ANY OTHER QLOGIC HARDWARE OR SOFTWARE EITHER SOLELY OR IN
+COMBINATION WITH THIS PROGRAM.
+
diff --git a/Documentation/networking/LICENSE.qlcnic b/Documentation/networking/LICENSE.qlcnic
new file mode 100644
index 000000000..2ae3b6498
--- /dev/null
+++ b/Documentation/networking/LICENSE.qlcnic
@@ -0,0 +1,288 @@
+Copyright (c) 2009-2013 QLogic Corporation
+QLogic Linux qlcnic NIC Driver
+
+You may modify and redistribute the device driver code under the
+GNU General Public License (a copy of which is attached hereto as
+Exhibit A) published by the Free Software Foundation (version 2).
+
+
+EXHIBIT A
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
diff --git a/Documentation/networking/LICENSE.qlge b/Documentation/networking/LICENSE.qlge
new file mode 100644
index 000000000..ce64e4d15
--- /dev/null
+++ b/Documentation/networking/LICENSE.qlge
@@ -0,0 +1,288 @@
+Copyright (c) 2003-2011 QLogic Corporation
+QLogic Linux qlge NIC Driver
+
+You may modify and redistribute the device driver code under the
+GNU General Public License (a copy of which is attached hereto as
+Exhibit A) published by the Free Software Foundation (version 2).
+
+
+EXHIBIT A
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
diff --git a/Documentation/networking/Makefile b/Documentation/networking/Makefile
new file mode 100644
index 000000000..4c5d7c485
--- /dev/null
+++ b/Documentation/networking/Makefile
@@ -0,0 +1 @@
+subdir-y := timestamping
diff --git a/Documentation/networking/PLIP.txt b/Documentation/networking/PLIP.txt
new file mode 100644
index 000000000..ad7e3f7c3
--- /dev/null
+++ b/Documentation/networking/PLIP.txt
@@ -0,0 +1,215 @@
+PLIP: The Parallel Line Internet Protocol Device
+
+Donald Becker (becker@super.org)
+I.D.A. Supercomputing Research Center, Bowie MD 20715
+
+At some point T. Thorn will probably contribute text,
+Tommy Thorn (tthorn@daimi.aau.dk)
+
+PLIP Introduction
+-----------------
+
+This document describes the parallel port packet pusher for Net/LGX.
+This device interface allows a point-to-point connection between two
+parallel ports to appear as a IP network interface.
+
+What is PLIP?
+=============
+
+PLIP is Parallel Line IP, that is, the transportation of IP packages
+over a parallel port. In the case of a PC, the obvious choice is the
+printer port. PLIP is a non-standard, but [can use] uses the standard
+LapLink null-printer cable [can also work in turbo mode, with a PLIP
+cable]. [The protocol used to pack IP packages, is a simple one
+initiated by Crynwr.]
+
+Advantages of PLIP
+==================
+
+It's cheap, it's available everywhere, and it's easy.
+
+The PLIP cable is all that's needed to connect two Linux boxes, and it
+can be built for very few bucks.
+
+Connecting two Linux boxes takes only a second's decision and a few
+minutes' work, no need to search for a [supported] netcard. This might
+even be especially important in the case of notebooks, where netcards
+are not easily available.
+
+Not requiring a netcard also means that apart from connecting the
+cables, everything else is software configuration [which in principle
+could be made very easy.]
+
+Disadvantages of PLIP
+=====================
+
+Doesn't work over a modem, like SLIP and PPP. Limited range, 15 m.
+Can only be used to connect three (?) Linux boxes. Doesn't connect to
+an existing Ethernet. Isn't standard (not even de facto standard, like
+SLIP).
+
+Performance
+===========
+
+PLIP easily outperforms Ethernet cards....(ups, I was dreaming, but
+it *is* getting late. EOB)
+
+PLIP driver details
+-------------------
+
+The Linux PLIP driver is an implementation of the original Crynwr protocol,
+that uses the parallel port subsystem of the kernel in order to properly
+share parallel ports between PLIP and other services.
+
+IRQs and trigger timeouts
+=========================
+
+When a parallel port used for a PLIP driver has an IRQ configured to it, the
+PLIP driver is signaled whenever data is sent to it via the cable, such that
+when no data is available, the driver isn't being used.
+
+However, on some machines it is hard, if not impossible, to configure an IRQ
+to a certain parallel port, mainly because it is used by some other device.
+On these machines, the PLIP driver can be used in IRQ-less mode, where
+the PLIP driver would constantly poll the parallel port for data waiting,
+and if such data is available, process it. This mode is less efficient than
+the IRQ mode, because the driver has to check the parallel port many times
+per second, even when no data at all is sent. Some rough measurements
+indicate that there isn't a noticeable performance drop when using IRQ-less
+mode as compared to IRQ mode as far as the data transfer speed is involved.
+There is a performance drop on the machine hosting the driver.
+
+When the PLIP driver is used in IRQ mode, the timeout used for triggering a
+data transfer (the maximal time the PLIP driver would allow the other side
+before announcing a timeout, when trying to handshake a transfer of some
+data) is, by default, 500usec. As IRQ delivery is more or less immediate,
+this timeout is quite sufficient.
+
+When in IRQ-less mode, the PLIP driver polls the parallel port HZ times
+per second (where HZ is typically 100 on most platforms, and 1024 on an
+Alpha, as of this writing). Between two such polls, there are 10^6/HZ usecs.
+On an i386, for example, 10^6/100 = 10000usec. It is easy to see that it is
+quite possible for the trigger timeout to expire between two such polls, as
+the timeout is only 500usec long. As a result, it is required to change the
+trigger timeout on the *other* side of a PLIP connection, to about
+10^6/HZ usecs. If both sides of a PLIP connection are used in IRQ-less mode,
+this timeout is required on both sides.
+
+It appears that in practice, the trigger timeout can be shorter than in the
+above calculation. It isn't an important issue, unless the wire is faulty,
+in which case a long timeout would stall the machine when, for whatever
+reason, bits are dropped.
+
+A utility that can perform this change in Linux is plipconfig, which is part
+of the net-tools package (its location can be found in the
+Documentation/Changes file). An example command would be
+'plipconfig plipX trigger 10000', where plipX is the appropriate
+PLIP device.
+
+PLIP hardware interconnection
+-----------------------------
+
+PLIP uses several different data transfer methods. The first (and the
+only one implemented in the early version of the code) uses a standard
+printer "null" cable to transfer data four bits at a time using
+data bit outputs connected to status bit inputs.
+
+The second data transfer method relies on both machines having
+bi-directional parallel ports, rather than output-only ``printer''
+ports. This allows byte-wide transfers and avoids reconstructing
+nibbles into bytes, leading to much faster transfers.
+
+Parallel Transfer Mode 0 Cable
+==============================
+
+The cable for the first transfer mode is a standard
+printer "null" cable which transfers data four bits at a time using
+data bit outputs of the first port (machine T) connected to the
+status bit inputs of the second port (machine R). There are five
+status inputs, and they are used as four data inputs and a clock (data
+strobe) input, arranged so that the data input bits appear as contiguous
+bits with standard status register implementation.
+
+A cable that implements this protocol is available commercially as a
+"Null Printer" or "Turbo Laplink" cable. It can be constructed with
+two DB-25 male connectors symmetrically connected as follows:
+
+ STROBE output 1*
+ D0->ERROR 2 - 15 15 - 2
+ D1->SLCT 3 - 13 13 - 3
+ D2->PAPOUT 4 - 12 12 - 4
+ D3->ACK 5 - 10 10 - 5
+ D4->BUSY 6 - 11 11 - 6
+ D5,D6,D7 are 7*, 8*, 9*
+ AUTOFD output 14*
+ INIT output 16*
+ SLCTIN 17 - 17
+ extra grounds are 18*,19*,20*,21*,22*,23*,24*
+ GROUND 25 - 25
+* Do not connect these pins on either end
+
+If the cable you are using has a metallic shield it should be
+connected to the metallic DB-25 shell at one end only.
+
+Parallel Transfer Mode 1
+========================
+
+The second data transfer method relies on both machines having
+bi-directional parallel ports, rather than output-only ``printer''
+ports. This allows byte-wide transfers, and avoids reconstructing
+nibbles into bytes. This cable should not be used on unidirectional
+``printer'' (as opposed to ``parallel'') ports or when the machine
+isn't configured for PLIP, as it will result in output driver
+conflicts and the (unlikely) possibility of damage.
+
+The cable for this transfer mode should be constructed as follows:
+
+ STROBE->BUSY 1 - 11
+ D0->D0 2 - 2
+ D1->D1 3 - 3
+ D2->D2 4 - 4
+ D3->D3 5 - 5
+ D4->D4 6 - 6
+ D5->D5 7 - 7
+ D6->D6 8 - 8
+ D7->D7 9 - 9
+ INIT -> ACK 16 - 10
+ AUTOFD->PAPOUT 14 - 12
+ SLCT->SLCTIN 13 - 17
+ GND->ERROR 18 - 15
+ extra grounds are 19*,20*,21*,22*,23*,24*
+ GROUND 25 - 25
+* Do not connect these pins on either end
+
+Once again, if the cable you are using has a metallic shield it should
+be connected to the metallic DB-25 shell at one end only.
+
+PLIP Mode 0 transfer protocol
+=============================
+
+The PLIP driver is compatible with the "Crynwr" parallel port transfer
+standard in Mode 0. That standard specifies the following protocol:
+
+ send header nibble '0x8'
+ count-low octet
+ count-high octet
+ ... data octets
+ checksum octet
+
+Each octet is sent as
+ <wait for rx. '0x1?'> <send 0x10+(octet&0x0F)>
+ <wait for rx. '0x0?'> <send 0x00+((octet>>4)&0x0F)>
+
+To start a transfer the transmitting machine outputs a nibble 0x08.
+That raises the ACK line, triggering an interrupt in the receiving
+machine. The receiving machine disables interrupts and raises its own ACK
+line.
+
+Restated:
+
+(OUT is bit 0-4, OUT.j is bit j from OUT. IN likewise)
+Send_Byte:
+ OUT := low nibble, OUT.4 := 1
+ WAIT FOR IN.4 = 1
+ OUT := high nibble, OUT.4 := 0
+ WAIT FOR IN.4 = 0
diff --git a/Documentation/networking/README.ipw2100 b/Documentation/networking/README.ipw2100
new file mode 100644
index 000000000..6f85e1d06
--- /dev/null
+++ b/Documentation/networking/README.ipw2100
@@ -0,0 +1,293 @@
+
+Intel(R) PRO/Wireless 2100 Driver for Linux in support of:
+
+Intel(R) PRO/Wireless 2100 Network Connection
+
+Copyright (C) 2003-2006, Intel Corporation
+
+README.ipw2100
+
+Version: git-1.1.5
+Date : January 25, 2006
+
+Index
+-----------------------------------------------
+0. IMPORTANT INFORMATION BEFORE USING THIS DRIVER
+1. Introduction
+2. Release git-1.1.5 Current Features
+3. Command Line Parameters
+4. Sysfs Helper Files
+5. Radio Kill Switch
+6. Dynamic Firmware
+7. Power Management
+8. Support
+9. License
+
+
+0. IMPORTANT INFORMATION BEFORE USING THIS DRIVER
+-----------------------------------------------
+
+Important Notice FOR ALL USERS OR DISTRIBUTORS!!!!
+
+Intel wireless LAN adapters are engineered, manufactured, tested, and
+quality checked to ensure that they meet all necessary local and
+governmental regulatory agency requirements for the regions that they
+are designated and/or marked to ship into. Since wireless LANs are
+generally unlicensed devices that share spectrum with radars,
+satellites, and other licensed and unlicensed devices, it is sometimes
+necessary to dynamically detect, avoid, and limit usage to avoid
+interference with these devices. In many instances Intel is required to
+provide test data to prove regional and local compliance to regional and
+governmental regulations before certification or approval to use the
+product is granted. Intel's wireless LAN's EEPROM, firmware, and
+software driver are designed to carefully control parameters that affect
+radio operation and to ensure electromagnetic compliance (EMC). These
+parameters include, without limitation, RF power, spectrum usage,
+channel scanning, and human exposure.
+
+For these reasons Intel cannot permit any manipulation by third parties
+of the software provided in binary format with the wireless WLAN
+adapters (e.g., the EEPROM and firmware). Furthermore, if you use any
+patches, utilities, or code with the Intel wireless LAN adapters that
+have been manipulated by an unauthorized party (i.e., patches,
+utilities, or code (including open source code modifications) which have
+not been validated by Intel), (i) you will be solely responsible for
+ensuring the regulatory compliance of the products, (ii) Intel will bear
+no liability, under any theory of liability for any issues associated
+with the modified products, including without limitation, claims under
+the warranty and/or issues arising from regulatory non-compliance, and
+(iii) Intel will not provide or be required to assist in providing
+support to any third parties for such modified products.
+
+Note: Many regulatory agencies consider Wireless LAN adapters to be
+modules, and accordingly, condition system-level regulatory approval
+upon receipt and review of test data documenting that the antennas and
+system configuration do not cause the EMC and radio operation to be
+non-compliant.
+
+The drivers available for download from SourceForge are provided as a
+part of a development project. Conformance to local regulatory
+requirements is the responsibility of the individual developer. As
+such, if you are interested in deploying or shipping a driver as part of
+solution intended to be used for purposes other than development, please
+obtain a tested driver from Intel Customer Support at:
+
+http://www.intel.com/support/wireless/sb/CS-006408.htm
+
+1. Introduction
+-----------------------------------------------
+
+This document provides a brief overview of the features supported by the
+IPW2100 driver project. The main project website, where the latest
+development version of the driver can be found, is:
+
+ http://ipw2100.sourceforge.net
+
+There you can find the not only the latest releases, but also information about
+potential fixes and patches, as well as links to the development mailing list
+for the driver project.
+
+
+2. Release git-1.1.5 Current Supported Features
+-----------------------------------------------
+- Managed (BSS) and Ad-Hoc (IBSS)
+- WEP (shared key and open)
+- Wireless Tools support
+- 802.1x (tested with XSupplicant 1.0.1)
+
+Enabled (but not supported) features:
+- Monitor/RFMon mode
+- WPA/WPA2
+
+The distinction between officially supported and enabled is a reflection
+on the amount of validation and interoperability testing that has been
+performed on a given feature.
+
+
+3. Command Line Parameters
+-----------------------------------------------
+
+If the driver is built as a module, the following optional parameters are used
+by entering them on the command line with the modprobe command using this
+syntax:
+
+ modprobe ipw2100 [<option>=<VAL1><,VAL2>...]
+
+For example, to disable the radio on driver loading, enter:
+
+ modprobe ipw2100 disable=1
+
+The ipw2100 driver supports the following module parameters:
+
+Name Value Example:
+debug 0x0-0xffffffff debug=1024
+mode 0,1,2 mode=1 /* AdHoc */
+channel int channel=3 /* Only valid in AdHoc or Monitor */
+associate boolean associate=0 /* Do NOT auto associate */
+disable boolean disable=1 /* Do not power the HW */
+
+
+4. Sysfs Helper Files
+---------------------------
+-----------------------------------------------
+
+There are several ways to control the behavior of the driver. Many of the
+general capabilities are exposed through the Wireless Tools (iwconfig). There
+are a few capabilities that are exposed through entries in the Linux Sysfs.
+
+
+----- Driver Level ------
+For the driver level files, look in /sys/bus/pci/drivers/ipw2100/
+
+ debug_level
+
+ This controls the same global as the 'debug' module parameter. For
+ information on the various debugging levels available, run the 'dvals'
+ script found in the driver source directory.
+
+ NOTE: 'debug_level' is only enabled if CONFIG_IPW2100_DEBUG is turn
+ on.
+
+----- Device Level ------
+For the device level files look in
+
+ /sys/bus/pci/drivers/ipw2100/{PCI-ID}/
+
+For example:
+ /sys/bus/pci/drivers/ipw2100/0000:02:01.0
+
+For the device level files, see /sys/bus/pci/drivers/ipw2100:
+
+ rf_kill
+ read -
+ 0 = RF kill not enabled (radio on)
+ 1 = SW based RF kill active (radio off)
+ 2 = HW based RF kill active (radio off)
+ 3 = Both HW and SW RF kill active (radio off)
+ write -
+ 0 = If SW based RF kill active, turn the radio back on
+ 1 = If radio is on, activate SW based RF kill
+
+ NOTE: If you enable the SW based RF kill and then toggle the HW
+ based RF kill from ON -> OFF -> ON, the radio will NOT come back on
+
+
+5. Radio Kill Switch
+-----------------------------------------------
+Most laptops provide the ability for the user to physically disable the radio.
+Some vendors have implemented this as a physical switch that requires no
+software to turn the radio off and on. On other laptops, however, the switch
+is controlled through a button being pressed and a software driver then making
+calls to turn the radio off and on. This is referred to as a "software based
+RF kill switch"
+
+See the Sysfs helper file 'rf_kill' for determining the state of the RF switch
+on your system.
+
+
+6. Dynamic Firmware
+-----------------------------------------------
+As the firmware is licensed under a restricted use license, it can not be
+included within the kernel sources. To enable the IPW2100 you will need a
+firmware image to load into the wireless NIC's processors.
+
+You can obtain these images from <http://ipw2100.sf.net/firmware.php>.
+
+See INSTALL for instructions on installing the firmware.
+
+
+7. Power Management
+-----------------------------------------------
+The IPW2100 supports the configuration of the Power Save Protocol
+through a private wireless extension interface. The IPW2100 supports
+the following different modes:
+
+ off No power management. Radio is always on.
+ on Automatic power management
+ 1-5 Different levels of power management. The higher the
+ number the greater the power savings, but with an impact to
+ packet latencies.
+
+Power management works by powering down the radio after a certain
+interval of time has passed where no packets are passed through the
+radio. Once powered down, the radio remains in that state for a given
+period of time. For higher power savings, the interval between last
+packet processed to sleep is shorter and the sleep period is longer.
+
+When the radio is asleep, the access point sending data to the station
+must buffer packets at the AP until the station wakes up and requests
+any buffered packets. If you have an AP that does not correctly support
+the PSP protocol you may experience packet loss or very poor performance
+while power management is enabled. If this is the case, you will need
+to try and find a firmware update for your AP, or disable power
+management (via `iwconfig eth1 power off`)
+
+To configure the power level on the IPW2100 you use a combination of
+iwconfig and iwpriv. iwconfig is used to turn power management on, off,
+and set it to auto.
+
+ iwconfig eth1 power off Disables radio power down
+ iwconfig eth1 power on Enables radio power management to
+ last set level (defaults to AUTO)
+ iwpriv eth1 set_power 0 Sets power level to AUTO and enables
+ power management if not previously
+ enabled.
+ iwpriv eth1 set_power 1-5 Set the power level as specified,
+ enabling power management if not
+ previously enabled.
+
+You can view the current power level setting via:
+
+ iwpriv eth1 get_power
+
+It will return the current period or timeout that is configured as a string
+in the form of xxxx/yyyy (z) where xxxx is the timeout interval (amount of
+time after packet processing), yyyy is the period to sleep (amount of time to
+wait before powering the radio and querying the access point for buffered
+packets), and z is the 'power level'. If power management is turned off the
+xxxx/yyyy will be replaced with 'off' -- the level reported will be the active
+level if `iwconfig eth1 power on` is invoked.
+
+
+8. Support
+-----------------------------------------------
+
+For general development information and support,
+go to:
+
+ http://ipw2100.sf.net/
+
+The ipw2100 1.1.0 driver and firmware can be downloaded from:
+
+ http://support.intel.com
+
+For installation support on the ipw2100 1.1.0 driver on Linux kernels
+2.6.8 or greater, email support is available from:
+
+ http://supportmail.intel.com
+
+9. License
+-----------------------------------------------
+
+ Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License (version 2) as
+ published by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ License Contact Information:
+ James P. Ketrenos <ipw2100-admin@linux.intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
diff --git a/Documentation/networking/README.ipw2200 b/Documentation/networking/README.ipw2200
new file mode 100644
index 000000000..b7658bed4
--- /dev/null
+++ b/Documentation/networking/README.ipw2200
@@ -0,0 +1,472 @@
+
+Intel(R) PRO/Wireless 2915ABG Driver for Linux in support of:
+
+Intel(R) PRO/Wireless 2200BG Network Connection
+Intel(R) PRO/Wireless 2915ABG Network Connection
+
+Note: The Intel(R) PRO/Wireless 2915ABG Driver for Linux and Intel(R)
+PRO/Wireless 2200BG Driver for Linux is a unified driver that works on
+both hardware adapters listed above. In this document the Intel(R)
+PRO/Wireless 2915ABG Driver for Linux will be used to reference the
+unified driver.
+
+Copyright (C) 2004-2006, Intel Corporation
+
+README.ipw2200
+
+Version: 1.1.2
+Date : March 30, 2006
+
+
+Index
+-----------------------------------------------
+0. IMPORTANT INFORMATION BEFORE USING THIS DRIVER
+1. Introduction
+1.1. Overview of features
+1.2. Module parameters
+1.3. Wireless Extension Private Methods
+1.4. Sysfs Helper Files
+1.5. Supported channels
+2. Ad-Hoc Networking
+3. Interacting with Wireless Tools
+3.1. iwconfig mode
+3.2. iwconfig sens
+4. About the Version Numbers
+5. Firmware installation
+6. Support
+7. License
+
+
+0. IMPORTANT INFORMATION BEFORE USING THIS DRIVER
+-----------------------------------------------
+
+Important Notice FOR ALL USERS OR DISTRIBUTORS!!!!
+
+Intel wireless LAN adapters are engineered, manufactured, tested, and
+quality checked to ensure that they meet all necessary local and
+governmental regulatory agency requirements for the regions that they
+are designated and/or marked to ship into. Since wireless LANs are
+generally unlicensed devices that share spectrum with radars,
+satellites, and other licensed and unlicensed devices, it is sometimes
+necessary to dynamically detect, avoid, and limit usage to avoid
+interference with these devices. In many instances Intel is required to
+provide test data to prove regional and local compliance to regional and
+governmental regulations before certification or approval to use the
+product is granted. Intel's wireless LAN's EEPROM, firmware, and
+software driver are designed to carefully control parameters that affect
+radio operation and to ensure electromagnetic compliance (EMC). These
+parameters include, without limitation, RF power, spectrum usage,
+channel scanning, and human exposure.
+
+For these reasons Intel cannot permit any manipulation by third parties
+of the software provided in binary format with the wireless WLAN
+adapters (e.g., the EEPROM and firmware). Furthermore, if you use any
+patches, utilities, or code with the Intel wireless LAN adapters that
+have been manipulated by an unauthorized party (i.e., patches,
+utilities, or code (including open source code modifications) which have
+not been validated by Intel), (i) you will be solely responsible for
+ensuring the regulatory compliance of the products, (ii) Intel will bear
+no liability, under any theory of liability for any issues associated
+with the modified products, including without limitation, claims under
+the warranty and/or issues arising from regulatory non-compliance, and
+(iii) Intel will not provide or be required to assist in providing
+support to any third parties for such modified products.
+
+Note: Many regulatory agencies consider Wireless LAN adapters to be
+modules, and accordingly, condition system-level regulatory approval
+upon receipt and review of test data documenting that the antennas and
+system configuration do not cause the EMC and radio operation to be
+non-compliant.
+
+The drivers available for download from SourceForge are provided as a
+part of a development project. Conformance to local regulatory
+requirements is the responsibility of the individual developer. As
+such, if you are interested in deploying or shipping a driver as part of
+solution intended to be used for purposes other than development, please
+obtain a tested driver from Intel Customer Support at:
+
+http://support.intel.com
+
+
+1. Introduction
+-----------------------------------------------
+The following sections attempt to provide a brief introduction to using
+the Intel(R) PRO/Wireless 2915ABG Driver for Linux.
+
+This document is not meant to be a comprehensive manual on
+understanding or using wireless technologies, but should be sufficient
+to get you moving without wires on Linux.
+
+For information on building and installing the driver, see the INSTALL
+file.
+
+
+1.1. Overview of Features
+-----------------------------------------------
+The current release (1.1.2) supports the following features:
+
++ BSS mode (Infrastructure, Managed)
++ IBSS mode (Ad-Hoc)
++ WEP (OPEN and SHARED KEY mode)
++ 802.1x EAP via wpa_supplicant and xsupplicant
++ Wireless Extension support
++ Full B and G rate support (2200 and 2915)
++ Full A rate support (2915 only)
++ Transmit power control
++ S state support (ACPI suspend/resume)
+
+The following features are currently enabled, but not officially
+supported:
+
++ WPA
++ long/short preamble support
++ Monitor mode (aka RFMon)
+
+The distinction between officially supported and enabled is a reflection
+on the amount of validation and interoperability testing that has been
+performed on a given feature.
+
+
+
+1.2. Command Line Parameters
+-----------------------------------------------
+
+Like many modules used in the Linux kernel, the Intel(R) PRO/Wireless
+2915ABG Driver for Linux allows configuration options to be provided
+as module parameters. The most common way to specify a module parameter
+is via the command line.
+
+The general form is:
+
+% modprobe ipw2200 parameter=value
+
+Where the supported parameter are:
+
+ associate
+ Set to 0 to disable the auto scan-and-associate functionality of the
+ driver. If disabled, the driver will not attempt to scan
+ for and associate to a network until it has been configured with
+ one or more properties for the target network, for example configuring
+ the network SSID. Default is 0 (do not auto-associate)
+
+ Example: % modprobe ipw2200 associate=0
+
+ auto_create
+ Set to 0 to disable the auto creation of an Ad-Hoc network
+ matching the channel and network name parameters provided.
+ Default is 1.
+
+ channel
+ channel number for association. The normal method for setting
+ the channel would be to use the standard wireless tools
+ (i.e. `iwconfig eth1 channel 10`), but it is useful sometimes
+ to set this while debugging. Channel 0 means 'ANY'
+
+ debug
+ If using a debug build, this is used to control the amount of debug
+ info is logged. See the 'dvals' and 'load' script for more info on
+ how to use this (the dvals and load scripts are provided as part
+ of the ipw2200 development snapshot releases available from the
+ SourceForge project at http://ipw2200.sf.net)
+
+ led
+ Can be used to turn on experimental LED code.
+ 0 = Off, 1 = On. Default is 1.
+
+ mode
+ Can be used to set the default mode of the adapter.
+ 0 = Managed, 1 = Ad-Hoc, 2 = Monitor
+
+
+1.3. Wireless Extension Private Methods
+-----------------------------------------------
+
+As an interface designed to handle generic hardware, there are certain
+capabilities not exposed through the normal Wireless Tool interface. As
+such, a provision is provided for a driver to declare custom, or
+private, methods. The Intel(R) PRO/Wireless 2915ABG Driver for Linux
+defines several of these to configure various settings.
+
+The general form of using the private wireless methods is:
+
+ % iwpriv $IFNAME method parameters
+
+Where $IFNAME is the interface name the device is registered with
+(typically eth1, customized via one of the various network interface
+name managers, such as ifrename)
+
+The supported private methods are:
+
+ get_mode
+ Can be used to report out which IEEE mode the driver is
+ configured to support. Example:
+
+ % iwpriv eth1 get_mode
+ eth1 get_mode:802.11bg (6)
+
+ set_mode
+ Can be used to configure which IEEE mode the driver will
+ support.
+
+ Usage:
+ % iwpriv eth1 set_mode {mode}
+ Where {mode} is a number in the range 1-7:
+ 1 802.11a (2915 only)
+ 2 802.11b
+ 3 802.11ab (2915 only)
+ 4 802.11g
+ 5 802.11ag (2915 only)
+ 6 802.11bg
+ 7 802.11abg (2915 only)
+
+ get_preamble
+ Can be used to report configuration of preamble length.
+
+ set_preamble
+ Can be used to set the configuration of preamble length:
+
+ Usage:
+ % iwpriv eth1 set_preamble {mode}
+ Where {mode} is one of:
+ 1 Long preamble only
+ 0 Auto (long or short based on connection)
+
+
+1.4. Sysfs Helper Files:
+-----------------------------------------------
+
+The Linux kernel provides a pseudo file system that can be used to
+access various components of the operating system. The Intel(R)
+PRO/Wireless 2915ABG Driver for Linux exposes several configuration
+parameters through this mechanism.
+
+An entry in the sysfs can support reading and/or writing. You can
+typically query the contents of a sysfs entry through the use of cat,
+and can set the contents via echo. For example:
+
+% cat /sys/bus/pci/drivers/ipw2200/debug_level
+
+Will report the current debug level of the driver's logging subsystem
+(only available if CONFIG_IPW2200_DEBUG was configured when the driver
+was built).
+
+You can set the debug level via:
+
+% echo $VALUE > /sys/bus/pci/drivers/ipw2200/debug_level
+
+Where $VALUE would be a number in the case of this sysfs entry. The
+input to sysfs files does not have to be a number. For example, the
+firmware loader used by hotplug utilizes sysfs entries for transferring
+the firmware image from user space into the driver.
+
+The Intel(R) PRO/Wireless 2915ABG Driver for Linux exposes sysfs entries
+at two levels -- driver level, which apply to all instances of the driver
+(in the event that there are more than one device installed) and device
+level, which applies only to the single specific instance.
+
+
+1.4.1 Driver Level Sysfs Helper Files
+-----------------------------------------------
+
+For the driver level files, look in /sys/bus/pci/drivers/ipw2200/
+
+ debug_level
+
+ This controls the same global as the 'debug' module parameter
+
+
+
+1.4.2 Device Level Sysfs Helper Files
+-----------------------------------------------
+
+For the device level files, look in
+
+ /sys/bus/pci/drivers/ipw2200/{PCI-ID}/
+
+For example:
+ /sys/bus/pci/drivers/ipw2200/0000:02:01.0
+
+For the device level files, see /sys/bus/pci/drivers/ipw2200:
+
+ rf_kill
+ read -
+ 0 = RF kill not enabled (radio on)
+ 1 = SW based RF kill active (radio off)
+ 2 = HW based RF kill active (radio off)
+ 3 = Both HW and SW RF kill active (radio off)
+ write -
+ 0 = If SW based RF kill active, turn the radio back on
+ 1 = If radio is on, activate SW based RF kill
+
+ NOTE: If you enable the SW based RF kill and then toggle the HW
+ based RF kill from ON -> OFF -> ON, the radio will NOT come back on
+
+ ucode
+ read-only access to the ucode version number
+
+ led
+ read -
+ 0 = LED code disabled
+ 1 = LED code enabled
+ write -
+ 0 = Disable LED code
+ 1 = Enable LED code
+
+ NOTE: The LED code has been reported to hang some systems when
+ running ifconfig and is therefore disabled by default.
+
+
+1.5. Supported channels
+-----------------------------------------------
+
+Upon loading the Intel(R) PRO/Wireless 2915ABG Driver for Linux, a
+message stating the detected geography code and the number of 802.11
+channels supported by the card will be displayed in the log.
+
+The geography code corresponds to a regulatory domain as shown in the
+table below.
+
+ Supported channels
+Code Geography 802.11bg 802.11a
+
+--- Restricted 11 0
+ZZF Custom US/Canada 11 8
+ZZD Rest of World 13 0
+ZZA Custom USA & Europe & High 11 13
+ZZB Custom NA & Europe 11 13
+ZZC Custom Japan 11 4
+ZZM Custom 11 0
+ZZE Europe 13 19
+ZZJ Custom Japan 14 4
+ZZR Rest of World 14 0
+ZZH High Band 13 4
+ZZG Custom Europe 13 4
+ZZK Europe 13 24
+ZZL Europe 11 13
+
+
+2. Ad-Hoc Networking
+-----------------------------------------------
+
+When using a device in an Ad-Hoc network, it is useful to understand the
+sequence and requirements for the driver to be able to create, join, or
+merge networks.
+
+The following attempts to provide enough information so that you can
+have a consistent experience while using the driver as a member of an
+Ad-Hoc network.
+
+2.1. Joining an Ad-Hoc Network
+-----------------------------------------------
+
+The easiest way to get onto an Ad-Hoc network is to join one that
+already exists.
+
+2.2. Creating an Ad-Hoc Network
+-----------------------------------------------
+
+An Ad-Hoc networks is created using the syntax of the Wireless tool.
+
+For Example:
+iwconfig eth1 mode ad-hoc essid testing channel 2
+
+2.3. Merging Ad-Hoc Networks
+-----------------------------------------------
+
+
+3. Interaction with Wireless Tools
+-----------------------------------------------
+
+3.1 iwconfig mode
+-----------------------------------------------
+
+When configuring the mode of the adapter, all run-time configured parameters
+are reset to the value used when the module was loaded. This includes
+channels, rates, ESSID, etc.
+
+3.2 iwconfig sens
+-----------------------------------------------
+
+The 'iwconfig ethX sens XX' command will not set the signal sensitivity
+threshold, as described in iwconfig documentation, but rather the number
+of consecutive missed beacons that will trigger handover, i.e. roaming
+to another access point. At the same time, it will set the disassociation
+threshold to 3 times the given value.
+
+
+4. About the Version Numbers
+-----------------------------------------------
+
+Due to the nature of open source development projects, there are
+frequently changes being incorporated that have not gone through
+a complete validation process. These changes are incorporated into
+development snapshot releases.
+
+Releases are numbered with a three level scheme:
+
+ major.minor.development
+
+Any version where the 'development' portion is 0 (for example
+1.0.0, 1.1.0, etc.) indicates a stable version that will be made
+available for kernel inclusion.
+
+Any version where the 'development' portion is not a 0 (for
+example 1.0.1, 1.1.5, etc.) indicates a development version that is
+being made available for testing and cutting edge users. The stability
+and functionality of the development releases are not know. We make
+efforts to try and keep all snapshots reasonably stable, but due to the
+frequency of their release, and the desire to get those releases
+available as quickly as possible, unknown anomalies should be expected.
+
+The major version number will be incremented when significant changes
+are made to the driver. Currently, there are no major changes planned.
+
+5. Firmware installation
+----------------------------------------------
+
+The driver requires a firmware image, download it and extract the
+files under /lib/firmware (or wherever your hotplug's firmware.agent
+will look for firmware files)
+
+The firmware can be downloaded from the following URL:
+
+ http://ipw2200.sf.net/
+
+
+6. Support
+-----------------------------------------------
+
+For direct support of the 1.0.0 version, you can contact
+http://supportmail.intel.com, or you can use the open source project
+support.
+
+For general information and support, go to:
+
+ http://ipw2200.sf.net/
+
+
+7. License
+-----------------------------------------------
+
+ Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License version 2 as
+ published by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc., 59
+ Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ The full GNU General Public License is included in this distribution in the
+ file called LICENSE.
+
+ Contact Information:
+ James P. Ketrenos <ipw2100-admin@linux.intel.com>
+ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
diff --git a/Documentation/networking/README.sb1000 b/Documentation/networking/README.sb1000
new file mode 100644
index 000000000..f92c2aac5
--- /dev/null
+++ b/Documentation/networking/README.sb1000
@@ -0,0 +1,207 @@
+sb1000 is a module network device driver for the General Instrument (also known
+as NextLevel) SURFboard1000 internal cable modem board. This is an ISA card
+which is used by a number of cable TV companies to provide cable modem access.
+It's a one-way downstream-only cable modem, meaning that your upstream net link
+is provided by your regular phone modem.
+
+This driver was written by Franco Venturi <fventuri@mediaone.net>. He deserves
+a great deal of thanks for this wonderful piece of code!
+
+-----------------------------------------------------------------------------
+
+Support for this device is now a part of the standard Linux kernel. The
+driver source code file is drivers/net/sb1000.c. In addition to this
+you will need:
+
+1.) The "cmconfig" program. This is a utility which supplements "ifconfig"
+to configure the cable modem and network interface (usually called "cm0");
+and
+
+2.) Several PPP scripts which live in /etc/ppp to make connecting via your
+cable modem easy.
+
+ These utilities can be obtained from:
+
+ http://www.jacksonville.net/~fventuri/
+
+ in Franco's original source code distribution .tar.gz file. Support for
+ the sb1000 driver can be found at:
+
+ http://web.archive.org/web/*/http://home.adelphia.net/~siglercm/sb1000.html
+ http://web.archive.org/web/*/http://linuxpower.cx/~cable/
+
+ along with these utilities.
+
+3.) The standard isapnp tools. These are necessary to configure your SB1000
+card at boot time (or afterwards by hand) since it's a PnP card.
+
+ If you don't have these installed as a standard part of your Linux
+ distribution, you can find them at:
+
+ http://www.roestock.demon.co.uk/isapnptools/
+
+ or check your Linux distribution binary CD or their web site. For help with
+ isapnp, pnpdump, or /etc/isapnp.conf, go to:
+
+ http://www.roestock.demon.co.uk/isapnptools/isapnpfaq.html
+
+-----------------------------------------------------------------------------
+
+To make the SB1000 card work, follow these steps:
+
+1.) Run `make config', or `make menuconfig', or `make xconfig', whichever
+you prefer, in the top kernel tree directory to set up your kernel
+configuration. Make sure to say "Y" to "Prompt for development drivers"
+and to say "M" to the sb1000 driver. Also say "Y" or "M" to all the standard
+networking questions to get TCP/IP and PPP networking support.
+
+2.) *BEFORE* you build the kernel, edit drivers/net/sb1000.c. Make sure
+to redefine the value of READ_DATA_PORT to match the I/O address used
+by isapnp to access your PnP cards. This is the value of READPORT in
+/etc/isapnp.conf or given by the output of pnpdump.
+
+3.) Build and install the kernel and modules as usual.
+
+4.) Boot your new kernel following the usual procedures.
+
+5.) Set up to configure the new SB1000 PnP card by capturing the output
+of "pnpdump" to a file and editing this file to set the correct I/O ports,
+IRQ, and DMA settings for all your PnP cards. Make sure none of the settings
+conflict with one another. Then test this configuration by running the
+"isapnp" command with your new config file as the input. Check for
+errors and fix as necessary. (As an aside, I use I/O ports 0x110 and
+0x310 and IRQ 11 for my SB1000 card and these work well for me. YMMV.)
+Then save the finished config file as /etc/isapnp.conf for proper configuration
+on subsequent reboots.
+
+6.) Download the original file sb1000-1.1.2.tar.gz from Franco's site or one of
+the others referenced above. As root, unpack it into a temporary directory and
+do a `make cmconfig' and then `install -c cmconfig /usr/local/sbin'. Don't do
+`make install' because it expects to find all the utilities built and ready for
+installation, not just cmconfig.
+
+7.) As root, copy all the files under the ppp/ subdirectory in Franco's
+tar file into /etc/ppp, being careful not to overwrite any files that are
+already in there. Then modify ppp@gi-on to set the correct login name,
+phone number, and frequency for the cable modem. Also edit pap-secrets
+to specify your login name and password and any site-specific information
+you need.
+
+8.) Be sure to modify /etc/ppp/firewall to use ipchains instead of
+the older ipfwadm commands from the 2.0.x kernels. There's a neat utility to
+convert ipfwadm commands to ipchains commands:
+
+ http://users.dhp.com/~whisper/ipfwadm2ipchains/
+
+You may also wish to modify the firewall script to implement a different
+firewalling scheme.
+
+9.) Start the PPP connection via the script /etc/ppp/ppp@gi-on. You must be
+root to do this. It's better to use a utility like sudo to execute
+frequently used commands like this with root permissions if possible. If you
+connect successfully the cable modem interface will come up and you'll see a
+driver message like this at the console:
+
+ cm0: sb1000 at (0x110,0x310), csn 1, S/N 0x2a0d16d8, IRQ 11.
+ sb1000.c:v1.1.2 6/01/98 (fventuri@mediaone.net)
+
+The "ifconfig" command should show two new interfaces, ppp0 and cm0.
+The command "cmconfig cm0" will give you information about the cable modem
+interface.
+
+10.) Try pinging a site via `ping -c 5 www.yahoo.com', for example. You should
+see packets received.
+
+11.) If you can't get site names (like www.yahoo.com) to resolve into
+IP addresses (like 204.71.200.67), be sure your /etc/resolv.conf file
+has no syntax errors and has the right nameserver IP addresses in it.
+If this doesn't help, try something like `ping -c 5 204.71.200.67' to
+see if the networking is running but the DNS resolution is where the
+problem lies.
+
+12.) If you still have problems, go to the support web sites mentioned above
+and read the information and documentation there.
+
+-----------------------------------------------------------------------------
+
+Common problems:
+
+1.) Packets go out on the ppp0 interface but don't come back on the cm0
+interface. It looks like I'm connected but I can't even ping any
+numerical IP addresses. (This happens predominantly on Debian systems due
+to a default boot-time configuration script.)
+
+Solution -- As root `echo 0 > /proc/sys/net/ipv4/conf/cm0/rp_filter' so it
+can share the same IP address as the ppp0 interface. Note that this
+command should probably be added to the /etc/ppp/cablemodem script
+*right*between* the "/sbin/ifconfig" and "/sbin/cmconfig" commands.
+You may need to do this to /proc/sys/net/ipv4/conf/ppp0/rp_filter as well.
+If you do this to /proc/sys/net/ipv4/conf/default/rp_filter on each reboot
+(in rc.local or some such) then any interfaces can share the same IP
+addresses.
+
+2.) I get "unresolved symbol" error messages on executing `insmod sb1000.o'.
+
+Solution -- You probably have a non-matching kernel source tree and
+/usr/include/linux and /usr/include/asm header files. Make sure you
+install the correct versions of the header files in these two directories.
+Then rebuild and reinstall the kernel.
+
+3.) When isapnp runs it reports an error, and my SB1000 card isn't working.
+
+Solution -- There's a problem with later versions of isapnp using the "(CHECK)"
+option in the lines that allocate the two I/O addresses for the SB1000 card.
+This first popped up on RH 6.0. Delete "(CHECK)" for the SB1000 I/O addresses.
+Make sure they don't conflict with any other pieces of hardware first! Then
+rerun isapnp and go from there.
+
+4.) I can't execute the /etc/ppp/ppp@gi-on file.
+
+Solution -- As root do `chmod ug+x /etc/ppp/ppp@gi-on'.
+
+5.) The firewall script isn't working (with 2.2.x and higher kernels).
+
+Solution -- Use the ipfwadm2ipchains script referenced above to convert the
+/etc/ppp/firewall script from the deprecated ipfwadm commands to ipchains.
+
+6.) I'm getting *tons* of firewall deny messages in the /var/kern.log,
+/var/messages, and/or /var/syslog files, and they're filling up my /var
+partition!!!
+
+Solution -- First, tell your ISP that you're receiving DoS (Denial of Service)
+and/or portscanning (UDP connection attempts) attacks! Look over the deny
+messages to figure out what the attack is and where it's coming from. Next,
+edit /etc/ppp/cablemodem and make sure the ",nobroadcast" option is turned on
+to the "cmconfig" command (uncomment that line). If you're not receiving these
+denied packets on your broadcast interface (IP address xxx.yyy.zzz.255
+typically), then someone is attacking your machine in particular. Be careful
+out there....
+
+7.) Everything seems to work fine but my computer locks up after a while
+(and typically during a lengthy download through the cable modem)!
+
+Solution -- You may need to add a short delay in the driver to 'slow down' the
+SURFboard because your PC might not be able to keep up with the transfer rate
+of the SB1000. To do this, it's probably best to download Franco's
+sb1000-1.1.2.tar.gz archive and build and install sb1000.o manually. You'll
+want to edit the 'Makefile' and look for the 'SB1000_DELAY'
+define. Uncomment those 'CFLAGS' lines (and comment out the default ones)
+and try setting the delay to something like 60 microseconds with:
+'-DSB1000_DELAY=60'. Then do `make' and as root `make install' and try
+it out. If it still doesn't work or you like playing with the driver, you may
+try other numbers. Remember though that the higher the delay, the slower the
+driver (which slows down the rest of the PC too when it is actively
+used). Thanks to Ed Daiga for this tip!
+
+-----------------------------------------------------------------------------
+
+Credits: This README came from Franco Venturi's original README file which is
+still supplied with his driver .tar.gz archive. I and all other sb1000 users
+owe Franco a tremendous "Thank you!" Additional thanks goes to Carl Patten
+and Ralph Bonnell who are now managing the Linux SB1000 web site, and to
+the SB1000 users who reported and helped debug the common problems listed
+above.
+
+
+ Clemmitt Sigler
+ csigler@vt.edu
diff --git a/Documentation/networking/alias.txt b/Documentation/networking/alias.txt
new file mode 100644
index 000000000..85046f53f
--- /dev/null
+++ b/Documentation/networking/alias.txt
@@ -0,0 +1,40 @@
+
+IP-Aliasing:
+============
+
+IP-aliases are an obsolete way to manage multiple IP-addresses/masks
+per interface. Newer tools such as iproute2 support multiple
+address/prefixes per interface, but aliases are still supported
+for backwards compatibility.
+
+An alias is formed by adding a colon and a string when running ifconfig.
+This string is usually numeric, but this is not a must.
+
+o Alias creation.
+ Alias creation is done by 'magic' interface naming: eg. to create a
+ 200.1.1.1 alias for eth0 ...
+
+ # ifconfig eth0:0 200.1.1.1 etc,etc....
+ ~~ -> request alias #0 creation (if not yet exists) for eth0
+
+ The corresponding route is also set up by this command.
+ Please note: The route always points to the base interface.
+
+
+o Alias deletion.
+ The alias is removed by shutting the alias down:
+
+ # ifconfig eth0:0 down
+ ~~~~~~~~~~ -> will delete alias
+
+
+o Alias (re-)configuring
+
+ Aliases are not real devices, but programs should be able to configure and
+ refer to them as usual (ifconfig, route, etc).
+
+
+o Relationship with main device
+
+ If the base device is shut down the added aliases will be deleted
+ too.
diff --git a/Documentation/networking/altera_tse.txt b/Documentation/networking/altera_tse.txt
new file mode 100644
index 000000000..3f24df8c6
--- /dev/null
+++ b/Documentation/networking/altera_tse.txt
@@ -0,0 +1,263 @@
+ Altera Triple-Speed Ethernet MAC driver
+
+Copyright (C) 2008-2014 Altera Corporation
+
+This is the driver for the Altera Triple-Speed Ethernet (TSE) controllers
+using the SGDMA and MSGDMA soft DMA IP components. The driver uses the
+platform bus to obtain component resources. The designs used to test this
+driver were built for a Cyclone(R) V SOC FPGA board, a Cyclone(R) V FPGA board,
+and tested with ARM and NIOS processor hosts seperately. The anticipated use
+cases are simple communications between an embedded system and an external peer
+for status and simple configuration of the embedded system.
+
+For more information visit www.altera.com and www.rocketboards.org. Support
+forums for the driver may be found on www.rocketboards.org, and a design used
+to test this driver may be found there as well. Support is also available from
+the maintainer of this driver, found in MAINTAINERS.
+
+The Triple-Speed Ethernet, SGDMA, and MSGDMA components are all soft IP
+components that can be assembled and built into an FPGA using the Altera
+Quartus toolchain. Quartus 13.1 and 14.0 were used to build the design that
+this driver was tested against. The sopc2dts tool is used to create the
+device tree for the driver, and may be found at rocketboards.org.
+
+The driver probe function examines the device tree and determines if the
+Triple-Speed Ethernet instance is using an SGDMA or MSGDMA component. The
+probe function then installs the appropriate set of DMA routines to
+initialize, setup transmits, receives, and interrupt handling primitives for
+the respective configurations.
+
+The SGDMA component is to be deprecated in the near future (over the next 1-2
+years as of this writing in early 2014) in favor of the MSGDMA component.
+SGDMA support is included for existing designs and reference in case a
+developer wishes to support their own soft DMA logic and driver support. Any
+new designs should not use the SGDMA.
+
+The SGDMA supports only a single transmit or receive operation at a time, and
+therefore will not perform as well compared to the MSGDMA soft IP. Please
+visit www.altera.com for known, documented SGDMA errata.
+
+Scatter-gather DMA is not supported by the SGDMA or MSGDMA at this time.
+Scatter-gather DMA will be added to a future maintenance update to this
+driver.
+
+Jumbo frames are not supported at this time.
+
+The driver limits PHY operations to 10/100Mbps, and has not yet been fully
+tested for 1Gbps. This support will be added in a future maintenance update.
+
+1) Kernel Configuration
+The kernel configuration option is ALTERA_TSE:
+ Device Drivers ---> Network device support ---> Ethernet driver support --->
+ Altera Triple-Speed Ethernet MAC support (ALTERA_TSE)
+
+2) Driver parameters list:
+ debug: message level (0: no output, 16: all);
+ dma_rx_num: Number of descriptors in the RX list (default is 64);
+ dma_tx_num: Number of descriptors in the TX list (default is 64).
+
+3) Command line options
+Driver parameters can be also passed in command line by using:
+ altera_tse=dma_rx_num:128,dma_tx_num:512
+
+4) Driver information and notes
+
+4.1) Transmit process
+When the driver's transmit routine is called by the kernel, it sets up a
+transmit descriptor by calling the underlying DMA transmit routine (SGDMA or
+MSGDMA), and initites a transmit operation. Once the transmit is complete, an
+interrupt is driven by the transmit DMA logic. The driver handles the transmit
+completion in the context of the interrupt handling chain by recycling
+resource required to send and track the requested transmit operation.
+
+4.2) Receive process
+The driver will post receive buffers to the receive DMA logic during driver
+intialization. Receive buffers may or may not be queued depending upon the
+underlying DMA logic (MSGDMA is able queue receive buffers, SGDMA is not able
+to queue receive buffers to the SGDMA receive logic). When a packet is
+received, the DMA logic generates an interrupt. The driver handles a receive
+interrupt by obtaining the DMA receive logic status, reaping receive
+completions until no more receive completions are available.
+
+4.3) Interrupt Mitigation
+The driver is able to mitigate the number of its DMA interrupts
+using NAPI for receive operations. Interrupt mitigation is not yet supported
+for transmit operations, but will be added in a future maintenance release.
+
+4.4) Ethtool support
+Ethtool is supported. Driver statistics and internal errors can be taken using:
+ethtool -S ethX command. It is possible to dump registers etc.
+
+4.5) PHY Support
+The driver is compatible with PAL to work with PHY and GPHY devices.
+
+4.7) List of source files:
+ o Kconfig
+ o Makefile
+ o altera_tse_main.c: main network device driver
+ o altera_tse_ethtool.c: ethtool support
+ o altera_tse.h: private driver structure and common definitions
+ o altera_msgdma.h: MSGDMA implementation function definitions
+ o altera_sgdma.h: SGDMA implementation function definitions
+ o altera_msgdma.c: MSGDMA implementation
+ o altera_sgdma.c: SGDMA implementation
+ o altera_sgdmahw.h: SGDMA register and descriptor definitions
+ o altera_msgdmahw.h: MSGDMA register and descriptor definitions
+ o altera_utils.c: Driver utility functions
+ o altera_utils.h: Driver utility function definitions
+
+5) Debug Information
+
+The driver exports debug information such as internal statistics,
+debug information, MAC and DMA registers etc.
+
+A user may use the ethtool support to get statistics:
+e.g. using: ethtool -S ethX (that shows the statistics counters)
+or sees the MAC registers: e.g. using: ethtool -d ethX
+
+The developer can also use the "debug" module parameter to get
+further debug information.
+
+6) Statistics Support
+
+The controller and driver support a mix of IEEE standard defined statistics,
+RFC defined statistics, and driver or Altera defined statistics. The four
+specifications containing the standard definitions for these statistics are
+as follows:
+
+ o IEEE 802.3-2012 - IEEE Standard for Ethernet.
+ o RFC 2863 found at http://www.rfc-editor.org/rfc/rfc2863.txt.
+ o RFC 2819 found at http://www.rfc-editor.org/rfc/rfc2819.txt.
+ o Altera Triple Speed Ethernet User Guide, found at http://www.altera.com
+
+The statistics supported by the TSE and the device driver are as follows:
+
+"tx_packets" is equivalent to aFramesTransmittedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.2. This statistics is the count of frames that are successfully
+transmitted.
+
+"rx_packets" is equivalent to aFramesReceivedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.5. This statistic is the count of frames that are successfully
+received. This count does not include any error packets such as CRC errors,
+length errors, or alignment errors.
+
+"rx_crc_errors" is equivalent to aFrameCheckSequenceErrors defined in IEEE
+802.3-2012, Section 5.2.2.1.6. This statistic is the count of frames that are
+an integral number of bytes in length and do not pass the CRC test as the frame
+is received.
+
+"rx_align_errors" is equivalent to aAlignmentErrors defined in IEEE 802.3-2012,
+Section 5.2.2.1.7. This statistic is the count of frames that are not an
+integral number of bytes in length and do not pass the CRC test as the frame is
+received.
+
+"tx_bytes" is equivalent to aOctetsTransmittedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.8. This statistic is the count of data and pad bytes
+successfully transmitted from the interface.
+
+"rx_bytes" is equivalent to aOctetsReceivedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.14. This statistic is the count of data and pad bytes
+successfully received by the controller.
+
+"tx_pause" is equivalent to aPAUSEMACCtrlFramesTransmitted defined in IEEE
+802.3-2012, Section 30.3.4.2. This statistic is a count of PAUSE frames
+transmitted from the network controller.
+
+"rx_pause" is equivalent to aPAUSEMACCtrlFramesReceived defined in IEEE
+802.3-2012, Section 30.3.4.3. This statistic is a count of PAUSE frames
+received by the network controller.
+
+"rx_errors" is equivalent to ifInErrors defined in RFC 2863. This statistic is
+a count of the number of packets received containing errors that prevented the
+packet from being delivered to a higher level protocol.
+
+"tx_errors" is equivalent to ifOutErrors defined in RFC 2863. This statistic
+is a count of the number of packets that could not be transmitted due to errors.
+
+"rx_unicast" is equivalent to ifInUcastPkts defined in RFC 2863. This
+statistic is a count of the number of packets received that were not addressed
+to the broadcast address or a multicast group.
+
+"rx_multicast" is equivalent to ifInMulticastPkts defined in RFC 2863. This
+statistic is a count of the number of packets received that were addressed to
+a multicast address group.
+
+"rx_broadcast" is equivalent to ifInBroadcastPkts defined in RFC 2863. This
+statistic is a count of the number of packets received that were addressed to
+the broadcast address.
+
+"tx_discards" is equivalent to ifOutDiscards defined in RFC 2863. This
+statistic is the number of outbound packets not transmitted even though an
+error was not detected. An example of a reason this might occur is to free up
+internal buffer space.
+
+"tx_unicast" is equivalent to ifOutUcastPkts defined in RFC 2863. This
+statistic counts the number of packets transmitted that were not addressed to
+a multicast group or broadcast address.
+
+"tx_multicast" is equivalent to ifOutMulticastPkts defined in RFC 2863. This
+statistic counts the number of packets transmitted that were addressed to a
+multicast group.
+
+"tx_broadcast" is equivalent to ifOutBroadcastPkts defined in RFC 2863. This
+statistic counts the number of packets transmitted that were addressed to a
+broadcast address.
+
+"ether_drops" is equivalent to etherStatsDropEvents defined in RFC 2819.
+This statistic counts the number of packets dropped due to lack of internal
+controller resources.
+
+"rx_total_bytes" is equivalent to etherStatsOctets defined in RFC 2819.
+This statistic counts the total number of bytes received by the controller,
+including error and discarded packets.
+
+"rx_total_packets" is equivalent to etherStatsPkts defined in RFC 2819.
+This statistic counts the total number of packets received by the controller,
+including error, discarded, unicast, multicast, and broadcast packets.
+
+"rx_undersize" is equivalent to etherStatsUndersizePkts defined in RFC 2819.
+This statistic counts the number of correctly formed packets received less
+than 64 bytes long.
+
+"rx_oversize" is equivalent to etherStatsOversizePkts defined in RFC 2819.
+This statistic counts the number of correctly formed packets greater than 1518
+bytes long.
+
+"rx_64_bytes" is equivalent to etherStatsPkts64Octets defined in RFC 2819.
+This statistic counts the total number of packets received that were 64 octets
+in length.
+
+"rx_65_127_bytes" is equivalent to etherStatsPkts65to127Octets defined in RFC
+2819. This statistic counts the total number of packets received that were
+between 65 and 127 octets in length inclusive.
+
+"rx_128_255_bytes" is equivalent to etherStatsPkts128to255Octets defined in
+RFC 2819. This statistic is the total number of packets received that were
+between 128 and 255 octets in length inclusive.
+
+"rx_256_511_bytes" is equivalent to etherStatsPkts256to511Octets defined in
+RFC 2819. This statistic is the total number of packets received that were
+between 256 and 511 octets in length inclusive.
+
+"rx_512_1023_bytes" is equivalent to etherStatsPkts512to1023Octets defined in
+RFC 2819. This statistic is the total number of packets received that were
+between 512 and 1023 octets in length inclusive.
+
+"rx_1024_1518_bytes" is equivalent to etherStatsPkts1024to1518Octets define
+in RFC 2819. This statistic is the total number of packets received that were
+between 1024 and 1518 octets in length inclusive.
+
+"rx_gte_1519_bytes" is a statistic defined specific to the behavior of the
+Altera TSE. This statistics counts the number of received good and errored
+frames between the length of 1519 and the maximum frame length configured
+in the frm_length register. See the Altera TSE User Guide for More details.
+
+"rx_jabbers" is equivalent to etherStatsJabbers defined in RFC 2819. This
+statistic is the total number of packets received that were longer than 1518
+octets, and had either a bad CRC with an integral number of octets (CRC Error)
+or a bad CRC with a non-integral number of octets (Alignment Error).
+
+"rx_runts" is equivalent to etherStatsFragments defined in RFC 2819. This
+statistic is the total number of packets received that were less than 64 octets
+in length and had either a bad CRC with an integral number of octets (CRC
+error) or a bad CRC with a non-integral number of octets (Alignment Error).
diff --git a/Documentation/networking/arcnet-hardware.txt b/Documentation/networking/arcnet-hardware.txt
new file mode 100644
index 000000000..731de4115
--- /dev/null
+++ b/Documentation/networking/arcnet-hardware.txt
@@ -0,0 +1,3133 @@
+
+-----------------------------------------------------------------------------
+1) This file is a supplement to arcnet.txt. Please read that for general
+ driver configuration help.
+-----------------------------------------------------------------------------
+2) This file is no longer Linux-specific. It should probably be moved out of
+ the kernel sources. Ideas?
+-----------------------------------------------------------------------------
+
+Because so many people (myself included) seem to have obtained ARCnet cards
+without manuals, this file contains a quick introduction to ARCnet hardware,
+some cabling tips, and a listing of all jumper settings I can find. Please
+e-mail apenwarr@worldvisions.ca with any settings for your particular card,
+or any other information you have!
+
+
+INTRODUCTION TO ARCNET
+----------------------
+
+ARCnet is a network type which works in a way similar to popular Ethernet
+networks but which is also different in some very important ways.
+
+First of all, you can get ARCnet cards in at least two speeds: 2.5 Mbps
+(slower than Ethernet) and 100 Mbps (faster than normal Ethernet). In fact,
+there are others as well, but these are less common. The different hardware
+types, as far as I'm aware, are not compatible and so you cannot wire a
+100 Mbps card to a 2.5 Mbps card, and so on. From what I hear, my driver does
+work with 100 Mbps cards, but I haven't been able to verify this myself,
+since I only have the 2.5 Mbps variety. It is probably not going to saturate
+your 100 Mbps card. Stop complaining. :)
+
+You also cannot connect an ARCnet card to any kind of Ethernet card and
+expect it to work.
+
+There are two "types" of ARCnet - STAR topology and BUS topology. This
+refers to how the cards are meant to be wired together. According to most
+available documentation, you can only connect STAR cards to STAR cards and
+BUS cards to BUS cards. That makes sense, right? Well, it's not quite
+true; see below under "Cabling."
+
+Once you get past these little stumbling blocks, ARCnet is actually quite a
+well-designed standard. It uses something called "modified token passing"
+which makes it completely incompatible with so-called "Token Ring" cards,
+but which makes transfers much more reliable than Ethernet does. In fact,
+ARCnet will guarantee that a packet arrives safely at the destination, and
+even if it can't possibly be delivered properly (ie. because of a cable
+break, or because the destination computer does not exist) it will at least
+tell the sender about it.
+
+Because of the carefully defined action of the "token", it will always make
+a pass around the "ring" within a maximum length of time. This makes it
+useful for realtime networks.
+
+In addition, all known ARCnet cards have an (almost) identical programming
+interface. This means that with one ARCnet driver you can support any
+card, whereas with Ethernet each manufacturer uses what is sometimes a
+completely different programming interface, leading to a lot of different,
+sometimes very similar, Ethernet drivers. Of course, always using the same
+programming interface also means that when high-performance hardware
+facilities like PCI bus mastering DMA appear, it's hard to take advantage of
+them. Let's not go into that.
+
+One thing that makes ARCnet cards difficult to program for, however, is the
+limit on their packet sizes; standard ARCnet can only send packets that are
+up to 508 bytes in length. This is smaller than the Internet "bare minimum"
+of 576 bytes, let alone the Ethernet MTU of 1500. To compensate, an extra
+level of encapsulation is defined by RFC1201, which I call "packet
+splitting," that allows "virtual packets" to grow as large as 64K each,
+although they are generally kept down to the Ethernet-style 1500 bytes.
+
+For more information on the advantages and disadvantages (mostly the
+advantages) of ARCnet networks, you might try the "ARCnet Trade Association"
+WWW page:
+ http://www.arcnet.com
+
+
+CABLING ARCNET NETWORKS
+-----------------------
+
+This section was rewritten by
+ Vojtech Pavlik <vojtech@suse.cz>
+using information from several people, including:
+ Avery Pennraun <apenwarr@worldvisions.ca>
+ Stephen A. Wood <saw@hallc1.cebaf.gov>
+ John Paul Morrison <jmorriso@bogomips.ee.ubc.ca>
+ Joachim Koenig <jojo@repas.de>
+and Avery touched it up a bit, at Vojtech's request.
+
+ARCnet (the classic 2.5 Mbps version) can be connected by two different
+types of cabling: coax and twisted pair. The other ARCnet-type networks
+(100 Mbps TCNS and 320 kbps - 32 Mbps ARCnet Plus) use different types of
+cabling (Type1, Fiber, C1, C4, C5).
+
+For a coax network, you "should" use 93 Ohm RG-62 cable. But other cables
+also work fine, because ARCnet is a very stable network. I personally use 75
+Ohm TV antenna cable.
+
+Cards for coax cabling are shipped in two different variants: for BUS and
+STAR network topologies. They are mostly the same. The only difference
+lies in the hybrid chip installed. BUS cards use high impedance output,
+while STAR use low impedance. Low impedance card (STAR) is electrically
+equal to a high impedance one with a terminator installed.
+
+Usually, the ARCnet networks are built up from STAR cards and hubs. There
+are two types of hubs - active and passive. Passive hubs are small boxes
+with four BNC connectors containing four 47 Ohm resistors:
+
+ | | wires
+ R + junction
+-R-+-R- R 47 Ohm resistors
+ R
+ |
+
+The shielding is connected together. Active hubs are much more complicated;
+they are powered and contain electronics to amplify the signal and send it
+to other segments of the net. They usually have eight connectors. Active
+hubs come in two variants - dumb and smart. The dumb variant just
+amplifies, but the smart one decodes to digital and encodes back all packets
+coming through. This is much better if you have several hubs in the net,
+since many dumb active hubs may worsen the signal quality.
+
+And now to the cabling. What you can connect together:
+
+1. A card to a card. This is the simplest way of creating a 2-computer
+ network.
+
+2. A card to a passive hub. Remember that all unused connectors on the hub
+ must be properly terminated with 93 Ohm (or something else if you don't
+ have the right ones) terminators.
+ (Avery's note: oops, I didn't know that. Mine (TV cable) works
+ anyway, though.)
+
+3. A card to an active hub. Here is no need to terminate the unused
+ connectors except some kind of aesthetic feeling. But, there may not be
+ more than eleven active hubs between any two computers. That of course
+ doesn't limit the number of active hubs on the network.
+
+4. An active hub to another.
+
+5. An active hub to passive hub.
+
+Remember that you cannot connect two passive hubs together. The power loss
+implied by such a connection is too high for the net to operate reliably.
+
+An example of a typical ARCnet network:
+
+ R S - STAR type card
+ S------H--------A-------S R - Terminator
+ | | H - Hub
+ | | A - Active hub
+ | S----H----S
+ S |
+ |
+ S
+
+The BUS topology is very similar to the one used by Ethernet. The only
+difference is in cable and terminators: they should be 93 Ohm. Ethernet
+uses 50 Ohm impedance. You use T connectors to put the computers on a single
+line of cable, the bus. You have to put terminators at both ends of the
+cable. A typical BUS ARCnet network looks like:
+
+ RT----T------T------T------T------TR
+ B B B B B B
+
+ B - BUS type card
+ R - Terminator
+ T - T connector
+
+But that is not all! The two types can be connected together. According to
+the official documentation the only way of connecting them is using an active
+hub:
+
+ A------T------T------TR
+ | B B B
+ S---H---S
+ |
+ S
+
+The official docs also state that you can use STAR cards at the ends of
+BUS network in place of a BUS card and a terminator:
+
+ S------T------T------S
+ B B
+
+But, according to my own experiments, you can simply hang a BUS type card
+anywhere in middle of a cable in a STAR topology network. And more - you
+can use the bus card in place of any star card if you use a terminator. Then
+you can build very complicated networks fulfilling all your needs! An
+example:
+
+ S
+ |
+ RT------T-------T------H------S
+ B B B |
+ | R
+ S------A------T-------T-------A-------H------TR
+ | B B | | B
+ | S BT |
+ | | | S----A-----S
+ S------H---A----S | |
+ | | S------T----H---S |
+ S S B R S
+
+A basically different cabling scheme is used with Twisted Pair cabling. Each
+of the TP cards has two RJ (phone-cord style) connectors. The cards are
+then daisy-chained together using a cable connecting every two neighboring
+cards. The ends are terminated with RJ 93 Ohm terminators which plug into
+the empty connectors of cards on the ends of the chain. An example:
+
+ ___________ ___________
+ _R_|_ _|_|_ _|_R_
+ | | | | | |
+ |Card | |Card | |Card |
+ |_____| |_____| |_____|
+
+
+There are also hubs for the TP topology. There is nothing difficult
+involved in using them; you just connect a TP chain to a hub on any end or
+even at both. This way you can create almost any network configuration.
+The maximum of 11 hubs between any two computers on the net applies here as
+well. An example:
+
+ RP-------P--------P--------H-----P------P-----PR
+ |
+ RP-----H--------P--------H-----P------PR
+ | |
+ PR PR
+
+ R - RJ Terminator
+ P - TP Card
+ H - TP Hub
+
+Like any network, ARCnet has a limited cable length. These are the maximum
+cable lengths between two active ends (an active end being an active hub or
+a STAR card).
+
+ RG-62 93 Ohm up to 650 m
+ RG-59/U 75 Ohm up to 457 m
+ RG-11/U 75 Ohm up to 533 m
+ IBM Type 1 150 Ohm up to 200 m
+ IBM Type 3 100 Ohm up to 100 m
+
+The maximum length of all cables connected to a passive hub is limited to 65
+meters for RG-62 cabling; less for others. You can see that using passive
+hubs in a large network is a bad idea. The maximum length of a single "BUS
+Trunk" is about 300 meters for RG-62. The maximum distance between the two
+most distant points of the net is limited to 3000 meters. The maximum length
+of a TP cable between two cards/hubs is 650 meters.
+
+
+SETTING THE JUMPERS
+-------------------
+
+All ARCnet cards should have a total of four or five different settings:
+
+ - the I/O address: this is the "port" your ARCnet card is on. Probed
+ values in the Linux ARCnet driver are only from 0x200 through 0x3F0. (If
+ your card has additional ones, which is possible, please tell me.) This
+ should not be the same as any other device on your system. According to
+ a doc I got from Novell, MS Windows prefers values of 0x300 or more,
+ eating net connections on my system (at least) otherwise. My guess is
+ this may be because, if your card is at 0x2E0, probing for a serial port
+ at 0x2E8 will reset the card and probably mess things up royally.
+ - Avery's favourite: 0x300.
+
+ - the IRQ: on 8-bit cards, it might be 2 (9), 3, 4, 5, or 7.
+ on 16-bit cards, it might be 2 (9), 3, 4, 5, 7, or 10-15.
+
+ Make sure this is different from any other card on your system. Note
+ that IRQ2 is the same as IRQ9, as far as Linux is concerned. You can
+ "cat /proc/interrupts" for a somewhat complete list of which ones are in
+ use at any given time. Here is a list of common usages from Vojtech
+ Pavlik <vojtech@suse.cz>:
+ ("Not on bus" means there is no way for a card to generate this
+ interrupt)
+ IRQ 0 - Timer 0 (Not on bus)
+ IRQ 1 - Keyboard (Not on bus)
+ IRQ 2 - IRQ Controller 2 (Not on bus, nor does interrupt the CPU)
+ IRQ 3 - COM2
+ IRQ 4 - COM1
+ IRQ 5 - FREE (LPT2 if you have it; sometimes COM3; maybe PLIP)
+ IRQ 6 - Floppy disk controller
+ IRQ 7 - FREE (LPT1 if you don't use the polling driver; PLIP)
+ IRQ 8 - Realtime Clock Interrupt (Not on bus)
+ IRQ 9 - FREE (VGA vertical sync interrupt if enabled)
+ IRQ 10 - FREE
+ IRQ 11 - FREE
+ IRQ 12 - FREE
+ IRQ 13 - Numeric Coprocessor (Not on bus)
+ IRQ 14 - Fixed Disk Controller
+ IRQ 15 - FREE (Fixed Disk Controller 2 if you have it)
+
+ Note: IRQ 9 is used on some video cards for the "vertical retrace"
+ interrupt. This interrupt would have been handy for things like
+ video games, as it occurs exactly once per screen refresh, but
+ unfortunately IBM cancelled this feature starting with the original
+ VGA and thus many VGA/SVGA cards do not support it. For this
+ reason, no modern software uses this interrupt and it can almost
+ always be safely disabled, if your video card supports it at all.
+
+ If your card for some reason CANNOT disable this IRQ (usually there
+ is a jumper), one solution would be to clip the printed circuit
+ contact on the board: it's the fourth contact from the left on the
+ back side. I take no responsibility if you try this.
+
+ - Avery's favourite: IRQ2 (actually IRQ9). Watch that VGA, though.
+
+ - the memory address: Unlike most cards, ARCnets use "shared memory" for
+ copying buffers around. Make SURE it doesn't conflict with any other
+ used memory in your system!
+ A0000 - VGA graphics memory (ok if you don't have VGA)
+ B0000 - Monochrome text mode
+ C0000 \ One of these is your VGA BIOS - usually C0000.
+ E0000 /
+ F0000 - System BIOS
+
+ Anything less than 0xA0000 is, well, a BAD idea since it isn't above
+ 640k.
+ - Avery's favourite: 0xD0000
+
+ - the station address: Every ARCnet card has its own "unique" network
+ address from 0 to 255. Unlike Ethernet, you can set this address
+ yourself with a jumper or switch (or on some cards, with special
+ software). Since it's only 8 bits, you can only have 254 ARCnet cards
+ on a network. DON'T use 0 or 255, since these are reserved (although
+ neat stuff will probably happen if you DO use them). By the way, if you
+ haven't already guessed, don't set this the same as any other ARCnet on
+ your network!
+ - Avery's favourite: 3 and 4. Not that it matters.
+
+ - There may be ETS1 and ETS2 settings. These may or may not make a
+ difference on your card (many manuals call them "reserved"), but are
+ used to change the delays used when powering up a computer on the
+ network. This is only necessary when wiring VERY long range ARCnet
+ networks, on the order of 4km or so; in any case, the only real
+ requirement here is that all cards on the network with ETS1 and ETS2
+ jumpers have them in the same position. Chris Hindy <chrish@io.org>
+ sent in a chart with actual values for this:
+ ET1 ET2 Response Time Reconfiguration Time
+ --- --- ------------- --------------------
+ open open 74.7us 840us
+ open closed 283.4us 1680us
+ closed open 561.8us 1680us
+ closed closed 1118.6us 1680us
+
+ Make sure you set ETS1 and ETS2 to the SAME VALUE for all cards on your
+ network.
+
+Also, on many cards (not mine, though) there are red and green LED's.
+Vojtech Pavlik <vojtech@suse.cz> tells me this is what they mean:
+ GREEN RED Status
+ ----- --- ------
+ OFF OFF Power off
+ OFF Short flashes Cabling problems (broken cable or not
+ terminated)
+ OFF (short) ON Card init
+ ON ON Normal state - everything OK, nothing
+ happens
+ ON Long flashes Data transfer
+ ON OFF Never happens (maybe when wrong ID)
+
+
+The following is all the specific information people have sent me about
+their own particular ARCnet cards. It is officially a mess, and contains
+huge amounts of duplicated information. I have no time to fix it. If you
+want to, PLEASE DO! Just send me a 'diff -u' of all your changes.
+
+The model # is listed right above specifics for that card, so you should be
+able to use your text viewer's "search" function to find the entry you want.
+If you don't KNOW what kind of card you have, try looking through the
+various diagrams to see if you can tell.
+
+If your model isn't listed and/or has different settings, PLEASE PLEASE
+tell me. I had to figure mine out without the manual, and it WASN'T FUN!
+
+Even if your ARCnet model isn't listed, but has the same jumpers as another
+model that is, please e-mail me to say so.
+
+Cards Listed in this file (in this order, mostly):
+
+ Manufacturer Model # Bits
+ ------------ ------- ----
+ SMC PC100 8
+ SMC PC110 8
+ SMC PC120 8
+ SMC PC130 8
+ SMC PC270E 8
+ SMC PC500 16
+ SMC PC500Longboard 16
+ SMC PC550Longboard 16
+ SMC PC600 16
+ SMC PC710 8
+ SMC? LCS-8830(-T) 8/16
+ Puredata PDI507 8
+ CNet Tech CN120-Series 8
+ CNet Tech CN160-Series 16
+ Lantech? UM9065L chipset 8
+ Acer 5210-003 8
+ Datapoint? LAN-ARC-8 8
+ Topware TA-ARC/10 8
+ Thomas-Conrad 500-6242-0097 REV A 8
+ Waterloo? (C)1985 Waterloo Micro. 8
+ No Name -- 8/16
+ No Name Taiwan R.O.C? 8
+ No Name Model 9058 8
+ Tiara Tiara Lancard? 8
+
+
+** SMC = Standard Microsystems Corp.
+** CNet Tech = CNet Technology, Inc.
+
+
+Unclassified Stuff
+------------------
+ - Please send any other information you can find.
+
+ - And some other stuff (more info is welcome!):
+ From: root@ultraworld.xs4all.nl (Timo Hilbrink)
+ To: apenwarr@foxnet.net (Avery Pennarun)
+ Date: Wed, 26 Oct 1994 02:10:32 +0000 (GMT)
+ Reply-To: timoh@xs4all.nl
+
+ [...parts deleted...]
+
+ About the jumpers: On my PC130 there is one more jumper, located near the
+ cable-connector and it's for changing to star or bus topology;
+ closed: star - open: bus
+ On the PC500 are some more jumper-pins, one block labeled with RX,PDN,TXI
+ and another with ALE,LA17,LA18,LA19 these are undocumented..
+
+ [...more parts deleted...]
+
+ --- CUT ---
+
+
+** Standard Microsystems Corp (SMC) **
+PC100, PC110, PC120, PC130 (8-bit cards)
+PC500, PC600 (16-bit cards)
+---------------------------------
+ - mainly from Avery Pennarun <apenwarr@worldvisions.ca>. Values depicted
+ are from Avery's setup.
+ - special thanks to Timo Hilbrink <timoh@xs4all.nl> for noting that PC120,
+ 130, 500, and 600 all have the same switches as Avery's PC100.
+ PC500/600 have several extra, undocumented pins though. (?)
+ - PC110 settings were verified by Stephen A. Wood <saw@cebaf.gov>
+ - Also, the JP- and S-numbers probably don't match your card exactly. Try
+ to find jumpers/switches with the same number of settings - it's
+ probably more reliable.
+
+
+ JP5 [|] : : : :
+(IRQ Setting) IRQ2 IRQ3 IRQ4 IRQ5 IRQ7
+ Put exactly one jumper on exactly one set of pins.
+
+
+ 1 2 3 4 5 6 7 8 9 10
+ S1 /----------------------------------\
+(I/O and Memory | 1 1 * 0 0 0 0 * 1 1 0 1 |
+ addresses) \----------------------------------/
+ |--| |--------| |--------|
+ (a) (b) (m)
+
+ WARNING. It's very important when setting these which way
+ you're holding the card, and which way you think is '1'!
+
+ If you suspect that your settings are not being made
+ correctly, try reversing the direction or inverting the
+ switch positions.
+
+ a: The first digit of the I/O address.
+ Setting Value
+ ------- -----
+ 00 0
+ 01 1
+ 10 2
+ 11 3
+
+ b: The second digit of the I/O address.
+ Setting Value
+ ------- -----
+ 0000 0
+ 0001 1
+ 0010 2
+ ... ...
+ 1110 E
+ 1111 F
+
+ The I/O address is in the form ab0. For example, if
+ a is 0x2 and b is 0xE, the address will be 0x2E0.
+
+ DO NOT SET THIS LESS THAN 0x200!!!!!
+
+
+ m: The first digit of the memory address.
+ Setting Value
+ ------- -----
+ 0000 0
+ 0001 1
+ 0010 2
+ ... ...
+ 1110 E
+ 1111 F
+
+ The memory address is in the form m0000. For example, if
+ m is D, the address will be 0xD0000.
+
+ DO NOT SET THIS TO C0000, F0000, OR LESS THAN A0000!
+
+ 1 2 3 4 5 6 7 8
+ S2 /--------------------------\
+(Station Address) | 1 1 0 0 0 0 0 0 |
+ \--------------------------/
+
+ Setting Value
+ ------- -----
+ 00000000 00
+ 10000000 01
+ 01000000 02
+ ...
+ 01111111 FE
+ 11111111 FF
+
+ Note that this is binary with the digits reversed!
+
+ DO NOT SET THIS TO 0 OR 255 (0xFF)!
+
+
+*****************************************************************************
+
+** Standard Microsystems Corp (SMC) **
+PC130E/PC270E (8-bit cards)
+---------------------------
+ - from Juergen Seifert <seifert@htwm.de>
+
+
+STANDARD MICROSYSTEMS CORPORATION (SMC) ARCNET(R)-PC130E/PC270E
+===============================================================
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the following Original SMC Manual
+
+ "Configuration Guide for
+ ARCNET(R)-PC130E/PC270
+ Network Controller Boards
+ Pub. # 900.044A
+ June, 1989"
+
+ARCNET is a registered trademark of the Datapoint Corporation
+SMC is a registered trademark of the Standard Microsystems Corporation
+
+The PC130E is an enhanced version of the PC130 board, is equipped with a
+standard BNC female connector for connection to RG-62/U coax cable.
+Since this board is designed both for point-to-point connection in star
+networks and for connection to bus networks, it is downwardly compatible
+with all the other standard boards designed for coax networks (that is,
+the PC120, PC110 and PC100 star topology boards and the PC220, PC210 and
+PC200 bus topology boards).
+
+The PC270E is an enhanced version of the PC260 board, is equipped with two
+modular RJ11-type jacks for connection to twisted pair wiring.
+It can be used in a star or a daisy-chained network.
+
+
+ 8 7 6 5 4 3 2 1
+ ________________________________________________________________
+ | | S1 | |
+ | |_________________| |
+ | Offs|Base |I/O Addr |
+ | RAM Addr | ___|
+ | ___ ___ CR3 |___|
+ | | \/ | CR4 |___|
+ | | PROM | ___|
+ | | | N | | 8
+ | | SOCKET | o | | 7
+ | |________| d | | 6
+ | ___________________ e | | 5
+ | | | A | S | 4
+ | |oo| EXT2 | | d | 2 | 3
+ | |oo| EXT1 | SMC | d | | 2
+ | |oo| ROM | 90C63 | r |___| 1
+ | |oo| IRQ7 | | |o| _____|
+ | |oo| IRQ5 | | |o| | J1 |
+ | |oo| IRQ4 | | STAR |_____|
+ | |oo| IRQ3 | | | J2 |
+ | |oo| IRQ2 |___________________| |_____|
+ |___ ______________|
+ | |
+ |_____________________________________________|
+
+Legend:
+
+SMC 90C63 ARCNET Controller / Transceiver /Logic
+S1 1-3: I/O Base Address Select
+ 4-6: Memory Base Address Select
+ 7-8: RAM Offset Select
+S2 1-8: Node ID Select
+EXT Extended Timeout Select
+ROM ROM Enable Select
+STAR Selected - Star Topology (PC130E only)
+ Deselected - Bus Topology (PC130E only)
+CR3/CR4 Diagnostic LEDs
+J1 BNC RG62/U Connector (PC130E only)
+J1 6-position Telephone Jack (PC270E only)
+J2 6-position Telephone Jack (PC270E only)
+
+Setting one of the switches to Off/Open means "1", On/Closed means "0".
+
+
+Setting the Node ID
+-------------------
+
+The eight switches in group S2 are used to set the node ID.
+These switches work in a way similar to the PC100-series cards; see that
+entry for more information.
+
+
+Setting the I/O Base Address
+----------------------------
+
+The first three switches in switch group S1 are used to select one
+of eight possible I/O Base addresses using the following table
+
+
+ Switch | Hex I/O
+ 1 2 3 | Address
+ -------|--------
+ 0 0 0 | 260
+ 0 0 1 | 290
+ 0 1 0 | 2E0 (Manufacturer's default)
+ 0 1 1 | 2F0
+ 1 0 0 | 300
+ 1 0 1 | 350
+ 1 1 0 | 380
+ 1 1 1 | 3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+--------------------------------------------
+
+The memory buffer requires 2K of a 16K block of RAM. The base of this
+16K block can be located in any of eight positions.
+Switches 4-6 of switch group S1 select the Base of the 16K block.
+Within that 16K address space, the buffer may be assigned any one of four
+positions, determined by the offset, switches 7 and 8 of group S1.
+
+ Switch | Hex RAM | Hex ROM
+ 4 5 6 7 8 | Address | Address *)
+ -----------|---------|-----------
+ 0 0 0 0 0 | C0000 | C2000
+ 0 0 0 0 1 | C0800 | C2000
+ 0 0 0 1 0 | C1000 | C2000
+ 0 0 0 1 1 | C1800 | C2000
+ | |
+ 0 0 1 0 0 | C4000 | C6000
+ 0 0 1 0 1 | C4800 | C6000
+ 0 0 1 1 0 | C5000 | C6000
+ 0 0 1 1 1 | C5800 | C6000
+ | |
+ 0 1 0 0 0 | CC000 | CE000
+ 0 1 0 0 1 | CC800 | CE000
+ 0 1 0 1 0 | CD000 | CE000
+ 0 1 0 1 1 | CD800 | CE000
+ | |
+ 0 1 1 0 0 | D0000 | D2000 (Manufacturer's default)
+ 0 1 1 0 1 | D0800 | D2000
+ 0 1 1 1 0 | D1000 | D2000
+ 0 1 1 1 1 | D1800 | D2000
+ | |
+ 1 0 0 0 0 | D4000 | D6000
+ 1 0 0 0 1 | D4800 | D6000
+ 1 0 0 1 0 | D5000 | D6000
+ 1 0 0 1 1 | D5800 | D6000
+ | |
+ 1 0 1 0 0 | D8000 | DA000
+ 1 0 1 0 1 | D8800 | DA000
+ 1 0 1 1 0 | D9000 | DA000
+ 1 0 1 1 1 | D9800 | DA000
+ | |
+ 1 1 0 0 0 | DC000 | DE000
+ 1 1 0 0 1 | DC800 | DE000
+ 1 1 0 1 0 | DD000 | DE000
+ 1 1 0 1 1 | DD800 | DE000
+ | |
+ 1 1 1 0 0 | E0000 | E2000
+ 1 1 1 0 1 | E0800 | E2000
+ 1 1 1 1 0 | E1000 | E2000
+ 1 1 1 1 1 | E1800 | E2000
+
+*) To enable the 8K Boot PROM install the jumper ROM.
+ The default is jumper ROM not installed.
+
+
+Setting the Timeouts and Interrupt
+----------------------------------
+
+The jumpers labeled EXT1 and EXT2 are used to determine the timeout
+parameters. These two jumpers are normally left open.
+
+To select a hardware interrupt level set one (only one!) of the jumpers
+IRQ2, IRQ3, IRQ4, IRQ5, IRQ7. The Manufacturer's default is IRQ2.
+
+
+Configuring the PC130E for Star or Bus Topology
+-----------------------------------------------
+
+The single jumper labeled STAR is used to configure the PC130E board for
+star or bus topology.
+When the jumper is installed, the board may be used in a star network, when
+it is removed, the board can be used in a bus topology.
+
+
+Diagnostic LEDs
+---------------
+
+Two diagnostic LEDs are visible on the rear bracket of the board.
+The green LED monitors the network activity: the red one shows the
+board activity:
+
+ Green | Status Red | Status
+ -------|------------------- ---------|-------------------
+ on | normal activity flash/on | data transfer
+ blink | reconfiguration off | no data transfer;
+ off | defective board or | incorrect memory or
+ | node ID is zero | I/O address
+
+
+*****************************************************************************
+
+** Standard Microsystems Corp (SMC) **
+PC500/PC550 Longboard (16-bit cards)
+-------------------------------------
+ - from Juergen Seifert <seifert@htwm.de>
+
+
+STANDARD MICROSYSTEMS CORPORATION (SMC) ARCNET-PC500/PC550 Long Board
+=====================================================================
+
+Note: There is another Version of the PC500 called Short Version, which
+ is different in hard- and software! The most important differences
+ are:
+ - The long board has no Shared memory.
+ - On the long board the selection of the interrupt is done by binary
+ coded switch, on the short board directly by jumper.
+
+[Avery's note: pay special attention to that: the long board HAS NO SHARED
+MEMORY. This means the current Linux-ARCnet driver can't use these cards.
+I have obtained a PC500Longboard and will be doing some experiments on it in
+the future, but don't hold your breath. Thanks again to Juergen Seifert for
+his advice about this!]
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the following Original SMC Manual
+
+ "Configuration Guide for
+ SMC ARCNET-PC500/PC550
+ Series Network Controller Boards
+ Pub. # 900.033 Rev. A
+ November, 1989"
+
+ARCNET is a registered trademark of the Datapoint Corporation
+SMC is a registered trademark of the Standard Microsystems Corporation
+
+The PC500 is equipped with a standard BNC female connector for connection
+to RG-62/U coax cable.
+The board is designed both for point-to-point connection in star networks
+and for connection to bus networks.
+
+The PC550 is equipped with two modular RJ11-type jacks for connection
+to twisted pair wiring.
+It can be used in a star or a daisy-chained (BUS) network.
+
+ 1
+ 0 9 8 7 6 5 4 3 2 1 6 5 4 3 2 1
+ ____________________________________________________________________
+ < | SW1 | | SW2 | |
+ > |_____________________| |_____________| |
+ < IRQ |I/O Addr |
+ > ___|
+ < CR4 |___|
+ > CR3 |___|
+ < ___|
+ > N | | 8
+ < o | | 7
+ > d | S | 6
+ < e | W | 5
+ > A | 3 | 4
+ < d | | 3
+ > d | | 2
+ < r |___| 1
+ > |o| _____|
+ < |o| | J1 |
+ > 3 1 JP6 |_____|
+ < |o|o| JP2 | J2 |
+ > |o|o| |_____|
+ < 4 2__ ______________|
+ > | | |
+ <____| |_____________________________________________|
+
+Legend:
+
+SW1 1-6: I/O Base Address Select
+ 7-10: Interrupt Select
+SW2 1-6: Reserved for Future Use
+SW3 1-8: Node ID Select
+JP2 1-4: Extended Timeout Select
+JP6 Selected - Star Topology (PC500 only)
+ Deselected - Bus Topology (PC500 only)
+CR3 Green Monitors Network Activity
+CR4 Red Monitors Board Activity
+J1 BNC RG62/U Connector (PC500 only)
+J1 6-position Telephone Jack (PC550 only)
+J2 6-position Telephone Jack (PC550 only)
+
+Setting one of the switches to Off/Open means "1", On/Closed means "0".
+
+
+Setting the Node ID
+-------------------
+
+The eight switches in group SW3 are used to set the node ID. Each node
+attached to the network must have an unique node ID which must be
+different from 0.
+Switch 1 serves as the least significant bit (LSB).
+
+The node ID is the sum of the values of all switches set to "1"
+These values are:
+
+ Switch | Value
+ -------|-------
+ 1 | 1
+ 2 | 2
+ 3 | 4
+ 4 | 8
+ 5 | 16
+ 6 | 32
+ 7 | 64
+ 8 | 128
+
+Some Examples:
+
+ Switch | Hex | Decimal
+ 8 7 6 5 4 3 2 1 | Node ID | Node ID
+ ----------------|---------|---------
+ 0 0 0 0 0 0 0 0 | not allowed
+ 0 0 0 0 0 0 0 1 | 1 | 1
+ 0 0 0 0 0 0 1 0 | 2 | 2
+ 0 0 0 0 0 0 1 1 | 3 | 3
+ . . . | |
+ 0 1 0 1 0 1 0 1 | 55 | 85
+ . . . | |
+ 1 0 1 0 1 0 1 0 | AA | 170
+ . . . | |
+ 1 1 1 1 1 1 0 1 | FD | 253
+ 1 1 1 1 1 1 1 0 | FE | 254
+ 1 1 1 1 1 1 1 1 | FF | 255
+
+
+Setting the I/O Base Address
+----------------------------
+
+The first six switches in switch group SW1 are used to select one
+of 32 possible I/O Base addresses using the following table
+
+ Switch | Hex I/O
+ 6 5 4 3 2 1 | Address
+ -------------|--------
+ 0 1 0 0 0 0 | 200
+ 0 1 0 0 0 1 | 210
+ 0 1 0 0 1 0 | 220
+ 0 1 0 0 1 1 | 230
+ 0 1 0 1 0 0 | 240
+ 0 1 0 1 0 1 | 250
+ 0 1 0 1 1 0 | 260
+ 0 1 0 1 1 1 | 270
+ 0 1 1 0 0 0 | 280
+ 0 1 1 0 0 1 | 290
+ 0 1 1 0 1 0 | 2A0
+ 0 1 1 0 1 1 | 2B0
+ 0 1 1 1 0 0 | 2C0
+ 0 1 1 1 0 1 | 2D0
+ 0 1 1 1 1 0 | 2E0 (Manufacturer's default)
+ 0 1 1 1 1 1 | 2F0
+ 1 1 0 0 0 0 | 300
+ 1 1 0 0 0 1 | 310
+ 1 1 0 0 1 0 | 320
+ 1 1 0 0 1 1 | 330
+ 1 1 0 1 0 0 | 340
+ 1 1 0 1 0 1 | 350
+ 1 1 0 1 1 0 | 360
+ 1 1 0 1 1 1 | 370
+ 1 1 1 0 0 0 | 380
+ 1 1 1 0 0 1 | 390
+ 1 1 1 0 1 0 | 3A0
+ 1 1 1 0 1 1 | 3B0
+ 1 1 1 1 0 0 | 3C0
+ 1 1 1 1 0 1 | 3D0
+ 1 1 1 1 1 0 | 3E0
+ 1 1 1 1 1 1 | 3F0
+
+
+Setting the Interrupt
+---------------------
+
+Switches seven through ten of switch group SW1 are used to select the
+interrupt level. The interrupt level is binary coded, so selections
+from 0 to 15 would be possible, but only the following eight values will
+be supported: 3, 4, 5, 7, 9, 10, 11, 12.
+
+ Switch | IRQ
+ 10 9 8 7 |
+ ---------|--------
+ 0 0 1 1 | 3
+ 0 1 0 0 | 4
+ 0 1 0 1 | 5
+ 0 1 1 1 | 7
+ 1 0 0 1 | 9 (=2) (default)
+ 1 0 1 0 | 10
+ 1 0 1 1 | 11
+ 1 1 0 0 | 12
+
+
+Setting the Timeouts
+--------------------
+
+The two jumpers JP2 (1-4) are used to determine the timeout parameters.
+These two jumpers are normally left open.
+Refer to the COM9026 Data Sheet for alternate configurations.
+
+
+Configuring the PC500 for Star or Bus Topology
+----------------------------------------------
+
+The single jumper labeled JP6 is used to configure the PC500 board for
+star or bus topology.
+When the jumper is installed, the board may be used in a star network, when
+it is removed, the board can be used in a bus topology.
+
+
+Diagnostic LEDs
+---------------
+
+Two diagnostic LEDs are visible on the rear bracket of the board.
+The green LED monitors the network activity: the red one shows the
+board activity:
+
+ Green | Status Red | Status
+ -------|------------------- ---------|-------------------
+ on | normal activity flash/on | data transfer
+ blink | reconfiguration off | no data transfer;
+ off | defective board or | incorrect memory or
+ | node ID is zero | I/O address
+
+
+*****************************************************************************
+
+** SMC **
+PC710 (8-bit card)
+------------------
+ - from J.S. van Oosten <jvoosten@compiler.tdcnet.nl>
+
+Note: this data is gathered by experimenting and looking at info of other
+cards. However, I'm sure I got 99% of the settings right.
+
+The SMC710 card resembles the PC270 card, but is much more basic (i.e. no
+LEDs, RJ11 jacks, etc.) and 8 bit. Here's a little drawing:
+
+ _______________________________________
+ | +---------+ +---------+ |____
+ | | S2 | | S1 | |
+ | +---------+ +---------+ |
+ | |
+ | +===+ __ |
+ | | R | | | X-tal ###___
+ | | O | |__| ####__'|
+ | | M | || ###
+ | +===+ |
+ | |
+ | .. JP1 +----------+ |
+ | .. | big chip | |
+ | .. | 90C63 | |
+ | .. | | |
+ | .. +----------+ |
+ ------- -----------
+ |||||||||||||||||||||
+
+The row of jumpers at JP1 actually consists of 8 jumpers, (sometimes
+labelled) the same as on the PC270, from top to bottom: EXT2, EXT1, ROM,
+IRQ7, IRQ5, IRQ4, IRQ3, IRQ2 (gee, wonder what they would do? :-) )
+
+S1 and S2 perform the same function as on the PC270, only their numbers
+are swapped (S1 is the nodeaddress, S2 sets IO- and RAM-address).
+
+I know it works when connected to a PC110 type ARCnet board.
+
+
+*****************************************************************************
+
+** Possibly SMC **
+LCS-8830(-T) (8 and 16-bit cards)
+---------------------------------
+ - from Mathias Katzer <mkatzer@HRZ.Uni-Bielefeld.DE>
+ - Marek Michalkiewicz <marekm@i17linuxb.ists.pwr.wroc.pl> says the
+ LCS-8830 is slightly different from LCS-8830-T. These are 8 bit, BUS
+ only (the JP0 jumper is hardwired), and BNC only.
+
+This is a LCS-8830-T made by SMC, I think ('SMC' only appears on one PLCC,
+nowhere else, not even on the few Xeroxed sheets from the manual).
+
+SMC ARCnet Board Type LCS-8830-T
+
+ ------------------------------------
+ | |
+ | JP3 88 8 JP2 |
+ | ##### | \ |
+ | ##### ET1 ET2 ###|
+ | 8 ###|
+ | U3 SW 1 JP0 ###| Phone Jacks
+ | -- ###|
+ | | | |
+ | | | SW2 |
+ | | | |
+ | | | ##### |
+ | -- ##### #### BNC Connector
+ | ####
+ | 888888 JP1 |
+ | 234567 |
+ -- -------
+ |||||||||||||||||||||||||||
+ --------------------------
+
+
+SW1: DIP-Switches for Station Address
+SW2: DIP-Switches for Memory Base and I/O Base addresses
+
+JP0: If closed, internal termination on (default open)
+JP1: IRQ Jumpers
+JP2: Boot-ROM enabled if closed
+JP3: Jumpers for response timeout
+
+U3: Boot-ROM Socket
+
+
+ET1 ET2 Response Time Idle Time Reconfiguration Time
+
+ 78 86 840
+ X 285 316 1680
+ X 563 624 1680
+ X X 1130 1237 1680
+
+(X means closed jumper)
+
+(DIP-Switch downwards means "0")
+
+The station address is binary-coded with SW1.
+
+The I/O base address is coded with DIP-Switches 6,7 and 8 of SW2:
+
+Switches Base
+678 Address
+000 260-26f
+100 290-29f
+010 2e0-2ef
+110 2f0-2ff
+001 300-30f
+101 350-35f
+011 380-38f
+111 3e0-3ef
+
+
+DIP Switches 1-5 of SW2 encode the RAM and ROM Address Range:
+
+Switches RAM ROM
+12345 Address Range Address Range
+00000 C:0000-C:07ff C:2000-C:3fff
+10000 C:0800-C:0fff
+01000 C:1000-C:17ff
+11000 C:1800-C:1fff
+00100 C:4000-C:47ff C:6000-C:7fff
+10100 C:4800-C:4fff
+01100 C:5000-C:57ff
+11100 C:5800-C:5fff
+00010 C:C000-C:C7ff C:E000-C:ffff
+10010 C:C800-C:Cfff
+01010 C:D000-C:D7ff
+11010 C:D800-C:Dfff
+00110 D:0000-D:07ff D:2000-D:3fff
+10110 D:0800-D:0fff
+01110 D:1000-D:17ff
+11110 D:1800-D:1fff
+00001 D:4000-D:47ff D:6000-D:7fff
+10001 D:4800-D:4fff
+01001 D:5000-D:57ff
+11001 D:5800-D:5fff
+00101 D:8000-D:87ff D:A000-D:bfff
+10101 D:8800-D:8fff
+01101 D:9000-D:97ff
+11101 D:9800-D:9fff
+00011 D:C000-D:c7ff D:E000-D:ffff
+10011 D:C800-D:cfff
+01011 D:D000-D:d7ff
+11011 D:D800-D:dfff
+00111 E:0000-E:07ff E:2000-E:3fff
+10111 E:0800-E:0fff
+01111 E:1000-E:17ff
+11111 E:1800-E:1fff
+
+
+*****************************************************************************
+
+** PureData Corp **
+PDI507 (8-bit card)
+--------------------
+ - from Mark Rejhon <mdrejhon@magi.com> (slight modifications by Avery)
+ - Avery's note: I think PDI508 cards (but definitely NOT PDI508Plus cards)
+ are mostly the same as this. PDI508Plus cards appear to be mainly
+ software-configured.
+
+Jumpers:
+ There is a jumper array at the bottom of the card, near the edge
+ connector. This array is labelled J1. They control the IRQs and
+ something else. Put only one jumper on the IRQ pins.
+
+ ETS1, ETS2 are for timing on very long distance networks. See the
+ more general information near the top of this file.
+
+ There is a J2 jumper on two pins. A jumper should be put on them,
+ since it was already there when I got the card. I don't know what
+ this jumper is for though.
+
+ There is a two-jumper array for J3. I don't know what it is for,
+ but there were already two jumpers on it when I got the card. It's
+ a six pin grid in a two-by-three fashion. The jumpers were
+ configured as follows:
+
+ .-------.
+ o | o o |
+ :-------: ------> Accessible end of card with connectors
+ o | o o | in this direction ------->
+ `-------'
+
+Carl de Billy <CARL@carainfo.com> explains J3 and J4:
+
+ J3 Diagram:
+
+ .-------.
+ o | o o |
+ :-------: TWIST Technology
+ o | o o |
+ `-------'
+ .-------.
+ | o o | o
+ :-------: COAX Technology
+ | o o | o
+ `-------'
+
+ - If using coax cable in a bus topology the J4 jumper must be removed;
+ place it on one pin.
+
+ - If using bus topology with twisted pair wiring move the J3
+ jumpers so they connect the middle pin and the pins closest to the RJ11
+ Connectors. Also the J4 jumper must be removed; place it on one pin of
+ J4 jumper for storage.
+
+ - If using star topology with twisted pair wiring move the J3
+ jumpers so they connect the middle pin and the pins closest to the RJ11
+ connectors.
+
+
+DIP Switches:
+
+ The DIP switches accessible on the accessible end of the card while
+ it is installed, is used to set the ARCnet address. There are 8
+ switches. Use an address from 1 to 254.
+
+ Switch No.
+ 12345678 ARCnet address
+ -----------------------------------------
+ 00000000 FF (Don't use this!)
+ 00000001 FE
+ 00000010 FD
+ ....
+ 11111101 2
+ 11111110 1
+ 11111111 0 (Don't use this!)
+
+ There is another array of eight DIP switches at the top of the
+ card. There are five labelled MS0-MS4 which seem to control the
+ memory address, and another three labelled IO0-IO2 which seem to
+ control the base I/O address of the card.
+
+ This was difficult to test by trial and error, and the I/O addresses
+ are in a weird order. This was tested by setting the DIP switches,
+ rebooting the computer, and attempting to load ARCETHER at various
+ addresses (mostly between 0x200 and 0x400). The address that caused
+ the red transmit LED to blink, is the one that I thought works.
+
+ Also, the address 0x3D0 seem to have a special meaning, since the
+ ARCETHER packet driver loaded fine, but without the red LED
+ blinking. I don't know what 0x3D0 is for though. I recommend using
+ an address of 0x300 since Windows may not like addresses below
+ 0x300.
+
+ IO Switch No.
+ 210 I/O address
+ -------------------------------
+ 111 0x260
+ 110 0x290
+ 101 0x2E0
+ 100 0x2F0
+ 011 0x300
+ 010 0x350
+ 001 0x380
+ 000 0x3E0
+
+ The memory switches set a reserved address space of 0x1000 bytes
+ (0x100 segment units, or 4k). For example if I set an address of
+ 0xD000, it will use up addresses 0xD000 to 0xD100.
+
+ The memory switches were tested by booting using QEMM386 stealth,
+ and using LOADHI to see what address automatically became excluded
+ from the upper memory regions, and then attempting to load ARCETHER
+ using these addresses.
+
+ I recommend using an ARCnet memory address of 0xD000, and putting
+ the EMS page frame at 0xC000 while using QEMM stealth mode. That
+ way, you get contiguous high memory from 0xD100 almost all the way
+ the end of the megabyte.
+
+ Memory Switch 0 (MS0) didn't seem to work properly when set to OFF
+ on my card. It could be malfunctioning on my card. Experiment with
+ it ON first, and if it doesn't work, set it to OFF. (It may be a
+ modifier for the 0x200 bit?)
+
+ MS Switch No.
+ 43210 Memory address
+ --------------------------------
+ 00001 0xE100 (guessed - was not detected by QEMM)
+ 00011 0xE000 (guessed - was not detected by QEMM)
+ 00101 0xDD00
+ 00111 0xDC00
+ 01001 0xD900
+ 01011 0xD800
+ 01101 0xD500
+ 01111 0xD400
+ 10001 0xD100
+ 10011 0xD000
+ 10101 0xCD00
+ 10111 0xCC00
+ 11001 0xC900 (guessed - crashes tested system)
+ 11011 0xC800 (guessed - crashes tested system)
+ 11101 0xC500 (guessed - crashes tested system)
+ 11111 0xC400 (guessed - crashes tested system)
+
+
+*****************************************************************************
+
+** CNet Technology Inc. **
+120 Series (8-bit cards)
+------------------------
+ - from Juergen Seifert <seifert@htwm.de>
+
+
+CNET TECHNOLOGY INC. (CNet) ARCNET 120A SERIES
+==============================================
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the following Original CNet Manual
+
+ "ARCNET
+ USER'S MANUAL
+ for
+ CN120A
+ CN120AB
+ CN120TP
+ CN120ST
+ CN120SBT
+ P/N:12-01-0007
+ Revision 3.00"
+
+ARCNET is a registered trademark of the Datapoint Corporation
+
+P/N 120A ARCNET 8 bit XT/AT Star
+P/N 120AB ARCNET 8 bit XT/AT Bus
+P/N 120TP ARCNET 8 bit XT/AT Twisted Pair
+P/N 120ST ARCNET 8 bit XT/AT Star, Twisted Pair
+P/N 120SBT ARCNET 8 bit XT/AT Star, Bus, Twisted Pair
+
+ __________________________________________________________________
+ | |
+ | ___|
+ | LED |___|
+ | ___|
+ | N | | ID7
+ | o | | ID6
+ | d | S | ID5
+ | e | W | ID4
+ | ___________________ A | 2 | ID3
+ | | | d | | ID2
+ | | | 1 2 3 4 5 6 7 8 d | | ID1
+ | | | _________________ r |___| ID0
+ | | 90C65 || SW1 | ____|
+ | JP 8 7 | ||_________________| | |
+ | |o|o| JP1 | | | J2 |
+ | |o|o| |oo| | | JP 1 1 1 | |
+ | ______________ | | 0 1 2 |____|
+ | | PROM | |___________________| |o|o|o| _____|
+ | > SOCKET | JP 6 5 4 3 2 |o|o|o| | J1 |
+ | |______________| |o|o|o|o|o| |o|o|o| |_____|
+ |_____ |o|o|o|o|o| ______________|
+ | |
+ |_____________________________________________|
+
+Legend:
+
+90C65 ARCNET Probe
+S1 1-5: Base Memory Address Select
+ 6-8: Base I/O Address Select
+S2 1-8: Node ID Select (ID0-ID7)
+JP1 ROM Enable Select
+JP2 IRQ2
+JP3 IRQ3
+JP4 IRQ4
+JP5 IRQ5
+JP6 IRQ7
+JP7/JP8 ET1, ET2 Timeout Parameters
+JP10/JP11 Coax / Twisted Pair Select (CN120ST/SBT only)
+JP12 Terminator Select (CN120AB/ST/SBT only)
+J1 BNC RG62/U Connector (all except CN120TP)
+J2 Two 6-position Telephone Jack (CN120TP/ST/SBT only)
+
+Setting one of the switches to Off means "1", On means "0".
+
+
+Setting the Node ID
+-------------------
+
+The eight switches in SW2 are used to set the node ID. Each node attached
+to the network must have an unique node ID which must be different from 0.
+Switch 1 (ID0) serves as the least significant bit (LSB).
+
+The node ID is the sum of the values of all switches set to "1"
+These values are:
+
+ Switch | Label | Value
+ -------|-------|-------
+ 1 | ID0 | 1
+ 2 | ID1 | 2
+ 3 | ID2 | 4
+ 4 | ID3 | 8
+ 5 | ID4 | 16
+ 6 | ID5 | 32
+ 7 | ID6 | 64
+ 8 | ID7 | 128
+
+Some Examples:
+
+ Switch | Hex | Decimal
+ 8 7 6 5 4 3 2 1 | Node ID | Node ID
+ ----------------|---------|---------
+ 0 0 0 0 0 0 0 0 | not allowed
+ 0 0 0 0 0 0 0 1 | 1 | 1
+ 0 0 0 0 0 0 1 0 | 2 | 2
+ 0 0 0 0 0 0 1 1 | 3 | 3
+ . . . | |
+ 0 1 0 1 0 1 0 1 | 55 | 85
+ . . . | |
+ 1 0 1 0 1 0 1 0 | AA | 170
+ . . . | |
+ 1 1 1 1 1 1 0 1 | FD | 253
+ 1 1 1 1 1 1 1 0 | FE | 254
+ 1 1 1 1 1 1 1 1 | FF | 255
+
+
+Setting the I/O Base Address
+----------------------------
+
+The last three switches in switch block SW1 are used to select one
+of eight possible I/O Base addresses using the following table
+
+
+ Switch | Hex I/O
+ 6 7 8 | Address
+ ------------|--------
+ ON ON ON | 260
+ OFF ON ON | 290
+ ON OFF ON | 2E0 (Manufacturer's default)
+ OFF OFF ON | 2F0
+ ON ON OFF | 300
+ OFF ON OFF | 350
+ ON OFF OFF | 380
+ OFF OFF OFF | 3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+--------------------------------------------
+
+The memory buffer (RAM) requires 2K. The base of this buffer can be
+located in any of eight positions. The address of the Boot Prom is
+memory base + 8K or memory base + 0x2000.
+Switches 1-5 of switch block SW1 select the Memory Base address.
+
+ Switch | Hex RAM | Hex ROM
+ 1 2 3 4 5 | Address | Address *)
+ --------------------|---------|-----------
+ ON ON ON ON ON | C0000 | C2000
+ ON ON OFF ON ON | C4000 | C6000
+ ON ON ON OFF ON | CC000 | CE000
+ ON ON OFF OFF ON | D0000 | D2000 (Manufacturer's default)
+ ON ON ON ON OFF | D4000 | D6000
+ ON ON OFF ON OFF | D8000 | DA000
+ ON ON ON OFF OFF | DC000 | DE000
+ ON ON OFF OFF OFF | E0000 | E2000
+
+*) To enable the Boot ROM install the jumper JP1
+
+Note: Since the switches 1 and 2 are always set to ON it may be possible
+ that they can be used to add an offset of 2K, 4K or 6K to the base
+ address, but this feature is not documented in the manual and I
+ haven't tested it yet.
+
+
+Setting the Interrupt Line
+--------------------------
+
+To select a hardware interrupt level install one (only one!) of the jumpers
+JP2, JP3, JP4, JP5, JP6. JP2 is the default.
+
+ Jumper | IRQ
+ -------|-----
+ 2 | 2
+ 3 | 3
+ 4 | 4
+ 5 | 5
+ 6 | 7
+
+
+Setting the Internal Terminator on CN120AB/TP/SBT
+--------------------------------------------------
+
+The jumper JP12 is used to enable the internal terminator.
+
+ -----
+ 0 | 0 |
+ ----- ON | | ON
+ | 0 | | 0 |
+ | | OFF ----- OFF
+ | 0 | 0
+ -----
+ Terminator Terminator
+ disabled enabled
+
+
+Selecting the Connector Type on CN120ST/SBT
+-------------------------------------------
+
+ JP10 JP11 JP10 JP11
+ ----- -----
+ 0 0 | 0 | | 0 |
+ ----- ----- | | | |
+ | 0 | | 0 | | 0 | | 0 |
+ | | | | ----- -----
+ | 0 | | 0 | 0 0
+ ----- -----
+ Coaxial Cable Twisted Pair Cable
+ (Default)
+
+
+Setting the Timeout Parameters
+------------------------------
+
+The jumpers labeled EXT1 and EXT2 are used to determine the timeout
+parameters. These two jumpers are normally left open.
+
+
+
+*****************************************************************************
+
+** CNet Technology Inc. **
+160 Series (16-bit cards)
+-------------------------
+ - from Juergen Seifert <seifert@htwm.de>
+
+CNET TECHNOLOGY INC. (CNet) ARCNET 160A SERIES
+==============================================
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the following Original CNet Manual
+
+ "ARCNET
+ USER'S MANUAL
+ for
+ CN160A
+ CN160AB
+ CN160TP
+ P/N:12-01-0006
+ Revision 3.00"
+
+ARCNET is a registered trademark of the Datapoint Corporation
+
+P/N 160A ARCNET 16 bit XT/AT Star
+P/N 160AB ARCNET 16 bit XT/AT Bus
+P/N 160TP ARCNET 16 bit XT/AT Twisted Pair
+
+ ___________________________________________________________________
+ < _________________________ ___|
+ > |oo| JP2 | | LED |___|
+ < |oo| JP1 | 9026 | LED |___|
+ > |_________________________| ___|
+ < N | | ID7
+ > 1 o | | ID6
+ < 1 2 3 4 5 6 7 8 9 0 d | S | ID5
+ > _______________ _____________________ e | W | ID4
+ < | PROM | | SW1 | A | 2 | ID3
+ > > SOCKET | |_____________________| d | | ID2
+ < |_______________| | IO-Base | MEM | d | | ID1
+ > r |___| ID0
+ < ____|
+ > | |
+ < | J1 |
+ > | |
+ < |____|
+ > 1 1 1 1 |
+ < 3 4 5 6 7 JP 8 9 0 1 2 3 |
+ > |o|o|o|o|o| |o|o|o|o|o|o| |
+ < |o|o|o|o|o| __ |o|o|o|o|o|o| ___________|
+ > | | |
+ <____________| |_______________________________________|
+
+Legend:
+
+9026 ARCNET Probe
+SW1 1-6: Base I/O Address Select
+ 7-10: Base Memory Address Select
+SW2 1-8: Node ID Select (ID0-ID7)
+JP1/JP2 ET1, ET2 Timeout Parameters
+JP3-JP13 Interrupt Select
+J1 BNC RG62/U Connector (CN160A/AB only)
+J1 Two 6-position Telephone Jack (CN160TP only)
+LED
+
+Setting one of the switches to Off means "1", On means "0".
+
+
+Setting the Node ID
+-------------------
+
+The eight switches in SW2 are used to set the node ID. Each node attached
+to the network must have an unique node ID which must be different from 0.
+Switch 1 (ID0) serves as the least significant bit (LSB).
+
+The node ID is the sum of the values of all switches set to "1"
+These values are:
+
+ Switch | Label | Value
+ -------|-------|-------
+ 1 | ID0 | 1
+ 2 | ID1 | 2
+ 3 | ID2 | 4
+ 4 | ID3 | 8
+ 5 | ID4 | 16
+ 6 | ID5 | 32
+ 7 | ID6 | 64
+ 8 | ID7 | 128
+
+Some Examples:
+
+ Switch | Hex | Decimal
+ 8 7 6 5 4 3 2 1 | Node ID | Node ID
+ ----------------|---------|---------
+ 0 0 0 0 0 0 0 0 | not allowed
+ 0 0 0 0 0 0 0 1 | 1 | 1
+ 0 0 0 0 0 0 1 0 | 2 | 2
+ 0 0 0 0 0 0 1 1 | 3 | 3
+ . . . | |
+ 0 1 0 1 0 1 0 1 | 55 | 85
+ . . . | |
+ 1 0 1 0 1 0 1 0 | AA | 170
+ . . . | |
+ 1 1 1 1 1 1 0 1 | FD | 253
+ 1 1 1 1 1 1 1 0 | FE | 254
+ 1 1 1 1 1 1 1 1 | FF | 255
+
+
+Setting the I/O Base Address
+----------------------------
+
+The first six switches in switch block SW1 are used to select the I/O Base
+address using the following table:
+
+ Switch | Hex I/O
+ 1 2 3 4 5 6 | Address
+ ------------------------|--------
+ OFF ON ON OFF OFF ON | 260
+ OFF ON OFF ON ON OFF | 290
+ OFF ON OFF OFF OFF ON | 2E0 (Manufacturer's default)
+ OFF ON OFF OFF OFF OFF | 2F0
+ OFF OFF ON ON ON ON | 300
+ OFF OFF ON OFF ON OFF | 350
+ OFF OFF OFF ON ON ON | 380
+ OFF OFF OFF OFF OFF ON | 3E0
+
+Note: Other IO-Base addresses seem to be selectable, but only the above
+ combinations are documented.
+
+
+Setting the Base Memory (RAM) buffer Address
+--------------------------------------------
+
+The switches 7-10 of switch block SW1 are used to select the Memory
+Base address of the RAM (2K) and the PROM.
+
+ Switch | Hex RAM | Hex ROM
+ 7 8 9 10 | Address | Address
+ ----------------|---------|-----------
+ OFF OFF ON ON | C0000 | C8000
+ OFF OFF ON OFF | D0000 | D8000 (Default)
+ OFF OFF OFF ON | E0000 | E8000
+
+Note: Other MEM-Base addresses seem to be selectable, but only the above
+ combinations are documented.
+
+
+Setting the Interrupt Line
+--------------------------
+
+To select a hardware interrupt level install one (only one!) of the jumpers
+JP3 through JP13 using the following table:
+
+ Jumper | IRQ
+ -------|-----------------
+ 3 | 14
+ 4 | 15
+ 5 | 12
+ 6 | 11
+ 7 | 10
+ 8 | 3
+ 9 | 4
+ 10 | 5
+ 11 | 6
+ 12 | 7
+ 13 | 2 (=9) Default!
+
+Note: - Do not use JP11=IRQ6, it may conflict with your Floppy Disk
+ Controller
+ - Use JP3=IRQ14 only, if you don't have an IDE-, MFM-, or RLL-
+ Hard Disk, it may conflict with their controllers
+
+
+Setting the Timeout Parameters
+------------------------------
+
+The jumpers labeled JP1 and JP2 are used to determine the timeout
+parameters. These two jumpers are normally left open.
+
+
+*****************************************************************************
+
+** Lantech **
+8-bit card, unknown model
+-------------------------
+ - from Vlad Lungu <vlungu@ugal.ro> - his e-mail address seemed broken at
+ the time I tried to reach him. Sorry Vlad, if you didn't get my reply.
+
+ ________________________________________________________________
+ | 1 8 |
+ | ___________ __|
+ | | SW1 | LED |__|
+ | |__________| |
+ | ___|
+ | _____________________ |S | 8
+ | | | |W |
+ | | | |2 |
+ | | | |__| 1
+ | | UM9065L | |o| JP4 ____|____
+ | | | |o| | CN |
+ | | | |________|
+ | | | |
+ | |___________________| |
+ | |
+ | |
+ | _____________ |
+ | | | |
+ | | PROM | |ooooo| JP6 |
+ | |____________| |ooooo| |
+ |_____________ _ _|
+ |____________________________________________| |__|
+
+
+UM9065L : ARCnet Controller
+
+SW 1 : Shared Memory Address and I/O Base
+
+ ON=0
+
+ 12345|Memory Address
+ -----|--------------
+ 00001| D4000
+ 00010| CC000
+ 00110| D0000
+ 01110| D1000
+ 01101| D9000
+ 10010| CC800
+ 10011| DC800
+ 11110| D1800
+
+It seems that the bits are considered in reverse order. Also, you must
+observe that some of those addresses are unusual and I didn't probe them; I
+used a memory dump in DOS to identify them. For the 00000 configuration and
+some others that I didn't write here the card seems to conflict with the
+video card (an S3 GENDAC). I leave the full decoding of those addresses to
+you.
+
+ 678| I/O Address
+ ---|------------
+ 000| 260
+ 001| failed probe
+ 010| 2E0
+ 011| 380
+ 100| 290
+ 101| 350
+ 110| failed probe
+ 111| 3E0
+
+SW 2 : Node ID (binary coded)
+
+JP 4 : Boot PROM enable CLOSE - enabled
+ OPEN - disabled
+
+JP 6 : IRQ set (ONLY ONE jumper on 1-5 for IRQ 2-6)
+
+
+*****************************************************************************
+
+** Acer **
+8-bit card, Model 5210-003
+--------------------------
+ - from Vojtech Pavlik <vojtech@suse.cz> using portions of the existing
+ arcnet-hardware file.
+
+This is a 90C26 based card. Its configuration seems similar to the SMC
+PC100, but has some additional jumpers I don't know the meaning of.
+
+ __
+ | |
+ ___________|__|_________________________
+ | | | |
+ | | BNC | |
+ | |______| ___|
+ | _____________________ |___
+ | | | |
+ | | Hybrid IC | |
+ | | | o|o J1 |
+ | |_____________________| 8|8 |
+ | 8|8 J5 |
+ | o|o |
+ | 8|8 |
+ |__ 8|8 |
+ (|__| LED o|o |
+ | 8|8 |
+ | 8|8 J15 |
+ | |
+ | _____ |
+ | | | _____ |
+ | | | | | ___|
+ | | | | | |
+ | _____ | ROM | | UFS | |
+ | | | | | | | |
+ | | | ___ | | | | |
+ | | | | | |__.__| |__.__| |
+ | | NCR | |XTL| _____ _____ |
+ | | | |___| | | | | |
+ | |90C26| | | | | |
+ | | | | RAM | | UFS | |
+ | | | J17 o|o | | | | |
+ | | | J16 o|o | | | | |
+ | |__.__| |__.__| |__.__| |
+ | ___ |
+ | | |8 |
+ | |SW2| |
+ | | | |
+ | |___|1 |
+ | ___ |
+ | | |10 J18 o|o |
+ | | | o|o |
+ | |SW1| o|o |
+ | | | J21 o|o |
+ | |___|1 |
+ | |
+ |____________________________________|
+
+
+Legend:
+
+90C26 ARCNET Chip
+XTL 20 MHz Crystal
+SW1 1-6 Base I/O Address Select
+ 7-10 Memory Address Select
+SW2 1-8 Node ID Select (ID0-ID7)
+J1-J5 IRQ Select
+J6-J21 Unknown (Probably extra timeouts & ROM enable ...)
+LED1 Activity LED
+BNC Coax connector (STAR ARCnet)
+RAM 2k of SRAM
+ROM Boot ROM socket
+UFS Unidentified Flying Sockets
+
+
+Setting the Node ID
+-------------------
+
+The eight switches in SW2 are used to set the node ID. Each node attached
+to the network must have an unique node ID which must not be 0.
+Switch 1 (ID0) serves as the least significant bit (LSB).
+
+Setting one of the switches to OFF means "1", ON means "0".
+
+The node ID is the sum of the values of all switches set to "1"
+These values are:
+
+ Switch | Value
+ -------|-------
+ 1 | 1
+ 2 | 2
+ 3 | 4
+ 4 | 8
+ 5 | 16
+ 6 | 32
+ 7 | 64
+ 8 | 128
+
+Don't set this to 0 or 255; these values are reserved.
+
+
+Setting the I/O Base Address
+----------------------------
+
+The switches 1 to 6 of switch block SW1 are used to select one
+of 32 possible I/O Base addresses using the following tables
+
+ | Hex
+ Switch | Value
+ -------|-------
+ 1 | 200
+ 2 | 100
+ 3 | 80
+ 4 | 40
+ 5 | 20
+ 6 | 10
+
+The I/O address is sum of all switches set to "1". Remember that
+the I/O address space bellow 0x200 is RESERVED for mainboard, so
+switch 1 should be ALWAYS SET TO OFF.
+
+
+Setting the Base Memory (RAM) buffer Address
+--------------------------------------------
+
+The memory buffer (RAM) requires 2K. The base of this buffer can be
+located in any of sixteen positions. However, the addresses below
+A0000 are likely to cause system hang because there's main RAM.
+
+Jumpers 7-10 of switch block SW1 select the Memory Base address.
+
+ Switch | Hex RAM
+ 7 8 9 10 | Address
+ ----------------|---------
+ OFF OFF OFF OFF | F0000 (conflicts with main BIOS)
+ OFF OFF OFF ON | E0000
+ OFF OFF ON OFF | D0000
+ OFF OFF ON ON | C0000 (conflicts with video BIOS)
+ OFF ON OFF OFF | B0000 (conflicts with mono video)
+ OFF ON OFF ON | A0000 (conflicts with graphics)
+
+
+Setting the Interrupt Line
+--------------------------
+
+Jumpers 1-5 of the jumper block J1 control the IRQ level. ON means
+shorted, OFF means open.
+
+ Jumper | IRQ
+ 1 2 3 4 5 |
+ ----------------------------
+ ON OFF OFF OFF OFF | 7
+ OFF ON OFF OFF OFF | 5
+ OFF OFF ON OFF OFF | 4
+ OFF OFF OFF ON OFF | 3
+ OFF OFF OFF OFF ON | 2
+
+
+Unknown jumpers & sockets
+-------------------------
+
+I know nothing about these. I just guess that J16&J17 are timeout
+jumpers and maybe one of J18-J21 selects ROM. Also J6-J10 and
+J11-J15 are connecting IRQ2-7 to some pins on the UFSs. I can't
+guess the purpose.
+
+
+*****************************************************************************
+
+** Datapoint? **
+LAN-ARC-8, an 8-bit card
+------------------------
+ - from Vojtech Pavlik <vojtech@suse.cz>
+
+This is another SMC 90C65-based ARCnet card. I couldn't identify the
+manufacturer, but it might be DataPoint, because the card has the
+original arcNet logo in its upper right corner.
+
+ _______________________________________________________
+ | _________ |
+ | | SW2 | ON arcNet |
+ | |_________| OFF ___|
+ | _____________ 1 ______ 8 | | 8
+ | | | SW1 | XTAL | ____________ | S |
+ | > RAM (2k) | |______|| | | W |
+ | |_____________| | H | | 3 |
+ | _________|_____ y | |___| 1
+ | _________ | | |b | |
+ | |_________| | | |r | |
+ | | SMC | |i | |
+ | | 90C65| |d | |
+ | _________ | | | | |
+ | | SW1 | ON | | |I | |
+ | |_________| OFF |_________|_____/C | _____|
+ | 1 8 | | | |___
+ | ______________ | | | BNC |___|
+ | | | |____________| |_____|
+ | > EPROM SOCKET | _____________ |
+ | |______________| |_____________| |
+ | ______________|
+ | |
+ |________________________________________|
+
+Legend:
+
+90C65 ARCNET Chip
+SW1 1-5: Base Memory Address Select
+ 6-8: Base I/O Address Select
+SW2 1-8: Node ID Select
+SW3 1-5: IRQ Select
+ 6-7: Extra Timeout
+ 8 : ROM Enable
+BNC Coax connector
+XTAL 20 MHz Crystal
+
+
+Setting the Node ID
+-------------------
+
+The eight switches in SW3 are used to set the node ID. Each node attached
+to the network must have an unique node ID which must not be 0.
+Switch 1 serves as the least significant bit (LSB).
+
+Setting one of the switches to Off means "1", On means "0".
+
+The node ID is the sum of the values of all switches set to "1"
+These values are:
+
+ Switch | Value
+ -------|-------
+ 1 | 1
+ 2 | 2
+ 3 | 4
+ 4 | 8
+ 5 | 16
+ 6 | 32
+ 7 | 64
+ 8 | 128
+
+
+Setting the I/O Base Address
+----------------------------
+
+The last three switches in switch block SW1 are used to select one
+of eight possible I/O Base addresses using the following table
+
+
+ Switch | Hex I/O
+ 6 7 8 | Address
+ ------------|--------
+ ON ON ON | 260
+ OFF ON ON | 290
+ ON OFF ON | 2E0 (Manufacturer's default)
+ OFF OFF ON | 2F0
+ ON ON OFF | 300
+ OFF ON OFF | 350
+ ON OFF OFF | 380
+ OFF OFF OFF | 3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+--------------------------------------------
+
+The memory buffer (RAM) requires 2K. The base of this buffer can be
+located in any of eight positions. The address of the Boot Prom is
+memory base + 0x2000.
+Jumpers 3-5 of switch block SW1 select the Memory Base address.
+
+ Switch | Hex RAM | Hex ROM
+ 1 2 3 4 5 | Address | Address *)
+ --------------------|---------|-----------
+ ON ON ON ON ON | C0000 | C2000
+ ON ON OFF ON ON | C4000 | C6000
+ ON ON ON OFF ON | CC000 | CE000
+ ON ON OFF OFF ON | D0000 | D2000 (Manufacturer's default)
+ ON ON ON ON OFF | D4000 | D6000
+ ON ON OFF ON OFF | D8000 | DA000
+ ON ON ON OFF OFF | DC000 | DE000
+ ON ON OFF OFF OFF | E0000 | E2000
+
+*) To enable the Boot ROM set the switch 8 of switch block SW3 to position ON.
+
+The switches 1 and 2 probably add 0x0800 and 0x1000 to RAM base address.
+
+
+Setting the Interrupt Line
+--------------------------
+
+Switches 1-5 of the switch block SW3 control the IRQ level.
+
+ Jumper | IRQ
+ 1 2 3 4 5 |
+ ----------------------------
+ ON OFF OFF OFF OFF | 3
+ OFF ON OFF OFF OFF | 4
+ OFF OFF ON OFF OFF | 5
+ OFF OFF OFF ON OFF | 7
+ OFF OFF OFF OFF ON | 2
+
+
+Setting the Timeout Parameters
+------------------------------
+
+The switches 6-7 of the switch block SW3 are used to determine the timeout
+parameters. These two switches are normally left in the OFF position.
+
+
+*****************************************************************************
+
+** Topware **
+8-bit card, TA-ARC/10
+-------------------------
+ - from Vojtech Pavlik <vojtech@suse.cz>
+
+This is another very similar 90C65 card. Most of the switches and jumpers
+are the same as on other clones.
+
+ _____________________________________________________________________
+| ___________ | | ______ |
+| |SW2 NODE ID| | | | XTAL | |
+| |___________| | Hybrid IC | |______| |
+| ___________ | | __|
+| |SW1 MEM+I/O| |_________________________| LED1|__|)
+| |___________| 1 2 |
+| J3 |o|o| TIMEOUT ______|
+| ______________ |o|o| | |
+| | | ___________________ | RJ |
+| > EPROM SOCKET | | \ |------|
+|J2 |______________| | | | |
+||o| | | |______|
+||o| ROM ENABLE | SMC | _________ |
+| _____________ | 90C65 | |_________| _____|
+| | | | | | |___
+| > RAM (2k) | | | | BNC |___|
+| |_____________| | | |_____|
+| |____________________| |
+| ________ IRQ 2 3 4 5 7 ___________ |
+||________| |o|o|o|o|o| |___________| |
+|________ J1|o|o|o|o|o| ______________|
+ | |
+ |_____________________________________________|
+
+Legend:
+
+90C65 ARCNET Chip
+XTAL 20 MHz Crystal
+SW1 1-5 Base Memory Address Select
+ 6-8 Base I/O Address Select
+SW2 1-8 Node ID Select (ID0-ID7)
+J1 IRQ Select
+J2 ROM Enable
+J3 Extra Timeout
+LED1 Activity LED
+BNC Coax connector (BUS ARCnet)
+RJ Twisted Pair Connector (daisy chain)
+
+
+Setting the Node ID
+-------------------
+
+The eight switches in SW2 are used to set the node ID. Each node attached to
+the network must have an unique node ID which must not be 0. Switch 1 (ID0)
+serves as the least significant bit (LSB).
+
+Setting one of the switches to Off means "1", On means "0".
+
+The node ID is the sum of the values of all switches set to "1"
+These values are:
+
+ Switch | Label | Value
+ -------|-------|-------
+ 1 | ID0 | 1
+ 2 | ID1 | 2
+ 3 | ID2 | 4
+ 4 | ID3 | 8
+ 5 | ID4 | 16
+ 6 | ID5 | 32
+ 7 | ID6 | 64
+ 8 | ID7 | 128
+
+Setting the I/O Base Address
+----------------------------
+
+The last three switches in switch block SW1 are used to select one
+of eight possible I/O Base addresses using the following table:
+
+
+ Switch | Hex I/O
+ 6 7 8 | Address
+ ------------|--------
+ ON ON ON | 260 (Manufacturer's default)
+ OFF ON ON | 290
+ ON OFF ON | 2E0
+ OFF OFF ON | 2F0
+ ON ON OFF | 300
+ OFF ON OFF | 350
+ ON OFF OFF | 380
+ OFF OFF OFF | 3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+--------------------------------------------
+
+The memory buffer (RAM) requires 2K. The base of this buffer can be
+located in any of eight positions. The address of the Boot Prom is
+memory base + 0x2000.
+Jumpers 3-5 of switch block SW1 select the Memory Base address.
+
+ Switch | Hex RAM | Hex ROM
+ 1 2 3 4 5 | Address | Address *)
+ --------------------|---------|-----------
+ ON ON ON ON ON | C0000 | C2000
+ ON ON OFF ON ON | C4000 | C6000 (Manufacturer's default)
+ ON ON ON OFF ON | CC000 | CE000
+ ON ON OFF OFF ON | D0000 | D2000
+ ON ON ON ON OFF | D4000 | D6000
+ ON ON OFF ON OFF | D8000 | DA000
+ ON ON ON OFF OFF | DC000 | DE000
+ ON ON OFF OFF OFF | E0000 | E2000
+
+*) To enable the Boot ROM short the jumper J2.
+
+The jumpers 1 and 2 probably add 0x0800 and 0x1000 to RAM address.
+
+
+Setting the Interrupt Line
+--------------------------
+
+Jumpers 1-5 of the jumper block J1 control the IRQ level. ON means
+shorted, OFF means open.
+
+ Jumper | IRQ
+ 1 2 3 4 5 |
+ ----------------------------
+ ON OFF OFF OFF OFF | 2
+ OFF ON OFF OFF OFF | 3
+ OFF OFF ON OFF OFF | 4
+ OFF OFF OFF ON OFF | 5
+ OFF OFF OFF OFF ON | 7
+
+
+Setting the Timeout Parameters
+------------------------------
+
+The jumpers J3 are used to set the timeout parameters. These two
+jumpers are normally left open.
+
+
+*****************************************************************************
+
+** Thomas-Conrad **
+Model #500-6242-0097 REV A (8-bit card)
+---------------------------------------
+ - from Lars Karlsson <100617.3473@compuserve.com>
+
+ ________________________________________________________
+ | ________ ________ |_____
+ | |........| |........| |
+ | |________| |________| ___|
+ | SW 3 SW 1 | |
+ | Base I/O Base Addr. Station | |
+ | address | |
+ | ______ switch | |
+ | | | | |
+ | | | |___|
+ | | | ______ |___._
+ | |______| |______| ____| BNC
+ | Jumper- _____| Connector
+ | Main chip block _ __| '
+ | | | | RJ Connector
+ | |_| | with 110 Ohm
+ | |__ Terminator
+ | ___________ __|
+ | |...........| | RJ-jack
+ | |...........| _____ | (unused)
+ | |___________| |_____| |__
+ | Boot PROM socket IRQ-jumpers |_ Diagnostic
+ |________ __ _| LED (red)
+ | | | | | | | | | | | | | | | | | | | | | |
+ | | | | | | | | | | | | | | | | | | | | |________|
+ |
+ |
+
+And here are the settings for some of the switches and jumpers on the cards.
+
+
+ I/O
+
+ 1 2 3 4 5 6 7 8
+
+2E0----- 0 0 0 1 0 0 0 1
+2F0----- 0 0 0 1 0 0 0 0
+300----- 0 0 0 0 1 1 1 1
+350----- 0 0 0 0 1 1 1 0
+
+"0" in the above example means switch is off "1" means that it is on.
+
+
+ ShMem address.
+
+ 1 2 3 4 5 6 7 8
+
+CX00--0 0 1 1 | | |
+DX00--0 0 1 0 |
+X000--------- 1 1 |
+X400--------- 1 0 |
+X800--------- 0 1 |
+XC00--------- 0 0
+ENHANCED----------- 1
+COMPATIBLE--------- 0
+
+
+ IRQ
+
+
+ 3 4 5 7 2
+ . . . . .
+ . . . . .
+
+
+There is a DIP-switch with 8 switches, used to set the shared memory address
+to be used. The first 6 switches set the address, the 7th doesn't have any
+function, and the 8th switch is used to select "compatible" or "enhanced".
+When I got my two cards, one of them had this switch set to "enhanced". That
+card didn't work at all, it wasn't even recognized by the driver. The other
+card had this switch set to "compatible" and it behaved absolutely normally. I
+guess that the switch on one of the cards, must have been changed accidentally
+when the card was taken out of its former host. The question remains
+unanswered, what is the purpose of the "enhanced" position?
+
+[Avery's note: "enhanced" probably either disables shared memory (use IO
+ports instead) or disables IO ports (use memory addresses instead). This
+varies by the type of card involved. I fail to see how either of these
+enhance anything. Send me more detailed information about this mode, or
+just use "compatible" mode instead.]
+
+
+*****************************************************************************
+
+** Waterloo Microsystems Inc. ?? **
+8-bit card (C) 1985
+-------------------
+ - from Robert Michael Best <rmb117@cs.usask.ca>
+
+[Avery's note: these don't work with my driver for some reason. These cards
+SEEM to have settings similar to the PDI508Plus, which is
+software-configured and doesn't work with my driver either. The "Waterloo
+chip" is a boot PROM, probably designed specifically for the University of
+Waterloo. If you have any further information about this card, please
+e-mail me.]
+
+The probe has not been able to detect the card on any of the J2 settings,
+and I tried them again with the "Waterloo" chip removed.
+
+ _____________________________________________________________________
+| \/ \/ ___ __ __ |
+| C4 C4 |^| | M || ^ ||^| |
+| -- -- |_| | 5 || || | C3 |
+| \/ \/ C10 |___|| ||_| |
+| C4 C4 _ _ | | ?? |
+| -- -- | \/ || | |
+| | || | |
+| | || C1 | |
+| | || | \/ _____|
+| | C6 || | C9 | |___
+| | || | -- | BNC |___|
+| | || | >C7| |_____|
+| | || | |
+| __ __ |____||_____| 1 2 3 6 |
+|| ^ | >C4| |o|o|o|o|o|o| J2 >C4| |
+|| | |o|o|o|o|o|o| |
+|| C2 | >C4| >C4| |
+|| | >C8| |
+|| | 2 3 4 5 6 7 IRQ >C4| |
+||_____| |o|o|o|o|o|o| J3 |
+|_______ |o|o|o|o|o|o| _______________|
+ | |
+ |_____________________________________________|
+
+C1 -- "COM9026
+ SMC 8638"
+ In a chip socket.
+
+C2 -- "@Copyright
+ Waterloo Microsystems Inc.
+ 1985"
+ In a chip Socket with info printed on a label covering a round window
+ showing the circuit inside. (The window indicates it is an EPROM chip.)
+
+C3 -- "COM9032
+ SMC 8643"
+ In a chip socket.
+
+C4 -- "74LS"
+ 9 total no sockets.
+
+M5 -- "50006-136
+ 20.000000 MHZ
+ MTQ-T1-S3
+ 0 M-TRON 86-40"
+ Metallic case with 4 pins, no socket.
+
+C6 -- "MOSTEK@TC8643
+ MK6116N-20
+ MALAYSIA"
+ No socket.
+
+C7 -- No stamp or label but in a 20 pin chip socket.
+
+C8 -- "PAL10L8CN
+ 8623"
+ In a 20 pin socket.
+
+C9 -- "PAl16R4A-2CN
+ 8641"
+ In a 20 pin socket.
+
+C10 -- "M8640
+ NMC
+ 9306N"
+ In an 8 pin socket.
+
+?? -- Some components on a smaller board and attached with 20 pins all
+ along the side closest to the BNC connector. The are coated in a dark
+ resin.
+
+On the board there are two jumper banks labeled J2 and J3. The
+manufacturer didn't put a J1 on the board. The two boards I have both
+came with a jumper box for each bank.
+
+J2 -- Numbered 1 2 3 4 5 6.
+ 4 and 5 are not stamped due to solder points.
+
+J3 -- IRQ 2 3 4 5 6 7
+
+The board itself has a maple leaf stamped just above the irq jumpers
+and "-2 46-86" beside C2. Between C1 and C6 "ASS 'Y 300163" and "@1986
+CORMAN CUSTOM ELECTRONICS CORP." stamped just below the BNC connector.
+Below that "MADE IN CANADA"
+
+
+*****************************************************************************
+
+** No Name **
+8-bit cards, 16-bit cards
+-------------------------
+ - from Juergen Seifert <seifert@htwm.de>
+
+NONAME 8-BIT ARCNET
+===================
+
+I have named this ARCnet card "NONAME", since there is no name of any
+manufacturer on the Installation manual nor on the shipping box. The only
+hint to the existence of a manufacturer at all is written in copper,
+it is "Made in Taiwan"
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the Original
+ "ARCnet Installation Manual"
+
+
+ ________________________________________________________________
+ | |STAR| BUS| T/P| |
+ | |____|____|____| |
+ | _____________________ |
+ | | | |
+ | | | |
+ | | | |
+ | | SMC | |
+ | | | |
+ | | COM90C65 | |
+ | | | |
+ | | | |
+ | |__________-__________| |
+ | _____|
+ | _______________ | CN |
+ | | PROM | |_____|
+ | > SOCKET | |
+ | |_______________| 1 2 3 4 5 6 7 8 1 2 3 4 5 6 7 8 |
+ | _______________ _______________ |
+ | |o|o|o|o|o|o|o|o| | SW1 || SW2 ||
+ | |o|o|o|o|o|o|o|o| |_______________||_______________||
+ |___ 2 3 4 5 7 E E R Node ID IOB__|__MEM____|
+ | \ IRQ / T T O |
+ |__________________1_2_M______________________|
+
+Legend:
+
+COM90C65: ARCnet Probe
+S1 1-8: Node ID Select
+S2 1-3: I/O Base Address Select
+ 4-6: Memory Base Address Select
+ 7-8: RAM Offset Select
+ET1, ET2 Extended Timeout Select
+ROM ROM Enable Select
+CN RG62 Coax Connector
+STAR| BUS | T/P Three fields for placing a sign (colored circle)
+ indicating the topology of the card
+
+Setting one of the switches to Off means "1", On means "0".
+
+
+Setting the Node ID
+-------------------
+
+The eight switches in group SW1 are used to set the node ID.
+Each node attached to the network must have an unique node ID which
+must be different from 0.
+Switch 8 serves as the least significant bit (LSB).
+
+The node ID is the sum of the values of all switches set to "1"
+These values are:
+
+ Switch | Value
+ -------|-------
+ 8 | 1
+ 7 | 2
+ 6 | 4
+ 5 | 8
+ 4 | 16
+ 3 | 32
+ 2 | 64
+ 1 | 128
+
+Some Examples:
+
+ Switch | Hex | Decimal
+ 1 2 3 4 5 6 7 8 | Node ID | Node ID
+ ----------------|---------|---------
+ 0 0 0 0 0 0 0 0 | not allowed
+ 0 0 0 0 0 0 0 1 | 1 | 1
+ 0 0 0 0 0 0 1 0 | 2 | 2
+ 0 0 0 0 0 0 1 1 | 3 | 3
+ . . . | |
+ 0 1 0 1 0 1 0 1 | 55 | 85
+ . . . | |
+ 1 0 1 0 1 0 1 0 | AA | 170
+ . . . | |
+ 1 1 1 1 1 1 0 1 | FD | 253
+ 1 1 1 1 1 1 1 0 | FE | 254
+ 1 1 1 1 1 1 1 1 | FF | 255
+
+
+Setting the I/O Base Address
+----------------------------
+
+The first three switches in switch group SW2 are used to select one
+of eight possible I/O Base addresses using the following table
+
+ Switch | Hex I/O
+ 1 2 3 | Address
+ ------------|--------
+ ON ON ON | 260
+ ON ON OFF | 290
+ ON OFF ON | 2E0 (Manufacturer's default)
+ ON OFF OFF | 2F0
+ OFF ON ON | 300
+ OFF ON OFF | 350
+ OFF OFF ON | 380
+ OFF OFF OFF | 3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+--------------------------------------------
+
+The memory buffer requires 2K of a 16K block of RAM. The base of this
+16K block can be located in any of eight positions.
+Switches 4-6 of switch group SW2 select the Base of the 16K block.
+Within that 16K address space, the buffer may be assigned any one of four
+positions, determined by the offset, switches 7 and 8 of group SW2.
+
+ Switch | Hex RAM | Hex ROM
+ 4 5 6 7 8 | Address | Address *)
+ -----------|---------|-----------
+ 0 0 0 0 0 | C0000 | C2000
+ 0 0 0 0 1 | C0800 | C2000
+ 0 0 0 1 0 | C1000 | C2000
+ 0 0 0 1 1 | C1800 | C2000
+ | |
+ 0 0 1 0 0 | C4000 | C6000
+ 0 0 1 0 1 | C4800 | C6000
+ 0 0 1 1 0 | C5000 | C6000
+ 0 0 1 1 1 | C5800 | C6000
+ | |
+ 0 1 0 0 0 | CC000 | CE000
+ 0 1 0 0 1 | CC800 | CE000
+ 0 1 0 1 0 | CD000 | CE000
+ 0 1 0 1 1 | CD800 | CE000
+ | |
+ 0 1 1 0 0 | D0000 | D2000 (Manufacturer's default)
+ 0 1 1 0 1 | D0800 | D2000
+ 0 1 1 1 0 | D1000 | D2000
+ 0 1 1 1 1 | D1800 | D2000
+ | |
+ 1 0 0 0 0 | D4000 | D6000
+ 1 0 0 0 1 | D4800 | D6000
+ 1 0 0 1 0 | D5000 | D6000
+ 1 0 0 1 1 | D5800 | D6000
+ | |
+ 1 0 1 0 0 | D8000 | DA000
+ 1 0 1 0 1 | D8800 | DA000
+ 1 0 1 1 0 | D9000 | DA000
+ 1 0 1 1 1 | D9800 | DA000
+ | |
+ 1 1 0 0 0 | DC000 | DE000
+ 1 1 0 0 1 | DC800 | DE000
+ 1 1 0 1 0 | DD000 | DE000
+ 1 1 0 1 1 | DD800 | DE000
+ | |
+ 1 1 1 0 0 | E0000 | E2000
+ 1 1 1 0 1 | E0800 | E2000
+ 1 1 1 1 0 | E1000 | E2000
+ 1 1 1 1 1 | E1800 | E2000
+
+*) To enable the 8K Boot PROM install the jumper ROM.
+ The default is jumper ROM not installed.
+
+
+Setting Interrupt Request Lines (IRQ)
+-------------------------------------
+
+To select a hardware interrupt level set one (only one!) of the jumpers
+IRQ2, IRQ3, IRQ4, IRQ5 or IRQ7. The manufacturer's default is IRQ2.
+
+
+Setting the Timeouts
+--------------------
+
+The two jumpers labeled ET1 and ET2 are used to determine the timeout
+parameters (response and reconfiguration time). Every node in a network
+must be set to the same timeout values.
+
+ ET1 ET2 | Response Time (us) | Reconfiguration Time (ms)
+ --------|--------------------|--------------------------
+ Off Off | 78 | 840 (Default)
+ Off On | 285 | 1680
+ On Off | 563 | 1680
+ On On | 1130 | 1680
+
+On means jumper installed, Off means jumper not installed
+
+
+NONAME 16-BIT ARCNET
+====================
+
+The manual of my 8-Bit NONAME ARCnet Card contains another description
+of a 16-Bit Coax / Twisted Pair Card. This description is incomplete,
+because there are missing two pages in the manual booklet. (The table
+of contents reports pages ... 2-9, 2-11, 2-12, 3-1, ... but inside
+the booklet there is a different way of counting ... 2-9, 2-10, A-1,
+(empty page), 3-1, ..., 3-18, A-1 (again), A-2)
+Also the picture of the board layout is not as good as the picture of
+8-Bit card, because there isn't any letter like "SW1" written to the
+picture.
+Should somebody have such a board, please feel free to complete this
+description or to send a mail to me!
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the Original
+ "ARCnet Installation Manual"
+
+
+ ___________________________________________________________________
+ < _________________ _________________ |
+ > | SW? || SW? | |
+ < |_________________||_________________| |
+ > ____________________ |
+ < | | |
+ > | | |
+ < | | |
+ > | | |
+ < | | |
+ > | | |
+ < | | |
+ > |____________________| |
+ < ____|
+ > ____________________ | |
+ < | | | J1 |
+ > | < | |
+ < |____________________| ? ? ? ? ? ? |____|
+ > |o|o|o|o|o|o| |
+ < |o|o|o|o|o|o| |
+ > |
+ < __ ___________|
+ > | | |
+ <____________| |_______________________________________|
+
+
+Setting one of the switches to Off means "1", On means "0".
+
+
+Setting the Node ID
+-------------------
+
+The eight switches in group SW2 are used to set the node ID.
+Each node attached to the network must have an unique node ID which
+must be different from 0.
+Switch 8 serves as the least significant bit (LSB).
+
+The node ID is the sum of the values of all switches set to "1"
+These values are:
+
+ Switch | Value
+ -------|-------
+ 8 | 1
+ 7 | 2
+ 6 | 4
+ 5 | 8
+ 4 | 16
+ 3 | 32
+ 2 | 64
+ 1 | 128
+
+Some Examples:
+
+ Switch | Hex | Decimal
+ 1 2 3 4 5 6 7 8 | Node ID | Node ID
+ ----------------|---------|---------
+ 0 0 0 0 0 0 0 0 | not allowed
+ 0 0 0 0 0 0 0 1 | 1 | 1
+ 0 0 0 0 0 0 1 0 | 2 | 2
+ 0 0 0 0 0 0 1 1 | 3 | 3
+ . . . | |
+ 0 1 0 1 0 1 0 1 | 55 | 85
+ . . . | |
+ 1 0 1 0 1 0 1 0 | AA | 170
+ . . . | |
+ 1 1 1 1 1 1 0 1 | FD | 253
+ 1 1 1 1 1 1 1 0 | FE | 254
+ 1 1 1 1 1 1 1 1 | FF | 255
+
+
+Setting the I/O Base Address
+----------------------------
+
+The first three switches in switch group SW1 are used to select one
+of eight possible I/O Base addresses using the following table
+
+ Switch | Hex I/O
+ 3 2 1 | Address
+ ------------|--------
+ ON ON ON | 260
+ ON ON OFF | 290
+ ON OFF ON | 2E0 (Manufacturer's default)
+ ON OFF OFF | 2F0
+ OFF ON ON | 300
+ OFF ON OFF | 350
+ OFF OFF ON | 380
+ OFF OFF OFF | 3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+--------------------------------------------
+
+The memory buffer requires 2K of a 16K block of RAM. The base of this
+16K block can be located in any of eight positions.
+Switches 6-8 of switch group SW1 select the Base of the 16K block.
+Within that 16K address space, the buffer may be assigned any one of four
+positions, determined by the offset, switches 4 and 5 of group SW1.
+
+ Switch | Hex RAM | Hex ROM
+ 8 7 6 5 4 | Address | Address
+ -----------|---------|-----------
+ 0 0 0 0 0 | C0000 | C2000
+ 0 0 0 0 1 | C0800 | C2000
+ 0 0 0 1 0 | C1000 | C2000
+ 0 0 0 1 1 | C1800 | C2000
+ | |
+ 0 0 1 0 0 | C4000 | C6000
+ 0 0 1 0 1 | C4800 | C6000
+ 0 0 1 1 0 | C5000 | C6000
+ 0 0 1 1 1 | C5800 | C6000
+ | |
+ 0 1 0 0 0 | CC000 | CE000
+ 0 1 0 0 1 | CC800 | CE000
+ 0 1 0 1 0 | CD000 | CE000
+ 0 1 0 1 1 | CD800 | CE000
+ | |
+ 0 1 1 0 0 | D0000 | D2000 (Manufacturer's default)
+ 0 1 1 0 1 | D0800 | D2000
+ 0 1 1 1 0 | D1000 | D2000
+ 0 1 1 1 1 | D1800 | D2000
+ | |
+ 1 0 0 0 0 | D4000 | D6000
+ 1 0 0 0 1 | D4800 | D6000
+ 1 0 0 1 0 | D5000 | D6000
+ 1 0 0 1 1 | D5800 | D6000
+ | |
+ 1 0 1 0 0 | D8000 | DA000
+ 1 0 1 0 1 | D8800 | DA000
+ 1 0 1 1 0 | D9000 | DA000
+ 1 0 1 1 1 | D9800 | DA000
+ | |
+ 1 1 0 0 0 | DC000 | DE000
+ 1 1 0 0 1 | DC800 | DE000
+ 1 1 0 1 0 | DD000 | DE000
+ 1 1 0 1 1 | DD800 | DE000
+ | |
+ 1 1 1 0 0 | E0000 | E2000
+ 1 1 1 0 1 | E0800 | E2000
+ 1 1 1 1 0 | E1000 | E2000
+ 1 1 1 1 1 | E1800 | E2000
+
+
+Setting Interrupt Request Lines (IRQ)
+-------------------------------------
+
+??????????????????????????????????????
+
+
+Setting the Timeouts
+--------------------
+
+??????????????????????????????????????
+
+
+*****************************************************************************
+
+** No Name **
+8-bit cards ("Made in Taiwan R.O.C.")
+-----------
+ - from Vojtech Pavlik <vojtech@suse.cz>
+
+I have named this ARCnet card "NONAME", since I got only the card with
+no manual at all and the only text identifying the manufacturer is
+"MADE IN TAIWAN R.O.C" printed on the card.
+
+ ____________________________________________________________
+ | 1 2 3 4 5 6 7 8 |
+ | |o|o| JP1 o|o|o|o|o|o|o|o| ON |
+ | + o|o|o|o|o|o|o|o| ___|
+ | _____________ o|o|o|o|o|o|o|o| OFF _____ | | ID7
+ | | | SW1 | | | | ID6
+ | > RAM (2k) | ____________________ | H | | S | ID5
+ | |_____________| | || y | | W | ID4
+ | | || b | | 2 | ID3
+ | | || r | | | ID2
+ | | || i | | | ID1
+ | | 90C65 || d | |___| ID0
+ | SW3 | || | |
+ | |o|o|o|o|o|o|o|o| ON | || I | |
+ | |o|o|o|o|o|o|o|o| | || C | |
+ | |o|o|o|o|o|o|o|o| OFF |____________________|| | _____|
+ | 1 2 3 4 5 6 7 8 | | | |___
+ | ______________ | | | BNC |___|
+ | | | |_____| |_____|
+ | > EPROM SOCKET | |
+ | |______________| |
+ | ______________|
+ | |
+ |_____________________________________________|
+
+Legend:
+
+90C65 ARCNET Chip
+SW1 1-5: Base Memory Address Select
+ 6-8: Base I/O Address Select
+SW2 1-8: Node ID Select (ID0-ID7)
+SW3 1-5: IRQ Select
+ 6-7: Extra Timeout
+ 8 : ROM Enable
+JP1 Led connector
+BNC Coax connector
+
+Although the jumpers SW1 and SW3 are marked SW, not JP, they are jumpers, not
+switches.
+
+Setting the jumpers to ON means connecting the upper two pins, off the bottom
+two - or - in case of IRQ setting, connecting none of them at all.
+
+Setting the Node ID
+-------------------
+
+The eight switches in SW2 are used to set the node ID. Each node attached
+to the network must have an unique node ID which must not be 0.
+Switch 1 (ID0) serves as the least significant bit (LSB).
+
+Setting one of the switches to Off means "1", On means "0".
+
+The node ID is the sum of the values of all switches set to "1"
+These values are:
+
+ Switch | Label | Value
+ -------|-------|-------
+ 1 | ID0 | 1
+ 2 | ID1 | 2
+ 3 | ID2 | 4
+ 4 | ID3 | 8
+ 5 | ID4 | 16
+ 6 | ID5 | 32
+ 7 | ID6 | 64
+ 8 | ID7 | 128
+
+Some Examples:
+
+ Switch | Hex | Decimal
+ 8 7 6 5 4 3 2 1 | Node ID | Node ID
+ ----------------|---------|---------
+ 0 0 0 0 0 0 0 0 | not allowed
+ 0 0 0 0 0 0 0 1 | 1 | 1
+ 0 0 0 0 0 0 1 0 | 2 | 2
+ 0 0 0 0 0 0 1 1 | 3 | 3
+ . . . | |
+ 0 1 0 1 0 1 0 1 | 55 | 85
+ . . . | |
+ 1 0 1 0 1 0 1 0 | AA | 170
+ . . . | |
+ 1 1 1 1 1 1 0 1 | FD | 253
+ 1 1 1 1 1 1 1 0 | FE | 254
+ 1 1 1 1 1 1 1 1 | FF | 255
+
+
+Setting the I/O Base Address
+----------------------------
+
+The last three switches in switch block SW1 are used to select one
+of eight possible I/O Base addresses using the following table
+
+
+ Switch | Hex I/O
+ 6 7 8 | Address
+ ------------|--------
+ ON ON ON | 260
+ OFF ON ON | 290
+ ON OFF ON | 2E0 (Manufacturer's default)
+ OFF OFF ON | 2F0
+ ON ON OFF | 300
+ OFF ON OFF | 350
+ ON OFF OFF | 380
+ OFF OFF OFF | 3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+--------------------------------------------
+
+The memory buffer (RAM) requires 2K. The base of this buffer can be
+located in any of eight positions. The address of the Boot Prom is
+memory base + 0x2000.
+Jumpers 3-5 of jumper block SW1 select the Memory Base address.
+
+ Switch | Hex RAM | Hex ROM
+ 1 2 3 4 5 | Address | Address *)
+ --------------------|---------|-----------
+ ON ON ON ON ON | C0000 | C2000
+ ON ON OFF ON ON | C4000 | C6000
+ ON ON ON OFF ON | CC000 | CE000
+ ON ON OFF OFF ON | D0000 | D2000 (Manufacturer's default)
+ ON ON ON ON OFF | D4000 | D6000
+ ON ON OFF ON OFF | D8000 | DA000
+ ON ON ON OFF OFF | DC000 | DE000
+ ON ON OFF OFF OFF | E0000 | E2000
+
+*) To enable the Boot ROM set the jumper 8 of jumper block SW3 to position ON.
+
+The jumpers 1 and 2 probably add 0x0800, 0x1000 and 0x1800 to RAM adders.
+
+Setting the Interrupt Line
+--------------------------
+
+Jumpers 1-5 of the jumper block SW3 control the IRQ level.
+
+ Jumper | IRQ
+ 1 2 3 4 5 |
+ ----------------------------
+ ON OFF OFF OFF OFF | 2
+ OFF ON OFF OFF OFF | 3
+ OFF OFF ON OFF OFF | 4
+ OFF OFF OFF ON OFF | 5
+ OFF OFF OFF OFF ON | 7
+
+
+Setting the Timeout Parameters
+------------------------------
+
+The jumpers 6-7 of the jumper block SW3 are used to determine the timeout
+parameters. These two jumpers are normally left in the OFF position.
+
+
+*****************************************************************************
+
+** No Name **
+(Generic Model 9058)
+--------------------
+ - from Andrew J. Kroll <ag784@freenet.buffalo.edu>
+ - Sorry this sat in my to-do box for so long, Andrew! (yikes - over a
+ year!)
+ _____
+ | <
+ | .---'
+ ________________________________________________________________ | |
+ | | SW2 | | |
+ | ___________ |_____________| | |
+ | | | 1 2 3 4 5 6 ___| |
+ | > 6116 RAM | _________ 8 | | |
+ | |___________| |20MHzXtal| 7 | | |
+ | |_________| __________ 6 | S | |
+ | 74LS373 | |- 5 | W | |
+ | _________ | E |- 4 | | |
+ | >_______| ______________|..... P |- 3 | 3 | |
+ | | | : O |- 2 | | |
+ | | | : X |- 1 |___| |
+ | ________________ | | : Y |- | |
+ | | SW1 | | SL90C65 | : |- | |
+ | |________________| | | : B |- | |
+ | 1 2 3 4 5 6 7 8 | | : O |- | |
+ | |_________o____|..../ A |- _______| |
+ | ____________________ | R |- | |------,
+ | | | | D |- | BNC | # |
+ | > 2764 PROM SOCKET | |__________|- |_______|------'
+ | |____________________| _________ | |
+ | >________| <- 74LS245 | |
+ | | |
+ |___ ______________| |
+ |H H H H H H H H H H H H H H H H H H H H H H H| | |
+ |U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U| | |
+ \|
+Legend:
+
+SL90C65 ARCNET Controller / Transceiver /Logic
+SW1 1-5: IRQ Select
+ 6: ET1
+ 7: ET2
+ 8: ROM ENABLE
+SW2 1-3: Memory Buffer/PROM Address
+ 3-6: I/O Address Map
+SW3 1-8: Node ID Select
+BNC BNC RG62/U Connection
+ *I* have had success using RG59B/U with *NO* terminators!
+ What gives?!
+
+SW1: Timeouts, Interrupt and ROM
+---------------------------------
+
+To select a hardware interrupt level set one (only one!) of the dip switches
+up (on) SW1...(switches 1-5)
+IRQ3, IRQ4, IRQ5, IRQ7, IRQ2. The Manufacturer's default is IRQ2.
+
+The switches on SW1 labeled EXT1 (switch 6) and EXT2 (switch 7)
+are used to determine the timeout parameters. These two dip switches
+are normally left off (down).
+
+ To enable the 8K Boot PROM position SW1 switch 8 on (UP) labeled ROM.
+ The default is jumper ROM not installed.
+
+
+Setting the I/O Base Address
+----------------------------
+
+The last three switches in switch group SW2 are used to select one
+of eight possible I/O Base addresses using the following table
+
+
+ Switch | Hex I/O
+ 4 5 6 | Address
+ -------|--------
+ 0 0 0 | 260
+ 0 0 1 | 290
+ 0 1 0 | 2E0 (Manufacturer's default)
+ 0 1 1 | 2F0
+ 1 0 0 | 300
+ 1 0 1 | 350
+ 1 1 0 | 380
+ 1 1 1 | 3E0
+
+
+Setting the Base Memory Address (RAM & ROM)
+-------------------------------------------
+
+The memory buffer requires 2K of a 16K block of RAM. The base of this
+16K block can be located in any of eight positions.
+Switches 1-3 of switch group SW2 select the Base of the 16K block.
+(0 = DOWN, 1 = UP)
+I could, however, only verify two settings...
+
+ Switch| Hex RAM | Hex ROM
+ 1 2 3 | Address | Address
+ ------|---------|-----------
+ 0 0 0 | E0000 | E2000
+ 0 0 1 | D0000 | D2000 (Manufacturer's default)
+ 0 1 0 | ????? | ?????
+ 0 1 1 | ????? | ?????
+ 1 0 0 | ????? | ?????
+ 1 0 1 | ????? | ?????
+ 1 1 0 | ????? | ?????
+ 1 1 1 | ????? | ?????
+
+
+Setting the Node ID
+-------------------
+
+The eight switches in group SW3 are used to set the node ID.
+Each node attached to the network must have an unique node ID which
+must be different from 0.
+Switch 1 serves as the least significant bit (LSB).
+switches in the DOWN position are OFF (0) and in the UP position are ON (1)
+
+The node ID is the sum of the values of all switches set to "1"
+These values are:
+ Switch | Value
+ -------|-------
+ 1 | 1
+ 2 | 2
+ 3 | 4
+ 4 | 8
+ 5 | 16
+ 6 | 32
+ 7 | 64
+ 8 | 128
+
+Some Examples:
+
+ Switch# | Hex | Decimal
+8 7 6 5 4 3 2 1 | Node ID | Node ID
+----------------|---------|---------
+0 0 0 0 0 0 0 0 | not allowed <-.
+0 0 0 0 0 0 0 1 | 1 | 1 |
+0 0 0 0 0 0 1 0 | 2 | 2 |
+0 0 0 0 0 0 1 1 | 3 | 3 |
+ . . . | | |
+0 1 0 1 0 1 0 1 | 55 | 85 |
+ . . . | | + Don't use 0 or 255!
+1 0 1 0 1 0 1 0 | AA | 170 |
+ . . . | | |
+1 1 1 1 1 1 0 1 | FD | 253 |
+1 1 1 1 1 1 1 0 | FE | 254 |
+1 1 1 1 1 1 1 1 | FF | 255 <-'
+
+
+*****************************************************************************
+
+** Tiara **
+(model unknown)
+-------------------------
+ - from Christoph Lameter <christoph@lameter.com>
+
+
+Here is information about my card as far as I could figure it out:
+----------------------------------------------- tiara
+Tiara LanCard of Tiara Computer Systems.
+
++----------------------------------------------+
+! ! Transmitter Unit ! !
+! +------------------+ -------
+! MEM Coax Connector
+! ROM 7654321 <- I/O -------
+! : : +--------+ !
+! : : ! 90C66LJ! +++
+! : : ! ! !D Switch to set
+! : : ! ! !I the Nodenumber
+! : : +--------+ !P
+! !++
+! 234567 <- IRQ !
++------------!!!!!!!!!!!!!!!!!!!!!!!!--------+
+ !!!!!!!!!!!!!!!!!!!!!!!!
+
+0 = Jumper Installed
+1 = Open
+
+Top Jumper line Bit 7 = ROM Enable 654=Memory location 321=I/O
+
+Settings for Memory Location (Top Jumper Line)
+456 Address selected
+000 C0000
+001 C4000
+010 CC000
+011 D0000
+100 D4000
+101 D8000
+110 DC000
+111 E0000
+
+Settings for I/O Address (Top Jumper Line)
+123 Port
+000 260
+001 290
+010 2E0
+011 2F0
+100 300
+101 350
+110 380
+111 3E0
+
+Settings for IRQ Selection (Lower Jumper Line)
+234567
+011111 IRQ 2
+101111 IRQ 3
+110111 IRQ 4
+111011 IRQ 5
+111110 IRQ 7
+
+*****************************************************************************
+
+
+Other Cards
+-----------
+
+I have no information on other models of ARCnet cards at the moment. Please
+send any and all info to:
+ apenwarr@worldvisions.ca
+
+Thanks.
diff --git a/Documentation/networking/arcnet.txt b/Documentation/networking/arcnet.txt
new file mode 100644
index 000000000..aff97f47c
--- /dev/null
+++ b/Documentation/networking/arcnet.txt
@@ -0,0 +1,556 @@
+----------------------------------------------------------------------------
+NOTE: See also arcnet-hardware.txt in this directory for jumper-setting
+and cabling information if you're like many of us and didn't happen to get a
+manual with your ARCnet card.
+----------------------------------------------------------------------------
+
+Since no one seems to listen to me otherwise, perhaps a poem will get your
+attention:
+ This driver's getting fat and beefy,
+ But my cat is still named Fifi.
+
+Hmm, I think I'm allowed to call that a poem, even though it's only two
+lines. Hey, I'm in Computer Science, not English. Give me a break.
+
+The point is: I REALLY REALLY REALLY REALLY REALLY want to hear from you if
+you test this and get it working. Or if you don't. Or anything.
+
+ARCnet 0.32 ALPHA first made it into the Linux kernel 1.1.80 - this was
+nice, but after that even FEWER people started writing to me because they
+didn't even have to install the patch. <sigh>
+
+Come on, be a sport! Send me a success report!
+
+(hey, that was even better than my original poem... this is getting bad!)
+
+
+--------
+WARNING:
+--------
+
+If you don't e-mail me about your success/failure soon, I may be forced to
+start SINGING. And we don't want that, do we?
+
+(You know, it might be argued that I'm pushing this point a little too much.
+If you think so, why not flame me in a quick little e-mail? Please also
+include the type of card(s) you're using, software, size of network, and
+whether it's working or not.)
+
+My e-mail address is: apenwarr@worldvisions.ca
+
+
+---------------------------------------------------------------------------
+
+
+These are the ARCnet drivers for Linux.
+
+
+This new release (2.91) has been put together by David Woodhouse
+<dwmw2@infradead.org>, in an attempt to tidy up the driver after adding support
+for yet another chipset. Now the generic support has been separated from the
+individual chipset drivers, and the source files aren't quite so packed with
+#ifdefs! I've changed this file a bit, but kept it in the first person from
+Avery, because I didn't want to completely rewrite it.
+
+The previous release resulted from many months of on-and-off effort from me
+(Avery Pennarun), many bug reports/fixes and suggestions from others, and in
+particular a lot of input and coding from Tomasz Motylewski. Starting with
+ARCnet 2.10 ALPHA, Tomasz's all-new-and-improved RFC1051 support has been
+included and seems to be working fine!
+
+
+Where do I discuss these drivers?
+---------------------------------
+
+Tomasz has been so kind as to set up a new and improved mailing list.
+Subscribe by sending a message with the BODY "subscribe linux-arcnet YOUR
+REAL NAME" to listserv@tichy.ch.uj.edu.pl. Then, to submit messages to the
+list, mail to linux-arcnet@tichy.ch.uj.edu.pl.
+
+There are archives of the mailing list at:
+ http://epistolary.org/mailman/listinfo.cgi/arcnet
+
+The people on linux-net@vger.kernel.org (now defunct, replaced by
+netdev@vger.kernel.org) have also been known to be very helpful, especially
+when we're talking about ALPHA Linux kernels that may or may not work right
+in the first place.
+
+
+Other Drivers and Info
+----------------------
+
+You can try my ARCNET page on the World Wide Web at:
+ http://www.qis.net/~jschmitz/arcnet/
+
+Also, SMC (one of the companies that makes ARCnet cards) has a WWW site you
+might be interested in, which includes several drivers for various cards
+including ARCnet. Try:
+ http://www.smc.com/
+
+Performance Technologies makes various network software that supports
+ARCnet:
+ http://www.perftech.com/ or ftp to ftp.perftech.com.
+
+Novell makes a networking stack for DOS which includes ARCnet drivers. Try
+FTPing to ftp.novell.com.
+
+You can get the Crynwr packet driver collection (including arcether.com, the
+one you'll want to use with ARCnet cards) from
+oak.oakland.edu:/simtel/msdos/pktdrvr. It won't work perfectly on a 386+
+without patches, though, and also doesn't like several cards. Fixed
+versions are available on my WWW page, or via e-mail if you don't have WWW
+access.
+
+
+Installing the Driver
+---------------------
+
+All you will need to do in order to install the driver is:
+ make config
+ (be sure to choose ARCnet in the network devices
+ and at least one chipset driver.)
+ make clean
+ make zImage
+
+If you obtained this ARCnet package as an upgrade to the ARCnet driver in
+your current kernel, you will need to first copy arcnet.c over the one in
+the linux/drivers/net directory.
+
+You will know the driver is installed properly if you get some ARCnet
+messages when you reboot into the new Linux kernel.
+
+There are four chipset options:
+
+ 1. Standard ARCnet COM90xx chipset.
+
+This is the normal ARCnet card, which you've probably got. This is the only
+chipset driver which will autoprobe if not told where the card is.
+It following options on the command line:
+ com90xx=[<io>[,<irq>[,<shmem>]]][,<name>] | <name>
+
+If you load the chipset support as a module, the options are:
+ io=<io> irq=<irq> shmem=<shmem> device=<name>
+
+To disable the autoprobe, just specify "com90xx=" on the kernel command line.
+To specify the name alone, but allow autoprobe, just put "com90xx=<name>"
+
+ 2. ARCnet COM20020 chipset.
+
+This is the new chipset from SMC with support for promiscuous mode (packet
+sniffing), extra diagnostic information, etc. Unfortunately, there is no
+sensible method of autoprobing for these cards. You must specify the I/O
+address on the kernel command line.
+The command line options are:
+ com20020=<io>[,<irq>[,<node_ID>[,backplane[,CKP[,timeout]]]]][,name]
+
+If you load the chipset support as a module, the options are:
+ io=<io> irq=<irq> node=<node_ID> backplane=<backplane> clock=<CKP>
+ timeout=<timeout> device=<name>
+
+The COM20020 chipset allows you to set the node ID in software, overriding the
+default which is still set in DIP switches on the card. If you don't have the
+COM20020 data sheets, and you don't know what the other three options refer
+to, then they won't interest you - forget them.
+
+ 3. ARCnet COM90xx chipset in IO-mapped mode.
+
+This will also work with the normal ARCnet cards, but doesn't use the shared
+memory. It performs less well than the above driver, but is provided in case
+you have a card which doesn't support shared memory, or (strangely) in case
+you have so many ARCnet cards in your machine that you run out of shmem slots.
+If you don't give the IO address on the kernel command line, then the driver
+will not find the card.
+The command line options are:
+ com90io=<io>[,<irq>][,<name>]
+
+If you load the chipset support as a module, the options are:
+ io=<io> irq=<irq> device=<name>
+
+ 4. ARCnet RIM I cards.
+
+These are COM90xx chips which are _completely_ memory mapped. The support for
+these is not tested. If you have one, please mail the author with a success
+report. All options must be specified, except the device name.
+Command line options:
+ arcrimi=<shmem>,<irq>,<node_ID>[,<name>]
+
+If you load the chipset support as a module, the options are:
+ shmem=<shmem> irq=<irq> node=<node_ID> device=<name>
+
+
+Loadable Module Support
+-----------------------
+
+Configure and rebuild Linux. When asked, answer 'm' to "Generic ARCnet
+support" and to support for your ARCnet chipset if you want to use the
+loadable module. You can also say 'y' to "Generic ARCnet support" and 'm'
+to the chipset support if you wish.
+
+ make config
+ make clean
+ make zImage
+ make modules
+
+If you're using a loadable module, you need to use insmod to load it, and
+you can specify various characteristics of your card on the command
+line. (In recent versions of the driver, autoprobing is much more reliable
+and works as a module, so most of this is now unnecessary.)
+
+For example:
+ cd /usr/src/linux/modules
+ insmod arcnet.o
+ insmod com90xx.o
+ insmod com20020.o io=0x2e0 device=eth1
+
+
+Using the Driver
+----------------
+
+If you build your kernel with ARCnet COM90xx support included, it should
+probe for your card automatically when you boot. If you use a different
+chipset driver complied into the kernel, you must give the necessary options
+on the kernel command line, as detailed above.
+
+Go read the NET-2-HOWTO and ETHERNET-HOWTO for Linux; they should be
+available where you picked up this driver. Think of your ARCnet as a
+souped-up (or down, as the case may be) Ethernet card.
+
+By the way, be sure to change all references from "eth0" to "arc0" in the
+HOWTOs. Remember that ARCnet isn't a "true" Ethernet, and the device name
+is DIFFERENT.
+
+
+Multiple Cards in One Computer
+------------------------------
+
+Linux has pretty good support for this now, but since I've been busy, the
+ARCnet driver has somewhat suffered in this respect. COM90xx support, if
+compiled into the kernel, will (try to) autodetect all the installed cards.
+
+If you have other cards, with support compiled into the kernel, then you can
+just repeat the options on the kernel command line, e.g.:
+LILO: linux com20020=0x2e0 com20020=0x380 com90io=0x260
+
+If you have the chipset support built as a loadable module, then you need to
+do something like this:
+ insmod -o arc0 com90xx
+ insmod -o arc1 com20020 io=0x2e0
+ insmod -o arc2 com90xx
+The ARCnet drivers will now sort out their names automatically.
+
+
+How do I get it to work with...?
+--------------------------------
+
+NFS: Should be fine linux->linux, just pretend you're using Ethernet cards.
+ oak.oakland.edu:/simtel/msdos/nfs has some nice DOS clients. There
+ is also a DOS-based NFS server called SOSS. It doesn't multitask
+ quite the way Linux does (actually, it doesn't multitask AT ALL) but
+ you never know what you might need.
+
+ With AmiTCP (and possibly others), you may need to set the following
+ options in your Amiga nfstab: MD 1024 MR 1024 MW 1024
+ (Thanks to Christian Gottschling <ferksy@indigo.tng.oche.de>
+ for this.)
+
+ Probably these refer to maximum NFS data/read/write block sizes. I
+ don't know why the defaults on the Amiga didn't work; write to me if
+ you know more.
+
+DOS: If you're using the freeware arcether.com, you might want to install
+ the driver patch from my web page. It helps with PC/TCP, and also
+ can get arcether to load if it timed out too quickly during
+ initialization. In fact, if you use it on a 386+ you REALLY need
+ the patch, really.
+
+Windows: See DOS :) Trumpet Winsock works fine with either the Novell or
+ Arcether client, assuming you remember to load winpkt of course.
+
+LAN Manager and Windows for Workgroups: These programs use protocols that
+ are incompatible with the Internet standard. They try to pretend
+ the cards are Ethernet, and confuse everyone else on the network.
+
+ However, v2.00 and higher of the Linux ARCnet driver supports this
+ protocol via the 'arc0e' device. See the section on "Multiprotocol
+ Support" for more information.
+
+ Using the freeware Samba server and clients for Linux, you can now
+ interface quite nicely with TCP/IP-based WfWg or Lan Manager
+ networks.
+
+Windows 95: Tools are included with Win95 that let you use either the LANMAN
+ style network drivers (NDIS) or Novell drivers (ODI) to handle your
+ ARCnet packets. If you use ODI, you'll need to use the 'arc0'
+ device with Linux. If you use NDIS, then try the 'arc0e' device.
+ See the "Multiprotocol Support" section below if you need arc0e,
+ you're completely insane, and/or you need to build some kind of
+ hybrid network that uses both encapsulation types.
+
+OS/2: I've been told it works under Warp Connect with an ARCnet driver from
+ SMC. You need to use the 'arc0e' interface for this. If you get
+ the SMC driver to work with the TCP/IP stuff included in the
+ "normal" Warp Bonus Pack, let me know.
+
+ ftp.microsoft.com also has a freeware "Lan Manager for OS/2" client
+ which should use the same protocol as WfWg does. I had no luck
+ installing it under Warp, however. Please mail me with any results.
+
+NetBSD/AmiTCP: These use an old version of the Internet standard ARCnet
+ protocol (RFC1051) which is compatible with the Linux driver v2.10
+ ALPHA and above using the arc0s device. (See "Multiprotocol ARCnet"
+ below.) ** Newer versions of NetBSD apparently support RFC1201.
+
+
+Using Multiprotocol ARCnet
+--------------------------
+
+The ARCnet driver v2.10 ALPHA supports three protocols, each on its own
+"virtual network device":
+
+ arc0 - RFC1201 protocol, the official Internet standard which just
+ happens to be 100% compatible with Novell's TRXNET driver.
+ Version 1.00 of the ARCnet driver supported _only_ this
+ protocol. arc0 is the fastest of the three protocols (for
+ whatever reason), and allows larger packets to be used
+ because it supports RFC1201 "packet splitting" operations.
+ Unless you have a specific need to use a different protocol,
+ I strongly suggest that you stick with this one.
+
+ arc0e - "Ethernet-Encapsulation" which sends packets over ARCnet
+ that are actually a lot like Ethernet packets, including the
+ 6-byte hardware addresses. This protocol is compatible with
+ Microsoft's NDIS ARCnet driver, like the one in WfWg and
+ LANMAN. Because the MTU of 493 is actually smaller than the
+ one "required" by TCP/IP (576), there is a chance that some
+ network operations will not function properly. The Linux
+ TCP/IP layer can compensate in most cases, however, by
+ automatically fragmenting the TCP/IP packets to make them
+ fit. arc0e also works slightly more slowly than arc0, for
+ reasons yet to be determined. (Probably it's the smaller
+ MTU that does it.)
+
+ arc0s - The "[s]imple" RFC1051 protocol is the "previous" Internet
+ standard that is completely incompatible with the new
+ standard. Some software today, however, continues to
+ support the old standard (and only the old standard)
+ including NetBSD and AmiTCP. RFC1051 also does not support
+ RFC1201's packet splitting, and the MTU of 507 is still
+ smaller than the Internet "requirement," so it's quite
+ possible that you may run into problems. It's also slower
+ than RFC1201 by about 25%, for the same reason as arc0e.
+
+ The arc0s support was contributed by Tomasz Motylewski
+ and modified somewhat by me. Bugs are probably my fault.
+
+You can choose not to compile arc0e and arc0s into the driver if you want -
+this will save you a bit of memory and avoid confusion when eg. trying to
+use the "NFS-root" stuff in recent Linux kernels.
+
+The arc0e and arc0s devices are created automatically when you first
+ifconfig the arc0 device. To actually use them, though, you need to also
+ifconfig the other virtual devices you need. There are a number of ways you
+can set up your network then:
+
+
+1. Single Protocol.
+
+ This is the simplest way to configure your network: use just one of the
+ two available protocols. As mentioned above, it's a good idea to use
+ only arc0 unless you have a good reason (like some other software, ie.
+ WfWg, that only works with arc0e).
+
+ If you need only arc0, then the following commands should get you going:
+ ifconfig arc0 MY.IP.ADD.RESS
+ route add MY.IP.ADD.RESS arc0
+ route add -net SUB.NET.ADD.RESS arc0
+ [add other local routes here]
+
+ If you need arc0e (and only arc0e), it's a little different:
+ ifconfig arc0 MY.IP.ADD.RESS
+ ifconfig arc0e MY.IP.ADD.RESS
+ route add MY.IP.ADD.RESS arc0e
+ route add -net SUB.NET.ADD.RESS arc0e
+
+ arc0s works much the same way as arc0e.
+
+
+2. More than one protocol on the same wire.
+
+ Now things start getting confusing. To even try it, you may need to be
+ partly crazy. Here's what *I* did. :) Note that I don't include arc0s in
+ my home network; I don't have any NetBSD or AmiTCP computers, so I only
+ use arc0s during limited testing.
+
+ I have three computers on my home network; two Linux boxes (which prefer
+ RFC1201 protocol, for reasons listed above), and one XT that can't run
+ Linux but runs the free Microsoft LANMAN Client instead.
+
+ Worse, one of the Linux computers (freedom) also has a modem and acts as
+ a router to my Internet provider. The other Linux box (insight) also has
+ its own IP address and needs to use freedom as its default gateway. The
+ XT (patience), however, does not have its own Internet IP address and so
+ I assigned it one on a "private subnet" (as defined by RFC1597).
+
+ To start with, take a simple network with just insight and freedom.
+ Insight needs to:
+ - talk to freedom via RFC1201 (arc0) protocol, because I like it
+ more and it's faster.
+ - use freedom as its Internet gateway.
+
+ That's pretty easy to do. Set up insight like this:
+ ifconfig arc0 insight
+ route add insight arc0
+ route add freedom arc0 /* I would use the subnet here (like I said
+ to to in "single protocol" above),
+ but the rest of the subnet
+ unfortunately lies across the PPP
+ link on freedom, which confuses
+ things. */
+ route add default gw freedom
+
+ And freedom gets configured like so:
+ ifconfig arc0 freedom
+ route add freedom arc0
+ route add insight arc0
+ /* and default gateway is configured by pppd */
+
+ Great, now insight talks to freedom directly on arc0, and sends packets
+ to the Internet through freedom. If you didn't know how to do the above,
+ you should probably stop reading this section now because it only gets
+ worse.
+
+ Now, how do I add patience into the network? It will be using LANMAN
+ Client, which means I need the arc0e device. It needs to be able to talk
+ to both insight and freedom, and also use freedom as a gateway to the
+ Internet. (Recall that patience has a "private IP address" which won't
+ work on the Internet; that's okay, I configured Linux IP masquerading on
+ freedom for this subnet).
+
+ So patience (necessarily; I don't have another IP number from my
+ provider) has an IP address on a different subnet than freedom and
+ insight, but needs to use freedom as an Internet gateway. Worse, most
+ DOS networking programs, including LANMAN, have braindead networking
+ schemes that rely completely on the netmask and a 'default gateway' to
+ determine how to route packets. This means that to get to freedom or
+ insight, patience WILL send through its default gateway, regardless of
+ the fact that both freedom and insight (courtesy of the arc0e device)
+ could understand a direct transmission.
+
+ I compensate by giving freedom an extra IP address - aliased 'gatekeeper'
+ - that is on my private subnet, the same subnet that patience is on. I
+ then define gatekeeper to be the default gateway for patience.
+
+ To configure freedom (in addition to the commands above):
+ ifconfig arc0e gatekeeper
+ route add gatekeeper arc0e
+ route add patience arc0e
+
+ This way, freedom will send all packets for patience through arc0e,
+ giving its IP address as gatekeeper (on the private subnet). When it
+ talks to insight or the Internet, it will use its "freedom" Internet IP
+ address.
+
+ You will notice that we haven't configured the arc0e device on insight.
+ This would work, but is not really necessary, and would require me to
+ assign insight another special IP number from my private subnet. Since
+ both insight and patience are using freedom as their default gateway, the
+ two can already talk to each other.
+
+ It's quite fortunate that I set things up like this the first time (cough
+ cough) because it's really handy when I boot insight into DOS. There, it
+ runs the Novell ODI protocol stack, which only works with RFC1201 ARCnet.
+ In this mode it would be impossible for insight to communicate directly
+ with patience, since the Novell stack is incompatible with Microsoft's
+ Ethernet-Encap. Without changing any settings on freedom or patience, I
+ simply set freedom as the default gateway for insight (now in DOS,
+ remember) and all the forwarding happens "automagically" between the two
+ hosts that would normally not be able to communicate at all.
+
+ For those who like diagrams, I have created two "virtual subnets" on the
+ same physical ARCnet wire. You can picture it like this:
+
+
+ [RFC1201 NETWORK] [ETHER-ENCAP NETWORK]
+ (registered Internet subnet) (RFC1597 private subnet)
+
+ (IP Masquerade)
+ /---------------\ * /---------------\
+ | | * | |
+ | +-Freedom-*-Gatekeeper-+ |
+ | | | * | |
+ \-------+-------/ | * \-------+-------/
+ | | |
+ Insight | Patience
+ (Internet)
+
+
+
+It works: what now?
+-------------------
+
+Send mail describing your setup, preferably including driver version, kernel
+version, ARCnet card model, CPU type, number of systems on your network, and
+list of software in use to me at the following address:
+ apenwarr@worldvisions.ca
+
+I do send (sometimes automated) replies to all messages I receive. My email
+can be weird (and also usually gets forwarded all over the place along the
+way to me), so if you don't get a reply within a reasonable time, please
+resend.
+
+
+It doesn't work: what now?
+--------------------------
+
+Do the same as above, but also include the output of the ifconfig and route
+commands, as well as any pertinent log entries (ie. anything that starts
+with "arcnet:" and has shown up since the last reboot) in your mail.
+
+If you want to try fixing it yourself (I strongly recommend that you mail me
+about the problem first, since it might already have been solved) you may
+want to try some of the debug levels available. For heavy testing on
+D_DURING or more, it would be a REALLY good idea to kill your klogd daemon
+first! D_DURING displays 4-5 lines for each packet sent or received. D_TX,
+D_RX, and D_SKB actually DISPLAY each packet as it is sent or received,
+which is obviously quite big.
+
+Starting with v2.40 ALPHA, the autoprobe routines have changed
+significantly. In particular, they won't tell you why the card was not
+found unless you turn on the D_INIT_REASONS debugging flag.
+
+Once the driver is running, you can run the arcdump shell script (available
+from me or in the full ARCnet package, if you have it) as root to list the
+contents of the arcnet buffers at any time. To make any sense at all out of
+this, you should grab the pertinent RFCs. (some are listed near the top of
+arcnet.c). arcdump assumes your card is at 0xD0000. If it isn't, edit the
+script.
+
+Buffers 0 and 1 are used for receiving, and Buffers 2 and 3 are for sending.
+Ping-pong buffers are implemented both ways.
+
+If your debug level includes D_DURING and you did NOT define SLOW_XMIT_COPY,
+the buffers are cleared to a constant value of 0x42 every time the card is
+reset (which should only happen when you do an ifconfig up, or when Linux
+decides that the driver is broken). During a transmit, unused parts of the
+buffer will be cleared to 0x42 as well. This is to make it easier to figure
+out which bytes are being used by a packet.
+
+You can change the debug level without recompiling the kernel by typing:
+ ifconfig arc0 down metric 1xxx
+ /etc/rc.d/rc.inet1
+where "xxx" is the debug level you want. For example, "metric 1015" would put
+you at debug level 15. Debug level 7 is currently the default.
+
+Note that the debug level is (starting with v1.90 ALPHA) a binary
+combination of different debug flags; so debug level 7 is really 1+2+4 or
+D_NORMAL+D_EXTRA+D_INIT. To include D_DURING, you would add 16 to this,
+resulting in debug level 23.
+
+If you don't understand that, you probably don't want to know anyway.
+E-mail me about your problem.
+
+
+I want to send money: what now?
+-------------------------------
+
+Go take a nap or something. You'll feel better in the morning.
diff --git a/Documentation/networking/atm.txt b/Documentation/networking/atm.txt
new file mode 100644
index 000000000..82921cee7
--- /dev/null
+++ b/Documentation/networking/atm.txt
@@ -0,0 +1,8 @@
+In order to use anything but the most primitive functions of ATM,
+several user-mode programs are required to assist the kernel. These
+programs and related material can be found via the ATM on Linux Web
+page at http://linux-atm.sourceforge.net/
+
+If you encounter problems with ATM, please report them on the ATM
+on Linux mailing list. Subscription information, archives, etc.,
+can be found on http://linux-atm.sourceforge.net/
diff --git a/Documentation/networking/ax25.txt b/Documentation/networking/ax25.txt
new file mode 100644
index 000000000..8257dbf9b
--- /dev/null
+++ b/Documentation/networking/ax25.txt
@@ -0,0 +1,10 @@
+To use the amateur radio protocols within Linux you will need to get a
+suitable copy of the AX.25 Utilities. More detailed information about
+AX.25, NET/ROM and ROSE, associated programs and and utilities can be
+found on http://www.linux-ax25.org.
+
+There is an active mailing list for discussing Linux amateur radio matters
+called linux-hams@vger.kernel.org. To subscribe to it, send a message to
+majordomo@vger.kernel.org with the words "subscribe linux-hams" in the body
+of the message, the subject field is ignored. You don't need to be
+subscribed to post but of course that means you might miss an answer.
diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt
new file mode 100644
index 000000000..58e49042f
--- /dev/null
+++ b/Documentation/networking/batman-adv.txt
@@ -0,0 +1,202 @@
+BATMAN-ADV
+----------
+
+Batman advanced is a new approach to wireless networking which
+does no longer operate on the IP basis. Unlike the batman daemon,
+which exchanges information using UDP packets and sets routing
+tables, batman-advanced operates on ISO/OSI Layer 2 only and uses
+and routes (or better: bridges) Ethernet Frames. It emulates a
+virtual network switch of all nodes participating. Therefore all
+nodes appear to be link local, thus all higher operating proto-
+cols won't be affected by any changes within the network. You can
+run almost any protocol above batman advanced, prominent examples
+are: IPv4, IPv6, DHCP, IPX.
+
+Batman advanced was implemented as a Linux kernel driver to re-
+duce the overhead to a minimum. It does not depend on any (other)
+network driver, and can be used on wifi as well as ethernet lan,
+vpn, etc ... (anything with ethernet-style layer 2).
+
+
+CONFIGURATION
+-------------
+
+Load the batman-adv module into your kernel:
+
+# insmod batman-adv.ko
+
+The module is now waiting for activation. You must add some in-
+terfaces on which batman can operate. After loading the module
+batman advanced will scan your systems interfaces to search for
+compatible interfaces. Once found, it will create subfolders in
+the /sys directories of each supported interface, e.g.
+
+# ls /sys/class/net/eth0/batman_adv/
+# iface_status mesh_iface
+
+If an interface does not have the "batman_adv" subfolder it prob-
+ably is not supported. Not supported interfaces are: loopback,
+non-ethernet and batman's own interfaces.
+
+Note: After the module was loaded it will continuously watch for
+new interfaces to verify the compatibility. There is no need to
+reload the module if you plug your USB wifi adapter into your ma-
+chine after batman advanced was initially loaded.
+
+To activate a given interface simply write "bat0" into its
+"mesh_iface" file inside the batman_adv subfolder:
+
+# echo bat0 > /sys/class/net/eth0/batman_adv/mesh_iface
+
+Repeat this step for all interfaces you wish to add. Now batman
+starts using/broadcasting on this/these interface(s).
+
+By reading the "iface_status" file you can check its status:
+
+# cat /sys/class/net/eth0/batman_adv/iface_status
+# active
+
+To deactivate an interface you have to write "none" into its
+"mesh_iface" file:
+
+# echo none > /sys/class/net/eth0/batman_adv/mesh_iface
+
+
+All mesh wide settings can be found in batman's own interface
+folder:
+
+# ls /sys/class/net/bat0/mesh/
+#aggregated_ogms distributed_arp_table gw_sel_class orig_interval
+#ap_isolation fragmentation hop_penalty routing_algo
+#bonding gw_bandwidth isolation_mark vlan0
+#bridge_loop_avoidance gw_mode log_level
+
+There is a special folder for debugging information:
+
+# ls /sys/kernel/debug/batman_adv/bat0/
+# bla_backbone_table log transtable_global
+# bla_claim_table originators transtable_local
+# gateways socket
+
+Some of the files contain all sort of status information regard-
+ing the mesh network. For example, you can view the table of
+originators (mesh participants) with:
+
+# cat /sys/kernel/debug/batman_adv/bat0/originators
+
+Other files allow to change batman's behaviour to better fit your
+requirements. For instance, you can check the current originator
+interval (value in milliseconds which determines how often batman
+sends its broadcast packets):
+
+# cat /sys/class/net/bat0/mesh/orig_interval
+# 1000
+
+and also change its value:
+
+# echo 3000 > /sys/class/net/bat0/mesh/orig_interval
+
+In very mobile scenarios, you might want to adjust the originator
+interval to a lower value. This will make the mesh more respon-
+sive to topology changes, but will also increase the overhead.
+
+
+USAGE
+-----
+
+To make use of your newly created mesh, batman advanced provides
+a new interface "bat0" which you should use from this point on.
+All interfaces added to batman advanced are not relevant any
+longer because batman handles them for you. Basically, one "hands
+over" the data by using the batman interface and batman will make
+sure it reaches its destination.
+
+The "bat0" interface can be used like any other regular inter-
+face. It needs an IP address which can be either statically con-
+figured or dynamically (by using DHCP or similar services):
+
+# NodeA: ifconfig bat0 192.168.0.1
+# NodeB: ifconfig bat0 192.168.0.2
+# NodeB: ping 192.168.0.1
+
+Note: In order to avoid problems remove all IP addresses previ-
+ously assigned to interfaces now used by batman advanced, e.g.
+
+# ifconfig eth0 0.0.0.0
+
+
+LOGGING/DEBUGGING
+-----------------
+
+All error messages, warnings and information messages are sent to
+the kernel log. Depending on your operating system distribution
+this can be read in one of a number of ways. Try using the com-
+mands: dmesg, logread, or looking in the files /var/log/kern.log
+or /var/log/syslog. All batman-adv messages are prefixed with
+"batman-adv:" So to see just these messages try
+
+# dmesg | grep batman-adv
+
+When investigating problems with your mesh network it is some-
+times necessary to see more detail debug messages. This must be
+enabled when compiling the batman-adv module. When building bat-
+man-adv as part of kernel, use "make menuconfig" and enable the
+option "B.A.T.M.A.N. debugging".
+
+Those additional debug messages can be accessed using a special
+file in debugfs
+
+# cat /sys/kernel/debug/batman_adv/bat0/log
+
+The additional debug output is by default disabled. It can be en-
+abled during run time. Following log_levels are defined:
+
+0 - All debug output disabled
+1 - Enable messages related to routing / flooding / broadcasting
+2 - Enable messages related to route added / changed / deleted
+4 - Enable messages related to translation table operations
+8 - Enable messages related to bridge loop avoidance
+16 - Enable messaged related to DAT, ARP snooping and parsing
+31 - Enable all messages
+
+The debug output can be changed at runtime using the file
+/sys/class/net/bat0/mesh/log_level. e.g.
+
+# echo 6 > /sys/class/net/bat0/mesh/log_level
+
+will enable debug messages for when routes change.
+
+Counters for different types of packets entering and leaving the
+batman-adv module are available through ethtool:
+
+# ethtool --statistics bat0
+
+
+BATCTL
+------
+
+As batman advanced operates on layer 2 all hosts participating in
+the virtual switch are completely transparent for all protocols
+above layer 2. Therefore the common diagnosis tools do not work
+as expected. To overcome these problems batctl was created. At
+the moment the batctl contains ping, traceroute, tcpdump and
+interfaces to the kernel module settings.
+
+For more information, please see the manpage (man batctl).
+
+batctl is available on http://www.open-mesh.org/
+
+
+CONTACT
+-------
+
+Please send us comments, experiences, questions, anything :)
+
+IRC: #batman on irc.freenode.org
+Mailing-list: b.a.t.m.a.n@open-mesh.org (optional subscription
+ at https://lists.open-mesh.org/mm/listinfo/b.a.t.m.a.n)
+
+You can also contact the Authors:
+
+Marek Lindner <mareklindner@neomailbox.ch>
+Simon Wunderlich <sw@simonwunderlich.de>
diff --git a/Documentation/networking/baycom.txt b/Documentation/networking/baycom.txt
new file mode 100644
index 000000000..688f18fd4
--- /dev/null
+++ b/Documentation/networking/baycom.txt
@@ -0,0 +1,158 @@
+ LINUX DRIVERS FOR BAYCOM MODEMS
+
+ Thomas M. Sailer, HB9JNX/AE4WA, <sailer@ife.ee.ethz.ch>
+
+!!NEW!! (04/98) The drivers for the baycom modems have been split into
+separate drivers as they did not share any code, and the driver
+and device names have changed.
+
+This document describes the Linux Kernel Drivers for simple Baycom style
+amateur radio modems.
+
+The following drivers are available:
+
+baycom_ser_fdx:
+ This driver supports the SER12 modems either full or half duplex.
+ Its baud rate may be changed via the `baud' module parameter,
+ therefore it supports just about every bit bang modem on a
+ serial port. Its devices are called bcsf0 through bcsf3.
+ This is the recommended driver for SER12 type modems,
+ however if you have a broken UART clone that does not have working
+ delta status bits, you may try baycom_ser_hdx.
+
+baycom_ser_hdx:
+ This is an alternative driver for SER12 type modems.
+ It only supports half duplex, and only 1200 baud. Its devices
+ are called bcsh0 through bcsh3. Use this driver only if baycom_ser_fdx
+ does not work with your UART.
+
+baycom_par:
+ This driver supports the par96 and picpar modems.
+ Its devices are called bcp0 through bcp3.
+
+baycom_epp:
+ This driver supports the EPP modem.
+ Its devices are called bce0 through bce3.
+ This driver is work-in-progress.
+
+The following modems are supported:
+
+ser12: This is a very simple 1200 baud AFSK modem. The modem consists only
+ of a modulator/demodulator chip, usually a TI TCM3105. The computer
+ is responsible for regenerating the receiver bit clock, as well as
+ for handling the HDLC protocol. The modem connects to a serial port,
+ hence the name. Since the serial port is not used as an async serial
+ port, the kernel driver for serial ports cannot be used, and this
+ driver only supports standard serial hardware (8250, 16450, 16550)
+
+par96: This is a modem for 9600 baud FSK compatible to the G3RUH standard.
+ The modem does all the filtering and regenerates the receiver clock.
+ Data is transferred from and to the PC via a shift register.
+ The shift register is filled with 16 bits and an interrupt is signalled.
+ The PC then empties the shift register in a burst. This modem connects
+ to the parallel port, hence the name. The modem leaves the
+ implementation of the HDLC protocol and the scrambler polynomial to
+ the PC.
+
+picpar: This is a redesign of the par96 modem by Henning Rech, DF9IC. The modem
+ is protocol compatible to par96, but uses only three low power ICs
+ and can therefore be fed from the parallel port and does not require
+ an additional power supply. Furthermore, it incorporates a carrier
+ detect circuitry.
+
+EPP: This is a high-speed modem adaptor that connects to an enhanced parallel port.
+ Its target audience is users working over a high speed hub (76.8kbit/s).
+
+eppfpga: This is a redesign of the EPP adaptor.
+
+
+
+All of the above modems only support half duplex communications. However,
+the driver supports the KISS (see below) fullduplex command. It then simply
+starts to send as soon as there's a packet to transmit and does not care
+about DCD, i.e. it starts to send even if there's someone else on the channel.
+This command is required by some implementations of the DAMA channel
+access protocol.
+
+
+The Interface of the drivers
+
+Unlike previous drivers, these drivers are no longer character devices,
+but they are now true kernel network interfaces. Installation is therefore
+simple. Once installed, four interfaces named bc{sf,sh,p,e}[0-3] are available.
+sethdlc from the ax25 utilities may be used to set driver states etc.
+Users of userland AX.25 stacks may use the net2kiss utility (also available
+in the ax25 utilities package) to convert packets of a network interface
+to a KISS stream on a pseudo tty. There's also a patch available from
+me for WAMPES which allows attaching a kernel network interface directly.
+
+
+Configuring the driver
+
+Every time a driver is inserted into the kernel, it has to know which
+modems it should access at which ports. This can be done with the setbaycom
+utility. If you are only using one modem, you can also configure the
+driver from the insmod command line (or by means of an option line in
+/etc/modprobe.d/*.conf).
+
+Examples:
+ modprobe baycom_ser_fdx mode="ser12*" iobase=0x3f8 irq=4
+ sethdlc -i bcsf0 -p mode "ser12*" io 0x3f8 irq 4
+
+Both lines configure the first port to drive a ser12 modem at the first
+serial port (COM1 under DOS). The * in the mode parameter instructs the driver to use
+the software DCD algorithm (see below).
+
+ insmod baycom_par mode="picpar" iobase=0x378
+ sethdlc -i bcp0 -p mode "picpar" io 0x378
+
+Both lines configure the first port to drive a picpar modem at the
+first parallel port (LPT1 under DOS). (Note: picpar implies
+hardware DCD, par96 implies software DCD).
+
+The channel access parameters can be set with sethdlc -a or kissparms.
+Note that both utilities interpret the values slightly differently.
+
+
+Hardware DCD versus Software DCD
+
+To avoid collisions on the air, the driver must know when the channel is
+busy. This is the task of the DCD circuitry/software. The driver may either
+utilise a software DCD algorithm (options=1) or use a DCD signal from
+the hardware (options=0).
+
+ser12: if software DCD is utilised, the radio's squelch should always be
+ open. It is highly recommended to use the software DCD algorithm,
+ as it is much faster than most hardware squelch circuitry. The
+ disadvantage is a slightly higher load on the system.
+
+par96: the software DCD algorithm for this type of modem is rather poor.
+ The modem simply does not provide enough information to implement
+ a reasonable DCD algorithm in software. Therefore, if your radio
+ feeds the DCD input of the PAR96 modem, the use of the hardware
+ DCD circuitry is recommended.
+
+picpar: the picpar modem features a builtin DCD hardware, which is highly
+ recommended.
+
+
+
+Compatibility with the rest of the Linux kernel
+
+The serial driver and the baycom serial drivers compete
+for the same hardware resources. Of course only one driver can access a given
+interface at a time. The serial driver grabs all interfaces it can find at
+startup time. Therefore the baycom drivers subsequently won't be able to
+access a serial port. You might therefore find it necessary to release
+a port owned by the serial driver with 'setserial /dev/ttyS# uart none', where
+# is the number of the interface. The baycom drivers do not reserve any
+ports at startup, unless one is specified on the 'insmod' command line. Another
+method to solve the problem is to compile all drivers as modules and
+leave it to kmod to load the correct driver depending on the application.
+
+The parallel port drivers (baycom_par, baycom_epp) now use the parport subsystem
+to arbitrate the ports between different client drivers.
+
+vy 73s de
+Tom Sailer, sailer@ife.ee.ethz.ch
+hb9jnx @ hb9w.ampr.org
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
new file mode 100644
index 000000000..83bf4986b
--- /dev/null
+++ b/Documentation/networking/bonding.txt
@@ -0,0 +1,2743 @@
+
+ Linux Ethernet Bonding Driver HOWTO
+
+ Latest update: 27 April 2011
+
+Initial release : Thomas Davis <tadavis at lbl.gov>
+Corrections, HA extensions : 2000/10/03-15 :
+ - Willy Tarreau <willy at meta-x.org>
+ - Constantine Gavrilov <const-g at xpert.com>
+ - Chad N. Tindel <ctindel at ieee dot org>
+ - Janice Girouard <girouard at us dot ibm dot com>
+ - Jay Vosburgh <fubar at us dot ibm dot com>
+
+Reorganized and updated Feb 2005 by Jay Vosburgh
+Added Sysfs information: 2006/04/24
+ - Mitch Williams <mitch.a.williams at intel.com>
+
+Introduction
+============
+
+ The Linux bonding driver provides a method for aggregating
+multiple network interfaces into a single logical "bonded" interface.
+The behavior of the bonded interfaces depends upon the mode; generally
+speaking, modes provide either hot standby or load balancing services.
+Additionally, link integrity monitoring may be performed.
+
+ The bonding driver originally came from Donald Becker's
+beowulf patches for kernel 2.0. It has changed quite a bit since, and
+the original tools from extreme-linux and beowulf sites will not work
+with this version of the driver.
+
+ For new versions of the driver, updated userspace tools, and
+who to ask for help, please follow the links at the end of this file.
+
+Table of Contents
+=================
+
+1. Bonding Driver Installation
+
+2. Bonding Driver Options
+
+3. Configuring Bonding Devices
+3.1 Configuration with Sysconfig Support
+3.1.1 Using DHCP with Sysconfig
+3.1.2 Configuring Multiple Bonds with Sysconfig
+3.2 Configuration with Initscripts Support
+3.2.1 Using DHCP with Initscripts
+3.2.2 Configuring Multiple Bonds with Initscripts
+3.3 Configuring Bonding Manually with Ifenslave
+3.3.1 Configuring Multiple Bonds Manually
+3.4 Configuring Bonding Manually via Sysfs
+3.5 Configuration with Interfaces Support
+3.6 Overriding Configuration for Special Cases
+
+4. Querying Bonding Configuration
+4.1 Bonding Configuration
+4.2 Network Configuration
+
+5. Switch Configuration
+
+6. 802.1q VLAN Support
+
+7. Link Monitoring
+7.1 ARP Monitor Operation
+7.2 Configuring Multiple ARP Targets
+7.3 MII Monitor Operation
+
+8. Potential Trouble Sources
+8.1 Adventures in Routing
+8.2 Ethernet Device Renaming
+8.3 Painfully Slow Or No Failed Link Detection By Miimon
+
+9. SNMP agents
+
+10. Promiscuous mode
+
+11. Configuring Bonding for High Availability
+11.1 High Availability in a Single Switch Topology
+11.2 High Availability in a Multiple Switch Topology
+11.2.1 HA Bonding Mode Selection for Multiple Switch Topology
+11.2.2 HA Link Monitoring for Multiple Switch Topology
+
+12. Configuring Bonding for Maximum Throughput
+12.1 Maximum Throughput in a Single Switch Topology
+12.1.1 MT Bonding Mode Selection for Single Switch Topology
+12.1.2 MT Link Monitoring for Single Switch Topology
+12.2 Maximum Throughput in a Multiple Switch Topology
+12.2.1 MT Bonding Mode Selection for Multiple Switch Topology
+12.2.2 MT Link Monitoring for Multiple Switch Topology
+
+13. Switch Behavior Issues
+13.1 Link Establishment and Failover Delays
+13.2 Duplicated Incoming Packets
+
+14. Hardware Specific Considerations
+14.1 IBM BladeCenter
+
+15. Frequently Asked Questions
+
+16. Resources and Links
+
+
+1. Bonding Driver Installation
+==============================
+
+ Most popular distro kernels ship with the bonding driver
+already available as a module. If your distro does not, or you
+have need to compile bonding from source (e.g., configuring and
+installing a mainline kernel from kernel.org), you'll need to perform
+the following steps:
+
+1.1 Configure and build the kernel with bonding
+-----------------------------------------------
+
+ The current version of the bonding driver is available in the
+drivers/net/bonding subdirectory of the most recent kernel source
+(which is available on http://kernel.org). Most users "rolling their
+own" will want to use the most recent kernel from kernel.org.
+
+ Configure kernel with "make menuconfig" (or "make xconfig" or
+"make config"), then select "Bonding driver support" in the "Network
+device support" section. It is recommended that you configure the
+driver as module since it is currently the only way to pass parameters
+to the driver or configure more than one bonding device.
+
+ Build and install the new kernel and modules.
+
+1.2 Bonding Control Utility
+-------------------------------------
+
+ It is recommended to configure bonding via iproute2 (netlink)
+or sysfs, the old ifenslave control utility is obsolete.
+
+2. Bonding Driver Options
+=========================
+
+ Options for the bonding driver are supplied as parameters to the
+bonding module at load time, or are specified via sysfs.
+
+ Module options may be given as command line arguments to the
+insmod or modprobe command, but are usually specified in either the
+/etc/modrobe.d/*.conf configuration files, or in a distro-specific
+configuration file (some of which are detailed in the next section).
+
+ Details on bonding support for sysfs is provided in the
+"Configuring Bonding Manually via Sysfs" section, below.
+
+ The available bonding driver parameters are listed below. If a
+parameter is not specified the default value is used. When initially
+configuring a bond, it is recommended "tail -f /var/log/messages" be
+run in a separate window to watch for bonding driver error messages.
+
+ It is critical that either the miimon or arp_interval and
+arp_ip_target parameters be specified, otherwise serious network
+degradation will occur during link failures. Very few devices do not
+support at least miimon, so there is really no reason not to use it.
+
+ Options with textual values will accept either the text name
+or, for backwards compatibility, the option value. E.g.,
+"mode=802.3ad" and "mode=4" set the same mode.
+
+ The parameters are as follows:
+
+active_slave
+
+ Specifies the new active slave for modes that support it
+ (active-backup, balance-alb and balance-tlb). Possible values
+ are the name of any currently enslaved interface, or an empty
+ string. If a name is given, the slave and its link must be up in order
+ to be selected as the new active slave. If an empty string is
+ specified, the current active slave is cleared, and a new active
+ slave is selected automatically.
+
+ Note that this is only available through the sysfs interface. No module
+ parameter by this name exists.
+
+ The normal value of this option is the name of the currently
+ active slave, or the empty string if there is no active slave or
+ the current mode does not use an active slave.
+
+ad_select
+
+ Specifies the 802.3ad aggregation selection logic to use. The
+ possible values and their effects are:
+
+ stable or 0
+
+ The active aggregator is chosen by largest aggregate
+ bandwidth.
+
+ Reselection of the active aggregator occurs only when all
+ slaves of the active aggregator are down or the active
+ aggregator has no slaves.
+
+ This is the default value.
+
+ bandwidth or 1
+
+ The active aggregator is chosen by largest aggregate
+ bandwidth. Reselection occurs if:
+
+ - A slave is added to or removed from the bond
+
+ - Any slave's link state changes
+
+ - Any slave's 802.3ad association state changes
+
+ - The bond's administrative state changes to up
+
+ count or 2
+
+ The active aggregator is chosen by the largest number of
+ ports (slaves). Reselection occurs as described under the
+ "bandwidth" setting, above.
+
+ The bandwidth and count selection policies permit failover of
+ 802.3ad aggregations when partial failure of the active aggregator
+ occurs. This keeps the aggregator with the highest availability
+ (either in bandwidth or in number of ports) active at all times.
+
+ This option was added in bonding version 3.4.0.
+
+all_slaves_active
+
+ Specifies that duplicate frames (received on inactive ports) should be
+ dropped (0) or delivered (1).
+
+ Normally, bonding will drop duplicate frames (received on inactive
+ ports), which is desirable for most users. But there are some times
+ it is nice to allow duplicate frames to be delivered.
+
+ The default value is 0 (drop duplicate frames received on inactive
+ ports).
+
+arp_interval
+
+ Specifies the ARP link monitoring frequency in milliseconds.
+
+ The ARP monitor works by periodically checking the slave
+ devices to determine whether they have sent or received
+ traffic recently (the precise criteria depends upon the
+ bonding mode, and the state of the slave). Regular traffic is
+ generated via ARP probes issued for the addresses specified by
+ the arp_ip_target option.
+
+ This behavior can be modified by the arp_validate option,
+ below.
+
+ If ARP monitoring is used in an etherchannel compatible mode
+ (modes 0 and 2), the switch should be configured in a mode
+ that evenly distributes packets across all links. If the
+ switch is configured to distribute the packets in an XOR
+ fashion, all replies from the ARP targets will be received on
+ the same link which could cause the other team members to
+ fail. ARP monitoring should not be used in conjunction with
+ miimon. A value of 0 disables ARP monitoring. The default
+ value is 0.
+
+arp_ip_target
+
+ Specifies the IP addresses to use as ARP monitoring peers when
+ arp_interval is > 0. These are the targets of the ARP request
+ sent to determine the health of the link to the targets.
+ Specify these values in ddd.ddd.ddd.ddd format. Multiple IP
+ addresses must be separated by a comma. At least one IP
+ address must be given for ARP monitoring to function. The
+ maximum number of targets that can be specified is 16. The
+ default value is no IP addresses.
+
+arp_validate
+
+ Specifies whether or not ARP probes and replies should be
+ validated in any mode that supports arp monitoring, or whether
+ non-ARP traffic should be filtered (disregarded) for link
+ monitoring purposes.
+
+ Possible values are:
+
+ none or 0
+
+ No validation or filtering is performed.
+
+ active or 1
+
+ Validation is performed only for the active slave.
+
+ backup or 2
+
+ Validation is performed only for backup slaves.
+
+ all or 3
+
+ Validation is performed for all slaves.
+
+ filter or 4
+
+ Filtering is applied to all slaves. No validation is
+ performed.
+
+ filter_active or 5
+
+ Filtering is applied to all slaves, validation is performed
+ only for the active slave.
+
+ filter_backup or 6
+
+ Filtering is applied to all slaves, validation is performed
+ only for backup slaves.
+
+ Validation:
+
+ Enabling validation causes the ARP monitor to examine the incoming
+ ARP requests and replies, and only consider a slave to be up if it
+ is receiving the appropriate ARP traffic.
+
+ For an active slave, the validation checks ARP replies to confirm
+ that they were generated by an arp_ip_target. Since backup slaves
+ do not typically receive these replies, the validation performed
+ for backup slaves is on the broadcast ARP request sent out via the
+ active slave. It is possible that some switch or network
+ configurations may result in situations wherein the backup slaves
+ do not receive the ARP requests; in such a situation, validation
+ of backup slaves must be disabled.
+
+ The validation of ARP requests on backup slaves is mainly helping
+ bonding to decide which slaves are more likely to work in case of
+ the active slave failure, it doesn't really guarantee that the
+ backup slave will work if it's selected as the next active slave.
+
+ Validation is useful in network configurations in which multiple
+ bonding hosts are concurrently issuing ARPs to one or more targets
+ beyond a common switch. Should the link between the switch and
+ target fail (but not the switch itself), the probe traffic
+ generated by the multiple bonding instances will fool the standard
+ ARP monitor into considering the links as still up. Use of
+ validation can resolve this, as the ARP monitor will only consider
+ ARP requests and replies associated with its own instance of
+ bonding.
+
+ Filtering:
+
+ Enabling filtering causes the ARP monitor to only use incoming ARP
+ packets for link availability purposes. Arriving packets that are
+ not ARPs are delivered normally, but do not count when determining
+ if a slave is available.
+
+ Filtering operates by only considering the reception of ARP
+ packets (any ARP packet, regardless of source or destination) when
+ determining if a slave has received traffic for link availability
+ purposes.
+
+ Filtering is useful in network configurations in which significant
+ levels of third party broadcast traffic would fool the standard
+ ARP monitor into considering the links as still up. Use of
+ filtering can resolve this, as only ARP traffic is considered for
+ link availability purposes.
+
+ This option was added in bonding version 3.1.0.
+
+arp_all_targets
+
+ Specifies the quantity of arp_ip_targets that must be reachable
+ in order for the ARP monitor to consider a slave as being up.
+ This option affects only active-backup mode for slaves with
+ arp_validation enabled.
+
+ Possible values are:
+
+ any or 0
+
+ consider the slave up only when any of the arp_ip_targets
+ is reachable
+
+ all or 1
+
+ consider the slave up only when all of the arp_ip_targets
+ are reachable
+
+downdelay
+
+ Specifies the time, in milliseconds, to wait before disabling
+ a slave after a link failure has been detected. This option
+ is only valid for the miimon link monitor. The downdelay
+ value should be a multiple of the miimon value; if not, it
+ will be rounded down to the nearest multiple. The default
+ value is 0.
+
+fail_over_mac
+
+ Specifies whether active-backup mode should set all slaves to
+ the same MAC address at enslavement (the traditional
+ behavior), or, when enabled, perform special handling of the
+ bond's MAC address in accordance with the selected policy.
+
+ Possible values are:
+
+ none or 0
+
+ This setting disables fail_over_mac, and causes
+ bonding to set all slaves of an active-backup bond to
+ the same MAC address at enslavement time. This is the
+ default.
+
+ active or 1
+
+ The "active" fail_over_mac policy indicates that the
+ MAC address of the bond should always be the MAC
+ address of the currently active slave. The MAC
+ address of the slaves is not changed; instead, the MAC
+ address of the bond changes during a failover.
+
+ This policy is useful for devices that cannot ever
+ alter their MAC address, or for devices that refuse
+ incoming broadcasts with their own source MAC (which
+ interferes with the ARP monitor).
+
+ The down side of this policy is that every device on
+ the network must be updated via gratuitous ARP,
+ vs. just updating a switch or set of switches (which
+ often takes place for any traffic, not just ARP
+ traffic, if the switch snoops incoming traffic to
+ update its tables) for the traditional method. If the
+ gratuitous ARP is lost, communication may be
+ disrupted.
+
+ When this policy is used in conjunction with the mii
+ monitor, devices which assert link up prior to being
+ able to actually transmit and receive are particularly
+ susceptible to loss of the gratuitous ARP, and an
+ appropriate updelay setting may be required.
+
+ follow or 2
+
+ The "follow" fail_over_mac policy causes the MAC
+ address of the bond to be selected normally (normally
+ the MAC address of the first slave added to the bond).
+ However, the second and subsequent slaves are not set
+ to this MAC address while they are in a backup role; a
+ slave is programmed with the bond's MAC address at
+ failover time (and the formerly active slave receives
+ the newly active slave's MAC address).
+
+ This policy is useful for multiport devices that
+ either become confused or incur a performance penalty
+ when multiple ports are programmed with the same MAC
+ address.
+
+
+ The default policy is none, unless the first slave cannot
+ change its MAC address, in which case the active policy is
+ selected by default.
+
+ This option may be modified via sysfs only when no slaves are
+ present in the bond.
+
+ This option was added in bonding version 3.2.0. The "follow"
+ policy was added in bonding version 3.3.0.
+
+lacp_rate
+
+ Option specifying the rate in which we'll ask our link partner
+ to transmit LACPDU packets in 802.3ad mode. Possible values
+ are:
+
+ slow or 0
+ Request partner to transmit LACPDUs every 30 seconds
+
+ fast or 1
+ Request partner to transmit LACPDUs every 1 second
+
+ The default is slow.
+
+max_bonds
+
+ Specifies the number of bonding devices to create for this
+ instance of the bonding driver. E.g., if max_bonds is 3, and
+ the bonding driver is not already loaded, then bond0, bond1
+ and bond2 will be created. The default value is 1. Specifying
+ a value of 0 will load bonding, but will not create any devices.
+
+miimon
+
+ Specifies the MII link monitoring frequency in milliseconds.
+ This determines how often the link state of each slave is
+ inspected for link failures. A value of zero disables MII
+ link monitoring. A value of 100 is a good starting point.
+ The use_carrier option, below, affects how the link state is
+ determined. See the High Availability section for additional
+ information. The default value is 0.
+
+min_links
+
+ Specifies the minimum number of links that must be active before
+ asserting carrier. It is similar to the Cisco EtherChannel min-links
+ feature. This allows setting the minimum number of member ports that
+ must be up (link-up state) before marking the bond device as up
+ (carrier on). This is useful for situations where higher level services
+ such as clustering want to ensure a minimum number of low bandwidth
+ links are active before switchover. This option only affect 802.3ad
+ mode.
+
+ The default value is 0. This will cause carrier to be asserted (for
+ 802.3ad mode) whenever there is an active aggregator, regardless of the
+ number of available links in that aggregator. Note that, because an
+ aggregator cannot be active without at least one available link,
+ setting this option to 0 or to 1 has the exact same effect.
+
+mode
+
+ Specifies one of the bonding policies. The default is
+ balance-rr (round robin). Possible values are:
+
+ balance-rr or 0
+
+ Round-robin policy: Transmit packets in sequential
+ order from the first available slave through the
+ last. This mode provides load balancing and fault
+ tolerance.
+
+ active-backup or 1
+
+ Active-backup policy: Only one slave in the bond is
+ active. A different slave becomes active if, and only
+ if, the active slave fails. The bond's MAC address is
+ externally visible on only one port (network adapter)
+ to avoid confusing the switch.
+
+ In bonding version 2.6.2 or later, when a failover
+ occurs in active-backup mode, bonding will issue one
+ or more gratuitous ARPs on the newly active slave.
+ One gratuitous ARP is issued for the bonding master
+ interface and each VLAN interfaces configured above
+ it, provided that the interface has at least one IP
+ address configured. Gratuitous ARPs issued for VLAN
+ interfaces are tagged with the appropriate VLAN id.
+
+ This mode provides fault tolerance. The primary
+ option, documented below, affects the behavior of this
+ mode.
+
+ balance-xor or 2
+
+ XOR policy: Transmit based on the selected transmit
+ hash policy. The default policy is a simple [(source
+ MAC address XOR'd with destination MAC address XOR
+ packet type ID) modulo slave count]. Alternate transmit
+ policies may be selected via the xmit_hash_policy option,
+ described below.
+
+ This mode provides load balancing and fault tolerance.
+
+ broadcast or 3
+
+ Broadcast policy: transmits everything on all slave
+ interfaces. This mode provides fault tolerance.
+
+ 802.3ad or 4
+
+ IEEE 802.3ad Dynamic link aggregation. Creates
+ aggregation groups that share the same speed and
+ duplex settings. Utilizes all slaves in the active
+ aggregator according to the 802.3ad specification.
+
+ Slave selection for outgoing traffic is done according
+ to the transmit hash policy, which may be changed from
+ the default simple XOR policy via the xmit_hash_policy
+ option, documented below. Note that not all transmit
+ policies may be 802.3ad compliant, particularly in
+ regards to the packet mis-ordering requirements of
+ section 43.2.4 of the 802.3ad standard. Differing
+ peer implementations will have varying tolerances for
+ noncompliance.
+
+ Prerequisites:
+
+ 1. Ethtool support in the base drivers for retrieving
+ the speed and duplex of each slave.
+
+ 2. A switch that supports IEEE 802.3ad Dynamic link
+ aggregation.
+
+ Most switches will require some type of configuration
+ to enable 802.3ad mode.
+
+ balance-tlb or 5
+
+ Adaptive transmit load balancing: channel bonding that
+ does not require any special switch support.
+
+ In tlb_dynamic_lb=1 mode; the outgoing traffic is
+ distributed according to the current load (computed
+ relative to the speed) on each slave.
+
+ In tlb_dynamic_lb=0 mode; the load balancing based on
+ current load is disabled and the load is distributed
+ only using the hash distribution.
+
+ Incoming traffic is received by the current slave.
+ If the receiving slave fails, another slave takes over
+ the MAC address of the failed receiving slave.
+
+ Prerequisite:
+
+ Ethtool support in the base drivers for retrieving the
+ speed of each slave.
+
+ balance-alb or 6
+
+ Adaptive load balancing: includes balance-tlb plus
+ receive load balancing (rlb) for IPV4 traffic, and
+ does not require any special switch support. The
+ receive load balancing is achieved by ARP negotiation.
+ The bonding driver intercepts the ARP Replies sent by
+ the local system on their way out and overwrites the
+ source hardware address with the unique hardware
+ address of one of the slaves in the bond such that
+ different peers use different hardware addresses for
+ the server.
+
+ Receive traffic from connections created by the server
+ is also balanced. When the local system sends an ARP
+ Request the bonding driver copies and saves the peer's
+ IP information from the ARP packet. When the ARP
+ Reply arrives from the peer, its hardware address is
+ retrieved and the bonding driver initiates an ARP
+ reply to this peer assigning it to one of the slaves
+ in the bond. A problematic outcome of using ARP
+ negotiation for balancing is that each time that an
+ ARP request is broadcast it uses the hardware address
+ of the bond. Hence, peers learn the hardware address
+ of the bond and the balancing of receive traffic
+ collapses to the current slave. This is handled by
+ sending updates (ARP Replies) to all the peers with
+ their individually assigned hardware address such that
+ the traffic is redistributed. Receive traffic is also
+ redistributed when a new slave is added to the bond
+ and when an inactive slave is re-activated. The
+ receive load is distributed sequentially (round robin)
+ among the group of highest speed slaves in the bond.
+
+ When a link is reconnected or a new slave joins the
+ bond the receive traffic is redistributed among all
+ active slaves in the bond by initiating ARP Replies
+ with the selected MAC address to each of the
+ clients. The updelay parameter (detailed below) must
+ be set to a value equal or greater than the switch's
+ forwarding delay so that the ARP Replies sent to the
+ peers will not be blocked by the switch.
+
+ Prerequisites:
+
+ 1. Ethtool support in the base drivers for retrieving
+ the speed of each slave.
+
+ 2. Base driver support for setting the hardware
+ address of a device while it is open. This is
+ required so that there will always be one slave in the
+ team using the bond hardware address (the
+ curr_active_slave) while having a unique hardware
+ address for each slave in the bond. If the
+ curr_active_slave fails its hardware address is
+ swapped with the new curr_active_slave that was
+ chosen.
+
+num_grat_arp
+num_unsol_na
+
+ Specify the number of peer notifications (gratuitous ARPs and
+ unsolicited IPv6 Neighbor Advertisements) to be issued after a
+ failover event. As soon as the link is up on the new slave
+ (possibly immediately) a peer notification is sent on the
+ bonding device and each VLAN sub-device. This is repeated at
+ each link monitor interval (arp_interval or miimon, whichever
+ is active) if the number is greater than 1.
+
+ The valid range is 0 - 255; the default value is 1. These options
+ affect only the active-backup mode. These options were added for
+ bonding versions 3.3.0 and 3.4.0 respectively.
+
+ From Linux 3.0 and bonding version 3.7.1, these notifications
+ are generated by the ipv4 and ipv6 code and the numbers of
+ repetitions cannot be set independently.
+
+packets_per_slave
+
+ Specify the number of packets to transmit through a slave before
+ moving to the next one. When set to 0 then a slave is chosen at
+ random.
+
+ The valid range is 0 - 65535; the default value is 1. This option
+ has effect only in balance-rr mode.
+
+primary
+
+ A string (eth0, eth2, etc) specifying which slave is the
+ primary device. The specified device will always be the
+ active slave while it is available. Only when the primary is
+ off-line will alternate devices be used. This is useful when
+ one slave is preferred over another, e.g., when one slave has
+ higher throughput than another.
+
+ The primary option is only valid for active-backup(1),
+ balance-tlb (5) and balance-alb (6) mode.
+
+primary_reselect
+
+ Specifies the reselection policy for the primary slave. This
+ affects how the primary slave is chosen to become the active slave
+ when failure of the active slave or recovery of the primary slave
+ occurs. This option is designed to prevent flip-flopping between
+ the primary slave and other slaves. Possible values are:
+
+ always or 0 (default)
+
+ The primary slave becomes the active slave whenever it
+ comes back up.
+
+ better or 1
+
+ The primary slave becomes the active slave when it comes
+ back up, if the speed and duplex of the primary slave is
+ better than the speed and duplex of the current active
+ slave.
+
+ failure or 2
+
+ The primary slave becomes the active slave only if the
+ current active slave fails and the primary slave is up.
+
+ The primary_reselect setting is ignored in two cases:
+
+ If no slaves are active, the first slave to recover is
+ made the active slave.
+
+ When initially enslaved, the primary slave is always made
+ the active slave.
+
+ Changing the primary_reselect policy via sysfs will cause an
+ immediate selection of the best active slave according to the new
+ policy. This may or may not result in a change of the active
+ slave, depending upon the circumstances.
+
+ This option was added for bonding version 3.6.0.
+
+tlb_dynamic_lb
+
+ Specifies if dynamic shuffling of flows is enabled in tlb
+ mode. The value has no effect on any other modes.
+
+ The default behavior of tlb mode is to shuffle active flows across
+ slaves based on the load in that interval. This gives nice lb
+ characteristics but can cause packet reordering. If re-ordering is
+ a concern use this variable to disable flow shuffling and rely on
+ load balancing provided solely by the hash distribution.
+ xmit-hash-policy can be used to select the appropriate hashing for
+ the setup.
+
+ The sysfs entry can be used to change the setting per bond device
+ and the initial value is derived from the module parameter. The
+ sysfs entry is allowed to be changed only if the bond device is
+ down.
+
+ The default value is "1" that enables flow shuffling while value "0"
+ disables it. This option was added in bonding driver 3.7.1
+
+
+updelay
+
+ Specifies the time, in milliseconds, to wait before enabling a
+ slave after a link recovery has been detected. This option is
+ only valid for the miimon link monitor. The updelay value
+ should be a multiple of the miimon value; if not, it will be
+ rounded down to the nearest multiple. The default value is 0.
+
+use_carrier
+
+ Specifies whether or not miimon should use MII or ETHTOOL
+ ioctls vs. netif_carrier_ok() to determine the link
+ status. The MII or ETHTOOL ioctls are less efficient and
+ utilize a deprecated calling sequence within the kernel. The
+ netif_carrier_ok() relies on the device driver to maintain its
+ state with netif_carrier_on/off; at this writing, most, but
+ not all, device drivers support this facility.
+
+ If bonding insists that the link is up when it should not be,
+ it may be that your network device driver does not support
+ netif_carrier_on/off. The default state for netif_carrier is
+ "carrier on," so if a driver does not support netif_carrier,
+ it will appear as if the link is always up. In this case,
+ setting use_carrier to 0 will cause bonding to revert to the
+ MII / ETHTOOL ioctl method to determine the link state.
+
+ A value of 1 enables the use of netif_carrier_ok(), a value of
+ 0 will use the deprecated MII / ETHTOOL ioctls. The default
+ value is 1.
+
+xmit_hash_policy
+
+ Selects the transmit hash policy to use for slave selection in
+ balance-xor, 802.3ad, and tlb modes. Possible values are:
+
+ layer2
+
+ Uses XOR of hardware MAC addresses and packet type ID
+ field to generate the hash. The formula is
+
+ hash = source MAC XOR destination MAC XOR packet type ID
+ slave number = hash modulo slave count
+
+ This algorithm will place all traffic to a particular
+ network peer on the same slave.
+
+ This algorithm is 802.3ad compliant.
+
+ layer2+3
+
+ This policy uses a combination of layer2 and layer3
+ protocol information to generate the hash.
+
+ Uses XOR of hardware MAC addresses and IP addresses to
+ generate the hash. The formula is
+
+ hash = source MAC XOR destination MAC XOR packet type ID
+ hash = hash XOR source IP XOR destination IP
+ hash = hash XOR (hash RSHIFT 16)
+ hash = hash XOR (hash RSHIFT 8)
+ And then hash is reduced modulo slave count.
+
+ If the protocol is IPv6 then the source and destination
+ addresses are first hashed using ipv6_addr_hash.
+
+ This algorithm will place all traffic to a particular
+ network peer on the same slave. For non-IP traffic,
+ the formula is the same as for the layer2 transmit
+ hash policy.
+
+ This policy is intended to provide a more balanced
+ distribution of traffic than layer2 alone, especially
+ in environments where a layer3 gateway device is
+ required to reach most destinations.
+
+ This algorithm is 802.3ad compliant.
+
+ layer3+4
+
+ This policy uses upper layer protocol information,
+ when available, to generate the hash. This allows for
+ traffic to a particular network peer to span multiple
+ slaves, although a single connection will not span
+ multiple slaves.
+
+ The formula for unfragmented TCP and UDP packets is
+
+ hash = source port, destination port (as in the header)
+ hash = hash XOR source IP XOR destination IP
+ hash = hash XOR (hash RSHIFT 16)
+ hash = hash XOR (hash RSHIFT 8)
+ And then hash is reduced modulo slave count.
+
+ If the protocol is IPv6 then the source and destination
+ addresses are first hashed using ipv6_addr_hash.
+
+ For fragmented TCP or UDP packets and all other IPv4 and
+ IPv6 protocol traffic, the source and destination port
+ information is omitted. For non-IP traffic, the
+ formula is the same as for the layer2 transmit hash
+ policy.
+
+ This algorithm is not fully 802.3ad compliant. A
+ single TCP or UDP conversation containing both
+ fragmented and unfragmented packets will see packets
+ striped across two interfaces. This may result in out
+ of order delivery. Most traffic types will not meet
+ this criteria, as TCP rarely fragments traffic, and
+ most UDP traffic is not involved in extended
+ conversations. Other implementations of 802.3ad may
+ or may not tolerate this noncompliance.
+
+ encap2+3
+
+ This policy uses the same formula as layer2+3 but it
+ relies on skb_flow_dissect to obtain the header fields
+ which might result in the use of inner headers if an
+ encapsulation protocol is used. For example this will
+ improve the performance for tunnel users because the
+ packets will be distributed according to the encapsulated
+ flows.
+
+ encap3+4
+
+ This policy uses the same formula as layer3+4 but it
+ relies on skb_flow_dissect to obtain the header fields
+ which might result in the use of inner headers if an
+ encapsulation protocol is used. For example this will
+ improve the performance for tunnel users because the
+ packets will be distributed according to the encapsulated
+ flows.
+
+ The default value is layer2. This option was added in bonding
+ version 2.6.3. In earlier versions of bonding, this parameter
+ does not exist, and the layer2 policy is the only policy. The
+ layer2+3 value was added for bonding version 3.2.2.
+
+resend_igmp
+
+ Specifies the number of IGMP membership reports to be issued after
+ a failover event. One membership report is issued immediately after
+ the failover, subsequent packets are sent in each 200ms interval.
+
+ The valid range is 0 - 255; the default value is 1. A value of 0
+ prevents the IGMP membership report from being issued in response
+ to the failover event.
+
+ This option is useful for bonding modes balance-rr (0), active-backup
+ (1), balance-tlb (5) and balance-alb (6), in which a failover can
+ switch the IGMP traffic from one slave to another. Therefore a fresh
+ IGMP report must be issued to cause the switch to forward the incoming
+ IGMP traffic over the newly selected slave.
+
+ This option was added for bonding version 3.7.0.
+
+lp_interval
+
+ Specifies the number of seconds between instances where the bonding
+ driver sends learning packets to each slaves peer switch.
+
+ The valid range is 1 - 0x7fffffff; the default value is 1. This Option
+ has effect only in balance-tlb and balance-alb modes.
+
+3. Configuring Bonding Devices
+==============================
+
+ You can configure bonding using either your distro's network
+initialization scripts, or manually using either iproute2 or the
+sysfs interface. Distros generally use one of three packages for the
+network initialization scripts: initscripts, sysconfig or interfaces.
+Recent versions of these packages have support for bonding, while older
+versions do not.
+
+ We will first describe the options for configuring bonding for
+distros using versions of initscripts, sysconfig and interfaces with full
+or partial support for bonding, then provide information on enabling
+bonding without support from the network initialization scripts (i.e.,
+older versions of initscripts or sysconfig).
+
+ If you're unsure whether your distro uses sysconfig,
+initscripts or interfaces, or don't know if it's new enough, have no fear.
+Determining this is fairly straightforward.
+
+ First, look for a file called interfaces in /etc/network directory.
+If this file is present in your system, then your system use interfaces. See
+Configuration with Interfaces Support.
+
+ Else, issue the command:
+
+$ rpm -qf /sbin/ifup
+
+ It will respond with a line of text starting with either
+"initscripts" or "sysconfig," followed by some numbers. This is the
+package that provides your network initialization scripts.
+
+ Next, to determine if your installation supports bonding,
+issue the command:
+
+$ grep ifenslave /sbin/ifup
+
+ If this returns any matches, then your initscripts or
+sysconfig has support for bonding.
+
+3.1 Configuration with Sysconfig Support
+----------------------------------------
+
+ This section applies to distros using a version of sysconfig
+with bonding support, for example, SuSE Linux Enterprise Server 9.
+
+ SuSE SLES 9's networking configuration system does support
+bonding, however, at this writing, the YaST system configuration
+front end does not provide any means to work with bonding devices.
+Bonding devices can be managed by hand, however, as follows.
+
+ First, if they have not already been configured, configure the
+slave devices. On SLES 9, this is most easily done by running the
+yast2 sysconfig configuration utility. The goal is for to create an
+ifcfg-id file for each slave device. The simplest way to accomplish
+this is to configure the devices for DHCP (this is only to get the
+file ifcfg-id file created; see below for some issues with DHCP). The
+name of the configuration file for each device will be of the form:
+
+ifcfg-id-xx:xx:xx:xx:xx:xx
+
+ Where the "xx" portion will be replaced with the digits from
+the device's permanent MAC address.
+
+ Once the set of ifcfg-id-xx:xx:xx:xx:xx:xx files has been
+created, it is necessary to edit the configuration files for the slave
+devices (the MAC addresses correspond to those of the slave devices).
+Before editing, the file will contain multiple lines, and will look
+something like this:
+
+BOOTPROTO='dhcp'
+STARTMODE='on'
+USERCTL='no'
+UNIQUE='XNzu.WeZGOGF+4wE'
+_nm_name='bus-pci-0001:61:01.0'
+
+ Change the BOOTPROTO and STARTMODE lines to the following:
+
+BOOTPROTO='none'
+STARTMODE='off'
+
+ Do not alter the UNIQUE or _nm_name lines. Remove any other
+lines (USERCTL, etc).
+
+ Once the ifcfg-id-xx:xx:xx:xx:xx:xx files have been modified,
+it's time to create the configuration file for the bonding device
+itself. This file is named ifcfg-bondX, where X is the number of the
+bonding device to create, starting at 0. The first such file is
+ifcfg-bond0, the second is ifcfg-bond1, and so on. The sysconfig
+network configuration system will correctly start multiple instances
+of bonding.
+
+ The contents of the ifcfg-bondX file is as follows:
+
+BOOTPROTO="static"
+BROADCAST="10.0.2.255"
+IPADDR="10.0.2.10"
+NETMASK="255.255.0.0"
+NETWORK="10.0.2.0"
+REMOTE_IPADDR=""
+STARTMODE="onboot"
+BONDING_MASTER="yes"
+BONDING_MODULE_OPTS="mode=active-backup miimon=100"
+BONDING_SLAVE0="eth0"
+BONDING_SLAVE1="bus-pci-0000:06:08.1"
+
+ Replace the sample BROADCAST, IPADDR, NETMASK and NETWORK
+values with the appropriate values for your network.
+
+ The STARTMODE specifies when the device is brought online.
+The possible values are:
+
+ onboot: The device is started at boot time. If you're not
+ sure, this is probably what you want.
+
+ manual: The device is started only when ifup is called
+ manually. Bonding devices may be configured this
+ way if you do not wish them to start automatically
+ at boot for some reason.
+
+ hotplug: The device is started by a hotplug event. This is not
+ a valid choice for a bonding device.
+
+ off or ignore: The device configuration is ignored.
+
+ The line BONDING_MASTER='yes' indicates that the device is a
+bonding master device. The only useful value is "yes."
+
+ The contents of BONDING_MODULE_OPTS are supplied to the
+instance of the bonding module for this device. Specify the options
+for the bonding mode, link monitoring, and so on here. Do not include
+the max_bonds bonding parameter; this will confuse the configuration
+system if you have multiple bonding devices.
+
+ Finally, supply one BONDING_SLAVEn="slave device" for each
+slave. where "n" is an increasing value, one for each slave. The
+"slave device" is either an interface name, e.g., "eth0", or a device
+specifier for the network device. The interface name is easier to
+find, but the ethN names are subject to change at boot time if, e.g.,
+a device early in the sequence has failed. The device specifiers
+(bus-pci-0000:06:08.1 in the example above) specify the physical
+network device, and will not change unless the device's bus location
+changes (for example, it is moved from one PCI slot to another). The
+example above uses one of each type for demonstration purposes; most
+configurations will choose one or the other for all slave devices.
+
+ When all configuration files have been modified or created,
+networking must be restarted for the configuration changes to take
+effect. This can be accomplished via the following:
+
+# /etc/init.d/network restart
+
+ Note that the network control script (/sbin/ifdown) will
+remove the bonding module as part of the network shutdown processing,
+so it is not necessary to remove the module by hand if, e.g., the
+module parameters have changed.
+
+ Also, at this writing, YaST/YaST2 will not manage bonding
+devices (they do not show bonding interfaces on its list of network
+devices). It is necessary to edit the configuration file by hand to
+change the bonding configuration.
+
+ Additional general options and details of the ifcfg file
+format can be found in an example ifcfg template file:
+
+/etc/sysconfig/network/ifcfg.template
+
+ Note that the template does not document the various BONDING_
+settings described above, but does describe many of the other options.
+
+3.1.1 Using DHCP with Sysconfig
+-------------------------------
+
+ Under sysconfig, configuring a device with BOOTPROTO='dhcp'
+will cause it to query DHCP for its IP address information. At this
+writing, this does not function for bonding devices; the scripts
+attempt to obtain the device address from DHCP prior to adding any of
+the slave devices. Without active slaves, the DHCP requests are not
+sent to the network.
+
+3.1.2 Configuring Multiple Bonds with Sysconfig
+-----------------------------------------------
+
+ The sysconfig network initialization system is capable of
+handling multiple bonding devices. All that is necessary is for each
+bonding instance to have an appropriately configured ifcfg-bondX file
+(as described above). Do not specify the "max_bonds" parameter to any
+instance of bonding, as this will confuse sysconfig. If you require
+multiple bonding devices with identical parameters, create multiple
+ifcfg-bondX files.
+
+ Because the sysconfig scripts supply the bonding module
+options in the ifcfg-bondX file, it is not necessary to add them to
+the system /etc/modules.d/*.conf configuration files.
+
+3.2 Configuration with Initscripts Support
+------------------------------------------
+
+ This section applies to distros using a recent version of
+initscripts with bonding support, for example, Red Hat Enterprise Linux
+version 3 or later, Fedora, etc. On these systems, the network
+initialization scripts have knowledge of bonding, and can be configured to
+control bonding devices. Note that older versions of the initscripts
+package have lower levels of support for bonding; this will be noted where
+applicable.
+
+ These distros will not automatically load the network adapter
+driver unless the ethX device is configured with an IP address.
+Because of this constraint, users must manually configure a
+network-script file for all physical adapters that will be members of
+a bondX link. Network script files are located in the directory:
+
+/etc/sysconfig/network-scripts
+
+ The file name must be prefixed with "ifcfg-eth" and suffixed
+with the adapter's physical adapter number. For example, the script
+for eth0 would be named /etc/sysconfig/network-scripts/ifcfg-eth0.
+Place the following text in the file:
+
+DEVICE=eth0
+USERCTL=no
+ONBOOT=yes
+MASTER=bond0
+SLAVE=yes
+BOOTPROTO=none
+
+ The DEVICE= line will be different for every ethX device and
+must correspond with the name of the file, i.e., ifcfg-eth1 must have
+a device line of DEVICE=eth1. The setting of the MASTER= line will
+also depend on the final bonding interface name chosen for your bond.
+As with other network devices, these typically start at 0, and go up
+one for each device, i.e., the first bonding instance is bond0, the
+second is bond1, and so on.
+
+ Next, create a bond network script. The file name for this
+script will be /etc/sysconfig/network-scripts/ifcfg-bondX where X is
+the number of the bond. For bond0 the file is named "ifcfg-bond0",
+for bond1 it is named "ifcfg-bond1", and so on. Within that file,
+place the following text:
+
+DEVICE=bond0
+IPADDR=192.168.1.1
+NETMASK=255.255.255.0
+NETWORK=192.168.1.0
+BROADCAST=192.168.1.255
+ONBOOT=yes
+BOOTPROTO=none
+USERCTL=no
+
+ Be sure to change the networking specific lines (IPADDR,
+NETMASK, NETWORK and BROADCAST) to match your network configuration.
+
+ For later versions of initscripts, such as that found with Fedora
+7 (or later) and Red Hat Enterprise Linux version 5 (or later), it is possible,
+and, indeed, preferable, to specify the bonding options in the ifcfg-bond0
+file, e.g. a line of the format:
+
+BONDING_OPTS="mode=active-backup arp_interval=60 arp_ip_target=192.168.1.254"
+
+ will configure the bond with the specified options. The options
+specified in BONDING_OPTS are identical to the bonding module parameters
+except for the arp_ip_target field when using versions of initscripts older
+than and 8.57 (Fedora 8) and 8.45.19 (Red Hat Enterprise Linux 5.2). When
+using older versions each target should be included as a separate option and
+should be preceded by a '+' to indicate it should be added to the list of
+queried targets, e.g.,
+
+ arp_ip_target=+192.168.1.1 arp_ip_target=+192.168.1.2
+
+ is the proper syntax to specify multiple targets. When specifying
+options via BONDING_OPTS, it is not necessary to edit /etc/modprobe.d/*.conf.
+
+ For even older versions of initscripts that do not support
+BONDING_OPTS, it is necessary to edit /etc/modprobe.d/*.conf, depending upon
+your distro) to load the bonding module with your desired options when the
+bond0 interface is brought up. The following lines in /etc/modprobe.d/*.conf
+will load the bonding module, and select its options:
+
+alias bond0 bonding
+options bond0 mode=balance-alb miimon=100
+
+ Replace the sample parameters with the appropriate set of
+options for your configuration.
+
+ Finally run "/etc/rc.d/init.d/network restart" as root. This
+will restart the networking subsystem and your bond link should be now
+up and running.
+
+3.2.1 Using DHCP with Initscripts
+---------------------------------
+
+ Recent versions of initscripts (the versions supplied with Fedora
+Core 3 and Red Hat Enterprise Linux 4, or later versions, are reported to
+work) have support for assigning IP information to bonding devices via
+DHCP.
+
+ To configure bonding for DHCP, configure it as described
+above, except replace the line "BOOTPROTO=none" with "BOOTPROTO=dhcp"
+and add a line consisting of "TYPE=Bonding". Note that the TYPE value
+is case sensitive.
+
+3.2.2 Configuring Multiple Bonds with Initscripts
+-------------------------------------------------
+
+ Initscripts packages that are included with Fedora 7 and Red Hat
+Enterprise Linux 5 support multiple bonding interfaces by simply
+specifying the appropriate BONDING_OPTS= in ifcfg-bondX where X is the
+number of the bond. This support requires sysfs support in the kernel,
+and a bonding driver of version 3.0.0 or later. Other configurations may
+not support this method for specifying multiple bonding interfaces; for
+those instances, see the "Configuring Multiple Bonds Manually" section,
+below.
+
+3.3 Configuring Bonding Manually with iproute2
+-----------------------------------------------
+
+ This section applies to distros whose network initialization
+scripts (the sysconfig or initscripts package) do not have specific
+knowledge of bonding. One such distro is SuSE Linux Enterprise Server
+version 8.
+
+ The general method for these systems is to place the bonding
+module parameters into a config file in /etc/modprobe.d/ (as
+appropriate for the installed distro), then add modprobe and/or
+`ip link` commands to the system's global init script. The name of
+the global init script differs; for sysconfig, it is
+/etc/init.d/boot.local and for initscripts it is /etc/rc.d/rc.local.
+
+ For example, if you wanted to make a simple bond of two e100
+devices (presumed to be eth0 and eth1), and have it persist across
+reboots, edit the appropriate file (/etc/init.d/boot.local or
+/etc/rc.d/rc.local), and add the following:
+
+modprobe bonding mode=balance-alb miimon=100
+modprobe e100
+ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up
+ip link set eth0 master bond0
+ip link set eth1 master bond0
+
+ Replace the example bonding module parameters and bond0
+network configuration (IP address, netmask, etc) with the appropriate
+values for your configuration.
+
+ Unfortunately, this method will not provide support for the
+ifup and ifdown scripts on the bond devices. To reload the bonding
+configuration, it is necessary to run the initialization script, e.g.,
+
+# /etc/init.d/boot.local
+
+ or
+
+# /etc/rc.d/rc.local
+
+ It may be desirable in such a case to create a separate script
+which only initializes the bonding configuration, then call that
+separate script from within boot.local. This allows for bonding to be
+enabled without re-running the entire global init script.
+
+ To shut down the bonding devices, it is necessary to first
+mark the bonding device itself as being down, then remove the
+appropriate device driver modules. For our example above, you can do
+the following:
+
+# ifconfig bond0 down
+# rmmod bonding
+# rmmod e100
+
+ Again, for convenience, it may be desirable to create a script
+with these commands.
+
+
+3.3.1 Configuring Multiple Bonds Manually
+-----------------------------------------
+
+ This section contains information on configuring multiple
+bonding devices with differing options for those systems whose network
+initialization scripts lack support for configuring multiple bonds.
+
+ If you require multiple bonding devices, but all with the same
+options, you may wish to use the "max_bonds" module parameter,
+documented above.
+
+ To create multiple bonding devices with differing options, it is
+preferable to use bonding parameters exported by sysfs, documented in the
+section below.
+
+ For versions of bonding without sysfs support, the only means to
+provide multiple instances of bonding with differing options is to load
+the bonding driver multiple times. Note that current versions of the
+sysconfig network initialization scripts handle this automatically; if
+your distro uses these scripts, no special action is needed. See the
+section Configuring Bonding Devices, above, if you're not sure about your
+network initialization scripts.
+
+ To load multiple instances of the module, it is necessary to
+specify a different name for each instance (the module loading system
+requires that every loaded module, even multiple instances of the same
+module, have a unique name). This is accomplished by supplying multiple
+sets of bonding options in /etc/modprobe.d/*.conf, for example:
+
+alias bond0 bonding
+options bond0 -o bond0 mode=balance-rr miimon=100
+
+alias bond1 bonding
+options bond1 -o bond1 mode=balance-alb miimon=50
+
+ will load the bonding module two times. The first instance is
+named "bond0" and creates the bond0 device in balance-rr mode with an
+miimon of 100. The second instance is named "bond1" and creates the
+bond1 device in balance-alb mode with an miimon of 50.
+
+ In some circumstances (typically with older distributions),
+the above does not work, and the second bonding instance never sees
+its options. In that case, the second options line can be substituted
+as follows:
+
+install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
+ mode=balance-alb miimon=50
+
+ This may be repeated any number of times, specifying a new and
+unique name in place of bond1 for each subsequent instance.
+
+ It has been observed that some Red Hat supplied kernels are unable
+to rename modules at load time (the "-o bond1" part). Attempts to pass
+that option to modprobe will produce an "Operation not permitted" error.
+This has been reported on some Fedora Core kernels, and has been seen on
+RHEL 4 as well. On kernels exhibiting this problem, it will be impossible
+to configure multiple bonds with differing parameters (as they are older
+kernels, and also lack sysfs support).
+
+3.4 Configuring Bonding Manually via Sysfs
+------------------------------------------
+
+ Starting with version 3.0.0, Channel Bonding may be configured
+via the sysfs interface. This interface allows dynamic configuration
+of all bonds in the system without unloading the module. It also
+allows for adding and removing bonds at runtime. Ifenslave is no
+longer required, though it is still supported.
+
+ Use of the sysfs interface allows you to use multiple bonds
+with different configurations without having to reload the module.
+It also allows you to use multiple, differently configured bonds when
+bonding is compiled into the kernel.
+
+ You must have the sysfs filesystem mounted to configure
+bonding this way. The examples in this document assume that you
+are using the standard mount point for sysfs, e.g. /sys. If your
+sysfs filesystem is mounted elsewhere, you will need to adjust the
+example paths accordingly.
+
+Creating and Destroying Bonds
+-----------------------------
+To add a new bond foo:
+# echo +foo > /sys/class/net/bonding_masters
+
+To remove an existing bond bar:
+# echo -bar > /sys/class/net/bonding_masters
+
+To show all existing bonds:
+# cat /sys/class/net/bonding_masters
+
+NOTE: due to 4K size limitation of sysfs files, this list may be
+truncated if you have more than a few hundred bonds. This is unlikely
+to occur under normal operating conditions.
+
+Adding and Removing Slaves
+--------------------------
+ Interfaces may be enslaved to a bond using the file
+/sys/class/net/<bond>/bonding/slaves. The semantics for this file
+are the same as for the bonding_masters file.
+
+To enslave interface eth0 to bond bond0:
+# ifconfig bond0 up
+# echo +eth0 > /sys/class/net/bond0/bonding/slaves
+
+To free slave eth0 from bond bond0:
+# echo -eth0 > /sys/class/net/bond0/bonding/slaves
+
+ When an interface is enslaved to a bond, symlinks between the
+two are created in the sysfs filesystem. In this case, you would get
+/sys/class/net/bond0/slave_eth0 pointing to /sys/class/net/eth0, and
+/sys/class/net/eth0/master pointing to /sys/class/net/bond0.
+
+ This means that you can tell quickly whether or not an
+interface is enslaved by looking for the master symlink. Thus:
+# echo -eth0 > /sys/class/net/eth0/master/bonding/slaves
+will free eth0 from whatever bond it is enslaved to, regardless of
+the name of the bond interface.
+
+Changing a Bond's Configuration
+-------------------------------
+ Each bond may be configured individually by manipulating the
+files located in /sys/class/net/<bond name>/bonding
+
+ The names of these files correspond directly with the command-
+line parameters described elsewhere in this file, and, with the
+exception of arp_ip_target, they accept the same values. To see the
+current setting, simply cat the appropriate file.
+
+ A few examples will be given here; for specific usage
+guidelines for each parameter, see the appropriate section in this
+document.
+
+To configure bond0 for balance-alb mode:
+# ifconfig bond0 down
+# echo 6 > /sys/class/net/bond0/bonding/mode
+ - or -
+# echo balance-alb > /sys/class/net/bond0/bonding/mode
+ NOTE: The bond interface must be down before the mode can be
+changed.
+
+To enable MII monitoring on bond0 with a 1 second interval:
+# echo 1000 > /sys/class/net/bond0/bonding/miimon
+ NOTE: If ARP monitoring is enabled, it will disabled when MII
+monitoring is enabled, and vice-versa.
+
+To add ARP targets:
+# echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
+# echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target
+ NOTE: up to 16 target addresses may be specified.
+
+To remove an ARP target:
+# echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
+
+To configure the interval between learning packet transmits:
+# echo 12 > /sys/class/net/bond0/bonding/lp_interval
+ NOTE: the lp_inteval is the number of seconds between instances where
+the bonding driver sends learning packets to each slaves peer switch. The
+default interval is 1 second.
+
+Example Configuration
+---------------------
+ We begin with the same example that is shown in section 3.3,
+executed with sysfs, and without using ifenslave.
+
+ To make a simple bond of two e100 devices (presumed to be eth0
+and eth1), and have it persist across reboots, edit the appropriate
+file (/etc/init.d/boot.local or /etc/rc.d/rc.local), and add the
+following:
+
+modprobe bonding
+modprobe e100
+echo balance-alb > /sys/class/net/bond0/bonding/mode
+ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up
+echo 100 > /sys/class/net/bond0/bonding/miimon
+echo +eth0 > /sys/class/net/bond0/bonding/slaves
+echo +eth1 > /sys/class/net/bond0/bonding/slaves
+
+ To add a second bond, with two e1000 interfaces in
+active-backup mode, using ARP monitoring, add the following lines to
+your init script:
+
+modprobe e1000
+echo +bond1 > /sys/class/net/bonding_masters
+echo active-backup > /sys/class/net/bond1/bonding/mode
+ifconfig bond1 192.168.2.1 netmask 255.255.255.0 up
+echo +192.168.2.100 /sys/class/net/bond1/bonding/arp_ip_target
+echo 2000 > /sys/class/net/bond1/bonding/arp_interval
+echo +eth2 > /sys/class/net/bond1/bonding/slaves
+echo +eth3 > /sys/class/net/bond1/bonding/slaves
+
+3.5 Configuration with Interfaces Support
+-----------------------------------------
+
+ This section applies to distros which use /etc/network/interfaces file
+to describe network interface configuration, most notably Debian and it's
+derivatives.
+
+ The ifup and ifdown commands on Debian don't support bonding out of
+the box. The ifenslave-2.6 package should be installed to provide bonding
+support. Once installed, this package will provide bond-* options to be used
+into /etc/network/interfaces.
+
+ Note that ifenslave-2.6 package will load the bonding module and use
+the ifenslave command when appropriate.
+
+Example Configurations
+----------------------
+
+In /etc/network/interfaces, the following stanza will configure bond0, in
+active-backup mode, with eth0 and eth1 as slaves.
+
+auto bond0
+iface bond0 inet dhcp
+ bond-slaves eth0 eth1
+ bond-mode active-backup
+ bond-miimon 100
+ bond-primary eth0 eth1
+
+If the above configuration doesn't work, you might have a system using
+upstart for system startup. This is most notably true for recent
+Ubuntu versions. The following stanza in /etc/network/interfaces will
+produce the same result on those systems.
+
+auto bond0
+iface bond0 inet dhcp
+ bond-slaves none
+ bond-mode active-backup
+ bond-miimon 100
+
+auto eth0
+iface eth0 inet manual
+ bond-master bond0
+ bond-primary eth0 eth1
+
+auto eth1
+iface eth1 inet manual
+ bond-master bond0
+ bond-primary eth0 eth1
+
+For a full list of bond-* supported options in /etc/network/interfaces and some
+more advanced examples tailored to you particular distros, see the files in
+/usr/share/doc/ifenslave-2.6.
+
+3.6 Overriding Configuration for Special Cases
+----------------------------------------------
+
+When using the bonding driver, the physical port which transmits a frame is
+typically selected by the bonding driver, and is not relevant to the user or
+system administrator. The output port is simply selected using the policies of
+the selected bonding mode. On occasion however, it is helpful to direct certain
+classes of traffic to certain physical interfaces on output to implement
+slightly more complex policies. For example, to reach a web server over a
+bonded interface in which eth0 connects to a private network, while eth1
+connects via a public network, it may be desirous to bias the bond to send said
+traffic over eth0 first, using eth1 only as a fall back, while all other traffic
+can safely be sent over either interface. Such configurations may be achieved
+using the traffic control utilities inherent in linux.
+
+By default the bonding driver is multiqueue aware and 16 queues are created
+when the driver initializes (see Documentation/networking/multiqueue.txt
+for details). If more or less queues are desired the module parameter
+tx_queues can be used to change this value. There is no sysfs parameter
+available as the allocation is done at module init time.
+
+The output of the file /proc/net/bonding/bondX has changed so the output Queue
+ID is now printed for each slave:
+
+Bonding Mode: fault-tolerance (active-backup)
+Primary Slave: None
+Currently Active Slave: eth0
+MII Status: up
+MII Polling Interval (ms): 0
+Up Delay (ms): 0
+Down Delay (ms): 0
+
+Slave Interface: eth0
+MII Status: up
+Link Failure Count: 0
+Permanent HW addr: 00:1a:a0:12:8f:cb
+Slave queue ID: 0
+
+Slave Interface: eth1
+MII Status: up
+Link Failure Count: 0
+Permanent HW addr: 00:1a:a0:12:8f:cc
+Slave queue ID: 2
+
+The queue_id for a slave can be set using the command:
+
+# echo "eth1:2" > /sys/class/net/bond0/bonding/queue_id
+
+Any interface that needs a queue_id set should set it with multiple calls
+like the one above until proper priorities are set for all interfaces. On
+distributions that allow configuration via initscripts, multiple 'queue_id'
+arguments can be added to BONDING_OPTS to set all needed slave queues.
+
+These queue id's can be used in conjunction with the tc utility to configure
+a multiqueue qdisc and filters to bias certain traffic to transmit on certain
+slave devices. For instance, say we wanted, in the above configuration to
+force all traffic bound to 192.168.1.100 to use eth1 in the bond as its output
+device. The following commands would accomplish this:
+
+# tc qdisc add dev bond0 handle 1 root multiq
+
+# tc filter add dev bond0 protocol ip parent 1: prio 1 u32 match ip dst \
+ 192.168.1.100 action skbedit queue_mapping 2
+
+These commands tell the kernel to attach a multiqueue queue discipline to the
+bond0 interface and filter traffic enqueued to it, such that packets with a dst
+ip of 192.168.1.100 have their output queue mapping value overwritten to 2.
+This value is then passed into the driver, causing the normal output path
+selection policy to be overridden, selecting instead qid 2, which maps to eth1.
+
+Note that qid values begin at 1. Qid 0 is reserved to initiate to the driver
+that normal output policy selection should take place. One benefit to simply
+leaving the qid for a slave to 0 is the multiqueue awareness in the bonding
+driver that is now present. This awareness allows tc filters to be placed on
+slave devices as well as bond devices and the bonding driver will simply act as
+a pass-through for selecting output queues on the slave device rather than
+output port selection.
+
+This feature first appeared in bonding driver version 3.7.0 and support for
+output slave selection was limited to round-robin and active-backup modes.
+
+4 Querying Bonding Configuration
+=================================
+
+4.1 Bonding Configuration
+-------------------------
+
+ Each bonding device has a read-only file residing in the
+/proc/net/bonding directory. The file contents include information
+about the bonding configuration, options and state of each slave.
+
+ For example, the contents of /proc/net/bonding/bond0 after the
+driver is loaded with parameters of mode=0 and miimon=1000 is
+generally as follows:
+
+ Ethernet Channel Bonding Driver: 2.6.1 (October 29, 2004)
+ Bonding Mode: load balancing (round-robin)
+ Currently Active Slave: eth0
+ MII Status: up
+ MII Polling Interval (ms): 1000
+ Up Delay (ms): 0
+ Down Delay (ms): 0
+
+ Slave Interface: eth1
+ MII Status: up
+ Link Failure Count: 1
+
+ Slave Interface: eth0
+ MII Status: up
+ Link Failure Count: 1
+
+ The precise format and contents will change depending upon the
+bonding configuration, state, and version of the bonding driver.
+
+4.2 Network configuration
+-------------------------
+
+ The network configuration can be inspected using the ifconfig
+command. Bonding devices will have the MASTER flag set; Bonding slave
+devices will have the SLAVE flag set. The ifconfig output does not
+contain information on which slaves are associated with which masters.
+
+ In the example below, the bond0 interface is the master
+(MASTER) while eth0 and eth1 are slaves (SLAVE). Notice all slaves of
+bond0 have the same MAC address (HWaddr) as bond0 for all modes except
+TLB and ALB that require a unique MAC address for each slave.
+
+# /sbin/ifconfig
+bond0 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4
+ inet addr:XXX.XXX.XXX.YYY Bcast:XXX.XXX.XXX.255 Mask:255.255.252.0
+ UP BROADCAST RUNNING MASTER MULTICAST MTU:1500 Metric:1
+ RX packets:7224794 errors:0 dropped:0 overruns:0 frame:0
+ TX packets:3286647 errors:1 dropped:0 overruns:1 carrier:0
+ collisions:0 txqueuelen:0
+
+eth0 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4
+ UP BROADCAST RUNNING SLAVE MULTICAST MTU:1500 Metric:1
+ RX packets:3573025 errors:0 dropped:0 overruns:0 frame:0
+ TX packets:1643167 errors:1 dropped:0 overruns:1 carrier:0
+ collisions:0 txqueuelen:100
+ Interrupt:10 Base address:0x1080
+
+eth1 Link encap:Ethernet HWaddr 00:C0:F0:1F:37:B4
+ UP BROADCAST RUNNING SLAVE MULTICAST MTU:1500 Metric:1
+ RX packets:3651769 errors:0 dropped:0 overruns:0 frame:0
+ TX packets:1643480 errors:0 dropped:0 overruns:0 carrier:0
+ collisions:0 txqueuelen:100
+ Interrupt:9 Base address:0x1400
+
+5. Switch Configuration
+=======================
+
+ For this section, "switch" refers to whatever system the
+bonded devices are directly connected to (i.e., where the other end of
+the cable plugs into). This may be an actual dedicated switch device,
+or it may be another regular system (e.g., another computer running
+Linux),
+
+ The active-backup, balance-tlb and balance-alb modes do not
+require any specific configuration of the switch.
+
+ The 802.3ad mode requires that the switch have the appropriate
+ports configured as an 802.3ad aggregation. The precise method used
+to configure this varies from switch to switch, but, for example, a
+Cisco 3550 series switch requires that the appropriate ports first be
+grouped together in a single etherchannel instance, then that
+etherchannel is set to mode "lacp" to enable 802.3ad (instead of
+standard EtherChannel).
+
+ The balance-rr, balance-xor and broadcast modes generally
+require that the switch have the appropriate ports grouped together.
+The nomenclature for such a group differs between switches, it may be
+called an "etherchannel" (as in the Cisco example, above), a "trunk
+group" or some other similar variation. For these modes, each switch
+will also have its own configuration options for the switch's transmit
+policy to the bond. Typical choices include XOR of either the MAC or
+IP addresses. The transmit policy of the two peers does not need to
+match. For these three modes, the bonding mode really selects a
+transmit policy for an EtherChannel group; all three will interoperate
+with another EtherChannel group.
+
+
+6. 802.1q VLAN Support
+======================
+
+ It is possible to configure VLAN devices over a bond interface
+using the 8021q driver. However, only packets coming from the 8021q
+driver and passing through bonding will be tagged by default. Self
+generated packets, for example, bonding's learning packets or ARP
+packets generated by either ALB mode or the ARP monitor mechanism, are
+tagged internally by bonding itself. As a result, bonding must
+"learn" the VLAN IDs configured above it, and use those IDs to tag
+self generated packets.
+
+ For reasons of simplicity, and to support the use of adapters
+that can do VLAN hardware acceleration offloading, the bonding
+interface declares itself as fully hardware offloading capable, it gets
+the add_vid/kill_vid notifications to gather the necessary
+information, and it propagates those actions to the slaves. In case
+of mixed adapter types, hardware accelerated tagged packets that
+should go through an adapter that is not offloading capable are
+"un-accelerated" by the bonding driver so the VLAN tag sits in the
+regular location.
+
+ VLAN interfaces *must* be added on top of a bonding interface
+only after enslaving at least one slave. The bonding interface has a
+hardware address of 00:00:00:00:00:00 until the first slave is added.
+If the VLAN interface is created prior to the first enslavement, it
+would pick up the all-zeroes hardware address. Once the first slave
+is attached to the bond, the bond device itself will pick up the
+slave's hardware address, which is then available for the VLAN device.
+
+ Also, be aware that a similar problem can occur if all slaves
+are released from a bond that still has one or more VLAN interfaces on
+top of it. When a new slave is added, the bonding interface will
+obtain its hardware address from the first slave, which might not
+match the hardware address of the VLAN interfaces (which was
+ultimately copied from an earlier slave).
+
+ There are two methods to insure that the VLAN device operates
+with the correct hardware address if all slaves are removed from a
+bond interface:
+
+ 1. Remove all VLAN interfaces then recreate them
+
+ 2. Set the bonding interface's hardware address so that it
+matches the hardware address of the VLAN interfaces.
+
+ Note that changing a VLAN interface's HW address would set the
+underlying device -- i.e. the bonding interface -- to promiscuous
+mode, which might not be what you want.
+
+
+7. Link Monitoring
+==================
+
+ The bonding driver at present supports two schemes for
+monitoring a slave device's link state: the ARP monitor and the MII
+monitor.
+
+ At the present time, due to implementation restrictions in the
+bonding driver itself, it is not possible to enable both ARP and MII
+monitoring simultaneously.
+
+7.1 ARP Monitor Operation
+-------------------------
+
+ The ARP monitor operates as its name suggests: it sends ARP
+queries to one or more designated peer systems on the network, and
+uses the response as an indication that the link is operating. This
+gives some assurance that traffic is actually flowing to and from one
+or more peers on the local network.
+
+ The ARP monitor relies on the device driver itself to verify
+that traffic is flowing. In particular, the driver must keep up to
+date the last receive time, dev->last_rx, and transmit start time,
+dev->trans_start. If these are not updated by the driver, then the
+ARP monitor will immediately fail any slaves using that driver, and
+those slaves will stay down. If networking monitoring (tcpdump, etc)
+shows the ARP requests and replies on the network, then it may be that
+your device driver is not updating last_rx and trans_start.
+
+7.2 Configuring Multiple ARP Targets
+------------------------------------
+
+ While ARP monitoring can be done with just one target, it can
+be useful in a High Availability setup to have several targets to
+monitor. In the case of just one target, the target itself may go
+down or have a problem making it unresponsive to ARP requests. Having
+an additional target (or several) increases the reliability of the ARP
+monitoring.
+
+ Multiple ARP targets must be separated by commas as follows:
+
+# example options for ARP monitoring with three targets
+alias bond0 bonding
+options bond0 arp_interval=60 arp_ip_target=192.168.0.1,192.168.0.3,192.168.0.9
+
+ For just a single target the options would resemble:
+
+# example options for ARP monitoring with one target
+alias bond0 bonding
+options bond0 arp_interval=60 arp_ip_target=192.168.0.100
+
+
+7.3 MII Monitor Operation
+-------------------------
+
+ The MII monitor monitors only the carrier state of the local
+network interface. It accomplishes this in one of three ways: by
+depending upon the device driver to maintain its carrier state, by
+querying the device's MII registers, or by making an ethtool query to
+the device.
+
+ If the use_carrier module parameter is 1 (the default value),
+then the MII monitor will rely on the driver for carrier state
+information (via the netif_carrier subsystem). As explained in the
+use_carrier parameter information, above, if the MII monitor fails to
+detect carrier loss on the device (e.g., when the cable is physically
+disconnected), it may be that the driver does not support
+netif_carrier.
+
+ If use_carrier is 0, then the MII monitor will first query the
+device's (via ioctl) MII registers and check the link state. If that
+request fails (not just that it returns carrier down), then the MII
+monitor will make an ethtool ETHOOL_GLINK request to attempt to obtain
+the same information. If both methods fail (i.e., the driver either
+does not support or had some error in processing both the MII register
+and ethtool requests), then the MII monitor will assume the link is
+up.
+
+8. Potential Sources of Trouble
+===============================
+
+8.1 Adventures in Routing
+-------------------------
+
+ When bonding is configured, it is important that the slave
+devices not have routes that supersede routes of the master (or,
+generally, not have routes at all). For example, suppose the bonding
+device bond0 has two slaves, eth0 and eth1, and the routing table is
+as follows:
+
+Kernel IP routing table
+Destination Gateway Genmask Flags MSS Window irtt Iface
+10.0.0.0 0.0.0.0 255.255.0.0 U 40 0 0 eth0
+10.0.0.0 0.0.0.0 255.255.0.0 U 40 0 0 eth1
+10.0.0.0 0.0.0.0 255.255.0.0 U 40 0 0 bond0
+127.0.0.0 0.0.0.0 255.0.0.0 U 40 0 0 lo
+
+ This routing configuration will likely still update the
+receive/transmit times in the driver (needed by the ARP monitor), but
+may bypass the bonding driver (because outgoing traffic to, in this
+case, another host on network 10 would use eth0 or eth1 before bond0).
+
+ The ARP monitor (and ARP itself) may become confused by this
+configuration, because ARP requests (generated by the ARP monitor)
+will be sent on one interface (bond0), but the corresponding reply
+will arrive on a different interface (eth0). This reply looks to ARP
+as an unsolicited ARP reply (because ARP matches replies on an
+interface basis), and is discarded. The MII monitor is not affected
+by the state of the routing table.
+
+ The solution here is simply to insure that slaves do not have
+routes of their own, and if for some reason they must, those routes do
+not supersede routes of their master. This should generally be the
+case, but unusual configurations or errant manual or automatic static
+route additions may cause trouble.
+
+8.2 Ethernet Device Renaming
+----------------------------
+
+ On systems with network configuration scripts that do not
+associate physical devices directly with network interface names (so
+that the same physical device always has the same "ethX" name), it may
+be necessary to add some special logic to config files in
+/etc/modprobe.d/.
+
+ For example, given a modules.conf containing the following:
+
+alias bond0 bonding
+options bond0 mode=some-mode miimon=50
+alias eth0 tg3
+alias eth1 tg3
+alias eth2 e1000
+alias eth3 e1000
+
+ If neither eth0 and eth1 are slaves to bond0, then when the
+bond0 interface comes up, the devices may end up reordered. This
+happens because bonding is loaded first, then its slave device's
+drivers are loaded next. Since no other drivers have been loaded,
+when the e1000 driver loads, it will receive eth0 and eth1 for its
+devices, but the bonding configuration tries to enslave eth2 and eth3
+(which may later be assigned to the tg3 devices).
+
+ Adding the following:
+
+add above bonding e1000 tg3
+
+ causes modprobe to load e1000 then tg3, in that order, when
+bonding is loaded. This command is fully documented in the
+modules.conf manual page.
+
+ On systems utilizing modprobe an equivalent problem can occur.
+In this case, the following can be added to config files in
+/etc/modprobe.d/ as:
+
+softdep bonding pre: tg3 e1000
+
+ This will load tg3 and e1000 modules before loading the bonding one.
+Full documentation on this can be found in the modprobe.d and modprobe
+manual pages.
+
+8.3. Painfully Slow Or No Failed Link Detection By Miimon
+---------------------------------------------------------
+
+ By default, bonding enables the use_carrier option, which
+instructs bonding to trust the driver to maintain carrier state.
+
+ As discussed in the options section, above, some drivers do
+not support the netif_carrier_on/_off link state tracking system.
+With use_carrier enabled, bonding will always see these links as up,
+regardless of their actual state.
+
+ Additionally, other drivers do support netif_carrier, but do
+not maintain it in real time, e.g., only polling the link state at
+some fixed interval. In this case, miimon will detect failures, but
+only after some long period of time has expired. If it appears that
+miimon is very slow in detecting link failures, try specifying
+use_carrier=0 to see if that improves the failure detection time. If
+it does, then it may be that the driver checks the carrier state at a
+fixed interval, but does not cache the MII register values (so the
+use_carrier=0 method of querying the registers directly works). If
+use_carrier=0 does not improve the failover, then the driver may cache
+the registers, or the problem may be elsewhere.
+
+ Also, remember that miimon only checks for the device's
+carrier state. It has no way to determine the state of devices on or
+beyond other ports of a switch, or if a switch is refusing to pass
+traffic while still maintaining carrier on.
+
+9. SNMP agents
+===============
+
+ If running SNMP agents, the bonding driver should be loaded
+before any network drivers participating in a bond. This requirement
+is due to the interface index (ipAdEntIfIndex) being associated to
+the first interface found with a given IP address. That is, there is
+only one ipAdEntIfIndex for each IP address. For example, if eth0 and
+eth1 are slaves of bond0 and the driver for eth0 is loaded before the
+bonding driver, the interface for the IP address will be associated
+with the eth0 interface. This configuration is shown below, the IP
+address 192.168.1.1 has an interface index of 2 which indexes to eth0
+in the ifDescr table (ifDescr.2).
+
+ interfaces.ifTable.ifEntry.ifDescr.1 = lo
+ interfaces.ifTable.ifEntry.ifDescr.2 = eth0
+ interfaces.ifTable.ifEntry.ifDescr.3 = eth1
+ interfaces.ifTable.ifEntry.ifDescr.4 = eth2
+ interfaces.ifTable.ifEntry.ifDescr.5 = eth3
+ interfaces.ifTable.ifEntry.ifDescr.6 = bond0
+ ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.10.10.10 = 5
+ ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.192.168.1.1 = 2
+ ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.74.20.94 = 4
+ ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.127.0.0.1 = 1
+
+ This problem is avoided by loading the bonding driver before
+any network drivers participating in a bond. Below is an example of
+loading the bonding driver first, the IP address 192.168.1.1 is
+correctly associated with ifDescr.2.
+
+ interfaces.ifTable.ifEntry.ifDescr.1 = lo
+ interfaces.ifTable.ifEntry.ifDescr.2 = bond0
+ interfaces.ifTable.ifEntry.ifDescr.3 = eth0
+ interfaces.ifTable.ifEntry.ifDescr.4 = eth1
+ interfaces.ifTable.ifEntry.ifDescr.5 = eth2
+ interfaces.ifTable.ifEntry.ifDescr.6 = eth3
+ ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.10.10.10 = 6
+ ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.192.168.1.1 = 2
+ ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.74.20.94 = 5
+ ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.127.0.0.1 = 1
+
+ While some distributions may not report the interface name in
+ifDescr, the association between the IP address and IfIndex remains
+and SNMP functions such as Interface_Scan_Next will report that
+association.
+
+10. Promiscuous mode
+====================
+
+ When running network monitoring tools, e.g., tcpdump, it is
+common to enable promiscuous mode on the device, so that all traffic
+is seen (instead of seeing only traffic destined for the local host).
+The bonding driver handles promiscuous mode changes to the bonding
+master device (e.g., bond0), and propagates the setting to the slave
+devices.
+
+ For the balance-rr, balance-xor, broadcast, and 802.3ad modes,
+the promiscuous mode setting is propagated to all slaves.
+
+ For the active-backup, balance-tlb and balance-alb modes, the
+promiscuous mode setting is propagated only to the active slave.
+
+ For balance-tlb mode, the active slave is the slave currently
+receiving inbound traffic.
+
+ For balance-alb mode, the active slave is the slave used as a
+"primary." This slave is used for mode-specific control traffic, for
+sending to peers that are unassigned or if the load is unbalanced.
+
+ For the active-backup, balance-tlb and balance-alb modes, when
+the active slave changes (e.g., due to a link failure), the
+promiscuous setting will be propagated to the new active slave.
+
+11. Configuring Bonding for High Availability
+=============================================
+
+ High Availability refers to configurations that provide
+maximum network availability by having redundant or backup devices,
+links or switches between the host and the rest of the world. The
+goal is to provide the maximum availability of network connectivity
+(i.e., the network always works), even though other configurations
+could provide higher throughput.
+
+11.1 High Availability in a Single Switch Topology
+--------------------------------------------------
+
+ If two hosts (or a host and a single switch) are directly
+connected via multiple physical links, then there is no availability
+penalty to optimizing for maximum bandwidth. In this case, there is
+only one switch (or peer), so if it fails, there is no alternative
+access to fail over to. Additionally, the bonding load balance modes
+support link monitoring of their members, so if individual links fail,
+the load will be rebalanced across the remaining devices.
+
+ See Section 12, "Configuring Bonding for Maximum Throughput"
+for information on configuring bonding with one peer device.
+
+11.2 High Availability in a Multiple Switch Topology
+----------------------------------------------------
+
+ With multiple switches, the configuration of bonding and the
+network changes dramatically. In multiple switch topologies, there is
+a trade off between network availability and usable bandwidth.
+
+ Below is a sample network, configured to maximize the
+availability of the network:
+
+ | |
+ |port3 port3|
+ +-----+----+ +-----+----+
+ | |port2 ISL port2| |
+ | switch A +--------------------------+ switch B |
+ | | | |
+ +-----+----+ +-----++---+
+ |port1 port1|
+ | +-------+ |
+ +-------------+ host1 +---------------+
+ eth0 +-------+ eth1
+
+ In this configuration, there is a link between the two
+switches (ISL, or inter switch link), and multiple ports connecting to
+the outside world ("port3" on each switch). There is no technical
+reason that this could not be extended to a third switch.
+
+11.2.1 HA Bonding Mode Selection for Multiple Switch Topology
+-------------------------------------------------------------
+
+ In a topology such as the example above, the active-backup and
+broadcast modes are the only useful bonding modes when optimizing for
+availability; the other modes require all links to terminate on the
+same peer for them to behave rationally.
+
+active-backup: This is generally the preferred mode, particularly if
+ the switches have an ISL and play together well. If the
+ network configuration is such that one switch is specifically
+ a backup switch (e.g., has lower capacity, higher cost, etc),
+ then the primary option can be used to insure that the
+ preferred link is always used when it is available.
+
+broadcast: This mode is really a special purpose mode, and is suitable
+ only for very specific needs. For example, if the two
+ switches are not connected (no ISL), and the networks beyond
+ them are totally independent. In this case, if it is
+ necessary for some specific one-way traffic to reach both
+ independent networks, then the broadcast mode may be suitable.
+
+11.2.2 HA Link Monitoring Selection for Multiple Switch Topology
+----------------------------------------------------------------
+
+ The choice of link monitoring ultimately depends upon your
+switch. If the switch can reliably fail ports in response to other
+failures, then either the MII or ARP monitors should work. For
+example, in the above example, if the "port3" link fails at the remote
+end, the MII monitor has no direct means to detect this. The ARP
+monitor could be configured with a target at the remote end of port3,
+thus detecting that failure without switch support.
+
+ In general, however, in a multiple switch topology, the ARP
+monitor can provide a higher level of reliability in detecting end to
+end connectivity failures (which may be caused by the failure of any
+individual component to pass traffic for any reason). Additionally,
+the ARP monitor should be configured with multiple targets (at least
+one for each switch in the network). This will insure that,
+regardless of which switch is active, the ARP monitor has a suitable
+target to query.
+
+ Note, also, that of late many switches now support a functionality
+generally referred to as "trunk failover." This is a feature of the
+switch that causes the link state of a particular switch port to be set
+down (or up) when the state of another switch port goes down (or up).
+Its purpose is to propagate link failures from logically "exterior" ports
+to the logically "interior" ports that bonding is able to monitor via
+miimon. Availability and configuration for trunk failover varies by
+switch, but this can be a viable alternative to the ARP monitor when using
+suitable switches.
+
+12. Configuring Bonding for Maximum Throughput
+==============================================
+
+12.1 Maximizing Throughput in a Single Switch Topology
+------------------------------------------------------
+
+ In a single switch configuration, the best method to maximize
+throughput depends upon the application and network environment. The
+various load balancing modes each have strengths and weaknesses in
+different environments, as detailed below.
+
+ For this discussion, we will break down the topologies into
+two categories. Depending upon the destination of most traffic, we
+categorize them into either "gatewayed" or "local" configurations.
+
+ In a gatewayed configuration, the "switch" is acting primarily
+as a router, and the majority of traffic passes through this router to
+other networks. An example would be the following:
+
+
+ +----------+ +----------+
+ | |eth0 port1| | to other networks
+ | Host A +---------------------+ router +------------------->
+ | +---------------------+ | Hosts B and C are out
+ | |eth1 port2| | here somewhere
+ +----------+ +----------+
+
+ The router may be a dedicated router device, or another host
+acting as a gateway. For our discussion, the important point is that
+the majority of traffic from Host A will pass through the router to
+some other network before reaching its final destination.
+
+ In a gatewayed network configuration, although Host A may
+communicate with many other systems, all of its traffic will be sent
+and received via one other peer on the local network, the router.
+
+ Note that the case of two systems connected directly via
+multiple physical links is, for purposes of configuring bonding, the
+same as a gatewayed configuration. In that case, it happens that all
+traffic is destined for the "gateway" itself, not some other network
+beyond the gateway.
+
+ In a local configuration, the "switch" is acting primarily as
+a switch, and the majority of traffic passes through this switch to
+reach other stations on the same network. An example would be the
+following:
+
+ +----------+ +----------+ +--------+
+ | |eth0 port1| +-------+ Host B |
+ | Host A +------------+ switch |port3 +--------+
+ | +------------+ | +--------+
+ | |eth1 port2| +------------------+ Host C |
+ +----------+ +----------+port4 +--------+
+
+
+ Again, the switch may be a dedicated switch device, or another
+host acting as a gateway. For our discussion, the important point is
+that the majority of traffic from Host A is destined for other hosts
+on the same local network (Hosts B and C in the above example).
+
+ In summary, in a gatewayed configuration, traffic to and from
+the bonded device will be to the same MAC level peer on the network
+(the gateway itself, i.e., the router), regardless of its final
+destination. In a local configuration, traffic flows directly to and
+from the final destinations, thus, each destination (Host B, Host C)
+will be addressed directly by their individual MAC addresses.
+
+ This distinction between a gatewayed and a local network
+configuration is important because many of the load balancing modes
+available use the MAC addresses of the local network source and
+destination to make load balancing decisions. The behavior of each
+mode is described below.
+
+
+12.1.1 MT Bonding Mode Selection for Single Switch Topology
+-----------------------------------------------------------
+
+ This configuration is the easiest to set up and to understand,
+although you will have to decide which bonding mode best suits your
+needs. The trade offs for each mode are detailed below:
+
+balance-rr: This mode is the only mode that will permit a single
+ TCP/IP connection to stripe traffic across multiple
+ interfaces. It is therefore the only mode that will allow a
+ single TCP/IP stream to utilize more than one interface's
+ worth of throughput. This comes at a cost, however: the
+ striping generally results in peer systems receiving packets out
+ of order, causing TCP/IP's congestion control system to kick
+ in, often by retransmitting segments.
+
+ It is possible to adjust TCP/IP's congestion limits by
+ altering the net.ipv4.tcp_reordering sysctl parameter. The
+ usual default value is 3. But keep in mind TCP stack is able
+ to automatically increase this when it detects reorders.
+
+ Note that the fraction of packets that will be delivered out of
+ order is highly variable, and is unlikely to be zero. The level
+ of reordering depends upon a variety of factors, including the
+ networking interfaces, the switch, and the topology of the
+ configuration. Speaking in general terms, higher speed network
+ cards produce more reordering (due to factors such as packet
+ coalescing), and a "many to many" topology will reorder at a
+ higher rate than a "many slow to one fast" configuration.
+
+ Many switches do not support any modes that stripe traffic
+ (instead choosing a port based upon IP or MAC level addresses);
+ for those devices, traffic for a particular connection flowing
+ through the switch to a balance-rr bond will not utilize greater
+ than one interface's worth of bandwidth.
+
+ If you are utilizing protocols other than TCP/IP, UDP for
+ example, and your application can tolerate out of order
+ delivery, then this mode can allow for single stream datagram
+ performance that scales near linearly as interfaces are added
+ to the bond.
+
+ This mode requires the switch to have the appropriate ports
+ configured for "etherchannel" or "trunking."
+
+active-backup: There is not much advantage in this network topology to
+ the active-backup mode, as the inactive backup devices are all
+ connected to the same peer as the primary. In this case, a
+ load balancing mode (with link monitoring) will provide the
+ same level of network availability, but with increased
+ available bandwidth. On the plus side, active-backup mode
+ does not require any configuration of the switch, so it may
+ have value if the hardware available does not support any of
+ the load balance modes.
+
+balance-xor: This mode will limit traffic such that packets destined
+ for specific peers will always be sent over the same
+ interface. Since the destination is determined by the MAC
+ addresses involved, this mode works best in a "local" network
+ configuration (as described above), with destinations all on
+ the same local network. This mode is likely to be suboptimal
+ if all your traffic is passed through a single router (i.e., a
+ "gatewayed" network configuration, as described above).
+
+ As with balance-rr, the switch ports need to be configured for
+ "etherchannel" or "trunking."
+
+broadcast: Like active-backup, there is not much advantage to this
+ mode in this type of network topology.
+
+802.3ad: This mode can be a good choice for this type of network
+ topology. The 802.3ad mode is an IEEE standard, so all peers
+ that implement 802.3ad should interoperate well. The 802.3ad
+ protocol includes automatic configuration of the aggregates,
+ so minimal manual configuration of the switch is needed
+ (typically only to designate that some set of devices is
+ available for 802.3ad). The 802.3ad standard also mandates
+ that frames be delivered in order (within certain limits), so
+ in general single connections will not see misordering of
+ packets. The 802.3ad mode does have some drawbacks: the
+ standard mandates that all devices in the aggregate operate at
+ the same speed and duplex. Also, as with all bonding load
+ balance modes other than balance-rr, no single connection will
+ be able to utilize more than a single interface's worth of
+ bandwidth.
+
+ Additionally, the linux bonding 802.3ad implementation
+ distributes traffic by peer (using an XOR of MAC addresses
+ and packet type ID), so in a "gatewayed" configuration, all
+ outgoing traffic will generally use the same device. Incoming
+ traffic may also end up on a single device, but that is
+ dependent upon the balancing policy of the peer's 8023.ad
+ implementation. In a "local" configuration, traffic will be
+ distributed across the devices in the bond.
+
+ Finally, the 802.3ad mode mandates the use of the MII monitor,
+ therefore, the ARP monitor is not available in this mode.
+
+balance-tlb: The balance-tlb mode balances outgoing traffic by peer.
+ Since the balancing is done according to MAC address, in a
+ "gatewayed" configuration (as described above), this mode will
+ send all traffic across a single device. However, in a
+ "local" network configuration, this mode balances multiple
+ local network peers across devices in a vaguely intelligent
+ manner (not a simple XOR as in balance-xor or 802.3ad mode),
+ so that mathematically unlucky MAC addresses (i.e., ones that
+ XOR to the same value) will not all "bunch up" on a single
+ interface.
+
+ Unlike 802.3ad, interfaces may be of differing speeds, and no
+ special switch configuration is required. On the down side,
+ in this mode all incoming traffic arrives over a single
+ interface, this mode requires certain ethtool support in the
+ network device driver of the slave interfaces, and the ARP
+ monitor is not available.
+
+balance-alb: This mode is everything that balance-tlb is, and more.
+ It has all of the features (and restrictions) of balance-tlb,
+ and will also balance incoming traffic from local network
+ peers (as described in the Bonding Module Options section,
+ above).
+
+ The only additional down side to this mode is that the network
+ device driver must support changing the hardware address while
+ the device is open.
+
+12.1.2 MT Link Monitoring for Single Switch Topology
+----------------------------------------------------
+
+ The choice of link monitoring may largely depend upon which
+mode you choose to use. The more advanced load balancing modes do not
+support the use of the ARP monitor, and are thus restricted to using
+the MII monitor (which does not provide as high a level of end to end
+assurance as the ARP monitor).
+
+12.2 Maximum Throughput in a Multiple Switch Topology
+-----------------------------------------------------
+
+ Multiple switches may be utilized to optimize for throughput
+when they are configured in parallel as part of an isolated network
+between two or more systems, for example:
+
+ +-----------+
+ | Host A |
+ +-+---+---+-+
+ | | |
+ +--------+ | +---------+
+ | | |
+ +------+---+ +-----+----+ +-----+----+
+ | Switch A | | Switch B | | Switch C |
+ +------+---+ +-----+----+ +-----+----+
+ | | |
+ +--------+ | +---------+
+ | | |
+ +-+---+---+-+
+ | Host B |
+ +-----------+
+
+ In this configuration, the switches are isolated from one
+another. One reason to employ a topology such as this is for an
+isolated network with many hosts (a cluster configured for high
+performance, for example), using multiple smaller switches can be more
+cost effective than a single larger switch, e.g., on a network with 24
+hosts, three 24 port switches can be significantly less expensive than
+a single 72 port switch.
+
+ If access beyond the network is required, an individual host
+can be equipped with an additional network device connected to an
+external network; this host then additionally acts as a gateway.
+
+12.2.1 MT Bonding Mode Selection for Multiple Switch Topology
+-------------------------------------------------------------
+
+ In actual practice, the bonding mode typically employed in
+configurations of this type is balance-rr. Historically, in this
+network configuration, the usual caveats about out of order packet
+delivery are mitigated by the use of network adapters that do not do
+any kind of packet coalescing (via the use of NAPI, or because the
+device itself does not generate interrupts until some number of
+packets has arrived). When employed in this fashion, the balance-rr
+mode allows individual connections between two hosts to effectively
+utilize greater than one interface's bandwidth.
+
+12.2.2 MT Link Monitoring for Multiple Switch Topology
+------------------------------------------------------
+
+ Again, in actual practice, the MII monitor is most often used
+in this configuration, as performance is given preference over
+availability. The ARP monitor will function in this topology, but its
+advantages over the MII monitor are mitigated by the volume of probes
+needed as the number of systems involved grows (remember that each
+host in the network is configured with bonding).
+
+13. Switch Behavior Issues
+==========================
+
+13.1 Link Establishment and Failover Delays
+-------------------------------------------
+
+ Some switches exhibit undesirable behavior with regard to the
+timing of link up and down reporting by the switch.
+
+ First, when a link comes up, some switches may indicate that
+the link is up (carrier available), but not pass traffic over the
+interface for some period of time. This delay is typically due to
+some type of autonegotiation or routing protocol, but may also occur
+during switch initialization (e.g., during recovery after a switch
+failure). If you find this to be a problem, specify an appropriate
+value to the updelay bonding module option to delay the use of the
+relevant interface(s).
+
+ Second, some switches may "bounce" the link state one or more
+times while a link is changing state. This occurs most commonly while
+the switch is initializing. Again, an appropriate updelay value may
+help.
+
+ Note that when a bonding interface has no active links, the
+driver will immediately reuse the first link that goes up, even if the
+updelay parameter has been specified (the updelay is ignored in this
+case). If there are slave interfaces waiting for the updelay timeout
+to expire, the interface that first went into that state will be
+immediately reused. This reduces down time of the network if the
+value of updelay has been overestimated, and since this occurs only in
+cases with no connectivity, there is no additional penalty for
+ignoring the updelay.
+
+ In addition to the concerns about switch timings, if your
+switches take a long time to go into backup mode, it may be desirable
+to not activate a backup interface immediately after a link goes down.
+Failover may be delayed via the downdelay bonding module option.
+
+13.2 Duplicated Incoming Packets
+--------------------------------
+
+ NOTE: Starting with version 3.0.2, the bonding driver has logic to
+suppress duplicate packets, which should largely eliminate this problem.
+The following description is kept for reference.
+
+ It is not uncommon to observe a short burst of duplicated
+traffic when the bonding device is first used, or after it has been
+idle for some period of time. This is most easily observed by issuing
+a "ping" to some other host on the network, and noticing that the
+output from ping flags duplicates (typically one per slave).
+
+ For example, on a bond in active-backup mode with five slaves
+all connected to one switch, the output may appear as follows:
+
+# ping -n 10.0.4.2
+PING 10.0.4.2 (10.0.4.2) from 10.0.3.10 : 56(84) bytes of data.
+64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.7 ms
+64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+64 bytes from 10.0.4.2: icmp_seq=2 ttl=64 time=0.216 ms
+64 bytes from 10.0.4.2: icmp_seq=3 ttl=64 time=0.267 ms
+64 bytes from 10.0.4.2: icmp_seq=4 ttl=64 time=0.222 ms
+
+ This is not due to an error in the bonding driver, rather, it
+is a side effect of how many switches update their MAC forwarding
+tables. Initially, the switch does not associate the MAC address in
+the packet with a particular switch port, and so it may send the
+traffic to all ports until its MAC forwarding table is updated. Since
+the interfaces attached to the bond may occupy multiple ports on a
+single switch, when the switch (temporarily) floods the traffic to all
+ports, the bond device receives multiple copies of the same packet
+(one per slave device).
+
+ The duplicated packet behavior is switch dependent, some
+switches exhibit this, and some do not. On switches that display this
+behavior, it can be induced by clearing the MAC forwarding table (on
+most Cisco switches, the privileged command "clear mac address-table
+dynamic" will accomplish this).
+
+14. Hardware Specific Considerations
+====================================
+
+ This section contains additional information for configuring
+bonding on specific hardware platforms, or for interfacing bonding
+with particular switches or other devices.
+
+14.1 IBM BladeCenter
+--------------------
+
+ This applies to the JS20 and similar systems.
+
+ On the JS20 blades, the bonding driver supports only
+balance-rr, active-backup, balance-tlb and balance-alb modes. This is
+largely due to the network topology inside the BladeCenter, detailed
+below.
+
+JS20 network adapter information
+--------------------------------
+
+ All JS20s come with two Broadcom Gigabit Ethernet ports
+integrated on the planar (that's "motherboard" in IBM-speak). In the
+BladeCenter chassis, the eth0 port of all JS20 blades is hard wired to
+I/O Module #1; similarly, all eth1 ports are wired to I/O Module #2.
+An add-on Broadcom daughter card can be installed on a JS20 to provide
+two more Gigabit Ethernet ports. These ports, eth2 and eth3, are
+wired to I/O Modules 3 and 4, respectively.
+
+ Each I/O Module may contain either a switch or a passthrough
+module (which allows ports to be directly connected to an external
+switch). Some bonding modes require a specific BladeCenter internal
+network topology in order to function; these are detailed below.
+
+ Additional BladeCenter-specific networking information can be
+found in two IBM Redbooks (www.ibm.com/redbooks):
+
+"IBM eServer BladeCenter Networking Options"
+"IBM eServer BladeCenter Layer 2-7 Network Switching"
+
+BladeCenter networking configuration
+------------------------------------
+
+ Because a BladeCenter can be configured in a very large number
+of ways, this discussion will be confined to describing basic
+configurations.
+
+ Normally, Ethernet Switch Modules (ESMs) are used in I/O
+modules 1 and 2. In this configuration, the eth0 and eth1 ports of a
+JS20 will be connected to different internal switches (in the
+respective I/O modules).
+
+ A passthrough module (OPM or CPM, optical or copper,
+passthrough module) connects the I/O module directly to an external
+switch. By using PMs in I/O module #1 and #2, the eth0 and eth1
+interfaces of a JS20 can be redirected to the outside world and
+connected to a common external switch.
+
+ Depending upon the mix of ESMs and PMs, the network will
+appear to bonding as either a single switch topology (all PMs) or as a
+multiple switch topology (one or more ESMs, zero or more PMs). It is
+also possible to connect ESMs together, resulting in a configuration
+much like the example in "High Availability in a Multiple Switch
+Topology," above.
+
+Requirements for specific modes
+-------------------------------
+
+ The balance-rr mode requires the use of passthrough modules
+for devices in the bond, all connected to an common external switch.
+That switch must be configured for "etherchannel" or "trunking" on the
+appropriate ports, as is usual for balance-rr.
+
+ The balance-alb and balance-tlb modes will function with
+either switch modules or passthrough modules (or a mix). The only
+specific requirement for these modes is that all network interfaces
+must be able to reach all destinations for traffic sent over the
+bonding device (i.e., the network must converge at some point outside
+the BladeCenter).
+
+ The active-backup mode has no additional requirements.
+
+Link monitoring issues
+----------------------
+
+ When an Ethernet Switch Module is in place, only the ARP
+monitor will reliably detect link loss to an external switch. This is
+nothing unusual, but examination of the BladeCenter cabinet would
+suggest that the "external" network ports are the ethernet ports for
+the system, when it fact there is a switch between these "external"
+ports and the devices on the JS20 system itself. The MII monitor is
+only able to detect link failures between the ESM and the JS20 system.
+
+ When a passthrough module is in place, the MII monitor does
+detect failures to the "external" port, which is then directly
+connected to the JS20 system.
+
+Other concerns
+--------------
+
+ The Serial Over LAN (SoL) link is established over the primary
+ethernet (eth0) only, therefore, any loss of link to eth0 will result
+in losing your SoL connection. It will not fail over with other
+network traffic, as the SoL system is beyond the control of the
+bonding driver.
+
+ It may be desirable to disable spanning tree on the switch
+(either the internal Ethernet Switch Module, or an external switch) to
+avoid fail-over delay issues when using bonding.
+
+
+15. Frequently Asked Questions
+==============================
+
+1. Is it SMP safe?
+
+ Yes. The old 2.0.xx channel bonding patch was not SMP safe.
+The new driver was designed to be SMP safe from the start.
+
+2. What type of cards will work with it?
+
+ Any Ethernet type cards (you can even mix cards - a Intel
+EtherExpress PRO/100 and a 3com 3c905b, for example). For most modes,
+devices need not be of the same speed.
+
+ Starting with version 3.2.1, bonding also supports Infiniband
+slaves in active-backup mode.
+
+3. How many bonding devices can I have?
+
+ There is no limit.
+
+4. How many slaves can a bonding device have?
+
+ This is limited only by the number of network interfaces Linux
+supports and/or the number of network cards you can place in your
+system.
+
+5. What happens when a slave link dies?
+
+ If link monitoring is enabled, then the failing device will be
+disabled. The active-backup mode will fail over to a backup link, and
+other modes will ignore the failed link. The link will continue to be
+monitored, and should it recover, it will rejoin the bond (in whatever
+manner is appropriate for the mode). See the sections on High
+Availability and the documentation for each mode for additional
+information.
+
+ Link monitoring can be enabled via either the miimon or
+arp_interval parameters (described in the module parameters section,
+above). In general, miimon monitors the carrier state as sensed by
+the underlying network device, and the arp monitor (arp_interval)
+monitors connectivity to another host on the local network.
+
+ If no link monitoring is configured, the bonding driver will
+be unable to detect link failures, and will assume that all links are
+always available. This will likely result in lost packets, and a
+resulting degradation of performance. The precise performance loss
+depends upon the bonding mode and network configuration.
+
+6. Can bonding be used for High Availability?
+
+ Yes. See the section on High Availability for details.
+
+7. Which switches/systems does it work with?
+
+ The full answer to this depends upon the desired mode.
+
+ In the basic balance modes (balance-rr and balance-xor), it
+works with any system that supports etherchannel (also called
+trunking). Most managed switches currently available have such
+support, and many unmanaged switches as well.
+
+ The advanced balance modes (balance-tlb and balance-alb) do
+not have special switch requirements, but do need device drivers that
+support specific features (described in the appropriate section under
+module parameters, above).
+
+ In 802.3ad mode, it works with systems that support IEEE
+802.3ad Dynamic Link Aggregation. Most managed and many unmanaged
+switches currently available support 802.3ad.
+
+ The active-backup mode should work with any Layer-II switch.
+
+8. Where does a bonding device get its MAC address from?
+
+ When using slave devices that have fixed MAC addresses, or when
+the fail_over_mac option is enabled, the bonding device's MAC address is
+the MAC address of the active slave.
+
+ For other configurations, if not explicitly configured (with
+ifconfig or ip link), the MAC address of the bonding device is taken from
+its first slave device. This MAC address is then passed to all following
+slaves and remains persistent (even if the first slave is removed) until
+the bonding device is brought down or reconfigured.
+
+ If you wish to change the MAC address, you can set it with
+ifconfig or ip link:
+
+# ifconfig bond0 hw ether 00:11:22:33:44:55
+
+# ip link set bond0 address 66:77:88:99:aa:bb
+
+ The MAC address can be also changed by bringing down/up the
+device and then changing its slaves (or their order):
+
+# ifconfig bond0 down ; modprobe -r bonding
+# ifconfig bond0 .... up
+# ifenslave bond0 eth...
+
+ This method will automatically take the address from the next
+slave that is added.
+
+ To restore your slaves' MAC addresses, you need to detach them
+from the bond (`ifenslave -d bond0 eth0'). The bonding driver will
+then restore the MAC addresses that the slaves had before they were
+enslaved.
+
+16. Resources and Links
+=======================
+
+ The latest version of the bonding driver can be found in the latest
+version of the linux kernel, found on http://kernel.org
+
+ The latest version of this document can be found in the latest kernel
+source (named Documentation/networking/bonding.txt).
+
+ Discussions regarding the usage of the bonding driver take place on the
+bonding-devel mailing list, hosted at sourceforge.net. If you have questions or
+problems, post them to the list. The list address is:
+
+bonding-devel@lists.sourceforge.net
+
+ The administrative interface (to subscribe or unsubscribe) can
+be found at:
+
+https://lists.sourceforge.net/lists/listinfo/bonding-devel
+
+ Discussions regarding the development of the bonding driver take place
+on the main Linux network mailing list, hosted at vger.kernel.org. The list
+address is:
+
+netdev@vger.kernel.org
+
+ The administrative interface (to subscribe or unsubscribe) can
+be found at:
+
+http://vger.kernel.org/vger-lists.html#netdev
+
+Donald Becker's Ethernet Drivers and diag programs may be found at :
+ - http://web.archive.org/web/*/http://www.scyld.com/network/
+
+You will also find a lot of information regarding Ethernet, NWay, MII,
+etc. at www.scyld.com.
+
+-- END --
diff --git a/Documentation/networking/bridge.txt b/Documentation/networking/bridge.txt
new file mode 100644
index 000000000..a27cb6214
--- /dev/null
+++ b/Documentation/networking/bridge.txt
@@ -0,0 +1,15 @@
+In order to use the Ethernet bridging functionality, you'll need the
+userspace tools.
+
+Documentation for Linux bridging is on:
+ http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge
+
+The bridge-utilities are maintained at:
+ git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/bridge-utils.git
+
+Additionally, the iproute2 utilities can be used to configure
+bridge devices.
+
+If you still have questions, don't hesitate to post to the mailing list
+(more info https://lists.linux-foundation.org/mailman/listinfo/bridge).
+
diff --git a/Documentation/networking/caif/Linux-CAIF.txt b/Documentation/networking/caif/Linux-CAIF.txt
new file mode 100644
index 000000000..0aa4bd381
--- /dev/null
+++ b/Documentation/networking/caif/Linux-CAIF.txt
@@ -0,0 +1,175 @@
+Linux CAIF
+===========
+copyright (C) ST-Ericsson AB 2010
+Author: Sjur Brendeland/ sjur.brandeland@stericsson.com
+License terms: GNU General Public License (GPL) version 2
+
+
+Introduction
+------------
+CAIF is a MUX protocol used by ST-Ericsson cellular modems for
+communication between Modem and host. The host processes can open virtual AT
+channels, initiate GPRS Data connections, Video channels and Utility Channels.
+The Utility Channels are general purpose pipes between modem and host.
+
+ST-Ericsson modems support a number of transports between modem
+and host. Currently, UART and Loopback are available for Linux.
+
+
+Architecture:
+------------
+The implementation of CAIF is divided into:
+* CAIF Socket Layer and GPRS IP Interface.
+* CAIF Core Protocol Implementation
+* CAIF Link Layer, implemented as NET devices.
+
+
+ RTNL
+ !
+ ! +------+ +------+
+ ! +------+! +------+!
+ ! ! IP !! !Socket!!
+ +-------> !interf!+ ! API !+ <- CAIF Client APIs
+ ! +------+ +------!
+ ! ! !
+ ! +-----------+
+ ! !
+ ! +------+ <- CAIF Core Protocol
+ ! ! CAIF !
+ ! ! Core !
+ ! +------+
+ ! +----------!---------+
+ ! ! ! !
+ ! +------+ +-----+ +------+
+ +--> ! HSI ! ! TTY ! ! USB ! <- Link Layer (Net Devices)
+ +------+ +-----+ +------+
+
+
+
+I M P L E M E N T A T I O N
+===========================
+
+
+CAIF Core Protocol Layer
+=========================================
+
+CAIF Core layer implements the CAIF protocol as defined by ST-Ericsson.
+It implements the CAIF protocol stack in a layered approach, where
+each layer described in the specification is implemented as a separate layer.
+The architecture is inspired by the design patterns "Protocol Layer" and
+"Protocol Packet".
+
+== CAIF structure ==
+The Core CAIF implementation contains:
+ - Simple implementation of CAIF.
+ - Layered architecture (a la Streams), each layer in the CAIF
+ specification is implemented in a separate c-file.
+ - Clients must call configuration function to add PHY layer.
+ - Clients must implement CAIF layer to consume/produce
+ CAIF payload with receive and transmit functions.
+ - Clients must call configuration function to add and connect the
+ Client layer.
+ - When receiving / transmitting CAIF Packets (cfpkt), ownership is passed
+ to the called function (except for framing layers' receive function)
+
+Layered Architecture
+--------------------
+The CAIF protocol can be divided into two parts: Support functions and Protocol
+Implementation. The support functions include:
+
+ - CFPKT CAIF Packet. Implementation of CAIF Protocol Packet. The
+ CAIF Packet has functions for creating, destroying and adding content
+ and for adding/extracting header and trailers to protocol packets.
+
+The CAIF Protocol implementation contains:
+
+ - CFCNFG CAIF Configuration layer. Configures the CAIF Protocol
+ Stack and provides a Client interface for adding Link-Layer and
+ Driver interfaces on top of the CAIF Stack.
+
+ - CFCTRL CAIF Control layer. Encodes and Decodes control messages
+ such as enumeration and channel setup. Also matches request and
+ response messages.
+
+ - CFSERVL General CAIF Service Layer functionality; handles flow
+ control and remote shutdown requests.
+
+ - CFVEI CAIF VEI layer. Handles CAIF AT Channels on VEI (Virtual
+ External Interface). This layer encodes/decodes VEI frames.
+
+ - CFDGML CAIF Datagram layer. Handles CAIF Datagram layer (IP
+ traffic), encodes/decodes Datagram frames.
+
+ - CFMUX CAIF Mux layer. Handles multiplexing between multiple
+ physical bearers and multiple channels such as VEI, Datagram, etc.
+ The MUX keeps track of the existing CAIF Channels and
+ Physical Instances and selects the appropriate instance based
+ on Channel-Id and Physical-ID.
+
+ - CFFRML CAIF Framing layer. Handles Framing i.e. Frame length
+ and frame checksum.
+
+ - CFSERL CAIF Serial layer. Handles concatenation/split of frames
+ into CAIF Frames with correct length.
+
+
+
+ +---------+
+ | Config |
+ | CFCNFG |
+ +---------+
+ !
+ +---------+ +---------+ +---------+
+ | AT | | Control | | Datagram|
+ | CFVEIL | | CFCTRL | | CFDGML |
+ +---------+ +---------+ +---------+
+ \_____________!______________/
+ !
+ +---------+
+ | MUX |
+ | |
+ +---------+
+ _____!_____
+ / \
+ +---------+ +---------+
+ | CFFRML | | CFFRML |
+ | Framing | | Framing |
+ +---------+ +---------+
+ ! !
+ +---------+ +---------+
+ | | | Serial |
+ | | | CFSERL |
+ +---------+ +---------+
+
+
+In this layered approach the following "rules" apply.
+ - All layers embed the same structure "struct cflayer"
+ - A layer does not depend on any other layer's private data.
+ - Layers are stacked by setting the pointers
+ layer->up , layer->dn
+ - In order to send data upwards, each layer should do
+ layer->up->receive(layer->up, packet);
+ - In order to send data downwards, each layer should do
+ layer->dn->transmit(layer->dn, packet);
+
+
+CAIF Socket and IP interface
+===========================
+
+The IP interface and CAIF socket API are implemented on top of the
+CAIF Core protocol. The IP Interface and CAIF socket have an instance of
+'struct cflayer', just like the CAIF Core protocol stack.
+Net device and Socket implement the 'receive()' function defined by
+'struct cflayer', just like the rest of the CAIF stack. In this way, transmit and
+receive of packets is handled as by the rest of the layers: the 'dn->transmit()'
+function is called in order to transmit data.
+
+Configuration of Link Layer
+---------------------------
+The Link Layer is implemented as Linux network devices (struct net_device).
+Payload handling and registration is done using standard Linux mechanisms.
+
+The CAIF Protocol relies on a loss-less link layer without implementing
+retransmission. This implies that packet drops must not happen.
+Therefore a flow-control mechanism is implemented where the physical
+interface can initiate flow stop for all CAIF Channels.
diff --git a/Documentation/networking/caif/README b/Documentation/networking/caif/README
new file mode 100644
index 000000000..757ccfaa1
--- /dev/null
+++ b/Documentation/networking/caif/README
@@ -0,0 +1,109 @@
+Copyright (C) ST-Ericsson AB 2010
+Author: Sjur Brendeland/ sjur.brandeland@stericsson.com
+License terms: GNU General Public License (GPL) version 2
+---------------------------------------------------------
+
+=== Start ===
+If you have compiled CAIF for modules do:
+
+$modprobe crc_ccitt
+$modprobe caif
+$modprobe caif_socket
+$modprobe chnl_net
+
+
+=== Preparing the setup with a STE modem ===
+
+If you are working on integration of CAIF you should make sure
+that the kernel is built with module support.
+
+There are some things that need to be tweaked to get the host TTY correctly
+set up to talk to the modem.
+Since the CAIF stack is running in the kernel and we want to use the existing
+TTY, we are installing our physical serial driver as a line discipline above
+the TTY device.
+
+To achieve this we need to install the N_CAIF ldisc from user space.
+The benefit is that we can hook up to any TTY.
+
+The use of Start-of-frame-extension (STX) must also be set as
+module parameter "ser_use_stx".
+
+Normally Frame Checksum is always used on UART, but this is also provided as a
+module parameter "ser_use_fcs".
+
+$ modprobe caif_serial ser_ttyname=/dev/ttyS0 ser_use_stx=yes
+$ ifconfig caif_ttyS0 up
+
+PLEASE NOTE: There is a limitation in Android shell.
+ It only accepts one argument to insmod/modprobe!
+
+=== Trouble shooting ===
+
+There are debugfs parameters provided for serial communication.
+/sys/kernel/debug/caif_serial/<tty-name>/
+
+* ser_state: Prints the bit-mask status where
+ - 0x02 means SENDING, this is a transient state.
+ - 0x10 means FLOW_OFF_SENT, i.e. the previous frame has not been sent
+ and is blocking further send operation. Flow OFF has been propagated
+ to all CAIF Channels using this TTY.
+
+* tty_status: Prints the bit-mask tty status information
+ - 0x01 - tty->warned is on.
+ - 0x02 - tty->low_latency is on.
+ - 0x04 - tty->packed is on.
+ - 0x08 - tty->flow_stopped is on.
+ - 0x10 - tty->hw_stopped is on.
+ - 0x20 - tty->stopped is on.
+
+* last_tx_msg: Binary blob Prints the last transmitted frame.
+ This can be printed with
+ $od --format=x1 /sys/kernel/debug/caif_serial/<tty>/last_rx_msg.
+ The first two tx messages sent look like this. Note: The initial
+ byte 02 is start of frame extension (STX) used for re-syncing
+ upon errors.
+
+ - Enumeration:
+ 0000000 02 05 00 00 03 01 d2 02
+ | | | | | |
+ STX(1) | | | |
+ Length(2)| | |
+ Control Channel(1)
+ Command:Enumeration(1)
+ Link-ID(1)
+ Checksum(2)
+ - Channel Setup:
+ 0000000 02 07 00 00 00 21 a1 00 48 df
+ | | | | | | | |
+ STX(1) | | | | | |
+ Length(2)| | | | |
+ Control Channel(1)
+ Command:Channel Setup(1)
+ Channel Type(1)
+ Priority and Link-ID(1)
+ Endpoint(1)
+ Checksum(2)
+
+* last_rx_msg: Prints the last transmitted frame.
+ The RX messages for LinkSetup look almost identical but they have the
+ bit 0x20 set in the command bit, and Channel Setup has added one byte
+ before Checksum containing Channel ID.
+ NOTE: Several CAIF Messages might be concatenated. The maximum debug
+ buffer size is 128 bytes.
+
+== Error Scenarios:
+- last_tx_msg contains channel setup message and last_rx_msg is empty ->
+ The host seems to be able to send over the UART, at least the CAIF ldisc get
+ notified that sending is completed.
+
+- last_tx_msg contains enumeration message and last_rx_msg is empty ->
+ The host is not able to send the message from UART, the tty has not been
+ able to complete the transmit operation.
+
+- if /sys/kernel/debug/caif_serial/<tty>/tty_status is non-zero there
+ might be problems transmitting over UART.
+ E.g. host and modem wiring is not correct you will typically see
+ tty_status = 0x10 (hw_stopped) and ser_state = 0x10 (FLOW_OFF_SENT).
+ You will probably see the enumeration message in last_tx_message
+ and empty last_rx_message.
diff --git a/Documentation/networking/caif/spi_porting.txt b/Documentation/networking/caif/spi_porting.txt
new file mode 100644
index 000000000..9efd0687d
--- /dev/null
+++ b/Documentation/networking/caif/spi_porting.txt
@@ -0,0 +1,208 @@
+- CAIF SPI porting -
+
+- CAIF SPI basics:
+
+Running CAIF over SPI needs some extra setup, owing to the nature of SPI.
+Two extra GPIOs have been added in order to negotiate the transfers
+ between the master and the slave. The minimum requirement for running
+CAIF over SPI is a SPI slave chip and two GPIOs (more details below).
+Please note that running as a slave implies that you need to keep up
+with the master clock. An overrun or underrun event is fatal.
+
+- CAIF SPI framework:
+
+To make porting as easy as possible, the CAIF SPI has been divided in
+two parts. The first part (called the interface part) deals with all
+generic functionality such as length framing, SPI frame negotiation
+and SPI frame delivery and transmission. The other part is the CAIF
+SPI slave device part, which is the module that you have to write if
+you want to run SPI CAIF on a new hardware. This part takes care of
+the physical hardware, both with regard to SPI and to GPIOs.
+
+- Implementing a CAIF SPI device:
+
+ - Functionality provided by the CAIF SPI slave device:
+
+ In order to implement a SPI device you will, as a minimum,
+ need to implement the following
+ functions:
+
+ int (*init_xfer) (struct cfspi_xfer * xfer, struct cfspi_dev *dev):
+
+ This function is called by the CAIF SPI interface to give
+ you a chance to set up your hardware to be ready to receive
+ a stream of data from the master. The xfer structure contains
+ both physical and logical addresses, as well as the total length
+ of the transfer in both directions.The dev parameter can be used
+ to map to different CAIF SPI slave devices.
+
+ void (*sig_xfer) (bool xfer, struct cfspi_dev *dev):
+
+ This function is called by the CAIF SPI interface when the output
+ (SPI_INT) GPIO needs to change state. The boolean value of the xfer
+ variable indicates whether the GPIO should be asserted (HIGH) or
+ deasserted (LOW). The dev parameter can be used to map to different CAIF
+ SPI slave devices.
+
+ - Functionality provided by the CAIF SPI interface:
+
+ void (*ss_cb) (bool assert, struct cfspi_ifc *ifc);
+
+ This function is called by the CAIF SPI slave device in order to
+ signal a change of state of the input GPIO (SS) to the interface.
+ Only active edges are mandatory to be reported.
+ This function can be called from IRQ context (recommended in order
+ not to introduce latency). The ifc parameter should be the pointer
+ returned from the platform probe function in the SPI device structure.
+
+ void (*xfer_done_cb) (struct cfspi_ifc *ifc);
+
+ This function is called by the CAIF SPI slave device in order to
+ report that a transfer is completed. This function should only be
+ called once both the transmission and the reception are completed.
+ This function can be called from IRQ context (recommended in order
+ not to introduce latency). The ifc parameter should be the pointer
+ returned from the platform probe function in the SPI device structure.
+
+ - Connecting the bits and pieces:
+
+ - Filling in the SPI slave device structure:
+
+ Connect the necessary callback functions.
+ Indicate clock speed (used to calculate toggle delays).
+ Chose a suitable name (helps debugging if you use several CAIF
+ SPI slave devices).
+ Assign your private data (can be used to map to your structure).
+
+ - Filling in the SPI slave platform device structure:
+ Add name of driver to connect to ("cfspi_sspi").
+ Assign the SPI slave device structure as platform data.
+
+- Padding:
+
+In order to optimize throughput, a number of SPI padding options are provided.
+Padding can be enabled independently for uplink and downlink transfers.
+Padding can be enabled for the head, the tail and for the total frame size.
+The padding needs to be correctly configured on both sides of the link.
+The padding can be changed via module parameters in cfspi_sspi.c or via
+the sysfs directory of the cfspi_sspi driver (before device registration).
+
+- CAIF SPI device template:
+
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author: Daniel Martensson / Daniel.Martensson@stericsson.com
+ * License terms: GNU General Public License (GPL), version 2.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/wait.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+#include <net/caif/caif_spi.h>
+
+MODULE_LICENSE("GPL");
+
+struct sspi_struct {
+ struct cfspi_dev sdev;
+ struct cfspi_xfer *xfer;
+};
+
+static struct sspi_struct slave;
+static struct platform_device slave_device;
+
+static irqreturn_t sspi_irq(int irq, void *arg)
+{
+ /* You only need to trigger on an edge to the active state of the
+ * SS signal. Once a edge is detected, the ss_cb() function should be
+ * called with the parameter assert set to true. It is OK
+ * (and even advised) to call the ss_cb() function in IRQ context in
+ * order not to add any delay. */
+
+ return IRQ_HANDLED;
+}
+
+static void sspi_complete(void *context)
+{
+ /* Normally the DMA or the SPI framework will call you back
+ * in something similar to this. The only thing you need to
+ * do is to call the xfer_done_cb() function, providing the pointer
+ * to the CAIF SPI interface. It is OK to call this function
+ * from IRQ context. */
+}
+
+static int sspi_init_xfer(struct cfspi_xfer *xfer, struct cfspi_dev *dev)
+{
+ /* Store transfer info. For a normal implementation you should
+ * set up your DMA here and make sure that you are ready to
+ * receive the data from the master SPI. */
+
+ struct sspi_struct *sspi = (struct sspi_struct *)dev->priv;
+
+ sspi->xfer = xfer;
+
+ return 0;
+}
+
+void sspi_sig_xfer(bool xfer, struct cfspi_dev *dev)
+{
+ /* If xfer is true then you should assert the SPI_INT to indicate to
+ * the master that you are ready to receive the data from the master
+ * SPI. If xfer is false then you should de-assert SPI_INT to indicate
+ * that the transfer is done.
+ */
+
+ struct sspi_struct *sspi = (struct sspi_struct *)dev->priv;
+}
+
+static void sspi_release(struct device *dev)
+{
+ /*
+ * Here you should release your SPI device resources.
+ */
+}
+
+static int __init sspi_init(void)
+{
+ /* Here you should initialize your SPI device by providing the
+ * necessary functions, clock speed, name and private data. Once
+ * done, you can register your device with the
+ * platform_device_register() function. This function will return
+ * with the CAIF SPI interface initialized. This is probably also
+ * the place where you should set up your GPIOs, interrupts and SPI
+ * resources. */
+
+ int res = 0;
+
+ /* Initialize slave device. */
+ slave.sdev.init_xfer = sspi_init_xfer;
+ slave.sdev.sig_xfer = sspi_sig_xfer;
+ slave.sdev.clk_mhz = 13;
+ slave.sdev.priv = &slave;
+ slave.sdev.name = "spi_sspi";
+ slave_device.dev.release = sspi_release;
+
+ /* Initialize platform device. */
+ slave_device.name = "cfspi_sspi";
+ slave_device.dev.platform_data = &slave.sdev;
+
+ /* Register platform device. */
+ res = platform_device_register(&slave_device);
+ if (res) {
+ printk(KERN_WARNING "sspi_init: failed to register dev.\n");
+ return -ENODEV;
+ }
+
+ return res;
+}
+
+static void __exit sspi_exit(void)
+{
+ platform_device_del(&slave_device);
+}
+
+module_init(sspi_init);
+module_exit(sspi_exit);
diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
new file mode 100644
index 000000000..5abad1e92
--- /dev/null
+++ b/Documentation/networking/can.txt
@@ -0,0 +1,1216 @@
+============================================================================
+
+can.txt
+
+Readme file for the Controller Area Network Protocol Family (aka SocketCAN)
+
+This file contains
+
+ 1 Overview / What is SocketCAN
+
+ 2 Motivation / Why using the socket API
+
+ 3 SocketCAN concept
+ 3.1 receive lists
+ 3.2 local loopback of sent frames
+ 3.3 network problem notifications
+
+ 4 How to use SocketCAN
+ 4.1 RAW protocol sockets with can_filters (SOCK_RAW)
+ 4.1.1 RAW socket option CAN_RAW_FILTER
+ 4.1.2 RAW socket option CAN_RAW_ERR_FILTER
+ 4.1.3 RAW socket option CAN_RAW_LOOPBACK
+ 4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
+ 4.1.5 RAW socket option CAN_RAW_FD_FRAMES
+ 4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
+ 4.1.7 RAW socket returned message flags
+ 4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
+ 4.2.1 Broadcast Manager operations
+ 4.2.2 Broadcast Manager message flags
+ 4.2.3 Broadcast Manager transmission timers
+ 4.2.4 Broadcast Manager message sequence transmission
+ 4.2.5 Broadcast Manager receive filter timers
+ 4.2.6 Broadcast Manager multiplex message receive filter
+ 4.3 connected transport protocols (SOCK_SEQPACKET)
+ 4.4 unconnected transport protocols (SOCK_DGRAM)
+
+ 5 SocketCAN core module
+ 5.1 can.ko module params
+ 5.2 procfs content
+ 5.3 writing own CAN protocol modules
+
+ 6 CAN network drivers
+ 6.1 general settings
+ 6.2 local loopback of sent frames
+ 6.3 CAN controller hardware filters
+ 6.4 The virtual CAN driver (vcan)
+ 6.5 The CAN network device driver interface
+ 6.5.1 Netlink interface to set/get devices properties
+ 6.5.2 Setting the CAN bit-timing
+ 6.5.3 Starting and stopping the CAN network device
+ 6.6 CAN FD (flexible data rate) driver support
+ 6.7 supported CAN hardware
+
+ 7 SocketCAN resources
+
+ 8 Credits
+
+============================================================================
+
+1. Overview / What is SocketCAN
+--------------------------------
+
+The socketcan package is an implementation of CAN protocols
+(Controller Area Network) for Linux. CAN is a networking technology
+which has widespread use in automation, embedded devices, and
+automotive fields. While there have been other CAN implementations
+for Linux based on character devices, SocketCAN uses the Berkeley
+socket API, the Linux network stack and implements the CAN device
+drivers as network interfaces. The CAN socket API has been designed
+as similar as possible to the TCP/IP protocols to allow programmers,
+familiar with network programming, to easily learn how to use CAN
+sockets.
+
+2. Motivation / Why using the socket API
+----------------------------------------
+
+There have been CAN implementations for Linux before SocketCAN so the
+question arises, why we have started another project. Most existing
+implementations come as a device driver for some CAN hardware, they
+are based on character devices and provide comparatively little
+functionality. Usually, there is only a hardware-specific device
+driver which provides a character device interface to send and
+receive raw CAN frames, directly to/from the controller hardware.
+Queueing of frames and higher-level transport protocols like ISO-TP
+have to be implemented in user space applications. Also, most
+character-device implementations support only one single process to
+open the device at a time, similar to a serial interface. Exchanging
+the CAN controller requires employment of another device driver and
+often the need for adaption of large parts of the application to the
+new driver's API.
+
+SocketCAN was designed to overcome all of these limitations. A new
+protocol family has been implemented which provides a socket interface
+to user space applications and which builds upon the Linux network
+layer, enabling use all of the provided queueing functionality. A device
+driver for CAN controller hardware registers itself with the Linux
+network layer as a network device, so that CAN frames from the
+controller can be passed up to the network layer and on to the CAN
+protocol family module and also vice-versa. Also, the protocol family
+module provides an API for transport protocol modules to register, so
+that any number of transport protocols can be loaded or unloaded
+dynamically. In fact, the can core module alone does not provide any
+protocol and cannot be used without loading at least one additional
+protocol module. Multiple sockets can be opened at the same time,
+on different or the same protocol module and they can listen/send
+frames on different or the same CAN IDs. Several sockets listening on
+the same interface for frames with the same CAN ID are all passed the
+same received matching CAN frames. An application wishing to
+communicate using a specific transport protocol, e.g. ISO-TP, just
+selects that protocol when opening the socket, and then can read and
+write application data byte streams, without having to deal with
+CAN-IDs, frames, etc.
+
+Similar functionality visible from user-space could be provided by a
+character device, too, but this would lead to a technically inelegant
+solution for a couple of reasons:
+
+* Intricate usage. Instead of passing a protocol argument to
+ socket(2) and using bind(2) to select a CAN interface and CAN ID, an
+ application would have to do all these operations using ioctl(2)s.
+
+* Code duplication. A character device cannot make use of the Linux
+ network queueing code, so all that code would have to be duplicated
+ for CAN networking.
+
+* Abstraction. In most existing character-device implementations, the
+ hardware-specific device driver for a CAN controller directly
+ provides the character device for the application to work with.
+ This is at least very unusual in Unix systems for both, char and
+ block devices. For example you don't have a character device for a
+ certain UART of a serial interface, a certain sound chip in your
+ computer, a SCSI or IDE controller providing access to your hard
+ disk or tape streamer device. Instead, you have abstraction layers
+ which provide a unified character or block device interface to the
+ application on the one hand, and a interface for hardware-specific
+ device drivers on the other hand. These abstractions are provided
+ by subsystems like the tty layer, the audio subsystem or the SCSI
+ and IDE subsystems for the devices mentioned above.
+
+ The easiest way to implement a CAN device driver is as a character
+ device without such a (complete) abstraction layer, as is done by most
+ existing drivers. The right way, however, would be to add such a
+ layer with all the functionality like registering for certain CAN
+ IDs, supporting several open file descriptors and (de)multiplexing
+ CAN frames between them, (sophisticated) queueing of CAN frames, and
+ providing an API for device drivers to register with. However, then
+ it would be no more difficult, or may be even easier, to use the
+ networking framework provided by the Linux kernel, and this is what
+ SocketCAN does.
+
+ The use of the networking framework of the Linux kernel is just the
+ natural and most appropriate way to implement CAN for Linux.
+
+3. SocketCAN concept
+---------------------
+
+ As described in chapter 2 it is the main goal of SocketCAN to
+ provide a socket interface to user space applications which builds
+ upon the Linux network layer. In contrast to the commonly known
+ TCP/IP and ethernet networking, the CAN bus is a broadcast-only(!)
+ medium that has no MAC-layer addressing like ethernet. The CAN-identifier
+ (can_id) is used for arbitration on the CAN-bus. Therefore the CAN-IDs
+ have to be chosen uniquely on the bus. When designing a CAN-ECU
+ network the CAN-IDs are mapped to be sent by a specific ECU.
+ For this reason a CAN-ID can be treated best as a kind of source address.
+
+ 3.1 receive lists
+
+ The network transparent access of multiple applications leads to the
+ problem that different applications may be interested in the same
+ CAN-IDs from the same CAN network interface. The SocketCAN core
+ module - which implements the protocol family CAN - provides several
+ high efficient receive lists for this reason. If e.g. a user space
+ application opens a CAN RAW socket, the raw protocol module itself
+ requests the (range of) CAN-IDs from the SocketCAN core that are
+ requested by the user. The subscription and unsubscription of
+ CAN-IDs can be done for specific CAN interfaces or for all(!) known
+ CAN interfaces with the can_rx_(un)register() functions provided to
+ CAN protocol modules by the SocketCAN core (see chapter 5).
+ To optimize the CPU usage at runtime the receive lists are split up
+ into several specific lists per device that match the requested
+ filter complexity for a given use-case.
+
+ 3.2 local loopback of sent frames
+
+ As known from other networking concepts the data exchanging
+ applications may run on the same or different nodes without any
+ change (except for the according addressing information):
+
+ ___ ___ ___ _______ ___
+ | _ | | _ | | _ | | _ _ | | _ |
+ ||A|| ||B|| ||C|| ||A| |B|| ||C||
+ |___| |___| |___| |_______| |___|
+ | | | | |
+ -----------------(1)- CAN bus -(2)---------------
+
+ To ensure that application A receives the same information in the
+ example (2) as it would receive in example (1) there is need for
+ some kind of local loopback of the sent CAN frames on the appropriate
+ node.
+
+ The Linux network devices (by default) just can handle the
+ transmission and reception of media dependent frames. Due to the
+ arbitration on the CAN bus the transmission of a low prio CAN-ID
+ may be delayed by the reception of a high prio CAN frame. To
+ reflect the correct* traffic on the node the loopback of the sent
+ data has to be performed right after a successful transmission. If
+ the CAN network interface is not capable of performing the loopback for
+ some reason the SocketCAN core can do this task as a fallback solution.
+ See chapter 6.2 for details (recommended).
+
+ The loopback functionality is enabled by default to reflect standard
+ networking behaviour for CAN applications. Due to some requests from
+ the RT-SocketCAN group the loopback optionally may be disabled for each
+ separate socket. See sockopts from the CAN RAW sockets in chapter 4.1.
+
+ * = you really like to have this when you're running analyser tools
+ like 'candump' or 'cansniffer' on the (same) node.
+
+ 3.3 network problem notifications
+
+ The use of the CAN bus may lead to several problems on the physical
+ and media access control layer. Detecting and logging of these lower
+ layer problems is a vital requirement for CAN users to identify
+ hardware issues on the physical transceiver layer as well as
+ arbitration problems and error frames caused by the different
+ ECUs. The occurrence of detected errors are important for diagnosis
+ and have to be logged together with the exact timestamp. For this
+ reason the CAN interface driver can generate so called Error Message
+ Frames that can optionally be passed to the user application in the
+ same way as other CAN frames. Whenever an error on the physical layer
+ or the MAC layer is detected (e.g. by the CAN controller) the driver
+ creates an appropriate error message frame. Error messages frames can
+ be requested by the user application using the common CAN filter
+ mechanisms. Inside this filter definition the (interested) type of
+ errors may be selected. The reception of error messages is disabled
+ by default. The format of the CAN error message frame is briefly
+ described in the Linux header file "include/uapi/linux/can/error.h".
+
+4. How to use SocketCAN
+------------------------
+
+ Like TCP/IP, you first need to open a socket for communicating over a
+ CAN network. Since SocketCAN implements a new protocol family, you
+ need to pass PF_CAN as the first argument to the socket(2) system
+ call. Currently, there are two CAN protocols to choose from, the raw
+ socket protocol and the broadcast manager (BCM). So to open a socket,
+ you would write
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+
+ and
+
+ s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
+
+ respectively. After the successful creation of the socket, you would
+ normally use the bind(2) system call to bind the socket to a CAN
+ interface (which is different from TCP/IP due to different addressing
+ - see chapter 3). After binding (CAN_RAW) or connecting (CAN_BCM)
+ the socket, you can read(2) and write(2) from/to the socket or use
+ send(2), sendto(2), sendmsg(2) and the recv* counterpart operations
+ on the socket as usual. There are also CAN specific socket options
+ described below.
+
+ The basic CAN frame structure and the sockaddr structure are defined
+ in include/linux/can.h:
+
+ struct can_frame {
+ canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
+ __u8 can_dlc; /* frame payload length in byte (0 .. 8) */
+ __u8 data[8] __attribute__((aligned(8)));
+ };
+
+ The alignment of the (linear) payload data[] to a 64bit boundary
+ allows the user to define their own structs and unions to easily access
+ the CAN payload. There is no given byteorder on the CAN bus by
+ default. A read(2) system call on a CAN_RAW socket transfers a
+ struct can_frame to the user space.
+
+ The sockaddr_can structure has an interface index like the
+ PF_PACKET socket, that also binds to a specific interface:
+
+ struct sockaddr_can {
+ sa_family_t can_family;
+ int can_ifindex;
+ union {
+ /* transport protocol class address info (e.g. ISOTP) */
+ struct { canid_t rx_id, tx_id; } tp;
+
+ /* reserved for future CAN protocols address information */
+ } can_addr;
+ };
+
+ To determine the interface index an appropriate ioctl() has to
+ be used (example for CAN_RAW sockets without error checking):
+
+ int s;
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+
+ strcpy(ifr.ifr_name, "can0" );
+ ioctl(s, SIOCGIFINDEX, &ifr);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ bind(s, (struct sockaddr *)&addr, sizeof(addr));
+
+ (..)
+
+ To bind a socket to all(!) CAN interfaces the interface index must
+ be 0 (zero). In this case the socket receives CAN frames from every
+ enabled CAN interface. To determine the originating CAN interface
+ the system call recvfrom(2) may be used instead of read(2). To send
+ on a socket that is bound to 'any' interface sendto(2) is needed to
+ specify the outgoing interface.
+
+ Reading CAN frames from a bound CAN_RAW socket (see above) consists
+ of reading a struct can_frame:
+
+ struct can_frame frame;
+
+ nbytes = read(s, &frame, sizeof(struct can_frame));
+
+ if (nbytes < 0) {
+ perror("can raw socket read");
+ return 1;
+ }
+
+ /* paranoid check ... */
+ if (nbytes < sizeof(struct can_frame)) {
+ fprintf(stderr, "read: incomplete CAN frame\n");
+ return 1;
+ }
+
+ /* do something with the received CAN frame */
+
+ Writing CAN frames can be done similarly, with the write(2) system call:
+
+ nbytes = write(s, &frame, sizeof(struct can_frame));
+
+ When the CAN interface is bound to 'any' existing CAN interface
+ (addr.can_ifindex = 0) it is recommended to use recvfrom(2) if the
+ information about the originating CAN interface is needed:
+
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+ socklen_t len = sizeof(addr);
+ struct can_frame frame;
+
+ nbytes = recvfrom(s, &frame, sizeof(struct can_frame),
+ 0, (struct sockaddr*)&addr, &len);
+
+ /* get interface name of the received CAN frame */
+ ifr.ifr_ifindex = addr.can_ifindex;
+ ioctl(s, SIOCGIFNAME, &ifr);
+ printf("Received a CAN frame from interface %s", ifr.ifr_name);
+
+ To write CAN frames on sockets bound to 'any' CAN interface the
+ outgoing interface has to be defined certainly.
+
+ strcpy(ifr.ifr_name, "can0");
+ ioctl(s, SIOCGIFINDEX, &ifr);
+ addr.can_ifindex = ifr.ifr_ifindex;
+ addr.can_family = AF_CAN;
+
+ nbytes = sendto(s, &frame, sizeof(struct can_frame),
+ 0, (struct sockaddr*)&addr, sizeof(addr));
+
+ Remark about CAN FD (flexible data rate) support:
+
+ Generally the handling of CAN FD is very similar to the formerly described
+ examples. The new CAN FD capable CAN controllers support two different
+ bitrates for the arbitration phase and the payload phase of the CAN FD frame
+ and up to 64 bytes of payload. This extended payload length breaks all the
+ kernel interfaces (ABI) which heavily rely on the CAN frame with fixed eight
+ bytes of payload (struct can_frame) like the CAN_RAW socket. Therefore e.g.
+ the CAN_RAW socket supports a new socket option CAN_RAW_FD_FRAMES that
+ switches the socket into a mode that allows the handling of CAN FD frames
+ and (legacy) CAN frames simultaneously (see section 4.1.5).
+
+ The struct canfd_frame is defined in include/linux/can.h:
+
+ struct canfd_frame {
+ canid_t can_id; /* 32 bit CAN_ID + EFF/RTR/ERR flags */
+ __u8 len; /* frame payload length in byte (0 .. 64) */
+ __u8 flags; /* additional flags for CAN FD */
+ __u8 __res0; /* reserved / padding */
+ __u8 __res1; /* reserved / padding */
+ __u8 data[64] __attribute__((aligned(8)));
+ };
+
+ The struct canfd_frame and the existing struct can_frame have the can_id,
+ the payload length and the payload data at the same offset inside their
+ structures. This allows to handle the different structures very similar.
+ When the content of a struct can_frame is copied into a struct canfd_frame
+ all structure elements can be used as-is - only the data[] becomes extended.
+
+ When introducing the struct canfd_frame it turned out that the data length
+ code (DLC) of the struct can_frame was used as a length information as the
+ length and the DLC has a 1:1 mapping in the range of 0 .. 8. To preserve
+ the easy handling of the length information the canfd_frame.len element
+ contains a plain length value from 0 .. 64. So both canfd_frame.len and
+ can_frame.can_dlc are equal and contain a length information and no DLC.
+ For details about the distinction of CAN and CAN FD capable devices and
+ the mapping to the bus-relevant data length code (DLC), see chapter 6.6.
+
+ The length of the two CAN(FD) frame structures define the maximum transfer
+ unit (MTU) of the CAN(FD) network interface and skbuff data length. Two
+ definitions are specified for CAN specific MTUs in include/linux/can.h :
+
+ #define CAN_MTU (sizeof(struct can_frame)) == 16 => 'legacy' CAN frame
+ #define CANFD_MTU (sizeof(struct canfd_frame)) == 72 => CAN FD frame
+
+ 4.1 RAW protocol sockets with can_filters (SOCK_RAW)
+
+ Using CAN_RAW sockets is extensively comparable to the commonly
+ known access to CAN character devices. To meet the new possibilities
+ provided by the multi user SocketCAN approach, some reasonable
+ defaults are set at RAW socket binding time:
+
+ - The filters are set to exactly one filter receiving everything
+ - The socket only receives valid data frames (=> no error message frames)
+ - The loopback of sent CAN frames is enabled (see chapter 3.2)
+ - The socket does not receive its own sent frames (in loopback mode)
+
+ These default settings may be changed before or after binding the socket.
+ To use the referenced definitions of the socket options for CAN_RAW
+ sockets, include <linux/can/raw.h>.
+
+ 4.1.1 RAW socket option CAN_RAW_FILTER
+
+ The reception of CAN frames using CAN_RAW sockets can be controlled
+ by defining 0 .. n filters with the CAN_RAW_FILTER socket option.
+
+ The CAN filter structure is defined in include/linux/can.h:
+
+ struct can_filter {
+ canid_t can_id;
+ canid_t can_mask;
+ };
+
+ A filter matches, when
+
+ <received_can_id> & mask == can_id & mask
+
+ which is analogous to known CAN controllers hardware filter semantics.
+ The filter can be inverted in this semantic, when the CAN_INV_FILTER
+ bit is set in can_id element of the can_filter structure. In
+ contrast to CAN controller hardware filters the user may set 0 .. n
+ receive filters for each open socket separately:
+
+ struct can_filter rfilter[2];
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = CAN_SFF_MASK;
+ rfilter[1].can_id = 0x200;
+ rfilter[1].can_mask = 0x700;
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
+
+ To disable the reception of CAN frames on the selected CAN_RAW socket:
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, NULL, 0);
+
+ To set the filters to zero filters is quite obsolete as to not read
+ data causes the raw socket to discard the received CAN frames. But
+ having this 'send only' use-case we may remove the receive list in the
+ Kernel to save a little (really a very little!) CPU usage.
+
+ 4.1.1.1 CAN filter usage optimisation
+
+ The CAN filters are processed in per-device filter lists at CAN frame
+ reception time. To reduce the number of checks that need to be performed
+ while walking through the filter lists the CAN core provides an optimized
+ filter handling when the filter subscription focusses on a single CAN ID.
+
+ For the possible 2048 SFF CAN identifiers the identifier is used as an index
+ to access the corresponding subscription list without any further checks.
+ For the 2^29 possible EFF CAN identifiers a 10 bit XOR folding is used as
+ hash function to retrieve the EFF table index.
+
+ To benefit from the optimized filters for single CAN identifiers the
+ CAN_SFF_MASK or CAN_EFF_MASK have to be set into can_filter.mask together
+ with set CAN_EFF_FLAG and CAN_RTR_FLAG bits. A set CAN_EFF_FLAG bit in the
+ can_filter.mask makes clear that it matters whether a SFF or EFF CAN ID is
+ subscribed. E.g. in the example from above
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = CAN_SFF_MASK;
+
+ both SFF frames with CAN ID 0x123 and EFF frames with 0xXXXXX123 can pass.
+
+ To filter for only 0x123 (SFF) and 0x12345678 (EFF) CAN identifiers the
+ filter has to be defined in this way to benefit from the optimized filters:
+
+ struct can_filter rfilter[2];
+
+ rfilter[0].can_id = 0x123;
+ rfilter[0].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_SFF_MASK);
+ rfilter[1].can_id = 0x12345678 | CAN_EFF_FLAG;
+ rfilter[1].can_mask = (CAN_EFF_FLAG | CAN_RTR_FLAG | CAN_EFF_MASK);
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_FILTER, &rfilter, sizeof(rfilter));
+
+ 4.1.2 RAW socket option CAN_RAW_ERR_FILTER
+
+ As described in chapter 3.4 the CAN interface driver can generate so
+ called Error Message Frames that can optionally be passed to the user
+ application in the same way as other CAN frames. The possible
+ errors are divided into different error classes that may be filtered
+ using the appropriate error mask. To register for every possible
+ error condition CAN_ERR_MASK can be used as value for the error mask.
+ The values for the error mask are defined in linux/can/error.h .
+
+ can_err_mask_t err_mask = ( CAN_ERR_TX_TIMEOUT | CAN_ERR_BUSOFF );
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_ERR_FILTER,
+ &err_mask, sizeof(err_mask));
+
+ 4.1.3 RAW socket option CAN_RAW_LOOPBACK
+
+ To meet multi user needs the local loopback is enabled by default
+ (see chapter 3.2 for details). But in some embedded use-cases
+ (e.g. when only one application uses the CAN bus) this loopback
+ functionality can be disabled (separately for each socket):
+
+ int loopback = 0; /* 0 = disabled, 1 = enabled (default) */
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_LOOPBACK, &loopback, sizeof(loopback));
+
+ 4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
+
+ When the local loopback is enabled, all the sent CAN frames are
+ looped back to the open CAN sockets that registered for the CAN
+ frames' CAN-ID on this given interface to meet the multi user
+ needs. The reception of the CAN frames on the same socket that was
+ sending the CAN frame is assumed to be unwanted and therefore
+ disabled by default. This default behaviour may be changed on
+ demand:
+
+ int recv_own_msgs = 1; /* 0 = disabled (default), 1 = enabled */
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
+ &recv_own_msgs, sizeof(recv_own_msgs));
+
+ 4.1.5 RAW socket option CAN_RAW_FD_FRAMES
+
+ CAN FD support in CAN_RAW sockets can be enabled with a new socket option
+ CAN_RAW_FD_FRAMES which is off by default. When the new socket option is
+ not supported by the CAN_RAW socket (e.g. on older kernels), switching the
+ CAN_RAW_FD_FRAMES option returns the error -ENOPROTOOPT.
+
+ Once CAN_RAW_FD_FRAMES is enabled the application can send both CAN frames
+ and CAN FD frames. OTOH the application has to handle CAN and CAN FD frames
+ when reading from the socket.
+
+ CAN_RAW_FD_FRAMES enabled: CAN_MTU and CANFD_MTU are allowed
+ CAN_RAW_FD_FRAMES disabled: only CAN_MTU is allowed (default)
+
+ Example:
+ [ remember: CANFD_MTU == sizeof(struct canfd_frame) ]
+
+ struct canfd_frame cfd;
+
+ nbytes = read(s, &cfd, CANFD_MTU);
+
+ if (nbytes == CANFD_MTU) {
+ printf("got CAN FD frame with length %d\n", cfd.len);
+ /* cfd.flags contains valid data */
+ } else if (nbytes == CAN_MTU) {
+ printf("got legacy CAN frame with length %d\n", cfd.len);
+ /* cfd.flags is undefined */
+ } else {
+ fprintf(stderr, "read: invalid CAN(FD) frame\n");
+ return 1;
+ }
+
+ /* the content can be handled independently from the received MTU size */
+
+ printf("can_id: %X data length: %d data: ", cfd.can_id, cfd.len);
+ for (i = 0; i < cfd.len; i++)
+ printf("%02X ", cfd.data[i]);
+
+ When reading with size CANFD_MTU only returns CAN_MTU bytes that have
+ been received from the socket a legacy CAN frame has been read into the
+ provided CAN FD structure. Note that the canfd_frame.flags data field is
+ not specified in the struct can_frame and therefore it is only valid in
+ CANFD_MTU sized CAN FD frames.
+
+ Implementation hint for new CAN applications:
+
+ To build a CAN FD aware application use struct canfd_frame as basic CAN
+ data structure for CAN_RAW based applications. When the application is
+ executed on an older Linux kernel and switching the CAN_RAW_FD_FRAMES
+ socket option returns an error: No problem. You'll get legacy CAN frames
+ or CAN FD frames and can process them the same way.
+
+ When sending to CAN devices make sure that the device is capable to handle
+ CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU.
+ The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
+
+ 4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
+
+ The CAN_RAW socket can set multiple CAN identifier specific filters that
+ lead to multiple filters in the af_can.c filter processing. These filters
+ are indenpendent from each other which leads to logical OR'ed filters when
+ applied (see 4.1.1).
+
+ This socket option joines the given CAN filters in the way that only CAN
+ frames are passed to user space that matched *all* given CAN filters. The
+ semantic for the applied filters is therefore changed to a logical AND.
+
+ This is useful especially when the filterset is a combination of filters
+ where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
+ CAN ID ranges from the incoming traffic.
+
+ 4.1.7 RAW socket returned message flags
+
+ When using recvmsg() call, the msg->msg_flags may contain following flags:
+
+ MSG_DONTROUTE: set when the received frame was created on the local host.
+
+ MSG_CONFIRM: set when the frame was sent via the socket it is received on.
+ This flag can be interpreted as a 'transmission confirmation' when the
+ CAN driver supports the echo of frames on driver level, see 3.2 and 6.2.
+ In order to receive such messages, CAN_RAW_RECV_OWN_MSGS must be set.
+
+ 4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
+
+ The Broadcast Manager protocol provides a command based configuration
+ interface to filter and send (e.g. cyclic) CAN messages in kernel space.
+
+ Receive filters can be used to down sample frequent messages; detect events
+ such as message contents changes, packet length changes, and do time-out
+ monitoring of received messages.
+
+ Periodic transmission tasks of CAN frames or a sequence of CAN frames can be
+ created and modified at runtime; both the message content and the two
+ possible transmit intervals can be altered.
+
+ A BCM socket is not intended for sending individual CAN frames using the
+ struct can_frame as known from the CAN_RAW socket. Instead a special BCM
+ configuration message is defined. The basic BCM configuration message used
+ to communicate with the broadcast manager and the available operations are
+ defined in the linux/can/bcm.h include. The BCM message consists of a
+ message header with a command ('opcode') followed by zero or more CAN frames.
+ The broadcast manager sends responses to user space in the same form:
+
+ struct bcm_msg_head {
+ __u32 opcode; /* command */
+ __u32 flags; /* special flags */
+ __u32 count; /* run 'count' times with ival1 */
+ struct timeval ival1, ival2; /* count and subsequent interval */
+ canid_t can_id; /* unique can_id for task */
+ __u32 nframes; /* number of can_frames following */
+ struct can_frame frames[0];
+ };
+
+ The aligned payload 'frames' uses the same basic CAN frame structure defined
+ at the beginning of section 4 and in the include/linux/can.h include. All
+ messages to the broadcast manager from user space have this structure.
+
+ Note a CAN_BCM socket must be connected instead of bound after socket
+ creation (example without error checking):
+
+ int s;
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+
+ s = socket(PF_CAN, SOCK_DGRAM, CAN_BCM);
+
+ strcpy(ifr.ifr_name, "can0");
+ ioctl(s, SIOCGIFINDEX, &ifr);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ connect(s, (struct sockaddr *)&addr, sizeof(addr))
+
+ (..)
+
+ The broadcast manager socket is able to handle any number of in flight
+ transmissions or receive filters concurrently. The different RX/TX jobs are
+ distinguished by the unique can_id in each BCM message. However additional
+ CAN_BCM sockets are recommended to communicate on multiple CAN interfaces.
+ When the broadcast manager socket is bound to 'any' CAN interface (=> the
+ interface index is set to zero) the configured receive filters apply to any
+ CAN interface unless the sendto() syscall is used to overrule the 'any' CAN
+ interface index. When using recvfrom() instead of read() to retrieve BCM
+ socket messages the originating CAN interface is provided in can_ifindex.
+
+ 4.2.1 Broadcast Manager operations
+
+ The opcode defines the operation for the broadcast manager to carry out,
+ or details the broadcast managers response to several events, including
+ user requests.
+
+ Transmit Operations (user space to broadcast manager):
+
+ TX_SETUP: Create (cyclic) transmission task.
+
+ TX_DELETE: Remove (cyclic) transmission task, requires only can_id.
+
+ TX_READ: Read properties of (cyclic) transmission task for can_id.
+
+ TX_SEND: Send one CAN frame.
+
+ Transmit Responses (broadcast manager to user space):
+
+ TX_STATUS: Reply to TX_READ request (transmission task configuration).
+
+ TX_EXPIRED: Notification when counter finishes sending at initial interval
+ 'ival1'. Requires the TX_COUNTEVT flag to be set at TX_SETUP.
+
+ Receive Operations (user space to broadcast manager):
+
+ RX_SETUP: Create RX content filter subscription.
+
+ RX_DELETE: Remove RX content filter subscription, requires only can_id.
+
+ RX_READ: Read properties of RX content filter subscription for can_id.
+
+ Receive Responses (broadcast manager to user space):
+
+ RX_STATUS: Reply to RX_READ request (filter task configuration).
+
+ RX_TIMEOUT: Cyclic message is detected to be absent (timer ival1 expired).
+
+ RX_CHANGED: BCM message with updated CAN frame (detected content change).
+ Sent on first message received or on receipt of revised CAN messages.
+
+ 4.2.2 Broadcast Manager message flags
+
+ When sending a message to the broadcast manager the 'flags' element may
+ contain the following flag definitions which influence the behaviour:
+
+ SETTIMER: Set the values of ival1, ival2 and count
+
+ STARTTIMER: Start the timer with the actual values of ival1, ival2
+ and count. Starting the timer leads simultaneously to emit a CAN frame.
+
+ TX_COUNTEVT: Create the message TX_EXPIRED when count expires
+
+ TX_ANNOUNCE: A change of data by the process is emitted immediately.
+
+ TX_CP_CAN_ID: Copies the can_id from the message header to each
+ subsequent frame in frames. This is intended as usage simplification. For
+ TX tasks the unique can_id from the message header may differ from the
+ can_id(s) stored for transmission in the subsequent struct can_frame(s).
+
+ RX_FILTER_ID: Filter by can_id alone, no frames required (nframes=0).
+
+ RX_CHECK_DLC: A change of the DLC leads to an RX_CHANGED.
+
+ RX_NO_AUTOTIMER: Prevent automatically starting the timeout monitor.
+
+ RX_ANNOUNCE_RESUME: If passed at RX_SETUP and a receive timeout occurred, a
+ RX_CHANGED message will be generated when the (cyclic) receive restarts.
+
+ TX_RESET_MULTI_IDX: Reset the index for the multiple frame transmission.
+
+ RX_RTR_FRAME: Send reply for RTR-request (placed in op->frames[0]).
+
+ 4.2.3 Broadcast Manager transmission timers
+
+ Periodic transmission configurations may use up to two interval timers.
+ In this case the BCM sends a number of messages ('count') at an interval
+ 'ival1', then continuing to send at another given interval 'ival2'. When
+ only one timer is needed 'count' is set to zero and only 'ival2' is used.
+ When SET_TIMER and START_TIMER flag were set the timers are activated.
+ The timer values can be altered at runtime when only SET_TIMER is set.
+
+ 4.2.4 Broadcast Manager message sequence transmission
+
+ Up to 256 CAN frames can be transmitted in a sequence in the case of a cyclic
+ TX task configuration. The number of CAN frames is provided in the 'nframes'
+ element of the BCM message head. The defined number of CAN frames are added
+ as array to the TX_SETUP BCM configuration message.
+
+ /* create a struct to set up a sequence of four CAN frames */
+ struct {
+ struct bcm_msg_head msg_head;
+ struct can_frame frame[4];
+ } mytxmsg;
+
+ (..)
+ mytxmsg.nframes = 4;
+ (..)
+
+ write(s, &mytxmsg, sizeof(mytxmsg));
+
+ With every transmission the index in the array of CAN frames is increased
+ and set to zero at index overflow.
+
+ 4.2.5 Broadcast Manager receive filter timers
+
+ The timer values ival1 or ival2 may be set to non-zero values at RX_SETUP.
+ When the SET_TIMER flag is set the timers are enabled:
+
+ ival1: Send RX_TIMEOUT when a received message is not received again within
+ the given time. When START_TIMER is set at RX_SETUP the timeout detection
+ is activated directly - even without a former CAN frame reception.
+
+ ival2: Throttle the received message rate down to the value of ival2. This
+ is useful to reduce messages for the application when the signal inside the
+ CAN frame is stateless as state changes within the ival2 periode may get
+ lost.
+
+ 4.2.6 Broadcast Manager multiplex message receive filter
+
+ To filter for content changes in multiplex message sequences an array of more
+ than one CAN frames can be passed in a RX_SETUP configuration message. The
+ data bytes of the first CAN frame contain the mask of relevant bits that
+ have to match in the subsequent CAN frames with the received CAN frame.
+ If one of the subsequent CAN frames is matching the bits in that frame data
+ mark the relevant content to be compared with the previous received content.
+ Up to 257 CAN frames (multiplex filter bit mask CAN frame plus 256 CAN
+ filters) can be added as array to the TX_SETUP BCM configuration message.
+
+ /* usually used to clear CAN frame data[] - beware of endian problems! */
+ #define U64_DATA(p) (*(unsigned long long*)(p)->data)
+
+ struct {
+ struct bcm_msg_head msg_head;
+ struct can_frame frame[5];
+ } msg;
+
+ msg.msg_head.opcode = RX_SETUP;
+ msg.msg_head.can_id = 0x42;
+ msg.msg_head.flags = 0;
+ msg.msg_head.nframes = 5;
+ U64_DATA(&msg.frame[0]) = 0xFF00000000000000ULL; /* MUX mask */
+ U64_DATA(&msg.frame[1]) = 0x01000000000000FFULL; /* data mask (MUX 0x01) */
+ U64_DATA(&msg.frame[2]) = 0x0200FFFF000000FFULL; /* data mask (MUX 0x02) */
+ U64_DATA(&msg.frame[3]) = 0x330000FFFFFF0003ULL; /* data mask (MUX 0x33) */
+ U64_DATA(&msg.frame[4]) = 0x4F07FC0FF0000000ULL; /* data mask (MUX 0x4F) */
+
+ write(s, &msg, sizeof(msg));
+
+ 4.3 connected transport protocols (SOCK_SEQPACKET)
+ 4.4 unconnected transport protocols (SOCK_DGRAM)
+
+
+5. SocketCAN core module
+-------------------------
+
+ The SocketCAN core module implements the protocol family
+ PF_CAN. CAN protocol modules are loaded by the core module at
+ runtime. The core module provides an interface for CAN protocol
+ modules to subscribe needed CAN IDs (see chapter 3.1).
+
+ 5.1 can.ko module params
+
+ - stats_timer: To calculate the SocketCAN core statistics
+ (e.g. current/maximum frames per second) this 1 second timer is
+ invoked at can.ko module start time by default. This timer can be
+ disabled by using stattimer=0 on the module commandline.
+
+ - debug: (removed since SocketCAN SVN r546)
+
+ 5.2 procfs content
+
+ As described in chapter 3.1 the SocketCAN core uses several filter
+ lists to deliver received CAN frames to CAN protocol modules. These
+ receive lists, their filters and the count of filter matches can be
+ checked in the appropriate receive list. All entries contain the
+ device and a protocol module identifier:
+
+ foo@bar:~$ cat /proc/net/can/rcvlist_all
+
+ receive list 'rx_all':
+ (vcan3: no entry)
+ (vcan2: no entry)
+ (vcan1: no entry)
+ device can_id can_mask function userdata matches ident
+ vcan0 000 00000000 f88e6370 f6c6f400 0 raw
+ (any: no entry)
+
+ In this example an application requests any CAN traffic from vcan0.
+
+ rcvlist_all - list for unfiltered entries (no filter operations)
+ rcvlist_eff - list for single extended frame (EFF) entries
+ rcvlist_err - list for error message frames masks
+ rcvlist_fil - list for mask/value filters
+ rcvlist_inv - list for mask/value filters (inverse semantic)
+ rcvlist_sff - list for single standard frame (SFF) entries
+
+ Additional procfs files in /proc/net/can
+
+ stats - SocketCAN core statistics (rx/tx frames, match ratios, ...)
+ reset_stats - manual statistic reset
+ version - prints the SocketCAN core version and the ABI version
+
+ 5.3 writing own CAN protocol modules
+
+ To implement a new protocol in the protocol family PF_CAN a new
+ protocol has to be defined in include/linux/can.h .
+ The prototypes and definitions to use the SocketCAN core can be
+ accessed by including include/linux/can/core.h .
+ In addition to functions that register the CAN protocol and the
+ CAN device notifier chain there are functions to subscribe CAN
+ frames received by CAN interfaces and to send CAN frames:
+
+ can_rx_register - subscribe CAN frames from a specific interface
+ can_rx_unregister - unsubscribe CAN frames from a specific interface
+ can_send - transmit a CAN frame (optional with local loopback)
+
+ For details see the kerneldoc documentation in net/can/af_can.c or
+ the source code of net/can/raw.c or net/can/bcm.c .
+
+6. CAN network drivers
+----------------------
+
+ Writing a CAN network device driver is much easier than writing a
+ CAN character device driver. Similar to other known network device
+ drivers you mainly have to deal with:
+
+ - TX: Put the CAN frame from the socket buffer to the CAN controller.
+ - RX: Put the CAN frame from the CAN controller to the socket buffer.
+
+ See e.g. at Documentation/networking/netdevices.txt . The differences
+ for writing CAN network device driver are described below:
+
+ 6.1 general settings
+
+ dev->type = ARPHRD_CAN; /* the netdevice hardware type */
+ dev->flags = IFF_NOARP; /* CAN has no arp */
+
+ dev->mtu = CAN_MTU; /* sizeof(struct can_frame) -> legacy CAN interface */
+
+ or alternative, when the controller supports CAN with flexible data rate:
+ dev->mtu = CANFD_MTU; /* sizeof(struct canfd_frame) -> CAN FD interface */
+
+ The struct can_frame or struct canfd_frame is the payload of each socket
+ buffer (skbuff) in the protocol family PF_CAN.
+
+ 6.2 local loopback of sent frames
+
+ As described in chapter 3.2 the CAN network device driver should
+ support a local loopback functionality similar to the local echo
+ e.g. of tty devices. In this case the driver flag IFF_ECHO has to be
+ set to prevent the PF_CAN core from locally echoing sent frames
+ (aka loopback) as fallback solution:
+
+ dev->flags = (IFF_NOARP | IFF_ECHO);
+
+ 6.3 CAN controller hardware filters
+
+ To reduce the interrupt load on deep embedded systems some CAN
+ controllers support the filtering of CAN IDs or ranges of CAN IDs.
+ These hardware filter capabilities vary from controller to
+ controller and have to be identified as not feasible in a multi-user
+ networking approach. The use of the very controller specific
+ hardware filters could make sense in a very dedicated use-case, as a
+ filter on driver level would affect all users in the multi-user
+ system. The high efficient filter sets inside the PF_CAN core allow
+ to set different multiple filters for each socket separately.
+ Therefore the use of hardware filters goes to the category 'handmade
+ tuning on deep embedded systems'. The author is running a MPC603e
+ @133MHz with four SJA1000 CAN controllers from 2002 under heavy bus
+ load without any problems ...
+
+ 6.4 The virtual CAN driver (vcan)
+
+ Similar to the network loopback devices, vcan offers a virtual local
+ CAN interface. A full qualified address on CAN consists of
+
+ - a unique CAN Identifier (CAN ID)
+ - the CAN bus this CAN ID is transmitted on (e.g. can0)
+
+ so in common use cases more than one virtual CAN interface is needed.
+
+ The virtual CAN interfaces allow the transmission and reception of CAN
+ frames without real CAN controller hardware. Virtual CAN network
+ devices are usually named 'vcanX', like vcan0 vcan1 vcan2 ...
+ When compiled as a module the virtual CAN driver module is called vcan.ko
+
+ Since Linux Kernel version 2.6.24 the vcan driver supports the Kernel
+ netlink interface to create vcan network devices. The creation and
+ removal of vcan network devices can be managed with the ip(8) tool:
+
+ - Create a virtual CAN network interface:
+ $ ip link add type vcan
+
+ - Create a virtual CAN network interface with a specific name 'vcan42':
+ $ ip link add dev vcan42 type vcan
+
+ - Remove a (virtual CAN) network interface 'vcan42':
+ $ ip link del vcan42
+
+ 6.5 The CAN network device driver interface
+
+ The CAN network device driver interface provides a generic interface
+ to setup, configure and monitor CAN network devices. The user can then
+ configure the CAN device, like setting the bit-timing parameters, via
+ the netlink interface using the program "ip" from the "IPROUTE2"
+ utility suite. The following chapter describes briefly how to use it.
+ Furthermore, the interface uses a common data structure and exports a
+ set of common functions, which all real CAN network device drivers
+ should use. Please have a look to the SJA1000 or MSCAN driver to
+ understand how to use them. The name of the module is can-dev.ko.
+
+ 6.5.1 Netlink interface to set/get devices properties
+
+ The CAN device must be configured via netlink interface. The supported
+ netlink message types are defined and briefly described in
+ "include/linux/can/netlink.h". CAN link support for the program "ip"
+ of the IPROUTE2 utility suite is available and it can be used as shown
+ below:
+
+ - Setting CAN device properties:
+
+ $ ip link set can0 type can help
+ Usage: ip link set DEVICE type can
+ [ bitrate BITRATE [ sample-point SAMPLE-POINT] ] |
+ [ tq TQ prop-seg PROP_SEG phase-seg1 PHASE-SEG1
+ phase-seg2 PHASE-SEG2 [ sjw SJW ] ]
+
+ [ loopback { on | off } ]
+ [ listen-only { on | off } ]
+ [ triple-sampling { on | off } ]
+
+ [ restart-ms TIME-MS ]
+ [ restart ]
+
+ Where: BITRATE := { 1..1000000 }
+ SAMPLE-POINT := { 0.000..0.999 }
+ TQ := { NUMBER }
+ PROP-SEG := { 1..8 }
+ PHASE-SEG1 := { 1..8 }
+ PHASE-SEG2 := { 1..8 }
+ SJW := { 1..4 }
+ RESTART-MS := { 0 | NUMBER }
+
+ - Display CAN device details and statistics:
+
+ $ ip -details -statistics link show can0
+ 2: can0: <NOARP,UP,LOWER_UP,ECHO> mtu 16 qdisc pfifo_fast state UP qlen 10
+ link/can
+ can <TRIPLE-SAMPLING> state ERROR-ACTIVE restart-ms 100
+ bitrate 125000 sample_point 0.875
+ tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1
+ sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
+ clock 8000000
+ re-started bus-errors arbit-lost error-warn error-pass bus-off
+ 41 17457 0 41 42 41
+ RX: bytes packets errors dropped overrun mcast
+ 140859 17608 17457 0 0 0
+ TX: bytes packets errors dropped carrier collsns
+ 861 112 0 41 0 0
+
+ More info to the above output:
+
+ "<TRIPLE-SAMPLING>"
+ Shows the list of selected CAN controller modes: LOOPBACK,
+ LISTEN-ONLY, or TRIPLE-SAMPLING.
+
+ "state ERROR-ACTIVE"
+ The current state of the CAN controller: "ERROR-ACTIVE",
+ "ERROR-WARNING", "ERROR-PASSIVE", "BUS-OFF" or "STOPPED"
+
+ "restart-ms 100"
+ Automatic restart delay time. If set to a non-zero value, a
+ restart of the CAN controller will be triggered automatically
+ in case of a bus-off condition after the specified delay time
+ in milliseconds. By default it's off.
+
+ "bitrate 125000 sample-point 0.875"
+ Shows the real bit-rate in bits/sec and the sample-point in the
+ range 0.000..0.999. If the calculation of bit-timing parameters
+ is enabled in the kernel (CONFIG_CAN_CALC_BITTIMING=y), the
+ bit-timing can be defined by setting the "bitrate" argument.
+ Optionally the "sample-point" can be specified. By default it's
+ 0.000 assuming CIA-recommended sample-points.
+
+ "tq 125 prop-seg 6 phase-seg1 7 phase-seg2 2 sjw 1"
+ Shows the time quanta in ns, propagation segment, phase buffer
+ segment 1 and 2 and the synchronisation jump width in units of
+ tq. They allow to define the CAN bit-timing in a hardware
+ independent format as proposed by the Bosch CAN 2.0 spec (see
+ chapter 8 of http://www.semiconductors.bosch.de/pdf/can2spec.pdf).
+
+ "sja1000: tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
+ clock 8000000"
+ Shows the bit-timing constants of the CAN controller, here the
+ "sja1000". The minimum and maximum values of the time segment 1
+ and 2, the synchronisation jump width in units of tq, the
+ bitrate pre-scaler and the CAN system clock frequency in Hz.
+ These constants could be used for user-defined (non-standard)
+ bit-timing calculation algorithms in user-space.
+
+ "re-started bus-errors arbit-lost error-warn error-pass bus-off"
+ Shows the number of restarts, bus and arbitration lost errors,
+ and the state changes to the error-warning, error-passive and
+ bus-off state. RX overrun errors are listed in the "overrun"
+ field of the standard network statistics.
+
+ 6.5.2 Setting the CAN bit-timing
+
+ The CAN bit-timing parameters can always be defined in a hardware
+ independent format as proposed in the Bosch CAN 2.0 specification
+ specifying the arguments "tq", "prop_seg", "phase_seg1", "phase_seg2"
+ and "sjw":
+
+ $ ip link set canX type can tq 125 prop-seg 6 \
+ phase-seg1 7 phase-seg2 2 sjw 1
+
+ If the kernel option CONFIG_CAN_CALC_BITTIMING is enabled, CIA
+ recommended CAN bit-timing parameters will be calculated if the bit-
+ rate is specified with the argument "bitrate":
+
+ $ ip link set canX type can bitrate 125000
+
+ Note that this works fine for the most common CAN controllers with
+ standard bit-rates but may *fail* for exotic bit-rates or CAN system
+ clock frequencies. Disabling CONFIG_CAN_CALC_BITTIMING saves some
+ space and allows user-space tools to solely determine and set the
+ bit-timing parameters. The CAN controller specific bit-timing
+ constants can be used for that purpose. They are listed by the
+ following command:
+
+ $ ip -details link show can0
+ ...
+ sja1000: clock 8000000 tseg1 1..16 tseg2 1..8 sjw 1..4 brp 1..64 brp-inc 1
+
+ 6.5.3 Starting and stopping the CAN network device
+
+ A CAN network device is started or stopped as usual with the command
+ "ifconfig canX up/down" or "ip link set canX up/down". Be aware that
+ you *must* define proper bit-timing parameters for real CAN devices
+ before you can start it to avoid error-prone default settings:
+
+ $ ip link set canX up type can bitrate 125000
+
+ A device may enter the "bus-off" state if too many errors occurred on
+ the CAN bus. Then no more messages are received or sent. An automatic
+ bus-off recovery can be enabled by setting the "restart-ms" to a
+ non-zero value, e.g.:
+
+ $ ip link set canX type can restart-ms 100
+
+ Alternatively, the application may realize the "bus-off" condition
+ by monitoring CAN error message frames and do a restart when
+ appropriate with the command:
+
+ $ ip link set canX type can restart
+
+ Note that a restart will also create a CAN error message frame (see
+ also chapter 3.4).
+
+ 6.6 CAN FD (flexible data rate) driver support
+
+ CAN FD capable CAN controllers support two different bitrates for the
+ arbitration phase and the payload phase of the CAN FD frame. Therefore a
+ second bit timing has to be specified in order to enable the CAN FD bitrate.
+
+ Additionally CAN FD capable CAN controllers support up to 64 bytes of
+ payload. The representation of this length in can_frame.can_dlc and
+ canfd_frame.len for userspace applications and inside the Linux network
+ layer is a plain value from 0 .. 64 instead of the CAN 'data length code'.
+ The data length code was a 1:1 mapping to the payload length in the legacy
+ CAN frames anyway. The payload length to the bus-relevant DLC mapping is
+ only performed inside the CAN drivers, preferably with the helper
+ functions can_dlc2len() and can_len2dlc().
+
+ The CAN netdevice driver capabilities can be distinguished by the network
+ devices maximum transfer unit (MTU):
+
+ MTU = 16 (CAN_MTU) => sizeof(struct can_frame) => 'legacy' CAN device
+ MTU = 72 (CANFD_MTU) => sizeof(struct canfd_frame) => CAN FD capable device
+
+ The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
+ N.B. CAN FD capable devices can also handle and send legacy CAN frames.
+
+ FIXME: Add details about the CAN FD controller configuration when available.
+
+ 6.7 Supported CAN hardware
+
+ Please check the "Kconfig" file in "drivers/net/can" to get an actual
+ list of the support CAN hardware. On the SocketCAN project website
+ (see chapter 7) there might be further drivers available, also for
+ older kernel versions.
+
+7. SocketCAN resources
+-----------------------
+
+ The Linux CAN / SocketCAN project ressources (project site / mailing list)
+ are referenced in the MAINTAINERS file in the Linux source tree.
+ Search for CAN NETWORK [LAYERS|DRIVERS].
+
+8. Credits
+----------
+
+ Oliver Hartkopp (PF_CAN core, filters, drivers, bcm, SJA1000 driver)
+ Urs Thuermann (PF_CAN core, kernel integration, socket interfaces, raw, vcan)
+ Jan Kizka (RT-SocketCAN core, Socket-API reconciliation)
+ Wolfgang Grandegger (RT-SocketCAN core & drivers, Raw Socket-API reviews,
+ CAN device driver interface, MSCAN driver)
+ Robert Schwebel (design reviews, PTXdist integration)
+ Marc Kleine-Budde (design reviews, Kernel 2.6 cleanups, drivers)
+ Benedikt Spranger (reviews)
+ Thomas Gleixner (LKML reviews, coding style, posting hints)
+ Andrey Volkov (kernel subtree structure, ioctls, MSCAN driver)
+ Matthias Brukner (first SJA1000 CAN netdevice implementation Q2/2003)
+ Klaus Hitschler (PEAK driver integration)
+ Uwe Koppe (CAN netdevices with PF_PACKET approach)
+ Michael Schulze (driver layer loopback requirement, RT CAN drivers review)
+ Pavel Pisa (Bit-timing calculation)
+ Sascha Hauer (SJA1000 platform driver)
+ Sebastian Haas (SJA1000 EMS PCI driver)
+ Markus Plessing (SJA1000 EMS PCI driver)
+ Per Dalen (SJA1000 Kvaser PCI driver)
+ Sam Ravnborg (reviews, coding style, kbuild help)
diff --git a/Documentation/networking/cdc_mbim.txt b/Documentation/networking/cdc_mbim.txt
new file mode 100644
index 000000000..a15ea602a
--- /dev/null
+++ b/Documentation/networking/cdc_mbim.txt
@@ -0,0 +1,339 @@
+ cdc_mbim - Driver for CDC MBIM Mobile Broadband modems
+ ========================================================
+
+The cdc_mbim driver supports USB devices conforming to the "Universal
+Serial Bus Communications Class Subclass Specification for Mobile
+Broadband Interface Model" [1], which is a further development of
+"Universal Serial Bus Communications Class Subclass Specifications for
+Network Control Model Devices" [2] optimized for Mobile Broadband
+devices, aka "3G/LTE modems".
+
+
+Command Line Parameters
+=======================
+
+The cdc_mbim driver has no parameters of its own. But the probing
+behaviour for NCM 1.0 backwards compatible MBIM functions (an
+"NCM/MBIM function" as defined in section 3.2 of [1]) is affected
+by a cdc_ncm driver parameter:
+
+prefer_mbim
+-----------
+Type: Boolean
+Valid Range: N/Y (0-1)
+Default Value: Y (MBIM is preferred)
+
+This parameter sets the system policy for NCM/MBIM functions. Such
+functions will be handled by either the cdc_ncm driver or the cdc_mbim
+driver depending on the prefer_mbim setting. Setting prefer_mbim=N
+makes the cdc_mbim driver ignore these functions and lets the cdc_ncm
+driver handle them instead.
+
+The parameter is writable, and can be changed at any time. A manual
+unbind/bind is required to make the change effective for NCM/MBIM
+functions bound to the "wrong" driver
+
+
+Basic usage
+===========
+
+MBIM functions are inactive when unmanaged. The cdc_mbim driver only
+provides an userspace interface to the MBIM control channel, and will
+not participate in the management of the function. This implies that a
+userspace MBIM management application always is required to enable a
+MBIM function.
+
+Such userspace applications includes, but are not limited to:
+ - mbimcli (included with the libmbim [3] library), and
+ - ModemManager [4]
+
+Establishing a MBIM IP session reequires at least these actions by the
+management application:
+ - open the control channel
+ - configure network connection settings
+ - connect to network
+ - configure IP interface
+
+Management application development
+----------------------------------
+The driver <-> userspace interfaces are described below. The MBIM
+control channel protocol is described in [1].
+
+
+MBIM control channel userspace ABI
+==================================
+
+/dev/cdc-wdmX character device
+------------------------------
+The driver creates a two-way pipe to the MBIM function control channel
+using the cdc-wdm driver as a subdriver. The userspace end of the
+control channel pipe is a /dev/cdc-wdmX character device.
+
+The cdc_mbim driver does not process or police messages on the control
+channel. The channel is fully delegated to the userspace management
+application. It is therefore up to this application to ensure that it
+complies with all the control channel requirements in [1].
+
+The cdc-wdmX device is created as a child of the MBIM control
+interface USB device. The character device associated with a specific
+MBIM function can be looked up using sysfs. For example:
+
+ bjorn@nemi:~$ ls /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc
+ cdc-wdm0
+
+ bjorn@nemi:~$ grep . /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc/cdc-wdm0/dev
+ 180:0
+
+
+USB configuration descriptors
+-----------------------------
+The wMaxControlMessage field of the CDC MBIM functional descriptor
+limits the maximum control message size. The managament application is
+responsible for negotiating a control message size complying with the
+requirements in section 9.3.1 of [1], taking this descriptor field
+into consideration.
+
+The userspace application can access the CDC MBIM functional
+descriptor of a MBIM function using either of the two USB
+configuration descriptor kernel interfaces described in [6] or [7].
+
+See also the ioctl documentation below.
+
+
+Fragmentation
+-------------
+The userspace application is responsible for all control message
+fragmentation and defragmentaion, as described in section 9.5 of [1].
+
+
+/dev/cdc-wdmX write()
+---------------------
+The MBIM control messages from the management application *must not*
+exceed the negotiated control message size.
+
+
+/dev/cdc-wdmX read()
+--------------------
+The management application *must* accept control messages of up the
+negotiated control message size.
+
+
+/dev/cdc-wdmX ioctl()
+--------------------
+IOCTL_WDM_MAX_COMMAND: Get Maximum Command Size
+This ioctl returns the wMaxControlMessage field of the CDC MBIM
+functional descriptor for MBIM devices. This is intended as a
+convenience, eliminating the need to parse the USB descriptors from
+userspace.
+
+ #include <stdio.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+ #include <linux/types.h>
+ #include <linux/usb/cdc-wdm.h>
+ int main()
+ {
+ __u16 max;
+ int fd = open("/dev/cdc-wdm0", O_RDWR);
+ if (!ioctl(fd, IOCTL_WDM_MAX_COMMAND, &max))
+ printf("wMaxControlMessage is %d\n", max);
+ }
+
+
+Custom device services
+----------------------
+The MBIM specification allows vendors to freely define additional
+services. This is fully supported by the cdc_mbim driver.
+
+Support for new MBIM services, including vendor specified services, is
+implemented entirely in userspace, like the rest of the MBIM control
+protocol
+
+New services should be registered in the MBIM Registry [5].
+
+
+
+MBIM data channel userspace ABI
+===============================
+
+wwanY network device
+--------------------
+The cdc_mbim driver represents the MBIM data channel as a single
+network device of the "wwan" type. This network device is initially
+mapped to MBIM IP session 0.
+
+
+Multiplexed IP sessions (IPS)
+-----------------------------
+MBIM allows multiplexing up to 256 IP sessions over a single USB data
+channel. The cdc_mbim driver models such IP sessions as 802.1q VLAN
+subdevices of the master wwanY device, mapping MBIM IP session Z to
+VLAN ID Z for all values of Z greater than 0.
+
+The device maximum Z is given in the MBIM_DEVICE_CAPS_INFO structure
+described in section 10.5.1 of [1].
+
+The userspace management application is responsible for adding new
+VLAN links prior to establishing MBIM IP sessions where the SessionId
+is greater than 0. These links can be added by using the normal VLAN
+kernel interfaces, either ioctl or netlink.
+
+For example, adding a link for a MBIM IP session with SessionId 3:
+
+ ip link add link wwan0 name wwan0.3 type vlan id 3
+
+The driver will automatically map the "wwan0.3" network device to MBIM
+IP session 3.
+
+
+Device Service Streams (DSS)
+----------------------------
+MBIM also allows up to 256 non-IP data streams to be multiplexed over
+the same shared USB data channel. The cdc_mbim driver models these
+sessions as another set of 802.1q VLAN subdevices of the master wwanY
+device, mapping MBIM DSS session A to VLAN ID (256 + A) for all values
+of A.
+
+The device maximum A is given in the MBIM_DEVICE_SERVICES_INFO
+structure described in section 10.5.29 of [1].
+
+The DSS VLAN subdevices are used as a practical interface between the
+shared MBIM data channel and a MBIM DSS aware userspace application.
+It is not intended to be presented as-is to an end user. The
+assumption is that an userspace application initiating a DSS session
+also takes care of the necessary framing of the DSS data, presenting
+the stream to the end user in an appropriate way for the stream type.
+
+The network device ABI requires a dummy ethernet header for every DSS
+data frame being transported. The contents of this header is
+arbitrary, with the following exceptions:
+ - TX frames using an IP protocol (0x0800 or 0x86dd) will be dropped
+ - RX frames will have the protocol field set to ETH_P_802_3 (but will
+ not be properly formatted 802.3 frames)
+ - RX frames will have the destination address set to the hardware
+ address of the master device
+
+The DSS supporting userspace management application is responsible for
+adding the dummy ethernet header on TX and stripping it on RX.
+
+This is a simple example using tools commonly available, exporting
+DssSessionId 5 as a pty character device pointed to by a /dev/nmea
+symlink:
+
+ ip link add link wwan0 name wwan0.dss5 type vlan id 261
+ ip link set dev wwan0.dss5 up
+ socat INTERFACE:wwan0.dss5,type=2 PTY:,echo=0,link=/dev/nmea
+
+This is only an example, most suitable for testing out a DSS
+service. Userspace applications supporting specific MBIM DSS services
+are expected to use the tools and programming interfaces required by
+that service.
+
+Note that adding VLAN links for DSS sessions is entirely optional. A
+management application may instead choose to bind a packet socket
+directly to the master network device, using the received VLAN tags to
+map frames to the correct DSS session and adding 18 byte VLAN ethernet
+headers with the appropriate tag on TX. In this case using a socket
+filter is recommended, matching only the DSS VLAN subset. This avoid
+unnecessary copying of unrelated IP session data to userspace. For
+example:
+
+ static struct sock_filter dssfilter[] = {
+ /* use special negative offsets to get VLAN tag */
+ BPF_STMT(BPF_LD|BPF_B|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 1, 0, 6), /* true */
+
+ /* verify DSS VLAN range */
+ BPF_STMT(BPF_LD|BPF_H|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG),
+ BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 256, 0, 4), /* 256 is first DSS VLAN */
+ BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 512, 3, 0), /* 511 is last DSS VLAN */
+
+ /* verify ethertype */
+ BPF_STMT(BPF_LD|BPF_H|BPF_ABS, 2 * ETH_ALEN),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, ETH_P_802_3, 0, 1),
+
+ BPF_STMT(BPF_RET|BPF_K, (u_int)-1), /* accept */
+ BPF_STMT(BPF_RET|BPF_K, 0), /* ignore */
+ };
+
+
+
+Tagged IP session 0 VLAN
+------------------------
+As described above, MBIM IP session 0 is treated as special by the
+driver. It is initially mapped to untagged frames on the wwanY
+network device.
+
+This mapping implies a few restrictions on multiplexed IPS and DSS
+sessions, which may not always be practical:
+ - no IPS or DSS session can use a frame size greater than the MTU on
+ IP session 0
+ - no IPS or DSS session can be in the up state unless the network
+ device representing IP session 0 also is up
+
+These problems can be avoided by optionally making the driver map IP
+session 0 to a VLAN subdevice, similar to all other IP sessions. This
+behaviour is triggered by adding a VLAN link for the magic VLAN ID
+4094. The driver will then immediately start mapping MBIM IP session
+0 to this VLAN, and will drop untagged frames on the master wwanY
+device.
+
+Tip: It might be less confusing to the end user to name this VLAN
+subdevice after the MBIM SessionID instead of the VLAN ID. For
+example:
+
+ ip link add link wwan0 name wwan0.0 type vlan id 4094
+
+
+VLAN mapping
+------------
+
+Summarizing the cdc_mbim driver mapping described above, we have this
+relationship between VLAN tags on the wwanY network device and MBIM
+sessions on the shared USB data channel:
+
+ VLAN ID MBIM type MBIM SessionID Notes
+ ---------------------------------------------------------
+ untagged IPS 0 a)
+ 1 - 255 IPS 1 - 255 <VLANID>
+ 256 - 511 DSS 0 - 255 <VLANID - 256>
+ 512 - 4093 b)
+ 4094 IPS 0 c)
+
+ a) if no VLAN ID 4094 link exists, else dropped
+ b) unsupported VLAN range, unconditionally dropped
+ c) if a VLAN ID 4094 link exists, else dropped
+
+
+
+
+References
+==========
+
+[1] USB Implementers Forum, Inc. - "Universal Serial Bus
+ Communications Class Subclass Specification for Mobile Broadband
+ Interface Model", Revision 1.0 (Errata 1), May 1, 2013
+ - http://www.usb.org/developers/docs/devclass_docs/
+
+[2] USB Implementers Forum, Inc. - "Universal Serial Bus
+ Communications Class Subclass Specifications for Network Control
+ Model Devices", Revision 1.0 (Errata 1), November 24, 2010
+ - http://www.usb.org/developers/docs/devclass_docs/
+
+[3] libmbim - "a glib-based library for talking to WWAN modems and
+ devices which speak the Mobile Interface Broadband Model (MBIM)
+ protocol"
+ - http://www.freedesktop.org/wiki/Software/libmbim/
+
+[4] ModemManager - "a DBus-activated daemon which controls mobile
+ broadband (2G/3G/4G) devices and connections"
+ - http://www.freedesktop.org/wiki/Software/ModemManager/
+
+[5] "MBIM (Mobile Broadband Interface Model) Registry"
+ - http://compliance.usb.org/mbim/
+
+[6] "/proc/bus/usb filesystem output"
+ - Documentation/usb/proc_usb_info.txt
+
+[7] "/sys/bus/usb/devices/.../descriptors"
+ - Documentation/ABI/stable/sysfs-bus-usb
diff --git a/Documentation/networking/cops.txt b/Documentation/networking/cops.txt
new file mode 100644
index 000000000..3e344b448
--- /dev/null
+++ b/Documentation/networking/cops.txt
@@ -0,0 +1,63 @@
+Text File for the COPS LocalTalk Linux driver (cops.c).
+ By Jay Schulist <jschlst@samba.org>
+
+This driver has two modes and they are: Dayna mode and Tangent mode.
+Each mode corresponds with the type of card. It has been found
+that there are 2 main types of cards and all other cards are
+the same and just have different names or only have minor differences
+such as more IO ports. As this driver is tested it will
+become more clear exactly what cards are supported.
+
+Right now these cards are known to work with the COPS driver. The
+LT-200 cards work in a somewhat more limited capacity than the
+DL200 cards, which work very well and are in use by many people.
+
+TANGENT driver mode:
+ Tangent ATB-II, Novell NL-1000, Daystar Digital LT-200
+DAYNA driver mode:
+ Dayna DL2000/DaynaTalk PC (Half Length), COPS LT-95,
+ Farallon PhoneNET PC III, Farallon PhoneNET PC II
+Other cards possibly supported mode unknown though:
+ Dayna DL2000 (Full length)
+
+The COPS driver defaults to using Dayna mode. To change the driver's
+mode if you built a driver with dual support use board_type=1 or
+board_type=2 for Dayna or Tangent with insmod.
+
+** Operation/loading of the driver.
+Use modprobe like this: /sbin/modprobe cops.o (IO #) (IRQ #)
+If you do not specify any options the driver will try and use the IO = 0x240,
+IRQ = 5. As of right now I would only use IRQ 5 for the card, if autoprobing.
+
+To load multiple COPS driver Localtalk cards you can do one of the following.
+
+insmod cops io=0x240 irq=5
+insmod -o cops2 cops io=0x260 irq=3
+
+Or in lilo.conf put something like this:
+ append="ether=5,0x240,lt0 ether=3,0x260,lt1"
+
+Then bring up the interface with ifconfig. It will look something like this:
+lt0 Link encap:UNSPEC HWaddr 00-00-00-00-00-00-00-F7-00-00-00-00-00-00-00-00
+ inet addr:192.168.1.2 Bcast:192.168.1.255 Mask:255.255.255.0
+ UP BROADCAST RUNNING NOARP MULTICAST MTU:600 Metric:1
+ RX packets:0 errors:0 dropped:0 overruns:0 frame:0
+ TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 coll:0
+
+** Netatalk Configuration
+You will need to configure atalkd with something like the following to make
+it work with the cops.c driver.
+
+* For single LTalk card use.
+dummy -seed -phase 2 -net 2000 -addr 2000.10 -zone "1033"
+lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033"
+
+* For multiple cards, Ethernet and LocalTalk.
+eth0 -seed -phase 2 -net 3000 -addr 3000.20 -zone "1033"
+lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033"
+
+* For multiple LocalTalk cards, and an Ethernet card.
+* Order seems to matter here, Ethernet last.
+lt0 -seed -phase 1 -net 1000 -addr 1000.10 -zone "LocalTalk1"
+lt1 -seed -phase 1 -net 2000 -addr 2000.20 -zone "LocalTalk2"
+eth0 -seed -phase 2 -net 3000 -addr 3000.30 -zone "EtherTalk"
diff --git a/Documentation/networking/cs89x0.txt b/Documentation/networking/cs89x0.txt
new file mode 100644
index 000000000..0e190180e
--- /dev/null
+++ b/Documentation/networking/cs89x0.txt
@@ -0,0 +1,624 @@
+
+NOTE
+----
+
+This document was contributed by Cirrus Logic for kernel 2.2.5. This version
+has been updated for 2.3.48 by Andrew Morton.
+
+Cirrus make a copy of this driver available at their website, as
+described below. In general, you should use the driver version which
+comes with your Linux distribution.
+
+
+
+CIRRUS LOGIC LAN CS8900/CS8920 ETHERNET ADAPTERS
+Linux Network Interface Driver ver. 2.00 <kernel 2.3.48>
+===============================================================================
+
+
+TABLE OF CONTENTS
+
+1.0 CIRRUS LOGIC LAN CS8900/CS8920 ETHERNET ADAPTERS
+ 1.1 Product Overview
+ 1.2 Driver Description
+ 1.2.1 Driver Name
+ 1.2.2 File in the Driver Package
+ 1.3 System Requirements
+ 1.4 Licensing Information
+
+2.0 ADAPTER INSTALLATION and CONFIGURATION
+ 2.1 CS8900-based Adapter Configuration
+ 2.2 CS8920-based Adapter Configuration
+
+3.0 LOADING THE DRIVER AS A MODULE
+
+4.0 COMPILING THE DRIVER
+ 4.1 Compiling the Driver as a Loadable Module
+ 4.2 Compiling the driver to support memory mode
+ 4.3 Compiling the driver to support Rx DMA
+
+5.0 TESTING AND TROUBLESHOOTING
+ 5.1 Known Defects and Limitations
+ 5.2 Testing the Adapter
+ 5.2.1 Diagnostic Self-Test
+ 5.2.2 Diagnostic Network Test
+ 5.3 Using the Adapter's LEDs
+ 5.4 Resolving I/O Conflicts
+
+6.0 TECHNICAL SUPPORT
+ 6.1 Contacting Cirrus Logic's Technical Support
+ 6.2 Information Required Before Contacting Technical Support
+ 6.3 Obtaining the Latest Driver Version
+ 6.4 Current maintainer
+ 6.5 Kernel boot parameters
+
+
+1.0 CIRRUS LOGIC LAN CS8900/CS8920 ETHERNET ADAPTERS
+===============================================================================
+
+
+1.1 PRODUCT OVERVIEW
+
+The CS8900-based ISA Ethernet Adapters from Cirrus Logic follow
+IEEE 802.3 standards and support half or full-duplex operation in ISA bus
+computers on 10 Mbps Ethernet networks. The adapters are designed for operation
+in 16-bit ISA or EISA bus expansion slots and are available in
+10BaseT-only or 3-media configurations (10BaseT, 10Base2, and AUI for 10Base-5
+or fiber networks).
+
+CS8920-based adapters are similar to the CS8900-based adapter with additional
+features for Plug and Play (PnP) support and Wakeup Frame recognition. As
+such, the configuration procedures differ somewhat between the two types of
+adapters. Refer to the "Adapter Configuration" section for details on
+configuring both types of adapters.
+
+
+1.2 DRIVER DESCRIPTION
+
+The CS8900/CS8920 Ethernet Adapter driver for Linux supports the Linux
+v2.3.48 or greater kernel. It can be compiled directly into the kernel
+or loaded at run-time as a device driver module.
+
+1.2.1 Driver Name: cs89x0
+
+1.2.2 Files in the Driver Archive:
+
+The files in the driver at Cirrus' website include:
+
+ readme.txt - this file
+ build - batch file to compile cs89x0.c.
+ cs89x0.c - driver C code
+ cs89x0.h - driver header file
+ cs89x0.o - pre-compiled module (for v2.2.5 kernel)
+ config/Config.in - sample file to include cs89x0 driver in the kernel.
+ config/Makefile - sample file to include cs89x0 driver in the kernel.
+ config/Space.c - sample file to include cs89x0 driver in the kernel.
+
+
+
+1.3 SYSTEM REQUIREMENTS
+
+The following hardware is required:
+
+ * Cirrus Logic LAN (CS8900/20-based) Ethernet ISA Adapter
+
+ * IBM or IBM-compatible PC with:
+ * An 80386 or higher processor
+ * 16 bytes of contiguous IO space available between 210h - 370h
+ * One available IRQ (5,10,11,or 12 for the CS8900, 3-7,9-15 for CS8920).
+
+ * Appropriate cable (and connector for AUI, 10BASE-2) for your network
+ topology.
+
+The following software is required:
+
+* LINUX kernel version 2.3.48 or higher
+
+ * CS8900/20 Setup Utility (DOS-based)
+
+ * LINUX kernel sources for your kernel (if compiling into kernel)
+
+ * GNU Toolkit (gcc and make) v2.6 or above (if compiling into kernel
+ or a module)
+
+
+
+1.4 LICENSING INFORMATION
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation, version 1.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+For a full copy of the GNU General Public License, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+
+2.0 ADAPTER INSTALLATION and CONFIGURATION
+===============================================================================
+
+Both the CS8900 and CS8920-based adapters can be configured using parameters
+stored in an on-board EEPROM. You must use the DOS-based CS8900/20 Setup
+Utility if you want to change the adapter's configuration in EEPROM.
+
+When loading the driver as a module, you can specify many of the adapter's
+configuration parameters on the command-line to override the EEPROM's settings
+or for interface configuration when an EEPROM is not used. (CS8920-based
+adapters must use an EEPROM.) See Section 3.0 LOADING THE DRIVER AS A MODULE.
+
+Since the CS8900/20 Setup Utility is a DOS-based application, you must install
+and configure the adapter in a DOS-based system using the CS8900/20 Setup
+Utility before installation in the target LINUX system. (Not required if
+installing a CS8900-based adapter and the default configuration is acceptable.)
+
+
+2.1 CS8900-BASED ADAPTER CONFIGURATION
+
+CS8900-based adapters shipped from Cirrus Logic have been configured
+with the following "default" settings:
+
+ Operation Mode: Memory Mode
+ IRQ: 10
+ Base I/O Address: 300
+ Memory Base Address: D0000
+ Optimization: DOS Client
+ Transmission Mode: Half-duplex
+ BootProm: None
+ Media Type: Autodetect (3-media cards) or
+ 10BASE-T (10BASE-T only adapter)
+
+You should only change the default configuration settings if conflicts with
+another adapter exists. To change the adapter's configuration, run the
+CS8900/20 Setup Utility.
+
+
+2.2 CS8920-BASED ADAPTER CONFIGURATION
+
+CS8920-based adapters are shipped from Cirrus Logic configured as Plug
+and Play (PnP) enabled. However, since the cs89x0 driver does NOT
+support PnP, you must install the CS8920 adapter in a DOS-based PC and
+run the CS8900/20 Setup Utility to disable PnP and configure the
+adapter before installation in the target Linux system. Failure to do
+this will leave the adapter inactive and the driver will be unable to
+communicate with the adapter.
+
+
+ ****************************************************************
+ * CS8920-BASED ADAPTERS: *
+ * *
+ * CS8920-BASED ADAPTERS ARE PLUG and PLAY ENABLED BY DEFAULT. *
+ * THE CS89X0 DRIVER DOES NOT SUPPORT PnP. THEREFORE, YOU MUST *
+ * RUN THE CS8900/20 SETUP UTILITY TO DISABLE PnP SUPPORT AND *
+ * TO ACTIVATE THE ADAPTER. *
+ ****************************************************************
+
+
+
+
+3.0 LOADING THE DRIVER AS A MODULE
+===============================================================================
+
+If the driver is compiled as a loadable module, you can load the driver module
+with the 'modprobe' command. Many of the adapter's configuration parameters can
+be specified as command-line arguments to the load command. This facility
+provides a means to override the EEPROM's settings or for interface
+configuration when an EEPROM is not used.
+
+Example:
+
+ insmod cs89x0.o io=0x200 irq=0xA media=aui
+
+This example loads the module and configures the adapter to use an IO port base
+address of 200h, interrupt 10, and use the AUI media connection. The following
+configuration options are available on the command line:
+
+* io=### - specify IO address (200h-360h)
+* irq=## - specify interrupt level
+* use_dma=1 - Enable DMA
+* dma=# - specify dma channel (Driver is compiled to support
+ Rx DMA only)
+* dmasize=# (16 or 64) - DMA size 16K or 64K. Default value is set to 16.
+* media=rj45 - specify media type
+ or media=bnc
+ or media=aui
+ or media=auto
+* duplex=full - specify forced half/full/autonegotiate duplex
+ or duplex=half
+ or duplex=auto
+* debug=# - debug level (only available if the driver was compiled
+ for debugging)
+
+NOTES:
+
+a) If an EEPROM is present, any specified command-line parameter
+ will override the corresponding configuration value stored in
+ EEPROM.
+
+b) The "io" parameter must be specified on the command-line.
+
+c) The driver's hardware probe routine is designed to avoid
+ writing to I/O space until it knows that there is a cs89x0
+ card at the written addresses. This could cause problems
+ with device probing. To avoid this behaviour, add one
+ to the `io=' module parameter. This doesn't actually change
+ the I/O address, but it is a flag to tell the driver
+ to partially initialise the hardware before trying to
+ identify the card. This could be dangerous if you are
+ not sure that there is a cs89x0 card at the provided address.
+
+ For example, to scan for an adapter located at IO base 0x300,
+ specify an IO address of 0x301.
+
+d) The "duplex=auto" parameter is only supported for the CS8920.
+
+e) The minimum command-line configuration required if an EEPROM is
+ not present is:
+
+ io
+ irq
+ media type (no autodetect)
+
+f) The following additional parameters are CS89XX defaults (values
+ used with no EEPROM or command-line argument).
+
+ * DMA Burst = enabled
+ * IOCHRDY Enabled = enabled
+ * UseSA = enabled
+ * CS8900 defaults to half-duplex if not specified on command-line
+ * CS8920 defaults to autoneg if not specified on command-line
+ * Use reset defaults for other config parameters
+ * dma_mode = 0
+
+g) You can use ifconfig to set the adapter's Ethernet address.
+
+h) Many Linux distributions use the 'modprobe' command to load
+ modules. This program uses the '/etc/conf.modules' file to
+ determine configuration information which is passed to a driver
+ module when it is loaded. All the configuration options which are
+ described above may be placed within /etc/conf.modules.
+
+ For example:
+
+ > cat /etc/conf.modules
+ ...
+ alias eth0 cs89x0
+ options cs89x0 io=0x0200 dma=5 use_dma=1
+ ...
+
+ In this example we are telling the module system that the
+ ethernet driver for this machine should use the cs89x0 driver. We
+ are asking 'modprobe' to pass the 'io', 'dma' and 'use_dma'
+ arguments to the driver when it is loaded.
+
+i) Cirrus recommend that the cs89x0 use the ISA DMA channels 5, 6 or
+ 7. You will probably find that other DMA channels will not work.
+
+j) The cs89x0 supports DMA for receiving only. DMA mode is
+ significantly more efficient. Flooding a 400 MHz Celeron machine
+ with large ping packets consumes 82% of its CPU capacity in non-DMA
+ mode. With DMA this is reduced to 45%.
+
+k) If your Linux kernel was compiled with inbuilt plug-and-play
+ support you will be able to find information about the cs89x0 card
+ with the command
+
+ cat /proc/isapnp
+
+l) If during DMA operation you find erratic behavior or network data
+ corruption you should use your PC's BIOS to slow the EISA bus clock.
+
+m) If the cs89x0 driver is compiled directly into the kernel
+ (non-modular) then its I/O address is automatically determined by
+ ISA bus probing. The IRQ number, media options, etc are determined
+ from the card's EEPROM.
+
+n) If the cs89x0 driver is compiled directly into the kernel, DMA
+ mode may be selected by providing the kernel with a boot option
+ 'cs89x0_dma=N' where 'N' is the desired DMA channel number (5, 6 or 7).
+
+ Kernel boot options may be provided on the LILO command line:
+
+ LILO boot: linux cs89x0_dma=5
+
+ or they may be placed in /etc/lilo.conf:
+
+ image=/boot/bzImage-2.3.48
+ append="cs89x0_dma=5"
+ label=linux
+ root=/dev/hda5
+ read-only
+
+ The DMA Rx buffer size is hardwired to 16 kbytes in this mode.
+ (64k mode is not available).
+
+
+4.0 COMPILING THE DRIVER
+===============================================================================
+
+The cs89x0 driver can be compiled directly into the kernel or compiled into
+a loadable device driver module.
+
+
+4.1 COMPILING THE DRIVER AS A LOADABLE MODULE
+
+To compile the driver into a loadable module, use the following command
+(single command line, without quotes):
+
+"gcc -D__KERNEL__ -I/usr/src/linux/include -I/usr/src/linux/net/inet -Wall
+-Wstrict-prototypes -O2 -fomit-frame-pointer -DMODULE -DCONFIG_MODVERSIONS
+-c cs89x0.c"
+
+4.2 COMPILING THE DRIVER TO SUPPORT MEMORY MODE
+
+Support for memory mode was not carried over into the 2.3 series kernels.
+
+4.3 COMPILING THE DRIVER TO SUPPORT Rx DMA
+
+The compile-time optionality for DMA was removed in the 2.3 kernel
+series. DMA support is now unconditionally part of the driver. It is
+enabled by the 'use_dma=1' module option.
+
+
+5.0 TESTING AND TROUBLESHOOTING
+===============================================================================
+
+5.1 KNOWN DEFECTS and LIMITATIONS
+
+Refer to the RELEASE.TXT file distributed as part of this archive for a list of
+known defects, driver limitations, and work arounds.
+
+
+5.2 TESTING THE ADAPTER
+
+Once the adapter has been installed and configured, the diagnostic option of
+the CS8900/20 Setup Utility can be used to test the functionality of the
+adapter and its network connection. Use the diagnostics 'Self Test' option to
+test the functionality of the adapter with the hardware configuration you have
+assigned. You can use the diagnostics 'Network Test' to test the ability of the
+adapter to communicate across the Ethernet with another PC equipped with a
+CS8900/20-based adapter card (it must also be running the CS8900/20 Setup
+Utility).
+
+ NOTE: The Setup Utility's diagnostics are designed to run in a
+ DOS-only operating system environment. DO NOT run the diagnostics
+ from a DOS or command prompt session under Windows 95, Windows NT,
+ OS/2, or other operating system.
+
+To run the diagnostics tests on the CS8900/20 adapter:
+
+ 1.) Boot DOS on the PC and start the CS8900/20 Setup Utility.
+
+ 2.) The adapter's current configuration is displayed. Hit the ENTER key to
+ get to the main menu.
+
+ 4.) Select 'Diagnostics' (ALT-G) from the main menu.
+ * Select 'Self-Test' to test the adapter's basic functionality.
+ * Select 'Network Test' to test the network connection and cabling.
+
+
+5.2.1 DIAGNOSTIC SELF-TEST
+
+The diagnostic self-test checks the adapter's basic functionality as well as
+its ability to communicate across the ISA bus based on the system resources
+assigned during hardware configuration. The following tests are performed:
+
+ * IO Register Read/Write Test
+ The IO Register Read/Write test insures that the CS8900/20 can be
+ accessed in IO mode, and that the IO base address is correct.
+
+ * Shared Memory Test
+ The Shared Memory test insures the CS8900/20 can be accessed in memory
+ mode and that the range of memory addresses assigned does not conflict
+ with other devices in the system.
+
+ * Interrupt Test
+ The Interrupt test insures there are no conflicts with the assigned IRQ
+ signal.
+
+ * EEPROM Test
+ The EEPROM test insures the EEPROM can be read.
+
+ * Chip RAM Test
+ The Chip RAM test insures the 4K of memory internal to the CS8900/20 is
+ working properly.
+
+ * Internal Loop-back Test
+ The Internal Loop Back test insures the adapter's transmitter and
+ receiver are operating properly. If this test fails, make sure the
+ adapter's cable is connected to the network (check for LED activity for
+ example).
+
+ * Boot PROM Test
+ The Boot PROM test insures the Boot PROM is present, and can be read.
+ Failure indicates the Boot PROM was not successfully read due to a
+ hardware problem or due to a conflicts on the Boot PROM address
+ assignment. (Test only applies if the adapter is configured to use the
+ Boot PROM option.)
+
+Failure of a test item indicates a possible system resource conflict with
+another device on the ISA bus. In this case, you should use the Manual Setup
+option to reconfigure the adapter by selecting a different value for the system
+resource that failed.
+
+
+5.2.2 DIAGNOSTIC NETWORK TEST
+
+The Diagnostic Network Test verifies a working network connection by
+transferring data between two CS8900/20 adapters installed in different PCs
+on the same network. (Note: the diagnostic network test should not be run
+between two nodes across a router.)
+
+This test requires that each of the two PCs have a CS8900/20-based adapter
+installed and have the CS8900/20 Setup Utility running. The first PC is
+configured as a Responder and the other PC is configured as an Initiator.
+Once the Initiator is started, it sends data frames to the Responder which
+returns the frames to the Initiator.
+
+The total number of frames received and transmitted are displayed on the
+Initiator's display, along with a count of the number of frames received and
+transmitted OK or in error. The test can be terminated anytime by the user at
+either PC.
+
+To setup the Diagnostic Network Test:
+
+ 1.) Select a PC with a CS8900/20-based adapter and a known working network
+ connection to act as the Responder. Run the CS8900/20 Setup Utility
+ and select 'Diagnostics -> Network Test -> Responder' from the main
+ menu. Hit ENTER to start the Responder.
+
+ 2.) Return to the PC with the CS8900/20-based adapter you want to test and
+ start the CS8900/20 Setup Utility.
+
+ 3.) From the main menu, Select 'Diagnostic -> Network Test -> Initiator'.
+ Hit ENTER to start the test.
+
+You may stop the test on the Initiator at any time while allowing the Responder
+to continue running. In this manner, you can move to additional PCs and test
+them by starting the Initiator on another PC without having to stop/start the
+Responder.
+
+
+
+5.3 USING THE ADAPTER'S LEDs
+
+The 2 and 3-media adapters have two LEDs visible on the back end of the board
+located near the 10Base-T connector.
+
+Link Integrity LED: A "steady" ON of the green LED indicates a valid 10Base-T
+connection. (Only applies to 10Base-T. The green LED has no significance for
+a 10Base-2 or AUI connection.)
+
+TX/RX LED: The yellow LED lights briefly each time the adapter transmits or
+receives data. (The yellow LED will appear to "flicker" on a typical network.)
+
+
+5.4 RESOLVING I/O CONFLICTS
+
+An IO conflict occurs when two or more adapter use the same ISA resource (IO
+address, memory address or IRQ). You can usually detect an IO conflict in one
+of four ways after installing and or configuring the CS8900/20-based adapter:
+
+ 1.) The system does not boot properly (or at all).
+
+ 2.) The driver cannot communicate with the adapter, reporting an "Adapter
+ not found" error message.
+
+ 3.) You cannot connect to the network or the driver will not load.
+
+ 4.) If you have configured the adapter to run in memory mode but the driver
+ reports it is using IO mode when loading, this is an indication of a
+ memory address conflict.
+
+If an IO conflict occurs, run the CS8900/20 Setup Utility and perform a
+diagnostic self-test. Normally, the ISA resource in conflict will fail the
+self-test. If so, reconfigure the adapter selecting another choice for the
+resource in conflict. Run the diagnostics again to check for further IO
+conflicts.
+
+In some cases, such as when the PC will not boot, it may be necessary to remove
+the adapter and reconfigure it by installing it in another PC to run the
+CS8900/20 Setup Utility. Once reinstalled in the target system, run the
+diagnostics self-test to ensure the new configuration is free of conflicts
+before loading the driver again.
+
+When manually configuring the adapter, keep in mind the typical ISA system
+resource usage as indicated in the tables below.
+
+I/O Address Device IRQ Device
+----------- -------- --- --------
+ 200-20F Game I/O adapter 3 COM2, Bus Mouse
+ 230-23F Bus Mouse 4 COM1
+ 270-27F LPT3: third parallel port 5 LPT2
+ 2F0-2FF COM2: second serial port 6 Floppy Disk controller
+ 320-32F Fixed disk controller 7 LPT1
+ 8 Real-time Clock
+ 9 EGA/VGA display adapter
+ 12 Mouse (PS/2)
+Memory Address Device 13 Math Coprocessor
+-------------- --------------------- 14 Hard Disk controller
+A000-BFFF EGA Graphics Adapter
+A000-C7FF VGA Graphics Adapter
+B000-BFFF Mono Graphics Adapter
+B800-BFFF Color Graphics Adapter
+E000-FFFF AT BIOS
+
+
+
+
+6.0 TECHNICAL SUPPORT
+===============================================================================
+
+6.1 CONTACTING CIRRUS LOGIC'S TECHNICAL SUPPORT
+
+Cirrus Logic's CS89XX Technical Application Support can be reached at:
+
+Telephone :(800) 888-5016 (from inside U.S. and Canada)
+ :(512) 442-7555 (from outside the U.S. and Canada)
+Fax :(512) 912-3871
+Email :ethernet@crystal.cirrus.com
+WWW :http://www.cirrus.com
+
+
+6.2 INFORMATION REQUIRED BEFORE CONTACTING TECHNICAL SUPPORT
+
+Before contacting Cirrus Logic for technical support, be prepared to provide as
+Much of the following information as possible.
+
+1.) Adapter type (CRD8900, CDB8900, CDB8920, etc.)
+
+2.) Adapter configuration
+
+ * IO Base, Memory Base, IO or memory mode enabled, IRQ, DMA channel
+ * Plug and Play enabled/disabled (CS8920-based adapters only)
+ * Configured for media auto-detect or specific media type (which type).
+
+3.) PC System's Configuration
+
+ * Plug and Play system (yes/no)
+ * BIOS (make and version)
+ * System make and model
+ * CPU (type and speed)
+ * System RAM
+ * SCSI Adapter
+
+4.) Software
+
+ * CS89XX driver and version
+ * Your network operating system and version
+ * Your system's OS version
+ * Version of all protocol support files
+
+5.) Any Error Message displayed.
+
+
+
+6.3 OBTAINING THE LATEST DRIVER VERSION
+
+You can obtain the latest CS89XX drivers and support software from Cirrus Logic's
+Web site. You can also contact Cirrus Logic's Technical Support (email:
+ethernet@crystal.cirrus.com) and request that you be registered for automatic
+software-update notification.
+
+Cirrus Logic maintains a web page at http://www.cirrus.com with the
+latest drivers and technical publications.
+
+
+6.4 Current maintainer
+
+In February 2000 the maintenance of this driver was assumed by Andrew
+Morton.
+
+6.5 Kernel module parameters
+
+For use in embedded environments with no cs89x0 EEPROM, the kernel boot
+parameter `cs89x0_media=' has been implemented. Usage is:
+
+ cs89x0_media=rj45 or
+ cs89x0_media=aui or
+ cs89x0_media=bnc
+
diff --git a/Documentation/networking/cxacru-cf.py b/Documentation/networking/cxacru-cf.py
new file mode 100644
index 000000000..b41d29839
--- /dev/null
+++ b/Documentation/networking/cxacru-cf.py
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# Copyright 2009 Simon Arlott
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59
+# Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# Usage: cxacru-cf.py < cxacru-cf.bin
+# Output: values string suitable for the sysfs adsl_config attribute
+#
+# Warning: cxacru-cf.bin with MD5 hash cdbac2689969d5ed5d4850f117702110
+# contains mis-aligned values which will stop the modem from being able
+# to make a connection. If the first and last two bytes are removed then
+# the values become valid, but the modulation will be forced to ANSI
+# T1.413 only which may not be appropriate.
+#
+# The original binary format is a packed list of le32 values.
+
+import sys
+import struct
+
+i = 0
+while True:
+ buf = sys.stdin.read(4)
+
+ if len(buf) == 0:
+ break
+ elif len(buf) != 4:
+ sys.stdout.write("\n")
+ sys.stderr.write("Error: read {0} not 4 bytes\n".format(len(buf)))
+ sys.exit(1)
+
+ if i > 0:
+ sys.stdout.write(" ")
+ sys.stdout.write("{0:x}={1}".format(i, struct.unpack("<I", buf)[0]))
+ i += 1
+
+sys.stdout.write("\n")
diff --git a/Documentation/networking/cxacru.txt b/Documentation/networking/cxacru.txt
new file mode 100644
index 000000000..2cce04457
--- /dev/null
+++ b/Documentation/networking/cxacru.txt
@@ -0,0 +1,100 @@
+Firmware is required for this device: http://accessrunner.sourceforge.net/
+
+While it is capable of managing/maintaining the ADSL connection without the
+module loaded, the device will sometimes stop responding after unloading the
+driver and it is necessary to unplug/remove power to the device to fix this.
+
+Note: support for cxacru-cf.bin has been removed. It was not loaded correctly
+so it had no effect on the device configuration. Fixing it could have stopped
+existing devices working when an invalid configuration is supplied.
+
+There is a script cxacru-cf.py to convert an existing file to the sysfs form.
+
+Detected devices will appear as ATM devices named "cxacru". In /sys/class/atm/
+these are directories named cxacruN where N is the device number. A symlink
+named device points to the USB interface device's directory which contains
+several sysfs attribute files for retrieving device statistics:
+
+* adsl_controller_version
+
+* adsl_headend
+* adsl_headend_environment
+ Information about the remote headend.
+
+* adsl_config
+ Configuration writing interface.
+ Write parameters in hexadecimal format <index>=<value>,
+ separated by whitespace, e.g.:
+ "1=0 a=5"
+ Up to 7 parameters at a time will be sent and the modem will restart
+ the ADSL connection when any value is set. These are logged for future
+ reference.
+
+* downstream_attenuation (dB)
+* downstream_bits_per_frame
+* downstream_rate (kbps)
+* downstream_snr_margin (dB)
+ Downstream stats.
+
+* upstream_attenuation (dB)
+* upstream_bits_per_frame
+* upstream_rate (kbps)
+* upstream_snr_margin (dB)
+* transmitter_power (dBm/Hz)
+ Upstream stats.
+
+* downstream_crc_errors
+* downstream_fec_errors
+* downstream_hec_errors
+* upstream_crc_errors
+* upstream_fec_errors
+* upstream_hec_errors
+ Error counts.
+
+* line_startable
+ Indicates that ADSL support on the device
+ is/can be enabled, see adsl_start.
+
+* line_status
+ "initialising"
+ "down"
+ "attempting to activate"
+ "training"
+ "channel analysis"
+ "exchange"
+ "waiting"
+ "up"
+
+ Changes between "down" and "attempting to activate"
+ if there is no signal.
+
+* link_status
+ "not connected"
+ "connected"
+ "lost"
+
+* mac_address
+
+* modulation
+ "" (when not connected)
+ "ANSI T1.413"
+ "ITU-T G.992.1 (G.DMT)"
+ "ITU-T G.992.2 (G.LITE)"
+
+* startup_attempts
+ Count of total attempts to initialise ADSL.
+
+To enable/disable ADSL, the following can be written to the adsl_state file:
+ "start"
+ "stop
+ "restart" (stops, waits 1.5s, then starts)
+ "poll" (used to resume status polling if it was disabled due to failure)
+
+Changes in adsl/line state are reported via kernel log messages:
+ [4942145.150704] ATM dev 0: ADSL state: running
+ [4942243.663766] ATM dev 0: ADSL line: down
+ [4942249.665075] ATM dev 0: ADSL line: attempting to activate
+ [4942253.654954] ATM dev 0: ADSL line: training
+ [4942255.666387] ATM dev 0: ADSL line: channel analysis
+ [4942259.656262] ATM dev 0: ADSL line: exchange
+ [2635357.696901] ATM dev 0: ADSL line: up (8128 kb/s down | 832 kb/s up)
diff --git a/Documentation/networking/cxgb.txt b/Documentation/networking/cxgb.txt
new file mode 100644
index 000000000..20a887615
--- /dev/null
+++ b/Documentation/networking/cxgb.txt
@@ -0,0 +1,352 @@
+ Chelsio N210 10Gb Ethernet Network Controller
+
+ Driver Release Notes for Linux
+
+ Version 2.1.1
+
+ June 20, 2005
+
+CONTENTS
+========
+ INTRODUCTION
+ FEATURES
+ PERFORMANCE
+ DRIVER MESSAGES
+ KNOWN ISSUES
+ SUPPORT
+
+
+INTRODUCTION
+============
+
+ This document describes the Linux driver for Chelsio 10Gb Ethernet Network
+ Controller. This driver supports the Chelsio N210 NIC and is backward
+ compatible with the Chelsio N110 model 10Gb NICs.
+
+
+FEATURES
+========
+
+ Adaptive Interrupts (adaptive-rx)
+ ---------------------------------
+
+ This feature provides an adaptive algorithm that adjusts the interrupt
+ coalescing parameters, allowing the driver to dynamically adapt the latency
+ settings to achieve the highest performance during various types of network
+ load.
+
+ The interface used to control this feature is ethtool. Please see the
+ ethtool manpage for additional usage information.
+
+ By default, adaptive-rx is disabled.
+ To enable adaptive-rx:
+
+ ethtool -C <interface> adaptive-rx on
+
+ To disable adaptive-rx, use ethtool:
+
+ ethtool -C <interface> adaptive-rx off
+
+ After disabling adaptive-rx, the timer latency value will be set to 50us.
+ You may set the timer latency after disabling adaptive-rx:
+
+ ethtool -C <interface> rx-usecs <microseconds>
+
+ An example to set the timer latency value to 100us on eth0:
+
+ ethtool -C eth0 rx-usecs 100
+
+ You may also provide a timer latency value while disabling adaptive-rx:
+
+ ethtool -C <interface> adaptive-rx off rx-usecs <microseconds>
+
+ If adaptive-rx is disabled and a timer latency value is specified, the timer
+ will be set to the specified value until changed by the user or until
+ adaptive-rx is enabled.
+
+ To view the status of the adaptive-rx and timer latency values:
+
+ ethtool -c <interface>
+
+
+ TCP Segmentation Offloading (TSO) Support
+ -----------------------------------------
+
+ This feature, also known as "large send", enables a system's protocol stack
+ to offload portions of outbound TCP processing to a network interface card
+ thereby reducing system CPU utilization and enhancing performance.
+
+ The interface used to control this feature is ethtool version 1.8 or higher.
+ Please see the ethtool manpage for additional usage information.
+
+ By default, TSO is enabled.
+ To disable TSO:
+
+ ethtool -K <interface> tso off
+
+ To enable TSO:
+
+ ethtool -K <interface> tso on
+
+ To view the status of TSO:
+
+ ethtool -k <interface>
+
+
+PERFORMANCE
+===========
+
+ The following information is provided as an example of how to change system
+ parameters for "performance tuning" an what value to use. You may or may not
+ want to change these system parameters, depending on your server/workstation
+ application. Doing so is not warranted in any way by Chelsio Communications,
+ and is done at "YOUR OWN RISK". Chelsio will not be held responsible for loss
+ of data or damage to equipment.
+
+ Your distribution may have a different way of doing things, or you may prefer
+ a different method. These commands are shown only to provide an example of
+ what to do and are by no means definitive.
+
+ Making any of the following system changes will only last until you reboot
+ your system. You may want to write a script that runs at boot-up which
+ includes the optimal settings for your system.
+
+ Setting PCI Latency Timer:
+ setpci -d 1425:* 0x0c.l=0x0000F800
+
+ Disabling TCP timestamp:
+ sysctl -w net.ipv4.tcp_timestamps=0
+
+ Disabling SACK:
+ sysctl -w net.ipv4.tcp_sack=0
+
+ Setting large number of incoming connection requests:
+ sysctl -w net.ipv4.tcp_max_syn_backlog=3000
+
+ Setting maximum receive socket buffer size:
+ sysctl -w net.core.rmem_max=1024000
+
+ Setting maximum send socket buffer size:
+ sysctl -w net.core.wmem_max=1024000
+
+ Set smp_affinity (on a multiprocessor system) to a single CPU:
+ echo 1 > /proc/irq/<interrupt_number>/smp_affinity
+
+ Setting default receive socket buffer size:
+ sysctl -w net.core.rmem_default=524287
+
+ Setting default send socket buffer size:
+ sysctl -w net.core.wmem_default=524287
+
+ Setting maximum option memory buffers:
+ sysctl -w net.core.optmem_max=524287
+
+ Setting maximum backlog (# of unprocessed packets before kernel drops):
+ sysctl -w net.core.netdev_max_backlog=300000
+
+ Setting TCP read buffers (min/default/max):
+ sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000"
+
+ Setting TCP write buffers (min/pressure/max):
+ sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000"
+
+ Setting TCP buffer space (min/pressure/max):
+ sysctl -w net.ipv4.tcp_mem="10000000 10000000 10000000"
+
+ TCP window size for single connections:
+ The receive buffer (RX_WINDOW) size must be at least as large as the
+ Bandwidth-Delay Product of the communication link between the sender and
+ receiver. Due to the variations of RTT, you may want to increase the buffer
+ size up to 2 times the Bandwidth-Delay Product. Reference page 289 of
+ "TCP/IP Illustrated, Volume 1, The Protocols" by W. Richard Stevens.
+ At 10Gb speeds, use the following formula:
+ RX_WINDOW >= 1.25MBytes * RTT(in milliseconds)
+ Example for RTT with 100us: RX_WINDOW = (1,250,000 * 0.1) = 125,000
+ RX_WINDOW sizes of 256KB - 512KB should be sufficient.
+ Setting the min, max, and default receive buffer (RX_WINDOW) size:
+ sysctl -w net.ipv4.tcp_rmem="<min> <default> <max>"
+
+ TCP window size for multiple connections:
+ The receive buffer (RX_WINDOW) size may be calculated the same as single
+ connections, but should be divided by the number of connections. The
+ smaller window prevents congestion and facilitates better pacing,
+ especially if/when MAC level flow control does not work well or when it is
+ not supported on the machine. Experimentation may be necessary to attain
+ the correct value. This method is provided as a starting point for the
+ correct receive buffer size.
+ Setting the min, max, and default receive buffer (RX_WINDOW) size is
+ performed in the same manner as single connection.
+
+
+DRIVER MESSAGES
+===============
+
+ The following messages are the most common messages logged by syslog. These
+ may be found in /var/log/messages.
+
+ Driver up:
+ Chelsio Network Driver - version 2.1.1
+
+ NIC detected:
+ eth#: Chelsio N210 1x10GBaseX NIC (rev #), PCIX 133MHz/64-bit
+
+ Link up:
+ eth#: link is up at 10 Gbps, full duplex
+
+ Link down:
+ eth#: link is down
+
+
+KNOWN ISSUES
+============
+
+ These issues have been identified during testing. The following information
+ is provided as a workaround to the problem. In some cases, this problem is
+ inherent to Linux or to a particular Linux Distribution and/or hardware
+ platform.
+
+ 1. Large number of TCP retransmits on a multiprocessor (SMP) system.
+
+ On a system with multiple CPUs, the interrupt (IRQ) for the network
+ controller may be bound to more than one CPU. This will cause TCP
+ retransmits if the packet data were to be split across different CPUs
+ and re-assembled in a different order than expected.
+
+ To eliminate the TCP retransmits, set smp_affinity on the particular
+ interrupt to a single CPU. You can locate the interrupt (IRQ) used on
+ the N110/N210 by using ifconfig:
+ ifconfig <dev_name> | grep Interrupt
+ Set the smp_affinity to a single CPU:
+ echo 1 > /proc/irq/<interrupt_number>/smp_affinity
+
+ It is highly suggested that you do not run the irqbalance daemon on your
+ system, as this will change any smp_affinity setting you have applied.
+ The irqbalance daemon runs on a 10 second interval and binds interrupts
+ to the least loaded CPU determined by the daemon. To disable this daemon:
+ chkconfig --level 2345 irqbalance off
+
+ By default, some Linux distributions enable the kernel feature,
+ irqbalance, which performs the same function as the daemon. To disable
+ this feature, add the following line to your bootloader:
+ noirqbalance
+
+ Example using the Grub bootloader:
+ title Red Hat Enterprise Linux AS (2.4.21-27.ELsmp)
+ root (hd0,0)
+ kernel /vmlinuz-2.4.21-27.ELsmp ro root=/dev/hda3 noirqbalance
+ initrd /initrd-2.4.21-27.ELsmp.img
+
+ 2. After running insmod, the driver is loaded and the incorrect network
+ interface is brought up without running ifup.
+
+ When using 2.4.x kernels, including RHEL kernels, the Linux kernel
+ invokes a script named "hotplug". This script is primarily used to
+ automatically bring up USB devices when they are plugged in, however,
+ the script also attempts to automatically bring up a network interface
+ after loading the kernel module. The hotplug script does this by scanning
+ the ifcfg-eth# config files in /etc/sysconfig/network-scripts, looking
+ for HWADDR=<mac_address>.
+
+ If the hotplug script does not find the HWADDRR within any of the
+ ifcfg-eth# files, it will bring up the device with the next available
+ interface name. If this interface is already configured for a different
+ network card, your new interface will have incorrect IP address and
+ network settings.
+
+ To solve this issue, you can add the HWADDR=<mac_address> key to the
+ interface config file of your network controller.
+
+ To disable this "hotplug" feature, you may add the driver (module name)
+ to the "blacklist" file located in /etc/hotplug. It has been noted that
+ this does not work for network devices because the net.agent script
+ does not use the blacklist file. Simply remove, or rename, the net.agent
+ script located in /etc/hotplug to disable this feature.
+
+ 3. Transport Protocol (TP) hangs when running heavy multi-connection traffic
+ on an AMD Opteron system with HyperTransport PCI-X Tunnel chipset.
+
+ If your AMD Opteron system uses the AMD-8131 HyperTransport PCI-X Tunnel
+ chipset, you may experience the "133-Mhz Mode Split Completion Data
+ Corruption" bug identified by AMD while using a 133Mhz PCI-X card on the
+ bus PCI-X bus.
+
+ AMD states, "Under highly specific conditions, the AMD-8131 PCI-X Tunnel
+ can provide stale data via split completion cycles to a PCI-X card that
+ is operating at 133 Mhz", causing data corruption.
+
+ AMD's provides three workarounds for this problem, however, Chelsio
+ recommends the first option for best performance with this bug:
+
+ For 133Mhz secondary bus operation, limit the transaction length and
+ the number of outstanding transactions, via BIOS configuration
+ programming of the PCI-X card, to the following:
+
+ Data Length (bytes): 1k
+ Total allowed outstanding transactions: 2
+
+ Please refer to AMD 8131-HT/PCI-X Errata 26310 Rev 3.08 August 2004,
+ section 56, "133-MHz Mode Split Completion Data Corruption" for more
+ details with this bug and workarounds suggested by AMD.
+
+ It may be possible to work outside AMD's recommended PCI-X settings, try
+ increasing the Data Length to 2k bytes for increased performance. If you
+ have issues with these settings, please revert to the "safe" settings
+ and duplicate the problem before submitting a bug or asking for support.
+
+ NOTE: The default setting on most systems is 8 outstanding transactions
+ and 2k bytes data length.
+
+ 4. On multiprocessor systems, it has been noted that an application which
+ is handling 10Gb networking can switch between CPUs causing degraded
+ and/or unstable performance.
+
+ If running on an SMP system and taking performance measurements, it
+ is suggested you either run the latest netperf-2.4.0+ or use a binding
+ tool such as Tim Hockin's procstate utilities (runon)
+ <http://www.hockin.org/~thockin/procstate/>.
+
+ Binding netserver and netperf (or other applications) to particular
+ CPUs will have a significant difference in performance measurements.
+ You may need to experiment which CPU to bind the application to in
+ order to achieve the best performance for your system.
+
+ If you are developing an application designed for 10Gb networking,
+ please keep in mind you may want to look at kernel functions
+ sched_setaffinity & sched_getaffinity to bind your application.
+
+ If you are just running user-space applications such as ftp, telnet,
+ etc., you may want to try the runon tool provided by Tim Hockin's
+ procstate utility. You could also try binding the interface to a
+ particular CPU: runon 0 ifup eth0
+
+
+SUPPORT
+=======
+
+ If you have problems with the software or hardware, please contact our
+ customer support team via email at support@chelsio.com or check our website
+ at http://www.chelsio.com
+
+===============================================================================
+
+ Chelsio Communications
+ 370 San Aleso Ave.
+ Suite 100
+ Sunnyvale, CA 94085
+ http://www.chelsio.com
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License, version 2, as
+published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+ Copyright (c) 2003-2005 Chelsio Communications. All rights reserved.
+
+===============================================================================
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
new file mode 100644
index 000000000..55c575fca
--- /dev/null
+++ b/Documentation/networking/dccp.txt
@@ -0,0 +1,207 @@
+DCCP protocol
+=============
+
+
+Contents
+========
+- Introduction
+- Missing features
+- Socket options
+- Sysctl variables
+- IOCTLs
+- Other tunables
+- Notes
+
+
+Introduction
+============
+Datagram Congestion Control Protocol (DCCP) is an unreliable, connection
+oriented protocol designed to solve issues present in UDP and TCP, particularly
+for real-time and multimedia (streaming) traffic.
+It divides into a base protocol (RFC 4340) and pluggable congestion control
+modules called CCIDs. Like pluggable TCP congestion control, at least one CCID
+needs to be enabled in order for the protocol to function properly. In the Linux
+implementation, this is the TCP-like CCID2 (RFC 4341). Additional CCIDs, such as
+the TCP-friendly CCID3 (RFC 4342), are optional.
+For a brief introduction to CCIDs and suggestions for choosing a CCID to match
+given applications, see section 10 of RFC 4340.
+
+It has a base protocol and pluggable congestion control IDs (CCIDs).
+
+DCCP is a Proposed Standard (RFC 2026), and the homepage for DCCP as a protocol
+is at http://www.ietf.org/html.charters/dccp-charter.html
+
+
+Missing features
+================
+The Linux DCCP implementation does not currently support all the features that are
+specified in RFCs 4340...42.
+
+The known bugs are at:
+ http://www.linuxfoundation.org/collaborate/workgroups/networking/todo#DCCP
+
+For more up-to-date versions of the DCCP implementation, please consider using
+the experimental DCCP test tree; instructions for checking this out are on:
+http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp_testing#Experimental_DCCP_source_tree
+
+
+Socket options
+==============
+DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
+a policy ID as argument and can only be set before the connection (i.e. changes
+during an established connection are not supported). Currently, two policies are
+defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
+and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an
+u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
+a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
+be formatted using a cmsg(3) message header filled in as follows:
+ cmsg->cmsg_level = SOL_DCCP;
+ cmsg->cmsg_type = DCCP_SCM_PRIORITY;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); /* or CMSG_LEN(4) */
+
+DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
+value is always interpreted as unbounded queue length. If different from zero,
+the interpretation of this parameter depends on the current dequeuing policy
+(see above): the "simple" policy will enforce a fixed queue size by returning
+EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
+lowest-priority packet first. The default value for this parameter is
+initialised from /proc/sys/net/dccp/default/tx_qlen.
+
+DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
+service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
+the socket will fall back to 0 (which means that no meaningful service code
+is present). On active sockets this is set before connect(); specifying more
+than one code has no effect (all subsequent service codes are ignored). The
+case is different for passive sockets, where multiple service codes (up to 32)
+can be set before calling bind().
+
+DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
+size (application payload size) in bytes, see RFC 4340, section 14.
+
+DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
+supported by the endpoint. The option value is an array of type uint8_t whose
+size is passed as option length. The minimum array size is 4 elements, the
+value returned in the optlen argument always reflects the true number of
+built-in CCIDs.
+
+DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same
+time, combining the operation of the next two socket options. This option is
+preferable over the latter two, since often applications will use the same
+type of CCID for both directions; and mixed use of CCIDs is not currently well
+understood. This socket option takes as argument at least one uint8_t value, or
+an array of uint8_t values, which must match available CCIDS (see above). CCIDs
+must be registered on the socket before calling connect() or listen().
+
+DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets
+the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID.
+Please note that the getsockopt argument type here is `int', not uint8_t.
+
+DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID.
+
+DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
+timewait state when closing the connection (RFC 4340, 8.3). The usual case is
+that the closing server sends a CloseReq, whereupon the client holds timewait
+state. When this boolean socket option is on, the server sends a Close instead
+and will enter TIMEWAIT. This option must be set after accept() returns.
+
+DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV are used for setting the
+partial checksum coverage (RFC 4340, sec. 9.2). The default is that checksums
+always cover the entire packet and that only fully covered application data is
+accepted by the receiver. Hence, when using this feature on the sender, it must
+be enabled at the receiver, too with suitable choice of CsCov.
+
+DCCP_SOCKOPT_SEND_CSCOV sets the sender checksum coverage. Values in the
+ range 0..15 are acceptable. The default setting is 0 (full coverage),
+ values between 1..15 indicate partial coverage.
+DCCP_SOCKOPT_RECV_CSCOV is for the receiver and has a different meaning: it
+ sets a threshold, where again values 0..15 are acceptable. The default
+ of 0 means that all packets with a partial coverage will be discarded.
+ Values in the range 1..15 indicate that packets with minimally such a
+ coverage value are also acceptable. The higher the number, the more
+ restrictive this setting (see [RFC 4340, sec. 9.2.1]). Partial coverage
+ settings are inherited to the child socket after accept().
+
+The following two options apply to CCID 3 exclusively and are getsockopt()-only.
+In either case, a TFRC info struct (defined in <linux/tfrc.h>) is returned.
+DCCP_SOCKOPT_CCID_RX_INFO
+ Returns a `struct tfrc_rx_info' in optval; the buffer for optval and
+ optlen must be set to at least sizeof(struct tfrc_rx_info).
+DCCP_SOCKOPT_CCID_TX_INFO
+ Returns a `struct tfrc_tx_info' in optval; the buffer for optval and
+ optlen must be set to at least sizeof(struct tfrc_tx_info).
+
+On unidirectional connections it is useful to close the unused half-connection
+via shutdown (SHUT_WR or SHUT_RD): this will reduce per-packet processing costs.
+
+
+Sysctl variables
+================
+Several DCCP default parameters can be managed by the following sysctls
+(sysctl net.dccp.default or /proc/sys/net/dccp/default):
+
+request_retries
+ The number of active connection initiation retries (the number of
+ Requests minus one) before timing out. In addition, it also governs
+ the behaviour of the other, passive side: this variable also sets
+ the number of times DCCP repeats sending a Response when the initial
+ handshake does not progress from RESPOND to OPEN (i.e. when no Ack
+ is received after the initial Request). This value should be greater
+ than 0, suggested is less than 10. Analogue of tcp_syn_retries.
+
+retries1
+ How often a DCCP Response is retransmitted until the listening DCCP
+ side considers its connecting peer dead. Analogue of tcp_retries1.
+
+retries2
+ The number of times a general DCCP packet is retransmitted. This has
+ importance for retransmitted acknowledgments and feature negotiation,
+ data packets are never retransmitted. Analogue of tcp_retries2.
+
+tx_ccid = 2
+ Default CCID for the sender-receiver half-connection. Depending on the
+ choice of CCID, the Send Ack Vector feature is enabled automatically.
+
+rx_ccid = 2
+ Default CCID for the receiver-sender half-connection; see tx_ccid.
+
+seq_window = 100
+ The initial sequence window (sec. 7.5.2) of the sender. This influences
+ the local ackno validity and the remote seqno validity windows (7.5.1).
+ Values in the range Wmin = 32 (RFC 4340, 7.5.2) up to 2^32-1 can be set.
+
+tx_qlen = 5
+ The size of the transmit buffer in packets. A value of 0 corresponds
+ to an unbounded transmit buffer.
+
+sync_ratelimit = 125 ms
+ The timeout between subsequent DCCP-Sync packets sent in response to
+ sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit
+ of this parameter is milliseconds; a value of 0 disables rate-limiting.
+
+
+IOCTLS
+======
+FIONREAD
+ Works as in udp(7): returns in the `int' argument pointer the size of
+ the next pending datagram in bytes, or 0 when no datagram is pending.
+
+
+Other tunables
+==============
+Per-route rto_min support
+ CCID-2 supports the RTAX_RTO_MIN per-route setting for the minimum value
+ of the RTO timer. This setting can be modified via the 'rto_min' option
+ of iproute2; for example:
+ > ip route change 10.0.0.0/24 rto_min 250j dev wlan0
+ > ip route add 10.0.0.254/32 rto_min 800j dev wlan0
+ > ip route show dev wlan0
+ CCID-3 also supports the rto_min setting: it is used to define the lower
+ bound for the expiry of the nofeedback timer. This can be useful on LANs
+ with very low RTTs (e.g., loopback, Gbit ethernet).
+
+
+Notes
+=====
+DCCP does not travel through NAT successfully at present on many boxes. This is
+because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT
+support for DCCP has been added.
diff --git a/Documentation/networking/dctcp.txt b/Documentation/networking/dctcp.txt
new file mode 100644
index 000000000..0d5dfbc89
--- /dev/null
+++ b/Documentation/networking/dctcp.txt
@@ -0,0 +1,43 @@
+DCTCP (DataCenter TCP)
+----------------------
+
+DCTCP is an enhancement to the TCP congestion control algorithm for data
+center networks and leverages Explicit Congestion Notification (ECN) in
+the data center network to provide multi-bit feedback to the end hosts.
+
+To enable it on end hosts:
+
+ sysctl -w net.ipv4.tcp_congestion_control=dctcp
+
+All switches in the data center network running DCTCP must support ECN
+marking and be configured for marking when reaching defined switch buffer
+thresholds. The default ECN marking threshold heuristic for DCTCP on
+switches is 20 packets (30KB) at 1Gbps, and 65 packets (~100KB) at 10Gbps,
+but might need further careful tweaking.
+
+For more details, see below documents:
+
+Paper:
+
+The algorithm is further described in detail in the following two
+SIGCOMM/SIGMETRICS papers:
+
+ i) Mohammad Alizadeh, Albert Greenberg, David A. Maltz, Jitendra Padhye,
+ Parveen Patel, Balaji Prabhakar, Sudipta Sengupta, and Murari Sridharan:
+ "Data Center TCP (DCTCP)", Data Center Networks session
+ Proc. ACM SIGCOMM, New Delhi, 2010.
+ http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
+ http://www.sigcomm.org/ccr/papers/2010/October/1851275.1851192
+
+ii) Mohammad Alizadeh, Adel Javanmard, and Balaji Prabhakar:
+ "Analysis of DCTCP: Stability, Convergence, and Fairness"
+ Proc. ACM SIGMETRICS, San Jose, 2011.
+ http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf
+
+IETF informational draft:
+
+ http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00
+
+DCTCP site:
+
+ http://simula.stanford.edu/~alizade/Site/DCTCP.html
diff --git a/Documentation/networking/de4x5.txt b/Documentation/networking/de4x5.txt
new file mode 100644
index 000000000..c8e4ca9b2
--- /dev/null
+++ b/Documentation/networking/de4x5.txt
@@ -0,0 +1,178 @@
+ Originally, this driver was written for the Digital Equipment
+ Corporation series of EtherWORKS Ethernet cards:
+
+ DE425 TP/COAX EISA
+ DE434 TP PCI
+ DE435 TP/COAX/AUI PCI
+ DE450 TP/COAX/AUI PCI
+ DE500 10/100 PCI Fasternet
+
+ but it will now attempt to support all cards which conform to the
+ Digital Semiconductor SROM Specification. The driver currently
+ recognises the following chips:
+
+ DC21040 (no SROM)
+ DC21041[A]
+ DC21140[A]
+ DC21142
+ DC21143
+
+ So far the driver is known to work with the following cards:
+
+ KINGSTON
+ Linksys
+ ZNYX342
+ SMC8432
+ SMC9332 (w/new SROM)
+ ZNYX31[45]
+ ZNYX346 10/100 4 port (can act as a 10/100 bridge!)
+
+ The driver has been tested on a relatively busy network using the DE425,
+ DE434, DE435 and DE500 cards and benchmarked with 'ttcp': it transferred
+ 16M of data to a DECstation 5000/200 as follows:
+
+ TCP UDP
+ TX RX TX RX
+ DE425 1030k 997k 1170k 1128k
+ DE434 1063k 995k 1170k 1125k
+ DE435 1063k 995k 1170k 1125k
+ DE500 1063k 998k 1170k 1125k in 10Mb/s mode
+
+ All values are typical (in kBytes/sec) from a sample of 4 for each
+ measurement. Their error is +/-20k on a quiet (private) network and also
+ depend on what load the CPU has.
+
+ =========================================================================
+
+ The ability to load this driver as a loadable module has been included
+ and used extensively during the driver development (to save those long
+ reboot sequences). Loadable module support under PCI and EISA has been
+ achieved by letting the driver autoprobe as if it were compiled into the
+ kernel. Do make sure you're not sharing interrupts with anything that
+ cannot accommodate interrupt sharing!
+
+ To utilise this ability, you have to do 8 things:
+
+ 0) have a copy of the loadable modules code installed on your system.
+ 1) copy de4x5.c from the /linux/drivers/net directory to your favourite
+ temporary directory.
+ 2) for fixed autoprobes (not recommended), edit the source code near
+ line 5594 to reflect the I/O address you're using, or assign these when
+ loading by:
+
+ insmod de4x5 io=0xghh where g = bus number
+ hh = device number
+
+ NB: autoprobing for modules is now supported by default. You may just
+ use:
+
+ insmod de4x5
+
+ to load all available boards. For a specific board, still use
+ the 'io=?' above.
+ 3) compile de4x5.c, but include -DMODULE in the command line to ensure
+ that the correct bits are compiled (see end of source code).
+ 4) if you are wanting to add a new card, goto 5. Otherwise, recompile a
+ kernel with the de4x5 configuration turned off and reboot.
+ 5) insmod de4x5 [io=0xghh]
+ 6) run the net startup bits for your new eth?? interface(s) manually
+ (usually /etc/rc.inet[12] at boot time).
+ 7) enjoy!
+
+ To unload a module, turn off the associated interface(s)
+ 'ifconfig eth?? down' then 'rmmod de4x5'.
+
+ Automedia detection is included so that in principle you can disconnect
+ from, e.g. TP, reconnect to BNC and things will still work (after a
+ pause whilst the driver figures out where its media went). My tests
+ using ping showed that it appears to work....
+
+ By default, the driver will now autodetect any DECchip based card.
+ Should you have a need to restrict the driver to DIGITAL only cards, you
+ can compile with a DEC_ONLY define, or if loading as a module, use the
+ 'dec_only=1' parameter.
+
+ I've changed the timing routines to use the kernel timer and scheduling
+ functions so that the hangs and other assorted problems that occurred
+ while autosensing the media should be gone. A bonus for the DC21040
+ auto media sense algorithm is that it can now use one that is more in
+ line with the rest (the DC21040 chip doesn't have a hardware timer).
+ The downside is the 1 'jiffies' (10ms) resolution.
+
+ IEEE 802.3u MII interface code has been added in anticipation that some
+ products may use it in the future.
+
+ The SMC9332 card has a non-compliant SROM which needs fixing - I have
+ patched this driver to detect it because the SROM format used complies
+ to a previous DEC-STD format.
+
+ I have removed the buffer copies needed for receive on Intels. I cannot
+ remove them for Alphas since the Tulip hardware only does longword
+ aligned DMA transfers and the Alphas get alignment traps with non
+ longword aligned data copies (which makes them really slow). No comment.
+
+ I have added SROM decoding routines to make this driver work with any
+ card that supports the Digital Semiconductor SROM spec. This will help
+ all cards running the dc2114x series chips in particular. Cards using
+ the dc2104x chips should run correctly with the basic driver. I'm in
+ debt to <mjacob@feral.com> for the testing and feedback that helped get
+ this feature working. So far we have tested KINGSTON, SMC8432, SMC9332
+ (with the latest SROM complying with the SROM spec V3: their first was
+ broken), ZNYX342 and LinkSys. ZNYX314 (dual 21041 MAC) and ZNYX 315
+ (quad 21041 MAC) cards also appear to work despite their incorrectly
+ wired IRQs.
+
+ I have added a temporary fix for interrupt problems when some SCSI cards
+ share the same interrupt as the DECchip based cards. The problem occurs
+ because the SCSI card wants to grab the interrupt as a fast interrupt
+ (runs the service routine with interrupts turned off) vs. this card
+ which really needs to run the service routine with interrupts turned on.
+ This driver will now add the interrupt service routine as a fast
+ interrupt if it is bounced from the slow interrupt. THIS IS NOT A
+ RECOMMENDED WAY TO RUN THE DRIVER and has been done for a limited time
+ until people sort out their compatibility issues and the kernel
+ interrupt service code is fixed. YOU SHOULD SEPARATE OUT THE FAST
+ INTERRUPT CARDS FROM THE SLOW INTERRUPT CARDS to ensure that they do not
+ run on the same interrupt. PCMCIA/CardBus is another can of worms...
+
+ Finally, I think I have really fixed the module loading problem with
+ more than one DECchip based card. As a side effect, I don't mess with
+ the device structure any more which means that if more than 1 card in
+ 2.0.x is installed (4 in 2.1.x), the user will have to edit
+ linux/drivers/net/Space.c to make room for them. Hence, module loading
+ is the preferred way to use this driver, since it doesn't have this
+ limitation.
+
+ Where SROM media detection is used and full duplex is specified in the
+ SROM, the feature is ignored unless lp->params.fdx is set at compile
+ time OR during a module load (insmod de4x5 args='eth??:fdx' [see
+ below]). This is because there is no way to automatically detect full
+ duplex links except through autonegotiation. When I include the
+ autonegotiation feature in the SROM autoconf code, this detection will
+ occur automatically for that case.
+
+ Command line arguments are now allowed, similar to passing arguments
+ through LILO. This will allow a per adapter board set up of full duplex
+ and media. The only lexical constraints are: the board name (dev->name)
+ appears in the list before its parameters. The list of parameters ends
+ either at the end of the parameter list or with another board name. The
+ following parameters are allowed:
+
+ fdx for full duplex
+ autosense to set the media/speed; with the following
+ sub-parameters:
+ TP, TP_NW, BNC, AUI, BNC_AUI, 100Mb, 10Mb, AUTO
+
+ Case sensitivity is important for the sub-parameters. They *must* be
+ upper case. Examples:
+
+ insmod de4x5 args='eth1:fdx autosense=BNC eth0:autosense=100Mb'.
+
+ For a compiled in driver, in linux/drivers/net/CONFIG, place e.g.
+ DE4X5_OPTS = -DDE4X5_PARM='"eth0:fdx autosense=AUI eth2:autosense=TP"'
+
+ Yes, I know full duplex isn't permissible on BNC or AUI; they're just
+ examples. By default, full duplex is turned off and AUTO is the default
+ autosense setting. In reality, I expect only the full duplex option to
+ be used. Note the use of single quotes in the two examples above and the
+ lack of commas to separate items.
diff --git a/Documentation/networking/decnet.txt b/Documentation/networking/decnet.txt
new file mode 100644
index 000000000..e12a4900c
--- /dev/null
+++ b/Documentation/networking/decnet.txt
@@ -0,0 +1,232 @@
+ Linux DECnet Networking Layer Information
+ ===========================================
+
+1) Other documentation....
+
+ o Project Home Pages
+ http://www.chygwyn.com/ - Kernel info
+ http://linux-decnet.sourceforge.net/ - Userland tools
+ http://www.sourceforge.net/projects/linux-decnet/ - Status page
+
+2) Configuring the kernel
+
+Be sure to turn on the following options:
+
+ CONFIG_DECNET (obviously)
+ CONFIG_PROC_FS (to see what's going on)
+ CONFIG_SYSCTL (for easy configuration)
+
+if you want to try out router support (not properly debugged yet)
+you'll need the following options as well...
+
+ CONFIG_DECNET_ROUTER (to be able to add/delete routes)
+ CONFIG_NETFILTER (will be required for the DECnet routing daemon)
+
+ CONFIG_DECNET_ROUTE_FWMARK is optional
+
+Don't turn on SIOCGIFCONF support for DECnet unless you are really sure
+that you need it, in general you won't and it can cause ifconfig to
+malfunction.
+
+Run time configuration has changed slightly from the 2.4 system. If you
+want to configure an endnode, then the simplified procedure is as follows:
+
+ o Set the MAC address on your ethernet card before starting _any_ other
+ network protocols.
+
+As soon as your network card is brought into the UP state, DECnet should
+start working. If you need something more complicated or are unsure how
+to set the MAC address, see the next section. Also all configurations which
+worked with 2.4 will work under 2.5 with no change.
+
+3) Command line options
+
+You can set a DECnet address on the kernel command line for compatibility
+with the 2.4 configuration procedure, but in general it's not needed any more.
+If you do st a DECnet address on the command line, it has only one purpose
+which is that its added to the addresses on the loopback device.
+
+With 2.4 kernels, DECnet would only recognise addresses as local if they
+were added to the loopback device. In 2.5, any local interface address
+can be used to loop back to the local machine. Of course this does not
+prevent you adding further addresses to the loopback device if you
+want to.
+
+N.B. Since the address list of an interface determines the addresses for
+which "hello" messages are sent, if you don't set an address on the loopback
+interface then you won't see any entries in /proc/net/neigh for the local
+host until such time as you start a connection. This doesn't affect the
+operation of the local communications in any other way though.
+
+The kernel command line takes options looking like the following:
+
+ decnet.addr=1,2
+
+the two numbers are the node address 1,2 = 1.2 For 2.2.xx kernels
+and early 2.3.xx kernels, you must use a comma when specifying the
+DECnet address like this. For more recent 2.3.xx kernels, you may
+use almost any character except space, although a `.` would be the most
+obvious choice :-)
+
+There used to be a third number specifying the node type. This option
+has gone away in favour of a per interface node type. This is now set
+using /proc/sys/net/decnet/conf/<dev>/forwarding. This file can be
+set with a single digit, 0=EndNode, 1=L1 Router and 2=L2 Router.
+
+There are also equivalent options for modules. The node address can
+also be set through the /proc/sys/net/decnet/ files, as can other system
+parameters.
+
+Currently the only supported devices are ethernet and ip_gre. The
+ethernet address of your ethernet card has to be set according to the DECnet
+address of the node in order for it to be autoconfigured (and then appear in
+/proc/net/decnet_dev). There is a utility available at the above
+FTP sites called dn2ethaddr which can compute the correct ethernet
+address to use. The address can be set by ifconfig either before or
+at the time the device is brought up. If you are using RedHat you can
+add the line:
+
+ MACADDR=AA:00:04:00:03:04
+
+or something similar, to /etc/sysconfig/network-scripts/ifcfg-eth0 or
+wherever your network card's configuration lives. Setting the MAC address
+of your ethernet card to an address starting with "hi-ord" will cause a
+DECnet address which matches to be added to the interface (which you can
+verify with iproute2).
+
+The default device for routing can be set through the /proc filesystem
+by setting /proc/sys/net/decnet/default_device to the
+device you want DECnet to route packets out of when no specific route
+is available. Usually this will be eth0, for example:
+
+ echo -n "eth0" >/proc/sys/net/decnet/default_device
+
+If you don't set the default device, then it will default to the first
+ethernet card which has been autoconfigured as described above. You can
+confirm that by looking in the default_device file of course.
+
+There is a list of what the other files under /proc/sys/net/decnet/ do
+on the kernel patch web site (shown above).
+
+4) Run time kernel configuration
+
+This is either done through the sysctl/proc interface (see the kernel web
+pages for details on what the various options do) or through the iproute2
+package in the same way as IPv4/6 configuration is performed.
+
+Documentation for iproute2 is included with the package, although there is
+as yet no specific section on DECnet, most of the features apply to both
+IP and DECnet, albeit with DECnet addresses instead of IP addresses and
+a reduced functionality.
+
+If you want to configure a DECnet router you'll need the iproute2 package
+since its the _only_ way to add and delete routes currently. Eventually
+there will be a routing daemon to send and receive routing messages for
+each interface and update the kernel routing tables accordingly. The
+routing daemon will use netfilter to listen to routing packets, and
+rtnetlink to update the kernels routing tables.
+
+The DECnet raw socket layer has been removed since it was there purely
+for use by the routing daemon which will now use netfilter (a much cleaner
+and more generic solution) instead.
+
+5) How can I tell if its working ?
+
+Here is a quick guide of what to look for in order to know if your DECnet
+kernel subsystem is working.
+
+ - Is the node address set (see /proc/sys/net/decnet/node_address)
+ - Is the node of the correct type
+ (see /proc/sys/net/decnet/conf/<dev>/forwarding)
+ - Is the Ethernet MAC address of each Ethernet card set to match
+ the DECnet address. If in doubt use the dn2ethaddr utility available
+ at the ftp archive.
+ - If the previous two steps are satisfied, and the Ethernet card is up,
+ you should find that it is listed in /proc/net/decnet_dev and also
+ that it appears as a directory in /proc/sys/net/decnet/conf/. The
+ loopback device (lo) should also appear and is required to communicate
+ within a node.
+ - If you have any DECnet routers on your network, they should appear
+ in /proc/net/decnet_neigh, otherwise this file will only contain the
+ entry for the node itself (if it doesn't check to see if lo is up).
+ - If you want to send to any node which is not listed in the
+ /proc/net/decnet_neigh file, you'll need to set the default device
+ to point to an Ethernet card with connection to a router. This is
+ again done with the /proc/sys/net/decnet/default_device file.
+ - Try starting a simple server and client, like the dnping/dnmirror
+ over the loopback interface. With luck they should communicate.
+ For this step and those after, you'll need the DECnet library
+ which can be obtained from the above ftp sites as well as the
+ actual utilities themselves.
+ - If this seems to work, then try talking to a node on your local
+ network, and see if you can obtain the same results.
+ - At this point you are on your own... :-)
+
+6) How to send a bug report
+
+If you've found a bug and want to report it, then there are several things
+you can do to help me work out exactly what it is that is wrong. Useful
+information (_most_ of which _is_ _essential_) includes:
+
+ - What kernel version are you running ?
+ - What version of the patch are you running ?
+ - How far though the above set of tests can you get ?
+ - What is in the /proc/decnet* files and /proc/sys/net/decnet/* files ?
+ - Which services are you running ?
+ - Which client caused the problem ?
+ - How much data was being transferred ?
+ - Was the network congested ?
+ - How can the problem be reproduced ?
+ - Can you use tcpdump to get a trace ? (N.B. Most (all?) versions of
+ tcpdump don't understand how to dump DECnet properly, so including
+ the hex listing of the packet contents is _essential_, usually the -x flag.
+ You may also need to increase the length grabbed with the -s flag. The
+ -e flag also provides very useful information (ethernet MAC addresses))
+
+7) MAC FAQ
+
+A quick FAQ on ethernet MAC addresses to explain how Linux and DECnet
+interact and how to get the best performance from your hardware.
+
+Ethernet cards are designed to normally only pass received network frames
+to a host computer when they are addressed to it, or to the broadcast address.
+
+Linux has an interface which allows the setting of extra addresses for
+an ethernet card to listen to. If the ethernet card supports it, the
+filtering operation will be done in hardware, if not the extra unwanted packets
+received will be discarded by the host computer. In the latter case,
+significant processor time and bus bandwidth can be used up on a busy
+network (see the NAPI documentation for a longer explanation of these
+effects).
+
+DECnet makes use of this interface to allow running DECnet on an ethernet
+card which has already been configured using TCP/IP (presumably using the
+built in MAC address of the card, as usual) and/or to allow multiple DECnet
+addresses on each physical interface. If you do this, be aware that if your
+ethernet card doesn't support perfect hashing in its MAC address filter
+then your computer will be doing more work than required. Some cards
+will simply set themselves into promiscuous mode in order to receive
+packets from the DECnet specified addresses. So if you have one of these
+cards its better to set the MAC address of the card as described above
+to gain the best efficiency. Better still is to use a card which supports
+NAPI as well.
+
+
+8) Mailing list
+
+If you are keen to get involved in development, or want to ask questions
+about configuration, or even just report bugs, then there is a mailing
+list that you can join, details are at:
+
+http://sourceforge.net/mail/?group_id=4993
+
+9) Legal Info
+
+The Linux DECnet project team have placed their code under the GPL. The
+software is provided "as is" and without warranty express or implied.
+DECnet is a trademark of Compaq. This software is not a product of
+Compaq. We acknowledge the help of people at Compaq in providing extra
+documentation above and beyond what was previously publicly available.
+
+Steve Whitehouse <SteveW@ACM.org>
+
diff --git a/Documentation/networking/dl2k.txt b/Documentation/networking/dl2k.txt
new file mode 100644
index 000000000..cba74f7a3
--- /dev/null
+++ b/Documentation/networking/dl2k.txt
@@ -0,0 +1,282 @@
+
+ D-Link DL2000-based Gigabit Ethernet Adapter Installation
+ for Linux
+ May 23, 2002
+
+Contents
+========
+ - Compatibility List
+ - Quick Install
+ - Compiling the Driver
+ - Installing the Driver
+ - Option parameter
+ - Configuration Script Sample
+ - Troubleshooting
+
+
+Compatibility List
+=================
+Adapter Support:
+
+D-Link DGE-550T Gigabit Ethernet Adapter.
+D-Link DGE-550SX Gigabit Ethernet Adapter.
+D-Link DL2000-based Gigabit Ethernet Adapter.
+
+
+The driver support Linux kernel 2.4.7 later. We had tested it
+on the environments below.
+
+ . Red Hat v6.2 (update kernel to 2.4.7)
+ . Red Hat v7.0 (update kernel to 2.4.7)
+ . Red Hat v7.1 (kernel 2.4.7)
+ . Red Hat v7.2 (kernel 2.4.7-10)
+
+
+Quick Install
+=============
+Install linux driver as following command:
+
+1. make all
+2. insmod dl2k.ko
+3. ifconfig eth0 up 10.xxx.xxx.xxx netmask 255.0.0.0
+ ^^^^^^^^^^^^^^^\ ^^^^^^^^\
+ IP NETMASK
+Now eth0 should active, you can test it by "ping" or get more information by
+"ifconfig". If tested ok, continue the next step.
+
+4. cp dl2k.ko /lib/modules/`uname -r`/kernel/drivers/net
+5. Add the following line to /etc/modprobe.d/dl2k.conf:
+ alias eth0 dl2k
+6. Run depmod to updated module indexes.
+7. Run "netconfig" or "netconf" to create configuration script ifcfg-eth0
+ located at /etc/sysconfig/network-scripts or create it manually.
+ [see - Configuration Script Sample]
+8. Driver will automatically load and configure at next boot time.
+
+Compiling the Driver
+====================
+ In Linux, NIC drivers are most commonly configured as loadable modules.
+The approach of building a monolithic kernel has become obsolete. The driver
+can be compiled as part of a monolithic kernel, but is strongly discouraged.
+The remainder of this section assumes the driver is built as a loadable module.
+In the Linux environment, it is a good idea to rebuild the driver from the
+source instead of relying on a precompiled version. This approach provides
+better reliability since a precompiled driver might depend on libraries or
+kernel features that are not present in a given Linux installation.
+
+The 3 files necessary to build Linux device driver are dl2k.c, dl2k.h and
+Makefile. To compile, the Linux installation must include the gcc compiler,
+the kernel source, and the kernel headers. The Linux driver supports Linux
+Kernels 2.4.7. Copy the files to a directory and enter the following command
+to compile and link the driver:
+
+CD-ROM drive
+------------
+
+[root@XXX /] mkdir cdrom
+[root@XXX /] mount -r -t iso9660 -o conv=auto /dev/cdrom /cdrom
+[root@XXX /] cd root
+[root@XXX /root] mkdir dl2k
+[root@XXX /root] cd dl2k
+[root@XXX dl2k] cp /cdrom/linux/dl2k.tgz /root/dl2k
+[root@XXX dl2k] tar xfvz dl2k.tgz
+[root@XXX dl2k] make all
+
+Floppy disc drive
+-----------------
+
+[root@XXX /] cd root
+[root@XXX /root] mkdir dl2k
+[root@XXX /root] cd dl2k
+[root@XXX dl2k] mcopy a:/linux/dl2k.tgz /root/dl2k
+[root@XXX dl2k] tar xfvz dl2k.tgz
+[root@XXX dl2k] make all
+
+Installing the Driver
+=====================
+
+ Manual Installation
+ -------------------
+ Once the driver has been compiled, it must be loaded, enabled, and bound
+ to a protocol stack in order to establish network connectivity. To load a
+ module enter the command:
+
+ insmod dl2k.o
+
+ or
+
+ insmod dl2k.o <optional parameter> ; add parameter
+
+ ===============================================================
+ example: insmod dl2k.o media=100mbps_hd
+ or insmod dl2k.o media=3
+ or insmod dl2k.o media=3,2 ; for 2 cards
+ ===============================================================
+
+ Please reference the list of the command line parameters supported by
+ the Linux device driver below.
+
+ The insmod command only loads the driver and gives it a name of the form
+ eth0, eth1, etc. To bring the NIC into an operational state,
+ it is necessary to issue the following command:
+
+ ifconfig eth0 up
+
+ Finally, to bind the driver to the active protocol (e.g., TCP/IP with
+ Linux), enter the following command:
+
+ ifup eth0
+
+ Note that this is meaningful only if the system can find a configuration
+ script that contains the necessary network information. A sample will be
+ given in the next paragraph.
+
+ The commands to unload a driver are as follows:
+
+ ifdown eth0
+ ifconfig eth0 down
+ rmmod dl2k.o
+
+ The following are the commands to list the currently loaded modules and
+ to see the current network configuration.
+
+ lsmod
+ ifconfig
+
+
+ Automated Installation
+ ----------------------
+ This section describes how to install the driver such that it is
+ automatically loaded and configured at boot time. The following description
+ is based on a Red Hat 6.0/7.0 distribution, but it can easily be ported to
+ other distributions as well.
+
+ Red Hat v6.x/v7.x
+ -----------------
+ 1. Copy dl2k.o to the network modules directory, typically
+ /lib/modules/2.x.x-xx/net or /lib/modules/2.x.x/kernel/drivers/net.
+ 2. Locate the boot module configuration file, most commonly in the
+ /etc/modprobe.d/ directory. Add the following lines:
+
+ alias ethx dl2k
+ options dl2k <optional parameters>
+
+ where ethx will be eth0 if the NIC is the only ethernet adapter, eth1 if
+ one other ethernet adapter is installed, etc. Refer to the table in the
+ previous section for the list of optional parameters.
+ 3. Locate the network configuration scripts, normally the
+ /etc/sysconfig/network-scripts directory, and create a configuration
+ script named ifcfg-ethx that contains network information.
+ 4. Note that for most Linux distributions, Red Hat included, a configuration
+ utility with a graphical user interface is provided to perform steps 2
+ and 3 above.
+
+
+Parameter Description
+=====================
+You can install this driver without any additional parameter. However, if you
+are going to have extensive functions then it is necessary to set extra
+parameter. Below is a list of the command line parameters supported by the
+Linux device
+driver.
+
+mtu=packet_size - Specifies the maximum packet size. default
+ is 1500.
+
+media=media_type - Specifies the media type the NIC operates at.
+ autosense Autosensing active media.
+ 10mbps_hd 10Mbps half duplex.
+ 10mbps_fd 10Mbps full duplex.
+ 100mbps_hd 100Mbps half duplex.
+ 100mbps_fd 100Mbps full duplex.
+ 1000mbps_fd 1000Mbps full duplex.
+ 1000mbps_hd 1000Mbps half duplex.
+ 0 Autosensing active media.
+ 1 10Mbps half duplex.
+ 2 10Mbps full duplex.
+ 3 100Mbps half duplex.
+ 4 100Mbps full duplex.
+ 5 1000Mbps half duplex.
+ 6 1000Mbps full duplex.
+
+ By default, the NIC operates at autosense.
+ 1000mbps_fd and 1000mbps_hd types are only
+ available for fiber adapter.
+
+vlan=n - Specifies the VLAN ID. If vlan=0, the
+ Virtual Local Area Network (VLAN) function is
+ disable.
+
+jumbo=[0|1] - Specifies the jumbo frame support. If jumbo=1,
+ the NIC accept jumbo frames. By default, this
+ function is disabled.
+ Jumbo frame usually improve the performance
+ int gigabit.
+ This feature need jumbo frame compatible
+ remote.
+
+rx_coalesce=m - Number of rx frame handled each interrupt.
+rx_timeout=n - Rx DMA wait time for an interrupt.
+ If set rx_coalesce > 0, hardware only assert
+ an interrupt for m frames. Hardware won't
+ assert rx interrupt until m frames received or
+ reach timeout of n * 640 nano seconds.
+ Set proper rx_coalesce and rx_timeout can
+ reduce congestion collapse and overload which
+ has been a bottleneck for high speed network.
+
+ For example, rx_coalesce=10 rx_timeout=800.
+ that is, hardware assert only 1 interrupt
+ for 10 frames received or timeout of 512 us.
+
+tx_coalesce=n - Number of tx frame handled each interrupt.
+ Set n > 1 can reduce the interrupts
+ congestion usually lower performance of
+ high speed network card. Default is 16.
+
+tx_flow=[1|0] - Specifies the Tx flow control. If tx_flow=0,
+ the Tx flow control disable else driver
+ autodetect.
+rx_flow=[1|0] - Specifies the Rx flow control. If rx_flow=0,
+ the Rx flow control enable else driver
+ autodetect.
+
+
+Configuration Script Sample
+===========================
+Here is a sample of a simple configuration script:
+
+DEVICE=eth0
+USERCTL=no
+ONBOOT=yes
+POOTPROTO=none
+BROADCAST=207.200.5.255
+NETWORK=207.200.5.0
+NETMASK=255.255.255.0
+IPADDR=207.200.5.2
+
+
+Troubleshooting
+===============
+Q1. Source files contain ^ M behind every line.
+ Make sure all files are Unix file format (no LF). Try the following
+ shell command to convert files.
+
+ cat dl2k.c | col -b > dl2k.tmp
+ mv dl2k.tmp dl2k.c
+
+ OR
+
+ cat dl2k.c | tr -d "\r" > dl2k.tmp
+ mv dl2k.tmp dl2k.c
+
+Q2: Could not find header files (*.h) ?
+ To compile the driver, you need kernel header files. After
+ installing the kernel source, the header files are usually located in
+ /usr/src/linux/include, which is the default include directory configured
+ in Makefile. For some distributions, there is a copy of header files in
+ /usr/src/include/linux and /usr/src/include/asm, that you can change the
+ INCLUDEDIR in Makefile to /usr/include without installing kernel source.
+ Note that RH 7.0 didn't provide correct header files in /usr/include,
+ including those files will make a wrong version driver.
+
diff --git a/Documentation/networking/dm9000.txt b/Documentation/networking/dm9000.txt
new file mode 100644
index 000000000..5552e2e57
--- /dev/null
+++ b/Documentation/networking/dm9000.txt
@@ -0,0 +1,167 @@
+DM9000 Network driver
+=====================
+
+Copyright 2008 Simtec Electronics,
+ Ben Dooks <ben@simtec.co.uk> <ben-linux@fluff.org>
+
+
+Introduction
+------------
+
+This file describes how to use the DM9000 platform-device based network driver
+that is contained in the files drivers/net/dm9000.c and drivers/net/dm9000.h.
+
+The driver supports three DM9000 variants, the DM9000E which is the first chip
+supported as well as the newer DM9000A and DM9000B devices. It is currently
+maintained and tested by Ben Dooks, who should be CC: to any patches for this
+driver.
+
+
+Defining the platform device
+----------------------------
+
+The minimum set of resources attached to the platform device are as follows:
+
+ 1) The physical address of the address register
+ 2) The physical address of the data register
+ 3) The IRQ line the device's interrupt pin is connected to.
+
+These resources should be specified in that order, as the ordering of the
+two address regions is important (the driver expects these to be address
+and then data).
+
+An example from arch/arm/mach-s3c2410/mach-bast.c is:
+
+static struct resource bast_dm9k_resource[] = {
+ [0] = {
+ .start = S3C2410_CS5 + BAST_PA_DM9000,
+ .end = S3C2410_CS5 + BAST_PA_DM9000 + 3,
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = S3C2410_CS5 + BAST_PA_DM9000 + 0x40,
+ .end = S3C2410_CS5 + BAST_PA_DM9000 + 0x40 + 0x3f,
+ .flags = IORESOURCE_MEM,
+ },
+ [2] = {
+ .start = IRQ_DM9000,
+ .end = IRQ_DM9000,
+ .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
+ }
+};
+
+static struct platform_device bast_device_dm9k = {
+ .name = "dm9000",
+ .id = 0,
+ .num_resources = ARRAY_SIZE(bast_dm9k_resource),
+ .resource = bast_dm9k_resource,
+};
+
+Note the setting of the IRQ trigger flag in bast_dm9k_resource[2].flags,
+as this will generate a warning if it is not present. The trigger from
+the flags field will be passed to request_irq() when registering the IRQ
+handler to ensure that the IRQ is setup correctly.
+
+This shows a typical platform device, without the optional configuration
+platform data supplied. The next example uses the same resources, but adds
+the optional platform data to pass extra configuration data:
+
+static struct dm9000_plat_data bast_dm9k_platdata = {
+ .flags = DM9000_PLATF_16BITONLY,
+};
+
+static struct platform_device bast_device_dm9k = {
+ .name = "dm9000",
+ .id = 0,
+ .num_resources = ARRAY_SIZE(bast_dm9k_resource),
+ .resource = bast_dm9k_resource,
+ .dev = {
+ .platform_data = &bast_dm9k_platdata,
+ }
+};
+
+The platform data is defined in include/linux/dm9000.h and described below.
+
+
+Platform data
+-------------
+
+Extra platform data for the DM9000 can describe the IO bus width to the
+device, whether or not an external PHY is attached to the device and
+the availability of an external configuration EEPROM.
+
+The flags for the platform data .flags field are as follows:
+
+DM9000_PLATF_8BITONLY
+
+ The IO should be done with 8bit operations.
+
+DM9000_PLATF_16BITONLY
+
+ The IO should be done with 16bit operations.
+
+DM9000_PLATF_32BITONLY
+
+ The IO should be done with 32bit operations.
+
+DM9000_PLATF_EXT_PHY
+
+ The chip is connected to an external PHY.
+
+DM9000_PLATF_NO_EEPROM
+
+ This can be used to signify that the board does not have an
+ EEPROM, or that the EEPROM should be hidden from the user.
+
+DM9000_PLATF_SIMPLE_PHY
+
+ Switch to using the simpler PHY polling method which does not
+ try and read the MII PHY state regularly. This is only available
+ when using the internal PHY. See the section on link state polling
+ for more information.
+
+ The config symbol DM9000_FORCE_SIMPLE_PHY_POLL, Kconfig entry
+ "Force simple NSR based PHY polling" allows this flag to be
+ forced on at build time.
+
+
+PHY Link state polling
+----------------------
+
+The driver keeps track of the link state and informs the network core
+about link (carrier) availability. This is managed by several methods
+depending on the version of the chip and on which PHY is being used.
+
+For the internal PHY, the original (and currently default) method is
+to read the MII state, either when the status changes if we have the
+necessary interrupt support in the chip or every two seconds via a
+periodic timer.
+
+To reduce the overhead for the internal PHY, there is now the option
+of using the DM9000_FORCE_SIMPLE_PHY_POLL config, or DM9000_PLATF_SIMPLE_PHY
+platform data option to read the summary information without the
+expensive MII accesses. This method is faster, but does not print
+as much information.
+
+When using an external PHY, the driver currently has to poll the MII
+link status as there is no method for getting an interrupt on link change.
+
+
+DM9000A / DM9000B
+-----------------
+
+These chips are functionally similar to the DM9000E and are supported easily
+by the same driver. The features are:
+
+ 1) Interrupt on internal PHY state change. This means that the periodic
+ polling of the PHY status may be disabled on these devices when using
+ the internal PHY.
+
+ 2) TCP/UDP checksum offloading, which the driver does not currently support.
+
+
+ethtool
+-------
+
+The driver supports the ethtool interface for access to the driver
+state information, the PHY state and the EEPROM.
diff --git a/Documentation/networking/dmfe.txt b/Documentation/networking/dmfe.txt
new file mode 100644
index 000000000..25320bf19
--- /dev/null
+++ b/Documentation/networking/dmfe.txt
@@ -0,0 +1,66 @@
+Note: This driver doesn't have a maintainer.
+
+Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver for Linux.
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+
+This driver provides kernel support for Davicom DM9102(A)/DM9132/DM9801 ethernet cards ( CNET
+10/100 ethernet cards uses Davicom chipset too, so this driver supports CNET cards too ).If you
+didn't compile this driver as a module, it will automatically load itself on boot and print a
+line similar to :
+
+ dmfe: Davicom DM9xxx net driver, version 1.36.4 (2002-01-17)
+
+If you compiled this driver as a module, you have to load it on boot.You can load it with command :
+
+ insmod dmfe
+
+This way it will autodetect the device mode.This is the suggested way to load the module.Or you can pass
+a mode= setting to module while loading, like :
+
+ insmod dmfe mode=0 # Force 10M Half Duplex
+ insmod dmfe mode=1 # Force 100M Half Duplex
+ insmod dmfe mode=4 # Force 10M Full Duplex
+ insmod dmfe mode=5 # Force 100M Full Duplex
+
+Next you should configure your network interface with a command similar to :
+
+ ifconfig eth0 172.22.3.18
+ ^^^^^^^^^^^
+ Your IP Address
+
+Then you may have to modify the default routing table with command :
+
+ route add default eth0
+
+
+Now your ethernet card should be up and running.
+
+
+TODO:
+
+Implement pci_driver::suspend() and pci_driver::resume() power management methods.
+Check on 64 bit boxes.
+Check and fix on big endian boxes.
+Test and make sure PCI latency is now correct for all cases.
+
+
+Authors:
+
+Sten Wang <sten_wang@davicom.com.tw > : Original Author
+
+Contributors:
+
+Marcelo Tosatti <marcelo@conectiva.com.br>
+Alan Cox <alan@lxorguk.ukuu.org.uk>
+Jeff Garzik <jgarzik@pobox.com>
+Vojtech Pavlik <vojtech@suse.cz>
diff --git a/Documentation/networking/dns_resolver.txt b/Documentation/networking/dns_resolver.txt
new file mode 100644
index 000000000..d86adcdae
--- /dev/null
+++ b/Documentation/networking/dns_resolver.txt
@@ -0,0 +1,157 @@
+ ===================
+ DNS Resolver Module
+ ===================
+
+Contents:
+
+ - Overview.
+ - Compilation.
+ - Setting up.
+ - Usage.
+ - Mechanism.
+ - Debugging.
+
+
+========
+OVERVIEW
+========
+
+The DNS resolver module provides a way for kernel services to make DNS queries
+by way of requesting a key of key type dns_resolver. These queries are
+upcalled to userspace through /sbin/request-key.
+
+These routines must be supported by userspace tools dns.upcall, cifs.upcall and
+request-key. It is under development and does not yet provide the full feature
+set. The features it does support include:
+
+ (*) Implements the dns_resolver key_type to contact userspace.
+
+It does not yet support the following AFS features:
+
+ (*) Dns query support for AFSDB resource record.
+
+This code is extracted from the CIFS filesystem.
+
+
+===========
+COMPILATION
+===========
+
+The module should be enabled by turning on the kernel configuration options:
+
+ CONFIG_DNS_RESOLVER - tristate "DNS Resolver support"
+
+
+==========
+SETTING UP
+==========
+
+To set up this facility, the /etc/request-key.conf file must be altered so that
+/sbin/request-key can appropriately direct the upcalls. For example, to handle
+basic dname to IPv4/IPv6 address resolution, the following line should be
+added:
+
+ #OP TYPE DESC CO-INFO PROGRAM ARG1 ARG2 ARG3 ...
+ #====== ============ ======= ======= ==========================
+ create dns_resolver * * /usr/sbin/cifs.upcall %k
+
+To direct a query for query type 'foo', a line of the following should be added
+before the more general line given above as the first match is the one taken.
+
+ create dns_resolver foo:* * /usr/sbin/dns.foo %k
+
+
+=====
+USAGE
+=====
+
+To make use of this facility, one of the following functions that are
+implemented in the module can be called after doing:
+
+ #include <linux/dns_resolver.h>
+
+ (1) int dns_query(const char *type, const char *name, size_t namelen,
+ const char *options, char **_result, time_t *_expiry);
+
+ This is the basic access function. It looks for a cached DNS query and if
+ it doesn't find it, it upcalls to userspace to make a new DNS query, which
+ may then be cached. The key description is constructed as a string of the
+ form:
+
+ [<type>:]<name>
+
+ where <type> optionally specifies the particular upcall program to invoke,
+ and thus the type of query to do, and <name> specifies the string to be
+ looked up. The default query type is a straight hostname to IP address
+ set lookup.
+
+ The name parameter is not required to be a NUL-terminated string, and its
+ length should be given by the namelen argument.
+
+ The options parameter may be NULL or it may be a set of options
+ appropriate to the query type.
+
+ The return value is a string appropriate to the query type. For instance,
+ for the default query type it is just a list of comma-separated IPv4 and
+ IPv6 addresses. The caller must free the result.
+
+ The length of the result string is returned on success, and a negative
+ error code is returned otherwise. -EKEYREJECTED will be returned if the
+ DNS lookup failed.
+
+ If _expiry is non-NULL, the expiry time (TTL) of the result will be
+ returned also.
+
+The kernel maintains an internal keyring in which it caches looked up keys.
+This can be cleared by any process that has the CAP_SYS_ADMIN capability by
+the use of KEYCTL_KEYRING_CLEAR on the keyring ID.
+
+
+===============================
+READING DNS KEYS FROM USERSPACE
+===============================
+
+Keys of dns_resolver type can be read from userspace using keyctl_read() or
+"keyctl read/print/pipe".
+
+
+=========
+MECHANISM
+=========
+
+The dnsresolver module registers a key type called "dns_resolver". Keys of
+this type are used to transport and cache DNS lookup results from userspace.
+
+When dns_query() is invoked, it calls request_key() to search the local
+keyrings for a cached DNS result. If that fails to find one, it upcalls to
+userspace to get a new result.
+
+Upcalls to userspace are made through the request_key() upcall vector, and are
+directed by means of configuration lines in /etc/request-key.conf that tell
+/sbin/request-key what program to run to instantiate the key.
+
+The upcall handler program is responsible for querying the DNS, processing the
+result into a form suitable for passing to the keyctl_instantiate_key()
+routine. This then passes the data to dns_resolver_instantiate() which strips
+off and processes any options included in the data, and then attaches the
+remainder of the string to the key as its payload.
+
+The upcall handler program should set the expiry time on the key to that of the
+lowest TTL of all the records it has extracted a result from. This means that
+the key will be discarded and recreated when the data it holds has expired.
+
+dns_query() returns a copy of the value attached to the key, or an error if
+that is indicated instead.
+
+See <file:Documentation/security/keys-request-key.txt> for further
+information about request-key function.
+
+
+=========
+DEBUGGING
+=========
+
+Debugging messages can be turned on dynamically by writing a 1 into the
+following file:
+
+ /sys/module/dnsresolver/parameters/debug
diff --git a/Documentation/networking/driver.txt b/Documentation/networking/driver.txt
new file mode 100644
index 000000000..da59e2884
--- /dev/null
+++ b/Documentation/networking/driver.txt
@@ -0,0 +1,93 @@
+Document about softnet driver issues
+
+Transmit path guidelines:
+
+1) The ndo_start_xmit method must not return NETDEV_TX_BUSY under
+ any normal circumstances. It is considered a hard error unless
+ there is no way your device can tell ahead of time when it's
+ transmit function will become busy.
+
+ Instead it must maintain the queue properly. For example,
+ for a driver implementing scatter-gather this means:
+
+ static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+ {
+ struct drv *dp = netdev_priv(dev);
+
+ lock_tx(dp);
+ ...
+ /* This is a hard error log it. */
+ if (TX_BUFFS_AVAIL(dp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+ netif_stop_queue(dev);
+ unlock_tx(dp);
+ printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
+ dev->name);
+ return NETDEV_TX_BUSY;
+ }
+
+ ... queue packet to card ...
+ ... update tx consumer index ...
+
+ if (TX_BUFFS_AVAIL(dp) <= (MAX_SKB_FRAGS + 1))
+ netif_stop_queue(dev);
+
+ ...
+ unlock_tx(dp);
+ ...
+ return NETDEV_TX_OK;
+ }
+
+ And then at the end of your TX reclamation event handling:
+
+ if (netif_queue_stopped(dp->dev) &&
+ TX_BUFFS_AVAIL(dp) > (MAX_SKB_FRAGS + 1))
+ netif_wake_queue(dp->dev);
+
+ For a non-scatter-gather supporting card, the three tests simply become:
+
+ /* This is a hard error log it. */
+ if (TX_BUFFS_AVAIL(dp) <= 0)
+
+ and:
+
+ if (TX_BUFFS_AVAIL(dp) == 0)
+
+ and:
+
+ if (netif_queue_stopped(dp->dev) &&
+ TX_BUFFS_AVAIL(dp) > 0)
+ netif_wake_queue(dp->dev);
+
+2) An ndo_start_xmit method must not modify the shared parts of a
+ cloned SKB.
+
+3) Do not forget that once you return NETDEV_TX_OK from your
+ ndo_start_xmit method, it is your driver's responsibility to free
+ up the SKB and in some finite amount of time.
+
+ For example, this means that it is not allowed for your TX
+ mitigation scheme to let TX packets "hang out" in the TX
+ ring unreclaimed forever if no new TX packets are sent.
+ This error can deadlock sockets waiting for send buffer room
+ to be freed up.
+
+ If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you
+ must not keep any reference to that SKB and you must not attempt
+ to free it up.
+
+Probing guidelines:
+
+1) Any hardware layer address you obtain for your device should
+ be verified. For example, for ethernet check it with
+ linux/etherdevice.h:is_valid_ether_addr()
+
+Close/stop guidelines:
+
+1) After the ndo_stop routine has been called, the hardware must
+ not receive or transmit any data. All in flight packets must
+ be aborted. If necessary, poll or wait for completion of
+ any reset commands.
+
+2) The ndo_stop routine will be called by unregister_netdevice
+ if device is still UP.
diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt
new file mode 100644
index 000000000..f862cf3af
--- /dev/null
+++ b/Documentation/networking/e100.txt
@@ -0,0 +1,197 @@
+Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters
+==============================================================
+
+March 15, 2011
+
+Contents
+========
+
+- In This Release
+- Identifying Your Adapter
+- Building and Installation
+- Driver Configuration Parameters
+- Additional Configurations
+- Known Issues
+- Support
+
+
+In This Release
+===============
+
+This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of
+Adapters. This driver includes support for Itanium(R)2-based systems.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel PRO/100 adapter.
+
+The following features are now available in supported kernels:
+ - Native VLANs
+ - Channel Bonding (teaming)
+ - SNMP
+
+Channel Bonding documentation can be found in the Linux kernel source:
+/Documentation/networking/bonding.txt
+
+
+Identifying Your Adapter
+========================
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ http://support.intel.com/support/network/adapter/pro100/21397.htm
+
+For the latest Intel network drivers for Linux, refer to the following
+website. In the search field, enter your adapter name or type, or use the
+networking link on the left to search for your adapter:
+
+ http://downloadfinder.intel.com/scripts-df/support_intel.asp
+
+Driver Configuration Parameters
+===============================
+
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+Rx Descriptors: Number of receive descriptors. A receive descriptor is a data
+ structure that describes a receive buffer and its attributes to the network
+ controller. The data in the descriptor is used by the controller to write
+ data from the controller to host memory. In the 3.x.x driver the valid range
+ for this parameter is 64-256. The default value is 64. This parameter can be
+ changed using the command:
+
+ ethtool -G eth? rx n, where n is the number of desired rx descriptors.
+
+Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a data
+ structure that describes a transmit buffer and its attributes to the network
+ controller. The data in the descriptor is used by the controller to read
+ data from the host memory to the controller. In the 3.x.x driver the valid
+ range for this parameter is 64-256. The default value is 64. This parameter
+ can be changed using the command:
+
+ ethtool -G eth? tx n, where n is the number of desired tx descriptors.
+
+Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by
+ default. The ethtool utility can be used as follows to force speed/duplex.
+
+ ethtool -s eth? autoneg off speed {10|100} duplex {full|half}
+
+ NOTE: setting the speed/duplex to incorrect values will cause the link to
+ fail.
+
+Event Log Message Level: The driver uses the message level flag to log events
+ to syslog. The message level can be set at driver load time. It can also be
+ set using the command:
+
+ ethtool -s eth? msglvl n
+
+
+Additional Configurations
+=========================
+
+ Configuring the Driver on Different Distributions
+ -------------------------------------------------
+
+ Configuring a network driver to load properly when the system is started is
+ distribution dependent. Typically, the configuration process involves adding
+ an alias line to /etc/modprobe.d/*.conf as well as editing other system
+ startup scripts and/or configuration files. Many popular Linux
+ distributions ship with tools to make these changes for you. To learn the
+ proper way to configure a network device for your system, refer to your
+ distribution documentation. If during this process you are asked for the
+ driver or module name, the name for the Linux Base Driver for the Intel
+ PRO/100 Family of Adapters is e100.
+
+ As an example, if you install the e100 driver for two PRO/100 adapters
+ (eth0 and eth1), add the following to a configuration file in /etc/modprobe.d/
+
+ alias eth0 e100
+ alias eth1 e100
+
+ Viewing Link Messages
+ ---------------------
+ In order to see link messages and other Intel driver information on your
+ console, you must set the dmesg level up to six. This can be done by
+ entering the following on the command line before loading the e100 driver:
+
+ dmesg -n 8
+
+ If you wish to see all messages issued by the driver, including debug
+ messages, set the dmesg level to eight.
+
+ NOTE: This setting is not saved across reboots.
+
+
+ ethtool
+ -------
+
+ The driver utilizes the ethtool interface for driver configuration and
+ diagnostics, as well as displaying statistical information. The ethtool
+ version 1.6 or later is required for this functionality.
+
+ The latest release of ethtool can be found from
+ http://ftp.kernel.org/pub/software/network/ethtool/
+
+ Enabling Wake on LAN* (WoL)
+ ---------------------------
+ WoL is provided through the ethtool* utility. For instructions on enabling
+ WoL with ethtool, refer to the ethtool man page.
+
+ WoL will be enabled on the system during the next shut down or reboot. For
+ this driver version, in order to enable WoL, the e100 driver must be
+ loaded when shutting down or rebooting the system.
+
+ NAPI
+ ----
+
+ NAPI (Rx polling mode) is supported in the e100 driver.
+
+ See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI.
+
+ Multiple Interfaces on Same Ethernet Broadcast Network
+ ------------------------------------------------------
+
+ Due to the default ARP behavior on Linux, it is not possible to have
+ one system on two IP networks in the same Ethernet broadcast domain
+ (non-partitioned switch) behave as expected. All Ethernet interfaces
+ will respond to IP traffic for any IP address assigned to the system.
+ This results in unbalanced receive traffic.
+
+ If you have multiple interfaces in a server, either turn on ARP
+ filtering by
+
+ (1) entering: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+ (this only works if your kernel's version is higher than 2.4.5), or
+
+ (2) installing the interfaces in separate broadcast domains (either
+ in different switches or in a switch partitioned to VLANs).
+
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ http://support.intel.com
+
+ or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related to the
+issue to e1000-devel@lists.sourceforge.net.
+
+
+License
+=======
+
+This software program is released under the terms of a license agreement
+between you ('Licensee') and Intel. Do not use or load this software or any
+associated materials (collectively, the 'Software') until you have carefully
+read the full terms and conditions of the file COPYING located in this software
+package. By loading or using the Software, you agree to the terms of this
+Agreement. If you do not agree with the terms of this Agreement, do not install
+or use the Software.
+
+* Other names and brands may be claimed as the property of others.
diff --git a/Documentation/networking/e1000.txt b/Documentation/networking/e1000.txt
new file mode 100644
index 000000000..437b2099c
--- /dev/null
+++ b/Documentation/networking/e1000.txt
@@ -0,0 +1,461 @@
+Linux* Base Driver for Intel(R) Ethernet Network Connection
+===========================================================
+
+Intel Gigabit Linux driver.
+Copyright(c) 1999 - 2013 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Command Line Parameters
+- Speed and Duplex Configuration
+- Additional Configurations
+- Support
+
+Identifying Your Adapter
+========================
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ http://support.intel.com/support/go/network/adapter/idguide.htm
+
+For the latest Intel network drivers for Linux, refer to the following
+website. In the search field, enter your adapter name or type, or use the
+networking link on the left to search for your adapter:
+
+ http://support.intel.com/support/go/network/adapter/home.htm
+
+Command Line Parameters
+=======================
+
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+NOTES: For more information about the AutoNeg, Duplex, and Speed
+ parameters, see the "Speed and Duplex Configuration" section in
+ this document.
+
+ For more information about the InterruptThrottleRate,
+ RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay
+ parameters, see the application note at:
+ http://www.intel.com/design/network/applnots/ap450.htm
+
+AutoNeg
+-------
+(Supported only on adapters with copper connections)
+Valid Range: 0x01-0x0F, 0x20-0x2F
+Default Value: 0x2F
+
+This parameter is a bit-mask that specifies the speed and duplex settings
+advertised by the adapter. When this parameter is used, the Speed and
+Duplex parameters must not be specified.
+
+NOTE: Refer to the Speed and Duplex section of this readme for more
+ information on the AutoNeg parameter.
+
+Duplex
+------
+(Supported only on adapters with copper connections)
+Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full)
+Default Value: 0
+
+This defines the direction in which data is allowed to flow. Can be
+either one or two-directional. If both Duplex and the link partner are
+set to auto-negotiate, the board auto-detects the correct duplex. If the
+link partner is forced (either full or half), Duplex defaults to half-
+duplex.
+
+FlowControl
+-----------
+Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
+Default Value: Reads flow control settings from the EEPROM
+
+This parameter controls the automatic generation(Tx) and response(Rx)
+to Ethernet PAUSE frames.
+
+InterruptThrottleRate
+---------------------
+(not supported on Intel(R) 82542, 82543 or 82544-based adapters)
+Valid Range: 0,1,3,4,100-100000 (0=off, 1=dynamic, 3=dynamic conservative,
+ 4=simplified balancing)
+Default Value: 3
+
+The driver can limit the amount of interrupts per second that the adapter
+will generate for incoming packets. It does this by writing a value to the
+adapter that is based on the maximum amount of interrupts that the adapter
+will generate per second.
+
+Setting InterruptThrottleRate to a value greater or equal to 100
+will program the adapter to send out a maximum of that many interrupts
+per second, even if more packets have come in. This reduces interrupt
+load on the system and can lower CPU utilization under heavy load,
+but will increase latency as packets are not processed as quickly.
+
+The default behaviour of the driver previously assumed a static
+InterruptThrottleRate value of 8000, providing a good fallback value for
+all traffic types,but lacking in small packet performance and latency.
+The hardware can handle many more small packets per second however, and
+for this reason an adaptive interrupt moderation algorithm was implemented.
+
+Since 7.3.x, the driver has two adaptive modes (setting 1 or 3) in which
+it dynamically adjusts the InterruptThrottleRate value based on the traffic
+that it receives. After determining the type of incoming traffic in the last
+timeframe, it will adjust the InterruptThrottleRate to an appropriate value
+for that traffic.
+
+The algorithm classifies the incoming traffic every interval into
+classes. Once the class is determined, the InterruptThrottleRate value is
+adjusted to suit that traffic type the best. There are three classes defined:
+"Bulk traffic", for large amounts of packets of normal size; "Low latency",
+for small amounts of traffic and/or a significant percentage of small
+packets; and "Lowest latency", for almost completely small packets or
+minimal traffic.
+
+In dynamic conservative mode, the InterruptThrottleRate value is set to 4000
+for traffic that falls in class "Bulk traffic". If traffic falls in the "Low
+latency" or "Lowest latency" class, the InterruptThrottleRate is increased
+stepwise to 20000. This default mode is suitable for most applications.
+
+For situations where low latency is vital such as cluster or
+grid computing, the algorithm can reduce latency even more when
+InterruptThrottleRate is set to mode 1. In this mode, which operates
+the same as mode 3, the InterruptThrottleRate will be increased stepwise to
+70000 for traffic in class "Lowest latency".
+
+In simplified mode the interrupt rate is based on the ratio of TX and
+RX traffic. If the bytes per second rate is approximately equal, the
+interrupt rate will drop as low as 2000 interrupts per second. If the
+traffic is mostly transmit or mostly receive, the interrupt rate could
+be as high as 8000.
+
+Setting InterruptThrottleRate to 0 turns off any interrupt moderation
+and may improve small packet latency, but is generally not suitable
+for bulk throughput traffic.
+
+NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and
+ RxAbsIntDelay parameters. In other words, minimizing the receive
+ and/or transmit absolute delays does not force the controller to
+ generate more interrupts than what the Interrupt Throttle Rate
+ allows.
+
+CAUTION: If you are using the Intel(R) PRO/1000 CT Network Connection
+ (controller 82547), setting InterruptThrottleRate to a value
+ greater than 75,000, may hang (stop transmitting) adapters
+ under certain network conditions. If this occurs a NETDEV
+ WATCHDOG message is logged in the system event log. In
+ addition, the controller is automatically reset, restoring
+ the network connection. To eliminate the potential for the
+ hang, ensure that InterruptThrottleRate is set no greater
+ than 75,000 and is not set to 0.
+
+NOTE: When e1000 is loaded with default settings and multiple adapters
+ are in use simultaneously, the CPU utilization may increase non-
+ linearly. In order to limit the CPU utilization without impacting
+ the overall throughput, we recommend that you load the driver as
+ follows:
+
+ modprobe e1000 InterruptThrottleRate=3000,3000,3000
+
+ This sets the InterruptThrottleRate to 3000 interrupts/sec for
+ the first, second, and third instances of the driver. The range
+ of 2000 to 3000 interrupts per second works on a majority of
+ systems and is a good starting point, but the optimal value will
+ be platform-specific. If CPU utilization is not a concern, use
+ RX_POLLING (NAPI) and default driver settings.
+
+RxDescriptors
+-------------
+Valid Range: 80-256 for 82542 and 82543-based adapters
+ 80-4096 for all other supported adapters
+Default Value: 256
+
+This value specifies the number of receive buffer descriptors allocated
+by the driver. Increasing this value allows the driver to buffer more
+incoming packets, at the expense of increased system memory utilization.
+
+Each descriptor is 16 bytes. A receive buffer is also allocated for each
+descriptor and can be either 2048, 4096, 8192, or 16384 bytes, depending
+on the MTU setting. The maximum MTU size is 16110.
+
+NOTE: MTU designates the frame size. It only needs to be set for Jumbo
+ Frames. Depending on the available system resources, the request
+ for a higher number of receive descriptors may be denied. In this
+ case, use a lower number.
+
+RxIntDelay
+----------
+Valid Range: 0-65535 (0=off)
+Default Value: 0
+
+This value delays the generation of receive interrupts in units of 1.024
+microseconds. Receive interrupt reduction can improve CPU efficiency if
+properly tuned for specific network traffic. Increasing this value adds
+extra latency to frame reception and can end up decreasing the throughput
+of TCP traffic. If the system is reporting dropped receives, this value
+may be set too high, causing the driver to run out of available receive
+descriptors.
+
+CAUTION: When setting RxIntDelay to a value other than 0, adapters may
+ hang (stop transmitting) under certain network conditions. If
+ this occurs a NETDEV WATCHDOG message is logged in the system
+ event log. In addition, the controller is automatically reset,
+ restoring the network connection. To eliminate the potential
+ for the hang ensure that RxIntDelay is set to 0.
+
+RxAbsIntDelay
+-------------
+(This parameter is supported only on 82540, 82545 and later adapters.)
+Valid Range: 0-65535 (0=off)
+Default Value: 128
+
+This value, in units of 1.024 microseconds, limits the delay in which a
+receive interrupt is generated. Useful only if RxIntDelay is non-zero,
+this value ensures that an interrupt is generated after the initial
+packet is received within the set amount of time. Proper tuning,
+along with RxIntDelay, may improve traffic throughput in specific network
+conditions.
+
+Speed
+-----
+(This parameter is supported only on adapters with copper connections.)
+Valid Settings: 0, 10, 100, 1000
+Default Value: 0 (auto-negotiate at all supported speeds)
+
+Speed forces the line speed to the specified value in megabits per second
+(Mbps). If this parameter is not specified or is set to 0 and the link
+partner is set to auto-negotiate, the board will auto-detect the correct
+speed. Duplex should also be set when Speed is set to either 10 or 100.
+
+TxDescriptors
+-------------
+Valid Range: 80-256 for 82542 and 82543-based adapters
+ 80-4096 for all other supported adapters
+Default Value: 256
+
+This value is the number of transmit descriptors allocated by the driver.
+Increasing this value allows the driver to queue more transmits. Each
+descriptor is 16 bytes.
+
+NOTE: Depending on the available system resources, the request for a
+ higher number of transmit descriptors may be denied. In this case,
+ use a lower number.
+
+TxDescriptorStep
+----------------
+Valid Range: 1 (use every Tx Descriptor)
+ 4 (use every 4th Tx Descriptor)
+
+Default Value: 1 (use every Tx Descriptor)
+
+On certain non-Intel architectures, it has been observed that intense TX
+traffic bursts of short packets may result in an improper descriptor
+writeback. If this occurs, the driver will report a "TX Timeout" and reset
+the adapter, after which the transmit flow will restart, though data may
+have stalled for as much as 10 seconds before it resumes.
+
+The improper writeback does not occur on the first descriptor in a system
+memory cache-line, which is typically 32 bytes, or 4 descriptors long.
+
+Setting TxDescriptorStep to a value of 4 will ensure that all TX descriptors
+are aligned to the start of a system memory cache line, and so this problem
+will not occur.
+
+NOTES: Setting TxDescriptorStep to 4 effectively reduces the number of
+ TxDescriptors available for transmits to 1/4 of the normal allocation.
+ This has a possible negative performance impact, which may be
+ compensated for by allocating more descriptors using the TxDescriptors
+ module parameter.
+
+ There are other conditions which may result in "TX Timeout", which will
+ not be resolved by the use of the TxDescriptorStep parameter. As the
+ issue addressed by this parameter has never been observed on Intel
+ Architecture platforms, it should not be used on Intel platforms.
+
+TxIntDelay
+----------
+Valid Range: 0-65535 (0=off)
+Default Value: 64
+
+This value delays the generation of transmit interrupts in units of
+1.024 microseconds. Transmit interrupt reduction can improve CPU
+efficiency if properly tuned for specific network traffic. If the
+system is reporting dropped transmits, this value may be set too high
+causing the driver to run out of available transmit descriptors.
+
+TxAbsIntDelay
+-------------
+(This parameter is supported only on 82540, 82545 and later adapters.)
+Valid Range: 0-65535 (0=off)
+Default Value: 64
+
+This value, in units of 1.024 microseconds, limits the delay in which a
+transmit interrupt is generated. Useful only if TxIntDelay is non-zero,
+this value ensures that an interrupt is generated after the initial
+packet is sent on the wire within the set amount of time. Proper tuning,
+along with TxIntDelay, may improve traffic throughput in specific
+network conditions.
+
+XsumRX
+------
+(This parameter is NOT supported on the 82542-based adapter.)
+Valid Range: 0-1
+Default Value: 1
+
+A value of '1' indicates that the driver should enable IP checksum
+offload for received packets (both UDP and TCP) to the adapter hardware.
+
+Copybreak
+---------
+Valid Range: 0-xxxxxxx (0=off)
+Default Value: 256
+Usage: insmod e1000.ko copybreak=128
+
+Driver copies all packets below or equaling this size to a fresh RX
+buffer before handing it up the stack.
+
+This parameter is different than other parameters, in that it is a
+single (not 1,1,1 etc.) parameter applied to all driver instances and
+it is also available during runtime at
+/sys/module/e1000/parameters/copybreak
+
+SmartPowerDownEnable
+--------------------
+Valid Range: 0-1
+Default Value: 0 (disabled)
+
+Allows PHY to turn off in lower power states. The user can turn off
+this parameter in supported chipsets.
+
+KumeranLockLoss
+---------------
+Valid Range: 0-1
+Default Value: 1 (enabled)
+
+This workaround skips resetting the PHY at shutdown for the initial
+silicon releases of ICH8 systems.
+
+Speed and Duplex Configuration
+==============================
+
+Three keywords are used to control the speed and duplex configuration.
+These keywords are Speed, Duplex, and AutoNeg.
+
+If the board uses a fiber interface, these keywords are ignored, and the
+fiber interface board only links at 1000 Mbps full-duplex.
+
+For copper-based boards, the keywords interact as follows:
+
+ The default operation is auto-negotiate. The board advertises all
+ supported speed and duplex combinations, and it links at the highest
+ common speed and duplex mode IF the link partner is set to auto-negotiate.
+
+ If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps
+ is advertised (The 1000BaseT spec requires auto-negotiation.)
+
+ If Speed = 10 or 100, then both Speed and Duplex should be set. Auto-
+ negotiation is disabled, and the AutoNeg parameter is ignored. Partner
+ SHOULD also be forced.
+
+The AutoNeg parameter is used when more control is required over the
+auto-negotiation process. It should be used when you wish to control which
+speed and duplex combinations are advertised during the auto-negotiation
+process.
+
+The parameter may be specified as either a decimal or hexadecimal value as
+determined by the bitmap below.
+
+Bit position 7 6 5 4 3 2 1 0
+Decimal Value 128 64 32 16 8 4 2 1
+Hex value 80 40 20 10 8 4 2 1
+Speed (Mbps) N/A N/A 1000 N/A 100 100 10 10
+Duplex Full Full Half Full Half
+
+Some examples of using AutoNeg:
+
+ modprobe e1000 AutoNeg=0x01 (Restricts autonegotiation to 10 Half)
+ modprobe e1000 AutoNeg=1 (Same as above)
+ modprobe e1000 AutoNeg=0x02 (Restricts autonegotiation to 10 Full)
+ modprobe e1000 AutoNeg=0x03 (Restricts autonegotiation to 10 Half or 10 Full)
+ modprobe e1000 AutoNeg=0x04 (Restricts autonegotiation to 100 Half)
+ modprobe e1000 AutoNeg=0x05 (Restricts autonegotiation to 10 Half or 100
+ Half)
+ modprobe e1000 AutoNeg=0x020 (Restricts autonegotiation to 1000 Full)
+ modprobe e1000 AutoNeg=32 (Same as above)
+
+Note that when this parameter is used, Speed and Duplex must not be specified.
+
+If the link partner is forced to a specific speed and duplex, then this
+parameter should not be used. Instead, use the Speed and Duplex parameters
+previously mentioned to force the adapter to the same speed and duplex.
+
+Additional Configurations
+=========================
+
+ Jumbo Frames
+ ------------
+ Jumbo Frames support is enabled by changing the MTU to a value larger than
+ the default of 1500. Use the ifconfig command to increase the MTU size.
+ For example:
+
+ ifconfig eth<x> mtu 9000 up
+
+ This setting is not saved across reboots. It can be made permanent if
+ you add:
+
+ MTU=9000
+
+ to the file /etc/sysconfig/network-scripts/ifcfg-eth<x>. This example
+ applies to the Red Hat distributions; other distributions may store this
+ setting in a different location.
+
+ Notes:
+ Degradation in throughput performance may be observed in some Jumbo frames
+ environments. If this is observed, increasing the application's socket buffer
+ size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help.
+ See the specific application manual and /usr/src/linux*/Documentation/
+ networking/ip-sysctl.txt for more details.
+
+ - The maximum MTU setting for Jumbo Frames is 16110. This value coincides
+ with the maximum Jumbo Frames size of 16128.
+
+ - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
+ poor performance or loss of link.
+
+ - Adapters based on the Intel(R) 82542 and 82573V/E controller do not
+ support Jumbo Frames. These correspond to the following product names:
+ Intel(R) PRO/1000 Gigabit Server Adapter
+ Intel(R) PRO/1000 PM Network Connection
+
+ ethtool
+ -------
+ The driver utilizes the ethtool interface for driver configuration and
+ diagnostics, as well as displaying statistical information. The ethtool
+ version 1.6 or later is required for this functionality.
+
+ The latest release of ethtool can be found from
+ http://ftp.kernel.org/pub/software/network/ethtool/
+
+ Enabling Wake on LAN* (WoL)
+ ---------------------------
+ WoL is configured through the ethtool* utility.
+
+ WoL will be enabled on the system during the next shut down or reboot.
+ For this driver version, in order to enable WoL, the e1000 driver must be
+ loaded when shutting down or rebooting the system.
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ http://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/e1000e.txt b/Documentation/networking/e1000e.txt
new file mode 100644
index 000000000..ad2d9f38c
--- /dev/null
+++ b/Documentation/networking/e1000e.txt
@@ -0,0 +1,312 @@
+Linux* Driver for Intel(R) Ethernet Network Connection
+======================================================
+
+Intel Gigabit Linux driver.
+Copyright(c) 1999 - 2013 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Command Line Parameters
+- Additional Configurations
+- Support
+
+Identifying Your Adapter
+========================
+
+The e1000e driver supports all PCI Express Intel(R) Gigabit Network
+Connections, except those that are 82575, 82576 and 82580-based*.
+
+* NOTE: The Intel(R) PRO/1000 P Dual Port Server Adapter is supported by
+ the e1000 driver, not the e1000e driver due to the 82546 part being used
+ behind a PCI Express bridge.
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ http://support.intel.com/support/go/network/adapter/idguide.htm
+
+For the latest Intel network drivers for Linux, refer to the following
+website. In the search field, enter your adapter name or type, or use the
+networking link on the left to search for your adapter:
+
+ http://support.intel.com/support/go/network/adapter/home.htm
+
+Command Line Parameters
+=======================
+
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+NOTES: For more information about the InterruptThrottleRate,
+ RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay
+ parameters, see the application note at:
+ http://www.intel.com/design/network/applnots/ap450.htm
+
+InterruptThrottleRate
+---------------------
+Valid Range: 0,1,3,4,100-100000 (0=off, 1=dynamic, 3=dynamic conservative,
+ 4=simplified balancing)
+Default Value: 3
+
+The driver can limit the amount of interrupts per second that the adapter
+will generate for incoming packets. It does this by writing a value to the
+adapter that is based on the maximum amount of interrupts that the adapter
+will generate per second.
+
+Setting InterruptThrottleRate to a value greater or equal to 100
+will program the adapter to send out a maximum of that many interrupts
+per second, even if more packets have come in. This reduces interrupt
+load on the system and can lower CPU utilization under heavy load,
+but will increase latency as packets are not processed as quickly.
+
+The default behaviour of the driver previously assumed a static
+InterruptThrottleRate value of 8000, providing a good fallback value for
+all traffic types, but lacking in small packet performance and latency.
+The hardware can handle many more small packets per second however, and
+for this reason an adaptive interrupt moderation algorithm was implemented.
+
+The driver has two adaptive modes (setting 1 or 3) in which
+it dynamically adjusts the InterruptThrottleRate value based on the traffic
+that it receives. After determining the type of incoming traffic in the last
+timeframe, it will adjust the InterruptThrottleRate to an appropriate value
+for that traffic.
+
+The algorithm classifies the incoming traffic every interval into
+classes. Once the class is determined, the InterruptThrottleRate value is
+adjusted to suit that traffic type the best. There are three classes defined:
+"Bulk traffic", for large amounts of packets of normal size; "Low latency",
+for small amounts of traffic and/or a significant percentage of small
+packets; and "Lowest latency", for almost completely small packets or
+minimal traffic.
+
+In dynamic conservative mode, the InterruptThrottleRate value is set to 4000
+for traffic that falls in class "Bulk traffic". If traffic falls in the "Low
+latency" or "Lowest latency" class, the InterruptThrottleRate is increased
+stepwise to 20000. This default mode is suitable for most applications.
+
+For situations where low latency is vital such as cluster or
+grid computing, the algorithm can reduce latency even more when
+InterruptThrottleRate is set to mode 1. In this mode, which operates
+the same as mode 3, the InterruptThrottleRate will be increased stepwise to
+70000 for traffic in class "Lowest latency".
+
+In simplified mode the interrupt rate is based on the ratio of TX and
+RX traffic. If the bytes per second rate is approximately equal, the
+interrupt rate will drop as low as 2000 interrupts per second. If the
+traffic is mostly transmit or mostly receive, the interrupt rate could
+be as high as 8000.
+
+Setting InterruptThrottleRate to 0 turns off any interrupt moderation
+and may improve small packet latency, but is generally not suitable
+for bulk throughput traffic.
+
+NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and
+ RxAbsIntDelay parameters. In other words, minimizing the receive
+ and/or transmit absolute delays does not force the controller to
+ generate more interrupts than what the Interrupt Throttle Rate
+ allows.
+
+NOTE: When e1000e is loaded with default settings and multiple adapters
+ are in use simultaneously, the CPU utilization may increase non-
+ linearly. In order to limit the CPU utilization without impacting
+ the overall throughput, we recommend that you load the driver as
+ follows:
+
+ modprobe e1000e InterruptThrottleRate=3000,3000,3000
+
+ This sets the InterruptThrottleRate to 3000 interrupts/sec for
+ the first, second, and third instances of the driver. The range
+ of 2000 to 3000 interrupts per second works on a majority of
+ systems and is a good starting point, but the optimal value will
+ be platform-specific. If CPU utilization is not a concern, use
+ RX_POLLING (NAPI) and default driver settings.
+
+RxIntDelay
+----------
+Valid Range: 0-65535 (0=off)
+Default Value: 0
+
+This value delays the generation of receive interrupts in units of 1.024
+microseconds. Receive interrupt reduction can improve CPU efficiency if
+properly tuned for specific network traffic. Increasing this value adds
+extra latency to frame reception and can end up decreasing the throughput
+of TCP traffic. If the system is reporting dropped receives, this value
+may be set too high, causing the driver to run out of available receive
+descriptors.
+
+CAUTION: When setting RxIntDelay to a value other than 0, adapters may
+ hang (stop transmitting) under certain network conditions. If
+ this occurs a NETDEV WATCHDOG message is logged in the system
+ event log. In addition, the controller is automatically reset,
+ restoring the network connection. To eliminate the potential
+ for the hang ensure that RxIntDelay is set to 0.
+
+RxAbsIntDelay
+-------------
+Valid Range: 0-65535 (0=off)
+Default Value: 8
+
+This value, in units of 1.024 microseconds, limits the delay in which a
+receive interrupt is generated. Useful only if RxIntDelay is non-zero,
+this value ensures that an interrupt is generated after the initial
+packet is received within the set amount of time. Proper tuning,
+along with RxIntDelay, may improve traffic throughput in specific network
+conditions.
+
+TxIntDelay
+----------
+Valid Range: 0-65535 (0=off)
+Default Value: 8
+
+This value delays the generation of transmit interrupts in units of
+1.024 microseconds. Transmit interrupt reduction can improve CPU
+efficiency if properly tuned for specific network traffic. If the
+system is reporting dropped transmits, this value may be set too high
+causing the driver to run out of available transmit descriptors.
+
+TxAbsIntDelay
+-------------
+Valid Range: 0-65535 (0=off)
+Default Value: 32
+
+This value, in units of 1.024 microseconds, limits the delay in which a
+transmit interrupt is generated. Useful only if TxIntDelay is non-zero,
+this value ensures that an interrupt is generated after the initial
+packet is sent on the wire within the set amount of time. Proper tuning,
+along with TxIntDelay, may improve traffic throughput in specific
+network conditions.
+
+Copybreak
+---------
+Valid Range: 0-xxxxxxx (0=off)
+Default Value: 256
+
+Driver copies all packets below or equaling this size to a fresh RX
+buffer before handing it up the stack.
+
+This parameter is different than other parameters, in that it is a
+single (not 1,1,1 etc.) parameter applied to all driver instances and
+it is also available during runtime at
+/sys/module/e1000e/parameters/copybreak
+
+SmartPowerDownEnable
+--------------------
+Valid Range: 0-1
+Default Value: 0 (disabled)
+
+Allows PHY to turn off in lower power states. The user can set this parameter
+in supported chipsets.
+
+KumeranLockLoss
+---------------
+Valid Range: 0-1
+Default Value: 1 (enabled)
+
+This workaround skips resetting the PHY at shutdown for the initial
+silicon releases of ICH8 systems.
+
+IntMode
+-------
+Valid Range: 0-2 (0=legacy, 1=MSI, 2=MSI-X)
+Default Value: 2
+
+Allows changing the interrupt mode at module load time, without requiring a
+recompile. If the driver load fails to enable a specific interrupt mode, the
+driver will try other interrupt modes, from least to most compatible. The
+interrupt order is MSI-X, MSI, Legacy. If specifying MSI (IntMode=1)
+interrupts, only MSI and Legacy will be attempted.
+
+CrcStripping
+------------
+Valid Range: 0-1
+Default Value: 1 (enabled)
+
+Strip the CRC from received packets before sending up the network stack. If
+you have a machine with a BMC enabled but cannot receive IPMI traffic after
+loading or enabling the driver, try disabling this feature.
+
+WriteProtectNVM
+---------------
+Valid Range: 0,1
+Default Value: 1
+
+If set to 1, configure the hardware to ignore all write/erase cycles to the
+GbE region in the ICHx NVM (in order to prevent accidental corruption of the
+NVM). This feature can be disabled by setting the parameter to 0 during initial
+driver load.
+NOTE: The machine must be power cycled (full off/on) when enabling NVM writes
+via setting the parameter to zero. Once the NVM has been locked (via the
+parameter at 1 when the driver loads) it cannot be unlocked except via power
+cycle.
+
+Additional Configurations
+=========================
+
+ Jumbo Frames
+ ------------
+ Jumbo Frames support is enabled by changing the MTU to a value larger than
+ the default of 1500. Use the ifconfig command to increase the MTU size.
+ For example:
+
+ ifconfig eth<x> mtu 9000 up
+
+ This setting is not saved across reboots.
+
+ Notes:
+
+ - The maximum MTU setting for Jumbo Frames is 9216. This value coincides
+ with the maximum Jumbo Frames size of 9234 bytes.
+
+ - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
+ poor performance or loss of link.
+
+ - Some adapters limit Jumbo Frames sized packets to a maximum of
+ 4096 bytes and some adapters do not support Jumbo Frames.
+
+ - Jumbo Frames cannot be configured on an 82579-based Network device, if
+ MACSec is enabled on the system.
+
+ ethtool
+ -------
+ The driver utilizes the ethtool interface for driver configuration and
+ diagnostics, as well as displaying statistical information. We
+ strongly recommend downloading the latest version of ethtool at:
+
+ http://ftp.kernel.org/pub/software/network/ethtool/
+
+ NOTE: When validating enable/disable tests on some parts (82578, for example)
+ you need to add a few seconds between tests when working with ethtool.
+
+ Speed and Duplex
+ ----------------
+ Speed and Duplex are configured through the ethtool* utility. For
+ instructions, refer to the ethtool man page.
+
+ Enabling Wake on LAN* (WoL)
+ ---------------------------
+ WoL is configured through the ethtool* utility. For instructions on
+ enabling WoL with ethtool, refer to the ethtool man page.
+
+ WoL will be enabled on the system during the next shut down or reboot.
+ For this driver version, in order to enable WoL, the e1000e driver must be
+ loaded when shutting down or rebooting the system.
+
+ In most cases Wake On LAN is only supported on port A for multiple port
+ adapters. To verify if a port supports Wake on Lan run ethtool eth<X>.
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/eql.txt b/Documentation/networking/eql.txt
new file mode 100644
index 000000000..0f1550150
--- /dev/null
+++ b/Documentation/networking/eql.txt
@@ -0,0 +1,528 @@
+ EQL Driver: Serial IP Load Balancing HOWTO
+ Simon "Guru Aleph-Null" Janes, simon@ncm.com
+ v1.1, February 27, 1995
+
+ This is the manual for the EQL device driver. EQL is a software device
+ that lets you load-balance IP serial links (SLIP or uncompressed PPP)
+ to increase your bandwidth. It will not reduce your latency (i.e. ping
+ times) except in the case where you already have lots of traffic on
+ your link, in which it will help them out. This driver has been tested
+ with the 1.1.75 kernel, and is known to have patched cleanly with
+ 1.1.86. Some testing with 1.1.92 has been done with the v1.1 patch
+ which was only created to patch cleanly in the very latest kernel
+ source trees. (Yes, it worked fine.)
+
+ 1. Introduction
+
+ Which is worse? A huge fee for a 56K leased line or two phone lines?
+ It's probably the former. If you find yourself craving more bandwidth,
+ and have a ISP that is flexible, it is now possible to bind modems
+ together to work as one point-to-point link to increase your
+ bandwidth. All without having to have a special black box on either
+ side.
+
+
+ The eql driver has only been tested with the Livingston PortMaster-2e
+ terminal server. I do not know if other terminal servers support load-
+ balancing, but I do know that the PortMaster does it, and does it
+ almost as well as the eql driver seems to do it (-- Unfortunately, in
+ my testing so far, the Livingston PortMaster 2e's load-balancing is a
+ good 1 to 2 KB/s slower than the test machine working with a 28.8 Kbps
+ and 14.4 Kbps connection. However, I am not sure that it really is
+ the PortMaster, or if it's Linux's TCP drivers. I'm told that Linux's
+ TCP implementation is pretty fast though.--)
+
+
+ I suggest to ISPs out there that it would probably be fair to charge
+ a load-balancing client 75% of the cost of the second line and 50% of
+ the cost of the third line etc...
+
+
+ Hey, we can all dream you know...
+
+
+ 2. Kernel Configuration
+
+ Here I describe the general steps of getting a kernel up and working
+ with the eql driver. From patching, building, to installing.
+
+
+ 2.1. Patching The Kernel
+
+ If you do not have or cannot get a copy of the kernel with the eql
+ driver folded into it, get your copy of the driver from
+ ftp://slaughter.ncm.com/pub/Linux/LOAD_BALANCING/eql-1.1.tar.gz.
+ Unpack this archive someplace obvious like /usr/local/src/. It will
+ create the following files:
+
+
+
+ ______________________________________________________________________
+ -rw-r--r-- guru/ncm 198 Jan 19 18:53 1995 eql-1.1/NO-WARRANTY
+ -rw-r--r-- guru/ncm 30620 Feb 27 21:40 1995 eql-1.1/eql-1.1.patch
+ -rwxr-xr-x guru/ncm 16111 Jan 12 22:29 1995 eql-1.1/eql_enslave
+ -rw-r--r-- guru/ncm 2195 Jan 10 21:48 1995 eql-1.1/eql_enslave.c
+ ______________________________________________________________________
+
+ Unpack a recent kernel (something after 1.1.92) someplace convenient
+ like say /usr/src/linux-1.1.92.eql. Use symbolic links to point
+ /usr/src/linux to this development directory.
+
+
+ Apply the patch by running the commands:
+
+
+ ______________________________________________________________________
+ cd /usr/src
+ patch </usr/local/src/eql-1.1/eql-1.1.patch
+ ______________________________________________________________________
+
+
+
+
+
+ 2.2. Building The Kernel
+
+ After patching the kernel, run make config and configure the kernel
+ for your hardware.
+
+
+ After configuration, make and install according to your habit.
+
+
+ 3. Network Configuration
+
+ So far, I have only used the eql device with the DSLIP SLIP connection
+ manager by Matt Dillon (-- "The man who sold his soul to code so much
+ so quickly."--) . How you configure it for other "connection"
+ managers is up to you. Most other connection managers that I've seen
+ don't do a very good job when it comes to handling more than one
+ connection.
+
+
+ 3.1. /etc/rc.d/rc.inet1
+
+ In rc.inet1, ifconfig the eql device to the IP address you usually use
+ for your machine, and the MTU you prefer for your SLIP lines. One
+ could argue that MTU should be roughly half the usual size for two
+ modems, one-third for three, one-fourth for four, etc... But going
+ too far below 296 is probably overkill. Here is an example ifconfig
+ command that sets up the eql device:
+
+
+
+ ______________________________________________________________________
+ ifconfig eql 198.67.33.239 mtu 1006
+ ______________________________________________________________________
+
+
+
+
+
+ Once the eql device is up and running, add a static default route to
+ it in the routing table using the cool new route syntax that makes
+ life so much easier:
+
+
+
+ ______________________________________________________________________
+ route add default eql
+ ______________________________________________________________________
+
+
+ 3.2. Enslaving Devices By Hand
+
+ Enslaving devices by hand requires two utility programs: eql_enslave
+ and eql_emancipate (-- eql_emancipate hasn't been written because when
+ an enslaved device "dies", it is automatically taken out of the queue.
+ I haven't found a good reason to write it yet... other than for
+ completeness, but that isn't a good motivator is it?--)
+
+
+ The syntax for enslaving a device is "eql_enslave <master-name>
+ <slave-name> <estimated-bps>". Here are some example enslavings:
+
+
+
+ ______________________________________________________________________
+ eql_enslave eql sl0 28800
+ eql_enslave eql ppp0 14400
+ eql_enslave eql sl1 57600
+ ______________________________________________________________________
+
+
+
+
+
+ When you want to free a device from its life of slavery, you can
+ either down the device with ifconfig (eql will automatically bury the
+ dead slave and remove it from its queue) or use eql_emancipate to free
+ it. (-- Or just ifconfig it down, and the eql driver will take it out
+ for you.--)
+
+
+
+ ______________________________________________________________________
+ eql_emancipate eql sl0
+ eql_emancipate eql ppp0
+ eql_emancipate eql sl1
+ ______________________________________________________________________
+
+
+
+
+
+ 3.3. DSLIP Configuration for the eql Device
+
+ The general idea is to bring up and keep up as many SLIP connections
+ as you need, automatically.
+
+
+ 3.3.1. /etc/slip/runslip.conf
+
+ Here is an example runslip.conf:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ______________________________________________________________________
+ name sl-line-1
+ enabled
+ baud 38400
+ mtu 576
+ ducmd -e /etc/slip/dialout/cua2-288.xp -t 9
+ command eql_enslave eql $interface 28800
+ address 198.67.33.239
+ line /dev/cua2
+
+ name sl-line-2
+ enabled
+ baud 38400
+ mtu 576
+ ducmd -e /etc/slip/dialout/cua3-288.xp -t 9
+ command eql_enslave eql $interface 28800
+ address 198.67.33.239
+ line /dev/cua3
+ ______________________________________________________________________
+
+
+
+
+
+ 3.4. Using PPP and the eql Device
+
+ I have not yet done any load-balancing testing for PPP devices, mainly
+ because I don't have a PPP-connection manager like SLIP has with
+ DSLIP. I did find a good tip from LinuxNET:Billy for PPP performance:
+ make sure you have asyncmap set to something so that control
+ characters are not escaped.
+
+
+ I tried to fix up a PPP script/system for redialing lost PPP
+ connections for use with the eql driver the weekend of Feb 25-26 '95
+ (Hereafter known as the 8-hour PPP Hate Festival). Perhaps later this
+ year.
+
+
+ 4. About the Slave Scheduler Algorithm
+
+ The slave scheduler probably could be replaced with a dozen other
+ things and push traffic much faster. The formula in the current set
+ up of the driver was tuned to handle slaves with wildly different
+ bits-per-second "priorities".
+
+
+ All testing I have done was with two 28.8 V.FC modems, one connecting
+ at 28800 bps or slower, and the other connecting at 14400 bps all the
+ time.
+
+
+ One version of the scheduler was able to push 5.3 K/s through the
+ 28800 and 14400 connections, but when the priorities on the links were
+ very wide apart (57600 vs. 14400) the "faster" modem received all
+ traffic and the "slower" modem starved.
+
+
+ 5. Testers' Reports
+
+ Some people have experimented with the eql device with newer
+ kernels (than 1.1.75). I have since updated the driver to patch
+ cleanly in newer kernels because of the removal of the old "slave-
+ balancing" driver config option.
+
+
+ o icee from LinuxNET patched 1.1.86 without any rejects and was able
+ to boot the kernel and enslave a couple of ISDN PPP links.
+
+ 5.1. Randolph Bentson's Test Report
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ From bentson@grieg.seaslug.org Wed Feb 8 19:08:09 1995
+ Date: Tue, 7 Feb 95 22:57 PST
+ From: Randolph Bentson <bentson@grieg.seaslug.org>
+ To: guru@ncm.com
+ Subject: EQL driver tests
+
+
+ I have been checking out your eql driver. (Nice work, that!)
+ Although you may already done this performance testing, here
+ are some data I've discovered.
+
+ Randolph Bentson
+ bentson@grieg.seaslug.org
+
+ ---------------------------------------------------------
+
+
+ A pseudo-device driver, EQL, written by Simon Janes, can be used
+ to bundle multiple SLIP connections into what appears to be a
+ single connection. This allows one to improve dial-up network
+ connectivity gradually, without having to buy expensive DSU/CSU
+ hardware and services.
+
+ I have done some testing of this software, with two goals in
+ mind: first, to ensure it actually works as described and
+ second, as a method of exercising my device driver.
+
+ The following performance measurements were derived from a set
+ of SLIP connections run between two Linux systems (1.1.84) using
+ a 486DX2/66 with a Cyclom-8Ys and a 486SLC/40 with a Cyclom-16Y.
+ (Ports 0,1,2,3 were used. A later configuration will distribute
+ port selection across the different Cirrus chips on the boards.)
+ Once a link was established, I timed a binary ftp transfer of
+ 289284 bytes of data. If there were no overhead (packet headers,
+ inter-character and inter-packet delays, etc.) the transfers
+ would take the following times:
+
+ bits/sec seconds
+ 345600 8.3
+ 234600 12.3
+ 172800 16.7
+ 153600 18.8
+ 76800 37.6
+ 57600 50.2
+ 38400 75.3
+ 28800 100.4
+ 19200 150.6
+ 9600 301.3
+
+ A single line running at the lower speeds and with large packets
+ comes to within 2% of this. Performance is limited for the higher
+ speeds (as predicted by the Cirrus databook) to an aggregate of
+ about 160 kbits/sec. The next round of testing will distribute
+ the load across two or more Cirrus chips.
+
+ The good news is that one gets nearly the full advantage of the
+ second, third, and fourth line's bandwidth. (The bad news is
+ that the connection establishment seemed fragile for the higher
+ speeds. Once established, the connection seemed robust enough.)
+
+ #lines speed mtu seconds theory actual %of
+ kbit/sec duration speed speed max
+ 3 115200 900 _ 345600
+ 3 115200 400 18.1 345600 159825 46
+ 2 115200 900 _ 230400
+ 2 115200 600 18.1 230400 159825 69
+ 2 115200 400 19.3 230400 149888 65
+ 4 57600 900 _ 234600
+ 4 57600 600 _ 234600
+ 4 57600 400 _ 234600
+ 3 57600 600 20.9 172800 138413 80
+ 3 57600 900 21.2 172800 136455 78
+ 3 115200 600 21.7 345600 133311 38
+ 3 57600 400 22.5 172800 128571 74
+ 4 38400 900 25.2 153600 114795 74
+ 4 38400 600 26.4 153600 109577 71
+ 4 38400 400 27.3 153600 105965 68
+ 2 57600 900 29.1 115200 99410.3 86
+ 1 115200 900 30.7 115200 94229.3 81
+ 2 57600 600 30.2 115200 95789.4 83
+ 3 38400 900 30.3 115200 95473.3 82
+ 3 38400 600 31.2 115200 92719.2 80
+ 1 115200 600 31.3 115200 92423 80
+ 2 57600 400 32.3 115200 89561.6 77
+ 1 115200 400 32.8 115200 88196.3 76
+ 3 38400 400 33.5 115200 86353.4 74
+ 2 38400 900 43.7 76800 66197.7 86
+ 2 38400 600 44 76800 65746.4 85
+ 2 38400 400 47.2 76800 61289 79
+ 4 19200 900 50.8 76800 56945.7 74
+ 4 19200 400 53.2 76800 54376.7 70
+ 4 19200 600 53.7 76800 53870.4 70
+ 1 57600 900 54.6 57600 52982.4 91
+ 1 57600 600 56.2 57600 51474 89
+ 3 19200 900 60.5 57600 47815.5 83
+ 1 57600 400 60.2 57600 48053.8 83
+ 3 19200 600 62 57600 46658.7 81
+ 3 19200 400 64.7 57600 44711.6 77
+ 1 38400 900 79.4 38400 36433.8 94
+ 1 38400 600 82.4 38400 35107.3 91
+ 2 19200 900 84.4 38400 34275.4 89
+ 1 38400 400 86.8 38400 33327.6 86
+ 2 19200 600 87.6 38400 33023.3 85
+ 2 19200 400 91.2 38400 31719.7 82
+ 4 9600 900 94.7 38400 30547.4 79
+ 4 9600 400 106 38400 27290.9 71
+ 4 9600 600 110 38400 26298.5 68
+ 3 9600 900 118 28800 24515.6 85
+ 3 9600 600 120 28800 24107 83
+ 3 9600 400 131 28800 22082.7 76
+ 1 19200 900 155 19200 18663.5 97
+ 1 19200 600 161 19200 17968 93
+ 1 19200 400 170 19200 17016.7 88
+ 2 9600 600 176 19200 16436.6 85
+ 2 9600 900 180 19200 16071.3 83
+ 2 9600 400 181 19200 15982.5 83
+ 1 9600 900 305 9600 9484.72 98
+ 1 9600 600 314 9600 9212.87 95
+ 1 9600 400 332 9600 8713.37 90
+
+
+
+
+
+ 5.2. Anthony Healy's Report
+
+
+
+
+
+
+
+ Date: Mon, 13 Feb 1995 16:17:29 +1100 (EST)
+ From: Antony Healey <ahealey@st.nepean.uws.edu.au>
+ To: Simon Janes <guru@ncm.com>
+ Subject: Re: Load Balancing
+
+ Hi Simon,
+ I've installed your patch and it works great. I have trialed
+ it over twin SL/IP lines, just over null modems, but I was
+ able to data at over 48Kb/s [ISDN link -Simon]. I managed a
+ transfer of up to 7.5 Kbyte/s on one go, but averaged around
+ 6.4 Kbyte/s, which I think is pretty cool. :)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Documentation/networking/fib_trie.txt b/Documentation/networking/fib_trie.txt
new file mode 100644
index 000000000..fe7193885
--- /dev/null
+++ b/Documentation/networking/fib_trie.txt
@@ -0,0 +1,145 @@
+ LC-trie implementation notes.
+
+Node types
+----------
+leaf
+ An end node with data. This has a copy of the relevant key, along
+ with 'hlist' with routing table entries sorted by prefix length.
+ See struct leaf and struct leaf_info.
+
+trie node or tnode
+ An internal node, holding an array of child (leaf or tnode) pointers,
+ indexed through a subset of the key. See Level Compression.
+
+A few concepts explained
+------------------------
+Bits (tnode)
+ The number of bits in the key segment used for indexing into the
+ child array - the "child index". See Level Compression.
+
+Pos (tnode)
+ The position (in the key) of the key segment used for indexing into
+ the child array. See Path Compression.
+
+Path Compression / skipped bits
+ Any given tnode is linked to from the child array of its parent, using
+ a segment of the key specified by the parent's "pos" and "bits"
+ In certain cases, this tnode's own "pos" will not be immediately
+ adjacent to the parent (pos+bits), but there will be some bits
+ in the key skipped over because they represent a single path with no
+ deviations. These "skipped bits" constitute Path Compression.
+ Note that the search algorithm will simply skip over these bits when
+ searching, making it necessary to save the keys in the leaves to
+ verify that they actually do match the key we are searching for.
+
+Level Compression / child arrays
+ the trie is kept level balanced moving, under certain conditions, the
+ children of a full child (see "full_children") up one level, so that
+ instead of a pure binary tree, each internal node ("tnode") may
+ contain an arbitrarily large array of links to several children.
+ Conversely, a tnode with a mostly empty child array (see empty_children)
+ may be "halved", having some of its children moved downwards one level,
+ in order to avoid ever-increasing child arrays.
+
+empty_children
+ the number of positions in the child array of a given tnode that are
+ NULL.
+
+full_children
+ the number of children of a given tnode that aren't path compressed.
+ (in other words, they aren't NULL or leaves and their "pos" is equal
+ to this tnode's "pos"+"bits").
+
+ (The word "full" here is used more in the sense of "complete" than
+ as the opposite of "empty", which might be a tad confusing.)
+
+Comments
+---------
+
+We have tried to keep the structure of the code as close to fib_hash as
+possible to allow verification and help up reviewing.
+
+fib_find_node()
+ A good start for understanding this code. This function implements a
+ straightforward trie lookup.
+
+fib_insert_node()
+ Inserts a new leaf node in the trie. This is bit more complicated than
+ fib_find_node(). Inserting a new node means we might have to run the
+ level compression algorithm on part of the trie.
+
+trie_leaf_remove()
+ Looks up a key, deletes it and runs the level compression algorithm.
+
+trie_rebalance()
+ The key function for the dynamic trie after any change in the trie
+ it is run to optimize and reorganize. It will walk the trie upwards
+ towards the root from a given tnode, doing a resize() at each step
+ to implement level compression.
+
+resize()
+ Analyzes a tnode and optimizes the child array size by either inflating
+ or shrinking it repeatedly until it fulfills the criteria for optimal
+ level compression. This part follows the original paper pretty closely
+ and there may be some room for experimentation here.
+
+inflate()
+ Doubles the size of the child array within a tnode. Used by resize().
+
+halve()
+ Halves the size of the child array within a tnode - the inverse of
+ inflate(). Used by resize();
+
+fn_trie_insert(), fn_trie_delete(), fn_trie_select_default()
+ The route manipulation functions. Should conform pretty closely to the
+ corresponding functions in fib_hash.
+
+fn_trie_flush()
+ This walks the full trie (using nextleaf()) and searches for empty
+ leaves which have to be removed.
+
+fn_trie_dump()
+ Dumps the routing table ordered by prefix length. This is somewhat
+ slower than the corresponding fib_hash function, as we have to walk the
+ entire trie for each prefix length. In comparison, fib_hash is organized
+ as one "zone"/hash per prefix length.
+
+Locking
+-------
+
+fib_lock is used for an RW-lock in the same way that this is done in fib_hash.
+However, the functions are somewhat separated for other possible locking
+scenarios. It might conceivably be possible to run trie_rebalance via RCU
+to avoid read_lock in the fn_trie_lookup() function.
+
+Main lookup mechanism
+---------------------
+fn_trie_lookup() is the main lookup function.
+
+The lookup is in its simplest form just like fib_find_node(). We descend the
+trie, key segment by key segment, until we find a leaf. check_leaf() does
+the fib_semantic_match in the leaf's sorted prefix hlist.
+
+If we find a match, we are done.
+
+If we don't find a match, we enter prefix matching mode. The prefix length,
+starting out at the same as the key length, is reduced one step at a time,
+and we backtrack upwards through the trie trying to find a longest matching
+prefix. The goal is always to reach a leaf and get a positive result from the
+fib_semantic_match mechanism.
+
+Inside each tnode, the search for longest matching prefix consists of searching
+through the child array, chopping off (zeroing) the least significant "1" of
+the child index until we find a match or the child index consists of nothing but
+zeros.
+
+At this point we backtrack (t->stats.backtrack++) up the trie, continuing to
+chop off part of the key in order to find the longest matching prefix.
+
+At this point we will repeatedly descend subtries to look for a match, and there
+are some optimizations available that can provide us with "shortcuts" to avoid
+descending into dead ends. Look for "HL_OPTIMIZE" sections in the code.
+
+To alleviate any doubts about the correctness of the route selection process,
+a new netlink operation has been added. Look for NETLINK_FIB_LOOKUP, which
+gives userland access to fib_lookup().
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
new file mode 100644
index 000000000..135581f01
--- /dev/null
+++ b/Documentation/networking/filter.txt
@@ -0,0 +1,1297 @@
+Linux Socket Filtering aka Berkeley Packet Filter (BPF)
+=======================================================
+
+Introduction
+------------
+
+Linux Socket Filtering (LSF) is derived from the Berkeley Packet Filter.
+Though there are some distinct differences between the BSD and Linux
+Kernel filtering, but when we speak of BPF or LSF in Linux context, we
+mean the very same mechanism of filtering in the Linux kernel.
+
+BPF allows a user-space program to attach a filter onto any socket and
+allow or disallow certain types of data to come through the socket. LSF
+follows exactly the same filter code structure as BSD's BPF, so referring
+to the BSD bpf.4 manpage is very helpful in creating filters.
+
+On Linux, BPF is much simpler than on BSD. One does not have to worry
+about devices or anything like that. You simply create your filter code,
+send it to the kernel via the SO_ATTACH_FILTER option and if your filter
+code passes the kernel check on it, you then immediately begin filtering
+data on that socket.
+
+You can also detach filters from your socket via the SO_DETACH_FILTER
+option. This will probably not be used much since when you close a socket
+that has a filter on it the filter is automagically removed. The other
+less common case may be adding a different filter on the same socket where
+you had another filter that is still running: the kernel takes care of
+removing the old one and placing your new one in its place, assuming your
+filter has passed the checks, otherwise if it fails the old filter will
+remain on that socket.
+
+SO_LOCK_FILTER option allows to lock the filter attached to a socket. Once
+set, a filter cannot be removed or changed. This allows one process to
+setup a socket, attach a filter, lock it then drop privileges and be
+assured that the filter will be kept until the socket is closed.
+
+The biggest user of this construct might be libpcap. Issuing a high-level
+filter command like `tcpdump -i em1 port 22` passes through the libpcap
+internal compiler that generates a structure that can eventually be loaded
+via SO_ATTACH_FILTER to the kernel. `tcpdump -i em1 port 22 -ddd`
+displays what is being placed into this structure.
+
+Although we were only speaking about sockets here, BPF in Linux is used
+in many more places. There's xt_bpf for netfilter, cls_bpf in the kernel
+qdisc layer, SECCOMP-BPF (SECure COMPuting [1]), and lots of other places
+such as team driver, PTP code, etc where BPF is being used.
+
+ [1] Documentation/prctl/seccomp_filter.txt
+
+Original BPF paper:
+
+Steven McCanne and Van Jacobson. 1993. The BSD packet filter: a new
+architecture for user-level packet capture. In Proceedings of the
+USENIX Winter 1993 Conference Proceedings on USENIX Winter 1993
+Conference Proceedings (USENIX'93). USENIX Association, Berkeley,
+CA, USA, 2-2. [http://www.tcpdump.org/papers/bpf-usenix93.pdf]
+
+Structure
+---------
+
+User space applications include <linux/filter.h> which contains the
+following relevant structures:
+
+struct sock_filter { /* Filter block */
+ __u16 code; /* Actual filter code */
+ __u8 jt; /* Jump true */
+ __u8 jf; /* Jump false */
+ __u32 k; /* Generic multiuse field */
+};
+
+Such a structure is assembled as an array of 4-tuples, that contains
+a code, jt, jf and k value. jt and jf are jump offsets and k a generic
+value to be used for a provided code.
+
+struct sock_fprog { /* Required for SO_ATTACH_FILTER. */
+ unsigned short len; /* Number of filter blocks */
+ struct sock_filter __user *filter;
+};
+
+For socket filtering, a pointer to this structure (as shown in
+follow-up example) is being passed to the kernel through setsockopt(2).
+
+Example
+-------
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <arpa/inet.h>
+#include <linux/if_ether.h>
+/* ... */
+
+/* From the example above: tcpdump -i em1 port 22 -dd */
+struct sock_filter code[] = {
+ { 0x28, 0, 0, 0x0000000c },
+ { 0x15, 0, 8, 0x000086dd },
+ { 0x30, 0, 0, 0x00000014 },
+ { 0x15, 2, 0, 0x00000084 },
+ { 0x15, 1, 0, 0x00000006 },
+ { 0x15, 0, 17, 0x00000011 },
+ { 0x28, 0, 0, 0x00000036 },
+ { 0x15, 14, 0, 0x00000016 },
+ { 0x28, 0, 0, 0x00000038 },
+ { 0x15, 12, 13, 0x00000016 },
+ { 0x15, 0, 12, 0x00000800 },
+ { 0x30, 0, 0, 0x00000017 },
+ { 0x15, 2, 0, 0x00000084 },
+ { 0x15, 1, 0, 0x00000006 },
+ { 0x15, 0, 8, 0x00000011 },
+ { 0x28, 0, 0, 0x00000014 },
+ { 0x45, 6, 0, 0x00001fff },
+ { 0xb1, 0, 0, 0x0000000e },
+ { 0x48, 0, 0, 0x0000000e },
+ { 0x15, 2, 0, 0x00000016 },
+ { 0x48, 0, 0, 0x00000010 },
+ { 0x15, 0, 1, 0x00000016 },
+ { 0x06, 0, 0, 0x0000ffff },
+ { 0x06, 0, 0, 0x00000000 },
+};
+
+struct sock_fprog bpf = {
+ .len = ARRAY_SIZE(code),
+ .filter = code,
+};
+
+sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+if (sock < 0)
+ /* ... bail out ... */
+
+ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf));
+if (ret < 0)
+ /* ... bail out ... */
+
+/* ... */
+close(sock);
+
+The above example code attaches a socket filter for a PF_PACKET socket
+in order to let all IPv4/IPv6 packets with port 22 pass. The rest will
+be dropped for this socket.
+
+The setsockopt(2) call to SO_DETACH_FILTER doesn't need any arguments
+and SO_LOCK_FILTER for preventing the filter to be detached, takes an
+integer value with 0 or 1.
+
+Note that socket filters are not restricted to PF_PACKET sockets only,
+but can also be used on other socket families.
+
+Summary of system calls:
+
+ * setsockopt(sockfd, SOL_SOCKET, SO_ATTACH_FILTER, &val, sizeof(val));
+ * setsockopt(sockfd, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val));
+ * setsockopt(sockfd, SOL_SOCKET, SO_LOCK_FILTER, &val, sizeof(val));
+
+Normally, most use cases for socket filtering on packet sockets will be
+covered by libpcap in high-level syntax, so as an application developer
+you should stick to that. libpcap wraps its own layer around all that.
+
+Unless i) using/linking to libpcap is not an option, ii) the required BPF
+filters use Linux extensions that are not supported by libpcap's compiler,
+iii) a filter might be more complex and not cleanly implementable with
+libpcap's compiler, or iv) particular filter codes should be optimized
+differently than libpcap's internal compiler does; then in such cases
+writing such a filter "by hand" can be of an alternative. For example,
+xt_bpf and cls_bpf users might have requirements that could result in
+more complex filter code, or one that cannot be expressed with libpcap
+(e.g. different return codes for various code paths). Moreover, BPF JIT
+implementors may wish to manually write test cases and thus need low-level
+access to BPF code as well.
+
+BPF engine and instruction set
+------------------------------
+
+Under tools/net/ there's a small helper tool called bpf_asm which can
+be used to write low-level filters for example scenarios mentioned in the
+previous section. Asm-like syntax mentioned here has been implemented in
+bpf_asm and will be used for further explanations (instead of dealing with
+less readable opcodes directly, principles are the same). The syntax is
+closely modelled after Steven McCanne's and Van Jacobson's BPF paper.
+
+The BPF architecture consists of the following basic elements:
+
+ Element Description
+
+ A 32 bit wide accumulator
+ X 32 bit wide X register
+ M[] 16 x 32 bit wide misc registers aka "scratch memory
+ store", addressable from 0 to 15
+
+A program, that is translated by bpf_asm into "opcodes" is an array that
+consists of the following elements (as already mentioned):
+
+ op:16, jt:8, jf:8, k:32
+
+The element op is a 16 bit wide opcode that has a particular instruction
+encoded. jt and jf are two 8 bit wide jump targets, one for condition
+"jump if true", the other one "jump if false". Eventually, element k
+contains a miscellaneous argument that can be interpreted in different
+ways depending on the given instruction in op.
+
+The instruction set consists of load, store, branch, alu, miscellaneous
+and return instructions that are also represented in bpf_asm syntax. This
+table lists all bpf_asm instructions available resp. what their underlying
+opcodes as defined in linux/filter.h stand for:
+
+ Instruction Addressing mode Description
+
+ ld 1, 2, 3, 4, 10 Load word into A
+ ldi 4 Load word into A
+ ldh 1, 2 Load half-word into A
+ ldb 1, 2 Load byte into A
+ ldx 3, 4, 5, 10 Load word into X
+ ldxi 4 Load word into X
+ ldxb 5 Load byte into X
+
+ st 3 Store A into M[]
+ stx 3 Store X into M[]
+
+ jmp 6 Jump to label
+ ja 6 Jump to label
+ jeq 7, 8 Jump on k == A
+ jneq 8 Jump on k != A
+ jne 8 Jump on k != A
+ jlt 8 Jump on k < A
+ jle 8 Jump on k <= A
+ jgt 7, 8 Jump on k > A
+ jge 7, 8 Jump on k >= A
+ jset 7, 8 Jump on k & A
+
+ add 0, 4 A + <x>
+ sub 0, 4 A - <x>
+ mul 0, 4 A * <x>
+ div 0, 4 A / <x>
+ mod 0, 4 A % <x>
+ neg 0, 4 !A
+ and 0, 4 A & <x>
+ or 0, 4 A | <x>
+ xor 0, 4 A ^ <x>
+ lsh 0, 4 A << <x>
+ rsh 0, 4 A >> <x>
+
+ tax Copy A into X
+ txa Copy X into A
+
+ ret 4, 9 Return
+
+The next table shows addressing formats from the 2nd column:
+
+ Addressing mode Syntax Description
+
+ 0 x/%x Register X
+ 1 [k] BHW at byte offset k in the packet
+ 2 [x + k] BHW at the offset X + k in the packet
+ 3 M[k] Word at offset k in M[]
+ 4 #k Literal value stored in k
+ 5 4*([k]&0xf) Lower nibble * 4 at byte offset k in the packet
+ 6 L Jump label L
+ 7 #k,Lt,Lf Jump to Lt if true, otherwise jump to Lf
+ 8 #k,Lt Jump to Lt if predicate is true
+ 9 a/%a Accumulator A
+ 10 extension BPF extension
+
+The Linux kernel also has a couple of BPF extensions that are used along
+with the class of load instructions by "overloading" the k argument with
+a negative offset + a particular extension offset. The result of such BPF
+extensions are loaded into A.
+
+Possible BPF extensions are shown in the following table:
+
+ Extension Description
+
+ len skb->len
+ proto skb->protocol
+ type skb->pkt_type
+ poff Payload start offset
+ ifidx skb->dev->ifindex
+ nla Netlink attribute of type X with offset A
+ nlan Nested Netlink attribute of type X with offset A
+ mark skb->mark
+ queue skb->queue_mapping
+ hatype skb->dev->type
+ rxhash skb->hash
+ cpu raw_smp_processor_id()
+ vlan_tci skb_vlan_tag_get(skb)
+ vlan_avail skb_vlan_tag_present(skb)
+ vlan_tpid skb->vlan_proto
+ rand prandom_u32()
+
+These extensions can also be prefixed with '#'.
+Examples for low-level BPF:
+
+** ARP packets:
+
+ ldh [12]
+ jne #0x806, drop
+ ret #-1
+ drop: ret #0
+
+** IPv4 TCP packets:
+
+ ldh [12]
+ jne #0x800, drop
+ ldb [23]
+ jneq #6, drop
+ ret #-1
+ drop: ret #0
+
+** (Accelerated) VLAN w/ id 10:
+
+ ld vlan_tci
+ jneq #10, drop
+ ret #-1
+ drop: ret #0
+
+** icmp random packet sampling, 1 in 4
+ ldh [12]
+ jne #0x800, drop
+ ldb [23]
+ jneq #1, drop
+ # get a random uint32 number
+ ld rand
+ mod #4
+ jneq #1, drop
+ ret #-1
+ drop: ret #0
+
+** SECCOMP filter example:
+
+ ld [4] /* offsetof(struct seccomp_data, arch) */
+ jne #0xc000003e, bad /* AUDIT_ARCH_X86_64 */
+ ld [0] /* offsetof(struct seccomp_data, nr) */
+ jeq #15, good /* __NR_rt_sigreturn */
+ jeq #231, good /* __NR_exit_group */
+ jeq #60, good /* __NR_exit */
+ jeq #0, good /* __NR_read */
+ jeq #1, good /* __NR_write */
+ jeq #5, good /* __NR_fstat */
+ jeq #9, good /* __NR_mmap */
+ jeq #14, good /* __NR_rt_sigprocmask */
+ jeq #13, good /* __NR_rt_sigaction */
+ jeq #35, good /* __NR_nanosleep */
+ bad: ret #0 /* SECCOMP_RET_KILL */
+ good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */
+
+The above example code can be placed into a file (here called "foo"), and
+then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf
+and cls_bpf understands and can directly be loaded with. Example with above
+ARP code:
+
+$ ./bpf_asm foo
+4,40 0 0 12,21 0 1 2054,6 0 0 4294967295,6 0 0 0,
+
+In copy and paste C-like output:
+
+$ ./bpf_asm -c foo
+{ 0x28, 0, 0, 0x0000000c },
+{ 0x15, 0, 1, 0x00000806 },
+{ 0x06, 0, 0, 0xffffffff },
+{ 0x06, 0, 0, 0000000000 },
+
+In particular, as usage with xt_bpf or cls_bpf can result in more complex BPF
+filters that might not be obvious at first, it's good to test filters before
+attaching to a live system. For that purpose, there's a small tool called
+bpf_dbg under tools/net/ in the kernel source directory. This debugger allows
+for testing BPF filters against given pcap files, single stepping through the
+BPF code on the pcap's packets and to do BPF machine register dumps.
+
+Starting bpf_dbg is trivial and just requires issuing:
+
+# ./bpf_dbg
+
+In case input and output do not equal stdin/stdout, bpf_dbg takes an
+alternative stdin source as a first argument, and an alternative stdout
+sink as a second one, e.g. `./bpf_dbg test_in.txt test_out.txt`.
+
+Other than that, a particular libreadline configuration can be set via
+file "~/.bpf_dbg_init" and the command history is stored in the file
+"~/.bpf_dbg_history".
+
+Interaction in bpf_dbg happens through a shell that also has auto-completion
+support (follow-up example commands starting with '>' denote bpf_dbg shell).
+The usual workflow would be to ...
+
+> load bpf 6,40 0 0 12,21 0 3 2048,48 0 0 23,21 0 1 1,6 0 0 65535,6 0 0 0
+ Loads a BPF filter from standard output of bpf_asm, or transformed via
+ e.g. `tcpdump -iem1 -ddd port 22 | tr '\n' ','`. Note that for JIT
+ debugging (next section), this command creates a temporary socket and
+ loads the BPF code into the kernel. Thus, this will also be useful for
+ JIT developers.
+
+> load pcap foo.pcap
+ Loads standard tcpdump pcap file.
+
+> run [<n>]
+bpf passes:1 fails:9
+ Runs through all packets from a pcap to account how many passes and fails
+ the filter will generate. A limit of packets to traverse can be given.
+
+> disassemble
+l0: ldh [12]
+l1: jeq #0x800, l2, l5
+l2: ldb [23]
+l3: jeq #0x1, l4, l5
+l4: ret #0xffff
+l5: ret #0
+ Prints out BPF code disassembly.
+
+> dump
+/* { op, jt, jf, k }, */
+{ 0x28, 0, 0, 0x0000000c },
+{ 0x15, 0, 3, 0x00000800 },
+{ 0x30, 0, 0, 0x00000017 },
+{ 0x15, 0, 1, 0x00000001 },
+{ 0x06, 0, 0, 0x0000ffff },
+{ 0x06, 0, 0, 0000000000 },
+ Prints out C-style BPF code dump.
+
+> breakpoint 0
+breakpoint at: l0: ldh [12]
+> breakpoint 1
+breakpoint at: l1: jeq #0x800, l2, l5
+ ...
+ Sets breakpoints at particular BPF instructions. Issuing a `run` command
+ will walk through the pcap file continuing from the current packet and
+ break when a breakpoint is being hit (another `run` will continue from
+ the currently active breakpoint executing next instructions):
+
+ > run
+ -- register dump --
+ pc: [0] <-- program counter
+ code: [40] jt[0] jf[0] k[12] <-- plain BPF code of current instruction
+ curr: l0: ldh [12] <-- disassembly of current instruction
+ A: [00000000][0] <-- content of A (hex, decimal)
+ X: [00000000][0] <-- content of X (hex, decimal)
+ M[0,15]: [00000000][0] <-- folded content of M (hex, decimal)
+ -- packet dump -- <-- Current packet from pcap (hex)
+ len: 42
+ 0: 00 19 cb 55 55 a4 00 14 a4 43 78 69 08 06 00 01
+ 16: 08 00 06 04 00 01 00 14 a4 43 78 69 0a 3b 01 26
+ 32: 00 00 00 00 00 00 0a 3b 01 01
+ (breakpoint)
+ >
+
+> breakpoint
+breakpoints: 0 1
+ Prints currently set breakpoints.
+
+> step [-<n>, +<n>]
+ Performs single stepping through the BPF program from the current pc
+ offset. Thus, on each step invocation, above register dump is issued.
+ This can go forwards and backwards in time, a plain `step` will break
+ on the next BPF instruction, thus +1. (No `run` needs to be issued here.)
+
+> select <n>
+ Selects a given packet from the pcap file to continue from. Thus, on
+ the next `run` or `step`, the BPF program is being evaluated against
+ the user pre-selected packet. Numbering starts just as in Wireshark
+ with index 1.
+
+> quit
+#
+ Exits bpf_dbg.
+
+JIT compiler
+------------
+
+The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC, PowerPC,
+ARM, ARM64, MIPS and s390 and can be enabled through CONFIG_BPF_JIT. The JIT
+compiler is transparently invoked for each attached filter from user space
+or for internal kernel users if it has been previously enabled by root:
+
+ echo 1 > /proc/sys/net/core/bpf_jit_enable
+
+For JIT developers, doing audits etc, each compile run can output the generated
+opcode image into the kernel log via:
+
+ echo 2 > /proc/sys/net/core/bpf_jit_enable
+
+Example output from dmesg:
+
+[ 3389.935842] flen=6 proglen=70 pass=3 image=ffffffffa0069c8f
+[ 3389.935847] JIT code: 00000000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 68
+[ 3389.935849] JIT code: 00000010: 44 2b 4f 6c 4c 8b 87 d8 00 00 00 be 0c 00 00 00
+[ 3389.935850] JIT code: 00000020: e8 1d 94 ff e0 3d 00 08 00 00 75 16 be 17 00 00
+[ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00
+[ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3
+
+In the kernel source tree under tools/net/, there's bpf_jit_disasm for
+generating disassembly out of the kernel log's hexdump:
+
+# ./bpf_jit_disasm
+70 bytes emitted from JIT compiler (pass:3, flen:6)
+ffffffffa0069c8f + <x>:
+ 0: push %rbp
+ 1: mov %rsp,%rbp
+ 4: sub $0x60,%rsp
+ 8: mov %rbx,-0x8(%rbp)
+ c: mov 0x68(%rdi),%r9d
+ 10: sub 0x6c(%rdi),%r9d
+ 14: mov 0xd8(%rdi),%r8
+ 1b: mov $0xc,%esi
+ 20: callq 0xffffffffe0ff9442
+ 25: cmp $0x800,%eax
+ 2a: jne 0x0000000000000042
+ 2c: mov $0x17,%esi
+ 31: callq 0xffffffffe0ff945e
+ 36: cmp $0x1,%eax
+ 39: jne 0x0000000000000042
+ 3b: mov $0xffff,%eax
+ 40: jmp 0x0000000000000044
+ 42: xor %eax,%eax
+ 44: leaveq
+ 45: retq
+
+Issuing option `-o` will "annotate" opcodes to resulting assembler
+instructions, which can be very useful for JIT developers:
+
+# ./bpf_jit_disasm -o
+70 bytes emitted from JIT compiler (pass:3, flen:6)
+ffffffffa0069c8f + <x>:
+ 0: push %rbp
+ 55
+ 1: mov %rsp,%rbp
+ 48 89 e5
+ 4: sub $0x60,%rsp
+ 48 83 ec 60
+ 8: mov %rbx,-0x8(%rbp)
+ 48 89 5d f8
+ c: mov 0x68(%rdi),%r9d
+ 44 8b 4f 68
+ 10: sub 0x6c(%rdi),%r9d
+ 44 2b 4f 6c
+ 14: mov 0xd8(%rdi),%r8
+ 4c 8b 87 d8 00 00 00
+ 1b: mov $0xc,%esi
+ be 0c 00 00 00
+ 20: callq 0xffffffffe0ff9442
+ e8 1d 94 ff e0
+ 25: cmp $0x800,%eax
+ 3d 00 08 00 00
+ 2a: jne 0x0000000000000042
+ 75 16
+ 2c: mov $0x17,%esi
+ be 17 00 00 00
+ 31: callq 0xffffffffe0ff945e
+ e8 28 94 ff e0
+ 36: cmp $0x1,%eax
+ 83 f8 01
+ 39: jne 0x0000000000000042
+ 75 07
+ 3b: mov $0xffff,%eax
+ b8 ff ff 00 00
+ 40: jmp 0x0000000000000044
+ eb 02
+ 42: xor %eax,%eax
+ 31 c0
+ 44: leaveq
+ c9
+ 45: retq
+ c3
+
+For BPF JIT developers, bpf_jit_disasm, bpf_asm and bpf_dbg provides a useful
+toolchain for developing and testing the kernel's JIT compiler.
+
+BPF kernel internals
+--------------------
+Internally, for the kernel interpreter, a different instruction set
+format with similar underlying principles from BPF described in previous
+paragraphs is being used. However, the instruction set format is modelled
+closer to the underlying architecture to mimic native instruction sets, so
+that a better performance can be achieved (more details later). This new
+ISA is called 'eBPF' or 'internal BPF' interchangeably. (Note: eBPF which
+originates from [e]xtended BPF is not the same as BPF extensions! While
+eBPF is an ISA, BPF extensions date back to classic BPF's 'overloading'
+of BPF_LD | BPF_{B,H,W} | BPF_ABS instruction.)
+
+It is designed to be JITed with one to one mapping, which can also open up
+the possibility for GCC/LLVM compilers to generate optimized eBPF code through
+an eBPF backend that performs almost as fast as natively compiled code.
+
+The new instruction set was originally designed with the possible goal in
+mind to write programs in "restricted C" and compile into eBPF with a optional
+GCC/LLVM backend, so that it can just-in-time map to modern 64-bit CPUs with
+minimal performance overhead over two steps, that is, C -> eBPF -> native code.
+
+Currently, the new format is being used for running user BPF programs, which
+includes seccomp BPF, classic socket filters, cls_bpf traffic classifier,
+team driver's classifier for its load-balancing mode, netfilter's xt_bpf
+extension, PTP dissector/classifier, and much more. They are all internally
+converted by the kernel into the new instruction set representation and run
+in the eBPF interpreter. For in-kernel handlers, this all works transparently
+by using bpf_prog_create() for setting up the filter, resp.
+bpf_prog_destroy() for destroying it. The macro
+BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed
+code to run the filter. 'filter' is a pointer to struct bpf_prog that we
+got from bpf_prog_create(), and 'ctx' the given context (e.g.
+skb pointer). All constraints and restrictions from bpf_check_classic() apply
+before a conversion to the new layout is being done behind the scenes!
+
+Currently, the classic BPF format is being used for JITing on most of the
+architectures. Only x86-64 performs JIT compilation from eBPF instruction set,
+however, future work will migrate other JIT compilers as well, so that they
+will profit from the very same benefits.
+
+Some core changes of the new internal format:
+
+- Number of registers increase from 2 to 10:
+
+ The old format had two registers A and X, and a hidden frame pointer. The
+ new layout extends this to be 10 internal registers and a read-only frame
+ pointer. Since 64-bit CPUs are passing arguments to functions via registers
+ the number of args from eBPF program to in-kernel function is restricted
+ to 5 and one register is used to accept return value from an in-kernel
+ function. Natively, x86_64 passes first 6 arguments in registers, aarch64/
+ sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved
+ registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers.
+
+ Therefore, eBPF calling convention is defined as:
+
+ * R0 - return value from in-kernel function, and exit value for eBPF program
+ * R1 - R5 - arguments from eBPF program to in-kernel function
+ * R6 - R9 - callee saved registers that in-kernel function will preserve
+ * R10 - read-only frame pointer to access stack
+
+ Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64,
+ etc, and eBPF calling convention maps directly to ABIs used by the kernel on
+ 64-bit architectures.
+
+ On 32-bit architectures JIT may map programs that use only 32-bit arithmetic
+ and may let more complex programs to be interpreted.
+
+ R0 - R5 are scratch registers and eBPF program needs spill/fill them if
+ necessary across calls. Note that there is only one eBPF program (== one
+ eBPF main routine) and it cannot call other eBPF functions, it can only
+ call predefined in-kernel functions, though.
+
+- Register width increases from 32-bit to 64-bit:
+
+ Still, the semantics of the original 32-bit ALU operations are preserved
+ via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower
+ subregisters that zero-extend into 64-bit if they are being written to.
+ That behavior maps directly to x86_64 and arm64 subregister definition, but
+ makes other JITs more difficult.
+
+ 32-bit architectures run 64-bit internal BPF programs via interpreter.
+ Their JITs may convert BPF programs that only use 32-bit subregisters into
+ native instruction set and let the rest being interpreted.
+
+ Operation is 64-bit, because on 64-bit architectures, pointers are also
+ 64-bit wide, and we want to pass 64-bit values in/out of kernel functions,
+ so 32-bit eBPF registers would otherwise require to define register-pair
+ ABI, thus, there won't be able to use a direct eBPF register to HW register
+ mapping and JIT would need to do combine/split/move operations for every
+ register in and out of the function, which is complex, bug prone and slow.
+ Another reason is the use of atomic 64-bit counters.
+
+- Conditional jt/jf targets replaced with jt/fall-through:
+
+ While the original design has constructs such as "if (cond) jump_true;
+ else jump_false;", they are being replaced into alternative constructs like
+ "if (cond) jump_true; /* else fall-through */".
+
+- Introduces bpf_call insn and register passing convention for zero overhead
+ calls from/to other kernel functions:
+
+ Before an in-kernel function call, the internal BPF program needs to
+ place function arguments into R1 to R5 registers to satisfy calling
+ convention, then the interpreter will take them from registers and pass
+ to in-kernel function. If R1 - R5 registers are mapped to CPU registers
+ that are used for argument passing on given architecture, the JIT compiler
+ doesn't need to emit extra moves. Function arguments will be in the correct
+ registers and BPF_CALL instruction will be JITed as single 'call' HW
+ instruction. This calling convention was picked to cover common call
+ situations without performance penalty.
+
+ After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has
+ a return value of the function. Since R6 - R9 are callee saved, their state
+ is preserved across the call.
+
+ For example, consider three C functions:
+
+ u64 f1() { return (*_f2)(1); }
+ u64 f2(u64 a) { return f3(a + 1, a); }
+ u64 f3(u64 a, u64 b) { return a - b; }
+
+ GCC can compile f1, f3 into x86_64:
+
+ f1:
+ movl $1, %edi
+ movq _f2(%rip), %rax
+ jmp *%rax
+ f3:
+ movq %rdi, %rax
+ subq %rsi, %rax
+ ret
+
+ Function f2 in eBPF may look like:
+
+ f2:
+ bpf_mov R2, R1
+ bpf_add R1, 1
+ bpf_call f3
+ bpf_exit
+
+ If f2 is JITed and the pointer stored to '_f2'. The calls f1 -> f2 -> f3 and
+ returns will be seamless. Without JIT, __bpf_prog_run() interpreter needs to
+ be used to call into f2.
+
+ For practical reasons all eBPF programs have only one argument 'ctx' which is
+ already placed into R1 (e.g. on __bpf_prog_run() startup) and the programs
+ can call kernel functions with up to 5 arguments. Calls with 6 or more arguments
+ are currently not supported, but these restrictions can be lifted if necessary
+ in the future.
+
+ On 64-bit architectures all register map to HW registers one to one. For
+ example, x86_64 JIT compiler can map them as ...
+
+ R0 - rax
+ R1 - rdi
+ R2 - rsi
+ R3 - rdx
+ R4 - rcx
+ R5 - r8
+ R6 - rbx
+ R7 - r13
+ R8 - r14
+ R9 - r15
+ R10 - rbp
+
+ ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing
+ and rbx, r12 - r15 are callee saved.
+
+ Then the following internal BPF pseudo-program:
+
+ bpf_mov R6, R1 /* save ctx */
+ bpf_mov R2, 2
+ bpf_mov R3, 3
+ bpf_mov R4, 4
+ bpf_mov R5, 5
+ bpf_call foo
+ bpf_mov R7, R0 /* save foo() return value */
+ bpf_mov R1, R6 /* restore ctx for next call */
+ bpf_mov R2, 6
+ bpf_mov R3, 7
+ bpf_mov R4, 8
+ bpf_mov R5, 9
+ bpf_call bar
+ bpf_add R0, R7
+ bpf_exit
+
+ After JIT to x86_64 may look like:
+
+ push %rbp
+ mov %rsp,%rbp
+ sub $0x228,%rsp
+ mov %rbx,-0x228(%rbp)
+ mov %r13,-0x220(%rbp)
+ mov %rdi,%rbx
+ mov $0x2,%esi
+ mov $0x3,%edx
+ mov $0x4,%ecx
+ mov $0x5,%r8d
+ callq foo
+ mov %rax,%r13
+ mov %rbx,%rdi
+ mov $0x2,%esi
+ mov $0x3,%edx
+ mov $0x4,%ecx
+ mov $0x5,%r8d
+ callq bar
+ add %r13,%rax
+ mov -0x228(%rbp),%rbx
+ mov -0x220(%rbp),%r13
+ leaveq
+ retq
+
+ Which is in this example equivalent in C to:
+
+ u64 bpf_filter(u64 ctx)
+ {
+ return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9);
+ }
+
+ In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64
+ arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper
+ registers and place their return value into '%rax' which is R0 in eBPF.
+ Prologue and epilogue are emitted by JIT and are implicit in the
+ interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve
+ them across the calls as defined by calling convention.
+
+ For example the following program is invalid:
+
+ bpf_mov R1, 1
+ bpf_call foo
+ bpf_mov R0, R1
+ bpf_exit
+
+ After the call the registers R1-R5 contain junk values and cannot be read.
+ In the future an eBPF verifier can be used to validate internal BPF programs.
+
+Also in the new design, eBPF is limited to 4096 insns, which means that any
+program will terminate quickly and will only call a fixed number of kernel
+functions. Original BPF and the new format are two operand instructions,
+which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT.
+
+The input context pointer for invoking the interpreter function is generic,
+its content is defined by a specific use case. For seccomp register R1 points
+to seccomp_data, for converted BPF filters R1 points to a skb.
+
+A program, that is translated internally consists of the following elements:
+
+ op:16, jt:8, jf:8, k:32 ==> op:8, dst_reg:4, src_reg:4, off:16, imm:32
+
+So far 87 internal BPF instructions were implemented. 8-bit 'op' opcode field
+has room for new instructions. Some of them may use 16/24/32 byte encoding. New
+instructions must be multiple of 8 bytes to preserve backward compatibility.
+
+Internal BPF is a general purpose RISC instruction set. Not every register and
+every instruction are used during translation from original BPF to new format.
+For example, socket filters are not using 'exclusive add' instruction, but
+tracing filters may do to maintain counters of events, for example. Register R9
+is not used by socket filters either, but more complex filters may be running
+out of registers and would have to resort to spill/fill to stack.
+
+Internal BPF can used as generic assembler for last step performance
+optimizations, socket filters and seccomp are using it as assembler. Tracing
+filters may use it as assembler to generate code from kernel. In kernel usage
+may not be bounded by security considerations, since generated internal BPF code
+may be optimizing internal code path and not being exposed to the user space.
+Safety of internal BPF can come from a verifier (TBD). In such use cases as
+described, it may be used as safe instruction set.
+
+Just like the original BPF, the new format runs within a controlled environment,
+is deterministic and the kernel can easily prove that. The safety of the program
+can be determined in two steps: first step does depth-first-search to disallow
+loops and other CFG validation; second step starts from the first insn and
+descends all possible paths. It simulates execution of every insn and observes
+the state change of registers and stack.
+
+eBPF opcode encoding
+--------------------
+
+eBPF is reusing most of the opcode encoding from classic to simplify conversion
+of classic BPF to eBPF. For arithmetic and jump instructions the 8-bit 'code'
+field is divided into three parts:
+
+ +----------------+--------+--------------------+
+ | 4 bits | 1 bit | 3 bits |
+ | operation code | source | instruction class |
+ +----------------+--------+--------------------+
+ (MSB) (LSB)
+
+Three LSB bits store instruction class which is one of:
+
+ Classic BPF classes: eBPF classes:
+
+ BPF_LD 0x00 BPF_LD 0x00
+ BPF_LDX 0x01 BPF_LDX 0x01
+ BPF_ST 0x02 BPF_ST 0x02
+ BPF_STX 0x03 BPF_STX 0x03
+ BPF_ALU 0x04 BPF_ALU 0x04
+ BPF_JMP 0x05 BPF_JMP 0x05
+ BPF_RET 0x06 [ class 6 unused, for future if needed ]
+ BPF_MISC 0x07 BPF_ALU64 0x07
+
+When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ...
+
+ BPF_K 0x00
+ BPF_X 0x08
+
+ * in classic BPF, this means:
+
+ BPF_SRC(code) == BPF_X - use register X as source operand
+ BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
+
+ * in eBPF, this means:
+
+ BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand
+ BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
+
+... and four MSB bits store operation code.
+
+If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:
+
+ BPF_ADD 0x00
+ BPF_SUB 0x10
+ BPF_MUL 0x20
+ BPF_DIV 0x30
+ BPF_OR 0x40
+ BPF_AND 0x50
+ BPF_LSH 0x60
+ BPF_RSH 0x70
+ BPF_NEG 0x80
+ BPF_MOD 0x90
+ BPF_XOR 0xa0
+ BPF_MOV 0xb0 /* eBPF only: mov reg to reg */
+ BPF_ARSH 0xc0 /* eBPF only: sign extending shift right */
+ BPF_END 0xd0 /* eBPF only: endianness conversion */
+
+If BPF_CLASS(code) == BPF_JMP, BPF_OP(code) is one of:
+
+ BPF_JA 0x00
+ BPF_JEQ 0x10
+ BPF_JGT 0x20
+ BPF_JGE 0x30
+ BPF_JSET 0x40
+ BPF_JNE 0x50 /* eBPF only: jump != */
+ BPF_JSGT 0x60 /* eBPF only: signed '>' */
+ BPF_JSGE 0x70 /* eBPF only: signed '>=' */
+ BPF_CALL 0x80 /* eBPF only: function call */
+ BPF_EXIT 0x90 /* eBPF only: function return */
+
+So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF
+and eBPF. There are only two registers in classic BPF, so it means A += X.
+In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly,
+BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and analogous
+src_reg = (u32) src_reg ^ (u32) imm32 in eBPF.
+
+Classic BPF is using BPF_MISC class to represent A = X and X = A moves.
+eBPF is using BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no
+BPF_MISC operations in eBPF, the class 7 is used as BPF_ALU64 to mean
+exactly the same operations as BPF_ALU, but with 64-bit wide operands
+instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.:
+dst_reg = dst_reg + src_reg
+
+Classic BPF wastes the whole BPF_RET class to represent a single 'ret'
+operation. Classic BPF_RET | BPF_K means copy imm32 into return register
+and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
+in eBPF means function exit only. The eBPF program needs to store return
+value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is currently
+unused and reserved for future use.
+
+For load and store instructions the 8-bit 'code' field is divided as:
+
+ +--------+--------+-------------------+
+ | 3 bits | 2 bits | 3 bits |
+ | mode | size | instruction class |
+ +--------+--------+-------------------+
+ (MSB) (LSB)
+
+Size modifier is one of ...
+
+ BPF_W 0x00 /* word */
+ BPF_H 0x08 /* half word */
+ BPF_B 0x10 /* byte */
+ BPF_DW 0x18 /* eBPF only, double word */
+
+... which encodes size of load/store operation:
+
+ B - 1 byte
+ H - 2 byte
+ W - 4 byte
+ DW - 8 byte (eBPF only)
+
+Mode modifier is one of:
+
+ BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
+ BPF_ABS 0x20
+ BPF_IND 0x40
+ BPF_MEM 0x60
+ BPF_LEN 0x80 /* classic BPF only, reserved in eBPF */
+ BPF_MSH 0xa0 /* classic BPF only, reserved in eBPF */
+ BPF_XADD 0xc0 /* eBPF only, exclusive add */
+
+eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
+(BPF_IND | <size> | BPF_LD) which are used to access packet data.
+
+They had to be carried over from classic to have strong performance of
+socket filters running in eBPF interpreter. These instructions can only
+be used when interpreter context is a pointer to 'struct sk_buff' and
+have seven implicit operands. Register R6 is an implicit input that must
+contain pointer to sk_buff. Register R0 is an implicit output which contains
+the data fetched from the packet. Registers R1-R5 are scratch registers
+and must not be used to store the data across BPF_ABS | BPF_LD or
+BPF_IND | BPF_LD instructions.
+
+These instructions have implicit program exit condition as well. When
+eBPF program is trying to access the data beyond the packet boundary,
+the interpreter will abort the execution of the program. JIT compilers
+therefore must preserve this property. src_reg and imm32 fields are
+explicit inputs to these instructions.
+
+For example:
+
+ BPF_IND | BPF_W | BPF_LD means:
+
+ R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
+ and R1 - R5 were scratched.
+
+Unlike classic BPF instruction set, eBPF has generic load/store operations:
+
+BPF_MEM | <size> | BPF_STX: *(size *) (dst_reg + off) = src_reg
+BPF_MEM | <size> | BPF_ST: *(size *) (dst_reg + off) = imm32
+BPF_MEM | <size> | BPF_LDX: dst_reg = *(size *) (src_reg + off)
+BPF_XADD | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
+BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
+
+Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and
+2 byte atomic increments are not supported.
+
+eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists
+of two consecutive 'struct bpf_insn' 8-byte blocks and interpreted as single
+instruction that loads 64-bit immediate value into a dst_reg.
+Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
+32-bit immediate value into a register.
+
+eBPF verifier
+-------------
+The safety of the eBPF program is determined in two steps.
+
+First step does DAG check to disallow loops and other CFG validation.
+In particular it will detect programs that have unreachable instructions.
+(though classic BPF checker allows them)
+
+Second step starts from the first insn and descends all possible paths.
+It simulates execution of every insn and observes the state change of
+registers and stack.
+
+At the start of the program the register R1 contains a pointer to context
+and has type PTR_TO_CTX.
+If verifier sees an insn that does R2=R1, then R2 has now type
+PTR_TO_CTX as well and can be used on the right hand side of expression.
+If R1=PTR_TO_CTX and insn is R2=R1+R1, then R2=UNKNOWN_VALUE,
+since addition of two valid pointers makes invalid pointer.
+(In 'secure' mode verifier will reject any type of pointer arithmetic to make
+sure that kernel addresses don't leak to unprivileged users)
+
+If register was never written to, it's not readable:
+ bpf_mov R0 = R2
+ bpf_exit
+will be rejected, since R2 is unreadable at the start of the program.
+
+After kernel function call, R1-R5 are reset to unreadable and
+R0 has a return type of the function.
+
+Since R6-R9 are callee saved, their state is preserved across the call.
+ bpf_mov R6 = 1
+ bpf_call foo
+ bpf_mov R0 = R6
+ bpf_exit
+is a correct program. If there was R1 instead of R6, it would have
+been rejected.
+
+load/store instructions are allowed only with registers of valid types, which
+are PTR_TO_CTX, PTR_TO_MAP, FRAME_PTR. They are bounds and alignment checked.
+For example:
+ bpf_mov R1 = 1
+ bpf_mov R2 = 2
+ bpf_xadd *(u32 *)(R1 + 3) += R2
+ bpf_exit
+will be rejected, since R1 doesn't have a valid pointer type at the time of
+execution of instruction bpf_xadd.
+
+At the start R1 type is PTR_TO_CTX (a pointer to generic 'struct bpf_context')
+A callback is used to customize verifier to restrict eBPF program access to only
+certain fields within ctx structure with specified size and alignment.
+
+For example, the following insn:
+ bpf_ld R0 = *(u32 *)(R6 + 8)
+intends to load a word from address R6 + 8 and store it into R0
+If R6=PTR_TO_CTX, via is_valid_access() callback the verifier will know
+that offset 8 of size 4 bytes can be accessed for reading, otherwise
+the verifier will reject the program.
+If R6=FRAME_PTR, then access should be aligned and be within
+stack bounds, which are [-MAX_BPF_STACK, 0). In this example offset is 8,
+so it will fail verification, since it's out of bounds.
+
+The verifier will allow eBPF program to read data from stack only after
+it wrote into it.
+Classic BPF verifier does similar check with M[0-15] memory slots.
+For example:
+ bpf_ld R0 = *(u32 *)(R10 - 4)
+ bpf_exit
+is invalid program.
+Though R10 is correct read-only register and has type FRAME_PTR
+and R10 - 4 is within stack bounds, there were no stores into that location.
+
+Pointer register spill/fill is tracked as well, since four (R6-R9)
+callee saved registers may not be enough for some programs.
+
+Allowed function calls are customized with bpf_verifier_ops->get_func_proto()
+The eBPF verifier will check that registers match argument constraints.
+After the call register R0 will be set to return type of the function.
+
+Function calls is a main mechanism to extend functionality of eBPF programs.
+Socket filters may let programs to call one set of functions, whereas tracing
+filters may allow completely different set.
+
+If a function made accessible to eBPF program, it needs to be thought through
+from safety point of view. The verifier will guarantee that the function is
+called with valid arguments.
+
+seccomp vs socket filters have different security restrictions for classic BPF.
+Seccomp solves this by two stage verifier: classic BPF verifier is followed
+by seccomp verifier. In case of eBPF one configurable verifier is shared for
+all use cases.
+
+See details of eBPF verifier in kernel/bpf/verifier.c
+
+eBPF maps
+---------
+'maps' is a generic storage of different types for sharing data between kernel
+and userspace.
+
+The maps are accessed from user space via BPF syscall, which has commands:
+- create a map with given type and attributes
+ map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
+ using attr->map_type, attr->key_size, attr->value_size, attr->max_entries
+ returns process-local file descriptor or negative error
+
+- lookup key in a given map
+ err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
+ using attr->map_fd, attr->key, attr->value
+ returns zero and stores found elem into value or negative error
+
+- create or update key/value pair in a given map
+ err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
+ using attr->map_fd, attr->key, attr->value
+ returns zero or negative error
+
+- find and delete element by key in a given map
+ err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
+ using attr->map_fd, attr->key
+
+- to delete map: close(fd)
+ Exiting process will delete maps automatically
+
+userspace programs use this syscall to create/access maps that eBPF programs
+are concurrently updating.
+
+maps can have different types: hash, array, bloom filter, radix-tree, etc.
+
+The map is defined by:
+ . type
+ . max number of elements
+ . key size in bytes
+ . value size in bytes
+
+Understanding eBPF verifier messages
+------------------------------------
+
+The following are few examples of invalid eBPF programs and verifier error
+messages as seen in the log:
+
+Program with unreachable instructions:
+static struct bpf_insn prog[] = {
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+};
+Error:
+ unreachable insn 1
+
+Program that reads uninitialized register:
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+Error:
+ 0: (bf) r0 = r2
+ R2 !read_ok
+
+Program that doesn't initialize R0 before exiting:
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+Error:
+ 0: (bf) r2 = r1
+ 1: (95) exit
+ R0 !read_ok
+
+Program that accesses stack out of bounds:
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
+ BPF_EXIT_INSN(),
+Error:
+ 0: (7a) *(u64 *)(r10 +8) = 0
+ invalid stack off=8 size=8
+
+Program that doesn't initialize stack before passing its address into function:
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+Error:
+ 0: (bf) r2 = r10
+ 1: (07) r2 += -8
+ 2: (b7) r1 = 0x0
+ 3: (85) call 1
+ invalid indirect read from stack off -8+0 size 8
+
+Program that uses invalid map_fd=0 while calling to map_lookup_elem() function:
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+Error:
+ 0: (7a) *(u64 *)(r10 -8) = 0
+ 1: (bf) r2 = r10
+ 2: (07) r2 += -8
+ 3: (b7) r1 = 0x0
+ 4: (85) call 1
+ fd 0 is not pointing to valid bpf_map
+
+Program that doesn't check return value of map_lookup_elem() before accessing
+map element:
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+Error:
+ 0: (7a) *(u64 *)(r10 -8) = 0
+ 1: (bf) r2 = r10
+ 2: (07) r2 += -8
+ 3: (b7) r1 = 0x0
+ 4: (85) call 1
+ 5: (7a) *(u64 *)(r0 +0) = 0
+ R0 invalid mem access 'map_value_or_null'
+
+Program that correctly checks map_lookup_elem() returned value for NULL, but
+accesses the memory with incorrect alignment:
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
+ BPF_EXIT_INSN(),
+Error:
+ 0: (7a) *(u64 *)(r10 -8) = 0
+ 1: (bf) r2 = r10
+ 2: (07) r2 += -8
+ 3: (b7) r1 = 1
+ 4: (85) call 1
+ 5: (15) if r0 == 0x0 goto pc+1
+ R0=map_ptr R10=fp
+ 6: (7a) *(u64 *)(r0 +4) = 0
+ misaligned access off 4 size 8
+
+Program that correctly checks map_lookup_elem() returned value for NULL and
+accesses memory with correct alignment in one side of 'if' branch, but fails
+to do so in the other side of 'if' branch:
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+Error:
+ 0: (7a) *(u64 *)(r10 -8) = 0
+ 1: (bf) r2 = r10
+ 2: (07) r2 += -8
+ 3: (b7) r1 = 1
+ 4: (85) call 1
+ 5: (15) if r0 == 0x0 goto pc+2
+ R0=map_ptr R10=fp
+ 6: (7a) *(u64 *)(r0 +0) = 0
+ 7: (95) exit
+
+ from 5 to 8: R0=imm0 R10=fp
+ 8: (7a) *(u64 *)(r0 +0) = 1
+ R0 invalid mem access 'imm'
+
+Testing
+-------
+
+Next to the BPF toolchain, the kernel also ships a test module that contains
+various test cases for classic and internal BPF that can be executed against
+the BPF interpreter and JIT compiler. It can be found in lib/test_bpf.c and
+enabled via Kconfig:
+
+ CONFIG_TEST_BPF=m
+
+After the module has been built and installed, the test suite can be executed
+via insmod or modprobe against 'test_bpf' module. Results of the test cases
+including timings in nsec can be found in the kernel log (dmesg).
+
+Misc
+----
+
+Also trinity, the Linux syscall fuzzer, has built-in support for BPF and
+SECCOMP-BPF kernel fuzzing.
+
+Written by
+----------
+
+The document was written in the hope that it is found useful and in order
+to give potential BPF hackers or security auditors a better overview of
+the underlying architecture.
+
+Jay Schulist <jschlst@samba.org>
+Daniel Borkmann <dborkman@redhat.com>
+Alexei Starovoitov <ast@plumgrid.com>
diff --git a/Documentation/networking/fore200e.txt b/Documentation/networking/fore200e.txt
new file mode 100644
index 000000000..7afbdcc8c
--- /dev/null
+++ b/Documentation/networking/fore200e.txt
@@ -0,0 +1,39 @@
+
+FORE Systems PCA-200E/SBA-200E ATM NIC driver
+---------------------------------------------
+
+This driver adds support for the FORE Systems 200E-series ATM adapters
+to the Linux operating system. It is based on the earlier PCA-200E driver
+written by Uwe Dannowski.
+
+The driver simultaneously supports PCA-200E and SBA-200E adapters on
+i386, alpha (untested), powerpc, sparc and sparc64 archs.
+
+The intent is to enable the use of different models of FORE adapters at the
+same time, by hosts that have several bus interfaces (such as PCI+SBUS,
+or PCI+EISA).
+
+Only PCI and SBUS devices are currently supported by the driver, but support
+for other bus interfaces such as EISA should not be too hard to add.
+
+
+Firmware Copyright Notice
+-------------------------
+
+Please read the fore200e_firmware_copyright file present
+in the linux/drivers/atm directory for details and restrictions.
+
+
+Firmware Updates
+----------------
+
+The FORE Systems 200E-series driver is shipped with firmware data being
+uploaded to the ATM adapters at system boot time or at module loading time.
+/*(DEBLOBBED)*/
+
+
+Feedback
+--------
+
+Feedback is welcome. Please send success stories/bug reports/
+patches/improvement/comments/flames to <lizzi@cnam.fr>.
diff --git a/Documentation/networking/framerelay.txt b/Documentation/networking/framerelay.txt
new file mode 100644
index 000000000..1a0b72044
--- /dev/null
+++ b/Documentation/networking/framerelay.txt
@@ -0,0 +1,39 @@
+Frame Relay (FR) support for linux is built into a two tiered system of device
+drivers. The upper layer implements RFC1490 FR specification, and uses the
+Data Link Connection Identifier (DLCI) as its hardware address. Usually these
+are assigned by your network supplier, they give you the number/numbers of
+the Virtual Connections (VC) assigned to you.
+
+Each DLCI is a point-to-point link between your machine and a remote one.
+As such, a separate device is needed to accommodate the routing. Within the
+net-tools archives is 'dlcicfg'. This program will communicate with the
+base "DLCI" device, and create new net devices named 'dlci00', 'dlci01'...
+The configuration script will ask you how many DLCIs you need, as well as
+how many DLCIs you want to assign to each Frame Relay Access Device (FRAD).
+
+The DLCI uses a number of function calls to communicate with the FRAD, all
+of which are stored in the FRAD's private data area. assoc/deassoc,
+activate/deactivate and dlci_config. The DLCI supplies a receive function
+to the FRAD to accept incoming packets.
+
+With this initial offering, only 1 FRAD driver is available. With many thanks
+to Sangoma Technologies, David Mandelstam & Gene Kozin, the S502A, S502E &
+S508 are supported. This driver is currently set up for only FR, but as
+Sangoma makes more firmware modules available, it can be updated to provide
+them as well.
+
+Configuration of the FRAD makes use of another net-tools program, 'fradcfg'.
+This program makes use of a configuration file (which dlcicfg can also read)
+to specify the types of boards to be configured as FRADs, as well as perform
+any board specific configuration. The Sangoma module of fradcfg loads the
+FR firmware into the card, sets the irq/port/memory information, and provides
+an initial configuration.
+
+Additional FRAD device drivers can be added as hardware is available.
+
+At this time, the dlcicfg and fradcfg programs have not been incorporated into
+the net-tools distribution. They can be found at ftp.invlogic.com, in
+/pub/linux. Note that with OS/2 FTPD, you end up in /pub by default, so just
+use 'cd linux'. v0.10 is for use on pre-2.0.3 and earlier, v0.15 is for
+pre-2.0.4 and later.
+
diff --git a/Documentation/networking/gen_stats.txt b/Documentation/networking/gen_stats.txt
new file mode 100644
index 000000000..70e6275b7
--- /dev/null
+++ b/Documentation/networking/gen_stats.txt
@@ -0,0 +1,117 @@
+Generic networking statistics for netlink users
+======================================================================
+
+Statistic counters are grouped into structs:
+
+Struct TLV type Description
+----------------------------------------------------------------------
+gnet_stats_basic TCA_STATS_BASIC Basic statistics
+gnet_stats_rate_est TCA_STATS_RATE_EST Rate estimator
+gnet_stats_queue TCA_STATS_QUEUE Queue statistics
+none TCA_STATS_APP Application specific
+
+
+Collecting:
+-----------
+
+Declare the statistic structs you need:
+struct mystruct {
+ struct gnet_stats_basic bstats;
+ struct gnet_stats_queue qstats;
+ ...
+};
+
+Update statistics:
+mystruct->tstats.packet++;
+mystruct->qstats.backlog += skb->pkt_len;
+
+
+Export to userspace (Dump):
+---------------------------
+
+my_dumping_routine(struct sk_buff *skb, ...)
+{
+ struct gnet_dump dump;
+
+ if (gnet_stats_start_copy(skb, TCA_STATS2, &mystruct->lock, &dump) < 0)
+ goto rtattr_failure;
+
+ if (gnet_stats_copy_basic(&dump, &mystruct->bstats) < 0 ||
+ gnet_stats_copy_queue(&dump, &mystruct->qstats) < 0 ||
+ gnet_stats_copy_app(&dump, &xstats, sizeof(xstats)) < 0)
+ goto rtattr_failure;
+
+ if (gnet_stats_finish_copy(&dump) < 0)
+ goto rtattr_failure;
+ ...
+}
+
+TCA_STATS/TCA_XSTATS backward compatibility:
+--------------------------------------------
+
+Prior users of struct tc_stats and xstats can maintain backward
+compatibility by calling the compat wrappers to keep providing the
+existing TLV types.
+
+my_dumping_routine(struct sk_buff *skb, ...)
+{
+ if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
+ TCA_XSTATS, &mystruct->lock, &dump) < 0)
+ goto rtattr_failure;
+ ...
+}
+
+A struct tc_stats will be filled out during gnet_stats_copy_* calls
+and appended to the skb. TCA_XSTATS is provided if gnet_stats_copy_app
+was called.
+
+
+Locking:
+--------
+
+Locks are taken before writing and released once all statistics have
+been written. Locks are always released in case of an error. You
+are responsible for making sure that the lock is initialized.
+
+
+Rate Estimator:
+--------------
+
+0) Prepare an estimator attribute. Most likely this would be in user
+ space. The value of this TLV should contain a tc_estimator structure.
+ As usual, such a TLV needs to be 32 bit aligned and therefore the
+ length needs to be appropriately set, etc. The estimator interval
+ and ewma log need to be converted to the appropriate values.
+ tc_estimator.c::tc_setup_estimator() is advisable to be used as the
+ conversion routine. It does a few clever things. It takes a time
+ interval in microsecs, a time constant also in microsecs and a struct
+ tc_estimator to be populated. The returned tc_estimator can be
+ transported to the kernel. Transfer such a structure in a TLV of type
+ TCA_RATE to your code in the kernel.
+
+In the kernel when setting up:
+1) make sure you have basic stats and rate stats setup first.
+2) make sure you have initialized stats lock that is used to setup such
+ stats.
+3) Now initialize a new estimator:
+
+ int ret = gen_new_estimator(my_basicstats,my_rate_est_stats,
+ mystats_lock, attr_with_tcestimator_struct);
+
+ if ret == 0
+ success
+ else
+ failed
+
+From now on, every time you dump my_rate_est_stats it will contain
+up-to-date info.
+
+Once you are done, call gen_kill_estimator(my_basicstats,
+my_rate_est_stats) Make sure that my_basicstats and my_rate_est_stats
+are still valid (i.e still exist) at the time of making this call.
+
+
+Authors:
+--------
+Thomas Graf <tgraf@suug.ch>
+Jamal Hadi Salim <hadi@cyberus.ca>
diff --git a/Documentation/networking/generic-hdlc.txt b/Documentation/networking/generic-hdlc.txt
new file mode 100644
index 000000000..4eb3cc40b
--- /dev/null
+++ b/Documentation/networking/generic-hdlc.txt
@@ -0,0 +1,132 @@
+Generic HDLC layer
+Krzysztof Halasa <khc@pm.waw.pl>
+
+
+Generic HDLC layer currently supports:
+1. Frame Relay (ANSI, CCITT, Cisco and no LMI)
+ - Normal (routed) and Ethernet-bridged (Ethernet device emulation)
+ interfaces can share a single PVC.
+ - ARP support (no InARP support in the kernel - there is an
+ experimental InARP user-space daemon available on:
+ http://www.kernel.org/pub/linux/utils/net/hdlc/).
+2. raw HDLC - either IP (IPv4) interface or Ethernet device emulation
+3. Cisco HDLC
+4. PPP
+5. X.25 (uses X.25 routines).
+
+Generic HDLC is a protocol driver only - it needs a low-level driver
+for your particular hardware.
+
+Ethernet device emulation (using HDLC or Frame-Relay PVC) is compatible
+with IEEE 802.1Q (VLANs) and 802.1D (Ethernet bridging).
+
+
+Make sure the hdlc.o and the hardware driver are loaded. It should
+create a number of "hdlc" (hdlc0 etc) network devices, one for each
+WAN port. You'll need the "sethdlc" utility, get it from:
+ http://www.kernel.org/pub/linux/utils/net/hdlc/
+
+Compile sethdlc.c utility:
+ gcc -O2 -Wall -o sethdlc sethdlc.c
+Make sure you're using a correct version of sethdlc for your kernel.
+
+Use sethdlc to set physical interface, clock rate, HDLC mode used,
+and add any required PVCs if using Frame Relay.
+Usually you want something like:
+
+ sethdlc hdlc0 clock int rate 128000
+ sethdlc hdlc0 cisco interval 10 timeout 25
+or
+ sethdlc hdlc0 rs232 clock ext
+ sethdlc hdlc0 fr lmi ansi
+ sethdlc hdlc0 create 99
+ ifconfig hdlc0 up
+ ifconfig pvc0 localIP pointopoint remoteIP
+
+In Frame Relay mode, ifconfig master hdlc device up (without assigning
+any IP address to it) before using pvc devices.
+
+
+Setting interface:
+
+* v35 | rs232 | x21 | t1 | e1 - sets physical interface for a given port
+ if the card has software-selectable interfaces
+ loopback - activate hardware loopback (for testing only)
+* clock ext - both RX clock and TX clock external
+* clock int - both RX clock and TX clock internal
+* clock txint - RX clock external, TX clock internal
+* clock txfromrx - RX clock external, TX clock derived from RX clock
+* rate - sets clock rate in bps (for "int" or "txint" clock only)
+
+
+Setting protocol:
+
+* hdlc - sets raw HDLC (IP-only) mode
+ nrz / nrzi / fm-mark / fm-space / manchester - sets transmission code
+ no-parity / crc16 / crc16-pr0 (CRC16 with preset zeros) / crc32-itu
+ crc16-itu (CRC16 with ITU-T polynomial) / crc16-itu-pr0 - sets parity
+
+* hdlc-eth - Ethernet device emulation using HDLC. Parity and encoding
+ as above.
+
+* cisco - sets Cisco HDLC mode (IP, IPv6 and IPX supported)
+ interval - time in seconds between keepalive packets
+ timeout - time in seconds after last received keepalive packet before
+ we assume the link is down
+
+* ppp - sets synchronous PPP mode
+
+* x25 - sets X.25 mode
+
+* fr - Frame Relay mode
+ lmi ansi / ccitt / cisco / none - LMI (link management) type
+ dce - Frame Relay DCE (network) side LMI instead of default DTE (user).
+ It has nothing to do with clocks!
+ t391 - link integrity verification polling timer (in seconds) - user
+ t392 - polling verification timer (in seconds) - network
+ n391 - full status polling counter - user
+ n392 - error threshold - both user and network
+ n393 - monitored events count - both user and network
+
+Frame-Relay only:
+* create n | delete n - adds / deletes PVC interface with DLCI #n.
+ Newly created interface will be named pvc0, pvc1 etc.
+
+* create ether n | delete ether n - adds a device for Ethernet-bridged
+ frames. The device will be named pvceth0, pvceth1 etc.
+
+
+
+
+Board-specific issues
+---------------------
+
+n2.o and c101.o need parameters to work:
+
+ insmod n2 hw=io,irq,ram,ports[:io,irq,...]
+example:
+ insmod n2 hw=0x300,10,0xD0000,01
+
+or
+ insmod c101 hw=irq,ram[:irq,...]
+example:
+ insmod c101 hw=9,0xdc000
+
+If built into the kernel, these drivers need kernel (command line) parameters:
+ n2.hw=io,irq,ram,ports:...
+or
+ c101.hw=irq,ram:...
+
+
+
+If you have a problem with N2, C101 or PLX200SYN card, you can issue the
+"private" command to see port's packet descriptor rings (in kernel logs):
+
+ sethdlc hdlc0 private
+
+The hardware driver has to be build with #define DEBUG_RINGS.
+Attaching this info to bug reports would be helpful. Anyway, let me know
+if you have problems using this.
+
+For patches and other info look at:
+<http://www.kernel.org/pub/linux/utils/net/hdlc/>.
diff --git a/Documentation/networking/generic_netlink.txt b/Documentation/networking/generic_netlink.txt
new file mode 100644
index 000000000..3e071115c
--- /dev/null
+++ b/Documentation/networking/generic_netlink.txt
@@ -0,0 +1,3 @@
+A wiki document on how to use Generic Netlink can be found here:
+
+ * http://www.linuxfoundation.org/collaborate/workgroups/networking/generic_netlink_howto
diff --git a/Documentation/networking/gianfar.txt b/Documentation/networking/gianfar.txt
new file mode 100644
index 000000000..ba1daea7f
--- /dev/null
+++ b/Documentation/networking/gianfar.txt
@@ -0,0 +1,42 @@
+The Gianfar Ethernet Driver
+
+Author: Andy Fleming <afleming@freescale.com>
+Updated: 2005-07-28
+
+
+CHECKSUM OFFLOADING
+
+The eTSEC controller (first included in parts from late 2005 like
+the 8548) has the ability to perform TCP, UDP, and IP checksums
+in hardware. The Linux kernel only offloads the TCP and UDP
+checksums (and always performs the pseudo header checksums), so
+the driver only supports checksumming for TCP/IP and UDP/IP
+packets. Use ethtool to enable or disable this feature for RX
+and TX.
+
+VLAN
+
+In order to use VLAN, please consult Linux documentation on
+configuring VLANs. The gianfar driver supports hardware insertion and
+extraction of VLAN headers, but not filtering. Filtering will be
+done by the kernel.
+
+MULTICASTING
+
+The gianfar driver supports using the group hash table on the
+TSEC (and the extended hash table on the eTSEC) for multicast
+filtering. On the eTSEC, the exact-match MAC registers are used
+before the hash tables. See Linux documentation on how to join
+multicast groups.
+
+PADDING
+
+The gianfar driver supports padding received frames with 2 bytes
+to align the IP header to a 16-byte boundary, when supported by
+hardware.
+
+ETHTOOL
+
+The gianfar driver supports the use of ethtool for many
+configuration options. You must run ethtool only on currently
+open interfaces. See ethtool documentation for details.
diff --git a/Documentation/networking/i40e.txt b/Documentation/networking/i40e.txt
new file mode 100644
index 000000000..a251bf4fe
--- /dev/null
+++ b/Documentation/networking/i40e.txt
@@ -0,0 +1,118 @@
+Linux Base Driver for the Intel(R) Ethernet Controller XL710 Family
+===================================================================
+
+Intel i40e Linux driver.
+Copyright(c) 2013 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Additional Configurations
+- Performance Tuning
+- Known Issues
+- Support
+
+
+Identifying Your Adapter
+========================
+
+The driver in this release is compatible with the Intel Ethernet
+Controller XL710 Family.
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ http://support.intel.com/support/network/sb/CS-012904.htm
+
+
+Enabling the driver
+===================
+
+The driver is enabled via the standard kernel configuration system,
+using the make command:
+
+ Make oldconfig/silentoldconfig/menuconfig/etc.
+
+The driver is located in the menu structure at:
+
+ -> Device Drivers
+ -> Network device support (NETDEVICES [=y])
+ -> Ethernet driver support
+ -> Intel devices
+ -> Intel(R) Ethernet Controller XL710 Family
+
+Additional Configurations
+=========================
+
+ Generic Receive Offload (GRO)
+ -----------------------------
+ The driver supports the in-kernel software implementation of GRO. GRO has
+ shown that by coalescing Rx traffic into larger chunks of data, CPU
+ utilization can be significantly reduced when under large Rx load. GRO is
+ an evolution of the previously-used LRO interface. GRO is able to coalesce
+ other protocols besides TCP. It's also safe to use with configurations that
+ are problematic for LRO, namely bridging and iSCSI.
+
+ Ethtool
+ -------
+ The driver utilizes the ethtool interface for driver configuration and
+ diagnostics, as well as displaying statistical information. The latest
+ ethtool version is required for this functionality.
+
+ The latest release of ethtool can be found from
+ https://www.kernel.org/pub/software/network/ethtool
+
+ Data Center Bridging (DCB)
+ --------------------------
+ DCB configuration is not currently supported.
+
+ FCoE
+ ----
+ The driver supports Fiber Channel over Ethernet (FCoE) and Data Center
+ Bridging (DCB) functionality. Configuring DCB and FCoE is outside the scope
+ of this driver doc. Refer to http://www.open-fcoe.org/ for FCoE project
+ information and http://www.open-lldp.org/ or email list
+ e1000-eedc@lists.sourceforge.net for DCB information.
+
+ MAC and VLAN anti-spoofing feature
+ ----------------------------------
+ When a malicious driver attempts to send a spoofed packet, it is dropped by
+ the hardware and not transmitted. An interrupt is sent to the PF driver
+ notifying it of the spoof attempt.
+
+ When a spoofed packet is detected the PF driver will send the following
+ message to the system log (displayed by the "dmesg" command):
+
+ Spoof event(s) detected on VF (n)
+
+ Where n=the VF that attempted to do the spoofing.
+
+
+Performance Tuning
+==================
+
+An excellent article on performance tuning can be found at:
+
+http://www.redhat.com/promo/summit/2008/downloads/pdf/Thursday/Mark_Wagner.pdf
+
+
+Known Issues
+============
+
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ http://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://e1000.sourceforge.net
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sourceforge.net and copy
+netdev@vger.kernel.org.
diff --git a/Documentation/networking/i40evf.txt b/Documentation/networking/i40evf.txt
new file mode 100644
index 000000000..21e41271a
--- /dev/null
+++ b/Documentation/networking/i40evf.txt
@@ -0,0 +1,47 @@
+Linux* Base Driver for Intel(R) Network Connection
+==================================================
+
+Intel XL710 X710 Virtual Function Linux driver.
+Copyright(c) 2013 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Known Issues/Troubleshooting
+- Support
+
+This file describes the i40evf Linux* Base Driver for the Intel(R) XL710
+X710 Virtual Function.
+
+The i40evf driver supports XL710 and X710 virtual function devices that
+can only be activated on kernels with CONFIG_PCI_IOV enabled.
+
+The guest OS loading the i40evf driver must support MSI-X interrupts.
+
+Identifying Your Adapter
+========================
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ http://support.intel.com/support/go/network/adapter/idguide.htm
+
+Known Issues/Troubleshooting
+============================
+
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ http://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/ieee802154.txt b/Documentation/networking/ieee802154.txt
new file mode 100644
index 000000000..22bbc7225
--- /dev/null
+++ b/Documentation/networking/ieee802154.txt
@@ -0,0 +1,151 @@
+
+ Linux IEEE 802.15.4 implementation
+
+
+Introduction
+============
+The IEEE 802.15.4 working group focuses on standardization of bottom
+two layers: Medium Access Control (MAC) and Physical (PHY). And there
+are mainly two options available for upper layers:
+ - ZigBee - proprietary protocol from ZigBee Alliance
+ - 6LowPAN - IPv6 networking over low rate personal area networks
+
+The Linux-ZigBee project goal is to provide complete implementation
+of IEEE 802.15.4 and 6LoWPAN protocols. IEEE 802.15.4 is a stack
+of protocols for organizing Low-Rate Wireless Personal Area Networks.
+
+The stack is composed of three main parts:
+ - IEEE 802.15.4 layer; We have chosen to use plain Berkeley socket API,
+ the generic Linux networking stack to transfer IEEE 802.15.4 messages
+ and a special protocol over genetlink for configuration/management
+ - MAC - provides access to shared channel and reliable data delivery
+ - PHY - represents device drivers
+
+
+Socket API
+==========
+
+int sd = socket(PF_IEEE802154, SOCK_DGRAM, 0);
+.....
+
+The address family, socket addresses etc. are defined in the
+include/net/af_ieee802154.h header or in the special header
+in our userspace package (see either linux-zigbee sourceforge download page
+or git tree at git://linux-zigbee.git.sourceforge.net/gitroot/linux-zigbee).
+
+One can use SOCK_RAW for passing raw data towards device xmit function. YMMV.
+
+
+Kernel side
+=============
+
+Like with WiFi, there are several types of devices implementing IEEE 802.15.4.
+1) 'HardMAC'. The MAC layer is implemented in the device itself, the device
+ exports MLME and data API.
+2) 'SoftMAC' or just radio. These types of devices are just radio transceivers
+ possibly with some kinds of acceleration like automatic CRC computation and
+ comparation, automagic ACK handling, address matching, etc.
+
+Those types of devices require different approach to be hooked into Linux kernel.
+
+
+MLME - MAC Level Management
+============================
+
+Most of IEEE 802.15.4 MLME interfaces are directly mapped on netlink commands.
+See the include/net/nl802154.h header. Our userspace tools package
+(see above) provides CLI configuration utility for radio interfaces and simple
+coordinator for IEEE 802.15.4 networks as an example users of MLME protocol.
+
+
+HardMAC
+=======
+
+See the header include/net/ieee802154_netdev.h. You have to implement Linux
+net_device, with .type = ARPHRD_IEEE802154. Data is exchanged with socket family
+code via plain sk_buffs. On skb reception skb->cb must contain additional
+info as described in the struct ieee802154_mac_cb. During packet transmission
+the skb->cb is used to provide additional data to device's header_ops->create
+function. Be aware that this data can be overridden later (when socket code
+submits skb to qdisc), so if you need something from that cb later, you should
+store info in the skb->data on your own.
+
+To hook the MLME interface you have to populate the ml_priv field of your
+net_device with a pointer to struct ieee802154_mlme_ops instance. The fields
+assoc_req, assoc_resp, disassoc_req, start_req, and scan_req are optional.
+All other fields are required.
+
+We provide an example of simple HardMAC driver at drivers/ieee802154/fakehard.c
+
+
+SoftMAC
+=======
+
+The MAC is the middle layer in the IEEE 802.15.4 Linux stack. This moment it
+provides interface for drivers registration and management of slave interfaces.
+
+NOTE: Currently the only monitor device type is supported - it's IEEE 802.15.4
+stack interface for network sniffers (e.g. WireShark).
+
+This layer is going to be extended soon.
+
+See header include/net/mac802154.h and several drivers in drivers/ieee802154/.
+
+
+Device drivers API
+==================
+
+The include/net/mac802154.h defines following functions:
+ - struct ieee802154_dev *ieee802154_alloc_device
+ (size_t priv_size, struct ieee802154_ops *ops):
+ allocation of IEEE 802.15.4 compatible device
+
+ - void ieee802154_free_device(struct ieee802154_dev *dev):
+ freeing allocated device
+
+ - int ieee802154_register_device(struct ieee802154_dev *dev):
+ register PHY in the system
+
+ - void ieee802154_unregister_device(struct ieee802154_dev *dev):
+ freeing registered PHY
+
+Moreover IEEE 802.15.4 device operations structure should be filled.
+
+Fake drivers
+============
+
+In addition there are two drivers available which simulate real devices with
+HardMAC (fakehard) and SoftMAC (fakelb - IEEE 802.15.4 loopback driver)
+interfaces. This option provides possibility to test and debug stack without
+usage of real hardware.
+
+See sources in drivers/ieee802154 folder for more details.
+
+
+6LoWPAN Linux implementation
+============================
+
+The IEEE 802.15.4 standard specifies an MTU of 128 bytes, yielding about 80
+octets of actual MAC payload once security is turned on, on a wireless link
+with a link throughput of 250 kbps or less. The 6LoWPAN adaptation format
+[RFC4944] was specified to carry IPv6 datagrams over such constrained links,
+taking into account limited bandwidth, memory, or energy resources that are
+expected in applications such as wireless Sensor Networks. [RFC4944] defines
+a Mesh Addressing header to support sub-IP forwarding, a Fragmentation header
+to support the IPv6 minimum MTU requirement [RFC2460], and stateless header
+compression for IPv6 datagrams (LOWPAN_HC1 and LOWPAN_HC2) to reduce the
+relatively large IPv6 and UDP headers down to (in the best case) several bytes.
+
+In Semptember 2011 the standard update was published - [RFC6282].
+It deprecates HC1 and HC2 compression and defines IPHC encoding format which is
+used in this Linux implementation.
+
+All the code related to 6lowpan you may find in files: net/ieee802154/6lowpan.*
+
+To setup 6lowpan interface you need (busybox release > 1.17.0):
+1. Add IEEE802.15.4 interface and initialize PANid;
+2. Add 6lowpan interface by command like:
+ # ip link add link wpan0 name lowpan0 type lowpan
+3. Set MAC (if needs):
+ # ip link set lowpan0 address de:ad:be:ef:ca:fe:ba:be
+4. Bring up 'lowpan0' interface
diff --git a/Documentation/networking/igb.txt b/Documentation/networking/igb.txt
new file mode 100644
index 000000000..15534fdd0
--- /dev/null
+++ b/Documentation/networking/igb.txt
@@ -0,0 +1,129 @@
+Linux* Base Driver for Intel(R) Ethernet Network Connection
+===========================================================
+
+Intel Gigabit Linux driver.
+Copyright(c) 1999 - 2013 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Additional Configurations
+- Support
+
+Identifying Your Adapter
+========================
+
+This driver supports all 82575, 82576 and 82580-based Intel (R) gigabit network
+connections.
+
+For specific information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ http://support.intel.com/support/go/network/adapter/idguide.htm
+
+Command Line Parameters
+=======================
+
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+max_vfs
+-------
+Valid Range: 0-7
+Default Value: 0
+
+This parameter adds support for SR-IOV. It causes the driver to spawn up to
+max_vfs worth of virtual function.
+
+Additional Configurations
+=========================
+
+ Jumbo Frames
+ ------------
+ Jumbo Frames support is enabled by changing the MTU to a value larger than
+ the default of 1500. Use the ip command to increase the MTU size.
+ For example:
+
+ ip link set dev eth<x> mtu 9000
+
+ This setting is not saved across reboots.
+
+ Notes:
+
+ - The maximum MTU setting for Jumbo Frames is 9216. This value coincides
+ with the maximum Jumbo Frames size of 9234 bytes.
+
+ - Using Jumbo frames at 10 or 100 Mbps is not supported and may result in
+ poor performance or loss of link.
+
+ ethtool
+ -------
+ The driver utilizes the ethtool interface for driver configuration and
+ diagnostics, as well as displaying statistical information. The latest
+ version of ethtool can be found at:
+
+ http://ftp.kernel.org/pub/software/network/ethtool/
+
+ Enabling Wake on LAN* (WoL)
+ ---------------------------
+ WoL is configured through the ethtool* utility.
+
+ For instructions on enabling WoL with ethtool, refer to the ethtool man page.
+
+ WoL will be enabled on the system during the next shut down or reboot.
+ For this driver version, in order to enable WoL, the igb driver must be
+ loaded when shutting down or rebooting the system.
+
+ Wake On LAN is only supported on port A of multi-port adapters.
+
+ Wake On LAN is not supported for the Intel(R) Gigabit VT Quad Port Server
+ Adapter.
+
+ Multiqueue
+ ----------
+ In this mode, a separate MSI-X vector is allocated for each queue and one
+ for "other" interrupts such as link status change and errors. All
+ interrupts are throttled via interrupt moderation. Interrupt moderation
+ must be used to avoid interrupt storms while the driver is processing one
+ interrupt. The moderation value should be at least as large as the expected
+ time for the driver to process an interrupt. Multiqueue is off by default.
+
+ REQUIREMENTS: MSI-X support is required for Multiqueue. If MSI-X is not
+ found, the system will fallback to MSI or to Legacy interrupts.
+
+ MAC and VLAN anti-spoofing feature
+ ----------------------------------
+ When a malicious driver attempts to send a spoofed packet, it is dropped by
+ the hardware and not transmitted. An interrupt is sent to the PF driver
+ notifying it of the spoof attempt.
+
+ When a spoofed packet is detected the PF driver will send the following
+ message to the system log (displayed by the "dmesg" command):
+
+ Spoof event(s) detected on VF(n)
+
+ Where n=the VF that attempted to do the spoofing.
+
+ Setting MAC Address, VLAN and Rate Limit Using IProute2 Tool
+ ------------------------------------------------------------
+ You can set a MAC address of a Virtual Function (VF), a default VLAN and the
+ rate limit using the IProute2 tool. Download the latest version of the
+ iproute2 tool from Sourceforge if your version does not have all the
+ features you require.
+
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ www.intel.com/support/
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/igbvf.txt b/Documentation/networking/igbvf.txt
new file mode 100644
index 000000000..40db17a66
--- /dev/null
+++ b/Documentation/networking/igbvf.txt
@@ -0,0 +1,80 @@
+Linux* Base Driver for Intel(R) Ethernet Network Connection
+===========================================================
+
+Intel Gigabit Linux driver.
+Copyright(c) 1999 - 2013 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Additional Configurations
+- Support
+
+This file describes the igbvf Linux* Base Driver for Intel Network Connection.
+
+The igbvf driver supports 82576-based virtual function devices that can only
+be activated on kernels that support SR-IOV. SR-IOV requires the correct
+platform and OS support.
+
+The igbvf driver requires the igb driver, version 2.0 or later. The igbvf
+driver supports virtual functions generated by the igb driver with a max_vfs
+value of 1 or greater. For more information on the max_vfs parameter refer
+to the README included with the igb driver.
+
+The guest OS loading the igbvf driver must support MSI-X interrupts.
+
+This driver is only supported as a loadable module at this time. Intel is
+not supplying patches against the kernel source to allow for static linking
+of the driver. For questions related to hardware requirements, refer to the
+documentation supplied with your Intel Gigabit adapter. All hardware
+requirements listed apply to use with Linux.
+
+Instructions on updating ethtool can be found in the section "Additional
+Configurations" later in this document.
+
+VLANs: There is a limit of a total of 32 shared VLANs to 1 or more VFs.
+
+Identifying Your Adapter
+========================
+
+The igbvf driver supports 82576-based virtual function devices that can only
+be activated on kernels that support SR-IOV.
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ http://support.intel.com/support/go/network/adapter/idguide.htm
+
+For the latest Intel network drivers for Linux, refer to the following
+website. In the search field, enter your adapter name or type, or use the
+networking link on the left to search for your adapter:
+
+ http://downloadcenter.intel.com/scripts-df-external/Support_Intel.aspx
+
+Additional Configurations
+=========================
+
+ ethtool
+ -------
+ The driver utilizes the ethtool interface for driver configuration and
+ diagnostics, as well as displaying statistical information. The ethtool
+ version 3.0 or later is required for this functionality, although we
+ strongly recommend downloading the latest version at:
+
+ http://ftp.kernel.org/pub/software/network/ethtool/
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ http://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
new file mode 100644
index 000000000..071fb18dc
--- /dev/null
+++ b/Documentation/networking/ip-sysctl.txt
@@ -0,0 +1,1858 @@
+/proc/sys/net/ipv4/* Variables:
+
+ip_forward - BOOLEAN
+ 0 - disabled (default)
+ not 0 - enabled
+
+ Forward Packets between interfaces.
+
+ This variable is special, its change resets all configuration
+ parameters to their default state (RFC1122 for hosts, RFC1812
+ for routers)
+
+ip_default_ttl - INTEGER
+ Default value of TTL field (Time To Live) for outgoing (but not
+ forwarded) IP packets. Should be between 1 and 255 inclusive.
+ Default: 64 (as recommended by RFC1700)
+
+ip_no_pmtu_disc - INTEGER
+ Disable Path MTU Discovery. If enabled in mode 1 and a
+ fragmentation-required ICMP is received, the PMTU to this
+ destination will be set to min_pmtu (see below). You will need
+ to raise min_pmtu to the smallest interface MTU on your system
+ manually if you want to avoid locally generated fragments.
+
+ In mode 2 incoming Path MTU Discovery messages will be
+ discarded. Outgoing frames are handled the same as in mode 1,
+ implicitly setting IP_PMTUDISC_DONT on every created socket.
+
+ Mode 3 is a hardend pmtu discover mode. The kernel will only
+ accept fragmentation-needed errors if the underlying protocol
+ can verify them besides a plain socket lookup. Current
+ protocols for which pmtu events will be honored are TCP, SCTP
+ and DCCP as they verify e.g. the sequence number or the
+ association. This mode should not be enabled globally but is
+ only intended to secure e.g. name servers in namespaces where
+ TCP path mtu must still work but path MTU information of other
+ protocols should be discarded. If enabled globally this mode
+ could break other protocols.
+
+ Possible values: 0-3
+ Default: FALSE
+
+min_pmtu - INTEGER
+ default 552 - minimum discovered Path MTU
+
+ip_forward_use_pmtu - BOOLEAN
+ By default we don't trust protocol path MTUs while forwarding
+ because they could be easily forged and can lead to unwanted
+ fragmentation by the router.
+ You only need to enable this if you have user-space software
+ which tries to discover path mtus by itself and depends on the
+ kernel honoring this information. This is normally not the
+ case.
+ Default: 0 (disabled)
+ Possible values:
+ 0 - disabled
+ 1 - enabled
+
+fwmark_reflect - BOOLEAN
+ Controls the fwmark of kernel-generated IPv4 reply packets that are not
+ associated with a socket for example, TCP RSTs or ICMP echo replies).
+ If unset, these packets have a fwmark of zero. If set, they have the
+ fwmark of the packet they are replying to.
+ Default: 0
+
+route/max_size - INTEGER
+ Maximum number of routes allowed in the kernel. Increase
+ this when using large numbers of interfaces and/or routes.
+ From linux kernel 3.6 onwards, this is deprecated for ipv4
+ as route cache is no longer used.
+
+neigh/default/gc_thresh1 - INTEGER
+ Minimum number of entries to keep. Garbage collector will not
+ purge entries if there are fewer than this number.
+ Default: 128
+
+neigh/default/gc_thresh2 - INTEGER
+ Threshold when garbage collector becomes more aggressive about
+ purging entries. Entries older than 5 seconds will be cleared
+ when over this number.
+ Default: 512
+
+neigh/default/gc_thresh3 - INTEGER
+ Maximum number of neighbor entries allowed. Increase this
+ when using large numbers of interfaces and when communicating
+ with large numbers of directly-connected peers.
+ Default: 1024
+
+neigh/default/unres_qlen_bytes - INTEGER
+ The maximum number of bytes which may be used by packets
+ queued for each unresolved address by other network layers.
+ (added in linux 3.3)
+ Setting negative value is meaningless and will return error.
+ Default: 65536 Bytes(64KB)
+
+neigh/default/unres_qlen - INTEGER
+ The maximum number of packets which may be queued for each
+ unresolved address by other network layers.
+ (deprecated in linux 3.3) : use unres_qlen_bytes instead.
+ Prior to linux 3.3, the default value is 3 which may cause
+ unexpected packet loss. The current default value is calculated
+ according to default value of unres_qlen_bytes and true size of
+ packet.
+ Default: 31
+
+mtu_expires - INTEGER
+ Time, in seconds, that cached PMTU information is kept.
+
+min_adv_mss - INTEGER
+ The advertised MSS depends on the first hop route MTU, but will
+ never be lower than this setting.
+
+IP Fragmentation:
+
+ipfrag_high_thresh - INTEGER
+ Maximum memory used to reassemble IP fragments. When
+ ipfrag_high_thresh bytes of memory is allocated for this purpose,
+ the fragment handler will toss packets until ipfrag_low_thresh
+ is reached. This also serves as a maximum limit to namespaces
+ different from the initial one.
+
+ipfrag_low_thresh - INTEGER
+ Maximum memory used to reassemble IP fragments before the kernel
+ begins to remove incomplete fragment queues to free up resources.
+ The kernel still accepts new fragments for defragmentation.
+
+ipfrag_time - INTEGER
+ Time in seconds to keep an IP fragment in memory.
+
+ipfrag_max_dist - INTEGER
+ ipfrag_max_dist is a non-negative integer value which defines the
+ maximum "disorder" which is allowed among fragments which share a
+ common IP source address. Note that reordering of packets is
+ not unusual, but if a large number of fragments arrive from a source
+ IP address while a particular fragment queue remains incomplete, it
+ probably indicates that one or more fragments belonging to that queue
+ have been lost. When ipfrag_max_dist is positive, an additional check
+ is done on fragments before they are added to a reassembly queue - if
+ ipfrag_max_dist (or more) fragments have arrived from a particular IP
+ address between additions to any IP fragment queue using that source
+ address, it's presumed that one or more fragments in the queue are
+ lost. The existing fragment queue will be dropped, and a new one
+ started. An ipfrag_max_dist value of zero disables this check.
+
+ Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can
+ result in unnecessarily dropping fragment queues when normal
+ reordering of packets occurs, which could lead to poor application
+ performance. Using a very large value, e.g. 50000, increases the
+ likelihood of incorrectly reassembling IP fragments that originate
+ from different IP datagrams, which could result in data corruption.
+ Default: 64
+
+INET peer storage:
+
+inet_peer_threshold - INTEGER
+ The approximate size of the storage. Starting from this threshold
+ entries will be thrown aggressively. This threshold also determines
+ entries' time-to-live and time intervals between garbage collection
+ passes. More entries, less time-to-live, less GC interval.
+
+inet_peer_minttl - INTEGER
+ Minimum time-to-live of entries. Should be enough to cover fragment
+ time-to-live on the reassembling side. This minimum time-to-live is
+ guaranteed if the pool size is less than inet_peer_threshold.
+ Measured in seconds.
+
+inet_peer_maxttl - INTEGER
+ Maximum time-to-live of entries. Unused entries will expire after
+ this period of time if there is no memory pressure on the pool (i.e.
+ when the number of entries in the pool is very small).
+ Measured in seconds.
+
+TCP variables:
+
+somaxconn - INTEGER
+ Limit of socket listen() backlog, known in userspace as SOMAXCONN.
+ Defaults to 128. See also tcp_max_syn_backlog for additional tuning
+ for TCP sockets.
+
+tcp_abort_on_overflow - BOOLEAN
+ If listening service is too slow to accept new connections,
+ reset them. Default state is FALSE. It means that if overflow
+ occurred due to a burst, connection will recover. Enable this
+ option _only_ if you are really sure that listening daemon
+ cannot be tuned to accept connections faster. Enabling this
+ option can harm clients of your server.
+
+tcp_adv_win_scale - INTEGER
+ Count buffering overhead as bytes/2^tcp_adv_win_scale
+ (if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale),
+ if it is <= 0.
+ Possible values are [-31, 31], inclusive.
+ Default: 1
+
+tcp_allowed_congestion_control - STRING
+ Show/set the congestion control choices available to non-privileged
+ processes. The list is a subset of those listed in
+ tcp_available_congestion_control.
+ Default is "reno" and the default setting (tcp_congestion_control).
+
+tcp_app_win - INTEGER
+ Reserve max(window/2^tcp_app_win, mss) of window for application
+ buffer. Value 0 is special, it means that nothing is reserved.
+ Default: 31
+
+tcp_autocorking - BOOLEAN
+ Enable TCP auto corking :
+ When applications do consecutive small write()/sendmsg() system calls,
+ we try to coalesce these small writes as much as possible, to lower
+ total amount of sent packets. This is done if at least one prior
+ packet for the flow is waiting in Qdisc queues or device transmit
+ queue. Applications can still use TCP_CORK for optimal behavior
+ when they know how/when to uncork their sockets.
+ Default : 1
+
+tcp_available_congestion_control - STRING
+ Shows the available congestion control choices that are registered.
+ More congestion control algorithms may be available as modules,
+ but not loaded.
+
+tcp_base_mss - INTEGER
+ The initial value of search_low to be used by the packetization layer
+ Path MTU discovery (MTU probing). If MTU probing is enabled,
+ this is the initial MSS used by the connection.
+
+tcp_congestion_control - STRING
+ Set the congestion control algorithm to be used for new
+ connections. The algorithm "reno" is always available, but
+ additional choices may be available based on kernel configuration.
+ Default is set as part of kernel configuration.
+ For passive connections, the listener congestion control choice
+ is inherited.
+ [see setsockopt(listenfd, SOL_TCP, TCP_CONGESTION, "name" ...) ]
+
+tcp_dsack - BOOLEAN
+ Allows TCP to send "duplicate" SACKs.
+
+tcp_early_retrans - INTEGER
+ Enable Early Retransmit (ER), per RFC 5827. ER lowers the threshold
+ for triggering fast retransmit when the amount of outstanding data is
+ small and when no previously unsent data can be transmitted (such
+ that limited transmit could be used). Also controls the use of
+ Tail loss probe (TLP) that converts RTOs occurring due to tail
+ losses into fast recovery (draft-dukkipati-tcpm-tcp-loss-probe-01).
+ Possible values:
+ 0 disables ER
+ 1 enables ER
+ 2 enables ER but delays fast recovery and fast retransmit
+ by a fourth of RTT. This mitigates connection falsely
+ recovers when network has a small degree of reordering
+ (less than 3 packets).
+ 3 enables delayed ER and TLP.
+ 4 enables TLP only.
+ Default: 3
+
+tcp_ecn - INTEGER
+ Control use of Explicit Congestion Notification (ECN) by TCP.
+ ECN is used only when both ends of the TCP connection indicate
+ support for it. This feature is useful in avoiding losses due
+ to congestion by allowing supporting routers to signal
+ congestion before having to drop packets.
+ Possible values are:
+ 0 Disable ECN. Neither initiate nor accept ECN.
+ 1 Enable ECN when requested by incoming connections and
+ also request ECN on outgoing connection attempts.
+ 2 Enable ECN when requested by incoming connections
+ but do not request ECN on outgoing connections.
+ Default: 2
+
+tcp_fack - BOOLEAN
+ Enable FACK congestion avoidance and fast retransmission.
+ The value is not used, if tcp_sack is not enabled.
+
+tcp_fin_timeout - INTEGER
+ The length of time an orphaned (no longer referenced by any
+ application) connection will remain in the FIN_WAIT_2 state
+ before it is aborted at the local end. While a perfectly
+ valid "receive only" state for an un-orphaned connection, an
+ orphaned connection in FIN_WAIT_2 state could otherwise wait
+ forever for the remote to close its end of the connection.
+ Cf. tcp_max_orphans
+ Default: 60 seconds
+
+tcp_frto - INTEGER
+ Enables Forward RTO-Recovery (F-RTO) defined in RFC5682.
+ F-RTO is an enhanced recovery algorithm for TCP retransmission
+ timeouts. It is particularly beneficial in networks where the
+ RTT fluctuates (e.g., wireless). F-RTO is sender-side only
+ modification. It does not require any support from the peer.
+
+ By default it's enabled with a non-zero value. 0 disables F-RTO.
+
+tcp_invalid_ratelimit - INTEGER
+ Limit the maximal rate for sending duplicate acknowledgments
+ in response to incoming TCP packets that are for an existing
+ connection but that are invalid due to any of these reasons:
+
+ (a) out-of-window sequence number,
+ (b) out-of-window acknowledgment number, or
+ (c) PAWS (Protection Against Wrapped Sequence numbers) check failure
+
+ This can help mitigate simple "ack loop" DoS attacks, wherein
+ a buggy or malicious middlebox or man-in-the-middle can
+ rewrite TCP header fields in manner that causes each endpoint
+ to think that the other is sending invalid TCP segments, thus
+ causing each side to send an unterminating stream of duplicate
+ acknowledgments for invalid segments.
+
+ Using 0 disables rate-limiting of dupacks in response to
+ invalid segments; otherwise this value specifies the minimal
+ space between sending such dupacks, in milliseconds.
+
+ Default: 500 (milliseconds).
+
+tcp_keepalive_time - INTEGER
+ How often TCP sends out keepalive messages when keepalive is enabled.
+ Default: 2hours.
+
+tcp_keepalive_probes - INTEGER
+ How many keepalive probes TCP sends out, until it decides that the
+ connection is broken. Default value: 9.
+
+tcp_keepalive_intvl - INTEGER
+ How frequently the probes are send out. Multiplied by
+ tcp_keepalive_probes it is time to kill not responding connection,
+ after probes started. Default value: 75sec i.e. connection
+ will be aborted after ~11 minutes of retries.
+
+tcp_low_latency - BOOLEAN
+ If set, the TCP stack makes decisions that prefer lower
+ latency as opposed to higher throughput. By default, this
+ option is not set meaning that higher throughput is preferred.
+ An example of an application where this default should be
+ changed would be a Beowulf compute cluster.
+ Default: 0
+
+tcp_max_orphans - INTEGER
+ Maximal number of TCP sockets not attached to any user file handle,
+ held by system. If this number is exceeded orphaned connections are
+ reset immediately and warning is printed. This limit exists
+ only to prevent simple DoS attacks, you _must_ not rely on this
+ or lower the limit artificially, but rather increase it
+ (probably, after increasing installed memory),
+ if network conditions require more than default value,
+ and tune network services to linger and kill such states
+ more aggressively. Let me to remind again: each orphan eats
+ up to ~64K of unswappable memory.
+
+tcp_max_syn_backlog - INTEGER
+ Maximal number of remembered connection requests, which have not
+ received an acknowledgment from connecting client.
+ The minimal value is 128 for low memory machines, and it will
+ increase in proportion to the memory of machine.
+ If server suffers from overload, try increasing this number.
+
+tcp_max_tw_buckets - INTEGER
+ Maximal number of timewait sockets held by system simultaneously.
+ If this number is exceeded time-wait socket is immediately destroyed
+ and warning is printed. This limit exists only to prevent
+ simple DoS attacks, you _must_ not lower the limit artificially,
+ but rather increase it (probably, after increasing installed memory),
+ if network conditions require more than default value.
+
+tcp_mem - vector of 3 INTEGERs: min, pressure, max
+ min: below this number of pages TCP is not bothered about its
+ memory appetite.
+
+ pressure: when amount of memory allocated by TCP exceeds this number
+ of pages, TCP moderates its memory consumption and enters memory
+ pressure mode, which is exited when memory consumption falls
+ under "min".
+
+ max: number of pages allowed for queueing by all TCP sockets.
+
+ Defaults are calculated at boot time from amount of available
+ memory.
+
+tcp_moderate_rcvbuf - BOOLEAN
+ If set, TCP performs receive buffer auto-tuning, attempting to
+ automatically size the buffer (no greater than tcp_rmem[2]) to
+ match the size required by the path for full throughput. Enabled by
+ default.
+
+tcp_mtu_probing - INTEGER
+ Controls TCP Packetization-Layer Path MTU Discovery. Takes three
+ values:
+ 0 - Disabled
+ 1 - Disabled by default, enabled when an ICMP black hole detected
+ 2 - Always enabled, use initial MSS of tcp_base_mss.
+
+tcp_probe_interval - INTEGER
+ Controls how often to start TCP Packetization-Layer Path MTU
+ Discovery reprobe. The default is reprobing every 10 minutes as
+ per RFC4821.
+
+tcp_probe_threshold - INTEGER
+ Controls when TCP Packetization-Layer Path MTU Discovery probing
+ will stop in respect to the width of search range in bytes. Default
+ is 8 bytes.
+
+tcp_no_metrics_save - BOOLEAN
+ By default, TCP saves various connection metrics in the route cache
+ when the connection closes, so that connections established in the
+ near future can use these to set initial conditions. Usually, this
+ increases overall performance, but may sometimes cause performance
+ degradation. If set, TCP will not cache metrics on closing
+ connections.
+
+tcp_orphan_retries - INTEGER
+ This value influences the timeout of a locally closed TCP connection,
+ when RTO retransmissions remain unacknowledged.
+ See tcp_retries2 for more details.
+
+ The default value is 8.
+ If your machine is a loaded WEB server,
+ you should think about lowering this value, such sockets
+ may consume significant resources. Cf. tcp_max_orphans.
+
+tcp_reordering - INTEGER
+ Initial reordering level of packets in a TCP stream.
+ TCP stack can then dynamically adjust flow reordering level
+ between this initial value and tcp_max_reordering
+ Default: 3
+
+tcp_max_reordering - INTEGER
+ Maximal reordering level of packets in a TCP stream.
+ 300 is a fairly conservative value, but you might increase it
+ if paths are using per packet load balancing (like bonding rr mode)
+ Default: 300
+
+tcp_retrans_collapse - BOOLEAN
+ Bug-to-bug compatibility with some broken printers.
+ On retransmit try to send bigger packets to work around bugs in
+ certain TCP stacks.
+
+tcp_retries1 - INTEGER
+ This value influences the time, after which TCP decides, that
+ something is wrong due to unacknowledged RTO retransmissions,
+ and reports this suspicion to the network layer.
+ See tcp_retries2 for more details.
+
+ RFC 1122 recommends at least 3 retransmissions, which is the
+ default.
+
+tcp_retries2 - INTEGER
+ This value influences the timeout of an alive TCP connection,
+ when RTO retransmissions remain unacknowledged.
+ Given a value of N, a hypothetical TCP connection following
+ exponential backoff with an initial RTO of TCP_RTO_MIN would
+ retransmit N times before killing the connection at the (N+1)th RTO.
+
+ The default value of 15 yields a hypothetical timeout of 924.6
+ seconds and is a lower bound for the effective timeout.
+ TCP will effectively time out at the first RTO which exceeds the
+ hypothetical timeout.
+
+ RFC 1122 recommends at least 100 seconds for the timeout,
+ which corresponds to a value of at least 8.
+
+tcp_rfc1337 - BOOLEAN
+ If set, the TCP stack behaves conforming to RFC1337. If unset,
+ we are not conforming to RFC, but prevent TCP TIME_WAIT
+ assassination.
+ Default: 0
+
+tcp_rmem - vector of 3 INTEGERs: min, default, max
+ min: Minimal size of receive buffer used by TCP sockets.
+ It is guaranteed to each TCP socket, even under moderate memory
+ pressure.
+ Default: 1 page
+
+ default: initial size of receive buffer used by TCP sockets.
+ This value overrides net.core.rmem_default used by other protocols.
+ Default: 87380 bytes. This value results in window of 65535 with
+ default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit
+ less for default tcp_app_win. See below about these variables.
+
+ max: maximal size of receive buffer allowed for automatically
+ selected receiver buffers for TCP socket. This value does not override
+ net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables
+ automatic tuning of that socket's receive buffer size, in which
+ case this value is ignored.
+ Default: between 87380B and 6MB, depending on RAM size.
+
+tcp_sack - BOOLEAN
+ Enable select acknowledgments (SACKS).
+
+tcp_slow_start_after_idle - BOOLEAN
+ If set, provide RFC2861 behavior and time out the congestion
+ window after an idle period. An idle period is defined at
+ the current RTO. If unset, the congestion window will not
+ be timed out after an idle period.
+ Default: 1
+
+tcp_stdurg - BOOLEAN
+ Use the Host requirements interpretation of the TCP urgent pointer field.
+ Most hosts use the older BSD interpretation, so if you turn this on
+ Linux might not communicate correctly with them.
+ Default: FALSE
+
+tcp_synack_retries - INTEGER
+ Number of times SYNACKs for a passive TCP connection attempt will
+ be retransmitted. Should not be higher than 255. Default value
+ is 5, which corresponds to 31seconds till the last retransmission
+ with the current initial RTO of 1second. With this the final timeout
+ for a passive TCP connection will happen after 63seconds.
+
+tcp_syncookies - BOOLEAN
+ Only valid when the kernel was compiled with CONFIG_SYN_COOKIES
+ Send out syncookies when the syn backlog queue of a socket
+ overflows. This is to prevent against the common 'SYN flood attack'
+ Default: 1
+
+ Note, that syncookies is fallback facility.
+ It MUST NOT be used to help highly loaded servers to stand
+ against legal connection rate. If you see SYN flood warnings
+ in your logs, but investigation shows that they occur
+ because of overload with legal connections, you should tune
+ another parameters until this warning disappear.
+ See: tcp_max_syn_backlog, tcp_synack_retries, tcp_abort_on_overflow.
+
+ syncookies seriously violate TCP protocol, do not allow
+ to use TCP extensions, can result in serious degradation
+ of some services (f.e. SMTP relaying), visible not by you,
+ but your clients and relays, contacting you. While you see
+ SYN flood warnings in logs not being really flooded, your server
+ is seriously misconfigured.
+
+ If you want to test which effects syncookies have to your
+ network connections you can set this knob to 2 to enable
+ unconditionally generation of syncookies.
+
+tcp_fastopen - INTEGER
+ Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data
+ in the opening SYN packet. To use this feature, the client application
+ must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than
+ connect() to perform a TCP handshake automatically.
+
+ The values (bitmap) are
+ 1: Enables sending data in the opening SYN on the client w/ MSG_FASTOPEN.
+ 2: Enables TCP Fast Open on the server side, i.e., allowing data in
+ a SYN packet to be accepted and passed to the application before
+ 3-way hand shake finishes.
+ 4: Send data in the opening SYN regardless of cookie availability and
+ without a cookie option.
+ 0x100: Accept SYN data w/o validating the cookie.
+ 0x200: Accept data-in-SYN w/o any cookie option present.
+ 0x400/0x800: Enable Fast Open on all listeners regardless of the
+ TCP_FASTOPEN socket option. The two different flags designate two
+ different ways of setting max_qlen without the TCP_FASTOPEN socket
+ option.
+
+ Default: 1
+
+ Note that the client & server side Fast Open flags (1 and 2
+ respectively) must be also enabled before the rest of flags can take
+ effect.
+
+ See include/net/tcp.h and the code for more details.
+
+tcp_syn_retries - INTEGER
+ Number of times initial SYNs for an active TCP connection attempt
+ will be retransmitted. Should not be higher than 255. Default value
+ is 6, which corresponds to 63seconds till the last retransmission
+ with the current initial RTO of 1second. With this the final timeout
+ for an active TCP connection attempt will happen after 127seconds.
+
+tcp_timestamps - BOOLEAN
+ Enable timestamps as defined in RFC1323.
+
+tcp_min_tso_segs - INTEGER
+ Minimal number of segments per TSO frame.
+ Since linux-3.12, TCP does an automatic sizing of TSO frames,
+ depending on flow rate, instead of filling 64Kbytes packets.
+ For specific usages, it's possible to force TCP to build big
+ TSO frames. Note that TCP stack might split too big TSO packets
+ if available window is too small.
+ Default: 2
+
+tcp_tso_win_divisor - INTEGER
+ This allows control over what percentage of the congestion window
+ can be consumed by a single TSO frame.
+ The setting of this parameter is a choice between burstiness and
+ building larger TSO frames.
+ Default: 3
+
+tcp_tw_recycle - BOOLEAN
+ Enable fast recycling TIME-WAIT sockets. Default value is 0.
+ It should not be changed without advice/request of technical
+ experts.
+
+tcp_tw_reuse - BOOLEAN
+ Allow to reuse TIME-WAIT sockets for new connections when it is
+ safe from protocol viewpoint. Default value is 0.
+ It should not be changed without advice/request of technical
+ experts.
+
+tcp_window_scaling - BOOLEAN
+ Enable window scaling as defined in RFC1323.
+
+tcp_wmem - vector of 3 INTEGERs: min, default, max
+ min: Amount of memory reserved for send buffers for TCP sockets.
+ Each TCP socket has rights to use it due to fact of its birth.
+ Default: 1 page
+
+ default: initial size of send buffer used by TCP sockets. This
+ value overrides net.core.wmem_default used by other protocols.
+ It is usually lower than net.core.wmem_default.
+ Default: 16K
+
+ max: Maximal amount of memory allowed for automatically tuned
+ send buffers for TCP sockets. This value does not override
+ net.core.wmem_max. Calling setsockopt() with SO_SNDBUF disables
+ automatic tuning of that socket's send buffer size, in which case
+ this value is ignored.
+ Default: between 64K and 4MB, depending on RAM size.
+
+tcp_notsent_lowat - UNSIGNED INTEGER
+ A TCP socket can control the amount of unsent bytes in its write queue,
+ thanks to TCP_NOTSENT_LOWAT socket option. poll()/select()/epoll()
+ reports POLLOUT events if the amount of unsent bytes is below a per
+ socket value, and if the write queue is not full. sendmsg() will
+ also not add new buffers if the limit is hit.
+
+ This global variable controls the amount of unsent data for
+ sockets not using TCP_NOTSENT_LOWAT. For these sockets, a change
+ to the global variable has immediate effect.
+
+ Default: UINT_MAX (0xFFFFFFFF)
+
+tcp_workaround_signed_windows - BOOLEAN
+ If set, assume no receipt of a window scaling option means the
+ remote TCP is broken and treats the window as a signed quantity.
+ If unset, assume the remote TCP is not broken even if we do
+ not receive a window scaling option from them.
+ Default: 0
+
+tcp_thin_linear_timeouts - BOOLEAN
+ Enable dynamic triggering of linear timeouts for thin streams.
+ If set, a check is performed upon retransmission by timeout to
+ determine if the stream is thin (less than 4 packets in flight).
+ As long as the stream is found to be thin, up to 6 linear
+ timeouts may be performed before exponential backoff mode is
+ initiated. This improves retransmission latency for
+ non-aggressive thin streams, often found to be time-dependent.
+ For more information on thin streams, see
+ Documentation/networking/tcp-thin.txt
+ Default: 0
+
+tcp_thin_dupack - BOOLEAN
+ Enable dynamic triggering of retransmissions after one dupACK
+ for thin streams. If set, a check is performed upon reception
+ of a dupACK to determine if the stream is thin (less than 4
+ packets in flight). As long as the stream is found to be thin,
+ data is retransmitted on the first received dupACK. This
+ improves retransmission latency for non-aggressive thin
+ streams, often found to be time-dependent.
+ For more information on thin streams, see
+ Documentation/networking/tcp-thin.txt
+ Default: 0
+
+tcp_limit_output_bytes - INTEGER
+ Controls TCP Small Queue limit per tcp socket.
+ TCP bulk sender tends to increase packets in flight until it
+ gets losses notifications. With SNDBUF autotuning, this can
+ result in a large amount of packets queued in qdisc/device
+ on the local machine, hurting latency of other flows, for
+ typical pfifo_fast qdiscs.
+ tcp_limit_output_bytes limits the number of bytes on qdisc
+ or device to reduce artificial RTT/cwnd and reduce bufferbloat.
+ Default: 131072
+
+tcp_challenge_ack_limit - INTEGER
+ Limits number of Challenge ACK sent per second, as recommended
+ in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
+ Default: 100
+
+UDP variables:
+
+udp_mem - vector of 3 INTEGERs: min, pressure, max
+ Number of pages allowed for queueing by all UDP sockets.
+
+ min: Below this number of pages UDP is not bothered about its
+ memory appetite. When amount of memory allocated by UDP exceeds
+ this number, UDP starts to moderate memory usage.
+
+ pressure: This value was introduced to follow format of tcp_mem.
+
+ max: Number of pages allowed for queueing by all UDP sockets.
+
+ Default is calculated at boot time from amount of available memory.
+
+udp_rmem_min - INTEGER
+ Minimal size of receive buffer used by UDP sockets in moderation.
+ Each UDP socket is able to use the size for receiving data, even if
+ total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
+ Default: 1 page
+
+udp_wmem_min - INTEGER
+ Minimal size of send buffer used by UDP sockets in moderation.
+ Each UDP socket is able to use the size for sending data, even if
+ total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
+ Default: 1 page
+
+CIPSOv4 Variables:
+
+cipso_cache_enable - BOOLEAN
+ If set, enable additions to and lookups from the CIPSO label mapping
+ cache. If unset, additions are ignored and lookups always result in a
+ miss. However, regardless of the setting the cache is still
+ invalidated when required when means you can safely toggle this on and
+ off and the cache will always be "safe".
+ Default: 1
+
+cipso_cache_bucket_size - INTEGER
+ The CIPSO label cache consists of a fixed size hash table with each
+ hash bucket containing a number of cache entries. This variable limits
+ the number of entries in each hash bucket; the larger the value the
+ more CIPSO label mappings that can be cached. When the number of
+ entries in a given hash bucket reaches this limit adding new entries
+ causes the oldest entry in the bucket to be removed to make room.
+ Default: 10
+
+cipso_rbm_optfmt - BOOLEAN
+ Enable the "Optimized Tag 1 Format" as defined in section 3.4.2.6 of
+ the CIPSO draft specification (see Documentation/netlabel for details).
+ This means that when set the CIPSO tag will be padded with empty
+ categories in order to make the packet data 32-bit aligned.
+ Default: 0
+
+cipso_rbm_structvalid - BOOLEAN
+ If set, do a very strict check of the CIPSO option when
+ ip_options_compile() is called. If unset, relax the checks done during
+ ip_options_compile(). Either way is "safe" as errors are caught else
+ where in the CIPSO processing code but setting this to 0 (False) should
+ result in less work (i.e. it should be faster) but could cause problems
+ with other implementations that require strict checking.
+ Default: 0
+
+IP Variables:
+
+ip_local_port_range - 2 INTEGERS
+ Defines the local port range that is used by TCP and UDP to
+ choose the local port. The first number is the first, the
+ second the last local port number. The default values are
+ 32768 and 61000 respectively.
+
+ip_local_reserved_ports - list of comma separated ranges
+ Specify the ports which are reserved for known third-party
+ applications. These ports will not be used by automatic port
+ assignments (e.g. when calling connect() or bind() with port
+ number 0). Explicit port allocation behavior is unchanged.
+
+ The format used for both input and output is a comma separated
+ list of ranges (e.g. "1,2-4,10-10" for ports 1, 2, 3, 4 and
+ 10). Writing to the file will clear all previously reserved
+ ports and update the current list with the one given in the
+ input.
+
+ Note that ip_local_port_range and ip_local_reserved_ports
+ settings are independent and both are considered by the kernel
+ when determining which ports are available for automatic port
+ assignments.
+
+ You can reserve ports which are not in the current
+ ip_local_port_range, e.g.:
+
+ $ cat /proc/sys/net/ipv4/ip_local_port_range
+ 32000 61000
+ $ cat /proc/sys/net/ipv4/ip_local_reserved_ports
+ 8080,9148
+
+ although this is redundant. However such a setting is useful
+ if later the port range is changed to a value that will
+ include the reserved ports.
+
+ Default: Empty
+
+ip_nonlocal_bind - BOOLEAN
+ If set, allows processes to bind() to non-local IP addresses,
+ which can be quite useful - but may break some applications.
+ Default: 0
+
+ip_dynaddr - BOOLEAN
+ If set non-zero, enables support for dynamic addresses.
+ If set to a non-zero value larger than 1, a kernel log
+ message will be printed when dynamic address rewriting
+ occurs.
+ Default: 0
+
+ip_early_demux - BOOLEAN
+ Optimize input packet processing down to one demux for
+ certain kinds of local sockets. Currently we only do this
+ for established TCP sockets.
+
+ It may add an additional cost for pure routing workloads that
+ reduces overall throughput, in such case you should disable it.
+ Default: 1
+
+icmp_echo_ignore_all - BOOLEAN
+ If set non-zero, then the kernel will ignore all ICMP ECHO
+ requests sent to it.
+ Default: 0
+
+icmp_echo_ignore_broadcasts - BOOLEAN
+ If set non-zero, then the kernel will ignore all ICMP ECHO and
+ TIMESTAMP requests sent to it via broadcast/multicast.
+ Default: 1
+
+icmp_ratelimit - INTEGER
+ Limit the maximal rates for sending ICMP packets whose type matches
+ icmp_ratemask (see below) to specific targets.
+ 0 to disable any limiting,
+ otherwise the minimal space between responses in milliseconds.
+ Note that another sysctl, icmp_msgs_per_sec limits the number
+ of ICMP packets sent on all targets.
+ Default: 1000
+
+icmp_msgs_per_sec - INTEGER
+ Limit maximal number of ICMP packets sent per second from this host.
+ Only messages whose type matches icmp_ratemask (see below) are
+ controlled by this limit.
+ Default: 1000
+
+icmp_msgs_burst - INTEGER
+ icmp_msgs_per_sec controls number of ICMP packets sent per second,
+ while icmp_msgs_burst controls the burst size of these packets.
+ Default: 50
+
+icmp_ratemask - INTEGER
+ Mask made of ICMP types for which rates are being limited.
+ Significant bits: IHGFEDCBA9876543210
+ Default mask: 0000001100000011000 (6168)
+
+ Bit definitions (see include/linux/icmp.h):
+ 0 Echo Reply
+ 3 Destination Unreachable *
+ 4 Source Quench *
+ 5 Redirect
+ 8 Echo Request
+ B Time Exceeded *
+ C Parameter Problem *
+ D Timestamp Request
+ E Timestamp Reply
+ F Info Request
+ G Info Reply
+ H Address Mask Request
+ I Address Mask Reply
+
+ * These are rate limited by default (see default mask above)
+
+icmp_ignore_bogus_error_responses - BOOLEAN
+ Some routers violate RFC1122 by sending bogus responses to broadcast
+ frames. Such violations are normally logged via a kernel warning.
+ If this is set to TRUE, the kernel will not give such warnings, which
+ will avoid log file clutter.
+ Default: 1
+
+icmp_errors_use_inbound_ifaddr - BOOLEAN
+
+ If zero, icmp error messages are sent with the primary address of
+ the exiting interface.
+
+ If non-zero, the message will be sent with the primary address of
+ the interface that received the packet that caused the icmp error.
+ This is the behaviour network many administrators will expect from
+ a router. And it can make debugging complicated network layouts
+ much easier.
+
+ Note that if no primary address exists for the interface selected,
+ then the primary address of the first non-loopback interface that
+ has one will be used regardless of this setting.
+
+ Default: 0
+
+igmp_max_memberships - INTEGER
+ Change the maximum number of multicast groups we can subscribe to.
+ Default: 20
+
+ Theoretical maximum value is bounded by having to send a membership
+ report in a single datagram (i.e. the report can't span multiple
+ datagrams, or risk confusing the switch and leaving groups you don't
+ intend to).
+
+ The number of supported groups 'M' is bounded by the number of group
+ report entries you can fit into a single datagram of 65535 bytes.
+
+ M = 65536-sizeof (ip header)/(sizeof(Group record))
+
+ Group records are variable length, with a minimum of 12 bytes.
+ So net.ipv4.igmp_max_memberships should not be set higher than:
+
+ (65536-24) / 12 = 5459
+
+ The value 5459 assumes no IP header options, so in practice
+ this number may be lower.
+
+ conf/interface/* changes special settings per interface (where
+ "interface" is the name of your network interface)
+
+ conf/all/* is special, changes the settings for all interfaces
+
+igmp_qrv - INTEGER
+ Controls the IGMP query robustness variable (see RFC2236 8.1).
+ Default: 2 (as specified by RFC2236 8.1)
+ Minimum: 1 (as specified by RFC6636 4.5)
+
+log_martians - BOOLEAN
+ Log packets with impossible addresses to kernel log.
+ log_martians for the interface will be enabled if at least one of
+ conf/{all,interface}/log_martians is set to TRUE,
+ it will be disabled otherwise
+
+accept_redirects - BOOLEAN
+ Accept ICMP redirect messages.
+ accept_redirects for the interface will be enabled if:
+ - both conf/{all,interface}/accept_redirects are TRUE in the case
+ forwarding for the interface is enabled
+ or
+ - at least one of conf/{all,interface}/accept_redirects is TRUE in the
+ case forwarding for the interface is disabled
+ accept_redirects for the interface will be disabled otherwise
+ default TRUE (host)
+ FALSE (router)
+
+forwarding - BOOLEAN
+ Enable IP forwarding on this interface.
+
+mc_forwarding - BOOLEAN
+ Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
+ and a multicast routing daemon is required.
+ conf/all/mc_forwarding must also be set to TRUE to enable multicast
+ routing for the interface
+
+medium_id - INTEGER
+ Integer value used to differentiate the devices by the medium they
+ are attached to. Two devices can have different id values when
+ the broadcast packets are received only on one of them.
+ The default value 0 means that the device is the only interface
+ to its medium, value of -1 means that medium is not known.
+
+ Currently, it is used to change the proxy_arp behavior:
+ the proxy_arp feature is enabled for packets forwarded between
+ two devices attached to different media.
+
+proxy_arp - BOOLEAN
+ Do proxy arp.
+ proxy_arp for the interface will be enabled if at least one of
+ conf/{all,interface}/proxy_arp is set to TRUE,
+ it will be disabled otherwise
+
+proxy_arp_pvlan - BOOLEAN
+ Private VLAN proxy arp.
+ Basically allow proxy arp replies back to the same interface
+ (from which the ARP request/solicitation was received).
+
+ This is done to support (ethernet) switch features, like RFC
+ 3069, where the individual ports are NOT allowed to
+ communicate with each other, but they are allowed to talk to
+ the upstream router. As described in RFC 3069, it is possible
+ to allow these hosts to communicate through the upstream
+ router by proxy_arp'ing. Don't need to be used together with
+ proxy_arp.
+
+ This technology is known by different names:
+ In RFC 3069 it is called VLAN Aggregation.
+ Cisco and Allied Telesyn call it Private VLAN.
+ Hewlett-Packard call it Source-Port filtering or port-isolation.
+ Ericsson call it MAC-Forced Forwarding (RFC Draft).
+
+shared_media - BOOLEAN
+ Send(router) or accept(host) RFC1620 shared media redirects.
+ Overrides ip_secure_redirects.
+ shared_media for the interface will be enabled if at least one of
+ conf/{all,interface}/shared_media is set to TRUE,
+ it will be disabled otherwise
+ default TRUE
+
+secure_redirects - BOOLEAN
+ Accept ICMP redirect messages only for gateways,
+ listed in default gateway list.
+ secure_redirects for the interface will be enabled if at least one of
+ conf/{all,interface}/secure_redirects is set to TRUE,
+ it will be disabled otherwise
+ default TRUE
+
+send_redirects - BOOLEAN
+ Send redirects, if router.
+ send_redirects for the interface will be enabled if at least one of
+ conf/{all,interface}/send_redirects is set to TRUE,
+ it will be disabled otherwise
+ Default: TRUE
+
+bootp_relay - BOOLEAN
+ Accept packets with source address 0.b.c.d destined
+ not to this host as local ones. It is supposed, that
+ BOOTP relay daemon will catch and forward such packets.
+ conf/all/bootp_relay must also be set to TRUE to enable BOOTP relay
+ for the interface
+ default FALSE
+ Not Implemented Yet.
+
+accept_source_route - BOOLEAN
+ Accept packets with SRR option.
+ conf/all/accept_source_route must also be set to TRUE to accept packets
+ with SRR option on the interface
+ default TRUE (router)
+ FALSE (host)
+
+accept_local - BOOLEAN
+ Accept packets with local source addresses. In combination with
+ suitable routing, this can be used to direct packets between two
+ local interfaces over the wire and have them accepted properly.
+ default FALSE
+
+route_localnet - BOOLEAN
+ Do not consider loopback addresses as martian source or destination
+ while routing. This enables the use of 127/8 for local routing purposes.
+ default FALSE
+
+rp_filter - INTEGER
+ 0 - No source validation.
+ 1 - Strict mode as defined in RFC3704 Strict Reverse Path
+ Each incoming packet is tested against the FIB and if the interface
+ is not the best reverse path the packet check will fail.
+ By default failed packets are discarded.
+ 2 - Loose mode as defined in RFC3704 Loose Reverse Path
+ Each incoming packet's source address is also tested against the FIB
+ and if the source address is not reachable via any interface
+ the packet check will fail.
+
+ Current recommended practice in RFC3704 is to enable strict mode
+ to prevent IP spoofing from DDos attacks. If using asymmetric routing
+ or other complicated routing, then loose mode is recommended.
+
+ The max value from conf/{all,interface}/rp_filter is used
+ when doing source validation on the {interface}.
+
+ Default value is 0. Note that some distributions enable it
+ in startup scripts.
+
+arp_filter - BOOLEAN
+ 1 - Allows you to have multiple network interfaces on the same
+ subnet, and have the ARPs for each interface be answered
+ based on whether or not the kernel would route a packet from
+ the ARP'd IP out that interface (therefore you must use source
+ based routing for this to work). In other words it allows control
+ of which cards (usually 1) will respond to an arp request.
+
+ 0 - (default) The kernel can respond to arp requests with addresses
+ from other interfaces. This may seem wrong but it usually makes
+ sense, because it increases the chance of successful communication.
+ IP addresses are owned by the complete host on Linux, not by
+ particular interfaces. Only for more complex setups like load-
+ balancing, does this behaviour cause problems.
+
+ arp_filter for the interface will be enabled if at least one of
+ conf/{all,interface}/arp_filter is set to TRUE,
+ it will be disabled otherwise
+
+arp_announce - INTEGER
+ Define different restriction levels for announcing the local
+ source IP address from IP packets in ARP requests sent on
+ interface:
+ 0 - (default) Use any local address, configured on any interface
+ 1 - Try to avoid local addresses that are not in the target's
+ subnet for this interface. This mode is useful when target
+ hosts reachable via this interface require the source IP
+ address in ARP requests to be part of their logical network
+ configured on the receiving interface. When we generate the
+ request we will check all our subnets that include the
+ target IP and will preserve the source address if it is from
+ such subnet. If there is no such subnet we select source
+ address according to the rules for level 2.
+ 2 - Always use the best local address for this target.
+ In this mode we ignore the source address in the IP packet
+ and try to select local address that we prefer for talks with
+ the target host. Such local address is selected by looking
+ for primary IP addresses on all our subnets on the outgoing
+ interface that include the target IP address. If no suitable
+ local address is found we select the first local address
+ we have on the outgoing interface or on all other interfaces,
+ with the hope we will receive reply for our request and
+ even sometimes no matter the source IP address we announce.
+
+ The max value from conf/{all,interface}/arp_announce is used.
+
+ Increasing the restriction level gives more chance for
+ receiving answer from the resolved target while decreasing
+ the level announces more valid sender's information.
+
+arp_ignore - INTEGER
+ Define different modes for sending replies in response to
+ received ARP requests that resolve local target IP addresses:
+ 0 - (default): reply for any local target IP address, configured
+ on any interface
+ 1 - reply only if the target IP address is local address
+ configured on the incoming interface
+ 2 - reply only if the target IP address is local address
+ configured on the incoming interface and both with the
+ sender's IP address are part from same subnet on this interface
+ 3 - do not reply for local addresses configured with scope host,
+ only resolutions for global and link addresses are replied
+ 4-7 - reserved
+ 8 - do not reply for all local addresses
+
+ The max value from conf/{all,interface}/arp_ignore is used
+ when ARP request is received on the {interface}
+
+arp_notify - BOOLEAN
+ Define mode for notification of address and device changes.
+ 0 - (default): do nothing
+ 1 - Generate gratuitous arp requests when device is brought up
+ or hardware address changes.
+
+arp_accept - BOOLEAN
+ Define behavior for gratuitous ARP frames who's IP is not
+ already present in the ARP table:
+ 0 - don't create new entries in the ARP table
+ 1 - create new entries in the ARP table
+
+ Both replies and requests type gratuitous arp will trigger the
+ ARP table to be updated, if this setting is on.
+
+ If the ARP table already contains the IP address of the
+ gratuitous arp frame, the arp table will be updated regardless
+ if this setting is on or off.
+
+mcast_solicit - INTEGER
+ The maximum number of multicast probes in INCOMPLETE state,
+ when the associated hardware address is unknown. Defaults
+ to 3.
+
+ucast_solicit - INTEGER
+ The maximum number of unicast probes in PROBE state, when
+ the hardware address is being reconfirmed. Defaults to 3.
+
+app_solicit - INTEGER
+ The maximum number of probes to send to the user space ARP daemon
+ via netlink before dropping back to multicast probes (see
+ mcast_resolicit). Defaults to 0.
+
+mcast_resolicit - INTEGER
+ The maximum number of multicast probes after unicast and
+ app probes in PROBE state. Defaults to 0.
+
+disable_policy - BOOLEAN
+ Disable IPSEC policy (SPD) for this interface
+
+disable_xfrm - BOOLEAN
+ Disable IPSEC encryption on this interface, whatever the policy
+
+igmpv2_unsolicited_report_interval - INTEGER
+ The interval in milliseconds in which the next unsolicited
+ IGMPv1 or IGMPv2 report retransmit will take place.
+ Default: 10000 (10 seconds)
+
+igmpv3_unsolicited_report_interval - INTEGER
+ The interval in milliseconds in which the next unsolicited
+ IGMPv3 report retransmit will take place.
+ Default: 1000 (1 seconds)
+
+promote_secondaries - BOOLEAN
+ When a primary IP address is removed from this interface
+ promote a corresponding secondary IP address instead of
+ removing all the corresponding secondary IP addresses.
+
+
+tag - INTEGER
+ Allows you to write a number, which can be used as required.
+ Default value is 0.
+
+Alexey Kuznetsov.
+kuznet@ms2.inr.ac.ru
+
+Updated by:
+Andi Kleen
+ak@muc.de
+Nicolas Delon
+delon.nicolas@wanadoo.fr
+
+
+
+
+/proc/sys/net/ipv6/* Variables:
+
+IPv6 has no global variables such as tcp_*. tcp_* settings under ipv4/ also
+apply to IPv6 [XXX?].
+
+bindv6only - BOOLEAN
+ Default value for IPV6_V6ONLY socket option,
+ which restricts use of the IPv6 socket to IPv6 communication
+ only.
+ TRUE: disable IPv4-mapped address feature
+ FALSE: enable IPv4-mapped address feature
+
+ Default: FALSE (as specified in RFC3493)
+
+flowlabel_consistency - BOOLEAN
+ Protect the consistency (and unicity) of flow label.
+ You have to disable it to use IPV6_FL_F_REFLECT flag on the
+ flow label manager.
+ TRUE: enabled
+ FALSE: disabled
+ Default: TRUE
+
+auto_flowlabels - BOOLEAN
+ Automatically generate flow labels based based on a flow hash
+ of the packet. This allows intermediate devices, such as routers,
+ to idenfify packet flows for mechanisms like Equal Cost Multipath
+ Routing (see RFC 6438).
+ TRUE: enabled
+ FALSE: disabled
+ Default: false
+
+anycast_src_echo_reply - BOOLEAN
+ Controls the use of anycast addresses as source addresses for ICMPv6
+ echo reply
+ TRUE: enabled
+ FALSE: disabled
+ Default: FALSE
+
+idgen_delay - INTEGER
+ Controls the delay in seconds after which time to retry
+ privacy stable address generation if a DAD conflict is
+ detected.
+ Default: 1 (as specified in RFC7217)
+
+idgen_retries - INTEGER
+ Controls the number of retries to generate a stable privacy
+ address if a DAD conflict is detected.
+ Default: 3 (as specified in RFC7217)
+
+mld_qrv - INTEGER
+ Controls the MLD query robustness variable (see RFC3810 9.1).
+ Default: 2 (as specified by RFC3810 9.1)
+ Minimum: 1 (as specified by RFC6636 4.5)
+
+IPv6 Fragmentation:
+
+ip6frag_high_thresh - INTEGER
+ Maximum memory used to reassemble IPv6 fragments. When
+ ip6frag_high_thresh bytes of memory is allocated for this purpose,
+ the fragment handler will toss packets until ip6frag_low_thresh
+ is reached.
+
+ip6frag_low_thresh - INTEGER
+ See ip6frag_high_thresh
+
+ip6frag_time - INTEGER
+ Time in seconds to keep an IPv6 fragment in memory.
+
+conf/default/*:
+ Change the interface-specific default settings.
+
+
+conf/all/*:
+ Change all the interface-specific settings.
+
+ [XXX: Other special features than forwarding?]
+
+conf/all/forwarding - BOOLEAN
+ Enable global IPv6 forwarding between all interfaces.
+
+ IPv4 and IPv6 work differently here; e.g. netfilter must be used
+ to control which interfaces may forward packets and which not.
+
+ This also sets all interfaces' Host/Router setting
+ 'forwarding' to the specified value. See below for details.
+
+ This referred to as global forwarding.
+
+proxy_ndp - BOOLEAN
+ Do proxy ndp.
+
+fwmark_reflect - BOOLEAN
+ Controls the fwmark of kernel-generated IPv6 reply packets that are not
+ associated with a socket for example, TCP RSTs or ICMPv6 echo replies).
+ If unset, these packets have a fwmark of zero. If set, they have the
+ fwmark of the packet they are replying to.
+ Default: 0
+
+conf/interface/*:
+ Change special settings per interface.
+
+ The functional behaviour for certain settings is different
+ depending on whether local forwarding is enabled or not.
+
+accept_ra - INTEGER
+ Accept Router Advertisements; autoconfigure using them.
+
+ It also determines whether or not to transmit Router
+ Solicitations. If and only if the functional setting is to
+ accept Router Advertisements, Router Solicitations will be
+ transmitted.
+
+ Possible values are:
+ 0 Do not accept Router Advertisements.
+ 1 Accept Router Advertisements if forwarding is disabled.
+ 2 Overrule forwarding behaviour. Accept Router Advertisements
+ even if forwarding is enabled.
+
+ Functional default: enabled if local forwarding is disabled.
+ disabled if local forwarding is enabled.
+
+accept_ra_defrtr - BOOLEAN
+ Learn default router in Router Advertisement.
+
+ Functional default: enabled if accept_ra is enabled.
+ disabled if accept_ra is disabled.
+
+accept_ra_from_local - BOOLEAN
+ Accept RA with source-address that is found on local machine
+ if the RA is otherwise proper and able to be accepted.
+ Default is to NOT accept these as it may be an un-intended
+ network loop.
+
+ Functional default:
+ enabled if accept_ra_from_local is enabled
+ on a specific interface.
+ disabled if accept_ra_from_local is disabled
+ on a specific interface.
+
+accept_ra_pinfo - BOOLEAN
+ Learn Prefix Information in Router Advertisement.
+
+ Functional default: enabled if accept_ra is enabled.
+ disabled if accept_ra is disabled.
+
+accept_ra_rt_info_max_plen - INTEGER
+ Maximum prefix length of Route Information in RA.
+
+ Route Information w/ prefix larger than or equal to this
+ variable shall be ignored.
+
+ Functional default: 0 if accept_ra_rtr_pref is enabled.
+ -1 if accept_ra_rtr_pref is disabled.
+
+accept_ra_rtr_pref - BOOLEAN
+ Accept Router Preference in RA.
+
+ Functional default: enabled if accept_ra is enabled.
+ disabled if accept_ra is disabled.
+
+accept_ra_mtu - BOOLEAN
+ Apply the MTU value specified in RA option 5 (RFC4861). If
+ disabled, the MTU specified in the RA will be ignored.
+
+ Functional default: enabled if accept_ra is enabled.
+ disabled if accept_ra is disabled.
+
+accept_redirects - BOOLEAN
+ Accept Redirects.
+
+ Functional default: enabled if local forwarding is disabled.
+ disabled if local forwarding is enabled.
+
+accept_source_route - INTEGER
+ Accept source routing (routing extension header).
+
+ >= 0: Accept only routing header type 2.
+ < 0: Do not accept routing header.
+
+ Default: 0
+
+autoconf - BOOLEAN
+ Autoconfigure addresses using Prefix Information in Router
+ Advertisements.
+
+ Functional default: enabled if accept_ra_pinfo is enabled.
+ disabled if accept_ra_pinfo is disabled.
+
+dad_transmits - INTEGER
+ The amount of Duplicate Address Detection probes to send.
+ Default: 1
+
+forwarding - INTEGER
+ Configure interface-specific Host/Router behaviour.
+
+ Note: It is recommended to have the same setting on all
+ interfaces; mixed router/host scenarios are rather uncommon.
+
+ Possible values are:
+ 0 Forwarding disabled
+ 1 Forwarding enabled
+
+ FALSE (0):
+
+ By default, Host behaviour is assumed. This means:
+
+ 1. IsRouter flag is not set in Neighbour Advertisements.
+ 2. If accept_ra is TRUE (default), transmit Router
+ Solicitations.
+ 3. If accept_ra is TRUE (default), accept Router
+ Advertisements (and do autoconfiguration).
+ 4. If accept_redirects is TRUE (default), accept Redirects.
+
+ TRUE (1):
+
+ If local forwarding is enabled, Router behaviour is assumed.
+ This means exactly the reverse from the above:
+
+ 1. IsRouter flag is set in Neighbour Advertisements.
+ 2. Router Solicitations are not sent unless accept_ra is 2.
+ 3. Router Advertisements are ignored unless accept_ra is 2.
+ 4. Redirects are ignored.
+
+ Default: 0 (disabled) if global forwarding is disabled (default),
+ otherwise 1 (enabled).
+
+hop_limit - INTEGER
+ Default Hop Limit to set.
+ Default: 64
+
+mtu - INTEGER
+ Default Maximum Transfer Unit
+ Default: 1280 (IPv6 required minimum)
+
+router_probe_interval - INTEGER
+ Minimum interval (in seconds) between Router Probing described
+ in RFC4191.
+
+ Default: 60
+
+router_solicitation_delay - INTEGER
+ Number of seconds to wait after interface is brought up
+ before sending Router Solicitations.
+ Default: 1
+
+router_solicitation_interval - INTEGER
+ Number of seconds to wait between Router Solicitations.
+ Default: 4
+
+router_solicitations - INTEGER
+ Number of Router Solicitations to send until assuming no
+ routers are present.
+ Default: 3
+
+use_tempaddr - INTEGER
+ Preference for Privacy Extensions (RFC3041).
+ <= 0 : disable Privacy Extensions
+ == 1 : enable Privacy Extensions, but prefer public
+ addresses over temporary addresses.
+ > 1 : enable Privacy Extensions and prefer temporary
+ addresses over public addresses.
+ Default: 0 (for most devices)
+ -1 (for point-to-point devices and loopback devices)
+
+temp_valid_lft - INTEGER
+ valid lifetime (in seconds) for temporary addresses.
+ Default: 604800 (7 days)
+
+temp_prefered_lft - INTEGER
+ Preferred lifetime (in seconds) for temporary addresses.
+ Default: 86400 (1 day)
+
+max_desync_factor - INTEGER
+ Maximum value for DESYNC_FACTOR, which is a random value
+ that ensures that clients don't synchronize with each
+ other and generate new addresses at exactly the same time.
+ value is in seconds.
+ Default: 600
+
+regen_max_retry - INTEGER
+ Number of attempts before give up attempting to generate
+ valid temporary addresses.
+ Default: 5
+
+max_addresses - INTEGER
+ Maximum number of autoconfigured addresses per interface. Setting
+ to zero disables the limitation. It is not recommended to set this
+ value too large (or to zero) because it would be an easy way to
+ crash the kernel by allowing too many addresses to be created.
+ Default: 16
+
+disable_ipv6 - BOOLEAN
+ Disable IPv6 operation. If accept_dad is set to 2, this value
+ will be dynamically set to TRUE if DAD fails for the link-local
+ address.
+ Default: FALSE (enable IPv6 operation)
+
+ When this value is changed from 1 to 0 (IPv6 is being enabled),
+ it will dynamically create a link-local address on the given
+ interface and start Duplicate Address Detection, if necessary.
+
+ When this value is changed from 0 to 1 (IPv6 is being disabled),
+ it will dynamically delete all address on the given interface.
+
+accept_dad - INTEGER
+ Whether to accept DAD (Duplicate Address Detection).
+ 0: Disable DAD
+ 1: Enable DAD (default)
+ 2: Enable DAD, and disable IPv6 operation if MAC-based duplicate
+ link-local address has been found.
+
+force_tllao - BOOLEAN
+ Enable sending the target link-layer address option even when
+ responding to a unicast neighbor solicitation.
+ Default: FALSE
+
+ Quoting from RFC 2461, section 4.4, Target link-layer address:
+
+ "The option MUST be included for multicast solicitations in order to
+ avoid infinite Neighbor Solicitation "recursion" when the peer node
+ does not have a cache entry to return a Neighbor Advertisements
+ message. When responding to unicast solicitations, the option can be
+ omitted since the sender of the solicitation has the correct link-
+ layer address; otherwise it would not have be able to send the unicast
+ solicitation in the first place. However, including the link-layer
+ address in this case adds little overhead and eliminates a potential
+ race condition where the sender deletes the cached link-layer address
+ prior to receiving a response to a previous solicitation."
+
+ndisc_notify - BOOLEAN
+ Define mode for notification of address and device changes.
+ 0 - (default): do nothing
+ 1 - Generate unsolicited neighbour advertisements when device is brought
+ up or hardware address changes.
+
+mldv1_unsolicited_report_interval - INTEGER
+ The interval in milliseconds in which the next unsolicited
+ MLDv1 report retransmit will take place.
+ Default: 10000 (10 seconds)
+
+mldv2_unsolicited_report_interval - INTEGER
+ The interval in milliseconds in which the next unsolicited
+ MLDv2 report retransmit will take place.
+ Default: 1000 (1 second)
+
+force_mld_version - INTEGER
+ 0 - (default) No enforcement of a MLD version, MLDv1 fallback allowed
+ 1 - Enforce to use MLD version 1
+ 2 - Enforce to use MLD version 2
+
+suppress_frag_ndisc - INTEGER
+ Control RFC 6980 (Security Implications of IPv6 Fragmentation
+ with IPv6 Neighbor Discovery) behavior:
+ 1 - (default) discard fragmented neighbor discovery packets
+ 0 - allow fragmented neighbor discovery packets
+
+optimistic_dad - BOOLEAN
+ Whether to perform Optimistic Duplicate Address Detection (RFC 4429).
+ 0: disabled (default)
+ 1: enabled
+
+use_optimistic - BOOLEAN
+ If enabled, do not classify optimistic addresses as deprecated during
+ source address selection. Preferred addresses will still be chosen
+ before optimistic addresses, subject to other ranking in the source
+ address selection algorithm.
+ 0: disabled (default)
+ 1: enabled
+
+stable_secret - IPv6 address
+ This IPv6 address will be used as a secret to generate IPv6
+ addresses for link-local addresses and autoconfigured
+ ones. All addresses generated after setting this secret will
+ be stable privacy ones by default. This can be changed via the
+ addrgenmode ip-link. conf/default/stable_secret is used as the
+ secret for the namespace, the interface specific ones can
+ overwrite that. Writes to conf/all/stable_secret are refused.
+
+ It is recommended to generate this secret during installation
+ of a system and keep it stable after that.
+
+ By default the stable secret is unset.
+
+icmp/*:
+ratelimit - INTEGER
+ Limit the maximal rates for sending ICMPv6 packets.
+ 0 to disable any limiting,
+ otherwise the minimal space between responses in milliseconds.
+ Default: 1000
+
+
+IPv6 Update by:
+Pekka Savola <pekkas@netcore.fi>
+YOSHIFUJI Hideaki / USAGI Project <yoshfuji@linux-ipv6.org>
+
+
+/proc/sys/net/bridge/* Variables:
+
+bridge-nf-call-arptables - BOOLEAN
+ 1 : pass bridged ARP traffic to arptables' FORWARD chain.
+ 0 : disable this.
+ Default: 1
+
+bridge-nf-call-iptables - BOOLEAN
+ 1 : pass bridged IPv4 traffic to iptables' chains.
+ 0 : disable this.
+ Default: 1
+
+bridge-nf-call-ip6tables - BOOLEAN
+ 1 : pass bridged IPv6 traffic to ip6tables' chains.
+ 0 : disable this.
+ Default: 1
+
+bridge-nf-filter-vlan-tagged - BOOLEAN
+ 1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.
+ 0 : disable this.
+ Default: 0
+
+bridge-nf-filter-pppoe-tagged - BOOLEAN
+ 1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.
+ 0 : disable this.
+ Default: 0
+
+bridge-nf-pass-vlan-input-dev - BOOLEAN
+ 1: if bridge-nf-filter-vlan-tagged is enabled, try to find a vlan
+ interface on the bridge and set the netfilter input device to the vlan.
+ This allows use of e.g. "iptables -i br0.1" and makes the REDIRECT
+ target work with vlan-on-top-of-bridge interfaces. When no matching
+ vlan interface is found, or this switch is off, the input device is
+ set to the bridge interface.
+ 0: disable bridge netfilter vlan interface lookup.
+ Default: 0
+
+proc/sys/net/sctp/* Variables:
+
+addip_enable - BOOLEAN
+ Enable or disable extension of Dynamic Address Reconfiguration
+ (ADD-IP) functionality specified in RFC5061. This extension provides
+ the ability to dynamically add and remove new addresses for the SCTP
+ associations.
+
+ 1: Enable extension.
+
+ 0: Disable extension.
+
+ Default: 0
+
+addip_noauth_enable - BOOLEAN
+ Dynamic Address Reconfiguration (ADD-IP) requires the use of
+ authentication to protect the operations of adding or removing new
+ addresses. This requirement is mandated so that unauthorized hosts
+ would not be able to hijack associations. However, older
+ implementations may not have implemented this requirement while
+ allowing the ADD-IP extension. For reasons of interoperability,
+ we provide this variable to control the enforcement of the
+ authentication requirement.
+
+ 1: Allow ADD-IP extension to be used without authentication. This
+ should only be set in a closed environment for interoperability
+ with older implementations.
+
+ 0: Enforce the authentication requirement
+
+ Default: 0
+
+auth_enable - BOOLEAN
+ Enable or disable Authenticated Chunks extension. This extension
+ provides the ability to send and receive authenticated chunks and is
+ required for secure operation of Dynamic Address Reconfiguration
+ (ADD-IP) extension.
+
+ 1: Enable this extension.
+ 0: Disable this extension.
+
+ Default: 0
+
+prsctp_enable - BOOLEAN
+ Enable or disable the Partial Reliability extension (RFC3758) which
+ is used to notify peers that a given DATA should no longer be expected.
+
+ 1: Enable extension
+ 0: Disable
+
+ Default: 1
+
+max_burst - INTEGER
+ The limit of the number of new packets that can be initially sent. It
+ controls how bursty the generated traffic can be.
+
+ Default: 4
+
+association_max_retrans - INTEGER
+ Set the maximum number for retransmissions that an association can
+ attempt deciding that the remote end is unreachable. If this value
+ is exceeded, the association is terminated.
+
+ Default: 10
+
+max_init_retransmits - INTEGER
+ The maximum number of retransmissions of INIT and COOKIE-ECHO chunks
+ that an association will attempt before declaring the destination
+ unreachable and terminating.
+
+ Default: 8
+
+path_max_retrans - INTEGER
+ The maximum number of retransmissions that will be attempted on a given
+ path. Once this threshold is exceeded, the path is considered
+ unreachable, and new traffic will use a different path when the
+ association is multihomed.
+
+ Default: 5
+
+pf_retrans - INTEGER
+ The number of retransmissions that will be attempted on a given path
+ before traffic is redirected to an alternate transport (should one
+ exist). Note this is distinct from path_max_retrans, as a path that
+ passes the pf_retrans threshold can still be used. Its only
+ deprioritized when a transmission path is selected by the stack. This
+ setting is primarily used to enable fast failover mechanisms without
+ having to reduce path_max_retrans to a very low value. See:
+ http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
+ for details. Note also that a value of pf_retrans > path_max_retrans
+ disables this feature
+
+ Default: 0
+
+rto_initial - INTEGER
+ The initial round trip timeout value in milliseconds that will be used
+ in calculating round trip times. This is the initial time interval
+ for retransmissions.
+
+ Default: 3000
+
+rto_max - INTEGER
+ The maximum value (in milliseconds) of the round trip timeout. This
+ is the largest time interval that can elapse between retransmissions.
+
+ Default: 60000
+
+rto_min - INTEGER
+ The minimum value (in milliseconds) of the round trip timeout. This
+ is the smallest time interval the can elapse between retransmissions.
+
+ Default: 1000
+
+hb_interval - INTEGER
+ The interval (in milliseconds) between HEARTBEAT chunks. These chunks
+ are sent at the specified interval on idle paths to probe the state of
+ a given path between 2 associations.
+
+ Default: 30000
+
+sack_timeout - INTEGER
+ The amount of time (in milliseconds) that the implementation will wait
+ to send a SACK.
+
+ Default: 200
+
+valid_cookie_life - INTEGER
+ The default lifetime of the SCTP cookie (in milliseconds). The cookie
+ is used during association establishment.
+
+ Default: 60000
+
+cookie_preserve_enable - BOOLEAN
+ Enable or disable the ability to extend the lifetime of the SCTP cookie
+ that is used during the establishment phase of SCTP association
+
+ 1: Enable cookie lifetime extension.
+ 0: Disable
+
+ Default: 1
+
+cookie_hmac_alg - STRING
+ Select the hmac algorithm used when generating the cookie value sent by
+ a listening sctp socket to a connecting client in the INIT-ACK chunk.
+ Valid values are:
+ * md5
+ * sha1
+ * none
+ Ability to assign md5 or sha1 as the selected alg is predicated on the
+ configuration of those algorithms at build time (CONFIG_CRYPTO_MD5 and
+ CONFIG_CRYPTO_SHA1).
+
+ Default: Dependent on configuration. MD5 if available, else SHA1 if
+ available, else none.
+
+rcvbuf_policy - INTEGER
+ Determines if the receive buffer is attributed to the socket or to
+ association. SCTP supports the capability to create multiple
+ associations on a single socket. When using this capability, it is
+ possible that a single stalled association that's buffering a lot
+ of data may block other associations from delivering their data by
+ consuming all of the receive buffer space. To work around this,
+ the rcvbuf_policy could be set to attribute the receiver buffer space
+ to each association instead of the socket. This prevents the described
+ blocking.
+
+ 1: rcvbuf space is per association
+ 0: rcvbuf space is per socket
+
+ Default: 0
+
+sndbuf_policy - INTEGER
+ Similar to rcvbuf_policy above, this applies to send buffer space.
+
+ 1: Send buffer is tracked per association
+ 0: Send buffer is tracked per socket.
+
+ Default: 0
+
+sctp_mem - vector of 3 INTEGERs: min, pressure, max
+ Number of pages allowed for queueing by all SCTP sockets.
+
+ min: Below this number of pages SCTP is not bothered about its
+ memory appetite. When amount of memory allocated by SCTP exceeds
+ this number, SCTP starts to moderate memory usage.
+
+ pressure: This value was introduced to follow format of tcp_mem.
+
+ max: Number of pages allowed for queueing by all SCTP sockets.
+
+ Default is calculated at boot time from amount of available memory.
+
+sctp_rmem - vector of 3 INTEGERs: min, default, max
+ Only the first value ("min") is used, "default" and "max" are
+ ignored.
+
+ min: Minimal size of receive buffer used by SCTP socket.
+ It is guaranteed to each SCTP socket (but not association) even
+ under moderate memory pressure.
+
+ Default: 1 page
+
+sctp_wmem - vector of 3 INTEGERs: min, default, max
+ Currently this tunable has no effect.
+
+addr_scope_policy - INTEGER
+ Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
+
+ 0 - Disable IPv4 address scoping
+ 1 - Enable IPv4 address scoping
+ 2 - Follow draft but allow IPv4 private addresses
+ 3 - Follow draft but allow IPv4 link local addresses
+
+ Default: 1
+
+
+/proc/sys/net/core/*
+ Please see: Documentation/sysctl/net.txt for descriptions of these entries.
+
+
+/proc/sys/net/unix/*
+max_dgram_qlen - INTEGER
+ The maximum length of dgram socket receive queue
+
+ Default: 10
+
+
+UNDOCUMENTED:
+
+/proc/sys/net/irda/*
+ fast_poll_increase FIXME
+ warn_noreply_time FIXME
+ discovery_slots FIXME
+ slot_timeout FIXME
+ max_baud_rate FIXME
+ discovery_timeout FIXME
+ lap_keepalive_time FIXME
+ max_noreply_time FIXME
+ max_tx_data_size FIXME
+ max_tx_window FIXME
+ min_tx_turn_time FIXME
diff --git a/Documentation/networking/ip_dynaddr.txt b/Documentation/networking/ip_dynaddr.txt
new file mode 100644
index 000000000..45f3c1268
--- /dev/null
+++ b/Documentation/networking/ip_dynaddr.txt
@@ -0,0 +1,29 @@
+IP dynamic address hack-port v0.03
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+This stuff allows diald ONESHOT connections to get established by
+dynamically changing packet source address (and socket's if local procs).
+It is implemented for TCP diald-box connections(1) and IP_MASQuerading(2).
+
+If enabled[*] and forwarding interface has changed:
+ 1) Socket (and packet) source address is rewritten ON RETRANSMISSIONS
+ while in SYN_SENT state (diald-box processes).
+ 2) Out-bounded MASQueraded source address changes ON OUTPUT (when
+ internal host does retransmission) until a packet from outside is
+ received by the tunnel.
+
+This is specially helpful for auto dialup links (diald), where the
+``actual'' outgoing address is unknown at the moment the link is
+going up. So, the *same* (local AND masqueraded) connections requests that
+bring the link up will be able to get established.
+
+[*] At boot, by default no address rewriting is attempted.
+ To enable:
+ # echo 1 > /proc/sys/net/ipv4/ip_dynaddr
+ To enable verbose mode:
+ # echo 2 > /proc/sys/net/ipv4/ip_dynaddr
+ To disable (default)
+ # echo 0 > /proc/sys/net/ipv4/ip_dynaddr
+
+Enjoy!
+
+-- Juanjo <jjciarla@raiz.uncu.edu.ar>
diff --git a/Documentation/networking/ipddp.txt b/Documentation/networking/ipddp.txt
new file mode 100644
index 000000000..ba5c217ff
--- /dev/null
+++ b/Documentation/networking/ipddp.txt
@@ -0,0 +1,73 @@
+Text file for ipddp.c:
+ AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation
+
+This text file is written by Jay Schulist <jschlst@samba.org>
+
+Introduction
+------------
+
+AppleTalk-IP (IPDDP) is the method computers connected to AppleTalk
+networks can use to communicate via IP. AppleTalk-IP is simply IP datagrams
+inside AppleTalk packets.
+
+Through this driver you can either allow your Linux box to communicate
+IP over an AppleTalk network or you can provide IP gatewaying functions
+for your AppleTalk users.
+
+You can currently encapsulate or decapsulate AppleTalk-IP on LocalTalk,
+EtherTalk and PPPTalk. The only limit on the protocol is that of what
+kernel AppleTalk layer and drivers are available.
+
+Each mode requires its own user space software.
+
+Compiling AppleTalk-IP Decapsulation/Encapsulation
+=================================================
+
+AppleTalk-IP decapsulation needs to be compiled into your kernel. You
+will need to turn on AppleTalk-IP driver support. Then you will need to
+select ONE of the two options; IP to AppleTalk-IP encapsulation support or
+AppleTalk-IP to IP decapsulation support. If you compile the driver
+statically you will only be able to use the driver for the function you have
+enabled in the kernel. If you compile the driver as a module you can
+select what mode you want it to run in via a module loading param.
+ipddp_mode=1 for AppleTalk-IP encapsulation and ipddp_mode=2 for
+AppleTalk-IP to IP decapsulation.
+
+Basic instructions for user space tools
+=======================================
+
+I will briefly describe the operation of the tools, but you will
+need to consult the supporting documentation for each set of tools.
+
+Decapsulation - You will need to download a software package called
+MacGate. In this distribution there will be a tool called MacRoute
+which enables you to add routes to the kernel for your Macs by hand.
+Also the tool MacRegGateWay is included to register the
+proper IP Gateway and IP addresses for your machine. Included in this
+distribution is a patch to netatalk-1.4b2+asun2.0a17.2 (available from
+ftp.u.washington.edu/pub/user-supported/asun/) this patch is optional
+but it allows automatic adding and deleting of routes for Macs. (Handy
+for locations with large Mac installations)
+
+Encapsulation - You will need to download a software daemon called ipddpd.
+This software expects there to be an AppleTalk-IP gateway on the network.
+You will also need to add the proper routes to route your Linux box's IP
+traffic out the ipddp interface.
+
+Common Uses of ipddp.c
+----------------------
+Of course AppleTalk-IP decapsulation and encapsulation, but specifically
+decapsulation is being used most for connecting LocalTalk networks to
+IP networks. Although it has been used on EtherTalk networks to allow
+Macs that are only able to tunnel IP over EtherTalk.
+
+Encapsulation has been used to allow a Linux box stuck on a LocalTalk
+network to use IP. It should work equally well if you are stuck on an
+EtherTalk only network.
+
+Further Assistance
+-------------------
+You can contact me (Jay Schulist <jschlst@samba.org>) with any
+questions regarding decapsulation or encapsulation. Bradford W. Johnson
+<johns393@maroon.tc.umn.edu> originally wrote the ipddp.c driver for IP
+encapsulation in AppleTalk.
diff --git a/Documentation/networking/iphase.txt b/Documentation/networking/iphase.txt
new file mode 100644
index 000000000..670b72f16
--- /dev/null
+++ b/Documentation/networking/iphase.txt
@@ -0,0 +1,158 @@
+
+ READ ME FISRT
+ ATM (i)Chip IA Linux Driver Source
+--------------------------------------------------------------------------------
+ Read This Before You Begin!
+--------------------------------------------------------------------------------
+
+Description
+-----------
+
+This is the README file for the Interphase PCI ATM (i)Chip IA Linux driver
+source release.
+
+The features and limitations of this driver are as follows:
+ - A single VPI (VPI value of 0) is supported.
+ - Supports 4K VCs for the server board (with 512K control memory) and 1K
+ VCs for the client board (with 128K control memory).
+ - UBR, ABR and CBR service categories are supported.
+ - Only AAL5 is supported.
+ - Supports setting of PCR on the VCs.
+ - Multiple adapters in a system are supported.
+ - All variants of Interphase ATM PCI (i)Chip adapter cards are supported,
+ including x575 (OC3, control memory 128K , 512K and packet memory 128K,
+ 512K and 1M), x525 (UTP25) and x531 (DS3 and E3). See
+ http://www.iphase.com/
+ for details.
+ - Only x86 platforms are supported.
+ - SMP is supported.
+
+
+Before You Start
+----------------
+
+
+Installation
+------------
+
+1. Installing the adapters in the system
+ To install the ATM adapters in the system, follow the steps below.
+ a. Login as root.
+ b. Shut down the system and power off the system.
+ c. Install one or more ATM adapters in the system.
+ d. Connect each adapter to a port on an ATM switch. The green 'Link'
+ LED on the front panel of the adapter will be on if the adapter is
+ connected to the switch properly when the system is powered up.
+ e. Power on and boot the system.
+
+2. [ Removed ]
+
+3. Rebuild kernel with ABR support
+ [ a. and b. removed ]
+ c. Reconfigure the kernel, choose the Interphase ia driver through "make
+ menuconfig" or "make xconfig".
+ d. Rebuild the kernel, loadable modules and the atm tools.
+ e. Install the new built kernel and modules and reboot.
+
+4. Load the adapter hardware driver (ia driver) if it is built as a module
+ a. Login as root.
+ b. Change directory to /lib/modules/<kernel-version>/atm.
+ c. Run "insmod suni.o;insmod iphase.o"
+ The yellow 'status' LED on the front panel of the adapter will blink
+ while the driver is loaded in the system.
+ d. To verify that the 'ia' driver is loaded successfully, run the
+ following command:
+
+ cat /proc/atm/devices
+
+ If the driver is loaded successfully, the output of the command will
+ be similar to the following lines:
+
+ Itf Type ESI/"MAC"addr AAL(TX,err,RX,err,drop) ...
+ 0 ia xxxxxxxxx 0 ( 0 0 0 0 0 ) 5 ( 0 0 0 0 0 )
+
+ You can also check the system log file /var/log/messages for messages
+ related to the ATM driver.
+
+5. Ia Driver Configuration
+
+5.1 Configuration of adapter buffers
+ The (i)Chip boards have 3 different packet RAM size variants: 128K, 512K and
+ 1M. The RAM size decides the number of buffers and buffer size. The default
+ size and number of buffers are set as following:
+
+ Total Rx RAM Tx RAM Rx Buf Tx Buf Rx buf Tx buf
+ RAM size size size size size cnt cnt
+ -------- ------ ------ ------ ------ ------ ------
+ 128K 64K 64K 10K 10K 6 6
+ 512K 256K 256K 10K 10K 25 25
+ 1M 512K 512K 10K 10K 51 51
+
+ These setting should work well in most environments, but can be
+ changed by typing the following command:
+
+ insmod <IA_DIR>/ia.o IA_RX_BUF=<RX_CNT> IA_RX_BUF_SZ=<RX_SIZE> \
+ IA_TX_BUF=<TX_CNT> IA_TX_BUF_SZ=<TX_SIZE>
+ Where:
+ RX_CNT = number of receive buffers in the range (1-128)
+ RX_SIZE = size of receive buffers in the range (48-64K)
+ TX_CNT = number of transmit buffers in the range (1-128)
+ TX_SIZE = size of transmit buffers in the range (48-64K)
+
+ 1. Transmit and receive buffer size must be a multiple of 4.
+ 2. Care should be taken so that the memory required for the
+ transmit and receive buffers is less than or equal to the
+ total adapter packet memory.
+
+5.2 Turn on ia debug trace
+
+ When the ia driver is built with the CONFIG_ATM_IA_DEBUG flag, the driver
+ can provide more debug trace if needed. There is a bit mask variable,
+ IADebugFlag, which controls the output of the traces. You can find the bit
+ map of the IADebugFlag in iphase.h.
+ The debug trace can be turn on through the insmod command line option, for
+ example, "insmod iphase.o IADebugFlag=0xffffffff" can turn on all the debug
+ traces together with loading the driver.
+
+6. Ia Driver Test Using ttcp_atm and PVC
+
+ For the PVC setup, the test machines can either be connected back-to-back or
+ through a switch. If connected through the switch, the switch must be
+ configured for the PVC(s).
+
+ a. For UBR test:
+ At the test machine intended to receive data, type:
+ ttcp_atm -r -a -s 0.100
+ At the other test machine, type:
+ ttcp_atm -t -a -s 0.100 -n 10000
+ Run "ttcp_atm -h" to display more options of the ttcp_atm tool.
+ b. For ABR test:
+ It is the same as the UBR testing, but with an extra command option:
+ -Pabr:max_pcr=<xxx>
+ where:
+ xxx = the maximum peak cell rate, from 170 - 353207.
+ This option must be set on both the machines.
+ c. For CBR test:
+ It is the same as the UBR testing, but with an extra command option:
+ -Pcbr:max_pcr=<xxx>
+ where:
+ xxx = the maximum peak cell rate, from 170 - 353207.
+ This option may only be set on the transmit machine.
+
+
+OUTSTANDING ISSUES
+------------------
+
+
+
+Contact Information
+-------------------
+
+ Customer Support:
+ United States: Telephone: (214) 654-5555
+ Fax: (214) 654-5500
+ E-Mail: intouch@iphase.com
+ Europe: Telephone: 33 (0)1 41 15 44 00
+ Fax: 33 (0)1 41 15 12 13
+ World Wide Web: http://www.iphase.com
+ Anonymous FTP: ftp.iphase.com
diff --git a/Documentation/networking/ipsec.txt b/Documentation/networking/ipsec.txt
new file mode 100644
index 000000000..8dbc08b7e
--- /dev/null
+++ b/Documentation/networking/ipsec.txt
@@ -0,0 +1,38 @@
+
+Here documents known IPsec corner cases which need to be keep in mind when
+deploy various IPsec configuration in real world production environment.
+
+1. IPcomp: Small IP packet won't get compressed at sender, and failed on
+ policy check on receiver.
+
+Quote from RFC3173:
+2.2. Non-Expansion Policy
+
+ If the total size of a compressed payload and the IPComp header, as
+ defined in section 3, is not smaller than the size of the original
+ payload, the IP datagram MUST be sent in the original non-compressed
+ form. To clarify: If an IP datagram is sent non-compressed, no
+
+ IPComp header is added to the datagram. This policy ensures saving
+ the decompression processing cycles and avoiding incurring IP
+ datagram fragmentation when the expanded datagram is larger than the
+ MTU.
+
+ Small IP datagrams are likely to expand as a result of compression.
+ Therefore, a numeric threshold should be applied before compression,
+ where IP datagrams of size smaller than the threshold are sent in the
+ original form without attempting compression. The numeric threshold
+ is implementation dependent.
+
+Current IPComp implementation is indeed by the book, while as in practice
+when sending non-compressed packet to the peer(whether or not packet len
+is smaller than the threshold or the compressed len is large than original
+packet len), the packet is dropped when checking the policy as this packet
+matches the selector but not coming from any XFRM layer, i.e., with no
+security path. Such naked packet will not eventually make it to upper layer.
+The result is much more wired to the user when ping peer with different
+payload length.
+
+One workaround is try to set "level use" for each policy if user observed
+above scenario. The consequence of doing so is small packet(uncompressed)
+will skip policy checking on receiver side.
diff --git a/Documentation/networking/ipv6.txt b/Documentation/networking/ipv6.txt
new file mode 100644
index 000000000..6cd74fa55
--- /dev/null
+++ b/Documentation/networking/ipv6.txt
@@ -0,0 +1,72 @@
+
+Options for the ipv6 module are supplied as parameters at load time.
+
+Module options may be given as command line arguments to the insmod
+or modprobe command, but are usually specified in either
+/etc/modules.d/*.conf configuration files, or in a distro-specific
+configuration file.
+
+The available ipv6 module parameters are listed below. If a parameter
+is not specified the default value is used.
+
+The parameters are as follows:
+
+disable
+
+ Specifies whether to load the IPv6 module, but disable all
+ its functionality. This might be used when another module
+ has a dependency on the IPv6 module being loaded, but no
+ IPv6 addresses or operations are desired.
+
+ The possible values and their effects are:
+
+ 0
+ IPv6 is enabled.
+
+ This is the default value.
+
+ 1
+ IPv6 is disabled.
+
+ No IPv6 addresses will be added to interfaces, and
+ it will not be possible to open an IPv6 socket.
+
+ A reboot is required to enable IPv6.
+
+autoconf
+
+ Specifies whether to enable IPv6 address autoconfiguration
+ on all interfaces. This might be used when one does not wish
+ for addresses to be automatically generated from prefixes
+ received in Router Advertisements.
+
+ The possible values and their effects are:
+
+ 0
+ IPv6 address autoconfiguration is disabled on all interfaces.
+
+ Only the IPv6 loopback address (::1) and link-local addresses
+ will be added to interfaces.
+
+ 1
+ IPv6 address autoconfiguration is enabled on all interfaces.
+
+ This is the default value.
+
+disable_ipv6
+
+ Specifies whether to disable IPv6 on all interfaces.
+ This might be used when no IPv6 addresses are desired.
+
+ The possible values and their effects are:
+
+ 0
+ IPv6 is enabled on all interfaces.
+
+ This is the default value.
+
+ 1
+ IPv6 is disabled on all interfaces.
+
+ No IPv6 addresses will be added to interfaces.
+
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt
new file mode 100644
index 000000000..cf996394e
--- /dev/null
+++ b/Documentation/networking/ipvlan.txt
@@ -0,0 +1,107 @@
+
+ IPVLAN Driver HOWTO
+
+Initial Release:
+ Mahesh Bandewar <maheshb AT google.com>
+
+1. Introduction:
+ This is conceptually very similar to the macvlan driver with one major
+exception of using L3 for mux-ing /demux-ing among slaves. This property makes
+the master device share the L2 with it's slave devices. I have developed this
+driver in conjuntion with network namespaces and not sure if there is use case
+outside of it.
+
+
+2. Building and Installation:
+ In order to build the driver, please select the config item CONFIG_IPVLAN.
+The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module
+(CONFIG_IPVLAN=m).
+
+
+3. Configuration:
+ There are no module parameters for this driver and it can be configured
+using IProute2/ip utility.
+
+ ip link add link <master-dev> <slave-dev> type ipvlan mode { l2 | L3 }
+
+ e.g. ip link add link ipvl0 eth0 type ipvlan mode l2
+
+
+4. Operating modes:
+ IPvlan has two modes of operation - L2 and L3. For a given master device,
+you can select one of these two modes and all slaves on that master will
+operate in the same (selected) mode. The RX mode is almost identical except
+that in L3 mode the slaves wont receive any multicast / broadcast traffic.
+L3 mode is more restrictive since routing is controlled from the other (mostly)
+default namespace.
+
+4.1 L2 mode:
+ In this mode TX processing happens on the stack instance attached to the
+slave device and packets are switched and queued to the master device to send
+out. In this mode the slaves will RX/TX multicast and broadcast (if applicable)
+as well.
+
+4.2 L3 mode:
+ In this mode TX processing upto L3 happens on the stack instance attached
+to the slave device and packets are switched to the stack instance of the
+master device for the L2 processing and routing from that instance will be
+used before packets are queued on the outbound device. In this mode the slaves
+will not receive nor can send multicast / broadcast traffic.
+
+
+5. What to choose (macvlan vs. ipvlan)?
+ These two devices are very similar in many regards and the specific use
+case could very well define which device to choose. if one of the following
+situations defines your use case then you can choose to use ipvlan -
+ (a) The Linux host that is connected to the external switch / router has
+policy configured that allows only one mac per port.
+ (b) No of virtual devices created on a master exceed the mac capacity and
+puts the NIC in promiscous mode and degraded performance is a concern.
+ (c) If the slave device is to be put into the hostile / untrusted network
+namespace where L2 on the slave could be changed / misused.
+
+
+6. Example configuration:
+
+ +=============================================================+
+ | Host: host1 |
+ | |
+ | +----------------------+ +----------------------+ |
+ | | NS:ns0 | | NS:ns1 | |
+ | | | | | |
+ | | | | | |
+ | | ipvl0 | | ipvl1 | |
+ | +----------#-----------+ +-----------#----------+ |
+ | # # |
+ | ################################ |
+ | # eth0 |
+ +==============================#==============================+
+
+
+ (a) Create two network namespaces - ns0, ns1
+ ip netns add ns0
+ ip netns add ns1
+
+ (b) Create two ipvlan slaves on eth0 (master device)
+ ip link add link eth0 ipvl0 type ipvlan mode l2
+ ip link add link eth0 ipvl1 type ipvlan mode l2
+
+ (c) Assign slaves to the respective network namespaces
+ ip link set dev ipvl0 netns ns0
+ ip link set dev ipvl1 netns ns1
+
+ (d) Now switch to the namespace (ns0 or ns1) to configure the slave devices
+ - For ns0
+ (1) ip netns exec ns0 bash
+ (2) ip link set dev ipvl0 up
+ (3) ip link set dev lo up
+ (4) ip -4 addr add 127.0.0.1 dev lo
+ (5) ip -4 addr add $IPADDR dev ipvl0
+ (6) ip -4 route add default via $ROUTER dev ipvl0
+ - For ns1
+ (1) ip netns exec ns1 bash
+ (2) ip link set dev ipvl1 up
+ (3) ip link set dev lo up
+ (4) ip -4 addr add 127.0.0.1 dev lo
+ (5) ip -4 addr add $IPADDR dev ipvl1
+ (6) ip -4 route add default via $ROUTER dev ipvl1
diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt
new file mode 100644
index 000000000..3ba709531
--- /dev/null
+++ b/Documentation/networking/ipvs-sysctl.txt
@@ -0,0 +1,232 @@
+/proc/sys/net/ipv4/vs/* Variables:
+
+am_droprate - INTEGER
+ default 10
+
+ It sets the always mode drop rate, which is used in the mode 3
+ of the drop_rate defense.
+
+amemthresh - INTEGER
+ default 1024
+
+ It sets the available memory threshold (in pages), which is
+ used in the automatic modes of defense. When there is no
+ enough available memory, the respective strategy will be
+ enabled and the variable is automatically set to 2, otherwise
+ the strategy is disabled and the variable is set to 1.
+
+backup_only - BOOLEAN
+ 0 - disabled (default)
+ not 0 - enabled
+
+ If set, disable the director function while the server is
+ in backup mode to avoid packet loops for DR/TUN methods.
+
+conn_reuse_mode - INTEGER
+ 1 - default
+
+ Controls how ipvs will deal with connections that are detected
+ port reuse. It is a bitmap, with the values being:
+
+ 0: disable any special handling on port reuse. The new
+ connection will be delivered to the same real server that was
+ servicing the previous connection. This will effectively
+ disable expire_nodest_conn.
+
+ bit 1: enable rescheduling of new connections when it is safe.
+ That is, whenever expire_nodest_conn and for TCP sockets, when
+ the connection is in TIME_WAIT state (which is only possible if
+ you use NAT mode).
+
+ bit 2: it is bit 1 plus, for TCP connections, when connections
+ are in FIN_WAIT state, as this is the last state seen by load
+ balancer in Direct Routing mode. This bit helps on adding new
+ real servers to a very busy cluster.
+
+conntrack - BOOLEAN
+ 0 - disabled (default)
+ not 0 - enabled
+
+ If set, maintain connection tracking entries for
+ connections handled by IPVS.
+
+ This should be enabled if connections handled by IPVS are to be
+ also handled by stateful firewall rules. That is, iptables rules
+ that make use of connection tracking. It is a performance
+ optimisation to disable this setting otherwise.
+
+ Connections handled by the IPVS FTP application module
+ will have connection tracking entries regardless of this setting.
+
+ Only available when IPVS is compiled with CONFIG_IP_VS_NFCT enabled.
+
+cache_bypass - BOOLEAN
+ 0 - disabled (default)
+ not 0 - enabled
+
+ If it is enabled, forward packets to the original destination
+ directly when no cache server is available and destination
+ address is not local (iph->daddr is RTN_UNICAST). It is mostly
+ used in transparent web cache cluster.
+
+debug_level - INTEGER
+ 0 - transmission error messages (default)
+ 1 - non-fatal error messages
+ 2 - configuration
+ 3 - destination trash
+ 4 - drop entry
+ 5 - service lookup
+ 6 - scheduling
+ 7 - connection new/expire, lookup and synchronization
+ 8 - state transition
+ 9 - binding destination, template checks and applications
+ 10 - IPVS packet transmission
+ 11 - IPVS packet handling (ip_vs_in/ip_vs_out)
+ 12 or more - packet traversal
+
+ Only available when IPVS is compiled with CONFIG_IP_VS_DEBUG enabled.
+
+ Higher debugging levels include the messages for lower debugging
+ levels, so setting debug level 2, includes level 0, 1 and 2
+ messages. Thus, logging becomes more and more verbose the higher
+ the level.
+
+drop_entry - INTEGER
+ 0 - disabled (default)
+
+ The drop_entry defense is to randomly drop entries in the
+ connection hash table, just in order to collect back some
+ memory for new connections. In the current code, the
+ drop_entry procedure can be activated every second, then it
+ randomly scans 1/32 of the whole and drops entries that are in
+ the SYN-RECV/SYNACK state, which should be effective against
+ syn-flooding attack.
+
+ The valid values of drop_entry are from 0 to 3, where 0 means
+ that this strategy is always disabled, 1 and 2 mean automatic
+ modes (when there is no enough available memory, the strategy
+ is enabled and the variable is automatically set to 2,
+ otherwise the strategy is disabled and the variable is set to
+ 1), and 3 means that that the strategy is always enabled.
+
+drop_packet - INTEGER
+ 0 - disabled (default)
+
+ The drop_packet defense is designed to drop 1/rate packets
+ before forwarding them to real servers. If the rate is 1, then
+ drop all the incoming packets.
+
+ The value definition is the same as that of the drop_entry. In
+ the automatic mode, the rate is determined by the follow
+ formula: rate = amemthresh / (amemthresh - available_memory)
+ when available memory is less than the available memory
+ threshold. When the mode 3 is set, the always mode drop rate
+ is controlled by the /proc/sys/net/ipv4/vs/am_droprate.
+
+expire_nodest_conn - BOOLEAN
+ 0 - disabled (default)
+ not 0 - enabled
+
+ The default value is 0, the load balancer will silently drop
+ packets when its destination server is not available. It may
+ be useful, when user-space monitoring program deletes the
+ destination server (because of server overload or wrong
+ detection) and add back the server later, and the connections
+ to the server can continue.
+
+ If this feature is enabled, the load balancer will expire the
+ connection immediately when a packet arrives and its
+ destination server is not available, then the client program
+ will be notified that the connection is closed. This is
+ equivalent to the feature some people requires to flush
+ connections when its destination is not available.
+
+expire_quiescent_template - BOOLEAN
+ 0 - disabled (default)
+ not 0 - enabled
+
+ When set to a non-zero value, the load balancer will expire
+ persistent templates when the destination server is quiescent.
+ This may be useful, when a user makes a destination server
+ quiescent by setting its weight to 0 and it is desired that
+ subsequent otherwise persistent connections are sent to a
+ different destination server. By default new persistent
+ connections are allowed to quiescent destination servers.
+
+ If this feature is enabled, the load balancer will expire the
+ persistence template if it is to be used to schedule a new
+ connection and the destination server is quiescent.
+
+nat_icmp_send - BOOLEAN
+ 0 - disabled (default)
+ not 0 - enabled
+
+ It controls sending icmp error messages (ICMP_DEST_UNREACH)
+ for VS/NAT when the load balancer receives packets from real
+ servers but the connection entries don't exist.
+
+secure_tcp - INTEGER
+ 0 - disabled (default)
+
+ The secure_tcp defense is to use a more complicated TCP state
+ transition table. For VS/NAT, it also delays entering the
+ TCP ESTABLISHED state until the three way handshake is completed.
+
+ The value definition is the same as that of drop_entry and
+ drop_packet.
+
+sync_threshold - INTEGER
+ default 3
+
+ It sets synchronization threshold, which is the minimum number
+ of incoming packets that a connection needs to receive before
+ the connection will be synchronized. A connection will be
+ synchronized, every time the number of its incoming packets
+ modulus 50 equals the threshold. The range of the threshold is
+ from 0 to 49.
+
+snat_reroute - BOOLEAN
+ 0 - disabled
+ not 0 - enabled (default)
+
+ If enabled, recalculate the route of SNATed packets from
+ realservers so that they are routed as if they originate from the
+ director. Otherwise they are routed as if they are forwarded by the
+ director.
+
+ If policy routing is in effect then it is possible that the route
+ of a packet originating from a director is routed differently to a
+ packet being forwarded by the director.
+
+ If policy routing is not in effect then the recalculated route will
+ always be the same as the original route so it is an optimisation
+ to disable snat_reroute and avoid the recalculation.
+
+sync_persist_mode - INTEGER
+ default 0
+
+ Controls the synchronisation of connections when using persistence
+
+ 0: All types of connections are synchronised
+ 1: Attempt to reduce the synchronisation traffic depending on
+ the connection type. For persistent services avoid synchronisation
+ for normal connections, do it only for persistence templates.
+ In such case, for TCP and SCTP it may need enabling sloppy_tcp and
+ sloppy_sctp flags on backup servers. For non-persistent services
+ such optimization is not applied, mode 0 is assumed.
+
+sync_version - INTEGER
+ default 1
+
+ The version of the synchronisation protocol used when sending
+ synchronisation messages.
+
+ 0 selects the original synchronisation protocol (version 0). This
+ should be used when sending synchronisation messages to a legacy
+ system that only understands the original synchronisation protocol.
+
+ 1 selects the current synchronisation protocol (version 1). This
+ should be used where possible.
+
+ Kernels with this sync_version entry are able to receive messages
+ of both version 1 and version 2 of the synchronisation protocol.
diff --git a/Documentation/networking/irda.txt b/Documentation/networking/irda.txt
new file mode 100644
index 000000000..bff26c138
--- /dev/null
+++ b/Documentation/networking/irda.txt
@@ -0,0 +1,10 @@
+To use the IrDA protocols within Linux you will need to get a suitable copy
+of the IrDA Utilities. More detailed information about these and associated
+programs can be found on http://irda.sourceforge.net/
+
+For more information about how to use the IrDA protocol stack, see the
+Linux Infrared HOWTO by Werner Heuser <wehe@tuxmobil.org>:
+<http://www.tuxmobil.org/Infrared-HOWTO/Infrared-HOWTO.html>
+
+There is an active mailing list for discussing Linux-IrDA matters called
+ irda-users@lists.sourceforge.net
diff --git a/Documentation/networking/ixgb.txt b/Documentation/networking/ixgb.txt
new file mode 100644
index 000000000..9b4a10a1c
--- /dev/null
+++ b/Documentation/networking/ixgb.txt
@@ -0,0 +1,433 @@
+Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection
+=====================================================================
+
+March 14, 2011
+
+
+Contents
+========
+
+- In This Release
+- Identifying Your Adapter
+- Building and Installation
+- Command Line Parameters
+- Improving Performance
+- Additional Configurations
+- Known Issues/Troubleshooting
+- Support
+
+
+
+In This Release
+===============
+
+This file describes the ixgb Linux Base Driver for the 10 Gigabit Intel(R)
+Network Connection. This driver includes support for Itanium(R)2-based
+systems.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your 10 Gigabit adapter. All hardware requirements listed apply
+to use with Linux.
+
+The following features are available in this kernel:
+ - Native VLANs
+ - Channel Bonding (teaming)
+ - SNMP
+
+Channel Bonding documentation can be found in the Linux kernel source:
+/Documentation/networking/bonding.txt
+
+The driver information previously displayed in the /proc filesystem is not
+supported in this release. Alternatively, you can use ethtool (version 1.6
+or later), lspci, and iproute2 to obtain the same information.
+
+Instructions on updating ethtool can be found in the section "Additional
+Configurations" later in this document.
+
+
+Identifying Your Adapter
+========================
+
+The following Intel network adapters are compatible with the drivers in this
+release:
+
+Controller Adapter Name Physical Layer
+---------- ------------ --------------
+82597EX Intel(R) PRO/10GbE LR/SR/CX4 10G Base-LR (1310 nm optical fiber)
+ Server Adapters 10G Base-SR (850 nm optical fiber)
+ 10G Base-CX4(twin-axial copper cabling)
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ http://support.intel.com/support/network/sb/CS-012904.htm
+
+
+Building and Installation
+=========================
+
+select m for "Intel(R) PRO/10GbE support" located at:
+ Location:
+ -> Device Drivers
+ -> Network device support (NETDEVICES [=y])
+ -> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
+1. make modules && make modules_install
+
+2. Load the module:
+
+    modprobe ixgb <parameter>=<value>
+
+ The insmod command can be used if the full
+ path to the driver module is specified. For example:
+
+ insmod /lib/modules/<KERNEL VERSION>/kernel/drivers/net/ixgb/ixgb.ko
+
+ With 2.6 based kernels also make sure that older ixgb drivers are
+ removed from the kernel, before loading the new module:
+
+ rmmod ixgb; modprobe ixgb
+
+3. Assign an IP address to the interface by entering the following, where
+ x is the interface number:
+
+ ip addr add ethx <IP_address>
+
+4. Verify that the interface works. Enter the following, where <IP_address>
+ is the IP address for another machine on the same subnet as the interface
+ that is being tested:
+
+ ping <IP_address>
+
+
+Command Line Parameters
+=======================
+
+If the driver is built as a module, the following optional parameters are
+used by entering them on the command line with the modprobe command using
+this syntax:
+
+ modprobe ixgb [<option>=<VAL1>,<VAL2>,...]
+
+For example, with two 10GbE PCI adapters, entering:
+
+ modprobe ixgb TxDescriptors=80,128
+
+loads the ixgb driver with 80 TX resources for the first adapter and 128 TX
+resources for the second adapter.
+
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+FlowControl
+Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
+Default: Read from the EEPROM
+ If EEPROM is not detected, default is 1
+ This parameter controls the automatic generation(Tx) and response(Rx) to
+ Ethernet PAUSE frames. There are hardware bugs associated with enabling
+ Tx flow control so beware.
+
+RxDescriptors
+Valid Range: 64-512
+Default Value: 512
+ This value is the number of receive descriptors allocated by the driver.
+ Increasing this value allows the driver to buffer more incoming packets.
+ Each descriptor is 16 bytes. A receive buffer is also allocated for
+ each descriptor and can be either 2048, 4056, 8192, or 16384 bytes,
+ depending on the MTU setting. When the MTU size is 1500 or less, the
+ receive buffer size is 2048 bytes. When the MTU is greater than 1500 the
+ receive buffer size will be either 4056, 8192, or 16384 bytes. The
+ maximum MTU size is 16114.
+
+RxIntDelay
+Valid Range: 0-65535 (0=off)
+Default Value: 72
+ This value delays the generation of receive interrupts in units of
+ 0.8192 microseconds. Receive interrupt reduction can improve CPU
+ efficiency if properly tuned for specific network traffic. Increasing
+ this value adds extra latency to frame reception and can end up
+ decreasing the throughput of TCP traffic. If the system is reporting
+ dropped receives, this value may be set too high, causing the driver to
+ run out of available receive descriptors.
+
+TxDescriptors
+Valid Range: 64-4096
+Default Value: 256
+ This value is the number of transmit descriptors allocated by the driver.
+ Increasing this value allows the driver to queue more transmits. Each
+ descriptor is 16 bytes.
+
+XsumRX
+Valid Range: 0-1
+Default Value: 1
+ A value of '1' indicates that the driver should enable IP checksum
+ offload for received packets (both UDP and TCP) to the adapter hardware.
+
+
+Improving Performance
+=====================
+
+With the 10 Gigabit server adapters, the default Linux configuration will
+very likely limit the total available throughput artificially. There is a set
+of configuration changes that, when applied together, will increase the ability
+of Linux to transmit and receive data. The following enhancements were
+originally acquired from settings published at http://www.spec.org/web99/ for
+various submitted results using Linux.
+
+NOTE: These changes are only suggestions, and serve as a starting point for
+ tuning your network performance.
+
+The changes are made in three major ways, listed in order of greatest effect:
+- Use ip link to modify the mtu (maximum transmission unit) and the txqueuelen
+ parameter.
+- Use sysctl to modify /proc parameters (essentially kernel tuning)
+- Use setpci to modify the MMRBC field in PCI-X configuration space to increase
+ transmit burst lengths on the bus.
+
+NOTE: setpci modifies the adapter's configuration registers to allow it to read
+up to 4k bytes at a time (for transmits). However, for some systems the
+behavior after modifying this register may be undefined (possibly errors of
+some kind). A power-cycle, hard reset or explicitly setting the e6 register
+back to 22 (setpci -d 8086:1a48 e6.b=22) may be required to get back to a
+stable configuration.
+
+- COPY these lines and paste them into ixgb_perf.sh:
+#!/bin/bash
+echo "configuring network performance , edit this file to change the interface
+or device ID of 10GbE card"
+# set mmrbc to 4k reads, modify only Intel 10GbE device IDs
+# replace 1a48 with appropriate 10GbE device's ID installed on the system,
+# if needed.
+setpci -d 8086:1a48 e6.b=2e
+# set the MTU (max transmission unit) - it requires your switch and clients
+# to change as well.
+# set the txqueuelen
+# your ixgb adapter should be loaded as eth1 for this to work, change if needed
+ip li set dev eth1 mtu 9000 txqueuelen 1000 up
+# call the sysctl utility to modify /proc/sys entries
+sysctl -p ./sysctl_ixgb.conf
+- END ixgb_perf.sh
+
+- COPY these lines and paste them into sysctl_ixgb.conf:
+# some of the defaults may be different for your kernel
+# call this file with sysctl -p <this file>
+# these are just suggested values that worked well to increase throughput in
+# several network benchmark tests, your mileage may vary
+
+### IPV4 specific settings
+# turn TCP timestamp support off, default 1, reduces CPU use
+net.ipv4.tcp_timestamps = 0
+# turn SACK support off, default on
+# on systems with a VERY fast bus -> memory interface this is the big gainer
+net.ipv4.tcp_sack = 0
+# set min/default/max TCP read buffer, default 4096 87380 174760
+net.ipv4.tcp_rmem = 10000000 10000000 10000000
+# set min/pressure/max TCP write buffer, default 4096 16384 131072
+net.ipv4.tcp_wmem = 10000000 10000000 10000000
+# set min/pressure/max TCP buffer space, default 31744 32256 32768
+net.ipv4.tcp_mem = 10000000 10000000 10000000
+
+### CORE settings (mostly for socket and UDP effect)
+# set maximum receive socket buffer size, default 131071
+net.core.rmem_max = 524287
+# set maximum send socket buffer size, default 131071
+net.core.wmem_max = 524287
+# set default receive socket buffer size, default 65535
+net.core.rmem_default = 524287
+# set default send socket buffer size, default 65535
+net.core.wmem_default = 524287
+# set maximum amount of option memory buffers, default 10240
+net.core.optmem_max = 524287
+# set number of unprocessed input packets before kernel starts dropping them; default 300
+net.core.netdev_max_backlog = 300000
+- END sysctl_ixgb.conf
+
+Edit the ixgb_perf.sh script if necessary to change eth1 to whatever interface
+your ixgb driver is using and/or replace '1a48' with appropriate 10GbE device's
+ID installed on the system.
+
+NOTE: Unless these scripts are added to the boot process, these changes will
+ only last only until the next system reboot.
+
+
+Resolving Slow UDP Traffic
+--------------------------
+If your server does not seem to be able to receive UDP traffic as fast as it
+can receive TCP traffic, it could be because Linux, by default, does not set
+the network stack buffers as large as they need to be to support high UDP
+transfer rates. One way to alleviate this problem is to allow more memory to
+be used by the IP stack to store incoming data.
+
+For instance, use the commands:
+ sysctl -w net.core.rmem_max=262143
+and
+ sysctl -w net.core.rmem_default=262143
+to increase the read buffer memory max and default to 262143 (256k - 1) from
+defaults of max=131071 (128k - 1) and default=65535 (64k - 1). These variables
+will increase the amount of memory used by the network stack for receives, and
+can be increased significantly more if necessary for your application.
+
+
+Additional Configurations
+=========================
+
+ Configuring the Driver on Different Distributions
+ -------------------------------------------------
+ Configuring a network driver to load properly when the system is started is
+ distribution dependent. Typically, the configuration process involves adding
+ an alias line to /etc/modprobe.conf as well as editing other system startup
+ scripts and/or configuration files. Many popular Linux distributions ship
+ with tools to make these changes for you. To learn the proper way to
+ configure a network device for your system, refer to your distribution
+ documentation. If during this process you are asked for the driver or module
+ name, the name for the Linux Base Driver for the Intel 10GbE Family of
+ Adapters is ixgb.
+
+ Viewing Link Messages
+ ---------------------
+ Link messages will not be displayed to the console if the distribution is
+ restricting system messages. In order to see network driver link messages on
+ your console, set dmesg to eight by entering the following:
+
+ dmesg -n 8
+
+ NOTE: This setting is not saved across reboots.
+
+
+ Jumbo Frames
+ ------------
+ The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
+ enabled by changing the MTU to a value larger than the default of 1500.
+ The maximum value for the MTU is 16114. Use the ip command to
+ increase the MTU size. For example:
+
+ ip li set dev ethx mtu 9000
+
+ The maximum MTU setting for Jumbo Frames is 16114. This value coincides
+ with the maximum Jumbo Frames size of 16128.
+
+
+ ethtool
+ -------
+ The driver utilizes the ethtool interface for driver configuration and
+ diagnostics, as well as displaying statistical information. The ethtool
+ version 1.6 or later is required for this functionality.
+
+ The latest release of ethtool can be found from
+ http://ftp.kernel.org/pub/software/network/ethtool/
+
+ NOTE: The ethtool version 1.6 only supports a limited set of ethtool options.
+ Support for a more complete ethtool feature set can be enabled by
+ upgrading to the latest version.
+
+
+ NAPI
+ ----
+
+ NAPI (Rx polling mode) is supported in the ixgb driver. NAPI is enabled
+ or disabled based on the configuration of the kernel. see CONFIG_IXGB_NAPI
+
+ See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI.
+
+
+Known Issues/Troubleshooting
+============================
+
+ NOTE: After installing the driver, if your Intel Network Connection is not
+ working, verify in the "In This Release" section of the readme that you have
+ installed the correct driver.
+
+ Intel(R) PRO/10GbE CX4 Server Adapter Cable Interoperability Issue with
+ Fujitsu XENPAK Module in SmartBits Chassis
+ ---------------------------------------------------------------------
+ Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4
+ Server adapter is connected to a Fujitsu XENPAK CX4 module in a SmartBits
+ chassis using 15 m/24AWG cable assemblies manufactured by Fujitsu or Leoni.
+ The CRC errors may be received either by the Intel(R) PRO/10GbE CX4
+ Server adapter or the SmartBits. If this situation occurs using a different
+ cable assembly may resolve the issue.
+
+ CX4 Server Adapter Cable Interoperability Issues with HP Procurve 3400cl
+ Switch Port
+ ------------------------------------------------------------------------
+ Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 Server
+ adapter is connected to an HP Procurve 3400cl switch port using short cables
+ (1 m or shorter). If this situation occurs, using a longer cable may resolve
+ the issue.
+
+ Excessive CRC errors may be observed using Fujitsu 24AWG cable assemblies that
+ Are 10 m or longer or where using a Leoni 15 m/24AWG cable assembly. The CRC
+ errors may be received either by the CX4 Server adapter or at the switch. If
+ this situation occurs, using a different cable assembly may resolve the issue.
+
+
+ Jumbo Frames System Requirement
+ -------------------------------
+ Memory allocation failures have been observed on Linux systems with 64 MB
+ of RAM or less that are running Jumbo Frames. If you are using Jumbo
+ Frames, your system may require more than the advertised minimum
+ requirement of 64 MB of system memory.
+
+
+ Performance Degradation with Jumbo Frames
+ -----------------------------------------
+ Degradation in throughput performance may be observed in some Jumbo frames
+ environments. If this is observed, increasing the application's socket buffer
+ size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help.
+ See the specific application manual and /usr/src/linux*/Documentation/
+ networking/ip-sysctl.txt for more details.
+
+
+ Allocating Rx Buffers when Using Jumbo Frames
+ ---------------------------------------------
+ Allocating Rx buffers when using Jumbo Frames on 2.6.x kernels may fail if
+ the available memory is heavily fragmented. This issue may be seen with PCI-X
+ adapters or with packet split disabled. This can be reduced or eliminated
+ by changing the amount of available memory for receive buffer allocation, by
+ increasing /proc/sys/vm/min_free_kbytes.
+
+
+ Multiple Interfaces on Same Ethernet Broadcast Network
+ ------------------------------------------------------
+ Due to the default ARP behavior on Linux, it is not possible to have
+ one system on two IP networks in the same Ethernet broadcast domain
+ (non-partitioned switch) behave as expected. All Ethernet interfaces
+ will respond to IP traffic for any IP address assigned to the system.
+ This results in unbalanced receive traffic.
+
+ If you have multiple interfaces in a server, do either of the following:
+
+ - Turn on ARP filtering by entering:
+ echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+
+ - Install the interfaces in separate broadcast domains - either in
+ different switches or in a switch partitioned to VLANs.
+
+
+ UDP Stress Test Dropped Packet Issue
+ --------------------------------------
+ Under small packets UDP stress test with 10GbE driver, the Linux system
+ may drop UDP packets due to the fullness of socket buffers. You may want
+ to change the driver's Flow Control variables to the minimum value for
+ controlling packet reception.
+
+
+ Tx Hangs Possible Under Stress
+ ------------------------------
+ Under stress conditions, if TX hangs occur, turning off TSO
+ "ethtool -K eth0 tso off" may resolve the problem.
+
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ http://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/ixgbe.txt b/Documentation/networking/ixgbe.txt
new file mode 100644
index 000000000..6f0cb57b5
--- /dev/null
+++ b/Documentation/networking/ixgbe.txt
@@ -0,0 +1,349 @@
+Linux* Base Driver for the Intel(R) Ethernet 10 Gigabit PCI Express Family of
+Adapters
+=============================================================================
+
+Intel 10 Gigabit Linux driver.
+Copyright(c) 1999 - 2013 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Additional Configurations
+- Performance Tuning
+- Known Issues
+- Support
+
+Identifying Your Adapter
+========================
+
+The driver in this release is compatible with 82598, 82599 and X540-based
+Intel Network Connections.
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ http://support.intel.com/support/network/sb/CS-012904.htm
+
+SFP+ Devices with Pluggable Optics
+----------------------------------
+
+82599-BASED ADAPTERS
+
+NOTES: If your 82599-based Intel(R) Network Adapter came with Intel optics, or
+is an Intel(R) Ethernet Server Adapter X520-2, then it only supports Intel
+optics and/or the direct attach cables listed below.
+
+When 82599-based SFP+ devices are connected back to back, they should be set to
+the same Speed setting via ethtool. Results may vary if you mix speed settings.
+82598-based adapters support all passive direct attach cables that comply
+with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach
+cables are not supported.
+
+Supplier Type Part Numbers
+
+SR Modules
+Intel DUAL RATE 1G/10G SFP+ SR (bailed) FTLX8571D3BCV-IT
+Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDDZ-IN1
+Intel DUAL RATE 1G/10G SFP+ SR (bailed) AFBR-703SDZ-IN2
+LR Modules
+Intel DUAL RATE 1G/10G SFP+ LR (bailed) FTLX1471D3BCV-IT
+Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDDZ-IN1
+Intel DUAL RATE 1G/10G SFP+ LR (bailed) AFCT-701SDZ-IN2
+
+The following is a list of 3rd party SFP+ modules and direct attach cables that
+have received some testing. Not all modules are applicable to all devices.
+
+Supplier Type Part Numbers
+
+Finisar SFP+ SR bailed, 10g single rate FTLX8571D3BCL
+Avago SFP+ SR bailed, 10g single rate AFBR-700SDZ
+Finisar SFP+ LR bailed, 10g single rate FTLX1471D3BCL
+
+Finisar DUAL RATE 1G/10G SFP+ SR (No Bail) FTLX8571D3QCV-IT
+Avago DUAL RATE 1G/10G SFP+ SR (No Bail) AFBR-703SDZ-IN1
+Finisar DUAL RATE 1G/10G SFP+ LR (No Bail) FTLX1471D3QCV-IT
+Avago DUAL RATE 1G/10G SFP+ LR (No Bail) AFCT-701SDZ-IN1
+Finistar 1000BASE-T SFP FCLF8522P2BTL
+Avago 1000BASE-T SFP ABCU-5710RZ
+
+82599-based adapters support all passive and active limiting direct attach
+cables that comply with SFF-8431 v4.1 and SFF-8472 v10.4 specifications.
+
+Laser turns off for SFP+ when device is down
+-------------------------------------------
+"ip link set down" turns off the laser for 82599-based SFP+ fiber adapters.
+"ip link set up" turns on the laser.
+
+
+82598-BASED ADAPTERS
+
+NOTES for 82598-Based Adapters:
+- Intel(R) Network Adapters that support removable optical modules only support
+ their original module type (i.e., the Intel(R) 10 Gigabit SR Dual Port
+ Express Module only supports SR optical modules). If you plug in a different
+ type of module, the driver will not load.
+- Hot Swapping/hot plugging optical modules is not supported.
+- Only single speed, 10 gigabit modules are supported.
+- LAN on Motherboard (LOMs) may support DA, SR, or LR modules. Other module
+ types are not supported. Please see your system documentation for details.
+
+The following is a list of 3rd party SFP+ modules and direct attach cables that
+have received some testing. Not all modules are applicable to all devices.
+
+Supplier Type Part Numbers
+
+Finisar SFP+ SR bailed, 10g single rate FTLX8571D3BCL
+Avago SFP+ SR bailed, 10g single rate AFBR-700SDZ
+Finisar SFP+ LR bailed, 10g single rate FTLX1471D3BCL
+
+82598-based adapters support all passive direct attach cables that comply
+with SFF-8431 v4.1 and SFF-8472 v10.4 specifications. Active direct attach
+cables are not supported.
+
+
+Flow Control
+------------
+Ethernet Flow Control (IEEE 802.3x) can be configured with ethtool to enable
+receiving and transmitting pause frames for ixgbe. When TX is enabled, PAUSE
+frames are generated when the receive packet buffer crosses a predefined
+threshold. When rx is enabled, the transmit unit will halt for the time delay
+specified when a PAUSE frame is received.
+
+Flow Control is enabled by default. If you want to disable a flow control
+capable link partner, use ethtool:
+
+ ethtool -A eth? autoneg off RX off TX off
+
+NOTE: For 82598 backplane cards entering 1 gig mode, flow control default
+behavior is changed to off. Flow control in 1 gig mode on these devices can
+lead to Tx hangs.
+
+Intel(R) Ethernet Flow Director
+-------------------------------
+Supports advanced filters that direct receive packets by their flows to
+different queues. Enables tight control on routing a flow in the platform.
+Matches flows and CPU cores for flow affinity. Supports multiple parameters
+for flexible flow classification and load balancing.
+
+Flow director is enabled only if the kernel is multiple TX queue capable.
+
+An included script (set_irq_affinity.sh) automates setting the IRQ to CPU
+affinity.
+
+You can verify that the driver is using Flow Director by looking at the counter
+in ethtool: fdir_miss and fdir_match.
+
+Other ethtool Commands:
+To enable Flow Director
+ ethtool -K ethX ntuple on
+To add a filter
+ Use -U switch. e.g., ethtool -U ethX flow-type tcp4 src-ip 10.0.128.23
+ action 1
+To see the list of filters currently present:
+ ethtool -u ethX
+
+Perfect Filter: Perfect filter is an interface to load the filter table that
+funnels all flow into queue_0 unless an alternative queue is specified using
+"action". In that case, any flow that matches the filter criteria will be
+directed to the appropriate queue.
+
+If the queue is defined as -1, filter will drop matching packets.
+
+To account for filter matches and misses, there are two stats in ethtool:
+fdir_match and fdir_miss. In addition, rx_queue_N_packets shows the number of
+packets processed by the Nth queue.
+
+NOTE: Receive Packet Steering (RPS) and Receive Flow Steering (RFS) are not
+compatible with Flow Director. IF Flow Director is enabled, these will be
+disabled.
+
+The following three parameters impact Flow Director.
+
+FdirMode
+--------
+Valid Range: 0-2 (0=off, 1=ATR, 2=Perfect filter mode)
+Default Value: 1
+
+ Flow Director filtering modes.
+
+FdirPballoc
+-----------
+Valid Range: 0-2 (0=64k, 1=128k, 2=256k)
+Default Value: 0
+
+ Flow Director allocated packet buffer size.
+
+AtrSampleRate
+--------------
+Valid Range: 1-100
+Default Value: 20
+
+ Software ATR Tx packet sample rate. For example, when set to 20, every 20th
+ packet, looks to see if the packet will create a new flow.
+
+Node
+----
+Valid Range: 0-n
+Default Value: 1 (off)
+
+ 0 - n: where n is the number of NUMA nodes (i.e. 0 - 3) currently online in
+ your system
+ 1: turns this option off
+
+ The Node parameter will allow you to pick which NUMA node you want to have
+ the adapter allocate memory on.
+
+max_vfs
+-------
+Valid Range: 1-63
+Default Value: 0
+
+ If the value is greater than 0 it will also force the VMDq parameter to be 1
+ or more.
+
+ This parameter adds support for SR-IOV. It causes the driver to spawn up to
+ max_vfs worth of virtual function.
+
+
+Additional Configurations
+=========================
+
+ Jumbo Frames
+ ------------
+ The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
+ enabled by changing the MTU to a value larger than the default of 1500.
+ The maximum value for the MTU is 16110. Use the ip command to
+ increase the MTU size. For example:
+
+ ip link set dev ethx mtu 9000
+
+ The maximum MTU setting for Jumbo Frames is 9710. This value coincides
+ with the maximum Jumbo Frames size of 9728.
+
+ Generic Receive Offload, aka GRO
+ --------------------------------
+ The driver supports the in-kernel software implementation of GRO. GRO has
+ shown that by coalescing Rx traffic into larger chunks of data, CPU
+ utilization can be significantly reduced when under large Rx load. GRO is an
+ evolution of the previously-used LRO interface. GRO is able to coalesce
+ other protocols besides TCP. It's also safe to use with configurations that
+ are problematic for LRO, namely bridging and iSCSI.
+
+ Data Center Bridging, aka DCB
+ -----------------------------
+ DCB is a configuration Quality of Service implementation in hardware.
+ It uses the VLAN priority tag (802.1p) to filter traffic. That means
+ that there are 8 different priorities that traffic can be filtered into.
+ It also enables priority flow control which can limit or eliminate the
+ number of dropped packets during network stress. Bandwidth can be
+ allocated to each of these priorities, which is enforced at the hardware
+ level.
+
+ To enable DCB support in ixgbe, you must enable the DCB netlink layer to
+ allow the userspace tools (see below) to communicate with the driver.
+ This can be found in the kernel configuration here:
+
+ -> Networking support
+ -> Networking options
+ -> Data Center Bridging support
+
+ Once this is selected, DCB support must be selected for ixgbe. This can
+ be found here:
+
+ -> Device Drivers
+ -> Network device support (NETDEVICES [=y])
+ -> Ethernet (10000 Mbit) (NETDEV_10000 [=y])
+ -> Intel(R) 10GbE PCI Express adapters support
+ -> Data Center Bridging (DCB) Support
+
+ After these options are selected, you must rebuild your kernel and your
+ modules.
+
+ In order to use DCB, userspace tools must be downloaded and installed.
+ The dcbd tools can be found at:
+
+ http://e1000.sf.net
+
+ Ethtool
+ -------
+ The driver utilizes the ethtool interface for driver configuration and
+ diagnostics, as well as displaying statistical information. The latest
+ ethtool version is required for this functionality.
+
+ The latest release of ethtool can be found from
+ http://ftp.kernel.org/pub/software/network/ethtool/
+
+ FCoE
+ ----
+ This release of the ixgbe driver contains new code to enable users to use
+ Fiber Channel over Ethernet (FCoE) and Data Center Bridging (DCB)
+ functionality that is supported by the 82598-based hardware. This code has
+ no default effect on the regular driver operation, and configuring DCB and
+ FCoE is outside the scope of this driver README. Refer to
+ http://www.open-fcoe.org/ for FCoE project information and contact
+ e1000-eedc@lists.sourceforge.net for DCB information.
+
+ MAC and VLAN anti-spoofing feature
+ ----------------------------------
+ When a malicious driver attempts to send a spoofed packet, it is dropped by
+ the hardware and not transmitted. An interrupt is sent to the PF driver
+ notifying it of the spoof attempt.
+
+ When a spoofed packet is detected the PF driver will send the following
+ message to the system log (displayed by the "dmesg" command):
+
+ Spoof event(s) detected on VF (n)
+
+ Where n=the VF that attempted to do the spoofing.
+
+
+Performance Tuning
+==================
+
+An excellent article on performance tuning can be found at:
+
+http://www.redhat.com/promo/summit/2008/downloads/pdf/Thursday/Mark_Wagner.pdf
+
+
+Known Issues
+============
+
+ Enabling SR-IOV in a 32-bit or 64-bit Microsoft* Windows* Server 2008/R2
+ Guest OS using Intel (R) 82576-based GbE or Intel (R) 82599-based 10GbE
+ controller under KVM
+ ------------------------------------------------------------------------
+ KVM Hypervisor/VMM supports direct assignment of a PCIe device to a VM. This
+ includes traditional PCIe devices, as well as SR-IOV-capable devices using
+ Intel 82576-based and 82599-based controllers.
+
+ While direct assignment of a PCIe device or an SR-IOV Virtual Function (VF)
+ to a Linux-based VM running 2.6.32 or later kernel works fine, there is a
+ known issue with Microsoft Windows Server 2008 VM that results in a "yellow
+ bang" error. This problem is within the KVM VMM itself, not the Intel driver,
+ or the SR-IOV logic of the VMM, but rather that KVM emulates an older CPU
+ model for the guests, and this older CPU model does not support MSI-X
+ interrupts, which is a requirement for Intel SR-IOV.
+
+ If you wish to use the Intel 82576 or 82599-based controllers in SR-IOV mode
+ with KVM and a Microsoft Windows Server 2008 guest try the following
+ workaround. The workaround is to tell KVM to emulate a different model of CPU
+ when using qemu to create the KVM guest:
+
+ "-cpu qemu64,model=13"
+
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ http://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://e1000.sourceforge.net
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/ixgbevf.txt b/Documentation/networking/ixgbevf.txt
new file mode 100644
index 000000000..53d8d2a5a
--- /dev/null
+++ b/Documentation/networking/ixgbevf.txt
@@ -0,0 +1,52 @@
+Linux* Base Driver for Intel(R) Ethernet Network Connection
+===========================================================
+
+Intel Gigabit Linux driver.
+Copyright(c) 1999 - 2013 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Known Issues/Troubleshooting
+- Support
+
+This file describes the ixgbevf Linux* Base Driver for Intel Network
+Connection.
+
+The ixgbevf driver supports 82599-based virtual function devices that can only
+be activated on kernels with CONFIG_PCI_IOV enabled.
+
+The ixgbevf driver supports virtual functions generated by the ixgbe driver
+with a max_vfs value of 1 or greater.
+
+The guest OS loading the ixgbevf driver must support MSI-X interrupts.
+
+VLANs: There is a limit of a total of 32 shared VLANs to 1 or more VFs.
+
+Identifying Your Adapter
+========================
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+ http://support.intel.com/support/go/network/adapter/idguide.htm
+
+Known Issues/Troubleshooting
+============================
+
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+ http://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sf.net
diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt
new file mode 100644
index 000000000..c74434de2
--- /dev/null
+++ b/Documentation/networking/l2tp.txt
@@ -0,0 +1,348 @@
+This document describes how to use the kernel's L2TP drivers to
+provide L2TP functionality. L2TP is a protocol that tunnels one or
+more sessions over an IP tunnel. It is commonly used for VPNs
+(L2TP/IPSec) and by ISPs to tunnel subscriber PPP sessions over an IP
+network infrastructure. With L2TPv3, it is also useful as a Layer-2
+tunneling infrastructure.
+
+Features
+========
+
+L2TPv2 (PPP over L2TP (UDP tunnels)).
+L2TPv3 ethernet pseudowires.
+L2TPv3 PPP pseudowires.
+L2TPv3 IP encapsulation.
+Netlink sockets for L2TPv3 configuration management.
+
+History
+=======
+
+The original pppol2tp driver was introduced in 2.6.23 and provided
+L2TPv2 functionality (rfc2661). L2TPv2 is used to tunnel one or more PPP
+sessions over a UDP tunnel.
+
+L2TPv3 (rfc3931) changes the protocol to allow different frame types
+to be passed over an L2TP tunnel by moving the PPP-specific parts of
+the protocol out of the core L2TP packet headers. Each frame type is
+known as a pseudowire type. Ethernet, PPP, HDLC, Frame Relay and ATM
+pseudowires for L2TP are defined in separate RFC standards. Another
+change for L2TPv3 is that it can be carried directly over IP with no
+UDP header (UDP is optional). It is also possible to create static
+unmanaged L2TPv3 tunnels manually without a control protocol
+(userspace daemon) to manage them.
+
+To support L2TPv3, the original pppol2tp driver was split up to
+separate the L2TP and PPP functionality. Existing L2TPv2 userspace
+apps should be unaffected as the original pppol2tp sockets API is
+retained. L2TPv3, however, uses netlink to manage L2TPv3 tunnels and
+sessions.
+
+Design
+======
+
+The L2TP protocol separates control and data frames. The L2TP kernel
+drivers handle only L2TP data frames; control frames are always
+handled by userspace. L2TP control frames carry messages between L2TP
+clients/servers and are used to setup / teardown tunnels and
+sessions. An L2TP client or server is implemented in userspace.
+
+Each L2TP tunnel is implemented using a UDP or L2TPIP socket; L2TPIP
+provides L2TPv3 IP encapsulation (no UDP) and is implemented using a
+new l2tpip socket family. The tunnel socket is typically created by
+userspace, though for unmanaged L2TPv3 tunnels, the socket can also be
+created by the kernel. Each L2TP session (pseudowire) gets a network
+interface instance. In the case of PPP, these interfaces are created
+indirectly by pppd using a pppol2tp socket. In the case of ethernet,
+the netdevice is created upon a netlink request to create an L2TPv3
+ethernet pseudowire.
+
+For PPP, the PPPoL2TP driver, net/l2tp/l2tp_ppp.c, provides a
+mechanism by which PPP frames carried through an L2TP session are
+passed through the kernel's PPP subsystem. The standard PPP daemon,
+pppd, handles all PPP interaction with the peer. PPP network
+interfaces are created for each local PPP endpoint. The kernel's PPP
+subsystem arranges for PPP control frames to be delivered to pppd,
+while data frames are forwarded as usual.
+
+For ethernet, the L2TPETH driver, net/l2tp/l2tp_eth.c, implements a
+netdevice driver, managing virtual ethernet devices, one per
+pseudowire. These interfaces can be managed using standard Linux tools
+such as "ip" and "ifconfig". If only IP frames are passed over the
+tunnel, the interface can be given an IP addresses of itself and its
+peer. If non-IP frames are to be passed over the tunnel, the interface
+can be added to a bridge using brctl. All L2TP datapath protocol
+functions are handled by the L2TP core driver.
+
+Each tunnel and session within a tunnel is assigned a unique tunnel_id
+and session_id. These ids are carried in the L2TP header of every
+control and data packet. (Actually, in L2TPv3, the tunnel_id isn't
+present in data frames - it is inferred from the IP connection on
+which the packet was received.) The L2TP driver uses the ids to lookup
+internal tunnel and/or session contexts to determine how to handle the
+packet. Zero tunnel / session ids are treated specially - zero ids are
+never assigned to tunnels or sessions in the network. In the driver,
+the tunnel context keeps a reference to the tunnel UDP or L2TPIP
+socket. The session context holds data that lets the driver interface
+to the kernel's network frame type subsystems, i.e. PPP, ethernet.
+
+Userspace Programming
+=====================
+
+For L2TPv2, there are a number of requirements on the userspace L2TP
+daemon in order to use the pppol2tp driver.
+
+1. Use a UDP socket per tunnel.
+
+2. Create a single PPPoL2TP socket per tunnel bound to a special null
+ session id. This is used only for communicating with the driver but
+ must remain open while the tunnel is active. Opening this tunnel
+ management socket causes the driver to mark the tunnel socket as an
+ L2TP UDP encapsulation socket and flags it for use by the
+ referenced tunnel id. This hooks up the UDP receive path via
+ udp_encap_rcv() in net/ipv4/udp.c. PPP data frames are never passed
+ in this special PPPoX socket.
+
+3. Create a PPPoL2TP socket per L2TP session. This is typically done
+ by starting pppd with the pppol2tp plugin and appropriate
+ arguments. A PPPoL2TP tunnel management socket (Step 2) must be
+ created before the first PPPoL2TP session socket is created.
+
+When creating PPPoL2TP sockets, the application provides information
+to the driver about the socket in a socket connect() call. Source and
+destination tunnel and session ids are provided, as well as the file
+descriptor of a UDP socket. See struct pppol2tp_addr in
+include/linux/if_pppol2tp.h. Note that zero tunnel / session ids are
+treated specially. When creating the per-tunnel PPPoL2TP management
+socket in Step 2 above, zero source and destination session ids are
+specified, which tells the driver to prepare the supplied UDP file
+descriptor for use as an L2TP tunnel socket.
+
+Userspace may control behavior of the tunnel or session using
+setsockopt and ioctl on the PPPoX socket. The following socket
+options are supported:-
+
+DEBUG - bitmask of debug message categories. See below.
+SENDSEQ - 0 => don't send packets with sequence numbers
+ 1 => send packets with sequence numbers
+RECVSEQ - 0 => receive packet sequence numbers are optional
+ 1 => drop receive packets without sequence numbers
+LNSMODE - 0 => act as LAC.
+ 1 => act as LNS.
+REORDERTO - reorder timeout (in millisecs). If 0, don't try to reorder.
+
+Only the DEBUG option is supported by the special tunnel management
+PPPoX socket.
+
+In addition to the standard PPP ioctls, a PPPIOCGL2TPSTATS is provided
+to retrieve tunnel and session statistics from the kernel using the
+PPPoX socket of the appropriate tunnel or session.
+
+For L2TPv3, userspace must use the netlink API defined in
+include/linux/l2tp.h to manage tunnel and session contexts. The
+general procedure to create a new L2TP tunnel with one session is:-
+
+1. Open a GENL socket using L2TP_GENL_NAME for configuring the kernel
+ using netlink.
+
+2. Create a UDP or L2TPIP socket for the tunnel.
+
+3. Create a new L2TP tunnel using a L2TP_CMD_TUNNEL_CREATE
+ request. Set attributes according to desired tunnel parameters,
+ referencing the UDP or L2TPIP socket created in the previous step.
+
+4. Create a new L2TP session in the tunnel using a
+ L2TP_CMD_SESSION_CREATE request.
+
+The tunnel and all of its sessions are closed when the tunnel socket
+is closed. The netlink API may also be used to delete sessions and
+tunnels. Configuration and status info may be set or read using netlink.
+
+The L2TP driver also supports static (unmanaged) L2TPv3 tunnels. These
+are where there is no L2TP control message exchange with the peer to
+setup the tunnel; the tunnel is configured manually at each end of the
+tunnel. There is no need for an L2TP userspace application in this
+case -- the tunnel socket is created by the kernel and configured
+using parameters sent in the L2TP_CMD_TUNNEL_CREATE netlink
+request. The "ip" utility of iproute2 has commands for managing static
+L2TPv3 tunnels; do "ip l2tp help" for more information.
+
+Debugging
+=========
+
+The driver supports a flexible debug scheme where kernel trace
+messages may be optionally enabled per tunnel and per session. Care is
+needed when debugging a live system since the messages are not
+rate-limited and a busy system could be swamped. Userspace uses
+setsockopt on the PPPoX socket to set a debug mask.
+
+The following debug mask bits are available:
+
+PPPOL2TP_MSG_DEBUG verbose debug (if compiled in)
+PPPOL2TP_MSG_CONTROL userspace - kernel interface
+PPPOL2TP_MSG_SEQ sequence numbers handling
+PPPOL2TP_MSG_DATA data packets
+
+If enabled, files under a l2tp debugfs directory can be used to dump
+kernel state about L2TP tunnels and sessions. To access it, the
+debugfs filesystem must first be mounted.
+
+# mount -t debugfs debugfs /debug
+
+Files under the l2tp directory can then be accessed.
+
+# cat /debug/l2tp/tunnels
+
+The debugfs files should not be used by applications to obtain L2TP
+state information because the file format is subject to change. It is
+implemented to provide extra debug information to help diagnose
+problems.) Users should use the netlink API.
+
+/proc/net/pppol2tp is also provided for backwards compatibility with
+the original pppol2tp driver. It lists information about L2TPv2
+tunnels and sessions only. Its use is discouraged.
+
+Unmanaged L2TPv3 Tunnels
+========================
+
+Some commercial L2TP products support unmanaged L2TPv3 ethernet
+tunnels, where there is no L2TP control protocol; tunnels are
+configured at each side manually. New commands are available in
+iproute2's ip utility to support this.
+
+To create an L2TPv3 ethernet pseudowire between local host 192.168.1.1
+and peer 192.168.1.2, using IP addresses 10.5.1.1 and 10.5.1.2 for the
+tunnel endpoints:-
+
+# modprobe l2tp_eth
+# modprobe l2tp_netlink
+
+# ip l2tp add tunnel tunnel_id 1 peer_tunnel_id 1 udp_sport 5000 \
+ udp_dport 5000 encap udp local 192.168.1.1 remote 192.168.1.2
+# ip l2tp add session tunnel_id 1 session_id 1 peer_session_id 1
+# ifconfig -a
+# ip addr add 10.5.1.2/32 peer 10.5.1.1/32 dev l2tpeth0
+# ifconfig l2tpeth0 up
+
+Choose IP addresses to be the address of a local IP interface and that
+of the remote system. The IP addresses of the l2tpeth0 interface can be
+anything suitable.
+
+Repeat the above at the peer, with ports, tunnel/session ids and IP
+addresses reversed. The tunnel and session IDs can be any non-zero
+32-bit number, but the values must be reversed at the peer.
+
+Host 1 Host2
+udp_sport=5000 udp_sport=5001
+udp_dport=5001 udp_dport=5000
+tunnel_id=42 tunnel_id=45
+peer_tunnel_id=45 peer_tunnel_id=42
+session_id=128 session_id=5196755
+peer_session_id=5196755 peer_session_id=128
+
+When done at both ends of the tunnel, it should be possible to send
+data over the network. e.g.
+
+# ping 10.5.1.1
+
+
+Sample Userspace Code
+=====================
+
+1. Create tunnel management PPPoX socket
+
+ kernel_fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
+ if (kernel_fd >= 0) {
+ struct sockaddr_pppol2tp sax;
+ struct sockaddr_in const *peer_addr;
+
+ peer_addr = l2tp_tunnel_get_peer_addr(tunnel);
+ memset(&sax, 0, sizeof(sax));
+ sax.sa_family = AF_PPPOX;
+ sax.sa_protocol = PX_PROTO_OL2TP;
+ sax.pppol2tp.fd = udp_fd; /* fd of tunnel UDP socket */
+ sax.pppol2tp.addr.sin_addr.s_addr = peer_addr->sin_addr.s_addr;
+ sax.pppol2tp.addr.sin_port = peer_addr->sin_port;
+ sax.pppol2tp.addr.sin_family = AF_INET;
+ sax.pppol2tp.s_tunnel = tunnel_id;
+ sax.pppol2tp.s_session = 0; /* special case: mgmt socket */
+ sax.pppol2tp.d_tunnel = 0;
+ sax.pppol2tp.d_session = 0; /* special case: mgmt socket */
+
+ if(connect(kernel_fd, (struct sockaddr *)&sax, sizeof(sax) ) < 0 ) {
+ perror("connect failed");
+ result = -errno;
+ goto err;
+ }
+ }
+
+2. Create session PPPoX data socket
+
+ struct sockaddr_pppol2tp sax;
+ int fd;
+
+ /* Note, the target socket must be bound already, else it will not be ready */
+ sax.sa_family = AF_PPPOX;
+ sax.sa_protocol = PX_PROTO_OL2TP;
+ sax.pppol2tp.fd = tunnel_fd;
+ sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
+ sax.pppol2tp.addr.sin_port = addr->sin_port;
+ sax.pppol2tp.addr.sin_family = AF_INET;
+ sax.pppol2tp.s_tunnel = tunnel_id;
+ sax.pppol2tp.s_session = session_id;
+ sax.pppol2tp.d_tunnel = peer_tunnel_id;
+ sax.pppol2tp.d_session = peer_session_id;
+
+ /* session_fd is the fd of the session's PPPoL2TP socket.
+ * tunnel_fd is the fd of the tunnel UDP socket.
+ */
+ fd = connect(session_fd, (struct sockaddr *)&sax, sizeof(sax));
+ if (fd < 0 ) {
+ return -errno;
+ }
+ return 0;
+
+Internal Implementation
+=======================
+
+The driver keeps a struct l2tp_tunnel context per L2TP tunnel and a
+struct l2tp_session context for each session. The l2tp_tunnel is
+always associated with a UDP or L2TP/IP socket and keeps a list of
+sessions in the tunnel. The l2tp_session context keeps kernel state
+about the session. It has private data which is used for data specific
+to the session type. With L2TPv2, the session always carried PPP
+traffic. With L2TPv3, the session can also carry ethernet frames
+(ethernet pseudowire) or other data types such as ATM, HDLC or Frame
+Relay.
+
+When a tunnel is first opened, the reference count on the socket is
+increased using sock_hold(). This ensures that the kernel socket
+cannot be removed while L2TP's data structures reference it.
+
+Some L2TP sessions also have a socket (PPP pseudowires) while others
+do not (ethernet pseudowires). We can't use the socket reference count
+as the reference count for session contexts. The L2TP implementation
+therefore has its own internal reference counts on the session
+contexts.
+
+To Do
+=====
+
+Add L2TP tunnel switching support. This would route tunneled traffic
+from one L2TP tunnel into another. Specified in
+http://tools.ietf.org/html/draft-ietf-l2tpext-tunnel-switching-08
+
+Add L2TPv3 VLAN pseudowire support.
+
+Add L2TPv3 IP pseudowire support.
+
+Add L2TPv3 ATM pseudowire support.
+
+Miscellaneous
+=============
+
+The L2TP drivers were developed as part of the OpenL2TP project by
+Katalix Systems Ltd. OpenL2TP is a full-featured L2TP client / server,
+designed from the ground up to have the L2TP datapath in the
+kernel. The project also implemented the pppol2tp plugin for pppd
+which allows pppd to use the kernel driver. Details can be found at
+http://www.openl2tp.org.
diff --git a/Documentation/networking/lapb-module.txt b/Documentation/networking/lapb-module.txt
new file mode 100644
index 000000000..d4fc8f221
--- /dev/null
+++ b/Documentation/networking/lapb-module.txt
@@ -0,0 +1,263 @@
+ The Linux LAPB Module Interface 1.3
+
+ Jonathan Naylor 29.12.96
+
+Changed (Henner Eisen, 2000-10-29): int return value for data_indication()
+
+The LAPB module will be a separately compiled module for use by any parts of
+the Linux operating system that require a LAPB service. This document
+defines the interfaces to, and the services provided by this module. The
+term module in this context does not imply that the LAPB module is a
+separately loadable module, although it may be. The term module is used in
+its more standard meaning.
+
+The interface to the LAPB module consists of functions to the module,
+callbacks from the module to indicate important state changes, and
+structures for getting and setting information about the module.
+
+Structures
+----------
+
+Probably the most important structure is the skbuff structure for holding
+received and transmitted data, however it is beyond the scope of this
+document.
+
+The two LAPB specific structures are the LAPB initialisation structure and
+the LAPB parameter structure. These will be defined in a standard header
+file, <linux/lapb.h>. The header file <net/lapb.h> is internal to the LAPB
+module and is not for use.
+
+LAPB Initialisation Structure
+-----------------------------
+
+This structure is used only once, in the call to lapb_register (see below).
+It contains information about the device driver that requires the services
+of the LAPB module.
+
+struct lapb_register_struct {
+ void (*connect_confirmation)(int token, int reason);
+ void (*connect_indication)(int token, int reason);
+ void (*disconnect_confirmation)(int token, int reason);
+ void (*disconnect_indication)(int token, int reason);
+ int (*data_indication)(int token, struct sk_buff *skb);
+ void (*data_transmit)(int token, struct sk_buff *skb);
+};
+
+Each member of this structure corresponds to a function in the device driver
+that is called when a particular event in the LAPB module occurs. These will
+be described in detail below. If a callback is not required (!!) then a NULL
+may be substituted.
+
+
+LAPB Parameter Structure
+------------------------
+
+This structure is used with the lapb_getparms and lapb_setparms functions
+(see below). They are used to allow the device driver to get and set the
+operational parameters of the LAPB implementation for a given connection.
+
+struct lapb_parms_struct {
+ unsigned int t1;
+ unsigned int t1timer;
+ unsigned int t2;
+ unsigned int t2timer;
+ unsigned int n2;
+ unsigned int n2count;
+ unsigned int window;
+ unsigned int state;
+ unsigned int mode;
+};
+
+T1 and T2 are protocol timing parameters and are given in units of 100ms. N2
+is the maximum number of tries on the link before it is declared a failure.
+The window size is the maximum number of outstanding data packets allowed to
+be unacknowledged by the remote end, the value of the window is between 1
+and 7 for a standard LAPB link, and between 1 and 127 for an extended LAPB
+link.
+
+The mode variable is a bit field used for setting (at present) three values.
+The bit fields have the following meanings:
+
+Bit Meaning
+0 LAPB operation (0=LAPB_STANDARD 1=LAPB_EXTENDED).
+1 [SM]LP operation (0=LAPB_SLP 1=LAPB=MLP).
+2 DTE/DCE operation (0=LAPB_DTE 1=LAPB_DCE)
+3-31 Reserved, must be 0.
+
+Extended LAPB operation indicates the use of extended sequence numbers and
+consequently larger window sizes, the default is standard LAPB operation.
+MLP operation is the same as SLP operation except that the addresses used by
+LAPB are different to indicate the mode of operation, the default is Single
+Link Procedure. The difference between DCE and DTE operation is (i) the
+addresses used for commands and responses, and (ii) when the DCE is not
+connected, it sends DM without polls set, every T1. The upper case constant
+names will be defined in the public LAPB header file.
+
+
+Functions
+---------
+
+The LAPB module provides a number of function entry points.
+
+
+int lapb_register(void *token, struct lapb_register_struct);
+
+This must be called before the LAPB module may be used. If the call is
+successful then LAPB_OK is returned. The token must be a unique identifier
+generated by the device driver to allow for the unique identification of the
+instance of the LAPB link. It is returned by the LAPB module in all of the
+callbacks, and is used by the device driver in all calls to the LAPB module.
+For multiple LAPB links in a single device driver, multiple calls to
+lapb_register must be made. The format of the lapb_register_struct is given
+above. The return values are:
+
+LAPB_OK LAPB registered successfully.
+LAPB_BADTOKEN Token is already registered.
+LAPB_NOMEM Out of memory
+
+
+int lapb_unregister(void *token);
+
+This releases all the resources associated with a LAPB link. Any current
+LAPB link will be abandoned without further messages being passed. After
+this call, the value of token is no longer valid for any calls to the LAPB
+function. The valid return values are:
+
+LAPB_OK LAPB unregistered successfully.
+LAPB_BADTOKEN Invalid/unknown LAPB token.
+
+
+int lapb_getparms(void *token, struct lapb_parms_struct *parms);
+
+This allows the device driver to get the values of the current LAPB
+variables, the lapb_parms_struct is described above. The valid return values
+are:
+
+LAPB_OK LAPB getparms was successful.
+LAPB_BADTOKEN Invalid/unknown LAPB token.
+
+
+int lapb_setparms(void *token, struct lapb_parms_struct *parms);
+
+This allows the device driver to set the values of the current LAPB
+variables, the lapb_parms_struct is described above. The values of t1timer,
+t2timer and n2count are ignored, likewise changing the mode bits when
+connected will be ignored. An error implies that none of the values have
+been changed. The valid return values are:
+
+LAPB_OK LAPB getparms was successful.
+LAPB_BADTOKEN Invalid/unknown LAPB token.
+LAPB_INVALUE One of the values was out of its allowable range.
+
+
+int lapb_connect_request(void *token);
+
+Initiate a connect using the current parameter settings. The valid return
+values are:
+
+LAPB_OK LAPB is starting to connect.
+LAPB_BADTOKEN Invalid/unknown LAPB token.
+LAPB_CONNECTED LAPB module is already connected.
+
+
+int lapb_disconnect_request(void *token);
+
+Initiate a disconnect. The valid return values are:
+
+LAPB_OK LAPB is starting to disconnect.
+LAPB_BADTOKEN Invalid/unknown LAPB token.
+LAPB_NOTCONNECTED LAPB module is not connected.
+
+
+int lapb_data_request(void *token, struct sk_buff *skb);
+
+Queue data with the LAPB module for transmitting over the link. If the call
+is successful then the skbuff is owned by the LAPB module and may not be
+used by the device driver again. The valid return values are:
+
+LAPB_OK LAPB has accepted the data.
+LAPB_BADTOKEN Invalid/unknown LAPB token.
+LAPB_NOTCONNECTED LAPB module is not connected.
+
+
+int lapb_data_received(void *token, struct sk_buff *skb);
+
+Queue data with the LAPB module which has been received from the device. It
+is expected that the data passed to the LAPB module has skb->data pointing
+to the beginning of the LAPB data. If the call is successful then the skbuff
+is owned by the LAPB module and may not be used by the device driver again.
+The valid return values are:
+
+LAPB_OK LAPB has accepted the data.
+LAPB_BADTOKEN Invalid/unknown LAPB token.
+
+
+Callbacks
+---------
+
+These callbacks are functions provided by the device driver for the LAPB
+module to call when an event occurs. They are registered with the LAPB
+module with lapb_register (see above) in the structure lapb_register_struct
+(see above).
+
+
+void (*connect_confirmation)(void *token, int reason);
+
+This is called by the LAPB module when a connection is established after
+being requested by a call to lapb_connect_request (see above). The reason is
+always LAPB_OK.
+
+
+void (*connect_indication)(void *token, int reason);
+
+This is called by the LAPB module when the link is established by the remote
+system. The value of reason is always LAPB_OK.
+
+
+void (*disconnect_confirmation)(void *token, int reason);
+
+This is called by the LAPB module when an event occurs after the device
+driver has called lapb_disconnect_request (see above). The reason indicates
+what has happened. In all cases the LAPB link can be regarded as being
+terminated. The values for reason are:
+
+LAPB_OK The LAPB link was terminated normally.
+LAPB_NOTCONNECTED The remote system was not connected.
+LAPB_TIMEDOUT No response was received in N2 tries from the remote
+ system.
+
+
+void (*disconnect_indication)(void *token, int reason);
+
+This is called by the LAPB module when the link is terminated by the remote
+system or another event has occurred to terminate the link. This may be
+returned in response to a lapb_connect_request (see above) if the remote
+system refused the request. The values for reason are:
+
+LAPB_OK The LAPB link was terminated normally by the remote
+ system.
+LAPB_REFUSED The remote system refused the connect request.
+LAPB_NOTCONNECTED The remote system was not connected.
+LAPB_TIMEDOUT No response was received in N2 tries from the remote
+ system.
+
+
+int (*data_indication)(void *token, struct sk_buff *skb);
+
+This is called by the LAPB module when data has been received from the
+remote system that should be passed onto the next layer in the protocol
+stack. The skbuff becomes the property of the device driver and the LAPB
+module will not perform any more actions on it. The skb->data pointer will
+be pointing to the first byte of data after the LAPB header.
+
+This method should return NET_RX_DROP (as defined in the header
+file include/linux/netdevice.h) if and only if the frame was dropped
+before it could be delivered to the upper layer.
+
+
+void (*data_transmit)(void *token, struct sk_buff *skb);
+
+This is called by the LAPB module when data is to be transmitted to the
+remote system by the device driver. The skbuff becomes the property of the
+device driver and the LAPB module will not perform any more actions on it.
+The skb->data pointer will be pointing to the first byte of the LAPB header.
diff --git a/Documentation/networking/ltpc.txt b/Documentation/networking/ltpc.txt
new file mode 100644
index 000000000..0bf3220c7
--- /dev/null
+++ b/Documentation/networking/ltpc.txt
@@ -0,0 +1,131 @@
+This is the ALPHA version of the ltpc driver.
+
+In order to use it, you will need at least version 1.3.3 of the
+netatalk package, and the Apple or Farallon LocalTalk PC card.
+There are a number of different LocalTalk cards for the PC; this
+driver applies only to the one with the 65c02 processor chip on it.
+
+To include it in the kernel, select the CONFIG_LTPC switch in the
+configuration dialog. You can also compile it as a module.
+
+While the driver will attempt to autoprobe the I/O port address, IRQ
+line, and DMA channel of the card, this does not always work. For
+this reason, you should be prepared to supply these parameters
+yourself. (see "Card Configuration" below for how to determine or
+change the settings on your card)
+
+When the driver is compiled into the kernel, you can add a line such
+as the following to your /etc/lilo.conf:
+
+ append="ltpc=0x240,9,1"
+
+where the parameters (in order) are the port address, IRQ, and DMA
+channel. The second and third values can be omitted, in which case
+the driver will try to determine them itself.
+
+If you load the driver as a module, you can pass the parameters "io=",
+"irq=", and "dma=" on the command line with insmod or modprobe, or add
+them as options in a configuration file in /etc/modprobe.d/ directory:
+
+ alias lt0 ltpc # autoload the module when the interface is configured
+ options ltpc io=0x240 irq=9 dma=1
+
+Before starting up the netatalk demons (perhaps in rc.local), you
+need to add a line such as:
+
+ /sbin/ifconfig lt0 127.0.0.42
+
+The address is unimportant - however, the card needs to be configured
+with ifconfig so that Netatalk can find it.
+
+The appropriate netatalk configuration depends on whether you are
+attached to a network that includes AppleTalk routers or not. If,
+like me, you are simply connecting to your home Macintoshes and
+printers, you need to set up netatalk to "seed". The way I do this
+is to have the lines
+
+ dummy -seed -phase 2 -net 2000 -addr 2000.26 -zone "1033"
+ lt0 -seed -phase 1 -net 1033 -addr 1033.27 -zone "1033"
+
+in my atalkd.conf. What is going on here is that I need to fool
+netatalk into thinking that there are two AppleTalk interfaces
+present; otherwise, it refuses to seed. This is a hack, and a more
+permanent solution would be to alter the netatalk code. Also, make
+sure you have the correct name for the dummy interface - If it's
+compiled as a module, you will need to refer to it as "dummy0" or some
+such.
+
+If you are attached to an extended AppleTalk network, with routers on
+it, then you don't need to fool around with this -- the appropriate
+line in atalkd.conf is
+
+ lt0 -phase 1
+
+--------------------------------------
+
+Card Configuration:
+
+The interrupts and so forth are configured via the dipswitch on the
+board. Set the switches so as not to conflict with other hardware.
+
+ Interrupts -- set at most one. If none are set, the driver uses
+ polled mode. Because the card was developed in the XT era, the
+ original documentation refers to IRQ2. Since you'll be running
+ this on an AT (or later) class machine, that really means IRQ9.
+
+ SW1 IRQ 4
+ SW2 IRQ 3
+ SW3 IRQ 9 (2 in original card documentation only applies to XT)
+
+
+ DMA -- choose DMA 1 or 3, and set both corresponding switches.
+
+ SW4 DMA 3
+ SW5 DMA 1
+ SW6 DMA 3
+ SW7 DMA 1
+
+
+ I/O address -- choose one.
+
+ SW8 220 / 240
+
+--------------------------------------
+
+IP:
+
+Yes, it is possible to do IP over LocalTalk. However, you can't just
+treat the LocalTalk device like an ordinary Ethernet device, even if
+that's what it looks like to Netatalk.
+
+Instead, you follow the same procedure as for doing IP in EtherTalk.
+See Documentation/networking/ipddp.txt for more information about the
+kernel driver and userspace tools needed.
+
+--------------------------------------
+
+BUGS:
+
+IRQ autoprobing often doesn't work on a cold boot. To get around
+this, either compile the driver as a module, or pass the parameters
+for the card to the kernel as described above.
+
+Also, as usual, autoprobing is not recommended when you use the driver
+as a module. (though it usually works at boot time, at least)
+
+Polled mode is *really* slow sometimes, but this seems to depend on
+the configuration of the network.
+
+It may theoretically be possible to use two LTPC cards in the same
+machine, but this is unsupported, so if you really want to do this,
+you'll probably have to hack the initialization code a bit.
+
+______________________________________
+
+THANKS:
+ Thanks to Alan Cox for helpful discussions early on in this
+work, and to Denis Hainsworth for doing the bleeding-edge testing.
+
+-- Bradford Johnson <bradford@math.umn.edu>
+
+-- Updated 11/09/1998 by David Huggins-Daines <dhd@debian.org>
diff --git a/Documentation/networking/mac80211-auth-assoc-deauth.txt b/Documentation/networking/mac80211-auth-assoc-deauth.txt
new file mode 100644
index 000000000..d7a15fe91
--- /dev/null
+++ b/Documentation/networking/mac80211-auth-assoc-deauth.txt
@@ -0,0 +1,95 @@
+#
+# This outlines the Linux authentication/association and
+# deauthentication/disassociation flows.
+#
+# This can be converted into a diagram using the service
+# at http://www.websequencediagrams.com/
+#
+
+participant userspace
+participant mac80211
+participant driver
+
+alt authentication needed (not FT)
+userspace->mac80211: authenticate
+
+alt authenticated/authenticating already
+mac80211->driver: sta_state(AP, not-exists)
+mac80211->driver: bss_info_changed(clear BSSID)
+else associated
+note over mac80211,driver
+like deauth/disassoc, without sending the
+BA session stop & deauth/disassoc frames
+end note
+end
+
+mac80211->driver: config(channel, channel type)
+mac80211->driver: bss_info_changed(set BSSID, basic rate bitmap)
+mac80211->driver: sta_state(AP, exists)
+
+alt no probe request data known
+mac80211->driver: TX directed probe request
+driver->mac80211: RX probe response
+end
+
+mac80211->driver: TX auth frame
+driver->mac80211: RX auth frame
+
+alt WEP shared key auth
+mac80211->driver: TX auth frame
+driver->mac80211: RX auth frame
+end
+
+mac80211->driver: sta_state(AP, authenticated)
+mac80211->userspace: RX auth frame
+
+end
+
+userspace->mac80211: associate
+alt authenticated or associated
+note over mac80211,driver: cleanup like for authenticate
+end
+
+alt not previously authenticated (FT)
+mac80211->driver: config(channel, channel type)
+mac80211->driver: bss_info_changed(set BSSID, basic rate bitmap)
+mac80211->driver: sta_state(AP, exists)
+mac80211->driver: sta_state(AP, authenticated)
+end
+mac80211->driver: TX assoc
+driver->mac80211: RX assoc response
+note over mac80211: init rate control
+mac80211->driver: sta_state(AP, associated)
+
+alt not using WPA
+mac80211->driver: sta_state(AP, authorized)
+end
+
+mac80211->driver: set up QoS parameters
+
+mac80211->driver: bss_info_changed(QoS, HT, associated with AID)
+mac80211->userspace: associated
+
+note left of userspace: associated now
+
+alt using WPA
+note over userspace
+do 4-way-handshake
+(data frames)
+end note
+userspace->mac80211: authorized
+mac80211->driver: sta_state(AP, authorized)
+end
+
+userspace->mac80211: deauthenticate/disassociate
+mac80211->driver: stop BA sessions
+mac80211->driver: TX deauth/disassoc
+mac80211->driver: flush frames
+mac80211->driver: sta_state(AP,associated)
+mac80211->driver: sta_state(AP,authenticated)
+mac80211->driver: sta_state(AP,exists)
+mac80211->driver: sta_state(AP,not-exists)
+mac80211->driver: turn off powersave
+mac80211->driver: bss_info_changed(clear BSSID, not associated, no QoS, ...)
+mac80211->driver: config(channel type to non-HT)
+mac80211->userspace: disconnected
diff --git a/Documentation/networking/mac80211-injection.txt b/Documentation/networking/mac80211-injection.txt
new file mode 100644
index 000000000..3a930072b
--- /dev/null
+++ b/Documentation/networking/mac80211-injection.txt
@@ -0,0 +1,67 @@
+How to use packet injection with mac80211
+=========================================
+
+mac80211 now allows arbitrary packets to be injected down any Monitor Mode
+interface from userland. The packet you inject needs to be composed in the
+following format:
+
+ [ radiotap header ]
+ [ ieee80211 header ]
+ [ payload ]
+
+The radiotap format is discussed in
+./Documentation/networking/radiotap-headers.txt.
+
+Despite many radiotap parameters being currently defined, most only make sense
+to appear on received packets. The following information is parsed from the
+radiotap headers and used to control injection:
+
+ * IEEE80211_RADIOTAP_FLAGS
+
+ IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated
+ IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available
+ IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the
+ current fragmentation threshold.
+
+ * IEEE80211_RADIOTAP_TX_FLAGS
+
+ IEEE80211_RADIOTAP_F_TX_NOACK: frame should be sent without waiting for
+ an ACK even if it is a unicast frame
+
+The injection code can also skip all other currently defined radiotap fields
+facilitating replay of captured radiotap headers directly.
+
+Here is an example valid radiotap header defining some parameters
+
+ 0x00, 0x00, // <-- radiotap version
+ 0x0b, 0x00, // <- radiotap header length
+ 0x04, 0x0c, 0x00, 0x00, // <-- bitmap
+ 0x6c, // <-- rate
+ 0x0c, //<-- tx power
+ 0x01 //<-- antenna
+
+The ieee80211 header follows immediately afterwards, looking for example like
+this:
+
+ 0x08, 0x01, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x13, 0x22, 0x33, 0x44, 0x55, 0x66,
+ 0x13, 0x22, 0x33, 0x44, 0x55, 0x66,
+ 0x10, 0x86
+
+Then lastly there is the payload.
+
+After composing the packet contents, it is sent by send()-ing it to a logical
+mac80211 interface that is in Monitor mode. Libpcap can also be used,
+(which is easier than doing the work to bind the socket to the right
+interface), along the following lines:
+
+ ppcap = pcap_open_live(szInterfaceName, 800, 1, 20, szErrbuf);
+...
+ r = pcap_inject(ppcap, u8aSendBuffer, nLength);
+
+You can also find a link to a complete inject application here:
+
+http://wireless.kernel.org/en/users/Documentation/packetspammer
+
+Andy Green <andy@warmcat.com>
diff --git a/Documentation/networking/mac80211_hwsim/README b/Documentation/networking/mac80211_hwsim/README
new file mode 100644
index 000000000..24ac91d56
--- /dev/null
+++ b/Documentation/networking/mac80211_hwsim/README
@@ -0,0 +1,68 @@
+mac80211_hwsim - software simulator of 802.11 radio(s) for mac80211
+Copyright (c) 2008, Jouni Malinen <j@w1.fi>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License version 2 as
+published by the Free Software Foundation.
+
+
+Introduction
+
+mac80211_hwsim is a Linux kernel module that can be used to simulate
+arbitrary number of IEEE 802.11 radios for mac80211. It can be used to
+test most of the mac80211 functionality and user space tools (e.g.,
+hostapd and wpa_supplicant) in a way that matches very closely with
+the normal case of using real WLAN hardware. From the mac80211 view
+point, mac80211_hwsim is yet another hardware driver, i.e., no changes
+to mac80211 are needed to use this testing tool.
+
+The main goal for mac80211_hwsim is to make it easier for developers
+to test their code and work with new features to mac80211, hostapd,
+and wpa_supplicant. The simulated radios do not have the limitations
+of real hardware, so it is easy to generate an arbitrary test setup
+and always reproduce the same setup for future tests. In addition,
+since all radio operation is simulated, any channel can be used in
+tests regardless of regulatory rules.
+
+mac80211_hwsim kernel module has a parameter 'radios' that can be used
+to select how many radios are simulated (default 2). This allows
+configuration of both very simply setups (e.g., just a single access
+point and a station) or large scale tests (multiple access points with
+hundreds of stations).
+
+mac80211_hwsim works by tracking the current channel of each virtual
+radio and copying all transmitted frames to all other radios that are
+currently enabled and on the same channel as the transmitting
+radio. Software encryption in mac80211 is used so that the frames are
+actually encrypted over the virtual air interface to allow more
+complete testing of encryption.
+
+A global monitoring netdev, hwsim#, is created independent of
+mac80211. This interface can be used to monitor all transmitted frames
+regardless of channel.
+
+
+Simple example
+
+This example shows how to use mac80211_hwsim to simulate two radios:
+one to act as an access point and the other as a station that
+associates with the AP. hostapd and wpa_supplicant are used to take
+care of WPA2-PSK authentication. In addition, hostapd is also
+processing access point side of association.
+
+
+# Build mac80211_hwsim as part of kernel configuration
+
+# Load the module
+modprobe mac80211_hwsim
+
+# Run hostapd (AP) for wlan0
+hostapd hostapd.conf
+
+# Run wpa_supplicant (station) for wlan1
+wpa_supplicant -Dwext -iwlan1 -c wpa_supplicant.conf
+
+
+More test cases are available in hostap.git:
+git://w1.fi/srv/git/hostap.git and mac80211_hwsim/tests subdirectory
+(http://w1.fi/gitweb/gitweb.cgi?p=hostap.git;a=tree;f=mac80211_hwsim/tests)
diff --git a/Documentation/networking/mac80211_hwsim/hostapd.conf b/Documentation/networking/mac80211_hwsim/hostapd.conf
new file mode 100644
index 000000000..08cde7e35
--- /dev/null
+++ b/Documentation/networking/mac80211_hwsim/hostapd.conf
@@ -0,0 +1,11 @@
+interface=wlan0
+driver=nl80211
+
+hw_mode=g
+channel=1
+ssid=mac80211 test
+
+wpa=2
+wpa_key_mgmt=WPA-PSK
+wpa_pairwise=CCMP
+wpa_passphrase=12345678
diff --git a/Documentation/networking/mac80211_hwsim/wpa_supplicant.conf b/Documentation/networking/mac80211_hwsim/wpa_supplicant.conf
new file mode 100644
index 000000000..299128cff
--- /dev/null
+++ b/Documentation/networking/mac80211_hwsim/wpa_supplicant.conf
@@ -0,0 +1,10 @@
+ctrl_interface=/var/run/wpa_supplicant
+
+network={
+ ssid="mac80211 test"
+ psk="12345678"
+ key_mgmt=WPA-PSK
+ proto=WPA2
+ pairwise=CCMP
+ group=CCMP
+}
diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
new file mode 100644
index 000000000..9ed15f86c
--- /dev/null
+++ b/Documentation/networking/mpls-sysctl.txt
@@ -0,0 +1,29 @@
+/proc/sys/net/mpls/* Variables:
+
+platform_labels - INTEGER
+ Number of entries in the platform label table. It is not
+ possible to configure forwarding for label values equal to or
+ greater than the number of platform labels.
+
+ A dense utliziation of the entries in the platform label table
+ is possible and expected aas the platform labels are locally
+ allocated.
+
+ If the number of platform label table entries is set to 0 no
+ label will be recognized by the kernel and mpls forwarding
+ will be disabled.
+
+ Reducing this value will remove all label routing entries that
+ no longer fit in the table.
+
+ Possible values: 0 - 1048575
+ Default: 0
+
+conf/<interface>/input - BOOL
+ Control whether packets can be input on this interface.
+
+ If disabled, packets will be discarded without further
+ processing.
+
+ 0 - disabled (default)
+ not 0 - enabled
diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt
new file mode 100644
index 000000000..4caa0e314
--- /dev/null
+++ b/Documentation/networking/multiqueue.txt
@@ -0,0 +1,79 @@
+
+ HOWTO for multiqueue network device support
+ ===========================================
+
+Section 1: Base driver requirements for implementing multiqueue support
+
+Intro: Kernel support for multiqueue devices
+---------------------------------------------------------
+
+Kernel support for multiqueue devices is always present.
+
+Section 1: Base driver requirements for implementing multiqueue support
+-----------------------------------------------------------------------
+
+Base drivers are required to use the new alloc_etherdev_mq() or
+alloc_netdev_mq() functions to allocate the subqueues for the device. The
+underlying kernel API will take care of the allocation and deallocation of
+the subqueue memory, as well as netdev configuration of where the queues
+exist in memory.
+
+The base driver will also need to manage the queues as it does the global
+netdev->queue_lock today. Therefore base drivers should use the
+netif_{start|stop|wake}_subqueue() functions to manage each queue while the
+device is still operational. netdev->queue_lock is still used when the device
+comes online or when it's completely shut down (unregister_netdev(), etc.).
+
+
+Section 2: Qdisc support for multiqueue devices
+
+-----------------------------------------------
+
+Currently two qdiscs are optimized for multiqueue devices. The first is the
+default pfifo_fast qdisc. This qdisc supports one qdisc per hardware queue.
+A new round-robin qdisc, sch_multiq also supports multiple hardware queues. The
+qdisc is responsible for classifying the skb's and then directing the skb's to
+bands and queues based on the value in skb->queue_mapping. Use this field in
+the base driver to determine which queue to send the skb to.
+
+sch_multiq has been added for hardware that wishes to avoid head-of-line
+blocking. It will cycle though the bands and verify that the hardware queue
+associated with the band is not stopped prior to dequeuing a packet.
+
+On qdisc load, the number of bands is based on the number of queues on the
+hardware. Once the association is made, any skb with skb->queue_mapping set,
+will be queued to the band associated with the hardware queue.
+
+
+Section 3: Brief howto using MULTIQ for multiqueue devices
+---------------------------------------------------------------
+
+The userspace command 'tc,' part of the iproute2 package, is used to configure
+qdiscs. To add the MULTIQ qdisc to your network device, assuming the device
+is called eth0, run the following command:
+
+# tc qdisc add dev eth0 root handle 1: multiq
+
+The qdisc will allocate the number of bands to equal the number of queues that
+the device reports, and bring the qdisc online. Assuming eth0 has 4 Tx
+queues, the band mapping would look like:
+
+band 0 => queue 0
+band 1 => queue 1
+band 2 => queue 2
+band 3 => queue 3
+
+Traffic will begin flowing through each queue based on either the simple_tx_hash
+function or based on netdev->select_queue() if you have it defined.
+
+The behavior of tc filters remains the same. However a new tc action,
+skbedit, has been added. Assuming you wanted to route all traffic to a
+specific host, for example 192.168.0.3, through a specific queue you could use
+this action and establish a filter such as:
+
+tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \
+ match ip dst 192.168.0.3 \
+ action skbedit queue_mapping 3
+
+Author: Alexander Duyck <alexander.h.duyck@intel.com>
+Original Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
diff --git a/Documentation/networking/netconsole.txt b/Documentation/networking/netconsole.txt
new file mode 100644
index 000000000..a5d574a9a
--- /dev/null
+++ b/Documentation/networking/netconsole.txt
@@ -0,0 +1,177 @@
+
+started by Ingo Molnar <mingo@redhat.com>, 2001.09.17
+2.6 port and netpoll api by Matt Mackall <mpm@selenic.com>, Sep 9 2003
+IPv6 support by Cong Wang <xiyou.wangcong@gmail.com>, Jan 1 2013
+
+Please send bug reports to Matt Mackall <mpm@selenic.com>
+Satyam Sharma <satyam.sharma@gmail.com>, and Cong Wang <xiyou.wangcong@gmail.com>
+
+Introduction:
+=============
+
+This module logs kernel printk messages over UDP allowing debugging of
+problem where disk logging fails and serial consoles are impractical.
+
+It can be used either built-in or as a module. As a built-in,
+netconsole initializes immediately after NIC cards and will bring up
+the specified interface as soon as possible. While this doesn't allow
+capture of early kernel panics, it does capture most of the boot
+process.
+
+Sender and receiver configuration:
+==================================
+
+It takes a string configuration parameter "netconsole" in the
+following format:
+
+ netconsole=[src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+
+ where
+ src-port source for UDP packets (defaults to 6665)
+ src-ip source IP to use (interface address)
+ dev network interface (eth0)
+ tgt-port port for logging agent (6666)
+ tgt-ip IP address for logging agent
+ tgt-macaddr ethernet MAC address for logging agent (broadcast)
+
+Examples:
+
+ linux netconsole=4444@10.0.0.1/eth1,9353@10.0.0.2/12:34:56:78:9a:bc
+
+ or
+
+ insmod netconsole netconsole=@/,@10.0.0.2/
+
+ or using IPv6
+
+ insmod netconsole netconsole=@/,@fd00:1:2:3::1/
+
+It also supports logging to multiple remote agents by specifying
+parameters for the multiple agents separated by semicolons and the
+complete string enclosed in "quotes", thusly:
+
+ modprobe netconsole netconsole="@/,@10.0.0.2/;@/eth1,6892@10.0.0.3/"
+
+Built-in netconsole starts immediately after the TCP stack is
+initialized and attempts to bring up the supplied dev at the supplied
+address.
+
+The remote host has several options to receive the kernel messages,
+for example:
+
+1) syslogd
+
+2) netcat
+
+ On distributions using a BSD-based netcat version (e.g. Fedora,
+ openSUSE and Ubuntu) the listening port must be specified without
+ the -p switch:
+
+ 'nc -u -l -p <port>' / 'nc -u -l <port>' or
+ 'netcat -u -l -p <port>' / 'netcat -u -l <port>'
+
+3) socat
+
+ 'socat udp-recv:<port> -'
+
+Dynamic reconfiguration:
+========================
+
+Dynamic reconfigurability is a useful addition to netconsole that enables
+remote logging targets to be dynamically added, removed, or have their
+parameters reconfigured at runtime from a configfs-based userspace interface.
+[ Note that the parameters of netconsole targets that were specified/created
+from the boot/module option are not exposed via this interface, and hence
+cannot be modified dynamically. ]
+
+To include this feature, select CONFIG_NETCONSOLE_DYNAMIC when building the
+netconsole module (or kernel, if netconsole is built-in).
+
+Some examples follow (where configfs is mounted at the /sys/kernel/config
+mountpoint).
+
+To add a remote logging target (target names can be arbitrary):
+
+ cd /sys/kernel/config/netconsole/
+ mkdir target1
+
+Note that newly created targets have default parameter values (as mentioned
+above) and are disabled by default -- they must first be enabled by writing
+"1" to the "enabled" attribute (usually after setting parameters accordingly)
+as described below.
+
+To remove a target:
+
+ rmdir /sys/kernel/config/netconsole/othertarget/
+
+The interface exposes these parameters of a netconsole target to userspace:
+
+ enabled Is this target currently enabled? (read-write)
+ dev_name Local network interface name (read-write)
+ local_port Source UDP port to use (read-write)
+ remote_port Remote agent's UDP port (read-write)
+ local_ip Source IP address to use (read-write)
+ remote_ip Remote agent's IP address (read-write)
+ local_mac Local interface's MAC address (read-only)
+ remote_mac Remote agent's MAC address (read-write)
+
+The "enabled" attribute is also used to control whether the parameters of
+a target can be updated or not -- you can modify the parameters of only
+disabled targets (i.e. if "enabled" is 0).
+
+To update a target's parameters:
+
+ cat enabled # check if enabled is 1
+ echo 0 > enabled # disable the target (if required)
+ echo eth2 > dev_name # set local interface
+ echo 10.0.0.4 > remote_ip # update some parameter
+ echo cb:a9:87:65:43:21 > remote_mac # update more parameters
+ echo 1 > enabled # enable target again
+
+You can also update the local interface dynamically. This is especially
+useful if you want to use interfaces that have newly come up (and may not
+have existed when netconsole was loaded / initialized).
+
+Miscellaneous notes:
+====================
+
+WARNING: the default target ethernet setting uses the broadcast
+ethernet address to send packets, which can cause increased load on
+other systems on the same ethernet segment.
+
+TIP: some LAN switches may be configured to suppress ethernet broadcasts
+so it is advised to explicitly specify the remote agents' MAC addresses
+from the config parameters passed to netconsole.
+
+TIP: to find out the MAC address of, say, 10.0.0.2, you may try using:
+
+ ping -c 1 10.0.0.2 ; /sbin/arp -n | grep 10.0.0.2
+
+TIP: in case the remote logging agent is on a separate LAN subnet than
+the sender, it is suggested to try specifying the MAC address of the
+default gateway (you may use /sbin/route -n to find it out) as the
+remote MAC address instead.
+
+NOTE: the network device (eth1 in the above case) can run any kind
+of other network traffic, netconsole is not intrusive. Netconsole
+might cause slight delays in other traffic if the volume of kernel
+messages is high, but should have no other impact.
+
+NOTE: if you find that the remote logging agent is not receiving or
+printing all messages from the sender, it is likely that you have set
+the "console_loglevel" parameter (on the sender) to only send high
+priority messages to the console. You can change this at runtime using:
+
+ dmesg -n 8
+
+or by specifying "debug" on the kernel command line at boot, to send
+all kernel messages to the console. A specific value for this parameter
+can also be set using the "loglevel" kernel boot option. See the
+dmesg(8) man page and Documentation/kernel-parameters.txt for details.
+
+Netconsole was designed to be as instantaneous as possible, to
+enable the logging of even the most critical kernel bugs. It works
+from IRQ contexts as well, and does not enable interrupts while
+sending packets. Due to these unique needs, configuration cannot
+be more automatic, and some fundamental limitations will remain:
+only IP networks, UDP packets and ethernet devices are supported.
diff --git a/Documentation/networking/netdev-FAQ.txt b/Documentation/networking/netdev-FAQ.txt
new file mode 100644
index 000000000..0fe1c6e0d
--- /dev/null
+++ b/Documentation/networking/netdev-FAQ.txt
@@ -0,0 +1,224 @@
+
+Information you need to know about netdev
+-----------------------------------------
+
+Q: What is netdev?
+
+A: It is a mailing list for all network-related Linux stuff. This includes
+ anything found under net/ (i.e. core code like IPv6) and drivers/net
+ (i.e. hardware specific drivers) in the Linux source tree.
+
+ Note that some subsystems (e.g. wireless drivers) which have a high volume
+ of traffic have their own specific mailing lists.
+
+ The netdev list is managed (like many other Linux mailing lists) through
+ VGER ( http://vger.kernel.org/ ) and archives can be found below:
+
+ http://marc.info/?l=linux-netdev
+ http://www.spinics.net/lists/netdev/
+
+ Aside from subsystems like that mentioned above, all network-related Linux
+ development (i.e. RFC, review, comments, etc.) takes place on netdev.
+
+Q: How do the changes posted to netdev make their way into Linux?
+
+A: There are always two trees (git repositories) in play. Both are driven
+ by David Miller, the main network maintainer. There is the "net" tree,
+ and the "net-next" tree. As you can probably guess from the names, the
+ net tree is for fixes to existing code already in the mainline tree from
+ Linus, and net-next is where the new code goes for the future release.
+ You can find the trees here:
+
+ http://git.kernel.org/?p=linux/kernel/git/davem/net.git
+ http://git.kernel.org/?p=linux/kernel/git/davem/net-next.git
+
+Q: How often do changes from these trees make it to the mainline Linus tree?
+
+A: To understand this, you need to know a bit of background information
+ on the cadence of Linux development. Each new release starts off with
+ a two week "merge window" where the main maintainers feed their new
+ stuff to Linus for merging into the mainline tree. After the two weeks,
+ the merge window is closed, and it is called/tagged "-rc1". No new
+ features get mainlined after this -- only fixes to the rc1 content
+ are expected. After roughly a week of collecting fixes to the rc1
+ content, rc2 is released. This repeats on a roughly weekly basis
+ until rc7 (typically; sometimes rc6 if things are quiet, or rc8 if
+ things are in a state of churn), and a week after the last vX.Y-rcN
+ was done, the official "vX.Y" is released.
+
+ Relating that to netdev: At the beginning of the 2-week merge window,
+ the net-next tree will be closed - no new changes/features. The
+ accumulated new content of the past ~10 weeks will be passed onto
+ mainline/Linus via a pull request for vX.Y -- at the same time,
+ the "net" tree will start accumulating fixes for this pulled content
+ relating to vX.Y
+
+ An announcement indicating when net-next has been closed is usually
+ sent to netdev, but knowing the above, you can predict that in advance.
+
+ IMPORTANT: Do not send new net-next content to netdev during the
+ period during which net-next tree is closed.
+
+ Shortly after the two weeks have passed (and vX.Y-rc1 is released), the
+ tree for net-next reopens to collect content for the next (vX.Y+1) release.
+
+ If you aren't subscribed to netdev and/or are simply unsure if net-next
+ has re-opened yet, simply check the net-next git repository link above for
+ any new networking-related commits.
+
+ The "net" tree continues to collect fixes for the vX.Y content, and
+ is fed back to Linus at regular (~weekly) intervals. Meaning that the
+ focus for "net" is on stabilization and bugfixes.
+
+ Finally, the vX.Y gets released, and the whole cycle starts over.
+
+Q: So where are we now in this cycle?
+
+A: Load the mainline (Linus) page here:
+
+ http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git
+
+ and note the top of the "tags" section. If it is rc1, it is early
+ in the dev cycle. If it was tagged rc7 a week ago, then a release
+ is probably imminent.
+
+Q: How do I indicate which tree (net vs. net-next) my patch should be in?
+
+A: Firstly, think whether you have a bug fix or new "next-like" content.
+ Then once decided, assuming that you use git, use the prefix flag, i.e.
+
+ git format-patch --subject-prefix='PATCH net-next' start..finish
+
+ Use "net" instead of "net-next" (always lower case) in the above for
+ bug-fix net content. If you don't use git, then note the only magic in
+ the above is just the subject text of the outgoing e-mail, and you can
+ manually change it yourself with whatever MUA you are comfortable with.
+
+Q: I sent a patch and I'm wondering what happened to it. How can I tell
+ whether it got merged?
+
+A: Start by looking at the main patchworks queue for netdev:
+
+ http://patchwork.ozlabs.org/project/netdev/list/
+
+ The "State" field will tell you exactly where things are at with
+ your patch.
+
+Q: The above only says "Under Review". How can I find out more?
+
+A: Generally speaking, the patches get triaged quickly (in less than 48h).
+ So be patient. Asking the maintainer for status updates on your
+ patch is a good way to ensure your patch is ignored or pushed to
+ the bottom of the priority list.
+
+Q: How can I tell what patches are queued up for backporting to the
+ various stable releases?
+
+A: Normally Greg Kroah-Hartman collects stable commits himself, but
+ for networking, Dave collects up patches he deems critical for the
+ networking subsystem, and then hands them off to Greg.
+
+ There is a patchworks queue that you can see here:
+ http://patchwork.ozlabs.org/bundle/davem/stable/?state=*
+
+ It contains the patches which Dave has selected, but not yet handed
+ off to Greg. If Greg already has the patch, then it will be here:
+ http://git.kernel.org/cgit/linux/kernel/git/stable/stable-queue.git
+
+ A quick way to find whether the patch is in this stable-queue is
+ to simply clone the repo, and then git grep the mainline commit ID, e.g.
+
+ stable-queue$ git grep -l 284041ef21fdf2e
+ releases/3.0.84/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
+ releases/3.4.51/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
+ releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
+ stable/stable-queue$
+
+Q: I see a network patch and I think it should be backported to stable.
+ Should I request it via "stable@vger.kernel.org" like the references in
+ the kernel's Documentation/stable_kernel_rules.txt file say?
+
+A: No, not for networking. Check the stable queues as per above 1st to see
+ if it is already queued. If not, then send a mail to netdev, listing
+ the upstream commit ID and why you think it should be a stable candidate.
+
+ Before you jump to go do the above, do note that the normal stable rules
+ in Documentation/stable_kernel_rules.txt still apply. So you need to
+ explicitly indicate why it is a critical fix and exactly what users are
+ impacted. In addition, you need to convince yourself that you _really_
+ think it has been overlooked, vs. having been considered and rejected.
+
+ Generally speaking, the longer it has had a chance to "soak" in mainline,
+ the better the odds that it is an OK candidate for stable. So scrambling
+ to request a commit be added the day after it appears should be avoided.
+
+Q: I have created a network patch and I think it should be backported to
+ stable. Should I add a "Cc: stable@vger.kernel.org" like the references
+ in the kernel's Documentation/ directory say?
+
+A: No. See above answer. In short, if you think it really belongs in
+ stable, then ensure you write a decent commit log that describes who
+ gets impacted by the bugfix and how it manifests itself, and when the
+ bug was introduced. If you do that properly, then the commit will
+ get handled appropriately and most likely get put in the patchworks
+ stable queue if it really warrants it.
+
+ If you think there is some valid information relating to it being in
+ stable that does _not_ belong in the commit log, then use the three
+ dash marker line as described in Documentation/SubmittingPatches to
+ temporarily embed that information into the patch that you send.
+
+Q: Someone said that the comment style and coding convention is different
+ for the networking content. Is this true?
+
+A: Yes, in a largely trivial way. Instead of this:
+
+ /*
+ * foobar blah blah blah
+ * another line of text
+ */
+
+ it is requested that you make it look like this:
+
+ /* foobar blah blah blah
+ * another line of text
+ */
+
+Q: I am working in existing code that has the former comment style and not the
+ latter. Should I submit new code in the former style or the latter?
+
+A: Make it the latter style, so that eventually all code in the domain of
+ netdev is of this format.
+
+Q: I found a bug that might have possible security implications or similar.
+ Should I mail the main netdev maintainer off-list?
+
+A: No. The current netdev maintainer has consistently requested that people
+ use the mailing lists and not reach out directly. If you aren't OK with
+ that, then perhaps consider mailing "security@kernel.org" or reading about
+ http://oss-security.openwall.org/wiki/mailing-lists/distros
+ as possible alternative mechanisms.
+
+Q: What level of testing is expected before I submit my change?
+
+A: If your changes are against net-next, the expectation is that you
+ have tested by layering your changes on top of net-next. Ideally you
+ will have done run-time testing specific to your change, but at a
+ minimum, your changes should survive an "allyesconfig" and an
+ "allmodconfig" build without new warnings or failures.
+
+Q: Any other tips to help ensure my net/net-next patch gets OK'd?
+
+A: Attention to detail. Re-read your own work as if you were the
+ reviewer. You can start with using checkpatch.pl, perhaps even
+ with the "--strict" flag. But do not be mindlessly robotic in
+ doing so. If your change is a bug-fix, make sure your commit log
+ indicates the end-user visible symptom, the underlying reason as
+ to why it happens, and then if necessary, explain why the fix proposed
+ is the best way to get things done. Don't mangle whitespace, and as
+ is common, don't mis-indent function arguments that span multiple lines.
+ If it is your first patch, mail it to yourself so you can test apply
+ it to an unpatched tree to confirm infrastructure didn't mangle it.
+
+ Finally, go back and read Documentation/SubmittingPatches to be
+ sure you are not repeating some common mistake documented there.
diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt
new file mode 100644
index 000000000..f310edec8
--- /dev/null
+++ b/Documentation/networking/netdev-features.txt
@@ -0,0 +1,167 @@
+Netdev features mess and how to get out from it alive
+=====================================================
+
+Author:
+ Michał Mirosław <mirq-linux@rere.qmqm.pl>
+
+
+
+ Part I: Feature sets
+======================
+
+Long gone are the days when a network card would just take and give packets
+verbatim. Today's devices add multiple features and bugs (read: offloads)
+that relieve an OS of various tasks like generating and checking checksums,
+splitting packets, classifying them. Those capabilities and their state
+are commonly referred to as netdev features in Linux kernel world.
+
+There are currently three sets of features relevant to the driver, and
+one used internally by network core:
+
+ 1. netdev->hw_features set contains features whose state may possibly
+ be changed (enabled or disabled) for a particular device by user's
+ request. This set should be initialized in ndo_init callback and not
+ changed later.
+
+ 2. netdev->features set contains features which are currently enabled
+ for a device. This should be changed only by network core or in
+ error paths of ndo_set_features callback.
+
+ 3. netdev->vlan_features set contains features whose state is inherited
+ by child VLAN devices (limits netdev->features set). This is currently
+ used for all VLAN devices whether tags are stripped or inserted in
+ hardware or software.
+
+ 4. netdev->wanted_features set contains feature set requested by user.
+ This set is filtered by ndo_fix_features callback whenever it or
+ some device-specific conditions change. This set is internal to
+ networking core and should not be referenced in drivers.
+
+
+
+ Part II: Controlling enabled features
+=======================================
+
+When current feature set (netdev->features) is to be changed, new set
+is calculated and filtered by calling ndo_fix_features callback
+and netdev_fix_features(). If the resulting set differs from current
+set, it is passed to ndo_set_features callback and (if the callback
+returns success) replaces value stored in netdev->features.
+NETDEV_FEAT_CHANGE notification is issued after that whenever current
+set might have changed.
+
+The following events trigger recalculation:
+ 1. device's registration, after ndo_init returned success
+ 2. user requested changes in features state
+ 3. netdev_update_features() is called
+
+ndo_*_features callbacks are called with rtnl_lock held. Missing callbacks
+are treated as always returning success.
+
+A driver that wants to trigger recalculation must do so by calling
+netdev_update_features() while holding rtnl_lock. This should not be done
+from ndo_*_features callbacks. netdev->features should not be modified by
+driver except by means of ndo_fix_features callback.
+
+
+
+ Part III: Implementation hints
+================================
+
+ * ndo_fix_features:
+
+All dependencies between features should be resolved here. The resulting
+set can be reduced further by networking core imposed limitations (as coded
+in netdev_fix_features()). For this reason it is safer to disable a feature
+when its dependencies are not met instead of forcing the dependency on.
+
+This callback should not modify hardware nor driver state (should be
+stateless). It can be called multiple times between successive
+ndo_set_features calls.
+
+Callback must not alter features contained in NETIF_F_SOFT_FEATURES or
+NETIF_F_NEVER_CHANGE sets. The exception is NETIF_F_VLAN_CHALLENGED but
+care must be taken as the change won't affect already configured VLANs.
+
+ * ndo_set_features:
+
+Hardware should be reconfigured to match passed feature set. The set
+should not be altered unless some error condition happens that can't
+be reliably detected in ndo_fix_features. In this case, the callback
+should update netdev->features to match resulting hardware state.
+Errors returned are not (and cannot be) propagated anywhere except dmesg.
+(Note: successful return is zero, >0 means silent error.)
+
+
+
+ Part IV: Features
+===================
+
+For current list of features, see include/linux/netdev_features.h.
+This section describes semantics of some of them.
+
+ * Transmit checksumming
+
+For complete description, see comments near the top of include/linux/skbuff.h.
+
+Note: NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM + NETIF_F_IPV6_CSUM.
+It means that device can fill TCP/UDP-like checksum anywhere in the packets
+whatever headers there might be.
+
+ * Transmit TCP segmentation offload
+
+NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit
+set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6).
+
+ * Transmit DMA from high memory
+
+On platforms where this is relevant, NETIF_F_HIGHDMA signals that
+ndo_start_xmit can handle skbs with frags in high memory.
+
+ * Transmit scatter-gather
+
+Those features say that ndo_start_xmit can handle fragmented skbs:
+NETIF_F_SG --- paged skbs (skb_shinfo()->frags), NETIF_F_FRAGLIST ---
+chained skbs (skb->next/prev list).
+
+ * Software features
+
+Features contained in NETIF_F_SOFT_FEATURES are features of networking
+stack. Driver should not change behaviour based on them.
+
+ * LLTX driver (deprecated for hardware drivers)
+
+NETIF_F_LLTX should be set in drivers that implement their own locking in
+transmit path or don't need locking at all (e.g. software tunnels).
+In ndo_start_xmit, it is recommended to use a try_lock and return
+NETDEV_TX_LOCKED when the spin lock fails. The locking should also properly
+protect against other callbacks (the rules you need to find out).
+
+Don't use it for new drivers.
+
+ * netns-local device
+
+NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between
+network namespaces (e.g. loopback).
+
+Don't use it in drivers.
+
+ * VLAN challenged
+
+NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN
+headers. Some drivers set this because the cards can't handle the bigger MTU.
+[FIXME: Those cases could be fixed in VLAN code by allowing only reduced-MTU
+VLANs. This may be not useful, though.]
+
+* rx-fcs
+
+This requests that the NIC append the Ethernet Frame Checksum (FCS)
+to the end of the skb data. This allows sniffers and other tools to
+read the CRC recorded by the NIC on receipt of the packet.
+
+* rx-all
+
+This requests that the NIC receive all possible frames, including errored
+frames (such as bad FCS, etc). This can be helpful when sniffing a link with
+bad packets on it. Some NICs may receive more packets if also put into normal
+PROMISC mode.
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
new file mode 100644
index 000000000..0b1cf6b2a
--- /dev/null
+++ b/Documentation/networking/netdevices.txt
@@ -0,0 +1,107 @@
+
+Network Devices, the Kernel, and You!
+
+
+Introduction
+============
+The following is a random collection of documentation regarding
+network devices.
+
+struct net_device allocation rules
+==================================
+Network device structures need to persist even after module is unloaded and
+must be allocated with alloc_netdev_mqs() and friends.
+If device has registered successfully, it will be freed on last use
+by free_netdev(). This is required to handle the pathologic case cleanly
+(example: rmmod mydriver </sys/class/net/myeth/mtu )
+
+alloc_netdev_mqs()/alloc_netdev() reserve extra space for driver
+private data which gets freed when the network device is freed. If
+separately allocated data is attached to the network device
+(netdev_priv(dev)) then it is up to the module exit handler to free that.
+
+MTU
+===
+Each network device has a Maximum Transfer Unit. The MTU does not
+include any link layer protocol overhead. Upper layer protocols must
+not pass a socket buffer (skb) to a device to transmit with more data
+than the mtu. The MTU does not include link layer header overhead, so
+for example on Ethernet if the standard MTU is 1500 bytes used, the
+actual skb will contain up to 1514 bytes because of the Ethernet
+header. Devices should allow for the 4 byte VLAN header as well.
+
+Segmentation Offload (GSO, TSO) is an exception to this rule. The
+upper layer protocol may pass a large socket buffer to the device
+transmit routine, and the device will break that up into separate
+packets based on the current MTU.
+
+MTU is symmetrical and applies both to receive and transmit. A device
+must be able to receive at least the maximum size packet allowed by
+the MTU. A network device may use the MTU as mechanism to size receive
+buffers, but the device should allow packets with VLAN header. With
+standard Ethernet mtu of 1500 bytes, the device should allow up to
+1518 byte packets (1500 + 14 header + 4 tag). The device may either:
+drop, truncate, or pass up oversize packets, but dropping oversize
+packets is preferred.
+
+
+struct net_device synchronization rules
+=======================================
+ndo_open:
+ Synchronization: rtnl_lock() semaphore.
+ Context: process
+
+ndo_stop:
+ Synchronization: rtnl_lock() semaphore.
+ Context: process
+ Note: netif_running() is guaranteed false
+
+ndo_do_ioctl:
+ Synchronization: rtnl_lock() semaphore.
+ Context: process
+
+ndo_get_stats:
+ Synchronization: dev_base_lock rwlock.
+ Context: nominally process, but don't sleep inside an rwlock
+
+ndo_start_xmit:
+ Synchronization: __netif_tx_lock spinlock.
+
+ When the driver sets NETIF_F_LLTX in dev->features this will be
+ called without holding netif_tx_lock. In this case the driver
+ has to lock by itself when needed. It is recommended to use a try lock
+ for this and return NETDEV_TX_LOCKED when the spin lock fails.
+ The locking there should also properly protect against
+ set_rx_mode. Note that the use of NETIF_F_LLTX is deprecated.
+ Don't use it for new drivers.
+
+ Context: Process with BHs disabled or BH (timer),
+ will be called with interrupts disabled by netconsole.
+
+ Return codes:
+ o NETDEV_TX_OK everything ok.
+ o NETDEV_TX_BUSY Cannot transmit packet, try later
+ Usually a bug, means queue start/stop flow control is broken in
+ the driver. Note: the driver must NOT put the skb in its DMA ring.
+ o NETDEV_TX_LOCKED Locking failed, please retry quickly.
+ Only valid when NETIF_F_LLTX is set.
+
+ndo_tx_timeout:
+ Synchronization: netif_tx_lock spinlock; all TX queues frozen.
+ Context: BHs disabled
+ Notes: netif_queue_stopped() is guaranteed true
+
+ndo_set_rx_mode:
+ Synchronization: netif_addr_lock spinlock.
+ Context: BHs disabled
+
+struct napi_struct synchronization rules
+========================================
+napi->poll:
+ Synchronization: NAPI_STATE_SCHED bit in napi->state. Device
+ driver's ndo_stop method will invoke napi_disable() on
+ all NAPI instances which will do a sleeping poll on the
+ NAPI_STATE_SCHED napi->state bit, waiting for all pending
+ NAPI activity to cease.
+ Context: softirq
+ will be called with interrupts disabled by netconsole.
diff --git a/Documentation/networking/netif-msg.txt b/Documentation/networking/netif-msg.txt
new file mode 100644
index 000000000..c967ddb90
--- /dev/null
+++ b/Documentation/networking/netif-msg.txt
@@ -0,0 +1,79 @@
+
+________________
+NETIF Msg Level
+
+The design of the network interface message level setting.
+
+History
+
+ The design of the debugging message interface was guided and
+ constrained by backwards compatibility previous practice. It is useful
+ to understand the history and evolution in order to understand current
+ practice and relate it to older driver source code.
+
+ From the beginning of Linux, each network device driver has had a local
+ integer variable that controls the debug message level. The message
+ level ranged from 0 to 7, and monotonically increased in verbosity.
+
+ The message level was not precisely defined past level 3, but were
+ always implemented within +-1 of the specified level. Drivers tended
+ to shed the more verbose level messages as they matured.
+ 0 Minimal messages, only essential information on fatal errors.
+ 1 Standard messages, initialization status. No run-time messages
+ 2 Special media selection messages, generally timer-driver.
+ 3 Interface starts and stops, including normal status messages
+ 4 Tx and Rx frame error messages, and abnormal driver operation
+ 5 Tx packet queue information, interrupt events.
+ 6 Status on each completed Tx packet and received Rx packets
+ 7 Initial contents of Tx and Rx packets
+
+ Initially this message level variable was uniquely named in each driver
+ e.g. "lance_debug", so that a kernel symbolic debugger could locate and
+ modify the setting. When kernel modules became common, the variables
+ were consistently renamed to "debug" and allowed to be set as a module
+ parameter.
+
+ This approach worked well. However there is always a demand for
+ additional features. Over the years the following emerged as
+ reasonable and easily implemented enhancements
+ Using an ioctl() call to modify the level.
+ Per-interface rather than per-driver message level setting.
+ More selective control over the type of messages emitted.
+
+ The netif_msg recommendation adds these features with only a minor
+ complexity and code size increase.
+
+ The recommendation is the following points
+ Retaining the per-driver integer variable "debug" as a module
+ parameter with a default level of '1'.
+
+ Adding a per-interface private variable named "msg_enable". The
+ variable is a bit map rather than a level, and is initialized as
+ 1 << debug
+ Or more precisely
+ debug < 0 ? 0 : 1 << min(sizeof(int)-1, debug)
+
+ Messages should changes from
+ if (debug > 1)
+ printk(MSG_DEBUG "%s: ...
+ to
+ if (np->msg_enable & NETIF_MSG_LINK)
+ printk(MSG_DEBUG "%s: ...
+
+
+The set of message levels is named
+ Old level Name Bit position
+ 0 NETIF_MSG_DRV 0x0001
+ 1 NETIF_MSG_PROBE 0x0002
+ 2 NETIF_MSG_LINK 0x0004
+ 2 NETIF_MSG_TIMER 0x0004
+ 3 NETIF_MSG_IFDOWN 0x0008
+ 3 NETIF_MSG_IFUP 0x0008
+ 4 NETIF_MSG_RX_ERR 0x0010
+ 4 NETIF_MSG_TX_ERR 0x0010
+ 5 NETIF_MSG_TX_QUEUED 0x0020
+ 5 NETIF_MSG_INTR 0x0020
+ 6 NETIF_MSG_TX_DONE 0x0040
+ 6 NETIF_MSG_RX_STATUS 0x0040
+ 7 NETIF_MSG_PKTDATA 0x0080
+
diff --git a/Documentation/networking/netlink_mmap.txt b/Documentation/networking/netlink_mmap.txt
new file mode 100644
index 000000000..54f10478e
--- /dev/null
+++ b/Documentation/networking/netlink_mmap.txt
@@ -0,0 +1,332 @@
+This file documents how to use memory mapped I/O with netlink.
+
+Author: Patrick McHardy <kaber@trash.net>
+
+Overview
+--------
+
+Memory mapped netlink I/O can be used to increase throughput and decrease
+overhead of unicast receive and transmit operations. Some netlink subsystems
+require high throughput, these are mainly the netfilter subsystems
+nfnetlink_queue and nfnetlink_log, but it can also help speed up large
+dump operations of f.i. the routing database.
+
+Memory mapped netlink I/O used two circular ring buffers for RX and TX which
+are mapped into the processes address space.
+
+The RX ring is used by the kernel to directly construct netlink messages into
+user-space memory without copying them as done with regular socket I/O,
+additionally as long as the ring contains messages no recvmsg() or poll()
+syscalls have to be issued by user-space to get more message.
+
+The TX ring is used to process messages directly from user-space memory, the
+kernel processes all messages contained in the ring using a single sendmsg()
+call.
+
+Usage overview
+--------------
+
+In order to use memory mapped netlink I/O, user-space needs three main changes:
+
+- ring setup
+- conversion of the RX path to get messages from the ring instead of recvmsg()
+- conversion of the TX path to construct messages into the ring
+
+Ring setup is done using setsockopt() to provide the ring parameters to the
+kernel, then a call to mmap() to map the ring into the processes address space:
+
+- setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &params, sizeof(params));
+- setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &params, sizeof(params));
+- ring = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0)
+
+Usage of either ring is optional, but even if only the RX ring is used the
+mapping still needs to be writable in order to update the frame status after
+processing.
+
+Conversion of the reception path involves calling poll() on the file
+descriptor, once the socket is readable the frames from the ring are
+processed in order until no more messages are available, as indicated by
+a status word in the frame header.
+
+On kernel side, in order to make use of memory mapped I/O on receive, the
+originating netlink subsystem needs to support memory mapped I/O, otherwise
+it will use an allocated socket buffer as usual and the contents will be
+ copied to the ring on transmission, nullifying most of the performance gains.
+Dumps of kernel databases automatically support memory mapped I/O.
+
+Conversion of the transmit path involves changing message construction to
+use memory from the TX ring instead of (usually) a buffer declared on the
+stack and setting up the frame header appropriately. Optionally poll() can
+be used to wait for free frames in the TX ring.
+
+Structured and definitions for using memory mapped I/O are contained in
+<linux/netlink.h>.
+
+RX and TX rings
+----------------
+
+Each ring contains a number of continuous memory blocks, containing frames of
+fixed size dependent on the parameters used for ring setup.
+
+Ring: [ block 0 ]
+ [ frame 0 ]
+ [ frame 1 ]
+ [ block 1 ]
+ [ frame 2 ]
+ [ frame 3 ]
+ ...
+ [ block n ]
+ [ frame 2 * n ]
+ [ frame 2 * n + 1 ]
+
+The blocks are only visible to the kernel, from the point of view of user-space
+the ring just contains the frames in a continuous memory zone.
+
+The ring parameters used for setting up the ring are defined as follows:
+
+struct nl_mmap_req {
+ unsigned int nm_block_size;
+ unsigned int nm_block_nr;
+ unsigned int nm_frame_size;
+ unsigned int nm_frame_nr;
+};
+
+Frames are grouped into blocks, where each block is a continuous region of memory
+and holds nm_block_size / nm_frame_size frames. The total number of frames in
+the ring is nm_frame_nr. The following invariants hold:
+
+- frames_per_block = nm_block_size / nm_frame_size
+
+- nm_frame_nr = frames_per_block * nm_block_nr
+
+Some parameters are constrained, specifically:
+
+- nm_block_size must be a multiple of the architectures memory page size.
+ The getpagesize() function can be used to get the page size.
+
+- nm_frame_size must be equal or larger to NL_MMAP_HDRLEN, IOW a frame must be
+ able to hold at least the frame header
+
+- nm_frame_size must be smaller or equal to nm_block_size
+
+- nm_frame_size must be a multiple of NL_MMAP_MSG_ALIGNMENT
+
+- nm_frame_nr must equal the actual number of frames as specified above.
+
+When the kernel can't allocate physically continuous memory for a ring block,
+it will fall back to use physically discontinuous memory. This might affect
+performance negatively, in order to avoid this the nm_frame_size parameter
+should be chosen to be as small as possible for the required frame size and
+the number of blocks should be increased instead.
+
+Ring frames
+------------
+
+Each frames contain a frame header, consisting of a synchronization word and some
+meta-data, and the message itself.
+
+Frame: [ header message ]
+
+The frame header is defined as follows:
+
+struct nl_mmap_hdr {
+ unsigned int nm_status;
+ unsigned int nm_len;
+ __u32 nm_group;
+ /* credentials */
+ __u32 nm_pid;
+ __u32 nm_uid;
+ __u32 nm_gid;
+};
+
+- nm_status is used for synchronizing processing between the kernel and user-
+ space and specifies ownership of the frame as well as the operation to perform
+
+- nm_len contains the length of the message contained in the data area
+
+- nm_group specified the destination multicast group of message
+
+- nm_pid, nm_uid and nm_gid contain the netlink pid, UID and GID of the sending
+ process. These values correspond to the data available using SOCK_PASSCRED in
+ the SCM_CREDENTIALS cmsg.
+
+The possible values in the status word are:
+
+- NL_MMAP_STATUS_UNUSED:
+ RX ring: frame belongs to the kernel and contains no message
+ for user-space. Approriate action is to invoke poll()
+ to wait for new messages.
+
+ TX ring: frame belongs to user-space and can be used for
+ message construction.
+
+- NL_MMAP_STATUS_RESERVED:
+ RX ring only: frame is currently used by the kernel for message
+ construction and contains no valid message yet.
+ Appropriate action is to invoke poll() to wait for
+ new messages.
+
+- NL_MMAP_STATUS_VALID:
+ RX ring: frame contains a valid message. Approriate action is
+ to process the message and release the frame back to
+ the kernel by setting the status to
+ NL_MMAP_STATUS_UNUSED or queue the frame by setting the
+ status to NL_MMAP_STATUS_SKIP.
+
+ TX ring: the frame contains a valid message from user-space to
+ be processed by the kernel. After completing processing
+ the kernel will release the frame back to user-space by
+ setting the status to NL_MMAP_STATUS_UNUSED.
+
+- NL_MMAP_STATUS_COPY:
+ RX ring only: a message is ready to be processed but could not be
+ stored in the ring, either because it exceeded the
+ frame size or because the originating subsystem does
+ not support memory mapped I/O. Appropriate action is
+ to invoke recvmsg() to receive the message and release
+ the frame back to the kernel by setting the status to
+ NL_MMAP_STATUS_UNUSED.
+
+- NL_MMAP_STATUS_SKIP:
+ RX ring only: user-space queued the message for later processing, but
+ processed some messages following it in the ring. The
+ kernel should skip this frame when looking for unused
+ frames.
+
+The data area of a frame begins at a offset of NL_MMAP_HDRLEN relative to the
+frame header.
+
+TX limitations
+--------------
+
+As of Jan 2015 the message is always copied from the ring frame to an
+allocated buffer due to unresolved security concerns.
+See commit 4682a0358639b29cf ("netlink: Always copy on mmap TX.").
+
+Example
+-------
+
+Ring setup:
+
+ unsigned int block_size = 16 * getpagesize();
+ struct nl_mmap_req req = {
+ .nm_block_size = block_size,
+ .nm_block_nr = 64,
+ .nm_frame_size = 16384,
+ .nm_frame_nr = 64 * block_size / 16384,
+ };
+ unsigned int ring_size;
+ void *rx_ring, *tx_ring;
+
+ /* Configure ring parameters */
+ if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
+ exit(1);
+ if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
+ exit(1)
+
+ /* Calculate size of each individual ring */
+ ring_size = req.nm_block_nr * req.nm_block_size;
+
+ /* Map RX/TX rings. The TX ring is located after the RX ring */
+ rx_ring = mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if ((long)rx_ring == -1L)
+ exit(1);
+ tx_ring = rx_ring + ring_size:
+
+Message reception:
+
+This example assumes some ring parameters of the ring setup are available.
+
+ unsigned int frame_offset = 0;
+ struct nl_mmap_hdr *hdr;
+ struct nlmsghdr *nlh;
+ unsigned char buf[16384];
+ ssize_t len;
+
+ while (1) {
+ struct pollfd pfds[1];
+
+ pfds[0].fd = fd;
+ pfds[0].events = POLLIN | POLLERR;
+ pfds[0].revents = 0;
+
+ if (poll(pfds, 1, -1) < 0 && errno != -EINTR)
+ exit(1);
+
+ /* Check for errors. Error handling omitted */
+ if (pfds[0].revents & POLLERR)
+ <handle error>
+
+ /* If no new messages, poll again */
+ if (!(pfds[0].revents & POLLIN))
+ continue;
+
+ /* Process all frames */
+ while (1) {
+ /* Get next frame header */
+ hdr = rx_ring + frame_offset;
+
+ if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
+ /* Regular memory mapped frame */
+ nlh = (void *)hdr + NL_MMAP_HDRLEN;
+ len = hdr->nm_len;
+
+ /* Release empty message immediately. May happen
+ * on error during message construction.
+ */
+ if (len == 0)
+ goto release;
+ } else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
+ /* Frame queued to socket receive queue */
+ len = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
+ if (len <= 0)
+ break;
+ nlh = buf;
+ } else
+ /* No more messages to process, continue polling */
+ break;
+
+ process_msg(nlh);
+release:
+ /* Release frame back to the kernel */
+ hdr->nm_status = NL_MMAP_STATUS_UNUSED;
+
+ /* Advance frame offset to next frame */
+ frame_offset = (frame_offset + frame_size) % ring_size;
+ }
+ }
+
+Message transmission:
+
+This example assumes some ring parameters of the ring setup are available.
+A single message is constructed and transmitted, to send multiple messages
+at once they would be constructed in consecutive frames before a final call
+to sendto().
+
+ unsigned int frame_offset = 0;
+ struct nl_mmap_hdr *hdr;
+ struct nlmsghdr *nlh;
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ };
+
+ hdr = tx_ring + frame_offset;
+ if (hdr->nm_status != NL_MMAP_STATUS_UNUSED)
+ /* No frame available. Use poll() to avoid. */
+ exit(1);
+
+ nlh = (void *)hdr + NL_MMAP_HDRLEN;
+
+ /* Build message */
+ build_message(nlh);
+
+ /* Fill frame header: length and status need to be set */
+ hdr->nm_len = nlh->nlmsg_len;
+ hdr->nm_status = NL_MMAP_STATUS_VALID;
+
+ if (sendto(fd, NULL, 0, 0, &addr, sizeof(addr)) < 0)
+ exit(1);
+
+ /* Advance frame offset to next frame */
+ frame_offset = (frame_offset + frame_size) % ring_size;
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt
new file mode 100644
index 000000000..f55599c62
--- /dev/null
+++ b/Documentation/networking/nf_conntrack-sysctl.txt
@@ -0,0 +1,177 @@
+/proc/sys/net/netfilter/nf_conntrack_* Variables:
+
+nf_conntrack_acct - BOOLEAN
+ 0 - disabled (default)
+ not 0 - enabled
+
+ Enable connection tracking flow accounting. 64-bit byte and packet
+ counters per flow are added.
+
+nf_conntrack_buckets - INTEGER (read-only)
+ Size of hash table. If not specified as parameter during module
+ loading, the default size is calculated by dividing total memory
+ by 16384 to determine the number of buckets but the hash table will
+ never have fewer than 32 and limited to 16384 buckets. For systems
+ with more than 4GB of memory it will be 65536 buckets.
+
+nf_conntrack_checksum - BOOLEAN
+ 0 - disabled
+ not 0 - enabled (default)
+
+ Verify checksum of incoming packets. Packets with bad checksums are
+ in INVALID state. If this is enabled, such packets will not be
+ considered for connection tracking.
+
+nf_conntrack_count - INTEGER (read-only)
+ Number of currently allocated flow entries.
+
+nf_conntrack_events - BOOLEAN
+ 0 - disabled
+ not 0 - enabled (default)
+
+ If this option is enabled, the connection tracking code will
+ provide userspace with connection tracking events via ctnetlink.
+
+nf_conntrack_events_retry_timeout - INTEGER (seconds)
+ default 15
+
+ This option is only relevant when "reliable connection tracking
+ events" are used. Normally, ctnetlink is "lossy", that is,
+ events are normally dropped when userspace listeners can't keep up.
+
+ Userspace can request "reliable event mode". When this mode is
+ active, the conntrack will only be destroyed after the event was
+ delivered. If event delivery fails, the kernel periodically
+ re-tries to send the event to userspace.
+
+ This is the maximum interval the kernel should use when re-trying
+ to deliver the destroy event.
+
+ A higher number means there will be fewer delivery retries and it
+ will take longer for a backlog to be processed.
+
+nf_conntrack_expect_max - INTEGER
+ Maximum size of expectation table. Default value is
+ nf_conntrack_buckets / 256. Minimum is 1.
+
+nf_conntrack_frag6_high_thresh - INTEGER
+ default 262144
+
+ Maximum memory used to reassemble IPv6 fragments. When
+ nf_conntrack_frag6_high_thresh bytes of memory is allocated for this
+ purpose, the fragment handler will toss packets until
+ nf_conntrack_frag6_low_thresh is reached.
+
+nf_conntrack_frag6_low_thresh - INTEGER
+ default 196608
+
+ See nf_conntrack_frag6_low_thresh
+
+nf_conntrack_frag6_timeout - INTEGER (seconds)
+ default 60
+
+ Time to keep an IPv6 fragment in memory.
+
+nf_conntrack_generic_timeout - INTEGER (seconds)
+ default 600
+
+ Default for generic timeout. This refers to layer 4 unknown/unsupported
+ protocols.
+
+nf_conntrack_helper - BOOLEAN
+ 0 - disabled
+ not 0 - enabled (default)
+
+ Enable automatic conntrack helper assignment.
+
+nf_conntrack_icmp_timeout - INTEGER (seconds)
+ default 30
+
+ Default for ICMP timeout.
+
+nf_conntrack_icmpv6_timeout - INTEGER (seconds)
+ default 30
+
+ Default for ICMP6 timeout.
+
+nf_conntrack_log_invalid - INTEGER
+ 0 - disable (default)
+ 1 - log ICMP packets
+ 6 - log TCP packets
+ 17 - log UDP packets
+ 33 - log DCCP packets
+ 41 - log ICMPv6 packets
+ 136 - log UDPLITE packets
+ 255 - log packets of any protocol
+
+ Log invalid packets of a type specified by value.
+
+nf_conntrack_max - INTEGER
+ Size of connection tracking table. Default value is
+ nf_conntrack_buckets value * 4.
+
+nf_conntrack_tcp_be_liberal - BOOLEAN
+ 0 - disabled (default)
+ not 0 - enabled
+
+ Be conservative in what you do, be liberal in what you accept from others.
+ If it's non-zero, we mark only out of window RST segments as INVALID.
+
+nf_conntrack_tcp_loose - BOOLEAN
+ 0 - disabled
+ not 0 - enabled (default)
+
+ If it is set to zero, we disable picking up already established
+ connections.
+
+nf_conntrack_tcp_max_retrans - INTEGER
+ default 3
+
+ Maximum number of packets that can be retransmitted without
+ received an (acceptable) ACK from the destination. If this number
+ is reached, a shorter timer will be started.
+
+nf_conntrack_tcp_timeout_close - INTEGER (seconds)
+ default 10
+
+nf_conntrack_tcp_timeout_close_wait - INTEGER (seconds)
+ default 60
+
+nf_conntrack_tcp_timeout_established - INTEGER (seconds)
+ default 432000 (5 days)
+
+nf_conntrack_tcp_timeout_fin_wait - INTEGER (seconds)
+ default 120
+
+nf_conntrack_tcp_timeout_last_ack - INTEGER (seconds)
+ default 30
+
+nf_conntrack_tcp_timeout_max_retrans - INTEGER (seconds)
+ default 300
+
+nf_conntrack_tcp_timeout_syn_recv - INTEGER (seconds)
+ default 60
+
+nf_conntrack_tcp_timeout_syn_sent - INTEGER (seconds)
+ default 120
+
+nf_conntrack_tcp_timeout_time_wait - INTEGER (seconds)
+ default 120
+
+nf_conntrack_tcp_timeout_unacknowledged - INTEGER (seconds)
+ default 300
+
+nf_conntrack_timestamp - BOOLEAN
+ 0 - disabled (default)
+ not 0 - enabled
+
+ Enable connection tracking flow timestamping.
+
+nf_conntrack_udp_timeout - INTEGER (seconds)
+ default 30
+
+nf_conntrack_udp_timeout_stream2 - INTEGER (seconds)
+ default 180
+
+ This extended timeout will be used in case there is an UDP stream
+ detected.
diff --git a/Documentation/networking/nfc.txt b/Documentation/networking/nfc.txt
new file mode 100644
index 000000000..b24c29bda
--- /dev/null
+++ b/Documentation/networking/nfc.txt
@@ -0,0 +1,128 @@
+Linux NFC subsystem
+===================
+
+The Near Field Communication (NFC) subsystem is required to standardize the
+NFC device drivers development and to create an unified userspace interface.
+
+This document covers the architecture overview, the device driver interface
+description and the userspace interface description.
+
+Architecture overview
+---------------------
+
+The NFC subsystem is responsible for:
+ - NFC adapters management;
+ - Polling for targets;
+ - Low-level data exchange;
+
+The subsystem is divided in some parts. The 'core' is responsible for
+providing the device driver interface. On the other side, it is also
+responsible for providing an interface to control operations and low-level
+data exchange.
+
+The control operations are available to userspace via generic netlink.
+
+The low-level data exchange interface is provided by the new socket family
+PF_NFC. The NFC_SOCKPROTO_RAW performs raw communication with NFC targets.
+
+
+ +--------------------------------------+
+ | USER SPACE |
+ +--------------------------------------+
+ ^ ^
+ | low-level | control
+ | data exchange | operations
+ | |
+ | v
+ | +-----------+
+ | AF_NFC | netlink |
+ | socket +-----------+
+ | raw ^
+ | |
+ v v
+ +---------+ +-----------+
+ | rawsock | <--------> | core |
+ +---------+ +-----------+
+ ^
+ |
+ v
+ +-----------+
+ | driver |
+ +-----------+
+
+Device Driver Interface
+-----------------------
+
+When registering on the NFC subsystem, the device driver must inform the core
+of the set of supported NFC protocols and the set of ops callbacks. The ops
+callbacks that must be implemented are the following:
+
+* start_poll - setup the device to poll for targets
+* stop_poll - stop on progress polling operation
+* activate_target - select and initialize one of the targets found
+* deactivate_target - deselect and deinitialize the selected target
+* data_exchange - send data and receive the response (transceive operation)
+
+Userspace interface
+--------------------
+
+The userspace interface is divided in control operations and low-level data
+exchange operation.
+
+CONTROL OPERATIONS:
+
+Generic netlink is used to implement the interface to the control operations.
+The operations are composed by commands and events, all listed below:
+
+* NFC_CMD_GET_DEVICE - get specific device info or dump the device list
+* NFC_CMD_START_POLL - setup a specific device to polling for targets
+* NFC_CMD_STOP_POLL - stop the polling operation in a specific device
+* NFC_CMD_GET_TARGET - dump the list of targets found by a specific device
+
+* NFC_EVENT_DEVICE_ADDED - reports an NFC device addition
+* NFC_EVENT_DEVICE_REMOVED - reports an NFC device removal
+* NFC_EVENT_TARGETS_FOUND - reports START_POLL results when 1 or more targets
+are found
+
+The user must call START_POLL to poll for NFC targets, passing the desired NFC
+protocols through NFC_ATTR_PROTOCOLS attribute. The device remains in polling
+state until it finds any target. However, the user can stop the polling
+operation by calling STOP_POLL command. In this case, it will be checked if
+the requester of STOP_POLL is the same of START_POLL.
+
+If the polling operation finds one or more targets, the event TARGETS_FOUND is
+sent (including the device id). The user must call GET_TARGET to get the list of
+all targets found by such device. Each reply message has target attributes with
+relevant information such as the supported NFC protocols.
+
+All polling operations requested through one netlink socket are stopped when
+it's closed.
+
+LOW-LEVEL DATA EXCHANGE:
+
+The userspace must use PF_NFC sockets to perform any data communication with
+targets. All NFC sockets use AF_NFC:
+
+struct sockaddr_nfc {
+ sa_family_t sa_family;
+ __u32 dev_idx;
+ __u32 target_idx;
+ __u32 nfc_protocol;
+};
+
+To establish a connection with one target, the user must create an
+NFC_SOCKPROTO_RAW socket and call the 'connect' syscall with the sockaddr_nfc
+struct correctly filled. All information comes from NFC_EVENT_TARGETS_FOUND
+netlink event. As a target can support more than one NFC protocol, the user
+must inform which protocol it wants to use.
+
+Internally, 'connect' will result in an activate_target call to the driver.
+When the socket is closed, the target is deactivated.
+
+The data format exchanged through the sockets is NFC protocol dependent. For
+instance, when communicating with MIFARE tags, the data exchanged are MIFARE
+commands and their responses.
+
+The first received package is the response to the first sent package and so
+on. In order to allow valid "empty" responses, every data received has a NULL
+header of 1 byte.
diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt
new file mode 100644
index 000000000..b3b9ac61d
--- /dev/null
+++ b/Documentation/networking/openvswitch.txt
@@ -0,0 +1,248 @@
+Open vSwitch datapath developer documentation
+=============================================
+
+The Open vSwitch kernel module allows flexible userspace control over
+flow-level packet processing on selected network devices. It can be
+used to implement a plain Ethernet switch, network device bonding,
+VLAN processing, network access control, flow-based network control,
+and so on.
+
+The kernel module implements multiple "datapaths" (analogous to
+bridges), each of which can have multiple "vports" (analogous to ports
+within a bridge). Each datapath also has associated with it a "flow
+table" that userspace populates with "flows" that map from keys based
+on packet headers and metadata to sets of actions. The most common
+action forwards the packet to another vport; other actions are also
+implemented.
+
+When a packet arrives on a vport, the kernel module processes it by
+extracting its flow key and looking it up in the flow table. If there
+is a matching flow, it executes the associated actions. If there is
+no match, it queues the packet to userspace for processing (as part of
+its processing, userspace will likely set up a flow to handle further
+packets of the same type entirely in-kernel).
+
+
+Flow key compatibility
+----------------------
+
+Network protocols evolve over time. New protocols become important
+and existing protocols lose their prominence. For the Open vSwitch
+kernel module to remain relevant, it must be possible for newer
+versions to parse additional protocols as part of the flow key. It
+might even be desirable, someday, to drop support for parsing
+protocols that have become obsolete. Therefore, the Netlink interface
+to Open vSwitch is designed to allow carefully written userspace
+applications to work with any version of the flow key, past or future.
+
+To support this forward and backward compatibility, whenever the
+kernel module passes a packet to userspace, it also passes along the
+flow key that it parsed from the packet. Userspace then extracts its
+own notion of a flow key from the packet and compares it against the
+kernel-provided version:
+
+ - If userspace's notion of the flow key for the packet matches the
+ kernel's, then nothing special is necessary.
+
+ - If the kernel's flow key includes more fields than the userspace
+ version of the flow key, for example if the kernel decoded IPv6
+ headers but userspace stopped at the Ethernet type (because it
+ does not understand IPv6), then again nothing special is
+ necessary. Userspace can still set up a flow in the usual way,
+ as long as it uses the kernel-provided flow key to do it.
+
+ - If the userspace flow key includes more fields than the
+ kernel's, for example if userspace decoded an IPv6 header but
+ the kernel stopped at the Ethernet type, then userspace can
+ forward the packet manually, without setting up a flow in the
+ kernel. This case is bad for performance because every packet
+ that the kernel considers part of the flow must go to userspace,
+ but the forwarding behavior is correct. (If userspace can
+ determine that the values of the extra fields would not affect
+ forwarding behavior, then it could set up a flow anyway.)
+
+How flow keys evolve over time is important to making this work, so
+the following sections go into detail.
+
+
+Flow key format
+---------------
+
+A flow key is passed over a Netlink socket as a sequence of Netlink
+attributes. Some attributes represent packet metadata, defined as any
+information about a packet that cannot be extracted from the packet
+itself, e.g. the vport on which the packet was received. Most
+attributes, however, are extracted from headers within the packet,
+e.g. source and destination addresses from Ethernet, IP, or TCP
+headers.
+
+The <linux/openvswitch.h> header file defines the exact format of the
+flow key attributes. For informal explanatory purposes here, we write
+them as comma-separated strings, with parentheses indicating arguments
+and nesting. For example, the following could represent a flow key
+corresponding to a TCP packet that arrived on vport 1:
+
+ in_port(1), eth(src=e0:91:f5:21:d0:b2, dst=00:02:e3:0f:80:a4),
+ eth_type(0x0800), ipv4(src=172.16.0.20, dst=172.18.0.52, proto=17, tos=0,
+ frag=no), tcp(src=49163, dst=80)
+
+Often we ellipsize arguments not important to the discussion, e.g.:
+
+ in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)
+
+
+Wildcarded flow key format
+--------------------------
+
+A wildcarded flow is described with two sequences of Netlink attributes
+passed over the Netlink socket. A flow key, exactly as described above, and an
+optional corresponding flow mask.
+
+A wildcarded flow can represent a group of exact match flows. Each '1' bit
+in the mask specifies a exact match with the corresponding bit in the flow key.
+A '0' bit specifies a don't care bit, which will match either a '1' or '0' bit
+of a incoming packet. Using wildcarded flow can improve the flow set up rate
+by reduce the number of new flows need to be processed by the user space program.
+
+Support for the mask Netlink attribute is optional for both the kernel and user
+space program. The kernel can ignore the mask attribute, installing an exact
+match flow, or reduce the number of don't care bits in the kernel to less than
+what was specified by the user space program. In this case, variations in bits
+that the kernel does not implement will simply result in additional flow setups.
+The kernel module will also work with user space programs that neither support
+nor supply flow mask attributes.
+
+Since the kernel may ignore or modify wildcard bits, it can be difficult for
+the userspace program to know exactly what matches are installed. There are
+two possible approaches: reactively install flows as they miss the kernel
+flow table (and therefore not attempt to determine wildcard changes at all)
+or use the kernel's response messages to determine the installed wildcards.
+
+When interacting with userspace, the kernel should maintain the match portion
+of the key exactly as originally installed. This will provides a handle to
+identify the flow for all future operations. However, when reporting the
+mask of an installed flow, the mask should include any restrictions imposed
+by the kernel.
+
+The behavior when using overlapping wildcarded flows is undefined. It is the
+responsibility of the user space program to ensure that any incoming packet
+can match at most one flow, wildcarded or not. The current implementation
+performs best-effort detection of overlapping wildcarded flows and may reject
+some but not all of them. However, this behavior may change in future versions.
+
+
+Unique flow identifiers
+-----------------------
+
+An alternative to using the original match portion of a key as the handle for
+flow identification is a unique flow identifier, or "UFID". UFIDs are optional
+for both the kernel and user space program.
+
+User space programs that support UFID are expected to provide it during flow
+setup in addition to the flow, then refer to the flow using the UFID for all
+future operations. The kernel is not required to index flows by the original
+flow key if a UFID is specified.
+
+
+Basic rule for evolving flow keys
+---------------------------------
+
+Some care is needed to really maintain forward and backward
+compatibility for applications that follow the rules listed under
+"Flow key compatibility" above.
+
+The basic rule is obvious:
+
+ ------------------------------------------------------------------
+ New network protocol support must only supplement existing flow
+ key attributes. It must not change the meaning of already defined
+ flow key attributes.
+ ------------------------------------------------------------------
+
+This rule does have less-obvious consequences so it is worth working
+through a few examples. Suppose, for example, that the kernel module
+did not already implement VLAN parsing. Instead, it just interpreted
+the 802.1Q TPID (0x8100) as the Ethertype then stopped parsing the
+packet. The flow key for any packet with an 802.1Q header would look
+essentially like this, ignoring metadata:
+
+ eth(...), eth_type(0x8100)
+
+Naively, to add VLAN support, it makes sense to add a new "vlan" flow
+key attribute to contain the VLAN tag, then continue to decode the
+encapsulated headers beyond the VLAN tag using the existing field
+definitions. With this change, a TCP packet in VLAN 10 would have a
+flow key much like this:
+
+ eth(...), vlan(vid=10, pcp=0), eth_type(0x0800), ip(proto=6, ...), tcp(...)
+
+But this change would negatively affect a userspace application that
+has not been updated to understand the new "vlan" flow key attribute.
+The application could, following the flow compatibility rules above,
+ignore the "vlan" attribute that it does not understand and therefore
+assume that the flow contained IP packets. This is a bad assumption
+(the flow only contains IP packets if one parses and skips over the
+802.1Q header) and it could cause the application's behavior to change
+across kernel versions even though it follows the compatibility rules.
+
+The solution is to use a set of nested attributes. This is, for
+example, why 802.1Q support uses nested attributes. A TCP packet in
+VLAN 10 is actually expressed as:
+
+ eth(...), eth_type(0x8100), vlan(vid=10, pcp=0), encap(eth_type(0x0800),
+ ip(proto=6, ...), tcp(...)))
+
+Notice how the "eth_type", "ip", and "tcp" flow key attributes are
+nested inside the "encap" attribute. Thus, an application that does
+not understand the "vlan" key will not see either of those attributes
+and therefore will not misinterpret them. (Also, the outer eth_type
+is still 0x8100, not changed to 0x0800.)
+
+Handling malformed packets
+--------------------------
+
+Don't drop packets in the kernel for malformed protocol headers, bad
+checksums, etc. This would prevent userspace from implementing a
+simple Ethernet switch that forwards every packet.
+
+Instead, in such a case, include an attribute with "empty" content.
+It doesn't matter if the empty content could be valid protocol values,
+as long as those values are rarely seen in practice, because userspace
+can always forward all packets with those values to userspace and
+handle them individually.
+
+For example, consider a packet that contains an IP header that
+indicates protocol 6 for TCP, but which is truncated just after the IP
+header, so that the TCP header is missing. The flow key for this
+packet would include a tcp attribute with all-zero src and dst, like
+this:
+
+ eth(...), eth_type(0x0800), ip(proto=6, ...), tcp(src=0, dst=0)
+
+As another example, consider a packet with an Ethernet type of 0x8100,
+indicating that a VLAN TCI should follow, but which is truncated just
+after the Ethernet type. The flow key for this packet would include
+an all-zero-bits vlan and an empty encap attribute, like this:
+
+ eth(...), eth_type(0x8100), vlan(0), encap()
+
+Unlike a TCP packet with source and destination ports 0, an
+all-zero-bits VLAN TCI is not that rare, so the CFI bit (aka
+VLAN_TAG_PRESENT inside the kernel) is ordinarily set in a vlan
+attribute expressly to allow this situation to be distinguished.
+Thus, the flow key in this second example unambiguously indicates a
+missing or malformed VLAN TCI.
+
+Other rules
+-----------
+
+The other rules for flow keys are much less subtle:
+
+ - Duplicate attributes are not allowed at a given nesting level.
+
+ - Ordering of attributes is not significant.
+
+ - When the kernel sends a given flow key to userspace, it always
+ composes it the same way. This allows userspace to hash and
+ compare entire flow keys that it may not be able to fully
+ interpret.
diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt
new file mode 100644
index 000000000..355c6d8ef
--- /dev/null
+++ b/Documentation/networking/operstates.txt
@@ -0,0 +1,162 @@
+
+1. Introduction
+
+Linux distinguishes between administrative and operational state of an
+interface. Administrative state is the result of "ip link set dev
+<dev> up or down" and reflects whether the administrator wants to use
+the device for traffic.
+
+However, an interface is not usable just because the admin enabled it
+- ethernet requires to be plugged into the switch and, depending on
+a site's networking policy and configuration, an 802.1X authentication
+to be performed before user data can be transferred. Operational state
+shows the ability of an interface to transmit this user data.
+
+Thanks to 802.1X, userspace must be granted the possibility to
+influence operational state. To accommodate this, operational state is
+split into two parts: Two flags that can be set by the driver only, and
+a RFC2863 compatible state that is derived from these flags, a policy,
+and changeable from userspace under certain rules.
+
+
+2. Querying from userspace
+
+Both admin and operational state can be queried via the netlink
+operation RTM_GETLINK. It is also possible to subscribe to RTMGRP_LINK
+to be notified of updates. This is important for setting from userspace.
+
+These values contain interface state:
+
+ifinfomsg::if_flags & IFF_UP:
+ Interface is admin up
+ifinfomsg::if_flags & IFF_RUNNING:
+ Interface is in RFC2863 operational state UP or UNKNOWN. This is for
+ backward compatibility, routing daemons, dhcp clients can use this
+ flag to determine whether they should use the interface.
+ifinfomsg::if_flags & IFF_LOWER_UP:
+ Driver has signaled netif_carrier_on()
+ifinfomsg::if_flags & IFF_DORMANT:
+ Driver has signaled netif_dormant_on()
+
+TLV IFLA_OPERSTATE
+
+contains RFC2863 state of the interface in numeric representation:
+
+IF_OPER_UNKNOWN (0):
+ Interface is in unknown state, neither driver nor userspace has set
+ operational state. Interface must be considered for user data as
+ setting operational state has not been implemented in every driver.
+IF_OPER_NOTPRESENT (1):
+ Unused in current kernel (notpresent interfaces normally disappear),
+ just a numerical placeholder.
+IF_OPER_DOWN (2):
+ Interface is unable to transfer data on L1, f.e. ethernet is not
+ plugged or interface is ADMIN down.
+IF_OPER_LOWERLAYERDOWN (3):
+ Interfaces stacked on an interface that is IF_OPER_DOWN show this
+ state (f.e. VLAN).
+IF_OPER_TESTING (4):
+ Unused in current kernel.
+IF_OPER_DORMANT (5):
+ Interface is L1 up, but waiting for an external event, f.e. for a
+ protocol to establish. (802.1X)
+IF_OPER_UP (6):
+ Interface is operational up and can be used.
+
+This TLV can also be queried via sysfs.
+
+TLV IFLA_LINKMODE
+
+contains link policy. This is needed for userspace interaction
+described below.
+
+This TLV can also be queried via sysfs.
+
+
+3. Kernel driver API
+
+Kernel drivers have access to two flags that map to IFF_LOWER_UP and
+IFF_DORMANT. These flags can be set from everywhere, even from
+interrupts. It is guaranteed that only the driver has write access,
+however, if different layers of the driver manipulate the same flag,
+the driver has to provide the synchronisation needed.
+
+__LINK_STATE_NOCARRIER, maps to !IFF_LOWER_UP:
+
+The driver uses netif_carrier_on() to clear and netif_carrier_off() to
+set this flag. On netif_carrier_off(), the scheduler stops sending
+packets. The name 'carrier' and the inversion are historical, think of
+it as lower layer.
+
+Note that for certain kind of soft-devices, which are not managing any
+real hardware, it is possible to set this bit from userspace. One
+should use TVL IFLA_CARRIER to do so.
+
+netif_carrier_ok() can be used to query that bit.
+
+__LINK_STATE_DORMANT, maps to IFF_DORMANT:
+
+Set by the driver to express that the device cannot yet be used
+because some driver controlled protocol establishment has to
+complete. Corresponding functions are netif_dormant_on() to set the
+flag, netif_dormant_off() to clear it and netif_dormant() to query.
+
+On device allocation, networking core sets the flags equivalent to
+netif_carrier_ok() and !netif_dormant().
+
+
+Whenever the driver CHANGES one of these flags, a workqueue event is
+scheduled to translate the flag combination to IFLA_OPERSTATE as
+follows:
+
+!netif_carrier_ok():
+ IF_OPER_LOWERLAYERDOWN if the interface is stacked, IF_OPER_DOWN
+ otherwise. Kernel can recognise stacked interfaces because their
+ ifindex != iflink.
+
+netif_carrier_ok() && netif_dormant():
+ IF_OPER_DORMANT
+
+netif_carrier_ok() && !netif_dormant():
+ IF_OPER_UP if userspace interaction is disabled. Otherwise
+ IF_OPER_DORMANT with the possibility for userspace to initiate the
+ IF_OPER_UP transition afterwards.
+
+
+4. Setting from userspace
+
+Applications have to use the netlink interface to influence the
+RFC2863 operational state of an interface. Setting IFLA_LINKMODE to 1
+via RTM_SETLINK instructs the kernel that an interface should go to
+IF_OPER_DORMANT instead of IF_OPER_UP when the combination
+netif_carrier_ok() && !netif_dormant() is set by the
+driver. Afterwards, the userspace application can set IFLA_OPERSTATE
+to IF_OPER_DORMANT or IF_OPER_UP as long as the driver does not set
+netif_carrier_off() or netif_dormant_on(). Changes made by userspace
+are multicasted on the netlink group RTMGRP_LINK.
+
+So basically a 802.1X supplicant interacts with the kernel like this:
+
+-subscribe to RTMGRP_LINK
+-set IFLA_LINKMODE to 1 via RTM_SETLINK
+-query RTM_GETLINK once to get initial state
+-if initial flags are not (IFF_LOWER_UP && !IFF_DORMANT), wait until
+ netlink multicast signals this state
+-do 802.1X, eventually abort if flags go down again
+-send RTM_SETLINK to set operstate to IF_OPER_UP if authentication
+ succeeds, IF_OPER_DORMANT otherwise
+-see how operstate and IFF_RUNNING is echoed via netlink multicast
+-set interface back to IF_OPER_DORMANT if 802.1X reauthentication
+ fails
+-restart if kernel changes IFF_LOWER_UP or IFF_DORMANT flag
+
+if supplicant goes down, bring back IFLA_LINKMODE to 0 and
+IFLA_OPERSTATE to a sane value.
+
+A routing daemon or dhcp client just needs to care for IFF_RUNNING or
+waiting for operstate to go IF_OPER_UP/IF_OPER_UNKNOWN before
+considering the interface / querying a DHCP address.
+
+
+For technical questions and/or comments please e-mail to Stefan Rompf
+(stefan at loplof.de).
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
new file mode 100644
index 000000000..daa015af1
--- /dev/null
+++ b/Documentation/networking/packet_mmap.txt
@@ -0,0 +1,1068 @@
+--------------------------------------------------------------------------------
++ ABSTRACT
+--------------------------------------------------------------------------------
+
+This file documents the mmap() facility available with the PACKET
+socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for
+i) capture network traffic with utilities like tcpdump, ii) transmit network
+traffic, or any other that needs raw access to network interface.
+
+You can find the latest version of this document at:
+ http://wiki.ipxwarzone.com/index.php5?title=Linux_packet_mmap
+
+Howto can be found at:
+ http://wiki.gnu-log.net (packet_mmap)
+
+Please send your comments to
+ Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>
+ Johann Baudy <johann.baudy@gnu-log.net>
+
+-------------------------------------------------------------------------------
++ Why use PACKET_MMAP
+--------------------------------------------------------------------------------
+
+In Linux 2.4/2.6/3.x if PACKET_MMAP is not enabled, the capture process is very
+inefficient. It uses very limited buffers and requires one system call to
+capture each packet, it requires two if you want to get packet's timestamp
+(like libpcap always does).
+
+In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size
+configurable circular buffer mapped in user space that can be used to either
+send or receive packets. This way reading packets just needs to wait for them,
+most of the time there is no need to issue a single system call. Concerning
+transmission, multiple packets can be sent through one system call to get the
+highest bandwidth. By using a shared buffer between the kernel and the user
+also has the benefit of minimizing packet copies.
+
+It's fine to use PACKET_MMAP to improve the performance of the capture and
+transmission process, but it isn't everything. At least, if you are capturing
+at high speeds (this is relative to the cpu speed), you should check if the
+device driver of your network interface card supports some sort of interrupt
+load mitigation or (even better) if it supports NAPI, also make sure it is
+enabled. For transmission, check the MTU (Maximum Transmission Unit) used and
+supported by devices of your network. CPU IRQ pinning of your network interface
+card can also be an advantage.
+
+--------------------------------------------------------------------------------
++ How to use mmap() to improve capture process
+--------------------------------------------------------------------------------
+
+From the user standpoint, you should use the higher level libpcap library, which
+is a de facto standard, portable across nearly all operating systems
+including Win32.
+
+Said that, at time of this writing, official libpcap 0.8.1 is out and doesn't include
+support for PACKET_MMAP, and also probably the libpcap included in your distribution.
+
+I'm aware of two implementations of PACKET_MMAP in libpcap:
+
+ http://wiki.ipxwarzone.com/ (by Simon Patarin, based on libpcap 0.6.2)
+ http://public.lanl.gov/cpw/ (by Phil Wood, based on lastest libpcap)
+
+The rest of this document is intended for people who want to understand
+the low level details or want to improve libpcap by including PACKET_MMAP
+support.
+
+--------------------------------------------------------------------------------
++ How to use mmap() directly to improve capture process
+--------------------------------------------------------------------------------
+
+From the system calls stand point, the use of PACKET_MMAP involves
+the following process:
+
+
+[setup] socket() -------> creation of the capture socket
+ setsockopt() ---> allocation of the circular buffer (ring)
+ option: PACKET_RX_RING
+ mmap() ---------> mapping of the allocated buffer to the
+ user process
+
+[capture] poll() ---------> to wait for incoming packets
+
+[shutdown] close() --------> destruction of the capture socket and
+ deallocation of all associated
+ resources.
+
+
+socket creation and destruction is straight forward, and is done
+the same way with or without PACKET_MMAP:
+
+ int fd = socket(PF_PACKET, mode, htons(ETH_P_ALL));
+
+where mode is SOCK_RAW for the raw interface were link level
+information can be captured or SOCK_DGRAM for the cooked
+interface where link level information capture is not
+supported and a link level pseudo-header is provided
+by the kernel.
+
+The destruction of the socket and all associated resources
+is done by a simple call to close(fd).
+
+Similarly as without PACKET_MMAP, it is possible to use one socket
+for capture and transmission. This can be done by mapping the
+allocated RX and TX buffer ring with a single mmap() call.
+See "Mapping and use of the circular buffer (ring)".
+
+Next I will describe PACKET_MMAP settings and its constraints,
+also the mapping of the circular buffer in the user process and
+the use of this buffer.
+
+--------------------------------------------------------------------------------
++ How to use mmap() directly to improve transmission process
+--------------------------------------------------------------------------------
+Transmission process is similar to capture as shown below.
+
+[setup] socket() -------> creation of the transmission socket
+ setsockopt() ---> allocation of the circular buffer (ring)
+ option: PACKET_TX_RING
+ bind() ---------> bind transmission socket with a network interface
+ mmap() ---------> mapping of the allocated buffer to the
+ user process
+
+[transmission] poll() ---------> wait for free packets (optional)
+ send() ---------> send all packets that are set as ready in
+ the ring
+ The flag MSG_DONTWAIT can be used to return
+ before end of transfer.
+
+[shutdown] close() --------> destruction of the transmission socket and
+ deallocation of all associated resources.
+
+Socket creation and destruction is also straight forward, and is done
+the same way as in capturing described in the previous paragraph:
+
+ int fd = socket(PF_PACKET, mode, 0);
+
+The protocol can optionally be 0 in case we only want to transmit
+via this socket, which avoids an expensive call to packet_rcv().
+In this case, you also need to bind(2) the TX_RING with sll_protocol = 0
+set. Otherwise, htons(ETH_P_ALL) or any other protocol, for example.
+
+Binding the socket to your network interface is mandatory (with zero copy) to
+know the header size of frames used in the circular buffer.
+
+As capture, each frame contains two parts:
+
+ --------------------
+| struct tpacket_hdr | Header. It contains the status of
+| | of this frame
+|--------------------|
+| data buffer |
+. . Data that will be sent over the network interface.
+. .
+ --------------------
+
+ bind() associates the socket to your network interface thanks to
+ sll_ifindex parameter of struct sockaddr_ll.
+
+ Initialization example:
+
+ struct sockaddr_ll my_addr;
+ struct ifreq s_ifr;
+ ...
+
+ strncpy (s_ifr.ifr_name, "eth0", sizeof(s_ifr.ifr_name));
+
+ /* get interface index of eth0 */
+ ioctl(this->socket, SIOCGIFINDEX, &s_ifr);
+
+ /* fill sockaddr_ll struct to prepare binding */
+ my_addr.sll_family = AF_PACKET;
+ my_addr.sll_protocol = htons(ETH_P_ALL);
+ my_addr.sll_ifindex = s_ifr.ifr_ifindex;
+
+ /* bind socket to eth0 */
+ bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
+
+ A complete tutorial is available at: http://wiki.gnu-log.net/
+
+By default, the user should put data at :
+ frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll)
+
+So, whatever you choose for the socket mode (SOCK_DGRAM or SOCK_RAW),
+the beginning of the user data will be at :
+ frame base + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
+
+If you wish to put user data at a custom offset from the beginning of
+the frame (for payload alignment with SOCK_RAW mode for instance) you
+can set tp_net (with SOCK_DGRAM) or tp_mac (with SOCK_RAW). In order
+to make this work it must be enabled previously with setsockopt()
+and the PACKET_TX_HAS_OFF option.
+
+--------------------------------------------------------------------------------
++ PACKET_MMAP settings
+--------------------------------------------------------------------------------
+
+To setup PACKET_MMAP from user level code is done with a call like
+
+ - Capture process
+ setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req))
+ - Transmission process
+ setsockopt(fd, SOL_PACKET, PACKET_TX_RING, (void *) &req, sizeof(req))
+
+The most significant argument in the previous call is the req parameter,
+this parameter must to have the following structure:
+
+ struct tpacket_req
+ {
+ unsigned int tp_block_size; /* Minimal size of contiguous block */
+ unsigned int tp_block_nr; /* Number of blocks */
+ unsigned int tp_frame_size; /* Size of frame */
+ unsigned int tp_frame_nr; /* Total number of frames */
+ };
+
+This structure is defined in /usr/include/linux/if_packet.h and establishes a
+circular buffer (ring) of unswappable memory.
+Being mapped in the capture process allows reading the captured frames and
+related meta-information like timestamps without requiring a system call.
+
+Frames are grouped in blocks. Each block is a physically contiguous
+region of memory and holds tp_block_size/tp_frame_size frames. The total number
+of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because
+
+ frames_per_block = tp_block_size/tp_frame_size
+
+indeed, packet_set_ring checks that the following condition is true
+
+ frames_per_block * tp_block_nr == tp_frame_nr
+
+Lets see an example, with the following values:
+
+ tp_block_size= 4096
+ tp_frame_size= 2048
+ tp_block_nr = 4
+ tp_frame_nr = 8
+
+we will get the following buffer structure:
+
+ block #1 block #2
++---------+---------+ +---------+---------+
+| frame 1 | frame 2 | | frame 3 | frame 4 |
++---------+---------+ +---------+---------+
+
+ block #3 block #4
++---------+---------+ +---------+---------+
+| frame 5 | frame 6 | | frame 7 | frame 8 |
++---------+---------+ +---------+---------+
+
+A frame can be of any size with the only condition it can fit in a block. A block
+can only hold an integer number of frames, or in other words, a frame cannot
+be spawned across two blocks, so there are some details you have to take into
+account when choosing the frame_size. See "Mapping and use of the circular
+buffer (ring)".
+
+--------------------------------------------------------------------------------
++ PACKET_MMAP setting constraints
+--------------------------------------------------------------------------------
+
+In kernel versions prior to 2.4.26 (for the 2.4 branch) and 2.6.5 (2.6 branch),
+the PACKET_MMAP buffer could hold only 32768 frames in a 32 bit architecture or
+16384 in a 64 bit architecture. For information on these kernel versions
+see http://pusa.uv.es/~ulisses/packet_mmap/packet_mmap.pre-2.4.26_2.6.5.txt
+
+ Block size limit
+------------------
+
+As stated earlier, each block is a contiguous physical region of memory. These
+memory regions are allocated with calls to the __get_free_pages() function. As
+the name indicates, this function allocates pages of memory, and the second
+argument is "order" or a power of two number of pages, that is
+(for PAGE_SIZE == 4096) order=0 ==> 4096 bytes, order=1 ==> 8192 bytes,
+order=2 ==> 16384 bytes, etc. The maximum size of a
+region allocated by __get_free_pages is determined by the MAX_ORDER macro. More
+precisely the limit can be calculated as:
+
+ PAGE_SIZE << MAX_ORDER
+
+ In a i386 architecture PAGE_SIZE is 4096 bytes
+ In a 2.4/i386 kernel MAX_ORDER is 10
+ In a 2.6/i386 kernel MAX_ORDER is 11
+
+So get_free_pages can allocate as much as 4MB or 8MB in a 2.4/2.6 kernel
+respectively, with an i386 architecture.
+
+User space programs can include /usr/include/sys/user.h and
+/usr/include/linux/mmzone.h to get PAGE_SIZE MAX_ORDER declarations.
+
+The pagesize can also be determined dynamically with the getpagesize (2)
+system call.
+
+ Block number limit
+--------------------
+
+To understand the constraints of PACKET_MMAP, we have to see the structure
+used to hold the pointers to each block.
+
+Currently, this structure is a dynamically allocated vector with kmalloc
+called pg_vec, its size limits the number of blocks that can be allocated.
+
+ +---+---+---+---+
+ | x | x | x | x |
+ +---+---+---+---+
+ | | | |
+ | | | v
+ | | v block #4
+ | v block #3
+ v block #2
+ block #1
+
+kmalloc allocates any number of bytes of physically contiguous memory from
+a pool of pre-determined sizes. This pool of memory is maintained by the slab
+allocator which is at the end the responsible for doing the allocation and
+hence which imposes the maximum memory that kmalloc can allocate.
+
+In a 2.4/2.6 kernel and the i386 architecture, the limit is 131072 bytes. The
+predetermined sizes that kmalloc uses can be checked in the "size-<bytes>"
+entries of /proc/slabinfo
+
+In a 32 bit architecture, pointers are 4 bytes long, so the total number of
+pointers to blocks is
+
+ 131072/4 = 32768 blocks
+
+ PACKET_MMAP buffer size calculator
+------------------------------------
+
+Definitions:
+
+<size-max> : is the maximum size of allocable with kmalloc (see /proc/slabinfo)
+<pointer size>: depends on the architecture -- sizeof(void *)
+<page size> : depends on the architecture -- PAGE_SIZE or getpagesize (2)
+<max-order> : is the value defined with MAX_ORDER
+<frame size> : it's an upper bound of frame's capture size (more on this later)
+
+from these definitions we will derive
+
+ <block number> = <size-max>/<pointer size>
+ <block size> = <pagesize> << <max-order>
+
+so, the max buffer size is
+
+ <block number> * <block size>
+
+and, the number of frames be
+
+ <block number> * <block size> / <frame size>
+
+Suppose the following parameters, which apply for 2.6 kernel and an
+i386 architecture:
+
+ <size-max> = 131072 bytes
+ <pointer size> = 4 bytes
+ <pagesize> = 4096 bytes
+ <max-order> = 11
+
+and a value for <frame size> of 2048 bytes. These parameters will yield
+
+ <block number> = 131072/4 = 32768 blocks
+ <block size> = 4096 << 11 = 8 MiB.
+
+and hence the buffer will have a 262144 MiB size. So it can hold
+262144 MiB / 2048 bytes = 134217728 frames
+
+Actually, this buffer size is not possible with an i386 architecture.
+Remember that the memory is allocated in kernel space, in the case of
+an i386 kernel's memory size is limited to 1GiB.
+
+All memory allocations are not freed until the socket is closed. The memory
+allocations are done with GFP_KERNEL priority, this basically means that
+the allocation can wait and swap other process' memory in order to allocate
+the necessary memory, so normally limits can be reached.
+
+ Other constraints
+-------------------
+
+If you check the source code you will see that what I draw here as a frame
+is not only the link level frame. At the beginning of each frame there is a
+header called struct tpacket_hdr used in PACKET_MMAP to hold link level's frame
+meta information like timestamp. So what we draw here a frame it's really
+the following (from include/linux/if_packet.h):
+
+/*
+ Frame structure:
+
+ - Start. Frame must be aligned to TPACKET_ALIGNMENT=16
+ - struct tpacket_hdr
+ - pad to TPACKET_ALIGNMENT=16
+ - struct sockaddr_ll
+ - Gap, chosen so that packet data (Start+tp_net) aligns to
+ TPACKET_ALIGNMENT=16
+ - Start+tp_mac: [ Optional MAC header ]
+ - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16.
+ - Pad to align to TPACKET_ALIGNMENT=16
+ */
+
+ The following are conditions that are checked in packet_set_ring
+
+ tp_block_size must be a multiple of PAGE_SIZE (1)
+ tp_frame_size must be greater than TPACKET_HDRLEN (obvious)
+ tp_frame_size must be a multiple of TPACKET_ALIGNMENT
+ tp_frame_nr must be exactly frames_per_block*tp_block_nr
+
+Note that tp_block_size should be chosen to be a power of two or there will
+be a waste of memory.
+
+--------------------------------------------------------------------------------
++ Mapping and use of the circular buffer (ring)
+--------------------------------------------------------------------------------
+
+The mapping of the buffer in the user process is done with the conventional
+mmap function. Even the circular buffer is compound of several physically
+discontiguous blocks of memory, they are contiguous to the user space, hence
+just one call to mmap is needed:
+
+ mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+
+If tp_frame_size is a divisor of tp_block_size frames will be
+contiguously spaced by tp_frame_size bytes. If not, each
+tp_block_size/tp_frame_size frames there will be a gap between
+the frames. This is because a frame cannot be spawn across two
+blocks.
+
+To use one socket for capture and transmission, the mapping of both the
+RX and TX buffer ring has to be done with one call to mmap:
+
+ ...
+ setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &foo, sizeof(foo));
+ setsockopt(fd, SOL_PACKET, PACKET_TX_RING, &bar, sizeof(bar));
+ ...
+ rx_ring = mmap(0, size * 2, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ tx_ring = rx_ring + size;
+
+RX must be the first as the kernel maps the TX ring memory right
+after the RX one.
+
+At the beginning of each frame there is an status field (see
+struct tpacket_hdr). If this field is 0 means that the frame is ready
+to be used for the kernel, If not, there is a frame the user can read
+and the following flags apply:
+
++++ Capture process:
+ from include/linux/if_packet.h
+
+ #define TP_STATUS_COPY (1 << 1)
+ #define TP_STATUS_LOSING (1 << 2)
+ #define TP_STATUS_CSUMNOTREADY (1 << 3)
+ #define TP_STATUS_CSUM_VALID (1 << 7)
+
+TP_STATUS_COPY : This flag indicates that the frame (and associated
+ meta information) has been truncated because it's
+ larger than tp_frame_size. This packet can be
+ read entirely with recvfrom().
+
+ In order to make this work it must to be
+ enabled previously with setsockopt() and
+ the PACKET_COPY_THRESH option.
+
+ The number of frames that can be buffered to
+ be read with recvfrom is limited like a normal socket.
+ See the SO_RCVBUF option in the socket (7) man page.
+
+TP_STATUS_LOSING : indicates there were packet drops from last time
+ statistics where checked with getsockopt() and
+ the PACKET_STATISTICS option.
+
+TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets which
+ its checksum will be done in hardware. So while
+ reading the packet we should not try to check the
+ checksum.
+
+TP_STATUS_CSUM_VALID : This flag indicates that at least the transport
+ header checksum of the packet has been already
+ validated on the kernel side. If the flag is not set
+ then we are free to check the checksum by ourselves
+ provided that TP_STATUS_CSUMNOTREADY is also not set.
+
+for convenience there are also the following defines:
+
+ #define TP_STATUS_KERNEL 0
+ #define TP_STATUS_USER 1
+
+The kernel initializes all frames to TP_STATUS_KERNEL, when the kernel
+receives a packet it puts in the buffer and updates the status with
+at least the TP_STATUS_USER flag. Then the user can read the packet,
+once the packet is read the user must zero the status field, so the kernel
+can use again that frame buffer.
+
+The user can use poll (any other variant should apply too) to check if new
+packets are in the ring:
+
+ struct pollfd pfd;
+
+ pfd.fd = fd;
+ pfd.revents = 0;
+ pfd.events = POLLIN|POLLRDNORM|POLLERR;
+
+ if (status == TP_STATUS_KERNEL)
+ retval = poll(&pfd, 1, timeout);
+
+It doesn't incur in a race condition to first check the status value and
+then poll for frames.
+
+++ Transmission process
+Those defines are also used for transmission:
+
+ #define TP_STATUS_AVAILABLE 0 // Frame is available
+ #define TP_STATUS_SEND_REQUEST 1 // Frame will be sent on next send()
+ #define TP_STATUS_SENDING 2 // Frame is currently in transmission
+ #define TP_STATUS_WRONG_FORMAT 4 // Frame format is not correct
+
+First, the kernel initializes all frames to TP_STATUS_AVAILABLE. To send a
+packet, the user fills a data buffer of an available frame, sets tp_len to
+current data buffer size and sets its status field to TP_STATUS_SEND_REQUEST.
+This can be done on multiple frames. Once the user is ready to transmit, it
+calls send(). Then all buffers with status equal to TP_STATUS_SEND_REQUEST are
+forwarded to the network device. The kernel updates each status of sent
+frames with TP_STATUS_SENDING until the end of transfer.
+At the end of each transfer, buffer status returns to TP_STATUS_AVAILABLE.
+
+ header->tp_len = in_i_size;
+ header->tp_status = TP_STATUS_SEND_REQUEST;
+ retval = send(this->socket, NULL, 0, 0);
+
+The user can also use poll() to check if a buffer is available:
+(status == TP_STATUS_SENDING)
+
+ struct pollfd pfd;
+ pfd.fd = fd;
+ pfd.revents = 0;
+ pfd.events = POLLOUT;
+ retval = poll(&pfd, 1, timeout);
+
+-------------------------------------------------------------------------------
++ What TPACKET versions are available and when to use them?
+-------------------------------------------------------------------------------
+
+ int val = tpacket_version;
+ setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val));
+ getsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val));
+
+where 'tpacket_version' can be TPACKET_V1 (default), TPACKET_V2, TPACKET_V3.
+
+TPACKET_V1:
+ - Default if not otherwise specified by setsockopt(2)
+ - RX_RING, TX_RING available
+
+TPACKET_V1 --> TPACKET_V2:
+ - Made 64 bit clean due to unsigned long usage in TPACKET_V1
+ structures, thus this also works on 64 bit kernel with 32 bit
+ userspace and the like
+ - Timestamp resolution in nanoseconds instead of microseconds
+ - RX_RING, TX_RING available
+ - VLAN metadata information available for packets
+ (TP_STATUS_VLAN_VALID, TP_STATUS_VLAN_TPID_VALID),
+ in the tpacket2_hdr structure:
+ - TP_STATUS_VLAN_VALID bit being set into the tp_status field indicates
+ that the tp_vlan_tci field has valid VLAN TCI value
+ - TP_STATUS_VLAN_TPID_VALID bit being set into the tp_status field
+ indicates that the tp_vlan_tpid field has valid VLAN TPID value
+ - How to switch to TPACKET_V2:
+ 1. Replace struct tpacket_hdr by struct tpacket2_hdr
+ 2. Query header len and save
+ 3. Set protocol version to 2, set up ring as usual
+ 4. For getting the sockaddr_ll,
+ use (void *)hdr + TPACKET_ALIGN(hdrlen) instead of
+ (void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
+
+TPACKET_V2 --> TPACKET_V3:
+ - Flexible buffer implementation:
+ 1. Blocks can be configured with non-static frame-size
+ 2. Read/poll is at a block-level (as opposed to packet-level)
+ 3. Added poll timeout to avoid indefinite user-space wait
+ on idle links
+ 4. Added user-configurable knobs:
+ 4.1 block::timeout
+ 4.2 tpkt_hdr::sk_rxhash
+ - RX Hash data available in user space
+ - Currently only RX_RING available
+
+-------------------------------------------------------------------------------
++ AF_PACKET fanout mode
+-------------------------------------------------------------------------------
+
+In the AF_PACKET fanout mode, packet reception can be load balanced among
+processes. This also works in combination with mmap(2) on packet sockets.
+
+Currently implemented fanout policies are:
+
+ - PACKET_FANOUT_HASH: schedule to socket by skb's packet hash
+ - PACKET_FANOUT_LB: schedule to socket by round-robin
+ - PACKET_FANOUT_CPU: schedule to socket by CPU packet arrives on
+ - PACKET_FANOUT_RND: schedule to socket by random selection
+ - PACKET_FANOUT_ROLLOVER: if one socket is full, rollover to another
+ - PACKET_FANOUT_QM: schedule to socket by skbs recorded queue_mapping
+
+Minimal example code by David S. Miller (try things like "./test eth0 hash",
+"./test eth0 lb", etc.):
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <unistd.h>
+
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+
+#include <net/if.h>
+
+static const char *device_name;
+static int fanout_type;
+static int fanout_id;
+
+#ifndef PACKET_FANOUT
+# define PACKET_FANOUT 18
+# define PACKET_FANOUT_HASH 0
+# define PACKET_FANOUT_LB 1
+#endif
+
+static int setup_socket(void)
+{
+ int err, fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_IP));
+ struct sockaddr_ll ll;
+ struct ifreq ifr;
+ int fanout_arg;
+
+ if (fd < 0) {
+ perror("socket");
+ return EXIT_FAILURE;
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+ strcpy(ifr.ifr_name, device_name);
+ err = ioctl(fd, SIOCGIFINDEX, &ifr);
+ if (err < 0) {
+ perror("SIOCGIFINDEX");
+ return EXIT_FAILURE;
+ }
+
+ memset(&ll, 0, sizeof(ll));
+ ll.sll_family = AF_PACKET;
+ ll.sll_ifindex = ifr.ifr_ifindex;
+ err = bind(fd, (struct sockaddr *) &ll, sizeof(ll));
+ if (err < 0) {
+ perror("bind");
+ return EXIT_FAILURE;
+ }
+
+ fanout_arg = (fanout_id | (fanout_type << 16));
+ err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &fanout_arg, sizeof(fanout_arg));
+ if (err) {
+ perror("setsockopt");
+ return EXIT_FAILURE;
+ }
+
+ return fd;
+}
+
+static void fanout_thread(void)
+{
+ int fd = setup_socket();
+ int limit = 10000;
+
+ if (fd < 0)
+ exit(fd);
+
+ while (limit-- > 0) {
+ char buf[1600];
+ int err;
+
+ err = read(fd, buf, sizeof(buf));
+ if (err < 0) {
+ perror("read");
+ exit(EXIT_FAILURE);
+ }
+ if ((limit % 10) == 0)
+ fprintf(stdout, "(%d) \n", getpid());
+ }
+
+ fprintf(stdout, "%d: Received 10000 packets\n", getpid());
+
+ close(fd);
+ exit(0);
+}
+
+int main(int argc, char **argp)
+{
+ int fd, err;
+ int i;
+
+ if (argc != 3) {
+ fprintf(stderr, "Usage: %s INTERFACE {hash|lb}\n", argp[0]);
+ return EXIT_FAILURE;
+ }
+
+ if (!strcmp(argp[2], "hash"))
+ fanout_type = PACKET_FANOUT_HASH;
+ else if (!strcmp(argp[2], "lb"))
+ fanout_type = PACKET_FANOUT_LB;
+ else {
+ fprintf(stderr, "Unknown fanout type [%s]\n", argp[2]);
+ exit(EXIT_FAILURE);
+ }
+
+ device_name = argp[1];
+ fanout_id = getpid() & 0xffff;
+
+ for (i = 0; i < 4; i++) {
+ pid_t pid = fork();
+
+ switch (pid) {
+ case 0:
+ fanout_thread();
+
+ case -1:
+ perror("fork");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ for (i = 0; i < 4; i++) {
+ int status;
+
+ wait(&status);
+ }
+
+ return 0;
+}
+
+-------------------------------------------------------------------------------
++ AF_PACKET TPACKET_V3 example
+-------------------------------------------------------------------------------
+
+AF_PACKET's TPACKET_V3 ring buffer can be configured to use non-static frame
+sizes by doing it's own memory management. It is based on blocks where polling
+works on a per block basis instead of per ring as in TPACKET_V2 and predecessor.
+
+It is said that TPACKET_V3 brings the following benefits:
+ *) ~15 - 20% reduction in CPU-usage
+ *) ~20% increase in packet capture rate
+ *) ~2x increase in packet density
+ *) Port aggregation analysis
+ *) Non static frame size to capture entire packet payload
+
+So it seems to be a good candidate to be used with packet fanout.
+
+Minimal example code by Daniel Borkmann based on Chetan Loke's lolpcap (compile
+it with gcc -Wall -O2 blob.c, and try things like "./a.out eth0", etc.):
+
+/* Written from scratch, but kernel-to-user space API usage
+ * dissected from lolpcap:
+ * Copyright 2011, Chetan Loke <loke.chetan@gmail.com>
+ * License: GPL, version 2.0
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <net/if.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <poll.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <linux/if_packet.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+#ifndef likely
+# define likely(x) __builtin_expect(!!(x), 1)
+#endif
+#ifndef unlikely
+# define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+struct block_desc {
+ uint32_t version;
+ uint32_t offset_to_priv;
+ struct tpacket_hdr_v1 h1;
+};
+
+struct ring {
+ struct iovec *rd;
+ uint8_t *map;
+ struct tpacket_req3 req;
+};
+
+static unsigned long packets_total = 0, bytes_total = 0;
+static sig_atomic_t sigint = 0;
+
+static void sighandler(int num)
+{
+ sigint = 1;
+}
+
+static int setup_socket(struct ring *ring, char *netdev)
+{
+ int err, i, fd, v = TPACKET_V3;
+ struct sockaddr_ll ll;
+ unsigned int blocksiz = 1 << 22, framesiz = 1 << 11;
+ unsigned int blocknum = 64;
+
+ fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+ if (fd < 0) {
+ perror("socket");
+ exit(1);
+ }
+
+ err = setsockopt(fd, SOL_PACKET, PACKET_VERSION, &v, sizeof(v));
+ if (err < 0) {
+ perror("setsockopt");
+ exit(1);
+ }
+
+ memset(&ring->req, 0, sizeof(ring->req));
+ ring->req.tp_block_size = blocksiz;
+ ring->req.tp_frame_size = framesiz;
+ ring->req.tp_block_nr = blocknum;
+ ring->req.tp_frame_nr = (blocksiz * blocknum) / framesiz;
+ ring->req.tp_retire_blk_tov = 60;
+ ring->req.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+ err = setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &ring->req,
+ sizeof(ring->req));
+ if (err < 0) {
+ perror("setsockopt");
+ exit(1);
+ }
+
+ ring->map = mmap(NULL, ring->req.tp_block_size * ring->req.tp_block_nr,
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0);
+ if (ring->map == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ ring->rd = malloc(ring->req.tp_block_nr * sizeof(*ring->rd));
+ assert(ring->rd);
+ for (i = 0; i < ring->req.tp_block_nr; ++i) {
+ ring->rd[i].iov_base = ring->map + (i * ring->req.tp_block_size);
+ ring->rd[i].iov_len = ring->req.tp_block_size;
+ }
+
+ memset(&ll, 0, sizeof(ll));
+ ll.sll_family = PF_PACKET;
+ ll.sll_protocol = htons(ETH_P_ALL);
+ ll.sll_ifindex = if_nametoindex(netdev);
+ ll.sll_hatype = 0;
+ ll.sll_pkttype = 0;
+ ll.sll_halen = 0;
+
+ err = bind(fd, (struct sockaddr *) &ll, sizeof(ll));
+ if (err < 0) {
+ perror("bind");
+ exit(1);
+ }
+
+ return fd;
+}
+
+static void display(struct tpacket3_hdr *ppd)
+{
+ struct ethhdr *eth = (struct ethhdr *) ((uint8_t *) ppd + ppd->tp_mac);
+ struct iphdr *ip = (struct iphdr *) ((uint8_t *) eth + ETH_HLEN);
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ struct sockaddr_in ss, sd;
+ char sbuff[NI_MAXHOST], dbuff[NI_MAXHOST];
+
+ memset(&ss, 0, sizeof(ss));
+ ss.sin_family = PF_INET;
+ ss.sin_addr.s_addr = ip->saddr;
+ getnameinfo((struct sockaddr *) &ss, sizeof(ss),
+ sbuff, sizeof(sbuff), NULL, 0, NI_NUMERICHOST);
+
+ memset(&sd, 0, sizeof(sd));
+ sd.sin_family = PF_INET;
+ sd.sin_addr.s_addr = ip->daddr;
+ getnameinfo((struct sockaddr *) &sd, sizeof(sd),
+ dbuff, sizeof(dbuff), NULL, 0, NI_NUMERICHOST);
+
+ printf("%s -> %s, ", sbuff, dbuff);
+ }
+
+ printf("rxhash: 0x%x\n", ppd->hv1.tp_rxhash);
+}
+
+static void walk_block(struct block_desc *pbd, const int block_num)
+{
+ int num_pkts = pbd->h1.num_pkts, i;
+ unsigned long bytes = 0;
+ struct tpacket3_hdr *ppd;
+
+ ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
+ pbd->h1.offset_to_first_pkt);
+ for (i = 0; i < num_pkts; ++i) {
+ bytes += ppd->tp_snaplen;
+ display(ppd);
+
+ ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd +
+ ppd->tp_next_offset);
+ }
+
+ packets_total += num_pkts;
+ bytes_total += bytes;
+}
+
+static void flush_block(struct block_desc *pbd)
+{
+ pbd->h1.block_status = TP_STATUS_KERNEL;
+}
+
+static void teardown_socket(struct ring *ring, int fd)
+{
+ munmap(ring->map, ring->req.tp_block_size * ring->req.tp_block_nr);
+ free(ring->rd);
+ close(fd);
+}
+
+int main(int argc, char **argp)
+{
+ int fd, err;
+ socklen_t len;
+ struct ring ring;
+ struct pollfd pfd;
+ unsigned int block_num = 0, blocks = 64;
+ struct block_desc *pbd;
+ struct tpacket_stats_v3 stats;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s INTERFACE\n", argp[0]);
+ return EXIT_FAILURE;
+ }
+
+ signal(SIGINT, sighandler);
+
+ memset(&ring, 0, sizeof(ring));
+ fd = setup_socket(&ring, argp[argc - 1]);
+ assert(fd > 0);
+
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.fd = fd;
+ pfd.events = POLLIN | POLLERR;
+ pfd.revents = 0;
+
+ while (likely(!sigint)) {
+ pbd = (struct block_desc *) ring.rd[block_num].iov_base;
+
+ if ((pbd->h1.block_status & TP_STATUS_USER) == 0) {
+ poll(&pfd, 1, -1);
+ continue;
+ }
+
+ walk_block(pbd, block_num);
+ flush_block(pbd);
+ block_num = (block_num + 1) % blocks;
+ }
+
+ len = sizeof(stats);
+ err = getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len);
+ if (err < 0) {
+ perror("getsockopt");
+ exit(1);
+ }
+
+ fflush(stdout);
+ printf("\nReceived %u packets, %lu bytes, %u dropped, freeze_q_cnt: %u\n",
+ stats.tp_packets, bytes_total, stats.tp_drops,
+ stats.tp_freeze_q_cnt);
+
+ teardown_socket(&ring, fd);
+ return 0;
+}
+
+-------------------------------------------------------------------------------
++ PACKET_QDISC_BYPASS
+-------------------------------------------------------------------------------
+
+If there is a requirement to load the network with many packets in a similar
+fashion as pktgen does, you might set the following option after socket
+creation:
+
+ int one = 1;
+ setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one));
+
+This has the side-effect, that packets sent through PF_PACKET will bypass the
+kernel's qdisc layer and are forcedly pushed to the driver directly. Meaning,
+packet are not buffered, tc disciplines are ignored, increased loss can occur
+and such packets are also not visible to other PF_PACKET sockets anymore. So,
+you have been warned; generally, this can be useful for stress testing various
+components of a system.
+
+On default, PACKET_QDISC_BYPASS is disabled and needs to be explicitly enabled
+on PF_PACKET sockets.
+
+-------------------------------------------------------------------------------
++ PACKET_TIMESTAMP
+-------------------------------------------------------------------------------
+
+The PACKET_TIMESTAMP setting determines the source of the timestamp in
+the packet meta information for mmap(2)ed RX_RING and TX_RINGs. If your
+NIC is capable of timestamping packets in hardware, you can request those
+hardware timestamps to be used. Note: you may need to enable the generation
+of hardware timestamps with SIOCSHWTSTAMP (see related information from
+Documentation/networking/timestamping.txt).
+
+PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING:
+
+ int req = SOF_TIMESTAMPING_RAW_HARDWARE;
+ setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req))
+
+For the mmap(2)ed ring buffers, such timestamps are stored in the
+tpacket{,2,3}_hdr structure's tp_sec and tp_{n,u}sec members. To determine
+what kind of timestamp has been reported, the tp_status field is binary |'ed
+with the following possible bits ...
+
+ TP_STATUS_TS_RAW_HARDWARE
+ TP_STATUS_TS_SOFTWARE
+
+... that are equivalent to its SOF_TIMESTAMPING_* counterparts. For the
+RX_RING, if neither is set (i.e. PACKET_TIMESTAMP is not set), then a
+software fallback was invoked *within* PF_PACKET's processing code (less
+precise).
+
+Getting timestamps for the TX_RING works as follows: i) fill the ring frames,
+ii) call sendto() e.g. in blocking mode, iii) wait for status of relevant
+frames to be updated resp. the frame handed over to the application, iv) walk
+through the frames to pick up the individual hw/sw timestamps.
+
+Only (!) if transmit timestamping is enabled, then these bits are combined
+with binary | with TP_STATUS_AVAILABLE, so you must check for that in your
+application (e.g. !(tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))
+in a first step to see if the frame belongs to the application, and then
+one can extract the type of timestamp in a second step from tp_status)!
+
+If you don't care about them, thus having it disabled, checking for
+TP_STATUS_AVAILABLE resp. TP_STATUS_WRONG_FORMAT is sufficient. If in the
+TX_RING part only TP_STATUS_AVAILABLE is set, then the tp_sec and tp_{n,u}sec
+members do not contain a valid value. For TX_RINGs, by default no timestamp
+is generated!
+
+See include/linux/net_tstamp.h and Documentation/networking/timestamping
+for more information on hardware timestamps.
+
+-------------------------------------------------------------------------------
++ Miscellaneous bits
+-------------------------------------------------------------------------------
+
+- Packet sockets work well together with Linux socket filters, thus you also
+ might want to have a look at Documentation/networking/filter.txt
+
+--------------------------------------------------------------------------------
++ THANKS
+--------------------------------------------------------------------------------
+
+ Jesse Brandeburg, for fixing my grammathical/spelling errors
+
diff --git a/Documentation/networking/phonet.txt b/Documentation/networking/phonet.txt
new file mode 100644
index 000000000..81003581f
--- /dev/null
+++ b/Documentation/networking/phonet.txt
@@ -0,0 +1,214 @@
+Linux Phonet protocol family
+============================
+
+Introduction
+------------
+
+Phonet is a packet protocol used by Nokia cellular modems for both IPC
+and RPC. With the Linux Phonet socket family, Linux host processes can
+receive and send messages from/to the modem, or any other external
+device attached to the modem. The modem takes care of routing.
+
+Phonet packets can be exchanged through various hardware connections
+depending on the device, such as:
+ - USB with the CDC Phonet interface,
+ - infrared,
+ - Bluetooth,
+ - an RS232 serial port (with a dedicated "FBUS" line discipline),
+ - the SSI bus with some TI OMAP processors.
+
+
+Packets format
+--------------
+
+Phonet packets have a common header as follows:
+
+ struct phonethdr {
+ uint8_t pn_media; /* Media type (link-layer identifier) */
+ uint8_t pn_rdev; /* Receiver device ID */
+ uint8_t pn_sdev; /* Sender device ID */
+ uint8_t pn_res; /* Resource ID or function */
+ uint16_t pn_length; /* Big-endian message byte length (minus 6) */
+ uint8_t pn_robj; /* Receiver object ID */
+ uint8_t pn_sobj; /* Sender object ID */
+ };
+
+On Linux, the link-layer header includes the pn_media byte (see below).
+The next 7 bytes are part of the network-layer header.
+
+The device ID is split: the 6 higher-order bits constitute the device
+address, while the 2 lower-order bits are used for multiplexing, as are
+the 8-bit object identifiers. As such, Phonet can be considered as a
+network layer with 6 bits of address space and 10 bits for transport
+protocol (much like port numbers in IP world).
+
+The modem always has address number zero. All other device have a their
+own 6-bit address.
+
+
+Link layer
+----------
+
+Phonet links are always point-to-point links. The link layer header
+consists of a single Phonet media type byte. It uniquely identifies the
+link through which the packet is transmitted, from the modem's
+perspective. Each Phonet network device shall prepend and set the media
+type byte as appropriate. For convenience, a common phonet_header_ops
+link-layer header operations structure is provided. It sets the
+media type according to the network device hardware address.
+
+Linux Phonet network interfaces support a dedicated link layer packets
+type (ETH_P_PHONET) which is out of the Ethernet type range. They can
+only send and receive Phonet packets.
+
+The virtual TUN tunnel device driver can also be used for Phonet. This
+requires IFF_TUN mode, _without_ the IFF_NO_PI flag. In this case,
+there is no link-layer header, so there is no Phonet media type byte.
+
+Note that Phonet interfaces are not allowed to re-order packets, so
+only the (default) Linux FIFO qdisc should be used with them.
+
+
+Network layer
+-------------
+
+The Phonet socket address family maps the Phonet packet header:
+
+ struct sockaddr_pn {
+ sa_family_t spn_family; /* AF_PHONET */
+ uint8_t spn_obj; /* Object ID */
+ uint8_t spn_dev; /* Device ID */
+ uint8_t spn_resource; /* Resource or function */
+ uint8_t spn_zero[...]; /* Padding */
+ };
+
+The resource field is only used when sending and receiving;
+It is ignored by bind() and getsockname().
+
+
+Low-level datagram protocol
+---------------------------
+
+Applications can send Phonet messages using the Phonet datagram socket
+protocol from the PF_PHONET family. Each socket is bound to one of the
+2^10 object IDs available, and can send and receive packets with any
+other peer.
+
+ struct sockaddr_pn addr = { .spn_family = AF_PHONET, };
+ ssize_t len;
+ socklen_t addrlen = sizeof(addr);
+ int fd;
+
+ fd = socket(PF_PHONET, SOCK_DGRAM, 0);
+ bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+ /* ... */
+
+ sendto(fd, msg, msglen, 0, (struct sockaddr *)&addr, sizeof(addr));
+ len = recvfrom(fd, buf, sizeof(buf), 0,
+ (struct sockaddr *)&addr, &addrlen);
+
+This protocol follows the SOCK_DGRAM connection-less semantics.
+However, connect() and getpeername() are not supported, as they did
+not seem useful with Phonet usages (could be added easily).
+
+
+Resource subscription
+---------------------
+
+A Phonet datagram socket can be subscribed to any number of 8-bits
+Phonet resources, as follow:
+
+ uint32_t res = 0xXX;
+ ioctl(fd, SIOCPNADDRESOURCE, &res);
+
+Subscription is similarly cancelled using the SIOCPNDELRESOURCE I/O
+control request, or when the socket is closed.
+
+Note that no more than one socket can be subcribed to any given
+resource at a time. If not, ioctl() will return EBUSY.
+
+
+Phonet Pipe protocol
+--------------------
+
+The Phonet Pipe protocol is a simple sequenced packets protocol
+with end-to-end congestion control. It uses the passive listening
+socket paradigm. The listening socket is bound to an unique free object
+ID. Each listening socket can handle up to 255 simultaneous
+connections, one per accept()'d socket.
+
+ int lfd, cfd;
+
+ lfd = socket(PF_PHONET, SOCK_SEQPACKET, PN_PROTO_PIPE);
+ listen (lfd, INT_MAX);
+
+ /* ... */
+ cfd = accept(lfd, NULL, NULL);
+ for (;;)
+ {
+ char buf[...];
+ ssize_t len = read(cfd, buf, sizeof(buf));
+
+ /* ... */
+
+ write(cfd, msg, msglen);
+ }
+
+Connections are traditionally established between two endpoints by a
+"third party" application. This means that both endpoints are passive.
+
+
+As of Linux kernel version 2.6.39, it is also possible to connect
+two endpoints directly, using connect() on the active side. This is
+intended to support the newer Nokia Wireless Modem API, as found in
+e.g. the Nokia Slim Modem in the ST-Ericsson U8500 platform:
+
+ struct sockaddr_spn spn;
+ int fd;
+
+ fd = socket(PF_PHONET, SOCK_SEQPACKET, PN_PROTO_PIPE);
+ memset(&spn, 0, sizeof(spn));
+ spn.spn_family = AF_PHONET;
+ spn.spn_obj = ...;
+ spn.spn_dev = ...;
+ spn.spn_resource = 0xD9;
+ connect(fd, (struct sockaddr *)&spn, sizeof(spn));
+ /* normal I/O here ... */
+ close(fd);
+
+
+WARNING:
+When polling a connected pipe socket for writability, there is an
+intrinsic race condition whereby writability might be lost between the
+polling and the writing system calls. In this case, the socket will
+block until write becomes possible again, unless non-blocking mode
+is enabled.
+
+
+The pipe protocol provides two socket options at the SOL_PNPIPE level:
+
+ PNPIPE_ENCAP accepts one integer value (int) of:
+
+ PNPIPE_ENCAP_NONE: The socket operates normally (default).
+
+ PNPIPE_ENCAP_IP: The socket is used as a backend for a virtual IP
+ interface. This requires CAP_NET_ADMIN capability. GPRS data
+ support on Nokia modems can use this. Note that the socket cannot
+ be reliably poll()'d or read() from while in this mode.
+
+ PNPIPE_IFINDEX is a read-only integer value. It contains the
+ interface index of the network interface created by PNPIPE_ENCAP,
+ or zero if encapsulation is off.
+
+ PNPIPE_HANDLE is a read-only integer value. It contains the underlying
+ identifier ("pipe handle") of the pipe. This is only defined for
+ socket descriptors that are already connected or being connected.
+
+
+Authors
+-------
+
+Linux Phonet was initially written by Sakari Ailus.
+Other contributors include Mikä Liljeberg, Andras Domokos,
+Carlos Chinea and Rémi Denis-Courmont.
+Copyright (C) 2008 Nokia Corporation.
diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt
new file mode 100644
index 000000000..e839e7efc
--- /dev/null
+++ b/Documentation/networking/phy.txt
@@ -0,0 +1,339 @@
+
+-------
+PHY Abstraction Layer
+(Updated 2008-04-08)
+
+Purpose
+
+ Most network devices consist of set of registers which provide an interface
+ to a MAC layer, which communicates with the physical connection through a
+ PHY. The PHY concerns itself with negotiating link parameters with the link
+ partner on the other side of the network connection (typically, an ethernet
+ cable), and provides a register interface to allow drivers to determine what
+ settings were chosen, and to configure what settings are allowed.
+
+ While these devices are distinct from the network devices, and conform to a
+ standard layout for the registers, it has been common practice to integrate
+ the PHY management code with the network driver. This has resulted in large
+ amounts of redundant code. Also, on embedded systems with multiple (and
+ sometimes quite different) ethernet controllers connected to the same
+ management bus, it is difficult to ensure safe use of the bus.
+
+ Since the PHYs are devices, and the management busses through which they are
+ accessed are, in fact, busses, the PHY Abstraction Layer treats them as such.
+ In doing so, it has these goals:
+
+ 1) Increase code-reuse
+ 2) Increase overall code-maintainability
+ 3) Speed development time for new network drivers, and for new systems
+
+ Basically, this layer is meant to provide an interface to PHY devices which
+ allows network driver writers to write as little code as possible, while
+ still providing a full feature set.
+
+The MDIO bus
+
+ Most network devices are connected to a PHY by means of a management bus.
+ Different devices use different busses (though some share common interfaces).
+ In order to take advantage of the PAL, each bus interface needs to be
+ registered as a distinct device.
+
+ 1) read and write functions must be implemented. Their prototypes are:
+
+ int write(struct mii_bus *bus, int mii_id, int regnum, u16 value);
+ int read(struct mii_bus *bus, int mii_id, int regnum);
+
+ mii_id is the address on the bus for the PHY, and regnum is the register
+ number. These functions are guaranteed not to be called from interrupt
+ time, so it is safe for them to block, waiting for an interrupt to signal
+ the operation is complete
+
+ 2) A reset function is optional. This is used to return the bus to an
+ initialized state.
+
+ 3) A probe function is needed. This function should set up anything the bus
+ driver needs, setup the mii_bus structure, and register with the PAL using
+ mdiobus_register. Similarly, there's a remove function to undo all of
+ that (use mdiobus_unregister).
+
+ 4) Like any driver, the device_driver structure must be configured, and init
+ exit functions are used to register the driver.
+
+ 5) The bus must also be declared somewhere as a device, and registered.
+
+ As an example for how one driver implemented an mdio bus driver, see
+ drivers/net/ethernet/freescale/fsl_pq_mdio.c and an associated DTS file
+ for one of the users. (e.g. "git grep fsl,.*-mdio arch/powerpc/boot/dts/")
+
+Connecting to a PHY
+
+ Sometime during startup, the network driver needs to establish a connection
+ between the PHY device, and the network device. At this time, the PHY's bus
+ and drivers need to all have been loaded, so it is ready for the connection.
+ At this point, there are several ways to connect to the PHY:
+
+ 1) The PAL handles everything, and only calls the network driver when
+ the link state changes, so it can react.
+
+ 2) The PAL handles everything except interrupts (usually because the
+ controller has the interrupt registers).
+
+ 3) The PAL handles everything, but checks in with the driver every second,
+ allowing the network driver to react first to any changes before the PAL
+ does.
+
+ 4) The PAL serves only as a library of functions, with the network device
+ manually calling functions to update status, and configure the PHY
+
+
+Letting the PHY Abstraction Layer do Everything
+
+ If you choose option 1 (The hope is that every driver can, but to still be
+ useful to drivers that can't), connecting to the PHY is simple:
+
+ First, you need a function to react to changes in the link state. This
+ function follows this protocol:
+
+ static void adjust_link(struct net_device *dev);
+
+ Next, you need to know the device name of the PHY connected to this device.
+ The name will look something like, "0:00", where the first number is the
+ bus id, and the second is the PHY's address on that bus. Typically,
+ the bus is responsible for making its ID unique.
+
+ Now, to connect, just call this function:
+
+ phydev = phy_connect(dev, phy_name, &adjust_link, interface);
+
+ phydev is a pointer to the phy_device structure which represents the PHY. If
+ phy_connect is successful, it will return the pointer. dev, here, is the
+ pointer to your net_device. Once done, this function will have started the
+ PHY's software state machine, and registered for the PHY's interrupt, if it
+ has one. The phydev structure will be populated with information about the
+ current state, though the PHY will not yet be truly operational at this
+ point.
+
+ PHY-specific flags should be set in phydev->dev_flags prior to the call
+ to phy_connect() such that the underlying PHY driver can check for flags
+ and perform specific operations based on them.
+ This is useful if the system has put hardware restrictions on
+ the PHY/controller, of which the PHY needs to be aware.
+
+ interface is a u32 which specifies the connection type used
+ between the controller and the PHY. Examples are GMII, MII,
+ RGMII, and SGMII. For a full list, see include/linux/phy.h
+
+ Now just make sure that phydev->supported and phydev->advertising have any
+ values pruned from them which don't make sense for your controller (a 10/100
+ controller may be connected to a gigabit capable PHY, so you would need to
+ mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions
+ for these bitfields. Note that you should not SET any bits, or the PHY may
+ get put into an unsupported state.
+
+ Lastly, once the controller is ready to handle network traffic, you call
+ phy_start(phydev). This tells the PAL that you are ready, and configures the
+ PHY to connect to the network. If you want to handle your own interrupts,
+ just set phydev->irq to PHY_IGNORE_INTERRUPT before you call phy_start.
+ Similarly, if you don't want to use interrupts, set phydev->irq to PHY_POLL.
+
+ When you want to disconnect from the network (even if just briefly), you call
+ phy_stop(phydev).
+
+Keeping Close Tabs on the PAL
+
+ It is possible that the PAL's built-in state machine needs a little help to
+ keep your network device and the PHY properly in sync. If so, you can
+ register a helper function when connecting to the PHY, which will be called
+ every second before the state machine reacts to any changes. To do this, you
+ need to manually call phy_attach() and phy_prepare_link(), and then call
+ phy_start_machine() with the second argument set to point to your special
+ handler.
+
+ Currently there are no examples of how to use this functionality, and testing
+ on it has been limited because the author does not have any drivers which use
+ it (they all use option 1). So Caveat Emptor.
+
+Doing it all yourself
+
+ There's a remote chance that the PAL's built-in state machine cannot track
+ the complex interactions between the PHY and your network device. If this is
+ so, you can simply call phy_attach(), and not call phy_start_machine or
+ phy_prepare_link(). This will mean that phydev->state is entirely yours to
+ handle (phy_start and phy_stop toggle between some of the states, so you
+ might need to avoid them).
+
+ An effort has been made to make sure that useful functionality can be
+ accessed without the state-machine running, and most of these functions are
+ descended from functions which did not interact with a complex state-machine.
+ However, again, no effort has been made so far to test running without the
+ state machine, so tryer beware.
+
+ Here is a brief rundown of the functions:
+
+ int phy_read(struct phy_device *phydev, u16 regnum);
+ int phy_write(struct phy_device *phydev, u16 regnum, u16 val);
+
+ Simple read/write primitives. They invoke the bus's read/write function
+ pointers.
+
+ void phy_print_status(struct phy_device *phydev);
+
+ A convenience function to print out the PHY status neatly.
+
+ int phy_start_interrupts(struct phy_device *phydev);
+ int phy_stop_interrupts(struct phy_device *phydev);
+
+ Requests the IRQ for the PHY interrupts, then enables them for
+ start, or disables then frees them for stop.
+
+ struct phy_device * phy_attach(struct net_device *dev, const char *phy_id,
+ phy_interface_t interface);
+
+ Attaches a network device to a particular PHY, binding the PHY to a generic
+ driver if none was found during bus initialization.
+
+ int phy_start_aneg(struct phy_device *phydev);
+
+ Using variables inside the phydev structure, either configures advertising
+ and resets autonegotiation, or disables autonegotiation, and configures
+ forced settings.
+
+ static inline int phy_read_status(struct phy_device *phydev);
+
+ Fills the phydev structure with up-to-date information about the current
+ settings in the PHY.
+
+ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+ int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd);
+
+ Ethtool convenience functions.
+
+ int phy_mii_ioctl(struct phy_device *phydev,
+ struct mii_ioctl_data *mii_data, int cmd);
+
+ The MII ioctl. Note that this function will completely screw up the state
+ machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to
+ use this only to write registers which are not standard, and don't set off
+ a renegotiation.
+
+
+PHY Device Drivers
+
+ With the PHY Abstraction Layer, adding support for new PHYs is
+ quite easy. In some cases, no work is required at all! However,
+ many PHYs require a little hand-holding to get up-and-running.
+
+Generic PHY driver
+
+ If the desired PHY doesn't have any errata, quirks, or special
+ features you want to support, then it may be best to not add
+ support, and let the PHY Abstraction Layer's Generic PHY Driver
+ do all of the work.
+
+Writing a PHY driver
+
+ If you do need to write a PHY driver, the first thing to do is
+ make sure it can be matched with an appropriate PHY device.
+ This is done during bus initialization by reading the device's
+ UID (stored in registers 2 and 3), then comparing it to each
+ driver's phy_id field by ANDing it with each driver's
+ phy_id_mask field. Also, it needs a name. Here's an example:
+
+ static struct phy_driver dm9161_driver = {
+ .phy_id = 0x0181b880,
+ .name = "Davicom DM9161E",
+ .phy_id_mask = 0x0ffffff0,
+ ...
+ }
+
+ Next, you need to specify what features (speed, duplex, autoneg,
+ etc) your PHY device and driver support. Most PHYs support
+ PHY_BASIC_FEATURES, but you can look in include/mii.h for other
+ features.
+
+ Each driver consists of a number of function pointers:
+
+ soft_reset: perform a PHY software reset
+ config_init: configures PHY into a sane state after a reset.
+ For instance, a Davicom PHY requires descrambling disabled.
+ probe: Allocate phy->priv, optionally refuse to bind.
+ PHY may not have been reset or had fixups run yet.
+ suspend/resume: power management
+ config_aneg: Changes the speed/duplex/negotiation settings
+ aneg_done: Determines the auto-negotiation result
+ read_status: Reads the current speed/duplex/negotiation settings
+ ack_interrupt: Clear a pending interrupt
+ did_interrupt: Checks if the PHY generated an interrupt
+ config_intr: Enable or disable interrupts
+ remove: Does any driver take-down
+ ts_info: Queries about the HW timestamping status
+ hwtstamp: Set the PHY HW timestamping configuration
+ rxtstamp: Requests a receive timestamp at the PHY level for a 'skb'
+ txtsamp: Requests a transmit timestamp at the PHY level for a 'skb'
+ set_wol: Enable Wake-on-LAN at the PHY level
+ get_wol: Get the Wake-on-LAN status at the PHY level
+ read_mmd_indirect: Read PHY MMD indirect register
+ write_mmd_indirect: Write PHY MMD indirect register
+
+ Of these, only config_aneg and read_status are required to be
+ assigned by the driver code. The rest are optional. Also, it is
+ preferred to use the generic phy driver's versions of these two
+ functions if at all possible: genphy_read_status and
+ genphy_config_aneg. If this is not possible, it is likely that
+ you only need to perform some actions before and after invoking
+ these functions, and so your functions will wrap the generic
+ ones.
+
+ Feel free to look at the Marvell, Cicada, and Davicom drivers in
+ drivers/net/phy/ for examples (the lxt and qsemi drivers have
+ not been tested as of this writing).
+
+ The PHY's MMD register accesses are handled by the PAL framework
+ by default, but can be overridden by a specific PHY driver if
+ required. This could be the case if a PHY was released for
+ manufacturing before the MMD PHY register definitions were
+ standardized by the IEEE. Most modern PHYs will be able to use
+ the generic PAL framework for accessing the PHY's MMD registers.
+ An example of such usage is for Energy Efficient Ethernet support,
+ implemented in the PAL. This support uses the PAL to access MMD
+ registers for EEE query and configuration if the PHY supports
+ the IEEE standard access mechanisms, or can use the PHY's specific
+ access interfaces if overridden by the specific PHY driver. See
+ the Micrel driver in drivers/net/phy/ for an example of how this
+ can be implemented.
+
+Board Fixups
+
+ Sometimes the specific interaction between the platform and the PHY requires
+ special handling. For instance, to change where the PHY's clock input is,
+ or to add a delay to account for latency issues in the data path. In order
+ to support such contingencies, the PHY Layer allows platform code to register
+ fixups to be run when the PHY is brought up (or subsequently reset).
+
+ When the PHY Layer brings up a PHY it checks to see if there are any fixups
+ registered for it, matching based on UID (contained in the PHY device's phy_id
+ field) and the bus identifier (contained in phydev->dev.bus_id). Both must
+ match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as
+ wildcards for the bus ID and UID, respectively.
+
+ When a match is found, the PHY layer will invoke the run function associated
+ with the fixup. This function is passed a pointer to the phy_device of
+ interest. It should therefore only operate on that PHY.
+
+ The platform code can either register the fixup using phy_register_fixup():
+
+ int phy_register_fixup(const char *phy_id,
+ u32 phy_uid, u32 phy_uid_mask,
+ int (*run)(struct phy_device *));
+
+ Or using one of the two stubs, phy_register_fixup_for_uid() and
+ phy_register_fixup_for_id():
+
+ int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
+ int (*run)(struct phy_device *));
+ int phy_register_fixup_for_id(const char *phy_id,
+ int (*run)(struct phy_device *));
+
+ The stubs set one of the two matching criteria, and set the other one to
+ match anything.
+
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
new file mode 100644
index 000000000..0344f1d45
--- /dev/null
+++ b/Documentation/networking/pktgen.txt
@@ -0,0 +1,323 @@
+
+
+ HOWTO for the linux packet generator
+ ------------------------------------
+
+Enable CONFIG_NET_PKTGEN to compile and build pktgen either in-kernel
+or as a module. A module is preferred; modprobe pktgen if needed. Once
+running, pktgen creates a thread for each CPU with affinity to that CPU.
+Monitoring and controlling is done via /proc. It is easiest to select a
+suitable sample script and configure that.
+
+On a dual CPU:
+
+ps aux | grep pkt
+root 129 0.3 0.0 0 0 ? SW 2003 523:20 [pktgen/0]
+root 130 0.3 0.0 0 0 ? SW 2003 509:50 [pktgen/1]
+
+
+For monitoring and control pktgen creates:
+ /proc/net/pktgen/pgctrl
+ /proc/net/pktgen/kpktgend_X
+ /proc/net/pktgen/ethX
+
+
+Tuning NIC for max performance
+==============================
+
+The default NIC settings are (likely) not tuned for pktgen's artificial
+overload type of benchmarking, as this could hurt the normal use-case.
+
+Specifically increasing the TX ring buffer in the NIC:
+ # ethtool -G ethX tx 1024
+
+A larger TX ring can improve pktgen's performance, while it can hurt
+in the general case, 1) because the TX ring buffer might get larger
+than the CPU's L1/L2 cache, 2) because it allows more queueing in the
+NIC HW layer (which is bad for bufferbloat).
+
+One should hesitate to conclude that packets/descriptors in the HW
+TX ring cause delay. Drivers usually delay cleaning up the
+ring-buffers for various performance reasons, and packets stalling
+the TX ring might just be waiting for cleanup.
+
+This cleanup issue is specifically the case for the driver ixgbe
+(Intel 82599 chip). This driver (ixgbe) combines TX+RX ring cleanups,
+and the cleanup interval is affected by the ethtool --coalesce setting
+of parameter "rx-usecs".
+
+For ixgbe use e.g. "30" resulting in approx 33K interrupts/sec (1/30*10^6):
+ # ethtool -C ethX rx-usecs 30
+
+
+Viewing threads
+===============
+/proc/net/pktgen/kpktgend_0
+Name: kpktgend_0 max_before_softirq: 10000
+Running:
+Stopped: eth1
+Result: OK: max_before_softirq=10000
+
+Most important are the devices assigned to the thread. Note that a
+device can only belong to one thread.
+
+
+Viewing devices
+===============
+
+The Params section holds configured information. The Current section
+holds running statistics. The Result is printed after a run or after
+interruption. Example:
+
+/proc/net/pktgen/eth1
+
+Params: count 10000000 min_pkt_size: 60 max_pkt_size: 60
+ frags: 0 delay: 0 clone_skb: 1000000 ifname: eth1
+ flows: 0 flowlen: 0
+ dst_min: 10.10.11.2 dst_max:
+ src_min: src_max:
+ src_mac: 00:00:00:00:00:00 dst_mac: 00:04:23:AC:FD:82
+ udp_src_min: 9 udp_src_max: 9 udp_dst_min: 9 udp_dst_max: 9
+ src_mac_count: 0 dst_mac_count: 0
+ Flags:
+Current:
+ pkts-sofar: 10000000 errors: 39664
+ started: 1103053986245187us stopped: 1103053999346329us idle: 880401us
+ seq_num: 10000011 cur_dst_mac_offset: 0 cur_src_mac_offset: 0
+ cur_saddr: 0x10a0a0a cur_daddr: 0x20b0a0a
+ cur_udp_dst: 9 cur_udp_src: 9
+ flows: 0
+Result: OK: 13101142(c12220741+d880401) usec, 10000000 (60byte,0frags)
+ 763292pps 390Mb/sec (390805504bps) errors: 39664
+
+Configuring threads and devices
+================================
+This is done via the /proc interface, and most easily done via pgset
+as defined in the sample scripts.
+
+Examples:
+
+ pgset "clone_skb 1" sets the number of copies of the same packet
+ pgset "clone_skb 0" use single SKB for all transmits
+ pgset "burst 8" uses xmit_more API to queue 8 copies of the same
+ packet and update HW tx queue tail pointer once.
+ "burst 1" is the default
+ pgset "pkt_size 9014" sets packet size to 9014
+ pgset "frags 5" packet will consist of 5 fragments
+ pgset "count 200000" sets number of packets to send, set to zero
+ for continuous sends until explicitly stopped.
+
+ pgset "delay 5000" adds delay to hard_start_xmit(). nanoseconds
+
+ pgset "dst 10.0.0.1" sets IP destination address
+ (BEWARE! This generator is very aggressive!)
+
+ pgset "dst_min 10.0.0.1" Same as dst
+ pgset "dst_max 10.0.0.254" Set the maximum destination IP.
+ pgset "src_min 10.0.0.1" Set the minimum (or only) source IP.
+ pgset "src_max 10.0.0.254" Set the maximum source IP.
+ pgset "dst6 fec0::1" IPV6 destination address
+ pgset "src6 fec0::2" IPV6 source address
+ pgset "dstmac 00:00:00:00:00:00" sets MAC destination address
+ pgset "srcmac 00:00:00:00:00:00" sets MAC source address
+
+ pgset "queue_map_min 0" Sets the min value of tx queue interval
+ pgset "queue_map_max 7" Sets the max value of tx queue interval, for multiqueue devices
+ To select queue 1 of a given device,
+ use queue_map_min=1 and queue_map_max=1
+
+ pgset "src_mac_count 1" Sets the number of MACs we'll range through.
+ The 'minimum' MAC is what you set with srcmac.
+
+ pgset "dst_mac_count 1" Sets the number of MACs we'll range through.
+ The 'minimum' MAC is what you set with dstmac.
+
+ pgset "flag [name]" Set a flag to determine behaviour. Current flags
+ are: IPSRC_RND # IP source is random (between min/max)
+ IPDST_RND # IP destination is random
+ UDPSRC_RND, UDPDST_RND,
+ MACSRC_RND, MACDST_RND
+ TXSIZE_RND, IPV6,
+ MPLS_RND, VID_RND, SVID_RND
+ FLOW_SEQ,
+ QUEUE_MAP_RND # queue map random
+ QUEUE_MAP_CPU # queue map mirrors smp_processor_id()
+ UDPCSUM,
+ IPSEC # IPsec encapsulation (needs CONFIG_XFRM)
+ NODE_ALLOC # node specific memory allocation
+
+ pgset spi SPI_VALUE Set specific SA used to transform packet.
+
+ pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then
+ cycle through the port range.
+
+ pgset "udp_src_max 9" set UDP source port max.
+ pgset "udp_dst_min 9" set UDP destination port min, If < udp_dst_max, then
+ cycle through the port range.
+ pgset "udp_dst_max 9" set UDP destination port max.
+
+ pgset "mpls 0001000a,0002000a,0000000a" set MPLS labels (in this example
+ outer label=16,middle label=32,
+ inner label=0 (IPv4 NULL)) Note that
+ there must be no spaces between the
+ arguments. Leading zeros are required.
+ Do not set the bottom of stack bit,
+ that's done automatically. If you do
+ set the bottom of stack bit, that
+ indicates that you want to randomly
+ generate that address and the flag
+ MPLS_RND will be turned on. You
+ can have any mix of random and fixed
+ labels in the label stack.
+
+ pgset "mpls 0" turn off mpls (or any invalid argument works too!)
+
+ pgset "vlan_id 77" set VLAN ID 0-4095
+ pgset "vlan_p 3" set priority bit 0-7 (default 0)
+ pgset "vlan_cfi 0" set canonical format identifier 0-1 (default 0)
+
+ pgset "svlan_id 22" set SVLAN ID 0-4095
+ pgset "svlan_p 3" set priority bit 0-7 (default 0)
+ pgset "svlan_cfi 0" set canonical format identifier 0-1 (default 0)
+
+ pgset "vlan_id 9999" > 4095 remove vlan and svlan tags
+ pgset "svlan 9999" > 4095 remove svlan tag
+
+
+ pgset "tos XX" set former IPv4 TOS field (e.g. "tos 28" for AF11 no ECN, default 00)
+ pgset "traffic_class XX" set former IPv6 TRAFFIC CLASS (e.g. "traffic_class B8" for EF no ECN, default 00)
+
+ pgset stop aborts injection. Also, ^C aborts generator.
+
+ pgset "rate 300M" set rate to 300 Mb/s
+ pgset "ratep 1000000" set rate to 1Mpps
+
+Sample scripts
+==============
+
+A collection of small tutorial scripts for pktgen is in the
+samples/pktgen directory:
+
+pktgen.conf-1-1 # 1 CPU 1 dev
+pktgen.conf-1-2 # 1 CPU 2 dev
+pktgen.conf-2-1 # 2 CPU's 1 dev
+pktgen.conf-2-2 # 2 CPU's 2 dev
+pktgen.conf-1-1-rdos # 1 CPU 1 dev w. route DoS
+pktgen.conf-1-1-ip6 # 1 CPU 1 dev ipv6
+pktgen.conf-1-1-ip6-rdos # 1 CPU 1 dev ipv6 w. route DoS
+pktgen.conf-1-1-flows # 1 CPU 1 dev multiple flows.
+
+Run in shell: ./pktgen.conf-X-Y
+This does all the setup including sending.
+
+
+Interrupt affinity
+===================
+Note that when adding devices to a specific CPU it is a good idea to
+also assign /proc/irq/XX/smp_affinity so that the TX interrupts are bound
+to the same CPU. This reduces cache bouncing when freeing skbs.
+
+Enable IPsec
+============
+Default IPsec transformation with ESP encapsulation plus transport mode
+can be enabled by simply setting:
+
+pgset "flag IPSEC"
+pgset "flows 1"
+
+To avoid breaking existing testbed scripts for using AH type and tunnel mode,
+you can use "pgset spi SPI_VALUE" to specify which transformation mode
+to employ.
+
+
+Current commands and configuration options
+==========================================
+
+** Pgcontrol commands:
+
+start
+stop
+
+** Thread commands:
+
+add_device
+rem_device_all
+max_before_softirq
+
+
+** Device commands:
+
+count
+clone_skb
+debug
+
+frags
+delay
+
+src_mac_count
+dst_mac_count
+
+pkt_size
+min_pkt_size
+max_pkt_size
+
+mpls
+
+udp_src_min
+udp_src_max
+
+udp_dst_min
+udp_dst_max
+
+flag
+ IPSRC_RND
+ IPDST_RND
+ UDPSRC_RND
+ UDPDST_RND
+ MACSRC_RND
+ MACDST_RND
+ TXSIZE_RND
+ IPV6
+ MPLS_RND
+ VID_RND
+ SVID_RND
+ FLOW_SEQ
+ QUEUE_MAP_RND
+ QUEUE_MAP_CPU
+ UDPCSUM
+ IPSEC
+ NODE_ALLOC
+
+dst_min
+dst_max
+
+src_min
+src_max
+
+dst_mac
+src_mac
+
+clear_counters
+
+dst6
+src6
+
+flows
+flowlen
+
+rate
+ratep
+
+References:
+ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/
+ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
+
+Paper from Linux-Kongress in Erlangen 2004.
+ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/pktgen_paper.pdf
+
+Thanks to:
+Grant Grundler for testing on IA-64 and parisc, Harald Welte, Lennert Buytenhek
+Stephen Hemminger, Andi Kleen, Dave Miller and many others.
+
+
+Good luck with the linux net-development.
diff --git a/Documentation/networking/policy-routing.txt b/Documentation/networking/policy-routing.txt
new file mode 100644
index 000000000..36f6936d7
--- /dev/null
+++ b/Documentation/networking/policy-routing.txt
@@ -0,0 +1,150 @@
+Classes
+-------
+
+ "Class" is a complete routing table in common sense.
+ I.e. it is tree of nodes (destination prefix, tos, metric)
+ with attached information: gateway, device etc.
+ This tree is looked up as specified in RFC1812 5.2.4.3
+ 1. Basic match
+ 2. Longest match
+ 3. Weak TOS.
+ 4. Metric. (should not be in kernel space, but they are)
+ 5. Additional pruning rules. (not in kernel space).
+
+ We have two special type of nodes:
+ REJECT - abort route lookup and return an error value.
+ THROW - abort route lookup in this class.
+
+
+ Currently the number of classes is limited to 255
+ (0 is reserved for "not specified class")
+
+ Three classes are builtin:
+
+ RT_CLASS_LOCAL=255 - local interface addresses,
+ broadcasts, nat addresses.
+
+ RT_CLASS_MAIN=254 - all normal routes are put there
+ by default.
+
+ RT_CLASS_DEFAULT=253 - if ip_fib_model==1, then
+ normal default routes are put there, if ip_fib_model==2
+ all gateway routes are put there.
+
+
+Rules
+-----
+ Rule is a record of (src prefix, src interface, tos, dst prefix)
+ with attached information.
+
+ Rule types:
+ RTP_ROUTE - lookup in attached class
+ RTP_NAT - lookup in attached class and if a match is found,
+ translate packet source address.
+ RTP_MASQUERADE - lookup in attached class and if a match is found,
+ masquerade packet as sourced by us.
+ RTP_DROP - silently drop the packet.
+ RTP_REJECT - drop the packet and send ICMP NET UNREACHABLE.
+ RTP_PROHIBIT - drop the packet and send ICMP COMM. ADM. PROHIBITED.
+
+ Rule flags:
+ RTRF_LOG - log route creations.
+ RTRF_VALVE - One way route (used with masquerading)
+
+Default setup:
+
+root@amber:/pub/ip-routing # iproute -r
+Kernel routing policy rules
+Pref Source Destination TOS Iface Cl
+ 0 default default 00 * 255
+ 254 default default 00 * 254
+ 255 default default 00 * 253
+
+
+Lookup algorithm
+----------------
+
+ We scan rules list, and if a rule is matched, apply it.
+ If a route is found, return it.
+ If it is not found or a THROW node was matched, continue
+ to scan rules.
+
+Applications
+------------
+
+1. Just ignore classes. All the routes are put into MAIN class
+ (and/or into DEFAULT class).
+
+ HOWTO: iproute add PREFIX [ tos TOS ] [ gw GW ] [ dev DEV ]
+ [ metric METRIC ] [ reject ] ... (look at iproute utility)
+
+ or use route utility from current net-tools.
+
+2. Opposite case. Just forget all that you know about routing
+ tables. Every rule is supplied with its own gateway, device
+ info. record. This approach is not appropriate for automated
+ route maintenance, but it is ideal for manual configuration.
+
+ HOWTO: iproute addrule [ from PREFIX ] [ to PREFIX ] [ tos TOS ]
+ [ dev INPUTDEV] [ pref PREFERENCE ] route [ gw GATEWAY ]
+ [ dev OUTDEV ] .....
+
+ Warning: As of now the size of the routing table in this
+ approach is limited to 256. If someone likes this model, I'll
+ relax this limitation.
+
+3. OSPF classes (see RFC1583, RFC1812 E.3.3)
+ Very clean, stable and robust algorithm for OSPF routing
+ domains. Unfortunately, it is not widely used in the Internet.
+
+ Proposed setup:
+ 255 local addresses
+ 254 interface routes
+ 253 ASE routes with external metric
+ 252 ASE routes with internal metric
+ 251 inter-area routes
+ 250 intra-area routes for 1st area
+ 249 intra-area routes for 2nd area
+ etc.
+
+ Rules:
+ iproute addrule class 253
+ iproute addrule class 252
+ iproute addrule class 251
+ iproute addrule to a-prefix-for-1st-area class 250
+ iproute addrule to another-prefix-for-1st-area class 250
+ ...
+ iproute addrule to a-prefix-for-2nd-area class 249
+ ...
+
+ Area classes must be terminated with reject record.
+ iproute add default reject class 250
+ iproute add default reject class 249
+ ...
+
+4. The Variant Router Requirements Algorithm (RFC1812 E.3.2)
+ Create 16 classes for different TOS values.
+ It is a funny, but pretty useless algorithm.
+ I listed it just to show the power of new routing code.
+
+5. All the variety of combinations......
+
+
+GATED
+-----
+
+ Gated does not understand classes, but it will work
+ happily in MAIN+DEFAULT. All policy routes can be set
+ and maintained manually.
+
+IMPORTANT NOTE
+--------------
+ route.c has a compilation time switch CONFIG_IP_LOCAL_RT_POLICY.
+ If it is set, locally originated packets are routed
+ using all the policy list. This is not very convenient and
+ pretty ambiguous when used with NAT and masquerading.
+ I set it to FALSE by default.
+
+
+Alexey Kuznetov
+kuznet@ms2.inr.ac.ru
diff --git a/Documentation/networking/ppp_generic.txt b/Documentation/networking/ppp_generic.txt
new file mode 100644
index 000000000..091d20273
--- /dev/null
+++ b/Documentation/networking/ppp_generic.txt
@@ -0,0 +1,432 @@
+ PPP Generic Driver and Channel Interface
+ ----------------------------------------
+
+ Paul Mackerras
+ paulus@samba.org
+ 7 Feb 2002
+
+The generic PPP driver in linux-2.4 provides an implementation of the
+functionality which is of use in any PPP implementation, including:
+
+* the network interface unit (ppp0 etc.)
+* the interface to the networking code
+* PPP multilink: splitting datagrams between multiple links, and
+ ordering and combining received fragments
+* the interface to pppd, via a /dev/ppp character device
+* packet compression and decompression
+* TCP/IP header compression and decompression
+* detecting network traffic for demand dialling and for idle timeouts
+* simple packet filtering
+
+For sending and receiving PPP frames, the generic PPP driver calls on
+the services of PPP `channels'. A PPP channel encapsulates a
+mechanism for transporting PPP frames from one machine to another. A
+PPP channel implementation can be arbitrarily complex internally but
+has a very simple interface with the generic PPP code: it merely has
+to be able to send PPP frames, receive PPP frames, and optionally
+handle ioctl requests. Currently there are PPP channel
+implementations for asynchronous serial ports, synchronous serial
+ports, and for PPP over ethernet.
+
+This architecture makes it possible to implement PPP multilink in a
+natural and straightforward way, by allowing more than one channel to
+be linked to each ppp network interface unit. The generic layer is
+responsible for splitting datagrams on transmit and recombining them
+on receive.
+
+
+PPP channel API
+---------------
+
+See include/linux/ppp_channel.h for the declaration of the types and
+functions used to communicate between the generic PPP layer and PPP
+channels.
+
+Each channel has to provide two functions to the generic PPP layer,
+via the ppp_channel.ops pointer:
+
+* start_xmit() is called by the generic layer when it has a frame to
+ send. The channel has the option of rejecting the frame for
+ flow-control reasons. In this case, start_xmit() should return 0
+ and the channel should call the ppp_output_wakeup() function at a
+ later time when it can accept frames again, and the generic layer
+ will then attempt to retransmit the rejected frame(s). If the frame
+ is accepted, the start_xmit() function should return 1.
+
+* ioctl() provides an interface which can be used by a user-space
+ program to control aspects of the channel's behaviour. This
+ procedure will be called when a user-space program does an ioctl
+ system call on an instance of /dev/ppp which is bound to the
+ channel. (Usually it would only be pppd which would do this.)
+
+The generic PPP layer provides seven functions to channels:
+
+* ppp_register_channel() is called when a channel has been created, to
+ notify the PPP generic layer of its presence. For example, setting
+ a serial port to the PPPDISC line discipline causes the ppp_async
+ channel code to call this function.
+
+* ppp_unregister_channel() is called when a channel is to be
+ destroyed. For example, the ppp_async channel code calls this when
+ a hangup is detected on the serial port.
+
+* ppp_output_wakeup() is called by a channel when it has previously
+ rejected a call to its start_xmit function, and can now accept more
+ packets.
+
+* ppp_input() is called by a channel when it has received a complete
+ PPP frame.
+
+* ppp_input_error() is called by a channel when it has detected that a
+ frame has been lost or dropped (for example, because of a FCS (frame
+ check sequence) error).
+
+* ppp_channel_index() returns the channel index assigned by the PPP
+ generic layer to this channel. The channel should provide some way
+ (e.g. an ioctl) to transmit this back to user-space, as user-space
+ will need it to attach an instance of /dev/ppp to this channel.
+
+* ppp_unit_number() returns the unit number of the ppp network
+ interface to which this channel is connected, or -1 if the channel
+ is not connected.
+
+Connecting a channel to the ppp generic layer is initiated from the
+channel code, rather than from the generic layer. The channel is
+expected to have some way for a user-level process to control it
+independently of the ppp generic layer. For example, with the
+ppp_async channel, this is provided by the file descriptor to the
+serial port.
+
+Generally a user-level process will initialize the underlying
+communications medium and prepare it to do PPP. For example, with an
+async tty, this can involve setting the tty speed and modes, issuing
+modem commands, and then going through some sort of dialog with the
+remote system to invoke PPP service there. We refer to this process
+as `discovery'. Then the user-level process tells the medium to
+become a PPP channel and register itself with the generic PPP layer.
+The channel then has to report the channel number assigned to it back
+to the user-level process. From that point, the PPP negotiation code
+in the PPP daemon (pppd) can take over and perform the PPP
+negotiation, accessing the channel through the /dev/ppp interface.
+
+At the interface to the PPP generic layer, PPP frames are stored in
+skbuff structures and start with the two-byte PPP protocol number.
+The frame does *not* include the 0xff `address' byte or the 0x03
+`control' byte that are optionally used in async PPP. Nor is there
+any escaping of control characters, nor are there any FCS or framing
+characters included. That is all the responsibility of the channel
+code, if it is needed for the particular medium. That is, the skbuffs
+presented to the start_xmit() function contain only the 2-byte
+protocol number and the data, and the skbuffs presented to ppp_input()
+must be in the same format.
+
+The channel must provide an instance of a ppp_channel struct to
+represent the channel. The channel is free to use the `private' field
+however it wishes. The channel should initialize the `mtu' and
+`hdrlen' fields before calling ppp_register_channel() and not change
+them until after ppp_unregister_channel() returns. The `mtu' field
+represents the maximum size of the data part of the PPP frames, that
+is, it does not include the 2-byte protocol number.
+
+If the channel needs some headroom in the skbuffs presented to it for
+transmission (i.e., some space free in the skbuff data area before the
+start of the PPP frame), it should set the `hdrlen' field of the
+ppp_channel struct to the amount of headroom required. The generic
+PPP layer will attempt to provide that much headroom but the channel
+should still check if there is sufficient headroom and copy the skbuff
+if there isn't.
+
+On the input side, channels should ideally provide at least 2 bytes of
+headroom in the skbuffs presented to ppp_input(). The generic PPP
+code does not require this but will be more efficient if this is done.
+
+
+Buffering and flow control
+--------------------------
+
+The generic PPP layer has been designed to minimize the amount of data
+that it buffers in the transmit direction. It maintains a queue of
+transmit packets for the PPP unit (network interface device) plus a
+queue of transmit packets for each attached channel. Normally the
+transmit queue for the unit will contain at most one packet; the
+exceptions are when pppd sends packets by writing to /dev/ppp, and
+when the core networking code calls the generic layer's start_xmit()
+function with the queue stopped, i.e. when the generic layer has
+called netif_stop_queue(), which only happens on a transmit timeout.
+The start_xmit function always accepts and queues the packet which it
+is asked to transmit.
+
+Transmit packets are dequeued from the PPP unit transmit queue and
+then subjected to TCP/IP header compression and packet compression
+(Deflate or BSD-Compress compression), as appropriate. After this
+point the packets can no longer be reordered, as the decompression
+algorithms rely on receiving compressed packets in the same order that
+they were generated.
+
+If multilink is not in use, this packet is then passed to the attached
+channel's start_xmit() function. If the channel refuses to take
+the packet, the generic layer saves it for later transmission. The
+generic layer will call the channel's start_xmit() function again
+when the channel calls ppp_output_wakeup() or when the core
+networking code calls the generic layer's start_xmit() function
+again. The generic layer contains no timeout and retransmission
+logic; it relies on the core networking code for that.
+
+If multilink is in use, the generic layer divides the packet into one
+or more fragments and puts a multilink header on each fragment. It
+decides how many fragments to use based on the length of the packet
+and the number of channels which are potentially able to accept a
+fragment at the moment. A channel is potentially able to accept a
+fragment if it doesn't have any fragments currently queued up for it
+to transmit. The channel may still refuse a fragment; in this case
+the fragment is queued up for the channel to transmit later. This
+scheme has the effect that more fragments are given to higher-
+bandwidth channels. It also means that under light load, the generic
+layer will tend to fragment large packets across all the channels,
+thus reducing latency, while under heavy load, packets will tend to be
+transmitted as single fragments, thus reducing the overhead of
+fragmentation.
+
+
+SMP safety
+----------
+
+The PPP generic layer has been designed to be SMP-safe. Locks are
+used around accesses to the internal data structures where necessary
+to ensure their integrity. As part of this, the generic layer
+requires that the channels adhere to certain requirements and in turn
+provides certain guarantees to the channels. Essentially the channels
+are required to provide the appropriate locking on the ppp_channel
+structures that form the basis of the communication between the
+channel and the generic layer. This is because the channel provides
+the storage for the ppp_channel structure, and so the channel is
+required to provide the guarantee that this storage exists and is
+valid at the appropriate times.
+
+The generic layer requires these guarantees from the channel:
+
+* The ppp_channel object must exist from the time that
+ ppp_register_channel() is called until after the call to
+ ppp_unregister_channel() returns.
+
+* No thread may be in a call to any of ppp_input(), ppp_input_error(),
+ ppp_output_wakeup(), ppp_channel_index() or ppp_unit_number() for a
+ channel at the time that ppp_unregister_channel() is called for that
+ channel.
+
+* ppp_register_channel() and ppp_unregister_channel() must be called
+ from process context, not interrupt or softirq/BH context.
+
+* The remaining generic layer functions may be called at softirq/BH
+ level but must not be called from a hardware interrupt handler.
+
+* The generic layer may call the channel start_xmit() function at
+ softirq/BH level but will not call it at interrupt level. Thus the
+ start_xmit() function may not block.
+
+* The generic layer will only call the channel ioctl() function in
+ process context.
+
+The generic layer provides these guarantees to the channels:
+
+* The generic layer will not call the start_xmit() function for a
+ channel while any thread is already executing in that function for
+ that channel.
+
+* The generic layer will not call the ioctl() function for a channel
+ while any thread is already executing in that function for that
+ channel.
+
+* By the time a call to ppp_unregister_channel() returns, no thread
+ will be executing in a call from the generic layer to that channel's
+ start_xmit() or ioctl() function, and the generic layer will not
+ call either of those functions subsequently.
+
+
+Interface to pppd
+-----------------
+
+The PPP generic layer exports a character device interface called
+/dev/ppp. This is used by pppd to control PPP interface units and
+channels. Although there is only one /dev/ppp, each open instance of
+/dev/ppp acts independently and can be attached either to a PPP unit
+or a PPP channel. This is achieved using the file->private_data field
+to point to a separate object for each open instance of /dev/ppp. In
+this way an effect similar to Solaris' clone open is obtained,
+allowing us to control an arbitrary number of PPP interfaces and
+channels without having to fill up /dev with hundreds of device names.
+
+When /dev/ppp is opened, a new instance is created which is initially
+unattached. Using an ioctl call, it can then be attached to an
+existing unit, attached to a newly-created unit, or attached to an
+existing channel. An instance attached to a unit can be used to send
+and receive PPP control frames, using the read() and write() system
+calls, along with poll() if necessary. Similarly, an instance
+attached to a channel can be used to send and receive PPP frames on
+that channel.
+
+In multilink terms, the unit represents the bundle, while the channels
+represent the individual physical links. Thus, a PPP frame sent by a
+write to the unit (i.e., to an instance of /dev/ppp attached to the
+unit) will be subject to bundle-level compression and to fragmentation
+across the individual links (if multilink is in use). In contrast, a
+PPP frame sent by a write to the channel will be sent as-is on that
+channel, without any multilink header.
+
+A channel is not initially attached to any unit. In this state it can
+be used for PPP negotiation but not for the transfer of data packets.
+It can then be connected to a PPP unit with an ioctl call, which
+makes it available to send and receive data packets for that unit.
+
+The ioctl calls which are available on an instance of /dev/ppp depend
+on whether it is unattached, attached to a PPP interface, or attached
+to a PPP channel. The ioctl calls which are available on an
+unattached instance are:
+
+* PPPIOCNEWUNIT creates a new PPP interface and makes this /dev/ppp
+ instance the "owner" of the interface. The argument should point to
+ an int which is the desired unit number if >= 0, or -1 to assign the
+ lowest unused unit number. Being the owner of the interface means
+ that the interface will be shut down if this instance of /dev/ppp is
+ closed.
+
+* PPPIOCATTACH attaches this instance to an existing PPP interface.
+ The argument should point to an int containing the unit number.
+ This does not make this instance the owner of the PPP interface.
+
+* PPPIOCATTCHAN attaches this instance to an existing PPP channel.
+ The argument should point to an int containing the channel number.
+
+The ioctl calls available on an instance of /dev/ppp attached to a
+channel are:
+
+* PPPIOCDETACH detaches the instance from the channel. This ioctl is
+ deprecated since the same effect can be achieved by closing the
+ instance. In order to prevent possible races this ioctl will fail
+ with an EINVAL error if more than one file descriptor refers to this
+ instance (i.e. as a result of dup(), dup2() or fork()).
+
+* PPPIOCCONNECT connects this channel to a PPP interface. The
+ argument should point to an int containing the interface unit
+ number. It will return an EINVAL error if the channel is already
+ connected to an interface, or ENXIO if the requested interface does
+ not exist.
+
+* PPPIOCDISCONN disconnects this channel from the PPP interface that
+ it is connected to. It will return an EINVAL error if the channel
+ is not connected to an interface.
+
+* All other ioctl commands are passed to the channel ioctl() function.
+
+The ioctl calls that are available on an instance that is attached to
+an interface unit are:
+
+* PPPIOCSMRU sets the MRU (maximum receive unit) for the interface.
+ The argument should point to an int containing the new MRU value.
+
+* PPPIOCSFLAGS sets flags which control the operation of the
+ interface. The argument should be a pointer to an int containing
+ the new flags value. The bits in the flags value that can be set
+ are:
+ SC_COMP_TCP enable transmit TCP header compression
+ SC_NO_TCP_CCID disable connection-id compression for
+ TCP header compression
+ SC_REJ_COMP_TCP disable receive TCP header decompression
+ SC_CCP_OPEN Compression Control Protocol (CCP) is
+ open, so inspect CCP packets
+ SC_CCP_UP CCP is up, may (de)compress packets
+ SC_LOOP_TRAFFIC send IP traffic to pppd
+ SC_MULTILINK enable PPP multilink fragmentation on
+ transmitted packets
+ SC_MP_SHORTSEQ expect short multilink sequence
+ numbers on received multilink fragments
+ SC_MP_XSHORTSEQ transmit short multilink sequence nos.
+
+ The values of these flags are defined in <linux/ppp-ioctl.h>. Note
+ that the values of the SC_MULTILINK, SC_MP_SHORTSEQ and
+ SC_MP_XSHORTSEQ bits are ignored if the CONFIG_PPP_MULTILINK option
+ is not selected.
+
+* PPPIOCGFLAGS returns the value of the status/control flags for the
+ interface unit. The argument should point to an int where the ioctl
+ will store the flags value. As well as the values listed above for
+ PPPIOCSFLAGS, the following bits may be set in the returned value:
+ SC_COMP_RUN CCP compressor is running
+ SC_DECOMP_RUN CCP decompressor is running
+ SC_DC_ERROR CCP decompressor detected non-fatal error
+ SC_DC_FERROR CCP decompressor detected fatal error
+
+* PPPIOCSCOMPRESS sets the parameters for packet compression or
+ decompression. The argument should point to a ppp_option_data
+ structure (defined in <linux/ppp-ioctl.h>), which contains a
+ pointer/length pair which should describe a block of memory
+ containing a CCP option specifying a compression method and its
+ parameters. The ppp_option_data struct also contains a `transmit'
+ field. If this is 0, the ioctl will affect the receive path,
+ otherwise the transmit path.
+
+* PPPIOCGUNIT returns, in the int pointed to by the argument, the unit
+ number of this interface unit.
+
+* PPPIOCSDEBUG sets the debug flags for the interface to the value in
+ the int pointed to by the argument. Only the least significant bit
+ is used; if this is 1 the generic layer will print some debug
+ messages during its operation. This is only intended for debugging
+ the generic PPP layer code; it is generally not helpful for working
+ out why a PPP connection is failing.
+
+* PPPIOCGDEBUG returns the debug flags for the interface in the int
+ pointed to by the argument.
+
+* PPPIOCGIDLE returns the time, in seconds, since the last data
+ packets were sent and received. The argument should point to a
+ ppp_idle structure (defined in <linux/ppp_defs.h>). If the
+ CONFIG_PPP_FILTER option is enabled, the set of packets which reset
+ the transmit and receive idle timers is restricted to those which
+ pass the `active' packet filter.
+
+* PPPIOCSMAXCID sets the maximum connection-ID parameter (and thus the
+ number of connection slots) for the TCP header compressor and
+ decompressor. The lower 16 bits of the int pointed to by the
+ argument specify the maximum connection-ID for the compressor. If
+ the upper 16 bits of that int are non-zero, they specify the maximum
+ connection-ID for the decompressor, otherwise the decompressor's
+ maximum connection-ID is set to 15.
+
+* PPPIOCSNPMODE sets the network-protocol mode for a given network
+ protocol. The argument should point to an npioctl struct (defined
+ in <linux/ppp-ioctl.h>). The `protocol' field gives the PPP protocol
+ number for the protocol to be affected, and the `mode' field
+ specifies what to do with packets for that protocol:
+
+ NPMODE_PASS normal operation, transmit and receive packets
+ NPMODE_DROP silently drop packets for this protocol
+ NPMODE_ERROR drop packets and return an error on transmit
+ NPMODE_QUEUE queue up packets for transmit, drop received
+ packets
+
+ At present NPMODE_ERROR and NPMODE_QUEUE have the same effect as
+ NPMODE_DROP.
+
+* PPPIOCGNPMODE returns the network-protocol mode for a given
+ protocol. The argument should point to an npioctl struct with the
+ `protocol' field set to the PPP protocol number for the protocol of
+ interest. On return the `mode' field will be set to the network-
+ protocol mode for that protocol.
+
+* PPPIOCSPASS and PPPIOCSACTIVE set the `pass' and `active' packet
+ filters. These ioctls are only available if the CONFIG_PPP_FILTER
+ option is selected. The argument should point to a sock_fprog
+ structure (defined in <linux/filter.h>) containing the compiled BPF
+ instructions for the filter. Packets are dropped if they fail the
+ `pass' filter; otherwise, if they fail the `active' filter they are
+ passed but they do not reset the transmit or receive idle timer.
+
+* PPPIOCSMRRU enables or disables multilink processing for received
+ packets and sets the multilink MRRU (maximum reconstructed receive
+ unit). The argument should point to an int containing the new MRRU
+ value. If the MRRU value is 0, processing of received multilink
+ fragments is disabled. This ioctl is only available if the
+ CONFIG_PPP_MULTILINK option is selected.
+
+Last modified: 7-feb-2002
diff --git a/Documentation/networking/proc_net_tcp.txt b/Documentation/networking/proc_net_tcp.txt
new file mode 100644
index 000000000..4a79209e7
--- /dev/null
+++ b/Documentation/networking/proc_net_tcp.txt
@@ -0,0 +1,48 @@
+This document describes the interfaces /proc/net/tcp and /proc/net/tcp6.
+Note that these interfaces are deprecated in favor of tcp_diag.
+
+These /proc interfaces provide information about currently active TCP
+connections, and are implemented by tcp4_seq_show() in net/ipv4/tcp_ipv4.c
+and tcp6_seq_show() in net/ipv6/tcp_ipv6.c, respectively.
+
+It will first list all listening TCP sockets, and next list all established
+TCP connections. A typical entry of /proc/net/tcp would look like this (split
+up into 3 parts because of the length of the line):
+
+ 46: 010310AC:9C4C 030310AC:1770 01
+ | | | | | |--> connection state
+ | | | | |------> remote TCP port number
+ | | | |-------------> remote IPv4 address
+ | | |--------------------> local TCP port number
+ | |---------------------------> local IPv4 address
+ |----------------------------------> number of entry
+
+ 00000150:00000000 01:00000019 00000000
+ | | | | |--> number of unrecovered RTO timeouts
+ | | | |----------> number of jiffies until timer expires
+ | | |----------------> timer_active (see below)
+ | |----------------------> receive-queue
+ |-------------------------------> transmit-queue
+
+ 1000 0 54165785 4 cd1e6040 25 4 27 3 -1
+ | | | | | | | | | |--> slow start size threshold,
+ | | | | | | | | | or -1 if the threshold
+ | | | | | | | | | is >= 0xFFFF
+ | | | | | | | | |----> sending congestion window
+ | | | | | | | |-------> (ack.quick<<1)|ack.pingpong
+ | | | | | | |---------> Predicted tick of soft clock
+ | | | | | | (delayed ACK control data)
+ | | | | | |------------> retransmit timeout
+ | | | | |------------------> location of socket in memory
+ | | | |-----------------------> socket reference count
+ | | |-----------------------------> inode
+ | |----------------------------------> unanswered 0-window probes
+ |---------------------------------------------> uid
+
+timer_active:
+ 0 no timer is pending
+ 1 retransmit-timer is pending
+ 2 another timer (e.g. delayed ack or keepalive) is pending
+ 3 this is a socket in TIME_WAIT state. Not all fields will contain
+ data (or even exist)
+ 4 zero window probe timer is pending
diff --git a/Documentation/networking/radiotap-headers.txt b/Documentation/networking/radiotap-headers.txt
new file mode 100644
index 000000000..953331c79
--- /dev/null
+++ b/Documentation/networking/radiotap-headers.txt
@@ -0,0 +1,152 @@
+How to use radiotap headers
+===========================
+
+Pointer to the radiotap include file
+------------------------------------
+
+Radiotap headers are variable-length and extensible, you can get most of the
+information you need to know on them from:
+
+./include/net/ieee80211_radiotap.h
+
+This document gives an overview and warns on some corner cases.
+
+
+Structure of the header
+-----------------------
+
+There is a fixed portion at the start which contains a u32 bitmap that defines
+if the possible argument associated with that bit is present or not. So if b0
+of the it_present member of ieee80211_radiotap_header is set, it means that
+the header for argument index 0 (IEEE80211_RADIOTAP_TSFT) is present in the
+argument area.
+
+ < 8-byte ieee80211_radiotap_header >
+ [ <possible argument bitmap extensions ... > ]
+ [ <argument> ... ]
+
+At the moment there are only 13 possible argument indexes defined, but in case
+we run out of space in the u32 it_present member, it is defined that b31 set
+indicates that there is another u32 bitmap following (shown as "possible
+argument bitmap extensions..." above), and the start of the arguments is moved
+forward 4 bytes each time.
+
+Note also that the it_len member __le16 is set to the total number of bytes
+covered by the ieee80211_radiotap_header and any arguments following.
+
+
+Requirements for arguments
+--------------------------
+
+After the fixed part of the header, the arguments follow for each argument
+index whose matching bit is set in the it_present member of
+ieee80211_radiotap_header.
+
+ - the arguments are all stored little-endian!
+
+ - the argument payload for a given argument index has a fixed size. So
+ IEEE80211_RADIOTAP_TSFT being present always indicates an 8-byte argument is
+ present. See the comments in ./include/net/ieee80211_radiotap.h for a nice
+ breakdown of all the argument sizes
+
+ - the arguments must be aligned to a boundary of the argument size using
+ padding. So a u16 argument must start on the next u16 boundary if it isn't
+ already on one, a u32 must start on the next u32 boundary and so on.
+
+ - "alignment" is relative to the start of the ieee80211_radiotap_header, ie,
+ the first byte of the radiotap header. The absolute alignment of that first
+ byte isn't defined. So even if the whole radiotap header is starting at, eg,
+ address 0x00000003, still the first byte of the radiotap header is treated as
+ 0 for alignment purposes.
+
+ - the above point that there may be no absolute alignment for multibyte
+ entities in the fixed radiotap header or the argument region means that you
+ have to take special evasive action when trying to access these multibyte
+ entities. Some arches like Blackfin cannot deal with an attempt to
+ dereference, eg, a u16 pointer that is pointing to an odd address. Instead
+ you have to use a kernel API get_unaligned() to dereference the pointer,
+ which will do it bytewise on the arches that require that.
+
+ - The arguments for a given argument index can be a compound of multiple types
+ together. For example IEEE80211_RADIOTAP_CHANNEL has an argument payload
+ consisting of two u16s of total length 4. When this happens, the padding
+ rule is applied dealing with a u16, NOT dealing with a 4-byte single entity.
+
+
+Example valid radiotap header
+-----------------------------
+
+ 0x00, 0x00, // <-- radiotap version + pad byte
+ 0x0b, 0x00, // <- radiotap header length
+ 0x04, 0x0c, 0x00, 0x00, // <-- bitmap
+ 0x6c, // <-- rate (in 500kHz units)
+ 0x0c, //<-- tx power
+ 0x01 //<-- antenna
+
+
+Using the Radiotap Parser
+-------------------------
+
+If you are having to parse a radiotap struct, you can radically simplify the
+job by using the radiotap parser that lives in net/wireless/radiotap.c and has
+its prototypes available in include/net/cfg80211.h. You use it like this:
+
+#include <net/cfg80211.h>
+
+/* buf points to the start of the radiotap header part */
+
+int MyFunction(u8 * buf, int buflen)
+{
+ int pkt_rate_100kHz = 0, antenna = 0, pwr = 0;
+ struct ieee80211_radiotap_iterator iterator;
+ int ret = ieee80211_radiotap_iterator_init(&iterator, buf, buflen);
+
+ while (!ret) {
+
+ ret = ieee80211_radiotap_iterator_next(&iterator);
+
+ if (ret)
+ continue;
+
+ /* see if this argument is something we can use */
+
+ switch (iterator.this_arg_index) {
+ /*
+ * You must take care when dereferencing iterator.this_arg
+ * for multibyte types... the pointer is not aligned. Use
+ * get_unaligned((type *)iterator.this_arg) to dereference
+ * iterator.this_arg for type "type" safely on all arches.
+ */
+ case IEEE80211_RADIOTAP_RATE:
+ /* radiotap "rate" u8 is in
+ * 500kbps units, eg, 0x02=1Mbps
+ */
+ pkt_rate_100kHz = (*iterator.this_arg) * 5;
+ break;
+
+ case IEEE80211_RADIOTAP_ANTENNA:
+ /* radiotap uses 0 for 1st ant */
+ antenna = *iterator.this_arg);
+ break;
+
+ case IEEE80211_RADIOTAP_DBM_TX_POWER:
+ pwr = *iterator.this_arg;
+ break;
+
+ default:
+ break;
+ }
+ } /* while more rt headers */
+
+ if (ret != -ENOENT)
+ return TXRX_DROP;
+
+ /* discard the radiotap header part */
+ buf += iterator.max_length;
+ buflen -= iterator.max_length;
+
+ ...
+
+}
+
+Andy Green <andy@warmcat.com>
diff --git a/Documentation/networking/ray_cs.txt b/Documentation/networking/ray_cs.txt
new file mode 100644
index 000000000..c0c12307e
--- /dev/null
+++ b/Documentation/networking/ray_cs.txt
@@ -0,0 +1,150 @@
+September 21, 1999
+
+Copyright (c) 1998 Corey Thomas (corey@world.std.com)
+
+This file is the documentation for the Raylink Wireless LAN card driver for
+Linux. The Raylink wireless LAN card is a PCMCIA card which provides IEEE
+802.11 compatible wireless network connectivity at 1 and 2 megabits/second.
+See http://www.raytheon.com/micro/raylink/ for more information on the Raylink
+card. This driver is in early development and does have bugs. See the known
+bugs and limitations at the end of this document for more information.
+This driver also works with WebGear's Aviator 2.4 and Aviator Pro
+wireless LAN cards.
+
+As of kernel 2.3.18, the ray_cs driver is part of the Linux kernel
+source. My web page for the development of ray_cs is at
+http://web.ralinktech.com/ralink/Home/Support/Linux.html
+and I can be emailed at corey@world.std.com
+
+The kernel driver is based on ray_cs-1.62.tgz
+
+The driver at my web page is intended to be used as an add on to
+David Hinds pcmcia package. All the command line parameters are
+available when compiled as a module. When built into the kernel, only
+the essid= string parameter is available via the kernel command line.
+This will change after the method of sorting out parameters for all
+the PCMCIA drivers is agreed upon. If you must have a built in driver
+with nondefault parameters, they can be edited in
+/usr/src/linux/drivers/net/pcmcia/ray_cs.c. Searching for module_param
+will find them all.
+
+Information on card services is available at:
+ http://pcmcia-cs.sourceforge.net/
+
+
+Card services user programs are still required for PCMCIA devices.
+pcmcia-cs-3.1.1 or greater is required for the kernel version of
+the driver.
+
+Currently, ray_cs is not part of David Hinds card services package,
+so the following magic is required.
+
+At the end of the /etc/pcmcia/config.opts file, add the line:
+source ./ray_cs.opts
+This will make card services read the ray_cs.opts file
+when starting. Create the file /etc/pcmcia/ray_cs.opts containing the
+following:
+
+#### start of /etc/pcmcia/ray_cs.opts ###################
+# Configuration options for Raylink Wireless LAN PCMCIA card
+device "ray_cs"
+ class "network" module "misc/ray_cs"
+
+card "RayLink PC Card WLAN Adapter"
+ manfid 0x01a6, 0x0000
+ bind "ray_cs"
+
+module "misc/ray_cs" opts ""
+#### end of /etc/pcmcia/ray_cs.opts #####################
+
+
+To join an existing network with
+different parameters, contact the network administrator for the
+configuration information, and edit /etc/pcmcia/ray_cs.opts.
+Add the parameters below between the empty quotes.
+
+Parameters for ray_cs driver which may be specified in ray_cs.opts:
+
+bc integer 0 = normal mode (802.11 timing)
+ 1 = slow down inter frame timing to allow
+ operation with older breezecom access
+ points.
+
+beacon_period integer beacon period in Kilo-microseconds
+ legal values = must be integer multiple
+ of hop dwell
+ default = 256
+
+country integer 1 = USA (default)
+ 2 = Europe
+ 3 = Japan
+ 4 = Korea
+ 5 = Spain
+ 6 = France
+ 7 = Israel
+ 8 = Australia
+
+essid string ESS ID - network name to join
+ string with maximum length of 32 chars
+ default value = "ADHOC_ESSID"
+
+hop_dwell integer hop dwell time in Kilo-microseconds
+ legal values = 16,32,64,128(default),256
+
+irq_mask integer linux standard 16 bit value 1bit/IRQ
+ lsb is IRQ 0, bit 1 is IRQ 1 etc.
+ Used to restrict choice of IRQ's to use.
+ Recommended method for controlling
+ interrupts is in /etc/pcmcia/config.opts
+
+net_type integer 0 (default) = adhoc network,
+ 1 = infrastructure
+
+phy_addr string string containing new MAC address in
+ hex, must start with x eg
+ x00008f123456
+
+psm integer 0 = continuously active
+ 1 = power save mode (not useful yet)
+
+pc_debug integer (0-5) larger values for more verbose
+ logging. Replaces ray_debug.
+
+ray_debug integer Replaced with pc_debug
+
+ray_mem_speed integer defaults to 500
+
+sniffer integer 0 = not sniffer (default)
+ 1 = sniffer which can be used to record all
+ network traffic using tcpdump or similar,
+ but no normal network use is allowed.
+
+translate integer 0 = no translation (encapsulate frames)
+ 1 = translation (RFC1042/802.1)
+
+
+More on sniffer mode:
+
+tcpdump does not understand 802.11 headers, so it can't
+interpret the contents, but it can record to a file. This is only
+useful for debugging 802.11 lowlevel protocols that are not visible to
+linux. If you want to watch ftp xfers, or do similar things, you
+don't need to use sniffer mode. Also, some packet types are never
+sent up by the card, so you will never see them (ack, rts, cts, probe
+etc.) There is a simple program (showcap) included in the ray_cs
+package which parses the 802.11 headers.
+
+Known Problems and missing features
+
+ Does not work with non x86
+
+ Does not work with SMP
+
+ Support for defragmenting frames is not yet debugged, and in
+ fact is known to not work. I have never encountered a net set
+ up to fragment, but still, it should be fixed.
+
+ The ioctl support is incomplete. The hardware address cannot be set
+ using ifconfig yet. If a different hardware address is needed, it may
+ be set using the phy_addr parameter in ray_cs.opts. This requires
+ a card insertion to take effect.
diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt
new file mode 100644
index 000000000..e1a3d59bb
--- /dev/null
+++ b/Documentation/networking/rds.txt
@@ -0,0 +1,355 @@
+
+Overview
+========
+
+This readme tries to provide some background on the hows and whys of RDS,
+and will hopefully help you find your way around the code.
+
+In addition, please see this email about RDS origins:
+http://oss.oracle.com/pipermail/rds-devel/2007-November/000228.html
+
+RDS Architecture
+================
+
+RDS provides reliable, ordered datagram delivery by using a single
+reliable connection between any two nodes in the cluster. This allows
+applications to use a single socket to talk to any other process in the
+cluster - so in a cluster with N processes you need N sockets, in contrast
+to N*N if you use a connection-oriented socket transport like TCP.
+
+RDS is not Infiniband-specific; it was designed to support different
+transports. The current implementation used to support RDS over TCP as well
+as IB. Work is in progress to support RDS over iWARP, and using DCE to
+guarantee no dropped packets on Ethernet, it may be possible to use RDS over
+UDP in the future.
+
+The high-level semantics of RDS from the application's point of view are
+
+ * Addressing
+ RDS uses IPv4 addresses and 16bit port numbers to identify
+ the end point of a connection. All socket operations that involve
+ passing addresses between kernel and user space generally
+ use a struct sockaddr_in.
+
+ The fact that IPv4 addresses are used does not mean the underlying
+ transport has to be IP-based. In fact, RDS over IB uses a
+ reliable IB connection; the IP address is used exclusively to
+ locate the remote node's GID (by ARPing for the given IP).
+
+ The port space is entirely independent of UDP, TCP or any other
+ protocol.
+
+ * Socket interface
+ RDS sockets work *mostly* as you would expect from a BSD
+ socket. The next section will cover the details. At any rate,
+ all I/O is performed through the standard BSD socket API.
+ Some additions like zerocopy support are implemented through
+ control messages, while other extensions use the getsockopt/
+ setsockopt calls.
+
+ Sockets must be bound before you can send or receive data.
+ This is needed because binding also selects a transport and
+ attaches it to the socket. Once bound, the transport assignment
+ does not change. RDS will tolerate IPs moving around (eg in
+ a active-active HA scenario), but only as long as the address
+ doesn't move to a different transport.
+
+ * sysctls
+ RDS supports a number of sysctls in /proc/sys/net/rds
+
+
+Socket Interface
+================
+
+ AF_RDS, PF_RDS, SOL_RDS
+ AF_RDS and PF_RDS are the domain type to be used with socket(2)
+ to create RDS sockets. SOL_RDS is the socket-level to be used
+ with setsockopt(2) and getsockopt(2) for RDS specific socket
+ options.
+
+ fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+ This creates a new, unbound RDS socket.
+
+ setsockopt(SOL_SOCKET): send and receive buffer size
+ RDS honors the send and receive buffer size socket options.
+ You are not allowed to queue more than SO_SNDSIZE bytes to
+ a socket. A message is queued when sendmsg is called, and
+ it leaves the queue when the remote system acknowledges
+ its arrival.
+
+ The SO_RCVSIZE option controls the maximum receive queue length.
+ This is a soft limit rather than a hard limit - RDS will
+ continue to accept and queue incoming messages, even if that
+ takes the queue length over the limit. However, it will also
+ mark the port as "congested" and send a congestion update to
+ the source node. The source node is supposed to throttle any
+ processes sending to this congested port.
+
+ bind(fd, &sockaddr_in, ...)
+ This binds the socket to a local IP address and port, and a
+ transport.
+
+ sendmsg(fd, ...)
+ Sends a message to the indicated recipient. The kernel will
+ transparently establish the underlying reliable connection
+ if it isn't up yet.
+
+ An attempt to send a message that exceeds SO_SNDSIZE will
+ return with -EMSGSIZE
+
+ An attempt to send a message that would take the total number
+ of queued bytes over the SO_SNDSIZE threshold will return
+ EAGAIN.
+
+ An attempt to send a message to a destination that is marked
+ as "congested" will return ENOBUFS.
+
+ recvmsg(fd, ...)
+ Receives a message that was queued to this socket. The sockets
+ recv queue accounting is adjusted, and if the queue length
+ drops below SO_SNDSIZE, the port is marked uncongested, and
+ a congestion update is sent to all peers.
+
+ Applications can ask the RDS kernel module to receive
+ notifications via control messages (for instance, there is a
+ notification when a congestion update arrived, or when a RDMA
+ operation completes). These notifications are received through
+ the msg.msg_control buffer of struct msghdr. The format of the
+ messages is described in manpages.
+
+ poll(fd)
+ RDS supports the poll interface to allow the application
+ to implement async I/O.
+
+ POLLIN handling is pretty straightforward. When there's an
+ incoming message queued to the socket, or a pending notification,
+ we signal POLLIN.
+
+ POLLOUT is a little harder. Since you can essentially send
+ to any destination, RDS will always signal POLLOUT as long as
+ there's room on the send queue (ie the number of bytes queued
+ is less than the sendbuf size).
+
+ However, the kernel will refuse to accept messages to
+ a destination marked congested - in this case you will loop
+ forever if you rely on poll to tell you what to do.
+ This isn't a trivial problem, but applications can deal with
+ this - by using congestion notifications, and by checking for
+ ENOBUFS errors returned by sendmsg.
+
+ setsockopt(SOL_RDS, RDS_CANCEL_SENT_TO, &sockaddr_in)
+ This allows the application to discard all messages queued to a
+ specific destination on this particular socket.
+
+ This allows the application to cancel outstanding messages if
+ it detects a timeout. For instance, if it tried to send a message,
+ and the remote host is unreachable, RDS will keep trying forever.
+ The application may decide it's not worth it, and cancel the
+ operation. In this case, it would use RDS_CANCEL_SENT_TO to
+ nuke any pending messages.
+
+
+RDMA for RDS
+============
+
+ see rds-rdma(7) manpage (available in rds-tools)
+
+
+Congestion Notifications
+========================
+
+ see rds(7) manpage
+
+
+RDS Protocol
+============
+
+ Message header
+
+ The message header is a 'struct rds_header' (see rds.h):
+ Fields:
+ h_sequence:
+ per-packet sequence number
+ h_ack:
+ piggybacked acknowledgment of last packet received
+ h_len:
+ length of data, not including header
+ h_sport:
+ source port
+ h_dport:
+ destination port
+ h_flags:
+ CONG_BITMAP - this is a congestion update bitmap
+ ACK_REQUIRED - receiver must ack this packet
+ RETRANSMITTED - packet has previously been sent
+ h_credit:
+ indicate to other end of connection that
+ it has more credits available (i.e. there is
+ more send room)
+ h_padding[4]:
+ unused, for future use
+ h_csum:
+ header checksum
+ h_exthdr:
+ optional data can be passed here. This is currently used for
+ passing RDMA-related information.
+
+ ACK and retransmit handling
+
+ One might think that with reliable IB connections you wouldn't need
+ to ack messages that have been received. The problem is that IB
+ hardware generates an ack message before it has DMAed the message
+ into memory. This creates a potential message loss if the HCA is
+ disabled for any reason between when it sends the ack and before
+ the message is DMAed and processed. This is only a potential issue
+ if another HCA is available for fail-over.
+
+ Sending an ack immediately would allow the sender to free the sent
+ message from their send queue quickly, but could cause excessive
+ traffic to be used for acks. RDS piggybacks acks on sent data
+ packets. Ack-only packets are reduced by only allowing one to be
+ in flight at a time, and by the sender only asking for acks when
+ its send buffers start to fill up. All retransmissions are also
+ acked.
+
+ Flow Control
+
+ RDS's IB transport uses a credit-based mechanism to verify that
+ there is space in the peer's receive buffers for more data. This
+ eliminates the need for hardware retries on the connection.
+
+ Congestion
+
+ Messages waiting in the receive queue on the receiving socket
+ are accounted against the sockets SO_RCVBUF option value. Only
+ the payload bytes in the message are accounted for. If the
+ number of bytes queued equals or exceeds rcvbuf then the socket
+ is congested. All sends attempted to this socket's address
+ should return block or return -EWOULDBLOCK.
+
+ Applications are expected to be reasonably tuned such that this
+ situation very rarely occurs. An application encountering this
+ "back-pressure" is considered a bug.
+
+ This is implemented by having each node maintain bitmaps which
+ indicate which ports on bound addresses are congested. As the
+ bitmap changes it is sent through all the connections which
+ terminate in the local address of the bitmap which changed.
+
+ The bitmaps are allocated as connections are brought up. This
+ avoids allocation in the interrupt handling path which queues
+ sages on sockets. The dense bitmaps let transports send the
+ entire bitmap on any bitmap change reasonably efficiently. This
+ is much easier to implement than some finer-grained
+ communication of per-port congestion. The sender does a very
+ inexpensive bit test to test if the port it's about to send to
+ is congested or not.
+
+
+RDS Transport Layer
+==================
+
+ As mentioned above, RDS is not IB-specific. Its code is divided
+ into a general RDS layer and a transport layer.
+
+ The general layer handles the socket API, congestion handling,
+ loopback, stats, usermem pinning, and the connection state machine.
+
+ The transport layer handles the details of the transport. The IB
+ transport, for example, handles all the queue pairs, work requests,
+ CM event handlers, and other Infiniband details.
+
+
+RDS Kernel Structures
+=====================
+
+ struct rds_message
+ aka possibly "rds_outgoing", the generic RDS layer copies data to
+ be sent and sets header fields as needed, based on the socket API.
+ This is then queued for the individual connection and sent by the
+ connection's transport.
+ struct rds_incoming
+ a generic struct referring to incoming data that can be handed from
+ the transport to the general code and queued by the general code
+ while the socket is awoken. It is then passed back to the transport
+ code to handle the actual copy-to-user.
+ struct rds_socket
+ per-socket information
+ struct rds_connection
+ per-connection information
+ struct rds_transport
+ pointers to transport-specific functions
+ struct rds_statistics
+ non-transport-specific statistics
+ struct rds_cong_map
+ wraps the raw congestion bitmap, contains rbnode, waitq, etc.
+
+Connection management
+=====================
+
+ Connections may be in UP, DOWN, CONNECTING, DISCONNECTING, and
+ ERROR states.
+
+ The first time an attempt is made by an RDS socket to send data to
+ a node, a connection is allocated and connected. That connection is
+ then maintained forever -- if there are transport errors, the
+ connection will be dropped and re-established.
+
+ Dropping a connection while packets are queued will cause queued or
+ partially-sent datagrams to be retransmitted when the connection is
+ re-established.
+
+
+The send path
+=============
+
+ rds_sendmsg()
+ struct rds_message built from incoming data
+ CMSGs parsed (e.g. RDMA ops)
+ transport connection alloced and connected if not already
+ rds_message placed on send queue
+ send worker awoken
+ rds_send_worker()
+ calls rds_send_xmit() until queue is empty
+ rds_send_xmit()
+ transmits congestion map if one is pending
+ may set ACK_REQUIRED
+ calls transport to send either non-RDMA or RDMA message
+ (RDMA ops never retransmitted)
+ rds_ib_xmit()
+ allocs work requests from send ring
+ adds any new send credits available to peer (h_credits)
+ maps the rds_message's sg list
+ piggybacks ack
+ populates work requests
+ post send to connection's queue pair
+
+The recv path
+=============
+
+ rds_ib_recv_cq_comp_handler()
+ looks at write completions
+ unmaps recv buffer from device
+ no errors, call rds_ib_process_recv()
+ refill recv ring
+ rds_ib_process_recv()
+ validate header checksum
+ copy header to rds_ib_incoming struct if start of a new datagram
+ add to ibinc's fraglist
+ if competed datagram:
+ update cong map if datagram was cong update
+ call rds_recv_incoming() otherwise
+ note if ack is required
+ rds_recv_incoming()
+ drop duplicate packets
+ respond to pings
+ find the sock associated with this datagram
+ add to sock queue
+ wake up sock
+ do some congestion calculations
+ rds_recvmsg
+ copy data into user iovec
+ handle CMSGs
+ return to application
+
+
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
new file mode 100644
index 000000000..356f791af
--- /dev/null
+++ b/Documentation/networking/regulatory.txt
@@ -0,0 +1,214 @@
+Linux wireless regulatory documentation
+---------------------------------------
+
+This document gives a brief review over how the Linux wireless
+regulatory infrastructure works.
+
+More up to date information can be obtained at the project's web page:
+
+http://wireless.kernel.org/en/developers/Regulatory
+
+Keeping regulatory domains in userspace
+---------------------------------------
+
+Due to the dynamic nature of regulatory domains we keep them
+in userspace and provide a framework for userspace to upload
+to the kernel one regulatory domain to be used as the central
+core regulatory domain all wireless devices should adhere to.
+
+How to get regulatory domains to the kernel
+-------------------------------------------
+
+Userspace gets a regulatory domain in the kernel by having
+a userspace agent build it and send it via nl80211. Only
+expected regulatory domains will be respected by the kernel.
+
+A currently available userspace agent which can accomplish this
+is CRDA - central regulatory domain agent. Its documented here:
+
+http://wireless.kernel.org/en/developers/Regulatory/CRDA
+
+Essentially the kernel will send a udev event when it knows
+it needs a new regulatory domain. A udev rule can be put in place
+to trigger crda to send the respective regulatory domain for a
+specific ISO/IEC 3166 alpha2.
+
+Below is an example udev rule which can be used:
+
+# Example file, should be put in /etc/udev/rules.d/regulatory.rules
+KERNEL=="regulatory*", ACTION=="change", SUBSYSTEM=="platform", RUN+="/sbin/crda"
+
+The alpha2 is passed as an environment variable under the variable COUNTRY.
+
+Who asks for regulatory domains?
+--------------------------------
+
+* Users
+
+Users can use iw:
+
+http://wireless.kernel.org/en/users/Documentation/iw
+
+An example:
+
+ # set regulatory domain to "Costa Rica"
+ iw reg set CR
+
+This will request the kernel to set the regulatory domain to
+the specificied alpha2. The kernel in turn will then ask userspace
+to provide a regulatory domain for the alpha2 specified by the user
+by sending a uevent.
+
+* Wireless subsystems for Country Information elements
+
+The kernel will send a uevent to inform userspace a new
+regulatory domain is required. More on this to be added
+as its integration is added.
+
+* Drivers
+
+If drivers determine they need a specific regulatory domain
+set they can inform the wireless core using regulatory_hint().
+They have two options -- they either provide an alpha2 so that
+crda can provide back a regulatory domain for that country or
+they can build their own regulatory domain based on internal
+custom knowledge so the wireless core can respect it.
+
+*Most* drivers will rely on the first mechanism of providing a
+regulatory hint with an alpha2. For these drivers there is an additional
+check that can be used to ensure compliance based on custom EEPROM
+regulatory data. This additional check can be used by drivers by
+registering on its struct wiphy a reg_notifier() callback. This notifier
+is called when the core's regulatory domain has been changed. The driver
+can use this to review the changes made and also review who made them
+(driver, user, country IE) and determine what to allow based on its
+internal EEPROM data. Devices drivers wishing to be capable of world
+roaming should use this callback. More on world roaming will be
+added to this document when its support is enabled.
+
+Device drivers who provide their own built regulatory domain
+do not need a callback as the channels registered by them are
+the only ones that will be allowed and therefore *additional*
+channels cannot be enabled.
+
+Example code - drivers hinting an alpha2:
+------------------------------------------
+
+This example comes from the zd1211rw device driver. You can start
+by having a mapping of your device's EEPROM country/regulatory
+domain value to a specific alpha2 as follows:
+
+static struct zd_reg_alpha2_map reg_alpha2_map[] = {
+ { ZD_REGDOMAIN_FCC, "US" },
+ { ZD_REGDOMAIN_IC, "CA" },
+ { ZD_REGDOMAIN_ETSI, "DE" }, /* Generic ETSI, use most restrictive */
+ { ZD_REGDOMAIN_JAPAN, "JP" },
+ { ZD_REGDOMAIN_JAPAN_ADD, "JP" },
+ { ZD_REGDOMAIN_SPAIN, "ES" },
+ { ZD_REGDOMAIN_FRANCE, "FR" },
+
+Then you can define a routine to map your read EEPROM value to an alpha2,
+as follows:
+
+static int zd_reg2alpha2(u8 regdomain, char *alpha2)
+{
+ unsigned int i;
+ struct zd_reg_alpha2_map *reg_map;
+ for (i = 0; i < ARRAY_SIZE(reg_alpha2_map); i++) {
+ reg_map = &reg_alpha2_map[i];
+ if (regdomain == reg_map->reg) {
+ alpha2[0] = reg_map->alpha2[0];
+ alpha2[1] = reg_map->alpha2[1];
+ return 0;
+ }
+ }
+ return 1;
+}
+
+Lastly, you can then hint to the core of your discovered alpha2, if a match
+was found. You need to do this after you have registered your wiphy. You
+are expected to do this during initialization.
+
+ r = zd_reg2alpha2(mac->regdomain, alpha2);
+ if (!r)
+ regulatory_hint(hw->wiphy, alpha2);
+
+Example code - drivers providing a built in regulatory domain:
+--------------------------------------------------------------
+
+[NOTE: This API is not currently available, it can be added when required]
+
+If you have regulatory information you can obtain from your
+driver and you *need* to use this we let you build a regulatory domain
+structure and pass it to the wireless core. To do this you should
+kmalloc() a structure big enough to hold your regulatory domain
+structure and you should then fill it with your data. Finally you simply
+call regulatory_hint() with the regulatory domain structure in it.
+
+Bellow is a simple example, with a regulatory domain cached using the stack.
+Your implementation may vary (read EEPROM cache instead, for example).
+
+Example cache of some regulatory domain
+
+struct ieee80211_regdomain mydriver_jp_regdom = {
+ .n_reg_rules = 3,
+ .alpha2 = "JP",
+ //.alpha2 = "99", /* If I have no alpha2 to map it to */
+ .reg_rules = {
+ /* IEEE 802.11b/g, channels 1..14 */
+ REG_RULE(2412-20, 2484+20, 40, 6, 20, 0),
+ /* IEEE 802.11a, channels 34..48 */
+ REG_RULE(5170-20, 5240+20, 40, 6, 20,
+ NL80211_RRF_NO_IR),
+ /* IEEE 802.11a, channels 52..64 */
+ REG_RULE(5260-20, 5320+20, 40, 6, 20,
+ NL80211_RRF_NO_IR|
+ NL80211_RRF_DFS),
+ }
+};
+
+Then in some part of your code after your wiphy has been registered:
+
+ struct ieee80211_regdomain *rd;
+ int size_of_regd;
+ int num_rules = mydriver_jp_regdom.n_reg_rules;
+ unsigned int i;
+
+ size_of_regd = sizeof(struct ieee80211_regdomain) +
+ (num_rules * sizeof(struct ieee80211_reg_rule));
+
+ rd = kzalloc(size_of_regd, GFP_KERNEL);
+ if (!rd)
+ return -ENOMEM;
+
+ memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain));
+
+ for (i=0; i < num_rules; i++)
+ memcpy(&rd->reg_rules[i],
+ &mydriver_jp_regdom.reg_rules[i],
+ sizeof(struct ieee80211_reg_rule));
+ regulatory_struct_hint(rd);
+
+Statically compiled regulatory database
+---------------------------------------
+
+In most situations the userland solution using CRDA as described
+above is the preferred solution. However in some cases a set of
+rules built into the kernel itself may be desirable. To account
+for this situation, a configuration option has been provided
+(i.e. CONFIG_CFG80211_INTERNAL_REGDB). With this option enabled,
+the wireless database information contained in net/wireless/db.txt is
+used to generate a data structure encoded in net/wireless/regdb.c.
+That option also enables code in net/wireless/reg.c which queries
+the data in regdb.c as an alternative to using CRDA.
+
+The file net/wireless/db.txt should be kept up-to-date with the db.txt
+file available in the git repository here:
+
+ git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-regdb.git
+
+Again, most users in most situations should be using the CRDA package
+provided with their distribution, and in most other situations users
+should be building and using CRDA on their own rather than using
+this option. If you are not absolutely sure that you should be using
+CONFIG_CFG80211_INTERNAL_REGDB then _DO_NOT_USE_IT_.
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
new file mode 100644
index 000000000..16a924c48
--- /dev/null
+++ b/Documentation/networking/rxrpc.txt
@@ -0,0 +1,947 @@
+ ======================
+ RxRPC NETWORK PROTOCOL
+ ======================
+
+The RxRPC protocol driver provides a reliable two-phase transport on top of UDP
+that can be used to perform RxRPC remote operations. This is done over sockets
+of AF_RXRPC family, using sendmsg() and recvmsg() with control data to send and
+receive data, aborts and errors.
+
+Contents of this document:
+
+ (*) Overview.
+
+ (*) RxRPC protocol summary.
+
+ (*) AF_RXRPC driver model.
+
+ (*) Control messages.
+
+ (*) Socket options.
+
+ (*) Security.
+
+ (*) Example client usage.
+
+ (*) Example server usage.
+
+ (*) AF_RXRPC kernel interface.
+
+ (*) Configurable parameters.
+
+
+========
+OVERVIEW
+========
+
+RxRPC is a two-layer protocol. There is a session layer which provides
+reliable virtual connections using UDP over IPv4 (or IPv6) as the transport
+layer, but implements a real network protocol; and there's the presentation
+layer which renders structured data to binary blobs and back again using XDR
+(as does SunRPC):
+
+ +-------------+
+ | Application |
+ +-------------+
+ | XDR | Presentation
+ +-------------+
+ | RxRPC | Session
+ +-------------+
+ | UDP | Transport
+ +-------------+
+
+
+AF_RXRPC provides:
+
+ (1) Part of an RxRPC facility for both kernel and userspace applications by
+ making the session part of it a Linux network protocol (AF_RXRPC).
+
+ (2) A two-phase protocol. The client transmits a blob (the request) and then
+ receives a blob (the reply), and the server receives the request and then
+ transmits the reply.
+
+ (3) Retention of the reusable bits of the transport system set up for one call
+ to speed up subsequent calls.
+
+ (4) A secure protocol, using the Linux kernel's key retention facility to
+ manage security on the client end. The server end must of necessity be
+ more active in security negotiations.
+
+AF_RXRPC does not provide XDR marshalling/presentation facilities. That is
+left to the application. AF_RXRPC only deals in blobs. Even the operation ID
+is just the first four bytes of the request blob, and as such is beyond the
+kernel's interest.
+
+
+Sockets of AF_RXRPC family are:
+
+ (1) created as type SOCK_DGRAM;
+
+ (2) provided with a protocol of the type of underlying transport they're going
+ to use - currently only PF_INET is supported.
+
+
+The Andrew File System (AFS) is an example of an application that uses this and
+that has both kernel (filesystem) and userspace (utility) components.
+
+
+======================
+RXRPC PROTOCOL SUMMARY
+======================
+
+An overview of the RxRPC protocol:
+
+ (*) RxRPC sits on top of another networking protocol (UDP is the only option
+ currently), and uses this to provide network transport. UDP ports, for
+ example, provide transport endpoints.
+
+ (*) RxRPC supports multiple virtual "connections" from any given transport
+ endpoint, thus allowing the endpoints to be shared, even to the same
+ remote endpoint.
+
+ (*) Each connection goes to a particular "service". A connection may not go
+ to multiple services. A service may be considered the RxRPC equivalent of
+ a port number. AF_RXRPC permits multiple services to share an endpoint.
+
+ (*) Client-originating packets are marked, thus a transport endpoint can be
+ shared between client and server connections (connections have a
+ direction).
+
+ (*) Up to a billion connections may be supported concurrently between one
+ local transport endpoint and one service on one remote endpoint. An RxRPC
+ connection is described by seven numbers:
+
+ Local address }
+ Local port } Transport (UDP) address
+ Remote address }
+ Remote port }
+ Direction
+ Connection ID
+ Service ID
+
+ (*) Each RxRPC operation is a "call". A connection may make up to four
+ billion calls, but only up to four calls may be in progress on a
+ connection at any one time.
+
+ (*) Calls are two-phase and asymmetric: the client sends its request data,
+ which the service receives; then the service transmits the reply data
+ which the client receives.
+
+ (*) The data blobs are of indefinite size, the end of a phase is marked with a
+ flag in the packet. The number of packets of data making up one blob may
+ not exceed 4 billion, however, as this would cause the sequence number to
+ wrap.
+
+ (*) The first four bytes of the request data are the service operation ID.
+
+ (*) Security is negotiated on a per-connection basis. The connection is
+ initiated by the first data packet on it arriving. If security is
+ requested, the server then issues a "challenge" and then the client
+ replies with a "response". If the response is successful, the security is
+ set for the lifetime of that connection, and all subsequent calls made
+ upon it use that same security. In the event that the server lets a
+ connection lapse before the client, the security will be renegotiated if
+ the client uses the connection again.
+
+ (*) Calls use ACK packets to handle reliability. Data packets are also
+ explicitly sequenced per call.
+
+ (*) There are two types of positive acknowledgment: hard-ACKs and soft-ACKs.
+ A hard-ACK indicates to the far side that all the data received to a point
+ has been received and processed; a soft-ACK indicates that the data has
+ been received but may yet be discarded and re-requested. The sender may
+ not discard any transmittable packets until they've been hard-ACK'd.
+
+ (*) Reception of a reply data packet implicitly hard-ACK's all the data
+ packets that make up the request.
+
+ (*) An call is complete when the request has been sent, the reply has been
+ received and the final hard-ACK on the last packet of the reply has
+ reached the server.
+
+ (*) An call may be aborted by either end at any time up to its completion.
+
+
+=====================
+AF_RXRPC DRIVER MODEL
+=====================
+
+About the AF_RXRPC driver:
+
+ (*) The AF_RXRPC protocol transparently uses internal sockets of the transport
+ protocol to represent transport endpoints.
+
+ (*) AF_RXRPC sockets map onto RxRPC connection bundles. Actual RxRPC
+ connections are handled transparently. One client socket may be used to
+ make multiple simultaneous calls to the same service. One server socket
+ may handle calls from many clients.
+
+ (*) Additional parallel client connections will be initiated to support extra
+ concurrent calls, up to a tunable limit.
+
+ (*) Each connection is retained for a certain amount of time [tunable] after
+ the last call currently using it has completed in case a new call is made
+ that could reuse it.
+
+ (*) Each internal UDP socket is retained [tunable] for a certain amount of
+ time [tunable] after the last connection using it discarded, in case a new
+ connection is made that could use it.
+
+ (*) A client-side connection is only shared between calls if they have have
+ the same key struct describing their security (and assuming the calls
+ would otherwise share the connection). Non-secured calls would also be
+ able to share connections with each other.
+
+ (*) A server-side connection is shared if the client says it is.
+
+ (*) ACK'ing is handled by the protocol driver automatically, including ping
+ replying.
+
+ (*) SO_KEEPALIVE automatically pings the other side to keep the connection
+ alive [TODO].
+
+ (*) If an ICMP error is received, all calls affected by that error will be
+ aborted with an appropriate network error passed through recvmsg().
+
+
+Interaction with the user of the RxRPC socket:
+
+ (*) A socket is made into a server socket by binding an address with a
+ non-zero service ID.
+
+ (*) In the client, sending a request is achieved with one or more sendmsgs,
+ followed by the reply being received with one or more recvmsgs.
+
+ (*) The first sendmsg for a request to be sent from a client contains a tag to
+ be used in all other sendmsgs or recvmsgs associated with that call. The
+ tag is carried in the control data.
+
+ (*) connect() is used to supply a default destination address for a client
+ socket. This may be overridden by supplying an alternate address to the
+ first sendmsg() of a call (struct msghdr::msg_name).
+
+ (*) If connect() is called on an unbound client, a random local port will
+ bound before the operation takes place.
+
+ (*) A server socket may also be used to make client calls. To do this, the
+ first sendmsg() of the call must specify the target address. The server's
+ transport endpoint is used to send the packets.
+
+ (*) Once the application has received the last message associated with a call,
+ the tag is guaranteed not to be seen again, and so it can be used to pin
+ client resources. A new call can then be initiated with the same tag
+ without fear of interference.
+
+ (*) In the server, a request is received with one or more recvmsgs, then the
+ the reply is transmitted with one or more sendmsgs, and then the final ACK
+ is received with a last recvmsg.
+
+ (*) When sending data for a call, sendmsg is given MSG_MORE if there's more
+ data to come on that call.
+
+ (*) When receiving data for a call, recvmsg flags MSG_MORE if there's more
+ data to come for that call.
+
+ (*) When receiving data or messages for a call, MSG_EOR is flagged by recvmsg
+ to indicate the terminal message for that call.
+
+ (*) A call may be aborted by adding an abort control message to the control
+ data. Issuing an abort terminates the kernel's use of that call's tag.
+ Any messages waiting in the receive queue for that call will be discarded.
+
+ (*) Aborts, busy notifications and challenge packets are delivered by recvmsg,
+ and control data messages will be set to indicate the context. Receiving
+ an abort or a busy message terminates the kernel's use of that call's tag.
+
+ (*) The control data part of the msghdr struct is used for a number of things:
+
+ (*) The tag of the intended or affected call.
+
+ (*) Sending or receiving errors, aborts and busy notifications.
+
+ (*) Notifications of incoming calls.
+
+ (*) Sending debug requests and receiving debug replies [TODO].
+
+ (*) When the kernel has received and set up an incoming call, it sends a
+ message to server application to let it know there's a new call awaiting
+ its acceptance [recvmsg reports a special control message]. The server
+ application then uses sendmsg to assign a tag to the new call. Once that
+ is done, the first part of the request data will be delivered by recvmsg.
+
+ (*) The server application has to provide the server socket with a keyring of
+ secret keys corresponding to the security types it permits. When a secure
+ connection is being set up, the kernel looks up the appropriate secret key
+ in the keyring and then sends a challenge packet to the client and
+ receives a response packet. The kernel then checks the authorisation of
+ the packet and either aborts the connection or sets up the security.
+
+ (*) The name of the key a client will use to secure its communications is
+ nominated by a socket option.
+
+
+Notes on recvmsg:
+
+ (*) If there's a sequence of data messages belonging to a particular call on
+ the receive queue, then recvmsg will keep working through them until:
+
+ (a) it meets the end of that call's received data,
+
+ (b) it meets a non-data message,
+
+ (c) it meets a message belonging to a different call, or
+
+ (d) it fills the user buffer.
+
+ If recvmsg is called in blocking mode, it will keep sleeping, awaiting the
+ reception of further data, until one of the above four conditions is met.
+
+ (2) MSG_PEEK operates similarly, but will return immediately if it has put any
+ data in the buffer rather than sleeping until it can fill the buffer.
+
+ (3) If a data message is only partially consumed in filling a user buffer,
+ then the remainder of that message will be left on the front of the queue
+ for the next taker. MSG_TRUNC will never be flagged.
+
+ (4) If there is more data to be had on a call (it hasn't copied the last byte
+ of the last data message in that phase yet), then MSG_MORE will be
+ flagged.
+
+
+================
+CONTROL MESSAGES
+================
+
+AF_RXRPC makes use of control messages in sendmsg() and recvmsg() to multiplex
+calls, to invoke certain actions and to report certain conditions. These are:
+
+ MESSAGE ID SRT DATA MEANING
+ ======================= === =========== ===============================
+ RXRPC_USER_CALL_ID sr- User ID App's call specifier
+ RXRPC_ABORT srt Abort code Abort code to issue/received
+ RXRPC_ACK -rt n/a Final ACK received
+ RXRPC_NET_ERROR -rt error num Network error on call
+ RXRPC_BUSY -rt n/a Call rejected (server busy)
+ RXRPC_LOCAL_ERROR -rt error num Local error encountered
+ RXRPC_NEW_CALL -r- n/a New call received
+ RXRPC_ACCEPT s-- n/a Accept new call
+
+ (SRT = usable in Sendmsg / delivered by Recvmsg / Terminal message)
+
+ (*) RXRPC_USER_CALL_ID
+
+ This is used to indicate the application's call ID. It's an unsigned long
+ that the app specifies in the client by attaching it to the first data
+ message or in the server by passing it in association with an RXRPC_ACCEPT
+ message. recvmsg() passes it in conjunction with all messages except
+ those of the RXRPC_NEW_CALL message.
+
+ (*) RXRPC_ABORT
+
+ This is can be used by an application to abort a call by passing it to
+ sendmsg, or it can be delivered by recvmsg to indicate a remote abort was
+ received. Either way, it must be associated with an RXRPC_USER_CALL_ID to
+ specify the call affected. If an abort is being sent, then error EBADSLT
+ will be returned if there is no call with that user ID.
+
+ (*) RXRPC_ACK
+
+ This is delivered to a server application to indicate that the final ACK
+ of a call was received from the client. It will be associated with an
+ RXRPC_USER_CALL_ID to indicate the call that's now complete.
+
+ (*) RXRPC_NET_ERROR
+
+ This is delivered to an application to indicate that an ICMP error message
+ was encountered in the process of trying to talk to the peer. An
+ errno-class integer value will be included in the control message data
+ indicating the problem, and an RXRPC_USER_CALL_ID will indicate the call
+ affected.
+
+ (*) RXRPC_BUSY
+
+ This is delivered to a client application to indicate that a call was
+ rejected by the server due to the server being busy. It will be
+ associated with an RXRPC_USER_CALL_ID to indicate the rejected call.
+
+ (*) RXRPC_LOCAL_ERROR
+
+ This is delivered to an application to indicate that a local error was
+ encountered and that a call has been aborted because of it. An
+ errno-class integer value will be included in the control message data
+ indicating the problem, and an RXRPC_USER_CALL_ID will indicate the call
+ affected.
+
+ (*) RXRPC_NEW_CALL
+
+ This is delivered to indicate to a server application that a new call has
+ arrived and is awaiting acceptance. No user ID is associated with this,
+ as a user ID must subsequently be assigned by doing an RXRPC_ACCEPT.
+
+ (*) RXRPC_ACCEPT
+
+ This is used by a server application to attempt to accept a call and
+ assign it a user ID. It should be associated with an RXRPC_USER_CALL_ID
+ to indicate the user ID to be assigned. If there is no call to be
+ accepted (it may have timed out, been aborted, etc.), then sendmsg will
+ return error ENODATA. If the user ID is already in use by another call,
+ then error EBADSLT will be returned.
+
+
+==============
+SOCKET OPTIONS
+==============
+
+AF_RXRPC sockets support a few socket options at the SOL_RXRPC level:
+
+ (*) RXRPC_SECURITY_KEY
+
+ This is used to specify the description of the key to be used. The key is
+ extracted from the calling process's keyrings with request_key() and
+ should be of "rxrpc" type.
+
+ The optval pointer points to the description string, and optlen indicates
+ how long the string is, without the NUL terminator.
+
+ (*) RXRPC_SECURITY_KEYRING
+
+ Similar to above but specifies a keyring of server secret keys to use (key
+ type "keyring"). See the "Security" section.
+
+ (*) RXRPC_EXCLUSIVE_CONNECTION
+
+ This is used to request that new connections should be used for each call
+ made subsequently on this socket. optval should be NULL and optlen 0.
+
+ (*) RXRPC_MIN_SECURITY_LEVEL
+
+ This is used to specify the minimum security level required for calls on
+ this socket. optval must point to an int containing one of the following
+ values:
+
+ (a) RXRPC_SECURITY_PLAIN
+
+ Encrypted checksum only.
+
+ (b) RXRPC_SECURITY_AUTH
+
+ Encrypted checksum plus packet padded and first eight bytes of packet
+ encrypted - which includes the actual packet length.
+
+ (c) RXRPC_SECURITY_ENCRYPTED
+
+ Encrypted checksum plus entire packet padded and encrypted, including
+ actual packet length.
+
+
+========
+SECURITY
+========
+
+Currently, only the kerberos 4 equivalent protocol has been implemented
+(security index 2 - rxkad). This requires the rxkad module to be loaded and,
+on the client, tickets of the appropriate type to be obtained from the AFS
+kaserver or the kerberos server and installed as "rxrpc" type keys. This is
+normally done using the klog program. An example simple klog program can be
+found at:
+
+ http://people.redhat.com/~dhowells/rxrpc/klog.c
+
+The payload provided to add_key() on the client should be of the following
+form:
+
+ struct rxrpc_key_sec2_v1 {
+ uint16_t security_index; /* 2 */
+ uint16_t ticket_length; /* length of ticket[] */
+ uint32_t expiry; /* time at which expires */
+ uint8_t kvno; /* key version number */
+ uint8_t __pad[3];
+ uint8_t session_key[8]; /* DES session key */
+ uint8_t ticket[0]; /* the encrypted ticket */
+ };
+
+Where the ticket blob is just appended to the above structure.
+
+
+For the server, keys of type "rxrpc_s" must be made available to the server.
+They have a description of "<serviceID>:<securityIndex>" (eg: "52:2" for an
+rxkad key for the AFS VL service). When such a key is created, it should be
+given the server's secret key as the instantiation data (see the example
+below).
+
+ add_key("rxrpc_s", "52:2", secret_key, 8, keyring);
+
+A keyring is passed to the server socket by naming it in a sockopt. The server
+socket then looks the server secret keys up in this keyring when secure
+incoming connections are made. This can be seen in an example program that can
+be found at:
+
+ http://people.redhat.com/~dhowells/rxrpc/listen.c
+
+
+====================
+EXAMPLE CLIENT USAGE
+====================
+
+A client would issue an operation by:
+
+ (1) An RxRPC socket is set up by:
+
+ client = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
+
+ Where the third parameter indicates the protocol family of the transport
+ socket used - usually IPv4 but it can also be IPv6 [TODO].
+
+ (2) A local address can optionally be bound:
+
+ struct sockaddr_rxrpc srx = {
+ .srx_family = AF_RXRPC,
+ .srx_service = 0, /* we're a client */
+ .transport_type = SOCK_DGRAM, /* type of transport socket */
+ .transport.sin_family = AF_INET,
+ .transport.sin_port = htons(7000), /* AFS callback */
+ .transport.sin_address = 0, /* all local interfaces */
+ };
+ bind(client, &srx, sizeof(srx));
+
+ This specifies the local UDP port to be used. If not given, a random
+ non-privileged port will be used. A UDP port may be shared between
+ several unrelated RxRPC sockets. Security is handled on a basis of
+ per-RxRPC virtual connection.
+
+ (3) The security is set:
+
+ const char *key = "AFS:cambridge.redhat.com";
+ setsockopt(client, SOL_RXRPC, RXRPC_SECURITY_KEY, key, strlen(key));
+
+ This issues a request_key() to get the key representing the security
+ context. The minimum security level can be set:
+
+ unsigned int sec = RXRPC_SECURITY_ENCRYPTED;
+ setsockopt(client, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
+ &sec, sizeof(sec));
+
+ (4) The server to be contacted can then be specified (alternatively this can
+ be done through sendmsg):
+
+ struct sockaddr_rxrpc srx = {
+ .srx_family = AF_RXRPC,
+ .srx_service = VL_SERVICE_ID,
+ .transport_type = SOCK_DGRAM, /* type of transport socket */
+ .transport.sin_family = AF_INET,
+ .transport.sin_port = htons(7005), /* AFS volume manager */
+ .transport.sin_address = ...,
+ };
+ connect(client, &srx, sizeof(srx));
+
+ (5) The request data should then be posted to the server socket using a series
+ of sendmsg() calls, each with the following control message attached:
+
+ RXRPC_USER_CALL_ID - specifies the user ID for this call
+
+ MSG_MORE should be set in msghdr::msg_flags on all but the last part of
+ the request. Multiple requests may be made simultaneously.
+
+ If a call is intended to go to a destination other than the default
+ specified through connect(), then msghdr::msg_name should be set on the
+ first request message of that call.
+
+ (6) The reply data will then be posted to the server socket for recvmsg() to
+ pick up. MSG_MORE will be flagged by recvmsg() if there's more reply data
+ for a particular call to be read. MSG_EOR will be set on the terminal
+ read for a call.
+
+ All data will be delivered with the following control message attached:
+
+ RXRPC_USER_CALL_ID - specifies the user ID for this call
+
+ If an abort or error occurred, this will be returned in the control data
+ buffer instead, and MSG_EOR will be flagged to indicate the end of that
+ call.
+
+
+====================
+EXAMPLE SERVER USAGE
+====================
+
+A server would be set up to accept operations in the following manner:
+
+ (1) An RxRPC socket is created by:
+
+ server = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
+
+ Where the third parameter indicates the address type of the transport
+ socket used - usually IPv4.
+
+ (2) Security is set up if desired by giving the socket a keyring with server
+ secret keys in it:
+
+ keyring = add_key("keyring", "AFSkeys", NULL, 0,
+ KEY_SPEC_PROCESS_KEYRING);
+
+ const char secret_key[8] = {
+ 0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 };
+ add_key("rxrpc_s", "52:2", secret_key, 8, keyring);
+
+ setsockopt(server, SOL_RXRPC, RXRPC_SECURITY_KEYRING, "AFSkeys", 7);
+
+ The keyring can be manipulated after it has been given to the socket. This
+ permits the server to add more keys, replace keys, etc. whilst it is live.
+
+ (2) A local address must then be bound:
+
+ struct sockaddr_rxrpc srx = {
+ .srx_family = AF_RXRPC,
+ .srx_service = VL_SERVICE_ID, /* RxRPC service ID */
+ .transport_type = SOCK_DGRAM, /* type of transport socket */
+ .transport.sin_family = AF_INET,
+ .transport.sin_port = htons(7000), /* AFS callback */
+ .transport.sin_address = 0, /* all local interfaces */
+ };
+ bind(server, &srx, sizeof(srx));
+
+ (3) The server is then set to listen out for incoming calls:
+
+ listen(server, 100);
+
+ (4) The kernel notifies the server of pending incoming connections by sending
+ it a message for each. This is received with recvmsg() on the server
+ socket. It has no data, and has a single dataless control message
+ attached:
+
+ RXRPC_NEW_CALL
+
+ The address that can be passed back by recvmsg() at this point should be
+ ignored since the call for which the message was posted may have gone by
+ the time it is accepted - in which case the first call still on the queue
+ will be accepted.
+
+ (5) The server then accepts the new call by issuing a sendmsg() with two
+ pieces of control data and no actual data:
+
+ RXRPC_ACCEPT - indicate connection acceptance
+ RXRPC_USER_CALL_ID - specify user ID for this call
+
+ (6) The first request data packet will then be posted to the server socket for
+ recvmsg() to pick up. At that point, the RxRPC address for the call can
+ be read from the address fields in the msghdr struct.
+
+ Subsequent request data will be posted to the server socket for recvmsg()
+ to collect as it arrives. All but the last piece of the request data will
+ be delivered with MSG_MORE flagged.
+
+ All data will be delivered with the following control message attached:
+
+ RXRPC_USER_CALL_ID - specifies the user ID for this call
+
+ (8) The reply data should then be posted to the server socket using a series
+ of sendmsg() calls, each with the following control messages attached:
+
+ RXRPC_USER_CALL_ID - specifies the user ID for this call
+
+ MSG_MORE should be set in msghdr::msg_flags on all but the last message
+ for a particular call.
+
+ (9) The final ACK from the client will be posted for retrieval by recvmsg()
+ when it is received. It will take the form of a dataless message with two
+ control messages attached:
+
+ RXRPC_USER_CALL_ID - specifies the user ID for this call
+ RXRPC_ACK - indicates final ACK (no data)
+
+ MSG_EOR will be flagged to indicate that this is the final message for
+ this call.
+
+(10) Up to the point the final packet of reply data is sent, the call can be
+ aborted by calling sendmsg() with a dataless message with the following
+ control messages attached:
+
+ RXRPC_USER_CALL_ID - specifies the user ID for this call
+ RXRPC_ABORT - indicates abort code (4 byte data)
+
+ Any packets waiting in the socket's receive queue will be discarded if
+ this is issued.
+
+Note that all the communications for a particular service take place through
+the one server socket, using control messages on sendmsg() and recvmsg() to
+determine the call affected.
+
+
+=========================
+AF_RXRPC KERNEL INTERFACE
+=========================
+
+The AF_RXRPC module also provides an interface for use by in-kernel utilities
+such as the AFS filesystem. This permits such a utility to:
+
+ (1) Use different keys directly on individual client calls on one socket
+ rather than having to open a whole slew of sockets, one for each key it
+ might want to use.
+
+ (2) Avoid having RxRPC call request_key() at the point of issue of a call or
+ opening of a socket. Instead the utility is responsible for requesting a
+ key at the appropriate point. AFS, for instance, would do this during VFS
+ operations such as open() or unlink(). The key is then handed through
+ when the call is initiated.
+
+ (3) Request the use of something other than GFP_KERNEL to allocate memory.
+
+ (4) Avoid the overhead of using the recvmsg() call. RxRPC messages can be
+ intercepted before they get put into the socket Rx queue and the socket
+ buffers manipulated directly.
+
+To use the RxRPC facility, a kernel utility must still open an AF_RXRPC socket,
+bind an address as appropriate and listen if it's to be a server socket, but
+then it passes this to the kernel interface functions.
+
+The kernel interface functions are as follows:
+
+ (*) Begin a new client call.
+
+ struct rxrpc_call *
+ rxrpc_kernel_begin_call(struct socket *sock,
+ struct sockaddr_rxrpc *srx,
+ struct key *key,
+ unsigned long user_call_ID,
+ gfp_t gfp);
+
+ This allocates the infrastructure to make a new RxRPC call and assigns
+ call and connection numbers. The call will be made on the UDP port that
+ the socket is bound to. The call will go to the destination address of a
+ connected client socket unless an alternative is supplied (srx is
+ non-NULL).
+
+ If a key is supplied then this will be used to secure the call instead of
+ the key bound to the socket with the RXRPC_SECURITY_KEY sockopt. Calls
+ secured in this way will still share connections if at all possible.
+
+ The user_call_ID is equivalent to that supplied to sendmsg() in the
+ control data buffer. It is entirely feasible to use this to point to a
+ kernel data structure.
+
+ If this function is successful, an opaque reference to the RxRPC call is
+ returned. The caller now holds a reference on this and it must be
+ properly ended.
+
+ (*) End a client call.
+
+ void rxrpc_kernel_end_call(struct rxrpc_call *call);
+
+ This is used to end a previously begun call. The user_call_ID is expunged
+ from AF_RXRPC's knowledge and will not be seen again in association with
+ the specified call.
+
+ (*) Send data through a call.
+
+ int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
+ size_t len);
+
+ This is used to supply either the request part of a client call or the
+ reply part of a server call. msg.msg_iovlen and msg.msg_iov specify the
+ data buffers to be used. msg_iov may not be NULL and must point
+ exclusively to in-kernel virtual addresses. msg.msg_flags may be given
+ MSG_MORE if there will be subsequent data sends for this call.
+
+ The msg must not specify a destination address, control data or any flags
+ other than MSG_MORE. len is the total amount of data to transmit.
+
+ (*) Abort a call.
+
+ void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code);
+
+ This is used to abort a call if it's still in an abortable state. The
+ abort code specified will be placed in the ABORT message sent.
+
+ (*) Intercept received RxRPC messages.
+
+ typedef void (*rxrpc_interceptor_t)(struct sock *sk,
+ unsigned long user_call_ID,
+ struct sk_buff *skb);
+
+ void
+ rxrpc_kernel_intercept_rx_messages(struct socket *sock,
+ rxrpc_interceptor_t interceptor);
+
+ This installs an interceptor function on the specified AF_RXRPC socket.
+ All messages that would otherwise wind up in the socket's Rx queue are
+ then diverted to this function. Note that care must be taken to process
+ the messages in the right order to maintain DATA message sequentiality.
+
+ The interceptor function itself is provided with the address of the socket
+ and handling the incoming message, the ID assigned by the kernel utility
+ to the call and the socket buffer containing the message.
+
+ The skb->mark field indicates the type of message:
+
+ MARK MEANING
+ =============================== =======================================
+ RXRPC_SKB_MARK_DATA Data message
+ RXRPC_SKB_MARK_FINAL_ACK Final ACK received for an incoming call
+ RXRPC_SKB_MARK_BUSY Client call rejected as server busy
+ RXRPC_SKB_MARK_REMOTE_ABORT Call aborted by peer
+ RXRPC_SKB_MARK_NET_ERROR Network error detected
+ RXRPC_SKB_MARK_LOCAL_ERROR Local error encountered
+ RXRPC_SKB_MARK_NEW_CALL New incoming call awaiting acceptance
+
+ The remote abort message can be probed with rxrpc_kernel_get_abort_code().
+ The two error messages can be probed with rxrpc_kernel_get_error_number().
+ A new call can be accepted with rxrpc_kernel_accept_call().
+
+ Data messages can have their contents extracted with the usual bunch of
+ socket buffer manipulation functions. A data message can be determined to
+ be the last one in a sequence with rxrpc_kernel_is_data_last(). When a
+ data message has been used up, rxrpc_kernel_data_delivered() should be
+ called on it..
+
+ Non-data messages should be handled to rxrpc_kernel_free_skb() to dispose
+ of. It is possible to get extra refs on all types of message for later
+ freeing, but this may pin the state of a call until the message is finally
+ freed.
+
+ (*) Accept an incoming call.
+
+ struct rxrpc_call *
+ rxrpc_kernel_accept_call(struct socket *sock,
+ unsigned long user_call_ID);
+
+ This is used to accept an incoming call and to assign it a call ID. This
+ function is similar to rxrpc_kernel_begin_call() and calls accepted must
+ be ended in the same way.
+
+ If this function is successful, an opaque reference to the RxRPC call is
+ returned. The caller now holds a reference on this and it must be
+ properly ended.
+
+ (*) Reject an incoming call.
+
+ int rxrpc_kernel_reject_call(struct socket *sock);
+
+ This is used to reject the first incoming call on the socket's queue with
+ a BUSY message. -ENODATA is returned if there were no incoming calls.
+ Other errors may be returned if the call had been aborted (-ECONNABORTED)
+ or had timed out (-ETIME).
+
+ (*) Record the delivery of a data message and free it.
+
+ void rxrpc_kernel_data_delivered(struct sk_buff *skb);
+
+ This is used to record a data message as having been delivered and to
+ update the ACK state for the call. The socket buffer will be freed.
+
+ (*) Free a message.
+
+ void rxrpc_kernel_free_skb(struct sk_buff *skb);
+
+ This is used to free a non-DATA socket buffer intercepted from an AF_RXRPC
+ socket.
+
+ (*) Determine if a data message is the last one on a call.
+
+ bool rxrpc_kernel_is_data_last(struct sk_buff *skb);
+
+ This is used to determine if a socket buffer holds the last data message
+ to be received for a call (true will be returned if it does, false
+ if not).
+
+ The data message will be part of the reply on a client call and the
+ request on an incoming call. In the latter case there will be more
+ messages, but in the former case there will not.
+
+ (*) Get the abort code from an abort message.
+
+ u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb);
+
+ This is used to extract the abort code from a remote abort message.
+
+ (*) Get the error number from a local or network error message.
+
+ int rxrpc_kernel_get_error_number(struct sk_buff *skb);
+
+ This is used to extract the error number from a message indicating either
+ a local error occurred or a network error occurred.
+
+ (*) Allocate a null key for doing anonymous security.
+
+ struct key *rxrpc_get_null_key(const char *keyname);
+
+ This is used to allocate a null RxRPC key that can be used to indicate
+ anonymous security for a particular domain.
+
+
+=======================
+CONFIGURABLE PARAMETERS
+=======================
+
+The RxRPC protocol driver has a number of configurable parameters that can be
+adjusted through sysctls in /proc/net/rxrpc/:
+
+ (*) req_ack_delay
+
+ The amount of time in milliseconds after receiving a packet with the
+ request-ack flag set before we honour the flag and actually send the
+ requested ack.
+
+ Usually the other side won't stop sending packets until the advertised
+ reception window is full (to a maximum of 255 packets), so delaying the
+ ACK permits several packets to be ACK'd in one go.
+
+ (*) soft_ack_delay
+
+ The amount of time in milliseconds after receiving a new packet before we
+ generate a soft-ACK to tell the sender that it doesn't need to resend.
+
+ (*) idle_ack_delay
+
+ The amount of time in milliseconds after all the packets currently in the
+ received queue have been consumed before we generate a hard-ACK to tell
+ the sender it can free its buffers, assuming no other reason occurs that
+ we would send an ACK.
+
+ (*) resend_timeout
+
+ The amount of time in milliseconds after transmitting a packet before we
+ transmit it again, assuming no ACK is received from the receiver telling
+ us they got it.
+
+ (*) max_call_lifetime
+
+ The maximum amount of time in seconds that a call may be in progress
+ before we preemptively kill it.
+
+ (*) dead_call_expiry
+
+ The amount of time in seconds before we remove a dead call from the call
+ list. Dead calls are kept around for a little while for the purpose of
+ repeating ACK and ABORT packets.
+
+ (*) connection_expiry
+
+ The amount of time in seconds after a connection was last used before we
+ remove it from the connection list. Whilst a connection is in existence,
+ it serves as a placeholder for negotiated security; when it is deleted,
+ the security must be renegotiated.
+
+ (*) transport_expiry
+
+ The amount of time in seconds after a transport was last used before we
+ remove it from the transport list. Whilst a transport is in existence, it
+ serves to anchor the peer data and keeps the connection ID counter.
+
+ (*) rxrpc_rx_window_size
+
+ The size of the receive window in packets. This is the maximum number of
+ unconsumed received packets we're willing to hold in memory for any
+ particular call.
+
+ (*) rxrpc_rx_mtu
+
+ The maximum packet MTU size that we're willing to receive in bytes. This
+ indicates to the peer whether we're willing to accept jumbo packets.
+
+ (*) rxrpc_rx_jumbo_max
+
+ The maximum number of packets that we're willing to accept in a jumbo
+ packet. Non-terminal packets in a jumbo packet must contain a four byte
+ header plus exactly 1412 bytes of data. The terminal packet must contain
+ a four byte header plus any amount of data. In any event, a jumbo packet
+ may not exceed rxrpc_rx_mtu in size.
diff --git a/Documentation/networking/s2io.txt b/Documentation/networking/s2io.txt
new file mode 100644
index 000000000..0362a42f7
--- /dev/null
+++ b/Documentation/networking/s2io.txt
@@ -0,0 +1,141 @@
+Release notes for Neterion's (Formerly S2io) Xframe I/II PCI-X 10GbE driver.
+
+Contents
+=======
+- 1. Introduction
+- 2. Identifying the adapter/interface
+- 3. Features supported
+- 4. Command line parameters
+- 5. Performance suggestions
+- 6. Available Downloads
+
+
+1. Introduction:
+This Linux driver supports Neterion's Xframe I PCI-X 1.0 and
+Xframe II PCI-X 2.0 adapters. It supports several features
+such as jumbo frames, MSI/MSI-X, checksum offloads, TSO, UFO and so on.
+See below for complete list of features.
+All features are supported for both IPv4 and IPv6.
+
+2. Identifying the adapter/interface:
+a. Insert the adapter(s) in your system.
+b. Build and load driver
+# insmod s2io.ko
+c. View log messages
+# dmesg | tail -40
+You will see messages similar to:
+eth3: Neterion Xframe I 10GbE adapter (rev 3), Version 2.0.9.1, Intr type INTA
+eth4: Neterion Xframe II 10GbE adapter (rev 2), Version 2.0.9.1, Intr type INTA
+eth4: Device is on 64 bit 133MHz PCIX(M1) bus
+
+The above messages identify the adapter type(Xframe I/II), adapter revision,
+driver version, interface name(eth3, eth4), Interrupt type(INTA, MSI, MSI-X).
+In case of Xframe II, the PCI/PCI-X bus width and frequency are displayed
+as well.
+
+To associate an interface with a physical adapter use "ethtool -p <ethX>".
+The corresponding adapter's LED will blink multiple times.
+
+3. Features supported:
+a. Jumbo frames. Xframe I/II supports MTU up to 9600 bytes,
+modifiable using ip command.
+
+b. Offloads. Supports checksum offload(TCP/UDP/IP) on transmit
+and receive, TSO.
+
+c. Multi-buffer receive mode. Scattering of packet across multiple
+buffers. Currently driver supports 2-buffer mode which yields
+significant performance improvement on certain platforms(SGI Altix,
+IBM xSeries).
+
+d. MSI/MSI-X. Can be enabled on platforms which support this feature
+(IA64, Xeon) resulting in noticeable performance improvement(up to 7%
+on certain platforms).
+
+e. Statistics. Comprehensive MAC-level and software statistics displayed
+using "ethtool -S" option.
+
+f. Multi-FIFO/Ring. Supports up to 8 transmit queues and receive rings,
+with multiple steering options.
+
+4. Command line parameters
+a. tx_fifo_num
+Number of transmit queues
+Valid range: 1-8
+Default: 1
+
+b. rx_ring_num
+Number of receive rings
+Valid range: 1-8
+Default: 1
+
+c. tx_fifo_len
+Size of each transmit queue
+Valid range: Total length of all queues should not exceed 8192
+Default: 4096
+
+d. rx_ring_sz
+Size of each receive ring(in 4K blocks)
+Valid range: Limited by memory on system
+Default: 30
+
+e. intr_type
+Specifies interrupt type. Possible values 0(INTA), 2(MSI-X)
+Valid values: 0, 2
+Default: 2
+
+5. Performance suggestions
+General:
+a. Set MTU to maximum(9000 for switch setup, 9600 in back-to-back configuration)
+b. Set TCP windows size to optimal value.
+For instance, for MTU=1500 a value of 210K has been observed to result in
+good performance.
+# sysctl -w net.ipv4.tcp_rmem="210000 210000 210000"
+# sysctl -w net.ipv4.tcp_wmem="210000 210000 210000"
+For MTU=9000, TCP window size of 10 MB is recommended.
+# sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000"
+# sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000"
+
+Transmit performance:
+a. By default, the driver respects BIOS settings for PCI bus parameters.
+However, you may want to experiment with PCI bus parameters
+max-split-transactions(MOST) and MMRBC (use setpci command).
+A MOST value of 2 has been found optimal for Opterons and 3 for Itanium.
+It could be different for your hardware.
+Set MMRBC to 4K**.
+
+For example you can set
+For opteron
+#setpci -d 17d5:* 62=1d
+For Itanium
+#setpci -d 17d5:* 62=3d
+
+For detailed description of the PCI registers, please see Xframe User Guide.
+
+b. Ensure Transmit Checksum offload is enabled. Use ethtool to set/verify this
+parameter.
+c. Turn on TSO(using "ethtool -K")
+# ethtool -K <ethX> tso on
+
+Receive performance:
+a. By default, the driver respects BIOS settings for PCI bus parameters.
+However, you may want to set PCI latency timer to 248.
+#setpci -d 17d5:* LATENCY_TIMER=f8
+For detailed description of the PCI registers, please see Xframe User Guide.
+b. Use 2-buffer mode. This results in large performance boost on
+certain platforms(eg. SGI Altix, IBM xSeries).
+c. Ensure Receive Checksum offload is enabled. Use "ethtool -K ethX" command to
+set/verify this option.
+d. Enable NAPI feature(in kernel configuration Device Drivers ---> Network
+device support ---> Ethernet (10000 Mbit) ---> S2IO 10Gbe Xframe NIC) to
+bring down CPU utilization.
+
+** For AMD opteron platforms with 8131 chipset, MMRBC=1 and MOST=1 are
+recommended as safe parameters.
+For more information, please review the AMD8131 errata at
+http://vip.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/
+26310_AMD-8131_HyperTransport_PCI-X_Tunnel_Revision_Guide_rev_3_18.pdf
+
+6. Support
+For further support please contact either your 10GbE Xframe NIC vendor (IBM,
+HP, SGI etc.)
diff --git a/Documentation/networking/scaling.txt b/Documentation/networking/scaling.txt
new file mode 100644
index 000000000..59f4db2a0
--- /dev/null
+++ b/Documentation/networking/scaling.txt
@@ -0,0 +1,445 @@
+Scaling in the Linux Networking Stack
+
+
+Introduction
+============
+
+This document describes a set of complementary techniques in the Linux
+networking stack to increase parallelism and improve performance for
+multi-processor systems.
+
+The following technologies are described:
+
+ RSS: Receive Side Scaling
+ RPS: Receive Packet Steering
+ RFS: Receive Flow Steering
+ Accelerated Receive Flow Steering
+ XPS: Transmit Packet Steering
+
+
+RSS: Receive Side Scaling
+=========================
+
+Contemporary NICs support multiple receive and transmit descriptor queues
+(multi-queue). On reception, a NIC can send different packets to different
+queues to distribute processing among CPUs. The NIC distributes packets by
+applying a filter to each packet that assigns it to one of a small number
+of logical flows. Packets for each flow are steered to a separate receive
+queue, which in turn can be processed by separate CPUs. This mechanism is
+generally known as “Receive-side Scaling” (RSS). The goal of RSS and
+the other scaling techniques is to increase performance uniformly.
+Multi-queue distribution can also be used for traffic prioritization, but
+that is not the focus of these techniques.
+
+The filter used in RSS is typically a hash function over the network
+and/or transport layer headers-- for example, a 4-tuple hash over
+IP addresses and TCP ports of a packet. The most common hardware
+implementation of RSS uses a 128-entry indirection table where each entry
+stores a queue number. The receive queue for a packet is determined
+by masking out the low order seven bits of the computed hash for the
+packet (usually a Toeplitz hash), taking this number as a key into the
+indirection table and reading the corresponding value.
+
+Some advanced NICs allow steering packets to queues based on
+programmable filters. For example, webserver bound TCP port 80 packets
+can be directed to their own receive queue. Such “n-tuple” filters can
+be configured from ethtool (--config-ntuple).
+
+==== RSS Configuration
+
+The driver for a multi-queue capable NIC typically provides a kernel
+module parameter for specifying the number of hardware queues to
+configure. In the bnx2x driver, for instance, this parameter is called
+num_queues. A typical RSS configuration would be to have one receive queue
+for each CPU if the device supports enough queues, or otherwise at least
+one for each memory domain, where a memory domain is a set of CPUs that
+share a particular memory level (L1, L2, NUMA node, etc.).
+
+The indirection table of an RSS device, which resolves a queue by masked
+hash, is usually programmed by the driver at initialization. The
+default mapping is to distribute the queues evenly in the table, but the
+indirection table can be retrieved and modified at runtime using ethtool
+commands (--show-rxfh-indir and --set-rxfh-indir). Modifying the
+indirection table could be done to give different queues different
+relative weights.
+
+== RSS IRQ Configuration
+
+Each receive queue has a separate IRQ associated with it. The NIC triggers
+this to notify a CPU when new packets arrive on the given queue. The
+signaling path for PCIe devices uses message signaled interrupts (MSI-X),
+that can route each interrupt to a particular CPU. The active mapping
+of queues to IRQs can be determined from /proc/interrupts. By default,
+an IRQ may be handled on any CPU. Because a non-negligible part of packet
+processing takes place in receive interrupt handling, it is advantageous
+to spread receive interrupts between CPUs. To manually adjust the IRQ
+affinity of each interrupt see Documentation/IRQ-affinity.txt. Some systems
+will be running irqbalance, a daemon that dynamically optimizes IRQ
+assignments and as a result may override any manual settings.
+
+== Suggested Configuration
+
+RSS should be enabled when latency is a concern or whenever receive
+interrupt processing forms a bottleneck. Spreading load between CPUs
+decreases queue length. For low latency networking, the optimal setting
+is to allocate as many queues as there are CPUs in the system (or the
+NIC maximum, if lower). The most efficient high-rate configuration
+is likely the one with the smallest number of receive queues where no
+receive queue overflows due to a saturated CPU, because in default
+mode with interrupt coalescing enabled, the aggregate number of
+interrupts (and thus work) grows with each additional queue.
+
+Per-cpu load can be observed using the mpstat utility, but note that on
+processors with hyperthreading (HT), each hyperthread is represented as
+a separate CPU. For interrupt handling, HT has shown no benefit in
+initial tests, so limit the number of queues to the number of CPU cores
+in the system.
+
+
+RPS: Receive Packet Steering
+============================
+
+Receive Packet Steering (RPS) is logically a software implementation of
+RSS. Being in software, it is necessarily called later in the datapath.
+Whereas RSS selects the queue and hence CPU that will run the hardware
+interrupt handler, RPS selects the CPU to perform protocol processing
+above the interrupt handler. This is accomplished by placing the packet
+on the desired CPU’s backlog queue and waking up the CPU for processing.
+RPS has some advantages over RSS: 1) it can be used with any NIC,
+2) software filters can easily be added to hash over new protocols,
+3) it does not increase hardware device interrupt rate (although it does
+introduce inter-processor interrupts (IPIs)).
+
+RPS is called during bottom half of the receive interrupt handler, when
+a driver sends a packet up the network stack with netif_rx() or
+netif_receive_skb(). These call the get_rps_cpu() function, which
+selects the queue that should process a packet.
+
+The first step in determining the target CPU for RPS is to calculate a
+flow hash over the packet’s addresses or ports (2-tuple or 4-tuple hash
+depending on the protocol). This serves as a consistent hash of the
+associated flow of the packet. The hash is either provided by hardware
+or will be computed in the stack. Capable hardware can pass the hash in
+the receive descriptor for the packet; this would usually be the same
+hash used for RSS (e.g. computed Toeplitz hash). The hash is saved in
+skb->rx_hash and can be used elsewhere in the stack as a hash of the
+packet’s flow.
+
+Each receive hardware queue has an associated list of CPUs to which
+RPS may enqueue packets for processing. For each received packet,
+an index into the list is computed from the flow hash modulo the size
+of the list. The indexed CPU is the target for processing the packet,
+and the packet is queued to the tail of that CPU’s backlog queue. At
+the end of the bottom half routine, IPIs are sent to any CPUs for which
+packets have been queued to their backlog queue. The IPI wakes backlog
+processing on the remote CPU, and any queued packets are then processed
+up the networking stack.
+
+==== RPS Configuration
+
+RPS requires a kernel compiled with the CONFIG_RPS kconfig symbol (on
+by default for SMP). Even when compiled in, RPS remains disabled until
+explicitly configured. The list of CPUs to which RPS may forward traffic
+can be configured for each receive queue using a sysfs file entry:
+
+ /sys/class/net/<dev>/queues/rx-<n>/rps_cpus
+
+This file implements a bitmap of CPUs. RPS is disabled when it is zero
+(the default), in which case packets are processed on the interrupting
+CPU. Documentation/IRQ-affinity.txt explains how CPUs are assigned to
+the bitmap.
+
+== Suggested Configuration
+
+For a single queue device, a typical RPS configuration would be to set
+the rps_cpus to the CPUs in the same memory domain of the interrupting
+CPU. If NUMA locality is not an issue, this could also be all CPUs in
+the system. At high interrupt rate, it might be wise to exclude the
+interrupting CPU from the map since that already performs much work.
+
+For a multi-queue system, if RSS is configured so that a hardware
+receive queue is mapped to each CPU, then RPS is probably redundant
+and unnecessary. If there are fewer hardware queues than CPUs, then
+RPS might be beneficial if the rps_cpus for each queue are the ones that
+share the same memory domain as the interrupting CPU for that queue.
+
+==== RPS Flow Limit
+
+RPS scales kernel receive processing across CPUs without introducing
+reordering. The trade-off to sending all packets from the same flow
+to the same CPU is CPU load imbalance if flows vary in packet rate.
+In the extreme case a single flow dominates traffic. Especially on
+common server workloads with many concurrent connections, such
+behavior indicates a problem such as a misconfiguration or spoofed
+source Denial of Service attack.
+
+Flow Limit is an optional RPS feature that prioritizes small flows
+during CPU contention by dropping packets from large flows slightly
+ahead of those from small flows. It is active only when an RPS or RFS
+destination CPU approaches saturation. Once a CPU's input packet
+queue exceeds half the maximum queue length (as set by sysctl
+net.core.netdev_max_backlog), the kernel starts a per-flow packet
+count over the last 256 packets. If a flow exceeds a set ratio (by
+default, half) of these packets when a new packet arrives, then the
+new packet is dropped. Packets from other flows are still only
+dropped once the input packet queue reaches netdev_max_backlog.
+No packets are dropped when the input packet queue length is below
+the threshold, so flow limit does not sever connections outright:
+even large flows maintain connectivity.
+
+== Interface
+
+Flow limit is compiled in by default (CONFIG_NET_FLOW_LIMIT), but not
+turned on. It is implemented for each CPU independently (to avoid lock
+and cache contention) and toggled per CPU by setting the relevant bit
+in sysctl net.core.flow_limit_cpu_bitmap. It exposes the same CPU
+bitmap interface as rps_cpus (see above) when called from procfs:
+
+ /proc/sys/net/core/flow_limit_cpu_bitmap
+
+Per-flow rate is calculated by hashing each packet into a hashtable
+bucket and incrementing a per-bucket counter. The hash function is
+the same that selects a CPU in RPS, but as the number of buckets can
+be much larger than the number of CPUs, flow limit has finer-grained
+identification of large flows and fewer false positives. The default
+table has 4096 buckets. This value can be modified through sysctl
+
+ net.core.flow_limit_table_len
+
+The value is only consulted when a new table is allocated. Modifying
+it does not update active tables.
+
+== Suggested Configuration
+
+Flow limit is useful on systems with many concurrent connections,
+where a single connection taking up 50% of a CPU indicates a problem.
+In such environments, enable the feature on all CPUs that handle
+network rx interrupts (as set in /proc/irq/N/smp_affinity).
+
+The feature depends on the input packet queue length to exceed
+the flow limit threshold (50%) + the flow history length (256).
+Setting net.core.netdev_max_backlog to either 1000 or 10000
+performed well in experiments.
+
+
+RFS: Receive Flow Steering
+==========================
+
+While RPS steers packets solely based on hash, and thus generally
+provides good load distribution, it does not take into account
+application locality. This is accomplished by Receive Flow Steering
+(RFS). The goal of RFS is to increase datacache hitrate by steering
+kernel processing of packets to the CPU where the application thread
+consuming the packet is running. RFS relies on the same RPS mechanisms
+to enqueue packets onto the backlog of another CPU and to wake up that
+CPU.
+
+In RFS, packets are not forwarded directly by the value of their hash,
+but the hash is used as index into a flow lookup table. This table maps
+flows to the CPUs where those flows are being processed. The flow hash
+(see RPS section above) is used to calculate the index into this table.
+The CPU recorded in each entry is the one which last processed the flow.
+If an entry does not hold a valid CPU, then packets mapped to that entry
+are steered using plain RPS. Multiple table entries may point to the
+same CPU. Indeed, with many flows and few CPUs, it is very likely that
+a single application thread handles flows with many different flow hashes.
+
+rps_sock_flow_table is a global flow table that contains the *desired* CPU
+for flows: the CPU that is currently processing the flow in userspace.
+Each table value is a CPU index that is updated during calls to recvmsg
+and sendmsg (specifically, inet_recvmsg(), inet_sendmsg(), inet_sendpage()
+and tcp_splice_read()).
+
+When the scheduler moves a thread to a new CPU while it has outstanding
+receive packets on the old CPU, packets may arrive out of order. To
+avoid this, RFS uses a second flow table to track outstanding packets
+for each flow: rps_dev_flow_table is a table specific to each hardware
+receive queue of each device. Each table value stores a CPU index and a
+counter. The CPU index represents the *current* CPU onto which packets
+for this flow are enqueued for further kernel processing. Ideally, kernel
+and userspace processing occur on the same CPU, and hence the CPU index
+in both tables is identical. This is likely false if the scheduler has
+recently migrated a userspace thread while the kernel still has packets
+enqueued for kernel processing on the old CPU.
+
+The counter in rps_dev_flow_table values records the length of the current
+CPU's backlog when a packet in this flow was last enqueued. Each backlog
+queue has a head counter that is incremented on dequeue. A tail counter
+is computed as head counter + queue length. In other words, the counter
+in rps_dev_flow[i] records the last element in flow i that has
+been enqueued onto the currently designated CPU for flow i (of course,
+entry i is actually selected by hash and multiple flows may hash to the
+same entry i).
+
+And now the trick for avoiding out of order packets: when selecting the
+CPU for packet processing (from get_rps_cpu()) the rps_sock_flow table
+and the rps_dev_flow table of the queue that the packet was received on
+are compared. If the desired CPU for the flow (found in the
+rps_sock_flow table) matches the current CPU (found in the rps_dev_flow
+table), the packet is enqueued onto that CPU’s backlog. If they differ,
+the current CPU is updated to match the desired CPU if one of the
+following is true:
+
+- The current CPU's queue head counter >= the recorded tail counter
+ value in rps_dev_flow[i]
+- The current CPU is unset (>= nr_cpu_ids)
+- The current CPU is offline
+
+After this check, the packet is sent to the (possibly updated) current
+CPU. These rules aim to ensure that a flow only moves to a new CPU when
+there are no packets outstanding on the old CPU, as the outstanding
+packets could arrive later than those about to be processed on the new
+CPU.
+
+==== RFS Configuration
+
+RFS is only available if the kconfig symbol CONFIG_RPS is enabled (on
+by default for SMP). The functionality remains disabled until explicitly
+configured. The number of entries in the global flow table is set through:
+
+ /proc/sys/net/core/rps_sock_flow_entries
+
+The number of entries in the per-queue flow table are set through:
+
+ /sys/class/net/<dev>/queues/rx-<n>/rps_flow_cnt
+
+== Suggested Configuration
+
+Both of these need to be set before RFS is enabled for a receive queue.
+Values for both are rounded up to the nearest power of two. The
+suggested flow count depends on the expected number of active connections
+at any given time, which may be significantly less than the number of open
+connections. We have found that a value of 32768 for rps_sock_flow_entries
+works fairly well on a moderately loaded server.
+
+For a single queue device, the rps_flow_cnt value for the single queue
+would normally be configured to the same value as rps_sock_flow_entries.
+For a multi-queue device, the rps_flow_cnt for each queue might be
+configured as rps_sock_flow_entries / N, where N is the number of
+queues. So for instance, if rps_sock_flow_entries is set to 32768 and there
+are 16 configured receive queues, rps_flow_cnt for each queue might be
+configured as 2048.
+
+
+Accelerated RFS
+===============
+
+Accelerated RFS is to RFS what RSS is to RPS: a hardware-accelerated load
+balancing mechanism that uses soft state to steer flows based on where
+the application thread consuming the packets of each flow is running.
+Accelerated RFS should perform better than RFS since packets are sent
+directly to a CPU local to the thread consuming the data. The target CPU
+will either be the same CPU where the application runs, or at least a CPU
+which is local to the application thread’s CPU in the cache hierarchy.
+
+To enable accelerated RFS, the networking stack calls the
+ndo_rx_flow_steer driver function to communicate the desired hardware
+queue for packets matching a particular flow. The network stack
+automatically calls this function every time a flow entry in
+rps_dev_flow_table is updated. The driver in turn uses a device specific
+method to program the NIC to steer the packets.
+
+The hardware queue for a flow is derived from the CPU recorded in
+rps_dev_flow_table. The stack consults a CPU to hardware queue map which
+is maintained by the NIC driver. This is an auto-generated reverse map of
+the IRQ affinity table shown by /proc/interrupts. Drivers can use
+functions in the cpu_rmap (“CPU affinity reverse map”) kernel library
+to populate the map. For each CPU, the corresponding queue in the map is
+set to be one whose processing CPU is closest in cache locality.
+
+==== Accelerated RFS Configuration
+
+Accelerated RFS is only available if the kernel is compiled with
+CONFIG_RFS_ACCEL and support is provided by the NIC device and driver.
+It also requires that ntuple filtering is enabled via ethtool. The map
+of CPU to queues is automatically deduced from the IRQ affinities
+configured for each receive queue by the driver, so no additional
+configuration should be necessary.
+
+== Suggested Configuration
+
+This technique should be enabled whenever one wants to use RFS and the
+NIC supports hardware acceleration.
+
+XPS: Transmit Packet Steering
+=============================
+
+Transmit Packet Steering is a mechanism for intelligently selecting
+which transmit queue to use when transmitting a packet on a multi-queue
+device. To accomplish this, a mapping from CPU to hardware queue(s) is
+recorded. The goal of this mapping is usually to assign queues
+exclusively to a subset of CPUs, where the transmit completions for
+these queues are processed on a CPU within this set. This choice
+provides two benefits. First, contention on the device queue lock is
+significantly reduced since fewer CPUs contend for the same queue
+(contention can be eliminated completely if each CPU has its own
+transmit queue). Secondly, cache miss rate on transmit completion is
+reduced, in particular for data cache lines that hold the sk_buff
+structures.
+
+XPS is configured per transmit queue by setting a bitmap of CPUs that
+may use that queue to transmit. The reverse mapping, from CPUs to
+transmit queues, is computed and maintained for each network device.
+When transmitting the first packet in a flow, the function
+get_xps_queue() is called to select a queue. This function uses the ID
+of the running CPU as a key into the CPU-to-queue lookup table. If the
+ID matches a single queue, that is used for transmission. If multiple
+queues match, one is selected by using the flow hash to compute an index
+into the set.
+
+The queue chosen for transmitting a particular flow is saved in the
+corresponding socket structure for the flow (e.g. a TCP connection).
+This transmit queue is used for subsequent packets sent on the flow to
+prevent out of order (ooo) packets. The choice also amortizes the cost
+of calling get_xps_queues() over all packets in the flow. To avoid
+ooo packets, the queue for a flow can subsequently only be changed if
+skb->ooo_okay is set for a packet in the flow. This flag indicates that
+there are no outstanding packets in the flow, so the transmit queue can
+change without the risk of generating out of order packets. The
+transport layer is responsible for setting ooo_okay appropriately. TCP,
+for instance, sets the flag when all data for a connection has been
+acknowledged.
+
+==== XPS Configuration
+
+XPS is only available if the kconfig symbol CONFIG_XPS is enabled (on by
+default for SMP). The functionality remains disabled until explicitly
+configured. To enable XPS, the bitmap of CPUs that may use a transmit
+queue is configured using the sysfs file entry:
+
+/sys/class/net/<dev>/queues/tx-<n>/xps_cpus
+
+== Suggested Configuration
+
+For a network device with a single transmission queue, XPS configuration
+has no effect, since there is no choice in this case. In a multi-queue
+system, XPS is preferably configured so that each CPU maps onto one queue.
+If there are as many queues as there are CPUs in the system, then each
+queue can also map onto one CPU, resulting in exclusive pairings that
+experience no contention. If there are fewer queues than CPUs, then the
+best CPUs to share a given queue are probably those that share the cache
+with the CPU that processes transmit completions for that queue
+(transmit interrupts).
+
+Per TX Queue rate limitation:
+=============================
+
+These are rate-limitation mechanisms implemented by HW, where currently
+a max-rate attribute is supported, by setting a Mbps value to
+
+/sys/class/net/<dev>/queues/tx-<n>/tx_maxrate
+
+A value of zero means disabled, and this is the default.
+
+Further Information
+===================
+RPS and RFS were introduced in kernel 2.6.35. XPS was incorporated into
+2.6.38. Original patches were submitted by Tom Herbert
+(therbert@google.com)
+
+Accelerated RFS was introduced in 2.6.35. Original patches were
+submitted by Ben Hutchings (bwh@kernel.org)
+
+Authors:
+Tom Herbert (therbert@google.com)
+Willem de Bruijn (willemb@google.com)
diff --git a/Documentation/networking/sctp.txt b/Documentation/networking/sctp.txt
new file mode 100644
index 000000000..97b810ca9
--- /dev/null
+++ b/Documentation/networking/sctp.txt
@@ -0,0 +1,35 @@
+Linux Kernel SCTP
+
+This is the current BETA release of the Linux Kernel SCTP reference
+implementation.
+
+SCTP (Stream Control Transmission Protocol) is a IP based, message oriented,
+reliable transport protocol, with congestion control, support for
+transparent multi-homing, and multiple ordered streams of messages.
+RFC2960 defines the core protocol. The IETF SIGTRAN working group originally
+developed the SCTP protocol and later handed the protocol over to the
+Transport Area (TSVWG) working group for the continued evolvement of SCTP as a
+general purpose transport.
+
+See the IETF website (http://www.ietf.org) for further documents on SCTP.
+See http://www.ietf.org/rfc/rfc2960.txt
+
+The initial project goal is to create an Linux kernel reference implementation
+of SCTP that is RFC 2960 compliant and provides an programming interface
+referred to as the UDP-style API of the Sockets Extensions for SCTP, as
+proposed in IETF Internet-Drafts.
+
+Caveats:
+
+-lksctp can be built as statically or as a module. However, be aware that
+module removal of lksctp is not yet a safe activity.
+
+-There is tentative support for IPv6, but most work has gone towards
+implementation and testing lksctp on IPv4.
+
+
+For more information, please visit the lksctp project website:
+ http://www.sf.net/projects/lksctp
+
+Or contact the lksctp developers through the mailing list:
+ <linux-sctp@vger.kernel.org>
diff --git a/Documentation/networking/secid.txt b/Documentation/networking/secid.txt
new file mode 100644
index 000000000..95ea06784
--- /dev/null
+++ b/Documentation/networking/secid.txt
@@ -0,0 +1,14 @@
+flowi structure:
+
+The secid member in the flow structure is used in LSMs (e.g. SELinux) to indicate
+the label of the flow. This label of the flow is currently used in selecting
+matching labeled xfrm(s).
+
+If this is an outbound flow, the label is derived from the socket, if any, or
+the incoming packet this flow is being generated as a response to (e.g. tcp
+resets, timewait ack, etc.). It is also conceivable that the label could be
+derived from other sources such as process context, device, etc., in special
+cases, as may be appropriate.
+
+If this is an inbound flow, the label is derived from the IPSec security
+associations, if any, used by the packet.
diff --git a/Documentation/networking/skfp.txt b/Documentation/networking/skfp.txt
new file mode 100644
index 000000000..203ec66c9
--- /dev/null
+++ b/Documentation/networking/skfp.txt
@@ -0,0 +1,220 @@
+(C)Copyright 1998-2000 SysKonnect,
+===========================================================================
+
+skfp.txt created 11-May-2000
+
+Readme File for skfp.o v2.06
+
+
+This file contains
+(1) OVERVIEW
+(2) SUPPORTED ADAPTERS
+(3) GENERAL INFORMATION
+(4) INSTALLATION
+(5) INCLUSION OF THE ADAPTER IN SYSTEM START
+(6) TROUBLESHOOTING
+(7) FUNCTION OF THE ADAPTER LEDS
+(8) HISTORY
+
+===========================================================================
+
+
+
+(1) OVERVIEW
+============
+
+This README explains how to use the driver 'skfp' for Linux with your
+network adapter.
+
+Chapter 2: Contains a list of all network adapters that are supported by
+ this driver.
+
+Chapter 3: Gives some general information.
+
+Chapter 4: Describes common problems and solutions.
+
+Chapter 5: Shows the changed functionality of the adapter LEDs.
+
+Chapter 6: History of development.
+
+***
+
+
+(2) SUPPORTED ADAPTERS
+======================
+
+The network driver 'skfp' supports the following network adapters:
+SysKonnect adapters:
+ - SK-5521 (SK-NET FDDI-UP)
+ - SK-5522 (SK-NET FDDI-UP DAS)
+ - SK-5541 (SK-NET FDDI-FP)
+ - SK-5543 (SK-NET FDDI-LP)
+ - SK-5544 (SK-NET FDDI-LP DAS)
+ - SK-5821 (SK-NET FDDI-UP64)
+ - SK-5822 (SK-NET FDDI-UP64 DAS)
+ - SK-5841 (SK-NET FDDI-FP64)
+ - SK-5843 (SK-NET FDDI-LP64)
+ - SK-5844 (SK-NET FDDI-LP64 DAS)
+Compaq adapters (not tested):
+ - Netelligent 100 FDDI DAS Fibre SC
+ - Netelligent 100 FDDI SAS Fibre SC
+ - Netelligent 100 FDDI DAS UTP
+ - Netelligent 100 FDDI SAS UTP
+ - Netelligent 100 FDDI SAS Fibre MIC
+***
+
+
+(3) GENERAL INFORMATION
+=======================
+
+From v2.01 on, the driver is integrated in the linux kernel sources.
+Therefore, the installation is the same as for any other adapter
+supported by the kernel.
+Refer to the manual of your distribution about the installation
+of network adapters.
+Makes my life much easier :-)
+***
+
+
+(4) TROUBLESHOOTING
+===================
+
+If you run into problems during installation, check those items:
+
+Problem: The FDDI adapter cannot be found by the driver.
+Reason: Look in /proc/pci for the following entry:
+ 'FDDI network controller: SysKonnect SK-FDDI-PCI ...'
+ If this entry exists, then the FDDI adapter has been
+ found by the system and should be able to be used.
+ If this entry does not exist or if the file '/proc/pci'
+ is not there, then you may have a hardware problem or PCI
+ support may not be enabled in your kernel.
+ The adapter can be checked using the diagnostic program
+ which is available from the SysKonnect web site:
+ www.syskonnect.de
+ Some COMPAQ machines have a problem with PCI under
+ Linux. This is described in the 'PCI howto' document
+ (included in some distributions or available from the
+ www, e.g. at 'www.linux.org') and no workaround is available.
+
+Problem: You want to use your computer as a router between
+ multiple IP subnetworks (using multiple adapters), but
+ you cannot reach computers in other subnetworks.
+Reason: Either the router's kernel is not configured for IP
+ forwarding or there is a problem with the routing table
+ and gateway configuration in at least one of the
+ computers.
+
+If your problem is not listed here, please contact our
+technical support for help.
+You can send email to:
+ linux@syskonnect.de
+When contacting our technical support,
+please ensure that the following information is available:
+- System Manufacturer and Model
+- Boards in your system
+- Distribution
+- Kernel version
+
+***
+
+
+(5) FUNCTION OF THE ADAPTER LEDS
+================================
+
+ The functionality of the LED's on the FDDI network adapters was
+ changed in SMT version v2.82. With this new SMT version, the yellow
+ LED works as a ring operational indicator. An active yellow LED
+ indicates that the ring is down. The green LED on the adapter now
+ works as a link indicator where an active GREEN LED indicates that
+ the respective port has a physical connection.
+
+ With versions of SMT prior to v2.82 a ring up was indicated if the
+ yellow LED was off while the green LED(s) showed the connection
+ status of the adapter. During a ring down the green LED was off and
+ the yellow LED was on.
+
+ All implementations indicate that a driver is not loaded if
+ all LEDs are off.
+
+***
+
+
+(6) HISTORY
+===========
+
+v2.06 (20000511) (In-Kernel version)
+ New features:
+ - 64 bit support
+ - new pci dma interface
+ - in kernel 2.3.99
+
+v2.05 (20000217) (In-Kernel version)
+ New features:
+ - Changes for 2.3.45 kernel
+
+v2.04 (20000207) (Standalone version)
+ New features:
+ - Added rx/tx byte counter
+
+v2.03 (20000111) (Standalone version)
+ Problems fixed:
+ - Fixed printk statements from v2.02
+
+v2.02 (991215) (Standalone version)
+ Problems fixed:
+ - Removed unnecessary output
+ - Fixed path for "printver.sh" in makefile
+
+v2.01 (991122) (In-Kernel version)
+ New features:
+ - Integration in Linux kernel sources
+ - Support for memory mapped I/O.
+
+v2.00 (991112)
+ New features:
+ - Full source released under GPL
+
+v1.05 (991023)
+ Problems fixed:
+ - Compilation with kernel version 2.2.13 failed
+
+v1.04 (990427)
+ Changes:
+ - New SMT module included, changing LED functionality
+ Problems fixed:
+ - Synchronization on SMP machines was buggy
+
+v1.03 (990325)
+ Problems fixed:
+ - Interrupt routing on SMP machines could be incorrect
+
+v1.02 (990310)
+ New features:
+ - Support for kernel versions 2.2.x added
+ - Kernel patch instead of private duplicate of kernel functions
+
+v1.01 (980812)
+ Problems fixed:
+ Connection hangup with telnet
+ Slow telnet connection
+
+v1.00 beta 01 (980507)
+ New features:
+ None.
+ Problems fixed:
+ None.
+ Known limitations:
+ - tar archive instead of standard package format (rpm).
+ - FDDI statistic is empty.
+ - not tested with 2.1.xx kernels
+ - integration in kernel not tested
+ - not tested simultaneously with FDDI adapters from other vendors.
+ - only X86 processors supported.
+ - SBA (Synchronous Bandwidth Allocator) parameters can
+ not be configured.
+ - does not work on some COMPAQ machines. See the PCI howto
+ document for details about this problem.
+ - data corruption with kernel versions below 2.0.33.
+
+*** End of information file ***
diff --git a/Documentation/networking/smc9.txt b/Documentation/networking/smc9.txt
new file mode 100644
index 000000000..d1e15074e
--- /dev/null
+++ b/Documentation/networking/smc9.txt
@@ -0,0 +1,42 @@
+
+SMC 9xxxx Driver
+Revision 0.12
+3/5/96
+Copyright 1996 Erik Stahlman
+Released under terms of the GNU General Public License.
+
+This file contains the instructions and caveats for my SMC9xxx driver. You
+should not be using the driver without reading this file.
+
+Things to note about installation:
+
+ 1. The driver should work on all kernels from 1.2.13 until 1.3.71.
+ (A kernel patch is supplied for 1.3.71 )
+
+ 2. If you include this into the kernel, you might need to change some
+ options, such as for forcing IRQ.
+
+
+ 3. To compile as a module, run 'make' .
+ Make will give you the appropriate options for various kernel support.
+
+ 4. Loading the driver as a module :
+
+ use: insmod smc9194.o
+ optional parameters:
+ io=xxxx : your base address
+ irq=xx : your irq
+ ifport=x : 0 for whatever is default
+ 1 for twisted pair
+ 2 for AUI ( or BNC on some cards )
+
+How to obtain the latest version?
+
+FTP:
+ ftp://fenris.campus.vt.edu/smc9/smc9-12.tar.gz
+ ftp://sfbox.vt.edu/filebox/F/fenris/smc9/smc9-12.tar.gz
+
+
+Contacting me:
+ erik@mail.vt.edu
+
diff --git a/Documentation/networking/spider_net.txt b/Documentation/networking/spider_net.txt
new file mode 100644
index 000000000..b0b75f846
--- /dev/null
+++ b/Documentation/networking/spider_net.txt
@@ -0,0 +1,204 @@
+
+ The Spidernet Device Driver
+ ===========================
+
+Written by Linas Vepstas <linas@austin.ibm.com>
+
+Version of 7 June 2007
+
+Abstract
+========
+This document sketches the structure of portions of the spidernet
+device driver in the Linux kernel tree. The spidernet is a gigabit
+ethernet device built into the Toshiba southbridge commonly used
+in the SONY Playstation 3 and the IBM QS20 Cell blade.
+
+The Structure of the RX Ring.
+=============================
+The receive (RX) ring is a circular linked list of RX descriptors,
+together with three pointers into the ring that are used to manage its
+contents.
+
+The elements of the ring are called "descriptors" or "descrs"; they
+describe the received data. This includes a pointer to a buffer
+containing the received data, the buffer size, and various status bits.
+
+There are three primary states that a descriptor can be in: "empty",
+"full" and "not-in-use". An "empty" or "ready" descriptor is ready
+to receive data from the hardware. A "full" descriptor has data in it,
+and is waiting to be emptied and processed by the OS. A "not-in-use"
+descriptor is neither empty or full; it is simply not ready. It may
+not even have a data buffer in it, or is otherwise unusable.
+
+During normal operation, on device startup, the OS (specifically, the
+spidernet device driver) allocates a set of RX descriptors and RX
+buffers. These are all marked "empty", ready to receive data. This
+ring is handed off to the hardware, which sequentially fills in the
+buffers, and marks them "full". The OS follows up, taking the full
+buffers, processing them, and re-marking them empty.
+
+This filling and emptying is managed by three pointers, the "head"
+and "tail" pointers, managed by the OS, and a hardware current
+descriptor pointer (GDACTDPA). The GDACTDPA points at the descr
+currently being filled. When this descr is filled, the hardware
+marks it full, and advances the GDACTDPA by one. Thus, when there is
+flowing RX traffic, every descr behind it should be marked "full",
+and everything in front of it should be "empty". If the hardware
+discovers that the current descr is not empty, it will signal an
+interrupt, and halt processing.
+
+The tail pointer tails or trails the hardware pointer. When the
+hardware is ahead, the tail pointer will be pointing at a "full"
+descr. The OS will process this descr, and then mark it "not-in-use",
+and advance the tail pointer. Thus, when there is flowing RX traffic,
+all of the descrs in front of the tail pointer should be "full", and
+all of those behind it should be "not-in-use". When RX traffic is not
+flowing, then the tail pointer can catch up to the hardware pointer.
+The OS will then note that the current tail is "empty", and halt
+processing.
+
+The head pointer (somewhat mis-named) follows after the tail pointer.
+When traffic is flowing, then the head pointer will be pointing at
+a "not-in-use" descr. The OS will perform various housekeeping duties
+on this descr. This includes allocating a new data buffer and
+dma-mapping it so as to make it visible to the hardware. The OS will
+then mark the descr as "empty", ready to receive data. Thus, when there
+is flowing RX traffic, everything in front of the head pointer should
+be "not-in-use", and everything behind it should be "empty". If no
+RX traffic is flowing, then the head pointer can catch up to the tail
+pointer, at which point the OS will notice that the head descr is
+"empty", and it will halt processing.
+
+Thus, in an idle system, the GDACTDPA, tail and head pointers will
+all be pointing at the same descr, which should be "empty". All of the
+other descrs in the ring should be "empty" as well.
+
+The show_rx_chain() routine will print out the locations of the
+GDACTDPA, tail and head pointers. It will also summarize the contents
+of the ring, starting at the tail pointer, and listing the status
+of the descrs that follow.
+
+A typical example of the output, for a nearly idle system, might be
+
+net eth1: Total number of descrs=256
+net eth1: Chain tail located at descr=20
+net eth1: Chain head is at 20
+net eth1: HW curr desc (GDACTDPA) is at 21
+net eth1: Have 1 descrs with stat=x40800101
+net eth1: HW next desc (GDACNEXTDA) is at 22
+net eth1: Last 255 descrs with stat=xa0800000
+
+In the above, the hardware has filled in one descr, number 20. Both
+head and tail are pointing at 20, because it has not yet been emptied.
+Meanwhile, hw is pointing at 21, which is free.
+
+The "Have nnn decrs" refers to the descr starting at the tail: in this
+case, nnn=1 descr, starting at descr 20. The "Last nnn descrs" refers
+to all of the rest of the descrs, from the last status change. The "nnn"
+is a count of how many descrs have exactly the same status.
+
+The status x4... corresponds to "full" and status xa... corresponds
+to "empty". The actual value printed is RXCOMST_A.
+
+In the device driver source code, a different set of names are
+used for these same concepts, so that
+
+"empty" == SPIDER_NET_DESCR_CARDOWNED == 0xa
+"full" == SPIDER_NET_DESCR_FRAME_END == 0x4
+"not in use" == SPIDER_NET_DESCR_NOT_IN_USE == 0xf
+
+
+The RX RAM full bug/feature
+===========================
+
+As long as the OS can empty out the RX buffers at a rate faster than
+the hardware can fill them, there is no problem. If, for some reason,
+the OS fails to empty the RX ring fast enough, the hardware GDACTDPA
+pointer will catch up to the head, notice the not-empty condition,
+ad stop. However, RX packets may still continue arriving on the wire.
+The spidernet chip can save some limited number of these in local RAM.
+When this local ram fills up, the spider chip will issue an interrupt
+indicating this (GHIINT0STS will show ERRINT, and the GRMFLLINT bit
+will be set in GHIINT1STS). When the RX ram full condition occurs,
+a certain bug/feature is triggered that has to be specially handled.
+This section describes the special handling for this condition.
+
+When the OS finally has a chance to run, it will empty out the RX ring.
+In particular, it will clear the descriptor on which the hardware had
+stopped. However, once the hardware has decided that a certain
+descriptor is invalid, it will not restart at that descriptor; instead
+it will restart at the next descr. This potentially will lead to a
+deadlock condition, as the tail pointer will be pointing at this descr,
+which, from the OS point of view, is empty; the OS will be waiting for
+this descr to be filled. However, the hardware has skipped this descr,
+and is filling the next descrs. Since the OS doesn't see this, there
+is a potential deadlock, with the OS waiting for one descr to fill,
+while the hardware is waiting for a different set of descrs to become
+empty.
+
+A call to show_rx_chain() at this point indicates the nature of the
+problem. A typical print when the network is hung shows the following:
+
+net eth1: Spider RX RAM full, incoming packets might be discarded!
+net eth1: Total number of descrs=256
+net eth1: Chain tail located at descr=255
+net eth1: Chain head is at 255
+net eth1: HW curr desc (GDACTDPA) is at 0
+net eth1: Have 1 descrs with stat=xa0800000
+net eth1: HW next desc (GDACNEXTDA) is at 1
+net eth1: Have 127 descrs with stat=x40800101
+net eth1: Have 1 descrs with stat=x40800001
+net eth1: Have 126 descrs with stat=x40800101
+net eth1: Last 1 descrs with stat=xa0800000
+
+Both the tail and head pointers are pointing at descr 255, which is
+marked xa... which is "empty". Thus, from the OS point of view, there
+is nothing to be done. In particular, there is the implicit assumption
+that everything in front of the "empty" descr must surely also be empty,
+as explained in the last section. The OS is waiting for descr 255 to
+become non-empty, which, in this case, will never happen.
+
+The HW pointer is at descr 0. This descr is marked 0x4.. or "full".
+Since its already full, the hardware can do nothing more, and thus has
+halted processing. Notice that descrs 0 through 254 are all marked
+"full", while descr 254 and 255 are empty. (The "Last 1 descrs" is
+descr 254, since tail was at 255.) Thus, the system is deadlocked,
+and there can be no forward progress; the OS thinks there's nothing
+to do, and the hardware has nowhere to put incoming data.
+
+This bug/feature is worked around with the spider_net_resync_head_ptr()
+routine. When the driver receives RX interrupts, but an examination
+of the RX chain seems to show it is empty, then it is probable that
+the hardware has skipped a descr or two (sometimes dozens under heavy
+network conditions). The spider_net_resync_head_ptr() subroutine will
+search the ring for the next full descr, and the driver will resume
+operations there. Since this will leave "holes" in the ring, there
+is also a spider_net_resync_tail_ptr() that will skip over such holes.
+
+As of this writing, the spider_net_resync() strategy seems to work very
+well, even under heavy network loads.
+
+
+The TX ring
+===========
+The TX ring uses a low-watermark interrupt scheme to make sure that
+the TX queue is appropriately serviced for large packet sizes.
+
+For packet sizes greater than about 1KBytes, the kernel can fill
+the TX ring quicker than the device can drain it. Once the ring
+is full, the netdev is stopped. When there is room in the ring,
+the netdev needs to be reawakened, so that more TX packets are placed
+in the ring. The hardware can empty the ring about four times per jiffy,
+so its not appropriate to wait for the poll routine to refill, since
+the poll routine runs only once per jiffy. The low-watermark mechanism
+marks a descr about 1/4th of the way from the bottom of the queue, so
+that an interrupt is generated when the descr is processed. This
+interrupt wakes up the netdev, which can then refill the queue.
+For large packets, this mechanism generates a relatively small number
+of interrupts, about 1K/sec. For smaller packets, this will drop to zero
+interrupts, as the hardware can empty the queue faster than the kernel
+can fill it.
+
+
+ ======= END OF DOCUMENT ========
+
diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
new file mode 100644
index 000000000..e655e2453
--- /dev/null
+++ b/Documentation/networking/stmmac.txt
@@ -0,0 +1,369 @@
+ STMicroelectronics 10/100/1000 Synopsys Ethernet driver
+
+Copyright (C) 2007-2014 STMicroelectronics Ltd
+Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+
+This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers
+(Synopsys IP blocks).
+
+Currently this network device driver is for all STi embedded MAC/GMAC
+(i.e. 7xxx/5xxx SoCs), SPEAr (arm), Loongson1B (mips) and XLINX XC2V3000
+FF1152AMT0221 D1215994A VIRTEX FPGA board.
+
+DWC Ether MAC 10/100/1000 Universal version 3.70a (and older) and DWC Ether
+MAC 10/100 Universal version 4.0 have been used for developing this driver.
+
+This driver supports both the platform bus and PCI.
+
+Please, for more information also visit: www.stlinux.com
+
+1) Kernel Configuration
+The kernel configuration option is STMMAC_ETH:
+ Device Drivers ---> Network device support ---> Ethernet (1000 Mbit) --->
+ STMicroelectronics 10/100/1000 Ethernet driver (STMMAC_ETH)
+
+CONFIG_STMMAC_PLATFORM: is to enable the platform driver.
+CONFIG_STMMAC_PCI: is to enable the pci driver.
+
+2) Driver parameters list:
+ debug: message level (0: no output, 16: all);
+ phyaddr: to manually provide the physical address to the PHY device;
+ dma_rxsize: DMA rx ring size;
+ dma_txsize: DMA tx ring size;
+ buf_sz: DMA buffer size;
+ tc: control the HW FIFO threshold;
+ watchdog: transmit timeout (in milliseconds);
+ flow_ctrl: Flow control ability [on/off];
+ pause: Flow Control Pause Time;
+ eee_timer: tx EEE timer;
+ chain_mode: select chain mode instead of ring.
+
+3) Command line options
+Driver parameters can be also passed in command line by using:
+ stmmaceth=dma_rxsize:128,dma_txsize:512
+
+4) Driver information and notes
+
+4.1) Transmit process
+The xmit method is invoked when the kernel needs to transmit a packet; it sets
+the descriptors in the ring and informs the DMA engine that there is a packet
+ready to be transmitted.
+By default, the driver sets the NETIF_F_SG bit in the features field of the
+net_device structure enabling the scatter-gather feature. This is true on
+chips and configurations where the checksum can be done in hardware.
+Once the controller has finished transmitting the packet, napi will be
+scheduled to release the transmit resources.
+
+4.2) Receive process
+When one or more packets are received, an interrupt happens. The interrupts
+are not queued so the driver has to scan all the descriptors in the ring during
+the receive process.
+This is based on NAPI so the interrupt handler signals only if there is work
+to be done, and it exits.
+Then the poll method will be scheduled at some future point.
+The incoming packets are stored, by the DMA, in a list of pre-allocated socket
+buffers in order to avoid the memcpy (zero-copy).
+
+4.3) Interrupt Mitigation
+The driver is able to mitigate the number of its DMA interrupts
+using NAPI for the reception on chips older than the 3.50.
+New chips have an HW RX-Watchdog used for this mitigation.
+Mitigation parameters can be tuned by ethtool.
+
+4.4) WOL
+Wake up on Lan feature through Magic and Unicast frames are supported for the
+GMAC core.
+
+4.5) DMA descriptors
+Driver handles both normal and alternate descriptors. The latter has been only
+tested on DWC Ether MAC 10/100/1000 Universal version 3.41a and later.
+
+STMMAC supports DMA descriptor to operate both in dual buffer (RING)
+and linked-list(CHAINED) mode. In RING each descriptor points to two
+data buffer pointers whereas in CHAINED mode they point to only one data
+buffer pointer. RING mode is the default.
+
+In CHAINED mode each descriptor will have pointer to next descriptor in
+the list, hence creating the explicit chaining in the descriptor itself,
+whereas such explicit chaining is not possible in RING mode.
+
+4.5.1) Extended descriptors
+ The extended descriptors give us information about the Ethernet payload
+ when it is carrying PTP packets or TCP/UDP/ICMP over IP.
+ These are not available on GMAC Synopsys chips older than the 3.50.
+ At probe time the driver will decide if these can be actually used.
+ This support also is mandatory for PTPv2 because the extra descriptors
+ are used for saving the hardware timestamps and Extended Status.
+
+4.6) Ethtool support
+Ethtool is supported.
+
+For example, driver statistics (including RMON), internal errors can be taken
+using:
+ # ethtool -S ethX command
+
+4.7) Jumbo and Segmentation Offloading
+Jumbo frames are supported and tested for the GMAC.
+The GSO has been also added but it's performed in software.
+LRO is not supported.
+
+4.8) Physical
+The driver is compatible with Physical Abstraction Layer to be connected with
+PHY and GPHY devices.
+
+4.9) Platform information
+Several information can be passed through the platform and device-tree.
+
+struct plat_stmmacenet_data {
+ char *phy_bus_name;
+ int bus_id;
+ int phy_addr;
+ int interface;
+ struct stmmac_mdio_bus_data *mdio_bus_data;
+ struct stmmac_dma_cfg *dma_cfg;
+ int clk_csr;
+ int has_gmac;
+ int enh_desc;
+ int tx_coe;
+ int rx_coe;
+ int bugged_jumbo;
+ int pmt;
+ int force_sf_dma_mode;
+ int force_thresh_dma_mode;
+ int riwt_off;
+ int max_speed;
+ int maxmtu;
+ void (*fix_mac_speed)(void *priv, unsigned int speed);
+ void (*bus_setup)(void __iomem *ioaddr);
+ void *(*setup)(struct platform_device *pdev);
+ void (*free)(struct platform_device *pdev, void *priv);
+ int (*init)(struct platform_device *pdev, void *priv);
+ void (*exit)(struct platform_device *pdev, void *priv);
+ void *custom_cfg;
+ void *custom_data;
+ void *bsp_priv;
+};
+
+Where:
+ o phy_bus_name: phy bus name to attach to the stmmac.
+ o bus_id: bus identifier.
+ o phy_addr: the physical address can be passed from the platform.
+ If it is set to -1 the driver will automatically
+ detect it at run-time by probing all the 32 addresses.
+ o interface: PHY device's interface.
+ o mdio_bus_data: specific platform fields for the MDIO bus.
+ o dma_cfg: internal DMA parameters
+ o pbl: the Programmable Burst Length is maximum number of beats to
+ be transferred in one DMA transaction.
+ GMAC also enables the 4xPBL by default.
+ o fixed_burst/mixed_burst/burst_len
+ o clk_csr: fixed CSR Clock range selection.
+ o has_gmac: uses the GMAC core.
+ o enh_desc: if sets the MAC will use the enhanced descriptor structure.
+ o tx_coe: core is able to perform the tx csum in HW.
+ o rx_coe: the supports three check sum offloading engine types:
+ type_1, type_2 (full csum) and no RX coe.
+ o bugged_jumbo: some HWs are not able to perform the csum in HW for
+ over-sized frames due to limited buffer sizes.
+ Setting this flag the csum will be done in SW on
+ JUMBO frames.
+ o pmt: core has the embedded power module (optional).
+ o force_sf_dma_mode: force DMA to use the Store and Forward mode
+ instead of the Threshold.
+ o force_thresh_dma_mode: force DMA to use the Threshold mode other than
+ the Store and Forward mode.
+ o riwt_off: force to disable the RX watchdog feature and switch to NAPI mode.
+ o fix_mac_speed: this callback is used for modifying some syscfg registers
+ (on ST SoCs) according to the link speed negotiated by the
+ physical layer .
+ o bus_setup: perform HW setup of the bus. For example, on some ST platforms
+ this field is used to configure the AMBA bridge to generate more
+ efficient STBus traffic.
+ o setup/init/exit: callbacks used for calling a custom initialization;
+ this is sometime necessary on some platforms (e.g. ST boxes)
+ where the HW needs to have set some PIO lines or system cfg
+ registers. setup should return a pointer to private data,
+ which will be stored in bsp_priv, and then passed to init and
+ exit callbacks. init/exit callbacks should not use or modify
+ platform data.
+ o custom_cfg/custom_data: this is a custom configuration that can be passed
+ while initializing the resources.
+ o bsp_priv: another private pointer.
+
+For MDIO bus The we have:
+
+ struct stmmac_mdio_bus_data {
+ int (*phy_reset)(void *priv);
+ unsigned int phy_mask;
+ int *irqs;
+ int probed_phy_irq;
+ };
+
+Where:
+ o phy_reset: hook to reset the phy device attached to the bus.
+ o phy_mask: phy mask passed when register the MDIO bus within the driver.
+ o irqs: list of IRQs, one per PHY.
+ o probed_phy_irq: if irqs is NULL, use this for probed PHY.
+
+For DMA engine we have the following internal fields that should be
+tuned according to the HW capabilities.
+
+struct stmmac_dma_cfg {
+ int pbl;
+ int fixed_burst;
+ int burst_len_supported;
+};
+
+Where:
+ o pbl: Programmable Burst Length
+ o fixed_burst: program the DMA to use the fixed burst mode
+ o burst_len: this is the value we put in the register
+ supported values are provided as macros in
+ linux/stmmac.h header file.
+
+---
+
+Below an example how the structures above are using on ST platforms.
+
+ static struct plat_stmmacenet_data stxYYY_ethernet_platform_data = {
+ .has_gmac = 0,
+ .enh_desc = 0,
+ .fix_mac_speed = stxYYY_ethernet_fix_mac_speed,
+ |
+ |-> to write an internal syscfg
+ | on this platform when the
+ | link speed changes from 10 to
+ | 100 and viceversa
+ .init = &stmmac_claim_resource,
+ |
+ |-> On ST SoC this calls own "PAD"
+ | manager framework to claim
+ | all the resources necessary
+ | (GPIO ...). The .custom_cfg field
+ | is used to pass a custom config.
+};
+
+Below the usage of the stmmac_mdio_bus_data: on this SoC, in fact,
+there are two MAC cores: one MAC is for MDIO Bus/PHY emulation
+with fixed_link support.
+
+static struct stmmac_mdio_bus_data stmmac1_mdio_bus = {
+ .phy_reset = phy_reset;
+ |
+ |-> function to provide the phy_reset on this board
+ .phy_mask = 0,
+};
+
+static struct fixed_phy_status stmmac0_fixed_phy_status = {
+ .link = 1,
+ .speed = 100,
+ .duplex = 1,
+};
+
+During the board's device_init we can configure the first
+MAC for fixed_link by calling:
+ fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status));)
+and the second one, with a real PHY device attached to the bus,
+by using the stmmac_mdio_bus_data structure (to provide the id, the
+reset procedure etc).
+
+Note that, starting from new chips, where it is available the HW capability
+register, many configurations are discovered at run-time for example to
+understand if EEE, HW csum, PTP, enhanced descriptor etc are actually
+available. As strategy adopted in this driver, the information from the HW
+capability register can replace what has been passed from the platform.
+
+4.10) Device-tree support.
+
+Please see the following document:
+ Documentation/devicetree/bindings/net/stmmac.txt
+
+and the stmmac_of_data structure inside the include/linux/stmmac.h header file.
+
+4.11) This is a summary of the content of some relevant files:
+ o stmmac_main.c: to implement the main network device driver;
+ o stmmac_mdio.c: to provide mdio functions;
+ o stmmac_pci: this the PCI driver;
+ o stmmac_platform.c: this the platform driver (OF supported)
+ o stmmac_ethtool.c: to implement the ethtool support;
+ o stmmac.h: private driver structure;
+ o common.h: common definitions and VFTs;
+ o descs.h: descriptor structure definitions;
+ o dwmac1000_core.c: dwmac GiGa core functions;
+ o dwmac1000_dma.c: dma functions for the GMAC chip;
+ o dwmac1000.h: specific header file for the dwmac GiGa;
+ o dwmac100_core: dwmac 100 core code;
+ o dwmac100_dma.c: dma functions for the dwmac 100 chip;
+ o dwmac1000.h: specific header file for the MAC;
+ o dwmac_lib.c: generic DMA functions;
+ o enh_desc.c: functions for handling enhanced descriptors;
+ o norm_desc.c: functions for handling normal descriptors;
+ o chain_mode.c/ring_mode.c:: functions to manage RING/CHAINED modes;
+ o mmc_core.c/mmc.h: Management MAC Counters;
+ o stmmac_hwtstamp.c: HW timestamp support for PTP;
+ o stmmac_ptp.c: PTP 1588 clock;
+ o dwmac-<XXX>.c: these are for the platform glue-logic file; e.g. dwmac-sti.c
+ for STMicroelectronics SoCs.
+
+5) Debug Information
+
+The driver exports many information i.e. internal statistics,
+debug information, MAC and DMA registers etc.
+
+These can be read in several ways depending on the
+type of the information actually needed.
+
+For example a user can be use the ethtool support
+to get statistics: e.g. using: ethtool -S ethX
+(that shows the Management counters (MMC) if supported)
+or sees the MAC/DMA registers: e.g. using: ethtool -d ethX
+
+Compiling the Kernel with CONFIG_DEBUG_FS the driver will export the following
+debugfs entries:
+
+/sys/kernel/debug/stmmaceth/descriptors_status
+ To show the DMA TX/RX descriptor rings
+
+Developer can also use the "debug" module parameter to get further debug
+information (please see: NETIF Msg Level).
+
+6) Energy Efficient Ethernet
+
+Energy Efficient Ethernet(EEE) enables IEEE 802.3 MAC sublayer along
+with a family of Physical layer to operate in the Low power Idle(LPI)
+mode. The EEE mode supports the IEEE 802.3 MAC operation at 100Mbps,
+1000Mbps & 10Gbps.
+
+The LPI mode allows power saving by switching off parts of the
+communication device functionality when there is no data to be
+transmitted & received. The system on both the side of the link can
+disable some functionalities & save power during the period of low-link
+utilization. The MAC controls whether the system should enter or exit
+the LPI mode & communicate this to PHY.
+
+As soon as the interface is opened, the driver verifies if the EEE can
+be supported. This is done by looking at both the DMA HW capability
+register and the PHY devices MCD registers.
+To enter in Tx LPI mode the driver needs to have a software timer
+that enable and disable the LPI mode when there is nothing to be
+transmitted.
+
+7) Precision Time Protocol (PTP)
+The driver supports the IEEE 1588-2002, Precision Time Protocol (PTP),
+which enables precise synchronization of clocks in measurement and
+control systems implemented with technologies such as network
+communication.
+
+In addition to the basic timestamp features mentioned in IEEE 1588-2002
+Timestamps, new GMAC cores support the advanced timestamp features.
+IEEE 1588-2008 that can be enabled when configure the Kernel.
+
+8) SGMII/RGMII supports
+New GMAC devices provide own way to manage RGMII/SGMII.
+This information is available at run-time by looking at the
+HW capability register. This means that the stmmac can manage
+auto-negotiation and link status w/o using the PHYLIB stuff
+In fact, the HW provides a subset of extended registers to
+restart the ANE, verify Full/Half duplex mode and Speed.
+Also thanks to these registers it is possible to look at the
+Auto-negotiated Link Parter Ability.
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
new file mode 100644
index 000000000..f981a9295
--- /dev/null
+++ b/Documentation/networking/switchdev.txt
@@ -0,0 +1,59 @@
+Switch (and switch-ish) device drivers HOWTO
+===========================
+
+Please note that the word "switch" is here used in very generic meaning.
+This include devices supporting L2/L3 but also various flow offloading chips,
+including switches embedded into SR-IOV NICs.
+
+Lets describe a topology a bit. Imagine the following example:
+
+ +----------------------------+ +---------------+
+ | SOME switch chip | | CPU |
+ +----------------------------+ +---------------+
+ port1 port2 port3 port4 MNGMNT | PCI-E |
+ | | | | | +---------------+
+ PHY PHY | | | | NIC0 NIC1
+ | | | | | |
+ | | +- PCI-E -+ | |
+ | +------- MII -------+ |
+ +------------- MII ------------+
+
+In this example, there are two independent lines between the switch silicon
+and CPU. NIC0 and NIC1 drivers are not aware of a switch presence. They are
+separate from the switch driver. SOME switch chip is by managed by a driver
+via PCI-E device MNGMNT. Note that MNGMNT device, NIC0 and NIC1 may be
+connected to some other type of bus.
+
+Now, for the previous example show the representation in kernel:
+
+ +----------------------------+ +---------------+
+ | SOME switch chip | | CPU |
+ +----------------------------+ +---------------+
+ sw0p0 sw0p1 sw0p2 sw0p3 MNGMNT | PCI-E |
+ | | | | | +---------------+
+ PHY PHY | | | | eth0 eth1
+ | | | | | |
+ | | +- PCI-E -+ | |
+ | +------- MII -------+ |
+ +------------- MII ------------+
+
+Lets call the example switch driver for SOME switch chip "SOMEswitch". This
+driver takes care of PCI-E device MNGMNT. There is a netdevice instance sw0pX
+created for each port of a switch. These netdevices are instances
+of "SOMEswitch" driver. sw0pX netdevices serve as a "representation"
+of the switch chip. eth0 and eth1 are instances of some other existing driver.
+
+The only difference of the switch-port netdevice from the ordinary netdevice
+is that is implements couple more NDOs:
+
+ ndo_switch_parent_id_get - This returns the same ID for two port netdevices
+ of the same physical switch chip. This is
+ mandatory to be implemented by all switch drivers
+ and serves the caller for recognition of a port
+ netdevice.
+ ndo_switch_parent_* - Functions that serve for a manipulation of the switch
+ chip itself (it can be though of as a "parent" of the
+ port, therefore the name). They are not port-specific.
+ Caller might use arbitrary port netdevice of the same
+ switch and it will make no difference.
+ ndo_switch_port_* - Functions that serve for a port-specific manipulation.
diff --git a/Documentation/networking/tc-actions-env-rules.txt b/Documentation/networking/tc-actions-env-rules.txt
new file mode 100644
index 000000000..70d6cf608
--- /dev/null
+++ b/Documentation/networking/tc-actions-env-rules.txt
@@ -0,0 +1,30 @@
+
+The "environmental" rules for authors of any new tc actions are:
+
+1) If you stealeth or borroweth any packet thou shalt be branching
+from the righteous path and thou shalt cloneth.
+
+For example if your action queues a packet to be processed later,
+or intentionally branches by redirecting a packet, then you need to
+clone the packet.
+
+There are certain fields in the skb tc_verd that need to be reset so we
+avoid loops, etc. A few are generic enough that skb_act_clone()
+resets them for you, so invoke skb_act_clone() rather than skb_clone().
+
+2) If you munge any packet thou shalt call pskb_expand_head in the case
+someone else is referencing the skb. After that you "own" the skb.
+You must also tell us if it is ok to munge the packet (TC_OK2MUNGE),
+this way any action downstream can stomp on the packet.
+
+3) Dropping packets you don't own is a no-no. You simply return
+TC_ACT_SHOT to the caller and they will drop it.
+
+The "environmental" rules for callers of actions (qdiscs etc) are:
+
+*) Thou art responsible for freeing anything returned as being
+TC_ACT_SHOT/STOLEN/QUEUED. If none of TC_ACT_SHOT/STOLEN/QUEUED is
+returned, then all is great and you don't need to do anything.
+
+Post on netdev if something is unclear.
+
diff --git a/Documentation/networking/tcp-thin.txt b/Documentation/networking/tcp-thin.txt
new file mode 100644
index 000000000..151e22998
--- /dev/null
+++ b/Documentation/networking/tcp-thin.txt
@@ -0,0 +1,47 @@
+Thin-streams and TCP
+====================
+A wide range of Internet-based services that use reliable transport
+protocols display what we call thin-stream properties. This means
+that the application sends data with such a low rate that the
+retransmission mechanisms of the transport protocol are not fully
+effective. In time-dependent scenarios (like online games, control
+systems, stock trading etc.) where the user experience depends
+on the data delivery latency, packet loss can be devastating for
+the service quality. Extreme latencies are caused by TCP's
+dependency on the arrival of new data from the application to trigger
+retransmissions effectively through fast retransmit instead of
+waiting for long timeouts.
+
+After analysing a large number of time-dependent interactive
+applications, we have seen that they often produce thin streams
+and also stay with this traffic pattern throughout its entire
+lifespan. The combination of time-dependency and the fact that the
+streams provoke high latencies when using TCP is unfortunate.
+
+In order to reduce application-layer latency when packets are lost,
+a set of mechanisms has been made, which address these latency issues
+for thin streams. In short, if the kernel detects a thin stream,
+the retransmission mechanisms are modified in the following manner:
+
+1) If the stream is thin, fast retransmit on the first dupACK.
+2) If the stream is thin, do not apply exponential backoff.
+
+These enhancements are applied only if the stream is detected as
+thin. This is accomplished by defining a threshold for the number
+of packets in flight. If there are less than 4 packets in flight,
+fast retransmissions can not be triggered, and the stream is prone
+to experience high retransmission latencies.
+
+Since these mechanisms are targeted at time-dependent applications,
+they must be specifically activated by the application using the
+TCP_THIN_LINEAR_TIMEOUTS and TCP_THIN_DUPACK IOCTLS or the
+tcp_thin_linear_timeouts and tcp_thin_dupack sysctls. Both
+modifications are turned off by default.
+
+References
+==========
+More information on the modifications, as well as a wide range of
+experimental data can be found here:
+"Improving latency for interactive, thin-stream applications over
+reliable transport"
+http://simula.no/research/nd/publications/Simula.nd.477/simula_pdf_file
diff --git a/Documentation/networking/tcp.txt b/Documentation/networking/tcp.txt
new file mode 100644
index 000000000..bdc4c0db5
--- /dev/null
+++ b/Documentation/networking/tcp.txt
@@ -0,0 +1,106 @@
+TCP protocol
+============
+
+Last updated: 9 February 2008
+
+Contents
+========
+
+- Congestion control
+- How the new TCP output machine [nyi] works
+
+Congestion control
+==================
+
+The following variables are used in the tcp_sock for congestion control:
+snd_cwnd The size of the congestion window
+snd_ssthresh Slow start threshold. We are in slow start if
+ snd_cwnd is less than this.
+snd_cwnd_cnt A counter used to slow down the rate of increase
+ once we exceed slow start threshold.
+snd_cwnd_clamp This is the maximum size that snd_cwnd can grow to.
+snd_cwnd_stamp Timestamp for when congestion window last validated.
+snd_cwnd_used Used as a highwater mark for how much of the
+ congestion window is in use. It is used to adjust
+ snd_cwnd down when the link is limited by the
+ application rather than the network.
+
+As of 2.6.13, Linux supports pluggable congestion control algorithms.
+A congestion control mechanism can be registered through functions in
+tcp_cong.c. The functions used by the congestion control mechanism are
+registered via passing a tcp_congestion_ops struct to
+tcp_register_congestion_control. As a minimum name, ssthresh,
+cong_avoid must be valid.
+
+Private data for a congestion control mechanism is stored in tp->ca_priv.
+tcp_ca(tp) returns a pointer to this space. This is preallocated space - it
+is important to check the size of your private data will fit this space, or
+alternatively space could be allocated elsewhere and a pointer to it could
+be stored here.
+
+There are three kinds of congestion control algorithms currently: The
+simplest ones are derived from TCP reno (highspeed, scalable) and just
+provide an alternative the congestion window calculation. More complex
+ones like BIC try to look at other events to provide better
+heuristics. There are also round trip time based algorithms like
+Vegas and Westwood+.
+
+Good TCP congestion control is a complex problem because the algorithm
+needs to maintain fairness and performance. Please review current
+research and RFC's before developing new modules.
+
+The method that is used to determine which congestion control mechanism is
+determined by the setting of the sysctl net.ipv4.tcp_congestion_control.
+The default congestion control will be the last one registered (LIFO);
+so if you built everything as modules, the default will be reno. If you
+build with the defaults from Kconfig, then CUBIC will be builtin (not a
+module) and it will end up the default.
+
+If you really want a particular default value then you will need
+to set it with the sysctl. If you use a sysctl, the module will be autoloaded
+if needed and you will get the expected protocol. If you ask for an
+unknown congestion method, then the sysctl attempt will fail.
+
+If you remove a tcp congestion control module, then you will get the next
+available one. Since reno cannot be built as a module, and cannot be
+deleted, it will always be available.
+
+How the new TCP output machine [nyi] works.
+===========================================
+
+Data is kept on a single queue. The skb->users flag tells us if the frame is
+one that has been queued already. To add a frame we throw it on the end. Ack
+walks down the list from the start.
+
+We keep a set of control flags
+
+
+ sk->tcp_pend_event
+
+ TCP_PEND_ACK Ack needed
+ TCP_ACK_NOW Needed now
+ TCP_WINDOW Window update check
+ TCP_WINZERO Zero probing
+
+
+ sk->transmit_queue The transmission frame begin
+ sk->transmit_new First new frame pointer
+ sk->transmit_end Where to add frames
+
+ sk->tcp_last_tx_ack Last ack seen
+ sk->tcp_dup_ack Dup ack count for fast retransmit
+
+
+Frames are queued for output by tcp_write. We do our best to send the frames
+off immediately if possible, but otherwise queue and compute the body
+checksum in the copy.
+
+When a write is done we try to clear any pending events and piggy back them.
+If the window is full we queue full sized frames. On the first timeout in
+zero window we split this.
+
+On a timer we walk the retransmit list to send any retransmits, update the
+backoff timers etc. A change of route table stamp causes a change of header
+and recompute. We add any new tcp level headers and refinish the checksum
+before sending.
+
diff --git a/Documentation/networking/team.txt b/Documentation/networking/team.txt
new file mode 100644
index 000000000..5a013686b
--- /dev/null
+++ b/Documentation/networking/team.txt
@@ -0,0 +1,2 @@
+Team devices are driven from userspace via libteam library which is here:
+ https://github.com/jpirko/libteam
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
new file mode 100644
index 000000000..5f0922613
--- /dev/null
+++ b/Documentation/networking/timestamping.txt
@@ -0,0 +1,457 @@
+
+1. Control Interfaces
+
+The interfaces for receiving network packages timestamps are:
+
+* SO_TIMESTAMP
+ Generates a timestamp for each incoming packet in (not necessarily
+ monotonic) system time. Reports the timestamp via recvmsg() in a
+ control message as struct timeval (usec resolution).
+
+* SO_TIMESTAMPNS
+ Same timestamping mechanism as SO_TIMESTAMP, but reports the
+ timestamp as struct timespec (nsec resolution).
+
+* IP_MULTICAST_LOOP + SO_TIMESTAMP[NS]
+ Only for multicast:approximate transmit timestamp obtained by
+ reading the looped packet receive timestamp.
+
+* SO_TIMESTAMPING
+ Generates timestamps on reception, transmission or both. Supports
+ multiple timestamp sources, including hardware. Supports generating
+ timestamps for stream sockets.
+
+
+1.1 SO_TIMESTAMP:
+
+This socket option enables timestamping of datagrams on the reception
+path. Because the destination socket, if any, is not known early in
+the network stack, the feature has to be enabled for all packets. The
+same is true for all early receive timestamp options.
+
+For interface details, see `man 7 socket`.
+
+
+1.2 SO_TIMESTAMPNS:
+
+This option is identical to SO_TIMESTAMP except for the returned data type.
+Its struct timespec allows for higher resolution (ns) timestamps than the
+timeval of SO_TIMESTAMP (ms).
+
+
+1.3 SO_TIMESTAMPING:
+
+Supports multiple types of timestamp requests. As a result, this
+socket option takes a bitmap of flags, not a boolean. In
+
+ err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (void *) val, &val);
+
+val is an integer with any of the following bits set. Setting other
+bit returns EINVAL and does not change the current state.
+
+
+1.3.1 Timestamp Generation
+
+Some bits are requests to the stack to try to generate timestamps. Any
+combination of them is valid. Changes to these bits apply to newly
+created packets, not to packets already in the stack. As a result, it
+is possible to selectively request timestamps for a subset of packets
+(e.g., for sampling) by embedding an send() call within two setsockopt
+calls, one to enable timestamp generation and one to disable it.
+Timestamps may also be generated for reasons other than being
+requested by a particular socket, such as when receive timestamping is
+enabled system wide, as explained earlier.
+
+SOF_TIMESTAMPING_RX_HARDWARE:
+ Request rx timestamps generated by the network adapter.
+
+SOF_TIMESTAMPING_RX_SOFTWARE:
+ Request rx timestamps when data enters the kernel. These timestamps
+ are generated just after a device driver hands a packet to the
+ kernel receive stack.
+
+SOF_TIMESTAMPING_TX_HARDWARE:
+ Request tx timestamps generated by the network adapter.
+
+SOF_TIMESTAMPING_TX_SOFTWARE:
+ Request tx timestamps when data leaves the kernel. These timestamps
+ are generated in the device driver as close as possible, but always
+ prior to, passing the packet to the network interface. Hence, they
+ require driver support and may not be available for all devices.
+
+SOF_TIMESTAMPING_TX_SCHED:
+ Request tx timestamps prior to entering the packet scheduler. Kernel
+ transmit latency is, if long, often dominated by queuing delay. The
+ difference between this timestamp and one taken at
+ SOF_TIMESTAMPING_TX_SOFTWARE will expose this latency independent
+ of protocol processing. The latency incurred in protocol
+ processing, if any, can be computed by subtracting a userspace
+ timestamp taken immediately before send() from this timestamp. On
+ machines with virtual devices where a transmitted packet travels
+ through multiple devices and, hence, multiple packet schedulers,
+ a timestamp is generated at each layer. This allows for fine
+ grained measurement of queuing delay.
+
+SOF_TIMESTAMPING_TX_ACK:
+ Request tx timestamps when all data in the send buffer has been
+ acknowledged. This only makes sense for reliable protocols. It is
+ currently only implemented for TCP. For that protocol, it may
+ over-report measurement, because the timestamp is generated when all
+ data up to and including the buffer at send() was acknowledged: the
+ cumulative acknowledgment. The mechanism ignores SACK and FACK.
+
+
+1.3.2 Timestamp Reporting
+
+The other three bits control which timestamps will be reported in a
+generated control message. Changes to the bits take immediate
+effect at the timestamp reporting locations in the stack. Timestamps
+are only reported for packets that also have the relevant timestamp
+generation request set.
+
+SOF_TIMESTAMPING_SOFTWARE:
+ Report any software timestamps when available.
+
+SOF_TIMESTAMPING_SYS_HARDWARE:
+ This option is deprecated and ignored.
+
+SOF_TIMESTAMPING_RAW_HARDWARE:
+ Report hardware timestamps as generated by
+ SOF_TIMESTAMPING_TX_HARDWARE when available.
+
+
+1.3.3 Timestamp Options
+
+The interface supports the options
+
+SOF_TIMESTAMPING_OPT_ID:
+
+ Generate a unique identifier along with each packet. A process can
+ have multiple concurrent timestamping requests outstanding. Packets
+ can be reordered in the transmit path, for instance in the packet
+ scheduler. In that case timestamps will be queued onto the error
+ queue out of order from the original send() calls. It is not always
+ possible to uniquely match timestamps to the original send() calls
+ based on timestamp order or payload inspection alone, then.
+
+ This option associates each packet at send() with a unique
+ identifier and returns that along with the timestamp. The identifier
+ is derived from a per-socket u32 counter (that wraps). For datagram
+ sockets, the counter increments with each sent packet. For stream
+ sockets, it increments with every byte.
+
+ The counter starts at zero. It is initialized the first time that
+ the socket option is enabled. It is reset each time the option is
+ enabled after having been disabled. Resetting the counter does not
+ change the identifiers of existing packets in the system.
+
+ This option is implemented only for transmit timestamps. There, the
+ timestamp is always looped along with a struct sock_extended_err.
+ The option modifies field ee_data to pass an id that is unique
+ among all possibly concurrently outstanding timestamp requests for
+ that socket.
+
+
+SOF_TIMESTAMPING_OPT_CMSG:
+
+ Support recv() cmsg for all timestamped packets. Control messages
+ are already supported unconditionally on all packets with receive
+ timestamps and on IPv6 packets with transmit timestamp. This option
+ extends them to IPv4 packets with transmit timestamp. One use case
+ is to correlate packets with their egress device, by enabling socket
+ option IP_PKTINFO simultaneously.
+
+
+SOF_TIMESTAMPING_OPT_TSONLY:
+
+ Applies to transmit timestamps only. Makes the kernel return the
+ timestamp as a cmsg alongside an empty packet, as opposed to
+ alongside the original packet. This reduces the amount of memory
+ charged to the socket's receive budget (SO_RCVBUF) and delivers
+ the timestamp even if sysctl net.core.tstamp_allow_data is 0.
+ This option disables SOF_TIMESTAMPING_OPT_CMSG.
+
+
+New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
+disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
+regardless of the setting of sysctl net.core.tstamp_allow_data.
+
+An exception is when a process needs additional cmsg data, for
+instance SOL_IP/IP_PKTINFO to detect the egress network interface.
+Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on
+having access to the contents of the original packet, so cannot be
+combined with SOF_TIMESTAMPING_OPT_TSONLY.
+
+
+1.4 Bytestream Timestamps
+
+The SO_TIMESTAMPING interface supports timestamping of bytes in a
+bytestream. Each request is interpreted as a request for when the
+entire contents of the buffer has passed a timestamping point. That
+is, for streams option SOF_TIMESTAMPING_TX_SOFTWARE will record
+when all bytes have reached the device driver, regardless of how
+many packets the data has been converted into.
+
+In general, bytestreams have no natural delimiters and therefore
+correlating a timestamp with data is non-trivial. A range of bytes
+may be split across segments, any segments may be merged (possibly
+coalescing sections of previously segmented buffers associated with
+independent send() calls). Segments can be reordered and the same
+byte range can coexist in multiple segments for protocols that
+implement retransmissions.
+
+It is essential that all timestamps implement the same semantics,
+regardless of these possible transformations, as otherwise they are
+incomparable. Handling "rare" corner cases differently from the
+simple case (a 1:1 mapping from buffer to skb) is insufficient
+because performance debugging often needs to focus on such outliers.
+
+In practice, timestamps can be correlated with segments of a
+bytestream consistently, if both semantics of the timestamp and the
+timing of measurement are chosen correctly. This challenge is no
+different from deciding on a strategy for IP fragmentation. There, the
+definition is that only the first fragment is timestamped. For
+bytestreams, we chose that a timestamp is generated only when all
+bytes have passed a point. SOF_TIMESTAMPING_TX_ACK as defined is easy to
+implement and reason about. An implementation that has to take into
+account SACK would be more complex due to possible transmission holes
+and out of order arrival.
+
+On the host, TCP can also break the simple 1:1 mapping from buffer to
+skbuff as a result of Nagle, cork, autocork, segmentation and GSO. The
+implementation ensures correctness in all cases by tracking the
+individual last byte passed to send(), even if it is no longer the
+last byte after an skbuff extend or merge operation. It stores the
+relevant sequence number in skb_shinfo(skb)->tskey. Because an skbuff
+has only one such field, only one timestamp can be generated.
+
+In rare cases, a timestamp request can be missed if two requests are
+collapsed onto the same skb. A process can detect this situation by
+enabling SOF_TIMESTAMPING_OPT_ID and comparing the byte offset at
+send time with the value returned for each timestamp. It can prevent
+the situation by always flushing the TCP stack in between requests,
+for instance by enabling TCP_NODELAY and disabling TCP_CORK and
+autocork.
+
+These precautions ensure that the timestamp is generated only when all
+bytes have passed a timestamp point, assuming that the network stack
+itself does not reorder the segments. The stack indeed tries to avoid
+reordering. The one exception is under administrator control: it is
+possible to construct a packet scheduler configuration that delays
+segments from the same stream differently. Such a setup would be
+unusual.
+
+
+2 Data Interfaces
+
+Timestamps are read using the ancillary data feature of recvmsg().
+See `man 3 cmsg` for details of this interface. The socket manual
+page (`man 7 socket`) describes how timestamps generated with
+SO_TIMESTAMP and SO_TIMESTAMPNS records can be retrieved.
+
+
+2.1 SCM_TIMESTAMPING records
+
+These timestamps are returned in a control message with cmsg_level
+SOL_SOCKET, cmsg_type SCM_TIMESTAMPING, and payload of type
+
+struct scm_timestamping {
+ struct timespec ts[3];
+};
+
+The structure can return up to three timestamps. This is a legacy
+feature. Only one field is non-zero at any time. Most timestamps
+are passed in ts[0]. Hardware timestamps are passed in ts[2].
+
+ts[1] used to hold hardware timestamps converted to system time.
+Instead, expose the hardware clock device on the NIC directly as
+a HW PTP clock source, to allow time conversion in userspace and
+optionally synchronize system time with a userspace PTP stack such
+as linuxptp. For the PTP clock API, see Documentation/ptp/ptp.txt.
+
+2.1.1 Transmit timestamps with MSG_ERRQUEUE
+
+For transmit timestamps the outgoing packet is looped back to the
+socket's error queue with the send timestamp(s) attached. A process
+receives the timestamps by calling recvmsg() with flag MSG_ERRQUEUE
+set and with a msg_control buffer sufficiently large to receive the
+relevant metadata structures. The recvmsg call returns the original
+outgoing data packet with two ancillary messages attached.
+
+A message of cm_level SOL_IP(V6) and cm_type IP(V6)_RECVERR
+embeds a struct sock_extended_err. This defines the error type. For
+timestamps, the ee_errno field is ENOMSG. The other ancillary message
+will have cm_level SOL_SOCKET and cm_type SCM_TIMESTAMPING. This
+embeds the struct scm_timestamping.
+
+
+2.1.1.2 Timestamp types
+
+The semantics of the three struct timespec are defined by field
+ee_info in the extended error structure. It contains a value of
+type SCM_TSTAMP_* to define the actual timestamp passed in
+scm_timestamping.
+
+The SCM_TSTAMP_* types are 1:1 matches to the SOF_TIMESTAMPING_*
+control fields discussed previously, with one exception. For legacy
+reasons, SCM_TSTAMP_SND is equal to zero and can be set for both
+SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE. It
+is the first if ts[2] is non-zero, the second otherwise, in which
+case the timestamp is stored in ts[0].
+
+
+2.1.1.3 Fragmentation
+
+Fragmentation of outgoing datagrams is rare, but is possible, e.g., by
+explicitly disabling PMTU discovery. If an outgoing packet is fragmented,
+then only the first fragment is timestamped and returned to the sending
+socket.
+
+
+2.1.1.4 Packet Payload
+
+The calling application is often not interested in receiving the whole
+packet payload that it passed to the stack originally: the socket
+error queue mechanism is just a method to piggyback the timestamp on.
+In this case, the application can choose to read datagrams with a
+smaller buffer, possibly even of length 0. The payload is truncated
+accordingly. Until the process calls recvmsg() on the error queue,
+however, the full packet is queued, taking up budget from SO_RCVBUF.
+
+
+2.1.1.5 Blocking Read
+
+Reading from the error queue is always a non-blocking operation. To
+block waiting on a timestamp, use poll or select. poll() will return
+POLLERR in pollfd.revents if any data is ready on the error queue.
+There is no need to pass this flag in pollfd.events. This flag is
+ignored on request. See also `man 2 poll`.
+
+
+2.1.2 Receive timestamps
+
+On reception, there is no reason to read from the socket error queue.
+The SCM_TIMESTAMPING ancillary data is sent along with the packet data
+on a normal recvmsg(). Since this is not a socket error, it is not
+accompanied by a message SOL_IP(V6)/IP(V6)_RECVERROR. In this case,
+the meaning of the three fields in struct scm_timestamping is
+implicitly defined. ts[0] holds a software timestamp if set, ts[1]
+is again deprecated and ts[2] holds a hardware timestamp if set.
+
+
+3. Hardware Timestamping configuration: SIOCSHWTSTAMP and SIOCGHWTSTAMP
+
+Hardware time stamping must also be initialized for each device driver
+that is expected to do hardware time stamping. The parameter is defined in
+/include/linux/net_tstamp.h as:
+
+struct hwtstamp_config {
+ int flags; /* no flags defined right now, must be zero */
+ int tx_type; /* HWTSTAMP_TX_* */
+ int rx_filter; /* HWTSTAMP_FILTER_* */
+};
+
+Desired behavior is passed into the kernel and to a specific device by
+calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose
+ifr_data points to a struct hwtstamp_config. The tx_type and
+rx_filter are hints to the driver what it is expected to do. If
+the requested fine-grained filtering for incoming packets is not
+supported, the driver may time stamp more than just the requested types
+of packets.
+
+A driver which supports hardware time stamping shall update the struct
+with the actual, possibly more permissive configuration. If the
+requested packets cannot be time stamped, then nothing should be
+changed and ERANGE shall be returned (in contrast to EINVAL, which
+indicates that SIOCSHWTSTAMP is not supported at all).
+
+Only a processes with admin rights may change the configuration. User
+space is responsible to ensure that multiple processes don't interfere
+with each other and that the settings are reset.
+
+Any process can read the actual configuration by passing this
+structure to ioctl(SIOCGHWTSTAMP) in the same way. However, this has
+not been implemented in all drivers.
+
+/* possible values for hwtstamp_config->tx_type */
+enum {
+ /*
+ * no outgoing packet will need hardware time stamping;
+ * should a packet arrive which asks for it, no hardware
+ * time stamping will be done
+ */
+ HWTSTAMP_TX_OFF,
+
+ /*
+ * enables hardware time stamping for outgoing packets;
+ * the sender of the packet decides which are to be
+ * time stamped by setting SOF_TIMESTAMPING_TX_SOFTWARE
+ * before sending the packet
+ */
+ HWTSTAMP_TX_ON,
+};
+
+/* possible values for hwtstamp_config->rx_filter */
+enum {
+ /* time stamp no incoming packet at all */
+ HWTSTAMP_FILTER_NONE,
+
+ /* time stamp any incoming packet */
+ HWTSTAMP_FILTER_ALL,
+
+ /* return value: time stamp all packets requested plus some others */
+ HWTSTAMP_FILTER_SOME,
+
+ /* PTP v1, UDP, any kind of event packet */
+ HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
+
+ /* for the complete list of values, please check
+ * the include file /include/linux/net_tstamp.h
+ */
+};
+
+3.1 Hardware Timestamping Implementation: Device Drivers
+
+A driver which supports hardware time stamping must support the
+SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with
+the actual values as described in the section on SIOCSHWTSTAMP. It
+should also support SIOCGHWTSTAMP.
+
+Time stamps for received packets must be stored in the skb. To get a pointer
+to the shared time stamp structure of the skb call skb_hwtstamps(). Then
+set the time stamps in the structure:
+
+struct skb_shared_hwtstamps {
+ /* hardware time stamp transformed into duration
+ * since arbitrary point in time
+ */
+ ktime_t hwtstamp;
+};
+
+Time stamps for outgoing packets are to be generated as follows:
+- In hard_start_xmit(), check if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
+ is set no-zero. If yes, then the driver is expected to do hardware time
+ stamping.
+- If this is possible for the skb and requested, then declare
+ that the driver is doing the time stamping by setting the flag
+ SKBTX_IN_PROGRESS in skb_shinfo(skb)->tx_flags , e.g. with
+
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+ You might want to keep a pointer to the associated skb for the next step
+ and not free the skb. A driver not supporting hardware time stamping doesn't
+ do that. A driver must never touch sk_buff::tstamp! It is used to store
+ software generated time stamps by the network subsystem.
+- Driver should call skb_tx_timestamp() as close to passing sk_buff to hardware
+ as possible. skb_tx_timestamp() provides a software time stamp if requested
+ and hardware timestamping is not possible (SKBTX_IN_PROGRESS not set).
+- As soon as the driver has sent the packet and/or obtained a
+ hardware time stamp for it, it passes the time stamp back by
+ calling skb_hwtstamp_tx() with the original skb, the raw
+ hardware time stamp. skb_hwtstamp_tx() clones the original skb and
+ adds the timestamps, therefore the original skb has to be freed now.
+ If obtaining the hardware time stamp somehow fails, then the driver
+ should not fall back to software time stamping. The rationale is that
+ this would occur at a later time in the processing pipeline than other
+ software time stamping and therefore could lead to unexpected deltas
+ between time stamps.
diff --git a/Documentation/networking/timestamping/.gitignore b/Documentation/networking/timestamping/.gitignore
new file mode 100644
index 000000000..9e69e982f
--- /dev/null
+++ b/Documentation/networking/timestamping/.gitignore
@@ -0,0 +1,3 @@
+timestamping
+txtimestamp
+hwtstamp_config
diff --git a/Documentation/networking/timestamping/Makefile b/Documentation/networking/timestamping/Makefile
new file mode 100644
index 000000000..8c20dfaa4
--- /dev/null
+++ b/Documentation/networking/timestamping/Makefile
@@ -0,0 +1,14 @@
+# To compile, from the source root
+#
+# make headers_install
+# make M=documentation
+
+# List of programs to build
+hostprogs-y := hwtstamp_config timestamping txtimestamp
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_timestamping.o += -I$(objtree)/usr/include
+HOSTCFLAGS_txtimestamp.o += -I$(objtree)/usr/include
+HOSTCFLAGS_hwtstamp_config.o += -I$(objtree)/usr/include
diff --git a/Documentation/networking/timestamping/hwtstamp_config.c b/Documentation/networking/timestamping/hwtstamp_config.c
new file mode 100644
index 000000000..e8b685a7f
--- /dev/null
+++ b/Documentation/networking/timestamping/hwtstamp_config.c
@@ -0,0 +1,134 @@
+/* Test program for SIOC{G,S}HWTSTAMP
+ * Copyright 2013 Solarflare Communications
+ * Author: Ben Hutchings
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <linux/if.h>
+#include <linux/net_tstamp.h>
+#include <linux/sockios.h>
+
+static int
+lookup_value(const char **names, int size, const char *name)
+{
+ int value;
+
+ for (value = 0; value < size; value++)
+ if (names[value] && strcasecmp(names[value], name) == 0)
+ return value;
+
+ return -1;
+}
+
+static const char *
+lookup_name(const char **names, int size, int value)
+{
+ return (value >= 0 && value < size) ? names[value] : NULL;
+}
+
+static void list_names(FILE *f, const char **names, int size)
+{
+ int value;
+
+ for (value = 0; value < size; value++)
+ if (names[value])
+ fprintf(f, " %s\n", names[value]);
+}
+
+static const char *tx_types[] = {
+#define TX_TYPE(name) [HWTSTAMP_TX_ ## name] = #name
+ TX_TYPE(OFF),
+ TX_TYPE(ON),
+ TX_TYPE(ONESTEP_SYNC)
+#undef TX_TYPE
+};
+#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0])))
+
+static const char *rx_filters[] = {
+#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name
+ RX_FILTER(NONE),
+ RX_FILTER(ALL),
+ RX_FILTER(SOME),
+ RX_FILTER(PTP_V1_L4_EVENT),
+ RX_FILTER(PTP_V1_L4_SYNC),
+ RX_FILTER(PTP_V1_L4_DELAY_REQ),
+ RX_FILTER(PTP_V2_L4_EVENT),
+ RX_FILTER(PTP_V2_L4_SYNC),
+ RX_FILTER(PTP_V2_L4_DELAY_REQ),
+ RX_FILTER(PTP_V2_L2_EVENT),
+ RX_FILTER(PTP_V2_L2_SYNC),
+ RX_FILTER(PTP_V2_L2_DELAY_REQ),
+ RX_FILTER(PTP_V2_EVENT),
+ RX_FILTER(PTP_V2_SYNC),
+ RX_FILTER(PTP_V2_DELAY_REQ),
+#undef RX_FILTER
+};
+#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0])))
+
+static void usage(void)
+{
+ fputs("Usage: hwtstamp_config if_name [tx_type rx_filter]\n"
+ "tx_type is any of (case-insensitive):\n",
+ stderr);
+ list_names(stderr, tx_types, N_TX_TYPES);
+ fputs("rx_filter is any of (case-insensitive):\n", stderr);
+ list_names(stderr, rx_filters, N_RX_FILTERS);
+}
+
+int main(int argc, char **argv)
+{
+ struct ifreq ifr;
+ struct hwtstamp_config config;
+ const char *name;
+ int sock;
+
+ if ((argc != 2 && argc != 4) || (strlen(argv[1]) >= IFNAMSIZ)) {
+ usage();
+ return 2;
+ }
+
+ if (argc == 4) {
+ config.flags = 0;
+ config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]);
+ config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]);
+ if (config.tx_type < 0 || config.rx_filter < 0) {
+ usage();
+ return 2;
+ }
+ }
+
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock < 0) {
+ perror("socket");
+ return 1;
+ }
+
+ strcpy(ifr.ifr_name, argv[1]);
+ ifr.ifr_data = (caddr_t)&config;
+
+ if (ioctl(sock, (argc == 2) ? SIOCGHWTSTAMP : SIOCSHWTSTAMP, &ifr)) {
+ perror("ioctl");
+ return 1;
+ }
+
+ printf("flags = %#x\n", config.flags);
+ name = lookup_name(tx_types, N_TX_TYPES, config.tx_type);
+ if (name)
+ printf("tx_type = %s\n", name);
+ else
+ printf("tx_type = %d\n", config.tx_type);
+ name = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter);
+ if (name)
+ printf("rx_filter = %s\n", name);
+ else
+ printf("rx_filter = %d\n", config.rx_filter);
+
+ return 0;
+}
diff --git a/Documentation/networking/timestamping/timestamping.c b/Documentation/networking/timestamping/timestamping.c
new file mode 100644
index 000000000..5cdfd7434
--- /dev/null
+++ b/Documentation/networking/timestamping/timestamping.c
@@ -0,0 +1,528 @@
+/*
+ * This program demonstrates how the various time stamping features in
+ * the Linux kernel work. It emulates the behavior of a PTP
+ * implementation in stand-alone master mode by sending PTPv1 Sync
+ * multicasts once every second. It looks for similar packets, but
+ * beyond that doesn't actually implement PTP.
+ *
+ * Outgoing packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support.
+ *
+ * Incoming packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and
+ * SO_TIMESTAMP[NS].
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <asm/types.h>
+#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+
+#ifndef SO_TIMESTAMPING
+# define SO_TIMESTAMPING 37
+# define SCM_TIMESTAMPING SO_TIMESTAMPING
+#endif
+
+#ifndef SO_TIMESTAMPNS
+# define SO_TIMESTAMPNS 35
+#endif
+
+#ifndef SIOCGSTAMPNS
+# define SIOCGSTAMPNS 0x8907
+#endif
+
+#ifndef SIOCSHWTSTAMP
+# define SIOCSHWTSTAMP 0x89b0
+#endif
+
+static void usage(const char *error)
+{
+ if (error)
+ printf("invalid option: %s\n", error);
+ printf("timestamping interface option*\n\n"
+ "Options:\n"
+ " IP_MULTICAST_LOOP - looping outgoing multicasts\n"
+ " SO_TIMESTAMP - normal software time stamping, ms resolution\n"
+ " SO_TIMESTAMPNS - more accurate software time stamping\n"
+ " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n"
+ " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n"
+ " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n"
+ " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
+ " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
+ " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
+ " SIOCGSTAMP - check last socket time stamp\n"
+ " SIOCGSTAMPNS - more accurate socket time stamp\n");
+ exit(1);
+}
+
+static void bail(const char *error)
+{
+ printf("%s: %s\n", error, strerror(errno));
+ exit(1);
+}
+
+static const unsigned char sync[] = {
+ 0x00, 0x01, 0x00, 0x01,
+ 0x5f, 0x44, 0x46, 0x4c,
+ 0x54, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01,
+
+ /* fake uuid */
+ 0x00, 0x01,
+ 0x02, 0x03, 0x04, 0x05,
+
+ 0x00, 0x01, 0x00, 0x37,
+ 0x00, 0x00, 0x00, 0x08,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x49, 0x05, 0xcd, 0x01,
+ 0x29, 0xb1, 0x8d, 0xb0,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x01,
+
+ /* fake uuid */
+ 0x00, 0x01,
+ 0x02, 0x03, 0x04, 0x05,
+
+ 0x00, 0x00, 0x00, 0x37,
+ 0x00, 0x00, 0x00, 0x04,
+ 0x44, 0x46, 0x4c, 0x54,
+ 0x00, 0x00, 0xf0, 0x60,
+ 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0xf0, 0x60,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x04,
+ 0x44, 0x46, 0x4c, 0x54,
+ 0x00, 0x01,
+
+ /* fake uuid */
+ 0x00, 0x01,
+ 0x02, 0x03, 0x04, 0x05,
+
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+{
+ struct timeval now;
+ int res;
+
+ res = sendto(sock, sync, sizeof(sync), 0,
+ addr, addr_len);
+ gettimeofday(&now, 0);
+ if (res < 0)
+ printf("%s: %s\n", "send", strerror(errno));
+ else
+ printf("%ld.%06ld: sent %d bytes\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ res);
+}
+
+static void printpacket(struct msghdr *msg, int res,
+ char *data,
+ int sock, int recvmsg_flags,
+ int siocgstamp, int siocgstampns)
+{
+ struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+ struct cmsghdr *cmsg;
+ struct timeval tv;
+ struct timespec ts;
+ struct timeval now;
+
+ gettimeofday(&now, 0);
+
+ printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+ res,
+ inet_ntoa(from_addr->sin_addr),
+ msg->msg_controllen);
+ for (cmsg = CMSG_FIRSTHDR(msg);
+ cmsg;
+ cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ printf(" cmsg len %zu: ", cmsg->cmsg_len);
+ switch (cmsg->cmsg_level) {
+ case SOL_SOCKET:
+ printf("SOL_SOCKET ");
+ switch (cmsg->cmsg_type) {
+ case SO_TIMESTAMP: {
+ struct timeval *stamp =
+ (struct timeval *)CMSG_DATA(cmsg);
+ printf("SO_TIMESTAMP %ld.%06ld",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_usec);
+ break;
+ }
+ case SO_TIMESTAMPNS: {
+ struct timespec *stamp =
+ (struct timespec *)CMSG_DATA(cmsg);
+ printf("SO_TIMESTAMPNS %ld.%09ld",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_nsec);
+ break;
+ }
+ case SO_TIMESTAMPING: {
+ struct timespec *stamp =
+ (struct timespec *)CMSG_DATA(cmsg);
+ printf("SO_TIMESTAMPING ");
+ printf("SW %ld.%09ld ",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_nsec);
+ stamp++;
+ /* skip deprecated HW transformed */
+ stamp++;
+ printf("HW raw %ld.%09ld",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_nsec);
+ break;
+ }
+ default:
+ printf("type %d", cmsg->cmsg_type);
+ break;
+ }
+ break;
+ case IPPROTO_IP:
+ printf("IPPROTO_IP ");
+ switch (cmsg->cmsg_type) {
+ case IP_RECVERR: {
+ struct sock_extended_err *err =
+ (struct sock_extended_err *)CMSG_DATA(cmsg);
+ printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s",
+ strerror(err->ee_errno),
+ err->ee_origin,
+#ifdef SO_EE_ORIGIN_TIMESTAMPING
+ err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ?
+ "bounced packet" : "unexpected origin"
+#else
+ "probably SO_EE_ORIGIN_TIMESTAMPING"
+#endif
+ );
+ if (res < sizeof(sync))
+ printf(" => truncated data?!");
+ else if (!memcmp(sync, data + res - sizeof(sync),
+ sizeof(sync)))
+ printf(" => GOT OUR DATA BACK (HURRAY!)");
+ break;
+ }
+ case IP_PKTINFO: {
+ struct in_pktinfo *pktinfo =
+ (struct in_pktinfo *)CMSG_DATA(cmsg);
+ printf("IP_PKTINFO interface index %u",
+ pktinfo->ipi_ifindex);
+ break;
+ }
+ default:
+ printf("type %d", cmsg->cmsg_type);
+ break;
+ }
+ break;
+ default:
+ printf("level %d type %d",
+ cmsg->cmsg_level,
+ cmsg->cmsg_type);
+ break;
+ }
+ printf("\n");
+ }
+
+ if (siocgstamp) {
+ if (ioctl(sock, SIOCGSTAMP, &tv))
+ printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno));
+ else
+ printf("SIOCGSTAMP %ld.%06ld\n",
+ (long)tv.tv_sec,
+ (long)tv.tv_usec);
+ }
+ if (siocgstampns) {
+ if (ioctl(sock, SIOCGSTAMPNS, &ts))
+ printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno));
+ else
+ printf("SIOCGSTAMPNS %ld.%09ld\n",
+ (long)ts.tv_sec,
+ (long)ts.tv_nsec);
+ }
+}
+
+static void recvpacket(int sock, int recvmsg_flags,
+ int siocgstamp, int siocgstampns)
+{
+ char data[256];
+ struct msghdr msg;
+ struct iovec entry;
+ struct sockaddr_in from_addr;
+ struct {
+ struct cmsghdr cm;
+ char control[512];
+ } control;
+ int res;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &entry;
+ msg.msg_iovlen = 1;
+ entry.iov_base = data;
+ entry.iov_len = sizeof(data);
+ msg.msg_name = (caddr_t)&from_addr;
+ msg.msg_namelen = sizeof(from_addr);
+ msg.msg_control = &control;
+ msg.msg_controllen = sizeof(control);
+
+ res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT);
+ if (res < 0) {
+ printf("%s %s: %s\n",
+ "recvmsg",
+ (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+ strerror(errno));
+ } else {
+ printpacket(&msg, res, data,
+ sock, recvmsg_flags,
+ siocgstamp, siocgstampns);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int so_timestamping_flags = 0;
+ int so_timestamp = 0;
+ int so_timestampns = 0;
+ int siocgstamp = 0;
+ int siocgstampns = 0;
+ int ip_multicast_loop = 0;
+ char *interface;
+ int i;
+ int enabled = 1;
+ int sock;
+ struct ifreq device;
+ struct ifreq hwtstamp;
+ struct hwtstamp_config hwconfig, hwconfig_requested;
+ struct sockaddr_in addr;
+ struct ip_mreq imr;
+ struct in_addr iaddr;
+ int val;
+ socklen_t len;
+ struct timeval next;
+
+ if (argc < 2)
+ usage(0);
+ interface = argv[1];
+
+ for (i = 2; i < argc; i++) {
+ if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
+ so_timestamp = 1;
+ else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
+ so_timestampns = 1;
+ else if (!strcasecmp(argv[i], "SIOCGSTAMP"))
+ siocgstamp = 1;
+ else if (!strcasecmp(argv[i], "SIOCGSTAMPNS"))
+ siocgstampns = 1;
+ else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
+ ip_multicast_loop = 1;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+ else
+ usage(argv[i]);
+ }
+
+ sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (sock < 0)
+ bail("socket");
+
+ memset(&device, 0, sizeof(device));
+ strncpy(device.ifr_name, interface, sizeof(device.ifr_name));
+ if (ioctl(sock, SIOCGIFADDR, &device) < 0)
+ bail("getting interface IP address");
+
+ memset(&hwtstamp, 0, sizeof(hwtstamp));
+ strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name));
+ hwtstamp.ifr_data = (void *)&hwconfig;
+ memset(&hwconfig, 0, sizeof(hwconfig));
+ hwconfig.tx_type =
+ (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
+ HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+ hwconfig.rx_filter =
+ (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+ HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
+ hwconfig_requested = hwconfig;
+ if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
+ if ((errno == EINVAL || errno == ENOTSUP) &&
+ hwconfig_requested.tx_type == HWTSTAMP_TX_OFF &&
+ hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE)
+ printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n");
+ else
+ bail("SIOCSHWTSTAMP");
+ }
+ printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n",
+ hwconfig_requested.tx_type, hwconfig.tx_type,
+ hwconfig_requested.rx_filter, hwconfig.rx_filter);
+
+ /* bind to PTP port */
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = htons(319 /* PTP event port */);
+ if (bind(sock,
+ (struct sockaddr *)&addr,
+ sizeof(struct sockaddr_in)) < 0)
+ bail("bind");
+
+ /* set multicast group for outgoing packets */
+ inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
+ addr.sin_addr = iaddr;
+ imr.imr_multiaddr.s_addr = iaddr.s_addr;
+ imr.imr_interface.s_addr =
+ ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr;
+ if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
+ &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0)
+ bail("set multicast");
+
+ /* join multicast group, loop our own packet */
+ if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,
+ &imr, sizeof(struct ip_mreq)) < 0)
+ bail("join multicast group");
+
+ if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP,
+ &ip_multicast_loop, sizeof(enabled)) < 0) {
+ bail("loop multicast");
+ }
+
+ /* set socket options for time stamping */
+ if (so_timestamp &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP,
+ &enabled, sizeof(enabled)) < 0)
+ bail("setsockopt SO_TIMESTAMP");
+
+ if (so_timestampns &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS,
+ &enabled, sizeof(enabled)) < 0)
+ bail("setsockopt SO_TIMESTAMPNS");
+
+ if (so_timestamping_flags &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
+ &so_timestamping_flags,
+ sizeof(so_timestamping_flags)) < 0)
+ bail("setsockopt SO_TIMESTAMPING");
+
+ /* request IP_PKTINFO for debugging purposes */
+ if (setsockopt(sock, SOL_IP, IP_PKTINFO,
+ &enabled, sizeof(enabled)) < 0)
+ printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno));
+
+ /* verify socket options */
+ len = sizeof(val);
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0)
+ printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno));
+ else
+ printf("SO_TIMESTAMP %d\n", val);
+
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0)
+ printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS",
+ strerror(errno));
+ else
+ printf("SO_TIMESTAMPNS %d\n", val);
+
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
+ printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
+ strerror(errno));
+ } else {
+ printf("SO_TIMESTAMPING %d\n", val);
+ if (val != so_timestamping_flags)
+ printf(" not the expected value %d\n",
+ so_timestamping_flags);
+ }
+
+ /* send packets forever every five seconds */
+ gettimeofday(&next, 0);
+ next.tv_sec = (next.tv_sec + 1) / 5 * 5;
+ next.tv_usec = 0;
+ while (1) {
+ struct timeval now;
+ struct timeval delta;
+ long delta_us;
+ int res;
+ fd_set readfs, errorfs;
+
+ gettimeofday(&now, 0);
+ delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 +
+ (long)(next.tv_usec - now.tv_usec);
+ if (delta_us > 0) {
+ /* continue waiting for timeout or data */
+ delta.tv_sec = delta_us / 1000000;
+ delta.tv_usec = delta_us % 1000000;
+
+ FD_ZERO(&readfs);
+ FD_ZERO(&errorfs);
+ FD_SET(sock, &readfs);
+ FD_SET(sock, &errorfs);
+ printf("%ld.%06ld: select %ldus\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ delta_us);
+ res = select(sock + 1, &readfs, 0, &errorfs, &delta);
+ gettimeofday(&now, 0);
+ printf("%ld.%06ld: select returned: %d, %s\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ res,
+ res < 0 ? strerror(errno) : "success");
+ if (res > 0) {
+ if (FD_ISSET(sock, &readfs))
+ printf("ready for reading\n");
+ if (FD_ISSET(sock, &errorfs))
+ printf("has error\n");
+ recvpacket(sock, 0,
+ siocgstamp,
+ siocgstampns);
+ recvpacket(sock, MSG_ERRQUEUE,
+ siocgstamp,
+ siocgstampns);
+ }
+ } else {
+ /* write one packet */
+ sendpacket(sock,
+ (struct sockaddr *)&addr,
+ sizeof(addr));
+ next.tv_sec += 5;
+ continue;
+ }
+ }
+
+ return 0;
+}
diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c
new file mode 100644
index 000000000..8217510d3
--- /dev/null
+++ b/Documentation/networking/timestamping/txtimestamp.c
@@ -0,0 +1,549 @@
+/*
+ * Copyright 2014 Google Inc.
+ * Author: willemb@google.com (Willem de Bruijn)
+ *
+ * Test software tx timestamping, including
+ *
+ * - SCHED, SND and ACK timestamps
+ * - RAW, UDP and TCP
+ * - IPv4 and IPv6
+ * - various packet sizes (to test GSO and TSO)
+ *
+ * Consult the command line arguments for help on running
+ * the various testcases.
+ *
+ * This test requires a dummy TCP server.
+ * A simple `nc6 [-u] -l -p $DESTPORT` will do
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/types.h>
+#include <error.h>
+#include <errno.h>
+#include <linux/errqueue.h>
+#include <linux/if_ether.h>
+#include <linux/net_tstamp.h>
+#include <netdb.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#include <netpacket/packet.h>
+#include <poll.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+/* command line parameters */
+static int cfg_proto = SOCK_STREAM;
+static int cfg_ipproto = IPPROTO_TCP;
+static int cfg_num_pkts = 4;
+static int do_ipv4 = 1;
+static int do_ipv6 = 1;
+static int cfg_payload_len = 10;
+static bool cfg_show_payload;
+static bool cfg_do_pktinfo;
+static bool cfg_loop_nodata;
+static uint16_t dest_port = 9000;
+
+static struct sockaddr_in daddr;
+static struct sockaddr_in6 daddr6;
+static struct timespec ts_prev;
+
+static void __print_timestamp(const char *name, struct timespec *cur,
+ uint32_t key, int payload_len)
+{
+ if (!(cur->tv_sec | cur->tv_nsec))
+ return;
+
+ fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)",
+ name, cur->tv_sec, cur->tv_nsec / 1000,
+ key, payload_len);
+
+ if ((ts_prev.tv_sec | ts_prev.tv_nsec)) {
+ int64_t cur_ms, prev_ms;
+
+ cur_ms = (long) cur->tv_sec * 1000 * 1000;
+ cur_ms += cur->tv_nsec / 1000;
+
+ prev_ms = (long) ts_prev.tv_sec * 1000 * 1000;
+ prev_ms += ts_prev.tv_nsec / 1000;
+
+ fprintf(stderr, " (%+ld us)", cur_ms - prev_ms);
+ }
+
+ ts_prev = *cur;
+ fprintf(stderr, "\n");
+}
+
+static void print_timestamp_usr(void)
+{
+ struct timespec ts;
+ struct timeval tv; /* avoid dependency on -lrt */
+
+ gettimeofday(&tv, NULL);
+ ts.tv_sec = tv.tv_sec;
+ ts.tv_nsec = tv.tv_usec * 1000;
+
+ __print_timestamp(" USR", &ts, 0, 0);
+}
+
+static void print_timestamp(struct scm_timestamping *tss, int tstype,
+ int tskey, int payload_len)
+{
+ const char *tsname;
+
+ switch (tstype) {
+ case SCM_TSTAMP_SCHED:
+ tsname = " ENQ";
+ break;
+ case SCM_TSTAMP_SND:
+ tsname = " SND";
+ break;
+ case SCM_TSTAMP_ACK:
+ tsname = " ACK";
+ break;
+ default:
+ error(1, 0, "unknown timestamp type: %u",
+ tstype);
+ }
+ __print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
+}
+
+/* TODO: convert to check_and_print payload once API is stable */
+static void print_payload(char *data, int len)
+{
+ int i;
+
+ if (!len)
+ return;
+
+ if (len > 70)
+ len = 70;
+
+ fprintf(stderr, "payload: ");
+ for (i = 0; i < len; i++)
+ fprintf(stderr, "%02hhx ", data[i]);
+ fprintf(stderr, "\n");
+}
+
+static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
+{
+ char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN];
+
+ fprintf(stderr, " pktinfo: ifindex=%u src=%s dst=%s\n",
+ ifindex,
+ saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown",
+ daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown");
+}
+
+static void __poll(int fd)
+{
+ struct pollfd pollfd;
+ int ret;
+
+ memset(&pollfd, 0, sizeof(pollfd));
+ pollfd.fd = fd;
+ ret = poll(&pollfd, 1, 100);
+ if (ret != 1)
+ error(1, errno, "poll");
+}
+
+static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
+{
+ struct sock_extended_err *serr = NULL;
+ struct scm_timestamping *tss = NULL;
+ struct cmsghdr *cm;
+ int batch = 0;
+
+ for (cm = CMSG_FIRSTHDR(msg);
+ cm && cm->cmsg_len;
+ cm = CMSG_NXTHDR(msg, cm)) {
+ if (cm->cmsg_level == SOL_SOCKET &&
+ cm->cmsg_type == SCM_TIMESTAMPING) {
+ tss = (void *) CMSG_DATA(cm);
+ } else if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_RECVERR)) {
+ serr = (void *) CMSG_DATA(cm);
+ if (serr->ee_errno != ENOMSG ||
+ serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
+ fprintf(stderr, "unknown ip error %d %d\n",
+ serr->ee_errno,
+ serr->ee_origin);
+ serr = NULL;
+ }
+ } else if (cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_PKTINFO) {
+ struct in_pktinfo *info = (void *) CMSG_DATA(cm);
+ print_pktinfo(AF_INET, info->ipi_ifindex,
+ &info->ipi_spec_dst, &info->ipi_addr);
+ } else if (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_PKTINFO) {
+ struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm);
+ print_pktinfo(AF_INET6, info6->ipi6_ifindex,
+ NULL, &info6->ipi6_addr);
+ } else
+ fprintf(stderr, "unknown cmsg %d,%d\n",
+ cm->cmsg_level, cm->cmsg_type);
+
+ if (serr && tss) {
+ print_timestamp(tss, serr->ee_info, serr->ee_data,
+ payload_len);
+ serr = NULL;
+ tss = NULL;
+ batch++;
+ }
+ }
+
+ if (batch > 1)
+ fprintf(stderr, "batched %d timestamps\n", batch);
+}
+
+static int recv_errmsg(int fd)
+{
+ static char ctrl[1024 /* overprovision*/];
+ static struct msghdr msg;
+ struct iovec entry;
+ static char *data;
+ int ret = 0;
+
+ data = malloc(cfg_payload_len);
+ if (!data)
+ error(1, 0, "malloc");
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&entry, 0, sizeof(entry));
+ memset(ctrl, 0, sizeof(ctrl));
+
+ entry.iov_base = data;
+ entry.iov_len = cfg_payload_len;
+ msg.msg_iov = &entry;
+ msg.msg_iovlen = 1;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = ctrl;
+ msg.msg_controllen = sizeof(ctrl);
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno != EAGAIN)
+ error(1, errno, "recvmsg");
+
+ if (ret >= 0) {
+ __recv_errmsg_cmsg(&msg, ret);
+ if (cfg_show_payload)
+ print_payload(data, cfg_payload_len);
+ }
+
+ free(data);
+ return ret == -1;
+}
+
+static void do_test(int family, unsigned int opt)
+{
+ char *buf;
+ int fd, i, val = 1, total_len;
+
+ if (family == AF_INET6 && cfg_proto != SOCK_STREAM) {
+ /* due to lack of checksum generation code */
+ fprintf(stderr, "test: skipping datagram over IPv6\n");
+ return;
+ }
+
+ total_len = cfg_payload_len;
+ if (cfg_proto == SOCK_RAW) {
+ total_len += sizeof(struct udphdr);
+ if (cfg_ipproto == IPPROTO_RAW)
+ total_len += sizeof(struct iphdr);
+ }
+
+ buf = malloc(total_len);
+ if (!buf)
+ error(1, 0, "malloc");
+
+ fd = socket(family, cfg_proto, cfg_ipproto);
+ if (fd < 0)
+ error(1, errno, "socket");
+
+ if (cfg_proto == SOCK_STREAM) {
+ if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
+ (char*) &val, sizeof(val)))
+ error(1, 0, "setsockopt no nagle");
+
+ if (family == PF_INET) {
+ if (connect(fd, (void *) &daddr, sizeof(daddr)))
+ error(1, errno, "connect ipv4");
+ } else {
+ if (connect(fd, (void *) &daddr6, sizeof(daddr6)))
+ error(1, errno, "connect ipv6");
+ }
+ }
+
+ if (cfg_do_pktinfo) {
+ if (family == AF_INET6) {
+ if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO,
+ &val, sizeof(val)))
+ error(1, errno, "setsockopt pktinfo ipv6");
+ } else {
+ if (setsockopt(fd, SOL_IP, IP_PKTINFO,
+ &val, sizeof(val)))
+ error(1, errno, "setsockopt pktinfo ipv4");
+ }
+ }
+
+ opt |= SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_CMSG |
+ SOF_TIMESTAMPING_OPT_ID;
+ if (cfg_loop_nodata)
+ opt |= SOF_TIMESTAMPING_OPT_TSONLY;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+ (char *) &opt, sizeof(opt)))
+ error(1, 0, "setsockopt timestamping");
+
+ for (i = 0; i < cfg_num_pkts; i++) {
+ memset(&ts_prev, 0, sizeof(ts_prev));
+ memset(buf, 'a' + i, total_len);
+
+ if (cfg_proto == SOCK_RAW) {
+ struct udphdr *udph;
+ int off = 0;
+
+ if (cfg_ipproto == IPPROTO_RAW) {
+ struct iphdr *iph = (void *) buf;
+
+ memset(iph, 0, sizeof(*iph));
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->ttl = 2;
+ iph->daddr = daddr.sin_addr.s_addr;
+ iph->protocol = IPPROTO_UDP;
+ /* kernel writes saddr, csum, len */
+
+ off = sizeof(*iph);
+ }
+
+ udph = (void *) buf + off;
+ udph->source = ntohs(9000); /* random spoof */
+ udph->dest = ntohs(dest_port);
+ udph->len = ntohs(sizeof(*udph) + cfg_payload_len);
+ udph->check = 0; /* not allowed for IPv6 */
+ }
+
+ print_timestamp_usr();
+ if (cfg_proto != SOCK_STREAM) {
+ if (family == PF_INET)
+ val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr));
+ else
+ val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6));
+ } else {
+ val = send(fd, buf, cfg_payload_len, 0);
+ }
+ if (val != total_len)
+ error(1, errno, "send");
+
+ /* wait for all errors to be queued, else ACKs arrive OOO */
+ usleep(50 * 1000);
+
+ __poll(fd);
+
+ while (!recv_errmsg(fd)) {}
+ }
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ free(buf);
+ usleep(400 * 1000);
+}
+
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+ fprintf(stderr, "\nUsage: %s [options] hostname\n"
+ "\nwhere options are:\n"
+ " -4: only IPv4\n"
+ " -6: only IPv6\n"
+ " -h: show this message\n"
+ " -I: request PKTINFO\n"
+ " -l N: send N bytes at a time\n"
+ " -n: set no-payload option\n"
+ " -r: use raw\n"
+ " -R: use raw (IP_HDRINCL)\n"
+ " -p N: connect to port N\n"
+ " -u: use udp\n"
+ " -x: show payload (up to 70 bytes)\n",
+ filepath);
+ exit(1);
+}
+
+static void parse_opt(int argc, char **argv)
+{
+ int proto_count = 0;
+ char c;
+
+ while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) {
+ switch (c) {
+ case '4':
+ do_ipv6 = 0;
+ break;
+ case '6':
+ do_ipv4 = 0;
+ break;
+ case 'I':
+ cfg_do_pktinfo = true;
+ break;
+ case 'n':
+ cfg_loop_nodata = true;
+ break;
+ case 'r':
+ proto_count++;
+ cfg_proto = SOCK_RAW;
+ cfg_ipproto = IPPROTO_UDP;
+ break;
+ case 'R':
+ proto_count++;
+ cfg_proto = SOCK_RAW;
+ cfg_ipproto = IPPROTO_RAW;
+ break;
+ case 'u':
+ proto_count++;
+ cfg_proto = SOCK_DGRAM;
+ cfg_ipproto = IPPROTO_UDP;
+ break;
+ case 'l':
+ cfg_payload_len = strtoul(optarg, NULL, 10);
+ break;
+ case 'p':
+ dest_port = strtoul(optarg, NULL, 10);
+ break;
+ case 'x':
+ cfg_show_payload = true;
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ }
+ }
+
+ if (!cfg_payload_len)
+ error(1, 0, "payload may not be nonzero");
+ if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472)
+ error(1, 0, "udp packet might exceed expected MTU");
+ if (!do_ipv4 && !do_ipv6)
+ error(1, 0, "pass -4 or -6, not both");
+ if (proto_count > 1)
+ error(1, 0, "pass -r, -R or -u, not multiple");
+
+ if (optind != argc - 1)
+ error(1, 0, "missing required hostname argument");
+}
+
+static void resolve_hostname(const char *hostname)
+{
+ struct addrinfo *addrs, *cur;
+ int have_ipv4 = 0, have_ipv6 = 0;
+
+ if (getaddrinfo(hostname, NULL, NULL, &addrs))
+ error(1, errno, "getaddrinfo");
+
+ cur = addrs;
+ while (cur && !have_ipv4 && !have_ipv6) {
+ if (!have_ipv4 && cur->ai_family == AF_INET) {
+ memcpy(&daddr, cur->ai_addr, sizeof(daddr));
+ daddr.sin_port = htons(dest_port);
+ have_ipv4 = 1;
+ }
+ else if (!have_ipv6 && cur->ai_family == AF_INET6) {
+ memcpy(&daddr6, cur->ai_addr, sizeof(daddr6));
+ daddr6.sin6_port = htons(dest_port);
+ have_ipv6 = 1;
+ }
+ cur = cur->ai_next;
+ }
+ if (addrs)
+ freeaddrinfo(addrs);
+
+ do_ipv4 &= have_ipv4;
+ do_ipv6 &= have_ipv6;
+}
+
+static void do_main(int family)
+{
+ fprintf(stderr, "family: %s\n",
+ family == PF_INET ? "INET" : "INET6");
+
+ fprintf(stderr, "test SND\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE);
+
+ fprintf(stderr, "test ENQ\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SCHED);
+
+ fprintf(stderr, "test ENQ + SND\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE);
+
+ if (cfg_proto == SOCK_STREAM) {
+ fprintf(stderr, "\ntest ACK\n");
+ do_test(family, SOF_TIMESTAMPING_TX_ACK);
+
+ fprintf(stderr, "\ntest SND + ACK\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK);
+
+ fprintf(stderr, "\ntest ENQ + SND + ACK\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK);
+ }
+}
+
+const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" };
+
+int main(int argc, char **argv)
+{
+ if (argc == 1)
+ usage(argv[0]);
+
+ parse_opt(argc, argv);
+ resolve_hostname(argv[argc - 1]);
+
+ fprintf(stderr, "protocol: %s\n", sock_names[cfg_proto]);
+ fprintf(stderr, "payload: %u\n", cfg_payload_len);
+ fprintf(stderr, "server port: %u\n", dest_port);
+ fprintf(stderr, "\n");
+
+ if (do_ipv4)
+ do_main(PF_INET);
+ if (do_ipv6)
+ do_main(PF_INET6);
+
+ return 0;
+}
diff --git a/Documentation/networking/tlan.txt b/Documentation/networking/tlan.txt
new file mode 100644
index 000000000..34550dfce
--- /dev/null
+++ b/Documentation/networking/tlan.txt
@@ -0,0 +1,117 @@
+(C) 1997-1998 Caldera, Inc.
+(C) 1998 James Banks
+(C) 1999-2001 Torben Mathiasen <tmm@image.dk, torben.mathiasen@compaq.com>
+
+For driver information/updates visit http://www.compaq.com
+
+
+TLAN driver for Linux, version 1.14a
+README
+
+
+I. Supported Devices.
+
+ Only PCI devices will work with this driver.
+
+ Supported:
+ Vendor ID Device ID Name
+ 0e11 ae32 Compaq Netelligent 10/100 TX PCI UTP
+ 0e11 ae34 Compaq Netelligent 10 T PCI UTP
+ 0e11 ae35 Compaq Integrated NetFlex 3/P
+ 0e11 ae40 Compaq Netelligent Dual 10/100 TX PCI UTP
+ 0e11 ae43 Compaq Netelligent Integrated 10/100 TX UTP
+ 0e11 b011 Compaq Netelligent 10/100 TX Embedded UTP
+ 0e11 b012 Compaq Netelligent 10 T/2 PCI UTP/Coax
+ 0e11 b030 Compaq Netelligent 10/100 TX UTP
+ 0e11 f130 Compaq NetFlex 3/P
+ 0e11 f150 Compaq NetFlex 3/P
+ 108d 0012 Olicom OC-2325
+ 108d 0013 Olicom OC-2183
+ 108d 0014 Olicom OC-2326
+
+
+ Caveats:
+
+ I am not sure if 100BaseTX daughterboards (for those cards which
+ support such things) will work. I haven't had any solid evidence
+ either way.
+
+ However, if a card supports 100BaseTx without requiring an add
+ on daughterboard, it should work with 100BaseTx.
+
+ The "Netelligent 10 T/2 PCI UTP/Coax" (b012) device is untested,
+ but I do not expect any problems.
+
+
+II. Driver Options
+ 1. You can append debug=x to the end of the insmod line to get
+ debug messages, where x is a bit field where the bits mean
+ the following:
+
+ 0x01 Turn on general debugging messages.
+ 0x02 Turn on receive debugging messages.
+ 0x04 Turn on transmit debugging messages.
+ 0x08 Turn on list debugging messages.
+
+ 2. You can append aui=1 to the end of the insmod line to cause
+ the adapter to use the AUI interface instead of the 10 Base T
+ interface. This is also what to do if you want to use the BNC
+ connector on a TLAN based device. (Setting this option on a
+ device that does not have an AUI/BNC connector will probably
+ cause it to not function correctly.)
+
+ 3. You can set duplex=1 to force half duplex, and duplex=2 to
+ force full duplex.
+
+ 4. You can set speed=10 to force 10Mbs operation, and speed=100
+ to force 100Mbs operation. (I'm not sure what will happen
+ if a card which only supports 10Mbs is forced into 100Mbs
+ mode.)
+
+ 5. You have to use speed=X duplex=Y together now. If you just
+ do "insmod tlan.o speed=100" the driver will do Auto-Neg.
+ To force a 10Mbps Half-Duplex link do "insmod tlan.o speed=10
+ duplex=1".
+
+ 6. If the driver is built into the kernel, you can use the 3rd
+ and 4th parameters to set aui and debug respectively. For
+ example:
+
+ ether=0,0,0x1,0x7,eth0
+
+ This sets aui to 0x1 and debug to 0x7, assuming eth0 is a
+ supported TLAN device.
+
+ The bits in the third byte are assigned as follows:
+
+ 0x01 = aui
+ 0x02 = use half duplex
+ 0x04 = use full duplex
+ 0x08 = use 10BaseT
+ 0x10 = use 100BaseTx
+
+ You also need to set both speed and duplex settings when forcing
+ speeds with kernel-parameters.
+ ether=0,0,0x12,0,eth0 will force link to 100Mbps Half-Duplex.
+
+ 7. If you have more than one tlan adapter in your system, you can
+ use the above options on a per adapter basis. To force a 100Mbit/HD
+ link with your eth1 adapter use:
+
+ insmod tlan speed=0,100 duplex=0,1
+
+ Now eth0 will use auto-neg and eth1 will be forced to 100Mbit/HD.
+ Note that the tlan driver supports a maximum of 8 adapters.
+
+
+III. Things to try if you have problems.
+ 1. Make sure your card's PCI id is among those listed in
+ section I, above.
+ 2. Make sure routing is correct.
+ 3. Try forcing different speed/duplex settings
+
+
+There is also a tlan mailing list which you can join by sending "subscribe tlan"
+in the body of an email to majordomo@vuser.vu.union.edu.
+There is also a tlan website at http://www.compaq.com
+
diff --git a/Documentation/networking/tproxy.txt b/Documentation/networking/tproxy.txt
new file mode 100644
index 000000000..ec11429e1
--- /dev/null
+++ b/Documentation/networking/tproxy.txt
@@ -0,0 +1,84 @@
+Transparent proxy support
+=========================
+
+This feature adds Linux 2.2-like transparent proxy support to current kernels.
+To use it, enable the socket match and the TPROXY target in your kernel config.
+You will need policy routing too, so be sure to enable that as well.
+
+
+1. Making non-local sockets work
+================================
+
+The idea is that you identify packets with destination address matching a local
+socket on your box, set the packet mark to a certain value, and then match on that
+value using policy routing to have those packets delivered locally:
+
+# iptables -t mangle -N DIVERT
+# iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT
+# iptables -t mangle -A DIVERT -j MARK --set-mark 1
+# iptables -t mangle -A DIVERT -j ACCEPT
+
+# ip rule add fwmark 1 lookup 100
+# ip route add local 0.0.0.0/0 dev lo table 100
+
+Because of certain restrictions in the IPv4 routing output code you'll have to
+modify your application to allow it to send datagrams _from_ non-local IP
+addresses. All you have to do is enable the (SOL_IP, IP_TRANSPARENT) socket
+option before calling bind:
+
+fd = socket(AF_INET, SOCK_STREAM, 0);
+/* - 8< -*/
+int value = 1;
+setsockopt(fd, SOL_IP, IP_TRANSPARENT, &value, sizeof(value));
+/* - 8< -*/
+name.sin_family = AF_INET;
+name.sin_port = htons(0xCAFE);
+name.sin_addr.s_addr = htonl(0xDEADBEEF);
+bind(fd, &name, sizeof(name));
+
+A trivial patch for netcat is available here:
+http://people.netfilter.org/hidden/tproxy/netcat-ip_transparent-support.patch
+
+
+2. Redirecting traffic
+======================
+
+Transparent proxying often involves "intercepting" traffic on a router. This is
+usually done with the iptables REDIRECT target; however, there are serious
+limitations of that method. One of the major issues is that it actually
+modifies the packets to change the destination address -- which might not be
+acceptable in certain situations. (Think of proxying UDP for example: you won't
+be able to find out the original destination address. Even in case of TCP
+getting the original destination address is racy.)
+
+The 'TPROXY' target provides similar functionality without relying on NAT. Simply
+add rules like this to the iptables ruleset above:
+
+# iptables -t mangle -A PREROUTING -p tcp --dport 80 -j TPROXY \
+ --tproxy-mark 0x1/0x1 --on-port 50080
+
+Note that for this to work you'll have to modify the proxy to enable (SOL_IP,
+IP_TRANSPARENT) for the listening socket.
+
+
+3. Iptables extensions
+======================
+
+To use tproxy you'll need to have the 'socket' and 'TPROXY' modules
+compiled for iptables. A patched version of iptables is available
+here: http://git.balabit.hu/?p=bazsi/iptables-tproxy.git
+
+
+4. Application support
+======================
+
+4.1. Squid
+----------
+
+Squid 3.HEAD has support built-in. To use it, pass
+'--enable-linux-netfilter' to configure and set the 'tproxy' option on
+the HTTP listener you redirect traffic to with the TPROXY iptables
+target.
+
+For more information please consult the following page on the Squid
+wiki: http://wiki.squid-cache.org/Features/Tproxy4
diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt
new file mode 100644
index 000000000..949d5dcdd
--- /dev/null
+++ b/Documentation/networking/tuntap.txt
@@ -0,0 +1,227 @@
+Universal TUN/TAP device driver.
+Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
+
+ Linux, Solaris drivers
+ Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
+
+ FreeBSD TAP driver
+ Copyright (c) 1999-2000 Maksim Yevmenkin <m_evmenkin@yahoo.com>
+
+ Revision of this document 2002 by Florian Thiel <florian.thiel@gmx.net>
+
+1. Description
+ TUN/TAP provides packet reception and transmission for user space programs.
+ It can be seen as a simple Point-to-Point or Ethernet device, which,
+ instead of receiving packets from physical media, receives them from
+ user space program and instead of sending packets via physical media
+ writes them to the user space program.
+
+ In order to use the driver a program has to open /dev/net/tun and issue a
+ corresponding ioctl() to register a network device with the kernel. A network
+ device will appear as tunXX or tapXX, depending on the options chosen. When
+ the program closes the file descriptor, the network device and all
+ corresponding routes will disappear.
+
+ Depending on the type of device chosen the userspace program has to read/write
+ IP packets (with tun) or ethernet frames (with tap). Which one is being used
+ depends on the flags given with the ioctl().
+
+ The package from http://vtun.sourceforge.net/tun contains two simple examples
+ for how to use tun and tap devices. Both programs work like a bridge between
+ two network interfaces.
+ br_select.c - bridge based on select system call.
+ br_sigio.c - bridge based on async io and SIGIO signal.
+ However, the best example is VTun http://vtun.sourceforge.net :))
+
+2. Configuration
+ Create device node:
+ mkdir /dev/net (if it doesn't exist already)
+ mknod /dev/net/tun c 10 200
+
+ Set permissions:
+ e.g. chmod 0666 /dev/net/tun
+ There's no harm in allowing the device to be accessible by non-root users,
+ since CAP_NET_ADMIN is required for creating network devices or for
+ connecting to network devices which aren't owned by the user in question.
+ If you want to create persistent devices and give ownership of them to
+ unprivileged users, then you need the /dev/net/tun device to be usable by
+ those users.
+
+ Driver module autoloading
+
+ Make sure that "Kernel module loader" - module auto-loading
+ support is enabled in your kernel. The kernel should load it on
+ first access.
+
+ Manual loading
+ insert the module by hand:
+ modprobe tun
+
+ If you do it the latter way, you have to load the module every time you
+ need it, if you do it the other way it will be automatically loaded when
+ /dev/net/tun is being opened.
+
+3. Program interface
+ 3.1 Network device allocation:
+
+ char *dev should be the name of the device with a format string (e.g.
+ "tun%d"), but (as far as I can see) this can be any valid network device name.
+ Note that the character pointer becomes overwritten with the real device name
+ (e.g. "tun0")
+
+ #include <linux/if.h>
+ #include <linux/if_tun.h>
+
+ int tun_alloc(char *dev)
+ {
+ struct ifreq ifr;
+ int fd, err;
+
+ if( (fd = open("/dev/net/tun", O_RDWR)) < 0 )
+ return tun_alloc_old(dev);
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ /* Flags: IFF_TUN - TUN device (no Ethernet headers)
+ * IFF_TAP - TAP device
+ *
+ * IFF_NO_PI - Do not provide packet information
+ */
+ ifr.ifr_flags = IFF_TUN;
+ if( *dev )
+ strncpy(ifr.ifr_name, dev, IFNAMSIZ);
+
+ if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ){
+ close(fd);
+ return err;
+ }
+ strcpy(dev, ifr.ifr_name);
+ return fd;
+ }
+
+ 3.2 Frame format:
+ If flag IFF_NO_PI is not set each frame format is:
+ Flags [2 bytes]
+ Proto [2 bytes]
+ Raw protocol(IP, IPv6, etc) frame.
+
+ 3.3 Multiqueue tuntap interface:
+
+ From version 3.8, Linux supports multiqueue tuntap which can uses multiple
+ file descriptors (queues) to parallelize packets sending or receiving. The
+ device allocation is the same as before, and if user wants to create multiple
+ queues, TUNSETIFF with the same device name must be called many times with
+ IFF_MULTI_QUEUE flag.
+
+ char *dev should be the name of the device, queues is the number of queues to
+ be created, fds is used to store and return the file descriptors (queues)
+ created to the caller. Each file descriptor were served as the interface of a
+ queue which could be accessed by userspace.
+
+ #include <linux/if.h>
+ #include <linux/if_tun.h>
+
+ int tun_alloc_mq(char *dev, int queues, int *fds)
+ {
+ struct ifreq ifr;
+ int fd, err, i;
+
+ if (!dev)
+ return -1;
+
+ memset(&ifr, 0, sizeof(ifr));
+ /* Flags: IFF_TUN - TUN device (no Ethernet headers)
+ * IFF_TAP - TAP device
+ *
+ * IFF_NO_PI - Do not provide packet information
+ * IFF_MULTI_QUEUE - Create a queue of multiqueue device
+ */
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
+ strcpy(ifr.ifr_name, dev);
+
+ for (i = 0; i < queues; i++) {
+ if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
+ goto err;
+ err = ioctl(fd, TUNSETIFF, (void *)&ifr);
+ if (err) {
+ close(fd);
+ goto err;
+ }
+ fds[i] = fd;
+ }
+
+ return 0;
+ err:
+ for (--i; i >= 0; i--)
+ close(fds[i]);
+ return err;
+ }
+
+ A new ioctl(TUNSETQUEUE) were introduced to enable or disable a queue. When
+ calling it with IFF_DETACH_QUEUE flag, the queue were disabled. And when
+ calling it with IFF_ATTACH_QUEUE flag, the queue were enabled. The queue were
+ enabled by default after it was created through TUNSETIFF.
+
+ fd is the file descriptor (queue) that we want to enable or disable, when
+ enable is true we enable it, otherwise we disable it
+
+ #include <linux/if.h>
+ #include <linux/if_tun.h>
+
+ int tun_set_queue(int fd, int enable)
+ {
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ if (enable)
+ ifr.ifr_flags = IFF_ATTACH_QUEUE;
+ else
+ ifr.ifr_flags = IFF_DETACH_QUEUE;
+
+ return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
+ }
+
+Universal TUN/TAP device driver Frequently Asked Question.
+
+1. What platforms are supported by TUN/TAP driver ?
+Currently driver has been written for 3 Unices:
+ Linux kernels 2.2.x, 2.4.x
+ FreeBSD 3.x, 4.x, 5.x
+ Solaris 2.6, 7.0, 8.0
+
+2. What is TUN/TAP driver used for?
+As mentioned above, main purpose of TUN/TAP driver is tunneling.
+It is used by VTun (http://vtun.sourceforge.net).
+
+Another interesting application using TUN/TAP is pipsecd
+(http://perso.enst.fr/~beyssac/pipsec/), a userspace IPSec
+implementation that can use complete kernel routing (unlike FreeS/WAN).
+
+3. How does Virtual network device actually work ?
+Virtual network device can be viewed as a simple Point-to-Point or
+Ethernet device, which instead of receiving packets from a physical
+media, receives them from user space program and instead of sending
+packets via physical media sends them to the user space program.
+
+Let's say that you configured IPX on the tap0, then whenever
+the kernel sends an IPX packet to tap0, it is passed to the application
+(VTun for example). The application encrypts, compresses and sends it to
+the other side over TCP or UDP. The application on the other side decompresses
+and decrypts the data received and writes the packet to the TAP device,
+the kernel handles the packet like it came from real physical device.
+
+4. What is the difference between TUN driver and TAP driver?
+TUN works with IP frames. TAP works with Ethernet frames.
+
+This means that you have to read/write IP packets when you are using tun and
+ethernet frames when using tap.
+
+5. What is the difference between BPF and TUN/TAP driver?
+BPF is an advanced packet filter. It can be attached to existing
+network interface. It does not provide a virtual network interface.
+A TUN/TAP driver does provide a virtual network interface and it is possible
+to attach BPF to this interface.
+
+6. Does TAP driver support kernel Ethernet bridging?
+Yes. Linux and FreeBSD drivers support Ethernet bridging.
diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt
new file mode 100644
index 000000000..53a726855
--- /dev/null
+++ b/Documentation/networking/udplite.txt
@@ -0,0 +1,278 @@
+ ===========================================================================
+ The UDP-Lite protocol (RFC 3828)
+ ===========================================================================
+
+
+ UDP-Lite is a Standards-Track IETF transport protocol whose characteristic
+ is a variable-length checksum. This has advantages for transport of multimedia
+ (video, VoIP) over wireless networks, as partly damaged packets can still be
+ fed into the codec instead of being discarded due to a failed checksum test.
+
+ This file briefly describes the existing kernel support and the socket API.
+ For in-depth information, you can consult:
+
+ o The UDP-Lite Homepage:
+ http://web.archive.org/web/*/http://www.erg.abdn.ac.uk/users/gerrit/udp-lite/
+ From here you can also download some example application source code.
+
+ o The UDP-Lite HOWTO on
+ http://web.archive.org/web/*/http://www.erg.abdn.ac.uk/users/gerrit/udp-lite/
+ files/UDP-Lite-HOWTO.txt
+
+ o The Wireshark UDP-Lite WiKi (with capture files):
+ https://wiki.wireshark.org/Lightweight_User_Datagram_Protocol
+
+ o The Protocol Spec, RFC 3828, http://www.ietf.org/rfc/rfc3828.txt
+
+
+ I) APPLICATIONS
+
+ Several applications have been ported successfully to UDP-Lite. Ethereal
+ (now called wireshark) has UDP-Litev4/v6 support by default.
+ Porting applications to UDP-Lite is straightforward: only socket level and
+ IPPROTO need to be changed; senders additionally set the checksum coverage
+ length (default = header length = 8). Details are in the next section.
+
+
+ II) PROGRAMMING API
+
+ UDP-Lite provides a connectionless, unreliable datagram service and hence
+ uses the same socket type as UDP. In fact, porting from UDP to UDP-Lite is
+ very easy: simply add `IPPROTO_UDPLITE' as the last argument of the socket(2)
+ call so that the statement looks like:
+
+ s = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
+
+ or, respectively,
+
+ s = socket(PF_INET6, SOCK_DGRAM, IPPROTO_UDPLITE);
+
+ With just the above change you are able to run UDP-Lite services or connect
+ to UDP-Lite servers. The kernel will assume that you are not interested in
+ using partial checksum coverage and so emulate UDP mode (full coverage).
+
+ To make use of the partial checksum coverage facilities requires setting a
+ single socket option, which takes an integer specifying the coverage length:
+
+ * Sender checksum coverage: UDPLITE_SEND_CSCOV
+
+ For example,
+
+ int val = 20;
+ setsockopt(s, SOL_UDPLITE, UDPLITE_SEND_CSCOV, &val, sizeof(int));
+
+ sets the checksum coverage length to 20 bytes (12b data + 8b header).
+ Of each packet only the first 20 bytes (plus the pseudo-header) will be
+ checksummed. This is useful for RTP applications which have a 12-byte
+ base header.
+
+
+ * Receiver checksum coverage: UDPLITE_RECV_CSCOV
+
+ This option is the receiver-side analogue. It is truly optional, i.e. not
+ required to enable traffic with partial checksum coverage. Its function is
+ that of a traffic filter: when enabled, it instructs the kernel to drop
+ all packets which have a coverage _less_ than this value. For example, if
+ RTP and UDP headers are to be protected, a receiver can enforce that only
+ packets with a minimum coverage of 20 are admitted:
+
+ int min = 20;
+ setsockopt(s, SOL_UDPLITE, UDPLITE_RECV_CSCOV, &min, sizeof(int));
+
+ The calls to getsockopt(2) are analogous. Being an extension and not a stand-
+ alone protocol, all socket options known from UDP can be used in exactly the
+ same manner as before, e.g. UDP_CORK or UDP_ENCAP.
+
+ A detailed discussion of UDP-Lite checksum coverage options is in section IV.
+
+
+ III) HEADER FILES
+
+ The socket API requires support through header files in /usr/include:
+
+ * /usr/include/netinet/in.h
+ to define IPPROTO_UDPLITE
+
+ * /usr/include/netinet/udplite.h
+ for UDP-Lite header fields and protocol constants
+
+ For testing purposes, the following can serve as a `mini' header file:
+
+ #define IPPROTO_UDPLITE 136
+ #define SOL_UDPLITE 136
+ #define UDPLITE_SEND_CSCOV 10
+ #define UDPLITE_RECV_CSCOV 11
+
+ Ready-made header files for various distros are in the UDP-Lite tarball.
+
+
+ IV) KERNEL BEHAVIOUR WITH REGARD TO THE VARIOUS SOCKET OPTIONS
+
+ To enable debugging messages, the log level need to be set to 8, as most
+ messages use the KERN_DEBUG level (7).
+
+ 1) Sender Socket Options
+
+ If the sender specifies a value of 0 as coverage length, the module
+ assumes full coverage, transmits a packet with coverage length of 0
+ and according checksum. If the sender specifies a coverage < 8 and
+ different from 0, the kernel assumes 8 as default value. Finally,
+ if the specified coverage length exceeds the packet length, the packet
+ length is used instead as coverage length.
+
+ 2) Receiver Socket Options
+
+ The receiver specifies the minimum value of the coverage length it
+ is willing to accept. A value of 0 here indicates that the receiver
+ always wants the whole of the packet covered. In this case, all
+ partially covered packets are dropped and an error is logged.
+
+ It is not possible to specify illegal values (<0 and <8); in these
+ cases the default of 8 is assumed.
+
+ All packets arriving with a coverage value less than the specified
+ threshold are discarded, these events are also logged.
+
+ 3) Disabling the Checksum Computation
+
+ On both sender and receiver, checksumming will always be performed
+ and cannot be disabled using SO_NO_CHECK. Thus
+
+ setsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK, ... );
+
+ will always will be ignored, while the value of
+
+ getsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK, &value, ...);
+
+ is meaningless (as in TCP). Packets with a zero checksum field are
+ illegal (cf. RFC 3828, sec. 3.1) and will be silently discarded.
+
+ 4) Fragmentation
+
+ The checksum computation respects both buffersize and MTU. The size
+ of UDP-Lite packets is determined by the size of the send buffer. The
+ minimum size of the send buffer is 2048 (defined as SOCK_MIN_SNDBUF
+ in include/net/sock.h), the default value is configurable as
+ net.core.wmem_default or via setting the SO_SNDBUF socket(7)
+ option. The maximum upper bound for the send buffer is determined
+ by net.core.wmem_max.
+
+ Given a payload size larger than the send buffer size, UDP-Lite will
+ split the payload into several individual packets, filling up the
+ send buffer size in each case.
+
+ The precise value also depends on the interface MTU. The interface MTU,
+ in turn, may trigger IP fragmentation. In this case, the generated
+ UDP-Lite packet is split into several IP packets, of which only the
+ first one contains the L4 header.
+
+ The send buffer size has implications on the checksum coverage length.
+ Consider the following example:
+
+ Payload: 1536 bytes Send Buffer: 1024 bytes
+ MTU: 1500 bytes Coverage Length: 856 bytes
+
+ UDP-Lite will ship the 1536 bytes in two separate packets:
+
+ Packet 1: 1024 payload + 8 byte header + 20 byte IP header = 1052 bytes
+ Packet 2: 512 payload + 8 byte header + 20 byte IP header = 540 bytes
+
+ The coverage packet covers the UDP-Lite header and 848 bytes of the
+ payload in the first packet, the second packet is fully covered. Note
+ that for the second packet, the coverage length exceeds the packet
+ length. The kernel always re-adjusts the coverage length to the packet
+ length in such cases.
+
+ As an example of what happens when one UDP-Lite packet is split into
+ several tiny fragments, consider the following example.
+
+ Payload: 1024 bytes Send buffer size: 1024 bytes
+ MTU: 300 bytes Coverage length: 575 bytes
+
+ +-+-----------+--------------+--------------+--------------+
+ |8| 272 | 280 | 280 | 280 |
+ +-+-----------+--------------+--------------+--------------+
+ 280 560 840 1032
+ ^
+ *****checksum coverage*************
+
+ The UDP-Lite module generates one 1032 byte packet (1024 + 8 byte
+ header). According to the interface MTU, these are split into 4 IP
+ packets (280 byte IP payload + 20 byte IP header). The kernel module
+ sums the contents of the entire first two packets, plus 15 bytes of
+ the last packet before releasing the fragments to the IP module.
+
+ To see the analogous case for IPv6 fragmentation, consider a link
+ MTU of 1280 bytes and a write buffer of 3356 bytes. If the checksum
+ coverage is less than 1232 bytes (MTU minus IPv6/fragment header
+ lengths), only the first fragment needs to be considered. When using
+ larger checksum coverage lengths, each eligible fragment needs to be
+ checksummed. Suppose we have a checksum coverage of 3062. The buffer
+ of 3356 bytes will be split into the following fragments:
+
+ Fragment 1: 1280 bytes carrying 1232 bytes of UDP-Lite data
+ Fragment 2: 1280 bytes carrying 1232 bytes of UDP-Lite data
+ Fragment 3: 948 bytes carrying 900 bytes of UDP-Lite data
+
+ The first two fragments have to be checksummed in full, of the last
+ fragment only 598 (= 3062 - 2*1232) bytes are checksummed.
+
+ While it is important that such cases are dealt with correctly, they
+ are (annoyingly) rare: UDP-Lite is designed for optimising multimedia
+ performance over wireless (or generally noisy) links and thus smaller
+ coverage lengths are likely to be expected.
+
+
+ V) UDP-LITE RUNTIME STATISTICS AND THEIR MEANING
+
+ Exceptional and error conditions are logged to syslog at the KERN_DEBUG
+ level. Live statistics about UDP-Lite are available in /proc/net/snmp
+ and can (with newer versions of netstat) be viewed using
+
+ netstat -svu
+
+ This displays UDP-Lite statistics variables, whose meaning is as follows.
+
+ InDatagrams: The total number of datagrams delivered to users.
+
+ NoPorts: Number of packets received to an unknown port.
+ These cases are counted separately (not as InErrors).
+
+ InErrors: Number of erroneous UDP-Lite packets. Errors include:
+ * internal socket queue receive errors
+ * packet too short (less than 8 bytes or stated
+ coverage length exceeds received length)
+ * xfrm4_policy_check() returned with error
+ * application has specified larger min. coverage
+ length than that of incoming packet
+ * checksum coverage violated
+ * bad checksum
+
+ OutDatagrams: Total number of sent datagrams.
+
+ These statistics derive from the UDP MIB (RFC 2013).
+
+
+ VI) IPTABLES
+
+ There is packet match support for UDP-Lite as well as support for the LOG target.
+ If you copy and paste the following line into /etc/protocols,
+
+ udplite 136 UDP-Lite # UDP-Lite [RFC 3828]
+
+ then
+ iptables -A INPUT -p udplite -j LOG
+
+ will produce logging output to syslog. Dropping and rejecting packets also works.
+
+
+ VII) MAINTAINER ADDRESS
+
+ The UDP-Lite patch was developed at
+ University of Aberdeen
+ Electronics Research Group
+ Department of Engineering
+ Fraser Noble Building
+ Aberdeen AB24 3UE; UK
+ The current maintainer is Gerrit Renker, <gerrit@erg.abdn.ac.uk>. Initial
+ code was developed by William Stanislaus, <william@erg.abdn.ac.uk>.
diff --git a/Documentation/networking/vortex.txt b/Documentation/networking/vortex.txt
new file mode 100644
index 000000000..97282da82
--- /dev/null
+++ b/Documentation/networking/vortex.txt
@@ -0,0 +1,448 @@
+Documentation/networking/vortex.txt
+Andrew Morton
+30 April 2000
+
+
+This document describes the usage and errata of the 3Com "Vortex" device
+driver for Linux, 3c59x.c.
+
+The driver was written by Donald Becker <becker@scyld.com>
+
+Don is no longer the prime maintainer of this version of the driver.
+Please report problems to one or more of:
+
+ Andrew Morton
+ Netdev mailing list <netdev@vger.kernel.org>
+ Linux kernel mailing list <linux-kernel@vger.kernel.org>
+
+Please note the 'Reporting and Diagnosing Problems' section at the end
+of this file.
+
+
+Since kernel 2.3.99-pre6, this driver incorporates the support for the
+3c575-series Cardbus cards which used to be handled by 3c575_cb.c.
+
+This driver supports the following hardware:
+
+ 3c590 Vortex 10Mbps
+ 3c592 EISA 10Mbps Demon/Vortex
+ 3c597 EISA Fast Demon/Vortex
+ 3c595 Vortex 100baseTx
+ 3c595 Vortex 100baseT4
+ 3c595 Vortex 100base-MII
+ 3c900 Boomerang 10baseT
+ 3c900 Boomerang 10Mbps Combo
+ 3c900 Cyclone 10Mbps TPO
+ 3c900 Cyclone 10Mbps Combo
+ 3c900 Cyclone 10Mbps TPC
+ 3c900B-FL Cyclone 10base-FL
+ 3c905 Boomerang 100baseTx
+ 3c905 Boomerang 100baseT4
+ 3c905B Cyclone 100baseTx
+ 3c905B Cyclone 10/100/BNC
+ 3c905B-FX Cyclone 100baseFx
+ 3c905C Tornado
+ 3c920B-EMB-WNM (ATI Radeon 9100 IGP)
+ 3c980 Cyclone
+ 3c980C Python-T
+ 3cSOHO100-TX Hurricane
+ 3c555 Laptop Hurricane
+ 3c556 Laptop Tornado
+ 3c556B Laptop Hurricane
+ 3c575 [Megahertz] 10/100 LAN CardBus
+ 3c575 Boomerang CardBus
+ 3CCFE575BT Cyclone CardBus
+ 3CCFE575CT Tornado CardBus
+ 3CCFE656 Cyclone CardBus
+ 3CCFEM656B Cyclone+Winmodem CardBus
+ 3CXFEM656C Tornado+Winmodem CardBus
+ 3c450 HomePNA Tornado
+ 3c920 Tornado
+ 3c982 Hydra Dual Port A
+ 3c982 Hydra Dual Port B
+ 3c905B-T4
+ 3c920B-EMB-WNM Tornado
+
+Module parameters
+=================
+
+There are several parameters which may be provided to the driver when
+its module is loaded. These are usually placed in /etc/modprobe.d/*.conf
+configuration files. Example:
+
+options 3c59x debug=3 rx_copybreak=300
+
+If you are using the PCMCIA tools (cardmgr) then the options may be
+placed in /etc/pcmcia/config.opts:
+
+module "3c59x" opts "debug=3 rx_copybreak=300"
+
+
+The supported parameters are:
+
+debug=N
+
+ Where N is a number from 0 to 7. Anything above 3 produces a lot
+ of output in your system logs. debug=1 is default.
+
+options=N1,N2,N3,...
+
+ Each number in the list provides an option to the corresponding
+ network card. So if you have two 3c905's and you wish to provide
+ them with option 0x204 you would use:
+
+ options=0x204,0x204
+
+ The individual options are composed of a number of bitfields which
+ have the following meanings:
+
+ Possible media type settings
+ 0 10baseT
+ 1 10Mbs AUI
+ 2 undefined
+ 3 10base2 (BNC)
+ 4 100base-TX
+ 5 100base-FX
+ 6 MII (Media Independent Interface)
+ 7 Use default setting from EEPROM
+ 8 Autonegotiate
+ 9 External MII
+ 10 Use default setting from EEPROM
+
+ When generating a value for the 'options' setting, the above media
+ selection values may be OR'ed (or added to) the following:
+
+ 0x8000 Set driver debugging level to 7
+ 0x4000 Set driver debugging level to 2
+ 0x0400 Enable Wake-on-LAN
+ 0x0200 Force full duplex mode.
+ 0x0010 Bus-master enable bit (Old Vortex cards only)
+
+ For example:
+
+ insmod 3c59x options=0x204
+
+ will force full-duplex 100base-TX, rather than allowing the usual
+ autonegotiation.
+
+global_options=N
+
+ Sets the `options' parameter for all 3c59x NICs in the machine.
+ Entries in the `options' array above will override any setting of
+ this.
+
+full_duplex=N1,N2,N3...
+
+ Similar to bit 9 of 'options'. Forces the corresponding card into
+ full-duplex mode. Please use this in preference to the `options'
+ parameter.
+
+ In fact, please don't use this at all! You're better off getting
+ autonegotiation working properly.
+
+global_full_duplex=N1
+
+ Sets full duplex mode for all 3c59x NICs in the machine. Entries
+ in the `full_duplex' array above will override any setting of this.
+
+flow_ctrl=N1,N2,N3...
+
+ Use 802.3x MAC-layer flow control. The 3com cards only support the
+ PAUSE command, which means that they will stop sending packets for a
+ short period if they receive a PAUSE frame from the link partner.
+
+ The driver only allows flow control on a link which is operating in
+ full duplex mode.
+
+ This feature does not appear to work on the 3c905 - only 3c905B and
+ 3c905C have been tested.
+
+ The 3com cards appear to only respond to PAUSE frames which are
+ sent to the reserved destination address of 01:80:c2:00:00:01. They
+ do not honour PAUSE frames which are sent to the station MAC address.
+
+rx_copybreak=M
+
+ The driver preallocates 32 full-sized (1536 byte) network buffers
+ for receiving. When a packet arrives, the driver has to decide
+ whether to leave the packet in its full-sized buffer, or to allocate
+ a smaller buffer and copy the packet across into it.
+
+ This is a speed/space tradeoff.
+
+ The value of rx_copybreak is used to decide when to make the copy.
+ If the packet size is less than rx_copybreak, the packet is copied.
+ The default value for rx_copybreak is 200 bytes.
+
+max_interrupt_work=N
+
+ The driver's interrupt service routine can handle many receive and
+ transmit packets in a single invocation. It does this in a loop.
+ The value of max_interrupt_work governs how many times the interrupt
+ service routine will loop. The default value is 32 loops. If this
+ is exceeded the interrupt service routine gives up and generates a
+ warning message "eth0: Too much work in interrupt".
+
+hw_checksums=N1,N2,N3,...
+
+ Recent 3com NICs are able to generate IPv4, TCP and UDP checksums
+ in hardware. Linux has used the Rx checksumming for a long time.
+ The "zero copy" patch which is planned for the 2.4 kernel series
+ allows you to make use of the NIC's DMA scatter/gather and transmit
+ checksumming as well.
+
+ The driver is set up so that, when the zerocopy patch is applied,
+ all Tornado and Cyclone devices will use S/G and Tx checksums.
+
+ This module parameter has been provided so you can override this
+ decision. If you think that Tx checksums are causing a problem, you
+ may disable the feature with `hw_checksums=0'.
+
+ If you think your NIC should be performing Tx checksumming and the
+ driver isn't enabling it, you can force the use of hardware Tx
+ checksumming with `hw_checksums=1'.
+
+ The driver drops a message in the logfiles to indicate whether or
+ not it is using hardware scatter/gather and hardware Tx checksums.
+
+ Scatter/gather and hardware checksums provide considerable
+ performance improvement for the sendfile() system call, but a small
+ decrease in throughput for send(). There is no effect upon receive
+ efficiency.
+
+compaq_ioaddr=N
+compaq_irq=N
+compaq_device_id=N
+
+ "Variables to work-around the Compaq PCI BIOS32 problem"....
+
+watchdog=N
+
+ Sets the time duration (in milliseconds) after which the kernel
+ decides that the transmitter has become stuck and needs to be reset.
+ This is mainly for debugging purposes, although it may be advantageous
+ to increase this value on LANs which have very high collision rates.
+ The default value is 5000 (5.0 seconds).
+
+enable_wol=N1,N2,N3,...
+
+ Enable Wake-on-LAN support for the relevant interface. Donald
+ Becker's `ether-wake' application may be used to wake suspended
+ machines.
+
+ Also enables the NIC's power management support.
+
+global_enable_wol=N
+
+ Sets enable_wol mode for all 3c59x NICs in the machine. Entries in
+ the `enable_wol' array above will override any setting of this.
+
+Media selection
+---------------
+
+A number of the older NICs such as the 3c590 and 3c900 series have
+10base2 and AUI interfaces.
+
+Prior to January, 2001 this driver would autoeselect the 10base2 or AUI
+port if it didn't detect activity on the 10baseT port. It would then
+get stuck on the 10base2 port and a driver reload was necessary to
+switch back to 10baseT. This behaviour could not be prevented with a
+module option override.
+
+Later (current) versions of the driver _do_ support locking of the
+media type. So if you load the driver module with
+
+ modprobe 3c59x options=0
+
+it will permanently select the 10baseT port. Automatic selection of
+other media types does not occur.
+
+
+Transmit error, Tx status register 82
+-------------------------------------
+
+This is a common error which is almost always caused by another host on
+the same network being in full-duplex mode, while this host is in
+half-duplex mode. You need to find that other host and make it run in
+half-duplex mode or fix this host to run in full-duplex mode.
+
+As a last resort, you can force the 3c59x driver into full-duplex mode
+with
+
+ options 3c59x full_duplex=1
+
+but this has to be viewed as a workaround for broken network gear and
+should only really be used for equipment which cannot autonegotiate.
+
+
+Additional resources
+--------------------
+
+Details of the device driver implementation are at the top of the source file.
+
+Additional documentation is available at Don Becker's Linux Drivers site:
+
+ http://www.scyld.com/vortex.html
+
+Donald Becker's driver development site:
+
+ http://www.scyld.com/network.html
+
+Donald's vortex-diag program is useful for inspecting the NIC's state:
+
+ http://www.scyld.com/ethercard_diag.html
+
+Donald's mii-diag program may be used for inspecting and manipulating
+the NIC's Media Independent Interface subsystem:
+
+ http://www.scyld.com/ethercard_diag.html#mii-diag
+
+Donald's wake-on-LAN page:
+
+ http://www.scyld.com/wakeonlan.html
+
+3Com's DOS-based application for setting up the NICs EEPROMs:
+
+ ftp://ftp.3com.com/pub/nic/3c90x/3c90xx2.exe
+
+
+Autonegotiation notes
+---------------------
+
+ The driver uses a one-minute heartbeat for adapting to changes in
+ the external LAN environment if link is up and 5 seconds if link is down.
+ This means that when, for example, a machine is unplugged from a hubbed
+ 10baseT LAN plugged into a switched 100baseT LAN, the throughput
+ will be quite dreadful for up to sixty seconds. Be patient.
+
+ Cisco interoperability note from Walter Wong <wcw+@CMU.EDU>:
+
+ On a side note, adding HAS_NWAY seems to share a problem with the
+ Cisco 6509 switch. Specifically, you need to change the spanning
+ tree parameter for the port the machine is plugged into to 'portfast'
+ mode. Otherwise, the negotiation fails. This has been an issue
+ we've noticed for a while but haven't had the time to track down.
+
+ Cisco switches (Jeff Busch <jbusch@deja.com>)
+
+ My "standard config" for ports to which PC's/servers connect directly:
+
+ interface FastEthernet0/N
+ description machinename
+ load-interval 30
+ spanning-tree portfast
+
+ If autonegotiation is a problem, you may need to specify "speed
+ 100" and "duplex full" as well (or "speed 10" and "duplex half").
+
+ WARNING: DO NOT hook up hubs/switches/bridges to these
+ specially-configured ports! The switch will become very confused.
+
+
+Reporting and diagnosing problems
+---------------------------------
+
+Maintainers find that accurate and complete problem reports are
+invaluable in resolving driver problems. We are frequently not able to
+reproduce problems and must rely on your patience and efforts to get to
+the bottom of the problem.
+
+If you believe you have a driver problem here are some of the
+steps you should take:
+
+- Is it really a driver problem?
+
+ Eliminate some variables: try different cards, different
+ computers, different cables, different ports on the switch/hub,
+ different versions of the kernel or of the driver, etc.
+
+- OK, it's a driver problem.
+
+ You need to generate a report. Typically this is an email to the
+ maintainer and/or netdev@vger.kernel.org. The maintainer's
+ email address will be in the driver source or in the MAINTAINERS file.
+
+- The contents of your report will vary a lot depending upon the
+ problem. If it's a kernel crash then you should refer to the
+ REPORTING-BUGS file.
+
+ But for most problems it is useful to provide the following:
+
+ o Kernel version, driver version
+
+ o A copy of the banner message which the driver generates when
+ it is initialised. For example:
+
+ eth0: 3Com PCI 3c905C Tornado at 0xa400, 00:50:da:6a:88:f0, IRQ 19
+ 8K byte-wide RAM 5:3 Rx:Tx split, autoselect/Autonegotiate interface.
+ MII transceiver found at address 24, status 782d.
+ Enabling bus-master transmits and whole-frame receives.
+
+ NOTE: You must provide the `debug=2' modprobe option to generate
+ a full detection message. Please do this:
+
+ modprobe 3c59x debug=2
+
+ o If it is a PCI device, the relevant output from 'lspci -vx', eg:
+
+ 00:09.0 Ethernet controller: 3Com Corporation 3c905C-TX [Fast Etherlink] (rev 74)
+ Subsystem: 3Com Corporation: Unknown device 9200
+ Flags: bus master, medium devsel, latency 32, IRQ 19
+ I/O ports at a400 [size=128]
+ Memory at db000000 (32-bit, non-prefetchable) [size=128]
+ Expansion ROM at <unassigned> [disabled] [size=128K]
+ Capabilities: [dc] Power Management version 2
+ 00: b7 10 00 92 07 00 10 02 74 00 00 02 08 20 00 00
+ 10: 01 a4 00 00 00 00 00 db 00 00 00 00 00 00 00 00
+ 20: 00 00 00 00 00 00 00 00 00 00 00 00 b7 10 00 10
+ 30: 00 00 00 00 dc 00 00 00 00 00 00 00 05 01 0a 0a
+
+ o A description of the environment: 10baseT? 100baseT?
+ full/half duplex? switched or hubbed?
+
+ o Any additional module parameters which you may be providing to the driver.
+
+ o Any kernel logs which are produced. The more the merrier.
+ If this is a large file and you are sending your report to a
+ mailing list, mention that you have the logfile, but don't send
+ it. If you're reporting direct to the maintainer then just send
+ it.
+
+ To ensure that all kernel logs are available, add the
+ following line to /etc/syslog.conf:
+
+ kern.* /var/log/messages
+
+ Then restart syslogd with:
+
+ /etc/rc.d/init.d/syslog restart
+
+ (The above may vary, depending upon which Linux distribution you use).
+
+ o If your problem is reproducible then that's great. Try the
+ following:
+
+ 1) Increase the debug level. Usually this is done via:
+
+ a) modprobe driver debug=7
+ b) In /etc/modprobe.d/driver.conf:
+ options driver debug=7
+
+ 2) Recreate the problem with the higher debug level,
+ send all logs to the maintainer.
+
+ 3) Download you card's diagnostic tool from Donald
+ Becker's website <http://www.scyld.com/ethercard_diag.html>.
+ Download mii-diag.c as well. Build these.
+
+ a) Run 'vortex-diag -aaee' and 'mii-diag -v' when the card is
+ working correctly. Save the output.
+
+ b) Run the above commands when the card is malfunctioning. Send
+ both sets of output.
+
+Finally, please be patient and be prepared to do some work. You may
+end up working on this problem for a week or more as the maintainer
+asks more questions, asks for more tests, asks for patches to be
+applied, etc. At the end of it all, the problem may even remain
+unresolved.
diff --git a/Documentation/networking/vxge.txt b/Documentation/networking/vxge.txt
new file mode 100644
index 000000000..abfec245f
--- /dev/null
+++ b/Documentation/networking/vxge.txt
@@ -0,0 +1,93 @@
+Neterion's (Formerly S2io) X3100 Series 10GbE PCIe Server Adapter Linux driver
+==============================================================================
+
+Contents
+--------
+
+1) Introduction
+2) Features supported
+3) Configurable driver parameters
+4) Troubleshooting
+
+1) Introduction:
+----------------
+This Linux driver supports all Neterion's X3100 series 10 GbE PCIe I/O
+Virtualized Server adapters.
+The X3100 series supports four modes of operation, configurable via
+firmware -
+ Single function mode
+ Multi function mode
+ SRIOV mode
+ MRIOV mode
+The functions share a 10GbE link and the pci-e bus, but hardly anything else
+inside the ASIC. Features like independent hw reset, statistics, bandwidth/
+priority allocation and guarantees, GRO, TSO, interrupt moderation etc are
+supported independently on each function.
+
+(See below for a complete list of features supported for both IPv4 and IPv6)
+
+2) Features supported:
+----------------------
+
+i) Single function mode (up to 17 queues)
+
+ii) Multi function mode (up to 17 functions)
+
+iii) PCI-SIG's I/O Virtualization
+ - Single Root mode: v1.0 (up to 17 functions)
+ - Multi-Root mode: v1.0 (up to 17 functions)
+
+iv) Jumbo frames
+ X3100 Series supports MTU up to 9600 bytes, modifiable using
+ ip command.
+
+v) Offloads supported: (Enabled by default)
+ Checksum offload (TCP/UDP/IP) on transmit and receive paths
+ TCP Segmentation Offload (TSO) on transmit path
+ Generic Receive Offload (GRO) on receive path
+
+vi) MSI-X: (Enabled by default)
+ Resulting in noticeable performance improvement (up to 7% on certain
+ platforms).
+
+vii) NAPI: (Enabled by default)
+ For better Rx interrupt moderation.
+
+viii)RTH (Receive Traffic Hash): (Enabled by default)
+ Receive side steering for better scaling.
+
+ix) Statistics
+ Comprehensive MAC-level and software statistics displayed using
+ "ethtool -S" option.
+
+x) Multiple hardware queues: (Enabled by default)
+ Up to 17 hardware based transmit and receive data channels, with
+ multiple steering options (transmit multiqueue enabled by default).
+
+3) Configurable driver parameters:
+----------------------------------
+
+i) max_config_dev
+ Specifies maximum device functions to be enabled.
+ Valid range: 1-8
+
+ii) max_config_port
+ Specifies number of ports to be enabled.
+ Valid range: 1,2
+ Default: 1
+
+iii)max_config_vpath
+ Specifies maximum VPATH(s) configured for each device function.
+ Valid range: 1-17
+
+iv) vlan_tag_strip
+ Enables/disables vlan tag stripping from all received tagged frames that
+ are not replicated at the internal L2 switch.
+ Valid range: 0,1 (disabled, enabled respectively)
+ Default: 1
+
+v) addr_learn_en
+ Enable learning the mac address of the guest OS interface in
+ virtualization environment.
+ Valid range: 0,1 (disabled, enabled respectively)
+ Default: 0
diff --git a/Documentation/networking/vxlan.txt b/Documentation/networking/vxlan.txt
new file mode 100644
index 000000000..6d993510f
--- /dev/null
+++ b/Documentation/networking/vxlan.txt
@@ -0,0 +1,47 @@
+Virtual eXtensible Local Area Networking documentation
+======================================================
+
+The VXLAN protocol is a tunnelling protocol that is designed to
+solve the problem of limited number of available VLAN's (4096).
+With VXLAN identifier is expanded to 24 bits.
+
+It is a draft RFC standard, that is implemented by Cisco Nexus,
+Vmware and Brocade. The protocol runs over UDP using a single
+destination port (still not standardized by IANA).
+This document describes the Linux kernel tunnel device,
+there is also an implantation of VXLAN for Openvswitch.
+
+Unlike most tunnels, a VXLAN is a 1 to N network, not just point
+to point. A VXLAN device can either dynamically learn the IP address
+of the other end, in a manner similar to a learning bridge, or the
+forwarding entries can be configured statically.
+
+The management of vxlan is done in a similar fashion to it's
+too closest neighbors GRE and VLAN. Configuring VXLAN requires
+the version of iproute2 that matches the kernel release
+where VXLAN was first merged upstream.
+
+1. Create vxlan device
+ # ip li add vxlan0 type vxlan id 42 group 239.1.1.1 dev eth1
+
+This creates a new device (vxlan0). The device uses the
+the multicast group 239.1.1.1 over eth1 to handle packets where
+no entry is in the forwarding table.
+
+2. Delete vxlan device
+ # ip link delete vxlan0
+
+3. Show vxlan info
+ # ip -d link show vxlan0
+
+It is possible to create, destroy and display the vxlan
+forwarding table using the new bridge command.
+
+1. Create forwarding table entry
+ # bridge fdb add to 00:17:42:8a:b4:05 dst 192.19.0.2 dev vxlan0
+
+2. Delete forwarding table entry
+ # bridge fdb delete 00:17:42:8a:b4:05 dev vxlan0
+
+3. Show forwarding table
+ # bridge fdb show dev vxlan0
diff --git a/Documentation/networking/x25-iface.txt b/Documentation/networking/x25-iface.txt
new file mode 100644
index 000000000..7f213b556
--- /dev/null
+++ b/Documentation/networking/x25-iface.txt
@@ -0,0 +1,123 @@
+ X.25 Device Driver Interface 1.1
+
+ Jonathan Naylor 26.12.96
+
+This is a description of the messages to be passed between the X.25 Packet
+Layer and the X.25 device driver. They are designed to allow for the easy
+setting of the LAPB mode from within the Packet Layer.
+
+The X.25 device driver will be coded normally as per the Linux device driver
+standards. Most X.25 device drivers will be moderately similar to the
+already existing Ethernet device drivers. However unlike those drivers, the
+X.25 device driver has a state associated with it, and this information
+needs to be passed to and from the Packet Layer for proper operation.
+
+All messages are held in sk_buff's just like real data to be transmitted
+over the LAPB link. The first byte of the skbuff indicates the meaning of
+the rest of the skbuff, if any more information does exist.
+
+
+Packet Layer to Device Driver
+-----------------------------
+
+First Byte = 0x00 (X25_IFACE_DATA)
+
+This indicates that the rest of the skbuff contains data to be transmitted
+over the LAPB link. The LAPB link should already exist before any data is
+passed down.
+
+First Byte = 0x01 (X25_IFACE_CONNECT)
+
+Establish the LAPB link. If the link is already established then the connect
+confirmation message should be returned as soon as possible.
+
+First Byte = 0x02 (X25_IFACE_DISCONNECT)
+
+Terminate the LAPB link. If it is already disconnected then the disconnect
+confirmation message should be returned as soon as possible.
+
+First Byte = 0x03 (X25_IFACE_PARAMS)
+
+LAPB parameters. To be defined.
+
+
+Device Driver to Packet Layer
+-----------------------------
+
+First Byte = 0x00 (X25_IFACE_DATA)
+
+This indicates that the rest of the skbuff contains data that has been
+received over the LAPB link.
+
+First Byte = 0x01 (X25_IFACE_CONNECT)
+
+LAPB link has been established. The same message is used for both a LAPB
+link connect_confirmation and a connect_indication.
+
+First Byte = 0x02 (X25_IFACE_DISCONNECT)
+
+LAPB link has been terminated. This same message is used for both a LAPB
+link disconnect_confirmation and a disconnect_indication.
+
+First Byte = 0x03 (X25_IFACE_PARAMS)
+
+LAPB parameters. To be defined.
+
+
+
+Possible Problems
+=================
+
+(Henner Eisen, 2000-10-28)
+
+The X.25 packet layer protocol depends on a reliable datalink service.
+The LAPB protocol provides such reliable service. But this reliability
+is not preserved by the Linux network device driver interface:
+
+- With Linux 2.4.x (and above) SMP kernels, packet ordering is not
+ preserved. Even if a device driver calls netif_rx(skb1) and later
+ netif_rx(skb2), skb2 might be delivered to the network layer
+ earlier that skb1.
+- Data passed upstream by means of netif_rx() might be dropped by the
+ kernel if the backlog queue is congested.
+
+The X.25 packet layer protocol will detect this and reset the virtual
+call in question. But many upper layer protocols are not designed to
+handle such N-Reset events gracefully. And frequent N-Reset events
+will always degrade performance.
+
+Thus, driver authors should make netif_rx() as reliable as possible:
+
+SMP re-ordering will not occur if the driver's interrupt handler is
+always executed on the same CPU. Thus,
+
+- Driver authors should use irq affinity for the interrupt handler.
+
+The probability of packet loss due to backlog congestion can be
+reduced by the following measures or a combination thereof:
+
+(1) Drivers for kernel versions 2.4.x and above should always check the
+ return value of netif_rx(). If it returns NET_RX_DROP, the
+ driver's LAPB protocol must not confirm reception of the frame
+ to the peer.
+ This will reliably suppress packet loss. The LAPB protocol will
+ automatically cause the peer to re-transmit the dropped packet
+ later.
+ The lapb module interface was modified to support this. Its
+ data_indication() method should now transparently pass the
+ netif_rx() return value to the (lapb module) caller.
+(2) Drivers for kernel versions 2.2.x should always check the global
+ variable netdev_dropping when a new frame is received. The driver
+ should only call netif_rx() if netdev_dropping is zero. Otherwise
+ the driver should not confirm delivery of the frame and drop it.
+ Alternatively, the driver can queue the frame internally and call
+ netif_rx() later when netif_dropping is 0 again. In that case, delivery
+ confirmation should also be deferred such that the internal queue
+ cannot grow to much.
+ This will not reliably avoid packet loss, but the probability
+ of packet loss in netif_rx() path will be significantly reduced.
+(3) Additionally, driver authors might consider to support
+ CONFIG_NET_HW_FLOWCONTROL. This allows the driver to be woken up
+ when a previously congested backlog queue becomes empty again.
+ The driver could uses this for flow-controlling the peer by means
+ of the LAPB protocol's flow-control service.
diff --git a/Documentation/networking/x25.txt b/Documentation/networking/x25.txt
new file mode 100644
index 000000000..c91c6d715
--- /dev/null
+++ b/Documentation/networking/x25.txt
@@ -0,0 +1,44 @@
+Linux X.25 Project
+
+As my third year dissertation at University I have taken it upon myself to
+write an X.25 implementation for Linux. My aim is to provide a complete X.25
+Packet Layer and a LAPB module to allow for "normal" X.25 to be run using
+Linux. There are two sorts of X.25 cards available, intelligent ones that
+implement LAPB on the card itself, and unintelligent ones that simply do
+framing, bit-stuffing and checksumming. These both need to be handled by the
+system.
+
+I therefore decided to write the implementation such that as far as the
+Packet Layer is concerned, the link layer was being performed by a lower
+layer of the Linux kernel and therefore it did not concern itself with
+implementation of LAPB. Therefore the LAPB modules would be called by
+unintelligent X.25 card drivers and not by intelligent ones, this would
+provide a uniform device driver interface, and simplify configuration.
+
+To confuse matters a little, an 802.2 LLC implementation for Linux is being
+written which will allow X.25 to be run over an Ethernet (or Token Ring) and
+conform with the JNT "Pink Book", this will have a different interface to
+the Packet Layer but there will be no confusion since the class of device
+being served by the LLC will be completely separate from LAPB. The LLC
+implementation is being done as part of another protocol project (SNA) and
+by a different author.
+
+Just when you thought that it could not become more confusing, another
+option appeared, XOT. This allows X.25 Packet Layer frames to operate over
+the Internet using TCP/IP as a reliable link layer. RFC1613 specifies the
+format and behaviour of the protocol. If time permits this option will also
+be actively considered.
+
+A linux-x25 mailing list has been created at vger.kernel.org to support the
+development and use of Linux X.25. It is early days yet, but interested
+parties are welcome to subscribe to it. Just send a message to
+majordomo@vger.kernel.org with the following in the message body:
+
+subscribe linux-x25
+end
+
+The contents of the Subject line are ignored.
+
+Jonathan
+
+g4klx@g4klx.demon.co.uk
diff --git a/Documentation/networking/xfrm_proc.txt b/Documentation/networking/xfrm_proc.txt
new file mode 100644
index 000000000..d0d8bafa9
--- /dev/null
+++ b/Documentation/networking/xfrm_proc.txt
@@ -0,0 +1,74 @@
+XFRM proc - /proc/net/xfrm_* files
+==================================
+Masahide NAKAMURA <nakam@linux-ipv6.org>
+
+
+Transformation Statistics
+-------------------------
+xfrm_proc is a statistics shown factor dropped by transformation
+for developer.
+It is a counter designed from current transformation source code
+and defined like linux private MIB.
+
+Inbound statistics
+~~~~~~~~~~~~~~~~~~
+XfrmInError:
+ All errors which is not matched others
+XfrmInBufferError:
+ No buffer is left
+XfrmInHdrError:
+ Header error
+XfrmInNoStates:
+ No state is found
+ i.e. Either inbound SPI, address, or IPsec protocol at SA is wrong
+XfrmInStateProtoError:
+ Transformation protocol specific error
+ e.g. SA key is wrong
+XfrmInStateModeError:
+ Transformation mode specific error
+XfrmInStateSeqError:
+ Sequence error
+ i.e. Sequence number is out of window
+XfrmInStateExpired:
+ State is expired
+XfrmInStateMismatch:
+ State has mismatch option
+ e.g. UDP encapsulation type is mismatch
+XfrmInStateInvalid:
+ State is invalid
+XfrmInTmplMismatch:
+ No matching template for states
+ e.g. Inbound SAs are correct but SP rule is wrong
+XfrmInNoPols:
+ No policy is found for states
+ e.g. Inbound SAs are correct but no SP is found
+XfrmInPolBlock:
+ Policy discards
+XfrmInPolError:
+ Policy error
+
+Outbound errors
+~~~~~~~~~~~~~~~
+XfrmOutError:
+ All errors which is not matched others
+XfrmOutBundleGenError:
+ Bundle generation error
+XfrmOutBundleCheckError:
+ Bundle check error
+XfrmOutNoStates:
+ No state is found
+XfrmOutStateProtoError:
+ Transformation protocol specific error
+XfrmOutStateModeError:
+ Transformation mode specific error
+XfrmOutStateSeqError:
+ Sequence error
+ i.e. Sequence number overflow
+XfrmOutStateExpired:
+ State is expired
+XfrmOutPolBlock:
+ Policy discards
+XfrmOutPolDead:
+ Policy is dead
+XfrmOutPolError:
+ Policy error
diff --git a/Documentation/networking/xfrm_sync.txt b/Documentation/networking/xfrm_sync.txt
new file mode 100644
index 000000000..d7aac9ded
--- /dev/null
+++ b/Documentation/networking/xfrm_sync.txt
@@ -0,0 +1,169 @@
+
+The sync patches work is based on initial patches from
+Krisztian <hidden@balabit.hu> and others and additional patches
+from Jamal <hadi@cyberus.ca>.
+
+The end goal for syncing is to be able to insert attributes + generate
+events so that the an SA can be safely moved from one machine to another
+for HA purposes.
+The idea is to synchronize the SA so that the takeover machine can do
+the processing of the SA as accurate as possible if it has access to it.
+
+We already have the ability to generate SA add/del/upd events.
+These patches add ability to sync and have accurate lifetime byte (to
+ensure proper decay of SAs) and replay counters to avoid replay attacks
+with as minimal loss at failover time.
+This way a backup stays as closely uptodate as an active member.
+
+Because the above items change for every packet the SA receives,
+it is possible for a lot of the events to be generated.
+For this reason, we also add a nagle-like algorithm to restrict
+the events. i.e we are going to set thresholds to say "let me
+know if the replay sequence threshold is reached or 10 secs have passed"
+These thresholds are set system-wide via sysctls or can be updated
+per SA.
+
+The identified items that need to be synchronized are:
+- the lifetime byte counter
+note that: lifetime time limit is not important if you assume the failover
+machine is known ahead of time since the decay of the time countdown
+is not driven by packet arrival.
+- the replay sequence for both inbound and outbound
+
+1) Message Structure
+----------------------
+
+nlmsghdr:aevent_id:optional-TLVs.
+
+The netlink message types are:
+
+XFRM_MSG_NEWAE and XFRM_MSG_GETAE.
+
+A XFRM_MSG_GETAE does not have TLVs.
+A XFRM_MSG_NEWAE will have at least two TLVs (as is
+discussed further below).
+
+aevent_id structure looks like:
+
+ struct xfrm_aevent_id {
+ struct xfrm_usersa_id sa_id;
+ xfrm_address_t saddr;
+ __u32 flags;
+ __u32 reqid;
+ };
+
+The unique SA is identified by the combination of xfrm_usersa_id,
+reqid and saddr.
+
+flags are used to indicate different things. The possible
+flags are:
+ XFRM_AE_RTHR=1, /* replay threshold*/
+ XFRM_AE_RVAL=2, /* replay value */
+ XFRM_AE_LVAL=4, /* lifetime value */
+ XFRM_AE_ETHR=8, /* expiry timer threshold */
+ XFRM_AE_CR=16, /* Event cause is replay update */
+ XFRM_AE_CE=32, /* Event cause is timer expiry */
+ XFRM_AE_CU=64, /* Event cause is policy update */
+
+How these flags are used is dependent on the direction of the
+message (kernel<->user) as well the cause (config, query or event).
+This is described below in the different messages.
+
+The pid will be set appropriately in netlink to recognize direction
+(0 to the kernel and pid = processid that created the event
+when going from kernel to user space)
+
+A program needs to subscribe to multicast group XFRMNLGRP_AEVENTS
+to get notified of these events.
+
+2) TLVS reflect the different parameters:
+-----------------------------------------
+
+a) byte value (XFRMA_LTIME_VAL)
+This TLV carries the running/current counter for byte lifetime since
+last event.
+
+b)replay value (XFRMA_REPLAY_VAL)
+This TLV carries the running/current counter for replay sequence since
+last event.
+
+c)replay threshold (XFRMA_REPLAY_THRESH)
+This TLV carries the threshold being used by the kernel to trigger events
+when the replay sequence is exceeded.
+
+d) expiry timer (XFRMA_ETIMER_THRESH)
+This is a timer value in milliseconds which is used as the nagle
+value to rate limit the events.
+
+3) Default configurations for the parameters:
+----------------------------------------------
+
+By default these events should be turned off unless there is
+at least one listener registered to listen to the multicast
+group XFRMNLGRP_AEVENTS.
+
+Programs installing SAs will need to specify the two thresholds, however,
+in order to not change existing applications such as racoon
+we also provide default threshold values for these different parameters
+in case they are not specified.
+
+the two sysctls/proc entries are:
+a) /proc/sys/net/core/sysctl_xfrm_aevent_etime
+used to provide default values for the XFRMA_ETIMER_THRESH in incremental
+units of time of 100ms. The default is 10 (1 second)
+
+b) /proc/sys/net/core/sysctl_xfrm_aevent_rseqth
+used to provide default values for XFRMA_REPLAY_THRESH parameter
+in incremental packet count. The default is two packets.
+
+4) Message types
+----------------
+
+a) XFRM_MSG_GETAE issued by user-->kernel.
+XFRM_MSG_GETAE does not carry any TLVs.
+The response is a XFRM_MSG_NEWAE which is formatted based on what
+XFRM_MSG_GETAE queried for.
+The response will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
+*if XFRM_AE_RTHR flag is set, then XFRMA_REPLAY_THRESH is also retrieved
+*if XFRM_AE_ETHR flag is set, then XFRMA_ETIMER_THRESH is also retrieved
+
+b) XFRM_MSG_NEWAE is issued by either user space to configure
+or kernel to announce events or respond to a XFRM_MSG_GETAE.
+
+i) user --> kernel to configure a specific SA.
+any of the values or threshold parameters can be updated by passing the
+appropriate TLV.
+A response is issued back to the sender in user space to indicate success
+or failure.
+In the case of success, additionally an event with
+XFRM_MSG_NEWAE is also issued to any listeners as described in iii).
+
+ii) kernel->user direction as a response to XFRM_MSG_GETAE
+The response will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
+The threshold TLVs will be included if explicitly requested in
+the XFRM_MSG_GETAE message.
+
+iii) kernel->user to report as event if someone sets any values or
+thresholds for an SA using XFRM_MSG_NEWAE (as described in #i above).
+In such a case XFRM_AE_CU flag is set to inform the user that
+the change happened as a result of an update.
+The message will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
+
+iv) kernel->user to report event when replay threshold or a timeout
+is exceeded.
+In such a case either XFRM_AE_CR (replay exceeded) or XFRM_AE_CE (timeout
+happened) is set to inform the user what happened.
+Note the two flags are mutually exclusive.
+The message will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
+
+Exceptions to threshold settings
+--------------------------------
+
+If you have an SA that is getting hit by traffic in bursts such that
+there is a period where the timer threshold expires with no packets
+seen, then an odd behavior is seen as follows:
+The first packet arrival after a timer expiry will trigger a timeout
+aevent; i.e we dont wait for a timeout period or a packet threshold
+to be reached. This is done for simplicity and efficiency reasons.
+
+-JHS
diff --git a/Documentation/networking/xfrm_sysctl.txt b/Documentation/networking/xfrm_sysctl.txt
new file mode 100644
index 000000000..5bbd16792
--- /dev/null
+++ b/Documentation/networking/xfrm_sysctl.txt
@@ -0,0 +1,4 @@
+/proc/sys/net/core/xfrm_* Variables:
+
+xfrm_acq_expires - INTEGER
+ default 30 - hard timeout in seconds for acquire requests
diff --git a/Documentation/networking/z8530drv.txt b/Documentation/networking/z8530drv.txt
new file mode 100644
index 000000000..2206abbc3
--- /dev/null
+++ b/Documentation/networking/z8530drv.txt
@@ -0,0 +1,657 @@
+This is a subset of the documentation. To use this driver you MUST have the
+full package from:
+
+Internet:
+=========
+
+1. ftp://ftp.ccac.rwth-aachen.de/pub/jr/z8530drv-utils_3.0-3.tar.gz
+
+2. ftp://ftp.pspt.fi/pub/ham/linux/ax25/z8530drv-utils_3.0-3.tar.gz
+
+Please note that the information in this document may be hopelessly outdated.
+A new version of the documentation, along with links to other important
+Linux Kernel AX.25 documentation and programs, is available on
+http://yaina.de/jreuter
+
+-----------------------------------------------------------------------------
+
+
+ SCC.C - Linux driver for Z8530 based HDLC cards for AX.25
+
+ ********************************************************************
+
+ (c) 1993,2000 by Joerg Reuter DL1BKE <jreuter@yaina.de>
+
+ portions (c) 1993 Guido ten Dolle PE1NNZ
+
+ for the complete copyright notice see >> Copying.Z8530DRV <<
+
+ ********************************************************************
+
+
+1. Initialization of the driver
+===============================
+
+To use the driver, 3 steps must be performed:
+
+ 1. if compiled as module: loading the module
+ 2. Setup of hardware, MODEM and KISS parameters with sccinit
+ 3. Attach each channel to the Linux kernel AX.25 with "ifconfig"
+
+Unlike the versions below 2.4 this driver is a real network device
+driver. If you want to run xNOS instead of our fine kernel AX.25
+use a 2.x version (available from above sites) or read the
+AX.25-HOWTO on how to emulate a KISS TNC on network device drivers.
+
+
+1.1 Loading the module
+======================
+
+(If you're going to compile the driver as a part of the kernel image,
+ skip this chapter and continue with 1.2)
+
+Before you can use a module, you'll have to load it with
+
+ insmod scc.o
+
+please read 'man insmod' that comes with module-init-tools.
+
+You should include the insmod in one of the /etc/rc.d/rc.* files,
+and don't forget to insert a call of sccinit after that. It
+will read your /etc/z8530drv.conf.
+
+1.2. /etc/z8530drv.conf
+=======================
+
+To setup all parameters you must run /sbin/sccinit from one
+of your rc.*-files. This has to be done BEFORE you can
+"ifconfig" an interface. Sccinit reads the file /etc/z8530drv.conf
+and sets the hardware, MODEM and KISS parameters. A sample file is
+delivered with this package. Change it to your needs.
+
+The file itself consists of two main sections.
+
+1.2.1 configuration of hardware parameters
+==========================================
+
+The hardware setup section defines the following parameters for each
+Z8530:
+
+chip 1
+data_a 0x300 # data port A
+ctrl_a 0x304 # control port A
+data_b 0x301 # data port B
+ctrl_b 0x305 # control port B
+irq 5 # IRQ No. 5
+pclock 4915200 # clock
+board BAYCOM # hardware type
+escc no # enhanced SCC chip? (8580/85180/85280)
+vector 0 # latch for interrupt vector
+special no # address of special function register
+option 0 # option to set via sfr
+
+
+chip - this is just a delimiter to make sccinit a bit simpler to
+ program. A parameter has no effect.
+
+data_a - the address of the data port A of this Z8530 (needed)
+ctrl_a - the address of the control port A (needed)
+data_b - the address of the data port B (needed)
+ctrl_b - the address of the control port B (needed)
+
+irq - the used IRQ for this chip. Different chips can use different
+ IRQs or the same. If they share an interrupt, it needs to be
+ specified within one chip-definition only.
+
+pclock - the clock at the PCLK pin of the Z8530 (option, 4915200 is
+ default), measured in Hertz
+
+board - the "type" of the board:
+
+ SCC type value
+ ---------------------------------
+ PA0HZP SCC card PA0HZP
+ EAGLE card EAGLE
+ PC100 card PC100
+ PRIMUS-PC (DG9BL) card PRIMUS
+ BayCom (U)SCC card BAYCOM
+
+escc - if you want support for ESCC chips (8580, 85180, 85280), set
+ this to "yes" (option, defaults to "no")
+
+vector - address of the vector latch (aka "intack port") for PA0HZP
+ cards. There can be only one vector latch for all chips!
+ (option, defaults to 0)
+
+special - address of the special function register on several cards.
+ (option, defaults to 0)
+
+option - The value you write into that register (option, default is 0)
+
+You can specify up to four chips (8 channels). If this is not enough,
+just change
+
+ #define MAXSCC 4
+
+to a higher value.
+
+Example for the BAYCOM USCC:
+----------------------------
+
+chip 1
+data_a 0x300 # data port A
+ctrl_a 0x304 # control port A
+data_b 0x301 # data port B
+ctrl_b 0x305 # control port B
+irq 5 # IRQ No. 5 (#)
+board BAYCOM # hardware type (*)
+#
+# SCC chip 2
+#
+chip 2
+data_a 0x302
+ctrl_a 0x306
+data_b 0x303
+ctrl_b 0x307
+board BAYCOM
+
+An example for a PA0HZP card:
+-----------------------------
+
+chip 1
+data_a 0x153
+data_b 0x151
+ctrl_a 0x152
+ctrl_b 0x150
+irq 9
+pclock 4915200
+board PA0HZP
+vector 0x168
+escc no
+#
+#
+#
+chip 2
+data_a 0x157
+data_b 0x155
+ctrl_a 0x156
+ctrl_b 0x154
+irq 9
+pclock 4915200
+board PA0HZP
+vector 0x168
+escc no
+
+A DRSI would should probably work with this:
+--------------------------------------------
+(actually: two DRSI cards...)
+
+chip 1
+data_a 0x303
+data_b 0x301
+ctrl_a 0x302
+ctrl_b 0x300
+irq 7
+pclock 4915200
+board DRSI
+escc no
+#
+#
+#
+chip 2
+data_a 0x313
+data_b 0x311
+ctrl_a 0x312
+ctrl_b 0x310
+irq 7
+pclock 4915200
+board DRSI
+escc no
+
+Note that you cannot use the on-board baudrate generator off DRSI
+cards. Use "mode dpll" for clock source (see below).
+
+This is based on information provided by Mike Bilow (and verified
+by Paul Helay)
+
+The utility "gencfg"
+--------------------
+
+If you only know the parameters for the PE1CHL driver for DOS,
+run gencfg. It will generate the correct port addresses (I hope).
+Its parameters are exactly the same as the ones you use with
+the "attach scc" command in net, except that the string "init" must
+not appear. Example:
+
+gencfg 2 0x150 4 2 0 1 0x168 9 4915200
+
+will print a skeleton z8530drv.conf for the OptoSCC to stdout.
+
+gencfg 2 0x300 2 4 5 -4 0 7 4915200 0x10
+
+does the same for the BAYCOM USCC card. In my opinion it is much easier
+to edit scc_config.h...
+
+
+1.2.2 channel configuration
+===========================
+
+The channel definition is divided into three sub sections for each
+channel:
+
+An example for scc0:
+
+# DEVICE
+
+device scc0 # the device for the following params
+
+# MODEM / BUFFERS
+
+speed 1200 # the default baudrate
+clock dpll # clock source:
+ # dpll = normal half duplex operation
+ # external = MODEM provides own Rx/Tx clock
+ # divider = use full duplex divider if
+ # installed (1)
+mode nrzi # HDLC encoding mode
+ # nrzi = 1k2 MODEM, G3RUH 9k6 MODEM
+ # nrz = DF9IC 9k6 MODEM
+ #
+bufsize 384 # size of buffers. Note that this must include
+ # the AX.25 header, not only the data field!
+ # (optional, defaults to 384)
+
+# KISS (Layer 1)
+
+txdelay 36 # (see chapter 1.4)
+persist 64
+slot 8
+tail 8
+fulldup 0
+wait 12
+min 3
+maxkey 7
+idle 3
+maxdef 120
+group 0
+txoff off
+softdcd on
+slip off
+
+The order WITHIN these sections is unimportant. The order OF these
+sections IS important. The MODEM parameters are set with the first
+recognized KISS parameter...
+
+Please note that you can initialize the board only once after boot
+(or insmod). You can change all parameters but "mode" and "clock"
+later with the Sccparam program or through KISS. Just to avoid
+security holes...
+
+(1) this divider is usually mounted on the SCC-PBC (PA0HZP) or not
+ present at all (BayCom). It feeds back the output of the DPLL
+ (digital pll) as transmit clock. Using this mode without a divider
+ installed will normally result in keying the transceiver until
+ maxkey expires --- of course without sending anything (useful).
+
+2. Attachment of a channel by your AX.25 software
+=================================================
+
+2.1 Kernel AX.25
+================
+
+To set up an AX.25 device you can simply type:
+
+ ifconfig scc0 44.128.1.1 hw ax25 dl0tha-7
+
+This will create a network interface with the IP number 44.128.20.107
+and the callsign "dl0tha". If you do not have any IP number (yet) you
+can use any of the 44.128.0.0 network. Note that you do not need
+axattach. The purpose of axattach (like slattach) is to create a KISS
+network device linked to a TTY. Please read the documentation of the
+ax25-utils and the AX.25-HOWTO to learn how to set the parameters of
+the kernel AX.25.
+
+2.2 NOS, NET and TFKISS
+=======================
+
+Since the TTY driver (aka KISS TNC emulation) is gone you need
+to emulate the old behaviour. The cost of using these programs is
+that you probably need to compile the kernel AX.25, regardless of whether
+you actually use it or not. First setup your /etc/ax25/axports,
+for example:
+
+ 9k6 dl0tha-9 9600 255 4 9600 baud port (scc3)
+ axlink dl0tha-15 38400 255 4 Link to NOS
+
+Now "ifconfig" the scc device:
+
+ ifconfig scc3 44.128.1.1 hw ax25 dl0tha-9
+
+You can now axattach a pseudo-TTY:
+
+ axattach /dev/ptys0 axlink
+
+and start your NOS and attach /dev/ptys0 there. The problem is that
+NOS is reachable only via digipeating through the kernel AX.25
+(disastrous on a DAMA controlled channel). To solve this problem,
+configure "rxecho" to echo the incoming frames from "9k6" to "axlink"
+and outgoing frames from "axlink" to "9k6" and start:
+
+ rxecho
+
+Or simply use "kissbridge" coming with z8530drv-utils:
+
+ ifconfig scc3 hw ax25 dl0tha-9
+ kissbridge scc3 /dev/ptys0
+
+
+3. Adjustment and Display of parameters
+=======================================
+
+3.1 Displaying SCC Parameters:
+==============================
+
+Once a SCC channel has been attached, the parameter settings and
+some statistic information can be shown using the param program:
+
+dl1bke-u:~$ sccstat scc0
+
+Parameters:
+
+speed : 1200 baud
+txdelay : 36
+persist : 255
+slottime : 0
+txtail : 8
+fulldup : 1
+waittime : 12
+mintime : 3 sec
+maxkeyup : 7 sec
+idletime : 3 sec
+maxdefer : 120 sec
+group : 0x00
+txoff : off
+softdcd : on
+SLIP : off
+
+Status:
+
+HDLC Z8530 Interrupts Buffers
+-----------------------------------------------------------------------
+Sent : 273 RxOver : 0 RxInts : 125074 Size : 384
+Received : 1095 TxUnder: 0 TxInts : 4684 NoSpace : 0
+RxErrors : 1591 ExInts : 11776
+TxErrors : 0 SpInts : 1503
+Tx State : idle
+
+
+The status info shown is:
+
+Sent - number of frames transmitted
+Received - number of frames received
+RxErrors - number of receive errors (CRC, ABORT)
+TxErrors - number of discarded Tx frames (due to various reasons)
+Tx State - status of the Tx interrupt handler: idle/busy/active/tail (2)
+RxOver - number of receiver overruns
+TxUnder - number of transmitter underruns
+RxInts - number of receiver interrupts
+TxInts - number of transmitter interrupts
+EpInts - number of receiver special condition interrupts
+SpInts - number of external/status interrupts
+Size - maximum size of an AX.25 frame (*with* AX.25 headers!)
+NoSpace - number of times a buffer could not get allocated
+
+An overrun is abnormal. If lots of these occur, the product of
+baudrate and number of interfaces is too high for the processing
+power of your computer. NoSpace errors are unlikely to be caused by the
+driver or the kernel AX.25.
+
+
+3.2 Setting Parameters
+======================
+
+
+The setting of parameters of the emulated KISS TNC is done in the
+same way in the SCC driver. You can change parameters by using
+the kissparms program from the ax25-utils package or use the program
+"sccparam":
+
+ sccparam <device> <paramname> <decimal-|hexadecimal value>
+
+You can change the following parameters:
+
+param : value
+------------------------
+speed : 1200
+txdelay : 36
+persist : 255
+slottime : 0
+txtail : 8
+fulldup : 1
+waittime : 12
+mintime : 3
+maxkeyup : 7
+idletime : 3
+maxdefer : 120
+group : 0x00
+txoff : off
+softdcd : on
+SLIP : off
+
+
+The parameters have the following meaning:
+
+speed:
+ The baudrate on this channel in bits/sec
+
+ Example: sccparam /dev/scc3 speed 9600
+
+txdelay:
+ The delay (in units of 10 ms) after keying of the
+ transmitter, until the first byte is sent. This is usually
+ called "TXDELAY" in a TNC. When 0 is specified, the driver
+ will just wait until the CTS signal is asserted. This
+ assumes the presence of a timer or other circuitry in the
+ MODEM and/or transmitter, that asserts CTS when the
+ transmitter is ready for data.
+ A normal value of this parameter is 30-36.
+
+ Example: sccparam /dev/scc0 txd 20
+
+persist:
+ This is the probability that the transmitter will be keyed
+ when the channel is found to be free. It is a value from 0
+ to 255, and the probability is (value+1)/256. The value
+ should be somewhere near 50-60, and should be lowered when
+ the channel is used more heavily.
+
+ Example: sccparam /dev/scc2 persist 20
+
+slottime:
+ This is the time between samples of the channel. It is
+ expressed in units of 10 ms. About 200-300 ms (value 20-30)
+ seems to be a good value.
+
+ Example: sccparam /dev/scc0 slot 20
+
+tail:
+ The time the transmitter will remain keyed after the last
+ byte of a packet has been transferred to the SCC. This is
+ necessary because the CRC and a flag still have to leave the
+ SCC before the transmitter is keyed down. The value depends
+ on the baudrate selected. A few character times should be
+ sufficient, e.g. 40ms at 1200 baud. (value 4)
+ The value of this parameter is in 10 ms units.
+
+ Example: sccparam /dev/scc2 4
+
+full:
+ The full-duplex mode switch. This can be one of the following
+ values:
+
+ 0: The interface will operate in CSMA mode (the normal
+ half-duplex packet radio operation)
+ 1: Fullduplex mode, i.e. the transmitter will be keyed at
+ any time, without checking the received carrier. It
+ will be unkeyed when there are no packets to be sent.
+ 2: Like 1, but the transmitter will remain keyed, also
+ when there are no packets to be sent. Flags will be
+ sent in that case, until a timeout (parameter 10)
+ occurs.
+
+ Example: sccparam /dev/scc0 fulldup off
+
+wait:
+ The initial waittime before any transmit attempt, after the
+ frame has been queue for transmit. This is the length of
+ the first slot in CSMA mode. In full duplex modes it is
+ set to 0 for maximum performance.
+ The value of this parameter is in 10 ms units.
+
+ Example: sccparam /dev/scc1 wait 4
+
+maxkey:
+ The maximal time the transmitter will be keyed to send
+ packets, in seconds. This can be useful on busy CSMA
+ channels, to avoid "getting a bad reputation" when you are
+ generating a lot of traffic. After the specified time has
+ elapsed, no new frame will be started. Instead, the trans-
+ mitter will be switched off for a specified time (parameter
+ min), and then the selected algorithm for keyup will be
+ started again.
+ The value 0 as well as "off" will disable this feature,
+ and allow infinite transmission time.
+
+ Example: sccparam /dev/scc0 maxk 20
+
+min:
+ This is the time the transmitter will be switched off when
+ the maximum transmission time is exceeded.
+
+ Example: sccparam /dev/scc3 min 10
+
+idle
+ This parameter specifies the maximum idle time in full duplex
+ 2 mode, in seconds. When no frames have been sent for this
+ time, the transmitter will be keyed down. A value of 0 is
+ has same result as the fullduplex mode 1. This parameter
+ can be disabled.
+
+ Example: sccparam /dev/scc2 idle off # transmit forever
+
+maxdefer
+ This is the maximum time (in seconds) to wait for a free channel
+ to send. When this timer expires the transmitter will be keyed
+ IMMEDIATELY. If you love to get trouble with other users you
+ should set this to a very low value ;-)
+
+ Example: sccparam /dev/scc0 maxdefer 240 # 2 minutes
+
+
+txoff:
+ When this parameter has the value 0, the transmission of packets
+ is enable. Otherwise it is disabled.
+
+ Example: sccparam /dev/scc2 txoff on
+
+group:
+ It is possible to build special radio equipment to use more than
+ one frequency on the same band, e.g. using several receivers and
+ only one transmitter that can be switched between frequencies.
+ Also, you can connect several radios that are active on the same
+ band. In these cases, it is not possible, or not a good idea, to
+ transmit on more than one frequency. The SCC driver provides a
+ method to lock transmitters on different interfaces, using the
+ "param <interface> group <x>" command. This will only work when
+ you are using CSMA mode (parameter full = 0).
+ The number <x> must be 0 if you want no group restrictions, and
+ can be computed as follows to create restricted groups:
+ <x> is the sum of some OCTAL numbers:
+
+ 200 This transmitter will only be keyed when all other
+ transmitters in the group are off.
+ 100 This transmitter will only be keyed when the carrier
+ detect of all other interfaces in the group is off.
+ 0xx A byte that can be used to define different groups.
+ Interfaces are in the same group, when the logical AND
+ between their xx values is nonzero.
+
+ Examples:
+ When 2 interfaces use group 201, their transmitters will never be
+ keyed at the same time.
+ When 2 interfaces use group 101, the transmitters will only key
+ when both channels are clear at the same time. When group 301,
+ the transmitters will not be keyed at the same time.
+
+ Don't forget to convert the octal numbers into decimal before
+ you set the parameter.
+
+ Example: (to be written)
+
+softdcd:
+ use a software dcd instead of the real one... Useful for a very
+ slow squelch.
+
+ Example: sccparam /dev/scc0 soft on
+
+
+4. Problems
+===========
+
+If you have tx-problems with your BayCom USCC card please check
+the manufacturer of the 8530. SGS chips have a slightly
+different timing. Try Zilog... A solution is to write to register 8
+instead to the data port, but this won't work with the ESCC chips.
+*SIGH!*
+
+A very common problem is that the PTT locks until the maxkeyup timer
+expires, although interrupts and clock source are correct. In most
+cases compiling the driver with CONFIG_SCC_DELAY (set with
+make config) solves the problems. For more hints read the (pseudo) FAQ
+and the documentation coming with z8530drv-utils.
+
+I got reports that the driver has problems on some 386-based systems.
+(i.e. Amstrad) Those systems have a bogus AT bus timing which will
+lead to delayed answers on interrupts. You can recognize these
+problems by looking at the output of Sccstat for the suspected
+port. If it shows under- and overruns you own such a system.
+
+Delayed processing of received data: This depends on
+
+- the kernel version
+
+- kernel profiling compiled or not
+
+- a high interrupt load
+
+- a high load of the machine --- running X, Xmorph, XV and Povray,
+ while compiling the kernel... hmm ... even with 32 MB RAM ... ;-)
+ Or running a named for the whole .ampr.org domain on an 8 MB
+ box...
+
+- using information from rxecho or kissbridge.
+
+Kernel panics: please read /linux/README and find out if it
+really occurred within the scc driver.
+
+If you cannot solve a problem, send me
+
+- a description of the problem,
+- information on your hardware (computer system, scc board, modem)
+- your kernel version
+- the output of cat /proc/net/z8530
+
+4. Thor RLC100
+==============
+
+Mysteriously this board seems not to work with the driver. Anyone
+got it up-and-running?
+
+
+Many thanks to Linus Torvalds and Alan Cox for including the driver
+in the Linux standard distribution and their support.
+
+Joerg Reuter ampr-net: dl1bke@db0pra.ampr.org
+ AX-25 : DL1BKE @ DB0ABH.#BAY.DEU.EU
+ Internet: jreuter@yaina.de
+ WWW : http://yaina.de/jreuter
diff --git a/Documentation/nfc/nfc-hci.txt b/Documentation/nfc/nfc-hci.txt
new file mode 100644
index 000000000..0686c9e21
--- /dev/null
+++ b/Documentation/nfc/nfc-hci.txt
@@ -0,0 +1,290 @@
+HCI backend for NFC Core
+
+Author: Eric Lapuyade, Samuel Ortiz
+Contact: eric.lapuyade@intel.com, samuel.ortiz@intel.com
+
+General
+-------
+
+The HCI layer implements much of the ETSI TS 102 622 V10.2.0 specification. It
+enables easy writing of HCI-based NFC drivers. The HCI layer runs as an NFC Core
+backend, implementing an abstract nfc device and translating NFC Core API
+to HCI commands and events.
+
+HCI
+---
+
+HCI registers as an nfc device with NFC Core. Requests coming from userspace are
+routed through netlink sockets to NFC Core and then to HCI. From this point,
+they are translated in a sequence of HCI commands sent to the HCI layer in the
+host controller (the chip). Commands can be executed synchronously (the sending
+context blocks waiting for response) or asynchronously (the response is returned
+from HCI Rx context).
+HCI events can also be received from the host controller. They will be handled
+and a translation will be forwarded to NFC Core as needed. There are hooks to
+let the HCI driver handle proprietary events or override standard behavior.
+HCI uses 2 execution contexts:
+- one for executing commands : nfc_hci_msg_tx_work(). Only one command
+can be executing at any given moment.
+- one for dispatching received events and commands : nfc_hci_msg_rx_work().
+
+HCI Session initialization:
+---------------------------
+
+The Session initialization is an HCI standard which must unfortunately
+support proprietary gates. This is the reason why the driver will pass a list
+of proprietary gates that must be part of the session. HCI will ensure all
+those gates have pipes connected when the hci device is set up.
+In case the chip supports pre-opened gates and pseudo-static pipes, the driver
+can pass that information to HCI core.
+
+HCI Gates and Pipes
+-------------------
+
+A gate defines the 'port' where some service can be found. In order to access
+a service, one must create a pipe to that gate and open it. In this
+implementation, pipes are totally hidden. The public API only knows gates.
+This is consistent with the driver need to send commands to proprietary gates
+without knowing the pipe connected to it.
+
+Driver interface
+----------------
+
+A driver is generally written in two parts : the physical link management and
+the HCI management. This makes it easier to maintain a driver for a chip that
+can be connected using various phy (i2c, spi, ...)
+
+HCI Management
+--------------
+
+A driver would normally register itself with HCI and provide the following
+entry points:
+
+struct nfc_hci_ops {
+ int (*open)(struct nfc_hci_dev *hdev);
+ void (*close)(struct nfc_hci_dev *hdev);
+ int (*hci_ready) (struct nfc_hci_dev *hdev);
+ int (*xmit) (struct nfc_hci_dev *hdev, struct sk_buff *skb);
+ int (*start_poll) (struct nfc_hci_dev *hdev,
+ u32 im_protocols, u32 tm_protocols);
+ int (*dep_link_up)(struct nfc_hci_dev *hdev, struct nfc_target *target,
+ u8 comm_mode, u8 *gb, size_t gb_len);
+ int (*dep_link_down)(struct nfc_hci_dev *hdev);
+ int (*target_from_gate) (struct nfc_hci_dev *hdev, u8 gate,
+ struct nfc_target *target);
+ int (*complete_target_discovered) (struct nfc_hci_dev *hdev, u8 gate,
+ struct nfc_target *target);
+ int (*im_transceive) (struct nfc_hci_dev *hdev,
+ struct nfc_target *target, struct sk_buff *skb,
+ data_exchange_cb_t cb, void *cb_context);
+ int (*tm_send)(struct nfc_hci_dev *hdev, struct sk_buff *skb);
+ int (*check_presence)(struct nfc_hci_dev *hdev,
+ struct nfc_target *target);
+ int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event,
+ struct sk_buff *skb);
+};
+
+- open() and close() shall turn the hardware on and off.
+- hci_ready() is an optional entry point that is called right after the hci
+session has been set up. The driver can use it to do additional initialization
+that must be performed using HCI commands.
+- xmit() shall simply write a frame to the physical link.
+- start_poll() is an optional entrypoint that shall set the hardware in polling
+mode. This must be implemented only if the hardware uses proprietary gates or a
+mechanism slightly different from the HCI standard.
+- dep_link_up() is called after a p2p target has been detected, to finish
+the p2p connection setup with hardware parameters that need to be passed back
+to nfc core.
+- dep_link_down() is called to bring the p2p link down.
+- target_from_gate() is an optional entrypoint to return the nfc protocols
+corresponding to a proprietary gate.
+- complete_target_discovered() is an optional entry point to let the driver
+perform additional proprietary processing necessary to auto activate the
+discovered target.
+- im_transceive() must be implemented by the driver if proprietary HCI commands
+are required to send data to the tag. Some tag types will require custom
+commands, others can be written to using the standard HCI commands. The driver
+can check the tag type and either do proprietary processing, or return 1 to ask
+for standard processing. The data exchange command itself must be sent
+asynchronously.
+- tm_send() is called to send data in the case of a p2p connection
+- check_presence() is an optional entry point that will be called regularly
+by the core to check that an activated tag is still in the field. If this is
+not implemented, the core will not be able to push tag_lost events to the user
+space
+- event_received() is called to handle an event coming from the chip. Driver
+can handle the event or return 1 to let HCI attempt standard processing.
+
+On the rx path, the driver is responsible to push incoming HCP frames to HCI
+using nfc_hci_recv_frame(). HCI will take care of re-aggregation and handling
+This must be done from a context that can sleep.
+
+PHY Management
+--------------
+
+The physical link (i2c, ...) management is defined by the following struture:
+
+struct nfc_phy_ops {
+ int (*write)(void *dev_id, struct sk_buff *skb);
+ int (*enable)(void *dev_id);
+ void (*disable)(void *dev_id);
+};
+
+enable(): turn the phy on (power on), make it ready to transfer data
+disable(): turn the phy off
+write(): Send a data frame to the chip. Note that to enable higher
+layers such as an llc to store the frame for re-emission, this function must
+not alter the skb. It must also not return a positive result (return 0 for
+success, negative for failure).
+
+Data coming from the chip shall be sent directly to nfc_hci_recv_frame().
+
+LLC
+---
+
+Communication between the CPU and the chip often requires some link layer
+protocol. Those are isolated as modules managed by the HCI layer. There are
+currently two modules : nop (raw transfert) and shdlc.
+A new llc must implement the following functions:
+
+struct nfc_llc_ops {
+ void *(*init) (struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv,
+ rcv_to_hci_t rcv_to_hci, int tx_headroom,
+ int tx_tailroom, int *rx_headroom, int *rx_tailroom,
+ llc_failure_t llc_failure);
+ void (*deinit) (struct nfc_llc *llc);
+ int (*start) (struct nfc_llc *llc);
+ int (*stop) (struct nfc_llc *llc);
+ void (*rcv_from_drv) (struct nfc_llc *llc, struct sk_buff *skb);
+ int (*xmit_from_hci) (struct nfc_llc *llc, struct sk_buff *skb);
+};
+
+- init() : allocate and init your private storage
+- deinit() : cleanup
+- start() : establish the logical connection
+- stop () : terminate the logical connection
+- rcv_from_drv() : handle data coming from the chip, going to HCI
+- xmit_from_hci() : handle data sent by HCI, going to the chip
+
+The llc must be registered with nfc before it can be used. Do that by
+calling nfc_llc_register(const char *name, struct nfc_llc_ops *ops);
+
+Again, note that the llc does not handle the physical link. It is thus very
+easy to mix any physical link with any llc for a given chip driver.
+
+Included Drivers
+----------------
+
+An HCI based driver for an NXP PN544, connected through I2C bus, and using
+shdlc is included.
+
+Execution Contexts
+------------------
+
+The execution contexts are the following:
+- IRQ handler (IRQH):
+fast, cannot sleep. sends incoming frames to HCI where they are passed to
+the current llc. In case of shdlc, the frame is queued in shdlc rx queue.
+
+- SHDLC State Machine worker (SMW)
+Only when llc_shdlc is used: handles shdlc rx & tx queues.
+Dispatches HCI cmd responses.
+
+- HCI Tx Cmd worker (MSGTXWQ)
+Serializes execution of HCI commands. Completes execution in case of response
+timeout.
+
+- HCI Rx worker (MSGRXWQ)
+Dispatches incoming HCI commands or events.
+
+- Syscall context from a userspace call (SYSCALL)
+Any entrypoint in HCI called from NFC Core
+
+Workflow executing an HCI command (using shdlc)
+-----------------------------------------------
+
+Executing an HCI command can easily be performed synchronously using the
+following API:
+
+int nfc_hci_send_cmd (struct nfc_hci_dev *hdev, u8 gate, u8 cmd,
+ const u8 *param, size_t param_len, struct sk_buff **skb)
+
+The API must be invoked from a context that can sleep. Most of the time, this
+will be the syscall context. skb will return the result that was received in
+the response.
+
+Internally, execution is asynchronous. So all this API does is to enqueue the
+HCI command, setup a local wait queue on stack, and wait_event() for completion.
+The wait is not interruptible because it is guaranteed that the command will
+complete after some short timeout anyway.
+
+MSGTXWQ context will then be scheduled and invoke nfc_hci_msg_tx_work().
+This function will dequeue the next pending command and send its HCP fragments
+to the lower layer which happens to be shdlc. It will then start a timer to be
+able to complete the command with a timeout error if no response arrive.
+
+SMW context gets scheduled and invokes nfc_shdlc_sm_work(). This function
+handles shdlc framing in and out. It uses the driver xmit to send frames and
+receives incoming frames in an skb queue filled from the driver IRQ handler.
+SHDLC I(nformation) frames payload are HCP fragments. They are aggregated to
+form complete HCI frames, which can be a response, command, or event.
+
+HCI Responses are dispatched immediately from this context to unblock
+waiting command execution. Response processing involves invoking the completion
+callback that was provided by nfc_hci_msg_tx_work() when it sent the command.
+The completion callback will then wake the syscall context.
+
+It is also possible to execute the command asynchronously using this API:
+
+static int nfc_hci_execute_cmd_async(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd,
+ const u8 *param, size_t param_len,
+ data_exchange_cb_t cb, void *cb_context)
+
+The workflow is the same, except that the API call returns immediately, and
+the callback will be called with the result from the SMW context.
+
+Workflow receiving an HCI event or command
+------------------------------------------
+
+HCI commands or events are not dispatched from SMW context. Instead, they are
+queued to HCI rx_queue and will be dispatched from HCI rx worker
+context (MSGRXWQ). This is done this way to allow a cmd or event handler
+to also execute other commands (for example, handling the
+NFC_HCI_EVT_TARGET_DISCOVERED event from PN544 requires to issue an
+ANY_GET_PARAMETER to the reader A gate to get information on the target
+that was discovered).
+
+Typically, such an event will be propagated to NFC Core from MSGRXWQ context.
+
+Error management
+----------------
+
+Errors that occur synchronously with the execution of an NFC Core request are
+simply returned as the execution result of the request. These are easy.
+
+Errors that occur asynchronously (e.g. in a background protocol handling thread)
+must be reported such that upper layers don't stay ignorant that something
+went wrong below and know that expected events will probably never happen.
+Handling of these errors is done as follows:
+
+- driver (pn544) fails to deliver an incoming frame: it stores the error such
+that any subsequent call to the driver will result in this error. Then it calls
+the standard nfc_shdlc_recv_frame() with a NULL argument to report the problem
+above. shdlc stores a EREMOTEIO sticky status, which will trigger SMW to
+report above in turn.
+
+- SMW is basically a background thread to handle incoming and outgoing shdlc
+frames. This thread will also check the shdlc sticky status and report to HCI
+when it discovers it is not able to run anymore because of an unrecoverable
+error that happened within shdlc or below. If the problem occurs during shdlc
+connection, the error is reported through the connect completion.
+
+- HCI: if an internal HCI error happens (frame is lost), or HCI is reported an
+error from a lower layer, HCI will either complete the currently executing
+command with that error, or notify NFC Core directly if no command is executing.
+
+- NFC Core: when NFC Core is notified of an error from below and polling is
+active, it will send a tag discovered event with an empty tag list to the user
+space to let it know that the poll operation will never be able to detect a tag.
+If polling is not active and the error was sticky, lower levels will return it
+at next invocation.
diff --git a/Documentation/nfc/nfc-pn544.txt b/Documentation/nfc/nfc-pn544.txt
new file mode 100644
index 000000000..b36ca14ca
--- /dev/null
+++ b/Documentation/nfc/nfc-pn544.txt
@@ -0,0 +1,32 @@
+Kernel driver for the NXP Semiconductors PN544 Near Field
+Communication chip
+
+General
+-------
+
+The PN544 is an integrated transmission module for contactless
+communication. The driver goes under drives/nfc/ and is compiled as a
+module named "pn544".
+
+Host Interfaces: I2C, SPI and HSU, this driver supports currently only I2C.
+
+Protocols
+---------
+
+In the normal (HCI) mode and in the firmware update mode read and
+write functions behave a bit differently because the message formats
+or the protocols are different.
+
+In the normal (HCI) mode the protocol used is derived from the ETSI
+HCI specification. The firmware is updated using a specific protocol,
+which is different from HCI.
+
+HCI messages consist of an eight bit header and the message body. The
+header contains the message length. Maximum size for an HCI message is
+33. In HCI mode sent messages are tested for a correct
+checksum. Firmware update messages have the length in the second (MSB)
+and third (LSB) bytes of the message. The maximum FW message length is
+1024 bytes.
+
+For the ETSI HCI specification see
+http://www.etsi.org/WebSite/Technologies/ProtocolSpecification.aspx
diff --git a/Documentation/nios2/README b/Documentation/nios2/README
new file mode 100644
index 000000000..054a67d55
--- /dev/null
+++ b/Documentation/nios2/README
@@ -0,0 +1,23 @@
+Linux on the Nios II architecture
+=================================
+
+This is a port of Linux to Nios II (nios2) processor.
+
+In order to compile for Nios II, you need a version of GCC with support for the generic
+system call ABI. Please see this link for more information on how compiling and booting
+software for the Nios II platform:
+http://www.rocketboards.org/foswiki/Documentation/NiosIILinuxUserManual
+
+For reference, please see the following link:
+http://www.altera.com/literature/lit-nio2.jsp
+
+What is Nios II?
+================
+Nios II is a 32-bit embedded-processor architecture designed specifically for the
+Altera family of FPGAs. In order to support Linux, Nios II needs to be configured
+with MMU and hardware multiplier enabled.
+
+Nios II ABI
+===========
+Please refer to chapter "Application Binary Interface" in Nios II Processor Reference
+Handbook.
diff --git a/Documentation/nommu-mmap.txt b/Documentation/nommu-mmap.txt
new file mode 100644
index 000000000..ae57b9ea0
--- /dev/null
+++ b/Documentation/nommu-mmap.txt
@@ -0,0 +1,291 @@
+ =============================
+ NO-MMU MEMORY MAPPING SUPPORT
+ =============================
+
+The kernel has limited support for memory mapping under no-MMU conditions, such
+as are used in uClinux environments. From the userspace point of view, memory
+mapping is made use of in conjunction with the mmap() system call, the shmat()
+call and the execve() system call. From the kernel's point of view, execve()
+mapping is actually performed by the binfmt drivers, which call back into the
+mmap() routines to do the actual work.
+
+Memory mapping behaviour also involves the way fork(), vfork(), clone() and
+ptrace() work. Under uClinux there is no fork(), and clone() must be supplied
+the CLONE_VM flag.
+
+The behaviour is similar between the MMU and no-MMU cases, but not identical;
+and it's also much more restricted in the latter case:
+
+ (*) Anonymous mapping, MAP_PRIVATE
+
+ In the MMU case: VM regions backed by arbitrary pages; copy-on-write
+ across fork.
+
+ In the no-MMU case: VM regions backed by arbitrary contiguous runs of
+ pages.
+
+ (*) Anonymous mapping, MAP_SHARED
+
+ These behave very much like private mappings, except that they're
+ shared across fork() or clone() without CLONE_VM in the MMU case. Since
+ the no-MMU case doesn't support these, behaviour is identical to
+ MAP_PRIVATE there.
+
+ (*) File, MAP_PRIVATE, PROT_READ / PROT_EXEC, !PROT_WRITE
+
+ In the MMU case: VM regions backed by pages read from file; changes to
+ the underlying file are reflected in the mapping; copied across fork.
+
+ In the no-MMU case:
+
+ - If one exists, the kernel will re-use an existing mapping to the
+ same segment of the same file if that has compatible permissions,
+ even if this was created by another process.
+
+ - If possible, the file mapping will be directly on the backing device
+ if the backing device has the NOMMU_MAP_DIRECT capability and
+ appropriate mapping protection capabilities. Ramfs, romfs, cramfs
+ and mtd might all permit this.
+
+ - If the backing device device can't or won't permit direct sharing,
+ but does have the NOMMU_MAP_COPY capability, then a copy of the
+ appropriate bit of the file will be read into a contiguous bit of
+ memory and any extraneous space beyond the EOF will be cleared
+
+ - Writes to the file do not affect the mapping; writes to the mapping
+ are visible in other processes (no MMU protection), but should not
+ happen.
+
+ (*) File, MAP_PRIVATE, PROT_READ / PROT_EXEC, PROT_WRITE
+
+ In the MMU case: like the non-PROT_WRITE case, except that the pages in
+ question get copied before the write actually happens. From that point
+ on writes to the file underneath that page no longer get reflected into
+ the mapping's backing pages. The page is then backed by swap instead.
+
+ In the no-MMU case: works much like the non-PROT_WRITE case, except
+ that a copy is always taken and never shared.
+
+ (*) Regular file / blockdev, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
+
+ In the MMU case: VM regions backed by pages read from file; changes to
+ pages written back to file; writes to file reflected into pages backing
+ mapping; shared across fork.
+
+ In the no-MMU case: not supported.
+
+ (*) Memory backed regular file, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
+
+ In the MMU case: As for ordinary regular files.
+
+ In the no-MMU case: The filesystem providing the memory-backed file
+ (such as ramfs or tmpfs) may choose to honour an open, truncate, mmap
+ sequence by providing a contiguous sequence of pages to map. In that
+ case, a shared-writable memory mapping will be possible. It will work
+ as for the MMU case. If the filesystem does not provide any such
+ support, then the mapping request will be denied.
+
+ (*) Memory backed blockdev, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
+
+ In the MMU case: As for ordinary regular files.
+
+ In the no-MMU case: As for memory backed regular files, but the
+ blockdev must be able to provide a contiguous run of pages without
+ truncate being called. The ramdisk driver could do this if it allocated
+ all its memory as a contiguous array upfront.
+
+ (*) Memory backed chardev, MAP_SHARED, PROT_READ / PROT_EXEC / PROT_WRITE
+
+ In the MMU case: As for ordinary regular files.
+
+ In the no-MMU case: The character device driver may choose to honour
+ the mmap() by providing direct access to the underlying device if it
+ provides memory or quasi-memory that can be accessed directly. Examples
+ of such are frame buffers and flash devices. If the driver does not
+ provide any such support, then the mapping request will be denied.
+
+
+============================
+FURTHER NOTES ON NO-MMU MMAP
+============================
+
+ (*) A request for a private mapping of a file may return a buffer that is not
+ page-aligned. This is because XIP may take place, and the data may not be
+ paged aligned in the backing store.
+
+ (*) A request for an anonymous mapping will always be page aligned. If
+ possible the size of the request should be a power of two otherwise some
+ of the space may be wasted as the kernel must allocate a power-of-2
+ granule but will only discard the excess if appropriately configured as
+ this has an effect on fragmentation.
+
+ (*) The memory allocated by a request for an anonymous mapping will normally
+ be cleared by the kernel before being returned in accordance with the
+ Linux man pages (ver 2.22 or later).
+
+ In the MMU case this can be achieved with reasonable performance as
+ regions are backed by virtual pages, with the contents only being mapped
+ to cleared physical pages when a write happens on that specific page
+ (prior to which, the pages are effectively mapped to the global zero page
+ from which reads can take place). This spreads out the time it takes to
+ initialize the contents of a page - depending on the write-usage of the
+ mapping.
+
+ In the no-MMU case, however, anonymous mappings are backed by physical
+ pages, and the entire map is cleared at allocation time. This can cause
+ significant delays during a userspace malloc() as the C library does an
+ anonymous mapping and the kernel then does a memset for the entire map.
+
+ However, for memory that isn't required to be precleared - such as that
+ returned by malloc() - mmap() can take a MAP_UNINITIALIZED flag to
+ indicate to the kernel that it shouldn't bother clearing the memory before
+ returning it. Note that CONFIG_MMAP_ALLOW_UNINITIALIZED must be enabled
+ to permit this, otherwise the flag will be ignored.
+
+ uClibc uses this to speed up malloc(), and the ELF-FDPIC binfmt uses this
+ to allocate the brk and stack region.
+
+ (*) A list of all the private copy and anonymous mappings on the system is
+ visible through /proc/maps in no-MMU mode.
+
+ (*) A list of all the mappings in use by a process is visible through
+ /proc/<pid>/maps in no-MMU mode.
+
+ (*) Supplying MAP_FIXED or a requesting a particular mapping address will
+ result in an error.
+
+ (*) Files mapped privately usually have to have a read method provided by the
+ driver or filesystem so that the contents can be read into the memory
+ allocated if mmap() chooses not to map the backing device directly. An
+ error will result if they don't. This is most likely to be encountered
+ with character device files, pipes, fifos and sockets.
+
+
+==========================
+INTERPROCESS SHARED MEMORY
+==========================
+
+Both SYSV IPC SHM shared memory and POSIX shared memory is supported in NOMMU
+mode. The former through the usual mechanism, the latter through files created
+on ramfs or tmpfs mounts.
+
+
+=======
+FUTEXES
+=======
+
+Futexes are supported in NOMMU mode if the arch supports them. An error will
+be given if an address passed to the futex system call lies outside the
+mappings made by a process or if the mapping in which the address lies does not
+support futexes (such as an I/O chardev mapping).
+
+
+=============
+NO-MMU MREMAP
+=============
+
+The mremap() function is partially supported. It may change the size of a
+mapping, and may move it[*] if MREMAP_MAYMOVE is specified and if the new size
+of the mapping exceeds the size of the slab object currently occupied by the
+memory to which the mapping refers, or if a smaller slab object could be used.
+
+MREMAP_FIXED is not supported, though it is ignored if there's no change of
+address and the object does not need to be moved.
+
+Shared mappings may not be moved. Shareable mappings may not be moved either,
+even if they are not currently shared.
+
+The mremap() function must be given an exact match for base address and size of
+a previously mapped object. It may not be used to create holes in existing
+mappings, move parts of existing mappings or resize parts of mappings. It must
+act on a complete mapping.
+
+[*] Not currently supported.
+
+
+============================================
+PROVIDING SHAREABLE CHARACTER DEVICE SUPPORT
+============================================
+
+To provide shareable character device support, a driver must provide a
+file->f_op->get_unmapped_area() operation. The mmap() routines will call this
+to get a proposed address for the mapping. This may return an error if it
+doesn't wish to honour the mapping because it's too long, at a weird offset,
+under some unsupported combination of flags or whatever.
+
+The driver should also provide backing device information with capabilities set
+to indicate the permitted types of mapping on such devices. The default is
+assumed to be readable and writable, not executable, and only shareable
+directly (can't be copied).
+
+The file->f_op->mmap() operation will be called to actually inaugurate the
+mapping. It can be rejected at that point. Returning the ENOSYS error will
+cause the mapping to be copied instead if NOMMU_MAP_COPY is specified.
+
+The vm_ops->close() routine will be invoked when the last mapping on a chardev
+is removed. An existing mapping will be shared, partially or not, if possible
+without notifying the driver.
+
+It is permitted also for the file->f_op->get_unmapped_area() operation to
+return -ENOSYS. This will be taken to mean that this operation just doesn't
+want to handle it, despite the fact it's got an operation. For instance, it
+might try directing the call to a secondary driver which turns out not to
+implement it. Such is the case for the framebuffer driver which attempts to
+direct the call to the device-specific driver. Under such circumstances, the
+mapping request will be rejected if NOMMU_MAP_COPY is not specified, and a
+copy mapped otherwise.
+
+IMPORTANT NOTE:
+
+ Some types of device may present a different appearance to anyone
+ looking at them in certain modes. Flash chips can be like this; for
+ instance if they're in programming or erase mode, you might see the
+ status reflected in the mapping, instead of the data.
+
+ In such a case, care must be taken lest userspace see a shared or a
+ private mapping showing such information when the driver is busy
+ controlling the device. Remember especially: private executable
+ mappings may still be mapped directly off the device under some
+ circumstances!
+
+
+==============================================
+PROVIDING SHAREABLE MEMORY-BACKED FILE SUPPORT
+==============================================
+
+Provision of shared mappings on memory backed files is similar to the provision
+of support for shared mapped character devices. The main difference is that the
+filesystem providing the service will probably allocate a contiguous collection
+of pages and permit mappings to be made on that.
+
+It is recommended that a truncate operation applied to such a file that
+increases the file size, if that file is empty, be taken as a request to gather
+enough pages to honour a mapping. This is required to support POSIX shared
+memory.
+
+Memory backed devices are indicated by the mapping's backing device info having
+the memory_backed flag set.
+
+
+========================================
+PROVIDING SHAREABLE BLOCK DEVICE SUPPORT
+========================================
+
+Provision of shared mappings on block device files is exactly the same as for
+character devices. If there isn't a real device underneath, then the driver
+should allocate sufficient contiguous memory to honour any supported mapping.
+
+
+=================================
+ADJUSTING PAGE TRIMMING BEHAVIOUR
+=================================
+
+NOMMU mmap automatically rounds up to the nearest power-of-2 number of pages
+when performing an allocation. This can have adverse effects on memory
+fragmentation, and as such, is left configurable. The default behaviour is to
+aggressively trim allocations and discard any excess pages back in to the page
+allocator. In order to retain finer-grained control over fragmentation, this
+behaviour can either be disabled completely, or bumped up to a higher page
+watermark where trimming begins.
+
+Page trimming behaviour is configurable via the sysctl `vm.nr_trim_pages'.
diff --git a/Documentation/numastat.txt b/Documentation/numastat.txt
new file mode 100644
index 000000000..520327790
--- /dev/null
+++ b/Documentation/numastat.txt
@@ -0,0 +1,27 @@
+
+Numa policy hit/miss statistics
+
+/sys/devices/system/node/node*/numastat
+
+All units are pages. Hugepages have separate counters.
+
+numa_hit A process wanted to allocate memory from this node,
+ and succeeded.
+
+numa_miss A process wanted to allocate memory from another node,
+ but ended up with memory from this node.
+
+numa_foreign A process wanted to allocate on this node,
+ but ended up with memory from another one.
+
+local_node A process ran on this node and got memory from it.
+
+other_node A process ran on this node and got memory from another node.
+
+interleave_hit Interleaving wanted to allocate from this node
+ and succeeded.
+
+For easier reading you can use the numastat utility from the numactl package
+(http://oss.sgi.com/projects/libnuma/). Note that it only works
+well right now on machines with a small number of CPUs.
+
diff --git a/Documentation/oops-tracing.txt b/Documentation/oops-tracing.txt
new file mode 100644
index 000000000..f3ac05cc2
--- /dev/null
+++ b/Documentation/oops-tracing.txt
@@ -0,0 +1,279 @@
+NOTE: ksymoops is useless on 2.6. Please use the Oops in its original format
+(from dmesg, etc). Ignore any references in this or other docs to "decoding
+the Oops" or "running it through ksymoops". If you post an Oops from 2.6 that
+has been run through ksymoops, people will just tell you to repost it.
+
+Quick Summary
+-------------
+
+Find the Oops and send it to the maintainer of the kernel area that seems to be
+involved with the problem. Don't worry too much about getting the wrong person.
+If you are unsure send it to the person responsible for the code relevant to
+what you were doing. If it occurs repeatably try and describe how to recreate
+it. That's worth even more than the oops.
+
+If you are totally stumped as to whom to send the report, send it to
+linux-kernel@vger.kernel.org. Thanks for your help in making Linux as
+stable as humanly possible.
+
+Where is the Oops?
+----------------------
+
+Normally the Oops text is read from the kernel buffers by klogd and
+handed to syslogd which writes it to a syslog file, typically
+/var/log/messages (depends on /etc/syslog.conf). Sometimes klogd dies,
+in which case you can run dmesg > file to read the data from the kernel
+buffers and save it. Or you can cat /proc/kmsg > file, however you
+have to break in to stop the transfer, kmsg is a "never ending file".
+If the machine has crashed so badly that you cannot enter commands or
+the disk is not available then you have three options :-
+
+(1) Hand copy the text from the screen and type it in after the machine
+ has restarted. Messy but it is the only option if you have not
+ planned for a crash. Alternatively, you can take a picture of
+ the screen with a digital camera - not nice, but better than
+ nothing. If the messages scroll off the top of the console, you
+ may find that booting with a higher resolution (eg, vga=791)
+ will allow you to read more of the text. (Caveat: This needs vesafb,
+ so won't help for 'early' oopses)
+
+(2) Boot with a serial console (see Documentation/serial-console.txt),
+ run a null modem to a second machine and capture the output there
+ using your favourite communication program. Minicom works well.
+
+(3) Use Kdump (see Documentation/kdump/kdump.txt),
+ extract the kernel ring buffer from old memory with using dmesg
+ gdbmacro in Documentation/kdump/gdbmacros.txt.
+
+
+Full Information
+----------------
+
+NOTE: the message from Linus below applies to 2.4 kernel. I have preserved it
+for historical reasons, and because some of the information in it still
+applies. Especially, please ignore any references to ksymoops.
+
+From: Linus Torvalds <torvalds@osdl.org>
+
+How to track down an Oops.. [originally a mail to linux-kernel]
+
+The main trick is having 5 years of experience with those pesky oops
+messages ;-)
+
+Actually, there are things you can do that make this easier. I have two
+separate approaches:
+
+ gdb /usr/src/linux/vmlinux
+ gdb> disassemble <offending_function>
+
+That's the easy way to find the problem, at least if the bug-report is
+well made (like this one was - run through ksymoops to get the
+information of which function and the offset in the function that it
+happened in).
+
+Oh, it helps if the report happens on a kernel that is compiled with the
+same compiler and similar setups.
+
+The other thing to do is disassemble the "Code:" part of the bug report:
+ksymoops will do this too with the correct tools, but if you don't have
+the tools you can just do a silly program:
+
+ char str[] = "\xXX\xXX\xXX...";
+ main(){}
+
+and compile it with gcc -g and then do "disassemble str" (where the "XX"
+stuff are the values reported by the Oops - you can just cut-and-paste
+and do a replace of spaces to "\x" - that's what I do, as I'm too lazy
+to write a program to automate this all).
+
+Alternatively, you can use the shell script in scripts/decodecode.
+Its usage is: decodecode < oops.txt
+
+The hex bytes that follow "Code:" may (in some architectures) have a series
+of bytes that precede the current instruction pointer as well as bytes at and
+following the current instruction pointer. In some cases, one instruction
+byte or word is surrounded by <> or (), as in "<86>" or "(f00d)". These
+<> or () markings indicate the current instruction pointer. Example from
+i386, split into multiple lines for readability:
+
+Code: f9 0f 8d f9 00 00 00 8d 42 0c e8 dd 26 11 c7 a1 60 ea 2b f9 8b 50 08 a1
+64 ea 2b f9 8d 34 82 8b 1e 85 db 74 6d 8b 15 60 ea 2b f9 <8b> 43 04 39 42 54
+7e 04 40 89 42 54 8b 43 04 3b 05 00 f6 52 c0
+
+Finally, if you want to see where the code comes from, you can do
+
+ cd /usr/src/linux
+ make fs/buffer.s # or whatever file the bug happened in
+
+and then you get a better idea of what happens than with the gdb
+disassembly.
+
+Now, the trick is just then to combine all the data you have: the C
+sources (and general knowledge of what it _should_ do), the assembly
+listing and the code disassembly (and additionally the register dump you
+also get from the "oops" message - that can be useful to see _what_ the
+corrupted pointers were, and when you have the assembler listing you can
+also match the other registers to whatever C expressions they were used
+for).
+
+Essentially, you just look at what doesn't match (in this case it was the
+"Code" disassembly that didn't match with what the compiler generated).
+Then you need to find out _why_ they don't match. Often it's simple - you
+see that the code uses a NULL pointer and then you look at the code and
+wonder how the NULL pointer got there, and if it's a valid thing to do
+you just check against it..
+
+Now, if somebody gets the idea that this is time-consuming and requires
+some small amount of concentration, you're right. Which is why I will
+mostly just ignore any panic reports that don't have the symbol table
+info etc looked up: it simply gets too hard to look it up (I have some
+programs to search for specific patterns in the kernel code segment, and
+sometimes I have been able to look up those kinds of panics too, but
+that really requires pretty good knowledge of the kernel just to be able
+to pick out the right sequences etc..)
+
+_Sometimes_ it happens that I just see the disassembled code sequence
+from the panic, and I know immediately where it's coming from. That's when
+I get worried that I've been doing this for too long ;-)
+
+ Linus
+
+
+---------------------------------------------------------------------------
+Notes on Oops tracing with klogd:
+
+In order to help Linus and the other kernel developers there has been
+substantial support incorporated into klogd for processing protection
+faults. In order to have full support for address resolution at least
+version 1.3-pl3 of the sysklogd package should be used.
+
+When a protection fault occurs the klogd daemon automatically
+translates important addresses in the kernel log messages to their
+symbolic equivalents. This translated kernel message is then
+forwarded through whatever reporting mechanism klogd is using. The
+protection fault message can be simply cut out of the message files
+and forwarded to the kernel developers.
+
+Two types of address resolution are performed by klogd. The first is
+static translation and the second is dynamic translation. Static
+translation uses the System.map file in much the same manner that
+ksymoops does. In order to do static translation the klogd daemon
+must be able to find a system map file at daemon initialization time.
+See the klogd man page for information on how klogd searches for map
+files.
+
+Dynamic address translation is important when kernel loadable modules
+are being used. Since memory for kernel modules is allocated from the
+kernel's dynamic memory pools there are no fixed locations for either
+the start of the module or for functions and symbols in the module.
+
+The kernel supports system calls which allow a program to determine
+which modules are loaded and their location in memory. Using these
+system calls the klogd daemon builds a symbol table which can be used
+to debug a protection fault which occurs in a loadable kernel module.
+
+At the very minimum klogd will provide the name of the module which
+generated the protection fault. There may be additional symbolic
+information available if the developer of the loadable module chose to
+export symbol information from the module.
+
+Since the kernel module environment can be dynamic there must be a
+mechanism for notifying the klogd daemon when a change in module
+environment occurs. There are command line options available which
+allow klogd to signal the currently executing daemon that symbol
+information should be refreshed. See the klogd manual page for more
+information.
+
+A patch is included with the sysklogd distribution which modifies the
+modules-2.0.0 package to automatically signal klogd whenever a module
+is loaded or unloaded. Applying this patch provides essentially
+seamless support for debugging protection faults which occur with
+kernel loadable modules.
+
+The following is an example of a protection fault in a loadable module
+processed by klogd:
+---------------------------------------------------------------------------
+Aug 29 09:51:01 blizard kernel: Unable to handle kernel paging request at virtual address f15e97cc
+Aug 29 09:51:01 blizard kernel: current->tss.cr3 = 0062d000, %cr3 = 0062d000
+Aug 29 09:51:01 blizard kernel: *pde = 00000000
+Aug 29 09:51:01 blizard kernel: Oops: 0002
+Aug 29 09:51:01 blizard kernel: CPU: 0
+Aug 29 09:51:01 blizard kernel: EIP: 0010:[oops:_oops+16/3868]
+Aug 29 09:51:01 blizard kernel: EFLAGS: 00010212
+Aug 29 09:51:01 blizard kernel: eax: 315e97cc ebx: 003a6f80 ecx: 001be77b edx: 00237c0c
+Aug 29 09:51:01 blizard kernel: esi: 00000000 edi: bffffdb3 ebp: 00589f90 esp: 00589f8c
+Aug 29 09:51:01 blizard kernel: ds: 0018 es: 0018 fs: 002b gs: 002b ss: 0018
+Aug 29 09:51:01 blizard kernel: Process oops_test (pid: 3374, process nr: 21, stackpage=00589000)
+Aug 29 09:51:01 blizard kernel: Stack: 315e97cc 00589f98 0100b0b4 bffffed4 0012e38e 00240c64 003a6f80 00000001
+Aug 29 09:51:01 blizard kernel: 00000000 00237810 bfffff00 0010a7fa 00000003 00000001 00000000 bfffff00
+Aug 29 09:51:01 blizard kernel: bffffdb3 bffffed4 ffffffda 0000002b 0007002b 0000002b 0000002b 00000036
+Aug 29 09:51:01 blizard kernel: Call Trace: [oops:_oops_ioctl+48/80] [_sys_ioctl+254/272] [_system_call+82/128]
+Aug 29 09:51:01 blizard kernel: Code: c7 00 05 00 00 00 eb 08 90 90 90 90 90 90 90 90 89 ec 5d c3
+---------------------------------------------------------------------------
+
+Dr. G.W. Wettstein Oncology Research Div. Computing Facility
+Roger Maris Cancer Center INTERNET: greg@wind.rmcc.com
+820 4th St. N.
+Fargo, ND 58122
+Phone: 701-234-7556
+
+
+---------------------------------------------------------------------------
+Tainted kernels:
+
+Some oops reports contain the string 'Tainted: ' after the program
+counter. This indicates that the kernel has been tainted by some
+mechanism. The string is followed by a series of position-sensitive
+characters, each representing a particular tainted value.
+
+ 1: 'G' if all modules loaded have a GPL or compatible license, 'P' if
+ any proprietary module has been loaded. Modules without a
+ MODULE_LICENSE or with a MODULE_LICENSE that is not recognised by
+ insmod as GPL compatible are assumed to be proprietary.
+
+ 2: 'F' if any module was force loaded by "insmod -f", ' ' if all
+ modules were loaded normally.
+
+ 3: 'S' if the oops occurred on an SMP kernel running on hardware that
+ hasn't been certified as safe to run multiprocessor.
+ Currently this occurs only on various Athlons that are not
+ SMP capable.
+
+ 4: 'R' if a module was force unloaded by "rmmod -f", ' ' if all
+ modules were unloaded normally.
+
+ 5: 'M' if any processor has reported a Machine Check Exception,
+ ' ' if no Machine Check Exceptions have occurred.
+
+ 6: 'B' if a page-release function has found a bad page reference or
+ some unexpected page flags.
+
+ 7: 'U' if a user or user application specifically requested that the
+ Tainted flag be set, ' ' otherwise.
+
+ 8: 'D' if the kernel has died recently, i.e. there was an OOPS or BUG.
+
+ 9: 'A' if the ACPI table has been overridden.
+
+ 10: 'W' if a warning has previously been issued by the kernel.
+ (Though some warnings may set more specific taint flags.)
+
+ 11: 'C' if a staging driver has been loaded.
+
+ 12: 'I' if the kernel is working around a severe bug in the platform
+ firmware (BIOS or similar).
+
+ 13: 'O' if an externally-built ("out-of-tree") module has been loaded.
+
+ 14: 'E' if an unsigned module has been loaded in a kernel supporting
+ module signature.
+
+ 15: 'L' if a soft lockup has previously occurred on the system.
+
+ 16: 'K' if the kernel has been live patched.
+
+The primary reason for the 'Tainted: ' string is to tell kernel
+debuggers if this is a clean kernel or if anything unusual has
+occurred. Tainting is permanent: even if an offending module is
+unloaded, the tainted value remains to indicate that the kernel is not
+trustworthy.
diff --git a/Documentation/padata.txt b/Documentation/padata.txt
new file mode 100644
index 000000000..7ddfe216a
--- /dev/null
+++ b/Documentation/padata.txt
@@ -0,0 +1,160 @@
+The padata parallel execution mechanism
+Last updated for 2.6.36
+
+Padata is a mechanism by which the kernel can farm work out to be done in
+parallel on multiple CPUs while retaining the ordering of tasks. It was
+developed for use with the IPsec code, which needs to be able to perform
+encryption and decryption on large numbers of packets without reordering
+those packets. The crypto developers made a point of writing padata in a
+sufficiently general fashion that it could be put to other uses as well.
+
+The first step in using padata is to set up a padata_instance structure for
+overall control of how tasks are to be run:
+
+ #include <linux/padata.h>
+
+ struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+ const struct cpumask *pcpumask,
+ const struct cpumask *cbcpumask);
+
+The pcpumask describes which processors will be used to execute work
+submitted to this instance in parallel. The cbcpumask defines which
+processors are allowed to be used as the serialization callback processor.
+The workqueue wq is where the work will actually be done; it should be
+a multithreaded queue, naturally.
+
+To allocate a padata instance with the cpu_possible_mask for both
+cpumasks this helper function can be used:
+
+ struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq);
+
+Note: Padata maintains two kinds of cpumasks internally. The user supplied
+cpumasks, submitted by padata_alloc/padata_alloc_possible and the 'usable'
+cpumasks. The usable cpumasks are always a subset of active CPUs in the
+user supplied cpumasks; these are the cpumasks padata actually uses. So
+it is legal to supply a cpumask to padata that contains offline CPUs.
+Once an offline CPU in the user supplied cpumask comes online, padata
+is going to use it.
+
+There are functions for enabling and disabling the instance:
+
+ int padata_start(struct padata_instance *pinst);
+ void padata_stop(struct padata_instance *pinst);
+
+These functions are setting or clearing the "PADATA_INIT" flag;
+if that flag is not set, other functions will refuse to work.
+padata_start returns zero on success (flag set) or -EINVAL if the
+padata cpumask contains no active CPU (flag not set).
+padata_stop clears the flag and blocks until the padata instance
+is unused.
+
+The list of CPUs to be used can be adjusted with these functions:
+
+ int padata_set_cpumasks(struct padata_instance *pinst,
+ cpumask_var_t pcpumask,
+ cpumask_var_t cbcpumask);
+ int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
+ cpumask_var_t cpumask);
+ int padata_add_cpu(struct padata_instance *pinst, int cpu, int mask);
+ int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask);
+
+Changing the CPU masks are expensive operations, though, so it should not be
+done with great frequency.
+
+It's possible to change both cpumasks of a padata instance with
+padata_set_cpumasks by specifying the cpumasks for parallel execution (pcpumask)
+and for the serial callback function (cbcpumask). padata_set_cpumask is used to
+change just one of the cpumasks. Here cpumask_type is one of PADATA_CPU_SERIAL,
+PADATA_CPU_PARALLEL and cpumask specifies the new cpumask to use.
+To simply add or remove one CPU from a certain cpumask the functions
+padata_add_cpu/padata_remove_cpu are used. cpu specifies the CPU to add or
+remove and mask is one of PADATA_CPU_SERIAL, PADATA_CPU_PARALLEL.
+
+If a user is interested in padata cpumask changes, he can register to
+the padata cpumask change notifier:
+
+ int padata_register_cpumask_notifier(struct padata_instance *pinst,
+ struct notifier_block *nblock);
+
+To unregister from that notifier:
+
+ int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
+ struct notifier_block *nblock);
+
+The padata cpumask change notifier notifies about changes of the usable
+cpumasks, i.e. the subset of active CPUs in the user supplied cpumask.
+
+Padata calls the notifier chain with:
+
+ blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
+ notification_mask,
+ &pd_new->cpumask);
+
+Here cpumask_change_notifier is registered notifier, notification_mask
+is one of PADATA_CPU_SERIAL, PADATA_CPU_PARALLEL and cpumask is a pointer
+to a struct padata_cpumask that contains the new cpumask information.
+
+Actually submitting work to the padata instance requires the creation of a
+padata_priv structure:
+
+ struct padata_priv {
+ /* Other stuff here... */
+ void (*parallel)(struct padata_priv *padata);
+ void (*serial)(struct padata_priv *padata);
+ };
+
+This structure will almost certainly be embedded within some larger
+structure specific to the work to be done. Most of its fields are private to
+padata, but the structure should be zeroed at initialisation time, and the
+parallel() and serial() functions should be provided. Those functions will
+be called in the process of getting the work done as we will see
+momentarily.
+
+The submission of work is done with:
+
+ int padata_do_parallel(struct padata_instance *pinst,
+ struct padata_priv *padata, int cb_cpu);
+
+The pinst and padata structures must be set up as described above; cb_cpu
+specifies which CPU will be used for the final callback when the work is
+done; it must be in the current instance's CPU mask. The return value from
+padata_do_parallel() is zero on success, indicating that the work is in
+progress. -EBUSY means that somebody, somewhere else is messing with the
+instance's CPU mask, while -EINVAL is a complaint about cb_cpu not being
+in that CPU mask or about a not running instance.
+
+Each task submitted to padata_do_parallel() will, in turn, be passed to
+exactly one call to the above-mentioned parallel() function, on one CPU, so
+true parallelism is achieved by submitting multiple tasks. Despite the
+fact that the workqueue is used to make these calls, parallel() is run with
+software interrupts disabled and thus cannot sleep. The parallel()
+function gets the padata_priv structure pointer as its lone parameter;
+information about the actual work to be done is probably obtained by using
+container_of() to find the enclosing structure.
+
+Note that parallel() has no return value; the padata subsystem assumes that
+parallel() will take responsibility for the task from this point. The work
+need not be completed during this call, but, if parallel() leaves work
+outstanding, it should be prepared to be called again with a new job before
+the previous one completes. When a task does complete, parallel() (or
+whatever function actually finishes the job) should inform padata of the
+fact with a call to:
+
+ void padata_do_serial(struct padata_priv *padata);
+
+At some point in the future, padata_do_serial() will trigger a call to the
+serial() function in the padata_priv structure. That call will happen on
+the CPU requested in the initial call to padata_do_parallel(); it, too, is
+done through the workqueue, but with local software interrupts disabled.
+Note that this call may be deferred for a while since the padata code takes
+pains to ensure that tasks are completed in the order in which they were
+submitted.
+
+The one remaining function in the padata API should be called to clean up
+when a padata instance is no longer needed:
+
+ void padata_free(struct padata_instance *pinst);
+
+This function will busy-wait while any remaining tasks are completed, so it
+might be best not to call it while there is work outstanding. Shutting
+down the workqueue, if necessary, should be done separately.
diff --git a/Documentation/parisc/00-INDEX b/Documentation/parisc/00-INDEX
new file mode 100644
index 000000000..cbd060961
--- /dev/null
+++ b/Documentation/parisc/00-INDEX
@@ -0,0 +1,6 @@
+00-INDEX
+ - this file.
+debugging
+ - some debugging hints for real-mode code
+registers
+ - current/planned usage of registers
diff --git a/Documentation/parisc/debugging b/Documentation/parisc/debugging
new file mode 100644
index 000000000..7d75223fa
--- /dev/null
+++ b/Documentation/parisc/debugging
@@ -0,0 +1,39 @@
+okay, here are some hints for debugging the lower-level parts of
+linux/parisc.
+
+
+1. Absolute addresses
+
+A lot of the assembly code currently runs in real mode, which means
+absolute addresses are used instead of virtual addresses as in the
+rest of the kernel. To translate an absolute address to a virtual
+address you can lookup in System.map, add __PAGE_OFFSET (0x10000000
+currently).
+
+
+2. HPMCs
+
+When real-mode code tries to access non-existent memory, you'll get
+an HPMC instead of a kernel oops. To debug an HPMC, try to find
+the System Responder/Requestor addresses. The System Requestor
+address should match (one of the) processor HPAs (high addresses in
+the I/O range); the System Responder address is the address real-mode
+code tried to access.
+
+Typical values for the System Responder address are addresses larger
+than __PAGE_OFFSET (0x10000000) which mean a virtual address didn't
+get translated to a physical address before real-mode code tried to
+access it.
+
+
+3. Q bit fun
+
+Certain, very critical code has to clear the Q bit in the PSW. What
+happens when the Q bit is cleared is the CPU does not update the
+registers interruption handlers read to find out where the machine
+was interrupted - so if you get an interruption between the instruction
+that clears the Q bit and the RFI that sets it again you don't know
+where exactly it happened. If you're lucky the IAOQ will point to the
+instruction that cleared the Q bit, if you're not it points anywhere
+at all. Usually Q bit problems will show themselves in unexplainable
+system hangs or running off the end of physical memory.
diff --git a/Documentation/parisc/registers b/Documentation/parisc/registers
new file mode 100644
index 000000000..10c7d1730
--- /dev/null
+++ b/Documentation/parisc/registers
@@ -0,0 +1,129 @@
+Register Usage for Linux/PA-RISC
+
+[ an asterisk is used for planned usage which is currently unimplemented ]
+
+ General Registers as specified by ABI
+
+ Control Registers
+
+CR 0 (Recovery Counter) used for ptrace
+CR 1-CR 7(undefined) unused
+CR 8 (Protection ID) per-process value*
+CR 9, 12, 13 (PIDS) unused
+CR10 (CCR) lazy FPU saving*
+CR11 as specified by ABI (SAR)
+CR14 (interruption vector) initialized to fault_vector
+CR15 (EIEM) initialized to all ones*
+CR16 (Interval Timer) read for cycle count/write starts Interval Tmr
+CR17-CR22 interruption parameters
+CR19 Interrupt Instruction Register
+CR20 Interrupt Space Register
+CR21 Interrupt Offset Register
+CR22 Interrupt PSW
+CR23 (EIRR) read for pending interrupts/write clears bits
+CR24 (TR 0) Kernel Space Page Directory Pointer
+CR25 (TR 1) User Space Page Directory Pointer
+CR26 (TR 2) not used
+CR27 (TR 3) Thread descriptor pointer
+CR28 (TR 4) not used
+CR29 (TR 5) not used
+CR30 (TR 6) current / 0
+CR31 (TR 7) Temporary register, used in various places
+
+ Space Registers (kernel mode)
+
+SR0 temporary space register
+SR4-SR7 set to 0
+SR1 temporary space register
+SR2 kernel should not clobber this
+SR3 used for userspace accesses (current process)
+
+ Space Registers (user mode)
+
+SR0 temporary space register
+SR1 temporary space register
+SR2 holds space of linux gateway page
+SR3 holds user address space value while in kernel
+SR4-SR7 Defines short address space for user/kernel
+
+
+ Processor Status Word
+
+W (64-bit addresses) 0
+E (Little-endian) 0
+S (Secure Interval Timer) 0
+T (Taken Branch Trap) 0
+H (Higher-privilege trap) 0
+L (Lower-privilege trap) 0
+N (Nullify next instruction) used by C code
+X (Data memory break disable) 0
+B (Taken Branch) used by C code
+C (code address translation) 1, 0 while executing real-mode code
+V (divide step correction) used by C code
+M (HPMC mask) 0, 1 while executing HPMC handler*
+C/B (carry/borrow bits) used by C code
+O (ordered references) 1*
+F (performance monitor) 0
+R (Recovery Counter trap) 0
+Q (collect interruption state) 1 (0 in code directly preceding an rfi)
+P (Protection Identifiers) 1*
+D (Data address translation) 1, 0 while executing real-mode code
+I (external interrupt mask) used by cli()/sti() macros
+
+ "Invisible" Registers
+
+PSW default W value 0
+PSW default E value 0
+Shadow Registers used by interruption handler code
+TOC enable bit 1
+
+=========================================================================
+
+The PA-RISC architecture defines 7 registers as "shadow registers".
+Those are used in RETURN FROM INTERRUPTION AND RESTORE instruction to reduce
+the state save and restore time by eliminating the need for general register
+(GR) saves and restores in interruption handlers.
+Shadow registers are the GRs 1, 8, 9, 16, 17, 24, and 25.
+
+=========================================================================
+Register usage notes, originally from John Marvin, with some additional
+notes from Randolph Chung.
+
+For the general registers:
+
+r1,r2,r19-r26,r28,r29 & r31 can be used without saving them first. And of
+course, you need to save them if you care about them, before calling
+another procedure. Some of the above registers do have special meanings
+that you should be aware of:
+
+ r1: The addil instruction is hardwired to place its result in r1,
+ so if you use that instruction be aware of that.
+
+ r2: This is the return pointer. In general you don't want to
+ use this, since you need the pointer to get back to your
+ caller. However, it is grouped with this set of registers
+ since the caller can't rely on the value being the same
+ when you return, i.e. you can copy r2 to another register
+ and return through that register after trashing r2, and
+ that should not cause a problem for the calling routine.
+
+ r19-r22: these are generally regarded as temporary registers.
+ Note that in 64 bit they are arg7-arg4.
+
+ r23-r26: these are arg3-arg0, i.e. you can use them if you
+ don't care about the values that were passed in anymore.
+
+ r28,r29: are ret0 and ret1. They are what you pass return values
+ in. r28 is the primary return. When returning small structures
+ r29 may also be used to pass data back to the caller.
+
+ r30: stack pointer
+
+ r31: the ble instruction puts the return pointer in here.
+
+
+r3-r18,r27,r30 need to be saved and restored. r3-r18 are just
+ general purpose registers. r27 is the data pointer, and is
+ used to make references to global variables easier. r30 is
+ the stack pointer.
+
diff --git a/Documentation/parport-lowlevel.txt b/Documentation/parport-lowlevel.txt
new file mode 100644
index 000000000..120eb20db
--- /dev/null
+++ b/Documentation/parport-lowlevel.txt
@@ -0,0 +1,1471 @@
+PARPORT interface documentation
+-------------------------------
+
+Time-stamp: <2000-02-24 13:30:20 twaugh>
+
+Described here are the following functions:
+
+Global functions:
+ parport_register_driver
+ parport_unregister_driver
+ parport_enumerate
+ parport_register_device
+ parport_unregister_device
+ parport_claim
+ parport_claim_or_block
+ parport_release
+ parport_yield
+ parport_yield_blocking
+ parport_wait_peripheral
+ parport_poll_peripheral
+ parport_wait_event
+ parport_negotiate
+ parport_read
+ parport_write
+ parport_open
+ parport_close
+ parport_device_id
+ parport_device_coords
+ parport_find_class
+ parport_find_device
+ parport_set_timeout
+
+Port functions (can be overridden by low-level drivers):
+ SPP:
+ port->ops->read_data
+ port->ops->write_data
+ port->ops->read_status
+ port->ops->read_control
+ port->ops->write_control
+ port->ops->frob_control
+ port->ops->enable_irq
+ port->ops->disable_irq
+ port->ops->data_forward
+ port->ops->data_reverse
+
+ EPP:
+ port->ops->epp_write_data
+ port->ops->epp_read_data
+ port->ops->epp_write_addr
+ port->ops->epp_read_addr
+
+ ECP:
+ port->ops->ecp_write_data
+ port->ops->ecp_read_data
+ port->ops->ecp_write_addr
+
+ Other:
+ port->ops->nibble_read_data
+ port->ops->byte_read_data
+ port->ops->compat_write_data
+
+The parport subsystem comprises 'parport' (the core port-sharing
+code), and a variety of low-level drivers that actually do the port
+accesses. Each low-level driver handles a particular style of port
+(PC, Amiga, and so on).
+
+The parport interface to the device driver author can be broken down
+into global functions and port functions.
+
+The global functions are mostly for communicating between the device
+driver and the parport subsystem: acquiring a list of available ports,
+claiming a port for exclusive use, and so on. They also include
+'generic' functions for doing standard things that will work on any
+IEEE 1284-capable architecture.
+
+The port functions are provided by the low-level drivers, although the
+core parport module provides generic 'defaults' for some routines.
+The port functions can be split into three groups: SPP, EPP, and ECP.
+
+SPP (Standard Parallel Port) functions modify so-called 'SPP'
+registers: data, status, and control. The hardware may not actually
+have registers exactly like that, but the PC does and this interface is
+modelled after common PC implementations. Other low-level drivers may
+be able to emulate most of the functionality.
+
+EPP (Enhanced Parallel Port) functions are provided for reading and
+writing in IEEE 1284 EPP mode, and ECP (Extended Capabilities Port)
+functions are used for IEEE 1284 ECP mode. (What about BECP? Does
+anyone care?)
+
+Hardware assistance for EPP and/or ECP transfers may or may not be
+available, and if it is available it may or may not be used. If
+hardware is not used, the transfer will be software-driven. In order
+to cope with peripherals that only tenuously support IEEE 1284, a
+low-level driver specific function is provided, for altering 'fudge
+factors'.
+
+GLOBAL FUNCTIONS
+----------------
+
+parport_register_driver - register a device driver with parport
+-----------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_driver {
+ const char *name;
+ void (*attach) (struct parport *);
+ void (*detach) (struct parport *);
+ struct parport_driver *next;
+};
+int parport_register_driver (struct parport_driver *driver);
+
+DESCRIPTION
+
+In order to be notified about parallel ports when they are detected,
+parport_register_driver should be called. Your driver will
+immediately be notified of all ports that have already been detected,
+and of each new port as low-level drivers are loaded.
+
+A 'struct parport_driver' contains the textual name of your driver,
+a pointer to a function to handle new ports, and a pointer to a
+function to handle ports going away due to a low-level driver
+unloading. Ports will only be detached if they are not being used
+(i.e. there are no devices registered on them).
+
+The visible parts of the 'struct parport *' argument given to
+attach/detach are:
+
+struct parport
+{
+ struct parport *next; /* next parport in list */
+ const char *name; /* port's name */
+ unsigned int modes; /* bitfield of hardware modes */
+ struct parport_device_info probe_info;
+ /* IEEE1284 info */
+ int number; /* parport index */
+ struct parport_operations *ops;
+ ...
+};
+
+There are other members of the structure, but they should not be
+touched.
+
+The 'modes' member summarises the capabilities of the underlying
+hardware. It consists of flags which may be bitwise-ored together:
+
+ PARPORT_MODE_PCSPP IBM PC registers are available,
+ i.e. functions that act on data,
+ control and status registers are
+ probably writing directly to the
+ hardware.
+ PARPORT_MODE_TRISTATE The data drivers may be turned off.
+ This allows the data lines to be used
+ for reverse (peripheral to host)
+ transfers.
+ PARPORT_MODE_COMPAT The hardware can assist with
+ compatibility-mode (printer)
+ transfers, i.e. compat_write_block.
+ PARPORT_MODE_EPP The hardware can assist with EPP
+ transfers.
+ PARPORT_MODE_ECP The hardware can assist with ECP
+ transfers.
+ PARPORT_MODE_DMA The hardware can use DMA, so you might
+ want to pass ISA DMA-able memory
+ (i.e. memory allocated using the
+ GFP_DMA flag with kmalloc) to the
+ low-level driver in order to take
+ advantage of it.
+
+There may be other flags in 'modes' as well.
+
+The contents of 'modes' is advisory only. For example, if the
+hardware is capable of DMA, and PARPORT_MODE_DMA is in 'modes', it
+doesn't necessarily mean that DMA will always be used when possible.
+Similarly, hardware that is capable of assisting ECP transfers won't
+necessarily be used.
+
+RETURN VALUE
+
+Zero on success, otherwise an error code.
+
+ERRORS
+
+None. (Can it fail? Why return int?)
+
+EXAMPLE
+
+static void lp_attach (struct parport *port)
+{
+ ...
+ private = kmalloc (...);
+ dev[count++] = parport_register_device (...);
+ ...
+}
+
+static void lp_detach (struct parport *port)
+{
+ ...
+}
+
+static struct parport_driver lp_driver = {
+ "lp",
+ lp_attach,
+ lp_detach,
+ NULL /* always put NULL here */
+};
+
+int lp_init (void)
+{
+ ...
+ if (parport_register_driver (&lp_driver)) {
+ /* Failed; nothing we can do. */
+ return -EIO;
+ }
+ ...
+}
+
+SEE ALSO
+
+parport_unregister_driver, parport_register_device, parport_enumerate
+
+parport_unregister_driver - tell parport to forget about this driver
+-------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_driver {
+ const char *name;
+ void (*attach) (struct parport *);
+ void (*detach) (struct parport *);
+ struct parport_driver *next;
+};
+void parport_unregister_driver (struct parport_driver *driver);
+
+DESCRIPTION
+
+This tells parport not to notify the device driver of new ports or of
+ports going away. Registered devices belonging to that driver are NOT
+unregistered: parport_unregister_device must be used for each one.
+
+EXAMPLE
+
+void cleanup_module (void)
+{
+ ...
+ /* Stop notifications. */
+ parport_unregister_driver (&lp_driver);
+
+ /* Unregister devices. */
+ for (i = 0; i < NUM_DEVS; i++)
+ parport_unregister_device (dev[i]);
+ ...
+}
+
+SEE ALSO
+
+parport_register_driver, parport_enumerate
+
+parport_enumerate - retrieve a list of parallel ports (DEPRECATED)
+-----------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport *parport_enumerate (void);
+
+DESCRIPTION
+
+Retrieve the first of a list of valid parallel ports for this machine.
+Successive parallel ports can be found using the 'struct parport
+*next' element of the 'struct parport *' that is returned. If 'next'
+is NULL, there are no more parallel ports in the list. The number of
+ports in the list will not exceed PARPORT_MAX.
+
+RETURN VALUE
+
+A 'struct parport *' describing a valid parallel port for the machine,
+or NULL if there are none.
+
+ERRORS
+
+This function can return NULL to indicate that there are no parallel
+ports to use.
+
+EXAMPLE
+
+int detect_device (void)
+{
+ struct parport *port;
+
+ for (port = parport_enumerate ();
+ port != NULL;
+ port = port->next) {
+ /* Try to detect a device on the port... */
+ ...
+ }
+ }
+
+ ...
+}
+
+NOTES
+
+parport_enumerate is deprecated; parport_register_driver should be
+used instead.
+
+SEE ALSO
+
+parport_register_driver, parport_unregister_driver
+
+parport_register_device - register to use a port
+-----------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+typedef int (*preempt_func) (void *handle);
+typedef void (*wakeup_func) (void *handle);
+typedef int (*irq_func) (int irq, void *handle, struct pt_regs *);
+
+struct pardevice *parport_register_device(struct parport *port,
+ const char *name,
+ preempt_func preempt,
+ wakeup_func wakeup,
+ irq_func irq,
+ int flags,
+ void *handle);
+
+DESCRIPTION
+
+Use this function to register your device driver on a parallel port
+('port'). Once you have done that, you will be able to use
+parport_claim and parport_release in order to use the port.
+
+The ('name') argument is the name of the device that appears in /proc
+filesystem. The string must be valid for the whole lifetime of the
+device (until parport_unregister_device is called).
+
+This function will register three callbacks into your driver:
+'preempt', 'wakeup' and 'irq'. Each of these may be NULL in order to
+indicate that you do not want a callback.
+
+When the 'preempt' function is called, it is because another driver
+wishes to use the parallel port. The 'preempt' function should return
+non-zero if the parallel port cannot be released yet -- if zero is
+returned, the port is lost to another driver and the port must be
+re-claimed before use.
+
+The 'wakeup' function is called once another driver has released the
+port and no other driver has yet claimed it. You can claim the
+parallel port from within the 'wakeup' function (in which case the
+claim is guaranteed to succeed), or choose not to if you don't need it
+now.
+
+If an interrupt occurs on the parallel port your driver has claimed,
+the 'irq' function will be called. (Write something about shared
+interrupts here.)
+
+The 'handle' is a pointer to driver-specific data, and is passed to
+the callback functions.
+
+'flags' may be a bitwise combination of the following flags:
+
+ Flag Meaning
+ PARPORT_DEV_EXCL The device cannot share the parallel port at all.
+ Use this only when absolutely necessary.
+
+The typedefs are not actually defined -- they are only shown in order
+to make the function prototype more readable.
+
+The visible parts of the returned 'struct pardevice' are:
+
+struct pardevice {
+ struct parport *port; /* Associated port */
+ void *private; /* Device driver's 'handle' */
+ ...
+};
+
+RETURN VALUE
+
+A 'struct pardevice *': a handle to the registered parallel port
+device that can be used for parport_claim, parport_release, etc.
+
+ERRORS
+
+A return value of NULL indicates that there was a problem registering
+a device on that port.
+
+EXAMPLE
+
+static int preempt (void *handle)
+{
+ if (busy_right_now)
+ return 1;
+
+ must_reclaim_port = 1;
+ return 0;
+}
+
+static void wakeup (void *handle)
+{
+ struct toaster *private = handle;
+ struct pardevice *dev = private->dev;
+ if (!dev) return; /* avoid races */
+
+ if (want_port)
+ parport_claim (dev);
+}
+
+static int toaster_detect (struct toaster *private, struct parport *port)
+{
+ private->dev = parport_register_device (port, "toaster", preempt,
+ wakeup, NULL, 0,
+ private);
+ if (!private->dev)
+ /* Couldn't register with parport. */
+ return -EIO;
+
+ must_reclaim_port = 0;
+ busy_right_now = 1;
+ parport_claim_or_block (private->dev);
+ ...
+ /* Don't need the port while the toaster warms up. */
+ busy_right_now = 0;
+ ...
+ busy_right_now = 1;
+ if (must_reclaim_port) {
+ parport_claim_or_block (private->dev);
+ must_reclaim_port = 0;
+ }
+ ...
+}
+
+SEE ALSO
+
+parport_unregister_device, parport_claim
+
+parport_unregister_device - finish using a port
+-------------------------
+
+SYNPOPSIS
+
+#include <linux/parport.h>
+
+void parport_unregister_device (struct pardevice *dev);
+
+DESCRIPTION
+
+This function is the opposite of parport_register_device. After using
+parport_unregister_device, 'dev' is no longer a valid device handle.
+
+You should not unregister a device that is currently claimed, although
+if you do it will be released automatically.
+
+EXAMPLE
+
+ ...
+ kfree (dev->private); /* before we lose the pointer */
+ parport_unregister_device (dev);
+ ...
+
+SEE ALSO
+
+parport_unregister_driver
+
+parport_claim, parport_claim_or_block - claim the parallel port for a device
+-------------------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+int parport_claim (struct pardevice *dev);
+int parport_claim_or_block (struct pardevice *dev);
+
+DESCRIPTION
+
+These functions attempt to gain control of the parallel port on which
+'dev' is registered. 'parport_claim' does not block, but
+'parport_claim_or_block' may do. (Put something here about blocking
+interruptibly or non-interruptibly.)
+
+You should not try to claim a port that you have already claimed.
+
+RETURN VALUE
+
+A return value of zero indicates that the port was successfully
+claimed, and the caller now has possession of the parallel port.
+
+If 'parport_claim_or_block' blocks before returning successfully, the
+return value is positive.
+
+ERRORS
+
+ -EAGAIN The port is unavailable at the moment, but another attempt
+ to claim it may succeed.
+
+SEE ALSO
+
+parport_release
+
+parport_release - release the parallel port
+---------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+void parport_release (struct pardevice *dev);
+
+DESCRIPTION
+
+Once a parallel port device has been claimed, it can be released using
+'parport_release'. It cannot fail, but you should not release a
+device that you do not have possession of.
+
+EXAMPLE
+
+static size_t write (struct pardevice *dev, const void *buf,
+ size_t len)
+{
+ ...
+ written = dev->port->ops->write_ecp_data (dev->port, buf,
+ len);
+ parport_release (dev);
+ ...
+}
+
+
+SEE ALSO
+
+change_mode, parport_claim, parport_claim_or_block, parport_yield
+
+parport_yield, parport_yield_blocking - temporarily release a parallel port
+-------------------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+int parport_yield (struct pardevice *dev)
+int parport_yield_blocking (struct pardevice *dev);
+
+DESCRIPTION
+
+When a driver has control of a parallel port, it may allow another
+driver to temporarily 'borrow' it. 'parport_yield' does not block;
+'parport_yield_blocking' may do.
+
+RETURN VALUE
+
+A return value of zero indicates that the caller still owns the port
+and the call did not block.
+
+A positive return value from 'parport_yield_blocking' indicates that
+the caller still owns the port and the call blocked.
+
+A return value of -EAGAIN indicates that the caller no longer owns the
+port, and it must be re-claimed before use.
+
+ERRORS
+
+ -EAGAIN Ownership of the parallel port was given away.
+
+SEE ALSO
+
+parport_release
+
+parport_wait_peripheral - wait for status lines, up to 35ms
+-----------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+int parport_wait_peripheral (struct parport *port,
+ unsigned char mask,
+ unsigned char val);
+
+DESCRIPTION
+
+Wait for the status lines in mask to match the values in val.
+
+RETURN VALUE
+
+ -EINTR a signal is pending
+ 0 the status lines in mask have values in val
+ 1 timed out while waiting (35ms elapsed)
+
+SEE ALSO
+
+parport_poll_peripheral
+
+parport_poll_peripheral - wait for status lines, in usec
+-----------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+int parport_poll_peripheral (struct parport *port,
+ unsigned char mask,
+ unsigned char val,
+ int usec);
+
+DESCRIPTION
+
+Wait for the status lines in mask to match the values in val.
+
+RETURN VALUE
+
+ -EINTR a signal is pending
+ 0 the status lines in mask have values in val
+ 1 timed out while waiting (usec microseconds have elapsed)
+
+SEE ALSO
+
+parport_wait_peripheral
+
+parport_wait_event - wait for an event on a port
+------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+int parport_wait_event (struct parport *port, signed long timeout)
+
+DESCRIPTION
+
+Wait for an event (e.g. interrupt) on a port. The timeout is in
+jiffies.
+
+RETURN VALUE
+
+ 0 success
+ <0 error (exit as soon as possible)
+ >0 timed out
+
+parport_negotiate - perform IEEE 1284 negotiation
+-----------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+int parport_negotiate (struct parport *, int mode);
+
+DESCRIPTION
+
+Perform IEEE 1284 negotiation.
+
+RETURN VALUE
+
+ 0 handshake OK; IEEE 1284 peripheral and mode available
+ -1 handshake failed; peripheral not compliant (or none present)
+ 1 handshake OK; IEEE 1284 peripheral present but mode not
+ available
+
+SEE ALSO
+
+parport_read, parport_write
+
+parport_read - read data from device
+------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+ssize_t parport_read (struct parport *, void *buf, size_t len);
+
+DESCRIPTION
+
+Read data from device in current IEEE 1284 transfer mode. This only
+works for modes that support reverse data transfer.
+
+RETURN VALUE
+
+If negative, an error code; otherwise the number of bytes transferred.
+
+SEE ALSO
+
+parport_write, parport_negotiate
+
+parport_write - write data to device
+-------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+ssize_t parport_write (struct parport *, const void *buf, size_t len);
+
+DESCRIPTION
+
+Write data to device in current IEEE 1284 transfer mode. This only
+works for modes that support forward data transfer.
+
+RETURN VALUE
+
+If negative, an error code; otherwise the number of bytes transferred.
+
+SEE ALSO
+
+parport_read, parport_negotiate
+
+parport_open - register device for particular device number
+------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct pardevice *parport_open (int devnum, const char *name,
+ int (*pf) (void *),
+ void (*kf) (void *),
+ void (*irqf) (int, void *,
+ struct pt_regs *),
+ int flags, void *handle);
+
+DESCRIPTION
+
+This is like parport_register_device but takes a device number instead
+of a pointer to a struct parport.
+
+RETURN VALUE
+
+See parport_register_device. If no device is associated with devnum,
+NULL is returned.
+
+SEE ALSO
+
+parport_register_device
+
+parport_close - unregister device for particular device number
+-------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+void parport_close (struct pardevice *dev);
+
+DESCRIPTION
+
+This is the equivalent of parport_unregister_device for parport_open.
+
+SEE ALSO
+
+parport_unregister_device, parport_open
+
+parport_device_id - obtain IEEE 1284 Device ID
+-----------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+ssize_t parport_device_id (int devnum, char *buffer, size_t len);
+
+DESCRIPTION
+
+Obtains the IEEE 1284 Device ID associated with a given device.
+
+RETURN VALUE
+
+If negative, an error code; otherwise, the number of bytes of buffer
+that contain the device ID. The format of the device ID is as
+follows:
+
+[length][ID]
+
+The first two bytes indicate the inclusive length of the entire Device
+ID, and are in big-endian order. The ID is a sequence of pairs of the
+form:
+
+key:value;
+
+NOTES
+
+Many devices have ill-formed IEEE 1284 Device IDs.
+
+SEE ALSO
+
+parport_find_class, parport_find_device
+
+parport_device_coords - convert device number to device coordinates
+------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+int parport_device_coords (int devnum, int *parport, int *mux,
+ int *daisy);
+
+DESCRIPTION
+
+Convert between device number (zero-based) and device coordinates
+(port, multiplexor, daisy chain address).
+
+RETURN VALUE
+
+Zero on success, in which case the coordinates are (*parport, *mux,
+*daisy).
+
+SEE ALSO
+
+parport_open, parport_device_id
+
+parport_find_class - find a device by its class
+------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+typedef enum {
+ PARPORT_CLASS_LEGACY = 0, /* Non-IEEE1284 device */
+ PARPORT_CLASS_PRINTER,
+ PARPORT_CLASS_MODEM,
+ PARPORT_CLASS_NET,
+ PARPORT_CLASS_HDC, /* Hard disk controller */
+ PARPORT_CLASS_PCMCIA,
+ PARPORT_CLASS_MEDIA, /* Multimedia device */
+ PARPORT_CLASS_FDC, /* Floppy disk controller */
+ PARPORT_CLASS_PORTS,
+ PARPORT_CLASS_SCANNER,
+ PARPORT_CLASS_DIGCAM,
+ PARPORT_CLASS_OTHER, /* Anything else */
+ PARPORT_CLASS_UNSPEC, /* No CLS field in ID */
+ PARPORT_CLASS_SCSIADAPTER
+} parport_device_class;
+
+int parport_find_class (parport_device_class cls, int from);
+
+DESCRIPTION
+
+Find a device by class. The search starts from device number from+1.
+
+RETURN VALUE
+
+The device number of the next device in that class, or -1 if no such
+device exists.
+
+NOTES
+
+Example usage:
+
+int devnum = -1;
+while ((devnum = parport_find_class (PARPORT_CLASS_DIGCAM, devnum)) != -1) {
+ struct pardevice *dev = parport_open (devnum, ...);
+ ...
+}
+
+SEE ALSO
+
+parport_find_device, parport_open, parport_device_id
+
+parport_find_device - find a device by its class
+------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+int parport_find_device (const char *mfg, const char *mdl, int from);
+
+DESCRIPTION
+
+Find a device by vendor and model. The search starts from device
+number from+1.
+
+RETURN VALUE
+
+The device number of the next device matching the specifications, or
+-1 if no such device exists.
+
+NOTES
+
+Example usage:
+
+int devnum = -1;
+while ((devnum = parport_find_device ("IOMEGA", "ZIP+", devnum)) != -1) {
+ struct pardevice *dev = parport_open (devnum, ...);
+ ...
+}
+
+SEE ALSO
+
+parport_find_class, parport_open, parport_device_id
+
+parport_set_timeout - set the inactivity timeout
+-------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+long parport_set_timeout (struct pardevice *dev, long inactivity);
+
+DESCRIPTION
+
+Set the inactivity timeout, in jiffies, for a registered device. The
+previous timeout is returned.
+
+RETURN VALUE
+
+The previous timeout, in jiffies.
+
+NOTES
+
+Some of the port->ops functions for a parport may take time, owing to
+delays at the peripheral. After the peripheral has not responded for
+'inactivity' jiffies, a timeout will occur and the blocking function
+will return.
+
+A timeout of 0 jiffies is a special case: the function must do as much
+as it can without blocking or leaving the hardware in an unknown
+state. If port operations are performed from within an interrupt
+handler, for instance, a timeout of 0 jiffies should be used.
+
+Once set for a registered device, the timeout will remain at the set
+value until set again.
+
+SEE ALSO
+
+port->ops->xxx_read/write_yyy
+
+PORT FUNCTIONS
+--------------
+
+The functions in the port->ops structure (struct parport_operations)
+are provided by the low-level driver responsible for that port.
+
+port->ops->read_data - read the data register
+--------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ unsigned char (*read_data) (struct parport *port);
+ ...
+};
+
+DESCRIPTION
+
+If port->modes contains the PARPORT_MODE_TRISTATE flag and the
+PARPORT_CONTROL_DIRECTION bit in the control register is set, this
+returns the value on the data pins. If port->modes contains the
+PARPORT_MODE_TRISTATE flag and the PARPORT_CONTROL_DIRECTION bit is
+not set, the return value _may_ be the last value written to the data
+register. Otherwise the return value is undefined.
+
+SEE ALSO
+
+write_data, read_status, write_control
+
+port->ops->write_data - write the data register
+---------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ void (*write_data) (struct parport *port, unsigned char d);
+ ...
+};
+
+DESCRIPTION
+
+Writes to the data register. May have side-effects (a STROBE pulse,
+for instance).
+
+SEE ALSO
+
+read_data, read_status, write_control
+
+port->ops->read_status - read the status register
+----------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ unsigned char (*read_status) (struct parport *port);
+ ...
+};
+
+DESCRIPTION
+
+Reads from the status register. This is a bitmask:
+
+- PARPORT_STATUS_ERROR (printer fault, "nFault")
+- PARPORT_STATUS_SELECT (on-line, "Select")
+- PARPORT_STATUS_PAPEROUT (no paper, "PError")
+- PARPORT_STATUS_ACK (handshake, "nAck")
+- PARPORT_STATUS_BUSY (busy, "Busy")
+
+There may be other bits set.
+
+SEE ALSO
+
+read_data, write_data, write_control
+
+port->ops->read_control - read the control register
+-----------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ unsigned char (*read_control) (struct parport *port);
+ ...
+};
+
+DESCRIPTION
+
+Returns the last value written to the control register (either from
+write_control or frob_control). No port access is performed.
+
+SEE ALSO
+
+read_data, write_data, read_status, write_control
+
+port->ops->write_control - write the control register
+------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ void (*write_control) (struct parport *port, unsigned char s);
+ ...
+};
+
+DESCRIPTION
+
+Writes to the control register. This is a bitmask:
+ _______
+- PARPORT_CONTROL_STROBE (nStrobe)
+ _______
+- PARPORT_CONTROL_AUTOFD (nAutoFd)
+ _____
+- PARPORT_CONTROL_INIT (nInit)
+ _________
+- PARPORT_CONTROL_SELECT (nSelectIn)
+
+SEE ALSO
+
+read_data, write_data, read_status, frob_control
+
+port->ops->frob_control - write control register bits
+-----------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ unsigned char (*frob_control) (struct parport *port,
+ unsigned char mask,
+ unsigned char val);
+ ...
+};
+
+DESCRIPTION
+
+This is equivalent to reading from the control register, masking out
+the bits in mask, exclusive-or'ing with the bits in val, and writing
+the result to the control register.
+
+As some ports don't allow reads from the control port, a software copy
+of its contents is maintained, so frob_control is in fact only one
+port access.
+
+SEE ALSO
+
+read_data, write_data, read_status, write_control
+
+port->ops->enable_irq - enable interrupt generation
+---------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ void (*enable_irq) (struct parport *port);
+ ...
+};
+
+DESCRIPTION
+
+The parallel port hardware is instructed to generate interrupts at
+appropriate moments, although those moments are
+architecture-specific. For the PC architecture, interrupts are
+commonly generated on the rising edge of nAck.
+
+SEE ALSO
+
+disable_irq
+
+port->ops->disable_irq - disable interrupt generation
+----------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ void (*disable_irq) (struct parport *port);
+ ...
+};
+
+DESCRIPTION
+
+The parallel port hardware is instructed not to generate interrupts.
+The interrupt itself is not masked.
+
+SEE ALSO
+
+enable_irq
+
+port->ops->data_forward - enable data drivers
+-----------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ void (*data_forward) (struct parport *port);
+ ...
+};
+
+DESCRIPTION
+
+Enables the data line drivers, for 8-bit host-to-peripheral
+communications.
+
+SEE ALSO
+
+data_reverse
+
+port->ops->data_reverse - tristate the buffer
+-----------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ void (*data_reverse) (struct parport *port);
+ ...
+};
+
+DESCRIPTION
+
+Places the data bus in a high impedance state, if port->modes has the
+PARPORT_MODE_TRISTATE bit set.
+
+SEE ALSO
+
+data_forward
+
+port->ops->epp_write_data - write EPP data
+-------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ size_t (*epp_write_data) (struct parport *port, const void *buf,
+ size_t len, int flags);
+ ...
+};
+
+DESCRIPTION
+
+Writes data in EPP mode, and returns the number of bytes written.
+
+The 'flags' parameter may be one or more of the following,
+bitwise-or'ed together:
+
+PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and
+ 32-bit registers. However, if a transfer
+ times out, the return value may be unreliable.
+
+SEE ALSO
+
+epp_read_data, epp_write_addr, epp_read_addr
+
+port->ops->epp_read_data - read EPP data
+------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ size_t (*epp_read_data) (struct parport *port, void *buf,
+ size_t len, int flags);
+ ...
+};
+
+DESCRIPTION
+
+Reads data in EPP mode, and returns the number of bytes read.
+
+The 'flags' parameter may be one or more of the following,
+bitwise-or'ed together:
+
+PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and
+ 32-bit registers. However, if a transfer
+ times out, the return value may be unreliable.
+
+SEE ALSO
+
+epp_write_data, epp_write_addr, epp_read_addr
+
+port->ops->epp_write_addr - write EPP address
+-------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ size_t (*epp_write_addr) (struct parport *port,
+ const void *buf, size_t len, int flags);
+ ...
+};
+
+DESCRIPTION
+
+Writes EPP addresses (8 bits each), and returns the number written.
+
+The 'flags' parameter may be one or more of the following,
+bitwise-or'ed together:
+
+PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and
+ 32-bit registers. However, if a transfer
+ times out, the return value may be unreliable.
+
+(Does PARPORT_EPP_FAST make sense for this function?)
+
+SEE ALSO
+
+epp_write_data, epp_read_data, epp_read_addr
+
+port->ops->epp_read_addr - read EPP address
+------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ size_t (*epp_read_addr) (struct parport *port, void *buf,
+ size_t len, int flags);
+ ...
+};
+
+DESCRIPTION
+
+Reads EPP addresses (8 bits each), and returns the number read.
+
+The 'flags' parameter may be one or more of the following,
+bitwise-or'ed together:
+
+PARPORT_EPP_FAST Use fast transfers. Some chips provide 16-bit and
+ 32-bit registers. However, if a transfer
+ times out, the return value may be unreliable.
+
+(Does PARPORT_EPP_FAST make sense for this function?)
+
+SEE ALSO
+
+epp_write_data, epp_read_data, epp_write_addr
+
+port->ops->ecp_write_data - write a block of ECP data
+-------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ size_t (*ecp_write_data) (struct parport *port,
+ const void *buf, size_t len, int flags);
+ ...
+};
+
+DESCRIPTION
+
+Writes a block of ECP data. The 'flags' parameter is ignored.
+
+RETURN VALUE
+
+The number of bytes written.
+
+SEE ALSO
+
+ecp_read_data, ecp_write_addr
+
+port->ops->ecp_read_data - read a block of ECP data
+------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ size_t (*ecp_read_data) (struct parport *port,
+ void *buf, size_t len, int flags);
+ ...
+};
+
+DESCRIPTION
+
+Reads a block of ECP data. The 'flags' parameter is ignored.
+
+RETURN VALUE
+
+The number of bytes read. NB. There may be more unread data in a
+FIFO. Is there a way of stunning the FIFO to prevent this?
+
+SEE ALSO
+
+ecp_write_block, ecp_write_addr
+
+port->ops->ecp_write_addr - write a block of ECP addresses
+-------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ size_t (*ecp_write_addr) (struct parport *port,
+ const void *buf, size_t len, int flags);
+ ...
+};
+
+DESCRIPTION
+
+Writes a block of ECP addresses. The 'flags' parameter is ignored.
+
+RETURN VALUE
+
+The number of bytes written.
+
+NOTES
+
+This may use a FIFO, and if so shall not return until the FIFO is empty.
+
+SEE ALSO
+
+ecp_read_data, ecp_write_data
+
+port->ops->nibble_read_data - read a block of data in nibble mode
+---------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ size_t (*nibble_read_data) (struct parport *port,
+ void *buf, size_t len, int flags);
+ ...
+};
+
+DESCRIPTION
+
+Reads a block of data in nibble mode. The 'flags' parameter is ignored.
+
+RETURN VALUE
+
+The number of whole bytes read.
+
+SEE ALSO
+
+byte_read_data, compat_write_data
+
+port->ops->byte_read_data - read a block of data in byte mode
+-------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ size_t (*byte_read_data) (struct parport *port,
+ void *buf, size_t len, int flags);
+ ...
+};
+
+DESCRIPTION
+
+Reads a block of data in byte mode. The 'flags' parameter is ignored.
+
+RETURN VALUE
+
+The number of bytes read.
+
+SEE ALSO
+
+nibble_read_data, compat_write_data
+
+port->ops->compat_write_data - write a block of data in compatibility mode
+----------------------------
+
+SYNOPSIS
+
+#include <linux/parport.h>
+
+struct parport_operations {
+ ...
+ size_t (*compat_write_data) (struct parport *port,
+ const void *buf, size_t len, int flags);
+ ...
+};
+
+DESCRIPTION
+
+Writes a block of data in compatibility mode. The 'flags' parameter
+is ignored.
+
+RETURN VALUE
+
+The number of bytes written.
+
+SEE ALSO
+
+nibble_read_data, byte_read_data
diff --git a/Documentation/parport.txt b/Documentation/parport.txt
new file mode 100644
index 000000000..c208e4366
--- /dev/null
+++ b/Documentation/parport.txt
@@ -0,0 +1,267 @@
+The `parport' code provides parallel-port support under Linux. This
+includes the ability to share one port between multiple device
+drivers.
+
+You can pass parameters to the parport code to override its automatic
+detection of your hardware. This is particularly useful if you want
+to use IRQs, since in general these can't be autoprobed successfully.
+By default IRQs are not used even if they _can_ be probed. This is
+because there are a lot of people using the same IRQ for their
+parallel port and a sound card or network card.
+
+The parport code is split into two parts: generic (which deals with
+port-sharing) and architecture-dependent (which deals with actually
+using the port).
+
+
+Parport as modules
+==================
+
+If you load the parport code as a module, say
+
+ # insmod parport
+
+to load the generic parport code. You then must load the
+architecture-dependent code with (for example):
+
+ # insmod parport_pc io=0x3bc,0x378,0x278 irq=none,7,auto
+
+to tell the parport code that you want three PC-style ports, one at
+0x3bc with no IRQ, one at 0x378 using IRQ 7, and one at 0x278 with an
+auto-detected IRQ. Currently, PC-style (parport_pc), Sun `bpp',
+Amiga, Atari, and MFC3 hardware is supported.
+
+PCI parallel I/O card support comes from parport_pc. Base I/O
+addresses should not be specified for supported PCI cards since they
+are automatically detected.
+
+
+modprobe
+--------
+
+If you use modprobe , you will find it useful to add lines as below to a
+configuration file in /etc/modprobe.d/ directory:.
+
+ alias parport_lowlevel parport_pc
+ options parport_pc io=0x378,0x278 irq=7,auto
+
+modprobe will load parport_pc (with the options "io=0x378,0x278 irq=7,auto")
+whenever a parallel port device driver (such as lp) is loaded.
+
+Note that these are example lines only! You shouldn't in general need
+to specify any options to parport_pc in order to be able to use a
+parallel port.
+
+
+Parport probe [optional]
+-------------
+
+In 2.2 kernels there was a module called parport_probe, which was used
+for collecting IEEE 1284 device ID information. This has now been
+enhanced and now lives with the IEEE 1284 support. When a parallel
+port is detected, the devices that are connected to it are analysed,
+and information is logged like this:
+
+ parport0: Printer, BJC-210 (Canon)
+
+The probe information is available from files in /proc/sys/dev/parport/.
+
+
+Parport linked into the kernel statically
+=========================================
+
+If you compile the parport code into the kernel, then you can use
+kernel boot parameters to get the same effect. Add something like the
+following to your LILO command line:
+
+ parport=0x3bc parport=0x378,7 parport=0x278,auto,nofifo
+
+You can have many `parport=...' statements, one for each port you want
+to add. Adding `parport=0' to the kernel command-line will disable
+parport support entirely. Adding `parport=auto' to the kernel
+command-line will make parport use any IRQ lines or DMA channels that
+it auto-detects.
+
+
+Files in /proc
+==============
+
+If you have configured the /proc filesystem into your kernel, you will
+see a new directory entry: /proc/sys/dev/parport. In there will be a
+directory entry for each parallel port for which parport is
+configured. In each of those directories are a collection of files
+describing that parallel port.
+
+The /proc/sys/dev/parport directory tree looks like:
+
+parport
+|-- default
+| |-- spintime
+| `-- timeslice
+|-- parport0
+| |-- autoprobe
+| |-- autoprobe0
+| |-- autoprobe1
+| |-- autoprobe2
+| |-- autoprobe3
+| |-- devices
+| | |-- active
+| | `-- lp
+| | `-- timeslice
+| |-- base-addr
+| |-- irq
+| |-- dma
+| |-- modes
+| `-- spintime
+`-- parport1
+ |-- autoprobe
+ |-- autoprobe0
+ |-- autoprobe1
+ |-- autoprobe2
+ |-- autoprobe3
+ |-- devices
+ | |-- active
+ | `-- ppa
+ | `-- timeslice
+ |-- base-addr
+ |-- irq
+ |-- dma
+ |-- modes
+ `-- spintime
+
+
+File: Contents:
+
+devices/active A list of the device drivers using that port. A "+"
+ will appear by the name of the device currently using
+ the port (it might not appear against any). The
+ string "none" means that there are no device drivers
+ using that port.
+
+base-addr Parallel port's base address, or addresses if the port
+ has more than one in which case they are separated
+ with tabs. These values might not have any sensible
+ meaning for some ports.
+
+irq Parallel port's IRQ, or -1 if none is being used.
+
+dma Parallel port's DMA channel, or -1 if none is being
+ used.
+
+modes Parallel port's hardware modes, comma-separated,
+ meaning:
+
+ PCSPP PC-style SPP registers are available.
+ TRISTATE Port is bidirectional.
+ COMPAT Hardware acceleration for printers is
+ available and will be used.
+ EPP Hardware acceleration for EPP protocol
+ is available and will be used.
+ ECP Hardware acceleration for ECP protocol
+ is available and will be used.
+ DMA DMA is available and will be used.
+
+ Note that the current implementation will only take
+ advantage of COMPAT and ECP modes if it has an IRQ
+ line to use.
+
+autoprobe Any IEEE-1284 device ID information that has been
+ acquired from the (non-IEEE 1284.3) device.
+
+autoprobe[0-3] IEEE 1284 device ID information retrieved from
+ daisy-chain devices that conform to IEEE 1284.3.
+
+spintime The number of microseconds to busy-loop while waiting
+ for the peripheral to respond. You might find that
+ adjusting this improves performance, depending on your
+ peripherals. This is a port-wide setting, i.e. it
+ applies to all devices on a particular port.
+
+timeslice The number of milliseconds that a device driver is
+ allowed to keep a port claimed for. This is advisory,
+ and driver can ignore it if it must.
+
+default/* The defaults for spintime and timeslice. When a new
+ port is registered, it picks up the default spintime.
+ When a new device is registered, it picks up the
+ default timeslice.
+
+Device drivers
+==============
+
+Once the parport code is initialised, you can attach device drivers to
+specific ports. Normally this happens automatically; if the lp driver
+is loaded it will create one lp device for each port found. You can
+override this, though, by using parameters either when you load the lp
+driver:
+
+ # insmod lp parport=0,2
+
+or on the LILO command line:
+
+ lp=parport0 lp=parport2
+
+Both the above examples would inform lp that you want /dev/lp0 to be
+the first parallel port, and /dev/lp1 to be the _third_ parallel port,
+with no lp device associated with the second port (parport1). Note
+that this is different to the way older kernels worked; there used to
+be a static association between the I/O port address and the device
+name, so /dev/lp0 was always the port at 0x3bc. This is no longer the
+case - if you only have one port, it will default to being /dev/lp0,
+regardless of base address.
+
+Also:
+
+ * If you selected the IEEE 1284 support at compile time, you can say
+ `lp=auto' on the kernel command line, and lp will create devices
+ only for those ports that seem to have printers attached.
+
+ * If you give PLIP the `timid' parameter, either with `plip=timid' on
+ the command line, or with `insmod plip timid=1' when using modules,
+ it will avoid any ports that seem to be in use by other devices.
+
+ * IRQ autoprobing works only for a few port types at the moment.
+
+Reporting printer problems with parport
+=======================================
+
+If you are having problems printing, please go through these steps to
+try to narrow down where the problem area is.
+
+When reporting problems with parport, really you need to give all of
+the messages that parport_pc spits out when it initialises. There are
+several code paths:
+
+o polling
+o interrupt-driven, protocol in software
+o interrupt-driven, protocol in hardware using PIO
+o interrupt-driven, protocol in hardware using DMA
+
+The kernel messages that parport_pc logs give an indication of which
+code path is being used. (They could be a lot better actually..)
+
+For normal printer protocol, having IEEE 1284 modes enabled or not
+should not make a difference.
+
+To turn off the 'protocol in hardware' code paths, disable
+CONFIG_PARPORT_PC_FIFO. Note that when they are enabled they are not
+necessarily _used_; it depends on whether the hardware is available,
+enabled by the BIOS, and detected by the driver.
+
+So, to start with, disable CONFIG_PARPORT_PC_FIFO, and load parport_pc
+with 'irq=none'. See if printing works then. It really should,
+because this is the simplest code path.
+
+If that works fine, try with 'io=0x378 irq=7' (adjust for your
+hardware), to make it use interrupt-driven in-software protocol.
+
+If _that_ works fine, then one of the hardware modes isn't working
+right. Enable CONFIG_PARPORT_PC_FIFO (no, it isn't a module option,
+and yes, it should be), set the port to ECP mode in the BIOS and note
+the DMA channel, and try with:
+
+ io=0x378 irq=7 dma=none (for PIO)
+ io=0x378 irq=7 dma=3 (for DMA)
+--
+philb@gnu.org
+tim@cyberelk.net
diff --git a/Documentation/pcmcia/.gitignore b/Documentation/pcmcia/.gitignore
new file mode 100644
index 000000000..53d081336
--- /dev/null
+++ b/Documentation/pcmcia/.gitignore
@@ -0,0 +1 @@
+crc32hash
diff --git a/Documentation/pcmcia/Makefile b/Documentation/pcmcia/Makefile
new file mode 100644
index 000000000..47a8fa162
--- /dev/null
+++ b/Documentation/pcmcia/Makefile
@@ -0,0 +1,7 @@
+# List of programs to build
+hostprogs-y := crc32hash
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_crc32hash.o += -I$(objtree)/usr/include
diff --git a/Documentation/pcmcia/crc32hash.c b/Documentation/pcmcia/crc32hash.c
new file mode 100644
index 000000000..44f8beea7
--- /dev/null
+++ b/Documentation/pcmcia/crc32hash.c
@@ -0,0 +1,32 @@
+/* crc32hash.c - derived from linux/lib/crc32.c, GNU GPL v2 */
+/* Usage example:
+$ ./crc32hash "Dual Speed"
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+
+static unsigned int crc32(unsigned char const *p, unsigned int len)
+{
+ int i;
+ unsigned int crc = 0;
+ while (len--) {
+ crc ^= *p++;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+ }
+ return crc;
+}
+
+int main(int argc, char **argv) {
+ unsigned int result;
+ if (argc != 2) {
+ printf("no string passed as argument\n");
+ return -1;
+ }
+ result = crc32((unsigned char const *)argv[1], strlen(argv[1]));
+ printf("0x%x\n", result);
+ return 0;
+}
diff --git a/Documentation/pcmcia/devicetable.txt b/Documentation/pcmcia/devicetable.txt
new file mode 100644
index 000000000..199afd100
--- /dev/null
+++ b/Documentation/pcmcia/devicetable.txt
@@ -0,0 +1,33 @@
+Matching of PCMCIA devices to drivers is done using one or more of the
+following criteria:
+
+- manufactor ID
+- card ID
+- product ID strings _and_ hashes of these strings
+- function ID
+- device function (actual and pseudo)
+
+You should use the helpers in include/pcmcia/device_id.h for generating the
+struct pcmcia_device_id[] entries which match devices to drivers.
+
+If you want to match product ID strings, you also need to pass the crc32
+hashes of the string to the macro, e.g. if you want to match the product ID
+string 1, you need to use
+
+PCMCIA_DEVICE_PROD_ID1("some_string", 0x(hash_of_some_string)),
+
+If the hash is incorrect, the kernel will inform you about this in "dmesg"
+upon module initialization, and tell you of the correct hash.
+
+You can determine the hash of the product ID strings by catting the file
+"modalias" in the sysfs directory of the PCMCIA device. It generates a string
+in the following form:
+pcmcia:m0149cC1ABf06pfn00fn00pa725B842DpbF1EFEE84pc0877B627pd00000000
+
+The hex value after "pa" is the hash of product ID string 1, after "pb" for
+string 2 and so on.
+
+Alternatively, you can use crc32hash (see Documentation/pcmcia/crc32hash.c)
+to determine the crc32 hash. Simply pass the string you want to evaluate
+as argument to this program, e.g.:
+$ ./crc32hash "Dual Speed"
diff --git a/Documentation/pcmcia/driver-changes.txt b/Documentation/pcmcia/driver-changes.txt
new file mode 100644
index 000000000..dd04361dd
--- /dev/null
+++ b/Documentation/pcmcia/driver-changes.txt
@@ -0,0 +1,152 @@
+This file details changes in 2.6 which affect PCMCIA card driver authors:
+* pcmcia_loop_config() and autoconfiguration (as of 2.6.36)
+ If struct pcmcia_device *p_dev->config_flags is set accordingly,
+ pcmcia_loop_config() now sets up certain configuration values
+ automatically, though the driver may still override the settings
+ in the callback function. The following autoconfiguration options
+ are provided at the moment:
+ CONF_AUTO_CHECK_VCC : check for matching Vcc
+ CONF_AUTO_SET_VPP : set Vpp
+ CONF_AUTO_AUDIO : auto-enable audio line, if required
+ CONF_AUTO_SET_IO : set ioport resources (->resource[0,1])
+ CONF_AUTO_SET_IOMEM : set first iomem resource (->resource[2])
+
+* pcmcia_request_configuration -> pcmcia_enable_device (as of 2.6.36)
+ pcmcia_request_configuration() got renamed to pcmcia_enable_device(),
+ as it mirrors pcmcia_disable_device(). Configuration settings are now
+ stored in struct pcmcia_device, e.g. in the fields config_flags,
+ config_index, config_base, vpp.
+
+* pcmcia_request_window changes (as of 2.6.36)
+ Instead of win_req_t, drivers are now requested to fill out
+ struct pcmcia_device *p_dev->resource[2,3,4,5] for up to four ioport
+ ranges. After a call to pcmcia_request_window(), the regions found there
+ are reserved and may be used immediately -- until pcmcia_release_window()
+ is called.
+
+* pcmcia_request_io changes (as of 2.6.36)
+ Instead of io_req_t, drivers are now requested to fill out
+ struct pcmcia_device *p_dev->resource[0,1] for up to two ioport
+ ranges. After a call to pcmcia_request_io(), the ports found there
+ are reserved, after calling pcmcia_request_configuration(), they may
+ be used.
+
+* No dev_info_t, no cs_types.h (as of 2.6.36)
+ dev_info_t and a few other typedefs are removed. No longer use them
+ in PCMCIA device drivers. Also, do not include pcmcia/cs_types.h, as
+ this file is gone.
+
+* No dev_node_t (as of 2.6.35)
+ There is no more need to fill out a "dev_node_t" structure.
+
+* New IRQ request rules (as of 2.6.35)
+ Instead of the old pcmcia_request_irq() interface, drivers may now
+ choose between:
+ - calling request_irq/free_irq directly. Use the IRQ from *p_dev->irq.
+ - use pcmcia_request_irq(p_dev, handler_t); the PCMCIA core will
+ clean up automatically on calls to pcmcia_disable_device() or
+ device ejection.
+ - drivers still not capable of IRQF_SHARED (or not telling us so) may
+ use the deprecated pcmcia_request_exclusive_irq() for the time
+ being; they might receive a shared IRQ nonetheless.
+
+* no cs_error / CS_CHECK / CONFIG_PCMCIA_DEBUG (as of 2.6.33)
+ Instead of the cs_error() callback or the CS_CHECK() macro, please use
+ Linux-style checking of return values, and -- if necessary -- debug
+ messages using "dev_dbg()" or "pr_debug()".
+
+* New CIS tuple access (as of 2.6.33)
+ Instead of pcmcia_get_{first,next}_tuple(), pcmcia_get_tuple_data() and
+ pcmcia_parse_tuple(), a driver shall use "pcmcia_get_tuple()" if it is
+ only interested in one (raw) tuple, or "pcmcia_loop_tuple()" if it is
+ interested in all tuples of one type. To decode the MAC from CISTPL_FUNCE,
+ a new helper "pcmcia_get_mac_from_cis()" was added.
+
+* New configuration loop helper (as of 2.6.28)
+ By calling pcmcia_loop_config(), a driver can iterate over all available
+ configuration options. During a driver's probe() phase, one doesn't need
+ to use pcmcia_get_{first,next}_tuple, pcmcia_get_tuple_data and
+ pcmcia_parse_tuple directly in most if not all cases.
+
+* New release helper (as of 2.6.17)
+ Instead of calling pcmcia_release_{configuration,io,irq,win}, all that's
+ necessary now is calling pcmcia_disable_device. As there is no valid
+ reason left to call pcmcia_release_io and pcmcia_release_irq, the
+ exports for them were removed.
+
+* Unify detach and REMOVAL event code, as well as attach and INSERTION
+ code (as of 2.6.16)
+ void (*remove) (struct pcmcia_device *dev);
+ int (*probe) (struct pcmcia_device *dev);
+
+* Move suspend, resume and reset out of event handler (as of 2.6.16)
+ int (*suspend) (struct pcmcia_device *dev);
+ int (*resume) (struct pcmcia_device *dev);
+ should be initialized in struct pcmcia_driver, and handle
+ (SUSPEND == RESET_PHYSICAL) and (RESUME == CARD_RESET) events
+
+* event handler initialization in struct pcmcia_driver (as of 2.6.13)
+ The event handler is notified of all events, and must be initialized
+ as the event() callback in the driver's struct pcmcia_driver.
+
+* pcmcia/version.h should not be used (as of 2.6.13)
+ This file will be removed eventually.
+
+* in-kernel device<->driver matching (as of 2.6.13)
+ PCMCIA devices and their correct drivers can now be matched in
+ kernelspace. See 'devicetable.txt' for details.
+
+* Device model integration (as of 2.6.11)
+ A struct pcmcia_device is registered with the device model core,
+ and can be used (e.g. for SET_NETDEV_DEV) by using
+ handle_to_dev(client_handle_t * handle).
+
+* Convert internal I/O port addresses to unsigned int (as of 2.6.11)
+ ioaddr_t should be replaced by unsigned int in PCMCIA card drivers.
+
+* irq_mask and irq_list parameters (as of 2.6.11)
+ The irq_mask and irq_list parameters should no longer be used in
+ PCMCIA card drivers. Instead, it is the job of the PCMCIA core to
+ determine which IRQ should be used. Therefore, link->irq.IRQInfo2
+ is ignored.
+
+* client->PendingEvents is gone (as of 2.6.11)
+ client->PendingEvents is no longer available.
+
+* client->Attributes are gone (as of 2.6.11)
+ client->Attributes is unused, therefore it is removed from all
+ PCMCIA card drivers
+
+* core functions no longer available (as of 2.6.11)
+ The following functions have been removed from the kernel source
+ because they are unused by all in-kernel drivers, and no external
+ driver was reported to rely on them:
+ pcmcia_get_first_region()
+ pcmcia_get_next_region()
+ pcmcia_modify_window()
+ pcmcia_set_event_mask()
+ pcmcia_get_first_window()
+ pcmcia_get_next_window()
+
+* device list iteration upon module removal (as of 2.6.10)
+ It is no longer necessary to iterate on the driver's internal
+ client list and call the ->detach() function upon module removal.
+
+* Resource management. (as of 2.6.8)
+ Although the PCMCIA subsystem will allocate resources for cards,
+ it no longer marks these resources busy. This means that driver
+ authors are now responsible for claiming your resources as per
+ other drivers in Linux. You should use request_region() to mark
+ your IO regions in-use, and request_mem_region() to mark your
+ memory regions in-use. The name argument should be a pointer to
+ your driver name. Eg, for pcnet_cs, name should point to the
+ string "pcnet_cs".
+
+* CardServices is gone
+ CardServices() in 2.4 is just a big switch statement to call various
+ services. In 2.6, all of those entry points are exported and called
+ directly (except for pcmcia_report_error(), just use cs_error() instead).
+
+* struct pcmcia_driver
+ You need to use struct pcmcia_driver and pcmcia_{un,}register_driver
+ instead of {un,}register_pccard_driver
diff --git a/Documentation/pcmcia/driver.txt b/Documentation/pcmcia/driver.txt
new file mode 100644
index 000000000..0ac167920
--- /dev/null
+++ b/Documentation/pcmcia/driver.txt
@@ -0,0 +1,30 @@
+PCMCIA Driver
+-------------
+
+
+sysfs
+-----
+
+New PCMCIA IDs may be added to a device driver pcmcia_device_id table at
+runtime as shown below:
+
+echo "match_flags manf_id card_id func_id function device_no \
+prod_id_hash[0] prod_id_hash[1] prod_id_hash[2] prod_id_hash[3]" > \
+/sys/bus/pcmcia/drivers/{driver}/new_id
+
+All fields are passed in as hexadecimal values (no leading 0x).
+The meaning is described in the PCMCIA specification, the match_flags is
+a bitwise or-ed combination from PCMCIA_DEV_ID_MATCH_* constants
+defined in include/linux/mod_devicetable.h.
+
+Once added, the driver probe routine will be invoked for any unclaimed
+PCMCIA device listed in its (newly updated) pcmcia_device_id list.
+
+A common use-case is to add a new device according to the manufacturer ID
+and the card ID (form the manf_id and card_id file in the device tree).
+For this, just use:
+
+echo "0x3 manf_id card_id 0 0 0 0 0 0 0" > \
+ /sys/bus/pcmcia/drivers/{driver}/new_id
+
+after loading the driver.
diff --git a/Documentation/pcmcia/locking.txt b/Documentation/pcmcia/locking.txt
new file mode 100644
index 000000000..68f622bc4
--- /dev/null
+++ b/Documentation/pcmcia/locking.txt
@@ -0,0 +1,118 @@
+This file explains the locking and exclusion scheme used in the PCCARD
+and PCMCIA subsystems.
+
+
+A) Overview, Locking Hierarchy:
+===============================
+
+pcmcia_socket_list_rwsem - protects only the list of sockets
+- skt_mutex - serializes card insert / ejection
+ - ops_mutex - serializes socket operation
+
+
+B) Exclusion
+============
+
+The following functions and callbacks to struct pcmcia_socket must
+be called with "skt_mutex" held:
+
+ socket_detect_change()
+ send_event()
+ socket_reset()
+ socket_shutdown()
+ socket_setup()
+ socket_remove()
+ socket_insert()
+ socket_early_resume()
+ socket_late_resume()
+ socket_resume()
+ socket_suspend()
+
+ struct pcmcia_callback *callback
+
+The following functions and callbacks to struct pcmcia_socket must
+be called with "ops_mutex" held:
+
+ socket_reset()
+ socket_setup()
+
+ struct pccard_operations *ops
+ struct pccard_resource_ops *resource_ops;
+
+Note that send_event() and struct pcmcia_callback *callback must not be
+called with "ops_mutex" held.
+
+
+C) Protection
+=============
+
+1. Global Data:
+---------------
+struct list_head pcmcia_socket_list;
+
+protected by pcmcia_socket_list_rwsem;
+
+
+2. Per-Socket Data:
+-------------------
+The resource_ops and their data are protected by ops_mutex.
+
+The "main" struct pcmcia_socket is protected as follows (read-only fields
+or single-use fields not mentioned):
+
+- by pcmcia_socket_list_rwsem:
+ struct list_head socket_list;
+
+- by thread_lock:
+ unsigned int thread_events;
+
+- by skt_mutex:
+ u_int suspended_state;
+ void (*tune_bridge);
+ struct pcmcia_callback *callback;
+ int resume_status;
+
+- by ops_mutex:
+ socket_state_t socket;
+ u_int state;
+ u_short lock_count;
+ pccard_mem_map cis_mem;
+ void __iomem *cis_virt;
+ struct { } irq;
+ io_window_t io[];
+ pccard_mem_map win[];
+ struct list_head cis_cache;
+ size_t fake_cis_len;
+ u8 *fake_cis;
+ u_int irq_mask;
+ void (*zoom_video);
+ int (*power_hook);
+ u8 resource...;
+ struct list_head devices_list;
+ u8 device_count;
+ struct pcmcia_state;
+
+
+3. Per PCMCIA-device Data:
+--------------------------
+
+The "main" struct pcmcia_devie is protected as follows (read-only fields
+or single-use fields not mentioned):
+
+
+- by pcmcia_socket->ops_mutex:
+ struct list_head socket_device_list;
+ struct config_t *function_config;
+ u16 _irq:1;
+ u16 _io:1;
+ u16 _win:4;
+ u16 _locked:1;
+ u16 allow_func_id_match:1;
+ u16 suspended:1;
+ u16 _removed:1;
+
+- by the PCMCIA driver:
+ io_req_t io;
+ irq_req_t irq;
+ config_req_t conf;
+ window_handle_t win;
diff --git a/Documentation/percpu-rw-semaphore.txt b/Documentation/percpu-rw-semaphore.txt
new file mode 100644
index 000000000..7d3c82431
--- /dev/null
+++ b/Documentation/percpu-rw-semaphore.txt
@@ -0,0 +1,27 @@
+Percpu rw semaphores
+--------------------
+
+Percpu rw semaphores is a new read-write semaphore design that is
+optimized for locking for reading.
+
+The problem with traditional read-write semaphores is that when multiple
+cores take the lock for reading, the cache line containing the semaphore
+is bouncing between L1 caches of the cores, causing performance
+degradation.
+
+Locking for reading is very fast, it uses RCU and it avoids any atomic
+instruction in the lock and unlock path. On the other hand, locking for
+writing is very expensive, it calls synchronize_rcu() that can take
+hundreds of milliseconds.
+
+The lock is declared with "struct percpu_rw_semaphore" type.
+The lock is initialized percpu_init_rwsem, it returns 0 on success and
+-ENOMEM on allocation failure.
+The lock must be freed with percpu_free_rwsem to avoid memory leak.
+
+The lock is locked for read with percpu_down_read, percpu_up_read and
+for write with percpu_down_write, percpu_up_write.
+
+The idea of using RCU for optimized rw-lock was introduced by
+Eric Dumazet <eric.dumazet@gmail.com>.
+The code was written by Mikulas Patocka <mpatocka@redhat.com>
diff --git a/Documentation/phy.txt b/Documentation/phy.txt
new file mode 100644
index 000000000..371361c69
--- /dev/null
+++ b/Documentation/phy.txt
@@ -0,0 +1,152 @@
+ PHY SUBSYSTEM
+ Kishon Vijay Abraham I <kishon@ti.com>
+
+This document explains the Generic PHY Framework along with the APIs provided,
+and how-to-use.
+
+1. Introduction
+
+*PHY* is the abbreviation for physical layer. It is used to connect a device
+to the physical medium e.g., the USB controller has a PHY to provide functions
+such as serialization, de-serialization, encoding, decoding and is responsible
+for obtaining the required data transmission rate. Note that some USB
+controllers have PHY functionality embedded into it and others use an external
+PHY. Other peripherals that use PHY include Wireless LAN, Ethernet,
+SATA etc.
+
+The intention of creating this framework is to bring the PHY drivers spread
+all over the Linux kernel to drivers/phy to increase code re-use and for
+better code maintainability.
+
+This framework will be of use only to devices that use external PHY (PHY
+functionality is not embedded within the controller).
+
+2. Registering/Unregistering the PHY provider
+
+PHY provider refers to an entity that implements one or more PHY instances.
+For the simple case where the PHY provider implements only a single instance of
+the PHY, the framework provides its own implementation of of_xlate in
+of_phy_simple_xlate. If the PHY provider implements multiple instances, it
+should provide its own implementation of of_xlate. of_xlate is used only for
+dt boot case.
+
+#define of_phy_provider_register(dev, xlate) \
+ __of_phy_provider_register((dev), THIS_MODULE, (xlate))
+
+#define devm_of_phy_provider_register(dev, xlate) \
+ __devm_of_phy_provider_register((dev), THIS_MODULE, (xlate))
+
+of_phy_provider_register and devm_of_phy_provider_register macros can be used to
+register the phy_provider and it takes device and of_xlate as
+arguments. For the dt boot case, all PHY providers should use one of the above
+2 macros to register the PHY provider.
+
+void devm_of_phy_provider_unregister(struct device *dev,
+ struct phy_provider *phy_provider);
+void of_phy_provider_unregister(struct phy_provider *phy_provider);
+
+devm_of_phy_provider_unregister and of_phy_provider_unregister can be used to
+unregister the PHY.
+
+3. Creating the PHY
+
+The PHY driver should create the PHY in order for other peripheral controllers
+to make use of it. The PHY framework provides 2 APIs to create the PHY.
+
+struct phy *phy_create(struct device *dev, struct device_node *node,
+ const struct phy_ops *ops);
+struct phy *devm_phy_create(struct device *dev, struct device_node *node,
+ const struct phy_ops *ops);
+
+The PHY drivers can use one of the above 2 APIs to create the PHY by passing
+the device pointer and phy ops.
+phy_ops is a set of function pointers for performing PHY operations such as
+init, exit, power_on and power_off.
+
+Inorder to dereference the private data (in phy_ops), the phy provider driver
+can use phy_set_drvdata() after creating the PHY and use phy_get_drvdata() in
+phy_ops to get back the private data.
+
+4. Getting a reference to the PHY
+
+Before the controller can make use of the PHY, it has to get a reference to
+it. This framework provides the following APIs to get a reference to the PHY.
+
+struct phy *phy_get(struct device *dev, const char *string);
+struct phy *phy_optional_get(struct device *dev, const char *string);
+struct phy *devm_phy_get(struct device *dev, const char *string);
+struct phy *devm_phy_optional_get(struct device *dev, const char *string);
+
+phy_get, phy_optional_get, devm_phy_get and devm_phy_optional_get can
+be used to get the PHY. In the case of dt boot, the string arguments
+should contain the phy name as given in the dt data and in the case of
+non-dt boot, it should contain the label of the PHY. The two
+devm_phy_get associates the device with the PHY using devres on
+successful PHY get. On driver detach, release function is invoked on
+the the devres data and devres data is freed. phy_optional_get and
+devm_phy_optional_get should be used when the phy is optional. These
+two functions will never return -ENODEV, but instead returns NULL when
+the phy cannot be found.
+
+It should be noted that NULL is a valid phy reference. All phy
+consumer calls on the NULL phy become NOPs. That is the release calls,
+the phy_init() and phy_exit() calls, and phy_power_on() and
+phy_power_off() calls are all NOP when applied to a NULL phy. The NULL
+phy is useful in devices for handling optional phy devices.
+
+5. Releasing a reference to the PHY
+
+When the controller no longer needs the PHY, it has to release the reference
+to the PHY it has obtained using the APIs mentioned in the above section. The
+PHY framework provides 2 APIs to release a reference to the PHY.
+
+void phy_put(struct phy *phy);
+void devm_phy_put(struct device *dev, struct phy *phy);
+
+Both these APIs are used to release a reference to the PHY and devm_phy_put
+destroys the devres associated with this PHY.
+
+6. Destroying the PHY
+
+When the driver that created the PHY is unloaded, it should destroy the PHY it
+created using one of the following 2 APIs.
+
+void phy_destroy(struct phy *phy);
+void devm_phy_destroy(struct device *dev, struct phy *phy);
+
+Both these APIs destroy the PHY and devm_phy_destroy destroys the devres
+associated with this PHY.
+
+7. PM Runtime
+
+This subsystem is pm runtime enabled. So while creating the PHY,
+pm_runtime_enable of the phy device created by this subsystem is called and
+while destroying the PHY, pm_runtime_disable is called. Note that the phy
+device created by this subsystem will be a child of the device that calls
+phy_create (PHY provider device).
+
+So pm_runtime_get_sync of the phy_device created by this subsystem will invoke
+pm_runtime_get_sync of PHY provider device because of parent-child relationship.
+It should also be noted that phy_power_on and phy_power_off performs
+phy_pm_runtime_get_sync and phy_pm_runtime_put respectively.
+There are exported APIs like phy_pm_runtime_get, phy_pm_runtime_get_sync,
+phy_pm_runtime_put, phy_pm_runtime_put_sync, phy_pm_runtime_allow and
+phy_pm_runtime_forbid for performing PM operations.
+
+8. PHY Mappings
+
+In order to get reference to a PHY without help from DeviceTree, the framework
+offers lookups which can be compared to clkdev that allow clk structures to be
+bound to devices. A lookup can be made be made during runtime when a handle to
+the struct phy already exists.
+
+The framework offers the following API for registering and unregistering the
+lookups.
+
+int phy_create_lookup(struct phy *phy, const char *con_id, const char *dev_id);
+void phy_remove_lookup(struct phy *phy, const char *con_id, const char *dev_id);
+
+9. DeviceTree Binding
+
+The documentation for PHY dt binding can be found @
+Documentation/devicetree/bindings/phy/phy-bindings.txt
diff --git a/Documentation/phy/samsung-usb2.txt b/Documentation/phy/samsung-usb2.txt
new file mode 100644
index 000000000..ed12d4371
--- /dev/null
+++ b/Documentation/phy/samsung-usb2.txt
@@ -0,0 +1,135 @@
+.------------------------------------------------------------------------------+
+| Samsung USB 2.0 PHY adaptation layer |
++-----------------------------------------------------------------------------+'
+
+| 1. Description
++----------------
+
+The architecture of the USB 2.0 PHY module in Samsung SoCs is similar
+among many SoCs. In spite of the similarities it proved difficult to
+create a one driver that would fit all these PHY controllers. Often
+the differences were minor and were found in particular bits of the
+registers of the PHY. In some rare cases the order of register writes or
+the PHY powering up process had to be altered. This adaptation layer is
+a compromise between having separate drivers and having a single driver
+with added support for many special cases.
+
+| 2. Files description
++----------------------
+
+- phy-samsung-usb2.c
+ This is the main file of the adaptation layer. This file contains
+ the probe function and provides two callbacks to the Generic PHY
+ Framework. This two callbacks are used to power on and power off the
+ phy. They carry out the common work that has to be done on all version
+ of the PHY module. Depending on which SoC was chosen they execute SoC
+ specific callbacks. The specific SoC version is selected by choosing
+ the appropriate compatible string. In addition, this file contains
+ struct of_device_id definitions for particular SoCs.
+
+- phy-samsung-usb2.h
+ This is the include file. It declares the structures used by this
+ driver. In addition it should contain extern declarations for
+ structures that describe particular SoCs.
+
+| 3. Supporting SoCs
++--------------------
+
+To support a new SoC a new file should be added to the drivers/phy
+directory. Each SoC's configuration is stored in an instance of the
+struct samsung_usb2_phy_config.
+
+struct samsung_usb2_phy_config {
+ const struct samsung_usb2_common_phy *phys;
+ int (*rate_to_clk)(unsigned long, u32 *);
+ unsigned int num_phys;
+ bool has_mode_switch;
+};
+
+The num_phys is the number of phys handled by the driver. *phys is an
+array that contains the configuration for each phy. The has_mode_switch
+property is a boolean flag that determines whether the SoC has USB host
+and device on a single pair of pins. If so, a special register has to
+be modified to change the internal routing of these pins between a USB
+device or host module.
+
+For example the configuration for Exynos 4210 is following:
+
+const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = {
+ .has_mode_switch = 0,
+ .num_phys = EXYNOS4210_NUM_PHYS,
+ .phys = exynos4210_phys,
+ .rate_to_clk = exynos4210_rate_to_clk,
+}
+
+- int (*rate_to_clk)(unsigned long, u32 *)
+ The rate_to_clk callback is to convert the rate of the clock
+ used as the reference clock for the PHY module to the value
+ that should be written in the hardware register.
+
+The exynos4210_phys configuration array is as follows:
+
+static const struct samsung_usb2_common_phy exynos4210_phys[] = {
+ {
+ .label = "device",
+ .id = EXYNOS4210_DEVICE,
+ .power_on = exynos4210_power_on,
+ .power_off = exynos4210_power_off,
+ },
+ {
+ .label = "host",
+ .id = EXYNOS4210_HOST,
+ .power_on = exynos4210_power_on,
+ .power_off = exynos4210_power_off,
+ },
+ {
+ .label = "hsic0",
+ .id = EXYNOS4210_HSIC0,
+ .power_on = exynos4210_power_on,
+ .power_off = exynos4210_power_off,
+ },
+ {
+ .label = "hsic1",
+ .id = EXYNOS4210_HSIC1,
+ .power_on = exynos4210_power_on,
+ .power_off = exynos4210_power_off,
+ },
+ {},
+};
+
+- int (*power_on)(struct samsung_usb2_phy_instance *);
+- int (*power_off)(struct samsung_usb2_phy_instance *);
+ These two callbacks are used to power on and power off the phy
+ by modifying appropriate registers.
+
+Final change to the driver is adding appropriate compatible value to the
+phy-samsung-usb2.c file. In case of Exynos 4210 the following lines were
+added to the struct of_device_id samsung_usb2_phy_of_match[] array:
+
+#ifdef CONFIG_PHY_EXYNOS4210_USB2
+ {
+ .compatible = "samsung,exynos4210-usb2-phy",
+ .data = &exynos4210_usb2_phy_config,
+ },
+#endif
+
+To add further flexibility to the driver the Kconfig file enables to
+include support for selected SoCs in the compiled driver. The Kconfig
+entry for Exynos 4210 is following:
+
+config PHY_EXYNOS4210_USB2
+ bool "Support for Exynos 4210"
+ depends on PHY_SAMSUNG_USB2
+ depends on CPU_EXYNOS4210
+ help
+ Enable USB PHY support for Exynos 4210. This option requires that
+ Samsung USB 2.0 PHY driver is enabled and means that support for this
+ particular SoC is compiled in the driver. In case of Exynos 4210 four
+ phys are available - device, host, HSCI0 and HSCI1.
+
+The newly created file that supports the new SoC has to be also added to the
+Makefile. In case of Exynos 4210 the added line is following:
+
+obj-$(CONFIG_PHY_EXYNOS4210_USB2) += phy-exynos4210-usb2.o
+
+After completing these steps the support for the new SoC should be ready.
diff --git a/Documentation/pi-futex.txt b/Documentation/pi-futex.txt
new file mode 100644
index 000000000..9a5bc8651
--- /dev/null
+++ b/Documentation/pi-futex.txt
@@ -0,0 +1,121 @@
+Lightweight PI-futexes
+----------------------
+
+We are calling them lightweight for 3 reasons:
+
+ - in the user-space fastpath a PI-enabled futex involves no kernel work
+ (or any other PI complexity) at all. No registration, no extra kernel
+ calls - just pure fast atomic ops in userspace.
+
+ - even in the slowpath, the system call and scheduling pattern is very
+ similar to normal futexes.
+
+ - the in-kernel PI implementation is streamlined around the mutex
+ abstraction, with strict rules that keep the implementation
+ relatively simple: only a single owner may own a lock (i.e. no
+ read-write lock support), only the owner may unlock a lock, no
+ recursive locking, etc.
+
+Priority Inheritance - why?
+---------------------------
+
+The short reply: user-space PI helps achieving/improving determinism for
+user-space applications. In the best-case, it can help achieve
+determinism and well-bound latencies. Even in the worst-case, PI will
+improve the statistical distribution of locking related application
+delays.
+
+The longer reply:
+-----------------
+
+Firstly, sharing locks between multiple tasks is a common programming
+technique that often cannot be replaced with lockless algorithms. As we
+can see it in the kernel [which is a quite complex program in itself],
+lockless structures are rather the exception than the norm - the current
+ratio of lockless vs. locky code for shared data structures is somewhere
+between 1:10 and 1:100. Lockless is hard, and the complexity of lockless
+algorithms often endangers to ability to do robust reviews of said code.
+I.e. critical RT apps often choose lock structures to protect critical
+data structures, instead of lockless algorithms. Furthermore, there are
+cases (like shared hardware, or other resource limits) where lockless
+access is mathematically impossible.
+
+Media players (such as Jack) are an example of reasonable application
+design with multiple tasks (with multiple priority levels) sharing
+short-held locks: for example, a highprio audio playback thread is
+combined with medium-prio construct-audio-data threads and low-prio
+display-colory-stuff threads. Add video and decoding to the mix and
+we've got even more priority levels.
+
+So once we accept that synchronization objects (locks) are an
+unavoidable fact of life, and once we accept that multi-task userspace
+apps have a very fair expectation of being able to use locks, we've got
+to think about how to offer the option of a deterministic locking
+implementation to user-space.
+
+Most of the technical counter-arguments against doing priority
+inheritance only apply to kernel-space locks. But user-space locks are
+different, there we cannot disable interrupts or make the task
+non-preemptible in a critical section, so the 'use spinlocks' argument
+does not apply (user-space spinlocks have the same priority inversion
+problems as other user-space locking constructs). Fact is, pretty much
+the only technique that currently enables good determinism for userspace
+locks (such as futex-based pthread mutexes) is priority inheritance:
+
+Currently (without PI), if a high-prio and a low-prio task shares a lock
+[this is a quite common scenario for most non-trivial RT applications],
+even if all critical sections are coded carefully to be deterministic
+(i.e. all critical sections are short in duration and only execute a
+limited number of instructions), the kernel cannot guarantee any
+deterministic execution of the high-prio task: any medium-priority task
+could preempt the low-prio task while it holds the shared lock and
+executes the critical section, and could delay it indefinitely.
+
+Implementation:
+---------------
+
+As mentioned before, the userspace fastpath of PI-enabled pthread
+mutexes involves no kernel work at all - they behave quite similarly to
+normal futex-based locks: a 0 value means unlocked, and a value==TID
+means locked. (This is the same method as used by list-based robust
+futexes.) Userspace uses atomic ops to lock/unlock these mutexes without
+entering the kernel.
+
+To handle the slowpath, we have added two new futex ops:
+
+ FUTEX_LOCK_PI
+ FUTEX_UNLOCK_PI
+
+If the lock-acquire fastpath fails, [i.e. an atomic transition from 0 to
+TID fails], then FUTEX_LOCK_PI is called. The kernel does all the
+remaining work: if there is no futex-queue attached to the futex address
+yet then the code looks up the task that owns the futex [it has put its
+own TID into the futex value], and attaches a 'PI state' structure to
+the futex-queue. The pi_state includes an rt-mutex, which is a PI-aware,
+kernel-based synchronization object. The 'other' task is made the owner
+of the rt-mutex, and the FUTEX_WAITERS bit is atomically set in the
+futex value. Then this task tries to lock the rt-mutex, on which it
+blocks. Once it returns, it has the mutex acquired, and it sets the
+futex value to its own TID and returns. Userspace has no other work to
+perform - it now owns the lock, and futex value contains
+FUTEX_WAITERS|TID.
+
+If the unlock side fastpath succeeds, [i.e. userspace manages to do a
+TID -> 0 atomic transition of the futex value], then no kernel work is
+triggered.
+
+If the unlock fastpath fails (because the FUTEX_WAITERS bit is set),
+then FUTEX_UNLOCK_PI is called, and the kernel unlocks the futex on the
+behalf of userspace - and it also unlocks the attached
+pi_state->rt_mutex and thus wakes up any potential waiters.
+
+Note that under this approach, contrary to previous PI-futex approaches,
+there is no prior 'registration' of a PI-futex. [which is not quite
+possible anyway, due to existing ABI properties of pthread mutexes.]
+
+Also, under this scheme, 'robustness' and 'PI' are two orthogonal
+properties of futexes, and all four combinations are possible: futex,
+robust-futex, PI-futex, robust+PI-futex.
+
+More details about priority inheritance can be found in
+Documentation/rt-mutex.txt.
diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
new file mode 100644
index 000000000..a9b47163b
--- /dev/null
+++ b/Documentation/pinctrl.txt
@@ -0,0 +1,1408 @@
+PINCTRL (PIN CONTROL) subsystem
+This document outlines the pin control subsystem in Linux
+
+This subsystem deals with:
+
+- Enumerating and naming controllable pins
+
+- Multiplexing of pins, pads, fingers (etc) see below for details
+
+- Configuration of pins, pads, fingers (etc), such as software-controlled
+ biasing and driving mode specific pins, such as pull-up/down, open drain,
+ load capacitance etc.
+
+Top-level interface
+===================
+
+Definition of PIN CONTROLLER:
+
+- A pin controller is a piece of hardware, usually a set of registers, that
+ can control PINs. It may be able to multiplex, bias, set load capacitance,
+ set drive strength, etc. for individual pins or groups of pins.
+
+Definition of PIN:
+
+- PINS are equal to pads, fingers, balls or whatever packaging input or
+ output line you want to control and these are denoted by unsigned integers
+ in the range 0..maxpin. This numberspace is local to each PIN CONTROLLER, so
+ there may be several such number spaces in a system. This pin space may
+ be sparse - i.e. there may be gaps in the space with numbers where no
+ pin exists.
+
+When a PIN CONTROLLER is instantiated, it will register a descriptor to the
+pin control framework, and this descriptor contains an array of pin descriptors
+describing the pins handled by this specific pin controller.
+
+Here is an example of a PGA (Pin Grid Array) chip seen from underneath:
+
+ A B C D E F G H
+
+ 8 o o o o o o o o
+
+ 7 o o o o o o o o
+
+ 6 o o o o o o o o
+
+ 5 o o o o o o o o
+
+ 4 o o o o o o o o
+
+ 3 o o o o o o o o
+
+ 2 o o o o o o o o
+
+ 1 o o o o o o o o
+
+To register a pin controller and name all the pins on this package we can do
+this in our driver:
+
+#include <linux/pinctrl/pinctrl.h>
+
+const struct pinctrl_pin_desc foo_pins[] = {
+ PINCTRL_PIN(0, "A8"),
+ PINCTRL_PIN(1, "B8"),
+ PINCTRL_PIN(2, "C8"),
+ ...
+ PINCTRL_PIN(61, "F1"),
+ PINCTRL_PIN(62, "G1"),
+ PINCTRL_PIN(63, "H1"),
+};
+
+static struct pinctrl_desc foo_desc = {
+ .name = "foo",
+ .pins = foo_pins,
+ .npins = ARRAY_SIZE(foo_pins),
+ .owner = THIS_MODULE,
+};
+
+int __init foo_probe(void)
+{
+ struct pinctrl_dev *pctl;
+
+ pctl = pinctrl_register(&foo_desc, <PARENT>, NULL);
+ if (!pctl)
+ pr_err("could not register foo pin driver\n");
+}
+
+To enable the pinctrl subsystem and the subgroups for PINMUX and PINCONF and
+selected drivers, you need to select them from your machine's Kconfig entry,
+since these are so tightly integrated with the machines they are used on.
+See for example arch/arm/mach-u300/Kconfig for an example.
+
+Pins usually have fancier names than this. You can find these in the datasheet
+for your chip. Notice that the core pinctrl.h file provides a fancy macro
+called PINCTRL_PIN() to create the struct entries. As you can see I enumerated
+the pins from 0 in the upper left corner to 63 in the lower right corner.
+This enumeration was arbitrarily chosen, in practice you need to think
+through your numbering system so that it matches the layout of registers
+and such things in your driver, or the code may become complicated. You must
+also consider matching of offsets to the GPIO ranges that may be handled by
+the pin controller.
+
+For a padring with 467 pads, as opposed to actual pins, I used an enumeration
+like this, walking around the edge of the chip, which seems to be industry
+standard too (all these pads had names, too):
+
+
+ 0 ..... 104
+ 466 105
+ . .
+ . .
+ 358 224
+ 357 .... 225
+
+
+Pin groups
+==========
+
+Many controllers need to deal with groups of pins, so the pin controller
+subsystem has a mechanism for enumerating groups of pins and retrieving the
+actual enumerated pins that are part of a certain group.
+
+For example, say that we have a group of pins dealing with an SPI interface
+on { 0, 8, 16, 24 }, and a group of pins dealing with an I2C interface on pins
+on { 24, 25 }.
+
+These two groups are presented to the pin control subsystem by implementing
+some generic pinctrl_ops like this:
+
+#include <linux/pinctrl/pinctrl.h>
+
+struct foo_group {
+ const char *name;
+ const unsigned int *pins;
+ const unsigned num_pins;
+};
+
+static const unsigned int spi0_pins[] = { 0, 8, 16, 24 };
+static const unsigned int i2c0_pins[] = { 24, 25 };
+
+static const struct foo_group foo_groups[] = {
+ {
+ .name = "spi0_grp",
+ .pins = spi0_pins,
+ .num_pins = ARRAY_SIZE(spi0_pins),
+ },
+ {
+ .name = "i2c0_grp",
+ .pins = i2c0_pins,
+ .num_pins = ARRAY_SIZE(i2c0_pins),
+ },
+};
+
+
+static int foo_get_groups_count(struct pinctrl_dev *pctldev)
+{
+ return ARRAY_SIZE(foo_groups);
+}
+
+static const char *foo_get_group_name(struct pinctrl_dev *pctldev,
+ unsigned selector)
+{
+ return foo_groups[selector].name;
+}
+
+static int foo_get_group_pins(struct pinctrl_dev *pctldev, unsigned selector,
+ const unsigned **pins,
+ unsigned *num_pins)
+{
+ *pins = (unsigned *) foo_groups[selector].pins;
+ *num_pins = foo_groups[selector].num_pins;
+ return 0;
+}
+
+static struct pinctrl_ops foo_pctrl_ops = {
+ .get_groups_count = foo_get_groups_count,
+ .get_group_name = foo_get_group_name,
+ .get_group_pins = foo_get_group_pins,
+};
+
+
+static struct pinctrl_desc foo_desc = {
+ ...
+ .pctlops = &foo_pctrl_ops,
+};
+
+The pin control subsystem will call the .get_groups_count() function to
+determine the total number of legal selectors, then it will call the other functions
+to retrieve the name and pins of the group. Maintaining the data structure of
+the groups is up to the driver, this is just a simple example - in practice you
+may need more entries in your group structure, for example specific register
+ranges associated with each group and so on.
+
+
+Pin configuration
+=================
+
+Pins can sometimes be software-configured in various ways, mostly related
+to their electronic properties when used as inputs or outputs. For example you
+may be able to make an output pin high impedance, or "tristate" meaning it is
+effectively disconnected. You may be able to connect an input pin to VDD or GND
+using a certain resistor value - pull up and pull down - so that the pin has a
+stable value when nothing is driving the rail it is connected to, or when it's
+unconnected.
+
+Pin configuration can be programmed by adding configuration entries into the
+mapping table; see section "Board/machine configuration" below.
+
+The format and meaning of the configuration parameter, PLATFORM_X_PULL_UP
+above, is entirely defined by the pin controller driver.
+
+The pin configuration driver implements callbacks for changing pin
+configuration in the pin controller ops like this:
+
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinconf.h>
+#include "platform_x_pindefs.h"
+
+static int foo_pin_config_get(struct pinctrl_dev *pctldev,
+ unsigned offset,
+ unsigned long *config)
+{
+ struct my_conftype conf;
+
+ ... Find setting for pin @ offset ...
+
+ *config = (unsigned long) conf;
+}
+
+static int foo_pin_config_set(struct pinctrl_dev *pctldev,
+ unsigned offset,
+ unsigned long config)
+{
+ struct my_conftype *conf = (struct my_conftype *) config;
+
+ switch (conf) {
+ case PLATFORM_X_PULL_UP:
+ ...
+ }
+ }
+}
+
+static int foo_pin_config_group_get (struct pinctrl_dev *pctldev,
+ unsigned selector,
+ unsigned long *config)
+{
+ ...
+}
+
+static int foo_pin_config_group_set (struct pinctrl_dev *pctldev,
+ unsigned selector,
+ unsigned long config)
+{
+ ...
+}
+
+static struct pinconf_ops foo_pconf_ops = {
+ .pin_config_get = foo_pin_config_get,
+ .pin_config_set = foo_pin_config_set,
+ .pin_config_group_get = foo_pin_config_group_get,
+ .pin_config_group_set = foo_pin_config_group_set,
+};
+
+/* Pin config operations are handled by some pin controller */
+static struct pinctrl_desc foo_desc = {
+ ...
+ .confops = &foo_pconf_ops,
+};
+
+Since some controllers have special logic for handling entire groups of pins
+they can exploit the special whole-group pin control function. The
+pin_config_group_set() callback is allowed to return the error code -EAGAIN,
+for groups it does not want to handle, or if it just wants to do some
+group-level handling and then fall through to iterate over all pins, in which
+case each individual pin will be treated by separate pin_config_set() calls as
+well.
+
+
+Interaction with the GPIO subsystem
+===================================
+
+The GPIO drivers may want to perform operations of various types on the same
+physical pins that are also registered as pin controller pins.
+
+First and foremost, the two subsystems can be used as completely orthogonal,
+see the section named "pin control requests from drivers" and
+"drivers needing both pin control and GPIOs" below for details. But in some
+situations a cross-subsystem mapping between pins and GPIOs is needed.
+
+Since the pin controller subsystem have its pinspace local to the pin
+controller we need a mapping so that the pin control subsystem can figure out
+which pin controller handles control of a certain GPIO pin. Since a single
+pin controller may be muxing several GPIO ranges (typically SoCs that have
+one set of pins, but internally several GPIO silicon blocks, each modelled as
+a struct gpio_chip) any number of GPIO ranges can be added to a pin controller
+instance like this:
+
+struct gpio_chip chip_a;
+struct gpio_chip chip_b;
+
+static struct pinctrl_gpio_range gpio_range_a = {
+ .name = "chip a",
+ .id = 0,
+ .base = 32,
+ .pin_base = 32,
+ .npins = 16,
+ .gc = &chip_a;
+};
+
+static struct pinctrl_gpio_range gpio_range_b = {
+ .name = "chip b",
+ .id = 0,
+ .base = 48,
+ .pin_base = 64,
+ .npins = 8,
+ .gc = &chip_b;
+};
+
+{
+ struct pinctrl_dev *pctl;
+ ...
+ pinctrl_add_gpio_range(pctl, &gpio_range_a);
+ pinctrl_add_gpio_range(pctl, &gpio_range_b);
+}
+
+So this complex system has one pin controller handling two different
+GPIO chips. "chip a" has 16 pins and "chip b" has 8 pins. The "chip a" and
+"chip b" have different .pin_base, which means a start pin number of the
+GPIO range.
+
+The GPIO range of "chip a" starts from the GPIO base of 32 and actual
+pin range also starts from 32. However "chip b" has different starting
+offset for the GPIO range and pin range. The GPIO range of "chip b" starts
+from GPIO number 48, while the pin range of "chip b" starts from 64.
+
+We can convert a gpio number to actual pin number using this "pin_base".
+They are mapped in the global GPIO pin space at:
+
+chip a:
+ - GPIO range : [32 .. 47]
+ - pin range : [32 .. 47]
+chip b:
+ - GPIO range : [48 .. 55]
+ - pin range : [64 .. 71]
+
+The above examples assume the mapping between the GPIOs and pins is
+linear. If the mapping is sparse or haphazard, an array of arbitrary pin
+numbers can be encoded in the range like this:
+
+static const unsigned range_pins[] = { 14, 1, 22, 17, 10, 8, 6, 2 };
+
+static struct pinctrl_gpio_range gpio_range = {
+ .name = "chip",
+ .id = 0,
+ .base = 32,
+ .pins = &range_pins,
+ .npins = ARRAY_SIZE(range_pins),
+ .gc = &chip;
+};
+
+In this case the pin_base property will be ignored. If the name of a pin
+group is known, the pins and npins elements of the above structure can be
+initialised using the function pinctrl_get_group_pins(), e.g. for pin
+group "foo":
+
+pinctrl_get_group_pins(pctl, "foo", &gpio_range.pins, &gpio_range.npins);
+
+When GPIO-specific functions in the pin control subsystem are called, these
+ranges will be used to look up the appropriate pin controller by inspecting
+and matching the pin to the pin ranges across all controllers. When a
+pin controller handling the matching range is found, GPIO-specific functions
+will be called on that specific pin controller.
+
+For all functionalities dealing with pin biasing, pin muxing etc, the pin
+controller subsystem will look up the corresponding pin number from the passed
+in gpio number, and use the range's internals to retrieve a pin number. After
+that, the subsystem passes it on to the pin control driver, so the driver
+will get a pin number into its handled number range. Further it is also passed
+the range ID value, so that the pin controller knows which range it should
+deal with.
+
+Calling pinctrl_add_gpio_range from pinctrl driver is DEPRECATED. Please see
+section 2.1 of Documentation/devicetree/bindings/gpio/gpio.txt on how to bind
+pinctrl and gpio drivers.
+
+
+PINMUX interfaces
+=================
+
+These calls use the pinmux_* naming prefix. No other calls should use that
+prefix.
+
+
+What is pinmuxing?
+==================
+
+PINMUX, also known as padmux, ballmux, alternate functions or mission modes
+is a way for chip vendors producing some kind of electrical packages to use
+a certain physical pin (ball, pad, finger, etc) for multiple mutually exclusive
+functions, depending on the application. By "application" in this context
+we usually mean a way of soldering or wiring the package into an electronic
+system, even though the framework makes it possible to also change the function
+at runtime.
+
+Here is an example of a PGA (Pin Grid Array) chip seen from underneath:
+
+ A B C D E F G H
+ +---+
+ 8 | o | o o o o o o o
+ | |
+ 7 | o | o o o o o o o
+ | |
+ 6 | o | o o o o o o o
+ +---+---+
+ 5 | o | o | o o o o o o
+ +---+---+ +---+
+ 4 o o o o o o | o | o
+ | |
+ 3 o o o o o o | o | o
+ | |
+ 2 o o o o o o | o | o
+ +-------+-------+-------+---+---+
+ 1 | o o | o o | o o | o | o |
+ +-------+-------+-------+---+---+
+
+This is not tetris. The game to think of is chess. Not all PGA/BGA packages
+are chessboard-like, big ones have "holes" in some arrangement according to
+different design patterns, but we're using this as a simple example. Of the
+pins you see some will be taken by things like a few VCC and GND to feed power
+to the chip, and quite a few will be taken by large ports like an external
+memory interface. The remaining pins will often be subject to pin multiplexing.
+
+The example 8x8 PGA package above will have pin numbers 0 through 63 assigned
+to its physical pins. It will name the pins { A1, A2, A3 ... H6, H7, H8 } using
+pinctrl_register_pins() and a suitable data set as shown earlier.
+
+In this 8x8 BGA package the pins { A8, A7, A6, A5 } can be used as an SPI port
+(these are four pins: CLK, RXD, TXD, FRM). In that case, pin B5 can be used as
+some general-purpose GPIO pin. However, in another setting, pins { A5, B5 } can
+be used as an I2C port (these are just two pins: SCL, SDA). Needless to say,
+we cannot use the SPI port and I2C port at the same time. However in the inside
+of the package the silicon performing the SPI logic can alternatively be routed
+out on pins { G4, G3, G2, G1 }.
+
+On the bottom row at { A1, B1, C1, D1, E1, F1, G1, H1 } we have something
+special - it's an external MMC bus that can be 2, 4 or 8 bits wide, and it will
+consume 2, 4 or 8 pins respectively, so either { A1, B1 } are taken or
+{ A1, B1, C1, D1 } or all of them. If we use all 8 bits, we cannot use the SPI
+port on pins { G4, G3, G2, G1 } of course.
+
+This way the silicon blocks present inside the chip can be multiplexed "muxed"
+out on different pin ranges. Often contemporary SoC (systems on chip) will
+contain several I2C, SPI, SDIO/MMC, etc silicon blocks that can be routed to
+different pins by pinmux settings.
+
+Since general-purpose I/O pins (GPIO) are typically always in shortage, it is
+common to be able to use almost any pin as a GPIO pin if it is not currently
+in use by some other I/O port.
+
+
+Pinmux conventions
+==================
+
+The purpose of the pinmux functionality in the pin controller subsystem is to
+abstract and provide pinmux settings to the devices you choose to instantiate
+in your machine configuration. It is inspired by the clk, GPIO and regulator
+subsystems, so devices will request their mux setting, but it's also possible
+to request a single pin for e.g. GPIO.
+
+Definitions:
+
+- FUNCTIONS can be switched in and out by a driver residing with the pin
+ control subsystem in the drivers/pinctrl/* directory of the kernel. The
+ pin control driver knows the possible functions. In the example above you can
+ identify three pinmux functions, one for spi, one for i2c and one for mmc.
+
+- FUNCTIONS are assumed to be enumerable from zero in a one-dimensional array.
+ In this case the array could be something like: { spi0, i2c0, mmc0 }
+ for the three available functions.
+
+- FUNCTIONS have PIN GROUPS as defined on the generic level - so a certain
+ function is *always* associated with a certain set of pin groups, could
+ be just a single one, but could also be many. In the example above the
+ function i2c is associated with the pins { A5, B5 }, enumerated as
+ { 24, 25 } in the controller pin space.
+
+ The Function spi is associated with pin groups { A8, A7, A6, A5 }
+ and { G4, G3, G2, G1 }, which are enumerated as { 0, 8, 16, 24 } and
+ { 38, 46, 54, 62 } respectively.
+
+ Group names must be unique per pin controller, no two groups on the same
+ controller may have the same name.
+
+- The combination of a FUNCTION and a PIN GROUP determine a certain function
+ for a certain set of pins. The knowledge of the functions and pin groups
+ and their machine-specific particulars are kept inside the pinmux driver,
+ from the outside only the enumerators are known, and the driver core can:
+
+ - Request the name of a function with a certain selector (>= 0)
+ - A list of groups associated with a certain function
+ - Request that a certain group in that list to be activated for a certain
+ function
+
+ As already described above, pin groups are in turn self-descriptive, so
+ the core will retrieve the actual pin range in a certain group from the
+ driver.
+
+- FUNCTIONS and GROUPS on a certain PIN CONTROLLER are MAPPED to a certain
+ device by the board file, device tree or similar machine setup configuration
+ mechanism, similar to how regulators are connected to devices, usually by
+ name. Defining a pin controller, function and group thus uniquely identify
+ the set of pins to be used by a certain device. (If only one possible group
+ of pins is available for the function, no group name need to be supplied -
+ the core will simply select the first and only group available.)
+
+ In the example case we can define that this particular machine shall
+ use device spi0 with pinmux function fspi0 group gspi0 and i2c0 on function
+ fi2c0 group gi2c0, on the primary pin controller, we get mappings
+ like these:
+
+ {
+ {"map-spi0", spi0, pinctrl0, fspi0, gspi0},
+ {"map-i2c0", i2c0, pinctrl0, fi2c0, gi2c0}
+ }
+
+ Every map must be assigned a state name, pin controller, device and
+ function. The group is not compulsory - if it is omitted the first group
+ presented by the driver as applicable for the function will be selected,
+ which is useful for simple cases.
+
+ It is possible to map several groups to the same combination of device,
+ pin controller and function. This is for cases where a certain function on
+ a certain pin controller may use different sets of pins in different
+ configurations.
+
+- PINS for a certain FUNCTION using a certain PIN GROUP on a certain
+ PIN CONTROLLER are provided on a first-come first-serve basis, so if some
+ other device mux setting or GPIO pin request has already taken your physical
+ pin, you will be denied the use of it. To get (activate) a new setting, the
+ old one has to be put (deactivated) first.
+
+Sometimes the documentation and hardware registers will be oriented around
+pads (or "fingers") rather than pins - these are the soldering surfaces on the
+silicon inside the package, and may or may not match the actual number of
+pins/balls underneath the capsule. Pick some enumeration that makes sense to
+you. Define enumerators only for the pins you can control if that makes sense.
+
+Assumptions:
+
+We assume that the number of possible function maps to pin groups is limited by
+the hardware. I.e. we assume that there is no system where any function can be
+mapped to any pin, like in a phone exchange. So the available pin groups for
+a certain function will be limited to a few choices (say up to eight or so),
+not hundreds or any amount of choices. This is the characteristic we have found
+by inspecting available pinmux hardware, and a necessary assumption since we
+expect pinmux drivers to present *all* possible function vs pin group mappings
+to the subsystem.
+
+
+Pinmux drivers
+==============
+
+The pinmux core takes care of preventing conflicts on pins and calling
+the pin controller driver to execute different settings.
+
+It is the responsibility of the pinmux driver to impose further restrictions
+(say for example infer electronic limitations due to load, etc.) to determine
+whether or not the requested function can actually be allowed, and in case it
+is possible to perform the requested mux setting, poke the hardware so that
+this happens.
+
+Pinmux drivers are required to supply a few callback functions, some are
+optional. Usually the set_mux() function is implemented, writing values into
+some certain registers to activate a certain mux setting for a certain pin.
+
+A simple driver for the above example will work by setting bits 0, 1, 2, 3 or 4
+into some register named MUX to select a certain function with a certain
+group of pins would work something like this:
+
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+
+struct foo_group {
+ const char *name;
+ const unsigned int *pins;
+ const unsigned num_pins;
+};
+
+static const unsigned spi0_0_pins[] = { 0, 8, 16, 24 };
+static const unsigned spi0_1_pins[] = { 38, 46, 54, 62 };
+static const unsigned i2c0_pins[] = { 24, 25 };
+static const unsigned mmc0_1_pins[] = { 56, 57 };
+static const unsigned mmc0_2_pins[] = { 58, 59 };
+static const unsigned mmc0_3_pins[] = { 60, 61, 62, 63 };
+
+static const struct foo_group foo_groups[] = {
+ {
+ .name = "spi0_0_grp",
+ .pins = spi0_0_pins,
+ .num_pins = ARRAY_SIZE(spi0_0_pins),
+ },
+ {
+ .name = "spi0_1_grp",
+ .pins = spi0_1_pins,
+ .num_pins = ARRAY_SIZE(spi0_1_pins),
+ },
+ {
+ .name = "i2c0_grp",
+ .pins = i2c0_pins,
+ .num_pins = ARRAY_SIZE(i2c0_pins),
+ },
+ {
+ .name = "mmc0_1_grp",
+ .pins = mmc0_1_pins,
+ .num_pins = ARRAY_SIZE(mmc0_1_pins),
+ },
+ {
+ .name = "mmc0_2_grp",
+ .pins = mmc0_2_pins,
+ .num_pins = ARRAY_SIZE(mmc0_2_pins),
+ },
+ {
+ .name = "mmc0_3_grp",
+ .pins = mmc0_3_pins,
+ .num_pins = ARRAY_SIZE(mmc0_3_pins),
+ },
+};
+
+
+static int foo_get_groups_count(struct pinctrl_dev *pctldev)
+{
+ return ARRAY_SIZE(foo_groups);
+}
+
+static const char *foo_get_group_name(struct pinctrl_dev *pctldev,
+ unsigned selector)
+{
+ return foo_groups[selector].name;
+}
+
+static int foo_get_group_pins(struct pinctrl_dev *pctldev, unsigned selector,
+ unsigned ** const pins,
+ unsigned * const num_pins)
+{
+ *pins = (unsigned *) foo_groups[selector].pins;
+ *num_pins = foo_groups[selector].num_pins;
+ return 0;
+}
+
+static struct pinctrl_ops foo_pctrl_ops = {
+ .get_groups_count = foo_get_groups_count,
+ .get_group_name = foo_get_group_name,
+ .get_group_pins = foo_get_group_pins,
+};
+
+struct foo_pmx_func {
+ const char *name;
+ const char * const *groups;
+ const unsigned num_groups;
+};
+
+static const char * const spi0_groups[] = { "spi0_0_grp", "spi0_1_grp" };
+static const char * const i2c0_groups[] = { "i2c0_grp" };
+static const char * const mmc0_groups[] = { "mmc0_1_grp", "mmc0_2_grp",
+ "mmc0_3_grp" };
+
+static const struct foo_pmx_func foo_functions[] = {
+ {
+ .name = "spi0",
+ .groups = spi0_groups,
+ .num_groups = ARRAY_SIZE(spi0_groups),
+ },
+ {
+ .name = "i2c0",
+ .groups = i2c0_groups,
+ .num_groups = ARRAY_SIZE(i2c0_groups),
+ },
+ {
+ .name = "mmc0",
+ .groups = mmc0_groups,
+ .num_groups = ARRAY_SIZE(mmc0_groups),
+ },
+};
+
+static int foo_get_functions_count(struct pinctrl_dev *pctldev)
+{
+ return ARRAY_SIZE(foo_functions);
+}
+
+static const char *foo_get_fname(struct pinctrl_dev *pctldev, unsigned selector)
+{
+ return foo_functions[selector].name;
+}
+
+static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector,
+ const char * const **groups,
+ unsigned * const num_groups)
+{
+ *groups = foo_functions[selector].groups;
+ *num_groups = foo_functions[selector].num_groups;
+ return 0;
+}
+
+static int foo_set_mux(struct pinctrl_dev *pctldev, unsigned selector,
+ unsigned group)
+{
+ u8 regbit = (1 << selector + group);
+
+ writeb((readb(MUX)|regbit), MUX)
+ return 0;
+}
+
+static struct pinmux_ops foo_pmxops = {
+ .get_functions_count = foo_get_functions_count,
+ .get_function_name = foo_get_fname,
+ .get_function_groups = foo_get_groups,
+ .set_mux = foo_set_mux,
+};
+
+/* Pinmux operations are handled by some pin controller */
+static struct pinctrl_desc foo_desc = {
+ ...
+ .pctlops = &foo_pctrl_ops,
+ .pmxops = &foo_pmxops,
+};
+
+In the example activating muxing 0 and 1 at the same time setting bits
+0 and 1, uses one pin in common so they would collide.
+
+The beauty of the pinmux subsystem is that since it keeps track of all
+pins and who is using them, it will already have denied an impossible
+request like that, so the driver does not need to worry about such
+things - when it gets a selector passed in, the pinmux subsystem makes
+sure no other device or GPIO assignment is already using the selected
+pins. Thus bits 0 and 1 in the control register will never be set at the
+same time.
+
+All the above functions are mandatory to implement for a pinmux driver.
+
+
+Pin control interaction with the GPIO subsystem
+===============================================
+
+Note that the following implies that the use case is to use a certain pin
+from the Linux kernel using the API in <linux/gpio.h> with gpio_request()
+and similar functions. There are cases where you may be using something
+that your datasheet calls "GPIO mode", but actually is just an electrical
+configuration for a certain device. See the section below named
+"GPIO mode pitfalls" for more details on this scenario.
+
+The public pinmux API contains two functions named pinctrl_request_gpio()
+and pinctrl_free_gpio(). These two functions shall *ONLY* be called from
+gpiolib-based drivers as part of their gpio_request() and
+gpio_free() semantics. Likewise the pinctrl_gpio_direction_[input|output]
+shall only be called from within respective gpio_direction_[input|output]
+gpiolib implementation.
+
+NOTE that platforms and individual drivers shall *NOT* request GPIO pins to be
+controlled e.g. muxed in. Instead, implement a proper gpiolib driver and have
+that driver request proper muxing and other control for its pins.
+
+The function list could become long, especially if you can convert every
+individual pin into a GPIO pin independent of any other pins, and then try
+the approach to define every pin as a function.
+
+In this case, the function array would become 64 entries for each GPIO
+setting and then the device functions.
+
+For this reason there are two functions a pin control driver can implement
+to enable only GPIO on an individual pin: .gpio_request_enable() and
+.gpio_disable_free().
+
+This function will pass in the affected GPIO range identified by the pin
+controller core, so you know which GPIO pins are being affected by the request
+operation.
+
+If your driver needs to have an indication from the framework of whether the
+GPIO pin shall be used for input or output you can implement the
+.gpio_set_direction() function. As described this shall be called from the
+gpiolib driver and the affected GPIO range, pin offset and desired direction
+will be passed along to this function.
+
+Alternatively to using these special functions, it is fully allowed to use
+named functions for each GPIO pin, the pinctrl_request_gpio() will attempt to
+obtain the function "gpioN" where "N" is the global GPIO pin number if no
+special GPIO-handler is registered.
+
+
+GPIO mode pitfalls
+==================
+
+Due to the naming conventions used by hardware engineers, where "GPIO"
+is taken to mean different things than what the kernel does, the developer
+may be confused by a datasheet talking about a pin being possible to set
+into "GPIO mode". It appears that what hardware engineers mean with
+"GPIO mode" is not necessarily the use case that is implied in the kernel
+interface <linux/gpio.h>: a pin that you grab from kernel code and then
+either listen for input or drive high/low to assert/deassert some
+external line.
+
+Rather hardware engineers think that "GPIO mode" means that you can
+software-control a few electrical properties of the pin that you would
+not be able to control if the pin was in some other mode, such as muxed in
+for a device.
+
+The GPIO portions of a pin and its relation to a certain pin controller
+configuration and muxing logic can be constructed in several ways. Here
+are two examples:
+
+(A)
+ pin config
+ logic regs
+ | +- SPI
+ Physical pins --- pad --- pinmux -+- I2C
+ | +- mmc
+ | +- GPIO
+ pin
+ multiplex
+ logic regs
+
+Here some electrical properties of the pin can be configured no matter
+whether the pin is used for GPIO or not. If you multiplex a GPIO onto a
+pin, you can also drive it high/low from "GPIO" registers.
+Alternatively, the pin can be controlled by a certain peripheral, while
+still applying desired pin config properties. GPIO functionality is thus
+orthogonal to any other device using the pin.
+
+In this arrangement the registers for the GPIO portions of the pin controller,
+or the registers for the GPIO hardware module are likely to reside in a
+separate memory range only intended for GPIO driving, and the register
+range dealing with pin config and pin multiplexing get placed into a
+different memory range and a separate section of the data sheet.
+
+(B)
+
+ pin config
+ logic regs
+ | +- SPI
+ Physical pins --- pad --- pinmux -+- I2C
+ | | +- mmc
+ | |
+ GPIO pin
+ multiplex
+ logic regs
+
+In this arrangement, the GPIO functionality can always be enabled, such that
+e.g. a GPIO input can be used to "spy" on the SPI/I2C/MMC signal while it is
+pulsed out. It is likely possible to disrupt the traffic on the pin by doing
+wrong things on the GPIO block, as it is never really disconnected. It is
+possible that the GPIO, pin config and pin multiplex registers are placed into
+the same memory range and the same section of the data sheet, although that
+need not be the case.
+
+From a kernel point of view, however, these are different aspects of the
+hardware and shall be put into different subsystems:
+
+- Registers (or fields within registers) that control electrical
+ properties of the pin such as biasing and drive strength should be
+ exposed through the pinctrl subsystem, as "pin configuration" settings.
+
+- Registers (or fields within registers) that control muxing of signals
+ from various other HW blocks (e.g. I2C, MMC, or GPIO) onto pins should
+ be exposed through the pinctrl subsystem, as mux functions.
+
+- Registers (or fields within registers) that control GPIO functionality
+ such as setting a GPIO's output value, reading a GPIO's input value, or
+ setting GPIO pin direction should be exposed through the GPIO subsystem,
+ and if they also support interrupt capabilities, through the irqchip
+ abstraction.
+
+Depending on the exact HW register design, some functions exposed by the
+GPIO subsystem may call into the pinctrl subsystem in order to
+co-ordinate register settings across HW modules. In particular, this may
+be needed for HW with separate GPIO and pin controller HW modules, where
+e.g. GPIO direction is determined by a register in the pin controller HW
+module rather than the GPIO HW module.
+
+Electrical properties of the pin such as biasing and drive strength
+may be placed at some pin-specific register in all cases or as part
+of the GPIO register in case (B) especially. This doesn't mean that such
+properties necessarily pertain to what the Linux kernel calls "GPIO".
+
+Example: a pin is usually muxed in to be used as a UART TX line. But during
+system sleep, we need to put this pin into "GPIO mode" and ground it.
+
+If you make a 1-to-1 map to the GPIO subsystem for this pin, you may start
+to think that you need to come up with something really complex, that the
+pin shall be used for UART TX and GPIO at the same time, that you will grab
+a pin control handle and set it to a certain state to enable UART TX to be
+muxed in, then twist it over to GPIO mode and use gpio_direction_output()
+to drive it low during sleep, then mux it over to UART TX again when you
+wake up and maybe even gpio_request/gpio_free as part of this cycle. This
+all gets very complicated.
+
+The solution is to not think that what the datasheet calls "GPIO mode"
+has to be handled by the <linux/gpio.h> interface. Instead view this as
+a certain pin config setting. Look in e.g. <linux/pinctrl/pinconf-generic.h>
+and you find this in the documentation:
+
+ PIN_CONFIG_OUTPUT: this will configure the pin in output, use argument
+ 1 to indicate high level, argument 0 to indicate low level.
+
+So it is perfectly possible to push a pin into "GPIO mode" and drive the
+line low as part of the usual pin control map. So for example your UART
+driver may look like this:
+
+#include <linux/pinctrl/consumer.h>
+
+struct pinctrl *pinctrl;
+struct pinctrl_state *pins_default;
+struct pinctrl_state *pins_sleep;
+
+pins_default = pinctrl_lookup_state(uap->pinctrl, PINCTRL_STATE_DEFAULT);
+pins_sleep = pinctrl_lookup_state(uap->pinctrl, PINCTRL_STATE_SLEEP);
+
+/* Normal mode */
+retval = pinctrl_select_state(pinctrl, pins_default);
+/* Sleep mode */
+retval = pinctrl_select_state(pinctrl, pins_sleep);
+
+And your machine configuration may look like this:
+--------------------------------------------------
+
+static unsigned long uart_default_mode[] = {
+ PIN_CONF_PACKED(PIN_CONFIG_DRIVE_PUSH_PULL, 0),
+};
+
+static unsigned long uart_sleep_mode[] = {
+ PIN_CONF_PACKED(PIN_CONFIG_OUTPUT, 0),
+};
+
+static struct pinctrl_map pinmap[] __initdata = {
+ PIN_MAP_MUX_GROUP("uart", PINCTRL_STATE_DEFAULT, "pinctrl-foo",
+ "u0_group", "u0"),
+ PIN_MAP_CONFIGS_PIN("uart", PINCTRL_STATE_DEFAULT, "pinctrl-foo",
+ "UART_TX_PIN", uart_default_mode),
+ PIN_MAP_MUX_GROUP("uart", PINCTRL_STATE_SLEEP, "pinctrl-foo",
+ "u0_group", "gpio-mode"),
+ PIN_MAP_CONFIGS_PIN("uart", PINCTRL_STATE_SLEEP, "pinctrl-foo",
+ "UART_TX_PIN", uart_sleep_mode),
+};
+
+foo_init(void) {
+ pinctrl_register_mappings(pinmap, ARRAY_SIZE(pinmap));
+}
+
+Here the pins we want to control are in the "u0_group" and there is some
+function called "u0" that can be enabled on this group of pins, and then
+everything is UART business as usual. But there is also some function
+named "gpio-mode" that can be mapped onto the same pins to move them into
+GPIO mode.
+
+This will give the desired effect without any bogus interaction with the
+GPIO subsystem. It is just an electrical configuration used by that device
+when going to sleep, it might imply that the pin is set into something the
+datasheet calls "GPIO mode", but that is not the point: it is still used
+by that UART device to control the pins that pertain to that very UART
+driver, putting them into modes needed by the UART. GPIO in the Linux
+kernel sense are just some 1-bit line, and is a different use case.
+
+How the registers are poked to attain the push or pull, and output low
+configuration and the muxing of the "u0" or "gpio-mode" group onto these
+pins is a question for the driver.
+
+Some datasheets will be more helpful and refer to the "GPIO mode" as
+"low power mode" rather than anything to do with GPIO. This often means
+the same thing electrically speaking, but in this latter case the
+software engineers will usually quickly identify that this is some
+specific muxing or configuration rather than anything related to the GPIO
+API.
+
+
+Board/machine configuration
+==================================
+
+Boards and machines define how a certain complete running system is put
+together, including how GPIOs and devices are muxed, how regulators are
+constrained and how the clock tree looks. Of course pinmux settings are also
+part of this.
+
+A pin controller configuration for a machine looks pretty much like a simple
+regulator configuration, so for the example array above we want to enable i2c
+and spi on the second function mapping:
+
+#include <linux/pinctrl/machine.h>
+
+static const struct pinctrl_map mapping[] __initconst = {
+ {
+ .dev_name = "foo-spi.0",
+ .name = PINCTRL_STATE_DEFAULT,
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .data.mux.function = "spi0",
+ },
+ {
+ .dev_name = "foo-i2c.0",
+ .name = PINCTRL_STATE_DEFAULT,
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .data.mux.function = "i2c0",
+ },
+ {
+ .dev_name = "foo-mmc.0",
+ .name = PINCTRL_STATE_DEFAULT,
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .data.mux.function = "mmc0",
+ },
+};
+
+The dev_name here matches to the unique device name that can be used to look
+up the device struct (just like with clockdev or regulators). The function name
+must match a function provided by the pinmux driver handling this pin range.
+
+As you can see we may have several pin controllers on the system and thus
+we need to specify which one of them contains the functions we wish to map.
+
+You register this pinmux mapping to the pinmux subsystem by simply:
+
+ ret = pinctrl_register_mappings(mapping, ARRAY_SIZE(mapping));
+
+Since the above construct is pretty common there is a helper macro to make
+it even more compact which assumes you want to use pinctrl-foo and position
+0 for mapping, for example:
+
+static struct pinctrl_map mapping[] __initdata = {
+ PIN_MAP_MUX_GROUP("foo-i2c.o", PINCTRL_STATE_DEFAULT, "pinctrl-foo", NULL, "i2c0"),
+};
+
+The mapping table may also contain pin configuration entries. It's common for
+each pin/group to have a number of configuration entries that affect it, so
+the table entries for configuration reference an array of config parameters
+and values. An example using the convenience macros is shown below:
+
+static unsigned long i2c_grp_configs[] = {
+ FOO_PIN_DRIVEN,
+ FOO_PIN_PULLUP,
+};
+
+static unsigned long i2c_pin_configs[] = {
+ FOO_OPEN_COLLECTOR,
+ FOO_SLEW_RATE_SLOW,
+};
+
+static struct pinctrl_map mapping[] __initdata = {
+ PIN_MAP_MUX_GROUP("foo-i2c.0", PINCTRL_STATE_DEFAULT, "pinctrl-foo", "i2c0", "i2c0"),
+ PIN_MAP_CONFIGS_GROUP("foo-i2c.0", PINCTRL_STATE_DEFAULT, "pinctrl-foo", "i2c0", i2c_grp_configs),
+ PIN_MAP_CONFIGS_PIN("foo-i2c.0", PINCTRL_STATE_DEFAULT, "pinctrl-foo", "i2c0scl", i2c_pin_configs),
+ PIN_MAP_CONFIGS_PIN("foo-i2c.0", PINCTRL_STATE_DEFAULT, "pinctrl-foo", "i2c0sda", i2c_pin_configs),
+};
+
+Finally, some devices expect the mapping table to contain certain specific
+named states. When running on hardware that doesn't need any pin controller
+configuration, the mapping table must still contain those named states, in
+order to explicitly indicate that the states were provided and intended to
+be empty. Table entry macro PIN_MAP_DUMMY_STATE serves the purpose of defining
+a named state without causing any pin controller to be programmed:
+
+static struct pinctrl_map mapping[] __initdata = {
+ PIN_MAP_DUMMY_STATE("foo-i2c.0", PINCTRL_STATE_DEFAULT),
+};
+
+
+Complex mappings
+================
+
+As it is possible to map a function to different groups of pins an optional
+.group can be specified like this:
+
+...
+{
+ .dev_name = "foo-spi.0",
+ .name = "spi0-pos-A",
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .function = "spi0",
+ .group = "spi0_0_grp",
+},
+{
+ .dev_name = "foo-spi.0",
+ .name = "spi0-pos-B",
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .function = "spi0",
+ .group = "spi0_1_grp",
+},
+...
+
+This example mapping is used to switch between two positions for spi0 at
+runtime, as described further below under the heading "Runtime pinmuxing".
+
+Further it is possible for one named state to affect the muxing of several
+groups of pins, say for example in the mmc0 example above, where you can
+additively expand the mmc0 bus from 2 to 4 to 8 pins. If we want to use all
+three groups for a total of 2+2+4 = 8 pins (for an 8-bit MMC bus as is the
+case), we define a mapping like this:
+
+...
+{
+ .dev_name = "foo-mmc.0",
+ .name = "2bit"
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .function = "mmc0",
+ .group = "mmc0_1_grp",
+},
+{
+ .dev_name = "foo-mmc.0",
+ .name = "4bit"
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .function = "mmc0",
+ .group = "mmc0_1_grp",
+},
+{
+ .dev_name = "foo-mmc.0",
+ .name = "4bit"
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .function = "mmc0",
+ .group = "mmc0_2_grp",
+},
+{
+ .dev_name = "foo-mmc.0",
+ .name = "8bit"
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .function = "mmc0",
+ .group = "mmc0_1_grp",
+},
+{
+ .dev_name = "foo-mmc.0",
+ .name = "8bit"
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .function = "mmc0",
+ .group = "mmc0_2_grp",
+},
+{
+ .dev_name = "foo-mmc.0",
+ .name = "8bit"
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .function = "mmc0",
+ .group = "mmc0_3_grp",
+},
+...
+
+The result of grabbing this mapping from the device with something like
+this (see next paragraph):
+
+ p = devm_pinctrl_get(dev);
+ s = pinctrl_lookup_state(p, "8bit");
+ ret = pinctrl_select_state(p, s);
+
+or more simply:
+
+ p = devm_pinctrl_get_select(dev, "8bit");
+
+Will be that you activate all the three bottom records in the mapping at
+once. Since they share the same name, pin controller device, function and
+device, and since we allow multiple groups to match to a single device, they
+all get selected, and they all get enabled and disable simultaneously by the
+pinmux core.
+
+
+Pin control requests from drivers
+=================================
+
+When a device driver is about to probe the device core will automatically
+attempt to issue pinctrl_get_select_default() on these devices.
+This way driver writers do not need to add any of the boilerplate code
+of the type found below. However when doing fine-grained state selection
+and not using the "default" state, you may have to do some device driver
+handling of the pinctrl handles and states.
+
+So if you just want to put the pins for a certain device into the default
+state and be done with it, there is nothing you need to do besides
+providing the proper mapping table. The device core will take care of
+the rest.
+
+Generally it is discouraged to let individual drivers get and enable pin
+control. So if possible, handle the pin control in platform code or some other
+place where you have access to all the affected struct device * pointers. In
+some cases where a driver needs to e.g. switch between different mux mappings
+at runtime this is not possible.
+
+A typical case is if a driver needs to switch bias of pins from normal
+operation and going to sleep, moving from the PINCTRL_STATE_DEFAULT to
+PINCTRL_STATE_SLEEP at runtime, re-biasing or even re-muxing pins to save
+current in sleep mode.
+
+A driver may request a certain control state to be activated, usually just the
+default state like this:
+
+#include <linux/pinctrl/consumer.h>
+
+struct foo_state {
+ struct pinctrl *p;
+ struct pinctrl_state *s;
+ ...
+};
+
+foo_probe()
+{
+ /* Allocate a state holder named "foo" etc */
+ struct foo_state *foo = ...;
+
+ foo->p = devm_pinctrl_get(&device);
+ if (IS_ERR(foo->p)) {
+ /* FIXME: clean up "foo" here */
+ return PTR_ERR(foo->p);
+ }
+
+ foo->s = pinctrl_lookup_state(foo->p, PINCTRL_STATE_DEFAULT);
+ if (IS_ERR(foo->s)) {
+ /* FIXME: clean up "foo" here */
+ return PTR_ERR(s);
+ }
+
+ ret = pinctrl_select_state(foo->s);
+ if (ret < 0) {
+ /* FIXME: clean up "foo" here */
+ return ret;
+ }
+}
+
+This get/lookup/select/put sequence can just as well be handled by bus drivers
+if you don't want each and every driver to handle it and you know the
+arrangement on your bus.
+
+The semantics of the pinctrl APIs are:
+
+- pinctrl_get() is called in process context to obtain a handle to all pinctrl
+ information for a given client device. It will allocate a struct from the
+ kernel memory to hold the pinmux state. All mapping table parsing or similar
+ slow operations take place within this API.
+
+- devm_pinctrl_get() is a variant of pinctrl_get() that causes pinctrl_put()
+ to be called automatically on the retrieved pointer when the associated
+ device is removed. It is recommended to use this function over plain
+ pinctrl_get().
+
+- pinctrl_lookup_state() is called in process context to obtain a handle to a
+ specific state for a client device. This operation may be slow, too.
+
+- pinctrl_select_state() programs pin controller hardware according to the
+ definition of the state as given by the mapping table. In theory, this is a
+ fast-path operation, since it only involved blasting some register settings
+ into hardware. However, note that some pin controllers may have their
+ registers on a slow/IRQ-based bus, so client devices should not assume they
+ can call pinctrl_select_state() from non-blocking contexts.
+
+- pinctrl_put() frees all information associated with a pinctrl handle.
+
+- devm_pinctrl_put() is a variant of pinctrl_put() that may be used to
+ explicitly destroy a pinctrl object returned by devm_pinctrl_get().
+ However, use of this function will be rare, due to the automatic cleanup
+ that will occur even without calling it.
+
+ pinctrl_get() must be paired with a plain pinctrl_put().
+ pinctrl_get() may not be paired with devm_pinctrl_put().
+ devm_pinctrl_get() can optionally be paired with devm_pinctrl_put().
+ devm_pinctrl_get() may not be paired with plain pinctrl_put().
+
+Usually the pin control core handled the get/put pair and call out to the
+device drivers bookkeeping operations, like checking available functions and
+the associated pins, whereas select_state pass on to the pin controller
+driver which takes care of activating and/or deactivating the mux setting by
+quickly poking some registers.
+
+The pins are allocated for your device when you issue the devm_pinctrl_get()
+call, after this you should be able to see this in the debugfs listing of all
+pins.
+
+NOTE: the pinctrl system will return -EPROBE_DEFER if it cannot find the
+requested pinctrl handles, for example if the pinctrl driver has not yet
+registered. Thus make sure that the error path in your driver gracefully
+cleans up and is ready to retry the probing later in the startup process.
+
+
+Drivers needing both pin control and GPIOs
+==========================================
+
+Again, it is discouraged to let drivers lookup and select pin control states
+themselves, but again sometimes this is unavoidable.
+
+So say that your driver is fetching its resources like this:
+
+#include <linux/pinctrl/consumer.h>
+#include <linux/gpio.h>
+
+struct pinctrl *pinctrl;
+int gpio;
+
+pinctrl = devm_pinctrl_get_select_default(&dev);
+gpio = devm_gpio_request(&dev, 14, "foo");
+
+Here we first request a certain pin state and then request GPIO 14 to be
+used. If you're using the subsystems orthogonally like this, you should
+nominally always get your pinctrl handle and select the desired pinctrl
+state BEFORE requesting the GPIO. This is a semantic convention to avoid
+situations that can be electrically unpleasant, you will certainly want to
+mux in and bias pins in a certain way before the GPIO subsystems starts to
+deal with them.
+
+The above can be hidden: using the device core, the pinctrl core may be
+setting up the config and muxing for the pins right before the device is
+probing, nevertheless orthogonal to the GPIO subsystem.
+
+But there are also situations where it makes sense for the GPIO subsystem
+to communicate directly with the pinctrl subsystem, using the latter as a
+back-end. This is when the GPIO driver may call out to the functions
+described in the section "Pin control interaction with the GPIO subsystem"
+above. This only involves per-pin multiplexing, and will be completely
+hidden behind the gpio_*() function namespace. In this case, the driver
+need not interact with the pin control subsystem at all.
+
+If a pin control driver and a GPIO driver is dealing with the same pins
+and the use cases involve multiplexing, you MUST implement the pin controller
+as a back-end for the GPIO driver like this, unless your hardware design
+is such that the GPIO controller can override the pin controller's
+multiplexing state through hardware without the need to interact with the
+pin control system.
+
+
+System pin control hogging
+==========================
+
+Pin control map entries can be hogged by the core when the pin controller
+is registered. This means that the core will attempt to call pinctrl_get(),
+lookup_state() and select_state() on it immediately after the pin control
+device has been registered.
+
+This occurs for mapping table entries where the client device name is equal
+to the pin controller device name, and the state name is PINCTRL_STATE_DEFAULT.
+
+{
+ .dev_name = "pinctrl-foo",
+ .name = PINCTRL_STATE_DEFAULT,
+ .type = PIN_MAP_TYPE_MUX_GROUP,
+ .ctrl_dev_name = "pinctrl-foo",
+ .function = "power_func",
+},
+
+Since it may be common to request the core to hog a few always-applicable
+mux settings on the primary pin controller, there is a convenience macro for
+this:
+
+PIN_MAP_MUX_GROUP_HOG_DEFAULT("pinctrl-foo", NULL /* group */, "power_func")
+
+This gives the exact same result as the above construction.
+
+
+Runtime pinmuxing
+=================
+
+It is possible to mux a certain function in and out at runtime, say to move
+an SPI port from one set of pins to another set of pins. Say for example for
+spi0 in the example above, we expose two different groups of pins for the same
+function, but with different named in the mapping as described under
+"Advanced mapping" above. So that for an SPI device, we have two states named
+"pos-A" and "pos-B".
+
+This snippet first initializes a state object for both groups (in foo_probe()),
+then muxes the function in the pins defined by group A, and finally muxes it in
+on the pins defined by group B:
+
+#include <linux/pinctrl/consumer.h>
+
+struct pinctrl *p;
+struct pinctrl_state *s1, *s2;
+
+foo_probe()
+{
+ /* Setup */
+ p = devm_pinctrl_get(&device);
+ if (IS_ERR(p))
+ ...
+
+ s1 = pinctrl_lookup_state(foo->p, "pos-A");
+ if (IS_ERR(s1))
+ ...
+
+ s2 = pinctrl_lookup_state(foo->p, "pos-B");
+ if (IS_ERR(s2))
+ ...
+}
+
+foo_switch()
+{
+ /* Enable on position A */
+ ret = pinctrl_select_state(s1);
+ if (ret < 0)
+ ...
+
+ ...
+
+ /* Enable on position B */
+ ret = pinctrl_select_state(s2);
+ if (ret < 0)
+ ...
+
+ ...
+}
+
+The above has to be done from process context. The reservation of the pins
+will be done when the state is activated, so in effect one specific pin
+can be used by different functions at different times on a running system.
diff --git a/Documentation/platform/x86-laptop-drivers.txt b/Documentation/platform/x86-laptop-drivers.txt
new file mode 100644
index 000000000..01facd259
--- /dev/null
+++ b/Documentation/platform/x86-laptop-drivers.txt
@@ -0,0 +1,18 @@
+compal-laptop
+=============
+List of supported hardware:
+
+by Compal:
+ Compal FL90/IFL90
+ Compal FL91/IFL91
+ Compal FL92/JFL92
+ Compal FT00/IFT00
+
+by Dell:
+ Dell Vostro 1200
+ Dell Mini 9 (Inspiron 910)
+ Dell Mini 10 (Inspiron 1010)
+ Dell Mini 10v (Inspiron 1011)
+ Dell Mini 1012 (Inspiron 1012)
+ Dell Inspiron 11z (Inspiron 1110)
+ Dell Mini 12 (Inspiron 1210)
diff --git a/Documentation/pnp.txt b/Documentation/pnp.txt
new file mode 100644
index 000000000..763e4659b
--- /dev/null
+++ b/Documentation/pnp.txt
@@ -0,0 +1,251 @@
+Linux Plug and Play Documentation
+by Adam Belay <ambx1@neo.rr.com>
+last updated: Oct. 16, 2002
+---------------------------------------------------------------------------------------
+
+
+
+Overview
+--------
+ Plug and Play provides a means of detecting and setting resources for legacy or
+otherwise unconfigurable devices. The Linux Plug and Play Layer provides these
+services to compatible drivers.
+
+
+
+The User Interface
+------------------
+ The Linux Plug and Play user interface provides a means to activate PnP devices
+for legacy and user level drivers that do not support Linux Plug and Play. The
+user interface is integrated into sysfs.
+
+In addition to the standard sysfs file the following are created in each
+device's directory:
+id - displays a list of support EISA IDs
+options - displays possible resource configurations
+resources - displays currently allocated resources and allows resource changes
+
+-activating a device
+
+#echo "auto" > resources
+
+this will invoke the automatic resource config system to activate the device
+
+-manually activating a device
+
+#echo "manual <depnum> <mode>" > resources
+<depnum> - the configuration number
+<mode> - static or dynamic
+ static = for next boot
+ dynamic = now
+
+-disabling a device
+
+#echo "disable" > resources
+
+
+EXAMPLE:
+
+Suppose you need to activate the floppy disk controller.
+1.) change to the proper directory, in my case it is
+/driver/bus/pnp/devices/00:0f
+# cd /driver/bus/pnp/devices/00:0f
+# cat name
+PC standard floppy disk controller
+
+2.) check if the device is already active
+# cat resources
+DISABLED
+
+- Notice the string "DISABLED". This means the device is not active.
+
+3.) check the device's possible configurations (optional)
+# cat options
+Dependent: 01 - Priority acceptable
+ port 0x3f0-0x3f0, align 0x7, size 0x6, 16-bit address decoding
+ port 0x3f7-0x3f7, align 0x0, size 0x1, 16-bit address decoding
+ irq 6
+ dma 2 8-bit compatible
+Dependent: 02 - Priority acceptable
+ port 0x370-0x370, align 0x7, size 0x6, 16-bit address decoding
+ port 0x377-0x377, align 0x0, size 0x1, 16-bit address decoding
+ irq 6
+ dma 2 8-bit compatible
+
+4.) now activate the device
+# echo "auto" > resources
+
+5.) finally check if the device is active
+# cat resources
+io 0x3f0-0x3f5
+io 0x3f7-0x3f7
+irq 6
+dma 2
+
+also there are a series of kernel parameters:
+pnp_reserve_irq=irq1[,irq2] ....
+pnp_reserve_dma=dma1[,dma2] ....
+pnp_reserve_io=io1,size1[,io2,size2] ....
+pnp_reserve_mem=mem1,size1[,mem2,size2] ....
+
+
+
+The Unified Plug and Play Layer
+-------------------------------
+ All Plug and Play drivers, protocols, and services meet at a central location
+called the Plug and Play Layer. This layer is responsible for the exchange of
+information between PnP drivers and PnP protocols. Thus it automatically
+forwards commands to the proper protocol. This makes writing PnP drivers
+significantly easier.
+
+The following functions are available from the Plug and Play Layer:
+
+pnp_get_protocol
+- increments the number of uses by one
+
+pnp_put_protocol
+- deincrements the number of uses by one
+
+pnp_register_protocol
+- use this to register a new PnP protocol
+
+pnp_unregister_protocol
+- use this function to remove a PnP protocol from the Plug and Play Layer
+
+pnp_register_driver
+- adds a PnP driver to the Plug and Play Layer
+- this includes driver model integration
+- returns zero for success or a negative error number for failure; count
+ calls to the .add() method if you need to know how many devices bind to
+ the driver
+
+pnp_unregister_driver
+- removes a PnP driver from the Plug and Play Layer
+
+
+
+Plug and Play Protocols
+-----------------------
+ This section contains information for PnP protocol developers.
+
+The following Protocols are currently available in the computing world:
+- PNPBIOS: used for system devices such as serial and parallel ports.
+- ISAPNP: provides PnP support for the ISA bus
+- ACPI: among its many uses, ACPI provides information about system level
+devices.
+It is meant to replace the PNPBIOS. It is not currently supported by Linux
+Plug and Play but it is planned to be in the near future.
+
+
+Requirements for a Linux PnP protocol:
+1.) the protocol must use EISA IDs
+2.) the protocol must inform the PnP Layer of a device's current configuration
+- the ability to set resources is optional but preferred.
+
+The following are PnP protocol related functions:
+
+pnp_add_device
+- use this function to add a PnP device to the PnP layer
+- only call this function when all wanted values are set in the pnp_dev
+structure
+
+pnp_init_device
+- call this to initialize the PnP structure
+
+pnp_remove_device
+- call this to remove a device from the Plug and Play Layer.
+- it will fail if the device is still in use.
+- automatically will free mem used by the device and related structures
+
+pnp_add_id
+- adds an EISA ID to the list of supported IDs for the specified device
+
+For more information consult the source of a protocol such as
+/drivers/pnp/pnpbios/core.c.
+
+
+
+Linux Plug and Play Drivers
+---------------------------
+ This section contains information for Linux PnP driver developers.
+
+The New Way
+...........
+1.) first make a list of supported EISA IDS
+ex:
+static const struct pnp_id pnp_dev_table[] = {
+ /* Standard LPT Printer Port */
+ {.id = "PNP0400", .driver_data = 0},
+ /* ECP Printer Port */
+ {.id = "PNP0401", .driver_data = 0},
+ {.id = ""}
+};
+
+Please note that the character 'X' can be used as a wild card in the function
+portion (last four characters).
+ex:
+ /* Unknown PnP modems */
+ { "PNPCXXX", UNKNOWN_DEV },
+
+Supported PnP card IDs can optionally be defined.
+ex:
+static const struct pnp_id pnp_card_table[] = {
+ { "ANYDEVS", 0 },
+ { "", 0 }
+};
+
+2.) Optionally define probe and remove functions. It may make sense not to
+define these functions if the driver already has a reliable method of detecting
+the resources, such as the parport_pc driver.
+ex:
+static int
+serial_pnp_probe(struct pnp_dev * dev, const struct pnp_id *card_id, const
+ struct pnp_id *dev_id)
+{
+. . .
+
+ex:
+static void serial_pnp_remove(struct pnp_dev * dev)
+{
+. . .
+
+consult /drivers/serial/8250_pnp.c for more information.
+
+3.) create a driver structure
+ex:
+
+static struct pnp_driver serial_pnp_driver = {
+ .name = "serial",
+ .card_id_table = pnp_card_table,
+ .id_table = pnp_dev_table,
+ .probe = serial_pnp_probe,
+ .remove = serial_pnp_remove,
+};
+
+* name and id_table cannot be NULL.
+
+4.) register the driver
+ex:
+
+static int __init serial8250_pnp_init(void)
+{
+ return pnp_register_driver(&serial_pnp_driver);
+}
+
+The Old Way
+...........
+
+A series of compatibility functions have been created to make it easy to convert
+ISAPNP drivers. They should serve as a temporary solution only.
+
+They are as follows:
+
+struct pnp_card *pnp_find_card(unsigned short vendor,
+ unsigned short device,
+ struct pnp_card *from)
+
+struct pnp_dev *pnp_find_dev(struct pnp_card *card,
+ unsigned short vendor,
+ unsigned short function,
+ struct pnp_dev *from)
+
diff --git a/Documentation/power/00-INDEX b/Documentation/power/00-INDEX
new file mode 100644
index 000000000..ad04cc809
--- /dev/null
+++ b/Documentation/power/00-INDEX
@@ -0,0 +1,46 @@
+00-INDEX
+ - This file
+apm-acpi.txt
+ - basic info about the APM and ACPI support.
+basic-pm-debugging.txt
+ - Debugging suspend and resume
+charger-manager.txt
+ - Battery charger management.
+devices.txt
+ - How drivers interact with system-wide power management
+drivers-testing.txt
+ - Testing suspend and resume support in device drivers
+freezing-of-tasks.txt
+ - How processes and controlled during suspend
+interface.txt
+ - Power management user interface in /sys/power
+notifiers.txt
+ - Registering suspend notifiers in device drivers
+opp.txt
+ - Operating Performance Point library
+pci.txt
+ - How the PCI Subsystem Does Power Management
+pm_qos_interface.txt
+ - info on Linux PM Quality of Service interface
+power_supply_class.txt
+ - Tells userspace about battery, UPS, AC or DC power supply properties
+runtime_pm.txt
+ - Power management framework for I/O devices.
+s2ram.txt
+ - How to get suspend to ram working (and debug it when it isn't)
+states.txt
+ - System power management states
+suspend-and-cpuhotplug.txt
+ - Explains the interaction between Suspend-to-RAM (S3) and CPU hotplug
+swsusp-and-swap-files.txt
+ - Using swap files with software suspend (to disk)
+swsusp-dmcrypt.txt
+ - How to use dm-crypt and software suspend (to disk) together
+swsusp.txt
+ - Goals, implementation, and usage of software suspend (ACPI S3)
+tricks.txt
+ - How to trick software suspend (to disk) into working when it isn't
+userland-swsusp.txt
+ - Experimental implementation of software suspend in userspace
+video.txt
+ - Video issues during resume from suspend
diff --git a/Documentation/power/apm-acpi.txt b/Documentation/power/apm-acpi.txt
new file mode 100644
index 000000000..6cc423d36
--- /dev/null
+++ b/Documentation/power/apm-acpi.txt
@@ -0,0 +1,32 @@
+APM or ACPI?
+------------
+If you have a relatively recent x86 mobile, desktop, or server system,
+odds are it supports either Advanced Power Management (APM) or
+Advanced Configuration and Power Interface (ACPI). ACPI is the newer
+of the two technologies and puts power management in the hands of the
+operating system, allowing for more intelligent power management than
+is possible with BIOS controlled APM.
+
+The best way to determine which, if either, your system supports is to
+build a kernel with both ACPI and APM enabled (as of 2.3.x ACPI is
+enabled by default). If a working ACPI implementation is found, the
+ACPI driver will override and disable APM, otherwise the APM driver
+will be used.
+
+No, sorry, you cannot have both ACPI and APM enabled and running at
+once. Some people with broken ACPI or broken APM implementations
+would like to use both to get a full set of working features, but you
+simply cannot mix and match the two. Only one power management
+interface can be in control of the machine at once. Think about it..
+
+User-space Daemons
+------------------
+Both APM and ACPI rely on user-space daemons, apmd and acpid
+respectively, to be completely functional. Obtain both of these
+daemons from your Linux distribution or from the Internet (see below)
+and be sure that they are started sometime in the system boot process.
+Go ahead and start both. If ACPI or APM is not available on your
+system the associated daemon will exit gracefully.
+
+ apmd: http://ftp.debian.org/pool/main/a/apmd/
+ acpid: http://acpid.sf.net/
diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.txt
new file mode 100644
index 000000000..b96098ccf
--- /dev/null
+++ b/Documentation/power/basic-pm-debugging.txt
@@ -0,0 +1,229 @@
+Debugging hibernation and suspend
+ (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
+
+1. Testing hibernation (aka suspend to disk or STD)
+
+To check if hibernation works, you can try to hibernate in the "reboot" mode:
+
+# echo reboot > /sys/power/disk
+# echo disk > /sys/power/state
+
+and the system should create a hibernation image, reboot, resume and get back to
+the command prompt where you have started the transition. If that happens,
+hibernation is most likely to work correctly. Still, you need to repeat the
+test at least a couple of times in a row for confidence. [This is necessary,
+because some problems only show up on a second attempt at suspending and
+resuming the system.] Moreover, hibernating in the "reboot" and "shutdown"
+modes causes the PM core to skip some platform-related callbacks which on ACPI
+systems might be necessary to make hibernation work. Thus, if your machine fails
+to hibernate or resume in the "reboot" mode, you should try the "platform" mode:
+
+# echo platform > /sys/power/disk
+# echo disk > /sys/power/state
+
+which is the default and recommended mode of hibernation.
+
+Unfortunately, the "platform" mode of hibernation does not work on some systems
+with broken BIOSes. In such cases the "shutdown" mode of hibernation might
+work:
+
+# echo shutdown > /sys/power/disk
+# echo disk > /sys/power/state
+
+(it is similar to the "reboot" mode, but it requires you to press the power
+button to make the system resume).
+
+If neither "platform" nor "shutdown" hibernation mode works, you will need to
+identify what goes wrong.
+
+a) Test modes of hibernation
+
+To find out why hibernation fails on your system, you can use a special testing
+facility available if the kernel is compiled with CONFIG_PM_DEBUG set. Then,
+there is the file /sys/power/pm_test that can be used to make the hibernation
+core run in a test mode. There are 5 test modes available:
+
+freezer
+- test the freezing of processes
+
+devices
+- test the freezing of processes and suspending of devices
+
+platform
+- test the freezing of processes, suspending of devices and platform
+ global control methods(*)
+
+processors
+- test the freezing of processes, suspending of devices, platform
+ global control methods(*) and the disabling of nonboot CPUs
+
+core
+- test the freezing of processes, suspending of devices, platform global
+ control methods(*), the disabling of nonboot CPUs and suspending of
+ platform/system devices
+
+(*) the platform global control methods are only available on ACPI systems
+ and are only tested if the hibernation mode is set to "platform"
+
+To use one of them it is necessary to write the corresponding string to
+/sys/power/pm_test (eg. "devices" to test the freezing of processes and
+suspending devices) and issue the standard hibernation commands. For example,
+to use the "devices" test mode along with the "platform" mode of hibernation,
+you should do the following:
+
+# echo devices > /sys/power/pm_test
+# echo platform > /sys/power/disk
+# echo disk > /sys/power/state
+
+Then, the kernel will try to freeze processes, suspend devices, wait a few
+seconds (5 by default, but configurable by the suspend.pm_test_delay module
+parameter), resume devices and thaw processes. If "platform" is written to
+/sys/power/pm_test , then after suspending devices the kernel will additionally
+invoke the global control methods (eg. ACPI global control methods) used to
+prepare the platform firmware for hibernation. Next, it will wait a
+configurable number of seconds and invoke the platform (eg. ACPI) global
+methods used to cancel hibernation etc.
+
+Writing "none" to /sys/power/pm_test causes the kernel to switch to the normal
+hibernation/suspend operations. Also, when open for reading, /sys/power/pm_test
+contains a space-separated list of all available tests (including "none" that
+represents the normal functionality) in which the current test level is
+indicated by square brackets.
+
+Generally, as you can see, each test level is more "invasive" than the previous
+one and the "core" level tests the hardware and drivers as deeply as possible
+without creating a hibernation image. Obviously, if the "devices" test fails,
+the "platform" test will fail as well and so on. Thus, as a rule of thumb, you
+should try the test modes starting from "freezer", through "devices", "platform"
+and "processors" up to "core" (repeat the test on each level a couple of times
+to make sure that any random factors are avoided).
+
+If the "freezer" test fails, there is a task that cannot be frozen (in that case
+it usually is possible to identify the offending task by analysing the output of
+dmesg obtained after the failing test). Failure at this level usually means
+that there is a problem with the tasks freezer subsystem that should be
+reported.
+
+If the "devices" test fails, most likely there is a driver that cannot suspend
+or resume its device (in the latter case the system may hang or become unstable
+after the test, so please take that into consideration). To find this driver,
+you can carry out a binary search according to the rules:
+- if the test fails, unload a half of the drivers currently loaded and repeat
+(that would probably involve rebooting the system, so always note what drivers
+have been loaded before the test),
+- if the test succeeds, load a half of the drivers you have unloaded most
+recently and repeat.
+
+Once you have found the failing driver (there can be more than just one of
+them), you have to unload it every time before hibernation. In that case please
+make sure to report the problem with the driver.
+
+It is also possible that the "devices" test will still fail after you have
+unloaded all modules. In that case, you may want to look in your kernel
+configuration for the drivers that can be compiled as modules (and test again
+with these drivers compiled as modules). You may also try to use some special
+kernel command line options such as "noapic", "noacpi" or even "acpi=off".
+
+If the "platform" test fails, there is a problem with the handling of the
+platform (eg. ACPI) firmware on your system. In that case the "platform" mode
+of hibernation is not likely to work. You can try the "shutdown" mode, but that
+is rather a poor man's workaround.
+
+If the "processors" test fails, the disabling/enabling of nonboot CPUs does not
+work (of course, this only may be an issue on SMP systems) and the problem
+should be reported. In that case you can also try to switch the nonboot CPUs
+off and on using the /sys/devices/system/cpu/cpu*/online sysfs attributes and
+see if that works.
+
+If the "core" test fails, which means that suspending of the system/platform
+devices has failed (these devices are suspended on one CPU with interrupts off),
+the problem is most probably hardware-related and serious, so it should be
+reported.
+
+A failure of any of the "platform", "processors" or "core" tests may cause your
+system to hang or become unstable, so please beware. Such a failure usually
+indicates a serious problem that very well may be related to the hardware, but
+please report it anyway.
+
+b) Testing minimal configuration
+
+If all of the hibernation test modes work, you can boot the system with the
+"init=/bin/bash" command line parameter and attempt to hibernate in the
+"reboot", "shutdown" and "platform" modes. If that does not work, there
+probably is a problem with a driver statically compiled into the kernel and you
+can try to compile more drivers as modules, so that they can be tested
+individually. Otherwise, there is a problem with a modular driver and you can
+find it by loading a half of the modules you normally use and binary searching
+in accordance with the algorithm:
+- if there are n modules loaded and the attempt to suspend and resume fails,
+unload n/2 of the modules and try again (that would probably involve rebooting
+the system),
+- if there are n modules loaded and the attempt to suspend and resume succeeds,
+load n/2 modules more and try again.
+
+Again, if you find the offending module(s), it(they) must be unloaded every time
+before hibernation, and please report the problem with it(them).
+
+c) Advanced debugging
+
+In case that hibernation does not work on your system even in the minimal
+configuration and compiling more drivers as modules is not practical or some
+modules cannot be unloaded, you can use one of the more advanced debugging
+techniques to find the problem. First, if there is a serial port in your box,
+you can boot the kernel with the 'no_console_suspend' parameter and try to log
+kernel messages using the serial console. This may provide you with some
+information about the reasons of the suspend (resume) failure. Alternatively,
+it may be possible to use a FireWire port for debugging with firescope
+(http://v3.sk/~lkundrak/firescope/). On x86 it is also possible to
+use the PM_TRACE mechanism documented in Documentation/power/s2ram.txt .
+
+2. Testing suspend to RAM (STR)
+
+To verify that the STR works, it is generally more convenient to use the s2ram
+tool available from http://suspend.sf.net and documented at
+http://en.opensuse.org/SDB:Suspend_to_RAM (S2RAM_LINK).
+
+Namely, after writing "freezer", "devices", "platform", "processors", or "core"
+into /sys/power/pm_test (available if the kernel is compiled with
+CONFIG_PM_DEBUG set) the suspend code will work in the test mode corresponding
+to given string. The STR test modes are defined in the same way as for
+hibernation, so please refer to Section 1 for more information about them. In
+particular, the "core" test allows you to test everything except for the actual
+invocation of the platform firmware in order to put the system into the sleep
+state.
+
+Among other things, the testing with the help of /sys/power/pm_test may allow
+you to identify drivers that fail to suspend or resume their devices. They
+should be unloaded every time before an STR transition.
+
+Next, you can follow the instructions at S2RAM_LINK to test the system, but if
+it does not work "out of the box", you may need to boot it with
+"init=/bin/bash" and test s2ram in the minimal configuration. In that case,
+you may be able to search for failing drivers by following the procedure
+analogous to the one described in section 1. If you find some failing drivers,
+you will have to unload them every time before an STR transition (ie. before
+you run s2ram), and please report the problems with them.
+
+There is a debugfs entry which shows the suspend to RAM statistics. Here is an
+example of its output.
+ # mount -t debugfs none /sys/kernel/debug
+ # cat /sys/kernel/debug/suspend_stats
+ success: 20
+ fail: 5
+ failed_freeze: 0
+ failed_prepare: 0
+ failed_suspend: 5
+ failed_suspend_noirq: 0
+ failed_resume: 0
+ failed_resume_noirq: 0
+ failures:
+ last_failed_dev: alarm
+ adc
+ last_failed_errno: -16
+ -16
+ last_failed_step: suspend
+ suspend
+Field success means the success number of suspend to RAM, and field fail means
+the failure number. Others are the failure number of different steps of suspend
+to RAM. suspend_stats just lists the last 2 failed devices, error number and
+failed step of suspend.
diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt
new file mode 100644
index 000000000..9ff1105e5
--- /dev/null
+++ b/Documentation/power/charger-manager.txt
@@ -0,0 +1,200 @@
+Charger Manager
+ (C) 2011 MyungJoo Ham <myungjoo.ham@samsung.com>, GPL
+
+Charger Manager provides in-kernel battery charger management that
+requires temperature monitoring during suspend-to-RAM state
+and where each battery may have multiple chargers attached and the userland
+wants to look at the aggregated information of the multiple chargers.
+
+Charger Manager is a platform_driver with power-supply-class entries.
+An instance of Charger Manager (a platform-device created with Charger-Manager)
+represents an independent battery with chargers. If there are multiple
+batteries with their own chargers acting independently in a system,
+the system may need multiple instances of Charger Manager.
+
+1. Introduction
+===============
+
+Charger Manager supports the following:
+
+* Support for multiple chargers (e.g., a device with USB, AC, and solar panels)
+ A system may have multiple chargers (or power sources) and some of
+ they may be activated at the same time. Each charger may have its
+ own power-supply-class and each power-supply-class can provide
+ different information about the battery status. This framework
+ aggregates charger-related information from multiple sources and
+ shows combined information as a single power-supply-class.
+
+* Support for in suspend-to-RAM polling (with suspend_again callback)
+ While the battery is being charged and the system is in suspend-to-RAM,
+ we may need to monitor the battery health by looking at the ambient or
+ battery temperature. We can accomplish this by waking up the system
+ periodically. However, such a method wakes up devices unnecessarily for
+ monitoring the battery health and tasks, and user processes that are
+ supposed to be kept suspended. That, in turn, incurs unnecessary power
+ consumption and slow down charging process. Or even, such peak power
+ consumption can stop chargers in the middle of charging
+ (external power input < device power consumption), which not
+ only affects the charging time, but the lifespan of the battery.
+
+ Charger Manager provides a function "cm_suspend_again" that can be
+ used as suspend_again callback of platform_suspend_ops. If the platform
+ requires tasks other than cm_suspend_again, it may implement its own
+ suspend_again callback that calls cm_suspend_again in the middle.
+ Normally, the platform will need to resume and suspend some devices
+ that are used by Charger Manager.
+
+* Support for premature full-battery event handling
+ If the battery voltage drops by "fullbatt_vchkdrop_uV" after
+ "fullbatt_vchkdrop_ms" from the full-battery event, the framework
+ restarts charging. This check is also performed while suspended by
+ setting wakeup time accordingly and using suspend_again.
+
+* Support for uevent-notify
+ With the charger-related events, the device sends
+ notification to users with UEVENT.
+
+2. Global Charger-Manager Data related with suspend_again
+========================================================
+In order to setup Charger Manager with suspend-again feature
+(in-suspend monitoring), the user should provide charger_global_desc
+with setup_charger_manager(struct charger_global_desc *).
+This charger_global_desc data for in-suspend monitoring is global
+as the name suggests. Thus, the user needs to provide only once even
+if there are multiple batteries. If there are multiple batteries, the
+multiple instances of Charger Manager share the same charger_global_desc
+and it will manage in-suspend monitoring for all instances of Charger Manager.
+
+The user needs to provide all the three entries properly in order to activate
+in-suspend monitoring:
+
+struct charger_global_desc {
+
+char *rtc_name;
+ : The name of rtc (e.g., "rtc0") used to wakeup the system from
+ suspend for Charger Manager. The alarm interrupt (AIE) of the rtc
+ should be able to wake up the system from suspend. Charger Manager
+ saves and restores the alarm value and use the previously-defined
+ alarm if it is going to go off earlier than Charger Manager so that
+ Charger Manager does not interfere with previously-defined alarms.
+
+bool (*rtc_only_wakeup)(void);
+ : This callback should let CM know whether
+ the wakeup-from-suspend is caused only by the alarm of "rtc" in the
+ same struct. If there is any other wakeup source triggered the
+ wakeup, it should return false. If the "rtc" is the only wakeup
+ reason, it should return true.
+
+bool assume_timer_stops_in_suspend;
+ : if true, Charger Manager assumes that
+ the timer (CM uses jiffies as timer) stops during suspend. Then, CM
+ assumes that the suspend-duration is same as the alarm length.
+};
+
+3. How to setup suspend_again
+=============================
+Charger Manager provides a function "extern bool cm_suspend_again(void)".
+When cm_suspend_again is called, it monitors every battery. The suspend_ops
+callback of the system's platform_suspend_ops can call cm_suspend_again
+function to know whether Charger Manager wants to suspend again or not.
+If there are no other devices or tasks that want to use suspend_again
+feature, the platform_suspend_ops may directly refer to cm_suspend_again
+for its suspend_again callback.
+
+The cm_suspend_again() returns true (meaning "I want to suspend again")
+if the system was woken up by Charger Manager and the polling
+(in-suspend monitoring) results in "normal".
+
+4. Charger-Manager Data (struct charger_desc)
+=============================================
+For each battery charged independently from other batteries (if a series of
+batteries are charged by a single charger, they are counted as one independent
+battery), an instance of Charger Manager is attached to it.
+
+struct charger_desc {
+
+char *psy_name;
+ : The power-supply-class name of the battery. Default is
+ "battery" if psy_name is NULL. Users can access the psy entries
+ at "/sys/class/power_supply/[psy_name]/".
+
+enum polling_modes polling_mode;
+ : CM_POLL_DISABLE: do not poll this battery.
+ CM_POLL_ALWAYS: always poll this battery.
+ CM_POLL_EXTERNAL_POWER_ONLY: poll this battery if and only if
+ an external power source is attached.
+ CM_POLL_CHARGING_ONLY: poll this battery if and only if the
+ battery is being charged.
+
+unsigned int fullbatt_vchkdrop_ms;
+unsigned int fullbatt_vchkdrop_uV;
+ : If both have non-zero values, Charger Manager will check the
+ battery voltage drop fullbatt_vchkdrop_ms after the battery is fully
+ charged. If the voltage drop is over fullbatt_vchkdrop_uV, Charger
+ Manager will try to recharge the battery by disabling and enabling
+ chargers. Recharge with voltage drop condition only (without delay
+ condition) is needed to be implemented with hardware interrupts from
+ fuel gauges or charger devices/chips.
+
+unsigned int fullbatt_uV;
+ : If specified with a non-zero value, Charger Manager assumes
+ that the battery is full (capacity = 100) if the battery is not being
+ charged and the battery voltage is equal to or greater than
+ fullbatt_uV.
+
+unsigned int polling_interval_ms;
+ : Required polling interval in ms. Charger Manager will poll
+ this battery every polling_interval_ms or more frequently.
+
+enum data_source battery_present;
+ : CM_BATTERY_PRESENT: assume that the battery exists.
+ CM_NO_BATTERY: assume that the battery does not exists.
+ CM_FUEL_GAUGE: get battery presence information from fuel gauge.
+ CM_CHARGER_STAT: get battery presence from chargers.
+
+char **psy_charger_stat;
+ : An array ending with NULL that has power-supply-class names of
+ chargers. Each power-supply-class should provide "PRESENT" (if
+ battery_present is "CM_CHARGER_STAT"), "ONLINE" (shows whether an
+ external power source is attached or not), and "STATUS" (shows whether
+ the battery is {"FULL" or not FULL} or {"FULL", "Charging",
+ "Discharging", "NotCharging"}).
+
+int num_charger_regulators;
+struct regulator_bulk_data *charger_regulators;
+ : Regulators representing the chargers in the form for
+ regulator framework's bulk functions.
+
+char *psy_fuel_gauge;
+ : Power-supply-class name of the fuel gauge.
+
+int (*temperature_out_of_range)(int *mC);
+bool measure_battery_temp;
+ : This callback returns 0 if the temperature is safe for charging,
+ a positive number if it is too hot to charge, and a negative number
+ if it is too cold to charge. With the variable mC, the callback returns
+ the temperature in 1/1000 of centigrade.
+ The source of temperature can be battery or ambient one according to
+ the value of measure_battery_temp.
+};
+
+5. Notify Charger-Manager of charger events: cm_notify_event()
+=========================================================
+If there is an charger event is required to notify
+Charger Manager, a charger device driver that triggers the event can call
+cm_notify_event(psy, type, msg) to notify the corresponding Charger Manager.
+In the function, psy is the charger driver's power_supply pointer, which is
+associated with Charger-Manager. The parameter "type"
+is the same as irq's type (enum cm_event_types). The event message "msg" is
+optional and is effective only if the event type is "UNDESCRIBED" or "OTHERS".
+
+6. Other Considerations
+=======================
+
+At the charger/battery-related events such as battery-pulled-out,
+charger-pulled-out, charger-inserted, DCIN-over/under-voltage, charger-stopped,
+and others critical to chargers, the system should be configured to wake up.
+At least the following should wake up the system from a suspend:
+a) charger-on/off b) external-power-in/out c) battery-in/out (while charging)
+
+It is usually accomplished by configuring the PMIC as a wakeup source.
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
new file mode 100644
index 000000000..d172bce0f
--- /dev/null
+++ b/Documentation/power/devices.txt
@@ -0,0 +1,697 @@
+Device Power Management
+
+Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu>
+Copyright (c) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+Most of the code in Linux is device drivers, so most of the Linux power
+management (PM) code is also driver-specific. Most drivers will do very
+little; others, especially for platforms with small batteries (like cell
+phones), will do a lot.
+
+This writeup gives an overview of how drivers interact with system-wide
+power management goals, emphasizing the models and interfaces that are
+shared by everything that hooks up to the driver model core. Read it as
+background for the domain-specific work you'd do with any specific driver.
+
+
+Two Models for Device Power Management
+======================================
+Drivers will use one or both of these models to put devices into low-power
+states:
+
+ System Sleep model:
+ Drivers can enter low-power states as part of entering system-wide
+ low-power states like "suspend" (also known as "suspend-to-RAM"), or
+ (mostly for systems with disks) "hibernation" (also known as
+ "suspend-to-disk").
+
+ This is something that device, bus, and class drivers collaborate on
+ by implementing various role-specific suspend and resume methods to
+ cleanly power down hardware and software subsystems, then reactivate
+ them without loss of data.
+
+ Some drivers can manage hardware wakeup events, which make the system
+ leave the low-power state. This feature may be enabled or disabled
+ using the relevant /sys/devices/.../power/wakeup file (for Ethernet
+ drivers the ioctl interface used by ethtool may also be used for this
+ purpose); enabling it may cost some power usage, but let the whole
+ system enter low-power states more often.
+
+ Runtime Power Management model:
+ Devices may also be put into low-power states while the system is
+ running, independently of other power management activity in principle.
+ However, devices are not generally independent of each other (for
+ example, a parent device cannot be suspended unless all of its child
+ devices have been suspended). Moreover, depending on the bus type the
+ device is on, it may be necessary to carry out some bus-specific
+ operations on the device for this purpose. Devices put into low power
+ states at run time may require special handling during system-wide power
+ transitions (suspend or hibernation).
+
+ For these reasons not only the device driver itself, but also the
+ appropriate subsystem (bus type, device type or device class) driver and
+ the PM core are involved in runtime power management. As in the system
+ sleep power management case, they need to collaborate by implementing
+ various role-specific suspend and resume methods, so that the hardware
+ is cleanly powered down and reactivated without data or service loss.
+
+There's not a lot to be said about those low-power states except that they are
+very system-specific, and often device-specific. Also, that if enough devices
+have been put into low-power states (at runtime), the effect may be very similar
+to entering some system-wide low-power state (system sleep) ... and that
+synergies exist, so that several drivers using runtime PM might put the system
+into a state where even deeper power saving options are available.
+
+Most suspended devices will have quiesced all I/O: no more DMA or IRQs (except
+for wakeup events), no more data read or written, and requests from upstream
+drivers are no longer accepted. A given bus or platform may have different
+requirements though.
+
+Examples of hardware wakeup events include an alarm from a real time clock,
+network wake-on-LAN packets, keyboard or mouse activity, and media insertion
+or removal (for PCMCIA, MMC/SD, USB, and so on).
+
+
+Interfaces for Entering System Sleep States
+===========================================
+There are programming interfaces provided for subsystems (bus type, device type,
+device class) and device drivers to allow them to participate in the power
+management of devices they are concerned with. These interfaces cover both
+system sleep and runtime power management.
+
+
+Device Power Management Operations
+----------------------------------
+Device power management operations, at the subsystem level as well as at the
+device driver level, are implemented by defining and populating objects of type
+struct dev_pm_ops:
+
+struct dev_pm_ops {
+ int (*prepare)(struct device *dev);
+ void (*complete)(struct device *dev);
+ int (*suspend)(struct device *dev);
+ int (*resume)(struct device *dev);
+ int (*freeze)(struct device *dev);
+ int (*thaw)(struct device *dev);
+ int (*poweroff)(struct device *dev);
+ int (*restore)(struct device *dev);
+ int (*suspend_late)(struct device *dev);
+ int (*resume_early)(struct device *dev);
+ int (*freeze_late)(struct device *dev);
+ int (*thaw_early)(struct device *dev);
+ int (*poweroff_late)(struct device *dev);
+ int (*restore_early)(struct device *dev);
+ int (*suspend_noirq)(struct device *dev);
+ int (*resume_noirq)(struct device *dev);
+ int (*freeze_noirq)(struct device *dev);
+ int (*thaw_noirq)(struct device *dev);
+ int (*poweroff_noirq)(struct device *dev);
+ int (*restore_noirq)(struct device *dev);
+ int (*runtime_suspend)(struct device *dev);
+ int (*runtime_resume)(struct device *dev);
+ int (*runtime_idle)(struct device *dev);
+};
+
+This structure is defined in include/linux/pm.h and the methods included in it
+are also described in that file. Their roles will be explained in what follows.
+For now, it should be sufficient to remember that the last three methods are
+specific to runtime power management while the remaining ones are used during
+system-wide power transitions.
+
+There also is a deprecated "old" or "legacy" interface for power management
+operations available at least for some subsystems. This approach does not use
+struct dev_pm_ops objects and it is suitable only for implementing system sleep
+power management methods. Therefore it is not described in this document, so
+please refer directly to the source code for more information about it.
+
+
+Subsystem-Level Methods
+-----------------------
+The core methods to suspend and resume devices reside in struct dev_pm_ops
+pointed to by the ops member of struct dev_pm_domain, or by the pm member of
+struct bus_type, struct device_type and struct class. They are mostly of
+interest to the people writing infrastructure for platforms and buses, like PCI
+or USB, or device type and device class drivers. They also are relevant to the
+writers of device drivers whose subsystems (PM domains, device types, device
+classes and bus types) don't provide all power management methods.
+
+Bus drivers implement these methods as appropriate for the hardware and the
+drivers using it; PCI works differently from USB, and so on. Not many people
+write subsystem-level drivers; most driver code is a "device driver" that builds
+on top of bus-specific framework code.
+
+For more information on these driver calls, see the description later;
+they are called in phases for every device, respecting the parent-child
+sequencing in the driver model tree.
+
+
+/sys/devices/.../power/wakeup files
+-----------------------------------
+All device objects in the driver model contain fields that control the handling
+of system wakeup events (hardware signals that can force the system out of a
+sleep state). These fields are initialized by bus or device driver code using
+device_set_wakeup_capable() and device_set_wakeup_enable(), defined in
+include/linux/pm_wakeup.h.
+
+The "power.can_wakeup" flag just records whether the device (and its driver) can
+physically support wakeup events. The device_set_wakeup_capable() routine
+affects this flag. The "power.wakeup" field is a pointer to an object of type
+struct wakeup_source used for controlling whether or not the device should use
+its system wakeup mechanism and for notifying the PM core of system wakeup
+events signaled by the device. This object is only present for wakeup-capable
+devices (i.e. devices whose "can_wakeup" flags are set) and is created (or
+removed) by device_set_wakeup_capable().
+
+Whether or not a device is capable of issuing wakeup events is a hardware
+matter, and the kernel is responsible for keeping track of it. By contrast,
+whether or not a wakeup-capable device should issue wakeup events is a policy
+decision, and it is managed by user space through a sysfs attribute: the
+"power/wakeup" file. User space can write the strings "enabled" or "disabled"
+to it to indicate whether or not, respectively, the device is supposed to signal
+system wakeup. This file is only present if the "power.wakeup" object exists
+for the given device and is created (or removed) along with that object, by
+device_set_wakeup_capable(). Reads from the file will return the corresponding
+string.
+
+The "power/wakeup" file is supposed to contain the "disabled" string initially
+for the majority of devices; the major exceptions are power buttons, keyboards,
+and Ethernet adapters whose WoL (wake-on-LAN) feature has been set up with
+ethtool. It should also default to "enabled" for devices that don't generate
+wakeup requests on their own but merely forward wakeup requests from one bus to
+another (like PCI Express ports).
+
+The device_may_wakeup() routine returns true only if the "power.wakeup" object
+exists and the corresponding "power/wakeup" file contains the string "enabled".
+This information is used by subsystems, like the PCI bus type code, to see
+whether or not to enable the devices' wakeup mechanisms. If device wakeup
+mechanisms are enabled or disabled directly by drivers, they also should use
+device_may_wakeup() to decide what to do during a system sleep transition.
+Device drivers, however, are not supposed to call device_set_wakeup_enable()
+directly in any case.
+
+It ought to be noted that system wakeup is conceptually different from "remote
+wakeup" used by runtime power management, although it may be supported by the
+same physical mechanism. Remote wakeup is a feature allowing devices in
+low-power states to trigger specific interrupts to signal conditions in which
+they should be put into the full-power state. Those interrupts may or may not
+be used to signal system wakeup events, depending on the hardware design. On
+some systems it is impossible to trigger them from system sleep states. In any
+case, remote wakeup should always be enabled for runtime power management for
+all devices and drivers that support it.
+
+/sys/devices/.../power/control files
+------------------------------------
+Each device in the driver model has a flag to control whether it is subject to
+runtime power management. This flag, called runtime_auto, is initialized by the
+bus type (or generally subsystem) code using pm_runtime_allow() or
+pm_runtime_forbid(); the default is to allow runtime power management.
+
+The setting can be adjusted by user space by writing either "on" or "auto" to
+the device's power/control sysfs file. Writing "auto" calls pm_runtime_allow(),
+setting the flag and allowing the device to be runtime power-managed by its
+driver. Writing "on" calls pm_runtime_forbid(), clearing the flag, returning
+the device to full power if it was in a low-power state, and preventing the
+device from being runtime power-managed. User space can check the current value
+of the runtime_auto flag by reading the file.
+
+The device's runtime_auto flag has no effect on the handling of system-wide
+power transitions. In particular, the device can (and in the majority of cases
+should and will) be put into a low-power state during a system-wide transition
+to a sleep state even though its runtime_auto flag is clear.
+
+For more information about the runtime power management framework, refer to
+Documentation/power/runtime_pm.txt.
+
+
+Calling Drivers to Enter and Leave System Sleep States
+======================================================
+When the system goes into a sleep state, each device's driver is asked to
+suspend the device by putting it into a state compatible with the target
+system state. That's usually some version of "off", but the details are
+system-specific. Also, wakeup-enabled devices will usually stay partly
+functional in order to wake the system.
+
+When the system leaves that low-power state, the device's driver is asked to
+resume it by returning it to full power. The suspend and resume operations
+always go together, and both are multi-phase operations.
+
+For simple drivers, suspend might quiesce the device using class code
+and then turn its hardware as "off" as possible during suspend_noirq. The
+matching resume calls would then completely reinitialize the hardware
+before reactivating its class I/O queues.
+
+More power-aware drivers might prepare the devices for triggering system wakeup
+events.
+
+
+Call Sequence Guarantees
+------------------------
+To ensure that bridges and similar links needing to talk to a device are
+available when the device is suspended or resumed, the device tree is
+walked in a bottom-up order to suspend devices. A top-down order is
+used to resume those devices.
+
+The ordering of the device tree is defined by the order in which devices
+get registered: a child can never be registered, probed or resumed before
+its parent; and can't be removed or suspended after that parent.
+
+The policy is that the device tree should match hardware bus topology.
+(Or at least the control bus, for devices which use multiple busses.)
+In particular, this means that a device registration may fail if the parent of
+the device is suspending (i.e. has been chosen by the PM core as the next
+device to suspend) or has already suspended, as well as after all of the other
+devices have been suspended. Device drivers must be prepared to cope with such
+situations.
+
+
+System Power Management Phases
+------------------------------
+Suspending or resuming the system is done in several phases. Different phases
+are used for freeze, standby, and memory sleep states ("suspend-to-RAM") and the
+hibernation state ("suspend-to-disk"). Each phase involves executing callbacks
+for every device before the next phase begins. Not all busses or classes
+support all these callbacks and not all drivers use all the callbacks. The
+various phases always run after tasks have been frozen and before they are
+unfrozen. Furthermore, the *_noirq phases run at a time when IRQ handlers have
+been disabled (except for those marked with the IRQF_NO_SUSPEND flag).
+
+All phases use PM domain, bus, type, class or driver callbacks (that is, methods
+defined in dev->pm_domain->ops, dev->bus->pm, dev->type->pm, dev->class->pm or
+dev->driver->pm). These callbacks are regarded by the PM core as mutually
+exclusive. Moreover, PM domain callbacks always take precedence over all of the
+other callbacks and, for example, type callbacks take precedence over bus, class
+and driver callbacks. To be precise, the following rules are used to determine
+which callback to execute in the given phase:
+
+ 1. If dev->pm_domain is present, the PM core will choose the callback
+ included in dev->pm_domain->ops for execution
+
+ 2. Otherwise, if both dev->type and dev->type->pm are present, the callback
+ included in dev->type->pm will be chosen for execution.
+
+ 3. Otherwise, if both dev->class and dev->class->pm are present, the
+ callback included in dev->class->pm will be chosen for execution.
+
+ 4. Otherwise, if both dev->bus and dev->bus->pm are present, the callback
+ included in dev->bus->pm will be chosen for execution.
+
+This allows PM domains and device types to override callbacks provided by bus
+types or device classes if necessary.
+
+The PM domain, type, class and bus callbacks may in turn invoke device- or
+driver-specific methods stored in dev->driver->pm, but they don't have to do
+that.
+
+If the subsystem callback chosen for execution is not present, the PM core will
+execute the corresponding method from dev->driver->pm instead if there is one.
+
+
+Entering System Suspend
+-----------------------
+When the system goes into the freeze, standby or memory sleep state,
+the phases are:
+
+ prepare, suspend, suspend_late, suspend_noirq.
+
+ 1. The prepare phase is meant to prevent races by preventing new devices
+ from being registered; the PM core would never know that all the
+ children of a device had been suspended if new children could be
+ registered at will. (By contrast, devices may be unregistered at any
+ time.) Unlike the other suspend-related phases, during the prepare
+ phase the device tree is traversed top-down.
+
+ After the prepare callback method returns, no new children may be
+ registered below the device. The method may also prepare the device or
+ driver in some way for the upcoming system power transition, but it
+ should not put the device into a low-power state.
+
+ For devices supporting runtime power management, the return value of the
+ prepare callback can be used to indicate to the PM core that it may
+ safely leave the device in runtime suspend (if runtime-suspended
+ already), provided that all of the device's descendants are also left in
+ runtime suspend. Namely, if the prepare callback returns a positive
+ number and that happens for all of the descendants of the device too,
+ and all of them (including the device itself) are runtime-suspended, the
+ PM core will skip the suspend, suspend_late and suspend_noirq suspend
+ phases as well as the resume_noirq, resume_early and resume phases of
+ the following system resume for all of these devices. In that case,
+ the complete callback will be called directly after the prepare callback
+ and is entirely responsible for bringing the device back to the
+ functional state as appropriate.
+
+ 2. The suspend methods should quiesce the device to stop it from performing
+ I/O. They also may save the device registers and put it into the
+ appropriate low-power state, depending on the bus type the device is on,
+ and they may enable wakeup events.
+
+ 3 For a number of devices it is convenient to split suspend into the
+ "quiesce device" and "save device state" phases, in which cases
+ suspend_late is meant to do the latter. It is always executed after
+ runtime power management has been disabled for all devices.
+
+ 4. The suspend_noirq phase occurs after IRQ handlers have been disabled,
+ which means that the driver's interrupt handler will not be called while
+ the callback method is running. The methods should save the values of
+ the device's registers that weren't saved previously and finally put the
+ device into the appropriate low-power state.
+
+ The majority of subsystems and device drivers need not implement this
+ callback. However, bus types allowing devices to share interrupt
+ vectors, like PCI, generally need it; otherwise a driver might encounter
+ an error during the suspend phase by fielding a shared interrupt
+ generated by some other device after its own device had been set to low
+ power.
+
+At the end of these phases, drivers should have stopped all I/O transactions
+(DMA, IRQs), saved enough state that they can re-initialize or restore previous
+state (as needed by the hardware), and placed the device into a low-power state.
+On many platforms they will gate off one or more clock sources; sometimes they
+will also switch off power supplies or reduce voltages. (Drivers supporting
+runtime PM may already have performed some or all of these steps.)
+
+If device_may_wakeup(dev) returns true, the device should be prepared for
+generating hardware wakeup signals to trigger a system wakeup event when the
+system is in the sleep state. For example, enable_irq_wake() might identify
+GPIO signals hooked up to a switch or other external hardware, and
+pci_enable_wake() does something similar for the PCI PME signal.
+
+If any of these callbacks returns an error, the system won't enter the desired
+low-power state. Instead the PM core will unwind its actions by resuming all
+the devices that were suspended.
+
+
+Leaving System Suspend
+----------------------
+When resuming from freeze, standby or memory sleep, the phases are:
+
+ resume_noirq, resume_early, resume, complete.
+
+ 1. The resume_noirq callback methods should perform any actions needed
+ before the driver's interrupt handlers are invoked. This generally
+ means undoing the actions of the suspend_noirq phase. If the bus type
+ permits devices to share interrupt vectors, like PCI, the method should
+ bring the device and its driver into a state in which the driver can
+ recognize if the device is the source of incoming interrupts, if any,
+ and handle them correctly.
+
+ For example, the PCI bus type's ->pm.resume_noirq() puts the device into
+ the full-power state (D0 in the PCI terminology) and restores the
+ standard configuration registers of the device. Then it calls the
+ device driver's ->pm.resume_noirq() method to perform device-specific
+ actions.
+
+ 2. The resume_early methods should prepare devices for the execution of
+ the resume methods. This generally involves undoing the actions of the
+ preceding suspend_late phase.
+
+ 3 The resume methods should bring the device back to its operating
+ state, so that it can perform normal I/O. This generally involves
+ undoing the actions of the suspend phase.
+
+ 4. The complete phase should undo the actions of the prepare phase. Note,
+ however, that new children may be registered below the device as soon as
+ the resume callbacks occur; it's not necessary to wait until the
+ complete phase.
+
+ Moreover, if the preceding prepare callback returned a positive number,
+ the device may have been left in runtime suspend throughout the whole
+ system suspend and resume (the suspend, suspend_late, suspend_noirq
+ phases of system suspend and the resume_noirq, resume_early, resume
+ phases of system resume may have been skipped for it). In that case,
+ the complete callback is entirely responsible for bringing the device
+ back to the functional state after system suspend if necessary. [For
+ example, it may need to queue up a runtime resume request for the device
+ for this purpose.] To check if that is the case, the complete callback
+ can consult the device's power.direct_complete flag. Namely, if that
+ flag is set when the complete callback is being run, it has been called
+ directly after the preceding prepare and special action may be required
+ to make the device work correctly afterward.
+
+At the end of these phases, drivers should be as functional as they were before
+suspending: I/O can be performed using DMA and IRQs, and the relevant clocks are
+gated on.
+
+However, the details here may again be platform-specific. For example,
+some systems support multiple "run" states, and the mode in effect at
+the end of resume might not be the one which preceded suspension.
+That means availability of certain clocks or power supplies changed,
+which could easily affect how a driver works.
+
+Drivers need to be able to handle hardware which has been reset since the
+suspend methods were called, for example by complete reinitialization.
+This may be the hardest part, and the one most protected by NDA'd documents
+and chip errata. It's simplest if the hardware state hasn't changed since
+the suspend was carried out, but that can't be guaranteed (in fact, it usually
+is not the case).
+
+Drivers must also be prepared to notice that the device has been removed
+while the system was powered down, whenever that's physically possible.
+PCMCIA, MMC, USB, Firewire, SCSI, and even IDE are common examples of busses
+where common Linux platforms will see such removal. Details of how drivers
+will notice and handle such removals are currently bus-specific, and often
+involve a separate thread.
+
+These callbacks may return an error value, but the PM core will ignore such
+errors since there's nothing it can do about them other than printing them in
+the system log.
+
+
+Entering Hibernation
+--------------------
+Hibernating the system is more complicated than putting it into the other
+sleep states, because it involves creating and saving a system image.
+Therefore there are more phases for hibernation, with a different set of
+callbacks. These phases always run after tasks have been frozen and memory has
+been freed.
+
+The general procedure for hibernation is to quiesce all devices (freeze), create
+an image of the system memory while everything is stable, reactivate all
+devices (thaw), write the image to permanent storage, and finally shut down the
+system (poweroff). The phases used to accomplish this are:
+
+ prepare, freeze, freeze_late, freeze_noirq, thaw_noirq, thaw_early,
+ thaw, complete, prepare, poweroff, poweroff_late, poweroff_noirq
+
+ 1. The prepare phase is discussed in the "Entering System Suspend" section
+ above.
+
+ 2. The freeze methods should quiesce the device so that it doesn't generate
+ IRQs or DMA, and they may need to save the values of device registers.
+ However the device does not have to be put in a low-power state, and to
+ save time it's best not to do so. Also, the device should not be
+ prepared to generate wakeup events.
+
+ 3. The freeze_late phase is analogous to the suspend_late phase described
+ above, except that the device should not be put in a low-power state and
+ should not be allowed to generate wakeup events by it.
+
+ 4. The freeze_noirq phase is analogous to the suspend_noirq phase discussed
+ above, except again that the device should not be put in a low-power
+ state and should not be allowed to generate wakeup events.
+
+At this point the system image is created. All devices should be inactive and
+the contents of memory should remain undisturbed while this happens, so that the
+image forms an atomic snapshot of the system state.
+
+ 5. The thaw_noirq phase is analogous to the resume_noirq phase discussed
+ above. The main difference is that its methods can assume the device is
+ in the same state as at the end of the freeze_noirq phase.
+
+ 6. The thaw_early phase is analogous to the resume_early phase described
+ above. Its methods should undo the actions of the preceding
+ freeze_late, if necessary.
+
+ 7. The thaw phase is analogous to the resume phase discussed above. Its
+ methods should bring the device back to an operating state, so that it
+ can be used for saving the image if necessary.
+
+ 8. The complete phase is discussed in the "Leaving System Suspend" section
+ above.
+
+At this point the system image is saved, and the devices then need to be
+prepared for the upcoming system shutdown. This is much like suspending them
+before putting the system into the freeze, standby or memory sleep state,
+and the phases are similar.
+
+ 9. The prepare phase is discussed above.
+
+ 10. The poweroff phase is analogous to the suspend phase.
+
+ 11. The poweroff_late phase is analogous to the suspend_late phase.
+
+ 12. The poweroff_noirq phase is analogous to the suspend_noirq phase.
+
+The poweroff, poweroff_late and poweroff_noirq callbacks should do essentially
+the same things as the suspend, suspend_late and suspend_noirq callbacks,
+respectively. The only notable difference is that they need not store the
+device register values, because the registers should already have been stored
+during the freeze, freeze_late or freeze_noirq phases.
+
+
+Leaving Hibernation
+-------------------
+Resuming from hibernation is, again, more complicated than resuming from a sleep
+state in which the contents of main memory are preserved, because it requires
+a system image to be loaded into memory and the pre-hibernation memory contents
+to be restored before control can be passed back to the image kernel.
+
+Although in principle, the image might be loaded into memory and the
+pre-hibernation memory contents restored by the boot loader, in practice this
+can't be done because boot loaders aren't smart enough and there is no
+established protocol for passing the necessary information. So instead, the
+boot loader loads a fresh instance of the kernel, called the boot kernel, into
+memory and passes control to it in the usual way. Then the boot kernel reads
+the system image, restores the pre-hibernation memory contents, and passes
+control to the image kernel. Thus two different kernels are involved in
+resuming from hibernation. In fact, the boot kernel may be completely different
+from the image kernel: a different configuration and even a different version.
+This has important consequences for device drivers and their subsystems.
+
+To be able to load the system image into memory, the boot kernel needs to
+include at least a subset of device drivers allowing it to access the storage
+medium containing the image, although it doesn't need to include all of the
+drivers present in the image kernel. After the image has been loaded, the
+devices managed by the boot kernel need to be prepared for passing control back
+to the image kernel. This is very similar to the initial steps involved in
+creating a system image, and it is accomplished in the same way, using prepare,
+freeze, and freeze_noirq phases. However the devices affected by these phases
+are only those having drivers in the boot kernel; other devices will still be in
+whatever state the boot loader left them.
+
+Should the restoration of the pre-hibernation memory contents fail, the boot
+kernel would go through the "thawing" procedure described above, using the
+thaw_noirq, thaw, and complete phases, and then continue running normally. This
+happens only rarely. Most often the pre-hibernation memory contents are
+restored successfully and control is passed to the image kernel, which then
+becomes responsible for bringing the system back to the working state.
+
+To achieve this, the image kernel must restore the devices' pre-hibernation
+functionality. The operation is much like waking up from the memory sleep
+state, although it involves different phases:
+
+ restore_noirq, restore_early, restore, complete
+
+ 1. The restore_noirq phase is analogous to the resume_noirq phase.
+
+ 2. The restore_early phase is analogous to the resume_early phase.
+
+ 3. The restore phase is analogous to the resume phase.
+
+ 4. The complete phase is discussed above.
+
+The main difference from resume[_early|_noirq] is that restore[_early|_noirq]
+must assume the device has been accessed and reconfigured by the boot loader or
+the boot kernel. Consequently the state of the device may be different from the
+state remembered from the freeze, freeze_late and freeze_noirq phases. The
+device may even need to be reset and completely re-initialized. In many cases
+this difference doesn't matter, so the resume[_early|_noirq] and
+restore[_early|_norq] method pointers can be set to the same routines.
+Nevertheless, different callback pointers are used in case there is a situation
+where it actually does matter.
+
+
+Device Power Management Domains
+-------------------------------
+Sometimes devices share reference clocks or other power resources. In those
+cases it generally is not possible to put devices into low-power states
+individually. Instead, a set of devices sharing a power resource can be put
+into a low-power state together at the same time by turning off the shared
+power resource. Of course, they also need to be put into the full-power state
+together, by turning the shared power resource on. A set of devices with this
+property is often referred to as a power domain.
+
+Support for power domains is provided through the pm_domain field of struct
+device. This field is a pointer to an object of type struct dev_pm_domain,
+defined in include/linux/pm.h, providing a set of power management callbacks
+analogous to the subsystem-level and device driver callbacks that are executed
+for the given device during all power transitions, instead of the respective
+subsystem-level callbacks. Specifically, if a device's pm_domain pointer is
+not NULL, the ->suspend() callback from the object pointed to by it will be
+executed instead of its subsystem's (e.g. bus type's) ->suspend() callback and
+analogously for all of the remaining callbacks. In other words, power
+management domain callbacks, if defined for the given device, always take
+precedence over the callbacks provided by the device's subsystem (e.g. bus
+type).
+
+The support for device power management domains is only relevant to platforms
+needing to use the same device driver power management callbacks in many
+different power domain configurations and wanting to avoid incorporating the
+support for power domains into subsystem-level callbacks, for example by
+modifying the platform bus type. Other platforms need not implement it or take
+it into account in any way.
+
+
+Device Low Power (suspend) States
+---------------------------------
+Device low-power states aren't standard. One device might only handle
+"on" and "off", while another might support a dozen different versions of
+"on" (how many engines are active?), plus a state that gets back to "on"
+faster than from a full "off".
+
+Some busses define rules about what different suspend states mean. PCI
+gives one example: after the suspend sequence completes, a non-legacy
+PCI device may not perform DMA or issue IRQs, and any wakeup events it
+issues would be issued through the PME# bus signal. Plus, there are
+several PCI-standard device states, some of which are optional.
+
+In contrast, integrated system-on-chip processors often use IRQs as the
+wakeup event sources (so drivers would call enable_irq_wake) and might
+be able to treat DMA completion as a wakeup event (sometimes DMA can stay
+active too, it'd only be the CPU and some peripherals that sleep).
+
+Some details here may be platform-specific. Systems may have devices that
+can be fully active in certain sleep states, such as an LCD display that's
+refreshed using DMA while most of the system is sleeping lightly ... and
+its frame buffer might even be updated by a DSP or other non-Linux CPU while
+the Linux control processor stays idle.
+
+Moreover, the specific actions taken may depend on the target system state.
+One target system state might allow a given device to be very operational;
+another might require a hard shut down with re-initialization on resume.
+And two different target systems might use the same device in different
+ways; the aforementioned LCD might be active in one product's "standby",
+but a different product using the same SOC might work differently.
+
+
+Power Management Notifiers
+--------------------------
+There are some operations that cannot be carried out by the power management
+callbacks discussed above, because the callbacks occur too late or too early.
+To handle these cases, subsystems and device drivers may register power
+management notifiers that are called before tasks are frozen and after they have
+been thawed. Generally speaking, the PM notifiers are suitable for performing
+actions that either require user space to be available, or at least won't
+interfere with user space.
+
+For details refer to Documentation/power/notifiers.txt.
+
+
+Runtime Power Management
+========================
+Many devices are able to dynamically power down while the system is still
+running. This feature is useful for devices that are not being used, and
+can offer significant power savings on a running system. These devices
+often support a range of runtime power states, which might use names such
+as "off", "sleep", "idle", "active", and so on. Those states will in some
+cases (like PCI) be partially constrained by the bus the device uses, and will
+usually include hardware states that are also used in system sleep states.
+
+A system-wide power transition can be started while some devices are in low
+power states due to runtime power management. The system sleep PM callbacks
+should recognize such situations and react to them appropriately, but the
+necessary actions are subsystem-specific.
+
+In some cases the decision may be made at the subsystem level while in other
+cases the device driver may be left to decide. In some cases it may be
+desirable to leave a suspended device in that state during a system-wide power
+transition, but in other cases the device must be put back into the full-power
+state temporarily, for example so that its system wakeup capability can be
+disabled. This all depends on the hardware and the design of the subsystem and
+device driver in question.
+
+During system-wide resume from a sleep state it's easiest to put devices into
+the full-power state, as explained in Documentation/power/runtime_pm.txt. Refer
+to that document for more information regarding this particular issue as well as
+for information on the device runtime power management framework in general.
diff --git a/Documentation/power/drivers-testing.txt b/Documentation/power/drivers-testing.txt
new file mode 100644
index 000000000..638afdf4d
--- /dev/null
+++ b/Documentation/power/drivers-testing.txt
@@ -0,0 +1,46 @@
+Testing suspend and resume support in device drivers
+ (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
+
+1. Preparing the test system
+
+Unfortunately, to effectively test the support for the system-wide suspend and
+resume transitions in a driver, it is necessary to suspend and resume a fully
+functional system with this driver loaded. Moreover, that should be done
+several times, preferably several times in a row, and separately for hibernation
+(aka suspend to disk or STD) and suspend to RAM (STR), because each of these
+cases involves slightly different operations and different interactions with
+the machine's BIOS.
+
+Of course, for this purpose the test system has to be known to suspend and
+resume without the driver being tested. Thus, if possible, you should first
+resolve all suspend/resume-related problems in the test system before you start
+testing the new driver. Please see Documentation/power/basic-pm-debugging.txt
+for more information about the debugging of suspend/resume functionality.
+
+2. Testing the driver
+
+Once you have resolved the suspend/resume-related problems with your test system
+without the new driver, you are ready to test it:
+
+a) Build the driver as a module, load it and try the test modes of hibernation
+ (see: Documentation/power/basic-pm-debugging.txt, 1).
+
+b) Load the driver and attempt to hibernate in the "reboot", "shutdown" and
+ "platform" modes (see: Documentation/power/basic-pm-debugging.txt, 1).
+
+c) Compile the driver directly into the kernel and try the test modes of
+ hibernation.
+
+d) Attempt to hibernate with the driver compiled directly into the kernel
+ in the "reboot", "shutdown" and "platform" modes.
+
+e) Try the test modes of suspend (see: Documentation/power/basic-pm-debugging.txt,
+ 2). [As far as the STR tests are concerned, it should not matter whether or
+ not the driver is built as a module.]
+
+f) Attempt to suspend to RAM using the s2ram tool with the driver loaded
+ (see: Documentation/power/basic-pm-debugging.txt, 2).
+
+Each of the above tests should be repeated several times and the STD tests
+should be mixed with the STR tests. If any of them fails, the driver cannot be
+regarded as suspend/resume-safe.
diff --git a/Documentation/power/freezing-of-tasks.txt b/Documentation/power/freezing-of-tasks.txt
new file mode 100644
index 000000000..85894d83b
--- /dev/null
+++ b/Documentation/power/freezing-of-tasks.txt
@@ -0,0 +1,230 @@
+Freezing of tasks
+ (C) 2007 Rafael J. Wysocki <rjw@sisk.pl>, GPL
+
+I. What is the freezing of tasks?
+
+The freezing of tasks is a mechanism by which user space processes and some
+kernel threads are controlled during hibernation or system-wide suspend (on some
+architectures).
+
+II. How does it work?
+
+There are three per-task flags used for that, PF_NOFREEZE, PF_FROZEN
+and PF_FREEZER_SKIP (the last one is auxiliary). The tasks that have
+PF_NOFREEZE unset (all user space processes and some kernel threads) are
+regarded as 'freezable' and treated in a special way before the system enters a
+suspend state as well as before a hibernation image is created (in what follows
+we only consider hibernation, but the description also applies to suspend).
+
+Namely, as the first step of the hibernation procedure the function
+freeze_processes() (defined in kernel/power/process.c) is called. A system-wide
+variable system_freezing_cnt (as opposed to a per-task flag) is used to indicate
+whether the system is to undergo a freezing operation. And freeze_processes()
+sets this variable. After this, it executes try_to_freeze_tasks() that sends a
+fake signal to all user space processes, and wakes up all the kernel threads.
+All freezable tasks must react to that by calling try_to_freeze(), which
+results in a call to __refrigerator() (defined in kernel/freezer.c), which sets
+the task's PF_FROZEN flag, changes its state to TASK_UNINTERRUPTIBLE and makes
+it loop until PF_FROZEN is cleared for it. Then, we say that the task is
+'frozen' and therefore the set of functions handling this mechanism is referred
+to as 'the freezer' (these functions are defined in kernel/power/process.c,
+kernel/freezer.c & include/linux/freezer.h). User space processes are generally
+frozen before kernel threads.
+
+__refrigerator() must not be called directly. Instead, use the
+try_to_freeze() function (defined in include/linux/freezer.h), that checks
+if the task is to be frozen and makes the task enter __refrigerator().
+
+For user space processes try_to_freeze() is called automatically from the
+signal-handling code, but the freezable kernel threads need to call it
+explicitly in suitable places or use the wait_event_freezable() or
+wait_event_freezable_timeout() macros (defined in include/linux/freezer.h)
+that combine interruptible sleep with checking if the task is to be frozen and
+calling try_to_freeze(). The main loop of a freezable kernel thread may look
+like the following one:
+
+ set_freezable();
+ do {
+ hub_events();
+ wait_event_freezable(khubd_wait,
+ !list_empty(&hub_event_list) ||
+ kthread_should_stop());
+ } while (!kthread_should_stop() || !list_empty(&hub_event_list));
+
+(from drivers/usb/core/hub.c::hub_thread()).
+
+If a freezable kernel thread fails to call try_to_freeze() after the freezer has
+initiated a freezing operation, the freezing of tasks will fail and the entire
+hibernation operation will be cancelled. For this reason, freezable kernel
+threads must call try_to_freeze() somewhere or use one of the
+wait_event_freezable() and wait_event_freezable_timeout() macros.
+
+After the system memory state has been restored from a hibernation image and
+devices have been reinitialized, the function thaw_processes() is called in
+order to clear the PF_FROZEN flag for each frozen task. Then, the tasks that
+have been frozen leave __refrigerator() and continue running.
+
+
+Rationale behind the functions dealing with freezing and thawing of tasks:
+-------------------------------------------------------------------------
+
+freeze_processes():
+ - freezes only userspace tasks
+
+freeze_kernel_threads():
+ - freezes all tasks (including kernel threads) because we can't freeze
+ kernel threads without freezing userspace tasks
+
+thaw_kernel_threads():
+ - thaws only kernel threads; this is particularly useful if we need to do
+ anything special in between thawing of kernel threads and thawing of
+ userspace tasks, or if we want to postpone the thawing of userspace tasks
+
+thaw_processes():
+ - thaws all tasks (including kernel threads) because we can't thaw userspace
+ tasks without thawing kernel threads
+
+
+III. Which kernel threads are freezable?
+
+Kernel threads are not freezable by default. However, a kernel thread may clear
+PF_NOFREEZE for itself by calling set_freezable() (the resetting of PF_NOFREEZE
+directly is not allowed). From this point it is regarded as freezable
+and must call try_to_freeze() in a suitable place.
+
+IV. Why do we do that?
+
+Generally speaking, there is a couple of reasons to use the freezing of tasks:
+
+1. The principal reason is to prevent filesystems from being damaged after
+hibernation. At the moment we have no simple means of checkpointing
+filesystems, so if there are any modifications made to filesystem data and/or
+metadata on disks, we cannot bring them back to the state from before the
+modifications. At the same time each hibernation image contains some
+filesystem-related information that must be consistent with the state of the
+on-disk data and metadata after the system memory state has been restored from
+the image (otherwise the filesystems will be damaged in a nasty way, usually
+making them almost impossible to repair). We therefore freeze tasks that might
+cause the on-disk filesystems' data and metadata to be modified after the
+hibernation image has been created and before the system is finally powered off.
+The majority of these are user space processes, but if any of the kernel threads
+may cause something like this to happen, they have to be freezable.
+
+2. Next, to create the hibernation image we need to free a sufficient amount of
+memory (approximately 50% of available RAM) and we need to do that before
+devices are deactivated, because we generally need them for swapping out. Then,
+after the memory for the image has been freed, we don't want tasks to allocate
+additional memory and we prevent them from doing that by freezing them earlier.
+[Of course, this also means that device drivers should not allocate substantial
+amounts of memory from their .suspend() callbacks before hibernation, but this
+is a separate issue.]
+
+3. The third reason is to prevent user space processes and some kernel threads
+from interfering with the suspending and resuming of devices. A user space
+process running on a second CPU while we are suspending devices may, for
+example, be troublesome and without the freezing of tasks we would need some
+safeguards against race conditions that might occur in such a case.
+
+Although Linus Torvalds doesn't like the freezing of tasks, he said this in one
+of the discussions on LKML (http://lkml.org/lkml/2007/4/27/608):
+
+"RJW:> Why we freeze tasks at all or why we freeze kernel threads?
+
+Linus: In many ways, 'at all'.
+
+I _do_ realize the IO request queue issues, and that we cannot actually do
+s2ram with some devices in the middle of a DMA. So we want to be able to
+avoid *that*, there's no question about that. And I suspect that stopping
+user threads and then waiting for a sync is practically one of the easier
+ways to do so.
+
+So in practice, the 'at all' may become a 'why freeze kernel threads?' and
+freezing user threads I don't find really objectionable."
+
+Still, there are kernel threads that may want to be freezable. For example, if
+a kernel thread that belongs to a device driver accesses the device directly, it
+in principle needs to know when the device is suspended, so that it doesn't try
+to access it at that time. However, if the kernel thread is freezable, it will
+be frozen before the driver's .suspend() callback is executed and it will be
+thawed after the driver's .resume() callback has run, so it won't be accessing
+the device while it's suspended.
+
+4. Another reason for freezing tasks is to prevent user space processes from
+realizing that hibernation (or suspend) operation takes place. Ideally, user
+space processes should not notice that such a system-wide operation has occurred
+and should continue running without any problems after the restore (or resume
+from suspend). Unfortunately, in the most general case this is quite difficult
+to achieve without the freezing of tasks. Consider, for example, a process
+that depends on all CPUs being online while it's running. Since we need to
+disable nonboot CPUs during the hibernation, if this process is not frozen, it
+may notice that the number of CPUs has changed and may start to work incorrectly
+because of that.
+
+V. Are there any problems related to the freezing of tasks?
+
+Yes, there are.
+
+First of all, the freezing of kernel threads may be tricky if they depend one
+on another. For example, if kernel thread A waits for a completion (in the
+TASK_UNINTERRUPTIBLE state) that needs to be done by freezable kernel thread B
+and B is frozen in the meantime, then A will be blocked until B is thawed, which
+may be undesirable. That's why kernel threads are not freezable by default.
+
+Second, there are the following two problems related to the freezing of user
+space processes:
+1. Putting processes into an uninterruptible sleep distorts the load average.
+2. Now that we have FUSE, plus the framework for doing device drivers in
+userspace, it gets even more complicated because some userspace processes are
+now doing the sorts of things that kernel threads do
+(https://lists.linux-foundation.org/pipermail/linux-pm/2007-May/012309.html).
+
+The problem 1. seems to be fixable, although it hasn't been fixed so far. The
+other one is more serious, but it seems that we can work around it by using
+hibernation (and suspend) notifiers (in that case, though, we won't be able to
+avoid the realization by the user space processes that the hibernation is taking
+place).
+
+There are also problems that the freezing of tasks tends to expose, although
+they are not directly related to it. For example, if request_firmware() is
+called from a device driver's .resume() routine, it will timeout and eventually
+fail, because the user land process that should respond to the request is frozen
+at this point. So, seemingly, the failure is due to the freezing of tasks.
+Suppose, however, that the firmware file is located on a filesystem accessible
+only through another device that hasn't been resumed yet. In that case,
+request_firmware() will fail regardless of whether or not the freezing of tasks
+is used. Consequently, the problem is not really related to the freezing of
+tasks, since it generally exists anyway.
+
+A driver must have all firmwares it may need in RAM before suspend() is called.
+If keeping them is not practical, for example due to their size, they must be
+requested early enough using the suspend notifier API described in notifiers.txt.
+
+VI. Are there any precautions to be taken to prevent freezing failures?
+
+Yes, there are.
+
+First of all, grabbing the 'pm_mutex' lock to mutually exclude a piece of code
+from system-wide sleep such as suspend/hibernation is not encouraged.
+If possible, that piece of code must instead hook onto the suspend/hibernation
+notifiers to achieve mutual exclusion. Look at the CPU-Hotplug code
+(kernel/cpu.c) for an example.
+
+However, if that is not feasible, and grabbing 'pm_mutex' is deemed necessary,
+it is strongly discouraged to directly call mutex_[un]lock(&pm_mutex) since
+that could lead to freezing failures, because if the suspend/hibernate code
+successfully acquired the 'pm_mutex' lock, and hence that other entity failed
+to acquire the lock, then that task would get blocked in TASK_UNINTERRUPTIBLE
+state. As a consequence, the freezer would not be able to freeze that task,
+leading to freezing failure.
+
+However, the [un]lock_system_sleep() APIs are safe to use in this scenario,
+since they ask the freezer to skip freezing this task, since it is anyway
+"frozen enough" as it is blocked on 'pm_mutex', which will be released
+only after the entire suspend/hibernation sequence is complete.
+So, to summarize, use [un]lock_system_sleep() instead of directly using
+mutex_[un]lock(&pm_mutex). That would prevent freezing failures.
+
+V. Miscellaneous
+/sys/power/pm_freeze_timeout controls how long it will cost at most to freeze
+all user space processes or all freezable kernel threads, in unit of millisecond.
+The default value is 20000, with range of unsigned integer.
diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt
new file mode 100644
index 000000000..f1f0f59a7
--- /dev/null
+++ b/Documentation/power/interface.txt
@@ -0,0 +1,75 @@
+Power Management Interface
+
+
+The power management subsystem provides a unified sysfs interface to
+userspace, regardless of what architecture or platform one is
+running. The interface exists in /sys/power/ directory (assuming sysfs
+is mounted at /sys).
+
+/sys/power/state controls system power state. Reading from this file
+returns what states are supported, which is hard-coded to 'freeze',
+'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and 'disk'
+(Suspend-to-Disk).
+
+Writing to this file one of those strings causes the system to
+transition into that state. Please see the file
+Documentation/power/states.txt for a description of each of those
+states.
+
+
+/sys/power/disk controls the operating mode of the suspend-to-disk
+mechanism. Suspend-to-disk can be handled in several ways. We have a
+few options for putting the system to sleep - using the platform driver
+(e.g. ACPI or other suspend_ops), powering off the system or rebooting the
+system (for testing).
+
+Additionally, /sys/power/disk can be used to turn on one of the two testing
+modes of the suspend-to-disk mechanism: 'testproc' or 'test'. If the
+suspend-to-disk mechanism is in the 'testproc' mode, writing 'disk' to
+/sys/power/state will cause the kernel to disable nonboot CPUs and freeze
+tasks, wait for 5 seconds, unfreeze tasks and enable nonboot CPUs. If it is
+in the 'test' mode, writing 'disk' to /sys/power/state will cause the kernel
+to disable nonboot CPUs and freeze tasks, shrink memory, suspend devices, wait
+for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs. Then,
+we are able to look in the log messages and work out, for example, which code
+is being slow and which device drivers are misbehaving.
+
+Reading from this file will display all supported modes and the currently
+selected one in brackets, for example
+
+ [shutdown] reboot test testproc
+
+Writing to this file will accept one of
+
+ 'platform' (only if the platform supports it)
+ 'shutdown'
+ 'reboot'
+ 'testproc'
+ 'test'
+
+/sys/power/image_size controls the size of the image created by
+the suspend-to-disk mechanism. It can be written a string
+representing a non-negative integer that will be used as an upper
+limit of the image size, in bytes. The suspend-to-disk mechanism will
+do its best to ensure the image size will not exceed that number. However,
+if this turns out to be impossible, it will try to suspend anyway using the
+smallest image possible. In particular, if "0" is written to this file, the
+suspend image will be as small as possible.
+
+Reading from this file will display the current image size limit, which
+is set to 2/5 of available RAM by default.
+
+/sys/power/pm_trace controls the code which saves the last PM event point in
+the RTC across reboots, so that you can debug a machine that just hangs
+during suspend (or more commonly, during resume). Namely, the RTC is only
+used to save the last PM event point if this file contains '1'. Initially it
+contains '0' which may be changed to '1' by writing a string representing a
+nonzero integer into it.
+
+To use this debugging feature you should attempt to suspend the machine, then
+reboot it and run
+
+ dmesg -s 1000000 | grep 'hash matches'
+
+CAUTION: Using it will cause your machine's real-time (CMOS) clock to be
+set to a random invalid time after a resume.
diff --git a/Documentation/power/notifiers.txt b/Documentation/power/notifiers.txt
new file mode 100644
index 000000000..a81fa2543
--- /dev/null
+++ b/Documentation/power/notifiers.txt
@@ -0,0 +1,55 @@
+Suspend notifiers
+ (C) 2007-2011 Rafael J. Wysocki <rjw@sisk.pl>, GPL
+
+There are some operations that subsystems or drivers may want to carry out
+before hibernation/suspend or after restore/resume, but they require the system
+to be fully functional, so the drivers' and subsystems' .suspend() and .resume()
+or even .prepare() and .complete() callbacks are not suitable for this purpose.
+For example, device drivers may want to upload firmware to their devices after
+resume/restore, but they cannot do it by calling request_firmware() from their
+.resume() or .complete() routines (user land processes are frozen at these
+points). The solution may be to load the firmware into memory before processes
+are frozen and upload it from there in the .resume() routine.
+A suspend/hibernation notifier may be used for this purpose.
+
+The subsystems or drivers having such needs can register suspend notifiers that
+will be called upon the following events by the PM core:
+
+PM_HIBERNATION_PREPARE The system is going to hibernate, tasks will be frozen
+ immediately. This is different from PM_SUSPEND_PREPARE
+ below because here we do additional work between notifiers
+ and drivers freezing.
+
+PM_POST_HIBERNATION The system memory state has been restored from a
+ hibernation image or an error occurred during
+ hibernation. Device drivers' restore callbacks have
+ been executed and tasks have been thawed.
+
+PM_RESTORE_PREPARE The system is going to restore a hibernation image.
+ If all goes well, the restored kernel will issue a
+ PM_POST_HIBERNATION notification.
+
+PM_POST_RESTORE An error occurred during restore from hibernation.
+ Device drivers' restore callbacks have been executed
+ and tasks have been thawed.
+
+PM_SUSPEND_PREPARE The system is preparing for suspend.
+
+PM_POST_SUSPEND The system has just resumed or an error occurred during
+ suspend. Device drivers' resume callbacks have been
+ executed and tasks have been thawed.
+
+It is generally assumed that whatever the notifiers do for
+PM_HIBERNATION_PREPARE, should be undone for PM_POST_HIBERNATION. Analogously,
+operations performed for PM_SUSPEND_PREPARE should be reversed for
+PM_POST_SUSPEND. Additionally, all of the notifiers are called for
+PM_POST_HIBERNATION if one of them fails for PM_HIBERNATION_PREPARE, and
+all of the notifiers are called for PM_POST_SUSPEND if one of them fails for
+PM_SUSPEND_PREPARE.
+
+The hibernation and suspend notifiers are called with pm_mutex held. They are
+defined in the usual way, but their last argument is meaningless (it is always
+NULL). To register and/or unregister a suspend notifier use the functions
+register_pm_notifier() and unregister_pm_notifier(), respectively, defined in
+include/linux/suspend.h . If you don't need to unregister the notifier, you can
+also use the pm_notifier() macro defined in include/linux/suspend.h .
diff --git a/Documentation/power/opp.txt b/Documentation/power/opp.txt
new file mode 100644
index 000000000..c6279c2be
--- /dev/null
+++ b/Documentation/power/opp.txt
@@ -0,0 +1,362 @@
+Operating Performance Points (OPP) Library
+==========================================
+
+(C) 2009-2010 Nishanth Menon <nm@ti.com>, Texas Instruments Incorporated
+
+Contents
+--------
+1. Introduction
+2. Initial OPP List Registration
+3. OPP Search Functions
+4. OPP Availability Control Functions
+5. OPP Data Retrieval Functions
+6. Data Structures
+
+1. Introduction
+===============
+1.1 What is an Operating Performance Point (OPP)?
+
+Complex SoCs of today consists of a multiple sub-modules working in conjunction.
+In an operational system executing varied use cases, not all modules in the SoC
+need to function at their highest performing frequency all the time. To
+facilitate this, sub-modules in a SoC are grouped into domains, allowing some
+domains to run at lower voltage and frequency while other domains run at
+voltage/frequency pairs that are higher.
+
+The set of discrete tuples consisting of frequency and voltage pairs that
+the device will support per domain are called Operating Performance Points or
+OPPs.
+
+As an example:
+Let us consider an MPU device which supports the following:
+{300MHz at minimum voltage of 1V}, {800MHz at minimum voltage of 1.2V},
+{1GHz at minimum voltage of 1.3V}
+
+We can represent these as three OPPs as the following {Hz, uV} tuples:
+{300000000, 1000000}
+{800000000, 1200000}
+{1000000000, 1300000}
+
+1.2 Operating Performance Points Library
+
+OPP library provides a set of helper functions to organize and query the OPP
+information. The library is located in drivers/base/power/opp.c and the header
+is located in include/linux/pm_opp.h. OPP library can be enabled by enabling
+CONFIG_PM_OPP from power management menuconfig menu. OPP library depends on
+CONFIG_PM as certain SoCs such as Texas Instrument's OMAP framework allows to
+optionally boot at a certain OPP without needing cpufreq.
+
+Typical usage of the OPP library is as follows:
+(users) -> registers a set of default OPPs -> (library)
+SoC framework -> modifies on required cases certain OPPs -> OPP layer
+ -> queries to search/retrieve information ->
+
+OPP layer expects each domain to be represented by a unique device pointer. SoC
+framework registers a set of initial OPPs per device with the OPP layer. This
+list is expected to be an optimally small number typically around 5 per device.
+This initial list contains a set of OPPs that the framework expects to be safely
+enabled by default in the system.
+
+Note on OPP Availability:
+------------------------
+As the system proceeds to operate, SoC framework may choose to make certain
+OPPs available or not available on each device based on various external
+factors. Example usage: Thermal management or other exceptional situations where
+SoC framework might choose to disable a higher frequency OPP to safely continue
+operations until that OPP could be re-enabled if possible.
+
+OPP library facilitates this concept in it's implementation. The following
+operational functions operate only on available opps:
+opp_find_freq_{ceil, floor}, dev_pm_opp_get_voltage, dev_pm_opp_get_freq, dev_pm_opp_get_opp_count
+
+dev_pm_opp_find_freq_exact is meant to be used to find the opp pointer which can then
+be used for dev_pm_opp_enable/disable functions to make an opp available as required.
+
+WARNING: Users of OPP library should refresh their availability count using
+get_opp_count if dev_pm_opp_enable/disable functions are invoked for a device, the
+exact mechanism to trigger these or the notification mechanism to other
+dependent subsystems such as cpufreq are left to the discretion of the SoC
+specific framework which uses the OPP library. Similar care needs to be taken
+care to refresh the cpufreq table in cases of these operations.
+
+WARNING on OPP List locking mechanism:
+-------------------------------------------------
+OPP library uses RCU for exclusivity. RCU allows the query functions to operate
+in multiple contexts and this synchronization mechanism is optimal for a read
+intensive operations on data structure as the OPP library caters to.
+
+To ensure that the data retrieved are sane, the users such as SoC framework
+should ensure that the section of code operating on OPP queries are locked
+using RCU read locks. The opp_find_freq_{exact,ceil,floor},
+opp_get_{voltage, freq, opp_count} fall into this category.
+
+opp_{add,enable,disable} are updaters which use mutex and implement it's own
+RCU locking mechanisms. These functions should *NOT* be called under RCU locks
+and other contexts that prevent blocking functions in RCU or mutex operations
+from working.
+
+2. Initial OPP List Registration
+================================
+The SoC implementation calls dev_pm_opp_add function iteratively to add OPPs per
+device. It is expected that the SoC framework will register the OPP entries
+optimally- typical numbers range to be less than 5. The list generated by
+registering the OPPs is maintained by OPP library throughout the device
+operation. The SoC framework can subsequently control the availability of the
+OPPs dynamically using the dev_pm_opp_enable / disable functions.
+
+dev_pm_opp_add - Add a new OPP for a specific domain represented by the device pointer.
+ The OPP is defined using the frequency and voltage. Once added, the OPP
+ is assumed to be available and control of it's availability can be done
+ with the dev_pm_opp_enable/disable functions. OPP library internally stores
+ and manages this information in the opp struct. This function may be
+ used by SoC framework to define a optimal list as per the demands of
+ SoC usage environment.
+
+ WARNING: Do not use this function in interrupt context.
+
+ Example:
+ soc_pm_init()
+ {
+ /* Do things */
+ r = dev_pm_opp_add(mpu_dev, 1000000, 900000);
+ if (!r) {
+ pr_err("%s: unable to register mpu opp(%d)\n", r);
+ goto no_cpufreq;
+ }
+ /* Do cpufreq things */
+ no_cpufreq:
+ /* Do remaining things */
+ }
+
+3. OPP Search Functions
+=======================
+High level framework such as cpufreq operates on frequencies. To map the
+frequency back to the corresponding OPP, OPP library provides handy functions
+to search the OPP list that OPP library internally manages. These search
+functions return the matching pointer representing the opp if a match is
+found, else returns error. These errors are expected to be handled by standard
+error checks such as IS_ERR() and appropriate actions taken by the caller.
+
+dev_pm_opp_find_freq_exact - Search for an OPP based on an *exact* frequency and
+ availability. This function is especially useful to enable an OPP which
+ is not available by default.
+ Example: In a case when SoC framework detects a situation where a
+ higher frequency could be made available, it can use this function to
+ find the OPP prior to call the dev_pm_opp_enable to actually make it available.
+ rcu_read_lock();
+ opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
+ rcu_read_unlock();
+ /* dont operate on the pointer.. just do a sanity check.. */
+ if (IS_ERR(opp)) {
+ pr_err("frequency not disabled!\n");
+ /* trigger appropriate actions.. */
+ } else {
+ dev_pm_opp_enable(dev,1000000000);
+ }
+
+ NOTE: This is the only search function that operates on OPPs which are
+ not available.
+
+dev_pm_opp_find_freq_floor - Search for an available OPP which is *at most* the
+ provided frequency. This function is useful while searching for a lesser
+ match OR operating on OPP information in the order of decreasing
+ frequency.
+ Example: To find the highest opp for a device:
+ freq = ULONG_MAX;
+ rcu_read_lock();
+ dev_pm_opp_find_freq_floor(dev, &freq);
+ rcu_read_unlock();
+
+dev_pm_opp_find_freq_ceil - Search for an available OPP which is *at least* the
+ provided frequency. This function is useful while searching for a
+ higher match OR operating on OPP information in the order of increasing
+ frequency.
+ Example 1: To find the lowest opp for a device:
+ freq = 0;
+ rcu_read_lock();
+ dev_pm_opp_find_freq_ceil(dev, &freq);
+ rcu_read_unlock();
+ Example 2: A simplified implementation of a SoC cpufreq_driver->target:
+ soc_cpufreq_target(..)
+ {
+ /* Do stuff like policy checks etc. */
+ /* Find the best frequency match for the req */
+ rcu_read_lock();
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+ rcu_read_unlock();
+ if (!IS_ERR(opp))
+ soc_switch_to_freq_voltage(freq);
+ else
+ /* do something when we can't satisfy the req */
+ /* do other stuff */
+ }
+
+4. OPP Availability Control Functions
+=====================================
+A default OPP list registered with the OPP library may not cater to all possible
+situation. The OPP library provides a set of functions to modify the
+availability of a OPP within the OPP list. This allows SoC frameworks to have
+fine grained dynamic control of which sets of OPPs are operationally available.
+These functions are intended to *temporarily* remove an OPP in conditions such
+as thermal considerations (e.g. don't use OPPx until the temperature drops).
+
+WARNING: Do not use these functions in interrupt context.
+
+dev_pm_opp_enable - Make a OPP available for operation.
+ Example: Lets say that 1GHz OPP is to be made available only if the
+ SoC temperature is lower than a certain threshold. The SoC framework
+ implementation might choose to do something as follows:
+ if (cur_temp < temp_low_thresh) {
+ /* Enable 1GHz if it was disabled */
+ rcu_read_lock();
+ opp = dev_pm_opp_find_freq_exact(dev, 1000000000, false);
+ rcu_read_unlock();
+ /* just error check */
+ if (!IS_ERR(opp))
+ ret = dev_pm_opp_enable(dev, 1000000000);
+ else
+ goto try_something_else;
+ }
+
+dev_pm_opp_disable - Make an OPP to be not available for operation
+ Example: Lets say that 1GHz OPP is to be disabled if the temperature
+ exceeds a threshold value. The SoC framework implementation might
+ choose to do something as follows:
+ if (cur_temp > temp_high_thresh) {
+ /* Disable 1GHz if it was enabled */
+ rcu_read_lock();
+ opp = dev_pm_opp_find_freq_exact(dev, 1000000000, true);
+ rcu_read_unlock();
+ /* just error check */
+ if (!IS_ERR(opp))
+ ret = dev_pm_opp_disable(dev, 1000000000);
+ else
+ goto try_something_else;
+ }
+
+5. OPP Data Retrieval Functions
+===============================
+Since OPP library abstracts away the OPP information, a set of functions to pull
+information from the OPP structure is necessary. Once an OPP pointer is
+retrieved using the search functions, the following functions can be used by SoC
+framework to retrieve the information represented inside the OPP layer.
+
+dev_pm_opp_get_voltage - Retrieve the voltage represented by the opp pointer.
+ Example: At a cpufreq transition to a different frequency, SoC
+ framework requires to set the voltage represented by the OPP using
+ the regulator framework to the Power Management chip providing the
+ voltage.
+ soc_switch_to_freq_voltage(freq)
+ {
+ /* do things */
+ rcu_read_lock();
+ opp = dev_pm_opp_find_freq_ceil(dev, &freq);
+ v = dev_pm_opp_get_voltage(opp);
+ rcu_read_unlock();
+ if (v)
+ regulator_set_voltage(.., v);
+ /* do other things */
+ }
+
+dev_pm_opp_get_freq - Retrieve the freq represented by the opp pointer.
+ Example: Lets say the SoC framework uses a couple of helper functions
+ we could pass opp pointers instead of doing additional parameters to
+ handle quiet a bit of data parameters.
+ soc_cpufreq_target(..)
+ {
+ /* do things.. */
+ max_freq = ULONG_MAX;
+ rcu_read_lock();
+ max_opp = dev_pm_opp_find_freq_floor(dev,&max_freq);
+ requested_opp = dev_pm_opp_find_freq_ceil(dev,&freq);
+ if (!IS_ERR(max_opp) && !IS_ERR(requested_opp))
+ r = soc_test_validity(max_opp, requested_opp);
+ rcu_read_unlock();
+ /* do other things */
+ }
+ soc_test_validity(..)
+ {
+ if(dev_pm_opp_get_voltage(max_opp) < dev_pm_opp_get_voltage(requested_opp))
+ return -EINVAL;
+ if(dev_pm_opp_get_freq(max_opp) < dev_pm_opp_get_freq(requested_opp))
+ return -EINVAL;
+ /* do things.. */
+ }
+
+dev_pm_opp_get_opp_count - Retrieve the number of available opps for a device
+ Example: Lets say a co-processor in the SoC needs to know the available
+ frequencies in a table, the main processor can notify as following:
+ soc_notify_coproc_available_frequencies()
+ {
+ /* Do things */
+ rcu_read_lock();
+ num_available = dev_pm_opp_get_opp_count(dev);
+ speeds = kzalloc(sizeof(u32) * num_available, GFP_KERNEL);
+ /* populate the table in increasing order */
+ freq = 0;
+ while (!IS_ERR(opp = dev_pm_opp_find_freq_ceil(dev, &freq))) {
+ speeds[i] = freq;
+ freq++;
+ i++;
+ }
+ rcu_read_unlock();
+
+ soc_notify_coproc(AVAILABLE_FREQs, speeds, num_available);
+ /* Do other things */
+ }
+
+6. Data Structures
+==================
+Typically an SoC contains multiple voltage domains which are variable. Each
+domain is represented by a device pointer. The relationship to OPP can be
+represented as follows:
+SoC
+ |- device 1
+ | |- opp 1 (availability, freq, voltage)
+ | |- opp 2 ..
+ ... ...
+ | `- opp n ..
+ |- device 2
+ ...
+ `- device m
+
+OPP library maintains a internal list that the SoC framework populates and
+accessed by various functions as described above. However, the structures
+representing the actual OPPs and domains are internal to the OPP library itself
+to allow for suitable abstraction reusable across systems.
+
+struct dev_pm_opp - The internal data structure of OPP library which is used to
+ represent an OPP. In addition to the freq, voltage, availability
+ information, it also contains internal book keeping information required
+ for the OPP library to operate on. Pointer to this structure is
+ provided back to the users such as SoC framework to be used as a
+ identifier for OPP in the interactions with OPP layer.
+
+ WARNING: The struct dev_pm_opp pointer should not be parsed or modified by the
+ users. The defaults of for an instance is populated by dev_pm_opp_add, but the
+ availability of the OPP can be modified by dev_pm_opp_enable/disable functions.
+
+struct device - This is used to identify a domain to the OPP layer. The
+ nature of the device and it's implementation is left to the user of
+ OPP library such as the SoC framework.
+
+Overall, in a simplistic view, the data structure operations is represented as
+following:
+
+Initialization / modification:
+ +-----+ /- dev_pm_opp_enable
+dev_pm_opp_add --> | opp | <-------
+ | +-----+ \- dev_pm_opp_disable
+ \-------> domain_info(device)
+
+Search functions:
+ /-- dev_pm_opp_find_freq_ceil ---\ +-----+
+domain_info<---- dev_pm_opp_find_freq_exact -----> | opp |
+ \-- dev_pm_opp_find_freq_floor ---/ +-----+
+
+Retrieval functions:
++-----+ /- dev_pm_opp_get_voltage
+| opp | <---
++-----+ \- dev_pm_opp_get_freq
+
+domain_info <- dev_pm_opp_get_opp_count
diff --git a/Documentation/power/pci.txt b/Documentation/power/pci.txt
new file mode 100644
index 000000000..62328d76b
--- /dev/null
+++ b/Documentation/power/pci.txt
@@ -0,0 +1,1025 @@
+PCI Power Management
+
+Copyright (c) 2010 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+
+An overview of concepts and the Linux kernel's interfaces related to PCI power
+management. Based on previous work by Patrick Mochel <mochel@transmeta.com>
+(and others).
+
+This document only covers the aspects of power management specific to PCI
+devices. For general description of the kernel's interfaces related to device
+power management refer to Documentation/power/devices.txt and
+Documentation/power/runtime_pm.txt.
+
+---------------------------------------------------------------------------
+
+1. Hardware and Platform Support for PCI Power Management
+2. PCI Subsystem and Device Power Management
+3. PCI Device Drivers and Power Management
+4. Resources
+
+
+1. Hardware and Platform Support for PCI Power Management
+=========================================================
+
+1.1. Native and Platform-Based Power Management
+-----------------------------------------------
+In general, power management is a feature allowing one to save energy by putting
+devices into states in which they draw less power (low-power states) at the
+price of reduced functionality or performance.
+
+Usually, a device is put into a low-power state when it is underutilized or
+completely inactive. However, when it is necessary to use the device once
+again, it has to be put back into the "fully functional" state (full-power
+state). This may happen when there are some data for the device to handle or
+as a result of an external event requiring the device to be active, which may
+be signaled by the device itself.
+
+PCI devices may be put into low-power states in two ways, by using the device
+capabilities introduced by the PCI Bus Power Management Interface Specification,
+or with the help of platform firmware, such as an ACPI BIOS. In the first
+approach, that is referred to as the native PCI power management (native PCI PM)
+in what follows, the device power state is changed as a result of writing a
+specific value into one of its standard configuration registers. The second
+approach requires the platform firmware to provide special methods that may be
+used by the kernel to change the device's power state.
+
+Devices supporting the native PCI PM usually can generate wakeup signals called
+Power Management Events (PMEs) to let the kernel know about external events
+requiring the device to be active. After receiving a PME the kernel is supposed
+to put the device that sent it into the full-power state. However, the PCI Bus
+Power Management Interface Specification doesn't define any standard method of
+delivering the PME from the device to the CPU and the operating system kernel.
+It is assumed that the platform firmware will perform this task and therefore,
+even though a PCI device is set up to generate PMEs, it also may be necessary to
+prepare the platform firmware for notifying the CPU of the PMEs coming from the
+device (e.g. by generating interrupts).
+
+In turn, if the methods provided by the platform firmware are used for changing
+the power state of a device, usually the platform also provides a method for
+preparing the device to generate wakeup signals. In that case, however, it
+often also is necessary to prepare the device for generating PMEs using the
+native PCI PM mechanism, because the method provided by the platform depends on
+that.
+
+Thus in many situations both the native and the platform-based power management
+mechanisms have to be used simultaneously to obtain the desired result.
+
+1.2. Native PCI Power Management
+--------------------------------
+The PCI Bus Power Management Interface Specification (PCI PM Spec) was
+introduced between the PCI 2.1 and PCI 2.2 Specifications. It defined a
+standard interface for performing various operations related to power
+management.
+
+The implementation of the PCI PM Spec is optional for conventional PCI devices,
+but it is mandatory for PCI Express devices. If a device supports the PCI PM
+Spec, it has an 8 byte power management capability field in its PCI
+configuration space. This field is used to describe and control the standard
+features related to the native PCI power management.
+
+The PCI PM Spec defines 4 operating states for devices (D0-D3) and for buses
+(B0-B3). The higher the number, the less power is drawn by the device or bus
+in that state. However, the higher the number, the longer the latency for
+the device or bus to return to the full-power state (D0 or B0, respectively).
+
+There are two variants of the D3 state defined by the specification. The first
+one is D3hot, referred to as the software accessible D3, because devices can be
+programmed to go into it. The second one, D3cold, is the state that PCI devices
+are in when the supply voltage (Vcc) is removed from them. It is not possible
+to program a PCI device to go into D3cold, although there may be a programmable
+interface for putting the bus the device is on into a state in which Vcc is
+removed from all devices on the bus.
+
+PCI bus power management, however, is not supported by the Linux kernel at the
+time of this writing and therefore it is not covered by this document.
+
+Note that every PCI device can be in the full-power state (D0) or in D3cold,
+regardless of whether or not it implements the PCI PM Spec. In addition to
+that, if the PCI PM Spec is implemented by the device, it must support D3hot
+as well as D0. The support for the D1 and D2 power states is optional.
+
+PCI devices supporting the PCI PM Spec can be programmed to go to any of the
+supported low-power states (except for D3cold). While in D1-D3hot the
+standard configuration registers of the device must be accessible to software
+(i.e. the device is required to respond to PCI configuration accesses), although
+its I/O and memory spaces are then disabled. This allows the device to be
+programmatically put into D0. Thus the kernel can switch the device back and
+forth between D0 and the supported low-power states (except for D3cold) and the
+possible power state transitions the device can undergo are the following:
+
++----------------------------+
+| Current State | New State |
++----------------------------+
+| D0 | D1, D2, D3 |
++----------------------------+
+| D1 | D2, D3 |
++----------------------------+
+| D2 | D3 |
++----------------------------+
+| D1, D2, D3 | D0 |
++----------------------------+
+
+The transition from D3cold to D0 occurs when the supply voltage is provided to
+the device (i.e. power is restored). In that case the device returns to D0 with
+a full power-on reset sequence and the power-on defaults are restored to the
+device by hardware just as at initial power up.
+
+PCI devices supporting the PCI PM Spec can be programmed to generate PMEs
+while in a low-power state (D1-D3), but they are not required to be capable
+of generating PMEs from all supported low-power states. In particular, the
+capability of generating PMEs from D3cold is optional and depends on the
+presence of additional voltage (3.3Vaux) allowing the device to remain
+sufficiently active to generate a wakeup signal.
+
+1.3. ACPI Device Power Management
+---------------------------------
+The platform firmware support for the power management of PCI devices is
+system-specific. However, if the system in question is compliant with the
+Advanced Configuration and Power Interface (ACPI) Specification, like the
+majority of x86-based systems, it is supposed to implement device power
+management interfaces defined by the ACPI standard.
+
+For this purpose the ACPI BIOS provides special functions called "control
+methods" that may be executed by the kernel to perform specific tasks, such as
+putting a device into a low-power state. These control methods are encoded
+using special byte-code language called the ACPI Machine Language (AML) and
+stored in the machine's BIOS. The kernel loads them from the BIOS and executes
+them as needed using an AML interpreter that translates the AML byte code into
+computations and memory or I/O space accesses. This way, in theory, a BIOS
+writer can provide the kernel with a means to perform actions depending
+on the system design in a system-specific fashion.
+
+ACPI control methods may be divided into global control methods, that are not
+associated with any particular devices, and device control methods, that have
+to be defined separately for each device supposed to be handled with the help of
+the platform. This means, in particular, that ACPI device control methods can
+only be used to handle devices that the BIOS writer knew about in advance. The
+ACPI methods used for device power management fall into that category.
+
+The ACPI specification assumes that devices can be in one of four power states
+labeled as D0, D1, D2, and D3 that roughly correspond to the native PCI PM
+D0-D3 states (although the difference between D3hot and D3cold is not taken
+into account by ACPI). Moreover, for each power state of a device there is a
+set of power resources that have to be enabled for the device to be put into
+that state. These power resources are controlled (i.e. enabled or disabled)
+with the help of their own control methods, _ON and _OFF, that have to be
+defined individually for each of them.
+
+To put a device into the ACPI power state Dx (where x is a number between 0 and
+3 inclusive) the kernel is supposed to (1) enable the power resources required
+by the device in this state using their _ON control methods and (2) execute the
+_PSx control method defined for the device. In addition to that, if the device
+is going to be put into a low-power state (D1-D3) and is supposed to generate
+wakeup signals from that state, the _DSW (or _PSW, replaced with _DSW by ACPI
+3.0) control method defined for it has to be executed before _PSx. Power
+resources that are not required by the device in the target power state and are
+not required any more by any other device should be disabled (by executing their
+_OFF control methods). If the current power state of the device is D3, it can
+only be put into D0 this way.
+
+However, quite often the power states of devices are changed during a
+system-wide transition into a sleep state or back into the working state. ACPI
+defines four system sleep states, S1, S2, S3, and S4, and denotes the system
+working state as S0. In general, the target system sleep (or working) state
+determines the highest power (lowest number) state the device can be put
+into and the kernel is supposed to obtain this information by executing the
+device's _SxD control method (where x is a number between 0 and 4 inclusive).
+If the device is required to wake up the system from the target sleep state, the
+lowest power (highest number) state it can be put into is also determined by the
+target state of the system. The kernel is then supposed to use the device's
+_SxW control method to obtain the number of that state. It also is supposed to
+use the device's _PRW control method to learn which power resources need to be
+enabled for the device to be able to generate wakeup signals.
+
+1.4. Wakeup Signaling
+---------------------
+Wakeup signals generated by PCI devices, either as native PCI PMEs, or as
+a result of the execution of the _DSW (or _PSW) ACPI control method before
+putting the device into a low-power state, have to be caught and handled as
+appropriate. If they are sent while the system is in the working state
+(ACPI S0), they should be translated into interrupts so that the kernel can
+put the devices generating them into the full-power state and take care of the
+events that triggered them. In turn, if they are sent while the system is
+sleeping, they should cause the system's core logic to trigger wakeup.
+
+On ACPI-based systems wakeup signals sent by conventional PCI devices are
+converted into ACPI General-Purpose Events (GPEs) which are hardware signals
+from the system core logic generated in response to various events that need to
+be acted upon. Every GPE is associated with one or more sources of potentially
+interesting events. In particular, a GPE may be associated with a PCI device
+capable of signaling wakeup. The information on the connections between GPEs
+and event sources is recorded in the system's ACPI BIOS from where it can be
+read by the kernel.
+
+If a PCI device known to the system's ACPI BIOS signals wakeup, the GPE
+associated with it (if there is one) is triggered. The GPEs associated with PCI
+bridges may also be triggered in response to a wakeup signal from one of the
+devices below the bridge (this also is the case for root bridges) and, for
+example, native PCI PMEs from devices unknown to the system's ACPI BIOS may be
+handled this way.
+
+A GPE may be triggered when the system is sleeping (i.e. when it is in one of
+the ACPI S1-S4 states), in which case system wakeup is started by its core logic
+(the device that was the source of the signal causing the system wakeup to occur
+may be identified later). The GPEs used in such situations are referred to as
+wakeup GPEs.
+
+Usually, however, GPEs are also triggered when the system is in the working
+state (ACPI S0) and in that case the system's core logic generates a System
+Control Interrupt (SCI) to notify the kernel of the event. Then, the SCI
+handler identifies the GPE that caused the interrupt to be generated which,
+in turn, allows the kernel to identify the source of the event (that may be
+a PCI device signaling wakeup). The GPEs used for notifying the kernel of
+events occurring while the system is in the working state are referred to as
+runtime GPEs.
+
+Unfortunately, there is no standard way of handling wakeup signals sent by
+conventional PCI devices on systems that are not ACPI-based, but there is one
+for PCI Express devices. Namely, the PCI Express Base Specification introduced
+a native mechanism for converting native PCI PMEs into interrupts generated by
+root ports. For conventional PCI devices native PMEs are out-of-band, so they
+are routed separately and they need not pass through bridges (in principle they
+may be routed directly to the system's core logic), but for PCI Express devices
+they are in-band messages that have to pass through the PCI Express hierarchy,
+including the root port on the path from the device to the Root Complex. Thus
+it was possible to introduce a mechanism by which a root port generates an
+interrupt whenever it receives a PME message from one of the devices below it.
+The PCI Express Requester ID of the device that sent the PME message is then
+recorded in one of the root port's configuration registers from where it may be
+read by the interrupt handler allowing the device to be identified. [PME
+messages sent by PCI Express endpoints integrated with the Root Complex don't
+pass through root ports, but instead they cause a Root Complex Event Collector
+(if there is one) to generate interrupts.]
+
+In principle the native PCI Express PME signaling may also be used on ACPI-based
+systems along with the GPEs, but to use it the kernel has to ask the system's
+ACPI BIOS to release control of root port configuration registers. The ACPI
+BIOS, however, is not required to allow the kernel to control these registers
+and if it doesn't do that, the kernel must not modify their contents. Of course
+the native PCI Express PME signaling cannot be used by the kernel in that case.
+
+
+2. PCI Subsystem and Device Power Management
+============================================
+
+2.1. Device Power Management Callbacks
+--------------------------------------
+The PCI Subsystem participates in the power management of PCI devices in a
+number of ways. First of all, it provides an intermediate code layer between
+the device power management core (PM core) and PCI device drivers.
+Specifically, the pm field of the PCI subsystem's struct bus_type object,
+pci_bus_type, points to a struct dev_pm_ops object, pci_dev_pm_ops, containing
+pointers to several device power management callbacks:
+
+const struct dev_pm_ops pci_dev_pm_ops = {
+ .prepare = pci_pm_prepare,
+ .complete = pci_pm_complete,
+ .suspend = pci_pm_suspend,
+ .resume = pci_pm_resume,
+ .freeze = pci_pm_freeze,
+ .thaw = pci_pm_thaw,
+ .poweroff = pci_pm_poweroff,
+ .restore = pci_pm_restore,
+ .suspend_noirq = pci_pm_suspend_noirq,
+ .resume_noirq = pci_pm_resume_noirq,
+ .freeze_noirq = pci_pm_freeze_noirq,
+ .thaw_noirq = pci_pm_thaw_noirq,
+ .poweroff_noirq = pci_pm_poweroff_noirq,
+ .restore_noirq = pci_pm_restore_noirq,
+ .runtime_suspend = pci_pm_runtime_suspend,
+ .runtime_resume = pci_pm_runtime_resume,
+ .runtime_idle = pci_pm_runtime_idle,
+};
+
+These callbacks are executed by the PM core in various situations related to
+device power management and they, in turn, execute power management callbacks
+provided by PCI device drivers. They also perform power management operations
+involving some standard configuration registers of PCI devices that device
+drivers need not know or care about.
+
+The structure representing a PCI device, struct pci_dev, contains several fields
+that these callbacks operate on:
+
+struct pci_dev {
+ ...
+ pci_power_t current_state; /* Current operating state. */
+ int pm_cap; /* PM capability offset in the
+ configuration space */
+ unsigned int pme_support:5; /* Bitmask of states from which PME#
+ can be generated */
+ unsigned int pme_interrupt:1;/* Is native PCIe PME signaling used? */
+ unsigned int d1_support:1; /* Low power state D1 is supported */
+ unsigned int d2_support:1; /* Low power state D2 is supported */
+ unsigned int no_d1d2:1; /* D1 and D2 are forbidden */
+ unsigned int wakeup_prepared:1; /* Device prepared for wake up */
+ unsigned int d3_delay; /* D3->D0 transition time in ms */
+ ...
+};
+
+They also indirectly use some fields of the struct device that is embedded in
+struct pci_dev.
+
+2.2. Device Initialization
+--------------------------
+The PCI subsystem's first task related to device power management is to
+prepare the device for power management and initialize the fields of struct
+pci_dev used for this purpose. This happens in two functions defined in
+drivers/pci/pci.c, pci_pm_init() and platform_pci_wakeup_init().
+
+The first of these functions checks if the device supports native PCI PM
+and if that's the case the offset of its power management capability structure
+in the configuration space is stored in the pm_cap field of the device's struct
+pci_dev object. Next, the function checks which PCI low-power states are
+supported by the device and from which low-power states the device can generate
+native PCI PMEs. The power management fields of the device's struct pci_dev and
+the struct device embedded in it are updated accordingly and the generation of
+PMEs by the device is disabled.
+
+The second function checks if the device can be prepared to signal wakeup with
+the help of the platform firmware, such as the ACPI BIOS. If that is the case,
+the function updates the wakeup fields in struct device embedded in the
+device's struct pci_dev and uses the firmware-provided method to prevent the
+device from signaling wakeup.
+
+At this point the device is ready for power management. For driverless devices,
+however, this functionality is limited to a few basic operations carried out
+during system-wide transitions to a sleep state and back to the working state.
+
+2.3. Runtime Device Power Management
+------------------------------------
+The PCI subsystem plays a vital role in the runtime power management of PCI
+devices. For this purpose it uses the general runtime power management
+(runtime PM) framework described in Documentation/power/runtime_pm.txt.
+Namely, it provides subsystem-level callbacks:
+
+ pci_pm_runtime_suspend()
+ pci_pm_runtime_resume()
+ pci_pm_runtime_idle()
+
+that are executed by the core runtime PM routines. It also implements the
+entire mechanics necessary for handling runtime wakeup signals from PCI devices
+in low-power states, which at the time of this writing works for both the native
+PCI Express PME signaling and the ACPI GPE-based wakeup signaling described in
+Section 1.
+
+First, a PCI device is put into a low-power state, or suspended, with the help
+of pm_schedule_suspend() or pm_runtime_suspend() which for PCI devices call
+pci_pm_runtime_suspend() to do the actual job. For this to work, the device's
+driver has to provide a pm->runtime_suspend() callback (see below), which is
+run by pci_pm_runtime_suspend() as the first action. If the driver's callback
+returns successfully, the device's standard configuration registers are saved,
+the device is prepared to generate wakeup signals and, finally, it is put into
+the target low-power state.
+
+The low-power state to put the device into is the lowest-power (highest number)
+state from which it can signal wakeup. The exact method of signaling wakeup is
+system-dependent and is determined by the PCI subsystem on the basis of the
+reported capabilities of the device and the platform firmware. To prepare the
+device for signaling wakeup and put it into the selected low-power state, the
+PCI subsystem can use the platform firmware as well as the device's native PCI
+PM capabilities, if supported.
+
+It is expected that the device driver's pm->runtime_suspend() callback will
+not attempt to prepare the device for signaling wakeup or to put it into a
+low-power state. The driver ought to leave these tasks to the PCI subsystem
+that has all of the information necessary to perform them.
+
+A suspended device is brought back into the "active" state, or resumed,
+with the help of pm_request_resume() or pm_runtime_resume() which both call
+pci_pm_runtime_resume() for PCI devices. Again, this only works if the device's
+driver provides a pm->runtime_resume() callback (see below). However, before
+the driver's callback is executed, pci_pm_runtime_resume() brings the device
+back into the full-power state, prevents it from signaling wakeup while in that
+state and restores its standard configuration registers. Thus the driver's
+callback need not worry about the PCI-specific aspects of the device resume.
+
+Note that generally pci_pm_runtime_resume() may be called in two different
+situations. First, it may be called at the request of the device's driver, for
+example if there are some data for it to process. Second, it may be called
+as a result of a wakeup signal from the device itself (this sometimes is
+referred to as "remote wakeup"). Of course, for this purpose the wakeup signal
+is handled in one of the ways described in Section 1 and finally converted into
+a notification for the PCI subsystem after the source device has been
+identified.
+
+The pci_pm_runtime_idle() function, called for PCI devices by pm_runtime_idle()
+and pm_request_idle(), executes the device driver's pm->runtime_idle()
+callback, if defined, and if that callback doesn't return error code (or is not
+present at all), suspends the device with the help of pm_runtime_suspend().
+Sometimes pci_pm_runtime_idle() is called automatically by the PM core (for
+example, it is called right after the device has just been resumed), in which
+cases it is expected to suspend the device if that makes sense. Usually,
+however, the PCI subsystem doesn't really know if the device really can be
+suspended, so it lets the device's driver decide by running its
+pm->runtime_idle() callback.
+
+2.4. System-Wide Power Transitions
+----------------------------------
+There are a few different types of system-wide power transitions, described in
+Documentation/power/devices.txt. Each of them requires devices to be handled
+in a specific way and the PM core executes subsystem-level power management
+callbacks for this purpose. They are executed in phases such that each phase
+involves executing the same subsystem-level callback for every device belonging
+to the given subsystem before the next phase begins. These phases always run
+after tasks have been frozen.
+
+2.4.1. System Suspend
+
+When the system is going into a sleep state in which the contents of memory will
+be preserved, such as one of the ACPI sleep states S1-S3, the phases are:
+
+ prepare, suspend, suspend_noirq.
+
+The following PCI bus type's callbacks, respectively, are used in these phases:
+
+ pci_pm_prepare()
+ pci_pm_suspend()
+ pci_pm_suspend_noirq()
+
+The pci_pm_prepare() routine first puts the device into the "fully functional"
+state with the help of pm_runtime_resume(). Then, it executes the device
+driver's pm->prepare() callback if defined (i.e. if the driver's struct
+dev_pm_ops object is present and the prepare pointer in that object is valid).
+
+The pci_pm_suspend() routine first checks if the device's driver implements
+legacy PCI suspend routines (see Section 3), in which case the driver's legacy
+suspend callback is executed, if present, and its result is returned. Next, if
+the device's driver doesn't provide a struct dev_pm_ops object (containing
+pointers to the driver's callbacks), pci_pm_default_suspend() is called, which
+simply turns off the device's bus master capability and runs
+pcibios_disable_device() to disable it, unless the device is a bridge (PCI
+bridges are ignored by this routine). Next, the device driver's pm->suspend()
+callback is executed, if defined, and its result is returned if it fails.
+Finally, pci_fixup_device() is called to apply hardware suspend quirks related
+to the device if necessary.
+
+Note that the suspend phase is carried out asynchronously for PCI devices, so
+the pci_pm_suspend() callback may be executed in parallel for any pair of PCI
+devices that don't depend on each other in a known way (i.e. none of the paths
+in the device tree from the root bridge to a leaf device contains both of them).
+
+The pci_pm_suspend_noirq() routine is executed after suspend_device_irqs() has
+been called, which means that the device driver's interrupt handler won't be
+invoked while this routine is running. It first checks if the device's driver
+implements legacy PCI suspends routines (Section 3), in which case the legacy
+late suspend routine is called and its result is returned (the standard
+configuration registers of the device are saved if the driver's callback hasn't
+done that). Second, if the device driver's struct dev_pm_ops object is not
+present, the device's standard configuration registers are saved and the routine
+returns success. Otherwise the device driver's pm->suspend_noirq() callback is
+executed, if present, and its result is returned if it fails. Next, if the
+device's standard configuration registers haven't been saved yet (one of the
+device driver's callbacks executed before might do that), pci_pm_suspend_noirq()
+saves them, prepares the device to signal wakeup (if necessary) and puts it into
+a low-power state.
+
+The low-power state to put the device into is the lowest-power (highest number)
+state from which it can signal wakeup while the system is in the target sleep
+state. Just like in the runtime PM case described above, the mechanism of
+signaling wakeup is system-dependent and determined by the PCI subsystem, which
+is also responsible for preparing the device to signal wakeup from the system's
+target sleep state as appropriate.
+
+PCI device drivers (that don't implement legacy power management callbacks) are
+generally not expected to prepare devices for signaling wakeup or to put them
+into low-power states. However, if one of the driver's suspend callbacks
+(pm->suspend() or pm->suspend_noirq()) saves the device's standard configuration
+registers, pci_pm_suspend_noirq() will assume that the device has been prepared
+to signal wakeup and put into a low-power state by the driver (the driver is
+then assumed to have used the helper functions provided by the PCI subsystem for
+this purpose). PCI device drivers are not encouraged to do that, but in some
+rare cases doing that in the driver may be the optimum approach.
+
+2.4.2. System Resume
+
+When the system is undergoing a transition from a sleep state in which the
+contents of memory have been preserved, such as one of the ACPI sleep states
+S1-S3, into the working state (ACPI S0), the phases are:
+
+ resume_noirq, resume, complete.
+
+The following PCI bus type's callbacks, respectively, are executed in these
+phases:
+
+ pci_pm_resume_noirq()
+ pci_pm_resume()
+ pci_pm_complete()
+
+The pci_pm_resume_noirq() routine first puts the device into the full-power
+state, restores its standard configuration registers and applies early resume
+hardware quirks related to the device, if necessary. This is done
+unconditionally, regardless of whether or not the device's driver implements
+legacy PCI power management callbacks (this way all PCI devices are in the
+full-power state and their standard configuration registers have been restored
+when their interrupt handlers are invoked for the first time during resume,
+which allows the kernel to avoid problems with the handling of shared interrupts
+by drivers whose devices are still suspended). If legacy PCI power management
+callbacks (see Section 3) are implemented by the device's driver, the legacy
+early resume callback is executed and its result is returned. Otherwise, the
+device driver's pm->resume_noirq() callback is executed, if defined, and its
+result is returned.
+
+The pci_pm_resume() routine first checks if the device's standard configuration
+registers have been restored and restores them if that's not the case (this
+only is necessary in the error path during a failing suspend). Next, resume
+hardware quirks related to the device are applied, if necessary, and if the
+device's driver implements legacy PCI power management callbacks (see
+Section 3), the driver's legacy resume callback is executed and its result is
+returned. Otherwise, the device's wakeup signaling mechanisms are blocked and
+its driver's pm->resume() callback is executed, if defined (the callback's
+result is then returned).
+
+The resume phase is carried out asynchronously for PCI devices, like the
+suspend phase described above, which means that if two PCI devices don't depend
+on each other in a known way, the pci_pm_resume() routine may be executed for
+the both of them in parallel.
+
+The pci_pm_complete() routine only executes the device driver's pm->complete()
+callback, if defined.
+
+2.4.3. System Hibernation
+
+System hibernation is more complicated than system suspend, because it requires
+a system image to be created and written into a persistent storage medium. The
+image is created atomically and all devices are quiesced, or frozen, before that
+happens.
+
+The freezing of devices is carried out after enough memory has been freed (at
+the time of this writing the image creation requires at least 50% of system RAM
+to be free) in the following three phases:
+
+ prepare, freeze, freeze_noirq
+
+that correspond to the PCI bus type's callbacks:
+
+ pci_pm_prepare()
+ pci_pm_freeze()
+ pci_pm_freeze_noirq()
+
+This means that the prepare phase is exactly the same as for system suspend.
+The other two phases, however, are different.
+
+The pci_pm_freeze() routine is quite similar to pci_pm_suspend(), but it runs
+the device driver's pm->freeze() callback, if defined, instead of pm->suspend(),
+and it doesn't apply the suspend-related hardware quirks. It is executed
+asynchronously for different PCI devices that don't depend on each other in a
+known way.
+
+The pci_pm_freeze_noirq() routine, in turn, is similar to
+pci_pm_suspend_noirq(), but it calls the device driver's pm->freeze_noirq()
+routine instead of pm->suspend_noirq(). It also doesn't attempt to prepare the
+device for signaling wakeup and put it into a low-power state. Still, it saves
+the device's standard configuration registers if they haven't been saved by one
+of the driver's callbacks.
+
+Once the image has been created, it has to be saved. However, at this point all
+devices are frozen and they cannot handle I/O, while their ability to handle
+I/O is obviously necessary for the image saving. Thus they have to be brought
+back to the fully functional state and this is done in the following phases:
+
+ thaw_noirq, thaw, complete
+
+using the following PCI bus type's callbacks:
+
+ pci_pm_thaw_noirq()
+ pci_pm_thaw()
+ pci_pm_complete()
+
+respectively.
+
+The first of them, pci_pm_thaw_noirq(), is analogous to pci_pm_resume_noirq(),
+but it doesn't put the device into the full power state and doesn't attempt to
+restore its standard configuration registers. It also executes the device
+driver's pm->thaw_noirq() callback, if defined, instead of pm->resume_noirq().
+
+The pci_pm_thaw() routine is similar to pci_pm_resume(), but it runs the device
+driver's pm->thaw() callback instead of pm->resume(). It is executed
+asynchronously for different PCI devices that don't depend on each other in a
+known way.
+
+The complete phase it the same as for system resume.
+
+After saving the image, devices need to be powered down before the system can
+enter the target sleep state (ACPI S4 for ACPI-based systems). This is done in
+three phases:
+
+ prepare, poweroff, poweroff_noirq
+
+where the prepare phase is exactly the same as for system suspend. The other
+two phases are analogous to the suspend and suspend_noirq phases, respectively.
+The PCI subsystem-level callbacks they correspond to
+
+ pci_pm_poweroff()
+ pci_pm_poweroff_noirq()
+
+work in analogy with pci_pm_suspend() and pci_pm_poweroff_noirq(), respectively,
+although they don't attempt to save the device's standard configuration
+registers.
+
+2.4.4. System Restore
+
+System restore requires a hibernation image to be loaded into memory and the
+pre-hibernation memory contents to be restored before the pre-hibernation system
+activity can be resumed.
+
+As described in Documentation/power/devices.txt, the hibernation image is loaded
+into memory by a fresh instance of the kernel, called the boot kernel, which in
+turn is loaded and run by a boot loader in the usual way. After the boot kernel
+has loaded the image, it needs to replace its own code and data with the code
+and data of the "hibernated" kernel stored within the image, called the image
+kernel. For this purpose all devices are frozen just like before creating
+the image during hibernation, in the
+
+ prepare, freeze, freeze_noirq
+
+phases described above. However, the devices affected by these phases are only
+those having drivers in the boot kernel; other devices will still be in whatever
+state the boot loader left them.
+
+Should the restoration of the pre-hibernation memory contents fail, the boot
+kernel would go through the "thawing" procedure described above, using the
+thaw_noirq, thaw, and complete phases (that will only affect the devices having
+drivers in the boot kernel), and then continue running normally.
+
+If the pre-hibernation memory contents are restored successfully, which is the
+usual situation, control is passed to the image kernel, which then becomes
+responsible for bringing the system back to the working state. To achieve this,
+it must restore the devices' pre-hibernation functionality, which is done much
+like waking up from the memory sleep state, although it involves different
+phases:
+
+ restore_noirq, restore, complete
+
+The first two of these are analogous to the resume_noirq and resume phases
+described above, respectively, and correspond to the following PCI subsystem
+callbacks:
+
+ pci_pm_restore_noirq()
+ pci_pm_restore()
+
+These callbacks work in analogy with pci_pm_resume_noirq() and pci_pm_resume(),
+respectively, but they execute the device driver's pm->restore_noirq() and
+pm->restore() callbacks, if available.
+
+The complete phase is carried out in exactly the same way as during system
+resume.
+
+
+3. PCI Device Drivers and Power Management
+==========================================
+
+3.1. Power Management Callbacks
+-------------------------------
+PCI device drivers participate in power management by providing callbacks to be
+executed by the PCI subsystem's power management routines described above and by
+controlling the runtime power management of their devices.
+
+At the time of this writing there are two ways to define power management
+callbacks for a PCI device driver, the recommended one, based on using a
+dev_pm_ops structure described in Documentation/power/devices.txt, and the
+"legacy" one, in which the .suspend(), .suspend_late(), .resume_early(), and
+.resume() callbacks from struct pci_driver are used. The legacy approach,
+however, doesn't allow one to define runtime power management callbacks and is
+not really suitable for any new drivers. Therefore it is not covered by this
+document (refer to the source code to learn more about it).
+
+It is recommended that all PCI device drivers define a struct dev_pm_ops object
+containing pointers to power management (PM) callbacks that will be executed by
+the PCI subsystem's PM routines in various circumstances. A pointer to the
+driver's struct dev_pm_ops object has to be assigned to the driver.pm field in
+its struct pci_driver object. Once that has happened, the "legacy" PM callbacks
+in struct pci_driver are ignored (even if they are not NULL).
+
+The PM callbacks in struct dev_pm_ops are not mandatory and if they are not
+defined (i.e. the respective fields of struct dev_pm_ops are unset) the PCI
+subsystem will handle the device in a simplified default manner. If they are
+defined, though, they are expected to behave as described in the following
+subsections.
+
+3.1.1. prepare()
+
+The prepare() callback is executed during system suspend, during hibernation
+(when a hibernation image is about to be created), during power-off after
+saving a hibernation image and during system restore, when a hibernation image
+has just been loaded into memory.
+
+This callback is only necessary if the driver's device has children that in
+general may be registered at any time. In that case the role of the prepare()
+callback is to prevent new children of the device from being registered until
+one of the resume_noirq(), thaw_noirq(), or restore_noirq() callbacks is run.
+
+In addition to that the prepare() callback may carry out some operations
+preparing the device to be suspended, although it should not allocate memory
+(if additional memory is required to suspend the device, it has to be
+preallocated earlier, for example in a suspend/hibernate notifier as described
+in Documentation/power/notifiers.txt).
+
+3.1.2. suspend()
+
+The suspend() callback is only executed during system suspend, after prepare()
+callbacks have been executed for all devices in the system.
+
+This callback is expected to quiesce the device and prepare it to be put into a
+low-power state by the PCI subsystem. It is not required (in fact it even is
+not recommended) that a PCI driver's suspend() callback save the standard
+configuration registers of the device, prepare it for waking up the system, or
+put it into a low-power state. All of these operations can very well be taken
+care of by the PCI subsystem, without the driver's participation.
+
+However, in some rare case it is convenient to carry out these operations in
+a PCI driver. Then, pci_save_state(), pci_prepare_to_sleep(), and
+pci_set_power_state() should be used to save the device's standard configuration
+registers, to prepare it for system wakeup (if necessary), and to put it into a
+low-power state, respectively. Moreover, if the driver calls pci_save_state(),
+the PCI subsystem will not execute either pci_prepare_to_sleep(), or
+pci_set_power_state() for its device, so the driver is then responsible for
+handling the device as appropriate.
+
+While the suspend() callback is being executed, the driver's interrupt handler
+can be invoked to handle an interrupt from the device, so all suspend-related
+operations relying on the driver's ability to handle interrupts should be
+carried out in this callback.
+
+3.1.3. suspend_noirq()
+
+The suspend_noirq() callback is only executed during system suspend, after
+suspend() callbacks have been executed for all devices in the system and
+after device interrupts have been disabled by the PM core.
+
+The difference between suspend_noirq() and suspend() is that the driver's
+interrupt handler will not be invoked while suspend_noirq() is running. Thus
+suspend_noirq() can carry out operations that would cause race conditions to
+arise if they were performed in suspend().
+
+3.1.4. freeze()
+
+The freeze() callback is hibernation-specific and is executed in two situations,
+during hibernation, after prepare() callbacks have been executed for all devices
+in preparation for the creation of a system image, and during restore,
+after a system image has been loaded into memory from persistent storage and the
+prepare() callbacks have been executed for all devices.
+
+The role of this callback is analogous to the role of the suspend() callback
+described above. In fact, they only need to be different in the rare cases when
+the driver takes the responsibility for putting the device into a low-power
+state.
+
+In that cases the freeze() callback should not prepare the device system wakeup
+or put it into a low-power state. Still, either it or freeze_noirq() should
+save the device's standard configuration registers using pci_save_state().
+
+3.1.5. freeze_noirq()
+
+The freeze_noirq() callback is hibernation-specific. It is executed during
+hibernation, after prepare() and freeze() callbacks have been executed for all
+devices in preparation for the creation of a system image, and during restore,
+after a system image has been loaded into memory and after prepare() and
+freeze() callbacks have been executed for all devices. It is always executed
+after device interrupts have been disabled by the PM core.
+
+The role of this callback is analogous to the role of the suspend_noirq()
+callback described above and it very rarely is necessary to define
+freeze_noirq().
+
+The difference between freeze_noirq() and freeze() is analogous to the
+difference between suspend_noirq() and suspend().
+
+3.1.6. poweroff()
+
+The poweroff() callback is hibernation-specific. It is executed when the system
+is about to be powered off after saving a hibernation image to a persistent
+storage. prepare() callbacks are executed for all devices before poweroff() is
+called.
+
+The role of this callback is analogous to the role of the suspend() and freeze()
+callbacks described above, although it does not need to save the contents of
+the device's registers. In particular, if the driver wants to put the device
+into a low-power state itself instead of allowing the PCI subsystem to do that,
+the poweroff() callback should use pci_prepare_to_sleep() and
+pci_set_power_state() to prepare the device for system wakeup and to put it
+into a low-power state, respectively, but it need not save the device's standard
+configuration registers.
+
+3.1.7. poweroff_noirq()
+
+The poweroff_noirq() callback is hibernation-specific. It is executed after
+poweroff() callbacks have been executed for all devices in the system.
+
+The role of this callback is analogous to the role of the suspend_noirq() and
+freeze_noirq() callbacks described above, but it does not need to save the
+contents of the device's registers.
+
+The difference between poweroff_noirq() and poweroff() is analogous to the
+difference between suspend_noirq() and suspend().
+
+3.1.8. resume_noirq()
+
+The resume_noirq() callback is only executed during system resume, after the
+PM core has enabled the non-boot CPUs. The driver's interrupt handler will not
+be invoked while resume_noirq() is running, so this callback can carry out
+operations that might race with the interrupt handler.
+
+Since the PCI subsystem unconditionally puts all devices into the full power
+state in the resume_noirq phase of system resume and restores their standard
+configuration registers, resume_noirq() is usually not necessary. In general
+it should only be used for performing operations that would lead to race
+conditions if carried out by resume().
+
+3.1.9. resume()
+
+The resume() callback is only executed during system resume, after
+resume_noirq() callbacks have been executed for all devices in the system and
+device interrupts have been enabled by the PM core.
+
+This callback is responsible for restoring the pre-suspend configuration of the
+device and bringing it back to the fully functional state. The device should be
+able to process I/O in a usual way after resume() has returned.
+
+3.1.10. thaw_noirq()
+
+The thaw_noirq() callback is hibernation-specific. It is executed after a
+system image has been created and the non-boot CPUs have been enabled by the PM
+core, in the thaw_noirq phase of hibernation. It also may be executed if the
+loading of a hibernation image fails during system restore (it is then executed
+after enabling the non-boot CPUs). The driver's interrupt handler will not be
+invoked while thaw_noirq() is running.
+
+The role of this callback is analogous to the role of resume_noirq(). The
+difference between these two callbacks is that thaw_noirq() is executed after
+freeze() and freeze_noirq(), so in general it does not need to modify the
+contents of the device's registers.
+
+3.1.11. thaw()
+
+The thaw() callback is hibernation-specific. It is executed after thaw_noirq()
+callbacks have been executed for all devices in the system and after device
+interrupts have been enabled by the PM core.
+
+This callback is responsible for restoring the pre-freeze configuration of
+the device, so that it will work in a usual way after thaw() has returned.
+
+3.1.12. restore_noirq()
+
+The restore_noirq() callback is hibernation-specific. It is executed in the
+restore_noirq phase of hibernation, when the boot kernel has passed control to
+the image kernel and the non-boot CPUs have been enabled by the image kernel's
+PM core.
+
+This callback is analogous to resume_noirq() with the exception that it cannot
+make any assumption on the previous state of the device, even if the BIOS (or
+generally the platform firmware) is known to preserve that state over a
+suspend-resume cycle.
+
+For the vast majority of PCI device drivers there is no difference between
+resume_noirq() and restore_noirq().
+
+3.1.13. restore()
+
+The restore() callback is hibernation-specific. It is executed after
+restore_noirq() callbacks have been executed for all devices in the system and
+after the PM core has enabled device drivers' interrupt handlers to be invoked.
+
+This callback is analogous to resume(), just like restore_noirq() is analogous
+to resume_noirq(). Consequently, the difference between restore_noirq() and
+restore() is analogous to the difference between resume_noirq() and resume().
+
+For the vast majority of PCI device drivers there is no difference between
+resume() and restore().
+
+3.1.14. complete()
+
+The complete() callback is executed in the following situations:
+ - during system resume, after resume() callbacks have been executed for all
+ devices,
+ - during hibernation, before saving the system image, after thaw() callbacks
+ have been executed for all devices,
+ - during system restore, when the system is going back to its pre-hibernation
+ state, after restore() callbacks have been executed for all devices.
+It also may be executed if the loading of a hibernation image into memory fails
+(in that case it is run after thaw() callbacks have been executed for all
+devices that have drivers in the boot kernel).
+
+This callback is entirely optional, although it may be necessary if the
+prepare() callback performs operations that need to be reversed.
+
+3.1.15. runtime_suspend()
+
+The runtime_suspend() callback is specific to device runtime power management
+(runtime PM). It is executed by the PM core's runtime PM framework when the
+device is about to be suspended (i.e. quiesced and put into a low-power state)
+at run time.
+
+This callback is responsible for freezing the device and preparing it to be
+put into a low-power state, but it must allow the PCI subsystem to perform all
+of the PCI-specific actions necessary for suspending the device.
+
+3.1.16. runtime_resume()
+
+The runtime_resume() callback is specific to device runtime PM. It is executed
+by the PM core's runtime PM framework when the device is about to be resumed
+(i.e. put into the full-power state and programmed to process I/O normally) at
+run time.
+
+This callback is responsible for restoring the normal functionality of the
+device after it has been put into the full-power state by the PCI subsystem.
+The device is expected to be able to process I/O in the usual way after
+runtime_resume() has returned.
+
+3.1.17. runtime_idle()
+
+The runtime_idle() callback is specific to device runtime PM. It is executed
+by the PM core's runtime PM framework whenever it may be desirable to suspend
+the device according to the PM core's information. In particular, it is
+automatically executed right after runtime_resume() has returned in case the
+resume of the device has happened as a result of a spurious event.
+
+This callback is optional, but if it is not implemented or if it returns 0, the
+PCI subsystem will call pm_runtime_suspend() for the device, which in turn will
+cause the driver's runtime_suspend() callback to be executed.
+
+3.1.18. Pointing Multiple Callback Pointers to One Routine
+
+Although in principle each of the callbacks described in the previous
+subsections can be defined as a separate function, it often is convenient to
+point two or more members of struct dev_pm_ops to the same routine. There are
+a few convenience macros that can be used for this purpose.
+
+The SIMPLE_DEV_PM_OPS macro declares a struct dev_pm_ops object with one
+suspend routine pointed to by the .suspend(), .freeze(), and .poweroff()
+members and one resume routine pointed to by the .resume(), .thaw(), and
+.restore() members. The other function pointers in this struct dev_pm_ops are
+unset.
+
+The UNIVERSAL_DEV_PM_OPS macro is similar to SIMPLE_DEV_PM_OPS, but it
+additionally sets the .runtime_resume() pointer to the same value as
+.resume() (and .thaw(), and .restore()) and the .runtime_suspend() pointer to
+the same value as .suspend() (and .freeze() and .poweroff()).
+
+The SET_SYSTEM_SLEEP_PM_OPS can be used inside of a declaration of struct
+dev_pm_ops to indicate that one suspend routine is to be pointed to by the
+.suspend(), .freeze(), and .poweroff() members and one resume routine is to
+be pointed to by the .resume(), .thaw(), and .restore() members.
+
+3.2. Device Runtime Power Management
+------------------------------------
+In addition to providing device power management callbacks PCI device drivers
+are responsible for controlling the runtime power management (runtime PM) of
+their devices.
+
+The PCI device runtime PM is optional, but it is recommended that PCI device
+drivers implement it at least in the cases where there is a reliable way of
+verifying that the device is not used (like when the network cable is detached
+from an Ethernet adapter or there are no devices attached to a USB controller).
+
+To support the PCI runtime PM the driver first needs to implement the
+runtime_suspend() and runtime_resume() callbacks. It also may need to implement
+the runtime_idle() callback to prevent the device from being suspended again
+every time right after the runtime_resume() callback has returned
+(alternatively, the runtime_suspend() callback will have to check if the
+device should really be suspended and return -EAGAIN if that is not the case).
+
+The runtime PM of PCI devices is disabled by default. It is also blocked by
+pci_pm_init() that runs the pm_runtime_forbid() helper function. If a PCI
+driver implements the runtime PM callbacks and intends to use the runtime PM
+framework provided by the PM core and the PCI subsystem, it should enable this
+feature by executing the pm_runtime_enable() helper function. However, the
+driver should not call the pm_runtime_allow() helper function unblocking
+the runtime PM of the device. Instead, it should allow user space or some
+platform-specific code to do that (user space can do it via sysfs), although
+once it has called pm_runtime_enable(), it must be prepared to handle the
+runtime PM of the device correctly as soon as pm_runtime_allow() is called
+(which may happen at any time). [It also is possible that user space causes
+pm_runtime_allow() to be called via sysfs before the driver is loaded, so in
+fact the driver has to be prepared to handle the runtime PM of the device as
+soon as it calls pm_runtime_enable().]
+
+The runtime PM framework works by processing requests to suspend or resume
+devices, or to check if they are idle (in which cases it is reasonable to
+subsequently request that they be suspended). These requests are represented
+by work items put into the power management workqueue, pm_wq. Although there
+are a few situations in which power management requests are automatically
+queued by the PM core (for example, after processing a request to resume a
+device the PM core automatically queues a request to check if the device is
+idle), device drivers are generally responsible for queuing power management
+requests for their devices. For this purpose they should use the runtime PM
+helper functions provided by the PM core, discussed in
+Documentation/power/runtime_pm.txt.
+
+Devices can also be suspended and resumed synchronously, without placing a
+request into pm_wq. In the majority of cases this also is done by their
+drivers that use helper functions provided by the PM core for this purpose.
+
+For more information on the runtime PM of devices refer to
+Documentation/power/runtime_pm.txt.
+
+
+4. Resources
+============
+
+PCI Local Bus Specification, Rev. 3.0
+PCI Bus Power Management Interface Specification, Rev. 1.2
+Advanced Configuration and Power Interface (ACPI) Specification, Rev. 3.0b
+PCI Express Base Specification, Rev. 2.0
+Documentation/power/devices.txt
+Documentation/power/runtime_pm.txt
diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt
new file mode 100644
index 000000000..129f7c0e1
--- /dev/null
+++ b/Documentation/power/pm_qos_interface.txt
@@ -0,0 +1,224 @@
+PM Quality Of Service Interface.
+
+This interface provides a kernel and user mode interface for registering
+performance expectations by drivers, subsystems and user space applications on
+one of the parameters.
+
+Two different PM QoS frameworks are available:
+1. PM QoS classes for cpu_dma_latency, network_latency, network_throughput,
+memory_bandwidth.
+2. the per-device PM QoS framework provides the API to manage the per-device latency
+constraints and PM QoS flags.
+
+Each parameters have defined units:
+ * latency: usec
+ * timeout: usec
+ * throughput: kbs (kilo bit / sec)
+ * memory bandwidth: mbs (mega bit / sec)
+
+
+1. PM QoS framework
+
+The infrastructure exposes multiple misc device nodes one per implemented
+parameter. The set of parameters implement is defined by pm_qos_power_init()
+and pm_qos_params.h. This is done because having the available parameters
+being runtime configurable or changeable from a driver was seen as too easy to
+abuse.
+
+For each parameter a list of performance requests is maintained along with
+an aggregated target value. The aggregated target value is updated with
+changes to the request list or elements of the list. Typically the
+aggregated target value is simply the max or min of the request values held
+in the parameter list elements.
+Note: the aggregated target value is implemented as an atomic variable so that
+reading the aggregated value does not require any locking mechanism.
+
+
+From kernel mode the use of this interface is simple:
+
+void pm_qos_add_request(handle, param_class, target_value):
+Will insert an element into the list for that identified PM QoS class with the
+target value. Upon change to this list the new target is recomputed and any
+registered notifiers are called only if the target value is now different.
+Clients of pm_qos need to save the returned handle for future use in other
+pm_qos API functions.
+
+void pm_qos_update_request(handle, new_target_value):
+Will update the list element pointed to by the handle with the new target value
+and recompute the new aggregated target, calling the notification tree if the
+target is changed.
+
+void pm_qos_remove_request(handle):
+Will remove the element. After removal it will update the aggregate target and
+call the notification tree if the target was changed as a result of removing
+the request.
+
+int pm_qos_request(param_class):
+Returns the aggregated value for a given PM QoS class.
+
+int pm_qos_request_active(handle):
+Returns if the request is still active, i.e. it has not been removed from a
+PM QoS class constraints list.
+
+int pm_qos_add_notifier(param_class, notifier):
+Adds a notification callback function to the PM QoS class. The callback is
+called when the aggregated value for the PM QoS class is changed.
+
+int pm_qos_remove_notifier(int param_class, notifier):
+Removes the notification callback function for the PM QoS class.
+
+
+From user mode:
+Only processes can register a pm_qos request. To provide for automatic
+cleanup of a process, the interface requires the process to register its
+parameter requests in the following way:
+
+To register the default pm_qos target for the specific parameter, the process
+must open one of /dev/[cpu_dma_latency, network_latency, network_throughput]
+
+As long as the device node is held open that process has a registered
+request on the parameter.
+
+To change the requested target value the process needs to write an s32 value to
+the open device node. Alternatively the user mode program could write a hex
+string for the value using 10 char long format e.g. "0x12345678". This
+translates to a pm_qos_update_request call.
+
+To remove the user mode request for a target value simply close the device
+node.
+
+
+2. PM QoS per-device latency and flags framework
+
+For each device, there are three lists of PM QoS requests. Two of them are
+maintained along with the aggregated targets of resume latency and active
+state latency tolerance (in microseconds) and the third one is for PM QoS flags.
+Values are updated in response to changes of the request list.
+
+The target values of resume latency and active state latency tolerance are
+simply the minimum of the request values held in the parameter list elements.
+The PM QoS flags aggregate value is a gather (bitwise OR) of all list elements'
+values. Two device PM QoS flags are defined currently: PM_QOS_FLAG_NO_POWER_OFF
+and PM_QOS_FLAG_REMOTE_WAKEUP.
+
+Note: The aggregated target values are implemented in such a way that reading
+the aggregated value does not require any locking mechanism.
+
+
+From kernel mode the use of this interface is the following:
+
+int dev_pm_qos_add_request(device, handle, type, value):
+Will insert an element into the list for that identified device with the
+target value. Upon change to this list the new target is recomputed and any
+registered notifiers are called only if the target value is now different.
+Clients of dev_pm_qos need to save the handle for future use in other
+dev_pm_qos API functions.
+
+int dev_pm_qos_update_request(handle, new_value):
+Will update the list element pointed to by the handle with the new target value
+and recompute the new aggregated target, calling the notification trees if the
+target is changed.
+
+int dev_pm_qos_remove_request(handle):
+Will remove the element. After removal it will update the aggregate target and
+call the notification trees if the target was changed as a result of removing
+the request.
+
+s32 dev_pm_qos_read_value(device):
+Returns the aggregated value for a given device's constraints list.
+
+enum pm_qos_flags_status dev_pm_qos_flags(device, mask)
+Check PM QoS flags of the given device against the given mask of flags.
+The meaning of the return values is as follows:
+ PM_QOS_FLAGS_ALL: All flags from the mask are set
+ PM_QOS_FLAGS_SOME: Some flags from the mask are set
+ PM_QOS_FLAGS_NONE: No flags from the mask are set
+ PM_QOS_FLAGS_UNDEFINED: The device's PM QoS structure has not been
+ initialized or the list of requests is empty.
+
+int dev_pm_qos_add_ancestor_request(dev, handle, type, value)
+Add a PM QoS request for the first direct ancestor of the given device whose
+power.ignore_children flag is unset (for DEV_PM_QOS_RESUME_LATENCY requests)
+or whose power.set_latency_tolerance callback pointer is not NULL (for
+DEV_PM_QOS_LATENCY_TOLERANCE requests).
+
+int dev_pm_qos_expose_latency_limit(device, value)
+Add a request to the device's PM QoS list of resume latency constraints and
+create a sysfs attribute pm_qos_resume_latency_us under the device's power
+directory allowing user space to manipulate that request.
+
+void dev_pm_qos_hide_latency_limit(device)
+Drop the request added by dev_pm_qos_expose_latency_limit() from the device's
+PM QoS list of resume latency constraints and remove sysfs attribute
+pm_qos_resume_latency_us from the device's power directory.
+
+int dev_pm_qos_expose_flags(device, value)
+Add a request to the device's PM QoS list of flags and create sysfs attributes
+pm_qos_no_power_off and pm_qos_remote_wakeup under the device's power directory
+allowing user space to change these flags' value.
+
+void dev_pm_qos_hide_flags(device)
+Drop the request added by dev_pm_qos_expose_flags() from the device's PM QoS list
+of flags and remove sysfs attributes pm_qos_no_power_off and pm_qos_remote_wakeup
+under the device's power directory.
+
+Notification mechanisms:
+The per-device PM QoS framework has 2 different and distinct notification trees:
+a per-device notification tree and a global notification tree.
+
+int dev_pm_qos_add_notifier(device, notifier):
+Adds a notification callback function for the device.
+The callback is called when the aggregated value of the device constraints list
+is changed (for resume latency device PM QoS only).
+
+int dev_pm_qos_remove_notifier(device, notifier):
+Removes the notification callback function for the device.
+
+int dev_pm_qos_add_global_notifier(notifier):
+Adds a notification callback function in the global notification tree of the
+framework.
+The callback is called when the aggregated value for any device is changed
+(for resume latency device PM QoS only).
+
+int dev_pm_qos_remove_global_notifier(notifier):
+Removes the notification callback function from the global notification tree
+of the framework.
+
+
+Active state latency tolerance
+
+This device PM QoS type is used to support systems in which hardware may switch
+to energy-saving operation modes on the fly. In those systems, if the operation
+mode chosen by the hardware attempts to save energy in an overly aggressive way,
+it may cause excess latencies to be visible to software, causing it to miss
+certain protocol requirements or target frame or sample rates etc.
+
+If there is a latency tolerance control mechanism for a given device available
+to software, the .set_latency_tolerance callback in that device's dev_pm_info
+structure should be populated. The routine pointed to by it is should implement
+whatever is necessary to transfer the effective requirement value to the
+hardware.
+
+Whenever the effective latency tolerance changes for the device, its
+.set_latency_tolerance() callback will be executed and the effective value will
+be passed to it. If that value is negative, which means that the list of
+latency tolerance requirements for the device is empty, the callback is expected
+to switch the underlying hardware latency tolerance control mechanism to an
+autonomous mode if available. If that value is PM_QOS_LATENCY_ANY, in turn, and
+the hardware supports a special "no requirement" setting, the callback is
+expected to use it. That allows software to prevent the hardware from
+automatically updating the device's latency tolerance in response to its power
+state changes (e.g. during transitions from D3cold to D0), which generally may
+be done in the autonomous latency tolerance control mode.
+
+If .set_latency_tolerance() is present for the device, sysfs attribute
+pm_qos_latency_tolerance_us will be present in the devivce's power directory.
+Then, user space can use that attribute to specify its latency tolerance
+requirement for the device, if any. Writing "any" to it means "no requirement,
+but do not let the hardware control latency tolerance" and writing "auto" to it
+allows the hardware to be switched to the autonomous mode if there are no other
+requirements from the kernel side in the device's list.
+
+Kernel code can use the functions described above along with the
+DEV_PM_QOS_LATENCY_TOLERANCE device PM QoS type to add, remove and update
+latency tolerance requirements for devices.
diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
new file mode 100644
index 000000000..82dacc06e
--- /dev/null
+++ b/Documentation/power/power_supply_class.txt
@@ -0,0 +1,214 @@
+Linux power supply class
+========================
+
+Synopsis
+~~~~~~~~
+Power supply class used to represent battery, UPS, AC or DC power supply
+properties to user-space.
+
+It defines core set of attributes, which should be applicable to (almost)
+every power supply out there. Attributes are available via sysfs and uevent
+interfaces.
+
+Each attribute has well defined meaning, up to unit of measure used. While
+the attributes provided are believed to be universally applicable to any
+power supply, specific monitoring hardware may not be able to provide them
+all, so any of them may be skipped.
+
+Power supply class is extensible, and allows to define drivers own attributes.
+The core attribute set is subject to the standard Linux evolution (i.e.
+if it will be found that some attribute is applicable to many power supply
+types or their drivers, it can be added to the core set).
+
+It also integrates with LED framework, for the purpose of providing
+typically expected feedback of battery charging/fully charged status and
+AC/USB power supply online status. (Note that specific details of the
+indication (including whether to use it at all) are fully controllable by
+user and/or specific machine defaults, per design principles of LED
+framework).
+
+
+Attributes/properties
+~~~~~~~~~~~~~~~~~~~~~
+Power supply class has predefined set of attributes, this eliminates code
+duplication across drivers. Power supply class insist on reusing its
+predefined attributes *and* their units.
+
+So, userspace gets predictable set of attributes and their units for any
+kind of power supply, and can process/present them to a user in consistent
+manner. Results for different power supplies and machines are also directly
+comparable.
+
+See drivers/power/ds2760_battery.c and drivers/power/pda_power.c for the
+example how to declare and handle attributes.
+
+
+Units
+~~~~~
+Quoting include/linux/power_supply.h:
+
+ All voltages, currents, charges, energies, time and temperatures in µV,
+ µA, µAh, µWh, seconds and tenths of degree Celsius unless otherwise
+ stated. It's driver's job to convert its raw values to units in which
+ this class operates.
+
+
+Attributes/properties detailed
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+~ ~ ~ ~ ~ ~ ~ Charge/Energy/Capacity - how to not confuse ~ ~ ~ ~ ~ ~ ~
+~ ~
+~ Because both "charge" (µAh) and "energy" (µWh) represents "capacity" ~
+~ of battery, this class distinguish these terms. Don't mix them! ~
+~ ~
+~ CHARGE_* attributes represents capacity in µAh only. ~
+~ ENERGY_* attributes represents capacity in µWh only. ~
+~ CAPACITY attribute represents capacity in *percents*, from 0 to 100. ~
+~ ~
+~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
+
+Postfixes:
+_AVG - *hardware* averaged value, use it if your hardware is really able to
+report averaged values.
+_NOW - momentary/instantaneous values.
+
+STATUS - this attribute represents operating status (charging, full,
+discharging (i.e. powering a load), etc.). This corresponds to
+BATTERY_STATUS_* values, as defined in battery.h.
+
+CHARGE_TYPE - batteries can typically charge at different rates.
+This defines trickle and fast charges. For batteries that
+are already charged or discharging, 'n/a' can be displayed (or
+'unknown', if the status is not known).
+
+AUTHENTIC - indicates the power supply (battery or charger) connected
+to the platform is authentic(1) or non authentic(0).
+
+HEALTH - represents health of the battery, values corresponds to
+POWER_SUPPLY_HEALTH_*, defined in battery.h.
+
+VOLTAGE_OCV - open circuit voltage of the battery.
+
+VOLTAGE_MAX_DESIGN, VOLTAGE_MIN_DESIGN - design values for maximal and
+minimal power supply voltages. Maximal/minimal means values of voltages
+when battery considered "full"/"empty" at normal conditions. Yes, there is
+no direct relation between voltage and battery capacity, but some dumb
+batteries use voltage for very approximated calculation of capacity.
+Battery driver also can use this attribute just to inform userspace
+about maximal and minimal voltage thresholds of a given battery.
+
+VOLTAGE_MAX, VOLTAGE_MIN - same as _DESIGN voltage values except that
+these ones should be used if hardware could only guess (measure and
+retain) the thresholds of a given power supply.
+
+VOLTAGE_BOOT - Reports the voltage measured during boot
+
+CURRENT_BOOT - Reports the current measured during boot
+
+CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when
+battery considered full/empty.
+
+ENERGY_FULL_DESIGN, ENERGY_EMPTY_DESIGN - same as above but for energy.
+
+CHARGE_FULL, CHARGE_EMPTY - These attributes means "last remembered value
+of charge when battery became full/empty". It also could mean "value of
+charge when battery considered full/empty at given conditions (temperature,
+age)". I.e. these attributes represents real thresholds, not design values.
+
+CHARGE_COUNTER - the current charge counter (in µAh). This could easily
+be negative; there is no empty or full value. It is only useful for
+relative, time-based measurements.
+
+CONSTANT_CHARGE_CURRENT - constant charge current programmed by charger.
+CONSTANT_CHARGE_CURRENT_MAX - maximum charge current supported by the
+power supply object.
+INPUT_CURRENT_LIMIT - input current limit programmed by charger. Indicates
+the current drawn from a charging source.
+CHARGE_TERM_CURRENT - Charge termination current used to detect the end of charge
+condition.
+
+CALIBRATE - battery or coulomb counter calibration status
+
+CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger.
+CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the
+power supply object.
+
+CHARGE_CONTROL_LIMIT - current charge control limit setting
+CHARGE_CONTROL_LIMIT_MAX - maximum charge control limit setting
+
+ENERGY_FULL, ENERGY_EMPTY - same as above but for energy.
+
+CAPACITY - capacity in percents.
+CAPACITY_ALERT_MIN - minimum capacity alert value in percents.
+CAPACITY_ALERT_MAX - maximum capacity alert value in percents.
+CAPACITY_LEVEL - capacity level. This corresponds to
+POWER_SUPPLY_CAPACITY_LEVEL_*.
+
+TEMP - temperature of the power supply.
+TEMP_ALERT_MIN - minimum battery temperature alert.
+TEMP_ALERT_MAX - maximum battery temperature alert.
+TEMP_AMBIENT - ambient temperature.
+TEMP_AMBIENT_ALERT_MIN - minimum ambient temperature alert.
+TEMP_AMBIENT_ALERT_MAX - maximum ambient temperature alert.
+TEMP_MIN - minimum operatable temperature
+TEMP_MAX - maximum operatable temperature
+
+TIME_TO_EMPTY - seconds left for battery to be considered empty (i.e.
+while battery powers a load)
+TIME_TO_FULL - seconds left for battery to be considered full (i.e.
+while battery is charging)
+
+
+Battery <-> external power supply interaction
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Often power supplies are acting as supplies and supplicants at the same
+time. Batteries are good example. So, batteries usually care if they're
+externally powered or not.
+
+For that case, power supply class implements notification mechanism for
+batteries.
+
+External power supply (AC) lists supplicants (batteries) names in
+"supplied_to" struct member, and each power_supply_changed() call
+issued by external power supply will notify supplicants via
+external_power_changed callback.
+
+
+QA
+~~
+Q: Where is POWER_SUPPLY_PROP_XYZ attribute?
+A: If you cannot find attribute suitable for your driver needs, feel free
+ to add it and send patch along with your driver.
+
+ The attributes available currently are the ones currently provided by the
+ drivers written.
+
+ Good candidates to add in future: model/part#, cycle_time, manufacturer,
+ etc.
+
+
+Q: I have some very specific attribute (e.g. battery color), should I add
+ this attribute to standard ones?
+A: Most likely, no. Such attribute can be placed in the driver itself, if
+ it is useful. Of course, if the attribute in question applicable to
+ large set of batteries, provided by many drivers, and/or comes from
+ some general battery specification/standard, it may be a candidate to
+ be added to the core attribute set.
+
+
+Q: Suppose, my battery monitoring chip/firmware does not provides capacity
+ in percents, but provides charge_{now,full,empty}. Should I calculate
+ percentage capacity manually, inside the driver, and register CAPACITY
+ attribute? The same question about time_to_empty/time_to_full.
+A: Most likely, no. This class is designed to export properties which are
+ directly measurable by the specific hardware available.
+
+ Inferring not available properties using some heuristics or mathematical
+ model is not subject of work for a battery driver. Such functionality
+ should be factored out, and in fact, apm_power, the driver to serve
+ legacy APM API on top of power supply class, uses a simple heuristic of
+ approximating remaining battery capacity based on its charge, current,
+ voltage and so on. But full-fledged battery model is likely not subject
+ for kernel at all, as it would require floating point calculation to deal
+ with things like differential equations and Kalman filters. This is
+ better be handled by batteryd/libbattery, yet to be written.
diff --git a/Documentation/power/powercap/powercap.txt b/Documentation/power/powercap/powercap.txt
new file mode 100644
index 000000000..1e6ef164e
--- /dev/null
+++ b/Documentation/power/powercap/powercap.txt
@@ -0,0 +1,236 @@
+Power Capping Framework
+==================================
+
+The power capping framework provides a consistent interface between the kernel
+and the user space that allows power capping drivers to expose the settings to
+user space in a uniform way.
+
+Terminology
+=========================
+The framework exposes power capping devices to user space via sysfs in the
+form of a tree of objects. The objects at the root level of the tree represent
+'control types', which correspond to different methods of power capping. For
+example, the intel-rapl control type represents the Intel "Running Average
+Power Limit" (RAPL) technology, whereas the 'idle-injection' control type
+corresponds to the use of idle injection for controlling power.
+
+Power zones represent different parts of the system, which can be controlled and
+monitored using the power capping method determined by the control type the
+given zone belongs to. They each contain attributes for monitoring power, as
+well as controls represented in the form of power constraints. If the parts of
+the system represented by different power zones are hierarchical (that is, one
+bigger part consists of multiple smaller parts that each have their own power
+controls), those power zones may also be organized in a hierarchy with one
+parent power zone containing multiple subzones and so on to reflect the power
+control topology of the system. In that case, it is possible to apply power
+capping to a set of devices together using the parent power zone and if more
+fine grained control is required, it can be applied through the subzones.
+
+
+Example sysfs interface tree:
+
+/sys/devices/virtual/powercap
+??? intel-rapl
+ ??? intel-rapl:0
+ ?   ??? constraint_0_name
+ ?   ??? constraint_0_power_limit_uw
+ ?   ??? constraint_0_time_window_us
+ ?   ??? constraint_1_name
+ ?   ??? constraint_1_power_limit_uw
+ ?   ??? constraint_1_time_window_us
+ ?   ??? device -> ../../intel-rapl
+ ?   ??? energy_uj
+ ?   ??? intel-rapl:0:0
+ ?   ?   ??? constraint_0_name
+ ?   ?   ??? constraint_0_power_limit_uw
+ ?   ?   ??? constraint_0_time_window_us
+ ?   ?   ??? constraint_1_name
+ ?   ?   ??? constraint_1_power_limit_uw
+ ?   ?   ??? constraint_1_time_window_us
+ ?   ?   ??? device -> ../../intel-rapl:0
+ ?   ?   ??? energy_uj
+ ?   ?   ??? max_energy_range_uj
+ ?   ?   ??? name
+ ?   ?   ??? enabled
+ ?   ?   ??? power
+ ?   ?   ?   ??? async
+ ?   ?   ?   []
+ ?   ?   ??? subsystem -> ../../../../../../class/power_cap
+ ?   ?   ??? uevent
+ ?   ??? intel-rapl:0:1
+ ?   ?   ??? constraint_0_name
+ ?   ?   ??? constraint_0_power_limit_uw
+ ?   ?   ??? constraint_0_time_window_us
+ ?   ?   ??? constraint_1_name
+ ?   ?   ??? constraint_1_power_limit_uw
+ ?   ?   ??? constraint_1_time_window_us
+ ?   ?   ??? device -> ../../intel-rapl:0
+ ?   ?   ??? energy_uj
+ ?   ?   ??? max_energy_range_uj
+ ?   ?   ??? name
+ ?   ?   ??? enabled
+ ?   ?   ??? power
+ ?   ?   ?   ??? async
+ ?   ?   ?   []
+ ?   ?   ??? subsystem -> ../../../../../../class/power_cap
+ ?   ?   ??? uevent
+ ?   ??? max_energy_range_uj
+ ?   ??? max_power_range_uw
+ ?   ??? name
+ ?   ??? enabled
+ ?   ??? power
+ ?   ?   ??? async
+ ?   ?   []
+ ?   ??? subsystem -> ../../../../../class/power_cap
+ ?   ??? enabled
+ ?   ??? uevent
+ ??? intel-rapl:1
+ ?   ??? constraint_0_name
+ ?   ??? constraint_0_power_limit_uw
+ ?   ??? constraint_0_time_window_us
+ ?   ??? constraint_1_name
+ ?   ??? constraint_1_power_limit_uw
+ ?   ??? constraint_1_time_window_us
+ ?   ??? device -> ../../intel-rapl
+ ?   ??? energy_uj
+ ?   ??? intel-rapl:1:0
+ ?   ?   ??? constraint_0_name
+ ?   ?   ??? constraint_0_power_limit_uw
+ ?   ?   ??? constraint_0_time_window_us
+ ?   ?   ??? constraint_1_name
+ ?   ?   ??? constraint_1_power_limit_uw
+ ?   ?   ??? constraint_1_time_window_us
+ ?   ?   ??? device -> ../../intel-rapl:1
+ ?   ?   ??? energy_uj
+ ?   ?   ??? max_energy_range_uj
+ ?   ?   ??? name
+ ?   ?   ??? enabled
+ ?   ?   ??? power
+ ?   ?   ?   ??? async
+ ?   ?   ?   []
+ ?   ?   ??? subsystem -> ../../../../../../class/power_cap
+ ?   ?   ??? uevent
+ ?   ??? intel-rapl:1:1
+ ?   ?   ??? constraint_0_name
+ ?   ?   ??? constraint_0_power_limit_uw
+ ?   ?   ??? constraint_0_time_window_us
+ ?   ?   ??? constraint_1_name
+ ?   ?   ??? constraint_1_power_limit_uw
+ ?   ?   ??? constraint_1_time_window_us
+ ?   ?   ??? device -> ../../intel-rapl:1
+ ?   ?   ??? energy_uj
+ ?   ?   ??? max_energy_range_uj
+ ?   ?   ??? name
+ ?   ?   ??? enabled
+ ?   ?   ??? power
+ ?   ?   ?   ??? async
+ ?   ?   ?   []
+ ?   ?   ??? subsystem -> ../../../../../../class/power_cap
+ ?   ?   ??? uevent
+ ?   ??? max_energy_range_uj
+ ?   ??? max_power_range_uw
+ ?   ??? name
+ ?   ??? enabled
+ ?   ??? power
+ ?   ?   ??? async
+ ?   ?   []
+ ?   ??? subsystem -> ../../../../../class/power_cap
+ ?   ??? uevent
+ ??? power
+ ?   ??? async
+ ?   []
+ ??? subsystem -> ../../../../class/power_cap
+ ??? enabled
+ ??? uevent
+
+The above example illustrates a case in which the Intel RAPL technology,
+available in Intel® IA-64 and IA-32 Processor Architectures, is used. There is one
+control type called intel-rapl which contains two power zones, intel-rapl:0 and
+intel-rapl:1, representing CPU packages. Each of these power zones contains
+two subzones, intel-rapl:j:0 and intel-rapl:j:1 (j = 0, 1), representing the
+"core" and the "uncore" parts of the given CPU package, respectively. All of
+the zones and subzones contain energy monitoring attributes (energy_uj,
+max_energy_range_uj) and constraint attributes (constraint_*) allowing controls
+to be applied (the constraints in the 'package' power zones apply to the whole
+CPU packages and the subzone constraints only apply to the respective parts of
+the given package individually). Since Intel RAPL doesn't provide instantaneous
+power value, there is no power_uw attribute.
+
+In addition to that, each power zone contains a name attribute, allowing the
+part of the system represented by that zone to be identified.
+For example:
+
+cat /sys/class/power_cap/intel-rapl/intel-rapl:0/name
+package-0
+
+The Intel RAPL technology allows two constraints, short term and long term,
+with two different time windows to be applied to each power zone. Thus for
+each zone there are 2 attributes representing the constraint names, 2 power
+limits and 2 attributes representing the sizes of the time windows. Such that,
+constraint_j_* attributes correspond to the jth constraint (j = 0,1).
+
+For example:
+ constraint_0_name
+ constraint_0_power_limit_uw
+ constraint_0_time_window_us
+ constraint_1_name
+ constraint_1_power_limit_uw
+ constraint_1_time_window_us
+
+Power Zone Attributes
+=================================
+Monitoring attributes
+----------------------
+
+energy_uj (rw): Current energy counter in micro joules. Write "0" to reset.
+If the counter can not be reset, then this attribute is read only.
+
+max_energy_range_uj (ro): Range of the above energy counter in micro-joules.
+
+power_uw (ro): Current power in micro watts.
+
+max_power_range_uw (ro): Range of the above power value in micro-watts.
+
+name (ro): Name of this power zone.
+
+It is possible that some domains have both power ranges and energy counter ranges;
+however, only one is mandatory.
+
+Constraints
+----------------
+constraint_X_power_limit_uw (rw): Power limit in micro watts, which should be
+applicable for the time window specified by "constraint_X_time_window_us".
+
+constraint_X_time_window_us (rw): Time window in micro seconds.
+
+constraint_X_name (ro): An optional name of the constraint
+
+constraint_X_max_power_uw(ro): Maximum allowed power in micro watts.
+
+constraint_X_min_power_uw(ro): Minimum allowed power in micro watts.
+
+constraint_X_max_time_window_us(ro): Maximum allowed time window in micro seconds.
+
+constraint_X_min_time_window_us(ro): Minimum allowed time window in micro seconds.
+
+Except power_limit_uw and time_window_us other fields are optional.
+
+Common zone and control type attributes
+----------------------------------------
+enabled (rw): Enable/Disable controls at zone level or for all zones using
+a control type.
+
+Power Cap Client Driver Interface
+==================================
+The API summary:
+
+Call powercap_register_control_type() to register control type object.
+Call powercap_register_zone() to register a power zone (under a given
+control type), either as a top-level power zone or as a subzone of another
+power zone registered earlier.
+The number of constraints in a power zone and the corresponding callbacks have
+to be defined prior to calling powercap_register_zone() to register that zone.
+
+To Free a power zone call powercap_unregister_zone().
+To free a control type object call powercap_unregister_control_type().
+Detailed API can be generated using kernel-doc on include/linux/powercap.h.
diff --git a/Documentation/power/regulator/consumer.txt b/Documentation/power/regulator/consumer.txt
new file mode 100644
index 000000000..e51564c1a
--- /dev/null
+++ b/Documentation/power/regulator/consumer.txt
@@ -0,0 +1,218 @@
+Regulator Consumer Driver Interface
+===================================
+
+This text describes the regulator interface for consumer device drivers.
+Please see overview.txt for a description of the terms used in this text.
+
+
+1. Consumer Regulator Access (static & dynamic drivers)
+=======================================================
+
+A consumer driver can get access to its supply regulator by calling :-
+
+regulator = regulator_get(dev, "Vcc");
+
+The consumer passes in its struct device pointer and power supply ID. The core
+then finds the correct regulator by consulting a machine specific lookup table.
+If the lookup is successful then this call will return a pointer to the struct
+regulator that supplies this consumer.
+
+To release the regulator the consumer driver should call :-
+
+regulator_put(regulator);
+
+Consumers can be supplied by more than one regulator e.g. codec consumer with
+analog and digital supplies :-
+
+digital = regulator_get(dev, "Vcc"); /* digital core */
+analog = regulator_get(dev, "Avdd"); /* analog */
+
+The regulator access functions regulator_get() and regulator_put() will
+usually be called in your device drivers probe() and remove() respectively.
+
+
+2. Regulator Output Enable & Disable (static & dynamic drivers)
+====================================================================
+
+A consumer can enable its power supply by calling:-
+
+int regulator_enable(regulator);
+
+NOTE: The supply may already be enabled before regulator_enabled() is called.
+This may happen if the consumer shares the regulator or the regulator has been
+previously enabled by bootloader or kernel board initialization code.
+
+A consumer can determine if a regulator is enabled by calling :-
+
+int regulator_is_enabled(regulator);
+
+This will return > zero when the regulator is enabled.
+
+
+A consumer can disable its supply when no longer needed by calling :-
+
+int regulator_disable(regulator);
+
+NOTE: This may not disable the supply if it's shared with other consumers. The
+regulator will only be disabled when the enabled reference count is zero.
+
+Finally, a regulator can be forcefully disabled in the case of an emergency :-
+
+int regulator_force_disable(regulator);
+
+NOTE: this will immediately and forcefully shutdown the regulator output. All
+consumers will be powered off.
+
+
+3. Regulator Voltage Control & Status (dynamic drivers)
+======================================================
+
+Some consumer drivers need to be able to dynamically change their supply
+voltage to match system operating points. e.g. CPUfreq drivers can scale
+voltage along with frequency to save power, SD drivers may need to select the
+correct card voltage, etc.
+
+Consumers can control their supply voltage by calling :-
+
+int regulator_set_voltage(regulator, min_uV, max_uV);
+
+Where min_uV and max_uV are the minimum and maximum acceptable voltages in
+microvolts.
+
+NOTE: this can be called when the regulator is enabled or disabled. If called
+when enabled, then the voltage changes instantly, otherwise the voltage
+configuration changes and the voltage is physically set when the regulator is
+next enabled.
+
+The regulators configured voltage output can be found by calling :-
+
+int regulator_get_voltage(regulator);
+
+NOTE: get_voltage() will return the configured output voltage whether the
+regulator is enabled or disabled and should NOT be used to determine regulator
+output state. However this can be used in conjunction with is_enabled() to
+determine the regulator physical output voltage.
+
+
+4. Regulator Current Limit Control & Status (dynamic drivers)
+===========================================================
+
+Some consumer drivers need to be able to dynamically change their supply
+current limit to match system operating points. e.g. LCD backlight driver can
+change the current limit to vary the backlight brightness, USB drivers may want
+to set the limit to 500mA when supplying power.
+
+Consumers can control their supply current limit by calling :-
+
+int regulator_set_current_limit(regulator, min_uA, max_uA);
+
+Where min_uA and max_uA are the minimum and maximum acceptable current limit in
+microamps.
+
+NOTE: this can be called when the regulator is enabled or disabled. If called
+when enabled, then the current limit changes instantly, otherwise the current
+limit configuration changes and the current limit is physically set when the
+regulator is next enabled.
+
+A regulators current limit can be found by calling :-
+
+int regulator_get_current_limit(regulator);
+
+NOTE: get_current_limit() will return the current limit whether the regulator
+is enabled or disabled and should not be used to determine regulator current
+load.
+
+
+5. Regulator Operating Mode Control & Status (dynamic drivers)
+=============================================================
+
+Some consumers can further save system power by changing the operating mode of
+their supply regulator to be more efficient when the consumers operating state
+changes. e.g. consumer driver is idle and subsequently draws less current
+
+Regulator operating mode can be changed indirectly or directly.
+
+Indirect operating mode control.
+--------------------------------
+Consumer drivers can request a change in their supply regulator operating mode
+by calling :-
+
+int regulator_set_load(struct regulator *regulator, int load_uA);
+
+This will cause the core to recalculate the total load on the regulator (based
+on all its consumers) and change operating mode (if necessary and permitted)
+to best match the current operating load.
+
+The load_uA value can be determined from the consumer's datasheet. e.g. most
+datasheets have tables showing the maximum current consumed in certain
+situations.
+
+Most consumers will use indirect operating mode control since they have no
+knowledge of the regulator or whether the regulator is shared with other
+consumers.
+
+Direct operating mode control.
+------------------------------
+Bespoke or tightly coupled drivers may want to directly control regulator
+operating mode depending on their operating point. This can be achieved by
+calling :-
+
+int regulator_set_mode(struct regulator *regulator, unsigned int mode);
+unsigned int regulator_get_mode(struct regulator *regulator);
+
+Direct mode will only be used by consumers that *know* about the regulator and
+are not sharing the regulator with other consumers.
+
+
+6. Regulator Events
+===================
+Regulators can notify consumers of external events. Events could be received by
+consumers under regulator stress or failure conditions.
+
+Consumers can register interest in regulator events by calling :-
+
+int regulator_register_notifier(struct regulator *regulator,
+ struct notifier_block *nb);
+
+Consumers can unregister interest by calling :-
+
+int regulator_unregister_notifier(struct regulator *regulator,
+ struct notifier_block *nb);
+
+Regulators use the kernel notifier framework to send event to their interested
+consumers.
+
+7. Regulator Direct Register Access
+===================================
+Some kinds of power management hardware or firmware are designed such that
+they need to do low-level hardware access to regulators, with no involvement
+from the kernel. Examples of such devices are:
+
+- clocksource with a voltage-controlled oscillator and control logic to change
+ the supply voltage over I2C to achieve a desired output clock rate
+- thermal management firmware that can issue an arbitrary I2C transaction to
+ perform system poweroff during overtemperature conditions
+
+To set up such a device/firmware, various parameters like I2C address of the
+regulator, addresses of various regulator registers etc. need to be configured
+to it. The regulator framework provides the following helpers for querying
+these details.
+
+Bus-specific details, like I2C addresses or transfer rates are handled by the
+regmap framework. To get the regulator's regmap (if supported), use :-
+
+struct regmap *regulator_get_regmap(struct regulator *regulator);
+
+To obtain the hardware register offset and bitmask for the regulator's voltage
+selector register, use :-
+
+int regulator_get_hardware_vsel_register(struct regulator *regulator,
+ unsigned *vsel_reg,
+ unsigned *vsel_mask);
+
+To convert a regulator framework voltage selector code (used by
+regulator_list_voltage) to a hardware-specific voltage selector that can be
+directly written to the voltage selector register, use :-
+
+int regulator_list_hardware_vsel(struct regulator *regulator,
+ unsigned selector);
diff --git a/Documentation/power/regulator/design.txt b/Documentation/power/regulator/design.txt
new file mode 100644
index 000000000..fdd919b96
--- /dev/null
+++ b/Documentation/power/regulator/design.txt
@@ -0,0 +1,33 @@
+Regulator API design notes
+==========================
+
+This document provides a brief, partially structured, overview of some
+of the design considerations which impact the regulator API design.
+
+Safety
+------
+
+ - Errors in regulator configuration can have very serious consequences
+ for the system, potentially including lasting hardware damage.
+ - It is not possible to automatically determine the power configuration
+ of the system - software-equivalent variants of the same chip may
+ have different power requirements, and not all components with power
+ requirements are visible to software.
+
+ => The API should make no changes to the hardware state unless it has
+ specific knowledge that these changes are safe to perform on this
+ particular system.
+
+Consumer use cases
+------------------
+
+ - The overwhelming majority of devices in a system will have no
+ requirement to do any runtime configuration of their power beyond
+ being able to turn it on or off.
+
+ - Many of the power supplies in the system will be shared between many
+ different consumers.
+
+ => The consumer API should be structured so that these use cases are
+ very easy to handle and so that consumers will work with shared
+ supplies without any additional effort.
diff --git a/Documentation/power/regulator/machine.txt b/Documentation/power/regulator/machine.txt
new file mode 100644
index 000000000..757e3b53d
--- /dev/null
+++ b/Documentation/power/regulator/machine.txt
@@ -0,0 +1,100 @@
+Regulator Machine Driver Interface
+===================================
+
+The regulator machine driver interface is intended for board/machine specific
+initialisation code to configure the regulator subsystem.
+
+Consider the following machine :-
+
+ Regulator-1 -+-> Regulator-2 --> [Consumer A @ 1.8 - 2.0V]
+ |
+ +-> [Consumer B @ 3.3V]
+
+The drivers for consumers A & B must be mapped to the correct regulator in
+order to control their power supplies. This mapping can be achieved in machine
+initialisation code by creating a struct regulator_consumer_supply for
+each regulator.
+
+struct regulator_consumer_supply {
+ const char *dev_name; /* consumer dev_name() */
+ const char *supply; /* consumer supply - e.g. "vcc" */
+};
+
+e.g. for the machine above
+
+static struct regulator_consumer_supply regulator1_consumers[] = {
+{
+ .dev_name = "dev_name(consumer B)",
+ .supply = "Vcc",
+},};
+
+static struct regulator_consumer_supply regulator2_consumers[] = {
+{
+ .dev = "dev_name(consumer A"),
+ .supply = "Vcc",
+},};
+
+This maps Regulator-1 to the 'Vcc' supply for Consumer B and maps Regulator-2
+to the 'Vcc' supply for Consumer A.
+
+Constraints can now be registered by defining a struct regulator_init_data
+for each regulator power domain. This structure also maps the consumers
+to their supply regulators :-
+
+static struct regulator_init_data regulator1_data = {
+ .constraints = {
+ .name = "Regulator-1",
+ .min_uV = 3300000,
+ .max_uV = 3300000,
+ .valid_modes_mask = REGULATOR_MODE_NORMAL,
+ },
+ .num_consumer_supplies = ARRAY_SIZE(regulator1_consumers),
+ .consumer_supplies = regulator1_consumers,
+};
+
+The name field should be set to something that is usefully descriptive
+for the board for configuration of supplies for other regulators and
+for use in logging and other diagnostic output. Normally the name
+used for the supply rail in the schematic is a good choice. If no
+name is provided then the subsystem will choose one.
+
+Regulator-1 supplies power to Regulator-2. This relationship must be registered
+with the core so that Regulator-1 is also enabled when Consumer A enables its
+supply (Regulator-2). The supply regulator is set by the supply_regulator
+field below and co:-
+
+static struct regulator_init_data regulator2_data = {
+ .supply_regulator = "Regulator-1",
+ .constraints = {
+ .min_uV = 1800000,
+ .max_uV = 2000000,
+ .valid_ops_mask = REGULATOR_CHANGE_VOLTAGE,
+ .valid_modes_mask = REGULATOR_MODE_NORMAL,
+ },
+ .num_consumer_supplies = ARRAY_SIZE(regulator2_consumers),
+ .consumer_supplies = regulator2_consumers,
+};
+
+Finally the regulator devices must be registered in the usual manner.
+
+static struct platform_device regulator_devices[] = {
+{
+ .name = "regulator",
+ .id = DCDC_1,
+ .dev = {
+ .platform_data = &regulator1_data,
+ },
+},
+{
+ .name = "regulator",
+ .id = DCDC_2,
+ .dev = {
+ .platform_data = &regulator2_data,
+ },
+},
+};
+/* register regulator 1 device */
+platform_device_register(&regulator_devices[0]);
+
+/* register regulator 2 device */
+platform_device_register(&regulator_devices[1]);
diff --git a/Documentation/power/regulator/overview.txt b/Documentation/power/regulator/overview.txt
new file mode 100644
index 000000000..40ca2d6e2
--- /dev/null
+++ b/Documentation/power/regulator/overview.txt
@@ -0,0 +1,171 @@
+Linux voltage and current regulator framework
+=============================================
+
+About
+=====
+
+This framework is designed to provide a standard kernel interface to control
+voltage and current regulators.
+
+The intention is to allow systems to dynamically control regulator power output
+in order to save power and prolong battery life. This applies to both voltage
+regulators (where voltage output is controllable) and current sinks (where
+current limit is controllable).
+
+(C) 2008 Wolfson Microelectronics PLC.
+Author: Liam Girdwood <lrg@slimlogic.co.uk>
+
+
+Nomenclature
+============
+
+Some terms used in this document:-
+
+ o Regulator - Electronic device that supplies power to other devices.
+ Most regulators can enable and disable their output whilst
+ some can control their output voltage and or current.
+
+ Input Voltage -> Regulator -> Output Voltage
+
+
+ o PMIC - Power Management IC. An IC that contains numerous regulators
+ and often contains other subsystems.
+
+
+ o Consumer - Electronic device that is supplied power by a regulator.
+ Consumers can be classified into two types:-
+
+ Static: consumer does not change its supply voltage or
+ current limit. It only needs to enable or disable its
+ power supply. Its supply voltage is set by the hardware,
+ bootloader, firmware or kernel board initialisation code.
+
+ Dynamic: consumer needs to change its supply voltage or
+ current limit to meet operation demands.
+
+
+ o Power Domain - Electronic circuit that is supplied its input power by the
+ output power of a regulator, switch or by another power
+ domain.
+
+ The supply regulator may be behind a switch(s). i.e.
+
+ Regulator -+-> Switch-1 -+-> Switch-2 --> [Consumer A]
+ | |
+ | +-> [Consumer B], [Consumer C]
+ |
+ +-> [Consumer D], [Consumer E]
+
+ That is one regulator and three power domains:
+
+ Domain 1: Switch-1, Consumers D & E.
+ Domain 2: Switch-2, Consumers B & C.
+ Domain 3: Consumer A.
+
+ and this represents a "supplies" relationship:
+
+ Domain-1 --> Domain-2 --> Domain-3.
+
+ A power domain may have regulators that are supplied power
+ by other regulators. i.e.
+
+ Regulator-1 -+-> Regulator-2 -+-> [Consumer A]
+ |
+ +-> [Consumer B]
+
+ This gives us two regulators and two power domains:
+
+ Domain 1: Regulator-2, Consumer B.
+ Domain 2: Consumer A.
+
+ and a "supplies" relationship:
+
+ Domain-1 --> Domain-2
+
+
+ o Constraints - Constraints are used to define power levels for performance
+ and hardware protection. Constraints exist at three levels:
+
+ Regulator Level: This is defined by the regulator hardware
+ operating parameters and is specified in the regulator
+ datasheet. i.e.
+
+ - voltage output is in the range 800mV -> 3500mV.
+ - regulator current output limit is 20mA @ 5V but is
+ 10mA @ 10V.
+
+ Power Domain Level: This is defined in software by kernel
+ level board initialisation code. It is used to constrain a
+ power domain to a particular power range. i.e.
+
+ - Domain-1 voltage is 3300mV
+ - Domain-2 voltage is 1400mV -> 1600mV
+ - Domain-3 current limit is 0mA -> 20mA.
+
+ Consumer Level: This is defined by consumer drivers
+ dynamically setting voltage or current limit levels.
+
+ e.g. a consumer backlight driver asks for a current increase
+ from 5mA to 10mA to increase LCD illumination. This passes
+ to through the levels as follows :-
+
+ Consumer: need to increase LCD brightness. Lookup and
+ request next current mA value in brightness table (the
+ consumer driver could be used on several different
+ personalities based upon the same reference device).
+
+ Power Domain: is the new current limit within the domain
+ operating limits for this domain and system state (e.g.
+ battery power, USB power)
+
+ Regulator Domains: is the new current limit within the
+ regulator operating parameters for input/output voltage.
+
+ If the regulator request passes all the constraint tests
+ then the new regulator value is applied.
+
+
+Design
+======
+
+The framework is designed and targeted at SoC based devices but may also be
+relevant to non SoC devices and is split into the following four interfaces:-
+
+
+ 1. Consumer driver interface.
+
+ This uses a similar API to the kernel clock interface in that consumer
+ drivers can get and put a regulator (like they can with clocks atm) and
+ get/set voltage, current limit, mode, enable and disable. This should
+ allow consumers complete control over their supply voltage and current
+ limit. This also compiles out if not in use so drivers can be reused in
+ systems with no regulator based power control.
+
+ See Documentation/power/regulator/consumer.txt
+
+ 2. Regulator driver interface.
+
+ This allows regulator drivers to register their regulators and provide
+ operations to the core. It also has a notifier call chain for propagating
+ regulator events to clients.
+
+ See Documentation/power/regulator/regulator.txt
+
+ 3. Machine interface.
+
+ This interface is for machine specific code and allows the creation of
+ voltage/current domains (with constraints) for each regulator. It can
+ provide regulator constraints that will prevent device damage through
+ overvoltage or overcurrent caused by buggy client drivers. It also
+ allows the creation of a regulator tree whereby some regulators are
+ supplied by others (similar to a clock tree).
+
+ See Documentation/power/regulator/machine.txt
+
+ 4. Userspace ABI.
+
+ The framework also exports a lot of useful voltage/current/opmode data to
+ userspace via sysfs. This could be used to help monitor device power
+ consumption and status.
+
+ See Documentation/ABI/testing/sysfs-class-regulator
diff --git a/Documentation/power/regulator/regulator.txt b/Documentation/power/regulator/regulator.txt
new file mode 100644
index 000000000..b17e5833c
--- /dev/null
+++ b/Documentation/power/regulator/regulator.txt
@@ -0,0 +1,30 @@
+Regulator Driver Interface
+==========================
+
+The regulator driver interface is relatively simple and designed to allow
+regulator drivers to register their services with the core framework.
+
+
+Registration
+============
+
+Drivers can register a regulator by calling :-
+
+struct regulator_dev *regulator_register(struct regulator_desc *regulator_desc,
+ const struct regulator_config *config);
+
+This will register the regulator's capabilities and operations to the regulator
+core.
+
+Regulators can be unregistered by calling :-
+
+void regulator_unregister(struct regulator_dev *rdev);
+
+
+Regulator Events
+================
+Regulators can send events (e.g. overtemperature, undervoltage, etc) to
+consumer drivers by calling :-
+
+int regulator_notifier_call_chain(struct regulator_dev *rdev,
+ unsigned long event, void *data);
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
new file mode 100644
index 000000000..44fe1d28a
--- /dev/null
+++ b/Documentation/power/runtime_pm.txt
@@ -0,0 +1,912 @@
+Runtime Power Management Framework for I/O Devices
+
+(C) 2009-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
+(C) 2010 Alan Stern <stern@rowland.harvard.edu>
+(C) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+1. Introduction
+
+Support for runtime power management (runtime PM) of I/O devices is provided
+at the power management core (PM core) level by means of:
+
+* The power management workqueue pm_wq in which bus types and device drivers can
+ put their PM-related work items. It is strongly recommended that pm_wq be
+ used for queuing all work items related to runtime PM, because this allows
+ them to be synchronized with system-wide power transitions (suspend to RAM,
+ hibernation and resume from system sleep states). pm_wq is declared in
+ include/linux/pm_runtime.h and defined in kernel/power/main.c.
+
+* A number of runtime PM fields in the 'power' member of 'struct device' (which
+ is of the type 'struct dev_pm_info', defined in include/linux/pm.h) that can
+ be used for synchronizing runtime PM operations with one another.
+
+* Three device runtime PM callbacks in 'struct dev_pm_ops' (defined in
+ include/linux/pm.h).
+
+* A set of helper functions defined in drivers/base/power/runtime.c that can be
+ used for carrying out runtime PM operations in such a way that the
+ synchronization between them is taken care of by the PM core. Bus types and
+ device drivers are encouraged to use these functions.
+
+The runtime PM callbacks present in 'struct dev_pm_ops', the device runtime PM
+fields of 'struct dev_pm_info' and the core helper functions provided for
+runtime PM are described below.
+
+2. Device Runtime PM Callbacks
+
+There are three device runtime PM callbacks defined in 'struct dev_pm_ops':
+
+struct dev_pm_ops {
+ ...
+ int (*runtime_suspend)(struct device *dev);
+ int (*runtime_resume)(struct device *dev);
+ int (*runtime_idle)(struct device *dev);
+ ...
+};
+
+The ->runtime_suspend(), ->runtime_resume() and ->runtime_idle() callbacks
+are executed by the PM core for the device's subsystem that may be either of
+the following:
+
+ 1. PM domain of the device, if the device's PM domain object, dev->pm_domain,
+ is present.
+
+ 2. Device type of the device, if both dev->type and dev->type->pm are present.
+
+ 3. Device class of the device, if both dev->class and dev->class->pm are
+ present.
+
+ 4. Bus type of the device, if both dev->bus and dev->bus->pm are present.
+
+If the subsystem chosen by applying the above rules doesn't provide the relevant
+callback, the PM core will invoke the corresponding driver callback stored in
+dev->driver->pm directly (if present).
+
+The PM core always checks which callback to use in the order given above, so the
+priority order of callbacks from high to low is: PM domain, device type, class
+and bus type. Moreover, the high-priority one will always take precedence over
+a low-priority one. The PM domain, bus type, device type and class callbacks
+are referred to as subsystem-level callbacks in what follows.
+
+By default, the callbacks are always invoked in process context with interrupts
+enabled. However, the pm_runtime_irq_safe() helper function can be used to tell
+the PM core that it is safe to run the ->runtime_suspend(), ->runtime_resume()
+and ->runtime_idle() callbacks for the given device in atomic context with
+interrupts disabled. This implies that the callback routines in question must
+not block or sleep, but it also means that the synchronous helper functions
+listed at the end of Section 4 may be used for that device within an interrupt
+handler or generally in an atomic context.
+
+The subsystem-level suspend callback, if present, is _entirely_ _responsible_
+for handling the suspend of the device as appropriate, which may, but need not
+include executing the device driver's own ->runtime_suspend() callback (from the
+PM core's point of view it is not necessary to implement a ->runtime_suspend()
+callback in a device driver as long as the subsystem-level suspend callback
+knows what to do to handle the device).
+
+ * Once the subsystem-level suspend callback (or the driver suspend callback,
+ if invoked directly) has completed successfully for the given device, the PM
+ core regards the device as suspended, which need not mean that it has been
+ put into a low power state. It is supposed to mean, however, that the
+ device will not process data and will not communicate with the CPU(s) and
+ RAM until the appropriate resume callback is executed for it. The runtime
+ PM status of a device after successful execution of the suspend callback is
+ 'suspended'.
+
+ * If the suspend callback returns -EBUSY or -EAGAIN, the device's runtime PM
+ status remains 'active', which means that the device _must_ be fully
+ operational afterwards.
+
+ * If the suspend callback returns an error code different from -EBUSY and
+ -EAGAIN, the PM core regards this as a fatal error and will refuse to run
+ the helper functions described in Section 4 for the device until its status
+ is directly set to either'active', or 'suspended' (the PM core provides
+ special helper functions for this purpose).
+
+In particular, if the driver requires remote wakeup capability (i.e. hardware
+mechanism allowing the device to request a change of its power state, such as
+PCI PME) for proper functioning and device_run_wake() returns 'false' for the
+device, then ->runtime_suspend() should return -EBUSY. On the other hand, if
+device_run_wake() returns 'true' for the device and the device is put into a
+low-power state during the execution of the suspend callback, it is expected
+that remote wakeup will be enabled for the device. Generally, remote wakeup
+should be enabled for all input devices put into low-power states at run time.
+
+The subsystem-level resume callback, if present, is _entirely_ _responsible_ for
+handling the resume of the device as appropriate, which may, but need not
+include executing the device driver's own ->runtime_resume() callback (from the
+PM core's point of view it is not necessary to implement a ->runtime_resume()
+callback in a device driver as long as the subsystem-level resume callback knows
+what to do to handle the device).
+
+ * Once the subsystem-level resume callback (or the driver resume callback, if
+ invoked directly) has completed successfully, the PM core regards the device
+ as fully operational, which means that the device _must_ be able to complete
+ I/O operations as needed. The runtime PM status of the device is then
+ 'active'.
+
+ * If the resume callback returns an error code, the PM core regards this as a
+ fatal error and will refuse to run the helper functions described in Section
+ 4 for the device, until its status is directly set to either 'active', or
+ 'suspended' (by means of special helper functions provided by the PM core
+ for this purpose).
+
+The idle callback (a subsystem-level one, if present, or the driver one) is
+executed by the PM core whenever the device appears to be idle, which is
+indicated to the PM core by two counters, the device's usage counter and the
+counter of 'active' children of the device.
+
+ * If any of these counters is decreased using a helper function provided by
+ the PM core and it turns out to be equal to zero, the other counter is
+ checked. If that counter also is equal to zero, the PM core executes the
+ idle callback with the device as its argument.
+
+The action performed by the idle callback is totally dependent on the subsystem
+(or driver) in question, but the expected and recommended action is to check
+if the device can be suspended (i.e. if all of the conditions necessary for
+suspending the device are satisfied) and to queue up a suspend request for the
+device in that case. If there is no idle callback, or if the callback returns
+0, then the PM core will attempt to carry out a runtime suspend of the device,
+also respecting devices configured for autosuspend. In essence this means a
+call to pm_runtime_autosuspend() (do note that drivers needs to update the
+device last busy mark, pm_runtime_mark_last_busy(), to control the delay under
+this circumstance). To prevent this (for example, if the callback routine has
+started a delayed suspend), the routine must return a non-zero value. Negative
+error return codes are ignored by the PM core.
+
+The helper functions provided by the PM core, described in Section 4, guarantee
+that the following constraints are met with respect to runtime PM callbacks for
+one device:
+
+(1) The callbacks are mutually exclusive (e.g. it is forbidden to execute
+ ->runtime_suspend() in parallel with ->runtime_resume() or with another
+ instance of ->runtime_suspend() for the same device) with the exception that
+ ->runtime_suspend() or ->runtime_resume() can be executed in parallel with
+ ->runtime_idle() (although ->runtime_idle() will not be started while any
+ of the other callbacks is being executed for the same device).
+
+(2) ->runtime_idle() and ->runtime_suspend() can only be executed for 'active'
+ devices (i.e. the PM core will only execute ->runtime_idle() or
+ ->runtime_suspend() for the devices the runtime PM status of which is
+ 'active').
+
+(3) ->runtime_idle() and ->runtime_suspend() can only be executed for a device
+ the usage counter of which is equal to zero _and_ either the counter of
+ 'active' children of which is equal to zero, or the 'power.ignore_children'
+ flag of which is set.
+
+(4) ->runtime_resume() can only be executed for 'suspended' devices (i.e. the
+ PM core will only execute ->runtime_resume() for the devices the runtime
+ PM status of which is 'suspended').
+
+Additionally, the helper functions provided by the PM core obey the following
+rules:
+
+ * If ->runtime_suspend() is about to be executed or there's a pending request
+ to execute it, ->runtime_idle() will not be executed for the same device.
+
+ * A request to execute or to schedule the execution of ->runtime_suspend()
+ will cancel any pending requests to execute ->runtime_idle() for the same
+ device.
+
+ * If ->runtime_resume() is about to be executed or there's a pending request
+ to execute it, the other callbacks will not be executed for the same device.
+
+ * A request to execute ->runtime_resume() will cancel any pending or
+ scheduled requests to execute the other callbacks for the same device,
+ except for scheduled autosuspends.
+
+3. Runtime PM Device Fields
+
+The following device runtime PM fields are present in 'struct dev_pm_info', as
+defined in include/linux/pm.h:
+
+ struct timer_list suspend_timer;
+ - timer used for scheduling (delayed) suspend and autosuspend requests
+
+ unsigned long timer_expires;
+ - timer expiration time, in jiffies (if this is different from zero, the
+ timer is running and will expire at that time, otherwise the timer is not
+ running)
+
+ struct work_struct work;
+ - work structure used for queuing up requests (i.e. work items in pm_wq)
+
+ wait_queue_head_t wait_queue;
+ - wait queue used if any of the helper functions needs to wait for another
+ one to complete
+
+ spinlock_t lock;
+ - lock used for synchronisation
+
+ atomic_t usage_count;
+ - the usage counter of the device
+
+ atomic_t child_count;
+ - the count of 'active' children of the device
+
+ unsigned int ignore_children;
+ - if set, the value of child_count is ignored (but still updated)
+
+ unsigned int disable_depth;
+ - used for disabling the helper functions (they work normally if this is
+ equal to zero); the initial value of it is 1 (i.e. runtime PM is
+ initially disabled for all devices)
+
+ int runtime_error;
+ - if set, there was a fatal error (one of the callbacks returned error code
+ as described in Section 2), so the helper functions will not work until
+ this flag is cleared; this is the error code returned by the failing
+ callback
+
+ unsigned int idle_notification;
+ - if set, ->runtime_idle() is being executed
+
+ unsigned int request_pending;
+ - if set, there's a pending request (i.e. a work item queued up into pm_wq)
+
+ enum rpm_request request;
+ - type of request that's pending (valid if request_pending is set)
+
+ unsigned int deferred_resume;
+ - set if ->runtime_resume() is about to be run while ->runtime_suspend() is
+ being executed for that device and it is not practical to wait for the
+ suspend to complete; means "start a resume as soon as you've suspended"
+
+ unsigned int run_wake;
+ - set if the device is capable of generating runtime wake-up events
+
+ enum rpm_status runtime_status;
+ - the runtime PM status of the device; this field's initial value is
+ RPM_SUSPENDED, which means that each device is initially regarded by the
+ PM core as 'suspended', regardless of its real hardware status
+
+ unsigned int runtime_auto;
+ - if set, indicates that the user space has allowed the device driver to
+ power manage the device at run time via the /sys/devices/.../power/control
+ interface; it may only be modified with the help of the pm_runtime_allow()
+ and pm_runtime_forbid() helper functions
+
+ unsigned int no_callbacks;
+ - indicates that the device does not use the runtime PM callbacks (see
+ Section 8); it may be modified only by the pm_runtime_no_callbacks()
+ helper function
+
+ unsigned int irq_safe;
+ - indicates that the ->runtime_suspend() and ->runtime_resume() callbacks
+ will be invoked with the spinlock held and interrupts disabled
+
+ unsigned int use_autosuspend;
+ - indicates that the device's driver supports delayed autosuspend (see
+ Section 9); it may be modified only by the
+ pm_runtime{_dont}_use_autosuspend() helper functions
+
+ unsigned int timer_autosuspends;
+ - indicates that the PM core should attempt to carry out an autosuspend
+ when the timer expires rather than a normal suspend
+
+ int autosuspend_delay;
+ - the delay time (in milliseconds) to be used for autosuspend
+
+ unsigned long last_busy;
+ - the time (in jiffies) when the pm_runtime_mark_last_busy() helper
+ function was last called for this device; used in calculating inactivity
+ periods for autosuspend
+
+All of the above fields are members of the 'power' member of 'struct device'.
+
+4. Runtime PM Device Helper Functions
+
+The following runtime PM helper functions are defined in
+drivers/base/power/runtime.c and include/linux/pm_runtime.h:
+
+ void pm_runtime_init(struct device *dev);
+ - initialize the device runtime PM fields in 'struct dev_pm_info'
+
+ void pm_runtime_remove(struct device *dev);
+ - make sure that the runtime PM of the device will be disabled after
+ removing the device from device hierarchy
+
+ int pm_runtime_idle(struct device *dev);
+ - execute the subsystem-level idle callback for the device; returns an
+ error code on failure, where -EINPROGRESS means that ->runtime_idle() is
+ already being executed; if there is no callback or the callback returns 0
+ then run pm_runtime_autosuspend(dev) and return its result
+
+ int pm_runtime_suspend(struct device *dev);
+ - execute the subsystem-level suspend callback for the device; returns 0 on
+ success, 1 if the device's runtime PM status was already 'suspended', or
+ error code on failure, where -EAGAIN or -EBUSY means it is safe to attempt
+ to suspend the device again in future and -EACCES means that
+ 'power.disable_depth' is different from 0
+
+ int pm_runtime_autosuspend(struct device *dev);
+ - same as pm_runtime_suspend() except that the autosuspend delay is taken
+ into account; if pm_runtime_autosuspend_expiration() says the delay has
+ not yet expired then an autosuspend is scheduled for the appropriate time
+ and 0 is returned
+
+ int pm_runtime_resume(struct device *dev);
+ - execute the subsystem-level resume callback for the device; returns 0 on
+ success, 1 if the device's runtime PM status was already 'active' or
+ error code on failure, where -EAGAIN means it may be safe to attempt to
+ resume the device again in future, but 'power.runtime_error' should be
+ checked additionally, and -EACCES means that 'power.disable_depth' is
+ different from 0
+
+ int pm_request_idle(struct device *dev);
+ - submit a request to execute the subsystem-level idle callback for the
+ device (the request is represented by a work item in pm_wq); returns 0 on
+ success or error code if the request has not been queued up
+
+ int pm_request_autosuspend(struct device *dev);
+ - schedule the execution of the subsystem-level suspend callback for the
+ device when the autosuspend delay has expired; if the delay has already
+ expired then the work item is queued up immediately
+
+ int pm_schedule_suspend(struct device *dev, unsigned int delay);
+ - schedule the execution of the subsystem-level suspend callback for the
+ device in future, where 'delay' is the time to wait before queuing up a
+ suspend work item in pm_wq, in milliseconds (if 'delay' is zero, the work
+ item is queued up immediately); returns 0 on success, 1 if the device's PM
+ runtime status was already 'suspended', or error code if the request
+ hasn't been scheduled (or queued up if 'delay' is 0); if the execution of
+ ->runtime_suspend() is already scheduled and not yet expired, the new
+ value of 'delay' will be used as the time to wait
+
+ int pm_request_resume(struct device *dev);
+ - submit a request to execute the subsystem-level resume callback for the
+ device (the request is represented by a work item in pm_wq); returns 0 on
+ success, 1 if the device's runtime PM status was already 'active', or
+ error code if the request hasn't been queued up
+
+ void pm_runtime_get_noresume(struct device *dev);
+ - increment the device's usage counter
+
+ int pm_runtime_get(struct device *dev);
+ - increment the device's usage counter, run pm_request_resume(dev) and
+ return its result
+
+ int pm_runtime_get_sync(struct device *dev);
+ - increment the device's usage counter, run pm_runtime_resume(dev) and
+ return its result
+
+ void pm_runtime_put_noidle(struct device *dev);
+ - decrement the device's usage counter
+
+ int pm_runtime_put(struct device *dev);
+ - decrement the device's usage counter; if the result is 0 then run
+ pm_request_idle(dev) and return its result
+
+ int pm_runtime_put_autosuspend(struct device *dev);
+ - decrement the device's usage counter; if the result is 0 then run
+ pm_request_autosuspend(dev) and return its result
+
+ int pm_runtime_put_sync(struct device *dev);
+ - decrement the device's usage counter; if the result is 0 then run
+ pm_runtime_idle(dev) and return its result
+
+ int pm_runtime_put_sync_suspend(struct device *dev);
+ - decrement the device's usage counter; if the result is 0 then run
+ pm_runtime_suspend(dev) and return its result
+
+ int pm_runtime_put_sync_autosuspend(struct device *dev);
+ - decrement the device's usage counter; if the result is 0 then run
+ pm_runtime_autosuspend(dev) and return its result
+
+ void pm_runtime_enable(struct device *dev);
+ - decrement the device's 'power.disable_depth' field; if that field is equal
+ to zero, the runtime PM helper functions can execute subsystem-level
+ callbacks described in Section 2 for the device
+
+ int pm_runtime_disable(struct device *dev);
+ - increment the device's 'power.disable_depth' field (if the value of that
+ field was previously zero, this prevents subsystem-level runtime PM
+ callbacks from being run for the device), make sure that all of the
+ pending runtime PM operations on the device are either completed or
+ canceled; returns 1 if there was a resume request pending and it was
+ necessary to execute the subsystem-level resume callback for the device
+ to satisfy that request, otherwise 0 is returned
+
+ int pm_runtime_barrier(struct device *dev);
+ - check if there's a resume request pending for the device and resume it
+ (synchronously) in that case, cancel any other pending runtime PM requests
+ regarding it and wait for all runtime PM operations on it in progress to
+ complete; returns 1 if there was a resume request pending and it was
+ necessary to execute the subsystem-level resume callback for the device to
+ satisfy that request, otherwise 0 is returned
+
+ void pm_suspend_ignore_children(struct device *dev, bool enable);
+ - set/unset the power.ignore_children flag of the device
+
+ int pm_runtime_set_active(struct device *dev);
+ - clear the device's 'power.runtime_error' flag, set the device's runtime
+ PM status to 'active' and update its parent's counter of 'active'
+ children as appropriate (it is only valid to use this function if
+ 'power.runtime_error' is set or 'power.disable_depth' is greater than
+ zero); it will fail and return error code if the device has a parent
+ which is not active and the 'power.ignore_children' flag of which is unset
+
+ void pm_runtime_set_suspended(struct device *dev);
+ - clear the device's 'power.runtime_error' flag, set the device's runtime
+ PM status to 'suspended' and update its parent's counter of 'active'
+ children as appropriate (it is only valid to use this function if
+ 'power.runtime_error' is set or 'power.disable_depth' is greater than
+ zero)
+
+ bool pm_runtime_active(struct device *dev);
+ - return true if the device's runtime PM status is 'active' or its
+ 'power.disable_depth' field is not equal to zero, or false otherwise
+
+ bool pm_runtime_suspended(struct device *dev);
+ - return true if the device's runtime PM status is 'suspended' and its
+ 'power.disable_depth' field is equal to zero, or false otherwise
+
+ bool pm_runtime_status_suspended(struct device *dev);
+ - return true if the device's runtime PM status is 'suspended'
+
+ bool pm_runtime_suspended_if_enabled(struct device *dev);
+ - return true if the device's runtime PM status is 'suspended' and its
+ 'power.disable_depth' field is equal to 1
+
+ void pm_runtime_allow(struct device *dev);
+ - set the power.runtime_auto flag for the device and decrease its usage
+ counter (used by the /sys/devices/.../power/control interface to
+ effectively allow the device to be power managed at run time)
+
+ void pm_runtime_forbid(struct device *dev);
+ - unset the power.runtime_auto flag for the device and increase its usage
+ counter (used by the /sys/devices/.../power/control interface to
+ effectively prevent the device from being power managed at run time)
+
+ void pm_runtime_no_callbacks(struct device *dev);
+ - set the power.no_callbacks flag for the device and remove the runtime
+ PM attributes from /sys/devices/.../power (or prevent them from being
+ added when the device is registered)
+
+ void pm_runtime_irq_safe(struct device *dev);
+ - set the power.irq_safe flag for the device, causing the runtime-PM
+ callbacks to be invoked with interrupts off
+
+ bool pm_runtime_is_irq_safe(struct device *dev);
+ - return true if power.irq_safe flag was set for the device, causing
+ the runtime-PM callbacks to be invoked with interrupts off
+
+ void pm_runtime_mark_last_busy(struct device *dev);
+ - set the power.last_busy field to the current time
+
+ void pm_runtime_use_autosuspend(struct device *dev);
+ - set the power.use_autosuspend flag, enabling autosuspend delays
+
+ void pm_runtime_dont_use_autosuspend(struct device *dev);
+ - clear the power.use_autosuspend flag, disabling autosuspend delays
+
+ void pm_runtime_set_autosuspend_delay(struct device *dev, int delay);
+ - set the power.autosuspend_delay value to 'delay' (expressed in
+ milliseconds); if 'delay' is negative then runtime suspends are
+ prevented
+
+ unsigned long pm_runtime_autosuspend_expiration(struct device *dev);
+ - calculate the time when the current autosuspend delay period will expire,
+ based on power.last_busy and power.autosuspend_delay; if the delay time
+ is 1000 ms or larger then the expiration time is rounded up to the
+ nearest second; returns 0 if the delay period has already expired or
+ power.use_autosuspend isn't set, otherwise returns the expiration time
+ in jiffies
+
+It is safe to execute the following helper functions from interrupt context:
+
+pm_request_idle()
+pm_request_autosuspend()
+pm_schedule_suspend()
+pm_request_resume()
+pm_runtime_get_noresume()
+pm_runtime_get()
+pm_runtime_put_noidle()
+pm_runtime_put()
+pm_runtime_put_autosuspend()
+pm_runtime_enable()
+pm_suspend_ignore_children()
+pm_runtime_set_active()
+pm_runtime_set_suspended()
+pm_runtime_suspended()
+pm_runtime_mark_last_busy()
+pm_runtime_autosuspend_expiration()
+
+If pm_runtime_irq_safe() has been called for a device then the following helper
+functions may also be used in interrupt context:
+
+pm_runtime_idle()
+pm_runtime_suspend()
+pm_runtime_autosuspend()
+pm_runtime_resume()
+pm_runtime_get_sync()
+pm_runtime_put_sync()
+pm_runtime_put_sync_suspend()
+pm_runtime_put_sync_autosuspend()
+
+5. Runtime PM Initialization, Device Probing and Removal
+
+Initially, the runtime PM is disabled for all devices, which means that the
+majority of the runtime PM helper functions described in Section 4 will return
+-EAGAIN until pm_runtime_enable() is called for the device.
+
+In addition to that, the initial runtime PM status of all devices is
+'suspended', but it need not reflect the actual physical state of the device.
+Thus, if the device is initially active (i.e. it is able to process I/O), its
+runtime PM status must be changed to 'active', with the help of
+pm_runtime_set_active(), before pm_runtime_enable() is called for the device.
+
+However, if the device has a parent and the parent's runtime PM is enabled,
+calling pm_runtime_set_active() for the device will affect the parent, unless
+the parent's 'power.ignore_children' flag is set. Namely, in that case the
+parent won't be able to suspend at run time, using the PM core's helper
+functions, as long as the child's status is 'active', even if the child's
+runtime PM is still disabled (i.e. pm_runtime_enable() hasn't been called for
+the child yet or pm_runtime_disable() has been called for it). For this reason,
+once pm_runtime_set_active() has been called for the device, pm_runtime_enable()
+should be called for it too as soon as reasonably possible or its runtime PM
+status should be changed back to 'suspended' with the help of
+pm_runtime_set_suspended().
+
+If the default initial runtime PM status of the device (i.e. 'suspended')
+reflects the actual state of the device, its bus type's or its driver's
+->probe() callback will likely need to wake it up using one of the PM core's
+helper functions described in Section 4. In that case, pm_runtime_resume()
+should be used. Of course, for this purpose the device's runtime PM has to be
+enabled earlier by calling pm_runtime_enable().
+
+It may be desirable to suspend the device once ->probe() has finished.
+Therefore the driver core uses the asyncronous pm_request_idle() to submit a
+request to execute the subsystem-level idle callback for the device at that
+time. A driver that makes use of the runtime autosuspend feature, may want to
+update the last busy mark before returning from ->probe().
+
+Moreover, the driver core prevents runtime PM callbacks from racing with the bus
+notifier callback in __device_release_driver(), which is necessary, because the
+notifier is used by some subsystems to carry out operations affecting the
+runtime PM functionality. It does so by calling pm_runtime_get_sync() before
+driver_sysfs_remove() and the BUS_NOTIFY_UNBIND_DRIVER notifications. This
+resumes the device if it's in the suspended state and prevents it from
+being suspended again while those routines are being executed.
+
+To allow bus types and drivers to put devices into the suspended state by
+calling pm_runtime_suspend() from their ->remove() routines, the driver core
+executes pm_runtime_put_sync() after running the BUS_NOTIFY_UNBIND_DRIVER
+notifications in __device_release_driver(). This requires bus types and
+drivers to make their ->remove() callbacks avoid races with runtime PM directly,
+but also it allows of more flexibility in the handling of devices during the
+removal of their drivers.
+
+The user space can effectively disallow the driver of the device to power manage
+it at run time by changing the value of its /sys/devices/.../power/control
+attribute to "on", which causes pm_runtime_forbid() to be called. In principle,
+this mechanism may also be used by the driver to effectively turn off the
+runtime power management of the device until the user space turns it on.
+Namely, during the initialization the driver can make sure that the runtime PM
+status of the device is 'active' and call pm_runtime_forbid(). It should be
+noted, however, that if the user space has already intentionally changed the
+value of /sys/devices/.../power/control to "auto" to allow the driver to power
+manage the device at run time, the driver may confuse it by using
+pm_runtime_forbid() this way.
+
+6. Runtime PM and System Sleep
+
+Runtime PM and system sleep (i.e., system suspend and hibernation, also known
+as suspend-to-RAM and suspend-to-disk) interact with each other in a couple of
+ways. If a device is active when a system sleep starts, everything is
+straightforward. But what should happen if the device is already suspended?
+
+The device may have different wake-up settings for runtime PM and system sleep.
+For example, remote wake-up may be enabled for runtime suspend but disallowed
+for system sleep (device_may_wakeup(dev) returns 'false'). When this happens,
+the subsystem-level system suspend callback is responsible for changing the
+device's wake-up setting (it may leave that to the device driver's system
+suspend routine). It may be necessary to resume the device and suspend it again
+in order to do so. The same is true if the driver uses different power levels
+or other settings for runtime suspend and system sleep.
+
+During system resume, the simplest approach is to bring all devices back to full
+power, even if they had been suspended before the system suspend began. There
+are several reasons for this, including:
+
+ * The device might need to switch power levels, wake-up settings, etc.
+
+ * Remote wake-up events might have been lost by the firmware.
+
+ * The device's children may need the device to be at full power in order
+ to resume themselves.
+
+ * The driver's idea of the device state may not agree with the device's
+ physical state. This can happen during resume from hibernation.
+
+ * The device might need to be reset.
+
+ * Even though the device was suspended, if its usage counter was > 0 then most
+ likely it would need a runtime resume in the near future anyway.
+
+If the device had been suspended before the system suspend began and it's
+brought back to full power during resume, then its runtime PM status will have
+to be updated to reflect the actual post-system sleep status. The way to do
+this is:
+
+ pm_runtime_disable(dev);
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+
+The PM core always increments the runtime usage counter before calling the
+->suspend() callback and decrements it after calling the ->resume() callback.
+Hence disabling runtime PM temporarily like this will not cause any runtime
+suspend attempts to be permanently lost. If the usage count goes to zero
+following the return of the ->resume() callback, the ->runtime_idle() callback
+will be invoked as usual.
+
+On some systems, however, system sleep is not entered through a global firmware
+or hardware operation. Instead, all hardware components are put into low-power
+states directly by the kernel in a coordinated way. Then, the system sleep
+state effectively follows from the states the hardware components end up in
+and the system is woken up from that state by a hardware interrupt or a similar
+mechanism entirely under the kernel's control. As a result, the kernel never
+gives control away and the states of all devices during resume are precisely
+known to it. If that is the case and none of the situations listed above takes
+place (in particular, if the system is not waking up from hibernation), it may
+be more efficient to leave the devices that had been suspended before the system
+suspend began in the suspended state.
+
+To this end, the PM core provides a mechanism allowing some coordination between
+different levels of device hierarchy. Namely, if a system suspend .prepare()
+callback returns a positive number for a device, that indicates to the PM core
+that the device appears to be runtime-suspended and its state is fine, so it
+may be left in runtime suspend provided that all of its descendants are also
+left in runtime suspend. If that happens, the PM core will not execute any
+system suspend and resume callbacks for all of those devices, except for the
+complete callback, which is then entirely responsible for handling the device
+as appropriate. This only applies to system suspend transitions that are not
+related to hibernation (see Documentation/power/devices.txt for more
+information).
+
+The PM core does its best to reduce the probability of race conditions between
+the runtime PM and system suspend/resume (and hibernation) callbacks by carrying
+out the following operations:
+
+ * During system suspend pm_runtime_get_noresume() is called for every device
+ right before executing the subsystem-level .prepare() callback for it and
+ pm_runtime_barrier() is called for every device right before executing the
+ subsystem-level .suspend() callback for it. In addition to that the PM core
+ calls __pm_runtime_disable() with 'false' as the second argument for every
+ device right before executing the subsystem-level .suspend_late() callback
+ for it.
+
+ * During system resume pm_runtime_enable() and pm_runtime_put() are called for
+ every device right after executing the subsystem-level .resume_early()
+ callback and right after executing the subsystem-level .complete() callback
+ for it, respectively.
+
+7. Generic subsystem callbacks
+
+Subsystems may wish to conserve code space by using the set of generic power
+management callbacks provided by the PM core, defined in
+driver/base/power/generic_ops.c:
+
+ int pm_generic_runtime_suspend(struct device *dev);
+ - invoke the ->runtime_suspend() callback provided by the driver of this
+ device and return its result, or return 0 if not defined
+
+ int pm_generic_runtime_resume(struct device *dev);
+ - invoke the ->runtime_resume() callback provided by the driver of this
+ device and return its result, or return 0 if not defined
+
+ int pm_generic_suspend(struct device *dev);
+ - if the device has not been suspended at run time, invoke the ->suspend()
+ callback provided by its driver and return its result, or return 0 if not
+ defined
+
+ int pm_generic_suspend_noirq(struct device *dev);
+ - if pm_runtime_suspended(dev) returns "false", invoke the ->suspend_noirq()
+ callback provided by the device's driver and return its result, or return
+ 0 if not defined
+
+ int pm_generic_resume(struct device *dev);
+ - invoke the ->resume() callback provided by the driver of this device and,
+ if successful, change the device's runtime PM status to 'active'
+
+ int pm_generic_resume_noirq(struct device *dev);
+ - invoke the ->resume_noirq() callback provided by the driver of this device
+
+ int pm_generic_freeze(struct device *dev);
+ - if the device has not been suspended at run time, invoke the ->freeze()
+ callback provided by its driver and return its result, or return 0 if not
+ defined
+
+ int pm_generic_freeze_noirq(struct device *dev);
+ - if pm_runtime_suspended(dev) returns "false", invoke the ->freeze_noirq()
+ callback provided by the device's driver and return its result, or return
+ 0 if not defined
+
+ int pm_generic_thaw(struct device *dev);
+ - if the device has not been suspended at run time, invoke the ->thaw()
+ callback provided by its driver and return its result, or return 0 if not
+ defined
+
+ int pm_generic_thaw_noirq(struct device *dev);
+ - if pm_runtime_suspended(dev) returns "false", invoke the ->thaw_noirq()
+ callback provided by the device's driver and return its result, or return
+ 0 if not defined
+
+ int pm_generic_poweroff(struct device *dev);
+ - if the device has not been suspended at run time, invoke the ->poweroff()
+ callback provided by its driver and return its result, or return 0 if not
+ defined
+
+ int pm_generic_poweroff_noirq(struct device *dev);
+ - if pm_runtime_suspended(dev) returns "false", run the ->poweroff_noirq()
+ callback provided by the device's driver and return its result, or return
+ 0 if not defined
+
+ int pm_generic_restore(struct device *dev);
+ - invoke the ->restore() callback provided by the driver of this device and,
+ if successful, change the device's runtime PM status to 'active'
+
+ int pm_generic_restore_noirq(struct device *dev);
+ - invoke the ->restore_noirq() callback provided by the device's driver
+
+These functions are the defaults used by the PM core, if a subsystem doesn't
+provide its own callbacks for ->runtime_idle(), ->runtime_suspend(),
+->runtime_resume(), ->suspend(), ->suspend_noirq(), ->resume(),
+->resume_noirq(), ->freeze(), ->freeze_noirq(), ->thaw(), ->thaw_noirq(),
+->poweroff(), ->poweroff_noirq(), ->restore(), ->restore_noirq() in the
+subsystem-level dev_pm_ops structure.
+
+Device drivers that wish to use the same function as a system suspend, freeze,
+poweroff and runtime suspend callback, and similarly for system resume, thaw,
+restore, and runtime resume, can achieve this with the help of the
+UNIVERSAL_DEV_PM_OPS macro defined in include/linux/pm.h (possibly setting its
+last argument to NULL).
+
+8. "No-Callback" Devices
+
+Some "devices" are only logical sub-devices of their parent and cannot be
+power-managed on their own. (The prototype example is a USB interface. Entire
+USB devices can go into low-power mode or send wake-up requests, but neither is
+possible for individual interfaces.) The drivers for these devices have no
+need of runtime PM callbacks; if the callbacks did exist, ->runtime_suspend()
+and ->runtime_resume() would always return 0 without doing anything else and
+->runtime_idle() would always call pm_runtime_suspend().
+
+Subsystems can tell the PM core about these devices by calling
+pm_runtime_no_callbacks(). This should be done after the device structure is
+initialized and before it is registered (although after device registration is
+also okay). The routine will set the device's power.no_callbacks flag and
+prevent the non-debugging runtime PM sysfs attributes from being created.
+
+When power.no_callbacks is set, the PM core will not invoke the
+->runtime_idle(), ->runtime_suspend(), or ->runtime_resume() callbacks.
+Instead it will assume that suspends and resumes always succeed and that idle
+devices should be suspended.
+
+As a consequence, the PM core will never directly inform the device's subsystem
+or driver about runtime power changes. Instead, the driver for the device's
+parent must take responsibility for telling the device's driver when the
+parent's power state changes.
+
+9. Autosuspend, or automatically-delayed suspends
+
+Changing a device's power state isn't free; it requires both time and energy.
+A device should be put in a low-power state only when there's some reason to
+think it will remain in that state for a substantial time. A common heuristic
+says that a device which hasn't been used for a while is liable to remain
+unused; following this advice, drivers should not allow devices to be suspended
+at runtime until they have been inactive for some minimum period. Even when
+the heuristic ends up being non-optimal, it will still prevent devices from
+"bouncing" too rapidly between low-power and full-power states.
+
+The term "autosuspend" is an historical remnant. It doesn't mean that the
+device is automatically suspended (the subsystem or driver still has to call
+the appropriate PM routines); rather it means that runtime suspends will
+automatically be delayed until the desired period of inactivity has elapsed.
+
+Inactivity is determined based on the power.last_busy field. Drivers should
+call pm_runtime_mark_last_busy() to update this field after carrying out I/O,
+typically just before calling pm_runtime_put_autosuspend(). The desired length
+of the inactivity period is a matter of policy. Subsystems can set this length
+initially by calling pm_runtime_set_autosuspend_delay(), but after device
+registration the length should be controlled by user space, using the
+/sys/devices/.../power/autosuspend_delay_ms attribute.
+
+In order to use autosuspend, subsystems or drivers must call
+pm_runtime_use_autosuspend() (preferably before registering the device), and
+thereafter they should use the various *_autosuspend() helper functions instead
+of the non-autosuspend counterparts:
+
+ Instead of: pm_runtime_suspend use: pm_runtime_autosuspend;
+ Instead of: pm_schedule_suspend use: pm_request_autosuspend;
+ Instead of: pm_runtime_put use: pm_runtime_put_autosuspend;
+ Instead of: pm_runtime_put_sync use: pm_runtime_put_sync_autosuspend.
+
+Drivers may also continue to use the non-autosuspend helper functions; they
+will behave normally, not taking the autosuspend delay into account.
+Similarly, if the power.use_autosuspend field isn't set then the autosuspend
+helper functions will behave just like the non-autosuspend counterparts.
+
+Under some circumstances a driver or subsystem may want to prevent a device
+from autosuspending immediately, even though the usage counter is zero and the
+autosuspend delay time has expired. If the ->runtime_suspend() callback
+returns -EAGAIN or -EBUSY, and if the next autosuspend delay expiration time is
+in the future (as it normally would be if the callback invoked
+pm_runtime_mark_last_busy()), the PM core will automatically reschedule the
+autosuspend. The ->runtime_suspend() callback can't do this rescheduling
+itself because no suspend requests of any kind are accepted while the device is
+suspending (i.e., while the callback is running).
+
+The implementation is well suited for asynchronous use in interrupt contexts.
+However such use inevitably involves races, because the PM core can't
+synchronize ->runtime_suspend() callbacks with the arrival of I/O requests.
+This synchronization must be handled by the driver, using its private lock.
+Here is a schematic pseudo-code example:
+
+ foo_read_or_write(struct foo_priv *foo, void *data)
+ {
+ lock(&foo->private_lock);
+ add_request_to_io_queue(foo, data);
+ if (foo->num_pending_requests++ == 0)
+ pm_runtime_get(&foo->dev);
+ if (!foo->is_suspended)
+ foo_process_next_request(foo);
+ unlock(&foo->private_lock);
+ }
+
+ foo_io_completion(struct foo_priv *foo, void *req)
+ {
+ lock(&foo->private_lock);
+ if (--foo->num_pending_requests == 0) {
+ pm_runtime_mark_last_busy(&foo->dev);
+ pm_runtime_put_autosuspend(&foo->dev);
+ } else {
+ foo_process_next_request(foo);
+ }
+ unlock(&foo->private_lock);
+ /* Send req result back to the user ... */
+ }
+
+ int foo_runtime_suspend(struct device *dev)
+ {
+ struct foo_priv foo = container_of(dev, ...);
+ int ret = 0;
+
+ lock(&foo->private_lock);
+ if (foo->num_pending_requests > 0) {
+ ret = -EBUSY;
+ } else {
+ /* ... suspend the device ... */
+ foo->is_suspended = 1;
+ }
+ unlock(&foo->private_lock);
+ return ret;
+ }
+
+ int foo_runtime_resume(struct device *dev)
+ {
+ struct foo_priv foo = container_of(dev, ...);
+
+ lock(&foo->private_lock);
+ /* ... resume the device ... */
+ foo->is_suspended = 0;
+ pm_runtime_mark_last_busy(&foo->dev);
+ if (foo->num_pending_requests > 0)
+ foo_process_next_request(foo);
+ unlock(&foo->private_lock);
+ return 0;
+ }
+
+The important point is that after foo_io_completion() asks for an autosuspend,
+the foo_runtime_suspend() callback may race with foo_read_or_write().
+Therefore foo_runtime_suspend() has to check whether there are any pending I/O
+requests (while holding the private lock) before allowing the suspend to
+proceed.
+
+In addition, the power.autosuspend_delay field can be changed by user space at
+any time. If a driver cares about this, it can call
+pm_runtime_autosuspend_expiration() from within the ->runtime_suspend()
+callback while holding its private lock. If the function returns a nonzero
+value then the delay has not yet expired and the callback should return
+-EAGAIN.
diff --git a/Documentation/power/s2ram.txt b/Documentation/power/s2ram.txt
new file mode 100644
index 000000000..4685aee19
--- /dev/null
+++ b/Documentation/power/s2ram.txt
@@ -0,0 +1,85 @@
+ How to get s2ram working
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+ 2006 Linus Torvalds
+ 2006 Pavel Machek
+
+1) Check suspend.sf.net, program s2ram there has long whitelist of
+ "known ok" machines, along with tricks to use on each one.
+
+2) If that does not help, try reading tricks.txt and
+ video.txt. Perhaps problem is as simple as broken module, and
+ simple module unload can fix it.
+
+3) You can use Linus' TRACE_RESUME infrastructure, described below.
+
+ Using TRACE_RESUME
+ ~~~~~~~~~~~~~~~~~~
+
+I've been working at making the machines I have able to STR, and almost
+always it's a driver that is buggy. Thank God for the suspend/resume
+debugging - the thing that Chuck tried to disable. That's often the _only_
+way to debug these things, and it's actually pretty powerful (but
+time-consuming - having to insert TRACE_RESUME() markers into the device
+driver that doesn't resume and recompile and reboot).
+
+Anyway, the way to debug this for people who are interested (have a
+machine that doesn't boot) is:
+
+ - enable PM_DEBUG, and PM_TRACE
+
+ - use a script like this:
+
+ #!/bin/sh
+ sync
+ echo 1 > /sys/power/pm_trace
+ echo mem > /sys/power/state
+
+ to suspend
+
+ - if it doesn't come back up (which is usually the problem), reboot by
+ holding the power button down, and look at the dmesg output for things
+ like
+
+ Magic number: 4:156:725
+ hash matches drivers/base/power/resume.c:28
+ hash matches device 0000:01:00.0
+
+ which means that the last trace event was just before trying to resume
+ device 0000:01:00.0. Then figure out what driver is controlling that
+ device (lspci and /sys/devices/pci* is your friend), and see if you can
+ fix it, disable it, or trace into its resume function.
+
+ If no device matches the hash (or any matches appear to be false positives),
+ the culprit may be a device from a loadable kernel module that is not loaded
+ until after the hash is checked. You can check the hash against the current
+ devices again after more modules are loaded using sysfs:
+
+ cat /sys/power/pm_trace_dev_match
+
+For example, the above happens to be the VGA device on my EVO, which I
+used to run with "radeonfb" (it's an ATI Radeon mobility). It turns out
+that "radeonfb" simply cannot resume that device - it tries to set the
+PLL's, and it just _hangs_. Using the regular VGA console and letting X
+resume it instead works fine.
+
+NOTE
+====
+pm_trace uses the system's Real Time Clock (RTC) to save the magic number.
+Reason for this is that the RTC is the only reliably available piece of
+hardware during resume operations where a value can be set that will
+survive a reboot.
+
+pm_trace is not compatible with asynchronous suspend, so it turns
+asynchronous suspend off (which may work around timing or
+ordering-sensitive bugs).
+
+Consequence is that after a resume (even if it is successful) your system
+clock will have a value corresponding to the magic number instead of the
+correct date/time! It is therefore advisable to use a program like ntp-date
+or rdate to reset the correct date/time from an external time source when
+using this trace option.
+
+As the clock keeps ticking it is also essential that the reboot is done
+quickly after the resume failure. The trace option does not use the seconds
+or the low order bits of the minutes of the RTC, but a too long delay will
+corrupt the magic value.
diff --git a/Documentation/power/states.txt b/Documentation/power/states.txt
new file mode 100644
index 000000000..50f3ef917
--- /dev/null
+++ b/Documentation/power/states.txt
@@ -0,0 +1,109 @@
+System Power Management Sleep States
+
+(C) 2014 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+The kernel supports up to four system sleep states generically, although three
+of them depend on the platform support code to implement the low-level details
+for each state.
+
+The states are represented by strings that can be read or written to the
+/sys/power/state file. Those strings may be "mem", "standby", "freeze" and
+"disk", where the last one always represents hibernation (Suspend-To-Disk) and
+the meaning of the remaining ones depends on the relative_sleep_states command
+line argument.
+
+For relative_sleep_states=1, the strings "mem", "standby" and "freeze" label the
+available non-hibernation sleep states from the deepest to the shallowest,
+respectively. In that case, "mem" is always present in /sys/power/state,
+because there is at least one non-hibernation sleep state in every system. If
+the given system supports two non-hibernation sleep states, "standby" is present
+in /sys/power/state in addition to "mem". If the system supports three
+non-hibernation sleep states, "freeze" will be present in /sys/power/state in
+addition to "mem" and "standby".
+
+For relative_sleep_states=0, which is the default, the following descriptions
+apply.
+
+state: Suspend-To-Idle
+ACPI state: S0
+Label: "freeze"
+
+This state is a generic, pure software, light-weight, system sleep state.
+It allows more energy to be saved relative to runtime idle by freezing user
+space and putting all I/O devices into low-power states (possibly
+lower-power than available at run time), such that the processors can
+spend more time in their idle states.
+
+This state can be used for platforms without Power-On Suspend/Suspend-to-RAM
+support, or it can be used in addition to Suspend-to-RAM (memory sleep)
+to provide reduced resume latency. It is always supported.
+
+
+State: Standby / Power-On Suspend
+ACPI State: S1
+Label: "standby"
+
+This state, if supported, offers moderate, though real, power savings, while
+providing a relatively low-latency transition back to a working system. No
+operating state is lost (the CPU retains power), so the system easily starts up
+again where it left off.
+
+In addition to freezing user space and putting all I/O devices into low-power
+states, which is done for Suspend-To-Idle too, nonboot CPUs are taken offline
+and all low-level system functions are suspended during transitions into this
+state. For this reason, it should allow more energy to be saved relative to
+Suspend-To-Idle, but the resume latency will generally be greater than for that
+state.
+
+
+State: Suspend-to-RAM
+ACPI State: S3
+Label: "mem"
+
+This state, if supported, offers significant power savings as everything in the
+system is put into a low-power state, except for memory, which should be placed
+into the self-refresh mode to retain its contents. All of the steps carried out
+when entering Power-On Suspend are also carried out during transitions to STR.
+Additional operations may take place depending on the platform capabilities. In
+particular, on ACPI systems the kernel passes control to the BIOS (platform
+firmware) as the last step during STR transitions and that usually results in
+powering down some more low-level components that aren't directly controlled by
+the kernel.
+
+System and device state is saved and kept in memory. All devices are suspended
+and put into low-power states. In many cases, all peripheral buses lose power
+when entering STR, so devices must be able to handle the transition back to the
+"on" state.
+
+For at least ACPI, STR requires some minimal boot-strapping code to resume the
+system from it. This may be the case on other platforms too.
+
+
+State: Suspend-to-disk
+ACPI State: S4
+Label: "disk"
+
+This state offers the greatest power savings, and can be used even in
+the absence of low-level platform support for power management. This
+state operates similarly to Suspend-to-RAM, but includes a final step
+of writing memory contents to disk. On resume, this is read and memory
+is restored to its pre-suspend state.
+
+STD can be handled by the firmware or the kernel. If it is handled by
+the firmware, it usually requires a dedicated partition that must be
+setup via another operating system for it to use. Despite the
+inconvenience, this method requires minimal work by the kernel, since
+the firmware will also handle restoring memory contents on resume.
+
+For suspend-to-disk, a mechanism called 'swsusp' (Swap Suspend) is used
+to write memory contents to free swap space. swsusp has some restrictive
+requirements, but should work in most cases. Some, albeit outdated,
+documentation can be found in Documentation/power/swsusp.txt.
+Alternatively, userspace can do most of the actual suspend to disk work,
+see userland-swsusp.txt.
+
+Once memory state is written to disk, the system may either enter a
+low-power state (like ACPI S4), or it may simply power down. Powering
+down offers greater savings, and allows this mechanism to work on any
+system. However, entering a real low-power state allows the user to
+trigger wake up events (e.g. pressing a key or opening a laptop lid).
diff --git a/Documentation/power/suspend-and-cpuhotplug.txt b/Documentation/power/suspend-and-cpuhotplug.txt
new file mode 100644
index 000000000..2850df3bf
--- /dev/null
+++ b/Documentation/power/suspend-and-cpuhotplug.txt
@@ -0,0 +1,275 @@
+Interaction of Suspend code (S3) with the CPU hotplug infrastructure
+
+ (C) 2011 - 2014 Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
+
+
+I. How does the regular CPU hotplug code differ from how the Suspend-to-RAM
+ infrastructure uses it internally? And where do they share common code?
+
+Well, a picture is worth a thousand words... So ASCII art follows :-)
+
+[This depicts the current design in the kernel, and focusses only on the
+interactions involving the freezer and CPU hotplug and also tries to explain
+the locking involved. It outlines the notifications involved as well.
+But please note that here, only the call paths are illustrated, with the aim
+of describing where they take different paths and where they share code.
+What happens when regular CPU hotplug and Suspend-to-RAM race with each other
+is not depicted here.]
+
+On a high level, the suspend-resume cycle goes like this:
+
+|Freeze| -> |Disable nonboot| -> |Do suspend| -> |Enable nonboot| -> |Thaw |
+|tasks | | cpus | | | | cpus | |tasks|
+
+
+More details follow:
+
+ Suspend call path
+ -----------------
+
+ Write 'mem' to
+ /sys/power/state
+ sysfs file
+ |
+ v
+ Acquire pm_mutex lock
+ |
+ v
+ Send PM_SUSPEND_PREPARE
+ notifications
+ |
+ v
+ Freeze tasks
+ |
+ |
+ v
+ disable_nonboot_cpus()
+ /* start */
+ |
+ v
+ Acquire cpu_add_remove_lock
+ |
+ v
+ Iterate over CURRENTLY
+ online CPUs
+ |
+ |
+ | ----------
+ v | L
+ ======> _cpu_down() |
+ | [This takes cpuhotplug.lock |
+ Common | before taking down the CPU |
+ code | and releases it when done] | O
+ | While it is at it, notifications |
+ | are sent when notable events occur, |
+ ======> by running all registered callbacks. |
+ | | O
+ | |
+ | |
+ v |
+ Note down these cpus in | P
+ frozen_cpus mask ----------
+ |
+ v
+ Disable regular cpu hotplug
+ by setting cpu_hotplug_disabled=1
+ |
+ v
+ Release cpu_add_remove_lock
+ |
+ v
+ /* disable_nonboot_cpus() complete */
+ |
+ v
+ Do suspend
+
+
+
+Resuming back is likewise, with the counterparts being (in the order of
+execution during resume):
+* enable_nonboot_cpus() which involves:
+ | Acquire cpu_add_remove_lock
+ | Reset cpu_hotplug_disabled to 0, thereby enabling regular cpu hotplug
+ | Call _cpu_up() [for all those cpus in the frozen_cpus mask, in a loop]
+ | Release cpu_add_remove_lock
+ v
+
+* thaw tasks
+* send PM_POST_SUSPEND notifications
+* Release pm_mutex lock.
+
+
+It is to be noted here that the pm_mutex lock is acquired at the very
+beginning, when we are just starting out to suspend, and then released only
+after the entire cycle is complete (i.e., suspend + resume).
+
+
+
+ Regular CPU hotplug call path
+ -----------------------------
+
+ Write 0 (or 1) to
+ /sys/devices/system/cpu/cpu*/online
+ sysfs file
+ |
+ |
+ v
+ cpu_down()
+ |
+ v
+ Acquire cpu_add_remove_lock
+ |
+ v
+ If cpu_hotplug_disabled is 1
+ return gracefully
+ |
+ |
+ v
+ ======> _cpu_down()
+ | [This takes cpuhotplug.lock
+ Common | before taking down the CPU
+ code | and releases it when done]
+ | While it is at it, notifications
+ | are sent when notable events occur,
+ ======> by running all registered callbacks.
+ |
+ |
+ v
+ Release cpu_add_remove_lock
+ [That's it!, for
+ regular CPU hotplug]
+
+
+
+So, as can be seen from the two diagrams (the parts marked as "Common code"),
+regular CPU hotplug and the suspend code path converge at the _cpu_down() and
+_cpu_up() functions. They differ in the arguments passed to these functions,
+in that during regular CPU hotplug, 0 is passed for the 'tasks_frozen'
+argument. But during suspend, since the tasks are already frozen by the time
+the non-boot CPUs are offlined or onlined, the _cpu_*() functions are called
+with the 'tasks_frozen' argument set to 1.
+[See below for some known issues regarding this.]
+
+
+Important files and functions/entry points:
+------------------------------------------
+
+kernel/power/process.c : freeze_processes(), thaw_processes()
+kernel/power/suspend.c : suspend_prepare(), suspend_enter(), suspend_finish()
+kernel/cpu.c: cpu_[up|down](), _cpu_[up|down](), [disable|enable]_nonboot_cpus()
+
+
+
+II. What are the issues involved in CPU hotplug?
+ -------------------------------------------
+
+There are some interesting situations involving CPU hotplug and microcode
+update on the CPUs, as discussed below:
+
+[Please bear in mind that the kernel requests the microcode images from
+userspace, using the request_firmware() function defined in
+drivers/base/firmware_class.c]
+
+
+a. When all the CPUs are identical:
+
+ This is the most common situation and it is quite straightforward: we want
+ to apply the same microcode revision to each of the CPUs.
+ To give an example of x86, the collect_cpu_info() function defined in
+ arch/x86/kernel/microcode_core.c helps in discovering the type of the CPU
+ and thereby in applying the correct microcode revision to it.
+ But note that the kernel does not maintain a common microcode image for the
+ all CPUs, in order to handle case 'b' described below.
+
+
+b. When some of the CPUs are different than the rest:
+
+ In this case since we probably need to apply different microcode revisions
+ to different CPUs, the kernel maintains a copy of the correct microcode
+ image for each CPU (after appropriate CPU type/model discovery using
+ functions such as collect_cpu_info()).
+
+
+c. When a CPU is physically hot-unplugged and a new (and possibly different
+ type of) CPU is hot-plugged into the system:
+
+ In the current design of the kernel, whenever a CPU is taken offline during
+ a regular CPU hotplug operation, upon receiving the CPU_DEAD notification
+ (which is sent by the CPU hotplug code), the microcode update driver's
+ callback for that event reacts by freeing the kernel's copy of the
+ microcode image for that CPU.
+
+ Hence, when a new CPU is brought online, since the kernel finds that it
+ doesn't have the microcode image, it does the CPU type/model discovery
+ afresh and then requests the userspace for the appropriate microcode image
+ for that CPU, which is subsequently applied.
+
+ For example, in x86, the mc_cpu_callback() function (which is the microcode
+ update driver's callback registered for CPU hotplug events) calls
+ microcode_update_cpu() which would call microcode_init_cpu() in this case,
+ instead of microcode_resume_cpu() when it finds that the kernel doesn't
+ have a valid microcode image. This ensures that the CPU type/model
+ discovery is performed and the right microcode is applied to the CPU after
+ getting it from userspace.
+
+
+d. Handling microcode update during suspend/hibernate:
+
+ Strictly speaking, during a CPU hotplug operation which does not involve
+ physically removing or inserting CPUs, the CPUs are not actually powered
+ off during a CPU offline. They are just put to the lowest C-states possible.
+ Hence, in such a case, it is not really necessary to re-apply microcode
+ when the CPUs are brought back online, since they wouldn't have lost the
+ image during the CPU offline operation.
+
+ This is the usual scenario encountered during a resume after a suspend.
+ However, in the case of hibernation, since all the CPUs are completely
+ powered off, during restore it becomes necessary to apply the microcode
+ images to all the CPUs.
+
+ [Note that we don't expect someone to physically pull out nodes and insert
+ nodes with a different type of CPUs in-between a suspend-resume or a
+ hibernate/restore cycle.]
+
+ In the current design of the kernel however, during a CPU offline operation
+ as part of the suspend/hibernate cycle (the CPU_DEAD_FROZEN notification),
+ the existing copy of microcode image in the kernel is not freed up.
+ And during the CPU online operations (during resume/restore), since the
+ kernel finds that it already has copies of the microcode images for all the
+ CPUs, it just applies them to the CPUs, avoiding any re-discovery of CPU
+ type/model and the need for validating whether the microcode revisions are
+ right for the CPUs or not (due to the above assumption that physical CPU
+ hotplug will not be done in-between suspend/resume or hibernate/restore
+ cycles).
+
+
+III. Are there any known problems when regular CPU hotplug and suspend race
+ with each other?
+
+Yes, they are listed below:
+
+1. When invoking regular CPU hotplug, the 'tasks_frozen' argument passed to
+ the _cpu_down() and _cpu_up() functions is *always* 0.
+ This might not reflect the true current state of the system, since the
+ tasks could have been frozen by an out-of-band event such as a suspend
+ operation in progress. Hence, it will lead to wrong notifications being
+ sent during the cpu online/offline events (eg, CPU_ONLINE notification
+ instead of CPU_ONLINE_FROZEN) which in turn will lead to execution of
+ inappropriate code by the callbacks registered for such CPU hotplug events.
+
+2. If a regular CPU hotplug stress test happens to race with the freezer due
+ to a suspend operation in progress at the same time, then we could hit the
+ situation described below:
+
+ * A regular cpu online operation continues its journey from userspace
+ into the kernel, since the freezing has not yet begun.
+ * Then freezer gets to work and freezes userspace.
+ * If cpu online has not yet completed the microcode update stuff by now,
+ it will now start waiting on the frozen userspace in the
+ TASK_UNINTERRUPTIBLE state, in order to get the microcode image.
+ * Now the freezer continues and tries to freeze the remaining tasks. But
+ due to this wait mentioned above, the freezer won't be able to freeze
+ the cpu online hotplug task and hence freezing of tasks fails.
+
+ As a result of this task freezing failure, the suspend operation gets
+ aborted.
diff --git a/Documentation/power/suspend-and-interrupts.txt b/Documentation/power/suspend-and-interrupts.txt
new file mode 100644
index 000000000..8afb29a86
--- /dev/null
+++ b/Documentation/power/suspend-and-interrupts.txt
@@ -0,0 +1,135 @@
+System Suspend and Device Interrupts
+
+Copyright (C) 2014 Intel Corp.
+Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+
+Suspending and Resuming Device IRQs
+-----------------------------------
+
+Device interrupt request lines (IRQs) are generally disabled during system
+suspend after the "late" phase of suspending devices (that is, after all of the
+->prepare, ->suspend and ->suspend_late callbacks have been executed for all
+devices). That is done by suspend_device_irqs().
+
+The rationale for doing so is that after the "late" phase of device suspend
+there is no legitimate reason why any interrupts from suspended devices should
+trigger and if any devices have not been suspended properly yet, it is better to
+block interrupts from them anyway. Also, in the past we had problems with
+interrupt handlers for shared IRQs that device drivers implementing them were
+not prepared for interrupts triggering after their devices had been suspended.
+In some cases they would attempt to access, for example, memory address spaces
+of suspended devices and cause unpredictable behavior to ensue as a result.
+Unfortunately, such problems are very difficult to debug and the introduction
+of suspend_device_irqs(), along with the "noirq" phase of device suspend and
+resume, was the only practical way to mitigate them.
+
+Device IRQs are re-enabled during system resume, right before the "early" phase
+of resuming devices (that is, before starting to execute ->resume_early
+callbacks for devices). The function doing that is resume_device_irqs().
+
+
+The IRQF_NO_SUSPEND Flag
+------------------------
+
+There are interrupts that can legitimately trigger during the entire system
+suspend-resume cycle, including the "noirq" phases of suspending and resuming
+devices as well as during the time when nonboot CPUs are taken offline and
+brought back online. That applies to timer interrupts in the first place,
+but also to IPIs and to some other special-purpose interrupts.
+
+The IRQF_NO_SUSPEND flag is used to indicate that to the IRQ subsystem when
+requesting a special-purpose interrupt. It causes suspend_device_irqs() to
+leave the corresponding IRQ enabled so as to allow the interrupt to work as
+expected during the suspend-resume cycle, but does not guarantee that the
+interrupt will wake the system from a suspended state -- for such cases it is
+necessary to use enable_irq_wake().
+
+Note that the IRQF_NO_SUSPEND flag affects the entire IRQ and not just one
+user of it. Thus, if the IRQ is shared, all of the interrupt handlers installed
+for it will be executed as usual after suspend_device_irqs(), even if the
+IRQF_NO_SUSPEND flag was not passed to request_irq() (or equivalent) by some of
+the IRQ's users. For this reason, using IRQF_NO_SUSPEND and IRQF_SHARED at the
+same time should be avoided.
+
+
+System Wakeup Interrupts, enable_irq_wake() and disable_irq_wake()
+------------------------------------------------------------------
+
+System wakeup interrupts generally need to be configured to wake up the system
+from sleep states, especially if they are used for different purposes (e.g. as
+I/O interrupts) in the working state.
+
+That may involve turning on a special signal handling logic within the platform
+(such as an SoC) so that signals from a given line are routed in a different way
+during system sleep so as to trigger a system wakeup when needed. For example,
+the platform may include a dedicated interrupt controller used specifically for
+handling system wakeup events. Then, if a given interrupt line is supposed to
+wake up the system from sleep sates, the corresponding input of that interrupt
+controller needs to be enabled to receive signals from the line in question.
+After wakeup, it generally is better to disable that input to prevent the
+dedicated controller from triggering interrupts unnecessarily.
+
+The IRQ subsystem provides two helper functions to be used by device drivers for
+those purposes. Namely, enable_irq_wake() turns on the platform's logic for
+handling the given IRQ as a system wakeup interrupt line and disable_irq_wake()
+turns that logic off.
+
+Calling enable_irq_wake() causes suspend_device_irqs() to treat the given IRQ
+in a special way. Namely, the IRQ remains enabled, by on the first interrupt
+it will be disabled, marked as pending and "suspended" so that it will be
+re-enabled by resume_device_irqs() during the subsequent system resume. Also
+the PM core is notified about the event which causes the system suspend in
+progress to be aborted (that doesn't have to happen immediately, but at one
+of the points where the suspend thread looks for pending wakeup events).
+
+This way every interrupt from a wakeup interrupt source will either cause the
+system suspend currently in progress to be aborted or wake up the system if
+already suspended. However, after suspend_device_irqs() interrupt handlers are
+not executed for system wakeup IRQs. They are only executed for IRQF_NO_SUSPEND
+IRQs at that time, but those IRQs should not be configured for system wakeup
+using enable_irq_wake().
+
+
+Interrupts and Suspend-to-Idle
+------------------------------
+
+Suspend-to-idle (also known as the "freeze" sleep state) is a relatively new
+system sleep state that works by idling all of the processors and waiting for
+interrupts right after the "noirq" phase of suspending devices.
+
+Of course, this means that all of the interrupts with the IRQF_NO_SUSPEND flag
+set will bring CPUs out of idle while in that state, but they will not cause the
+IRQ subsystem to trigger a system wakeup.
+
+System wakeup interrupts, in turn, will trigger wakeup from suspend-to-idle in
+analogy with what they do in the full system suspend case. The only difference
+is that the wakeup from suspend-to-idle is signaled using the usual working
+state interrupt delivery mechanisms and doesn't require the platform to use
+any special interrupt handling logic for it to work.
+
+
+IRQF_NO_SUSPEND and enable_irq_wake()
+-------------------------------------
+
+There are very few valid reasons to use both enable_irq_wake() and the
+IRQF_NO_SUSPEND flag on the same IRQ, and it is never valid to use both for the
+same device.
+
+First of all, if the IRQ is not shared, the rules for handling IRQF_NO_SUSPEND
+interrupts (interrupt handlers are invoked after suspend_device_irqs()) are
+directly at odds with the rules for handling system wakeup interrupts (interrupt
+handlers are not invoked after suspend_device_irqs()).
+
+Second, both enable_irq_wake() and IRQF_NO_SUSPEND apply to entire IRQs and not
+to individual interrupt handlers, so sharing an IRQ between a system wakeup
+interrupt source and an IRQF_NO_SUSPEND interrupt source does not generally
+make sense.
+
+In rare cases an IRQ can be shared between a wakeup device driver and an
+IRQF_NO_SUSPEND user. In order for this to be safe, the wakeup device driver
+must be able to discern spurious IRQs from genuine wakeup events (signalling
+the latter to the core with pm_system_wakeup()), must use enable_irq_wake() to
+ensure that the IRQ will function as a wakeup source, and must request the IRQ
+with IRQF_COND_SUSPEND to tell the core that it meets these requirements. If
+these requirements are not met, it is not valid to use IRQF_COND_SUSPEND.
diff --git a/Documentation/power/swsusp-and-swap-files.txt b/Documentation/power/swsusp-and-swap-files.txt
new file mode 100644
index 000000000..f281886de
--- /dev/null
+++ b/Documentation/power/swsusp-and-swap-files.txt
@@ -0,0 +1,60 @@
+Using swap files with software suspend (swsusp)
+ (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+
+The Linux kernel handles swap files almost in the same way as it handles swap
+partitions and there are only two differences between these two types of swap
+areas:
+(1) swap files need not be contiguous,
+(2) the header of a swap file is not in the first block of the partition that
+holds it. From the swsusp's point of view (1) is not a problem, because it is
+already taken care of by the swap-handling code, but (2) has to be taken into
+consideration.
+
+In principle the location of a swap file's header may be determined with the
+help of appropriate filesystem driver. Unfortunately, however, it requires the
+filesystem holding the swap file to be mounted, and if this filesystem is
+journaled, it cannot be mounted during resume from disk. For this reason to
+identify a swap file swsusp uses the name of the partition that holds the file
+and the offset from the beginning of the partition at which the swap file's
+header is located. For convenience, this offset is expressed in <PAGE_SIZE>
+units.
+
+In order to use a swap file with swsusp, you need to:
+
+1) Create the swap file and make it active, eg.
+
+# dd if=/dev/zero of=<swap_file_path> bs=1024 count=<swap_file_size_in_k>
+# mkswap <swap_file_path>
+# swapon <swap_file_path>
+
+2) Use an application that will bmap the swap file with the help of the
+FIBMAP ioctl and determine the location of the file's swap header, as the
+offset, in <PAGE_SIZE> units, from the beginning of the partition which
+holds the swap file.
+
+3) Add the following parameters to the kernel command line:
+
+resume=<swap_file_partition> resume_offset=<swap_file_offset>
+
+where <swap_file_partition> is the partition on which the swap file is located
+and <swap_file_offset> is the offset of the swap header determined by the
+application in 2) (of course, this step may be carried out automatically
+by the same application that determines the swap file's header offset using the
+FIBMAP ioctl)
+
+OR
+
+Use a userland suspend application that will set the partition and offset
+with the help of the SNAPSHOT_SET_SWAP_AREA ioctl described in
+Documentation/power/userland-swsusp.txt (this is the only method to suspend
+to a swap file allowing the resume to be initiated from an initrd or initramfs
+image).
+
+Now, swsusp will use the swap file in the same way in which it would use a swap
+partition. In particular, the swap file has to be active (ie. be present in
+/proc/swaps) so that it can be used for suspending.
+
+Note that if the swap file used for suspending is deleted and recreated,
+the location of its header need not be the same as before. Thus every time
+this happens the value of the "resume_offset=" kernel command line parameter
+has to be updated.
diff --git a/Documentation/power/swsusp-dmcrypt.txt b/Documentation/power/swsusp-dmcrypt.txt
new file mode 100644
index 000000000..59931b46f
--- /dev/null
+++ b/Documentation/power/swsusp-dmcrypt.txt
@@ -0,0 +1,138 @@
+Author: Andreas Steinmetz <ast@domdv.de>
+
+
+How to use dm-crypt and swsusp together:
+========================================
+
+Some prerequisites:
+You know how dm-crypt works. If not, visit the following web page:
+http://www.saout.de/misc/dm-crypt/
+You have read Documentation/power/swsusp.txt and understand it.
+You did read Documentation/initrd.txt and know how an initrd works.
+You know how to create or how to modify an initrd.
+
+Now your system is properly set up, your disk is encrypted except for
+the swap device(s) and the boot partition which may contain a mini
+system for crypto setup and/or rescue purposes. You may even have
+an initrd that does your current crypto setup already.
+
+At this point you want to encrypt your swap, too. Still you want to
+be able to suspend using swsusp. This, however, means that you
+have to be able to either enter a passphrase or that you read
+the key(s) from an external device like a pcmcia flash disk
+or an usb stick prior to resume. So you need an initrd, that sets
+up dm-crypt and then asks swsusp to resume from the encrypted
+swap device.
+
+The most important thing is that you set up dm-crypt in such
+a way that the swap device you suspend to/resume from has
+always the same major/minor within the initrd as well as
+within your running system. The easiest way to achieve this is
+to always set up this swap device first with dmsetup, so that
+it will always look like the following:
+
+brw------- 1 root root 254, 0 Jul 28 13:37 /dev/mapper/swap0
+
+Now set up your kernel to use /dev/mapper/swap0 as the default
+resume partition, so your kernel .config contains:
+
+CONFIG_PM_STD_PARTITION="/dev/mapper/swap0"
+
+Prepare your boot loader to use the initrd you will create or
+modify. For lilo the simplest setup looks like the following
+lines:
+
+image=/boot/vmlinuz
+initrd=/boot/initrd.gz
+label=linux
+append="root=/dev/ram0 init=/linuxrc rw"
+
+Finally you need to create or modify your initrd. Lets assume
+you create an initrd that reads the required dm-crypt setup
+from a pcmcia flash disk card. The card is formatted with an ext2
+fs which resides on /dev/hde1 when the card is inserted. The
+card contains at least the encrypted swap setup in a file
+named "swapkey". /etc/fstab of your initrd contains something
+like the following:
+
+/dev/hda1 /mnt ext3 ro 0 0
+none /proc proc defaults,noatime,nodiratime 0 0
+none /sys sysfs defaults,noatime,nodiratime 0 0
+
+/dev/hda1 contains an unencrypted mini system that sets up all
+of your crypto devices, again by reading the setup from the
+pcmcia flash disk. What follows now is a /linuxrc for your
+initrd that allows you to resume from encrypted swap and that
+continues boot with your mini system on /dev/hda1 if resume
+does not happen:
+
+#!/bin/sh
+PATH=/sbin:/bin:/usr/sbin:/usr/bin
+mount /proc
+mount /sys
+mapped=0
+noresume=`grep -c noresume /proc/cmdline`
+if [ "$*" != "" ]
+then
+ noresume=1
+fi
+dmesg -n 1
+/sbin/cardmgr -q
+for i in 1 2 3 4 5 6 7 8 9 0
+do
+ if [ -f /proc/ide/hde/media ]
+ then
+ usleep 500000
+ mount -t ext2 -o ro /dev/hde1 /mnt
+ if [ -f /mnt/swapkey ]
+ then
+ dmsetup create swap0 /mnt/swapkey > /dev/null 2>&1 && mapped=1
+ fi
+ umount /mnt
+ break
+ fi
+ usleep 500000
+done
+killproc /sbin/cardmgr
+dmesg -n 6
+if [ $mapped = 1 ]
+then
+ if [ $noresume != 0 ]
+ then
+ mkswap /dev/mapper/swap0 > /dev/null 2>&1
+ fi
+ echo 254:0 > /sys/power/resume
+ dmsetup remove swap0
+fi
+umount /sys
+mount /mnt
+umount /proc
+cd /mnt
+pivot_root . mnt
+mount /proc
+umount -l /mnt
+umount /proc
+exec chroot . /sbin/init $* < dev/console > dev/console 2>&1
+
+Please don't mind the weird loop above, busybox's msh doesn't know
+the let statement. Now, what is happening in the script?
+First we have to decide if we want to try to resume, or not.
+We will not resume if booting with "noresume" or any parameters
+for init like "single" or "emergency" as boot parameters.
+
+Then we need to set up dmcrypt with the setup data from the
+pcmcia flash disk. If this succeeds we need to reset the swap
+device if we don't want to resume. The line "echo 254:0 > /sys/power/resume"
+then attempts to resume from the first device mapper device.
+Note that it is important to set the device in /sys/power/resume,
+regardless if resuming or not, otherwise later suspend will fail.
+If resume starts, script execution terminates here.
+
+Otherwise we just remove the encrypted swap device and leave it to the
+mini system on /dev/hda1 to set the whole crypto up (it is up to
+you to modify this to your taste).
+
+What then follows is the well known process to change the root
+file system and continue booting from there. I prefer to unmount
+the initrd prior to continue booting but it is up to you to modify
+this.
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
new file mode 100644
index 000000000..f732a8321
--- /dev/null
+++ b/Documentation/power/swsusp.txt
@@ -0,0 +1,429 @@
+Some warnings, first.
+
+ * BIG FAT WARNING *********************************************************
+ *
+ * If you touch anything on disk between suspend and resume...
+ * ...kiss your data goodbye.
+ *
+ * If you do resume from initrd after your filesystems are mounted...
+ * ...bye bye root partition.
+ * [this is actually same case as above]
+ *
+ * If you have unsupported (*) devices using DMA, you may have some
+ * problems. If your disk driver does not support suspend... (IDE does),
+ * it may cause some problems, too. If you change kernel command line
+ * between suspend and resume, it may do something wrong. If you change
+ * your hardware while system is suspended... well, it was not good idea;
+ * but it will probably only crash.
+ *
+ * (*) suspend/resume support is needed to make it safe.
+ *
+ * If you have any filesystems on USB devices mounted before software suspend,
+ * they won't be accessible after resume and you may lose data, as though
+ * you have unplugged the USB devices with mounted filesystems on them;
+ * see the FAQ below for details. (This is not true for more traditional
+ * power states like "standby", which normally don't turn USB off.)
+
+You need to append resume=/dev/your_swap_partition to kernel command
+line. Then you suspend by
+
+echo shutdown > /sys/power/disk; echo disk > /sys/power/state
+
+. If you feel ACPI works pretty well on your system, you might try
+
+echo platform > /sys/power/disk; echo disk > /sys/power/state
+
+. If you would like to write hibernation image to swap and then suspend
+to RAM (provided your platform supports it), you can try
+
+echo suspend > /sys/power/disk; echo disk > /sys/power/state
+
+. If you have SATA disks, you'll need recent kernels with SATA suspend
+support. For suspend and resume to work, make sure your disk drivers
+are built into kernel -- not modules. [There's way to make
+suspend/resume with modular disk drivers, see FAQ, but you probably
+should not do that.]
+
+If you want to limit the suspend image size to N bytes, do
+
+echo N > /sys/power/image_size
+
+before suspend (it is limited to 500 MB by default).
+
+. The resume process checks for the presence of the resume device,
+if found, it then checks the contents for the hibernation image signature.
+If both are found, it resumes the hibernation image.
+
+. The resume process may be triggered in two ways:
+ 1) During lateinit: If resume=/dev/your_swap_partition is specified on
+ the kernel command line, lateinit runs the resume process. If the
+ resume device has not been probed yet, the resume process fails and
+ bootup continues.
+ 2) Manually from an initrd or initramfs: May be run from
+ the init script by using the /sys/power/resume file. It is vital
+ that this be done prior to remounting any filesystems (even as
+ read-only) otherwise data may be corrupted.
+
+Article about goals and implementation of Software Suspend for Linux
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Author: Gábor Kuti
+Last revised: 2003-10-20 by Pavel Machek
+
+Idea and goals to achieve
+
+Nowadays it is common in several laptops that they have a suspend button. It
+saves the state of the machine to a filesystem or to a partition and switches
+to standby mode. Later resuming the machine the saved state is loaded back to
+ram and the machine can continue its work. It has two real benefits. First we
+save ourselves the time machine goes down and later boots up, energy costs
+are real high when running from batteries. The other gain is that we don't have to
+interrupt our programs so processes that are calculating something for a long
+time shouldn't need to be written interruptible.
+
+swsusp saves the state of the machine into active swaps and then reboots or
+powerdowns. You must explicitly specify the swap partition to resume from with
+``resume='' kernel option. If signature is found it loads and restores saved
+state. If the option ``noresume'' is specified as a boot parameter, it skips
+the resuming. If the option ``hibernate=nocompress'' is specified as a boot
+parameter, it saves hibernation image without compression.
+
+In the meantime while the system is suspended you should not add/remove any
+of the hardware, write to the filesystems, etc.
+
+Sleep states summary
+====================
+
+There are three different interfaces you can use, /proc/acpi should
+work like this:
+
+In a really perfect world:
+echo 1 > /proc/acpi/sleep # for standby
+echo 2 > /proc/acpi/sleep # for suspend to ram
+echo 3 > /proc/acpi/sleep # for suspend to ram, but with more power conservative
+echo 4 > /proc/acpi/sleep # for suspend to disk
+echo 5 > /proc/acpi/sleep # for shutdown unfriendly the system
+
+and perhaps
+echo 4b > /proc/acpi/sleep # for suspend to disk via s4bios
+
+Frequently Asked Questions
+==========================
+
+Q: well, suspending a server is IMHO a really stupid thing,
+but... (Diego Zuccato):
+
+A: You bought new UPS for your server. How do you install it without
+bringing machine down? Suspend to disk, rearrange power cables,
+resume.
+
+You have your server on UPS. Power died, and UPS is indicating 30
+seconds to failure. What do you do? Suspend to disk.
+
+
+Q: Maybe I'm missing something, but why don't the regular I/O paths work?
+
+A: We do use the regular I/O paths. However we cannot restore the data
+to its original location as we load it. That would create an
+inconsistent kernel state which would certainly result in an oops.
+Instead, we load the image into unused memory and then atomically copy
+it back to it original location. This implies, of course, a maximum
+image size of half the amount of memory.
+
+There are two solutions to this:
+
+* require half of memory to be free during suspend. That way you can
+read "new" data onto free spots, then cli and copy
+
+* assume we had special "polling" ide driver that only uses memory
+between 0-640KB. That way, I'd have to make sure that 0-640KB is free
+during suspending, but otherwise it would work...
+
+suspend2 shares this fundamental limitation, but does not include user
+data and disk caches into "used memory" by saving them in
+advance. That means that the limitation goes away in practice.
+
+Q: Does linux support ACPI S4?
+
+A: Yes. That's what echo platform > /sys/power/disk does.
+
+Q: What is 'suspend2'?
+
+A: suspend2 is 'Software Suspend 2', a forked implementation of
+suspend-to-disk which is available as separate patches for 2.4 and 2.6
+kernels from swsusp.sourceforge.net. It includes support for SMP, 4GB
+highmem and preemption. It also has a extensible architecture that
+allows for arbitrary transformations on the image (compression,
+encryption) and arbitrary backends for writing the image (eg to swap
+or an NFS share[Work In Progress]). Questions regarding suspend2
+should be sent to the mailing list available through the suspend2
+website, and not to the Linux Kernel Mailing List. We are working
+toward merging suspend2 into the mainline kernel.
+
+Q: What is the freezing of tasks and why are we using it?
+
+A: The freezing of tasks is a mechanism by which user space processes and some
+kernel threads are controlled during hibernation or system-wide suspend (on some
+architectures). See freezing-of-tasks.txt for details.
+
+Q: What is the difference between "platform" and "shutdown"?
+
+A:
+
+shutdown: save state in linux, then tell bios to powerdown
+
+platform: save state in linux, then tell bios to powerdown and blink
+ "suspended led"
+
+"platform" is actually right thing to do where supported, but
+"shutdown" is most reliable (except on ACPI systems).
+
+Q: I do not understand why you have such strong objections to idea of
+selective suspend.
+
+A: Do selective suspend during runtime power management, that's okay. But
+it's useless for suspend-to-disk. (And I do not see how you could use
+it for suspend-to-ram, I hope you do not want that).
+
+Lets see, so you suggest to
+
+* SUSPEND all but swap device and parents
+* Snapshot
+* Write image to disk
+* SUSPEND swap device and parents
+* Powerdown
+
+Oh no, that does not work, if swap device or its parents uses DMA,
+you've corrupted data. You'd have to do
+
+* SUSPEND all but swap device and parents
+* FREEZE swap device and parents
+* Snapshot
+* UNFREEZE swap device and parents
+* Write
+* SUSPEND swap device and parents
+
+Which means that you still need that FREEZE state, and you get more
+complicated code. (And I have not yet introduce details like system
+devices).
+
+Q: There don't seem to be any generally useful behavioral
+distinctions between SUSPEND and FREEZE.
+
+A: Doing SUSPEND when you are asked to do FREEZE is always correct,
+but it may be unnecessarily slow. If you want your driver to stay simple,
+slowness may not matter to you. It can always be fixed later.
+
+For devices like disk it does matter, you do not want to spindown for
+FREEZE.
+
+Q: After resuming, system is paging heavily, leading to very bad interactivity.
+
+A: Try running
+
+cat /proc/[0-9]*/maps | grep / | sed 's:.* /:/:' | sort -u | while read file
+do
+ test -f "$file" && cat "$file" > /dev/null
+done
+
+after resume. swapoff -a; swapon -a may also be useful.
+
+Q: What happens to devices during swsusp? They seem to be resumed
+during system suspend?
+
+A: That's correct. We need to resume them if we want to write image to
+disk. Whole sequence goes like
+
+ Suspend part
+ ~~~~~~~~~~~~
+ running system, user asks for suspend-to-disk
+
+ user processes are stopped
+
+ suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
+ with state snapshot
+
+ state snapshot: copy of whole used memory is taken with interrupts disabled
+
+ resume(): devices are woken up so that we can write image to swap
+
+ write image to swap
+
+ suspend(PMSG_SUSPEND): suspend devices so that we can power off
+
+ turn the power off
+
+ Resume part
+ ~~~~~~~~~~~
+ (is actually pretty similar)
+
+ running system, user asks for suspend-to-disk
+
+ user processes are stopped (in common case there are none, but with resume-from-initrd, no one knows)
+
+ read image from disk
+
+ suspend(PMSG_FREEZE): devices are frozen so that they don't interfere
+ with image restoration
+
+ image restoration: rewrite memory with image
+
+ resume(): devices are woken up so that system can continue
+
+ thaw all user processes
+
+Q: What is this 'Encrypt suspend image' for?
+
+A: First of all: it is not a replacement for dm-crypt encrypted swap.
+It cannot protect your computer while it is suspended. Instead it does
+protect from leaking sensitive data after resume from suspend.
+
+Think of the following: you suspend while an application is running
+that keeps sensitive data in memory. The application itself prevents
+the data from being swapped out. Suspend, however, must write these
+data to swap to be able to resume later on. Without suspend encryption
+your sensitive data are then stored in plaintext on disk. This means
+that after resume your sensitive data are accessible to all
+applications having direct access to the swap device which was used
+for suspend. If you don't need swap after resume these data can remain
+on disk virtually forever. Thus it can happen that your system gets
+broken in weeks later and sensitive data which you thought were
+encrypted and protected are retrieved and stolen from the swap device.
+To prevent this situation you should use 'Encrypt suspend image'.
+
+During suspend a temporary key is created and this key is used to
+encrypt the data written to disk. When, during resume, the data was
+read back into memory the temporary key is destroyed which simply
+means that all data written to disk during suspend are then
+inaccessible so they can't be stolen later on. The only thing that
+you must then take care of is that you call 'mkswap' for the swap
+partition used for suspend as early as possible during regular
+boot. This asserts that any temporary key from an oopsed suspend or
+from a failed or aborted resume is erased from the swap device.
+
+As a rule of thumb use encrypted swap to protect your data while your
+system is shut down or suspended. Additionally use the encrypted
+suspend image to prevent sensitive data from being stolen after
+resume.
+
+Q: Can I suspend to a swap file?
+
+A: Generally, yes, you can. However, it requires you to use the "resume=" and
+"resume_offset=" kernel command line parameters, so the resume from a swap file
+cannot be initiated from an initrd or initramfs image. See
+swsusp-and-swap-files.txt for details.
+
+Q: Is there a maximum system RAM size that is supported by swsusp?
+
+A: It should work okay with highmem.
+
+Q: Does swsusp (to disk) use only one swap partition or can it use
+multiple swap partitions (aggregate them into one logical space)?
+
+A: Only one swap partition, sorry.
+
+Q: If my application(s) causes lots of memory & swap space to be used
+(over half of the total system RAM), is it correct that it is likely
+to be useless to try to suspend to disk while that app is running?
+
+A: No, it should work okay, as long as your app does not mlock()
+it. Just prepare big enough swap partition.
+
+Q: What information is useful for debugging suspend-to-disk problems?
+
+A: Well, last messages on the screen are always useful. If something
+is broken, it is usually some kernel driver, therefore trying with as
+little as possible modules loaded helps a lot. I also prefer people to
+suspend from console, preferably without X running. Booting with
+init=/bin/bash, then swapon and starting suspend sequence manually
+usually does the trick. Then it is good idea to try with latest
+vanilla kernel.
+
+Q: How can distributions ship a swsusp-supporting kernel with modular
+disk drivers (especially SATA)?
+
+A: Well, it can be done, load the drivers, then do echo into
+/sys/power/resume file from initrd. Be sure not to mount
+anything, not even read-only mount, or you are going to lose your
+data.
+
+Q: How do I make suspend more verbose?
+
+A: If you want to see any non-error kernel messages on the virtual
+terminal the kernel switches to during suspend, you have to set the
+kernel console loglevel to at least 4 (KERN_WARNING), for example by
+doing
+
+ # save the old loglevel
+ read LOGLEVEL DUMMY < /proc/sys/kernel/printk
+ # set the loglevel so we see the progress bar.
+ # if the level is higher than needed, we leave it alone.
+ if [ $LOGLEVEL -lt 5 ]; then
+ echo 5 > /proc/sys/kernel/printk
+ fi
+
+ IMG_SZ=0
+ read IMG_SZ < /sys/power/image_size
+ echo -n disk > /sys/power/state
+ RET=$?
+ #
+ # the logic here is:
+ # if image_size > 0 (without kernel support, IMG_SZ will be zero),
+ # then try again with image_size set to zero.
+ if [ $RET -ne 0 -a $IMG_SZ -ne 0 ]; then # try again with minimal image size
+ echo 0 > /sys/power/image_size
+ echo -n disk > /sys/power/state
+ RET=$?
+ fi
+
+ # restore previous loglevel
+ echo $LOGLEVEL > /proc/sys/kernel/printk
+ exit $RET
+
+Q: Is this true that if I have a mounted filesystem on a USB device and
+I suspend to disk, I can lose data unless the filesystem has been mounted
+with "sync"?
+
+A: That's right ... if you disconnect that device, you may lose data.
+In fact, even with "-o sync" you can lose data if your programs have
+information in buffers they haven't written out to a disk you disconnect,
+or if you disconnect before the device finished saving data you wrote.
+
+Software suspend normally powers down USB controllers, which is equivalent
+to disconnecting all USB devices attached to your system.
+
+Your system might well support low-power modes for its USB controllers
+while the system is asleep, maintaining the connection, using true sleep
+modes like "suspend-to-RAM" or "standby". (Don't write "disk" to the
+/sys/power/state file; write "standby" or "mem".) We've not seen any
+hardware that can use these modes through software suspend, although in
+theory some systems might support "platform" modes that won't break the
+USB connections.
+
+Remember that it's always a bad idea to unplug a disk drive containing a
+mounted filesystem. That's true even when your system is asleep! The
+safest thing is to unmount all filesystems on removable media (such USB,
+Firewire, CompactFlash, MMC, external SATA, or even IDE hotplug bays)
+before suspending; then remount them after resuming.
+
+There is a work-around for this problem. For more information, see
+Documentation/usb/persist.txt.
+
+Q: Can I suspend-to-disk using a swap partition under LVM?
+
+A: No. You can suspend successfully, but you'll not be able to
+resume. uswsusp should be able to work with LVM. See suspend.sf.net.
+
+Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were
+compiled with the similar configuration files. Anyway I found that
+suspend to disk (and resume) is much slower on 2.6.16 compared to
+2.6.15. Any idea for why that might happen or how can I speed it up?
+
+A: This is because the size of the suspend image is now greater than
+for 2.6.15 (by saving more data we can get more responsive system
+after resume).
+
+There's the /sys/power/image_size knob that controls the size of the
+image. If you set it to 0 (eg. by echo 0 > /sys/power/image_size as
+root), the 2.6.15 behavior should be restored. If it is still too
+slow, take a look at suspend.sf.net -- userland suspend is faster and
+supports LZF compression to speed it up further.
diff --git a/Documentation/power/tricks.txt b/Documentation/power/tricks.txt
new file mode 100644
index 000000000..a1b8f7249
--- /dev/null
+++ b/Documentation/power/tricks.txt
@@ -0,0 +1,27 @@
+ swsusp/S3 tricks
+ ~~~~~~~~~~~~~~~~
+Pavel Machek <pavel@ucw.cz>
+
+If you want to trick swsusp/S3 into working, you might want to try:
+
+* go with minimal config, turn off drivers like USB, AGP you don't
+ really need
+
+* turn off APIC and preempt
+
+* use ext2. At least it has working fsck. [If something seems to go
+ wrong, force fsck when you have a chance]
+
+* turn off modules
+
+* use vga text console, shut down X. [If you really want X, you might
+ want to try vesafb later]
+
+* try running as few processes as possible, preferably go to single
+ user mode.
+
+* due to video issues, swsusp should be easier to get working than
+ S3. Try that first.
+
+When you make it work, try to find out what exactly was it that broke
+suspend, and preferably fix that.
diff --git a/Documentation/power/tuxonice-internals.txt b/Documentation/power/tuxonice-internals.txt
new file mode 100644
index 000000000..0c6a2163a
--- /dev/null
+++ b/Documentation/power/tuxonice-internals.txt
@@ -0,0 +1,532 @@
+ TuxOnIce 4.0 Internal Documentation.
+ Updated to 23 March 2015
+
+(Please note that incremental image support mentioned in this document is work
+in progress. This document may need updating prior to the actual release of
+4.0!)
+
+1. Introduction.
+
+ TuxOnIce 4.0 is an addition to the Linux Kernel, designed to
+ allow the user to quickly shutdown and quickly boot a computer, without
+ needing to close documents or programs. It is equivalent to the
+ hibernate facility in some laptops. This implementation, however,
+ requires no special BIOS or hardware support.
+
+ The code in these files is based upon the original implementation
+ prepared by Gabor Kuti and additional work by Pavel Machek and a
+ host of others. This code has been substantially reworked by Nigel
+ Cunningham, again with the help and testing of many others, not the
+ least of whom are Bernard Blackham and Michael Frank. At its heart,
+ however, the operation is essentially the same as Gabor's version.
+
+2. Overview of operation.
+
+ The basic sequence of operations is as follows:
+
+ a. Quiesce all other activity.
+ b. Ensure enough memory and storage space are available, and attempt
+ to free memory/storage if necessary.
+ c. Allocate the required memory and storage space.
+ d. Write the image.
+ e. Power down.
+
+ There are a number of complicating factors which mean that things are
+ not as simple as the above would imply, however...
+
+ o The activity of each process must be stopped at a point where it will
+ not be holding locks necessary for saving the image, or unexpectedly
+ restart operations due to something like a timeout and thereby make
+ our image inconsistent.
+
+ o It is desirous that we sync outstanding I/O to disk before calculating
+ image statistics. This reduces corruption if one should suspend but
+ then not resume, and also makes later parts of the operation safer (see
+ below).
+
+ o We need to get as close as we can to an atomic copy of the data.
+ Inconsistencies in the image will result in inconsistent memory contents at
+ resume time, and thus in instability of the system and/or file system
+ corruption. This would appear to imply a maximum image size of one half of
+ the amount of RAM, but we have a solution... (again, below).
+
+ o In 2.6 and later, we choose to play nicely with the other suspend-to-disk
+ implementations.
+
+3. Detailed description of internals.
+
+ a. Quiescing activity.
+
+ Safely quiescing the system is achieved using three separate but related
+ aspects.
+
+ First, we use the vanilla kerne's support for freezing processes. This code
+ is based on the observation that the vast majority of processes don't need
+ to run during suspend. They can be 'frozen'. The kernel therefore
+ implements a refrigerator routine, which processes enter and in which they
+ remain until the cycle is complete. Processes enter the refrigerator via
+ try_to_freeze() invocations at appropriate places. A process cannot be
+ frozen in any old place. It must not be holding locks that will be needed
+ for writing the image or freezing other processes. For this reason,
+ userspace processes generally enter the refrigerator via the signal
+ handling code, and kernel threads at the place in their event loops where
+ they drop locks and yield to other processes or sleep. The task of freezing
+ processes is complicated by the fact that there can be interdependencies
+ between processes. Freezing process A before process B may mean that
+ process B cannot be frozen, because it stops at waiting for process A
+ rather than in the refrigerator. This issue is seen where userspace waits
+ on freezeable kernel threads or fuse filesystem threads. To address this
+ issue, we implement the following algorithm for quiescing activity:
+
+ - Freeze filesystems (including fuse - userspace programs starting
+ new requests are immediately frozen; programs already running
+ requests complete their work before being frozen in the next
+ step)
+ - Freeze userspace
+ - Thaw filesystems (this is safe now that userspace is frozen and no
+ fuse requests are outstanding).
+ - Invoke sys_sync (noop on fuse).
+ - Freeze filesystems
+ - Freeze kernel threads
+
+ If we need to free memory, we thaw kernel threads and filesystems, but not
+ userspace. We can then free caches without worrying about deadlocks due to
+ swap files being on frozen filesystems or such like.
+
+ b. Ensure enough memory & storage are available.
+
+ We have a number of constraints to meet in order to be able to successfully
+ suspend and resume.
+
+ First, the image will be written in two parts, described below. One of
+ these parts needs to have an atomic copy made, which of course implies a
+ maximum size of one half of the amount of system memory. The other part
+ ('pageset') is not atomically copied, and can therefore be as large or
+ small as desired.
+
+ Second, we have constraints on the amount of storage available. In these
+ calculations, we may also consider any compression that will be done. The
+ cryptoapi module allows the user to configure an expected compression ratio.
+
+ Third, the user can specify an arbitrary limit on the image size, in
+ megabytes. This limit is treated as a soft limit, so that we don't fail the
+ attempt to suspend if we cannot meet this constraint.
+
+ c. Allocate the required memory and storage space.
+
+ Having done the initial freeze, we determine whether the above constraints
+ are met, and seek to allocate the metadata for the image. If the constraints
+ are not met, or we fail to allocate the required space for the metadata, we
+ seek to free the amount of memory that we calculate is needed and try again.
+ We allow up to four iterations of this loop before aborting the cycle. If
+ we do fail, it should only be because of a bug in TuxOnIce's calculations
+ or the vanilla kernel code for freeing memory.
+
+ These steps are merged together in the prepare_image function, found in
+ prepare_image.c. The functions are merged because of the cyclical nature
+ of the problem of calculating how much memory and storage is needed. Since
+ the data structures containing the information about the image must
+ themselves take memory and use storage, the amount of memory and storage
+ required changes as we prepare the image. Since the changes are not large,
+ only one or two iterations will be required to achieve a solution.
+
+ The recursive nature of the algorithm is miminised by keeping user space
+ frozen while preparing the image, and by the fact that our records of which
+ pages are to be saved and which pageset they are saved in use bitmaps (so
+ that changes in number or fragmentation of the pages to be saved don't
+ feedback via changes in the amount of memory needed for metadata). The
+ recursiveness is thus limited to any extra slab pages allocated to store the
+ extents that record storage used, and the effects of seeking to free memory.
+
+ d. Write the image.
+
+ We previously mentioned the need to create an atomic copy of the data, and
+ the half-of-memory limitation that is implied in this. This limitation is
+ circumvented by dividing the memory to be saved into two parts, called
+ pagesets.
+
+ Pageset2 contains most of the page cache - the pages on the active and
+ inactive LRU lists that aren't needed or modified while TuxOnIce is
+ running, so they can be safely written without an atomic copy. They are
+ therefore saved first and reloaded last. While saving these pages,
+ TuxOnIce carefully ensures that the work of writing the pages doesn't make
+ the image inconsistent. With the support for Kernel (Video) Mode Setting
+ going into the kernel at the time of writing, we need to check for pages
+ on the LRU that are used by KMS, and exclude them from pageset2. They are
+ atomically copied as part of pageset 1.
+
+ Once pageset2 has been saved, we prepare to do the atomic copy of remaining
+ memory. As part of the preparation, we power down drivers, thereby providing
+ them with the opportunity to have their state recorded in the image. The
+ amount of memory allocated by drivers for this is usually negligible, but if
+ DRI is in use, video drivers may require significants amounts. Ideally we
+ would be able to query drivers while preparing the image as to the amount of
+ memory they will need. Unfortunately no such mechanism exists at the time of
+ writing. For this reason, TuxOnIce allows the user to set an
+ 'extra_pages_allowance', which is used to seek to ensure sufficient memory
+ is available for drivers at this point. TuxOnIce also lets the user set this
+ value to 0. In this case, a test driver suspend is done while preparing the
+ image, and the difference (plus a margin) used instead. TuxOnIce will also
+ automatically restart the hibernation process (twice at most) if it finds
+ that the extra pages allowance is not sufficient. It will then use what was
+ actually needed (plus a margin, again). Failure to hibernate should thus
+ be an extremely rare occurence.
+
+ Having suspended the drivers, we save the CPU context before making an
+ atomic copy of pageset1, resuming the drivers and saving the atomic copy.
+ After saving the two pagesets, we just need to save our metadata before
+ powering down.
+
+ As we mentioned earlier, the contents of pageset2 pages aren't needed once
+ they've been saved. We therefore use them as the destination of our atomic
+ copy. In the unlikely event that pageset1 is larger, extra pages are
+ allocated while the image is being prepared. This is normally only a real
+ possibility when the system has just been booted and the page cache is
+ small.
+
+ This is where we need to be careful about syncing, however. Pageset2 will
+ probably contain filesystem meta data. If this is overwritten with pageset1
+ and then a sync occurs, the filesystem will be corrupted - at least until
+ resume time and another sync of the restored data. Since there is a
+ possibility that the user might not resume or (may it never be!) that
+ TuxOnIce might oops, we do our utmost to avoid syncing filesystems after
+ copying pageset1.
+
+ e. Incremental images
+
+ TuxOnIce 4.0 introduces a new incremental image mode which changes things a
+ little. When incremental images are enabled, we save a 'normal' image the
+ first time we hibernate. One resume however, we do not free the image or
+ the associated storage. Instead, it is retained until the next attempt at
+ hibernating and a mechanism is enabled which is used to track which pages
+ of memory are modified between the two cycles. The modified pages can then
+ be added to the existing image, rather than unmodified pages being saved
+ again unnecessarily.
+
+ Incremental image support is available in 64 bit Linux only, due to the
+ requirement for extra page flags.
+
+ This support is accomplished in the following way:
+
+ 1) Tracking of pages.
+
+ The tracking of changed pages is accomplished using the page fault
+ mechanism. When we reach a point at which we want to start tracking
+ changes, most pages are marked read-only and also flagged as being
+ read-only because of this support. Since this cannot happen for every page
+ of RAM, some are marked as untracked and always treated as modified whn
+ preparing an incremental iamge. When a process attempts to modify a page
+ that is marked read-only in this way, a page fault occurs, with TuxOnIce
+ code marking the page writable and dirty before allowing the write to
+ continue. In this way, the effect of incremental images on performance is
+ minimised - a page only causes a fault once. Small modifications to the
+ page allocator further reduce the number of faults that occur - free pages
+ are not tracked; they are made writable and marked as dirty as part of
+ being allocated.
+
+ 2) Saving the incremental image / atomicity.
+
+ The page fault mechanism is also used to improve the means by which
+ atomicity of the image is acheived. When it is time to do an atomic copy,
+ the flags for pages are reset, with the result being that it is no longer
+ necessary for us to do an atomic of pageset1. Instead, we normally write
+ the uncopied pages to disk. When an attempt is made to modify a page that
+ has not yet been saved, the page-fault mechanism makes a copy of the page
+ prior to allowing the write. This copy is then written to disk. Likewise,
+ on resume, if a process attempts to write to a page that has been read
+ while the rest of the image is still being loaded, a copy of that page is
+ made prior to the write being allowed. At the end of loading the image,
+ modified pages can thus be restored to their 'atomic copy' contents prior
+ to restarting normal operation. We also mark pages that are yet to be read
+ as invalid PFNs, so that we can capture as a bug any attempt by a
+ half-restored kernel to access a page that hasn't yet been reloaded.
+
+ f. Power down.
+
+ Powering down uses standard kernel routines. TuxOnIce supports powering down
+ using the ACPI S3, S4 and S5 methods or the kernel's non-ACPI power-off.
+ Supporting suspend to ram (S3) as a power off option might sound strange,
+ but it allows the user to quickly get their system up and running again if
+ the battery doesn't run out (we just need to re-read the overwritten pages)
+ and if the battery does run out (or the user removes power), they can still
+ resume.
+
+4. Data Structures.
+
+ TuxOnIce uses three main structures to store its metadata and configuration
+ information:
+
+ a) Pageflags bitmaps.
+
+ TuxOnIce records which pages will be in pageset1, pageset2, the destination
+ of the atomic copy and the source of the atomically restored image using
+ bitmaps. The code used is that written for swsusp, with small improvements
+ to match TuxOnIce's requirements.
+
+ The pageset1 bitmap is thus easily stored in the image header for use at
+ resume time.
+
+ As mentioned above, using bitmaps also means that the amount of memory and
+ storage required for recording the above information is constant. This
+ greatly simplifies the work of preparing the image. In earlier versions of
+ TuxOnIce, extents were used to record which pages would be stored. In that
+ case, however, eating memory could result in greater fragmentation of the
+ lists of pages, which in turn required more memory to store the extents and
+ more storage in the image header. These could in turn require further
+ freeing of memory, and another iteration. All of this complexity is removed
+ by having bitmaps.
+
+ Bitmaps also make a lot of sense because TuxOnIce only ever iterates
+ through the lists. There is therefore no cost to not being able to find the
+ nth page in order 0 time. We only need to worry about the cost of finding
+ the n+1th page, given the location of the nth page. Bitwise optimisations
+ help here.
+
+ b) Extents for block data.
+
+ TuxOnIce supports writing the image to multiple block devices. In the case
+ of swap, multiple partitions and/or files may be in use, and we happily use
+ them all (with the exception of compcache pages, which we allocate but do
+ not use). This use of multiple block devices is accomplished as follows:
+
+ Whatever the actual source of the allocated storage, the destination of the
+ image can be viewed in terms of one or more block devices, and on each
+ device, a list of sectors. To simplify matters, we only use contiguous,
+ PAGE_SIZE aligned sectors, like the swap code does.
+
+ Since sector numbers on each bdev may well not start at 0, it makes much
+ more sense to use extents here. Contiguous ranges of pages can thus be
+ represented in the extents by contiguous values.
+
+ Variations in block size are taken account of in transforming this data
+ into the parameters for bio submission.
+
+ We can thus implement a layer of abstraction wherein the core of TuxOnIce
+ doesn't have to worry about which device we're currently writing to or
+ where in the device we are. It simply requests that the next page in the
+ pageset or header be written, leaving the details to this lower layer.
+ The lower layer remembers where in the sequence of devices and blocks each
+ pageset starts. The header always starts at the beginning of the allocated
+ storage.
+
+ So extents are:
+
+ struct extent {
+ unsigned long minimum, maximum;
+ struct extent *next;
+ }
+
+ These are combined into chains of extents for a device:
+
+ struct extent_chain {
+ int size; /* size of the extent ie sum (max-min+1) */
+ int allocs, frees;
+ char *name;
+ struct extent *first, *last_touched;
+ };
+
+ For each bdev, we need to store a little more info (simplified definition):
+
+ struct toi_bdev_info {
+ struct block_device *bdev;
+
+ char uuid[17];
+ dev_t dev_t;
+ int bmap_shift;
+ int blocks_per_page;
+ };
+
+ The uuid is the main means used to identify the device in the storage
+ image. This means we can cope with the dev_t representation of a device
+ changing between saving the image and restoring it, as may happen on some
+ bioses or in the LVM case.
+
+ bmap_shift and blocks_per_page apply the effects of variations in blocks
+ per page settings for the filesystem and underlying bdev. For most
+ filesystems, these are the same, but for xfs, they can have independant
+ values.
+
+ Combining these two structures together, we have everything we need to
+ record what devices and what blocks on each device are being used to
+ store the image, and to submit i/o using bio_submit.
+
+ The last elements in the picture are a means of recording how the storage
+ is being used.
+
+ We do this first and foremost by implementing a layer of abstraction on
+ top of the devices and extent chains which allows us to view however many
+ devices there might be as one long storage tape, with a single 'head' that
+ tracks a 'current position' on the tape:
+
+ struct extent_iterate_state {
+ struct extent_chain *chains;
+ int num_chains;
+ int current_chain;
+ struct extent *current_extent;
+ unsigned long current_offset;
+ };
+
+ That is, *chains points to an array of size num_chains of extent chains.
+ For the filewriter, this is always a single chain. For the swapwriter, the
+ array is of size MAX_SWAPFILES.
+
+ current_chain, current_extent and current_offset thus point to the current
+ index in the chains array (and into a matching array of struct
+ suspend_bdev_info), the current extent in that chain (to optimise access),
+ and the current value in the offset.
+
+ The image is divided into three parts:
+ - The header
+ - Pageset 1
+ - Pageset 2
+
+ The header always starts at the first device and first block. We know its
+ size before we begin to save the image because we carefully account for
+ everything that will be stored in it.
+
+ The second pageset (LRU) is stored first. It begins on the next page after
+ the end of the header.
+
+ The first pageset is stored second. It's start location is only known once
+ pageset2 has been saved, since pageset2 may be compressed as it is written.
+ This location is thus recorded at the end of saving pageset2. It is page
+ aligned also.
+
+ Since this information is needed at resume time, and the location of extents
+ in memory will differ at resume time, this needs to be stored in a portable
+ way:
+
+ struct extent_iterate_saved_state {
+ int chain_num;
+ int extent_num;
+ unsigned long offset;
+ };
+
+ We can thus implement a layer of abstraction wherein the core of TuxOnIce
+ doesn't have to worry about which device we're currently writing to or
+ where in the device we are. It simply requests that the next page in the
+ pageset or header be written, leaving the details to this layer, and
+ invokes the routines to remember and restore the position, without having
+ to worry about the details of how the data is arranged on disk or such like.
+
+ c) Modules
+
+ One aim in designing TuxOnIce was to make it flexible. We wanted to allow
+ for the implementation of different methods of transforming a page to be
+ written to disk and different methods of getting the pages stored.
+
+ In early versions (the betas and perhaps Suspend1), compression support was
+ inlined in the image writing code, and the data structures and code for
+ managing swap were intertwined with the rest of the code. A number of people
+ had expressed interest in implementing image encryption, and alternative
+ methods of storing the image.
+
+ In order to achieve this, TuxOnIce was given a modular design.
+
+ A module is a single file which encapsulates the functionality needed
+ to transform a pageset of data (encryption or compression, for example),
+ or to write the pageset to a device. The former type of module is called
+ a 'page-transformer', the later a 'writer'.
+
+ Modules are linked together in pipeline fashion. There may be zero or more
+ page transformers in a pipeline, and there is always exactly one writer.
+ The pipeline follows this pattern:
+
+ ---------------------------------
+ | TuxOnIce Core |
+ ---------------------------------
+ |
+ |
+ ---------------------------------
+ | Page transformer 1 |
+ ---------------------------------
+ |
+ |
+ ---------------------------------
+ | Page transformer 2 |
+ ---------------------------------
+ |
+ |
+ ---------------------------------
+ | Writer |
+ ---------------------------------
+
+ During the writing of an image, the core code feeds pages one at a time
+ to the first module. This module performs whatever transformations it
+ implements on the incoming data, completely consuming the incoming data and
+ feeding output in a similar manner to the next module.
+
+ All routines are SMP safe, and the final result of the transformations is
+ written with an index (provided by the core) and size of the output by the
+ writer. As a result, we can have multithreaded I/O without needing to
+ worry about the sequence in which pages are written (or read).
+
+ During reading, the pipeline works in the reverse direction. The core code
+ calls the first module with the address of a buffer which should be filled.
+ (Note that the buffer size is always PAGE_SIZE at this time). This module
+ will in turn request data from the next module and so on down until the
+ writer is made to read from the stored image.
+
+ Part of definition of the structure of a module thus looks like this:
+
+ int (*rw_init) (int rw, int stream_number);
+ int (*rw_cleanup) (int rw);
+ int (*write_chunk) (struct page *buffer_page);
+ int (*read_chunk) (struct page *buffer_page, int sync);
+
+ It should be noted that the _cleanup routine may be called before the
+ full stream of data has been read or written. While writing the image,
+ the user may (depending upon settings) choose to abort suspending, and
+ if we are in the midst of writing the last portion of the image, a portion
+ of the second pageset may be reread. This may also happen if an error
+ occurs and we seek to abort the process of writing the image.
+
+ The modular design is also useful in a number of other ways. It provides
+ a means where by we can add support for:
+
+ - providing overall initialisation and cleanup routines;
+ - serialising configuration information in the image header;
+ - providing debugging information to the user;
+ - determining memory and image storage requirements;
+ - dis/enabling components at run-time;
+ - configuring the module (see below);
+
+ ...and routines for writers specific to their work:
+ - Parsing a resume= location;
+ - Determining whether an image exists;
+ - Marking a resume as having been attempted;
+ - Invalidating an image;
+
+ Since some parts of the core - the user interface and storage manager
+ support - have use for some of these functions, they are registered as
+ 'miscellaneous' modules as well.
+
+ d) Sysfs data structures.
+
+ This brings us naturally to support for configuring TuxOnIce. We desired to
+ provide a way to make TuxOnIce as flexible and configurable as possible.
+ The user shouldn't have to reboot just because they want to now hibernate to
+ a file instead of a partition, for example.
+
+ To accomplish this, TuxOnIce implements a very generic means whereby the
+ core and modules can register new sysfs entries. All TuxOnIce entries use
+ a single _store and _show routine, both of which are found in
+ tuxonice_sysfs.c in the kernel/power directory. These routines handle the
+ most common operations - getting and setting the values of bits, integers,
+ longs, unsigned longs and strings in one place, and allow overrides for
+ customised get and set options as well as side-effect routines for all
+ reads and writes.
+
+ When combined with some simple macros, a new sysfs entry can then be defined
+ in just a couple of lines:
+
+ SYSFS_INT("progress_granularity", SYSFS_RW, &progress_granularity, 1,
+ 2048, 0, NULL),
+
+ This defines a sysfs entry named "progress_granularity" which is rw and
+ allows the user to access an integer stored at &progress_granularity, giving
+ it a value between 1 and 2048 inclusive.
+
+ Sysfs entries are registered under /sys/power/tuxonice, and entries for
+ modules are located in a subdirectory named after the module.
+
diff --git a/Documentation/power/tuxonice.txt b/Documentation/power/tuxonice.txt
new file mode 100644
index 000000000..3bf0575ef
--- /dev/null
+++ b/Documentation/power/tuxonice.txt
@@ -0,0 +1,948 @@
+ --- TuxOnIce, version 3.0 ---
+
+1. What is it?
+2. Why would you want it?
+3. What do you need to use it?
+4. Why not just use the version already in the kernel?
+5. How do you use it?
+6. What do all those entries in /sys/power/tuxonice do?
+7. How do you get support?
+8. I think I've found a bug. What should I do?
+9. When will XXX be supported?
+10 How does it work?
+11. Who wrote TuxOnIce?
+
+1. What is it?
+
+ Imagine you're sitting at your computer, working away. For some reason, you
+ need to turn off your computer for a while - perhaps it's time to go home
+ for the day. When you come back to your computer next, you're going to want
+ to carry on where you left off. Now imagine that you could push a button and
+ have your computer store the contents of its memory to disk and power down.
+ Then, when you next start up your computer, it loads that image back into
+ memory and you can carry on from where you were, just as if you'd never
+ turned the computer off. You have far less time to start up, no reopening of
+ applications or finding what directory you put that file in yesterday.
+ That's what TuxOnIce does.
+
+ TuxOnIce has a long heritage. It began life as work by Gabor Kuti, who,
+ with some help from Pavel Machek, got an early version going in 1999. The
+ project was then taken over by Florent Chabaud while still in alpha version
+ numbers. Nigel Cunningham came on the scene when Florent was unable to
+ continue, moving the project into betas, then 1.0, 2.0 and so on up to
+ the present series. During the 2.0 series, the name was contracted to
+ Suspend2 and the website suspend2.net created. Beginning around July 2007,
+ a transition to calling the software TuxOnIce was made, to seek to help
+ make it clear that TuxOnIce is more concerned with hibernation than suspend
+ to ram.
+
+ Pavel Machek's swsusp code, which was merged around 2.5.17 retains the
+ original name, and was essentially a fork of the beta code until Rafael
+ Wysocki came on the scene in 2005 and began to improve it further.
+
+2. Why would you want it?
+
+ Why wouldn't you want it?
+
+ Being able to save the state of your system and quickly restore it improves
+ your productivity - you get a useful system in far less time than through
+ the normal boot process. You also get to be completely 'green', using zero
+ power, or as close to that as possible (the computer may still provide
+ minimal power to some devices, so they can initiate a power on, but that
+ will be the same amount of power as would be used if you told the computer
+ to shutdown.
+
+3. What do you need to use it?
+
+ a. Kernel Support.
+
+ i) The TuxOnIce patch.
+
+ TuxOnIce is part of the Linux Kernel. This version is not part of Linus's
+ 2.6 tree at the moment, so you will need to download the kernel source and
+ apply the latest patch. Having done that, enable the appropriate options in
+ make [menu|x]config (under Power Management Options - look for "Enhanced
+ Hibernation"), compile and install your kernel. TuxOnIce works with SMP,
+ Highmem, preemption, fuse filesystems, x86-32, PPC and x86_64.
+
+ TuxOnIce patches are available from http://tuxonice.net.
+
+ ii) Compression support.
+
+ Compression support is implemented via the cryptoapi. You will therefore want
+ to select any Cryptoapi transforms that you want to use on your image from
+ the Cryptoapi menu while configuring your kernel. We recommend the use of the
+ LZO compression method - it is very fast and still achieves good compression.
+
+ You can also tell TuxOnIce to write its image to an encrypted and/or
+ compressed filesystem/swap partition. In that case, you don't need to do
+ anything special for TuxOnIce when it comes to kernel configuration.
+
+ iii) Configuring other options.
+
+ While you're configuring your kernel, try to configure as much as possible
+ to build as modules. We recommend this because there are a number of drivers
+ that are still in the process of implementing proper power management
+ support. In those cases, the best way to work around their current lack is
+ to build them as modules and remove the modules while hibernating. You might
+ also bug the driver authors to get their support up to speed, or even help!
+
+ b. Storage.
+
+ i) Swap.
+
+ TuxOnIce can store the hibernation image in your swap partition, a swap file or
+ a combination thereof. Whichever combination you choose, you will probably
+ want to create enough swap space to store the largest image you could have,
+ plus the space you'd normally use for swap. A good rule of thumb would be
+ to calculate the amount of swap you'd want without using TuxOnIce, and then
+ add the amount of memory you have. This swapspace can be arranged in any way
+ you'd like. It can be in one partition or file, or spread over a number. The
+ only requirement is that they be active when you start a hibernation cycle.
+
+ There is one exception to this requirement. TuxOnIce has the ability to turn
+ on one swap file or partition at the start of hibernating and turn it back off
+ at the end. If you want to ensure you have enough memory to store a image
+ when your memory is fully used, you might want to make one swap partition or
+ file for 'normal' use, and another for TuxOnIce to activate & deactivate
+ automatically. (Further details below).
+
+ ii) Normal files.
+
+ TuxOnIce includes a 'file allocator'. The file allocator can store your
+ image in a simple file. Since Linux has the concept of everything being a
+ file, this is more powerful than it initially sounds. If, for example, you
+ were to set up a network block device file, you could hibernate to a network
+ server. This has been tested and works to a point, but nbd itself isn't
+ stateless enough for our purposes.
+
+ Take extra care when setting up the file allocator. If you just type
+ commands without thinking and then try to hibernate, you could cause
+ irreversible corruption on your filesystems! Make sure you have backups.
+
+ Most people will only want to hibernate to a local file. To achieve that, do
+ something along the lines of:
+
+ echo "TuxOnIce" > /hibernation-file
+ dd if=/dev/zero bs=1M count=512 >> /hibernation-file
+
+ This will create a 512MB file called /hibernation-file. To get TuxOnIce to use
+ it:
+
+ echo /hibernation-file > /sys/power/tuxonice/file/target
+
+ Then
+
+ cat /sys/power/tuxonice/resume
+
+ Put the results of this into your bootloader's configuration (see also step
+ C, below):
+
+ ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
+ # cat /sys/power/tuxonice/resume
+ file:/dev/hda2:0x1e001
+
+ In this example, we would edit the append= line of our lilo.conf|menu.lst
+ so that it included:
+
+ resume=file:/dev/hda2:0x1e001
+ ---EXAMPLE-ONLY-DON'T-COPY-AND-PASTE---
+
+ For those who are thinking 'Could I make the file sparse?', the answer is
+ 'No!'. At the moment, there is no way for TuxOnIce to fill in the holes in
+ a sparse file while hibernating. In the longer term (post merge!), I'd like
+ to change things so that the file could be dynamically resized and have
+ holes filled as needed. Right now, however, that's not possible and not a
+ priority.
+
+ c. Bootloader configuration.
+
+ Using TuxOnIce also requires that you add an extra parameter to
+ your lilo.conf or equivalent. Here's an example for a swap partition:
+
+ append="resume=swap:/dev/hda1"
+
+ This would tell TuxOnIce that /dev/hda1 is a swap partition you
+ have. TuxOnIce will use the swap signature of this partition as a
+ pointer to your data when you hibernate. This means that (in this example)
+ /dev/hda1 doesn't need to be _the_ swap partition where all of your data
+ is actually stored. It just needs to be a swap partition that has a
+ valid signature.
+
+ You don't need to have a swap partition for this purpose. TuxOnIce
+ can also use a swap file, but usage is a little more complex. Having made
+ your swap file, turn it on and do
+
+ cat /sys/power/tuxonice/swap/headerlocations
+
+ (this assumes you've already compiled your kernel with TuxOnIce
+ support and booted it). The results of the cat command will tell you
+ what you need to put in lilo.conf:
+
+ For swap partitions like /dev/hda1, simply use resume=/dev/hda1.
+ For swapfile `swapfile`, use resume=swap:/dev/hda2:0x242d.
+
+ If the swapfile changes for any reason (it is moved to a different
+ location, it is deleted and recreated, or the filesystem is
+ defragmented) then you will have to check
+ /sys/power/tuxonice/swap/headerlocations for a new resume_block value.
+
+ Once you've compiled and installed the kernel and adjusted your bootloader
+ configuration, you should only need to reboot for the most basic part
+ of TuxOnIce to be ready.
+
+ If you only compile in the swap allocator, or only compile in the file
+ allocator, you don't need to add the "swap:" part of the resume=
+ parameters above. resume=/dev/hda2:0x242d will work just as well. If you
+ have compiled both and your storage is on swap, you can also use this
+ format (the swap allocator is the default allocator).
+
+ When compiling your kernel, one of the options in the 'Power Management
+ Support' menu, just above the 'Enhanced Hibernation (TuxOnIce)' entry is
+ called 'Default resume partition'. This can be used to set a default value
+ for the resume= parameter.
+
+ d. The hibernate script.
+
+ Since the driver model in 2.6 kernels is still being developed, you may need
+ to do more than just configure TuxOnIce. Users of TuxOnIce usually start the
+ process via a script which prepares for the hibernation cycle, tells the
+ kernel to do its stuff and then restore things afterwards. This script might
+ involve:
+
+ - Switching to a text console and back if X doesn't like the video card
+ status on resume.
+ - Un/reloading drivers that don't play well with hibernation.
+
+ Note that you might not be able to unload some drivers if there are
+ processes using them. You might have to kill off processes that hold
+ devices open. Hint: if your X server accesses an USB mouse, doing a
+ 'chvt' to a text console releases the device and you can unload the
+ module.
+
+ Check out the latest script (available on tuxonice.net).
+
+ e. The userspace user interface.
+
+ TuxOnIce has very limited support for displaying status if you only apply
+ the kernel patch - it can printk messages, but that is all. In addition,
+ some of the functions mentioned in this document (such as cancelling a cycle
+ or performing interactive debugging) are unavailable. To utilise these
+ functions, or simply get a nice display, you need the 'userui' component.
+ Userui comes in three flavours, usplash, fbsplash and text. Text should
+ work on any console. Usplash and fbsplash require the appropriate
+ (distro specific?) support.
+
+ To utilise a userui, TuxOnIce just needs to be told where to find the
+ userspace binary:
+
+ echo "/usr/local/sbin/tuxoniceui_fbsplash" > /sys/power/tuxonice/user_interface/program
+
+ The hibernate script can do this for you, and a default value for this
+ setting can be configured when compiling the kernel. This path is also
+ stored in the image header, so if you have an initrd or initramfs, you can
+ use the userui during the first part of resuming (prior to the atomic
+ restore) by putting the binary in the same path in your initrd/ramfs.
+ Alternatively, you can put it in a different location and do an echo
+ similar to the above prior to the echo > do_resume. The value saved in the
+ image header will then be ignored.
+
+4. Why not just use the version already in the kernel?
+
+ The version in the vanilla kernel has a number of drawbacks. The most
+ serious of these are:
+ - it has a maximum image size of 1/2 total memory;
+ - it doesn't allocate storage until after it has snapshotted memory.
+ This means that you can't be sure hibernating will work until you
+ see it start to write the image;
+ - it does not allow you to press escape to cancel a cycle;
+ - it does not allow you to press escape to cancel resuming;
+ - it does not allow you to automatically swapon a file when
+ starting a cycle;
+ - it does not allow you to use multiple swap partitions or files;
+ - it does not allow you to use ordinary files;
+ - it just invalidates an image and continues to boot if you
+ accidentally boot the wrong kernel after hibernating;
+ - it doesn't support any sort of nice display while hibernating;
+ - it is moving toward requiring that you have an initrd/initramfs
+ to ever have a hope of resuming (uswsusp). While uswsusp will
+ address some of the concerns above, it won't address all of them,
+ and will be more complicated to get set up;
+ - it doesn't have support for suspend-to-both (write a hibernation
+ image, then suspend to ram; I think this is known as ReadySafe
+ under M$).
+
+5. How do you use it?
+
+ A hibernation cycle can be started directly by doing:
+
+ echo > /sys/power/tuxonice/do_hibernate
+
+ In practice, though, you'll probably want to use the hibernate script
+ to unload modules, configure the kernel the way you like it and so on.
+ In that case, you'd do (as root):
+
+ hibernate
+
+ See the hibernate script's man page for more details on the options it
+ takes.
+
+ If you're using the text or splash user interface modules, one feature of
+ TuxOnIce that you might find useful is that you can press Escape at any time
+ during hibernating, and the process will be aborted.
+
+ Due to the way hibernation works, this means you'll have your system back and
+ perfectly usable almost instantly. The only exception is when it's at the
+ very end of writing the image. Then it will need to reload a small (usually
+ 4-50MBs, depending upon the image characteristics) portion first.
+
+ Likewise, when resuming, you can press escape and resuming will be aborted.
+ The computer will then powerdown again according to settings at that time for
+ the powerdown method or rebooting.
+
+ You can change the settings for powering down while the image is being
+ written by pressing 'R' to toggle rebooting and 'O' to toggle between
+ suspending to ram and powering down completely).
+
+ If you run into problems with resuming, adding the "noresume" option to
+ the kernel command line will let you skip the resume step and recover your
+ system. This option shouldn't normally be needed, because TuxOnIce modifies
+ the image header prior to the atomic restore, and will thus prompt you
+ if it detects that you've tried to resume an image before (this flag is
+ removed if you press Escape to cancel a resume, so you won't be prompted
+ then).
+
+ Recent kernels (2.6.24 onwards) add support for resuming from a different
+ kernel to the one that was hibernated (thanks to Rafael for his work on
+ this - I've just embraced and enhanced the support for TuxOnIce). This
+ should further reduce the need for you to use the noresume option.
+
+6. What do all those entries in /sys/power/tuxonice do?
+
+ /sys/power/tuxonice is the directory which contains files you can use to
+ tune and configure TuxOnIce to your liking. The exact contents of
+ the directory will depend upon the version of TuxOnIce you're
+ running and the options you selected at compile time. In the following
+ descriptions, names in brackets refer to compile time options.
+ (Note that they're all dependant upon you having selected CONFIG_TUXONICE
+ in the first place!).
+
+ Since the values of these settings can open potential security risks, the
+ writeable ones are accessible only to the root user. You may want to
+ configure sudo to allow you to invoke your hibernate script as an ordinary
+ user.
+
+ - alloc/failure_test
+
+ This debugging option provides a way of testing TuxOnIce's handling of
+ memory allocation failures. Each allocation type that TuxOnIce makes has
+ been given a unique number (see the source code). Echo the appropriate
+ number into this entry, and when TuxOnIce attempts to do that allocation,
+ it will pretend there was a failure and act accordingly.
+
+ - alloc/find_max_mem_allocated
+
+ This debugging option will cause TuxOnIce to find the maximum amount of
+ memory it used during a cycle, and report that information in debugging
+ information at the end of the cycle.
+
+ - alt_resume_param
+
+ Instead of powering down after writing a hibernation image, TuxOnIce
+ supports resuming from a different image. This entry lets you set the
+ location of the signature for that image (the resume= value you'd use
+ for it). Using an alternate image and keep_image mode, you can do things
+ like using an alternate image to power down an uninterruptible power
+ supply.
+
+ - block_io/target_outstanding_io
+
+ This value controls the amount of memory that the block I/O code says it
+ needs when the core code is calculating how much memory is needed for
+ hibernating and for resuming. It doesn't directly control the amount of
+ I/O that is submitted at any one time - that depends on the amount of
+ available memory (we may have more available than we asked for), the
+ throughput that is being achieved and the ability of the CPU to keep up
+ with disk throughput (particularly where we're compressing pages).
+
+ - checksum/enabled
+
+ Use cryptoapi hashing routines to verify that Pageset2 pages don't change
+ while we're saving the first part of the image, and to get any pages that
+ do change resaved in the atomic copy. This should normally not be needed,
+ but if you're seeing issues, please enable this. If your issues stop you
+ being able to resume, enable this option, hibernate and cancel the cycle
+ after the atomic copy is done. If the debugging info shows a non-zero
+ number of pages resaved, please report this to Nigel.
+
+ - compression/algorithm
+
+ Set the cryptoapi algorithm used for compressing the image.
+
+ - compression/expected_compression
+
+ These values allow you to set an expected compression ratio, which TuxOnice
+ will use in calculating whether it meets constraints on the image size. If
+ this expected compression ratio is not attained, the hibernation cycle will
+ abort, so it is wise to allow some spare. You can see what compression
+ ratio is achieved in the logs after hibernating.
+
+ - debug_info:
+
+ This file returns information about your configuration that may be helpful
+ in diagnosing problems with hibernating.
+
+ - did_suspend_to_both:
+
+ This file can be used when you hibernate with powerdown method 3 (ie suspend
+ to ram after writing the image). There can be two outcomes in this case. We
+ can resume from the suspend-to-ram before the battery runs out, or we can run
+ out of juice and and up resuming like normal. This entry lets you find out,
+ post resume, which way we went. If the value is 1, we resumed from suspend
+ to ram. This can be useful when actions need to be run post suspend-to-ram
+ that don't need to be run if we did the normal resume from power off.
+
+ - do_hibernate:
+
+ When anything is written to this file, the kernel side of TuxOnIce will
+ begin to attempt to write an image to disk and power down. You'll normally
+ want to run the hibernate script instead, to get modules unloaded first.
+
+ - do_resume:
+
+ When anything is written to this file TuxOnIce will attempt to read and
+ restore an image. If there is no image, it will return almost immediately.
+ If an image exists, the echo > will never return. Instead, the original
+ kernel context will be restored and the original echo > do_hibernate will
+ return.
+
+ - */enabled
+
+ These option can be used to temporarily disable various parts of TuxOnIce.
+
+ - extra_pages_allowance
+
+ When TuxOnIce does its atomic copy, it calls the driver model suspend
+ and resume methods. If you have DRI enabled with a driver such as fglrx,
+ this can result in the driver allocating a substantial amount of memory
+ for storing its state. Extra_pages_allowance tells TuxOnIce how much
+ extra memory it should ensure is available for those allocations. If
+ your attempts at hibernating end with a message in dmesg indicating that
+ insufficient extra pages were allowed, you need to increase this value.
+
+ - file/target:
+
+ Read this value to get the current setting. Write to it to point TuxOnice
+ at a new storage location for the file allocator. See section 3.b.ii above
+ for details of how to set up the file allocator.
+
+ - freezer_test
+
+ This entry can be used to get TuxOnIce to just test the freezer and prepare
+ an image without actually doing a hibernation cycle. It is useful for
+ diagnosing freezing and image preparation issues.
+
+ - full_pageset2
+
+ TuxOnIce divides the pages that are stored in an image into two sets. The
+ difference between the two sets is that pages in pageset 1 are atomically
+ copied, and pages in pageset 2 are written to disk without being copied
+ first. A page CAN be written to disk without being copied first if and only
+ if its contents will not be modified or used at any time after userspace
+ processes are frozen. A page MUST be in pageset 1 if its contents are
+ modified or used at any time after userspace processes have been frozen.
+
+ Normally (ie if this option is enabled), TuxOnIce will put all pages on the
+ per-zone LRUs in pageset2, then remove those pages used by any userspace
+ user interface helper and TuxOnIce storage manager that are running,
+ together with pages used by the GEM memory manager introduced around 2.6.28
+ kernels.
+
+ If this option is disabled, a much more conservative approach will be taken.
+ The only pages in pageset2 will be those belonging to userspace processes,
+ with the exclusion of those belonging to the TuxOnIce userspace helpers
+ mentioned above. This will result in a much smaller pageset2, and will
+ therefore result in smaller images than are possible with this option
+ enabled.
+
+ - ignore_rootfs
+
+ TuxOnIce records which device is mounted as the root filesystem when
+ writing the hibernation image. It will normally check at resume time that
+ this device isn't already mounted - that would be a cause of filesystem
+ corruption. In some particular cases (RAM based root filesystems), you
+ might want to disable this check. This option allows you to do that.
+
+ - image_exists:
+
+ Can be used in a script to determine whether a valid image exists at the
+ location currently pointed to by resume=. Returns up to three lines.
+ The first is whether an image exists (-1 for unsure, otherwise 0 or 1).
+ If an image eixsts, additional lines will return the machine and version.
+ Echoing anything to this entry removes any current image.
+
+ - image_size_limit:
+
+ The maximum size of hibernation image written to disk, measured in megabytes
+ (1024*1024).
+
+ - last_result:
+
+ The result of the last hibernation cycle, as defined in
+ include/linux/suspend-debug.h with the values SUSPEND_ABORTED to
+ SUSPEND_KEPT_IMAGE. This is a bitmask.
+
+ - late_cpu_hotplug:
+
+ This sysfs entry controls whether cpu hotplugging is done - as normal - just
+ before (unplug) and after (replug) the atomic copy/restore (so that all
+ CPUs/cores are available for multithreaded I/O). The alternative is to
+ unplug all secondary CPUs/cores at the start of hibernating/resuming, and
+ replug them at the end of resuming. No multithreaded I/O will be possible in
+ this configuration, but the odd machine has been reported to require it.
+
+ - lid_file:
+
+ This determines which ACPI button file we look in to determine whether the
+ lid is open or closed after resuming from suspend to disk or power off.
+ If the entry is set to "lid/LID", we'll open /proc/acpi/button/lid/LID/state
+ and check its contents at the appropriate moment. See post_wake_state below
+ for more details on how this entry is used.
+
+ - log_everything (CONFIG_PM_DEBUG):
+
+ Setting this option results in all messages printed being logged. Normally,
+ only a subset are logged, so as to not slow the process and not clutter the
+ logs. Useful for debugging. It can be toggled during a cycle by pressing
+ 'L'.
+
+ - no_load_direct:
+
+ This is a debugging option. If, when loading the atomically copied pages of
+ an image, TuxOnIce finds that the destination address for a page is free,
+ it will normally allocate the image, load the data directly into that
+ address and skip it in the atomic restore. If this option is disabled, the
+ page will be loaded somewhere else and atomically restored like other pages.
+
+ - no_flusher_thread:
+
+ When doing multithreaded I/O (see below), the first online CPU can be used
+ to _just_ submit compressed pages when writing the image, rather than
+ compressing and submitting data. This option is normally disabled, but has
+ been included because Nigel would like to see whether it will be more useful
+ as the number of cores/cpus in computers increases.
+
+ - no_multithreaded_io:
+
+ TuxOnIce will normally create one thread per cpu/core on your computer,
+ each of which will then perform I/O. This will generally result in
+ throughput that's the maximum the storage medium can handle. There
+ shouldn't be any reason to disable multithreaded I/O now, but this option
+ has been retained for debugging purposes.
+
+ - no_pageset2
+
+ See the entry for full_pageset2 above for an explanation of pagesets.
+ Enabling this option causes TuxOnIce to do an atomic copy of all pages,
+ thereby limiting the maximum image size to 1/2 of memory, as swsusp does.
+
+ - no_pageset2_if_unneeded
+
+ See the entry for full_pageset2 above for an explanation of pagesets.
+ Enabling this option causes TuxOnIce to act like no_pageset2 was enabled
+ if and only it isn't needed anyway. This option may still make TuxOnIce
+ less reliable because pageset2 pages are normally used to store the
+ atomic copy - drivers that want to do allocations of larger amounts of
+ memory in one shot will be more likely to find that those amounts aren't
+ available if this option is enabled.
+
+ - pause_between_steps (CONFIG_PM_DEBUG):
+
+ This option is used during debugging, to make TuxOnIce pause between
+ each step of the process. It is ignored when the nice display is on.
+
+ - post_wake_state:
+
+ TuxOnIce provides support for automatically waking after a user-selected
+ delay, and using a different powerdown method if the lid is still closed.
+ (Yes, we're assuming a laptop). This entry lets you choose what state
+ should be entered next. The values are those described under
+ powerdown_method, below. It can be used to suspend to RAM after hibernating,
+ then powerdown properly (say) 20 minutes. It can also be used to power down
+ properly, then wake at (say) 6.30am and suspend to RAM until you're ready
+ to use the machine.
+
+ - powerdown_method:
+
+ Used to select a method by which TuxOnIce should powerdown after writing the
+ image. Currently:
+
+ 0: Don't use ACPI to power off.
+ 3: Attempt to enter Suspend-to-ram.
+ 4: Attempt to enter ACPI S4 mode.
+ 5: Attempt to power down via ACPI S5 mode.
+
+ Note that these options are highly dependant upon your hardware & software:
+
+ 3: When succesful, your machine suspends to ram instead of powering off.
+ The advantage of using this mode is that it doesn't matter whether your
+ battery has enough charge to make it through to your next resume. If it
+ lasts, you will simply resume from suspend to ram (and the image on disk
+ will be discarded). If the battery runs out, you will resume from disk
+ instead. The disadvantage is that it takes longer than a normal
+ suspend-to-ram to enter the state, since the suspend-to-disk image needs
+ to be written first.
+ 4/5: When successful, your machine will be off and comsume (almost) no power.
+ But it might still react to some external events like opening the lid or
+ trafic on a network or usb device. For the bios, resume is then the same
+ as warm boot, similar to a situation where you used the command `reboot'
+ to reboot your machine. If your machine has problems on warm boot or if
+ you want to protect your machine with the bios password, this is probably
+ not the right choice. Mode 4 may be necessary on some machines where ACPI
+ wake up methods need to be run to properly reinitialise hardware after a
+ hibernation cycle.
+ 0: Switch the machine completely off. The only possible wakeup is the power
+ button. For the bios, resume is then the same as a cold boot, in
+ particular you would have to provide your bios boot password if your
+ machine uses that feature for booting.
+
+ - progressbar_granularity_limit:
+
+ This option can be used to limit the granularity of the progress bar
+ displayed with a bootsplash screen. The value is the maximum number of
+ steps. That is, 10 will make the progress bar jump in 10% increments.
+
+ - reboot:
+
+ This option causes TuxOnIce to reboot rather than powering down
+ at the end of saving an image. It can be toggled during a cycle by pressing
+ 'R'.
+
+ - resume:
+
+ This sysfs entry can be used to read and set the location in which TuxOnIce
+ will look for the signature of an image - the value set using resume= at
+ boot time or CONFIG_PM_STD_PARTITION ("Default resume partition"). By
+ writing to this file as well as modifying your bootloader's configuration
+ file (eg menu.lst), you can set or reset the location of your image or the
+ method of storing the image without rebooting.
+
+ - replace_swsusp (CONFIG_TOI_REPLACE_SWSUSP):
+
+ This option makes
+
+ echo disk > /sys/power/state
+
+ activate TuxOnIce instead of swsusp. Regardless of whether this option is
+ enabled, any invocation of swsusp's resume time trigger will cause TuxOnIce
+ to check for an image too. This is due to the fact that at resume time, we
+ can't know whether this option was enabled until we see if an image is there
+ for us to resume from. (And when an image exists, we don't care whether we
+ did replace swsusp anyway - we just want to resume).
+
+ - resume_commandline:
+
+ This entry can be read after resuming to see the commandline that was used
+ when resuming began. You might use this to set up two bootloader entries
+ that are the same apart from the fact that one includes a extra append=
+ argument "at_work=1". You could then grep resume_commandline in your
+ post-resume scripts and configure networking (for example) differently
+ depending upon whether you're at home or work. resume_commandline can be
+ set to arbitrary text if you wish to remove sensitive contents.
+
+ - swap/swapfilename:
+
+ This entry is used to specify the swapfile or partition that
+ TuxOnIce will attempt to swapon/swapoff automatically. Thus, if
+ I normally use /dev/hda1 for swap, and want to use /dev/hda2 for specifically
+ for my hibernation image, I would
+
+ echo /dev/hda2 > /sys/power/tuxonice/swap/swapfile
+
+ /dev/hda2 would then be automatically swapon'd and swapoff'd. Note that the
+ swapon and swapoff occur while other processes are frozen (including kswapd)
+ so this swap file will not be used up when attempting to free memory. The
+ parition/file is also given the highest priority, so other swapfiles/partitions
+ will only be used to save the image when this one is filled.
+
+ The value of this file is used by headerlocations along with any currently
+ activated swapfiles/partitions.
+
+ - swap/headerlocations:
+
+ This option tells you the resume= options to use for swap devices you
+ currently have activated. It is particularly useful when you only want to
+ use a swap file to store your image. See above for further details.
+
+ - test_bio
+
+ This is a debugging option. When enabled, TuxOnIce will not hibernate.
+ Instead, when asked to write an image, it will skip the atomic copy,
+ just doing the writing of the image and then returning control to the
+ user at the point where it would have powered off. This is useful for
+ testing throughput in different configurations.
+
+ - test_filter_speed
+
+ This is a debugging option. When enabled, TuxOnIce will not hibernate.
+ Instead, when asked to write an image, it will not write anything or do
+ an atomic copy, but will only run any enabled compression algorithm on the
+ data that would have been written (the source pages of the atomic copy in
+ the case of pageset 1). This is useful for comparing the performance of
+ compression algorithms and for determining the extent to which an upgrade
+ to your storage method would improve hibernation speed.
+
+ - user_interface/debug_sections (CONFIG_PM_DEBUG):
+
+ This value, together with the console log level, controls what debugging
+ information is displayed. The console log level determines the level of
+ detail, and this value determines what detail is displayed. This value is
+ a bit vector, and the meaning of the bits can be found in the kernel tree
+ in include/linux/tuxonice.h. It can be overridden using the kernel's
+ command line option suspend_dbg.
+
+ - user_interface/default_console_level (CONFIG_PM_DEBUG):
+
+ This determines the value of the console log level at the start of a
+ hibernation cycle. If debugging is compiled in, the console log level can be
+ changed during a cycle by pressing the digit keys. Meanings are:
+
+ 0: Nice display.
+ 1: Nice display plus numerical progress.
+ 2: Errors only.
+ 3: Low level debugging info.
+ 4: Medium level debugging info.
+ 5: High level debugging info.
+ 6: Verbose debugging info.
+
+ - user_interface/enable_escape:
+
+ Setting this to "1" will enable you abort a hibernation cycle or resuming by
+ pressing escape, "0" (default) disables this feature. Note that enabling
+ this option means that you cannot initiate a hibernation cycle and then walk
+ away from your computer, expecting it to be secure. With feature disabled,
+ you can validly have this expectation once TuxOnice begins to write the
+ image to disk. (Prior to this point, it is possible that TuxOnice might
+ about because of failure to freeze all processes or because constraints
+ on its ability to save the image are not met).
+
+ - user_interface/program
+
+ This entry is used to tell TuxOnice what userspace program to use for
+ providing a user interface while hibernating. The program uses a netlink
+ socket to pass messages back and forward to the kernel, allowing all of the
+ functions formerly implemented in the kernel user interface components.
+
+ - version:
+
+ The version of TuxOnIce you have compiled into the currently running kernel.
+
+ - wake_alarm_dir:
+
+ As mentioned above (post_wake_state), TuxOnIce supports automatically waking
+ after some delay. This entry allows you to select which wake alarm to use.
+ It should contain the value "rtc0" if you're wanting to use
+ /sys/class/rtc/rtc0.
+
+ - wake_delay:
+
+ This value determines the delay from the end of writing the image until the
+ wake alarm is triggered. You can set an absolute time by writing the desired
+ time into /sys/class/rtc/<wake_alarm_dir>/wakealarm and leaving these values
+ empty.
+
+ Note that for the wakeup to actually occur, you may need to modify entries
+ in /proc/acpi/wakeup. This is done by echoing the name of the button in the
+ first column (eg PBTN) into the file.
+
+7. How do you get support?
+
+ Glad you asked. TuxOnIce is being actively maintained and supported
+ by Nigel (the guy doing most of the kernel coding at the moment), Bernard
+ (who maintains the hibernate script and userspace user interface components)
+ and its users.
+
+ Resources availble include HowTos, FAQs and a Wiki, all available via
+ tuxonice.net. You can find the mailing lists there.
+
+8. I think I've found a bug. What should I do?
+
+ By far and a way, the most common problems people have with TuxOnIce
+ related to drivers not having adequate power management support. In this
+ case, it is not a bug with TuxOnIce, but we can still help you. As we
+ mentioned above, such issues can usually be worked around by building the
+ functionality as modules and unloading them while hibernating. Please visit
+ the Wiki for up-to-date lists of known issues and work arounds.
+
+ If this information doesn't help, try running:
+
+ hibernate --bug-report
+
+ ..and sending the output to the users mailing list.
+
+ Good information on how to provide us with useful information from an
+ oops is found in the file REPORTING-BUGS, in the top level directory
+ of the kernel tree. If you get an oops, please especially note the
+ information about running what is printed on the screen through ksymoops.
+ The raw information is useless.
+
+9. When will XXX be supported?
+
+ If there's a feature missing from TuxOnIce that you'd like, feel free to
+ ask. We try to be obliging, within reason.
+
+ Patches are welcome. Please send to the list.
+
+10. How does it work?
+
+ TuxOnIce does its work in a number of steps.
+
+ a. Freezing system activity.
+
+ The first main stage in hibernating is to stop all other activity. This is
+ achieved in stages. Processes are considered in fours groups, which we will
+ describe in reverse order for clarity's sake: Threads with the PF_NOFREEZE
+ flag, kernel threads without this flag, userspace processes with the
+ PF_SYNCTHREAD flag and all other processes. The first set (PF_NOFREEZE) are
+ untouched by the refrigerator code. They are allowed to run during hibernating
+ and resuming, and are used to support user interaction, storage access or the
+ like. Other kernel threads (those unneeded while hibernating) are frozen last.
+ This leaves us with userspace processes that need to be frozen. When a
+ process enters one of the *_sync system calls, we set a PF_SYNCTHREAD flag on
+ that process for the duration of that call. Processes that have this flag are
+ frozen after processes without it, so that we can seek to ensure that dirty
+ data is synced to disk as quickly as possible in a situation where other
+ processes may be submitting writes at the same time. Freezing the processes
+ that are submitting data stops new I/O from being submitted. Syncthreads can
+ then cleanly finish their work. So the order is:
+
+ - Userspace processes without PF_SYNCTHREAD or PF_NOFREEZE;
+ - Userspace processes with PF_SYNCTHREAD (they won't have NOFREEZE);
+ - Kernel processes without PF_NOFREEZE.
+
+ b. Eating memory.
+
+ For a successful hibernation cycle, you need to have enough disk space to store the
+ image and enough memory for the various limitations of TuxOnIce's
+ algorithm. You can also specify a maximum image size. In order to attain
+ to those constraints, TuxOnIce may 'eat' memory. If, after freezing
+ processes, the constraints aren't met, TuxOnIce will thaw all the
+ other processes and begin to eat memory until its calculations indicate
+ the constraints are met. It will then freeze processes again and recheck
+ its calculations.
+
+ c. Allocation of storage.
+
+ Next, TuxOnIce allocates the storage that will be used to save
+ the image.
+
+ The core of TuxOnIce knows nothing about how or where pages are stored. We
+ therefore request the active allocator (remember you might have compiled in
+ more than one!) to allocate enough storage for our expect image size. If
+ this request cannot be fulfilled, we eat more memory and try again. If it
+ is fulfiled, we seek to allocate additional storage, just in case our
+ expected compression ratio (if any) isn't achieved. This time, however, we
+ just continue if we can't allocate enough storage.
+
+ If these calls to our allocator change the characteristics of the image
+ such that we haven't allocated enough memory, we also loop. (The allocator
+ may well need to allocate space for its storage information).
+
+ d. Write the first part of the image.
+
+ TuxOnIce stores the image in two sets of pages called 'pagesets'.
+ Pageset 2 contains pages on the active and inactive lists; essentially
+ the page cache. Pageset 1 contains all other pages, including the kernel.
+ We use two pagesets for one important reason: We need to make an atomic copy
+ of the kernel to ensure consistency of the image. Without a second pageset,
+ that would limit us to an image that was at most half the amount of memory
+ available. Using two pagesets allows us to store a full image. Since pageset
+ 2 pages won't be needed in saving pageset 1, we first save pageset 2 pages.
+ We can then make our atomic copy of the remaining pages using both pageset 2
+ pages and any other pages that are free. While saving both pagesets, we are
+ careful not to corrupt the image. Among other things, we use lowlevel block
+ I/O routines that don't change the pagecache contents.
+
+ The next step, then, is writing pageset 2.
+
+ e. Suspending drivers and storing processor context.
+
+ Having written pageset2, TuxOnIce calls the power management functions to
+ notify drivers of the hibernation, and saves the processor state in preparation
+ for the atomic copy of memory we are about to make.
+
+ f. Atomic copy.
+
+ At this stage, everything else but the TuxOnIce code is halted. Processes
+ are frozen or idling, drivers are quiesced and have stored (ideally and where
+ necessary) their configuration in memory we are about to atomically copy.
+ In our lowlevel architecture specific code, we have saved the CPU state.
+ We can therefore now do our atomic copy before resuming drivers etc.
+
+ g. Save the atomic copy (pageset 1).
+
+ TuxOnice can then write the atomic copy of the remaining pages. Since we
+ have copied the pages into other locations, we can continue to use the
+ normal block I/O routines without fear of corruption our image.
+
+ f. Save the image header.
+
+ Nearly there! We save our settings and other parameters needed for
+ reloading pageset 1 in an 'image header'. We also tell our allocator to
+ serialise its data at this stage, so that it can reread the image at resume
+ time.
+
+ g. Set the image header.
+
+ Finally, we edit the header at our resume= location. The signature is
+ changed by the allocator to reflect the fact that an image exists, and to
+ point to the start of that data if necessary (swap allocator).
+
+ h. Power down.
+
+ Or reboot if we're debugging and the appropriate option is selected.
+
+ Whew!
+
+ Reloading the image.
+ --------------------
+
+ Reloading the image is essentially the reverse of all the above. We load
+ our copy of pageset 1, being careful to choose locations that aren't going
+ to be overwritten as we copy it back (We start very early in the boot
+ process, so there are no other processes to quiesce here). We then copy
+ pageset 1 back to its original location in memory and restore the process
+ context. We are now running with the original kernel. Next, we reload the
+ pageset 2 pages, free the memory and swap used by TuxOnIce, restore
+ the pageset header and restart processes. Sounds easy in comparison to
+ hibernating, doesn't it!
+
+ There is of course more to TuxOnIce than this, but this explanation
+ should be a good start. If there's interest, I'll write further
+ documentation on range pages and the low level I/O.
+
+11. Who wrote TuxOnIce?
+
+ (Answer based on the writings of Florent Chabaud, credits in files and
+ Nigel's limited knowledge; apologies to anyone missed out!)
+
+ The main developers of TuxOnIce have been...
+
+ Gabor Kuti
+ Pavel Machek
+ Florent Chabaud
+ Bernard Blackham
+ Nigel Cunningham
+
+ Significant portions of swsusp, the code in the vanilla kernel which
+ TuxOnIce enhances, have been worked on by Rafael Wysocki. Thanks should
+ also be expressed to him.
+
+ The above mentioned developers have been aided in their efforts by a host
+ of hundreds, if not thousands of testers and people who have submitted bug
+ fixes & suggestions. Of special note are the efforts of Michael Frank, who
+ had his computers repetitively hibernate and resume for literally tens of
+ thousands of cycles and developed scripts to stress the system and test
+ TuxOnIce far beyond the point most of us (Nigel included!) would consider
+ testing. His efforts have contributed as much to TuxOnIce as any of the
+ names above.
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt
new file mode 100644
index 000000000..bbfcd1bbe
--- /dev/null
+++ b/Documentation/power/userland-swsusp.txt
@@ -0,0 +1,170 @@
+Documentation for userland software suspend interface
+ (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
+
+First, the warnings at the beginning of swsusp.txt still apply.
+
+Second, you should read the FAQ in swsusp.txt _now_ if you have not
+done it already.
+
+Now, to use the userland interface for software suspend you need special
+utilities that will read/write the system memory snapshot from/to the
+kernel. Such utilities are available, for example, from
+<http://suspend.sourceforge.net>. You may want to have a look at them if you
+are going to develop your own suspend/resume utilities.
+
+The interface consists of a character device providing the open(),
+release(), read(), and write() operations as well as several ioctl()
+commands defined in include/linux/suspend_ioctls.h . The major and minor
+numbers of the device are, respectively, 10 and 231, and they can
+be read from /sys/class/misc/snapshot/dev.
+
+The device can be open either for reading or for writing. If open for
+reading, it is considered to be in the suspend mode. Otherwise it is
+assumed to be in the resume mode. The device cannot be open for simultaneous
+reading and writing. It is also impossible to have the device open more than
+once at a time.
+
+Even opening the device has side effects. Data structures are
+allocated, and PM_HIBERNATION_PREPARE / PM_RESTORE_PREPARE chains are
+called.
+
+The ioctl() commands recognized by the device are:
+
+SNAPSHOT_FREEZE - freeze user space processes (the current process is
+ not frozen); this is required for SNAPSHOT_CREATE_IMAGE
+ and SNAPSHOT_ATOMIC_RESTORE to succeed
+
+SNAPSHOT_UNFREEZE - thaw user space processes frozen by SNAPSHOT_FREEZE
+
+SNAPSHOT_CREATE_IMAGE - create a snapshot of the system memory; the
+ last argument of ioctl() should be a pointer to an int variable,
+ the value of which will indicate whether the call returned after
+ creating the snapshot (1) or after restoring the system memory state
+ from it (0) (after resume the system finds itself finishing the
+ SNAPSHOT_CREATE_IMAGE ioctl() again); after the snapshot
+ has been created the read() operation can be used to transfer
+ it out of the kernel
+
+SNAPSHOT_ATOMIC_RESTORE - restore the system memory state from the
+ uploaded snapshot image; before calling it you should transfer
+ the system memory snapshot back to the kernel using the write()
+ operation; this call will not succeed if the snapshot
+ image is not available to the kernel
+
+SNAPSHOT_FREE - free memory allocated for the snapshot image
+
+SNAPSHOT_PREF_IMAGE_SIZE - set the preferred maximum size of the image
+ (the kernel will do its best to ensure the image size will not exceed
+ this number, but if it turns out to be impossible, the kernel will
+ create the smallest image possible)
+
+SNAPSHOT_GET_IMAGE_SIZE - return the actual size of the hibernation image
+
+SNAPSHOT_AVAIL_SWAP_SIZE - return the amount of available swap in bytes (the
+ last argument should be a pointer to an unsigned int variable that will
+ contain the result if the call is successful).
+
+SNAPSHOT_ALLOC_SWAP_PAGE - allocate a swap page from the resume partition
+ (the last argument should be a pointer to a loff_t variable that
+ will contain the swap page offset if the call is successful)
+
+SNAPSHOT_FREE_SWAP_PAGES - free all swap pages allocated by
+ SNAPSHOT_ALLOC_SWAP_PAGE
+
+SNAPSHOT_SET_SWAP_AREA - set the resume partition and the offset (in <PAGE_SIZE>
+ units) from the beginning of the partition at which the swap header is
+ located (the last ioctl() argument should point to a struct
+ resume_swap_area, as defined in kernel/power/suspend_ioctls.h,
+ containing the resume device specification and the offset); for swap
+ partitions the offset is always 0, but it is different from zero for
+ swap files (see Documentation/power/swsusp-and-swap-files.txt for
+ details).
+
+SNAPSHOT_PLATFORM_SUPPORT - enable/disable the hibernation platform support,
+ depending on the argument value (enable, if the argument is nonzero)
+
+SNAPSHOT_POWER_OFF - make the kernel transition the system to the hibernation
+ state (eg. ACPI S4) using the platform (eg. ACPI) driver
+
+SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to
+ immediately enter the suspend-to-RAM state, so this call must always
+ be preceded by the SNAPSHOT_FREEZE call and it is also necessary
+ to use the SNAPSHOT_UNFREEZE call after the system wakes up. This call
+ is needed to implement the suspend-to-both mechanism in which the
+ suspend image is first created, as though the system had been suspended
+ to disk, and then the system is suspended to RAM (this makes it possible
+ to resume the system from RAM if there's enough battery power or restore
+ its state on the basis of the saved suspend image otherwise)
+
+The device's read() operation can be used to transfer the snapshot image from
+the kernel. It has the following limitations:
+- you cannot read() more than one virtual memory page at a time
+- read()s across page boundaries are impossible (ie. if you read() 1/2 of
+ a page in the previous call, you will only be able to read()
+ _at_ _most_ 1/2 of the page in the next call)
+
+The device's write() operation is used for uploading the system memory snapshot
+into the kernel. It has the same limitations as the read() operation.
+
+The release() operation frees all memory allocated for the snapshot image
+and all swap pages allocated with SNAPSHOT_ALLOC_SWAP_PAGE (if any).
+Thus it is not necessary to use either SNAPSHOT_FREE or
+SNAPSHOT_FREE_SWAP_PAGES before closing the device (in fact it will also
+unfreeze user space processes frozen by SNAPSHOT_UNFREEZE if they are
+still frozen when the device is being closed).
+
+Currently it is assumed that the userland utilities reading/writing the
+snapshot image from/to the kernel will use a swap partition, called the resume
+partition, or a swap file as storage space (if a swap file is used, the resume
+partition is the partition that holds this file). However, this is not really
+required, as they can use, for example, a special (blank) suspend partition or
+a file on a partition that is unmounted before SNAPSHOT_CREATE_IMAGE and
+mounted afterwards.
+
+These utilities MUST NOT make any assumptions regarding the ordering of
+data within the snapshot image. The contents of the image are entirely owned
+by the kernel and its structure may be changed in future kernel releases.
+
+The snapshot image MUST be written to the kernel unaltered (ie. all of the image
+data, metadata and header MUST be written in _exactly_ the same amount, form
+and order in which they have been read). Otherwise, the behavior of the
+resumed system may be totally unpredictable.
+
+While executing SNAPSHOT_ATOMIC_RESTORE the kernel checks if the
+structure of the snapshot image is consistent with the information stored
+in the image header. If any inconsistencies are detected,
+SNAPSHOT_ATOMIC_RESTORE will not succeed. Still, this is not a fool-proof
+mechanism and the userland utilities using the interface SHOULD use additional
+means, such as checksums, to ensure the integrity of the snapshot image.
+
+The suspending and resuming utilities MUST lock themselves in memory,
+preferably using mlockall(), before calling SNAPSHOT_FREEZE.
+
+The suspending utility MUST check the value stored by SNAPSHOT_CREATE_IMAGE
+in the memory location pointed to by the last argument of ioctl() and proceed
+in accordance with it:
+1. If the value is 1 (ie. the system memory snapshot has just been
+ created and the system is ready for saving it):
+ (a) The suspending utility MUST NOT close the snapshot device
+ _unless_ the whole suspend procedure is to be cancelled, in
+ which case, if the snapshot image has already been saved, the
+ suspending utility SHOULD destroy it, preferably by zapping
+ its header. If the suspend is not to be cancelled, the
+ system MUST be powered off or rebooted after the snapshot
+ image has been saved.
+ (b) The suspending utility SHOULD NOT attempt to perform any
+ file system operations (including reads) on the file systems
+ that were mounted before SNAPSHOT_CREATE_IMAGE has been
+ called. However, it MAY mount a file system that was not
+ mounted at that time and perform some operations on it (eg.
+ use it for saving the image).
+2. If the value is 0 (ie. the system state has just been restored from
+ the snapshot image), the suspending utility MUST close the snapshot
+ device. Afterwards it will be treated as a regular userland process,
+ so it need not exit.
+
+The resuming utility SHOULD NOT attempt to mount any file systems that could
+be mounted before suspend and SHOULD NOT attempt to perform any operations
+involving such file systems.
+
+For details, please refer to the source code.
diff --git a/Documentation/power/video.txt b/Documentation/power/video.txt
new file mode 100644
index 000000000..3e6272bc4
--- /dev/null
+++ b/Documentation/power/video.txt
@@ -0,0 +1,185 @@
+
+ Video issues with S3 resume
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ 2003-2006, Pavel Machek
+
+During S3 resume, hardware needs to be reinitialized. For most
+devices, this is easy, and kernel driver knows how to do
+it. Unfortunately there's one exception: video card. Those are usually
+initialized by BIOS, and kernel does not have enough information to
+boot video card. (Kernel usually does not even contain video card
+driver -- vesafb and vgacon are widely used).
+
+This is not problem for swsusp, because during swsusp resume, BIOS is
+run normally so video card is normally initialized. It should not be
+problem for S1 standby, because hardware should retain its state over
+that.
+
+We either have to run video BIOS during early resume, or interpret it
+using vbetool later, or maybe nothing is necessary on particular
+system because video state is preserved. Unfortunately different
+methods work on different systems, and no known method suits all of
+them.
+
+Userland application called s2ram has been developed; it contains long
+whitelist of systems, and automatically selects working method for a
+given system. It can be downloaded from CVS at
+www.sf.net/projects/suspend . If you get a system that is not in the
+whitelist, please try to find a working solution, and submit whitelist
+entry so that work does not need to be repeated.
+
+Currently, VBE_SAVE method (6 below) works on most
+systems. Unfortunately, vbetool only runs after userland is resumed,
+so it makes debugging of early resume problems
+hard/impossible. Methods that do not rely on userland are preferable.
+
+Details
+~~~~~~~
+
+There are a few types of systems where video works after S3 resume:
+
+(1) systems where video state is preserved over S3.
+
+(2) systems where it is possible to call the video BIOS during S3
+ resume. Unfortunately, it is not correct to call the video BIOS at
+ that point, but it happens to work on some machines. Use
+ acpi_sleep=s3_bios.
+
+(3) systems that initialize video card into vga text mode and where
+ the BIOS works well enough to be able to set video mode. Use
+ acpi_sleep=s3_mode on these.
+
+(4) on some systems s3_bios kicks video into text mode, and
+ acpi_sleep=s3_bios,s3_mode is needed.
+
+(5) radeon systems, where X can soft-boot your video card. You'll need
+ a new enough X, and a plain text console (no vesafb or radeonfb). See
+ http://www.doesi.gmxhome.de/linux/tm800s3/s3.html for more information.
+ Alternatively, you should use vbetool (6) instead.
+
+(6) other radeon systems, where vbetool is enough to bring system back
+ to life. It needs text console to be working. Do vbetool vbestate
+ save > /tmp/delme; echo 3 > /proc/acpi/sleep; vbetool post; vbetool
+ vbestate restore < /tmp/delme; setfont <whatever>, and your video
+ should work.
+
+(7) on some systems, it is possible to boot most of kernel, and then
+ POSTing bios works. Ole Rohne has patch to do just that at
+ http://dev.gentoo.org/~marineam/patch-radeonfb-2.6.11-rc2-mm2.
+
+(8) on some systems, you can use the video_post utility and or
+ do echo 3 > /sys/power/state && /usr/sbin/video_post - which will
+ initialize the display in console mode. If you are in X, you can switch
+ to a virtual terminal and back to X using CTRL+ALT+F1 - CTRL+ALT+F7 to get
+ the display working in graphical mode again.
+
+Now, if you pass acpi_sleep=something, and it does not work with your
+bios, you'll get a hard crash during resume. Be careful. Also it is
+safest to do your experiments with plain old VGA console. The vesafb
+and radeonfb (etc) drivers have a tendency to crash the machine during
+resume.
+
+You may have a system where none of above works. At that point you
+either invent another ugly hack that works, or write proper driver for
+your video card (good luck getting docs :-(). Maybe suspending from X
+(proper X, knowing your hardware, not XF68_FBcon) might have better
+chance of working.
+
+Table of known working notebooks:
+
+Model hack (or "how to do it")
+------------------------------------------------------------------------------
+Acer Aspire 1406LC ole's late BIOS init (7), turn off DRI
+Acer TM 230 s3_bios (2)
+Acer TM 242FX vbetool (6)
+Acer TM C110 video_post (8)
+Acer TM C300 vga=normal (only suspend on console, not in X), vbetool (6) or video_post (8)
+Acer TM 4052LCi s3_bios (2)
+Acer TM 636Lci s3_bios,s3_mode (4)
+Acer TM 650 (Radeon M7) vga=normal plus boot-radeon (5) gets text console back
+Acer TM 660 ??? (*)
+Acer TM 800 vga=normal, X patches, see webpage (5) or vbetool (6)
+Acer TM 803 vga=normal, X patches, see webpage (5) or vbetool (6)
+Acer TM 803LCi vga=normal, vbetool (6)
+Arima W730a vbetool needed (6)
+Asus L2400D s3_mode (3)(***) (S1 also works OK)
+Asus L3350M (SiS 740) (6)
+Asus L3800C (Radeon M7) s3_bios (2) (S1 also works OK)
+Asus M6887Ne vga=normal, s3_bios (2), use radeon driver instead of fglrx in x.org
+Athlon64 desktop prototype s3_bios (2)
+Compal CL-50 ??? (*)
+Compaq Armada E500 - P3-700 none (1) (S1 also works OK)
+Compaq Evo N620c vga=normal, s3_bios (2)
+Dell 600m, ATI R250 Lf none (1), but needs xorg-x11-6.8.1.902-1
+Dell D600, ATI RV250 vga=normal and X, or try vbestate (6)
+Dell D610 vga=normal and X (possibly vbestate (6) too, but not tested)
+Dell Inspiron 4000 ??? (*)
+Dell Inspiron 500m ??? (*)
+Dell Inspiron 510m ???
+Dell Inspiron 5150 vbetool needed (6)
+Dell Inspiron 600m ??? (*)
+Dell Inspiron 8200 ??? (*)
+Dell Inspiron 8500 ??? (*)
+Dell Inspiron 8600 ??? (*)
+eMachines athlon64 machines vbetool needed (6) (someone please get me model #s)
+HP NC6000 s3_bios, may not use radeonfb (2); or vbetool (6)
+HP NX7000 ??? (*)
+HP Pavilion ZD7000 vbetool post needed, need open-source nv driver for X
+HP Omnibook XE3 athlon version none (1)
+HP Omnibook XE3GC none (1), video is S3 Savage/IX-MV
+HP Omnibook XE3L-GF vbetool (6)
+HP Omnibook 5150 none (1), (S1 also works OK)
+IBM TP T20, model 2647-44G none (1), video is S3 Inc. 86C270-294 Savage/IX-MV, vesafb gets "interesting" but X work.
+IBM TP A31 / Type 2652-M5G s3_mode (3) [works ok with BIOS 1.04 2002-08-23, but not at all with BIOS 1.11 2004-11-05 :-(]
+IBM TP R32 / Type 2658-MMG none (1)
+IBM TP R40 2722B3G ??? (*)
+IBM TP R50p / Type 1832-22U s3_bios (2)
+IBM TP R51 none (1)
+IBM TP T30 236681A ??? (*)
+IBM TP T40 / Type 2373-MU4 none (1)
+IBM TP T40p none (1)
+IBM TP R40p s3_bios (2)
+IBM TP T41p s3_bios (2), switch to X after resume
+IBM TP T42 s3_bios (2)
+IBM ThinkPad T42p (2373-GTG) s3_bios (2)
+IBM TP X20 ??? (*)
+IBM TP X30 s3_bios, s3_mode (4)
+IBM TP X31 / Type 2672-XXH none (1), use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
+IBM TP X32 none (1), but backlight is on and video is trashed after long suspend. s3_bios,s3_mode (4) works too. Perhaps that gets better results?
+IBM Thinkpad X40 Type 2371-7JG s3_bios,s3_mode (4)
+IBM TP 600e none(1), but a switch to console and back to X is needed
+Medion MD4220 ??? (*)
+Samsung P35 vbetool needed (6)
+Sharp PC-AR10 (ATI rage) none (1), backlight does not switch off
+Sony Vaio PCG-C1VRX/K s3_bios (2)
+Sony Vaio PCG-F403 ??? (*)
+Sony Vaio PCG-GRT995MP none (1), works with 'nv' X driver
+Sony Vaio PCG-GR7/K none (1), but needs radeonfb, use radeontool (http://fdd.com/software/radeon/) to turn off backlight.
+Sony Vaio PCG-N505SN ??? (*)
+Sony Vaio vgn-s260 X or boot-radeon can init it (5)
+Sony Vaio vgn-S580BH vga=normal, but suspend from X. Console will be blank unless you return to X.
+Sony Vaio vgn-FS115B s3_bios (2),s3_mode (4)
+Toshiba Libretto L5 none (1)
+Toshiba Libretto 100CT/110CT vbetool (6)
+Toshiba Portege 3020CT s3_mode (3)
+Toshiba Satellite 4030CDT s3_mode (3) (S1 also works OK)
+Toshiba Satellite 4080XCDT s3_mode (3) (S1 also works OK)
+Toshiba Satellite 4090XCDT ??? (*)
+Toshiba Satellite P10-554 s3_bios,s3_mode (4)(****)
+Toshiba M30 (2) xor X with nvidia driver using internal AGP
+Uniwill 244IIO ??? (*)
+
+Known working desktop systems
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Mainboard Graphics card hack (or "how to do it")
+------------------------------------------------------------------------------
+Asus A7V8X nVidia RIVA TNT2 model 64 s3_bios,s3_mode (4)
+
+
+(*) from https://wiki.ubuntu.com/HoaryPMResults, not sure
+ which options to use. If you know, please tell me.
+
+(***) To be tested with a newer kernel.
+
+(****) Not with SMP kernel, UP only.
diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX
new file mode 100644
index 000000000..6fd0e8bb8
--- /dev/null
+++ b/Documentation/powerpc/00-INDEX
@@ -0,0 +1,32 @@
+Index of files in Documentation/powerpc. If you think something about
+Linux/PPC needs an entry here, needs correction or you've written one
+please mail me.
+ Cort Dougan (cort@fsmlabs.com)
+
+00-INDEX
+ - this file
+bootwrapper.txt
+ - Information on how the powerpc kernel is wrapped for boot on various
+ different platforms.
+cpu_features.txt
+ - info on how we support a variety of CPUs with minimal compile-time
+ options.
+cxl.txt
+ - Overview of the CXL driver.
+eeh-pci-error-recovery.txt
+ - info on PCI Bus EEH Error Recovery
+firmware-assisted-dump.txt
+ - Documentation on the firmware assisted dump mechanism "fadump".
+hvcs.txt
+ - IBM "Hypervisor Virtual Console Server" Installation Guide
+mpc52xx.txt
+ - Linux 2.6.x on MPC52xx family
+pmu-ebb.txt
+ - Description of the API for using the PMU with Event Based Branches.
+qe_firmware.txt
+ - describes the layout of firmware binaries for the Freescale QUICC
+ Engine and the code that parses and uploads the microcode therein.
+ptrace.txt
+ - Information on the ptrace interfaces for hardware debug registers.
+transactional_memory.txt
+ - Overview of the Power8 transactional memory support.
diff --git a/Documentation/powerpc/bootwrapper.txt b/Documentation/powerpc/bootwrapper.txt
new file mode 100644
index 000000000..d60fced5e
--- /dev/null
+++ b/Documentation/powerpc/bootwrapper.txt
@@ -0,0 +1,141 @@
+The PowerPC boot wrapper
+------------------------
+Copyright (C) Secret Lab Technologies Ltd.
+
+PowerPC image targets compresses and wraps the kernel image (vmlinux) with
+a boot wrapper to make it usable by the system firmware. There is no
+standard PowerPC firmware interface, so the boot wrapper is designed to
+be adaptable for each kind of image that needs to be built.
+
+The boot wrapper can be found in the arch/powerpc/boot/ directory. The
+Makefile in that directory has targets for all the available image types.
+The different image types are used to support all of the various firmware
+interfaces found on PowerPC platforms. OpenFirmware is the most commonly
+used firmware type on general purpose PowerPC systems from Apple, IBM and
+others. U-Boot is typically found on embedded PowerPC hardware, but there
+are a handful of other firmware implementations which are also popular. Each
+firmware interface requires a different image format.
+
+The boot wrapper is built from the makefile in arch/powerpc/boot/Makefile and
+it uses the wrapper script (arch/powerpc/boot/wrapper) to generate target
+image. The details of the build system is discussed in the next section.
+Currently, the following image format targets exist:
+
+ cuImage.%: Backwards compatible uImage for older version of
+ U-Boot (for versions that don't understand the device
+ tree). This image embeds a device tree blob inside
+ the image. The boot wrapper, kernel and device tree
+ are all embedded inside the U-Boot uImage file format
+ with boot wrapper code that extracts data from the old
+ bd_info structure and loads the data into the device
+ tree before jumping into the kernel.
+ Because of the series of #ifdefs found in the
+ bd_info structure used in the old U-Boot interfaces,
+ cuImages are platform specific. Each specific
+ U-Boot platform has a different platform init file
+ which populates the embedded device tree with data
+ from the platform specific bd_info file. The platform
+ specific cuImage platform init code can be found in
+ arch/powerpc/boot/cuboot.*.c. Selection of the correct
+ cuImage init code for a specific board can be found in
+ the wrapper structure.
+ dtbImage.%: Similar to zImage, except device tree blob is embedded
+ inside the image instead of provided by firmware. The
+ output image file can be either an elf file or a flat
+ binary depending on the platform.
+ dtbImages are used on systems which do not have an
+ interface for passing a device tree directly.
+ dtbImages are similar to simpleImages except that
+ dtbImages have platform specific code for extracting
+ data from the board firmware, but simpleImages do not
+ talk to the firmware at all.
+ PlayStation 3 support uses dtbImage. So do Embedded
+ Planet boards using the PlanetCore firmware. Board
+ specific initialization code is typically found in a
+ file named arch/powerpc/boot/<platform>.c; but this
+ can be overridden by the wrapper script.
+ simpleImage.%: Firmware independent compressed image that does not
+ depend on any particular firmware interface and embeds
+ a device tree blob. This image is a flat binary that
+ can be loaded to any location in RAM and jumped to.
+ Firmware cannot pass any configuration data to the
+ kernel with this image type and it depends entirely on
+ the embedded device tree for all information.
+ The simpleImage is useful for booting systems with
+ an unknown firmware interface or for booting from
+ a debugger when no firmware is present (such as on
+ the Xilinx Virtex platform). The only assumption that
+ simpleImage makes is that RAM is correctly initialized
+ and that the MMU is either off or has RAM mapped to
+ base address 0.
+ simpleImage also supports inserting special platform
+ specific initialization code to the start of the bootup
+ sequence. The virtex405 platform uses this feature to
+ ensure that the cache is invalidated before caching
+ is enabled. Platform specific initialization code is
+ added as part of the wrapper script and is keyed on
+ the image target name. For example, all
+ simpleImage.virtex405-* targets will add the
+ virtex405-head.S initialization code (This also means
+ that the dts file for virtex405 targets should be
+ named (virtex405-<board>.dts). Search the wrapper
+ script for 'virtex405' and see the file
+ arch/powerpc/boot/virtex405-head.S for details.
+ treeImage.%; Image format for used with OpenBIOS firmware found
+ on some ppc4xx hardware. This image embeds a device
+ tree blob inside the image.
+ uImage: Native image format used by U-Boot. The uImage target
+ does not add any boot code. It just wraps a compressed
+ vmlinux in the uImage data structure. This image
+ requires a version of U-Boot that is able to pass
+ a device tree to the kernel at boot. If using an older
+ version of U-Boot, then you need to use a cuImage
+ instead.
+ zImage.%: Image format which does not embed a device tree.
+ Used by OpenFirmware and other firmware interfaces
+ which are able to supply a device tree. This image
+ expects firmware to provide the device tree at boot.
+ Typically, if you have general purpose PowerPC
+ hardware then you want this image format.
+
+Image types which embed a device tree blob (simpleImage, dtbImage, treeImage,
+and cuImage) all generate the device tree blob from a file in the
+arch/powerpc/boot/dts/ directory. The Makefile selects the correct device
+tree source based on the name of the target. Therefore, if the kernel is
+built with 'make treeImage.walnut simpleImage.virtex405-ml403', then the
+build system will use arch/powerpc/boot/dts/walnut.dts to build
+treeImage.walnut and arch/powerpc/boot/dts/virtex405-ml403.dts to build
+the simpleImage.virtex405-ml403.
+
+Two special targets called 'zImage' and 'zImage.initrd' also exist. These
+targets build all the default images as selected by the kernel configuration.
+Default images are selected by the boot wrapper Makefile
+(arch/powerpc/boot/Makefile) by adding targets to the $image-y variable. Look
+at the Makefile to see which default image targets are available.
+
+How it is built
+---------------
+arch/powerpc is designed to support multiplatform kernels, which means
+that a single vmlinux image can be booted on many different target boards.
+It also means that the boot wrapper must be able to wrap for many kinds of
+images on a single build. The design decision was made to not use any
+conditional compilation code (#ifdef, etc) in the boot wrapper source code.
+All of the boot wrapper pieces are buildable at any time regardless of the
+kernel configuration. Building all the wrapper bits on every kernel build
+also ensures that obscure parts of the wrapper are at the very least compile
+tested in a large variety of environments.
+
+The wrapper is adapted for different image types at link time by linking in
+just the wrapper bits that are appropriate for the image type. The 'wrapper
+script' (found in arch/powerpc/boot/wrapper) is called by the Makefile and
+is responsible for selecting the correct wrapper bits for the image type.
+The arguments are well documented in the script's comment block, so they
+are not repeated here. However, it is worth mentioning that the script
+uses the -p (platform) argument as the main method of deciding which wrapper
+bits to compile in. Look for the large 'case "$platform" in' block in the
+middle of the script. This is also the place where platform specific fixups
+can be selected by changing the link order.
+
+In particular, care should be taken when working with cuImages. cuImage
+wrapper bits are very board specific and care should be taken to make sure
+the target you are trying to build is supported by the wrapper bits.
diff --git a/Documentation/powerpc/cpu_families.txt b/Documentation/powerpc/cpu_families.txt
new file mode 100644
index 000000000..fc08e22fe
--- /dev/null
+++ b/Documentation/powerpc/cpu_families.txt
@@ -0,0 +1,221 @@
+CPU Families
+============
+
+This document tries to summarise some of the different cpu families that exist
+and are supported by arch/powerpc.
+
+
+Book3S (aka sPAPR)
+------------------
+
+ - Hash MMU
+ - Mix of 32 & 64 bit
+
+ +--------------+ +----------------+
+ | Old POWER | --------------> | RS64 (threads) |
+ +--------------+ +----------------+
+ |
+ |
+ v
+ +--------------+ +----------------+ +------+
+ | 601 | --------------> | 603 | ---> | e300 |
+ +--------------+ +----------------+ +------+
+ | |
+ | |
+ v v
+ +--------------+ +----------------+ +-------+
+ | 604 | | 750 (G3) | ---> | 750CX |
+ +--------------+ +----------------+ +-------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +----------------+ +-------+
+ | 620 (64 bit) | | 7400 | | 750CL |
+ +--------------+ +----------------+ +-------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +----------------+ +-------+
+ | POWER3/630 | | 7410 | | 750FX |
+ +--------------+ +----------------+ +-------+
+ | |
+ | |
+ v v
+ +--------------+ +----------------+
+ | POWER3+ | | 7450 |
+ +--------------+ +----------------+
+ | |
+ | |
+ v v
+ +--------------+ +----------------+
+ | POWER4 | | 7455 |
+ +--------------+ +----------------+
+ | |
+ | |
+ v v
+ +--------------+ +-------+ +----------------+
+ | POWER4+ | --> | 970 | | 7447 |
+ +--------------+ +-------+ +----------------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +-------+ +----------------+
+ | POWER5 | | 970FX | | 7448 |
+ +--------------+ +-------+ +----------------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +-------+ +----------------+
+ | POWER5+ | | 970MP | | e600 |
+ +--------------+ +-------+ +----------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER5++ |
+ +--------------+
+ |
+ |
+ v
+ +--------------+ +-------+
+ | POWER6 | <-?-> | Cell |
+ +--------------+ +-------+
+ |
+ |
+ v
+ +--------------+
+ | POWER7 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER7+ |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER8 |
+ +--------------+
+
+
+ +---------------+
+ | PA6T (64 bit) |
+ +---------------+
+
+
+IBM BookE
+---------
+
+ - Software loaded TLB.
+ - All 32 bit
+
+ +--------------+
+ | 401 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | 403 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | 405 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | 440 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+ +----------------+
+ | 450 | --> | BG/P |
+ +--------------+ +----------------+
+ |
+ |
+ v
+ +--------------+
+ | 460 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | 476 |
+ +--------------+
+
+
+Motorola/Freescale 8xx
+----------------------
+
+ - Software loaded with hardware assist.
+ - All 32 bit
+
+ +-------------+
+ | MPC8xx Core |
+ +-------------+
+
+
+Freescale BookE
+---------------
+
+ - Software loaded TLB.
+ - e6500 adds HW loaded indirect TLB entries.
+ - Mix of 32 & 64 bit
+
+ +--------------+
+ | e200 |
+ +--------------+
+
+
+ +--------------------------------+
+ | e500 |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e500v2 |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e500mc (Book3e) |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e5500 (64 bit) |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e6500 (HW TLB) (Multithreaded) |
+ +--------------------------------+
+
+
+IBM A2 core
+-----------
+
+ - Book3E, software loaded TLB + HW loaded indirect TLB entries.
+ - 64 bit
+
+ +--------------+ +----------------+
+ | A2 core | --> | WSP |
+ +--------------+ +----------------+
+ |
+ |
+ v
+ +--------------+
+ | BG/Q |
+ +--------------+
diff --git a/Documentation/powerpc/cpu_features.txt b/Documentation/powerpc/cpu_features.txt
new file mode 100644
index 000000000..ae09df872
--- /dev/null
+++ b/Documentation/powerpc/cpu_features.txt
@@ -0,0 +1,56 @@
+Hollis Blanchard <hollis@austin.ibm.com>
+5 Jun 2002
+
+This document describes the system (including self-modifying code) used in the
+PPC Linux kernel to support a variety of PowerPC CPUs without requiring
+compile-time selection.
+
+Early in the boot process the ppc32 kernel detects the current CPU type and
+chooses a set of features accordingly. Some examples include Altivec support,
+split instruction and data caches, and if the CPU supports the DOZE and NAP
+sleep modes.
+
+Detection of the feature set is simple. A list of processors can be found in
+arch/powerpc/kernel/cputable.c. The PVR register is masked and compared with
+each value in the list. If a match is found, the cpu_features of cur_cpu_spec
+is assigned to the feature bitmask for this processor and a __setup_cpu
+function is called.
+
+C code may test 'cur_cpu_spec[smp_processor_id()]->cpu_features' for a
+particular feature bit. This is done in quite a few places, for example
+in ppc_setup_l2cr().
+
+Implementing cpufeatures in assembly is a little more involved. There are
+several paths that are performance-critical and would suffer if an array
+index, structure dereference, and conditional branch were added. To avoid the
+performance penalty but still allow for runtime (rather than compile-time) CPU
+selection, unused code is replaced by 'nop' instructions. This nop'ing is
+based on CPU 0's capabilities, so a multi-processor system with non-identical
+processors will not work (but such a system would likely have other problems
+anyways).
+
+After detecting the processor type, the kernel patches out sections of code
+that shouldn't be used by writing nop's over it. Using cpufeatures requires
+just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S
+transfer_to_handler:
+
+ #ifdef CONFIG_ALTIVEC
+ BEGIN_FTR_SECTION
+ mfspr r22,SPRN_VRSAVE /* if G4, save vrsave register value */
+ stw r22,THREAD_VRSAVE(r23)
+ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ #endif /* CONFIG_ALTIVEC */
+
+If CPU 0 supports Altivec, the code is left untouched. If it doesn't, both
+instructions are replaced with nop's.
+
+The END_FTR_SECTION macro has two simpler variations: END_FTR_SECTION_IFSET
+and END_FTR_SECTION_IFCLR. These simply test if a flag is set (in
+cur_cpu_spec[0]->cpu_features) or is cleared, respectively. These two macros
+should be used in the majority of cases.
+
+The END_FTR_SECTION macros are implemented by storing information about this
+code in the '__ftr_fixup' ELF section. When do_cpu_ftr_fixups
+(arch/powerpc/kernel/misc.S) is invoked, it will iterate over the records in
+__ftr_fixup, and if the required feature is not present it will loop writing
+nop's from each BEGIN_FTR_SECTION to END_FTR_SECTION.
diff --git a/Documentation/powerpc/cxl.txt b/Documentation/powerpc/cxl.txt
new file mode 100644
index 000000000..2c71ecc51
--- /dev/null
+++ b/Documentation/powerpc/cxl.txt
@@ -0,0 +1,379 @@
+Coherent Accelerator Interface (CXL)
+====================================
+
+Introduction
+============
+
+ The coherent accelerator interface is designed to allow the
+ coherent connection of accelerators (FPGAs and other devices) to a
+ POWER system. These devices need to adhere to the Coherent
+ Accelerator Interface Architecture (CAIA).
+
+ IBM refers to this as the Coherent Accelerator Processor Interface
+ or CAPI. In the kernel it's referred to by the name CXL to avoid
+ confusion with the ISDN CAPI subsystem.
+
+ Coherent in this context means that the accelerator and CPUs can
+ both access system memory directly and with the same effective
+ addresses.
+
+
+Hardware overview
+=================
+
+ POWER8 FPGA
+ +----------+ +---------+
+ | | | |
+ | CPU | | AFU |
+ | | | |
+ | | | |
+ | | | |
+ +----------+ +---------+
+ | PHB | | |
+ | +------+ | PSL |
+ | | CAPP |<------>| |
+ +---+------+ PCIE +---------+
+
+ The POWER8 chip has a Coherently Attached Processor Proxy (CAPP)
+ unit which is part of the PCIe Host Bridge (PHB). This is managed
+ by Linux by calls into OPAL. Linux doesn't directly program the
+ CAPP.
+
+ The FPGA (or coherently attached device) consists of two parts.
+ The POWER Service Layer (PSL) and the Accelerator Function Unit
+ (AFU). The AFU is used to implement specific functionality behind
+ the PSL. The PSL, among other things, provides memory address
+ translation services to allow each AFU direct access to userspace
+ memory.
+
+ The AFU is the core part of the accelerator (eg. the compression,
+ crypto etc function). The kernel has no knowledge of the function
+ of the AFU. Only userspace interacts directly with the AFU.
+
+ The PSL provides the translation and interrupt services that the
+ AFU needs. This is what the kernel interacts with. For example, if
+ the AFU needs to read a particular effective address, it sends
+ that address to the PSL, the PSL then translates it, fetches the
+ data from memory and returns it to the AFU. If the PSL has a
+ translation miss, it interrupts the kernel and the kernel services
+ the fault. The context to which this fault is serviced is based on
+ who owns that acceleration function.
+
+
+AFU Modes
+=========
+
+ There are two programming modes supported by the AFU. Dedicated
+ and AFU directed. AFU may support one or both modes.
+
+ When using dedicated mode only one MMU context is supported. In
+ this mode, only one userspace process can use the accelerator at
+ time.
+
+ When using AFU directed mode, up to 16K simultaneous contexts can
+ be supported. This means up to 16K simultaneous userspace
+ applications may use the accelerator (although specific AFUs may
+ support fewer). In this mode, the AFU sends a 16 bit context ID
+ with each of its requests. This tells the PSL which context is
+ associated with each operation. If the PSL can't translate an
+ operation, the ID can also be accessed by the kernel so it can
+ determine the userspace context associated with an operation.
+
+
+MMIO space
+==========
+
+ A portion of the accelerator MMIO space can be directly mapped
+ from the AFU to userspace. Either the whole space can be mapped or
+ just a per context portion. The hardware is self describing, hence
+ the kernel can determine the offset and size of the per context
+ portion.
+
+
+Interrupts
+==========
+
+ AFUs may generate interrupts that are destined for userspace. These
+ are received by the kernel as hardware interrupts and passed onto
+ userspace by a read syscall documented below.
+
+ Data storage faults and error interrupts are handled by the kernel
+ driver.
+
+
+Work Element Descriptor (WED)
+=============================
+
+ The WED is a 64-bit parameter passed to the AFU when a context is
+ started. Its format is up to the AFU hence the kernel has no
+ knowledge of what it represents. Typically it will be the
+ effective address of a work queue or status block where the AFU
+ and userspace can share control and status information.
+
+
+
+
+User API
+========
+
+ For AFUs operating in AFU directed mode, two character device
+ files will be created. /dev/cxl/afu0.0m will correspond to a
+ master context and /dev/cxl/afu0.0s will correspond to a slave
+ context. Master contexts have access to the full MMIO space an
+ AFU provides. Slave contexts have access to only the per process
+ MMIO space an AFU provides.
+
+ For AFUs operating in dedicated process mode, the driver will
+ only create a single character device per AFU called
+ /dev/cxl/afu0.0d. This will have access to the entire MMIO space
+ that the AFU provides (like master contexts in AFU directed).
+
+ The types described below are defined in include/uapi/misc/cxl.h
+
+ The following file operations are supported on both slave and
+ master devices.
+
+
+open
+----
+
+ Opens the device and allocates a file descriptor to be used with
+ the rest of the API.
+
+ A dedicated mode AFU only has one context and only allows the
+ device to be opened once.
+
+ An AFU directed mode AFU can have many contexts, the device can be
+ opened once for each context that is available.
+
+ When all available contexts are allocated the open call will fail
+ and return -ENOSPC.
+
+ Note: IRQs need to be allocated for each context, which may limit
+ the number of contexts that can be created, and therefore
+ how many times the device can be opened. The POWER8 CAPP
+ supports 2040 IRQs and 3 are used by the kernel, so 2037 are
+ left. If 1 IRQ is needed per context, then only 2037
+ contexts can be allocated. If 4 IRQs are needed per context,
+ then only 2037/4 = 509 contexts can be allocated.
+
+
+ioctl
+-----
+
+ CXL_IOCTL_START_WORK:
+ Starts the AFU context and associates it with the current
+ process. Once this ioctl is successfully executed, all memory
+ mapped into this process is accessible to this AFU context
+ using the same effective addresses. No additional calls are
+ required to map/unmap memory. The AFU memory context will be
+ updated as userspace allocates and frees memory. This ioctl
+ returns once the AFU context is started.
+
+ Takes a pointer to a struct cxl_ioctl_start_work:
+
+ struct cxl_ioctl_start_work {
+ __u64 flags;
+ __u64 work_element_descriptor;
+ __u64 amr;
+ __s16 num_interrupts;
+ __s16 reserved1;
+ __s32 reserved2;
+ __u64 reserved3;
+ __u64 reserved4;
+ __u64 reserved5;
+ __u64 reserved6;
+ };
+
+ flags:
+ Indicates which optional fields in the structure are
+ valid.
+
+ work_element_descriptor:
+ The Work Element Descriptor (WED) is a 64-bit argument
+ defined by the AFU. Typically this is an effective
+ address pointing to an AFU specific structure
+ describing what work to perform.
+
+ amr:
+ Authority Mask Register (AMR), same as the powerpc
+ AMR. This field is only used by the kernel when the
+ corresponding CXL_START_WORK_AMR value is specified in
+ flags. If not specified the kernel will use a default
+ value of 0.
+
+ num_interrupts:
+ Number of userspace interrupts to request. This field
+ is only used by the kernel when the corresponding
+ CXL_START_WORK_NUM_IRQS value is specified in flags.
+ If not specified the minimum number required by the
+ AFU will be allocated. The min and max number can be
+ obtained from sysfs.
+
+ reserved fields:
+ For ABI padding and future extensions
+
+ CXL_IOCTL_GET_PROCESS_ELEMENT:
+ Get the current context id, also known as the process element.
+ The value is returned from the kernel as a __u32.
+
+
+mmap
+----
+
+ An AFU may have an MMIO space to facilitate communication with the
+ AFU. If it does, the MMIO space can be accessed via mmap. The size
+ and contents of this area are specific to the particular AFU. The
+ size can be discovered via sysfs.
+
+ In AFU directed mode, master contexts are allowed to map all of
+ the MMIO space and slave contexts are allowed to only map the per
+ process MMIO space associated with the context. In dedicated
+ process mode the entire MMIO space can always be mapped.
+
+ This mmap call must be done after the START_WORK ioctl.
+
+ Care should be taken when accessing MMIO space. Only 32 and 64-bit
+ accesses are supported by POWER8. Also, the AFU will be designed
+ with a specific endianness, so all MMIO accesses should consider
+ endianness (recommend endian(3) variants like: le64toh(),
+ be64toh() etc). These endian issues equally apply to shared memory
+ queues the WED may describe.
+
+
+read
+----
+
+ Reads events from the AFU. Blocks if no events are pending
+ (unless O_NONBLOCK is supplied). Returns -EIO in the case of an
+ unrecoverable error or if the card is removed.
+
+ read() will always return an integral number of events.
+
+ The buffer passed to read() must be at least 4K bytes.
+
+ The result of the read will be a buffer of one or more events,
+ each event is of type struct cxl_event, of varying size.
+
+ struct cxl_event {
+ struct cxl_event_header header;
+ union {
+ struct cxl_event_afu_interrupt irq;
+ struct cxl_event_data_storage fault;
+ struct cxl_event_afu_error afu_error;
+ };
+ };
+
+ The struct cxl_event_header is defined as:
+
+ struct cxl_event_header {
+ __u16 type;
+ __u16 size;
+ __u16 process_element;
+ __u16 reserved1;
+ };
+
+ type:
+ This defines the type of event. The type determines how
+ the rest of the event is structured. These types are
+ described below and defined by enum cxl_event_type.
+
+ size:
+ This is the size of the event in bytes including the
+ struct cxl_event_header. The start of the next event can
+ be found at this offset from the start of the current
+ event.
+
+ process_element:
+ Context ID of the event.
+
+ reserved field:
+ For future extensions and padding.
+
+ If the event type is CXL_EVENT_AFU_INTERRUPT then the event
+ structure is defined as:
+
+ struct cxl_event_afu_interrupt {
+ __u16 flags;
+ __u16 irq; /* Raised AFU interrupt number */
+ __u32 reserved1;
+ };
+
+ flags:
+ These flags indicate which optional fields are present
+ in this struct. Currently all fields are mandatory.
+
+ irq:
+ The IRQ number sent by the AFU.
+
+ reserved field:
+ For future extensions and padding.
+
+ If the event type is CXL_EVENT_DATA_STORAGE then the event
+ structure is defined as:
+
+ struct cxl_event_data_storage {
+ __u16 flags;
+ __u16 reserved1;
+ __u32 reserved2;
+ __u64 addr;
+ __u64 dsisr;
+ __u64 reserved3;
+ };
+
+ flags:
+ These flags indicate which optional fields are present in
+ this struct. Currently all fields are mandatory.
+
+ address:
+ The address that the AFU unsuccessfully attempted to
+ access. Valid accesses will be handled transparently by the
+ kernel but invalid accesses will generate this event.
+
+ dsisr:
+ This field gives information on the type of fault. It is a
+ copy of the DSISR from the PSL hardware when the address
+ fault occurred. The form of the DSISR is as defined in the
+ CAIA.
+
+ reserved fields:
+ For future extensions
+
+ If the event type is CXL_EVENT_AFU_ERROR then the event structure
+ is defined as:
+
+ struct cxl_event_afu_error {
+ __u16 flags;
+ __u16 reserved1;
+ __u32 reserved2;
+ __u64 error;
+ };
+
+ flags:
+ These flags indicate which optional fields are present in
+ this struct. Currently all fields are Mandatory.
+
+ error:
+ Error status from the AFU. Defined by the AFU.
+
+ reserved fields:
+ For future extensions and padding
+
+Sysfs Class
+===========
+
+ A cxl sysfs class is added under /sys/class/cxl to facilitate
+ enumeration and tuning of the accelerators. Its layout is
+ described in Documentation/ABI/testing/sysfs-class-cxl
+
+Udev rules
+==========
+
+ The following udev rules could be used to create a symlink to the
+ most logical chardev to use in any programming mode (afuX.Yd for
+ dedicated, afuX.Ys for afu directed), since the API is virtually
+ identical for each:
+
+ SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b"
+ SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \
+ KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b"
diff --git a/Documentation/powerpc/eeh-pci-error-recovery.txt b/Documentation/powerpc/eeh-pci-error-recovery.txt
new file mode 100644
index 000000000..9d4e33df6
--- /dev/null
+++ b/Documentation/powerpc/eeh-pci-error-recovery.txt
@@ -0,0 +1,334 @@
+
+
+ PCI Bus EEH Error Recovery
+ --------------------------
+ Linas Vepstas
+ <linas@austin.ibm.com>
+ 12 January 2005
+
+
+Overview:
+---------
+The IBM POWER-based pSeries and iSeries computers include PCI bus
+controller chips that have extended capabilities for detecting and
+reporting a large variety of PCI bus error conditions. These features
+go under the name of "EEH", for "Extended Error Handling". The EEH
+hardware features allow PCI bus errors to be cleared and a PCI
+card to be "rebooted", without also having to reboot the operating
+system.
+
+This is in contrast to traditional PCI error handling, where the
+PCI chip is wired directly to the CPU, and an error would cause
+a CPU machine-check/check-stop condition, halting the CPU entirely.
+Another "traditional" technique is to ignore such errors, which
+can lead to data corruption, both of user data or of kernel data,
+hung/unresponsive adapters, or system crashes/lockups. Thus,
+the idea behind EEH is that the operating system can become more
+reliable and robust by protecting it from PCI errors, and giving
+the OS the ability to "reboot"/recover individual PCI devices.
+
+Future systems from other vendors, based on the PCI-E specification,
+may contain similar features.
+
+
+Causes of EEH Errors
+--------------------
+EEH was originally designed to guard against hardware failure, such
+as PCI cards dying from heat, humidity, dust, vibration and bad
+electrical connections. The vast majority of EEH errors seen in
+"real life" are due to either poorly seated PCI cards, or,
+unfortunately quite commonly, due to device driver bugs, device firmware
+bugs, and sometimes PCI card hardware bugs.
+
+The most common software bug, is one that causes the device to
+attempt to DMA to a location in system memory that has not been
+reserved for DMA access for that card. This is a powerful feature,
+as it prevents what; otherwise, would have been silent memory
+corruption caused by the bad DMA. A number of device driver
+bugs have been found and fixed in this way over the past few
+years. Other possible causes of EEH errors include data or
+address line parity errors (for example, due to poor electrical
+connectivity due to a poorly seated card), and PCI-X split-completion
+errors (due to software, device firmware, or device PCI hardware bugs).
+The vast majority of "true hardware failures" can be cured by
+physically removing and re-seating the PCI card.
+
+
+Detection and Recovery
+----------------------
+In the following discussion, a generic overview of how to detect
+and recover from EEH errors will be presented. This is followed
+by an overview of how the current implementation in the Linux
+kernel does it. The actual implementation is subject to change,
+and some of the finer points are still being debated. These
+may in turn be swayed if or when other architectures implement
+similar functionality.
+
+When a PCI Host Bridge (PHB, the bus controller connecting the
+PCI bus to the system CPU electronics complex) detects a PCI error
+condition, it will "isolate" the affected PCI card. Isolation
+will block all writes (either to the card from the system, or
+from the card to the system), and it will cause all reads to
+return all-ff's (0xff, 0xffff, 0xffffffff for 8/16/32-bit reads).
+This value was chosen because it is the same value you would
+get if the device was physically unplugged from the slot.
+This includes access to PCI memory, I/O space, and PCI config
+space. Interrupts; however, will continued to be delivered.
+
+Detection and recovery are performed with the aid of ppc64
+firmware. The programming interfaces in the Linux kernel
+into the firmware are referred to as RTAS (Run-Time Abstraction
+Services). The Linux kernel does not (should not) access
+the EEH function in the PCI chipsets directly, primarily because
+there are a number of different chipsets out there, each with
+different interfaces and quirks. The firmware provides a
+uniform abstraction layer that will work with all pSeries
+and iSeries hardware (and be forwards-compatible).
+
+If the OS or device driver suspects that a PCI slot has been
+EEH-isolated, there is a firmware call it can make to determine if
+this is the case. If so, then the device driver should put itself
+into a consistent state (given that it won't be able to complete any
+pending work) and start recovery of the card. Recovery normally
+would consist of resetting the PCI device (holding the PCI #RST
+line high for two seconds), followed by setting up the device
+config space (the base address registers (BAR's), latency timer,
+cache line size, interrupt line, and so on). This is followed by a
+reinitialization of the device driver. In a worst-case scenario,
+the power to the card can be toggled, at least on hot-plug-capable
+slots. In principle, layers far above the device driver probably
+do not need to know that the PCI card has been "rebooted" in this
+way; ideally, there should be at most a pause in Ethernet/disk/USB
+I/O while the card is being reset.
+
+If the card cannot be recovered after three or four resets, the
+kernel/device driver should assume the worst-case scenario, that the
+card has died completely, and report this error to the sysadmin.
+In addition, error messages are reported through RTAS and also through
+syslogd (/var/log/messages) to alert the sysadmin of PCI resets.
+The correct way to deal with failed adapters is to use the standard
+PCI hotplug tools to remove and replace the dead card.
+
+
+Current PPC64 Linux EEH Implementation
+--------------------------------------
+At this time, a generic EEH recovery mechanism has been implemented,
+so that individual device drivers do not need to be modified to support
+EEH recovery. This generic mechanism piggy-backs on the PCI hotplug
+infrastructure, and percolates events up through the userspace/udev
+infrastructure. Following is a detailed description of how this is
+accomplished.
+
+EEH must be enabled in the PHB's very early during the boot process,
+and if a PCI slot is hot-plugged. The former is performed by
+eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the later by
+drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code.
+EEH must be enabled before a PCI scan of the device can proceed.
+Current Power5 hardware will not work unless EEH is enabled;
+although older Power4 can run with it disabled. Effectively,
+EEH can no longer be turned off. PCI devices *must* be
+registered with the EEH code; the EEH code needs to know about
+the I/O address ranges of the PCI device in order to detect an
+error. Given an arbitrary address, the routine
+pci_get_device_by_addr() will find the pci device associated
+with that address (if any).
+
+The default arch/powerpc/include/asm/io.h macros readb(), inb(), insb(),
+etc. include a check to see if the i/o read returned all-0xff's.
+If so, these make a call to eeh_dn_check_failure(), which in turn
+asks the firmware if the all-ff's value is the sign of a true EEH
+error. If it is not, processing continues as normal. The grand
+total number of these false alarms or "false positives" can be
+seen in /proc/ppc64/eeh (subject to change). Normally, almost
+all of these occur during boot, when the PCI bus is scanned, where
+a large number of 0xff reads are part of the bus scan procedure.
+
+If a frozen slot is detected, code in
+arch/powerpc/platforms/pseries/eeh.c will print a stack trace to
+syslog (/var/log/messages). This stack trace has proven to be very
+useful to device-driver authors for finding out at what point the EEH
+error was detected, as the error itself usually occurs slightly
+beforehand.
+
+Next, it uses the Linux kernel notifier chain/work queue mechanism to
+allow any interested parties to find out about the failure. Device
+drivers, or other parts of the kernel, can use
+eeh_register_notifier(struct notifier_block *) to find out about EEH
+events. The event will include a pointer to the pci device, the
+device node and some state info. Receivers of the event can "do as
+they wish"; the default handler will be described further in this
+section.
+
+To assist in the recovery of the device, eeh.c exports the
+following functions:
+
+rtas_set_slot_reset() -- assert the PCI #RST line for 1/8th of a second
+rtas_configure_bridge() -- ask firmware to configure any PCI bridges
+ located topologically under the pci slot.
+eeh_save_bars() and eeh_restore_bars(): save and restore the PCI
+ config-space info for a device and any devices under it.
+
+
+A handler for the EEH notifier_block events is implemented in
+drivers/pci/hotplug/pSeries_pci.c, called handle_eeh_events().
+It saves the device BAR's and then calls rpaphp_unconfig_pci_adapter().
+This last call causes the device driver for the card to be stopped,
+which causes uevents to go out to user space. This triggers
+user-space scripts that might issue commands such as "ifdown eth0"
+for ethernet cards, and so on. This handler then sleeps for 5 seconds,
+hoping to give the user-space scripts enough time to complete.
+It then resets the PCI card, reconfigures the device BAR's, and
+any bridges underneath. It then calls rpaphp_enable_pci_slot(),
+which restarts the device driver and triggers more user-space
+events (for example, calling "ifup eth0" for ethernet cards).
+
+
+Device Shutdown and User-Space Events
+-------------------------------------
+This section documents what happens when a pci slot is unconfigured,
+focusing on how the device driver gets shut down, and on how the
+events get delivered to user-space scripts.
+
+Following is an example sequence of events that cause a device driver
+close function to be called during the first phase of an EEH reset.
+The following sequence is an example of the pcnet32 device driver.
+
+ rpa_php_unconfig_pci_adapter (struct slot *) // in rpaphp_pci.c
+ {
+ calls
+ pci_remove_bus_device (struct pci_dev *) // in /drivers/pci/remove.c
+ {
+ calls
+ pci_destroy_dev (struct pci_dev *)
+ {
+ calls
+ device_unregister (&dev->dev) // in /drivers/base/core.c
+ {
+ calls
+ device_del (struct device *)
+ {
+ calls
+ bus_remove_device() // in /drivers/base/bus.c
+ {
+ calls
+ device_release_driver()
+ {
+ calls
+ struct device_driver->remove() which is just
+ pci_device_remove() // in /drivers/pci/pci_driver.c
+ {
+ calls
+ struct pci_driver->remove() which is just
+ pcnet32_remove_one() // in /drivers/net/pcnet32.c
+ {
+ calls
+ unregister_netdev() // in /net/core/dev.c
+ {
+ calls
+ dev_close() // in /net/core/dev.c
+ {
+ calls dev->stop();
+ which is just pcnet32_close() // in pcnet32.c
+ {
+ which does what you wanted
+ to stop the device
+ }
+ }
+ }
+ which
+ frees pcnet32 device driver memory
+ }
+ }}}}}}
+
+
+ in drivers/pci/pci_driver.c,
+ struct device_driver->remove() is just pci_device_remove()
+ which calls struct pci_driver->remove() which is pcnet32_remove_one()
+ which calls unregister_netdev() (in net/core/dev.c)
+ which calls dev_close() (in net/core/dev.c)
+ which calls dev->stop() which is pcnet32_close()
+ which then does the appropriate shutdown.
+
+---
+Following is the analogous stack trace for events sent to user-space
+when the pci device is unconfigured.
+
+rpa_php_unconfig_pci_adapter() { // in rpaphp_pci.c
+ calls
+ pci_remove_bus_device (struct pci_dev *) { // in /drivers/pci/remove.c
+ calls
+ pci_destroy_dev (struct pci_dev *) {
+ calls
+ device_unregister (&dev->dev) { // in /drivers/base/core.c
+ calls
+ device_del(struct device * dev) { // in /drivers/base/core.c
+ calls
+ kobject_del() { //in /libs/kobject.c
+ calls
+ kobject_uevent() { // in /libs/kobject.c
+ calls
+ kset_uevent() { // in /lib/kobject.c
+ calls
+ kset->uevent_ops->uevent() // which is really just
+ a call to
+ dev_uevent() { // in /drivers/base/core.c
+ calls
+ dev->bus->uevent() which is really just a call to
+ pci_uevent () { // in drivers/pci/hotplug.c
+ which prints device name, etc....
+ }
+ }
+ then kobject_uevent() sends a netlink uevent to userspace
+ --> userspace uevent
+ (during early boot, nobody listens to netlink events and
+ kobject_uevent() executes uevent_helper[], which runs the
+ event process /sbin/hotplug)
+ }
+ }
+ kobject_del() then calls sysfs_remove_dir(), which would
+ trigger any user-space daemon that was watching /sysfs,
+ and notice the delete event.
+
+
+Pro's and Con's of the Current Design
+-------------------------------------
+There are several issues with the current EEH software recovery design,
+which may be addressed in future revisions. But first, note that the
+big plus of the current design is that no changes need to be made to
+individual device drivers, so that the current design throws a wide net.
+The biggest negative of the design is that it potentially disturbs
+network daemons and file systems that didn't need to be disturbed.
+
+-- A minor complaint is that resetting the network card causes
+ user-space back-to-back ifdown/ifup burps that potentially disturb
+ network daemons, that didn't need to even know that the pci
+ card was being rebooted.
+
+-- A more serious concern is that the same reset, for SCSI devices,
+ causes havoc to mounted file systems. Scripts cannot post-facto
+ unmount a file system without flushing pending buffers, but this
+ is impossible, because I/O has already been stopped. Thus,
+ ideally, the reset should happen at or below the block layer,
+ so that the file systems are not disturbed.
+
+ Reiserfs does not tolerate errors returned from the block device.
+ Ext3fs seems to be tolerant, retrying reads/writes until it does
+ succeed. Both have been only lightly tested in this scenario.
+
+ The SCSI-generic subsystem already has built-in code for performing
+ SCSI device resets, SCSI bus resets, and SCSI host-bus-adapter
+ (HBA) resets. These are cascaded into a chain of attempted
+ resets if a SCSI command fails. These are completely hidden
+ from the block layer. It would be very natural to add an EEH
+ reset into this chain of events.
+
+-- If a SCSI error occurs for the root device, all is lost unless
+ the sysadmin had the foresight to run /bin, /sbin, /etc, /var
+ and so on, out of ramdisk/tmpfs.
+
+
+Conclusions
+-----------
+There's forward progress ...
+
+
diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt
new file mode 100644
index 000000000..3007bc98a
--- /dev/null
+++ b/Documentation/powerpc/firmware-assisted-dump.txt
@@ -0,0 +1,270 @@
+
+ Firmware-Assisted Dump
+ ------------------------
+ July 2011
+
+The goal of firmware-assisted dump is to enable the dump of
+a crashed system, and to do so from a fully-reset system, and
+to minimize the total elapsed time until the system is back
+in production use.
+
+- Firmware assisted dump (fadump) infrastructure is intended to replace
+ the existing phyp assisted dump.
+- Fadump uses the same firmware interfaces and memory reservation model
+ as phyp assisted dump.
+- Unlike phyp dump, fadump exports the memory dump through /proc/vmcore
+ in the ELF format in the same way as kdump. This helps us reuse the
+ kdump infrastructure for dump capture and filtering.
+- Unlike phyp dump, userspace tool does not need to refer any sysfs
+ interface while reading /proc/vmcore.
+- Unlike phyp dump, fadump allows user to release all the memory reserved
+ for dump, with a single operation of echo 1 > /sys/kernel/fadump_release_mem.
+- Once enabled through kernel boot parameter, fadump can be
+ started/stopped through /sys/kernel/fadump_registered interface (see
+ sysfs files section below) and can be easily integrated with kdump
+ service start/stop init scripts.
+
+Comparing with kdump or other strategies, firmware-assisted
+dump offers several strong, practical advantages:
+
+-- Unlike kdump, the system has been reset, and loaded
+ with a fresh copy of the kernel. In particular,
+ PCI and I/O devices have been reinitialized and are
+ in a clean, consistent state.
+-- Once the dump is copied out, the memory that held the dump
+ is immediately available to the running kernel. And therefore,
+ unlike kdump, fadump doesn't need a 2nd reboot to get back
+ the system to the production configuration.
+
+The above can only be accomplished by coordination with,
+and assistance from the Power firmware. The procedure is
+as follows:
+
+-- The first kernel registers the sections of memory with the
+ Power firmware for dump preservation during OS initialization.
+ These registered sections of memory are reserved by the first
+ kernel during early boot.
+
+-- When a system crashes, the Power firmware will save
+ the low memory (boot memory of size larger of 5% of system RAM
+ or 256MB) of RAM to the previous registered region. It will
+ also save system registers, and hardware PTE's.
+
+ NOTE: The term 'boot memory' means size of the low memory chunk
+ that is required for a kernel to boot successfully when
+ booted with restricted memory. By default, the boot memory
+ size will be the larger of 5% of system RAM or 256MB.
+ Alternatively, user can also specify boot memory size
+ through boot parameter 'fadump_reserve_mem=' which will
+ override the default calculated size. Use this option
+ if default boot memory size is not sufficient for second
+ kernel to boot successfully.
+
+-- After the low memory (boot memory) area has been saved, the
+ firmware will reset PCI and other hardware state. It will
+ *not* clear the RAM. It will then launch the bootloader, as
+ normal.
+
+-- The freshly booted kernel will notice that there is a new
+ node (ibm,dump-kernel) in the device tree, indicating that
+ there is crash data available from a previous boot. During
+ the early boot OS will reserve rest of the memory above
+ boot memory size effectively booting with restricted memory
+ size. This will make sure that the second kernel will not
+ touch any of the dump memory area.
+
+-- User-space tools will read /proc/vmcore to obtain the contents
+ of memory, which holds the previous crashed kernel dump in ELF
+ format. The userspace tools may copy this info to disk, or
+ network, nas, san, iscsi, etc. as desired.
+
+-- Once the userspace tool is done saving dump, it will echo
+ '1' to /sys/kernel/fadump_release_mem to release the reserved
+ memory back to general use, except the memory required for
+ next firmware-assisted dump registration.
+
+ e.g.
+ # echo 1 > /sys/kernel/fadump_release_mem
+
+Please note that the firmware-assisted dump feature
+is only available on Power6 and above systems with recent
+firmware versions.
+
+Implementation details:
+----------------------
+
+During boot, a check is made to see if firmware supports
+this feature on that particular machine. If it does, then
+we check to see if an active dump is waiting for us. If yes
+then everything but boot memory size of RAM is reserved during
+early boot (See Fig. 2). This area is released once we finish
+collecting the dump from user land scripts (e.g. kdump scripts)
+that are run. If there is dump data, then the
+/sys/kernel/fadump_release_mem file is created, and the reserved
+memory is held.
+
+If there is no waiting dump data, then only the memory required
+to hold CPU state, HPTE region, boot memory dump and elfcore
+header, is reserved at the top of memory (see Fig. 1). This area
+is *not* released: this region will be kept permanently reserved,
+so that it can act as a receptacle for a copy of the boot memory
+content in addition to CPU state and HPTE region, in the case a
+crash does occur.
+
+ o Memory Reservation during first kernel
+
+ Low memory Top of memory
+ 0 boot memory size |
+ | | |<--Reserved dump area -->|
+ V V | Permanent Reservation V
+ +-----------+----------/ /----------+---+----+-----------+----+
+ | | |CPU|HPTE| DUMP |ELF |
+ +-----------+----------/ /----------+---+----+-----------+----+
+ | ^
+ | |
+ \ /
+ -------------------------------------------
+ Boot memory content gets transferred to
+ reserved area by firmware at the time of
+ crash
+ Fig. 1
+
+ o Memory Reservation during second kernel after crash
+
+ Low memory Top of memory
+ 0 boot memory size |
+ | |<------------- Reserved dump area ----------- -->|
+ V V V
+ +-----------+----------/ /----------+---+----+-----------+----+
+ | | |CPU|HPTE| DUMP |ELF |
+ +-----------+----------/ /----------+---+----+-----------+----+
+ | |
+ V V
+ Used by second /proc/vmcore
+ kernel to boot
+ Fig. 2
+
+Currently the dump will be copied from /proc/vmcore to a
+a new file upon user intervention. The dump data available through
+/proc/vmcore will be in ELF format. Hence the existing kdump
+infrastructure (kdump scripts) to save the dump works fine with
+minor modifications.
+
+The tools to examine the dump will be same as the ones
+used for kdump.
+
+How to enable firmware-assisted dump (fadump):
+-------------------------------------
+
+1. Set config option CONFIG_FA_DUMP=y and build kernel.
+2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
+3. Optionally, user can also set 'fadump_reserve_mem=' kernel cmdline
+ to specify size of the memory to reserve for boot memory dump
+ preservation.
+
+NOTE: If firmware-assisted dump fails to reserve memory then it will
+ fallback to existing kdump mechanism if 'crashkernel=' option
+ is set at kernel cmdline.
+
+Sysfs/debugfs files:
+------------
+
+Firmware-assisted dump feature uses sysfs file system to hold
+the control files and debugfs file to display memory reserved region.
+
+Here is the list of files under kernel sysfs:
+
+ /sys/kernel/fadump_enabled
+
+ This is used to display the fadump status.
+ 0 = fadump is disabled
+ 1 = fadump is enabled
+
+ This interface can be used by kdump init scripts to identify if
+ fadump is enabled in the kernel and act accordingly.
+
+ /sys/kernel/fadump_registered
+
+ This is used to display the fadump registration status as well
+ as to control (start/stop) the fadump registration.
+ 0 = fadump is not registered.
+ 1 = fadump is registered and ready to handle system crash.
+
+ To register fadump echo 1 > /sys/kernel/fadump_registered and
+ echo 0 > /sys/kernel/fadump_registered for un-register and stop the
+ fadump. Once the fadump is un-registered, the system crash will not
+ be handled and vmcore will not be captured. This interface can be
+ easily integrated with kdump service start/stop.
+
+ /sys/kernel/fadump_release_mem
+
+ This file is available only when fadump is active during
+ second kernel. This is used to release the reserved memory
+ region that are held for saving crash dump. To release the
+ reserved memory echo 1 to it:
+
+ echo 1 > /sys/kernel/fadump_release_mem
+
+ After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region
+ file will change to reflect the new memory reservations.
+
+ The existing userspace tools (kdump infrastructure) can be easily
+ enhanced to use this interface to release the memory reserved for
+ dump and continue without 2nd reboot.
+
+Here is the list of files under powerpc debugfs:
+(Assuming debugfs is mounted on /sys/kernel/debug directory.)
+
+ /sys/kernel/debug/powerpc/fadump_region
+
+ This file shows the reserved memory regions if fadump is
+ enabled otherwise this file is empty. The output format
+ is:
+ <region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size>
+
+ e.g.
+ Contents when fadump is registered during first kernel
+
+ # cat /sys/kernel/debug/powerpc/fadump_region
+ CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0
+ HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
+ DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
+
+ Contents when fadump is active during second kernel
+
+ # cat /sys/kernel/debug/powerpc/fadump_region
+ CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
+ HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000
+ DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000
+ : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000
+
+NOTE: Please refer to Documentation/filesystems/debugfs.txt on
+ how to mount the debugfs filesystem.
+
+
+TODO:
+-----
+ o Need to come up with the better approach to find out more
+ accurate boot memory size that is required for a kernel to
+ boot successfully when booted with restricted memory.
+ o The fadump implementation introduces a fadump crash info structure
+ in the scratch area before the ELF core header. The idea of introducing
+ this structure is to pass some important crash info data to the second
+ kernel which will help second kernel to populate ELF core header with
+ correct data before it gets exported through /proc/vmcore. The current
+ design implementation does not address a possibility of introducing
+ additional fields (in future) to this structure without affecting
+ compatibility. Need to come up with the better approach to address this.
+ The possible approaches are:
+ 1. Introduce version field for version tracking, bump up the version
+ whenever a new field is added to the structure in future. The version
+ field can be used to find out what fields are valid for the current
+ version of the structure.
+ 2. Reserve the area of predefined size (say PAGE_SIZE) for this
+ structure and have unused area as reserved (initialized to zero)
+ for future field additions.
+ The advantage of approach 1 over 2 is we don't need to reserve extra space.
+---
+Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+This document is based on the original documentation written for phyp
+assisted dump by Linas Vepstas and Manish Ahuja.
diff --git a/Documentation/powerpc/hvcs.txt b/Documentation/powerpc/hvcs.txt
new file mode 100644
index 000000000..a730ca5a0
--- /dev/null
+++ b/Documentation/powerpc/hvcs.txt
@@ -0,0 +1,567 @@
+===========================================================================
+ HVCS
+ IBM "Hypervisor Virtual Console Server" Installation Guide
+ for Linux Kernel 2.6.4+
+ Copyright (C) 2004 IBM Corporation
+
+===========================================================================
+NOTE:Eight space tabs are the optimum editor setting for reading this file.
+===========================================================================
+
+ Author(s) : Ryan S. Arnold <rsa@us.ibm.com>
+ Date Created: March, 02, 2004
+ Last Changed: August, 24, 2004
+
+---------------------------------------------------------------------------
+Table of contents:
+
+ 1. Driver Introduction:
+ 2. System Requirements
+ 3. Build Options:
+ 3.1 Built-in:
+ 3.2 Module:
+ 4. Installation:
+ 5. Connection:
+ 6. Disconnection:
+ 7. Configuration:
+ 8. Questions & Answers:
+ 9. Reporting Bugs:
+
+---------------------------------------------------------------------------
+1. Driver Introduction:
+
+This is the device driver for the IBM Hypervisor Virtual Console Server,
+"hvcs". The IBM hvcs provides a tty driver interface to allow Linux user
+space applications access to the system consoles of logically partitioned
+operating systems (Linux and AIX) running on the same partitioned Power5
+ppc64 system. Physical hardware consoles per partition are not practical
+on this hardware so system consoles are accessed by this driver using
+firmware interfaces to virtual terminal devices.
+
+---------------------------------------------------------------------------
+2. System Requirements:
+
+This device driver was written using 2.6.4 Linux kernel APIs and will only
+build and run on kernels of this version or later.
+
+This driver was written to operate solely on IBM Power5 ppc64 hardware
+though some care was taken to abstract the architecture dependent firmware
+calls from the driver code.
+
+Sysfs must be mounted on the system so that the user can determine which
+major and minor numbers are associated with each vty-server. Directions
+for sysfs mounting are outside the scope of this document.
+
+---------------------------------------------------------------------------
+3. Build Options:
+
+The hvcs driver registers itself as a tty driver. The tty layer
+dynamically allocates a block of major and minor numbers in a quantity
+requested by the registering driver. The hvcs driver asks the tty layer
+for 64 of these major/minor numbers by default to use for hvcs device node
+entries.
+
+If the default number of device entries is adequate then this driver can be
+built into the kernel. If not, the default can be over-ridden by inserting
+the driver as a module with insmod parameters.
+
+---------------------------------------------------------------------------
+3.1 Built-in:
+
+The following menuconfig example demonstrates selecting to build this
+driver into the kernel.
+
+ Device Drivers --->
+ Character devices --->
+ <*> IBM Hypervisor Virtual Console Server Support
+
+Begin the kernel make process.
+
+---------------------------------------------------------------------------
+3.2 Module:
+
+The following menuconfig example demonstrates selecting to build this
+driver as a kernel module.
+
+ Device Drivers --->
+ Character devices --->
+ <M> IBM Hypervisor Virtual Console Server Support
+
+The make process will build the following kernel modules:
+
+ hvcs.ko
+ hvcserver.ko
+
+To insert the module with the default allocation execute the following
+commands in the order they appear:
+
+ insmod hvcserver.ko
+ insmod hvcs.ko
+
+The hvcserver module contains architecture specific firmware calls and must
+be inserted first, otherwise the hvcs module will not find some of the
+symbols it expects.
+
+To override the default use an insmod parameter as follows (requesting 4
+tty devices as an example):
+
+ insmod hvcs.ko hvcs_parm_num_devs=4
+
+There is a maximum number of dev entries that can be specified on insmod.
+We think that 1024 is currently a decent maximum number of server adapters
+to allow. This can always be changed by modifying the constant in the
+source file before building.
+
+NOTE: The length of time it takes to insmod the driver seems to be related
+to the number of tty interfaces the registering driver requests.
+
+In order to remove the driver module execute the following command:
+
+ rmmod hvcs.ko
+
+The recommended method for installing hvcs as a module is to use depmod to
+build a current modules.dep file in /lib/modules/`uname -r` and then
+execute:
+
+modprobe hvcs hvcs_parm_num_devs=4
+
+The modules.dep file indicates that hvcserver.ko needs to be inserted
+before hvcs.ko and modprobe uses this file to smartly insert the modules in
+the proper order.
+
+The following modprobe command is used to remove hvcs and hvcserver in the
+proper order:
+
+modprobe -r hvcs
+
+---------------------------------------------------------------------------
+4. Installation:
+
+The tty layer creates sysfs entries which contain the major and minor
+numbers allocated for the hvcs driver. The following snippet of "tree"
+output of the sysfs directory shows where these numbers are presented:
+
+ sys/
+ |-- *other sysfs base dirs*
+ |
+ |-- class
+ | |-- *other classes of devices*
+ | |
+ | `-- tty
+ | |-- *other tty devices*
+ | |
+ | |-- hvcs0
+ | | `-- dev
+ | |-- hvcs1
+ | | `-- dev
+ | |-- hvcs2
+ | | `-- dev
+ | |-- hvcs3
+ | | `-- dev
+ | |
+ | |-- *other tty devices*
+ |
+ |-- *other sysfs base dirs*
+
+For the above examples the following output is a result of cat'ing the
+"dev" entry in the hvcs directory:
+
+ Pow5:/sys/class/tty/hvcs0/ # cat dev
+ 254:0
+
+ Pow5:/sys/class/tty/hvcs1/ # cat dev
+ 254:1
+
+ Pow5:/sys/class/tty/hvcs2/ # cat dev
+ 254:2
+
+ Pow5:/sys/class/tty/hvcs3/ # cat dev
+ 254:3
+
+The output from reading the "dev" attribute is the char device major and
+minor numbers that the tty layer has allocated for this driver's use. Most
+systems running hvcs will already have the device entries created or udev
+will do it automatically.
+
+Given the example output above, to manually create a /dev/hvcs* node entry
+mknod can be used as follows:
+
+ mknod /dev/hvcs0 c 254 0
+ mknod /dev/hvcs1 c 254 1
+ mknod /dev/hvcs2 c 254 2
+ mknod /dev/hvcs3 c 254 3
+
+Using mknod to manually create the device entries makes these device nodes
+persistent. Once created they will exist prior to the driver insmod.
+
+Attempting to connect an application to /dev/hvcs* prior to insertion of
+the hvcs module will result in an error message similar to the following:
+
+ "/dev/hvcs*: No such device".
+
+NOTE: Just because there is a device node present doesn't mean that there
+is a vty-server device configured for that node.
+
+---------------------------------------------------------------------------
+5. Connection
+
+Since this driver controls devices that provide a tty interface a user can
+interact with the device node entries using any standard tty-interactive
+method (e.g. "cat", "dd", "echo"). The intent of this driver however, is
+to provide real time console interaction with a Linux partition's console,
+which requires the use of applications that provide bi-directional,
+interactive I/O with a tty device.
+
+Applications (e.g. "minicom" and "screen") that act as terminal emulators
+or perform terminal type control sequence conversion on the data being
+passed through them are NOT acceptable for providing interactive console
+I/O. These programs often emulate antiquated terminal types (vt100 and
+ANSI) and expect inbound data to take the form of one of these supported
+terminal types but they either do not convert, or do not _adequately_
+convert, outbound data into the terminal type of the terminal which invoked
+them (though screen makes an attempt and can apparently be configured with
+much termcap wrestling.)
+
+For this reason kermit and cu are two of the recommended applications for
+interacting with a Linux console via an hvcs device. These programs simply
+act as a conduit for data transfer to and from the tty device. They do not
+require inbound data to take the form of a particular terminal type, nor do
+they cook outbound data to a particular terminal type.
+
+In order to ensure proper functioning of console applications one must make
+sure that once connected to a /dev/hvcs console that the console's $TERM
+env variable is set to the exact terminal type of the terminal emulator
+used to launch the interactive I/O application. If one is using xterm and
+kermit to connect to /dev/hvcs0 when the console prompt becomes available
+one should "export TERM=xterm" on the console. This tells ncurses
+applications that are invoked from the console that they should output
+control sequences that xterm can understand.
+
+As a precautionary measure an hvcs user should always "exit" from their
+session before disconnecting an application such as kermit from the device
+node. If this is not done, the next user to connect to the console will
+continue using the previous user's logged in session which includes
+using the $TERM variable that the previous user supplied.
+
+Hotplug add and remove of vty-server adapters affects which /dev/hvcs* node
+is used to connect to each vty-server adapter. In order to determine which
+vty-server adapter is associated with which /dev/hvcs* node a special sysfs
+attribute has been added to each vty-server sysfs entry. This entry is
+called "index" and showing it reveals an integer that refers to the
+/dev/hvcs* entry to use to connect to that device. For instance cating the
+index attribute of vty-server adapter 30000004 shows the following.
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat index
+ 2
+
+This index of '2' means that in order to connect to vty-server adapter
+30000004 the user should interact with /dev/hvcs2.
+
+It should be noted that due to the system hotplug I/O capabilities of a
+system the /dev/hvcs* entry that interacts with a particular vty-server
+adapter is not guaranteed to remain the same across system reboots. Look
+in the Q & A section for more on this issue.
+
+---------------------------------------------------------------------------
+6. Disconnection
+
+As a security feature to prevent the delivery of stale data to an
+unintended target the Power5 system firmware disables the fetching of data
+and discards that data when a connection between a vty-server and a vty has
+been severed. As an example, when a vty-server is immediately disconnected
+from a vty following output of data to the vty the vty adapter may not have
+enough time between when it received the data interrupt and when the
+connection was severed to fetch the data from firmware before the fetch is
+disabled by firmware.
+
+When hvcs is being used to serve consoles this behavior is not a huge issue
+because the adapter stays connected for large amounts of time following
+almost all data writes. When hvcs is being used as a tty conduit to tunnel
+data between two partitions [see Q & A below] this is a huge problem
+because the standard Linux behavior when cat'ing or dd'ing data to a device
+is to open the tty, send the data, and then close the tty. If this driver
+manually terminated vty-server connections on tty close this would close
+the vty-server and vty connection before the target vty has had a chance to
+fetch the data.
+
+Additionally, disconnecting a vty-server and vty only on module removal or
+adapter removal is impractical because other vty-servers in other
+partitions may require the usage of the target vty at any time.
+
+Due to this behavioral restriction disconnection of vty-servers from the
+connected vty is a manual procedure using a write to a sysfs attribute
+outlined below, on the other hand the initial vty-server connection to a
+vty is established automatically by this driver. Manual vty-server
+connection is never required.
+
+In order to terminate the connection between a vty-server and vty the
+"vterm_state" sysfs attribute within each vty-server's sysfs entry is used.
+Reading this attribute reveals the current connection state of the
+vty-server adapter. A zero means that the vty-server is not connected to a
+vty. A one indicates that a connection is active.
+
+Writing a '0' (zero) to the vterm_state attribute will disconnect the VTERM
+connection between the vty-server and target vty ONLY if the vterm_state
+previously read '1'. The write directive is ignored if the vterm_state
+read '0' or if any value other than '0' was written to the vterm_state
+attribute. The following example will show the method used for verifying
+the vty-server connection status and disconnecting a vty-server connection.
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
+ 1
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo 0 > vterm_state
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
+ 0
+
+All vty-server connections are automatically terminated when the device is
+hotplug removed and when the module is removed.
+
+---------------------------------------------------------------------------
+7. Configuration
+
+Each vty-server has a sysfs entry in the /sys/devices/vio directory, which
+is symlinked in several other sysfs tree directories, notably under the
+hvcs driver entry, which looks like the following example:
+
+ Pow5:/sys/bus/vio/drivers/hvcs # ls
+ . .. 30000003 30000004 rescan
+
+By design, firmware notifies the hvcs driver of vty-server lifetimes and
+partner vty removals but not the addition of partner vtys. Since an HMC
+Super Admin can add partner info dynamically we have provided the hvcs
+driver sysfs directory with the "rescan" update attribute which will query
+firmware and update the partner info for all the vty-servers that this
+driver manages. Writing a '1' to the attribute triggers the update. An
+explicit example follows:
+
+ Pow5:/sys/bus/vio/drivers/hvcs # echo 1 > rescan
+
+Reading the attribute will indicate a state of '1' or '0'. A one indicates
+that an update is in process. A zero indicates that an update has
+completed or was never executed.
+
+Vty-server entries in this directory are a 32 bit partition unique unit
+address that is created by firmware. An example vty-server sysfs entry
+looks like the following:
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls
+ . current_vty devspec name partner_vtys
+ .. index partner_clcs vterm_state
+
+Each entry is provided, by default with a "name" attribute. Reading the
+"name" attribute will reveal the device type as shown in the following
+example:
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000003 # cat name
+ vty-server
+
+Each entry is also provided, by default, with a "devspec" attribute which
+reveals the full device specification when read, as shown in the following
+example:
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat devspec
+ /vdevice/vty-server@30000004
+
+Each vty-server sysfs dir is provided with two read-only attributes that
+provide lists of easily parsed partner vty data: "partner_vtys" and
+"partner_clcs".
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_vtys
+ 30000000
+ 30000001
+ 30000002
+ 30000000
+ 30000000
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_clcs
+ U5112.428.103048A-V3-C0
+ U5112.428.103048A-V3-C2
+ U5112.428.103048A-V3-C3
+ U5112.428.103048A-V4-C0
+ U5112.428.103048A-V5-C0
+
+Reading partner_vtys returns a list of partner vtys. Vty unit address
+numbering is only per-partition-unique so entries will frequently repeat.
+
+Reading partner_clcs returns a list of "converged location codes" which are
+composed of a system serial number followed by "-V*", where the '*' is the
+target partition number, and "-C*", where the '*' is the slot of the
+adapter. The first vty partner corresponds to the first clc item, the
+second vty partner to the second clc item, etc.
+
+A vty-server can only be connected to a single vty at a time. The entry,
+"current_vty" prints the clc of the currently selected partner vty when
+read.
+
+The current_vty can be changed by writing a valid partner clc to the entry
+as in the following example:
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo U5112.428.10304
+ 8A-V4-C0 > current_vty
+
+Changing the current_vty when a vty-server is already connected to a vty
+does not affect the current connection. The change takes effect when the
+currently open connection is freed.
+
+Information on the "vterm_state" attribute was covered earlier on the
+chapter entitled "disconnection".
+
+---------------------------------------------------------------------------
+8. Questions & Answers:
+===========================================================================
+Q: What are the security concerns involving hvcs?
+
+A: There are three main security concerns:
+
+ 1. The creator of the /dev/hvcs* nodes has the ability to restrict
+ the access of the device entries to certain users or groups. It
+ may be best to create a special hvcs group privilege for providing
+ access to system consoles.
+
+ 2. To provide network security when grabbing the console it is
+ suggested that the user connect to the console hosting partition
+ using a secure method, such as SSH or sit at a hardware console.
+
+ 3. Make sure to exit the user session when done with a console or
+ the next vty-server connection (which may be from another
+ partition) will experience the previously logged in session.
+
+---------------------------------------------------------------------------
+Q: How do I multiplex a console that I grab through hvcs so that other
+people can see it:
+
+A: You can use "screen" to directly connect to the /dev/hvcs* device and
+setup a session on your machine with the console group privileges. As
+pointed out earlier by default screen doesn't provide the termcap settings
+for most terminal emulators to provide adequate character conversion from
+term type "screen" to others. This means that curses based programs may
+not display properly in screen sessions.
+
+---------------------------------------------------------------------------
+Q: Why are the colors all messed up?
+Q: Why are the control characters acting strange or not working?
+Q: Why is the console output all strange and unintelligible?
+
+A: Please see the preceding section on "Connection" for a discussion of how
+applications can affect the display of character control sequences.
+Additionally, just because you logged into the console using and xterm
+doesn't mean someone else didn't log into the console with the HMC console
+(vt320) before you and leave the session logged in. The best thing to do
+is to export TERM to the terminal type of your terminal emulator when you
+get the console. Additionally make sure to "exit" the console before you
+disconnect from the console. This will ensure that the next user gets
+their own TERM type set when they login.
+
+---------------------------------------------------------------------------
+Q: When I try to CONNECT kermit to an hvcs device I get:
+"Sorry, can't open connection: /dev/hvcs*"What is happening?
+
+A: Some other Power5 console mechanism has a connection to the vty and
+isn't giving it up. You can try to force disconnect the consoles from the
+HMC by right clicking on the partition and then selecting "close terminal".
+Otherwise you have to hunt down the people who have console authority. It
+is possible that you already have the console open using another kermit
+session and just forgot about it. Please review the console options for
+Power5 systems to determine the many ways a system console can be held.
+
+OR
+
+A: Another user may not have a connectivity method currently attached to a
+/dev/hvcs device but the vterm_state may reveal that they still have the
+vty-server connection established. They need to free this using the method
+outlined in the section on "Disconnection" in order for others to connect
+to the target vty.
+
+OR
+
+A: The user profile you are using to execute kermit probably doesn't have
+permissions to use the /dev/hvcs* device.
+
+OR
+
+A: You probably haven't inserted the hvcs.ko module yet but the /dev/hvcs*
+entry still exists (on systems without udev).
+
+OR
+
+A: There is not a corresponding vty-server device that maps to an existing
+/dev/hvcs* entry.
+
+---------------------------------------------------------------------------
+Q: When I try to CONNECT kermit to an hvcs device I get:
+"Sorry, write access to UUCP lockfile directory denied."
+
+A: The /dev/hvcs* entry you have specified doesn't exist where you said it
+does? Maybe you haven't inserted the module (on systems with udev).
+
+---------------------------------------------------------------------------
+Q: If I already have one Linux partition installed can I use hvcs on said
+partition to provide the console for the install of a second Linux
+partition?
+
+A: Yes granted that your are connected to the /dev/hvcs* device using
+kermit or cu or some other program that doesn't provide terminal emulation.
+
+---------------------------------------------------------------------------
+Q: Can I connect to more than one partition's console at a time using this
+driver?
+
+A: Yes. Of course this means that there must be more than one vty-server
+configured for this partition and each must point to a disconnected vty.
+
+---------------------------------------------------------------------------
+Q: Does the hvcs driver support dynamic (hotplug) addition of devices?
+
+A: Yes, if you have dlpar and hotplug enabled for your system and it has
+been built into the kernel the hvcs drivers is configured to dynamically
+handle additions of new devices and removals of unused devices.
+
+---------------------------------------------------------------------------
+Q: For some reason /dev/hvcs* doesn't map to the same vty-server adapter
+after a reboot. What happened?
+
+A: Assignment of vty-server adapters to /dev/hvcs* entries is always done
+in the order that the adapters are exposed. Due to hotplug capabilities of
+this driver assignment of hotplug added vty-servers may be in a different
+order than how they would be exposed on module load. Rebooting or
+reloading the module after dynamic addition may result in the /dev/hvcs*
+and vty-server coupling changing if a vty-server adapter was added in a
+slot between two other vty-server adapters. Refer to the section above
+on how to determine which vty-server goes with which /dev/hvcs* node.
+Hint; look at the sysfs "index" attribute for the vty-server.
+
+---------------------------------------------------------------------------
+Q: Can I use /dev/hvcs* as a conduit to another partition and use a tty
+device on that partition as the other end of the pipe?
+
+A: Yes, on Power5 platforms the hvc_console driver provides a tty interface
+for extra /dev/hvc* devices (where /dev/hvc0 is most likely the console).
+In order to get a tty conduit working between the two partitions the HMC
+Super Admin must create an additional "serial server" for the target
+partition with the HMC gui which will show up as /dev/hvc* when the target
+partition is rebooted.
+
+The HMC Super Admin then creates an additional "serial client" for the
+current partition and points this at the target partition's newly created
+"serial server" adapter (remember the slot). This shows up as an
+additional /dev/hvcs* device.
+
+Now a program on the target system can be configured to read or write to
+/dev/hvc* and another program on the current partition can be configured to
+read or write to /dev/hvcs*. Now you have a tty conduit between two
+partitions.
+
+---------------------------------------------------------------------------
+9. Reporting Bugs:
+
+The proper channel for reporting bugs is either through the Linux OS
+distribution company that provided your OS or by posting issues to the
+PowerPC development mailing list at:
+
+linuxppc-dev@lists.ozlabs.org
+
+This request is to provide a documented and searchable public exchange
+of the problems and solutions surrounding this driver for the benefit of
+all users.
diff --git a/Documentation/powerpc/mpc52xx.txt b/Documentation/powerpc/mpc52xx.txt
new file mode 100644
index 000000000..0d540a31e
--- /dev/null
+++ b/Documentation/powerpc/mpc52xx.txt
@@ -0,0 +1,39 @@
+Linux 2.6.x on MPC52xx family
+-----------------------------
+
+For the latest info, go to http://www.246tNt.com/mpc52xx/
+
+To compile/use :
+
+ - U-Boot:
+ # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
+ if you wish to ).
+ # make lite5200_defconfig
+ # make uImage
+
+ then, on U-boot:
+ => tftpboot 200000 uImage
+ => tftpboot 400000 pRamdisk
+ => bootm 200000 400000
+
+ - DBug:
+ # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
+ if you wish to ).
+ # make lite5200_defconfig
+ # cp your_initrd.gz arch/ppc/boot/images/ramdisk.image.gz
+ # make zImage.initrd
+ # make
+
+ then in DBug:
+ DBug> dn -i zImage.initrd.lite5200
+
+
+Some remarks :
+ - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100
+ is not supported, and I'm not sure anyone is interesting in working on it
+ so. I didn't took 5xxx because there's apparently a lot of 5xxx that have
+ nothing to do with the MPC5200. I also included the 'MPC' for the same
+ reason.
+ - Of course, I inspired myself from the 2.4 port. If you think I forgot to
+ mention you/your company in the copyright of some code, I'll correct it
+ ASAP.
diff --git a/Documentation/powerpc/pci_iov_resource_on_powernv.txt b/Documentation/powerpc/pci_iov_resource_on_powernv.txt
new file mode 100644
index 000000000..b55c5cd83
--- /dev/null
+++ b/Documentation/powerpc/pci_iov_resource_on_powernv.txt
@@ -0,0 +1,301 @@
+Wei Yang <weiyang@linux.vnet.ibm.com>
+Benjamin Herrenschmidt <benh@au1.ibm.com>
+Bjorn Helgaas <bhelgaas@google.com>
+26 Aug 2014
+
+This document describes the requirement from hardware for PCI MMIO resource
+sizing and assignment on PowerKVM and how generic PCI code handles this
+requirement. The first two sections describe the concepts of Partitionable
+Endpoints and the implementation on P8 (IODA2). The next two sections talks
+about considerations on enabling SRIOV on IODA2.
+
+1. Introduction to Partitionable Endpoints
+
+A Partitionable Endpoint (PE) is a way to group the various resources
+associated with a device or a set of devices to provide isolation between
+partitions (i.e., filtering of DMA, MSIs etc.) and to provide a mechanism
+to freeze a device that is causing errors in order to limit the possibility
+of propagation of bad data.
+
+There is thus, in HW, a table of PE states that contains a pair of "frozen"
+state bits (one for MMIO and one for DMA, they get set together but can be
+cleared independently) for each PE.
+
+When a PE is frozen, all stores in any direction are dropped and all loads
+return all 1's value. MSIs are also blocked. There's a bit more state that
+captures things like the details of the error that caused the freeze etc., but
+that's not critical.
+
+The interesting part is how the various PCIe transactions (MMIO, DMA, ...)
+are matched to their corresponding PEs.
+
+The following section provides a rough description of what we have on P8
+(IODA2). Keep in mind that this is all per PHB (PCI host bridge). Each PHB
+is a completely separate HW entity that replicates the entire logic, so has
+its own set of PEs, etc.
+
+2. Implementation of Partitionable Endpoints on P8 (IODA2)
+
+P8 supports up to 256 Partitionable Endpoints per PHB.
+
+ * Inbound
+
+ For DMA, MSIs and inbound PCIe error messages, we have a table (in
+ memory but accessed in HW by the chip) that provides a direct
+ correspondence between a PCIe RID (bus/dev/fn) with a PE number.
+ We call this the RTT.
+
+ - For DMA we then provide an entire address space for each PE that can
+ contain two "windows", depending on the value of PCI address bit 59.
+ Each window can be configured to be remapped via a "TCE table" (IOMMU
+ translation table), which has various configurable characteristics
+ not described here.
+
+ - For MSIs, we have two windows in the address space (one at the top of
+ the 32-bit space and one much higher) which, via a combination of the
+ address and MSI value, will result in one of the 2048 interrupts per
+ bridge being triggered. There's a PE# in the interrupt controller
+ descriptor table as well which is compared with the PE# obtained from
+ the RTT to "authorize" the device to emit that specific interrupt.
+
+ - Error messages just use the RTT.
+
+ * Outbound. That's where the tricky part is.
+
+ Like other PCI host bridges, the Power8 IODA2 PHB supports "windows"
+ from the CPU address space to the PCI address space. There is one M32
+ window and sixteen M64 windows. They have different characteristics.
+ First what they have in common: they forward a configurable portion of
+ the CPU address space to the PCIe bus and must be naturally aligned
+ power of two in size. The rest is different:
+
+ - The M32 window:
+
+ * Is limited to 4GB in size.
+
+ * Drops the top bits of the address (above the size) and replaces
+ them with a configurable value. This is typically used to generate
+ 32-bit PCIe accesses. We configure that window at boot from FW and
+ don't touch it from Linux; it's usually set to forward a 2GB
+ portion of address space from the CPU to PCIe
+ 0x8000_0000..0xffff_ffff. (Note: The top 64KB are actually
+ reserved for MSIs but this is not a problem at this point; we just
+ need to ensure Linux doesn't assign anything there, the M32 logic
+ ignores that however and will forward in that space if we try).
+
+ * It is divided into 256 segments of equal size. A table in the chip
+ maps each segment to a PE#. That allows portions of the MMIO space
+ to be assigned to PEs on a segment granularity. For a 2GB window,
+ the segment granularity is 2GB/256 = 8MB.
+
+ Now, this is the "main" window we use in Linux today (excluding
+ SR-IOV). We basically use the trick of forcing the bridge MMIO windows
+ onto a segment alignment/granularity so that the space behind a bridge
+ can be assigned to a PE.
+
+ Ideally we would like to be able to have individual functions in PEs
+ but that would mean using a completely different address allocation
+ scheme where individual function BARs can be "grouped" to fit in one or
+ more segments.
+
+ - The M64 windows:
+
+ * Must be at least 256MB in size.
+
+ * Do not translate addresses (the address on PCIe is the same as the
+ address on the PowerBus). There is a way to also set the top 14
+ bits which are not conveyed by PowerBus but we don't use this.
+
+ * Can be configured to be segmented. When not segmented, we can
+ specify the PE# for the entire window. When segmented, a window
+ has 256 segments; however, there is no table for mapping a segment
+ to a PE#. The segment number *is* the PE#.
+
+ * Support overlaps. If an address is covered by multiple windows,
+ there's a defined ordering for which window applies.
+
+ We have code (fairly new compared to the M32 stuff) that exploits that
+ for large BARs in 64-bit space:
+
+ We configure an M64 window to cover the entire region of address space
+ that has been assigned by FW for the PHB (about 64GB, ignore the space
+ for the M32, it comes out of a different "reserve"). We configure it
+ as segmented.
+
+ Then we do the same thing as with M32, using the bridge alignment
+ trick, to match to those giant segments.
+
+ Since we cannot remap, we have two additional constraints:
+
+ - We do the PE# allocation *after* the 64-bit space has been assigned
+ because the addresses we use directly determine the PE#. We then
+ update the M32 PE# for the devices that use both 32-bit and 64-bit
+ spaces or assign the remaining PE# to 32-bit only devices.
+
+ - We cannot "group" segments in HW, so if a device ends up using more
+ than one segment, we end up with more than one PE#. There is a HW
+ mechanism to make the freeze state cascade to "companion" PEs but
+ that only works for PCIe error messages (typically used so that if
+ you freeze a switch, it freezes all its children). So we do it in
+ SW. We lose a bit of effectiveness of EEH in that case, but that's
+ the best we found. So when any of the PEs freezes, we freeze the
+ other ones for that "domain". We thus introduce the concept of
+ "master PE" which is the one used for DMA, MSIs, etc., and "secondary
+ PEs" that are used for the remaining M64 segments.
+
+ We would like to investigate using additional M64 windows in "single
+ PE" mode to overlay over specific BARs to work around some of that, for
+ example for devices with very large BARs, e.g., GPUs. It would make
+ sense, but we haven't done it yet.
+
+3. Considerations for SR-IOV on PowerKVM
+
+ * SR-IOV Background
+
+ The PCIe SR-IOV feature allows a single Physical Function (PF) to
+ support several Virtual Functions (VFs). Registers in the PF's SR-IOV
+ Capability control the number of VFs and whether they are enabled.
+
+ When VFs are enabled, they appear in Configuration Space like normal
+ PCI devices, but the BARs in VF config space headers are unusual. For
+ a non-VF device, software uses BARs in the config space header to
+ discover the BAR sizes and assign addresses for them. For VF devices,
+ software uses VF BAR registers in the *PF* SR-IOV Capability to
+ discover sizes and assign addresses. The BARs in the VF's config space
+ header are read-only zeros.
+
+ When a VF BAR in the PF SR-IOV Capability is programmed, it sets the
+ base address for all the corresponding VF(n) BARs. For example, if the
+ PF SR-IOV Capability is programmed to enable eight VFs, and it has a
+ 1MB VF BAR0, the address in that VF BAR sets the base of an 8MB region.
+ This region is divided into eight contiguous 1MB regions, each of which
+ is a BAR0 for one of the VFs. Note that even though the VF BAR
+ describes an 8MB region, the alignment requirement is for a single VF,
+ i.e., 1MB in this example.
+
+ There are several strategies for isolating VFs in PEs:
+
+ - M32 window: There's one M32 window, and it is split into 256
+ equally-sized segments. The finest granularity possible is a 256MB
+ window with 1MB segments. VF BARs that are 1MB or larger could be
+ mapped to separate PEs in this window. Each segment can be
+ individually mapped to a PE via the lookup table, so this is quite
+ flexible, but it works best when all the VF BARs are the same size. If
+ they are different sizes, the entire window has to be small enough that
+ the segment size matches the smallest VF BAR, which means larger VF
+ BARs span several segments.
+
+ - Non-segmented M64 window: A non-segmented M64 window is mapped entirely
+ to a single PE, so it could only isolate one VF.
+
+ - Single segmented M64 windows: A segmented M64 window could be used just
+ like the M32 window, but the segments can't be individually mapped to
+ PEs (the segment number is the PE#), so there isn't as much
+ flexibility. A VF with multiple BARs would have to be in a "domain" of
+ multiple PEs, which is not as well isolated as a single PE.
+
+ - Multiple segmented M64 windows: As usual, each window is split into 256
+ equally-sized segments, and the segment number is the PE#. But if we
+ use several M64 windows, they can be set to different base addresses
+ and different segment sizes. If we have VFs that each have a 1MB BAR
+ and a 32MB BAR, we could use one M64 window to assign 1MB segments and
+ another M64 window to assign 32MB segments.
+
+ Finally, the plan to use M64 windows for SR-IOV, which will be described
+ more in the next two sections. For a given VF BAR, we need to
+ effectively reserve the entire 256 segments (256 * VF BAR size) and
+ position the VF BAR to start at the beginning of a free range of
+ segments/PEs inside that M64 window.
+
+ The goal is of course to be able to give a separate PE for each VF.
+
+ The IODA2 platform has 16 M64 windows, which are used to map MMIO
+ range to PE#. Each M64 window defines one MMIO range and this range is
+ divided into 256 segments, with each segment corresponding to one PE.
+
+ We decide to leverage this M64 window to map VFs to individual PEs, since
+ SR-IOV VF BARs are all the same size.
+
+ But doing so introduces another problem: total_VFs is usually smaller
+ than the number of M64 window segments, so if we map one VF BAR directly
+ to one M64 window, some part of the M64 window will map to another
+ device's MMIO range.
+
+ IODA supports 256 PEs, so segmented windows contain 256 segments, so if
+ total_VFs is less than 256, we have the situation in Figure 1.0, where
+ segments [total_VFs, 255] of the M64 window may map to some MMIO range on
+ other devices:
+
+ 0 1 total_VFs - 1
+ +------+------+- -+------+------+
+ | | | ... | | |
+ +------+------+- -+------+------+
+
+ VF(n) BAR space
+
+ 0 1 total_VFs - 1 255
+ +------+------+- -+------+------+- -+------+------+
+ | | | ... | | | ... | | |
+ +------+------+- -+------+------+- -+------+------+
+
+ M64 window
+
+ Figure 1.0 Direct map VF(n) BAR space
+
+ Our current solution is to allocate 256 segments even if the VF(n) BAR
+ space doesn't need that much, as shown in Figure 1.1:
+
+ 0 1 total_VFs - 1 255
+ +------+------+- -+------+------+- -+------+------+
+ | | | ... | | | ... | | |
+ +------+------+- -+------+------+- -+------+------+
+
+ VF(n) BAR space + extra
+
+ 0 1 total_VFs - 1 255
+ +------+------+- -+------+------+- -+------+------+
+ | | | ... | | | ... | | |
+ +------+------+- -+------+------+- -+------+------+
+
+ M64 window
+
+ Figure 1.1 Map VF(n) BAR space + extra
+
+ Allocating the extra space ensures that the entire M64 window will be
+ assigned to this one SR-IOV device and none of the space will be
+ available for other devices. Note that this only expands the space
+ reserved in software; there are still only total_VFs VFs, and they only
+ respond to segments [0, total_VFs - 1]. There's nothing in hardware that
+ responds to segments [total_VFs, 255].
+
+4. Implications for the Generic PCI Code
+
+The PCIe SR-IOV spec requires that the base of the VF(n) BAR space be
+aligned to the size of an individual VF BAR.
+
+In IODA2, the MMIO address determines the PE#. If the address is in an M32
+window, we can set the PE# by updating the table that translates segments
+to PE#s. Similarly, if the address is in an unsegmented M64 window, we can
+set the PE# for the window. But if it's in a segmented M64 window, the
+segment number is the PE#.
+
+Therefore, the only way to control the PE# for a VF is to change the base
+of the VF(n) BAR space in the VF BAR. If the PCI core allocates the exact
+amount of space required for the VF(n) BAR space, the VF BAR value is fixed
+and cannot be changed.
+
+On the other hand, if the PCI core allocates additional space, the VF BAR
+value can be changed as long as the entire VF(n) BAR space remains inside
+the space allocated by the core.
+
+Ideally the segment size will be the same as an individual VF BAR size.
+Then each VF will be in its own PE. The VF BARs (and therefore the PE#s)
+are contiguous. If VF0 is in PE(x), then VF(n) is in PE(x+n). If we
+allocate 256 segments, there are (256 - numVFs) choices for the PE# of VF0.
+
+If the segment size is smaller than the VF BAR size, it will take several
+segments to cover a VF BAR, and a VF will be in several PEs. This is
+possible, but the isolation isn't as good, and it reduces the number of PE#
+choices because instead of consuming only numVFs segments, the VF(n) BAR
+space will consume (numVFs * n) segments. That means there aren't as many
+available segments for adjusting base of the VF(n) BAR space.
diff --git a/Documentation/powerpc/pmu-ebb.txt b/Documentation/powerpc/pmu-ebb.txt
new file mode 100644
index 000000000..73cd163db
--- /dev/null
+++ b/Documentation/powerpc/pmu-ebb.txt
@@ -0,0 +1,137 @@
+PMU Event Based Branches
+========================
+
+Event Based Branches (EBBs) are a feature which allows the hardware to
+branch directly to a specified user space address when certain events occur.
+
+The full specification is available in Power ISA v2.07:
+
+ https://www.power.org/documentation/power-isa-version-2-07/
+
+One type of event for which EBBs can be configured is PMU exceptions. This
+document describes the API for configuring the Power PMU to generate EBBs,
+using the Linux perf_events API.
+
+
+Terminology
+-----------
+
+Throughout this document we will refer to an "EBB event" or "EBB events". This
+just refers to a struct perf_event which has set the "EBB" flag in its
+attr.config. All events which can be configured on the hardware PMU are
+possible "EBB events".
+
+
+Background
+----------
+
+When a PMU EBB occurs it is delivered to the currently running process. As such
+EBBs can only sensibly be used by programs for self-monitoring.
+
+It is a feature of the perf_events API that events can be created on other
+processes, subject to standard permission checks. This is also true of EBB
+events, however unless the target process enables EBBs (via mtspr(BESCR)) no
+EBBs will ever be delivered.
+
+This makes it possible for a process to enable EBBs for itself, but not
+actually configure any events. At a later time another process can come along
+and attach an EBB event to the process, which will then cause EBBs to be
+delivered to the first process. It's not clear if this is actually useful.
+
+
+When the PMU is configured for EBBs, all PMU interrupts are delivered to the
+user process. This means once an EBB event is scheduled on the PMU, no non-EBB
+events can be configured. This means that EBB events can not be run
+concurrently with regular 'perf' commands, or any other perf events.
+
+It is however safe to run 'perf' commands on a process which is using EBBs. The
+kernel will in general schedule the EBB event, and perf will be notified that
+its events could not run.
+
+The exclusion between EBB events and regular events is implemented using the
+existing "pinned" and "exclusive" attributes of perf_events. This means EBB
+events will be given priority over other events, unless they are also pinned.
+If an EBB event and a regular event are both pinned, then whichever is enabled
+first will be scheduled and the other will be put in error state. See the
+section below titled "Enabling an EBB event" for more information.
+
+
+Creating an EBB event
+---------------------
+
+To request that an event is counted using EBB, the event code should have bit
+63 set.
+
+EBB events must be created with a particular, and restrictive, set of
+attributes - this is so that they interoperate correctly with the rest of the
+perf_events subsystem.
+
+An EBB event must be created with the "pinned" and "exclusive" attributes set.
+Note that if you are creating a group of EBB events, only the leader can have
+these attributes set.
+
+An EBB event must NOT set any of the "inherit", "sample_period", "freq" or
+"enable_on_exec" attributes.
+
+An EBB event must be attached to a task. This is specified to perf_event_open()
+by passing a pid value, typically 0 indicating the current task.
+
+All events in a group must agree on whether they want EBB. That is all events
+must request EBB, or none may request EBB.
+
+EBB events must specify the PMC they are to be counted on. This ensures
+userspace is able to reliably determine which PMC the event is scheduled on.
+
+
+Enabling an EBB event
+---------------------
+
+Once an EBB event has been successfully opened, it must be enabled with the
+perf_events API. This can be achieved either via the ioctl() interface, or the
+prctl() interface.
+
+However, due to the design of the perf_events API, enabling an event does not
+guarantee that it has been scheduled on the PMU. To ensure that the EBB event
+has been scheduled on the PMU, you must perform a read() on the event. If the
+read() returns EOF, then the event has not been scheduled and EBBs are not
+enabled.
+
+This behaviour occurs because the EBB event is pinned and exclusive. When the
+EBB event is enabled it will force all other non-pinned events off the PMU. In
+this case the enable will be successful. However if there is already an event
+pinned on the PMU then the enable will not be successful.
+
+
+Reading an EBB event
+--------------------
+
+It is possible to read() from an EBB event. However the results are
+meaningless. Because interrupts are being delivered to the user process the
+kernel is not able to count the event, and so will return a junk value.
+
+
+Closing an EBB event
+--------------------
+
+When an EBB event is finished with, you can close it using close() as for any
+regular event. If this is the last EBB event the PMU will be deconfigured and
+no further PMU EBBs will be delivered.
+
+
+EBB Handler
+-----------
+
+The EBB handler is just regular userspace code, however it must be written in
+the style of an interrupt handler. When the handler is entered all registers
+are live (possibly) and so must be saved somehow before the handler can invoke
+other code.
+
+It's up to the program how to handle this. For C programs a relatively simple
+option is to create an interrupt frame on the stack and save registers there.
+
+Fork
+----
+
+EBB events are not inherited across fork. If the child process wishes to use
+EBBs it should open a new event for itself. Similarly the EBB state in
+BESCR/EBBHR/EBBRR is cleared across fork().
diff --git a/Documentation/powerpc/ptrace.txt b/Documentation/powerpc/ptrace.txt
new file mode 100644
index 000000000..99c5ce88d
--- /dev/null
+++ b/Documentation/powerpc/ptrace.txt
@@ -0,0 +1,151 @@
+GDB intends to support the following hardware debug features of BookE
+processors:
+
+4 hardware breakpoints (IAC)
+2 hardware watchpoints (read, write and read-write) (DAC)
+2 value conditions for the hardware watchpoints (DVC)
+
+For that, we need to extend ptrace so that GDB can query and set these
+resources. Since we're extending, we're trying to create an interface
+that's extendable and that covers both BookE and server processors, so
+that GDB doesn't need to special-case each of them. We added the
+following 3 new ptrace requests.
+
+1. PTRACE_PPC_GETHWDEBUGINFO
+
+Query for GDB to discover the hardware debug features. The main info to
+be returned here is the minimum alignment for the hardware watchpoints.
+BookE processors don't have restrictions here, but server processors have
+an 8-byte alignment restriction for hardware watchpoints. We'd like to avoid
+adding special cases to GDB based on what it sees in AUXV.
+
+Since we're at it, we added other useful info that the kernel can return to
+GDB: this query will return the number of hardware breakpoints, hardware
+watchpoints and whether it supports a range of addresses and a condition.
+The query will fill the following structure provided by the requesting process:
+
+struct ppc_debug_info {
+ unit32_t version;
+ unit32_t num_instruction_bps;
+ unit32_t num_data_bps;
+ unit32_t num_condition_regs;
+ unit32_t data_bp_alignment;
+ unit32_t sizeof_condition; /* size of the DVC register */
+ uint64_t features; /* bitmask of the individual flags */
+};
+
+features will have bits indicating whether there is support for:
+
+#define PPC_DEBUG_FEATURE_INSN_BP_RANGE 0x1
+#define PPC_DEBUG_FEATURE_INSN_BP_MASK 0x2
+#define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x4
+#define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x8
+#define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10
+
+2. PTRACE_SETHWDEBUG
+
+Sets a hardware breakpoint or watchpoint, according to the provided structure:
+
+struct ppc_hw_breakpoint {
+ uint32_t version;
+#define PPC_BREAKPOINT_TRIGGER_EXECUTE 0x1
+#define PPC_BREAKPOINT_TRIGGER_READ 0x2
+#define PPC_BREAKPOINT_TRIGGER_WRITE 0x4
+ uint32_t trigger_type; /* only some combinations allowed */
+#define PPC_BREAKPOINT_MODE_EXACT 0x0
+#define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE 0x1
+#define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE 0x2
+#define PPC_BREAKPOINT_MODE_MASK 0x3
+ uint32_t addr_mode; /* address match mode */
+
+#define PPC_BREAKPOINT_CONDITION_MODE 0x3
+#define PPC_BREAKPOINT_CONDITION_NONE 0x0
+#define PPC_BREAKPOINT_CONDITION_AND 0x1
+#define PPC_BREAKPOINT_CONDITION_EXACT 0x1 /* different name for the same thing as above */
+#define PPC_BREAKPOINT_CONDITION_OR 0x2
+#define PPC_BREAKPOINT_CONDITION_AND_OR 0x3
+#define PPC_BREAKPOINT_CONDITION_BE_ALL 0x00ff0000 /* byte enable bits */
+#define PPC_BREAKPOINT_CONDITION_BE(n) (1<<((n)+16))
+ uint32_t condition_mode; /* break/watchpoint condition flags */
+
+ uint64_t addr;
+ uint64_t addr2;
+ uint64_t condition_value;
+};
+
+A request specifies one event, not necessarily just one register to be set.
+For instance, if the request is for a watchpoint with a condition, both the
+DAC and DVC registers will be set in the same request.
+
+With this GDB can ask for all kinds of hardware breakpoints and watchpoints
+that the BookE supports. COMEFROM breakpoints available in server processors
+are not contemplated, but that is out of the scope of this work.
+
+ptrace will return an integer (handle) uniquely identifying the breakpoint or
+watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG
+request to ask for its removal. Return -ENOSPC if the requested breakpoint
+can't be allocated on the registers.
+
+Some examples of using the structure to:
+
+- set a breakpoint in the first breakpoint register
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE;
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) address;
+ p.addr2 = 0;
+ p.condition_value = 0;
+
+- set a watchpoint which triggers on reads in the second watchpoint register
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ;
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) address;
+ p.addr2 = 0;
+ p.condition_value = 0;
+
+- set a watchpoint which triggers only with a specific value
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ;
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_AND | PPC_BREAKPOINT_CONDITION_BE_ALL;
+ p.addr = (uint64_t) address;
+ p.addr2 = 0;
+ p.condition_value = (uint64_t) condition;
+
+- set a ranged hardware breakpoint
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE;
+ p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) begin_range;
+ p.addr2 = (uint64_t) end_range;
+ p.condition_value = 0;
+
+- set a watchpoint in server processors (BookS)
+
+ p.version = 1;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
+ p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+ or
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) begin_range;
+ /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where
+ * addr2 - addr <= 8 Bytes.
+ */
+ p.addr2 = (uint64_t) end_range;
+ p.condition_value = 0;
+
+3. PTRACE_DELHWDEBUG
+
+Takes an integer which identifies an existing breakpoint or watchpoint
+(i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the
+corresponding breakpoint or watchpoint..
diff --git a/Documentation/powerpc/qe_firmware.txt b/Documentation/powerpc/qe_firmware.txt
new file mode 100644
index 000000000..2031ddb33
--- /dev/null
+++ b/Documentation/powerpc/qe_firmware.txt
@@ -0,0 +1,295 @@
+ Freescale QUICC Engine Firmware Uploading
+ -----------------------------------------
+
+(c) 2007 Timur Tabi <timur at freescale.com>,
+ Freescale Semiconductor
+
+Table of Contents
+=================
+
+ I - Software License for Firmware
+
+ II - Microcode Availability
+
+ III - Description and Terminology
+
+ IV - Microcode Programming Details
+
+ V - Firmware Structure Layout
+
+ VI - Sample Code for Creating Firmware Files
+
+Revision Information
+====================
+
+November 30, 2007: Rev 1.0 - Initial version
+
+I - Software License for Firmware
+=================================
+
+Each firmware file comes with its own software license. For information on
+the particular license, please see the license text that is distributed with
+the firmware.
+
+II - Microcode Availability
+===========================
+
+Firmware files are distributed through various channels. Some are available on
+http://opensource.freescale.com. For other firmware files, please contact
+your Freescale representative or your operating system vendor.
+
+III - Description and Terminology
+================================
+
+In this document, the term 'microcode' refers to the sequence of 32-bit
+integers that compose the actual QE microcode.
+
+The term 'firmware' refers to a binary blob that contains the microcode as
+well as other data that
+
+ 1) describes the microcode's purpose
+ 2) describes how and where to upload the microcode
+ 3) specifies the values of various registers
+ 4) includes additional data for use by specific device drivers
+
+Firmware files are binary files that contain only a firmware.
+
+IV - Microcode Programming Details
+===================================
+
+The QE architecture allows for only one microcode present in I-RAM for each
+RISC processor. To replace any current microcode, a full QE reset (which
+disables the microcode) must be performed first.
+
+QE microcode is uploaded using the following procedure:
+
+1) The microcode is placed into I-RAM at a specific location, using the
+ IRAM.IADD and IRAM.IDATA registers.
+
+2) The CERCR.CIR bit is set to 0 or 1, depending on whether the firmware
+ needs split I-RAM. Split I-RAM is only meaningful for SOCs that have
+ QEs with multiple RISC processors, such as the 8360. Splitting the I-RAM
+ allows each processor to run a different microcode, effectively creating an
+ asymmetric multiprocessing (AMP) system.
+
+3) The TIBCR trap registers are loaded with the addresses of the trap handlers
+ in the microcode.
+
+4) The RSP.ECCR register is programmed with the value provided.
+
+5) If necessary, device drivers that need the virtual traps and extended mode
+ data will use them.
+
+Virtual Microcode Traps
+
+These virtual traps are conditional branches in the microcode. These are
+"soft" provisional introduced in the ROMcode in order to enable higher
+flexibility and save h/w traps If new features are activated or an issue is
+being fixed in the RAM package utilizing they should be activated. This data
+structure signals the microcode which of these virtual traps is active.
+
+This structure contains 6 words that the application should copy to some
+specific been defined. This table describes the structure.
+
+ ---------------------------------------------------------------
+ | Offset in | | Destination Offset | Size of |
+ | array | Protocol | within PRAM | Operand |
+ --------------------------------------------------------------|
+ | 0 | Ethernet | 0xF8 | 4 bytes |
+ | | interworking | | |
+ ---------------------------------------------------------------
+ | 4 | ATM | 0xF8 | 4 bytes |
+ | | interworking | | |
+ ---------------------------------------------------------------
+ | 8 | PPP | 0xF8 | 4 bytes |
+ | | interworking | | |
+ ---------------------------------------------------------------
+ | 12 | Ethernet RX | 0x22 | 1 byte |
+ | | Distributor Page | | |
+ ---------------------------------------------------------------
+ | 16 | ATM Globtal | 0x28 | 1 byte |
+ | | Params Table | | |
+ ---------------------------------------------------------------
+ | 20 | Insert Frame | 0xF8 | 4 bytes |
+ ---------------------------------------------------------------
+
+
+Extended Modes
+
+This is a double word bit array (64 bits) that defines special functionality
+which has an impact on the softwarew drivers. Each bit has its own impact
+and has special instructions for the s/w associated with it. This structure is
+described in this table:
+
+ -----------------------------------------------------------------------
+ | Bit # | Name | Description |
+ -----------------------------------------------------------------------
+ | 0 | General | Indicates that prior to each host command |
+ | | push command | given by the application, the software must |
+ | | | assert a special host command (push command)|
+ | | | CECDR = 0x00800000. |
+ | | | CECR = 0x01c1000f. |
+ -----------------------------------------------------------------------
+ | 1 | UCC ATM | Indicates that after issuing ATM RX INIT |
+ | | RX INIT | command, the host must issue another special|
+ | | push command | command (push command) and immediately |
+ | | | following that re-issue the ATM RX INIT |
+ | | | command. (This makes the sequence of |
+ | | | initializing the ATM receiver a sequence of |
+ | | | three host commands) |
+ | | | CECDR = 0x00800000. |
+ | | | CECR = 0x01c1000f. |
+ -----------------------------------------------------------------------
+ | 2 | Add/remove | Indicates that following the specific host |
+ | | command | command: "Add/Remove entry in Hash Lookup |
+ | | validation | Table" used in Interworking setup, the user |
+ | | | must issue another command. |
+ | | | CECDR = 0xce000003. |
+ | | | CECR = 0x01c10f58. |
+ -----------------------------------------------------------------------
+ | 3 | General push | Indicates that the s/w has to initialize |
+ | | command | some pointers in the Ethernet thread pages |
+ | | | which are used when Header Compression is |
+ | | | activated. The full details of these |
+ | | | pointers is located in the software drivers.|
+ -----------------------------------------------------------------------
+ | 4 | General push | Indicates that after issuing Ethernet TX |
+ | | command | INIT command, user must issue this command |
+ | | | for each SNUM of Ethernet TX thread. |
+ | | | CECDR = 0x00800003. |
+ | | | CECR = 0x7'b{0}, 8'b{Enet TX thread SNUM}, |
+ | | | 1'b{1}, 12'b{0}, 4'b{1} |
+ -----------------------------------------------------------------------
+ | 5 - 31 | N/A | Reserved, set to zero. |
+ -----------------------------------------------------------------------
+
+V - Firmware Structure Layout
+==============================
+
+QE microcode from Freescale is typically provided as a header file. This
+header file contains macros that define the microcode binary itself as well as
+some other data used in uploading that microcode. The format of these files
+do not lend themselves to simple inclusion into other code. Hence,
+the need for a more portable format. This section defines that format.
+
+Instead of distributing a header file, the microcode and related data are
+embedded into a binary blob. This blob is passed to the qe_upload_firmware()
+function, which parses the blob and performs everything necessary to upload
+the microcode.
+
+All integers are big-endian. See the comments for function
+qe_upload_firmware() for up-to-date implementation information.
+
+This structure supports versioning, where the version of the structure is
+embedded into the structure itself. To ensure forward and backwards
+compatibility, all versions of the structure must use the same 'qe_header'
+structure at the beginning.
+
+'header' (type: struct qe_header):
+ The 'length' field is the size, in bytes, of the entire structure,
+ including all the microcode embedded in it, as well as the CRC (if
+ present).
+
+ The 'magic' field is an array of three bytes that contains the letters
+ 'Q', 'E', and 'F'. This is an identifier that indicates that this
+ structure is a QE Firmware structure.
+
+ The 'version' field is a single byte that indicates the version of this
+ structure. If the layout of the structure should ever need to be
+ changed to add support for additional types of microcode, then the
+ version number should also be changed.
+
+The 'id' field is a null-terminated string(suitable for printing) that
+identifies the firmware.
+
+The 'count' field indicates the number of 'microcode' structures. There
+must be one and only one 'microcode' structure for each RISC processor.
+Therefore, this field also represents the number of RISC processors for this
+SOC.
+
+The 'soc' structure contains the SOC numbers and revisions used to match
+the microcode to the SOC itself. Normally, the microcode loader should
+check the data in this structure with the SOC number and revisions, and
+only upload the microcode if there's a match. However, this check is not
+made on all platforms.
+
+Although it is not recommended, you can specify '0' in the soc.model
+field to skip matching SOCs altogether.
+
+The 'model' field is a 16-bit number that matches the actual SOC. The
+'major' and 'minor' fields are the major and minor revision numbers,
+respectively, of the SOC.
+
+For example, to match the 8323, revision 1.0:
+ soc.model = 8323
+ soc.major = 1
+ soc.minor = 0
+
+'padding' is necessary for structure alignment. This field ensures that the
+'extended_modes' field is aligned on a 64-bit boundary.
+
+'extended_modes' is a bitfield that defines special functionality which has an
+impact on the device drivers. Each bit has its own impact and has special
+instructions for the driver associated with it. This field is stored in
+the QE library and available to any driver that calles qe_get_firmware_info().
+
+'vtraps' is an array of 8 words that contain virtual trap values for each
+virtual traps. As with 'extended_modes', this field is stored in the QE
+library and available to any driver that calles qe_get_firmware_info().
+
+'microcode' (type: struct qe_microcode):
+ For each RISC processor there is one 'microcode' structure. The first
+ 'microcode' structure is for the first RISC, and so on.
+
+ The 'id' field is a null-terminated string suitable for printing that
+ identifies this particular microcode.
+
+ 'traps' is an array of 16 words that contain hardware trap values
+ for each of the 16 traps. If trap[i] is 0, then this particular
+ trap is to be ignored (i.e. not written to TIBCR[i]). The entire value
+ is written as-is to the TIBCR[i] register, so be sure to set the EN
+ and T_IBP bits if necessary.
+
+ 'eccr' is the value to program into the ECCR register.
+
+ 'iram_offset' is the offset into IRAM to start writing the
+ microcode.
+
+ 'count' is the number of 32-bit words in the microcode.
+
+ 'code_offset' is the offset, in bytes, from the beginning of this
+ structure where the microcode itself can be found. The first
+ microcode binary should be located immediately after the 'microcode'
+ array.
+
+ 'major', 'minor', and 'revision' are the major, minor, and revision
+ version numbers, respectively, of the microcode. If all values are 0,
+ then these fields are ignored.
+
+ 'reserved' is necessary for structure alignment. Since 'microcode'
+ is an array, the 64-bit 'extended_modes' field needs to be aligned
+ on a 64-bit boundary, and this can only happen if the size of
+ 'microcode' is a multiple of 8 bytes. To ensure that, we add
+ 'reserved'.
+
+After the last microcode is a 32-bit CRC. It can be calculated using
+this algorithm:
+
+u32 crc32(const u8 *p, unsigned int len)
+{
+ unsigned int i;
+ u32 crc = 0;
+
+ while (len--) {
+ crc ^= *p++;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+ }
+ return crc;
+}
+
+VI - Sample Code for Creating Firmware Files
+============================================
+
+A Python program that creates firmware binaries from the header files normally
+distributed by Freescale can be found on http://opensource.freescale.com.
diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt
new file mode 100644
index 000000000..ded69794a
--- /dev/null
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -0,0 +1,198 @@
+Transactional Memory support
+============================
+
+POWER kernel support for this feature is currently limited to supporting
+its use by user programs. It is not currently used by the kernel itself.
+
+This file aims to sum up how it is supported by Linux and what behaviour you
+can expect from your user programs.
+
+
+Basic overview
+==============
+
+Hardware Transactional Memory is supported on POWER8 processors, and is a
+feature that enables a different form of atomic memory access. Several new
+instructions are presented to delimit transactions; transactions are
+guaranteed to either complete atomically or roll back and undo any partial
+changes.
+
+A simple transaction looks like this:
+
+begin_move_money:
+ tbegin
+ beq abort_handler
+
+ ld r4, SAVINGS_ACCT(r3)
+ ld r5, CURRENT_ACCT(r3)
+ subi r5, r5, 1
+ addi r4, r4, 1
+ std r4, SAVINGS_ACCT(r3)
+ std r5, CURRENT_ACCT(r3)
+
+ tend
+
+ b continue
+
+abort_handler:
+ ... test for odd failures ...
+
+ /* Retry the transaction if it failed because it conflicted with
+ * someone else: */
+ b begin_move_money
+
+
+The 'tbegin' instruction denotes the start point, and 'tend' the end point.
+Between these points the processor is in 'Transactional' state; any memory
+references will complete in one go if there are no conflicts with other
+transactional or non-transactional accesses within the system. In this
+example, the transaction completes as though it were normal straight-line code
+IF no other processor has touched SAVINGS_ACCT(r3) or CURRENT_ACCT(r3); an
+atomic move of money from the current account to the savings account has been
+performed. Even though the normal ld/std instructions are used (note no
+lwarx/stwcx), either *both* SAVINGS_ACCT(r3) and CURRENT_ACCT(r3) will be
+updated, or neither will be updated.
+
+If, in the meantime, there is a conflict with the locations accessed by the
+transaction, the transaction will be aborted by the CPU. Register and memory
+state will roll back to that at the 'tbegin', and control will continue from
+'tbegin+4'. The branch to abort_handler will be taken this second time; the
+abort handler can check the cause of the failure, and retry.
+
+Checkpointed registers include all GPRs, FPRs, VRs/VSRs, LR, CCR/CR, CTR, FPCSR
+and a few other status/flag regs; see the ISA for details.
+
+Causes of transaction aborts
+============================
+
+- Conflicts with cache lines used by other processors
+- Signals
+- Context switches
+- See the ISA for full documentation of everything that will abort transactions.
+
+
+Syscalls
+========
+
+Performing syscalls from within transaction is not recommended, and can lead
+to unpredictable results.
+
+Syscalls do not by design abort transactions, but beware: The kernel code will
+not be running in transactional state. The effect of syscalls will always
+remain visible, but depending on the call they may abort your transaction as a
+side-effect, read soon-to-be-aborted transactional data that should not remain
+invisible, etc. If you constantly retry a transaction that constantly aborts
+itself by calling a syscall, you'll have a livelock & make no progress.
+
+Simple syscalls (e.g. sigprocmask()) "could" be OK. Even things like write()
+from, say, printf() should be OK as long as the kernel does not access any
+memory that was accessed transactionally.
+
+Consider any syscalls that happen to work as debug-only -- not recommended for
+production use. Best to queue them up till after the transaction is over.
+
+
+Signals
+=======
+
+Delivery of signals (both sync and async) during transactions provides a second
+thread state (ucontext/mcontext) to represent the second transactional register
+state. Signal delivery 'treclaim's to capture both register states, so signals
+abort transactions. The usual ucontext_t passed to the signal handler
+represents the checkpointed/original register state; the signal appears to have
+arisen at 'tbegin+4'.
+
+If the sighandler ucontext has uc_link set, a second ucontext has been
+delivered. For future compatibility the MSR.TS field should be checked to
+determine the transactional state -- if so, the second ucontext in uc->uc_link
+represents the active transactional registers at the point of the signal.
+
+For 64-bit processes, uc->uc_mcontext.regs->msr is a full 64-bit MSR and its TS
+field shows the transactional mode.
+
+For 32-bit processes, the mcontext's MSR register is only 32 bits; the top 32
+bits are stored in the MSR of the second ucontext, i.e. in
+uc->uc_link->uc_mcontext.regs->msr. The top word contains the transactional
+state TS.
+
+However, basic signal handlers don't need to be aware of transactions
+and simply returning from the handler will deal with things correctly:
+
+Transaction-aware signal handlers can read the transactional register state
+from the second ucontext. This will be necessary for crash handlers to
+determine, for example, the address of the instruction causing the SIGSEGV.
+
+Example signal handler:
+
+ void crash_handler(int sig, siginfo_t *si, void *uc)
+ {
+ ucontext_t *ucp = uc;
+ ucontext_t *transactional_ucp = ucp->uc_link;
+
+ if (ucp_link) {
+ u64 msr = ucp->uc_mcontext.regs->msr;
+ /* May have transactional ucontext! */
+#ifndef __powerpc64__
+ msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32;
+#endif
+ if (MSR_TM_ACTIVE(msr)) {
+ /* Yes, we crashed during a transaction. Oops. */
+ fprintf(stderr, "Transaction to be restarted at 0x%llx, but "
+ "crashy instruction was at 0x%llx\n",
+ ucp->uc_mcontext.regs->nip,
+ transactional_ucp->uc_mcontext.regs->nip);
+ }
+ }
+
+ fix_the_problem(ucp->dar);
+ }
+
+When in an active transaction that takes a signal, we need to be careful with
+the stack. It's possible that the stack has moved back up after the tbegin.
+The obvious case here is when the tbegin is called inside a function that
+returns before a tend. In this case, the stack is part of the checkpointed
+transactional memory state. If we write over this non transactionally or in
+suspend, we are in trouble because if we get a tm abort, the program counter and
+stack pointer will be back at the tbegin but our in memory stack won't be valid
+anymore.
+
+To avoid this, when taking a signal in an active transaction, we need to use
+the stack pointer from the checkpointed state, rather than the speculated
+state. This ensures that the signal context (written tm suspended) will be
+written below the stack required for the rollback. The transaction is aborted
+because of the treclaim, so any memory written between the tbegin and the
+signal will be rolled back anyway.
+
+For signals taken in non-TM or suspended mode, we use the
+normal/non-checkpointed stack pointer.
+
+
+Failure cause codes used by kernel
+==================================
+
+These are defined in <asm/reg.h>, and distinguish different reasons why the
+kernel aborted a transaction:
+
+ TM_CAUSE_RESCHED Thread was rescheduled.
+ TM_CAUSE_TLBI Software TLB invalid.
+ TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap.
+ TM_CAUSE_SYSCALL Currently unused; future syscalls that must abort
+ transactions for consistency will use this.
+ TM_CAUSE_SIGNAL Signal delivered.
+ TM_CAUSE_MISC Currently unused.
+ TM_CAUSE_ALIGNMENT Alignment fault.
+ TM_CAUSE_EMULATE Emulation that touched memory.
+
+These can be checked by the user program's abort handler as TEXASR[0:7]. If
+bit 7 is set, it indicates that the error is consider persistent. For example
+a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.
+
+GDB
+===
+
+GDB and ptrace are not currently TM-aware. If one stops during a transaction,
+it looks like the transaction has just started (the checkpointed state is
+presented). The transaction cannot then be continued and will take the failure
+handler route. Furthermore, the transactional 2nd register state will be
+inaccessible. GDB can currently be used on programs using TM, but not sensibly
+in parts within transactions.
diff --git a/Documentation/pps/pps.txt b/Documentation/pps/pps.txt
new file mode 100644
index 000000000..c03b1be5e
--- /dev/null
+++ b/Documentation/pps/pps.txt
@@ -0,0 +1,233 @@
+
+ PPS - Pulse Per Second
+ ----------------------
+
+(C) Copyright 2007 Rodolfo Giometti <giometti@enneenne.com>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+
+
+Overview
+--------
+
+LinuxPPS provides a programming interface (API) to define in the
+system several PPS sources.
+
+PPS means "pulse per second" and a PPS source is just a device which
+provides a high precision signal each second so that an application
+can use it to adjust system clock time.
+
+A PPS source can be connected to a serial port (usually to the Data
+Carrier Detect pin) or to a parallel port (ACK-pin) or to a special
+CPU's GPIOs (this is the common case in embedded systems) but in each
+case when a new pulse arrives the system must apply to it a timestamp
+and record it for userland.
+
+Common use is the combination of the NTPD as userland program, with a
+GPS receiver as PPS source, to obtain a wallclock-time with
+sub-millisecond synchronisation to UTC.
+
+
+RFC considerations
+------------------
+
+While implementing a PPS API as RFC 2783 defines and using an embedded
+CPU GPIO-Pin as physical link to the signal, I encountered a deeper
+problem:
+
+ At startup it needs a file descriptor as argument for the function
+ time_pps_create().
+
+This implies that the source has a /dev/... entry. This assumption is
+ok for the serial and parallel port, where you can do something
+useful besides(!) the gathering of timestamps as it is the central
+task for a PPS-API. But this assumption does not work for a single
+purpose GPIO line. In this case even basic file-related functionality
+(like read() and write()) makes no sense at all and should not be a
+precondition for the use of a PPS-API.
+
+The problem can be simply solved if you consider that a PPS source is
+not always connected with a GPS data source.
+
+So your programs should check if the GPS data source (the serial port
+for instance) is a PPS source too, and if not they should provide the
+possibility to open another device as PPS source.
+
+In LinuxPPS the PPS sources are simply char devices usually mapped
+into files /dev/pps0, /dev/pps1, etc..
+
+
+PPS with USB to serial devices
+------------------------------
+
+It is possible to grab the PPS from an USB to serial device. However,
+you should take into account the latencies and jitter introduced by
+the USB stack. Users has reported clock instability around +-1ms when
+synchronized with PPS through USB. This isn't suited for time server
+synchronization.
+
+If your device doesn't report PPS, you can check that the feature is
+supported by its driver. Most of the time, you only need to add a call
+to usb_serial_handle_dcd_change after checking the DCD status (see
+ch341 and pl2303 examples).
+
+
+Coding example
+--------------
+
+To register a PPS source into the kernel you should define a struct
+pps_source_info_s as follows:
+
+ static struct pps_source_info pps_ktimer_info = {
+ .name = "ktimer",
+ .path = "",
+ .mode = PPS_CAPTUREASSERT | PPS_OFFSETASSERT | \
+ PPS_ECHOASSERT | \
+ PPS_CANWAIT | PPS_TSFMT_TSPEC,
+ .echo = pps_ktimer_echo,
+ .owner = THIS_MODULE,
+ };
+
+and then calling the function pps_register_source() in your
+intialization routine as follows:
+
+ source = pps_register_source(&pps_ktimer_info,
+ PPS_CAPTUREASSERT | PPS_OFFSETASSERT);
+
+The pps_register_source() prototype is:
+
+ int pps_register_source(struct pps_source_info_s *info, int default_params)
+
+where "info" is a pointer to a structure that describes a particular
+PPS source, "default_params" tells the system what the initial default
+parameters for the device should be (it is obvious that these parameters
+must be a subset of ones defined in the struct
+pps_source_info_s which describe the capabilities of the driver).
+
+Once you have registered a new PPS source into the system you can
+signal an assert event (for example in the interrupt handler routine)
+just using:
+
+ pps_event(source, &ts, PPS_CAPTUREASSERT, ptr)
+
+where "ts" is the event's timestamp.
+
+The same function may also run the defined echo function
+(pps_ktimer_echo(), passing to it the "ptr" pointer) if the user
+asked for that... etc..
+
+Please see the file drivers/pps/clients/ktimer.c for example code.
+
+
+SYSFS support
+-------------
+
+If the SYSFS filesystem is enabled in the kernel it provides a new class:
+
+ $ ls /sys/class/pps/
+ pps0/ pps1/ pps2/
+
+Every directory is the ID of a PPS sources defined in the system and
+inside you find several files:
+
+ $ ls /sys/class/pps/pps0/
+ assert clear echo mode name path subsystem@ uevent
+
+Inside each "assert" and "clear" file you can find the timestamp and a
+sequence number:
+
+ $ cat /sys/class/pps/pps0/assert
+ 1170026870.983207967#8
+
+Where before the "#" is the timestamp in seconds; after it is the
+sequence number. Other files are:
+
+* echo: reports if the PPS source has an echo function or not;
+
+* mode: reports available PPS functioning modes;
+
+* name: reports the PPS source's name;
+
+* path: reports the PPS source's device path, that is the device the
+ PPS source is connected to (if it exists).
+
+
+Testing the PPS support
+-----------------------
+
+In order to test the PPS support even without specific hardware you can use
+the ktimer driver (see the client subsection in the PPS configuration menu)
+and the userland tools provided into Documentaion/pps/ directory.
+
+Once you have enabled the compilation of ktimer just modprobe it (if
+not statically compiled):
+
+ # modprobe ktimer
+
+and the run ppstest as follow:
+
+ $ ./ppstest /dev/pps0
+ trying PPS source "/dev/pps1"
+ found PPS source "/dev/pps1"
+ ok, found 1 source(s), now start fetching data...
+ source 0 - assert 1186592699.388832443, sequence: 364 - clear 0.000000000, sequence: 0
+ source 0 - assert 1186592700.388931295, sequence: 365 - clear 0.000000000, sequence: 0
+ source 0 - assert 1186592701.389032765, sequence: 366 - clear 0.000000000, sequence: 0
+
+Please, note that to compile userland programs you need the file timepps.h
+(see Documentation/pps/).
+
+
+Generators
+----------
+
+Sometimes one needs to be able not only to catch PPS signals but to produce
+them also. For example, running a distributed simulation, which requires
+computers' clock to be synchronized very tightly. One way to do this is to
+invent some complicated hardware solutions but it may be neither necessary
+nor affordable. The cheap way is to load a PPS generator on one of the
+computers (master) and PPS clients on others (slaves), and use very simple
+cables to deliver signals using parallel ports, for example.
+
+Parallel port cable pinout:
+pin name master slave
+1 STROBE *------ *
+2 D0 * | *
+3 D1 * | *
+4 D2 * | *
+5 D3 * | *
+6 D4 * | *
+7 D5 * | *
+8 D6 * | *
+9 D7 * | *
+10 ACK * ------*
+11 BUSY * *
+12 PE * *
+13 SEL * *
+14 AUTOFD * *
+15 ERROR * *
+16 INIT * *
+17 SELIN * *
+18-25 GND *-----------*
+
+Please note that parallel port interrupt occurs only on high->low transition,
+so it is used for PPS assert edge. PPS clear edge can be determined only
+using polling in the interrupt handler which actually can be done way more
+precisely because interrupt handling delays can be quite big and random. So
+current parport PPS generator implementation (pps_gen_parport module) is
+geared towards using the clear edge for time synchronization.
+
+Clear edge polling is done with disabled interrupts so it's better to select
+delay between assert and clear edge as small as possible to reduce system
+latencies. But if it is too small slave won't be able to capture clear edge
+transition. The default of 30us should be good enough in most situations.
+The delay can be selected using 'delay' pps_gen_parport module parameter.
diff --git a/Documentation/prctl/.gitignore b/Documentation/prctl/.gitignore
new file mode 100644
index 000000000..0b5c27447
--- /dev/null
+++ b/Documentation/prctl/.gitignore
@@ -0,0 +1,3 @@
+disable-tsc-ctxt-sw-stress-test
+disable-tsc-on-off-stress-test
+disable-tsc-test
diff --git a/Documentation/prctl/Makefile b/Documentation/prctl/Makefile
new file mode 100644
index 000000000..2948b7b12
--- /dev/null
+++ b/Documentation/prctl/Makefile
@@ -0,0 +1,8 @@
+# List of programs to build
+hostprogs-$(CONFIG_X86) := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test disable-tsc-test
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_disable-tsc-ctxt-sw-stress-test.o += -I$(objtree)/usr/include
+HOSTCFLAGS_disable-tsc-on-off-stress-test.o += -I$(objtree)/usr/include
+HOSTCFLAGS_disable-tsc-test.o += -I$(objtree)/usr/include
diff --git a/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
new file mode 100644
index 000000000..81fdd425a
--- /dev/null
+++ b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c
@@ -0,0 +1,97 @@
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * at context switches
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+static uint64_t rdtsc(void)
+{
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+static void sigsegv_expect(int sig)
+{
+ /* */
+}
+
+static void segvtask(void)
+{
+ if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ signal(SIGSEGV, sigsegv_expect);
+ alarm(10);
+ rdtsc();
+ fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
+ exit(0);
+}
+
+
+static void sigsegv_fail(int sig)
+{
+ fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+ exit(0);
+}
+
+static void rdtsctask(void)
+{
+ if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ signal(SIGSEGV, sigsegv_fail);
+ alarm(10);
+ for(;;) rdtsc();
+}
+
+
+int main(int argc, char **argv)
+{
+ int n_tasks = 100, i;
+
+ fprintf(stderr, "[No further output means we're allright]\n");
+
+ for (i=0; i<n_tasks; i++)
+ if (fork() == 0)
+ {
+ if (i & 1)
+ segvtask();
+ else
+ rdtsctask();
+ }
+
+ for (i=0; i<n_tasks; i++)
+ wait(NULL);
+
+ exit(0);
+}
+
diff --git a/Documentation/prctl/disable-tsc-on-off-stress-test.c b/Documentation/prctl/disable-tsc-on-off-stress-test.c
new file mode 100644
index 000000000..4d83a2762
--- /dev/null
+++ b/Documentation/prctl/disable-tsc-on-off-stress-test.c
@@ -0,0 +1,96 @@
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * when set with prctl()
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+/* snippet from wikipedia :-) */
+
+static uint64_t rdtsc(void)
+{
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+int should_segv = 0;
+
+static void sigsegv_cb(int sig)
+{
+ if (!should_segv)
+ {
+ fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+ exit(0);
+ }
+ if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ should_segv = 0;
+
+ rdtsc();
+}
+
+static void task(void)
+{
+ signal(SIGSEGV, sigsegv_cb);
+ alarm(10);
+ for(;;)
+ {
+ rdtsc();
+ if (should_segv)
+ {
+ fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
+ exit(0);
+ }
+ if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ should_segv = 1;
+ }
+}
+
+
+int main(int argc, char **argv)
+{
+ int n_tasks = 100, i;
+
+ fprintf(stderr, "[No further output means we're allright]\n");
+
+ for (i=0; i<n_tasks; i++)
+ if (fork() == 0)
+ task();
+
+ for (i=0; i<n_tasks; i++)
+ wait(NULL);
+
+ exit(0);
+}
+
diff --git a/Documentation/prctl/disable-tsc-test.c b/Documentation/prctl/disable-tsc-test.c
new file mode 100644
index 000000000..2541e65cb
--- /dev/null
+++ b/Documentation/prctl/disable-tsc-test.c
@@ -0,0 +1,95 @@
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+const char *tsc_names[] =
+{
+ [0] = "[not set]",
+ [PR_TSC_ENABLE] = "PR_TSC_ENABLE",
+ [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV",
+};
+
+static uint64_t rdtsc(void)
+{
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+static void sigsegv_cb(int sig)
+{
+ int tsc_val = 0;
+
+ printf("[ SIG_SEGV ]\n");
+ printf("prctl(PR_GET_TSC, &tsc_val); ");
+ fflush(stdout);
+
+ if ( prctl(PR_GET_TSC, &tsc_val) == -1)
+ perror("prctl");
+
+ printf("tsc_val == %s\n", tsc_names[tsc_val]);
+ printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
+ fflush(stdout);
+ if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
+ perror("prctl");
+
+ printf("rdtsc() == ");
+}
+
+int main(int argc, char **argv)
+{
+ int tsc_val = 0;
+
+ signal(SIGSEGV, sigsegv_cb);
+
+ printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+ printf("prctl(PR_GET_TSC, &tsc_val); ");
+ fflush(stdout);
+
+ if ( prctl(PR_GET_TSC, &tsc_val) == -1)
+ perror("prctl");
+
+ printf("tsc_val == %s\n", tsc_names[tsc_val]);
+ printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+ printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
+ fflush(stdout);
+
+ if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
+ perror("prctl");
+
+ printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+ printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n");
+ fflush(stdout);
+
+ if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1)
+ perror("prctl");
+
+ printf("rdtsc() == ");
+ fflush(stdout);
+ printf("%llu\n", (unsigned long long)rdtsc());
+ fflush(stdout);
+
+ exit(EXIT_SUCCESS);
+}
+
diff --git a/Documentation/prctl/no_new_privs.txt b/Documentation/prctl/no_new_privs.txt
new file mode 100644
index 000000000..f7be84fba
--- /dev/null
+++ b/Documentation/prctl/no_new_privs.txt
@@ -0,0 +1,57 @@
+The execve system call can grant a newly-started program privileges that
+its parent did not have. The most obvious examples are setuid/setgid
+programs and file capabilities. To prevent the parent program from
+gaining these privileges as well, the kernel and user code must be
+careful to prevent the parent from doing anything that could subvert the
+child. For example:
+
+ - The dynamic loader handles LD_* environment variables differently if
+ a program is setuid.
+
+ - chroot is disallowed to unprivileged processes, since it would allow
+ /etc/passwd to be replaced from the point of view of a process that
+ inherited chroot.
+
+ - The exec code has special handling for ptrace.
+
+These are all ad-hoc fixes. The no_new_privs bit (since Linux 3.5) is a
+new, generic mechanism to make it safe for a process to modify its
+execution environment in a manner that persists across execve. Any task
+can set no_new_privs. Once the bit is set, it is inherited across fork,
+clone, and execve and cannot be unset. With no_new_privs set, execve
+promises not to grant the privilege to do anything that could not have
+been done without the execve call. For example, the setuid and setgid
+bits will no longer change the uid or gid; file capabilities will not
+add to the permitted set, and LSMs will not relax constraints after
+execve.
+
+To set no_new_privs, use prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0).
+
+Be careful, though: LSMs might also not tighten constraints on exec
+in no_new_privs mode. (This means that setting up a general-purpose
+service launcher to set no_new_privs before execing daemons may
+interfere with LSM-based sandboxing.)
+
+Note that no_new_privs does not prevent privilege changes that do not
+involve execve. An appropriately privileged task can still call
+setuid(2) and receive SCM_RIGHTS datagrams.
+
+There are two main use cases for no_new_privs so far:
+
+ - Filters installed for the seccomp mode 2 sandbox persist across
+ execve and can change the behavior of newly-executed programs.
+ Unprivileged users are therefore only allowed to install such filters
+ if no_new_privs is set.
+
+ - By itself, no_new_privs can be used to reduce the attack surface
+ available to an unprivileged user. If everything running with a
+ given uid has no_new_privs set, then that uid will be unable to
+ escalate its privileges by directly attacking setuid, setgid, and
+ fcap-using binaries; it will need to compromise something without the
+ no_new_privs bit set first.
+
+In the future, other potentially dangerous kernel features could become
+available to unprivileged tasks if no_new_privs is set. In principle,
+several options to unshare(2) and clone(2) would be safe when
+no_new_privs is set, and no_new_privs + chroot is considerable less
+dangerous than chroot by itself.
diff --git a/Documentation/prctl/seccomp_filter.txt b/Documentation/prctl/seccomp_filter.txt
new file mode 100644
index 000000000..1e469ef75
--- /dev/null
+++ b/Documentation/prctl/seccomp_filter.txt
@@ -0,0 +1,225 @@
+ SECure COMPuting with filters
+ =============================
+
+Introduction
+------------
+
+A large number of system calls are exposed to every userland process
+with many of them going unused for the entire lifetime of the process.
+As system calls change and mature, bugs are found and eradicated. A
+certain subset of userland applications benefit by having a reduced set
+of available system calls. The resulting set reduces the total kernel
+surface exposed to the application. System call filtering is meant for
+use with those applications.
+
+Seccomp filtering provides a means for a process to specify a filter for
+incoming system calls. The filter is expressed as a Berkeley Packet
+Filter (BPF) program, as with socket filters, except that the data
+operated on is related to the system call being made: system call
+number and the system call arguments. This allows for expressive
+filtering of system calls using a filter program language with a long
+history of being exposed to userland and a straightforward data set.
+
+Additionally, BPF makes it impossible for users of seccomp to fall prey
+to time-of-check-time-of-use (TOCTOU) attacks that are common in system
+call interposition frameworks. BPF programs may not dereference
+pointers which constrains all filters to solely evaluating the system
+call arguments directly.
+
+What it isn't
+-------------
+
+System call filtering isn't a sandbox. It provides a clearly defined
+mechanism for minimizing the exposed kernel surface. It is meant to be
+a tool for sandbox developers to use. Beyond that, policy for logical
+behavior and information flow should be managed with a combination of
+other system hardening techniques and, potentially, an LSM of your
+choosing. Expressive, dynamic filters provide further options down this
+path (avoiding pathological sizes or selecting which of the multiplexed
+system calls in socketcall() is allowed, for instance) which could be
+construed, incorrectly, as a more complete sandboxing solution.
+
+Usage
+-----
+
+An additional seccomp mode is added and is enabled using the same
+prctl(2) call as the strict seccomp. If the architecture has
+CONFIG_HAVE_ARCH_SECCOMP_FILTER, then filters may be added as below:
+
+PR_SET_SECCOMP:
+ Now takes an additional argument which specifies a new filter
+ using a BPF program.
+ The BPF program will be executed over struct seccomp_data
+ reflecting the system call number, arguments, and other
+ metadata. The BPF program must then return one of the
+ acceptable values to inform the kernel which action should be
+ taken.
+
+ Usage:
+ prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, prog);
+
+ The 'prog' argument is a pointer to a struct sock_fprog which
+ will contain the filter program. If the program is invalid, the
+ call will return -1 and set errno to EINVAL.
+
+ If fork/clone and execve are allowed by @prog, any child
+ processes will be constrained to the same filters and system
+ call ABI as the parent.
+
+ Prior to use, the task must call prctl(PR_SET_NO_NEW_PRIVS, 1) or
+ run with CAP_SYS_ADMIN privileges in its namespace. If these are not
+ true, -EACCES will be returned. This requirement ensures that filter
+ programs cannot be applied to child processes with greater privileges
+ than the task that installed them.
+
+ Additionally, if prctl(2) is allowed by the attached filter,
+ additional filters may be layered on which will increase evaluation
+ time, but allow for further decreasing the attack surface during
+ execution of a process.
+
+The above call returns 0 on success and non-zero on error.
+
+Return values
+-------------
+A seccomp filter may return any of the following values. If multiple
+filters exist, the return value for the evaluation of a given system
+call will always use the highest precedent value. (For example,
+SECCOMP_RET_KILL will always take precedence.)
+
+In precedence order, they are:
+
+SECCOMP_RET_KILL:
+ Results in the task exiting immediately without executing the
+ system call. The exit status of the task (status & 0x7f) will
+ be SIGSYS, not SIGKILL.
+
+SECCOMP_RET_TRAP:
+ Results in the kernel sending a SIGSYS signal to the triggering
+ task without executing the system call. siginfo->si_call_addr
+ will show the address of the system call instruction, and
+ siginfo->si_syscall and siginfo->si_arch will indicate which
+ syscall was attempted. The program counter will be as though
+ the syscall happened (i.e. it will not point to the syscall
+ instruction). The return value register will contain an arch-
+ dependent value -- if resuming execution, set it to something
+ sensible. (The architecture dependency is because replacing
+ it with -ENOSYS could overwrite some useful information.)
+
+ The SECCOMP_RET_DATA portion of the return value will be passed
+ as si_errno.
+
+ SIGSYS triggered by seccomp will have a si_code of SYS_SECCOMP.
+
+SECCOMP_RET_ERRNO:
+ Results in the lower 16-bits of the return value being passed
+ to userland as the errno without executing the system call.
+
+SECCOMP_RET_TRACE:
+ When returned, this value will cause the kernel to attempt to
+ notify a ptrace()-based tracer prior to executing the system
+ call. If there is no tracer present, -ENOSYS is returned to
+ userland and the system call is not executed.
+
+ A tracer will be notified if it requests PTRACE_O_TRACESECCOMP
+ using ptrace(PTRACE_SETOPTIONS). The tracer will be notified
+ of a PTRACE_EVENT_SECCOMP and the SECCOMP_RET_DATA portion of
+ the BPF program return value will be available to the tracer
+ via PTRACE_GETEVENTMSG.
+
+ The tracer can skip the system call by changing the syscall number
+ to -1. Alternatively, the tracer can change the system call
+ requested by changing the system call to a valid syscall number. If
+ the tracer asks to skip the system call, then the system call will
+ appear to return the value that the tracer puts in the return value
+ register.
+
+ The seccomp check will not be run again after the tracer is
+ notified. (This means that seccomp-based sandboxes MUST NOT
+ allow use of ptrace, even of other sandboxed processes, without
+ extreme care; ptracers can use this mechanism to escape.)
+
+SECCOMP_RET_ALLOW:
+ Results in the system call being executed.
+
+If multiple filters exist, the return value for the evaluation of a
+given system call will always use the highest precedent value.
+
+Precedence is only determined using the SECCOMP_RET_ACTION mask. When
+multiple filters return values of the same precedence, only the
+SECCOMP_RET_DATA from the most recently installed filter will be
+returned.
+
+Pitfalls
+--------
+
+The biggest pitfall to avoid during use is filtering on system call
+number without checking the architecture value. Why? On any
+architecture that supports multiple system call invocation conventions,
+the system call numbers may vary based on the specific invocation. If
+the numbers in the different calling conventions overlap, then checks in
+the filters may be abused. Always check the arch value!
+
+Example
+-------
+
+The samples/seccomp/ directory contains both an x86-specific example
+and a more generic example of a higher level macro interface for BPF
+program generation.
+
+
+
+Adding architecture support
+-----------------------
+
+See arch/Kconfig for the authoritative requirements. In general, if an
+architecture supports both ptrace_event and seccomp, it will be able to
+support seccomp filter with minor fixup: SIGSYS support and seccomp return
+value checking. Then it must just add CONFIG_HAVE_ARCH_SECCOMP_FILTER
+to its arch-specific Kconfig.
+
+
+
+Caveats
+-------
+
+The vDSO can cause some system calls to run entirely in userspace,
+leading to surprises when you run programs on different machines that
+fall back to real syscalls. To minimize these surprises on x86, make
+sure you test with
+/sys/devices/system/clocksource/clocksource0/current_clocksource set to
+something like acpi_pm.
+
+On x86-64, vsyscall emulation is enabled by default. (vsyscalls are
+legacy variants on vDSO calls.) Currently, emulated vsyscalls will honor seccomp, with a few oddities:
+
+- A return value of SECCOMP_RET_TRAP will set a si_call_addr pointing to
+ the vsyscall entry for the given call and not the address after the
+ 'syscall' instruction. Any code which wants to restart the call
+ should be aware that (a) a ret instruction has been emulated and (b)
+ trying to resume the syscall will again trigger the standard vsyscall
+ emulation security checks, making resuming the syscall mostly
+ pointless.
+
+- A return value of SECCOMP_RET_TRACE will signal the tracer as usual,
+ but the syscall may not be changed to another system call using the
+ orig_rax register. It may only be changed to -1 order to skip the
+ currently emulated call. Any other change MAY terminate the process.
+ The rip value seen by the tracer will be the syscall entry address;
+ this is different from normal behavior. The tracer MUST NOT modify
+ rip or rsp. (Do not rely on other changes terminating the process.
+ They might work. For example, on some kernels, choosing a syscall
+ that only exists in future kernels will be correctly emulated (by
+ returning -ENOSYS).
+
+To detect this quirky behavior, check for addr & ~0x0C00 ==
+0xFFFFFFFFFF600000. (For SECCOMP_RET_TRACE, use rip. For
+SECCOMP_RET_TRAP, use siginfo->si_call_addr.) Do not check any other
+condition: future kernels may improve vsyscall emulation and current
+kernels in vsyscall=native mode will behave differently, but the
+instructions at 0xF...F600{0,4,8,C}00 will not be system calls in these
+cases.
+
+Note that modern systems are unlikely to use vsyscalls at all -- they
+are a legacy feature and they are considerably slower than standard
+syscalls. New code will use the vDSO, and vDSO-issued system calls
+are indistinguishable from normal system calls.
diff --git a/Documentation/preempt-locking.txt b/Documentation/preempt-locking.txt
new file mode 100644
index 000000000..57883ca24
--- /dev/null
+++ b/Documentation/preempt-locking.txt
@@ -0,0 +1,135 @@
+ Proper Locking Under a Preemptible Kernel:
+ Keeping Kernel Code Preempt-Safe
+ Robert Love <rml@tech9.net>
+ Last Updated: 28 Aug 2002
+
+
+INTRODUCTION
+
+
+A preemptible kernel creates new locking issues. The issues are the same as
+those under SMP: concurrency and reentrancy. Thankfully, the Linux preemptible
+kernel model leverages existing SMP locking mechanisms. Thus, the kernel
+requires explicit additional locking for very few additional situations.
+
+This document is for all kernel hackers. Developing code in the kernel
+requires protecting these situations.
+
+
+RULE #1: Per-CPU data structures need explicit protection
+
+
+Two similar problems arise. An example code snippet:
+
+ struct this_needs_locking tux[NR_CPUS];
+ tux[smp_processor_id()] = some_value;
+ /* task is preempted here... */
+ something = tux[smp_processor_id()];
+
+First, since the data is per-CPU, it may not have explicit SMP locking, but
+require it otherwise. Second, when a preempted task is finally rescheduled,
+the previous value of smp_processor_id may not equal the current. You must
+protect these situations by disabling preemption around them.
+
+You can also use put_cpu() and get_cpu(), which will disable preemption.
+
+
+RULE #2: CPU state must be protected.
+
+
+Under preemption, the state of the CPU must be protected. This is arch-
+dependent, but includes CPU structures and state not preserved over a context
+switch. For example, on x86, entering and exiting FPU mode is now a critical
+section that must occur while preemption is disabled. Think what would happen
+if the kernel is executing a floating-point instruction and is then preempted.
+Remember, the kernel does not save FPU state except for user tasks. Therefore,
+upon preemption, the FPU registers will be sold to the lowest bidder. Thus,
+preemption must be disabled around such regions.
+
+Note, some FPU functions are already explicitly preempt safe. For example,
+kernel_fpu_begin and kernel_fpu_end will disable and enable preemption.
+However, math_state_restore must be called with preemption disabled.
+
+
+RULE #3: Lock acquire and release must be performed by same task
+
+
+A lock acquired in one task must be released by the same task. This
+means you can't do oddball things like acquire a lock and go off to
+play while another task releases it. If you want to do something
+like this, acquire and release the task in the same code path and
+have the caller wait on an event by the other task.
+
+
+SOLUTION
+
+
+Data protection under preemption is achieved by disabling preemption for the
+duration of the critical region.
+
+preempt_enable() decrement the preempt counter
+preempt_disable() increment the preempt counter
+preempt_enable_no_resched() decrement, but do not immediately preempt
+preempt_check_resched() if needed, reschedule
+preempt_count() return the preempt counter
+
+The functions are nestable. In other words, you can call preempt_disable
+n-times in a code path, and preemption will not be reenabled until the n-th
+call to preempt_enable. The preempt statements define to nothing if
+preemption is not enabled.
+
+Note that you do not need to explicitly prevent preemption if you are holding
+any locks or interrupts are disabled, since preemption is implicitly disabled
+in those cases.
+
+But keep in mind that 'irqs disabled' is a fundamentally unsafe way of
+disabling preemption - any spin_unlock() decreasing the preemption count
+to 0 might trigger a reschedule. A simple printk() might trigger a reschedule.
+So use this implicit preemption-disabling property only if you know that the
+affected codepath does not do any of this. Best policy is to use this only for
+small, atomic code that you wrote and which calls no complex functions.
+
+Example:
+
+ cpucache_t *cc; /* this is per-CPU */
+ preempt_disable();
+ cc = cc_data(searchp);
+ if (cc && cc->avail) {
+ __free_block(searchp, cc_entry(cc), cc->avail);
+ cc->avail = 0;
+ }
+ preempt_enable();
+ return 0;
+
+Notice how the preemption statements must encompass every reference of the
+critical variables. Another example:
+
+ int buf[NR_CPUS];
+ set_cpu_val(buf);
+ if (buf[smp_processor_id()] == -1) printf(KERN_INFO "wee!\n");
+ spin_lock(&buf_lock);
+ /* ... */
+
+This code is not preempt-safe, but see how easily we can fix it by simply
+moving the spin_lock up two lines.
+
+
+PREVENTING PREEMPTION USING INTERRUPT DISABLING
+
+
+It is possible to prevent a preemption event using local_irq_disable and
+local_irq_save. Note, when doing so, you must be very careful to not cause
+an event that would set need_resched and result in a preemption check. When
+in doubt, rely on locking or explicit preemption disabling.
+
+Note in 2.5 interrupt disabling is now only per-CPU (e.g. local).
+
+An additional concern is proper usage of local_irq_disable and local_irq_save.
+These may be used to protect from preemption, however, on exit, if preemption
+may be enabled, a test to see if preemption is required should be done. If
+these are called from the spin_lock and read/write lock macros, the right thing
+is done. They may also be called within a spin-lock protected region, however,
+if they are ever called outside of this context, a test for preemption should
+be made. Do note that calls from interrupt context or bottom half/ tasklets
+are also protected by preemption locks and so may use the versions which do
+not check preemption.
diff --git a/Documentation/printk-formats.txt b/Documentation/printk-formats.txt
new file mode 100644
index 000000000..2216eb187
--- /dev/null
+++ b/Documentation/printk-formats.txt
@@ -0,0 +1,291 @@
+If variable is of Type, use printk format specifier:
+---------------------------------------------------------
+ int %d or %x
+ unsigned int %u or %x
+ long %ld or %lx
+ unsigned long %lu or %lx
+ long long %lld or %llx
+ unsigned long long %llu or %llx
+ size_t %zu or %zx
+ ssize_t %zd or %zx
+ s32 %d or %x
+ u32 %u or %x
+ s64 %lld or %llx
+ u64 %llu or %llx
+
+If <type> is dependent on a config option for its size (e.g., sector_t,
+blkcnt_t) or is architecture-dependent for its size (e.g., tcflag_t), use a
+format specifier of its largest possible type and explicitly cast to it.
+Example:
+
+ printk("test: sector number/total blocks: %llu/%llu\n",
+ (unsigned long long)sector, (unsigned long long)blockcount);
+
+Reminder: sizeof() result is of type size_t.
+
+
+Raw pointer value SHOULD be printed with %p. The kernel supports
+the following extended format specifiers for pointer types:
+
+Symbols/Function Pointers:
+
+ %pF versatile_init+0x0/0x110
+ %pf versatile_init
+ %pS versatile_init+0x0/0x110
+ %pSR versatile_init+0x9/0x110
+ (with __builtin_extract_return_addr() translation)
+ %ps versatile_init
+ %pB prev_fn_of_versatile_init+0x88/0x88
+
+ For printing symbols and function pointers. The 'S' and 's' specifiers
+ result in the symbol name with ('S') or without ('s') offsets. Where
+ this is used on a kernel without KALLSYMS - the symbol address is
+ printed instead.
+
+ The 'B' specifier results in the symbol name with offsets and should be
+ used when printing stack backtraces. The specifier takes into
+ consideration the effect of compiler optimisations which may occur
+ when tail-call's are used and marked with the noreturn GCC attribute.
+
+ On ia64, ppc64 and parisc64 architectures function pointers are
+ actually function descriptors which must first be resolved. The 'F' and
+ 'f' specifiers perform this resolution and then provide the same
+ functionality as the 'S' and 's' specifiers.
+
+Kernel Pointers:
+
+ %pK 0x01234567 or 0x0123456789abcdef
+
+ For printing kernel pointers which should be hidden from unprivileged
+ users. The behaviour of %pK depends on the kptr_restrict sysctl - see
+ Documentation/sysctl/kernel.txt for more details.
+
+Struct Resources:
+
+ %pr [mem 0x60000000-0x6fffffff flags 0x2200] or
+ [mem 0x0000000060000000-0x000000006fffffff flags 0x2200]
+ %pR [mem 0x60000000-0x6fffffff pref] or
+ [mem 0x0000000060000000-0x000000006fffffff pref]
+
+ For printing struct resources. The 'R' and 'r' specifiers result in a
+ printed resource with ('R') or without ('r') a decoded flags member.
+ Passed by reference.
+
+Physical addresses types phys_addr_t:
+
+ %pa[p] 0x01234567 or 0x0123456789abcdef
+
+ For printing a phys_addr_t type (and its derivatives, such as
+ resource_size_t) which can vary based on build options, regardless of
+ the width of the CPU data path. Passed by reference.
+
+DMA addresses types dma_addr_t:
+
+ %pad 0x01234567 or 0x0123456789abcdef
+
+ For printing a dma_addr_t type which can vary based on build options,
+ regardless of the width of the CPU data path. Passed by reference.
+
+Raw buffer as an escaped string:
+
+ %*pE[achnops]
+
+ For printing raw buffer as an escaped string. For the following buffer
+
+ 1b 62 20 5c 43 07 22 90 0d 5d
+
+ few examples show how the conversion would be done (the result string
+ without surrounding quotes):
+
+ %*pE "\eb \C\a"\220\r]"
+ %*pEhp "\x1bb \C\x07"\x90\x0d]"
+ %*pEa "\e\142\040\\\103\a\042\220\r\135"
+
+ The conversion rules are applied according to an optional combination
+ of flags (see string_escape_mem() kernel documentation for the
+ details):
+ a - ESCAPE_ANY
+ c - ESCAPE_SPECIAL
+ h - ESCAPE_HEX
+ n - ESCAPE_NULL
+ o - ESCAPE_OCTAL
+ p - ESCAPE_NP
+ s - ESCAPE_SPACE
+ By default ESCAPE_ANY_NP is used.
+
+ ESCAPE_ANY_NP is the sane choice for many cases, in particularly for
+ printing SSIDs.
+
+ If field width is omitted the 1 byte only will be escaped.
+
+Raw buffer as a hex string:
+ %*ph 00 01 02 ... 3f
+ %*phC 00:01:02: ... :3f
+ %*phD 00-01-02- ... -3f
+ %*phN 000102 ... 3f
+
+ For printing a small buffers (up to 64 bytes long) as a hex string with
+ certain separator. For the larger buffers consider to use
+ print_hex_dump().
+
+MAC/FDDI addresses:
+
+ %pM 00:01:02:03:04:05
+ %pMR 05:04:03:02:01:00
+ %pMF 00-01-02-03-04-05
+ %pm 000102030405
+ %pmR 050403020100
+
+ For printing 6-byte MAC/FDDI addresses in hex notation. The 'M' and 'm'
+ specifiers result in a printed address with ('M') or without ('m') byte
+ separators. The default byte separator is the colon (':').
+
+ Where FDDI addresses are concerned the 'F' specifier can be used after
+ the 'M' specifier to use dash ('-') separators instead of the default
+ separator.
+
+ For Bluetooth addresses the 'R' specifier shall be used after the 'M'
+ specifier to use reversed byte order suitable for visual interpretation
+ of Bluetooth addresses which are in the little endian order.
+
+ Passed by reference.
+
+IPv4 addresses:
+
+ %pI4 1.2.3.4
+ %pi4 001.002.003.004
+ %p[Ii]4[hnbl]
+
+ For printing IPv4 dot-separated decimal addresses. The 'I4' and 'i4'
+ specifiers result in a printed address with ('i4') or without ('I4')
+ leading zeros.
+
+ The additional 'h', 'n', 'b', and 'l' specifiers are used to specify
+ host, network, big or little endian order addresses respectively. Where
+ no specifier is provided the default network/big endian order is used.
+
+ Passed by reference.
+
+IPv6 addresses:
+
+ %pI6 0001:0002:0003:0004:0005:0006:0007:0008
+ %pi6 00010002000300040005000600070008
+ %pI6c 1:2:3:4:5:6:7:8
+
+ For printing IPv6 network-order 16-bit hex addresses. The 'I6' and 'i6'
+ specifiers result in a printed address with ('I6') or without ('i6')
+ colon-separators. Leading zeros are always used.
+
+ The additional 'c' specifier can be used with the 'I' specifier to
+ print a compressed IPv6 address as described by
+ http://tools.ietf.org/html/rfc5952
+
+ Passed by reference.
+
+IPv4/IPv6 addresses (generic, with port, flowinfo, scope):
+
+ %pIS 1.2.3.4 or 0001:0002:0003:0004:0005:0006:0007:0008
+ %piS 001.002.003.004 or 00010002000300040005000600070008
+ %pISc 1.2.3.4 or 1:2:3:4:5:6:7:8
+ %pISpc 1.2.3.4:12345 or [1:2:3:4:5:6:7:8]:12345
+ %p[Ii]S[pfschnbl]
+
+ For printing an IP address without the need to distinguish whether it's
+ of type AF_INET or AF_INET6, a pointer to a valid 'struct sockaddr',
+ specified through 'IS' or 'iS', can be passed to this format specifier.
+
+ The additional 'p', 'f', and 's' specifiers are used to specify port
+ (IPv4, IPv6), flowinfo (IPv6) and scope (IPv6). Ports have a ':' prefix,
+ flowinfo a '/' and scope a '%', each followed by the actual value.
+
+ In case of an IPv6 address the compressed IPv6 address as described by
+ http://tools.ietf.org/html/rfc5952 is being used if the additional
+ specifier 'c' is given. The IPv6 address is surrounded by '[', ']' in
+ case of additional specifiers 'p', 'f' or 's' as suggested by
+ https://tools.ietf.org/html/draft-ietf-6man-text-addr-representation-07
+
+ In case of IPv4 addresses, the additional 'h', 'n', 'b', and 'l'
+ specifiers can be used as well and are ignored in case of an IPv6
+ address.
+
+ Passed by reference.
+
+ Further examples:
+
+ %pISfc 1.2.3.4 or [1:2:3:4:5:6:7:8]/123456789
+ %pISsc 1.2.3.4 or [1:2:3:4:5:6:7:8]%1234567890
+ %pISpfc 1.2.3.4:12345 or [1:2:3:4:5:6:7:8]:12345/123456789
+
+UUID/GUID addresses:
+
+ %pUb 00010203-0405-0607-0809-0a0b0c0d0e0f
+ %pUB 00010203-0405-0607-0809-0A0B0C0D0E0F
+ %pUl 03020100-0504-0706-0809-0a0b0c0e0e0f
+ %pUL 03020100-0504-0706-0809-0A0B0C0E0E0F
+
+ For printing 16-byte UUID/GUIDs addresses. The additional 'l', 'L',
+ 'b' and 'B' specifiers are used to specify a little endian order in
+ lower ('l') or upper case ('L') hex characters - and big endian order
+ in lower ('b') or upper case ('B') hex characters.
+
+ Where no additional specifiers are used the default big endian
+ order with lower case hex characters will be printed.
+
+ Passed by reference.
+
+dentry names:
+ %pd{,2,3,4}
+ %pD{,2,3,4}
+
+ For printing dentry name; if we race with d_move(), the name might be
+ a mix of old and new ones, but it won't oops. %pd dentry is a safer
+ equivalent of %s dentry->d_name.name we used to use, %pd<n> prints
+ n last components. %pD does the same thing for struct file.
+
+ Passed by reference.
+
+struct va_format:
+
+ %pV
+
+ For printing struct va_format structures. These contain a format string
+ and va_list as follows:
+
+ struct va_format {
+ const char *fmt;
+ va_list *va;
+ };
+
+ Do not use this feature without some mechanism to verify the
+ correctness of the format string and va_list arguments.
+
+ Passed by reference.
+
+struct clk:
+
+ %pC pll1
+ %pCn pll1
+ %pCr 1560000000
+
+ For printing struct clk structures. '%pC' and '%pCn' print the name
+ (Common Clock Framework) or address (legacy clock framework) of the
+ structure; '%pCr' prints the current clock rate.
+
+ Passed by reference.
+
+bitmap and its derivatives such as cpumask and nodemask:
+
+ %*pb 0779
+ %*pbl 0,3-6,8-10
+
+ For printing bitmap and its derivatives such as cpumask and nodemask,
+ %*pb output the bitmap with field width as the number of bits and %*pbl
+ output the bitmap as range list with field width as the number of bits.
+
+ Passed by reference.
+
+Thank you for your cooperation and attention.
+
+
+By Randy Dunlap <rdunlap@infradead.org> and
+Andrew Murray <amurray@mpc-data.co.uk>
diff --git a/Documentation/pti/pti_intel_mid.txt b/Documentation/pti/pti_intel_mid.txt
new file mode 100644
index 000000000..e7a5b6d1f
--- /dev/null
+++ b/Documentation/pti/pti_intel_mid.txt
@@ -0,0 +1,99 @@
+The Intel MID PTI project is HW implemented in Intel Atom
+system-on-a-chip designs based on the Parallel Trace
+Interface for MIPI P1149.7 cJTAG standard. The kernel solution
+for this platform involves the following files:
+
+./include/linux/pti.h
+./drivers/.../n_tracesink.h
+./drivers/.../n_tracerouter.c
+./drivers/.../n_tracesink.c
+./drivers/.../pti.c
+
+pti.c is the driver that enables various debugging features
+popular on platforms from certain mobile manufacturers.
+n_tracerouter.c and n_tracesink.c allow extra system information to
+be collected and routed to the pti driver, such as trace
+debugging data from a modem. Although n_tracerouter
+and n_tracesink are a part of the complete PTI solution,
+these two line disciplines can work separately from
+pti.c and route any data stream from one /dev/tty node
+to another /dev/tty node via kernel-space. This provides
+a stable, reliable connection that will not break unless
+the user-space application shuts down (plus avoids
+kernel->user->kernel context switch overheads of routing
+data).
+
+An example debugging usage for this driver system:
+ *Hook /dev/ttyPTI0 to syslogd. Opening this port will also start
+ a console device to further capture debugging messages to PTI.
+ *Hook /dev/ttyPTI1 to modem debugging data to write to PTI HW.
+ This is where n_tracerouter and n_tracesink are used.
+ *Hook /dev/pti to a user-level debugging application for writing
+ to PTI HW.
+ *Use mipi_* Kernel Driver API in other device drivers for
+ debugging to PTI by first requesting a PTI write address via
+ mipi_request_masterchannel(1).
+
+Below is example pseudo-code on how a 'privileged' application
+can hook up n_tracerouter and n_tracesink to any tty on
+a system. 'Privileged' means the application has enough
+privileges to successfully manipulate the ldisc drivers
+but is not just blindly executing as 'root'. Keep in mind
+the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter
+and n_tracesink line discpline drivers but is a generic
+operation for a program to use a line discpline driver
+on a tty port other than the default n_tty.
+
+/////////// To hook up n_tracerouter and n_tracesink /////////
+
+// Note that n_tracerouter depends on n_tracesink.
+#include <errno.h>
+#define ONE_TTY "/dev/ttyOne"
+#define TWO_TTY "/dev/ttyTwo"
+
+// needed global to hand onto ldisc connection
+static int g_fd_source = -1;
+static int g_fd_sink = -1;
+
+// these two vars used to grab LDISC values from loaded ldisc drivers
+// in OS. Look at /proc/tty/ldiscs to get the right numbers from
+// the ldiscs loaded in the system.
+int source_ldisc_num, sink_ldisc_num = -1;
+int retval;
+
+g_fd_source = open(ONE_TTY, O_RDWR); // must be R/W
+g_fd_sink = open(TWO_TTY, O_RDWR); // must be R/W
+
+if (g_fd_source <= 0) || (g_fd_sink <= 0) {
+ // doubt you'll want to use these exact error lines of code
+ printf("Error on open(). errno: %d\n",errno);
+ return errno;
+}
+
+retval = ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num);
+if (retval < 0) {
+ printf("Error on ioctl(). errno: %d\n", errno);
+ return errno;
+}
+
+retval = ioctl(g_fd_source, TIOCSETD, &source_ldisc_num);
+if (retval < 0) {
+ printf("Error on ioctl(). errno: %d\n", errno);
+ return errno;
+}
+
+/////////// To disconnect n_tracerouter and n_tracesink ////////
+
+// First make sure data through the ldiscs has stopped.
+
+// Second, disconnect ldiscs. This provides a
+// little cleaner shutdown on tty stack.
+sink_ldisc_num = 0;
+source_ldisc_num = 0;
+ioctl(g_fd_uart, TIOCSETD, &sink_ldisc_num);
+ioctl(g_fd_gadget, TIOCSETD, &source_ldisc_num);
+
+// Three, program closes connection, and cleanup:
+close(g_fd_uart);
+close(g_fd_gadget);
+g_fd_uart = g_fd_gadget = NULL;
diff --git a/Documentation/ptp/.gitignore b/Documentation/ptp/.gitignore
new file mode 100644
index 000000000..f562e49d6
--- /dev/null
+++ b/Documentation/ptp/.gitignore
@@ -0,0 +1 @@
+testptp
diff --git a/Documentation/ptp/Makefile b/Documentation/ptp/Makefile
new file mode 100644
index 000000000..293d6c09a
--- /dev/null
+++ b/Documentation/ptp/Makefile
@@ -0,0 +1,8 @@
+# List of programs to build
+hostprogs-y := testptp
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_testptp.o += -I$(objtree)/usr/include
+HOSTLOADLIBES_testptp := -lrt
diff --git a/Documentation/ptp/ptp.txt b/Documentation/ptp/ptp.txt
new file mode 100644
index 000000000..ae8fef86b
--- /dev/null
+++ b/Documentation/ptp/ptp.txt
@@ -0,0 +1,89 @@
+
+* PTP hardware clock infrastructure for Linux
+
+ This patch set introduces support for IEEE 1588 PTP clocks in
+ Linux. Together with the SO_TIMESTAMPING socket options, this
+ presents a standardized method for developing PTP user space
+ programs, synchronizing Linux with external clocks, and using the
+ ancillary features of PTP hardware clocks.
+
+ A new class driver exports a kernel interface for specific clock
+ drivers and a user space interface. The infrastructure supports a
+ complete set of PTP hardware clock functionality.
+
+ + Basic clock operations
+ - Set time
+ - Get time
+ - Shift the clock by a given offset atomically
+ - Adjust clock frequency
+
+ + Ancillary clock features
+ - One short or periodic alarms, with signal delivery to user program
+ - Time stamp external events
+ - Period output signals configurable from user space
+ - Synchronization of the Linux system time via the PPS subsystem
+
+** PTP hardware clock kernel API
+
+ A PTP clock driver registers itself with the class driver. The
+ class driver handles all of the dealings with user space. The
+ author of a clock driver need only implement the details of
+ programming the clock hardware. The clock driver notifies the class
+ driver of asynchronous events (alarms and external time stamps) via
+ a simple message passing interface.
+
+ The class driver supports multiple PTP clock drivers. In normal use
+ cases, only one PTP clock is needed. However, for testing and
+ development, it can be useful to have more than one clock in a
+ single system, in order to allow performance comparisons.
+
+** PTP hardware clock user space API
+
+ The class driver also creates a character device for each
+ registered clock. User space can use an open file descriptor from
+ the character device as a POSIX clock id and may call
+ clock_gettime, clock_settime, and clock_adjtime. These calls
+ implement the basic clock operations.
+
+ User space programs may control the clock using standardized
+ ioctls. A program may query, enable, configure, and disable the
+ ancillary clock features. User space can receive time stamped
+ events via blocking read() and poll(). One shot and periodic
+ signals may be configured via the POSIX timer_settime() system
+ call.
+
+** Writing clock drivers
+
+ Clock drivers include include/linux/ptp_clock_kernel.h and register
+ themselves by presenting a 'struct ptp_clock_info' to the
+ registration method. Clock drivers must implement all of the
+ functions in the interface. If a clock does not offer a particular
+ ancillary feature, then the driver should just return -EOPNOTSUPP
+ from those functions.
+
+ Drivers must ensure that all of the methods in interface are
+ reentrant. Since most hardware implementations treat the time value
+ as a 64 bit integer accessed as two 32 bit registers, drivers
+ should use spin_lock_irqsave/spin_unlock_irqrestore to protect
+ against concurrent access. This locking cannot be accomplished in
+ class driver, since the lock may also be needed by the clock
+ driver's interrupt service routine.
+
+** Supported hardware
+
+ + Freescale eTSEC gianfar
+ - 2 Time stamp external triggers, programmable polarity (opt. interrupt)
+ - 2 Alarm registers (optional interrupt)
+ - 3 Periodic signals (optional interrupt)
+
+ + National DP83640
+ - 6 GPIOs programmable as inputs or outputs
+ - 6 GPIOs with dedicated functions (LED/JTAG/clock) can also be
+ used as general inputs or outputs
+ - GPIO inputs can time stamp external triggers
+ - GPIO outputs can produce periodic signals
+ - 1 interrupt pin
+
+ + Intel IXP465
+ - Auxiliary Slave/Master Mode Snapshot (optional interrupt)
+ - Target Time (optional interrupt)
diff --git a/Documentation/ptp/testptp.c b/Documentation/ptp/testptp.c
new file mode 100644
index 000000000..2bc8abc57
--- /dev/null
+++ b/Documentation/ptp/testptp.c
@@ -0,0 +1,519 @@
+/*
+ * PTP 1588 clock support - User space test program
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <math.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/ptp_clock.h>
+
+#define DEVICE "/dev/ptp0"
+
+#ifndef ADJ_SETOFFSET
+#define ADJ_SETOFFSET 0x0100
+#endif
+
+#ifndef CLOCK_INVALID
+#define CLOCK_INVALID -1
+#endif
+
+/* clock_adjtime is not available in GLIBC < 2.14 */
+#if !__GLIBC_PREREQ(2, 14)
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+ return syscall(__NR_clock_adjtime, id, tx);
+}
+#endif
+
+static clockid_t get_clockid(int fd)
+{
+#define CLOCKFD 3
+#define FD_TO_CLOCKID(fd) ((~(clockid_t) (fd) << 3) | CLOCKFD)
+
+ return FD_TO_CLOCKID(fd);
+}
+
+static void handle_alarm(int s)
+{
+ printf("received signal %d\n", s);
+}
+
+static int install_handler(int signum, void (*handler)(int))
+{
+ struct sigaction action;
+ sigset_t mask;
+
+ /* Unblock the signal. */
+ sigemptyset(&mask);
+ sigaddset(&mask, signum);
+ sigprocmask(SIG_UNBLOCK, &mask, NULL);
+
+ /* Install the signal handler. */
+ action.sa_handler = handler;
+ action.sa_flags = 0;
+ sigemptyset(&action.sa_mask);
+ sigaction(signum, &action, NULL);
+
+ return 0;
+}
+
+static long ppb_to_scaled_ppm(int ppb)
+{
+ /*
+ * The 'freq' field in the 'struct timex' is in parts per
+ * million, but with a 16 bit binary fractional field.
+ * Instead of calculating either one of
+ *
+ * scaled_ppm = (ppb / 1000) << 16 [1]
+ * scaled_ppm = (ppb << 16) / 1000 [2]
+ *
+ * we simply use double precision math, in order to avoid the
+ * truncation in [1] and the possible overflow in [2].
+ */
+ return (long) (ppb * 65.536);
+}
+
+static int64_t pctns(struct ptp_clock_time *t)
+{
+ return t->sec * 1000000000LL + t->nsec;
+}
+
+static void usage(char *progname)
+{
+ fprintf(stderr,
+ "usage: %s [options]\n"
+ " -a val request a one-shot alarm after 'val' seconds\n"
+ " -A val request a periodic alarm every 'val' seconds\n"
+ " -c query the ptp clock's capabilities\n"
+ " -d name device to open\n"
+ " -e val read 'val' external time stamp events\n"
+ " -f val adjust the ptp clock frequency by 'val' ppb\n"
+ " -g get the ptp clock time\n"
+ " -h prints this message\n"
+ " -i val index for event/trigger\n"
+ " -k val measure the time offset between system and phc clock\n"
+ " for 'val' times (Maximum 25)\n"
+ " -l list the current pin configuration\n"
+ " -L pin,val configure pin index 'pin' with function 'val'\n"
+ " the channel index is taken from the '-i' option\n"
+ " 'val' specifies the auxiliary function:\n"
+ " 0 - none\n"
+ " 1 - external time stamp\n"
+ " 2 - periodic output\n"
+ " -p val enable output with a period of 'val' nanoseconds\n"
+ " -P val enable or disable (val=1|0) the system clock PPS\n"
+ " -s set the ptp clock time from the system time\n"
+ " -S set the system time from the ptp clock time\n"
+ " -t val shift the ptp clock time by 'val' seconds\n"
+ " -T val set the ptp clock time to 'val' seconds\n",
+ progname);
+}
+
+int main(int argc, char *argv[])
+{
+ struct ptp_clock_caps caps;
+ struct ptp_extts_event event;
+ struct ptp_extts_request extts_request;
+ struct ptp_perout_request perout_request;
+ struct ptp_pin_desc desc;
+ struct timespec ts;
+ struct timex tx;
+
+ static timer_t timerid;
+ struct itimerspec timeout;
+ struct sigevent sigevent;
+
+ struct ptp_clock_time *pct;
+ struct ptp_sys_offset *sysoff;
+
+
+ char *progname;
+ int i, c, cnt, fd;
+
+ char *device = DEVICE;
+ clockid_t clkid;
+ int adjfreq = 0x7fffffff;
+ int adjtime = 0;
+ int capabilities = 0;
+ int extts = 0;
+ int gettime = 0;
+ int index = 0;
+ int list_pins = 0;
+ int oneshot = 0;
+ int pct_offset = 0;
+ int n_samples = 0;
+ int periodic = 0;
+ int perout = -1;
+ int pin_index = -1, pin_func;
+ int pps = -1;
+ int seconds = 0;
+ int settime = 0;
+
+ int64_t t1, t2, tp;
+ int64_t interval, offset;
+
+ progname = strrchr(argv[0], '/');
+ progname = progname ? 1+progname : argv[0];
+ while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) {
+ switch (c) {
+ case 'a':
+ oneshot = atoi(optarg);
+ break;
+ case 'A':
+ periodic = atoi(optarg);
+ break;
+ case 'c':
+ capabilities = 1;
+ break;
+ case 'd':
+ device = optarg;
+ break;
+ case 'e':
+ extts = atoi(optarg);
+ break;
+ case 'f':
+ adjfreq = atoi(optarg);
+ break;
+ case 'g':
+ gettime = 1;
+ break;
+ case 'i':
+ index = atoi(optarg);
+ break;
+ case 'k':
+ pct_offset = 1;
+ n_samples = atoi(optarg);
+ break;
+ case 'l':
+ list_pins = 1;
+ break;
+ case 'L':
+ cnt = sscanf(optarg, "%d,%d", &pin_index, &pin_func);
+ if (cnt != 2) {
+ usage(progname);
+ return -1;
+ }
+ break;
+ case 'p':
+ perout = atoi(optarg);
+ break;
+ case 'P':
+ pps = atoi(optarg);
+ break;
+ case 's':
+ settime = 1;
+ break;
+ case 'S':
+ settime = 2;
+ break;
+ case 't':
+ adjtime = atoi(optarg);
+ break;
+ case 'T':
+ settime = 3;
+ seconds = atoi(optarg);
+ break;
+ case 'h':
+ usage(progname);
+ return 0;
+ case '?':
+ default:
+ usage(progname);
+ return -1;
+ }
+ }
+
+ fd = open(device, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "opening %s: %s\n", device, strerror(errno));
+ return -1;
+ }
+
+ clkid = get_clockid(fd);
+ if (CLOCK_INVALID == clkid) {
+ fprintf(stderr, "failed to read clock id\n");
+ return -1;
+ }
+
+ if (capabilities) {
+ if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+ perror("PTP_CLOCK_GETCAPS");
+ } else {
+ printf("capabilities:\n"
+ " %d maximum frequency adjustment (ppb)\n"
+ " %d programmable alarms\n"
+ " %d external time stamp channels\n"
+ " %d programmable periodic signals\n"
+ " %d pulse per second\n"
+ " %d programmable pins\n",
+ caps.max_adj,
+ caps.n_alarm,
+ caps.n_ext_ts,
+ caps.n_per_out,
+ caps.pps,
+ caps.n_pins);
+ }
+ }
+
+ if (0x7fffffff != adjfreq) {
+ memset(&tx, 0, sizeof(tx));
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = ppb_to_scaled_ppm(adjfreq);
+ if (clock_adjtime(clkid, &tx)) {
+ perror("clock_adjtime");
+ } else {
+ puts("frequency adjustment okay");
+ }
+ }
+
+ if (adjtime) {
+ memset(&tx, 0, sizeof(tx));
+ tx.modes = ADJ_SETOFFSET;
+ tx.time.tv_sec = adjtime;
+ tx.time.tv_usec = 0;
+ if (clock_adjtime(clkid, &tx) < 0) {
+ perror("clock_adjtime");
+ } else {
+ puts("time shift okay");
+ }
+ }
+
+ if (gettime) {
+ if (clock_gettime(clkid, &ts)) {
+ perror("clock_gettime");
+ } else {
+ printf("clock time: %ld.%09ld or %s",
+ ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec));
+ }
+ }
+
+ if (settime == 1) {
+ clock_gettime(CLOCK_REALTIME, &ts);
+ if (clock_settime(clkid, &ts)) {
+ perror("clock_settime");
+ } else {
+ puts("set time okay");
+ }
+ }
+
+ if (settime == 2) {
+ clock_gettime(clkid, &ts);
+ if (clock_settime(CLOCK_REALTIME, &ts)) {
+ perror("clock_settime");
+ } else {
+ puts("set time okay");
+ }
+ }
+
+ if (settime == 3) {
+ ts.tv_sec = seconds;
+ ts.tv_nsec = 0;
+ if (clock_settime(clkid, &ts)) {
+ perror("clock_settime");
+ } else {
+ puts("set time okay");
+ }
+ }
+
+ if (extts) {
+ memset(&extts_request, 0, sizeof(extts_request));
+ extts_request.index = index;
+ extts_request.flags = PTP_ENABLE_FEATURE;
+ if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+ perror("PTP_EXTTS_REQUEST");
+ extts = 0;
+ } else {
+ puts("external time stamp request okay");
+ }
+ for (; extts; extts--) {
+ cnt = read(fd, &event, sizeof(event));
+ if (cnt != sizeof(event)) {
+ perror("read");
+ break;
+ }
+ printf("event index %u at %lld.%09u\n", event.index,
+ event.t.sec, event.t.nsec);
+ fflush(stdout);
+ }
+ /* Disable the feature again. */
+ extts_request.flags = 0;
+ if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+ perror("PTP_EXTTS_REQUEST");
+ }
+ }
+
+ if (list_pins) {
+ int n_pins = 0;
+ if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+ perror("PTP_CLOCK_GETCAPS");
+ } else {
+ n_pins = caps.n_pins;
+ }
+ for (i = 0; i < n_pins; i++) {
+ desc.index = i;
+ if (ioctl(fd, PTP_PIN_GETFUNC, &desc)) {
+ perror("PTP_PIN_GETFUNC");
+ break;
+ }
+ printf("name %s index %u func %u chan %u\n",
+ desc.name, desc.index, desc.func, desc.chan);
+ }
+ }
+
+ if (oneshot) {
+ install_handler(SIGALRM, handle_alarm);
+ /* Create a timer. */
+ sigevent.sigev_notify = SIGEV_SIGNAL;
+ sigevent.sigev_signo = SIGALRM;
+ if (timer_create(clkid, &sigevent, &timerid)) {
+ perror("timer_create");
+ return -1;
+ }
+ /* Start the timer. */
+ memset(&timeout, 0, sizeof(timeout));
+ timeout.it_value.tv_sec = oneshot;
+ if (timer_settime(timerid, 0, &timeout, NULL)) {
+ perror("timer_settime");
+ return -1;
+ }
+ pause();
+ timer_delete(timerid);
+ }
+
+ if (periodic) {
+ install_handler(SIGALRM, handle_alarm);
+ /* Create a timer. */
+ sigevent.sigev_notify = SIGEV_SIGNAL;
+ sigevent.sigev_signo = SIGALRM;
+ if (timer_create(clkid, &sigevent, &timerid)) {
+ perror("timer_create");
+ return -1;
+ }
+ /* Start the timer. */
+ memset(&timeout, 0, sizeof(timeout));
+ timeout.it_interval.tv_sec = periodic;
+ timeout.it_value.tv_sec = periodic;
+ if (timer_settime(timerid, 0, &timeout, NULL)) {
+ perror("timer_settime");
+ return -1;
+ }
+ while (1) {
+ pause();
+ }
+ timer_delete(timerid);
+ }
+
+ if (perout >= 0) {
+ if (clock_gettime(clkid, &ts)) {
+ perror("clock_gettime");
+ return -1;
+ }
+ memset(&perout_request, 0, sizeof(perout_request));
+ perout_request.index = index;
+ perout_request.start.sec = ts.tv_sec + 2;
+ perout_request.start.nsec = 0;
+ perout_request.period.sec = 0;
+ perout_request.period.nsec = perout;
+ if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
+ perror("PTP_PEROUT_REQUEST");
+ } else {
+ puts("periodic output request okay");
+ }
+ }
+
+ if (pin_index >= 0) {
+ memset(&desc, 0, sizeof(desc));
+ desc.index = pin_index;
+ desc.func = pin_func;
+ desc.chan = index;
+ if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) {
+ perror("PTP_PIN_SETFUNC");
+ } else {
+ puts("set pin function okay");
+ }
+ }
+
+ if (pps != -1) {
+ int enable = pps ? 1 : 0;
+ if (ioctl(fd, PTP_ENABLE_PPS, enable)) {
+ perror("PTP_ENABLE_PPS");
+ } else {
+ puts("pps for system time request okay");
+ }
+ }
+
+ if (pct_offset) {
+ if (n_samples <= 0 || n_samples > 25) {
+ puts("n_samples should be between 1 and 25");
+ usage(progname);
+ return -1;
+ }
+
+ sysoff = calloc(1, sizeof(*sysoff));
+ if (!sysoff) {
+ perror("calloc");
+ return -1;
+ }
+ sysoff->n_samples = n_samples;
+
+ if (ioctl(fd, PTP_SYS_OFFSET, sysoff))
+ perror("PTP_SYS_OFFSET");
+ else
+ puts("system and phc clock time offset request okay");
+
+ pct = &sysoff->ts[0];
+ for (i = 0; i < sysoff->n_samples; i++) {
+ t1 = pctns(pct+2*i);
+ tp = pctns(pct+2*i+1);
+ t2 = pctns(pct+2*i+2);
+ interval = t2 - t1;
+ offset = (t2 + t1) / 2 - tp;
+
+ printf("system time: %lld.%u\n",
+ (pct+2*i)->sec, (pct+2*i)->nsec);
+ printf("phc time: %lld.%u\n",
+ (pct+2*i+1)->sec, (pct+2*i+1)->nsec);
+ printf("system time: %lld.%u\n",
+ (pct+2*i+2)->sec, (pct+2*i+2)->nsec);
+ printf("system/phc clock time offset is %" PRId64 " ns\n"
+ "system clock time delay is %" PRId64 " ns\n",
+ offset, interval);
+ }
+
+ free(sysoff);
+ }
+
+ close(fd);
+ return 0;
+}
diff --git a/Documentation/ptp/testptp.mk b/Documentation/ptp/testptp.mk
new file mode 100644
index 000000000..4ef2d9755
--- /dev/null
+++ b/Documentation/ptp/testptp.mk
@@ -0,0 +1,33 @@
+# PTP 1588 clock support - User space test program
+#
+# Copyright (C) 2010 OMICRON electronics GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+CC = $(CROSS_COMPILE)gcc
+INC = -I$(KBUILD_OUTPUT)/usr/include
+CFLAGS = -Wall $(INC)
+LDLIBS = -lrt
+PROGS = testptp
+
+all: $(PROGS)
+
+testptp: testptp.o
+
+clean:
+ rm -f testptp.o
+
+distclean: clean
+ rm -f $(PROGS)
diff --git a/Documentation/pwm.txt b/Documentation/pwm.txt
new file mode 100644
index 000000000..ca895fd21
--- /dev/null
+++ b/Documentation/pwm.txt
@@ -0,0 +1,122 @@
+Pulse Width Modulation (PWM) interface
+
+This provides an overview about the Linux PWM interface
+
+PWMs are commonly used for controlling LEDs, fans or vibrators in
+cell phones. PWMs with a fixed purpose have no need implementing
+the Linux PWM API (although they could). However, PWMs are often
+found as discrete devices on SoCs which have no fixed purpose. It's
+up to the board designer to connect them to LEDs or fans. To provide
+this kind of flexibility the generic PWM API exists.
+
+Identifying PWMs
+----------------
+
+Users of the legacy PWM API use unique IDs to refer to PWM devices.
+
+Instead of referring to a PWM device via its unique ID, board setup code
+should instead register a static mapping that can be used to match PWM
+consumers to providers, as given in the following example:
+
+ static struct pwm_lookup board_pwm_lookup[] = {
+ PWM_LOOKUP("tegra-pwm", 0, "pwm-backlight", NULL,
+ 50000, PWM_POLARITY_NORMAL),
+ };
+
+ static void __init board_init(void)
+ {
+ ...
+ pwm_add_table(board_pwm_lookup, ARRAY_SIZE(board_pwm_lookup));
+ ...
+ }
+
+Using PWMs
+----------
+
+Legacy users can request a PWM device using pwm_request() and free it
+after usage with pwm_free().
+
+New users should use the pwm_get() function and pass to it the consumer
+device or a consumer name. pwm_put() is used to free the PWM device. Managed
+variants of these functions, devm_pwm_get() and devm_pwm_put(), also exist.
+
+After being requested, a PWM has to be configured using:
+
+int pwm_config(struct pwm_device *pwm, int duty_ns, int period_ns);
+
+To start/stop toggling the PWM output use pwm_enable()/pwm_disable().
+
+Using PWMs with the sysfs interface
+-----------------------------------
+
+If CONFIG_SYSFS is enabled in your kernel configuration a simple sysfs
+interface is provided to use the PWMs from userspace. It is exposed at
+/sys/class/pwm/. Each probed PWM controller/chip will be exported as
+pwmchipN, where N is the base of the PWM chip. Inside the directory you
+will find:
+
+npwm - The number of PWM channels this chip supports (read-only).
+
+export - Exports a PWM channel for use with sysfs (write-only).
+
+unexport - Unexports a PWM channel from sysfs (write-only).
+
+The PWM channels are numbered using a per-chip index from 0 to npwm-1.
+
+When a PWM channel is exported a pwmX directory will be created in the
+pwmchipN directory it is associated with, where X is the number of the
+channel that was exported. The following properties will then be available:
+
+period - The total period of the PWM signal (read/write).
+ Value is in nanoseconds and is the sum of the active and inactive
+ time of the PWM.
+
+duty_cycle - The active time of the PWM signal (read/write).
+ Value is in nanoseconds and must be less than the period.
+
+polarity - Changes the polarity of the PWM signal (read/write).
+ Writes to this property only work if the PWM chip supports changing
+ the polarity. The polarity can only be changed if the PWM is not
+ enabled. Value is the string "normal" or "inversed".
+
+enable - Enable/disable the PWM signal (read/write).
+ 0 - disabled
+ 1 - enabled
+
+Implementing a PWM driver
+-------------------------
+
+Currently there are two ways to implement pwm drivers. Traditionally
+there only has been the barebone API meaning that each driver has
+to implement the pwm_*() functions itself. This means that it's impossible
+to have multiple PWM drivers in the system. For this reason it's mandatory
+for new drivers to use the generic PWM framework.
+
+A new PWM controller/chip can be added using pwmchip_add() and removed
+again with pwmchip_remove(). pwmchip_add() takes a filled in struct
+pwm_chip as argument which provides a description of the PWM chip, the
+number of PWM devices provided by the chip and the chip-specific
+implementation of the supported PWM operations to the framework.
+
+When implementing polarity support in a PWM driver, make sure to respect the
+signal conventions in the PWM framework. By definition, normal polarity
+characterizes a signal starts high for the duration of the duty cycle and
+goes low for the remainder of the period. Conversely, a signal with inversed
+polarity starts low for the duration of the duty cycle and goes high for the
+remainder of the period.
+
+Locking
+-------
+
+The PWM core list manipulations are protected by a mutex, so pwm_request()
+and pwm_free() may not be called from an atomic context. Currently the
+PWM core does not enforce any locking to pwm_enable(), pwm_disable() and
+pwm_config(), so the calling context is currently driver specific. This
+is an issue derived from the former barebone API and should be fixed soon.
+
+Helpers
+-------
+
+Currently a PWM can only be configured with period_ns and duty_ns. For several
+use cases freq_hz and duty_percent might be better. Instead of calculating
+this in your driver please consider adding appropriate helpers to the framework.
diff --git a/Documentation/ramoops.txt b/Documentation/ramoops.txt
new file mode 100644
index 000000000..5d8675615
--- /dev/null
+++ b/Documentation/ramoops.txt
@@ -0,0 +1,128 @@
+Ramoops oops/panic logger
+=========================
+
+Sergiu Iordache <sergiu@chromium.org>
+
+Updated: 17 November 2011
+
+0. Introduction
+
+Ramoops is an oops/panic logger that writes its logs to RAM before the system
+crashes. It works by logging oopses and panics in a circular buffer. Ramoops
+needs a system with persistent RAM so that the content of that area can
+survive after a restart.
+
+1. Ramoops concepts
+
+Ramoops uses a predefined memory area to store the dump. The start and size
+and type of the memory area are set using three variables:
+ * "mem_address" for the start
+ * "mem_size" for the size. The memory size will be rounded down to a
+ power of two.
+ * "mem_type" to specifiy if the memory type (default is pgprot_writecombine).
+
+Typically the default value of mem_type=0 should be used as that sets the pstore
+mapping to pgprot_writecombine. Setting mem_type=1 attempts to use
+pgprot_noncached, which only works on some platforms. This is because pstore
+depends on atomic operations. At least on ARM, pgprot_noncached causes the
+memory to be mapped strongly ordered, and atomic operations on strongly ordered
+memory are implementation defined, and won't work on many ARMs such as omaps.
+
+The memory area is divided into "record_size" chunks (also rounded down to
+power of two) and each oops/panic writes a "record_size" chunk of
+information.
+
+Dumping both oopses and panics can be done by setting 1 in the "dump_oops"
+variable while setting 0 in that variable dumps only the panics.
+
+The module uses a counter to record multiple dumps but the counter gets reset
+on restart (i.e. new dumps after the restart will overwrite old ones).
+
+Ramoops also supports software ECC protection of persistent memory regions.
+This might be useful when a hardware reset was used to bring the machine back
+to life (i.e. a watchdog triggered). In such cases, RAM may be somewhat
+corrupt, but usually it is restorable.
+
+2. Setting the parameters
+
+Setting the ramoops parameters can be done in 2 different manners:
+ 1. Use the module parameters (which have the names of the variables described
+ as before).
+ For quick debugging, you can also reserve parts of memory during boot
+ and then use the reserved memory for ramoops. For example, assuming a machine
+ with > 128 MB of memory, the following kernel command line will tell the
+ kernel to use only the first 128 MB of memory, and place ECC-protected ramoops
+ region at 128 MB boundary:
+ "mem=128M ramoops.mem_address=0x8000000 ramoops.ecc=1"
+ 2. Use a platform device and set the platform data. The parameters can then
+ be set through that platform data. An example of doing that is:
+
+#include <linux/pstore_ram.h>
+[...]
+
+static struct ramoops_platform_data ramoops_data = {
+ .mem_size = <...>,
+ .mem_address = <...>,
+ .mem_type = <...>,
+ .record_size = <...>,
+ .dump_oops = <...>,
+ .ecc = <...>,
+};
+
+static struct platform_device ramoops_dev = {
+ .name = "ramoops",
+ .dev = {
+ .platform_data = &ramoops_data,
+ },
+};
+
+[... inside a function ...]
+int ret;
+
+ret = platform_device_register(&ramoops_dev);
+if (ret) {
+ printk(KERN_ERR "unable to register platform device\n");
+ return ret;
+}
+
+You can specify either RAM memory or peripheral devices' memory. However, when
+specifying RAM, be sure to reserve the memory by issuing memblock_reserve()
+very early in the architecture code, e.g.:
+
+#include <linux/memblock.h>
+
+memblock_reserve(ramoops_data.mem_address, ramoops_data.mem_size);
+
+3. Dump format
+
+The data dump begins with a header, currently defined as "====" followed by a
+timestamp and a new line. The dump then continues with the actual data.
+
+4. Reading the data
+
+The dump data can be read from the pstore filesystem. The format for these
+files is "dmesg-ramoops-N", where N is the record number in memory. To delete
+a stored record from RAM, simply unlink the respective pstore file.
+
+5. Persistent function tracing
+
+Persistent function tracing might be useful for debugging software or hardware
+related hangs. The functions call chain log is stored in a "ftrace-ramoops"
+file. Here is an example of usage:
+
+ # mount -t debugfs debugfs /sys/kernel/debug/
+ # echo 1 > /sys/kernel/debug/pstore/record_ftrace
+ # reboot -f
+ [...]
+ # mount -t pstore pstore /mnt/
+ # tail /mnt/ftrace-ramoops
+ 0 ffffffff8101ea64 ffffffff8101bcda native_apic_mem_read <- disconnect_bsp_APIC+0x6a/0xc0
+ 0 ffffffff8101ea44 ffffffff8101bcf6 native_apic_mem_write <- disconnect_bsp_APIC+0x86/0xc0
+ 0 ffffffff81020084 ffffffff8101a4b5 hpet_disable <- native_machine_shutdown+0x75/0x90
+ 0 ffffffff81005f94 ffffffff8101a4bb iommu_shutdown_noop <- native_machine_shutdown+0x7b/0x90
+ 0 ffffffff8101a6a1 ffffffff8101a437 native_machine_emergency_restart <- native_machine_restart+0x37/0x40
+ 0 ffffffff811f9876 ffffffff8101a73a acpi_reboot <- native_machine_emergency_restart+0xaa/0x1e0
+ 0 ffffffff8101a514 ffffffff8101a772 mach_reboot_fixups <- native_machine_emergency_restart+0xe2/0x1e0
+ 0 ffffffff811d9c54 ffffffff8101a7a0 __const_udelay <- native_machine_emergency_restart+0x110/0x1e0
+ 0 ffffffff811d9c34 ffffffff811d9c80 __delay <- __const_udelay+0x30/0x40
+ 0 ffffffff811d9d14 ffffffff811d9c3f delay_tsc <- __delay+0xf/0x20
diff --git a/Documentation/rapidio/rapidio.txt b/Documentation/rapidio/rapidio.txt
new file mode 100644
index 000000000..28fbd877f
--- /dev/null
+++ b/Documentation/rapidio/rapidio.txt
@@ -0,0 +1,351 @@
+ The Linux RapidIO Subsystem
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The RapidIO standard is a packet-based fabric interconnect standard designed for
+use in embedded systems. Development of the RapidIO standard is directed by the
+RapidIO Trade Association (RTA). The current version of the RapidIO specification
+is publicly available for download from the RTA web-site [1].
+
+This document describes the basics of the Linux RapidIO subsystem and provides
+information on its major components.
+
+1 Overview
+----------
+
+Because the RapidIO subsystem follows the Linux device model it is integrated
+into the kernel similarly to other buses by defining RapidIO-specific device and
+bus types and registering them within the device model.
+
+The Linux RapidIO subsystem is architecture independent and therefore defines
+architecture-specific interfaces that provide support for common RapidIO
+subsystem operations.
+
+2. Core Components
+------------------
+
+A typical RapidIO network is a combination of endpoints and switches.
+Each of these components is represented in the subsystem by an associated data
+structure. The core logical components of the RapidIO subsystem are defined
+in include/linux/rio.h file.
+
+2.1 Master Port
+
+A master port (or mport) is a RapidIO interface controller that is local to the
+processor executing the Linux code. A master port generates and receives RapidIO
+packets (transactions). In the RapidIO subsystem each master port is represented
+by a rio_mport data structure. This structure contains master port specific
+resources such as mailboxes and doorbells. The rio_mport also includes a unique
+host device ID that is valid when a master port is configured as an enumerating
+host.
+
+RapidIO master ports are serviced by subsystem specific mport device drivers
+that provide functionality defined for this subsystem. To provide a hardware
+independent interface for RapidIO subsystem operations, rio_mport structure
+includes rio_ops data structure which contains pointers to hardware specific
+implementations of RapidIO functions.
+
+2.2 Device
+
+A RapidIO device is any endpoint (other than mport) or switch in the network.
+All devices are presented in the RapidIO subsystem by corresponding rio_dev data
+structure. Devices form one global device list and per-network device lists
+(depending on number of available mports and networks).
+
+2.3 Switch
+
+A RapidIO switch is a special class of device that routes packets between its
+ports towards their final destination. The packet destination port within a
+switch is defined by an internal routing table. A switch is presented in the
+RapidIO subsystem by rio_dev data structure expanded by additional rio_switch
+data structure, which contains switch specific information such as copy of the
+routing table and pointers to switch specific functions.
+
+The RapidIO subsystem defines the format and initialization method for subsystem
+specific switch drivers that are designed to provide hardware-specific
+implementation of common switch management routines.
+
+2.4 Network
+
+A RapidIO network is a combination of interconnected endpoint and switch devices.
+Each RapidIO network known to the system is represented by corresponding rio_net
+data structure. This structure includes lists of all devices and local master
+ports that form the same network. It also contains a pointer to the default
+master port that is used to communicate with devices within the network.
+
+2.5 Device Drivers
+
+RapidIO device-specific drivers follow Linux Kernel Driver Model and are
+intended to support specific RapidIO devices attached to the RapidIO network.
+
+2.6 Subsystem Interfaces
+
+RapidIO interconnect specification defines features that may be used to provide
+one or more common service layers for all participating RapidIO devices. These
+common services may act separately from device-specific drivers or be used by
+device-specific drivers. Example of such service provider is the RIONET driver
+which implements Ethernet-over-RapidIO interface. Because only one driver can be
+registered for a device, all common RapidIO services have to be registered as
+subsystem interfaces. This allows to have multiple common services attached to
+the same device without blocking attachment of a device-specific driver.
+
+3. Subsystem Initialization
+---------------------------
+
+In order to initialize the RapidIO subsystem, a platform must initialize and
+register at least one master port within the RapidIO network. To register mport
+within the subsystem controller driver's initialization code calls function
+rio_register_mport() for each available master port.
+
+After all active master ports are registered with a RapidIO subsystem,
+an enumeration and/or discovery routine may be called automatically or
+by user-space command.
+
+RapidIO subsystem can be configured to be built as a statically linked or
+modular component of the kernel (see details below).
+
+4. Enumeration and Discovery
+----------------------------
+
+4.1 Overview
+------------
+
+RapidIO subsystem configuration options allow users to build enumeration and
+discovery methods as statically linked components or loadable modules.
+An enumeration/discovery method implementation and available input parameters
+define how any given method can be attached to available RapidIO mports:
+simply to all available mports OR individually to the specified mport device.
+
+Depending on selected enumeration/discovery build configuration, there are
+several methods to initiate an enumeration and/or discovery process:
+
+ (a) Statically linked enumeration and discovery process can be started
+ automatically during kernel initialization time using corresponding module
+ parameters. This was the original method used since introduction of RapidIO
+ subsystem. Now this method relies on enumerator module parameter which is
+ 'rio-scan.scan' for existing basic enumeration/discovery method.
+ When automatic start of enumeration/discovery is used a user has to ensure
+ that all discovering endpoints are started before the enumerating endpoint
+ and are waiting for enumeration to be completed.
+ Configuration option CONFIG_RAPIDIO_DISC_TIMEOUT defines time that discovering
+ endpoint waits for enumeration to be completed. If the specified timeout
+ expires the discovery process is terminated without obtaining RapidIO network
+ information. NOTE: a timed out discovery process may be restarted later using
+ a user-space command as it is described below (if the given endpoint was
+ enumerated successfully).
+
+ (b) Statically linked enumeration and discovery process can be started by
+ a command from user space. This initiation method provides more flexibility
+ for a system startup compared to the option (a) above. After all participating
+ endpoints have been successfully booted, an enumeration process shall be
+ started first by issuing a user-space command, after an enumeration is
+ completed a discovery process can be started on all remaining endpoints.
+
+ (c) Modular enumeration and discovery process can be started by a command from
+ user space. After an enumeration/discovery module is loaded, a network scan
+ process can be started by issuing a user-space command.
+ Similar to the option (b) above, an enumerator has to be started first.
+
+ (d) Modular enumeration and discovery process can be started by a module
+ initialization routine. In this case an enumerating module shall be loaded
+ first.
+
+When a network scan process is started it calls an enumeration or discovery
+routine depending on the configured role of a master port: host or agent.
+
+Enumeration is performed by a master port if it is configured as a host port by
+assigning a host destination ID greater than or equal to zero. The host
+destination ID can be assigned to a master port using various methods depending
+on RapidIO subsystem build configuration:
+
+ (a) For a statically linked RapidIO subsystem core use command line parameter
+ "rapidio.hdid=" with a list of destination ID assignments in order of mport
+ device registration. For example, in a system with two RapidIO controllers
+ the command line parameter "rapidio.hdid=-1,7" will result in assignment of
+ the host destination ID=7 to the second RapidIO controller, while the first
+ one will be assigned destination ID=-1.
+
+ (b) If the RapidIO subsystem core is built as a loadable module, in addition
+ to the method shown above, the host destination ID(s) can be specified using
+ traditional methods of passing module parameter "hdid=" during its loading:
+ - from command line: "modprobe rapidio hdid=-1,7", or
+ - from modprobe configuration file using configuration command "options",
+ like in this example: "options rapidio hdid=-1,7". An example of modprobe
+ configuration file is provided in the section below.
+
+ NOTES:
+ (i) if "hdid=" parameter is omitted all available mport will be assigned
+ destination ID = -1;
+ (ii) the "hdid=" parameter in systems with multiple mports can have
+ destination ID assignments omitted from the end of list (default = -1).
+
+If the host device ID for a specific master port is set to -1, the discovery
+process will be performed for it.
+
+The enumeration and discovery routines use RapidIO maintenance transactions
+to access the configuration space of devices.
+
+NOTE: If RapidIO switch-specific device drivers are built as loadable modules
+they must be loaded before enumeration/discovery process starts.
+This requirement is cased by the fact that enumeration/discovery methods invoke
+vendor-specific callbacks on early stages.
+
+4.2 Automatic Start of Enumeration and Discovery
+------------------------------------------------
+
+Automatic enumeration/discovery start method is applicable only to built-in
+enumeration/discovery RapidIO configuration selection. To enable automatic
+enumeration/discovery start by existing basic enumerator method set use boot
+command line parameter "rio-scan.scan=1".
+
+This configuration requires synchronized start of all RapidIO endpoints that
+form a network which will be enumerated/discovered. Discovering endpoints have
+to be started before an enumeration starts to ensure that all RapidIO
+controllers have been initialized and are ready to be discovered. Configuration
+parameter CONFIG_RAPIDIO_DISC_TIMEOUT defines time (in seconds) which
+a discovering endpoint will wait for enumeration to be completed.
+
+When automatic enumeration/discovery start is selected, basic method's
+initialization routine calls rio_init_mports() to perform enumeration or
+discovery for all known mport devices.
+
+Depending on RapidIO network size and configuration this automatic
+enumeration/discovery start method may be difficult to use due to the
+requirement for synchronized start of all endpoints.
+
+4.3 User-space Start of Enumeration and Discovery
+-------------------------------------------------
+
+User-space start of enumeration and discovery can be used with built-in and
+modular build configurations. For user-space controlled start RapidIO subsystem
+creates the sysfs write-only attribute file '/sys/bus/rapidio/scan'. To initiate
+an enumeration or discovery process on specific mport device, a user needs to
+write mport_ID (not RapidIO destination ID) into that file. The mport_ID is a
+sequential number (0 ... RIO_MAX_MPORTS) assigned during mport device
+registration. For example for machine with single RapidIO controller, mport_ID
+for that controller always will be 0.
+
+To initiate RapidIO enumeration/discovery on all available mports a user may
+write '-1' (or RIO_MPORT_ANY) into the scan attribute file.
+
+4.4 Basic Enumeration Method
+----------------------------
+
+This is an original enumeration/discovery method which is available since
+first release of RapidIO subsystem code. The enumeration process is
+implemented according to the enumeration algorithm outlined in the RapidIO
+Interconnect Specification: Annex I [1].
+
+This method can be configured as statically linked or loadable module.
+The method's single parameter "scan" allows to trigger the enumeration/discovery
+process from module initialization routine.
+
+This enumeration/discovery method can be started only once and does not support
+unloading if it is built as a module.
+
+The enumeration process traverses the network using a recursive depth-first
+algorithm. When a new device is found, the enumerator takes ownership of that
+device by writing into the Host Device ID Lock CSR. It does this to ensure that
+the enumerator has exclusive right to enumerate the device. If device ownership
+is successfully acquired, the enumerator allocates a new rio_dev structure and
+initializes it according to device capabilities.
+
+If the device is an endpoint, a unique device ID is assigned to it and its value
+is written into the device's Base Device ID CSR.
+
+If the device is a switch, the enumerator allocates an additional rio_switch
+structure to store switch specific information. Then the switch's vendor ID and
+device ID are queried against a table of known RapidIO switches. Each switch
+table entry contains a pointer to a switch-specific initialization routine that
+initializes pointers to the rest of switch specific operations, and performs
+hardware initialization if necessary. A RapidIO switch does not have a unique
+device ID; it relies on hopcount and routing for device ID of an attached
+endpoint if access to its configuration registers is required. If a switch (or
+chain of switches) does not have any endpoint (except enumerator) attached to
+it, a fake device ID will be assigned to configure a route to that switch.
+In the case of a chain of switches without endpoint, one fake device ID is used
+to configure a route through the entire chain and switches are differentiated by
+their hopcount value.
+
+For both endpoints and switches the enumerator writes a unique component tag
+into device's Component Tag CSR. That unique value is used by the error
+management notification mechanism to identify a device that is reporting an
+error management event.
+
+Enumeration beyond a switch is completed by iterating over each active egress
+port of that switch. For each active link, a route to a default device ID
+(0xFF for 8-bit systems and 0xFFFF for 16-bit systems) is temporarily written
+into the routing table. The algorithm recurs by calling itself with hopcount + 1
+and the default device ID in order to access the device on the active port.
+
+After the host has completed enumeration of the entire network it releases
+devices by clearing device ID locks (calls rio_clear_locks()). For each endpoint
+in the system, it sets the Discovered bit in the Port General Control CSR
+to indicate that enumeration is completed and agents are allowed to execute
+passive discovery of the network.
+
+The discovery process is performed by agents and is similar to the enumeration
+process that is described above. However, the discovery process is performed
+without changes to the existing routing because agents only gather information
+about RapidIO network structure and are building an internal map of discovered
+devices. This way each Linux-based component of the RapidIO subsystem has
+a complete view of the network. The discovery process can be performed
+simultaneously by several agents. After initializing its RapidIO master port
+each agent waits for enumeration completion by the host for the configured wait
+time period. If this wait time period expires before enumeration is completed,
+an agent skips RapidIO discovery and continues with remaining kernel
+initialization.
+
+4.5 Adding New Enumeration/Discovery Method
+-------------------------------------------
+
+RapidIO subsystem code organization allows addition of new enumeration/discovery
+methods as new configuration options without significant impact to the core
+RapidIO code.
+
+A new enumeration/discovery method has to be attached to one or more mport
+devices before an enumeration/discovery process can be started. Normally,
+method's module initialization routine calls rio_register_scan() to attach
+an enumerator to a specified mport device (or devices). The basic enumerator
+implementation demonstrates this process.
+
+4.6 Using Loadable RapidIO Switch Drivers
+-----------------------------------------
+
+In the case when RapidIO switch drivers are built as loadable modules a user
+must ensure that they are loaded before the enumeration/discovery starts.
+This process can be automated by specifying pre- or post- dependencies in the
+RapidIO-specific modprobe configuration file as shown in the example below.
+
+ File /etc/modprobe.d/rapidio.conf:
+ ----------------------------------
+
+ # Configure RapidIO subsystem modules
+
+ # Set enumerator host destination ID (overrides kernel command line option)
+ options rapidio hdid=-1,2
+
+ # Load RapidIO switch drivers immediately after rapidio core module was loaded
+ softdep rapidio post: idt_gen2 idtcps tsi57x
+
+ # OR :
+
+ # Load RapidIO switch drivers just before rio-scan enumerator module is loaded
+ softdep rio-scan pre: idt_gen2 idtcps tsi57x
+
+ --------------------------
+
+NOTE: In the example above, one of "softdep" commands must be removed or
+commented out to keep required module loading sequence.
+
+A. References
+-------------
+
+[1] RapidIO Trade Association. RapidIO Interconnect Specifications.
+ http://www.rapidio.org.
+[2] Rapidio TA. Technology Comparisons.
+ http://www.rapidio.org/education/technology_comparisons/
+[3] RapidIO support for Linux.
+ http://lwn.net/Articles/139118/
+[4] Matt Porter. RapidIO for Linux. Ottawa Linux Symposium, 2005
+ http://www.kernel.org/doc/ols/2005/ols2005v2-pages-43-56.pdf
diff --git a/Documentation/rapidio/sysfs.txt b/Documentation/rapidio/sysfs.txt
new file mode 100644
index 000000000..47ce9a533
--- /dev/null
+++ b/Documentation/rapidio/sysfs.txt
@@ -0,0 +1,158 @@
+ RapidIO sysfs Files
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+1. RapidIO Device Subdirectories
+--------------------------------
+
+For each RapidIO device, the RapidIO subsystem creates files in an individual
+subdirectory with the following name, /sys/bus/rapidio/devices/<device_name>.
+
+The format of device_name is "nn:d:iiii", where:
+
+nn - two-digit hexadecimal ID of RapidIO network where the device resides
+d - device typr: 'e' - for endpoint or 's' - for switch
+iiii - four-digit device destID for endpoints, or switchID for switches
+
+For example, below is a list of device directories that represents a typical
+RapidIO network with one switch, one host, and two agent endpoints, as it is
+seen by the enumerating host (destID = 1):
+
+/sys/bus/rapidio/devices/00:e:0000
+/sys/bus/rapidio/devices/00:e:0002
+/sys/bus/rapidio/devices/00:s:0001
+
+NOTE: An enumerating or discovering endpoint does not create a sysfs entry for
+itself, this is why an endpoint with destID=1 is not shown in the list.
+
+2. Attributes Common for All RapidIO Devices
+--------------------------------------------
+
+Each device subdirectory contains the following informational read-only files:
+
+ did - returns the device identifier
+ vid - returns the device vendor identifier
+device_rev - returns the device revision level
+ asm_did - returns identifier for the assembly containing the device
+ asm_rev - returns revision level of the assembly containing the device
+ asm_vid - returns vendor identifier of the assembly containing the device
+ destid - returns device destination ID assigned by the enumeration routine
+ (see 4.1 for switch specific details)
+ lprev - returns name of previous device (switch) on the path to the device
+ that that owns this attribute
+ modalias - returns the device modalias
+
+In addition to the files listed above, each device has a binary attribute file
+that allows read/write access to the device configuration registers using
+the RapidIO maintenance transactions:
+
+ config - reads from and writes to the device configuration registers.
+
+This attribute is similar in behavior to the "config" attribute of PCI devices
+and provides an access to the RapidIO device registers using standard file read
+and write operations.
+
+3. RapidIO Endpoint Device Attributes
+-------------------------------------
+
+Currently Linux RapidIO subsystem does not create any endpoint specific sysfs
+attributes. It is possible that RapidIO master port drivers and endpoint device
+drivers will add their device-specific sysfs attributes but such attributes are
+outside the scope of this document.
+
+4. RapidIO Switch Device Attributes
+-----------------------------------
+
+RapidIO switches have additional attributes in sysfs. RapidIO subsystem supports
+common and device-specific sysfs attributes for switches. Because switches are
+integrated into the RapidIO subsystem, it offers a method to create
+device-specific sysfs attributes by specifying a callback function that may be
+set by the switch initialization routine during enumeration or discovery process.
+
+4.1 Common Switch Attributes
+
+ routes - reports switch routing information in "destID port" format. This
+ attribute reports only valid routing table entries, one line for
+ each entry.
+ destid - device destination ID that defines a route to the switch
+ hopcount - number of hops on the path to the switch
+ lnext - returns names of devices linked to the switch except one of a device
+ linked to the ingress port (reported as "lprev"). This is an array
+ names with number of lines equal to number of ports in switch. If
+ a switch port has no attached device, returns "null" instead of
+ a device name.
+
+4.2 Device-specific Switch Attributes
+
+Device-specific switch attributes are listed for each RapidIO switch driver
+that exports additional attributes.
+
+IDT_GEN2:
+ errlog - reads contents of device error log until it is empty.
+
+
+5. RapidIO Bus Attributes
+-------------------------
+
+RapidIO bus subdirectory /sys/bus/rapidio implements the following bus-specific
+attribute:
+
+ scan - allows to trigger enumeration discovery process from user space. This
+ is a write-only attribute. To initiate an enumeration or discovery
+ process on specific mport device, a user needs to write mport_ID (not
+ RapidIO destination ID) into this file. The mport_ID is a sequential
+ number (0 ... RIO_MAX_MPORTS) assigned to the mport device.
+ For example, for a machine with a single RapidIO controller, mport_ID
+ for that controller always will be 0.
+ To initiate RapidIO enumeration/discovery on all available mports
+ a user must write '-1' (or RIO_MPORT_ANY) into this attribute file.
+
+
+6. RapidIO Bus Controllers/Ports
+--------------------------------
+
+On-chip RapidIO controllers and PCIe-to-RapidIO bridges (referenced as
+"Master Port" or "mport") are presented in sysfs as the special class of
+devices: "rapidio_port".
+
+The /sys/class/rapidio_port subdirectory contains individual subdirectories
+named as "rapidioN" where N = mport ID registered with RapidIO subsystem.
+
+NOTE: An mport ID is not a RapidIO destination ID assigned to a given local
+mport device.
+
+Each mport device subdirectory in addition to standard entries contains the
+following device-specific attributes:
+
+ port_destid - reports RapidIO destination ID assigned to the given RapidIO
+ mport device. If value 0xFFFFFFFF is returned this means that
+ no valid destination ID have been assigned to the mport (yet).
+ Normally, before enumeration/discovery have been executed only
+ fabric enumerating mports have a valid destination ID assigned
+ to them using "hdid=..." rapidio module parameter.
+ sys_size - reports RapidIO common transport system size:
+ 0 = small (8-bit destination ID, max. 256 devices),
+ 1 = large (16-bit destination ID, max. 65536 devices).
+
+After enumeration or discovery was performed for a given mport device,
+the corresponding subdirectory will also contain subdirectories for each
+child RapidIO device connected to the mport. Naming conventions for RapidIO
+devices are described in Section 1 above.
+
+The example below shows mport device subdirectory with several child RapidIO
+devices attached to it.
+
+[rio@rapidio ~]$ ls /sys/class/rapidio_port/rapidio0/ -l
+total 0
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0001
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0004
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:e:0007
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0002
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0003
+drwxr-xr-x 3 root root 0 Feb 11 15:10 00:s:0005
+lrwxrwxrwx 1 root root 0 Feb 11 15:11 device -> ../../../0000:01:00.0
+-r--r--r-- 1 root root 4096 Feb 11 15:11 port_destid
+drwxr-xr-x 2 root root 0 Feb 11 15:11 power
+lrwxrwxrwx 1 root root 0 Feb 11 15:04 subsystem -> ../../../../../../class/rapidio_port
+-r--r--r-- 1 root root 4096 Feb 11 15:11 sys_size
+-rw-r--r-- 1 root root 4096 Feb 11 15:04 uevent
diff --git a/Documentation/rapidio/tsi721.txt b/Documentation/rapidio/tsi721.txt
new file mode 100644
index 000000000..626052f40
--- /dev/null
+++ b/Documentation/rapidio/tsi721.txt
@@ -0,0 +1,62 @@
+RapidIO subsystem mport driver for IDT Tsi721 PCI Express-to-SRIO bridge.
+=========================================================================
+
+I. Overview
+
+This driver implements all currently defined RapidIO mport callback functions.
+It supports maintenance read and write operations, inbound and outbound RapidIO
+doorbells, inbound maintenance port-writes and RapidIO messaging.
+
+To generate SRIO maintenance transactions this driver uses one of Tsi721 DMA
+channels. This mechanism provides access to larger range of hop counts and
+destination IDs without need for changes in outbound window translation.
+
+RapidIO messaging support uses dedicated messaging channels for each mailbox.
+For inbound messages this driver uses destination ID matching to forward messages
+into the corresponding message queue. Messaging callbacks are implemented to be
+fully compatible with RIONET driver (Ethernet over RapidIO messaging services).
+
+II. Known problems
+
+ None.
+
+III. DMA Engine Support
+
+Tsi721 mport driver supports DMA data transfers between local system memory and
+remote RapidIO devices. This functionality is implemented according to SLAVE
+mode API defined by common Linux kernel DMA Engine framework.
+
+Depending on system requirements RapidIO DMA operations can be included/excluded
+by setting CONFIG_RAPIDIO_DMA_ENGINE option. Tsi721 miniport driver uses seven
+out of eight available BDMA channels to support DMA data transfers.
+One BDMA channel is reserved for generation of maintenance read/write requests.
+
+If Tsi721 mport driver have been built with RAPIDIO_DMA_ENGINE support included,
+this driver will accept DMA-specific module parameter:
+ "dma_desc_per_channel" - defines number of hardware buffer descriptors used by
+ each BDMA channel of Tsi721 (by default - 128).
+
+IV. Version History
+
+ 1.1.0 - DMA operations re-worked to support data scatter/gather lists larger
+ than hardware buffer descriptors ring.
+ 1.0.0 - Initial driver release.
+
+V. License
+-----------------------------------------------
+
+ Copyright(c) 2011 Integrated Device Technology, Inc. All rights reserved.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ You should have received a copy of the GNU General Public License along with
+ this program; if not, write to the Free Software Foundation, Inc.,
+ 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
new file mode 100644
index 000000000..39873ef41
--- /dev/null
+++ b/Documentation/rbtree.txt
@@ -0,0 +1,392 @@
+Red-black Trees (rbtree) in Linux
+January 18, 2007
+Rob Landley <rob@landley.net>
+=============================
+
+What are red-black trees, and what are they for?
+------------------------------------------------
+
+Red-black trees are a type of self-balancing binary search tree, used for
+storing sortable key/value data pairs. This differs from radix trees (which
+are used to efficiently store sparse arrays and thus use long integer indexes
+to insert/access/delete nodes) and hash tables (which are not kept sorted to
+be easily traversed in order, and must be tuned for a specific size and
+hash function where rbtrees scale gracefully storing arbitrary keys).
+
+Red-black trees are similar to AVL trees, but provide faster real-time bounded
+worst case performance for insertion and deletion (at most two rotations and
+three rotations, respectively, to balance the tree), with slightly slower
+(but still O(log n)) lookup time.
+
+To quote Linux Weekly News:
+
+ There are a number of red-black trees in use in the kernel.
+ The deadline and CFQ I/O schedulers employ rbtrees to
+ track requests; the packet CD/DVD driver does the same.
+ The high-resolution timer code uses an rbtree to organize outstanding
+ timer requests. The ext3 filesystem tracks directory entries in a
+ red-black tree. Virtual memory areas (VMAs) are tracked with red-black
+ trees, as are epoll file descriptors, cryptographic keys, and network
+ packets in the "hierarchical token bucket" scheduler.
+
+This document covers use of the Linux rbtree implementation. For more
+information on the nature and implementation of Red Black Trees, see:
+
+ Linux Weekly News article on red-black trees
+ http://lwn.net/Articles/184495/
+
+ Wikipedia entry on red-black trees
+ http://en.wikipedia.org/wiki/Red-black_tree
+
+Linux implementation of red-black trees
+---------------------------------------
+
+Linux's rbtree implementation lives in the file "lib/rbtree.c". To use it,
+"#include <linux/rbtree.h>".
+
+The Linux rbtree implementation is optimized for speed, and thus has one
+less layer of indirection (and better cache locality) than more traditional
+tree implementations. Instead of using pointers to separate rb_node and data
+structures, each instance of struct rb_node is embedded in the data structure
+it organizes. And instead of using a comparison callback function pointer,
+users are expected to write their own tree search and insert functions
+which call the provided rbtree functions. Locking is also left up to the
+user of the rbtree code.
+
+Creating a new rbtree
+---------------------
+
+Data nodes in an rbtree tree are structures containing a struct rb_node member:
+
+ struct mytype {
+ struct rb_node node;
+ char *keystring;
+ };
+
+When dealing with a pointer to the embedded struct rb_node, the containing data
+structure may be accessed with the standard container_of() macro. In addition,
+individual members may be accessed directly via rb_entry(node, type, member).
+
+At the root of each rbtree is an rb_root structure, which is initialized to be
+empty via:
+
+ struct rb_root mytree = RB_ROOT;
+
+Searching for a value in an rbtree
+----------------------------------
+
+Writing a search function for your tree is fairly straightforward: start at the
+root, compare each value, and follow the left or right branch as necessary.
+
+Example:
+
+ struct mytype *my_search(struct rb_root *root, char *string)
+ {
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct mytype *data = container_of(node, struct mytype, node);
+ int result;
+
+ result = strcmp(string, data->keystring);
+
+ if (result < 0)
+ node = node->rb_left;
+ else if (result > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+ }
+
+Inserting data into an rbtree
+-----------------------------
+
+Inserting data in the tree involves first searching for the place to insert the
+new node, then inserting the node and rebalancing ("recoloring") the tree.
+
+The search for insertion differs from the previous search by finding the
+location of the pointer on which to graft the new node. The new node also
+needs a link to its parent node for rebalancing purposes.
+
+Example:
+
+ int my_insert(struct rb_root *root, struct mytype *data)
+ {
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct mytype *this = container_of(*new, struct mytype, node);
+ int result = strcmp(data->keystring, this->keystring);
+
+ parent = *new;
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else
+ return FALSE;
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+
+ return TRUE;
+ }
+
+Removing or replacing existing data in an rbtree
+------------------------------------------------
+
+To remove an existing node from a tree, call:
+
+ void rb_erase(struct rb_node *victim, struct rb_root *tree);
+
+Example:
+
+ struct mytype *data = mysearch(&mytree, "walrus");
+
+ if (data) {
+ rb_erase(&data->node, &mytree);
+ myfree(data);
+ }
+
+To replace an existing node in a tree with a new one with the same key, call:
+
+ void rb_replace_node(struct rb_node *old, struct rb_node *new,
+ struct rb_root *tree);
+
+Replacing a node this way does not re-sort the tree: If the new node doesn't
+have the same key as the old node, the rbtree will probably become corrupted.
+
+Iterating through the elements stored in an rbtree (in sort order)
+------------------------------------------------------------------
+
+Four functions are provided for iterating through an rbtree's contents in
+sorted order. These work on arbitrary trees, and should not need to be
+modified or wrapped (except for locking purposes):
+
+ struct rb_node *rb_first(struct rb_root *tree);
+ struct rb_node *rb_last(struct rb_root *tree);
+ struct rb_node *rb_next(struct rb_node *node);
+ struct rb_node *rb_prev(struct rb_node *node);
+
+To start iterating, call rb_first() or rb_last() with a pointer to the root
+of the tree, which will return a pointer to the node structure contained in
+the first or last element in the tree. To continue, fetch the next or previous
+node by calling rb_next() or rb_prev() on the current node. This will return
+NULL when there are no more nodes left.
+
+The iterator functions return a pointer to the embedded struct rb_node, from
+which the containing data structure may be accessed with the container_of()
+macro, and individual members may be accessed directly via
+rb_entry(node, type, member).
+
+Example:
+
+ struct rb_node *node;
+ for (node = rb_first(&mytree); node; node = rb_next(node))
+ printk("key=%s\n", rb_entry(node, struct mytype, node)->keystring);
+
+Support for Augmented rbtrees
+-----------------------------
+
+Augmented rbtree is an rbtree with "some" additional data stored in
+each node, where the additional data for node N must be a function of
+the contents of all nodes in the subtree rooted at N. This data can
+be used to augment some new functionality to rbtree. Augmented rbtree
+is an optional feature built on top of basic rbtree infrastructure.
+An rbtree user who wants this feature will have to call the augmentation
+functions with the user provided augmentation callback when inserting
+and erasing nodes.
+
+C files implementing augmented rbtree manipulation must include
+<linux/rbtree_augmented.h> instead of <linus/rbtree.h>. Note that
+linux/rbtree_augmented.h exposes some rbtree implementations details
+you are not expected to rely on; please stick to the documented APIs
+there and do not include <linux/rbtree_augmented.h> from header files
+either so as to minimize chances of your users accidentally relying on
+such implementation details.
+
+On insertion, the user must update the augmented information on the path
+leading to the inserted node, then call rb_link_node() as usual and
+rb_augment_inserted() instead of the usual rb_insert_color() call.
+If rb_augment_inserted() rebalances the rbtree, it will callback into
+a user provided function to update the augmented information on the
+affected subtrees.
+
+When erasing a node, the user must call rb_erase_augmented() instead of
+rb_erase(). rb_erase_augmented() calls back into user provided functions
+to updated the augmented information on affected subtrees.
+
+In both cases, the callbacks are provided through struct rb_augment_callbacks.
+3 callbacks must be defined:
+
+- A propagation callback, which updates the augmented value for a given
+ node and its ancestors, up to a given stop point (or NULL to update
+ all the way to the root).
+
+- A copy callback, which copies the augmented value for a given subtree
+ to a newly assigned subtree root.
+
+- A tree rotation callback, which copies the augmented value for a given
+ subtree to a newly assigned subtree root AND recomputes the augmented
+ information for the former subtree root.
+
+The compiled code for rb_erase_augmented() may inline the propagation and
+copy callbacks, which results in a large function, so each augmented rbtree
+user should have a single rb_erase_augmented() call site in order to limit
+compiled code size.
+
+
+Sample usage:
+
+Interval tree is an example of augmented rb tree. Reference -
+"Introduction to Algorithms" by Cormen, Leiserson, Rivest and Stein.
+More details about interval trees:
+
+Classical rbtree has a single key and it cannot be directly used to store
+interval ranges like [lo:hi] and do a quick lookup for any overlap with a new
+lo:hi or to find whether there is an exact match for a new lo:hi.
+
+However, rbtree can be augmented to store such interval ranges in a structured
+way making it possible to do efficient lookup and exact match.
+
+This "extra information" stored in each node is the maximum hi
+(max_hi) value among all the nodes that are its descendants. This
+information can be maintained at each node just be looking at the node
+and its immediate children. And this will be used in O(log n) lookup
+for lowest match (lowest start address among all possible matches)
+with something like:
+
+struct interval_tree_node *
+interval_tree_first_match(struct rb_root *root,
+ unsigned long start, unsigned long last)
+{
+ struct interval_tree_node *node;
+
+ if (!root->rb_node)
+ return NULL;
+ node = rb_entry(root->rb_node, struct interval_tree_node, rb);
+
+ while (true) {
+ if (node->rb.rb_left) {
+ struct interval_tree_node *left =
+ rb_entry(node->rb.rb_left,
+ struct interval_tree_node, rb);
+ if (left->__subtree_last >= start) {
+ /*
+ * Some nodes in left subtree satisfy Cond2.
+ * Iterate to find the leftmost such node N.
+ * If it also satisfies Cond1, that's the match
+ * we are looking for. Otherwise, there is no
+ * matching interval as nodes to the right of N
+ * can't satisfy Cond1 either.
+ */
+ node = left;
+ continue;
+ }
+ }
+ if (node->start <= last) { /* Cond1 */
+ if (node->last >= start) /* Cond2 */
+ return node; /* node is leftmost match */
+ if (node->rb.rb_right) {
+ node = rb_entry(node->rb.rb_right,
+ struct interval_tree_node, rb);
+ if (node->__subtree_last >= start)
+ continue;
+ }
+ }
+ return NULL; /* No match */
+ }
+}
+
+Insertion/removal are defined using the following augmented callbacks:
+
+static inline unsigned long
+compute_subtree_last(struct interval_tree_node *node)
+{
+ unsigned long max = node->last, subtree_last;
+ if (node->rb.rb_left) {
+ subtree_last = rb_entry(node->rb.rb_left,
+ struct interval_tree_node, rb)->__subtree_last;
+ if (max < subtree_last)
+ max = subtree_last;
+ }
+ if (node->rb.rb_right) {
+ subtree_last = rb_entry(node->rb.rb_right,
+ struct interval_tree_node, rb)->__subtree_last;
+ if (max < subtree_last)
+ max = subtree_last;
+ }
+ return max;
+}
+
+static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
+{
+ while (rb != stop) {
+ struct interval_tree_node *node =
+ rb_entry(rb, struct interval_tree_node, rb);
+ unsigned long subtree_last = compute_subtree_last(node);
+ if (node->__subtree_last == subtree_last)
+ break;
+ node->__subtree_last = subtree_last;
+ rb = rb_parent(&node->rb);
+ }
+}
+
+static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+ struct interval_tree_node *old =
+ rb_entry(rb_old, struct interval_tree_node, rb);
+ struct interval_tree_node *new =
+ rb_entry(rb_new, struct interval_tree_node, rb);
+
+ new->__subtree_last = old->__subtree_last;
+}
+
+static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+ struct interval_tree_node *old =
+ rb_entry(rb_old, struct interval_tree_node, rb);
+ struct interval_tree_node *new =
+ rb_entry(rb_new, struct interval_tree_node, rb);
+
+ new->__subtree_last = old->__subtree_last;
+ old->__subtree_last = compute_subtree_last(old);
+}
+
+static const struct rb_augment_callbacks augment_callbacks = {
+ augment_propagate, augment_copy, augment_rotate
+};
+
+void interval_tree_insert(struct interval_tree_node *node,
+ struct rb_root *root)
+{
+ struct rb_node **link = &root->rb_node, *rb_parent = NULL;
+ unsigned long start = node->start, last = node->last;
+ struct interval_tree_node *parent;
+
+ while (*link) {
+ rb_parent = *link;
+ parent = rb_entry(rb_parent, struct interval_tree_node, rb);
+ if (parent->__subtree_last < last)
+ parent->__subtree_last = last;
+ if (start < parent->start)
+ link = &parent->rb.rb_left;
+ else
+ link = &parent->rb.rb_right;
+ }
+
+ node->__subtree_last = last;
+ rb_link_node(&node->rb, rb_parent, link);
+ rb_insert_augmented(&node->rb, root, &augment_callbacks);
+}
+
+void interval_tree_remove(struct interval_tree_node *node,
+ struct rb_root *root)
+{
+ rb_erase_augmented(&node->rb, root, &augment_callbacks);
+}
diff --git a/Documentation/remoteproc.txt b/Documentation/remoteproc.txt
new file mode 100644
index 000000000..e6469fdcf
--- /dev/null
+++ b/Documentation/remoteproc.txt
@@ -0,0 +1,299 @@
+Remote Processor Framework
+
+1. Introduction
+
+Modern SoCs typically have heterogeneous remote processor devices in asymmetric
+multiprocessing (AMP) configurations, which may be running different instances
+of operating system, whether it's Linux or any other flavor of real-time OS.
+
+OMAP4, for example, has dual Cortex-A9, dual Cortex-M3 and a C64x+ DSP.
+In a typical configuration, the dual cortex-A9 is running Linux in a SMP
+configuration, and each of the other three cores (two M3 cores and a DSP)
+is running its own instance of RTOS in an AMP configuration.
+
+The remoteproc framework allows different platforms/architectures to
+control (power on, load firmware, power off) those remote processors while
+abstracting the hardware differences, so the entire driver doesn't need to be
+duplicated. In addition, this framework also adds rpmsg virtio devices
+for remote processors that supports this kind of communication. This way,
+platform-specific remoteproc drivers only need to provide a few low-level
+handlers, and then all rpmsg drivers will then just work
+(for more information about the virtio-based rpmsg bus and its drivers,
+please read Documentation/rpmsg.txt).
+Registration of other types of virtio devices is now also possible. Firmwares
+just need to publish what kind of virtio devices do they support, and then
+remoteproc will add those devices. This makes it possible to reuse the
+existing virtio drivers with remote processor backends at a minimal development
+cost.
+
+2. User API
+
+ int rproc_boot(struct rproc *rproc)
+ - Boot a remote processor (i.e. load its firmware, power it on, ...).
+ If the remote processor is already powered on, this function immediately
+ returns (successfully).
+ Returns 0 on success, and an appropriate error value otherwise.
+ Note: to use this function you should already have a valid rproc
+ handle. There are several ways to achieve that cleanly (devres, pdata,
+ the way remoteproc_rpmsg.c does this, or, if this becomes prevalent, we
+ might also consider using dev_archdata for this).
+
+ void rproc_shutdown(struct rproc *rproc)
+ - Power off a remote processor (previously booted with rproc_boot()).
+ In case @rproc is still being used by an additional user(s), then
+ this function will just decrement the power refcount and exit,
+ without really powering off the device.
+ Every call to rproc_boot() must (eventually) be accompanied by a call
+ to rproc_shutdown(). Calling rproc_shutdown() redundantly is a bug.
+ Notes:
+ - we're not decrementing the rproc's refcount, only the power refcount.
+ which means that the @rproc handle stays valid even after
+ rproc_shutdown() returns, and users can still use it with a subsequent
+ rproc_boot(), if needed.
+
+3. Typical usage
+
+#include <linux/remoteproc.h>
+
+/* in case we were given a valid 'rproc' handle */
+int dummy_rproc_example(struct rproc *my_rproc)
+{
+ int ret;
+
+ /* let's power on and boot our remote processor */
+ ret = rproc_boot(my_rproc);
+ if (ret) {
+ /*
+ * something went wrong. handle it and leave.
+ */
+ }
+
+ /*
+ * our remote processor is now powered on... give it some work
+ */
+
+ /* let's shut it down now */
+ rproc_shutdown(my_rproc);
+}
+
+4. API for implementors
+
+ struct rproc *rproc_alloc(struct device *dev, const char *name,
+ const struct rproc_ops *ops,
+ const char *firmware, int len)
+ - Allocate a new remote processor handle, but don't register
+ it yet. Required parameters are the underlying device, the
+ name of this remote processor, platform-specific ops handlers,
+ the name of the firmware to boot this rproc with, and the
+ length of private data needed by the allocating rproc driver (in bytes).
+
+ This function should be used by rproc implementations during
+ initialization of the remote processor.
+ After creating an rproc handle using this function, and when ready,
+ implementations should then call rproc_add() to complete
+ the registration of the remote processor.
+ On success, the new rproc is returned, and on failure, NULL.
+
+ Note: _never_ directly deallocate @rproc, even if it was not registered
+ yet. Instead, when you need to unroll rproc_alloc(), use rproc_put().
+
+ void rproc_put(struct rproc *rproc)
+ - Free an rproc handle that was allocated by rproc_alloc.
+ This function essentially unrolls rproc_alloc(), by decrementing the
+ rproc's refcount. It doesn't directly free rproc; that would happen
+ only if there are no other references to rproc and its refcount now
+ dropped to zero.
+
+ int rproc_add(struct rproc *rproc)
+ - Register @rproc with the remoteproc framework, after it has been
+ allocated with rproc_alloc().
+ This is called by the platform-specific rproc implementation, whenever
+ a new remote processor device is probed.
+ Returns 0 on success and an appropriate error code otherwise.
+ Note: this function initiates an asynchronous firmware loading
+ context, which will look for virtio devices supported by the rproc's
+ firmware.
+ If found, those virtio devices will be created and added, so as a result
+ of registering this remote processor, additional virtio drivers might get
+ probed.
+
+ int rproc_del(struct rproc *rproc)
+ - Unroll rproc_add().
+ This function should be called when the platform specific rproc
+ implementation decides to remove the rproc device. it should
+ _only_ be called if a previous invocation of rproc_add()
+ has completed successfully.
+
+ After rproc_del() returns, @rproc is still valid, and its
+ last refcount should be decremented by calling rproc_put().
+
+ Returns 0 on success and -EINVAL if @rproc isn't valid.
+
+ void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type)
+ - Report a crash in a remoteproc
+ This function must be called every time a crash is detected by the
+ platform specific rproc implementation. This should not be called from a
+ non-remoteproc driver. This function can be called from atomic/interrupt
+ context.
+
+5. Implementation callbacks
+
+These callbacks should be provided by platform-specific remoteproc
+drivers:
+
+/**
+ * struct rproc_ops - platform-specific device handlers
+ * @start: power on the device and boot it
+ * @stop: power off the device
+ * @kick: kick a virtqueue (virtqueue id given as a parameter)
+ */
+struct rproc_ops {
+ int (*start)(struct rproc *rproc);
+ int (*stop)(struct rproc *rproc);
+ void (*kick)(struct rproc *rproc, int vqid);
+};
+
+Every remoteproc implementation should at least provide the ->start and ->stop
+handlers. If rpmsg/virtio functionality is also desired, then the ->kick handler
+should be provided as well.
+
+The ->start() handler takes an rproc handle and should then power on the
+device and boot it (use rproc->priv to access platform-specific private data).
+The boot address, in case needed, can be found in rproc->bootaddr (remoteproc
+core puts there the ELF entry point).
+On success, 0 should be returned, and on failure, an appropriate error code.
+
+The ->stop() handler takes an rproc handle and powers the device down.
+On success, 0 is returned, and on failure, an appropriate error code.
+
+The ->kick() handler takes an rproc handle, and an index of a virtqueue
+where new message was placed in. Implementations should interrupt the remote
+processor and let it know it has pending messages. Notifying remote processors
+the exact virtqueue index to look in is optional: it is easy (and not
+too expensive) to go through the existing virtqueues and look for new buffers
+in the used rings.
+
+6. Binary Firmware Structure
+
+At this point remoteproc only supports ELF32 firmware binaries. However,
+it is quite expected that other platforms/devices which we'd want to
+support with this framework will be based on different binary formats.
+
+When those use cases show up, we will have to decouple the binary format
+from the framework core, so we can support several binary formats without
+duplicating common code.
+
+When the firmware is parsed, its various segments are loaded to memory
+according to the specified device address (might be a physical address
+if the remote processor is accessing memory directly).
+
+In addition to the standard ELF segments, most remote processors would
+also include a special section which we call "the resource table".
+
+The resource table contains system resources that the remote processor
+requires before it should be powered on, such as allocation of physically
+contiguous memory, or iommu mapping of certain on-chip peripherals.
+Remotecore will only power up the device after all the resource table's
+requirement are met.
+
+In addition to system resources, the resource table may also contain
+resource entries that publish the existence of supported features
+or configurations by the remote processor, such as trace buffers and
+supported virtio devices (and their configurations).
+
+The resource table begins with this header:
+
+/**
+ * struct resource_table - firmware resource table header
+ * @ver: version number
+ * @num: number of resource entries
+ * @reserved: reserved (must be zero)
+ * @offset: array of offsets pointing at the various resource entries
+ *
+ * The header of the resource table, as expressed by this structure,
+ * contains a version number (should we need to change this format in the
+ * future), the number of available resource entries, and their offsets
+ * in the table.
+ */
+struct resource_table {
+ u32 ver;
+ u32 num;
+ u32 reserved[2];
+ u32 offset[0];
+} __packed;
+
+Immediately following this header are the resource entries themselves,
+each of which begins with the following resource entry header:
+
+/**
+ * struct fw_rsc_hdr - firmware resource entry header
+ * @type: resource type
+ * @data: resource data
+ *
+ * Every resource entry begins with a 'struct fw_rsc_hdr' header providing
+ * its @type. The content of the entry itself will immediately follow
+ * this header, and it should be parsed according to the resource type.
+ */
+struct fw_rsc_hdr {
+ u32 type;
+ u8 data[0];
+} __packed;
+
+Some resources entries are mere announcements, where the host is informed
+of specific remoteproc configuration. Other entries require the host to
+do something (e.g. allocate a system resource). Sometimes a negotiation
+is expected, where the firmware requests a resource, and once allocated,
+the host should provide back its details (e.g. address of an allocated
+memory region).
+
+Here are the various resource types that are currently supported:
+
+/**
+ * enum fw_resource_type - types of resource entries
+ *
+ * @RSC_CARVEOUT: request for allocation of a physically contiguous
+ * memory region.
+ * @RSC_DEVMEM: request to iommu_map a memory-based peripheral.
+ * @RSC_TRACE: announces the availability of a trace buffer into which
+ * the remote processor will be writing logs.
+ * @RSC_VDEV: declare support for a virtio device, and serve as its
+ * virtio header.
+ * @RSC_LAST: just keep this one at the end
+ *
+ * Please note that these values are used as indices to the rproc_handle_rsc
+ * lookup table, so please keep them sane. Moreover, @RSC_LAST is used to
+ * check the validity of an index before the lookup table is accessed, so
+ * please update it as needed.
+ */
+enum fw_resource_type {
+ RSC_CARVEOUT = 0,
+ RSC_DEVMEM = 1,
+ RSC_TRACE = 2,
+ RSC_VDEV = 3,
+ RSC_LAST = 4,
+};
+
+For more details regarding a specific resource type, please see its
+dedicated structure in include/linux/remoteproc.h.
+
+We also expect that platform-specific resource entries will show up
+at some point. When that happens, we could easily add a new RSC_PLATFORM
+type, and hand those resources to the platform-specific rproc driver to handle.
+
+7. Virtio and remoteproc
+
+The firmware should provide remoteproc information about virtio devices
+that it supports, and their configurations: a RSC_VDEV resource entry
+should specify the virtio device id (as in virtio_ids.h), virtio features,
+virtio config space, vrings information, etc.
+
+When a new remote processor is registered, the remoteproc framework
+will look for its resource table and will register the virtio devices
+it supports. A firmware may support any number of virtio devices, and
+of any type (a single remote processor can also easily support several
+rpmsg virtio devices this way, if desired).
+
+Of course, RSC_VDEV resource entries are only good enough for static
+allocation of virtio devices. Dynamic allocations will also be made possible
+using the rpmsg bus (similar to how we already do dynamic allocations of
+rpmsg channels; read more about it in rpmsg.txt).
diff --git a/Documentation/rfkill.txt b/Documentation/rfkill.txt
new file mode 100644
index 000000000..2ee6ef9a6
--- /dev/null
+++ b/Documentation/rfkill.txt
@@ -0,0 +1,123 @@
+rfkill - RF kill switch support
+===============================
+
+1. Introduction
+2. Implementation details
+3. Kernel API
+4. Userspace support
+
+
+1. Introduction
+
+The rfkill subsystem provides a generic interface to disabling any radio
+transmitter in the system. When a transmitter is blocked, it shall not
+radiate any power.
+
+The subsystem also provides the ability to react on button presses and
+disable all transmitters of a certain type (or all). This is intended for
+situations where transmitters need to be turned off, for example on
+aircraft.
+
+The rfkill subsystem has a concept of "hard" and "soft" block, which
+differ little in their meaning (block == transmitters off) but rather in
+whether they can be changed or not:
+ - hard block: read-only radio block that cannot be overridden by software
+ - soft block: writable radio block (need not be readable) that is set by
+ the system software.
+
+The rfkill subsystem has two parameters, rfkill.default_state and
+rfkill.master_switch_mode, which are documented in kernel-parameters.txt.
+
+
+2. Implementation details
+
+The rfkill subsystem is composed of three main components:
+ * the rfkill core,
+ * the deprecated rfkill-input module (an input layer handler, being
+ replaced by userspace policy code) and
+ * the rfkill drivers.
+
+The rfkill core provides API for kernel drivers to register their radio
+transmitter with the kernel, methods for turning it on and off and, letting
+the system know about hardware-disabled states that may be implemented on
+the device.
+
+The rfkill core code also notifies userspace of state changes, and provides
+ways for userspace to query the current states. See the "Userspace support"
+section below.
+
+When the device is hard-blocked (either by a call to rfkill_set_hw_state()
+or from query_hw_block) set_block() will be invoked for additional software
+block, but drivers can ignore the method call since they can use the return
+value of the function rfkill_set_hw_state() to sync the software state
+instead of keeping track of calls to set_block(). In fact, drivers should
+use the return value of rfkill_set_hw_state() unless the hardware actually
+keeps track of soft and hard block separately.
+
+
+3. Kernel API
+
+
+Drivers for radio transmitters normally implement an rfkill driver.
+
+Platform drivers might implement input devices if the rfkill button is just
+that, a button. If that button influences the hardware then you need to
+implement an rfkill driver instead. This also applies if the platform provides
+a way to turn on/off the transmitter(s).
+
+For some platforms, it is possible that the hardware state changes during
+suspend/hibernation, in which case it will be necessary to update the rfkill
+core with the current state is at resume time.
+
+To create an rfkill driver, driver's Kconfig needs to have
+
+ depends on RFKILL || !RFKILL
+
+to ensure the driver cannot be built-in when rfkill is modular. The !RFKILL
+case allows the driver to be built when rfkill is not configured, which
+case all rfkill API can still be used but will be provided by static inlines
+which compile to almost nothing.
+
+Calling rfkill_set_hw_state() when a state change happens is required from
+rfkill drivers that control devices that can be hard-blocked unless they also
+assign the poll_hw_block() callback (then the rfkill core will poll the
+device). Don't do this unless you cannot get the event in any other way.
+
+
+
+5. Userspace support
+
+The recommended userspace interface to use is /dev/rfkill, which is a misc
+character device that allows userspace to obtain and set the state of rfkill
+devices and sets of devices. It also notifies userspace about device addition
+and removal. The API is a simple read/write API that is defined in
+linux/rfkill.h, with one ioctl that allows turning off the deprecated input
+handler in the kernel for the transition period.
+
+Except for the one ioctl, communication with the kernel is done via read()
+and write() of instances of 'struct rfkill_event'. In this structure, the
+soft and hard block are properly separated (unlike sysfs, see below) and
+userspace is able to get a consistent snapshot of all rfkill devices in the
+system. Also, it is possible to switch all rfkill drivers (or all drivers of
+a specified type) into a state which also updates the default state for
+hotplugged devices.
+
+After an application opens /dev/rfkill, it can read the current state of all
+devices. Changes can be either obtained by either polling the descriptor for
+hotplug or state change events or by listening for uevents emitted by the
+rfkill core framework.
+
+Additionally, each rfkill device is registered in sysfs and emits uevents.
+
+rfkill devices issue uevents (with an action of "change"), with the following
+environment variables set:
+
+RFKILL_NAME
+RFKILL_STATE
+RFKILL_TYPE
+
+The contents of these variables corresponds to the "name", "state" and
+"type" sysfs files explained above.
+
+
+For further details consult Documentation/ABI/stable/sysfs-class-rfkill.
diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt
new file mode 100644
index 000000000..16eb314f5
--- /dev/null
+++ b/Documentation/robust-futex-ABI.txt
@@ -0,0 +1,182 @@
+Started by Paul Jackson <pj@sgi.com>
+
+The robust futex ABI
+--------------------
+
+Robust_futexes provide a mechanism that is used in addition to normal
+futexes, for kernel assist of cleanup of held locks on task exit.
+
+The interesting data as to what futexes a thread is holding is kept on a
+linked list in user space, where it can be updated efficiently as locks
+are taken and dropped, without kernel intervention. The only additional
+kernel intervention required for robust_futexes above and beyond what is
+required for futexes is:
+
+ 1) a one time call, per thread, to tell the kernel where its list of
+ held robust_futexes begins, and
+ 2) internal kernel code at exit, to handle any listed locks held
+ by the exiting thread.
+
+The existing normal futexes already provide a "Fast Userspace Locking"
+mechanism, which handles uncontested locking without needing a system
+call, and handles contested locking by maintaining a list of waiting
+threads in the kernel. Options on the sys_futex(2) system call support
+waiting on a particular futex, and waking up the next waiter on a
+particular futex.
+
+For robust_futexes to work, the user code (typically in a library such
+as glibc linked with the application) has to manage and place the
+necessary list elements exactly as the kernel expects them. If it fails
+to do so, then improperly listed locks will not be cleaned up on exit,
+probably causing deadlock or other such failure of the other threads
+waiting on the same locks.
+
+A thread that anticipates possibly using robust_futexes should first
+issue the system call:
+
+ asmlinkage long
+ sys_set_robust_list(struct robust_list_head __user *head, size_t len);
+
+The pointer 'head' points to a structure in the threads address space
+consisting of three words. Each word is 32 bits on 32 bit arch's, or 64
+bits on 64 bit arch's, and local byte order. Each thread should have
+its own thread private 'head'.
+
+If a thread is running in 32 bit compatibility mode on a 64 native arch
+kernel, then it can actually have two such structures - one using 32 bit
+words for 32 bit compatibility mode, and one using 64 bit words for 64
+bit native mode. The kernel, if it is a 64 bit kernel supporting 32 bit
+compatibility mode, will attempt to process both lists on each task
+exit, if the corresponding sys_set_robust_list() call has been made to
+setup that list.
+
+ The first word in the memory structure at 'head' contains a
+ pointer to a single linked list of 'lock entries', one per lock,
+ as described below. If the list is empty, the pointer will point
+ to itself, 'head'. The last 'lock entry' points back to the 'head'.
+
+ The second word, called 'offset', specifies the offset from the
+ address of the associated 'lock entry', plus or minus, of what will
+ be called the 'lock word', from that 'lock entry'. The 'lock word'
+ is always a 32 bit word, unlike the other words above. The 'lock
+ word' holds 3 flag bits in the upper 3 bits, and the thread id (TID)
+ of the thread holding the lock in the bottom 29 bits. See further
+ below for a description of the flag bits.
+
+ The third word, called 'list_op_pending', contains transient copy of
+ the address of the 'lock entry', during list insertion and removal,
+ and is needed to correctly resolve races should a thread exit while
+ in the middle of a locking or unlocking operation.
+
+Each 'lock entry' on the single linked list starting at 'head' consists
+of just a single word, pointing to the next 'lock entry', or back to
+'head' if there are no more entries. In addition, nearby to each 'lock
+entry', at an offset from the 'lock entry' specified by the 'offset'
+word, is one 'lock word'.
+
+The 'lock word' is always 32 bits, and is intended to be the same 32 bit
+lock variable used by the futex mechanism, in conjunction with
+robust_futexes. The kernel will only be able to wakeup the next thread
+waiting for a lock on a threads exit if that next thread used the futex
+mechanism to register the address of that 'lock word' with the kernel.
+
+For each futex lock currently held by a thread, if it wants this
+robust_futex support for exit cleanup of that lock, it should have one
+'lock entry' on this list, with its associated 'lock word' at the
+specified 'offset'. Should a thread die while holding any such locks,
+the kernel will walk this list, mark any such locks with a bit
+indicating their holder died, and wakeup the next thread waiting for
+that lock using the futex mechanism.
+
+When a thread has invoked the above system call to indicate it
+anticipates using robust_futexes, the kernel stores the passed in 'head'
+pointer for that task. The task may retrieve that value later on by
+using the system call:
+
+ asmlinkage long
+ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+ size_t __user *len_ptr);
+
+It is anticipated that threads will use robust_futexes embedded in
+larger, user level locking structures, one per lock. The kernel
+robust_futex mechanism doesn't care what else is in that structure, so
+long as the 'offset' to the 'lock word' is the same for all
+robust_futexes used by that thread. The thread should link those locks
+it currently holds using the 'lock entry' pointers. It may also have
+other links between the locks, such as the reverse side of a double
+linked list, but that doesn't matter to the kernel.
+
+By keeping its locks linked this way, on a list starting with a 'head'
+pointer known to the kernel, the kernel can provide to a thread the
+essential service available for robust_futexes, which is to help clean
+up locks held at the time of (a perhaps unexpectedly) exit.
+
+Actual locking and unlocking, during normal operations, is handled
+entirely by user level code in the contending threads, and by the
+existing futex mechanism to wait for, and wakeup, locks. The kernels
+only essential involvement in robust_futexes is to remember where the
+list 'head' is, and to walk the list on thread exit, handling locks
+still held by the departing thread, as described below.
+
+There may exist thousands of futex lock structures in a threads shared
+memory, on various data structures, at a given point in time. Only those
+lock structures for locks currently held by that thread should be on
+that thread's robust_futex linked lock list a given time.
+
+A given futex lock structure in a user shared memory region may be held
+at different times by any of the threads with access to that region. The
+thread currently holding such a lock, if any, is marked with the threads
+TID in the lower 29 bits of the 'lock word'.
+
+When adding or removing a lock from its list of held locks, in order for
+the kernel to correctly handle lock cleanup regardless of when the task
+exits (perhaps it gets an unexpected signal 9 in the middle of
+manipulating this list), the user code must observe the following
+protocol on 'lock entry' insertion and removal:
+
+On insertion:
+ 1) set the 'list_op_pending' word to the address of the 'lock entry'
+ to be inserted,
+ 2) acquire the futex lock,
+ 3) add the lock entry, with its thread id (TID) in the bottom 29 bits
+ of the 'lock word', to the linked list starting at 'head', and
+ 4) clear the 'list_op_pending' word.
+
+On removal:
+ 1) set the 'list_op_pending' word to the address of the 'lock entry'
+ to be removed,
+ 2) remove the lock entry for this lock from the 'head' list,
+ 3) release the futex lock, and
+ 4) clear the 'lock_op_pending' word.
+
+On exit, the kernel will consider the address stored in
+'list_op_pending' and the address of each 'lock word' found by walking
+the list starting at 'head'. For each such address, if the bottom 29
+bits of the 'lock word' at offset 'offset' from that address equals the
+exiting threads TID, then the kernel will do two things:
+
+ 1) if bit 31 (0x80000000) is set in that word, then attempt a futex
+ wakeup on that address, which will waken the next thread that has
+ used to the futex mechanism to wait on that address, and
+ 2) atomically set bit 30 (0x40000000) in the 'lock word'.
+
+In the above, bit 31 was set by futex waiters on that lock to indicate
+they were waiting, and bit 30 is set by the kernel to indicate that the
+lock owner died holding the lock.
+
+The kernel exit code will silently stop scanning the list further if at
+any point:
+
+ 1) the 'head' pointer or an subsequent linked list pointer
+ is not a valid address of a user space word
+ 2) the calculated location of the 'lock word' (address plus
+ 'offset') is not the valid address of a 32 bit user space
+ word
+ 3) if the list contains more than 1 million (subject to
+ future kernel configuration changes) elements.
+
+When the kernel sees a list entry whose 'lock word' doesn't have the
+current threads TID in the lower 29 bits, it does nothing with that
+entry, and goes on to the next entry.
+
+Bit 29 (0x20000000) of the 'lock word' is reserved for future use.
diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
new file mode 100644
index 000000000..af6fce23e
--- /dev/null
+++ b/Documentation/robust-futexes.txt
@@ -0,0 +1,218 @@
+Started by: Ingo Molnar <mingo@redhat.com>
+
+Background
+----------
+
+what are robust futexes? To answer that, we first need to understand
+what futexes are: normal futexes are special types of locks that in the
+noncontended case can be acquired/released from userspace without having
+to enter the kernel.
+
+A futex is in essence a user-space address, e.g. a 32-bit lock variable
+field. If userspace notices contention (the lock is already owned and
+someone else wants to grab it too) then the lock is marked with a value
+that says "there's a waiter pending", and the sys_futex(FUTEX_WAIT)
+syscall is used to wait for the other guy to release it. The kernel
+creates a 'futex queue' internally, so that it can later on match up the
+waiter with the waker - without them having to know about each other.
+When the owner thread releases the futex, it notices (via the variable
+value) that there were waiter(s) pending, and does the
+sys_futex(FUTEX_WAKE) syscall to wake them up. Once all waiters have
+taken and released the lock, the futex is again back to 'uncontended'
+state, and there's no in-kernel state associated with it. The kernel
+completely forgets that there ever was a futex at that address. This
+method makes futexes very lightweight and scalable.
+
+"Robustness" is about dealing with crashes while holding a lock: if a
+process exits prematurely while holding a pthread_mutex_t lock that is
+also shared with some other process (e.g. yum segfaults while holding a
+pthread_mutex_t, or yum is kill -9-ed), then waiters for that lock need
+to be notified that the last owner of the lock exited in some irregular
+way.
+
+To solve such types of problems, "robust mutex" userspace APIs were
+created: pthread_mutex_lock() returns an error value if the owner exits
+prematurely - and the new owner can decide whether the data protected by
+the lock can be recovered safely.
+
+There is a big conceptual problem with futex based mutexes though: it is
+the kernel that destroys the owner task (e.g. due to a SEGFAULT), but
+the kernel cannot help with the cleanup: if there is no 'futex queue'
+(and in most cases there is none, futexes being fast lightweight locks)
+then the kernel has no information to clean up after the held lock!
+Userspace has no chance to clean up after the lock either - userspace is
+the one that crashes, so it has no opportunity to clean up. Catch-22.
+
+In practice, when e.g. yum is kill -9-ed (or segfaults), a system reboot
+is needed to release that futex based lock. This is one of the leading
+bugreports against yum.
+
+To solve this problem, the traditional approach was to extend the vma
+(virtual memory area descriptor) concept to have a notion of 'pending
+robust futexes attached to this area'. This approach requires 3 new
+syscall variants to sys_futex(): FUTEX_REGISTER, FUTEX_DEREGISTER and
+FUTEX_RECOVER. At do_exit() time, all vmas are searched to see whether
+they have a robust_head set. This approach has two fundamental problems
+left:
+
+ - it has quite complex locking and race scenarios. The vma-based
+ approach had been pending for years, but they are still not completely
+ reliable.
+
+ - they have to scan _every_ vma at sys_exit() time, per thread!
+
+The second disadvantage is a real killer: pthread_exit() takes around 1
+microsecond on Linux, but with thousands (or tens of thousands) of vmas
+every pthread_exit() takes a millisecond or more, also totally
+destroying the CPU's L1 and L2 caches!
+
+This is very much noticeable even for normal process sys_exit_group()
+calls: the kernel has to do the vma scanning unconditionally! (this is
+because the kernel has no knowledge about how many robust futexes there
+are to be cleaned up, because a robust futex might have been registered
+in another task, and the futex variable might have been simply mmap()-ed
+into this process's address space).
+
+This huge overhead forced the creation of CONFIG_FUTEX_ROBUST so that
+normal kernels can turn it off, but worse than that: the overhead makes
+robust futexes impractical for any type of generic Linux distribution.
+
+So something had to be done.
+
+New approach to robust futexes
+------------------------------
+
+At the heart of this new approach there is a per-thread private list of
+robust locks that userspace is holding (maintained by glibc) - which
+userspace list is registered with the kernel via a new syscall [this
+registration happens at most once per thread lifetime]. At do_exit()
+time, the kernel checks this user-space list: are there any robust futex
+locks to be cleaned up?
+
+In the common case, at do_exit() time, there is no list registered, so
+the cost of robust futexes is just a simple current->robust_list != NULL
+comparison. If the thread has registered a list, then normally the list
+is empty. If the thread/process crashed or terminated in some incorrect
+way then the list might be non-empty: in this case the kernel carefully
+walks the list [not trusting it], and marks all locks that are owned by
+this thread with the FUTEX_OWNER_DIED bit, and wakes up one waiter (if
+any).
+
+The list is guaranteed to be private and per-thread at do_exit() time,
+so it can be accessed by the kernel in a lockless way.
+
+There is one race possible though: since adding to and removing from the
+list is done after the futex is acquired by glibc, there is a few
+instructions window for the thread (or process) to die there, leaving
+the futex hung. To protect against this possibility, userspace (glibc)
+also maintains a simple per-thread 'list_op_pending' field, to allow the
+kernel to clean up if the thread dies after acquiring the lock, but just
+before it could have added itself to the list. Glibc sets this
+list_op_pending field before it tries to acquire the futex, and clears
+it after the list-add (or list-remove) has finished.
+
+That's all that is needed - all the rest of robust-futex cleanup is done
+in userspace [just like with the previous patches].
+
+Ulrich Drepper has implemented the necessary glibc support for this new
+mechanism, which fully enables robust mutexes.
+
+Key differences of this userspace-list based approach, compared to the
+vma based method:
+
+ - it's much, much faster: at thread exit time, there's no need to loop
+ over every vma (!), which the VM-based method has to do. Only a very
+ simple 'is the list empty' op is done.
+
+ - no VM changes are needed - 'struct address_space' is left alone.
+
+ - no registration of individual locks is needed: robust mutexes dont
+ need any extra per-lock syscalls. Robust mutexes thus become a very
+ lightweight primitive - so they dont force the application designer
+ to do a hard choice between performance and robustness - robust
+ mutexes are just as fast.
+
+ - no per-lock kernel allocation happens.
+
+ - no resource limits are needed.
+
+ - no kernel-space recovery call (FUTEX_RECOVER) is needed.
+
+ - the implementation and the locking is "obvious", and there are no
+ interactions with the VM.
+
+Performance
+-----------
+
+I have benchmarked the time needed for the kernel to process a list of 1
+million (!) held locks, using the new method [on a 2GHz CPU]:
+
+ - with FUTEX_WAIT set [contended mutex]: 130 msecs
+ - without FUTEX_WAIT set [uncontended mutex]: 30 msecs
+
+I have also measured an approach where glibc does the lock notification
+[which it currently does for !pshared robust mutexes], and that took 256
+msecs - clearly slower, due to the 1 million FUTEX_WAKE syscalls
+userspace had to do.
+
+(1 million held locks are unheard of - we expect at most a handful of
+locks to be held at a time. Nevertheless it's nice to know that this
+approach scales nicely.)
+
+Implementation details
+----------------------
+
+The patch adds two new syscalls: one to register the userspace list, and
+one to query the registered list pointer:
+
+ asmlinkage long
+ sys_set_robust_list(struct robust_list_head __user *head,
+ size_t len);
+
+ asmlinkage long
+ sys_get_robust_list(int pid, struct robust_list_head __user **head_ptr,
+ size_t __user *len_ptr);
+
+List registration is very fast: the pointer is simply stored in
+current->robust_list. [Note that in the future, if robust futexes become
+widespread, we could extend sys_clone() to register a robust-list head
+for new threads, without the need of another syscall.]
+
+So there is virtually zero overhead for tasks not using robust futexes,
+and even for robust futex users, there is only one extra syscall per
+thread lifetime, and the cleanup operation, if it happens, is fast and
+straightforward. The kernel doesn't have any internal distinction between
+robust and normal futexes.
+
+If a futex is found to be held at exit time, the kernel sets the
+following bit of the futex word:
+
+ #define FUTEX_OWNER_DIED 0x40000000
+
+and wakes up the next futex waiter (if any). User-space does the rest of
+the cleanup.
+
+Otherwise, robust futexes are acquired by glibc by putting the TID into
+the futex field atomically. Waiters set the FUTEX_WAITERS bit:
+
+ #define FUTEX_WAITERS 0x80000000
+
+and the remaining bits are for the TID.
+
+Testing, architecture support
+-----------------------------
+
+i've tested the new syscalls on x86 and x86_64, and have made sure the
+parsing of the userspace list is robust [ ;-) ] even if the list is
+deliberately corrupted.
+
+i386 and x86_64 syscalls are wired up at the moment, and Ulrich has
+tested the new glibc code (on x86_64 and i386), and it works for his
+robust-mutex testcases.
+
+All other architectures should build just fine too - but they won't have
+the new syscalls yet.
+
+Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
+inline function before writing up the syscalls (that function returns
+-ENOSYS right now).
diff --git a/Documentation/rpmsg.txt b/Documentation/rpmsg.txt
new file mode 100644
index 000000000..f7edc3aa1
--- /dev/null
+++ b/Documentation/rpmsg.txt
@@ -0,0 +1,293 @@
+Remote Processor Messaging (rpmsg) Framework
+
+Note: this document describes the rpmsg bus and how to write rpmsg drivers.
+To learn how to add rpmsg support for new platforms, check out remoteproc.txt
+(also a resident of Documentation/).
+
+1. Introduction
+
+Modern SoCs typically employ heterogeneous remote processor devices in
+asymmetric multiprocessing (AMP) configurations, which may be running
+different instances of operating system, whether it's Linux or any other
+flavor of real-time OS.
+
+OMAP4, for example, has dual Cortex-A9, dual Cortex-M3 and a C64x+ DSP.
+Typically, the dual cortex-A9 is running Linux in a SMP configuration,
+and each of the other three cores (two M3 cores and a DSP) is running
+its own instance of RTOS in an AMP configuration.
+
+Typically AMP remote processors employ dedicated DSP codecs and multimedia
+hardware accelerators, and therefore are often used to offload CPU-intensive
+multimedia tasks from the main application processor.
+
+These remote processors could also be used to control latency-sensitive
+sensors, drive random hardware blocks, or just perform background tasks
+while the main CPU is idling.
+
+Users of those remote processors can either be userland apps (e.g. multimedia
+frameworks talking with remote OMX components) or kernel drivers (controlling
+hardware accessible only by the remote processor, reserving kernel-controlled
+resources on behalf of the remote processor, etc..).
+
+Rpmsg is a virtio-based messaging bus that allows kernel drivers to communicate
+with remote processors available on the system. In turn, drivers could then
+expose appropriate user space interfaces, if needed.
+
+When writing a driver that exposes rpmsg communication to userland, please
+keep in mind that remote processors might have direct access to the
+system's physical memory and other sensitive hardware resources (e.g. on
+OMAP4, remote cores and hardware accelerators may have direct access to the
+physical memory, gpio banks, dma controllers, i2c bus, gptimers, mailbox
+devices, hwspinlocks, etc..). Moreover, those remote processors might be
+running RTOS where every task can access the entire memory/devices exposed
+to the processor. To minimize the risks of rogue (or buggy) userland code
+exploiting remote bugs, and by that taking over the system, it is often
+desired to limit userland to specific rpmsg channels (see definition below)
+it can send messages on, and if possible, minimize how much control
+it has over the content of the messages.
+
+Every rpmsg device is a communication channel with a remote processor (thus
+rpmsg devices are called channels). Channels are identified by a textual name
+and have a local ("source") rpmsg address, and remote ("destination") rpmsg
+address.
+
+When a driver starts listening on a channel, its rx callback is bound with
+a unique rpmsg local address (a 32-bit integer). This way when inbound messages
+arrive, the rpmsg core dispatches them to the appropriate driver according
+to their destination address (this is done by invoking the driver's rx handler
+with the payload of the inbound message).
+
+
+2. User API
+
+ int rpmsg_send(struct rpmsg_channel *rpdev, void *data, int len);
+ - sends a message across to the remote processor on a given channel.
+ The caller should specify the channel, the data it wants to send,
+ and its length (in bytes). The message will be sent on the specified
+ channel, i.e. its source and destination address fields will be
+ set to the channel's src and dst addresses.
+
+ In case there are no TX buffers available, the function will block until
+ one becomes available (i.e. until the remote processor consumes
+ a tx buffer and puts it back on virtio's used descriptor ring),
+ or a timeout of 15 seconds elapses. When the latter happens,
+ -ERESTARTSYS is returned.
+ The function can only be called from a process context (for now).
+ Returns 0 on success and an appropriate error value on failure.
+
+ int rpmsg_sendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst);
+ - sends a message across to the remote processor on a given channel,
+ to a destination address provided by the caller.
+ The caller should specify the channel, the data it wants to send,
+ its length (in bytes), and an explicit destination address.
+ The message will then be sent to the remote processor to which the
+ channel belongs, using the channel's src address, and the user-provided
+ dst address (thus the channel's dst address will be ignored).
+
+ In case there are no TX buffers available, the function will block until
+ one becomes available (i.e. until the remote processor consumes
+ a tx buffer and puts it back on virtio's used descriptor ring),
+ or a timeout of 15 seconds elapses. When the latter happens,
+ -ERESTARTSYS is returned.
+ The function can only be called from a process context (for now).
+ Returns 0 on success and an appropriate error value on failure.
+
+ int rpmsg_send_offchannel(struct rpmsg_channel *rpdev, u32 src, u32 dst,
+ void *data, int len);
+ - sends a message across to the remote processor, using the src and dst
+ addresses provided by the user.
+ The caller should specify the channel, the data it wants to send,
+ its length (in bytes), and explicit source and destination addresses.
+ The message will then be sent to the remote processor to which the
+ channel belongs, but the channel's src and dst addresses will be
+ ignored (and the user-provided addresses will be used instead).
+
+ In case there are no TX buffers available, the function will block until
+ one becomes available (i.e. until the remote processor consumes
+ a tx buffer and puts it back on virtio's used descriptor ring),
+ or a timeout of 15 seconds elapses. When the latter happens,
+ -ERESTARTSYS is returned.
+ The function can only be called from a process context (for now).
+ Returns 0 on success and an appropriate error value on failure.
+
+ int rpmsg_trysend(struct rpmsg_channel *rpdev, void *data, int len);
+ - sends a message across to the remote processor on a given channel.
+ The caller should specify the channel, the data it wants to send,
+ and its length (in bytes). The message will be sent on the specified
+ channel, i.e. its source and destination address fields will be
+ set to the channel's src and dst addresses.
+
+ In case there are no TX buffers available, the function will immediately
+ return -ENOMEM without waiting until one becomes available.
+ The function can only be called from a process context (for now).
+ Returns 0 on success and an appropriate error value on failure.
+
+ int rpmsg_trysendto(struct rpmsg_channel *rpdev, void *data, int len, u32 dst)
+ - sends a message across to the remote processor on a given channel,
+ to a destination address provided by the user.
+ The user should specify the channel, the data it wants to send,
+ its length (in bytes), and an explicit destination address.
+ The message will then be sent to the remote processor to which the
+ channel belongs, using the channel's src address, and the user-provided
+ dst address (thus the channel's dst address will be ignored).
+
+ In case there are no TX buffers available, the function will immediately
+ return -ENOMEM without waiting until one becomes available.
+ The function can only be called from a process context (for now).
+ Returns 0 on success and an appropriate error value on failure.
+
+ int rpmsg_trysend_offchannel(struct rpmsg_channel *rpdev, u32 src, u32 dst,
+ void *data, int len);
+ - sends a message across to the remote processor, using source and
+ destination addresses provided by the user.
+ The user should specify the channel, the data it wants to send,
+ its length (in bytes), and explicit source and destination addresses.
+ The message will then be sent to the remote processor to which the
+ channel belongs, but the channel's src and dst addresses will be
+ ignored (and the user-provided addresses will be used instead).
+
+ In case there are no TX buffers available, the function will immediately
+ return -ENOMEM without waiting until one becomes available.
+ The function can only be called from a process context (for now).
+ Returns 0 on success and an appropriate error value on failure.
+
+ struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_channel *rpdev,
+ void (*cb)(struct rpmsg_channel *, void *, int, void *, u32),
+ void *priv, u32 addr);
+ - every rpmsg address in the system is bound to an rx callback (so when
+ inbound messages arrive, they are dispatched by the rpmsg bus using the
+ appropriate callback handler) by means of an rpmsg_endpoint struct.
+
+ This function allows drivers to create such an endpoint, and by that,
+ bind a callback, and possibly some private data too, to an rpmsg address
+ (either one that is known in advance, or one that will be dynamically
+ assigned for them).
+
+ Simple rpmsg drivers need not call rpmsg_create_ept, because an endpoint
+ is already created for them when they are probed by the rpmsg bus
+ (using the rx callback they provide when they registered to the rpmsg bus).
+
+ So things should just work for simple drivers: they already have an
+ endpoint, their rx callback is bound to their rpmsg address, and when
+ relevant inbound messages arrive (i.e. messages which their dst address
+ equals to the src address of their rpmsg channel), the driver's handler
+ is invoked to process it.
+
+ That said, more complicated drivers might do need to allocate
+ additional rpmsg addresses, and bind them to different rx callbacks.
+ To accomplish that, those drivers need to call this function.
+ Drivers should provide their channel (so the new endpoint would bind
+ to the same remote processor their channel belongs to), an rx callback
+ function, an optional private data (which is provided back when the
+ rx callback is invoked), and an address they want to bind with the
+ callback. If addr is RPMSG_ADDR_ANY, then rpmsg_create_ept will
+ dynamically assign them an available rpmsg address (drivers should have
+ a very good reason why not to always use RPMSG_ADDR_ANY here).
+
+ Returns a pointer to the endpoint on success, or NULL on error.
+
+ void rpmsg_destroy_ept(struct rpmsg_endpoint *ept);
+ - destroys an existing rpmsg endpoint. user should provide a pointer
+ to an rpmsg endpoint that was previously created with rpmsg_create_ept().
+
+ int register_rpmsg_driver(struct rpmsg_driver *rpdrv);
+ - registers an rpmsg driver with the rpmsg bus. user should provide
+ a pointer to an rpmsg_driver struct, which contains the driver's
+ ->probe() and ->remove() functions, an rx callback, and an id_table
+ specifying the names of the channels this driver is interested to
+ be probed with.
+
+ void unregister_rpmsg_driver(struct rpmsg_driver *rpdrv);
+ - unregisters an rpmsg driver from the rpmsg bus. user should provide
+ a pointer to a previously-registered rpmsg_driver struct.
+ Returns 0 on success, and an appropriate error value on failure.
+
+
+3. Typical usage
+
+The following is a simple rpmsg driver, that sends an "hello!" message
+on probe(), and whenever it receives an incoming message, it dumps its
+content to the console.
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/rpmsg.h>
+
+static void rpmsg_sample_cb(struct rpmsg_channel *rpdev, void *data, int len,
+ void *priv, u32 src)
+{
+ print_hex_dump(KERN_INFO, "incoming message:", DUMP_PREFIX_NONE,
+ 16, 1, data, len, true);
+}
+
+static int rpmsg_sample_probe(struct rpmsg_channel *rpdev)
+{
+ int err;
+
+ dev_info(&rpdev->dev, "chnl: 0x%x -> 0x%x\n", rpdev->src, rpdev->dst);
+
+ /* send a message on our channel */
+ err = rpmsg_send(rpdev, "hello!", 6);
+ if (err) {
+ pr_err("rpmsg_send failed: %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
+static void rpmsg_sample_remove(struct rpmsg_channel *rpdev)
+{
+ dev_info(&rpdev->dev, "rpmsg sample client driver is removed\n");
+}
+
+static struct rpmsg_device_id rpmsg_driver_sample_id_table[] = {
+ { .name = "rpmsg-client-sample" },
+ { },
+};
+MODULE_DEVICE_TABLE(rpmsg, rpmsg_driver_sample_id_table);
+
+static struct rpmsg_driver rpmsg_sample_client = {
+ .drv.name = KBUILD_MODNAME,
+ .drv.owner = THIS_MODULE,
+ .id_table = rpmsg_driver_sample_id_table,
+ .probe = rpmsg_sample_probe,
+ .callback = rpmsg_sample_cb,
+ .remove = rpmsg_sample_remove,
+};
+
+static int __init init(void)
+{
+ return register_rpmsg_driver(&rpmsg_sample_client);
+}
+module_init(init);
+
+static void __exit fini(void)
+{
+ unregister_rpmsg_driver(&rpmsg_sample_client);
+}
+module_exit(fini);
+
+Note: a similar sample which can be built and loaded can be found
+in samples/rpmsg/.
+
+4. Allocations of rpmsg channels:
+
+At this point we only support dynamic allocations of rpmsg channels.
+
+This is possible only with remote processors that have the VIRTIO_RPMSG_F_NS
+virtio device feature set. This feature bit means that the remote
+processor supports dynamic name service announcement messages.
+
+When this feature is enabled, creation of rpmsg devices (i.e. channels)
+is completely dynamic: the remote processor announces the existence of a
+remote rpmsg service by sending a name service message (which contains
+the name and rpmsg addr of the remote service, see struct rpmsg_ns_msg).
+
+This message is then handled by the rpmsg bus, which in turn dynamically
+creates and registers an rpmsg channel (which represents the remote service).
+If/when a relevant rpmsg driver is registered, it will be immediately probed
+by the bus, and can then start sending messages to the remote service.
+
+The plan is also to add static creation of rpmsg channels via the virtio
+config space, but it's not implemented yet.
diff --git a/Documentation/rtc.txt b/Documentation/rtc.txt
new file mode 100644
index 000000000..8446f1ea1
--- /dev/null
+++ b/Documentation/rtc.txt
@@ -0,0 +1,207 @@
+
+ Real Time Clock (RTC) Drivers for Linux
+ =======================================
+
+When Linux developers talk about a "Real Time Clock", they usually mean
+something that tracks wall clock time and is battery backed so that it
+works even with system power off. Such clocks will normally not track
+the local time zone or daylight savings time -- unless they dual boot
+with MS-Windows -- but will instead be set to Coordinated Universal Time
+(UTC, formerly "Greenwich Mean Time").
+
+The newest non-PC hardware tends to just count seconds, like the time(2)
+system call reports, but RTCs also very commonly represent time using
+the Gregorian calendar and 24 hour time, as reported by gmtime(3).
+
+Linux has two largely-compatible userspace RTC API families you may
+need to know about:
+
+ * /dev/rtc ... is the RTC provided by PC compatible systems,
+ so it's not very portable to non-x86 systems.
+
+ * /dev/rtc0, /dev/rtc1 ... are part of a framework that's
+ supported by a wide variety of RTC chips on all systems.
+
+Programmers need to understand that the PC/AT functionality is not
+always available, and some systems can do much more. That is, the
+RTCs use the same API to make requests in both RTC frameworks (using
+different filenames of course), but the hardware may not offer the
+same functionality. For example, not every RTC is hooked up to an
+IRQ, so they can't all issue alarms; and where standard PC RTCs can
+only issue an alarm up to 24 hours in the future, other hardware may
+be able to schedule one any time in the upcoming century.
+
+
+ Old PC/AT-Compatible driver: /dev/rtc
+ --------------------------------------
+
+All PCs (even Alpha machines) have a Real Time Clock built into them.
+Usually they are built into the chipset of the computer, but some may
+actually have a Motorola MC146818 (or clone) on the board. This is the
+clock that keeps the date and time while your computer is turned off.
+
+ACPI has standardized that MC146818 functionality, and extended it in
+a few ways (enabling longer alarm periods, and wake-from-hibernate).
+That functionality is NOT exposed in the old driver.
+
+However it can also be used to generate signals from a slow 2Hz to a
+relatively fast 8192Hz, in increments of powers of two. These signals
+are reported by interrupt number 8. (Oh! So *that* is what IRQ 8 is
+for...) It can also function as a 24hr alarm, raising IRQ 8 when the
+alarm goes off. The alarm can also be programmed to only check any
+subset of the three programmable values, meaning that it could be set to
+ring on the 30th second of the 30th minute of every hour, for example.
+The clock can also be set to generate an interrupt upon every clock
+update, thus generating a 1Hz signal.
+
+The interrupts are reported via /dev/rtc (major 10, minor 135, read only
+character device) in the form of an unsigned long. The low byte contains
+the type of interrupt (update-done, alarm-rang, or periodic) that was
+raised, and the remaining bytes contain the number of interrupts since
+the last read. Status information is reported through the pseudo-file
+/proc/driver/rtc if the /proc filesystem was enabled. The driver has
+built in locking so that only one process is allowed to have the /dev/rtc
+interface open at a time.
+
+A user process can monitor these interrupts by doing a read(2) or a
+select(2) on /dev/rtc -- either will block/stop the user process until
+the next interrupt is received. This is useful for things like
+reasonably high frequency data acquisition where one doesn't want to
+burn up 100% CPU by polling gettimeofday etc. etc.
+
+At high frequencies, or under high loads, the user process should check
+the number of interrupts received since the last read to determine if
+there has been any interrupt "pileup" so to speak. Just for reference, a
+typical 486-33 running a tight read loop on /dev/rtc will start to suffer
+occasional interrupt pileup (i.e. > 1 IRQ event since last read) for
+frequencies above 1024Hz. So you really should check the high bytes
+of the value you read, especially at frequencies above that of the
+normal timer interrupt, which is 100Hz.
+
+Programming and/or enabling interrupt frequencies greater than 64Hz is
+only allowed by root. This is perhaps a bit conservative, but we don't want
+an evil user generating lots of IRQs on a slow 386sx-16, where it might have
+a negative impact on performance. This 64Hz limit can be changed by writing
+a different value to /proc/sys/dev/rtc/max-user-freq. Note that the
+interrupt handler is only a few lines of code to minimize any possibility
+of this effect.
+
+Also, if the kernel time is synchronized with an external source, the
+kernel will write the time back to the CMOS clock every 11 minutes. In
+the process of doing this, the kernel briefly turns off RTC periodic
+interrupts, so be aware of this if you are doing serious work. If you
+don't synchronize the kernel time with an external source (via ntp or
+whatever) then the kernel will keep its hands off the RTC, allowing you
+exclusive access to the device for your applications.
+
+The alarm and/or interrupt frequency are programmed into the RTC via
+various ioctl(2) calls as listed in ./include/linux/rtc.h
+Rather than write 50 pages describing the ioctl() and so on, it is
+perhaps more useful to include a small test program that demonstrates
+how to use them, and demonstrates the features of the driver. This is
+probably a lot more useful to people interested in writing applications
+that will be using this driver. See the code at the end of this document.
+
+(The original /dev/rtc driver was written by Paul Gortmaker.)
+
+
+ New portable "RTC Class" drivers: /dev/rtcN
+ --------------------------------------------
+
+Because Linux supports many non-ACPI and non-PC platforms, some of which
+have more than one RTC style clock, it needed a more portable solution
+than expecting a single battery-backed MC146818 clone on every system.
+Accordingly, a new "RTC Class" framework has been defined. It offers
+three different userspace interfaces:
+
+ * /dev/rtcN ... much the same as the older /dev/rtc interface
+
+ * /sys/class/rtc/rtcN ... sysfs attributes support readonly
+ access to some RTC attributes.
+
+ * /proc/driver/rtc ... the system clock RTC may expose itself
+ using a procfs interface. If there is no RTC for the system clock,
+ rtc0 is used by default. More information is (currently) shown
+ here than through sysfs.
+
+The RTC Class framework supports a wide variety of RTCs, ranging from those
+integrated into embeddable system-on-chip (SOC) processors to discrete chips
+using I2C, SPI, or some other bus to communicate with the host CPU. There's
+even support for PC-style RTCs ... including the features exposed on newer PCs
+through ACPI.
+
+The new framework also removes the "one RTC per system" restriction. For
+example, maybe the low-power battery-backed RTC is a discrete I2C chip, but
+a high functionality RTC is integrated into the SOC. That system might read
+the system clock from the discrete RTC, but use the integrated one for all
+other tasks, because of its greater functionality.
+
+SYSFS INTERFACE
+---------------
+
+The sysfs interface under /sys/class/rtc/rtcN provides access to various
+rtc attributes without requiring the use of ioctls. All dates and times
+are in the RTC's timezone, rather than in system time.
+
+date: RTC-provided date
+hctosys: 1 if the RTC provided the system time at boot via the
+ CONFIG_RTC_HCTOSYS kernel option, 0 otherwise
+max_user_freq: The maximum interrupt rate an unprivileged user may request
+ from this RTC.
+name: The name of the RTC corresponding to this sysfs directory
+since_epoch: The number of seconds since the epoch according to the RTC
+time: RTC-provided time
+wakealarm: The time at which the clock will generate a system wakeup
+ event. This is a one shot wakeup event, so must be reset
+ after wake if a daily wakeup is required. Format is seconds since
+ the epoch by default, or if there's a leading +, seconds in the
+ future, or if there is a leading +=, seconds ahead of the current
+ alarm.
+
+IOCTL INTERFACE
+---------------
+
+The ioctl() calls supported by /dev/rtc are also supported by the RTC class
+framework. However, because the chips and systems are not standardized,
+some PC/AT functionality might not be provided. And in the same way, some
+newer features -- including those enabled by ACPI -- are exposed by the
+RTC class framework, but can't be supported by the older driver.
+
+ * RTC_RD_TIME, RTC_SET_TIME ... every RTC supports at least reading
+ time, returning the result as a Gregorian calendar date and 24 hour
+ wall clock time. To be most useful, this time may also be updated.
+
+ * RTC_AIE_ON, RTC_AIE_OFF, RTC_ALM_SET, RTC_ALM_READ ... when the RTC
+ is connected to an IRQ line, it can often issue an alarm IRQ up to
+ 24 hours in the future. (Use RTC_WKALM_* by preference.)
+
+ * RTC_WKALM_SET, RTC_WKALM_RD ... RTCs that can issue alarms beyond
+ the next 24 hours use a slightly more powerful API, which supports
+ setting the longer alarm time and enabling its IRQ using a single
+ request (using the same model as EFI firmware).
+
+ * RTC_UIE_ON, RTC_UIE_OFF ... if the RTC offers IRQs, the RTC framework
+ will emulate this mechanism.
+
+ * RTC_PIE_ON, RTC_PIE_OFF, RTC_IRQP_SET, RTC_IRQP_READ ... these icotls
+ are emulated via a kernel hrtimer.
+
+In many cases, the RTC alarm can be a system wake event, used to force
+Linux out of a low power sleep state (or hibernation) back to a fully
+operational state. For example, a system could enter a deep power saving
+state until it's time to execute some scheduled tasks.
+
+Note that many of these ioctls are handled by the common rtc-dev interface.
+Some common examples:
+
+ * RTC_RD_TIME, RTC_SET_TIME: the read_time/set_time functions will be
+ called with appropriate values.
+
+ * RTC_ALM_SET, RTC_ALM_READ, RTC_WKALM_SET, RTC_WKALM_RD: gets or sets
+ the alarm rtc_timer. May call the set_alarm driver function.
+
+ * RTC_IRQP_SET, RTC_IRQP_READ: These are emulated by the generic code.
+
+ * RTC_PIE_ON, RTC_PIE_OFF: These are also emulated by the generic code.
+
+If all else fails, check out the tools/testing/selftests/timers/rtctest.c test!
diff --git a/Documentation/s390/00-INDEX b/Documentation/s390/00-INDEX
new file mode 100644
index 000000000..10c874ebd
--- /dev/null
+++ b/Documentation/s390/00-INDEX
@@ -0,0 +1,28 @@
+00-INDEX
+ - this file.
+3270.ChangeLog
+ - ChangeLog for the UTS Global 3270-support patch (outdated).
+3270.txt
+ - how to use the IBM 3270 display system support.
+cds.txt
+ - s390 common device support (common I/O layer).
+CommonIO
+ - common I/O layer command line parameters, procfs and debugfs entries
+config3270.sh
+ - example configuration for 3270 devices.
+DASD
+ - information on the DASD disk device driver.
+Debugging390.txt
+ - hints for debugging on s390 systems.
+driver-model.txt
+ - information on s390 devices and the driver model.
+kvm.txt
+ - ioctl calls to /dev/kvm on s390.
+monreader.txt
+ - information on accessing the z/VM monitor stream from Linux.
+qeth.txt
+ - HiperSockets Bridge Port Support.
+s390dbf.txt
+ - information on using the s390 debug feature.
+zfcpdump.txt
+ - information on the s390 SCSI dump tool.
diff --git a/Documentation/s390/3270.ChangeLog b/Documentation/s390/3270.ChangeLog
new file mode 100644
index 000000000..031c36081
--- /dev/null
+++ b/Documentation/s390/3270.ChangeLog
@@ -0,0 +1,44 @@
+ChangeLog for the UTS Global 3270-support patch
+
+Sep 2002: Get bootup colors right on 3270 console
+ * In tubttybld.c, substantially revise ESC processing so that
+ ESC sequences (especially coloring ones) and the strings
+ they affect work as right as 3270 can get them. Also, set
+ screen height to omit the two rows used for input area, in
+ tty3270_open() in tubtty.c.
+
+Sep 2002: Dynamically get 3270 input buffer
+ * Oversize 3270 screen widths may exceed GEOM_MAXINPLEN columns,
+ so get input-area buffer dynamically when sizing the device in
+ tubmakemin() in tuball.c (if it's the console) or tty3270_open()
+ in tubtty.c (if needed). Change tubp->tty_input to be a
+ pointer rather than an array, in tubio.h.
+
+Sep 2002: Fix tubfs kmalloc()s
+ * Do read and write lengths correctly in fs3270_read()
+ and fs3270_write(), whilst never asking kmalloc()
+ for more than 0x800 bytes. Affects tubfs.c and tubio.h.
+
+Sep 2002: Recognize 3270 control unit type 3174
+ * Recognize control-unit type 0x3174 as well as 0x327?.
+ The IBM 2047 device emulates a 3174 control unit.
+ Modularize control-unit recognition in tuball.c by
+ adding and invoking new tub3270_is_ours().
+
+Apr 2002: Fix 3270 console reboot loop
+ * (Belated log entry) Fixed reboot loop if 3270 console,
+ in tubtty.c:ttu3270_bh().
+
+Feb 6, 2001:
+ * This changelog is new
+ * tub3270 now supports 3270 console:
+ Specify y for CONFIG_3270 and y for CONFIG_3270_CONSOLE.
+ Support for 3215 will not appear if 3270 console support
+ is chosen.
+ NOTE: The default is 3270 console support, NOT 3215.
+ * the components are remodularized: added source modules are
+ tubttybld.c and tubttyscl.c, for screen-building code and
+ scroll-timeout code.
+ * tub3270 source for this (2.4.0) version is #ifdeffed to
+ build with both 2.4.0 and 2.2.16.2.
+ * color support and minimal other ESC-sequence support is added.
diff --git a/Documentation/s390/3270.txt b/Documentation/s390/3270.txt
new file mode 100644
index 000000000..7c715de99
--- /dev/null
+++ b/Documentation/s390/3270.txt
@@ -0,0 +1,271 @@
+IBM 3270 Display System support
+
+This file describes the driver that supports local channel attachment
+of IBM 3270 devices. It consists of three sections:
+ * Introduction
+ * Installation
+ * Operation
+
+
+INTRODUCTION.
+
+This paper describes installing and operating 3270 devices under
+Linux/390. A 3270 device is a block-mode rows-and-columns terminal of
+which I'm sure hundreds of millions were sold by IBM and clonemakers
+twenty and thirty years ago.
+
+You may have 3270s in-house and not know it. If you're using the
+VM-ESA operating system, define a 3270 to your virtual machine by using
+the command "DEF GRAF <hex-address>" This paper presumes you will be
+defining four 3270s with the CP/CMS commands
+
+ DEF GRAF 620
+ DEF GRAF 621
+ DEF GRAF 622
+ DEF GRAF 623
+
+Your network connection from VM-ESA allows you to use x3270, tn3270, or
+another 3270 emulator, started from an xterm window on your PC or
+workstation. With the DEF GRAF command, an application such as xterm,
+and this Linux-390 3270 driver, you have another way of talking to your
+Linux box.
+
+This paper covers installation of the driver and operation of a
+dialed-in x3270.
+
+
+INSTALLATION.
+
+You install the driver by installing a patch, doing a kernel build, and
+running the configuration script (config3270.sh, in this directory).
+
+WARNING: If you are using 3270 console support, you must rerun the
+configuration script every time you change the console's address (perhaps
+by using the condev= parameter in silo's /boot/parmfile). More precisely,
+you should rerun the configuration script every time your set of 3270s,
+including the console 3270, changes subchannel identifier relative to
+one another. ReIPL as soon as possible after running the configuration
+script and the resulting /tmp/mkdev3270.
+
+If you have chosen to make tub3270 a module, you add a line to a
+configuration file under /etc/modprobe.d/. If you are working on a VM
+virtual machine, you can use DEF GRAF to define virtual 3270 devices.
+
+You may generate both 3270 and 3215 console support, or one or the
+other, or neither. If you generate both, the console type under VM is
+not changed. Use #CP Q TERM to see what the current console type is.
+Use #CP TERM CONMODE 3270 to change it to 3270. If you generate only
+3270 console support, then the driver automatically converts your console
+at boot time to a 3270 if it is a 3215.
+
+In brief, these are the steps:
+ 1. Install the tub3270 patch
+ 2. (If a module) add a line to a file in /etc/modprobe.d/*.conf
+ 3. (If VM) define devices with DEF GRAF
+ 4. Reboot
+ 5. Configure
+
+To test that everything works, assuming VM and x3270,
+ 1. Bring up an x3270 window.
+ 2. Use the DIAL command in that window.
+ 3. You should immediately see a Linux login screen.
+
+Here are the installation steps in detail:
+
+ 1. The 3270 driver is a part of the official Linux kernel
+ source. Build a tree with the kernel source and any necessary
+ patches. Then do
+ make oldconfig
+ (If you wish to disable 3215 console support, edit
+ .config; change CONFIG_TN3215's value to "n";
+ and rerun "make oldconfig".)
+ make image
+ make modules
+ make modules_install
+
+ 2. (Perform this step only if you have configured tub3270 as a
+ module.) Add a line to a file /etc/modprobe.d/*.conf to automatically
+ load the driver when it's needed. With this line added, you will see
+ login prompts appear on your 3270s as soon as boot is complete (or
+ with emulated 3270s, as soon as you dial into your vm guest using the
+ command "DIAL <vmguestname>"). Since the line-mode major number is
+ 227, the line to add should be:
+ alias char-major-227 tub3270
+
+ 3. Define graphic devices to your vm guest machine, if you
+ haven't already. Define them before you reboot (reipl):
+ DEFINE GRAF 620
+ DEFINE GRAF 621
+ DEFINE GRAF 622
+ DEFINE GRAF 623
+
+ 4. Reboot. The reboot process scans hardware devices, including
+ 3270s, and this enables the tub3270 driver once loaded to respond
+ correctly to the configuration requests of the next step. If
+ you have chosen 3270 console support, your console now behaves
+ as a 3270, not a 3215.
+
+ 5. Run the 3270 configuration script config3270. It is
+ distributed in this same directory, Documentation/s390, as
+ config3270.sh. Inspect the output script it produces,
+ /tmp/mkdev3270, and then run that script. This will create the
+ necessary character special device files and make the necessary
+ changes to /etc/inittab.
+
+ Then notify /sbin/init that /etc/inittab has changed, by issuing
+ the telinit command with the q operand:
+ cd Documentation/s390
+ sh config3270.sh
+ sh /tmp/mkdev3270
+ telinit q
+
+ This should be sufficient for your first time. If your 3270
+ configuration has changed and you're reusing config3270, you
+ should follow these steps:
+ Change 3270 configuration
+ Reboot
+ Run config3270 and /tmp/mkdev3270
+ Reboot
+
+Here are the testing steps in detail:
+
+ 1. Bring up an x3270 window, or use an actual hardware 3278 or
+ 3279, or use the 3270 emulator of your choice. You would be
+ running the emulator on your PC or workstation. You would use
+ the command, for example,
+ x3270 vm-esa-domain-name &
+ if you wanted a 3278 Model 4 with 43 rows of 80 columns, the
+ default model number. The driver does not take advantage of
+ extended attributes.
+
+ The screen you should now see contains a VM logo with input
+ lines near the bottom. Use TAB to move to the bottom line,
+ probably labeled "COMMAND ===>".
+
+ 2. Use the DIAL command instead of the LOGIN command to connect
+ to one of the virtual 3270s you defined with the DEF GRAF
+ commands:
+ dial my-vm-guest-name
+
+ 3. You should immediately see a login prompt from your
+ Linux-390 operating system. If that does not happen, you would
+ see instead the line "DIALED TO my-vm-guest-name 0620".
+
+ To troubleshoot: do these things.
+
+ A. Is the driver loaded? Use the lsmod command (no operands)
+ to find out. Probably it isn't. Try loading it manually, with
+ the command "insmod tub3270". Does that command give error
+ messages? Ha! There's your problem.
+
+ B. Is the /etc/inittab file modified as in installation step 3
+ above? Use the grep command to find out; for instance, issue
+ "grep 3270 /etc/inittab". Nothing found? There's your
+ problem!
+
+ C. Are the device special files created, as in installation
+ step 2 above? Use the ls -l command to find out; for instance,
+ issue "ls -l /dev/3270/tty620". The output should start with the
+ letter "c" meaning character device and should contain "227, 1"
+ just to the left of the device name. No such file? no "c"?
+ Wrong major number? Wrong minor number? There's your
+ problem!
+
+ D. Do you get the message
+ "HCPDIA047E my-vm-guest-name 0620 does not exist"?
+ If so, you must issue the command "DEF GRAF 620" from your VM
+ 3215 console and then reboot the system.
+
+
+
+OPERATION.
+
+The driver defines three areas on the 3270 screen: the log area, the
+input area, and the status area.
+
+The log area takes up all but the bottom two lines of the screen. The
+driver writes terminal output to it, starting at the top line and going
+down. When it fills, the status area changes from "Linux Running" to
+"Linux More...". After a scrolling timeout of (default) 5 sec, the
+screen clears and more output is written, from the top down.
+
+The input area extends from the beginning of the second-to-last screen
+line to the start of the status area. You type commands in this area
+and hit ENTER to execute them.
+
+The status area initializes to "Linux Running" to give you a warm
+fuzzy feeling. When the log area fills up and output awaits, it
+changes to "Linux More...". At this time you can do several things or
+nothing. If you do nothing, the screen will clear in (default) 5 sec
+and more output will appear. You may hit ENTER with nothing typed in
+the input area to toggle between "Linux More..." and "Linux Holding",
+which indicates no scrolling will occur. (If you hit ENTER with "Linux
+Running" and nothing typed, the application receives a newline.)
+
+You may change the scrolling timeout value. For example, the following
+command line:
+ echo scrolltime=60 > /proc/tty/driver/tty3270
+changes the scrolling timeout value to 60 sec. Set scrolltime to 0 if
+you wish to prevent scrolling entirely.
+
+Other things you may do when the log area fills up are: hit PA2 to
+clear the log area and write more output to it, or hit CLEAR to clear
+the log area and the input area and write more output to the log area.
+
+Some of the Program Function (PF) and Program Attention (PA) keys are
+preassigned special functions. The ones that are not yield an alarm
+when pressed.
+
+PA1 causes a SIGINT to the currently running application. You may do
+the same thing from the input area, by typing "^C" and hitting ENTER.
+
+PA2 causes the log area to be cleared. If output awaits, it is then
+written to the log area.
+
+PF3 causes an EOF to be received as input by the application. You may
+cause an EOF also by typing "^D" and hitting ENTER.
+
+No PF key is preassigned to cause a job suspension, but you may cause a
+job suspension by typing "^Z" and hitting ENTER. You may wish to
+assign this function to a PF key. To make PF7 cause job suspension,
+execute the command:
+ echo pf7=^z > /proc/tty/driver/tty3270
+
+If the input you type does not end with the two characters "^n", the
+driver appends a newline character and sends it to the tty driver;
+otherwise the driver strips the "^n" and does not append a newline.
+The IBM 3215 driver behaves similarly.
+
+Pf10 causes the most recent command to be retrieved from the tube's
+command stack (default depth 20) and displayed in the input area. You
+may hit PF10 again for the next-most-recent command, and so on. A
+command is entered into the stack only when the input area is not made
+invisible (such as for password entry) and it is not identical to the
+current top entry. PF10 rotates backward through the command stack;
+PF11 rotates forward. You may assign the backward function to any PF
+key (or PA key, for that matter), say, PA3, with the command:
+ echo -e pa3=\\033k > /proc/tty/driver/tty3270
+This assigns the string ESC-k to PA3. Similarly, the string ESC-j
+performs the forward function. (Rationale: In bash with vi-mode line
+editing, ESC-k and ESC-j retrieve backward and forward history.
+Suggestions welcome.)
+
+Is a stack size of twenty commands not to your liking? Change it on
+the fly. To change to saving the last 100 commands, execute the
+command:
+ echo recallsize=100 > /proc/tty/driver/tty3270
+
+Have a command you issue frequently? Assign it to a PF or PA key! Use
+the command
+ echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270
+to execute the commands mkdir foobar and cd foobar immediately when you
+hit PF24. Want to see the command line first, before you execute it?
+Use the -n option of the echo command:
+ echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270
+
+
+
+Happy testing! I welcome any and all comments about this document, the
+driver, etc etc.
+
+Dick Hitt <rbh00@utsglobal.com>
diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO
new file mode 100644
index 000000000..6e0f63f34
--- /dev/null
+++ b/Documentation/s390/CommonIO
@@ -0,0 +1,125 @@
+S/390 common I/O-Layer - command line parameters, procfs and debugfs entries
+============================================================================
+
+Command line parameters
+-----------------------
+
+* ccw_timeout_log
+
+ Enable logging of debug information in case of ccw device timeouts.
+
+* cio_ignore = device[,device[,..]]
+
+ device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>}
+
+ The given devices will be ignored by the common I/O-layer; no detection
+ and device sensing will be done on any of those devices. The subchannel to
+ which the device in question is attached will be treated as if no device was
+ attached.
+
+ An ignored device can be un-ignored later; see the "/proc entries"-section for
+ details.
+
+ The devices must be given either as bus ids (0.x.abcd) or as hexadecimal
+ device numbers (0xabcd or abcd, for 2.4 backward compatibility). If you
+ give a device number 0xabcd, it will be interpreted as 0.0.abcd.
+
+ You can use the 'all' keyword to ignore all devices. The 'ipldev' and 'condev'
+ keywords can be used to refer to the CCW based boot device and CCW console
+ device respectively (these are probably useful only when combined with the '!'
+ operator). The '!' operator will cause the I/O-layer to _not_ ignore a device.
+ The command line is parsed from left to right.
+
+ For example,
+ cio_ignore=0.0.0023-0.0.0042,0.0.4711
+ will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device
+ 0.0.4711, if detected.
+ As another example,
+ cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02
+ will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02.
+
+ By default, no devices are ignored.
+
+
+/proc entries
+-------------
+
+* /proc/cio_ignore
+
+ Lists the ranges of devices (by bus id) which are ignored by common I/O.
+
+ You can un-ignore certain or all devices by piping to /proc/cio_ignore.
+ "free all" will un-ignore all ignored devices,
+ "free <device range>, <device range>, ..." will un-ignore the specified
+ devices.
+
+ For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored,
+ - echo free 0.0.0030-0.0.0032 > /proc/cio_ignore
+ will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023
+ to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored;
+ - echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device
+ 0.0.0041;
+ - echo free all > /proc/cio_ignore will un-ignore all remaining ignored
+ devices.
+
+ When a device is un-ignored, device recognition and sensing is performed and
+ the device driver will be notified if possible, so the device will become
+ available to the system. Note that un-ignoring is performed asynchronously.
+
+ You can also add ranges of devices to be ignored by piping to
+ /proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the
+ specified devices.
+
+ Note: While already known devices can be added to the list of devices to be
+ ignored, there will be no effect on then. However, if such a device
+ disappears and then reappears, it will then be ignored. To make
+ known devices go away, you need the "purge" command (see below).
+
+ For example,
+ "echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore"
+ will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
+ devices.
+
+ You can remove already known but now ignored devices via
+ "echo purge > /proc/cio_ignore"
+ All devices ignored but still registered and not online (= not in use)
+ will be deregistered and thus removed from the system.
+
+ The devices can be specified either by bus id (0.x.abcd) or, for 2.4 backward
+ compatibility, by the device number in hexadecimal (0xabcd or abcd). Device
+ numbers given as 0xabcd will be interpreted as 0.0.abcd.
+
+* /proc/cio_settle
+
+ A write request to this file is blocked until all queued cio actions are
+ handled. This will allow userspace to wait for pending work affecting
+ device availability after changing cio_ignore or the hardware configuration.
+
+* For some of the information present in the /proc filesystem in 2.4 (namely,
+ /proc/subchannels and /proc/chpids), see driver-model.txt.
+ Information formerly in /proc/irq_count is now in /proc/interrupts.
+
+
+debugfs entries
+---------------
+
+* /sys/kernel/debug/s390dbf/cio_*/ (S/390 debug feature)
+
+ Some views generated by the debug feature to hold various debug outputs.
+
+ - /sys/kernel/debug/s390dbf/cio_crw/sprintf
+ Messages from the processing of pending channel report words (machine check
+ handling).
+
+ - /sys/kernel/debug/s390dbf/cio_msg/sprintf
+ Various debug messages from the common I/O-layer.
+
+ - /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
+ Logs the calling of functions in the common I/O-layer and, if applicable,
+ which subchannel they were called for, as well as dumps of some data
+ structures (like irb in an error case).
+
+ The level of logging can be changed to be more or less verbose by piping to
+ /sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
+ documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt)
+ for details.
diff --git a/Documentation/s390/DASD b/Documentation/s390/DASD
new file mode 100644
index 000000000..9963f1e9c
--- /dev/null
+++ b/Documentation/s390/DASD
@@ -0,0 +1,73 @@
+DASD device driver
+
+S/390's disk devices (DASDs) are managed by Linux via the DASD device
+driver. It is valid for all types of DASDs and represents them to
+Linux as block devices, namely "dd". Currently the DASD driver uses a
+single major number (254) and 4 minor numbers per volume (1 for the
+physical volume and 3 for partitions). With respect to partitions see
+below. Thus you may have up to 64 DASD devices in your system.
+
+The kernel parameter 'dasd=from-to,...' may be issued arbitrary times
+in the kernel's parameter line or not at all. The 'from' and 'to'
+parameters are to be given in hexadecimal notation without a leading
+0x.
+If you supply kernel parameters the different instances are processed
+in order of appearance and a minor number is reserved for any device
+covered by the supplied range up to 64 volumes. Additional DASDs are
+ignored. If you do not supply the 'dasd=' kernel parameter at all, the
+DASD driver registers all supported DASDs of your system to a minor
+number in ascending order of the subchannel number.
+
+The driver currently supports ECKD-devices and there are stubs for
+support of the FBA and CKD architectures. For the FBA architecture
+only some smart data structures are missing to make the support
+complete.
+We performed our testing on 3380 and 3390 type disks of different
+sizes, under VM and on the bare hardware (LPAR), using internal disks
+of the multiprise as well as a RAMAC virtual array. Disks exported by
+an Enterprise Storage Server (Seascape) should work fine as well.
+
+We currently implement one partition per volume, which is the whole
+volume, skipping the first blocks up to the volume label. These are
+reserved for IPL records and IBM's volume label to assure
+accessibility of the DASD from other OSs. In a later stage we will
+provide support of partitions, maybe VTOC oriented or using a kind of
+partition table in the label record.
+
+USAGE
+
+-Low-level format (?CKD only)
+For using an ECKD-DASD as a Linux harddisk you have to low-level
+format the tracks by issuing the BLKDASDFORMAT-ioctl on that
+device. This will erase any data on that volume including IBM volume
+labels, VTOCs etc. The ioctl may take a 'struct format_data *' or
+'NULL' as an argument.
+typedef struct {
+ int start_unit;
+ int stop_unit;
+ int blksize;
+} format_data_t;
+When a NULL argument is passed to the BLKDASDFORMAT ioctl the whole
+disk is formatted to a blocksize of 1024 bytes. Otherwise start_unit
+and stop_unit are the first and last track to be formatted. If
+stop_unit is -1 it implies that the DASD is formatted from start_unit
+up to the last track. blksize can be any power of two between 512 and
+4096. We recommend no blksize lower than 1024 because the ext2fs uses
+1kB blocks anyway and you gain approx. 50% of capacity increasing your
+blksize from 512 byte to 1kB.
+
+-Make a filesystem
+Then you can mk??fs the filesystem of your choice on that volume or
+partition. For reasons of sanity you should build your filesystem on
+the partition /dev/dd?1 instead of the whole volume. You only lose 3kB
+but may be sure that you can reuse your data after introduction of a
+real partition table.
+
+BUGS:
+- Performance sometimes is rather low because we don't fully exploit clustering
+
+TODO-List:
+- Add IBM'S Disk layout to genhd
+- Enhance driver to use more than one major number
+- Enable usage as a module
+- Support Cache fast write and DASD fast write (ECKD)
diff --git a/Documentation/s390/Debugging390.txt b/Documentation/s390/Debugging390.txt
new file mode 100644
index 000000000..3df8babcd
--- /dev/null
+++ b/Documentation/s390/Debugging390.txt
@@ -0,0 +1,2142 @@
+
+ Debugging on Linux for s/390 & z/Architecture
+ by
+ Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ Copyright (C) 2000-2001 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ Best viewed with fixed width fonts
+
+Overview of Document:
+=====================
+This document is intended to give a good overview of how to debug Linux for
+s/390 and z/Architecture. It is not intended as a complete reference and not a
+tutorial on the fundamentals of C & assembly. It doesn't go into
+390 IO in any detail. It is intended to complement the documents in the
+reference section below & any other worthwhile references you get.
+
+It is intended like the Enterprise Systems Architecture/390 Reference Summary
+to be printed out & used as a quick cheat sheet self help style reference when
+problems occur.
+
+Contents
+========
+Register Set
+Address Spaces on Intel Linux
+Address Spaces on Linux for s/390 & z/Architecture
+The Linux for s/390 & z/Architecture Kernel Task Structure
+Register Usage & Stackframes on Linux for s/390 & z/Architecture
+A sample program with comments
+Compiling programs for debugging on Linux for s/390 & z/Architecture
+Debugging under VM
+s/390 & z/Architecture IO Overview
+Debugging IO on s/390 & z/Architecture under VM
+GDB on s/390 & z/Architecture
+Stack chaining in gdb by hand
+Examining core dumps
+ldd
+Debugging modules
+The proc file system
+SysRq
+References
+Special Thanks
+
+Register Set
+============
+The current architectures have the following registers.
+
+16 General propose registers, 32 bit on s/390 and 64 bit on z/Architecture,
+r0-r15 (or gpr0-gpr15), used for arithmetic and addressing.
+
+16 Control registers, 32 bit on s/390 and 64 bit on z/Architecture, cr0-cr15,
+kernel usage only, used for memory management, interrupt control, debugging
+control etc.
+
+16 Access registers (ar0-ar15), 32 bit on both s/390 and z/Architecture,
+normally not used by normal programs but potentially could be used as
+temporary storage. These registers have a 1:1 association with general
+purpose registers and are designed to be used in the so-called access
+register mode to select different address spaces.
+Access register 0 (and access register 1 on z/Architecture, which needs a
+64 bit pointer) is currently used by the pthread library as a pointer to
+the current running threads private area.
+
+16 64 bit floating point registers (fp0-fp15 ) IEEE & HFP floating
+point format compliant on G5 upwards & a Floating point control reg (FPC)
+4 64 bit registers (fp0,fp2,fp4 & fp6) HFP only on older machines.
+Note:
+Linux (currently) always uses IEEE & emulates G5 IEEE format on older machines,
+( provided the kernel is configured for this ).
+
+
+The PSW is the most important register on the machine it
+is 64 bit on s/390 & 128 bit on z/Architecture & serves the roles of
+a program counter (pc), condition code register,memory space designator.
+In IBM standard notation I am counting bit 0 as the MSB.
+It has several advantages over a normal program counter
+in that you can change address translation & program counter
+in a single instruction. To change address translation,
+e.g. switching address translation off requires that you
+have a logical=physical mapping for the address you are
+currently running at.
+
+ Bit Value
+s/390 z/Architecture
+0 0 Reserved ( must be 0 ) otherwise specification exception occurs.
+
+1 1 Program Event Recording 1 PER enabled,
+ PER is used to facilitate debugging e.g. single stepping.
+
+2-4 2-4 Reserved ( must be 0 ).
+
+5 5 Dynamic address translation 1=DAT on.
+
+6 6 Input/Output interrupt Mask
+
+7 7 External interrupt Mask used primarily for interprocessor
+ signalling and clock interrupts.
+
+8-11 8-11 PSW Key used for complex memory protection mechanism
+ (not used under linux)
+
+12 12 1 on s/390 0 on z/Architecture
+
+13 13 Machine Check Mask 1=enable machine check interrupts
+
+14 14 Wait State. Set this to 1 to stop the processor except for
+ interrupts and give time to other LPARS. Used in CPU idle in
+ the kernel to increase overall usage of processor resources.
+
+15 15 Problem state ( if set to 1 certain instructions are disabled )
+ all linux user programs run with this bit 1
+ ( useful info for debugging under VM ).
+
+16-17 16-17 Address Space Control
+
+ 00 Primary Space Mode:
+ The register CR1 contains the primary address-space control ele-
+ ment (PASCE), which points to the primary space region/segment
+ table origin.
+
+ 01 Access register mode
+
+ 10 Secondary Space Mode:
+ The register CR7 contains the secondary address-space control
+ element (SASCE), which points to the secondary space region or
+ segment table origin.
+
+ 11 Home Space Mode:
+ The register CR13 contains the home space address-space control
+ element (HASCE), which points to the home space region/segment
+ table origin.
+
+ See "Address Spaces on Linux for s/390 & z/Architecture" below
+ for more information about address space usage in Linux.
+
+18-19 18-19 Condition codes (CC)
+
+20 20 Fixed point overflow mask if 1=FPU exceptions for this event
+ occur ( normally 0 )
+
+21 21 Decimal overflow mask if 1=FPU exceptions for this event occur
+ ( normally 0 )
+
+22 22 Exponent underflow mask if 1=FPU exceptions for this event occur
+ ( normally 0 )
+
+23 23 Significance Mask if 1=FPU exceptions for this event occur
+ ( normally 0 )
+
+24-31 24-30 Reserved Must be 0.
+
+ 31 Extended Addressing Mode
+ 32 Basic Addressing Mode
+ Used to set addressing mode
+ PSW 31 PSW 32
+ 0 0 24 bit
+ 0 1 31 bit
+ 1 1 64 bit
+
+32 1=31 bit addressing mode 0=24 bit addressing mode (for backward
+ compatibility), linux always runs with this bit set to 1
+
+33-64 Instruction address.
+ 33-63 Reserved must be 0
+ 64-127 Address
+ In 24 bits mode bits 64-103=0 bits 104-127 Address
+ In 31 bits mode bits 64-96=0 bits 97-127 Address
+ Note: unlike 31 bit mode on s/390 bit 96 must be zero
+ when loading the address with LPSWE otherwise a
+ specification exception occurs, LPSW is fully backward
+ compatible.
+
+
+Prefix Page(s)
+--------------
+This per cpu memory area is too intimately tied to the processor not to mention.
+It exists between the real addresses 0-4096 on s/390 and between 0-8192 on
+z/Architecture and is exchanged with one page on s/390 or two pages on
+z/Architecture in absolute storage by the set prefix instruction during Linux
+startup.
+This page is mapped to a different prefix for each processor in an SMP
+configuration (assuming the OS designer is sane of course).
+Bytes 0-512 (200 hex) on s/390 and 0-512, 4096-4544, 4604-5119 currently on
+z/Architecture are used by the processor itself for holding such information
+as exception indications and entry points for exceptions.
+Bytes after 0xc00 hex are used by linux for per processor globals on s/390 and
+z/Architecture (there is a gap on z/Architecture currently between 0xc00 and
+0x1000, too, which is used by Linux).
+The closest thing to this on traditional architectures is the interrupt
+vector table. This is a good thing & does simplify some of the kernel coding
+however it means that we now cannot catch stray NULL pointers in the
+kernel without hard coded checks.
+
+
+
+Address Spaces on Intel Linux
+=============================
+
+The traditional Intel Linux is approximately mapped as follows forgive
+the ascii art.
+0xFFFFFFFF 4GB Himem *****************
+ * *
+ * Kernel Space *
+ * *
+ ***************** ****************
+User Space Himem * User Stack * * *
+(typically 0xC0000000 3GB ) ***************** * *
+ * Shared Libs * * Next Process *
+ ***************** * to *
+ * * <== * Run * <==
+ * User Program * * *
+ * Data BSS * * *
+ * Text * * *
+ * Sections * * *
+0x00000000 ***************** ****************
+
+Now it is easy to see that on Intel it is quite easy to recognise a kernel
+address as being one greater than user space himem (in this case 0xC0000000),
+and addresses of less than this are the ones in the current running program on
+this processor (if an smp box).
+If using the virtual machine ( VM ) as a debugger it is quite difficult to
+know which user process is running as the address space you are looking at
+could be from any process in the run queue.
+
+The limitation of Intels addressing technique is that the linux
+kernel uses a very simple real address to virtual addressing technique
+of Real Address=Virtual Address-User Space Himem.
+This means that on Intel the kernel linux can typically only address
+Himem=0xFFFFFFFF-0xC0000000=1GB & this is all the RAM these machines
+can typically use.
+They can lower User Himem to 2GB or lower & thus be
+able to use 2GB of RAM however this shrinks the maximum size
+of User Space from 3GB to 2GB they have a no win limit of 4GB unless
+they go to 64 Bit.
+
+
+On 390 our limitations & strengths make us slightly different.
+For backward compatibility we are only allowed use 31 bits (2GB)
+of our 32 bit addresses, however, we use entirely separate address
+spaces for the user & kernel.
+
+This means we can support 2GB of non Extended RAM on s/390, & more
+with the Extended memory management swap device &
+currently 4TB of physical memory currently on z/Architecture.
+
+
+Address Spaces on Linux for s/390 & z/Architecture
+==================================================
+
+Our addressing scheme is basically as follows:
+
+ Primary Space Home Space
+Himem 0x7fffffff 2GB on s/390 ***************** ****************
+currently 0x3ffffffffff (2^42)-1 * User Stack * * *
+on z/Architecture. ***************** * *
+ * Shared Libs * * *
+ ***************** * *
+ * * * Kernel *
+ * User Program * * *
+ * Data BSS * * *
+ * Text * * *
+ * Sections * * *
+0x00000000 ***************** ****************
+
+This also means that we need to look at the PSW problem state bit and the
+addressing mode to decide whether we are looking at user or kernel space.
+
+User space runs in primary address mode (or access register mode within
+the vdso code).
+
+The kernel usually also runs in home space mode, however when accessing
+user space the kernel switches to primary or secondary address mode if
+the mvcos instruction is not available or if a compare-and-swap (futex)
+instruction on a user space address is performed.
+
+When also looking at the ASCE control registers, this means:
+
+User space:
+- runs in primary or access register mode
+- cr1 contains the user asce
+- cr7 contains the user asce
+- cr13 contains the kernel asce
+
+Kernel space:
+- runs in home space mode
+- cr1 contains the user or kernel asce
+ -> the kernel asce is loaded when a uaccess requires primary or
+ secondary address mode
+- cr7 contains the user or kernel asce, (changed with set_fs())
+- cr13 contains the kernel asce
+
+In case of uaccess the kernel changes to:
+- primary space mode in case of a uaccess (copy_to_user) and uses
+ e.g. the mvcp instruction to access user space. However the kernel
+ will stay in home space mode if the mvcos instruction is available
+- secondary space mode in case of futex atomic operations, so that the
+ instructions come from primary address space and data from secondary
+ space
+
+In case of KVM, the kernel runs in home space mode, but cr1 gets switched
+to contain the gmap asce before the SIE instruction gets executed. When
+the SIE instruction is finished, cr1 will be switched back to contain the
+user asce.
+
+
+Virtual Addresses on s/390 & z/Architecture
+===========================================
+
+A virtual address on s/390 is made up of 3 parts
+The SX (segment index, roughly corresponding to the PGD & PMD in Linux
+terminology) being bits 1-11.
+The PX (page index, corresponding to the page table entry (pte) in Linux
+terminology) being bits 12-19.
+The remaining bits BX (the byte index are the offset in the page )
+i.e. bits 20 to 31.
+
+On z/Architecture in linux we currently make up an address from 4 parts.
+The region index bits (RX) 0-32 we currently use bits 22-32
+The segment index (SX) being bits 33-43
+The page index (PX) being bits 44-51
+The byte index (BX) being bits 52-63
+
+Notes:
+1) s/390 has no PMD so the PMD is really the PGD also.
+A lot of this stuff is defined in pgtable.h.
+
+2) Also seeing as s/390's page indexes are only 1k in size
+(bits 12-19 x 4 bytes per pte ) we use 1 ( page 4k )
+to make the best use of memory by updating 4 segment indices
+entries each time we mess with a PMD & use offsets
+0,1024,2048 & 3072 in this page as for our segment indexes.
+On z/Architecture our page indexes are now 2k in size
+( bits 12-19 x 8 bytes per pte ) we do a similar trick
+but only mess with 2 segment indices each time we mess with
+a PMD.
+
+3) As z/Architecture supports up to a massive 5-level page table lookup we
+can only use 3 currently on Linux ( as this is all the generic kernel
+currently supports ) however this may change in future
+this allows us to access ( according to my sums )
+4TB of virtual storage per process i.e.
+4096*512(PTES)*1024(PMDS)*2048(PGD) = 4398046511104 bytes,
+enough for another 2 or 3 of years I think :-).
+to do this we use a region-third-table designation type in
+our address space control registers.
+
+
+The Linux for s/390 & z/Architecture Kernel Task Structure
+==========================================================
+Each process/thread under Linux for S390 has its own kernel task_struct
+defined in linux/include/linux/sched.h
+The S390 on initialisation & resuming of a process on a cpu sets
+the __LC_KERNEL_STACK variable in the spare prefix area for this cpu
+(which we use for per-processor globals).
+
+The kernel stack pointer is intimately tied with the task structure for
+each processor as follows.
+
+ s/390
+ ************************
+ * 1 page kernel stack *
+ * ( 4K ) *
+ ************************
+ * 1 page task_struct *
+ * ( 4K ) *
+8K aligned ************************
+
+ z/Architecture
+ ************************
+ * 2 page kernel stack *
+ * ( 8K ) *
+ ************************
+ * 2 page task_struct *
+ * ( 8K ) *
+16K aligned ************************
+
+What this means is that we don't need to dedicate any register or global
+variable to point to the current running process & can retrieve it with the
+following very simple construct for s/390 & one very similar for z/Architecture.
+
+static inline struct task_struct * get_current(void)
+{
+ struct task_struct *current;
+ __asm__("lhi %0,-8192\n\t"
+ "nr %0,15"
+ : "=r" (current) );
+ return current;
+}
+
+i.e. just anding the current kernel stack pointer with the mask -8192.
+Thankfully because Linux doesn't have support for nested IO interrupts
+& our devices have large buffers can survive interrupts being shut for
+short amounts of time we don't need a separate stack for interrupts.
+
+
+
+
+Register Usage & Stackframes on Linux for s/390 & z/Architecture
+=================================================================
+Overview:
+---------
+This is the code that gcc produces at the top & the bottom of
+each function. It usually is fairly consistent & similar from
+function to function & if you know its layout you can probably
+make some headway in finding the ultimate cause of a problem
+after a crash without a source level debugger.
+
+Note: To follow stackframes requires a knowledge of C or Pascal &
+limited knowledge of one assembly language.
+
+It should be noted that there are some differences between the
+s/390 and z/Architecture stack layouts as the z/Architecture stack layout
+didn't have to maintain compatibility with older linkage formats.
+
+Glossary:
+---------
+alloca:
+This is a built in compiler function for runtime allocation
+of extra space on the callers stack which is obviously freed
+up on function exit ( e.g. the caller may choose to allocate nothing
+of a buffer of 4k if required for temporary purposes ), it generates
+very efficient code ( a few cycles ) when compared to alternatives
+like malloc.
+
+automatics: These are local variables on the stack,
+i.e they aren't in registers & they aren't static.
+
+back-chain:
+This is a pointer to the stack pointer before entering a
+framed functions ( see frameless function ) prologue got by
+dereferencing the address of the current stack pointer,
+ i.e. got by accessing the 32 bit value at the stack pointers
+current location.
+
+base-pointer:
+This is a pointer to the back of the literal pool which
+is an area just behind each procedure used to store constants
+in each function.
+
+call-clobbered: The caller probably needs to save these registers if there
+is something of value in them, on the stack or elsewhere before making a
+call to another procedure so that it can restore it later.
+
+epilogue:
+The code generated by the compiler to return to the caller.
+
+frameless-function
+A frameless function in Linux for s390 & z/Architecture is one which doesn't
+need more than the register save area (96 bytes on s/390, 160 on z/Architecture)
+given to it by the caller.
+A frameless function never:
+1) Sets up a back chain.
+2) Calls alloca.
+3) Calls other normal functions
+4) Has automatics.
+
+GOT-pointer:
+This is a pointer to the global-offset-table in ELF
+( Executable Linkable Format, Linux'es most common executable format ),
+all globals & shared library objects are found using this pointer.
+
+lazy-binding
+ELF shared libraries are typically only loaded when routines in the shared
+library are actually first called at runtime. This is lazy binding.
+
+procedure-linkage-table
+This is a table found from the GOT which contains pointers to routines
+in other shared libraries which can't be called to by easier means.
+
+prologue:
+The code generated by the compiler to set up the stack frame.
+
+outgoing-args:
+This is extra area allocated on the stack of the calling function if the
+parameters for the callee's cannot all be put in registers, the same
+area can be reused by each function the caller calls.
+
+routine-descriptor:
+A COFF executable format based concept of a procedure reference
+actually being 8 bytes or more as opposed to a simple pointer to the routine.
+This is typically defined as follows
+Routine Descriptor offset 0=Pointer to Function
+Routine Descriptor offset 4=Pointer to Table of Contents
+The table of contents/TOC is roughly equivalent to a GOT pointer.
+& it means that shared libraries etc. can be shared between several
+environments each with their own TOC.
+
+
+static-chain: This is used in nested functions a concept adopted from pascal
+by gcc not used in ansi C or C++ ( although quite useful ), basically it
+is a pointer used to reference local variables of enclosing functions.
+You might come across this stuff once or twice in your lifetime.
+
+e.g.
+The function below should return 11 though gcc may get upset & toss warnings
+about unused variables.
+int FunctionA(int a)
+{
+ int b;
+ FunctionC(int c)
+ {
+ b=c+1;
+ }
+ FunctionC(10);
+ return(b);
+}
+
+
+s/390 & z/Architecture Register usage
+=====================================
+r0 used by syscalls/assembly call-clobbered
+r1 used by syscalls/assembly call-clobbered
+r2 argument 0 / return value 0 call-clobbered
+r3 argument 1 / return value 1 (if long long) call-clobbered
+r4 argument 2 call-clobbered
+r5 argument 3 call-clobbered
+r6 argument 4 saved
+r7 pointer-to arguments 5 to ... saved
+r8 this & that saved
+r9 this & that saved
+r10 static-chain ( if nested function ) saved
+r11 frame-pointer ( if function used alloca ) saved
+r12 got-pointer saved
+r13 base-pointer saved
+r14 return-address saved
+r15 stack-pointer saved
+
+f0 argument 0 / return value ( float/double ) call-clobbered
+f2 argument 1 call-clobbered
+f4 z/Architecture argument 2 saved
+f6 z/Architecture argument 3 saved
+The remaining floating points
+f1,f3,f5 f7-f15 are call-clobbered.
+
+Notes:
+------
+1) The only requirement is that registers which are used
+by the callee are saved, e.g. the compiler is perfectly
+capable of using r11 for purposes other than a frame a
+frame pointer if a frame pointer is not needed.
+2) In functions with variable arguments e.g. printf the calling procedure
+is identical to one without variable arguments & the same number of
+parameters. However, the prologue of this function is somewhat more
+hairy owing to it having to move these parameters to the stack to
+get va_start, va_arg & va_end to work.
+3) Access registers are currently unused by gcc but are used in
+the kernel. Possibilities exist to use them at the moment for
+temporary storage but it isn't recommended.
+4) Only 4 of the floating point registers are used for
+parameter passing as older machines such as G3 only have only 4
+& it keeps the stack frame compatible with other compilers.
+However with IEEE floating point emulation under linux on the
+older machines you are free to use the other 12.
+5) A long long or double parameter cannot be have the
+first 4 bytes in a register & the second four bytes in the
+outgoing args area. It must be purely in the outgoing args
+area if crossing this boundary.
+6) Floating point parameters are mixed with outgoing args
+on the outgoing args area in the order the are passed in as parameters.
+7) Floating point arguments 2 & 3 are saved in the outgoing args area for
+z/Architecture
+
+
+Stack Frame Layout
+------------------
+s/390 z/Architecture
+0 0 back chain ( a 0 here signifies end of back chain )
+4 8 eos ( end of stack, not used on Linux for S390 used in other linkage formats )
+8 16 glue used in other s/390 linkage formats for saved routine descriptors etc.
+12 24 glue used in other s/390 linkage formats for saved routine descriptors etc.
+16 32 scratch area
+20 40 scratch area
+24 48 saved r6 of caller function
+28 56 saved r7 of caller function
+32 64 saved r8 of caller function
+36 72 saved r9 of caller function
+40 80 saved r10 of caller function
+44 88 saved r11 of caller function
+48 96 saved r12 of caller function
+52 104 saved r13 of caller function
+56 112 saved r14 of caller function
+60 120 saved r15 of caller function
+64 128 saved f4 of caller function
+72 132 saved f6 of caller function
+80 undefined
+96 160 outgoing args passed from caller to callee
+96+x 160+x possible stack alignment ( 8 bytes desirable )
+96+x+y 160+x+y alloca space of caller ( if used )
+96+x+y+z 160+x+y+z automatics of caller ( if used )
+0 back-chain
+
+A sample program with comments.
+===============================
+
+Comments on the function test
+-----------------------------
+1) It didn't need to set up a pointer to the constant pool gpr13 as it is not
+used ( :-( ).
+2) This is a frameless function & no stack is bought.
+3) The compiler was clever enough to recognise that it could return the
+value in r2 as well as use it for the passed in parameter ( :-) ).
+4) The basr ( branch relative & save ) trick works as follows the instruction
+has a special case with r0,r0 with some instruction operands is understood as
+the literal value 0, some risc architectures also do this ). So now
+we are branching to the next address & the address new program counter is
+in r13,so now we subtract the size of the function prologue we have executed
++ the size of the literal pool to get to the top of the literal pool
+0040037c int test(int b)
+{ # Function prologue below
+ 40037c: 90 de f0 34 stm %r13,%r14,52(%r15) # Save registers r13 & r14
+ 400380: 0d d0 basr %r13,%r0 # Set up pointer to constant pool using
+ 400382: a7 da ff fa ahi %r13,-6 # basr trick
+ return(5+b);
+ # Huge main program
+ 400386: a7 2a 00 05 ahi %r2,5 # add 5 to r2
+
+ # Function epilogue below
+ 40038a: 98 de f0 34 lm %r13,%r14,52(%r15) # restore registers r13 & 14
+ 40038e: 07 fe br %r14 # return
+}
+
+Comments on the function main
+-----------------------------
+1) The compiler did this function optimally ( 8-) )
+
+Literal pool for main.
+400390: ff ff ff ec .long 0xffffffec
+main(int argc,char *argv[])
+{ # Function prologue below
+ 400394: 90 bf f0 2c stm %r11,%r15,44(%r15) # Save necessary registers
+ 400398: 18 0f lr %r0,%r15 # copy stack pointer to r0
+ 40039a: a7 fa ff a0 ahi %r15,-96 # Make area for callee saving
+ 40039e: 0d d0 basr %r13,%r0 # Set up r13 to point to
+ 4003a0: a7 da ff f0 ahi %r13,-16 # literal pool
+ 4003a4: 50 00 f0 00 st %r0,0(%r15) # Save backchain
+
+ return(test(5)); # Main Program Below
+ 4003a8: 58 e0 d0 00 l %r14,0(%r13) # load relative address of test from
+ # literal pool
+ 4003ac: a7 28 00 05 lhi %r2,5 # Set first parameter to 5
+ 4003b0: 4d ee d0 00 bas %r14,0(%r14,%r13) # jump to test setting r14 as return
+ # address using branch & save instruction.
+
+ # Function Epilogue below
+ 4003b4: 98 bf f0 8c lm %r11,%r15,140(%r15)# Restore necessary registers.
+ 4003b8: 07 fe br %r14 # return to do program exit
+}
+
+
+Compiler updates
+----------------
+
+main(int argc,char *argv[])
+{
+ 4004fc: 90 7f f0 1c stm %r7,%r15,28(%r15)
+ 400500: a7 d5 00 04 bras %r13,400508 <main+0xc>
+ 400504: 00 40 04 f4 .long 0x004004f4
+ # compiler now puts constant pool in code to so it saves an instruction
+ 400508: 18 0f lr %r0,%r15
+ 40050a: a7 fa ff a0 ahi %r15,-96
+ 40050e: 50 00 f0 00 st %r0,0(%r15)
+ return(test(5));
+ 400512: 58 10 d0 00 l %r1,0(%r13)
+ 400516: a7 28 00 05 lhi %r2,5
+ 40051a: 0d e1 basr %r14,%r1
+ # compiler adds 1 extra instruction to epilogue this is done to
+ # avoid processor pipeline stalls owing to data dependencies on g5 &
+ # above as register 14 in the old code was needed directly after being loaded
+ # by the lm %r11,%r15,140(%r15) for the br %14.
+ 40051c: 58 40 f0 98 l %r4,152(%r15)
+ 400520: 98 7f f0 7c lm %r7,%r15,124(%r15)
+ 400524: 07 f4 br %r4
+}
+
+
+Hartmut ( our compiler developer ) also has been threatening to take out the
+stack backchain in optimised code as this also causes pipeline stalls, you
+have been warned.
+
+64 bit z/Architecture code disassembly
+--------------------------------------
+
+If you understand the stuff above you'll understand the stuff
+below too so I'll avoid repeating myself & just say that
+some of the instructions have g's on the end of them to indicate
+they are 64 bit & the stack offsets are a bigger,
+the only other difference you'll find between 32 & 64 bit is that
+we now use f4 & f6 for floating point arguments on 64 bit.
+00000000800005b0 <test>:
+int test(int b)
+{
+ return(5+b);
+ 800005b0: a7 2a 00 05 ahi %r2,5
+ 800005b4: b9 14 00 22 lgfr %r2,%r2 # downcast to integer
+ 800005b8: 07 fe br %r14
+ 800005ba: 07 07 bcr 0,%r7
+
+
+}
+
+00000000800005bc <main>:
+main(int argc,char *argv[])
+{
+ 800005bc: eb bf f0 58 00 24 stmg %r11,%r15,88(%r15)
+ 800005c2: b9 04 00 1f lgr %r1,%r15
+ 800005c6: a7 fb ff 60 aghi %r15,-160
+ 800005ca: e3 10 f0 00 00 24 stg %r1,0(%r15)
+ return(test(5));
+ 800005d0: a7 29 00 05 lghi %r2,5
+ # brasl allows jumps > 64k & is overkill here bras would do fune
+ 800005d4: c0 e5 ff ff ff ee brasl %r14,800005b0 <test>
+ 800005da: e3 40 f1 10 00 04 lg %r4,272(%r15)
+ 800005e0: eb bf f0 f8 00 04 lmg %r11,%r15,248(%r15)
+ 800005e6: 07 f4 br %r4
+}
+
+
+
+Compiling programs for debugging on Linux for s/390 & z/Architecture
+====================================================================
+-gdwarf-2 now works it should be considered the default debugging
+format for s/390 & z/Architecture as it is more reliable for debugging
+shared libraries, normal -g debugging works much better now
+Thanks to the IBM java compiler developers bug reports.
+
+This is typically done adding/appending the flags -g or -gdwarf-2 to the
+CFLAGS & LDFLAGS variables Makefile of the program concerned.
+
+If using gdb & you would like accurate displays of registers &
+ stack traces compile without optimisation i.e make sure
+that there is no -O2 or similar on the CFLAGS line of the Makefile &
+the emitted gcc commands, obviously this will produce worse code
+( not advisable for shipment ) but it is an aid to the debugging process.
+
+This aids debugging because the compiler will copy parameters passed in
+in registers onto the stack so backtracing & looking at passed in
+parameters will work, however some larger programs which use inline functions
+will not compile without optimisation.
+
+Debugging with optimisation has since much improved after fixing
+some bugs, please make sure you are using gdb-5.0 or later developed
+after Nov'2000.
+
+
+
+Debugging under VM
+==================
+
+Notes
+-----
+Addresses & values in the VM debugger are always hex never decimal
+Address ranges are of the format <HexValue1>-<HexValue2> or
+<HexValue1>.<HexValue2>
+For example, the address range 0x2000 to 0x3000 can be described as 2000-3000
+or 2000.1000
+
+The VM Debugger is case insensitive.
+
+VM's strengths are usually other debuggers weaknesses you can get at any
+resource no matter how sensitive e.g. memory management resources, change
+address translation in the PSW. For kernel hacking you will reap dividends if
+you get good at it.
+
+The VM Debugger displays operators but not operands, and also the debugger
+displays useful information on the same line as the author of the code probably
+felt that it was a good idea not to go over the 80 columns on the screen.
+This isn't as unintuitive as it may seem as the s/390 instructions are easy to
+decode mentally and you can make a good guess at a lot of them as all the
+operands are nibble (half byte aligned).
+So if you have an objdump listing by hand, it is quite easy to follow, and if
+you don't have an objdump listing keep a copy of the s/390 Reference Summary
+or alternatively the s/390 principles of operation next to you.
+e.g. even I can guess that
+0001AFF8' LR 180F CC 0
+is a ( load register ) lr r0,r15
+
+Also it is very easy to tell the length of a 390 instruction from the 2 most
+significant bits in the instruction (not that this info is really useful except
+if you are trying to make sense of a hexdump of code).
+Here is a table
+Bits Instruction Length
+------------------------------------------
+00 2 Bytes
+01 4 Bytes
+10 4 Bytes
+11 6 Bytes
+
+The debugger also displays other useful info on the same line such as the
+addresses being operated on destination addresses of branches & condition codes.
+e.g.
+00019736' AHI A7DAFF0E CC 1
+000198BA' BRC A7840004 -> 000198C2' CC 0
+000198CE' STM 900EF068 >> 0FA95E78 CC 2
+
+
+
+Useful VM debugger commands
+---------------------------
+
+I suppose I'd better mention this before I start
+to list the current active traces do
+Q TR
+there can be a maximum of 255 of these per set
+( more about trace sets later ).
+To stop traces issue a
+TR END.
+To delete a particular breakpoint issue
+TR DEL <breakpoint number>
+
+The PA1 key drops to CP mode so you can issue debugger commands,
+Doing alt c (on my 3270 console at least ) clears the screen.
+hitting b <enter> comes back to the running operating system
+from cp mode ( in our case linux ).
+It is typically useful to add shortcuts to your profile.exec file
+if you have one ( this is roughly equivalent to autoexec.bat in DOS ).
+file here are a few from mine.
+/* this gives me command history on issuing f12 */
+set pf12 retrieve
+/* this continues */
+set pf8 imm b
+/* goes to trace set a */
+set pf1 imm tr goto a
+/* goes to trace set b */
+set pf2 imm tr goto b
+/* goes to trace set c */
+set pf3 imm tr goto c
+
+
+
+Instruction Tracing
+-------------------
+Setting a simple breakpoint
+TR I PSWA <address>
+To debug a particular function try
+TR I R <function address range>
+TR I on its own will single step.
+TR I DATA <MNEMONIC> <OPTIONAL RANGE> will trace for particular mnemonics
+e.g.
+TR I DATA 4D R 0197BC.4000
+will trace for BAS'es ( opcode 4D ) in the range 0197BC.4000
+if you were inclined you could add traces for all branch instructions &
+suffix them with the run prefix so you would have a backtrace on screen
+when a program crashes.
+TR BR <INTO OR FROM> will trace branches into or out of an address.
+e.g.
+TR BR INTO 0 is often quite useful if a program is getting awkward & deciding
+to branch to 0 & crashing as this will stop at the address before in jumps to 0.
+TR I R <address range> RUN cmd d g
+single steps a range of addresses but stays running &
+displays the gprs on each step.
+
+
+
+Displaying & modifying Registers
+--------------------------------
+D G will display all the gprs
+Adding a extra G to all the commands is necessary to access the full 64 bit
+content in VM on z/Architecture. Obviously this isn't required for access
+registers as these are still 32 bit.
+e.g. DGG instead of DG
+D X will display all the control registers
+D AR will display all the access registers
+D AR4-7 will display access registers 4 to 7
+CPU ALL D G will display the GRPS of all CPUS in the configuration
+D PSW will display the current PSW
+st PSW 2000 will put the value 2000 into the PSW &
+cause crash your machine.
+D PREFIX displays the prefix offset
+
+
+Displaying Memory
+-----------------
+To display memory mapped using the current PSW's mapping try
+D <range>
+To make VM display a message each time it hits a particular address and
+continue try
+D I<range> will disassemble/display a range of instructions.
+ST addr 32 bit word will store a 32 bit aligned address
+D T<range> will display the EBCDIC in an address (if you are that way inclined)
+D R<range> will display real addresses ( without DAT ) but with prefixing.
+There are other complex options to display if you need to get at say home space
+but are in primary space the easiest thing to do is to temporarily
+modify the PSW to the other addressing mode, display the stuff & then
+restore it.
+
+
+
+Hints
+-----
+If you want to issue a debugger command without halting your virtual machine
+with the PA1 key try prefixing the command with #CP e.g.
+#cp tr i pswa 2000
+also suffixing most debugger commands with RUN will cause them not
+to stop just display the mnemonic at the current instruction on the console.
+If you have several breakpoints you want to put into your program &
+you get fed up of cross referencing with System.map
+you can do the following trick for several symbols.
+grep do_signal System.map
+which emits the following among other things
+0001f4e0 T do_signal
+now you can do
+
+TR I PSWA 0001f4e0 cmd msg * do_signal
+This sends a message to your own console each time do_signal is entered.
+( As an aside I wrote a perl script once which automatically generated a REXX
+script with breakpoints on every kernel procedure, this isn't a good idea
+because there are thousands of these routines & VM can only set 255 breakpoints
+at a time so you nearly had to spend as long pruning the file down as you would
+entering the msgs by hand), however, the trick might be useful for a single
+object file. In the 3270 terminal emulator x3270 there is a very useful option
+in the file menu called "Save Screen In File" - this is very good for keeping a
+copy of traces.
+
+From CMS help <command name> will give you online help on a particular command.
+e.g.
+HELP DISPLAY
+
+Also CP has a file called profile.exec which automatically gets called
+on startup of CMS ( like autoexec.bat ), keeping on a DOS analogy session
+CP has a feature similar to doskey, it may be useful for you to
+use profile.exec to define some keystrokes.
+e.g.
+SET PF9 IMM B
+This does a single step in VM on pressing F8.
+SET PF10 ^
+This sets up the ^ key.
+which can be used for ^c (ctrl-c),^z (ctrl-z) which can't be typed directly
+into some 3270 consoles.
+SET PF11 ^-
+This types the starting keystrokes for a sysrq see SysRq below.
+SET PF12 RETRIEVE
+This retrieves command history on pressing F12.
+
+
+Sometimes in VM the display is set up to scroll automatically this
+can be very annoying if there are messages you wish to look at
+to stop this do
+TERM MORE 255 255
+This will nearly stop automatic screen updates, however it will
+cause a denial of service if lots of messages go to the 3270 console,
+so it would be foolish to use this as the default on a production machine.
+
+
+Tracing particular processes
+----------------------------
+The kernel's text segment is intentionally at an address in memory that it will
+very seldom collide with text segments of user programs ( thanks Martin ),
+this simplifies debugging the kernel.
+However it is quite common for user processes to have addresses which collide
+this can make debugging a particular process under VM painful under normal
+circumstances as the process may change when doing a
+TR I R <address range>.
+Thankfully after reading VM's online help I figured out how to debug
+I particular process.
+
+Your first problem is to find the STD ( segment table designation )
+of the program you wish to debug.
+There are several ways you can do this here are a few
+1) objdump --syms <program to be debugged> | grep main
+To get the address of main in the program.
+tr i pswa <address of main>
+Start the program, if VM drops to CP on what looks like the entry
+point of the main function this is most likely the process you wish to debug.
+Now do a D X13 or D XG13 on z/Architecture.
+On 31 bit the STD is bits 1-19 ( the STO segment table origin )
+& 25-31 ( the STL segment table length ) of CR13.
+now type
+TR I R STD <CR13's value> 0.7fffffff
+e.g.
+TR I R STD 8F32E1FF 0.7fffffff
+Another very useful variation is
+TR STORE INTO STD <CR13's value> <address range>
+for finding out when a particular variable changes.
+
+An alternative way of finding the STD of a currently running process
+is to do the following, ( this method is more complex but
+could be quite convenient if you aren't updating the kernel much &
+so your kernel structures will stay constant for a reasonable period of
+time ).
+
+grep task /proc/<pid>/status
+from this you should see something like
+task: 0f160000 ksp: 0f161de8 pt_regs: 0f161f68
+This now gives you a pointer to the task structure.
+Now make CC:="s390-gcc -g" kernel/sched.s
+To get the task_struct stabinfo.
+( task_struct is defined in include/linux/sched.h ).
+Now we want to look at
+task->active_mm->pgd
+on my machine the active_mm in the task structure stab is
+active_mm:(4,12),672,32
+its offset is 672/8=84=0x54
+the pgd member in the mm_struct stab is
+pgd:(4,6)=*(29,5),96,32
+so its offset is 96/8=12=0xc
+
+so we'll
+hexdump -s 0xf160054 /dev/mem | more
+i.e. task_struct+active_mm offset
+to look at the active_mm member
+f160054 0fee cc60 0019 e334 0000 0000 0000 0011
+hexdump -s 0x0feecc6c /dev/mem | more
+i.e. active_mm+pgd offset
+feecc6c 0f2c 0000 0000 0001 0000 0001 0000 0010
+we get something like
+now do
+TR I R STD <pgd|0x7f> 0.7fffffff
+i.e. the 0x7f is added because the pgd only
+gives the page table origin & we need to set the low bits
+to the maximum possible segment table length.
+TR I R STD 0f2c007f 0.7fffffff
+on z/Architecture you'll probably need to do
+TR I R STD <pgd|0x7> 0.ffffffffffffffff
+to set the TableType to 0x1 & the Table length to 3.
+
+
+
+Tracing Program Exceptions
+--------------------------
+If you get a crash which says something like
+illegal operation or specification exception followed by a register dump
+You can restart linux & trace these using the tr prog <range or value> trace
+option.
+
+
+The most common ones you will normally be tracing for is
+1=operation exception
+2=privileged operation exception
+4=protection exception
+5=addressing exception
+6=specification exception
+10=segment translation exception
+11=page translation exception
+
+The full list of these is on page 22 of the current s/390 Reference Summary.
+e.g.
+tr prog 10 will trace segment translation exceptions.
+tr prog on its own will trace all program interruption codes.
+
+Trace Sets
+----------
+On starting VM you are initially in the INITIAL trace set.
+You can do a Q TR to verify this.
+If you have a complex tracing situation where you wish to wait for instance
+till a driver is open before you start tracing IO, but know in your
+heart that you are going to have to make several runs through the code till you
+have a clue whats going on.
+
+What you can do is
+TR I PSWA <Driver open address>
+hit b to continue till breakpoint
+reach the breakpoint
+now do your
+TR GOTO B
+TR IO 7c08-7c09 inst int run
+or whatever the IO channels you wish to trace are & hit b
+
+To got back to the initial trace set do
+TR GOTO INITIAL
+& the TR I PSWA <Driver open address> will be the only active breakpoint again.
+
+
+Tracing linux syscalls under VM
+-------------------------------
+Syscalls are implemented on Linux for S390 by the Supervisor call instruction
+(SVC). There 256 possibilities of these as the instruction is made up of a 0xA
+opcode and the second byte being the syscall number. They are traced using the
+simple command:
+TR SVC <Optional value or range>
+the syscalls are defined in linux/arch/s390/include/asm/unistd.h
+e.g. to trace all file opens just do
+TR SVC 5 ( as this is the syscall number of open )
+
+
+SMP Specific commands
+---------------------
+To find out how many cpus you have
+Q CPUS displays all the CPU's available to your virtual machine
+To find the cpu that the current cpu VM debugger commands are being directed at
+do Q CPU to change the current cpu VM debugger commands are being directed at do
+CPU <desired cpu no>
+
+On a SMP guest issue a command to all CPUs try prefixing the command with cpu
+all. To issue a command to a particular cpu try cpu <cpu number> e.g.
+CPU 01 TR I R 2000.3000
+If you are running on a guest with several cpus & you have a IO related problem
+& cannot follow the flow of code but you know it isn't smp related.
+from the bash prompt issue
+shutdown -h now or halt.
+do a Q CPUS to find out how many cpus you have
+detach each one of them from cp except cpu 0
+by issuing a
+DETACH CPU 01-(number of cpus in configuration)
+& boot linux again.
+TR SIGP will trace inter processor signal processor instructions.
+DEFINE CPU 01-(number in configuration)
+will get your guests cpus back.
+
+
+Help for displaying ascii textstrings
+-------------------------------------
+On the very latest VM Nucleus'es VM can now display ascii
+( thanks Neale for the hint ) by doing
+D TX<lowaddr>.<len>
+e.g.
+D TX0.100
+
+Alternatively
+=============
+Under older VM debuggers (I love EBDIC too) you can use following little
+program which converts a command line of hex digits to ascii text. It can be
+compiled under linux and you can copy the hex digits from your x3270 terminal
+to your xterm if you are debugging from a linuxbox.
+
+This is quite useful when looking at a parameter passed in as a text string
+under VM ( unless you are good at decoding ASCII in your head ).
+
+e.g. consider tracing an open syscall
+TR SVC 5
+We have stopped at a breakpoint
+000151B0' SVC 0A05 -> 0001909A' CC 0
+
+D 20.8 to check the SVC old psw in the prefix area and see was it from userspace
+(for the layout of the prefix area consult the "Fixed Storage Locations"
+chapter of the s/390 Reference Summary if you have it available).
+V00000020 070C2000 800151B2
+The problem state bit wasn't set & it's also too early in the boot sequence
+for it to be a userspace SVC if it was we would have to temporarily switch the
+psw to user space addressing so we could get at the first parameter of the open
+in gpr2.
+Next do a
+D G2
+GPR 2 = 00014CB4
+Now display what gpr2 is pointing to
+D 00014CB4.20
+V00014CB4 2F646576 2F636F6E 736F6C65 00001BF5
+V00014CC4 FC00014C B4001001 E0001000 B8070707
+Now copy the text till the first 00 hex ( which is the end of the string
+to an xterm & do hex2ascii on it.
+hex2ascii 2F646576 2F636F6E 736F6C65 00
+outputs
+Decoded Hex:=/ d e v / c o n s o l e 0x00
+We were opening the console device,
+
+You can compile the code below yourself for practice :-),
+/*
+ * hex2ascii.c
+ * a useful little tool for converting a hexadecimal command line to ascii
+ *
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ * (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation.
+ */
+#include <stdio.h>
+
+int main(int argc,char *argv[])
+{
+ int cnt1,cnt2,len,toggle=0;
+ int startcnt=1;
+ unsigned char c,hex;
+
+ if(argc>1&&(strcmp(argv[1],"-a")==0))
+ startcnt=2;
+ printf("Decoded Hex:=");
+ for(cnt1=startcnt;cnt1<argc;cnt1++)
+ {
+ len=strlen(argv[cnt1]);
+ for(cnt2=0;cnt2<len;cnt2++)
+ {
+ c=argv[cnt1][cnt2];
+ if(c>='0'&&c<='9')
+ c=c-'0';
+ if(c>='A'&&c<='F')
+ c=c-'A'+10;
+ if(c>='a'&&c<='f')
+ c=c-'a'+10;
+ switch(toggle)
+ {
+ case 0:
+ hex=c<<4;
+ toggle=1;
+ break;
+ case 1:
+ hex+=c;
+ if(hex<32||hex>127)
+ {
+ if(startcnt==1)
+ printf("0x%02X ",(int)hex);
+ else
+ printf(".");
+ }
+ else
+ {
+ printf("%c",hex);
+ if(startcnt==1)
+ printf(" ");
+ }
+ toggle=0;
+ break;
+ }
+ }
+ }
+ printf("\n");
+}
+
+
+
+
+Stack tracing under VM
+----------------------
+A basic backtrace
+-----------------
+
+Here are the tricks I use 9 out of 10 times it works pretty well,
+
+When your backchain reaches a dead end
+--------------------------------------
+This can happen when an exception happens in the kernel and the kernel is
+entered twice. If you reach the NULL pointer at the end of the back chain you
+should be able to sniff further back if you follow the following tricks.
+1) A kernel address should be easy to recognise since it is in
+primary space & the problem state bit isn't set & also
+The Hi bit of the address is set.
+2) Another backchain should also be easy to recognise since it is an
+address pointing to another address approximately 100 bytes or 0x70 hex
+behind the current stackpointer.
+
+
+Here is some practice.
+boot the kernel & hit PA1 at some random time
+d g to display the gprs, this should display something like
+GPR 0 = 00000001 00156018 0014359C 00000000
+GPR 4 = 00000001 001B8888 000003E0 00000000
+GPR 8 = 00100080 00100084 00000000 000FE000
+GPR 12 = 00010400 8001B2DC 8001B36A 000FFED8
+Note that GPR14 is a return address but as we are real men we are going to
+trace the stack.
+display 0x40 bytes after the stack pointer.
+
+V000FFED8 000FFF38 8001B838 80014C8E 000FFF38
+V000FFEE8 00000000 00000000 000003E0 00000000
+V000FFEF8 00100080 00100084 00000000 000FE000
+V000FFF08 00010400 8001B2DC 8001B36A 000FFED8
+
+
+Ah now look at whats in sp+56 (sp+0x38) this is 8001B36A our saved r14 if
+you look above at our stackframe & also agrees with GPR14.
+
+now backchain
+d 000FFF38.40
+we now are taking the contents of SP to get our first backchain.
+
+V000FFF38 000FFFA0 00000000 00014995 00147094
+V000FFF48 00147090 001470A0 000003E0 00000000
+V000FFF58 00100080 00100084 00000000 001BF1D0
+V000FFF68 00010400 800149BA 80014CA6 000FFF38
+
+This displays a 2nd return address of 80014CA6
+
+now do d 000FFFA0.40 for our 3rd backchain
+
+V000FFFA0 04B52002 0001107F 00000000 00000000
+V000FFFB0 00000000 00000000 FF000000 0001107F
+V000FFFC0 00000000 00000000 00000000 00000000
+V000FFFD0 00010400 80010802 8001085A 000FFFA0
+
+
+our 3rd return address is 8001085A
+
+as the 04B52002 looks suspiciously like rubbish it is fair to assume that the
+kernel entry routines for the sake of optimisation don't set up a backchain.
+
+now look at System.map to see if the addresses make any sense.
+
+grep -i 0001b3 System.map
+outputs among other things
+0001b304 T cpu_idle
+so 8001B36A
+is cpu_idle+0x66 ( quiet the cpu is asleep, don't wake it )
+
+
+grep -i 00014 System.map
+produces among other things
+00014a78 T start_kernel
+so 0014CA6 is start_kernel+some hex number I can't add in my head.
+
+grep -i 00108 System.map
+this produces
+00010800 T _stext
+so 8001085A is _stext+0x5a
+
+Congrats you've done your first backchain.
+
+
+
+s/390 & z/Architecture IO Overview
+==================================
+
+I am not going to give a course in 390 IO architecture as this would take me
+quite a while and I'm no expert. Instead I'll give a 390 IO architecture
+summary for Dummies. If you have the s/390 principles of operation available
+read this instead. If nothing else you may find a few useful keywords in here
+and be able to use them on a web search engine to find more useful information.
+
+Unlike other bus architectures modern 390 systems do their IO using mostly
+fibre optics and devices such as tapes and disks can be shared between several
+mainframes. Also S390 can support up to 65536 devices while a high end PC based
+system might be choking with around 64.
+
+Here is some of the common IO terminology:
+
+Subchannel:
+This is the logical number most IO commands use to talk to an IO device. There
+can be up to 0x10000 (65536) of these in a configuration, typically there are a
+few hundred. Under VM for simplicity they are allocated contiguously, however
+on the native hardware they are not. They typically stay consistent between
+boots provided no new hardware is inserted or removed.
+Under Linux for s390 we use these as IRQ's and also when issuing an IO command
+(CLEAR SUBCHANNEL, HALT SUBCHANNEL, MODIFY SUBCHANNEL, RESUME SUBCHANNEL,
+START SUBCHANNEL, STORE SUBCHANNEL and TEST SUBCHANNEL). We use this as the ID
+of the device we wish to talk to. The most important of these instructions are
+START SUBCHANNEL (to start IO), TEST SUBCHANNEL (to check whether the IO
+completed successfully) and HALT SUBCHANNEL (to kill IO). A subchannel can have
+up to 8 channel paths to a device, this offers redundancy if one is not
+available.
+
+Device Number:
+This number remains static and is closely tied to the hardware. There are 65536
+of these, made up of a CHPID (Channel Path ID, the most significant 8 bits) and
+another lsb 8 bits. These remain static even if more devices are inserted or
+removed from the hardware. There is a 1 to 1 mapping between subchannels and
+device numbers, provided devices aren't inserted or removed.
+
+Channel Control Words:
+CCWs are linked lists of instructions initially pointed to by an operation
+request block (ORB), which is initially given to Start Subchannel (SSCH)
+command along with the subchannel number for the IO subsystem to process
+while the CPU continues executing normal code.
+CCWs come in two flavours, Format 0 (24 bit for backward compatibility) and
+Format 1 (31 bit). These are typically used to issue read and write (and many
+other) instructions. They consist of a length field and an absolute address
+field.
+Each IO typically gets 1 or 2 interrupts, one for channel end (primary status)
+when the channel is idle, and the second for device end (secondary status).
+Sometimes you get both concurrently. You check how the IO went on by issuing a
+TEST SUBCHANNEL at each interrupt, from which you receive an Interruption
+response block (IRB). If you get channel and device end status in the IRB
+without channel checks etc. your IO probably went okay. If you didn't you
+probably need to examine the IRB, extended status word etc.
+If an error occurs, more sophisticated control units have a facility known as
+concurrent sense. This means that if an error occurs Extended sense information
+will be presented in the Extended status word in the IRB. If not you have to
+issue a subsequent SENSE CCW command after the test subchannel.
+
+
+TPI (Test pending interrupt) can also be used for polled IO, but in
+multitasking multiprocessor systems it isn't recommended except for
+checking special cases (i.e. non looping checks for pending IO etc.).
+
+Store Subchannel and Modify Subchannel can be used to examine and modify
+operating characteristics of a subchannel (e.g. channel paths).
+
+Other IO related Terms:
+Sysplex: S390's Clustering Technology
+QDIO: S390's new high speed IO architecture to support devices such as gigabit
+ethernet, this architecture is also designed to be forward compatible with
+upcoming 64 bit machines.
+
+
+General Concepts
+
+Input Output Processors (IOP's) are responsible for communicating between
+the mainframe CPU's & the channel & relieve the mainframe CPU's from the
+burden of communicating with IO devices directly, this allows the CPU's to
+concentrate on data processing.
+
+IOP's can use one or more links ( known as channel paths ) to talk to each
+IO device. It first checks for path availability & chooses an available one,
+then starts ( & sometimes terminates IO ).
+There are two types of channel path: ESCON & the Parallel IO interface.
+
+IO devices are attached to control units, control units provide the
+logic to interface the channel paths & channel path IO protocols to
+the IO devices, they can be integrated with the devices or housed separately
+& often talk to several similar devices ( typical examples would be raid
+controllers or a control unit which connects to 1000 3270 terminals ).
+
+
+ +---------------------------------------------------------------+
+ | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
+ | | CPU | | CPU | | CPU | | CPU | | Main | | Expanded | |
+ | | | | | | | | | | Memory | | Storage | |
+ | +-----+ +-----+ +-----+ +-----+ +----------+ +----------+ |
+ |---------------------------------------------------------------+
+ | IOP | IOP | IOP |
+ |---------------------------------------------------------------
+ | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C | C |
+ ----------------------------------------------------------------
+ || ||
+ || Bus & Tag Channel Path || ESCON
+ || ====================== || Channel
+ || || || || Path
+ +----------+ +----------+ +----------+
+ | | | | | |
+ | CU | | CU | | CU |
+ | | | | | |
+ +----------+ +----------+ +----------+
+ | | | | |
++----------+ +----------+ +----------+ +----------+ +----------+
+|I/O Device| |I/O Device| |I/O Device| |I/O Device| |I/O Device|
++----------+ +----------+ +----------+ +----------+ +----------+
+ CPU = Central Processing Unit
+ C = Channel
+ IOP = IP Processor
+ CU = Control Unit
+
+The 390 IO systems come in 2 flavours the current 390 machines support both
+
+The Older 360 & 370 Interface,sometimes called the Parallel I/O interface,
+sometimes called Bus-and Tag & sometimes Original Equipment Manufacturers
+Interface (OEMI).
+
+This byte wide Parallel channel path/bus has parity & data on the "Bus" cable
+and control lines on the "Tag" cable. These can operate in byte multiplex mode
+for sharing between several slow devices or burst mode and monopolize the
+channel for the whole burst. Up to 256 devices can be addressed on one of these
+cables. These cables are about one inch in diameter. The maximum unextended
+length supported by these cables is 125 Meters but this can be extended up to
+2km with a fibre optic channel extended such as a 3044. The maximum burst speed
+supported is 4.5 megabytes per second. However, some really old processors
+support only transfer rates of 3.0, 2.0 & 1.0 MB/sec.
+One of these paths can be daisy chained to up to 8 control units.
+
+
+ESCON if fibre optic it is also called FICON
+Was introduced by IBM in 1990. Has 2 fibre optic cables and uses either leds or
+lasers for communication at a signaling rate of up to 200 megabits/sec. As
+10bits are transferred for every 8 bits info this drops to 160 megabits/sec
+and to 18.6 Megabytes/sec once control info and CRC are added. ESCON only
+operates in burst mode.
+
+ESCONs typical max cable length is 3km for the led version and 20km for the
+laser version known as XDF (extended distance facility). This can be further
+extended by using an ESCON director which triples the above mentioned ranges.
+Unlike Bus & Tag as ESCON is serial it uses a packet switching architecture,
+the standard Bus & Tag control protocol is however present within the packets.
+Up to 256 devices can be attached to each control unit that uses one of these
+interfaces.
+
+Common 390 Devices include:
+Network adapters typically OSA2,3172's,2116's & OSA-E gigabit ethernet adapters,
+Consoles 3270 & 3215 (a teletype emulated under linux for a line mode console).
+DASD's direct access storage devices ( otherwise known as hard disks ).
+Tape Drives.
+CTC ( Channel to Channel Adapters ),
+ESCON or Parallel Cables used as a very high speed serial link
+between 2 machines.
+
+
+Debugging IO on s/390 & z/Architecture under VM
+===============================================
+
+Now we are ready to go on with IO tracing commands under VM
+
+A few self explanatory queries:
+Q OSA
+Q CTC
+Q DISK ( This command is CMS specific )
+Q DASD
+
+
+
+
+
+
+Q OSA on my machine returns
+OSA 7C08 ON OSA 7C08 SUBCHANNEL = 0000
+OSA 7C09 ON OSA 7C09 SUBCHANNEL = 0001
+OSA 7C14 ON OSA 7C14 SUBCHANNEL = 0002
+OSA 7C15 ON OSA 7C15 SUBCHANNEL = 0003
+
+If you have a guest with certain privileges you may be able to see devices
+which don't belong to you. To avoid this, add the option V.
+e.g.
+Q V OSA
+
+Now using the device numbers returned by this command we will
+Trace the io starting up on the first device 7c08 & 7c09
+In our simplest case we can trace the
+start subchannels
+like TR SSCH 7C08-7C09
+or the halt subchannels
+or TR HSCH 7C08-7C09
+MSCH's ,STSCH's I think you can guess the rest
+
+A good trick is tracing all the IO's and CCWS and spooling them into the reader
+of another VM guest so he can ftp the logfile back to his own machine. I'll do
+a small bit of this and give you a look at the output.
+
+1) Spool stdout to VM reader
+SP PRT TO (another vm guest ) or * for the local vm guest
+2) Fill the reader with the trace
+TR IO 7c08-7c09 INST INT CCW PRT RUN
+3) Start up linux
+i 00c
+4) Finish the trace
+TR END
+5) close the reader
+C PRT
+6) list reader contents
+RDRLIST
+7) copy it to linux4's minidisk
+RECEIVE / LOG TXT A1 ( replace
+8)
+filel & press F11 to look at it
+You should see something like:
+
+00020942' SSCH B2334000 0048813C CC 0 SCH 0000 DEV 7C08
+ CPA 000FFDF0 PARM 00E2C9C4 KEY 0 FPI C0 LPM 80
+ CCW 000FFDF0 E4200100 00487FE8 0000 E4240100 ........
+ IDAL 43D8AFE8
+ IDAL 0FB76000
+00020B0A' I/O DEV 7C08 -> 000197BC' SCH 0000 PARM 00E2C9C4
+00021628' TSCH B2354000 >> 00488164 CC 0 SCH 0000 DEV 7C08
+ CCWA 000FFDF8 DEV STS 0C SCH STS 00 CNT 00EC
+ KEY 0 FPI C0 CC 0 CTLS 4007
+00022238' STSCH B2344000 >> 00488108 CC 0 SCH 0000 DEV 7C08
+
+If you don't like messing up your readed ( because you possibly booted from it )
+you can alternatively spool it to another readers guest.
+
+
+Other common VM device related commands
+---------------------------------------------
+These commands are listed only because they have
+been of use to me in the past & may be of use to
+you too. For more complete info on each of the commands
+use type HELP <command> from CMS.
+detaching devices
+DET <devno range>
+ATT <devno range> <guest>
+attach a device to guest * for your own guest
+READY <devno> cause VM to issue a fake interrupt.
+
+The VARY command is normally only available to VM administrators.
+VARY ON PATH <path> TO <devno range>
+VARY OFF PATH <PATH> FROM <devno range>
+This is used to switch on or off channel paths to devices.
+
+Q CHPID <channel path ID>
+This displays state of devices using this channel path
+D SCHIB <subchannel>
+This displays the subchannel information SCHIB block for the device.
+this I believe is also only available to administrators.
+DEFINE CTC <devno>
+defines a virtual CTC channel to channel connection
+2 need to be defined on each guest for the CTC driver to use.
+COUPLE devno userid remote devno
+Joins a local virtual device to a remote virtual device
+( commonly used for the CTC driver ).
+
+Building a VM ramdisk under CMS which linux can use
+def vfb-<blocksize> <subchannel> <number blocks>
+blocksize is commonly 4096 for linux.
+Formatting it
+format <subchannel> <driver letter e.g. x> (blksize <blocksize>
+
+Sharing a disk between multiple guests
+LINK userid devno1 devno2 mode password
+
+
+
+GDB on S390
+===========
+N.B. if compiling for debugging gdb works better without optimisation
+( see Compiling programs for debugging )
+
+invocation
+----------
+gdb <victim program> <optional corefile>
+
+Online help
+-----------
+help: gives help on commands
+e.g.
+help
+help display
+Note gdb's online help is very good use it.
+
+
+Assembly
+--------
+info registers: displays registers other than floating point.
+info all-registers: displays floating points as well.
+disassemble: disassembles
+e.g.
+disassemble without parameters will disassemble the current function
+disassemble $pc $pc+10
+
+Viewing & modifying variables
+-----------------------------
+print or p: displays variable or register
+e.g. p/x $sp will display the stack pointer
+
+display: prints variable or register each time program stops
+e.g.
+display/x $pc will display the program counter
+display argc
+
+undisplay : undo's display's
+
+info breakpoints: shows all current breakpoints
+
+info stack: shows stack back trace (if this doesn't work too well, I'll show
+you the stacktrace by hand below).
+
+info locals: displays local variables.
+
+info args: display current procedure arguments.
+
+set args: will set argc & argv each time the victim program is invoked.
+
+set <variable>=value
+set argc=100
+set $pc=0
+
+
+
+Modifying execution
+-------------------
+step: steps n lines of sourcecode
+step steps 1 line.
+step 100 steps 100 lines of code.
+
+next: like step except this will not step into subroutines
+
+stepi: steps a single machine code instruction.
+e.g. stepi 100
+
+nexti: steps a single machine code instruction but will not step into
+subroutines.
+
+finish: will run until exit of the current routine
+
+run: (re)starts a program
+
+cont: continues a program
+
+quit: exits gdb.
+
+
+breakpoints
+------------
+
+break
+sets a breakpoint
+e.g.
+
+break main
+
+break *$pc
+
+break *0x400618
+
+Here's a really useful one for large programs
+rbr
+Set a breakpoint for all functions matching REGEXP
+e.g.
+rbr 390
+will set a breakpoint with all functions with 390 in their name.
+
+info breakpoints
+lists all breakpoints
+
+delete: delete breakpoint by number or delete them all
+e.g.
+delete 1 will delete the first breakpoint
+delete will delete them all
+
+watch: This will set a watchpoint ( usually hardware assisted ),
+This will watch a variable till it changes
+e.g.
+watch cnt, will watch the variable cnt till it changes.
+As an aside unfortunately gdb's, architecture independent watchpoint code
+is inconsistent & not very good, watchpoints usually work but not always.
+
+info watchpoints: Display currently active watchpoints
+
+condition: ( another useful one )
+Specify breakpoint number N to break only if COND is true.
+Usage is `condition N COND', where N is an integer and COND is an
+expression to be evaluated whenever breakpoint N is reached.
+
+
+
+User defined functions/macros
+-----------------------------
+define: ( Note this is very very useful,simple & powerful )
+usage define <name> <list of commands> end
+
+examples which you should consider putting into .gdbinit in your home directory
+define d
+stepi
+disassemble $pc $pc+10
+end
+
+define e
+nexti
+disassemble $pc $pc+10
+end
+
+
+Other hard to classify stuff
+----------------------------
+signal n:
+sends the victim program a signal.
+e.g. signal 3 will send a SIGQUIT.
+
+info signals:
+what gdb does when the victim receives certain signals.
+
+list:
+e.g.
+list lists current function source
+list 1,10 list first 10 lines of current file.
+list test.c:1,10
+
+
+directory:
+Adds directories to be searched for source if gdb cannot find the source.
+(note it is a bit sensitive about slashes)
+e.g. To add the root of the filesystem to the searchpath do
+directory //
+
+
+call <function>
+This calls a function in the victim program, this is pretty powerful
+e.g.
+(gdb) call printf("hello world")
+outputs:
+$1 = 11
+
+You might now be thinking that the line above didn't work, something extra had
+to be done.
+(gdb) call fflush(stdout)
+hello world$2 = 0
+As an aside the debugger also calls malloc & free under the hood
+to make space for the "hello world" string.
+
+
+
+hints
+-----
+1) command completion works just like bash
+( if you are a bad typist like me this really helps )
+e.g. hit br <TAB> & cursor up & down :-).
+
+2) if you have a debugging problem that takes a few steps to recreate
+put the steps into a file called .gdbinit in your current working directory
+if you have defined a few extra useful user defined commands put these in
+your home directory & they will be read each time gdb is launched.
+
+A typical .gdbinit file might be.
+break main
+run
+break runtime_exception
+cont
+
+
+stack chaining in gdb by hand
+-----------------------------
+This is done using a the same trick described for VM
+p/x (*($sp+56))&0x7fffffff get the first backchain.
+
+For z/Architecture
+Replace 56 with 112 & ignore the &0x7fffffff
+in the macros below & do nasty casts to longs like the following
+as gdb unfortunately deals with printed arguments as ints which
+messes up everything.
+i.e. here is a 3rd backchain dereference
+p/x *(long *)(***(long ***)$sp+112)
+
+
+this outputs
+$5 = 0x528f18
+on my machine.
+Now you can use
+info symbol (*($sp+56))&0x7fffffff
+you might see something like.
+rl_getc + 36 in section .text telling you what is located at address 0x528f18
+Now do.
+p/x (*(*$sp+56))&0x7fffffff
+This outputs
+$6 = 0x528ed0
+Now do.
+info symbol (*(*$sp+56))&0x7fffffff
+rl_read_key + 180 in section .text
+now do
+p/x (*(**$sp+56))&0x7fffffff
+& so on.
+
+Disassembling instructions without debug info
+---------------------------------------------
+gdb typically complains if there is a lack of debugging
+symbols in the disassemble command with
+"No function contains specified address." To get around
+this do
+x/<number lines to disassemble>xi <address>
+e.g.
+x/20xi 0x400730
+
+
+
+Note: Remember gdb has history just like bash you don't need to retype the
+whole line just use the up & down arrows.
+
+
+
+For more info
+-------------
+From your linuxbox do
+man gdb or info gdb.
+
+core dumps
+----------
+What a core dump ?,
+A core dump is a file generated by the kernel (if allowed) which contains the
+registers and all active pages of the program which has crashed.
+From this file gdb will allow you to look at the registers, stack trace and
+memory of the program as if it just crashed on your system. It is usually
+called core and created in the current working directory.
+This is very useful in that a customer can mail a core dump to a technical
+support department and the technical support department can reconstruct what
+happened. Provided they have an identical copy of this program with debugging
+symbols compiled in and the source base of this build is available.
+In short it is far more useful than something like a crash log could ever hope
+to be.
+
+Why have I never seen one ?.
+Probably because you haven't used the command
+ulimit -c unlimited in bash
+to allow core dumps, now do
+ulimit -a
+to verify that the limit was accepted.
+
+A sample core dump
+To create this I'm going to do
+ulimit -c unlimited
+gdb
+to launch gdb (my victim app. ) now be bad & do the following from another
+telnet/xterm session to the same machine
+ps -aux | grep gdb
+kill -SIGSEGV <gdb's pid>
+or alternatively use killall -SIGSEGV gdb if you have the killall command.
+Now look at the core dump.
+./gdb core
+Displays the following
+GNU gdb 4.18
+Copyright 1998 Free Software Foundation, Inc.
+GDB is free software, covered by the GNU General Public License, and you are
+welcome to change it and/or distribute copies of it under certain conditions.
+Type "show copying" to see the conditions.
+There is absolutely no warranty for GDB. Type "show warranty" for details.
+This GDB was configured as "s390-ibm-linux"...
+Core was generated by `./gdb'.
+Program terminated with signal 11, Segmentation fault.
+Reading symbols from /usr/lib/libncurses.so.4...done.
+Reading symbols from /lib/libm.so.6...done.
+Reading symbols from /lib/libc.so.6...done.
+Reading symbols from /lib/ld-linux.so.2...done.
+#0 0x40126d1a in read () from /lib/libc.so.6
+Setting up the environment for debugging gdb.
+Breakpoint 1 at 0x4dc6f8: file utils.c, line 471.
+Breakpoint 2 at 0x4d87a4: file top.c, line 2609.
+(top-gdb) info stack
+#0 0x40126d1a in read () from /lib/libc.so.6
+#1 0x528f26 in rl_getc (stream=0x7ffffde8) at input.c:402
+#2 0x528ed0 in rl_read_key () at input.c:381
+#3 0x5167e6 in readline_internal_char () at readline.c:454
+#4 0x5168ee in readline_internal_charloop () at readline.c:507
+#5 0x51692c in readline_internal () at readline.c:521
+#6 0x5164fe in readline (prompt=0x7ffff810)
+ at readline.c:349
+#7 0x4d7a8a in command_line_input (prompt=0x564420 "(gdb) ", repeat=1,
+ annotation_suffix=0x4d6b44 "prompt") at top.c:2091
+#8 0x4d6cf0 in command_loop () at top.c:1345
+#9 0x4e25bc in main (argc=1, argv=0x7ffffdf4) at main.c:635
+
+
+LDD
+===
+This is a program which lists the shared libraries which a library needs,
+Note you also get the relocations of the shared library text segments which
+help when using objdump --source.
+e.g.
+ ldd ./gdb
+outputs
+libncurses.so.4 => /usr/lib/libncurses.so.4 (0x40018000)
+libm.so.6 => /lib/libm.so.6 (0x4005e000)
+libc.so.6 => /lib/libc.so.6 (0x40084000)
+/lib/ld-linux.so.2 => /lib/ld-linux.so.2 (0x40000000)
+
+
+Debugging shared libraries
+==========================
+Most programs use shared libraries, however it can be very painful
+when you single step instruction into a function like printf for the
+first time & you end up in functions like _dl_runtime_resolve this is
+the ld.so doing lazy binding, lazy binding is a concept in ELF where
+shared library functions are not loaded into memory unless they are
+actually used, great for saving memory but a pain to debug.
+To get around this either relink the program -static or exit gdb type
+export LD_BIND_NOW=true this will stop lazy binding & restart the gdb'ing
+the program in question.
+
+
+
+Debugging modules
+=================
+As modules are dynamically loaded into the kernel their address can be
+anywhere to get around this use the -m option with insmod to emit a load
+map which can be piped into a file if required.
+
+The proc file system
+====================
+What is it ?.
+It is a filesystem created by the kernel with files which are created on demand
+by the kernel if read, or can be used to modify kernel parameters,
+it is a powerful concept.
+
+e.g.
+
+cat /proc/sys/net/ipv4/ip_forward
+On my machine outputs
+0
+telling me ip_forwarding is not on to switch it on I can do
+echo 1 > /proc/sys/net/ipv4/ip_forward
+cat it again
+cat /proc/sys/net/ipv4/ip_forward
+On my machine now outputs
+1
+IP forwarding is on.
+There is a lot of useful info in here best found by going in and having a look
+around, so I'll take you through some entries I consider important.
+
+All the processes running on the machine have their own entry defined by
+/proc/<pid>
+So lets have a look at the init process
+cd /proc/1
+
+cat cmdline
+emits
+init [2]
+
+cd /proc/1/fd
+This contains numerical entries of all the open files,
+some of these you can cat e.g. stdout (2)
+
+cat /proc/29/maps
+on my machine emits
+
+00400000-00478000 r-xp 00000000 5f:00 4103 /bin/bash
+00478000-0047e000 rw-p 00077000 5f:00 4103 /bin/bash
+0047e000-00492000 rwxp 00000000 00:00 0
+40000000-40015000 r-xp 00000000 5f:00 14382 /lib/ld-2.1.2.so
+40015000-40016000 rw-p 00014000 5f:00 14382 /lib/ld-2.1.2.so
+40016000-40017000 rwxp 00000000 00:00 0
+40017000-40018000 rw-p 00000000 00:00 0
+40018000-4001b000 r-xp 00000000 5f:00 14435 /lib/libtermcap.so.2.0.8
+4001b000-4001c000 rw-p 00002000 5f:00 14435 /lib/libtermcap.so.2.0.8
+4001c000-4010d000 r-xp 00000000 5f:00 14387 /lib/libc-2.1.2.so
+4010d000-40111000 rw-p 000f0000 5f:00 14387 /lib/libc-2.1.2.so
+40111000-40114000 rw-p 00000000 00:00 0
+40114000-4011e000 r-xp 00000000 5f:00 14408 /lib/libnss_files-2.1.2.so
+4011e000-4011f000 rw-p 00009000 5f:00 14408 /lib/libnss_files-2.1.2.so
+7fffd000-80000000 rwxp ffffe000 00:00 0
+
+
+Showing us the shared libraries init uses where they are in memory
+& memory access permissions for each virtual memory area.
+
+/proc/1/cwd is a softlink to the current working directory.
+/proc/1/root is the root of the filesystem for this process.
+
+/proc/1/mem is the current running processes memory which you
+can read & write to like a file.
+strace uses this sometimes as it is a bit faster than the
+rather inefficient ptrace interface for peeking at DATA.
+
+
+cat status
+
+Name: init
+State: S (sleeping)
+Pid: 1
+PPid: 0
+Uid: 0 0 0 0
+Gid: 0 0 0 0
+Groups:
+VmSize: 408 kB
+VmLck: 0 kB
+VmRSS: 208 kB
+VmData: 24 kB
+VmStk: 8 kB
+VmExe: 368 kB
+VmLib: 0 kB
+SigPnd: 0000000000000000
+SigBlk: 0000000000000000
+SigIgn: 7fffffffd7f0d8fc
+SigCgt: 00000000280b2603
+CapInh: 00000000fffffeff
+CapPrm: 00000000ffffffff
+CapEff: 00000000fffffeff
+
+User PSW: 070de000 80414146
+task: 004b6000 tss: 004b62d8 ksp: 004b7ca8 pt_regs: 004b7f68
+User GPRS:
+00000400 00000000 0000000b 7ffffa90
+00000000 00000000 00000000 0045d9f4
+0045cafc 7ffffa90 7fffff18 0045cb08
+00010400 804039e8 80403af8 7ffff8b0
+User ACRS:
+00000000 00000000 00000000 00000000
+00000001 00000000 00000000 00000000
+00000000 00000000 00000000 00000000
+00000000 00000000 00000000 00000000
+Kernel BackChain CallChain BackChain CallChain
+ 004b7ca8 8002bd0c 004b7d18 8002b92c
+ 004b7db8 8005cd50 004b7e38 8005d12a
+ 004b7f08 80019114
+Showing among other things memory usage & status of some signals &
+the processes'es registers from the kernel task_structure
+as well as a backchain which may be useful if a process crashes
+in the kernel for some unknown reason.
+
+Some driver debugging techniques
+================================
+debug feature
+-------------
+Some of our drivers now support a "debug feature" in
+/proc/s390dbf see s390dbf.txt in the linux/Documentation directory
+for more info.
+e.g.
+to switch on the lcs "debug feature"
+echo 5 > /proc/s390dbf/lcs/level
+& then after the error occurred.
+cat /proc/s390dbf/lcs/sprintf >/logfile
+the logfile now contains some information which may help
+tech support resolve a problem in the field.
+
+
+
+high level debugging network drivers
+------------------------------------
+ifconfig is a quite useful command
+it gives the current state of network drivers.
+
+If you suspect your network device driver is dead
+one way to check is type
+ifconfig <network device>
+e.g. tr0
+You should see something like
+tr0 Link encap:16/4 Mbps Token Ring (New) HWaddr 00:04:AC:20:8E:48
+ inet addr:9.164.185.132 Bcast:9.164.191.255 Mask:255.255.224.0
+ UP BROADCAST RUNNING MULTICAST MTU:2000 Metric:1
+ RX packets:246134 errors:0 dropped:0 overruns:0 frame:0
+ TX packets:5 errors:0 dropped:0 overruns:0 carrier:0
+ collisions:0 txqueuelen:100
+
+if the device doesn't say up
+try
+/etc/rc.d/init.d/network start
+( this starts the network stack & hopefully calls ifconfig tr0 up ).
+ifconfig looks at the output of /proc/net/dev and presents it in a more
+presentable form.
+Now ping the device from a machine in the same subnet.
+if the RX packets count & TX packets counts don't increment you probably
+have problems.
+next
+cat /proc/net/arp
+Do you see any hardware addresses in the cache if not you may have problems.
+Next try
+ping -c 5 <broadcast_addr> i.e. the Bcast field above in the output of
+ifconfig. Do you see any replies from machines other than the local machine
+if not you may have problems. also if the TX packets count in ifconfig
+hasn't incremented either you have serious problems in your driver
+(e.g. the txbusy field of the network device being stuck on )
+or you may have multiple network devices connected.
+
+
+chandev
+-------
+There is a new device layer for channel devices, some
+drivers e.g. lcs are registered with this layer.
+If the device uses the channel device layer you'll be
+able to find what interrupts it uses & the current state
+of the device.
+See the manpage chandev.8 &type cat /proc/chandev for more info.
+
+
+SysRq
+=====
+This is now supported by linux for s/390 & z/Architecture.
+To enable it do compile the kernel with
+Kernel Hacking -> Magic SysRq Key Enabled
+echo "1" > /proc/sys/kernel/sysrq
+also type
+echo "8" >/proc/sys/kernel/printk
+To make printk output go to console.
+On 390 all commands are prefixed with
+^-
+e.g.
+^-t will show tasks.
+^-? or some unknown command will display help.
+The sysrq key reading is very picky ( I have to type the keys in an
+ xterm session & paste them into the x3270 console )
+& it may be wise to predefine the keys as described in the VM hints above
+
+This is particularly useful for syncing disks unmounting & rebooting
+if the machine gets partially hung.
+
+Read Documentation/sysrq.txt for more info
+
+References:
+===========
+Enterprise Systems Architecture Reference Summary
+Enterprise Systems Architecture Principles of Operation
+Hartmut Penners s390 stack frame sheet.
+IBM Mainframe Channel Attachment a technology brief from a CISCO webpage
+Various bits of man & info pages of Linux.
+Linux & GDB source.
+Various info & man pages.
+CMS Help on tracing commands.
+Linux for s/390 Elf Application Binary Interface
+Linux for z/Series Elf Application Binary Interface ( Both Highly Recommended )
+z/Architecture Principles of Operation SA22-7832-00
+Enterprise Systems Architecture/390 Reference Summary SA22-7209-01 & the
+Enterprise Systems Architecture/390 Principles of Operation SA22-7201-05
+
+Special Thanks
+==============
+Special thanks to Neale Ferguson who maintains a much
+prettier HTML version of this page at
+http://linuxvm.org/penguinvm/
+Bob Grainger Stefan Bader & others for reporting bugs
diff --git a/Documentation/s390/cds.txt b/Documentation/s390/cds.txt
new file mode 100644
index 000000000..480a78ef5
--- /dev/null
+++ b/Documentation/s390/cds.txt
@@ -0,0 +1,472 @@
+Linux for S/390 and zSeries
+
+Common Device Support (CDS)
+Device Driver I/O Support Routines
+
+Authors : Ingo Adlung
+ Cornelia Huck
+
+Copyright, IBM Corp. 1999-2002
+
+Introduction
+
+This document describes the common device support routines for Linux/390.
+Different than other hardware architectures, ESA/390 has defined a unified
+I/O access method. This gives relief to the device drivers as they don't
+have to deal with different bus types, polling versus interrupt
+processing, shared versus non-shared interrupt processing, DMA versus port
+I/O (PIO), and other hardware features more. However, this implies that
+either every single device driver needs to implement the hardware I/O
+attachment functionality itself, or the operating system provides for a
+unified method to access the hardware, providing all the functionality that
+every single device driver would have to provide itself.
+
+The document does not intend to explain the ESA/390 hardware architecture in
+every detail.This information can be obtained from the ESA/390 Principles of
+Operation manual (IBM Form. No. SA22-7201).
+
+In order to build common device support for ESA/390 I/O interfaces, a
+functional layer was introduced that provides generic I/O access methods to
+the hardware.
+
+The common device support layer comprises the I/O support routines defined
+below. Some of them implement common Linux device driver interfaces, while
+some of them are ESA/390 platform specific.
+
+Note:
+In order to write a driver for S/390, you also need to look into the interface
+described in Documentation/s390/driver-model.txt.
+
+Note for porting drivers from 2.4:
+The major changes are:
+* The functions use a ccw_device instead of an irq (subchannel).
+* All drivers must define a ccw_driver (see driver-model.txt) and the associated
+ functions.
+* request_irq() and free_irq() are no longer done by the driver.
+* The oper_handler is (kindof) replaced by the probe() and set_online() functions
+ of the ccw_driver.
+* The not_oper_handler is (kindof) replaced by the remove() and set_offline()
+ functions of the ccw_driver.
+* The channel device layer is gone.
+* The interrupt handlers must be adapted to use a ccw_device as argument.
+ Moreover, they don't return a devstat, but an irb.
+* Before initiating an io, the options must be set via ccw_device_set_options().
+* Instead of calling read_dev_chars()/read_conf_data(), the driver issues
+ the channel program and handles the interrupt itself.
+
+ccw_device_get_ciw()
+ get commands from extended sense data.
+
+ccw_device_start()
+ccw_device_start_timeout()
+ccw_device_start_key()
+ccw_device_start_key_timeout()
+ initiate an I/O request.
+
+ccw_device_resume()
+ resume channel program execution.
+
+ccw_device_halt()
+ terminate the current I/O request processed on the device.
+
+do_IRQ()
+ generic interrupt routine. This function is called by the interrupt entry
+ routine whenever an I/O interrupt is presented to the system. The do_IRQ()
+ routine determines the interrupt status and calls the device specific
+ interrupt handler according to the rules (flags) defined during I/O request
+ initiation with do_IO().
+
+The next chapters describe the functions other than do_IRQ() in more details.
+The do_IRQ() interface is not described, as it is called from the Linux/390
+first level interrupt handler only and does not comprise a device driver
+callable interface. Instead, the functional description of do_IO() also
+describes the input to the device specific interrupt handler.
+
+Note: All explanations apply also to the 64 bit architecture s390x.
+
+
+Common Device Support (CDS) for Linux/390 Device Drivers
+
+General Information
+
+The following chapters describe the I/O related interface routines the
+Linux/390 common device support (CDS) provides to allow for device specific
+driver implementations on the IBM ESA/390 hardware platform. Those interfaces
+intend to provide the functionality required by every device driver
+implementation to allow to drive a specific hardware device on the ESA/390
+platform. Some of the interface routines are specific to Linux/390 and some
+of them can be found on other Linux platforms implementations too.
+Miscellaneous function prototypes, data declarations, and macro definitions
+can be found in the architecture specific C header file
+linux/arch/s390/include/asm/irq.h.
+
+Overview of CDS interface concepts
+
+Different to other hardware platforms, the ESA/390 architecture doesn't define
+interrupt lines managed by a specific interrupt controller and bus systems
+that may or may not allow for shared interrupts, DMA processing, etc.. Instead,
+the ESA/390 architecture has implemented a so called channel subsystem, that
+provides a unified view of the devices physically attached to the systems.
+Though the ESA/390 hardware platform knows about a huge variety of different
+peripheral attachments like disk devices (aka. DASDs), tapes, communication
+controllers, etc. they can all be accessed by a well defined access method and
+they are presenting I/O completion a unified way : I/O interruptions. Every
+single device is uniquely identified to the system by a so called subchannel,
+where the ESA/390 architecture allows for 64k devices be attached.
+
+Linux, however, was first built on the Intel PC architecture, with its two
+cascaded 8259 programmable interrupt controllers (PICs), that allow for a
+maximum of 15 different interrupt lines. All devices attached to such a system
+share those 15 interrupt levels. Devices attached to the ISA bus system must
+not share interrupt levels (aka. IRQs), as the ISA bus bases on edge triggered
+interrupts. MCA, EISA, PCI and other bus systems base on level triggered
+interrupts, and therewith allow for shared IRQs. However, if multiple devices
+present their hardware status by the same (shared) IRQ, the operating system
+has to call every single device driver registered on this IRQ in order to
+determine the device driver owning the device that raised the interrupt.
+
+Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel).
+For internal use of the common I/O layer, these are still there. However,
+device drivers should use the new calling interface via the ccw_device only.
+
+During its startup the Linux/390 system checks for peripheral devices. Each
+of those devices is uniquely defined by a so called subchannel by the ESA/390
+channel subsystem. While the subchannel numbers are system generated, each
+subchannel also takes a user defined attribute, the so called device number.
+Both subchannel number and device number cannot exceed 65535. During sysfs
+initialisation, the information about control unit type and device types that
+imply specific I/O commands (channel command words - CCWs) in order to operate
+the device are gathered. Device drivers can retrieve this set of hardware
+information during their initialization step to recognize the devices they
+support using the information saved in the struct ccw_device given to them.
+This methods implies that Linux/390 doesn't require to probe for free (not
+armed) interrupt request lines (IRQs) to drive its devices with. Where
+applicable, the device drivers can use issue the READ DEVICE CHARACTERISTICS
+ccw to retrieve device characteristics in its online routine.
+
+In order to allow for easy I/O initiation the CDS layer provides a
+ccw_device_start() interface that takes a device specific channel program (one
+or more CCWs) as input sets up the required architecture specific control blocks
+and initiates an I/O request on behalf of the device driver. The
+ccw_device_start() routine allows to specify whether it expects the CDS layer
+to notify the device driver for every interrupt it observes, or with final status
+only. See ccw_device_start() for more details. A device driver must never issue
+ESA/390 I/O commands itself, but must use the Linux/390 CDS interfaces instead.
+
+For long running I/O request to be canceled, the CDS layer provides the
+ccw_device_halt() function. Some devices require to initially issue a HALT
+SUBCHANNEL (HSCH) command without having pending I/O requests. This function is
+also covered by ccw_device_halt().
+
+
+get_ciw() - get command information word
+
+This call enables a device driver to get information about supported commands
+from the extended SenseID data.
+
+struct ciw *
+ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
+
+cdev - The ccw_device for which the command is to be retrieved.
+cmd - The command type to be retrieved.
+
+ccw_device_get_ciw() returns:
+NULL - No extended data available, invalid device or command not found.
+!NULL - The command requested.
+
+
+ccw_device_start() - Initiate I/O Request
+
+The ccw_device_start() routines is the I/O request front-end processor. All
+device driver I/O requests must be issued using this routine. A device driver
+must not issue ESA/390 I/O commands itself. Instead the ccw_device_start()
+routine provides all interfaces required to drive arbitrary devices.
+
+This description also covers the status information passed to the device
+driver's interrupt handler as this is related to the rules (flags) defined
+with the associated I/O request when calling ccw_device_start().
+
+int ccw_device_start(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ unsigned long flags);
+int ccw_device_start_timeout(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ unsigned long flags,
+ int expires);
+int ccw_device_start_key(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ __u8 key,
+ unsigned long flags);
+int ccw_device_start_key_timeout(struct ccw_device *cdev,
+ struct ccw1 *cpa,
+ unsigned long intparm,
+ __u8 lpm,
+ __u8 key,
+ unsigned long flags,
+ int expires);
+
+cdev : ccw_device the I/O is destined for
+cpa : logical start address of channel program
+user_intparm : user specific interrupt information; will be presented
+ back to the device driver's interrupt handler. Allows a
+ device driver to associate the interrupt with a
+ particular I/O request.
+lpm : defines the channel path to be used for a specific I/O
+ request. A value of 0 will make cio use the opm.
+key : the storage key to use for the I/O (useful for operating on a
+ storage with a storage key != default key)
+flag : defines the action to be performed for I/O processing
+expires : timeout value in jiffies. The common I/O layer will terminate
+ the running program after this and call the interrupt handler
+ with ERR_PTR(-ETIMEDOUT) as irb.
+
+Possible flag values are :
+
+DOIO_ALLOW_SUSPEND - channel program may become suspended
+DOIO_DENY_PREFETCH - don't allow for CCW prefetch; usually
+ this implies the channel program might
+ become modified
+DOIO_SUPPRESS_INTER - don't call the handler on intermediate status
+
+The cpa parameter points to the first format 1 CCW of a channel program :
+
+struct ccw1 {
+ __u8 cmd_code;/* command code */
+ __u8 flags; /* flags, like IDA addressing, etc. */
+ __u16 count; /* byte count */
+ __u32 cda; /* data address */
+} __attribute__ ((packed,aligned(8)));
+
+with the following CCW flags values defined :
+
+CCW_FLAG_DC - data chaining
+CCW_FLAG_CC - command chaining
+CCW_FLAG_SLI - suppress incorrect length
+CCW_FLAG_SKIP - skip
+CCW_FLAG_PCI - PCI
+CCW_FLAG_IDA - indirect addressing
+CCW_FLAG_SUSPEND - suspend
+
+
+Via ccw_device_set_options(), the device driver may specify the following
+options for the device:
+
+DOIO_EARLY_NOTIFICATION - allow for early interrupt notification
+DOIO_REPORT_ALL - report all interrupt conditions
+
+
+The ccw_device_start() function returns :
+
+ 0 - successful completion or request successfully initiated
+-EBUSY - The device is currently processing a previous I/O request, or there is
+ a status pending at the device.
+-ENODEV - cdev is invalid, the device is not operational or the ccw_device is
+ not online.
+
+When the I/O request completes, the CDS first level interrupt handler will
+accumulate the status in a struct irb and then call the device interrupt handler.
+The intparm field will contain the value the device driver has associated with a
+particular I/O request. If a pending device status was recognized,
+intparm will be set to 0 (zero). This may happen during I/O initiation or delayed
+by an alert status notification. In any case this status is not related to the
+current (last) I/O request. In case of a delayed status notification no special
+interrupt will be presented to indicate I/O completion as the I/O request was
+never started, even though ccw_device_start() returned with successful completion.
+
+The irb may contain an error value, and the device driver should check for this
+first:
+
+-ETIMEDOUT: the common I/O layer terminated the request after the specified
+ timeout value
+-EIO: the common I/O layer terminated the request due to an error state
+
+If the concurrent sense flag in the extended status word (esw) in the irb is
+set, the field erw.scnt in the esw describes the number of device specific
+sense bytes available in the extended control word irb->scsw.ecw[]. No device
+sensing by the device driver itself is required.
+
+The device interrupt handler can use the following definitions to investigate
+the primary unit check source coded in sense byte 0 :
+
+SNS0_CMD_REJECT 0x80
+SNS0_INTERVENTION_REQ 0x40
+SNS0_BUS_OUT_CHECK 0x20
+SNS0_EQUIPMENT_CHECK 0x10
+SNS0_DATA_CHECK 0x08
+SNS0_OVERRUN 0x04
+SNS0_INCOMPL_DOMAIN 0x01
+
+Depending on the device status, multiple of those values may be set together.
+Please refer to the device specific documentation for details.
+
+The irb->scsw.cstat field provides the (accumulated) subchannel status :
+
+SCHN_STAT_PCI - program controlled interrupt
+SCHN_STAT_INCORR_LEN - incorrect length
+SCHN_STAT_PROG_CHECK - program check
+SCHN_STAT_PROT_CHECK - protection check
+SCHN_STAT_CHN_DATA_CHK - channel data check
+SCHN_STAT_CHN_CTRL_CHK - channel control check
+SCHN_STAT_INTF_CTRL_CHK - interface control check
+SCHN_STAT_CHAIN_CHECK - chaining check
+
+The irb->scsw.dstat field provides the (accumulated) device status :
+
+DEV_STAT_ATTENTION - attention
+DEV_STAT_STAT_MOD - status modifier
+DEV_STAT_CU_END - control unit end
+DEV_STAT_BUSY - busy
+DEV_STAT_CHN_END - channel end
+DEV_STAT_DEV_END - device end
+DEV_STAT_UNIT_CHECK - unit check
+DEV_STAT_UNIT_EXCEP - unit exception
+
+Please see the ESA/390 Principles of Operation manual for details on the
+individual flag meanings.
+
+Usage Notes :
+
+ccw_device_start() must be called disabled and with the ccw device lock held.
+
+The device driver is allowed to issue the next ccw_device_start() call from
+within its interrupt handler already. It is not required to schedule a
+bottom-half, unless a non deterministically long running error recovery procedure
+or similar needs to be scheduled. During I/O processing the Linux/390 generic
+I/O device driver support has already obtained the IRQ lock, i.e. the handler
+must not try to obtain it again when calling ccw_device_start() or we end in a
+deadlock situation!
+
+If a device driver relies on an I/O request to be completed prior to start the
+next it can reduce I/O processing overhead by chaining a NoOp I/O command
+CCW_CMD_NOOP to the end of the submitted CCW chain. This will force Channel-End
+and Device-End status to be presented together, with a single interrupt.
+However, this should be used with care as it implies the channel will remain
+busy, not being able to process I/O requests for other devices on the same
+channel. Therefore e.g. read commands should never use this technique, as the
+result will be presented by a single interrupt anyway.
+
+In order to minimize I/O overhead, a device driver should use the
+DOIO_REPORT_ALL only if the device can report intermediate interrupt
+information prior to device-end the device driver urgently relies on. In this
+case all I/O interruptions are presented to the device driver until final
+status is recognized.
+
+If a device is able to recover from asynchronously presented I/O errors, it can
+perform overlapping I/O using the DOIO_EARLY_NOTIFICATION flag. While some
+devices always report channel-end and device-end together, with a single
+interrupt, others present primary status (channel-end) when the channel is
+ready for the next I/O request and secondary status (device-end) when the data
+transmission has been completed at the device.
+
+Above flag allows to exploit this feature, e.g. for communication devices that
+can handle lost data on the network to allow for enhanced I/O processing.
+
+Unless the channel subsystem at any time presents a secondary status interrupt,
+exploiting this feature will cause only primary status interrupts to be
+presented to the device driver while overlapping I/O is performed. When a
+secondary status without error (alert status) is presented, this indicates
+successful completion for all overlapping ccw_device_start() requests that have
+been issued since the last secondary (final) status.
+
+Channel programs that intend to set the suspend flag on a channel command word
+(CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the
+suspend flag will cause a channel program check. At the time the channel program
+becomes suspended an intermediate interrupt will be generated by the channel
+subsystem.
+
+ccw_device_resume() - Resume Channel Program Execution
+
+If a device driver chooses to suspend the current channel program execution by
+setting the CCW suspend flag on a particular CCW, the channel program execution
+is suspended. In order to resume channel program execution the CIO layer
+provides the ccw_device_resume() routine.
+
+int ccw_device_resume(struct ccw_device *cdev);
+
+cdev - ccw_device the resume operation is requested for
+
+The ccw_device_resume() function returns:
+
+ 0 - suspended channel program is resumed
+-EBUSY - status pending
+-ENODEV - cdev invalid or not-operational subchannel
+-EINVAL - resume function not applicable
+-ENOTCONN - there is no I/O request pending for completion
+
+Usage Notes:
+Please have a look at the ccw_device_start() usage notes for more details on
+suspended channel programs.
+
+ccw_device_halt() - Halt I/O Request Processing
+
+Sometimes a device driver might need a possibility to stop the processing of
+a long-running channel program or the device might require to initially issue
+a halt subchannel (HSCH) I/O command. For those purposes the ccw_device_halt()
+command is provided.
+
+ccw_device_halt() must be called disabled and with the ccw device lock held.
+
+int ccw_device_halt(struct ccw_device *cdev,
+ unsigned long intparm);
+
+cdev : ccw_device the halt operation is requested for
+intparm : interruption parameter; value is only used if no I/O
+ is outstanding, otherwise the intparm associated with
+ the I/O request is returned
+
+The ccw_device_halt() function returns :
+
+ 0 - request successfully initiated
+-EBUSY - the device is currently busy, or status pending.
+-ENODEV - cdev invalid.
+-EINVAL - The device is not operational or the ccw device is not online.
+
+Usage Notes :
+
+A device driver may write a never-ending channel program by writing a channel
+program that at its end loops back to its beginning by means of a transfer in
+channel (TIC) command (CCW_CMD_TIC). Usually this is performed by network
+device drivers by setting the PCI CCW flag (CCW_FLAG_PCI). Once this CCW is
+executed a program controlled interrupt (PCI) is generated. The device driver
+can then perform an appropriate action. Prior to interrupt of an outstanding
+read to a network device (with or without PCI flag) a ccw_device_halt()
+is required to end the pending operation.
+
+ccw_device_clear() - Terminage I/O Request Processing
+
+In order to terminate all I/O processing at the subchannel, the clear subchannel
+(CSCH) command is used. It can be issued via ccw_device_clear().
+
+ccw_device_clear() must be called disabled and with the ccw device lock held.
+
+int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
+
+cdev: ccw_device the clear operation is requested for
+intparm: interruption parameter (see ccw_device_halt())
+
+The ccw_device_clear() function returns:
+
+ 0 - request successfully initiated
+-ENODEV - cdev invalid
+-EINVAL - The device is not operational or the ccw device is not online.
+
+Miscellaneous Support Routines
+
+This chapter describes various routines to be used in a Linux/390 device
+driver programming environment.
+
+get_ccwdev_lock()
+
+Get the address of the device specific lock. This is then used in
+spin_lock() / spin_unlock() calls.
+
+
+__u8 ccw_device_get_path_mask(struct ccw_device *cdev);
+
+Get the mask of the path currently available for cdev.
diff --git a/Documentation/s390/config3270.sh b/Documentation/s390/config3270.sh
new file mode 100644
index 000000000..515e2f431
--- /dev/null
+++ b/Documentation/s390/config3270.sh
@@ -0,0 +1,76 @@
+#!/bin/sh
+#
+# config3270 -- Autoconfigure /dev/3270/* and /etc/inittab
+#
+# Usage:
+# config3270
+#
+# Output:
+# /tmp/mkdev3270
+#
+# Operation:
+# 1. Run this script
+# 2. Run the script it produces: /tmp/mkdev3270
+# 3. Issue "telinit q" or reboot, as appropriate.
+#
+P=/proc/tty/driver/tty3270
+ROOT=
+D=$ROOT/dev
+SUBD=3270
+TTY=$SUBD/tty
+TUB=$SUBD/tub
+SCR=$ROOT/tmp/mkdev3270
+SCRTMP=$SCR.a
+GETTYLINE=:2345:respawn:/sbin/mingetty
+INITTAB=$ROOT/etc/inittab
+NINITTAB=$ROOT/etc/NEWinittab
+OINITTAB=$ROOT/etc/OLDinittab
+ADDNOTE=\\"# Additional mingettys for the 3270/tty* driver, tub3270 ---\\"
+
+if ! ls $P > /dev/null 2>&1; then
+ modprobe tub3270 > /dev/null 2>&1
+fi
+ls $P > /dev/null 2>&1 || exit 1
+
+# Initialize two files, one for /dev/3270 commands and one
+# to replace the /etc/inittab file (old one saved in OLDinittab)
+echo "#!/bin/sh" > $SCR || exit 1
+echo " " >> $SCR
+echo "# Script built by /sbin/config3270" >> $SCR
+if [ ! -d /dev/dasd ]; then
+ echo rm -rf "$D/$SUBD/*" >> $SCR
+fi
+echo "grep -v $TTY $INITTAB > $NINITTAB" > $SCRTMP || exit 1
+echo "echo $ADDNOTE >> $NINITTAB" >> $SCRTMP
+if [ ! -d /dev/dasd ]; then
+ echo mkdir -p $D/$SUBD >> $SCR
+fi
+
+# Now query the tub3270 driver for 3270 device information
+# and add appropriate mknod and mingetty lines to our files
+echo what=config > $P
+while read devno maj min;do
+ if [ $min = 0 ]; then
+ fsmaj=$maj
+ if [ ! -d /dev/dasd ]; then
+ echo mknod $D/$TUB c $fsmaj 0 >> $SCR
+ echo chmod 666 $D/$TUB >> $SCR
+ fi
+ elif [ $maj = CONSOLE ]; then
+ if [ ! -d /dev/dasd ]; then
+ echo mknod $D/$TUB$devno c $fsmaj $min >> $SCR
+ fi
+ else
+ if [ ! -d /dev/dasd ]; then
+ echo mknod $D/$TTY$devno c $maj $min >>$SCR
+ echo mknod $D/$TUB$devno c $fsmaj $min >> $SCR
+ fi
+ echo "echo t$min$GETTYLINE $TTY$devno >> $NINITTAB" >> $SCRTMP
+ fi
+done < $P
+
+echo mv $INITTAB $OINITTAB >> $SCRTMP || exit 1
+echo mv $NINITTAB $INITTAB >> $SCRTMP
+cat $SCRTMP >> $SCR
+rm $SCRTMP
+exit 0
diff --git a/Documentation/s390/driver-model.txt b/Documentation/s390/driver-model.txt
new file mode 100644
index 000000000..ed265cf54
--- /dev/null
+++ b/Documentation/s390/driver-model.txt
@@ -0,0 +1,287 @@
+S/390 driver model interfaces
+-----------------------------
+
+1. CCW devices
+--------------
+
+All devices which can be addressed by means of ccws are called 'CCW devices' -
+even if they aren't actually driven by ccws.
+
+All ccw devices are accessed via a subchannel, this is reflected in the
+structures under devices/:
+
+devices/
+ - system/
+ - css0/
+ - 0.0.0000/0.0.0815/
+ - 0.0.0001/0.0.4711/
+ - 0.0.0002/
+ - 0.1.0000/0.1.1234/
+ ...
+ - defunct/
+
+In this example, device 0815 is accessed via subchannel 0 in subchannel set 0,
+device 4711 via subchannel 1 in subchannel set 0, and subchannel 2 is a non-I/O
+subchannel. Device 1234 is accessed via subchannel 0 in subchannel set 1.
+
+The subchannel named 'defunct' does not represent any real subchannel on the
+system; it is a pseudo subchannel where disconnected ccw devices are moved to
+if they are displaced by another ccw device becoming operational on their
+former subchannel. The ccw devices will be moved again to a proper subchannel
+if they become operational again on that subchannel.
+
+You should address a ccw device via its bus id (e.g. 0.0.4711); the device can
+be found under bus/ccw/devices/.
+
+All ccw devices export some data via sysfs.
+
+cutype: The control unit type / model.
+
+devtype: The device type / model, if applicable.
+
+availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for
+ disconnected devices.
+
+online: An interface to set the device online and offline.
+ In the special case of the device being disconnected (see the
+ notify function under 1.2), piping 0 to online will forcibly delete
+ the device.
+
+The device drivers can add entries to export per-device data and interfaces.
+
+There is also some data exported on a per-subchannel basis (see under
+bus/css/devices/):
+
+chpids: Via which chpids the device is connected.
+
+pimpampom: The path installed, path available and path operational masks.
+
+There also might be additional data, for example for block devices.
+
+
+1.1 Bringing up a ccw device
+----------------------------
+
+This is done in several steps.
+
+a. Each driver can provide one or more parameter interfaces where parameters can
+ be specified. These interfaces are also in the driver's responsibility.
+b. After a. has been performed, if necessary, the device is finally brought up
+ via the 'online' interface.
+
+
+1.2 Writing a driver for ccw devices
+------------------------------------
+
+The basic struct ccw_device and struct ccw_driver data structures can be found
+under include/asm/ccwdev.h.
+
+struct ccw_device {
+ spinlock_t *ccwlock;
+ struct ccw_device_private *private;
+ struct ccw_device_id id;
+
+ struct ccw_driver *drv;
+ struct device dev;
+ int online;
+
+ void (*handler) (struct ccw_device *dev, unsigned long intparm,
+ struct irb *irb);
+};
+
+struct ccw_driver {
+ struct module *owner;
+ struct ccw_device_id *ids;
+ int (*probe) (struct ccw_device *);
+ int (*remove) (struct ccw_device *);
+ int (*set_online) (struct ccw_device *);
+ int (*set_offline) (struct ccw_device *);
+ int (*notify) (struct ccw_device *, int);
+ struct device_driver driver;
+ char *name;
+};
+
+The 'private' field contains data needed for internal i/o operation only, and
+is not available to the device driver.
+
+Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models
+and/or device types/models it is interested. This information can later be found
+in the struct ccw_device_id fields:
+
+struct ccw_device_id {
+ __u16 match_flags;
+
+ __u16 cu_type;
+ __u16 dev_type;
+ __u8 cu_model;
+ __u8 dev_model;
+
+ unsigned long driver_info;
+};
+
+The functions in ccw_driver should be used in the following way:
+probe: This function is called by the device layer for each device the driver
+ is interested in. The driver should only allocate private structures
+ to put in dev->driver_data and create attributes (if needed). Also,
+ the interrupt handler (see below) should be set here.
+
+int (*probe) (struct ccw_device *cdev);
+
+Parameters: cdev - the device to be probed.
+
+
+remove: This function is called by the device layer upon removal of the driver,
+ the device or the module. The driver should perform cleanups here.
+
+int (*remove) (struct ccw_device *cdev);
+
+Parameters: cdev - the device to be removed.
+
+
+set_online: This function is called by the common I/O layer when the device is
+ activated via the 'online' attribute. The driver should finally
+ setup and activate the device here.
+
+int (*set_online) (struct ccw_device *);
+
+Parameters: cdev - the device to be activated. The common layer has
+ verified that the device is not already online.
+
+
+set_offline: This function is called by the common I/O layer when the device is
+ de-activated via the 'online' attribute. The driver should shut
+ down the device, but not de-allocate its private data.
+
+int (*set_offline) (struct ccw_device *);
+
+Parameters: cdev - the device to be deactivated. The common layer has
+ verified that the device is online.
+
+
+notify: This function is called by the common I/O layer for some state changes
+ of the device.
+ Signalled to the driver are:
+ * In online state, device detached (CIO_GONE) or last path gone
+ (CIO_NO_PATH). The driver must return !0 to keep the device; for
+ return code 0, the device will be deleted as usual (also when no
+ notify function is registered). If the driver wants to keep the
+ device, it is moved into disconnected state.
+ * In disconnected state, device operational again (CIO_OPER). The
+ common I/O layer performs some sanity checks on device number and
+ Device / CU to be reasonably sure if it is still the same device.
+ If not, the old device is removed and a new one registered. By the
+ return code of the notify function the device driver signals if it
+ wants the device back: !0 for keeping, 0 to make the device being
+ removed and re-registered.
+
+int (*notify) (struct ccw_device *, int);
+
+Parameters: cdev - the device whose state changed.
+ event - the event that happened. This can be one of CIO_GONE,
+ CIO_NO_PATH or CIO_OPER.
+
+The handler field of the struct ccw_device is meant to be set to the interrupt
+handler for the device. In order to accommodate drivers which use several
+distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device
+instead of ccw_driver.
+The handler is registered with the common layer during set_online() processing
+before the driver is called, and is deregistered during set_offline() after the
+driver has been called. Also, after registering / before deregistering, path
+grouping resp. disbanding of the path group (if applicable) are performed.
+
+void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
+
+Parameters: dev - the device the handler is called for
+ intparm - the intparm which allows the device driver to identify
+ the i/o the interrupt is associated with, or to recognize
+ the interrupt as unsolicited.
+ irb - interruption response block which contains the accumulated
+ status.
+
+The device driver is called from the common ccw_device layer and can retrieve
+information about the interrupt from the irb parameter.
+
+
+1.3 ccwgroup devices
+--------------------
+
+The ccwgroup mechanism is designed to handle devices consisting of multiple ccw
+devices, like lcs or ctc.
+
+The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to
+this attributes creates a ccwgroup device consisting of these ccw devices (if
+possible). This ccwgroup device can be set online or offline just like a normal
+ccw device.
+
+Each ccwgroup device also provides an 'ungroup' attribute to destroy the device
+again (only when offline). This is a generic ccwgroup mechanism (the driver does
+not need to implement anything beyond normal removal routines).
+
+A ccw device which is a member of a ccwgroup device carries a pointer to the
+ccwgroup device in the driver_data of its device struct. This field must not be
+touched by the driver - it should use the ccwgroup device's driver_data for its
+private data.
+
+To implement a ccwgroup driver, please refer to include/asm/ccwgroup.h. Keep in
+mind that most drivers will need to implement both a ccwgroup and a ccw
+driver.
+
+
+2. Channel paths
+-----------------
+
+Channel paths show up, like subchannels, under the channel subsystem root (css0)
+and are called 'chp0.<chpid>'. They have no driver and do not belong to any bus.
+Please note, that unlike /proc/chpids in 2.4, the channel path objects reflect
+only the logical state and not the physical state, since we cannot track the
+latter consistently due to lacking machine support (we don't need to be aware
+of it anyway).
+
+status - Can be 'online' or 'offline'.
+ Piping 'on' or 'off' sets the chpid logically online/offline.
+ Piping 'on' to an online chpid triggers path reprobing for all devices
+ the chpid connects to. This can be used to force the kernel to re-use
+ a channel path the user knows to be online, but the machine hasn't
+ created a machine check for.
+
+type - The physical type of the channel path.
+
+shared - Whether the channel path is shared.
+
+cmg - The channel measurement group.
+
+3. System devices
+-----------------
+
+3.1 xpram
+---------
+
+xpram shows up under devices/system/ as 'xpram'.
+
+3.2 cpus
+--------
+
+For each cpu, a directory is created under devices/system/cpu/. Each cpu has an
+attribute 'online' which can be 0 or 1.
+
+
+4. Other devices
+----------------
+
+4.1 Netiucv
+-----------
+
+The netiucv driver creates an attribute 'connection' under
+bus/iucv/drivers/netiucv. Piping to this attribute creates a new netiucv
+connection to the specified host.
+
+Netiucv connections show up under devices/iucv/ as "netiucv<ifnum>". The interface
+number is assigned sequentially to the connections defined via the 'connection'
+attribute.
+
+user - shows the connection partner.
+
+buffer - maximum buffer size.
+ Pipe to it to change buffer size.
+
+
diff --git a/Documentation/s390/kvm.txt b/Documentation/s390/kvm.txt
new file mode 100644
index 000000000..85f3280d7
--- /dev/null
+++ b/Documentation/s390/kvm.txt
@@ -0,0 +1,125 @@
+*** BIG FAT WARNING ***
+The kvm module is currently in EXPERIMENTAL state for s390. This means that
+the interface to the module is not yet considered to remain stable. Thus, be
+prepared that we keep breaking your userspace application and guest
+compatibility over and over again until we feel happy with the result. Make sure
+your guest kernel, your host kernel, and your userspace launcher are in a
+consistent state.
+
+This Documentation describes the unique ioctl calls to /dev/kvm, the resulting
+kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86.
+
+1. ioctl calls to /dev/kvm
+KVM does support the following ioctls on s390 that are common with other
+architectures and do behave the same:
+KVM_GET_API_VERSION
+KVM_CREATE_VM (*) see note
+KVM_CHECK_EXTENSION
+KVM_GET_VCPU_MMAP_SIZE
+
+Notes:
+* KVM_CREATE_VM may fail on s390, if the calling process has multiple
+threads and has not called KVM_S390_ENABLE_SIE before.
+
+In addition, on s390 the following architecture specific ioctls are supported:
+ioctl: KVM_S390_ENABLE_SIE
+args: none
+see also: include/linux/kvm.h
+This call causes the kernel to switch on PGSTE in the user page table. This
+operation is needed in order to run a virtual machine, and it requires the
+calling process to be single-threaded. Note that the first call to KVM_CREATE_VM
+will implicitly try to switch on PGSTE if the user process has not called
+KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads
+before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or will
+observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time
+operation, is not reversible, and will persist over the entire lifetime of
+the calling process. It does not have any user-visible effect other than a small
+performance penalty.
+
+2. ioctl calls to the kvm-vm file descriptor
+KVM does support the following ioctls on s390 that are common with other
+architectures and do behave the same:
+KVM_CREATE_VCPU
+KVM_SET_USER_MEMORY_REGION (*) see note
+KVM_GET_DIRTY_LOG (**) see note
+
+Notes:
+* kvm does only allow exactly one memory slot on s390, which has to start
+ at guest absolute address zero and at a user address that is aligned on any
+ page boundary. This hardware "limitation" allows us to have a few unique
+ optimizations. The memory slot doesn't have to be filled
+ with memory actually, it may contain sparse holes. That said, with different
+ user memory layout this does still allow a large flexibility when
+ doing the guest memory setup.
+** KVM_GET_DIRTY_LOG doesn't work properly yet. The user will receive an empty
+log. This ioctl call is only needed for guest migration, and we intend to
+implement this one in the future.
+
+In addition, on s390 the following architecture specific ioctls for the kvm-vm
+file descriptor are supported:
+ioctl: KVM_S390_INTERRUPT
+args: struct kvm_s390_interrupt *
+see also: include/linux/kvm.h
+This ioctl is used to submit a floating interrupt for a virtual machine.
+Floating interrupts may be delivered to any virtual cpu in the configuration.
+Only some interrupt types defined in include/linux/kvm.h make sense when
+submitted as floating interrupts. The following interrupts are not considered
+to be useful as floating interrupts, and a call to inject them will result in
+-EINVAL error code: program interrupts and interprocessor signals. Valid
+floating interrupts are:
+KVM_S390_INT_VIRTIO
+KVM_S390_INT_SERVICE
+
+3. ioctl calls to the kvm-vcpu file descriptor
+KVM does support the following ioctls on s390 that are common with other
+architectures and do behave the same:
+KVM_RUN
+KVM_GET_REGS
+KVM_SET_REGS
+KVM_GET_SREGS
+KVM_SET_SREGS
+KVM_GET_FPU
+KVM_SET_FPU
+
+In addition, on s390 the following architecture specific ioctls for the
+kvm-vcpu file descriptor are supported:
+ioctl: KVM_S390_INTERRUPT
+args: struct kvm_s390_interrupt *
+see also: include/linux/kvm.h
+This ioctl is used to submit an interrupt for a specific virtual cpu.
+Only some interrupt types defined in include/linux/kvm.h make sense when
+submitted for a specific cpu. The following interrupts are not considered
+to be useful, and a call to inject them will result in -EINVAL error code:
+service processor calls and virtio interrupts. Valid interrupt types are:
+KVM_S390_PROGRAM_INT
+KVM_S390_SIGP_STOP
+KVM_S390_RESTART
+KVM_S390_SIGP_SET_PREFIX
+KVM_S390_INT_EMERGENCY
+
+ioctl: KVM_S390_STORE_STATUS
+args: unsigned long
+see also: include/linux/kvm.h
+This ioctl stores the state of the cpu at the guest real address given as
+argument, unless one of the following values defined in include/linux/kvm.h
+is given as argument:
+KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in
+absolute lowcore as defined by the principles of operation
+KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in
+its prefix page just like the dump tool that comes with zipl. This is useful
+to create a system dump for use with lkcdutils or crash.
+
+ioctl: KVM_S390_SET_INITIAL_PSW
+args: struct kvm_s390_psw *
+see also: include/linux/kvm.h
+This ioctl can be used to set the processor status word (psw) of a stopped cpu
+prior to running it with KVM_RUN. Note that this call is not required to modify
+the psw during sie intercepts that fall back to userspace because struct kvm_run
+does contain the psw, and this value is evaluated during reentry of KVM_RUN
+after the intercept exit was recognized.
+
+ioctl: KVM_S390_INITIAL_RESET
+args: none
+see also: include/linux/kvm.h
+This ioctl can be used to perform an initial cpu reset as defined by the
+principles of operation. The target cpu has to be in stopped state.
diff --git a/Documentation/s390/monreader.txt b/Documentation/s390/monreader.txt
new file mode 100644
index 000000000..d3729585f
--- /dev/null
+++ b/Documentation/s390/monreader.txt
@@ -0,0 +1,197 @@
+
+Date : 2004-Nov-26
+Author: Gerald Schaefer (geraldsc@de.ibm.com)
+
+
+ Linux API for read access to z/VM Monitor Records
+ =================================================
+
+
+Description
+===========
+This item delivers a new Linux API in the form of a misc char device that is
+usable from user space and allows read access to the z/VM Monitor Records
+collected by the *MONITOR System Service of z/VM.
+
+
+User Requirements
+=================
+The z/VM guest on which you want to access this API needs to be configured in
+order to allow IUCV connections to the *MONITOR service, i.e. it needs the
+IUCV *MONITOR statement in its user entry. If the monitor DCSS to be used is
+restricted (likely), you also need the NAMESAVE <DCSS NAME> statement.
+This item will use the IUCV device driver to access the z/VM services, so you
+need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1.
+
+There are two options for being able to load the monitor DCSS (examples assume
+that the monitor DCSS begins at 144 MB and ends at 152 MB). You can query the
+location of the monitor DCSS with the Class E privileged CP command Q NSS MAP
+(the values BEGPAG and ENDPAG are given in units of 4K pages).
+
+See also "CP Command and Utility Reference" (SC24-6081-00) for more information
+on the DEF STOR and Q NSS MAP commands, as well as "Saved Segments Planning
+and Administration" (SC24-6116-00) for more information on DCSSes.
+
+1st option:
+-----------
+You can use the CP command DEF STOR CONFIG to define a "memory hole" in your
+guest virtual storage around the address range of the DCSS.
+
+Example: DEF STOR CONFIG 0.140M 200M.200M
+
+This defines two blocks of storage, the first is 140MB in size an begins at
+address 0MB, the second is 200MB in size and begins at address 200MB,
+resulting in a total storage of 340MB. Note that the first block should
+always start at 0 and be at least 64MB in size.
+
+2nd option:
+-----------
+Your guest virtual storage has to end below the starting address of the DCSS
+and you have to specify the "mem=" kernel parameter in your parmfile with a
+value greater than the ending address of the DCSS.
+
+Example: DEF STOR 140M
+
+This defines 140MB storage size for your guest, the parameter "mem=160M" is
+added to the parmfile.
+
+
+User Interface
+==============
+The char device is implemented as a kernel module named "monreader",
+which can be loaded via the modprobe command, or it can be compiled into the
+kernel instead. There is one optional module (or kernel) parameter, "mondcss",
+to specify the name of the monitor DCSS. If the module is compiled into the
+kernel, the kernel parameter "monreader.mondcss=<DCSS NAME>" can be specified
+in the parmfile.
+
+The default name for the DCSS is "MONDCSS" if none is specified. In case that
+there are other users already connected to the *MONITOR service (e.g.
+Performance Toolkit), the monitor DCSS is already defined and you have to use
+the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name
+of the monitor DCSS, if already defined, and the users connected to the
+*MONITOR service.
+Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor
+DCSS if your z/VM doesn't have one already, you need Class E privileges to
+define and save a DCSS.
+
+Example:
+--------
+modprobe monreader mondcss=MYDCSS
+
+This loads the module and sets the DCSS name to "MYDCSS".
+
+NOTE:
+-----
+This API provides no interface to control the *MONITOR service, e.g. specify
+which data should be collected. This can be done by the CP command MONITOR
+(Class E privileged), see "CP Command and Utility Reference".
+
+Device nodes with udev:
+-----------------------
+After loading the module, a char device will be created along with the device
+node /<udev directory>/monreader.
+
+Device nodes without udev:
+--------------------------
+If your distribution does not support udev, a device node will not be created
+automatically and you have to create it manually after loading the module.
+Therefore you need to know the major and minor numbers of the device. These
+numbers can be found in /sys/class/misc/monreader/dev.
+Typing cat /sys/class/misc/monreader/dev will give an output of the form
+<major>:<minor>. The device node can be created via the mknod command, enter
+mknod <name> c <major> <minor>, where <name> is the name of the device node
+to be created.
+
+Example:
+--------
+# modprobe monreader
+# cat /sys/class/misc/monreader/dev
+10:63
+# mknod /dev/monreader c 10 63
+
+This loads the module with the default monitor DCSS (MONDCSS) and creates a
+device node.
+
+File operations:
+----------------
+The following file operations are supported: open, release, read, poll.
+There are two alternative methods for reading: either non-blocking read in
+conjunction with polling, or blocking read without polling. IOCTLs are not
+supported.
+
+Read:
+-----
+Reading from the device provides a 12 Byte monitor control element (MCE),
+followed by a set of one or more contiguous monitor records (similar to the
+output of the CMS utility MONWRITE without the 4K control blocks). The MCE
+contains information on the type of the following record set (sample/event
+data), the monitor domains contained within it and the start and end address
+of the record set in the monitor DCSS. The start and end address can be used
+to determine the size of the record set, the end address is the address of the
+last byte of data. The start address is needed to handle "end-of-frame" records
+correctly (domain 1, record 13), i.e. it can be used to determine the record
+start offset relative to a 4K page (frame) boundary.
+
+See "Appendix A: *MONITOR" in the "z/VM Performance" document for a description
+of the monitor control element layout. The layout of the monitor records can
+be found here (z/VM 5.1): http://www.vm.ibm.com/pubs/mon510/index.html
+
+The layout of the data stream provided by the monreader device is as follows:
+...
+<0 byte read>
+<first MCE> \
+<first set of records> |
+... |- data set
+<last MCE> |
+<last set of records> /
+<0 byte read>
+...
+
+There may be more than one combination of MCE and corresponding record set
+within one data set and the end of each data set is indicated by a successful
+read with a return value of 0 (0 byte read).
+Any received data must be considered invalid until a complete set was
+read successfully, including the closing 0 byte read. Therefore you should
+always read the complete set into a buffer before processing the data.
+
+The maximum size of a data set can be as large as the size of the
+monitor DCSS, so design the buffer adequately or use dynamic memory allocation.
+The size of the monitor DCSS will be printed into syslog after loading the
+module. You can also use the (Class E privileged) CP command Q NSS MAP to
+list all available segments and information about them.
+
+As with most char devices, error conditions are indicated by returning a
+negative value for the number of bytes read. In this case, the errno variable
+indicates the error condition:
+
+EIO: reply failed, read data is invalid and the application
+ should discard the data read since the last successful read with 0 size.
+EFAULT: copy_to_user failed, read data is invalid and the application should
+ discard the data read since the last successful read with 0 size.
+EAGAIN: occurs on a non-blocking read if there is no data available at the
+ moment. There is no data missing or corrupted, just try again or rather
+ use polling for non-blocking reads.
+EOVERFLOW: message limit reached, the data read since the last successful
+ read with 0 size is valid but subsequent records may be missing.
+
+In the last case (EOVERFLOW) there may be missing data, in the first two cases
+(EIO, EFAULT) there will be missing data. It's up to the application if it will
+continue reading subsequent data or rather exit.
+
+Open:
+-----
+Only one user is allowed to open the char device. If it is already in use, the
+open function will fail (return a negative value) and set errno to EBUSY.
+The open function may also fail if an IUCV connection to the *MONITOR service
+cannot be established. In this case errno will be set to EIO and an error
+message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER
+codes are described in the "z/VM Performance" book, Appendix A.
+
+NOTE:
+-----
+As soon as the device is opened, incoming messages will be accepted and they
+will account for the message limit, i.e. opening the device without reading
+from it will provoke the "message limit reached" error (EOVERFLOW error code)
+eventually.
+
diff --git a/Documentation/s390/qeth.txt b/Documentation/s390/qeth.txt
new file mode 100644
index 000000000..74122ada9
--- /dev/null
+++ b/Documentation/s390/qeth.txt
@@ -0,0 +1,50 @@
+IBM s390 QDIO Ethernet Driver
+
+HiperSockets Bridge Port Support
+
+Uevents
+
+To generate the events the device must be assigned a role of either
+a primary or a secondary Bridge Port. For more information, see
+"z/VM Connectivity, SC24-6174".
+
+When run on HiperSockets Bridge Capable Port hardware, and the state
+of some configured Bridge Port device on the channel changes, a udev
+event with ACTION=CHANGE is emitted on behalf of the corresponding
+ccwgroup device. The event has the following attributes:
+
+BRIDGEPORT=statechange - indicates that the Bridge Port device changed
+ its state.
+
+ROLE={primary|secondary|none} - the role assigned to the port.
+
+STATE={active|standby|inactive} - the newly assumed state of the port.
+
+When run on HiperSockets Bridge Capable Port hardware with host address
+notifications enabled, a udev event with ACTION=CHANGE is emitted.
+It is emitted on behalf of the corresponding ccwgroup device when a host
+or a VLAN is registered or unregistered on the network served by the device.
+The event has the following attributes:
+
+BRIDGEDHOST={reset|register|deregister|abort} - host address
+ notifications are started afresh, a new host or VLAN is registered or
+ deregistered on the Bridge Port HiperSockets channel, or address
+ notifications are aborted.
+
+VLAN=numeric-vlan-id - VLAN ID on which the event occurred. Not included
+ if no VLAN is involved in the event.
+
+MAC=xx:xx:xx:xx:xx:xx - MAC address of the host that is being registered
+ or deregistered from the HiperSockets channel. Not reported if the
+ event reports the creation or destruction of a VLAN.
+
+NTOK_BUSID=x.y.zzzz - device bus ID (CSSID, SSID and device number).
+
+NTOK_IID=xx - device IID.
+
+NTOK_CHPID=xx - device CHPID.
+
+NTOK_CHID=xxxx - device channel ID.
+
+Note that the NTOK_* attributes refer to devices other than the one
+connected to the system on which the OS is running.
diff --git a/Documentation/s390/s390dbf.txt b/Documentation/s390/s390dbf.txt
new file mode 100644
index 000000000..3da163383
--- /dev/null
+++ b/Documentation/s390/s390dbf.txt
@@ -0,0 +1,667 @@
+S390 Debug Feature
+==================
+
+files: arch/s390/kernel/debug.c
+ arch/s390/include/asm/debug.h
+
+Description:
+------------
+The goal of this feature is to provide a kernel debug logging API
+where log records can be stored efficiently in memory, where each component
+(e.g. device drivers) can have one separate debug log.
+One purpose of this is to inspect the debug logs after a production system crash
+in order to analyze the reason for the crash.
+If the system still runs but only a subcomponent which uses dbf fails,
+it is possible to look at the debug logs on a live system via the Linux
+debugfs filesystem.
+The debug feature may also very useful for kernel and driver development.
+
+Design:
+-------
+Kernel components (e.g. device drivers) can register themselves at the debug
+feature with the function call debug_register(). This function initializes a
+debug log for the caller. For each debug log exists a number of debug areas
+where exactly one is active at one time. Each debug area consists of contiguous
+pages in memory. In the debug areas there are stored debug entries (log records)
+which are written by event- and exception-calls.
+
+An event-call writes the specified debug entry to the active debug
+area and updates the log pointer for the active area. If the end
+of the active debug area is reached, a wrap around is done (ring buffer)
+and the next debug entry will be written at the beginning of the active
+debug area.
+
+An exception-call writes the specified debug entry to the log and
+switches to the next debug area. This is done in order to be sure
+that the records which describe the origin of the exception are not
+overwritten when a wrap around for the current area occurs.
+
+The debug areas themselves are also ordered in form of a ring buffer.
+When an exception is thrown in the last debug area, the following debug
+entries are then written again in the very first area.
+
+There are three versions for the event- and exception-calls: One for
+logging raw data, one for text and one for numbers.
+
+Each debug entry contains the following data:
+
+- Timestamp
+- Cpu-Number of calling task
+- Level of debug entry (0...6)
+- Return Address to caller
+- Flag, if entry is an exception or not
+
+The debug logs can be inspected in a live system through entries in
+the debugfs-filesystem. Under the toplevel directory "s390dbf" there is
+a directory for each registered component, which is named like the
+corresponding component. The debugfs normally should be mounted to
+/sys/kernel/debug therefore the debug feature can be accessed under
+/sys/kernel/debug/s390dbf.
+
+The content of the directories are files which represent different views
+to the debug log. Each component can decide which views should be
+used through registering them with the function debug_register_view().
+Predefined views for hex/ascii, sprintf and raw binary data are provided.
+It is also possible to define other views. The content of
+a view can be inspected simply by reading the corresponding debugfs file.
+
+All debug logs have an actual debug level (range from 0 to 6).
+The default level is 3. Event and Exception functions have a 'level'
+parameter. Only debug entries with a level that is lower or equal
+than the actual level are written to the log. This means, when
+writing events, high priority log entries should have a low level
+value whereas low priority entries should have a high one.
+The actual debug level can be changed with the help of the debugfs-filesystem
+through writing a number string "x" to the 'level' debugfs file which is
+provided for every debug log. Debugging can be switched off completely
+by using "-" on the 'level' debugfs file.
+
+Example:
+
+> echo "-" > /sys/kernel/debug/s390dbf/dasd/level
+
+It is also possible to deactivate the debug feature globally for every
+debug log. You can change the behavior using 2 sysctl parameters in
+/proc/sys/s390dbf:
+There are currently 2 possible triggers, which stop the debug feature
+globally. The first possibility is to use the "debug_active" sysctl. If
+set to 1 the debug feature is running. If "debug_active" is set to 0 the
+debug feature is turned off.
+The second trigger which stops the debug feature is a kernel oops.
+That prevents the debug feature from overwriting debug information that
+happened before the oops. After an oops you can reactivate the debug feature
+by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, its not
+suggested to use an oopsed kernel in a production environment.
+If you want to disallow the deactivation of the debug feature, you can use
+the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug
+feature cannot be stopped. If the debug feature is already stopped, it
+will stay deactivated.
+
+Kernel Interfaces:
+------------------
+
+----------------------------------------------------------------------------
+debug_info_t *debug_register(char *name, int pages, int nr_areas,
+ int buf_size);
+
+Parameter: name: Name of debug log (e.g. used for debugfs entry)
+ pages: number of pages, which will be allocated per area
+ nr_areas: number of debug areas
+ buf_size: size of data area in each debug entry
+
+Return Value: Handle for generated debug area
+ NULL if register failed
+
+Description: Allocates memory for a debug log
+ Must not be called within an interrupt handler
+
+----------------------------------------------------------------------------
+debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
+ int buf_size, mode_t mode, uid_t uid,
+ gid_t gid);
+
+Parameter: name: Name of debug log (e.g. used for debugfs entry)
+ pages: Number of pages, which will be allocated per area
+ nr_areas: Number of debug areas
+ buf_size: Size of data area in each debug entry
+ mode: File mode for debugfs files. E.g. S_IRWXUGO
+ uid: User ID for debugfs files. Currently only 0 is
+ supported.
+ gid: Group ID for debugfs files. Currently only 0 is
+ supported.
+
+Return Value: Handle for generated debug area
+ NULL if register failed
+
+Description: Allocates memory for a debug log
+ Must not be called within an interrupt handler
+
+---------------------------------------------------------------------------
+void debug_unregister (debug_info_t * id);
+
+Parameter: id: handle for debug log
+
+Return Value: none
+
+Description: frees memory for a debug log and removes all registered debug
+ views.
+ Must not be called within an interrupt handler
+
+---------------------------------------------------------------------------
+void debug_set_level (debug_info_t * id, int new_level);
+
+Parameter: id: handle for debug log
+ new_level: new debug level
+
+Return Value: none
+
+Description: Sets new actual debug level if new_level is valid.
+
+---------------------------------------------------------------------------
+bool debug_level_enabled (debug_info_t * id, int level);
+
+Parameter: id: handle for debug log
+ level: debug level
+
+Return Value: True if level is less or equal to the current debug level.
+
+Description: Returns true if debug events for the specified level would be
+ logged. Otherwise returns false.
+---------------------------------------------------------------------------
+void debug_stop_all(void);
+
+Parameter: none
+
+Return Value: none
+
+Description: stops the debug feature if stopping is allowed. Currently
+ used in case of a kernel oops.
+
+---------------------------------------------------------------------------
+debug_entry_t* debug_event (debug_info_t* id, int level, void* data,
+ int length);
+
+Parameter: id: handle for debug log
+ level: debug level
+ data: pointer to data for debug entry
+ length: length of data in bytes
+
+Return Value: Address of written debug entry
+
+Description: writes debug entry to active debug area (if level <= actual
+ debug level)
+
+---------------------------------------------------------------------------
+debug_entry_t* debug_int_event (debug_info_t * id, int level,
+ unsigned int data);
+debug_entry_t* debug_long_event(debug_info_t * id, int level,
+ unsigned long data);
+
+Parameter: id: handle for debug log
+ level: debug level
+ data: integer value for debug entry
+
+Return Value: Address of written debug entry
+
+Description: writes debug entry to active debug area (if level <= actual
+ debug level)
+
+---------------------------------------------------------------------------
+debug_entry_t* debug_text_event (debug_info_t * id, int level,
+ const char* data);
+
+Parameter: id: handle for debug log
+ level: debug level
+ data: string for debug entry
+
+Return Value: Address of written debug entry
+
+Description: writes debug entry in ascii format to active debug area
+ (if level <= actual debug level)
+
+---------------------------------------------------------------------------
+debug_entry_t* debug_sprintf_event (debug_info_t * id, int level,
+ char* string,...);
+
+Parameter: id: handle for debug log
+ level: debug level
+ string: format string for debug entry
+ ...: varargs used as in sprintf()
+
+Return Value: Address of written debug entry
+
+Description: writes debug entry with format string and varargs (longs) to
+ active debug area (if level $<=$ actual debug level).
+ floats and long long datatypes cannot be used as varargs.
+
+---------------------------------------------------------------------------
+
+debug_entry_t* debug_exception (debug_info_t* id, int level, void* data,
+ int length);
+
+Parameter: id: handle for debug log
+ level: debug level
+ data: pointer to data for debug entry
+ length: length of data in bytes
+
+Return Value: Address of written debug entry
+
+Description: writes debug entry to active debug area (if level <= actual
+ debug level) and switches to next debug area
+
+---------------------------------------------------------------------------
+debug_entry_t* debug_int_exception (debug_info_t * id, int level,
+ unsigned int data);
+debug_entry_t* debug_long_exception(debug_info_t * id, int level,
+ unsigned long data);
+
+Parameter: id: handle for debug log
+ level: debug level
+ data: integer value for debug entry
+
+Return Value: Address of written debug entry
+
+Description: writes debug entry to active debug area (if level <= actual
+ debug level) and switches to next debug area
+
+---------------------------------------------------------------------------
+debug_entry_t* debug_text_exception (debug_info_t * id, int level,
+ const char* data);
+
+Parameter: id: handle for debug log
+ level: debug level
+ data: string for debug entry
+
+Return Value: Address of written debug entry
+
+Description: writes debug entry in ascii format to active debug area
+ (if level <= actual debug level) and switches to next debug
+ area
+
+---------------------------------------------------------------------------
+debug_entry_t* debug_sprintf_exception (debug_info_t * id, int level,
+ char* string,...);
+
+Parameter: id: handle for debug log
+ level: debug level
+ string: format string for debug entry
+ ...: varargs used as in sprintf()
+
+Return Value: Address of written debug entry
+
+Description: writes debug entry with format string and varargs (longs) to
+ active debug area (if level $<=$ actual debug level) and
+ switches to next debug area.
+ floats and long long datatypes cannot be used as varargs.
+
+---------------------------------------------------------------------------
+
+int debug_register_view (debug_info_t * id, struct debug_view *view);
+
+Parameter: id: handle for debug log
+ view: pointer to debug view struct
+
+Return Value: 0 : ok
+ < 0: Error
+
+Description: registers new debug view and creates debugfs dir entry
+
+---------------------------------------------------------------------------
+int debug_unregister_view (debug_info_t * id, struct debug_view *view);
+
+Parameter: id: handle for debug log
+ view: pointer to debug view struct
+
+Return Value: 0 : ok
+ < 0: Error
+
+Description: unregisters debug view and removes debugfs dir entry
+
+
+
+Predefined views:
+-----------------
+
+extern struct debug_view debug_hex_ascii_view;
+extern struct debug_view debug_raw_view;
+extern struct debug_view debug_sprintf_view;
+
+Examples
+--------
+
+/*
+ * hex_ascii- + raw-view Example
+ */
+
+#include <linux/init.h>
+#include <asm/debug.h>
+
+static debug_info_t* debug_info;
+
+static int init(void)
+{
+ /* register 4 debug areas with one page each and 4 byte data field */
+
+ debug_info = debug_register ("test", 1, 4, 4 );
+ debug_register_view(debug_info,&debug_hex_ascii_view);
+ debug_register_view(debug_info,&debug_raw_view);
+
+ debug_text_event(debug_info, 4 , "one ");
+ debug_int_exception(debug_info, 4, 4711);
+ debug_event(debug_info, 3, &debug_info, 4);
+
+ return 0;
+}
+
+static void cleanup(void)
+{
+ debug_unregister (debug_info);
+}
+
+module_init(init);
+module_exit(cleanup);
+
+---------------------------------------------------------------------------
+
+/*
+ * sprintf-view Example
+ */
+
+#include <linux/init.h>
+#include <asm/debug.h>
+
+static debug_info_t* debug_info;
+
+static int init(void)
+{
+ /* register 4 debug areas with one page each and data field for */
+ /* format string pointer + 2 varargs (= 3 * sizeof(long)) */
+
+ debug_info = debug_register ("test", 1, 4, sizeof(long) * 3);
+ debug_register_view(debug_info,&debug_sprintf_view);
+
+ debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
+ debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
+
+ return 0;
+}
+
+static void cleanup(void)
+{
+ debug_unregister (debug_info);
+}
+
+module_init(init);
+module_exit(cleanup);
+
+
+
+Debugfs Interface
+----------------
+Views to the debug logs can be investigated through reading the corresponding
+debugfs-files:
+
+Example:
+
+> ls /sys/kernel/debug/s390dbf/dasd
+flush hex_ascii level pages raw
+> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort +1
+00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
+00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
+00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
+00 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP
+01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD
+01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | ....
+01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ...
+01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | ....
+01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE
+01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | ....
+
+See section about predefined views for explanation of the above output!
+
+Changing the debug level
+------------------------
+
+Example:
+
+
+> cat /sys/kernel/debug/s390dbf/dasd/level
+3
+> echo "5" > /sys/kernel/debug/s390dbf/dasd/level
+> cat /sys/kernel/debug/s390dbf/dasd/level
+5
+
+Flushing debug areas
+--------------------
+Debug areas can be flushed with piping the number of the desired
+area (0...n) to the debugfs file "flush". When using "-" all debug areas
+are flushed.
+
+Examples:
+
+1. Flush debug area 0:
+> echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
+
+2. Flush all debug areas:
+> echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
+
+Changing the size of debug areas
+------------------------------------
+It is possible the change the size of debug areas through piping
+the number of pages to the debugfs file "pages". The resize request will
+also flush the debug areas.
+
+Example:
+
+Define 4 pages for the debug areas of debug feature "dasd":
+> echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
+
+Stooping the debug feature
+--------------------------
+Example:
+
+1. Check if stopping is allowed
+> cat /proc/sys/s390dbf/debug_stoppable
+2. Stop debug feature
+> echo 0 > /proc/sys/s390dbf/debug_active
+
+lcrash Interface
+----------------
+It is planned that the dump analysis tool lcrash gets an additional command
+'s390dbf' to display all the debug logs. With this tool it will be possible
+to investigate the debug logs on a live system and with a memory dump after
+a system crash.
+
+Investigating raw memory
+------------------------
+One last possibility to investigate the debug logs at a live
+system and after a system crash is to look at the raw memory
+under VM or at the Service Element.
+It is possible to find the anker of the debug-logs through
+the 'debug_area_first' symbol in the System map. Then one has
+to follow the correct pointers of the data-structures defined
+in debug.h and find the debug-areas in memory.
+Normally modules which use the debug feature will also have
+a global variable with the pointer to the debug-logs. Following
+this pointer it will also be possible to find the debug logs in
+memory.
+
+For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
+for the length of the data field in debug_register() in
+order to see the debug entries well formatted.
+
+
+Predefined Views
+----------------
+
+There are three predefined views: hex_ascii, raw and sprintf.
+The hex_ascii view shows the data field in hex and ascii representation
+(e.g. '45 43 4b 44 | ECKD').
+The raw view returns a bytestream as the debug areas are stored in memory.
+
+The sprintf view formats the debug entries in the same way as the sprintf
+function would do. The sprintf event/exception functions write to the
+debug entry a pointer to the format string (size = sizeof(long))
+and for each vararg a long value. So e.g. for a debug entry with a format
+string plus two varargs one would need to allocate a (3 * sizeof(long))
+byte data area in the debug_register() function.
+
+IMPORTANT: Using "%s" in sprintf event functions is dangerous. You can only
+use "%s" in the sprintf event functions, if the memory for the passed string is
+available as long as the debug feature exists. The reason behind this is that
+due to performance considerations only a pointer to the string is stored in
+the debug feature. If you log a string that is freed afterwards, you will get
+an OOPS when inspecting the debug feature, because then the debug feature will
+access the already freed memory.
+
+NOTE: If using the sprintf view do NOT use other event/exception functions
+than the sprintf-event and -exception functions.
+
+The format of the hex_ascii and sprintf view is as follows:
+- Number of area
+- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
+ Universal Time (UTC), January 1, 1970)
+- level of debug entry
+- Exception flag (* = Exception)
+- Cpu-Number of calling task
+- Return Address to caller
+- data field
+
+The format of the raw view is:
+- Header as described in debug.h
+- datafield
+
+A typical line of the hex_ascii view will look like the following (first line
+is only for explanation and will not be displayed when 'cating' the view):
+
+area time level exception cpu caller data (hex + ascii)
+--------------------------------------------------------------------------
+00 00964419409:440690 1 - 00 88023fe
+
+
+Defining views
+--------------
+
+Views are specified with the 'debug_view' structure. There are defined
+callback functions which are used for reading and writing the debugfs files:
+
+struct debug_view {
+ char name[DEBUG_MAX_PROCF_LEN];
+ debug_prolog_proc_t* prolog_proc;
+ debug_header_proc_t* header_proc;
+ debug_format_proc_t* format_proc;
+ debug_input_proc_t* input_proc;
+ void* private_data;
+};
+
+where
+
+typedef int (debug_header_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ int area,
+ debug_entry_t* entry,
+ char* out_buf);
+
+typedef int (debug_format_proc_t) (debug_info_t* id,
+ struct debug_view* view, char* out_buf,
+ const char* in_buf);
+typedef int (debug_prolog_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ char* out_buf);
+typedef int (debug_input_proc_t) (debug_info_t* id,
+ struct debug_view* view,
+ struct file* file, const char* user_buf,
+ size_t in_buf_size, loff_t* offset);
+
+
+The "private_data" member can be used as pointer to view specific data.
+It is not used by the debug feature itself.
+
+The output when reading a debugfs file is structured like this:
+
+"prolog_proc output"
+
+"header_proc output 1" "format_proc output 1"
+"header_proc output 2" "format_proc output 2"
+"header_proc output 3" "format_proc output 3"
+...
+
+When a view is read from the debugfs, the Debug Feature calls the
+'prolog_proc' once for writing the prolog.
+Then 'header_proc' and 'format_proc' are called for each
+existing debug entry.
+
+The input_proc can be used to implement functionality when it is written to
+the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level).
+
+For header_proc there can be used the default function
+debug_dflt_header_fn() which is defined in debug.h.
+and which produces the same header output as the predefined views.
+E.g:
+00 00964419409:440761 2 - 00 88023ec
+
+In order to see how to use the callback functions check the implementation
+of the default views!
+
+Example
+
+#include <asm/debug.h>
+
+#define UNKNOWNSTR "data: %08x"
+
+const char* messages[] =
+{"This error...........\n",
+ "That error...........\n",
+ "Problem..............\n",
+ "Something went wrong.\n",
+ "Everything ok........\n",
+ NULL
+};
+
+static int debug_test_format_fn(
+ debug_info_t * id, struct debug_view *view,
+ char *out_buf, const char *in_buf
+)
+{
+ int i, rc = 0;
+
+ if(id->buf_size >= 4) {
+ int msg_nr = *((int*)in_buf);
+ if(msg_nr < sizeof(messages)/sizeof(char*) - 1)
+ rc += sprintf(out_buf, "%s", messages[msg_nr]);
+ else
+ rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
+ }
+ out:
+ return rc;
+}
+
+struct debug_view debug_test_view = {
+ "myview", /* name of view */
+ NULL, /* no prolog */
+ &debug_dflt_header_fn, /* default header for each entry */
+ &debug_test_format_fn, /* our own format function */
+ NULL, /* no input function */
+ NULL /* no private data */
+};
+
+=====
+test:
+=====
+debug_info_t *debug_info;
+...
+debug_info = debug_register ("test", 0, 4, 4 ));
+debug_register_view(debug_info, &debug_test_view);
+for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i);
+
+> cat /sys/kernel/debug/s390dbf/test/myview
+00 00964419734:611402 1 - 00 88042ca This error...........
+00 00964419734:611405 1 - 00 88042ca That error...........
+00 00964419734:611408 1 - 00 88042ca Problem..............
+00 00964419734:611411 1 - 00 88042ca Something went wrong.
+00 00964419734:611414 1 - 00 88042ca Everything ok........
+00 00964419734:611417 1 - 00 88042ca data: 00000005
+00 00964419734:611419 1 - 00 88042ca data: 00000006
+00 00964419734:611422 1 - 00 88042ca data: 00000007
+00 00964419734:611425 1 - 00 88042ca data: 00000008
+00 00964419734:611428 1 - 00 88042ca data: 00000009
diff --git a/Documentation/s390/zfcpdump.txt b/Documentation/s390/zfcpdump.txt
new file mode 100644
index 000000000..dc929be96
--- /dev/null
+++ b/Documentation/s390/zfcpdump.txt
@@ -0,0 +1,52 @@
+The s390 SCSI dump tool (zfcpdump)
+
+System z machines (z900 or higher) provide hardware support for creating system
+dumps on SCSI disks. The dump process is initiated by booting a dump tool, which
+has to create a dump of the current (probably crashed) Linux image. In order to
+not overwrite memory of the crashed Linux with data of the dump tool, the
+hardware saves some memory plus the register sets of the boot CPU before the
+dump tool is loaded. There exists an SCLP hardware interface to obtain the saved
+memory afterwards. Currently 32 MB are saved.
+
+This zfcpdump implementation consists of a Linux dump kernel together with
+a user space dump tool, which are loaded together into the saved memory region
+below 32 MB. zfcpdump is installed on a SCSI disk using zipl (as contained in
+the s390-tools package) to make the device bootable. The operator of a Linux
+system can then trigger a SCSI dump by booting the SCSI disk, where zfcpdump
+resides on.
+
+The kernel part of zfcpdump is implemented as a debugfs file under "zcore/mem",
+which exports memory and registers of the crashed Linux in an s390
+standalone dump format. It can be used in the same way as e.g. /dev/mem. The
+dump format defines a 4K header followed by plain uncompressed memory. The
+register sets are stored in the prefix pages of the respective CPUs. To build a
+dump enabled kernel with the zcore driver, the kernel config option
+CONFIG_CRASH_DUMP has to be set. When reading from "zcore/mem", the part of
+memory, which has been saved by hardware is read by the driver via the SCLP
+hardware interface. The second part is just copied from the non overwritten real
+memory.
+
+Since kernel version 3.12 also the /proc/vmcore file can also be used to access
+the dump.
+
+To get a valid zfcpdump kernel configuration use "make zfcpdump_defconfig".
+
+The s390 zipl tool looks for the zfcpdump kernel and optional initrd/initramfs
+under the following locations:
+
+* kernel: <zfcpdump directory>/zfcpdump.image
+* ramdisk: <zfcpdump directory>/zfcpdump.rd
+
+The zfcpdump directory is defined in the s390-tools package.
+
+The user space application of zfcpdump can reside in an intitramfs or an
+initrd. It can also be included in a built-in kernel initramfs. The application
+reads from /proc/vmcore or zcore/mem and writes the system dump to a SCSI disk.
+
+The s390-tools package version 1.24.0 and above builds an external zfcpdump
+initramfs with a user space application that writes the dump to a SCSI
+partition.
+
+For more information on how to use zfcpdump refer to the s390 'Using the Dump
+Tools book', which is available from
+http://www.ibm.com/developerworks/linux/linux390.
diff --git a/Documentation/scheduler/00-INDEX b/Documentation/scheduler/00-INDEX
new file mode 100644
index 000000000..eccf7ad2e
--- /dev/null
+++ b/Documentation/scheduler/00-INDEX
@@ -0,0 +1,18 @@
+00-INDEX
+ - this file.
+sched-arch.txt
+ - CPU Scheduler implementation hints for architecture specific code.
+sched-bwc.txt
+ - CFS bandwidth control overview.
+sched-design-CFS.txt
+ - goals, design and implementation of the Completely Fair Scheduler.
+sched-domains.txt
+ - information on scheduling domains.
+sched-nice-design.txt
+ - How and why the scheduler's nice levels are implemented.
+sched-rt-group.txt
+ - real-time group scheduling.
+sched-deadline.txt
+ - deadline scheduling.
+sched-stats.txt
+ - information on schedstats (Linux Scheduler Statistics).
diff --git a/Documentation/scheduler/completion.txt b/Documentation/scheduler/completion.txt
new file mode 100644
index 000000000..2622bc7a1
--- /dev/null
+++ b/Documentation/scheduler/completion.txt
@@ -0,0 +1,248 @@
+completions - wait for completion handling
+==========================================
+
+This document was originally written based on 3.18.0 (linux-next)
+
+Introduction:
+-------------
+
+If you have one or more threads of execution that must wait for some process
+to have reached a point or a specific state, completions can provide a
+race-free solution to this problem. Semantically they are somewhat like a
+pthread_barrier and have similar use-cases.
+
+Completions are a code synchronization mechanism which is preferable to any
+misuse of locks. Any time you think of using yield() or some quirky
+msleep(1) loop to allow something else to proceed, you probably want to
+look into using one of the wait_for_completion*() calls instead. The
+advantage of using completions is clear intent of the code, but also more
+efficient code as both threads can continue until the result is actually
+needed.
+
+Completions are built on top of the generic event infrastructure in Linux,
+with the event reduced to a simple flag (appropriately called "done") in
+struct completion that tells the waiting threads of execution if they
+can continue safely.
+
+As completions are scheduling related, the code is found in
+kernel/sched/completion.c - for details on completion design and
+implementation see completions-design.txt
+
+
+Usage:
+------
+
+There are three parts to using completions, the initialization of the
+struct completion, the waiting part through a call to one of the variants of
+wait_for_completion() and the signaling side through a call to complete()
+or complete_all(). Further there are some helper functions for checking the
+state of completions.
+
+To use completions one needs to include <linux/completion.h> and
+create a variable of type struct completion. The structure used for
+handling of completions is:
+
+ struct completion {
+ unsigned int done;
+ wait_queue_head_t wait;
+ };
+
+providing the wait queue to place tasks on for waiting and the flag for
+indicating the state of affairs.
+
+Completions should be named to convey the intent of the waiter. A good
+example is:
+
+ wait_for_completion(&early_console_added);
+
+ complete(&early_console_added);
+
+Good naming (as always) helps code readability.
+
+
+Initializing completions:
+-------------------------
+
+Initialization of dynamically allocated completions, often embedded in
+other structures, is done with:
+
+ void init_completion(&done);
+
+Initialization is accomplished by initializing the wait queue and setting
+the default state to "not available", that is, "done" is set to 0.
+
+The re-initialization function, reinit_completion(), simply resets the
+done element to "not available", thus again to 0, without touching the
+wait queue. Calling init_completion() twice on the same completion object is
+most likely a bug as it re-initializes the queue to an empty queue and
+enqueued tasks could get "lost" - use reinit_completion() in that case.
+
+For static declaration and initialization, macros are available. These are:
+
+ static DECLARE_COMPLETION(setup_done)
+
+used for static declarations in file scope. Within functions the static
+initialization should always use:
+
+ DECLARE_COMPLETION_ONSTACK(setup_done)
+
+suitable for automatic/local variables on the stack and will make lockdep
+happy. Note also that one needs to make *sure* the completion passed to
+work threads remains in-scope, and no references remain to on-stack data
+when the initiating function returns.
+
+Using on-stack completions for code that calls any of the _timeout or
+_interruptible/_killable variants is not advisable as they will require
+additional synchronization to prevent the on-stack completion object in
+the timeout/signal cases from going out of scope. Consider using dynamically
+allocated completions when intending to use the _interruptible/_killable
+or _timeout variants of wait_for_completion().
+
+
+Waiting for completions:
+------------------------
+
+For a thread of execution to wait for some concurrent work to finish, it
+calls wait_for_completion() on the initialized completion structure.
+A typical usage scenario is:
+
+ struct completion setup_done;
+ init_completion(&setup_done);
+ initialize_work(...,&setup_done,...)
+
+ /* run non-dependent code */ /* do setup */
+
+ wait_for_completion(&setup_done); complete(setup_done)
+
+This is not implying any temporal order on wait_for_completion() and the
+call to complete() - if the call to complete() happened before the call
+to wait_for_completion() then the waiting side simply will continue
+immediately as all dependencies are satisfied if not it will block until
+completion is signaled by complete().
+
+Note that wait_for_completion() is calling spin_lock_irq()/spin_unlock_irq(),
+so it can only be called safely when you know that interrupts are enabled.
+Calling it from hard-irq or irqs-off atomic contexts will result in
+hard-to-detect spurious enabling of interrupts.
+
+wait_for_completion():
+
+ void wait_for_completion(struct completion *done):
+
+The default behavior is to wait without a timeout and to mark the task as
+uninterruptible. wait_for_completion() and its variants are only safe
+in process context (as they can sleep) but not in atomic context,
+interrupt context, with disabled irqs. or preemption is disabled - see also
+try_wait_for_completion() below for handling completion in atomic/interrupt
+context.
+
+As all variants of wait_for_completion() can (obviously) block for a long
+time, you probably don't want to call this with held mutexes.
+
+
+Variants available:
+-------------------
+
+The below variants all return status and this status should be checked in
+most(/all) cases - in cases where the status is deliberately not checked you
+probably want to make a note explaining this (e.g. see
+arch/arm/kernel/smp.c:__cpu_up()).
+
+A common problem that occurs is to have unclean assignment of return types,
+so care should be taken with assigning return-values to variables of proper
+type. Checking for the specific meaning of return values also has been found
+to be quite inaccurate e.g. constructs like
+if (!wait_for_completion_interruptible_timeout(...)) would execute the same
+code path for successful completion and for the interrupted case - which is
+probably not what you want.
+
+ int wait_for_completion_interruptible(struct completion *done)
+
+This function marks the task TASK_INTERRUPTIBLE. If a signal was received
+while waiting it will return -ERESTARTSYS; 0 otherwise.
+
+ unsigned long wait_for_completion_timeout(struct completion *done,
+ unsigned long timeout)
+
+The task is marked as TASK_UNINTERRUPTIBLE and will wait at most 'timeout'
+(in jiffies). If timeout occurs it returns 0 else the remaining time in
+jiffies (but at least 1). Timeouts are preferably calculated with
+msecs_to_jiffies() or usecs_to_jiffies(). If the returned timeout value is
+deliberately ignored a comment should probably explain why (e.g. see
+drivers/mfd/wm8350-core.c wm8350_read_auxadc())
+
+ long wait_for_completion_interruptible_timeout(
+ struct completion *done, unsigned long timeout)
+
+This function passes a timeout in jiffies and marks the task as
+TASK_INTERRUPTIBLE. If a signal was received it will return -ERESTARTSYS;
+otherwise it returns 0 if the completion timed out or the remaining time in
+jiffies if completion occurred.
+
+Further variants include _killable which uses TASK_KILLABLE as the
+designated tasks state and will return -ERESTARTSYS if it is interrupted or
+else 0 if completion was achieved. There is a _timeout variant as well:
+
+ long wait_for_completion_killable(struct completion *done)
+ long wait_for_completion_killable_timeout(struct completion *done,
+ unsigned long timeout)
+
+The _io variants wait_for_completion_io() behave the same as the non-_io
+variants, except for accounting waiting time as waiting on IO, which has
+an impact on how the task is accounted in scheduling stats.
+
+ void wait_for_completion_io(struct completion *done)
+ unsigned long wait_for_completion_io_timeout(struct completion *done
+ unsigned long timeout)
+
+
+Signaling completions:
+----------------------
+
+A thread that wants to signal that the conditions for continuation have been
+achieved calls complete() to signal exactly one of the waiters that it can
+continue.
+
+ void complete(struct completion *done)
+
+or calls complete_all() to signal all current and future waiters.
+
+ void complete_all(struct completion *done)
+
+The signaling will work as expected even if completions are signaled before
+a thread starts waiting. This is achieved by the waiter "consuming"
+(decrementing) the done element of struct completion. Waiting threads
+wakeup order is the same in which they were enqueued (FIFO order).
+
+If complete() is called multiple times then this will allow for that number
+of waiters to continue - each call to complete() will simply increment the
+done element. Calling complete_all() multiple times is a bug though. Both
+complete() and complete_all() can be called in hard-irq/atomic context safely.
+
+There only can be one thread calling complete() or complete_all() on a
+particular struct completion at any time - serialized through the wait
+queue spinlock. Any such concurrent calls to complete() or complete_all()
+probably are a design bug.
+
+Signaling completion from hard-irq context is fine as it will appropriately
+lock with spin_lock_irqsave/spin_unlock_irqrestore and it will never sleep.
+
+
+try_wait_for_completion()/completion_done():
+--------------------------------------------
+
+The try_wait_for_completion() function will not put the thread on the wait
+queue but rather returns false if it would need to enqueue (block) the thread,
+else it consumes one posted completion and returns true.
+
+ bool try_wait_for_completion(struct completion *done)
+
+Finally, to check the state of a completion without changing it in any way,
+call completion_done(), which returns false if there are no posted
+completions that were not yet consumed by waiters (implying that there are
+waiters) and true otherwise;
+
+ bool completion_done(struct completion *done)
+
+Both try_wait_for_completion() and completion_done() are safe to be called in
+hard-irq or atomic context.
diff --git a/Documentation/scheduler/sched-BFS.txt b/Documentation/scheduler/sched-BFS.txt
new file mode 100644
index 000000000..c10d95601
--- /dev/null
+++ b/Documentation/scheduler/sched-BFS.txt
@@ -0,0 +1,347 @@
+BFS - The Brain Fuck Scheduler by Con Kolivas.
+
+Goals.
+
+The goal of the Brain Fuck Scheduler, referred to as BFS from here on, is to
+completely do away with the complex designs of the past for the cpu process
+scheduler and instead implement one that is very simple in basic design.
+The main focus of BFS is to achieve excellent desktop interactivity and
+responsiveness without heuristics and tuning knobs that are difficult to
+understand, impossible to model and predict the effect of, and when tuned to
+one workload cause massive detriment to another.
+
+
+Design summary.
+
+BFS is best described as a single runqueue, O(n) lookup, earliest effective
+virtual deadline first design, loosely based on EEVDF (earliest eligible virtual
+deadline first) and my previous Staircase Deadline scheduler. Each component
+shall be described in order to understand the significance of, and reasoning for
+it. The codebase when the first stable version was released was approximately
+9000 lines less code than the existing mainline linux kernel scheduler (in
+2.6.31). This does not even take into account the removal of documentation and
+the cgroups code that is not used.
+
+Design reasoning.
+
+The single runqueue refers to the queued but not running processes for the
+entire system, regardless of the number of CPUs. The reason for going back to
+a single runqueue design is that once multiple runqueues are introduced,
+per-CPU or otherwise, there will be complex interactions as each runqueue will
+be responsible for the scheduling latency and fairness of the tasks only on its
+own runqueue, and to achieve fairness and low latency across multiple CPUs, any
+advantage in throughput of having CPU local tasks causes other disadvantages.
+This is due to requiring a very complex balancing system to at best achieve some
+semblance of fairness across CPUs and can only maintain relatively low latency
+for tasks bound to the same CPUs, not across them. To increase said fairness
+and latency across CPUs, the advantage of local runqueue locking, which makes
+for better scalability, is lost due to having to grab multiple locks.
+
+A significant feature of BFS is that all accounting is done purely based on CPU
+used and nowhere is sleep time used in any way to determine entitlement or
+interactivity. Interactivity "estimators" that use some kind of sleep/run
+algorithm are doomed to fail to detect all interactive tasks, and to falsely tag
+tasks that aren't interactive as being so. The reason for this is that it is
+close to impossible to determine that when a task is sleeping, whether it is
+doing it voluntarily, as in a userspace application waiting for input in the
+form of a mouse click or otherwise, or involuntarily, because it is waiting for
+another thread, process, I/O, kernel activity or whatever. Thus, such an
+estimator will introduce corner cases, and more heuristics will be required to
+cope with those corner cases, introducing more corner cases and failed
+interactivity detection and so on. Interactivity in BFS is built into the design
+by virtue of the fact that tasks that are waking up have not used up their quota
+of CPU time, and have earlier effective deadlines, thereby making it very likely
+they will preempt any CPU bound task of equivalent nice level. See below for
+more information on the virtual deadline mechanism. Even if they do not preempt
+a running task, because the rr interval is guaranteed to have a bound upper
+limit on how long a task will wait for, it will be scheduled within a timeframe
+that will not cause visible interface jitter.
+
+
+Design details.
+
+Task insertion.
+
+BFS inserts tasks into each relevant queue as an O(1) insertion into a double
+linked list. On insertion, *every* running queue is checked to see if the newly
+queued task can run on any idle queue, or preempt the lowest running task on the
+system. This is how the cross-CPU scheduling of BFS achieves significantly lower
+latency per extra CPU the system has. In this case the lookup is, in the worst
+case scenario, O(n) where n is the number of CPUs on the system.
+
+Data protection.
+
+BFS has one single lock protecting the process local data of every task in the
+global queue. Thus every insertion, removal and modification of task data in the
+global runqueue needs to grab the global lock. However, once a task is taken by
+a CPU, the CPU has its own local data copy of the running process' accounting
+information which only that CPU accesses and modifies (such as during a
+timer tick) thus allowing the accounting data to be updated lockless. Once a
+CPU has taken a task to run, it removes it from the global queue. Thus the
+global queue only ever has, at most,
+
+ (number of tasks requesting cpu time) - (number of logical CPUs) + 1
+
+tasks in the global queue. This value is relevant for the time taken to look up
+tasks during scheduling. This will increase if many tasks with CPU affinity set
+in their policy to limit which CPUs they're allowed to run on if they outnumber
+the number of CPUs. The +1 is because when rescheduling a task, the CPU's
+currently running task is put back on the queue. Lookup will be described after
+the virtual deadline mechanism is explained.
+
+Virtual deadline.
+
+The key to achieving low latency, scheduling fairness, and "nice level"
+distribution in BFS is entirely in the virtual deadline mechanism. The one
+tunable in BFS is the rr_interval, or "round robin interval". This is the
+maximum time two SCHED_OTHER (or SCHED_NORMAL, the common scheduling policy)
+tasks of the same nice level will be running for, or looking at it the other
+way around, the longest duration two tasks of the same nice level will be
+delayed for. When a task requests cpu time, it is given a quota (time_slice)
+equal to the rr_interval and a virtual deadline. The virtual deadline is
+offset from the current time in jiffies by this equation:
+
+ jiffies + (prio_ratio * rr_interval)
+
+The prio_ratio is determined as a ratio compared to the baseline of nice -20
+and increases by 10% per nice level. The deadline is a virtual one only in that
+no guarantee is placed that a task will actually be scheduled by this time, but
+it is used to compare which task should go next. There are three components to
+how a task is next chosen. First is time_slice expiration. If a task runs out
+of its time_slice, it is descheduled, the time_slice is refilled, and the
+deadline reset to that formula above. Second is sleep, where a task no longer
+is requesting CPU for whatever reason. The time_slice and deadline are _not_
+adjusted in this case and are just carried over for when the task is next
+scheduled. Third is preemption, and that is when a newly waking task is deemed
+higher priority than a currently running task on any cpu by virtue of the fact
+that it has an earlier virtual deadline than the currently running task. The
+earlier deadline is the key to which task is next chosen for the first and
+second cases. Once a task is descheduled, it is put back on the queue, and an
+O(n) lookup of all queued-but-not-running tasks is done to determine which has
+the earliest deadline and that task is chosen to receive CPU next.
+
+The CPU proportion of different nice tasks works out to be approximately the
+
+ (prio_ratio difference)^2
+
+The reason it is squared is that a task's deadline does not change while it is
+running unless it runs out of time_slice. Thus, even if the time actually
+passes the deadline of another task that is queued, it will not get CPU time
+unless the current running task deschedules, and the time "base" (jiffies) is
+constantly moving.
+
+Task lookup.
+
+BFS has 103 priority queues. 100 of these are dedicated to the static priority
+of realtime tasks, and the remaining 3 are, in order of best to worst priority,
+SCHED_ISO (isochronous), SCHED_NORMAL, and SCHED_IDLEPRIO (idle priority
+scheduling). When a task of these priorities is queued, a bitmap of running
+priorities is set showing which of these priorities has tasks waiting for CPU
+time. When a CPU is made to reschedule, the lookup for the next task to get
+CPU time is performed in the following way:
+
+First the bitmap is checked to see what static priority tasks are queued. If
+any realtime priorities are found, the corresponding queue is checked and the
+first task listed there is taken (provided CPU affinity is suitable) and lookup
+is complete. If the priority corresponds to a SCHED_ISO task, they are also
+taken in FIFO order (as they behave like SCHED_RR). If the priority corresponds
+to either SCHED_NORMAL or SCHED_IDLEPRIO, then the lookup becomes O(n). At this
+stage, every task in the runlist that corresponds to that priority is checked
+to see which has the earliest set deadline, and (provided it has suitable CPU
+affinity) it is taken off the runqueue and given the CPU. If a task has an
+expired deadline, it is taken and the rest of the lookup aborted (as they are
+chosen in FIFO order).
+
+Thus, the lookup is O(n) in the worst case only, where n is as described
+earlier, as tasks may be chosen before the whole task list is looked over.
+
+
+Scalability.
+
+The major limitations of BFS will be that of scalability, as the separate
+runqueue designs will have less lock contention as the number of CPUs rises.
+However they do not scale linearly even with separate runqueues as multiple
+runqueues will need to be locked concurrently on such designs to be able to
+achieve fair CPU balancing, to try and achieve some sort of nice-level fairness
+across CPUs, and to achieve low enough latency for tasks on a busy CPU when
+other CPUs would be more suited. BFS has the advantage that it requires no
+balancing algorithm whatsoever, as balancing occurs by proxy simply because
+all CPUs draw off the global runqueue, in priority and deadline order. Despite
+the fact that scalability is _not_ the prime concern of BFS, it both shows very
+good scalability to smaller numbers of CPUs and is likely a more scalable design
+at these numbers of CPUs.
+
+It also has some very low overhead scalability features built into the design
+when it has been deemed their overhead is so marginal that they're worth adding.
+The first is the local copy of the running process' data to the CPU it's running
+on to allow that data to be updated lockless where possible. Then there is
+deference paid to the last CPU a task was running on, by trying that CPU first
+when looking for an idle CPU to use the next time it's scheduled. Finally there
+is the notion of "sticky" tasks that are flagged when they are involuntarily
+descheduled, meaning they still want further CPU time. This sticky flag is
+used to bias heavily against those tasks being scheduled on a different CPU
+unless that CPU would be otherwise idle. When a cpu frequency governor is used
+that scales with CPU load, such as ondemand, sticky tasks are not scheduled
+on a different CPU at all, preferring instead to go idle. This means the CPU
+they were bound to is more likely to increase its speed while the other CPU
+will go idle, thus speeding up total task execution time and likely decreasing
+power usage. This is the only scenario where BFS will allow a CPU to go idle
+in preference to scheduling a task on the earliest available spare CPU.
+
+The real cost of migrating a task from one CPU to another is entirely dependant
+on the cache footprint of the task, how cache intensive the task is, how long
+it's been running on that CPU to take up the bulk of its cache, how big the CPU
+cache is, how fast and how layered the CPU cache is, how fast a context switch
+is... and so on. In other words, it's close to random in the real world where we
+do more than just one sole workload. The only thing we can be sure of is that
+it's not free. So BFS uses the principle that an idle CPU is a wasted CPU and
+utilising idle CPUs is more important than cache locality, and cache locality
+only plays a part after that.
+
+When choosing an idle CPU for a waking task, the cache locality is determined
+according to where the task last ran and then idle CPUs are ranked from best
+to worst to choose the most suitable idle CPU based on cache locality, NUMA
+node locality and hyperthread sibling business. They are chosen in the
+following preference (if idle):
+
+* Same core, idle or busy cache, idle threads
+* Other core, same cache, idle or busy cache, idle threads.
+* Same node, other CPU, idle cache, idle threads.
+* Same node, other CPU, busy cache, idle threads.
+* Same core, busy threads.
+* Other core, same cache, busy threads.
+* Same node, other CPU, busy threads.
+* Other node, other CPU, idle cache, idle threads.
+* Other node, other CPU, busy cache, idle threads.
+* Other node, other CPU, busy threads.
+
+This shows the SMT or "hyperthread" awareness in the design as well which will
+choose a real idle core first before a logical SMT sibling which already has
+tasks on the physical CPU.
+
+Early benchmarking of BFS suggested scalability dropped off at the 16 CPU mark.
+However this benchmarking was performed on an earlier design that was far less
+scalable than the current one so it's hard to know how scalable it is in terms
+of both CPUs (due to the global runqueue) and heavily loaded machines (due to
+O(n) lookup) at this stage. Note that in terms of scalability, the number of
+_logical_ CPUs matters, not the number of _physical_ CPUs. Thus, a dual (2x)
+quad core (4X) hyperthreaded (2X) machine is effectively a 16X. Newer benchmark
+results are very promising indeed, without needing to tweak any knobs, features
+or options. Benchmark contributions are most welcome.
+
+
+Features
+
+As the initial prime target audience for BFS was the average desktop user, it
+was designed to not need tweaking, tuning or have features set to obtain benefit
+from it. Thus the number of knobs and features has been kept to an absolute
+minimum and should not require extra user input for the vast majority of cases.
+There are precisely 2 tunables, and 2 extra scheduling policies. The rr_interval
+and iso_cpu tunables, and the SCHED_ISO and SCHED_IDLEPRIO policies. In addition
+to this, BFS also uses sub-tick accounting. What BFS does _not_ now feature is
+support for CGROUPS. The average user should neither need to know what these
+are, nor should they need to be using them to have good desktop behaviour.
+
+rr_interval
+
+There is only one "scheduler" tunable, the round robin interval. This can be
+accessed in
+
+ /proc/sys/kernel/rr_interval
+
+The value is in milliseconds, and the default value is set to 6ms. Valid values
+are from 1 to 1000. Decreasing the value will decrease latencies at the cost of
+decreasing throughput, while increasing it will improve throughput, but at the
+cost of worsening latencies. The accuracy of the rr interval is limited by HZ
+resolution of the kernel configuration. Thus, the worst case latencies are
+usually slightly higher than this actual value. BFS uses "dithering" to try and
+minimise the effect the Hz limitation has. The default value of 6 is not an
+arbitrary one. It is based on the fact that humans can detect jitter at
+approximately 7ms, so aiming for much lower latencies is pointless under most
+circumstances. It is worth noting this fact when comparing the latency
+performance of BFS to other schedulers. Worst case latencies being higher than
+7ms are far worse than average latencies not being in the microsecond range.
+Experimentation has shown that rr intervals being increased up to 300 can
+improve throughput but beyond that, scheduling noise from elsewhere prevents
+further demonstrable throughput.
+
+Isochronous scheduling.
+
+Isochronous scheduling is a unique scheduling policy designed to provide
+near-real-time performance to unprivileged (ie non-root) users without the
+ability to starve the machine indefinitely. Isochronous tasks (which means
+"same time") are set using, for example, the schedtool application like so:
+
+ schedtool -I -e amarok
+
+This will start the audio application "amarok" as SCHED_ISO. How SCHED_ISO works
+is that it has a priority level between true realtime tasks and SCHED_NORMAL
+which would allow them to preempt all normal tasks, in a SCHED_RR fashion (ie,
+if multiple SCHED_ISO tasks are running, they purely round robin at rr_interval
+rate). However if ISO tasks run for more than a tunable finite amount of time,
+they are then demoted back to SCHED_NORMAL scheduling. This finite amount of
+time is the percentage of _total CPU_ available across the machine, configurable
+as a percentage in the following "resource handling" tunable (as opposed to a
+scheduler tunable):
+
+ /proc/sys/kernel/iso_cpu
+
+and is set to 70% by default. It is calculated over a rolling 5 second average
+Because it is the total CPU available, it means that on a multi CPU machine, it
+is possible to have an ISO task running as realtime scheduling indefinitely on
+just one CPU, as the other CPUs will be available. Setting this to 100 is the
+equivalent of giving all users SCHED_RR access and setting it to 0 removes the
+ability to run any pseudo-realtime tasks.
+
+A feature of BFS is that it detects when an application tries to obtain a
+realtime policy (SCHED_RR or SCHED_FIFO) and the caller does not have the
+appropriate privileges to use those policies. When it detects this, it will
+give the task SCHED_ISO policy instead. Thus it is transparent to the user.
+Because some applications constantly set their policy as well as their nice
+level, there is potential for them to undo the override specified by the user
+on the command line of setting the policy to SCHED_ISO. To counter this, once
+a task has been set to SCHED_ISO policy, it needs superuser privileges to set
+it back to SCHED_NORMAL. This will ensure the task remains ISO and all child
+processes and threads will also inherit the ISO policy.
+
+Idleprio scheduling.
+
+Idleprio scheduling is a scheduling policy designed to give out CPU to a task
+_only_ when the CPU would be otherwise idle. The idea behind this is to allow
+ultra low priority tasks to be run in the background that have virtually no
+effect on the foreground tasks. This is ideally suited to distributed computing
+clients (like setiathome, folding, mprime etc) but can also be used to start
+a video encode or so on without any slowdown of other tasks. To avoid this
+policy from grabbing shared resources and holding them indefinitely, if it
+detects a state where the task is waiting on I/O, the machine is about to
+suspend to ram and so on, it will transiently schedule them as SCHED_NORMAL. As
+per the Isochronous task management, once a task has been scheduled as IDLEPRIO,
+it cannot be put back to SCHED_NORMAL without superuser privileges. Tasks can
+be set to start as SCHED_IDLEPRIO with the schedtool command like so:
+
+ schedtool -D -e ./mprime
+
+Subtick accounting.
+
+It is surprisingly difficult to get accurate CPU accounting, and in many cases,
+the accounting is done by simply determining what is happening at the precise
+moment a timer tick fires off. This becomes increasingly inaccurate as the
+timer tick frequency (HZ) is lowered. It is possible to create an application
+which uses almost 100% CPU, yet by being descheduled at the right time, records
+zero CPU usage. While the main problem with this is that there are possible
+security implications, it is also difficult to determine how much CPU a task
+really does use. BFS tries to use the sub-tick accounting from the TSC clock,
+where possible, to determine real CPU usage. This is not entirely reliable, but
+is far more likely to produce accurate CPU usage data than the existing designs
+and will not show tasks as consuming no CPU usage when they actually are. Thus,
+the amount of CPU reported as being used by BFS will more accurately represent
+how much CPU the task itself is using (as is shown for example by the 'time'
+application), so the reported values may be quite different to other schedulers.
+Values reported as the 'load' are more prone to problems with this design, but
+per process values are closer to real usage. When comparing throughput of BFS
+to other designs, it is important to compare the actual completed work in terms
+of total wall clock time taken and total work done, rather than the reported
+"cpu usage".
+
+
+Con Kolivas <kernel@kolivas.org> Tue, 5 Apr 2011
diff --git a/Documentation/scheduler/sched-arch.txt b/Documentation/scheduler/sched-arch.txt
new file mode 100644
index 000000000..a2f27bbf2
--- /dev/null
+++ b/Documentation/scheduler/sched-arch.txt
@@ -0,0 +1,74 @@
+ CPU Scheduler implementation hints for architecture specific code
+
+ Nick Piggin, 2005
+
+Context switch
+==============
+1. Runqueue locking
+By default, the switch_to arch function is called with the runqueue
+locked. This is usually not a problem unless switch_to may need to
+take the runqueue lock. This is usually due to a wake up operation in
+the context switch. See arch/ia64/include/asm/switch_to.h for an example.
+
+To request the scheduler call switch_to with the runqueue unlocked,
+you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file
+(typically the one where switch_to is defined).
+
+Unlocked context switches introduce only a very minor performance
+penalty to the core scheduler implementation in the CONFIG_SMP case.
+
+CPU idle
+========
+Your cpu_idle routines need to obey the following rules:
+
+1. Preempt should now disabled over idle routines. Should only
+ be enabled to call schedule() then disabled again.
+
+2. need_resched/TIF_NEED_RESCHED is only ever set, and will never
+ be cleared until the running task has called schedule(). Idle
+ threads need only ever query need_resched, and may never set or
+ clear it.
+
+3. When cpu_idle finds (need_resched() == 'true'), it should call
+ schedule(). It should not call schedule() otherwise.
+
+4. The only time interrupts need to be disabled when checking
+ need_resched is if we are about to sleep the processor until
+ the next interrupt (this doesn't provide any protection of
+ need_resched, it prevents losing an interrupt).
+
+ 4a. Common problem with this type of sleep appears to be:
+ local_irq_disable();
+ if (!need_resched()) {
+ local_irq_enable();
+ *** resched interrupt arrives here ***
+ __asm__("sleep until next interrupt");
+ }
+
+5. TIF_POLLING_NRFLAG can be set by idle routines that do not
+ need an interrupt to wake them up when need_resched goes high.
+ In other words, they must be periodically polling need_resched,
+ although it may be reasonable to do some background work or enter
+ a low CPU priority.
+
+ 5a. If TIF_POLLING_NRFLAG is set, and we do decide to enter
+ an interrupt sleep, it needs to be cleared then a memory
+ barrier issued (followed by a test of need_resched with
+ interrupts disabled, as explained in 3).
+
+arch/x86/kernel/process.c has examples of both polling and
+sleeping idle functions.
+
+
+Possible arch/ problems
+=======================
+
+Possible arch problems I found (and either tried to fix or didn't):
+
+ia64 - is safe_halt call racy vs interrupts? (does it sleep?) (See #4a)
+
+sh64 - Is sleeping racy vs interrupts? (See #4a)
+
+sparc - IRQs on at this point(?), change local_irq_save to _disable.
+ - TODO: needs secondary CPUs to disable preempt (See #1)
+
diff --git a/Documentation/scheduler/sched-bwc.txt b/Documentation/scheduler/sched-bwc.txt
new file mode 100644
index 000000000..f6b1873f6
--- /dev/null
+++ b/Documentation/scheduler/sched-bwc.txt
@@ -0,0 +1,122 @@
+CFS Bandwidth Control
+=====================
+
+[ This document only discusses CPU bandwidth control for SCHED_NORMAL.
+ The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.txt ]
+
+CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the
+specification of the maximum CPU bandwidth available to a group or hierarchy.
+
+The bandwidth allowed for a group is specified using a quota and period. Within
+each given "period" (microseconds), a group is allowed to consume only up to
+"quota" microseconds of CPU time. When the CPU bandwidth consumption of a
+group exceeds this limit (for that period), the tasks belonging to its
+hierarchy will be throttled and are not allowed to run again until the next
+period.
+
+A group's unused runtime is globally tracked, being refreshed with quota units
+above at each period boundary. As threads consume this bandwidth it is
+transferred to cpu-local "silos" on a demand basis. The amount transferred
+within each of these updates is tunable and described as the "slice".
+
+Management
+----------
+Quota and period are managed within the cpu subsystem via cgroupfs.
+
+cpu.cfs_quota_us: the total available run-time within a period (in microseconds)
+cpu.cfs_period_us: the length of a period (in microseconds)
+cpu.stat: exports throttling statistics [explained further below]
+
+The default values are:
+ cpu.cfs_period_us=100ms
+ cpu.cfs_quota=-1
+
+A value of -1 for cpu.cfs_quota_us indicates that the group does not have any
+bandwidth restriction in place, such a group is described as an unconstrained
+bandwidth group. This represents the traditional work-conserving behavior for
+CFS.
+
+Writing any (valid) positive value(s) will enact the specified bandwidth limit.
+The minimum quota allowed for the quota or period is 1ms. There is also an
+upper bound on the period length of 1s. Additional restrictions exist when
+bandwidth limits are used in a hierarchical fashion, these are explained in
+more detail below.
+
+Writing any negative value to cpu.cfs_quota_us will remove the bandwidth limit
+and return the group to an unconstrained state once more.
+
+Any updates to a group's bandwidth specification will result in it becoming
+unthrottled if it is in a constrained state.
+
+System wide settings
+--------------------
+For efficiency run-time is transferred between the global pool and CPU local
+"silos" in a batch fashion. This greatly reduces global accounting pressure
+on large systems. The amount transferred each time such an update is required
+is described as the "slice".
+
+This is tunable via procfs:
+ /proc/sys/kernel/sched_cfs_bandwidth_slice_us (default=5ms)
+
+Larger slice values will reduce transfer overheads, while smaller values allow
+for more fine-grained consumption.
+
+Statistics
+----------
+A group's bandwidth statistics are exported via 3 fields in cpu.stat.
+
+cpu.stat:
+- nr_periods: Number of enforcement intervals that have elapsed.
+- nr_throttled: Number of times the group has been throttled/limited.
+- throttled_time: The total time duration (in nanoseconds) for which entities
+ of the group have been throttled.
+
+This interface is read-only.
+
+Hierarchical considerations
+---------------------------
+The interface enforces that an individual entity's bandwidth is always
+attainable, that is: max(c_i) <= C. However, over-subscription in the
+aggregate case is explicitly allowed to enable work-conserving semantics
+within a hierarchy.
+ e.g. \Sum (c_i) may exceed C
+[ Where C is the parent's bandwidth, and c_i its children ]
+
+
+There are two ways in which a group may become throttled:
+ a. it fully consumes its own quota within a period
+ b. a parent's quota is fully consumed within its period
+
+In case b) above, even though the child may have runtime remaining it will not
+be allowed to until the parent's runtime is refreshed.
+
+Examples
+--------
+1. Limit a group to 1 CPU worth of runtime.
+
+ If period is 250ms and quota is also 250ms, the group will get
+ 1 CPU worth of runtime every 250ms.
+
+ # echo 250000 > cpu.cfs_quota_us /* quota = 250ms */
+ # echo 250000 > cpu.cfs_period_us /* period = 250ms */
+
+2. Limit a group to 2 CPUs worth of runtime on a multi-CPU machine.
+
+ With 500ms period and 1000ms quota, the group can get 2 CPUs worth of
+ runtime every 500ms.
+
+ # echo 1000000 > cpu.cfs_quota_us /* quota = 1000ms */
+ # echo 500000 > cpu.cfs_period_us /* period = 500ms */
+
+ The larger period here allows for increased burst capacity.
+
+3. Limit a group to 20% of 1 CPU.
+
+ With 50ms period, 10ms quota will be equivalent to 20% of 1 CPU.
+
+ # echo 10000 > cpu.cfs_quota_us /* quota = 10ms */
+ # echo 50000 > cpu.cfs_period_us /* period = 50ms */
+
+ By using a small period here we are ensuring a consistent latency
+ response at the expense of burst capacity.
+
diff --git a/Documentation/scheduler/sched-deadline.txt b/Documentation/scheduler/sched-deadline.txt
new file mode 100644
index 000000000..21461a044
--- /dev/null
+++ b/Documentation/scheduler/sched-deadline.txt
@@ -0,0 +1,525 @@
+ Deadline Task Scheduling
+ ------------------------
+
+CONTENTS
+========
+
+ 0. WARNING
+ 1. Overview
+ 2. Scheduling algorithm
+ 3. Scheduling Real-Time Tasks
+ 4. Bandwidth management
+ 4.1 System-wide settings
+ 4.2 Task interface
+ 4.3 Default behavior
+ 5. Tasks CPU affinity
+ 5.1 SCHED_DEADLINE and cpusets HOWTO
+ 6. Future plans
+ A. Test suite
+ B. Minimal main()
+
+
+0. WARNING
+==========
+
+ Fiddling with these settings can result in an unpredictable or even unstable
+ system behavior. As for -rt (group) scheduling, it is assumed that root users
+ know what they're doing.
+
+
+1. Overview
+===========
+
+ The SCHED_DEADLINE policy contained inside the sched_dl scheduling class is
+ basically an implementation of the Earliest Deadline First (EDF) scheduling
+ algorithm, augmented with a mechanism (called Constant Bandwidth Server, CBS)
+ that makes it possible to isolate the behavior of tasks between each other.
+
+
+2. Scheduling algorithm
+==================
+
+ SCHED_DEADLINE uses three parameters, named "runtime", "period", and
+ "deadline", to schedule tasks. A SCHED_DEADLINE task should receive
+ "runtime" microseconds of execution time every "period" microseconds, and
+ these "runtime" microseconds are available within "deadline" microseconds
+ from the beginning of the period. In order to implement this behaviour,
+ every time the task wakes up, the scheduler computes a "scheduling deadline"
+ consistent with the guarantee (using the CBS[2,3] algorithm). Tasks are then
+ scheduled using EDF[1] on these scheduling deadlines (the task with the
+ earliest scheduling deadline is selected for execution). Notice that the
+ task actually receives "runtime" time units within "deadline" if a proper
+ "admission control" strategy (see Section "4. Bandwidth management") is used
+ (clearly, if the system is overloaded this guarantee cannot be respected).
+
+ Summing up, the CBS[2,3] algorithms assigns scheduling deadlines to tasks so
+ that each task runs for at most its runtime every period, avoiding any
+ interference between different tasks (bandwidth isolation), while the EDF[1]
+ algorithm selects the task with the earliest scheduling deadline as the one
+ to be executed next. Thanks to this feature, tasks that do not strictly comply
+ with the "traditional" real-time task model (see Section 3) can effectively
+ use the new policy.
+
+ In more details, the CBS algorithm assigns scheduling deadlines to
+ tasks in the following way:
+
+ - Each SCHED_DEADLINE task is characterised by the "runtime",
+ "deadline", and "period" parameters;
+
+ - The state of the task is described by a "scheduling deadline", and
+ a "remaining runtime". These two parameters are initially set to 0;
+
+ - When a SCHED_DEADLINE task wakes up (becomes ready for execution),
+ the scheduler checks if
+
+ remaining runtime runtime
+ ---------------------------------- > ---------
+ scheduling deadline - current time period
+
+ then, if the scheduling deadline is smaller than the current time, or
+ this condition is verified, the scheduling deadline and the
+ remaining runtime are re-initialised as
+
+ scheduling deadline = current time + deadline
+ remaining runtime = runtime
+
+ otherwise, the scheduling deadline and the remaining runtime are
+ left unchanged;
+
+ - When a SCHED_DEADLINE task executes for an amount of time t, its
+ remaining runtime is decreased as
+
+ remaining runtime = remaining runtime - t
+
+ (technically, the runtime is decreased at every tick, or when the
+ task is descheduled / preempted);
+
+ - When the remaining runtime becomes less or equal than 0, the task is
+ said to be "throttled" (also known as "depleted" in real-time literature)
+ and cannot be scheduled until its scheduling deadline. The "replenishment
+ time" for this task (see next item) is set to be equal to the current
+ value of the scheduling deadline;
+
+ - When the current time is equal to the replenishment time of a
+ throttled task, the scheduling deadline and the remaining runtime are
+ updated as
+
+ scheduling deadline = scheduling deadline + period
+ remaining runtime = remaining runtime + runtime
+
+
+3. Scheduling Real-Time Tasks
+=============================
+
+ * BIG FAT WARNING ******************************************************
+ *
+ * This section contains a (not-thorough) summary on classical deadline
+ * scheduling theory, and how it applies to SCHED_DEADLINE.
+ * The reader can "safely" skip to Section 4 if only interested in seeing
+ * how the scheduling policy can be used. Anyway, we strongly recommend
+ * to come back here and continue reading (once the urge for testing is
+ * satisfied :P) to be sure of fully understanding all technical details.
+ ************************************************************************
+
+ There are no limitations on what kind of task can exploit this new
+ scheduling discipline, even if it must be said that it is particularly
+ suited for periodic or sporadic real-time tasks that need guarantees on their
+ timing behavior, e.g., multimedia, streaming, control applications, etc.
+
+ A typical real-time task is composed of a repetition of computation phases
+ (task instances, or jobs) which are activated on a periodic or sporadic
+ fashion.
+ Each job J_j (where J_j is the j^th job of the task) is characterised by an
+ arrival time r_j (the time when the job starts), an amount of computation
+ time c_j needed to finish the job, and a job absolute deadline d_j, which
+ is the time within which the job should be finished. The maximum execution
+ time max_j{c_j} is called "Worst Case Execution Time" (WCET) for the task.
+ A real-time task can be periodic with period P if r_{j+1} = r_j + P, or
+ sporadic with minimum inter-arrival time P is r_{j+1} >= r_j + P. Finally,
+ d_j = r_j + D, where D is the task's relative deadline.
+ The utilisation of a real-time task is defined as the ratio between its
+ WCET and its period (or minimum inter-arrival time), and represents
+ the fraction of CPU time needed to execute the task.
+
+ If the total utilisation sum_i(WCET_i/P_i) is larger than M (with M equal
+ to the number of CPUs), then the scheduler is unable to respect all the
+ deadlines.
+ Note that total utilisation is defined as the sum of the utilisations
+ WCET_i/P_i over all the real-time tasks in the system. When considering
+ multiple real-time tasks, the parameters of the i-th task are indicated
+ with the "_i" suffix.
+ Moreover, if the total utilisation is larger than M, then we risk starving
+ non- real-time tasks by real-time tasks.
+ If, instead, the total utilisation is smaller than M, then non real-time
+ tasks will not be starved and the system might be able to respect all the
+ deadlines.
+ As a matter of fact, in this case it is possible to provide an upper bound
+ for tardiness (defined as the maximum between 0 and the difference
+ between the finishing time of a job and its absolute deadline).
+ More precisely, it can be proven that using a global EDF scheduler the
+ maximum tardiness of each task is smaller or equal than
+ ((M − 1) · WCET_max − WCET_min)/(M − (M − 2) · U_max) + WCET_max
+ where WCET_max = max_i{WCET_i} is the maximum WCET, WCET_min=min_i{WCET_i}
+ is the minimum WCET, and U_max = max_i{WCET_i/P_i} is the maximum utilisation.
+
+ If M=1 (uniprocessor system), or in case of partitioned scheduling (each
+ real-time task is statically assigned to one and only one CPU), it is
+ possible to formally check if all the deadlines are respected.
+ If D_i = P_i for all tasks, then EDF is able to respect all the deadlines
+ of all the tasks executing on a CPU if and only if the total utilisation
+ of the tasks running on such a CPU is smaller or equal than 1.
+ If D_i != P_i for some task, then it is possible to define the density of
+ a task as C_i/min{D_i,T_i}, and EDF is able to respect all the deadlines
+ of all the tasks running on a CPU if the sum sum_i C_i/min{D_i,T_i} of the
+ densities of the tasks running on such a CPU is smaller or equal than 1
+ (notice that this condition is only sufficient, and not necessary).
+
+ On multiprocessor systems with global EDF scheduling (non partitioned
+ systems), a sufficient test for schedulability can not be based on the
+ utilisations (it can be shown that task sets with utilisations slightly
+ larger than 1 can miss deadlines regardless of the number of CPUs M).
+ However, as previously stated, enforcing that the total utilisation is smaller
+ than M is enough to guarantee that non real-time tasks are not starved and
+ that the tardiness of real-time tasks has an upper bound.
+
+ SCHED_DEADLINE can be used to schedule real-time tasks guaranteeing that
+ the jobs' deadlines of a task are respected. In order to do this, a task
+ must be scheduled by setting:
+
+ - runtime >= WCET
+ - deadline = D
+ - period <= P
+
+ IOW, if runtime >= WCET and if period is >= P, then the scheduling deadlines
+ and the absolute deadlines (d_j) coincide, so a proper admission control
+ allows to respect the jobs' absolute deadlines for this task (this is what is
+ called "hard schedulability property" and is an extension of Lemma 1 of [2]).
+ Notice that if runtime > deadline the admission control will surely reject
+ this task, as it is not possible to respect its temporal constraints.
+
+ References:
+ 1 - C. L. Liu and J. W. Layland. Scheduling algorithms for multiprogram-
+ ming in a hard-real-time environment. Journal of the Association for
+ Computing Machinery, 20(1), 1973.
+ 2 - L. Abeni , G. Buttazzo. Integrating Multimedia Applications in Hard
+ Real-Time Systems. Proceedings of the 19th IEEE Real-time Systems
+ Symposium, 1998. http://retis.sssup.it/~giorgio/paps/1998/rtss98-cbs.pdf
+ 3 - L. Abeni. Server Mechanisms for Multimedia Applications. ReTiS Lab
+ Technical Report. http://disi.unitn.it/~abeni/tr-98-01.pdf
+
+4. Bandwidth management
+=======================
+
+ As previously mentioned, in order for -deadline scheduling to be
+ effective and useful (that is, to be able to provide "runtime" time units
+ within "deadline"), it is important to have some method to keep the allocation
+ of the available fractions of CPU time to the various tasks under control.
+ This is usually called "admission control" and if it is not performed, then
+ no guarantee can be given on the actual scheduling of the -deadline tasks.
+
+ As already stated in Section 3, a necessary condition to be respected to
+ correctly schedule a set of real-time tasks is that the total utilisation
+ is smaller than M. When talking about -deadline tasks, this requires that
+ the sum of the ratio between runtime and period for all tasks is smaller
+ than M. Notice that the ratio runtime/period is equivalent to the utilisation
+ of a "traditional" real-time task, and is also often referred to as
+ "bandwidth".
+ The interface used to control the CPU bandwidth that can be allocated
+ to -deadline tasks is similar to the one already used for -rt
+ tasks with real-time group scheduling (a.k.a. RT-throttling - see
+ Documentation/scheduler/sched-rt-group.txt), and is based on readable/
+ writable control files located in procfs (for system wide settings).
+ Notice that per-group settings (controlled through cgroupfs) are still not
+ defined for -deadline tasks, because more discussion is needed in order to
+ figure out how we want to manage SCHED_DEADLINE bandwidth at the task group
+ level.
+
+ A main difference between deadline bandwidth management and RT-throttling
+ is that -deadline tasks have bandwidth on their own (while -rt ones don't!),
+ and thus we don't need a higher level throttling mechanism to enforce the
+ desired bandwidth. In other words, this means that interface parameters are
+ only used at admission control time (i.e., when the user calls
+ sched_setattr()). Scheduling is then performed considering actual tasks'
+ parameters, so that CPU bandwidth is allocated to SCHED_DEADLINE tasks
+ respecting their needs in terms of granularity. Therefore, using this simple
+ interface we can put a cap on total utilization of -deadline tasks (i.e.,
+ \Sum (runtime_i / period_i) < global_dl_utilization_cap).
+
+4.1 System wide settings
+------------------------
+
+ The system wide settings are configured under the /proc virtual file system.
+
+ For now the -rt knobs are used for -deadline admission control and the
+ -deadline runtime is accounted against the -rt runtime. We realise that this
+ isn't entirely desirable; however, it is better to have a small interface for
+ now, and be able to change it easily later. The ideal situation (see 5.) is to
+ run -rt tasks from a -deadline server; in which case the -rt bandwidth is a
+ direct subset of dl_bw.
+
+ This means that, for a root_domain comprising M CPUs, -deadline tasks
+ can be created while the sum of their bandwidths stays below:
+
+ M * (sched_rt_runtime_us / sched_rt_period_us)
+
+ It is also possible to disable this bandwidth management logic, and
+ be thus free of oversubscribing the system up to any arbitrary level.
+ This is done by writing -1 in /proc/sys/kernel/sched_rt_runtime_us.
+
+
+4.2 Task interface
+------------------
+
+ Specifying a periodic/sporadic task that executes for a given amount of
+ runtime at each instance, and that is scheduled according to the urgency of
+ its own timing constraints needs, in general, a way of declaring:
+ - a (maximum/typical) instance execution time,
+ - a minimum interval between consecutive instances,
+ - a time constraint by which each instance must be completed.
+
+ Therefore:
+ * a new struct sched_attr, containing all the necessary fields is
+ provided;
+ * the new scheduling related syscalls that manipulate it, i.e.,
+ sched_setattr() and sched_getattr() are implemented.
+
+
+4.3 Default behavior
+---------------------
+
+ The default value for SCHED_DEADLINE bandwidth is to have rt_runtime equal to
+ 950000. With rt_period equal to 1000000, by default, it means that -deadline
+ tasks can use at most 95%, multiplied by the number of CPUs that compose the
+ root_domain, for each root_domain.
+ This means that non -deadline tasks will receive at least 5% of the CPU time,
+ and that -deadline tasks will receive their runtime with a guaranteed
+ worst-case delay respect to the "deadline" parameter. If "deadline" = "period"
+ and the cpuset mechanism is used to implement partitioned scheduling (see
+ Section 5), then this simple setting of the bandwidth management is able to
+ deterministically guarantee that -deadline tasks will receive their runtime
+ in a period.
+
+ Finally, notice that in order not to jeopardize the admission control a
+ -deadline task cannot fork.
+
+5. Tasks CPU affinity
+=====================
+
+ -deadline tasks cannot have an affinity mask smaller that the entire
+ root_domain they are created on. However, affinities can be specified
+ through the cpuset facility (Documentation/cgroups/cpusets.txt).
+
+5.1 SCHED_DEADLINE and cpusets HOWTO
+------------------------------------
+
+ An example of a simple configuration (pin a -deadline task to CPU0)
+ follows (rt-app is used to create a -deadline task).
+
+ mkdir /dev/cpuset
+ mount -t cgroup -o cpuset cpuset /dev/cpuset
+ cd /dev/cpuset
+ mkdir cpu0
+ echo 0 > cpu0/cpuset.cpus
+ echo 0 > cpu0/cpuset.mems
+ echo 1 > cpuset.cpu_exclusive
+ echo 0 > cpuset.sched_load_balance
+ echo 1 > cpu0/cpuset.cpu_exclusive
+ echo 1 > cpu0/cpuset.mem_exclusive
+ echo $$ > cpu0/tasks
+ rt-app -t 100000:10000:d:0 -D5 (it is now actually superfluous to specify
+ task affinity)
+
+6. Future plans
+===============
+
+ Still missing:
+
+ - refinements to deadline inheritance, especially regarding the possibility
+ of retaining bandwidth isolation among non-interacting tasks. This is
+ being studied from both theoretical and practical points of view, and
+ hopefully we should be able to produce some demonstrative code soon;
+ - (c)group based bandwidth management, and maybe scheduling;
+ - access control for non-root users (and related security concerns to
+ address), which is the best way to allow unprivileged use of the mechanisms
+ and how to prevent non-root users "cheat" the system?
+
+ As already discussed, we are planning also to merge this work with the EDF
+ throttling patches [https://lkml.org/lkml/2010/2/23/239] but we still are in
+ the preliminary phases of the merge and we really seek feedback that would
+ help us decide on the direction it should take.
+
+Appendix A. Test suite
+======================
+
+ The SCHED_DEADLINE policy can be easily tested using two applications that
+ are part of a wider Linux Scheduler validation suite. The suite is
+ available as a GitHub repository: https://github.com/scheduler-tools.
+
+ The first testing application is called rt-app and can be used to
+ start multiple threads with specific parameters. rt-app supports
+ SCHED_{OTHER,FIFO,RR,DEADLINE} scheduling policies and their related
+ parameters (e.g., niceness, priority, runtime/deadline/period). rt-app
+ is a valuable tool, as it can be used to synthetically recreate certain
+ workloads (maybe mimicking real use-cases) and evaluate how the scheduler
+ behaves under such workloads. In this way, results are easily reproducible.
+ rt-app is available at: https://github.com/scheduler-tools/rt-app.
+
+ Thread parameters can be specified from the command line, with something like
+ this:
+
+ # rt-app -t 100000:10000:d -t 150000:20000:f:10 -D5
+
+ The above creates 2 threads. The first one, scheduled by SCHED_DEADLINE,
+ executes for 10ms every 100ms. The second one, scheduled at SCHED_FIFO
+ priority 10, executes for 20ms every 150ms. The test will run for a total
+ of 5 seconds.
+
+ More interestingly, configurations can be described with a json file that
+ can be passed as input to rt-app with something like this:
+
+ # rt-app my_config.json
+
+ The parameters that can be specified with the second method are a superset
+ of the command line options. Please refer to rt-app documentation for more
+ details (<rt-app-sources>/doc/*.json).
+
+ The second testing application is a modification of schedtool, called
+ schedtool-dl, which can be used to setup SCHED_DEADLINE parameters for a
+ certain pid/application. schedtool-dl is available at:
+ https://github.com/scheduler-tools/schedtool-dl.git.
+
+ The usage is straightforward:
+
+ # schedtool -E -t 10000000:100000000 -e ./my_cpuhog_app
+
+ With this, my_cpuhog_app is put to run inside a SCHED_DEADLINE reservation
+ of 10ms every 100ms (note that parameters are expressed in microseconds).
+ You can also use schedtool to create a reservation for an already running
+ application, given that you know its pid:
+
+ # schedtool -E -t 10000000:100000000 my_app_pid
+
+Appendix B. Minimal main()
+==========================
+
+ We provide in what follows a simple (ugly) self-contained code snippet
+ showing how SCHED_DEADLINE reservations can be created by a real-time
+ application developer.
+
+ #define _GNU_SOURCE
+ #include <unistd.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ #include <time.h>
+ #include <linux/unistd.h>
+ #include <linux/kernel.h>
+ #include <linux/types.h>
+ #include <sys/syscall.h>
+ #include <pthread.h>
+
+ #define gettid() syscall(__NR_gettid)
+
+ #define SCHED_DEADLINE 6
+
+ /* XXX use the proper syscall numbers */
+ #ifdef __x86_64__
+ #define __NR_sched_setattr 314
+ #define __NR_sched_getattr 315
+ #endif
+
+ #ifdef __i386__
+ #define __NR_sched_setattr 351
+ #define __NR_sched_getattr 352
+ #endif
+
+ #ifdef __arm__
+ #define __NR_sched_setattr 380
+ #define __NR_sched_getattr 381
+ #endif
+
+ static volatile int done;
+
+ struct sched_attr {
+ __u32 size;
+
+ __u32 sched_policy;
+ __u64 sched_flags;
+
+ /* SCHED_NORMAL, SCHED_BATCH */
+ __s32 sched_nice;
+
+ /* SCHED_FIFO, SCHED_RR */
+ __u32 sched_priority;
+
+ /* SCHED_DEADLINE (nsec) */
+ __u64 sched_runtime;
+ __u64 sched_deadline;
+ __u64 sched_period;
+ };
+
+ int sched_setattr(pid_t pid,
+ const struct sched_attr *attr,
+ unsigned int flags)
+ {
+ return syscall(__NR_sched_setattr, pid, attr, flags);
+ }
+
+ int sched_getattr(pid_t pid,
+ struct sched_attr *attr,
+ unsigned int size,
+ unsigned int flags)
+ {
+ return syscall(__NR_sched_getattr, pid, attr, size, flags);
+ }
+
+ void *run_deadline(void *data)
+ {
+ struct sched_attr attr;
+ int x = 0;
+ int ret;
+ unsigned int flags = 0;
+
+ printf("deadline thread started [%ld]\n", gettid());
+
+ attr.size = sizeof(attr);
+ attr.sched_flags = 0;
+ attr.sched_nice = 0;
+ attr.sched_priority = 0;
+
+ /* This creates a 10ms/30ms reservation */
+ attr.sched_policy = SCHED_DEADLINE;
+ attr.sched_runtime = 10 * 1000 * 1000;
+ attr.sched_period = attr.sched_deadline = 30 * 1000 * 1000;
+
+ ret = sched_setattr(0, &attr, flags);
+ if (ret < 0) {
+ done = 0;
+ perror("sched_setattr");
+ exit(-1);
+ }
+
+ while (!done) {
+ x++;
+ }
+
+ printf("deadline thread dies [%ld]\n", gettid());
+ return NULL;
+ }
+
+ int main (int argc, char **argv)
+ {
+ pthread_t thread;
+
+ printf("main thread [%ld]\n", gettid());
+
+ pthread_create(&thread, NULL, run_deadline, NULL);
+
+ sleep(10);
+
+ done = 1;
+ pthread_join(thread, NULL);
+
+ printf("main dies [%ld]\n", gettid());
+ return 0;
+ }
diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt
new file mode 100644
index 000000000..f14f49304
--- /dev/null
+++ b/Documentation/scheduler/sched-design-CFS.txt
@@ -0,0 +1,242 @@
+ =============
+ CFS Scheduler
+ =============
+
+
+1. OVERVIEW
+
+CFS stands for "Completely Fair Scheduler," and is the new "desktop" process
+scheduler implemented by Ingo Molnar and merged in Linux 2.6.23. It is the
+replacement for the previous vanilla scheduler's SCHED_OTHER interactivity
+code.
+
+80% of CFS's design can be summed up in a single sentence: CFS basically models
+an "ideal, precise multi-tasking CPU" on real hardware.
+
+"Ideal multi-tasking CPU" is a (non-existent :-)) CPU that has 100% physical
+power and which can run each task at precise equal speed, in parallel, each at
+1/nr_running speed. For example: if there are 2 tasks running, then it runs
+each at 50% physical power --- i.e., actually in parallel.
+
+On real hardware, we can run only a single task at once, so we have to
+introduce the concept of "virtual runtime." The virtual runtime of a task
+specifies when its next timeslice would start execution on the ideal
+multi-tasking CPU described above. In practice, the virtual runtime of a task
+is its actual runtime normalized to the total number of running tasks.
+
+
+
+2. FEW IMPLEMENTATION DETAILS
+
+In CFS the virtual runtime is expressed and tracked via the per-task
+p->se.vruntime (nanosec-unit) value. This way, it's possible to accurately
+timestamp and measure the "expected CPU time" a task should have gotten.
+
+[ small detail: on "ideal" hardware, at any time all tasks would have the same
+ p->se.vruntime value --- i.e., tasks would execute simultaneously and no task
+ would ever get "out of balance" from the "ideal" share of CPU time. ]
+
+CFS's task picking logic is based on this p->se.vruntime value and it is thus
+very simple: it always tries to run the task with the smallest p->se.vruntime
+value (i.e., the task which executed least so far). CFS always tries to split
+up CPU time between runnable tasks as close to "ideal multitasking hardware" as
+possible.
+
+Most of the rest of CFS's design just falls out of this really simple concept,
+with a few add-on embellishments like nice levels, multiprocessing and various
+algorithm variants to recognize sleepers.
+
+
+
+3. THE RBTREE
+
+CFS's design is quite radical: it does not use the old data structures for the
+runqueues, but it uses a time-ordered rbtree to build a "timeline" of future
+task execution, and thus has no "array switch" artifacts (by which both the
+previous vanilla scheduler and RSDL/SD are affected).
+
+CFS also maintains the rq->cfs.min_vruntime value, which is a monotonic
+increasing value tracking the smallest vruntime among all tasks in the
+runqueue. The total amount of work done by the system is tracked using
+min_vruntime; that value is used to place newly activated entities on the left
+side of the tree as much as possible.
+
+The total number of running tasks in the runqueue is accounted through the
+rq->cfs.load value, which is the sum of the weights of the tasks queued on the
+runqueue.
+
+CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the
+p->se.vruntime key. CFS picks the "leftmost" task from this tree and sticks to it.
+As the system progresses forwards, the executed tasks are put into the tree
+more and more to the right --- slowly but surely giving a chance for every task
+to become the "leftmost task" and thus get on the CPU within a deterministic
+amount of time.
+
+Summing up, CFS works like this: it runs a task a bit, and when the task
+schedules (or a scheduler tick happens) the task's CPU usage is "accounted
+for": the (small) time it just spent using the physical CPU is added to
+p->se.vruntime. Once p->se.vruntime gets high enough so that another task
+becomes the "leftmost task" of the time-ordered rbtree it maintains (plus a
+small amount of "granularity" distance relative to the leftmost task so that we
+do not over-schedule tasks and trash the cache), then the new leftmost task is
+picked and the current task is preempted.
+
+
+
+4. SOME FEATURES OF CFS
+
+CFS uses nanosecond granularity accounting and does not rely on any jiffies or
+other HZ detail. Thus the CFS scheduler has no notion of "timeslices" in the
+way the previous scheduler had, and has no heuristics whatsoever. There is
+only one central tunable (you have to switch on CONFIG_SCHED_DEBUG):
+
+ /proc/sys/kernel/sched_min_granularity_ns
+
+which can be used to tune the scheduler from "desktop" (i.e., low latencies) to
+"server" (i.e., good batching) workloads. It defaults to a setting suitable
+for desktop workloads. SCHED_BATCH is handled by the CFS scheduler module too.
+
+Due to its design, the CFS scheduler is not prone to any of the "attacks" that
+exist today against the heuristics of the stock scheduler: fiftyp.c, thud.c,
+chew.c, ring-test.c, massive_intr.c all work fine and do not impact
+interactivity and produce the expected behavior.
+
+The CFS scheduler has a much stronger handling of nice levels and SCHED_BATCH
+than the previous vanilla scheduler: both types of workloads are isolated much
+more aggressively.
+
+SMP load-balancing has been reworked/sanitized: the runqueue-walking
+assumptions are gone from the load-balancing code now, and iterators of the
+scheduling modules are used. The balancing code got quite a bit simpler as a
+result.
+
+
+
+5. Scheduling policies
+
+CFS implements three scheduling policies:
+
+ - SCHED_NORMAL (traditionally called SCHED_OTHER): The scheduling
+ policy that is used for regular tasks.
+
+ - SCHED_BATCH: Does not preempt nearly as often as regular tasks
+ would, thereby allowing tasks to run longer and make better use of
+ caches but at the cost of interactivity. This is well suited for
+ batch jobs.
+
+ - SCHED_IDLE: This is even weaker than nice 19, but its not a true
+ idle timer scheduler in order to avoid to get into priority
+ inversion problems which would deadlock the machine.
+
+SCHED_FIFO/_RR are implemented in sched/rt.c and are as specified by
+POSIX.
+
+The command chrt from util-linux-ng 2.13.1.1 can set all of these except
+SCHED_IDLE.
+
+
+
+6. SCHEDULING CLASSES
+
+The new CFS scheduler has been designed in such a way to introduce "Scheduling
+Classes," an extensible hierarchy of scheduler modules. These modules
+encapsulate scheduling policy details and are handled by the scheduler core
+without the core code assuming too much about them.
+
+sched/fair.c implements the CFS scheduler described above.
+
+sched/rt.c implements SCHED_FIFO and SCHED_RR semantics, in a simpler way than
+the previous vanilla scheduler did. It uses 100 runqueues (for all 100 RT
+priority levels, instead of 140 in the previous scheduler) and it needs no
+expired array.
+
+Scheduling classes are implemented through the sched_class structure, which
+contains hooks to functions that must be called whenever an interesting event
+occurs.
+
+This is the (partial) list of the hooks:
+
+ - enqueue_task(...)
+
+ Called when a task enters a runnable state.
+ It puts the scheduling entity (task) into the red-black tree and
+ increments the nr_running variable.
+
+ - dequeue_task(...)
+
+ When a task is no longer runnable, this function is called to keep the
+ corresponding scheduling entity out of the red-black tree. It decrements
+ the nr_running variable.
+
+ - yield_task(...)
+
+ This function is basically just a dequeue followed by an enqueue, unless the
+ compat_yield sysctl is turned on; in that case, it places the scheduling
+ entity at the right-most end of the red-black tree.
+
+ - check_preempt_curr(...)
+
+ This function checks if a task that entered the runnable state should
+ preempt the currently running task.
+
+ - pick_next_task(...)
+
+ This function chooses the most appropriate task eligible to run next.
+
+ - set_curr_task(...)
+
+ This function is called when a task changes its scheduling class or changes
+ its task group.
+
+ - task_tick(...)
+
+ This function is mostly called from time tick functions; it might lead to
+ process switch. This drives the running preemption.
+
+
+
+
+7. GROUP SCHEDULER EXTENSIONS TO CFS
+
+Normally, the scheduler operates on individual tasks and strives to provide
+fair CPU time to each task. Sometimes, it may be desirable to group tasks and
+provide fair CPU time to each such task group. For example, it may be
+desirable to first provide fair CPU time to each user on the system and then to
+each task belonging to a user.
+
+CONFIG_CGROUP_SCHED strives to achieve exactly that. It lets tasks to be
+grouped and divides CPU time fairly among such groups.
+
+CONFIG_RT_GROUP_SCHED permits to group real-time (i.e., SCHED_FIFO and
+SCHED_RR) tasks.
+
+CONFIG_FAIR_GROUP_SCHED permits to group CFS (i.e., SCHED_NORMAL and
+SCHED_BATCH) tasks.
+
+ These options need CONFIG_CGROUPS to be defined, and let the administrator
+ create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See
+ Documentation/cgroups/cgroups.txt for more information about this filesystem.
+
+When CONFIG_FAIR_GROUP_SCHED is defined, a "cpu.shares" file is created for each
+group created using the pseudo filesystem. See example steps below to create
+task groups and modify their CPU share using the "cgroups" pseudo filesystem.
+
+ # mount -t tmpfs cgroup_root /sys/fs/cgroup
+ # mkdir /sys/fs/cgroup/cpu
+ # mount -t cgroup -ocpu none /sys/fs/cgroup/cpu
+ # cd /sys/fs/cgroup/cpu
+
+ # mkdir multimedia # create "multimedia" group of tasks
+ # mkdir browser # create "browser" group of tasks
+
+ # #Configure the multimedia group to receive twice the CPU bandwidth
+ # #that of browser group
+
+ # echo 2048 > multimedia/cpu.shares
+ # echo 1024 > browser/cpu.shares
+
+ # firefox & # Launch firefox and move it to "browser" group
+ # echo <firefox_pid> > browser/tasks
+
+ # #Launch gmplayer (or your favourite movie player)
+ # echo <movie_player_pid> > multimedia/tasks
diff --git a/Documentation/scheduler/sched-domains.txt b/Documentation/scheduler/sched-domains.txt
new file mode 100644
index 000000000..4af80b1c0
--- /dev/null
+++ b/Documentation/scheduler/sched-domains.txt
@@ -0,0 +1,77 @@
+Each CPU has a "base" scheduling domain (struct sched_domain). The domain
+hierarchy is built from these base domains via the ->parent pointer. ->parent
+MUST be NULL terminated, and domain structures should be per-CPU as they are
+locklessly updated.
+
+Each scheduling domain spans a number of CPUs (stored in the ->span field).
+A domain's span MUST be a superset of it child's span (this restriction could
+be relaxed if the need arises), and a base domain for CPU i MUST span at least
+i. The top domain for each CPU will generally span all CPUs in the system
+although strictly it doesn't have to, but this could lead to a case where some
+CPUs will never be given tasks to run unless the CPUs allowed mask is
+explicitly set. A sched domain's span means "balance process load among these
+CPUs".
+
+Each scheduling domain must have one or more CPU groups (struct sched_group)
+which are organised as a circular one way linked list from the ->groups
+pointer. The union of cpumasks of these groups MUST be the same as the
+domain's span. The intersection of cpumasks from any two of these groups
+MUST be the empty set. The group pointed to by the ->groups pointer MUST
+contain the CPU to which the domain belongs. Groups may be shared among
+CPUs as they contain read only data after they have been set up.
+
+Balancing within a sched domain occurs between groups. That is, each group
+is treated as one entity. The load of a group is defined as the sum of the
+load of each of its member CPUs, and only when the load of a group becomes
+out of balance are tasks moved between groups.
+
+In kernel/sched/core.c, trigger_load_balance() is run periodically on each CPU
+through scheduler_tick(). It raises a softirq after the next regularly scheduled
+rebalancing event for the current runqueue has arrived. The actual load
+balancing workhorse, run_rebalance_domains()->rebalance_domains(), is then run
+in softirq context (SCHED_SOFTIRQ).
+
+The latter function takes two arguments: the current CPU and whether it was idle
+at the time the scheduler_tick() happened and iterates over all sched domains
+our CPU is on, starting from its base domain and going up the ->parent chain.
+While doing that, it checks to see if the current domain has exhausted its
+rebalance interval. If so, it runs load_balance() on that domain. It then checks
+the parent sched_domain (if it exists), and the parent of the parent and so
+forth.
+
+Initially, load_balance() finds the busiest group in the current sched domain.
+If it succeeds, it looks for the busiest runqueue of all the CPUs' runqueues in
+that group. If it manages to find such a runqueue, it locks both our initial
+CPU's runqueue and the newly found busiest one and starts moving tasks from it
+to our runqueue. The exact number of tasks amounts to an imbalance previously
+computed while iterating over this sched domain's groups.
+
+*** Implementing sched domains ***
+The "base" domain will "span" the first level of the hierarchy. In the case
+of SMT, you'll span all siblings of the physical CPU, with each group being
+a single virtual CPU.
+
+In SMP, the parent of the base domain will span all physical CPUs in the
+node. Each group being a single physical CPU. Then with NUMA, the parent
+of the SMP domain will span the entire machine, with each group having the
+cpumask of a node. Or, you could do multi-level NUMA or Opteron, for example,
+might have just one domain covering its one NUMA level.
+
+The implementor should read comments in include/linux/sched.h:
+struct sched_domain fields, SD_FLAG_*, SD_*_INIT to get an idea of
+the specifics and what to tune.
+
+Architectures may retain the regular override the default SD_*_INIT flags
+while using the generic domain builder in kernel/sched/core.c if they wish to
+retain the traditional SMT->SMP->NUMA topology (or some subset of that). This
+can be done by #define'ing ARCH_HASH_SCHED_TUNE.
+
+Alternatively, the architecture may completely override the generic domain
+builder by #define'ing ARCH_HASH_SCHED_DOMAIN, and exporting your
+arch_init_sched_domains function. This function will attach domains to all
+CPUs using cpu_attach_domain.
+
+The sched-domains debugging infrastructure can be enabled by enabling
+CONFIG_SCHED_DEBUG. This enables an error checking parse of the sched domains
+which should catch most possible errors (described above). It also prints out
+the domain structure in a visual format.
diff --git a/Documentation/scheduler/sched-nice-design.txt b/Documentation/scheduler/sched-nice-design.txt
new file mode 100644
index 000000000..3ac1e46d5
--- /dev/null
+++ b/Documentation/scheduler/sched-nice-design.txt
@@ -0,0 +1,108 @@
+This document explains the thinking about the revamped and streamlined
+nice-levels implementation in the new Linux scheduler.
+
+Nice levels were always pretty weak under Linux and people continuously
+pestered us to make nice +19 tasks use up much less CPU time.
+
+Unfortunately that was not that easy to implement under the old
+scheduler, (otherwise we'd have done it long ago) because nice level
+support was historically coupled to timeslice length, and timeslice
+units were driven by the HZ tick, so the smallest timeslice was 1/HZ.
+
+In the O(1) scheduler (in 2003) we changed negative nice levels to be
+much stronger than they were before in 2.4 (and people were happy about
+that change), and we also intentionally calibrated the linear timeslice
+rule so that nice +19 level would be _exactly_ 1 jiffy. To better
+understand it, the timeslice graph went like this (cheesy ASCII art
+alert!):
+
+
+ A
+ \ | [timeslice length]
+ \ |
+ \ |
+ \ |
+ \ |
+ \|___100msecs
+ |^ . _
+ | ^ . _
+ | ^ . _
+ -*----------------------------------*-----> [nice level]
+ -20 | +19
+ |
+ |
+
+So that if someone wanted to really renice tasks, +19 would give a much
+bigger hit than the normal linear rule would do. (The solution of
+changing the ABI to extend priorities was discarded early on.)
+
+This approach worked to some degree for some time, but later on with
+HZ=1000 it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage which
+we felt to be a bit excessive. Excessive _not_ because it's too small of
+a CPU utilization, but because it causes too frequent (once per
+millisec) rescheduling. (and would thus trash the cache, etc. Remember,
+this was long ago when hardware was weaker and caches were smaller, and
+people were running number crunching apps at nice +19.)
+
+So for HZ=1000 we changed nice +19 to 5msecs, because that felt like the
+right minimal granularity - and this translates to 5% CPU utilization.
+But the fundamental HZ-sensitive property for nice+19 still remained,
+and we never got a single complaint about nice +19 being too _weak_ in
+terms of CPU utilization, we only got complaints about it (still) being
+too _strong_ :-)
+
+To sum it up: we always wanted to make nice levels more consistent, but
+within the constraints of HZ and jiffies and their nasty design level
+coupling to timeslices and granularity it was not really viable.
+
+The second (less frequent but still periodically occurring) complaint
+about Linux's nice level support was its assymetry around the origo
+(which you can see demonstrated in the picture above), or more
+accurately: the fact that nice level behavior depended on the _absolute_
+nice level as well, while the nice API itself is fundamentally
+"relative":
+
+ int nice(int inc);
+
+ asmlinkage long sys_nice(int increment)
+
+(the first one is the glibc API, the second one is the syscall API.)
+Note that the 'inc' is relative to the current nice level. Tools like
+bash's "nice" command mirror this relative API.
+
+With the old scheduler, if you for example started a niced task with +1
+and another task with +2, the CPU split between the two tasks would
+depend on the nice level of the parent shell - if it was at nice -10 the
+CPU split was different than if it was at +5 or +10.
+
+A third complaint against Linux's nice level support was that negative
+nice levels were not 'punchy enough', so lots of people had to resort to
+run audio (and other multimedia) apps under RT priorities such as
+SCHED_FIFO. But this caused other problems: SCHED_FIFO is not starvation
+proof, and a buggy SCHED_FIFO app can also lock up the system for good.
+
+The new scheduler in v2.6.23 addresses all three types of complaints:
+
+To address the first complaint (of nice levels being not "punchy"
+enough), the scheduler was decoupled from 'time slice' and HZ concepts
+(and granularity was made a separate concept from nice levels) and thus
+it was possible to implement better and more consistent nice +19
+support: with the new scheduler nice +19 tasks get a HZ-independent
+1.5%, instead of the variable 3%-5%-9% range they got in the old
+scheduler.
+
+To address the second complaint (of nice levels not being consistent),
+the new scheduler makes nice(1) have the same CPU utilization effect on
+tasks, regardless of their absolute nice levels. So on the new
+scheduler, running a nice +10 and a nice 11 task has the same CPU
+utilization "split" between them as running a nice -5 and a nice -4
+task. (one will get 55% of the CPU, the other 45%.) That is why nice
+levels were changed to be "multiplicative" (or exponential) - that way
+it does not matter which nice level you start out from, the 'relative
+result' will always be the same.
+
+The third complaint (of negative nice levels not being "punchy" enough
+and forcing audio apps to run under the more dangerous SCHED_FIFO
+scheduling policy) is addressed by the new scheduler almost
+automatically: stronger negative nice levels are an automatic
+side-effect of the recalibrated dynamic range of nice levels.
diff --git a/Documentation/scheduler/sched-rt-group.txt b/Documentation/scheduler/sched-rt-group.txt
new file mode 100644
index 000000000..71b54d549
--- /dev/null
+++ b/Documentation/scheduler/sched-rt-group.txt
@@ -0,0 +1,183 @@
+ Real-Time group scheduling
+ --------------------------
+
+CONTENTS
+========
+
+0. WARNING
+1. Overview
+ 1.1 The problem
+ 1.2 The solution
+2. The interface
+ 2.1 System-wide settings
+ 2.2 Default behaviour
+ 2.3 Basis for grouping tasks
+3. Future plans
+
+
+0. WARNING
+==========
+
+ Fiddling with these settings can result in an unstable system, the knobs are
+ root only and assumes root knows what he is doing.
+
+Most notable:
+
+ * very small values in sched_rt_period_us can result in an unstable
+ system when the period is smaller than either the available hrtimer
+ resolution, or the time it takes to handle the budget refresh itself.
+
+ * very small values in sched_rt_runtime_us can result in an unstable
+ system when the runtime is so small the system has difficulty making
+ forward progress (NOTE: the migration thread and kstopmachine both
+ are real-time processes).
+
+1. Overview
+===========
+
+
+1.1 The problem
+---------------
+
+Realtime scheduling is all about determinism, a group has to be able to rely on
+the amount of bandwidth (eg. CPU time) being constant. In order to schedule
+multiple groups of realtime tasks, each group must be assigned a fixed portion
+of the CPU time available. Without a minimum guarantee a realtime group can
+obviously fall short. A fuzzy upper limit is of no use since it cannot be
+relied upon. Which leaves us with just the single fixed portion.
+
+1.2 The solution
+----------------
+
+CPU time is divided by means of specifying how much time can be spent running
+in a given period. We allocate this "run time" for each realtime group which
+the other realtime groups will not be permitted to use.
+
+Any time not allocated to a realtime group will be used to run normal priority
+tasks (SCHED_OTHER). Any allocated run time not used will also be picked up by
+SCHED_OTHER.
+
+Let's consider an example: a frame fixed realtime renderer must deliver 25
+frames a second, which yields a period of 0.04s per frame. Now say it will also
+have to play some music and respond to input, leaving it with around 80% CPU
+time dedicated for the graphics. We can then give this group a run time of 0.8
+* 0.04s = 0.032s.
+
+This way the graphics group will have a 0.04s period with a 0.032s run time
+limit. Now if the audio thread needs to refill the DMA buffer every 0.005s, but
+needs only about 3% CPU time to do so, it can do with a 0.03 * 0.005s =
+0.00015s. So this group can be scheduled with a period of 0.005s and a run time
+of 0.00015s.
+
+The remaining CPU time will be used for user input and other tasks. Because
+realtime tasks have explicitly allocated the CPU time they need to perform
+their tasks, buffer underruns in the graphics or audio can be eliminated.
+
+NOTE: the above example is not fully implemented yet. We still
+lack an EDF scheduler to make non-uniform periods usable.
+
+
+2. The Interface
+================
+
+
+2.1 System wide settings
+------------------------
+
+The system wide settings are configured under the /proc virtual file system:
+
+/proc/sys/kernel/sched_rt_period_us:
+ The scheduling period that is equivalent to 100% CPU bandwidth
+
+/proc/sys/kernel/sched_rt_runtime_us:
+ A global limit on how much time realtime scheduling may use. Even without
+ CONFIG_RT_GROUP_SCHED enabled, this will limit time reserved to realtime
+ processes. With CONFIG_RT_GROUP_SCHED it signifies the total bandwidth
+ available to all realtime groups.
+
+ * Time is specified in us because the interface is s32. This gives an
+ operating range from 1us to about 35 minutes.
+ * sched_rt_period_us takes values from 1 to INT_MAX.
+ * sched_rt_runtime_us takes values from -1 to (INT_MAX - 1).
+ * A run time of -1 specifies runtime == period, ie. no limit.
+
+
+2.2 Default behaviour
+---------------------
+
+The default values for sched_rt_period_us (1000000 or 1s) and
+sched_rt_runtime_us (950000 or 0.95s). This gives 0.05s to be used by
+SCHED_OTHER (non-RT tasks). These defaults were chosen so that a run-away
+realtime tasks will not lock up the machine but leave a little time to recover
+it. By setting runtime to -1 you'd get the old behaviour back.
+
+By default all bandwidth is assigned to the root group and new groups get the
+period from /proc/sys/kernel/sched_rt_period_us and a run time of 0. If you
+want to assign bandwidth to another group, reduce the root group's bandwidth
+and assign some or all of the difference to another group.
+
+Realtime group scheduling means you have to assign a portion of total CPU
+bandwidth to the group before it will accept realtime tasks. Therefore you will
+not be able to run realtime tasks as any user other than root until you have
+done that, even if the user has the rights to run processes with realtime
+priority!
+
+
+2.3 Basis for grouping tasks
+----------------------------
+
+Enabling CONFIG_RT_GROUP_SCHED lets you explicitly allocate real
+CPU bandwidth to task groups.
+
+This uses the cgroup virtual file system and "<cgroup>/cpu.rt_runtime_us"
+to control the CPU time reserved for each control group.
+
+For more information on working with control groups, you should read
+Documentation/cgroups/cgroups.txt as well.
+
+Group settings are checked against the following limits in order to keep the
+configuration schedulable:
+
+ \Sum_{i} runtime_{i} / global_period <= global_runtime / global_period
+
+For now, this can be simplified to just the following (but see Future plans):
+
+ \Sum_{i} runtime_{i} <= global_runtime
+
+
+3. Future plans
+===============
+
+There is work in progress to make the scheduling period for each group
+("<cgroup>/cpu.rt_period_us") configurable as well.
+
+The constraint on the period is that a subgroup must have a smaller or
+equal period to its parent. But realistically its not very useful _yet_
+as its prone to starvation without deadline scheduling.
+
+Consider two sibling groups A and B; both have 50% bandwidth, but A's
+period is twice the length of B's.
+
+* group A: period=100000us, runtime=10000us
+ - this runs for 0.01s once every 0.1s
+
+* group B: period= 50000us, runtime=10000us
+ - this runs for 0.01s twice every 0.1s (or once every 0.05 sec).
+
+This means that currently a while (1) loop in A will run for the full period of
+B and can starve B's tasks (assuming they are of lower priority) for a whole
+period.
+
+The next project will be SCHED_EDF (Earliest Deadline First scheduling) to bring
+full deadline scheduling to the linux kernel. Deadline scheduling the above
+groups and treating end of the period as a deadline will ensure that they both
+get their allocated time.
+
+Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
+the biggest challenge as the current linux PI infrastructure is geared towards
+the limited static priority levels 0-99. With deadline scheduling you need to
+do deadline inheritance (since priority is inversely proportional to the
+deadline delta (deadline - now)).
+
+This means the whole PI machinery will have to be reworked - and that is one of
+the most complex pieces of code we have.
diff --git a/Documentation/scheduler/sched-stats.txt b/Documentation/scheduler/sched-stats.txt
new file mode 100644
index 000000000..8259b34a6
--- /dev/null
+++ b/Documentation/scheduler/sched-stats.txt
@@ -0,0 +1,154 @@
+Version 15 of schedstats dropped counters for some sched_yield:
+yld_exp_empty, yld_act_empty and yld_both_empty. Otherwise, it is
+identical to version 14.
+
+Version 14 of schedstats includes support for sched_domains, which hit the
+mainline kernel in 2.6.20 although it is identical to the stats from version
+12 which was in the kernel from 2.6.13-2.6.19 (version 13 never saw a kernel
+release). Some counters make more sense to be per-runqueue; other to be
+per-domain. Note that domains (and their associated information) will only
+be pertinent and available on machines utilizing CONFIG_SMP.
+
+In version 14 of schedstat, there is at least one level of domain
+statistics for each cpu listed, and there may well be more than one
+domain. Domains have no particular names in this implementation, but
+the highest numbered one typically arbitrates balancing across all the
+cpus on the machine, while domain0 is the most tightly focused domain,
+sometimes balancing only between pairs of cpus. At this time, there
+are no architectures which need more than three domain levels. The first
+field in the domain stats is a bit map indicating which cpus are affected
+by that domain.
+
+These fields are counters, and only increment. Programs which make use
+of these will need to start with a baseline observation and then calculate
+the change in the counters at each subsequent observation. A perl script
+which does this for many of the fields is available at
+
+ http://eaglet.rain.com/rick/linux/schedstat/
+
+Note that any such script will necessarily be version-specific, as the main
+reason to change versions is changes in the output format. For those wishing
+to write their own scripts, the fields are described here.
+
+CPU statistics
+--------------
+cpu<N> 1 2 3 4 5 6 7 8 9
+
+First field is a sched_yield() statistic:
+ 1) # of times sched_yield() was called
+
+Next three are schedule() statistics:
+ 2) This field is a legacy array expiration count field used in the O(1)
+ scheduler. We kept it for ABI compatibility, but it is always set to zero.
+ 3) # of times schedule() was called
+ 4) # of times schedule() left the processor idle
+
+Next two are try_to_wake_up() statistics:
+ 5) # of times try_to_wake_up() was called
+ 6) # of times try_to_wake_up() was called to wake up the local cpu
+
+Next three are statistics describing scheduling latency:
+ 7) sum of all time spent running by tasks on this processor (in jiffies)
+ 8) sum of all time spent waiting to run by tasks on this processor (in
+ jiffies)
+ 9) # of timeslices run on this cpu
+
+
+Domain statistics
+-----------------
+One of these is produced per domain for each cpu described. (Note that if
+CONFIG_SMP is not defined, *no* domains are utilized and these lines
+will not appear in the output.)
+
+domain<N> <cpumask> 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
+
+The first field is a bit mask indicating what cpus this domain operates over.
+
+The next 24 are a variety of load_balance() statistics in grouped into types
+of idleness (idle, busy, and newly idle):
+
+ 1) # of times in this domain load_balance() was called when the
+ cpu was idle
+ 2) # of times in this domain load_balance() checked but found
+ the load did not require balancing when the cpu was idle
+ 3) # of times in this domain load_balance() tried to move one or
+ more tasks and failed, when the cpu was idle
+ 4) sum of imbalances discovered (if any) with each call to
+ load_balance() in this domain when the cpu was idle
+ 5) # of times in this domain pull_task() was called when the cpu
+ was idle
+ 6) # of times in this domain pull_task() was called even though
+ the target task was cache-hot when idle
+ 7) # of times in this domain load_balance() was called but did
+ not find a busier queue while the cpu was idle
+ 8) # of times in this domain a busier queue was found while the
+ cpu was idle but no busier group was found
+
+ 9) # of times in this domain load_balance() was called when the
+ cpu was busy
+ 10) # of times in this domain load_balance() checked but found the
+ load did not require balancing when busy
+ 11) # of times in this domain load_balance() tried to move one or
+ more tasks and failed, when the cpu was busy
+ 12) sum of imbalances discovered (if any) with each call to
+ load_balance() in this domain when the cpu was busy
+ 13) # of times in this domain pull_task() was called when busy
+ 14) # of times in this domain pull_task() was called even though the
+ target task was cache-hot when busy
+ 15) # of times in this domain load_balance() was called but did not
+ find a busier queue while the cpu was busy
+ 16) # of times in this domain a busier queue was found while the cpu
+ was busy but no busier group was found
+
+ 17) # of times in this domain load_balance() was called when the
+ cpu was just becoming idle
+ 18) # of times in this domain load_balance() checked but found the
+ load did not require balancing when the cpu was just becoming idle
+ 19) # of times in this domain load_balance() tried to move one or more
+ tasks and failed, when the cpu was just becoming idle
+ 20) sum of imbalances discovered (if any) with each call to
+ load_balance() in this domain when the cpu was just becoming idle
+ 21) # of times in this domain pull_task() was called when newly idle
+ 22) # of times in this domain pull_task() was called even though the
+ target task was cache-hot when just becoming idle
+ 23) # of times in this domain load_balance() was called but did not
+ find a busier queue while the cpu was just becoming idle
+ 24) # of times in this domain a busier queue was found while the cpu
+ was just becoming idle but no busier group was found
+
+ Next three are active_load_balance() statistics:
+ 25) # of times active_load_balance() was called
+ 26) # of times active_load_balance() tried to move a task and failed
+ 27) # of times active_load_balance() successfully moved a task
+
+ Next three are sched_balance_exec() statistics:
+ 28) sbe_cnt is not used
+ 29) sbe_balanced is not used
+ 30) sbe_pushed is not used
+
+ Next three are sched_balance_fork() statistics:
+ 31) sbf_cnt is not used
+ 32) sbf_balanced is not used
+ 33) sbf_pushed is not used
+
+ Next three are try_to_wake_up() statistics:
+ 34) # of times in this domain try_to_wake_up() awoke a task that
+ last ran on a different cpu in this domain
+ 35) # of times in this domain try_to_wake_up() moved a task to the
+ waking cpu because it was cache-cold on its own cpu anyway
+ 36) # of times in this domain try_to_wake_up() started passive balancing
+
+/proc/<pid>/schedstat
+----------------
+schedstats also adds a new /proc/<pid>/schedstat file to include some of
+the same information on a per-process level. There are three fields in
+this file correlating for that process to:
+ 1) time spent on the cpu
+ 2) time spent waiting on a runqueue
+ 3) # of timeslices run on this cpu
+
+A program could be easily written to make use of these extra fields to
+report on how well a particular process or set of processes is faring
+under the scheduler's policies. A simple version of such a program is
+available at
+ http://eaglet.rain.com/rick/linux/schedstat/v12/latency.c
diff --git a/Documentation/scsi/00-INDEX b/Documentation/scsi/00-INDEX
new file mode 100644
index 000000000..c4b978a72
--- /dev/null
+++ b/Documentation/scsi/00-INDEX
@@ -0,0 +1,110 @@
+00-INDEX
+ - this file
+53c700.txt
+ - info on driver for 53c700 based adapters
+BusLogic.txt
+ - info on driver for adapters with BusLogic chips
+ChangeLog.1992-1997
+ - Changes to scsi files, if not listed elsewhere
+ChangeLog.arcmsr
+ - Changes to driver for ARECA's SATA RAID controller cards
+ChangeLog.ips
+ - IBM ServeRAID driver Changelog
+ChangeLog.lpfc
+ - Changes to lpfc driver
+ChangeLog.megaraid
+ - Changes to LSI megaraid controller.
+ChangeLog.megaraid_sas
+ - Changes to serial attached scsi version of LSI megaraid controller.
+ChangeLog.ncr53c8xx
+ - Changes to ncr53c8xx driver
+ChangeLog.sym53c8xx
+ - Changes to sym53c8xx driver
+ChangeLog.sym53c8xx_2
+ - Changes to second generation of sym53c8xx driver
+FlashPoint.txt
+ - info on driver for BusLogic FlashPoint adapters
+LICENSE.FlashPoint
+ - Licence of the Flashpoint driver
+LICENSE.qla2xxx
+ - License for QLogic Linux Fibre Channel HBA Driver firmware.
+LICENSE.qla4xxx
+ - License for QLogic Linux iSCSI HBA Driver.
+Mylex.txt
+ - info on driver for Mylex adapters
+NinjaSCSI.txt
+ - info on WorkBiT NinjaSCSI-32/32Bi driver
+aacraid.txt
+ - Driver supporting Adaptec RAID controllers
+advansys.txt
+ - List of Advansys Host Adapters
+aha152x.txt
+ - info on driver for Adaptec AHA152x based adapters
+aic79xx.txt
+ - Adaptec Ultra320 SCSI host adapters
+aic7xxx.txt
+ - info on driver for Adaptec controllers
+arcmsr_spec.txt
+ - ARECA FIRMWARE SPEC (for IOP331 adapter)
+bfa.txt
+ - Brocade FC/FCOE adapter driver.
+bnx2fc.txt
+ - FCoE hardware offload for Broadcom network interfaces.
+cxgb3i.txt
+ - Chelsio iSCSI Linux Driver
+dc395x.txt
+ - README file for the dc395x SCSI driver
+dpti.txt
+ - info on driver for DPT SmartRAID and Adaptec I2O RAID based adapters
+dtc3x80.txt
+ - info on driver for DTC 2x80 based adapters
+g_NCR5380.txt
+ - info on driver for NCR5380 and NCR53c400 based adapters
+hpsa.txt
+ - HP Smart Array Controller SCSI driver.
+hptiop.txt
+ - HIGHPOINT ROCKETRAID 3xxx RAID DRIVER
+in2000.txt
+ - info on in2000 driver
+libsas.txt
+ - Serial Attached SCSI management layer.
+link_power_management_policy.txt
+ - Link power management options.
+lpfc.txt
+ - LPFC driver release notes
+megaraid.txt
+ - Common Management Module, shared code handling ioctls for LSI drivers
+ncr53c8xx.txt
+ - info on driver for NCR53c8xx based adapters
+osd.txt
+ Object-Based Storage Device, command set introduction.
+osst.txt
+ - info on driver for OnStream SC-x0 SCSI tape
+ppa.txt
+ - info on driver for IOmega zip drive
+qlogicfas.txt
+ - info on driver for QLogic FASxxx based adapters
+scsi-changer.txt
+ - README for the SCSI media changer driver
+scsi-generic.txt
+ - info on the sg driver for generic (non-disk/CD/tape) SCSI devices.
+scsi-parameters.txt
+ - List of SCSI-parameters to pass to the kernel at module load-time.
+scsi.txt
+ - short blurb on using SCSI support as a module.
+scsi_mid_low_api.txt
+ - info on API between SCSI layer and low level drivers
+scsi_eh.txt
+ - info on SCSI midlayer error handling infrastructure
+scsi_fc_transport.txt
+ - SCSI Fiber Channel Tansport
+st.txt
+ - info on scsi tape driver
+sym53c500_cs.txt
+ - info on PCMCIA driver for Symbios Logic 53c500 based adapters
+sym53c8xx_2.txt
+ - info on second generation driver for sym53c8xx based adapters
+tmscsim.txt
+ - info on driver for AM53c974 based adapters
+ufs.txt
+ - info on Universal Flash Storage(UFS) and UFS host controller driver.
diff --git a/Documentation/scsi/53c700.txt b/Documentation/scsi/53c700.txt
new file mode 100644
index 000000000..e31aceb6d
--- /dev/null
+++ b/Documentation/scsi/53c700.txt
@@ -0,0 +1,135 @@
+General Description
+===================
+
+This driver supports the 53c700 and 53c700-66 chips. It also supports
+the 53c710 but only in 53c700 emulation mode. It is full featured and
+does sync (-66 and 710 only), disconnects and tag command queueing.
+
+Since the 53c700 must be interfaced to a bus, you need to wrapper the
+card detector around this driver. For an example, see the
+NCR_D700.[ch] or lasi700.[ch] files.
+
+The comments in the 53c700.[ch] files tell you which parts you need to
+fill in to get the driver working.
+
+
+Compile Time Flags
+==================
+
+A compile time flag is:
+
+CONFIG_53C700_LE_ON_BE
+
+define if the chipset must be supported in little endian mode on a big
+endian architecture (used for the 700 on parisc).
+
+
+Using the Chip Core Driver
+==========================
+
+In order to plumb the 53c700 chip core driver into a working SCSI
+driver, you need to know three things about the way the chip is wired
+into your system (or expansion card).
+
+1. The clock speed of the SCSI core
+2. The interrupt line used
+3. The memory (or io space) location of the 53c700 registers.
+
+Optionally, you may also need to know other things, like how to read
+the SCSI Id from the card bios or whether the chip is wired for
+differential operation.
+
+Usually you can find items 2. and 3. from general spec. documents or
+even by examining the configuration of a working driver under another
+operating system.
+
+The clock speed is usually buried deep in the technical literature.
+It is required because it is used to set up both the synchronous and
+asynchronous dividers for the chip. As a general rule of thumb,
+manufacturers set the clock speed at the lowest possible setting
+consistent with the best operation of the chip (although some choose
+to drive it off the CPU or bus clock rather than going to the expense
+of an extra clock chip). The best operation clock speeds are:
+
+53c700 - 25MHz
+53c700-66 - 50MHz
+53c710 - 40Mhz
+
+Writing Your Glue Driver
+========================
+
+This will be a standard SCSI driver (I don't know of a good document
+describing this, just copy from some other driver) with at least a
+detect and release entry.
+
+In the detect routine, you need to allocate a struct
+NCR_700_Host_Parameters sized memory area and clear it (so that the
+default values for everything are 0). Then you must fill in the
+parameters that matter to you (see below), plumb the NCR_700_intr
+routine into the interrupt line and call NCR_700_detect with the host
+template and the new parameters as arguments. You should also call
+the relevant request_*_region function and place the register base
+address into the `base' pointer of the host parameters.
+
+In the release routine, you must free the NCR_700_Host_Parameters that
+you allocated, call the corresponding release_*_region and free the
+interrupt.
+
+Handling Interrupts
+-------------------
+
+In general, you should just plumb the card's interrupt line in with
+
+request_irq(irq, NCR_700_intr, <irq flags>, <driver name>, host);
+
+where host is the return from the relevant NCR_700_detect() routine.
+
+You may also write your own interrupt handling routine which calls
+NCR_700_intr() directly. However, you should only really do this if
+you have a card with more than one chip on it and you can read a
+register to tell which set of chips wants the interrupt.
+
+Settable NCR_700_Host_Parameters
+--------------------------------
+
+The following are a list of the user settable parameters:
+
+clock: (MANDATORY)
+
+Set to the clock speed of the chip in MHz.
+
+base: (MANDATORY)
+
+set to the base of the io or mem region for the register set. On 64
+bit architectures this is only 32 bits wide, so the registers must be
+mapped into the low 32 bits of memory.
+
+pci_dev: (OPTIONAL)
+
+set to the PCI board device. Leave NULL for a non-pci board. This is
+used for the pci_alloc_consistent() and pci_map_*() functions.
+
+dmode_extra: (OPTIONAL, 53c710 only)
+
+extra flags for the DMODE register. These are used to control bus
+output pins on the 710. The settings should be a combination of
+DMODE_FC1 and DMODE_FC2. What these pins actually do is entirely up
+to the board designer. Usually it is safe to ignore this setting.
+
+differential: (OPTIONAL)
+
+set to 1 if the chip drives a differential bus.
+
+force_le_on_be: (OPTIONAL, only if CONFIG_53C700_LE_ON_BE is set)
+
+set to 1 if the chip is operating in little endian mode on a big
+endian architecture.
+
+chip710: (OPTIONAL)
+
+set to 1 if the chip is a 53c710.
+
+burst_disable: (OPTIONAL, 53c710 only)
+
+disable 8 byte bursting for DMA transfers.
+
diff --git a/Documentation/scsi/BusLogic.txt b/Documentation/scsi/BusLogic.txt
new file mode 100644
index 000000000..48e982cd6
--- /dev/null
+++ b/Documentation/scsi/BusLogic.txt
@@ -0,0 +1,566 @@
+ BusLogic MultiMaster and FlashPoint SCSI Driver for Linux
+
+ Version 2.0.15 for Linux 2.0
+ Version 2.1.15 for Linux 2.1
+
+ PRODUCTION RELEASE
+
+ 17 August 1998
+
+ Leonard N. Zubkoff
+ Dandelion Digital
+ lnz@dandelion.com
+
+ Copyright 1995-1998 by Leonard N. Zubkoff <lnz@dandelion.com>
+
+
+ INTRODUCTION
+
+BusLogic, Inc. designed and manufactured a variety of high performance SCSI
+host adapters which share a common programming interface across a diverse
+collection of bus architectures by virtue of their MultiMaster ASIC technology.
+BusLogic was acquired by Mylex Corporation in February 1996, but the products
+supported by this driver originated under the BusLogic name and so that name is
+retained in the source code and documentation.
+
+This driver supports all present BusLogic MultiMaster Host Adapters, and should
+support any future MultiMaster designs with little or no modification. More
+recently, BusLogic introduced the FlashPoint Host Adapters, which are less
+costly and rely on the host CPU, rather than including an onboard processor.
+Despite not having an onboard CPU, the FlashPoint Host Adapters perform very
+well and have very low command latency. BusLogic has recently provided me with
+the FlashPoint Driver Developer's Kit, which comprises documentation and freely
+redistributable source code for the FlashPoint SCCB Manager. The SCCB Manager
+is the library of code that runs on the host CPU and performs functions
+analogous to the firmware on the MultiMaster Host Adapters. Thanks to their
+having provided the SCCB Manager, this driver now supports the FlashPoint Host
+Adapters as well.
+
+My primary goals in writing this completely new BusLogic driver for Linux are
+to achieve the full performance that BusLogic SCSI Host Adapters and modern
+SCSI peripherals are capable of, and to provide a highly robust driver that can
+be depended upon for high performance mission critical applications. All of
+the major performance features can be configured from the Linux kernel command
+line or at module initialization time, allowing individual installations to
+tune driver performance and error recovery to their particular needs.
+
+The latest information on Linux support for BusLogic SCSI Host Adapters, as
+well as the most recent release of this driver and the latest firmware for the
+BT-948/958/958D, will always be available from my Linux Home Page at URL
+"http://sourceforge.net/projects/dandelion/".
+
+Bug reports should be sent via electronic mail to "lnz@dandelion.com". Please
+include with the bug report the complete configuration messages reported by the
+driver and SCSI subsystem at startup, along with any subsequent system messages
+relevant to SCSI operations, and a detailed description of your system's
+hardware configuration.
+
+Mylex has been an excellent company to work with and I highly recommend their
+products to the Linux community. In November 1995, I was offered the
+opportunity to become a beta test site for their latest MultiMaster product,
+the BT-948 PCI Ultra SCSI Host Adapter, and then again for the BT-958 PCI Wide
+Ultra SCSI Host Adapter in January 1996. This was mutually beneficial since
+Mylex received a degree and kind of testing that their own testing group cannot
+readily achieve, and the Linux community has available high performance host
+adapters that have been well tested with Linux even before being brought to
+market. This relationship has also given me the opportunity to interact
+directly with their technical staff, to understand more about the internal
+workings of their products, and in turn to educate them about the needs and
+potential of the Linux community.
+
+More recently, Mylex has reaffirmed the company's interest in supporting the
+Linux community, and I am now working on a Linux driver for the DAC960 PCI RAID
+Controllers. Mylex's interest and support is greatly appreciated.
+
+Unlike some other vendors, if you contact Mylex Technical Support with a
+problem and are running Linux, they will not tell you that your use of their
+products is unsupported. Their latest product marketing literature even states
+"Mylex SCSI host adapters are compatible with all major operating systems
+including: ... Linux ...".
+
+Mylex Corporation is located at 34551 Ardenwood Blvd., Fremont, California
+94555, USA and can be reached at 510/796-6100 or on the World Wide Web at
+http://www.mylex.com. Mylex HBA Technical Support can be reached by electronic
+mail at techsup@mylex.com, by Voice at 510/608-2400, or by FAX at 510/745-7715.
+Contact information for offices in Europe and Japan is available on the Web
+site.
+
+
+ DRIVER FEATURES
+
+o Configuration Reporting and Testing
+
+ During system initialization, the driver reports extensively on the host
+ adapter hardware configuration, including the synchronous transfer parameters
+ requested and negotiated with each target device. AutoSCSI settings for
+ Synchronous Negotiation, Wide Negotiation, and Disconnect/Reconnect are
+ reported for each target device, as well as the status of Tagged Queuing.
+ If the same setting is in effect for all target devices, then a single word
+ or phrase is used; otherwise, a letter is provided for each target device to
+ indicate the individual status. The following examples
+ should clarify this reporting format:
+
+ Synchronous Negotiation: Ultra
+
+ Synchronous negotiation is enabled for all target devices and the host
+ adapter will attempt to negotiate for 20.0 mega-transfers/second.
+
+ Synchronous Negotiation: Fast
+
+ Synchronous negotiation is enabled for all target devices and the host
+ adapter will attempt to negotiate for 10.0 mega-transfers/second.
+
+ Synchronous Negotiation: Slow
+
+ Synchronous negotiation is enabled for all target devices and the host
+ adapter will attempt to negotiate for 5.0 mega-transfers/second.
+
+ Synchronous Negotiation: Disabled
+
+ Synchronous negotiation is disabled and all target devices are limited to
+ asynchronous operation.
+
+ Synchronous Negotiation: UFSNUUU#UUUUUUUU
+
+ Synchronous negotiation to Ultra speed is enabled for target devices 0
+ and 4 through 15, to Fast speed for target device 1, to Slow speed for
+ target device 2, and is not permitted to target device 3. The host
+ adapter's SCSI ID is represented by the "#".
+
+ The status of Wide Negotiation, Disconnect/Reconnect, and Tagged Queuing
+ are reported as "Enabled", Disabled", or a sequence of "Y" and "N" letters.
+
+o Performance Features
+
+ BusLogic SCSI Host Adapters directly implement SCSI-2 Tagged Queuing, and so
+ support has been included in the driver to utilize tagged queuing with any
+ target devices that report having the tagged queuing capability. Tagged
+ queuing allows for multiple outstanding commands to be issued to each target
+ device or logical unit, and can improve I/O performance substantially. In
+ addition, BusLogic's Strict Round Robin Mode is used to optimize host adapter
+ performance, and scatter/gather I/O can support as many segments as can be
+ effectively utilized by the Linux I/O subsystem. Control over the use of
+ tagged queuing for each target device as well as individual selection of the
+ tagged queue depth is available through driver options provided on the kernel
+ command line or at module initialization time. By default, the queue depth
+ is determined automatically based on the host adapter's total queue depth and
+ the number, type, speed, and capabilities of the target devices found. In
+ addition, tagged queuing is automatically disabled whenever the host adapter
+ firmware version is known not to implement it correctly, or whenever a tagged
+ queue depth of 1 is selected. Tagged queuing is also disabled for individual
+ target devices if disconnect/reconnect is disabled for that device.
+
+o Robustness Features
+
+ The driver implements extensive error recovery procedures. When the higher
+ level parts of the SCSI subsystem request that a timed out command be reset,
+ a selection is made between a full host adapter hard reset and SCSI bus reset
+ versus sending a bus device reset message to the individual target device
+ based on the recommendation of the SCSI subsystem. Error recovery strategies
+ are selectable through driver options individually for each target device,
+ and also include sending a bus device reset to the specific target device
+ associated with the command being reset, as well as suppressing error
+ recovery entirely to avoid perturbing an improperly functioning device. If
+ the bus device reset error recovery strategy is selected and sending a bus
+ device reset does not restore correct operation, the next command that is
+ reset will force a full host adapter hard reset and SCSI bus reset. SCSI bus
+ resets caused by other devices and detected by the host adapter are also
+ handled by issuing a soft reset to the host adapter and re-initialization.
+ Finally, if tagged queuing is active and more than one command reset occurs
+ in a 10 minute interval, or if a command reset occurs within the first 10
+ minutes of operation, then tagged queuing will be disabled for that target
+ device. These error recovery options improve overall system robustness by
+ preventing individual errant devices from causing the system as a whole to
+ lock up or crash, and thereby allowing a clean shutdown and restart after the
+ offending component is removed.
+
+o PCI Configuration Support
+
+ On PCI systems running kernels compiled with PCI BIOS support enabled, this
+ driver will interrogate the PCI configuration space and use the I/O port
+ addresses assigned by the system BIOS, rather than the ISA compatible I/O
+ port addresses. The ISA compatible I/O port address is then disabled by the
+ driver. On PCI systems it is also recommended that the AutoSCSI utility be
+ used to disable the ISA compatible I/O port entirely as it is not necessary.
+ The ISA compatible I/O port is disabled by default on the BT-948/958/958D.
+
+o /proc File System Support
+
+ Copies of the host adapter configuration information together with updated
+ data transfer and error recovery statistics are available through the
+ /proc/scsi/BusLogic/<N> interface.
+
+o Shared Interrupts Support
+
+ On systems that support shared interrupts, any number of BusLogic Host
+ Adapters may share the same interrupt request channel.
+
+
+ SUPPORTED HOST ADAPTERS
+
+The following list comprises the supported BusLogic SCSI Host Adapters as of
+the date of this document. It is recommended that anyone purchasing a BusLogic
+Host Adapter not in the following table contact the author beforehand to verify
+that it is or will be supported.
+
+FlashPoint Series PCI Host Adapters:
+
+FlashPoint LT (BT-930) Ultra SCSI-3
+FlashPoint LT (BT-930R) Ultra SCSI-3 with RAIDPlus
+FlashPoint LT (BT-920) Ultra SCSI-3 (BT-930 without BIOS)
+FlashPoint DL (BT-932) Dual Channel Ultra SCSI-3
+FlashPoint DL (BT-932R) Dual Channel Ultra SCSI-3 with RAIDPlus
+FlashPoint LW (BT-950) Wide Ultra SCSI-3
+FlashPoint LW (BT-950R) Wide Ultra SCSI-3 with RAIDPlus
+FlashPoint DW (BT-952) Dual Channel Wide Ultra SCSI-3
+FlashPoint DW (BT-952R) Dual Channel Wide Ultra SCSI-3 with RAIDPlus
+
+MultiMaster "W" Series Host Adapters:
+
+BT-948 PCI Ultra SCSI-3
+BT-958 PCI Wide Ultra SCSI-3
+BT-958D PCI Wide Differential Ultra SCSI-3
+
+MultiMaster "C" Series Host Adapters:
+
+BT-946C PCI Fast SCSI-2
+BT-956C PCI Wide Fast SCSI-2
+BT-956CD PCI Wide Differential Fast SCSI-2
+BT-445C VLB Fast SCSI-2
+BT-747C EISA Fast SCSI-2
+BT-757C EISA Wide Fast SCSI-2
+BT-757CD EISA Wide Differential Fast SCSI-2
+BT-545C ISA Fast SCSI-2
+BT-540CF ISA Fast SCSI-2
+
+MultiMaster "S" Series Host Adapters:
+
+BT-445S VLB Fast SCSI-2
+BT-747S EISA Fast SCSI-2
+BT-747D EISA Differential Fast SCSI-2
+BT-757S EISA Wide Fast SCSI-2
+BT-757D EISA Wide Differential Fast SCSI-2
+BT-545S ISA Fast SCSI-2
+BT-542D ISA Differential Fast SCSI-2
+BT-742A EISA SCSI-2 (742A revision H)
+BT-542B ISA SCSI-2 (542B revision H)
+
+MultiMaster "A" Series Host Adapters:
+
+BT-742A EISA SCSI-2 (742A revisions A - G)
+BT-542B ISA SCSI-2 (542B revisions A - G)
+
+AMI FastDisk Host Adapters that are true BusLogic MultiMaster clones are also
+supported by this driver.
+
+BusLogic SCSI Host Adapters are available packaged both as bare boards and as
+retail kits. The BT- model numbers above refer to the bare board packaging.
+The retail kit model numbers are found by replacing BT- with KT- in the above
+list. The retail kit includes the bare board and manual as well as cabling and
+driver media and documentation that are not provided with bare boards.
+
+
+ FLASHPOINT INSTALLATION NOTES
+
+o RAIDPlus Support
+
+ FlashPoint Host Adapters now include RAIDPlus, Mylex's bootable software
+ RAID. RAIDPlus is not supported on Linux, and there are no plans to support
+ it. The MD driver in Linux 2.0 provides for concatenation (LINEAR) and
+ striping (RAID-0), and support for mirroring (RAID-1), fixed parity (RAID-4),
+ and distributed parity (RAID-5) is available separately. The built-in Linux
+ RAID support is generally more flexible and is expected to perform better
+ than RAIDPlus, so there is little impetus to include RAIDPlus support in the
+ BusLogic driver.
+
+o Enabling UltraSCSI Transfers
+
+ FlashPoint Host Adapters ship with their configuration set to "Factory
+ Default" settings that are conservative and do not allow for UltraSCSI speed
+ to be negotiated. This results in fewer problems when these host adapters
+ are installed in systems with cabling or termination that is not sufficient
+ for UltraSCSI operation, or where existing SCSI devices do not properly
+ respond to synchronous transfer negotiation for UltraSCSI speed. AutoSCSI
+ may be used to load "Optimum Performance" settings which allow UltraSCSI
+ speed to be negotiated with all devices, or UltraSCSI speed can be enabled on
+ an individual basis. It is recommended that SCAM be manually disabled after
+ the "Optimum Performance" settings are loaded.
+
+
+ BT-948/958/958D INSTALLATION NOTES
+
+The BT-948/958/958D PCI Ultra SCSI Host Adapters have some features which may
+require attention in some circumstances when installing Linux.
+
+o PCI I/O Port Assignments
+
+ When configured to factory default settings, the BT-948/958/958D will only
+ recognize the PCI I/O port assignments made by the motherboard's PCI BIOS.
+ The BT-948/958/958D will not respond to any of the ISA compatible I/O ports
+ that previous BusLogic SCSI Host Adapters respond to. This driver supports
+ the PCI I/O port assignments, so this is the preferred configuration.
+ However, if the obsolete BusLogic driver must be used for any reason, such as
+ a Linux distribution that does not yet use this driver in its boot kernel,
+ BusLogic has provided an AutoSCSI configuration option to enable a legacy ISA
+ compatible I/O port.
+
+ To enable this backward compatibility option, invoke the AutoSCSI utility via
+ Ctrl-B at system startup and select "Adapter Configuration", "View/Modify
+ Configuration", and then change the "ISA Compatible Port" setting from
+ "Disable" to "Primary" or "Alternate". Once this driver has been installed,
+ the "ISA Compatible Port" option should be set back to "Disable" to avoid
+ possible future I/O port conflicts. The older BT-946C/956C/956CD also have
+ this configuration option, but the factory default setting is "Primary".
+
+o PCI Slot Scanning Order
+
+ In systems with multiple BusLogic PCI Host Adapters, the order in which the
+ PCI slots are scanned may appear reversed with the BT-948/958/958D as
+ compared to the BT-946C/956C/956CD. For booting from a SCSI disk to work
+ correctly, it is necessary that the host adapter's BIOS and the kernel agree
+ on which disk is the boot device, which requires that they recognize the PCI
+ host adapters in the same order. The motherboard's PCI BIOS provides a
+ standard way of enumerating the PCI host adapters, which is used by the Linux
+ kernel. Some PCI BIOS implementations enumerate the PCI slots in order of
+ increasing bus number and device number, while others do so in the opposite
+ direction.
+
+ Unfortunately, Microsoft decided that Windows 95 would always enumerate the
+ PCI slots in order of increasing bus number and device number regardless of
+ the PCI BIOS enumeration, and requires that their scheme be supported by the
+ host adapter's BIOS to receive Windows 95 certification. Therefore, the
+ factory default settings of the BT-948/958/958D enumerate the host adapters
+ by increasing bus number and device number. To disable this feature, invoke
+ the AutoSCSI utility via Ctrl-B at system startup and select "Adapter
+ Configuration", "View/Modify Configuration", press Ctrl-F10, and then change
+ the "Use Bus And Device # For PCI Scanning Seq." option to OFF.
+
+ This driver will interrogate the setting of the PCI Scanning Sequence option
+ so as to recognize the host adapters in the same order as they are enumerated
+ by the host adapter's BIOS.
+
+o Enabling UltraSCSI Transfers
+
+ The BT-948/958/958D ship with their configuration set to "Factory Default"
+ settings that are conservative and do not allow for UltraSCSI speed to be
+ negotiated. This results in fewer problems when these host adapters are
+ installed in systems with cabling or termination that is not sufficient for
+ UltraSCSI operation, or where existing SCSI devices do not properly respond
+ to synchronous transfer negotiation for UltraSCSI speed. AutoSCSI may be
+ used to load "Optimum Performance" settings which allow UltraSCSI speed to be
+ negotiated with all devices, or UltraSCSI speed can be enabled on an
+ individual basis. It is recommended that SCAM be manually disabled after the
+ "Optimum Performance" settings are loaded.
+
+
+ DRIVER OPTIONS
+
+BusLogic Driver Options may be specified either via the Linux Kernel Command
+Line or via the Loadable Kernel Module Installation Facility. Driver Options
+for multiple host adapters may be specified either by separating the option
+strings by a semicolon, or by specifying multiple "BusLogic=" strings on the
+command line. Individual option specifications for a single host adapter are
+separated by commas. The Probing and Debugging Options apply to all host
+adapters whereas the remaining options apply individually only to the
+selected host adapter.
+
+The BusLogic Driver Probing Options comprise the following:
+
+IO:<integer>
+
+ The "IO:" option specifies an ISA I/O Address to be probed for a non-PCI
+ MultiMaster Host Adapter. If neither "IO:" nor "NoProbeISA" options are
+ specified, then the standard list of BusLogic MultiMaster ISA I/O Addresses
+ will be probed (0x330, 0x334, 0x230, 0x234, 0x130, and 0x134). Multiple
+ "IO:" options may be specified to precisely determine the I/O Addresses to
+ be probed, but the probe order will always follow the standard list.
+
+NoProbe
+
+ The "NoProbe" option disables all probing and therefore no BusLogic Host
+ Adapters will be detected.
+
+NoProbeISA
+
+ The "NoProbeISA" option disables probing of the standard BusLogic ISA I/O
+ Addresses and therefore only PCI MultiMaster and FlashPoint Host Adapters
+ will be detected.
+
+NoProbePCI
+
+ The "NoProbePCI" options disables the interrogation of PCI Configuration
+ Space and therefore only ISA Multimaster Host Adapters will be detected, as
+ well as PCI Multimaster Host Adapters that have their ISA Compatible I/O
+ Port set to "Primary" or "Alternate".
+
+NoSortPCI
+
+ The "NoSortPCI" option forces PCI MultiMaster Host Adapters to be
+ enumerated in the order provided by the PCI BIOS, ignoring any setting of
+ the AutoSCSI "Use Bus And Device # For PCI Scanning Seq." option.
+
+MultiMasterFirst
+
+ The "MultiMasterFirst" option forces MultiMaster Host Adapters to be probed
+ before FlashPoint Host Adapters. By default, if both FlashPoint and PCI
+ MultiMaster Host Adapters are present, this driver will probe for
+ FlashPoint Host Adapters first unless the BIOS primary disk is controlled
+ by the first PCI MultiMaster Host Adapter, in which case MultiMaster Host
+ Adapters will be probed first.
+
+FlashPointFirst
+
+ The "FlashPointFirst" option forces FlashPoint Host Adapters to be probed
+ before MultiMaster Host Adapters.
+
+The BusLogic Driver Tagged Queuing Options allow for explicitly specifying
+the Queue Depth and whether Tagged Queuing is permitted for each Target
+Device (assuming that the Target Device supports Tagged Queuing). The Queue
+Depth is the number of SCSI Commands that are allowed to be concurrently
+presented for execution (either to the Host Adapter or Target Device). Note
+that explicitly enabling Tagged Queuing may lead to problems; the option to
+enable or disable Tagged Queuing is provided primarily to allow disabling
+Tagged Queuing on Target Devices that do not implement it correctly. The
+following options are available:
+
+QueueDepth:<integer>
+
+ The "QueueDepth:" or QD:" option specifies the Queue Depth to use for all
+ Target Devices that support Tagged Queuing, as well as the maximum Queue
+ Depth for devices that do not support Tagged Queuing. If no Queue Depth
+ option is provided, the Queue Depth will be determined automatically based
+ on the Host Adapter's Total Queue Depth and the number, type, speed, and
+ capabilities of the detected Target Devices. For Host Adapters that
+ require ISA Bounce Buffers, the Queue Depth is automatically set by default
+ to BusLogic_TaggedQueueDepthBB or BusLogic_UntaggedQueueDepthBB to avoid
+ excessive preallocation of DMA Bounce Buffer memory. Target Devices that
+ do not support Tagged Queuing always have their Queue Depth set to
+ BusLogic_UntaggedQueueDepth or BusLogic_UntaggedQueueDepthBB, unless a
+ lower Queue Depth option is provided. A Queue Depth of 1 automatically
+ disables Tagged Queuing.
+
+QueueDepth:[<integer>,<integer>...]
+
+ The "QueueDepth:[...]" or "QD:[...]" option specifies the Queue Depth
+ individually for each Target Device. If an <integer> is omitted, the
+ associated Target Device will have its Queue Depth selected automatically.
+
+TaggedQueuing:Default
+
+ The "TaggedQueuing:Default" or "TQ:Default" option permits Tagged Queuing
+ based on the firmware version of the BusLogic Host Adapter and based on
+ whether the Queue Depth allows queuing multiple commands.
+
+TaggedQueuing:Enable
+
+ The "TaggedQueuing:Enable" or "TQ:Enable" option enables Tagged Queuing for
+ all Target Devices on this Host Adapter, overriding any limitation that
+ would otherwise be imposed based on the Host Adapter firmware version.
+
+TaggedQueuing:Disable
+
+ The "TaggedQueuing:Disable" or "TQ:Disable" option disables Tagged Queuing
+ for all Target Devices on this Host Adapter.
+
+TaggedQueuing:<Target-Spec>
+
+ The "TaggedQueuing:<Target-Spec>" or "TQ:<Target-Spec>" option controls
+ Tagged Queuing individually for each Target Device. <Target-Spec> is a
+ sequence of "Y", "N", and "X" characters. "Y" enables Tagged Queuing, "N"
+ disables Tagged Queuing, and "X" accepts the default based on the firmware
+ version. The first character refers to Target Device 0, the second to
+ Target Device 1, and so on; if the sequence of "Y", "N", and "X" characters
+ does not cover all the Target Devices, unspecified characters are assumed
+ to be "X".
+
+The BusLogic Driver Miscellaneous Options comprise the following:
+
+BusSettleTime:<seconds>
+
+ The "BusSettleTime:" or "BST:" option specifies the Bus Settle Time in
+ seconds. The Bus Settle Time is the amount of time to wait between a Host
+ Adapter Hard Reset which initiates a SCSI Bus Reset and issuing any SCSI
+ Commands. If unspecified, it defaults to BusLogic_DefaultBusSettleTime.
+
+InhibitTargetInquiry
+
+ The "InhibitTargetInquiry" option inhibits the execution of an Inquire
+ Target Devices or Inquire Installed Devices command on MultiMaster Host
+ Adapters. This may be necessary with some older Target Devices that do not
+ respond correctly when Logical Units above 0 are addressed.
+
+The BusLogic Driver Debugging Options comprise the following:
+
+TraceProbe
+
+ The "TraceProbe" option enables tracing of Host Adapter Probing.
+
+TraceHardwareReset
+
+ The "TraceHardwareReset" option enables tracing of Host Adapter Hardware
+ Reset.
+
+TraceConfiguration
+
+ The "TraceConfiguration" option enables tracing of Host Adapter
+ Configuration.
+
+TraceErrors
+
+ The "TraceErrors" option enables tracing of SCSI Commands that return an
+ error from the Target Device. The CDB and Sense Data will be printed for
+ each SCSI Command that fails.
+
+Debug
+
+ The "Debug" option enables all debugging options.
+
+The following examples demonstrate setting the Queue Depth for Target Devices
+1 and 2 on the first host adapter to 7 and 15, the Queue Depth for all Target
+Devices on the second host adapter to 31, and the Bus Settle Time on the
+second host adapter to 30 seconds.
+
+Linux Kernel Command Line:
+
+ linux BusLogic=QueueDepth:[,7,15];QueueDepth:31,BusSettleTime:30
+
+LILO Linux Boot Loader (in /etc/lilo.conf):
+
+ append = "BusLogic=QueueDepth:[,7,15];QueueDepth:31,BusSettleTime:30"
+
+INSMOD Loadable Kernel Module Installation Facility:
+
+ insmod BusLogic.o \
+ 'BusLogic="QueueDepth:[,7,15];QueueDepth:31,BusSettleTime:30"'
+
+NOTE: Module Utilities 2.1.71 or later is required for correct parsing
+ of driver options containing commas.
+
+
+ DRIVER INSTALLATION
+
+This distribution was prepared for Linux kernel version 2.0.35, but should be
+compatible with 2.0.4 or any later 2.0 series kernel.
+
+To install the new BusLogic SCSI driver, you may use the following commands,
+replacing "/usr/src" with wherever you keep your Linux kernel source tree:
+
+ cd /usr/src
+ tar -xvzf BusLogic-2.0.15.tar.gz
+ mv README.* LICENSE.* BusLogic.[ch] FlashPoint.c linux/drivers/scsi
+ patch -p0 < BusLogic.patch (only for 2.0.33 and below)
+ cd linux
+ make config
+ make zImage
+
+Then install "arch/x86/boot/zImage" as your standard kernel, run lilo if
+appropriate, and reboot.
+
+
+ BUSLOGIC ANNOUNCEMENTS MAILING LIST
+
+The BusLogic Announcements Mailing List provides a forum for informing Linux
+users of new driver releases and other announcements regarding Linux support
+for BusLogic SCSI Host Adapters. To join the mailing list, send a message to
+"buslogic-announce-request@dandelion.com" with the line "subscribe" in the
+message body.
diff --git a/Documentation/scsi/ChangeLog.1992-1997 b/Documentation/scsi/ChangeLog.1992-1997
new file mode 100644
index 000000000..6faad7e64
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.1992-1997
@@ -0,0 +1,2023 @@
+Sat Jan 18 15:51:45 1997 Richard Henderson <rth@tamu.edu>
+
+ * Don't play with usage_count directly, instead hand around
+ the module header and use the module macros.
+
+Fri May 17 00:00:00 1996 Leonard N. Zubkoff <lnz@dandelion.com>
+
+ * BusLogic Driver Version 2.0.3 Released.
+
+Tue Apr 16 21:00:00 1996 Leonard N. Zubkoff <lnz@dandelion.com>
+
+ * BusLogic Driver Version 1.3.2 Released.
+
+Sun Dec 31 23:26:00 1995 Leonard N. Zubkoff <lnz@dandelion.com>
+
+ * BusLogic Driver Version 1.3.1 Released.
+
+Fri Nov 10 15:29:49 1995 Leonard N. Zubkoff <lnz@dandelion.com>
+
+ * Released new BusLogic driver.
+
+Wed Aug 9 22:37:04 1995 Andries Brouwer <aeb@cwi.nl>
+
+ As a preparation for new device code, separated the various
+ functions the request->dev field had into the device proper,
+ request->rq_dev and a status field request->rq_status.
+
+ The 2nd argument of bios_param is now a kdev_t.
+
+Wed Jul 19 10:43:15 1995 Michael Neuffer <neuffer@goofy.zdv.uni-mainz.de>
+
+ * scsi.c (scsi_proc_info): /proc/scsi/scsi now also lists all
+ attached devices.
+
+ * scsi_proc.c (proc_print_scsidevice): Added. Used by scsi.c and
+ eata_dma_proc.c to produce some device info for /proc/scsi.
+
+ * eata_dma.c (eata_queue)(eata_int_handler)(eata_scsi_done):
+ Changed handling of internal SCSI commands send to the HBA.
+
+
+Wed Jul 19 10:09:17 1995 Michael Neuffer <neuffer@goofy.zdv.uni-mainz.de>
+
+ * Linux 1.3.11 released.
+
+ * eata_dma.c (eata_queue)(eata_int_handler): Added code to do
+ command latency measurements if requested by root through
+ /proc/scsi interface.
+ Throughout Use HZ constant for time references.
+
+ * eata_pio.c: Use HZ constant for time references.
+
+ * aic7xxx.c, aic7xxx.h, aic7xxx_asm.c: Changed copyright from BSD
+ to GNU style.
+
+ * scsi.h: Added READ_12 command opcode constant
+
+Wed Jul 19 09:25:30 1995 Michael Neuffer <neuffer@goofy.zdv.uni-mainz.de>
+
+ * Linux 1.3.10 released.
+
+ * scsi_proc.c (dispatch_scsi_info): Removed unused variable.
+
+Wed Jul 19 09:25:30 1995 Michael Neuffer <neuffer@goofy.zdv.uni-mainz.de>
+
+ * Linux 1.3.9 released.
+
+ * scsi.c Blacklist concept expanded to 'support' more device
+ deficiencies. blacklist[] renamed to device_list[]
+ (scan_scsis): Code cleanup.
+
+ * scsi_debug.c (scsi_debug_proc_info): Added support to control
+ device lockup simulation via /proc/scsi interface.
+
+
+Wed Jul 19 09:22:34 1995 Michael Neuffer <neuffer@goofy.zdv.uni-mainz.de>
+
+ * Linux 1.3.7 released.
+
+ * scsi_proc.c: Fixed a number of bugs in directory handling
+
+Wed Jul 19 09:18:28 1995 Michael Neuffer <neuffer@goofy.zdv.uni-mainz.de>
+
+ * Linux 1.3.5 released.
+
+ * Native wide, multichannel and /proc/scsi support now in official
+ kernel distribution.
+
+ * scsi.c/h, hosts.c/h et al reindented to increase readability
+ (especially on 80 column wide terminals).
+
+ * scsi.c, scsi_proc.c, ../../fs/proc/inode.c: Added
+ /proc/scsi/scsi which allows root to scan for hotplugged devices.
+
+ * scsi.c (scsi_proc_info): Added, to support /proc/scsi/scsi.
+ (scan_scsis): Added some 'spaghetti' code to allow scanning for
+ single devices.
+
+
+Thu Jun 20 15:20:27 1995 Michael Neuffer <neuffer@goofy.zdv.uni-mainz.de>
+
+ * proc.c: Renamed to scsi_proc.c
+
+Mon Jun 12 20:32:45 1995 Michael Neuffer <neuffer@goofy.zdv.uni-mainz.de>
+
+ * Linux 1.3.0 released.
+
+Mon May 15 19:33:14 1995 Michael Neuffer <neuffer@goofy.zdv.uni-mainz.de>
+
+ * scsi.c: Added native multichannel and wide scsi support.
+
+ * proc.c (dispatch_scsi_info) (build_proc_dir_hba_entries):
+ Updated /proc/scsi interface.
+
+Thu May 4 17:58:48 1995 Michael Neuffer <neuffer@goofy.zdv.uni-mainz.de>
+
+ * sd.c (requeue_sd_request): Zero out the scatterlist only if
+ scsi_malloc returned memory for it.
+
+ * eata_dma.c (register_HBA) (eata_queue): Add support for
+ large scatter/gather tables and set use_clustering accordingly
+
+ * hosts.c: Make use_clustering changeable in the Scsi_Host structure.
+
+Wed Apr 12 15:25:52 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.2.5 released.
+
+ * buslogic.c: Update to version 1.15 (From Leonard N. Zubkoff).
+ Fixed interrupt routine to avoid races when handling multiple
+ complete commands per interrupt. Seems to come up with faster
+ cards.
+
+ * eata_dma.c: Update to 2.3.5r. Modularize. Improved error handling
+ throughout and fixed bug interrupt routine which resulted in shifted
+ status bytes. Added blink LED state checks for ISA and EISA HBAs.
+ Memory management bug seems to have disappeared ==> increasing
+ C_P_L_CURRENT_MAX to 16 for now. Decreasing C_P_L_DIV to 3 for
+ performance reasons.
+
+ * scsi.c: If we get a FMK, EOM, or ILI when attempting to scan
+ the bus, assume that it was just noise on the bus, and ignore
+ the device.
+
+ * scsi.h: Update and add a bunch of missing commands which we
+ were never using.
+
+ * sd.c: Use restore_flags in do_sd_request - this may result in
+ latency conditions, but it gets rid of races and crashes.
+ Do not save flags again when searching for a second command to
+ queue.
+
+ * st.c: Use bytes, not STP->buffer->buffer_size when reading
+ from tape.
+
+
+Tue Apr 4 09:42:08 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.2.4 released.
+
+ * st.c: Fix typo - restoring wrong flags.
+
+Wed Mar 29 06:55:12 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.2.3 released.
+
+ * st.c: Perform some waiting operations with interrupts off.
+ Is this correct???
+
+Wed Mar 22 10:34:26 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.2.2 released.
+
+ * aha152x.c: Modularize. Add support for PCMCIA.
+
+ * eata.c: Update to version 2.0. Fixed bug preventing media
+ detection. If scsi_register_host returns NULL, fail gracefully.
+
+ * scsi.c: Detect as NEC (for photo-cd purposes) for the 84
+ and 25 models as "NEC_OLDCDR".
+
+ * scsi.h: Add define for NEC_OLDCDR
+
+ * sr.c: Add handling for NEC_OLDCDR. Treat as unknown.
+
+ * u14-34f.c: Update to version 2.0. Fixed same bug as in
+ eata.c.
+
+
+Mon Mar 6 11:11:20 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.2.0 released. Yeah!!!
+
+ * Minor spelling/punctuation changes throughout. Nothing
+ substantive.
+
+Mon Feb 20 21:33:03 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.95 released.
+
+ * qlogic.c: Update to version 0.41.
+
+ * seagate.c: Change some message to be more descriptive about what
+ we detected.
+
+ * sr.c: spelling/whitespace changes.
+
+Mon Feb 20 21:33:03 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.94 released.
+
+Mon Feb 20 08:57:17 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.93 released.
+
+ * hosts.h: Change io_port to long int from short.
+
+ * 53c7,8xx.c: crash on AEN fixed, SCSI reset is no longer a NOP,
+ NULL pointer panic on odd UDCs fixed, two bugs in diagnostic output
+ fixed, should initialize correctly if left running, now loadable,
+ new memory allocation, extraneous diagnostic output suppressed,
+ splx() replaced with save/restore flags. [ Drew ]
+
+ * hosts.c, hosts.h, scsi_ioctl.c, sd.c, sd_ioctl.c, sg.c, sr.c,
+ sr_ioctl.c: Add special junk at end that Emacs will use for
+ formatting the file.
+
+ * qlogic.c: Update to v0.40a. Improve parity handling.
+
+ * scsi.c: Add Hitachi DK312C to blacklist. Change "};" to "}" in
+ many places. Use scsi_init_malloc to get command block - may
+ need this to be dma compatible for some host adapters.
+ Restore interrupts after unregistering a host.
+
+ * sd.c: Use sti instead of restore flags - causes latency problems.
+
+ * seagate.c: Use controller_type to determine string used when
+ registering irq.
+
+ * sr.c: More photo-cd hacks to make sure we get the xa stuff right.
+ * sr.h, sr.c: Change is_xa to xa_flags field.
+
+ * st.c: Disable retries for write operations.
+
+Wed Feb 15 10:52:56 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.92 released.
+
+ * eata.c: Update to 1.17.
+
+ * eata_dma.c: Update to 2.31a. Add more support for /proc/scsi.
+ Continuing modularization. Less crashes because of the bug in the
+ memory management ==> increase C_P_L_CURRENT_MAX to 10
+ and decrease C_P_L_DIV to 4.
+
+ * hosts.c: If we remove last host registered, reuse host number.
+ When freeing memory from host being deregistered, free extra_bytes
+ too.
+
+ * scsi.c (scan_scsis): memset(SDpnt, 0) and set SCmd.device to SDpnt.
+ Change memory allocation to work around bugs in __get_dma_pages.
+ Do not free host if usage count is not zero (for modules).
+
+ * sr_ioctl.c: Increase IOCTL_TIMEOUT to 3000.
+
+ * st.c: Allow for ST_EXTRA_DEVS in st data structures.
+
+ * u14-34f.c: Update to 1.17.
+
+Thu Feb 9 10:11:16 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.91 released.
+
+ * eata.c: Update to 1.16. Use wish_block instead of host->block.
+
+ * hosts.c: Initialize wish_block to 0.
+
+ * hosts.h: Add wish_block.
+
+ * scsi.c: Use wish_block as indicator that the host should be added
+ to block list.
+
+ * sg.c: Add SG_EXTRA_DEVS to number of slots.
+
+ * u14-34f.c: Use wish_block.
+
+Tue Feb 7 11:46:04 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.90 released.
+
+ * eata.c: Change naming from eata_* to eata2x_*. Now at vers 1.15.
+ Update interrupt handler to take pt_regs as arg. Allow blocking
+ even if loaded as module. Initialize target_time_out array.
+ Do not put sti(); in timing loop.
+
+ * hosts.c: Do not reuse host numbers.
+ Use scsi_make_blocked_list to generate blocking list.
+
+ * script_asm.pl: Beats me. Don't know perl. Something to do with
+ phase index.
+
+ * scsi.c (scsi_make_blocked_list): New function - code copied from
+ hosts.c.
+
+ * scsi.c: Update code to disable photo CD for Toshiba cdroms.
+ Use just manufacturer name, not model number.
+
+ * sr.c: Fix setting density for Toshiba drives.
+
+ * u14-34f.c: Clear target_time_out array during reset.
+
+Wed Feb 1 09:20:45 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.89 released.
+
+ * Makefile, u14-34f.c: Modularize.
+
+ * Makefile, eata.c: Modularize. Now version 1.14
+
+ * NCR5380.c: Update interrupt handler with new arglist. Minor
+ cleanups.
+
+ * eata_dma.c: Begin to modularize. Add hooks for /proc/scsi.
+ New version 2.3.0a. Add code in interrupt handler to allow
+ certain CDROM drivers to be detected which return a
+ CHECK_CONDITION during SCSI bus scan. Add opcode check to get
+ all DATA IN and DATA OUT phases right. Utilize HBA_interpret flag.
+ Improvements in HBA identification. Various other minor stuff.
+
+ * hosts.c: Initialize ->dma_channel and ->io_port when registering
+ a new host.
+
+ * qlogic.c: Modularize and add PCMCIA support.
+
+ * scsi.c: Add Hitachi to blacklist.
+
+ * scsi.c: Change default to no lun scan (too many problem devices).
+
+ * scsi.h: Define QUEUE_FULL condition.
+
+ * sd.c: Do not check for non-existent partition until after
+ new media check.
+
+ * sg.c: Undo previous change which was wrong.
+
+ * sr_ioctl.c: Increase IOCTL_TIMEOUT to 2000.
+
+ * st.c: Patches from Kai - improve filemark handling.
+
+Tue Jan 31 17:32:12 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.88 released.
+
+ * Throughout - spelling/grammar fixups.
+
+ * scsi.c: Make sure that all buffers are 16 byte aligned - some
+ drivers (buslogic) need this.
+
+ * scsi.c (scan_scsis): Remove message printed.
+
+ * scsi.c (scsi_init): Move message here.
+
+Mon Jan 30 06:40:25 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.87 released.
+
+ * sr.c: Photo-cd related changes. (Gerd Knorr??).
+
+ * st.c: Changes from Kai related to EOM detection.
+
+Mon Jan 23 23:53:10 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.86 released.
+
+ * 53c7,8xx.h: Change SG size to 127.
+
+ * eata_dma: Update to version 2.10i. Remove bug in the registration
+ of multiple HBAs and channels. Minor other improvements and stylistic
+ changes.
+
+ * scsi.c: Test for Toshiba XM-3401TA and exclude from detection
+ as toshiba drive - photo cd does not work with this drive.
+
+ * sr.c: Update photocd code.
+
+Mon Jan 23 23:53:10 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.85 released.
+
+ * st.c, st_ioctl.c, sg.c, sd_ioctl.c, scsi_ioctl.c, hosts.c:
+ include linux/mm.h
+
+ * qlogic.c, buslogic.c, aha1542.c: Include linux/module.h.
+
+Sun Jan 22 22:08:46 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.84 released.
+
+ * Makefile: Support for loadable QLOGIC boards.
+
+ * aha152x.c: Update to version 1.8 from Juergen.
+
+ * eata_dma.c: Update from Michael Neuffer.
+ Remove hard limit of 2 commands per lun and make it better
+ configurable. Improvements in HBA identification.
+
+ * in2000.c: Fix biosparam to support large disks.
+
+ * qlogic.c: Minor changes (change sti -> restore_flags).
+
+Wed Jan 18 23:33:09 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.83 released.
+
+ * aha1542.c(aha1542_intr_handle): Use arguments handed down to find
+ which irq.
+
+ * buslogic.c: Likewise.
+
+ * eata_dma.c: Use min of 2 cmd_per_lun for OCS_enabled boards.
+
+ * scsi.c: Make RECOVERED_ERROR a SUGGEST_IS_OK.
+
+ * sd.c: Fail if we are opening a non-existent partition.
+
+ * sr.c: Bump SR_TIMEOUT to 15000.
+ Do not probe for media size at boot time(hard on changers).
+ Flag device as needing sector size instead.
+
+ * sr_ioctl.c: Remove CDROMMULTISESSION_SYS ioctl.
+
+ * ultrastor.c: Fix bug in call to ultrastor_interrupt (wrong #args).
+
+Mon Jan 16 07:18:23 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.82 released.
+
+ Throughout.
+ - Change all interrupt handlers to accept new calling convention.
+ In particular, we now receive the irq number as one of the arguments.
+
+ * More minor spelling corrections in some of the new files.
+
+ * aha1542.c, buslogic.c: Clean up interrupt handler a little now
+ that we receive the irq as an arg.
+
+ * aha274x.c: s/snarf_region/request_region/
+
+ * eata.c: Update to version 1.12. Fix some comments and display a
+ message if we cannot reserve the port addresses.
+
+ * u14-34f.c: Update to version 1.13. Fix some comments and display a
+ message if we cannot reserve the port addresses.
+
+ * eata_dma.c: Define get_board_data function (send INQUIRY command).
+ Use to improve detection of variants of different DPT boards. Change
+ version subnumber to "0g".
+
+ * fdomain.c: Update to version 5.26. Improve detection of some boards
+ repackaged by IBM.
+
+ * scsi.c (scsi_register_host): Change "name" to const char *.
+
+ * sr.c: Fix problem in set mode command for Toshiba drives.
+
+ * sr.c: Fix typo from patch 81.
+
+Fri Jan 13 12:54:46 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.81 released. Codefreeze for 1.2 release announced.
+
+ Big changes here.
+
+ * eata_dma.*: New files from Michael Neuffer.
+ (neuffer@goofy.zdv.uni-mainz.de). Should support
+ all eata/dpt cards.
+
+ * hosts.c, Makefile: Add eata_dma.
+
+ * README.st: Document MTEOM.
+
+ Patches from me (ERY) to finish support for low-level loadable scsi.
+ It now works, and is actually useful.
+
+ * Throughout - add new argument to scsi_init_malloc that takes an
+ additional parameter. This is used as a priority to kmalloc,
+ and you can specify the GFP_DMA flag if you need DMA-able memory.
+
+ * Makefile: For source files that are loadable, always add name
+ to SCSI_SRCS. Fill in modules: target.
+
+ * hosts.c: Change next_host to next_scsi_host, and make global.
+ Print hosts after we have identified all of them. Use info()
+ function if present, otherwise use name field.
+
+ * hosts.h: Change attach function to return int, not void.
+ Define number of device slots to allow for loadable devices.
+ Define tags to tell scsi module code what type of module we
+ are loading.
+
+ * scsi.c: Fix scan_scsis so that it can be run by a user process.
+ Do not use waiting loops - use up and down mechanism as long
+ as current != task[0].
+
+ * scsi.c(scan_scsis): Do not use stack variables for I/O - this
+ could be > 16Mb if we are loading a module at runtime (i.e. use
+ scsi_init_malloc to get some memory we know will be safe).
+
+ * scsi.c: Change dma freelist to be a set of pages. This allows
+ us to dynamically adjust the size of the list by adding more pages
+ to the pagelist. Fix scsi_malloc and scsi_free accordingly.
+
+ * scsi_module.c: Fix include.
+
+ * sd.c: Declare detach function. Increment/decrement module usage
+ count as required. Fix init functions to allow loaded devices.
+ Revalidate all new disks so we get the partition tables. Define
+ detach function.
+
+ * sr.c: Likewise.
+
+ * sg.c: Declare detach function. Allow attachment of devices on
+ loaded drivers.
+
+ * st.c: Declare detach function. Increment/decrement module usage
+ count as required.
+
+Tue Jan 10 10:09:58 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.79 released.
+
+ Patch from some undetermined individual who needs to get a life :-).
+
+ * sr.c: Attacked by spelling bee...
+
+ Patches from Gerd Knorr:
+
+ * sr.c: make printk messages for photoCD a little more informative.
+
+ * sr_ioctl.c: Fix CDROMMULTISESSION_SYS ioctl.
+
+Mon Jan 9 10:01:37 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.78 released.
+
+ * Makefile: Add empty modules: target.
+
+ * Wheee. Now change register_iomem to request_region.
+
+ * in2000.c: Bugfix - apparently this is the fix that we have
+ all been waiting for. It fixes a problem whereby the driver
+ is not stable under heavy load. Race condition and all that.
+ Patch from Peter Lu.
+
+Wed Jan 4 21:17:40 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.77 released.
+
+ * 53c7,8xx.c: Fix from Linus - emulate splx.
+
+ Throughout:
+
+ Change "snarf_region" with "register_iomem".
+
+ * scsi_module.c: New file. Contains support for low-level loadable
+ scsi drivers. [ERY].
+
+ * sd.c: More s/int/long/ changes.
+
+ * seagate.c: Explicitly include linux/config.h
+
+ * sg.c: Increment/decrement module usage count on open/close.
+
+ * sg.c: Be a bit more careful about the user not supplying enough
+ information for a valid command. Pass correct size down to
+ scsi_do_cmd.
+
+ * sr.c: More changes for Photo-CD. This apparently breaks NEC drives.
+
+ * sr_ioctl.c: Support CDROMMULTISESSION ioctl.
+
+
+Sun Jan 1 19:55:21 1995 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.76 released.
+
+ * constants.c: Add type cast in switch statement.
+
+ * scsi.c (scsi_free): Change datatype of "offset" to long.
+ (scsi_malloc): Change a few more variables to long. Who
+ did this and why was it important? 64 bit machines?
+
+
+ Lots of changes to use save_state/restore_state instead of cli/sti.
+ Files changed include:
+
+ * aha1542.c:
+ * aha1740.c:
+ * buslogic.c:
+ * in2000.c:
+ * scsi.c:
+ * scsi_debug.c:
+ * sd.c:
+ * sr.c:
+ * st.c:
+
+Wed Dec 28 16:38:29 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.75 released.
+
+ * buslogic.c: Spelling fix.
+
+ * scsi.c: Add HP C1790A and C2500A scanjet to blacklist.
+
+ * scsi.c: Spelling fixup.
+
+ * sd.c: Add support for sd_hardsizes (hard sector sizes).
+
+ * ultrastor.c: Use save_flags/restore_flags instead of cli/sti.
+
+Fri Dec 23 13:36:25 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.74 released.
+
+ * README.st: Update from Kai Makisara.
+
+ * eata.c: New version from Dario - version 1.11.
+ use scsicam bios_param routine. Add support for 2011
+ and 2021 boards.
+
+ * hosts.c: Add support for blocking. Linked list automatically
+ generated when shpnt->block is set.
+
+ * scsi.c: Add sankyo & HP scanjet to blacklist. Add support for
+ kicking things loose when we deadlock.
+
+ * scsi.c: Recognize scanners and processors in scan_scsis.
+
+ * scsi_ioctl.h: Increase timeout to 9 seconds.
+
+ * st.c: New version from Kai - add better support for backspace.
+
+ * u14-34f.c: New version from Dario. Supports blocking.
+
+Wed Dec 14 14:46:30 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.73 released.
+
+ * buslogic.c: Update from Dave Gentzel. Version 1.14.
+ Add module related stuff. More fault tolerant if out of
+ DMA memory.
+
+ * fdomain.c: New version from Rik Faith - version 5.22. Add support
+ for ISA-200S SCSI adapter.
+
+ * hosts.c: Spelling.
+
+ * qlogic.c: Update to version 0.38a. Add more support for PCMCIA.
+
+ * scsi.c: Mask device type with 0x1f during scan_scsis.
+ Add support for deadlocking, err, make that getting out of
+ deadlock situations that are created when we allow the user
+ to limit requests to one host adapter at a time.
+
+ * scsi.c: Bugfix - pass pid, not SCpnt as second arg to
+ scsi_times_out.
+
+ * scsi.c: Restore interrupt state to previous value instead of using
+ cli/sti pairs.
+
+ * scsi.c: Add a bunch of module stuff (all commented out for now).
+
+ * scsi.c: Clean up scsi_dump_status.
+
+Tue Dec 6 12:34:20 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.72 released.
+
+ * sg.c: Bugfix - always use sg_free, since we might have big buff.
+
+Fri Dec 2 11:24:53 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.71 released.
+
+ * sg.c: Clear buff field when not in use. Only call scsi_free if
+ non-null.
+
+ * scsi.h: Call wake_up(&wait_for_request) when done with a
+ command.
+
+ * scsi.c (scsi_times_out): Pass pid down so that we can protect
+ against race conditions.
+
+ * scsi.c (scsi_abort): Zero timeout field if we get the
+ NOT_RUNNING message back from low-level driver.
+
+
+ * scsi.c (scsi_done): Restore cmd_len, use_sg here.
+
+ * scsi.c (request_sense): Not here.
+
+ * hosts.h: Add new forbidden_addr, forbidden_size fields. Who
+ added these and why????
+
+ * hosts.c (scsi_mem_init): Mark pages as reserved if they fall in
+ the forbidden regions. I am not sure - I think this is so that
+ we can deal with boards that do incomplete decoding of their
+ address lines for the bios chips, but I am not entirely sure.
+
+ * buslogic.c: Set forbidden_addr stuff if using a buggy board.
+
+ * aha1740.c: Test for NULL pointer in SCtmp. This should not
+ occur, but a nice message is better than a kernel segfault.
+
+ * 53c7,8xx.c: Add new PCI chip ID for 815.
+
+Fri Dec 2 11:24:53 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.70 released.
+
+ * ChangeLog, st.c: Spelling.
+
+Tue Nov 29 18:48:42 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.69 released.
+
+ * u14-34f.h: Non-functional change. [Dario].
+
+ * u14-34f.c: Use block field in Scsi_Host to prevent commands from
+ being queued to more than one host at the same time (used when
+ motherboard does not deal with multiple bus-masters very well).
+ Only when SINGLE_HOST_OPERATIONS is defined.
+ Use new cmd_per_lun field. [Dario]
+
+ * eata.c: Likewise.
+
+ * st.c: More changes from Kai. Add ready flag to indicate drive
+ status.
+
+ * README.st: Document this.
+
+ * sr.c: Bugfix (do not subtract CD_BLOCK_OFFSET) for photo-cd
+ code.
+
+ * sg.c: Bugfix - fix problem where opcode is not correctly set up.
+
+ * seagate.[c,h]: Use #defines to set driver name.
+
+ * scsi_ioctl.c: Zero buffer before executing command.
+
+ * scsi.c: Use new cmd_per_lun field in Scsi_Hosts as appropriate.
+ Add Sony CDU55S to blacklist.
+
+ * hosts.h: Add new cmd_per_lun field to Scsi_Hosts.
+
+ * hosts.c: Initialize cmd_per_lun in Scsi_Hosts from template.
+
+ * buslogic.c: Use cmd_per_lun field - initialize to different
+ values depending upon bus type (i.e. use 1 if ISA, so we do not
+ hog memory). Use other patches which got lost from 1.1.68.
+
+ * aha1542.c: Spelling.
+
+Tue Nov 29 15:43:50 1994 Eric Youngdale (eric@andante.aib.com)
+
+ * Linux 1.1.68 released.
+
+ Add support for 12 byte vendor specific commands in scsi-generics,
+ more (i.e. the last mandatory) low-level changes to support
+ loadable modules, plus a few other changes people have requested
+ lately. Changes by me (ERY) unless otherwise noted. Spelling
+ changes appear from some unknown corner of the universe.
+
+ * Throughout: Change COMMAND_SIZE() to use SCpnt->cmd_len.
+
+ * Throughout: Change info() low level function to take a Scsi_Host
+ pointer. This way the info function can return specific
+ information about the host in question, if desired.
+
+ * All low-level drivers: Add NULL in initializer for the
+ usage_count field added to Scsi_Host_Template.
+
+ * aha152x.[c,h]: Remove redundant info() function.
+
+ * aha1542.[c,h]: Likewise.
+
+ * aha1740.[c,h]: Likewise.
+
+ * aha274x.[c,h]: Likewise.
+
+ * eata.[c,h]: Likewise.
+
+ * pas16.[c,h]: Likewise.
+
+ * scsi_debug.[c,h]: Likewise.
+
+ * t128.[c,h]: Likewise.
+
+ * u14-34f.[c,h]: Likewise.
+
+ * ultrastor.[c,h]: Likewise.
+
+ * wd7000.[c,h]: Likewise.
+
+ * aha1542.c: Add support for command line options with lilo to set
+ DMA parameters, I/O port. From Matt Aarnio.
+
+ * buslogic.[c,h]: New version (1.13) from Dave Gentzel.
+
+ * hosts.h: Add new field to Scsi_Hosts "block" to allow blocking
+ all I/O to certain other cards. Helps prevent problems with some
+ ISA motherboards.
+
+ * hosts.h: Add usage_count to Scsi_Host_Template.
+
+ * hosts.h: Add n_io_port to Scsi_Host (used when releasing module).
+
+ * hosts.c: Initialize block field.
+
+ * in2000.c: Remove "static" declarations from exported functions.
+
+ * in2000.h: Likewise.
+
+ * scsi.c: Correctly set cmd_len field as required. Save and
+ change setting when doing a request_sense, restore when done.
+ Move abort timeout message. Fix panic in request_queueable to
+ print correct function name.
+
+ * scsi.c: When incrementing usage count, walk block linked list
+ for host, and or in SCSI_HOST_BLOCK bit. When decrementing usage
+ count to 0, clear this bit to allow usage to continue, wake up
+ processes waiting.
+
+
+ * scsi_ioctl.c: If we have an info() function, call it, otherwise
+ if we have a "name" field, use it, else do nothing.
+
+ * sd.c, sr.c: Clear cmd_len field prior to each command we
+ generate.
+
+ * sd.h: Add "has_part_table" bit to rscsi_disks.
+
+ * sg.[c,h]: Add support for vendor specific 12 byte commands (i.e.
+ override command length in COMMAND_SIZE).
+
+ * sr.c: Bugfix from Gerd in photocd code.
+
+ * sr.c: Bugfix in get_sectorsize - always use scsi_malloc buffer -
+ we cannot guarantee that the stack is < 16Mb.
+
+Tue Nov 22 15:40:46 1994 Eric Youngdale (eric@andante.aib.com)
+
+ * Linux 1.1.67 released.
+
+ * sr.c: Change spelling of manufactor to manufacturer.
+
+ * scsi.h: Likewise.
+
+ * scsi.c: Likewise.
+
+ * qlogic.c: Spelling corrections.
+
+ * in2000.h: Spelling corrections.
+
+ * in2000.c: Update from Bill Earnest, change from
+ jshiffle@netcom.com. Support new bios versions.
+
+ * README.qlogic: Spelling correction.
+
+Tue Nov 22 15:40:46 1994 Eric Youngdale (eric@andante.aib.com)
+
+ * Linux 1.1.66 released.
+
+ * u14-34f.c: Spelling corrections.
+
+ * sr.[h,c]: Add support for multi-session CDs from Gerd Knorr.
+
+ * scsi.h: Add manufactor field for keeping track of device
+ manufacturer.
+
+ * scsi.c: More spelling corrections.
+
+ * qlogic.h, qlogic.c, README.qlogic: New driver from Tom Zerucha.
+
+ * in2000.c, in2000.h: New driver from Brad McLean/Bill Earnest.
+
+ * fdomain.c: Spelling correction.
+
+ * eata.c: Spelling correction.
+
+Fri Nov 18 15:22:44 1994 Eric Youngdale (eric@andante.aib.com)
+
+ * Linux 1.1.65 released.
+
+ * eata.h: Update version string to 1.08.00.
+
+ * eata.c: Set sg_tablesize correctly for DPT PM2012 boards.
+
+ * aha274x.seq: Spell checking.
+
+ * README.st: Likewise.
+
+ * README.aha274x: Likewise.
+
+ * ChangeLog: Likewise.
+
+Tue Nov 15 15:35:08 1994 Eric Youngdale (eric@andante.aib.com)
+
+ * Linux 1.1.64 released.
+
+ * u14-34f.h: Update version number to 1.10.01.
+
+ * u14-34f.c: Use Scsi_Host can_queue variable instead of one from template.
+
+ * eata.[c,h]: New driver for DPT boards from Dario Ballabio.
+
+ * buslogic.c: Use can_queue field.
+
+Wed Nov 30 12:09:09 1994 Eric Youngdale (eric@andante.aib.com)
+
+ * Linux 1.1.63 released.
+
+ * sd.c: Give I/O error if we attempt 512 byte I/O to a disk with
+ 1024 byte sectors.
+
+ * scsicam.c: Make sure we do read from whole disk (mask off
+ partition).
+
+ * scsi.c: Use can_queue in Scsi_Host structure.
+ Fix panic message about invalid host.
+
+ * hosts.c: Initialize can_queue from template.
+
+ * hosts.h: Add can_queue to Scsi_Host structure.
+
+ * aha1740.c: Print out warning about NULL ecbptr.
+
+Fri Nov 4 12:40:30 1994 Eric Youngdale (eric@andante.aib.com)
+
+ * Linux 1.1.62 released.
+
+ * fdomain.c: Update to version 5.20. (From Rik Faith). Support
+ BIOS version 3.5.
+
+ * st.h: Add ST_EOD symbol.
+
+ * st.c: Patches from Kai Makisara - support additional densities,
+ add support for MTFSS, MTBSS, MTWSM commands.
+
+ * README.st: Update to document new commands.
+
+ * scsi.c: Add Mediavision CDR-H93MV to blacklist.
+
+Sat Oct 29 20:57:36 1994 Eric Youngdale (eric@andante.aib.com)
+
+ * Linux 1.1.60 released.
+
+ * u14-34f.[c,h]: New driver from Dario Ballabio.
+
+ * aic7770.c, aha274x_seq.h, aha274x.seq, aha274x.h, aha274x.c,
+ README.aha274x: New files, new driver from John Aycock.
+
+
+Tue Oct 11 08:47:39 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.54 released.
+
+ * Add third PCI chip id. [Drew]
+
+ * buslogic.c: Set BUSLOGIC_CMDLUN back to 1 [Eric].
+
+ * ultrastor.c: Fix asm directives for new GCC.
+
+ * sr.c, sd.c: Use new end_scsi_request function.
+
+ * scsi.h(end_scsi_request): Return pointer to block if still
+ active, else return NULL if inactive. Fixes race condition.
+
+Sun Oct 9 20:23:14 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.53 released.
+
+ * scsi.c: Do not allocate dma bounce buffers if we have exactly
+ 16Mb.
+
+Fri Sep 9 05:35:30 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.51 released.
+
+ * aha152x.c: Add support for disabling the parity check. Update
+ to version 1.4. [Juergen].
+
+ * seagate.c: Tweak debugging message.
+
+Wed Aug 31 10:15:55 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.50 released.
+
+ * aha152x.c: Add eb800 for Vtech Platinum SMP boards. [Juergen].
+
+ * scsi.c: Add Quantum PD1225S to blacklist.
+
+Fri Aug 26 09:38:45 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.49 released.
+
+ * sd.c: Fix bug when we were deleting the wrong entry if we
+ get an unsupported sector size device.
+
+ * sr.c: Another spelling patch.
+
+Thu Aug 25 09:15:27 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.48 released.
+
+ * Throughout: Use new semantics for request_dma, as appropriate.
+
+ * sr.c: Print correct device number.
+
+Sun Aug 21 17:49:23 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.47 released.
+
+ * NCR5380.c: Add support for LIMIT_TRANSFERSIZE.
+
+ * constants.h: Add prototype for print_Scsi_Cmnd.
+
+ * pas16.c: Some more minor tweaks. Test for Mediavision board.
+ Allow for disks > 1Gb. [Drew??]
+
+ * sr.c: Set SCpnt->transfersize.
+
+Tue Aug 16 17:29:35 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.46 released.
+
+ * Throughout: More spelling fixups.
+
+ * buslogic.c: Add a few more fixups from Dave. Disk translation
+ mainly.
+
+ * pas16.c: Add a few patches (Drew?).
+
+
+Thu Aug 11 20:45:15 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.44 released.
+
+ * hosts.c: Add type casts for scsi_init_malloc.
+
+ * scsicam.c: Add type cast.
+
+Wed Aug 10 19:23:01 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.43 released.
+
+ * Throughout: Spelling cleanups. [??]
+
+ * aha152x.c, NCR53*.c, fdomain.c, g_NCR5380.c, pas16.c, seagate.c,
+ t128.c: Use request_irq, not irqaction. [??]
+
+ * aha1542.c: Move test for shost before we start to use shost.
+
+ * aha1542.c, aha1740.c, ultrastor.c, wd7000.c: Use new
+ calling sequence for request_irq.
+
+ * buslogic.c: Update from Dave Gentzel.
+
+Tue Aug 9 09:32:59 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.42 released.
+
+ * NCR5380.c: Change NCR5380_print_status to static.
+
+ * seagate.c: A few more bugfixes. Only Drew knows what they are
+ for.
+
+ * ultrastor.c: Tweak some __asm__ directives so that it works
+ with newer compilers. [??]
+
+Sat Aug 6 21:29:36 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.40 released.
+
+ * NCR5380.c: Return SCSI_RESET_WAKEUP from reset function.
+
+ * aha1542.c: Reset mailbox status after a bus device reset.
+
+ * constants.c: Fix typo (;;).
+
+ * g_NCR5380.c:
+ * pas16.c: Correct usage of NCR5380_init.
+
+ * scsi.c: Remove redundant (and unused variables).
+
+ * sd.c: Use memset to clear all of rscsi_disks before we use it.
+
+ * sg.c: Ditto, except for scsi_generics.
+
+ * sr.c: Ditto, except for scsi_CDs.
+
+ * st.c: Initialize STp->device.
+
+ * seagate.c: Fix bug. [Drew]
+
+Thu Aug 4 08:47:27 1994 Eric Youngdale (eric@andante)
+
+ * Linux 1.1.39 released.
+
+ * Makefile: Fix typo in NCR53C7xx.
+
+ * st.c: Print correct number for device.
+
+Tue Aug 2 11:29:14 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.38 released.
+
+ Lots of changes in 1.1.38. All from Drew unless otherwise noted.
+
+ * 53c7,8xx.c: New file from Drew. PCI driver.
+
+ * 53c7,8xx.h: Likewise.
+
+ * 53c7,8xx.scr: Likewise.
+
+ * 53c8xx_d.h, 53c8xx_u.h, script_asm.pl: Likewise.
+
+ * scsicam.c: New file from Drew. Read block 0 on the disk and
+ read the partition table. Attempt to deduce the geometry from
+ the partition table if possible. Only used by 53c[7,8]xx right
+ now, but could be used by any device for which we have no way
+ of identifying the geometry.
+
+ * sd.c: Use device letters instead of sd%d in a lot of messages.
+
+ * seagate.c: Fix bug that resulted in lockups with some devices.
+
+ * sr.c (sr_open): Return -EROFS, not -EACCES if we attempt to open
+ device for write.
+
+ * hosts.c, Makefile: Update for new driver.
+
+ * NCR5380.c, NCR5380.h, g_NCR5380.h: Update from Drew to support
+ 53C400 chip.
+
+ * constants.c: Define CONST_CMND and CONST_MSG. Other minor
+ cleanups along the way. Improve handling of CONST_MSG.
+
+ * fdomain.c, fdomain.h: New version from Rik Faith. Update to
+ 5.18. Should now support TMC-3260 PCI card with 18C30 chip.
+
+ * pas16.c: Update with new irq initialization.
+
+ * t128.c: Update with minor cleanups.
+
+ * scsi.c (scsi_pid): New variable - gives each command a unique
+ id. Add Quantum LPS5235S to blacklist. Change in_scan to
+ in_scan_scsis and make global.
+
+ * scsi.h: Add some defines for extended message handling,
+ INITIATE/RELEASE_RECOVERY. Add a few new fields to support sync
+ transfers.
+
+ * scsi_ioctl.h: Add ioctl to request synchronous transfers.
+
+
+Tue Jul 26 21:36:58 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.37 released.
+
+ * aha1542.c: Always call aha1542_mbenable, use new udelay
+ mechanism so we do not wait a long time if the board does not
+ implement this command.
+
+ * g_NCR5380.c: Remove #include <linux/config.h> and #if
+ defined(CONFIG_SCSI_*).
+
+ * seagate.c: Likewise.
+
+ Next round of changes to support loadable modules. Getting closer
+ now, still not possible to do anything remotely usable.
+
+ hosts.c: Create a linked list of detected high level devices.
+ (scsi_register_device): New function to insert into this list.
+ (scsi_init): Call scsi_register_device for each of the known high
+ level drivers.
+
+ hosts.h: Add prototype for linked list header. Add structure
+ definition for device template structure which defines the linked
+ list.
+
+ scsi.c: (scan_scsis): Use linked list instead of knowledge about
+ existing high level device drivers.
+ (scsi_dev_init): Use init functions for drivers on linked list
+ instead of explicit list to initialize and attach devices to high
+ level drivers.
+
+ scsi.h: Add new field "attached" to scsi_device - count of number
+ of high level devices attached.
+
+ sd.c, sr.c, sg.c, st.c: Adjust init/attach functions to use new
+ scheme.
+
+Sat Jul 23 13:03:17 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.35 released.
+
+ * ultrastor.c: Change constraint on asm() operand so that it works
+ with gcc 2.6.0.
+
+Thu Jul 21 10:37:39 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.33 released.
+
+ * sr.c(sr_open): Do not allow opens with write access.
+
+Mon Jul 18 09:51:22 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.31 released.
+
+ * sd.c: Increase SD_TIMEOUT from 300 to 600.
+
+ * sr.c: Remove stray task_struct* variable that was no longer
+ used.
+
+ * sr_ioctl.c: Fix typo in up() call.
+
+Sun Jul 17 16:25:29 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.30 released.
+
+ * scsi.c (scan_scsis): Fix detection of some Toshiba CDROM drives
+ that report themselves as disk drives.
+
+ * (Throughout): Use request.sem instead of request.waiting.
+ Should fix swap problem with fdomain.
+
+Thu Jul 14 10:51:42 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.29 released.
+
+ * scsi.c (scan_scsis): Add new devices to end of linked list, not
+ to the beginning.
+
+ * scsi.h (SCSI_SLEEP): Remove brain dead hack to try to save
+ the task state before sleeping.
+
+Sat Jul 9 15:01:03 1994 Eric Youngdale (eric@esp22)
+
+ More changes to eventually support loadable modules. Mainly
+ we want to use linked lists instead of arrays because it is easier
+ to dynamically add and remove things this way.
+
+ Quite a bit more work is needed before loadable modules are
+ possible (and usable) with scsi, but this is most of the grunge
+ work.
+
+ * Linux 1.1.28 released.
+
+ * scsi.c, scsi.h (allocate_device, request_queueable): Change
+ argument from index into scsi_devices to a pointer to the
+ Scsi_Device struct.
+
+ * Throughout: Change all calls to allocate_device,
+ request_queueable to use new calling sequence.
+
+ * Throughout: Use SCpnt->device instead of
+ scsi_devices[SCpnt->index]. Ugh - the pointer was there all along
+ - much cleaner this way.
+
+ * scsi.c (scsi_init_malloc, scsi_free_malloc): New functions -
+ allow us to pretend that we have a working malloc when we
+ initialize. Use this instead of passing memory_start, memory_end
+ around all over the place.
+
+ * scsi.h, st.c, sr.c, sd.c, sg.c: Change *_init1 functions to use
+ scsi_init_malloc, remove all arguments, no return value.
+
+ * scsi.h: Remove index field from Scsi_Device and Scsi_Cmnd
+ structs.
+
+ * scsi.c (scsi_dev_init): Set up for scsi_init_malloc.
+ (scan_scsis): Get SDpnt from scsi_init_malloc, and refresh
+ when we discover a device. Free pointer before returning.
+ Change scsi_devices into a linked list.
+
+ * scsi.c (scan_scsis): Change to only scan one host.
+ (scsi_dev_init): Loop over all detected hosts, and scan them.
+
+ * hosts.c (scsi_init_free): Change so that number of extra bytes
+ is stored in struct, and we do not have to pass it each time.
+
+ * hosts.h: Change Scsi_Host_Template struct to include "next" and
+ "release" functions. Initialize to NULL in all low level
+ adapters.
+
+ * hosts.c: Rename scsi_hosts to builtin_scsi_hosts, create linked
+ list scsi_hosts, linked together with the new "next" field.
+
+Wed Jul 6 05:45:02 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.25 released.
+
+ * aha152x.c: Changes from Juergen - cleanups and updates.
+
+ * sd.c, sr.c: Use new check_media_change and revalidate
+ file_operations fields.
+
+ * st.c, st.h: Add changes from Kai Makisara, dated Jun 22.
+
+ * hosts.h: Change SG_ALL back to 0xff. Apparently soft error
+ in /dev/brain resulted in having this bumped up.
+ Change first parameter in bios_param function to be Disk * instead
+ of index into rscsi_disks.
+
+ * sd_ioctl.c: Pass pointer to rscsi_disks element instead of index
+ to array.
+
+ * sd.h: Add struct name "scsi_disk" to typedef for Scsi_Disk.
+
+ * scsi.c: Remove redundant Maxtor XT8760S from blacklist.
+ In scsi_reset, add printk when DEBUG defined.
+
+ * All low level drivers: Modify definitions of bios_param in
+ appropriate way.
+
+Thu Jun 16 10:31:59 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.20 released.
+
+ * scsi_ioctl.c: Only pass down the actual number of characters
+ required to scsi_do_cmd, not the one rounded up to a even number
+ of sectors.
+
+ * ultrastor.c: Changes from Caleb Epstein for 24f cards. Support
+ larger SG lists.
+
+ * ultrastor.c: Changes from me - use scsi_register to register
+ host. Add some consistency checking,
+
+Wed Jun 1 21:12:13 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.19 released.
+
+ * scsi.h: Add new return code for reset() function:
+ SCSI_RESET_PUNT.
+
+ * scsi.c: Make SCSI_RESET_PUNT the same as SCSI_RESET_WAKEUP for
+ now.
+
+ * aha1542.c: If the command responsible for the reset is not
+ pending, return SCSI_RESET_PUNT.
+
+ * aha1740.c, buslogic.c, wd7000.c, ultrastor.c: Return
+ SCSI_RESET_PUNT instead of SCSI_RESET_SNOOZE.
+
+Tue May 31 19:36:01 1994 Eric Youngdale (eric@esp22)
+
+ * buslogic.c: Do not print out message about "must be Adaptec"
+ if we have detected a buslogic card. Print out a warning message
+ if we are configuring for >16Mb, since the 445S at board level
+ D or earlier does not work right. The "D" level board can be made
+ to work by flipping an undocumented switch, but this is too subtle.
+
+ Changes based upon patches in Yggdrasil distribution.
+
+ * sg.c, sg.h: Return sense data to user.
+
+ * aha1542.c, aha1740.c, buslogic.c: Do not panic if
+ sense buffer is wrong size.
+
+ * hosts.c: Test for ultrastor card before any of the others.
+
+ * scsi.c: Allow boot-time option for max_scsi_luns=? so that
+ buggy firmware has an easy work-around.
+
+Sun May 15 20:24:34 1994 Eric Youngdale (eric@esp22)
+
+ * Linux 1.1.15 released.
+
+ Post-codefreeze thaw...
+
+ * buslogic.[c,h]: New driver from David Gentzel.
+
+ * hosts.h: Add use_clustering field to explicitly say whether
+ clustering should be used for devices attached to this host
+ adapter. The buslogic board apparently supports large SG lists,
+ but it is apparently faster if sd.c condenses this into a smaller
+ list.
+
+ * sd.c: Use this field instead of heuristic.
+
+ * All host adapter include files: Add appropriate initializer for
+ use_clustering field.
+
+ * scsi.h: Add #defines for return codes for the abort and reset
+ functions. There are now a specific set of return codes to fully
+ specify all of the possible things that the low-level adapter
+ could do.
+
+ * scsi.c: Act based upon return codes from abort/reset functions.
+
+ * All host adapter abort/reset functions: Return new return code.
+
+ * Add code in scsi.c to help debug timeouts. Use #define
+ DEBUG_TIMEOUT to enable this.
+
+ * scsi.c: If the host->irq field is set, use
+ disable_irq/enable_irq before calling queuecommand if we
+ are not already in an interrupt. Reduce races, and we
+ can be sloppier about cli/sti in the interrupt routines now
+ (reduce interrupt latency).
+
+ * constants.c: Fix some things to eliminate warnings. Add some
+ sense descriptions that were omitted before.
+
+ * aha1542.c: Watch for SCRD from host adapter - if we see it, set
+ a flag. Currently we only print out the number of pending
+ commands that might need to be restarted.
+
+ * aha1542.c (aha1542_abort): Look for lost interrupts, OGMB still
+ full, and attempt to recover. Otherwise give up.
+
+ * aha1542.c (aha1542_reset): Try BUS DEVICE RESET, and then pass
+ DID_RESET back up to the upper level code for all commands running
+ on this target (even on different LUNs).
+
+Sat May 7 14:54:01 1994
+
+ * Linux 1.1.12 released.
+
+ * st.c, st.h: New version from Kai. Supports boot time
+ specification of number of buffers.
+
+ * wd7000.[c,h]: Updated driver from John Boyd. Now supports
+ more than one wd7000 board in machine at one time, among other things.
+
+Wed Apr 20 22:20:35 1994
+
+ * Linux 1.1.8 released.
+
+ * sd.c: Add a few type casts where scsi_malloc is called.
+
+Wed Apr 13 12:53:29 1994
+
+ * Linux 1.1.4 released.
+
+ * scsi.c: Clean up a few printks (use %p to print pointers).
+
+Wed Apr 13 11:33:02 1994
+
+ * Linux 1.1.3 released.
+
+ * fdomain.c: Update to version 5.16 (Handle different FIFO sizes
+ better).
+
+Fri Apr 8 08:57:19 1994
+
+ * Linux 1.1.2 released.
+
+ * Throughout: SCSI portion of cluster diffs added.
+
+Tue Apr 5 07:41:50 1994
+
+ * Linux 1.1 development tree initiated.
+
+ * The linux 1.0 development tree is now effectively frozen except
+ for obvious bugfixes.
+
+******************************************************************
+******************************************************************
+******************************************************************
+******************************************************************
+
+Sun Apr 17 00:17:39 1994
+
+ * Linux 1.0, patchlevel 9 released.
+
+ * fdomain.c: Update to version 5.16 (Handle different FIFO sizes
+ better).
+
+Thu Apr 7 08:36:20 1994
+
+ * Linux 1.0, patchlevel8 released.
+
+ * fdomain.c: Update to version 5.15 from 5.9. Handles 3.4 bios.
+
+Sun Apr 3 14:43:03 1994
+
+ * Linux 1.0, patchlevel6 released.
+
+ * wd7000.c: Make stab at fixing race condition.
+
+Sat Mar 26 14:14:50 1994
+
+ * Linux 1.0, patchlevel5 released.
+
+ * aha152x.c, Makefile: Fix a few bugs (too much data message).
+ Add a few more bios signatures. (Patches from Juergen).
+
+ * aha1542.c: Fix race condition in aha1542_out.
+
+Mon Mar 21 16:36:20 1994
+
+ * Linux 1.0, patchlevel3 released.
+
+ * sd.c, st.c, sr.c, sg.c: Return -ENXIO, not -ENODEV if we attempt
+ to open a non-existent device.
+
+ * scsi.c: Add Chinon cdrom to blacklist.
+
+ * sr_ioctl.c: Check return status of verify_area.
+
+Sat Mar 6 16:06:19 1994
+
+ * Linux 1.0 released (technically a pre-release).
+
+ * scsi.c: Add IMS CDD521, Maxtor XT-8760S to blacklist.
+
+Tue Feb 15 10:58:20 1994
+
+ * pl15e released.
+
+ * aha1542.c: For 1542C, allow dynamic device scan with >1Gb turned
+ off.
+
+ * constants.c: Fix typo in definition of CONSTANTS.
+
+ * pl15d released.
+
+Fri Feb 11 10:10:16 1994
+
+ * pl15c released.
+
+ * scsi.c: Add Maxtor XT-3280 and Rodime RO3000S to blacklist.
+
+ * scsi.c: Allow tagged queueing for scsi 3 devices as well.
+ Some really old devices report a version number of 0. Disallow
+ LUN != 0 for these.
+
+Thu Feb 10 09:48:57 1994
+
+ * pl15b released.
+
+Sun Feb 6 12:19:46 1994
+
+ * pl15a released.
+
+Fri Feb 4 09:02:17 1994
+
+ * scsi.c: Add Teac cdrom to blacklist.
+
+Thu Feb 3 14:16:43 1994
+
+ * pl15 released.
+
+Tue Feb 1 15:47:43 1994
+
+ * pl14w released.
+
+ * wd7000.c (wd_bases): Fix typo in last change.
+
+Mon Jan 24 17:37:23 1994
+
+ * pl14u released.
+
+ * aha1542.c: Support 1542CF/extended bios. Different from 1542C
+
+ * wd7000.c: Allow bios at 0xd8000 as well.
+
+ * ultrastor.c: Do not truncate cylinders to 1024.
+
+ * fdomain.c: Update to version 5.9 (add new bios signature).
+
+ * NCR5380.c: Update from Drew - should work a lot better now.
+
+Sat Jan 8 15:13:10 1994
+
+ * pl14o released.
+
+ * sr_ioctl.c: Zero reserved field before trying to set audio volume.
+
+Wed Jan 5 13:21:10 1994
+
+ * pl14m released.
+
+ * fdomain.c: Update to version 5.8. No functional difference???
+
+Tue Jan 4 14:26:13 1994
+
+ * pl14l released.
+
+ * ultrastor.c: Remove outl, inl functions (now provided elsewhere).
+
+Mon Jan 3 12:27:25 1994
+
+ * pl14k released.
+
+ * aha152x.c: Remove insw and outsw functions.
+
+ * fdomain.c: Ditto.
+
+Wed Dec 29 09:47:20 1993
+
+ * pl14i released.
+
+ * scsi.c: Support RECOVERED_ERROR for tape drives.
+
+ * st.c: Update of tape driver from Kai.
+
+Tue Dec 21 09:18:30 1993
+
+ * pl14g released.
+
+ * aha1542.[c,h]: Support extended BIOS stuff.
+
+ * scsi.c: Clean up messages about disks, so they are displayed as
+ sda, sdb, etc instead of sd0, sd1, etc.
+
+ * sr.c: Force reread of capacity if disk was changed.
+ Clear buffer before asking for capacity/sectorsize (some drives
+ do not report this properly). Set needs_sector_size flag if
+ drive did not return sensible sector size.
+
+Mon Dec 13 12:13:47 1993
+
+ * aha152x.c: Update to version .101 from Juergen.
+
+Mon Nov 29 03:03:00 1993
+
+ * linux 0.99.14 released.
+
+ * All scsi stuff moved from kernel/blk_drv/scsi to drivers/scsi.
+
+ * Throughout: Grammatical corrections to various comments.
+
+ * Makefile: fix so that we do not need to compile things we are
+ not going to use.
+
+ * NCR5380.c, NCR5380.h, g_NCR5380.c, g_NCR5380.h, pas16.c,
+ pas16.h, t128.c, t128.h: New files from Drew.
+
+ * aha152x.c, aha152x.h: New files from Juergen Fischer.
+
+ * aha1542.c: Support for more than one 1542 in the machine
+ at the same time. Make functions static that do not need
+ visibility.
+
+ * aha1740.c: Set NEEDS_JUMPSTART flag in reset function, so we
+ know to restart the command. Change prototype of aha1740_reset
+ to take a command pointer.
+
+ * constants.c: Clean up a few things.
+
+ * fdomain.c: Update to version 5.6. Move snarf_region. Allow
+ board to be set at different SCSI ids. Remove support for
+ reselection (did not work well). Set JUMPSTART flag in reset
+ code.
+
+ * hosts.c: Support new low-level adapters. Allow for more than
+ one adapter of a given type.
+
+ * hosts.h: Allow for more than one adapter of a given type.
+
+ * scsi.c: Add scsi_device_types array, if NEEDS_JUMPSTART is set
+ after a low-level reset, start the command again. Sort blacklist,
+ and add Maxtor MXT-1240S, XT-4170S, NEC CDROM 84, Seagate ST157N.
+
+ * scsi.h: Add constants for tagged queueing.
+
+ * Throughout: Use constants from major.h instead of hardcoded
+ numbers for major numbers.
+
+ * scsi_ioctl.c: Fix bug in buffer length in ioctl_command. Use
+ verify_area in GET_IDLUN ioctl. Add new ioctls for
+ TAGGED_QUEUE_ENABLE, DISABLE. Only allow IOCTL_SEND_COMMAND by
+ superuser.
+
+ * sd.c: Only pay attention to UNIT_ATTENTION for removable disks.
+ Fix bug where sometimes portions of blocks would get lost
+ resulting in processes hanging. Add messages when we spin up a
+ disk, and fix a bug in the timing. Increase read-ahead for disks
+ that are on a scatter-gather capable host adapter.
+
+ * seagate.c: Fix so that some parameters can be set from the lilo
+ prompt. Supply jumpstart flag if we are resetting and need the
+ command restarted. Fix so that we return 1 if we detect a card
+ so that multiple card detection works correctly. Add yet another
+ signature for FD cards (950). Add another signature for ST0x.
+
+ * sg.c, sg.h: New files from Lawrence Foard for generic scsi
+ access.
+
+ * sr.c: Add type casts for (void*) so that we can do pointer
+ arithmetic. Works with GCC without this, but it is not strictly
+ correct. Same bugfix as was in sd.c. Increase read-ahead a la
+ disk driver.
+
+ * sr_ioctl.c: Use scsi_malloc buffer instead of buffer from stack
+ since we cannot guarantee that the stack is < 16Mb.
+
+ ultrastor.c: Update to support 24f properly (JFC's driver).
+
+ wd7000.c: Supply jumpstart flag for reset. Do not round up
+ number of cylinders in biosparam function.
+
+Sat Sep 4 20:49:56 1993
+
+ * 0.99pl13 released.
+
+ * Throughout: Use check_region/snarf_region for all low-level
+ drivers.
+
+ * aha1542.c: Do hard reset instead of soft (some ethercard probes
+ screw us up).
+
+ * scsi.c: Add new flag ASKED_FOR_SENSE so that we can tell if we are
+ in a loop whereby the device returns null sense data.
+
+ * sd.c: Add code to spin up a drive if it is not already spinning.
+ Do this one at a time to make it easier on power supplies.
+
+ * sd_ioctl.c: Use sync_dev instead of fsync_dev in BLKFLSBUF ioctl.
+
+ * seagate.c: Switch around DATA/CONTROL lines.
+
+ * st.c: Change sense to unsigned.
+
+Thu Aug 5 11:59:18 1993
+
+ * 0.99pl12 released.
+
+ * constants.c, constants.h: New files with ascii descriptions of
+ various conditions.
+
+ * Makefile: Do not try to count the number of low-level drivers,
+ just generate the list of .o files.
+
+ * aha1542.c: Replace 16 with sizeof(SCpnt->sense_buffer). Add tests
+ for addresses > 16Mb, panic if we find one.
+
+ * aha1740.c: Ditto with sizeof().
+
+ * fdomain.c: Update to version 3.18. Add new signature, register IRQ
+ with irqaction. Use ID 7 for new board. Be more intelligent about
+ obtaining the h/s/c numbers for biosparam.
+
+ * hosts.c: Do not depend upon Makefile generated count of the number
+ of low-level host adapters.
+
+ * scsi.c: Use array for scsi_command_size instead of a function. Add
+ Texel cdrom and Maxtor XT-4380S to blacklist. Allow compile time
+ option for no-multi lun scan. Add semaphore for possible problems
+ with handshaking, assume device is faulty until we know it not to be
+ the case. Add DEBUG_INIT symbol to dump info as we scan for devices.
+ Zero sense buffer so we can tell if we need to request it. When
+ examining sense information, request sense if buffer is all zero.
+ If RESET, request sense information to see what to do next.
+
+ * scsi_debug.c: Change some constants to use symbols like INT_MAX.
+
+ * scsi_ioctl.c (kernel_scsi_ioctl): New function -for making ioctl
+ calls from kernel space.
+
+ * sd.c: Increase timeout to 300. Use functions in constants.h to
+ display info. Use scsi_malloc buffer for READ_CAPACITY, since
+ we cannot guarantee that a stack based buffer is < 16Mb.
+
+ * sd_ioctl.c: Add BLKFLSBUF ioctl.
+
+ * seagate.c: Add new compile time options for ARBITRATE,
+ SLOW_HANDSHAKE, and SLOW_RATE. Update assembly loops for transferring
+ data. Use kernel_scsi_ioctl to request mode page with geometry.
+
+ * sr.c: Use functions in constants.c to display messages.
+
+ * st.c: Support for variable block size.
+
+ * ultrastor.c: Do not use cache for tape drives. Set
+ unchecked_isa_dma flag, even though this may not be needed (gets set
+ later).
+
+Sat Jul 17 18:32:44 1993
+
+ * 0.99pl11 released. C++ compilable.
+
+ * Throughout: Add type casts all over the place, and use "ip" instead
+ of "info" in the various biosparam functions.
+
+ * Makefile: Compile seagate.c with C++ compiler.
+
+ * aha1542.c: Always set ccb pointer as this gets trashed somehow on
+ some systems. Add a few type casts. Update biosparam function a little.
+
+ * aha1740.c: Add a few type casts.
+
+ * fdomain.c: Update to version 3.17 from 3.6. Now works with
+ TMC-18C50.
+
+ * scsi.c: Minor changes here and there with datatypes. Save use_sg
+ when requesting sense information so that this can properly be
+ restored if we retry the command. Set aside dma buffers assuming each
+ block is 1 page, not 1Kb minix block.
+
+ * scsi_ioctl.c: Add a few type casts. Other minor changes.
+
+ * sd.c: Correctly free all scsi_malloc'd memory if we run out of
+ dma_pool. Store blocksize information for each partition.
+
+ * seagate.c: Minor cleanups here and there.
+
+ * sr.c: Set up blocksize array for all discs. Fix bug in freeing
+ buffers if we run out of dma pool.
+
+Thu Jun 2 17:58:11 1993
+
+ * 0.99pl10 released.
+
+ * aha1542.c: Support for BT 445S (VL-bus board with no dma channel).
+
+ * fdomain.c: Upgrade to version 3.6. Preliminary support for TNC-18C50.
+
+ * scsi.c: First attempt to fix problem with old_use_sg. Change
+ NOT_READY to a SUGGEST_ABORT. Fix timeout race where time might
+ get decremented past zero.
+
+ * sd.c: Add block_fsync function to dispatch table.
+
+ * sr.c: Increase timeout to 500 from 250. Add entry for sync in
+ dispatch table (supply NULL). If we do not have a sectorsize,
+ try to get it in the sd_open function. Add new function just to
+ obtain sectorsize.
+
+ * sr.h: Add needs_sector_size semaphore.
+
+ * st.c: Add NULL for fsync in dispatch table.
+
+ * wd7000.c: Allow another condition for power on that are normal
+ and do not require a panic.
+
+Thu Apr 22 23:10:11 1993
+
+ * 0.99pl9 released.
+
+ * aha1542.c: Use (void) instead of () in setup_mailboxes.
+
+ * scsi.c: Initialize transfersize and underflow fields in SCmd to 0.
+ Do not panic for unsupported message bytes.
+
+ * scsi.h: Allocate 12 bytes instead of 10 for commands. Add
+ transfersize and underflow fields.
+
+ * scsi_ioctl.c: Further bugfix to ioctl_probe.
+
+ * sd.c: Use long instead of int for last parameter in sd_ioctl.
+ Initialize transfersize and underflow fields.
+
+ * sd_ioctl.c: Ditto for sd_ioctl(,,,,);
+
+ * seagate.c: New version from Drew. Includes new signatures for FD
+ cards. Support for 0ws jumper. Correctly initialize
+ scsi_hosts[hostnum].this_id. Improved handing of
+ disconnect/reconnect, and support command linking. Use
+ transfersize and underflow fields. Support scatter-gather.
+
+ * sr.c, sr_ioctl.c: Use long instead of int for last parameter in sr_ioctl.
+ Use buffer and buflength in do_ioctl. Patches from Chris Newbold for
+ scsi-2 audio commands.
+
+ * ultrastor.c: Comment out in_byte (compiler warning).
+
+ * wd7000.c: Change () to (void) in wd7000_enable_dma.
+
+Wed Mar 31 16:36:25 1993
+
+ * 0.99pl8 released.
+
+ * aha1542.c: Handle mailboxes better for 1542C.
+ Do not truncate number of cylinders at 1024 for biosparam call.
+
+ * aha1740.c: Fix a few minor bugs for multiple devices.
+ Same as above for biosparam.
+
+ * scsi.c: Add lockable semaphore for removable devices that can have
+ media removal prevented. Add another signature for flopticals.
+ (allocate_device): Fix race condition. Allow more space in dma pool
+ for blocksizes of up to 4Kb.
+
+ * scsi.h: Define COMMAND_SIZE. Define a SCSI specific version of
+ INIT_REQUEST that can run with interrupts off.
+
+ * scsi_ioctl.c: Make ioctl_probe function more idiot-proof. If
+ a removable device says ILLEGAL REQUEST to a door-locking command,
+ clear lockable flag. Add SCSI_IOCTL_GET_IDLUN ioctl. Do not attempt
+ to lock door for devices that do not have lockable semaphore set.
+
+ * sd.c: Fix race condition for multiple disks. Use INIT_SCSI_REQUEST
+ instead of INIT_REQUEST. Allow sector sizes of 1024 and 256. For
+ removable disks that are not ready, mark them as having a media change
+ (some drives do not report this later).
+
+ * seagate.c: Use volatile keyword for memory-mapped register pointers.
+
+ * sr.c: Fix race condition, a la sd.c. Increase the number of retries
+ to 1. Use INIT_SCSI_REQUEST. Allow 512 byte sector sizes. Do a
+ read_capacity when we init the device so we know the size and
+ sectorsize.
+
+ * st.c: If ioctl not found in st.c, try scsi_ioctl for others.
+
+ * ultrastor.c: Do not truncate number of cylinders at 1024 for
+ biosparam call.
+
+ * wd7000.c: Ditto.
+ Throughout: Use COMMAND_SIZE macro to determine length of scsi
+ command.
+
+
+
+Sat Mar 13 17:31:29 1993
+
+ * 0.99pl7 released.
+
+ Throughout: Improve punctuation in some messages, and use new
+ verify_area syntax.
+
+ * aha1542.c: Handle unexpected interrupts better.
+
+ * scsi.c: Ditto. Handle reset conditions a bit better, asking for
+ sense information and retrying if required.
+
+ * scsi_ioctl.c: Allow for 12 byte scsi commands.
+
+ * ultrastor.c: Update to use scatter-gather.
+
+Sat Feb 20 17:57:15 1993
+
+ * 0.99pl6 released.
+
+ * fdomain.c: Update to version 3.5. Handle spurious interrupts
+ better.
+
+ * sd.c: Use register_blkdev function.
+
+ * sr.c: Ditto.
+
+ * st.c: Use register_chrdev function.
+
+ * wd7000.c: Undo previous change.
+
+Sat Feb 6 11:20:43 1993
+
+ * 0.99pl5 released.
+
+ * scsi.c: Fix bug in testing for UNIT_ATTENTION.
+
+ * wd7000.c: Check at more addresses for bios. Fix bug in biosparam
+ (heads & sectors turned around).
+
+Wed Jan 20 18:13:59 1993
+
+ * 0.99pl4 released.
+
+ * scsi.c: Ignore leading spaces when looking for blacklisted devices.
+
+ * seagate.c: Add a few new signatures for FD cards. Another patch
+ with SCint to fix race condition. Use recursion_depth to keep track
+ of how many times we have been recursively called, and do not start
+ another command unless we are on the outer level. Fixes bug
+ with Syquest cartridge drives (used to crash kernel), because
+ they do not disconnect with large data transfers.
+
+Tue Jan 12 14:33:36 1993
+
+ * 0.99pl3 released.
+
+ * fdomain.c: Update to version 3.3 (a few new signatures).
+
+ * scsi.c: Add CDU-541, Denon DRD-25X to blacklist.
+ (allocate_request, request_queueable): Init request.waiting to NULL if
+ non-buffer type of request.
+
+ * seagate.c: Allow controller to be overridden with CONTROLLER symbol.
+ Set SCint=NULL when we are done, to remove race condition.
+
+ * st.c: Changes from Kai.
+
+Wed Dec 30 20:03:47 1992
+
+ * 0.99pl2 released.
+
+ * scsi.c: Blacklist back in. Remove Newbury drive as other bugfix
+ eliminates need for it here.
+
+ * sd.c: Return ENODEV instead of EACCES if no such device available.
+ (sd_init) Init blkdev_fops earlier so that sd_open is available sooner.
+
+ * sr.c: Same as above for sd.c.
+
+ * st.c: Return ENODEV instead of ENXIO if no device. Init chrdev_fops
+ sooner, so that it is always there even if no tapes.
+
+ * seagate.c (controller_type): New variable to keep track of ST0x or
+ FD. Modify signatures list to indicate controller type, and init
+ controller_type once we find a match.
+
+ * wd7000.c (wd7000_set_sync): Remove redundant function.
+
+Sun Dec 20 16:26:24 1992
+
+ * 0.99pl1 released.
+
+ * scsi_ioctl.c: Bugfix - check dev->index, not dev->id against
+ NR_SCSI_DEVICES.
+
+ * sr_ioctl.c: Verify that device exists before allowing an ioctl.
+
+ * st.c: Patches from Kai - change timeout values, improve end of tape
+ handling.
+
+Sun Dec 13 18:15:23 1992
+
+ * 0.99 kernel released. Baseline for this ChangeLog.
diff --git a/Documentation/scsi/ChangeLog.arcmsr b/Documentation/scsi/ChangeLog.arcmsr
new file mode 100644
index 000000000..038a3e6ec
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.arcmsr
@@ -0,0 +1,118 @@
+**************************************************************************
+** History
+**
+** REV# DATE NAME DESCRIPTION
+** 1.00.00.00 3/31/2004 Erich Chen First release
+** 1.10.00.04 7/28/2004 Erich Chen modify for ioctl
+** 1.10.00.06 8/28/2004 Erich Chen modify for 2.6.x
+** 1.10.00.08 9/28/2004 Erich Chen modify for x86_64
+** 1.10.00.10 10/10/2004 Erich Chen bug fix for SMP & ioctl
+** 1.20.00.00 11/29/2004 Erich Chen bug fix with arcmsr_bus_reset when PHY error
+** 1.20.00.02 12/09/2004 Erich Chen bug fix with over 2T bytes RAID Volume
+** 1.20.00.04 1/09/2005 Erich Chen fits for Debian linux kernel version 2.2.xx
+** 1.20.00.05 2/20/2005 Erich Chen cleanly as look like a Linux driver at 2.6.x
+** thanks for peoples kindness comment
+** Kornel Wieliczek
+** Christoph Hellwig
+** Adrian Bunk
+** Andrew Morton
+** Christoph Hellwig
+** James Bottomley
+** Arjan van de Ven
+** 1.20.00.06 3/12/2005 Erich Chen fix with arcmsr_pci_unmap_dma "unsigned long" cast,
+** modify PCCB POOL allocated by "dma_alloc_coherent"
+** (Kornel Wieliczek's comment)
+** 1.20.00.07 3/23/2005 Erich Chen bug fix with arcmsr_scsi_host_template_init
+** occur segmentation fault,
+** if RAID adapter does not on PCI slot
+** and modprobe/rmmod this driver twice.
+** bug fix enormous stack usage (Adrian Bunk's comment)
+** 1.20.00.08 6/23/2005 Erich Chen bug fix with abort command,
+** in case of heavy loading when sata cable
+** working on low quality connection
+** 1.20.00.09 9/12/2005 Erich Chen bug fix with abort command handling, firmware version check
+** and firmware update notify for hardware bug fix
+** 1.20.00.10 9/23/2005 Erich Chen enhance sysfs function for change driver's max tag Q number.
+** add DMA_64BIT_MASK for backward compatible with all 2.6.x
+** add some useful message for abort command
+** add ioctl code 'ARCMSR_IOCTL_FLUSH_ADAPTER_CACHE'
+** customer can send this command for sync raid volume data
+** 1.20.00.11 9/29/2005 Erich Chen by comment of Arjan van de Ven fix incorrect msleep redefine
+** cast off sizeof(dma_addr_t) condition for 64bit pci_set_dma_mask
+** 1.20.00.12 9/30/2005 Erich Chen bug fix with 64bit platform's ccbs using if over 4G system memory
+** change 64bit pci_set_consistent_dma_mask into 32bit
+** increcct adapter count if adapter initialize fail.
+** miss edit at arcmsr_build_ccb....
+** psge += sizeof(struct _SG64ENTRY *) =>
+** psge += sizeof(struct _SG64ENTRY)
+** 64 bits sg entry would be incorrectly calculated
+** thanks Kornel Wieliczek give me kindly notify
+** and detail description
+** 1.20.00.13 11/15/2005 Erich Chen scheduling pending ccb with FIFO
+** change the architecture of arcmsr command queue list
+** for linux standard list
+** enable usage of pci message signal interrupt
+** follow Randy.Danlup kindness suggestion cleanup this code
+** 1.20.00.14 05/02/2007 Erich Chen & Nick Cheng
+** 1.implement PCI-Express error recovery function and AER capability
+** 2.implement the selection of ARCMSR_MAX_XFER_SECTORS_B=4096
+** if firmware version is newer than 1.42
+** 3.modify arcmsr_iop_reset to improve the ability
+** 4.modify the ISR, arcmsr_interrupt routine,to prevent the
+** inconsistency with sg_mod driver if application directly calls
+** the arcmsr driver w/o passing through scsi mid layer
+** specially thanks to Yanmin Zhang's openhanded help about AER
+** 1.20.00.15 08/30/2007 Erich Chen & Nick Cheng
+** 1. support ARC1200/1201/1202 SATA RAID adapter, which is named
+** ACB_ADAPTER_TYPE_B
+** 2. modify the arcmsr_pci_slot_reset function
+** 3. modify the arcmsr_pci_ers_disconnect_forepart function
+** 4. modify the arcmsr_pci_ers_need_reset_forepart function
+** 1.20.00.15 09/27/2007 Erich Chen & Nick Cheng
+** 1. add arcmsr_enable_eoi_mode() on adapter Type B
+** 2. add readl(reg->iop2drv_doorbell_reg) in arcmsr_handle_hbb_isr()
+** in case of the doorbell interrupt clearance is cached
+** 1.20.00.15 10/01/2007 Erich Chen & Nick Cheng
+** 1. modify acb->devstate[i][j]
+** as ARECA_RAID_GOOD instead of
+** ARECA_RAID_GONE in arcmsr_alloc_ccb_pool
+** 1.20.00.15 11/06/2007 Erich Chen & Nick Cheng
+** 1. add conditional declaration for
+** arcmsr_pci_error_detected() and
+** arcmsr_pci_slot_reset
+** 1.20.00.15 11/23/2007 Erich Chen & Nick Cheng
+** 1.check if the sg list member number
+** exceeds arcmsr default limit in arcmsr_build_ccb()
+** 2.change the returned value type of arcmsr_build_ccb()
+** from "void" to "int"
+** 3.add the conditional check if arcmsr_build_ccb()
+** returns FAILED
+** 1.20.00.15 12/04/2007 Erich Chen & Nick Cheng
+** 1. modify arcmsr_drain_donequeue() to ignore unknown
+** command and let kernel process command timeout.
+** This could handle IO request violating max. segments
+** while Linux XFS over DM-CRYPT.
+** Thanks to Milan Broz's comments <mbroz@redhat.com>
+** 1.20.00.15 12/24/2007 Erich Chen & Nick Cheng
+** 1.fix the portability problems
+** 2.fix type B where we should _not_ iounmap() acb->pmu;
+** it's not ioremapped.
+** 3.add return -ENOMEM if ioremap() fails
+** 4.transfer IS_SG64_ADDR w/ cpu_to_le32()
+** in arcmsr_build_ccb
+** 5. modify acb->devstate[i][j] as ARECA_RAID_GONE instead of
+** ARECA_RAID_GOOD in arcmsr_alloc_ccb_pool()
+** 6.fix arcmsr_cdb->Context as (unsigned long)arcmsr_cdb
+** 7.add the checking state of
+** (outbound_intstatus & ARCMSR_MU_OUTBOUND_HANDLE_INT) == 0
+** in arcmsr_handle_hba_isr
+** 8.replace pci_alloc_consistent()/pci_free_consistent() with kmalloc()/kfree() in arcmsr_iop_message_xfer()
+** 9. fix the release of dma memory for type B in arcmsr_free_ccb_pool()
+** 10.fix the arcmsr_polling_hbb_ccbdone()
+** 1.20.00.15 02/27/2008 Erich Chen & Nick Cheng
+** 1.arcmsr_iop_message_xfer() is called from atomic context under the
+** queuecommand scsi_host_template handler. James Bottomley pointed out
+** that the current GFP_KERNEL|GFP_DMA flags are wrong: firstly we are in
+** atomic context, secondly this memory is not used for DMA.
+** Also removed some unneeded casts. Thanks to Daniel Drake <dsd@gentoo.org>
+**************************************************************************
diff --git a/Documentation/scsi/ChangeLog.ips b/Documentation/scsi/ChangeLog.ips
new file mode 100644
index 000000000..5019f5182
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.ips
@@ -0,0 +1,122 @@
+IBM ServeRAID driver Change Log
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ 5.00.01 - Sarasota ( 5i ) adapters must always be scanned first
+ - Get rid on IOCTL_NEW_COMMAND code
+ - Add Extended DCDB Commands for Tape Support in 5I
+
+ 4.90.11 - Don't actually RESET unless it's physically required
+ - Remove unused compile options
+
+ 4.90.08 - Data Corruption if First Scatter Gather Element is > 64K
+
+ 4.90.08 - Increase Delays in Flashing ( Trombone Only - 4H )
+
+ 4.90.05 - Use New PCI Architecture to facilitate Hot Plug Development
+
+ 4.90.01 - Add ServeRAID Version Checking
+
+ 4.80.26 - Clean up potential code problems ( Arjan's recommendations )
+
+ 4.80.21 - Change memcpy() to copy_to_user() in NVRAM Page 5 IOCTL path
+
+ 4.80.20 - Set max_sectors in Scsi_Host structure ( if >= 2.4.7 kernel )
+ - 5 second delay needed after resetting an i960 adapter
+
+ 4.80.14 - Take all semaphores off stack
+ - Clean Up New_IOCTL path
+
+ 4.80.04 - Eliminate calls to strtok() if 2.4.x or greater
+ - Adjustments to Device Queue Depth
+
+ 4.80.00 - Make ia64 Safe
+
+ 4.72.01 - I/O Mapped Memory release ( so "insmod ips" does not Fail )
+ - Don't Issue Internal FFDC Command if there are Active Commands
+ - Close Window for getting too many IOCTL's active
+
+ 4.72.00 - Allow for a Scatter-Gather Element to exceed MAX_XFER Size
+
+ 4.71.00 - Change all memory allocations to not use GFP_DMA flag
+ - Code Clean-Up for 2.4.x kernel
+
+ 4.70.15 - Fix Breakup for very large ( non-SG ) requests
+
+ 4.70.13 - Don't release HA Lock in ips_next() until SC taken off queue
+ - Unregister SCSI device in ips_release()
+ - Don't Send CDB's if we already know the device is not present
+
+ 4.70.12 - Corrective actions for bad controller ( during initialization )
+
+ 4.70.09 - Use a Common ( Large Buffer ) for Flashing from the JCRM CD
+ - Add IPSSEND Flash Support
+ - Set Sense Data for Unknown SCSI Command
+ - Use Slot Number from NVRAM Page 5
+ - Restore caller's DCDB Structure
+
+ 4.20.14 - Update patch files for kernel 2.4.0-test5
+
+ 4.20.13 - Fix some failure cases / reset code
+ - Hook into the reboot_notifier to flush the controller
+ cache
+
+ 4.20.03 - Rename version to coincide with new release schedules
+ - Performance fixes
+ - Fix truncation of /proc files with cat
+ - Merge in changes through kernel 2.4.0test1ac21
+
+ 4.10.13 - Fix for dynamic unload and proc file system
+
+ 4.10.00 - Add support for ServeRAID 4M/4L
+
+ 4.00.06 - Fix timeout with initial FFDC command
+
+ 4.00.05 - Remove wish_block from init routine
+ - Use linux/spinlock.h instead of asm/spinlock.h for kernels
+ 2.3.18 and later
+ - Sync with other changes from the 2.3 kernels
+
+ 4.00.04 - Rename structures/constants to be prefixed with IPS_
+
+ 4.00.03 - Add alternative passthru interface
+ - Add ability to flash ServeRAID BIOS
+
+ 4.00.02 - Fix problem with PT DCDB with no buffer
+
+ 4.00.01 - Add support for First Failure Data Capture
+
+ 4.00.00 - Add support for ServeRAID 4
+
+ 3.60.02 - Make DCDB direction based on lookup table.
+ - Only allow one DCDB command to a SCSI ID at a time.
+
+ 3.60.01 - Remove bogus error check in passthru routine.
+
+ 3.60.00 - Bump max commands to 128 for use with ServeRAID
+ firmware 3.60.
+ - Change version to 3.60 to coincide with ServeRAID release
+ numbering.
+
+ 1.00.00 - Initial Public Release
+ - Functionally equivalent to 0.99.05
+
+ 0.99.05 - Fix an oops on certain passthru commands
+
+ 0.99.04 - Fix race condition in the passthru mechanism
+ -- this required the interface to the utilities to change
+ - Fix error recovery code
+
+ 0.99.03 - Make interrupt routine handle all completed request on the
+ adapter not just the first one
+ - Make sure passthru commands get woken up if we run out of
+ SCBs
+ - Send all of the commands on the queue at once rather than
+ one at a time since the card will support it.
+
+ 0.99.02 - Added some additional debug statements to print out
+ errors if an error occurs while trying to read/write
+ to a logical drive (IPS_DEBUG).
+
+ - Fixed read/write errors when the adapter is using an
+ 8K stripe size.
+
diff --git a/Documentation/scsi/ChangeLog.lpfc b/Documentation/scsi/ChangeLog.lpfc
new file mode 100644
index 000000000..2f6d595f9
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.lpfc
@@ -0,0 +1,1865 @@
+Known issues :
+ * Please read the associated RELEASE-NOTES file !!!
+ * This source release intended for upstream kernel releases only!
+
+Changes from 20050323 to 20050413
+
+ * Changed version number to 8.0.28
+ * Fixed build warning for 2.6.12-rc2 kernels: mempool_alloc now
+ requires a function which takes an unsigned int for gfp_flags.
+ * Removed pci dma sync calls to coherent/consistent pci memory.
+ * Merged patch from Christoph Hellwig <hch@lst.de>: split helpers
+ for fabric and nport logins out of lpfc_cmpl_els_flogi.
+ * Removed sysfs attributes that are used to dump the various
+ discovery lists.
+ * Fix for issue where not all luns are seen. Search all lists
+ other than unmap list in lpfc_find_target(). Otherwise INQUIRY
+ to luns on nodes in NPR or other relevant states (PLOGI,
+ PRLI...) are errored back and scan() terminates.
+ * Removed FC_TRANSPORT_PATCHESxxx defines. They're in 2.6.12-rc1.
+ * Compare return value of lpfc_scsi_tgt_reset against SCSI
+ midlayer codes SUCCESS/FAILED which that function returns rather
+ than SLI return code.
+ * Removed extraneous calls to lpfc_sli_next_iotag which should
+ only be called from lpfc_sli_submit_iocb. Also make
+ lpfc_sli_next_iotag static.
+ * Added PCI ID for LP10000-S.
+ * Changes in lpfc_abort_handler(): Return SUCCESS if we did not
+ find command in both TX and TX completion queues. Return ERROR
+ if we timed out waiting for command to complete after abort was
+ issued.
+ * Zero-out response sense length in lpfc_scsi_prep_cmnd to prevent
+ interpretation of stale sense length when the command completes
+ - was causing spurious 0710 messages.
+ * Moved clearing of host_scribble inside host_lock in IO
+ completion path.
+ * Fixed a bunch of mixed tab/space indentation.
+ * Allow hex format numbers in sysfs attribute setting. Fix
+ application hang when invalid numbers are used in sysfs
+ settings.
+ * Removed extra iotag allocation by lpfc_abort_handler.
+ * Clear host_scribble in the scsi_cmnd structure when failing in
+ queuecommand.
+ * Changed logic at top of lpfc_abort_handler so that if the
+ command's host_scibble field is NULL, return SUCCESS because the
+ driver has already returned the command to the midlayer.
+
+Changes from 20050308 to 20050323
+
+ * Changed version number to 8.0.27
+ * Changed a few lines from patch submitted by Christoph Hellwig
+ (3/19). MAILBOX_WSIZE * (uint32_t) is replaced with an
+ equivalent MAILBOX_CMDSIZE macro.
+ * Merged patch from Christoph Hellwig (3/19): some misc patches
+ against the latest drivers:
+ - stop using volatile. if you need special ordering use memory
+ barriers but that doesn't seem to be the case here
+ - switch lpfc_sli_pcimem_bcopy to take void * arguments.
+ - remove typecast for constants - a U postfix marks them
+ unsigned int in C
+ - add a MAILBOX_CMD_SIZE macro, as most users of
+ MAILBOX_CMD_WSIZE didn't really want the word count
+ - kill struct lpfc_scsi_dma_buf and embedded the two members
+ directly in struct lpfc_scsi_buf
+ - don't call dma_sync function on allocations from
+ pci_pool_alloc - it's only for streaming mappings (pci_map_*)
+ * Merged patch from Christoph Hellwig (3/19) - nlp_failMask isn't
+ ever used by the driver, just reported to userspace (and that in
+ a multi-value file which is against the sysfs guidelines).
+ * Change pci_module_init to pci_register_module() with appropriate
+ ifdefs.
+ * Added #include <linux/dma-mapping.h> as required by the DMA
+ 32bit and 64bit defines on some archs.
+ * Merged patch from Christoph Hellwig (03/19) - fix initialization
+ order - scsi_add_host must happen last from scsi POV. Also some
+ minor style/comment fixups.
+ * Fixed use of TRANSPORT_PATCHES_V2 by changing to
+ FC_TRANSPORT_PATCHES_V2.
+
+Changes from 20050223 to 20050308
+
+ * Changed version number to 8.0.26
+ * Revise TRANSPORT_PATCHES_V2 so that lpfc_target is removed and
+ rport data is used instead. Removed device_queue_hash[].
+ * Changed RW attributes of scan_down, max_luns and fcp_bind_method
+ to R only.
+ * Fixed RSCN handling during initial link initialization.
+ * Fixed issue with receiving PLOGI handling when node is on NPR
+ list and marked for ADISC.
+ * Fixed RSCN timeout issues.
+ * Reduced severity of "SCSI layer issued abort device" message to
+ KERN_WARNING.
+ * Feedback from Christoph Hellwig (on 2/5) - In the LPFC_EVT_SCAN
+ case the caller already has the target ID handly, so pass that
+ one in evt_arg1.
+ * Fix compile warning/resultant panic in
+ lpfc_register_remote_port().
+
+Changes from 20050215 to 20050223
+
+ * Changed version number to 8.0.25
+ * Add appropriate comments to lpfc_sli.c.
+ * Use DMA_64BIT_MASK and DMA_32BIT_MASK defines instead of
+ 0xffffffffffffffffULL & 0xffffffffULL respectively. Use pci
+ equivalents instead of dma_set_mask and also modify condition
+ clause to actually exit on error condition.
+ * Restart els timeout handler only if txcmplq_cnt. On submission,
+ mod_timer the els_tmofunc. This prevents the worker thread from
+ waking up the els_tmo handler un-necessarily. The thread was
+ being woken up even when there were no pending els commands.
+ * Added new typedefs for abort and reset functions.
+ * Collapsed lpfc_sli_abort_iocb_xxx into a single function.
+ * Collapsed lpfc_sli_sum_iocb_xxx into a single function.
+ * Removed TXQ from all abort and reset handlers since it is never
+ used.
+ * Fixed Oops panic in 8.0.23 (reported on SourceForge). The
+ driver was not handling LPFC_IO_POLL cases correctly in
+ fast_ring_event and was setting the tgt_reset timeout to 0 in
+ lpfc_reset_bus_handler. This 0 timeout would not allow the FW
+ to timeout ABTS's on bad targets and allow the driver to have an
+ iocb on two lists. Also split the lpfc_sli_ringtxcmpl_get
+ function into two routines to match the fast and slow completion
+ semantics - ELS completions worked for the wrong reasons. Also
+ provided new log message number - had two 0326 entries.
+ * Removed unused #define LPFC_SCSI_INITIAL_BPL_SIZE.
+ * Removed unused struct lpfc_node_farp_pend definition.
+ * Removed unused #define LPFC_SLIM2_PAGE_AREA.
+ * Changed zeros used as pointers to NULL.
+ * Removed unneeded braces around single line in lpfc_do_work.
+ * Close humongous memory leak in lpfc_sli.c - driver was losing 13
+ iocbq structures per LIP.
+ * Removed last of GFP_ATOMIC allocations.
+ * Locks are not taken outside of nportdisc, hbadisc, els and most
+ of the init, sli, mbox and ct groups of functions
+ * Fix comment for lpfc_sli_iocb_cmd_type to fit within 80 columns.
+ * Replaced wait_event() with wait_event_interruptible().
+ wait_event() puts the woker thread in an UNINTERRUPTIBLE state
+ causing it to figure in load average calculations. Also add a
+ BUG_ON to the ret code of wait_event_interruptible() since the
+ premise is that the worker thread is signal-immune.
+
+Changes from 20050208 to 20050215
+
+ * Changed version number to 8.0.24
+ * Fixed a memory leak of iocbq structure. For ELS solicited iocbs
+ sli layer now frees the response iocbs after processing it.
+ * Closed large memory leak -- we were losing 13 iocbq structures
+ per LIP.
+ * Changing EIO and ENOMEM to -EIO and -ENOMEM respectively.
+ * Cleanup of lpfc_sli_iocb_cmd_type array and typing of iocb type.
+ * Implemented Christoph Hellwig's feedback from 02/05: Remove
+ macros putLunHigh, putLunLow. Use lpfc_put_lun() inline instead.
+ * Integrated Christoph Hellwig's feedback from 02/05: Instead of
+ cpu_to_be32(), use swab16((uint16_t)lun). This is the same as
+ "swab16() on LE" and "<<16 on BE".
+ * Added updates for revised FC remote port patch (dev_loss_tmo
+ moved to rport, hostdata renamed dd_data, add fc_remote_host()
+ on shutdown).
+ * Removed unnecessary function prototype.
+ * Added code to prevent waking up worker thread after the exit of
+ worker thread. Fixes panic seen with insmod/rmmod testing with
+ 70 disks.
+ * Integrated Christoph Hellwig's patch from 1/30: Make some
+ variables/code static (namely lpfcAlpaArray and
+ process_nodev_timeout()).
+ * Integrated Christoph Hellwig's patch from 1/30: Use
+ switch...case instead of if...else if...else if while decoding
+ JDEC id.
+
+Changes from 20050201 to 20050208
+
+ * Changed version number to 8.0.23
+ * Make lpfc_work_done, lpfc_get_scsi_buf,
+ lpfc_mbx_process_link_up, lpfc_mbx_issue_link_down and
+ lpfc_sli_chipset_init static.
+ * Cleaned up references to list_head->next field in the driver.
+ * Replaced lpfc_discq_post_event with lpfc_workq_post_event.
+ * Implmented Christoph Hellwig's review from 2/5: Check for return
+ values of kmalloc.
+ * Integrated Christoph Hellwig's patch from 1/30: Protecting
+ scan_tmo and friends in !FC_TRANSPORT_PATCHES_V2 &&
+ !USE_SCAN_TARGET.
+ * Integrated Christoph Hellwig's patch from 1/30: Some fixes in
+ the evt handling area.
+ * Integrated Christoph Hellwig's patch from 1/30: Remove usage of
+ intr_inited variable. The interrupt initilization from OS side
+ now happens in lpfc_probe_one().
+ * Integrated Christoph Hellwig's patch from 1/30: remove shim
+ lpfc_alloc_transport_attr - remove shim lpfc_alloc_shost_attrs -
+ remove shim lpfc_scsi_host_init - allocate phba mem in scsi's
+ hostdata readjust code so that they are no use after free's
+ (don't use after scsi_host_put) - make lpfc_alloc_sysfs_attr
+ return errors
+ * Fixed panic in lpfc_probe_one(). Do not delete in a list
+ iterator that is not safe.
+ * Clean up fast lookup array of the fcp_ring when aborting iocbs.
+ * Following timeout handlers moved to the lpfc worker thread:
+ lpfc_disc_timeout, lpfc_els_timeout, lpfc_mbox, lpfc_fdmi_tmo,
+ lpfc_nodev_timeout, lpfc_els_retry_delay.
+ * Removed unused NLP_NS_NODE #define.
+ * Integrated Christoph Hellwig's patch from 1/30: remove unused
+ lpfc_hba_list; remove unused lpfc_rdrev_wd30; remove
+ lpfc_get_brd_no and use Linux provided IDR.
+ * Changed board reset procedure so that lpfc_sli_send_reset()
+ writes the INITFF bit and leaves lpfc_sli_brdreset() to clear
+ the bit.
+ * Removed outfcpio sysfs device attribute.
+ * VPD changes: 1) Modify driver to use the model name and
+ description from the VPD data if it exists 2) Rework use of DUMP
+ mailbox command to support HBAs with 256 bytes of SLIM.
+ * Fixed compile error for implicit definition of struct
+ scsi_target
+
+Changes from 20050124 to 20050201
+
+ * Changed version number to 8.0.22
+ * Moved discovery timeout handler to worker thread. There are
+ function calls in this function which are not safe to call from
+ HW interrupt context.
+ * Removed free_irq from the error path of HBA initialization.
+ This will fix the free of uninitialised IRQ when config_port
+ fails.
+ * Make sure function which processes unsolicited IOCBs on ELS ring
+ still is called with the lock held.
+ * Clear LA bit from work_ha when we are not supposed to handle LA.
+ * Fix double locking bug in the error handling part of
+ lpfc_mbx_cmpl_read_la.
+ * Implemented fast IOCB processing for FCP ring.
+ * Since mboxes are now unconditionally allocated outside of the
+ lock, free them in cases where they are not used.
+ * Moved out a couple of GFP_ATOMICs in lpfc_disc_timeout, to
+ before locks so that they can GFP_KERNEL instead. Also cleaned
+ up code.
+ * Collapsed interrupt handling code into one function.
+ * Removed event posting and handling of solicited and unsolicited
+ iocbs.
+ * Remove ELS ring handling leftovers from the lpfc_sli_inter().
+ * ELS ring (any slow ring) moved from the lpfc_sli_inter() into a
+ worker thread. Link Attention, Mbox Attention, and Error
+ Attention, as well as slow rings' attention is passed to the
+ worker thread via worker thread copy of Host Attention
+ register. Corresponding events are removed from the event queue
+ handling.
+ * Add entries to hba structure to delegate some functionality from
+ the lpfc_sli_inter() to a worker thread.
+ * Reduced used of GFP_ATOMIC for memory allocations.
+ * Moved locks deeper in order to change GFP_ATOMIC to GFP_KERNEL.
+ * IOCB initialization fix for Raw IO.
+ * Removed qcmdcnt, iodonecnt, errcnt from lpfc_target and from
+ driver.
+ * Added call to lpfc_els_abort in lpfc_free_node. Modified
+ lpfc_els_abort to reset txq and txcmplq iterator after a
+ iocb_cmpl call.
+ * Fixed a use after free issue in lpfc_init.c.
+ * Defined default mailbox completion routine and removed code in
+ the sli layer which checks the mbox_cmpl == 0 to free mail box
+ resources.
+ * In lpfc_workq_post_event, clean up comment formatting and remove
+ unneeded cast of kmalloc's return.
+ * Removed loop which calls fc_remote_port_unblock and
+ fc_remote_port_delete for every target as this same effect is
+ accomplished by the scsi_remove_host call.
+ * Minor cleanup of header files. Stop header files including
+ other header files. Removed sentinels which hide multiple
+ inclusions. Removed unneeded #include directives.
+ * Fixed memory leaks in mailbox error paths.
+ * Moved lock from around of lpfc_work_done to lpfc_work_done
+ itself.
+ * Removed typedef for LPFC_WORK_EVT_t and left just struct
+ lpfc_work_evt to comply with linux_scsi review coding style.
+ * Fixed some trailing whitespaces, spaces used for indentation and
+ ill-formatting multiline comments.
+ * Bug fix for Raw IO errors. Reuse of IOCBs now mandates setting
+ of ulpPU and fcpi_parm to avoid incorrect read check of Write IO
+ and incorrect read length.
+
+Changes from 20050110 to 20050124
+
+ * Changed version number to 8.0.21
+ * Removed unpleasant casting in the definition and use of
+ lpfc_disc_action function pointer array.
+ * Makefile cleanup. Use ?= operator for setting default
+ KERNELVERSION and BASEINCLUDE values. Use $(PWD) consistently.
+ * Removed call to lpfc_sli_intr from lpfc_config_port_post. All
+ Linux systems will service hardware interrupts while bringing up
+ the driver.
+ * Christoph Hellwig change request: Reorg of contents of
+ lpfc_hbadisc.c, lpfc_scsi.h, lpfc_init.c, lpfc_sli.c,
+ lpfc_attr.c, lpfc_scsi.c.
+ * Renamed discovery thread to lpfc_worker thread. Moved handling
+ of error attention and link attention and mbox event handler to
+ lpfc_worker thread.
+ * Removed .proc_info and .proc_name from the driver template and
+ associated code.
+ * Removed check of FC_UNLOADING flag in lpfc_queuecommand to
+ determine what result to return.
+ * Move modification of FC_UNLOADING flag under host_lock.
+ * Fix IOERR_RCV_BUFFER_WAITING handling for CT and ELS subsystem.
+ * Workaround firmware bug for IOERR_RCV_BUFFER_WAITING on ELS
+ ring.
+ * Fixed a couple lpfc_post_buffer problems in lpfc_init.c.
+ * Add missing spaces to the parameter descriptions for
+ lpfc_cr_delay, lpfc_cr_count and lpfc_discovery_threads.
+ * Lock before calling lpfc_sli_hba_down().
+ * Fix leak of "host" in the error path in the remove_one() path.
+ * Fix comment for lpfc_cr_count. It defaults to 1.
+ * Fix issue where we are calling lpfc_disc_done() recursively from
+ lpfc_linkdown(), but list_for_each_entry_safe() is not safe for
+ such use.
+ * Bump lpfc_discovery_threads (count of outstading ELS commands in
+ discovery) to 32
+ * If the SCSI midlayer tries to recover from an error on a lun
+ while the corresponding target is in the NPR state, lpfc driver
+ will reject all the resets. This will cause the target to be
+ moved to offline state and block all the I/Os. The fix for this
+ is to delay the lun reset to a target which is not in MAPPED
+ state until the target is rediscovered or nodev timeout is
+ fired.
+
+Changes from 20041229 to 20050110
+
+ * Changed version number to 8.0.20
+ * rport fix: use new fc_remote_port_rolechg() function instead of
+ direct structure change
+ * rport fix: last null pointer check
+ * Phase II of GFP_ATOMIC effort. Replaced iocb_mem_pool and
+ scsibuf_mem_pool with kmalloc and linked list. Inserted list
+ operations for mempool_alloc calls. General code cleanup. All
+ abort and reset routines converted. Handle_ring_event
+ converted.
+ * If the mbox_cmpl == lpfc_sli_wake_mbox_wait in
+ lpfc_sli_handle_mb_event, pmb->context1 points to a waitq. Do
+ not free the structure.
+ * rport fixes: fix for rmmod crash
+ * rport fixes: when receiving PRLI's, set node/rport role values
+ * rport fixes: fix for unload and for fabric port deletes
+ * VPD info bug fix.
+ * lpfc_linkdown() should be able to process all outstanding events
+ by calling lpfc_disc_done() even if it is called from
+ lpfc_disc_done() Moving all events from phba->dpc_disc to local
+ local_dpc_disc prevents those events from being processed.
+ Removing that queue. From now on we should not see "Illegal
+ State Transition" messages.
+ * Release host lock and enable interrupts when calling
+ del_timer_sync()
+ * All related to rports: Clean up issues with rport deletion
+ Convert to using block/unblock on list remove (was del/add)
+ Moved rport delete to freenode - so rport tracks node.
+ * rport fixes: for fport, get maxframe and class support
+ information
+ * Added use of wait_event to work with kthread interface.
+ * Ensure that scsi_transport_fc.h is always pulled in by
+ lpfc_scsiport.c
+ * In remote port changes: no longer nulling target->pnode when
+ removing from mapped list. Pnode get nulled when the node is
+ freed (after nodev tmo). This bug was causing i/o received in
+ the small window while the device was blocked to be errored w/
+ did_no_connect. With the fix, it returns host_busy
+ (per the pre-remote port changes).
+ * Merge in support for fc transport remote port use. This removes
+ any consistent bindings within the driver. All scanning is now
+ on a per-target basis driven by the discovery engine.
+
+Changes from 20041220 to 20041229
+
+ * Changed version number to 8.0.19
+ * Fixed bug for handling RSCN type 3. Terminate RSCN mode
+ properly after ADISC handling completes.
+ * Add list_remove_head macro. Macro cleans up memory allocation
+ list handling. Also clean up lpfc_reset_bus_handler - routine
+ does not need to allocate its own scsi_cmnd and scsi_device
+ structures.
+ * Fixed potential discovery bug, nlp list corrutpion fix potential
+ memory leak
+ * Part 1 of the memory allocation rework request by linux-scsi.
+ This effort fixes the number of bdes per scsi_buf to 64, makes
+ the scatter-gather count a module parameter, builds a linked
+ list of scsi_bufs, and removes all dependencies on lpfc_mem.h.
+ * Reverted lpfc_do_dpc, probe_one, remove_one to original
+ implementation. Too many problems (driver not completing
+ initial discovery, and IO not starting to disks). Backs out
+ kthread patch.
+ * Fix race condition in lpfc_do_dpc. If wake_up interrupt occurs
+ while lpfc_do_dpc is running disc_done and the dpc list is
+ empty, the latest insertion is missed and the schedule_timeout
+ does not wakeup. The sleep interval is MAX_SCHEDULE_TIMEOUT
+ defined as ~0UL >> 1, a very large number. Hacked it to 5*HZ
+ for now.
+ * Fixed bug introduced when discovery thread implementation was
+ moved to kthread. kthread_stop() is not able to wake up thread
+ waiting on a semaphore and "modprobe -r lpfc" is not always
+ (most of the times) able to complete. Fix is in not using
+ semaphore for the interruptable sleep.
+ * Small Makefile cleanup - Remove remnants of 2.4 vs. 2.6
+ determination.
+
+Changes from 20041213 to 20041220
+
+ * Changed version number to 8.0.18
+ * Janitorial cleanup after removal of sliinit and ringinit[] ring
+ statistic is owned by the ring and SLI stats are in sli
+ structure.
+ * Integrated patch from Christoph Hellwig <hch@lst.de> Kill
+ compile warnings on 64 bit platforms: %variables for %llx format
+ specifiers must be caste to long long because %(u)int64_t can
+ just be long on 64bit platforms.
+ * Integrated patch from Christoph Hellwig <hch@lst.de> Removes
+ dead code.
+ * Integrated patch from Christoph Hellwig <hch@lst.de>: use
+ kthread interface.
+ * Print LPFC_MODULE_DESC banner in module init routine.
+ * Removed sliinit structure and ringinit[] array.
+ * Changed log message number from 324 to 326 in lpfc_sli.c.
+ * Wait longer for commands to complete in lpfc_reset_bus_handler
+ and lpfc_reset_bus_handler. Also use schedule_timeout() instead
+ of msleep() and add error message in lpfc_abort_handler()
+ * When setting lpfc_nodev_tmo, from dev_loss set routine, make 1
+ sec minimum value.
+ * Functions which assume lock being held were called without lock
+ and kernel complained about unlocking lock which is not locked.
+ * Added code in linkdown to unreg if we know login session will be
+ terminated.
+ * Removed automap config parameter and fixed up use_adisc logic to
+ include FCP2 devices.
+
+Changes from 20041207 to 20041213
+
+ * Changed version number to 8.0.17
+ * Fix sparse warnings by adding __iomem markers to lpfc_compat.h.
+ * Fix some sparse warnings -- 0 used as NULL pointer.
+ * Make sure there's a space between every if and it's (.
+ * Fix some overly long lines and make sure hard tabs are used for
+ indentation.
+ * Remove all trailing whitespace.
+ * Integrate Christoph Hellwig's patch for 8.0.14: if
+ pci_module_init fails we need to release the transport template.
+ (also don't print the driver name at startup, linux drivers can
+ be loaded without hardware present, and noise in the log for
+ that case is considered unpolite, better print messages only for
+ hardware actually found).
+ * Integrate Christoph Hellwig's patch for 8.0.14: Add missing
+ __iomem annotations, remove broken casts, mark functions static.
+ Only major changes is chaning of some offsets from word-based to
+ byte-based so we cans simply do void pointer arithmetics (gcc
+ extension) instead of casting to uint32_t.
+ * Integrate Christoph Hellwig's patch for 8.0.14: flag is always
+ LPFC_SLI_ABORT_IMED, aka 0 - remove dead code.
+ * Modified preprocessor #ifdef, #if, #ifndef to reflect upstream
+ kernel submission. Clean build with make clean;make and make
+ clean;make ADVANCED=1 on SMP x86, 2.6.10-rc2 on RHEL 4 Beta
+ 1. IO with a few lips and a long cable pull behaved accordingly.
+ * Implement full VPD support.
+ * Abort handler will try to wait for abort completion before
+ returning. Fixes some panics in iocb completion code path.
+
+Changes from 20041130 to 20041207
+
+ * Changed version number to 8.0.16
+ * Hung dt session fix. When the midlayer calls to abort a scsi
+ command, make sure the driver does not complete post-abort
+ handler. Just NULL the iocb_cmpl callback handler and let SLI
+ take over.
+ * Add Read check that uses SLI option to validate all READ data
+ actually received.
+
+
+Changes from 20041123 to 20041130
+
+ * Changed version number to 8.0.15
+ * Ifdef'd unused "binary" attributes by DFC_DEBUG for clean
+ compiles
+ * Stop DID_ERROR from showing up along with QUEUE_FULL set by the
+ Clarion array (SCSI error ret. val. 0x70028) There is no need
+ for driver to hard fail command which was failed by the target
+ device.
+ * Fix for Scsi device scan bug reported on SourceForge. Driver
+ was returning a DID_ERROR in lpfc_handle_fcp_error causing
+ midlayer to mark report luns as failing even though it
+ succeeded.
+ * Don't ignore SCSI status on underrun conditions for inquiries,
+ test unit ready's, etc. This was causing us to lose
+ reservation conflicts, etc
+
+Changes from 20041018 to 20041123
+
+ * Changed version number to 8.0.14
+ * Added new function "iterator" lpfc_sli_next_iocb_slot() which
+ returns pointer to iocb entry at cmdidx if queue is not full.
+ It also updates next_cmdidx, and local_getidx (but not cmdidx)
+ * lpfc_sli_submit_iocb() copies next_cmdidx into cmdidx. Now it is
+ the only place were we are updating cmdidx.
+ * lpfc_sli_update_ring() is split in to two --
+ lpfc_sli_update_ring() and lpfc_sli_update_full_ring().
+ * lpfc_sli_update_ring() don't to read back correct value of
+ cmdidx.
+ * Simplified lpfc_sli_resume_iocb() and its use.
+ * New static function lpfc_sli_next_iocb(phba, pring, &piocb) to
+ iterate through commands in the TX queue and new command (at the
+ end).
+ * Reduced max_lun to 256 (due to issues reported to some arrays).
+ Fixed comment, and macro values so def=256, min=1, max=32768.
+ * Fix an obvious typo/bug: kfree was used to free lpfc_scsi_buf
+ instead of mempool_free in lpfc_scsiport.c.
+ * Suppress nodev_tmo message for FABRIC nodes.
+ * Fixed some usage of plain integer as NULL pointer.
+ * Bug fix for FLOGI cmpl, lpfc_els_chk_latt error path code
+ cleanup.
+ * Fixup lpfc_els_chk_latt() to have Fabric NPorts go thru
+ discovery state machine as well.
+ * Fixes to lpfc_els_chk_latt().
+ * Use DID not SCSI target id as a port_id and add some missing
+ locks in lpfc_fcp.c.
+ * Changed eh_abort_handler to return FAILED if command is not
+ found in driver.
+ * Fix crash: paging request at virtual address 0000000000100108 -
+ a result of removing from the txcmpl list item which was already
+ removed (100100 is a LIST_POISON1 value from the next pointer
+ and 8 is an offset of the "prev") Driver runs out of iotags and
+ does not handle that case well. The root of the proble is in the
+ initialization code in lpfc_sli.c
+ * Changes to work with proposed linux kernel patch to support
+ hotplug.
+ * Zero out seg_cnt in prep_io failure path to prevent double sg
+ unmap calls.
+ * Fix setting of upper 32 bits for Host Group Ring Pointers if in
+ SLIM. Old code was inappropriately masking off low order bits.
+ * Use scsi_[activate|deactivate]_tcq calls provided in scsi_tcq.h.
+ * Integrated patch from Christoph Hellwig (hch@lst.de): don't call
+ pci_dma_sync_* on coherent memory. pci_dma_sync_* is need and
+ must be used only with streaming dma mappings pci_map_*, not
+ coherent mappings. Note: There are more consistent mappings
+ that are using pci_dma_sync calls. Probably these should be
+ removed as well.
+ * Modified lpfc_free_scsi_buf to accommodate all three scsi_buf
+ free types to alleviate miscellaneous panics with cable pull
+ testing.
+ * Set hotplug to default 0 and lpfc_target_remove to not remove
+ devices unless hotplug is enabled.
+ * Fixed discovery bug: plogi cmpl uses ndlp after its freed.
+ * Fixed discovery bug: rnid acc cmpl, can potentially use ndlp
+ after its freed.
+ * Modularize code path in lpfc_target_remove().
+ * Changes to support SCSI hotplug (ifdef'ed out because they need
+ kernel support USE_SCAN_TARGET requires kernel support to export
+ the interface to scsi_scan_target and to move the SCAN_WILD_CARD
+ define to a general scsi header file. USE_RESCAN_HOST requires
+ kernel support to export an interface to scan_scsi_host() with
+ the rescan flag turned on).
+ * Removed redundant variable declaration of lpfc_linkdown_tmo.
+ * Fix for large port count remove test.
+ * Added check to see if BAR1 register is valid before using BAR1
+ register for programming config_port mail box command.
+ * Added lpfc_scsi_hotplug to enable/disable driver support of SCSI
+ hotplug.
+ * Changed lpfc_disc_neverdev() to lpfc_disc_illegal() and changed
+ lpfc_disc_nodev() to lpfc_disc_noop(). Adjusted appropriate
+ events to use these routines.
+ * Add support for SCSI device hotplug.
+ * Take dummy lpfc_target's into account for lpfc_slave_destroy().
+ * Bug fix to store WWPN / WWNN in NameServer / FDMI lpfc_nodelist
+ entries.
+ * Added slavecnt in lpfc_target for diagnostic purposes.
+ * Added lpfc_hba load/unload flags to take care of special cases
+ for add/remove device.
+ * Have target add/remove delay before scanning.
+ * Have rmmod path cleanup blocked devices before scsi_remove_host.
+ * Added a #define for msleep for 2.6.5 kernels.
+ * In reset bus handler if memory allocation fails, return FAILED
+ and not SUCCESS.
+ * Have lpfc eh handlers, bus_reset and lun_reset, wait for all
+ associated I/Os to complete before returning.
+ * Fix memset byte count in lpfc_hba_init so that
+ LP1050 would initialize correctly.
+ * Backround nodev_timeout processing to DPC This enables us to
+ unblock (stop dev_loss_tmo) when appopriate.
+ * Fix array discovery with multiple luns. The max_luns was 0 at
+ the time the host structure was initialized. lpfc_cfg_params
+ then set the max_luns to the correct value afterwards.
+ * Remove unused define LPFC_MAX_LUN and set the default value of
+ lpfc_max_lun parameter to 512.
+ * Reduced stack usage of lpfc_hba_init.
+ * Cleaned up the following warning generated by
+ scripts/checkincludes.pl lpfc_fcp.c: scsi/scsi_cmnd.h is
+ included more than once.
+ * Replaced "set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(timeout)" with "msleep(timeout)".
+ * Fixnode was losing starget when rediscovered. We saw messages
+ like: lpfc 0000:04:02.0: 0:0263 Cannot block scsi target as a
+ result. Moved starget field into struct lpfc_target which is
+ referenced from the node.
+ * Add additional SLI layer logging in lpfc_sli.c.
+ * Ignore more unexpected completions in lpfc_nportdisc.c.
+ * Can not call lpfc_target_unblock from the soft interrupt
+ context. It seems to be not nessasery to unblock target from
+ nodev timeout.
+ * Introduce and use less lethal event handler for unexpected
+ events in lpfc_nportdisc.c.
+ * Can not call fc_target_(un)block() functions with interrupts
+ disabled in lpfc_scsiport.c.
+ * Added new configuration parameter, lpfc_max_luns range 1-32768,
+ default 32768.
+ * Allow lpfc_fcp.c to call lpfc_get_hba_sym_node_name().
+ * Increase nodev timeout from 20 seconds to 30 seconds.
+ * Replace some kfree((void*)ptr) with kfree(ptr).
+ * Make 3 functions static: lpfc_get_hba_sym_node_name,
+ lpfc_intr_prep and lpfc_setup_slim_access. Move lpfc_intr_prep
+ and lpfc_setup_slim_access so they're defined before being used.
+ * Remove an unnecessary list_del() in lpfc_hbadisc.c.
+ * Set nlp_state before calling lpfc_nlp_list() since this will
+ potentially call fc_target_unblock which may cause a race in
+ queuecommand by releasing host_lock.
+ * Since lpfc_nodev_tmo < dev_loss_tmo remove queuecommand
+ DID_BAD_TARGET return for now.
+ * Fix a problem with rcv logo.
+ * Remove unused portstatistics_t structure.
+ * Remove #if 0 and unnecessary checks in lpfc_fcp.c.
+ * Simplify lpfc_issue_lip: Extra layer of protection removed.
+ * Grab lock before calling lpfc_sli_issue_mbox(phba, pmb,
+ MBX_NOWAIT) in lpfc_sli_issue_mbox_wait().
+
+Changes from 20040920 to 20041018
+
+ * Changed version number to 8.0.13
+ * Hide some attributes using #ifndef DFC_DEBUG ... #endif.
+ * Modify Makefile to (1) make BUILD_NO_DEBUG=1 will hide some
+ (binary) attributes (2) make BUILD_FC_TRANS=0 will build driver
+ for 2.6.5 kernel with block/unblock patch.
+ * Modified #ifdef names.
+ * Added support for proposed FC transport host attributes (which
+ replaces some of the attributes we had local to the driver).
+ Removed the binary statistics sysfs attribute.
+ * Added extra ELS verbose logging for ELS responses.
+ * Added recognition for BUILD_FC_TRANS=2 to Makefile to define
+ FC_TRANS_VER2.
+ * Add a pointer for link stats allocation.
+ * Exported lpfc_get_hba_sym_node_name for use by FC_TRANS_VER2
+ sysfs routines.
+ * Fix discovery problem in lip testing: if device sends an ELS cmd
+ (i.e. LOGO) before our FLOGI completes it should be LS_RJT'ed.
+ * Moved #defines around to provide target_add/remove for upstream
+ kernel deliverables only not SLES9. Provided ifdefs to #include
+ target_block/unblock only if FC_TRANS_VER1.
+ * Add sanity check in lpfc_nlp_list move setting nlp_Target
+ outside #ifdef.
+ * Added a blocked member to the lpfc_target structure for
+ block/unblock. This member allows the driver to know when to
+ unblock for pci_remove_one or pci_add_one. #ifdef'd some more
+ block/unblock stuff and removed some defensive checks from
+ target_block/unblock.
+ * Moved + 5 second window to dev_loss_tmo setting and updated
+ comments.
+ * Removed NULL target check from target_block/unblock and fixed up
+ a few comments.
+ * Enable sysfs attributes on 2.6.5 kernels and remove extra
+ compatibility code.
+ * Remove any and all trailing whitespace.
+ * Added message 0718 and return error when dma_map_single fails.
+ * Changed the fcpCntl2 commands to include an FCP_ prefix to get
+ rid of build warnings on later 2.6.9-rc kernels. Build
+ conflicts with scsi/scsi.h. Remove inclusions of scsi/scsi.h
+ from hbadisc.c, sli.c, and fcp.c since these modules had no
+ dependencies on scsi.h.
+ * Fixed a bug with RSCN handling. A RSCN received on one device,
+ shouldn't affect other devices not referenced by the RSCN.
+ * Moved #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,6) to include
+ lpfc_jedec_to_ascii to prevent warning in SLES 9.
+ * Update Makefile to account for SLES 9 and scsi-target upstream
+ kernel.
+ * This checkin provides block/unblock hooks for the upstream scsi
+ target kernel and 2.6.5 on SLES9 SP1 with the block/unblock
+ patch.
+ * Discovery changes regarding setting targetp->pnode and
+ ndlp->nlp_Target Ensure fc_target_* routines are called properly
+ from discovery. Remove list_del's from lpfc_cleanup(). Ensure
+ all the lpfc_consistent_bind_* routines don't set any driver
+ structure objects.
+ * Fix for timeout of READ_LA or READ_SPARAM mailbox command
+ causing panic.
+ * Cleanup list_del()'s for Discovery ndlp lists.
+ * Bug fixes for some insmod/rmmod crashes, link down crashes and
+ device loss crashes.
+ * Removed NLP_SEARCH_DEQUE.
+ * Call lpfc_target_unblock only if the targetp is nonNull and with
+ the host_lock held.
+ * Added qcmdcnt back along with misc bug fixes to discovery.
+ * Changed tgt_io to outfcpio lpfc_fcp.c.
+ * Fixed errors caused by LIP and cable pulls both with and without
+ block/unblock patch.
+ * For now we have to call fc_target_unblock and fc_target_block
+ with interrupts enabled.
+ * Save seg_cnt from dma_map_sg. Save scatter-gather start address
+ and pass back to dma_unmap_sg in error with seg_cnt.
+ * Incorporating block/unblock calls into driver with ifdefs. This
+ change is supported by scsi-target-2.6 kernel and forward only.
+ * Merged in some discovery bug fixes and added tgt io counters.
+ * Added sysfs attributes/interfaces: read only attribute
+ "management_version" and write only attribute "issue_lip".
+ * Fix build on big endian machines: while #if was OK with
+ __BIG_ENDIAN which defined as 4321, __BIG_ENDIAN_BITFIELD has to
+ be tested with #ifdef because it does not have any value, it is
+ either defined or not.
+ * Add fabric_name and port_type attributes.
+ * Change mdelay to msleep. mdelay works, but wastefully uses cpu
+ resources without a lock held. Revert to msleep. Tested with
+ sg_reset for bus and three attached targets.
+ * Added the customary #ifndef...#define...#endif to
+ lpfc_version.h.
+ * Integrate patches from Christoph Hellwig: two new helpers common
+ to lpfc_sli_resume_iocb and lpfc_sli_issue_iocb - singificant
+ cleanup of those two functions - the unused SLI_IOCB_USE_TXQ is
+ gone - lpfc_sli_issue_iocb_wait loses its flags argument
+ totally.
+ * Fix in lpfc_sli.c: we can not store a 5 bit value in a 4-bit
+ field.
+ * Moved some routines out of lpfc_fcp.c into more appropriate
+ files.
+ * Whitespace cleanup: remove all trailing whitespace.
+ * Make lpfc_disc_ndlp_show static to lpfc_fcp.c.
+ * Remove leftover printk and replace some with
+ printk(KERN_WARNING)
+ * Trivial: fix a few long lines and a soft tab.
+ * Remove warnings generated by Sparse against driver (make
+ C=1). Mostly these are "using integer as pointer warnings"
+ i.e. use NULL instead of 0.
+ * Integrated patch from Christoph Hellwig: Quite a lot of changes
+ here, the most notable is that the phba->slim2p lpfc_dmabuf goes
+ away in favour of a typede pointer and a dma_addr_t. Due to the
+ typed pointer lots of the cast mess can go away, and while at it
+ I also replaced the messy SLI2_SLIM_t with a simple struct
+ lpfc2_sli2_slim that only contains the part of the union we care
+ about while using SLI2_SLIM_SIZE for all size calculations
+ directly.
+ * Integrated patch from Christoph Hellwig: This streamlines the
+ I/O completion path a little more, especially taking care of
+ fast-pathing the non-error case. Also removes tons of dead
+ members and defines from lpfc_scsi.h - e.g. lpfc_target is down
+ to nothing more than the lpfc_nodelist pointer.
+ * Added binary sysfs file to issue mbox commands
+ * Replaced #if __BIG_ENDIAN with #if __BIG_ENDIAN_BITFIELD for
+ compatibility with the user space applications.
+ * Decrease the amount of data in proc_info.
+ * Condense nodelist flag members.
+ * Expand INFO for discovery sysfs shost entries.
+ * Notify user if information exceeds 4k sysfs limit.
+ * Removed a bunch of unused #defines.
+ * Added initial sysfs discovery shost attributes.
+ * Remove unused #defines lpfc_disc.h.
+ * Fixed failMask nodelist settings.
+ * Cleanup some old comments / unused variables.
+ * Add LP101 to list of recognized adapters.
+
+Changes from 20040908 to 20040920
+
+ * Changed version number to 8.0.12
+ * Removed used #defines: DEFAULT_PCI_LATENCY_CLOCKS and
+ PCI_LATENCY_VALUE from lpfc_hw.h.
+ * Changes to accommodate rnid.
+ * Fix RSCN handling so RSCN NS queries only effect NPorts found in
+ RSCN data.
+ * If we rcv a plogi on a NPort queued up for discovery, clear the
+ NLP_NPR_2B_DISC bit since rcv plogi logic will force NPort thru
+ discovery.
+ * Ensure lpfc_target is also cleaned up in lpfc_cleanup().
+ * Preliminary changes for block/unblock kernel API extensions in
+ progress with linux-scsi list. These are name changes and
+ prototype changes only.
+ * Added send_abts flag to lpfc_els_abort. For rcv LOGO when ADISC
+ sent, the XRI of the LOGO rcv'ed is the same as the ADISC
+ sent. Thus we cannot ABTS the ADISC before sending the LOGO ACC.
+ * Weed out some unused fc_flags. Add FC_DISC_TMO.
+ * board_online sysfs attribute added to support libdfc functions
+ InitDiagEnv and SetBrdEnv.
+ * Streamline code in lpfc_els_retry fixup abort case in
+ lpfc_els_timeout_handler().
+ * Flush discovery/ELS events when we bring SLI layer down.
+ * ctlreg and slimem binary attributes added to support libdfc
+ read/write mem/ctl functions.
+ * Integrated Christoph Hellwig's patch: Cleanup
+ lpfc_sli_ringpostbuf_get.
+ * Modified lpfc_slave_alloc and lpfc_slave_destroy to allocate and
+ free a dummy target pointer. This allows queuecommand to skip
+ the NULL target pointer check and avoid the console spam when
+ slave_alloc fails.
+ * Fix cfg_scan_down logic, it was reversed.
+ * Init list head ctrspbuflist.
+ * Change name of lpfc_driver_abort to lpfc_els_abort since it is
+ only valid for ELS ring.
+ * Remove unused third argument for lpfc_consistent_bind_get().
+ * Fix up iotag fields in lpfc_prep_els_iocb().
+ * Remove log message on code path triggered by lpfc_els_abort().
+ * Set host->unique_id in lpfc_fcp.c.
+ * Removed deadwood: lpfc_target.pHba not necessary anymore.
+ * Integrated patch from Christoph Hellwig: remove dead
+ SLI_IOCB_POLL handling.
+ * Integrated patch from Christoph Hellwig: Streamline I/O
+ submission and completion path a little.
+ * Remove unnecessary lpfc_brd_no. Ensure brd_no assignment is
+ unique.
+ * Removed unused MAX_FCP_LUN.
+ * Use mod_timer instead of add_timer for fdmi in lpfc_ct.c.
+ * Fixed misc discovery problems.
+ * Move stopping timers till just before lpfc_mem_free() call.
+ * Fix up NameServer reglogin error path.
+ * Cleanup possible outstanding discovery timers on rmmod.
+ * Fix discovery NPort to NPort pt2pt problem.
+ * Get rid of ip_tmofunc / scsi_tmofunc.
+ * Integrated patch from Christoph Hellwig:
+ lpfc_disc_done/lpfc_do_dpc cleanup - lpfc_disc_done can return
+ void - move lpfc_do_dpc and lpfc_disc_done to lpfc_hbadisc.c -
+ remove checking of list emptiness before calling lpfc_disc_done,
+ it handles the empty list case just fine and the additional
+ instructions cost less then the bustlocked spinlock operations.
+ * Integrated patch from Christoph Hellwig: This adds a new 64bit
+ counter instead, brd_no isn't reused anymore. Also some tiny
+ whitespace cleanups in surrounding code.
+ * Reorder functions in lpfc_els.c to remove need for prototypes.
+ * Removed unused prototypes from lpfc_crtn.h -
+ lpfc_ip_timeout_handler, lpfc_read_pci and lpfc_revoke.
+ * Removed some unused prototypes from lpfc_crtn.h -
+ lpfc_scsi_hba_reset, lpfc_scsi_issue_inqsn,
+ lpfc_scsi_issue_inqp0, lpfc_scsi_timeout_handler.
+ * Integrated patch from Christoph Hellwig: remove TRUE/FALSE
+ usage.
+ * Integrated patch from Christoph Hellwig: Remove unused function
+ prototypes lpfc_set_pkt_len and lpfc_get_pkt_data from
+ lpfc_crtn.h - fixes build warnings.
+ * Removed unused struct lpfc_dmabufip definition from lpfc_mem.h.
+ * Removed pre-2.6.5 MODULE_VERSION macro from lpfc_compat.h.
+ * Fixing missing static and removing dead code.
+ * Adding nodewwn, portwwn and portfcid shost attributes.
+ * Initial support for CT via sysfs. request payloads of size less
+ than PAGE_SIZE and rsp payloads of size PAGE_SIZE are supported.
+ Driver maintains a list of rsp's and passes back rsp's
+ corresponding to the pid of the calling process.
+ * Support for RefreshInformation, GetAdapterAttributes,
+ GetPortStatistics.
+ * Make nodev-tmo default to 20 seconds.
+ * Fix up some DSM error cases, unreg_login rpi where needed.
+ * Fix up comments for fc_target_block / fc_target_unblock.
+ * Fix up code for scsi_block_requests / scsi_unblock_requests.
+ * Add NLP_FCP_TARGET for nodeinfo support.
+ * Move suspend/resume in lpfc_nlp_list under appropriate case -
+ Used host_lock for DPC to avoid race (remove dpc_lock)
+ * Fix some corner cases for PLOGI receive - simplify error case
+ for cmpl_reglogin_reglogin_issue.
+ * Bug fix for ppc64 EEH MMIO panic - always do readl after
+ writel's of HBA registers to force flush.
+ * Get rid of initial static routine declarations in lpfc_hbadisc.c
+ and lpfc_els.c.
+ * Updates to discovery processing.
+
+Changes from 20040823 to 20040908
+
+ * Changed version number to 8.0.11
+ * Removed persistent binding code.
+ * Display both ASC and ASCQ info.
+ * Fixed link down->up transitions when linkdown tmo expires. Fix
+ was in the defensive error checking at the start of
+ queuecommand.
+ * Removed lpfc_scsi_timeout_handler as this timer is no longer
+ required. The midlayer will exhaust retries and then call
+ lpfc_abort_handler, lpfc_reset_lun_handler, and
+ lpfc_reset_target_handler.
+ * Minimal support for SCSI flat space addressing/volume set
+ addressing. Use 16 bits of LUN address so that flat
+ addressing/VSA will work.
+ * Changed 2 occurrences of if( 1 != f(x)) to if(f(x) != 1)
+ * Drop include of lpfc_cfgparm.h.
+ * Reduce stack usage of lpfc_fdmi_cmd in lpfc_ct.c.
+ * Add minimum range checking property to /sys write/store
+ functions.
+ * Fix display of node_name and port_name via fc transport
+ attr.
+ * Removed biosparam code.
+ * Removed range checking. phba->config[] array elements are now
+ embedded into the hba struct. lpfc_config_setup() has been
+ removed.
+ * Collapsed lpfc_scsi_cmd_start into lpfc_queuecommand and cleaned
+ up combined routines.
+ * Removed unused prototypes myprint and
+ lpfc_sched_service_high_priority_queue.
+ * Removed unused function lpfc_nodev.
+ * Removed scsi_cmnd->timeout_per_command cancelation. SCSI midlayer
+ now times out all commands - FW is instructed to not timeout.
+ * Removed polling code from lpfc_scsi_cmd_start. Reorganized
+ queuecommand and cmd_start some.
+
+Changes from 20040810 to 20040823
+
+ * Changed version number to 8.0.10
+ * Additional timer changes as per Arjan / Christoph's comments.
+ * Used mod_timer() instead of del_timer_sync() where appropriate.
+ * Fixed a use after free case (panic on 2.6.8.1 with
+ CONFIG_DEBUG_SLAB set).
+ * Fix compile warning in lpfc_fcp.c.
+ * Minor fix for log message, that prints unassigned brdno which is
+ zero.
+ * Move scsi_host_alloc() to the beginning of probe_one(). This
+ ensures that host_lock is available at later stages and also
+ avoids tons of unnecessary initializing if host_alloc()
+ fails.
+ * Removed else clause from lpfc_slave_configure that set
+ sdev->queue_depth. The driver informs the midlayer of its
+ setting in the template and only overrides if queue tagging is
+ enabled.
+ * Added PCI_DEVICE_ID_ZEPHYR and PCI_DEVICE_ID_ZFLY (Junior
+ Zephyr) support
+
+Changes from 20040730 to 20040810
+
+ * Changed version number to 8.0.9
+ * Removed per HBA driver lock. Driver now uses the host->host_lock
+ * Restored support for the 2.6.5 kernel for those linux distributions
+ shipped with the 2.6.5 kernel.
+ * Applied patch from Christoph Hellwig (hch@infradead.org) as follows
+ "[PATCH] use scsi host private data in ->proc_info.
+ * Applied patch from Christoph Hellwig (hch@infradead.org) as follows
+ "Re: [Emulex] Ready for next round. This patch cleans up the memory
+ allocation routines a little and fixes a missing mempool_destroy and
+ some missing error handling."
+ * Changed pointers assignments from 0 to NULL.
+ * Added fixes to the lpfc_reset_lun_handler and lpfc_reset_bus_handler
+ entry points that caused kernel to Oops or hang.
+ * Added fixes to targetless hosts that caused modprobe and insmod to hang.
+ * Ongoing cleanup to many files
+
+Changes from 20040723 to 20040730
+
+ * Changed version number to 8.0.8
+ * Removed unused LPFN_DRIVER_VERSION #define.
+ * Folded lpfc_findnode_scsiid into lpfc_find_target, its only
+ caller.
+ * Removed 2 unneeded arguments to lpfc_find_target (lun and
+ create_flag).
+ * Make lpfc_sli_reset_on_init = 1
+ * Minor cleanup to quieten sparse.
+ * Removed missing function = 0 in tmo routine in lpfc_els.c.
+ * Moved additional binding parameters into lpfc_defaults.c:
+ lpfc_automap / lpfc_fcp_bind_method
+ * Use msecs_to_jiffies() where applicable.
+ * Only use queue depth attribute only after SLI HBA setup was
+ completed.
+ * Put in memory barriers for PPC
+ * Added PCI_DEVICE_ID_HELIOS and PCI_DEVICE_ID_JFLY (Junior
+ Helios) support
+ * Added 4&10 gigabit choices in user option link_speed
+ * Updated timer logic: Set timer data after init_timer use
+ timer_pending() instead of expires.
+ * Removed some remnants of IP over FC support from Kconfig and
+ Makefile.
+ * Remove redundant prototypes for lpfc_handle_eratt,
+ lpfc_handle_latt and lpfc_read_pci.
+ * Ongoing cleanup of lpfc_init.c.
+ * Changed LPFC_CFG_DFT_HBA_Q_DEPTH -> LPFC_CFG_HBA_Q_DEPTH.
+ * Another cleanup stab at lpfc_ct.c. Remove castings, structure
+ code sanely, remove redundant code, reorganize code so that
+ functions are invoked after definition.
+
+Changes from 20040716 to 20040723
+
+ * Changed version number to 8.0.7
+ * Cleanup of lpfc_ct.c. Removed number of casts, removed tons of
+ dead/redundant code, cleaned up badly and poorly written code,
+ cleaned up return values.
+ * Fixed Persistent binding implementation
+ * Removed all references to lpfc_scsi_req_tmo
+ * Removed last references to lun_skip config parameter.
+ * Removed LPFC_DEV_RPTLUN node failure bit because we don't issue
+ REPORT_LUNS from the driver anymore.
+ * Removed LUN-tracking in driver. Removed lpfc_lun struct and
+ moved any functionality we still need to lpfc_target.
+ * Added new lpfc_jedec_to_ascii() call and replace two instances
+ of duplicate code with calls to this function.
+ * Removed Volume Set Addressing handling on LUN IDs.
+ * Applied patch from Christoph Hellwig (hch@infradead.org) that
+ removes dead code belonging to lpfc_build_scsi_cmnd() and its
+ call path. This is related to the recently removed report_lun
+ code.
+
+Changes from 20040709 to 20040716
+
+ * Changed version number to 8.0.6
+ * Removed internal report LUNs usage. Removed functions:
+ lpfc_disc_issue_rptlun, lpfc_disc_cmpl_rptlun,
+ lpfc_disc_retry_rptlun and their use.
+ * Removed usused scheduler prototypes in lpfc_crtn.h
+ * Replace lpfc_geportname() with generic memcmp().
+ * Rearrange code in lpfc_rcv_plogi_plogi_issue() to make it a
+ little more readable.
+ * Remove redundant port_cmp != 2 check in if
+ (!port_cmp) { .... if (port_cmp != 2).... }
+ * Clock changes: removed struct clk_data and timerList.
+ * Clock changes: separate nodev_tmo and els_retry_delay into 2
+ separate timers and convert to 1 argument changed
+ LPFC_NODE_FARP_PEND_t to struct lpfc_node_farp_pend convert
+ ipfarp_tmo to 1 argument convert target struct tmofunc and
+ rtplunfunc to 1 argument * cr_count, cr_delay and
+ discovery_threads are only needed to be module_params and not
+ visible via sysfs.
+
+Changes from 20040614 to 20040709
+
+ * Changed version number to 8.0.5
+ * Make lpfc_info static.
+ * Make lpfc_get_scsi_buf static.
+ * Print a warning if pci_set_mwi returns an error.
+ * Changed SERV_PARM to struct serv_parm.
+ * Changed LS_RJT to struct ls_rjt.
+ * Changed CSP to struct csp.
+ * Changed CLASS_PARMS to struct class_parms.
+ * Some cosmetic coding style cleanups to lpfc_fcp.c.
+ * Providing a sysfs interface that dumps the last 32
+ LINK_[UP|DOWN] and RSCN events.
+ * Get rid of delay_iodone timer.
+ * Remove qfull timers and qfull logic.
+ * Convert mbox_tmo, nlp_xri_tmo to 1 argment clock handler
+ * Removed duplicate extern defs of the bind variables.
+ * Streamline usage of the defines CLASS2 and CLASS3, removing
+ un-necessary checks on config[LPFC_CFG_FCP_CLASS].
+ * Moving the persistent binding variables to new file
+ lpfc_defaults.c
+ * Changed LPFC_SCSI_BUF_t to struct lpfc_scsi_buf.
+ * Moved config specific code from probe_one() into
+ config_setup(). Removing a redundant check on scandown value
+ from bind_setup() as this is already done in config_setup().
+ * Changed LPFC_SLI_t to struct lpfc_sli.
+ * Changed FCP_CMND to struct fcp_cmnd.
+ * Changed FCP_RSP to struct fcp_rsp.
+ * Remove the need for buf_tmo.
+ * Changed ULP_BDE64 to struct ulp_bde64.
+ * Changed ULP_BDE to struct ulp_bde.
+ * Cleanup lpfc_os_return_scsi_cmd() and its call path.
+ * Removed lpfc_no_device_delay.
+ * Consolidating lpfc_hba_put_event() into lpfc_put_event().
+ * Removed following attributes and their functionality:
+ lpfc_extra_io_tmo, lpfc_nodev_holdio, lpfc_delay_rsp_err,
+ lpfc_tgt_queue_depth and lpfc_check_cond_err.
+ * Clock changes consolidating timers, just in the struct lpfc_hba,
+ to get rid of clkData and pass only one argument to timeout
+ routine. Also, removing need for outstanding clock linked list
+ to stop these timers at rmmod.
+ * Move lpfc.conf contents into lpfc_fcp.c. Removing per adapter
+ attributes in favor of global attributes.
+ * Fix a potential null pointer reference of pmbuf in lpfc_ct.c.
+ * On reset_lun, issue LUN_RESET as opposed to ABORT_TASK_SET.
+ * Removed SCSI_REQ_TMO related code.
+ * Introducing two new defines LPFC_ATTR_R and LPFC_ATTR_RW that do
+ a module_param, MODULE_PARM_DESC, lpfc_param_show,
+ [lpfc_param_store] and CLASS_DEVICE_ATTRIBUTE.
+ * Properly clean up when allocation of a linked BDE fails in the
+ SCSI queuecommand path.
+ * Fail SCSI command if dma_map_sg call fails.
+ * Remove unused macros SWAP_ALWAYS and SWAP_ALWAYS16.
+ * Reset context2 to 0 on exit in
+ lpfc_sli_issue_iocb_wait_high_priority() and
+ lpfc_sli_issue_iocb_wait().
+ * Arranging lpfc_scsiport.c to follow style of use after
+ definition. This removes the need for the cruft of forward
+ declarations. Also removing a redundant #define ScsiResult as it
+ already available elsewhere.
+ * Applying "Streamline lpfc error handling" patch from Christoph
+ Hellwig (hch@infradead.org) with following modifications: fix
+ mem leaks, remove some misplaced code that need not be there,
+ print a message on exit (old code prints two (entry/exit)), make
+ ret values consistent (either 1/0 or SUCCESS/FAILURE), keep all
+ eh routines in a single file (lpfc_scsiport.c).
+ * Move contents of lpfc_module_param.h into lpfc_fcp.c.
+ * Changed sysfs attributes to CLASS_DEVICE_ATTRIBUTES (previously
+ DEVICE_ATTRIBUTES). They now appear in
+ /sys/class/scsi_host/hostx (previously in
+ /sys/bus/pci/drivers/lpfc/devx).
+ * Removed lpfc_syfs.h and lpfc_sysfs.c.
+ * Cleanup of config params. Throttle params have been removed.
+ max_lun has been removed. max_target is replaced with a #define,
+ lun_skip is removed. Remove ipfc config params and related
+ code.
+ * Changed DMABUF_t usage to struct lpfc_dmabuf.
+ * Downsizing iCfgParam structure to include a_string, a_low, a_hi
+ and a_default values only.
+ * Free SCSI buf safety memory pool on shutdown to eliminate memory
+ leak.
+ * Change lpfc_printf_log to a #define. Also include phba->brd_no
+ and newline in the print string rather than in the #define.
+ * Remove code that optionally locates Host Group Pointers in host
+ memory SLIM since this is no longer needed for PPC64, once
+ CONFIG_PORT uses HBA's view of its BAR0.
+ * Removed the forward declarations of the sli functions and
+ rearranging the code in lpfc_sli.c.
+ * Removed the preamble functionality from logging.
+ * Make lpfc_sli_hba_setup() return negative error codes on error
+ and correct the comment left over in lpfc_fcp.c
+ * Removed the lpfc_loadtime variable.
+ * Put a space between all ifs and their open parens '('.
+ * Change Studly_Caps LPFC_SCSI_BUF_t to struct lpfc_scsi_buf.
+ * Fixed insmod hang after hardware error.
+ * Relocated scsi_host alloc to before we enable the interrupt
+ handler
+ * Add .tmp_versions directory to Makefile clean target. This
+ directory is created in the 2.6.5+ build process (with Red Hat
+ kernels at least).
+ * Changing phba->config to kmalloc lpfc_icfgparam and not
+ *phba->config. This is manifesting itself as a panic in
+ pci_release_region().
+ * Fix for firmware download / board reset problem.
+ * Integrated patch from Christoph Hellwig (hch@infradead.org) to
+ reorganize and cleanup lpfc_fcp.c
+ * Don't abort commands immediately when there is an RSCN event to
+ give driver time to rediscover targets before the midlayer
+ retries the SCSI commands.
+
+Changes from 20040604 to 20040614
+
+ * Changed version number to 8.0.4
+ * Removed lpfc_valid_lun function.
+ * Added scsi_buf safety pool to address scsi_buf failures in
+ queuecommand under low memory conditions. Allocations now come
+ from kmalloc initially, but if kmalloc fails, the allocation
+ comes from the safety pool.
+ * Modified lpfc_slave_alloc to only set the scsi_device->hostdata
+ pointer if the driver has discovered the target. This routine
+ always returns success now as well since no error ever occurs in
+ the alloc routine.
+ * Mask only info and warning messages. Print all error messages
+ irrespective of mask.
+ * Removing lpfc_log_chk_msg_disabled()
+ * Changed lpfc_printf_log to take struct lpfc_hba * directly
+ instead of a "board number".
+ * Convert dma_sync_single to pci_dma_sync_single_for_{device/cpu}.
+ * Implemented new style log messages. The message strings are now
+ embedded in the call to lpfc_printf_log.
+ * Decreased FLOGI discovery timeout to 20 seconds.
+ * On error in lpfc_pci_probe_one() return -1 and not 1.
+ * Allow for board numbers that are not sequential, paving the way
+ for hotplug support.
+ * scsi_add_host() can fail, so wrap it around in an if(). Also
+ initiate scsi_scan_host() after attaching the sysfs attributes.
+ * lpfc_release_version is used only in lpfc_ct.c, so move it there
+ and mark it as static.
+ * Removed lpfc_sleep_ms and replaced with mdelay or schedule calls
+ directly
+ * Removed all (struct list_head *) casts from clkData-related list
+ handling in list_add, list_del macros.
+ * Removed EXPORT_SYMBOLs.
+ * Removed LPFC_MIN_QFULL and lpfc_qthrottle_up.
+ * Replace LPFCSCSITARGET_t with struct lpfc_target.
+ * Replace LPFCSCSILUN_t with struct lpfc_lun.
+ * Remove unused struct declarations (fcPathId and fcRouteId) from
+ lpfc_scsi.h.
+ * Rewrite use of FC transport attributes.
+ * Fix crash when link is lost. This was due to lpfc_delay_iodone
+ calling list_del on an object that was never put on a list.
+ * Remove trailing spaces at the end of all lines.
+ * Set MAX_FCP_TARGET to 256 from 0xff. Set MAX_FCP_LUN and
+ MAX_FCP_CMDS to their decimal equivalents and updated
+ documentation.
+
+Changes from 20040526 to 20040604
+
+ * Changed version number to 8.0.3
+ * Completed sysfs FC transport support.
+ * Removed unused fields in SCSI LUN and SCSI Target structures:
+ void *pTargetProto; void *pTargetOSEnv; void *pLunOSEnv;
+ * Modified list_for_each to list_for_each_entry. Modified
+ list_for_each_safe to list_for_each_entry_safe.
+ * Remove lpfc_dfc.h file.
+ * Changed pHba->phba, pCommand->pcmd
+ * Changed plogi_ndlp -> plogindlp, pos_tmp->postmp, pRsp->prsp,
+ pCmd->pcmd
+ * Changed pText -> ptext
+ * Changed p_tmp_buff -> ptmpbuff
+ * Changed pBufList -> pbuflist, pRsp -> prsp, pCmd -> pcmd
+ * Changed *pos_tmp -> *postmp, *p_mbuf -> *pmbuf
+ * Following changes are made to the SCSI fast path: Added
+ DMA_BUF_t member to the lpfc_scsi_buf_t. This will reduce a
+ memory allocation in the scsi fast path. Added check for
+ targetp == NULL in the scsi fast path. Increased number of
+ scatter gather entries in lpfc_scsi_dma_ext to 4 from 3 and
+ changed the size of lpfc_scsi_dma_ext to 264
+ * Fixing some missing static lpfc_nportdisc.c.
+ * Reordered #include lines so that lpfc.h doesn't have to #include
+ other header files.
+ * Remove lpfc_get_hba_sym_node_name() as a global EXPORT and make
+ it static.
+ * Move struct clk_data definition from lpfc_hw.h to lpfc_sli.h.
+ * Changed LPFC_IOCBQ_t to struct lpfc_iocbq.
+ * Changed LPFC_SLI_RING_t to struct lpfc_sli_ring.
+ * Changed LPFC_NODELIST_t to struct lpfc_nodelist.
+ * Rearranged lpfc_nportdisc.c by moving state machine array
+ (lpfc_disc_action) and the one function that uses it,
+ lpfc_disc_state_machine, to the end of the file, removing the
+ need for the raft of prototypes at the top.
+ * Changed LPFC_BINDLIST_t to struct lpfc_bindlist.
+ * Removed lpfc_issue_ct_rsp(), lpfc_sleep(), lpfc_add_bind(),
+ lpfc_del_bind(), lpfc_sli_wake_mbox_wait() and
+ lpfc_sli_issue_mbox_wait().
+ * Fixed a large number of overly-long lines.
+ * Fixed some discovery problems: Introduced deferred ndlp removal
+ when in DSM to avoid panic when in nested DMSs Fix NportId
+ fffc01 handling to not relogin after LOGO fixed handling of LOGO
+ on PLOGI issue.
+ * Changed SLI_CT_REQUEST to lpfc_sli_ct_request.
+ * Changed NAME_TYPE to struct lpfc_name.
+ * Changed lpfcCfgParam_t to struct lpfc_cfgparam.
+ * Changed LPFC_STAT_t to struct lpfc_stats.
+ * Changed HBAEVT_t to struct lpfc_hba_event.
+ * Changed Studly_Caps lpfcHBA_t to struct lpfc_hba.
+ * Removed no longer used tasklet_running flag.
+ * Removing *PSOME_VAR typedefs and using SOME_VAR* directly.
+ * Changing .use_clustering to ENABLE_CLUSTERING.
+ * Modify lpfc_queuecommand to return SCSI_MLQUEUE_HOST_BUSY when
+ it can't queue a SCSI command. Also, remove cmnds_in_flight
+ member of struct lpfcHBA for 2.6 kernels as it was only needed
+ to determine what to return from queuecommand.
+ * Change return type of lpfc_evt_iocb_free to void as it doesn't
+ return anything.
+ * Remove unused cmnd_retry_list and in_retry members in struct
+ lpfcHBA.
+ * Remove some instances of unneeded casting of kmalloc's return in
+ lpfc_scsiport.c
+ * Remove lpfc_linux_attach() and lpfc_linux_detach(). Integrate
+ them into lpfc_probe_one() and lpfc_release_one() respectively.
+ * Remove lpfc_num_iocbs, lpfc_num_bufs module parameters
+ * Remove #defines for NUM_NODES, NUM_BUFS and NUM_IOCBS
+
+Changes from 20040515 to 20040526
+
+ * Changing version number to 8.0.2.
+ * Including dma-mapping.h as one of the include headers. Also
+ rearrange the #include order.
+ * Make functions static as appropriate.
+ * queuecommand() will now return SCSI_MLQUEUE_HOST_BUSY instead of
+ 1 to backpressure midlayer.
+ * Removed function prototypes for lpfc_start_timer() and
+ lpfc_stop_timer()
+ * Changed timer support to be inline. Clk_data is now declared
+ right next to the corresponding timer_list entry so we don't
+ have to allocate these clk_data dynamically.
+ * Add readls after writels to PCI space to flush the writes.
+ * Fix misspelled word "safety" in function names.
+ * Fix up comments in lpfc.conf for per HBA parameters to reflect
+ new implementation.
+ * Change lpfc_proc_info handler to get the Nodename from
+ fc_nodename and not fc_portname.
+ * Fix up some comments and whitespace in lpfc_fcp.c.
+ * Formatting changes: get rid of leading spaces in code
+ * Move discovery processing from tasklet to a kernel thread.
+ * Move ndlp node from unmap list to map list if ADISC completed
+ successfully.
+ * Flush all the ELS IOCBs when there is a link event.
+ * LP9802 qdepth is twice the LP9802DC qdepth. Delay
+ elx_sched_init after READ_CONFIG to get max_xri from the
+ firmware. Reset ELX_CFG_DFT_HBA_Q_DEPTH to max_xri after
+ READ_CONFIG
+ * Fix fc_get_cfg_parm() to be more robust and support embedded hex
+ values. The lpfc_param's are now defined as:
+ lpfc_log_verbose="lpfc:0,lpfc0:0x10,lpfc1:4,lpfc100:0xffff" The
+ "," delimter does not matter. It can be anything or not exist at
+ all. ie param = "lpfc:0lpfc0:0x10.lpfc1:4txtlpfc100:0xffff" will
+ also work. Additionally the string is treated as case
+ insensitive.
+ * Changed all usage of lpfc_find_lun_device() to lpfc_find_lun().
+ * Removed unnecessary wrappers lpfc_find_lun_device() and
+ lpfc_tran_find_lun().
+ * Switch from using internal bus/id/lun to similar data from
+ scsi_device structure.
+ * Eliminate one-line function lpfc_find_target()
+ * Added slave_alloc, slave_destory
+ * lpfc_scsi_cmd_start can now acquire lun pointer from
+ scsi_device->hostdata, which is setup in slave_alloc.
+ * Eliminate unnecessary checking on every cmd just to see if we
+ are accessing the device the first time.
+ * Remove assumption in lpfc_reset_lun_handler that a valid
+ lpfc_scsi_buf is hung off of linux's scsi_cmnd->host_scribble
+ when our reset is called.
+
+Changes from 20040507 to 20040515
+
+ * Changed version to 8.0.1
+ * Fixed crash on driver rmmod after error injection tests and
+ lpfc_tasklet deadlock.
+ * Modified lpfc.conf to remove limit on number of support hosts
+ * Removed HBAAPI
+ * Removed duplication of SCSI opcodes from lpfc_fcp.h that are
+ available in scsi/scsi.h
+ * Rework module_param usage
+ * Added MODULE_PARAM_DESC for various module_params
+ * Removed #define EXPORT_SYMTAB
+ * Removed #includes of if_arp.h and rtnetlink.h
+ * Removed string "Open Source" from MODULE_DESC
+ * Cleanup duplicated string definitions used by MODULE_DESC
+ * Renamed lpfc_pci_[detect|release] to lpfc_pci_[probe|remove]_one
+ * Fix formatting of lpfc_driver
+ * Remove unnecessary memset to 0 of lpfcDRVR
+ * Attach driver attributes always unless pci_module_init failed
+ * Remove all one-line wrappers from lpfc_mem.
+ * Remove lpfc_sysfs_set_[show|store] as it is no longer needed
+ * Redo lpfc_sysfs_params_[show|store] to one value per attribute rule
+ * Breakdown lpfc_sysfs_info_show into smaller one value per attribute
+ * Use device attributes instead of driver attributes where appropriate
+ * Remove no longer needed EXPORT_SYMBOLs
+ * Remove some unused code (1600 msg's related)
+
+Changes from 20040429 to 20040507
+
+ * Change version to 8.0.0
+ * Fix the number of cmd / rsp ring entries in lpfc_fcp.c to match
+ the divisions setup in lpfc_hw.h.
+ * Remove phba->iflag reference.
+ * Several locking improvements.
+ * Remove functions lpfc_drvr_init_lock, lpfc_drvr_lock,
+ lpfc_drvr_unlock and lpfc_hipri_*.
+ * Remove LPFC_DRVR_LOCK and LPFC_DRVR_UNLOCK macros.
+ * Make lpfc_info() use lpfc_get_hba_model_desc() instead of
+ rewriting almost identical code.
+ * Fix 1 overly long line in each of lpfc_cfgparm.h, lpfc_ftp.c and
+ lpfc_sli.c.
+ * Fix build for Red Hat 2.6.3 kernel by #defining MODULE_VERSION
+ only if it isn't already defined.
+ * Change elx_sli_issue_mbox_wait to return correct error code to
+ the caller.
+ * In some of the els completion routines, after calling
+ lpfc_elx_chk_latt, driver ignores the return code of the
+ lpfc_elx_chk_latt. This will prevent the discovery state machine
+ restarting correctly when there are link events in the middle of
+ discovery state machine running. Fix this by exiting discovery
+ state machine if lpfc_els_chk_latt returns a non zero value.
+ * Removed MAX_LPFC_BRDS from lpfc_diag.h
+ * Removed unused first_check.
+ * Remove some unused fields and defines.
+ * Change lpfc-param names to lpfc_param.
+ * Add use of MODULE_VERSION macro for 2.6 kernels.
+ * Shorten length of some of the comment lines to make them more
+ readable.
+ * Move FCP_* definitions to their own header file, lpfc_fcp.h.
+ * Remove unused prototypes from lpfc_crtn.h: fcptst, iptst,
+ lpfc_DELAYMS.
+ * Remove duplicated prototypes from lpfc_crtn.h:
+ lpfc_config_port_prep, lpfc_config_port_post,
+ lpfc_hba_down_prep.
+ * Removed some unused export_symbols.
+ * Install driver files into */drivers/scsi/lpfc instead of
+ */drivers/scsi.
+
+Changes from 20040426 to 20040429
+
+ * Declared export symbol lpfc_page_alloc and lpfc_page_free
+ * Changed lpfc version number to 6.98.3
+ * Move the definition of MAX_LPFC_BRDS to the only header file
+ that uses it (lpfc_diag.h).
+ * Change lpfc_sli_wake_iocb_wait to do a regular wake_up since
+ lpfc_sli_issue_iocb_wait now sleeps uninterruptible.
+ * Replace list_for_each() with list_for_each_safe() when a list
+ element could be deleted.
+ * Fix IOCB memory leak
+
+Changes from 20040416 to 20040426
+
+ * Change lpfc_config_port_prep() to interpret word 4 of the DUMP
+ mbox response as a byte-count
+ * Add info attribute to sysfs
+ * Minor formatting (spaces to tabs) cleanup in lpfc_sched.h
+ * Remove unused log message number 732
+ * Completing MODULE_PARM -> module_param changes
+ * Removed unused targetenable module parameter
+ * Removed locks from lpfc_sli_issue_mbox_wait routine
+ * Removed code that retry 29,00 check condition
+ * Removed code that manipulates rspSnsLen.
+ * Fix use of lun-q-depth config param
+ * Fix severity inconsistency with log message 249
+ * Removed lpfc_max_target from lpfc_linux_attach
+ * Replace references to lpfcDRVR.pHba[] with lpfc_get_phba_by_inst()
+ * Change lpfc_param to lpfc-param
+ * Partially removed 32 HBA restriction within driver. Incorported
+ lpfc_instcnt, lpfc_instance[], and pHba[] into lpfcDRVR
+ structure Added routines lpfc_get_phba_by_inst()
+ lpfc_get_inst_by_phba() lpfc_check_valid_phba()
+ * Turn on attributes "set" & "params" by default.
+ * Further formatting/whitespace/line length cleanup on: lpfc_ct.c
+ lpfc_els.c lpfc_fcp.c lpfc_hbadisc.c lpfc_init.c lpfc_ipport.c
+ lpfc_mbox.c lpfc_nportdisc.c lpfc_sched.c lpfc_sched.h
+ lpfc_scsi.h lpfc_scsiport.c lpfc_sli.c and lpfc_sli.h
+ * Add log message 249 to log any unsupported device addressing
+ modes encountered.
+ * Add support for 256 targets and 256 LUNs
+ * Fixed panic in lpfc_linkdown.
+ * Removed (struct list_head*) casting in several calls to list_del
+ * Free irq reservation and kill running timers when insmod or
+ modprobe are killed via ctrl-c
+ * Remove drivers/scsi from include path
+ * Wrap use of log message 311 in macro
+ * Detect failure return from pci_map_sg call in lpfc_os_prep_io
+ * Fix use-after-free of IOCB in lpfc_sli_process_sol_iocb which
+ was causing an Oops on 2.6.5 kernel.
+ * Cleanup use of several gotos not used for error exit.
+ * Replace memcpy_toio() and memcpy_toio() with endian-dependent
+ lpfc_memcpy_to_slim() and lpfc_memcpy_from_slim() so that for
+ big endian hosts like PPC64, the SLIM is accessed 4 bytes at a
+ time instead of as a byte-stream.
+
+Changes from 20040409 to 20040416
+
+ * The scsi_register and scsi_alloc_host OS calls can fail and
+ return a zero-valued host pointer. A ctrl-C on 2.6 kernels
+ during driver load will cause this and the driver to panic.
+ Fixed this bug. Also found a bug in the error_x handling with
+ lpfc_sli_hba_down - it was in the wrong place and the driver
+ lock was not held, but needed to be (in lpfc_linux_attach) Fixed
+ both. Did some minor comment clean up.
+ * Removed unwanted (void *) castings.
+ * Replace define of INVALID_PHYS, with kernel 2.6.5's
+ dma_mapping_error() and add a inline function for earlier
+ kernels. Remove lpfc_bad_scatterlist().
+ * Clean up formatting in hbaapi.h, lpfc.h, lpfc_cfgparm.h,
+ lpfc_crtn.h, lpfc_ct.c, lpfc_diag.h, lpfc_disc.h, lpfc_els.c,
+ lpfc_fcp.c, lpfc_hbadisc.c, lpfc_hw.h, lpfc_init.c,
+ lpfc_ipport.c, lpfc_logmsg.c, lpfc_logmsg.h and lpfc_scsiport.c
+ - mostly replacing groups of 8 spaces with hard tabs and keeping
+ lines to 80 column max..
+ * Removed LPFC_DRVR_LOCK call from lpfc_unblock_requests for 2.4
+ kernels. The lpfc_scsi_done routine already unlocks the driver
+ lock since it expects this lock to be held.
+ * Removed global lock capabilities from driver lock routines
+ * Remove SA_INTERRUPT flag from request_irq
+ * Move dma_addr_t cast inside of getPaddr macro as everywhere
+ getPaddr is used, the return is cast to dma_addr_t.
+ * Clean up formatting in lpfc_sli.c and lpfc_sysfs.c - mostly
+ replacing groups of 8 spaces with hard tabs and keeping lines
+ to 80 column max.
+ * Fix build for RHEL 2.1 BOOT kernels by always #including
+ interrupt.h in lpfc.h.
+ * Fix RHEL 3 build by #defining EXPORT_SYMTAB.
+ * Replace sprintf with snprintf in lpfc_proc_info.
+ * Fix build warnings on 2.6 kernels - remove no longer used calls
+ to character device initialization.
+ * Initial support code for discovery in tasklet conversion.
+ * Removing char interface and ioctl code.
+ * Change all elx prefixes to lpfc
+ * Replace lpfc_write_slim() & lpfc_read_slim() with memcpy_toio(),
+ memcpy_fromio(), writel() & readl().
+
+Changes from 20040402 to 20040409
+
+ * Replaced lpfc_read_hbaregs_plus_offset and
+ lpfc_write_hbaregs_plus_offset functions with readl and writel.
+ * Get rid of long mdelay's in insmod path
+ * Changed the way our pci_device_id structures are initialized
+ * Replace lpfc_read/write_CA/HA/HC/HS with calls to readl() &
+ writel() directly.
+ * Increase SLI2_SLIM to 16K Increase cmd / rsp IOCBs accordingly
+ * Removed lpfc_els_chk_latt from the lpfc_config_post function.
+ lpfc_els_chk_latt will enable the link event interrupts when
+ flogi is pending which causes two discovery state machines
+ running parallelly.
+ * Add pci_disable_device to unload path.
+ * Move lpfc_sleep_event from lpfc_fcp.c to lpfc_util_ioctl.c
+ * Call dma_map_single() & pci_map_single() directly instead of via
+ macro lpfc_pci_map(). Allow address 0 for PPC64.
+ * Change sleep to uninterruptible in lpfc_sli_issue_icob_wait
+ because this function doesn't handle signals.
+ * Move lpfc_wakeup_event from lpfc_fcp.c to lpfc_ioctl.c
+ * Remove unneeded #include <linux/netdevice.h>
+ * Remove unused clock variables lpfc_clkCnt and lpfc_sec_clk.
+ * Get rid of capitalization of function names.
+ * Removed lpfc_addr_sprintf.
+ * Implemented gotos in lpfc_linux_attach for error cases.
+ * Replace mlist->dma.list = dmp->dma.list; to mlist = dmp.
+ * Remove functions lpfc_get_OsNameVersion and elx_wakeup. Change
+ elx_wakeup to wake_up_interruptible
+ * Add function lpfc_get_os_nameversion and change
+ lpfc_get_OsNameVersion to lpfc_get_os_nameversion.
+ * Remove lpfc_get_OsNameVersion
+ * Change driver name to a consistent lpfc in every visible place.
+ * Fix build warning: removed unused variable ret in lpfc_fdmi_tmo.
+ * Remove lpfc_utsname_nodename_check function
+ * Remove functions lpfc_register_intr and lpfc_unregister_intr
+ * Fill in owner field in lpfc_ops file_operations struct and
+ remove now unnecessary open and close entry points.
+ * Change function name prefixes from elx_ to lpfc_
+ * Remove special case check for TUR in elx_os_prep_io()
+ * Renamed elx_scsi.h to lpfc_scsi.h
+ * Renamed elx_sched.h to lpfc_sched.h
+ * Renamed elx_mem.h to lpfc_mem.h
+ * Renamed elx_sli.h to lpfc_sli.h
+ * Renamed elx_logmsg.h to lpfc_logmsg.h
+ * Renamed elx.h to lpfc.h
+ * Renamed elx_sli.c to lpfc_sli.c
+ * Renamed elx_sched.c to lpfc_sched.c
+ * Renamed elx_mem.c to lpfc_mem.c
+ * Renamed elx_logmsg.c to lpfc_logmsg.c
+ * Renamed lpfcLINUXfcp.c lpfc_fcp.c
+ * Renamed elx_clock.c to lpfc_clock.c
+ * Reduce stack usage in lpfc_info().
+ * Move lpip_stats structure from lpfc_hba.h to lpfc_ip.h.
+ * Move lpfc_stats and HBAEVT_t structures from lpfc_hba.h to
+ lpfc.h
+ * Remove lpfc_hba.h
+ * Remove duplicate rc definitions from
+ * Removed code which used next pointer to store mbox structure.
+ * Cleaned up list iterations.
+ * Removed non list manipulation of the next pointers.
+ * Change list_del()/INIT_LIST_HEAD sequences to list_del_init()
+ * In ELX_IOCBQ_t: Moved hipri_trigger field to iocb_flag. Combined
+ hipri_wait_queue and rsp_iocb in union
+ * Replaced casting from list_head with list_entry macro.
+ * Added ct_ndlp_context field to the ELX_IOCBQ_t.
+ * Do not use DMABUf_t list to store ndlp context
+ * Return 0 from lpfc_process_iotcl_util() when ELX_INITBRDS
+ succeeds.
+ * remove elx_os_scsiport.h
+ * Do not use DMABUf_t list to hold rpi context
+ * Replace elx_cfg_* names with lpfc_cfg-*
+ * Moved FCP activity to ring 0. Moved ELS/CT activity to ring 2.
+ * Clean up formatting of elx_sli.h (tabs for indents, 80 column
+ lines).
+ * Remove unused elxclock declaration in elx_sli.h.
+ * Since everywhere IOCB_ENTRY is used, the return value is cast,
+ move the cast into the macro.
+ * Split ioctls out into separate files
+
+Changes from 20040326 to 20040402
+
+ * Updated ChangeLog for 20040402 SourceForge drop.
+ * Use safe list iterator for ndlp list
+ * Added code to return NLP_STE_FREED_NODE from the discovery
+ state machine functions if the node is freed from the
+ function.
+ * Fixes to DMABUF_t handling
+ * Fix for load error in discovery
+ * Remove loop_cnt variable from lpfc_rcv_plogi_unused_node.
+ * Remove nle. reference.
+ * Remove support for building 2.4 drivers
+ * Remove elx_util.h and replace elx_disc.h with lpfc_disc.h
+ * Implemented the Linux list macros in the discovery code.
+ Also moved elx_disc.h contents into lpfc_disc.h
+ * Unused variable cleanup
+ * Use Linux list macros for DMABUF_t
+ * Break up ioctls into 3 sections, dfc, util, hbaapi
+ rearranged code so this could be easily separated into a
+ differnet module later All 3 are currently turned on by
+ defines in lpfc_ioctl.c LPFC_DFC_IOCTL, LPFC_UTIL_IOCTL,
+ LPFC_HBAAPI_IOCTL
+ * Misc cleanup: some goto's; add comments; clarify function
+ args
+ * Added code to use list macro for ELXSCSITARGET_t.
+ * New list implementation for ELX_MBOXQ_t
+ * Cleaned up some list_head casting.
+ * Put IPFC ifdef around two members of struct lpfc_nodelist.
+ * Cleaned up iocb list using list macros and list_head data
+ structure.
+ * lpfc_online() was missing some timer routines that were
+ started by lpfc_linux_attach(). These routines are now also
+ started by lpfc_online(). lpfc_offline() only stopped
+ els_timeout routine. It now stops all timeout routines
+ associated with that hba.
+ * Replace separate next and prev pointers in struct
+ lpfc_bindlist with list_head type. In elxHBA_t, replace
+ fc_nlpbind_start and _end with fc_nlpbind_list and use
+ list_head macros to access it.
+ * Fix ulpStatus for aborting I/Os overlaps with newer firmware
+ ulpStatus values
+ * Rework params_show/store to be consistent as the other
+ routines. Remove generic'ness and rely on set attribute.
+ * Remove unused log message.
+ * Collapse elx_crtn.h and prod_crtn.h into lpfc_crtn.h
+ * Ifdef Scheduler specific routines
+ * Removed following ununsed ioclt's: ELX_READ_IOCB
+ ELX_READ_MEMSEG ELX_READ_BINFO ELX_READ_EINVAL ELX_READ_LHBA
+ ELX_READ_LXHBA ELX_SET ELX_DBG LPFC_TRACE
+ * Removed variable fc_dbg_flg
+ * Fixed a bug where HBA_Q_DEPTH was set incorrectly for
+ 3-digit HBAs. Also changed can_queue so midlayer will only
+ send (HBA_Q_DEPTH - 10) cmds.
+ * Clean up code in the error path, check condition. Remove
+ ununsed sense-related fields in lun structure.
+ * Added code for safety pools for following objects: mbuf/bpl,
+ mbox, iocb, ndlp, bind
+ * Wrapped '#include <elx_sched.h>' in '#ifdef USE_SCHEDULER'.
+ * Fixed 'make clean' target.
+ * Build now ignores elx_sched.o, and includes lpfc_sysfs.o.
+ * Wrapped lpfndd.o target in BUILD_IPFC ifdef.
+ * Removed elx_os.h inclusion in implementation files.
+ * Removed ELX_OS_IO_t data structure and put data direction
+ and non scatter/gather physical address into the scsi buffer
+ structure directly. Moved DRVR_LOCK, putPaddr, getPaddr
+ macros and some defines into elx.h since they are required
+ by the whole driver.
+ * Migrated following ioctls (debug) ELX_DISPLAY_PCI_ALL
+ ELX_DEVP ELX_READ_BPLIST ELX_RESET_QDEPTH ELX_STAT.
+ * Step 1 of attempt to move all Debug ioctls to sysfs.
+ Implemented the following IOCTLs in sysfs: ELX_WRITE_HC
+ ELX_WRITE_HS ELX_WRITE_HA ELX_WRITE_CA ELX_READ_HC
+ ELX_READ_HS ELX_READ_HA ELX_READ_CA ELX_READ_MB ELX_RESET
+ ELX_READ_HBA ELX_INSTANCE ELX_LIP. Also introduced
+ attribute "set" to be used in conjunction with the above
+ attributes.
+ * Removed DLINK, enque and deque declarations now that clock
+ doesn't use them anymore
+ * Separated install rule so that BUILD_IPFC has to be set when
+ make is called in order for the install rule to attempt to
+ copy the lpfndd.o driver. This change fixes a bug that
+ occurs because the install rule by default attempted to
+ install lpfndd.o, whereas the default make rule did not by
+ default build lpfndd.o.
+ * Keep track if hbaapi index numbers need to be refreshed.
+ * Removed prod_os.h from include list.
+ * Removed LPFC_LOCK and LPFC_UNLOCK macros. Added OS calls
+ into elx_os_scsiport.c. This file is now empty.
+ * Added spin_lock_irqsave and spin_unlock_irqrestore calls
+ into code directly and removed LPFC_LOCK_ and _UNLOCK_
+ macros
+ * Remove references to "elx_clock.h"
+ * Added utsname.h to include list. The previous checkin to
+ elx_os.h removed its inclusion of utsname.h since there is
+ precious little in the file. However, lpfcLINUXfcp.c needs
+ it and now has it.
+ * Removed some commented-out code
+ * Removed elx_lck_t data structure, stray elxDRVR_t type, and
+ include from file. No longer used.
+ * Removed two PCI Sync defines. Removed includes - not
+ needed. Cleaned up macro lines.
+ * Added two functions from elxLINUXfcp.c. These functions
+ were IPFC specific.
+ * Removed hipri lock abstractions and added OS call into code.
+ Removed elx_lck_t and added spinlock_t directly. Moved two
+ IPFC functions into lpfc_ipport.c
+ * Moved IP specific structures to lpfc_ip.h file.
+ * lpfc_ipfarp_timeout() uses system timer. Remove all usages
+ of old internal clock support.
+ * Made changes to compile without IPFC support for the default
+ build. Added ifdef IPFC for all lpfc_ip.h includes.
+ * Patched elx_free_scsi_buf
+ * Removed elx_sched.o from 2.6 dependencies
+ * Reworked lpfc_pcimap.
+ * Use Linux swap macros to replace ELX swapping macros
+ (SWAP_SHORT, SWAP_LONG, SWAP_DATA, SWAP_DATA16,
+ PCIMEM_SHORT, PCIMEM_LONG, PCIMEM_DATA).
+ * move in_interrupt() check inside of elx_sleep_ms()
+ * Moved location of pci.h include.
+ * Restored elx_lck_t types in elxHBA_t.
+ * Removed elx_pci_dma_sync call. Also removed some PCI
+ defines from elx_hw.h and removed the spinlock_t locks that
+ are no longer used in elx.h
+ * elx_iodone() now uses system timer.
+ * elx_qfull_retry() now uses system timer.
+ * lpfc_put_buf(), lpfc_ip_xri_timeout() and
+ lpfc_ip_timeout_handler() now use system timer.
+ * lpfc_fdmi_tmo() and lpfc_qthrottle_up() now use system
+ timer.
+ * Removed num_bufs and num_iocbs configuration parameters.
+ * Fixed a memory corruption bug. This was caused by a memory
+ write to ndlp structure from lpfc_cmpl_els_acc function.
+ This ndlp structure was freed from lpfc_els_unsol_event.
+ * lpfc_disc_timeout() and lpfc_establish_link_tmo() now use
+ system timer. Also update lpfc_els_retry_delay() to do a
+ single lock release at the end.
+ * Remove use of PAN (pseudo adapter number).
+ * Reintroduced usage of the cross compiler for building on
+ ppc64 to remove build errors that were cropping up when
+ using the standard gcc compiler.
+ * Fix no-unlock-before return in lpfc_els_retry_delay which was
+ causing a deadlock on insmod in some environments.
+ * Minor format changes fix up comments
+ * Create utility clock function elx_start_timer() and
+ elx_stop_timer(). All timeout routines now use these common
+ routines.
+ * Minor formating changes fix up comments
+ * Minor formatting changes get rid of failover defines for
+ syntax checking
+ * Minor formatting changes remove ISCSI defines.
+ * Fix typo in install target for 2.4 kernels.
+ * Removed unused elx_scsi_add_timer extern function
+ declaration.
+ * Cleanup casting around DMA masks.
+ * Comment out lpfndd.o modules_install section as lpfndd.o is
+ not generated if CONFIG_NET_LPFC is not set. Also refer to
+ BASEINCLUDE only in out of kernel source module builds as it
+ will not exist otherwise.
+ * Removed unused malloc counters from lpfcLINUXfcp.c.
+ * Remove some unnecessary #includes in lpfcLINUXfcp.c
+ * Remove unncessary #includes in elxLINUXfcp.c
+ * Minor formatting cleanups in Makefile to avoid some
+ linewrapping.
+ * Removed unused elx_mem_pool data structure.
+ * Remove several unnecessary #includes.
+ * Moving fix for memory leak in ioctl lip area to sysfs's lip.
+ * Removed unused elx_dma_handle_t elx_acc_handle_t
+ FC_MAX_SEGSZ and FC_MAX_POOL.
+ * Rewrite of Makefile. Fixes breakages with make -j4 during
+ kernel compile. Does not recompile all files on every
+ build. Uses the kernel build's definitions of CFLAGS,
+ MODFLAGS etc. Removed "make rpm" option.
+ * Removed unused #defines CLOSED, DEAD, OPENED, NORMAL_OPEN
+ and unneeded #include of elx_sched.h in elx.h.
+ * Several log message updates
+ * Add PCI_DEVICE_ID_FIREFLY for LP6000
+ * Fixed known issues in 20040326: driver crashes on rmmod in
+ both 2.4 and 2.6 kernels
+
+
+Changes from 20040319 to 20040326
+
+ * Updated ChangeLog for 20040326 SourceForge drop.
+ * remove lpfc_isr / lpfc_tmr logic fixed up 8 spaces from
+ previous checkins with tabs
+ * replace elx_in_intr() with in_interrupt()
+ * Remove unused messages 1602 and 1603.
+ * Fix the following issues with log messages: Remove unused
+ messages 406, 407, 409, 927, 928, 1201, 1202, 1204, 1205, 1206
+ and 1207. Create a new message 738 to fix duplicate instances
+ of 736.
+ * Removed remaining pci interface abstractions from elxLINUXfcp.c.
+ Implemented OS calls directly in all remaining files and cleaned
+ up modules. Removed prototypes as well.
+ * Removed following functions/structures elx_mem_dmapool
+ elx_idx_dmapool elx_size_dmapool elx_kmem_lock dfc_data_alloc
+ dfc_data_free dfc_mem struct mbuf_info elx_acc_handle_t
+ data_handle elx_dma_handle_t dma_handle struct elx_memseg
+ MEMSEG_t
+ * lpfc_els_timeout_handler() now uses system timer.
+ * Further cleanup of #ifdef powerpc
+ * lpfc_scsi_timeout_handler() now uses system timer.
+ * Replace common driver's own defines for endianness w/ Linux's
+ __BIG_ENDIAN etc.
+ * Added #ifdef IPFC for all IPFC specific code.
+ * lpfc_disc_retry_rptlun() now uses system timer.
+ * lpfc_npr_timeout() now uses system timer.
+ * Modified detect code, on insmod, to only wait a max of 2 secs if
+ link comes up and there are no devices.
+ * Move remaining message logging functions into
+ elx_logmsg.c/elx_logmsg.h.
+ * Added code to clear link attention bit when there is a pending
+ link event and the memory allocation for read_la mail box
+ command fails.
+ * Removed function calls for mapping bar registers and allocating
+ kernel virtual memory mappings to the mapped bars Removed
+ prototypes, lpfc_driver_cache_line, and pci_bar1_map rename to
+ pci_bar2_map.
+ * Allocate mbox only if the hba_state is in ready state.
+ * Complete lip support via sysfs. To lip, echo brdnum >
+ /sys/bus/pci/drivers/lpfc/lip.
+ * moving sysfs show/store implementations to lpfc_sysfs.c. Also add
+ support for lip.
+ * Add files: lpfc_sysfs.c, lpfc_sysfs.h
+ * move LPFC_DRIVER_NAME and LPFC_MODULE_DESC out of lpfcLINUXfcp.c
+ to lpfc_version.h, since it is now needed in lpfc_sysfs.c
+ * elx_mbox_timeout now uses system timer
+ * Changed lpfc_nodev_timeout, lpfc_els_retry_delay and
+ lpfc_linkdown_timeout to use the system timer instead of
+ internal clock support.
+ * Move remaining message logging functions in elx_util.c to
+ elx_logmsg.c.
+ * Remove some unnecessary typecasting.
+ * Remove log message that is no longer used (was used by
+ elx_str_atox).
+ * Replaced DLINK_t and SLINK_t by standard Linux list_head
+ * Removed deque macro
+ * Replaced ELX_DLINK_t ans ELX_SLINK_t by Linux struct list_head
+ (except for clock)
+ * Removed following functions from code: linux_kmalloc linux_kfree
+ elx_alloc_bigbuf elx_free_bigbuf
+ * Removed following abstract functions from the code. elx_malloc
+ elx_free elx_ip_get_rcv_buf elx_ip_free_rcv_buf
+ elx_mem_alloc_dmabuf elx_mem_alloc_dmabufext elx_mem_alloc_dma
+ elx_mem_alloc_buf lpfc_bufmap
+ * Removed custom PCI configuration #defines and replaced with
+ OS-provided #defines. Also added linux/pci.h to *.c files.
+ * Remove elx_str_ctox. Replace elx_str_atox with sscanf.
+ * Many indentation/whitespace fixes.
+ * Replace elx_str_ctox with isxdigit where it was only used to
+ check the value of a character.
+ * Removed following functions from the code. elx_kmem_free
+ elx_kmem_alloc elx_kmem_zalloc
+ * Change use of 2.4 SCSI typedef Scsi_Host_Template to struct
+ scsi_host_template for 2.6 kernels.
+ * Change use of 2.4 SCSI typedefs (Scsi_Device, Scsi_Cmnd,
+ Scsi_Request) the their real struct names.
+ * Move 2.6 compatibility irqreturn definitions to lpfc_compat.h.
+ Protect these definitions from conflicting with similar ones in
+ later 2.4 kernels.
+ * Remove unused definitions: LINUX_TGT_t, LINUX_LUN_t,
+ LINUX_BUF_t, elx_lun_t, SET_ADAPTER_STATUS.
+ * Convert pci_ calls to linux 2.6 dma_ equivalents.
+ * Removed unused types: struct buf, struct sc_buf, T_SCSIBUF
+ typedef.
+ * Fix Makefile so that 2.4 drivers don't always rebuild all files.
+ * Remove unused _static_ and fc_lun_t definitions.
+ * Cleaned up some memory pool implementation code.
+ * Fix panic with char dev changes. Turns out that 2.6.4 code does
+ the same in kernel space with the 2.4 interface style
+ definitions. So remove the new char dev code altogether.
+ * Remove typecasting from fc_get_cfg_param and consolidate
+ multiple instances of the parameter switch into a single
+ instance.
+ * Use lpfc_is_LC_HBA() macro that tests pcidev->device directly
+ instead of saving a private copy that undergoes varied shifting
+ & casting.
+ * Removed usage of all memory pools.
+
+Changes from 20040312 to 20040319
+
+ * Use dev_warn instead of printk for 2.6 kernels
+ * Correct Iocbq completion routine for 2.6 kernel case
+ * Change void *pOSCmd to Scsi_Smnd *pCmd
+ * Change void *pOScmd to struct sk_buff *pCmd
+ * Remove data directon code.
+ * Removed memory pool for buf/bpl buffers and use kmalloc/kfree
+ pci_pool_alloc/free directly.
+ * Move PPC check for DMA address 0 in scatter-gather list, into
+ lpfc_compat.h
+ * Always use pci_unmap_single() instead of pci_unmap_page()
+ * Clean up the 2.6 vs 2.4 #if blocks.
+ * Conditionalize Scheduler
+ * Add a comment to explain a little what the first Makefile
+ section does.
+ * Removed lpfc_intr_post
+ * Sysfs new display format. Also added write functionality. You
+ can [ echo "0 log_verbose 3" >
+ /sys/bus/pci/drivers/lpfc/params]. Hex support yet to be added.
+ * Removed several #ifdef powerpc, including for a discovery issue
+ in lpfc_ValidLun()
+ * Change elx_printf_log to use vsprintf.
+ * Added lpfc_compat.h provides macros to aid compilation in the
+ Linux 2.4 kernel over various platform architectures. Initially
+ support mapping to a DMA address.
+ * Removed memory pool for nlp/bind buffers and use kmalloc/kfree
+ directly.
+ * Removed memory pool for iocb buffers and use kmalloc/kfree
+ directly.
+ * Removed memory pool for mailbox buffers and use kmalloc/kfree
+ directly.
+ * Cleaned up back and forth casts
+ * Initial support for sysfs for 2.6 kernel.
+ * Changed elx_dma_addr_t to dma_addr_t
+ * Fix a 2.6 kernel check to be >= 2.6.0 instead of > (was missing
+ 2.6.0).
+ * Remove elx_printf and elx_str_sprintf. Replace elx_print with
+ printk.
+ * Replace elx_printf with printk.
+ * Replace elx_str_sprintf with sprintf.
+ * Removed the mem_lock, its prototype, function, macro, and
+ iflags.
+ * Use kmalloc/kfree for ELX_SCSI_BUF_t
+ * Use linux pci_pools for SCSI_DMA_EXT
+ * Use linux pci_pools for BPLs.
+ * Minor cleanup of DFC args for PPC64.
+ * Several small indentation cleanups.
+ * New Linux 2.6 style of char device registration.
+ * Migrated members of LPFCHBA_t and LINUX_HBA_t into elxHBA_t
+ * Use strcpy, strncmp, isdigit, strlen instead of abstractions
+ * Cleanup of driver_template.
+ * Facilitate compile time turn on/off of lpfc_network_on.
+ * Split large source files into smaller, better named ones.
+
+Changes from 2.10a to 20040312
+
+ * Fix build for 2.4 kernels
+ * Move driver version macros into lpfc_version.h file.
+ * Fixed data miscompare with LIP.
+ * Removed elx_sli, elx_ioc, elx_disc, elx_sch routines,
+ prototypes, and reference points.
+ * Correct the space insertions with hardtabs
+ * Remove routine call pointers in ELX_SLI_INIT_t struct.
+ * Removed module locks except for drvr, mem, and clock.
+ * Removed unused module locks from sourcebase. Kept drvr_lock,
+ mem_lock, and clock_lock.
+ * Change NULL to 0
diff --git a/Documentation/scsi/ChangeLog.megaraid b/Documentation/scsi/ChangeLog.megaraid
new file mode 100644
index 000000000..d2052fdbe
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.megaraid
@@ -0,0 +1,614 @@
+Release Date : Thu Nov 16 15:32:35 EST 2006 -
+ Sumant Patro <sumant.patro@lsi.com>
+Current Version : 2.20.5.1 (scsi module), 2.20.2.6 (cmm module)
+Older Version : 2.20.4.9 (scsi module), 2.20.2.6 (cmm module)
+
+1. Changes in Initialization to fix kdump failure.
+ Send SYNC command on loading.
+ This command clears the pending commands in the adapter
+ and re-initialize its internal RAID structure.
+ Without this change, megaraid driver either panics or fails to
+ initialize the adapter during kdump's second kernel boot
+ if there are pending commands or interrupts from other devices
+ sharing the same IRQ.
+2. Authors email-id domain name changed from lsil.com to lsi.com.
+ Also modified the MODULE_AUTHOR to megaraidlinux@lsi.com
+
+Release Date : Fri May 19 09:31:45 EST 2006 - Seokmann Ju <sju@lsil.com>
+Current Version : 2.20.4.9 (scsi module), 2.20.2.6 (cmm module)
+Older Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
+
+1. Fixed a bug in megaraid_init_mbox().
+ Customer reported "garbage in file on x86_64 platform".
+ Root Cause: the driver registered controllers as 64-bit DMA capable
+ for those which are not support it.
+ Fix: Made change in the function inserting identification machanism
+ identifying 64-bit DMA capable controllers.
+
+ > -----Original Message-----
+ > From: Vasily Averin [mailto:vvs@sw.ru]
+ > Sent: Thursday, May 04, 2006 2:49 PM
+ > To: linux-scsi@vger.kernel.org; Kolli, Neela; Mukker, Atul;
+ > Ju, Seokmann; Bagalkote, Sreenivas;
+ > James.Bottomley@SteelEye.com; devel@openvz.org
+ > Subject: megaraid_mbox: garbage in file
+ >
+ > Hello all,
+ >
+ > I've investigated customers claim on the unstable work of
+ > their node and found a
+ > strange effect: reading from some files leads to the
+ > "attempt to access beyond end of device" messages.
+ >
+ > I've checked filesystem, memory on the node, motherboard BIOS
+ > version, but it
+ > does not help and issue still has been reproduced by simple
+ > file reading.
+ >
+ > Reproducer is simple:
+ >
+ > echo 0xffffffff >/proc/sys/dev/scsi/logging_level ;
+ > cat /vz/private/101/root/etc/ld.so.cache >/tmp/ttt ;
+ > echo 0 >/proc/sys/dev/scsi/logging
+ >
+ > It leads to the following messages in dmesg
+ >
+ > sd_init_command: disk=sda, block=871769260, count=26
+ > sda : block=871769260
+ > sda : reading 26/26 512 byte blocks.
+ > scsi_add_timer: scmd: f79ed980, time: 7500, (c02b1420)
+ > sd 0:1:0:0: send 0xf79ed980 sd 0:1:0:0:
+ > command: Read (10): 28 00 33 f6 24 ac 00 00 1a 00
+ > buffer = 0xf7cfb540, bufflen = 13312, done = 0xc0366b40,
+ > queuecommand 0xc0344010
+ > leaving scsi_dispatch_cmnd()
+ > scsi_delete_timer: scmd: f79ed980, rtn: 1
+ > sd 0:1:0:0: done 0xf79ed980 SUCCESS 0 sd 0:1:0:0:
+ > command: Read (10): 28 00 33 f6 24 ac 00 00 1a 00
+ > scsi host busy 1 failed 0
+ > sd 0:1:0:0: Notifying upper driver of completion (result 0)
+ > sd_rw_intr: sda: res=0x0
+ > 26 sectors total, 13312 bytes done.
+ > use_sg is 4
+ > attempt to access beyond end of device
+ > sda6: rw=0, want=1044134458, limit=951401367
+ > Buffer I/O error on device sda6, logical block 522067228
+ > attempt to access beyond end of device
+
+2. When INQUIRY with EVPD bit set issued to the MegaRAID controller,
+ system memory gets corrupted.
+ Root Cause: MegaRAID F/W handle the INQUIRY with EVPD bit set
+ incorrectly.
+ Fix: MegaRAID F/W has fixed the problem and being process of release,
+ soon. Meanwhile, driver will filter out the request.
+
+3. One of member in the data structure of the driver leads unaligne
+ issue on 64-bit platform.
+ Customer reporeted "kernel unaligned access addrss" issue when
+ application communicates with MegaRAID HBA driver.
+ Root Cause: in uioc_t structure, one of member had misaligned and it
+ led system to display the error message.
+ Fix: A patch submitted to community from following folk.
+
+ > -----Original Message-----
+ > From: linux-scsi-owner@vger.kernel.org
+ > [mailto:linux-scsi-owner@vger.kernel.org] On Behalf Of Sakurai Hiroomi
+ > Sent: Wednesday, July 12, 2006 4:20 AM
+ > To: linux-scsi@vger.kernel.org; linux-kernel@vger.kernel.org
+ > Subject: Re: Help: strange messages from kernel on IA64 platform
+ >
+ > Hi,
+ >
+ > I saw same message.
+ >
+ > When GAM(Global Array Manager) is started, The following
+ > message output.
+ > kernel: kernel unaligned access to 0xe0000001fe1080d4,
+ > ip=0xa000000200053371
+ >
+ > The uioc structure used by ioctl is defined by packed,
+ > the allignment of each member are disturbed.
+ > In a 64 bit structure, the allignment of member doesn't fit 64 bit
+ > boundary. this causes this messages.
+ > In a 32 bit structure, we don't see the message because the allinment
+ > of member fit 32 bit boundary even if packed is specified.
+ >
+ > patch
+ > I Add 32 bit dummy member to fit 64 bit boundary. I tested.
+ > We confirmed this patch fix the problem by IA64 server.
+ >
+ > **************************************************************
+ > ****************
+ > --- linux-2.6.9/drivers/scsi/megaraid/megaraid_ioctl.h.orig
+ > 2006-04-03 17:13:03.000000000 +0900
+ > +++ linux-2.6.9/drivers/scsi/megaraid/megaraid_ioctl.h
+ > 2006-04-03 17:14:09.000000000 +0900
+ > @@ -132,6 +132,10 @@
+ > /* Driver Data: */
+ > void __user * user_data;
+ > uint32_t user_data_len;
+ > +
+ > + /* 64bit alignment */
+ > + uint32_t pad_0xBC;
+ > +
+ > mraid_passthru_t __user *user_pthru;
+ >
+ > mraid_passthru_t *pthru32;
+ > **************************************************************
+ > ****************
+
+Release Date : Mon Apr 11 12:27:22 EST 2006 - Seokmann Ju <sju@lsil.com>
+Current Version : 2.20.4.8 (scsi module), 2.20.2.6 (cmm module)
+Older Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module)
+
+1. Fixed a bug in megaraid_reset_handler().
+ Customer reported "Unable to handle kernel NULL pointer dereference
+ at virtual address 00000000" when system goes to reset condition
+ for some reason. It happened randomly.
+ Root Cause: in the megaraid_reset_handler(), there is possibility not
+ returning pending packets in the pend_list if there are multiple
+ pending packets.
+ Fix: Made the change in the driver so that it will return all packets
+ in the pend_list.
+
+2. Added change request.
+ As found in the following URL, rmb() only didn't help the
+ problem. I had to increase the loop counter to 0xFFFFFF. (6 F's)
+ http://marc.theaimsgroup.com/?l=linux-scsi&m=110971060502497&w=2
+
+ I attached a patch for your reference, too.
+ Could you check and get this fix in your driver?
+
+ Best Regards,
+ Jun'ichi Nomura
+
+Release Date : Fri Nov 11 12:27:22 EST 2005 - Seokmann Ju <sju@lsil.com>
+Current Version : 2.20.4.7 (scsi module), 2.20.2.6 (cmm module)
+Older Version : 2.20.4.6 (scsi module), 2.20.2.6 (cmm module)
+
+1. Sorted out PCI IDs to remove megaraid support overlaps.
+ Based on the patch from Daniel, sorted out PCI IDs along with
+ character node name change from 'megadev' to 'megadev_legacy' to avoid
+ conflict.
+ ---
+ Hopefully we'll be getting the build restriction zapped much sooner,
+ but we should also be thinking about totally removing the hardware
+ support overlap in the megaraid drivers.
+
+ This patch pencils in a date of Feb 06 for this, and performs some
+ printk abuse in hope that existing legacy users might pick up on what's
+ going on.
+
+ Signed-off-by: Daniel Drake <dsd@gentoo.org>
+ ---
+
+2. Fixed a issue: megaraid always fails to reset handler.
+ ---
+ I found that the megaraid driver always fails to reset the
+ adapter with the following message:
+ megaraid: resetting the host...
+ megaraid mbox: reset sequence completed successfully
+ megaraid: fast sync command timed out
+ megaraid: reservation reset failed
+ when the "Cluster mode" of the adapter BIOS is enabled.
+ So, whenever the reset occurs, the adapter goes to
+ offline and just become unavailable.
+
+ Jun'ichi Nomura [mailto:jnomura@mtc.biglobe.ne.jp]
+ ---
+
+Release Date : Mon Mar 07 12:27:22 EST 2005 - Seokmann Ju <sju@lsil.com>
+Current Version : 2.20.4.6 (scsi module), 2.20.2.6 (cmm module)
+Older Version : 2.20.4.5 (scsi module), 2.20.2.5 (cmm module)
+
+1. Added IOCTL backward compatibility.
+ Convert megaraid_mm driver to new compat_ioctl entry points.
+ I don't have easy access to hardware, so only compile tested.
+ - Signed-off-by:Andi Kleen <ak@muc.de>
+
+2. megaraid_mbox fix: wrong order of arguments in memset()
+ That, BTW, shows why cross-builds are useful-the only indication of
+ problem had been a new warning showing up in sparse output on alpha
+ build (number of exceeding 256 got truncated).
+ - Signed-off-by: Al Viro
+ <viro@parcelfarce.linux.theplanet.co.uk>
+
+3. Convert pci_module_init to pci_register_driver
+ Convert from pci_module_init to pci_register_driver
+ (from:http://kernelnewbies.org/KernelJanitors/TODO)
+ - Signed-off-by: Domen Puncer <domen@coderock.org>
+
+4. Use the pre defined DMA mask constants from dma-mapping.h
+ Use the DMA_{64,32}BIT_MASK constants from dma-mapping.h when calling
+ pci_set_dma_mask() or pci_set_consistend_dma_mask(). See
+ http://marc.theaimsgroup.com/?t=108001993000001&r=1&w=2 for more
+ details.
+ Signed-off-by: Tobias Klauser <tklauser@nuerscht.ch>
+ Signed-off-by: Domen Puncer <domen@coderock.org>
+
+5. Remove SSID checking for Dobson, Lindsay, and Verde based products.
+ Checking the SSVID/SSID for controllers which have Dobson, Lindsay,
+ and Verde is unnecessary because device ID has been assigned by LSI
+ and it is unique value. So, all controllers with these IOPs have to be
+ supported by the driver regardless SSVID/SSID.
+
+6. Date Thu, 27 Jan 2005 04:31:09 +0100
+ From Herbert Poetzl <>
+ Subject RFC: assert_spin_locked() for 2.6
+
+ Greetings!
+
+ overcautious programming will kill your kernel ;)
+ ever thought about checking a spin_lock or even
+ asserting that it must be held (maybe just for
+ spinlock debugging?) ...
+
+ there are several checks present in the kernel
+ where somebody does a variation on the following:
+
+ BUG_ON(!spin_is_locked(&some_lock));
+
+ so what's wrong about that? nothing, unless you
+ compile the code with CONFIG_DEBUG_SPINLOCK but
+ without CONFIG_SMP ... in which case the BUG()
+ will kill your kernel ...
+
+ maybe it's not advised to make such assertions,
+ but here is a solution which works for me ...
+ (compile tested for sh, x86_64 and x86, boot/run
+ tested for x86 only)
+
+ best,
+ Herbert
+
+ - Herbert Poetzl <herbert@13thfloor.at>, Thu, 27 Jan 2005
+
+Release Date : Thu Feb 03 12:27:22 EST 2005 - Seokmann Ju <sju@lsil.com>
+Current Version : 2.20.4.5 (scsi module), 2.20.2.5 (cmm module)
+Older Version : 2.20.4.4 (scsi module), 2.20.2.4 (cmm module)
+
+1. Modified name of two attributes in scsi_host_template.
+ On Wed, 2005-02-02 at 10:56 -0500, Ju, Seokmann wrote:
+ > + .sdev_attrs = megaraid_device_attrs,
+ > + .shost_attrs = megaraid_class_device_attrs,
+
+ These are, perhaps, slightly confusing names.
+ The terms device and class_device have well defined meanings in the
+ generic device model, neither of which is what you mean here.
+ Why not simply megaraid_sdev_attrs and megaraid_shost_attrs?
+
+ Other than this, it looks fine to me too.
+
+Release Date : Thu Jan 27 00:01:03 EST 2005 - Atul Mukker <atulm@lsil.com>
+Current Version : 2.20.4.4 (scsi module), 2.20.2.5 (cmm module)
+Older Version : 2.20.4.3 (scsi module), 2.20.2.4 (cmm module)
+
+1. Bump up the version of scsi module due to its conflict.
+
+Release Date : Thu Jan 21 00:01:03 EST 2005 - Atul Mukker <atulm@lsil.com>
+Current Version : 2.20.4.3 (scsi module), 2.20.2.5 (cmm module)
+Older Version : 2.20.4.2 (scsi module), 2.20.2.4 (cmm module)
+
+1. Remove driver ioctl for logical drive to scsi address translation and
+ replace with the sysfs attribute. To remove drives and change
+ capacity, application shall now use the device attribute to get the
+ logical drive number for a scsi device. For adding newly created
+ logical drives, class device attribute would be required to uniquely
+ identify each controller.
+ - Atul Mukker <atulm@lsil.com>
+
+ "James, I've been thinking about this a little more, and you may be on
+ to something here. Let each driver add files as such:"
+
+ - Matt Domsch <Matt_Domsch@dell.com>, 12.15.2004
+ linux-scsi mailing list
+
+
+ "Then, if you simply publish your LD number as an extra parameter of
+ the device, you can look through /sys to find it."
+
+ - James Bottomley <James.Bottomley@SteelEye.com>, 01.03.2005
+ linux-scsi mailing list
+
+
+ "I don't see why not ... it's your driver, you can publish whatever
+ extra information you need as scsi_device attributes; that was one of
+ the designs of the extensible attribute system."
+
+ - James Bottomley <James.Bottomley@SteelEye.com>, 01.06.2005
+ linux-scsi mailing list
+
+2. Add AMI megaraid support - Brian King <brking@charter.net>
+ PCI_VENDOR_ID_AMI, PCI_DEVICE_ID_AMI_MEGARAID3,
+ PCI_VENDOR_ID_AMI, PCI_SUBSYS_ID_PERC3_DC,
+
+3. Make some code static - Adrian Bunk <bunk@stusta.de>
+ Date: Mon, 15 Nov 2004 03:14:57 +0100
+
+ The patch below makes some needlessly global code static.
+ -wait_queue_head_t wait_q;
+ +static wait_queue_head_t wait_q;
+
+ Signed-off-by: Adrian Bunk <bunk@stusta.de>
+
+4. Added NEC ROMB support - NEC MegaRAID PCI Express ROMB controller
+ PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_MEGARAID_NEC_ROMB_2E,
+ PCI_SUBSYS_ID_NEC, PCI_SUBSYS_ID_MEGARAID_NEC_ROMB_2E,
+
+5. Fixed Tape drive issue : For any Direct CDB command to physical device
+ including tape, timeout value set by driver was 10 minutes. With this
+ value, most of command will return within timeout. However, for those
+ command like ERASE or FORMAT, it takes more than an hour depends on
+ capacity of the device and the command could be terminated before it
+ completes.
+ To address this issue, the 'timeout' field in the DCDB command will
+ have NO TIMEOUT (i.e., 4) value as its timeout on DCDB command.
+
+
+
+Release Date : Thu Dec 9 19:10:23 EST 2004
+ - Sreenivas Bagalkote <sreenib@lsil.com>
+
+Current Version : 2.20.4.2 (scsi module), 2.20.2.4 (cmm module)
+Older Version : 2.20.4.1 (scsi module), 2.20.2.3 (cmm module)
+
+i. Introduced driver ioctl that returns scsi address for a given ld.
+
+ "Why can't the existing sysfs interfaces be used to do this?"
+ - Brian King (brking@us.ibm.com)
+
+ "I've looked into solving this another way, but I cannot see how
+ to get this driver-private mapping of logical drive number-> HCTL
+ without putting code something like this into the driver."
+
+ "...and by providing a mapping a function to userspace, the driver
+ is free to change its mapping algorithm in the future if necessary .."
+ - Matt Domsch (Matt_Domsch@dell.com)
+
+Release Date : Thu Dec 9 19:02:14 EST 2004 - Sreenivas Bagalkote <sreenib@lsil.com>
+
+Current Version : 2.20.4.1 (scsi module), 2.20.2.3 (cmm module)
+Older Version : 2.20.4.1 (scsi module), 2.20.2.2 (cmm module)
+
+i. Fix a bug in kioc's dma buffer deallocation
+
+Release Date : Thu Nov 4 18:24:56 EST 2004 - Sreenivas Bagalkote <sreenib@lsil.com>
+
+Current Version : 2.20.4.1 (scsi module), 2.20.2.2 (cmm module)
+Older Version : 2.20.4.0 (scsi module), 2.20.2.1 (cmm module)
+
+i. Handle IOCTL cmd timeouts more properly.
+
+ii. pci_dma_sync_{sg,single}_for_cpu was introduced into megaraid_mbox
+ incorrectly (instead of _for_device). Changed to appropriate
+ pci_dma_sync_{sg,single}_for_device.
+
+Release Date : Wed Oct 06 11:15:29 EDT 2004 - Sreenivas Bagalkote <sreenib@lsil.com>
+Current Version : 2.20.4.0 (scsi module), 2.20.2.1 (cmm module)
+Older Version : 2.20.4.0 (scsi module), 2.20.2.0 (cmm module)
+
+i. Remove CONFIG_COMPAT around register_ioctl32_conversion
+
+Release Date : Mon Sep 27 22:15:07 EDT 2004 - Atul Mukker <atulm@lsil.com>
+Current Version : 2.20.4.0 (scsi module), 2.20.2.0 (cmm module)
+Older Version : 2.20.3.1 (scsi module), 2.20.2.0 (cmm module)
+
+i. Fix data corruption. Because of a typo in the driver, the IO packets
+ were wrongly shared by the ioctl path. This causes a whole IO command
+ to be replaced by an incoming ioctl command.
+
+Release Date : Tue Aug 24 09:43:35 EDT 2004 - Atul Mukker <atulm@lsil.com>
+Current Version : 2.20.3.1 (scsi module), 2.20.2.0 (cmm module)
+Older Version : 2.20.3.0 (scsi module), 2.20.2.0 (cmm module)
+
+i. Function reordering so that inline functions are defined before they
+ are actually used. It is now mandatory for GCC 3.4.1 (current stable)
+
+ Declare some heavy-weight functions to be non-inlined,
+ megaraid_mbox_build_cmd, megaraid_mbox_runpendq,
+ megaraid_mbox_prepare_pthru, megaraid_mbox_prepare_epthru,
+ megaraid_busywait_mbox
+
+ - Andrew Morton, 08.19.2004
+ linux-scsi mailing list
+
+ "Something else to clean up after inclusion: every instance of an
+ inline function is actually rendered as a full function call, because
+ the function is always used before it is defined. Atul, please
+ re-arrange the code to eliminate the need for most (all) of the
+ function prototypes at the top of each file, and define (not just
+ declare with a prototype) each inline function before its first use"
+
+ - Matt Domsch <Matt_Domsch@dell.com>, 07.27.2004
+ linux-scsi mailing list
+
+
+ii. Display elapsed time (countdown) while waiting for FW to boot.
+
+iii. Module compilation reorder in Makefile so that unresolved symbols do
+ not occur when driver is compiled non-modular.
+
+ Patrick J. LoPresti <patl@users.sourceforge.net>, 8.22.2004
+ linux-scsi mailing list
+
+
+Release Date : Thu Aug 19 09:58:33 EDT 2004 - Atul Mukker <atulm@lsil.com>
+Current Version : 2.20.3.0 (scsi module), 2.20.2.0 (cmm module)
+Older Version : 2.20.2.0 (scsi module), 2.20.1.0 (cmm module)
+
+i. When copying the mailbox packets, copy only first 14 bytes (for 32-bit
+ mailboxes) and only first 22 bytes (for 64-bit mailboxes). This is to
+ avoid getting the stale values for busy bit. We want to set the busy
+ bit just before issuing command to the FW.
+
+ii. In the reset handling, if the reseted command is not owned by the
+ driver, do not (wrongly) print information for the "attached" driver
+ packet.
+
+iii. Have extended wait when issuing command in synchronous mode. This is
+ required for the cases where the option ROM is disabled and there is
+ no BIOS to start the controller. The FW starts to boot after receiving
+ the first command from the driver. The current driver has 1 second
+ timeout for the synchronous commands, which is far less than what is
+ actually required. We now wait up to MBOX_RESET_TIME (180 seconds) for
+ FW boot process.
+
+iv. In megaraid_mbox_product_info, clear the mailbox contents completely
+ before preparing the command for inquiry3. This is to ensure that the
+ FW does not get junk values in the command.
+
+v. Do away with the redundant LSI_CONFIG_COMPAT redefinition for
+ CONFIG_COMPAT. Replace <asm/ioctl32.h> with <linux/ioctl32.h>
+
+ - James Bottomley <James.Bottomley@SteelEye.com>, 08.17.2004
+ linux-scsi mailing list
+
+vi. Add support for 64-bit applications. Current drivers assume only
+ 32-bit applications, even on 64-bit platforms. Use the "data" and
+ "buffer" fields of the mimd_t structure, instead of embedded 32-bit
+ addresses in application mailbox and passthru structures.
+
+vii. Move the function declarations for the management module from
+ megaraid_mm.h to megaraid_mm.c
+
+ - Andrew Morton, 08.19.2004
+ linux-scsi mailing list
+
+viii. Change default values for MEGARAID_NEWGEN, MEGARAID_MM, and
+ MEGARAID_MAILBOX to 'n' in Kconfig.megaraid
+
+ - Andrew Morton, 08.19.2004
+ linux-scsi mailing list
+
+ix. replace udelay with msleep
+
+x. Typos corrected in comments and whitespace adjustments, explicit
+ grouping of expressions.
+
+
+Release Date : Fri Jul 23 15:22:07 EDT 2004 - Atul Mukker <atulm@lsil.com>
+Current Version : 2.20.2.0 (scsi module), 2.20.1.0 (cmm module)
+Older Version : 2.20.1.0 (scsi module), 2.20.0.0 (cmm module)
+
+i. Add PCI ids for Acer ROMB 2E solution
+
+ii. Add PCI ids for I4
+
+iii. Typo corrected for subsys id for megaraid sata 300-4x
+
+iv. Remove yield() while mailbox handshake in synchronous commands
+
+
+ "My other main gripe is things like this:
+
+ + // wait for maximum 1 second for status to post
+ + for (i = 0; i < 40000; i++) {
+ + if (mbox->numstatus != 0xFF) break;
+ + udelay(25); yield();
+ + }
+
+ which litter the driver. Use of yield() in drivers is deprecated."
+
+ - James Bottomley <James.Bottomley@SteelEye.com>, 07.14.2004
+ linux-scsi mailing list
+
+v. Remove redundant __megaraid_busywait_mbox routine
+
+vi. Fix bug in the management module, which causes a system lockup when the
+ IO module is loaded and then unloaded, followed by executing any
+ management utility. The current version of management module does not
+ handle the adapter unregister properly.
+
+ Specifically, it still keeps a reference to the unregistered
+ controllers. To avoid this, the static array adapters has been
+ replaced by a dynamic list, which gets updated every time an adapter
+ is added or removed.
+
+ Also, during unregistration of the IO module, the resources are
+ now released in the exact reverse order of the allocation time
+ sequence.
+
+
+Release Date : Fri Jun 25 18:58:43 EDT 2004 - Atul Mukker <atulm@lsil.com>
+Current Version : 2.20.1.0
+Older Version : megaraid 2.20.0.1
+
+i. Stale list pointer in adapter causes kernel panic when module
+ megaraid_mbox is unloaded
+
+
+Release Date : Thu Jun 24 20:37:11 EDT 2004 - Atul Mukker <atulm@lsil.com>
+Current Version : 2.20.0.1
+Older Version : megaraid 2.20.0.00
+
+i. Modules are not 'y' by default, but depend on current definition of
+ SCSI & PCI.
+
+ii. Redundant structure mraid_driver_t removed.
+
+iii. Miscellaneous indentation and goto/label fixes.
+ - Christoph Hellwig <hch@infradead.org>, 06.24.2004 linux-scsi
+
+iv. scsi_host_put(), do just before completing HBA shutdown.
+
+
+
+Release Date : Mon Jun 21 19:53:54 EDT 2004 - Atul Mukker <atulm@lsil.com>
+Current Version : 2.20.0.0
+Older Version : megaraid 2.20.0.rc2 and 2.00.3
+
+i. Independent module to interact with userland applications and
+ multiplex command to low level RAID module(s).
+
+ "Shared code in a third module, a "library module", is an acceptable
+ solution. modprobe automatically loads dependent modules, so users
+ running "modprobe driver1" or "modprobe driver2" would automatically
+ load the shared library module."
+
+ - Jeff Garzik <jgarzik@pobox.com> 02.25.2004 LKML
+
+ "As Jeff hinted, if your userspace<->driver API is consistent between
+ your new MPT-based RAID controllers and your existing megaraid driver,
+ then perhaps you need a single small helper module (lsiioctl or some
+ better name), loaded by both mptraid and megaraid automatically, which
+ handles registering the /dev/megaraid node dynamically. In this case,
+ both mptraid and megaraid would register with lsiioctl for each
+ adapter discovered, and lsiioctl would essentially be a switch,
+ redirecting userspace tool ioctls to the appropriate driver."
+
+ - Matt Domsch <Matt_Domsch@dell.com> 02.25.2004 LKML
+
+ii. Remove C99 initializations from pci_device id.
+
+ "pci_id_table_g would be much more readable when not using C99
+ initializers.
+ PCI table doesn't change, there's lots of users that prefer the more
+ readable variant. And it's really far less and much easier to grok
+ lines without C99 initializers."
+
+ - Christoph Hellwig <hch@infradead.org>, 05.28.2004 linux-scsi
+
+iii. Many fixes as suggested by Christoph Hellwig <hch@infradead.org> on
+ linux-scsi, 05.28.2004
+
+iv. We now support up to 32 parallel ioctl commands instead of current 1.
+ There is a conscious effort to let memory allocation not fail for ioctl
+ commands.
+
+v. Do away with internal memory management. Use pci_pool_(create|alloc)
+ instead.
+
+vi. Kill tasklet when unloading the driver.
+
+vii. Do not use "host_lock', driver has fine-grain locks now to protect all
+ data structures.
+
+viii. Optimize the build scatter-gather list routine. The callers already
+ know the data transfer address and length.
+
+ix. Better implementation of error handling and recovery. Driver now
+ performs extended errors recovery for instances like scsi cable pull.
+
+x. Disassociate the management commands with an overlaid scsi command.
+ Driver now treats the management packets as special packets and has a
+ dedicated callback routine.
diff --git a/Documentation/scsi/ChangeLog.megaraid_sas b/Documentation/scsi/ChangeLog.megaraid_sas
new file mode 100644
index 000000000..18b570990
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.megaraid_sas
@@ -0,0 +1,641 @@
+Release Date : Thu. Jun 19, 2014 17:00:00 PST 2014 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+ Kashyap Desai
+ Sumit Saxena
+ Uday Lingala
+Current Version : 06.803.02.00-rc1
+Old Version : 06.803.01.00-rc1
+ 1. Fix reset_mutex leak in megasas_reset_fusion().
+ 2. Remove unused variables in megasas_instance.
+ 3. Fix LD/VF affiliation parsing.
+ 4. Add missing initial call to megasas_get_ld_vf_affiliation().
+ 5. Version and Changelog update.
+-------------------------------------------------------------------------------
+Release Date : Mon. Mar 10, 2014 17:00:00 PST 2014 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+ Kashyap Desai
+ Sumit Saxena
+Current Version : 06.803.01.00-rc1
+Old Version : 06.700.06.00-rc1
+ 1. Load correct raid context timeout value for multipathing & clustering.
+ 2. Fix megasas_ioc_init_fusion to use local stack variable.
+ 3. Return leaked MPT frames to MPT command pool.
+ 4. Add Dell PowerEdge VRTX SR-IOV VF device support.
+ 5. Version and Changelog update.
+-------------------------------------------------------------------------------
+Release Date : Sat. Aug 31, 2013 17:00:00 PST 2013 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+ Kashyap Desai
+ Sumit Saxena
+Current Version : 06.700.06.00-rc1
+Old Version : 06.600.18.00-rc1
+ 1. Add High Availability clustering support using shared Logical Disks.
+ 2. Version and Changelog update.
+-------------------------------------------------------------------------------
+Release Date : Wed. May 15, 2013 17:00:00 PST 2013 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+ Kashyap Desai
+ Sumit Saxena
+Current Version : 06.600.18.00-rc1
+Old Version : 06.506.00.00-rc1
+ 1. Return DID_ERROR for scsi io, when controller is in critical h/w error.
+ 2. Fix the interrupt mask for Gen2 controller.
+ 3. Update balance count in driver to be in sync of firmware.
+ 4. Free event detail memory without device ID check.
+ 5. Set IO request timeout value provided by OS timeout for Tape devices.
+ 6. Add support for MegaRAID Fury (device ID-0x005f) 12Gb/s controllers.
+ 7. Add support to display Customer branding details in syslog.
+ 8. Set IoFlags to enable Fast Path for JBODs for Invader/Fury(12 Gb/s)
+ controllers.
+ 9. Add support for Extended MSI-x vectors for Invader and Fury(12Gb/s
+ HBA).
+ 10.Add support for Uneven Span PRL11.
+ 11.Add support to differentiate between iMR and MR Firmware.
+ 12.Version and Changelog update.
+-------------------------------------------------------------------------------
+Release Date : Sat. Feb 9, 2013 17:00:00 PST 2013 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+Current Version : 06.506.00.00-rc1
+Old Version : 06.504.01.00-rc1
+ 1. Add 4k FastPath DIF support.
+ 2. Dont load DevHandle unless FastPath enabled.
+ 3. Version and Changelog update.
+-------------------------------------------------------------------------------
+Release Date : Mon. Oct 1, 2012 17:00:00 PST 2012 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+Current Version : 06.504.01.00-rc1
+Old Version : 00.00.06.18-rc1
+ 1. Removed un-needed completion_lock spinlock calls.
+ 2. Add module param for configurable MSI-X vector count.
+ 3. Load io_request DataLength in bytes.
+ 4. Add array boundary check for SystemPD.
+ 5. Add SystemPD FastPath support.
+ 6. Remove duplicate code.
+ 7. Version, Changelog, Copyright update.
+-------------------------------------------------------------------------------
+Release Date : Tue. Jun 17, 2012 17:00:00 PST 2012 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford/Kashyap Desai
+Current Version : 00.00.06.18-rc1
+Old Version : 00.00.06.15-rc1
+ 1. Fix Copyright dates.
+ 2. Add throttlequeuedepth module parameter.
+ 3. Add resetwaittime module parameter.
+ 4. Move poll_aen_lock initializer.
+-------------------------------------------------------------------------------
+Release Date : Mon. Mar 19, 2012 17:00:00 PST 2012 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+Current Version : 00.00.06.15-rc1
+Old Version : 00.00.06.14-rc1
+ 1. Optimize HostMSIxVectors setting.
+ 2. Add fpRead/WriteCapable, fpRead/WriteAcrossStripe checks.
+-------------------------------------------------------------------------------
+Release Date : Fri. Jan 6, 2012 17:00:00 PST 2010 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+Current Version : 00.00.06.14-rc1
+Old Version : 00.00.06.12-rc1
+ 1. Fix reglockFlags for degraded raid5/6 for MR 9360/9380.
+ 2. Mask off flags in ioctl path to prevent memory scribble with older
+ MegaCLI versions.
+ 3. Remove poll_mode_io module paramater, sysfs node, and associated code.
+-------------------------------------------------------------------------------
+Release Date : Wed. Oct 5, 2011 17:00:00 PST 2010 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+Current Version : 00.00.06.12-rc1
+Old Version : 00.00.05.40-rc1
+ 1. Continue booting immediately if FW in FAULT at driver load time.
+ 2. Increase default cmds per lun to 256.
+ 3. Fix mismatch in megasas_reset_fusion() mutex lock-unlock.
+ 4. Remove some un-necessary code.
+ 5. Clear state change interrupts for Fusion/Invader.
+ 6. Clear FUSION_IN_RESET before enabling interrupts.
+ 7. Add support for MegaRAID 9360/9380 12GB/s controllers.
+ 8. Add multiple MSI-X vector/multiple reply queue support.
+ 9. Add driver workaround for PERC5/1068 kdump kernel panic.
+-------------------------------------------------------------------------------
+Release Date : Tue. Jul 26, 2011 17:00:00 PST 2010 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+Current Version : 00.00.05.40-rc1
+Old Version : 00.00.05.38-rc1
+ 1. Fix FastPath I/O to work with degraded RAID 1.
+ 2. Add .change_queue_depth support.
+-------------------------------------------------------------------------------
+Release Date : Wed. May 11, 2011 17:00:00 PST 2010 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+Current Version : 00.00.05.38-rc1
+Old Version : 00.00.05.34-rc1
+ 1. Remove MSI-X black list, use MFI_REG_STATE.ready.msiEnable.
+ 2. Remove un-used function megasas_return_cmd_for_smid().
+ 3. Check MFI_REG_STATE.fault.resetAdapter in megasas_reset_fusion().
+ 4. Disable interrupts/free_irq() in megasas_shutdown().
+ 5. Fix bug where AENs could be lost in probe() and resume().
+ 6. Convert 6,10,12 byte CDB's to 16 byte CDB for large LBA's for FastPath
+ IO.
+ 7. Add 1078 OCR support.
+-------------------------------------------------------------------------------
+Release Date : Thu. Feb 24, 2011 17:00:00 PST 2010 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+Current Version : 00.00.05.34-rc1
+Old Version : 00.00.05.29-rc1
+ 1. Fix some failure gotos from megasas_probe_one(), etc.
+ 2. Add missing check_and_restore_queue_depth() call in
+ complete_cmd_fusion().
+ 3. Enable MSI-X before calling megasas_init_fw().
+ 4. Call tasklet_schedule() even if outbound_intr_status == 0 for MFI based
+ boards in MSI-X mode.
+ 5. Fix megasas_probe_one() to clear PCI_MSIX_FLAGS_ENABLE in msi control
+ register in kdump kernel.
+ 6. Fix megasas_get_cmd() to only print "Command pool empty" if
+ megasas_dbg_lvl is set.
+ 7. Fix megasas_build_dcdb_fusion() to not filter by TYPE_DISK.
+ 8. Fix megasas_build_dcdb_fusion() to use io_request->LUN[1] field.
+ 9. Add MR_EVT_CFG_CLEARED to megasas_aen_polling().
+ 10. Fix tasklet_init() in megasas_init_fw() to use instancet->tasklet.
+ 11. Fix fault state handling in megasas_transition_to_ready().
+ 12. Fix max_sectors setting for IEEE SGL's.
+ 13. Fix iMR OCR support to work correctly.
+-------------------------------------------------------------------------------
+Release Date : Tues. Dec 14, 2010 17:00:00 PST 2010 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Adam Radford
+Current Version : 00.00.05.29-rc1
+Old Version : 00.00.04.31-rc1
+ 1. Rename megaraid_sas.c to megaraid_sas_base.c.
+ 2. Update GPL headers.
+ 3. Add MSI-X support and 'msix_disable' module parameter.
+ 4. Use lowest memory bar (for SR-IOV VF support).
+ 5. Add struct megasas_instance_temlate changes, and change all code to use
+ new instance entries:
+
+ irqreturn_t (*service_isr )(int irq, void *devp);
+ void (*tasklet)(unsigned long);
+ u32 (*init_adapter)(struct megasas_instance *);
+ u32 (*build_and_issue_cmd) (struct megasas_instance *,
+ struct scsi_cmnd *);
+ void (*issue_dcmd) (struct megasas_instance *instance,
+ struct megasas_cmd *cmd);
+
+ 6. Add code to support MegaRAID 9265/9285 controllers device id (0x5b).
+-------------------------------------------------------------------------------
+1 Release Date : Thur. May 03, 2010 09:12:45 PST 2009 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Bo Yang
+
+2 Current Version : 00.00.04.31-rc1
+3 Older Version : 00.00.04.17.1-rc1
+
+1. Add the Online Controller Reset (OCR) to the Driver.
+ OCR is the new feature for megaraid_sas driver which
+ will allow the fw to do the chip reset which will not
+ affact the OS behavious.
+
+ To add the OCR support, driver need to do:
+ a). reset the controller chips -- Xscale and Gen2 which
+ will change the function calls and add the reset function
+ related to this two chips.
+
+ b). during the reset, driver will store the pending cmds
+ which not returned by FW to driver's pending queue. Driver
+ will re-issue those pending cmds again to FW after the OCR
+ finished.
+
+ c). In driver's timeout routine, driver will report to
+ OS as reset. Also driver's queue routine will block the
+ cmds until the OCR finished.
+
+ d). in Driver's ISR routine, if driver get the FW state as
+ state change, FW in Failure status and FW support online controller
+ reset (OCR), driver will start to do the controller reset.
+
+ e). In driver's IOCTL routine, the application cmds will wait for the
+ OCR to finish, then issue the cmds to FW.
+
+ f). Before driver kill adapter, driver will do last chance of
+ OCR to see if driver can bring back the FW.
+
+2. Add the support update flag to the driver to tell LSI megaraid_sas
+ application which driver will support the device update. So application
+ will not need to do the device update after application add/del the device
+ from the system.
+3. In driver's timeout routine, driver will do three time reset if fw is in
+ failed state. Driver will kill adapter if can't bring back FW after the
+ this three times reset.
+4. Add the input parameter max_sectors to 1MB support to our GEN2 controller.
+ customer can use the input paramenter max_sectors to add 1MB support to GEN2
+ controller.
+
+1 Release Date : Thur. Oct 29, 2009 09:12:45 PST 2009 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Bo Yang
+
+2 Current Version : 00.00.04.17.1-rc1
+3 Older Version : 00.00.04.12
+
+1. Add the pad_0 in mfi frame structure to 0 to fix the
+ context value larger than 32bit value issue.
+
+2. Add the logic drive list to the driver. Driver will
+ keep the logic drive list internal after driver load.
+
+3. driver fixed the device update issue after get the AEN
+ PD delete/ADD, LD add/delete from FW.
+
+1 Release Date : Tues. July 28, 2009 10:12:45 PST 2009 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Bo Yang
+
+2 Current Version : 00.00.04.12
+3 Older Version : 00.00.04.10
+
+1. Change the AEN sys PD update from scsi_scan to
+ scsi_add_device and scsi_remove_device.
+2. Takeoff the debug print-out in aen_polling routine.
+
+1 Release Date : Thur. July 02, 2009 10:12:45 PST 2009 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Bo Yang
+
+2 Current Version : 00.00.04.10
+3 Older Version : 00.00.04.08
+
+1. Add the 3 mins timeout during the controller initialize.
+2. Add the fix for 64bit sense date errors.
+
+1 Release Date : Tues. May 05, 2009 10:12:45 PST 2009 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Bo Yang
+
+2 Current Version : 00.00.04.08
+3 Older Version : 00.00.04.06
+
+1. Add the fix of pending in FW after deleted the logic drives.
+2. Add the fix of deallocating memory after get pdlist.
+
+1 Release Date : Tues. March 26, 2009 10:12:45 PST 2009 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Bo Yang
+
+2 Current Version : 00.00.04.06
+3 Older Version : 00.00.04.04
+
+1. Add the fix of the driver cmd empty fix of the driver cmd empty.
+2. Add the fix of the driver MSM AEN CMD cause the system slow.
+
+1 Release Date : Tues. March 03, 2009 10:12:45 PST 2009 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Bo Yang
+
+2 Current Version : 00.00.04.04
+3 Older Version : 00.00.04.01
+
+1. Add the Tape drive fix to the driver: If the command is for
+ the tape device, set the pthru timeout to the os layer timeout value.
+
+2. Add Poll_wait mechanism to Gen-2 Linux driv.
+ In the aen handler, driver needs to wakeup poll handler similar to
+ the way it raises SIGIO.
+
+3. Add new controller new SAS2 support to the driver.
+
+4. Report the unconfigured PD (system PD) to OS.
+
+5. Add the IEEE SGL support to the driver
+
+6. Reasign the Application cmds to SAS2 controller
+
+1 Release Date : Thur.July. 24 11:41:51 PST 2008 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+
+2 Current Version : 00.00.04.01
+3 Older Version : 00.00.03.22
+
+1. Add the new controller (0078, 0079) support to the driver
+ Those controllers are LSI's next generatation(gen2) SAS controllers.
+
+1 Release Date : Mon.June. 23 10:12:45 PST 2008 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+
+2 Current Version : 00.00.03.22
+3 Older Version : 00.00.03.20
+
+1. Add shutdown DCMD cmd to the shutdown routine to make FW shutdown proper.
+2. Unexpected interrupt occurs in HWR Linux driver, add the dumy readl pci flush will fix this issue.
+
+1 Release Date : Mon. March 10 11:02:31 PDT 2008 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+
+2 Current Version : 00.00.03.20-RC1
+3 Older Version : 00.00.03.16
+
+1. Rollback the sense info implementation
+ Sense buffer ptr data type in the ioctl path is reverted back
+ to u32 * as in previous versions of driver.
+
+2. Fixed the driver frame count.
+ When Driver sent wrong frame count to firmware. As this
+ particular command is sent to drive, FW is seeing continuous
+ chip resets and so the command will timeout.
+
+3. Add the new controller(1078DE) support to the driver
+ and Increase the max_wait to 60 from 10 in the controller
+ operational status. With this max_wait increase, driver will
+ make sure the FW will finish the pending cmd for KDUMP case.
+
+1 Release Date : Thur. Nov. 07 16:30:43 PST 2007 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+
+2 Current Version : 00.00.03.16
+3 Older Version : 00.00.03.15
+
+1. Increased MFI_POLL_TIMEOUT_SECS to 60 seconds from 10. FW may take
+ a max of 60 seconds to respond to the INIT cmd.
+
+1 Release Date : Fri. Sep. 07 16:30:43 PST 2007 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+
+2 Current Version : 00.00.03.15
+3 Older Version : 00.00.03.14
+
+1. Added module parameter "poll_mode_io" to support for "polling"
+ (reduced interrupt operation). In this mode, IO completion
+ interrupts are delayed. At the end of initiating IOs, the
+ driver schedules for cmd completion if there are pending cmds
+ to be completed. A timer-based interrupt has also been added
+ to prevent IO completion processing from being delayed
+ indefinitely in the case that no new IOs are initiated.
+
+1 Release Date : Fri. Sep. 07 16:30:43 PST 2007 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+
+2 Current Version : 00.00.03.14
+3 Older Version : 00.00.03.13
+
+1. Setting the max_sectors_per_req based on max SGL supported by the
+ FW. Prior versions calculated this value from controller info
+ (max_sectors_1, max_sectors_2). For certain controllers/FW,
+ this was resulting in a value greater than max SGL supported
+ by the FW. Issue was first reported by users running LUKS+XFS
+ with megaraid_sas. Thanks to RB for providing the logs and
+ duplication steps that helped to get to the root cause of the
+ issue. 2. Increased MFI_POLL_TIMEOUT_SECS to 60 seconds from
+ 10. FW may take a max of 60 seconds to respond to the INIT
+ cmd.
+
+1 Release Date : Fri. June. 15 16:30:43 PST 2007 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+
+2 Current Version : 00.00.03.13
+3 Older Version : 00.00.03.12
+
+1. Added the megasas_reset_timer routine to intercept cmd timeout and throttle io.
+
+On Fri, 2007-03-16 at 16:44 -0600, James Bottomley wrote:
+It looks like megaraid_sas at least needs this to throttle its commands
+> as they begin to time out. The code keeps the existing transport
+> template use of eh_timed_out (and allows the transport to override the
+> host if they both have this callback).
+>
+> James
+
+1 Release Date : Sat May. 12 16:30:43 PST 2007 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+
+2 Current Version : 00.00.03.12
+3 Older Version : 00.00.03.11
+
+1. When MegaSAS driver receives reset call from OS, driver waits in reset
+routine for max 3 minutes for all pending command completion. Now driver will
+call completion routine every 5 seconds from the reset routine instead of
+waiting for depending on cmd completion from isr path.
+
+1 Release Date : Mon Apr. 30 10:25:52 PST 2007 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+
+2 Current Version : 00.00.03.11
+3 Older Version : 00.00.03.09
+
+ 1. Memory Manager for IOCTL removed for 2.6 kernels.
+ pci_alloc_consistent replaced by dma_alloc_coherent. With this
+ change there is no need of memory manager in the driver code
+
+ On Wed, 2007-02-07 at 13:30 -0800, Andrew Morton wrote:
+ > I suspect all this horror is due to stupidity in the DMA API.
+ >
+ > pci_alloc_consistent() just goes and assumes GFP_ATOMIC, whereas
+ > the caller (megasas_mgmt_fw_ioctl) would have been perfectly happy
+ > to use GFP_KERNEL.
+ >
+ > I bet this fixes it
+
+ It does, but the DMA API was expanded to cope with this exact case, so
+ use dma_alloc_coherent() directly in the megaraid code instead. The dev
+ is just &pci_dev->dev.
+
+ James <James.Bottomley@SteelEye.com>
+
+ 3. SYNCHRONIZE_CACHE is not supported by FW and thus blocked by driver.
+ 4. Hibernation support added
+ 5. Performing diskdump while running IO in RHEL 4 was failing. Fixed.
+
+1 Release Date : Fri Feb. 09 14:36:28 PST 2007 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+
+2 Current Version : 00.00.03.09
+3 Older Version : 00.00.03.08
+
+i. Under heavy IO mid-layer prints "DRIVER_TIMEOUT" errors
+
+ The driver now waits for 10 seconds to elapse instead of 5 (as in
+ previous release) to resume IO.
+
+1 Release Date : Mon Feb. 05 11:35:24 PST 2007 -
+ (emaild-id:megaraidlinux@lsi.com)
+ Sumant Patro
+ Bo Yang
+2 Current Version : 00.00.03.08
+3 Older Version : 00.00.03.07
+
+i. Under heavy IO mid-layer prints "DRIVER_TIMEOUT" errors
+
+ Fix: The driver is now throttling IO.
+ Checks added in megasas_queue_command to know if FW is able to
+ process commands within timeout period. If number of retries
+ is 2 or greater,the driver stops sending cmd to FW temporarily. IO is
+ resumed if pending cmd count reduces to 16 or 5 seconds has elapsed
+ from the time cmds were last sent to FW.
+
+ii. FW enables WCE bit in Mode Sense cmd for drives that are configured
+ as WriteBack. The OS may send "SYNCHRONIZE_CACHE" cmd when Logical
+ Disks are exposed with WCE=1. User is advised to enable Write Back
+ mode only when the controller has battery backup. At this time
+ Synhronize cache is not supported by the FW. Driver will short-cycle
+ the cmd and return success without sending down to FW.
+
+1 Release Date : Sun Jan. 14 11:21:32 PDT 2007 -
+ Sumant Patro <Sumant.Patro@lsil.com>/Bo Yang
+2 Current Version : 00.00.03.07
+3 Older Version : 00.00.03.06
+
+i. bios_param entry added in scsi_host_template that returns disk geometry
+ information.
+
+1 Release Date : Fri Oct 20 11:21:32 PDT 2006 - Sumant Patro <Sumant.Patro@lsil.com>/Bo Yang
+2 Current Version : 00.00.03.06
+3 Older Version : 00.00.03.05
+
+1. Added new memory management module to support the IOCTL memory allocation. For IOCTL we try to allocate from the memory pool created during driver initialization. If mem pool is empty then we allocate at run time.
+2. Added check in megasas_queue_command and dpc/isr routine to see if we have already declared adapter dead
+ (hw_crit_error=1). If hw_crit_error==1, now we donot accept any processing of pending cmds/accept any cmd from OS
+
+1 Release Date : Mon Oct 02 11:21:32 PDT 2006 - Sumant Patro <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.03.05
+3 Older Version : 00.00.03.04
+
+i. PCI_DEVICE macro used
+
+ Convert the pci_device_id-table of the megaraid_sas-driver to the PCI_DEVICE-macro, to safe some lines.
+
+ - Henrik Kretzschmar <henne@nachtwindheim.de>
+ii. All compiler warnings removed
+iii. megasas_ctrl_info struct reverted to 3.02 release
+iv. Default value of megasas_dbg_lvl set to 0
+v. Removing in megasas_exit the sysfs entry created for megasas_dbg_lvl
+vi. In megasas_teardown_frame_pool(), cmd->frame was passed instead of
+ cmd->sense to pci_pool_free. Fixed. Bug was pointed out by
+ Eric Sesterhenn
+
+1 Release Date : Wed Sep 13 14:22:51 PDT 2006 - Sumant Patro <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.03.04
+3 Older Version : 00.00.03.03
+
+i. Added Reboot notify
+ii. Reduced by 1 max cmds sent to FW from Driver to make the reply_q_sz same
+ as Max Cmds FW can support
+
+1 Release Date : Tue Aug 22 16:33:14 PDT 2006 - Sumant Patro <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.03.03
+3 Older Version : 00.00.03.02
+
+i. Send stop adapter to FW & Dump pending FW cmds before declaring adapter dead.
+ New varible added to set dbg level.
+ii. Disable interrupt made as fn pointer as they are different for 1068 / 1078
+iii. Frame count optimization. Main frame can contain 2 SGE for 64 bit SGLs and
+ 3 SGE for 32 bit SGL
+iv. Tasklet added for cmd completion
+v. If FW in operational state before firing INIT, now we send RESET Flag to FW instead of just READY. This is used to do soft reset.
+vi. megasas_ctrl_prop structure updated (based on FW struct)
+vii. Added print : FW now in Ready State during initialization
+
+1 Release Date : Sun Aug 06 22:49:52 PDT 2006 - Sumant Patro <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.03.02
+3 Older Version : 00.00.03.01
+
+i. Added FW tranistion state for Hotplug scenario
+
+1 Release Date : Sun May 14 22:49:52 PDT 2006 - Sumant Patro <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.03.01
+3 Older Version : 00.00.02.04
+
+i. Added support for ZCR controller.
+
+ New device id 0x413 added.
+
+ii. Bug fix : Disable controller interrupt before firing INIT cmd to FW.
+
+ Interrupt is enabled after required initialization is over.
+ This is done to ensure that driver is ready to handle interrupts when
+ it is generated by the controller.
+
+ -Sumant Patro <Sumant.Patro@lsil.com>
+
+1 Release Date : Wed Feb 03 14:31:44 PST 2006 - Sumant Patro <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.02.04
+3 Older Version : 00.00.02.04
+
+i. Remove superflous instance_lock
+
+ gets rid of the otherwise superflous instance_lock and avoids an unsave
+ unsynchronized access in the error handler.
+
+ - Christoph Hellwig <hch@lst.de>
+
+
+1 Release Date : Wed Feb 03 14:31:44 PST 2006 - Sumant Patro <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.02.04
+3 Older Version : 00.00.02.04
+
+i. Support for 1078 type (ppc IOP) controller, device id : 0x60 added.
+ During initialization, depending on the device id, the template members
+ are initialized with function pointers specific to the ppc or
+ xscale controllers.
+
+ -Sumant Patro <Sumant.Patro@lsil.com>
+
+1 Release Date : Fri Feb 03 14:16:25 PST 2006 - Sumant Patro
+ <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.02.04
+3 Older Version : 00.00.02.02
+i. Register 16 byte CDB capability with scsi midlayer
+
+ "This patch properly registers the 16 byte command length capability of the
+ megaraid_sas controlled hardware with the scsi midlayer. All megaraid_sas
+ hardware supports 16 byte CDB's."
+
+ -Joshua Giles <joshua_giles@dell.com>
+
+1 Release Date : Mon Jan 23 14:09:01 PST 2006 - Sumant Patro <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.02.02
+3 Older Version : 00.00.02.01
+
+i. New template defined to represent each family of controllers (identified by processor used).
+ The template will have defintions that will be initialised to appropritae values for a specific family of controllers. The template definition has four function pointers. During driver initialisation the function pointers will be set based on the controller family type. This change is done to support new controllers that has different processors and thus different register set.
+
+ -Sumant Patro <Sumant.Patro@lsil.com>
+
+1 Release Date : Mon Dec 19 14:36:26 PST 2005 - Sumant Patro <Sumant.Patro@lsil.com>
+2 Current Version : 00.00.02.00-rc4
+3 Older Version : 00.00.02.01
+
+i. Code reorganized to remove code duplication in megasas_build_cmd.
+
+ "There's a lot of duplicate code megasas_build_cmd. Move that out of the different codepathes and merge the reminder of megasas_build_cmd into megasas_queue_command"
+
+ - Christoph Hellwig <hch@lst.de>
+
+ii. Defined MEGASAS_IOC_FIRMWARE32 for code paths that handles 32 bit applications in 64 bit systems.
+
+ "MEGASAS_IOC_FIRMWARE can't be redefined if CONFIG_COMPAT is set, we need to define a MEGASAS_IOC_FIRMWARE32 define so native binaries continue to work"
+
+ - Christoph Hellwig <hch@lst.de>
diff --git a/Documentation/scsi/ChangeLog.ncr53c8xx b/Documentation/scsi/ChangeLog.ncr53c8xx
new file mode 100644
index 000000000..9288e3d89
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.ncr53c8xx
@@ -0,0 +1,495 @@
+Sat May 12 12:00 2001 Gerard Roudier (groudier@club-internet.fr)
+ * version ncr53c8xx-3.4.3b
+ - Ensure LEDC bit in GPCNTL is cleared when reading the NVRAM.
+ Fix sent by Stig Telfer <stig@api-networks.com>.
+ - Define scsi_set_pci_device() as nil for kernel < 2.4.4.
+
+Mon Feb 12 22:30 2001 Gerard Roudier (groudier@club-internet.fr)
+ * version ncr53c8xx-3.4.3
+ - Call pci_enable_device() as AC wants this to be done.
+ - Get both the BAR cookies actual and PCI BAR values.
+ (see Changelog.sym53c8xx rev. 1.7.3 for details)
+ - Merge changes for linux-2.4 that declare the host template
+ in the driver object also when the driver is statically
+ linked with the kernel.
+
+Sun Sep 24 21:30 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version ncr53c8xx-3.4.2
+ - See Changelog.sym53c8xx, driver version 1.7.2.
+
+Wed Jul 26 23:30 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version ncr53c8xx-3.4.1
+ - Provide OpenFirmware path through the proc FS on PPC.
+ - Remove trailing argument #2 from a couple of #undefs.
+
+Sun Jul 09 16:30 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version ncr53c8xx-3.4.0
+ - Remove the PROFILE C and SCRIPTS code.
+ This facility was not this useful and thus was not longer
+ desirable given the increasing complexity of the driver code.
+ - Merges from FreeBSD sym-1.6.2 driver:
+ * Clarify memory barriers needed by the driver for architectures
+ that implement a weak memory ordering.
+ - General cleanup:
+ Move definitions for barriers and IO/MMIO operations to the
+ sym53c8xx_defs.h header files. They are now shared by the
+ both drivers.
+ Use SCSI_NCR_IOMAPPED instead of NCR_IOMAPPED.
+
+Thu May 11 12:30 2000 Pam Delaney (pam.delaney@lsil.com)
+ * revision 3.3b
+
+Mon Apr 24 12:00 2000 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.2i
+ - Return value 1 (instead of 0) from the driver setup routine.
+ - Let the driver also attach controllers that have been set to
+ OFF in the NVRAM as it did prior to revision 3.2g.
+
+Sat Apr 1 12:00 2000 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.2h
+ - Fix a compilation problem on Alpha introduced in version 3.2g.
+ (`port' changed to `base_io').
+ - Move from `sym' to this driver a tiny change for __sparc__ that
+ applies to cache line size (? Probably from David S Miller).
+ - Make sure no data transfer will happen for Scsi_Cmnd requests
+ that supply SCSI_DATA_NONE direction (this avoids some BUG()
+ statement in the PCI code when a data buffer is also supplied).
+
+Thu Mar 16 9:30 2000 Pam Delaney (pam.delaney@lsil.com)
+ * revision 3.3b-3
+ - Added exclusion for the 53C1010 and 53C1010_66 chips
+ to the driver (change to sym53c8xx_comm.h).
+
+Mon March 6 23:15 2000 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.2g
+ - Add the file sym53c8xx_comm.h that collects code that should
+ be shared by sym53c8xx and ncr53c8xx drivers. For now, it is
+ a header file that is only included by the ncr53c8xx driver,
+ but things will be cleaned up later. This code addresses
+ notably:
+ * Chip detection and PCI related initialisations
+ * NVRAM detection and reading
+ * DMA mapping
+ * Boot setup command
+ * And some other ...
+ - Add support for the new dynamic dma mapping kernel interface.
+ Requires Linux-2.3.47 (tested with pre-2.3.47-6).
+ - Get data transfer direction from the scsi command structure
+ (Scsi_Cmnd) when this information is available.
+
+Mon March 6 23:15 2000 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.2g
+ - Add the file sym53c8xx_comm.h that collects code that should
+ be shared by sym53c8xx and ncr53c8xx drivers. For now, it is
+ a header file that is only included by the ncr53c8xx driver,
+ but things will be cleaned up later. This code addresses
+ notably:
+ * Chip detection and PCI related initialisations
+ * NVRAM detection and reading
+ * DMA mapping
+ * Boot setup command
+ * And some other ...
+ - Add support for the new dynamic dma mapping kernel interface.
+ Requires Linux-2.3.47 (tested with pre-2.3.47-6).
+ - Get data transfer direction from the scsi command structure
+ (Scsi_Cmnd) when this information is available.
+
+Fri Jan 14 14:00 2000 Pam Delaney (pam.delaney@lsil.com)
+ * revision pre-3.3b-1
+ - Merge parallel driver series 3.31 and 3.2e
+
+Tue Jan 11 14:00 2000 Pam Delaney (pam.delaney@lsil.com)
+ * revision 3.31
+ - Added support for mounting disks on wide-narrow-wide
+ scsi configurations.
+ - Built off of version 3.30
+
+Mon Jan 10 13:30 2000 Pam Delaney (pam.delaney@lsil.com)
+ * revision 3.30
+ - Added capability to use the integrity checking code
+ in the kernel (optional).
+ - Disabled support for the 53C1010.
+ - Built off of version 3.2c
+
+Sat Jan 8 22:00 2000 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.2e
+ - Add year 2000 copyright.
+ - Display correctly bus signals when bus is detected wrong.
+ - Remove the dead code that broke driver 3.2d.
+
+Mon Dec 6 22:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.2d
+ - Change messages written by the driver at initialisation and
+ through the /proc FS (rather cosmetic changes that consist in
+ printing out the PCI bus number and device/function).
+ - Get rid of the old PCI bios interface, but preserve kernel 2.0
+ compatibility from a simple wrapper.
+ - Remove the compilation condition about having to acquire the
+ io_request_lock since it seems to be a definite feature now.:)
+ - proc_dir structure no longer needed for kernel >= 2.3.27.
+ - Change the driver detection code by the sym53c8xx one, modulo
+ some minor changes. The driver can now attach any number of
+ controllers (>40) and does no longer hoger stack space at
+ initialisation.
+ - Definitely disable overlapped PCI arbitration for all dual
+ function chips, since I cannot make sure for what chip revisions
+ it is actually safe.
+ - Add support for the SYM53C1510D.
+ - Update the poor Tekram sync factor table.
+ - Remove the compilation condition about having to acquire the
+ io_request_lock since it seems to be a definite feature now.:)
+ - proc_dir structure no longer needed for kernel >= 2.3.27.
+
+Sat Sep 11 18:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.2c
+ - Handle correctly (hopefully) jiffies wrap-around.
+ - Restore the entry used to detect 875 until revision 0xff.
+ (I removed it inadvertently, it seems :) )
+ - Replace __initfunc() which is deprecated stuff by __init which
+ is not yet so. ;-)
+ - Add support of some 'resource handling' for linux-2.3.13.
+ Basically the BARs have been changed to something more complex
+ in the pci_dev structure.
+ - Remove some deprecated code.
+
+Sat May 10 11:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * revision pre-3.2b-1
+ - Support for the 53C895A by Pamela Delaney <pam.delaney@lsil.com>
+ The 53C895A contains all of the features of the 896 but has only
+ one channel and has a 32 bit PCI bus. It does 64 bit PCI addressing
+ using dual cycle PCI data transfers.
+ - Miscellaneous minor fixes.
+ - Some additions to the README.ncr53c8xx file.
+
+Sun Apr 11 10:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.2a
+ - Add 'hostid:#id' boot option. This option allows to change the
+ default SCSI id the driver uses for controllers.
+ - Remove nvram layouts and driver set-up structures from the C source,
+ and use the one defined in sym53c8xx_defs.h file.
+ (shared by both drivers).
+ - Set for now MAX LUNS to 16 (instead of 8).
+
+Thu Mar 11 23:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.2 (8xx-896 driver bundle)
+ - Only define the host template in ncr53c8xx.h and include the
+ sym53c8xx_defs.h file.
+ - Declare static all symbols that do not need to be visible from
+ outside the driver code.
+ - Add 'excl' boot command option that allows to pass to the driver
+ io address of devices not to attach.
+ - Add info() function called from the host template to print
+ driver/host information.
+ - Minor documentation additions.
+
+Sat Mar 6 11:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.1h
+ - Fix some oooold bug that hangs the bus if a device rejects a
+ negotiation. Btw, the corresponding stuff also needed some cleanup
+ and thus the change is a bit larger than it could have been.
+ - Still some typo that made compilation fail for 64 bit (trivial fix).
+
+Sun Feb 14:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.1g
+ - Deal correctly with 64 bit PCI address registers on Linux 2.2.
+ Pointed out by Leonard Zubkoff.
+ - Allow to tune request_irq() flags from the boot command line using
+ ncr53c8xx=irqm:??, as follows:
+ a) If bit 0x10 is set in irqm, IRQF_SHARED flag is not used.
+ b) If bit 0x20 is set in irqm, IRQF_DISABLED flag is not used.
+ By default the driver uses both IRQF_SHARED and IRQF_DISABLED.
+ Option 'ncr53c8xx=irqm:0x20' may be used when an IRQ is shared by
+ a 53C8XX adapter and a network board.
+ - Tiny misspelling fixed (ABORT instead of ABRT). Was fortunately
+ harmless.
+ - Negotiate SYNC data transfers with CCS devices.
+
+Sat Jan 16 17:30 1999 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.1f
+ - Some PCI fix-ups not needed any more for PPC (from Cort).
+ - Cache line size set to 16 DWORDS for Sparc (from DSM).
+ - Waiting list look-up didn't work for the first command of the list.
+ - Remove 2 useless lines of code.
+
+Sun Dec 13 18:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.1e
+ - Same work-around as for the 53c876 rev <= 0x15 for 53c896 rev 1:
+ Disable overlapped arbitration. This will not make difference
+ since the chip has on-chip RAM.
+
+Thu Nov 26 22:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.1d
+ - The SISL RAID change requires now remap_pci_mem() stuff to be
+ compiled for __i386__ when normal IOs are used.
+ - Minor spelling fixes in doc files.
+
+Sat Nov 21 18:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.1c
+ - Ignore chips that are driven by SISL RAID (DAC 960).
+ Change sent by Leonard Zubkoff and slightly reworked.
+ - Still a buglet in the tags initial settings that needed to be fixed.
+ It was not possible to disable TGQ at system startup for devices
+ that claim TGQ support. The driver used at least 2 for the queue
+ depth but did'nt keep track of user settings for tags depth lower
+ than 2.
+
+Wed Nov 11 10:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.1b
+ - The driver was unhappy when configured with default_tags > MAX_TAGS
+ Hopefully doubly-fixed.
+ - Update the Configure.help driver section that speaks of TAGS.
+
+Wed Oct 21 21:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.1a
+ - Changes from Eddie Dost for Sparc and Alpha:
+ ioremap/iounmap support for Sparc.
+ pcivtophys changed to bus_dvma_to_phys.
+ - Add the 53c876 description to the chip table. This is only useful
+ for printing the right name of the controller.
+ - DEL-441 Item 2 work-around for the 53c876 rev <= 5 (0x15).
+ - Add additional checking of INQUIRY data:
+ Check INQUIRY data received length is at least 7. Byte 7 of
+ inquiry data contains device features bits and the driver might
+ be confused by garbage. Also check peripheral qualifier.
+ - Cleanup of the SCSI tasks management:
+ Remove the special case for 32 tags. Now the driver only uses the
+ scheme that allows up to 64 tags per LUN.
+ Merge some code from the 896 driver.
+ Use a 1,3,5,...MAXTAGS*2+1 tag numbering. Previous driver could
+ use any tag number from 1 to 253 and some non conformant devices
+ might have problems with large tag numbers.
+ - 'no_sync' changed to 'no_disc' in the README file. This is an old
+ and trivial mistake that seems to demonstrate the README file is
+ not often read. :)
+
+Sun Oct 4 14:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.0i
+ - Cosmetic changes for sparc (but not for the driver) that needs
+ __irq_itoa() to be used for printed IRQ value to be understandable.
+ - Some problems with the driver that didn't occur using driver 2.5f
+ were due to a SCSI selection problem triggered by a clearly
+ documented feature that in fact seems not to work: (53C8XX chips
+ are claimed by the manuals to be able to execute SCSI scripts just
+ after abitration while the SCSI core is performing SCSI selection).
+ This optimization is broken and has been removed.
+ - Some broken scsi devices are confused when a negotiation is started
+ on a LUN that does not correspond to a real device. According to
+ SCSI specs, this is a device firmware bug. This has been worked
+ around by only starting negotiation if the LUN has previously be
+ used for at least 1 successful SCSI command.
+ - The 'last message sent' printed out on M_REJECT message reception
+ was read from the SFBR i/o register after the previous message had
+ been sent.
+ This was not correct and affects all previous driver versions and
+ the original FreeBSD one as well. The SCSI scripts has been fixed
+ so that it now provides the right information to the C code.
+
+Sat Jul 18 13:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.0g
+ - Preliminary fixes for Big Endian (sent by Eddie C. Dost).
+ Big Endian architectures should work again with the driver.
+ Eddie's patch has been partially applied since current 2.1.109
+ does not have all the Sparc changes of the vger tree.
+ - Use of BITS_PER_LONG instead of (~0UL == 0xffffffffUL) has fixed
+ the problem observed when the driver was compiled using EGCS or
+ PGCC.
+
+Mon Jul 13 20:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.0f
+ - Some spelling fixes.
+ - linux/config.h misplaced in ncr53c8xx.h
+ - MODULE_PARM stuff added for linux 2.1.
+ - check INQUIRY response data format is exactly 2.
+ - use BITS_PER_LONG if defined.
+
+Sun Jun 28 12:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.0e
+ - Some cleanup, spelling fixes, version checks, documentations
+ changes, etc ...
+
+Sat Jun 20 20:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.0c
+ - Add a boot setup option that allows to set up device queue depths
+ at boot-up. This option is very useful since Linux does not
+ allow to change scsi device queue depth once the system has been
+ booted up.
+
+Sun Jun 15 23:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.0a
+ - Support for up to 64 TAGS per LUN.
+ - Rewrite the TARGET vs LUN capabilities management.
+ CmdQueue is now handled as a LUN capability as it shall be.
+ This also fixes a bug triggered when disabling tagged command
+ queuing for a device that had this feature enabled.
+ - Remove the ncr_opennings() stuff that was useless under Linux
+ and hard to understand to me.
+ - Add "setverbose" procfs driver command. It allows to tune
+ verbose level after boot-up. Setting this level to zero, for
+ example avoid flooding the syslog file.
+ - Add KERN_XXX to some printk's.
+
+Tue Jun 10 23:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 3.0
+ - Linux config changes for 2.0.34:
+ Remove NVRAM detection config option. This option is now enabled
+ by default but can be disabled by editing the driver header file.
+ Add a PROFILE config option.
+ - Update Configure.help
+ - Add calls to new function mdelay() for milli-seconds delay if
+ kernel version >= 2.1.105.
+ - Replace all printf(s) by printk(s). After all, the ncr53c8xx is
+ a driver for Linux.
+ - Perform auto-sense on COMMAND TERMINATED. Not sure it is useful.
+ - Some other minor changes.
+
+Tue Jun 4 23:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 2.6n
+ - Code cleanup and simplification:
+ Remove kernel 1.2.X and 1.3.X support.
+ Remove the _old_ target capabilities table.
+ Remove the error recovery code that have'nt been really useful.
+ Use a single alignment boundary (CACHE_LINE_SIZE) for data
+ structures.
+ - Several aggressive SCRIPTS optimizations and changes:
+ Reselect SCRIPTS code rewritten.
+ Support for selection/reselection without ATN.
+ And some others.
+ - Miscallaneous changes in the C code:
+ Count actual number of CCB queued to the controller (future use).
+ Lots of other minor changes.
+
+Wed May 13 20:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 2.6m
+ - Problem of missed SCSI bus reset with the 53C895 fixed by
+ Richard Waltham. The 53C895 needs about 650 us for the bus
+ mode to settle. Delays used while resetting the controller
+ and the bus have been adjusted. Thanks Richard!
+ - Some simplification for 64 bit arch done ccb address testing.
+ - Add a check of the MSG_OUT phase after Selection with ATN.
+ - The new tagged queue stuff seems ok, so some informationnal
+ message have been conditionned by verbose >= 3.
+ - Donnot reset if a SBMC interrupt reports the same bus mode.
+ - Print out the whole driver set-up. Some options were missing and
+ the print statement was misplaced for modules.
+ - Ignore a SCSI parity interrupt if the chip is not connected to
+ the SCSI bus.
+
+Sat May 1 16:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 2.6l
+ - Add CCB done queue support for Alpha and perhaps some other
+ architectures.
+ - Add some barriers to enforce memory ordering for x86 and
+ Alpha architectures.
+ - Fix something that looks like an old bug in the nego SIR
+ interrupt code in case of negotiation failure.
+
+Sat Apr 25 21:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 2.6k
+ - Remove all accesses to the on-chip RAM from the C code:
+ Use SCRIPTS to load the on-chip RAM.
+ Use SCRIPTS to repair the start queue on selection timeout.
+ Use the copy of script in main memory to calculate the chip
+ context on phase mismatch.
+ - The above allows now to use the on-chip RAM without requiring
+ to get access to the on-chip RAM from the C code. This makes
+ on-chip RAM useable for linux-1.2.13 and for Linux-Alpha for
+ instance.
+ - Some simplifications and cleanups in the SCRIPTS and C code.
+ - Buglet fixed in parity error recovery SCRIPTS (never tested).
+ - Minor updates in README.ncr53c8xx.
+
+Wed Apr 15 21:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 2.6j
+ - Incorporate changes from linux-2.1.95 ncr53c8xx driver version.
+ - Add SMP support for linux-2.1.95 and above.
+ - Fix a bug when QUEUE FULL is returned and no commands are
+ disconnected. This happens with Atlas I / L912 and may happen
+ with Atlas II / LXY4.
+ - Nail another one on CHECK condition when requeuing the command
+ for auto-sense.
+ - Call scsi_done() for all completed commands after interrupt
+ handling.
+ - Increase the done queue to 24 entries.
+
+Sat Apr 4 20:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 2.6i
+ - CTEST0 is used by the 53C885 for Power Management and
+ priority setting between the 2 functions.
+ Use SDID instead as actual target number. Just have had to
+ overwrite it with SSID on reselection.
+ - Split DATA_IN and DATA_OUT scripts into 2 sub-scripts.
+ 64 segments are moved from on-chip RAM scripts.
+ If more segments, a script in main memory is used for the
+ additional segments.
+ - Since the SCRIPTS processor continues SCRIPTS execution after
+ having won arbitration, do some stuff prior to testing any SCSI
+ phase on reselection. This should have the vertue to process
+ scripts in parallel with the SCSI core performing selection.
+ - Increase the done queue to 12 entries.
+
+Sun Mar 29 12:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 2.6h
+ - Some fixes.
+
+Tue Mar 26 23:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 2.6g
+ - New done queue. 8 entries by default (6 always useable).
+ Can be increased if needed.
+ - Resources management using doubly linked queues.
+ - New auto-sense and QUEUE FULL handling that does not need to
+ stall the NCR queue any more.
+ - New CCB starvation avoiding algorithm.
+ - Prepare CCBs for SCSI commands that cannot be queued, instead of
+ inserting these commands into the waiting list. The waiting list
+ is now only used while resetting and when memory for CCBs is not
+ yet available?
+
+Sun Feb 8 22:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 2.6f
+ - Some fixes in order to really support the 53C895, at least with
+ FAST-20 devices.
+ - Heavy changes in the target/lun resources management to allow
+ the scripts to jump directly to the CCB on reselection instead
+ of walking on the lun CCBs list. Up to 32 tags per lun are now
+ supported without script processor and PCI traffic overhead.
+
+Sun Jan 11 22:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * revision 2.6d
+ - new (different ?) implementation of the start queue:
+ Use a simple CALL to a launch script in the CCB.
+ - implement a minimal done queue (1 entry :-) ).
+ this avoid scanning all CCBs on INT FLY (Only scan all CCBs, on
+ overflow). Hit ratio is better than 99.9 % on my system, so no
+ need to have a larger done queue.
+ - generalization of the restart of CCB on special condition as
+ Abort, QUEUE FULL, CHECK CONDITION.
+ This has been called 'silly scheduler'.
+ - make all the profiling code conditionned by a config option.
+ This spare some PCI traffic and C code when this feature is not
+ needed.
+ - handle more cleanly the situation where direction is unknown.
+ The pointers patching is now performed by the SCRIPTS processor.
+ - remove some useless scripts instructions.
+
+ Ported from driver 2.5 series:
+ ------------------------------
+ - Use FAST-5 instead of SLOW for slow scsi devices according to
+ new SPI-2 draft.
+ - Make some changes in order to accommodate with 875 rev <= 3
+ device errata listing 397. Minor consequences are:
+ . Leave use of PCI Write and Invalidate under user control.
+ Now, by default the driver does not enable PCI MWI and option
+ 'specf:y' is required in order to enable this feature.
+ . Memory Read Line is not enabled for 875 and 875-like chips.
+ . Programmed burst length set to 64 DWORDS (instead of 128).
+ (Note: SYMBIOS uses 32 DWORDS for the SDMS BIOS)
+ - Add 'buschk' boot option.
+ This option enables checking of SCSI BUS data lines after SCSI
+ RESET (set by default). (Submitted by Richard Waltham).
+ - Update the README file.
+ - Dispatch CONDITION MET and RESERVATION CONFLICT scsi status
+ as OK driver status.
+ - Update the README file and the Symbios NVRAM format definition
+ with removable media flags values (available with SDMS 4.09).
+ - Several PCI configuration registers fix-ups for powerpc.
+ (Patch sent by Cort).
diff --git a/Documentation/scsi/ChangeLog.sym53c8xx b/Documentation/scsi/ChangeLog.sym53c8xx
new file mode 100644
index 000000000..c1933707d
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.sym53c8xx
@@ -0,0 +1,593 @@
+Sat May 12 12:00 2001 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.7.3c
+ - Ensure LEDC bit in GPCNTL is cleared when reading the NVRAM.
+ Fix sent by Stig Telfer <stig@api-networks.com>.
+ - Backport from SYM-2 the work-around that allows to support
+ hardwares that fail PCI parity checking.
+ - Check that we received at least 8 bytes of INQUIRY response
+ for byte 7, that contains device capabilities, to be valid.
+ - Define scsi_set_pci_device() as nil for kernel < 2.4.4.
+ - + A couple of minor changes.
+
+Sat Apr 7 19:30 2001 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.7.3b
+ - Fix an unaligned LOAD from scripts (was used as dummy read).
+ - In ncr_soft_reset(), only try to ABORT the current operation
+ for chips that support SRUN bit in ISTAT1 and if SCRIPTS are
+ currently running, as 896 and 1010 manuals suggest.
+ - In the CCB abort path, do not assume that the CCB is currently
+ queued to SCRIPTS. This is not always true, notably after a
+ QUEUE FULL status or when using untagged commands.
+
+Sun Mar 4 18:30 2001 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.7.3a
+ - Fix an issue in the ncr_int_udc() (unexpected disconnect)
+ handling. If the DSA didn't match a CCB, a bad write to
+ memory could happen.
+
+Mon Feb 12 22:30 2001 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.7.3
+ - Support for hppa.
+ Tiny patch sent to me by Robert Hirst.
+ - Tiny patch for ia64 sent to me by Pamela Delaney.
+
+Tue Feb 6 13:30 2001 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.7.3-pre1
+ - Call pci_enable_device() as AC wants this to be done.
+ - Get both the BAR cookies used by CPU and actual PCI BAR
+ values used from SCRIPTS. Recent PCI chips are able to
+ access themselves using internal cycles, but they compare
+ BAR values to destination address to make decision.
+ Earlier chips simply use PCI transactions to access IO
+ registers from SCRIPTS.
+ The bus_dvma_to_mem() interface that reverses the actual
+ PCI BAR value from the BAR cookie is now useless.
+ This point had been discussed at the list and the solution
+ got approved by PCI code maintainer (Martin Mares).
+ - Merge changes for linux-2.4 that declare the host template
+ in the driver object also when the driver is statically
+ linked with the kernel.
+ - Increase SCSI message size up to 12 bytes, given that 8
+ bytes was not enough for the PPR message (fix).
+ - Add field 'maxoffs_st' (max offset for ST data transfers).
+ The C1010 supports offset 62 in DT mode but only 31 in
+ ST mode, to 2 different values for the max SCSI offset
+ are needed. Replace the obviously wrong masking of the
+ offset against 0x1f for ST mode by a lowering to
+ maxoffs_st of the SCSI offset in ST mode.
+ - Refine a work-around for the C1010-66. Revision 1 does
+ not requires extra cycles in DT DATA OUT phase.
+ - Add a missing endian-ization (abrt_tbl.addr).
+ - Minor clean-up in the np structure for fields accessed
+ from SCRIPTS that requires special alignments.
+
+Sun Sep 24 21:30 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.7.2
+ - Remove the hack for PPC added in previous driver version.
+ - Add FE_DAC feature bit to distinguish between 64 bit PCI
+ addressing (FE_DAC) and 64 bit PCI interface (FE_64BIT).
+ - Get rid of the boot command line "ultra:" argument.
+ This parameter wasn't that clever since we can use "sync:"
+ for Ultra/Ultra2 settings, and for Ultra3 we may want to
+ pass PPR options (for now only DT clocking).
+ - Add FE_VARCLK feature bit that indicates that SCSI clock
+ frequency may vary depending on board design and thus,
+ the driver should try to evaluate the SCSI clock.
+ - Simplify the way the driver determine the SCSI clock:
+ ULTRA3 -> 160 MHz, ULTRA2 -> 80 MHz otherwise 40 MHz.
+ Measure the SCSI clock frequency if FE_VARCLK is set.
+ - Remove FE_CLK80 feature bit that got useless.
+ - Add support for the SYM53C875A (Pamela Delaney).
+
+Wed Jul 26 23:30 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.7.1
+ - Provide OpenFirmware path through the proc FS on PPC.
+ - Download of on-chip SRAM using memcpy_toio() doesn't work
+ on PPC. Restore previous method (MEMORY MOVE from SCRIPTS).
+ - Remove trailing argument #2 from a couple of #undefs.
+
+Sun Jul 09 16:30 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.7.0
+ - Remove the PROFILE C and SCRIPTS code.
+ This facility was not this useful and thus was not longer
+ desirable given the increasing complexity of the driver code.
+ - Merges from FreeBSD sym-1.6.2 driver:
+ * Clarify memory barriers needed by the driver for architectures
+ that implement a weak memory ordering.
+ * Simpler handling of illegal phases and data overrun from
+ SCRIPTS. These errors are now immediately reported to
+ the C code by an interrupt.
+ * Sync the residual handling code with sym-1.6.2 and now
+ report `resid' to user for linux version >= 2.3.99
+ - General cleanup:
+ Move definitions for barriers and IO/MMIO operations to the
+ sym53c8xx_defs.h header files. They are now shared by the
+ both drivers.
+ Remove unused options that claimed to optimize for the 896.
+ If fact, they were not this clever. :)
+ Use SCSI_NCR_IOMAPPED instead of NCR_IOMAPPED.
+ Remove a couple of unused fields from data structures.
+
+Thu May 11 12:40 2000 Pam Delaney (pam.delaney@lsil.com)
+ * version sym53c8xx-1.6b
+ - Merged version.
+
+Mon Apr 24 12:00 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5m
+ - Return value 1 (instead of 0) from the driver setup routine.
+ - Do not enable PCI DAC cycles. This just broke support for
+ SYM534C896 on sparc64. Problem fixed by David S. Miller.
+
+Fri Apr 14 9:00 2000 Pam Delaney (pam.delaney@lsil.com)
+ * version sym53c8xx-1.6b-9
+ - Added 53C1010_66 support.
+ - Small fix to integrity checking code.
+ - Removed requirement for integrity checking if want to run
+ at ultra 3.
+
+Sat Apr 1 12:00 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5l
+ - Tiny change for __sparc__ appeared in 2.3.99-pre4.1 that
+ applies to cache line size (? Probably from David S Miller).
+ - Make sure no data transfer will happen for Scsi_Cmnd requests
+ that supply SCSI_DATA_NONE direction (this avoids some BUG()
+ statement in the PCI code when a data buffer is also supplied).
+
+Sat Mar 11 12:00 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.6b-5
+ - Test against expected data transfer direction from SCRIPTS.
+ - Add support for the new dynamic dma mapping kernel interface.
+ Requires Linux-2.3.47 (tested with pre-2.3.47-6).
+ Many thanks to David S. Miller for his preliminary changes
+ that have been useful guidelines.
+ - Get data transfer direction from the scsi command structure
+ (Scsi_Cmnd) with kernels that provide this information.
+
+Mon Mar 6 23:30 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5k
+ - Test against expected data transfer direction from SCRIPTS.
+ - Revert the change in 'ncr_flush_done_cmds()' but unmap the
+ scsi dma buffer prior to queueing the command to our done
+ list.
+ - Miscellaneous (minor) fixes in the code added in driver
+ version 1.5j.
+
+Mon Feb 14 4:00 2000 Pam Delaney (pam.delaney@lsil.com)
+ * version sym53c8xx-pre-1.6b-2.
+ - Updated the SCRIPTS error handling of the SWIDE
+ condition - to remove any reads of the sbdl
+ register. Changes needed because the 896 and 1010
+ chips will check parity in some special circumstances.
+ This will cause a parity error interrupt if not in
+ data phase. Changes based on those made in the
+ FreeBSD driver version 1.3.2.
+
+Sun Feb 20 11:00 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5j
+ - Add support for the new dynamic dma mapping kernel interface.
+ Requires Linux-2.3.47 (tested with pre-2.3.47-6).
+ Many thanks to David S. Miller for his preliminary changes
+ that have been useful guidelines, for having reviewed the
+ code and having tested this driver version on Ultra-Sparc.
+ - 2 tiny bugs fixed in the PCI wrapper that provides support
+ for early kernels without pci device structure.
+ - Get data transfer direction from the scsi command structure
+ (Scsi_Cmnd) with kernels that provide this information.
+ - Fix an old bug that only affected 896 rev. 1 when driver
+ profile support option was set in kernel configuration.
+
+Fri Jan 14 14:00 2000 Pam Delaney (pam.delaney@lsil.com)
+ * version sym53c8xx-pre-1.6b-1.
+ - Merge parallel driver series 1.61 and 1.5e
+
+Tue Jan 11 14:00 2000 Pam Delaney (pam.delaney@lsil.com)
+ * version sym53c8xx-1.61
+ - Added support for mounting disks on wide-narrow-wide
+ scsi configurations.
+ - Modified offset to be a maximum of 31 in ST mode,
+ 62 in DT mode.
+ - Based off of 1.60
+
+Mon Jan 10 10:00 2000 Pam Delaney (pam.delaney@lsil.com)
+ * version sym53c8xx-1.60
+ - Added capability to use the integrity checking code
+ in the kernel (optional).
+ - Added PPR negotiation.
+ - Added support for 53C1010 Ultra 3 part.
+ - Based off of 1.5f
+
+Sat Jan 8 22:00 2000 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5h
+ - Add year 2000 copyright.
+ - Display correctly bus signals when bus is detected wrong.
+ - Some fix for Sparc from DSM that went directly to kernel tree.
+
+Mon Dec 6 22:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5g
+ - Change messages written by the driver at initialisation and
+ through the /proc FS (rather cosmetic changes that consist in
+ printing out the PCI bus number and PCI device/function).
+ - Ensure the SCRIPTS processor is stopped while calibrating the
+ SCSI clock (the initialisation code has been a bit reworked).
+ Change moved to the FreeBSD sym_hipd driver).
+ - Some fixes in the MODIFY_DP/IGN_RESIDUE code and residual
+ calculation (moved from FreeBSD sym_hipd driver).
+ - Add NVRAM support for Tekram boards that use 24C16 EEPROM.
+ Code moved from the FreeBSD sym_hipd driver, since it has
+ been that one that got this feature first.
+ - Definitely disable overlapped PCI arbitration for all dual
+ function chips, since I cannot make sure for what chip revisions
+ it is actually safe.
+ - Add support for the SYM53C1510D (also for ncr53c8xx).
+ - Fix up properly the PCI latency timer when needed or asked for.
+ - Get rid of the old PCI bios interface, but preserve kernel 2.0
+ compatibility from a simple wrapper.
+ - Update the poor Tekram sync factor table.
+ - Fix in a tiny 'printk' bug that may oops in case of extended
+ errors (unrecovered parity error, data overrun, etc ...)
+ (Sent by Pamela Delaney from LSILOGIC)
+ - Remove the compilation condition about having to acquire the
+ io_request_lock since it seems to be a definite feature now.:)
+ - Change get_pages by GetPages since Linux >= 2.3.27 now wants
+ get_pages to ever be used as a kernel symbol (from 2.3.27).
+ - proc_dir structure no longer needed for kernel >= 2.3.27.
+
+Sun Oct 3 19:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5f
+ - Change the way the driver checks the PCI clock frequency, so
+ that overclocked PCI BUS up to 48 MHz will not be refused.
+ The more the BUS is overclocked, the less the driver will
+ guarantee that its measure of the SCSI clock is correct.
+ - Backport some minor improvements of SCRIPTS from the sym_hipd
+ driver.
+ - Backport the code rewrite of the START QUEUE dequeuing (on
+ bad scsi status received) from the sym_hipd driver.
+
+Sat Sep 11 11:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5e
+ - New linux-2.3.13 __setup scheme support added.
+ - Cleanup of the extended error status handling:
+ Use 1 bit per error type.
+ - Also save the extended error status prior to auto-sense.
+ - Add the FE_DIFF chip feature bit to indicate support of
+ diff probing from GPIO3 (825/825A/876/875).
+ - Remove the quirk handling that has been useless since day one.
+ - Work-around PCI chips being reported twice on some platforms.
+ - Add some redundant PCI reads in order to deal with common
+ bridge misbehaviour regarding posted write flushing.
+ - Add some other conditionnal code for people who have to deal
+ with really broken bridges (they will have to edit a source
+ file to try these options).
+ - Handle correctly (hopefully) jiffies wrap-around.
+ - Restore the entry used to detect 875 until revision 0xff.
+ (I removed it inadvertently, it seems :) )
+ - Replace __initfunc() which is deprecated stuff by __init which
+ is not yet so. ;-)
+ - Rewrite the MESSAGE IN scripts more generic by using a MOVE
+ table indirect. Extended messages of any size are accepted now.
+ (Size is limited to 8 for now, but a constant is just to be
+ increased if necessary)
+ - Fix some bug in the fully untested MDP handling:) and share
+ some code between MDP handling and residual calculation.
+ - Calculate the data transfer residual as the 2's complement
+ integer (A positive value in returned on data overrun, and
+ a negative one on underrun).
+ - Add support of some 'resource handling' for linux-2.3.13.
+ Basically the BARs have been changed to something more complex
+ in the pci_dev structure.
+ - Remove some deprecated code.
+
+Sat Jun 5 11:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5c
+ - Do not negotiate on auto-sense if we are currently using 8 bit
+ async transfer for the target.
+ - Only check for SISL/RAID on i386 platforms.
+ (A problem has been reported on PPC with that code).
+ - On MSG REJECT for a negotiation, the driver attempted to restart
+ the SCRIPT processor when this one was already running.
+
+Sat May 29 12:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5b
+ - Force negotiation prior auto-sense.
+ This ensures that the driver will be able to grab the sense data
+ from a device that has received a BUS DEVICE RESET message from
+ another initiator.
+ - Complete all disconnected CCBs for a logical UNIT if we are told
+ about a UNIT ATTENTION for a RESET condition by this target.
+ - Add the control command 'cleardev' that allows to send a ABORT
+ message to a logical UNIT (for test purpose).
+
+Tue May 25 23:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5a
+ - Add support for task abort and bus device reset SCSI message
+ and implement proper synchonisation with SCRIPTS to handle
+ correctly task abortion without races.
+ - Send an ABORT message (if untagged) or ABORT TAG message (if tagged)
+ when the driver is told to abort a command that is disconnected and
+ complete the command with appropriate error.
+ If the aborted command is not yet started, remove it from the start
+ queue and complete it with error.
+ - Add the control command 'resetdev' that allows to send a BUS
+ DEVICE RESET message to a target (for test purpose).
+ - Clean-up some unused or useless code.
+
+Fri May 21 23:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.5
+ - Add support for CHMOV with Wide controllers.
+ - Handling of the SWIDE (low byte residue at the end of a CHMOV
+ in DATA IN phase with WIDE transfer when the byte count gets odd).
+ - Handling of the IGNORE WIDE RESIDUE message.
+ Handled from SCRIPTS as possible with some optimizations when both
+ a wide device and the controller are odd at the same time (SWIDE
+ present and IGNORE WIDE RESIDUE message on the BUS at the same time).
+ - Check against data OVERRUN/UNDERRUN condition at the end of a data
+ transfer, whatever a SWIDE is present (OVERRUN in DATA IN phase)
+ or the SODL is full (UNDERRUN in DATA out phase).
+ - Handling of the MODIFY DATA POINTER message.
+ This one cannot be handled from SCRIPTS, but hopefully it will not
+ happen very often. :)
+ - Large rewrite of the SCSI MESSAGE handling.
+
+Sun May 9 11:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.4
+ - Support for IMMEDIATE ARBITRATION.
+ See the README file for detailed information about this feature.
+ Requires both a compile option and a boot option.
+ - Minor SCRIPTS optimization in reselection pattern for LUN 0.
+ - Simpler algorithm to deal with SCSI command starvation.
+ Just use 2 tag counters in flip/flop and switch to the other
+ one every 3 seconds.
+ - Do some work in SCRIPTS after the SELECT instruction and prior
+ to testing for a PHASE. SYMBIOS say this feature is working fine.
+ (Btw, only problems with Toshiba 3401B had been reported).
+ - Measure the PCI clock speed and do not attach controllers if
+ result is greater than 37 MHz. Since the precision of the
+ algorithm (from Stefan Esser) is better than 2%, this should
+ be fine.
+ - Fix the misdetection of SYM53C875E (was detected as a 876).
+ - Fix the misdetection of SYM53C810 not A (was detected as a 810A).
+ - Support for up to 256 TAGS per LUN (CMD_PER_LUN).
+ Currently limited to 255 due to Linux limitation. :)
+ - Support for up to 508 active commands (CAN_QUEUE).
+ - Support for the 53C895A by Pamela Delaney <pam.delaney@lsil.com>
+ The 53C895A contains all of the features of the 896 but has only
+ one channel and has a 32 bit PCI bus. It does 64 bit PCI addressing
+ using dual cycle PCI data transfers.
+ - Miscellaneous minor fixes.
+ - Some additions to the README.ncr53c8xx file.
+
+Tue Apr 15 10:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.3e
+ - Support for any number of LUNs (64) (SPI2-compliant).
+ (Btw, this may only be ever useful under linux-2.2 ;-))
+
+Sun Apr 11 10:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.3d
+ - Add 'hostid:#id' boot option. This option allows to change the
+ default SCSI id the driver uses for controllers.
+ - Make SCRIPTS not use self-mastering for PCI.
+ There were still 2 places the driver used this feature of the
+ 53C8XX family.
+ - Move some data structures (nvram layouts and driver set-up) to
+ the sym53c8xx_defs.h file. So, the both drivers will share them.
+ - Set MAX LUNS to 16 (instead of 8).
+
+Sat Mar 20 21:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.3b
+ - Add support for NCR PQS PDS.
+ James Bottomley <James.Bottomley@columbiasc.ncr.com>
+ - Allow value 0 for host ID.
+ - Support more than 8 controllers (> 40 in fact :-) )
+ - Add 'excl=#ioaddr' boot option: exclude controller.
+ (Version 1.3a driver)
+
+Thu Mar 11 23:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.3 (8xx-896 driver bundle)
+ - Equivalent changes as ncr53c8xx-3.2 due to the driver bundle.
+ (See Changelog.ncr53c8xx)
+ - Do a normal soft reset as first chip reset, since aborting current
+ operation may raise an interrupt we are not able to handle since
+ the interrupt handler is not yet established.
+
+Sat Mar 6 11:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.2b
+ - Fix some oooold bug that hangs the bus if a device rejects a
+ negotiation. Btw, the corresponding stuff also needed some cleanup
+ and thus the change is a bit larger than it could have been.
+ - Still some typo that made compilation fail for 64 bit (trivial fix).
+
+Sun Feb 21 20:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.2a
+ - The rewrite of the interrupt handling broke the SBMC interrupt
+ handling due to a 1 bit mask tiny error. Hopefully fixed.
+ - If INQUIRY came from a scatter list, the driver looked into
+ the scatterlist instead of the data.:) Since this should never
+ happen, we just discard the data if use_sg is not zero.
+
+Fri Feb 12 23:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.2
+ - Major rewrite of the interrupt handling and recovery stuff for
+ the support of non compliant SCSI removal, insertion and all
+ kinds of screw-up that may happen on the SCSI BUS.
+ Hopefully, the driver is now unbreakable or may-be, it is just
+ quite brocken. :-)
+ Many thanks to Johnson Russel (Symbios) for having responded to
+ my questions and for his interesting advices and comments about
+ support of SCSI hot-plug.
+ - Add 'recovery' option to driver set-up.
+ - Negotiate SYNC data transfers with CCS devices.
+ - Deal correctly with 64 bit PCI address registers on Linux 2.2.
+ Pointed out by Leonard Zubkoff.
+
+Sun Jan 31 18:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.1a
+ - Some 896 chip revisions (all for now :-)), may hang-up if the
+ soft reset bit is set at the wrong time while SCRIPTS are running.
+ We need to first abort the current SCRIPTS operation prior to
+ resetting the chip. This fix has been sent to me by SYMBIOS/LSI
+ and I just translated it into ncr53c8xx syntax.
+ Must be considered 100 % trustable, unless I did some mistake
+ when translating it. :-)
+
+Sun Jan 24 18:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.1
+ - Major rewrite of the SCSI parity error handling.
+ The informations contained in the data manuals are incomplete about
+ this feature.
+ I asked SYMBIOS about and got in reply the explanations that are
+ _indeed_ missing in the data manuals.
+ - Allow to tune request_irq() flags from the boot command line using
+ ncr53c8xx=irqm:??, as follows:
+ a) If bit 0x10 is set in irqm, SA_SHIRQ flag is not used.
+ b) If bit 0x20 is set in irqm, SA_INTERRUPT flag is not used.
+ By default the driver uses both SA_SHIRQ and SA_INTERRUPT.
+ Option 'ncr53c8xx=irqm:0x20' may be used when an IRQ is shared by
+ a 53C8XX adapter and a network board.
+ - Fix for 64 bit PCI address register calculation. (Lance Robinson)
+ - Fix for big-endian in phase mismatch handling. (Michal Jaegermann)
+
+Fri Jan 1 20:00 1999 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.0a
+ - Waiting list look-up didn't work for the first command of the list.
+ Hopefully fixed, but tested on paper only. ;)
+ - Remove the most part of PPC specific code for Linux-2.2.
+ Thanks to Cort.
+ - Some other minors changes.
+
+Sat Dec 19 21:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version sym53c8xx-1.0
+ - Define some new IO registers for the 896 (istat1, mbox0, mbox1)
+ - Revamp slightly the Symbios NVRAM lay-out based on the excerpt of
+ the header file I received from Symbios.
+ - Check the PCI bus number for the boot order (Using a fast
+ PCI controller behing a PCI-PCI bridge seems sub-optimal).
+ - Disable overlapped PCI arbitration for the 896 revision 1.
+ - Reduce a bit the number of IO register reads for phase mismatch
+ by reading DWORDS at a time instead of BYTES.
+
+Thu Dec 3 24:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.18
+ - I received this afternoon a 896 from SYMBIOS and started testing
+ the driver with this beast. After having fixed 3 buglets, it worked
+ with all features enabled including the phase mismatch handling
+ from SCRIPTS. Since this feature is not yet tested enough, the
+ boot option 'ncr53c8xx=specf:1' is still required to enable the
+ driver to handle PM from SCRIPTS.
+
+Sun Nov 29 18:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.17
+ - The SISL RAID change requires now remap_pci_mem() stuff to be
+ compiled for __i386__ when normal IOs are used.
+ - The PCI memory read from SCRIPTS that should ensure ordering
+ was in fact misplaced. BTW, this may explain why broken PCI
+ device drivers regarding ordering are working so well. ;-)
+ - Rewrite ncr53c8xx_setup (boot command line options) since the
+ binary code was a bit too bloated in my opinion.
+ - Make the code simpler in the wakeup_done routine.
+
+Tue Nov 24 23:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.16
+ - Add SCSI_NCR_OPTIMIZE_896_1 compile option and 'optim' boot option.
+ When set, the driver unconditionnaly assumes that the interrupt
+ handler is called for command completion, then clears INTF, scans
+ the done queue and returns if some completed CCB is found. If no
+ completed CCB are found, interrupt handling will proceed normally.
+ With a 896 that handles MA from SCRIPTS, this can be a great win,
+ since the driver will never performs PCI read transactions, but
+ only PCI write transactions that may be posted.
+ If the driver haven't to also raise the SIGP this would be perfect.
+ Even with this penalty, I think that this will work great.
+ Obviously this optimization makes sense only if the IRQ is not
+ shared with another device.
+ - Still a buglet in the tags initial settings that needed to be fixed.
+ It was not possible to disable TGQ at system startup for devices
+ that claim TGQ support. The driver used at least 2 for the queue
+ depth but did'nt keep track of user settings for tags depth lower
+ than 2.
+
+Thu Nov 19 23:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.15
+ - Add support for hardware LED control of the 896.
+ - Ignore chips that are driven by SISL RAID (DAC 960).
+ Change sent by Leonard Zubkoff and slightly reworked.
+ - Prevent 810A rev 11 and 860 rev 1 from using cache line based
+ transactions since those early chip revisions may use such on
+ LOAD/STORE instructions (work-around).
+ - Remove some useless and bloat code from the pci init stuff.
+ - Do not use the readX()/writeX() kernel functions for __i386__,
+ since they perform useless masking operations in order to deal
+ with broken driver in 2.1.X kernel.
+
+Wed Nov 11 10:00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.14
+ - The driver was unhappy when configured with default_tags > MAX_TAGS
+ Hopefully doubly-fixed.
+ - Set PCI_PARITY in PCI_COMMAND register in not set (PCI fix-up).
+ - Print out some message if phase mismatch is handled from SCRIPTS.
+
+Sun Nov 1 14H00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.13
+ - Some rewrite of the device detection code. This code had been
+ patched too much and needed to be face-lifted a bit.
+ Remove all platform dependent fix-ups that was not needed or
+ conflicted with some other driver code as work-arounds.
+ Reread the NVRAM before the calling of ncr_attach(). This spares
+ stack space and so allows to handle more boards.
+ Handle 64 bit base addresses under linux-2.0.X.
+ Set MASTER bit in PCI COMMAND register if not set.
+
+Wed Oct 30 22H00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.12
+ - Damned! I just broke the driver for Alpha by leaving a stale
+ instruction in the source code. Hopefully fixed.
+ - Do not set PFEN when it is useless. Doing so we are sure that BOF
+ will be active, since the manual appears to be very unclear on what
+ feature is actually used by the chip when both PFEN and BOF are
+ set.
+
+Sat Oct 24 16H00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.11
+ - LOAD/STORE instructions were miscompiled for register offsets
+ beyond 0x7f. This broke accesses to 896' new registers.
+ - Disable by default Phase Mismatch handling from SCRIPTS, since
+ current 896 rev.1 seems not to operate safely with the driver
+ when this feature is enabled (and above LOAD/STORE fix applied).
+ I will change the default to 'enabled' when this problem will be
+ solved.
+ Using boot option 'ncr53c8xx=specf:1' enables this feature.
+ - Implement a work-around (DEL 472 - ITEM 5) that should allow the
+ driver to safely enable hardware phase mismatch with 896 rev. 1.
+
+Tue Oct 20 22H00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.10
+ - Add the 53c876 description to the chip table. This is only useful
+ for printing the right name of the controller.
+ - Add additional checking of INQUIRY data:
+ Check INQUIRY data received length is at least 7. Byte 7 of
+ inquiry data contains device features bits and the driver might
+ be confused by garbage. Also check peripheral qualifier.
+ - Use a 1,3,5,...MAXTAGS*2+1 tag numbering. Previous driver could
+ use any tag number from 1 to 253 and some non conformant devices
+ might have problems with large tag numbers.
+ - Use NAME53C and NAME53C8XX for chip name prefix chip family name.
+ Just give a try using "sym53c" and "sym53c8xx" instead of "ncr53c"
+ and "ncr53c8xx". :-)
+
+Sun Oct 11 17H00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.9
+ - DEL-441 Item 2 work-around for the 53c876 rev <= 5 (0x15).
+ - Break ncr_scatter() into 2 functions in order to guarantee best
+ possible code optimization for the case we get a scatter list.
+ - Add the code intended to support up to 1 tera-byte for 64 bit systems.
+ It is probably too early, but I wanted to complete the thing.
+
+Sat Oct 3 14H00 1998 Gerard Roudier (groudier@club-internet.fr)
+ * version pre-sym53c8xx-0.8
+ - Do some testing with io_mapped and fix what needed to be so.
+ - Wait for SCSI selection to complete or time-out immediately after
+ the chip won arbitration, since executing SCRIPTS while the SCSI
+ core is performing SCSI selection breaks the selection procedure
+ at least for some chip revisions.
+ - Interrupt the SCRIPTS if a device does not go to MSG OUT phase after
+ having been selected with ATN. Such a situation is not recoverable,
+ better to fail when we are stuck.
diff --git a/Documentation/scsi/ChangeLog.sym53c8xx_2 b/Documentation/scsi/ChangeLog.sym53c8xx_2
new file mode 100644
index 000000000..18a5d712a
--- /dev/null
+++ b/Documentation/scsi/ChangeLog.sym53c8xx_2
@@ -0,0 +1,144 @@
+Sat Dec 30 21:30 2000 Gerard Roudier
+ * version sym-2.1.0-20001230
+ - Initial release of SYM-2.
+
+Mon Jan 08 21:30 2001 Gerard Roudier
+ * version sym-2.1.1-20010108
+ - Change a couple of defines containing ncr or NCR by their
+ equivalent containing sym or SYM instead.
+
+Sun Jan 14 22:30 2001 Gerard Roudier
+ * version sym-2.1.2-20010114
+ - Fix a couple of printfs:
+ * Add the target number to the display of transfer parameters.
+ * Make the display of TCQ and queue depth clearer.
+
+Wed Jan 17 23:30 2001 Gerard Roudier
+ * version sym-2.1.3-20010117
+ - Wrong residual values were returned in some situations.
+ This broke cdrecord with linux-2.4.0, for example.
+
+Sat Jan 20 18:00 2001 Gerard Roudier
+ * version sym-2.1.4-20010120
+ - Add year 2001 to Copyright.
+ - A tiny bug in the dma memory freeing path has been fixed.
+ (Driver unload failed with a bad address reference).
+
+Wed Jan 24 21:00 2001 Gerard Roudier
+ * version sym-2.1.5-20010124
+ - Make the driver work under Linux-2.4.x when statically linked
+ with the kernel.
+ - Check against memory allocation failure for SCRIPTZ and add the
+ missing free of this memory on instance detach.
+ - Check against GPIO3 pulled low for HVD controllers (driver did
+ just the opposite).
+ Misdetection of BUS mode was triggered on module reload only,
+ since BIOS settings were trusted instead on first load.
+
+Wed Feb 7 21:00 2001 Gerard Roudier
+ * version sym-2.1.6-20010207
+ - Call pci_enable_device() as wished by kernel maintainers.
+ - Change the sym_queue_scsiio() interface.
+ This is intended to simplify portability.
+ - Move the code intended to deal with the dowloading of SCRIPTS
+ from SCRIPTS :) in the patch method (was wrongly placed in
+ the SCRIPTS setup method).
+ - Add a missing cpu_to_scr() (np->abort_tbl.addr)
+ - Remove a wrong cpu_to_scr() (np->targtbl_ba)
+ - Cleanup a bit the PPR failure recovery code.
+
+Sat Mar 3 21:00 2001 Gerard Roudier
+ - Add option SYM_OPT_ANNOUNCE_TRANSFER_RATE and move the
+ corresponding code to file sym_misc.c.
+ Also move the code that sniffes INQUIRY to sym_misc.c.
+ This allows to share the corresponding code with NetBSD
+ without polluating the core driver source (sym_hipd.c).
+ - Add optionnal code that handles IO timeouts from the driver.
+ (not used under Linux, but required for NetBSD)
+ - Donnot assume any longer that PAGE_SHIFT and PAGE_SIZE are
+ defined at compile time, as at least NetBSD uses variables
+ in memory for that.
+ - Refine a work-around for the C1010-33 that consists in
+ disabling internal LOAD/STORE. Was applied up to revision 1.
+ Is now only applied to revision 0.
+ - Some code reorganisations due to code moves between files.
+
+Tues Apr 10 21:00 2001 Gerard Roudier
+ * version sym-2.1.9-20010412
+ - Reset 53C896 and 53C1010 chip according to the manual.
+ (i.e.: set the ABRT bit in ISTAT if SCRIPTS are running)
+ - Set #LUN in request sense only if scsi version <= 2 and
+ #LUN <= 7.
+ - Set busy_itl in LCB to 1 if the LCB is allocated and a
+ SCSI command is active. This is a simplification.
+ - In sym_hcb_free(), do not scan the free_ccbq if no CCBs
+ has been allocated. This fixes a panic if attach failed.
+ - Add DT/ST (double/simple transition) in the transfer
+ negotiation announce.
+ - Forces the max number of tasks per LUN to at least 64.
+ - Use pci_set_dma_mask() for linux-2.4.3 and above.
+ - A couple of comments fixes.
+
+Wed May 22:00 2001 Gerard Roudier
+ * version sym-2.1.10-20010509
+ - Mask GPCNTL against 0x1c (was 0xfc) for the reading of the NVRAM.
+ This ensure LEDC bit will not be set on 896 and later chips.
+ Fix sent by Chip Salzenberg <chip@perlsupport.com>.
+ - Define the number of PQS BUSes supported.
+ Fix sent by Stig Telfer <stig@api-networks.com>
+ - Miscellaneous common code rearrangements due to NetBSD accel
+ ioctl support, without impact on Linux (hopefully).
+
+Mon July 2 12:00 2001 Gerard Roudier
+ * version sym-2.1.11-20010702
+ - Add Tekram 390 U2B/U2W SCSI LED handling.
+ Submitted by Chip Salzenberg <chip@valinux.com>
+ - Add call to scsi_set_pci_device() for kernels >= 2.4.4.
+ - Check pci dma mapping failures and complete the IO with some
+ error when such mapping fails.
+ - Fill in instance->max_cmd_len for kernels > 2.4.0.
+ - A couple of tiny fixes ...
+
+Sun Sep 9 18:00 2001 Gerard Roudier
+ * version sym-2.1.12-20010909
+ - Change my email address.
+ - Add infrastructure for the forthcoming 64 bit DMA addressing support.
+ (Based on PCI 64 bit patch from David S. Miller)
+ - Donnot use anymore vm_offset_t type.
+
+Sat Sep 15 20:00 2001 Gerard Roudier
+ * version sym-2.1.13-20010916
+ - Add support for 64 bit DMA addressing using segment registers.
+ 16 registers for up to 4 GB x 16 -> 64 GB.
+
+Sat Sep 22 12:00 2001 Gerard Roudier
+ * version sym-2.1.14-20010922
+ - Complete rewrite of the eh handling. The driver is now using a
+ semaphore in order to behave synchronously as required by the eh
+ threads. A timer is also used to prevent from waiting indefinitely.
+
+Sun Sep 30 17:00 2001 Gerard Roudier
+ * version sym-2.1.15-20010930
+ - Include <linux/module.h> unconditionnaly as expected by latest
+ kernels.
+ - Use del_timer_sync() for recent kernels to kill the driver timer
+ on module release.
+
+Sun Oct 28 15:00 2001 Gerard Roudier
+ * version sym-2.1.16-20011028
+ - Slightly simplify driver configuration.
+ - Prepare a new patch against linux-2.4.13.
+
+Sat Nov 17 10:00 2001 Gerard Roudier
+ * version sym-2.1.17
+ - Fix a couple of gcc/gcc3 warnings.
+ - Allocate separately from the HCB the array for CCBs hashed by DSA.
+ All driver memory allocations are now not greater than 1 PAGE
+ even on PPC64 / 4KB PAGE surprising setup.
+
+Sat Dec 01 18:00 2001 Gerard Roudier
+ * version sym-2.1.17a
+ - Use u_long instead of U32 for the IO base cookie. This is more
+ consistent with what archs are expecting.
+ - Use MMIO per default for Power PC instead of some fake normal IO,
+ as Paul Mackerras stated that MMIO works fine now on this arch.
diff --git a/Documentation/scsi/FlashPoint.txt b/Documentation/scsi/FlashPoint.txt
new file mode 100644
index 000000000..5b5f29cb9
--- /dev/null
+++ b/Documentation/scsi/FlashPoint.txt
@@ -0,0 +1,163 @@
+The BusLogic FlashPoint SCSI Host Adapters are now fully supported on Linux.
+The upgrade program described below has been officially terminated effective
+31 March 1997 since it is no longer needed.
+
+
+
+ MYLEX INTRODUCES LINUX OPERATING SYSTEM SUPPORT FOR ITS
+ BUSLOGIC FLASHPOINT LINE OF SCSI HOST ADAPTERS
+
+
+FREMONT, CA, -- October 8, 1996 -- Mylex Corporation has expanded Linux
+operating system support to its BusLogic brand of FlashPoint Ultra SCSI
+host adapters. All of BusLogic's other SCSI host adapters, including the
+MultiMaster line, currently support the Linux operating system. Linux
+drivers and information will be available on October 15th at
+http://sourceforge.net/projects/dandelion/.
+
+"Mylex is committed to supporting the Linux community," says Peter Shambora,
+vice president of marketing for Mylex. "We have supported Linux driver
+development and provided technical support for our host adapters for several
+years, and are pleased to now make our FlashPoint products available to this
+user base."
+
+The Linux Operating System
+
+Linux is a freely-distributed implementation of UNIX for Intel x86, Sun
+SPARC, SGI MIPS, Motorola 68k, Digital Alpha AXP and Motorola PowerPC
+machines. It supports a wide range of software, including the X Window
+System, Emacs, and TCP/IP networking. Further information is available at
+http://www.linux.org and http://www.ssc.com/.
+
+FlashPoint Host Adapters
+
+The FlashPoint family of Ultra SCSI host adapters, designed for workstation
+and file server environments, are available in narrow, wide, dual channel,
+and dual channel wide versions. These adapters feature SeqEngine
+automation technology, which minimizes SCSI command overhead and reduces
+the number of interrupts generated to the CPU.
+
+About Mylex
+
+Mylex Corporation (NASDAQ/NM SYMBOL: MYLX), founded in 1983, is a leading
+producer of RAID technology and network management products. The company
+produces high performance disk array (RAID) controllers, and complementary
+computer products for network servers, mass storage systems, workstations
+and system boards. Through its wide range of RAID controllers and its
+BusLogic line of Ultra SCSI host adapter products, Mylex provides enabling
+intelligent I/O technologies that increase network management control,
+enhance CPU utilization, optimize I/O performance, and ensure data security
+and availability. Products are sold globally through a network of OEMs,
+major distributors, VARs, and system integrators. Mylex Corporation is
+headquartered at 34551 Ardenwood Blvd., Fremont, CA.
+
+ ####
+
+Contact:
+
+Peter Shambora
+Vice President of Marketing
+Mylex Corp.
+510/796-6100
+peters@mylex.com
+
+ ANNOUNCEMENT
+ BusLogic FlashPoint LT/BT-948 Upgrade Program
+ 1 February 1996
+
+ ADDITIONAL ANNOUNCEMENT
+ BusLogic FlashPoint LW/BT-958 Upgrade Program
+ 14 June 1996
+
+Ever since its introduction last October, the BusLogic FlashPoint LT has
+been problematic for members of the Linux community, in that no Linux
+drivers have been available for this new Ultra SCSI product. Despite its
+officially being positioned as a desktop workstation product, and not being
+particularly well suited for a high performance multitasking operating
+system like Linux, the FlashPoint LT has been touted by computer system
+vendors as the latest thing, and has been sold even on many of their high
+end systems, to the exclusion of the older MultiMaster products. This has
+caused grief for many people who inadvertently purchased a system expecting
+that all BusLogic SCSI Host Adapters were supported by Linux, only to
+discover that the FlashPoint was not supported and would not be for quite
+some time, if ever.
+
+After this problem was identified, BusLogic contacted its major OEM
+customers to make sure the BT-946C/956C MultiMaster cards would still be
+made available, and that Linux users who mistakenly ordered systems with
+the FlashPoint would be able to upgrade to the BT-946C. While this helped
+many purchasers of new systems, it was only a partial solution to the
+overall problem of FlashPoint support for Linux users. It did nothing to
+assist the people who initially purchased a FlashPoint for a supported
+operating system and then later decided to run Linux, or those who had
+ended up with a FlashPoint LT, believing it was supported, and were unable
+to return it.
+
+In the middle of December, I asked to meet with BusLogic's senior
+management to discuss the issues related to Linux and free software support
+for the FlashPoint. Rumors of varying accuracy had been circulating
+publicly about BusLogic's attitude toward the Linux community, and I felt
+it was best that these issues be addressed directly. I sent an email
+message after 11pm one evening, and the meeting took place the next
+afternoon. Unfortunately, corporate wheels sometimes grind slowly,
+especially when a company is being acquired, and so it's taken until now
+before the details were completely determined and a public statement could
+be made.
+
+BusLogic is not prepared at this time to release the information necessary
+for third parties to write drivers for the FlashPoint. The only existing
+FlashPoint drivers have been written directly by BusLogic Engineering, and
+there is no FlashPoint documentation sufficiently detailed to allow outside
+developers to write a driver without substantial assistance. While there
+are people at BusLogic who would rather not release the details of the
+FlashPoint architecture at all, that debate has not yet been settled either
+way. In any event, even if documentation were available today it would
+take quite a while for a usable driver to be written, especially since I'm
+not convinced that the effort required would be worthwhile.
+
+However, BusLogic does remain committed to providing a high performance
+SCSI solution for the Linux community, and does not want to see anyone left
+unable to run Linux because they have a Flashpoint LT. Therefore, BusLogic
+has put in place a direct upgrade program to allow any Linux user worldwide
+to trade in their FlashPoint LT for the new BT-948 MultiMaster PCI Ultra
+SCSI Host Adapter. The BT-948 is the Ultra SCSI successor to the BT-946C
+and has all the best features of both the BT-946C and FlashPoint LT,
+including smart termination and a flash PROM for easy firmware updates, and
+is of course compatible with the present Linux driver. The price for this
+upgrade has been set at US $45 plus shipping and handling, and the upgrade
+program will be administered through BusLogic Technical Support, which can
+be reached by electronic mail at techsup@buslogic.com, by Voice at +1 408
+654-0760, or by FAX at +1 408 492-1542.
+
+As of 14 June 1996, the original BusLogic FlashPoint LT to BT-948 upgrade
+program has now been extended to encompass the FlashPoint LW Wide Ultra
+SCSI Host Adapter. Any Linux user worldwide may trade in their FlashPoint
+LW (BT-950) for a BT-958 MultiMaster PCI Ultra SCSI Host Adapter. The
+price for this upgrade has been set at US $65 plus shipping and handling.
+
+I was a beta test site for the BT-948/958, and versions 1.2.1 and 1.3.1 of
+my BusLogic driver already included latent support for the BT-948/958.
+Additional cosmetic support for the Ultra SCSI MultiMaster cards was added
+subsequent releases. As a result of this cooperative testing process,
+several firmware bugs were found and corrected. My heavily loaded Linux
+test system provided an ideal environment for testing error recovery
+processes that are much more rarely exercised in production systems, but
+are crucial to overall system stability. It was especially convenient
+being able to work directly with their firmware engineer in demonstrating
+the problems under control of the firmware debugging environment; things
+sure have come a long way since the last time I worked on firmware for an
+embedded system. I am presently working on some performance testing and
+expect to have some data to report in the not too distant future.
+
+BusLogic asked me to send this announcement since a large percentage of the
+questions regarding support for the FlashPoint have either been sent to me
+directly via email, or have appeared in the Linux newsgroups in which I
+participate. To summarize, BusLogic is offering Linux users an upgrade
+from the unsupported FlashPoint LT (BT-930) to the supported BT-948 for US
+$45 plus shipping and handling, or from the unsupported FlashPoint LW
+(BT-950) to the supported BT-958 for $65 plus shipping and handling.
+Contact BusLogic Technical Support at techsup@buslogic.com or +1 408
+654-0760 to take advantage of their offer.
+
+ Leonard N. Zubkoff
+ lnz@dandelion.com
diff --git a/Documentation/scsi/LICENSE.FlashPoint b/Documentation/scsi/LICENSE.FlashPoint
new file mode 100644
index 000000000..ffd0fe226
--- /dev/null
+++ b/Documentation/scsi/LICENSE.FlashPoint
@@ -0,0 +1,60 @@
+ FlashPoint Driver Developer's Kit
+ Version 1.0
+
+ Copyright 1995-1996 by Mylex Corporation
+ All Rights Reserved
+
+This program is free software; you may redistribute and/or modify it under
+the terms of either:
+
+ a) the GNU General Public License as published by the Free Software
+ Foundation; either version 2, or (at your option) any later version,
+
+ or
+
+ b) the "BSD-style License" included below.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY, without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See either the GNU General Public
+License or the BSD-style License below for more details.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+675 Mass Ave, Cambridge, MA 02139, USA.
+
+The BSD-style License is as follows:
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain this LICENSE.FlashPoint
+ file, without modification, this list of conditions, and the following
+ disclaimer. The following copyright notice must appear immediately at
+ the beginning of all source files:
+
+ Copyright 1995-1996 by Mylex Corporation. All Rights Reserved
+
+ This file is available under both the GNU General Public License
+ and a BSD-style copyright; see LICENSE.FlashPoint for details.
+
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+3. The name of Mylex Corporation may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+THIS SOFTWARE IS PROVIDED BY MYLEX CORP. ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
+NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
diff --git a/Documentation/scsi/LICENSE.qla2xxx b/Documentation/scsi/LICENSE.qla2xxx
new file mode 100644
index 000000000..52f0b4359
--- /dev/null
+++ b/Documentation/scsi/LICENSE.qla2xxx
@@ -0,0 +1,290 @@
+Copyright (c) 2003-2014 QLogic Corporation
+QLogic Linux FC-FCoE Driver
+
+This program includes a device driver for Linux 3.x.
+You may modify and redistribute the device driver code under the
+GNU General Public License (a copy of which is attached hereto as
+Exhibit A) published by the Free Software Foundation (version 2).
+
+
+
+EXHIBIT A
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
diff --git a/Documentation/scsi/LICENSE.qla4xxx b/Documentation/scsi/LICENSE.qla4xxx
new file mode 100644
index 000000000..fcc27ad27
--- /dev/null
+++ b/Documentation/scsi/LICENSE.qla4xxx
@@ -0,0 +1,289 @@
+Copyright (c) 2003-2013 QLogic Corporation
+QLogic Linux iSCSI Driver
+
+This program includes a device driver for Linux 3.x.
+You may modify and redistribute the device driver code under the
+GNU General Public License (a copy of which is attached hereto as
+Exhibit A) published by the Free Software Foundation (version 2).
+
+
+EXHIBIT A
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
diff --git a/Documentation/scsi/Mylex.txt b/Documentation/scsi/Mylex.txt
new file mode 100644
index 000000000..3797f3e6c
--- /dev/null
+++ b/Documentation/scsi/Mylex.txt
@@ -0,0 +1,5 @@
+Please see the file README.BusLogic for information about Linux support for
+Mylex (formerly BusLogic) MultiMaster and FlashPoint SCSI Host Adapters.
+
+The Mylex DAC960 PCI RAID Controllers are now supported. Please consult
+http://sourceforge.net/projects/dandelion for further information on the DAC960 driver.
diff --git a/Documentation/scsi/NinjaSCSI.txt b/Documentation/scsi/NinjaSCSI.txt
new file mode 100644
index 000000000..ac8db8cee
--- /dev/null
+++ b/Documentation/scsi/NinjaSCSI.txt
@@ -0,0 +1,128 @@
+
+ WorkBiT NinjaSCSI-3/32Bi driver for Linux
+
+1. Comment
+ This is Workbit corp.'s(http://www.workbit.co.jp/) NinjaSCSI-3
+for Linux.
+
+2. My Linux environment
+Linux kernel: 2.4.7 / 2.2.19
+pcmcia-cs: 3.1.27
+gcc: gcc-2.95.4
+PC card: I-O data PCSC-F (NinjaSCSI-3)
+ I-O data CBSC-II in 16 bit mode (NinjaSCSI-32Bi)
+SCSI device: I-O data CDPS-PX24 (CD-ROM drive)
+ Media Intelligent MMO-640GT (Optical disk drive)
+
+3. Install
+[1] Check your PC card is true "NinjaSCSI-3" card.
+ If you installed pcmcia-cs already, pcmcia reports your card as UNKNOWN
+ card, and write ["WBT", "NinjaSCSI-3", "R1.0"] or some other string to
+ your console or log file.
+ You can also use "cardctl" program (this program is in pcmcia-cs source
+ code) to get more info.
+
+# cat /var/log/messages
+...
+Jan 2 03:45:06 lindberg cardmgr[78]: unsupported card in socket 1
+Jan 2 03:45:06 lindberg cardmgr[78]: product info: "WBT", "NinjaSCSI-3", "R1.0"
+...
+# cardctl ident
+Socket 0:
+ no product info available
+Socket 1:
+ product info: "IO DATA", "CBSC16 ", "1"
+
+
+[2] Get the Linux kernel source, and extract it to /usr/src.
+ Because the NinjaSCSI driver requires some SCSI header files in Linux
+ kernel source, I recommend rebuilding your kernel; this eliminates
+ some versioning problems.
+$ cd /usr/src
+$ tar -zxvf linux-x.x.x.tar.gz
+$ cd linux
+$ make config
+...
+
+[3] If you use this driver with Kernel 2.2, unpack pcmcia-cs in some directory
+ and make & install. This driver requires the pcmcia-cs header file.
+$ cd /usr/src
+$ tar zxvf cs-pcmcia-cs-3.x.x.tar.gz
+...
+
+[4] Extract this driver's archive somewhere, and edit Makefile, then do make.
+$ tar -zxvf nsp_cs-x.x.tar.gz
+$ cd nsp_cs-x.x
+$ emacs Makefile
+...
+$ make
+
+[5] Copy nsp_cs.ko to suitable place, like /lib/modules/<Kernel version>/pcmcia/ .
+
+[6] Add these lines to /etc/pcmcia/config .
+ If you use pcmcia-cs-3.1.8 or later, we can use "nsp_cs.conf" file.
+ So, you don't need to edit file. Just copy to /etc/pcmcia/ .
+
+-------------------------------------
+device "nsp_cs"
+ class "scsi" module "nsp_cs"
+
+card "WorkBit NinjaSCSI-3"
+ version "WBT", "NinjaSCSI-3", "R1.0"
+ bind "nsp_cs"
+
+card "WorkBit NinjaSCSI-32Bi (16bit)"
+ version "WORKBIT", "UltraNinja-16", "1"
+ bind "nsp_cs"
+
+# OEM
+card "WorkBit NinjaSCSI-32Bi (16bit) / IO-DATA"
+ version "IO DATA", "CBSC16 ", "1"
+ bind "nsp_cs"
+
+# OEM
+card "WorkBit NinjaSCSI-32Bi (16bit) / KME-1"
+ version "KME ", "SCSI-CARD-001", "1"
+ bind "nsp_cs"
+card "WorkBit NinjaSCSI-32Bi (16bit) / KME-2"
+ version "KME ", "SCSI-CARD-002", "1"
+ bind "nsp_cs"
+card "WorkBit NinjaSCSI-32Bi (16bit) / KME-3"
+ version "KME ", "SCSI-CARD-003", "1"
+ bind "nsp_cs"
+card "WorkBit NinjaSCSI-32Bi (16bit) / KME-4"
+ version "KME ", "SCSI-CARD-004", "1"
+ bind "nsp_cs"
+-------------------------------------
+
+[7] Start (or restart) pcmcia-cs.
+# /etc/rc.d/rc.pcmcia start (BSD style)
+or
+# /etc/init.d/pcmcia start (SYSV style)
+
+
+4. History
+See README.nin_cs .
+
+5. Caution
+ If you eject card when doing some operation for your SCSI device or suspend
+your computer, you encount some *BAD* error like disk crash.
+ It works good when I using this driver right way. But I'm not guarantee
+your data. Please backup your data when you use this driver.
+
+6. Known Bugs
+ In 2.4 kernel, you can't use 640MB Optical disk. This error comes from
+high level SCSI driver.
+
+7. Testing
+ Please send me some reports(bug reports etc..) of this software.
+When you send report, please tell me these or more.
+ card name
+ kernel version
+ your SCSI device name(hard drive, CD-ROM, etc...)
+
+8. Copyright
+ See GPL.
+
+
+2001/08/08 yokota@netlab.is.tsukuba.ac.jp <YOKOTA Hiroshi>
diff --git a/Documentation/scsi/aacraid.txt b/Documentation/scsi/aacraid.txt
new file mode 100644
index 000000000..30f643f61
--- /dev/null
+++ b/Documentation/scsi/aacraid.txt
@@ -0,0 +1,150 @@
+AACRAID Driver for Linux (take two)
+
+Introduction
+-------------------------
+The aacraid driver adds support for Adaptec (http://www.adaptec.com)
+RAID controllers. This is a major rewrite from the original
+Adaptec supplied driver. It has significantly cleaned up both the code
+and the running binary size (the module is less than half the size of
+the original).
+
+Supported Cards/Chipsets
+-------------------------
+ PCI ID (pci.ids) OEM Product
+ 9005:0285:9005:0285 Adaptec 2200S (Vulcan)
+ 9005:0285:9005:0286 Adaptec 2120S (Crusader)
+ 9005:0285:9005:0287 Adaptec 2200S (Vulcan-2m)
+ 9005:0285:9005:0288 Adaptec 3230S (Harrier)
+ 9005:0285:9005:0289 Adaptec 3240S (Tornado)
+ 9005:0285:9005:028a Adaptec 2020ZCR (Skyhawk)
+ 9005:0285:9005:028b Adaptec 2025ZCR (Terminator)
+ 9005:0286:9005:028c Adaptec 2230S (Lancer)
+ 9005:0286:9005:028c Adaptec 2230SLP (Lancer)
+ 9005:0286:9005:028d Adaptec 2130S (Lancer)
+ 9005:0285:9005:028e Adaptec 2020SA (Skyhawk)
+ 9005:0285:9005:028f Adaptec 2025SA (Terminator)
+ 9005:0285:9005:0290 Adaptec 2410SA (Jaguar)
+ 9005:0285:103c:3227 Adaptec 2610SA (Bearcat HP release)
+ 9005:0285:9005:0293 Adaptec 21610SA (Corsair-16)
+ 9005:0285:9005:0296 Adaptec 2240S (SabreExpress)
+ 9005:0285:9005:0292 Adaptec 2810SA (Corsair-8)
+ 9005:0285:9005:0297 Adaptec 4005 (AvonPark)
+ 9005:0285:9005:0298 Adaptec 4000 (BlackBird)
+ 9005:0285:9005:0299 Adaptec 4800SAS (Marauder-X)
+ 9005:0285:9005:029a Adaptec 4805SAS (Marauder-E)
+ 9005:0286:9005:029b Adaptec 2820SA (Intruder)
+ 9005:0286:9005:029c Adaptec 2620SA (Intruder)
+ 9005:0286:9005:029d Adaptec 2420SA (Intruder HP release)
+ 9005:0286:9005:02ac Adaptec 1800 (Typhoon44)
+ 9005:0285:9005:02b5 Adaptec 5445 (Voodoo44)
+ 9005:0285:15d9:02b5 SMC AOC-USAS-S4i
+ 9005:0285:9005:02b6 Adaptec 5805 (Voodoo80)
+ 9005:0285:15d9:02b6 SMC AOC-USAS-S8i
+ 9005:0285:9005:02b7 Adaptec 5085 (Voodoo08)
+ 9005:0285:9005:02bb Adaptec 3405 (Marauder40LP)
+ 9005:0285:9005:02bc Adaptec 3805 (Marauder80LP)
+ 9005:0285:9005:02c7 Adaptec 3085 (Marauder08ELP)
+ 9005:0285:9005:02bd Adaptec 31205 (Marauder120)
+ 9005:0285:9005:02be Adaptec 31605 (Marauder160)
+ 9005:0285:9005:02c3 Adaptec 51205 (Voodoo120)
+ 9005:0285:9005:02c4 Adaptec 51605 (Voodoo160)
+ 9005:0285:15d9:02c9 SMC AOC-USAS-S4iR
+ 9005:0285:15d9:02ca SMC AOC-USAS-S8iR
+ 9005:0285:9005:02ce Adaptec 51245 (Voodoo124)
+ 9005:0285:9005:02cf Adaptec 51645 (Voodoo164)
+ 9005:0285:9005:02d0 Adaptec 52445 (Voodoo244)
+ 9005:0285:9005:02d1 Adaptec 5405 (Voodoo40)
+ 9005:0285:15d9:02d2 SMC AOC-USAS-S8i-LP
+ 9005:0285:15d9:02d3 SMC AOC-USAS-S8iR-LP
+ 9005:0285:9005:02d4 Adaptec ASR-2045 (Voodoo04 Lite)
+ 9005:0285:9005:02d5 Adaptec ASR-2405 (Voodoo40 Lite)
+ 9005:0285:9005:02d6 Adaptec ASR-2445 (Voodoo44 Lite)
+ 9005:0285:9005:02d7 Adaptec ASR-2805 (Voodoo80 Lite)
+ 9005:0285:9005:02d8 Adaptec 5405Z (Voodoo40 BLBU)
+ 9005:0285:9005:02d9 Adaptec 5445Z (Voodoo44 BLBU)
+ 9005:0285:9005:02da Adaptec 5805Z (Voodoo80 BLBU)
+ 1011:0046:9005:0364 Adaptec 5400S (Mustang)
+ 1011:0046:9005:0365 Adaptec 5400S (Mustang)
+ 9005:0287:9005:0800 Adaptec Themisto (Jupiter)
+ 9005:0200:9005:0200 Adaptec Themisto (Jupiter)
+ 9005:0286:9005:0800 Adaptec Callisto (Jupiter)
+ 1011:0046:9005:1364 Dell PERC 2/QC (Quad Channel, Mustang)
+ 1011:0046:9005:1365 Dell PERC 2/QC (Quad Channel, Mustang)
+ 1028:0001:1028:0001 Dell PERC 2/Si (Iguana)
+ 1028:0003:1028:0003 Dell PERC 3/Si (SlimFast)
+ 1028:0002:1028:0002 Dell PERC 3/Di (Opal)
+ 1028:0004:1028:0004 Dell PERC 3/SiF (Iguana)
+ 1028:0004:1028:00d0 Dell PERC 3/DiF (Iguana)
+ 1028:0002:1028:00d1 Dell PERC 3/DiV (Viper)
+ 1028:0002:1028:00d9 Dell PERC 3/DiL (Lexus)
+ 1028:000a:1028:0106 Dell PERC 3/DiJ (Jaguar)
+ 1028:000a:1028:011b Dell PERC 3/DiD (Dagger)
+ 1028:000a:1028:0121 Dell PERC 3/DiB (Boxster)
+ 9005:0285:1028:0287 Dell PERC 320/DC (Vulcan)
+ 9005:0285:1028:0291 Dell CERC 2 (DellCorsair)
+ 1011:0046:103c:10c2 HP NetRAID-4M (Mustang)
+ 9005:0285:17aa:0286 Legend S220 (Crusader)
+ 9005:0285:17aa:0287 Legend S230 (Vulcan)
+ 9005:0285:9005:0290 IBM ServeRAID 7t (Jaguar)
+ 9005:0285:1014:02F2 IBM ServeRAID 8i (AvonPark)
+ 9005:0286:1014:9540 IBM ServeRAID 8k/8k-l4 (AuroraLite)
+ 9005:0286:1014:9580 IBM ServeRAID 8k/8k-l8 (Aurora)
+ 9005:0285:1014:034d IBM ServeRAID 8s (Marauder-E)
+ 9005:0286:9005:029e ICP ICP9024RO (Lancer)
+ 9005:0286:9005:029f ICP ICP9014RO (Lancer)
+ 9005:0286:9005:02a0 ICP ICP9047MA (Lancer)
+ 9005:0286:9005:02a1 ICP ICP9087MA (Lancer)
+ 9005:0285:9005:02a4 ICP ICP9085LI (Marauder-X)
+ 9005:0285:9005:02a5 ICP ICP5085BR (Marauder-E)
+ 9005:0286:9005:02a6 ICP ICP9067MA (Intruder-6)
+ 9005:0285:9005:02b2 ICP (Voodoo 8 internal 8 external)
+ 9005:0285:9005:02b8 ICP ICP5445SL (Voodoo44)
+ 9005:0285:9005:02b9 ICP ICP5085SL (Voodoo80)
+ 9005:0285:9005:02ba ICP ICP5805SL (Voodoo08)
+ 9005:0285:9005:02bf ICP ICP5045BL (Marauder40LP)
+ 9005:0285:9005:02c0 ICP ICP5085BL (Marauder80LP)
+ 9005:0285:9005:02c8 ICP ICP5805BL (Marauder08ELP)
+ 9005:0285:9005:02c1 ICP ICP5125BR (Marauder120)
+ 9005:0285:9005:02c2 ICP ICP5165BR (Marauder160)
+ 9005:0285:9005:02c5 ICP ICP5125SL (Voodoo120)
+ 9005:0285:9005:02c6 ICP ICP5165SL (Voodoo160)
+ 9005:0286:9005:02ab (Typhoon40)
+ 9005:0286:9005:02ad (Aurora ARK)
+ 9005:0286:9005:02ae (Aurora Lite ARK)
+ 9005:0285:9005:02b0 (Sunrise Lake ARK)
+ 9005:0285:9005:02b1 Adaptec (Voodoo 8 internal 8 external)
+ 9005:0285:108e:7aac SUN STK RAID REM (Voodoo44 Coyote)
+ 9005:0285:108e:0286 SUN STK RAID INT (Cougar)
+ 9005:0285:108e:0287 SUN STK RAID EXT (Prometheus)
+ 9005:0285:108e:7aae SUN STK RAID EM (Narvi)
+
+People
+-------------------------
+Alan Cox <alan@lxorguk.ukuu.org.uk>
+Christoph Hellwig <hch@infradead.org> (updates for new-style PCI probing and SCSI host registration,
+ small cleanups/fixes)
+Matt Domsch <matt_domsch@dell.com> (revision ioctl, adapter messages)
+Deanna Bonds (non-DASD support, PAE fibs and 64 bit, added new adaptec controllers
+ added new ioctls, changed scsi interface to use new error handler,
+ increased the number of fibs and outstanding commands to a container)
+
+ (fixed 64bit and 64G memory model, changed confusing naming convention
+ where fibs that go to the hardware are consistently called hw_fibs and
+ not just fibs like the name of the driver tracking structure)
+Mark Salyzyn <Mark_Salyzyn@adaptec.com> Fixed panic issues and added some new product ids for upcoming hbas. Performance tuning, card failover and bug mitigations.
+Achim Leubner <Achim_Leubner@adaptec.com>
+
+Original Driver
+-------------------------
+Adaptec Unix OEM Product Group
+
+Mailing List
+-------------------------
+linux-scsi@vger.kernel.org (Interested parties troll here)
+Also note this is very different to Brian's original driver
+so don't expect him to support it.
+Adaptec does support this driver. Contact Adaptec tech support or
+aacraid@adaptec.com
+
+Original by Brian Boerner February 2001
+Rewritten by Alan Cox, November 2001
diff --git a/Documentation/scsi/advansys.txt b/Documentation/scsi/advansys.txt
new file mode 100644
index 000000000..4a3db62b7
--- /dev/null
+++ b/Documentation/scsi/advansys.txt
@@ -0,0 +1,243 @@
+AdvanSys (Advanced System Products, Inc.) manufactures the following
+RISC-based, Bus-Mastering, Fast (10 Mhz) and Ultra (20 Mhz) Narrow
+(8-bit transfer) SCSI Host Adapters for the ISA, EISA, VL, and PCI
+buses and RISC-based, Bus-Mastering, Ultra (20 Mhz) Wide (16-bit
+transfer) SCSI Host Adapters for the PCI bus.
+
+The CDB counts below indicate the number of SCSI CDB (Command
+Descriptor Block) requests that can be stored in the RISC chip
+cache and board LRAM. A CDB is a single SCSI command. The driver
+detect routine will display the number of CDBs available for each
+adapter detected. The number of CDBs used by the driver can be
+lowered in the BIOS by changing the 'Host Queue Size' adapter setting.
+
+Laptop Products:
+ ABP-480 - Bus-Master CardBus (16 CDB)
+
+Connectivity Products:
+ ABP510/5150 - Bus-Master ISA (240 CDB)
+ ABP5140 - Bus-Master ISA PnP (16 CDB)
+ ABP5142 - Bus-Master ISA PnP with floppy (16 CDB)
+ ABP902/3902 - Bus-Master PCI (16 CDB)
+ ABP3905 - Bus-Master PCI (16 CDB)
+ ABP915 - Bus-Master PCI (16 CDB)
+ ABP920 - Bus-Master PCI (16 CDB)
+ ABP3922 - Bus-Master PCI (16 CDB)
+ ABP3925 - Bus-Master PCI (16 CDB)
+ ABP930 - Bus-Master PCI (16 CDB)
+ ABP930U - Bus-Master PCI Ultra (16 CDB)
+ ABP930UA - Bus-Master PCI Ultra (16 CDB)
+ ABP960 - Bus-Master PCI MAC/PC (16 CDB)
+ ABP960U - Bus-Master PCI MAC/PC Ultra (16 CDB)
+
+Single Channel Products:
+ ABP542 - Bus-Master ISA with floppy (240 CDB)
+ ABP742 - Bus-Master EISA (240 CDB)
+ ABP842 - Bus-Master VL (240 CDB)
+ ABP940 - Bus-Master PCI (240 CDB)
+ ABP940U - Bus-Master PCI Ultra (240 CDB)
+ ABP940UA/3940UA - Bus-Master PCI Ultra (240 CDB)
+ ABP970 - Bus-Master PCI MAC/PC (240 CDB)
+ ABP970U - Bus-Master PCI MAC/PC Ultra (240 CDB)
+ ABP3960UA - Bus-Master PCI MAC/PC Ultra (240 CDB)
+ ABP940UW/3940UW - Bus-Master PCI Ultra-Wide (253 CDB)
+ ABP970UW - Bus-Master PCI MAC/PC Ultra-Wide (253 CDB)
+ ABP3940U2W - Bus-Master PCI LVD/Ultra2-Wide (253 CDB)
+
+Multi-Channel Products:
+ ABP752 - Dual Channel Bus-Master EISA (240 CDB Per Channel)
+ ABP852 - Dual Channel Bus-Master VL (240 CDB Per Channel)
+ ABP950 - Dual Channel Bus-Master PCI (240 CDB Per Channel)
+ ABP950UW - Dual Channel Bus-Master PCI Ultra-Wide (253 CDB Per Channel)
+ ABP980 - Four Channel Bus-Master PCI (240 CDB Per Channel)
+ ABP980U - Four Channel Bus-Master PCI Ultra (240 CDB Per Channel)
+ ABP980UA/3980UA - Four Channel Bus-Master PCI Ultra (16 CDB Per Chan.)
+ ABP3950U2W - Bus-Master PCI LVD/Ultra2-Wide and Ultra-Wide (253 CDB)
+ ABP3950U3W - Bus-Master PCI Dual LVD2/Ultra3-Wide (253 CDB)
+
+Driver Compile Time Options and Debugging
+
+The following constants can be defined in the source file.
+
+1. ADVANSYS_ASSERT - Enable driver assertions (Def: Enabled)
+
+ Enabling this option adds assertion logic statements to the
+ driver. If an assertion fails a message will be displayed to
+ the console, but the system will continue to operate. Any
+ assertions encountered should be reported to the person
+ responsible for the driver. Assertion statements may proactively
+ detect problems with the driver and facilitate fixing these
+ problems. Enabling assertions will add a small overhead to the
+ execution of the driver.
+
+2. ADVANSYS_DEBUG - Enable driver debugging (Def: Disabled)
+
+ Enabling this option adds tracing functions to the driver and the
+ ability to set a driver tracing level at boot time. This option is
+ very useful for debugging the driver, but it will add to the size
+ of the driver execution image and add overhead to the execution of
+ the driver.
+
+ The amount of debugging output can be controlled with the global
+ variable 'asc_dbglvl'. The higher the number the more output. By
+ default the debug level is 0.
+
+ If the driver is loaded at boot time and the LILO Driver Option
+ is included in the system, the debug level can be changed by
+ specifying a 5th (ASC_NUM_IOPORT_PROBE + 1) I/O Port. The
+ first three hex digits of the pseudo I/O Port must be set to
+ 'deb' and the fourth hex digit specifies the debug level: 0 - F.
+ The following command line will look for an adapter at 0x330
+ and set the debug level to 2.
+
+ linux advansys=0x330,0,0,0,0xdeb2
+
+ If the driver is built as a loadable module this variable can be
+ defined when the driver is loaded. The following insmod command
+ will set the debug level to one.
+
+ insmod advansys.o asc_dbglvl=1
+
+ Debugging Message Levels:
+ 0: Errors Only
+ 1: High-Level Tracing
+ 2-N: Verbose Tracing
+
+ To enable debug output to console, please make sure that:
+
+ a. System and kernel logging is enabled (syslogd, klogd running).
+ b. Kernel messages are routed to console output. Check
+ /etc/syslog.conf for an entry similar to this:
+
+ kern.* /dev/console
+
+ c. klogd is started with the appropriate -c parameter
+ (e.g. klogd -c 8)
+
+ This will cause printk() messages to be be displayed on the
+ current console. Refer to the klogd(8) and syslogd(8) man pages
+ for details.
+
+ Alternatively you can enable printk() to console with this
+ program. However, this is not the 'official' way to do this.
+ Debug output is logged in /var/log/messages.
+
+ main()
+ {
+ syscall(103, 7, 0, 0);
+ }
+
+ Increasing LOG_BUF_LEN in kernel/printk.c to something like
+ 40960 allows more debug messages to be buffered in the kernel
+ and written to the console or log file.
+
+3. ADVANSYS_STATS - Enable statistics (Def: Enabled)
+
+ Enabling this option adds statistics collection and display
+ through /proc to the driver. The information is useful for
+ monitoring driver and device performance. It will add to the
+ size of the driver execution image and add minor overhead to
+ the execution of the driver.
+
+ Statistics are maintained on a per adapter basis. Driver entry
+ point call counts and transfer size counts are maintained.
+ Statistics are only available for kernels greater than or equal
+ to v1.3.0 with the CONFIG_PROC_FS (/proc) file system configured.
+
+ AdvanSys SCSI adapter files have the following path name format:
+
+ /proc/scsi/advansys/{0,1,2,3,...}
+
+ This information can be displayed with cat. For example:
+
+ cat /proc/scsi/advansys/0
+
+ When ADVANSYS_STATS is not defined the AdvanSys /proc files only
+ contain adapter and device configuration information.
+
+Driver LILO Option
+
+If init/main.c is modified as described in the 'Directions for Adding
+the AdvanSys Driver to Linux' section (B.4.) above, the driver will
+recognize the 'advansys' LILO command line and /etc/lilo.conf option.
+This option can be used to either disable I/O port scanning or to limit
+scanning to 1 - 4 I/O ports. Regardless of the option setting EISA and
+PCI boards will still be searched for and detected. This option only
+affects searching for ISA and VL boards.
+
+Examples:
+ 1. Eliminate I/O port scanning:
+ boot: linux advansys=
+ or
+ boot: linux advansys=0x0
+ 2. Limit I/O port scanning to one I/O port:
+ boot: linux advansys=0x110
+ 3. Limit I/O port scanning to four I/O ports:
+ boot: linux advansys=0x110,0x210,0x230,0x330
+
+For a loadable module the same effect can be achieved by setting
+the 'asc_iopflag' variable and 'asc_ioport' array when loading
+the driver, e.g.
+
+ insmod advansys.o asc_iopflag=1 asc_ioport=0x110,0x330
+
+If ADVANSYS_DEBUG is defined a 5th (ASC_NUM_IOPORT_PROBE + 1)
+I/O Port may be added to specify the driver debug level. Refer to
+the 'Driver Compile Time Options and Debugging' section above for
+more information.
+
+Credits (Chronological Order)
+
+Bob Frey <bfrey@turbolinux.com.cn> wrote the AdvanSys SCSI driver
+and maintained it up to 3.3F. He continues to answer questions
+and help maintain the driver.
+
+Nathan Hartwell <mage@cdc3.cdc.net> provided the directions and
+basis for the Linux v1.3.X changes which were included in the
+1.2 release.
+
+Thomas E Zerucha <zerucha@shell.portal.com> pointed out a bug
+in advansys_biosparam() which was fixed in the 1.3 release.
+
+Erik Ratcliffe <erik@caldera.com> has done testing of the
+AdvanSys driver in the Caldera releases.
+
+Rik van Riel <H.H.vanRiel@fys.ruu.nl> provided a patch to
+AscWaitTixISRDone() which he found necessary to make the
+driver work with a SCSI-1 disk.
+
+Mark Moran <mmoran@mmoran.com> has helped test Ultra-Wide
+support in the 3.1A driver.
+
+Doug Gilbert <dgilbert@interlog.com> has made changes and
+suggestions to improve the driver and done a lot of testing.
+
+Ken Mort <ken@mort.net> reported a DEBUG compile bug fixed
+in 3.2K.
+
+Tom Rini <trini@kernel.crashing.org> provided the CONFIG_ISA
+patch and helped with PowerPC wide and narrow board support.
+
+Philip Blundell <philb@gnu.org> provided an
+advansys_interrupts_enabled patch.
+
+Dave Jones <dave@denial.force9.co.uk> reported the compiler
+warnings generated when CONFIG_PROC_FS was not defined in
+the 3.2M driver.
+
+Jerry Quinn <jlquinn@us.ibm.com> fixed PowerPC support (endian
+problems) for wide cards.
+
+Bryan Henderson <bryanh@giraffe-data.com> helped debug narrow
+card error handling.
+
+Manuel Veloso <veloso@pobox.com> worked hard on PowerPC narrow
+board support and fixed a bug in AscGetEEPConfig().
+
+Arnaldo Carvalho de Melo <acme@conectiva.com.br> made
+save_flags/restore_flags changes.
+
+Andy Kellner <AKellner@connectcom.net> continued the Advansys SCSI
+driver development for ConnectCom (Version > 3.3F).
+
+Ken Witherow for extensive testing during the development of version 3.4.
diff --git a/Documentation/scsi/aha152x.txt b/Documentation/scsi/aha152x.txt
new file mode 100644
index 000000000..94848734a
--- /dev/null
+++ b/Documentation/scsi/aha152x.txt
@@ -0,0 +1,183 @@
+$Id: README.aha152x,v 1.2 1999/12/25 15:32:30 fischer Exp fischer $
+Adaptec AHA-1520/1522 SCSI driver for Linux (aha152x)
+
+Copyright 1993-1999 Jürgen Fischer <fischer@norbit.de>
+TC1550 patches by Luuk van Dijk (ldz@xs4all.nl)
+
+
+In Revision 2 the driver was modified a lot (especially the
+bottom-half handler complete()).
+
+The driver is much cleaner now, has support for the new
+error handling code in 2.3, produced less cpu load (much
+less polling loops), has slightly higher throughput (at
+least on my ancient test box; a i486/33Mhz/20MB).
+
+
+CONFIGURATION ARGUMENTS:
+
+IOPORT base io address (0x340/0x140)
+IRQ interrupt level (9-12; default 11)
+SCSI_ID scsi id of controller (0-7; default 7)
+RECONNECT allow targets to disconnect from the bus (0/1; default 1 [on])
+PARITY enable parity checking (0/1; default 1 [on])
+SYNCHRONOUS enable synchronous transfers (0/1; default 1 [on])
+DELAY: bus reset delay (default 100)
+EXT_TRANS: enable extended translation (0/1: default 0 [off])
+ (see NOTES)
+
+COMPILE TIME CONFIGURATION (go into AHA152X in drivers/scsi/Makefile):
+
+-DAUTOCONF
+ use configuration the controller reports (AHA-152x only)
+
+-DSKIP_BIOSTEST
+ Don't test for BIOS signature (AHA-1510 or disabled BIOS)
+
+-DSETUP0="{ IOPORT, IRQ, SCSI_ID, RECONNECT, PARITY, SYNCHRONOUS, DELAY, EXT_TRANS }"
+ override for the first controller
+
+-DSETUP1="{ IOPORT, IRQ, SCSI_ID, RECONNECT, PARITY, SYNCHRONOUS, DELAY, EXT_TRANS }"
+ override for the second controller
+
+-DAHA152X_DEBUG
+ enable debugging output
+
+-DAHA152X_STAT
+ enable some statistics
+
+
+LILO COMMAND LINE OPTIONS:
+
+aha152x=<IOPORT>[,<IRQ>[,<SCSI-ID>[,<RECONNECT>[,<PARITY>[,<SYNCHRONOUS>[,<DELAY> [,<EXT_TRANS]]]]]]]
+
+ The normal configuration can be overridden by specifying a command line.
+ When you do this, the BIOS test is skipped. Entered values have to be
+ valid (known). Don't use values that aren't supported under normal
+ operation. If you think that you need other values: contact me.
+ For two controllers use the aha152x statement twice.
+
+
+SYMBOLS FOR MODULE CONFIGURATION:
+
+Choose from 2 alternatives:
+
+1. specify everything (old)
+
+aha152x=IOPORT,IRQ,SCSI_ID,RECONNECT,PARITY,SYNCHRONOUS,DELAY,EXT_TRANS
+ configuration override for first controller
+
+
+aha152x1=IOPORT,IRQ,SCSI_ID,RECONNECT,PARITY,SYNCHRONOUS,DELAY,EXT_TRANS
+ configuration override for second controller
+
+2. specify only what you need to (irq or io is required; new)
+
+io=IOPORT0[,IOPORT1]
+ IOPORT for first and second controller
+
+irq=IRQ0[,IRQ1]
+ IRQ for first and second controller
+
+scsiid=SCSIID0[,SCSIID1]
+ SCSIID for first and second controller
+
+reconnect=RECONNECT0[,RECONNECT1]
+ allow targets to disconnect for first and second controller
+
+parity=PAR0[PAR1]
+ use parity for first and second controller
+
+sync=SYNCHRONOUS0[,SYNCHRONOUS1]
+ enable synchronous transfers for first and second controller
+
+delay=DELAY0[,DELAY1]
+ reset DELAY for first and second controller
+
+exttrans=EXTTRANS0[,EXTTRANS1]
+ enable extended translation for first and second controller
+
+
+If you use both alternatives the first will be taken.
+
+
+NOTES ON EXT_TRANS:
+
+SCSI uses block numbers to address blocks/sectors on a device.
+The BIOS uses a cylinder/head/sector addressing scheme (C/H/S)
+scheme instead. DOS expects a BIOS or driver that understands this
+C/H/S addressing.
+
+The number of cylinders/heads/sectors is called geometry and is required
+as base for requests in C/H/S addressing. SCSI only knows about the
+total capacity of disks in blocks (sectors).
+
+Therefore the SCSI BIOS/DOS driver has to calculate a logical/virtual
+geometry just to be able to support that addressing scheme. The geometry
+returned by the SCSI BIOS is a pure calculation and has nothing to
+do with the real/physical geometry of the disk (which is usually
+irrelevant anyway).
+
+Basically this has no impact at all on Linux, because it also uses block
+instead of C/H/S addressing. Unfortunately C/H/S addressing is also used
+in the partition table and therefore every operating system has to know
+the right geometry to be able to interpret it.
+
+Moreover there are certain limitations to the C/H/S addressing scheme,
+namely the address space is limited to up to 255 heads, up to 63 sectors
+and a maximum of 1023 cylinders.
+
+The AHA-1522 BIOS calculates the geometry by fixing the number of heads
+to 64, the number of sectors to 32 and by calculating the number of
+cylinders by dividing the capacity reported by the disk by 64*32 (1 MB).
+This is considered to be the default translation.
+
+With respect to the limit of 1023 cylinders using C/H/S you can only
+address the first GB of your disk in the partition table. Therefore
+BIOSes of some newer controllers based on the AIC-6260/6360 support
+extended translation. This means that the BIOS uses 255 for heads,
+63 for sectors and then divides the capacity of the disk by 255*63
+(about 8 MB), as soon it sees a disk greater than 1 GB. That results
+in a maximum of about 8 GB addressable diskspace in the partition table
+(but there are already bigger disks out there today).
+
+To make it even more complicated the translation mode might/might
+not be configurable in certain BIOS setups.
+
+This driver does some more or less failsafe guessing to get the
+geometry right in most cases:
+
+- for disks<1GB: use default translation (C/32/64)
+
+- for disks>1GB:
+ - take current geometry from the partition table
+ (using scsicam_bios_param and accept only `valid' geometries,
+ ie. either (C/32/64) or (C/63/255)). This can be extended translation
+ even if it's not enabled in the driver.
+
+ - if that fails, take extended translation if enabled by override,
+ kernel or module parameter, otherwise take default translation and
+ ask the user for verification. This might on not yet partitioned
+ disks.
+
+
+REFERENCES USED:
+
+ "AIC-6260 SCSI Chip Specification", Adaptec Corporation.
+
+ "SCSI COMPUTER SYSTEM INTERFACE - 2 (SCSI-2)", X3T9.2/86-109 rev. 10h
+
+ "Writing a SCSI device driver for Linux", Rik Faith (faith@cs.unc.edu)
+
+ "Kernel Hacker's Guide", Michael K. Johnson (johnsonm@sunsite.unc.edu)
+
+ "Adaptec 1520/1522 User's Guide", Adaptec Corporation.
+
+ Michael K. Johnson (johnsonm@sunsite.unc.edu)
+
+ Drew Eckhardt (drew@cs.colorado.edu)
+
+ Eric Youngdale (eric@andante.org)
+
+ special thanks to Eric Youngdale for the free(!) supplying the
+ documentation on the chip.
diff --git a/Documentation/scsi/aic79xx.txt b/Documentation/scsi/aic79xx.txt
new file mode 100644
index 000000000..e2d327300
--- /dev/null
+++ b/Documentation/scsi/aic79xx.txt
@@ -0,0 +1,497 @@
+====================================================================
+= Adaptec Ultra320 Family Manager Set =
+= =
+= README for =
+= The Linux Operating System =
+====================================================================
+
+The following information is available in this file:
+
+ 1. Supported Hardware
+ 2. Version History
+ 3. Command Line Options
+ 4. Additional Notes
+ 5. Contacting Adaptec
+
+
+1. Supported Hardware
+
+ The following Adaptec SCSI Host Adapters are supported by this
+ driver set.
+
+ Ultra320 ASIC Description
+ ----------------------------------------------------------------
+ AIC-7901A Single Channel 64-bit PCI-X 133MHz to
+ Ultra320 SCSI ASIC
+ AIC-7901B Single Channel 64-bit PCI-X 133MHz to
+ Ultra320 SCSI ASIC with Retained Training
+ AIC-7902A4 Dual Channel 64-bit PCI-X 133MHz to
+ Ultra320 SCSI ASIC
+ AIC-7902B Dual Channel 64-bit PCI-X 133MHz to
+ Ultra320 SCSI ASIC with Retained Training
+
+ Ultra320 Adapters Description ASIC
+ --------------------------------------------------------------------------
+ Adaptec SCSI Card 39320 Dual Channel 64-bit PCI-X 133MHz to 7902A4/7902B
+ Ultra320 SCSI Card (one external
+ 68-pin, two internal 68-pin)
+ Adaptec SCSI Card 39320A Dual Channel 64-bit PCI-X 133MHz to 7902B
+ Ultra320 SCSI Card (one external
+ 68-pin, two internal 68-pin)
+ Adaptec SCSI Card 39320D Dual Channel 64-bit PCI-X 133MHz to 7902A4
+ Ultra320 SCSI Card (two external VHDC
+ and one internal 68-pin)
+ Adaptec SCSI Card 39320D Dual Channel 64-bit PCI-X 133MHz to 7902A4
+ Ultra320 SCSI Card (two external VHDC
+ and one internal 68-pin) based on the
+ AIC-7902B ASIC
+ Adaptec SCSI Card 29320 Single Channel 64-bit PCI-X 133MHz to 7901A
+ Ultra320 SCSI Card (one external
+ 68-pin, two internal 68-pin, one
+ internal 50-pin)
+ Adaptec SCSI Card 29320A Single Channel 64-bit PCI-X 133MHz to 7901B
+ Ultra320 SCSI Card (one external
+ 68-pin, two internal 68-pin, one
+ internal 50-pin)
+ Adaptec SCSI Card 29320LP Single Channel 64-bit Low Profile 7901A
+ PCI-X 133MHz to Ultra320 SCSI Card
+ (One external VHDC, one internal
+ 68-pin)
+ Adaptec SCSI Card 29320ALP Single Channel 64-bit Low Profile 7901B
+ PCI-X 133MHz to Ultra320 SCSI Card
+ (One external VHDC, one internal
+ 68-pin)
+2. Version History
+
+ 3.0 (December 1st, 2005)
+ - Updated driver to use SCSI transport class infrastructure
+ - Upported sequencer and core fixes from adaptec released
+ version 2.0.15 of the driver.
+
+ 1.3.11 (July 11, 2003)
+ - Fix several deadlock issues.
+ - Add 29320ALP and 39320B Id's.
+
+ 1.3.10 (June 3rd, 2003)
+ - Align the SCB_TAG field on a 16byte boundary. This avoids
+ SCB corruption on some PCI-33 busses.
+ - Correct non-zero luns on Rev B. hardware.
+ - Update for change in 2.5.X SCSI proc FS interface.
+ - When negotiation async via an 8bit WDTR message, send
+ an SDTR with an offset of 0 to be sure the target
+ knows we are async. This works around a firmware defect
+ in the Quantum Atlas 10K.
+ - Implement controller suspend and resume.
+ - Clear PCI error state during driver attach so that we
+ don't disable memory mapped I/O due to a stray write
+ by some other driver probe that occurred before we
+ claimed the controller.
+
+ 1.3.9 (May 22nd, 2003)
+ - Fix compiler errors.
+ - Remove S/G splitting for segments that cross a 4GB boundary.
+ This is guaranteed not to happen in Linux.
+ - Add support for scsi_report_device_reset() found in
+ 2.5.X kernels.
+ - Add 7901B support.
+ - Simplify handling of the packetized lun Rev A workaround.
+ - Correct and simplify handling of the ignore wide residue
+ message. The previous code would fail to report a residual
+ if the transaction data length was even and we received
+ an IWR message.
+
+ 1.3.8 (April 29th, 2003)
+ - Fix types accessed via the command line interface code.
+ - Perform a few firmware optimizations.
+ - Fix "Unexpected PKT busfree" errors.
+ - Use a sequencer interrupt to notify the host of
+ commands with bad status. We defer the notification
+ until there are no outstanding selections to ensure
+ that the host is interrupted for as short a time as
+ possible.
+ - Remove pre-2.2.X support.
+ - Add support for new 2.5.X interrupt API.
+ - Correct big-endian architecture support.
+
+ 1.3.7 (April 16th, 2003)
+ - Use del_timer_sync() to ensure that no timeouts
+ are pending during controller shutdown.
+ - For pre-2.5.X kernels, carefully adjust our segment
+ list size to avoid SCSI malloc pool fragmentation.
+ - Cleanup channel display in our /proc output.
+ - Workaround duplicate device entries in the mid-layer
+ device list during add-single-device.
+
+ 1.3.6 (March 28th, 2003)
+ - Correct a double free in the Domain Validation code.
+ - Correct a reference to free'ed memory during controller
+ shutdown.
+ - Reset the bus on an SE->LVD change. This is required
+ to reset our transceivers.
+
+ 1.3.5 (March 24th, 2003)
+ - Fix a few register window mode bugs.
+ - Include read streaming in the PPR flags we display in
+ diagnostics as well as /proc.
+ - Add PCI hot plug support for 2.5.X kernels.
+ - Correct default precompensation value for RevA hardware.
+ - Fix Domain Validation thread shutdown.
+ - Add a firmware workaround to make the LED blink
+ brighter during packetized operations on the H2A4.
+ - Correct /proc display of user read streaming settings.
+ - Simplify driver locking by releasing the io_request_lock
+ upon driver entry from the mid-layer.
+ - Cleanup command line parsing and move much of this code
+ to aiclib.
+
+ 1.3.4 (February 28th, 2003)
+ - Correct a race condition in our error recovery handler.
+ - Allow Test Unit Ready commands to take a full 5 seconds
+ during Domain Validation.
+
+ 1.3.2 (February 19th, 2003)
+ - Correct a Rev B. regression due to the GEM318
+ compatibility fix included in 1.3.1.
+
+ 1.3.1 (February 11th, 2003)
+ - Add support for the 39320A.
+ - Improve recovery for certain PCI-X errors.
+ - Fix handling of LQ/DATA/LQ/DATA for the
+ same write transaction that can occur without
+ interveining training.
+ - Correct compatibility issues with the GEM318
+ enclosure services device.
+ - Correct data corruption issue that occurred under
+ high tag depth write loads.
+ - Adapt to a change in the 2.5.X daemonize() API.
+ - Correct a "Missing case in ahd_handle_scsiint" panic.
+
+ 1.3.0 (January 21st, 2003)
+ - Full regression testing for all U320 products completed.
+ - Added abort and target/lun reset error recovery handler and
+ interrupt coalescing.
+
+ 1.2.0 (November 14th, 2002)
+ - Added support for Domain Validation
+ - Add support for the Hewlett-Packard version of the 39320D
+ and AIC-7902 adapters.
+ Support for previous adapters has not been fully tested and should
+ only be used at the customer's own risk.
+
+ 1.1.1 (September 24th, 2002)
+ - Added support for the Linux 2.5.X kernel series
+
+ 1.1.0 (September 17th, 2002)
+ - Added support for four additional SCSI products:
+ ASC-39320, ASC-29320, ASC-29320LP, AIC-7901.
+
+ 1.0.0 (May 30th, 2002)
+ - Initial driver release.
+
+ 2.1. Software/Hardware Features
+ - Support for the SPI-4 "Ultra320" standard:
+ - 320MB/s transfer rates
+ - Packetized SCSI Protocol at 160MB/s and 320MB/s
+ - Quick Arbitration Selection (QAS)
+ - Retained Training Information (Rev B. ASIC only)
+ - Interrupt Coalescing
+ - Initiator Mode (target mode not currently
+ supported)
+ - Support for the PCI-X standard up to 133MHz
+ - Support for the PCI v2.2 standard
+ - Domain Validation
+
+ 2.2. Operating System Support:
+ - Redhat Linux 7.2, 7.3, 8.0, Advanced Server 2.1
+ - SuSE Linux 7.3, 8.0, 8.1, Enterprise Server 7
+ - only Intel and AMD x86 supported at this time
+ - >4GB memory configurations supported.
+
+ Refer to the User's Guide for more details on this.
+
+3. Command Line Options
+
+ WARNING: ALTERING OR ADDING THESE DRIVER PARAMETERS
+ INCORRECTLY CAN RENDER YOUR SYSTEM INOPERABLE.
+ USE THEM WITH CAUTION.
+
+ Put a .conf file in the /etc/modprobe.d/ directory and add/edit a
+ line containing 'options aic79xx aic79xx=[command[,command...]]' where
+ 'command' is one or more of the following:
+ -----------------------------------------------------------------
+ Option: verbose
+ Definition: enable additional informative messages during
+ driver operation.
+ Possible Values: This option is a flag
+ Default Value: disabled
+ -----------------------------------------------------------------
+ Option: debug:[value]
+ Definition: Enables various levels of debugging information
+ The bit definitions for the debugging mask can
+ be found in drivers/scsi/aic7xxx/aic79xx.h under
+ the "Debug" heading.
+ Possible Values: 0x0000 = no debugging, 0xffff = full debugging
+ Default Value: 0x0000
+ -----------------------------------------------------------------
+ Option: no_reset
+ Definition: Do not reset the bus during the initial probe
+ phase
+ Possible Values: This option is a flag
+ Default Value: disabled
+ -----------------------------------------------------------------
+ Option: extended
+ Definition: Force extended translation on the controller
+ Possible Values: This option is a flag
+ Default Value: disabled
+ -----------------------------------------------------------------
+ Option: periodic_otag
+ Definition: Send an ordered tag periodically to prevent
+ tag starvation. Needed for some older devices
+ Possible Values: This option is a flag
+ Default Value: disabled
+ -----------------------------------------------------------------
+ Option: reverse_scan
+ Definition: Probe the scsi bus in reverse order, starting
+ with target 15
+ Possible Values: This option is a flag
+ Default Value: disabled
+ -----------------------------------------------------------------
+ Option: global_tag_depth
+ Definition: Global tag depth for all targets on all busses.
+ This option sets the default tag depth which
+ may be selectively overridden vi the tag_info
+ option.
+ Possible Values: 1 - 253
+ Default Value: 32
+ -----------------------------------------------------------------
+ Option: tag_info:{{value[,value...]}[,{value[,value...]}...]}
+ Definition: Set the per-target tagged queue depth on a
+ per controller basis. Both controllers and targets
+ may be omitted indicating that they should retain
+ the default tag depth.
+ Examples: tag_info:{{16,32,32,64,8,8,,32,32,32,32,32,32,32,32,32}
+ On Controller 0
+ specifies a tag depth of 16 for target 0
+ specifies a tag depth of 64 for target 3
+ specifies a tag depth of 8 for targets 4 and 5
+ leaves target 6 at the default
+ specifies a tag depth of 32 for targets 1,2,7-15
+ All other targets retain the default depth.
+
+ tag_info:{{},{32,,32}}
+ On Controller 1
+ specifies a tag depth of 32 for targets 0 and 2
+ All other targets retain the default depth.
+
+ Possible Values: 1 - 253
+ Default Value: 32
+ -----------------------------------------------------------------
+ Option: rd_strm: {rd_strm_bitmask[,rd_strm_bitmask...]}
+ Definition: Enable read streaming on a per target basis.
+ The rd_strm_bitmask is a 16 bit hex value in which
+ each bit represents a target. Setting the target's
+ bit to '1' enables read streaming for that
+ target. Controllers may be omitted indicating that
+ they should retain the default read streaming setting.
+ Example: rd_strm:{0x0041}
+ On Controller 0
+ enables read streaming for targets 0 and 6.
+ disables read streaming for targets 1-5,7-15.
+ All other targets retain the default read
+ streaming setting.
+ Example: rd_strm:{0x0023,,0xFFFF}
+ On Controller 0
+ enables read streaming for targets 1,2, and 5.
+ disables read streaming for targets 3,4,6-15.
+ On Controller 2
+ enables read streaming for all targets.
+ All other targets retain the default read
+ streaming setting.
+
+ Possible Values: 0x0000 - 0xffff
+ Default Value: 0x0000
+ -----------------------------------------------------------------
+ Option: dv: {value[,value...]}
+ Definition: Set Domain Validation Policy on a per-controller basis.
+ Controllers may be omitted indicating that
+ they should retain the default read streaming setting.
+ Example: dv:{-1,0,,1,1,0}
+ On Controller 0 leave DV at its default setting.
+ On Controller 1 disable DV.
+ Skip configuration on Controller 2.
+ On Controllers 3 and 4 enable DV.
+ On Controller 5 disable DV.
+
+ Possible Values: < 0 Use setting from serial EEPROM.
+ 0 Disable DV
+ > 0 Enable DV
+ Default Value: DV Serial EEPROM configuration setting.
+ -----------------------------------------------------------------
+ Option: seltime:[value]
+ Definition: Specifies the selection timeout value
+ Possible Values: 0 = 256ms, 1 = 128ms, 2 = 64ms, 3 = 32ms
+ Default Value: 0
+ -----------------------------------------------------------------
+
+ *** The following three options should only be changed at ***
+ *** the direction of a technical support representative. ***
+
+ -----------------------------------------------------------------
+ Option: precomp: {value[,value...]}
+ Definition: Set IO Cell precompensation value on a per-controller
+ basis.
+ Controllers may be omitted indicating that
+ they should retain the default precompensation setting.
+ Example: precomp:{0x1}
+ On Controller 0 set precompensation to 1.
+ Example: precomp:{1,,7}
+ On Controller 0 set precompensation to 1.
+ On Controller 2 set precompensation to 8.
+
+ Possible Values: 0 - 7
+ Default Value: Varies based on chip revision
+ -----------------------------------------------------------------
+ Option: slewrate: {value[,value...]}
+ Definition: Set IO Cell slew rate on a per-controller basis.
+ Controllers may be omitted indicating that
+ they should retain the default slew rate setting.
+ Example: slewrate:{0x1}
+ On Controller 0 set slew rate to 1.
+ Example: slewrate :{1,,8}
+ On Controller 0 set slew rate to 1.
+ On Controller 2 set slew rate to 8.
+
+ Possible Values: 0 - 15
+ Default Value: Varies based on chip revision
+ -----------------------------------------------------------------
+ Option: amplitude: {value[,value...]}
+ Definition: Set IO Cell signal amplitude on a per-controller basis.
+ Controllers may be omitted indicating that
+ they should retain the default read streaming setting.
+ Example: amplitude:{0x1}
+ On Controller 0 set amplitude to 1.
+ Example: amplitude :{1,,7}
+ On Controller 0 set amplitude to 1.
+ On Controller 2 set amplitude to 7.
+
+ Possible Values: 1 - 7
+ Default Value: Varies based on chip revision
+ -----------------------------------------------------------------
+
+ Example: 'options aic79xx aic79xx=verbose,rd_strm:{{0x0041}}'
+ enables verbose output in the driver and turns read streaming on
+ for targets 0 and 6 of Controller 0.
+
+4. Additional Notes
+
+ 4.1. Known/Unresolved or FYI Issues
+
+ * Under SuSE Linux Enterprise 7, the driver may fail to operate
+ correctly due to a problem with PCI interrupt routing in the
+ Linux kernel. Please contact SuSE for an updated Linux
+ kernel.
+
+ 4.2. Third-Party Compatibility Issues
+
+ * Adaptec only supports Ultra320 hard drives running
+ the latest firmware available. Please check with
+ your hard drive manufacturer to ensure you have the
+ latest version.
+
+ 4.3. Operating System or Technology Limitations
+
+ * PCI Hot Plug is untested and may cause the operating system
+ to stop responding.
+ * Luns that are not numbered contiguously starting with 0 might not
+ be automatically probed during system startup. This is a limitation
+ of the OS. Please contact your Linux vendor for instructions on
+ manually probing non-contiguous luns.
+ * Using the Driver Update Disk version of this package during OS
+ installation under RedHat might result in two versions of this
+ driver being installed into the system module directory. This
+ might cause problems with the /sbin/mkinitrd program and/or
+ other RPM packages that try to install system modules. The best
+ way to correct this once the system is running is to install
+ the latest RPM package version of this driver, available from
+ http://www.adaptec.com.
+
+
+5. Adaptec Customer Support
+
+ A Technical Support Identification (TSID) Number is required for
+ Adaptec technical support.
+ - The 12-digit TSID can be found on the white barcode-type label
+ included inside the box with your product. The TSID helps us
+ provide more efficient service by accurately identifying your
+ product and support status.
+
+ Support Options
+ - Search the Adaptec Support Knowledgebase (ASK) at
+ http://ask.adaptec.com for articles, troubleshooting tips, and
+ frequently asked questions about your product.
+ - For support via Email, submit your question to Adaptec's
+ Technical Support Specialists at http://ask.adaptec.com/.
+
+ North America
+ - Visit our Web site at http://www.adaptec.com/.
+ - For information about Adaptec's support options, call
+ 408-957-2550, 24 hours a day, 7 days a week.
+ - To speak with a Technical Support Specialist,
+ * For hardware products, call 408-934-7274,
+ Monday to Friday, 3:00 am to 5:00 pm, PDT.
+ * For RAID and Fibre Channel products, call 321-207-2000,
+ Monday to Friday, 3:00 am to 5:00 pm, PDT.
+ To expedite your service, have your computer with you.
+ - To order Adaptec products, including accessories and cables,
+ call 408-957-7274. To order cables online go to
+ http://www.adaptec.com/buy-cables/.
+
+ Europe
+ - Visit our Web site at http://www.adaptec.com/en-US/_common/world_index.
+ - To speak with a Technical Support Specialist, call, or email,
+ * German: +49 89 4366 5522, Monday-Friday, 9:00-17:00 CET,
+ http://ask-de.adaptec.com/.
+ * French: +49 89 4366 5533, Monday-Friday, 9:00-17:00 CET,
+ http://ask-fr.adaptec.com/.
+ * English: +49 89 4366 5544, Monday-Friday, 9:00-17:00 GMT,
+ http://ask.adaptec.com/.
+ - You can order Adaptec cables online at
+ http://www.adaptec.com/buy-cables/.
+
+ Japan
+ - Visit our web site at http://www.adaptec.co.jp/.
+ - To speak with a Technical Support Specialist, call
+ +81 3 5308 6120, Monday-Friday, 9:00 a.m. to 12:00 p.m.,
+ 1:00 p.m. to 6:00 p.m.
+
+-------------------------------------------------------------------
+/*
+ * Copyright (c) 2003 Adaptec Inc. 691 S. Milpitas Blvd., Milpitas CA 95035 USA.
+ * All rights reserved.
+ *
+ * You are permitted to redistribute, use and modify this README file in whole
+ * or in part in conjunction with redistribution of software governed by the
+ * General Public License, provided that the following conditions are met:
+ * 1. Redistributions of README file must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ * 3. Modifications or new contributions must be attributed in a copyright
+ * notice identifying the author ("Contributor") and added below the
+ * original copyright notice. The copyright notice is for purposes of
+ * identifying contributors and should not be deemed as permission to alter
+ * the permissions given by Adaptec.
+ *
+ * THIS README FILE IS PROVIDED BY ADAPTEC AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ANY
+ * WARRANTIES OF NON-INFRINGEMENT OR THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * ADAPTEC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS README
+ * FILE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
diff --git a/Documentation/scsi/aic7xxx.txt b/Documentation/scsi/aic7xxx.txt
new file mode 100644
index 000000000..7c5d0223d
--- /dev/null
+++ b/Documentation/scsi/aic7xxx.txt
@@ -0,0 +1,394 @@
+====================================================================
+= Adaptec Aic7xxx Fast -> Ultra160 Family Manager Set v7.0 =
+= README for =
+= The Linux Operating System =
+====================================================================
+
+The following information is available in this file:
+
+ 1. Supported Hardware
+ 2. Version History
+ 3. Command Line Options
+ 4. Contacting Adaptec
+
+1. Supported Hardware
+
+ The following Adaptec SCSI Chips and Host Adapters are supported by
+ the aic7xxx driver.
+
+ Chip MIPS Host Bus MaxSync MaxWidth SCBs Notes
+ ---------------------------------------------------------------
+ aic7770 10 EISA/VL 10MHz 16Bit 4 1
+ aic7850 10 PCI/32 10MHz 8Bit 3
+ aic7855 10 PCI/32 10MHz 8Bit 3
+ aic7856 10 PCI/32 10MHz 8Bit 3
+ aic7859 10 PCI/32 20MHz 8Bit 3
+ aic7860 10 PCI/32 20MHz 8Bit 3
+ aic7870 10 PCI/32 10MHz 16Bit 16
+ aic7880 10 PCI/32 20MHz 16Bit 16
+ aic7890 20 PCI/32 40MHz 16Bit 16 3 4 5 6 7 8
+ aic7891 20 PCI/64 40MHz 16Bit 16 3 4 5 6 7 8
+ aic7892 20 PCI/64-66 80MHz 16Bit 16 3 4 5 6 7 8
+ aic7895 15 PCI/32 20MHz 16Bit 16 2 3 4 5
+ aic7895C 15 PCI/32 20MHz 16Bit 16 2 3 4 5 8
+ aic7896 20 PCI/32 40MHz 16Bit 16 2 3 4 5 6 7 8
+ aic7897 20 PCI/64 40MHz 16Bit 16 2 3 4 5 6 7 8
+ aic7899 20 PCI/64-66 80MHz 16Bit 16 2 3 4 5 6 7 8
+
+ 1. Multiplexed Twin Channel Device - One controller servicing two
+ busses.
+ 2. Multi-function Twin Channel Device - Two controllers on one chip.
+ 3. Command Channel Secondary DMA Engine - Allows scatter gather list
+ and SCB prefetch.
+ 4. 64 Byte SCB Support - Allows disconnected, untagged request table
+ for all possible target/lun combinations.
+ 5. Block Move Instruction Support - Doubles the speed of certain
+ sequencer operations.
+ 6. `Bayonet' style Scatter Gather Engine - Improves S/G prefetch
+ performance.
+ 7. Queuing Registers - Allows queuing of new transactions without
+ pausing the sequencer.
+ 8. Multiple Target IDs - Allows the controller to respond to selection
+ as a target on multiple SCSI IDs.
+
+ Controller Chip Host-Bus Int-Connectors Ext-Connectors Notes
+ --------------------------------------------------------------------------
+ AHA-274X[A] aic7770 EISA SE-50M SE-HD50F
+ AHA-274X[A]W aic7770 EISA SE-HD68F SE-HD68F
+ SE-50M
+ AHA-274X[A]T aic7770 EISA 2 X SE-50M SE-HD50F
+ AHA-2842 aic7770 VL SE-50M SE-HD50F
+ AHA-2940AU aic7860 PCI/32 SE-50M SE-HD50F
+ AVA-2902I aic7860 PCI/32 SE-50M
+ AVA-2902E aic7860 PCI/32 SE-50M
+ AVA-2906 aic7856 PCI/32 SE-50M SE-DB25F
+ APC-7850 aic7850 PCI/32 SE-50M 1
+ AVA-2940 aic7860 PCI/32 SE-50M
+ AHA-2920B aic7860 PCI/32 SE-50M
+ AHA-2930B aic7860 PCI/32 SE-50M
+ AHA-2920C aic7856 PCI/32 SE-50M SE-HD50F
+ AHA-2930C aic7860 PCI/32 SE-50M
+ AHA-2930C aic7860 PCI/32 SE-50M
+ AHA-2910C aic7860 PCI/32 SE-50M
+ AHA-2915C aic7860 PCI/32 SE-50M
+ AHA-2940AU/CN aic7860 PCI/32 SE-50M SE-HD50F
+ AHA-2944W aic7870 PCI/32 HVD-HD68F HVD-HD68F
+ HVD-50M
+ AHA-3940W aic7870 PCI/32 2 X SE-HD68F SE-HD68F 2
+ AHA-2940UW aic7880 PCI/32 SE-HD68F
+ SE-50M SE-HD68F
+ AHA-2940U aic7880 PCI/32 SE-50M SE-HD50F
+ AHA-2940D aic7880 PCI/32
+ aHA-2940 A/T aic7880 PCI/32
+ AHA-2940D A/T aic7880 PCI/32
+ AHA-3940UW aic7880 PCI/32 2 X SE-HD68F SE-HD68F 3
+ AHA-3940UWD aic7880 PCI/32 2 X SE-HD68F 2 X SE-VHD68F 3
+ AHA-3940U aic7880 PCI/32 2 X SE-50M SE-HD50F 3
+ AHA-2944UW aic7880 PCI/32 HVD-HD68F HVD-HD68F
+ HVD-50M
+ AHA-3944UWD aic7880 PCI/32 2 X HVD-HD68F 2 X HVD-VHD68F 3
+ AHA-4944UW aic7880 PCI/32
+ AHA-2930UW aic7880 PCI/32
+ AHA-2940UW Pro aic7880 PCI/32 SE-HD68F SE-HD68F 4
+ SE-50M
+ AHA-2940UW/CN aic7880 PCI/32
+ AHA-2940UDual aic7895 PCI/32
+ AHA-2940UWDual aic7895 PCI/32
+ AHA-3940UWD aic7895 PCI/32
+ AHA-3940AUW aic7895 PCI/32
+ AHA-3940AUWD aic7895 PCI/32
+ AHA-3940AU aic7895 PCI/32
+ AHA-3944AUWD aic7895 PCI/32 2 X HVD-HD68F 2 X HVD-VHD68F
+ AHA-2940U2B aic7890 PCI/32 LVD-HD68F LVD-HD68F
+ AHA-2940U2 OEM aic7891 PCI/64
+ AHA-2940U2W aic7890 PCI/32 LVD-HD68F LVD-HD68F
+ SE-HD68F
+ SE-50M
+ AHA-2950U2B aic7891 PCI/64 LVD-HD68F LVD-HD68F
+ AHA-2930U2 aic7890 PCI/32 LVD-HD68F SE-HD50F
+ SE-50M
+ AHA-3950U2B aic7897 PCI/64
+ AHA-3950U2D aic7897 PCI/64
+ AHA-29160 aic7892 PCI/64-66
+ AHA-29160 CPQ aic7892 PCI/64-66
+ AHA-29160N aic7892 PCI/32 LVD-HD68F SE-HD50F
+ SE-50M
+ AHA-29160LP aic7892 PCI/64-66
+ AHA-19160 aic7892 PCI/64-66
+ AHA-29150LP aic7892 PCI/64-66
+ AHA-29130LP aic7892 PCI/64-66
+ AHA-3960D aic7899 PCI/64-66 2 X LVD-HD68F 2 X LVD-VHD68F
+ LVD-50M
+ AHA-3960D CPQ aic7899 PCI/64-66 2 X LVD-HD68F 2 X LVD-VHD68F
+ LVD-50M
+ AHA-39160 aic7899 PCI/64-66 2 X LVD-HD68F 2 X LVD-VHD68F
+ LVD-50M
+
+ 1. No BIOS support
+ 2. DEC21050 PCI-PCI bridge with multiple controller chips on secondary bus
+ 3. DEC2115X PCI-PCI bridge with multiple controller chips on secondary bus
+ 4. All three SCSI connectors may be used simultaneously without
+ SCSI "stub" effects.
+
+2. Version History
+ 7.0 (4th August, 2005)
+ - Updated driver to use SCSI transport class infrastructure
+ - Upported sequencer and core fixes from last adaptec released
+ version of the driver.
+ 6.2.36 (June 3rd, 2003)
+ - Correct code that disables PCI parity error checking.
+ - Correct and simplify handling of the ignore wide residue
+ message. The previous code would fail to report a residual
+ if the transaction data length was even and we received
+ an IWR message.
+ - Add support for the 2.5.X EISA framework.
+ - Update for change in 2.5.X SCSI proc FS interface.
+ - Correct Domain Validation command-line option parsing.
+ - When negotiation async via an 8bit WDTR message, send
+ an SDTR with an offset of 0 to be sure the target
+ knows we are async. This works around a firmware defect
+ in the Quantum Atlas 10K.
+ - Clear PCI error state during driver attach so that we
+ don't disable memory mapped I/O due to a stray write
+ by some other driver probe that occurred before we
+ claimed the controller.
+
+ 6.2.35 (May 14th, 2003)
+ - Fix a few GCC 3.3 compiler warnings.
+ - Correct operation on EISA Twin Channel controller.
+ - Add support for 2.5.X's scsi_report_device_reset().
+
+ 6.2.34 (May 5th, 2003)
+ - Fix locking regression introduced in 6.2.29 that
+ could cause a lock order reversal between the io_request_lock
+ and our per-softc lock. This was only possible on RH9,
+ SuSE, and kernel.org 2.4.X kernels.
+
+ 6.2.33 (April 30th, 2003)
+ - Dynamically disable PCI parity error reporting after
+ 10 errors are reported to the user. These errors are
+ the result of some other device issuing PCI transactions
+ with bad parity. Once the user has been informed of the
+ problem, continuing to report the errors just degrades
+ our performance.
+
+ 6.2.32 (March 28th, 2003)
+ - Dynamically sized S/G lists to avoid SCSI malloc
+ pool fragmentation and SCSI mid-layer deadlock.
+
+ 6.2.28 (January 20th, 2003)
+ - Domain Validation Fixes
+ - Add ability to disable PCI parity error checking.
+ - Enhanced Memory Mapped I/O probe
+
+ 6.2.20 (November 7th, 2002)
+ - Added Domain Validation.
+
+3. Command Line Options
+
+ WARNING: ALTERING OR ADDING THESE DRIVER PARAMETERS
+ INCORRECTLY CAN RENDER YOUR SYSTEM INOPERABLE.
+ USE THEM WITH CAUTION.
+
+ Put a .conf file in the /etc/modprobe.d directory and add/edit a
+ line containing 'options aic7xxx aic7xxx=[command[,command...]]' where
+ 'command' is one or more of the following:
+ -----------------------------------------------------------------
+ Option: verbose
+ Definition: enable additional informative messages during
+ driver operation.
+ Possible Values: This option is a flag
+ Default Value: disabled
+ -----------------------------------------------------------------
+ Option: debug:[value]
+ Definition: Enables various levels of debugging information
+ Possible Values: 0x0000 = no debugging, 0xffff = full debugging
+ Default Value: 0x0000
+ -----------------------------------------------------------------
+ Option: no_probe
+ Option: probe_eisa_vl
+ Definition: Do not probe for EISA/VLB controllers.
+ This is a toggle. If the driver is compiled
+ to not probe EISA/VLB controllers by default,
+ specifying "no_probe" will enable this probing.
+ If the driver is compiled to probe EISA/VLB
+ controllers by default, specifying "no_probe"
+ will disable this probing.
+ Possible Values: This option is a toggle
+ Default Value: EISA/VLB probing is disabled by default.
+ -----------------------------------------------------------------
+ Option: pci_parity
+ Definition: Toggles the detection of PCI parity errors.
+ On many motherboards with VIA chipsets,
+ PCI parity is not generated correctly on the
+ PCI bus. It is impossible for the hardware to
+ differentiate between these "spurious" parity
+ errors and real parity errors. The symptom of
+ this problem is a stream of the message:
+ "scsi0: Data Parity Error Detected during address or write data phase"
+ output by the driver.
+ Possible Values: This option is a toggle
+ Default Value: PCI Parity Error reporting is disabled
+ -----------------------------------------------------------------
+ Option: no_reset
+ Definition: Do not reset the bus during the initial probe
+ phase
+ Possible Values: This option is a flag
+ Default Value: disabled
+ -----------------------------------------------------------------
+ Option: extended
+ Definition: Force extended translation on the controller
+ Possible Values: This option is a flag
+ Default Value: disabled
+ -----------------------------------------------------------------
+ Option: periodic_otag
+ Definition: Send an ordered tag periodically to prevent
+ tag starvation. Needed for some older devices
+ Possible Values: This option is a flag
+ Default Value: disabled
+ -----------------------------------------------------------------
+ Option: reverse_scan
+ Definition: Probe the scsi bus in reverse order, starting
+ with target 15
+ Possible Values: This option is a flag
+ Default Value: disabled
+ -----------------------------------------------------------------
+ Option: global_tag_depth:[value]
+ Definition: Global tag depth for all targets on all busses.
+ This option sets the default tag depth which
+ may be selectively overridden vi the tag_info
+ option.
+ Possible Values: 1 - 253
+ Default Value: 32
+ -----------------------------------------------------------------
+ Option: tag_info:{{value[,value...]}[,{value[,value...]}...]}
+ Definition: Set the per-target tagged queue depth on a
+ per controller basis. Both controllers and targets
+ may be omitted indicating that they should retain
+ the default tag depth.
+ Examples: tag_info:{{16,32,32,64,8,8,,32,32,32,32,32,32,32,32,32}
+ On Controller 0
+ specifies a tag depth of 16 for target 0
+ specifies a tag depth of 64 for target 3
+ specifies a tag depth of 8 for targets 4 and 5
+ leaves target 6 at the default
+ specifies a tag depth of 32 for targets 1,2,7-15
+ All other targets retain the default depth.
+
+ tag_info:{{},{32,,32}}
+ On Controller 1
+ specifies a tag depth of 32 for targets 0 and 2
+ All other targets retain the default depth.
+
+ Possible Values: 1 - 253
+ Default Value: 32
+ -----------------------------------------------------------------
+ Option: seltime:[value]
+ Definition: Specifies the selection timeout value
+ Possible Values: 0 = 256ms, 1 = 128ms, 2 = 64ms, 3 = 32ms
+ Default Value: 0
+ -----------------------------------------------------------------
+ Option: dv: {value[,value...]}
+ Definition: Set Domain Validation Policy on a per-controller basis.
+ Controllers may be omitted indicating that
+ they should retain the default read streaming setting.
+ Example: dv:{-1,0,,1,1,0}
+ On Controller 0 leave DV at its default setting.
+ On Controller 1 disable DV.
+ Skip configuration on Controller 2.
+ On Controllers 3 and 4 enable DV.
+ On Controller 5 disable DV.
+
+ Possible Values: < 0 Use setting from serial EEPROM.
+ 0 Disable DV
+ > 0 Enable DV
+
+ Default Value: SCSI-Select setting on controllers with a SCSI Select
+ option for DV. Otherwise, on for controllers supporting
+ U160 speeds and off for all other controller types.
+ -----------------------------------------------------------------
+
+ Example:
+ 'options aic7xxx aic7xxx=verbose,no_probe,tag_info:{{},{,,10}},seltime:1'
+ enables verbose logging, Disable EISA/VLB probing,
+ and set tag depth on Controller 1/Target 2 to 10 tags.
+
+4. Adaptec Customer Support
+
+ A Technical Support Identification (TSID) Number is required for
+ Adaptec technical support.
+ - The 12-digit TSID can be found on the white barcode-type label
+ included inside the box with your product. The TSID helps us
+ provide more efficient service by accurately identifying your
+ product and support status.
+
+ Support Options
+ - Search the Adaptec Support Knowledgebase (ASK) at
+ http://ask.adaptec.com for articles, troubleshooting tips, and
+ frequently asked questions about your product.
+ - For support via Email, submit your question to Adaptec's
+ Technical Support Specialists at http://ask.adaptec.com/.
+
+ North America
+ - Visit our Web site at http://www.adaptec.com/.
+ - For information about Adaptec's support options, call
+ 408-957-2550, 24 hours a day, 7 days a week.
+ - To speak with a Technical Support Specialist,
+ * For hardware products, call 408-934-7274,
+ Monday to Friday, 3:00 am to 5:00 pm, PDT.
+ * For RAID and Fibre Channel products, call 321-207-2000,
+ Monday to Friday, 3:00 am to 5:00 pm, PDT.
+ To expedite your service, have your computer with you.
+ - To order Adaptec products, including accessories and cables,
+ call 408-957-7274. To order cables online go to
+ http://www.adaptec.com/buy-cables/.
+
+ Europe
+ - Visit our Web site at http://www.adaptec.com/en-US/_common/world_index.
+ - To speak with a Technical Support Specialist, call, or email,
+ * German: +49 89 4366 5522, Monday-Friday, 9:00-17:00 CET,
+ http://ask-de.adaptec.com/.
+ * French: +49 89 4366 5533, Monday-Friday, 9:00-17:00 CET,
+ http://ask-fr.adaptec.com/.
+ * English: +49 89 4366 5544, Monday-Friday, 9:00-17:00 GMT,
+ http://ask.adaptec.com/.
+ - You can order Adaptec cables online at
+ http://www.adaptec.com/buy-cables/.
+
+ Japan
+ - Visit our web site at http://www.adaptec.co.jp/.
+ - To speak with a Technical Support Specialist, call
+ +81 3 5308 6120, Monday-Friday, 9:00 a.m. to 12:00 p.m.,
+ 1:00 p.m. to 6:00 p.m.
+
+-------------------------------------------------------------------
+/*
+ * Copyright (c) 2003 Adaptec Inc. 691 S. Milpitas Blvd., Milpitas CA 95035 USA.
+ * All rights reserved.
+ *
+ * You are permitted to redistribute, use and modify this README file in whole
+ * or in part in conjunction with redistribution of software governed by the
+ * General Public License, provided that the following conditions are met:
+ * 1. Redistributions of README file must retain the above copyright
+ * notice, this list of conditions, and the following disclaimer,
+ * without modification.
+ * 2. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ * 3. Modifications or new contributions must be attributed in a copyright
+ * notice identifying the author ("Contributor") and added below the
+ * original copyright notice. The copyright notice is for purposes of
+ * identifying contributors and should not be deemed as permission to alter
+ * the permissions given by Adaptec.
+ *
+ * THIS README FILE IS PROVIDED BY ADAPTEC AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, ANY
+ * WARRANTIES OF NON-INFRINGEMENT OR THE IMPLIED WARRANTIES OF MERCHANTABILITY
+ * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
+ * ADAPTEC OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS README
+ * FILE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
diff --git a/Documentation/scsi/arcmsr_spec.txt b/Documentation/scsi/arcmsr_spec.txt
new file mode 100644
index 000000000..45d9482c1
--- /dev/null
+++ b/Documentation/scsi/arcmsr_spec.txt
@@ -0,0 +1,574 @@
+*******************************************************************************
+** ARECA FIRMWARE SPEC
+*******************************************************************************
+** Usage of IOP331 adapter
+** (All In/Out is in IOP331's view)
+** 1. Message 0 --> InitThread message and return code
+** 2. Doorbell is used for RS-232 emulation
+** inDoorBell : bit0 -- data in ready
+** (DRIVER DATA WRITE OK)
+** bit1 -- data out has been read
+** (DRIVER DATA READ OK)
+** outDooeBell: bit0 -- data out ready
+** (IOP331 DATA WRITE OK)
+** bit1 -- data in has been read
+** (IOP331 DATA READ OK)
+** 3. Index Memory Usage
+** offset 0xf00 : for RS232 out (request buffer)
+** offset 0xe00 : for RS232 in (scratch buffer)
+** offset 0xa00 : for inbound message code message_rwbuffer
+** (driver send to IOP331)
+** offset 0xa00 : for outbound message code message_rwbuffer
+** (IOP331 send to driver)
+** 4. RS-232 emulation
+** Currently 128 byte buffer is used
+** 1st uint32_t : Data length (1--124)
+** Byte 4--127 : Max 124 bytes of data
+** 5. PostQ
+** All SCSI Command must be sent through postQ:
+** (inbound queue port) Request frame must be 32 bytes aligned
+** #bit27--bit31 => flag for post ccb
+** #bit0--bit26 => real address (bit27--bit31) of post arcmsr_cdb
+** bit31 :
+** 0 : 256 bytes frame
+** 1 : 512 bytes frame
+** bit30 :
+** 0 : normal request
+** 1 : BIOS request
+** bit29 : reserved
+** bit28 : reserved
+** bit27 : reserved
+** ---------------------------------------------------------------------------
+** (outbount queue port) Request reply
+** #bit27--bit31
+** => flag for reply
+** #bit0--bit26
+** => real address (bit27--bit31) of reply arcmsr_cdb
+** bit31 : must be 0 (for this type of reply)
+** bit30 : reserved for BIOS handshake
+** bit29 : reserved
+** bit28 :
+** 0 : no error, ignore AdapStatus/DevStatus/SenseData
+** 1 : Error, error code in AdapStatus/DevStatus/SenseData
+** bit27 : reserved
+** 6. BIOS request
+** All BIOS request is the same with request from PostQ
+** Except :
+** Request frame is sent from configuration space
+** offset: 0x78 : Request Frame (bit30 == 1)
+** offset: 0x18 : writeonly to generate
+** IRQ to IOP331
+** Completion of request:
+** (bit30 == 0, bit28==err flag)
+** 7. Definition of SGL entry (structure)
+** 8. Message1 Out - Diag Status Code (????)
+** 9. Message0 message code :
+** 0x00 : NOP
+** 0x01 : Get Config
+** ->offset 0xa00 :for outbound message code message_rwbuffer
+** (IOP331 send to driver)
+** Signature 0x87974060(4)
+** Request len 0x00000200(4)
+** numbers of queue 0x00000100(4)
+** SDRAM Size 0x00000100(4)-->256 MB
+** IDE Channels 0x00000008(4)
+** vendor 40 bytes char
+** model 8 bytes char
+** FirmVer 16 bytes char
+** Device Map 16 bytes char
+** FirmwareVersion DWORD <== Added for checking of
+** new firmware capability
+** 0x02 : Set Config
+** ->offset 0xa00 :for inbound message code message_rwbuffer
+** (driver send to IOP331)
+** Signature 0x87974063(4)
+** UPPER32 of Request Frame (4)-->Driver Only
+** 0x03 : Reset (Abort all queued Command)
+** 0x04 : Stop Background Activity
+** 0x05 : Flush Cache
+** 0x06 : Start Background Activity
+** (re-start if background is halted)
+** 0x07 : Check If Host Command Pending
+** (Novell May Need This Function)
+** 0x08 : Set controller time
+** ->offset 0xa00 : for inbound message code message_rwbuffer
+** (driver to IOP331)
+** byte 0 : 0xaa <-- signature
+** byte 1 : 0x55 <-- signature
+** byte 2 : year (04)
+** byte 3 : month (1..12)
+** byte 4 : date (1..31)
+** byte 5 : hour (0..23)
+** byte 6 : minute (0..59)
+** byte 7 : second (0..59)
+*******************************************************************************
+*******************************************************************************
+** RS-232 Interface for Areca Raid Controller
+** The low level command interface is exclusive with VT100 terminal
+** --------------------------------------------------------------------
+** 1. Sequence of command execution
+** --------------------------------------------------------------------
+** (A) Header : 3 bytes sequence (0x5E, 0x01, 0x61)
+** (B) Command block : variable length of data including length,
+** command code, data and checksum byte
+** (C) Return data : variable length of data
+** --------------------------------------------------------------------
+** 2. Command block
+** --------------------------------------------------------------------
+** (A) 1st byte : command block length (low byte)
+** (B) 2nd byte : command block length (high byte)
+** note ..command block length shouldn't > 2040 bytes,
+** length excludes these two bytes
+** (C) 3rd byte : command code
+** (D) 4th and following bytes : variable length data bytes
+** depends on command code
+** (E) last byte : checksum byte (sum of 1st byte until last data byte)
+** --------------------------------------------------------------------
+** 3. Command code and associated data
+** --------------------------------------------------------------------
+** The following are command code defined in raid controller Command
+** code 0x10--0x1? are used for system level management,
+** no password checking is needed and should be implemented in separate
+** well controlled utility and not for end user access.
+** Command code 0x20--0x?? always check the password,
+** password must be entered to enable these command.
+** enum
+** {
+** GUI_SET_SERIAL=0x10,
+** GUI_SET_VENDOR,
+** GUI_SET_MODEL,
+** GUI_IDENTIFY,
+** GUI_CHECK_PASSWORD,
+** GUI_LOGOUT,
+** GUI_HTTP,
+** GUI_SET_ETHERNET_ADDR,
+** GUI_SET_LOGO,
+** GUI_POLL_EVENT,
+** GUI_GET_EVENT,
+** GUI_GET_HW_MONITOR,
+** // GUI_QUICK_CREATE=0x20, (function removed)
+** GUI_GET_INFO_R=0x20,
+** GUI_GET_INFO_V,
+** GUI_GET_INFO_P,
+** GUI_GET_INFO_S,
+** GUI_CLEAR_EVENT,
+** GUI_MUTE_BEEPER=0x30,
+** GUI_BEEPER_SETTING,
+** GUI_SET_PASSWORD,
+** GUI_HOST_INTERFACE_MODE,
+** GUI_REBUILD_PRIORITY,
+** GUI_MAX_ATA_MODE,
+** GUI_RESET_CONTROLLER,
+** GUI_COM_PORT_SETTING,
+** GUI_NO_OPERATION,
+** GUI_DHCP_IP,
+** GUI_CREATE_PASS_THROUGH=0x40,
+** GUI_MODIFY_PASS_THROUGH,
+** GUI_DELETE_PASS_THROUGH,
+** GUI_IDENTIFY_DEVICE,
+** GUI_CREATE_RAIDSET=0x50,
+** GUI_DELETE_RAIDSET,
+** GUI_EXPAND_RAIDSET,
+** GUI_ACTIVATE_RAIDSET,
+** GUI_CREATE_HOT_SPARE,
+** GUI_DELETE_HOT_SPARE,
+** GUI_CREATE_VOLUME=0x60,
+** GUI_MODIFY_VOLUME,
+** GUI_DELETE_VOLUME,
+** GUI_START_CHECK_VOLUME,
+** GUI_STOP_CHECK_VOLUME
+** };
+** Command description :
+** GUI_SET_SERIAL : Set the controller serial#
+** byte 0,1 : length
+** byte 2 : command code 0x10
+** byte 3 : password length (should be 0x0f)
+** byte 4-0x13 : should be "ArEcATecHnoLogY"
+** byte 0x14--0x23 : Serial number string (must be 16 bytes)
+** GUI_SET_VENDOR : Set vendor string for the controller
+** byte 0,1 : length
+** byte 2 : command code 0x11
+** byte 3 : password length (should be 0x08)
+** byte 4-0x13 : should be "ArEcAvAr"
+** byte 0x14--0x3B : vendor string (must be 40 bytes)
+** GUI_SET_MODEL : Set the model name of the controller
+** byte 0,1 : length
+** byte 2 : command code 0x12
+** byte 3 : password length (should be 0x08)
+** byte 4-0x13 : should be "ArEcAvAr"
+** byte 0x14--0x1B : model string (must be 8 bytes)
+** GUI_IDENTIFY : Identify device
+** byte 0,1 : length
+** byte 2 : command code 0x13
+** return "Areca RAID Subsystem "
+** GUI_CHECK_PASSWORD : Verify password
+** byte 0,1 : length
+** byte 2 : command code 0x14
+** byte 3 : password length
+** byte 4-0x?? : user password to be checked
+** GUI_LOGOUT : Logout GUI (force password checking on next command)
+** byte 0,1 : length
+** byte 2 : command code 0x15
+** GUI_HTTP : HTTP interface (reserved for Http proxy service)(0x16)
+**
+** GUI_SET_ETHERNET_ADDR : Set the ethernet MAC address
+** byte 0,1 : length
+** byte 2 : command code 0x17
+** byte 3 : password length (should be 0x08)
+** byte 4-0x13 : should be "ArEcAvAr"
+** byte 0x14--0x19 : Ethernet MAC address (must be 6 bytes)
+** GUI_SET_LOGO : Set logo in HTTP
+** byte 0,1 : length
+** byte 2 : command code 0x18
+** byte 3 : Page# (0/1/2/3) (0xff --> clear OEM logo)
+** byte 4/5/6/7 : 0x55/0xaa/0xa5/0x5a
+** byte 8 : TITLE.JPG data (each page must be 2000 bytes)
+** note page0 1st 2 byte must be
+** actual length of the JPG file
+** GUI_POLL_EVENT : Poll If Event Log Changed
+** byte 0,1 : length
+** byte 2 : command code 0x19
+** GUI_GET_EVENT : Read Event
+** byte 0,1 : length
+** byte 2 : command code 0x1a
+** byte 3 : Event Page (0:1st page/1/2/3:last page)
+** GUI_GET_HW_MONITOR : Get HW monitor data
+** byte 0,1 : length
+** byte 2 : command code 0x1b
+** byte 3 : # of FANs(example 2)
+** byte 4 : # of Voltage sensor(example 3)
+** byte 5 : # of temperature sensor(example 2)
+** byte 6 : # of power
+** byte 7/8 : Fan#0 (RPM)
+** byte 9/10 : Fan#1
+** byte 11/12 : Voltage#0 original value in *1000
+** byte 13/14 : Voltage#0 value
+** byte 15/16 : Voltage#1 org
+** byte 17/18 : Voltage#1
+** byte 19/20 : Voltage#2 org
+** byte 21/22 : Voltage#2
+** byte 23 : Temp#0
+** byte 24 : Temp#1
+** byte 25 : Power indicator (bit0 : power#0,
+** bit1 : power#1)
+** byte 26 : UPS indicator
+** GUI_QUICK_CREATE : Quick create raid/volume set
+** byte 0,1 : length
+** byte 2 : command code 0x20
+** byte 3/4/5/6 : raw capacity
+** byte 7 : raid level
+** byte 8 : stripe size
+** byte 9 : spare
+** byte 10/11/12/13: device mask (the devices to create raid/volume)
+** This function is removed, application like
+** to implement quick create function
+** need to use GUI_CREATE_RAIDSET and GUI_CREATE_VOLUMESET function.
+** GUI_GET_INFO_R : Get Raid Set Information
+** byte 0,1 : length
+** byte 2 : command code 0x20
+** byte 3 : raidset#
+** typedef struct sGUI_RAIDSET
+** {
+** BYTE grsRaidSetName[16];
+** DWORD grsCapacity;
+** DWORD grsCapacityX;
+** DWORD grsFailMask;
+** BYTE grsDevArray[32];
+** BYTE grsMemberDevices;
+** BYTE grsNewMemberDevices;
+** BYTE grsRaidState;
+** BYTE grsVolumes;
+** BYTE grsVolumeList[16];
+** BYTE grsRes1;
+** BYTE grsRes2;
+** BYTE grsRes3;
+** BYTE grsFreeSegments;
+** DWORD grsRawStripes[8];
+** DWORD grsRes4;
+** DWORD grsRes5; // Total to 128 bytes
+** DWORD grsRes6; // Total to 128 bytes
+** } sGUI_RAIDSET, *pGUI_RAIDSET;
+** GUI_GET_INFO_V : Get Volume Set Information
+** byte 0,1 : length
+** byte 2 : command code 0x21
+** byte 3 : volumeset#
+** typedef struct sGUI_VOLUMESET
+** {
+** BYTE gvsVolumeName[16]; // 16
+** DWORD gvsCapacity;
+** DWORD gvsCapacityX;
+** DWORD gvsFailMask;
+** DWORD gvsStripeSize;
+** DWORD gvsNewFailMask;
+** DWORD gvsNewStripeSize;
+** DWORD gvsVolumeStatus;
+** DWORD gvsProgress; // 32
+** sSCSI_ATTR gvsScsi;
+** BYTE gvsMemberDisks;
+** BYTE gvsRaidLevel; // 8
+** BYTE gvsNewMemberDisks;
+** BYTE gvsNewRaidLevel;
+** BYTE gvsRaidSetNumber;
+** BYTE gvsRes0; // 4
+** BYTE gvsRes1[4]; // 64 bytes
+** } sGUI_VOLUMESET, *pGUI_VOLUMESET;
+** GUI_GET_INFO_P : Get Physical Drive Information
+** byte 0,1 : length
+** byte 2 : command code 0x22
+** byte 3 : drive # (from 0 to max-channels - 1)
+** typedef struct sGUI_PHY_DRV
+** {
+** BYTE gpdModelName[40];
+** BYTE gpdSerialNumber[20];
+** BYTE gpdFirmRev[8];
+** DWORD gpdCapacity;
+** DWORD gpdCapacityX; // Reserved for expansion
+** BYTE gpdDeviceState;
+** BYTE gpdPioMode;
+** BYTE gpdCurrentUdmaMode;
+** BYTE gpdUdmaMode;
+** BYTE gpdDriveSelect;
+** BYTE gpdRaidNumber; // 0xff if not belongs to a raid set
+** sSCSI_ATTR gpdScsi;
+** BYTE gpdReserved[40]; // Total to 128 bytes
+** } sGUI_PHY_DRV, *pGUI_PHY_DRV;
+** GUI_GET_INFO_S : Get System Information
+** byte 0,1 : length
+** byte 2 : command code 0x23
+** typedef struct sCOM_ATTR
+** {
+** BYTE comBaudRate;
+** BYTE comDataBits;
+** BYTE comStopBits;
+** BYTE comParity;
+** BYTE comFlowControl;
+** } sCOM_ATTR, *pCOM_ATTR;
+** typedef struct sSYSTEM_INFO
+** {
+** BYTE gsiVendorName[40];
+** BYTE gsiSerialNumber[16];
+** BYTE gsiFirmVersion[16];
+** BYTE gsiBootVersion[16];
+** BYTE gsiMbVersion[16];
+** BYTE gsiModelName[8];
+** BYTE gsiLocalIp[4];
+** BYTE gsiCurrentIp[4];
+** DWORD gsiTimeTick;
+** DWORD gsiCpuSpeed;
+** DWORD gsiICache;
+** DWORD gsiDCache;
+** DWORD gsiScache;
+** DWORD gsiMemorySize;
+** DWORD gsiMemorySpeed;
+** DWORD gsiEvents;
+** BYTE gsiMacAddress[6];
+** BYTE gsiDhcp;
+** BYTE gsiBeeper;
+** BYTE gsiChannelUsage;
+** BYTE gsiMaxAtaMode;
+** BYTE gsiSdramEcc; // 1:if ECC enabled
+** BYTE gsiRebuildPriority;
+** sCOM_ATTR gsiComA; // 5 bytes
+** sCOM_ATTR gsiComB; // 5 bytes
+** BYTE gsiIdeChannels;
+** BYTE gsiScsiHostChannels;
+** BYTE gsiIdeHostChannels;
+** BYTE gsiMaxVolumeSet;
+** BYTE gsiMaxRaidSet;
+** BYTE gsiEtherPort; // 1:if ether net port supported
+** BYTE gsiRaid6Engine; // 1:Raid6 engine supported
+** BYTE gsiRes[75];
+** } sSYSTEM_INFO, *pSYSTEM_INFO;
+** GUI_CLEAR_EVENT : Clear System Event
+** byte 0,1 : length
+** byte 2 : command code 0x24
+** GUI_MUTE_BEEPER : Mute current beeper
+** byte 0,1 : length
+** byte 2 : command code 0x30
+** GUI_BEEPER_SETTING : Disable beeper
+** byte 0,1 : length
+** byte 2 : command code 0x31
+** byte 3 : 0->disable, 1->enable
+** GUI_SET_PASSWORD : Change password
+** byte 0,1 : length
+** byte 2 : command code 0x32
+** byte 3 : pass word length ( must <= 15 )
+** byte 4 : password (must be alpha-numerical)
+** GUI_HOST_INTERFACE_MODE : Set host interface mode
+** byte 0,1 : length
+** byte 2 : command code 0x33
+** byte 3 : 0->Independent, 1->cluster
+** GUI_REBUILD_PRIORITY : Set rebuild priority
+** byte 0,1 : length
+** byte 2 : command code 0x34
+** byte 3 : 0/1/2/3 (low->high)
+** GUI_MAX_ATA_MODE : Set maximum ATA mode to be used
+** byte 0,1 : length
+** byte 2 : command code 0x35
+** byte 3 : 0/1/2/3 (133/100/66/33)
+** GUI_RESET_CONTROLLER : Reset Controller
+** byte 0,1 : length
+** byte 2 : command code 0x36
+** *Response with VT100 screen (discard it)
+** GUI_COM_PORT_SETTING : COM port setting
+** byte 0,1 : length
+** byte 2 : command code 0x37
+** byte 3 : 0->COMA (term port),
+** 1->COMB (debug port)
+** byte 4 : 0/1/2/3/4/5/6/7
+** (1200/2400/4800/9600/19200/38400/57600/115200)
+** byte 5 : data bit
+** (0:7 bit, 1:8 bit : must be 8 bit)
+** byte 6 : stop bit (0:1, 1:2 stop bits)
+** byte 7 : parity (0:none, 1:off, 2:even)
+** byte 8 : flow control
+** (0:none, 1:xon/xoff, 2:hardware => must use none)
+** GUI_NO_OPERATION : No operation
+** byte 0,1 : length
+** byte 2 : command code 0x38
+** GUI_DHCP_IP : Set DHCP option and local IP address
+** byte 0,1 : length
+** byte 2 : command code 0x39
+** byte 3 : 0:dhcp disabled, 1:dhcp enabled
+** byte 4/5/6/7 : IP address
+** GUI_CREATE_PASS_THROUGH : Create pass through disk
+** byte 0,1 : length
+** byte 2 : command code 0x40
+** byte 3 : device #
+** byte 4 : scsi channel (0/1)
+** byte 5 : scsi id (0-->15)
+** byte 6 : scsi lun (0-->7)
+** byte 7 : tagged queue (1 : enabled)
+** byte 8 : cache mode (1 : enabled)
+** byte 9 : max speed (0/1/2/3/4,
+** async/20/40/80/160 for scsi)
+** (0/1/2/3/4, 33/66/100/133/150 for ide )
+** GUI_MODIFY_PASS_THROUGH : Modify pass through disk
+** byte 0,1 : length
+** byte 2 : command code 0x41
+** byte 3 : device #
+** byte 4 : scsi channel (0/1)
+** byte 5 : scsi id (0-->15)
+** byte 6 : scsi lun (0-->7)
+** byte 7 : tagged queue (1 : enabled)
+** byte 8 : cache mode (1 : enabled)
+** byte 9 : max speed (0/1/2/3/4,
+** async/20/40/80/160 for scsi)
+** (0/1/2/3/4, 33/66/100/133/150 for ide )
+** GUI_DELETE_PASS_THROUGH : Delete pass through disk
+** byte 0,1 : length
+** byte 2 : command code 0x42
+** byte 3 : device# to be deleted
+** GUI_IDENTIFY_DEVICE : Identify Device
+** byte 0,1 : length
+** byte 2 : command code 0x43
+** byte 3 : Flash Method
+** (0:flash selected, 1:flash not selected)
+** byte 4/5/6/7 : IDE device mask to be flashed
+** note .... no response data available
+** GUI_CREATE_RAIDSET : Create Raid Set
+** byte 0,1 : length
+** byte 2 : command code 0x50
+** byte 3/4/5/6 : device mask
+** byte 7-22 : raidset name (if byte 7 == 0:use default)
+** GUI_DELETE_RAIDSET : Delete Raid Set
+** byte 0,1 : length
+** byte 2 : command code 0x51
+** byte 3 : raidset#
+** GUI_EXPAND_RAIDSET : Expand Raid Set
+** byte 0,1 : length
+** byte 2 : command code 0x52
+** byte 3 : raidset#
+** byte 4/5/6/7 : device mask for expansion
+** byte 8/9/10 : (8:0 no change, 1 change, 0xff:terminate,
+** 9:new raid level,
+** 10:new stripe size
+** 0/1/2/3/4/5->4/8/16/32/64/128K )
+** byte 11/12/13 : repeat for each volume in the raidset
+** GUI_ACTIVATE_RAIDSET : Activate incomplete raid set
+** byte 0,1 : length
+** byte 2 : command code 0x53
+** byte 3 : raidset#
+** GUI_CREATE_HOT_SPARE : Create hot spare disk
+** byte 0,1 : length
+** byte 2 : command code 0x54
+** byte 3/4/5/6 : device mask for hot spare creation
+** GUI_DELETE_HOT_SPARE : Delete hot spare disk
+** byte 0,1 : length
+** byte 2 : command code 0x55
+** byte 3/4/5/6 : device mask for hot spare deletion
+** GUI_CREATE_VOLUME : Create volume set
+** byte 0,1 : length
+** byte 2 : command code 0x60
+** byte 3 : raidset#
+** byte 4-19 : volume set name
+** (if byte4 == 0, use default)
+** byte 20-27 : volume capacity (blocks)
+** byte 28 : raid level
+** byte 29 : stripe size
+** (0/1/2/3/4/5->4/8/16/32/64/128K)
+** byte 30 : channel
+** byte 31 : ID
+** byte 32 : LUN
+** byte 33 : 1 enable tag
+** byte 34 : 1 enable cache
+** byte 35 : speed
+** (0/1/2/3/4->async/20/40/80/160 for scsi)
+** (0/1/2/3/4->33/66/100/133/150 for IDE )
+** byte 36 : 1 to select quick init
+**
+** GUI_MODIFY_VOLUME : Modify volume Set
+** byte 0,1 : length
+** byte 2 : command code 0x61
+** byte 3 : volumeset#
+** byte 4-19 : new volume set name
+** (if byte4 == 0, not change)
+** byte 20-27 : new volume capacity (reserved)
+** byte 28 : new raid level
+** byte 29 : new stripe size
+** (0/1/2/3/4/5->4/8/16/32/64/128K)
+** byte 30 : new channel
+** byte 31 : new ID
+** byte 32 : new LUN
+** byte 33 : 1 enable tag
+** byte 34 : 1 enable cache
+** byte 35 : speed
+** (0/1/2/3/4->async/20/40/80/160 for scsi)
+** (0/1/2/3/4->33/66/100/133/150 for IDE )
+** GUI_DELETE_VOLUME : Delete volume set
+** byte 0,1 : length
+** byte 2 : command code 0x62
+** byte 3 : volumeset#
+** GUI_START_CHECK_VOLUME : Start volume consistency check
+** byte 0,1 : length
+** byte 2 : command code 0x63
+** byte 3 : volumeset#
+** GUI_STOP_CHECK_VOLUME : Stop volume consistency check
+** byte 0,1 : length
+** byte 2 : command code 0x64
+** ---------------------------------------------------------------------
+** 4. Returned data
+** ---------------------------------------------------------------------
+** (A) Header : 3 bytes sequence (0x5E, 0x01, 0x61)
+** (B) Length : 2 bytes
+** (low byte 1st, excludes length and checksum byte)
+** (C) status or data :
+** <1> If length == 1 ==> 1 byte status code
+** #define GUI_OK 0x41
+** #define GUI_RAIDSET_NOT_NORMAL 0x42
+** #define GUI_VOLUMESET_NOT_NORMAL 0x43
+** #define GUI_NO_RAIDSET 0x44
+** #define GUI_NO_VOLUMESET 0x45
+** #define GUI_NO_PHYSICAL_DRIVE 0x46
+** #define GUI_PARAMETER_ERROR 0x47
+** #define GUI_UNSUPPORTED_COMMAND 0x48
+** #define GUI_DISK_CONFIG_CHANGED 0x49
+** #define GUI_INVALID_PASSWORD 0x4a
+** #define GUI_NO_DISK_SPACE 0x4b
+** #define GUI_CHECKSUM_ERROR 0x4c
+** #define GUI_PASSWORD_REQUIRED 0x4d
+** <2> If length > 1 ==>
+** data block returned from controller
+** and the contents depends on the command code
+** (E) Checksum : checksum of length and status or data byte
+**************************************************************************
diff --git a/Documentation/scsi/bfa.txt b/Documentation/scsi/bfa.txt
new file mode 100644
index 000000000..f2d6e9d17
--- /dev/null
+++ b/Documentation/scsi/bfa.txt
@@ -0,0 +1,82 @@
+Linux driver for Brocade FC/FCOE adapters
+
+
+Supported Hardware
+------------------
+
+bfa 3.0.2.2 driver supports all Brocade FC/FCOE adapters. Below is a list of
+adapter models with corresponding PCIIDs.
+
+ PCIID Model
+
+ 1657:0013:1657:0014 425 4Gbps dual port FC HBA
+ 1657:0013:1657:0014 825 8Gbps PCIe dual port FC HBA
+ 1657:0013:103c:1742 HP 82B 8Gbps PCIedual port FC HBA
+ 1657:0013:103c:1744 HP 42B 4Gbps dual port FC HBA
+ 1657:0017:1657:0014 415 4Gbps single port FC HBA
+ 1657:0017:1657:0014 815 8Gbps single port FC HBA
+ 1657:0017:103c:1741 HP 41B 4Gbps single port FC HBA
+ 1657:0017:103c 1743 HP 81B 8Gbps single port FC HBA
+ 1657:0021:103c:1779 804 8Gbps FC HBA for HP Bladesystem c-class
+
+ 1657:0014:1657:0014 1010 10Gbps single port CNA - FCOE
+ 1657:0014:1657:0014 1020 10Gbps dual port CNA - FCOE
+ 1657:0014:1657:0014 1007 10Gbps dual port CNA - FCOE
+ 1657:0014:1657:0014 1741 10Gbps dual port CNA - FCOE
+
+ 1657:0022:1657:0024 1860 16Gbps FC HBA
+ 1657:0022:1657:0022 1860 10Gbps CNA - FCOE
+
+
+Firmware download
+-----------------
+
+The latest Firmware package for 3.0.2.2 bfa driver can be found at:
+
+http://www.brocade.com/services-support/drivers-downloads/adapters/Linux.page
+
+and then click following respective util package link:
+
+ Version Link
+
+ v3.0.0.0 Linux Adapter Firmware package for RHEL 6.2, SLES 11SP2
+
+
+Configuration & Management utility download
+-------------------------------------------
+
+The latest driver configuration & management utility for 3.0.2.2 bfa driver can
+be found at:
+
+http://www.brocade.com/services-support/drivers-downloads/adapters/Linux.page
+
+and then click following respective util pacakge link
+
+ Version Link
+
+ v3.0.2.0 Linux Adapter Firmware package for RHEL 6.2, SLES 11SP2
+
+
+Documentation
+-------------
+
+The latest Administration's Guide, Installation and Reference Manual,
+Troubleshooting Guide, and Release Notes for the corresponding out-of-box
+driver can be found at:
+
+http://www.brocade.com/services-support/drivers-downloads/adapters/Linux.page
+
+and use the following inbox and out-of-box driver version mapping to find
+the corresponding documentation:
+
+ Inbox Version Out-of-box Version
+
+ v3.0.2.2 v3.0.0.0
+
+
+Support
+-------
+
+For general product and support info, go to the Brocade website at:
+
+http://www.brocade.com/services-support/index.page
diff --git a/Documentation/scsi/bnx2fc.txt b/Documentation/scsi/bnx2fc.txt
new file mode 100644
index 000000000..80823556d
--- /dev/null
+++ b/Documentation/scsi/bnx2fc.txt
@@ -0,0 +1,75 @@
+Operating FCoE using bnx2fc
+===========================
+Broadcom FCoE offload through bnx2fc is full stateful hardware offload that
+cooperates with all interfaces provided by the Linux ecosystem for FC/FCoE and
+SCSI controllers. As such, FCoE functionality, once enabled is largely
+transparent. Devices discovered on the SAN will be registered and unregistered
+automatically with the upper storage layers.
+
+Despite the fact that the Broadcom's FCoE offload is fully offloaded, it does
+depend on the state of the network interfaces to operate. As such, the network
+interface (e.g. eth0) associated with the FCoE offload initiator must be 'up'.
+It is recommended that the network interfaces be configured to be brought up
+automatically at boot time.
+
+Furthermore, the Broadcom FCoE offload solution creates VLAN interfaces to
+support the VLANs that have been discovered for FCoE operation (e.g.
+eth0.1001-fcoe). Do not delete or disable these interfaces or FCoE operation
+will be disrupted.
+
+Driver Usage Model:
+===================
+
+1. Ensure that fcoe-utils package is installed.
+
+2. Configure the interfaces on which bnx2fc driver has to operate on.
+Here are the steps to configure:
+ a. cd /etc/fcoe
+ b. copy cfg-ethx to cfg-eth5 if FCoE has to be enabled on eth5.
+ c. Repeat this for all the interfaces where FCoE has to be enabled.
+ d. Edit all the cfg-eth files to set "no" for DCB_REQUIRED** field, and
+ "yes" for AUTO_VLAN.
+ e. Other configuration parameters should be left as default
+
+3. Ensure that "bnx2fc" is in SUPPORTED_DRIVERS list in /etc/fcoe/config.
+
+4. Start fcoe service. (service fcoe start). If Broadcom devices are present in
+the system, bnx2fc driver would automatically claim the interfaces, starts vlan
+discovery and log into the targets.
+
+5. "Symbolic Name" in 'fcoeadm -i' output would display if bnx2fc has claimed
+the interface.
+Eg:
+[root@bh2 ~]# fcoeadm -i
+ Description: NetXtreme II BCM57712 10 Gigabit Ethernet
+ Revision: 01
+ Manufacturer: Broadcom Corporation
+ Serial Number: 0010186FD558
+ Driver: bnx2x 1.70.00-0
+ Number of Ports: 2
+
+ Symbolic Name: bnx2fc v1.0.5 over eth5.4
+ OS Device Name: host11
+ Node Name: 0x10000010186FD559
+ Port Name: 0x20000010186FD559
+ FabricName: 0x2001000DECB3B681
+ Speed: 10 Gbit
+ Supported Speed: 10 Gbit
+ MaxFrameSize: 2048
+ FC-ID (Port ID): 0x0F0377
+ State: Online
+
+6. Verify the vlan discovery is performed by running ifconfig and notice
+<INTERFACE>.<VLAN>-fcoe interfaces are automatically created.
+
+Refer to fcoeadm manpage for more information on fcoeadm operations to
+create/destroy interfaces or to display lun/target information.
+
+NOTE:
+====
+** Broadcom FCoE capable devices implement a DCBX/LLDP client on-chip. Only one
+LLDP client is allowed per interface. For proper operation all host software
+based DCBX/LLDP clients (e.g. lldpad) must be disabled. To disable lldpad on a
+given interface, run the following command:
+
+lldptool set-lldp -i <interface_name> adminStatus=disabled
diff --git a/Documentation/scsi/cxgb3i.txt b/Documentation/scsi/cxgb3i.txt
new file mode 100644
index 000000000..7ac8032ee
--- /dev/null
+++ b/Documentation/scsi/cxgb3i.txt
@@ -0,0 +1,84 @@
+Chelsio S3 iSCSI Driver for Linux
+
+Introduction
+============
+
+The Chelsio T3 ASIC based Adapters (S310, S320, S302, S304, Mezz cards, etc.
+series of products) support iSCSI acceleration and iSCSI Direct Data Placement
+(DDP) where the hardware handles the expensive byte touching operations, such
+as CRC computation and verification, and direct DMA to the final host memory
+destination:
+
+ - iSCSI PDU digest generation and verification
+
+ On transmitting, Chelsio S3 h/w computes and inserts the Header and
+ Data digest into the PDUs.
+ On receiving, Chelsio S3 h/w computes and verifies the Header and
+ Data digest of the PDUs.
+
+ - Direct Data Placement (DDP)
+
+ S3 h/w can directly place the iSCSI Data-In or Data-Out PDU's
+ payload into pre-posted final destination host-memory buffers based
+ on the Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
+ in Data-Out PDUs.
+
+ - PDU Transmit and Recovery
+
+ On transmitting, S3 h/w accepts the complete PDU (header + data)
+ from the host driver, computes and inserts the digests, decomposes
+ the PDU into multiple TCP segments if necessary, and transmit all
+ the TCP segments onto the wire. It handles TCP retransmission if
+ needed.
+
+ On receiving, S3 h/w recovers the iSCSI PDU by reassembling TCP
+ segments, separating the header and data, calculating and verifying
+ the digests, then forwarding the header to the host. The payload data,
+ if possible, will be directly placed into the pre-posted host DDP
+ buffer. Otherwise, the payload data will be sent to the host too.
+
+The cxgb3i driver interfaces with open-iscsi initiator and provides the iSCSI
+acceleration through Chelsio hardware wherever applicable.
+
+Using the cxgb3i Driver
+=======================
+
+The following steps need to be taken to accelerates the open-iscsi initiator:
+
+1. Load the cxgb3i driver: "modprobe cxgb3i"
+
+ The cxgb3i module registers a new transport class "cxgb3i" with open-iscsi.
+
+ * in the case of recompiling the kernel, the cxgb3i selection is located at
+ Device Drivers
+ SCSI device support --->
+ [*] SCSI low-level drivers --->
+ <M> Chelsio S3xx iSCSI support
+
+2. Create an interface file located under /etc/iscsi/ifaces/ for the new
+ transport class "cxgb3i".
+
+ The content of the file should be in the following format:
+ iface.transport_name = cxgb3i
+ iface.net_ifacename = <ethX>
+ iface.ipaddress = <iscsi ip address>
+
+ * if iface.ipaddress is specified, <iscsi ip address> needs to be either the
+ same as the ethX's ip address or an address on the same subnet. Make
+ sure the ip address is unique in the network.
+
+3. edit /etc/iscsi/iscsid.conf
+ The default setting for MaxRecvDataSegmentLength (131072) is too big;
+ replace with a value no bigger than 15360 (for example 8192):
+
+ node.conn[0].iscsi.MaxRecvDataSegmentLength = 8192
+
+ * The login would fail for a normal session if MaxRecvDataSegmentLength is
+ too big. A error message in the format of
+ "cxgb3i: ERR! MaxRecvSegmentLength <X> too big. Need to be <= <Y>."
+ would be logged to dmesg.
+
+4. To direct open-iscsi traffic to go through cxgb3i's accelerated path,
+ "-I <iface file name>" option needs to be specified with most of the
+ iscsiadm command. <iface file name> is the transport interface file created
+ in step 2.
diff --git a/Documentation/scsi/dc395x.txt b/Documentation/scsi/dc395x.txt
new file mode 100644
index 000000000..88219f966
--- /dev/null
+++ b/Documentation/scsi/dc395x.txt
@@ -0,0 +1,102 @@
+README file for the dc395x SCSI driver
+==========================================
+
+Status
+------
+The driver has been tested with CD-R and CD-R/W drives. These should
+be safe to use. Testing with hard disks has not been done to any
+great degree and caution should be exercised if you want to attempt
+to use this driver with hard disks.
+
+This is a 2.5 only driver. For a 2.4 driver please see the original
+driver (which this driver started from) at
+http://www.garloff.de/kurt/linux/dc395/
+
+Problems, questions and patches should be submitted to the mailing
+list. Details on the list, including archives, are available at
+http://lists.twibble.org/mailman/listinfo/dc395x/
+
+Parameters
+----------
+The driver uses the settings from the EEPROM set in the SCSI BIOS
+setup. If there is no EEPROM, the driver uses default values.
+Both can be overridden by command line parameters (module or kernel
+parameters).
+
+The following parameters are available:
+
+ - safe
+ Default: 0, Acceptable values: 0 or 1
+
+ If safe is set to 1 then the adapter will use conservative
+ ("safe") default settings. This sets:
+
+ shortcut for dc395x=7,4,9,15,2,10
+
+ - adapter_id
+ Default: 7, Acceptable values: 0 to 15
+
+ Sets the host adapter SCSI ID.
+
+ - max_speed
+ Default: 1, Acceptable value: 0 to 7
+ 0 = 20 Mhz
+ 1 = 12.2 Mhz
+ 2 = 10 Mhz
+ 3 = 8 Mhz
+ 4 = 6.7 Mhz
+ 5 = 5.8 Hhz
+ 6 = 5 Mhz
+ 7 = 4 Mhz
+
+ - dev_mode
+ Bitmap for device configuration
+
+ DevMode bit definition:
+ Bit Val(hex) Val(dec) Meaning
+ *0 0x01 1 Parity check
+ *1 0x02 2 Synchronous Negotiation
+ *2 0x04 4 Disconnection
+ *3 0x08 8 Send Start command on startup. (Not used)
+ *4 0x10 16 Tagged Command Queueing
+ *5 0x20 32 Wide Negotiation
+
+ - adapter_mode
+ Bitmap for adapter configuration
+
+ AdaptMode bit definition
+ Bit Val(hex) Val(dec) Meaning
+ *0 0x01 1 Support more than two drives. (Not used)
+ *1 0x02 2 Use DOS compatible mapping for HDs greater than 1GB.
+ *2 0x04 4 Reset SCSI Bus on startup.
+ *3 0x08 8 Active Negation: Improves SCSI Bus noise immunity.
+ 4 0x10 16 Immediate return on BIOS seek command. (Not used)
+ (*)5 0x20 32 Check for LUNs >= 1.
+
+ - tags
+ Default: 3, Acceptable values: 0-5
+
+ The number of tags is 1<<x, if x has been specified
+
+ - reset_delay
+ Default: 1, Acceptable values: 0-180
+
+ The seconds to not accept commands after a SCSI Reset
+
+
+For the built in driver the parameters should be prefixed with
+dc395x. (eg "dc395x.safe=1")
+
+
+Copyright
+---------
+The driver is free software. It is protected by the GNU General Public
+License (GPL). Please read it, before using this driver. It should be
+included in your kernel sources and with your distribution. It carries the
+filename COPYING. If you don't have it, please ask me to send you one by
+email.
+Note: The GNU GPL says also something about warranty and liability.
+Please be aware the following: While we do my best to provide a working and
+reliable driver, there is a chance, that it will kill your valuable data.
+We refuse to take any responsibility for that. The driver is provided as-is
+and YOU USE IT AT YOUR OWN RESPONSIBILITY.
diff --git a/Documentation/scsi/dpti.txt b/Documentation/scsi/dpti.txt
new file mode 100644
index 000000000..f36dc0e7c
--- /dev/null
+++ b/Documentation/scsi/dpti.txt
@@ -0,0 +1,83 @@
+ /* TERMS AND CONDITIONS OF USE
+ *
+ * Redistribution and use in source form, with or without modification, are
+ * permitted provided that redistributions of source code must retain the
+ * above copyright notice, this list of conditions and the following disclaimer.
+ *
+ * This software is provided `as is' by Adaptec and
+ * any express or implied warranties, including, but not limited to, the
+ * implied warranties of merchantability and fitness for a particular purpose,
+ * are disclaimed. In no event shall Adaptec be
+ * liable for any direct, indirect, incidental, special, exemplary or
+ * consequential damages (including, but not limited to, procurement of
+ * substitute goods or services; loss of use, data, or profits; or business
+ * interruptions) however caused and on any theory of liability, whether in
+ * contract, strict liability, or tort (including negligence or otherwise)
+ * arising in any way out of the use of this driver software, even if advised
+ * of the possibility of such damage.
+ *
+ ****************************************************************
+ * This driver supports the Adaptec I2O RAID and DPT SmartRAID V I2O boards.
+ *
+ * CREDITS:
+ * The original linux driver was ported to Linux by Karen White while at
+ * Dell Computer. It was ported from Bob Pasteur's (of DPT) original
+ * non-Linux driver. Mark Salyzyn and Bob Pasteur consulted on the original
+ * driver.
+ *
+ * 2.0 version of the driver by Deanna Bonds and Mark Salyzyn.
+ *
+ * HISTORY:
+ * The driver was originally ported to linux version 2.0.34
+ *
+ * V2.0 Rewrite of driver. Re-architectured based on i2o subsystem.
+ * This was the first full GPL version since the last version used
+ * i2osig headers which were not GPL. Developer Testing version.
+ * V2.1 Internal testing
+ * V2.2 First released version
+ *
+ * V2.3
+ * Changes:
+ * Added Raptor Support
+ * Fixed bug causing system to hang under extreme load with
+ * management utilities running (removed GFP_DMA from kmalloc flags)
+ *
+ *
+ * V2.4 First version ready to be submitted to be embedded in the kernel
+ * Changes:
+ * Implemented suggestions from Alan Cox
+ * Added calculation of resid for sg layer
+ * Better error handling
+ * Added checking underflow conditions
+ * Added DATAPROTECT checking
+ * Changed error return codes
+ * Fixed pointer bug in bus reset routine
+ * Enabled hba reset from ioctls (allows a FW flash to reboot and use the new
+ * FW without having to reboot)
+ * Changed proc output
+ *
+ * TODO:
+ * Add 64 bit Scatter Gather when compiled on 64 bit architectures
+ * Add sparse lun scanning
+ * Add code that checks if a device that had been taken offline is
+ * now online (at the FW level) when test unit ready or inquiry
+ * command from scsi-core
+ * Add proc read interface
+ * busrescan command
+ * rescan command
+ * Add code to rescan routine that notifies scsi-core about new devices
+ * Add support for C-PCI (hotplug stuff)
+ * Add ioctl passthru error recovery
+ *
+ * NOTES:
+ * The DPT card optimizes the order of processing commands. Consequently,
+ * a command may take up to 6 minutes to complete after it has been sent
+ * to the board.
+ *
+ * The files dpti_ioctl.h dptsig.h osd_defs.h osd_util.h sys_info.h are part of the
+ * interface files for Adaptec's management routines. These define the structures used
+ * in the ioctls. They are written to be portable. They are hard to read, but I need
+ * to use them 'as is' or I can miss changes in the interface.
+ *
+ */
+
diff --git a/Documentation/scsi/dtc3x80.txt b/Documentation/scsi/dtc3x80.txt
new file mode 100644
index 000000000..1d7af9f9a
--- /dev/null
+++ b/Documentation/scsi/dtc3x80.txt
@@ -0,0 +1,43 @@
+README file for the Linux DTC3180/3280 scsi driver.
+by Ray Van Tassle (rayvt@comm.mot.com) March 1996
+Based on the generic & core NCR5380 code by Drew Eckhard
+
+SCSI device driver for the DTC 3180/3280.
+Data Technology Corp---a division of Qume.
+
+The 3280 has a standard floppy interface.
+
+The 3180 does not. Otherwise, they are identical.
+
+The DTC3x80 does not support DMA but it does have Pseudo-DMA which is
+supported by the driver.
+
+Its DTC406 scsi chip is supposedly compatible with the NCR 53C400.
+It is memory mapped, uses an IRQ, but no dma or io-port. There is
+internal DMA, between SCSI bus and an on-chip 128-byte buffer. Double
+buffering is done automagically by the chip. Data is transferred
+between the on-chip buffer and CPU/RAM via memory moves.
+
+The driver detects the possible memory addresses (jumper selectable):
+ CC00, DC00, C800, and D800
+The possible IRQ's (jumper selectable) are:
+ IRQ 10, 11, 12, 15
+Parity is supported by the chip, but not by this driver.
+Information can be obtained from /proc/scsi/dtc3c80/N.
+
+Note on interrupts:
+
+The documentation says that it can be set to interrupt whenever the
+on-chip buffer needs CPU attention. I couldn't get this to work. So
+the driver polls for data-ready in the pseudo-DMA transfer routine.
+The interrupt support routines in the NCR3280.c core modules handle
+scsi disconnect/reconnect, and this (mostly) works. However..... I
+have tested it with 4 totally different hard drives (both SCSI-1 and
+SCSI-2), and one CDROM drive. Interrupts works great for all but one
+specific hard drive. For this one, the driver will eventually hang in
+the transfer state. I have tested with: "dd bs=4k count=2k
+of=/dev/null if=/dev/sdb". It reads ok for a while, then hangs.
+After beating my head against this for a couple of weeks, getting
+nowhere, I give up. So.....This driver does NOT use interrupts, even
+if you have the card jumpered to an IRQ. Probably nobody will ever
+care.
diff --git a/Documentation/scsi/g_NCR5380.txt b/Documentation/scsi/g_NCR5380.txt
new file mode 100644
index 000000000..3b80f567f
--- /dev/null
+++ b/Documentation/scsi/g_NCR5380.txt
@@ -0,0 +1,63 @@
+README file for the Linux g_NCR5380 driver.
+
+(c) 1993 Drew Eckhard
+NCR53c400 extensions (c) 1994,1995,1996 Kevin Lentin
+
+This file documents the NCR53c400 extensions by Kevin Lentin and some
+enhancements to the NCR5380 core.
+
+This driver supports both NCR5380 and NCR53c400 cards in port or memory
+mapped modes. Currently this driver can only support one of those mapping
+modes at a time but it does support both of these chips at the same time.
+The next release of this driver will support port & memory mapped cards at
+the same time. It should be able to handle multiple different cards in the
+same machine.
+
+The drivers/scsi/Makefile has an override in it for the most common
+NCR53c400 card, the Trantor T130B in its default configuration:
+ Port: 0x350
+ IRQ : 5
+
+The NCR53c400 does not support DMA but it does have Pseudo-DMA which is
+supported by the driver.
+
+If the default configuration does not work for you, you can use the kernel
+command lines (eg using the lilo append command):
+ ncr5380=port,irq,dma
+ ncr53c400=port,irq
+or
+ ncr5380=base,irq,dma
+ ncr53c400=base,irq
+
+The driver does not probe for any addresses or ports other than those in
+the OVERRIDE or given to the kernel as above.
+
+This driver provides some information on what it has detected in
+/proc/scsi/g_NCR5380/x where x is the scsi card number as detected at boot
+time. More info to come in the future.
+
+When NCR53c400 support is compiled in, BIOS parameters will be returned by
+the driver (the raw 5380 driver does not and I don't plan to fiddle with
+it!).
+
+This driver works as a module.
+When included as a module, parameters can be passed on the insmod/modprobe
+command line:
+ ncr_irq=xx the interrupt
+ ncr_addr=xx the port or base address (for port or memory
+ mapped, resp.)
+ ncr_dma=xx the DMA
+ ncr_5380=1 to set up for a NCR5380 board
+ ncr_53c400=1 to set up for a NCR53C400 board
+e.g.
+modprobe g_NCR5380 ncr_irq=5 ncr_addr=0x350 ncr_5380=1
+ for a port mapped NCR5380 board or
+modprobe g_NCR5380 ncr_irq=255 ncr_addr=0xc8000 ncr_53c400=1
+ for a memory mapped NCR53C400 board with interrupts disabled.
+
+(255 should be specified for no or DMA interrupt, 254 to autoprobe for an
+ IRQ line if overridden on the command line.)
+
+
+Kevin Lentin
+K.Lentin@cs.monash.edu.au
diff --git a/Documentation/scsi/hpsa.txt b/Documentation/scsi/hpsa.txt
new file mode 100644
index 000000000..891435a72
--- /dev/null
+++ b/Documentation/scsi/hpsa.txt
@@ -0,0 +1,130 @@
+
+HPSA - Hewlett Packard Smart Array driver
+-----------------------------------------
+
+This file describes the hpsa SCSI driver for HP Smart Array controllers.
+The hpsa driver is intended to supplant the cciss driver for newer
+Smart Array controllers. The hpsa driver is a SCSI driver, while the
+cciss driver is a "block" driver. Actually cciss is both a block
+driver (for logical drives) AND a SCSI driver (for tape drives). This
+"split-brained" design of the cciss driver is a source of excess
+complexity and eliminating that complexity is one of the reasons
+for hpsa to exist.
+
+Supported devices:
+------------------
+
+Smart Array P212
+Smart Array P410
+Smart Array P410i
+Smart Array P411
+Smart Array P812
+Smart Array P712m
+Smart Array P711m
+StorageWorks P1210m
+
+Additionally, older Smart Arrays may work with the hpsa driver if the kernel
+boot parameter "hpsa_allow_any=1" is specified, however these are not tested
+nor supported by HP with this driver. For older Smart Arrays, the cciss
+driver should still be used.
+
+The "hpsa_simple_mode=1" boot parameter may be used to prevent the driver from
+putting the controller into "performant" mode. The difference is that with simple
+mode, each command completion requires an interrupt, while with "performant mode"
+(the default, and ordinarily better performing) it is possible to have multiple
+command completions indicated by a single interrupt.
+
+HPSA specific entries in /sys
+-----------------------------
+
+ In addition to the generic SCSI attributes available in /sys, hpsa supports
+ the following attributes:
+
+ HPSA specific host attributes:
+ ------------------------------
+
+ /sys/class/scsi_host/host*/rescan
+ /sys/class/scsi_host/host*/firmware_revision
+ /sys/class/scsi_host/host*/resettable
+ /sys/class/scsi_host/host*/transport_mode
+
+ the host "rescan" attribute is a write only attribute. Writing to this
+ attribute will cause the driver to scan for new, changed, or removed devices
+ (e.g. hot-plugged tape drives, or newly configured or deleted logical drives,
+ etc.) and notify the SCSI midlayer of any changes detected. Normally this is
+ triggered automatically by HP's Array Configuration Utility (either the GUI or
+ command line variety) so for logical drive changes, the user should not
+ normally have to use this. It may be useful when hot plugging devices like
+ tape drives, or entire storage boxes containing pre-configured logical drives.
+
+ The "firmware_revision" attribute contains the firmware version of the Smart Array.
+ For example:
+
+ root@host:/sys/class/scsi_host/host4# cat firmware_revision
+ 7.14
+
+ The transport_mode indicates whether the controller is in "performant"
+ or "simple" mode. This is controlled by the "hpsa_simple_mode" module
+ parameter.
+
+ The "resettable" read-only attribute indicates whether a particular
+ controller is able to honor the "reset_devices" kernel parameter. If the
+ device is resettable, this file will contain a "1", otherwise, a "0". This
+ parameter is used by kdump, for example, to reset the controller at driver
+ load time to eliminate any outstanding commands on the controller and get the
+ controller into a known state so that the kdump initiated i/o will work right
+ and not be disrupted in any way by stale commands or other stale state
+ remaining on the controller from the previous kernel. This attribute enables
+ kexec tools to warn the user if they attempt to designate a device which is
+ unable to honor the reset_devices kernel parameter as a dump device.
+
+ HPSA specific disk attributes:
+ ------------------------------
+
+ /sys/class/scsi_disk/c:b:t:l/device/unique_id
+ /sys/class/scsi_disk/c:b:t:l/device/raid_level
+ /sys/class/scsi_disk/c:b:t:l/device/lunid
+
+ (where c:b:t:l are the controller, bus, target and lun of the device)
+
+ For example:
+
+ root@host:/sys/class/scsi_disk/4:0:0:0/device# cat unique_id
+ 600508B1001044395355323037570F77
+ root@host:/sys/class/scsi_disk/4:0:0:0/device# cat lunid
+ 0x0000004000000000
+ root@host:/sys/class/scsi_disk/4:0:0:0/device# cat raid_level
+ RAID 0
+
+HPSA specific ioctls:
+---------------------
+
+ For compatibility with applications written for the cciss driver, many, but
+ not all of the ioctls supported by the cciss driver are also supported by the
+ hpsa driver. The data structures used by these are described in
+ include/linux/cciss_ioctl.h
+
+ CCISS_DEREGDISK
+ CCISS_REGNEWDISK
+ CCISS_REGNEWD
+
+ The above three ioctls all do exactly the same thing, which is to cause the driver
+ to rescan for new devices. This does exactly the same thing as writing to the
+ hpsa specific host "rescan" attribute.
+
+ CCISS_GETPCIINFO
+
+ Returns PCI domain, bus, device and function and "board ID" (PCI subsystem ID).
+
+ CCISS_GETDRIVVER
+
+ Returns driver version in three bytes encoded as:
+ (major_version << 16) | (minor_version << 8) | (subminor_version)
+
+ CCISS_PASSTHRU
+ CCISS_BIG_PASSTHRU
+
+ Allows "BMIC" and "CISS" commands to be passed through to the Smart Array.
+ These are used extensively by the HP Array Configuration Utility, SNMP storage
+ agents, etc. See cciss_vol_status at http://cciss.sf.net for some examples.
+
diff --git a/Documentation/scsi/hptiop.txt b/Documentation/scsi/hptiop.txt
new file mode 100644
index 000000000..12ecfd308
--- /dev/null
+++ b/Documentation/scsi/hptiop.txt
@@ -0,0 +1,184 @@
+HIGHPOINT ROCKETRAID 3xxx/4xxx ADAPTER DRIVER (hptiop)
+
+Controller Register Map
+-------------------------
+
+For RR44xx Intel IOP based adapters, the controller IOP is accessed via PCI BAR0 and BAR2:
+
+ BAR0 offset Register
+ 0x11C5C Link Interface IRQ Set
+ 0x11C60 Link Interface IRQ Clear
+
+ BAR2 offset Register
+ 0x10 Inbound Message Register 0
+ 0x14 Inbound Message Register 1
+ 0x18 Outbound Message Register 0
+ 0x1C Outbound Message Register 1
+ 0x20 Inbound Doorbell Register
+ 0x24 Inbound Interrupt Status Register
+ 0x28 Inbound Interrupt Mask Register
+ 0x30 Outbound Interrupt Status Register
+ 0x34 Outbound Interrupt Mask Register
+ 0x40 Inbound Queue Port
+ 0x44 Outbound Queue Port
+
+For Intel IOP based adapters, the controller IOP is accessed via PCI BAR0:
+
+ BAR0 offset Register
+ 0x10 Inbound Message Register 0
+ 0x14 Inbound Message Register 1
+ 0x18 Outbound Message Register 0
+ 0x1C Outbound Message Register 1
+ 0x20 Inbound Doorbell Register
+ 0x24 Inbound Interrupt Status Register
+ 0x28 Inbound Interrupt Mask Register
+ 0x30 Outbound Interrupt Status Register
+ 0x34 Outbound Interrupt Mask Register
+ 0x40 Inbound Queue Port
+ 0x44 Outbound Queue Port
+
+For Marvell not Frey IOP based adapters, the IOP is accessed via PCI BAR0 and BAR1:
+
+ BAR0 offset Register
+ 0x20400 Inbound Doorbell Register
+ 0x20404 Inbound Interrupt Mask Register
+ 0x20408 Outbound Doorbell Register
+ 0x2040C Outbound Interrupt Mask Register
+
+ BAR1 offset Register
+ 0x0 Inbound Queue Head Pointer
+ 0x4 Inbound Queue Tail Pointer
+ 0x8 Outbound Queue Head Pointer
+ 0xC Outbound Queue Tail Pointer
+ 0x10 Inbound Message Register
+ 0x14 Outbound Message Register
+ 0x40-0x1040 Inbound Queue
+ 0x1040-0x2040 Outbound Queue
+
+For Marvell Frey IOP based adapters, the IOP is accessed via PCI BAR0 and BAR1:
+
+ BAR0 offset Register
+ 0x0 IOP configuration information.
+
+ BAR1 offset Register
+ 0x4000 Inbound List Base Address Low
+ 0x4004 Inbound List Base Address High
+ 0x4018 Inbound List Write Pointer
+ 0x402C Inbound List Configuration and Control
+ 0x4050 Outbound List Base Address Low
+ 0x4054 Outbound List Base Address High
+ 0x4058 Outbound List Copy Pointer Shadow Base Address Low
+ 0x405C Outbound List Copy Pointer Shadow Base Address High
+ 0x4088 Outbound List Interrupt Cause
+ 0x408C Outbound List Interrupt Enable
+ 0x1020C PCIe Function 0 Interrupt Enable
+ 0x10400 PCIe Function 0 to CPU Message A
+ 0x10420 CPU to PCIe Function 0 Message A
+ 0x10480 CPU to PCIe Function 0 Doorbell
+ 0x10484 CPU to PCIe Function 0 Doorbell Enable
+
+
+I/O Request Workflow of Not Marvell Frey
+------------------------------------------
+
+All queued requests are handled via inbound/outbound queue port.
+A request packet can be allocated in either IOP or host memory.
+
+To send a request to the controller:
+
+ - Get a free request packet by reading the inbound queue port or
+ allocate a free request in host DMA coherent memory.
+
+ The value returned from the inbound queue port is an offset
+ relative to the IOP BAR0.
+
+ Requests allocated in host memory must be aligned on 32-bytes boundary.
+
+ - Fill the packet.
+
+ - Post the packet to IOP by writing it to inbound queue. For requests
+ allocated in IOP memory, write the offset to inbound queue port. For
+ requests allocated in host memory, write (0x80000000|(bus_addr>>5))
+ to the inbound queue port.
+
+ - The IOP process the request. When the request is completed, it
+ will be put into outbound queue. An outbound interrupt will be
+ generated.
+
+ For requests allocated in IOP memory, the request offset is posted to
+ outbound queue.
+
+ For requests allocated in host memory, (0x80000000|(bus_addr>>5))
+ is posted to the outbound queue. If IOP_REQUEST_FLAG_OUTPUT_CONTEXT
+ flag is set in the request, the low 32-bit context value will be
+ posted instead.
+
+ - The host read the outbound queue and complete the request.
+
+ For requests allocated in IOP memory, the host driver free the request
+ by writing it to the outbound queue.
+
+Non-queued requests (reset/flush etc) can be sent via inbound message
+register 0. An outbound message with the same value indicates the completion
+of an inbound message.
+
+
+I/O Request Workflow of Marvell Frey
+--------------------------------------
+
+All queued requests are handled via inbound/outbound list.
+
+To send a request to the controller:
+
+ - Allocate a free request in host DMA coherent memory.
+
+ Requests allocated in host memory must be aligned on 32-bytes boundary.
+
+ - Fill the request with index of the request in the flag.
+
+ Fill a free inbound list unit with the physical address and the size of
+ the request.
+
+ Set up the inbound list write pointer with the index of previous unit,
+ round to 0 if the index reaches the supported count of requests.
+
+ - Post the inbound list writer pointer to IOP.
+
+ - The IOP process the request. When the request is completed, the flag of
+ the request with or-ed IOPMU_QUEUE_MASK_HOST_BITS will be put into a
+ free outbound list unit and the index of the outbound list unit will be
+ put into the copy pointer shadow register. An outbound interrupt will be
+ generated.
+
+ - The host read the outbound list copy pointer shadow register and compare
+ with previous saved read pointer N. If they are different, the host will
+ read the (N+1)th outbound list unit.
+
+ The host get the index of the request from the (N+1)th outbound list
+ unit and complete the request.
+
+Non-queued requests (reset communication/reset/flush etc) can be sent via PCIe
+Function 0 to CPU Message A register. The CPU to PCIe Function 0 Message register
+with the same value indicates the completion of message.
+
+
+User-level Interface
+---------------------
+
+The driver exposes following sysfs attributes:
+
+ NAME R/W Description
+ driver-version R driver version string
+ firmware-version R firmware version string
+
+
+-----------------------------------------------------------------------------
+Copyright (C) 2006-2012 HighPoint Technologies, Inc. All Rights Reserved.
+
+ This file is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ linux@highpoint-tech.com
+ http://www.highpoint-tech.com
diff --git a/Documentation/scsi/in2000.txt b/Documentation/scsi/in2000.txt
new file mode 100644
index 000000000..c3e2a9047
--- /dev/null
+++ b/Documentation/scsi/in2000.txt
@@ -0,0 +1,202 @@
+
+UPDATE NEWS: version 1.33 - 26 Aug 98
+
+ Interrupt management in this driver has become, over
+ time, increasingly odd and difficult to explain - this
+ has been mostly due to my own mental inadequacies. In
+ recent kernels, it has failed to function at all when
+ compiled for SMP. I've fixed that problem, and after
+ taking a fresh look at interrupts in general, greatly
+ reduced the number of places where they're fiddled
+ with. Done some heavy testing and it looks very good.
+ The driver now makes use of the __initfunc() and
+ __initdata macros to save about 4k of kernel memory.
+ Once again, the same code works for both 2.0.xx and
+ 2.1.xx kernels.
+
+UPDATE NEWS: version 1.32 - 28 Mar 98
+
+ Removed the check for legal IN2000 hardware versions:
+ It appears that the driver works fine with serial
+ EPROMs (the 8-pin chip that defines hardware rev) as
+ old as 2.1, so we'll assume that all cards are OK.
+
+UPDATE NEWS: version 1.31 - 6 Jul 97
+
+ Fixed a bug that caused incorrect SCSI status bytes to be
+ returned from commands sent to LUNs greater than 0. This
+ means that CDROM changers work now! Fixed a bug in the
+ handling of command-line arguments when loaded as a module.
+ Also put all the header data in in2000.h where it belongs.
+ There are no longer any differences between this driver in
+ the 2.1.xx source tree and the 2.0.xx tree, as of 2.0.31
+ and 2.1.45 (or is it .46?) - this makes things much easier
+ for me...
+
+UPDATE NEWS: version 1.30 - 14 Oct 96
+
+ Fixed a bug in the code that sets the transfer direction
+ bit (DESTID_DPD in the WD_DESTINATION_ID register). There
+ are quite a few SCSI commands that do a write-to-device;
+ now we deal with all of them correctly. Thanks to Joerg
+ Dorchain for catching this one.
+
+UPDATE NEWS: version 1.29 - 24 Sep 96
+
+ The memory-mapped hardware on the card is now accessed via
+ the 'readb()' and 'readl()' macros - required by the new
+ memory management scheme in the 2.1.x kernel series.
+ As suggested by Andries Brouwer, 'bios_param()' no longer
+ forces an artificial 1023 track limit on drives. Also
+ removed some kludge-code left over from struggles with
+ older (buggy) compilers.
+
+UPDATE NEWS: version 1.28 - 07 May 96
+
+ Tightened up the "interrupts enabled/disabled" discipline
+ in 'in2000_queuecommand()' and maybe 1 or 2 other places.
+ I _think_ it may have been a little too lax, causing an
+ occasional crash during full moon. A fully functional
+ /proc interface is now in place - if you want to play
+ with it, start by doing 'cat /proc/scsi/in2000/0'. You
+ can also use it to change a few run-time parameters on
+ the fly, but it's mostly for debugging. The curious
+ should take a good look at 'in2000_proc_info()' in the
+ in2000.c file to get an understanding of what it's all
+ about; I figure that people who are really into it will
+ want to add features suited to their own needs...
+ Also, sync is now DISABLED by default.
+
+UPDATE NEWS: version 1.27 - 10 Apr 96
+
+ Fixed a well-hidden bug in the adaptive-disconnect code
+ that would show up every now and then during extreme
+ heavy loads involving 2 or more simultaneously active
+ devices. Thanks to Joe Mack for keeping my nose to the
+ grindstone on this one.
+
+UPDATE NEWS: version 1.26 - 07 Mar 96
+
+ 1.25 had a nasty bug that bit people with swap partitions
+ and tape drives. Also, in my attempt to guess my way
+ through Intel assembly language, I made an error in the
+ inline code for IO writes. Made a few other changes and
+ repairs - this version (fingers crossed) should work well.
+
+UPDATE NEWS: version 1.25 - 05 Mar 96
+
+ Kernel 1.3.70 interrupt mods added; old kernels still OK.
+ Big help from Bill Earnest and David Willmore on speed
+ testing and optimizing: I think there's a real improvement
+ in this area.
+ New! User-friendly command-line interface for LILO and
+ module loading - the old method is gone, so you'll need
+ to read the comments for 'setup_strings' near the top
+ of in2000.c. For people with CDROM's or other devices
+ that have a tough time with sync negotiation, you can
+ now selectively disable sync on individual devices -
+ search for the 'nosync' keyword in the command-line
+ comments. Some of you disable the BIOS on the card, which
+ caused the auto-detect function to fail; there is now a
+ command-line option to force detection of a ROM-less card.
+
+UPDATE NEWS: version 1.24a - 24 Feb 96
+
+ There was a bug in the synchronous transfer code. Only
+ a few people downloaded before I caught it - could have
+ been worse.
+
+UPDATE NEWS: version 1.24 - 23 Feb 96
+
+ Lots of good changes. Advice from Bill Earnest resulted
+ in much better detection of cards, more efficient usage
+ of the fifo, and (hopefully) faster data transfers. The
+ jury is still out on speed - I hope it's improved some.
+ One nifty new feature is a cool way of doing disconnect/
+ reselect. The driver defaults to what I'm calling
+ 'adaptive disconnect' - meaning that each command is
+ evaluated individually as to whether or not it should be
+ run with the option to disconnect/reselect (if the device
+ chooses), or as a "SCSI-bus-hog". When several devices
+ are operating simultaneously, disconnects are usually an
+ advantage. In a single device system, or if only 1 device
+ is being accessed, transfers usually go faster if disconnects
+ are not allowed.
+
+
+
+The default arguments (you get these when you don't give an 'in2000'
+command-line argument, or you give a blank argument) will cause
+the driver to do adaptive disconnect, synchronous transfers, and a
+minimum of debug messages. If you want to fool with the options,
+search for 'setup_strings' near the top of the in2000.c file and
+check the 'hostdata->args' section in in2000.h - but be warned! Not
+everything is working yet (some things will never work, probably).
+I believe that disabling disconnects (DIS_NEVER) will allow you
+to choose a LEVEL2 value higher than 'L2_BASIC', but I haven't
+spent a lot of time testing this. You might try 'ENABLE_CLUSTERING'
+to see what happens: my tests showed little difference either way.
+There's also a define called 'DEFAULT_SX_PER'; this sets the data
+transfer speed for the asynchronous mode. I've put it at 500 ns
+despite the fact that the card could handle settings of 376 or
+252, because higher speeds may be a problem with poor quality
+cables or improper termination; 500 ns is a compromise. You can
+choose your own default through the command-line with the
+'period' keyword.
+
+
+------------------------------------------------
+*********** DIP switch settings **************
+------------------------------------------------
+
+ sw1-1 sw1-2 BIOS address (hex)
+ -----------------------------------------
+ off off C8000 - CBFF0
+ on off D8000 - DBFF0
+ off on D0000 - D3FF0
+ on on BIOS disabled
+
+ sw1-3 sw1-4 IO port address (hex)
+ ------------------------------------
+ off off 220 - 22F
+ on off 200 - 20F
+ off on 110 - 11F
+ on on 100 - 10F
+
+ sw1-5 sw1-6 sw1-7 Interrupt
+ ------------------------------
+ off off off 15
+ off on off 14
+ off off on 11
+ off on on 10
+ on - - disabled
+
+ sw1-8 function depends on BIOS version. In earlier versions this
+ controlled synchronous data transfer support for MSDOS:
+ off = disabled
+ on = enabled
+ In later ROMs (starting with 01.3 in April 1994) sw1-8 controls
+ the "greater than 2 disk drive" feature that first appeared in
+ MSDOS 5.0 (ignored by Linux):
+ off = 2 drives maximum
+ on = 7 drives maximum
+
+ sw1-9 Floppy controller
+ --------------------------
+ off disabled
+ on enabled
+
+------------------------------------------------
+
+ I should mention that Drew Eckhardt's 'Generic NCR5380' sources
+ were my main inspiration, with lots of reference to the IN2000
+ driver currently distributed in the kernel source. I also owe
+ much to a driver written by Hamish Macdonald for Linux-m68k(!).
+ And to Eric Wright for being an ALPHA guinea pig. And to Bill
+ Earnest for 2 tons of great input and information. And to David
+ Willmore for extensive 'bonnie' testing. And to Joe Mack for
+ continual testing and feedback.
+
+
+ John Shifflett jshiffle@netcom.com
+
diff --git a/Documentation/scsi/libsas.txt b/Documentation/scsi/libsas.txt
new file mode 100644
index 000000000..8cac6492a
--- /dev/null
+++ b/Documentation/scsi/libsas.txt
@@ -0,0 +1,395 @@
+SAS Layer
+---------
+
+The SAS Layer is a management infrastructure which manages
+SAS LLDDs. It sits between SCSI Core and SAS LLDDs. The
+layout is as follows: while SCSI Core is concerned with
+SAM/SPC issues, and a SAS LLDD+sequencer is concerned with
+phy/OOB/link management, the SAS layer is concerned with:
+
+ * SAS Phy/Port/HA event management (LLDD generates,
+ SAS Layer processes),
+ * SAS Port management (creation/destruction),
+ * SAS Domain discovery and revalidation,
+ * SAS Domain device management,
+ * SCSI Host registration/unregistration,
+ * Device registration with SCSI Core (SAS) or libata
+ (SATA), and
+ * Expander management and exporting expander control
+ to user space.
+
+A SAS LLDD is a PCI device driver. It is concerned with
+phy/OOB management, and vendor specific tasks and generates
+events to the SAS layer.
+
+The SAS Layer does most SAS tasks as outlined in the SAS 1.1
+spec.
+
+The sas_ha_struct describes the SAS LLDD to the SAS layer.
+Most of it is used by the SAS Layer but a few fields need to
+be initialized by the LLDDs.
+
+After initializing your hardware, from the probe() function
+you call sas_register_ha(). It will register your LLDD with
+the SCSI subsystem, creating a SCSI host and it will
+register your SAS driver with the sysfs SAS tree it creates.
+It will then return. Then you enable your phys to actually
+start OOB (at which point your driver will start calling the
+notify_* event callbacks).
+
+Structure descriptions:
+
+struct sas_phy --------------------
+Normally this is statically embedded to your driver's
+phy structure:
+ struct my_phy {
+ blah;
+ struct sas_phy sas_phy;
+ bleh;
+ };
+And then all the phys are an array of my_phy in your HA
+struct (shown below).
+
+Then as you go along and initialize your phys you also
+initialize the sas_phy struct, along with your own
+phy structure.
+
+In general, the phys are managed by the LLDD and the ports
+are managed by the SAS layer. So the phys are initialized
+and updated by the LLDD and the ports are initialized and
+updated by the SAS layer.
+
+There is a scheme where the LLDD can RW certain fields,
+and the SAS layer can only read such ones, and vice versa.
+The idea is to avoid unnecessary locking.
+
+enabled -- must be set (0/1)
+id -- must be set [0,MAX_PHYS)
+class, proto, type, role, oob_mode, linkrate -- must be set
+oob_mode -- you set this when OOB has finished and then notify
+the SAS Layer.
+
+sas_addr -- this normally points to an array holding the sas
+address of the phy, possibly somewhere in your my_phy
+struct.
+
+attached_sas_addr -- set this when you (LLDD) receive an
+IDENTIFY frame or a FIS frame, _before_ notifying the SAS
+layer. The idea is that sometimes the LLDD may want to fake
+or provide a different SAS address on that phy/port and this
+allows it to do this. At best you should copy the sas
+address from the IDENTIFY frame or maybe generate a SAS
+address for SATA directly attached devices. The Discover
+process may later change this.
+
+frame_rcvd -- this is where you copy the IDENTIFY/FIS frame
+when you get it; you lock, copy, set frame_rcvd_size and
+unlock the lock, and then call the event. It is a pointer
+since there's no way to know your hw frame size _exactly_,
+so you define the actual array in your phy struct and let
+this pointer point to it. You copy the frame from your
+DMAable memory to that area holding the lock.
+
+sas_prim -- this is where primitives go when they're
+received. See sas.h. Grab the lock, set the primitive,
+release the lock, notify.
+
+port -- this points to the sas_port if the phy belongs
+to a port -- the LLDD only reads this. It points to the
+sas_port this phy is part of. Set by the SAS Layer.
+
+ha -- may be set; the SAS layer sets it anyway.
+
+lldd_phy -- you should set this to point to your phy so you
+can find your way around faster when the SAS layer calls one
+of your callbacks and passes you a phy. If the sas_phy is
+embedded you can also use container_of -- whatever you
+prefer.
+
+
+struct sas_port --------------------
+The LLDD doesn't set any fields of this struct -- it only
+reads them. They should be self explanatory.
+
+phy_mask is 32 bit, this should be enough for now, as I
+haven't heard of a HA having more than 8 phys.
+
+lldd_port -- I haven't found use for that -- maybe other
+LLDD who wish to have internal port representation can make
+use of this.
+
+
+struct sas_ha_struct --------------------
+It normally is statically declared in your own LLDD
+structure describing your adapter:
+struct my_sas_ha {
+ blah;
+ struct sas_ha_struct sas_ha;
+ struct my_phy phys[MAX_PHYS];
+ struct sas_port sas_ports[MAX_PHYS]; /* (1) */
+ bleh;
+};
+
+(1) If your LLDD doesn't have its own port representation.
+
+What needs to be initialized (sample function given below).
+
+pcidev
+sas_addr -- since the SAS layer doesn't want to mess with
+ memory allocation, etc, this points to statically
+ allocated array somewhere (say in your host adapter
+ structure) and holds the SAS address of the host
+ adapter as given by you or the manufacturer, etc.
+sas_port
+sas_phy -- an array of pointers to structures. (see
+ note above on sas_addr).
+ These must be set. See more notes below.
+num_phys -- the number of phys present in the sas_phy array,
+ and the number of ports present in the sas_port
+ array. There can be a maximum num_phys ports (one per
+ port) so we drop the num_ports, and only use
+ num_phys.
+
+The event interface:
+
+ /* LLDD calls these to notify the class of an event. */
+ void (*notify_ha_event)(struct sas_ha_struct *, enum ha_event);
+ void (*notify_port_event)(struct sas_phy *, enum port_event);
+ void (*notify_phy_event)(struct sas_phy *, enum phy_event);
+
+When sas_register_ha() returns, those are set and can be
+called by the LLDD to notify the SAS layer of such events
+the SAS layer.
+
+The port notification:
+
+ /* The class calls these to notify the LLDD of an event. */
+ void (*lldd_port_formed)(struct sas_phy *);
+ void (*lldd_port_deformed)(struct sas_phy *);
+
+If the LLDD wants notification when a port has been formed
+or deformed it sets those to a function satisfying the type.
+
+A SAS LLDD should also implement at least one of the Task
+Management Functions (TMFs) described in SAM:
+
+ /* Task Management Functions. Must be called from process context. */
+ int (*lldd_abort_task)(struct sas_task *);
+ int (*lldd_abort_task_set)(struct domain_device *, u8 *lun);
+ int (*lldd_clear_aca)(struct domain_device *, u8 *lun);
+ int (*lldd_clear_task_set)(struct domain_device *, u8 *lun);
+ int (*lldd_I_T_nexus_reset)(struct domain_device *);
+ int (*lldd_lu_reset)(struct domain_device *, u8 *lun);
+ int (*lldd_query_task)(struct sas_task *);
+
+For more information please read SAM from T10.org.
+
+Port and Adapter management:
+
+ /* Port and Adapter management */
+ int (*lldd_clear_nexus_port)(struct sas_port *);
+ int (*lldd_clear_nexus_ha)(struct sas_ha_struct *);
+
+A SAS LLDD should implement at least one of those.
+
+Phy management:
+
+ /* Phy management */
+ int (*lldd_control_phy)(struct sas_phy *, enum phy_func);
+
+lldd_ha -- set this to point to your HA struct. You can also
+use container_of if you embedded it as shown above.
+
+A sample initialization and registration function
+can look like this (called last thing from probe())
+*but* before you enable the phys to do OOB:
+
+static int register_sas_ha(struct my_sas_ha *my_ha)
+{
+ int i;
+ static struct sas_phy *sas_phys[MAX_PHYS];
+ static struct sas_port *sas_ports[MAX_PHYS];
+
+ my_ha->sas_ha.sas_addr = &my_ha->sas_addr[0];
+
+ for (i = 0; i < MAX_PHYS; i++) {
+ sas_phys[i] = &my_ha->phys[i].sas_phy;
+ sas_ports[i] = &my_ha->sas_ports[i];
+ }
+
+ my_ha->sas_ha.sas_phy = sas_phys;
+ my_ha->sas_ha.sas_port = sas_ports;
+ my_ha->sas_ha.num_phys = MAX_PHYS;
+
+ my_ha->sas_ha.lldd_port_formed = my_port_formed;
+
+ my_ha->sas_ha.lldd_dev_found = my_dev_found;
+ my_ha->sas_ha.lldd_dev_gone = my_dev_gone;
+
+ my_ha->sas_ha.lldd_execute_task = my_execute_task;
+
+ my_ha->sas_ha.lldd_abort_task = my_abort_task;
+ my_ha->sas_ha.lldd_abort_task_set = my_abort_task_set;
+ my_ha->sas_ha.lldd_clear_aca = my_clear_aca;
+ my_ha->sas_ha.lldd_clear_task_set = my_clear_task_set;
+ my_ha->sas_ha.lldd_I_T_nexus_reset= NULL; (2)
+ my_ha->sas_ha.lldd_lu_reset = my_lu_reset;
+ my_ha->sas_ha.lldd_query_task = my_query_task;
+
+ my_ha->sas_ha.lldd_clear_nexus_port = my_clear_nexus_port;
+ my_ha->sas_ha.lldd_clear_nexus_ha = my_clear_nexus_ha;
+
+ my_ha->sas_ha.lldd_control_phy = my_control_phy;
+
+ return sas_register_ha(&my_ha->sas_ha);
+}
+
+(2) SAS 1.1 does not define I_T Nexus Reset TMF.
+
+Events
+------
+
+Events are _the only way_ a SAS LLDD notifies the SAS layer
+of anything. There is no other method or way a LLDD to tell
+the SAS layer of anything happening internally or in the SAS
+domain.
+
+Phy events:
+ PHYE_LOSS_OF_SIGNAL, (C)
+ PHYE_OOB_DONE,
+ PHYE_OOB_ERROR, (C)
+ PHYE_SPINUP_HOLD.
+
+Port events, passed on a _phy_:
+ PORTE_BYTES_DMAED, (M)
+ PORTE_BROADCAST_RCVD, (E)
+ PORTE_LINK_RESET_ERR, (C)
+ PORTE_TIMER_EVENT, (C)
+ PORTE_HARD_RESET.
+
+Host Adapter event:
+ HAE_RESET
+
+A SAS LLDD should be able to generate
+ - at least one event from group C (choice),
+ - events marked M (mandatory) are mandatory (only one),
+ - events marked E (expander) if it wants the SAS layer
+ to handle domain revalidation (only one such).
+ - Unmarked events are optional.
+
+Meaning:
+
+HAE_RESET -- when your HA got internal error and was reset.
+
+PORTE_BYTES_DMAED -- on receiving an IDENTIFY/FIS frame
+PORTE_BROADCAST_RCVD -- on receiving a primitive
+PORTE_LINK_RESET_ERR -- timer expired, loss of signal, loss
+of DWS, etc. (*)
+PORTE_TIMER_EVENT -- DWS reset timeout timer expired (*)
+PORTE_HARD_RESET -- Hard Reset primitive received.
+
+PHYE_LOSS_OF_SIGNAL -- the device is gone (*)
+PHYE_OOB_DONE -- OOB went fine and oob_mode is valid
+PHYE_OOB_ERROR -- Error while doing OOB, the device probably
+got disconnected. (*)
+PHYE_SPINUP_HOLD -- SATA is present, COMWAKE not sent.
+
+(*) should set/clear the appropriate fields in the phy,
+ or alternatively call the inlined sas_phy_disconnected()
+ which is just a helper, from their tasklet.
+
+The Execute Command SCSI RPC:
+
+ int (*lldd_execute_task)(struct sas_task *, gfp_t gfp_flags);
+
+Used to queue a task to the SAS LLDD. @task is the task to be executed.
+@gfp_mask is the gfp_mask defining the context of the caller.
+
+This function should implement the Execute Command SCSI RPC,
+
+That is, when lldd_execute_task() is called, the command
+go out on the transport *immediately*. There is *no*
+queuing of any sort and at any level in a SAS LLDD.
+
+Returns: -SAS_QUEUE_FULL, -ENOMEM, nothing was queued;
+ 0, the task(s) were queued.
+
+struct sas_task {
+ dev -- the device this task is destined to
+ task_proto -- _one_ of enum sas_proto
+ scatter -- pointer to scatter gather list array
+ num_scatter -- number of elements in scatter
+ total_xfer_len -- total number of bytes expected to be transferred
+ data_dir -- PCI_DMA_...
+ task_done -- callback when the task has finished execution
+};
+
+DISCOVERY
+---------
+
+The sysfs tree has the following purposes:
+ a) It shows you the physical layout of the SAS domain at
+ the current time, i.e. how the domain looks in the
+ physical world right now.
+ b) Shows some device parameters _at_discovery_time_.
+
+This is a link to the tree(1) program, very useful in
+viewing the SAS domain:
+ftp://mama.indstate.edu/linux/tree/
+I expect user space applications to actually create a
+graphical interface of this.
+
+That is, the sysfs domain tree doesn't show or keep state if
+you e.g., change the meaning of the READY LED MEANING
+setting, but it does show you the current connection status
+of the domain device.
+
+Keeping internal device state changes is responsibility of
+upper layers (Command set drivers) and user space.
+
+When a device or devices are unplugged from the domain, this
+is reflected in the sysfs tree immediately, and the device(s)
+removed from the system.
+
+The structure domain_device describes any device in the SAS
+domain. It is completely managed by the SAS layer. A task
+points to a domain device, this is how the SAS LLDD knows
+where to send the task(s) to. A SAS LLDD only reads the
+contents of the domain_device structure, but it never creates
+or destroys one.
+
+Expander management from User Space
+-----------------------------------
+
+In each expander directory in sysfs, there is a file called
+"smp_portal". It is a binary sysfs attribute file, which
+implements an SMP portal (Note: this is *NOT* an SMP port),
+to which user space applications can send SMP requests and
+receive SMP responses.
+
+Functionality is deceptively simple:
+
+1. Build the SMP frame you want to send. The format and layout
+ is described in the SAS spec. Leave the CRC field equal 0.
+open(2)
+2. Open the expander's SMP portal sysfs file in RW mode.
+write(2)
+3. Write the frame you built in 1.
+read(2)
+4. Read the amount of data you expect to receive for the frame you built.
+ If you receive different amount of data you expected to receive,
+ then there was some kind of error.
+close(2)
+All this process is shown in detail in the function do_smp_func()
+and its callers, in the file "expander_conf.c".
+
+The kernel functionality is implemented in the file
+"sas_expander.c".
+
+The program "expander_conf.c" implements this. It takes one
+argument, the sysfs file name of the SMP portal to the
+expander, and gives expander information, including routing
+tables.
+
+The SMP portal gives you complete control of the expander,
+so please be careful.
diff --git a/Documentation/scsi/link_power_management_policy.txt b/Documentation/scsi/link_power_management_policy.txt
new file mode 100644
index 000000000..0285601a6
--- /dev/null
+++ b/Documentation/scsi/link_power_management_policy.txt
@@ -0,0 +1,22 @@
+This parameter allows the user to set the link (interface) power management.
+There are 4 possible options:
+
+Value Effect
+----------------------------------------------------------------------------
+firmware_defaults Inherit configuration from the state programmed by
+ the firmware during system init.
+
+min_power Tell the controller to try to make the link use the
+ least possible power when possible. This may
+ sacrifice some performance due to increased latency
+ when coming out of lower power states.
+
+max_performance Generally, this means no power management. Tell
+ the controller to have performance be a priority
+ over power management.
+
+medium_power Tell the controller to enter a lower power state
+ when possible, but do not enter the lowest power
+ state, thus improving latency over min_power setting.
+
+
diff --git a/Documentation/scsi/lpfc.txt b/Documentation/scsi/lpfc.txt
new file mode 100644
index 000000000..5741ea8aa
--- /dev/null
+++ b/Documentation/scsi/lpfc.txt
@@ -0,0 +1,83 @@
+
+LPFC Driver Release Notes:
+
+=============================================================================
+
+
+ IMPORTANT:
+
+ Starting in the 8.0.17 release, the driver began to be targeted strictly
+ toward the upstream kernel. As such, we removed #ifdefs for older kernels
+ (pre 2.6.10). The 8.0.16 release should be used if the driver is to be
+ run on one of the older kernels.
+
+ The proposed modifications to the transport layer for FC remote ports
+ and extended attribute support is now part of the upstream kernel
+ as of 2.6.12. We no longer need to provide patches for this support,
+ nor a *full* version which has old an new kernel support.
+
+ The driver now requires a 2.6.12 (if pre-release, 2.6.12-rc1) or later
+ kernel.
+
+ Please heed these dependencies....
+
+
+ ********************************************************************
+
+
+The following information is provided for additional background on the
+history of the driver as we push for upstream acceptance.
+
+Cable pull and temporary device Loss:
+
+ In older revisions of the lpfc driver, the driver internally queued i/o
+ received from the midlayer. In the cases where a cable was pulled, link
+ jitter, or a device temporarily loses connectivity (due to its cable
+ being removed, a switch rebooting, or a device reboot), the driver could
+ hide the disappearance of the device from the midlayer. I/O's issued to
+ the LLDD would simply be queued for a short duration, allowing the device
+ to reappear or link come back alive, with no inadvertent side effects
+ to the system. If the driver did not hide these conditions, i/o would be
+ errored by the driver, the mid-layer would exhaust its retries, and the
+ device would be taken offline. Manual intervention would be required to
+ re-enable the device.
+
+ The community supporting kernel.org has driven an effort to remove
+ internal queuing from all LLDDs. The philosophy is that internal
+ queuing is unnecessary as the block layer already performs the
+ queuing. Removing the queues from the LLDD makes a more predictable
+ and more simple LLDD.
+
+ As a potential new addition to kernel.org, the 8.x driver was asked to
+ have all internal queuing removed. Emulex complied with this request.
+ In explaining the impacts of this change, Emulex has worked with the
+ community in modifying the behavior of the SCSI midlayer so that SCSI
+ devices can be temporarily suspended while transport events (such as
+ those described) can occur.
+
+ The proposed patch was posted to the linux-scsi mailing list. The patch
+ is contained in the 2.6.10-rc2 (and later) patch kits. As such, this
+ patch is part of the standard 2.6.10 kernel.
+
+ By default, the driver expects the patches for block/unblock interfaces
+ to be present in the kernel. No #define needs to be set to enable support.
+
+
+Kernel Support
+
+ This source package is targeted for the upstream kernel only. (See notes
+ at the top of this file). It relies on interfaces that are slowing
+ migrating into the kernel.org kernel.
+
+ At this time, the driver requires the 2.6.12 (if pre-release, 2.6.12-rc1)
+ kernel.
+
+ If a driver is needed for older kernels please utilize the 8.0.16
+ driver sources.
+
+
+Patches
+
+ Thankfully, at this time, patches are not needed.
+
+
diff --git a/Documentation/scsi/megaraid.txt b/Documentation/scsi/megaraid.txt
new file mode 100644
index 000000000..3c7cea51e
--- /dev/null
+++ b/Documentation/scsi/megaraid.txt
@@ -0,0 +1,70 @@
+ Notes on Management Module
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Overview:
+--------
+
+Different classes of controllers from LSI Logic accept and respond to the
+user applications in a similar way. They understand the same firmware control
+commands. Furthermore, the applications also can treat different classes of
+the controllers uniformly. Hence it is logical to have a single module that
+interfaces with the applications on one side and all the low level drivers
+on the other.
+
+The advantages, though obvious, are listed for completeness:
+
+ i. Avoid duplicate code from the low level drivers.
+ ii. Unburden the low level drivers from having to export the
+ character node device and related handling.
+ iii. Implement any policy mechanisms in one place.
+ iv. Applications have to interface with only module instead of
+ multiple low level drivers.
+
+Currently this module (called Common Management Module) is used only to issue
+ioctl commands. But this module is envisioned to handle all user space level
+interactions. So any 'proc', 'sysfs' implementations will be localized in this
+common module.
+
+Credits:
+-------
+
+"Shared code in a third module, a "library module", is an acceptable
+solution. modprobe automatically loads dependent modules, so users
+running "modprobe driver1" or "modprobe driver2" would automatically
+load the shared library module."
+
+ - Jeff Garzik (jgarzik@pobox.com), 02.25.2004 LKML
+
+"As Jeff hinted, if your userspace<->driver API is consistent between
+your new MPT-based RAID controllers and your existing megaraid driver,
+then perhaps you need a single small helper module (lsiioctl or some
+better name), loaded by both mptraid and megaraid automatically, which
+handles registering the /dev/megaraid node dynamically. In this case,
+both mptraid and megaraid would register with lsiioctl for each
+adapter discovered, and lsiioctl would essentially be a switch,
+redirecting userspace tool ioctls to the appropriate driver."
+
+ - Matt Domsch, (Matt_Domsch@dell.com), 02.25.2004 LKML
+
+Design:
+------
+
+The Common Management Module is implemented in megaraid_mm.[ch] files. This
+module acts as a registry for low level hba drivers. The low level drivers
+(currently only megaraid) register each controller with the common module.
+
+The applications interface with the common module via the character device
+node exported by the module.
+
+The lower level drivers now understand only a new improved ioctl packet called
+uioc_t. The management module converts the older ioctl packets from the older
+applications into uioc_t. After driver handles the uioc_t, the common module
+will convert that back into the old format before returning to applications.
+
+As new applications evolve and replace the old ones, the old packet format
+will be retired.
+
+Common module dedicates one uioc_t packet to each controller registered. This
+can easily be more than one. But since megaraid is the only low level driver
+today, and it can handle only one ioctl, there is no reason to have more. But
+as new controller classes get added, this will be tuned appropriately.
diff --git a/Documentation/scsi/ncr53c8xx.txt b/Documentation/scsi/ncr53c8xx.txt
new file mode 100644
index 000000000..8586efff1
--- /dev/null
+++ b/Documentation/scsi/ncr53c8xx.txt
@@ -0,0 +1,1824 @@
+The Linux NCR53C8XX/SYM53C8XX drivers README file
+
+Written by Gerard Roudier <groudier@free.fr>
+21 Rue Carnot
+95170 DEUIL LA BARRE - FRANCE
+
+29 May 1999
+===============================================================================
+
+1. Introduction
+2. Supported chips and SCSI features
+3. Advantages of the enhanced 896 driver
+ 3.1 Optimized SCSI SCRIPTS
+ 3.2 New features of the SYM53C896 (64 bit PCI dual LVD SCSI controller)
+4. Memory mapped I/O versus normal I/O
+5. Tagged command queueing
+6. Parity checking
+7. Profiling information
+8. Control commands
+ 8.1 Set minimum synchronous period
+ 8.2 Set wide size
+ 8.3 Set maximum number of concurrent tagged commands
+ 8.4 Set order type for tagged command
+ 8.5 Set debug mode
+ 8.6 Clear profile counters
+ 8.7 Set flag (no_disc)
+ 8.8 Set verbose level
+ 8.9 Reset all logical units of a target
+ 8.10 Abort all tasks of all logical units of a target
+9. Configuration parameters
+10. Boot setup commands
+ 10.1 Syntax
+ 10.2 Available arguments
+ 10.2.1 Master parity checking
+ 10.2.2 Scsi parity checking
+ 10.2.3 Scsi disconnections
+ 10.2.4 Special features
+ 10.2.5 Ultra SCSI support
+ 10.2.6 Default number of tagged commands
+ 10.2.7 Default synchronous period factor
+ 10.2.8 Negotiate synchronous with all devices
+ 10.2.9 Verbosity level
+ 10.2.10 Debug mode
+ 10.2.11 Burst max
+ 10.2.12 LED support
+ 10.2.13 Max wide
+ 10.2.14 Differential mode
+ 10.2.15 IRQ mode
+ 10.2.16 Reverse probe
+ 10.2.17 Fix up PCI configuration space
+ 10.2.18 Serial NVRAM
+ 10.2.19 Check SCSI BUS
+ 10.2.20 Exclude a host from being attached
+ 10.2.21 Suggest a default SCSI id for hosts
+ 10.2.22 Enable use of IMMEDIATE ARBITRATION
+ 10.3 Advised boot setup commands
+ 10.4 PCI configuration fix-up boot option
+ 10.5 Serial NVRAM support boot option
+ 10.6 SCSI BUS checking boot option
+ 10.7 IMMEDIATE ARBITRATION boot option
+11. Some constants and flags of the ncr53c8xx.h header file
+12. Installation
+13. Architecture dependent features
+14. Known problems
+ 14.1 Tagged commands with Iomega Jaz device
+ 14.2 Device names change when another controller is added
+ 14.3 Using only 8 bit devices with a WIDE SCSI controller.
+ 14.4 Possible data corruption during a Memory Write and Invalidate
+ 14.5 IRQ sharing problems
+15. SCSI problem troubleshooting
+ 15.1 Problem tracking
+ 15.2 Understanding hardware error reports
+16. Synchronous transfer negotiation tables
+ 16.1 Synchronous timings for 53C875 and 53C860 Ultra-SCSI controllers
+ 16.2 Synchronous timings for fast SCSI-2 53C8XX controllers
+17. Serial NVRAM support (by Richard Waltham)
+ 17.1 Features
+ 17.2 Symbios NVRAM layout
+ 17.3 Tekram NVRAM layout
+18. Support for Big Endian
+ 18.1 Big Endian CPU
+ 18.2 NCR chip in Big Endian mode of operations
+
+===============================================================================
+
+1. Introduction
+
+The initial Linux ncr53c8xx driver has been a port of the ncr driver from
+FreeBSD that has been achieved in November 1995 by:
+ Gerard Roudier <groudier@free.fr>
+
+The original driver has been written for 386bsd and FreeBSD by:
+ Wolfgang Stanglmeier <wolf@cologne.de>
+ Stefan Esser <se@mi.Uni-Koeln.de>
+
+It is now available as a bundle of 2 drivers:
+
+- ncr53c8xx generic driver that supports all the SYM53C8XX family including
+ the earliest 810 rev. 1, the latest 896 (2 channel LVD SCSI controller) and
+ the new 895A (1 channel LVD SCSI controller).
+- sym53c8xx enhanced driver (a.k.a. 896 drivers) that drops support of oldest
+ chips in order to gain advantage of new features, as LOAD/STORE instructions
+ available since the 810A and hardware phase mismatch available with the
+ 896 and the 895A.
+
+You can find technical information about the NCR 8xx family in the
+PCI-HOWTO written by Michael Will and in the SCSI-HOWTO written by
+Drew Eckhardt.
+
+Information about new chips is available at LSILOGIC web server:
+
+ http://www.lsilogic.com/
+
+SCSI standard documentations are available at SYMBIOS ftp server:
+
+ ftp://ftp.symbios.com/
+
+Useful SCSI tools written by Eric Youngdale are available at tsx-11:
+
+ ftp://tsx-11.mit.edu/pub/linux/ALPHA/scsi/scsiinfo-X.Y.tar.gz
+ ftp://tsx-11.mit.edu/pub/linux/ALPHA/scsi/scsidev-X.Y.tar.gz
+
+These tools are not ALPHA but quite clean and work quite well.
+It is essential you have the 'scsiinfo' package.
+
+This short documentation describes the features of the generic and enhanced
+drivers, configuration parameters and control commands available through
+the proc SCSI file system read / write operations.
+
+This driver has been tested OK with linux/i386, Linux/Alpha and Linux/PPC.
+
+Latest driver version and patches are available at:
+
+ ftp://ftp.tux.org/pub/people/gerard-roudier
+or
+ ftp://ftp.symbios.com/mirror/ftp.tux.org/pub/tux/roudier/drivers
+
+I am not a native speaker of English and there are probably lots of
+mistakes in this README file. Any help will be welcome.
+
+
+2. Supported chips and SCSI features
+
+The following features are supported for all chips:
+
+ Synchronous negotiation
+ Disconnection
+ Tagged command queuing
+ SCSI parity checking
+ Master parity checking
+
+"Wide negotiation" is supported for chips that allow it. The
+following table shows some characteristics of NCR 8xx family chips
+and what drivers support them.
+
+ Supported by Supported by
+ On board the generic the enhanced
+Chip SDMS BIOS Wide SCSI std. Max. sync driver driver
+---- --------- ---- --------- ---------- ------------ -------------
+810 N N FAST10 10 MB/s Y N
+810A N N FAST10 10 MB/s Y Y
+815 Y N FAST10 10 MB/s Y N
+825 Y Y FAST10 20 MB/s Y N
+825A Y Y FAST10 20 MB/s Y Y
+860 N N FAST20 20 MB/s Y Y
+875 Y Y FAST20 40 MB/s Y Y
+876 Y Y FAST20 40 MB/s Y Y
+895 Y Y FAST40 80 MB/s Y Y
+895A Y Y FAST40 80 MB/s Y Y
+896 Y Y FAST40 80 MB/s Y Y
+897 Y Y FAST40 80 MB/s Y Y
+1510D Y Y FAST40 80 MB/s Y Y
+1010 Y Y FAST80 160 MB/s N Y
+1010_66* Y Y FAST80 160 MB/s N Y
+
+* Chip supports 33MHz and 66MHz PCI buses.
+
+
+Summary of other supported features:
+
+Module: allow to load the driver
+Memory mapped I/O: increases performance
+Profiling information: read operations from the proc SCSI file system
+Control commands: write operations to the proc SCSI file system
+Debugging information: written to syslog (expert only)
+Scatter / gather
+Shared interrupt
+Boot setup commands
+Serial NVRAM: Symbios and Tekram formats
+
+
+3. Advantages of the enhanced 896 driver
+
+3.1 Optimized SCSI SCRIPTS.
+
+The 810A, 825A, 875, 895, 896 and 895A support new SCSI SCRIPTS instructions
+named LOAD and STORE that allow to move up to 1 DWORD from/to an IO register
+to/from memory much faster that the MOVE MEMORY instruction that is supported
+by the 53c7xx and 53c8xx family.
+The LOAD/STORE instructions support absolute and DSA relative addressing
+modes. The SCSI SCRIPTS had been entirely rewritten using LOAD/STORE instead
+of MOVE MEMORY instructions.
+
+3.2 New features of the SYM53C896 (64 bit PCI dual LVD SCSI controller)
+
+The 896 and the 895A allows handling of the phase mismatch context from
+SCRIPTS (avoids the phase mismatch interrupt that stops the SCSI processor
+until the C code has saved the context of the transfer).
+Implementing this without using LOAD/STORE instructions would be painful
+and I didn't even want to try it.
+
+The 896 chip supports 64 bit PCI transactions and addressing, while the
+895A supports 32 bit PCI transactions and 64 bit addressing.
+The SCRIPTS processor of these chips is not true 64 bit, but uses segment
+registers for bit 32-63. Another interesting feature is that LOAD/STORE
+instructions that address the on-chip RAM (8k) remain internal to the chip.
+
+Due to the use of LOAD/STORE SCRIPTS instructions, this driver does not
+support the following chips:
+- SYM53C810 revision < 0x10 (16)
+- SYM53C815 all revisions
+- SYM53C825 revision < 0x10 (16)
+
+4. Memory mapped I/O versus normal I/O
+
+Memory mapped I/O has less latency than normal I/O. Since
+linux-1.3.x, memory mapped I/O is used rather than normal I/O. Memory
+mapped I/O seems to work fine on most hardware configurations, but
+some poorly designed motherboards may break this feature.
+
+The configuration option CONFIG_SCSI_NCR53C8XX_IOMAPPED forces the
+driver to use normal I/O in all cases.
+
+
+5. Tagged command queueing
+
+Queuing more than 1 command at a time to a device allows it to perform
+optimizations based on actual head positions and its mechanical
+characteristics. This feature may also reduce average command latency.
+In order to really gain advantage of this feature, devices must have
+a reasonable cache size (No miracle is to be expected for a low-end
+hard disk with 128 KB or less).
+Some known SCSI devices do not properly support tagged command queuing.
+Generally, firmware revisions that fix this kind of problems are available
+at respective vendor web/ftp sites.
+All I can say is that the hard disks I use on my machines behave well with
+this driver with tagged command queuing enabled:
+
+- IBM S12 0662
+- Conner 1080S
+- Quantum Atlas I
+- Quantum Atlas II
+
+If your controller has NVRAM, you can configure this feature per target
+from the user setup tool. The Tekram Setup program allows to tune the
+maximum number of queued commands up to 32. The Symbios Setup only allows
+to enable or disable this feature.
+
+The maximum number of simultaneous tagged commands queued to a device
+is currently set to 8 by default. This value is suitable for most SCSI
+disks. With large SCSI disks (>= 2GB, cache >= 512KB, average seek time
+<= 10 ms), using a larger value may give better performances.
+
+The sym53c8xx driver supports up to 255 commands per device, and the
+generic ncr53c8xx driver supports up to 64, but using more than 32 is
+generally not worth-while, unless you are using a very large disk or disk
+array. It is noticeable that most of recent hard disks seem not to accept
+more than 64 simultaneous commands. So, using more than 64 queued commands
+is probably just resource wasting.
+
+If your controller does not have NVRAM or if it is managed by the SDMS
+BIOS/SETUP, you can configure tagged queueing feature and device queue
+depths from the boot command-line. For example:
+
+ ncr53c8xx=tags:4/t2t3q15-t4q7/t1u0q32
+
+will set tagged commands queue depths as follow:
+
+- target 2 all luns on controller 0 --> 15
+- target 3 all luns on controller 0 --> 15
+- target 4 all luns on controller 0 --> 7
+- target 1 lun 0 on controller 1 --> 32
+- all other target/lun --> 4
+
+In some special conditions, some SCSI disk firmwares may return a
+QUEUE FULL status for a SCSI command. This behaviour is managed by the
+driver using the following heuristic:
+
+- Each time a QUEUE FULL status is returned, tagged queue depth is reduced
+ to the actual number of disconnected commands.
+
+- Every 1000 successfully completed SCSI commands, if allowed by the
+ current limit, the maximum number of queueable commands is incremented.
+
+Since QUEUE FULL status reception and handling is resource wasting, the
+driver notifies by default this problem to user by indicating the actual
+number of commands used and their status, as well as its decision on the
+device queue depth change.
+The heuristic used by the driver in handling QUEUE FULL ensures that the
+impact on performances is not too bad. You can get rid of the messages by
+setting verbose level to zero, as follow:
+
+1st method: boot your system using 'ncr53c8xx=verb:0' option.
+2nd method: apply "setverbose 0" control command to the proc fs entry
+ corresponding to your controller after boot-up.
+
+6. Parity checking
+
+The driver supports SCSI parity checking and PCI bus master parity
+checking. These features must be enabled in order to ensure safe data
+transfers. However, some flawed devices or mother boards will have
+problems with parity. You can disable either PCI parity or SCSI parity
+checking by entering appropriate options from the boot command line.
+(See 10: Boot setup commands).
+
+7. Profiling information
+
+Profiling information is available through the proc SCSI file system.
+Since gathering profiling information may impact performances, this
+feature is disabled by default and requires a compilation configuration
+option to be set to Y.
+
+The device associated with a host has the following pathname:
+
+ /proc/scsi/ncr53c8xx/N (N=0,1,2 ....)
+
+Generally, only 1 board is used on hardware configuration, and that device is:
+ /proc/scsi/ncr53c8xx/0
+
+However, if the driver has been made as module, the number of the
+hosts is incremented each time the driver is loaded.
+
+In order to display profiling information, just enter:
+
+ cat /proc/scsi/ncr53c8xx/0
+
+and you will get something like the following text:
+
+-------------------------------------------------------
+General information:
+ Chip NCR53C810, device id 0x1, revision id 0x2
+ IO port address 0x6000, IRQ number 10
+ Using memory mapped IO at virtual address 0x282c000
+ Synchronous transfer period 25, max commands per lun 4
+Profiling information:
+ num_trans = 18014
+ num_kbytes = 671314
+ num_disc = 25763
+ num_break = 1673
+ num_int = 1685
+ num_fly = 18038
+ ms_setup = 4940
+ ms_data = 369940
+ ms_disc = 183090
+ ms_post = 1320
+-------------------------------------------------------
+
+General information is easy to understand. The device ID and the
+revision ID identify the SCSI chip as follows:
+
+Chip Device id Revision Id
+---- --------- -----------
+810 0x1 < 0x10
+810A 0x1 >= 0x10
+815 0x4
+825 0x3 < 0x10
+860 0x6
+825A 0x3 >= 0x10
+875 0xf
+895 0xc
+
+The profiling information is updated upon completion of SCSI commands.
+A data structure is allocated and zeroed when the host adapter is
+attached. So, if the driver is a module, the profile counters are
+cleared each time the driver is loaded. The "clearprof" command
+allows you to clear these counters at any time.
+
+The following counters are available:
+
+("num" prefix means "number of",
+"ms" means milli-seconds)
+
+num_trans
+ Number of completed commands
+ Example above: 18014 completed commands
+
+num_kbytes
+ Number of kbytes transferred
+ Example above: 671 MB transferred
+
+num_disc
+ Number of SCSI disconnections
+ Example above: 25763 SCSI disconnections
+
+num_break
+ number of script interruptions (phase mismatch)
+ Example above: 1673 script interruptions
+
+num_int
+ Number of interrupts other than "on the fly"
+ Example above: 1685 interruptions not "on the fly"
+
+num_fly
+ Number of interrupts "on the fly"
+ Example above: 18038 interruptions "on the fly"
+
+ms_setup
+ Elapsed time for SCSI commands setups
+ Example above: 4.94 seconds
+
+ms_data
+ Elapsed time for data transfers
+ Example above: 369.94 seconds spent for data transfer
+
+ms_disc
+ Elapsed time for SCSI disconnections
+ Example above: 183.09 seconds spent disconnected
+
+ms_post
+ Elapsed time for command post processing
+ (time from SCSI status get to command completion call)
+ Example above: 1.32 seconds spent for post processing
+
+Due to the 1/100 second tick of the system clock, "ms_post" time may
+be wrong.
+
+In the example above, we got 18038 interrupts "on the fly" and only
+1673 script breaks generally due to disconnections inside a segment
+of the scatter list.
+
+
+8. Control commands
+
+Control commands can be sent to the driver with write operations to
+the proc SCSI file system. The generic command syntax is the
+following:
+
+ echo "<verb> <parameters>" >/proc/scsi/ncr53c8xx/0
+ (assumes controller number is 0)
+
+Using "all" for "<target>" parameter with the commands below will
+apply to all targets of the SCSI chain (except the controller).
+
+Available commands:
+
+8.1 Set minimum synchronous period factor
+
+ setsync <target> <period factor>
+
+ target: target number
+ period: minimum synchronous period.
+ Maximum speed = 1000/(4*period factor) except for special
+ cases below.
+
+ Specify a period of 255, to force asynchronous transfer mode.
+
+ 10 means 25 nano-seconds synchronous period
+ 11 means 30 nano-seconds synchronous period
+ 12 means 50 nano-seconds synchronous period
+
+8.2 Set wide size
+
+ setwide <target> <size>
+
+ target: target number
+ size: 0=8 bits, 1=16bits
+
+8.3 Set maximum number of concurrent tagged commands
+
+ settags <target> <tags>
+
+ target: target number
+ tags: number of concurrent tagged commands
+ must not be greater than SCSI_NCR_MAX_TAGS (default: 8)
+
+8.4 Set order type for tagged command
+
+ setorder <order>
+
+ order: 3 possible values:
+ simple: use SIMPLE TAG for all operations (read and write)
+ ordered: use ORDERED TAG for all operations
+ default: use default tag type,
+ SIMPLE TAG for read operations
+ ORDERED TAG for write operations
+
+
+8.5 Set debug mode
+
+ setdebug <list of debug flags>
+
+ Available debug flags:
+ alloc: print info about memory allocations (ccb, lcb)
+ queue: print info about insertions into the command start queue
+ result: print sense data on CHECK CONDITION status
+ scatter: print info about the scatter process
+ scripts: print info about the script binding process
+ tiny: print minimal debugging information
+ timing: print timing information of the NCR chip
+ nego: print information about SCSI negotiations
+ phase: print information on script interruptions
+
+ Use "setdebug" with no argument to reset debug flags.
+
+
+8.6 Clear profile counters
+
+ clearprof
+
+ The profile counters are automatically cleared when the amount of
+ data transferred reaches 1000 GB in order to avoid overflow.
+ The "clearprof" command allows you to clear these counters at any time.
+
+
+8.7 Set flag (no_disc)
+
+ setflag <target> <flag>
+
+ target: target number
+
+ For the moment, only one flag is available:
+
+ no_disc: not allow target to disconnect.
+
+ Do not specify any flag in order to reset the flag. For example:
+ - setflag 4
+ will reset no_disc flag for target 4, so will allow it disconnections.
+ - setflag all
+ will allow disconnection for all devices on the SCSI bus.
+
+
+8.8 Set verbose level
+
+ setverbose #level
+
+ The driver default verbose level is 1. This command allows to change
+ th driver verbose level after boot-up.
+
+8.9 Reset all logical units of a target
+
+ resetdev <target>
+
+ target: target number
+ The driver will try to send a BUS DEVICE RESET message to the target.
+ (Only supported by the SYM53C8XX driver and provided for test purpose)
+
+8.10 Abort all tasks of all logical units of a target
+
+ cleardev <target>
+
+ target: target number
+ The driver will try to send a ABORT message to all the logical units
+ of the target.
+ (Only supported by the SYM53C8XX driver and provided for test purpose)
+
+
+9. Configuration parameters
+
+If the firmware of all your devices is perfect enough, all the
+features supported by the driver can be enabled at start-up. However,
+if only one has a flaw for some SCSI feature, you can disable the
+support by the driver of this feature at linux start-up and enable
+this feature after boot-up only for devices that support it safely.
+
+CONFIG_SCSI_NCR53C8XX_IOMAPPED (default answer: n)
+ Answer "y" if you suspect your mother board to not allow memory mapped I/O.
+ May slow down performance a little. This option is required by
+ Linux/PPC and is used no matter what you select here. Linux/PPC
+ suffers no performance loss with this option since all IO is memory
+ mapped anyway.
+
+CONFIG_SCSI_NCR53C8XX_DEFAULT_TAGS (default answer: 8)
+ Default tagged command queue depth.
+
+CONFIG_SCSI_NCR53C8XX_MAX_TAGS (default answer: 8)
+ This option allows you to specify the maximum number of tagged commands
+ that can be queued to a device. The maximum supported value is 32.
+
+CONFIG_SCSI_NCR53C8XX_SYNC (default answer: 5)
+ This option allows you to specify the frequency in MHz the driver
+ will use at boot time for synchronous data transfer negotiations.
+ This frequency can be changed later with the "setsync" control command.
+ 0 means "asynchronous data transfers".
+
+CONFIG_SCSI_NCR53C8XX_FORCE_SYNC_NEGO (default answer: n)
+ Force synchronous negotiation for all SCSI-2 devices.
+ Some SCSI-2 devices do not report this feature in byte 7 of inquiry
+ response but do support it properly (TAMARACK scanners for example).
+
+CONFIG_SCSI_NCR53C8XX_NO_DISCONNECT (default and only reasonable answer: n)
+ If you suspect a device of yours does not properly support disconnections,
+ you can answer "y". Then, all SCSI devices will never disconnect the bus
+ even while performing long SCSI operations.
+
+CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT
+ Genuine SYMBIOS boards use GPIO0 in output for controller LED and GPIO3
+ bit as a flag indicating singled-ended/differential interface.
+ If all the boards of your system are genuine SYMBIOS boards or use
+ BIOS and drivers from SYMBIOS, you would want to enable this option.
+ This option must NOT be enabled if your system has at least one 53C8XX
+ based scsi board with a vendor-specific BIOS.
+ For example, Tekram DC-390/U, DC-390/W and DC-390/F scsi controllers
+ use a vendor-specific BIOS and are known to not use SYMBIOS compatible
+ GPIO wiring. So, this option must not be enabled if your system has
+ such a board installed.
+
+CONFIG_SCSI_NCR53C8XX_NVRAM_DETECT
+ Enable support for reading the serial NVRAM data on Symbios and
+ some Symbios compatible cards, and Tekram DC390W/U/F cards. Useful for
+ systems with more than one Symbios compatible controller where at least
+ one has a serial NVRAM, or for a system with a mixture of Symbios and
+ Tekram cards. Enables setting the boot order of host adaptors
+ to something other than the default order or "reverse probe" order.
+ Also enables Symbios and Tekram cards to be distinguished so
+ CONFIG_SCSI_NCR53C8XX_SYMBIOS_COMPAT may be set in a system with a
+ mixture of Symbios and Tekram cards so the Symbios cards can make use of
+ the full range of Symbios features, differential, led pin, without
+ causing problems for the Tekram card(s).
+
+10. Boot setup commands
+
+10.1 Syntax
+
+Setup commands can be passed to the driver either at boot time or as a
+string variable using 'insmod'.
+
+A boot setup command for the ncr53c8xx (sym53c8xx) driver begins with the
+driver name "ncr53c8xx="(sym53c8xx). The kernel syntax parser then expects
+an optional list of integers separated with comma followed by an optional
+list of comma-separated strings. Example of boot setup command under lilo
+prompt:
+
+lilo: linux root=/dev/hda2 ncr53c8xx=tags:4,sync:10,debug:0x200
+
+- enable tagged commands, up to 4 tagged commands queued.
+- set synchronous negotiation speed to 10 Mega-transfers / second.
+- set DEBUG_NEGO flag.
+
+Since comma seems not to be allowed when defining a string variable using
+'insmod', the driver also accepts <space> as option separator.
+The following command will install driver module with the same options as
+above.
+
+ insmod ncr53c8xx.o ncr53c8xx="tags:4 sync:10 debug:0x200"
+
+For the moment, the integer list of arguments is discarded by the driver.
+It will be used in the future in order to allow a per controller setup.
+
+Each string argument must be specified as "keyword:value". Only lower-case
+characters and digits are allowed.
+
+In a system that contains multiple 53C8xx adapters insmod will install the
+specified driver on each adapter. To exclude a chip use the 'excl' keyword.
+
+The sequence of commands,
+
+ insmod sym53c8xx sym53c8xx=excl:0x1400
+ insmod ncr53c8xx
+
+installs the sym53c8xx driver on all adapters except the one at IO port
+address 0x1400 and then installs the ncr53c8xx driver to the adapter at IO
+port address 0x1400.
+
+
+10.2 Available arguments
+
+10.2.1 Master parity checking
+ mpar:y enabled
+ mpar:n disabled
+
+10.2.2 Scsi parity checking
+ spar:y enabled
+ spar:n disabled
+
+10.2.3 Scsi disconnections
+ disc:y enabled
+ disc:n disabled
+
+10.2.4 Special features
+ Only apply to 810A, 825A, 860, 875 and 895 controllers.
+ Have no effect with other ones.
+ specf:y (or 1) enabled
+ specf:n (or 0) disabled
+ specf:3 enabled except Memory Write And Invalidate
+ The default driver setup is 'specf:3'. As a consequence, option 'specf:y'
+ must be specified in the boot setup command to enable Memory Write And
+ Invalidate.
+
+10.2.5 Ultra SCSI support
+ Only apply to 860, 875, 895, 895a, 896, 1010 and 1010_66 controllers.
+ Have no effect with other ones.
+ ultra:n All ultra speeds enabled
+ ultra:2 Ultra2 enabled
+ ultra:1 Ultra enabled
+ ultra:0 Ultra speeds disabled
+
+10.2.6 Default number of tagged commands
+ tags:0 (or tags:1 ) tagged command queuing disabled
+ tags:#tags (#tags > 1) tagged command queuing enabled
+ #tags will be truncated to the max queued commands configuration parameter.
+ This option also allows to specify a command queue depth for each device
+ that support tagged command queueing.
+ Example:
+ ncr53c8xx=tags:10/t2t3q16-t5q24/t1u2q32
+ will set devices queue depth as follow:
+ - controller #0 target #2 and target #3 -> 16 commands,
+ - controller #0 target #5 -> 24 commands,
+ - controller #1 target #1 logical unit #2 -> 32 commands,
+ - all other logical units (all targets, all controllers) -> 10 commands.
+
+10.2.7 Default synchronous period factor
+ sync:255 disabled (asynchronous transfer mode)
+ sync:#factor
+ #factor = 10 Ultra-2 SCSI 40 Mega-transfers / second
+ #factor = 11 Ultra-2 SCSI 33 Mega-transfers / second
+ #factor < 25 Ultra SCSI 20 Mega-transfers / second
+ #factor < 50 Fast SCSI-2
+
+ In all cases, the driver will use the minimum transfer period supported by
+ controllers according to NCR53C8XX chip type.
+
+10.2.8 Negotiate synchronous with all devices
+ (force sync nego)
+ fsn:y enabled
+ fsn:n disabled
+
+10.2.9 Verbosity level
+ verb:0 minimal
+ verb:1 normal
+ verb:2 too much
+
+10.2.10 Debug mode
+ debug:0 clear debug flags
+ debug:#x set debug flags
+ #x is an integer value combining the following power-of-2 values:
+ DEBUG_ALLOC 0x1
+ DEBUG_PHASE 0x2
+ DEBUG_POLL 0x4
+ DEBUG_QUEUE 0x8
+ DEBUG_RESULT 0x10
+ DEBUG_SCATTER 0x20
+ DEBUG_SCRIPT 0x40
+ DEBUG_TINY 0x80
+ DEBUG_TIMING 0x100
+ DEBUG_NEGO 0x200
+ DEBUG_TAGS 0x400
+ DEBUG_FREEZE 0x800
+ DEBUG_RESTART 0x1000
+
+ You can play safely with DEBUG_NEGO. However, some of these flags may
+ generate bunches of syslog messages.
+
+10.2.11 Burst max
+ burst:0 burst disabled
+ burst:255 get burst length from initial IO register settings.
+ burst:#x burst enabled (1<<#x burst transfers max)
+ #x is an integer value which is log base 2 of the burst transfers max.
+ The NCR53C875 and NCR53C825A support up to 128 burst transfers (#x = 7).
+ Other chips only support up to 16 (#x = 4).
+ This is a maximum value. The driver set the burst length according to chip
+ and revision ids. By default the driver uses the maximum value supported
+ by the chip.
+
+10.2.12 LED support
+ led:1 enable LED support
+ led:0 disable LED support
+ Donnot enable LED support if your scsi board does not use SDMS BIOS.
+ (See 'Configuration parameters')
+
+10.2.13 Max wide
+ wide:1 wide scsi enabled
+ wide:0 wide scsi disabled
+ Some scsi boards use a 875 (ultra wide) and only supply narrow connectors.
+ If you have connected a wide device with a 50 pins to 68 pins cable
+ converter, any accepted wide negotiation will break further data transfers.
+ In such a case, using "wide:0" in the bootup command will be helpful.
+
+10.2.14 Differential mode
+ diff:0 never set up diff mode
+ diff:1 set up diff mode if BIOS set it
+ diff:2 always set up diff mode
+ diff:3 set diff mode if GPIO3 is not set
+
+10.2.15 IRQ mode
+ irqm:0 always open drain
+ irqm:1 same as initial settings (assumed BIOS settings)
+ irqm:2 always totem pole
+ irqm:0x10 driver will not use IRQF_SHARED flag when requesting irq
+
+ (Bits 0x10 and 0x20 can be combined with hardware irq mode option)
+
+10.2.16 Reverse probe
+ revprob:n probe chip ids from the PCI configuration in this order:
+ 810, 815, 820, 860, 875, 885, 895, 896
+ revprob:y probe chip ids in the reverse order.
+
+10.2.17 Fix up PCI configuration space
+ pcifix:<option bits>
+
+ Available option bits:
+ 0x0: No attempt to fix PCI configuration space registers values.
+ 0x1: Set PCI cache-line size register if not set.
+ 0x2: Set write and invalidate bit in PCI command register.
+ 0x4: Increase if necessary PCI latency timer according to burst max.
+
+ Use 'pcifix:7' in order to allow the driver to fix up all PCI features.
+
+10.2.18 Serial NVRAM
+ nvram:n do not look for serial NVRAM
+ nvram:y test controllers for onboard serial NVRAM
+ (alternate binary form)
+ mvram=<bits options>
+ 0x01 look for NVRAM (equivalent to nvram=y)
+ 0x02 ignore NVRAM "Synchronous negotiation" parameters for all devices
+ 0x04 ignore NVRAM "Wide negotiation" parameter for all devices
+ 0x08 ignore NVRAM "Scan at boot time" parameter for all devices
+ 0x80 also attach controllers set to OFF in the NVRAM (sym53c8xx only)
+
+10.2.19 Check SCSI BUS
+ buschk:<option bits>
+
+ Available option bits:
+ 0x0: No check.
+ 0x1: Check and do not attach the controller on error.
+ 0x2: Check and just warn on error.
+ 0x4: Disable SCSI bus integrity checking.
+
+10.2.20 Exclude a host from being attached
+ excl=<io_address>
+
+ Prevent host at a given io address from being attached.
+ For example 'ncr53c8xx=excl:0xb400,excl:0xc000' indicate to the
+ ncr53c8xx driver not to attach hosts at address 0xb400 and 0xc000.
+
+10.2.21 Suggest a default SCSI id for hosts
+ hostid:255 no id suggested.
+ hostid:#x (0 < x < 7) x suggested for hosts SCSI id.
+
+ If a host SCSI id is available from the NVRAM, the driver will ignore
+ any value suggested as boot option. Otherwise, if a suggested value
+ different from 255 has been supplied, it will use it. Otherwise, it will
+ try to deduce the value previously set in the hardware and use value
+ 7 if the hardware value is zero.
+
+10.2.22 Enable use of IMMEDIATE ARBITRATION
+ (only supported by the sym53c8xx driver. See 10.7 for more details)
+ iarb:0 do not use this feature.
+ iarb:#x use this feature according to bit fields as follow:
+
+ bit 0 (1) : enable IARB each time the initiator has been reselected
+ when it arbitrated for the SCSI BUS.
+ (#x >> 4) : maximum number of successive settings of IARB if the initiator
+ win arbitration and it has other commands to send to a device.
+
+Boot fail safe
+ safe:y load the following assumed fail safe initial setup
+
+ master parity disabled mpar:n
+ scsi parity enabled spar:y
+ disconnections not allowed disc:n
+ special features disabled specf:n
+ ultra scsi disabled ultra:n
+ force sync negotiation disabled fsn:n
+ reverse probe disabled revprob:n
+ PCI fix up disabled pcifix:0
+ serial NVRAM enabled nvram:y
+ verbosity level 2 verb:2
+ tagged command queuing disabled tags:0
+ synchronous negotiation disabled sync:255
+ debug flags none debug:0
+ burst length from BIOS settings burst:255
+ LED support disabled led:0
+ wide support disabled wide:0
+ settle time 10 seconds settle:10
+ differential support from BIOS settings diff:1
+ irq mode from BIOS settings irqm:1
+ SCSI BUS check do not attach on error buschk:1
+ immediate arbitration disabled iarb:0
+
+10.3 Advised boot setup commands
+
+If the driver has been configured with default options, the equivalent
+boot setup is:
+
+ ncr53c8xx=mpar:y,spar:y,disc:y,specf:3,fsn:n,ultra:2,fsn:n,revprob:n,verb:1\
+ tags:0,sync:50,debug:0,burst:7,led:0,wide:1,settle:2,diff:0,irqm:0
+
+For an installation diskette or a safe but not fast system,
+boot setup can be:
+
+ ncr53c8xx=safe:y,mpar:y,disc:y
+ ncr53c8xx=safe:y,disc:y
+ ncr53c8xx=safe:y,mpar:y
+ ncr53c8xx=safe:y
+
+My personal system works flawlessly with the following equivalent setup:
+
+ ncr53c8xx=mpar:y,spar:y,disc:y,specf:1,fsn:n,ultra:2,fsn:n,revprob:n,verb:1\
+ tags:32,sync:12,debug:0,burst:7,led:1,wide:1,settle:2,diff:0,irqm:0
+
+The driver prints its actual setup when verbosity level is 2. You can try
+"ncr53c8xx=verb:2" to get the "static" setup of the driver, or add "verb:2"
+to your boot setup command in order to check the actual setup the driver is
+using.
+
+10.4 PCI configuration fix-up boot option
+
+pcifix:<option bits>
+
+Available option bits:
+ 0x1: Set PCI cache-line size register if not set.
+ 0x2: Set write and invalidate bit in PCI command register.
+
+Use 'pcifix:3' in order to allow the driver to fix both PCI features.
+
+These options only apply to new SYMBIOS chips 810A, 825A, 860, 875
+and 895 and are only supported for Pentium and 486 class processors.
+Recent SYMBIOS 53C8XX scsi processors are able to use PCI read multiple
+and PCI write and invalidate commands. These features require the
+cache line size register to be properly set in the PCI configuration
+space of the chips. On the other hand, chips will use PCI write and
+invalidate commands only if the corresponding bit is set to 1 in the
+PCI command register.
+
+Not all PCI bioses set the PCI cache line register and the PCI write and
+invalidate bit in the PCI configuration space of 53C8XX chips.
+Optimized PCI accesses may be broken for some PCI/memory controllers or
+make problems with some PCI boards.
+
+This fix-up worked flawlessly on my previous system.
+(MB Triton HX / 53C875 / 53C810A)
+I use these options at my own risks as you will do if you decide to
+use them too.
+
+
+10.5 Serial NVRAM support boot option
+
+nvram:n do not look for serial NVRAM
+nvram:y test controllers for onboard serial NVRAM
+
+This option can also been entered as an hexadecimal value that allows
+to control what information the driver will get from the NVRAM and what
+information it will ignore.
+For details see '17. Serial NVRAM support'.
+
+When this option is enabled, the driver tries to detect all boards using
+a Serial NVRAM. This memory is used to hold user set up parameters.
+
+The parameters the driver is able to get from the NVRAM depend on the
+data format used, as follow:
+
+ Tekram format Symbios format
+General and host parameters
+ Boot order N Y
+ Host SCSI ID Y Y
+ SCSI parity checking Y Y
+ Verbose boot messages N Y
+SCSI devices parameters
+ Synchronous transfer speed Y Y
+ Wide 16 / Narrow Y Y
+ Tagged Command Queuing enabled Y Y
+ Disconnections enabled Y Y
+ Scan at boot time N Y
+
+In order to speed up the system boot, for each device configured without
+the "scan at boot time" option, the driver forces an error on the
+first TEST UNIT READY command received for this device.
+
+Some SDMS BIOS revisions seem to be unable to boot cleanly with very fast
+hard disks. In such a situation you cannot configure the NVRAM with
+optimized parameters value.
+
+The 'nvram' boot option can be entered in hexadecimal form in order
+to ignore some options configured in the NVRAM, as follow:
+
+mvram=<bits options>
+ 0x01 look for NVRAM (equivalent to nvram=y)
+ 0x02 ignore NVRAM "Synchronous negotiation" parameters for all devices
+ 0x04 ignore NVRAM "Wide negotiation" parameter for all devices
+ 0x08 ignore NVRAM "Scan at boot time" parameter for all devices
+ 0x80 also attach controllers set to OFF in the NVRAM (sym53c8xx only)
+
+Option 0x80 is only supported by the sym53c8xx driver and is disabled by
+default. Result is that, by default (option not set), the sym53c8xx driver
+will not attach controllers set to OFF in the NVRAM.
+
+The ncr53c8xx always tries to attach all the controllers. Option 0x80 has
+not been added to the ncr53c8xx driver, since it has been reported to
+confuse users who use this driver since a long time. If you desire a
+controller not to be attached by the ncr53c8xx driver at Linux boot, you
+must use the 'excl' driver boot option.
+
+10.6 SCSI BUS checking boot option.
+
+When this option is set to a non-zero value, the driver checks SCSI lines
+logic state, 100 micro-seconds after having asserted the SCSI RESET line.
+The driver just reads SCSI lines and checks all lines read FALSE except RESET.
+Since SCSI devices shall release the BUS at most 800 nano-seconds after SCSI
+RESET has been asserted, any signal to TRUE may indicate a SCSI BUS problem.
+Unfortunately, the following common SCSI BUS problems are not detected:
+- Only 1 terminator installed.
+- Misplaced terminators.
+- Bad quality terminators.
+On the other hand, either bad cabling, broken devices, not conformant
+devices, ... may cause a SCSI signal to be wrong when te driver reads it.
+
+10.7 IMMEDIATE ARBITRATION boot option
+
+This option is only supported by the SYM53C8XX driver (not by the NCR53C8XX).
+
+SYMBIOS 53C8XX chips are able to arbitrate for the SCSI BUS as soon as they
+have detected an expected disconnection (BUS FREE PHASE). For this process
+to be started, bit 1 of SCNTL1 IO register must be set when the chip is
+connected to the SCSI BUS.
+
+When this feature has been enabled for the current connection, the chip has
+every chance to win arbitration if only devices with lower priority are
+competing for the SCSI BUS. By the way, when the chip is using SCSI id 7,
+then it will for sure win the next SCSI BUS arbitration.
+
+Since, there is no way to know what devices are trying to arbitrate for the
+BUS, using this feature can be extremely unfair. So, you are not advised
+to enable it, or at most enable this feature for the case the chip lost
+the previous arbitration (boot option 'iarb:1').
+
+This feature has the following advantages:
+
+a) Allow the initiator with ID 7 to win arbitration when it wants so.
+b) Overlap at least 4 micro-seconds of arbitration time with the execution
+ of SCRIPTS that deal with the end of the current connection and that
+ starts the next job.
+
+Hmmm... But (a) may just prevent other devices from reselecting the initiator,
+and delay data transfers or status/completions, and (b) may just waste
+SCSI BUS bandwidth if the SCRIPTS execution lasts more than 4 micro-seconds.
+
+The use of IARB needs the SCSI_NCR_IARB_SUPPORT option to have been defined
+at compile time and the 'iarb' boot option to have been set to a non zero
+value at boot time. It is not that useful for real work, but can be used
+to stress SCSI devices or for some applications that can gain advantage of
+it. By the way, if you experience badnesses like 'unexpected disconnections',
+'bad reselections', etc... when using IARB on heavy IO load, you should not
+be surprised, because force-feeding anything and blocking its arse at the
+same time cannot work for a long time. :-))
+
+
+11. Some constants and flags of the ncr53c8xx.h header file
+
+Some of these are defined from the configuration parameters. To
+change other "defines", you must edit the header file. Do that only
+if you know what you are doing.
+
+SCSI_NCR_SETUP_SPECIAL_FEATURES (default: defined)
+ If defined, the driver will enable some special features according
+ to chip and revision id.
+ For 810A, 860, 825A, 875 and 895 scsi chips, this option enables
+ support of features that reduce load of PCI bus and memory accesses
+ during scsi transfer processing: burst op-code fetch, read multiple,
+ read line, prefetch, cache line, write and invalidate,
+ burst 128 (875 only), large dma fifo (875 only), offset 16 (875 only).
+ Can be changed by the following boot setup command:
+ ncr53c8xx=specf:n
+
+SCSI_NCR_IOMAPPED (default: not defined)
+ If defined, normal I/O is forced.
+
+SCSI_NCR_SHARE_IRQ (default: defined)
+ If defined, request shared IRQ.
+
+SCSI_NCR_MAX_TAGS (default: 8)
+ Maximum number of simultaneous tagged commands to a device.
+ Can be changed by "settags <target> <maxtags>"
+
+SCSI_NCR_SETUP_DEFAULT_SYNC (default: 50)
+ Transfer period factor the driver will use at boot time for synchronous
+ negotiation. 0 means asynchronous.
+ Can be changed by "setsync <target> <period factor>"
+
+SCSI_NCR_SETUP_DEFAULT_TAGS (default: 8)
+ Default number of simultaneous tagged commands to a device.
+ < 1 means tagged command queuing disabled at start-up.
+
+SCSI_NCR_ALWAYS_SIMPLE_TAG (default: defined)
+ Use SIMPLE TAG for read and write commands.
+ Can be changed by "setorder <ordered|simple|default>"
+
+SCSI_NCR_SETUP_DISCONNECTION (default: defined)
+ If defined, targets are allowed to disconnect.
+
+SCSI_NCR_SETUP_FORCE_SYNC_NEGO (default: not defined)
+ If defined, synchronous negotiation is tried for all SCSI-2 devices.
+ Can be changed by "setsync <target> <period>"
+
+SCSI_NCR_SETUP_MASTER_PARITY (default: defined)
+ If defined, master parity checking is enabled.
+
+SCSI_NCR_SETUP_SCSI_PARITY (default: defined)
+ If defined, SCSI parity checking is enabled.
+
+SCSI_NCR_PROFILE_SUPPORT (default: not defined)
+ If defined, profiling information is gathered.
+
+SCSI_NCR_MAX_SCATTER (default: 128)
+ Scatter list size of the driver ccb.
+
+SCSI_NCR_MAX_TARGET (default: 16)
+ Max number of targets per host.
+
+SCSI_NCR_MAX_HOST (default: 2)
+ Max number of host controllers.
+
+SCSI_NCR_SETTLE_TIME (default: 2)
+ Number of seconds the driver will wait after reset.
+
+SCSI_NCR_TIMEOUT_ALERT (default: 3)
+ If a pending command will time out after this amount of seconds,
+ an ordered tag is used for the next command.
+ Avoids timeouts for unordered tagged commands.
+
+SCSI_NCR_CAN_QUEUE (default: 7*SCSI_NCR_MAX_TAGS)
+ Max number of commands that can be queued to a host.
+
+SCSI_NCR_CMD_PER_LUN (default: SCSI_NCR_MAX_TAGS)
+ Max number of commands queued to a host for a device.
+
+SCSI_NCR_SG_TABLESIZE (default: SCSI_NCR_MAX_SCATTER-1)
+ Max size of the Linux scatter/gather list.
+
+SCSI_NCR_MAX_LUN (default: 8)
+ Max number of LUNs per target.
+
+
+12. Installation
+
+This driver is part of the linux kernel distribution.
+Driver files are located in the sub-directory "drivers/scsi" of the
+kernel source tree.
+
+Driver files:
+
+ README.ncr53c8xx : this file
+ ChangeLog.ncr53c8xx : change log
+ ncr53c8xx.h : definitions
+ ncr53c8xx.c : the driver code
+
+New driver versions are made available separately in order to allow testing
+changes and new features prior to including them into the linux kernel
+distribution. The following URL provides information on latest available
+patches:
+
+ ftp://ftp.tux.org/pub/people/gerard-roudier/README
+
+
+13. Architecture dependent features.
+
+<Not yet written>
+
+
+14. Known problems
+
+14.1 Tagged commands with Iomega Jaz device
+
+I have not tried this device, however it has been reported to me the
+following: This device is capable of Tagged command queuing. However
+while spinning up, it rejects Tagged commands. This behaviour is
+conforms to 6.8.2 of SCSI-2 specifications. The current behaviour of
+the driver in that situation is not satisfying. So do not enable
+Tagged command queuing for devices that are able to spin down. The
+other problem that may appear is timeouts. The only way to avoid
+timeouts seems to edit linux/drivers/scsi/sd.c and to increase the
+current timeout values.
+
+14.2 Device names change when another controller is added.
+
+When you add a new NCR53C8XX chip based controller to a system that already
+has one or more controllers of this family, it may happen that the order
+the driver registers them to the kernel causes problems due to device
+name changes.
+When at least one controller uses NvRAM, SDMS BIOS version 4 allows you to
+define the order the BIOS will scan the scsi boards. The driver attaches
+controllers according to BIOS information if NvRAM detect option is set.
+
+If your controllers do not have NvRAM, you can:
+
+- Ask the driver to probe chip ids in reverse order from the boot command
+ line: ncr53c8xx=revprob:y
+- Make appropriate changes in the fstab.
+- Use the 'scsidev' tool from Eric Youngdale.
+
+14.3 Using only 8 bit devices with a WIDE SCSI controller.
+
+When only 8 bit NARROW devices are connected to a 16 bit WIDE SCSI controller,
+you must ensure that lines of the wide part of the SCSI BUS are pulled-up.
+This can be achieved by ENABLING the WIDE TERMINATOR portion of the SCSI
+controller card.
+The TYAN 1365 documentation revision 1.2 is not correct about such settings.
+(page 10, figure 3.3).
+
+14.4 Possible data corruption during a Memory Write and Invalidate
+
+This problem is described in SYMBIOS DEL 397, Part Number 69-039241, ITEM 4.
+
+In some complex situations, 53C875 chips revision <= 3 may start a PCI
+Write and Invalidate Command at a not cache-line-aligned 4 DWORDS boundary.
+This is only possible when Cache Line Size is 8 DWORDS or greater.
+Pentium systems use a 8 DWORDS cache line size and so are concerned by
+this chip bug, unlike i486 systems that use a 4 DWORDS cache line size.
+
+When this situation occurs, the chip may complete the Write and Invalidate
+command after having only filled part of the last cache line involved in
+the transfer, leaving to data corruption the remainder of this cache line.
+
+Not using Write And Invalidate obviously gets rid of this chip bug, and so
+it is now the default setting of the driver.
+However, for people like me who want to enable this feature, I have added
+part of a work-around suggested by SYMBIOS. This work-around resets the
+addressing logic when the DATA IN phase is entered and so prevents the bug
+from being triggered for the first SCSI MOVE of the phase. This work-around
+should be enough according to the following:
+
+The only driver internal data structure that is greater than 8 DWORDS and
+that is moved by the SCRIPTS processor is the 'CCB header' that contains
+the context of the SCSI transfer. This data structure is aligned on 8 DWORDS
+boundary (Pentium Cache Line Size), and so is immune to this chip bug, at
+least on Pentium systems.
+But the conditions of this bug can be met when a SCSI read command is
+performed using a buffer that is 4 DWORDS but not cache-line aligned.
+This cannot happen under Linux when scatter/gather lists are used since
+they only refer to system buffers that are well aligned. So, a work around
+may only be needed under Linux when a scatter/gather list is not used and
+when the SCSI DATA IN phase is reentered after a phase mismatch.
+
+15. SCSI problem troubleshooting
+
+15.1 Problem tracking
+
+Most SCSI problems are due to a non conformant SCSI bus or to buggy
+devices. If unfortunately you have SCSI problems, you can check the
+following things:
+
+- SCSI bus cables
+- terminations at both end of the SCSI chain
+- linux syslog messages (some of them may help you)
+
+If you do not find the source of problems, you can configure the
+driver with no features enabled.
+
+- only asynchronous data transfers
+- tagged commands disabled
+- disconnections not allowed
+
+Now, if your SCSI bus is ok, your system have every chance to work
+with this safe configuration but performances will not be optimal.
+
+If it still fails, then you can send your problem description to
+appropriate mailing lists or news-groups. Send me a copy in order to
+be sure I will receive it. Obviously, a bug in the driver code is
+possible.
+
+ My email address: Gerard Roudier <groudier@free.fr>
+
+Allowing disconnections is important if you use several devices on
+your SCSI bus but often causes problems with buggy devices.
+Synchronous data transfers increases throughput of fast devices like
+hard disks. Good SCSI hard disks with a large cache gain advantage of
+tagged commands queuing.
+
+Try to enable one feature at a time with control commands. For example:
+
+- echo "setsync all 25" >/proc/scsi/ncr53c8xx/0
+ Will enable fast synchronous data transfer negotiation for all targets.
+
+- echo "setflag 3" >/proc/scsi/ncr53c8xx/0
+ Will reset flags (no_disc) for target 3, and so will allow it to disconnect
+ the SCSI Bus.
+
+- echo "settags 3 8" >/proc/scsi/ncr53c8xx/0
+ Will enable tagged command queuing for target 3 if that device supports it.
+
+Once you have found the device and the feature that cause problems, just
+disable that feature for that device.
+
+15.2 Understanding hardware error reports
+
+When the driver detects an unexpected error condition, it may display a
+message of the following pattern.
+
+sym53c876-0:1: ERROR (0:48) (1-21-65) (f/95) @ (script 7c0:19000000).
+sym53c876-0: script cmd = 19000000
+sym53c876-0: regdump: da 10 80 95 47 0f 01 07 75 01 81 21 80 01 09 00.
+
+Some fields in such a message may help you understand the cause of the
+problem, as follows:
+
+sym53c876-0:1: ERROR (0:48) (1-21-65) (f/95) @ (script 7c0:19000000).
+............A.........B.C....D.E..F....G.H.......I.....J...K.......
+
+Field A : target number.
+ SCSI ID of the device the controller was talking with at the moment the
+ error occurs.
+
+Field B : DSTAT io register (DMA STATUS)
+ Bit 0x40 : MDPE Master Data Parity Error
+ Data parity error detected on the PCI BUS.
+ Bit 0x20 : BF Bus Fault
+ PCI bus fault condition detected
+ Bit 0x01 : IID Illegal Instruction Detected
+ Set by the chip when it detects an Illegal Instruction format
+ on some condition that makes an instruction illegal.
+ Bit 0x80 : DFE Dma Fifo Empty
+ Pure status bit that does not indicate an error.
+ If the reported DSTAT value contains a combination of MDPE (0x40),
+ BF (0x20), then the cause may be likely due to a PCI BUS problem.
+
+Field C : SIST io register (SCSI Interrupt Status)
+ Bit 0x08 : SGE SCSI GROSS ERROR
+ Indicates that the chip detected a severe error condition
+ on the SCSI BUS that prevents the SCSI protocol from functioning
+ properly.
+ Bit 0x04 : UDC Unexpected Disconnection
+ Indicates that the device released the SCSI BUS when the chip
+ was not expecting this to happen. A device may behave so to
+ indicate the SCSI initiator that an error condition not reportable using the SCSI protocol has occurred.
+ Bit 0x02 : RST SCSI BUS Reset
+ Generally SCSI targets do not reset the SCSI BUS, although any
+ device on the BUS can reset it at any time.
+ Bit 0x01 : PAR Parity
+ SCSI parity error detected.
+ On a faulty SCSI BUS, any error condition among SGE (0x08), UDC (0x04) and
+ PAR (0x01) may be detected by the chip. If your SCSI system sometimes
+ encounters such error conditions, especially SCSI GROSS ERROR, then a SCSI
+ BUS problem is likely the cause of these errors.
+
+For fields D,E,F,G and H, you may look into the sym53c8xx_defs.h file
+that contains some minimal comments on IO register bits.
+Field D : SOCL Scsi Output Control Latch
+ This register reflects the state of the SCSI control lines the
+ chip want to drive or compare against.
+Field E : SBCL Scsi Bus Control Lines
+ Actual value of control lines on the SCSI BUS.
+Field F : SBDL Scsi Bus Data Lines
+ Actual value of data lines on the SCSI BUS.
+Field G : SXFER SCSI Transfer
+ Contains the setting of the Synchronous Period for output and
+ the current Synchronous offset (offset 0 means asynchronous).
+Field H : SCNTL3 Scsi Control Register 3
+ Contains the setting of timing values for both asynchronous and
+ synchronous data transfers.
+
+Understanding Fields I, J, K and dumps requires to have good knowledge of
+SCSI standards, chip cores functionnals and internal driver data structures.
+You are not required to decode and understand them, unless you want to help
+maintain the driver code.
+
+16. Synchronous transfer negotiation tables
+
+Tables below have been created by calling the routine the driver uses
+for synchronisation negotiation timing calculation and chip setting.
+The first table corresponds to Ultra chips 53875 and 53C860 with 80 MHz
+clock and 5 clock divisors.
+The second one has been calculated by setting the scsi clock to 40 Mhz
+and using 4 clock divisors and so applies to all NCR53C8XX chips in fast
+SCSI-2 mode.
+
+Periods are in nano-seconds and speeds are in Mega-transfers per second.
+1 Mega-transfers/second means 1 MB/s with 8 bits SCSI and 2 MB/s with
+Wide16 SCSI.
+
+16.1 Synchronous timings for 53C895, 53C875 and 53C860 SCSI controllers
+
+ ----------------------------------------------
+ Negotiated NCR settings
+ Factor Period Speed Period Speed
+ ------ ------ ------ ------ ------
+ 10 25 40.000 25 40.000 (53C895 only)
+ 11 30.2 33.112 31.25 32.000 (53C895 only)
+ 12 50 20.000 50 20.000
+ 13 52 19.230 62 16.000
+ 14 56 17.857 62 16.000
+ 15 60 16.666 62 16.000
+ 16 64 15.625 75 13.333
+ 17 68 14.705 75 13.333
+ 18 72 13.888 75 13.333
+ 19 76 13.157 87 11.428
+ 20 80 12.500 87 11.428
+ 21 84 11.904 87 11.428
+ 22 88 11.363 93 10.666
+ 23 92 10.869 93 10.666
+ 24 96 10.416 100 10.000
+ 25 100 10.000 100 10.000
+ 26 104 9.615 112 8.888
+ 27 108 9.259 112 8.888
+ 28 112 8.928 112 8.888
+ 29 116 8.620 125 8.000
+ 30 120 8.333 125 8.000
+ 31 124 8.064 125 8.000
+ 32 128 7.812 131 7.619
+ 33 132 7.575 150 6.666
+ 34 136 7.352 150 6.666
+ 35 140 7.142 150 6.666
+ 36 144 6.944 150 6.666
+ 37 148 6.756 150 6.666
+ 38 152 6.578 175 5.714
+ 39 156 6.410 175 5.714
+ 40 160 6.250 175 5.714
+ 41 164 6.097 175 5.714
+ 42 168 5.952 175 5.714
+ 43 172 5.813 175 5.714
+ 44 176 5.681 187 5.333
+ 45 180 5.555 187 5.333
+ 46 184 5.434 187 5.333
+ 47 188 5.319 200 5.000
+ 48 192 5.208 200 5.000
+ 49 196 5.102 200 5.000
+
+
+16.2 Synchronous timings for fast SCSI-2 53C8XX controllers
+
+ ----------------------------------------------
+ Negotiated NCR settings
+ Factor Period Speed Period Speed
+ ------ ------ ------ ------ ------
+ 25 100 10.000 100 10.000
+ 26 104 9.615 125 8.000
+ 27 108 9.259 125 8.000
+ 28 112 8.928 125 8.000
+ 29 116 8.620 125 8.000
+ 30 120 8.333 125 8.000
+ 31 124 8.064 125 8.000
+ 32 128 7.812 131 7.619
+ 33 132 7.575 150 6.666
+ 34 136 7.352 150 6.666
+ 35 140 7.142 150 6.666
+ 36 144 6.944 150 6.666
+ 37 148 6.756 150 6.666
+ 38 152 6.578 175 5.714
+ 39 156 6.410 175 5.714
+ 40 160 6.250 175 5.714
+ 41 164 6.097 175 5.714
+ 42 168 5.952 175 5.714
+ 43 172 5.813 175 5.714
+ 44 176 5.681 187 5.333
+ 45 180 5.555 187 5.333
+ 46 184 5.434 187 5.333
+ 47 188 5.319 200 5.000
+ 48 192 5.208 200 5.000
+ 49 196 5.102 200 5.000
+
+
+17. Serial NVRAM (added by Richard Waltham: dormouse@farsrobt.demon.co.uk)
+
+17.1 Features
+
+Enabling serial NVRAM support enables detection of the serial NVRAM included
+on Symbios and some Symbios compatible host adaptors, and Tekram boards. The
+serial NVRAM is used by Symbios and Tekram to hold set up parameters for the
+host adaptor and its attached drives.
+
+The Symbios NVRAM also holds data on the boot order of host adaptors in a
+system with more than one host adaptor. This enables the order of scanning
+the cards for drives to be changed from the default used during host adaptor
+detection.
+
+This can be done to a limited extent at the moment using "reverse probe" but
+this only changes the order of detection of different types of cards. The
+NVRAM boot order settings can do this as well as change the order the same
+types of cards are scanned in, something "reverse probe" cannot do.
+
+Tekram boards using Symbios chips, DC390W/F/U, which have NVRAM are detected
+and this is used to distinguish between Symbios compatible and Tekram host
+adaptors. This is used to disable the Symbios compatible "diff" setting
+incorrectly set on Tekram boards if the CONFIG_SCSI_53C8XX_SYMBIOS_COMPAT
+configuration parameter is set enabling both Symbios and Tekram boards to be
+used together with the Symbios cards using all their features, including
+"diff" support. ("led pin" support for Symbios compatible cards can remain
+enabled when using Tekram cards. It does nothing useful for Tekram host
+adaptors but does not cause problems either.)
+
+
+17.2 Symbios NVRAM layout
+
+typical data at NVRAM address 0x100 (53c810a NVRAM)
+-----------------------------------------------------------
+00 00
+64 01
+8e 0b
+
+00 30 00 00 00 00 07 00 00 00 00 00 00 00 07 04 10 04 00 00
+
+04 00 0f 00 00 10 00 50 00 00 01 00 00 62
+04 00 03 00 00 10 00 58 00 00 01 00 00 63
+04 00 01 00 00 10 00 48 00 00 01 00 00 61
+00 00 00 00 00 00 00 00 00 00 00 00 00 00
+
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+
+fe fe
+00 00
+00 00
+-----------------------------------------------------------
+NVRAM layout details
+
+NVRAM Address 0x000-0x0ff not used
+ 0x100-0x26f initialised data
+ 0x270-0x7ff not used
+
+general layout
+
+ header - 6 bytes,
+ data - 356 bytes (checksum is byte sum of this data)
+ trailer - 6 bytes
+ ---
+ total 368 bytes
+
+data area layout
+
+ controller set up - 20 bytes
+ boot configuration - 56 bytes (4x14 bytes)
+ device set up - 128 bytes (16x8 bytes)
+ unused (spare?) - 152 bytes (19x8 bytes)
+ ---
+ total 356 bytes
+
+-----------------------------------------------------------
+header
+
+00 00 - ?? start marker
+64 01 - byte count (lsb/msb excludes header/trailer)
+8e 0b - checksum (lsb/msb excludes header/trailer)
+-----------------------------------------------------------
+controller set up
+
+00 30 00 00 00 00 07 00 00 00 00 00 00 00 07 04 10 04 00 00
+ | | | |
+ | | | -- host ID
+ | | |
+ | | --Removable Media Support
+ | | 0x00 = none
+ | | 0x01 = Bootable Device
+ | | 0x02 = All with Media
+ | |
+ | --flag bits 2
+ | 0x00000001= scan order hi->low
+ | (default 0x00 - scan low->hi)
+ --flag bits 1
+ 0x00000001 scam enable
+ 0x00000010 parity enable
+ 0x00000100 verbose boot msgs
+
+remaining bytes unknown - they do not appear to change in my
+current set up for any of the controllers.
+
+default set up is identical for 53c810a and 53c875 NVRAM
+(Removable Media added Symbios BIOS version 4.09)
+-----------------------------------------------------------
+boot configuration
+
+boot order set by order of the devices in this table
+
+04 00 0f 00 00 10 00 50 00 00 01 00 00 62 -- 1st controller
+04 00 03 00 00 10 00 58 00 00 01 00 00 63 2nd controller
+04 00 01 00 00 10 00 48 00 00 01 00 00 61 3rd controller
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 4th controller
+ | | | | | | | |
+ | | | | | | ---- PCI io port adr
+ | | | | | --0x01 init/scan at boot time
+ | | | | --PCI device/function number (0xdddddfff)
+ | | ----- ?? PCI vendor ID (lsb/msb)
+ ----PCI device ID (lsb/msb)
+
+?? use of this data is a guess but seems reasonable
+
+remaining bytes unknown - they do not appear to change in my
+current set up
+
+default set up is identical for 53c810a and 53c875 NVRAM
+-----------------------------------------------------------
+device set up (up to 16 devices - includes controller)
+
+0f 00 08 08 64 00 0a 00 - id 0
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00 - id 15
+ | | | | | |
+ | | | | ----timeout (lsb/msb)
+ | | | --synch period (0x?? 40 Mtrans/sec- fast 40) (probably 0x28)
+ | | | (0x30 20 Mtrans/sec- fast 20)
+ | | | (0x64 10 Mtrans/sec- fast )
+ | | | (0xc8 5 Mtrans/sec)
+ | | | (0x00 asynchronous)
+ | | -- ?? max sync offset (0x08 in NVRAM on 53c810a)
+ | | (0x10 in NVRAM on 53c875)
+ | --device bus width (0x08 narrow)
+ | (0x10 16 bit wide)
+ --flag bits
+ 0x00000001 - disconnect enabled
+ 0x00000010 - scan at boot time
+ 0x00000100 - scan luns
+ 0x00001000 - queue tags enabled
+
+remaining bytes unknown - they do not appear to change in my
+current set up
+
+?? use of this data is a guess but seems reasonable
+(but it could be max bus width)
+
+default set up for 53c810a NVRAM
+default set up for 53c875 NVRAM - bus width - 0x10
+ - sync offset ? - 0x10
+ - sync period - 0x30
+-----------------------------------------------------------
+?? spare device space (32 bit bus ??)
+
+00 00 00 00 00 00 00 00 (19x8bytes)
+.
+.
+00 00 00 00 00 00 00 00
+
+default set up is identical for 53c810a and 53c875 NVRAM
+-----------------------------------------------------------
+trailer
+
+fe fe - ? end marker ?
+00 00
+00 00
+
+default set up is identical for 53c810a and 53c875 NVRAM
+-----------------------------------------------------------
+
+
+
+17.3 Tekram NVRAM layout
+
+nvram 64x16 (1024 bit)
+
+Drive settings
+
+Drive ID 0-15 (addr 0x0yyyy0 = device setup, yyyy = ID)
+ (addr 0x0yyyy1 = 0x0000)
+
+ x x x x x x x x x x x x x x x x
+ | | | | | | | | |
+ | | | | | | | | ----- parity check 0 - off
+ | | | | | | | | 1 - on
+ | | | | | | | |
+ | | | | | | | ------- sync neg 0 - off
+ | | | | | | | 1 - on
+ | | | | | | |
+ | | | | | | --------- disconnect 0 - off
+ | | | | | | 1 - on
+ | | | | | |
+ | | | | | ----------- start cmd 0 - off
+ | | | | | 1 - on
+ | | | | |
+ | | | | -------------- tagged cmds 0 - off
+ | | | | 1 - on
+ | | | |
+ | | | ---------------- wide neg 0 - off
+ | | | 1 - on
+ | | |
+ --------------------------- sync rate 0 - 10.0 Mtrans/sec
+ 1 - 8.0
+ 2 - 6.6
+ 3 - 5.7
+ 4 - 5.0
+ 5 - 4.0
+ 6 - 3.0
+ 7 - 2.0
+ 7 - 2.0
+ 8 - 20.0
+ 9 - 16.7
+ a - 13.9
+ b - 11.9
+
+Global settings
+
+Host flags 0 (addr 0x100000, 32)
+
+ x x x x x x x x x x x x x x x x
+ | | | | | | | | | | | |
+ | | | | | | | | ----------- host ID 0x00 - 0x0f
+ | | | | | | | |
+ | | | | | | | ----------------------- support for 0 - off
+ | | | | | | | > 2 drives 1 - on
+ | | | | | | |
+ | | | | | | ------------------------- support drives 0 - off
+ | | | | | | > 1Gbytes 1 - on
+ | | | | | |
+ | | | | | --------------------------- bus reset on 0 - off
+ | | | | | power on 1 - on
+ | | | | |
+ | | | | ----------------------------- active neg 0 - off
+ | | | | 1 - on
+ | | | |
+ | | | -------------------------------- imm seek 0 - off
+ | | | 1 - on
+ | | |
+ | | ---------------------------------- scan luns 0 - off
+ | | 1 - on
+ | |
+ -------------------------------------- removable 0 - disable
+ as BIOS dev 1 - boot device
+ 2 - all
+
+Host flags 1 (addr 0x100001, 33)
+
+ x x x x x x x x x x x x x x x x
+ | | | | | |
+ | | | --------- boot delay 0 - 3 sec
+ | | | 1 - 5
+ | | | 2 - 10
+ | | | 3 - 20
+ | | | 4 - 30
+ | | | 5 - 60
+ | | | 6 - 120
+ | | |
+ --------------------------- max tag cmds 0 - 2
+ 1 - 4
+ 2 - 8
+ 3 - 16
+ 4 - 32
+
+Host flags 2 (addr 0x100010, 34)
+
+ x x x x x x x x x x x x x x x x
+ |
+ ----- F2/F6 enable 0 - off ???
+ 1 - on ???
+
+checksum (addr 0x111111)
+
+checksum = 0x1234 - (sum addr 0-63)
+
+----------------------------------------------------------------------------
+
+default nvram data:
+
+0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000
+0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000
+0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000
+0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000
+
+0x0f07 0x0400 0x0001 0x0000 0x0000 0x0000 0x0000 0x0000
+0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000
+0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000
+0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0xfbbc
+
+
+18. Support for Big Endian
+
+The PCI local bus has been primarily designed for x86 architecture.
+As a consequence, PCI devices generally expect DWORDS using little endian
+byte ordering.
+
+18.1 Big Endian CPU
+
+In order to support NCR chips on a Big Endian architecture the driver has to
+perform byte reordering each time it is needed. This feature has been
+added to the driver by Cort <cort@cs.nmt.edu> and is available in driver
+version 2.5 and later ones. For the moment Big Endian support has only
+been tested on Linux/PPC (PowerPC).
+
+18.2 NCR chip in Big Endian mode of operations
+
+It can be read in SYMBIOS documentation that some chips support a special
+Big Endian mode, on paper: 53C815, 53C825A, 53C875, 53C875N, 53C895.
+This mode of operations is not software-selectable, but needs pin named
+BigLit to be pulled-up. Using this mode, most of byte reorderings should
+be avoided when the driver is running on a Big Endian CPU.
+Driver version 2.5 is also, in theory, ready for this feature.
+
+===============================================================================
+End of NCR53C8XX driver README file
diff --git a/Documentation/scsi/osd.txt b/Documentation/scsi/osd.txt
new file mode 100644
index 000000000..5a9879bad
--- /dev/null
+++ b/Documentation/scsi/osd.txt
@@ -0,0 +1,197 @@
+The OSD Standard
+================
+OSD (Object-Based Storage Device) is a T10 SCSI command set that is designed
+to provide efficient operation of input/output logical units that manage the
+allocation, placement, and accessing of variable-size data-storage containers,
+called objects. Objects are intended to contain operating system and application
+constructs. Each object has associated attributes attached to it, which are
+integral part of the object and provide metadata about the object. The standard
+defines some common obligatory attributes, but user attributes can be added as
+needed.
+
+See: http://www.t10.org/ftp/t10/drafts/osd2/ for the latest draft for OSD 2
+or search the web for "OSD SCSI"
+
+OSD in the Linux Kernel
+=======================
+osd-initiator:
+ The main component of OSD in Kernel is the osd-initiator library. Its main
+user is intended to be the pNFS-over-objects layout driver, which uses objects
+as its back-end data storage. Other clients are the other osd parts listed below.
+
+osd-uld:
+ This is a SCSI ULD that registers for OSD type devices and provides a testing
+platform, both for the in-kernel initiator as well as connected targets. It
+currently has no useful user-mode API, though it could have if need be.
+
+exofs:
+ Is an OSD based Linux file system. It uses the osd-initiator and osd-uld,
+to export a usable file system for users.
+See Documentation/filesystems/exofs.txt for more details
+
+osd target:
+ There are no current plans for an OSD target implementation in kernel. For all
+needs, a user-mode target that is based on the scsi tgt target framework is
+available from Ohio Supercomputer Center (OSC) at:
+http://www.open-osd.org/bin/view/Main/OscOsdProject
+There are several other target implementations. See http://open-osd.org for more
+links.
+
+Files and Folders
+=================
+This is the complete list of files included in this work:
+include/scsi/
+ osd_initiator.h Main API for the initiator library
+ osd_types.h Common OSD types
+ osd_sec.h Security Manager API
+ osd_protocol.h Wire definitions of the OSD standard protocol
+ osd_attributes.h Wire definitions of OSD attributes
+
+drivers/scsi/osd/
+ osd_initiator.c OSD-Initiator library implementation
+ osd_uld.c The OSD scsi ULD
+ osd_ktest.{h,c} In-kernel test suite (called by osd_uld)
+ osd_debug.h Some printk macros
+ Makefile For both in-tree and out-of-tree compilation
+ Kconfig Enables inclusion of the different pieces
+ osd_test.c User-mode application to call the kernel tests
+
+The OSD-Initiator Library
+=========================
+osd_initiator is a low level implementation of an osd initiator encoder.
+But even though, it should be intuitive and easy to use. Perhaps over time an
+higher lever will form that automates some of the more common recipes.
+
+init/fini:
+- osd_dev_init() associates a scsi_device with an osd_dev structure
+ and initializes some global pools. This should be done once per scsi_device
+ (OSD LUN). The osd_dev structure is needed for calling osd_start_request().
+
+- osd_dev_fini() cleans up before a osd_dev/scsi_device destruction.
+
+OSD commands encoding, execution, and decoding of results:
+
+struct osd_request's is used to iteratively encode an OSD command and carry
+its state throughout execution. Each request goes through these stages:
+
+a. osd_start_request() allocates the request.
+
+b. Any of the osd_req_* methods is used to encode a request of the specified
+ type.
+
+c. osd_req_add_{get,set}_attr_* may be called to add get/set attributes to the
+ CDB. "List" or "Page" mode can be used exclusively. The attribute-list API
+ can be called multiple times on the same request. However, only one
+ attribute-page can be read, as mandated by the OSD standard.
+
+d. osd_finalize_request() computes offsets into the data-in and data-out buffers
+ and signs the request using the provided capability key and integrity-
+ check parameters.
+
+e. osd_execute_request() may be called to execute the request via the block
+ layer and wait for its completion. The request can be executed
+ asynchronously by calling the block layer API directly.
+
+f. After execution, osd_req_decode_sense() can be called to decode the request's
+ sense information.
+
+g. osd_req_decode_get_attr() may be called to retrieve osd_add_get_attr_list()
+ values.
+
+h. osd_end_request() must be called to deallocate the request and any resource
+ associated with it. Note that osd_end_request cleans up the request at any
+ stage and it must always be called after a successful osd_start_request().
+
+osd_request's structure:
+
+The OSD standard defines a complex structure of IO segments pointed to by
+members in the CDB. Up to 3 segments can be deployed in the IN-Buffer and up to
+4 in the OUT-Buffer. The ASCII illustration below depicts a secure-read with
+associated get+set of attributes-lists. Other combinations very on the same
+basic theme. From no-segments-used up to all-segments-used.
+
+|________OSD-CDB__________|
+| |
+|read_len (offset=0) -|---------\
+| | |
+|get_attrs_list_length | |
+|get_attrs_list_offset -|----\ |
+| | | |
+|retrieved_attrs_alloc_len| | |
+|retrieved_attrs_offset -|----|----|-\
+| | | | |
+|set_attrs_list_length | | | |
+|set_attrs_list_offset -|-\ | | |
+| | | | | |
+|in_data_integ_offset -|-|--|----|-|-\
+|out_data_integ_offset -|-|--|--\ | | |
+\_________________________/ | | | | | |
+ | | | | | |
+|_______OUT-BUFFER________| | | | | | |
+| Set attr list |</ | | | | |
+| | | | | | |
+|-------------------------| | | | | |
+| Get attr descriptors |<---/ | | | |
+| | | | | |
+|-------------------------| | | | |
+| Out-data integrity |<------/ | | |
+| | | | |
+\_________________________/ | | |
+ | | |
+|________IN-BUFFER________| | | |
+| In-Data read |<--------/ | |
+| | | |
+|-------------------------| | |
+| Get attr list |<----------/ |
+| | |
+|-------------------------| |
+| In-data integrity |<------------/
+| |
+\_________________________/
+
+A block device request can carry bidirectional payload by means of associating
+a bidi_read request with a main write-request. Each in/out request is described
+by a chain of BIOs associated with each request.
+The CDB is of a SCSI VARLEN CDB format, as described by OSD standard.
+The OSD standard also mandates alignment restrictions at start of each segment.
+
+In the code, in struct osd_request, there are two _osd_io_info structures to
+describe the IN/OUT buffers above, two BIOs for the data payload and up to five
+_osd_req_data_segment structures to hold the different segments allocation and
+information.
+
+Important: We have chosen to disregard the assumption that a BIO-chain (and
+the resulting sg-list) describes a linear memory buffer. Meaning only first and
+last scatter chain can be incomplete and all the middle chains are of PAGE_SIZE.
+For us, a scatter-gather-list, as its name implies and as used by the Networking
+layer, is to describe a vector of buffers that will be transferred to/from the
+wire. It works very well with current iSCSI transport. iSCSI is currently the
+only deployed OSD transport. In the future we anticipate SAS and FC attached OSD
+devices as well.
+
+The OSD Testing ULD
+===================
+TODO: More user-mode control on tests.
+
+Authors, Mailing list
+=====================
+Please communicate with us on any deployment of osd, whether using this code
+or not.
+
+Any problems, questions, bug reports, lonely OSD nights, please email:
+ OSD Dev List <osd-dev@open-osd.org>
+
+More up-to-date information can be found on:
+http://open-osd.org
+
+Boaz Harrosh <ooo@electrozaur.com>
+
+References
+==========
+Weber, R., "SCSI Object-Based Storage Device Commands",
+T10/1355-D ANSI/INCITS 400-2004,
+http://www.t10.org/ftp/t10/drafts/osd/osd-r10.pdf
+
+Weber, R., "SCSI Object-Based Storage Device Commands -2 (OSD-2)"
+T10/1729-D, Working Draft, rev. 3
+http://www.t10.org/ftp/t10/drafts/osd2/osd2r03.pdf
diff --git a/Documentation/scsi/osst.txt b/Documentation/scsi/osst.txt
new file mode 100644
index 000000000..00c8ebb2f
--- /dev/null
+++ b/Documentation/scsi/osst.txt
@@ -0,0 +1,218 @@
+README file for the osst driver
+===============================
+(w) Kurt Garloff <garloff@suse.de> 12/2000
+
+This file describes the osst driver as of version 0.8.x/0.9.x, the released
+version of the osst driver.
+It is intended to help advanced users to understand the role of osst and to
+get them started using (and maybe debugging) it.
+It won't address issues like "How do I compile a kernel?" or "How do I load
+a module?", as these are too basic.
+Once the OnStream got merged into the official kernel, the distro makers
+will provide the OnStream support for those who are not familiar with
+hacking their kernels.
+
+
+Purpose
+-------
+The osst driver was developed, because the standard SCSI tape driver in
+Linux, st, does not support the OnStream SC-x0 SCSI tape. The st is not to
+blame for that, as the OnStream tape drives do not support the standard SCSI
+command set for Serial Access Storage Devices (SASDs), which basically
+corresponds to the QIC-157 spec.
+Nevertheless, the OnStream tapes are nice pieces of hardware and therefore
+the osst driver has been written to make these tape devs supported by Linux.
+The driver is free software. It's released under the GNU GPL and planned to
+be integrated into the mainstream kernel.
+
+
+Implementation
+--------------
+The osst is a new high-level SCSI driver, just like st, sr, sd and sg. It
+can be compiled into the kernel or loaded as a module.
+As it represents a new device, it got assigned a new device node: /dev/osstX
+are character devices with major no 206 and minor numbers like the /dev/stX
+devices. If those are not present, you may create them by calling
+Makedevs.sh as root (see below).
+The driver started being a copy of st and as such, the osst devices'
+behavior looks very much the same as st to the userspace applications.
+
+
+History
+-------
+In the first place, osst shared its identity very much with st. That meant
+that it used the same kernel structures and the same device node as st.
+So you could only have either of them being present in the kernel. This has
+been fixed by registering an own device, now.
+st and osst can coexist, each only accessing the devices it can support by
+themselves.
+
+
+Installation
+------------
+osst got integrated into the linux kernel. Select it during kernel
+configuration as module or compile statically into the kernel.
+Compile your kernel and install the modules.
+
+Now, your osst driver is inside the kernel or available as a module,
+depending on your choice during kernel config. You may still need to create
+the device nodes by calling the Makedevs.sh script (see below) manually.
+
+To load your module, you may use the command
+modprobe osst
+as root. dmesg should show you, whether your OnStream tapes have been
+recognized.
+
+If you want to have the module autoloaded on access to /dev/osst, you may
+add something like
+alias char-major-206 osst
+to a file under /etc/modprobe.d/ directory.
+
+You may find it convenient to create a symbolic link
+ln -s nosst0 /dev/tape
+to make programs assuming a default name of /dev/tape more convenient to
+use.
+
+The device nodes for osst have to be created. Use the Makedevs.sh script
+attached to this file.
+
+
+Using it
+--------
+You may use the OnStream tape driver with your standard backup software,
+which may be tar, cpio, amanda, arkeia, BRU, Lone Tar, ...
+by specifying /dev/(n)osst0 as the tape device to use or using the above
+symlink trick. The IOCTLs to control tape operation are also mostly
+supported and you may try the mt (or mt_st) program to jump between
+filemarks, eject the tape, ...
+
+There's one limitation: You need to use a block size of 32kB.
+
+(This limitation is worked on and will be fixed in version 0.8.8 of
+ this driver.)
+
+If you just want to get started with standard software, here is an example
+for creating and restoring a full backup:
+# Backup
+tar cvf - / --exclude /proc | buffer -s 32k -m 24M -B -t -o /dev/nosst0
+# Restore
+buffer -s 32k -m 8M -B -t -i /dev/osst0 | tar xvf - -C /
+
+The buffer command has been used to buffer the data before it goes to the
+tape (or the file system) in order to smooth out the data stream and prevent
+the tape from needing to stop and rewind. The OnStream does have an internal
+buffer and a variable speed which help this, but especially on writing, the
+buffering still proves useful in most cases. It also pads the data to
+guarantees the block size of 32k. (Otherwise you may pass the -b64 option to
+tar.)
+Expect something like 1.8MB/s for the SC-x0 drives and 0.9MB/s for the DI-30.
+The USB drive will give you about 0.7MB/s.
+On a fast machine, you may profit from software data compression (z flag for
+tar).
+
+
+USB and IDE
+-----------
+Via the SCSI emulation layers usb-storage and ide-scsi, you can also use the
+osst driver to drive the USB-30 and the DI-30 drives. (Unfortunately, there
+is no such layer for the parallel port, otherwise the DP-30 would work as
+well.) For the USB support, you need the latest 2.4.0-test kernels and the
+latest usb-storage driver from
+http://www.linux-usb.org/
+http://sourceforge.net/cvs/?group_id=3581
+
+Note that the ide-tape driver as of 1.16f uses a slightly outdated on-tape
+format and therefore is not completely interoperable with osst tapes.
+
+The ADR-x0 line is fully SCSI-2 compliant and is supported by st, not osst.
+The on-tape format is supposed to be compatible with the one used by osst.
+
+
+Feedback and updates
+--------------------
+The driver development is coordinated through a mailing list
+<osst@linux1.onstream.nl>
+a CVS repository and some web pages.
+The tester's pages which contain recent news and updated drivers to download
+can be found on
+http://sourceforge.net/projects/osst/
+
+If you find any problems, please have a look at the tester's page in order
+to see whether the problem is already known and solved. Otherwise, please
+report it to the mailing list. Your feedback is welcome. (This holds also
+for reports of successful usage, of course.)
+In case of trouble, please do always provide the following info:
+* driver and kernel version used (see syslog)
+* driver messages (syslog)
+* SCSI config and OnStream Firmware (/proc/scsi/scsi)
+* description of error. Is it reproducible?
+* software and commands used
+
+You may subscribe to the mailing list, BTW, it's a majordomo list.
+
+
+Status
+------
+0.8.0 was the first widespread BETA release. Since then a lot of reports
+have been sent, but mostly reported success or only minor trouble.
+All the issues have been addressed.
+Check the web pages for more info about the current developments.
+0.9.x is the tree for the 2.3/2.4 kernel.
+
+
+Acknowledgments
+----------------
+The driver has been started by making a copy of Kai Makisara's st driver.
+Most of the development has been done by Willem Riede. The presence of the
+userspace program osg (onstreamsg) from Terry Hardie has been rather
+helpful. The same holds for Gadi Oxman's ide-tape support for the DI-30.
+I did add some patches to those drivers as well and coordinated things a
+little bit.
+Note that most of them did mostly spend their spare time for the creation of
+this driver.
+The people from OnStream, especially Jack Bombeeck did support this project
+and always tried to answer HW or FW related questions. Furthermore, he
+pushed the FW developers to do the right things.
+SuSE did support this project by allowing me to work on it during my working
+time for them and by integrating the driver into their distro.
+
+More people did help by sending useful comments. Sorry to those who have
+been forgotten. Thanks to all the GNU/FSF and Linux developers who made this
+platform such an interesting, nice and stable platform.
+Thanks go to those who tested the drivers and did send useful reports. Your
+help is needed!
+
+
+Makedevs.sh
+-----------
+#!/bin/sh
+# Script to create OnStream SC-x0 device nodes (major 206)
+# Usage: Makedevs.sh [nos [path to dev]]
+# $Id: README.osst.kernel,v 1.4 2000/12/20 14:13:15 garloff Exp $
+major=206
+nrs=4
+dir=/dev
+test -z "$1" || nrs=$1
+test -z "$2" || dir=$2
+declare -i nr
+nr=0
+test -d $dir || mkdir -p $dir
+while test $nr -lt $nrs; do
+ mknod $dir/osst$nr c $major $nr
+ chown 0.disk $dir/osst$nr; chmod 660 $dir/osst$nr;
+ mknod $dir/nosst$nr c $major $[nr+128]
+ chown 0.disk $dir/nosst$nr; chmod 660 $dir/nosst$nr;
+ mknod $dir/osst${nr}l c $major $[nr+32]
+ chown 0.disk $dir/osst${nr}l; chmod 660 $dir/osst${nr}l;
+ mknod $dir/nosst${nr}l c $major $[nr+160]
+ chown 0.disk $dir/nosst${nr}l; chmod 660 $dir/nosst${nr}l;
+ mknod $dir/osst${nr}m c $major $[nr+64]
+ chown 0.disk $dir/osst${nr}m; chmod 660 $dir/osst${nr}m;
+ mknod $dir/nosst${nr}m c $major $[nr+192]
+ chown 0.disk $dir/nosst${nr}m; chmod 660 $dir/nosst${nr}m;
+ mknod $dir/osst${nr}a c $major $[nr+96]
+ chown 0.disk $dir/osst${nr}a; chmod 660 $dir/osst${nr}a;
+ mknod $dir/nosst${nr}a c $major $[nr+224]
+ chown 0.disk $dir/nosst${nr}a; chmod 660 $dir/nosst${nr}a;
+ let nr+=1
+done
diff --git a/Documentation/scsi/ppa.txt b/Documentation/scsi/ppa.txt
new file mode 100644
index 000000000..05ff47dbe
--- /dev/null
+++ b/Documentation/scsi/ppa.txt
@@ -0,0 +1,14 @@
+-------- Terse where to get ZIP Drive help info --------
+
+General Iomega ZIP drive page for Linux:
+http://web.archive.org/web/*/http://www.torque.net/~campbell/
+
+Driver archive for old drivers:
+http://web.archive.org/web/*/http://www.torque.net/~campbell/ppa
+
+Linux Parport page (parallel port)
+http://web.archive.org/web/*/http://www.torque.net/parport/
+
+Email list for Linux Parport
+linux-parport@torque.net
+
diff --git a/Documentation/scsi/qlogicfas.txt b/Documentation/scsi/qlogicfas.txt
new file mode 100644
index 000000000..c211d827f
--- /dev/null
+++ b/Documentation/scsi/qlogicfas.txt
@@ -0,0 +1,78 @@
+
+This driver supports the Qlogic FASXXX family of chips. This driver
+only works with the ISA, VLB, and PCMCIA versions of the Qlogic
+FastSCSI! cards as well as any other card based on the FASXX chip
+(including the Control Concepts SCSI/IDE/SIO/PIO/FDC cards).
+
+This driver does NOT support the PCI version. Support for these PCI
+Qlogic boards:
+
+ * IQ-PCI
+ * IQ-PCI-10
+ * IQ-PCI-D
+
+is provided by the qla1280 driver.
+
+Nor does it support the PCI-Basic, which is supported by the
+'am53c974' driver.
+
+PCMCIA SUPPORT
+
+This currently only works if the card is enabled first from DOS. This
+means you will have to load your socket and card services, and
+QL41DOS.SYS and QL40ENBL.SYS. These are a minimum, but loading the
+rest of the modules won't interfere with the operation. The next
+thing to do is load the kernel without resetting the hardware, which
+can be a simple ctrl-alt-delete with a boot floppy, or by using
+loadlin with the kernel image accessible from DOS. If you are using
+the Linux PCMCIA driver, you will have to adjust it or otherwise stop
+it from configuring the card.
+
+I am working with the PCMCIA group to make it more flexible, but that
+may take a while.
+
+ALL CARDS
+
+The top of the qlogic.c file has a number of defines that controls
+configuration. As shipped, it provides a balance between speed and
+function. If there are any problems, try setting SLOW_CABLE to 1, and
+then try changing USE_IRQ and TURBO_PDMA to zero. If you are familiar
+with SCSI, there are other settings which can tune the bus.
+
+It may be a good idea to enable RESET_AT_START, especially if the
+devices may not have been just powered up, or if you are restarting
+after a crash, since they may be busy trying to complete the last
+command or something. It comes up faster if this is set to zero, and
+if you have reliable hardware and connections it may be more useful to
+not reset things.
+
+SOME TROUBLESHOOTING TIPS
+
+Make sure it works properly under DOS. You should also do an initial FDISK
+on a new drive if you want partitions.
+
+Don't enable all the speedups first. If anything is wrong, they will make
+any problem worse.
+
+IMPORTANT
+
+The best way to test if your cables, termination, etc. are good is to
+copy a very big file (e.g. a doublespace container file, or a very
+large executable or archive). It should be at least 5 megabytes, but
+you can do multiple tests on smaller files. Then do a COMP to verify
+that the file copied properly. (Turn off all caching when doing these
+tests, otherwise you will test your RAM and not the files). Then do
+10 COMPs, comparing the same file on the SCSI hard drive, i.e. "COMP
+realbig.doc realbig.doc". Then do it after the computer gets warm.
+
+I noticed my system which seems to work 100% would fail this test if
+the computer was left on for a few hours. It was worse with longer
+cables, and more devices on the SCSI bus. What seems to happen is
+that it gets a false ACK causing an extra byte to be inserted into the
+stream (and this is not detected). This can be caused by bad
+termination (the ACK can be reflected), or by noise when the chips
+work less well because of the heat, or when cables get too long for
+the speed.
+
+Remember, if it doesn't work under DOS, it probably won't work under
+Linux.
diff --git a/Documentation/scsi/scsi-changer.txt b/Documentation/scsi/scsi-changer.txt
new file mode 100644
index 000000000..ade046ea7
--- /dev/null
+++ b/Documentation/scsi/scsi-changer.txt
@@ -0,0 +1,180 @@
+
+README for the SCSI media changer driver
+========================================
+
+This is a driver for SCSI Medium Changer devices, which are listed
+with "Type: Medium Changer" in /proc/scsi/scsi.
+
+This is for *real* Jukeboxes. It is *not* supported to work with
+common small CD-ROM changers, neither one-lun-per-slot SCSI changers
+nor IDE drives.
+
+Userland tools available from here:
+ http://linux.bytesex.org/misc/changer.html
+
+
+General Information
+-------------------
+
+First some words about how changers work: A changer has 2 (possibly
+more) SCSI ID's. One for the changer device which controls the robot,
+and one for the device which actually reads and writes the data. The
+later may be anything, a MOD, a CD-ROM, a tape or whatever. For the
+changer device this is a "don't care", he *only* shuffles around the
+media, nothing else.
+
+
+The SCSI changer model is complex, compared to - for example - IDE-CD
+changers. But it allows to handle nearly all possible cases. It knows
+4 different types of changer elements:
+
+ media transport - this one shuffles around the media, i.e. the
+ transport arm. Also known as "picker".
+ storage - a slot which can hold a media.
+ import/export - the same as above, but is accessible from outside,
+ i.e. there the operator (you !) can use this to
+ fill in and remove media from the changer.
+ Sometimes named "mailslot".
+ data transfer - this is the device which reads/writes, i.e. the
+ CD-ROM / Tape / whatever drive.
+
+None of these is limited to one: A huge Jukebox could have slots for
+123 CD-ROM's, 5 CD-ROM readers (and therefore 6 SCSI ID's: the changer
+and each CD-ROM) and 2 transport arms. No problem to handle.
+
+
+How it is implemented
+---------------------
+
+I implemented the driver as character device driver with a NetBSD-like
+ioctl interface. Just grabbed NetBSD's header file and one of the
+other linux SCSI device drivers as starting point. The interface
+should be source code compatible with NetBSD. So if there is any
+software (anybody knows ???) which supports a BSDish changer driver,
+it should work with this driver too.
+
+Over time a few more ioctls where added, volume tag support for example
+wasn't covered by the NetBSD ioctl API.
+
+
+Current State
+-------------
+
+Support for more than one transport arm is not implemented yet (and
+nobody asked for it so far...).
+
+I test and use the driver myself with a 35 slot cdrom jukebox from
+Grundig. I got some reports telling it works ok with tape autoloaders
+(Exabyte, HP and DEC). Some People use this driver with amanda. It
+works fine with small (11 slots) and a huge (4 MOs, 88 slots)
+magneto-optical Jukebox. Probably with lots of other changers too, most
+(but not all :-) people mail me only if it does *not* work...
+
+I don't have any device lists, neither black-list nor white-list. Thus
+it is quite useless to ask me whenever a specific device is supported or
+not. In theory every changer device which supports the SCSI-2 media
+changer command set should work out-of-the-box with this driver. If it
+doesn't, it is a bug. Either within the driver or within the firmware
+of the changer device.
+
+
+Using it
+--------
+
+This is a character device with major number is 86, so use
+"mknod /dev/sch0 c 86 0" to create the special file for the driver.
+
+If the module finds the changer, it prints some messages about the
+device [ try "dmesg" if you don't see anything ] and should show up in
+/proc/devices. If not.... some changers use ID ? / LUN 0 for the
+device and ID ? / LUN 1 for the robot mechanism. But Linux does *not*
+look for LUNs other than 0 as default, because there are too many
+broken devices. So you can try:
+
+ 1) echo "scsi add-single-device 0 0 ID 1" > /proc/scsi/scsi
+ (replace ID with the SCSI-ID of the device)
+ 2) boot the kernel with "max_scsi_luns=1" on the command line
+ (append="max_scsi_luns=1" in lilo.conf should do the trick)
+
+
+Trouble?
+--------
+
+If you insmod the driver with "insmod debug=1", it will be verbose and
+prints a lot of stuff to the syslog. Compiling the kernel with
+CONFIG_SCSI_CONSTANTS=y improves the quality of the error messages a lot
+because the kernel will translate the error codes into human-readable
+strings then.
+
+You can display these messages with the dmesg command (or check the
+logfiles). If you email me some question because of a problem with the
+driver, please include these messages.
+
+
+Insmod options
+--------------
+
+debug=0/1
+ Enable debug messages (see above, default: 0).
+
+verbose=0/1
+ Be verbose (default: 1).
+
+init=0/1
+ Send INITIALIZE ELEMENT STATUS command to the changer
+ at insmod time (default: 1).
+
+timeout_init=<seconds>
+ timeout for the INITIALIZE ELEMENT STATUS command
+ (default: 3600).
+
+timeout_move=<seconds>
+ timeout for all other commands (default: 120).
+
+dt_id=<id1>,<id2>,...
+dt_lun=<lun1>,<lun2>,...
+ These two allow to specify the SCSI ID and LUN for the data
+ transfer elements. You likely don't need this as the jukebox
+ should provide this information. But some devices don't ...
+
+vendor_firsts=
+vendor_counts=
+vendor_labels=
+ These insmod options can be used to tell the driver that there
+ are some vendor-specific element types. Grundig for example
+ does this. Some jukeboxes have a printer to label fresh burned
+ CDs, which is addressed as element 0xc000 (type 5). To tell the
+ driver about this vendor-specific element, use this:
+ $ insmod ch \
+ vendor_firsts=0xc000 \
+ vendor_counts=1 \
+ vendor_labels=printer
+ All three insmod options accept up to four comma-separated
+ values, this way you can configure the element types 5-8.
+ You likely need the SCSI specs for the device in question to
+ find the correct values as they are not covered by the SCSI-2
+ standard.
+
+
+Credits
+-------
+
+I wrote this driver using the famous mailing-patches-around-the-world
+method. With (more or less) help from:
+
+ Daniel Moehwald <moehwald@hdg.de>
+ Dane Jasper <dane@sonic.net>
+ R. Scott Bailey <sbailey@dsddi.eds.com>
+ Jonathan Corbet <corbet@lwn.net>
+
+Special thanks go to
+ Martin Kuehne <martin.kuehne@bnbt.de>
+for a old, second-hand (but full functional) cdrom jukebox which I use
+to develop/test driver and tools now.
+
+Have fun,
+
+ Gerd
+
+--
+Gerd Knorr <kraxel@bytesex.org>
diff --git a/Documentation/scsi/scsi-generic.txt b/Documentation/scsi/scsi-generic.txt
new file mode 100644
index 000000000..51be20a6a
--- /dev/null
+++ b/Documentation/scsi/scsi-generic.txt
@@ -0,0 +1,101 @@
+ Notes on Linux SCSI Generic (sg) driver
+ ---------------------------------------
+ 20020126
+Introduction
+============
+The SCSI Generic driver (sg) is one of the four "high level" SCSI device
+drivers along with sd, st and sr (disk, tape and CDROM respectively). Sg
+is more generalized (but lower level) than its siblings and tends to be
+used on SCSI devices that don't fit into the already serviced categories.
+Thus sg is used for scanners, CD writers and reading audio CDs digitally
+amongst other things.
+
+Rather than document the driver's interface here, version information
+is provided plus pointers (i.e. URLs) where to find documentation
+and examples.
+
+
+Major versions of the sg driver
+===============================
+There are three major versions of sg found in the linux kernel (lk):
+ - sg version 1 (original) from 1992 to early 1999 (lk 2.2.5) .
+ It is based in the sg_header interface structure.
+ - sg version 2 from lk 2.2.6 in the 2.2 series. It is based on
+ an extended version of the sg_header interface structure.
+ - sg version 3 found in the lk 2.4 series (and the lk 2.5 series).
+ It adds the sg_io_hdr interface structure.
+
+
+Sg driver documentation
+=======================
+The most recent documentation of the sg driver is kept at the Linux
+Documentation Project's (LDP) site:
+http://www.tldp.org/HOWTO/SCSI-Generic-HOWTO
+This describes the sg version 3 driver found in the lk 2.4 series.
+The LDP renders documents in single and multiple page HTML, postscript
+and pdf. This document can also be found at:
+http://sg.danny.cz/sg/p/sg_v3_ho.html
+
+Documentation for the version 2 sg driver found in the lk 2.2 series can
+be found at http://sg.danny.cz/sg/. A larger version
+is at: http://sg.danny.cz/sg/p/scsi-generic_long.txt.
+
+The original documentation for the sg driver (prior to lk 2.2.6) can be
+found at http://www.torque.net/sg/p/original/SCSI-Programming-HOWTO.txt
+and in the LDP archives.
+
+A changelog with brief notes can be found in the
+/usr/src/linux/include/scsi/sg.h file. Note that the glibc maintainers copy
+and edit this file (removing its changelog for example) before placing it
+in /usr/include/scsi/sg.h . Driver debugging information and other notes
+can be found at the top of the /usr/src/linux/drivers/scsi/sg.c file.
+
+A more general description of the Linux SCSI subsystem of which sg is a
+part can be found at http://www.tldp.org/HOWTO/SCSI-2.4-HOWTO .
+
+
+Example code and utilities
+==========================
+There are two packages of sg utilities:
+ - sg3_utils for the sg version 3 driver found in lk 2.4
+ - sg_utils for the sg version 2 (and original) driver found in lk 2.2
+ and earlier
+Both packages will work in the lk 2.4 series however sg3_utils offers more
+capabilities. They can be found at: http://sg.danny.cz/sg/sg3_utils.html and
+freecode.com
+
+Another approach is to look at the applications that use the sg driver.
+These include cdrecord, cdparanoia, SANE and cdrdao.
+
+
+Mapping of Linux kernel versions to sg driver versions
+======================================================
+Here is a list of linux kernels in the 2.4 series that had new version
+of the sg driver:
+ lk 2.4.0 : sg version 3.1.17
+ lk 2.4.7 : sg version 3.1.19
+ lk 2.4.10 : sg version 3.1.20 **
+ lk 2.4.17 : sg version 3.1.22
+
+** There were 3 changes to sg version 3.1.20 by third parties in the
+ next six linux kernel versions.
+
+For reference here is a list of linux kernels in the 2.2 series that had
+new version of the sg driver:
+ lk 2.2.0 : original sg version [with no version number]
+ lk 2.2.6 : sg version 2.1.31
+ lk 2.2.8 : sg version 2.1.32
+ lk 2.2.10 : sg version 2.1.34 [SG_GET_VERSION_NUM ioctl first appeared]
+ lk 2.2.14 : sg version 2.1.36
+ lk 2.2.16 : sg version 2.1.38
+ lk 2.2.17 : sg version 2.1.39
+ lk 2.2.20 : sg version 2.1.40
+
+The lk 2.5 development series has recently commenced and it currently
+contains sg version 3.5.23 which is functionally equivalent to sg
+version 3.1.22 found in lk 2.4.17 .
+
+
+Douglas Gilbert
+26th January 2002
+dgilbert@interlog.com
diff --git a/Documentation/scsi/scsi-parameters.txt b/Documentation/scsi/scsi-parameters.txt
new file mode 100644
index 000000000..2bfd6f6d2
--- /dev/null
+++ b/Documentation/scsi/scsi-parameters.txt
@@ -0,0 +1,133 @@
+ SCSI Kernel Parameters
+ ~~~~~~~~~~~~~~~~~~~~~~
+
+See Documentation/kernel-parameters.txt for general information on
+specifying module parameters.
+
+This document may not be entirely up to date and comprehensive. The command
+"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
+module. Loadable modules, after being loaded into the running kernel, also
+reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
+parameters may be changed at runtime by the command
+"echo -n ${value} > /sys/module/${modulename}/parameters/${parm}".
+
+
+ advansys= [HW,SCSI]
+ See header of drivers/scsi/advansys.c.
+
+ aha152x= [HW,SCSI]
+ See Documentation/scsi/aha152x.txt.
+
+ aha1542= [HW,SCSI]
+ Format: <portbase>[,<buson>,<busoff>[,<dmaspeed>]]
+
+ aic7xxx= [HW,SCSI]
+ See Documentation/scsi/aic7xxx.txt.
+
+ aic79xx= [HW,SCSI]
+ See Documentation/scsi/aic79xx.txt.
+
+ atascsi= [HW,SCSI] Atari SCSI
+
+ BusLogic= [HW,SCSI]
+ See drivers/scsi/BusLogic.c, comment before function
+ BusLogic_ParseDriverOptions().
+
+ dtc3181e= [HW,SCSI]
+
+ eata= [HW,SCSI]
+
+ fdomain= [HW,SCSI]
+ See header of drivers/scsi/fdomain.c.
+
+ gdth= [HW,SCSI]
+ See header of drivers/scsi/gdth.c.
+
+ gvp11= [HW,SCSI]
+
+ in2000= [HW,SCSI]
+ See header of drivers/scsi/in2000.c.
+
+ ips= [HW,SCSI] Adaptec / IBM ServeRAID controller
+ See header of drivers/scsi/ips.c.
+
+ mac5380= [HW,SCSI] Format:
+ <can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
+
+ max_luns= [SCSI] Maximum number of LUNs to probe.
+ Should be between 1 and 2^32-1.
+
+ max_report_luns=
+ [SCSI] Maximum number of LUNs received.
+ Should be between 1 and 16384.
+
+ NCR_D700= [HW,SCSI]
+ See header of drivers/scsi/NCR_D700.c.
+
+ ncr5380= [HW,SCSI]
+
+ ncr53c400= [HW,SCSI]
+
+ ncr53c400a= [HW,SCSI]
+
+ ncr53c406a= [HW,SCSI]
+
+ ncr53c8xx= [HW,SCSI]
+
+ nodisconnect [HW,SCSI,M68K] Disables SCSI disconnects.
+
+ osst= [HW,SCSI] SCSI Tape Driver
+ Format: <buffer_size>,<write_threshold>
+ See also Documentation/scsi/st.txt.
+
+ pas16= [HW,SCSI]
+ See header of drivers/scsi/pas16.c.
+
+ scsi_debug_*= [SCSI]
+ See drivers/scsi/scsi_debug.c.
+
+ scsi_default_dev_flags=
+ [SCSI] SCSI default device flags
+ Format: <integer>
+
+ scsi_dev_flags= [SCSI] Black/white list entry for vendor and model
+ Format: <vendor>:<model>:<flags>
+ (flags are integer value)
+
+ scsi_logging_level= [SCSI] a bit mask of logging levels
+ See drivers/scsi/scsi_logging.h for bits. Also
+ settable via sysctl at dev.scsi.logging_level
+ (/proc/sys/dev/scsi/logging_level).
+ There is also a nice 'scsi_logging_level' script in the
+ S390-tools package, available for download at
+ http://www-128.ibm.com/developerworks/linux/linux390/s390-tools-1.5.4.html
+
+ scsi_mod.scan= [SCSI] sync (default) scans SCSI busses as they are
+ discovered. async scans them in kernel threads,
+ allowing boot to proceed. none ignores them, expecting
+ user space to do the scan.
+
+ sim710= [SCSI,HW]
+ See header of drivers/scsi/sim710.c.
+
+ st= [HW,SCSI] SCSI tape parameters (buffers, etc.)
+ See Documentation/scsi/st.txt.
+
+ sym53c416= [HW,SCSI]
+ See header of drivers/scsi/sym53c416.c.
+
+ t128= [HW,SCSI]
+ See header of drivers/scsi/t128.c.
+
+ tmscsim= [HW,SCSI]
+ See comment before function dc390_setup() in
+ drivers/scsi/tmscsim.c.
+
+ u14-34f= [HW,SCSI] UltraStor 14F/34F SCSI host adapter
+ See header of drivers/scsi/u14-34f.c.
+
+ wd33c93= [HW,SCSI]
+ See header of drivers/scsi/wd33c93.c.
+
+ wd7000= [HW,SCSI]
+ See header of drivers/scsi/wd7000.c.
diff --git a/Documentation/scsi/scsi.txt b/Documentation/scsi/scsi.txt
new file mode 100644
index 000000000..3d99d38cb
--- /dev/null
+++ b/Documentation/scsi/scsi.txt
@@ -0,0 +1,44 @@
+SCSI subsystem documentation
+============================
+The Linux Documentation Project (LDP) maintains a document describing
+the SCSI subsystem in the Linux kernel (lk) 2.4 series. See:
+http://www.tldp.org/HOWTO/SCSI-2.4-HOWTO . The LDP has single
+and multiple page HTML renderings as well as postscript and pdf.
+It can also be found at:
+http://web.archive.org/web/*/http://www.torque.net/scsi/SCSI-2.4-HOWTO
+
+Notes on using modules in the SCSI subsystem
+============================================
+The scsi support in the linux kernel can be modularized in a number of
+different ways depending upon the needs of the end user. To understand
+your options, we should first define a few terms.
+
+The scsi-core (also known as the "mid level") contains the core of scsi
+support. Without it you can do nothing with any of the other scsi drivers.
+The scsi core support can be a module (scsi_mod.o), or it can be built into
+the kernel. If the core is a module, it must be the first scsi module
+loaded, and if you unload the modules, it will have to be the last one
+unloaded. In practice the modprobe and rmmod commands (and "autoclean")
+will enforce the correct ordering of loading and unloading modules in
+the SCSI subsystem.
+
+The individual upper and lower level drivers can be loaded in any order
+once the scsi core is present in the kernel (either compiled in or loaded
+as a module). The disk driver (sd_mod.o), cdrom driver (sr_mod.o),
+tape driver ** (st.o) and scsi generics driver (sg.o) represent the upper
+level drivers to support the various assorted devices which can be
+controlled. You can for example load the tape driver to use the tape drive,
+and then unload it once you have no further need for the driver (and release
+the associated memory).
+
+The lower level drivers are the ones that support the individual cards that
+are supported for the hardware platform that you are running under. Those
+individual cards are often called Host Bus Adapters (HBAs). For example the
+aic7xxx.o driver is used to control all recent SCSI controller cards from
+Adaptec. Almost all lower level drivers can be built either as modules or
+built into the kernel.
+
+
+** There is a variant of the st driver for controlling OnStream tape
+ devices. Its module name is osst.o .
+
diff --git a/Documentation/scsi/scsi_eh.txt b/Documentation/scsi/scsi_eh.txt
new file mode 100644
index 000000000..8638f61c8
--- /dev/null
+++ b/Documentation/scsi/scsi_eh.txt
@@ -0,0 +1,488 @@
+
+SCSI EH
+======================================
+
+ This document describes SCSI midlayer error handling infrastructure.
+Please refer to Documentation/scsi/scsi_mid_low_api.txt for more
+information regarding SCSI midlayer.
+
+TABLE OF CONTENTS
+
+[1] How SCSI commands travel through the midlayer and to EH
+ [1-1] struct scsi_cmnd
+ [1-2] How do scmd's get completed?
+ [1-2-1] Completing a scmd w/ scsi_done
+ [1-2-2] Completing a scmd w/ timeout
+ [1-3] How EH takes over
+[2] How SCSI EH works
+ [2-1] EH through fine-grained callbacks
+ [2-1-1] Overview
+ [2-1-2] Flow of scmds through EH
+ [2-1-3] Flow of control
+ [2-2] EH through transportt->eh_strategy_handler()
+ [2-2-1] Pre transportt->eh_strategy_handler() SCSI midlayer conditions
+ [2-2-2] Post transportt->eh_strategy_handler() SCSI midlayer conditions
+ [2-2-3] Things to consider
+
+
+[1] How SCSI commands travel through the midlayer and to EH
+
+[1-1] struct scsi_cmnd
+
+ Each SCSI command is represented with struct scsi_cmnd (== scmd). A
+scmd has two list_head's to link itself into lists. The two are
+scmd->list and scmd->eh_entry. The former is used for free list or
+per-device allocated scmd list and not of much interest to this EH
+discussion. The latter is used for completion and EH lists and unless
+otherwise stated scmds are always linked using scmd->eh_entry in this
+discussion.
+
+
+[1-2] How do scmd's get completed?
+
+ Once LLDD gets hold of a scmd, either the LLDD will complete the
+command by calling scsi_done callback passed from midlayer when
+invoking hostt->queuecommand() or the block layer will time it out.
+
+
+[1-2-1] Completing a scmd w/ scsi_done
+
+ For all non-EH commands, scsi_done() is the completion callback. It
+just calls blk_complete_request() to delete the block layer timer and
+raise SCSI_SOFTIRQ
+
+ SCSI_SOFTIRQ handler scsi_softirq calls scsi_decide_disposition() to
+determine what to do with the command. scsi_decide_disposition()
+looks at the scmd->result value and sense data to determine what to do
+with the command.
+
+ - SUCCESS
+ scsi_finish_command() is invoked for the command. The
+ function does some maintenance chores and then calls
+ scsi_io_completion() to finish the I/O.
+ scsi_io_completion() then notifies the block layer on
+ the completed request by calling blk_end_request and
+ friends or figures out what to do with the remainder
+ of the data in case of an error.
+
+ - NEEDS_RETRY
+ - ADD_TO_MLQUEUE
+ scmd is requeued to blk queue.
+
+ - otherwise
+ scsi_eh_scmd_add(scmd, 0) is invoked for the command. See
+ [1-3] for details of this function.
+
+
+[1-2-2] Completing a scmd w/ timeout
+
+ The timeout handler is scsi_times_out(). When a timeout occurs, this
+function
+
+ 1. invokes optional hostt->eh_timed_out() callback. Return value can
+ be one of
+
+ - BLK_EH_HANDLED
+ This indicates that eh_timed_out() dealt with the timeout.
+ The command is passed back to the block layer and completed
+ via __blk_complete_requests().
+
+ *NOTE* After returning BLK_EH_HANDLED the SCSI layer is
+ assumed to be finished with the command, and no other
+ functions from the SCSI layer will be called. So this
+ should typically only be returned if the eh_timed_out()
+ handler raced with normal completion.
+
+ - BLK_EH_RESET_TIMER
+ This indicates that more time is required to finish the
+ command. Timer is restarted. This action is counted as a
+ retry and only allowed scmd->allowed + 1(!) times. Once the
+ limit is reached, action for BLK_EH_NOT_HANDLED is taken instead.
+
+ - BLK_EH_NOT_HANDLED
+ eh_timed_out() callback did not handle the command.
+ Step #2 is taken.
+
+ 2. If the host supports asynchronous completion (as indicated by the
+ no_async_abort setting in the host template) scsi_abort_command()
+ is invoked to schedule an asynchrous abort. If that fails
+ Step #3 is taken.
+
+ 2. scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD) is invoked for the
+ command. See [1-3] for more information.
+
+[1-3] Asynchronous command aborts
+
+ After a timeout occurs a command abort is scheduled from
+ scsi_abort_command(). If the abort is successful the command
+ will either be retried (if the number of retries is not exhausted)
+ or terminated with DID_TIME_OUT.
+ Otherwise scsi_eh_scmd_add() is invoked for the command.
+ See [1-4] for more information.
+
+[1-4] How EH takes over
+
+ scmds enter EH via scsi_eh_scmd_add(), which does the following.
+
+ 1. Turns on scmd->eh_eflags as requested. It's 0 for error
+ completions and SCSI_EH_CANCEL_CMD for timeouts.
+
+ 2. Links scmd->eh_entry to shost->eh_cmd_q
+
+ 3. Sets SHOST_RECOVERY bit in shost->shost_state
+
+ 4. Increments shost->host_failed
+
+ 5. Wakes up SCSI EH thread if shost->host_busy == shost->host_failed
+
+ As can be seen above, once any scmd is added to shost->eh_cmd_q,
+SHOST_RECOVERY shost_state bit is turned on. This prevents any new
+scmd to be issued from blk queue to the host; eventually, all scmds on
+the host either complete normally, fail and get added to eh_cmd_q, or
+time out and get added to shost->eh_cmd_q.
+
+ If all scmds either complete or fail, the number of in-flight scmds
+becomes equal to the number of failed scmds - i.e. shost->host_busy ==
+shost->host_failed. This wakes up SCSI EH thread. So, once woken up,
+SCSI EH thread can expect that all in-flight commands have failed and
+are linked on shost->eh_cmd_q.
+
+ Note that this does not mean lower layers are quiescent. If a LLDD
+completed a scmd with error status, the LLDD and lower layers are
+assumed to forget about the scmd at that point. However, if a scmd
+has timed out, unless hostt->eh_timed_out() made lower layers forget
+about the scmd, which currently no LLDD does, the command is still
+active as long as lower layers are concerned and completion could
+occur at any time. Of course, all such completions are ignored as the
+timer has already expired.
+
+ We'll talk about how SCSI EH takes actions to abort - make LLDD
+forget about - timed out scmds later.
+
+
+[2] How SCSI EH works
+
+ LLDD's can implement SCSI EH actions in one of the following two
+ways.
+
+ - Fine-grained EH callbacks
+ LLDD can implement fine-grained EH callbacks and let SCSI
+ midlayer drive error handling and call appropriate callbacks.
+ This will be discussed further in [2-1].
+
+ - eh_strategy_handler() callback
+ This is one big callback which should perform whole error
+ handling. As such, it should do all chores the SCSI midlayer
+ performs during recovery. This will be discussed in [2-2].
+
+ Once recovery is complete, SCSI EH resumes normal operation by
+calling scsi_restart_operations(), which
+
+ 1. Checks if door locking is needed and locks door.
+
+ 2. Clears SHOST_RECOVERY shost_state bit
+
+ 3. Wakes up waiters on shost->host_wait. This occurs if someone
+ calls scsi_block_when_processing_errors() on the host.
+ (*QUESTION* why is it needed? All operations will be blocked
+ anyway after it reaches blk queue.)
+
+ 4. Kicks queues in all devices on the host in the asses
+
+
+[2-1] EH through fine-grained callbacks
+
+[2-1-1] Overview
+
+ If eh_strategy_handler() is not present, SCSI midlayer takes charge
+of driving error handling. EH's goals are two - make LLDD, host and
+device forget about timed out scmds and make them ready for new
+commands. A scmd is said to be recovered if the scmd is forgotten by
+lower layers and lower layers are ready to process or fail the scmd
+again.
+
+ To achieve these goals, EH performs recovery actions with increasing
+severity. Some actions are performed by issuing SCSI commands and
+others are performed by invoking one of the following fine-grained
+hostt EH callbacks. Callbacks may be omitted and omitted ones are
+considered to fail always.
+
+int (* eh_abort_handler)(struct scsi_cmnd *);
+int (* eh_device_reset_handler)(struct scsi_cmnd *);
+int (* eh_bus_reset_handler)(struct scsi_cmnd *);
+int (* eh_host_reset_handler)(struct scsi_cmnd *);
+
+ Higher-severity actions are taken only when lower-severity actions
+cannot recover some of failed scmds. Also, note that failure of the
+highest-severity action means EH failure and results in offlining of
+all unrecovered devices.
+
+ During recovery, the following rules are followed
+
+ - Recovery actions are performed on failed scmds on the to do list,
+ eh_work_q. If a recovery action succeeds for a scmd, recovered
+ scmds are removed from eh_work_q.
+
+ Note that single recovery action on a scmd can recover multiple
+ scmds. e.g. resetting a device recovers all failed scmds on the
+ device.
+
+ - Higher severity actions are taken iff eh_work_q is not empty after
+ lower severity actions are complete.
+
+ - EH reuses failed scmds to issue commands for recovery. For
+ timed-out scmds, SCSI EH ensures that LLDD forgets about a scmd
+ before reusing it for EH commands.
+
+ When a scmd is recovered, the scmd is moved from eh_work_q to EH
+local eh_done_q using scsi_eh_finish_cmd(). After all scmds are
+recovered (eh_work_q is empty), scsi_eh_flush_done_q() is invoked to
+either retry or error-finish (notify upper layer of failure) recovered
+scmds.
+
+ scmds are retried iff its sdev is still online (not offlined during
+EH), REQ_FAILFAST is not set and ++scmd->retries is less than
+scmd->allowed.
+
+
+[2-1-2] Flow of scmds through EH
+
+ 1. Error completion / time out
+ ACTION: scsi_eh_scmd_add() is invoked for scmd
+ - set scmd->eh_eflags
+ - add scmd to shost->eh_cmd_q
+ - set SHOST_RECOVERY
+ - shost->host_failed++
+ LOCKING: shost->host_lock
+
+ 2. EH starts
+ ACTION: move all scmds to EH's local eh_work_q. shost->eh_cmd_q
+ is cleared.
+ LOCKING: shost->host_lock (not strictly necessary, just for
+ consistency)
+
+ 3. scmd recovered
+ ACTION: scsi_eh_finish_cmd() is invoked to EH-finish scmd
+ - shost->host_failed--
+ - clear scmd->eh_eflags
+ - scsi_setup_cmd_retry()
+ - move from local eh_work_q to local eh_done_q
+ LOCKING: none
+
+ 4. EH completes
+ ACTION: scsi_eh_flush_done_q() retries scmds or notifies upper
+ layer of failure.
+ - scmd is removed from eh_done_q and scmd->eh_entry is cleared
+ - if retry is necessary, scmd is requeued using
+ scsi_queue_insert()
+ - otherwise, scsi_finish_command() is invoked for scmd
+ LOCKING: queue or finish function performs appropriate locking
+
+
+[2-1-3] Flow of control
+
+ EH through fine-grained callbacks start from scsi_unjam_host().
+
+<<scsi_unjam_host>>
+
+ 1. Lock shost->host_lock, splice_init shost->eh_cmd_q into local
+ eh_work_q and unlock host_lock. Note that shost->eh_cmd_q is
+ cleared by this action.
+
+ 2. Invoke scsi_eh_get_sense.
+
+ <<scsi_eh_get_sense>>
+
+ This action is taken for each error-completed
+ (!SCSI_EH_CANCEL_CMD) commands without valid sense data. Most
+ SCSI transports/LLDDs automatically acquire sense data on
+ command failures (autosense). Autosense is recommended for
+ performance reasons and as sense information could get out of
+ sync between occurrence of CHECK CONDITION and this action.
+
+ Note that if autosense is not supported, scmd->sense_buffer
+ contains invalid sense data when error-completing the scmd
+ with scsi_done(). scsi_decide_disposition() always returns
+ FAILED in such cases thus invoking SCSI EH. When the scmd
+ reaches here, sense data is acquired and
+ scsi_decide_disposition() is called again.
+
+ 1. Invoke scsi_request_sense() which issues REQUEST_SENSE
+ command. If fails, no action. Note that taking no action
+ causes higher-severity recovery to be taken for the scmd.
+
+ 2. Invoke scsi_decide_disposition() on the scmd
+
+ - SUCCESS
+ scmd->retries is set to scmd->allowed preventing
+ scsi_eh_flush_done_q() from retrying the scmd and
+ scsi_eh_finish_cmd() is invoked.
+
+ - NEEDS_RETRY
+ scsi_eh_finish_cmd() invoked
+
+ - otherwise
+ No action.
+
+ 3. If !list_empty(&eh_work_q), invoke scsi_eh_abort_cmds().
+
+ <<scsi_eh_abort_cmds>>
+
+ This action is taken for each timed out command when
+ no_async_abort is enabled in the host template.
+ hostt->eh_abort_handler() is invoked for each scmd. The
+ handler returns SUCCESS if it has succeeded to make LLDD and
+ all related hardware forget about the scmd.
+
+ If a timedout scmd is successfully aborted and the sdev is
+ either offline or ready, scsi_eh_finish_cmd() is invoked for
+ the scmd. Otherwise, the scmd is left in eh_work_q for
+ higher-severity actions.
+
+ Note that both offline and ready status mean that the sdev is
+ ready to process new scmds, where processing also implies
+ immediate failing; thus, if a sdev is in one of the two
+ states, no further recovery action is needed.
+
+ Device readiness is tested using scsi_eh_tur() which issues
+ TEST_UNIT_READY command. Note that the scmd must have been
+ aborted successfully before reusing it for TEST_UNIT_READY.
+
+ 4. If !list_empty(&eh_work_q), invoke scsi_eh_ready_devs()
+
+ <<scsi_eh_ready_devs>>
+
+ This function takes four increasingly more severe measures to
+ make failed sdevs ready for new commands.
+
+ 1. Invoke scsi_eh_stu()
+
+ <<scsi_eh_stu>>
+
+ For each sdev which has failed scmds with valid sense data
+ of which scsi_check_sense()'s verdict is FAILED,
+ START_STOP_UNIT command is issued w/ start=1. Note that
+ as we explicitly choose error-completed scmds, it is known
+ that lower layers have forgotten about the scmd and we can
+ reuse it for STU.
+
+ If STU succeeds and the sdev is either offline or ready,
+ all failed scmds on the sdev are EH-finished with
+ scsi_eh_finish_cmd().
+
+ *NOTE* If hostt->eh_abort_handler() isn't implemented or
+ failed, we may still have timed out scmds at this point
+ and STU doesn't make lower layers forget about those
+ scmds. Yet, this function EH-finish all scmds on the sdev
+ if STU succeeds leaving lower layers in an inconsistent
+ state. It seems that STU action should be taken only when
+ a sdev has no timed out scmd.
+
+ 2. If !list_empty(&eh_work_q), invoke scsi_eh_bus_device_reset().
+
+ <<scsi_eh_bus_device_reset>>
+
+ This action is very similar to scsi_eh_stu() except that,
+ instead of issuing STU, hostt->eh_device_reset_handler()
+ is used. Also, as we're not issuing SCSI commands and
+ resetting clears all scmds on the sdev, there is no need
+ to choose error-completed scmds.
+
+ 3. If !list_empty(&eh_work_q), invoke scsi_eh_bus_reset()
+
+ <<scsi_eh_bus_reset>>
+
+ hostt->eh_bus_reset_handler() is invoked for each channel
+ with failed scmds. If bus reset succeeds, all failed
+ scmds on all ready or offline sdevs on the channel are
+ EH-finished.
+
+ 4. If !list_empty(&eh_work_q), invoke scsi_eh_host_reset()
+
+ <<scsi_eh_host_reset>>
+
+ This is the last resort. hostt->eh_host_reset_handler()
+ is invoked. If host reset succeeds, all failed scmds on
+ all ready or offline sdevs on the host are EH-finished.
+
+ 5. If !list_empty(&eh_work_q), invoke scsi_eh_offline_sdevs()
+
+ <<scsi_eh_offline_sdevs>>
+
+ Take all sdevs which still have unrecovered scmds offline
+ and EH-finish the scmds.
+
+ 5. Invoke scsi_eh_flush_done_q().
+
+ <<scsi_eh_flush_done_q>>
+
+ At this point all scmds are recovered (or given up) and
+ put on eh_done_q by scsi_eh_finish_cmd(). This function
+ flushes eh_done_q by either retrying or notifying upper
+ layer of failure of the scmds.
+
+
+[2-2] EH through transportt->eh_strategy_handler()
+
+ transportt->eh_strategy_handler() is invoked in the place of
+scsi_unjam_host() and it is responsible for whole recovery process.
+On completion, the handler should have made lower layers forget about
+all failed scmds and either ready for new commands or offline. Also,
+it should perform SCSI EH maintenance chores to maintain integrity of
+SCSI midlayer. IOW, of the steps described in [2-1-2], all steps
+except for #1 must be implemented by eh_strategy_handler().
+
+
+[2-2-1] Pre transportt->eh_strategy_handler() SCSI midlayer conditions
+
+ The following conditions are true on entry to the handler.
+
+ - Each failed scmd's eh_flags field is set appropriately.
+
+ - Each failed scmd is linked on scmd->eh_cmd_q by scmd->eh_entry.
+
+ - SHOST_RECOVERY is set.
+
+ - shost->host_failed == shost->host_busy
+
+
+[2-2-2] Post transportt->eh_strategy_handler() SCSI midlayer conditions
+
+ The following conditions must be true on exit from the handler.
+
+ - shost->host_failed is zero.
+
+ - Each scmd's eh_eflags field is cleared.
+
+ - Each scmd is in such a state that scsi_setup_cmd_retry() on the
+ scmd doesn't make any difference.
+
+ - shost->eh_cmd_q is cleared.
+
+ - Each scmd->eh_entry is cleared.
+
+ - Either scsi_queue_insert() or scsi_finish_command() is called on
+ each scmd. Note that the handler is free to use scmd->retries and
+ ->allowed to limit the number of retries.
+
+
+[2-2-3] Things to consider
+
+ - Know that timed out scmds are still active on lower layers. Make
+ lower layers forget about them before doing anything else with
+ those scmds.
+
+ - For consistency, when accessing/modifying shost data structure,
+ grab shost->host_lock.
+
+ - On completion, each failed sdev must have forgotten about all
+ active scmds.
+
+ - On completion, each failed sdev must be ready for new commands or
+ offline.
+
+
+--
+Tejun Heo
+htejun@gmail.com
+11th September 2005
diff --git a/Documentation/scsi/scsi_fc_transport.txt b/Documentation/scsi/scsi_fc_transport.txt
new file mode 100644
index 000000000..f79282fc4
--- /dev/null
+++ b/Documentation/scsi/scsi_fc_transport.txt
@@ -0,0 +1,496 @@
+ SCSI FC Tansport
+ =============================================
+
+Date: 11/18/2008
+Kernel Revisions for features:
+ rports : <<TBS>>
+ vports : 2.6.22
+ bsg support : 2.6.30 (?TBD?)
+
+
+Introduction
+============
+This file documents the features and components of the SCSI FC Transport.
+It also provides documents the API between the transport and FC LLDDs.
+The FC transport can be found at:
+ drivers/scsi/scsi_transport_fc.c
+ include/scsi/scsi_transport_fc.h
+ include/scsi/scsi_netlink_fc.h
+ include/scsi/scsi_bsg_fc.h
+
+This file is found at Documentation/scsi/scsi_fc_transport.txt
+
+
+FC Remote Ports (rports)
+========================================================================
+<< To Be Supplied >>
+
+
+FC Virtual Ports (vports)
+========================================================================
+
+Overview:
+-------------------------------
+
+ New FC standards have defined mechanisms which allows for a single physical
+ port to appear on as multiple communication ports. Using the N_Port Id
+ Virtualization (NPIV) mechanism, a point-to-point connection to a Fabric
+ can be assigned more than 1 N_Port_ID. Each N_Port_ID appears as a
+ separate port to other endpoints on the fabric, even though it shares one
+ physical link to the switch for communication. Each N_Port_ID can have a
+ unique view of the fabric based on fabric zoning and array lun-masking
+ (just like a normal non-NPIV adapter). Using the Virtual Fabric (VF)
+ mechanism, adding a fabric header to each frame allows the port to
+ interact with the Fabric Port to join multiple fabrics. The port will
+ obtain an N_Port_ID on each fabric it joins. Each fabric will have its
+ own unique view of endpoints and configuration parameters. NPIV may be
+ used together with VF so that the port can obtain multiple N_Port_IDs
+ on each virtual fabric.
+
+ The FC transport is now recognizing a new object - a vport. A vport is
+ an entity that has a world-wide unique World Wide Port Name (wwpn) and
+ World Wide Node Name (wwnn). The transport also allows for the FC4's to
+ be specified for the vport, with FCP_Initiator being the primary role
+ expected. Once instantiated by one of the above methods, it will have a
+ distinct N_Port_ID and view of fabric endpoints and storage entities.
+ The fc_host associated with the physical adapter will export the ability
+ to create vports. The transport will create the vport object within the
+ Linux device tree, and instruct the fc_host's driver to instantiate the
+ virtual port. Typically, the driver will create a new scsi_host instance
+ on the vport, resulting in a unique <H,C,T,L> namespace for the vport.
+ Thus, whether a FC port is based on a physical port or on a virtual port,
+ each will appear as a unique scsi_host with its own target and lun space.
+
+ Note: At this time, the transport is written to create only NPIV-based
+ vports. However, consideration was given to VF-based vports and it
+ should be a minor change to add support if needed. The remaining
+ discussion will concentrate on NPIV.
+
+ Note: World Wide Name assignment (and uniqueness guarantees) are left
+ up to an administrative entity controlling the vport. For example,
+ if vports are to be associated with virtual machines, a XEN mgmt
+ utility would be responsible for creating wwpn/wwnn's for the vport,
+ using its own naming authority and OUI. (Note: it already does this
+ for virtual MAC addresses).
+
+
+Device Trees and Vport Objects:
+-------------------------------
+
+ Today, the device tree typically contains the scsi_host object,
+ with rports and scsi target objects underneath it. Currently the FC
+ transport creates the vport object and places it under the scsi_host
+ object corresponding to the physical adapter. The LLDD will allocate
+ a new scsi_host for the vport and link its object under the vport.
+ The remainder of the tree under the vports scsi_host is the same
+ as the non-NPIV case. The transport is written currently to easily
+ allow the parent of the vport to be something other than the scsi_host.
+ This could be used in the future to link the object onto a vm-specific
+ device tree. If the vport's parent is not the physical port's scsi_host,
+ a symbolic link to the vport object will be placed in the physical
+ port's scsi_host.
+
+ Here's what to expect in the device tree :
+ The typical Physical Port's Scsi_Host:
+ /sys/devices/.../host17/
+ and it has the typical descendant tree:
+ /sys/devices/.../host17/rport-17:0-0/target17:0:0/17:0:0:0:
+ and then the vport is created on the Physical Port:
+ /sys/devices/.../host17/vport-17:0-0
+ and the vport's Scsi_Host is then created:
+ /sys/devices/.../host17/vport-17:0-0/host18
+ and then the rest of the tree progresses, such as:
+ /sys/devices/.../host17/vport-17:0-0/host18/rport-18:0-0/target18:0:0/18:0:0:0:
+
+ Here's what to expect in the sysfs tree :
+ scsi_hosts:
+ /sys/class/scsi_host/host17 physical port's scsi_host
+ /sys/class/scsi_host/host18 vport's scsi_host
+ fc_hosts:
+ /sys/class/fc_host/host17 physical port's fc_host
+ /sys/class/fc_host/host18 vport's fc_host
+ fc_vports:
+ /sys/class/fc_vports/vport-17:0-0 the vport's fc_vport
+ fc_rports:
+ /sys/class/fc_remote_ports/rport-17:0-0 rport on the physical port
+ /sys/class/fc_remote_ports/rport-18:0-0 rport on the vport
+
+
+Vport Attributes:
+-------------------------------
+
+ The new fc_vport class object has the following attributes
+
+ node_name: Read_Only
+ The WWNN of the vport
+
+ port_name: Read_Only
+ The WWPN of the vport
+
+ roles: Read_Only
+ Indicates the FC4 roles enabled on the vport.
+
+ symbolic_name: Read_Write
+ A string, appended to the driver's symbolic port name string, which
+ is registered with the switch to identify the vport. For example,
+ a hypervisor could set this string to "Xen Domain 2 VM 5 Vport 2",
+ and this set of identifiers can be seen on switch management screens
+ to identify the port.
+
+ vport_delete: Write_Only
+ When written with a "1", will tear down the vport.
+
+ vport_disable: Write_Only
+ When written with a "1", will transition the vport to a disabled.
+ state. The vport will still be instantiated with the Linux kernel,
+ but it will not be active on the FC link.
+ When written with a "0", will enable the vport.
+
+ vport_last_state: Read_Only
+ Indicates the previous state of the vport. See the section below on
+ "Vport States".
+
+ vport_state: Read_Only
+ Indicates the state of the vport. See the section below on
+ "Vport States".
+
+ vport_type: Read_Only
+ Reflects the FC mechanism used to create the virtual port.
+ Only NPIV is supported currently.
+
+
+ For the fc_host class object, the following attributes are added for vports:
+
+ max_npiv_vports: Read_Only
+ Indicates the maximum number of NPIV-based vports that the
+ driver/adapter can support on the fc_host.
+
+ npiv_vports_inuse: Read_Only
+ Indicates how many NPIV-based vports have been instantiated on the
+ fc_host.
+
+ vport_create: Write_Only
+ A "simple" create interface to instantiate a vport on an fc_host.
+ A "<WWPN>:<WWNN>" string is written to the attribute. The transport
+ then instantiates the vport object and calls the LLDD to create the
+ vport with the role of FCP_Initiator. Each WWN is specified as 16
+ hex characters and may *not* contain any prefixes (e.g. 0x, x, etc).
+
+ vport_delete: Write_Only
+ A "simple" delete interface to teardown a vport. A "<WWPN>:<WWNN>"
+ string is written to the attribute. The transport will locate the
+ vport on the fc_host with the same WWNs and tear it down. Each WWN
+ is specified as 16 hex characters and may *not* contain any prefixes
+ (e.g. 0x, x, etc).
+
+
+Vport States:
+-------------------------------
+
+ Vport instantiation consists of two parts:
+ - Creation with the kernel and LLDD. This means all transport and
+ driver data structures are built up, and device objects created.
+ This is equivalent to a driver "attach" on an adapter, which is
+ independent of the adapter's link state.
+ - Instantiation of the vport on the FC link via ELS traffic, etc.
+ This is equivalent to a "link up" and successful link initialization.
+ Further information can be found in the interfaces section below for
+ Vport Creation.
+
+ Once a vport has been instantiated with the kernel/LLDD, a vport state
+ can be reported via the sysfs attribute. The following states exist:
+
+ FC_VPORT_UNKNOWN - Unknown
+ An temporary state, typically set only while the vport is being
+ instantiated with the kernel and LLDD.
+
+ FC_VPORT_ACTIVE - Active
+ The vport has been successfully been created on the FC link.
+ It is fully functional.
+
+ FC_VPORT_DISABLED - Disabled
+ The vport instantiated, but "disabled". The vport is not instantiated
+ on the FC link. This is equivalent to a physical port with the
+ link "down".
+
+ FC_VPORT_LINKDOWN - Linkdown
+ The vport is not operational as the physical link is not operational.
+
+ FC_VPORT_INITIALIZING - Initializing
+ The vport is in the process of instantiating on the FC link.
+ The LLDD will set this state just prior to starting the ELS traffic
+ to create the vport. This state will persist until the vport is
+ successfully created (state becomes FC_VPORT_ACTIVE) or it fails
+ (state is one of the values below). As this state is transitory,
+ it will not be preserved in the "vport_last_state".
+
+ FC_VPORT_NO_FABRIC_SUPP - No Fabric Support
+ The vport is not operational. One of the following conditions were
+ encountered:
+ - The FC topology is not Point-to-Point
+ - The FC port is not connected to an F_Port
+ - The F_Port has indicated that NPIV is not supported.
+
+ FC_VPORT_NO_FABRIC_RSCS - No Fabric Resources
+ The vport is not operational. The Fabric failed FDISC with a status
+ indicating that it does not have sufficient resources to complete
+ the operation.
+
+ FC_VPORT_FABRIC_LOGOUT - Fabric Logout
+ The vport is not operational. The Fabric has LOGO'd the N_Port_ID
+ associated with the vport.
+
+ FC_VPORT_FABRIC_REJ_WWN - Fabric Rejected WWN
+ The vport is not operational. The Fabric failed FDISC with a status
+ indicating that the WWN's are not valid.
+
+ FC_VPORT_FAILED - VPort Failed
+ The vport is not operational. This is a catchall for all other
+ error conditions.
+
+
+ The following state table indicates the different state transitions:
+
+ State Event New State
+ --------------------------------------------------------------------
+ n/a Initialization Unknown
+ Unknown: Link Down Linkdown
+ Link Up & Loop No Fabric Support
+ Link Up & no Fabric No Fabric Support
+ Link Up & FLOGI response No Fabric Support
+ indicates no NPIV support
+ Link Up & FDISC being sent Initializing
+ Disable request Disable
+ Linkdown: Link Up Unknown
+ Initializing: FDISC ACC Active
+ FDISC LS_RJT w/ no resources No Fabric Resources
+ FDISC LS_RJT w/ invalid Fabric Rejected WWN
+ pname or invalid nport_id
+ FDISC LS_RJT failed for Vport Failed
+ other reasons
+ Link Down Linkdown
+ Disable request Disable
+ Disable: Enable request Unknown
+ Active: LOGO received from fabric Fabric Logout
+ Link Down Linkdown
+ Disable request Disable
+ Fabric Logout: Link still up Unknown
+
+ The following 4 error states all have the same transitions:
+ No Fabric Support:
+ No Fabric Resources:
+ Fabric Rejected WWN:
+ Vport Failed:
+ Disable request Disable
+ Link goes down Linkdown
+
+
+Transport <-> LLDD Interfaces :
+-------------------------------
+
+Vport support by LLDD:
+
+ The LLDD indicates support for vports by supplying a vport_create()
+ function in the transport template. The presence of this function will
+ cause the creation of the new attributes on the fc_host. As part of
+ the physical port completing its initialization relative to the
+ transport, it should set the max_npiv_vports attribute to indicate the
+ maximum number of vports the driver and/or adapter supports.
+
+
+Vport Creation:
+
+ The LLDD vport_create() syntax is:
+
+ int vport_create(struct fc_vport *vport, bool disable)
+
+ where:
+ vport: Is the newly allocated vport object
+ disable: If "true", the vport is to be created in a disabled stated.
+ If "false", the vport is to be enabled upon creation.
+
+ When a request is made to create a new vport (via sgio/netlink, or the
+ vport_create fc_host attribute), the transport will validate that the LLDD
+ can support another vport (e.g. max_npiv_vports > npiv_vports_inuse).
+ If not, the create request will be failed. If space remains, the transport
+ will increment the vport count, create the vport object, and then call the
+ LLDD's vport_create() function with the newly allocated vport object.
+
+ As mentioned above, vport creation is divided into two parts:
+ - Creation with the kernel and LLDD. This means all transport and
+ driver data structures are built up, and device objects created.
+ This is equivalent to a driver "attach" on an adapter, which is
+ independent of the adapter's link state.
+ - Instantiation of the vport on the FC link via ELS traffic, etc.
+ This is equivalent to a "link up" and successful link initialization.
+
+ The LLDD's vport_create() function will not synchronously wait for both
+ parts to be fully completed before returning. It must validate that the
+ infrastructure exists to support NPIV, and complete the first part of
+ vport creation (data structure build up) before returning. We do not
+ hinge vport_create() on the link-side operation mainly because:
+ - The link may be down. It is not a failure if it is. It simply
+ means the vport is in an inoperable state until the link comes up.
+ This is consistent with the link bouncing post vport creation.
+ - The vport may be created in a disabled state.
+ - This is consistent with a model where: the vport equates to a
+ FC adapter. The vport_create is synonymous with driver attachment
+ to the adapter, which is independent of link state.
+
+ Note: special error codes have been defined to delineate infrastructure
+ failure cases for quicker resolution.
+
+ The expected behavior for the LLDD's vport_create() function is:
+ - Validate Infrastructure:
+ - If the driver or adapter cannot support another vport, whether
+ due to improper firmware, (a lie about) max_npiv, or a lack of
+ some other resource - return VPCERR_UNSUPPORTED.
+ - If the driver validates the WWN's against those already active on
+ the adapter and detects an overlap - return VPCERR_BAD_WWN.
+ - If the driver detects the topology is loop, non-fabric, or the
+ FLOGI did not support NPIV - return VPCERR_NO_FABRIC_SUPP.
+ - Allocate data structures. If errors are encountered, such as out
+ of memory conditions, return the respective negative Exxx error code.
+ - If the role is FCP Initiator, the LLDD is to :
+ - Call scsi_host_alloc() to allocate a scsi_host for the vport.
+ - Call scsi_add_host(new_shost, &vport->dev) to start the scsi_host
+ and bind it as a child of the vport device.
+ - Initializes the fc_host attribute values.
+ - Kick of further vport state transitions based on the disable flag and
+ link state - and return success (zero).
+
+ LLDD Implementers Notes:
+ - It is suggested that there be a different fc_function_templates for
+ the physical port and the virtual port. The physical port's template
+ would have the vport_create, vport_delete, and vport_disable functions,
+ while the vports would not.
+ - It is suggested that there be different scsi_host_templates
+ for the physical port and virtual port. Likely, there are driver
+ attributes, embedded into the scsi_host_template, that are applicable
+ for the physical port only (link speed, topology setting, etc). This
+ ensures that the attributes are applicable to the respective scsi_host.
+
+
+Vport Disable/Enable:
+
+ The LLDD vport_disable() syntax is:
+
+ int vport_disable(struct fc_vport *vport, bool disable)
+
+ where:
+ vport: Is vport to be enabled or disabled
+ disable: If "true", the vport is to be disabled.
+ If "false", the vport is to be enabled.
+
+ When a request is made to change the disabled state on a vport, the
+ transport will validate the request against the existing vport state.
+ If the request is to disable and the vport is already disabled, the
+ request will fail. Similarly, if the request is to enable, and the
+ vport is not in a disabled state, the request will fail. If the request
+ is valid for the vport state, the transport will call the LLDD to
+ change the vport's state.
+
+ Within the LLDD, if a vport is disabled, it remains instantiated with
+ the kernel and LLDD, but it is not active or visible on the FC link in
+ any way. (see Vport Creation and the 2 part instantiation discussion).
+ The vport will remain in this state until it is deleted or re-enabled.
+ When enabling a vport, the LLDD reinstantiates the vport on the FC
+ link - essentially restarting the LLDD statemachine (see Vport States
+ above).
+
+
+Vport Deletion:
+
+ The LLDD vport_delete() syntax is:
+
+ int vport_delete(struct fc_vport *vport)
+
+ where:
+ vport: Is vport to delete
+
+ When a request is made to delete a vport (via sgio/netlink, or via the
+ fc_host or fc_vport vport_delete attributes), the transport will call
+ the LLDD to terminate the vport on the FC link, and teardown all other
+ datastructures and references. If the LLDD completes successfully,
+ the transport will teardown the vport objects and complete the vport
+ removal. If the LLDD delete request fails, the vport object will remain,
+ but will be in an indeterminate state.
+
+ Within the LLDD, the normal code paths for a scsi_host teardown should
+ be followed. E.g. If the vport has a FCP Initiator role, the LLDD
+ will call fc_remove_host() for the vports scsi_host, followed by
+ scsi_remove_host() and scsi_host_put() for the vports scsi_host.
+
+
+Other:
+ fc_host port_type attribute:
+ There is a new fc_host port_type value - FC_PORTTYPE_NPIV. This value
+ must be set on all vport-based fc_hosts. Normally, on a physical port,
+ the port_type attribute would be set to NPORT, NLPORT, etc based on the
+ topology type and existence of the fabric. As this is not applicable to
+ a vport, it makes more sense to report the FC mechanism used to create
+ the vport.
+
+ Driver unload:
+ FC drivers are required to call fc_remove_host() prior to calling
+ scsi_remove_host(). This allows the fc_host to tear down all remote
+ ports prior the scsi_host being torn down. The fc_remove_host() call
+ was updated to remove all vports for the fc_host as well.
+
+
+Transport supplied functions
+----------------------------
+
+The following functions are supplied by the FC-transport for use by LLDs.
+
+ fc_vport_create - create a vport
+ fc_vport_terminate - detach and remove a vport
+
+Details:
+
+/**
+ * fc_vport_create - Admin App or LLDD requests creation of a vport
+ * @shost: scsi host the virtual port is connected to.
+ * @ids: The world wide names, FC4 port roles, etc for
+ * the virtual port.
+ *
+ * Notes:
+ * This routine assumes no locks are held on entry.
+ */
+struct fc_vport *
+fc_vport_create(struct Scsi_Host *shost, struct fc_vport_identifiers *ids)
+
+/**
+ * fc_vport_terminate - Admin App or LLDD requests termination of a vport
+ * @vport: fc_vport to be terminated
+ *
+ * Calls the LLDD vport_delete() function, then deallocates and removes
+ * the vport from the shost and object tree.
+ *
+ * Notes:
+ * This routine assumes no locks are held on entry.
+ */
+int
+fc_vport_terminate(struct fc_vport *vport)
+
+
+FC BSG support (CT & ELS passthru, and more)
+========================================================================
+<< To Be Supplied >>
+
+
+
+
+
+Credits
+=======
+The following people have contributed to this document:
+
+
+
+
+
+
+James Smart
+james.smart@emulex.com
+
diff --git a/Documentation/scsi/scsi_mid_low_api.txt b/Documentation/scsi/scsi_mid_low_api.txt
new file mode 100644
index 000000000..731bc4f4c
--- /dev/null
+++ b/Documentation/scsi/scsi_mid_low_api.txt
@@ -0,0 +1,1401 @@
+ Linux Kernel 2.6 series
+ SCSI mid_level - lower_level driver interface
+ =============================================
+
+Introduction
+============
+This document outlines the interface between the Linux SCSI mid level and
+SCSI lower level drivers. Lower level drivers (LLDs) are variously called
+host bus adapter (HBA) drivers and host drivers (HD). A "host" in this
+context is a bridge between a computer IO bus (e.g. PCI or ISA) and a
+single SCSI initiator port on a SCSI transport. An "initiator" port
+(SCSI terminology, see SAM-3 at http://www.t10.org) sends SCSI commands
+to "target" SCSI ports (e.g. disks). There can be many LLDs in a running
+system, but only one per hardware type. Most LLDs can control one or more
+SCSI HBAs. Some HBAs contain multiple hosts.
+
+In some cases the SCSI transport is an external bus that already has
+its own subsystem in Linux (e.g. USB and ieee1394). In such cases the
+SCSI subsystem LLD is a software bridge to the other driver subsystem.
+Examples are the usb-storage driver (found in the drivers/usb/storage
+directory) and the ieee1394/sbp2 driver (found in the drivers/ieee1394
+directory).
+
+For example, the aic7xxx LLD controls Adaptec SCSI parallel interface
+(SPI) controllers based on that company's 7xxx chip series. The aic7xxx
+LLD can be built into the kernel or loaded as a module. There can only be
+one aic7xxx LLD running in a Linux system but it may be controlling many
+HBAs. These HBAs might be either on PCI daughter-boards or built into
+the motherboard (or both). Some aic7xxx based HBAs are dual controllers
+and thus represent two hosts. Like most modern HBAs, each aic7xxx host
+has its own PCI device address. [The one-to-one correspondence between
+a SCSI host and a PCI device is common but not required (e.g. with
+ISA adapters).]
+
+The SCSI mid level isolates an LLD from other layers such as the SCSI
+upper layer drivers and the block layer.
+
+This version of the document roughly matches linux kernel version 2.6.8 .
+
+Documentation
+=============
+There is a SCSI documentation directory within the kernel source tree,
+typically Documentation/scsi . Most documents are in plain
+(i.e. ASCII) text. This file is named scsi_mid_low_api.txt and can be
+found in that directory. A more recent copy of this document may be found
+at http://web.archive.org/web/20070107183357rn_1/sg.torque.net/scsi/.
+Many LLDs are documented there (e.g. aic7xxx.txt). The SCSI mid-level is
+briefly described in scsi.txt which contains a url to a document
+describing the SCSI subsystem in the lk 2.4 series. Two upper level
+drivers have documents in that directory: st.txt (SCSI tape driver) and
+scsi-generic.txt (for the sg driver).
+
+Some documentation (or urls) for LLDs may be found in the C source code
+or in the same directory as the C source code. For example to find a url
+about the USB mass storage driver see the
+/usr/src/linux/drivers/usb/storage directory.
+
+Driver structure
+================
+Traditionally an LLD for the SCSI subsystem has been at least two files in
+the drivers/scsi directory. For example, a driver called "xyz" has a header
+file "xyz.h" and a source file "xyz.c". [Actually there is no good reason
+why this couldn't all be in one file; the header file is superfluous.] Some
+drivers that have been ported to several operating systems have more than
+two files. For example the aic7xxx driver has separate files for generic
+and OS-specific code (e.g. FreeBSD and Linux). Such drivers tend to have
+their own directory under the drivers/scsi directory.
+
+When a new LLD is being added to Linux, the following files (found in the
+drivers/scsi directory) will need some attention: Makefile and Kconfig .
+It is probably best to study how existing LLDs are organized.
+
+As the 2.5 series development kernels evolve into the 2.6 series
+production series, changes are being introduced into this interface. An
+example of this is driver initialization code where there are now 2 models
+available. The older one, similar to what was found in the lk 2.4 series,
+is based on hosts that are detected at HBA driver load time. This will be
+referred to the "passive" initialization model. The newer model allows HBAs
+to be hot plugged (and unplugged) during the lifetime of the LLD and will
+be referred to as the "hotplug" initialization model. The newer model is
+preferred as it can handle both traditional SCSI equipment that is
+permanently connected as well as modern "SCSI" devices (e.g. USB or
+IEEE 1394 connected digital cameras) that are hotplugged. Both
+initialization models are discussed in the following sections.
+
+An LLD interfaces to the SCSI subsystem several ways:
+ a) directly invoking functions supplied by the mid level
+ b) passing a set of function pointers to a registration function
+ supplied by the mid level. The mid level will then invoke these
+ functions at some point in the future. The LLD will supply
+ implementations of these functions.
+ c) direct access to instances of well known data structures maintained
+ by the mid level
+
+Those functions in group a) are listed in a section entitled "Mid level
+supplied functions" below.
+
+Those functions in group b) are listed in a section entitled "Interface
+functions" below. Their function pointers are placed in the members of
+"struct scsi_host_template", an instance of which is passed to
+scsi_host_alloc() ** . Those interface functions that the LLD does not
+wish to supply should have NULL placed in the corresponding member of
+struct scsi_host_template. Defining an instance of struct
+scsi_host_template at file scope will cause NULL to be placed in function
+ pointer members not explicitly initialized.
+
+Those usages in group c) should be handled with care, especially in a
+"hotplug" environment. LLDs should be aware of the lifetime of instances
+that are shared with the mid level and other layers.
+
+All functions defined within an LLD and all data defined at file scope
+should be static. For example the slave_alloc() function in an LLD
+called "xxx" could be defined as
+"static int xxx_slave_alloc(struct scsi_device * sdev) { /* code */ }"
+
+** the scsi_host_alloc() function is a replacement for the rather vaguely
+named scsi_register() function in most situations. The scsi_register()
+and scsi_unregister() functions remain to support legacy LLDs that use
+the passive initialization model.
+
+
+Hotplug initialization model
+============================
+In this model an LLD controls when SCSI hosts are introduced and removed
+from the SCSI subsystem. Hosts can be introduced as early as driver
+initialization and removed as late as driver shutdown. Typically a driver
+will respond to a sysfs probe() callback that indicates an HBA has been
+detected. After confirming that the new device is one that the LLD wants
+to control, the LLD will initialize the HBA and then register a new host
+with the SCSI mid level.
+
+During LLD initialization the driver should register itself with the
+appropriate IO bus on which it expects to find HBA(s) (e.g. the PCI bus).
+This can probably be done via sysfs. Any driver parameters (especially
+those that are writable after the driver is loaded) could also be
+registered with sysfs at this point. The SCSI mid level first becomes
+aware of an LLD when that LLD registers its first HBA.
+
+At some later time, the LLD becomes aware of an HBA and what follows
+is a typical sequence of calls between the LLD and the mid level.
+This example shows the mid level scanning the newly introduced HBA for 3
+scsi devices of which only the first 2 respond:
+
+ HBA PROBE: assume 2 SCSI devices found in scan
+LLD mid level LLD
+===-------------------=========--------------------===------
+scsi_host_alloc() -->
+scsi_add_host() ---->
+scsi_scan_host() -------+
+ |
+ slave_alloc()
+ slave_configure() --> scsi_change_queue_depth()
+ |
+ slave_alloc()
+ slave_configure()
+ |
+ slave_alloc() ***
+ slave_destroy() ***
+------------------------------------------------------------
+
+If the LLD wants to adjust the default queue settings, it can invoke
+scsi_change_queue_depth() in its slave_configure() routine.
+
+*** For scsi devices that the mid level tries to scan but do not
+ respond, a slave_alloc(), slave_destroy() pair is called.
+
+When an HBA is being removed it could be as part of an orderly shutdown
+associated with the LLD module being unloaded (e.g. with the "rmmod"
+command) or in response to a "hot unplug" indicated by sysfs()'s
+remove() callback being invoked. In either case, the sequence is the
+same:
+
+ HBA REMOVE: assume 2 SCSI devices attached
+LLD mid level LLD
+===----------------------=========-----------------===------
+scsi_remove_host() ---------+
+ |
+ slave_destroy()
+ slave_destroy()
+scsi_host_put()
+------------------------------------------------------------
+
+It may be useful for a LLD to keep track of struct Scsi_Host instances
+(a pointer is returned by scsi_host_alloc()). Such instances are "owned"
+by the mid-level. struct Scsi_Host instances are freed from
+scsi_host_put() when the reference count hits zero.
+
+Hot unplugging an HBA that controls a disk which is processing SCSI
+commands on a mounted file system is an interesting situation. Reference
+counting logic is being introduced into the mid level to cope with many
+of the issues involved. See the section on reference counting below.
+
+
+The hotplug concept may be extended to SCSI devices. Currently, when an
+HBA is added, the scsi_scan_host() function causes a scan for SCSI devices
+attached to the HBA's SCSI transport. On newer SCSI transports the HBA
+may become aware of a new SCSI device _after_ the scan has completed.
+An LLD can use this sequence to make the mid level aware of a SCSI device:
+
+ SCSI DEVICE hotplug
+LLD mid level LLD
+===-------------------=========--------------------===------
+scsi_add_device() ------+
+ |
+ slave_alloc()
+ slave_configure() [--> scsi_change_queue_depth()]
+------------------------------------------------------------
+
+In a similar fashion, an LLD may become aware that a SCSI device has been
+removed (unplugged) or the connection to it has been interrupted. Some
+existing SCSI transports (e.g. SPI) may not become aware that a SCSI
+device has been removed until a subsequent SCSI command fails which will
+probably cause that device to be set offline by the mid level. An LLD that
+detects the removal of a SCSI device can instigate its removal from
+upper layers with this sequence:
+
+ SCSI DEVICE hot unplug
+LLD mid level LLD
+===----------------------=========-----------------===------
+scsi_remove_device() -------+
+ |
+ slave_destroy()
+------------------------------------------------------------
+
+It may be useful for an LLD to keep track of struct scsi_device instances
+(a pointer is passed as the parameter to slave_alloc() and
+slave_configure() callbacks). Such instances are "owned" by the mid-level.
+struct scsi_device instances are freed after slave_destroy().
+
+
+Passive initialization model
+============================
+These older LLDs include a file called "scsi_module.c" [yes the ".c" is a
+little surprising] in their source code. For that file to work an
+instance of struct scsi_host_template with the name "driver_template"
+needs to be defined. Here is a typical code sequence used in this model:
+ static struct scsi_host_template driver_template = {
+ ...
+ };
+ #include "scsi_module.c"
+
+The scsi_module.c file contains two functions:
+ - init_this_scsi_driver() which is executed when the LLD is
+ initialized (i.e. boot time or module load time)
+ - exit_this_scsi_driver() which is executed when the LLD is shut
+ down (i.e. module unload time)
+Note: since these functions are tagged with __init and __exit qualifiers
+an LLD should not call them explicitly (since the kernel does that).
+
+Here is an example of an initialization sequence when two hosts are
+detected (so detect() returns 2) and the SCSI bus scan on each host
+finds 1 SCSI device (and a second device does not respond).
+
+LLD mid level LLD
+===----------------------=========-----------------===------
+init_this_scsi_driver() ----+
+ |
+ detect() -----------------+
+ | |
+ | scsi_register()
+ | scsi_register()
+ |
+ slave_alloc()
+ slave_configure() --> scsi_change_queue_depth()
+ slave_alloc() ***
+ slave_destroy() ***
+ |
+ slave_alloc()
+ slave_configure()
+ slave_alloc() ***
+ slave_destroy() ***
+------------------------------------------------------------
+
+The mid level invokes scsi_change_queue_depth() with "cmd_per_lun" for that
+host as the queue length. These settings can be overridden by a
+slave_configure() supplied by the LLD.
+
+*** For scsi devices that the mid level tries to scan but do not
+ respond, a slave_alloc(), slave_destroy() pair is called.
+
+Here is an LLD shutdown sequence:
+
+LLD mid level LLD
+===----------------------=========-----------------===------
+exit_this_scsi_driver() ----+
+ |
+ slave_destroy()
+ release() --> scsi_unregister()
+ |
+ slave_destroy()
+ release() --> scsi_unregister()
+------------------------------------------------------------
+
+An LLD need not define slave_destroy() (i.e. it is optional).
+
+The shortcoming of the "passive initialization model" is that host
+registration and de-registration are (typically) tied to LLD initialization
+and shutdown. Once the LLD is initialized then a new host that appears
+(e.g. via hotplugging) cannot easily be added without a redundant
+driver shutdown and re-initialization. It may be possible to write an LLD
+that uses both initialization models.
+
+
+Reference Counting
+==================
+The Scsi_Host structure has had reference counting infrastructure added.
+This effectively spreads the ownership of struct Scsi_Host instances
+across the various SCSI layers which use them. Previously such instances
+were exclusively owned by the mid level. LLDs would not usually need to
+directly manipulate these reference counts but there may be some cases
+where they do.
+
+There are 3 reference counting functions of interest associated with
+struct Scsi_Host:
+ - scsi_host_alloc(): returns a pointer to new instance of struct
+ Scsi_Host which has its reference count ^^ set to 1
+ - scsi_host_get(): adds 1 to the reference count of the given instance
+ - scsi_host_put(): decrements 1 from the reference count of the given
+ instance. If the reference count reaches 0 then the given instance
+ is freed
+
+The Scsi_device structure has had reference counting infrastructure added.
+This effectively spreads the ownership of struct Scsi_device instances
+across the various SCSI layers which use them. Previously such instances
+were exclusively owned by the mid level. See the access functions declared
+towards the end of include/scsi/scsi_device.h . If an LLD wants to keep
+a copy of a pointer to a Scsi_device instance it should use scsi_device_get()
+to bump its reference count. When it is finished with the pointer it can
+use scsi_device_put() to decrement its reference count (and potentially
+delete it).
+
+^^ struct Scsi_Host actually has 2 reference counts which are manipulated
+in parallel by these functions.
+
+
+Conventions
+===========
+First, Linus Torvalds's thoughts on C coding style can be found in the
+Documentation/CodingStyle file.
+
+Next, there is a movement to "outlaw" typedefs introducing synonyms for
+struct tags. Both can be still found in the SCSI subsystem, but
+the typedefs have been moved to a single file, scsi_typedefs.h to
+make their future removal easier, for example:
+"typedef struct scsi_cmnd Scsi_Cmnd;"
+
+Also, most C99 enhancements are encouraged to the extent they are supported
+by the relevant gcc compilers. So C99 style structure and array
+initializers are encouraged where appropriate. Don't go too far,
+VLAs are not properly supported yet. An exception to this is the use of
+"//" style comments; /*...*/ comments are still preferred in Linux.
+
+Well written, tested and documented code, need not be re-formatted to
+comply with the above conventions. For example, the aic7xxx driver
+comes to Linux from FreeBSD and Adaptec's own labs. No doubt FreeBSD
+and Adaptec have their own coding conventions.
+
+
+Mid level supplied functions
+============================
+These functions are supplied by the SCSI mid level for use by LLDs.
+The names (i.e. entry points) of these functions are exported
+so an LLD that is a module can access them. The kernel will
+arrange for the SCSI mid level to be loaded and initialized before any LLD
+is initialized. The functions below are listed alphabetically and their
+names all start with "scsi_".
+
+Summary:
+ scsi_add_device - creates new scsi device (lu) instance
+ scsi_add_host - perform sysfs registration and set up transport class
+ scsi_change_queue_depth - change the queue depth on a SCSI device
+ scsi_bios_ptable - return copy of block device's partition table
+ scsi_block_requests - prevent further commands being queued to given host
+ scsi_host_alloc - return a new scsi_host instance whose refcount==1
+ scsi_host_get - increments Scsi_Host instance's refcount
+ scsi_host_put - decrements Scsi_Host instance's refcount (free if 0)
+ scsi_partsize - parse partition table into cylinders, heads + sectors
+ scsi_register - create and register a scsi host adapter instance.
+ scsi_remove_device - detach and remove a SCSI device
+ scsi_remove_host - detach and remove all SCSI devices owned by host
+ scsi_report_bus_reset - report scsi _bus_ reset observed
+ scsi_scan_host - scan SCSI bus
+ scsi_track_queue_full - track successive QUEUE_FULL events
+ scsi_unblock_requests - allow further commands to be queued to given host
+ scsi_unregister - [calls scsi_host_put()]
+
+
+Details:
+
+/**
+ * scsi_add_device - creates new scsi device (lu) instance
+ * @shost: pointer to scsi host instance
+ * @channel: channel number (rarely other than 0)
+ * @id: target id number
+ * @lun: logical unit number
+ *
+ * Returns pointer to new struct scsi_device instance or
+ * ERR_PTR(-ENODEV) (or some other bent pointer) if something is
+ * wrong (e.g. no lu responds at given address)
+ *
+ * Might block: yes
+ *
+ * Notes: This call is usually performed internally during a scsi
+ * bus scan when an HBA is added (i.e. scsi_scan_host()). So it
+ * should only be called if the HBA becomes aware of a new scsi
+ * device (lu) after scsi_scan_host() has completed. If successful
+ * this call can lead to slave_alloc() and slave_configure() callbacks
+ * into the LLD.
+ *
+ * Defined in: drivers/scsi/scsi_scan.c
+ **/
+struct scsi_device * scsi_add_device(struct Scsi_Host *shost,
+ unsigned int channel,
+ unsigned int id, unsigned int lun)
+
+
+/**
+ * scsi_add_host - perform sysfs registration and set up transport class
+ * @shost: pointer to scsi host instance
+ * @dev: pointer to struct device of type scsi class
+ *
+ * Returns 0 on success, negative errno of failure (e.g. -ENOMEM)
+ *
+ * Might block: no
+ *
+ * Notes: Only required in "hotplug initialization model" after a
+ * successful call to scsi_host_alloc(). This function does not
+ * scan the bus; this can be done by calling scsi_scan_host() or
+ * in some other transport-specific way. The LLD must set up
+ * the transport template before calling this function and may only
+ * access the transport class data after this function has been called.
+ *
+ * Defined in: drivers/scsi/hosts.c
+ **/
+int scsi_add_host(struct Scsi_Host *shost, struct device * dev)
+
+
+/**
+ * scsi_change_queue_depth - allow LLD to change queue depth on a SCSI device
+ * @sdev: pointer to SCSI device to change queue depth on
+ * @tags Number of tags allowed if tagged queuing enabled,
+ * or number of commands the LLD can queue up
+ * in non-tagged mode (as per cmd_per_lun).
+ *
+ * Returns nothing
+ *
+ * Might block: no
+ *
+ * Notes: Can be invoked any time on a SCSI device controlled by this
+ * LLD. [Specifically during and after slave_configure() and prior to
+ * slave_destroy().] Can safely be invoked from interrupt code.
+ *
+ * Defined in: drivers/scsi/scsi.c [see source code for more notes]
+ *
+ **/
+int scsi_change_queue_depth(struct scsi_device *sdev, int tags)
+
+
+/**
+ * scsi_bios_ptable - return copy of block device's partition table
+ * @dev: pointer to block device
+ *
+ * Returns pointer to partition table, or NULL for failure
+ *
+ * Might block: yes
+ *
+ * Notes: Caller owns memory returned (free with kfree() )
+ *
+ * Defined in: drivers/scsi/scsicam.c
+ **/
+unsigned char *scsi_bios_ptable(struct block_device *dev)
+
+
+/**
+ * scsi_block_requests - prevent further commands being queued to given host
+ *
+ * @shost: pointer to host to block commands on
+ *
+ * Returns nothing
+ *
+ * Might block: no
+ *
+ * Notes: There is no timer nor any other means by which the requests
+ * get unblocked other than the LLD calling scsi_unblock_requests().
+ *
+ * Defined in: drivers/scsi/scsi_lib.c
+**/
+void scsi_block_requests(struct Scsi_Host * shost)
+
+
+/**
+ * scsi_host_alloc - create a scsi host adapter instance and perform basic
+ * initialization.
+ * @sht: pointer to scsi host template
+ * @privsize: extra bytes to allocate in hostdata array (which is the
+ * last member of the returned Scsi_Host instance)
+ *
+ * Returns pointer to new Scsi_Host instance or NULL on failure
+ *
+ * Might block: yes
+ *
+ * Notes: When this call returns to the LLD, the SCSI bus scan on
+ * this host has _not_ yet been done.
+ * The hostdata array (by default zero length) is a per host scratch
+ * area for the LLD's exclusive use.
+ * Both associated refcounting objects have their refcount set to 1.
+ * Full registration (in sysfs) and a bus scan are performed later when
+ * scsi_add_host() and scsi_scan_host() are called.
+ *
+ * Defined in: drivers/scsi/hosts.c .
+ **/
+struct Scsi_Host * scsi_host_alloc(struct scsi_host_template * sht,
+ int privsize)
+
+
+/**
+ * scsi_host_get - increment Scsi_Host instance refcount
+ * @shost: pointer to struct Scsi_Host instance
+ *
+ * Returns nothing
+ *
+ * Might block: currently may block but may be changed to not block
+ *
+ * Notes: Actually increments the counts in two sub-objects
+ *
+ * Defined in: drivers/scsi/hosts.c
+ **/
+void scsi_host_get(struct Scsi_Host *shost)
+
+
+/**
+ * scsi_host_put - decrement Scsi_Host instance refcount, free if 0
+ * @shost: pointer to struct Scsi_Host instance
+ *
+ * Returns nothing
+ *
+ * Might block: currently may block but may be changed to not block
+ *
+ * Notes: Actually decrements the counts in two sub-objects. If the
+ * latter refcount reaches 0, the Scsi_Host instance is freed.
+ * The LLD need not worry exactly when the Scsi_Host instance is
+ * freed, it just shouldn't access the instance after it has balanced
+ * out its refcount usage.
+ *
+ * Defined in: drivers/scsi/hosts.c
+ **/
+void scsi_host_put(struct Scsi_Host *shost)
+
+
+/**
+ * scsi_partsize - parse partition table into cylinders, heads + sectors
+ * @buf: pointer to partition table
+ * @capacity: size of (total) disk in 512 byte sectors
+ * @cyls: outputs number of cylinders calculated via this pointer
+ * @hds: outputs number of heads calculated via this pointer
+ * @secs: outputs number of sectors calculated via this pointer
+ *
+ * Returns 0 on success, -1 on failure
+ *
+ * Might block: no
+ *
+ * Notes: Caller owns memory returned (free with kfree() )
+ *
+ * Defined in: drivers/scsi/scsicam.c
+ **/
+int scsi_partsize(unsigned char *buf, unsigned long capacity,
+ unsigned int *cyls, unsigned int *hds, unsigned int *secs)
+
+
+/**
+ * scsi_register - create and register a scsi host adapter instance.
+ * @sht: pointer to scsi host template
+ * @privsize: extra bytes to allocate in hostdata array (which is the
+ * last member of the returned Scsi_Host instance)
+ *
+ * Returns pointer to new Scsi_Host instance or NULL on failure
+ *
+ * Might block: yes
+ *
+ * Notes: When this call returns to the LLD, the SCSI bus scan on
+ * this host has _not_ yet been done.
+ * The hostdata array (by default zero length) is a per host scratch
+ * area for the LLD.
+ *
+ * Defined in: drivers/scsi/hosts.c .
+ **/
+struct Scsi_Host * scsi_register(struct scsi_host_template * sht,
+ int privsize)
+
+
+/**
+ * scsi_remove_device - detach and remove a SCSI device
+ * @sdev: a pointer to a scsi device instance
+ *
+ * Returns value: 0 on success, -EINVAL if device not attached
+ *
+ * Might block: yes
+ *
+ * Notes: If an LLD becomes aware that a scsi device (lu) has
+ * been removed but its host is still present then it can request
+ * the removal of that scsi device. If successful this call will
+ * lead to the slave_destroy() callback being invoked. sdev is an
+ * invalid pointer after this call.
+ *
+ * Defined in: drivers/scsi/scsi_sysfs.c .
+ **/
+int scsi_remove_device(struct scsi_device *sdev)
+
+
+/**
+ * scsi_remove_host - detach and remove all SCSI devices owned by host
+ * @shost: a pointer to a scsi host instance
+ *
+ * Returns value: 0 on success, 1 on failure (e.g. LLD busy ??)
+ *
+ * Might block: yes
+ *
+ * Notes: Should only be invoked if the "hotplug initialization
+ * model" is being used. It should be called _prior_ to
+ * scsi_unregister().
+ *
+ * Defined in: drivers/scsi/hosts.c .
+ **/
+int scsi_remove_host(struct Scsi_Host *shost)
+
+
+/**
+ * scsi_report_bus_reset - report scsi _bus_ reset observed
+ * @shost: a pointer to a scsi host involved
+ * @channel: channel (within) host on which scsi bus reset occurred
+ *
+ * Returns nothing
+ *
+ * Might block: no
+ *
+ * Notes: This only needs to be called if the reset is one which
+ * originates from an unknown location. Resets originated by the
+ * mid level itself don't need to call this, but there should be
+ * no harm. The main purpose of this is to make sure that a
+ * CHECK_CONDITION is properly treated.
+ *
+ * Defined in: drivers/scsi/scsi_error.c .
+ **/
+void scsi_report_bus_reset(struct Scsi_Host * shost, int channel)
+
+
+/**
+ * scsi_scan_host - scan SCSI bus
+ * @shost: a pointer to a scsi host instance
+ *
+ * Might block: yes
+ *
+ * Notes: Should be called after scsi_add_host()
+ *
+ * Defined in: drivers/scsi/scsi_scan.c
+ **/
+void scsi_scan_host(struct Scsi_Host *shost)
+
+
+/**
+ * scsi_track_queue_full - track successive QUEUE_FULL events on given
+ * device to determine if and when there is a need
+ * to adjust the queue depth on the device.
+ * @sdev: pointer to SCSI device instance
+ * @depth: Current number of outstanding SCSI commands on this device,
+ * not counting the one returned as QUEUE_FULL.
+ *
+ * Returns 0 - no change needed
+ * >0 - adjust queue depth to this new depth
+ * -1 - drop back to untagged operation using host->cmd_per_lun
+ * as the untagged command depth
+ *
+ * Might block: no
+ *
+ * Notes: LLDs may call this at any time and we will do "The Right
+ * Thing"; interrupt context safe.
+ *
+ * Defined in: drivers/scsi/scsi.c .
+ **/
+int scsi_track_queue_full(struct scsi_device *sdev, int depth)
+
+
+/**
+ * scsi_unblock_requests - allow further commands to be queued to given host
+ *
+ * @shost: pointer to host to unblock commands on
+ *
+ * Returns nothing
+ *
+ * Might block: no
+ *
+ * Defined in: drivers/scsi/scsi_lib.c .
+**/
+void scsi_unblock_requests(struct Scsi_Host * shost)
+
+
+/**
+ * scsi_unregister - unregister and free memory used by host instance
+ * @shp: pointer to scsi host instance to unregister.
+ *
+ * Returns nothing
+ *
+ * Might block: no
+ *
+ * Notes: Should not be invoked if the "hotplug initialization
+ * model" is being used. Called internally by exit_this_scsi_driver()
+ * in the "passive initialization model". Hence a LLD has no need to
+ * call this function directly.
+ *
+ * Defined in: drivers/scsi/hosts.c .
+ **/
+void scsi_unregister(struct Scsi_Host * shp)
+
+
+
+
+Interface Functions
+===================
+Interface functions are supplied (defined) by LLDs and their function
+pointers are placed in an instance of struct scsi_host_template which
+is passed to scsi_host_alloc() [or scsi_register() / init_this_scsi_driver()].
+Some are mandatory. Interface functions should be declared static. The
+accepted convention is that driver "xyz" will declare its slave_configure()
+function as:
+ static int xyz_slave_configure(struct scsi_device * sdev);
+and so forth for all interface functions listed below.
+
+A pointer to this function should be placed in the 'slave_configure' member
+of a "struct scsi_host_template" instance. A pointer to such an instance
+should be passed to the mid level's scsi_host_alloc() [or scsi_register() /
+init_this_scsi_driver()].
+
+The interface functions are also described in the include/scsi/scsi_host.h
+file immediately above their definition point in "struct scsi_host_template".
+In some cases more detail is given in scsi_host.h than below.
+
+The interface functions are listed below in alphabetical order.
+
+Summary:
+ bios_param - fetch head, sector, cylinder info for a disk
+ detect - detects HBAs this driver wants to control
+ eh_timed_out - notify the host that a command timer expired
+ eh_abort_handler - abort given command
+ eh_bus_reset_handler - issue SCSI bus reset
+ eh_device_reset_handler - issue SCSI device reset
+ eh_host_reset_handler - reset host (host bus adapter)
+ info - supply information about given host
+ ioctl - driver can respond to ioctls
+ proc_info - supports /proc/scsi/{driver_name}/{host_no}
+ queuecommand - queue scsi command, invoke 'done' on completion
+ release - release all resources associated with given host
+ slave_alloc - prior to any commands being sent to a new device
+ slave_configure - driver fine tuning for given device after attach
+ slave_destroy - given device is about to be shut down
+
+
+Details:
+
+/**
+ * bios_param - fetch head, sector, cylinder info for a disk
+ * @sdev: pointer to scsi device context (defined in
+ * include/scsi/scsi_device.h)
+ * @bdev: pointer to block device context (defined in fs.h)
+ * @capacity: device size (in 512 byte sectors)
+ * @params: three element array to place output:
+ * params[0] number of heads (max 255)
+ * params[1] number of sectors (max 63)
+ * params[2] number of cylinders
+ *
+ * Return value is ignored
+ *
+ * Locks: none
+ *
+ * Calling context: process (sd)
+ *
+ * Notes: an arbitrary geometry (based on READ CAPACITY) is used
+ * if this function is not provided. The params array is
+ * pre-initialized with made up values just in case this function
+ * doesn't output anything.
+ *
+ * Optionally defined in: LLD
+ **/
+ int bios_param(struct scsi_device * sdev, struct block_device *bdev,
+ sector_t capacity, int params[3])
+
+
+/**
+ * detect - detects HBAs this driver wants to control
+ * @shtp: host template for this driver.
+ *
+ * Returns number of hosts this driver wants to control. 0 means no
+ * suitable hosts found.
+ *
+ * Locks: none held
+ *
+ * Calling context: process [invoked from init_this_scsi_driver()]
+ *
+ * Notes: First function called from the SCSI mid level on this
+ * driver. Upper level drivers (e.g. sd) may not (yet) be present.
+ * For each host found, this method should call scsi_register()
+ * [see hosts.c].
+ *
+ * Defined in: LLD (required if "passive initialization mode" is used,
+ * not invoked in "hotplug initialization mode")
+ **/
+ int detect(struct scsi_host_template * shtp)
+
+
+/**
+ * eh_timed_out - The timer for the command has just fired
+ * @scp: identifies command timing out
+ *
+ * Returns:
+ *
+ * EH_HANDLED: I fixed the error, please complete the command
+ * EH_RESET_TIMER: I need more time, reset the timer and
+ * begin counting again
+ * EH_NOT_HANDLED Begin normal error recovery
+ *
+ *
+ * Locks: None held
+ *
+ * Calling context: interrupt
+ *
+ * Notes: This is to give the LLD an opportunity to do local recovery.
+ * This recovery is limited to determining if the outstanding command
+ * will ever complete. You may not abort and restart the command from
+ * this callback.
+ *
+ * Optionally defined in: LLD
+ **/
+ int eh_timed_out(struct scsi_cmnd * scp)
+
+
+/**
+ * eh_abort_handler - abort command associated with scp
+ * @scp: identifies command to be aborted
+ *
+ * Returns SUCCESS if command aborted else FAILED
+ *
+ * Locks: None held
+ *
+ * Calling context: kernel thread
+ *
+ * Notes: If 'no_async_abort' is defined this callback
+ * will be invoked from scsi_eh thread. No other commands
+ * will then be queued on current host during eh.
+ * Otherwise it will be called whenever scsi_times_out()
+ * is called due to a command timeout.
+ *
+ * Optionally defined in: LLD
+ **/
+ int eh_abort_handler(struct scsi_cmnd * scp)
+
+
+/**
+ * eh_bus_reset_handler - issue SCSI bus reset
+ * @scp: SCSI bus that contains this device should be reset
+ *
+ * Returns SUCCESS if command aborted else FAILED
+ *
+ * Locks: None held
+ *
+ * Calling context: kernel thread
+ *
+ * Notes: Invoked from scsi_eh thread. No other commands will be
+ * queued on current host during eh.
+ *
+ * Optionally defined in: LLD
+ **/
+ int eh_bus_reset_handler(struct scsi_cmnd * scp)
+
+
+/**
+ * eh_device_reset_handler - issue SCSI device reset
+ * @scp: identifies SCSI device to be reset
+ *
+ * Returns SUCCESS if command aborted else FAILED
+ *
+ * Locks: None held
+ *
+ * Calling context: kernel thread
+ *
+ * Notes: Invoked from scsi_eh thread. No other commands will be
+ * queued on current host during eh.
+ *
+ * Optionally defined in: LLD
+ **/
+ int eh_device_reset_handler(struct scsi_cmnd * scp)
+
+
+/**
+ * eh_host_reset_handler - reset host (host bus adapter)
+ * @scp: SCSI host that contains this device should be reset
+ *
+ * Returns SUCCESS if command aborted else FAILED
+ *
+ * Locks: None held
+ *
+ * Calling context: kernel thread
+ *
+ * Notes: Invoked from scsi_eh thread. No other commands will be
+ * queued on current host during eh.
+ * With the default eh_strategy in place, if none of the _abort_,
+ * _device_reset_, _bus_reset_ or this eh handler function are
+ * defined (or they all return FAILED) then the device in question
+ * will be set offline whenever eh is invoked.
+ *
+ * Optionally defined in: LLD
+ **/
+ int eh_host_reset_handler(struct scsi_cmnd * scp)
+
+
+/**
+ * info - supply information about given host: driver name plus data
+ * to distinguish given host
+ * @shp: host to supply information about
+ *
+ * Return ASCII null terminated string. [This driver is assumed to
+ * manage the memory pointed to and maintain it, typically for the
+ * lifetime of this host.]
+ *
+ * Locks: none
+ *
+ * Calling context: process
+ *
+ * Notes: Often supplies PCI or ISA information such as IO addresses
+ * and interrupt numbers. If not supplied struct Scsi_Host::name used
+ * instead. It is assumed the returned information fits on one line
+ * (i.e. does not included embedded newlines).
+ * The SCSI_IOCTL_PROBE_HOST ioctl yields the string returned by this
+ * function (or struct Scsi_Host::name if this function is not
+ * available).
+ * In a similar manner, init_this_scsi_driver() outputs to the console
+ * each host's "info" (or name) for the driver it is registering.
+ * Also if proc_info() is not supplied, the output of this function
+ * is used instead.
+ *
+ * Optionally defined in: LLD
+ **/
+ const char * info(struct Scsi_Host * shp)
+
+
+/**
+ * ioctl - driver can respond to ioctls
+ * @sdp: device that ioctl was issued for
+ * @cmd: ioctl number
+ * @arg: pointer to read or write data from. Since it points to
+ * user space, should use appropriate kernel functions
+ * (e.g. copy_from_user() ). In the Unix style this argument
+ * can also be viewed as an unsigned long.
+ *
+ * Returns negative "errno" value when there is a problem. 0 or a
+ * positive value indicates success and is returned to the user space.
+ *
+ * Locks: none
+ *
+ * Calling context: process
+ *
+ * Notes: The SCSI subsystem uses a "trickle down" ioctl model.
+ * The user issues an ioctl() against an upper level driver
+ * (e.g. /dev/sdc) and if the upper level driver doesn't recognize
+ * the 'cmd' then it is passed to the SCSI mid level. If the SCSI
+ * mid level does not recognize it, then the LLD that controls
+ * the device receives the ioctl. According to recent Unix standards
+ * unsupported ioctl() 'cmd' numbers should return -ENOTTY.
+ *
+ * Optionally defined in: LLD
+ **/
+ int ioctl(struct scsi_device *sdp, int cmd, void *arg)
+
+
+/**
+ * proc_info - supports /proc/scsi/{driver_name}/{host_no}
+ * @buffer: anchor point to output to (0==writeto1_read0) or fetch from
+ * (1==writeto1_read0).
+ * @start: where "interesting" data is written to. Ignored when
+ * 1==writeto1_read0.
+ * @offset: offset within buffer 0==writeto1_read0 is actually
+ * interested in. Ignored when 1==writeto1_read0 .
+ * @length: maximum (or actual) extent of buffer
+ * @host_no: host number of interest (struct Scsi_Host::host_no)
+ * @writeto1_read0: 1 -> data coming from user space towards driver
+ * (e.g. "echo some_string > /proc/scsi/xyz/2")
+ * 0 -> user what data from this driver
+ * (e.g. "cat /proc/scsi/xyz/2")
+ *
+ * Returns length when 1==writeto1_read0. Otherwise number of chars
+ * output to buffer past offset.
+ *
+ * Locks: none held
+ *
+ * Calling context: process
+ *
+ * Notes: Driven from scsi_proc.c which interfaces to proc_fs. proc_fs
+ * support can now be configured out of the scsi subsystem.
+ *
+ * Optionally defined in: LLD
+ **/
+ int proc_info(char * buffer, char ** start, off_t offset,
+ int length, int host_no, int writeto1_read0)
+
+
+/**
+ * queuecommand - queue scsi command, invoke scp->scsi_done on completion
+ * @shost: pointer to the scsi host object
+ * @scp: pointer to scsi command object
+ *
+ * Returns 0 on success.
+ *
+ * If there's a failure, return either:
+ *
+ * SCSI_MLQUEUE_DEVICE_BUSY if the device queue is full, or
+ * SCSI_MLQUEUE_HOST_BUSY if the entire host queue is full
+ *
+ * On both of these returns, the mid-layer will requeue the I/O
+ *
+ * - if the return is SCSI_MLQUEUE_DEVICE_BUSY, only that particular
+ * device will be paused, and it will be unpaused when a command to
+ * the device returns (or after a brief delay if there are no more
+ * outstanding commands to it). Commands to other devices continue
+ * to be processed normally.
+ *
+ * - if the return is SCSI_MLQUEUE_HOST_BUSY, all I/O to the host
+ * is paused and will be unpaused when any command returns from
+ * the host (or after a brief delay if there are no outstanding
+ * commands to the host).
+ *
+ * For compatibility with earlier versions of queuecommand, any
+ * other return value is treated the same as
+ * SCSI_MLQUEUE_HOST_BUSY.
+ *
+ * Other types of errors that are detected immediately may be
+ * flagged by setting scp->result to an appropriate value,
+ * invoking the scp->scsi_done callback, and then returning 0
+ * from this function. If the command is not performed
+ * immediately (and the LLD is starting (or will start) the given
+ * command) then this function should place 0 in scp->result and
+ * return 0.
+ *
+ * Command ownership. If the driver returns zero, it owns the
+ * command and must take responsibility for ensuring the
+ * scp->scsi_done callback is executed. Note: the driver may
+ * call scp->scsi_done before returning zero, but after it has
+ * called scp->scsi_done, it may not return any value other than
+ * zero. If the driver makes a non-zero return, it must not
+ * execute the command's scsi_done callback at any time.
+ *
+ * Locks: up to and including 2.6.36, struct Scsi_Host::host_lock
+ * held on entry (with "irqsave") and is expected to be
+ * held on return. From 2.6.37 onwards, queuecommand is
+ * called without any locks held.
+ *
+ * Calling context: in interrupt (soft irq) or process context
+ *
+ * Notes: This function should be relatively fast. Normally it
+ * will not wait for IO to complete. Hence the scp->scsi_done
+ * callback is invoked (often directly from an interrupt service
+ * routine) some time after this function has returned. In some
+ * cases (e.g. pseudo adapter drivers that manufacture the
+ * response to a SCSI INQUIRY) the scp->scsi_done callback may be
+ * invoked before this function returns. If the scp->scsi_done
+ * callback is not invoked within a certain period the SCSI mid
+ * level will commence error processing. If a status of CHECK
+ * CONDITION is placed in "result" when the scp->scsi_done
+ * callback is invoked, then the LLD driver should perform
+ * autosense and fill in the struct scsi_cmnd::sense_buffer
+ * array. The scsi_cmnd::sense_buffer array is zeroed prior to
+ * the mid level queuing a command to an LLD.
+ *
+ * Defined in: LLD
+ **/
+ int queuecommand(struct Scsi_Host *shost, struct scsi_cmnd * scp)
+
+
+/**
+ * release - release all resources associated with given host
+ * @shp: host to be released.
+ *
+ * Return value ignored (could soon be a function returning void).
+ *
+ * Locks: none held
+ *
+ * Calling context: process
+ *
+ * Notes: Invoked from scsi_module.c's exit_this_scsi_driver().
+ * LLD's implementation of this function should call
+ * scsi_unregister(shp) prior to returning.
+ * Only needed for old-style host templates.
+ *
+ * Defined in: LLD (required in "passive initialization model",
+ * should not be defined in hotplug model)
+ **/
+ int release(struct Scsi_Host * shp)
+
+
+/**
+ * slave_alloc - prior to any commands being sent to a new device
+ * (i.e. just prior to scan) this call is made
+ * @sdp: pointer to new device (about to be scanned)
+ *
+ * Returns 0 if ok. Any other return is assumed to be an error and
+ * the device is ignored.
+ *
+ * Locks: none
+ *
+ * Calling context: process
+ *
+ * Notes: Allows the driver to allocate any resources for a device
+ * prior to its initial scan. The corresponding scsi device may not
+ * exist but the mid level is just about to scan for it (i.e. send
+ * and INQUIRY command plus ...). If a device is found then
+ * slave_configure() will be called while if a device is not found
+ * slave_destroy() is called.
+ * For more details see the include/scsi/scsi_host.h file.
+ *
+ * Optionally defined in: LLD
+ **/
+ int slave_alloc(struct scsi_device *sdp)
+
+
+/**
+ * slave_configure - driver fine tuning for given device just after it
+ * has been first scanned (i.e. it responded to an
+ * INQUIRY)
+ * @sdp: device that has just been attached
+ *
+ * Returns 0 if ok. Any other return is assumed to be an error and
+ * the device is taken offline. [offline devices will _not_ have
+ * slave_destroy() called on them so clean up resources.]
+ *
+ * Locks: none
+ *
+ * Calling context: process
+ *
+ * Notes: Allows the driver to inspect the response to the initial
+ * INQUIRY done by the scanning code and take appropriate action.
+ * For more details see the include/scsi/scsi_host.h file.
+ *
+ * Optionally defined in: LLD
+ **/
+ int slave_configure(struct scsi_device *sdp)
+
+
+/**
+ * slave_destroy - given device is about to be shut down. All
+ * activity has ceased on this device.
+ * @sdp: device that is about to be shut down
+ *
+ * Returns nothing
+ *
+ * Locks: none
+ *
+ * Calling context: process
+ *
+ * Notes: Mid level structures for given device are still in place
+ * but are about to be torn down. Any per device resources allocated
+ * by this driver for given device should be freed now. No further
+ * commands will be sent for this sdp instance. [However the device
+ * could be re-attached in the future in which case a new instance
+ * of struct scsi_device would be supplied by future slave_alloc()
+ * and slave_configure() calls.]
+ *
+ * Optionally defined in: LLD
+ **/
+ void slave_destroy(struct scsi_device *sdp)
+
+
+
+Data Structures
+===============
+struct scsi_host_template
+-------------------------
+There is one "struct scsi_host_template" instance per LLD ***. It is
+typically initialized as a file scope static in a driver's header file. That
+way members that are not explicitly initialized will be set to 0 or NULL.
+Member of interest:
+ name - name of driver (may contain spaces, please limit to
+ less than 80 characters)
+ proc_name - name used in "/proc/scsi/<proc_name>/<host_no>" and
+ by sysfs in one of its "drivers" directories. Hence
+ "proc_name" should only contain characters acceptable
+ to a Unix file name.
+ (*queuecommand)() - primary callback that the mid level uses to inject
+ SCSI commands into an LLD.
+The structure is defined and commented in include/scsi/scsi_host.h
+
+*** In extreme situations a single driver may have several instances
+ if it controls several different classes of hardware (e.g. an LLD
+ that handles both ISA and PCI cards and has a separate instance of
+ struct scsi_host_template for each class).
+
+struct Scsi_Host
+----------------
+There is one struct Scsi_Host instance per host (HBA) that an LLD
+controls. The struct Scsi_Host structure has many members in common
+with "struct scsi_host_template". When a new struct Scsi_Host instance
+is created (in scsi_host_alloc() in hosts.c) those common members are
+initialized from the driver's struct scsi_host_template instance. Members
+of interest:
+ host_no - system wide unique number that is used for identifying
+ this host. Issued in ascending order from 0.
+ can_queue - must be greater than 0; do not send more than can_queue
+ commands to the adapter.
+ this_id - scsi id of host (scsi initiator) or -1 if not known
+ sg_tablesize - maximum scatter gather elements allowed by host.
+ 0 implies scatter gather not supported by host
+ max_sectors - maximum number of sectors (usually 512 bytes) allowed
+ in a single SCSI command. The default value of 0 leads
+ to a setting of SCSI_DEFAULT_MAX_SECTORS (defined in
+ scsi_host.h) which is currently set to 1024. So for a
+ disk the maximum transfer size is 512 KB when max_sectors
+ is not defined. Note that this size may not be sufficient
+ for disk firmware uploads.
+ cmd_per_lun - maximum number of commands that can be queued on devices
+ controlled by the host. Overridden by LLD calls to
+ scsi_change_queue_depth().
+ unchecked_isa_dma - 1=>only use bottom 16 MB of ram (ISA DMA addressing
+ restriction), 0=>can use full 32 bit (or better) DMA
+ address space
+ use_clustering - 1=>SCSI commands in mid level's queue can be merged,
+ 0=>disallow SCSI command merging
+ no_async_abort - 1=>Asynchronous aborts are not supported
+ 0=>Timed-out commands will be aborted asynchronously
+ hostt - pointer to driver's struct scsi_host_template from which
+ this struct Scsi_Host instance was spawned
+ hostt->proc_name - name of LLD. This is the driver name that sysfs uses
+ transportt - pointer to driver's struct scsi_transport_template instance
+ (if any). FC and SPI transports currently supported.
+ sh_list - a double linked list of pointers to all struct Scsi_Host
+ instances (currently ordered by ascending host_no)
+ my_devices - a double linked list of pointers to struct scsi_device
+ instances that belong to this host.
+ hostdata[0] - area reserved for LLD at end of struct Scsi_Host. Size
+ is set by the second argument (named 'xtr_bytes') to
+ scsi_host_alloc() or scsi_register().
+ vendor_id - a unique value that identifies the vendor supplying
+ the LLD for the Scsi_Host. Used most often in validating
+ vendor-specific message requests. Value consists of an
+ identifier type and a vendor-specific value.
+ See scsi_netlink.h for a description of valid formats.
+
+The scsi_host structure is defined in include/scsi/scsi_host.h
+
+struct scsi_device
+------------------
+Generally, there is one instance of this structure for each SCSI logical unit
+on a host. Scsi devices connected to a host are uniquely identified by a
+channel number, target id and logical unit number (lun).
+The structure is defined in include/scsi/scsi_device.h
+
+struct scsi_cmnd
+----------------
+Instances of this structure convey SCSI commands to the LLD and responses
+back to the mid level. The SCSI mid level will ensure that no more SCSI
+commands become queued against the LLD than are indicated by
+scsi_change_queue_depth() (or struct Scsi_Host::cmd_per_lun). There will
+be at least one instance of struct scsi_cmnd available for each SCSI device.
+Members of interest:
+ cmnd - array containing SCSI command
+ cmnd_len - length (in bytes) of SCSI command
+ sc_data_direction - direction of data transfer in data phase. See
+ "enum dma_data_direction" in include/linux/dma-mapping.h
+ request_bufflen - number of data bytes to transfer (0 if no data phase)
+ use_sg - ==0 -> no scatter gather list, hence transfer data
+ to/from request_buffer
+ - >0 -> scatter gather list (actually an array) in
+ request_buffer with use_sg elements
+ request_buffer - either contains data buffer or scatter gather list
+ depending on the setting of use_sg. Scatter gather
+ elements are defined by 'struct scatterlist' found
+ in include/asm/scatterlist.h .
+ done - function pointer that should be invoked by LLD when the
+ SCSI command is completed (successfully or otherwise).
+ Should only be called by an LLD if the LLD has accepted
+ the command (i.e. queuecommand() returned or will return
+ 0). The LLD may invoke 'done' prior to queuecommand()
+ finishing.
+ result - should be set by LLD prior to calling 'done'. A value
+ of 0 implies a successfully completed command (and all
+ data (if any) has been transferred to or from the SCSI
+ target device). 'result' is a 32 bit unsigned integer that
+ can be viewed as 4 related bytes. The SCSI status value is
+ in the LSB. See include/scsi/scsi.h status_byte(),
+ msg_byte(), host_byte() and driver_byte() macros and
+ related constants.
+ sense_buffer - an array (maximum size: SCSI_SENSE_BUFFERSIZE bytes) that
+ should be written when the SCSI status (LSB of 'result')
+ is set to CHECK_CONDITION (2). When CHECK_CONDITION is
+ set, if the top nibble of sense_buffer[0] has the value 7
+ then the mid level will assume the sense_buffer array
+ contains a valid SCSI sense buffer; otherwise the mid
+ level will issue a REQUEST_SENSE SCSI command to
+ retrieve the sense buffer. The latter strategy is error
+ prone in the presence of command queuing so the LLD should
+ always "auto-sense".
+ device - pointer to scsi_device object that this command is
+ associated with.
+ resid - an LLD should set this signed integer to the requested
+ transfer length (i.e. 'request_bufflen') less the number
+ of bytes that are actually transferred. 'resid' is
+ preset to 0 so an LLD can ignore it if it cannot detect
+ underruns (overruns should be rare). If possible an LLD
+ should set 'resid' prior to invoking 'done'. The most
+ interesting case is data transfers from a SCSI target
+ device (e.g. READs) that underrun.
+ underflow - LLD should place (DID_ERROR << 16) in 'result' if
+ actual number of bytes transferred is less than this
+ figure. Not many LLDs implement this check and some that
+ do just output an error message to the log rather than
+ report a DID_ERROR. Better for an LLD to implement
+ 'resid'.
+
+It is recommended that a LLD set 'resid' on data transfers from a SCSI
+target device (e.g. READs). It is especially important that 'resid' is set
+when such data transfers have sense keys of MEDIUM ERROR and HARDWARE ERROR
+(and possibly RECOVERED ERROR). In these cases if a LLD is in doubt how much
+data has been received then the safest approach is to indicate no bytes have
+been received. For example: to indicate that no valid data has been received
+a LLD might use these helpers:
+ scsi_set_resid(SCpnt, scsi_bufflen(SCpnt));
+where 'SCpnt' is a pointer to a scsi_cmnd object. To indicate only three 512
+bytes blocks has been received 'resid' could be set like this:
+ scsi_set_resid(SCpnt, scsi_bufflen(SCpnt) - (3 * 512));
+
+The scsi_cmnd structure is defined in include/scsi/scsi_cmnd.h
+
+
+Locks
+=====
+Each struct Scsi_Host instance has a spin_lock called struct
+Scsi_Host::default_lock which is initialized in scsi_host_alloc() [found in
+hosts.c]. Within the same function the struct Scsi_Host::host_lock pointer
+is initialized to point at default_lock. Thereafter lock and unlock
+operations performed by the mid level use the struct Scsi_Host::host_lock
+pointer. Previously drivers could override the host_lock pointer but
+this is not allowed anymore.
+
+
+Autosense
+=========
+Autosense (or auto-sense) is defined in the SAM-2 document as "the
+automatic return of sense data to the application client coincident
+with the completion of a SCSI command" when a status of CHECK CONDITION
+occurs. LLDs should perform autosense. This should be done when the LLD
+detects a CHECK CONDITION status by either:
+ a) instructing the SCSI protocol (e.g. SCSI Parallel Interface (SPI))
+ to perform an extra data in phase on such responses
+ b) or, the LLD issuing a REQUEST SENSE command itself
+
+Either way, when a status of CHECK CONDITION is detected, the mid level
+decides whether the LLD has performed autosense by checking struct
+scsi_cmnd::sense_buffer[0] . If this byte has an upper nibble of 7 (or 0xf)
+then autosense is assumed to have taken place. If it has another value (and
+this byte is initialized to 0 before each command) then the mid level will
+issue a REQUEST SENSE command.
+
+In the presence of queued commands the "nexus" that maintains sense
+buffer data from the command that failed until a following REQUEST SENSE
+may get out of synchronization. This is why it is best for the LLD
+to perform autosense.
+
+
+Changes since lk 2.4 series
+===========================
+io_request_lock has been replaced by several finer grained locks. The lock
+relevant to LLDs is struct Scsi_Host::host_lock and there is
+one per SCSI host.
+
+The older error handling mechanism has been removed. This means the
+LLD interface functions abort() and reset() have been removed.
+The struct scsi_host_template::use_new_eh_code flag has been removed.
+
+In the 2.4 series the SCSI subsystem configuration descriptions were
+aggregated with the configuration descriptions from all other Linux
+subsystems in the Documentation/Configure.help file. In the 2.6 series,
+the SCSI subsystem now has its own (much smaller) drivers/scsi/Kconfig
+file that contains both configuration and help information.
+
+struct SHT has been renamed to struct scsi_host_template.
+
+Addition of the "hotplug initialization model" and many extra functions
+to support it.
+
+
+Credits
+=======
+The following people have contributed to this document:
+ Mike Anderson <andmike at us dot ibm dot com>
+ James Bottomley <James dot Bottomley at hansenpartnership dot com>
+ Patrick Mansfield <patmans at us dot ibm dot com>
+ Christoph Hellwig <hch at infradead dot org>
+ Doug Ledford <dledford at redhat dot com>
+ Andries Brouwer <Andries dot Brouwer at cwi dot nl>
+ Randy Dunlap <rdunlap at xenotime dot net>
+ Alan Stern <stern at rowland dot harvard dot edu>
+
+
+Douglas Gilbert
+dgilbert at interlog dot com
+21st September 2004
diff --git a/Documentation/scsi/scsi_transport_srp/Makefile b/Documentation/scsi/scsi_transport_srp/Makefile
new file mode 100644
index 000000000..5f6b567e9
--- /dev/null
+++ b/Documentation/scsi/scsi_transport_srp/Makefile
@@ -0,0 +1,7 @@
+all: rport_state_diagram.svg rport_state_diagram.png
+
+rport_state_diagram.svg: rport_state_diagram.dot
+ dot -Tsvg -o $@ $<
+
+rport_state_diagram.png: rport_state_diagram.dot
+ dot -Tpng -o $@ $<
diff --git a/Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot b/Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot
new file mode 100644
index 000000000..75d610d64
--- /dev/null
+++ b/Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot
@@ -0,0 +1,26 @@
+digraph srp_initiator {
+ node [shape = doublecircle]; running lost;
+ node [shape = circle];
+
+ {
+ rank = min;
+ running_rta [ label = "running;\nreconnect\ntimer\nactive" ];
+ };
+ running [ label = "running;\nreconnect\ntimer\nstopped" ];
+ blocked;
+ failfast [ label = "fail I/O\nfast" ];
+ lost;
+
+ running -> running_rta [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nsrp_start_tl_fail_timers()" ];
+ running_rta -> running [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nreconnecting succeeded" ];
+ running -> blocked [ label = "fast_io_fail_tmo >= 0 or\ndev_loss_tmo >= 0;\nsrp_start_tl_fail_timers()" ];
+ running -> failfast [ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nreconnecting failed\n" ];
+ blocked -> failfast [ label = "fast_io_fail_tmo\nexpired or\nreconnecting\nfailed" ];
+ blocked -> lost [ label = "dev_loss_tmo\nexpired or\nsrp_stop_rport_timers()" ];
+ failfast -> lost [ label = "dev_loss_tmo\nexpired or\nsrp_stop_rport_timers()" ];
+ blocked -> running [ label = "reconnecting\nsucceeded" ];
+ failfast -> failfast [ label = "reconnecting\nfailed" ];
+ failfast -> running [ label = "reconnecting\nsucceeded" ];
+ running -> lost [ label = "srp_stop_rport_timers()" ];
+ running_rta -> lost [ label = "srp_stop_rport_timers()" ];
+}
diff --git a/Documentation/scsi/st.txt b/Documentation/scsi/st.txt
new file mode 100644
index 000000000..0d5bdb153
--- /dev/null
+++ b/Documentation/scsi/st.txt
@@ -0,0 +1,526 @@
+This file contains brief information about the SCSI tape driver.
+The driver is currently maintained by Kai Mäkisara (email
+Kai.Makisara@kolumbus.fi)
+
+Last modified: Sun Aug 29 18:25:47 2010 by kai.makisara
+
+
+BASICS
+
+The driver is generic, i.e., it does not contain any code tailored
+to any specific tape drive. The tape parameters can be specified with
+one of the following three methods:
+
+1. Each user can specify the tape parameters he/she wants to use
+directly with ioctls. This is administratively a very simple and
+flexible method and applicable to single-user workstations. However,
+in a multiuser environment the next user finds the tape parameters in
+state the previous user left them.
+
+2. The system manager (root) can define default values for some tape
+parameters, like block size and density using the MTSETDRVBUFFER ioctl.
+These parameters can be programmed to come into effect either when a
+new tape is loaded into the drive or if writing begins at the
+beginning of the tape. The second method is applicable if the tape
+drive performs auto-detection of the tape format well (like some
+QIC-drives). The result is that any tape can be read, writing can be
+continued using existing format, and the default format is used if
+the tape is rewritten from the beginning (or a new tape is written
+for the first time). The first method is applicable if the drive
+does not perform auto-detection well enough and there is a single
+"sensible" mode for the device. An example is a DAT drive that is
+used only in variable block mode (I don't know if this is sensible
+or not :-).
+
+The user can override the parameters defined by the system
+manager. The changes persist until the defaults again come into
+effect.
+
+3. By default, up to four modes can be defined and selected using the minor
+number (bits 5 and 6). The number of modes can be changed by changing
+ST_NBR_MODE_BITS in st.h. Mode 0 corresponds to the defaults discussed
+above. Additional modes are dormant until they are defined by the
+system manager (root). When specification of a new mode is started,
+the configuration of mode 0 is used to provide a starting point for
+definition of the new mode.
+
+Using the modes allows the system manager to give the users choices
+over some of the buffering parameters not directly accessible to the
+users (buffered and asynchronous writes). The modes also allow choices
+between formats in multi-tape operations (the explicitly overridden
+parameters are reset when a new tape is loaded).
+
+If more than one mode is used, all modes should contain definitions
+for the same set of parameters.
+
+Many Unices contain internal tables that associate different modes to
+supported devices. The Linux SCSI tape driver does not contain such
+tables (and will not do that in future). Instead of that, a utility
+program can be made that fetches the inquiry data sent by the device,
+scans its database, and sets up the modes using the ioctls. Another
+alternative is to make a small script that uses mt to set the defaults
+tailored to the system.
+
+The driver supports fixed and variable block size (within buffer
+limits). Both the auto-rewind (minor equals device number) and
+non-rewind devices (minor is 128 + device number) are implemented.
+
+In variable block mode, the byte count in write() determines the size
+of the physical block on tape. When reading, the drive reads the next
+tape block and returns to the user the data if the read() byte count
+is at least the block size. Otherwise, error ENOMEM is returned.
+
+In fixed block mode, the data transfer between the drive and the
+driver is in multiples of the block size. The write() byte count must
+be a multiple of the block size. This is not required when reading but
+may be advisable for portability.
+
+Support is provided for changing the tape partition and partitioning
+of the tape with one or two partitions. By default support for
+partitioned tape is disabled for each driver and it can be enabled
+with the ioctl MTSETDRVBUFFER.
+
+By default the driver writes one filemark when the device is closed after
+writing and the last operation has been a write. Two filemarks can be
+optionally written. In both cases end of data is signified by
+returning zero bytes for two consecutive reads.
+
+Writing filemarks without the immediate bit set in the SCSI command block acts
+as a synchronization point, i.e., all remaining data form the drive buffers is
+written to tape before the command returns. This makes sure that write errors
+are caught at that point, but this takes time. In some applications, several
+consecutive files must be written fast. The MTWEOFI operation can be used to
+write the filemarks without flushing the drive buffer. Writing filemark at
+close() is always flushing the drive buffers. However, if the previous
+operation is MTWEOFI, close() does not write a filemark. This can be used if
+the program wants to close/open the tape device between files and wants to
+skip waiting.
+
+If rewind, offline, bsf, or seek is done and previous tape operation was
+write, a filemark is written before moving tape.
+
+The compile options are defined in the file linux/drivers/scsi/st_options.h.
+
+4. If the open option O_NONBLOCK is used, open succeeds even if the
+drive is not ready. If O_NONBLOCK is not used, the driver waits for
+the drive to become ready. If this does not happen in ST_BLOCK_SECONDS
+seconds, open fails with the errno value EIO. With O_NONBLOCK the
+device can be opened for writing even if there is a write protected
+tape in the drive (commands trying to write something return error if
+attempted).
+
+
+MINOR NUMBERS
+
+The tape driver currently supports up to 2^17 drives if 4 modes for
+each drive are used.
+
+The minor numbers consist of the following bit fields:
+
+dev_upper non-rew mode dev-lower
+ 20 - 8 7 6 5 4 0
+The non-rewind bit is always bit 7 (the uppermost bit in the lowermost
+byte). The bits defining the mode are below the non-rewind bit. The
+remaining bits define the tape device number. This numbering is
+backward compatible with the numbering used when the minor number was
+only 8 bits wide.
+
+
+SYSFS SUPPORT
+
+The driver creates the directory /sys/class/scsi_tape and populates it with
+directories corresponding to the existing tape devices. There are autorewind
+and non-rewind entries for each mode. The names are stxy and nstxy, where x
+is the tape number and y a character corresponding to the mode (none, l, m,
+a). For example, the directories for the first tape device are (assuming four
+modes): st0 nst0 st0l nst0l st0m nst0m st0a nst0a.
+
+Each directory contains the entries: default_blksize default_compression
+default_density defined dev device driver. The file 'defined' contains 1
+if the mode is defined and zero if not defined. The files 'default_*' contain
+the defaults set by the user. The value -1 means the default is not set. The
+file 'dev' contains the device numbers corresponding to this device. The links
+'device' and 'driver' point to the SCSI device and driver entries.
+
+Each directory also contains the entry 'options' which shows the currently
+enabled driver and mode options. The value in the file is a bit mask where the
+bit definitions are the same as those used with MTSETDRVBUFFER in setting the
+options.
+
+A link named 'tape' is made from the SCSI device directory to the class
+directory corresponding to the mode 0 auto-rewind device (e.g., st0).
+
+
+BSD AND SYS V SEMANTICS
+
+The user can choose between these two behaviours of the tape driver by
+defining the value of the symbol ST_SYSV. The semantics differ when a
+file being read is closed. The BSD semantics leaves the tape where it
+currently is whereas the SYS V semantics moves the tape past the next
+filemark unless the filemark has just been crossed.
+
+The default is BSD semantics.
+
+
+BUFFERING
+
+The driver tries to do transfers directly to/from user space. If this
+is not possible, a driver buffer allocated at run-time is used. If
+direct i/o is not possible for the whole transfer, the driver buffer
+is used (i.e., bounce buffers for individual pages are not
+used). Direct i/o can be impossible because of several reasons, e.g.:
+- one or more pages are at addresses not reachable by the HBA
+- the number of pages in the transfer exceeds the number of
+ scatter/gather segments permitted by the HBA
+- one or more pages can't be locked into memory (should not happen in
+ any reasonable situation)
+
+The size of the driver buffers is always at least one tape block. In fixed
+block mode, the minimum buffer size is defined (in 1024 byte units) by
+ST_FIXED_BUFFER_BLOCKS. With small block size this allows buffering of
+several blocks and using one SCSI read or write to transfer all of the
+blocks. Buffering of data across write calls in fixed block mode is
+allowed if ST_BUFFER_WRITES is non-zero and direct i/o is not used.
+Buffer allocation uses chunks of memory having sizes 2^n * (page
+size). Because of this the actual buffer size may be larger than the
+minimum allowable buffer size.
+
+NOTE that if direct i/o is used, the small writes are not buffered. This may
+cause a surprise when moving from 2.4. There small writes (e.g., tar without
+-b option) may have had good throughput but this is not true any more with
+2.6. Direct i/o can be turned off to solve this problem but a better solution
+is to use bigger write() byte counts (e.g., tar -b 64).
+
+Asynchronous writing. Writing the buffer contents to the tape is
+started and the write call returns immediately. The status is checked
+at the next tape operation. Asynchronous writes are not done with
+direct i/o and not in fixed block mode.
+
+Buffered writes and asynchronous writes may in some rare cases cause
+problems in multivolume operations if there is not enough space on the
+tape after the early-warning mark to flush the driver buffer.
+
+Read ahead for fixed block mode (ST_READ_AHEAD). Filling the buffer is
+attempted even if the user does not want to get all of the data at
+this read command. Should be disabled for those drives that don't like
+a filemark to truncate a read request or that don't like backspacing.
+
+Scatter/gather buffers (buffers that consist of chunks non-contiguous
+in the physical memory) are used if contiguous buffers can't be
+allocated. To support all SCSI adapters (including those not
+supporting scatter/gather), buffer allocation is using the following
+three kinds of chunks:
+1. The initial segment that is used for all SCSI adapters including
+those not supporting scatter/gather. The size of this buffer will be
+(PAGE_SIZE << ST_FIRST_ORDER) bytes if the system can give a chunk of
+this size (and it is not larger than the buffer size specified by
+ST_BUFFER_BLOCKS). If this size is not available, the driver halves
+the size and tries again until the size of one page. The default
+settings in st_options.h make the driver to try to allocate all of the
+buffer as one chunk.
+2. The scatter/gather segments to fill the specified buffer size are
+allocated so that as many segments as possible are used but the number
+of segments does not exceed ST_FIRST_SG.
+3. The remaining segments between ST_MAX_SG (or the module parameter
+max_sg_segs) and the number of segments used in phases 1 and 2
+are used to extend the buffer at run-time if this is necessary. The
+number of scatter/gather segments allowed for the SCSI adapter is not
+exceeded if it is smaller than the maximum number of scatter/gather
+segments specified. If the maximum number allowed for the SCSI adapter
+is smaller than the number of segments used in phases 1 and 2,
+extending the buffer will always fail.
+
+
+EOM BEHAVIOUR WHEN WRITING
+
+When the end of medium early warning is encountered, the current write
+is finished and the number of bytes is returned. The next write
+returns -1 and errno is set to ENOSPC. To enable writing a trailer,
+the next write is allowed to proceed and, if successful, the number of
+bytes is returned. After this, -1 and the number of bytes are
+alternately returned until the physical end of medium (or some other
+error) is encountered.
+
+
+MODULE PARAMETERS
+
+The buffer size, write threshold, and the maximum number of allocated buffers
+are configurable when the driver is loaded as a module. The keywords are:
+
+buffer_kbs=xxx the buffer size for fixed block mode is set
+ to xxx kilobytes
+write_threshold_kbs=xxx the write threshold in kilobytes set to xxx
+max_sg_segs=xxx the maximum number of scatter/gather
+ segments
+try_direct_io=x try direct transfer between user buffer and
+ tape drive if this is non-zero
+
+Note that if the buffer size is changed but the write threshold is not
+set, the write threshold is set to the new buffer size - 2 kB.
+
+
+BOOT TIME CONFIGURATION
+
+If the driver is compiled into the kernel, the same parameters can be
+also set using, e.g., the LILO command line. The preferred syntax is
+to use the same keyword used when loading as module but prepended
+with 'st.'. For instance, to set the maximum number of scatter/gather
+segments, the parameter 'st.max_sg_segs=xx' should be used (xx is the
+number of scatter/gather segments).
+
+For compatibility, the old syntax from early 2.5 and 2.4 kernel
+versions is supported. The same keywords can be used as when loading
+the driver as module. If several parameters are set, the keyword-value
+pairs are separated with a comma (no spaces allowed). A colon can be
+used instead of the equal mark. The definition is prepended by the
+string st=. Here is an example:
+
+ st=buffer_kbs:64,write_threshold_kbs:60
+
+The following syntax used by the old kernel versions is also supported:
+
+ st=aa[,bb[,dd]]
+
+where
+ aa is the buffer size for fixed block mode in 1024 byte units
+ bb is the write threshold in 1024 byte units
+ dd is the maximum number of scatter/gather segments
+
+
+IOCTLS
+
+The tape is positioned and the drive parameters are set with ioctls
+defined in mtio.h The tape control program 'mt' uses these ioctls. Try
+to find an mt that supports all of the Linux SCSI tape ioctls and
+opens the device for writing if the tape contents will be modified
+(look for a package mt-st* from the Linux ftp sites; the GNU mt does
+not open for writing for, e.g., erase).
+
+The supported ioctls are:
+
+The following use the structure mtop:
+
+MTFSF Space forward over count filemarks. Tape positioned after filemark.
+MTFSFM As above but tape positioned before filemark.
+MTBSF Space backward over count filemarks. Tape positioned before
+ filemark.
+MTBSFM As above but ape positioned after filemark.
+MTFSR Space forward over count records.
+MTBSR Space backward over count records.
+MTFSS Space forward over count setmarks.
+MTBSS Space backward over count setmarks.
+MTWEOF Write count filemarks.
+MTWEOFI Write count filemarks with immediate bit set (i.e., does not
+ wait until data is on tape)
+MTWSM Write count setmarks.
+MTREW Rewind tape.
+MTOFFL Set device off line (often rewind plus eject).
+MTNOP Do nothing except flush the buffers.
+MTRETEN Re-tension tape.
+MTEOM Space to end of recorded data.
+MTERASE Erase tape. If the argument is zero, the short erase command
+ is used. The long erase command is used with all other values
+ of the argument.
+MTSEEK Seek to tape block count. Uses Tandberg-compatible seek (QFA)
+ for SCSI-1 drives and SCSI-2 seek for SCSI-2 drives. The file and
+ block numbers in the status are not valid after a seek.
+MTSETBLK Set the drive block size. Setting to zero sets the drive into
+ variable block mode (if applicable).
+MTSETDENSITY Sets the drive density code to arg. See drive
+ documentation for available codes.
+MTLOCK and MTUNLOCK Explicitly lock/unlock the tape drive door.
+MTLOAD and MTUNLOAD Explicitly load and unload the tape. If the
+ command argument x is between MT_ST_HPLOADER_OFFSET + 1 and
+ MT_ST_HPLOADER_OFFSET + 6, the number x is used sent to the
+ drive with the command and it selects the tape slot to use of
+ HP C1553A changer.
+MTCOMPRESSION Sets compressing or uncompressing drive mode using the
+ SCSI mode page 15. Note that some drives other methods for
+ control of compression. Some drives (like the Exabytes) use
+ density codes for compression control. Some drives use another
+ mode page but this page has not been implemented in the
+ driver. Some drives without compression capability will accept
+ any compression mode without error.
+MTSETPART Moves the tape to the partition given by the argument at the
+ next tape operation. The block at which the tape is positioned
+ is the block where the tape was previously positioned in the
+ new active partition unless the next tape operation is
+ MTSEEK. In this case the tape is moved directly to the block
+ specified by MTSEEK. MTSETPART is inactive unless
+ MT_ST_CAN_PARTITIONS set.
+MTMKPART Formats the tape with one partition (argument zero) or two
+ partitions (the argument gives in megabytes the size of
+ partition 1 that is physically the first partition of the
+ tape). The drive has to support partitions with size specified
+ by the initiator. Inactive unless MT_ST_CAN_PARTITIONS set.
+MTSETDRVBUFFER
+ Is used for several purposes. The command is obtained from count
+ with mask MT_SET_OPTIONS, the low order bits are used as argument.
+ This command is only allowed for the superuser (root). The
+ subcommands are:
+ 0
+ The drive buffer option is set to the argument. Zero means
+ no buffering.
+ MT_ST_BOOLEANS
+ Sets the buffering options. The bits are the new states
+ (enabled/disabled) the following options (in the
+ parenthesis is specified whether the option is global or
+ can be specified differently for each mode):
+ MT_ST_BUFFER_WRITES write buffering (mode)
+ MT_ST_ASYNC_WRITES asynchronous writes (mode)
+ MT_ST_READ_AHEAD read ahead (mode)
+ MT_ST_TWO_FM writing of two filemarks (global)
+ MT_ST_FAST_EOM using the SCSI spacing to EOD (global)
+ MT_ST_AUTO_LOCK automatic locking of the drive door (global)
+ MT_ST_DEF_WRITES the defaults are meant only for writes (mode)
+ MT_ST_CAN_BSR backspacing over more than one records can
+ be used for repositioning the tape (global)
+ MT_ST_NO_BLKLIMS the driver does not ask the block limits
+ from the drive (block size can be changed only to
+ variable) (global)
+ MT_ST_CAN_PARTITIONS enables support for partitioned
+ tapes (global)
+ MT_ST_SCSI2LOGICAL the logical block number is used in
+ the MTSEEK and MTIOCPOS for SCSI-2 drives instead of
+ the device dependent address. It is recommended to set
+ this flag unless there are tapes using the device
+ dependent (from the old times) (global)
+ MT_ST_SYSV sets the SYSV semantics (mode)
+ MT_ST_NOWAIT enables immediate mode (i.e., don't wait for
+ the command to finish) for some commands (e.g., rewind)
+ MT_ST_NOWAIT_EOF enables immediate filemark mode (i.e. when
+ writing a filemark, don't wait for it to complete). Please
+ see the BASICS note about MTWEOFI with respect to the
+ possible dangers of writing immediate filemarks.
+ MT_ST_SILI enables setting the SILI bit in SCSI commands when
+ reading in variable block mode to enhance performance when
+ reading blocks shorter than the byte count; set this only
+ if you are sure that the drive supports SILI and the HBA
+ correctly returns transfer residuals
+ MT_ST_DEBUGGING debugging (global; debugging must be
+ compiled into the driver)
+ MT_ST_SETBOOLEANS
+ MT_ST_CLEARBOOLEANS
+ Sets or clears the option bits.
+ MT_ST_WRITE_THRESHOLD
+ Sets the write threshold for this device to kilobytes
+ specified by the lowest bits.
+ MT_ST_DEF_BLKSIZE
+ Defines the default block size set automatically. Value
+ 0xffffff means that the default is not used any more.
+ MT_ST_DEF_DENSITY
+ MT_ST_DEF_DRVBUFFER
+ Used to set or clear the density (8 bits), and drive buffer
+ state (3 bits). If the value is MT_ST_CLEAR_DEFAULT
+ (0xfffff) the default will not be used any more. Otherwise
+ the lowermost bits of the value contain the new value of
+ the parameter.
+ MT_ST_DEF_COMPRESSION
+ The compression default will not be used if the value of
+ the lowermost byte is 0xff. Otherwise the lowermost bit
+ contains the new default. If the bits 8-15 are set to a
+ non-zero number, and this number is not 0xff, the number is
+ used as the compression algorithm. The value
+ MT_ST_CLEAR_DEFAULT can be used to clear the compression
+ default.
+ MT_ST_SET_TIMEOUT
+ Set the normal timeout in seconds for this device. The
+ default is 900 seconds (15 minutes). The timeout should be
+ long enough for the retries done by the device while
+ reading/writing.
+ MT_ST_SET_LONG_TIMEOUT
+ Set the long timeout that is used for operations that are
+ known to take a long time. The default is 14000 seconds
+ (3.9 hours). For erase this value is further multiplied by
+ eight.
+ MT_ST_SET_CLN
+ Set the cleaning request interpretation parameters using
+ the lowest 24 bits of the argument. The driver can set the
+ generic status bit GMT_CLN if a cleaning request bit pattern
+ is found from the extended sense data. Many drives set one or
+ more bits in the extended sense data when the drive needs
+ cleaning. The bits are device-dependent. The driver is
+ given the number of the sense data byte (the lowest eight
+ bits of the argument; must be >= 18 (values 1 - 17
+ reserved) and <= the maximum requested sense data sixe),
+ a mask to select the relevant bits (the bits 9-16), and the
+ bit pattern (bits 17-23). If the bit pattern is zero, one
+ or more bits under the mask indicate cleaning request. If
+ the pattern is non-zero, the pattern must match the masked
+ sense data byte.
+
+ (The cleaning bit is set if the additional sense code and
+ qualifier 00h 17h are seen regardless of the setting of
+ MT_ST_SET_CLN.)
+
+The following ioctl uses the structure mtpos:
+MTIOCPOS Reads the current position from the drive. Uses
+ Tandberg-compatible QFA for SCSI-1 drives and the SCSI-2
+ command for the SCSI-2 drives.
+
+The following ioctl uses the structure mtget to return the status:
+MTIOCGET Returns some status information.
+ The file number and block number within file are returned. The
+ block is -1 when it can't be determined (e.g., after MTBSF).
+ The drive type is either MTISSCSI1 or MTISSCSI2.
+ The number of recovered errors since the previous status call
+ is stored in the lower word of the field mt_erreg.
+ The current block size and the density code are stored in the field
+ mt_dsreg (shifts for the subfields are MT_ST_BLKSIZE_SHIFT and
+ MT_ST_DENSITY_SHIFT).
+ The GMT_xxx status bits reflect the drive status. GMT_DR_OPEN
+ is set if there is no tape in the drive. GMT_EOD means either
+ end of recorded data or end of tape. GMT_EOT means end of tape.
+
+
+MISCELLANEOUS COMPILE OPTIONS
+
+The recovered write errors are considered fatal if ST_RECOVERED_WRITE_FATAL
+is defined.
+
+The maximum number of tape devices is determined by the define
+ST_MAX_TAPES. If more tapes are detected at driver initialization, the
+maximum is adjusted accordingly.
+
+Immediate return from tape positioning SCSI commands can be enabled by
+defining ST_NOWAIT. If this is defined, the user should take care that
+the next tape operation is not started before the previous one has
+finished. The drives and SCSI adapters should handle this condition
+gracefully, but some drive/adapter combinations are known to hang the
+SCSI bus in this case.
+
+The MTEOM command is by default implemented as spacing over 32767
+filemarks. With this method the file number in the status is
+correct. The user can request using direct spacing to EOD by setting
+ST_FAST_EOM 1 (or using the MT_ST_OPTIONS ioctl). In this case the file
+number will be invalid.
+
+When using read ahead or buffered writes the position within the file
+may not be correct after the file is closed (correct position may
+require backspacing over more than one record). The correct position
+within file can be obtained if ST_IN_FILE_POS is defined at compile
+time or the MT_ST_CAN_BSR bit is set for the drive with an ioctl.
+(The driver always backs over a filemark crossed by read ahead if the
+user does not request data that far.)
+
+
+DEBUGGING HINTS
+
+Debugging code is now compiled in by default but debugging is turned off
+with the kernel module parameter debug_flag defaulting to 0. Debugging
+can still be switched on and off with an ioctl. To enable debug at
+module load time add debug_flag=1 to the module load options, the
+debugging output is not voluminous.
+
+If the tape seems to hang, I would be very interested to hear where
+the driver is waiting. With the command 'ps -l' you can see the state
+of the process using the tape. If the state is D, the process is
+waiting for something. The field WCHAN tells where the driver is
+waiting. If you have the current System.map in the correct place (in
+/boot for the procps I use) or have updated /etc/psdatabase (for kmem
+ps), ps writes the function name in the WCHAN field. If not, you have
+to look up the function from System.map.
+
+Note also that the timeouts are very long compared to most other
+drivers. This means that the Linux driver may appear hung although the
+real reason is that the tape firmware has got confused.
diff --git a/Documentation/scsi/sym53c500_cs.txt b/Documentation/scsi/sym53c500_cs.txt
new file mode 100644
index 000000000..75febcf92
--- /dev/null
+++ b/Documentation/scsi/sym53c500_cs.txt
@@ -0,0 +1,23 @@
+The sym53c500_cs driver originated as an add-on to David Hinds' pcmcia-cs
+package, and was written by Tom Corner (tcorner@via.at). A rewrite was
+long overdue, and the current version addresses the following concerns:
+
+ (1) extensive kernel changes between 2.4 and 2.6.
+ (2) deprecated PCMCIA support outside the kernel.
+
+All the USE_BIOS code has been ripped out. It was never used, and could
+not have worked anyway. The USE_DMA code is likewise gone. Many thanks
+to YOKOTA Hiroshi (nsp_cs driver) and David Hinds (qlogic_cs driver) for
+the code fragments I shamelessly adapted for this work. Thanks also to
+Christoph Hellwig for his patient tutelage while I stumbled about.
+
+The Symbios Logic 53c500 chip was used in the "newer" (circa 1997) version
+of the New Media Bus Toaster PCMCIA SCSI controller. Presumably there are
+other products using this chip, but I've never laid eyes (much less hands)
+on one.
+
+Through the years, there have been a number of downloads of the pcmcia-cs
+version of this driver, and I guess it worked for those users. It worked
+for Tom Corner, and it works for me. Your mileage will probably vary.
+
+--Bob Tracy (rct@frus.com)
diff --git a/Documentation/scsi/sym53c8xx_2.txt b/Documentation/scsi/sym53c8xx_2.txt
new file mode 100644
index 000000000..6af8f7a77
--- /dev/null
+++ b/Documentation/scsi/sym53c8xx_2.txt
@@ -0,0 +1,1048 @@
+The Linux SYM-2 driver documentation file
+
+Written by Gerard Roudier <groudier@free.fr>
+21 Rue Carnot
+95170 DEUIL LA BARRE - FRANCE
+
+Updated by Matthew Wilcox <matthew@wil.cx>
+
+2004-10-09
+===============================================================================
+
+1. Introduction
+2. Supported chips and SCSI features
+3. Advantages of this driver for newer chips.
+ 3.1 Optimized SCSI SCRIPTS
+ 3.2 New features appeared with the SYM53C896
+4. Memory mapped I/O versus normal I/O
+5. Tagged command queueing
+6. Parity checking
+7. Profiling information
+8. Control commands
+ 8.1 Set minimum synchronous period
+ 8.2 Set wide size
+ 8.3 Set maximum number of concurrent tagged commands
+ 8.4 Set debug mode
+ 8.5 Set flag (no_disc)
+ 8.6 Set verbose level
+ 8.7 Reset all logical units of a target
+ 8.8 Abort all tasks of all logical units of a target
+9. Configuration parameters
+10. Boot setup commands
+ 10.1 Syntax
+ 10.2 Available arguments
+ 10.2.1 Default number of tagged commands
+ 10.2.2 Burst max
+ 10.2.3 LED support
+ 10.2.4 Differential mode
+ 10.2.5 IRQ mode
+ 10.2.6 Check SCSI BUS
+ 10.2.7 Suggest a default SCSI id for hosts
+ 10.2.8 Verbosity level
+ 10.2.9 Debug mode
+ 10.2.10 Settle delay
+ 10.2.11 Serial NVRAM
+ 10.2.12 Exclude a host from being attached
+ 10.3 Converting from old options
+ 10.4 SCSI BUS checking boot option
+11. SCSI problem troubleshooting
+ 15.1 Problem tracking
+ 15.2 Understanding hardware error reports
+12. Serial NVRAM support (by Richard Waltham)
+ 17.1 Features
+ 17.2 Symbios NVRAM layout
+ 17.3 Tekram NVRAM layout
+
+===============================================================================
+
+1. Introduction
+
+This driver supports the whole SYM53C8XX family of PCI-SCSI controllers.
+It also support the subset of LSI53C10XX PCI-SCSI controllers that are based
+on the SYM53C8XX SCRIPTS language.
+
+It replaces the sym53c8xx+ncr53c8xx driver bundle and shares its core code
+with the FreeBSD SYM-2 driver. The `glue' that allows this driver to work
+under Linux is contained in 2 files named sym_glue.h and sym_glue.c.
+Other drivers files are intended not to depend on the Operating System
+on which the driver is used.
+
+The history of this driver can be summarized as follows:
+
+1993: ncr driver written for 386bsd and FreeBSD by:
+ Wolfgang Stanglmeier <wolf@cologne.de>
+ Stefan Esser <se@mi.Uni-Koeln.de>
+
+1996: port of the ncr driver to Linux-1.2.13 and rename it ncr53c8xx.
+ Gerard Roudier
+
+1998: new sym53c8xx driver for Linux based on LOAD/STORE instruction and that
+ adds full support for the 896 but drops support for early NCR devices.
+ Gerard Roudier
+
+1999: port of the sym53c8xx driver to FreeBSD and support for the LSI53C1010
+ 33 MHz and 66MHz Ultra-3 controllers. The new driver is named `sym'.
+ Gerard Roudier
+
+2000: Add support for early NCR devices to FreeBSD `sym' driver.
+ Break the driver into several sources and separate the OS glue
+ code from the core code that can be shared among different O/Ses.
+ Write a glue code for Linux.
+ Gerard Roudier
+
+2004: Remove FreeBSD compatibility code. Remove support for versions of
+ Linux before 2.6. Start using Linux facilities.
+
+This README file addresses the Linux version of the driver. Under FreeBSD,
+the driver documentation is the sym.8 man page.
+
+Information about new chips is available at LSILOGIC web server:
+
+ http://www.lsilogic.com/
+
+SCSI standard documentations are available at T10 site:
+
+ http://www.t10.org/
+
+Useful SCSI tools written by Eric Youngdale are part of most Linux
+distributions:
+ scsiinfo: command line tool
+ scsi-config: TCL/Tk tool using scsiinfo
+
+2. Supported chips and SCSI features
+
+The following features are supported for all chips:
+
+ Synchronous negotiation
+ Disconnection
+ Tagged command queuing
+ SCSI parity checking
+ PCI Master parity checking
+
+Other features depends on chip capabilities.
+The driver notably uses optimized SCRIPTS for devices that support
+LOAD/STORE and handles PHASE MISMATCH from SCRIPTS for devices that
+support the corresponding feature.
+
+The following table shows some characteristics of the chip family.
+
+ On board LOAD/STORE HARDWARE
+Chip SDMS BIOS Wide SCSI std. Max. sync SCRIPTS PHASE MISMATCH
+---- --------- ---- --------- ---------- ---------- --------------
+810 N N FAST10 10 MB/s N N
+810A N N FAST10 10 MB/s Y N
+815 Y N FAST10 10 MB/s N N
+825 Y Y FAST10 20 MB/s N N
+825A Y Y FAST10 20 MB/s Y N
+860 N N FAST20 20 MB/s Y N
+875 Y Y FAST20 40 MB/s Y N
+875A Y Y FAST20 40 MB/s Y Y
+876 Y Y FAST20 40 MB/s Y N
+895 Y Y FAST40 80 MB/s Y N
+895A Y Y FAST40 80 MB/s Y Y
+896 Y Y FAST40 80 MB/s Y Y
+897 Y Y FAST40 80 MB/s Y Y
+1510D Y Y FAST40 80 MB/s Y Y
+1010 Y Y FAST80 160 MB/s Y Y
+1010_66* Y Y FAST80 160 MB/s Y Y
+
+* Chip supports 33MHz and 66MHz PCI bus clock.
+
+
+Summary of other supported features:
+
+Module: allow to load the driver
+Memory mapped I/O: increases performance
+Control commands: write operations to the proc SCSI file system
+Debugging information: written to syslog (expert only)
+Scatter / gather
+Shared interrupt
+Boot setup commands
+Serial NVRAM: Symbios and Tekram formats
+
+
+3. Advantages of this driver for newer chips.
+
+3.1 Optimized SCSI SCRIPTS.
+
+All chips except the 810, 815 and 825, support new SCSI SCRIPTS instructions
+named LOAD and STORE that allow to move up to 1 DWORD from/to an IO register
+to/from memory much faster that the MOVE MEMORY instruction that is supported
+by the 53c7xx and 53c8xx family.
+
+The LOAD/STORE instructions support absolute and DSA relative addressing
+modes. The SCSI SCRIPTS had been entirely rewritten using LOAD/STORE instead
+of MOVE MEMORY instructions.
+
+Due to the lack of LOAD/STORE SCRIPTS instructions by earlier chips, this
+driver also incorporates a different SCRIPTS set based on MEMORY MOVE, in
+order to provide support for the entire SYM53C8XX chips family.
+
+3.2 New features appeared with the SYM53C896
+
+Newer chips (see above) allows handling of the phase mismatch context from
+SCRIPTS (avoids the phase mismatch interrupt that stops the SCSI processor
+until the C code has saved the context of the transfer).
+
+The 896 and 1010 chips support 64 bit PCI transactions and addressing,
+while the 895A supports 32 bit PCI transactions and 64 bit addressing.
+The SCRIPTS processor of these chips is not true 64 bit, but uses segment
+registers for bit 32-63. Another interesting feature is that LOAD/STORE
+instructions that address the on-chip RAM (8k) remain internal to the chip.
+
+4. Memory mapped I/O versus normal I/O
+
+Memory mapped I/O has less latency than normal I/O and is the recommended
+way for doing IO with PCI devices. Memory mapped I/O seems to work fine on
+most hardware configurations, but some poorly designed chipsets may break
+this feature. A configuration option is provided for normal I/O to be
+used but the driver defaults to MMIO.
+
+5. Tagged command queueing
+
+Queuing more than 1 command at a time to a device allows it to perform
+optimizations based on actual head positions and its mechanical
+characteristics. This feature may also reduce average command latency.
+In order to really gain advantage of this feature, devices must have
+a reasonable cache size (No miracle is to be expected for a low-end
+hard disk with 128 KB or less).
+Some known old SCSI devices do not properly support tagged command queuing.
+Generally, firmware revisions that fix this kind of problems are available
+at respective vendor web/ftp sites.
+All I can say is that I never have had problem with tagged queuing using
+this driver and its predecessors. Hard disks that behaved correctly for
+me using tagged commands are the following:
+
+- IBM S12 0662
+- Conner 1080S
+- Quantum Atlas I
+- Quantum Atlas II
+- Seagate Cheetah I
+- Quantum Viking II
+- IBM DRVS
+- Quantum Atlas IV
+- Seagate Cheetah II
+
+If your controller has NVRAM, you can configure this feature per target
+from the user setup tool. The Tekram Setup program allows to tune the
+maximum number of queued commands up to 32. The Symbios Setup only allows
+to enable or disable this feature.
+
+The maximum number of simultaneous tagged commands queued to a device
+is currently set to 16 by default. This value is suitable for most SCSI
+disks. With large SCSI disks (>= 2GB, cache >= 512KB, average seek time
+<= 10 ms), using a larger value may give better performances.
+
+This driver supports up to 255 commands per device, and but using more than
+64 is generally not worth-while, unless you are using a very large disk or
+disk arrays. It is noticeable that most of recent hard disks seem not to
+accept more than 64 simultaneous commands. So, using more than 64 queued
+commands is probably just resource wasting.
+
+If your controller does not have NVRAM or if it is managed by the SDMS
+BIOS/SETUP, you can configure tagged queueing feature and device queue
+depths from the boot command-line. For example:
+
+ sym53c8xx=tags:4/t2t3q15-t4q7/t1u0q32
+
+will set tagged commands queue depths as follow:
+
+- target 2 all luns on controller 0 --> 15
+- target 3 all luns on controller 0 --> 15
+- target 4 all luns on controller 0 --> 7
+- target 1 lun 0 on controller 1 --> 32
+- all other target/lun --> 4
+
+In some special conditions, some SCSI disk firmwares may return a
+QUEUE FULL status for a SCSI command. This behaviour is managed by the
+driver using the following heuristic:
+
+- Each time a QUEUE FULL status is returned, tagged queue depth is reduced
+ to the actual number of disconnected commands.
+
+- Every 200 successfully completed SCSI commands, if allowed by the
+ current limit, the maximum number of queueable commands is incremented.
+
+Since QUEUE FULL status reception and handling is resource wasting, the
+driver notifies by default this problem to user by indicating the actual
+number of commands used and their status, as well as its decision on the
+device queue depth change.
+The heuristic used by the driver in handling QUEUE FULL ensures that the
+impact on performances is not too bad. You can get rid of the messages by
+setting verbose level to zero, as follow:
+
+1st method: boot your system using 'sym53c8xx=verb:0' option.
+2nd method: apply "setverbose 0" control command to the proc fs entry
+ corresponding to your controller after boot-up.
+
+6. Parity checking
+
+The driver supports SCSI parity checking and PCI bus master parity
+checking. These features must be enabled in order to ensure safe
+data transfers. Some flawed devices or mother boards may have problems
+with parity. The options to defeat parity checking have been removed
+from the driver.
+
+7. Profiling information
+
+This driver does not provide profiling information as did its predecessors.
+This feature was not this useful and added complexity to the code.
+As the driver code got more complex, I have decided to remove everything
+that didn't seem actually useful.
+
+8. Control commands
+
+Control commands can be sent to the driver with write operations to
+the proc SCSI file system. The generic command syntax is the
+following:
+
+ echo "<verb> <parameters>" >/proc/scsi/sym53c8xx/0
+ (assumes controller number is 0)
+
+Using "all" for "<target>" parameter with the commands below will
+apply to all targets of the SCSI chain (except the controller).
+
+Available commands:
+
+8.1 Set minimum synchronous period factor
+
+ setsync <target> <period factor>
+
+ target: target number
+ period: minimum synchronous period.
+ Maximum speed = 1000/(4*period factor) except for special
+ cases below.
+
+ Specify a period of 0, to force asynchronous transfer mode.
+
+ 9 means 12.5 nano-seconds synchronous period
+ 10 means 25 nano-seconds synchronous period
+ 11 means 30 nano-seconds synchronous period
+ 12 means 50 nano-seconds synchronous period
+
+8.2 Set wide size
+
+ setwide <target> <size>
+
+ target: target number
+ size: 0=8 bits, 1=16bits
+
+8.3 Set maximum number of concurrent tagged commands
+
+ settags <target> <tags>
+
+ target: target number
+ tags: number of concurrent tagged commands
+ must not be greater than configured (default: 16)
+
+8.4 Set debug mode
+
+ setdebug <list of debug flags>
+
+ Available debug flags:
+ alloc: print info about memory allocations (ccb, lcb)
+ queue: print info about insertions into the command start queue
+ result: print sense data on CHECK CONDITION status
+ scatter: print info about the scatter process
+ scripts: print info about the script binding process
+ tiny: print minimal debugging information
+ timing: print timing information of the NCR chip
+ nego: print information about SCSI negotiations
+ phase: print information on script interruptions
+
+ Use "setdebug" with no argument to reset debug flags.
+
+
+8.5 Set flag (no_disc)
+
+ setflag <target> <flag>
+
+ target: target number
+
+ For the moment, only one flag is available:
+
+ no_disc: not allow target to disconnect.
+
+ Do not specify any flag in order to reset the flag. For example:
+ - setflag 4
+ will reset no_disc flag for target 4, so will allow it disconnections.
+ - setflag all
+ will allow disconnection for all devices on the SCSI bus.
+
+
+8.6 Set verbose level
+
+ setverbose #level
+
+ The driver default verbose level is 1. This command allows to change
+ th driver verbose level after boot-up.
+
+8.7 Reset all logical units of a target
+
+ resetdev <target>
+
+ target: target number
+ The driver will try to send a BUS DEVICE RESET message to the target.
+
+8.8 Abort all tasks of all logical units of a target
+
+ cleardev <target>
+
+ target: target number
+ The driver will try to send a ABORT message to all the logical units
+ of the target.
+
+
+9. Configuration parameters
+
+Under kernel configuration tools (make menuconfig, for example), it is
+possible to change some default driver configuration parameters.
+If the firmware of all your devices is perfect enough, all the
+features supported by the driver can be enabled at start-up. However,
+if only one has a flaw for some SCSI feature, you can disable the
+support by the driver of this feature at linux start-up and enable
+this feature after boot-up only for devices that support it safely.
+
+Configuration parameters:
+
+Use normal IO (default answer: n)
+ Answer "y" if you suspect your mother board to not allow memory mapped I/O.
+ May slow down performance a little.
+
+Default tagged command queue depth (default answer: 16)
+ Entering 0 defaults to tagged commands not being used.
+ This parameter can be specified from the boot command line.
+
+Maximum number of queued commands (default answer: 32)
+ This option allows you to specify the maximum number of tagged commands
+ that can be queued to a device. The maximum supported value is 255.
+
+Synchronous transfers frequency (default answer: 80)
+ This option allows you to specify the frequency in MHz the driver
+ will use at boot time for synchronous data transfer negotiations.
+ 0 means "asynchronous data transfers".
+
+10. Boot setup commands
+
+10.1 Syntax
+
+Setup commands can be passed to the driver either at boot time or as
+parameters to modprobe, as described in Documentation/kernel-parameters.txt
+
+Example of boot setup command under lilo prompt:
+
+lilo: linux root=/dev/sda2 sym53c8xx.cmd_per_lun=4 sym53c8xx.sync=10 sym53c8xx.debug=0x200
+
+- enable tagged commands, up to 4 tagged commands queued.
+- set synchronous negotiation speed to 10 Mega-transfers / second.
+- set DEBUG_NEGO flag.
+
+The following command will install the driver module with the same
+options as above.
+
+ modprobe sym53c8xx cmd_per_lun=4 sync=10 debug=0x200
+
+10.2 Available arguments
+
+10.2.1 Default number of tagged commands
+ cmd_per_lun=0 (or cmd_per_lun=1) tagged command queuing disabled
+ cmd_per_lun=#tags (#tags > 1) tagged command queuing enabled
+ #tags will be truncated to the max queued commands configuration parameter.
+
+10.2.2 Burst max
+ burst=0 burst disabled
+ burst=255 get burst length from initial IO register settings.
+ burst=#x burst enabled (1<<#x burst transfers max)
+ #x is an integer value which is log base 2 of the burst transfers max.
+ By default the driver uses the maximum value supported by the chip.
+
+10.2.3 LED support
+ led=1 enable LED support
+ led=0 disable LED support
+ Do not enable LED support if your scsi board does not use SDMS BIOS.
+ (See 'Configuration parameters')
+
+10.2.4 Differential mode
+ diff=0 never set up diff mode
+ diff=1 set up diff mode if BIOS set it
+ diff=2 always set up diff mode
+ diff=3 set diff mode if GPIO3 is not set
+
+10.2.5 IRQ mode
+ irqm=0 always open drain
+ irqm=1 same as initial settings (assumed BIOS settings)
+ irqm=2 always totem pole
+
+10.2.6 Check SCSI BUS
+ buschk=<option bits>
+
+ Available option bits:
+ 0x0: No check.
+ 0x1: Check and do not attach the controller on error.
+ 0x2: Check and just warn on error.
+
+10.2.7 Suggest a default SCSI id for hosts
+ hostid=255 no id suggested.
+ hostid=#x (0 < x < 7) x suggested for hosts SCSI id.
+
+ If a host SCSI id is available from the NVRAM, the driver will ignore
+ any value suggested as boot option. Otherwise, if a suggested value
+ different from 255 has been supplied, it will use it. Otherwise, it will
+ try to deduce the value previously set in the hardware and use value
+ 7 if the hardware value is zero.
+
+10.2.8 Verbosity level
+ verb=0 minimal
+ verb=1 normal
+ verb=2 too much
+
+10.2.9 Debug mode
+ debug=0 clear debug flags
+ debug=#x set debug flags
+ #x is an integer value combining the following power-of-2 values:
+ DEBUG_ALLOC 0x1
+ DEBUG_PHASE 0x2
+ DEBUG_POLL 0x4
+ DEBUG_QUEUE 0x8
+ DEBUG_RESULT 0x10
+ DEBUG_SCATTER 0x20
+ DEBUG_SCRIPT 0x40
+ DEBUG_TINY 0x80
+ DEBUG_TIMING 0x100
+ DEBUG_NEGO 0x200
+ DEBUG_TAGS 0x400
+ DEBUG_FREEZE 0x800
+ DEBUG_RESTART 0x1000
+
+ You can play safely with DEBUG_NEGO. However, some of these flags may
+ generate bunches of syslog messages.
+
+10.2.10 Settle delay
+ settle=n delay for n seconds
+
+ After a bus reset, the driver will delay for n seconds before talking
+ to any device on the bus. The default is 3 seconds and safe mode will
+ default it to 10.
+
+10.2.11 Serial NVRAM
+ NB: option not currently implemented.
+ nvram=n do not look for serial NVRAM
+ nvram=y test controllers for onboard serial NVRAM
+ (alternate binary form)
+ nvram=<bits options>
+ 0x01 look for NVRAM (equivalent to nvram=y)
+ 0x02 ignore NVRAM "Synchronous negotiation" parameters for all devices
+ 0x04 ignore NVRAM "Wide negotiation" parameter for all devices
+ 0x08 ignore NVRAM "Scan at boot time" parameter for all devices
+ 0x80 also attach controllers set to OFF in the NVRAM (sym53c8xx only)
+
+10.2.12 Exclude a host from being attached
+ excl=<io_address>,...
+
+ Prevent host at a given io address from being attached.
+ For example 'excl=0xb400,0xc000' indicate to the
+ driver not to attach hosts at address 0xb400 and 0xc000.
+
+10.3 Converting from old style options
+
+Previously, the sym2 driver accepted arguments of the form
+ sym53c8xx=tags:4,sync:10,debug:0x200
+
+As a result of the new module parameters, this is no longer available.
+Most of the options have remained the same, but tags has become
+cmd_per_lun to reflect its different purposes. The sample above would
+be specified as:
+ modprobe sym53c8xx cmd_per_lun=4 sync=10 debug=0x200
+
+or on the kernel boot line as:
+ sym53c8xx.cmd_per_lun=4 sym53c8xx.sync=10 sym53c8xx.debug=0x200
+
+10.4 SCSI BUS checking boot option.
+
+When this option is set to a non-zero value, the driver checks SCSI lines
+logic state, 100 micro-seconds after having asserted the SCSI RESET line.
+The driver just reads SCSI lines and checks all lines read FALSE except RESET.
+Since SCSI devices shall release the BUS at most 800 nano-seconds after SCSI
+RESET has been asserted, any signal to TRUE may indicate a SCSI BUS problem.
+Unfortunately, the following common SCSI BUS problems are not detected:
+- Only 1 terminator installed.
+- Misplaced terminators.
+- Bad quality terminators.
+On the other hand, either bad cabling, broken devices, not conformant
+devices, ... may cause a SCSI signal to be wrong when te driver reads it.
+
+15. SCSI problem troubleshooting
+
+15.1 Problem tracking
+
+Most SCSI problems are due to a non conformant SCSI bus or too buggy
+devices. If unfortunately you have SCSI problems, you can check the
+following things:
+
+- SCSI bus cables
+- terminations at both end of the SCSI chain
+- linux syslog messages (some of them may help you)
+
+If you do not find the source of problems, you can configure the
+driver or devices in the NVRAM with minimal features.
+
+- only asynchronous data transfers
+- tagged commands disabled
+- disconnections not allowed
+
+Now, if your SCSI bus is ok, your system has every chance to work
+with this safe configuration but performances will not be optimal.
+
+If it still fails, then you can send your problem description to
+appropriate mailing lists or news-groups. Send me a copy in order to
+be sure I will receive it. Obviously, a bug in the driver code is
+possible.
+
+ My current email address: Gerard Roudier <groudier@free.fr>
+
+Allowing disconnections is important if you use several devices on
+your SCSI bus but often causes problems with buggy devices.
+Synchronous data transfers increases throughput of fast devices like
+hard disks. Good SCSI hard disks with a large cache gain advantage of
+tagged commands queuing.
+
+15.2 Understanding hardware error reports
+
+When the driver detects an unexpected error condition, it may display a
+message of the following pattern.
+
+sym0:1: ERROR (0:48) (1-21-65) (f/95/0) @ (script 7c0:19000000).
+sym0: script cmd = 19000000
+sym0: regdump: da 10 80 95 47 0f 01 07 75 01 81 21 80 01 09 00.
+
+Some fields in such a message may help you understand the cause of the
+problem, as follows:
+
+sym0:1: ERROR (0:48) (1-21-65) (f/95/0) @ (script 7c0:19000000).
+.....A.........B.C....D.E..F....G.H..I.......J.....K...L.......
+
+Field A : target number.
+ SCSI ID of the device the controller was talking with at the moment the
+ error occurs.
+
+Field B : DSTAT io register (DMA STATUS)
+ Bit 0x40 : MDPE Master Data Parity Error
+ Data parity error detected on the PCI BUS.
+ Bit 0x20 : BF Bus Fault
+ PCI bus fault condition detected
+ Bit 0x01 : IID Illegal Instruction Detected
+ Set by the chip when it detects an Illegal Instruction format
+ on some condition that makes an instruction illegal.
+ Bit 0x80 : DFE Dma Fifo Empty
+ Pure status bit that does not indicate an error.
+ If the reported DSTAT value contains a combination of MDPE (0x40),
+ BF (0x20), then the cause may be likely due to a PCI BUS problem.
+
+Field C : SIST io register (SCSI Interrupt Status)
+ Bit 0x08 : SGE SCSI GROSS ERROR
+ Indicates that the chip detected a severe error condition
+ on the SCSI BUS that prevents the SCSI protocol from functioning
+ properly.
+ Bit 0x04 : UDC Unexpected Disconnection
+ Indicates that the device released the SCSI BUS when the chip
+ was not expecting this to happen. A device may behave so to
+ indicate the SCSI initiator that an error condition not reportable using the SCSI protocol has occurred.
+ Bit 0x02 : RST SCSI BUS Reset
+ Generally SCSI targets do not reset the SCSI BUS, although any
+ device on the BUS can reset it at any time.
+ Bit 0x01 : PAR Parity
+ SCSI parity error detected.
+ On a faulty SCSI BUS, any error condition among SGE (0x08), UDC (0x04) and
+ PAR (0x01) may be detected by the chip. If your SCSI system sometimes
+ encounters such error conditions, especially SCSI GROSS ERROR, then a SCSI
+ BUS problem is likely the cause of these errors.
+
+For fields D,E,F,G and H, you may look into the sym53c8xx_defs.h file
+that contains some minimal comments on IO register bits.
+Field D : SOCL Scsi Output Control Latch
+ This register reflects the state of the SCSI control lines the
+ chip want to drive or compare against.
+Field E : SBCL Scsi Bus Control Lines
+ Actual value of control lines on the SCSI BUS.
+Field F : SBDL Scsi Bus Data Lines
+ Actual value of data lines on the SCSI BUS.
+Field G : SXFER SCSI Transfer
+ Contains the setting of the Synchronous Period for output and
+ the current Synchronous offset (offset 0 means asynchronous).
+Field H : SCNTL3 Scsi Control Register 3
+ Contains the setting of timing values for both asynchronous and
+ synchronous data transfers.
+Field I : SCNTL4 Scsi Control Register 4
+ Only meaningful for 53C1010 Ultra3 controllers.
+
+Understanding Fields J, K, L and dumps requires to have good knowledge of
+SCSI standards, chip cores functionnals and internal driver data structures.
+You are not required to decode and understand them, unless you want to help
+maintain the driver code.
+
+17. Serial NVRAM (added by Richard Waltham: dormouse@farsrobt.demon.co.uk)
+
+17.1 Features
+
+Enabling serial NVRAM support enables detection of the serial NVRAM included
+on Symbios and some Symbios compatible host adaptors, and Tekram boards. The
+serial NVRAM is used by Symbios and Tekram to hold set up parameters for the
+host adaptor and its attached drives.
+
+The Symbios NVRAM also holds data on the boot order of host adaptors in a
+system with more than one host adaptor. This information is no longer used
+as it's fundamentally incompatible with the hotplug PCI model.
+
+Tekram boards using Symbios chips, DC390W/F/U, which have NVRAM are detected
+and this is used to distinguish between Symbios compatible and Tekram host
+adaptors. This is used to disable the Symbios compatible "diff" setting
+incorrectly set on Tekram boards if the CONFIG_SCSI_53C8XX_SYMBIOS_COMPAT
+configuration parameter is set enabling both Symbios and Tekram boards to be
+used together with the Symbios cards using all their features, including
+"diff" support. ("led pin" support for Symbios compatible cards can remain
+enabled when using Tekram cards. It does nothing useful for Tekram host
+adaptors but does not cause problems either.)
+
+The parameters the driver is able to get from the NVRAM depend on the
+data format used, as follow:
+
+ Tekram format Symbios format
+General and host parameters
+ Boot order N Y
+ Host SCSI ID Y Y
+ SCSI parity checking Y Y
+ Verbose boot messages N Y
+SCSI devices parameters
+ Synchronous transfer speed Y Y
+ Wide 16 / Narrow Y Y
+ Tagged Command Queuing enabled Y Y
+ Disconnections enabled Y Y
+ Scan at boot time N Y
+
+In order to speed up the system boot, for each device configured without
+the "scan at boot time" option, the driver forces an error on the
+first TEST UNIT READY command received for this device.
+
+
+17.2 Symbios NVRAM layout
+
+typical data at NVRAM address 0x100 (53c810a NVRAM)
+-----------------------------------------------------------
+00 00
+64 01
+8e 0b
+
+00 30 00 00 00 00 07 00 00 00 00 00 00 00 07 04 10 04 00 00
+
+04 00 0f 00 00 10 00 50 00 00 01 00 00 62
+04 00 03 00 00 10 00 58 00 00 01 00 00 63
+04 00 01 00 00 10 00 48 00 00 01 00 00 61
+00 00 00 00 00 00 00 00 00 00 00 00 00 00
+
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+00 00 00 00 00 00 00 00
+
+fe fe
+00 00
+00 00
+-----------------------------------------------------------
+NVRAM layout details
+
+NVRAM Address 0x000-0x0ff not used
+ 0x100-0x26f initialised data
+ 0x270-0x7ff not used
+
+general layout
+
+ header - 6 bytes,
+ data - 356 bytes (checksum is byte sum of this data)
+ trailer - 6 bytes
+ ---
+ total 368 bytes
+
+data area layout
+
+ controller set up - 20 bytes
+ boot configuration - 56 bytes (4x14 bytes)
+ device set up - 128 bytes (16x8 bytes)
+ unused (spare?) - 152 bytes (19x8 bytes)
+ ---
+ total 356 bytes
+
+-----------------------------------------------------------
+header
+
+00 00 - ?? start marker
+64 01 - byte count (lsb/msb excludes header/trailer)
+8e 0b - checksum (lsb/msb excludes header/trailer)
+-----------------------------------------------------------
+controller set up
+
+00 30 00 00 00 00 07 00 00 00 00 00 00 00 07 04 10 04 00 00
+ | | | |
+ | | | -- host ID
+ | | |
+ | | --Removable Media Support
+ | | 0x00 = none
+ | | 0x01 = Bootable Device
+ | | 0x02 = All with Media
+ | |
+ | --flag bits 2
+ | 0x00000001= scan order hi->low
+ | (default 0x00 - scan low->hi)
+ --flag bits 1
+ 0x00000001 scam enable
+ 0x00000010 parity enable
+ 0x00000100 verbose boot msgs
+
+remaining bytes unknown - they do not appear to change in my
+current set up for any of the controllers.
+
+default set up is identical for 53c810a and 53c875 NVRAM
+(Removable Media added Symbios BIOS version 4.09)
+-----------------------------------------------------------
+boot configuration
+
+boot order set by order of the devices in this table
+
+04 00 0f 00 00 10 00 50 00 00 01 00 00 62 -- 1st controller
+04 00 03 00 00 10 00 58 00 00 01 00 00 63 2nd controller
+04 00 01 00 00 10 00 48 00 00 01 00 00 61 3rd controller
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 4th controller
+ | | | | | | | |
+ | | | | | | ---- PCI io port adr
+ | | | | | --0x01 init/scan at boot time
+ | | | | --PCI device/function number (0xdddddfff)
+ | | ----- ?? PCI vendor ID (lsb/msb)
+ ----PCI device ID (lsb/msb)
+
+?? use of this data is a guess but seems reasonable
+
+remaining bytes unknown - they do not appear to change in my
+current set up
+
+default set up is identical for 53c810a and 53c875 NVRAM
+-----------------------------------------------------------
+device set up (up to 16 devices - includes controller)
+
+0f 00 08 08 64 00 0a 00 - id 0
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00
+0f 00 08 08 64 00 0a 00 - id 15
+ | | | | | |
+ | | | | ----timeout (lsb/msb)
+ | | | --synch period (0x?? 40 Mtrans/sec- fast 40) (probably 0x28)
+ | | | (0x30 20 Mtrans/sec- fast 20)
+ | | | (0x64 10 Mtrans/sec- fast )
+ | | | (0xc8 5 Mtrans/sec)
+ | | | (0x00 asynchronous)
+ | | -- ?? max sync offset (0x08 in NVRAM on 53c810a)
+ | | (0x10 in NVRAM on 53c875)
+ | --device bus width (0x08 narrow)
+ | (0x10 16 bit wide)
+ --flag bits
+ 0x00000001 - disconnect enabled
+ 0x00000010 - scan at boot time
+ 0x00000100 - scan luns
+ 0x00001000 - queue tags enabled
+
+remaining bytes unknown - they do not appear to change in my
+current set up
+
+?? use of this data is a guess but seems reasonable
+(but it could be max bus width)
+
+default set up for 53c810a NVRAM
+default set up for 53c875 NVRAM - bus width - 0x10
+ - sync offset ? - 0x10
+ - sync period - 0x30
+-----------------------------------------------------------
+?? spare device space (32 bit bus ??)
+
+00 00 00 00 00 00 00 00 (19x8bytes)
+.
+.
+00 00 00 00 00 00 00 00
+
+default set up is identical for 53c810a and 53c875 NVRAM
+-----------------------------------------------------------
+trailer
+
+fe fe - ? end marker ?
+00 00
+00 00
+
+default set up is identical for 53c810a and 53c875 NVRAM
+-----------------------------------------------------------
+
+
+
+17.3 Tekram NVRAM layout
+
+nvram 64x16 (1024 bit)
+
+Drive settings
+
+Drive ID 0-15 (addr 0x0yyyy0 = device setup, yyyy = ID)
+ (addr 0x0yyyy1 = 0x0000)
+
+ x x x x x x x x x x x x x x x x
+ | | | | | | | | |
+ | | | | | | | | ----- parity check 0 - off
+ | | | | | | | | 1 - on
+ | | | | | | | |
+ | | | | | | | ------- sync neg 0 - off
+ | | | | | | | 1 - on
+ | | | | | | |
+ | | | | | | --------- disconnect 0 - off
+ | | | | | | 1 - on
+ | | | | | |
+ | | | | | ----------- start cmd 0 - off
+ | | | | | 1 - on
+ | | | | |
+ | | | | -------------- tagged cmds 0 - off
+ | | | | 1 - on
+ | | | |
+ | | | ---------------- wide neg 0 - off
+ | | | 1 - on
+ | | |
+ --------------------------- sync rate 0 - 10.0 Mtrans/sec
+ 1 - 8.0
+ 2 - 6.6
+ 3 - 5.7
+ 4 - 5.0
+ 5 - 4.0
+ 6 - 3.0
+ 7 - 2.0
+ 7 - 2.0
+ 8 - 20.0
+ 9 - 16.7
+ a - 13.9
+ b - 11.9
+
+Global settings
+
+Host flags 0 (addr 0x100000, 32)
+
+ x x x x x x x x x x x x x x x x
+ | | | | | | | | | | | |
+ | | | | | | | | ----------- host ID 0x00 - 0x0f
+ | | | | | | | |
+ | | | | | | | ----------------------- support for 0 - off
+ | | | | | | | > 2 drives 1 - on
+ | | | | | | |
+ | | | | | | ------------------------- support drives 0 - off
+ | | | | | | > 1Gbytes 1 - on
+ | | | | | |
+ | | | | | --------------------------- bus reset on 0 - off
+ | | | | | power on 1 - on
+ | | | | |
+ | | | | ----------------------------- active neg 0 - off
+ | | | | 1 - on
+ | | | |
+ | | | -------------------------------- imm seek 0 - off
+ | | | 1 - on
+ | | |
+ | | ---------------------------------- scan luns 0 - off
+ | | 1 - on
+ | |
+ -------------------------------------- removable 0 - disable
+ as BIOS dev 1 - boot device
+ 2 - all
+
+Host flags 1 (addr 0x100001, 33)
+
+ x x x x x x x x x x x x x x x x
+ | | | | | |
+ | | | --------- boot delay 0 - 3 sec
+ | | | 1 - 5
+ | | | 2 - 10
+ | | | 3 - 20
+ | | | 4 - 30
+ | | | 5 - 60
+ | | | 6 - 120
+ | | |
+ --------------------------- max tag cmds 0 - 2
+ 1 - 4
+ 2 - 8
+ 3 - 16
+ 4 - 32
+
+Host flags 2 (addr 0x100010, 34)
+
+ x x x x x x x x x x x x x x x x
+ |
+ ----- F2/F6 enable 0 - off ???
+ 1 - on ???
+
+checksum (addr 0x111111)
+
+checksum = 0x1234 - (sum addr 0-63)
+
+----------------------------------------------------------------------------
+
+default nvram data:
+
+0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000
+0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000
+0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000
+0x0037 0x0000 0x0037 0x0000 0x0037 0x0000 0x0037 0x0000
+
+0x0f07 0x0400 0x0001 0x0000 0x0000 0x0000 0x0000 0x0000
+0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000
+0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000
+0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0x0000 0xfbbc
+
+
+===============================================================================
+End of Linux SYM-2 driver documentation file
diff --git a/Documentation/scsi/tmscsim.txt b/Documentation/scsi/tmscsim.txt
new file mode 100644
index 000000000..0e0322bf0
--- /dev/null
+++ b/Documentation/scsi/tmscsim.txt
@@ -0,0 +1,443 @@
+The tmscsim driver
+==================
+
+1. Purpose and history
+2. Installation
+3. Features
+4. Configuration via /proc/scsi/tmscsim/?
+5. Configuration via boot/module params
+6. Potential improvements
+7. Bug reports, debugging and updates
+8. Acknowledgements
+9. Copyright
+
+
+1. Purpose and history
+----------------------
+The tmscsim driver supports PCI SCSI Host Adapters based on the AM53C974
+chip. AM53C974 based SCSI adapters include:
+ Tekram DC390, DC390T
+ Dawicontrol 2974
+ QLogic Fast! PCI Basic
+ some on-board adapters
+(This is most probably not a complete list)
+
+It has originally written by C.L. Huang from the Tekram corp. to support the
+Tekram DC390(T) adapter. This is where the name comes from: tm = Tekram
+scsi = SCSI driver, m = AMD (?) as opposed to w for the DC390W/U/F
+(NCR53c8X5, X=2/7) driver. Yes, there was also a driver for the latter,
+tmscsiw, which supported DC390W/U/F adapters. It's not maintained any more,
+as the ncr53c8xx is perfectly supporting these adapters since some time.
+
+The driver first appeared in April 1996, exclusively supported the DC390
+and has been enhanced since then in various steps. In May 1998 support for
+general AM53C974 based adapters and some possibilities to configure it were
+added. The non-DC390 support works by assuming some values for the data
+normally taken from the DC390 EEPROM. See below (chapter 5) for details.
+
+When using the DC390, the configuration is still be done using the DC390
+BIOS setup. The DC390 EEPROM is read and used by the driver, any boot or
+module parameters (chapter 5) are ignored! However, you can change settings
+dynamically, as described in chapter 4.
+
+For a more detailed description of the driver's history, see the first lines
+of tmscsim.c.
+The numbering scheme isn't consistent. The first versions went from 1.00 to
+1.12, then 1.20a to 1.20t. Finally I decided to use the ncr53c8xx scheme. So
+the next revisions will be 2.0a to 2.0X (stable), 2.1a to 2.1X (experimental),
+2.2a to 2.2X (stable, again) etc. (X = anything between a and z.) If I send
+fixes to people for testing, I create intermediate versions with a digit
+appended, e.g. 2.0c3.
+
+
+2. Installation
+---------------
+If you got any recent kernel with this driver and document included in
+linux/drivers/scsi, you basically have to do nothing special to use this
+driver. Of course you have to choose to compile SCSI support and DC390(T)
+support into your kernel or as module when configuring your kernel for
+compiling.
+NEW: You may as well compile this module outside your kernel, using the
+supplied Makefile.
+
+ If you got an old kernel (pre 2.1.127, pre 2.0.37p1) with an old version of
+ this driver: Get dc390-21125-20b.diff.gz or dc390-2036p21-20b1.diff.gz from
+ my web page and apply the patch. Apply further patches to upgrade to the
+ latest version of the driver.
+
+ If you want to do it manually, you should copy the files (dc390.h,
+ tmscsim.h, tmscsim.c, scsiiom.c and README.tmscsim) from this directory to
+ linux/drivers/scsi. You have to recompile your kernel/module of course.
+
+ You should apply the three patches included in dc390-120-kernel.diff
+ (Applying them: cd /usr/src; patch -p0 <~/dc390-120-kernel.diff)
+ The patches are against 2.1.125, so you might have to manually resolve
+ rejections when applying to another kernel version.
+
+ The patches will update the kernel startup code to allow boot parameters to
+ be passed to the driver, update the Documentation and finally offer you the
+ possibility to omit the non-DC390 parts of the driver.
+ (By selecting "Omit support for non DC390" you basically disable the
+ emulation of a DC390 EEPROM for non DC390 adapters. This saves a few bytes
+ of memory.)
+
+If you got a very old kernel without the tmscsim driver (pre 2.0.31)
+I recommend upgrading your kernel. However, if you don't want to, please
+contact me to get the appropriate patches.
+
+
+Upgrading a SCSI driver is always a delicate thing to do. The 2.0 driver has
+proven stable on many systems, but it's still a good idea to take some
+precautions. In an ideal world you would have a full backup of your disks.
+The world isn't ideal and most people don't have full backups (me neither).
+So take at least the following measures:
+* make your kernel remount the FS read-only on detecting an error:
+ tune2fs -e remount-ro /dev/sd??
+* have copies of your SCSI disk's partition tables on some safe location:
+ dd if=/dev/sda of=/mnt/floppy/sda bs=512 count=1
+ or just print it with:
+ fdisk -l | lpr
+* make sure you are able to boot Linux (e.g. from floppy disk using InitRD)
+ if your SCSI disk gets corrupted. You can use
+ ftp://student.physik.uni-dortmund.de/pub/linux/kernel/bootdisk.gz
+
+One more warning: I used to overclock my PCI bus to 41.67 MHz. My Tekram
+DC390F (Sym53c875) accepted this as well as my Millennium. But the Am53C974
+produced errors and started to corrupt my disks. So don't do that! A 37.50
+MHz PCI bus works for me, though, but I don't recommend using higher clocks
+than the 33.33 MHz being in the PCI spec.
+
+
+3.Features
+----------
+- SCSI
+ * Tagged command queueing
+ * Sync speed up to 10 MHz
+ * Disconnection
+ * Multiple LUNs
+
+- General / Linux interface
+ * Support for up to 4 AM53C974 adapters.
+ * DC390 EEPROM usage or boot/module params
+ * Information via cat /proc/scsi/tmscsim/?
+ * Dynamically configurable by writing to /proc/scsi/tmscsim/?
+ * Dynamic allocation of resources
+ * SMP support: Locking on io_request lock (Linux 2.1/2.2) or adapter
+ specific locks (Linux 2.5?)
+ * Uniform source code for Linux-2.x.y
+ * Support for dyn. addition/removal of devices via add/remove-single-device
+ (Try: echo "scsi add-single-device C B T U" >/proc/scsi/scsi
+ C = Controller, B = Bus, T = Target SCSI ID, U = Unit SCSI LUN.)
+ Use with care!
+ * Try to use the partition table for the determination of the mapping
+
+
+4. Configuration via /proc/scsi/tmscsim/?
+-----------------------------------------
+First of all look at the output of /proc/scsi/tmscsim/? by typing
+ cat /proc/scsi/tmscsim/?
+The "?" should be replaced by the SCSI host number. (The shell might do this
+for you.)
+You will see some info regarding the adapter and, at the end, a listing of
+the attached devices and their settings.
+
+Here's an example:
+garloff@kurt:/home/garloff > cat /proc/scsi/tmscsim/0
+Tekram DC390/AM53C974 PCI SCSI Host Adapter, Driver Version 2.0e7 2000-11-28
+SCSI Host Nr 1, AM53C974 Adapter Nr 0
+IOPortBase 0xb000, IRQ 10
+MaxID 8, MaxLUN 8, AdapterID 6, SelTimeout 250 ms, DelayReset 1 s
+TagMaxNum 16, Status 0x00, ACBFlag 0x00, GlitchEater 24 ns
+Statistics: Cmnds 1470165, Cmnds not sent directly 0, Out of SRB conds 0
+ Lost arbitrations 587, Sel. connected 0, Connected: No
+Nr of attached devices: 4, Nr of DCBs: 4
+Map of attached LUNs: 01 00 00 03 01 00 00 00
+Idx ID LUN Prty Sync DsCn SndS TagQ NegoPeriod SyncSpeed SyncOffs MaxCmd
+00 00 00 Yes Yes Yes Yes Yes 100 ns 10.0 M 15 16
+01 03 00 Yes Yes Yes Yes No 100 ns 10.0 M 15 01
+02 03 01 Yes Yes Yes Yes No 100 ns 10.0 M 15 01
+03 04 00 Yes Yes Yes Yes No 100 ns 10.0 M 15 01
+
+Note that the settings MaxID and MaxLUN are not zero- but one-based, which
+means that a setting MaxLUN=4, will result in the support of LUNs 0..3. This
+is somehow inconvenient, but the way the mid-level SCSI code expects it to be.
+
+ACB and DCB are acronyms for Adapter Control Block and Device Control Block.
+These are data structures of the driver containing information about the
+adapter and the connected SCSI devices respectively.
+
+Idx is the device index (just a consecutive number for the driver), ID and
+LUN are the SCSI ID and LUN, Prty means Parity checking, Sync synchronous
+negotiation, DsCn Disconnection, SndS Send Start command on startup (not
+used by the driver) and TagQ Tagged Command Queueing. NegoPeriod and
+SyncSpeed are somehow redundant, because they are reciprocal values
+(1 / 112 ns = 8.9 MHz). At least in theory. The driver is able to adjust the
+NegoPeriod more accurate (4ns) than the SyncSpeed (1 / 25ns). I don't know
+if certain devices will have problems with this discrepancy. Max. speed is
+10 MHz corresp. to a min. NegoPeriod of 100 ns.
+(The driver allows slightly higher speeds if the devices (Ultra SCSI) accept
+it, but that's out of adapter spec, on your own risk and unlikely to improve
+performance. You're likely to crash your disks.)
+SyncOffs is the offset used for synchronous negotiations; max. is 15.
+The last values are only shown, if Sync is enabled. (NegoPeriod is still
+displayed in brackets to show the values which will be used after enabling
+Sync.)
+MaxCmd ist the number of commands (=tags) which can be processed at the same
+time by the device.
+
+If you want to change a setting, you can do that by writing to
+/proc/scsi/tmscsim/?. Basically you have to imitate the output of driver.
+(Don't use the brackets for NegoPeriod on Sync disabled devices.)
+You don't have to care about capitalisation. The driver will accept space,
+tab, comma, = and : as separators.
+
+There are three kinds of changes:
+
+(1) Change driver settings:
+ You type the names of the parameters and the params following it.
+ Example:
+ echo "MaxLUN=8 seltimeout 200" >/proc/scsi/tmscsim/0
+
+ Note that you can only change MaxID, MaxLUN, AdapterID, SelTimeOut,
+ TagMaxNum, ACBFlag, GlitchEater and DelayReset. Don't change ACBFlag
+ unless you want to see what happens, if the driver hangs.
+
+(2) Change device settings: You write a config line to the driver. The Nr
+ must match the ID and LUN given. If you give "-" as parameter, it is
+ ignored and the corresponding setting won't be changed.
+ You can use "y" or "n" instead of "Yes" and "No" if you want to.
+ You don't need to specify a full line. The driver automatically performs
+ an INQUIRY on the device if necessary to check if it is capable to operate
+ with the given settings (Sync, TagQ).
+ Examples:
+ echo "0 0 0 y y y - y - 10 " >/proc/scsi/tmscsim/0
+ echo "3 5 0 y n y " >/proc/scsi/tmscsim/0
+
+ To give a short explanation of the first example:
+ The first three numbers, "0 0 0" (Device index 0, SCSI ID 0, SCSI LUN 0),
+ select the device to which the following parameters apply. Note that it
+ would be sufficient to use the index or both SCSI ID and LUN, but I chose
+ to require all three to have a syntax similar to the output.
+ The following "y y y - y" enables Parity checking, enables Synchronous
+ transfers, Disconnection, leaves Send Start (not used) untouched and
+ enables Tagged Command Queueing for the selected device. The "-" skips
+ the Negotiation Period setting but the "10" sets the max sync. speed to
+ 10 MHz. It's useless to specify both NegoPeriod and SyncSpeed as
+ discussed above. The values used in this example will result in maximum
+ performance.
+
+(3) Special commands: You can force a SCSI bus reset, an INQUIRY command, the
+ removal or the addition of a device's DCB and a SCSI register dump.
+ This is only used for debugging when you meet problems. The parameter of
+ the INQUIRY and REMOVE commands is the device index as shown by the
+ output of /proc/scsi/tmscsim/? in the device listing in the first column
+ (Idx). ADD takes the SCSI ID and LUN.
+ Examples:
+ echo "reset" >/proc/scsi/tmscsim/0
+ echo "inquiry 1" >/proc/scsi/tmscsim/0
+ echo "remove 2" >/proc/scsi/tmscsim/1
+ echo "add 2 3" >/proc/scsi/tmscsim/?
+ echo "dump" >/proc/scsi/tmscsim/0
+
+ Note that you will meet problems when you REMOVE a device's DCB with the
+ remove command if it contains partitions which are mounted. Only use it
+ after unmounting its partitions, telling the SCSI mid-level code to
+ remove it (scsi remove-single-device) and you really need a few bytes of
+ memory.
+ The ADD command allows you to configure a device before you tell the
+ mid-level code to try detection.
+
+
+I'd suggest reviewing the output of /proc/scsi/tmscsim/? after changing
+settings to see if everything changed as requested.
+
+
+5. Configuration via boot/module parameters
+-------------------------------------------
+With the DC390, the driver reads its EEPROM settings and tries to use them.
+But you may want to override the settings prior to being able to change the
+driver configuration via /proc/scsi/tmscsim/?.
+If you do have another AM53C974 based adapter, that's even the only
+possibility to adjust settings before you are able to write to the
+/proc/scsi/tmscsim/? pseudo-file, e.g. if you want to use another
+adapter ID than 7.
+(BTW, the log message "DC390: No EEPROM found!" is normal without a DC390.)
+For this purpose, you can pass options to the driver before it is initialised
+by using kernel or module parameters. See lilo(8) or modprobe(1) manual
+pages on how to pass params to the kernel or a module.
+[NOTE: Formerly, it was not possible to override the EEPROM supplied
+ settings of the DC390 with cmd line parameters. This has changed since
+ 2.0e7]
+
+The syntax of the params is much shorter than the syntax of the /proc/...
+interface. This makes it a little bit more difficult to use. However, long
+parameter lines have the risk to be misinterpreted and the length of kernel
+parameters is limited.
+
+As the support for non-DC390 adapters works by simulating the values of the
+DC390 EEPROM, the settings are given in a DC390 BIOS' way.
+
+Here's the syntax:
+tmscsim=AdaptID,SpdIdx,DevMode,AdaptMode,TaggedCmnds,DelayReset
+
+Each of the parameters is a number, containing the described information:
+
+* AdaptID: The SCSI ID of the host adapter. Must be in the range 0..7
+ Default is 7.
+
+* SpdIdx: The index of the maximum speed as in the DC390 BIOS. The values
+ 0..7 mean 10, 8.0, 6.7, 5.7, 5.0, 4.0, 3.1 and 2 MHz resp. Default is
+ 0 (10.0 MHz).
+
+* DevMode is a bit mapped value describing the per-device features. It
+ applies to all devices. (Sync, Disc and TagQ will only apply, if the
+ device supports it.) The meaning of the bits (* = default):
+
+ Bit Val(hex) Val(dec) Meaning
+ *0 0x01 1 Parity check
+ *1 0x02 2 Synchronous Negotiation
+ *2 0x04 4 Disconnection
+ *3 0x08 8 Send Start command on startup. (Not used)
+ *4 0x10 16 Tagged Command Queueing
+
+ As usual, the desired value is obtained by adding the wanted values. If
+ you want to enable all values, e.g., you would use 31(0x1f). Default is 31.
+
+* AdaptMode is a bit mapped value describing the enabled adapter features.
+
+ Bit Val(hex) Val(dec) Meaning
+ *0 0x01 1 Support more than two drives. (Not used)
+ *1 0x02 2 Use DOS compatible mapping for HDs greater than 1GB.
+ *2 0x04 4 Reset SCSI Bus on startup.
+ *3 0x08 8 Active Negation: Improves SCSI Bus noise immunity.
+ 4 0x10 16 Immediate return on BIOS seek command. (Not used)
+ (*)5 0x20 32 Check for LUNs >= 1.
+
+* TaggedCmnds is a number indicating the maximum number of Tagged Commands.
+ It is the binary logarithm - 1 of the actual number. Max is 4 (32).
+ Value Number of Tagged Commands
+ 0 2
+ 1 4
+ 2 8
+ *3 16
+ 4 32
+
+* DelayReset is the time in seconds (minus 0.5s), the adapter waits, after a
+ bus reset. Default is 1 (corresp. to 1.5s).
+
+Example:
+ modprobe tmscsim tmscsim=6,2,31
+would set the adapter ID to 6, max. speed to 6.7 MHz, enable all device
+features and leave the adapter features, the number of Tagged Commands
+and the Delay after a reset to the defaults.
+
+As you can see, you don't need to specify all of the six params.
+If you want values to be ignored (i.e. the EEprom settings or the defaults
+will be used), you may pass -2 (not 0!) at the corresponding position.
+
+The defaults (7,0,31,15,3,1) are aggressive to allow good performance. You
+can use tmscsim=7,0,31,63,4,0 for maximum performance, if your SCSI chain
+allows it. If you meet problems, you can use tmscsim=-1 which is a shortcut
+for tmscsim=7,4,9,15,2,10.
+
+
+6. Potential improvements
+-------------------------
+Most of the intended work on the driver has been done. Here are a few ideas
+to further improve its usability:
+
+* Cleanly separate per-Target and per-LUN properties (DCB)
+* More intelligent abort() routine
+* Use new_eh code (Linux-2.1+)
+* Have the mid-level (ML) code (and not the driver) handle more of the
+ various conditions.
+* Command queueing in the driver: Eliminate Query list and use ML instead.
+* More user friendly boot/module param syntax
+
+Further investigation on these problems:
+
+* Driver hangs with sync readcdda (xcdroast) (most probably VIA PCI error)
+
+Known problems:
+Please see http://www.garloff.de/kurt/linux/dc390/problems.html
+
+* Changing the parameters of multi-lun by the tmscsim/? interface will
+ cause problems, cause these settings are mostly per Target and not per LUN
+ and should be updated accordingly. To be fixed for 2.0d24.
+* CDRs (eg Yam CRW4416) not recognized, because some buggy devices don't
+ recover from a SCSI reset in time. Use a higher delay or don't issue
+ a SCSI bus reset on driver initialization. See problems page.
+ For the CRW4416S, this seems to be solved with firmware 1.0g (reported by
+ Jean-Yves Barbier).
+* TEAC CD-532S not being recognized. (Works with 1.11).
+* Scanners (eg. Astra UMAX 1220S) don't work: Disable Sync Negotiation.
+ If this does not help, try echo "INQUIRY t" >/proc/scsi/tmscsim/? (t
+ replaced by the dev index of your scanner). You may try to reset your SCSI
+ bus afterwards (echo "RESET" >/proc/scsi/tmscsim/?).
+ The problem seems to be solved as of 2.0d18, thanks to Andreas Rick.
+* If there is a valid partition table, the driver will use it for determining
+ the mapping. If there's none, a reasonable mapping (Symbios-like) will be
+ assumed. Other operating systems may not like this mapping, though
+ it's consistent with the BIOS' behaviour. Old DC390 drivers ignored the
+ partition table and used a H/S = 64/32 or 255/63 translation. So if you
+ want to be compatible to those, use this old mapping when creating
+ partition tables. Even worse, on bootup the DC390 might complain if other
+ mappings are found, so auto rebooting may fail.
+* In some situations, the driver will get stuck in an abort loop. This is a
+ bad interaction between the Mid-Layer of Linux' SCSI code and the driver.
+ Try to disable DsCn, if you meet this problem. Please contact me for
+ further debugging.
+
+
+7. Bug reports, debugging and updates
+-------------------------------------
+Whenever you have problems with the driver, you are invited to ask the
+author for help. However, I'd suggest reading the docs and trying to solve
+the problem yourself, first.
+If you find something, which you believe to be a bug, please report it to me.
+Please append the output of /proc/scsi/scsi, /proc/scsi/tmscsim/? and
+maybe the DC390 log messages to the report.
+
+Bug reports should be send to me (Kurt Garloff <dc390@garloff.de>) as well
+as to the linux-scsi list (<linux-scsi@vger.kernel.org>), as sometimes bugs
+are caused by the SCSI mid-level code.
+
+I will ask you for some more details and probably I will also ask you to
+enable some of the DEBUG options in the driver (tmscsim.c:DC390_DEBUGXXX
+defines). The driver will produce some data for the syslog facility then.
+Beware: If your syslog gets written to a SCSI disk connected to your
+AM53C974, the logging might produce log output again, and you might end
+having your box spending most of its time doing the logging.
+
+The latest version of the driver can be found at:
+ http://www.garloff.de/kurt/linux/dc390/
+ ftp://ftp.suse.com/pub/people/garloff/linux/dc390/
+
+
+8. Acknowledgements
+-------------------
+Thanks to Linus Torvalds, Alan Cox, the FSF people, the XFree86 team and
+all the others for the wonderful OS and software.
+Thanks to C.L. Huang and Philip Giang (Tekram) for the initial driver
+release and support.
+Thanks to Doug Ledford, Gérard Roudier for support with SCSI coding.
+Thanks to a lot of people (espec. Chiaki Ishikawa, Andreas Haumer, Hubert
+Tonneau) for intensively testing the driver (and even risking data loss
+doing this during early revisions).
+Recently, SuSE GmbH, Nuernberg, FRG, has been paying me for the driver
+development and maintenance. Special thanks!
+
+
+9. Copyright
+------------
+ This driver is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; version 2 of the License.
+ If you want to use any later version of the GNU GPL, you will probably
+ be allowed to, but you have to ask me and Tekram <erich@tekram.com.tw>
+ before.
+
+-------------------------------------------------------------------------
+Written by Kurt Garloff <kurt@garloff.de> 1998/06/11
+Last updated 2000/11/28, driver revision 2.0e7
+$Id: README.tmscsim,v 2.25.2.7 2000/12/20 01:07:12 garloff Exp $
diff --git a/Documentation/scsi/ufs.txt b/Documentation/scsi/ufs.txt
new file mode 100644
index 000000000..41a616459
--- /dev/null
+++ b/Documentation/scsi/ufs.txt
@@ -0,0 +1,133 @@
+ Universal Flash Storage
+ =======================
+
+
+Contents
+--------
+
+1. Overview
+2. UFS Architecture Overview
+ 2.1 Application Layer
+ 2.2 UFS Transport Protocol(UTP) layer
+ 2.3 UFS Interconnect(UIC) Layer
+3. UFSHCD Overview
+ 3.1 UFS controller initialization
+ 3.2 UTP Transfer requests
+ 3.3 UFS error handling
+ 3.4 SCSI Error handling
+
+
+1. Overview
+-----------
+
+Universal Flash Storage(UFS) is a storage specification for flash devices.
+It is aimed to provide a universal storage interface for both
+embedded and removable flash memory based storage in mobile
+devices such as smart phones and tablet computers. The specification
+is defined by JEDEC Solid State Technology Association. UFS is based
+on MIPI M-PHY physical layer standard. UFS uses MIPI M-PHY as the
+physical layer and MIPI Unipro as the link layer.
+
+The main goals of UFS is to provide,
+ * Optimized performance:
+ For UFS version 1.0 and 1.1 the target performance is as follows,
+ Support for Gear1 is mandatory (rate A: 1248Mbps, rate B: 1457.6Mbps)
+ Support for Gear2 is optional (rate A: 2496Mbps, rate B: 2915.2Mbps)
+ Future version of the standard,
+ Gear3 (rate A: 4992Mbps, rate B: 5830.4Mbps)
+ * Low power consumption
+ * High random IOPs and low latency
+
+
+2. UFS Architecture Overview
+----------------------------
+
+UFS has a layered communication architecture which is based on SCSI
+SAM-5 architectural model.
+
+UFS communication architecture consists of following layers,
+
+2.1 Application Layer
+
+ The Application layer is composed of UFS command set layer(UCS),
+ Task Manager and Device manager. The UFS interface is designed to be
+ protocol agnostic, however SCSI has been selected as a baseline
+ protocol for versions 1.0 and 1.1 of UFS protocol layer.
+ UFS supports subset of SCSI commands defined by SPC-4 and SBC-3.
+ * UCS: It handles SCSI commands supported by UFS specification.
+ * Task manager: It handles task management functions defined by the
+ UFS which are meant for command queue control.
+ * Device manager: It handles device level operations and device
+ configuration operations. Device level operations mainly involve
+ device power management operations and commands to Interconnect
+ layers. Device level configurations involve handling of query
+ requests which are used to modify and retrieve configuration
+ information of the device.
+
+2.2 UFS Transport Protocol(UTP) layer
+
+ UTP layer provides services for
+ the higher layers through Service Access Points. UTP defines 3
+ service access points for higher layers.
+ * UDM_SAP: Device manager service access point is exposed to device
+ manager for device level operations. These device level operations
+ are done through query requests.
+ * UTP_CMD_SAP: Command service access point is exposed to UFS command
+ set layer(UCS) to transport commands.
+ * UTP_TM_SAP: Task management service access point is exposed to task
+ manager to transport task management functions.
+ UTP transports messages through UFS protocol information unit(UPIU).
+
+2.3 UFS Interconnect(UIC) Layer
+
+ UIC is the lowest layer of UFS layered architecture. It handles
+ connection between UFS host and UFS device. UIC consists of
+ MIPI UniPro and MIPI M-PHY. UIC provides 2 service access points
+ to upper layer,
+ * UIC_SAP: To transport UPIU between UFS host and UFS device.
+ * UIO_SAP: To issue commands to Unipro layers.
+
+
+3. UFSHCD Overview
+------------------
+
+The UFS host controller driver is based on Linux SCSI Framework.
+UFSHCD is a low level device driver which acts as an interface between
+SCSI Midlayer and PCIe based UFS host controllers.
+
+The current UFSHCD implementation supports following functionality,
+
+3.1 UFS controller initialization
+
+ The initialization module brings UFS host controller to active state
+ and prepares the controller to transfer commands/response between
+ UFSHCD and UFS device.
+
+3.2 UTP Transfer requests
+
+ Transfer request handling module of UFSHCD receives SCSI commands
+ from SCSI Midlayer, forms UPIUs and issues the UPIUs to UFS Host
+ controller. Also, the module decodes, responses received from UFS
+ host controller in the form of UPIUs and intimates the SCSI Midlayer
+ of the status of the command.
+
+3.3 UFS error handling
+
+ Error handling module handles Host controller fatal errors,
+ Device fatal errors and UIC interconnect layer related errors.
+
+3.4 SCSI Error handling
+
+ This is done through UFSHCD SCSI error handling routines registered
+ with SCSI Midlayer. Examples of some of the error handling commands
+ issues by SCSI Midlayer are Abort task, Lun reset and host reset.
+ UFSHCD Routines to perform these tasks are registered with
+ SCSI Midlayer through .eh_abort_handler, .eh_device_reset_handler and
+ .eh_host_reset_handler.
+
+In this version of UFSHCD Query requests and power management
+functionality are not implemented.
+
+UFS Specifications can be found at,
+UFS - http://www.jedec.org/sites/default/files/docs/JESD220.pdf
+UFSHCI - http://www.jedec.org/sites/default/files/docs/JESD223.pdf
diff --git a/Documentation/scsi/wd719x.txt b/Documentation/scsi/wd719x.txt
new file mode 100644
index 000000000..74b3f011f
--- /dev/null
+++ b/Documentation/scsi/wd719x.txt
@@ -0,0 +1,4 @@
+Driver for Western Digital WD7193, WD7197 and WD7296 SCSI cards
+---------------------------------------------------------------
+
+/*(DEBLOBBED)*/
diff --git a/Documentation/security/00-INDEX b/Documentation/security/00-INDEX
new file mode 100644
index 000000000..45c82fd3e
--- /dev/null
+++ b/Documentation/security/00-INDEX
@@ -0,0 +1,26 @@
+00-INDEX
+ - this file.
+LSM.txt
+ - description of the Linux Security Module framework.
+SELinux.txt
+ - how to get started with the SELinux security enhancement.
+Smack.txt
+ - documentation on the Smack Linux Security Module.
+Yama.txt
+ - documentation on the Yama Linux Security Module.
+apparmor.txt
+ - documentation on the AppArmor security extension.
+credentials.txt
+ - documentation about credentials in Linux.
+keys-ecryptfs.txt
+ - description of the encryption keys for the ecryptfs filesystem.
+keys-request-key.txt
+ - description of the kernel key request service.
+keys-trusted-encrypted.txt
+ - info on the Trusted and Encrypted keys in the kernel key ring service.
+keys.txt
+ - description of the kernel key retention service.
+tomoyo.txt
+ - documentation on the TOMOYO Linux Security Module.
+IMA-templates.txt
+ - documentation on the template management mechanism for IMA.
diff --git a/Documentation/security/IMA-templates.txt b/Documentation/security/IMA-templates.txt
new file mode 100644
index 000000000..839b5dad9
--- /dev/null
+++ b/Documentation/security/IMA-templates.txt
@@ -0,0 +1,88 @@
+ IMA Template Management Mechanism
+
+
+==== INTRODUCTION ====
+
+The original 'ima' template is fixed length, containing the filedata hash
+and pathname. The filedata hash is limited to 20 bytes (md5/sha1).
+The pathname is a null terminated string, limited to 255 characters.
+To overcome these limitations and to add additional file metadata, it is
+necessary to extend the current version of IMA by defining additional
+templates. For example, information that could be possibly reported are
+the inode UID/GID or the LSM labels either of the inode and of the process
+that is accessing it.
+
+However, the main problem to introduce this feature is that, each time
+a new template is defined, the functions that generate and display
+the measurements list would include the code for handling a new format
+and, thus, would significantly grow over the time.
+
+The proposed solution solves this problem by separating the template
+management from the remaining IMA code. The core of this solution is the
+definition of two new data structures: a template descriptor, to determine
+which information should be included in the measurement list; a template
+field, to generate and display data of a given type.
+
+Managing templates with these structures is very simple. To support
+a new data type, developers define the field identifier and implement
+two functions, init() and show(), respectively to generate and display
+measurement entries. Defining a new template descriptor requires
+specifying the template format (a string of field identifiers separated
+by the '|' character) through the 'ima_template_fmt' kernel command line
+parameter. At boot time, IMA initializes the chosen template descriptor
+by translating the format into an array of template fields structures taken
+from the set of the supported ones.
+
+After the initialization step, IMA will call ima_alloc_init_template()
+(new function defined within the patches for the new template management
+mechanism) to generate a new measurement entry by using the template
+descriptor chosen through the kernel configuration or through the newly
+introduced 'ima_template' and 'ima_template_fmt' kernel command line parameters.
+It is during this phase that the advantages of the new architecture are
+clearly shown: the latter function will not contain specific code to handle
+a given template but, instead, it simply calls the init() method of the template
+fields associated to the chosen template descriptor and store the result
+(pointer to allocated data and data length) in the measurement entry structure.
+
+The same mechanism is employed to display measurements entries.
+The functions ima[_ascii]_measurements_show() retrieve, for each entry,
+the template descriptor used to produce that entry and call the show()
+method for each item of the array of template fields structures.
+
+
+
+==== SUPPORTED TEMPLATE FIELDS AND DESCRIPTORS ====
+
+In the following, there is the list of supported template fields
+('<identifier>': description), that can be used to define new template
+descriptors by adding their identifier to the format string
+(support for more data types will be added later):
+
+ - 'd': the digest of the event (i.e. the digest of a measured file),
+ calculated with the SHA1 or MD5 hash algorithm;
+ - 'n': the name of the event (i.e. the file name), with size up to 255 bytes;
+ - 'd-ng': the digest of the event, calculated with an arbitrary hash
+ algorithm (field format: [<hash algo>:]digest, where the digest
+ prefix is shown only if the hash algorithm is not SHA1 or MD5);
+ - 'n-ng': the name of the event, without size limitations;
+ - 'sig': the file signature.
+
+
+Below, there is the list of defined template descriptors:
+ - "ima": its format is 'd|n';
+ - "ima-ng" (default): its format is 'd-ng|n-ng';
+ - "ima-sig": its format is 'd-ng|n-ng|sig'.
+
+
+
+==== USE ====
+
+To specify the template descriptor to be used to generate measurement entries,
+currently the following methods are supported:
+
+ - select a template descriptor among those supported in the kernel
+ configuration ('ima-ng' is the default choice);
+ - specify a template descriptor name from the kernel command line through
+ the 'ima_template=' parameter;
+ - register a new template descriptor with custom format through the kernel
+ command line parameter 'ima_template_fmt='.
diff --git a/Documentation/security/LSM.txt b/Documentation/security/LSM.txt
new file mode 100644
index 000000000..3db7e671c
--- /dev/null
+++ b/Documentation/security/LSM.txt
@@ -0,0 +1,34 @@
+Linux Security Module framework
+-------------------------------
+
+The Linux Security Module (LSM) framework provides a mechanism for
+various security checks to be hooked by new kernel extensions. The name
+"module" is a bit of a misnomer since these extensions are not actually
+loadable kernel modules. Instead, they are selectable at build-time via
+CONFIG_DEFAULT_SECURITY and can be overridden at boot-time via the
+"security=..." kernel command line argument, in the case where multiple
+LSMs were built into a given kernel.
+
+The primary users of the LSM interface are Mandatory Access Control
+(MAC) extensions which provide a comprehensive security policy. Examples
+include SELinux, Smack, Tomoyo, and AppArmor. In addition to the larger
+MAC extensions, other extensions can be built using the LSM to provide
+specific changes to system operation when these tweaks are not available
+in the core functionality of Linux itself.
+
+Without a specific LSM built into the kernel, the default LSM will be the
+Linux capabilities system. Most LSMs choose to extend the capabilities
+system, building their checks on top of the defined capability hooks.
+For more details on capabilities, see capabilities(7) in the Linux
+man-pages project.
+
+Based on https://lkml.org/lkml/2007/10/26/215,
+a new LSM is accepted into the kernel when its intent (a description of
+what it tries to protect against and in what cases one would expect to
+use it) has been appropriately documented in Documentation/security/.
+This allows an LSM's code to be easily compared to its goals, and so
+that end users and distros can make a more informed decision about which
+LSMs suit their requirements.
+
+For extensive documentation on the available LSM hook interfaces, please
+see include/linux/security.h.
diff --git a/Documentation/security/SELinux.txt b/Documentation/security/SELinux.txt
new file mode 100644
index 000000000..07eae00f3
--- /dev/null
+++ b/Documentation/security/SELinux.txt
@@ -0,0 +1,27 @@
+If you want to use SELinux, chances are you will want
+to use the distro-provided policies, or install the
+latest reference policy release from
+ http://oss.tresys.com/projects/refpolicy
+
+However, if you want to install a dummy policy for
+testing, you can do using 'mdp' provided under
+scripts/selinux. Note that this requires the selinux
+userspace to be installed - in particular you will
+need checkpolicy to compile a kernel, and setfiles and
+fixfiles to label the filesystem.
+
+ 1. Compile the kernel with selinux enabled.
+ 2. Type 'make' to compile mdp.
+ 3. Make sure that you are not running with
+ SELinux enabled and a real policy. If
+ you are, reboot with selinux disabled
+ before continuing.
+ 4. Run install_policy.sh:
+ cd scripts/selinux
+ sh install_policy.sh
+
+Step 4 will create a new dummy policy valid for your
+kernel, with a single selinux user, role, and type.
+It will compile the policy, will set your SELINUXTYPE to
+dummy in /etc/selinux/config, install the compiled policy
+as 'dummy', and relabel your filesystem.
diff --git a/Documentation/security/Smack.txt b/Documentation/security/Smack.txt
new file mode 100644
index 000000000..abc82f852
--- /dev/null
+++ b/Documentation/security/Smack.txt
@@ -0,0 +1,717 @@
+
+
+ "Good for you, you've decided to clean the elevator!"
+ - The Elevator, from Dark Star
+
+Smack is the Simplified Mandatory Access Control Kernel.
+Smack is a kernel based implementation of mandatory access
+control that includes simplicity in its primary design goals.
+
+Smack is not the only Mandatory Access Control scheme
+available for Linux. Those new to Mandatory Access Control
+are encouraged to compare Smack with the other mechanisms
+available to determine which is best suited to the problem
+at hand.
+
+Smack consists of three major components:
+ - The kernel
+ - Basic utilities, which are helpful but not required
+ - Configuration data
+
+The kernel component of Smack is implemented as a Linux
+Security Modules (LSM) module. It requires netlabel and
+works best with file systems that support extended attributes,
+although xattr support is not strictly required.
+It is safe to run a Smack kernel under a "vanilla" distribution.
+
+Smack kernels use the CIPSO IP option. Some network
+configurations are intolerant of IP options and can impede
+access to systems that use them as Smack does.
+
+The current git repository for Smack user space is:
+
+ git://github.com/smack-team/smack.git
+
+This should make and install on most modern distributions.
+There are five commands included in smackutil:
+
+chsmack - display or set Smack extended attribute values
+smackctl - load the Smack access rules
+smackaccess - report if a process with one label has access
+ to an object with another
+
+These two commands are obsolete with the introduction of
+the smackfs/load2 and smackfs/cipso2 interfaces.
+
+smackload - properly formats data for writing to smackfs/load
+smackcipso - properly formats data for writing to smackfs/cipso
+
+In keeping with the intent of Smack, configuration data is
+minimal and not strictly required. The most important
+configuration step is mounting the smackfs pseudo filesystem.
+If smackutil is installed the startup script will take care
+of this, but it can be manually as well.
+
+Add this line to /etc/fstab:
+
+ smackfs /sys/fs/smackfs smackfs defaults 0 0
+
+The /sys/fs/smackfs directory is created by the kernel.
+
+Smack uses extended attributes (xattrs) to store labels on filesystem
+objects. The attributes are stored in the extended attribute security
+name space. A process must have CAP_MAC_ADMIN to change any of these
+attributes.
+
+The extended attributes that Smack uses are:
+
+SMACK64
+ Used to make access control decisions. In almost all cases
+ the label given to a new filesystem object will be the label
+ of the process that created it.
+SMACK64EXEC
+ The Smack label of a process that execs a program file with
+ this attribute set will run with this attribute's value.
+SMACK64MMAP
+ Don't allow the file to be mmapped by a process whose Smack
+ label does not allow all of the access permitted to a process
+ with the label contained in this attribute. This is a very
+ specific use case for shared libraries.
+SMACK64TRANSMUTE
+ Can only have the value "TRUE". If this attribute is present
+ on a directory when an object is created in the directory and
+ the Smack rule (more below) that permitted the write access
+ to the directory includes the transmute ("t") mode the object
+ gets the label of the directory instead of the label of the
+ creating process. If the object being created is a directory
+ the SMACK64TRANSMUTE attribute is set as well.
+SMACK64IPIN
+ This attribute is only available on file descriptors for sockets.
+ Use the Smack label in this attribute for access control
+ decisions on packets being delivered to this socket.
+SMACK64IPOUT
+ This attribute is only available on file descriptors for sockets.
+ Use the Smack label in this attribute for access control
+ decisions on packets coming from this socket.
+
+There are multiple ways to set a Smack label on a file:
+
+ # attr -S -s SMACK64 -V "value" path
+ # chsmack -a value path
+
+A process can see the Smack label it is running with by
+reading /proc/self/attr/current. A process with CAP_MAC_ADMIN
+can set the process Smack by writing there.
+
+Most Smack configuration is accomplished by writing to files
+in the smackfs filesystem. This pseudo-filesystem is mounted
+on /sys/fs/smackfs.
+
+access
+ This interface reports whether a subject with the specified
+ Smack label has a particular access to an object with a
+ specified Smack label. Write a fixed format access rule to
+ this file. The next read will indicate whether the access
+ would be permitted. The text will be either "1" indicating
+ access, or "0" indicating denial.
+access2
+ This interface reports whether a subject with the specified
+ Smack label has a particular access to an object with a
+ specified Smack label. Write a long format access rule to
+ this file. The next read will indicate whether the access
+ would be permitted. The text will be either "1" indicating
+ access, or "0" indicating denial.
+ambient
+ This contains the Smack label applied to unlabeled network
+ packets.
+change-rule
+ This interface allows modification of existing access control rules.
+ The format accepted on write is:
+ "%s %s %s %s"
+ where the first string is the subject label, the second the
+ object label, the third the access to allow and the fourth the
+ access to deny. The access strings may contain only the characters
+ "rwxat-". If a rule for a given subject and object exists it will be
+ modified by enabling the permissions in the third string and disabling
+ those in the fourth string. If there is no such rule it will be
+ created using the access specified in the third and the fourth strings.
+cipso
+ This interface allows a specific CIPSO header to be assigned
+ to a Smack label. The format accepted on write is:
+ "%24s%4d%4d"["%4d"]...
+ The first string is a fixed Smack label. The first number is
+ the level to use. The second number is the number of categories.
+ The following numbers are the categories.
+ "level-3-cats-5-19 3 2 5 19"
+cipso2
+ This interface allows a specific CIPSO header to be assigned
+ to a Smack label. The format accepted on write is:
+ "%s%4d%4d"["%4d"]...
+ The first string is a long Smack label. The first number is
+ the level to use. The second number is the number of categories.
+ The following numbers are the categories.
+ "level-3-cats-5-19 3 2 5 19"
+direct
+ This contains the CIPSO level used for Smack direct label
+ representation in network packets.
+doi
+ This contains the CIPSO domain of interpretation used in
+ network packets.
+load
+ This interface allows access control rules in addition to
+ the system defined rules to be specified. The format accepted
+ on write is:
+ "%24s%24s%5s"
+ where the first string is the subject label, the second the
+ object label, and the third the requested access. The access
+ string may contain only the characters "rwxat-", and specifies
+ which sort of access is allowed. The "-" is a placeholder for
+ permissions that are not allowed. The string "r-x--" would
+ specify read and execute access. Labels are limited to 23
+ characters in length.
+load2
+ This interface allows access control rules in addition to
+ the system defined rules to be specified. The format accepted
+ on write is:
+ "%s %s %s"
+ where the first string is the subject label, the second the
+ object label, and the third the requested access. The access
+ string may contain only the characters "rwxat-", and specifies
+ which sort of access is allowed. The "-" is a placeholder for
+ permissions that are not allowed. The string "r-x--" would
+ specify read and execute access.
+load-self
+ This interface allows process specific access rules to be
+ defined. These rules are only consulted if access would
+ otherwise be permitted, and are intended to provide additional
+ restrictions on the process. The format is the same as for
+ the load interface.
+load-self2
+ This interface allows process specific access rules to be
+ defined. These rules are only consulted if access would
+ otherwise be permitted, and are intended to provide additional
+ restrictions on the process. The format is the same as for
+ the load2 interface.
+logging
+ This contains the Smack logging state.
+mapped
+ This contains the CIPSO level used for Smack mapped label
+ representation in network packets.
+netlabel
+ This interface allows specific internet addresses to be
+ treated as single label hosts. Packets are sent to single
+ label hosts without CIPSO headers, but only from processes
+ that have Smack write access to the host label. All packets
+ received from single label hosts are given the specified
+ label. The format accepted on write is:
+ "%d.%d.%d.%d label" or "%d.%d.%d.%d/%d label".
+onlycap
+ This contains the label processes must have for CAP_MAC_ADMIN
+ and CAP_MAC_OVERRIDE to be effective. If this file is empty
+ these capabilities are effective at for processes with any
+ label. The value is set by writing the desired label to the
+ file or cleared by writing "-" to the file.
+ptrace
+ This is used to define the current ptrace policy
+ 0 - default: this is the policy that relies on Smack access rules.
+ For the PTRACE_READ a subject needs to have a read access on
+ object. For the PTRACE_ATTACH a read-write access is required.
+ 1 - exact: this is the policy that limits PTRACE_ATTACH. Attach is
+ only allowed when subject's and object's labels are equal.
+ PTRACE_READ is not affected. Can be overridden with CAP_SYS_PTRACE.
+ 2 - draconian: this policy behaves like the 'exact' above with an
+ exception that it can't be overridden with CAP_SYS_PTRACE.
+revoke-subject
+ Writing a Smack label here sets the access to '-' for all access
+ rules with that subject label.
+unconfined
+ If the kernel is configured with CONFIG_SECURITY_SMACK_BRINGUP
+ a process with CAP_MAC_ADMIN can write a label into this interface.
+ Thereafter, accesses that involve that label will be logged and
+ the access permitted if it wouldn't be otherwise. Note that this
+ is dangerous and can ruin the proper labeling of your system.
+ It should never be used in production.
+
+You can add access rules in /etc/smack/accesses. They take the form:
+
+ subjectlabel objectlabel access
+
+access is a combination of the letters rwxatb which specify the
+kind of access permitted a subject with subjectlabel on an
+object with objectlabel. If there is no rule no access is allowed.
+
+Look for additional programs on http://schaufler-ca.com
+
+From the Smack Whitepaper:
+
+The Simplified Mandatory Access Control Kernel
+
+Casey Schaufler
+casey@schaufler-ca.com
+
+Mandatory Access Control
+
+Computer systems employ a variety of schemes to constrain how information is
+shared among the people and services using the machine. Some of these schemes
+allow the program or user to decide what other programs or users are allowed
+access to pieces of data. These schemes are called discretionary access
+control mechanisms because the access control is specified at the discretion
+of the user. Other schemes do not leave the decision regarding what a user or
+program can access up to users or programs. These schemes are called mandatory
+access control mechanisms because you don't have a choice regarding the users
+or programs that have access to pieces of data.
+
+Bell & LaPadula
+
+From the middle of the 1980's until the turn of the century Mandatory Access
+Control (MAC) was very closely associated with the Bell & LaPadula security
+model, a mathematical description of the United States Department of Defense
+policy for marking paper documents. MAC in this form enjoyed a following
+within the Capital Beltway and Scandinavian supercomputer centers but was
+often sited as failing to address general needs.
+
+Domain Type Enforcement
+
+Around the turn of the century Domain Type Enforcement (DTE) became popular.
+This scheme organizes users, programs, and data into domains that are
+protected from each other. This scheme has been widely deployed as a component
+of popular Linux distributions. The administrative overhead required to
+maintain this scheme and the detailed understanding of the whole system
+necessary to provide a secure domain mapping leads to the scheme being
+disabled or used in limited ways in the majority of cases.
+
+Smack
+
+Smack is a Mandatory Access Control mechanism designed to provide useful MAC
+while avoiding the pitfalls of its predecessors. The limitations of Bell &
+LaPadula are addressed by providing a scheme whereby access can be controlled
+according to the requirements of the system and its purpose rather than those
+imposed by an arcane government policy. The complexity of Domain Type
+Enforcement and avoided by defining access controls in terms of the access
+modes already in use.
+
+Smack Terminology
+
+The jargon used to talk about Smack will be familiar to those who have dealt
+with other MAC systems and shouldn't be too difficult for the uninitiated to
+pick up. There are four terms that are used in a specific way and that are
+especially important:
+
+ Subject: A subject is an active entity on the computer system.
+ On Smack a subject is a task, which is in turn the basic unit
+ of execution.
+
+ Object: An object is a passive entity on the computer system.
+ On Smack files of all types, IPC, and tasks can be objects.
+
+ Access: Any attempt by a subject to put information into or get
+ information from an object is an access.
+
+ Label: Data that identifies the Mandatory Access Control
+ characteristics of a subject or an object.
+
+These definitions are consistent with the traditional use in the security
+community. There are also some terms from Linux that are likely to crop up:
+
+ Capability: A task that possesses a capability has permission to
+ violate an aspect of the system security policy, as identified by
+ the specific capability. A task that possesses one or more
+ capabilities is a privileged task, whereas a task with no
+ capabilities is an unprivileged task.
+
+ Privilege: A task that is allowed to violate the system security
+ policy is said to have privilege. As of this writing a task can
+ have privilege either by possessing capabilities or by having an
+ effective user of root.
+
+Smack Basics
+
+Smack is an extension to a Linux system. It enforces additional restrictions
+on what subjects can access which objects, based on the labels attached to
+each of the subject and the object.
+
+Labels
+
+Smack labels are ASCII character strings. They can be up to 255 characters
+long, but keeping them to twenty-three characters is recommended.
+Single character labels using special characters, that being anything
+other than a letter or digit, are reserved for use by the Smack development
+team. Smack labels are unstructured, case sensitive, and the only operation
+ever performed on them is comparison for equality. Smack labels cannot
+contain unprintable characters, the "/" (slash), the "\" (backslash), the "'"
+(quote) and '"' (double-quote) characters.
+Smack labels cannot begin with a '-'. This is reserved for special options.
+
+There are some predefined labels:
+
+ _ Pronounced "floor", a single underscore character.
+ ^ Pronounced "hat", a single circumflex character.
+ * Pronounced "star", a single asterisk character.
+ ? Pronounced "huh", a single question mark character.
+ @ Pronounced "web", a single at sign character.
+
+Every task on a Smack system is assigned a label. The Smack label
+of a process will usually be assigned by the system initialization
+mechanism.
+
+Access Rules
+
+Smack uses the traditional access modes of Linux. These modes are read,
+execute, write, and occasionally append. There are a few cases where the
+access mode may not be obvious. These include:
+
+ Signals: A signal is a write operation from the subject task to
+ the object task.
+ Internet Domain IPC: Transmission of a packet is considered a
+ write operation from the source task to the destination task.
+
+Smack restricts access based on the label attached to a subject and the label
+attached to the object it is trying to access. The rules enforced are, in
+order:
+
+ 1. Any access requested by a task labeled "*" is denied.
+ 2. A read or execute access requested by a task labeled "^"
+ is permitted.
+ 3. A read or execute access requested on an object labeled "_"
+ is permitted.
+ 4. Any access requested on an object labeled "*" is permitted.
+ 5. Any access requested by a task on an object with the same
+ label is permitted.
+ 6. Any access requested that is explicitly defined in the loaded
+ rule set is permitted.
+ 7. Any other access is denied.
+
+Smack Access Rules
+
+With the isolation provided by Smack access separation is simple. There are
+many interesting cases where limited access by subjects to objects with
+different labels is desired. One example is the familiar spy model of
+sensitivity, where a scientist working on a highly classified project would be
+able to read documents of lower classifications and anything she writes will
+be "born" highly classified. To accommodate such schemes Smack includes a
+mechanism for specifying rules allowing access between labels.
+
+Access Rule Format
+
+The format of an access rule is:
+
+ subject-label object-label access
+
+Where subject-label is the Smack label of the task, object-label is the Smack
+label of the thing being accessed, and access is a string specifying the sort
+of access allowed. The access specification is searched for letters that
+describe access modes:
+
+ a: indicates that append access should be granted.
+ r: indicates that read access should be granted.
+ w: indicates that write access should be granted.
+ x: indicates that execute access should be granted.
+ t: indicates that the rule requests transmutation.
+ b: indicates that the rule should be reported for bring-up.
+
+Uppercase values for the specification letters are allowed as well.
+Access mode specifications can be in any order. Examples of acceptable rules
+are:
+
+ TopSecret Secret rx
+ Secret Unclass R
+ Manager Game x
+ User HR w
+ Snap Crackle rwxatb
+ New Old rRrRr
+ Closed Off -
+
+Examples of unacceptable rules are:
+
+ Top Secret Secret rx
+ Ace Ace r
+ Odd spells waxbeans
+
+Spaces are not allowed in labels. Since a subject always has access to files
+with the same label specifying a rule for that case is pointless. Only
+valid letters (rwxatbRWXATB) and the dash ('-') character are allowed in
+access specifications. The dash is a placeholder, so "a-r" is the same
+as "ar". A lone dash is used to specify that no access should be allowed.
+
+Applying Access Rules
+
+The developers of Linux rarely define new sorts of things, usually importing
+schemes and concepts from other systems. Most often, the other systems are
+variants of Unix. Unix has many endearing properties, but consistency of
+access control models is not one of them. Smack strives to treat accesses as
+uniformly as is sensible while keeping with the spirit of the underlying
+mechanism.
+
+File system objects including files, directories, named pipes, symbolic links,
+and devices require access permissions that closely match those used by mode
+bit access. To open a file for reading read access is required on the file. To
+search a directory requires execute access. Creating a file with write access
+requires both read and write access on the containing directory. Deleting a
+file requires read and write access to the file and to the containing
+directory. It is possible that a user may be able to see that a file exists
+but not any of its attributes by the circumstance of having read access to the
+containing directory but not to the differently labeled file. This is an
+artifact of the file name being data in the directory, not a part of the file.
+
+If a directory is marked as transmuting (SMACK64TRANSMUTE=TRUE) and the
+access rule that allows a process to create an object in that directory
+includes 't' access the label assigned to the new object will be that
+of the directory, not the creating process. This makes it much easier
+for two processes with different labels to share data without granting
+access to all of their files.
+
+IPC objects, message queues, semaphore sets, and memory segments exist in flat
+namespaces and access requests are only required to match the object in
+question.
+
+Process objects reflect tasks on the system and the Smack label used to access
+them is the same Smack label that the task would use for its own access
+attempts. Sending a signal via the kill() system call is a write operation
+from the signaler to the recipient. Debugging a process requires both reading
+and writing. Creating a new task is an internal operation that results in two
+tasks with identical Smack labels and requires no access checks.
+
+Sockets are data structures attached to processes and sending a packet from
+one process to another requires that the sender have write access to the
+receiver. The receiver is not required to have read access to the sender.
+
+Setting Access Rules
+
+The configuration file /etc/smack/accesses contains the rules to be set at
+system startup. The contents are written to the special file
+/sys/fs/smackfs/load2. Rules can be added at any time and take effect
+immediately. For any pair of subject and object labels there can be only
+one rule, with the most recently specified overriding any earlier
+specification.
+
+Task Attribute
+
+The Smack label of a process can be read from /proc/<pid>/attr/current. A
+process can read its own Smack label from /proc/self/attr/current. A
+privileged process can change its own Smack label by writing to
+/proc/self/attr/current but not the label of another process.
+
+File Attribute
+
+The Smack label of a filesystem object is stored as an extended attribute
+named SMACK64 on the file. This attribute is in the security namespace. It can
+only be changed by a process with privilege.
+
+Privilege
+
+A process with CAP_MAC_OVERRIDE or CAP_MAC_ADMIN is privileged.
+CAP_MAC_OVERRIDE allows the process access to objects it would
+be denied otherwise. CAP_MAC_ADMIN allows a process to change
+Smack data, including rules and attributes.
+
+Smack Networking
+
+As mentioned before, Smack enforces access control on network protocol
+transmissions. Every packet sent by a Smack process is tagged with its Smack
+label. This is done by adding a CIPSO tag to the header of the IP packet. Each
+packet received is expected to have a CIPSO tag that identifies the label and
+if it lacks such a tag the network ambient label is assumed. Before the packet
+is delivered a check is made to determine that a subject with the label on the
+packet has write access to the receiving process and if that is not the case
+the packet is dropped.
+
+CIPSO Configuration
+
+It is normally unnecessary to specify the CIPSO configuration. The default
+values used by the system handle all internal cases. Smack will compose CIPSO
+label values to match the Smack labels being used without administrative
+intervention. Unlabeled packets that come into the system will be given the
+ambient label.
+
+Smack requires configuration in the case where packets from a system that is
+not Smack that speaks CIPSO may be encountered. Usually this will be a Trusted
+Solaris system, but there are other, less widely deployed systems out there.
+CIPSO provides 3 important values, a Domain Of Interpretation (DOI), a level,
+and a category set with each packet. The DOI is intended to identify a group
+of systems that use compatible labeling schemes, and the DOI specified on the
+Smack system must match that of the remote system or packets will be
+discarded. The DOI is 3 by default. The value can be read from
+/sys/fs/smackfs/doi and can be changed by writing to /sys/fs/smackfs/doi.
+
+The label and category set are mapped to a Smack label as defined in
+/etc/smack/cipso.
+
+A Smack/CIPSO mapping has the form:
+
+ smack level [category [category]*]
+
+Smack does not expect the level or category sets to be related in any
+particular way and does not assume or assign accesses based on them. Some
+examples of mappings:
+
+ TopSecret 7
+ TS:A,B 7 1 2
+ SecBDE 5 2 4 6
+ RAFTERS 7 12 26
+
+The ":" and "," characters are permitted in a Smack label but have no special
+meaning.
+
+The mapping of Smack labels to CIPSO values is defined by writing to
+/sys/fs/smackfs/cipso2.
+
+In addition to explicit mappings Smack supports direct CIPSO mappings. One
+CIPSO level is used to indicate that the category set passed in the packet is
+in fact an encoding of the Smack label. The level used is 250 by default. The
+value can be read from /sys/fs/smackfs/direct and changed by writing to
+/sys/fs/smackfs/direct.
+
+Socket Attributes
+
+There are two attributes that are associated with sockets. These attributes
+can only be set by privileged tasks, but any task can read them for their own
+sockets.
+
+ SMACK64IPIN: The Smack label of the task object. A privileged
+ program that will enforce policy may set this to the star label.
+
+ SMACK64IPOUT: The Smack label transmitted with outgoing packets.
+ A privileged program may set this to match the label of another
+ task with which it hopes to communicate.
+
+Smack Netlabel Exceptions
+
+You will often find that your labeled application has to talk to the outside,
+unlabeled world. To do this there's a special file /sys/fs/smackfs/netlabel
+where you can add some exceptions in the form of :
+@IP1 LABEL1 or
+@IP2/MASK LABEL2
+
+It means that your application will have unlabeled access to @IP1 if it has
+write access on LABEL1, and access to the subnet @IP2/MASK if it has write
+access on LABEL2.
+
+Entries in the /sys/fs/smackfs/netlabel file are matched by longest mask
+first, like in classless IPv4 routing.
+
+A special label '@' and an option '-CIPSO' can be used there :
+@ means Internet, any application with any label has access to it
+-CIPSO means standard CIPSO networking
+
+If you don't know what CIPSO is and don't plan to use it, you can just do :
+echo 127.0.0.1 -CIPSO > /sys/fs/smackfs/netlabel
+echo 0.0.0.0/0 @ > /sys/fs/smackfs/netlabel
+
+If you use CIPSO on your 192.168.0.0/16 local network and need also unlabeled
+Internet access, you can have :
+echo 127.0.0.1 -CIPSO > /sys/fs/smackfs/netlabel
+echo 192.168.0.0/16 -CIPSO > /sys/fs/smackfs/netlabel
+echo 0.0.0.0/0 @ > /sys/fs/smackfs/netlabel
+
+
+Writing Applications for Smack
+
+There are three sorts of applications that will run on a Smack system. How an
+application interacts with Smack will determine what it will have to do to
+work properly under Smack.
+
+Smack Ignorant Applications
+
+By far the majority of applications have no reason whatever to care about the
+unique properties of Smack. Since invoking a program has no impact on the
+Smack label associated with the process the only concern likely to arise is
+whether the process has execute access to the program.
+
+Smack Relevant Applications
+
+Some programs can be improved by teaching them about Smack, but do not make
+any security decisions themselves. The utility ls(1) is one example of such a
+program.
+
+Smack Enforcing Applications
+
+These are special programs that not only know about Smack, but participate in
+the enforcement of system policy. In most cases these are the programs that
+set up user sessions. There are also network services that provide information
+to processes running with various labels.
+
+File System Interfaces
+
+Smack maintains labels on file system objects using extended attributes. The
+Smack label of a file, directory, or other file system object can be obtained
+using getxattr(2).
+
+ len = getxattr("/", "security.SMACK64", value, sizeof (value));
+
+will put the Smack label of the root directory into value. A privileged
+process can set the Smack label of a file system object with setxattr(2).
+
+ len = strlen("Rubble");
+ rc = setxattr("/foo", "security.SMACK64", "Rubble", len, 0);
+
+will set the Smack label of /foo to "Rubble" if the program has appropriate
+privilege.
+
+Socket Interfaces
+
+The socket attributes can be read using fgetxattr(2).
+
+A privileged process can set the Smack label of outgoing packets with
+fsetxattr(2).
+
+ len = strlen("Rubble");
+ rc = fsetxattr(fd, "security.SMACK64IPOUT", "Rubble", len, 0);
+
+will set the Smack label "Rubble" on packets going out from the socket if the
+program has appropriate privilege.
+
+ rc = fsetxattr(fd, "security.SMACK64IPIN, "*", strlen("*"), 0);
+
+will set the Smack label "*" as the object label against which incoming
+packets will be checked if the program has appropriate privilege.
+
+Administration
+
+Smack supports some mount options:
+
+ smackfsdef=label: specifies the label to give files that lack
+ the Smack label extended attribute.
+
+ smackfsroot=label: specifies the label to assign the root of the
+ file system if it lacks the Smack extended attribute.
+
+ smackfshat=label: specifies a label that must have read access to
+ all labels set on the filesystem. Not yet enforced.
+
+ smackfsfloor=label: specifies a label to which all labels set on the
+ filesystem must have read access. Not yet enforced.
+
+These mount options apply to all file system types.
+
+Smack auditing
+
+If you want Smack auditing of security events, you need to set CONFIG_AUDIT
+in your kernel configuration.
+By default, all denied events will be audited. You can change this behavior by
+writing a single character to the /sys/fs/smackfs/logging file :
+0 : no logging
+1 : log denied (default)
+2 : log accepted
+3 : log denied & accepted
+
+Events are logged as 'key=value' pairs, for each event you at least will get
+the subject, the object, the rights requested, the action, the kernel function
+that triggered the event, plus other pairs depending on the type of event
+audited.
+
+Bringup Mode
+
+Bringup mode provides logging features that can make application
+configuration and system bringup easier. Configure the kernel with
+CONFIG_SECURITY_SMACK_BRINGUP to enable these features. When bringup
+mode is enabled accesses that succeed due to rules marked with the "b"
+access mode will logged. When a new label is introduced for processes
+rules can be added aggressively, marked with the "b". The logging allows
+tracking of which rules actual get used for that label.
+
+Another feature of bringup mode is the "unconfined" option. Writing
+a label to /sys/fs/smackfs/unconfined makes subjects with that label
+able to access any object, and objects with that label accessible to
+all subjects. Any access that is granted because a label is unconfined
+is logged. This feature is dangerous, as files and directories may
+be created in places they couldn't if the policy were being enforced.
diff --git a/Documentation/security/Yama.txt b/Documentation/security/Yama.txt
new file mode 100644
index 000000000..227a63f01
--- /dev/null
+++ b/Documentation/security/Yama.txt
@@ -0,0 +1,73 @@
+Yama is a Linux Security Module that collects a number of system-wide DAC
+security protections that are not handled by the core kernel itself. To
+select it at boot time, specify "security=yama" (though this will disable
+any other LSM).
+
+Yama is controlled through sysctl in /proc/sys/kernel/yama:
+
+- ptrace_scope
+
+==============================================================
+
+ptrace_scope:
+
+As Linux grows in popularity, it will become a larger target for
+malware. One particularly troubling weakness of the Linux process
+interfaces is that a single user is able to examine the memory and
+running state of any of their processes. For example, if one application
+(e.g. Pidgin) was compromised, it would be possible for an attacker to
+attach to other running processes (e.g. Firefox, SSH sessions, GPG agent,
+etc) to extract additional credentials and continue to expand the scope
+of their attack without resorting to user-assisted phishing.
+
+This is not a theoretical problem. SSH session hijacking
+(http://www.storm.net.nz/projects/7) and arbitrary code injection
+(http://c-skills.blogspot.com/2007/05/injectso.html) attacks already
+exist and remain possible if ptrace is allowed to operate as before.
+Since ptrace is not commonly used by non-developers and non-admins, system
+builders should be allowed the option to disable this debugging system.
+
+For a solution, some applications use prctl(PR_SET_DUMPABLE, ...) to
+specifically disallow such ptrace attachment (e.g. ssh-agent), but many
+do not. A more general solution is to only allow ptrace directly from a
+parent to a child process (i.e. direct "gdb EXE" and "strace EXE" still
+work), or with CAP_SYS_PTRACE (i.e. "gdb --pid=PID", and "strace -p PID"
+still work as root).
+
+In mode 1, software that has defined application-specific relationships
+between a debugging process and its inferior (crash handlers, etc),
+prctl(PR_SET_PTRACER, pid, ...) can be used. An inferior can declare which
+other process (and its descendants) are allowed to call PTRACE_ATTACH
+against it. Only one such declared debugging process can exists for
+each inferior at a time. For example, this is used by KDE, Chromium, and
+Firefox's crash handlers, and by Wine for allowing only Wine processes
+to ptrace each other. If a process wishes to entirely disable these ptrace
+restrictions, it can call prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, ...)
+so that any otherwise allowed process (even those in external pid namespaces)
+may attach.
+
+The sysctl settings (writable only with CAP_SYS_PTRACE) are:
+
+0 - classic ptrace permissions: a process can PTRACE_ATTACH to any other
+ process running under the same uid, as long as it is dumpable (i.e.
+ did not transition uids, start privileged, or have called
+ prctl(PR_SET_DUMPABLE...) already). Similarly, PTRACE_TRACEME is
+ unchanged.
+
+1 - restricted ptrace: a process must have a predefined relationship
+ with the inferior it wants to call PTRACE_ATTACH on. By default,
+ this relationship is that of only its descendants when the above
+ classic criteria is also met. To change the relationship, an
+ inferior can call prctl(PR_SET_PTRACER, debugger, ...) to declare
+ an allowed debugger PID to call PTRACE_ATTACH on the inferior.
+ Using PTRACE_TRACEME is unchanged.
+
+2 - admin-only attach: only processes with CAP_SYS_PTRACE may use ptrace
+ with PTRACE_ATTACH, or through children calling PTRACE_TRACEME.
+
+3 - no attach: no processes may use ptrace with PTRACE_ATTACH nor via
+ PTRACE_TRACEME. Once set, this sysctl value cannot be changed.
+
+The original children-only logic was based on the restrictions in grsecurity.
+
+==============================================================
diff --git a/Documentation/security/apparmor.txt b/Documentation/security/apparmor.txt
new file mode 100644
index 000000000..93c1fd7d0
--- /dev/null
+++ b/Documentation/security/apparmor.txt
@@ -0,0 +1,39 @@
+--- What is AppArmor? ---
+
+AppArmor is MAC style security extension for the Linux kernel. It implements
+a task centered policy, with task "profiles" being created and loaded
+from user space. Tasks on the system that do not have a profile defined for
+them run in an unconfined state which is equivalent to standard Linux DAC
+permissions.
+
+--- How to enable/disable ---
+
+set CONFIG_SECURITY_APPARMOR=y
+
+If AppArmor should be selected as the default security module then
+ set CONFIG_DEFAULT_SECURITY="apparmor"
+ and CONFIG_SECURITY_APPARMOR_BOOTPARAM_VALUE=1
+
+Build the kernel
+
+If AppArmor is not the default security module it can be enabled by passing
+security=apparmor on the kernel's command line.
+
+If AppArmor is the default security module it can be disabled by passing
+apparmor=0, security=XXXX (where XXX is valid security module), on the
+kernel's command line
+
+For AppArmor to enforce any restrictions beyond standard Linux DAC permissions
+policy must be loaded into the kernel from user space (see the Documentation
+and tools links).
+
+--- Documentation ---
+
+Documentation can be found on the wiki.
+
+--- Links ---
+
+Mailing List - apparmor@lists.ubuntu.com
+Wiki - http://apparmor.wiki.kernel.org/
+User space tools - https://launchpad.net/apparmor
+Kernel module - git://git.kernel.org/pub/scm/linux/kernel/git/jj/apparmor-dev.git
diff --git a/Documentation/security/credentials.txt b/Documentation/security/credentials.txt
new file mode 100644
index 000000000..86257052e
--- /dev/null
+++ b/Documentation/security/credentials.txt
@@ -0,0 +1,581 @@
+ ====================
+ CREDENTIALS IN LINUX
+ ====================
+
+By: David Howells <dhowells@redhat.com>
+
+Contents:
+
+ (*) Overview.
+
+ (*) Types of credentials.
+
+ (*) File markings.
+
+ (*) Task credentials.
+
+ - Immutable credentials.
+ - Accessing task credentials.
+ - Accessing another task's credentials.
+ - Altering credentials.
+ - Managing credentials.
+
+ (*) Open file credentials.
+
+ (*) Overriding the VFS's use of credentials.
+
+
+========
+OVERVIEW
+========
+
+There are several parts to the security check performed by Linux when one
+object acts upon another:
+
+ (1) Objects.
+
+ Objects are things in the system that may be acted upon directly by
+ userspace programs. Linux has a variety of actionable objects, including:
+
+ - Tasks
+ - Files/inodes
+ - Sockets
+ - Message queues
+ - Shared memory segments
+ - Semaphores
+ - Keys
+
+ As a part of the description of all these objects there is a set of
+ credentials. What's in the set depends on the type of object.
+
+ (2) Object ownership.
+
+ Amongst the credentials of most objects, there will be a subset that
+ indicates the ownership of that object. This is used for resource
+ accounting and limitation (disk quotas and task rlimits for example).
+
+ In a standard UNIX filesystem, for instance, this will be defined by the
+ UID marked on the inode.
+
+ (3) The objective context.
+
+ Also amongst the credentials of those objects, there will be a subset that
+ indicates the 'objective context' of that object. This may or may not be
+ the same set as in (2) - in standard UNIX files, for instance, this is the
+ defined by the UID and the GID marked on the inode.
+
+ The objective context is used as part of the security calculation that is
+ carried out when an object is acted upon.
+
+ (4) Subjects.
+
+ A subject is an object that is acting upon another object.
+
+ Most of the objects in the system are inactive: they don't act on other
+ objects within the system. Processes/tasks are the obvious exception:
+ they do stuff; they access and manipulate things.
+
+ Objects other than tasks may under some circumstances also be subjects.
+ For instance an open file may send SIGIO to a task using the UID and EUID
+ given to it by a task that called fcntl(F_SETOWN) upon it. In this case,
+ the file struct will have a subjective context too.
+
+ (5) The subjective context.
+
+ A subject has an additional interpretation of its credentials. A subset
+ of its credentials forms the 'subjective context'. The subjective context
+ is used as part of the security calculation that is carried out when a
+ subject acts.
+
+ A Linux task, for example, has the FSUID, FSGID and the supplementary
+ group list for when it is acting upon a file - which are quite separate
+ from the real UID and GID that normally form the objective context of the
+ task.
+
+ (6) Actions.
+
+ Linux has a number of actions available that a subject may perform upon an
+ object. The set of actions available depends on the nature of the subject
+ and the object.
+
+ Actions include reading, writing, creating and deleting files; forking or
+ signalling and tracing tasks.
+
+ (7) Rules, access control lists and security calculations.
+
+ When a subject acts upon an object, a security calculation is made. This
+ involves taking the subjective context, the objective context and the
+ action, and searching one or more sets of rules to see whether the subject
+ is granted or denied permission to act in the desired manner on the
+ object, given those contexts.
+
+ There are two main sources of rules:
+
+ (a) Discretionary access control (DAC):
+
+ Sometimes the object will include sets of rules as part of its
+ description. This is an 'Access Control List' or 'ACL'. A Linux
+ file may supply more than one ACL.
+
+ A traditional UNIX file, for example, includes a permissions mask that
+ is an abbreviated ACL with three fixed classes of subject ('user',
+ 'group' and 'other'), each of which may be granted certain privileges
+ ('read', 'write' and 'execute' - whatever those map to for the object
+ in question). UNIX file permissions do not allow the arbitrary
+ specification of subjects, however, and so are of limited use.
+
+ A Linux file might also sport a POSIX ACL. This is a list of rules
+ that grants various permissions to arbitrary subjects.
+
+ (b) Mandatory access control (MAC):
+
+ The system as a whole may have one or more sets of rules that get
+ applied to all subjects and objects, regardless of their source.
+ SELinux and Smack are examples of this.
+
+ In the case of SELinux and Smack, each object is given a label as part
+ of its credentials. When an action is requested, they take the
+ subject label, the object label and the action and look for a rule
+ that says that this action is either granted or denied.
+
+
+====================
+TYPES OF CREDENTIALS
+====================
+
+The Linux kernel supports the following types of credentials:
+
+ (1) Traditional UNIX credentials.
+
+ Real User ID
+ Real Group ID
+
+ The UID and GID are carried by most, if not all, Linux objects, even if in
+ some cases it has to be invented (FAT or CIFS files for example, which are
+ derived from Windows). These (mostly) define the objective context of
+ that object, with tasks being slightly different in some cases.
+
+ Effective, Saved and FS User ID
+ Effective, Saved and FS Group ID
+ Supplementary groups
+
+ These are additional credentials used by tasks only. Usually, an
+ EUID/EGID/GROUPS will be used as the subjective context, and real UID/GID
+ will be used as the objective. For tasks, it should be noted that this is
+ not always true.
+
+ (2) Capabilities.
+
+ Set of permitted capabilities
+ Set of inheritable capabilities
+ Set of effective capabilities
+ Capability bounding set
+
+ These are only carried by tasks. They indicate superior capabilities
+ granted piecemeal to a task that an ordinary task wouldn't otherwise have.
+ These are manipulated implicitly by changes to the traditional UNIX
+ credentials, but can also be manipulated directly by the capset() system
+ call.
+
+ The permitted capabilities are those caps that the process might grant
+ itself to its effective or permitted sets through capset(). This
+ inheritable set might also be so constrained.
+
+ The effective capabilities are the ones that a task is actually allowed to
+ make use of itself.
+
+ The inheritable capabilities are the ones that may get passed across
+ execve().
+
+ The bounding set limits the capabilities that may be inherited across
+ execve(), especially when a binary is executed that will execute as UID 0.
+
+ (3) Secure management flags (securebits).
+
+ These are only carried by tasks. These govern the way the above
+ credentials are manipulated and inherited over certain operations such as
+ execve(). They aren't used directly as objective or subjective
+ credentials.
+
+ (4) Keys and keyrings.
+
+ These are only carried by tasks. They carry and cache security tokens
+ that don't fit into the other standard UNIX credentials. They are for
+ making such things as network filesystem keys available to the file
+ accesses performed by processes, without the necessity of ordinary
+ programs having to know about security details involved.
+
+ Keyrings are a special type of key. They carry sets of other keys and can
+ be searched for the desired key. Each process may subscribe to a number
+ of keyrings:
+
+ Per-thread keying
+ Per-process keyring
+ Per-session keyring
+
+ When a process accesses a key, if not already present, it will normally be
+ cached on one of these keyrings for future accesses to find.
+
+ For more information on using keys, see Documentation/security/keys.txt.
+
+ (5) LSM
+
+ The Linux Security Module allows extra controls to be placed over the
+ operations that a task may do. Currently Linux supports several LSM
+ options.
+
+ Some work by labelling the objects in a system and then applying sets of
+ rules (policies) that say what operations a task with one label may do to
+ an object with another label.
+
+ (6) AF_KEY
+
+ This is a socket-based approach to credential management for networking
+ stacks [RFC 2367]. It isn't discussed by this document as it doesn't
+ interact directly with task and file credentials; rather it keeps system
+ level credentials.
+
+
+When a file is opened, part of the opening task's subjective context is
+recorded in the file struct created. This allows operations using that file
+struct to use those credentials instead of the subjective context of the task
+that issued the operation. An example of this would be a file opened on a
+network filesystem where the credentials of the opened file should be presented
+to the server, regardless of who is actually doing a read or a write upon it.
+
+
+=============
+FILE MARKINGS
+=============
+
+Files on disk or obtained over the network may have annotations that form the
+objective security context of that file. Depending on the type of filesystem,
+this may include one or more of the following:
+
+ (*) UNIX UID, GID, mode;
+
+ (*) Windows user ID;
+
+ (*) Access control list;
+
+ (*) LSM security label;
+
+ (*) UNIX exec privilege escalation bits (SUID/SGID);
+
+ (*) File capabilities exec privilege escalation bits.
+
+These are compared to the task's subjective security context, and certain
+operations allowed or disallowed as a result. In the case of execve(), the
+privilege escalation bits come into play, and may allow the resulting process
+extra privileges, based on the annotations on the executable file.
+
+
+================
+TASK CREDENTIALS
+================
+
+In Linux, all of a task's credentials are held in (uid, gid) or through
+(groups, keys, LSM security) a refcounted structure of type 'struct cred'.
+Each task points to its credentials by a pointer called 'cred' in its
+task_struct.
+
+Once a set of credentials has been prepared and committed, it may not be
+changed, barring the following exceptions:
+
+ (1) its reference count may be changed;
+
+ (2) the reference count on the group_info struct it points to may be changed;
+
+ (3) the reference count on the security data it points to may be changed;
+
+ (4) the reference count on any keyrings it points to may be changed;
+
+ (5) any keyrings it points to may be revoked, expired or have their security
+ attributes changed; and
+
+ (6) the contents of any keyrings to which it points may be changed (the whole
+ point of keyrings being a shared set of credentials, modifiable by anyone
+ with appropriate access).
+
+To alter anything in the cred struct, the copy-and-replace principle must be
+adhered to. First take a copy, then alter the copy and then use RCU to change
+the task pointer to make it point to the new copy. There are wrappers to aid
+with this (see below).
+
+A task may only alter its _own_ credentials; it is no longer permitted for a
+task to alter another's credentials. This means the capset() system call is no
+longer permitted to take any PID other than the one of the current process.
+Also keyctl_instantiate() and keyctl_negate() functions no longer permit
+attachment to process-specific keyrings in the requesting process as the
+instantiating process may need to create them.
+
+
+IMMUTABLE CREDENTIALS
+---------------------
+
+Once a set of credentials has been made public (by calling commit_creds() for
+example), it must be considered immutable, barring two exceptions:
+
+ (1) The reference count may be altered.
+
+ (2) Whilst the keyring subscriptions of a set of credentials may not be
+ changed, the keyrings subscribed to may have their contents altered.
+
+To catch accidental credential alteration at compile time, struct task_struct
+has _const_ pointers to its credential sets, as does struct file. Furthermore,
+certain functions such as get_cred() and put_cred() operate on const pointers,
+thus rendering casts unnecessary, but require to temporarily ditch the const
+qualification to be able to alter the reference count.
+
+
+ACCESSING TASK CREDENTIALS
+--------------------------
+
+A task being able to alter only its own credentials permits the current process
+to read or replace its own credentials without the need for any form of locking
+- which simplifies things greatly. It can just call:
+
+ const struct cred *current_cred()
+
+to get a pointer to its credentials structure, and it doesn't have to release
+it afterwards.
+
+There are convenience wrappers for retrieving specific aspects of a task's
+credentials (the value is simply returned in each case):
+
+ uid_t current_uid(void) Current's real UID
+ gid_t current_gid(void) Current's real GID
+ uid_t current_euid(void) Current's effective UID
+ gid_t current_egid(void) Current's effective GID
+ uid_t current_fsuid(void) Current's file access UID
+ gid_t current_fsgid(void) Current's file access GID
+ kernel_cap_t current_cap(void) Current's effective capabilities
+ void *current_security(void) Current's LSM security pointer
+ struct user_struct *current_user(void) Current's user account
+
+There are also convenience wrappers for retrieving specific associated pairs of
+a task's credentials:
+
+ void current_uid_gid(uid_t *, gid_t *);
+ void current_euid_egid(uid_t *, gid_t *);
+ void current_fsuid_fsgid(uid_t *, gid_t *);
+
+which return these pairs of values through their arguments after retrieving
+them from the current task's credentials.
+
+
+In addition, there is a function for obtaining a reference on the current
+process's current set of credentials:
+
+ const struct cred *get_current_cred(void);
+
+and functions for getting references to one of the credentials that don't
+actually live in struct cred:
+
+ struct user_struct *get_current_user(void);
+ struct group_info *get_current_groups(void);
+
+which get references to the current process's user accounting structure and
+supplementary groups list respectively.
+
+Once a reference has been obtained, it must be released with put_cred(),
+free_uid() or put_group_info() as appropriate.
+
+
+ACCESSING ANOTHER TASK'S CREDENTIALS
+------------------------------------
+
+Whilst a task may access its own credentials without the need for locking, the
+same is not true of a task wanting to access another task's credentials. It
+must use the RCU read lock and rcu_dereference().
+
+The rcu_dereference() is wrapped by:
+
+ const struct cred *__task_cred(struct task_struct *task);
+
+This should be used inside the RCU read lock, as in the following example:
+
+ void foo(struct task_struct *t, struct foo_data *f)
+ {
+ const struct cred *tcred;
+ ...
+ rcu_read_lock();
+ tcred = __task_cred(t);
+ f->uid = tcred->uid;
+ f->gid = tcred->gid;
+ f->groups = get_group_info(tcred->groups);
+ rcu_read_unlock();
+ ...
+ }
+
+Should it be necessary to hold another task's credentials for a long period of
+time, and possibly to sleep whilst doing so, then the caller should get a
+reference on them using:
+
+ const struct cred *get_task_cred(struct task_struct *task);
+
+This does all the RCU magic inside of it. The caller must call put_cred() on
+the credentials so obtained when they're finished with.
+
+ [*] Note: The result of __task_cred() should not be passed directly to
+ get_cred() as this may race with commit_cred().
+
+There are a couple of convenience functions to access bits of another task's
+credentials, hiding the RCU magic from the caller:
+
+ uid_t task_uid(task) Task's real UID
+ uid_t task_euid(task) Task's effective UID
+
+If the caller is holding the RCU read lock at the time anyway, then:
+
+ __task_cred(task)->uid
+ __task_cred(task)->euid
+
+should be used instead. Similarly, if multiple aspects of a task's credentials
+need to be accessed, RCU read lock should be used, __task_cred() called, the
+result stored in a temporary pointer and then the credential aspects called
+from that before dropping the lock. This prevents the potentially expensive
+RCU magic from being invoked multiple times.
+
+Should some other single aspect of another task's credentials need to be
+accessed, then this can be used:
+
+ task_cred_xxx(task, member)
+
+where 'member' is a non-pointer member of the cred struct. For instance:
+
+ uid_t task_cred_xxx(task, suid);
+
+will retrieve 'struct cred::suid' from the task, doing the appropriate RCU
+magic. This may not be used for pointer members as what they point to may
+disappear the moment the RCU read lock is dropped.
+
+
+ALTERING CREDENTIALS
+--------------------
+
+As previously mentioned, a task may only alter its own credentials, and may not
+alter those of another task. This means that it doesn't need to use any
+locking to alter its own credentials.
+
+To alter the current process's credentials, a function should first prepare a
+new set of credentials by calling:
+
+ struct cred *prepare_creds(void);
+
+this locks current->cred_replace_mutex and then allocates and constructs a
+duplicate of the current process's credentials, returning with the mutex still
+held if successful. It returns NULL if not successful (out of memory).
+
+The mutex prevents ptrace() from altering the ptrace state of a process whilst
+security checks on credentials construction and changing is taking place as
+the ptrace state may alter the outcome, particularly in the case of execve().
+
+The new credentials set should be altered appropriately, and any security
+checks and hooks done. Both the current and the proposed sets of credentials
+are available for this purpose as current_cred() will return the current set
+still at this point.
+
+
+When the credential set is ready, it should be committed to the current process
+by calling:
+
+ int commit_creds(struct cred *new);
+
+This will alter various aspects of the credentials and the process, giving the
+LSM a chance to do likewise, then it will use rcu_assign_pointer() to actually
+commit the new credentials to current->cred, it will release
+current->cred_replace_mutex to allow ptrace() to take place, and it will notify
+the scheduler and others of the changes.
+
+This function is guaranteed to return 0, so that it can be tail-called at the
+end of such functions as sys_setresuid().
+
+Note that this function consumes the caller's reference to the new credentials.
+The caller should _not_ call put_cred() on the new credentials afterwards.
+
+Furthermore, once this function has been called on a new set of credentials,
+those credentials may _not_ be changed further.
+
+
+Should the security checks fail or some other error occur after prepare_creds()
+has been called, then the following function should be invoked:
+
+ void abort_creds(struct cred *new);
+
+This releases the lock on current->cred_replace_mutex that prepare_creds() got
+and then releases the new credentials.
+
+
+A typical credentials alteration function would look something like this:
+
+ int alter_suid(uid_t suid)
+ {
+ struct cred *new;
+ int ret;
+
+ new = prepare_creds();
+ if (!new)
+ return -ENOMEM;
+
+ new->suid = suid;
+ ret = security_alter_suid(new);
+ if (ret < 0) {
+ abort_creds(new);
+ return ret;
+ }
+
+ return commit_creds(new);
+ }
+
+
+MANAGING CREDENTIALS
+--------------------
+
+There are some functions to help manage credentials:
+
+ (*) void put_cred(const struct cred *cred);
+
+ This releases a reference to the given set of credentials. If the
+ reference count reaches zero, the credentials will be scheduled for
+ destruction by the RCU system.
+
+ (*) const struct cred *get_cred(const struct cred *cred);
+
+ This gets a reference on a live set of credentials, returning a pointer to
+ that set of credentials.
+
+ (*) struct cred *get_new_cred(struct cred *cred);
+
+ This gets a reference on a set of credentials that is under construction
+ and is thus still mutable, returning a pointer to that set of credentials.
+
+
+=====================
+OPEN FILE CREDENTIALS
+=====================
+
+When a new file is opened, a reference is obtained on the opening task's
+credentials and this is attached to the file struct as 'f_cred' in place of
+'f_uid' and 'f_gid'. Code that used to access file->f_uid and file->f_gid
+should now access file->f_cred->fsuid and file->f_cred->fsgid.
+
+It is safe to access f_cred without the use of RCU or locking because the
+pointer will not change over the lifetime of the file struct, and nor will the
+contents of the cred struct pointed to, barring the exceptions listed above
+(see the Task Credentials section).
+
+
+=======================================
+OVERRIDING THE VFS'S USE OF CREDENTIALS
+=======================================
+
+Under some circumstances it is desirable to override the credentials used by
+the VFS, and that can be done by calling into such as vfs_mkdir() with a
+different set of credentials. This is done in the following places:
+
+ (*) sys_faccessat().
+
+ (*) do_coredump().
+
+ (*) nfs4recover.c.
diff --git a/Documentation/security/keys-ecryptfs.txt b/Documentation/security/keys-ecryptfs.txt
new file mode 100644
index 000000000..c3bbeba63
--- /dev/null
+++ b/Documentation/security/keys-ecryptfs.txt
@@ -0,0 +1,68 @@
+ Encrypted keys for the eCryptfs filesystem
+
+ECryptfs is a stacked filesystem which transparently encrypts and decrypts each
+file using a randomly generated File Encryption Key (FEK).
+
+Each FEK is in turn encrypted with a File Encryption Key Encryption Key (FEFEK)
+either in kernel space or in user space with a daemon called 'ecryptfsd'. In
+the former case the operation is performed directly by the kernel CryptoAPI
+using a key, the FEFEK, derived from a user prompted passphrase; in the latter
+the FEK is encrypted by 'ecryptfsd' with the help of external libraries in order
+to support other mechanisms like public key cryptography, PKCS#11 and TPM based
+operations.
+
+The data structure defined by eCryptfs to contain information required for the
+FEK decryption is called authentication token and, currently, can be stored in a
+kernel key of the 'user' type, inserted in the user's session specific keyring
+by the userspace utility 'mount.ecryptfs' shipped with the package
+'ecryptfs-utils'.
+
+The 'encrypted' key type has been extended with the introduction of the new
+format 'ecryptfs' in order to be used in conjunction with the eCryptfs
+filesystem. Encrypted keys of the newly introduced format store an
+authentication token in its payload with a FEFEK randomly generated by the
+kernel and protected by the parent master key.
+
+In order to avoid known-plaintext attacks, the datablob obtained through
+commands 'keyctl print' or 'keyctl pipe' does not contain the overall
+authentication token, which content is well known, but only the FEFEK in
+encrypted form.
+
+The eCryptfs filesystem may really benefit from using encrypted keys in that the
+required key can be securely generated by an Administrator and provided at boot
+time after the unsealing of a 'trusted' key in order to perform the mount in a
+controlled environment. Another advantage is that the key is not exposed to
+threats of malicious software, because it is available in clear form only at
+kernel level.
+
+Usage:
+ keyctl add encrypted name "new ecryptfs key-type:master-key-name keylen" ring
+ keyctl add encrypted name "load hex_blob" ring
+ keyctl update keyid "update key-type:master-key-name"
+
+name:= '<16 hexadecimal characters>'
+key-type:= 'trusted' | 'user'
+keylen:= 64
+
+
+Example of encrypted key usage with the eCryptfs filesystem:
+
+Create an encrypted key "1000100010001000" of length 64 bytes with format
+'ecryptfs' and save it using a previously loaded user key "test":
+
+ $ keyctl add encrypted 1000100010001000 "new ecryptfs user:test 64" @u
+ 19184530
+
+ $ keyctl print 19184530
+ ecryptfs user:test 64 490045d4bfe48c99f0d465fbbbb79e7500da954178e2de0697
+ dd85091f5450a0511219e9f7cd70dcd498038181466f78ac8d4c19504fcc72402bfc41c2
+ f253a41b7507ccaa4b2b03fff19a69d1cc0b16e71746473f023a95488b6edfd86f7fdd40
+ 9d292e4bacded1258880122dd553a661
+
+ $ keyctl pipe 19184530 > ecryptfs.blob
+
+Mount an eCryptfs filesystem using the created encrypted key "1000100010001000"
+into the '/secret' directory:
+
+ $ mount -i -t ecryptfs -oecryptfs_sig=1000100010001000,\
+ ecryptfs_cipher=aes,ecryptfs_key_bytes=32 /secret /secret
diff --git a/Documentation/security/keys-request-key.txt b/Documentation/security/keys-request-key.txt
new file mode 100644
index 000000000..51987bfec
--- /dev/null
+++ b/Documentation/security/keys-request-key.txt
@@ -0,0 +1,202 @@
+ ===================
+ KEY REQUEST SERVICE
+ ===================
+
+The key request service is part of the key retention service (refer to
+Documentation/security/keys.txt). This document explains more fully how
+the requesting algorithm works.
+
+The process starts by either the kernel requesting a service by calling
+request_key*():
+
+ struct key *request_key(const struct key_type *type,
+ const char *description,
+ const char *callout_info);
+
+or:
+
+ struct key *request_key_with_auxdata(const struct key_type *type,
+ const char *description,
+ const char *callout_info,
+ size_t callout_len,
+ void *aux);
+
+or:
+
+ struct key *request_key_async(const struct key_type *type,
+ const char *description,
+ const char *callout_info,
+ size_t callout_len);
+
+or:
+
+ struct key *request_key_async_with_auxdata(const struct key_type *type,
+ const char *description,
+ const char *callout_info,
+ size_t callout_len,
+ void *aux);
+
+Or by userspace invoking the request_key system call:
+
+ key_serial_t request_key(const char *type,
+ const char *description,
+ const char *callout_info,
+ key_serial_t dest_keyring);
+
+The main difference between the access points is that the in-kernel interface
+does not need to link the key to a keyring to prevent it from being immediately
+destroyed. The kernel interface returns a pointer directly to the key, and
+it's up to the caller to destroy the key.
+
+The request_key*_with_auxdata() calls are like the in-kernel request_key*()
+calls, except that they permit auxiliary data to be passed to the upcaller (the
+default is NULL). This is only useful for those key types that define their
+own upcall mechanism rather than using /sbin/request-key.
+
+The two async in-kernel calls may return keys that are still in the process of
+being constructed. The two non-async ones will wait for construction to
+complete first.
+
+The userspace interface links the key to a keyring associated with the process
+to prevent the key from going away, and returns the serial number of the key to
+the caller.
+
+
+The following example assumes that the key types involved don't define their
+own upcall mechanisms. If they do, then those should be substituted for the
+forking and execution of /sbin/request-key.
+
+
+===========
+THE PROCESS
+===========
+
+A request proceeds in the following manner:
+
+ (1) Process A calls request_key() [the userspace syscall calls the kernel
+ interface].
+
+ (2) request_key() searches the process's subscribed keyrings to see if there's
+ a suitable key there. If there is, it returns the key. If there isn't,
+ and callout_info is not set, an error is returned. Otherwise the process
+ proceeds to the next step.
+
+ (3) request_key() sees that A doesn't have the desired key yet, so it creates
+ two things:
+
+ (a) An uninstantiated key U of requested type and description.
+
+ (b) An authorisation key V that refers to key U and notes that process A
+ is the context in which key U should be instantiated and secured, and
+ from which associated key requests may be satisfied.
+
+ (4) request_key() then forks and executes /sbin/request-key with a new session
+ keyring that contains a link to auth key V.
+
+ (5) /sbin/request-key assumes the authority associated with key U.
+
+ (6) /sbin/request-key execs an appropriate program to perform the actual
+ instantiation.
+
+ (7) The program may want to access another key from A's context (say a
+ Kerberos TGT key). It just requests the appropriate key, and the keyring
+ search notes that the session keyring has auth key V in its bottom level.
+
+ This will permit it to then search the keyrings of process A with the
+ UID, GID, groups and security info of process A as if it was process A,
+ and come up with key W.
+
+ (8) The program then does what it must to get the data with which to
+ instantiate key U, using key W as a reference (perhaps it contacts a
+ Kerberos server using the TGT) and then instantiates key U.
+
+ (9) Upon instantiating key U, auth key V is automatically revoked so that it
+ may not be used again.
+
+(10) The program then exits 0 and request_key() deletes key V and returns key
+ U to the caller.
+
+This also extends further. If key W (step 7 above) didn't exist, key W would
+be created uninstantiated, another auth key (X) would be created (as per step
+3) and another copy of /sbin/request-key spawned (as per step 4); but the
+context specified by auth key X will still be process A, as it was in auth key
+V.
+
+This is because process A's keyrings can't simply be attached to
+/sbin/request-key at the appropriate places because (a) execve will discard two
+of them, and (b) it requires the same UID/GID/Groups all the way through.
+
+
+====================================
+NEGATIVE INSTANTIATION AND REJECTION
+====================================
+
+Rather than instantiating a key, it is possible for the possessor of an
+authorisation key to negatively instantiate a key that's under construction.
+This is a short duration placeholder that causes any attempt at re-requesting
+the key whilst it exists to fail with error ENOKEY if negated or the specified
+error if rejected.
+
+This is provided to prevent excessive repeated spawning of /sbin/request-key
+processes for a key that will never be obtainable.
+
+Should the /sbin/request-key process exit anything other than 0 or die on a
+signal, the key under construction will be automatically negatively
+instantiated for a short amount of time.
+
+
+====================
+THE SEARCH ALGORITHM
+====================
+
+A search of any particular keyring proceeds in the following fashion:
+
+ (1) When the key management code searches for a key (keyring_search_aux) it
+ firstly calls key_permission(SEARCH) on the keyring it's starting with,
+ if this denies permission, it doesn't search further.
+
+ (2) It considers all the non-keyring keys within that keyring and, if any key
+ matches the criteria specified, calls key_permission(SEARCH) on it to see
+ if the key is allowed to be found. If it is, that key is returned; if
+ not, the search continues, and the error code is retained if of higher
+ priority than the one currently set.
+
+ (3) It then considers all the keyring-type keys in the keyring it's currently
+ searching. It calls key_permission(SEARCH) on each keyring, and if this
+ grants permission, it recurses, executing steps (2) and (3) on that
+ keyring.
+
+The process stops immediately a valid key is found with permission granted to
+use it. Any error from a previous match attempt is discarded and the key is
+returned.
+
+When search_process_keyrings() is invoked, it performs the following searches
+until one succeeds:
+
+ (1) If extant, the process's thread keyring is searched.
+
+ (2) If extant, the process's process keyring is searched.
+
+ (3) The process's session keyring is searched.
+
+ (4) If the process has assumed the authority associated with a request_key()
+ authorisation key then:
+
+ (a) If extant, the calling process's thread keyring is searched.
+
+ (b) If extant, the calling process's process keyring is searched.
+
+ (c) The calling process's session keyring is searched.
+
+The moment one succeeds, all pending errors are discarded and the found key is
+returned.
+
+Only if all these fail does the whole thing fail with the highest priority
+error. Note that several errors may have come from LSM.
+
+The error priority is:
+
+ EKEYREVOKED > EKEYEXPIRED > ENOKEY
+
+EACCES/EPERM are only returned on a direct search of a specific keyring where
+the basal keyring does not grant Search permission.
diff --git a/Documentation/security/keys-trusted-encrypted.txt b/Documentation/security/keys-trusted-encrypted.txt
new file mode 100644
index 000000000..e105ae97a
--- /dev/null
+++ b/Documentation/security/keys-trusted-encrypted.txt
@@ -0,0 +1,160 @@
+ Trusted and Encrypted Keys
+
+Trusted and Encrypted Keys are two new key types added to the existing kernel
+key ring service. Both of these new types are variable length symmetric keys,
+and in both cases all keys are created in the kernel, and user space sees,
+stores, and loads only encrypted blobs. Trusted Keys require the availability
+of a Trusted Platform Module (TPM) chip for greater security, while Encrypted
+Keys can be used on any system. All user level blobs, are displayed and loaded
+in hex ascii for convenience, and are integrity verified.
+
+Trusted Keys use a TPM both to generate and to seal the keys. Keys are sealed
+under a 2048 bit RSA key in the TPM, and optionally sealed to specified PCR
+(integrity measurement) values, and only unsealed by the TPM, if PCRs and blob
+integrity verifications match. A loaded Trusted Key can be updated with new
+(future) PCR values, so keys are easily migrated to new pcr values, such as
+when the kernel and initramfs are updated. The same key can have many saved
+blobs under different PCR values, so multiple boots are easily supported.
+
+By default, trusted keys are sealed under the SRK, which has the default
+authorization value (20 zeros). This can be set at takeownership time with the
+trouser's utility: "tpm_takeownership -u -z".
+
+Usage:
+ keyctl add trusted name "new keylen [options]" ring
+ keyctl add trusted name "load hex_blob [pcrlock=pcrnum]" ring
+ keyctl update key "update [options]"
+ keyctl print keyid
+
+ options:
+ keyhandle= ascii hex value of sealing key default 0x40000000 (SRK)
+ keyauth= ascii hex auth for sealing key default 0x00...i
+ (40 ascii zeros)
+ blobauth= ascii hex auth for sealed data default 0x00...
+ (40 ascii zeros)
+ blobauth= ascii hex auth for sealed data default 0x00...
+ (40 ascii zeros)
+ pcrinfo= ascii hex of PCR_INFO or PCR_INFO_LONG (no default)
+ pcrlock= pcr number to be extended to "lock" blob
+ migratable= 0|1 indicating permission to reseal to new PCR values,
+ default 1 (resealing allowed)
+
+"keyctl print" returns an ascii hex copy of the sealed key, which is in standard
+TPM_STORED_DATA format. The key length for new keys are always in bytes.
+Trusted Keys can be 32 - 128 bytes (256 - 1024 bits), the upper limit is to fit
+within the 2048 bit SRK (RSA) keylength, with all necessary structure/padding.
+
+Encrypted keys do not depend on a TPM, and are faster, as they use AES for
+encryption/decryption. New keys are created from kernel generated random
+numbers, and are encrypted/decrypted using a specified 'master' key. The
+'master' key can either be a trusted-key or user-key type. The main
+disadvantage of encrypted keys is that if they are not rooted in a trusted key,
+they are only as secure as the user key encrypting them. The master user key
+should therefore be loaded in as secure a way as possible, preferably early in
+boot.
+
+The decrypted portion of encrypted keys can contain either a simple symmetric
+key or a more complex structure. The format of the more complex structure is
+application specific, which is identified by 'format'.
+
+Usage:
+ keyctl add encrypted name "new [format] key-type:master-key-name keylen"
+ ring
+ keyctl add encrypted name "load hex_blob" ring
+ keyctl update keyid "update key-type:master-key-name"
+
+format:= 'default | ecryptfs'
+key-type:= 'trusted' | 'user'
+
+
+Examples of trusted and encrypted key usage:
+
+Create and save a trusted key named "kmk" of length 32 bytes:
+
+ $ keyctl add trusted kmk "new 32" @u
+ 440502848
+
+ $ keyctl show
+ Session Keyring
+ -3 --alswrv 500 500 keyring: _ses
+ 97833714 --alswrv 500 -1 \_ keyring: _uid.500
+ 440502848 --alswrv 500 500 \_ trusted: kmk
+
+ $ keyctl print 440502848
+ 0101000000000000000001005d01b7e3f4a6be5709930f3b70a743cbb42e0cc95e18e915
+ 3f60da455bbf1144ad12e4f92b452f966929f6105fd29ca28e4d4d5a031d068478bacb0b
+ 27351119f822911b0a11ba3d3498ba6a32e50dac7f32894dd890eb9ad578e4e292c83722
+ a52e56a097e6a68b3f56f7a52ece0cdccba1eb62cad7d817f6dc58898b3ac15f36026fec
+ d568bd4a706cb60bb37be6d8f1240661199d640b66fb0fe3b079f97f450b9ef9c22c6d5d
+ dd379f0facd1cd020281dfa3c70ba21a3fa6fc2471dc6d13ecf8298b946f65345faa5ef0
+ f1f8fff03ad0acb083725535636addb08d73dedb9832da198081e5deae84bfaf0409c22b
+ e4a8aea2b607ec96931e6f4d4fe563ba
+
+ $ keyctl pipe 440502848 > kmk.blob
+
+Load a trusted key from the saved blob:
+
+ $ keyctl add trusted kmk "load `cat kmk.blob`" @u
+ 268728824
+
+ $ keyctl print 268728824
+ 0101000000000000000001005d01b7e3f4a6be5709930f3b70a743cbb42e0cc95e18e915
+ 3f60da455bbf1144ad12e4f92b452f966929f6105fd29ca28e4d4d5a031d068478bacb0b
+ 27351119f822911b0a11ba3d3498ba6a32e50dac7f32894dd890eb9ad578e4e292c83722
+ a52e56a097e6a68b3f56f7a52ece0cdccba1eb62cad7d817f6dc58898b3ac15f36026fec
+ d568bd4a706cb60bb37be6d8f1240661199d640b66fb0fe3b079f97f450b9ef9c22c6d5d
+ dd379f0facd1cd020281dfa3c70ba21a3fa6fc2471dc6d13ecf8298b946f65345faa5ef0
+ f1f8fff03ad0acb083725535636addb08d73dedb9832da198081e5deae84bfaf0409c22b
+ e4a8aea2b607ec96931e6f4d4fe563ba
+
+Reseal a trusted key under new pcr values:
+
+ $ keyctl update 268728824 "update pcrinfo=`cat pcr.blob`"
+ $ keyctl print 268728824
+ 010100000000002c0002800093c35a09b70fff26e7a98ae786c641e678ec6ffb6b46d805
+ 77c8a6377aed9d3219c6dfec4b23ffe3000001005d37d472ac8a44023fbb3d18583a4f73
+ d3a076c0858f6f1dcaa39ea0f119911ff03f5406df4f7f27f41da8d7194f45c9f4e00f2e
+ df449f266253aa3f52e55c53de147773e00f0f9aca86c64d94c95382265968c354c5eab4
+ 9638c5ae99c89de1e0997242edfb0b501744e11ff9762dfd951cffd93227cc513384e7e6
+ e782c29435c7ec2edafaa2f4c1fe6e7a781b59549ff5296371b42133777dcc5b8b971610
+ 94bc67ede19e43ddb9dc2baacad374a36feaf0314d700af0a65c164b7082401740e489c9
+ 7ef6a24defe4846104209bf0c3eced7fa1a672ed5b125fc9d8cd88b476a658a4434644ef
+ df8ae9a178e9f83ba9f08d10fa47e4226b98b0702f06b3b8
+
+The initial consumer of trusted keys is EVM, which at boot time needs a high
+quality symmetric key for HMAC protection of file metadata. The use of a
+trusted key provides strong guarantees that the EVM key has not been
+compromised by a user level problem, and when sealed to specific boot PCR
+values, protects against boot and offline attacks. Create and save an
+encrypted key "evm" using the above trusted key "kmk":
+
+option 1: omitting 'format'
+ $ keyctl add encrypted evm "new trusted:kmk 32" @u
+ 159771175
+
+option 2: explicitly defining 'format' as 'default'
+ $ keyctl add encrypted evm "new default trusted:kmk 32" @u
+ 159771175
+
+ $ keyctl print 159771175
+ default trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b3
+ 82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0
+ 24717c64 5972dcb82ab2dde83376d82b2e3c09ffc
+
+ $ keyctl pipe 159771175 > evm.blob
+
+Load an encrypted key "evm" from saved blob:
+
+ $ keyctl add encrypted evm "load `cat evm.blob`" @u
+ 831684262
+
+ $ keyctl print 831684262
+ default trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b3
+ 82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0
+ 24717c64 5972dcb82ab2dde83376d82b2e3c09ffc
+
+Other uses for trusted and encrypted keys, such as for disk and file encryption
+are anticipated. In particular the new format 'ecryptfs' has been defined in
+in order to use encrypted keys to mount an eCryptfs filesystem. More details
+about the usage can be found in the file
+'Documentation/security/keys-ecryptfs.txt'.
diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt
new file mode 100644
index 000000000..c9e7f4f22
--- /dev/null
+++ b/Documentation/security/keys.txt
@@ -0,0 +1,1433 @@
+ ============================
+ KERNEL KEY RETENTION SERVICE
+ ============================
+
+This service allows cryptographic keys, authentication tokens, cross-domain
+user mappings, and similar to be cached in the kernel for the use of
+filesystems and other kernel services.
+
+Keyrings are permitted; these are a special type of key that can hold links to
+other keys. Processes each have three standard keyring subscriptions that a
+kernel service can search for relevant keys.
+
+The key service can be configured on by enabling:
+
+ "Security options"/"Enable access key retention support" (CONFIG_KEYS)
+
+This document has the following sections:
+
+ - Key overview
+ - Key service overview
+ - Key access permissions
+ - SELinux support
+ - New procfs files
+ - Userspace system call interface
+ - Kernel services
+ - Notes on accessing payload contents
+ - Defining a key type
+ - Request-key callback service
+ - Garbage collection
+
+
+============
+KEY OVERVIEW
+============
+
+In this context, keys represent units of cryptographic data, authentication
+tokens, keyrings, etc.. These are represented in the kernel by struct key.
+
+Each key has a number of attributes:
+
+ - A serial number.
+ - A type.
+ - A description (for matching a key in a search).
+ - Access control information.
+ - An expiry time.
+ - A payload.
+ - State.
+
+
+ (*) Each key is issued a serial number of type key_serial_t that is unique for
+ the lifetime of that key. All serial numbers are positive non-zero 32-bit
+ integers.
+
+ Userspace programs can use a key's serial numbers as a way to gain access
+ to it, subject to permission checking.
+
+ (*) Each key is of a defined "type". Types must be registered inside the
+ kernel by a kernel service (such as a filesystem) before keys of that type
+ can be added or used. Userspace programs cannot define new types directly.
+
+ Key types are represented in the kernel by struct key_type. This defines a
+ number of operations that can be performed on a key of that type.
+
+ Should a type be removed from the system, all the keys of that type will
+ be invalidated.
+
+ (*) Each key has a description. This should be a printable string. The key
+ type provides an operation to perform a match between the description on a
+ key and a criterion string.
+
+ (*) Each key has an owner user ID, a group ID and a permissions mask. These
+ are used to control what a process may do to a key from userspace, and
+ whether a kernel service will be able to find the key.
+
+ (*) Each key can be set to expire at a specific time by the key type's
+ instantiation function. Keys can also be immortal.
+
+ (*) Each key can have a payload. This is a quantity of data that represent the
+ actual "key". In the case of a keyring, this is a list of keys to which
+ the keyring links; in the case of a user-defined key, it's an arbitrary
+ blob of data.
+
+ Having a payload is not required; and the payload can, in fact, just be a
+ value stored in the struct key itself.
+
+ When a key is instantiated, the key type's instantiation function is
+ called with a blob of data, and that then creates the key's payload in
+ some way.
+
+ Similarly, when userspace wants to read back the contents of the key, if
+ permitted, another key type operation will be called to convert the key's
+ attached payload back into a blob of data.
+
+ (*) Each key can be in one of a number of basic states:
+
+ (*) Uninstantiated. The key exists, but does not have any data attached.
+ Keys being requested from userspace will be in this state.
+
+ (*) Instantiated. This is the normal state. The key is fully formed, and
+ has data attached.
+
+ (*) Negative. This is a relatively short-lived state. The key acts as a
+ note saying that a previous call out to userspace failed, and acts as
+ a throttle on key lookups. A negative key can be updated to a normal
+ state.
+
+ (*) Expired. Keys can have lifetimes set. If their lifetime is exceeded,
+ they traverse to this state. An expired key can be updated back to a
+ normal state.
+
+ (*) Revoked. A key is put in this state by userspace action. It can't be
+ found or operated upon (apart from by unlinking it).
+
+ (*) Dead. The key's type was unregistered, and so the key is now useless.
+
+Keys in the last three states are subject to garbage collection. See the
+section on "Garbage collection".
+
+
+====================
+KEY SERVICE OVERVIEW
+====================
+
+The key service provides a number of features besides keys:
+
+ (*) The key service defines three special key types:
+
+ (+) "keyring"
+
+ Keyrings are special keys that contain a list of other keys. Keyring
+ lists can be modified using various system calls. Keyrings should not
+ be given a payload when created.
+
+ (+) "user"
+
+ A key of this type has a description and a payload that are arbitrary
+ blobs of data. These can be created, updated and read by userspace,
+ and aren't intended for use by kernel services.
+
+ (+) "logon"
+
+ Like a "user" key, a "logon" key has a payload that is an arbitrary
+ blob of data. It is intended as a place to store secrets which are
+ accessible to the kernel but not to userspace programs.
+
+ The description can be arbitrary, but must be prefixed with a non-zero
+ length string that describes the key "subclass". The subclass is
+ separated from the rest of the description by a ':'. "logon" keys can
+ be created and updated from userspace, but the payload is only
+ readable from kernel space.
+
+ (*) Each process subscribes to three keyrings: a thread-specific keyring, a
+ process-specific keyring, and a session-specific keyring.
+
+ The thread-specific keyring is discarded from the child when any sort of
+ clone, fork, vfork or execve occurs. A new keyring is created only when
+ required.
+
+ The process-specific keyring is replaced with an empty one in the child on
+ clone, fork, vfork unless CLONE_THREAD is supplied, in which case it is
+ shared. execve also discards the process's process keyring and creates a
+ new one.
+
+ The session-specific keyring is persistent across clone, fork, vfork and
+ execve, even when the latter executes a set-UID or set-GID binary. A
+ process can, however, replace its current session keyring with a new one
+ by using PR_JOIN_SESSION_KEYRING. It is permitted to request an anonymous
+ new one, or to attempt to create or join one of a specific name.
+
+ The ownership of the thread keyring changes when the real UID and GID of
+ the thread changes.
+
+ (*) Each user ID resident in the system holds two special keyrings: a user
+ specific keyring and a default user session keyring. The default session
+ keyring is initialised with a link to the user-specific keyring.
+
+ When a process changes its real UID, if it used to have no session key, it
+ will be subscribed to the default session key for the new UID.
+
+ If a process attempts to access its session key when it doesn't have one,
+ it will be subscribed to the default for its current UID.
+
+ (*) Each user has two quotas against which the keys they own are tracked. One
+ limits the total number of keys and keyrings, the other limits the total
+ amount of description and payload space that can be consumed.
+
+ The user can view information on this and other statistics through procfs
+ files. The root user may also alter the quota limits through sysctl files
+ (see the section "New procfs files").
+
+ Process-specific and thread-specific keyrings are not counted towards a
+ user's quota.
+
+ If a system call that modifies a key or keyring in some way would put the
+ user over quota, the operation is refused and error EDQUOT is returned.
+
+ (*) There's a system call interface by which userspace programs can create and
+ manipulate keys and keyrings.
+
+ (*) There's a kernel interface by which services can register types and search
+ for keys.
+
+ (*) There's a way for the a search done from the kernel to call back to
+ userspace to request a key that can't be found in a process's keyrings.
+
+ (*) An optional filesystem is available through which the key database can be
+ viewed and manipulated.
+
+
+======================
+KEY ACCESS PERMISSIONS
+======================
+
+Keys have an owner user ID, a group access ID, and a permissions mask. The mask
+has up to eight bits each for possessor, user, group and other access. Only
+six of each set of eight bits are defined. These permissions granted are:
+
+ (*) View
+
+ This permits a key or keyring's attributes to be viewed - including key
+ type and description.
+
+ (*) Read
+
+ This permits a key's payload to be viewed or a keyring's list of linked
+ keys.
+
+ (*) Write
+
+ This permits a key's payload to be instantiated or updated, or it allows a
+ link to be added to or removed from a keyring.
+
+ (*) Search
+
+ This permits keyrings to be searched and keys to be found. Searches can
+ only recurse into nested keyrings that have search permission set.
+
+ (*) Link
+
+ This permits a key or keyring to be linked to. To create a link from a
+ keyring to a key, a process must have Write permission on the keyring and
+ Link permission on the key.
+
+ (*) Set Attribute
+
+ This permits a key's UID, GID and permissions mask to be changed.
+
+For changing the ownership, group ID or permissions mask, being the owner of
+the key or having the sysadmin capability is sufficient.
+
+
+===============
+SELINUX SUPPORT
+===============
+
+The security class "key" has been added to SELinux so that mandatory access
+controls can be applied to keys created within various contexts. This support
+is preliminary, and is likely to change quite significantly in the near future.
+Currently, all of the basic permissions explained above are provided in SELinux
+as well; SELinux is simply invoked after all basic permission checks have been
+performed.
+
+The value of the file /proc/self/attr/keycreate influences the labeling of
+newly-created keys. If the contents of that file correspond to an SELinux
+security context, then the key will be assigned that context. Otherwise, the
+key will be assigned the current context of the task that invoked the key
+creation request. Tasks must be granted explicit permission to assign a
+particular context to newly-created keys, using the "create" permission in the
+key security class.
+
+The default keyrings associated with users will be labeled with the default
+context of the user if and only if the login programs have been instrumented to
+properly initialize keycreate during the login process. Otherwise, they will
+be labeled with the context of the login program itself.
+
+Note, however, that the default keyrings associated with the root user are
+labeled with the default kernel context, since they are created early in the
+boot process, before root has a chance to log in.
+
+The keyrings associated with new threads are each labeled with the context of
+their associated thread, and both session and process keyrings are handled
+similarly.
+
+
+================
+NEW PROCFS FILES
+================
+
+Two files have been added to procfs by which an administrator can find out
+about the status of the key service:
+
+ (*) /proc/keys
+
+ This lists the keys that are currently viewable by the task reading the
+ file, giving information about their type, description and permissions.
+ It is not possible to view the payload of the key this way, though some
+ information about it may be given.
+
+ The only keys included in the list are those that grant View permission to
+ the reading process whether or not it possesses them. Note that LSM
+ security checks are still performed, and may further filter out keys that
+ the current process is not authorised to view.
+
+ The contents of the file look like this:
+
+ SERIAL FLAGS USAGE EXPY PERM UID GID TYPE DESCRIPTION: SUMMARY
+ 00000001 I----- 39 perm 1f3f0000 0 0 keyring _uid_ses.0: 1/4
+ 00000002 I----- 2 perm 1f3f0000 0 0 keyring _uid.0: empty
+ 00000007 I----- 1 perm 1f3f0000 0 0 keyring _pid.1: empty
+ 0000018d I----- 1 perm 1f3f0000 0 0 keyring _pid.412: empty
+ 000004d2 I--Q-- 1 perm 1f3f0000 32 -1 keyring _uid.32: 1/4
+ 000004d3 I--Q-- 3 perm 1f3f0000 32 -1 keyring _uid_ses.32: empty
+ 00000892 I--QU- 1 perm 1f000000 0 0 user metal:copper: 0
+ 00000893 I--Q-N 1 35s 1f3f0000 0 0 user metal:silver: 0
+ 00000894 I--Q-- 1 10h 003f0000 0 0 user metal:gold: 0
+
+ The flags are:
+
+ I Instantiated
+ R Revoked
+ D Dead
+ Q Contributes to user's quota
+ U Under construction by callback to userspace
+ N Negative key
+
+
+ (*) /proc/key-users
+
+ This file lists the tracking data for each user that has at least one key
+ on the system. Such data includes quota information and statistics:
+
+ [root@andromeda root]# cat /proc/key-users
+ 0: 46 45/45 1/100 13/10000
+ 29: 2 2/2 2/100 40/10000
+ 32: 2 2/2 2/100 40/10000
+ 38: 2 2/2 2/100 40/10000
+
+ The format of each line is
+ <UID>: User ID to which this applies
+ <usage> Structure refcount
+ <inst>/<keys> Total number of keys and number instantiated
+ <keys>/<max> Key count quota
+ <bytes>/<max> Key size quota
+
+
+Four new sysctl files have been added also for the purpose of controlling the
+quota limits on keys:
+
+ (*) /proc/sys/kernel/keys/root_maxkeys
+ /proc/sys/kernel/keys/root_maxbytes
+
+ These files hold the maximum number of keys that root may have and the
+ maximum total number of bytes of data that root may have stored in those
+ keys.
+
+ (*) /proc/sys/kernel/keys/maxkeys
+ /proc/sys/kernel/keys/maxbytes
+
+ These files hold the maximum number of keys that each non-root user may
+ have and the maximum total number of bytes of data that each of those
+ users may have stored in their keys.
+
+Root may alter these by writing each new limit as a decimal number string to
+the appropriate file.
+
+
+===============================
+USERSPACE SYSTEM CALL INTERFACE
+===============================
+
+Userspace can manipulate keys directly through three new syscalls: add_key,
+request_key and keyctl. The latter provides a number of functions for
+manipulating keys.
+
+When referring to a key directly, userspace programs should use the key's
+serial number (a positive 32-bit integer). However, there are some special
+values available for referring to special keys and keyrings that relate to the
+process making the call:
+
+ CONSTANT VALUE KEY REFERENCED
+ ============================== ====== ===========================
+ KEY_SPEC_THREAD_KEYRING -1 thread-specific keyring
+ KEY_SPEC_PROCESS_KEYRING -2 process-specific keyring
+ KEY_SPEC_SESSION_KEYRING -3 session-specific keyring
+ KEY_SPEC_USER_KEYRING -4 UID-specific keyring
+ KEY_SPEC_USER_SESSION_KEYRING -5 UID-session keyring
+ KEY_SPEC_GROUP_KEYRING -6 GID-specific keyring
+ KEY_SPEC_REQKEY_AUTH_KEY -7 assumed request_key()
+ authorisation key
+
+
+The main syscalls are:
+
+ (*) Create a new key of given type, description and payload and add it to the
+ nominated keyring:
+
+ key_serial_t add_key(const char *type, const char *desc,
+ const void *payload, size_t plen,
+ key_serial_t keyring);
+
+ If a key of the same type and description as that proposed already exists
+ in the keyring, this will try to update it with the given payload, or it
+ will return error EEXIST if that function is not supported by the key
+ type. The process must also have permission to write to the key to be able
+ to update it. The new key will have all user permissions granted and no
+ group or third party permissions.
+
+ Otherwise, this will attempt to create a new key of the specified type and
+ description, and to instantiate it with the supplied payload and attach it
+ to the keyring. In this case, an error will be generated if the process
+ does not have permission to write to the keyring.
+
+ If the key type supports it, if the description is NULL or an empty
+ string, the key type will try and generate a description from the content
+ of the payload.
+
+ The payload is optional, and the pointer can be NULL if not required by
+ the type. The payload is plen in size, and plen can be zero for an empty
+ payload.
+
+ A new keyring can be generated by setting type "keyring", the keyring name
+ as the description (or NULL) and setting the payload to NULL.
+
+ User defined keys can be created by specifying type "user". It is
+ recommended that a user defined key's description by prefixed with a type
+ ID and a colon, such as "krb5tgt:" for a Kerberos 5 ticket granting
+ ticket.
+
+ Any other type must have been registered with the kernel in advance by a
+ kernel service such as a filesystem.
+
+ The ID of the new or updated key is returned if successful.
+
+
+ (*) Search the process's keyrings for a key, potentially calling out to
+ userspace to create it.
+
+ key_serial_t request_key(const char *type, const char *description,
+ const char *callout_info,
+ key_serial_t dest_keyring);
+
+ This function searches all the process's keyrings in the order thread,
+ process, session for a matching key. This works very much like
+ KEYCTL_SEARCH, including the optional attachment of the discovered key to
+ a keyring.
+
+ If a key cannot be found, and if callout_info is not NULL, then
+ /sbin/request-key will be invoked in an attempt to obtain a key. The
+ callout_info string will be passed as an argument to the program.
+
+ See also Documentation/security/keys-request-key.txt.
+
+
+The keyctl syscall functions are:
+
+ (*) Map a special key ID to a real key ID for this process:
+
+ key_serial_t keyctl(KEYCTL_GET_KEYRING_ID, key_serial_t id,
+ int create);
+
+ The special key specified by "id" is looked up (with the key being created
+ if necessary) and the ID of the key or keyring thus found is returned if
+ it exists.
+
+ If the key does not yet exist, the key will be created if "create" is
+ non-zero; and the error ENOKEY will be returned if "create" is zero.
+
+
+ (*) Replace the session keyring this process subscribes to with a new one:
+
+ key_serial_t keyctl(KEYCTL_JOIN_SESSION_KEYRING, const char *name);
+
+ If name is NULL, an anonymous keyring is created attached to the process
+ as its session keyring, displacing the old session keyring.
+
+ If name is not NULL, if a keyring of that name exists, the process
+ attempts to attach it as the session keyring, returning an error if that
+ is not permitted; otherwise a new keyring of that name is created and
+ attached as the session keyring.
+
+ To attach to a named keyring, the keyring must have search permission for
+ the process's ownership.
+
+ The ID of the new session keyring is returned if successful.
+
+
+ (*) Update the specified key:
+
+ long keyctl(KEYCTL_UPDATE, key_serial_t key, const void *payload,
+ size_t plen);
+
+ This will try to update the specified key with the given payload, or it
+ will return error EOPNOTSUPP if that function is not supported by the key
+ type. The process must also have permission to write to the key to be able
+ to update it.
+
+ The payload is of length plen, and may be absent or empty as for
+ add_key().
+
+
+ (*) Revoke a key:
+
+ long keyctl(KEYCTL_REVOKE, key_serial_t key);
+
+ This makes a key unavailable for further operations. Further attempts to
+ use the key will be met with error EKEYREVOKED, and the key will no longer
+ be findable.
+
+
+ (*) Change the ownership of a key:
+
+ long keyctl(KEYCTL_CHOWN, key_serial_t key, uid_t uid, gid_t gid);
+
+ This function permits a key's owner and group ID to be changed. Either one
+ of uid or gid can be set to -1 to suppress that change.
+
+ Only the superuser can change a key's owner to something other than the
+ key's current owner. Similarly, only the superuser can change a key's
+ group ID to something other than the calling process's group ID or one of
+ its group list members.
+
+
+ (*) Change the permissions mask on a key:
+
+ long keyctl(KEYCTL_SETPERM, key_serial_t key, key_perm_t perm);
+
+ This function permits the owner of a key or the superuser to change the
+ permissions mask on a key.
+
+ Only bits the available bits are permitted; if any other bits are set,
+ error EINVAL will be returned.
+
+
+ (*) Describe a key:
+
+ long keyctl(KEYCTL_DESCRIBE, key_serial_t key, char *buffer,
+ size_t buflen);
+
+ This function returns a summary of the key's attributes (but not its
+ payload data) as a string in the buffer provided.
+
+ Unless there's an error, it always returns the amount of data it could
+ produce, even if that's too big for the buffer, but it won't copy more
+ than requested to userspace. If the buffer pointer is NULL then no copy
+ will take place.
+
+ A process must have view permission on the key for this function to be
+ successful.
+
+ If successful, a string is placed in the buffer in the following format:
+
+ <type>;<uid>;<gid>;<perm>;<description>
+
+ Where type and description are strings, uid and gid are decimal, and perm
+ is hexadecimal. A NUL character is included at the end of the string if
+ the buffer is sufficiently big.
+
+ This can be parsed with
+
+ sscanf(buffer, "%[^;];%d;%d;%o;%s", type, &uid, &gid, &mode, desc);
+
+
+ (*) Clear out a keyring:
+
+ long keyctl(KEYCTL_CLEAR, key_serial_t keyring);
+
+ This function clears the list of keys attached to a keyring. The calling
+ process must have write permission on the keyring, and it must be a
+ keyring (or else error ENOTDIR will result).
+
+ This function can also be used to clear special kernel keyrings if they
+ are appropriately marked if the user has CAP_SYS_ADMIN capability. The
+ DNS resolver cache keyring is an example of this.
+
+
+ (*) Link a key into a keyring:
+
+ long keyctl(KEYCTL_LINK, key_serial_t keyring, key_serial_t key);
+
+ This function creates a link from the keyring to the key. The process must
+ have write permission on the keyring and must have link permission on the
+ key.
+
+ Should the keyring not be a keyring, error ENOTDIR will result; and if the
+ keyring is full, error ENFILE will result.
+
+ The link procedure checks the nesting of the keyrings, returning ELOOP if
+ it appears too deep or EDEADLK if the link would introduce a cycle.
+
+ Any links within the keyring to keys that match the new key in terms of
+ type and description will be discarded from the keyring as the new one is
+ added.
+
+
+ (*) Unlink a key or keyring from another keyring:
+
+ long keyctl(KEYCTL_UNLINK, key_serial_t keyring, key_serial_t key);
+
+ This function looks through the keyring for the first link to the
+ specified key, and removes it if found. Subsequent links to that key are
+ ignored. The process must have write permission on the keyring.
+
+ If the keyring is not a keyring, error ENOTDIR will result; and if the key
+ is not present, error ENOENT will be the result.
+
+
+ (*) Search a keyring tree for a key:
+
+ key_serial_t keyctl(KEYCTL_SEARCH, key_serial_t keyring,
+ const char *type, const char *description,
+ key_serial_t dest_keyring);
+
+ This searches the keyring tree headed by the specified keyring until a key
+ is found that matches the type and description criteria. Each keyring is
+ checked for keys before recursion into its children occurs.
+
+ The process must have search permission on the top level keyring, or else
+ error EACCES will result. Only keyrings that the process has search
+ permission on will be recursed into, and only keys and keyrings for which
+ a process has search permission can be matched. If the specified keyring
+ is not a keyring, ENOTDIR will result.
+
+ If the search succeeds, the function will attempt to link the found key
+ into the destination keyring if one is supplied (non-zero ID). All the
+ constraints applicable to KEYCTL_LINK apply in this case too.
+
+ Error ENOKEY, EKEYREVOKED or EKEYEXPIRED will be returned if the search
+ fails. On success, the resulting key ID will be returned.
+
+
+ (*) Read the payload data from a key:
+
+ long keyctl(KEYCTL_READ, key_serial_t keyring, char *buffer,
+ size_t buflen);
+
+ This function attempts to read the payload data from the specified key
+ into the buffer. The process must have read permission on the key to
+ succeed.
+
+ The returned data will be processed for presentation by the key type. For
+ instance, a keyring will return an array of key_serial_t entries
+ representing the IDs of all the keys to which it is subscribed. The user
+ defined key type will return its data as is. If a key type does not
+ implement this function, error EOPNOTSUPP will result.
+
+ As much of the data as can be fitted into the buffer will be copied to
+ userspace if the buffer pointer is not NULL.
+
+ On a successful return, the function will always return the amount of data
+ available rather than the amount copied.
+
+
+ (*) Instantiate a partially constructed key.
+
+ long keyctl(KEYCTL_INSTANTIATE, key_serial_t key,
+ const void *payload, size_t plen,
+ key_serial_t keyring);
+ long keyctl(KEYCTL_INSTANTIATE_IOV, key_serial_t key,
+ const struct iovec *payload_iov, unsigned ioc,
+ key_serial_t keyring);
+
+ If the kernel calls back to userspace to complete the instantiation of a
+ key, userspace should use this call to supply data for the key before the
+ invoked process returns, or else the key will be marked negative
+ automatically.
+
+ The process must have write access on the key to be able to instantiate
+ it, and the key must be uninstantiated.
+
+ If a keyring is specified (non-zero), the key will also be linked into
+ that keyring, however all the constraints applying in KEYCTL_LINK apply in
+ this case too.
+
+ The payload and plen arguments describe the payload data as for add_key().
+
+ The payload_iov and ioc arguments describe the payload data in an iovec
+ array instead of a single buffer.
+
+
+ (*) Negatively instantiate a partially constructed key.
+
+ long keyctl(KEYCTL_NEGATE, key_serial_t key,
+ unsigned timeout, key_serial_t keyring);
+ long keyctl(KEYCTL_REJECT, key_serial_t key,
+ unsigned timeout, unsigned error, key_serial_t keyring);
+
+ If the kernel calls back to userspace to complete the instantiation of a
+ key, userspace should use this call mark the key as negative before the
+ invoked process returns if it is unable to fulfill the request.
+
+ The process must have write access on the key to be able to instantiate
+ it, and the key must be uninstantiated.
+
+ If a keyring is specified (non-zero), the key will also be linked into
+ that keyring, however all the constraints applying in KEYCTL_LINK apply in
+ this case too.
+
+ If the key is rejected, future searches for it will return the specified
+ error code until the rejected key expires. Negating the key is the same
+ as rejecting the key with ENOKEY as the error code.
+
+
+ (*) Set the default request-key destination keyring.
+
+ long keyctl(KEYCTL_SET_REQKEY_KEYRING, int reqkey_defl);
+
+ This sets the default keyring to which implicitly requested keys will be
+ attached for this thread. reqkey_defl should be one of these constants:
+
+ CONSTANT VALUE NEW DEFAULT KEYRING
+ ====================================== ====== =======================
+ KEY_REQKEY_DEFL_NO_CHANGE -1 No change
+ KEY_REQKEY_DEFL_DEFAULT 0 Default[1]
+ KEY_REQKEY_DEFL_THREAD_KEYRING 1 Thread keyring
+ KEY_REQKEY_DEFL_PROCESS_KEYRING 2 Process keyring
+ KEY_REQKEY_DEFL_SESSION_KEYRING 3 Session keyring
+ KEY_REQKEY_DEFL_USER_KEYRING 4 User keyring
+ KEY_REQKEY_DEFL_USER_SESSION_KEYRING 5 User session keyring
+ KEY_REQKEY_DEFL_GROUP_KEYRING 6 Group keyring
+
+ The old default will be returned if successful and error EINVAL will be
+ returned if reqkey_defl is not one of the above values.
+
+ The default keyring can be overridden by the keyring indicated to the
+ request_key() system call.
+
+ Note that this setting is inherited across fork/exec.
+
+ [1] The default is: the thread keyring if there is one, otherwise
+ the process keyring if there is one, otherwise the session keyring if
+ there is one, otherwise the user default session keyring.
+
+
+ (*) Set the timeout on a key.
+
+ long keyctl(KEYCTL_SET_TIMEOUT, key_serial_t key, unsigned timeout);
+
+ This sets or clears the timeout on a key. The timeout can be 0 to clear
+ the timeout or a number of seconds to set the expiry time that far into
+ the future.
+
+ The process must have attribute modification access on a key to set its
+ timeout. Timeouts may not be set with this function on negative, revoked
+ or expired keys.
+
+
+ (*) Assume the authority granted to instantiate a key
+
+ long keyctl(KEYCTL_ASSUME_AUTHORITY, key_serial_t key);
+
+ This assumes or divests the authority required to instantiate the
+ specified key. Authority can only be assumed if the thread has the
+ authorisation key associated with the specified key in its keyrings
+ somewhere.
+
+ Once authority is assumed, searches for keys will also search the
+ requester's keyrings using the requester's security label, UID, GID and
+ groups.
+
+ If the requested authority is unavailable, error EPERM will be returned,
+ likewise if the authority has been revoked because the target key is
+ already instantiated.
+
+ If the specified key is 0, then any assumed authority will be divested.
+
+ The assumed authoritative key is inherited across fork and exec.
+
+
+ (*) Get the LSM security context attached to a key.
+
+ long keyctl(KEYCTL_GET_SECURITY, key_serial_t key, char *buffer,
+ size_t buflen)
+
+ This function returns a string that represents the LSM security context
+ attached to a key in the buffer provided.
+
+ Unless there's an error, it always returns the amount of data it could
+ produce, even if that's too big for the buffer, but it won't copy more
+ than requested to userspace. If the buffer pointer is NULL then no copy
+ will take place.
+
+ A NUL character is included at the end of the string if the buffer is
+ sufficiently big. This is included in the returned count. If no LSM is
+ in force then an empty string will be returned.
+
+ A process must have view permission on the key for this function to be
+ successful.
+
+
+ (*) Install the calling process's session keyring on its parent.
+
+ long keyctl(KEYCTL_SESSION_TO_PARENT);
+
+ This functions attempts to install the calling process's session keyring
+ on to the calling process's parent, replacing the parent's current session
+ keyring.
+
+ The calling process must have the same ownership as its parent, the
+ keyring must have the same ownership as the calling process, the calling
+ process must have LINK permission on the keyring and the active LSM module
+ mustn't deny permission, otherwise error EPERM will be returned.
+
+ Error ENOMEM will be returned if there was insufficient memory to complete
+ the operation, otherwise 0 will be returned to indicate success.
+
+ The keyring will be replaced next time the parent process leaves the
+ kernel and resumes executing userspace.
+
+
+ (*) Invalidate a key.
+
+ long keyctl(KEYCTL_INVALIDATE, key_serial_t key);
+
+ This function marks a key as being invalidated and then wakes up the
+ garbage collector. The garbage collector immediately removes invalidated
+ keys from all keyrings and deletes the key when its reference count
+ reaches zero.
+
+ Keys that are marked invalidated become invisible to normal key operations
+ immediately, though they are still visible in /proc/keys until deleted
+ (they're marked with an 'i' flag).
+
+ A process must have search permission on the key for this function to be
+ successful.
+
+
+===============
+KERNEL SERVICES
+===============
+
+The kernel services for key management are fairly simple to deal with. They can
+be broken down into two areas: keys and key types.
+
+Dealing with keys is fairly straightforward. Firstly, the kernel service
+registers its type, then it searches for a key of that type. It should retain
+the key as long as it has need of it, and then it should release it. For a
+filesystem or device file, a search would probably be performed during the open
+call, and the key released upon close. How to deal with conflicting keys due to
+two different users opening the same file is left to the filesystem author to
+solve.
+
+To access the key manager, the following header must be #included:
+
+ <linux/key.h>
+
+Specific key types should have a header file under include/keys/ that should be
+used to access that type. For keys of type "user", for example, that would be:
+
+ <keys/user-type.h>
+
+Note that there are two different types of pointers to keys that may be
+encountered:
+
+ (*) struct key *
+
+ This simply points to the key structure itself. Key structures will be at
+ least four-byte aligned.
+
+ (*) key_ref_t
+
+ This is equivalent to a struct key *, but the least significant bit is set
+ if the caller "possesses" the key. By "possession" it is meant that the
+ calling processes has a searchable link to the key from one of its
+ keyrings. There are three functions for dealing with these:
+
+ key_ref_t make_key_ref(const struct key *key, bool possession);
+
+ struct key *key_ref_to_ptr(const key_ref_t key_ref);
+
+ bool is_key_possessed(const key_ref_t key_ref);
+
+ The first function constructs a key reference from a key pointer and
+ possession information (which must be true or false).
+
+ The second function retrieves the key pointer from a reference and the
+ third retrieves the possession flag.
+
+When accessing a key's payload contents, certain precautions must be taken to
+prevent access vs modification races. See the section "Notes on accessing
+payload contents" for more information.
+
+(*) To search for a key, call:
+
+ struct key *request_key(const struct key_type *type,
+ const char *description,
+ const char *callout_info);
+
+ This is used to request a key or keyring with a description that matches
+ the description specified according to the key type's match_preparse()
+ method. This permits approximate matching to occur. If callout_string is
+ not NULL, then /sbin/request-key will be invoked in an attempt to obtain
+ the key from userspace. In that case, callout_string will be passed as an
+ argument to the program.
+
+ Should the function fail error ENOKEY, EKEYEXPIRED or EKEYREVOKED will be
+ returned.
+
+ If successful, the key will have been attached to the default keyring for
+ implicitly obtained request-key keys, as set by KEYCTL_SET_REQKEY_KEYRING.
+
+ See also Documentation/security/keys-request-key.txt.
+
+
+(*) To search for a key, passing auxiliary data to the upcaller, call:
+
+ struct key *request_key_with_auxdata(const struct key_type *type,
+ const char *description,
+ const void *callout_info,
+ size_t callout_len,
+ void *aux);
+
+ This is identical to request_key(), except that the auxiliary data is
+ passed to the key_type->request_key() op if it exists, and the callout_info
+ is a blob of length callout_len, if given (the length may be 0).
+
+
+(*) A key can be requested asynchronously by calling one of:
+
+ struct key *request_key_async(const struct key_type *type,
+ const char *description,
+ const void *callout_info,
+ size_t callout_len);
+
+ or:
+
+ struct key *request_key_async_with_auxdata(const struct key_type *type,
+ const char *description,
+ const char *callout_info,
+ size_t callout_len,
+ void *aux);
+
+ which are asynchronous equivalents of request_key() and
+ request_key_with_auxdata() respectively.
+
+ These two functions return with the key potentially still under
+ construction. To wait for construction completion, the following should be
+ called:
+
+ int wait_for_key_construction(struct key *key, bool intr);
+
+ The function will wait for the key to finish being constructed and then
+ invokes key_validate() to return an appropriate value to indicate the state
+ of the key (0 indicates the key is usable).
+
+ If intr is true, then the wait can be interrupted by a signal, in which
+ case error ERESTARTSYS will be returned.
+
+
+(*) When it is no longer required, the key should be released using:
+
+ void key_put(struct key *key);
+
+ Or:
+
+ void key_ref_put(key_ref_t key_ref);
+
+ These can be called from interrupt context. If CONFIG_KEYS is not set then
+ the argument will not be parsed.
+
+
+(*) Extra references can be made to a key by calling one of the following
+ functions:
+
+ struct key *__key_get(struct key *key);
+ struct key *key_get(struct key *key);
+
+ Keys so references will need to be disposed of by calling key_put() when
+ they've been finished with. The key pointer passed in will be returned.
+
+ In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set
+ then the key will not be dereferenced and no increment will take place.
+
+
+(*) A key's serial number can be obtained by calling:
+
+ key_serial_t key_serial(struct key *key);
+
+ If key is NULL or if CONFIG_KEYS is not set then 0 will be returned (in the
+ latter case without parsing the argument).
+
+
+(*) If a keyring was found in the search, this can be further searched by:
+
+ key_ref_t keyring_search(key_ref_t keyring_ref,
+ const struct key_type *type,
+ const char *description)
+
+ This searches the keyring tree specified for a matching key. Error ENOKEY
+ is returned upon failure (use IS_ERR/PTR_ERR to determine). If successful,
+ the returned key will need to be released.
+
+ The possession attribute from the keyring reference is used to control
+ access through the permissions mask and is propagated to the returned key
+ reference pointer if successful.
+
+
+(*) A keyring can be created by:
+
+ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid,
+ const struct cred *cred,
+ key_perm_t perm,
+ unsigned long flags,
+ struct key *dest);
+
+ This creates a keyring with the given attributes and returns it. If dest
+ is not NULL, the new keyring will be linked into the keyring to which it
+ points. No permission checks are made upon the destination keyring.
+
+ Error EDQUOT can be returned if the keyring would overload the quota (pass
+ KEY_ALLOC_NOT_IN_QUOTA in flags if the keyring shouldn't be accounted
+ towards the user's quota). Error ENOMEM can also be returned.
+
+
+(*) To check the validity of a key, this function can be called:
+
+ int validate_key(struct key *key);
+
+ This checks that the key in question hasn't expired or and hasn't been
+ revoked. Should the key be invalid, error EKEYEXPIRED or EKEYREVOKED will
+ be returned. If the key is NULL or if CONFIG_KEYS is not set then 0 will be
+ returned (in the latter case without parsing the argument).
+
+
+(*) To register a key type, the following function should be called:
+
+ int register_key_type(struct key_type *type);
+
+ This will return error EEXIST if a type of the same name is already
+ present.
+
+
+(*) To unregister a key type, call:
+
+ void unregister_key_type(struct key_type *type);
+
+
+Under some circumstances, it may be desirable to deal with a bundle of keys.
+The facility provides access to the keyring type for managing such a bundle:
+
+ struct key_type key_type_keyring;
+
+This can be used with a function such as request_key() to find a specific
+keyring in a process's keyrings. A keyring thus found can then be searched
+with keyring_search(). Note that it is not possible to use request_key() to
+search a specific keyring, so using keyrings in this way is of limited utility.
+
+
+===================================
+NOTES ON ACCESSING PAYLOAD CONTENTS
+===================================
+
+The simplest payload is just a number in key->payload.value. In this case,
+there's no need to indulge in RCU or locking when accessing the payload.
+
+More complex payload contents must be allocated and a pointer to them set in
+key->payload.data. One of the following ways must be selected to access the
+data:
+
+ (1) Unmodifiable key type.
+
+ If the key type does not have a modify method, then the key's payload can
+ be accessed without any form of locking, provided that it's known to be
+ instantiated (uninstantiated keys cannot be "found").
+
+ (2) The key's semaphore.
+
+ The semaphore could be used to govern access to the payload and to control
+ the payload pointer. It must be write-locked for modifications and would
+ have to be read-locked for general access. The disadvantage of doing this
+ is that the accessor may be required to sleep.
+
+ (3) RCU.
+
+ RCU must be used when the semaphore isn't already held; if the semaphore
+ is held then the contents can't change under you unexpectedly as the
+ semaphore must still be used to serialise modifications to the key. The
+ key management code takes care of this for the key type.
+
+ However, this means using:
+
+ rcu_read_lock() ... rcu_dereference() ... rcu_read_unlock()
+
+ to read the pointer, and:
+
+ rcu_dereference() ... rcu_assign_pointer() ... call_rcu()
+
+ to set the pointer and dispose of the old contents after a grace period.
+ Note that only the key type should ever modify a key's payload.
+
+ Furthermore, an RCU controlled payload must hold a struct rcu_head for the
+ use of call_rcu() and, if the payload is of variable size, the length of
+ the payload. key->datalen cannot be relied upon to be consistent with the
+ payload just dereferenced if the key's semaphore is not held.
+
+
+===================
+DEFINING A KEY TYPE
+===================
+
+A kernel service may want to define its own key type. For instance, an AFS
+filesystem might want to define a Kerberos 5 ticket key type. To do this, it
+author fills in a key_type struct and registers it with the system.
+
+Source files that implement key types should include the following header file:
+
+ <linux/key-type.h>
+
+The structure has a number of fields, some of which are mandatory:
+
+ (*) const char *name
+
+ The name of the key type. This is used to translate a key type name
+ supplied by userspace into a pointer to the structure.
+
+
+ (*) size_t def_datalen
+
+ This is optional - it supplies the default payload data length as
+ contributed to the quota. If the key type's payload is always or almost
+ always the same size, then this is a more efficient way to do things.
+
+ The data length (and quota) on a particular key can always be changed
+ during instantiation or update by calling:
+
+ int key_payload_reserve(struct key *key, size_t datalen);
+
+ With the revised data length. Error EDQUOT will be returned if this is not
+ viable.
+
+
+ (*) int (*vet_description)(const char *description);
+
+ This optional method is called to vet a key description. If the key type
+ doesn't approve of the key description, it may return an error, otherwise
+ it should return 0.
+
+
+ (*) int (*preparse)(struct key_preparsed_payload *prep);
+
+ This optional method permits the key type to attempt to parse payload
+ before a key is created (add key) or the key semaphore is taken (update or
+ instantiate key). The structure pointed to by prep looks like:
+
+ struct key_preparsed_payload {
+ char *description;
+ void *type_data[2];
+ void *payload;
+ const void *data;
+ size_t datalen;
+ size_t quotalen;
+ time_t expiry;
+ };
+
+ Before calling the method, the caller will fill in data and datalen with
+ the payload blob parameters; quotalen will be filled in with the default
+ quota size from the key type; expiry will be set to TIME_T_MAX and the
+ rest will be cleared.
+
+ If a description can be proposed from the payload contents, that should be
+ attached as a string to the description field. This will be used for the
+ key description if the caller of add_key() passes NULL or "".
+
+ The method can attach anything it likes to type_data[] and payload. These
+ are merely passed along to the instantiate() or update() operations. If
+ set, the expiry time will be applied to the key if it is instantiated from
+ this data.
+
+ The method should return 0 if successful or a negative error code
+ otherwise.
+
+
+ (*) void (*free_preparse)(struct key_preparsed_payload *prep);
+
+ This method is only required if the preparse() method is provided,
+ otherwise it is unused. It cleans up anything attached to the
+ description, type_data and payload fields of the key_preparsed_payload
+ struct as filled in by the preparse() method. It will always be called
+ after preparse() returns successfully, even if instantiate() or update()
+ succeed.
+
+
+ (*) int (*instantiate)(struct key *key, struct key_preparsed_payload *prep);
+
+ This method is called to attach a payload to a key during construction.
+ The payload attached need not bear any relation to the data passed to this
+ function.
+
+ The prep->data and prep->datalen fields will define the original payload
+ blob. If preparse() was supplied then other fields may be filled in also.
+
+ If the amount of data attached to the key differs from the size in
+ keytype->def_datalen, then key_payload_reserve() should be called.
+
+ This method does not have to lock the key in order to attach a payload.
+ The fact that KEY_FLAG_INSTANTIATED is not set in key->flags prevents
+ anything else from gaining access to the key.
+
+ It is safe to sleep in this method.
+
+
+ (*) int (*update)(struct key *key, const void *data, size_t datalen);
+
+ If this type of key can be updated, then this method should be provided.
+ It is called to update a key's payload from the blob of data provided.
+
+ The prep->data and prep->datalen fields will define the original payload
+ blob. If preparse() was supplied then other fields may be filled in also.
+
+ key_payload_reserve() should be called if the data length might change
+ before any changes are actually made. Note that if this succeeds, the type
+ is committed to changing the key because it's already been altered, so all
+ memory allocation must be done first.
+
+ The key will have its semaphore write-locked before this method is called,
+ but this only deters other writers; any changes to the key's payload must
+ be made under RCU conditions, and call_rcu() must be used to dispose of
+ the old payload.
+
+ key_payload_reserve() should be called before the changes are made, but
+ after all allocations and other potentially failing function calls are
+ made.
+
+ It is safe to sleep in this method.
+
+
+ (*) int (*match_preparse)(struct key_match_data *match_data);
+
+ This method is optional. It is called when a key search is about to be
+ performed. It is given the following structure:
+
+ struct key_match_data {
+ bool (*cmp)(const struct key *key,
+ const struct key_match_data *match_data);
+ const void *raw_data;
+ void *preparsed;
+ unsigned lookup_type;
+ };
+
+ On entry, raw_data will be pointing to the criteria to be used in matching
+ a key by the caller and should not be modified. (*cmp)() will be pointing
+ to the default matcher function (which does an exact description match
+ against raw_data) and lookup_type will be set to indicate a direct lookup.
+
+ The following lookup_type values are available:
+
+ [*] KEYRING_SEARCH_LOOKUP_DIRECT - A direct lookup hashes the type and
+ description to narrow down the search to a small number of keys.
+
+ [*] KEYRING_SEARCH_LOOKUP_ITERATE - An iterative lookup walks all the
+ keys in the keyring until one is matched. This must be used for any
+ search that's not doing a simple direct match on the key description.
+
+ The method may set cmp to point to a function of its choice that does some
+ other form of match, may set lookup_type to KEYRING_SEARCH_LOOKUP_ITERATE
+ and may attach something to the preparsed pointer for use by (*cmp)().
+ (*cmp)() should return true if a key matches and false otherwise.
+
+ If preparsed is set, it may be necessary to use the match_free() method to
+ clean it up.
+
+ The method should return 0 if successful or a negative error code
+ otherwise.
+
+ It is permitted to sleep in this method, but (*cmp)() may not sleep as
+ locks will be held over it.
+
+ If match_preparse() is not provided, keys of this type will be matched
+ exactly by their description.
+
+
+ (*) void (*match_free)(struct key_match_data *match_data);
+
+ This method is optional. If given, it called to clean up
+ match_data->preparsed after a successful call to match_preparse().
+
+
+ (*) void (*revoke)(struct key *key);
+
+ This method is optional. It is called to discard part of the payload
+ data upon a key being revoked. The caller will have the key semaphore
+ write-locked.
+
+ It is safe to sleep in this method, though care should be taken to avoid
+ a deadlock against the key semaphore.
+
+
+ (*) void (*destroy)(struct key *key);
+
+ This method is optional. It is called to discard the payload data on a key
+ when it is being destroyed.
+
+ This method does not need to lock the key to access the payload; it can
+ consider the key as being inaccessible at this time. Note that the key's
+ type may have been changed before this function is called.
+
+ It is not safe to sleep in this method; the caller may hold spinlocks.
+
+
+ (*) void (*describe)(const struct key *key, struct seq_file *p);
+
+ This method is optional. It is called during /proc/keys reading to
+ summarise a key's description and payload in text form.
+
+ This method will be called with the RCU read lock held. rcu_dereference()
+ should be used to read the payload pointer if the payload is to be
+ accessed. key->datalen cannot be trusted to stay consistent with the
+ contents of the payload.
+
+ The description will not change, though the key's state may.
+
+ It is not safe to sleep in this method; the RCU read lock is held by the
+ caller.
+
+
+ (*) long (*read)(const struct key *key, char __user *buffer, size_t buflen);
+
+ This method is optional. It is called by KEYCTL_READ to translate the
+ key's payload into something a blob of data for userspace to deal with.
+ Ideally, the blob should be in the same format as that passed in to the
+ instantiate and update methods.
+
+ If successful, the blob size that could be produced should be returned
+ rather than the size copied.
+
+ This method will be called with the key's semaphore read-locked. This will
+ prevent the key's payload changing. It is not necessary to use RCU locking
+ when accessing the key's payload. It is safe to sleep in this method, such
+ as might happen when the userspace buffer is accessed.
+
+
+ (*) int (*request_key)(struct key_construction *cons, const char *op,
+ void *aux);
+
+ This method is optional. If provided, request_key() and friends will
+ invoke this function rather than upcalling to /sbin/request-key to operate
+ upon a key of this type.
+
+ The aux parameter is as passed to request_key_async_with_auxdata() and
+ similar or is NULL otherwise. Also passed are the construction record for
+ the key to be operated upon and the operation type (currently only
+ "create").
+
+ This method is permitted to return before the upcall is complete, but the
+ following function must be called under all circumstances to complete the
+ instantiation process, whether or not it succeeds, whether or not there's
+ an error:
+
+ void complete_request_key(struct key_construction *cons, int error);
+
+ The error parameter should be 0 on success, -ve on error. The
+ construction record is destroyed by this action and the authorisation key
+ will be revoked. If an error is indicated, the key under construction
+ will be negatively instantiated if it wasn't already instantiated.
+
+ If this method returns an error, that error will be returned to the
+ caller of request_key*(). complete_request_key() must be called prior to
+ returning.
+
+ The key under construction and the authorisation key can be found in the
+ key_construction struct pointed to by cons:
+
+ (*) struct key *key;
+
+ The key under construction.
+
+ (*) struct key *authkey;
+
+ The authorisation key.
+
+
+============================
+REQUEST-KEY CALLBACK SERVICE
+============================
+
+To create a new key, the kernel will attempt to execute the following command
+line:
+
+ /sbin/request-key create <key> <uid> <gid> \
+ <threadring> <processring> <sessionring> <callout_info>
+
+<key> is the key being constructed, and the three keyrings are the process
+keyrings from the process that caused the search to be issued. These are
+included for two reasons:
+
+ (1) There may be an authentication token in one of the keyrings that is
+ required to obtain the key, eg: a Kerberos Ticket-Granting Ticket.
+
+ (2) The new key should probably be cached in one of these rings.
+
+This program should set it UID and GID to those specified before attempting to
+access any more keys. It may then look around for a user specific process to
+hand the request off to (perhaps a path held in placed in another key by, for
+example, the KDE desktop manager).
+
+The program (or whatever it calls) should finish construction of the key by
+calling KEYCTL_INSTANTIATE or KEYCTL_INSTANTIATE_IOV, which also permits it to
+cache the key in one of the keyrings (probably the session ring) before
+returning. Alternatively, the key can be marked as negative with KEYCTL_NEGATE
+or KEYCTL_REJECT; this also permits the key to be cached in one of the
+keyrings.
+
+If it returns with the key remaining in the unconstructed state, the key will
+be marked as being negative, it will be added to the session keyring, and an
+error will be returned to the key requestor.
+
+Supplementary information may be provided from whoever or whatever invoked this
+service. This will be passed as the <callout_info> parameter. If no such
+information was made available, then "-" will be passed as this parameter
+instead.
+
+
+Similarly, the kernel may attempt to update an expired or a soon to expire key
+by executing:
+
+ /sbin/request-key update <key> <uid> <gid> \
+ <threadring> <processring> <sessionring>
+
+In this case, the program isn't required to actually attach the key to a ring;
+the rings are provided for reference.
+
+
+==================
+GARBAGE COLLECTION
+==================
+
+Dead keys (for which the type has been removed) will be automatically unlinked
+from those keyrings that point to them and deleted as soon as possible by a
+background garbage collector.
+
+Similarly, revoked and expired keys will be garbage collected, but only after a
+certain amount of time has passed. This time is set as a number of seconds in:
+
+ /proc/sys/kernel/keys/gc_delay
diff --git a/Documentation/security/tomoyo.txt b/Documentation/security/tomoyo.txt
new file mode 100644
index 000000000..200a2d37c
--- /dev/null
+++ b/Documentation/security/tomoyo.txt
@@ -0,0 +1,55 @@
+--- What is TOMOYO? ---
+
+TOMOYO is a name-based MAC extension (LSM module) for the Linux kernel.
+
+LiveCD-based tutorials are available at
+http://tomoyo.sourceforge.jp/1.7/1st-step/ubuntu10.04-live/
+http://tomoyo.sourceforge.jp/1.7/1st-step/centos5-live/ .
+Though these tutorials use non-LSM version of TOMOYO, they are useful for you
+to know what TOMOYO is.
+
+--- How to enable TOMOYO? ---
+
+Build the kernel with CONFIG_SECURITY_TOMOYO=y and pass "security=tomoyo" on
+kernel's command line.
+
+Please see http://tomoyo.sourceforge.jp/2.3/ for details.
+
+--- Where is documentation? ---
+
+User <-> Kernel interface documentation is available at
+http://tomoyo.sourceforge.jp/2.3/policy-reference.html .
+
+Materials we prepared for seminars and symposiums are available at
+http://sourceforge.jp/projects/tomoyo/docs/?category_id=532&language_id=1 .
+Below lists are chosen from three aspects.
+
+What is TOMOYO?
+ TOMOYO Linux Overview
+ http://sourceforge.jp/projects/tomoyo/docs/lca2009-takeda.pdf
+ TOMOYO Linux: pragmatic and manageable security for Linux
+ http://sourceforge.jp/projects/tomoyo/docs/freedomhectaipei-tomoyo.pdf
+ TOMOYO Linux: A Practical Method to Understand and Protect Your Own Linux Box
+ http://sourceforge.jp/projects/tomoyo/docs/PacSec2007-en-no-demo.pdf
+
+What can TOMOYO do?
+ Deep inside TOMOYO Linux
+ http://sourceforge.jp/projects/tomoyo/docs/lca2009-kumaneko.pdf
+ The role of "pathname based access control" in security.
+ http://sourceforge.jp/projects/tomoyo/docs/lfj2008-bof.pdf
+
+History of TOMOYO?
+ Realities of Mainlining
+ http://sourceforge.jp/projects/tomoyo/docs/lfj2008.pdf
+
+--- What is future plan? ---
+
+We believe that inode based security and name based security are complementary
+and both should be used together. But unfortunately, so far, we cannot enable
+multiple LSM modules at the same time. We feel sorry that you have to give up
+SELinux/SMACK/AppArmor etc. when you want to use TOMOYO.
+
+We hope that LSM becomes stackable in future. Meanwhile, you can use non-LSM
+version of TOMOYO, available at http://tomoyo.sourceforge.jp/1.7/ .
+LSM version of TOMOYO is a subset of non-LSM version of TOMOYO. We are planning
+to port non-LSM version's functionalities to LSM versions.
diff --git a/Documentation/serial-console.txt b/Documentation/serial-console.txt
new file mode 100644
index 000000000..9a7bc8b3f
--- /dev/null
+++ b/Documentation/serial-console.txt
@@ -0,0 +1,109 @@
+ Linux Serial Console
+
+To use a serial port as console you need to compile the support into your
+kernel - by default it is not compiled in. For PC style serial ports
+it's the config option next to "Standard/generic (dumb) serial support".
+You must compile serial support into the kernel and not as a module.
+
+It is possible to specify multiple devices for console output. You can
+define a new kernel command line option to select which device(s) to
+use for console output.
+
+The format of this option is:
+
+ console=device,options
+
+ device: tty0 for the foreground virtual console
+ ttyX for any other virtual console
+ ttySx for a serial port
+ lp0 for the first parallel port
+ ttyUSB0 for the first USB serial device
+
+ options: depend on the driver. For the serial port this
+ defines the baudrate/parity/bits/flow control of
+ the port, in the format BBBBPNF, where BBBB is the
+ speed, P is parity (n/o/e), N is number of bits,
+ and F is flow control ('r' for RTS). Default is
+ 9600n8. The maximum baudrate is 115200.
+
+You can specify multiple console= options on the kernel command line.
+Output will appear on all of them. The last device will be used when
+you open /dev/console. So, for example:
+
+ console=ttyS1,9600 console=tty0
+
+defines that opening /dev/console will get you the current foreground
+virtual console, and kernel messages will appear on both the VGA
+console and the 2nd serial port (ttyS1 or COM2) at 9600 baud.
+
+Note that you can only define one console per device type (serial, video).
+
+If no console device is specified, the first device found capable of
+acting as a system console will be used. At this time, the system
+first looks for a VGA card and then for a serial port. So if you don't
+have a VGA card in your system the first serial port will automatically
+become the console.
+
+You will need to create a new device to use /dev/console. The official
+/dev/console is now character device 5,1.
+
+(You can also use a network device as a console. See
+Documentation/networking/netconsole.txt for information on that.)
+
+Here's an example that will use /dev/ttyS1 (COM2) as the console.
+Replace the sample values as needed.
+
+1. Create /dev/console (real console) and /dev/tty0 (master virtual
+ console):
+
+ cd /dev
+ rm -f console tty0
+ mknod -m 622 console c 5 1
+ mknod -m 622 tty0 c 4 0
+
+2. LILO can also take input from a serial device. This is a very
+ useful option. To tell LILO to use the serial port:
+ In lilo.conf (global section):
+
+ serial = 1,9600n8 (ttyS1, 9600 bd, no parity, 8 bits)
+
+3. Adjust to kernel flags for the new kernel,
+ again in lilo.conf (kernel section)
+
+ append = "console=ttyS1,9600"
+
+4. Make sure a getty runs on the serial port so that you can login to
+ it once the system is done booting. This is done by adding a line
+ like this to /etc/inittab (exact syntax depends on your getty):
+
+ S1:23:respawn:/sbin/getty -L ttyS1 9600 vt100
+
+5. Init and /etc/ioctl.save
+
+ Sysvinit remembers its stty settings in a file in /etc, called
+ `/etc/ioctl.save'. REMOVE THIS FILE before using the serial
+ console for the first time, because otherwise init will probably
+ set the baudrate to 38400 (baudrate of the virtual console).
+
+6. /dev/console and X
+ Programs that want to do something with the virtual console usually
+ open /dev/console. If you have created the new /dev/console device,
+ and your console is NOT the virtual console some programs will fail.
+ Those are programs that want to access the VT interface, and use
+ /dev/console instead of /dev/tty0. Some of those programs are:
+
+ Xfree86, svgalib, gpm, SVGATextMode
+
+ It should be fixed in modern versions of these programs though.
+
+ Note that if you boot without a console= option (or with
+ console=/dev/tty0), /dev/console is the same as /dev/tty0. In that
+ case everything will still work.
+
+7. Thanks
+
+ Thanks to Geert Uytterhoeven <geert@linux-m68k.org>
+ for porting the patches from 2.1.4x to 2.1.6x for taking care of
+ the integration of these patches into m68k, ppc and alpha.
+
+Miquel van Smoorenburg <miquels@cistron.nl>, 11-Jun-2000
diff --git a/Documentation/serial/00-INDEX b/Documentation/serial/00-INDEX
new file mode 100644
index 000000000..8021a9f29
--- /dev/null
+++ b/Documentation/serial/00-INDEX
@@ -0,0 +1,16 @@
+00-INDEX
+ - this file.
+README.cycladesZ
+ - info on Cyclades-Z firmware loading.
+driver
+ - intro to the low level serial driver.
+moxa-smartio
+ - file with info on installing/using Moxa multiport serial driver.
+n_gsm.txt
+ - GSM 0710 tty multiplexer howto.
+rocket.txt
+ - info on the Comtrol RocketPort multiport serial driver.
+serial-rs485.txt
+ - info about RS485 structures and support in the kernel.
+tty.txt
+ - guide to the locking policies of the tty layer.
diff --git a/Documentation/serial/README.cycladesZ b/Documentation/serial/README.cycladesZ
new file mode 100644
index 000000000..024a69443
--- /dev/null
+++ b/Documentation/serial/README.cycladesZ
@@ -0,0 +1,8 @@
+
+The Cyclades-Z must have firmware loaded onto the card before it will
+operate. This operation should be performed during system startup,
+
+The firmware, loader program and the latest device driver code are
+available from Cyclades at
+ ftp://ftp.cyclades.com/pub/cyclades/cyclades-z/linux/
+
diff --git a/Documentation/serial/driver b/Documentation/serial/driver
new file mode 100644
index 000000000..c415b0ef4
--- /dev/null
+++ b/Documentation/serial/driver
@@ -0,0 +1,460 @@
+
+ Low Level Serial API
+ --------------------
+
+
+This document is meant as a brief overview of some aspects of the new serial
+driver. It is not complete, any questions you have should be directed to
+<rmk@arm.linux.org.uk>
+
+The reference implementation is contained within amba_pl011.c.
+
+
+
+Low Level Serial Hardware Driver
+--------------------------------
+
+The low level serial hardware driver is responsible for supplying port
+information (defined by uart_port) and a set of control methods (defined
+by uart_ops) to the core serial driver. The low level driver is also
+responsible for handling interrupts for the port, and providing any
+console support.
+
+
+Console Support
+---------------
+
+The serial core provides a few helper functions. This includes identifing
+the correct port structure (via uart_get_console) and decoding command line
+arguments (uart_parse_options).
+
+There is also a helper function (uart_write_console) which performs a
+character by character write, translating newlines to CRLF sequences.
+Driver writers are recommended to use this function rather than implementing
+their own version.
+
+
+Locking
+-------
+
+It is the responsibility of the low level hardware driver to perform the
+necessary locking using port->lock. There are some exceptions (which
+are described in the uart_ops listing below.)
+
+There are three locks. A per-port spinlock, a per-port tmpbuf semaphore,
+and an overall semaphore.
+
+From the core driver perspective, the port->lock locks the following
+data:
+
+ port->mctrl
+ port->icount
+ info->xmit.head (circ->head)
+ info->xmit.tail (circ->tail)
+
+The low level driver is free to use this lock to provide any additional
+locking.
+
+The core driver uses the info->tmpbuf_sem lock to prevent multi-threaded
+access to the info->tmpbuf bouncebuffer used for port writes.
+
+The port_sem semaphore is used to protect against ports being added/
+removed or reconfigured at inappropriate times. Since v2.6.27, this
+semaphore has been the 'mutex' member of the tty_port struct, and
+commonly referred to as the port mutex (or port->mutex).
+
+
+uart_ops
+--------
+
+The uart_ops structure is the main interface between serial_core and the
+hardware specific driver. It contains all the methods to control the
+hardware.
+
+ tx_empty(port)
+ This function tests whether the transmitter fifo and shifter
+ for the port described by 'port' is empty. If it is empty,
+ this function should return TIOCSER_TEMT, otherwise return 0.
+ If the port does not support this operation, then it should
+ return TIOCSER_TEMT.
+
+ Locking: none.
+ Interrupts: caller dependent.
+ This call must not sleep
+
+ set_mctrl(port, mctrl)
+ This function sets the modem control lines for port described
+ by 'port' to the state described by mctrl. The relevant bits
+ of mctrl are:
+ - TIOCM_RTS RTS signal.
+ - TIOCM_DTR DTR signal.
+ - TIOCM_OUT1 OUT1 signal.
+ - TIOCM_OUT2 OUT2 signal.
+ - TIOCM_LOOP Set the port into loopback mode.
+ If the appropriate bit is set, the signal should be driven
+ active. If the bit is clear, the signal should be driven
+ inactive.
+
+ Locking: port->lock taken.
+ Interrupts: locally disabled.
+ This call must not sleep
+
+ get_mctrl(port)
+ Returns the current state of modem control inputs. The state
+ of the outputs should not be returned, since the core keeps
+ track of their state. The state information should include:
+ - TIOCM_CAR state of DCD signal
+ - TIOCM_CTS state of CTS signal
+ - TIOCM_DSR state of DSR signal
+ - TIOCM_RI state of RI signal
+ The bit is set if the signal is currently driven active. If
+ the port does not support CTS, DCD or DSR, the driver should
+ indicate that the signal is permanently active. If RI is
+ not available, the signal should not be indicated as active.
+
+ Locking: port->lock taken.
+ Interrupts: locally disabled.
+ This call must not sleep
+
+ stop_tx(port)
+ Stop transmitting characters. This might be due to the CTS
+ line becoming inactive or the tty layer indicating we want
+ to stop transmission due to an XOFF character.
+
+ The driver should stop transmitting characters as soon as
+ possible.
+
+ Locking: port->lock taken.
+ Interrupts: locally disabled.
+ This call must not sleep
+
+ start_tx(port)
+ Start transmitting characters.
+
+ Locking: port->lock taken.
+ Interrupts: locally disabled.
+ This call must not sleep
+
+ send_xchar(port,ch)
+ Transmit a high priority character, even if the port is stopped.
+ This is used to implement XON/XOFF flow control and tcflow(). If
+ the serial driver does not implement this function, the tty core
+ will append the character to the circular buffer and then call
+ start_tx() / stop_tx() to flush the data out.
+
+ Do not transmit if ch == '\0' (__DISABLED_CHAR).
+
+ Locking: none.
+ Interrupts: caller dependent.
+
+ stop_rx(port)
+ Stop receiving characters; the port is in the process of
+ being closed.
+
+ Locking: port->lock taken.
+ Interrupts: locally disabled.
+ This call must not sleep
+
+ enable_ms(port)
+ Enable the modem status interrupts.
+
+ This method may be called multiple times. Modem status
+ interrupts should be disabled when the shutdown method is
+ called.
+
+ Locking: port->lock taken.
+ Interrupts: locally disabled.
+ This call must not sleep
+
+ break_ctl(port,ctl)
+ Control the transmission of a break signal. If ctl is
+ nonzero, the break signal should be transmitted. The signal
+ should be terminated when another call is made with a zero
+ ctl.
+
+ Locking: none.
+ Interrupts: caller dependent.
+ This call must not sleep
+
+ startup(port)
+ Grab any interrupt resources and initialise any low level driver
+ state. Enable the port for reception. It should not activate
+ RTS nor DTR; this will be done via a separate call to set_mctrl.
+
+ This method will only be called when the port is initially opened.
+
+ Locking: port_sem taken.
+ Interrupts: globally disabled.
+
+ shutdown(port)
+ Disable the port, disable any break condition that may be in
+ effect, and free any interrupt resources. It should not disable
+ RTS nor DTR; this will have already been done via a separate
+ call to set_mctrl.
+
+ Drivers must not access port->info once this call has completed.
+
+ This method will only be called when there are no more users of
+ this port.
+
+ Locking: port_sem taken.
+ Interrupts: caller dependent.
+
+ flush_buffer(port)
+ Flush any write buffers, reset any DMA state and stop any
+ ongoing DMA transfers.
+
+ This will be called whenever the port->info->xmit circular
+ buffer is cleared.
+
+ Locking: port->lock taken.
+ Interrupts: locally disabled.
+ This call must not sleep
+
+ set_termios(port,termios,oldtermios)
+ Change the port parameters, including word length, parity, stop
+ bits. Update read_status_mask and ignore_status_mask to indicate
+ the types of events we are interested in receiving. Relevant
+ termios->c_cflag bits are:
+ CSIZE - word size
+ CSTOPB - 2 stop bits
+ PARENB - parity enable
+ PARODD - odd parity (when PARENB is in force)
+ CREAD - enable reception of characters (if not set,
+ still receive characters from the port, but
+ throw them away.
+ CRTSCTS - if set, enable CTS status change reporting
+ CLOCAL - if not set, enable modem status change
+ reporting.
+ Relevant termios->c_iflag bits are:
+ INPCK - enable frame and parity error events to be
+ passed to the TTY layer.
+ BRKINT
+ PARMRK - both of these enable break events to be
+ passed to the TTY layer.
+
+ IGNPAR - ignore parity and framing errors
+ IGNBRK - ignore break errors, If IGNPAR is also
+ set, ignore overrun errors as well.
+ The interaction of the iflag bits is as follows (parity error
+ given as an example):
+ Parity error INPCK IGNPAR
+ n/a 0 n/a character received, marked as
+ TTY_NORMAL
+ None 1 n/a character received, marked as
+ TTY_NORMAL
+ Yes 1 0 character received, marked as
+ TTY_PARITY
+ Yes 1 1 character discarded
+
+ Other flags may be used (eg, xon/xoff characters) if your
+ hardware supports hardware "soft" flow control.
+
+ Locking: caller holds port->mutex
+ Interrupts: caller dependent.
+ This call must not sleep
+
+ pm(port,state,oldstate)
+ Perform any power management related activities on the specified
+ port. State indicates the new state (defined by
+ enum uart_pm_state), oldstate indicates the previous state.
+
+ This function should not be used to grab any resources.
+
+ This will be called when the port is initially opened and finally
+ closed, except when the port is also the system console. This
+ will occur even if CONFIG_PM is not set.
+
+ Locking: none.
+ Interrupts: caller dependent.
+
+ type(port)
+ Return a pointer to a string constant describing the specified
+ port, or return NULL, in which case the string 'unknown' is
+ substituted.
+
+ Locking: none.
+ Interrupts: caller dependent.
+
+ release_port(port)
+ Release any memory and IO region resources currently in use by
+ the port.
+
+ Locking: none.
+ Interrupts: caller dependent.
+
+ request_port(port)
+ Request any memory and IO region resources required by the port.
+ If any fail, no resources should be registered when this function
+ returns, and it should return -EBUSY on failure.
+
+ Locking: none.
+ Interrupts: caller dependent.
+
+ config_port(port,type)
+ Perform any autoconfiguration steps required for the port. `type`
+ contains a bit mask of the required configuration. UART_CONFIG_TYPE
+ indicates that the port requires detection and identification.
+ port->type should be set to the type found, or PORT_UNKNOWN if
+ no port was detected.
+
+ UART_CONFIG_IRQ indicates autoconfiguration of the interrupt signal,
+ which should be probed using standard kernel autoprobing techniques.
+ This is not necessary on platforms where ports have interrupts
+ internally hard wired (eg, system on a chip implementations).
+
+ Locking: none.
+ Interrupts: caller dependent.
+
+ verify_port(port,serinfo)
+ Verify the new serial port information contained within serinfo is
+ suitable for this port type.
+
+ Locking: none.
+ Interrupts: caller dependent.
+
+ ioctl(port,cmd,arg)
+ Perform any port specific IOCTLs. IOCTL commands must be defined
+ using the standard numbering system found in <asm/ioctl.h>
+
+ Locking: none.
+ Interrupts: caller dependent.
+
+ poll_init(port)
+ Called by kgdb to perform the minimal hardware initialization needed
+ to support poll_put_char() and poll_get_char(). Unlike ->startup()
+ this should not request interrupts.
+
+ Locking: tty_mutex and tty_port->mutex taken.
+ Interrupts: n/a.
+
+ poll_put_char(port,ch)
+ Called by kgdb to write a single character directly to the serial
+ port. It can and should block until there is space in the TX FIFO.
+
+ Locking: none.
+ Interrupts: caller dependent.
+ This call must not sleep
+
+ poll_get_char(port)
+ Called by kgdb to read a single character directly from the serial
+ port. If data is available, it should be returned; otherwise
+ the function should return NO_POLL_CHAR immediately.
+
+ Locking: none.
+ Interrupts: caller dependent.
+ This call must not sleep
+
+Other functions
+---------------
+
+uart_update_timeout(port,cflag,baud)
+ Update the FIFO drain timeout, port->timeout, according to the
+ number of bits, parity, stop bits and baud rate.
+
+ Locking: caller is expected to take port->lock
+ Interrupts: n/a
+
+uart_get_baud_rate(port,termios,old,min,max)
+ Return the numeric baud rate for the specified termios, taking
+ account of the special 38400 baud "kludge". The B0 baud rate
+ is mapped to 9600 baud.
+
+ If the baud rate is not within min..max, then if old is non-NULL,
+ the original baud rate will be tried. If that exceeds the
+ min..max constraint, 9600 baud will be returned. termios will
+ be updated to the baud rate in use.
+
+ Note: min..max must always allow 9600 baud to be selected.
+
+ Locking: caller dependent.
+ Interrupts: n/a
+
+uart_get_divisor(port,baud)
+ Return the divsor (baud_base / baud) for the specified baud
+ rate, appropriately rounded.
+
+ If 38400 baud and custom divisor is selected, return the
+ custom divisor instead.
+
+ Locking: caller dependent.
+ Interrupts: n/a
+
+uart_match_port(port1,port2)
+ This utility function can be used to determine whether two
+ uart_port structures describe the same port.
+
+ Locking: n/a
+ Interrupts: n/a
+
+uart_write_wakeup(port)
+ A driver is expected to call this function when the number of
+ characters in the transmit buffer have dropped below a threshold.
+
+ Locking: port->lock should be held.
+ Interrupts: n/a
+
+uart_register_driver(drv)
+ Register a uart driver with the core driver. We in turn register
+ with the tty layer, and initialise the core driver per-port state.
+
+ drv->port should be NULL, and the per-port structures should be
+ registered using uart_add_one_port after this call has succeeded.
+
+ Locking: none
+ Interrupts: enabled
+
+uart_unregister_driver()
+ Remove all references to a driver from the core driver. The low
+ level driver must have removed all its ports via the
+ uart_remove_one_port() if it registered them with uart_add_one_port().
+
+ Locking: none
+ Interrupts: enabled
+
+uart_suspend_port()
+
+uart_resume_port()
+
+uart_add_one_port()
+
+uart_remove_one_port()
+
+Other notes
+-----------
+
+It is intended some day to drop the 'unused' entries from uart_port, and
+allow low level drivers to register their own individual uart_port's with
+the core. This will allow drivers to use uart_port as a pointer to a
+structure containing both the uart_port entry with their own extensions,
+thus:
+
+ struct my_port {
+ struct uart_port port;
+ int my_stuff;
+ };
+
+Modem control lines via GPIO
+----------------------------
+
+Some helpers are provided in order to set/get modem control lines via GPIO.
+
+mctrl_gpio_init(dev, idx):
+ This will get the {cts,rts,...}-gpios from device tree if they are
+ present and request them, set direction etc, and return an
+ allocated structure. devm_* functions are used, so there's no need
+ to call mctrl_gpio_free().
+
+mctrl_gpio_free(dev, gpios):
+ This will free the requested gpios in mctrl_gpio_init().
+ As devm_* function are used, there's generally no need to call
+ this function.
+
+mctrl_gpio_to_gpiod(gpios, gidx)
+ This returns the gpio structure associated to the modem line index.
+
+mctrl_gpio_set(gpios, mctrl):
+ This will sets the gpios according to the mctrl state.
+
+mctrl_gpio_get(gpios, mctrl):
+ This will update mctrl with the gpios values.
diff --git a/Documentation/serial/moxa-smartio b/Documentation/serial/moxa-smartio
new file mode 100644
index 000000000..5d2a33be0
--- /dev/null
+++ b/Documentation/serial/moxa-smartio
@@ -0,0 +1,523 @@
+=============================================================================
+ MOXA Smartio/Industio Family Device Driver Installation Guide
+ for Linux Kernel 2.4.x, 2.6.x
+ Copyright (C) 2008, Moxa Inc.
+=============================================================================
+Date: 01/21/2008
+
+Content
+
+1. Introduction
+2. System Requirement
+3. Installation
+ 3.1 Hardware installation
+ 3.2 Driver files
+ 3.3 Device naming convention
+ 3.4 Module driver configuration
+ 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x.
+ 3.6 Custom configuration
+ 3.7 Verify driver installation
+4. Utilities
+5. Setserial
+6. Troubleshooting
+
+-----------------------------------------------------------------------------
+1. Introduction
+
+ The Smartio/Industio/UPCI family Linux driver supports following multiport
+ boards.
+
+ - 2 ports multiport board
+ CP-102U, CP-102UL, CP-102UF
+ CP-132U-I, CP-132UL,
+ CP-132, CP-132I, CP132S, CP-132IS,
+ CI-132, CI-132I, CI-132IS,
+ (C102H, C102HI, C102HIS, C102P, CP-102, CP-102S)
+
+ - 4 ports multiport board
+ CP-104EL,
+ CP-104UL, CP-104JU,
+ CP-134U, CP-134U-I,
+ C104H/PCI, C104HS/PCI,
+ CP-114, CP-114I, CP-114S, CP-114IS, CP-114UL,
+ C104H, C104HS,
+ CI-104J, CI-104JS,
+ CI-134, CI-134I, CI-134IS,
+ (C114HI, CT-114I, C104P)
+ POS-104UL,
+ CB-114,
+ CB-134I
+
+ - 8 ports multiport board
+ CP-118EL, CP-168EL,
+ CP-118U, CP-168U,
+ C168H/PCI,
+ C168H, C168HS,
+ (C168P),
+ CB-108
+
+ This driver and installation procedure have been developed upon Linux Kernel
+ 2.4.x and 2.6.x. This driver supports Intel x86 hardware platform. In order
+ to maintain compatibility, this version has also been properly tested with
+ RedHat, Mandrake, Fedora and S.u.S.E Linux. However, if compatibility problem
+ occurs, please contact Moxa at support@moxa.com.tw.
+
+ In addition to device driver, useful utilities are also provided in this
+ version. They are
+ - msdiag Diagnostic program for displaying installed Moxa
+ Smartio/Industio boards.
+ - msmon Monitor program to observe data count and line status signals.
+ - msterm A simple terminal program which is useful in testing serial
+ ports.
+ - io-irq.exe Configuration program to setup ISA boards. Please note that
+ this program can only be executed under DOS.
+
+ All the drivers and utilities are published in form of source code under
+ GNU General Public License in this version. Please refer to GNU General
+ Public License announcement in each source code file for more detail.
+
+ In Moxa's Web sites, you may always find latest driver at http://www.moxa.com/.
+
+ This version of driver can be installed as Loadable Module (Module driver)
+ or built-in into kernel (Static driver). You may refer to following
+ installation procedure for suitable one. Before you install the driver,
+ please refer to hardware installation procedure in the User's Manual.
+
+ We assume the user should be familiar with following documents.
+ - Serial-HOWTO
+ - Kernel-HOWTO
+
+-----------------------------------------------------------------------------
+2. System Requirement
+ - Hardware platform: Intel x86 machine
+ - Kernel version: 2.4.x or 2.6.x
+ - gcc version 2.72 or later
+ - Maximum 4 boards can be installed in combination
+
+-----------------------------------------------------------------------------
+3. Installation
+
+ 3.1 Hardware installation
+ 3.2 Driver files
+ 3.3 Device naming convention
+ 3.4 Module driver configuration
+ 3.5 Static driver configuration for Linux kernel 2.4.x, 2.6.x.
+ 3.6 Custom configuration
+ 3.7 Verify driver installation
+
+
+ 3.1 Hardware installation
+
+ There are two types of buses, ISA and PCI, for Smartio/Industio
+ family multiport board.
+
+ ISA board
+ ---------
+ You'll have to configure CAP address, I/O address, Interrupt Vector
+ as well as IRQ before installing this driver. Please refer to hardware
+ installation procedure in User's Manual before proceed any further.
+ Please make sure the JP1 is open after the ISA board is set properly.
+
+ PCI/UPCI board
+ --------------
+ You may need to adjust IRQ usage in BIOS to avoid from IRQ conflict
+ with other ISA devices. Please refer to hardware installation
+ procedure in User's Manual in advance.
+
+ PCI IRQ Sharing
+ -----------
+ Each port within the same multiport board shares the same IRQ. Up to
+ 4 Moxa Smartio/Industio PCI Family multiport boards can be installed
+ together on one system and they can share the same IRQ.
+
+
+ 3.2 Driver files
+
+ The driver file may be obtained from ftp, CD-ROM or floppy disk. The
+ first step, anyway, is to copy driver file "mxser.tgz" into specified
+ directory. e.g. /moxa. The execute commands as below.
+
+ # cd /
+ # mkdir moxa
+ # cd /moxa
+ # tar xvf /dev/fd0
+
+ or
+
+ # cd /
+ # mkdir moxa
+ # cd /moxa
+ # cp /mnt/cdrom/<driver directory>/mxser.tgz .
+ # tar xvfz mxser.tgz
+
+
+ 3.3 Device naming convention
+
+ You may find all the driver and utilities files in /moxa/mxser.
+ Following installation procedure depends on the model you'd like to
+ run the driver. If you prefer module driver, please refer to 3.4.
+ If static driver is required, please refer to 3.5.
+
+ Dialin and callout port
+ -----------------------
+ This driver remains traditional serial device properties. There are
+ two special file name for each serial port. One is dial-in port
+ which is named "ttyMxx". For callout port, the naming convention
+ is "cumxx".
+
+ Device naming when more than 2 boards installed
+ -----------------------------------------------
+ Naming convention for each Smartio/Industio multiport board is
+ pre-defined as below.
+
+ Board Num. Dial-in Port Callout port
+ 1st board ttyM0 - ttyM7 cum0 - cum7
+ 2nd board ttyM8 - ttyM15 cum8 - cum15
+ 3rd board ttyM16 - ttyM23 cum16 - cum23
+ 4th board ttyM24 - ttym31 cum24 - cum31
+
+
+ !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+ Under Kernel 2.6 the cum Device is Obsolete. So use ttyM*
+ device instead.
+ !!!!!!!!!!!!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+ Board sequence
+ --------------
+ This driver will activate ISA boards according to the parameter set
+ in the driver. After all specified ISA board activated, PCI board
+ will be installed in the system automatically driven.
+ Therefore the board number is sorted by the CAP address of ISA boards.
+ For PCI boards, their sequence will be after ISA boards and C168H/PCI
+ has higher priority than C104H/PCI boards.
+
+ 3.4 Module driver configuration
+ Module driver is easiest way to install. If you prefer static driver
+ installation, please skip this paragraph.
+
+
+ ------------- Prepare to use the MOXA driver--------------------
+ 3.4.1 Create tty device with correct major number
+ Before using MOXA driver, your system must have the tty devices
+ which are created with driver's major number. We offer one shell
+ script "msmknod" to simplify the procedure.
+ This step is only needed to be executed once. But you still
+ need to do this procedure when:
+ a. You change the driver's major number. Please refer the "3.7"
+ section.
+ b. Your total installed MOXA boards number is changed. Maybe you
+ add/delete one MOXA board.
+ c. You want to change the tty name. This needs to modify the
+ shell script "msmknod"
+
+ The procedure is:
+ # cd /moxa/mxser/driver
+ # ./msmknod
+
+ This shell script will require the major number for dial-in
+ device and callout device to create tty device. You also need
+ to specify the total installed MOXA board number. Default major
+ numbers for dial-in device and callout device are 30, 35. If
+ you need to change to other number, please refer section "3.7"
+ for more detailed procedure.
+ Msmknod will delete any special files occupying the same device
+ naming.
+
+ 3.4.2 Build the MOXA driver and utilities
+ Before using the MOXA driver and utilities, you need compile the
+ all the source code. This step is only need to be executed once.
+ But you still re-compile the source code if you modify the source
+ code. For example, if you change the driver's major number (see
+ "3.7" section), then you need to do this step again.
+
+ Find "Makefile" in /moxa/mxser, then run
+
+ # make clean; make install
+
+ !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
+ For Red Hat 9, Red Hat Enterprise Linux AS3/ES3/WS3 & Fedora Core1:
+ # make clean; make installsp1
+
+ For Red Hat Enterprise Linux AS4/ES4/WS4:
+ # make clean; make installsp2
+ !!!!!!!!!! NOTE !!!!!!!!!!!!!!!!!
+
+ The driver files "mxser.o" and utilities will be properly compiled
+ and copied to system directories respectively.
+
+ ------------- Load MOXA driver--------------------
+ 3.4.3 Load the MOXA driver
+
+ # modprobe mxser <argument>
+
+ will activate the module driver. You may run "lsmod" to check
+ if "mxser" is activated. If the MOXA board is ISA board, the
+ <argument> is needed. Please refer to section "3.4.5" for more
+ information.
+
+
+ ------------- Load MOXA driver on boot --------------------
+ 3.4.4 For the above description, you may manually execute
+ "modprobe mxser" to activate this driver and run
+ "rmmod mxser" to remove it.
+ However, it's better to have a boot time configuration to
+ eliminate manual operation. Boot time configuration can be
+ achieved by rc file. We offer one "rc.mxser" file to simplify
+ the procedure under "moxa/mxser/driver".
+
+ But if you use ISA board, please modify the "modprobe ..." command
+ to add the argument (see "3.4.5" section). After modifying the
+ rc.mxser, please try to execute "/moxa/mxser/driver/rc.mxser"
+ manually to make sure the modification is ok. If any error
+ encountered, please try to modify again. If the modification is
+ completed, follow the below step.
+
+ Run following command for setting rc files.
+
+ # cd /moxa/mxser/driver
+ # cp ./rc.mxser /etc/rc.d
+ # cd /etc/rc.d
+
+ Check "rc.serial" is existed or not. If "rc.serial" doesn't exist,
+ create it by vi, run "chmod 755 rc.serial" to change the permission.
+ Add "/etc/rc.d/rc.mxser" in last line,
+
+ Reboot and check if moxa.o activated by "lsmod" command.
+
+ 3.4.5. If you'd like to drive Smartio/Industio ISA boards in the system,
+ you'll have to add parameter to specify CAP address of given
+ board while activating "mxser.o". The format for parameters are
+ as follows.
+
+ modprobe mxser ioaddr=0x???,0x???,0x???,0x???
+ | | | |
+ | | | +- 4th ISA board
+ | | +------ 3rd ISA board
+ | +------------ 2nd ISA board
+ +------------------- 1st ISA board
+
+ 3.5 Static driver configuration for Linux kernel 2.4.x and 2.6.x
+
+ Note: To use static driver, you must install the linux kernel
+ source package.
+
+ 3.5.1 Backup the built-in driver in the kernel.
+ # cd /usr/src/linux/drivers/char
+ # mv mxser.c mxser.c.old
+
+ For Red Hat 7.x user, you need to create link:
+ # cd /usr/src
+ # ln -s linux-2.4 linux
+
+ 3.5.2 Create link
+ # cd /usr/src/linux/drivers/char
+ # ln -s /moxa/mxser/driver/mxser.c mxser.c
+
+ 3.5.3 Add CAP address list for ISA boards. For PCI boards user,
+ please skip this step.
+
+ In module mode, the CAP address for ISA board is given by
+ parameter. In static driver configuration, you'll have to
+ assign it within driver's source code. If you will not
+ install any ISA boards, you may skip to next portion.
+ The instructions to modify driver source code are as
+ below.
+ a. # cd /moxa/mxser/driver
+ # vi mxser.c
+ b. Find the array mxserBoardCAP[] as below.
+
+ static int mxserBoardCAP[]
+ = {0x00, 0x00, 0x00, 0x00};
+
+ c. Change the address within this array using vi. For
+ example, to driver 2 ISA boards with CAP address
+ 0x280 and 0x180 as 1st and 2nd board. Just to change
+ the source code as follows.
+
+ static int mxserBoardCAP[]
+ = {0x280, 0x180, 0x00, 0x00};
+
+ 3.5.4 Setup kernel configuration
+
+ Configure the kernel:
+
+ # cd /usr/src/linux
+ # make menuconfig
+
+ You will go into a menu-driven system. Please select [Character
+ devices][Non-standard serial port support], enable the [Moxa
+ SmartIO support] driver with "[*]" for built-in (not "[M]"), then
+ select [Exit] to exit this program.
+
+ 3.5.5 Rebuild kernel
+ The following are for Linux kernel rebuilding, for your
+ reference only.
+ For appropriate details, please refer to the Linux document.
+
+ a. cd /usr/src/linux
+ b. make clean /* take a few minutes */
+ c. make dep /* take a few minutes */
+ d. make bzImage /* take probably 10-20 minutes */
+ e. make install /* copy boot image to correct position */
+ f. Please make sure the boot kernel (vmlinuz) is in the
+ correct position.
+ g. If you use 'lilo' utility, you should check /etc/lilo.conf
+ 'image' item specified the path which is the 'vmlinuz' path,
+ or you will load wrong (or old) boot kernel image (vmlinuz).
+ After checking /etc/lilo.conf, please run "lilo".
+
+ Note that if the result of "make bzImage" is ERROR, then you have to
+ go back to Linux configuration Setup. Type "make menuconfig" in
+ directory /usr/src/linux.
+
+
+ 3.5.6 Make tty device and special file
+ # cd /moxa/mxser/driver
+ # ./msmknod
+
+ 3.5.7 Make utility
+ # cd /moxa/mxser/utility
+ # make clean; make install
+
+ 3.5.8 Reboot
+
+
+
+ 3.6 Custom configuration
+ Although this driver already provides you default configuration, you
+ still can change the device name and major number. The instruction to
+ change these parameters are shown as below.
+
+ Change Device name
+ ------------------
+ If you'd like to use other device names instead of default naming
+ convention, all you have to do is to modify the internal code
+ within the shell script "msmknod". First, you have to open "msmknod"
+ by vi. Locate each line contains "ttyM" and "cum" and change them
+ to the device name you desired. "msmknod" creates the device names
+ you need next time executed.
+
+ Change Major number
+ -------------------
+ If major number 30 and 35 had been occupied, you may have to select
+ 2 free major numbers for this driver. There are 3 steps to change
+ major numbers.
+
+ 3.6.1 Find free major numbers
+ In /proc/devices, you may find all the major numbers occupied
+ in the system. Please select 2 major numbers that are available.
+ e.g. 40, 45.
+ 3.6.2 Create special files
+ Run /moxa/mxser/driver/msmknod to create special files with
+ specified major numbers.
+ 3.6.3 Modify driver with new major number
+ Run vi to open /moxa/mxser/driver/mxser.c. Locate the line
+ contains "MXSERMAJOR". Change the content as below.
+ #define MXSERMAJOR 40
+ #define MXSERCUMAJOR 45
+ 3.6.4 Run "make clean; make install" in /moxa/mxser/driver.
+
+ 3.7 Verify driver installation
+ You may refer to /var/log/messages to check the latest status
+ log reported by this driver whenever it's activated.
+
+-----------------------------------------------------------------------------
+4. Utilities
+ There are 3 utilities contained in this driver. They are msdiag, msmon and
+ msterm. These 3 utilities are released in form of source code. They should
+ be compiled into executable file and copied into /usr/bin.
+
+ Before using these utilities, please load driver (refer 3.4 & 3.5) and
+ make sure you had run the "msmknod" utility.
+
+ msdiag - Diagnostic
+ --------------------
+ This utility provides the function to display what Moxa Smartio/Industio
+ board found by driver in the system.
+
+ msmon - Port Monitoring
+ -----------------------
+ This utility gives the user a quick view about all the MOXA ports'
+ activities. One can easily learn each port's total received/transmitted
+ (Rx/Tx) character count since the time when the monitoring is started.
+ Rx/Tx throughputs per second are also reported in interval basis (e.g.
+ the last 5 seconds) and in average basis (since the time the monitoring
+ is started). You can reset all ports' count by <HOME> key. <+> <->
+ (plus/minus) keys to change the displaying time interval. Press <ENTER>
+ on the port, that cursor stay, to view the port's communication
+ parameters, signal status, and input/output queue.
+
+ msterm - Terminal Emulation
+ ---------------------------
+ This utility provides data sending and receiving ability of all tty ports,
+ especially for MOXA ports. It is quite useful for testing simple
+ application, for example, sending AT command to a modem connected to the
+ port or used as a terminal for login purpose. Note that this is only a
+ dumb terminal emulation without handling full screen operation.
+
+-----------------------------------------------------------------------------
+5. Setserial
+
+ Supported Setserial parameters are listed as below.
+
+ uart set UART type(16450-->disable FIFO, 16550A-->enable FIFO)
+ close_delay set the amount of time(in 1/100 of a second) that DTR
+ should be kept low while being closed.
+ closing_wait set the amount of time(in 1/100 of a second) that the
+ serial port should wait for data to be drained while
+ being closed, before the receiver is disable.
+ spd_hi Use 57.6kb when the application requests 38.4kb.
+ spd_vhi Use 115.2kb when the application requests 38.4kb.
+ spd_shi Use 230.4kb when the application requests 38.4kb.
+ spd_warp Use 460.8kb when the application requests 38.4kb.
+ spd_normal Use 38.4kb when the application requests 38.4kb.
+ spd_cust Use the custom divisor to set the speed when the
+ application requests 38.4kb.
+ divisor This option set the custom division.
+ baud_base This option set the base baud rate.
+
+-----------------------------------------------------------------------------
+6. Troubleshooting
+
+ The boot time error messages and solutions are stated as clearly as
+ possible. If all the possible solutions fail, please contact our technical
+ support team to get more help.
+
+
+ Error msg: More than 4 Moxa Smartio/Industio family boards found. Fifth board
+ and after are ignored.
+ Solution:
+ To avoid this problem, please unplug fifth and after board, because Moxa
+ driver supports up to 4 boards.
+
+ Error msg: Request_irq fail, IRQ(?) may be conflict with another device.
+ Solution:
+ Other PCI or ISA devices occupy the assigned IRQ. If you are not sure
+ which device causes the situation, please check /proc/interrupts to find
+ free IRQ and simply change another free IRQ for Moxa board.
+
+ Error msg: Board #: C1xx Series(CAP=xxx) interrupt number invalid.
+ Solution:
+ Each port within the same multiport board shares the same IRQ. Please set
+ one IRQ (IRQ doesn't equal to zero) for one Moxa board.
+
+ Error msg: No interrupt vector be set for Moxa ISA board(CAP=xxx).
+ Solution:
+ Moxa ISA board needs an interrupt vector.Please refer to user's manual
+ "Hardware Installation" chapter to set interrupt vector.
+
+ Error msg: Couldn't install MOXA Smartio/Industio family driver!
+ Solution:
+ Load Moxa driver fail, the major number may conflict with other devices.
+ Please refer to previous section 3.7 to change a free major number for
+ Moxa driver.
+
+ Error msg: Couldn't install MOXA Smartio/Industio family callout driver!
+ Solution:
+ Load Moxa callout driver fail, the callout device major number may
+ conflict with other devices. Please refer to previous section 3.7 to
+ change a free callout device major number for Moxa driver.
+
+
+-----------------------------------------------------------------------------
+
diff --git a/Documentation/serial/n_gsm.txt b/Documentation/serial/n_gsm.txt
new file mode 100644
index 000000000..a5d91126a
--- /dev/null
+++ b/Documentation/serial/n_gsm.txt
@@ -0,0 +1,89 @@
+n_gsm.c GSM 0710 tty multiplexor HOWTO
+===================================================
+
+This line discipline implements the GSM 07.10 multiplexing protocol
+detailed in the following 3GPP document :
+http://www.3gpp.org/ftp/Specs/archive/07_series/07.10/0710-720.zip
+
+This document give some hints on how to use this driver with GPRS and 3G
+modems connected to a physical serial port.
+
+How to use it
+-------------
+1- initialize the modem in 0710 mux mode (usually AT+CMUX= command) through
+its serial port. Depending on the modem used, you can pass more or less
+parameters to this command,
+2- switch the serial line to using the n_gsm line discipline by using
+TIOCSETD ioctl,
+3- configure the mux using GSMIOC_GETCONF / GSMIOC_SETCONF ioctl,
+
+Major parts of the initialization program :
+(a good starting point is util-linux-ng/sys-utils/ldattach.c)
+#include <linux/gsmmux.h>
+#define N_GSM0710 21 /* GSM 0710 Mux */
+#define DEFAULT_SPEED B115200
+#define SERIAL_PORT /dev/ttyS0
+
+ int ldisc = N_GSM0710;
+ struct gsm_config c;
+ struct termios configuration;
+
+ /* open the serial port connected to the modem */
+ fd = open(SERIAL_PORT, O_RDWR | O_NOCTTY | O_NDELAY);
+
+ /* configure the serial port : speed, flow control ... */
+
+ /* send the AT commands to switch the modem to CMUX mode
+ and check that it's successful (should return OK) */
+ write(fd, "AT+CMUX=0\r", 10);
+
+ /* experience showed that some modems need some time before
+ being able to answer to the first MUX packet so a delay
+ may be needed here in some case */
+ sleep(3);
+
+ /* use n_gsm line discipline */
+ ioctl(fd, TIOCSETD, &ldisc);
+
+ /* get n_gsm configuration */
+ ioctl(fd, GSMIOC_GETCONF, &c);
+ /* we are initiator and need encoding 0 (basic) */
+ c.initiator = 1;
+ c.encapsulation = 0;
+ /* our modem defaults to a maximum size of 127 bytes */
+ c.mru = 127;
+ c.mtu = 127;
+ /* set the new configuration */
+ ioctl(fd, GSMIOC_SETCONF, &c);
+
+ /* and wait for ever to keep the line discipline enabled */
+ daemon(0,0);
+ pause();
+
+4- create the devices corresponding to the "virtual" serial ports (take care,
+each modem has its configuration and some DLC have dedicated functions,
+for example GPS), starting with minor 1 (DLC0 is reserved for the management
+of the mux)
+
+MAJOR=`cat /proc/devices |grep gsmtty | awk '{print $1}`
+for i in `seq 1 4`; do
+ mknod /dev/ttygsm$i c $MAJOR $i
+done
+
+5- use these devices as plain serial ports.
+for example, it's possible :
+- and to use gnokii to send / receive SMS on ttygsm1
+- to use ppp to establish a datalink on ttygsm2
+
+6- first close all virtual ports before closing the physical port.
+
+Additional Documentation
+------------------------
+More practical details on the protocol and how it's supported by industrial
+modems can be found in the following documents :
+http://www.telit.com/module/infopool/download.php?id=616
+http://www.u-blox.com/images/downloads/Product_Docs/LEON-G100-G200-MuxImplementation_ApplicationNote_%28GSM%20G1-CS-10002%29.pdf
+http://www.sierrawireless.com/Support/Downloads/AirPrime/WMP_Series/~/media/Support_Downloads/AirPrime/Application_notes/CMUX_Feature_Application_Note-Rev004.ashx
+http://wm.sim.com/sim/News/photo/2010721161442.pdf
+
+11-03-08 - Eric Bénard - <eric@eukrea.com>
diff --git a/Documentation/serial/rocket.txt b/Documentation/serial/rocket.txt
new file mode 100644
index 000000000..60b039891
--- /dev/null
+++ b/Documentation/serial/rocket.txt
@@ -0,0 +1,189 @@
+Comtrol(tm) RocketPort(R)/RocketModem(TM) Series
+Device Driver for the Linux Operating System
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+
+PRODUCT OVERVIEW
+----------------
+
+This driver provides a loadable kernel driver for the Comtrol RocketPort
+and RocketModem PCI boards. These boards provide, 2, 4, 8, 16, or 32
+high-speed serial ports or modems. This driver supports up to a combination
+of four RocketPort or RocketModems boards in one machine simultaneously.
+This file assumes that you are using the RocketPort driver which is
+integrated into the kernel sources.
+
+The driver can also be installed as an external module using the usual
+"make;make install" routine. This external module driver, obtainable
+from the Comtrol website listed below, is useful for updating the driver
+or installing it into kernels which do not have the driver configured
+into them. Installations instructions for the external module
+are in the included README and HW_INSTALL files.
+
+RocketPort ISA and RocketModem II PCI boards currently are only supported by
+this driver in module form.
+
+The RocketPort ISA board requires I/O ports to be configured by the DIP
+switches on the board. See the section "ISA Rocketport Boards" below for
+information on how to set the DIP switches.
+
+You pass the I/O port to the driver using the following module parameters:
+
+board1 : I/O port for the first ISA board
+board2 : I/O port for the second ISA board
+board3 : I/O port for the third ISA board
+board4 : I/O port for the fourth ISA board
+
+There is a set of utilities and scripts provided with the external driver
+( downloadable from http://www.comtrol.com ) that ease the configuration and
+setup of the ISA cards.
+
+The RocketModem II PCI boards require firmware to be loaded into the card
+before it will function. The driver has only been tested as a module for this
+board.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+
+INSTALLATION PROCEDURES
+-----------------------
+
+RocketPort/RocketModem PCI cards require no driver configuration, they are
+automatically detected and configured.
+
+The RocketPort driver can be installed as a module (recommended) or built
+into the kernel. This is selected, as for other drivers, through the `make config`
+command from the root of the Linux source tree during the kernel build process.
+
+The RocketPort/RocketModem serial ports installed by this driver are assigned
+device major number 46, and will be named /dev/ttyRx, where x is the port number
+starting at zero (ex. /dev/ttyR0, /devttyR1, ...). If you have multiple cards
+installed in the system, the mapping of port names to serial ports is displayed
+in the system log at /var/log/messages.
+
+If installed as a module, the module must be loaded. This can be done
+manually by entering "modprobe rocket". To have the module loaded automatically
+upon system boot, edit a /etc/modprobe.d/*.conf file and add the line
+"alias char-major-46 rocket".
+
+In order to use the ports, their device names (nodes) must be created with mknod.
+This is only required once, the system will retain the names once created. To
+create the RocketPort/RocketModem device names, use the command
+"mknod /dev/ttyRx c 46 x" where x is the port number starting at zero. For example:
+
+>mknod /dev/ttyR0 c 46 0
+>mknod /dev/ttyR1 c 46 1
+>mknod /dev/ttyR2 c 46 2
+
+The Linux script MAKEDEV will create the first 16 ttyRx device names (nodes)
+for you:
+
+>/dev/MAKEDEV ttyR
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+
+ISA Rocketport Boards
+---------------------
+
+You must assign and configure the I/O addresses used by the ISA Rocketport
+card before installing and using it. This is done by setting a set of DIP
+switches on the Rocketport board.
+
+
+SETTING THE I/O ADDRESS
+-----------------------
+
+Before installing RocketPort(R) or RocketPort RA boards, you must find
+a range of I/O addresses for it to use. The first RocketPort card
+requires a 68-byte contiguous block of I/O addresses, starting at one
+of the following: 0x100h, 0x140h, 0x180h, 0x200h, 0x240h, 0x280h,
+0x300h, 0x340h, 0x380h. This I/O address must be reflected in the DIP
+switches of *all* of the Rocketport cards.
+
+The second, third, and fourth RocketPort cards require a 64-byte
+contiguous block of I/O addresses, starting at one of the following
+I/O addresses: 0x100h, 0x140h, 0x180h, 0x1C0h, 0x200h, 0x240h, 0x280h,
+0x2C0h, 0x300h, 0x340h, 0x380h, 0x3C0h. The I/O address used by the
+second, third, and fourth Rocketport cards (if present) are set via
+software control. The DIP switch settings for the I/O address must be
+set to the value of the first Rocketport cards.
+
+In order to distinguish each of the card from the others, each card
+must have a unique board ID set on the dip switches. The first
+Rocketport board must be set with the DIP switches corresponding to
+the first board, the second board must be set with the DIP switches
+corresponding to the second board, etc. IMPORTANT: The board ID is
+the only place where the DIP switch settings should differ between the
+various Rocketport boards in a system.
+
+The I/O address range used by any of the RocketPort cards must not
+conflict with any other cards in the system, including other
+RocketPort cards. Below, you will find a list of commonly used I/O
+address ranges which may be in use by other devices in your system.
+On a Linux system, "cat /proc/ioports" will also be helpful in
+identifying what I/O addresses are being used by devices on your
+system.
+
+Remember, the FIRST RocketPort uses 68 I/O addresses. So, if you set it
+for 0x100, it will occupy 0x100 to 0x143. This would mean that you
+CAN NOT set the second, third or fourth board for address 0x140 since
+the first 4 bytes of that range are used by the first board. You would
+need to set the second, third, or fourth board to one of the next available
+blocks such as 0x180.
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+
+RocketPort and RocketPort RA SW1 Settings:
+
+ +-------------------------------+
+ | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 |
+ +-------+-------+---------------+
+ | Unused| Card | I/O Port Block|
+ +-------------------------------+
+
+DIP Switches DIP Switches
+7 8 6 5
+=================== ===================
+On On UNUSED, MUST BE ON. On On First Card <==== Default
+ On Off Second Card
+ Off On Third Card
+ Off Off Fourth Card
+
+DIP Switches I/O Address Range
+4 3 2 1 Used by the First Card
+=====================================
+On Off On Off 100-143
+On Off Off On 140-183
+On Off Off Off 180-1C3 <==== Default
+Off On On Off 200-243
+Off On Off On 240-283
+Off On Off Off 280-2C3
+Off Off On Off 300-343
+Off Off Off On 340-383
+Off Off Off Off 380-3C3
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+
+REPORTING BUGS
+--------------
+
+For technical support, please provide the following
+information: Driver version, kernel release, distribution of
+kernel, and type of board you are using. Error messages and log
+printouts port configuration details are especially helpful.
+
+USA
+ Phone: (612) 494-4100
+ FAX: (612) 494-4199
+ email: support@comtrol.com
+
+Comtrol Europe
+ Phone: +44 (0) 1 869 323-220
+ FAX: +44 (0) 1 869 323-211
+ email: support@comtrol.co.uk
+
+Web: http://www.comtrol.com
+FTP: ftp.comtrol.com
+
+=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
+
+
diff --git a/Documentation/serial/serial-rs485.txt b/Documentation/serial/serial-rs485.txt
new file mode 100644
index 000000000..39dac9542
--- /dev/null
+++ b/Documentation/serial/serial-rs485.txt
@@ -0,0 +1,136 @@
+ RS485 SERIAL COMMUNICATIONS
+
+1. INTRODUCTION
+
+ EIA-485, also known as TIA/EIA-485 or RS-485, is a standard defining the
+ electrical characteristics of drivers and receivers for use in balanced
+ digital multipoint systems.
+ This standard is widely used for communications in industrial automation
+ because it can be used effectively over long distances and in electrically
+ noisy environments.
+
+2. HARDWARE-RELATED CONSIDERATIONS
+
+ Some CPUs/UARTs (e.g., Atmel AT91 or 16C950 UART) contain a built-in
+ half-duplex mode capable of automatically controlling line direction by
+ toggling RTS or DTR signals. That can be used to control external
+ half-duplex hardware like an RS485 transceiver or any RS232-connected
+ half-duplex devices like some modems.
+
+ For these microcontrollers, the Linux driver should be made capable of
+ working in both modes, and proper ioctls (see later) should be made
+ available at user-level to allow switching from one mode to the other, and
+ vice versa.
+
+3. DATA STRUCTURES ALREADY AVAILABLE IN THE KERNEL
+
+ The Linux kernel provides the serial_rs485 structure (see [1]) to handle
+ RS485 communications. This data structure is used to set and configure RS485
+ parameters in the platform data and in ioctls.
+
+ The device tree can also provide RS485 boot time parameters (see [2]
+ for bindings). The driver is in charge of filling this data structure from
+ the values given by the device tree.
+
+ Any driver for devices capable of working both as RS232 and RS485 should
+ provide at least the following ioctls:
+
+ - TIOCSRS485 (typically associated with number 0x542F). This ioctl is used
+ to enable/disable RS485 mode from user-space
+
+ - TIOCGRS485 (typically associated with number 0x542E). This ioctl is used
+ to get RS485 mode from kernel-space (i.e., driver) to user-space.
+
+ In other words, the serial driver should contain a code similar to the next
+ one:
+
+ static struct uart_ops atmel_pops = {
+ /* ... */
+ .ioctl = handle_ioctl,
+ };
+
+ static int handle_ioctl(struct uart_port *port,
+ unsigned int cmd,
+ unsigned long arg)
+ {
+ struct serial_rs485 rs485conf;
+
+ switch (cmd) {
+ case TIOCSRS485:
+ if (copy_from_user(&rs485conf,
+ (struct serial_rs485 *) arg,
+ sizeof(rs485conf)))
+ return -EFAULT;
+
+ /* ... */
+ break;
+
+ case TIOCGRS485:
+ if (copy_to_user((struct serial_rs485 *) arg,
+ ...,
+ sizeof(rs485conf)))
+ return -EFAULT;
+ /* ... */
+ break;
+
+ /* ... */
+ }
+ }
+
+
+4. USAGE FROM USER-LEVEL
+
+ From user-level, RS485 configuration can be get/set using the previous
+ ioctls. For instance, to set RS485 you can use the following code:
+
+ #include <linux/serial.h>
+
+ /* Driver-specific ioctls: */
+ #define TIOCGRS485 0x542E
+ #define TIOCSRS485 0x542F
+
+ /* Open your specific device (e.g., /dev/mydevice): */
+ int fd = open ("/dev/mydevice", O_RDWR);
+ if (fd < 0) {
+ /* Error handling. See errno. */
+ }
+
+ struct serial_rs485 rs485conf;
+
+ /* Enable RS485 mode: */
+ rs485conf.flags |= SER_RS485_ENABLED;
+
+ /* Set logical level for RTS pin equal to 1 when sending: */
+ rs485conf.flags |= SER_RS485_RTS_ON_SEND;
+ /* or, set logical level for RTS pin equal to 0 when sending: */
+ rs485conf.flags &= ~(SER_RS485_RTS_ON_SEND);
+
+ /* Set logical level for RTS pin equal to 1 after sending: */
+ rs485conf.flags |= SER_RS485_RTS_AFTER_SEND;
+ /* or, set logical level for RTS pin equal to 0 after sending: */
+ rs485conf.flags &= ~(SER_RS485_RTS_AFTER_SEND);
+
+ /* Set rts delay before send, if needed: */
+ rs485conf.delay_rts_before_send = ...;
+
+ /* Set rts delay after send, if needed: */
+ rs485conf.delay_rts_after_send = ...;
+
+ /* Set this flag if you want to receive data even whilst sending data */
+ rs485conf.flags |= SER_RS485_RX_DURING_TX;
+
+ if (ioctl (fd, TIOCSRS485, &rs485conf) < 0) {
+ /* Error handling. See errno. */
+ }
+
+ /* Use read() and write() syscalls here... */
+
+ /* Close the device when finished: */
+ if (close (fd) < 0) {
+ /* Error handling. See errno. */
+ }
+
+5. REFERENCES
+
+ [1] include/uapi/linux/serial.h
+ [2] Documentation/devicetree/bindings/serial/rs485.txt
diff --git a/Documentation/serial/tty.txt b/Documentation/serial/tty.txt
new file mode 100644
index 000000000..dbe6623fe
--- /dev/null
+++ b/Documentation/serial/tty.txt
@@ -0,0 +1,304 @@
+
+ The Lockronomicon
+
+Your guide to the ancient and twisted locking policies of the tty layer and
+the warped logic behind them. Beware all ye who read on.
+
+FIXME: still need to work out the full set of BKL assumptions and document
+them so they can eventually be killed off.
+
+
+Line Discipline
+---------------
+
+Line disciplines are registered with tty_register_ldisc() passing the
+discipline number and the ldisc structure. At the point of registration the
+discipline must be ready to use and it is possible it will get used before
+the call returns success. If the call returns an error then it won't get
+called. Do not re-use ldisc numbers as they are part of the userspace ABI
+and writing over an existing ldisc will cause demons to eat your computer.
+After the return the ldisc data has been copied so you may free your own
+copy of the structure. You must not re-register over the top of the line
+discipline even with the same data or your computer again will be eaten by
+demons.
+
+In order to remove a line discipline call tty_unregister_ldisc().
+In ancient times this always worked. In modern times the function will
+return -EBUSY if the ldisc is currently in use. Since the ldisc referencing
+code manages the module counts this should not usually be a concern.
+
+Heed this warning: the reference count field of the registered copies of the
+tty_ldisc structure in the ldisc table counts the number of lines using this
+discipline. The reference count of the tty_ldisc structure within a tty
+counts the number of active users of the ldisc at this instant. In effect it
+counts the number of threads of execution within an ldisc method (plus those
+about to enter and exit although this detail matters not).
+
+Line Discipline Methods
+-----------------------
+
+TTY side interfaces:
+
+open() - Called when the line discipline is attached to
+ the terminal. No other call into the line
+ discipline for this tty will occur until it
+ completes successfully. Returning an error will
+ prevent the ldisc from being attached. Can sleep.
+
+close() - This is called on a terminal when the line
+ discipline is being unplugged. At the point of
+ execution no further users will enter the
+ ldisc code for this tty. Can sleep.
+
+hangup() - Called when the tty line is hung up.
+ The line discipline should cease I/O to the tty.
+ No further calls into the ldisc code will occur.
+ The return value is ignored. Can sleep.
+
+write() - A process is writing data through the line
+ discipline. Multiple write calls are serialized
+ by the tty layer for the ldisc. May sleep.
+
+flush_buffer() - (optional) May be called at any point between
+ open and close, and instructs the line discipline
+ to empty its input buffer.
+
+chars_in_buffer() - (optional) Report the number of bytes in the input
+ buffer.
+
+set_termios() - (optional) Called on termios structure changes.
+ The caller passes the old termios data and the
+ current data is in the tty. Called under the
+ termios semaphore so allowed to sleep. Serialized
+ against itself only.
+
+read() - Move data from the line discipline to the user.
+ Multiple read calls may occur in parallel and the
+ ldisc must deal with serialization issues. May
+ sleep.
+
+poll() - Check the status for the poll/select calls. Multiple
+ poll calls may occur in parallel. May sleep.
+
+ioctl() - Called when an ioctl is handed to the tty layer
+ that might be for the ldisc. Multiple ioctl calls
+ may occur in parallel. May sleep.
+
+compat_ioctl() - Called when a 32 bit ioctl is handed to the tty layer
+ that might be for the ldisc. Multiple ioctl calls
+ may occur in parallel. May sleep.
+
+Driver Side Interfaces:
+
+receive_buf() - Hand buffers of bytes from the driver to the ldisc
+ for processing. Semantics currently rather
+ mysterious 8(
+
+write_wakeup() - May be called at any point between open and close.
+ The TTY_DO_WRITE_WAKEUP flag indicates if a call
+ is needed but always races versus calls. Thus the
+ ldisc must be careful about setting order and to
+ handle unexpected calls. Must not sleep.
+
+ The driver is forbidden from calling this directly
+ from the ->write call from the ldisc as the ldisc
+ is permitted to call the driver write method from
+ this function. In such a situation defer it.
+
+dcd_change() - Report to the tty line the current DCD pin status
+ changes and the relative timestamp. The timestamp
+ cannot be NULL.
+
+
+Driver Access
+
+Line discipline methods can call the following methods of the underlying
+hardware driver through the function pointers within the tty->driver
+structure:
+
+write() Write a block of characters to the tty device.
+ Returns the number of characters accepted. The
+ character buffer passed to this method is already
+ in kernel space.
+
+put_char() Queues a character for writing to the tty device.
+ If there is no room in the queue, the character is
+ ignored.
+
+flush_chars() (Optional) If defined, must be called after
+ queueing characters with put_char() in order to
+ start transmission.
+
+write_room() Returns the numbers of characters the tty driver
+ will accept for queueing to be written.
+
+ioctl() Invoke device specific ioctl.
+ Expects data pointers to refer to userspace.
+ Returns ENOIOCTLCMD for unrecognized ioctl numbers.
+
+set_termios() Notify the tty driver that the device's termios
+ settings have changed. New settings are in
+ tty->termios. Previous settings should be passed in
+ the "old" argument.
+
+ The API is defined such that the driver should return
+ the actual modes selected. This means that the
+ driver function is responsible for modifying any
+ bits in the request it cannot fulfill to indicate
+ the actual modes being used. A device with no
+ hardware capability for change (e.g. a USB dongle or
+ virtual port) can provide NULL for this method.
+
+throttle() Notify the tty driver that input buffers for the
+ line discipline are close to full, and it should
+ somehow signal that no more characters should be
+ sent to the tty.
+
+unthrottle() Notify the tty driver that characters can now be
+ sent to the tty without fear of overrunning the
+ input buffers of the line disciplines.
+
+stop() Ask the tty driver to stop outputting characters
+ to the tty device.
+
+start() Ask the tty driver to resume sending characters
+ to the tty device.
+
+hangup() Ask the tty driver to hang up the tty device.
+
+break_ctl() (Optional) Ask the tty driver to turn on or off
+ BREAK status on the RS-232 port. If state is -1,
+ then the BREAK status should be turned on; if
+ state is 0, then BREAK should be turned off.
+ If this routine is not implemented, use ioctls
+ TIOCSBRK / TIOCCBRK instead.
+
+wait_until_sent() Waits until the device has written out all of the
+ characters in its transmitter FIFO.
+
+send_xchar() Send a high-priority XON/XOFF character to the device.
+
+
+Flags
+
+Line discipline methods have access to tty->flags field containing the
+following interesting flags:
+
+TTY_THROTTLED Driver input is throttled. The ldisc should call
+ tty->driver->unthrottle() in order to resume
+ reception when it is ready to process more data.
+
+TTY_DO_WRITE_WAKEUP If set, causes the driver to call the ldisc's
+ write_wakeup() method in order to resume
+ transmission when it can accept more data
+ to transmit.
+
+TTY_IO_ERROR If set, causes all subsequent userspace read/write
+ calls on the tty to fail, returning -EIO.
+
+TTY_OTHER_CLOSED Device is a pty and the other side has closed.
+
+TTY_OTHER_DONE Device is a pty and the other side has closed and
+ all pending input processing has been completed.
+
+TTY_NO_WRITE_SPLIT Prevent driver from splitting up writes into
+ smaller chunks.
+
+
+Locking
+
+Callers to the line discipline functions from the tty layer are required to
+take line discipline locks. The same is true of calls from the driver side
+but not yet enforced.
+
+Three calls are now provided
+
+ ldisc = tty_ldisc_ref(tty);
+
+takes a handle to the line discipline in the tty and returns it. If no ldisc
+is currently attached or the ldisc is being closed and re-opened at this
+point then NULL is returned. While this handle is held the ldisc will not
+change or go away.
+
+ tty_ldisc_deref(ldisc)
+
+Returns the ldisc reference and allows the ldisc to be closed. Returning the
+reference takes away your right to call the ldisc functions until you take
+a new reference.
+
+ ldisc = tty_ldisc_ref_wait(tty);
+
+Performs the same function as tty_ldisc_ref except that it will wait for an
+ldisc change to complete and then return a reference to the new ldisc.
+
+While these functions are slightly slower than the old code they should have
+minimal impact as most receive logic uses the flip buffers and they only
+need to take a reference when they push bits up through the driver.
+
+A caution: The ldisc->open(), ldisc->close() and driver->set_ldisc
+functions are called with the ldisc unavailable. Thus tty_ldisc_ref will
+fail in this situation if used within these functions. Ldisc and driver
+code calling its own functions must be careful in this case.
+
+
+Driver Interface
+----------------
+
+open() - Called when a device is opened. May sleep
+
+close() - Called when a device is closed. At the point of
+ return from this call the driver must make no
+ further ldisc calls of any kind. May sleep
+
+write() - Called to write bytes to the device. May not
+ sleep. May occur in parallel in special cases.
+ Because this includes panic paths drivers generally
+ shouldn't try and do clever locking here.
+
+put_char() - Stuff a single character onto the queue. The
+ driver is guaranteed following up calls to
+ flush_chars.
+
+flush_chars() - Ask the kernel to write put_char queue
+
+write_room() - Return the number of characters that can be stuffed
+ into the port buffers without overflow (or less).
+ The ldisc is responsible for being intelligent
+ about multi-threading of write_room/write calls
+
+ioctl() - Called when an ioctl may be for the driver
+
+set_termios() - Called on termios change, serialized against
+ itself by a semaphore. May sleep.
+
+set_ldisc() - Notifier for discipline change. At the point this
+ is done the discipline is not yet usable. Can now
+ sleep (I think)
+
+throttle() - Called by the ldisc to ask the driver to do flow
+ control. Serialization including with unthrottle
+ is the job of the ldisc layer.
+
+unthrottle() - Called by the ldisc to ask the driver to stop flow
+ control.
+
+stop() - Ldisc notifier to the driver to stop output. As with
+ throttle the serializations with start() are down
+ to the ldisc layer.
+
+start() - Ldisc notifier to the driver to start output.
+
+hangup() - Ask the tty driver to cause a hangup initiated
+ from the host side. [Can sleep ??]
+
+break_ctl() - Send RS232 break. Can sleep. Can get called in
+ parallel, driver must serialize (for now), and
+ with write calls.
+
+wait_until_sent() - Wait for characters to exit the hardware queue
+ of the driver. Can sleep
+
+send_xchar() - Send XON/XOFF and if possible jump the queue with
+ it in order to get fast flow control responses.
+ Cannot sleep ??
+
diff --git a/Documentation/sgi-ioc4.txt b/Documentation/sgi-ioc4.txt
new file mode 100644
index 000000000..876c96ae3
--- /dev/null
+++ b/Documentation/sgi-ioc4.txt
@@ -0,0 +1,45 @@
+The SGI IOC4 PCI device is a bit of a strange beast, so some notes on
+it are in order.
+
+First, even though the IOC4 performs multiple functions, such as an
+IDE controller, a serial controller, a PS/2 keyboard/mouse controller,
+and an external interrupt mechanism, it's not implemented as a
+multifunction device. The consequence of this from a software
+standpoint is that all these functions share a single IRQ, and
+they can't all register to own the same PCI device ID. To make
+matters a bit worse, some of the register blocks (and even registers
+themselves) present in IOC4 are mixed-purpose between these several
+functions, meaning that there's no clear "owning" device driver.
+
+The solution is to organize the IOC4 driver into several independent
+drivers, "ioc4", "sgiioc4", and "ioc4_serial". Note that there is no
+PS/2 controller driver as this functionality has never been wired up
+on a shipping IO card.
+
+ioc4
+====
+This is the core (or shim) driver for IOC4. It is responsible for
+initializing the basic functionality of the chip, and allocating
+the PCI resources that are shared between the IOC4 functions.
+
+This driver also provides registration functions that the other
+IOC4 drivers can call to make their presence known. Each driver
+needs to provide a probe and remove function, which are invoked
+by the core driver at appropriate times. The interface of these
+IOC4 function probe and remove operations isn't precisely the same
+as PCI device probe and remove operations, but is logically the
+same operation.
+
+sgiioc4
+=======
+This is the IDE driver for IOC4. Its name isn't very descriptive
+simply for historical reasons (it used to be the only IOC4 driver
+component). There's not much to say about it other than it hooks
+up to the ioc4 driver via the appropriate registration, probe, and
+remove functions.
+
+ioc4_serial
+===========
+This is the serial driver for IOC4. There's not much to say about it
+other than it hooks up to the ioc4 driver via the appropriate registration,
+probe, and remove functions.
diff --git a/Documentation/sh/new-machine.txt b/Documentation/sh/new-machine.txt
new file mode 100644
index 000000000..f0354164c
--- /dev/null
+++ b/Documentation/sh/new-machine.txt
@@ -0,0 +1,278 @@
+
+ Adding a new board to LinuxSH
+ ================================
+
+ Paul Mundt <lethal@linux-sh.org>
+
+This document attempts to outline what steps are necessary to add support
+for new boards to the LinuxSH port under the new 2.5 and 2.6 kernels. This
+also attempts to outline some of the noticeable changes between the 2.4
+and the 2.5/2.6 SH backend.
+
+1. New Directory Structure
+==========================
+
+The first thing to note is the new directory structure. Under 2.4, most
+of the board-specific code (with the exception of stboards) ended up
+in arch/sh/kernel/ directly, with board-specific headers ending up in
+include/asm-sh/. For the new kernel, things are broken out by board type,
+companion chip type, and CPU type. Looking at a tree view of this directory
+hierarchy looks like the following:
+
+Board-specific code:
+
+.
+|-- arch
+| `-- sh
+| `-- boards
+| |-- adx
+| | `-- board-specific files
+| |-- bigsur
+| | `-- board-specific files
+| |
+| ... more boards here ...
+|
+`-- include
+ `-- asm-sh
+ |-- adx
+ | `-- board-specific headers
+ |-- bigsur
+ | `-- board-specific headers
+ |
+ .. more boards here ...
+
+Next, for companion chips:
+.
+`-- arch
+ `-- sh
+ `-- cchips
+ `-- hd6446x
+ `-- hd64461
+ `-- cchip-specific files
+
+... and so on. Headers for the companion chips are treated the same way as
+board-specific headers. Thus, include/asm-sh/hd64461 is home to all of the
+hd64461-specific headers.
+
+Finally, CPU family support is also abstracted:
+.
+|-- arch
+| `-- sh
+| |-- kernel
+| | `-- cpu
+| | |-- sh2
+| | | `-- SH-2 generic files
+| | |-- sh3
+| | | `-- SH-3 generic files
+| | `-- sh4
+| | `-- SH-4 generic files
+| `-- mm
+| `-- This is also broken out per CPU family, so each family can
+| have their own set of cache/tlb functions.
+|
+`-- include
+ `-- asm-sh
+ |-- cpu-sh2
+ | `-- SH-2 specific headers
+ |-- cpu-sh3
+ | `-- SH-3 specific headers
+ `-- cpu-sh4
+ `-- SH-4 specific headers
+
+It should be noted that CPU subtypes are _not_ abstracted. Thus, these still
+need to be dealt with by the CPU family specific code.
+
+2. Adding a New Board
+=====================
+
+The first thing to determine is whether the board you are adding will be
+isolated, or whether it will be part of a family of boards that can mostly
+share the same board-specific code with minor differences.
+
+In the first case, this is just a matter of making a directory for your
+board in arch/sh/boards/ and adding rules to hook your board in with the
+build system (more on this in the next section). However, for board families
+it makes more sense to have a common top-level arch/sh/boards/ directory
+and then populate that with sub-directories for each member of the family.
+Both the Solution Engine and the hp6xx boards are an example of this.
+
+After you have setup your new arch/sh/boards/ directory, remember that you
+should also add a directory in include/asm-sh for headers localized to this
+board (if there are going to be more than one). In order to interoperate
+seamlessly with the build system, it's best to have this directory the same
+as the arch/sh/boards/ directory name, though if your board is again part of
+a family, the build system has ways of dealing with this (via incdir-y
+overloading), and you can feel free to name the directory after the family
+member itself.
+
+There are a few things that each board is required to have, both in the
+arch/sh/boards and the include/asm-sh/ hierarchy. In order to better
+explain this, we use some examples for adding an imaginary board. For
+setup code, we're required at the very least to provide definitions for
+get_system_type() and platform_setup(). For our imaginary board, this
+might look something like:
+
+/*
+ * arch/sh/boards/vapor/setup.c - Setup code for imaginary board
+ */
+#include <linux/init.h>
+#include <asm/rtc.h> /* for board_time_init() */
+
+const char *get_system_type(void)
+{
+ return "FooTech Vaporboard";
+}
+
+int __init platform_setup(void)
+{
+ /*
+ * If our hardware actually existed, we would do real
+ * setup here. Though it's also sane to leave this empty
+ * if there's no real init work that has to be done for
+ * this board.
+ */
+
+ /*
+ * Presume all FooTech boards have the same broken timer,
+ * and also presume that we've defined foo_timer_init to
+ * do something useful.
+ */
+ board_time_init = foo_timer_init;
+
+ /* Start-up imaginary PCI ... */
+
+ /* And whatever else ... */
+
+ return 0;
+}
+
+Our new imaginary board will also have to tie into the machvec in order for it
+to be of any use.
+
+machvec functions fall into a number of categories:
+
+ - I/O functions to IO memory (inb etc) and PCI/main memory (readb etc).
+ - I/O mapping functions (ioport_map, ioport_unmap, etc).
+ - a 'heartbeat' function.
+ - PCI and IRQ initialization routines.
+ - Consistent allocators (for boards that need special allocators,
+ particularly for allocating out of some board-specific SRAM for DMA
+ handles).
+
+There are machvec functions added and removed over time, so always be sure to
+consult include/asm-sh/machvec.h for the current state of the machvec.
+
+The kernel will automatically wrap in generic routines for undefined function
+pointers in the machvec at boot time, as machvec functions are referenced
+unconditionally throughout most of the tree. Some boards have incredibly
+sparse machvecs (such as the dreamcast and sh03), whereas others must define
+virtually everything (rts7751r2d).
+
+Adding a new machine is relatively trivial (using vapor as an example):
+
+If the board-specific definitions are quite minimalistic, as is the case for
+the vast majority of boards, simply having a single board-specific header is
+sufficient.
+
+ - add a new file include/asm-sh/vapor.h which contains prototypes for
+ any machine specific IO functions prefixed with the machine name, for
+ example vapor_inb. These will be needed when filling out the machine
+ vector.
+
+ Note that these prototypes are generated automatically by setting
+ __IO_PREFIX to something sensible. A typical example would be:
+
+ #define __IO_PREFIX vapor
+ #include <asm/io_generic.h>
+
+ somewhere in the board-specific header. Any boards being ported that still
+ have a legacy io.h should remove it entirely and switch to the new model.
+
+ - Add machine vector definitions to the board's setup.c. At a bare minimum,
+ this must be defined as something like:
+
+ struct sh_machine_vector mv_vapor __initmv = {
+ .mv_name = "vapor",
+ };
+ ALIAS_MV(vapor)
+
+ - finally add a file arch/sh/boards/vapor/io.c, which contains definitions of
+ the machine specific io functions (if there are enough to warrant it).
+
+3. Hooking into the Build System
+================================
+
+Now that we have the corresponding directories setup, and all of the
+board-specific code is in place, it's time to look at how to get the
+whole mess to fit into the build system.
+
+Large portions of the build system are now entirely dynamic, and merely
+require the proper entry here and there in order to get things done.
+
+The first thing to do is to add an entry to arch/sh/Kconfig, under the
+"System type" menu:
+
+config SH_VAPOR
+ bool "Vapor"
+ help
+ select Vapor if configuring for a FooTech Vaporboard.
+
+next, this has to be added into arch/sh/Makefile. All boards require a
+machdir-y entry in order to be built. This entry needs to be the name of
+the board directory as it appears in arch/sh/boards, even if it is in a
+sub-directory (in which case, all parent directories below arch/sh/boards/
+need to be listed). For our new board, this entry can look like:
+
+machdir-$(CONFIG_SH_VAPOR) += vapor
+
+provided that we've placed everything in the arch/sh/boards/vapor/ directory.
+
+Next, the build system assumes that your include/asm-sh directory will also
+be named the same. If this is not the case (as is the case with multiple
+boards belonging to a common family), then the directory name needs to be
+implicitly appended to incdir-y. The existing code manages this for the
+Solution Engine and hp6xx boards, so see these for an example.
+
+Once that is taken care of, it's time to add an entry for the mach type.
+This is done by adding an entry to the end of the arch/sh/tools/mach-types
+list. The method for doing this is self explanatory, and so we won't waste
+space restating it here. After this is done, you will be able to use
+implicit checks for your board if you need this somewhere throughout the
+common code, such as:
+
+ /* Make sure we're on the FooTech Vaporboard */
+ if (!mach_is_vapor())
+ return -ENODEV;
+
+also note that the mach_is_boardname() check will be implicitly forced to
+lowercase, regardless of the fact that the mach-types entries are all
+uppercase. You can read the script if you really care, but it's pretty ugly,
+so you probably don't want to do that.
+
+Now all that's left to do is providing a defconfig for your new board. This
+way, other people who end up with this board can simply use this config
+for reference instead of trying to guess what settings are supposed to be
+used on it.
+
+Also, as soon as you have copied over a sample .config for your new board
+(assume arch/sh/configs/vapor_defconfig), you can also use this directly as a
+build target, and it will be implicitly listed as such in the help text.
+
+Looking at the 'make help' output, you should now see something like:
+
+Architecture specific targets (sh):
+ zImage - Compressed kernel image (arch/sh/boot/zImage)
+ adx_defconfig - Build for adx
+ cqreek_defconfig - Build for cqreek
+ dreamcast_defconfig - Build for dreamcast
+...
+ vapor_defconfig - Build for vapor
+
+which then allows you to do:
+
+$ make ARCH=sh CROSS_COMPILE=sh4-linux- vapor_defconfig vmlinux
+
+which will in turn copy the defconfig for this board, run it through
+oldconfig (prompting you for any new options since the time of creation),
+and start you on your way to having a functional kernel for your new
+board.
diff --git a/Documentation/sh/register-banks.txt b/Documentation/sh/register-banks.txt
new file mode 100644
index 000000000..a6719f2f6
--- /dev/null
+++ b/Documentation/sh/register-banks.txt
@@ -0,0 +1,33 @@
+ Notes on register bank usage in the kernel
+ ==========================================
+
+Introduction
+------------
+
+The SH-3 and SH-4 CPU families traditionally include a single partial register
+bank (selected by SR.RB, only r0 ... r7 are banked), whereas other families
+may have more full-featured banking or simply no such capabilities at all.
+
+SR.RB banking
+-------------
+
+In the case of this type of banking, banked registers are mapped directly to
+r0 ... r7 if SR.RB is set to the bank we are interested in, otherwise ldc/stc
+can still be used to reference the banked registers (as r0_bank ... r7_bank)
+when in the context of another bank. The developer must keep the SR.RB value
+in mind when writing code that utilizes these banked registers, for obvious
+reasons. Userspace is also not able to poke at the bank1 values, so these can
+be used rather effectively as scratch registers by the kernel.
+
+Presently the kernel uses several of these registers.
+
+ - r0_bank, r1_bank (referenced as k0 and k1, used for scratch
+ registers when doing exception handling).
+ - r2_bank (used to track the EXPEVT/INTEVT code)
+ - Used by do_IRQ() and friends for doing irq mapping based off
+ of the interrupt exception vector jump table offset
+ - r6_bank (global interrupt mask)
+ - The SR.IMASK interrupt handler makes use of this to set the
+ interrupt priority level (used by local_irq_enable())
+ - r7_bank (current)
+
diff --git a/Documentation/smsc_ece1099.txt b/Documentation/smsc_ece1099.txt
new file mode 100644
index 000000000..6b492e82b
--- /dev/null
+++ b/Documentation/smsc_ece1099.txt
@@ -0,0 +1,56 @@
+What is smsc-ece1099?
+----------------------
+
+The ECE1099 is a 40-Pin 3.3V Keyboard Scan Expansion
+or GPIO Expansion device. The device supports a keyboard
+scan matrix of 23x8. The device is connected to a Master
+via the SMSC BC-Link interface or via the SMBus.
+Keypad scan Input(KSI) and Keypad Scan Output(KSO) signals
+are multiplexed with GPIOs.
+
+Interrupt generation
+--------------------
+
+Interrupts can be generated by an edge detection on a GPIO
+pin or an edge detection on one of the bus interface pins.
+Interrupts can also be detected on the keyboard scan interface.
+The bus interrupt pin (BC_INT# or SMBUS_INT#) is asserted if
+any bit in one of the Interrupt Status registers is 1 and
+the corresponding Interrupt Mask bit is also 1.
+
+In order for software to determine which device is the source
+of an interrupt, it should first read the Group Interrupt Status Register
+to determine which Status register group is a source for the interrupt.
+Software should read both the Status register and the associated Mask register,
+then AND the two values together. Bits that are 1 in the result of the AND
+are active interrupts. Software clears an interrupt by writing a 1 to the
+corresponding bit in the Status register.
+
+Communication Protocol
+----------------------
+
+- SMbus slave Interface
+ The host processor communicates with the ECE1099 device
+ through a series of read/write registers via the SMBus
+ interface. SMBus is a serial communication protocol between
+ a computer host and its peripheral devices. The SMBus data
+ rate is 10KHz minimum to 400 KHz maximum
+
+- Slave Bus Interface
+ The ECE1099 device SMBus implementation is a subset of the
+ SMBus interface to the host. The device is a slave-only SMBus device.
+ The implementation in the device is a subset of SMBus since it
+ only supports four protocols.
+
+ The Write Byte, Read Byte, Send Byte, and Receive Byte protocols are the
+ only valid SMBus protocols for the device.
+
+- BC-LinkTM Interface
+ The BC-Link is a proprietary bus that allows communication
+ between a Master device and a Companion device. The Master
+ device uses this serial bus to read and write registers
+ located on the Companion device. The bus comprises three signals,
+ BC_CLK, BC_DAT and BC_INT#. The Master device always provides the
+ clock, BC_CLK, and the Companion device is the source for an
+ independent asynchronous interrupt signal, BC_INT#. The ECE1099
+ supports BC-Link speeds up to 24MHz.
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
new file mode 100644
index 000000000..5a8583abe
--- /dev/null
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -0,0 +1,2312 @@
+
+ Advanced Linux Sound Architecture - Driver
+ ==========================================
+ Configuration guide
+
+
+Kernel Configuration
+====================
+
+To enable ALSA support you need at least to build the kernel with
+primary sound card support (CONFIG_SOUND). Since ALSA can emulate OSS,
+you don't have to choose any of the OSS modules.
+
+Enable "OSS API emulation" (CONFIG_SND_OSSEMUL) and both OSS mixer and
+PCM supports if you want to run OSS applications with ALSA.
+
+If you want to support the WaveTable functionality on cards such as
+SB Live! then you need to enable "Sequencer support"
+(CONFIG_SND_SEQUENCER).
+
+To make ALSA debug messages more verbose, enable the "Verbose printk"
+and "Debug" options. To check for memory leaks, turn on "Debug memory"
+too. "Debug detection" will add checks for the detection of cards.
+
+Please note that all the ALSA ISA drivers support the Linux isapnp API
+(if the card supports ISA PnP). You don't need to configure the cards
+using isapnptools.
+
+
+Creating ALSA devices
+=====================
+
+This depends on your distribution, but normally you use the /dev/MAKEDEV
+script to create the necessary device nodes. On some systems you use a
+script named 'snddevices'.
+
+
+Module parameters
+=================
+
+The user can load modules with options. If the module supports more than
+one card and you have more than one card of the same type then you can
+specify multiple values for the option separated by commas.
+
+Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
+
+ Module snd
+ ----------
+
+ The core ALSA module. It is used by all ALSA card drivers.
+ It takes the following options which have global effects.
+
+ major - major number for sound driver
+ - Default: 116
+ cards_limit
+ - limiting card index for auto-loading (1-8)
+ - Default: 1
+ - For auto-loading more than one card, specify this
+ option together with snd-card-X aliases.
+ slots - Reserve the slot index for the given driver.
+ This option takes multiple strings.
+ See "Module Autoloading Support" section for details.
+ debug - Specifies the debug message level
+ (0 = disable debug prints, 1 = normal debug messages,
+ 2 = verbose debug messages)
+ This option appears only when CONFIG_SND_DEBUG=y.
+ This option can be dynamically changed via sysfs
+ /sys/modules/snd/parameters/debug file.
+
+ Module snd-pcm-oss
+ ------------------
+
+ The PCM OSS emulation module.
+ This module takes options which change the mapping of devices.
+
+ dsp_map - PCM device number maps assigned to the 1st OSS device.
+ - Default: 0
+ adsp_map - PCM device number maps assigned to the 2st OSS device.
+ - Default: 1
+ nonblock_open
+ - Don't block opening busy PCM devices. Default: 1
+
+ For example, when dsp_map=2, /dev/dsp will be mapped to PCM #2 of
+ the card #0. Similarly, when adsp_map=0, /dev/adsp will be mapped
+ to PCM #0 of the card #0.
+ For changing the second or later card, specify the option with
+ commas, such like "dsp_map=0,1".
+
+ nonblock_open option is used to change the behavior of the PCM
+ regarding opening the device. When this option is non-zero,
+ opening a busy OSS PCM device won't be blocked but return
+ immediately with EAGAIN (just like O_NONBLOCK flag).
+
+ Module snd-rawmidi
+ ------------------
+
+ This module takes options which change the mapping of devices.
+ similar to those of the snd-pcm-oss module.
+
+ midi_map - MIDI device number maps assigned to the 1st OSS device.
+ - Default: 0
+ amidi_map - MIDI device number maps assigned to the 2st OSS device.
+ - Default: 1
+
+ Common parameters for top sound card modules
+ --------------------------------------------
+
+ Each of top level sound card module takes the following options.
+
+ index - index (slot #) of sound card
+ - Values: 0 through 31 or negative
+ - If nonnegative, assign that index number
+ - if negative, interpret as a bitmask of permissible
+ indices; the first free permitted index is assigned
+ - Default: -1
+ id - card ID (identifier or name)
+ - Can be up to 15 characters long
+ - Default: the card type
+ - A directory by this name is created under /proc/asound/
+ containing information about the card
+ - This ID can be used instead of the index number in
+ identifying the card
+ enable - enable card
+ - Default: enabled, for PCI and ISA PnP cards
+
+ Module snd-adlib
+ ----------------
+
+ Module for AdLib FM cards.
+
+ port - port # for OPL chip
+
+ This module supports multiple cards. It does not support autoprobe, so
+ the port must be specified. For actual AdLib FM cards it will be 0x388.
+ Note that this card does not have PCM support and no mixer; only FM
+ synthesis.
+
+ Make sure you have "sbiload" from the alsa-tools package available and,
+ after loading the module, find out the assigned ALSA sequencer port
+ number through "sbiload -l". Example output:
+
+ Port Client name Port name
+ 64:0 OPL2 FM synth OPL2 FM Port
+
+ Load the std.sb and drums.sb patches also supplied by sbiload:
+
+ sbiload -p 64:0 std.sb drums.sb
+
+ If you use this driver to drive an OPL3, you can use std.o3 and drums.o3
+ instead. To have the card produce sound, use aplaymidi from alsa-utils:
+
+ aplaymidi -p 64:0 foo.mid
+
+ Module snd-ad1816a
+ ------------------
+
+ Module for sound cards based on Analog Devices AD1816A/AD1815 ISA chips.
+
+ clockfreq - Clock frequency for AD1816A chip (default = 0, 33000Hz)
+
+ This module supports multiple cards, autoprobe and PnP.
+
+ Module snd-ad1848
+ -----------------
+
+ Module for sound cards based on AD1848/AD1847/CS4248 ISA chips.
+
+ port - port # for AD1848 chip
+ irq - IRQ # for AD1848 chip
+ dma1 - DMA # for AD1848 chip (0,1,3)
+
+ This module supports multiple cards. It does not support autoprobe
+ thus main port must be specified!!! Other ports are optional.
+
+ The power-management is supported.
+
+ Module snd-ad1889
+ -----------------
+
+ Module for Analog Devices AD1889 chips.
+
+ ac97_quirk - AC'97 workaround for strange hardware
+ See the description of intel8x0 module for details.
+
+ This module supports multiple cards.
+
+ Module snd-ali5451
+ ------------------
+
+ Module for ALi M5451 PCI chip.
+
+ pcm_channels - Number of hardware channels assigned for PCM
+ spdif - Support SPDIF I/O
+ - Default: disabled
+
+ This module supports one chip and autoprobe.
+
+ The power-management is supported.
+
+ Module snd-als100
+ -----------------
+
+ Module for sound cards based on Avance Logic ALS100/ALS120 ISA chips.
+
+ This module supports multiple cards, autoprobe and PnP.
+
+ The power-management is supported.
+
+ Module snd-als300
+ -----------------
+
+ Module for Avance Logic ALS300 and ALS300+
+
+ This module supports multiple cards.
+
+ The power-management is supported.
+
+ Module snd-als4000
+ ------------------
+
+ Module for sound cards based on Avance Logic ALS4000 PCI chip.
+
+ joystick_port - port # for legacy joystick support.
+ 0 = disabled (default), 1 = auto-detect
+
+ This module supports multiple cards, autoprobe and PnP.
+
+ The power-management is supported.
+
+ Module snd-asihpi
+ -----------------
+
+ Module for AudioScience ASI soundcards
+
+ enable_hpi_hwdep - enable HPI hwdep for AudioScience soundcard
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-atiixp
+ -----------------
+
+ Module for ATI IXP 150/200/250/400 AC97 controllers.
+
+ ac97_clock - AC'97 clock (default = 48000)
+ ac97_quirk - AC'97 workaround for strange hardware
+ See "AC97 Quirk Option" section below.
+ ac97_codec - Workaround to specify which AC'97 codec
+ instead of probing. If this works for you
+ file a bug with your `lspci -vn` output.
+ -2 -- Force probing.
+ -1 -- Default behavior.
+ 0-2 -- Use the specified codec.
+ spdif_aclink - S/PDIF transfer over AC-link (default = 1)
+
+ This module supports one card and autoprobe.
+
+ ATI IXP has two different methods to control SPDIF output. One is
+ over AC-link and another is over the "direct" SPDIF output. The
+ implementation depends on the motherboard, and you'll need to
+ choose the correct one via spdif_aclink module option.
+
+ The power-management is supported.
+
+ Module snd-atiixp-modem
+ -----------------------
+
+ Module for ATI IXP 150/200/250 AC97 modem controllers.
+
+ This module supports one card and autoprobe.
+
+ Note: The default index value of this module is -2, i.e. the first
+ slot is excluded.
+
+ The power-management is supported.
+
+ Module snd-au8810, snd-au8820, snd-au8830
+ -----------------------------------------
+
+ Module for Aureal Vortex, Vortex2 and Advantage device.
+
+ pcifix - Control PCI workarounds
+ 0 = Disable all workarounds
+ 1 = Force the PCI latency of the Aureal card to 0xff
+ 2 = Force the Extend PCI#2 Internal Master for Efficient
+ Handling of Dummy Requests on the VIA KT133 AGP Bridge
+ 3 = Force both settings
+ 255 = Autodetect what is required (default)
+
+ This module supports all ADB PCM channels, ac97 mixer, SPDIF, hardware
+ EQ, mpu401, gameport. A3D and wavetable support are still in development.
+ Development and reverse engineering work is being coordinated at
+ http://savannah.nongnu.org/projects/openvortex/
+ SPDIF output has a copy of the AC97 codec output, unless you use the
+ "spdif" pcm device, which allows raw data passthru.
+ The hardware EQ hardware and SPDIF is only present in the Vortex2 and
+ Advantage.
+
+ Note: Some ALSA mixer applications don't handle the SPDIF sample rate
+ control correctly. If you have problems regarding this, try
+ another ALSA compliant mixer (alsamixer works).
+
+ Module snd-azt1605
+ ------------------
+
+ Module for Aztech Sound Galaxy soundcards based on the Aztech AZT1605
+ chipset.
+
+ port - port # for BASE (0x220,0x240,0x260,0x280)
+ wss_port - port # for WSS (0x530,0x604,0xe80,0xf40)
+ irq - IRQ # for WSS (7,9,10,11)
+ dma1 - DMA # for WSS playback (0,1,3)
+ dma2 - DMA # for WSS capture (0,1), -1 = disabled (default)
+ mpu_port - port # for MPU-401 UART (0x300,0x330), -1 = disabled (default)
+ mpu_irq - IRQ # for MPU-401 UART (3,5,7,9), -1 = disabled (default)
+ fm_port - port # for OPL3 (0x388), -1 = disabled (default)
+
+ This module supports multiple cards. It does not support autoprobe: port,
+ wss_port, irq and dma1 have to be specified. The other values are
+ optional.
+
+ "port" needs to match the BASE ADDRESS jumper on the card (0x220 or 0x240)
+ or the value stored in the card's EEPROM for cards that have an EEPROM and
+ their "CONFIG MODE" jumper set to "EEPROM SETTING". The other values can
+ be chosen freely from the options enumerated above.
+
+ If dma2 is specified and different from dma1, the card will operate in
+ full-duplex mode. When dma1=3, only dma2=0 is valid and the only way to
+ enable capture since only channels 0 and 1 are available for capture.
+
+ Generic settings are "port=0x220 wss_port=0x530 irq=10 dma1=1 dma2=0
+ mpu_port=0x330 mpu_irq=9 fm_port=0x388".
+
+ Whatever IRQ and DMA channels you pick, be sure to reserve them for
+ legacy ISA in your BIOS.
+
+ Module snd-azt2316
+ ------------------
+
+ Module for Aztech Sound Galaxy soundcards based on the Aztech AZT2316
+ chipset.
+
+ port - port # for BASE (0x220,0x240,0x260,0x280)
+ wss_port - port # for WSS (0x530,0x604,0xe80,0xf40)
+ irq - IRQ # for WSS (7,9,10,11)
+ dma1 - DMA # for WSS playback (0,1,3)
+ dma2 - DMA # for WSS capture (0,1), -1 = disabled (default)
+ mpu_port - port # for MPU-401 UART (0x300,0x330), -1 = disabled (default)
+ mpu_irq - IRQ # for MPU-401 UART (5,7,9,10), -1 = disabled (default)
+ fm_port - port # for OPL3 (0x388), -1 = disabled (default)
+
+ This module supports multiple cards. It does not support autoprobe: port,
+ wss_port, irq and dma1 have to be specified. The other values are
+ optional.
+
+ "port" needs to match the BASE ADDRESS jumper on the card (0x220 or 0x240)
+ or the value stored in the card's EEPROM for cards that have an EEPROM and
+ their "CONFIG MODE" jumper set to "EEPROM SETTING". The other values can
+ be chosen freely from the options enumerated above.
+
+ If dma2 is specified and different from dma1, the card will operate in
+ full-duplex mode. When dma1=3, only dma2=0 is valid and the only way to
+ enable capture since only channels 0 and 1 are available for capture.
+
+ Generic settings are "port=0x220 wss_port=0x530 irq=10 dma1=1 dma2=0
+ mpu_port=0x330 mpu_irq=9 fm_port=0x388".
+
+ Whatever IRQ and DMA channels you pick, be sure to reserve them for
+ legacy ISA in your BIOS.
+
+ Module snd-aw2
+ --------------
+
+ Module for Audiowerk2 sound card
+
+ This module supports multiple cards.
+
+ Module snd-azt2320
+ ------------------
+
+ Module for sound cards based on Aztech System AZT2320 ISA chip (PnP only).
+
+ This module supports multiple cards, PnP and autoprobe.
+
+ The power-management is supported.
+
+ Module snd-azt3328
+ ------------------
+
+ Module for sound cards based on Aztech AZF3328 PCI chip.
+
+ joystick - Enable joystick (default off)
+
+ This module supports multiple cards.
+
+ Module snd-bt87x
+ ----------------
+
+ Module for video cards based on Bt87x chips.
+
+ digital_rate - Override the default digital rate (Hz)
+ load_all - Load the driver even if the card model isn't known
+
+ This module supports multiple cards.
+
+ Note: The default index value of this module is -2, i.e. the first
+ slot is excluded.
+
+ Module snd-ca0106
+ -----------------
+
+ Module for Creative Audigy LS and SB Live 24bit
+
+ This module supports multiple cards.
+
+
+ Module snd-cmi8330
+ ------------------
+
+ Module for sound cards based on C-Media CMI8330 ISA chips.
+
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ wssport - port # for CMI8330 chip (WSS)
+ wssirq - IRQ # for CMI8330 chip (WSS)
+ wssdma - first DMA # for CMI8330 chip (WSS)
+ sbport - port # for CMI8330 chip (SB16)
+ sbirq - IRQ # for CMI8330 chip (SB16)
+ sbdma8 - 8bit DMA # for CMI8330 chip (SB16)
+ sbdma16 - 16bit DMA # for CMI8330 chip (SB16)
+ fmport - (optional) OPL3 I/O port
+ mpuport - (optional) MPU401 I/O port
+ mpuirq - (optional) MPU401 irq #
+
+ This module supports multiple cards and autoprobe.
+
+ The power-management is supported.
+
+ Module snd-cmipci
+ -----------------
+
+ Module for C-Media CMI8338/8738/8768/8770 PCI sound cards.
+
+ mpu_port - port address of MIDI interface (8338 only):
+ 0x300,0x310,0x320,0x330 = legacy port,
+ 0 = disable (default)
+ fm_port - port address of OPL-3 FM synthesizer (8x38 only):
+ 0x388 = legacy port,
+ 1 = integrated PCI port (default on 8738),
+ 0 = disable
+ soft_ac3 - Software-conversion of raw SPDIF packets (model 033 only)
+ (default = 1)
+ joystick_port - Joystick port address (0 = disable, 1 = auto-detect)
+
+ This module supports autoprobe and multiple cards.
+
+ The power-management is supported.
+
+ Module snd-cs4231
+ -----------------
+
+ Module for sound cards based on CS4231 ISA chips.
+
+ port - port # for CS4231 chip
+ mpu_port - port # for MPU-401 UART (optional), -1 = disable
+ irq - IRQ # for CS4231 chip
+ mpu_irq - IRQ # for MPU-401 UART
+ dma1 - first DMA # for CS4231 chip
+ dma2 - second DMA # for CS4231 chip
+
+ This module supports multiple cards. This module does not support autoprobe
+ thus main port must be specified!!! Other ports are optional.
+
+ The power-management is supported.
+
+ Module snd-cs4236
+ -----------------
+
+ Module for sound cards based on CS4232/CS4232A,
+ CS4235/CS4236/CS4236B/CS4237B/
+ CS4238B/CS4239 ISA chips.
+
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ port - port # for CS4236 chip (PnP setup - 0x534)
+ cport - control port # for CS4236 chip (PnP setup - 0x120,0x210,0xf00)
+ mpu_port - port # for MPU-401 UART (PnP setup - 0x300), -1 = disable
+ fm_port - FM port # for CS4236 chip (PnP setup - 0x388), -1 = disable
+ irq - IRQ # for CS4236 chip (5,7,9,11,12,15)
+ mpu_irq - IRQ # for MPU-401 UART (9,11,12,15)
+ dma1 - first DMA # for CS4236 chip (0,1,3)
+ dma2 - second DMA # for CS4236 chip (0,1,3), -1 = disable
+
+ This module supports multiple cards. This module does not support autoprobe
+ (if ISA PnP is not used) thus main port and control port must be
+ specified!!! Other ports are optional.
+
+ The power-management is supported.
+
+ This module is aliased as snd-cs4232 since it provides the old
+ snd-cs4232 functionality, too.
+
+ Module snd-cs4281
+ -----------------
+
+ Module for Cirrus Logic CS4281 soundchip.
+
+ dual_codec - Secondary codec ID (0 = disable, default)
+
+ This module supports multiple cards.
+
+ The power-management is supported.
+
+ Module snd-cs46xx
+ -----------------
+
+ Module for PCI sound cards based on CS4610/CS4612/CS4614/CS4615/CS4622/
+ CS4624/CS4630/CS4280 PCI chips.
+
+ external_amp - Force to enable external amplifier.
+ thinkpad - Force to enable Thinkpad's CLKRUN control.
+ mmap_valid - Support OSS mmap mode (default = 0).
+
+ This module supports multiple cards and autoprobe.
+ Usually external amp and CLKRUN controls are detected automatically
+ from PCI sub vendor/device ids. If they don't work, give the options
+ above explicitly.
+
+ The power-management is supported.
+
+ Module snd-cs5530
+ _________________
+
+ Module for Cyrix/NatSemi Geode 5530 chip.
+
+ Module snd-cs5535audio
+ ----------------------
+
+ Module for multifunction CS5535 companion PCI device
+
+ The power-management is supported.
+
+ Module snd-ctxfi
+ ----------------
+
+ Module for Creative Sound Blaster X-Fi boards (20k1 / 20k2 chips)
+ * Creative Sound Blaster X-Fi Titanium Fatal1ty Champion Series
+ * Creative Sound Blaster X-Fi Titanium Fatal1ty Professional Series
+ * Creative Sound Blaster X-Fi Titanium Professional Audio
+ * Creative Sound Blaster X-Fi Titanium
+ * Creative Sound Blaster X-Fi Elite Pro
+ * Creative Sound Blaster X-Fi Platinum
+ * Creative Sound Blaster X-Fi Fatal1ty
+ * Creative Sound Blaster X-Fi XtremeGamer
+ * Creative Sound Blaster X-Fi XtremeMusic
+
+ reference_rate - reference sample rate, 44100 or 48000 (default)
+ multiple - multiple to ref. sample rate, 1 or 2 (default)
+ subsystem - override the PCI SSID for probing; the value
+ consists of SSVID << 16 | SSDID. The default is
+ zero, which means no override.
+
+ This module supports multiple cards.
+
+ Module snd-darla20
+ ------------------
+
+ Module for Echoaudio Darla20
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-darla24
+ ------------------
+
+ Module for Echoaudio Darla24
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-dt019x
+ -----------------
+
+ Module for Diamond Technologies DT-019X / Avance Logic ALS-007 (PnP
+ only)
+
+ This module supports multiple cards. This module is enabled only with
+ ISA PnP support.
+
+ The power-management is supported.
+
+ Module snd-dummy
+ ----------------
+
+ Module for the dummy sound card. This "card" doesn't do any output
+ or input, but you may use this module for any application which
+ requires a sound card (like RealPlayer).
+
+ pcm_devs - Number of PCM devices assigned to each card
+ (default = 1, up to 4)
+ pcm_substreams - Number of PCM substreams assigned to each PCM
+ (default = 8, up to 128)
+ hrtimer - Use hrtimer (=1, default) or system timer (=0)
+ fake_buffer - Fake buffer allocations (default = 1)
+
+ When multiple PCM devices are created, snd-dummy gives different
+ behavior to each PCM device:
+ 0 = interleaved with mmap support
+ 1 = non-interleaved with mmap support
+ 2 = interleaved without mmap
+ 3 = non-interleaved without mmap
+
+ As default, snd-dummy drivers doesn't allocate the real buffers
+ but either ignores read/write or mmap a single dummy page to all
+ buffer pages, in order to save the resources. If your apps need
+ the read/ written buffer data to be consistent, pass fake_buffer=0
+ option.
+
+ The power-management is supported.
+
+ Module snd-echo3g
+ -----------------
+
+ Module for Echoaudio 3G cards (Gina3G/Layla3G)
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-emu10k1
+ ------------------
+
+ Module for EMU10K1/EMU10k2 based PCI sound cards.
+ * Sound Blaster Live!
+ * Sound Blaster PCI 512
+ * Emu APS (partially supported)
+ * Sound Blaster Audigy
+
+ extin - bitmap of available external inputs for FX8010 (see bellow)
+ extout - bitmap of available external outputs for FX8010 (see bellow)
+ seq_ports - allocated sequencer ports (4 by default)
+ max_synth_voices - limit of voices used for wavetable (64 by default)
+ max_buffer_size - specifies the maximum size of wavetable/pcm buffers
+ given in MB unit. Default value is 128.
+ enable_ir - enable IR
+
+ This module supports multiple cards and autoprobe.
+
+ Input & Output configurations [extin/extout]
+ * Creative Card wo/Digital out [0x0003/0x1f03]
+ * Creative Card w/Digital out [0x0003/0x1f0f]
+ * Creative Card w/Digital CD in [0x000f/0x1f0f]
+ * Creative Card wo/Digital out + LiveDrive [0x3fc3/0x1fc3]
+ * Creative Card w/Digital out + LiveDrive [0x3fc3/0x1fcf]
+ * Creative Card w/Digital CD in + LiveDrive [0x3fcf/0x1fcf]
+ * Creative Card wo/Digital out + Digital I/O 2 [0x0fc3/0x1f0f]
+ * Creative Card w/Digital out + Digital I/O 2 [0x0fc3/0x1f0f]
+ * Creative Card w/Digital CD in + Digital I/O 2 [0x0fcf/0x1f0f]
+ * Creative Card 5.1/w Digital out + LiveDrive [0x3fc3/0x1fff]
+ * Creative Card 5.1 (c) 2003 [0x3fc3/0x7cff]
+ * Creative Card all ins and outs [0x3fff/0x7fff]
+
+ The power-management is supported.
+
+ Module snd-emu10k1x
+ -------------------
+
+ Module for Creative Emu10k1X (SB Live Dell OEM version)
+
+ This module supports multiple cards.
+
+ Module snd-ens1370
+ ------------------
+
+ Module for Ensoniq AudioPCI ES1370 PCI sound cards.
+ * SoundBlaster PCI 64
+ * SoundBlaster PCI 128
+
+ joystick - Enable joystick (default off)
+
+ This module supports multiple cards and autoprobe.
+
+ The power-management is supported.
+
+ Module snd-ens1371
+ ------------------
+
+ Module for Ensoniq AudioPCI ES1371 PCI sound cards.
+ * SoundBlaster PCI 64
+ * SoundBlaster PCI 128
+ * SoundBlaster Vibra PCI
+
+ joystick_port - port # for joystick (0x200,0x208,0x210,0x218),
+ 0 = disable (default), 1 = auto-detect
+
+ This module supports multiple cards and autoprobe.
+
+ The power-management is supported.
+
+ Module snd-es1688
+ -----------------
+
+ Module for ESS AudioDrive ES-1688 and ES-688 sound cards.
+
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+ mpu_port - port # for MPU-401 port (0x300,0x310,0x320,0x330), -1 = disable (default)
+ mpu_irq - IRQ # for MPU-401 port (5,7,9,10)
+ fm_port - port # for OPL3 (option; share the same port as default)
+
+ with isapnp=0, the following additional options are available:
+ port - port # for ES-1688 chip (0x220,0x240,0x260)
+ irq - IRQ # for ES-1688 chip (5,7,9,10)
+ dma8 - DMA # for ES-1688 chip (0,1,3)
+
+ This module supports multiple cards and autoprobe (without MPU-401 port)
+ and PnP with the ES968 chip.
+
+ Module snd-es18xx
+ -----------------
+
+ Module for ESS AudioDrive ES-18xx sound cards.
+
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ port - port # for ES-18xx chip (0x220,0x240,0x260)
+ mpu_port - port # for MPU-401 port (0x300,0x310,0x320,0x330), -1 = disable (default)
+ fm_port - port # for FM (optional, not used)
+ irq - IRQ # for ES-18xx chip (5,7,9,10)
+ dma1 - first DMA # for ES-18xx chip (0,1,3)
+ dma2 - first DMA # for ES-18xx chip (0,1,3)
+
+ This module supports multiple cards, ISA PnP and autoprobe (without MPU-401
+ port if native ISA PnP routines are not used).
+ When dma2 is equal with dma1, the driver works as half-duplex.
+
+ The power-management is supported.
+
+ Module snd-es1938
+ -----------------
+
+ Module for sound cards based on ESS Solo-1 (ES1938,ES1946) chips.
+
+ This module supports multiple cards and autoprobe.
+
+ The power-management is supported.
+
+ Module snd-es1968
+ -----------------
+
+ Module for sound cards based on ESS Maestro-1/2/2E (ES1968/ES1978) chips.
+
+ total_bufsize - total buffer size in kB (1-4096kB)
+ pcm_substreams_p - playback channels (1-8, default=2)
+ pcm_substreams_c - capture channels (1-8, default=0)
+ clock - clock (0 = auto-detection)
+ use_pm - support the power-management (0 = off, 1 = on,
+ 2 = auto (default))
+ enable_mpu - enable MPU401 (0 = off, 1 = on, 2 = auto (default))
+ joystick - enable joystick (default off)
+
+ This module supports multiple cards and autoprobe.
+
+ The power-management is supported.
+
+ Module snd-fm801
+ ----------------
+
+ Module for ForteMedia FM801 based PCI sound cards.
+
+ tea575x_tuner - Enable TEA575x tuner
+ - 1 = MediaForte 256-PCS
+ - 2 = MediaForte 256-PCPR
+ - 3 = MediaForte 64-PCR
+ - High 16-bits are video (radio) device number + 1
+ - example: 0x10002 (MediaForte 256-PCPR, device 1)
+
+ This module supports multiple cards and autoprobe.
+
+ The power-management is supported.
+
+ Module snd-gina20
+ -----------------
+
+ Module for Echoaudio Gina20
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-gina24
+ -----------------
+
+ Module for Echoaudio Gina24
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-gusclassic
+ ---------------------
+
+ Module for Gravis UltraSound Classic sound card.
+
+ port - port # for GF1 chip (0x220,0x230,0x240,0x250,0x260)
+ irq - IRQ # for GF1 chip (3,5,9,11,12,15)
+ dma1 - DMA # for GF1 chip (1,3,5,6,7)
+ dma2 - DMA # for GF1 chip (1,3,5,6,7,-1=disable)
+ joystick_dac - 0 to 31, (0.59V-4.52V or 0.389V-2.98V)
+ voices - GF1 voices limit (14-32)
+ pcm_voices - reserved PCM voices
+
+ This module supports multiple cards and autoprobe.
+
+ Module snd-gusextreme
+ ---------------------
+
+ Module for Gravis UltraSound Extreme (Synergy ViperMax) sound card.
+
+ port - port # for ES-1688 chip (0x220,0x230,0x240,0x250,0x260)
+ gf1_port - port # for GF1 chip (0x210,0x220,0x230,0x240,0x250,0x260,0x270)
+ mpu_port - port # for MPU-401 port (0x300,0x310,0x320,0x330), -1 = disable
+ irq - IRQ # for ES-1688 chip (5,7,9,10)
+ gf1_irq - IRQ # for GF1 chip (3,5,9,11,12,15)
+ mpu_irq - IRQ # for MPU-401 port (5,7,9,10)
+ dma8 - DMA # for ES-1688 chip (0,1,3)
+ dma1 - DMA # for GF1 chip (1,3,5,6,7)
+ joystick_dac - 0 to 31, (0.59V-4.52V or 0.389V-2.98V)
+ voices - GF1 voices limit (14-32)
+ pcm_voices - reserved PCM voices
+
+ This module supports multiple cards and autoprobe (without MPU-401 port).
+
+ Module snd-gusmax
+ -----------------
+
+ Module for Gravis UltraSound MAX sound card.
+
+ port - port # for GF1 chip (0x220,0x230,0x240,0x250,0x260)
+ irq - IRQ # for GF1 chip (3,5,9,11,12,15)
+ dma1 - DMA # for GF1 chip (1,3,5,6,7)
+ dma2 - DMA # for GF1 chip (1,3,5,6,7,-1=disable)
+ joystick_dac - 0 to 31, (0.59V-4.52V or 0.389V-2.98V)
+ voices - GF1 voices limit (14-32)
+ pcm_voices - reserved PCM voices
+
+ This module supports multiple cards and autoprobe.
+
+ Module snd-hda-intel
+ --------------------
+
+ Module for Intel HD Audio (ICH6, ICH6M, ESB2, ICH7, ICH8, ICH9, ICH10,
+ PCH, SCH),
+ ATI SB450, SB600, R600, RS600, RS690, RS780, RV610, RV620,
+ RV630, RV635, RV670, RV770,
+ VIA VT8251/VT8237A,
+ SIS966, ULI M5461
+
+ [Multiple options for each card instance]
+ model - force the model name
+ position_fix - Fix DMA pointer
+ -1 = system default: choose appropriate one per controller
+ hardware
+ 0 = auto: falls back to LPIB when POSBUF doesn't work
+ 1 = use LPIB
+ 2 = POSBUF: use position buffer
+ 3 = VIACOMBO: VIA-specific workaround for capture
+ 4 = COMBO: use LPIB for playback, auto for capture stream
+ probe_mask - Bitmask to probe codecs (default = -1, meaning all slots)
+ When the bit 8 (0x100) is set, the lower 8 bits are used
+ as the "fixed" codec slots; i.e. the driver probes the
+ slots regardless what hardware reports back
+ probe_only - Only probing and no codec initialization (default=off);
+ Useful to check the initial codec status for debugging
+ bdl_pos_adj - Specifies the DMA IRQ timing delay in samples.
+ Passing -1 will make the driver to choose the appropriate
+ value based on the controller chip.
+ patch - Specifies the early "patch" files to modify the HD-audio
+ setup before initializing the codecs. This option is
+ available only when CONFIG_SND_HDA_PATCH_LOADER=y is set.
+ See HD-Audio.txt for details.
+ beep_mode - Selects the beep registration mode (0=off, 1=on); default
+ value is set via CONFIG_SND_HDA_INPUT_BEEP_MODE kconfig.
+
+ [Single (global) options]
+ single_cmd - Use single immediate commands to communicate with
+ codecs (for debugging only)
+ enable_msi - Enable Message Signaled Interrupt (MSI) (default = off)
+ power_save - Automatic power-saving timeout (in second, 0 =
+ disable)
+ power_save_controller - Reset HD-audio controller in power-saving mode
+ (default = on)
+ align_buffer_size - Force rounding of buffer/period sizes to multiples
+ of 128 bytes. This is more efficient in terms of memory
+ access but isn't required by the HDA spec and prevents
+ users from specifying exact period/buffer sizes.
+ (default = on)
+ snoop - Enable/disable snooping (default = on)
+
+ This module supports multiple cards and autoprobe.
+
+ See Documentation/sound/alsa/HD-Audio.txt for more details about
+ HD-audio driver.
+
+ Each codec may have a model table for different configurations.
+ If your machine isn't listed there, the default (usually minimal)
+ configuration is set up. You can pass "model=<name>" option to
+ specify a certain model in such a case. There are different
+ models depending on the codec chip. The list of available models
+ is found in HD-Audio-Models.txt
+
+ The model name "generic" is treated as a special case. When this
+ model is given, the driver uses the generic codec parser without
+ "codec-patch". It's sometimes good for testing and debugging.
+
+ If the default configuration doesn't work and one of the above
+ matches with your device, report it together with alsa-info.sh
+ output (with --no-upload option) to kernel bugzilla or alsa-devel
+ ML (see the section "Links and Addresses").
+
+ power_save and power_save_controller options are for power-saving
+ mode. See powersave.txt for details.
+
+ Note 2: If you get click noises on output, try the module option
+ position_fix=1 or 2. position_fix=1 will use the SD_LPIB
+ register value without FIFO size correction as the current
+ DMA pointer. position_fix=2 will make the driver to use
+ the position buffer instead of reading SD_LPIB register.
+ (Usually SD_LPIB register is more accurate than the
+ position buffer.)
+
+ position_fix=3 is specific to VIA devices. The position
+ of the capture stream is checked from both LPIB and POSBUF
+ values. position_fix=4 is a combination mode, using LPIB
+ for playback and POSBUF for capture.
+
+ NB: If you get many "azx_get_response timeout" messages at
+ loading, it's likely a problem of interrupts (e.g. ACPI irq
+ routing). Try to boot with options like "pci=noacpi". Also, you
+ can try "single_cmd=1" module option. This will switch the
+ communication method between HDA controller and codecs to the
+ single immediate commands instead of CORB/RIRB. Basically, the
+ single command mode is provided only for BIOS, and you won't get
+ unsolicited events, too. But, at least, this works independently
+ from the irq. Remember this is a last resort, and should be
+ avoided as much as possible...
+
+ MORE NOTES ON "azx_get_response timeout" PROBLEMS:
+ On some hardware, you may need to add a proper probe_mask option
+ to avoid the "azx_get_response timeout" problem above, instead.
+ This occurs when the access to non-existing or non-working codec slot
+ (likely a modem one) causes a stall of the communication via HD-audio
+ bus. You can see which codec slots are probed by enabling
+ CONFIG_SND_DEBUG_VERBOSE, or simply from the file name of the codec
+ proc files. Then limit the slots to probe by probe_mask option.
+ For example, probe_mask=1 means to probe only the first slot, and
+ probe_mask=4 means only the third slot.
+
+ The power-management is supported.
+
+ Module snd-hdsp
+ ---------------
+
+ Module for RME Hammerfall DSP audio interface(s)
+
+ This module supports multiple cards.
+
+ Note: The firmware data can be automatically loaded via hotplug
+ when CONFIG_FW_LOADER is set. Otherwise, you need to load
+ the firmware via hdsploader utility included in alsa-tools
+ package.
+ The firmware data is found in alsa-firmware package.
+
+ Note: snd-page-alloc module does the job which snd-hammerfall-mem
+ module did formerly. It will allocate the buffers in advance
+ when any HDSP cards are found. To make the buffer
+ allocation sure, load snd-page-alloc module in the early
+ stage of boot sequence. See "Early Buffer Allocation"
+ section.
+
+ Module snd-hdspm
+ ----------------
+
+ Module for RME HDSP MADI board.
+
+ precise_ptr - Enable precise pointer, or disable.
+ line_outs_monitor - Send playback streams to analog outs by default.
+ enable_monitor - Enable Analog Out on Channel 63/64 by default.
+
+ See hdspm.txt for details.
+
+ Module snd-ice1712
+ ------------------
+
+ Module for Envy24 (ICE1712) based PCI sound cards.
+ * MidiMan M Audio Delta 1010
+ * MidiMan M Audio Delta 1010LT
+ * MidiMan M Audio Delta DiO 2496
+ * MidiMan M Audio Delta 66
+ * MidiMan M Audio Delta 44
+ * MidiMan M Audio Delta 410
+ * MidiMan M Audio Audiophile 2496
+ * TerraTec EWS 88MT
+ * TerraTec EWS 88D
+ * TerraTec EWX 24/96
+ * TerraTec DMX 6Fire
+ * TerraTec Phase 88
+ * Hoontech SoundTrack DSP 24
+ * Hoontech SoundTrack DSP 24 Value
+ * Hoontech SoundTrack DSP 24 Media 7.1
+ * Event Electronics, EZ8
+ * Digigram VX442
+ * Lionstracs, Mediastaton
+ * Terrasoniq TS 88
+
+ model - Use the given board model, one of the following:
+ delta1010, dio2496, delta66, delta44, audiophile, delta410,
+ delta1010lt, vx442, ewx2496, ews88mt, ews88mt_new, ews88d,
+ dmx6fire, dsp24, dsp24_value, dsp24_71, ez8,
+ phase88, mediastation
+ omni - Omni I/O support for MidiMan M-Audio Delta44/66
+ cs8427_timeout - reset timeout for the CS8427 chip (S/PDIF transceiver)
+ in msec resolution, default value is 500 (0.5 sec)
+
+ This module supports multiple cards and autoprobe. Note: The consumer part
+ is not used with all Envy24 based cards (for example in the MidiMan Delta
+ serie).
+
+ Note: The supported board is detected by reading EEPROM or PCI
+ SSID (if EEPROM isn't available). You can override the
+ model by passing "model" module option in case that the
+ driver isn't configured properly or you want to try another
+ type for testing.
+
+ Module snd-ice1724
+ ------------------
+
+ Module for Envy24HT (VT/ICE1724), Envy24PT (VT1720) based PCI sound cards.
+ * MidiMan M Audio Revolution 5.1
+ * MidiMan M Audio Revolution 7.1
+ * MidiMan M Audio Audiophile 192
+ * AMP Ltd AUDIO2000
+ * TerraTec Aureon 5.1 Sky
+ * TerraTec Aureon 7.1 Space
+ * TerraTec Aureon 7.1 Universe
+ * TerraTec Phase 22
+ * TerraTec Phase 28
+ * AudioTrak Prodigy 7.1
+ * AudioTrak Prodigy 7.1 LT
+ * AudioTrak Prodigy 7.1 XT
+ * AudioTrak Prodigy 7.1 HIFI
+ * AudioTrak Prodigy 7.1 HD2
+ * AudioTrak Prodigy 192
+ * Pontis MS300
+ * Albatron K8X800 Pro II
+ * Chaintech ZNF3-150
+ * Chaintech ZNF3-250
+ * Chaintech 9CJS
+ * Chaintech AV-710
+ * Shuttle SN25P
+ * Onkyo SE-90PCI
+ * Onkyo SE-200PCI
+ * ESI Juli@
+ * ESI Maya44
+ * Hercules Fortissimo IV
+ * EGO-SYS WaveTerminal 192M
+
+ model - Use the given board model, one of the following:
+ revo51, revo71, amp2000, prodigy71, prodigy71lt,
+ prodigy71xt, prodigy71hifi, prodigyhd2, prodigy192,
+ juli, aureon51, aureon71, universe, ap192, k8x800,
+ phase22, phase28, ms300, av710, se200pci, se90pci,
+ fortissimo4, sn25p, WT192M, maya44
+
+ This module supports multiple cards and autoprobe.
+
+ Note: The supported board is detected by reading EEPROM or PCI
+ SSID (if EEPROM isn't available). You can override the
+ model by passing "model" module option in case that the
+ driver isn't configured properly or you want to try another
+ type for testing.
+
+ Module snd-indigo
+ -----------------
+
+ Module for Echoaudio Indigo
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-indigodj
+ -------------------
+
+ Module for Echoaudio Indigo DJ
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-indigoio
+ -------------------
+
+ Module for Echoaudio Indigo IO
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-intel8x0
+ -------------------
+
+ Module for AC'97 motherboards from Intel and compatibles.
+ * Intel i810/810E, i815, i820, i830, i84x, MX440
+ ICH5, ICH6, ICH7, 6300ESB, ESB2
+ * SiS 7012 (SiS 735)
+ * NVidia NForce, NForce2, NForce3, MCP04, CK804
+ CK8, CK8S, MCP501
+ * AMD AMD768, AMD8111
+ * ALi m5455
+
+ ac97_clock - AC'97 codec clock base (0 = auto-detect)
+ ac97_quirk - AC'97 workaround for strange hardware
+ See "AC97 Quirk Option" section below.
+ buggy_irq - Enable workaround for buggy interrupts on some
+ motherboards (default yes on nForce chips,
+ otherwise off)
+ buggy_semaphore - Enable workaround for hardware with buggy
+ semaphores (e.g. on some ASUS laptops)
+ (default off)
+ spdif_aclink - Use S/PDIF over AC-link instead of direct connection
+ from the controller chip
+ (0 = off, 1 = on, -1 = default)
+
+ This module supports one chip and autoprobe.
+
+ Note: the latest driver supports auto-detection of chip clock.
+ if you still encounter too fast playback, specify the clock
+ explicitly via the module option "ac97_clock=41194".
+
+ Joystick/MIDI ports are not supported by this driver. If your
+ motherboard has these devices, use the ns558 or snd-mpu401
+ modules, respectively.
+
+ The power-management is supported.
+
+ Module snd-intel8x0m
+ --------------------
+
+ Module for Intel ICH (i8x0) chipset MC97 modems.
+ * Intel i810/810E, i815, i820, i830, i84x, MX440
+ ICH5, ICH6, ICH7
+ * SiS 7013 (SiS 735)
+ * NVidia NForce, NForce2, NForce2s, NForce3
+ * AMD AMD8111
+ * ALi m5455
+
+ ac97_clock - AC'97 codec clock base (0 = auto-detect)
+
+ This module supports one card and autoprobe.
+
+ Note: The default index value of this module is -2, i.e. the first
+ slot is excluded.
+
+ The power-management is supported.
+
+ Module snd-interwave
+ --------------------
+
+ Module for Gravis UltraSound PnP, Dynasonic 3-D/Pro, STB Sound Rage 32
+ and other sound cards based on AMD InterWave (tm) chip.
+
+ joystick_dac - 0 to 31, (0.59V-4.52V or 0.389V-2.98V)
+ midi - 1 = MIDI UART enable, 0 = MIDI UART disable (default)
+ pcm_voices - reserved PCM voices for the synthesizer (default 2)
+ effect - 1 = InterWave effects enable (default 0);
+ requires 8 voices
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ port - port # for InterWave chip (0x210,0x220,0x230,0x240,0x250,0x260)
+ irq - IRQ # for InterWave chip (3,5,9,11,12,15)
+ dma1 - DMA # for InterWave chip (0,1,3,5,6,7)
+ dma2 - DMA # for InterWave chip (0,1,3,5,6,7,-1=disable)
+
+ This module supports multiple cards, autoprobe and ISA PnP.
+
+ Module snd-interwave-stb
+ ------------------------
+
+ Module for UltraSound 32-Pro (sound card from STB used by Compaq)
+ and other sound cards based on AMD InterWave (tm) chip with TEA6330T
+ circuit for extended control of bass, treble and master volume.
+
+ joystick_dac - 0 to 31, (0.59V-4.52V or 0.389V-2.98V)
+ midi - 1 = MIDI UART enable, 0 = MIDI UART disable (default)
+ pcm_voices - reserved PCM voices for the synthesizer (default 2)
+ effect - 1 = InterWave effects enable (default 0);
+ requires 8 voices
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ port - port # for InterWave chip (0x210,0x220,0x230,0x240,0x250,0x260)
+ port_tc - tone control (i2c bus) port # for TEA6330T chip (0x350,0x360,0x370,0x380)
+ irq - IRQ # for InterWave chip (3,5,9,11,12,15)
+ dma1 - DMA # for InterWave chip (0,1,3,5,6,7)
+ dma2 - DMA # for InterWave chip (0,1,3,5,6,7,-1=disable)
+
+ This module supports multiple cards, autoprobe and ISA PnP.
+
+ Module snd-jazz16
+ -------------------
+
+ Module for Media Vision Jazz16 chipset. The chipset consists of 3 chips:
+ MVD1216 + MVA416 + MVA514.
+
+ port - port # for SB DSP chip (0x210,0x220,0x230,0x240,0x250,0x260)
+ irq - IRQ # for SB DSP chip (3,5,7,9,10,15)
+ dma8 - DMA # for SB DSP chip (1,3)
+ dma16 - DMA # for SB DSP chip (5,7)
+ mpu_port - MPU-401 port # (0x300,0x310,0x320,0x330)
+ mpu_irq - MPU-401 irq # (2,3,5,7)
+
+ This module supports multiple cards.
+
+ Module snd-korg1212
+ -------------------
+
+ Module for Korg 1212 IO PCI card
+
+ This module supports multiple cards.
+
+ Module snd-layla20
+ ------------------
+
+ Module for Echoaudio Layla20
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-layla24
+ ------------------
+
+ Module for Echoaudio Layla24
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-lola
+ ---------------
+
+ Module for Digigram Lola PCI-e boards
+
+ This module supports multiple cards.
+
+ Module snd-lx6464es
+ -------------------
+
+ Module for Digigram LX6464ES boards
+
+ This module supports multiple cards.
+
+ Module snd-maestro3
+ -------------------
+
+ Module for Allegro/Maestro3 chips
+
+ external_amp - enable external amp (enabled by default)
+ amp_gpio - GPIO pin number for external amp (0-15) or
+ -1 for default pin (8 for allegro, 1 for
+ others)
+
+ This module supports autoprobe and multiple chips.
+
+ Note: the binding of amplifier is dependent on hardware.
+ If there is no sound even though all channels are unmuted, try to
+ specify other gpio connection via amp_gpio option.
+ For example, a Panasonic notebook might need "amp_gpio=0x0d"
+ option.
+
+ The power-management is supported.
+
+ Module snd-mia
+ ---------------
+
+ Module for Echoaudio Mia
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-miro
+ ---------------
+
+ Module for Miro soundcards: miroSOUND PCM 1 pro,
+ miroSOUND PCM 12,
+ miroSOUND PCM 20 Radio.
+
+ port - Port # (0x530,0x604,0xe80,0xf40)
+ irq - IRQ # (5,7,9,10,11)
+ dma1 - 1st dma # (0,1,3)
+ dma2 - 2nd dma # (0,1)
+ mpu_port - MPU-401 port # (0x300,0x310,0x320,0x330)
+ mpu_irq - MPU-401 irq # (5,7,9,10)
+ fm_port - FM Port # (0x388)
+ wss - enable WSS mode
+ ide - enable onboard ide support
+
+ Module snd-mixart
+ -----------------
+
+ Module for Digigram miXart8 sound cards.
+
+ This module supports multiple cards.
+ Note: One miXart8 board will be represented as 4 alsa cards.
+ See MIXART.txt for details.
+
+ When the driver is compiled as a module and the hotplug firmware
+ is supported, the firmware data is loaded via hotplug automatically.
+ Install the necessary firmware files in alsa-firmware package.
+ When no hotplug fw loader is available, you need to load the
+ firmware via mixartloader utility in alsa-tools package.
+
+ Module snd-mona
+ ---------------
+
+ Module for Echoaudio Mona
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-mpu401
+ -----------------
+
+ Module for MPU-401 UART devices.
+
+ port - port number or -1 (disable)
+ irq - IRQ number or -1 (disable)
+ pnp - PnP detection - 0 = disable, 1 = enable (default)
+
+ This module supports multiple devices and PnP.
+
+ Module snd-msnd-classic
+ -----------------------
+
+ Module for Turtle Beach MultiSound Classic, Tahiti or Monterey
+ soundcards.
+
+ io - Port # for msnd-classic card
+ irq - IRQ # for msnd-classic card
+ mem - Memory address (0xb0000, 0xc8000, 0xd0000, 0xd8000,
+ 0xe0000 or 0xe8000)
+ write_ndelay - enable write ndelay (default = 1)
+ calibrate_signal - calibrate signal (default = 0)
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+ digital - Digital daughterboard present (default = 0)
+ cfg - Config port (0x250, 0x260 or 0x270) default = PnP
+ reset - Reset all devices
+ mpu_io - MPU401 I/O port
+ mpu_irq - MPU401 irq#
+ ide_io0 - IDE port #0
+ ide_io1 - IDE port #1
+ ide_irq - IDE irq#
+ joystick_io - Joystick I/O port
+
+ The driver requires firmware files "/*(DEBLOBBED)*/" and
+ "/*(DEBLOBBED)*/" in the proper firmware directory.
+
+ See Documentation/sound/oss/MultiSound for important information
+ about this driver. Note that it has been discontinued, but the
+ Voyetra Turtle Beach knowledge base entry for it is still available
+ at
+ http://www.turtlebeach.com
+
+ Module snd-msnd-pinnacle
+ ------------------------
+
+ Module for Turtle Beach MultiSound Pinnacle/Fiji soundcards.
+
+ io - Port # for pinnacle/fiji card
+ irq - IRQ # for pinnalce/fiji card
+ mem - Memory address (0xb0000, 0xc8000, 0xd0000, 0xd8000,
+ 0xe0000 or 0xe8000)
+ write_ndelay - enable write ndelay (default = 1)
+ calibrate_signal - calibrate signal (default = 0)
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ The driver requires firmware files "/*(DEBLOBBED)*/" and
+ "/*(DEBLOBBED)*/" in the proper firmware directory.
+
+ Module snd-mtpav
+ ----------------
+
+ Module for MOTU MidiTimePiece AV multiport MIDI (on the parallel
+ port).
+
+ port - I/O port # for MTPAV (0x378,0x278, default=0x378)
+ irq - IRQ # for MTPAV (7,5, default=7)
+ hwports - number of supported hardware ports, default=8.
+
+ Module supports only 1 card. This module has no enable option.
+
+ Module snd-mts64
+ ----------------
+
+ Module for Ego Systems (ESI) Miditerminal 4140
+
+ This module supports multiple devices.
+ Requires parport (CONFIG_PARPORT).
+
+ Module snd-nm256
+ ----------------
+
+ Module for NeoMagic NM256AV/ZX chips
+
+ playback_bufsize - max playback frame size in kB (4-128kB)
+ capture_bufsize - max capture frame size in kB (4-128kB)
+ force_ac97 - 0 or 1 (disabled by default)
+ buffer_top - specify buffer top address
+ use_cache - 0 or 1 (disabled by default)
+ vaio_hack - alias buffer_top=0x25a800
+ reset_workaround - enable AC97 RESET workaround for some laptops
+ reset_workaround2 - enable extended AC97 RESET workaround for some
+ other laptops
+
+ This module supports one chip and autoprobe.
+
+ The power-management is supported.
+
+ Note: on some notebooks the buffer address cannot be detected
+ automatically, or causes hang-up during initialization.
+ In such a case, specify the buffer top address explicitly via
+ the buffer_top option.
+ For example,
+ Sony F250: buffer_top=0x25a800
+ Sony F270: buffer_top=0x272800
+ The driver supports only ac97 codec. It's possible to force
+ to initialize/use ac97 although it's not detected. In such a
+ case, use force_ac97=1 option - but *NO* guarantee whether it
+ works!
+
+ Note: The NM256 chip can be linked internally with non-AC97
+ codecs. This driver supports only the AC97 codec, and won't work
+ with machines with other (most likely CS423x or OPL3SAx) chips,
+ even though the device is detected in lspci. In such a case, try
+ other drivers, e.g. snd-cs4232 or snd-opl3sa2. Some has ISA-PnP
+ but some doesn't have ISA PnP. You'll need to specify isapnp=0
+ and proper hardware parameters in the case without ISA PnP.
+
+ Note: some laptops need a workaround for AC97 RESET. For the
+ known hardware like Dell Latitude LS and Sony PCG-F305, this
+ workaround is enabled automatically. For other laptops with a
+ hard freeze, you can try reset_workaround=1 option.
+
+ Note: Dell Latitude CSx laptops have another problem regarding
+ AC97 RESET. On these laptops, reset_workaround2 option is
+ turned on as default. This option is worth to try if the
+ previous reset_workaround option doesn't help.
+
+ Note: This driver is really crappy. It's a porting from the
+ OSS driver, which is a result of black-magic reverse engineering.
+ The detection of codec will fail if the driver is loaded *after*
+ X-server as described above. You might be able to force to load
+ the module, but it may result in hang-up. Hence, make sure that
+ you load this module *before* X if you encounter this kind of
+ problem.
+
+ Module snd-opl3sa2
+ ------------------
+
+ Module for Yamaha OPL3-SA2/SA3 sound cards.
+
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ port - control port # for OPL3-SA chip (0x370)
+ sb_port - SB port # for OPL3-SA chip (0x220,0x240)
+ wss_port - WSS port # for OPL3-SA chip (0x530,0xe80,0xf40,0x604)
+ midi_port - port # for MPU-401 UART (0x300,0x330), -1 = disable
+ fm_port - FM port # for OPL3-SA chip (0x388), -1 = disable
+ irq - IRQ # for OPL3-SA chip (5,7,9,10)
+ dma1 - first DMA # for Yamaha OPL3-SA chip (0,1,3)
+ dma2 - second DMA # for Yamaha OPL3-SA chip (0,1,3), -1 = disable
+
+ This module supports multiple cards and ISA PnP. It does not support
+ autoprobe (if ISA PnP is not used) thus all ports must be specified!!!
+
+ The power-management is supported.
+
+ Module snd-opti92x-ad1848
+ -------------------------
+
+ Module for sound cards based on OPTi 82c92x and Analog Devices AD1848 chips.
+ Module works with OAK Mozart cards as well.
+
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ port - port # for WSS chip (0x530,0xe80,0xf40,0x604)
+ mpu_port - port # for MPU-401 UART (0x300,0x310,0x320,0x330)
+ fm_port - port # for OPL3 device (0x388)
+ irq - IRQ # for WSS chip (5,7,9,10,11)
+ mpu_irq - IRQ # for MPU-401 UART (5,7,9,10)
+ dma1 - first DMA # for WSS chip (0,1,3)
+
+ This module supports only one card, autoprobe and PnP.
+
+ Module snd-opti92x-cs4231
+ -------------------------
+
+ Module for sound cards based on OPTi 82c92x and Crystal CS4231 chips.
+
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ port - port # for WSS chip (0x530,0xe80,0xf40,0x604)
+ mpu_port - port # for MPU-401 UART (0x300,0x310,0x320,0x330)
+ fm_port - port # for OPL3 device (0x388)
+ irq - IRQ # for WSS chip (5,7,9,10,11)
+ mpu_irq - IRQ # for MPU-401 UART (5,7,9,10)
+ dma1 - first DMA # for WSS chip (0,1,3)
+ dma2 - second DMA # for WSS chip (0,1,3)
+
+ This module supports only one card, autoprobe and PnP.
+
+ Module snd-opti93x
+ ------------------
+
+ Module for sound cards based on OPTi 82c93x chips.
+
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ port - port # for WSS chip (0x530,0xe80,0xf40,0x604)
+ mpu_port - port # for MPU-401 UART (0x300,0x310,0x320,0x330)
+ fm_port - port # for OPL3 device (0x388)
+ irq - IRQ # for WSS chip (5,7,9,10,11)
+ mpu_irq - IRQ # for MPU-401 UART (5,7,9,10)
+ dma1 - first DMA # for WSS chip (0,1,3)
+ dma2 - second DMA # for WSS chip (0,1,3)
+
+ This module supports only one card, autoprobe and PnP.
+
+ Module snd-oxygen
+ -----------------
+
+ Module for sound cards based on the C-Media CMI8786/8787/8788 chip:
+ * Asound A-8788
+ * Asus Xonar DG/DGX
+ * AuzenTech X-Meridian
+ * AuzenTech X-Meridian 2G
+ * Bgears b-Enspirer
+ * Club3D Theatron DTS
+ * HT-Omega Claro (plus)
+ * HT-Omega Claro halo (XT)
+ * Kuroutoshikou CMI8787-HG2PCI
+ * Razer Barracuda AC-1
+ * Sondigo Inferno
+ * TempoTec HiFier Fantasia
+ * TempoTec HiFier Serenade
+
+ This module supports autoprobe and multiple cards.
+
+ Module snd-pcsp
+ -----------------
+
+ Module for internal PC-Speaker.
+
+ nopcm - Disable PC-Speaker PCM sound. Only beeps remain.
+ nforce_wa - enable NForce chipset workaround. Expect bad sound.
+
+ This module supports system beeps, some kind of PCM playback and
+ even a few mixer controls.
+
+ Module snd-pcxhr
+ ----------------
+
+ Module for Digigram PCXHR boards
+
+ This module supports multiple cards.
+
+ Module snd-portman2x4
+ ---------------------
+
+ Module for Midiman Portman 2x4 parallel port MIDI interface
+
+ This module supports multiple cards.
+
+ Module snd-powermac (on ppc only)
+ ---------------------------------
+
+ Module for PowerMac, iMac and iBook on-board soundchips
+
+ enable_beep - enable beep using PCM (enabled as default)
+
+ Module supports autoprobe a chip.
+
+ Note: the driver may have problems regarding endianness.
+
+ The power-management is supported.
+
+ Module snd-pxa2xx-ac97 (on arm only)
+ ------------------------------------
+
+ Module for AC97 driver for the Intel PXA2xx chip
+
+ For ARM architecture only.
+
+ The power-management is supported.
+
+ Module snd-riptide
+ ------------------
+
+ Module for Conexant Riptide chip
+
+ joystick_port - Joystick port # (default: 0x200)
+ mpu_port - MPU401 port # (default: 0x330)
+ opl3_port - OPL3 port # (default: 0x388)
+
+ This module supports multiple cards.
+ The driver requires the firmware loader support on kernel.
+ You need to install the firmware file "/*(DEBLOBBED)*/" to the standard
+ firmware path (e.g. /lib/firmware).
+
+ Module snd-rme32
+ ----------------
+
+ Module for RME Digi32, Digi32 Pro and Digi32/8 (Sek'd Prodif32,
+ Prodif96 and Prodif Gold) sound cards.
+
+ This module supports multiple cards.
+
+ Module snd-rme96
+ ----------------
+
+ Module for RME Digi96, Digi96/8 and Digi96/8 PRO/PAD/PST sound cards.
+
+ This module supports multiple cards.
+
+ Module snd-rme9652
+ ------------------
+
+ Module for RME Digi9652 (Hammerfall, Hammerfall-Light) sound cards.
+
+ precise_ptr - Enable precise pointer (doesn't work reliably).
+ (default = 0)
+
+ This module supports multiple cards.
+
+ Note: snd-page-alloc module does the job which snd-hammerfall-mem
+ module did formerly. It will allocate the buffers in advance
+ when any RME9652 cards are found. To make the buffer
+ allocation sure, load snd-page-alloc module in the early
+ stage of boot sequence. See "Early Buffer Allocation"
+ section.
+
+ Module snd-sa11xx-uda1341 (on arm only)
+ ---------------------------------------
+
+ Module for Philips UDA1341TS on Compaq iPAQ H3600 sound card.
+
+ Module supports only one card.
+ Module has no enable and index options.
+
+ The power-management is supported.
+
+ Module snd-sb8
+ --------------
+
+ Module for 8-bit SoundBlaster cards: SoundBlaster 1.0,
+ SoundBlaster 2.0,
+ SoundBlaster Pro
+
+ port - port # for SB DSP chip (0x220,0x240,0x260)
+ irq - IRQ # for SB DSP chip (5,7,9,10)
+ dma8 - DMA # for SB DSP chip (1,3)
+
+ This module supports multiple cards and autoprobe.
+
+ The power-management is supported.
+
+ Module snd-sb16 and snd-sbawe
+ -----------------------------
+
+ Module for 16-bit SoundBlaster cards: SoundBlaster 16 (PnP),
+ SoundBlaster AWE 32 (PnP),
+ SoundBlaster AWE 64 PnP
+
+ mic_agc - Mic Auto-Gain-Control - 0 = disable, 1 = enable (default)
+ csp - ASP/CSP chip support - 0 = disable (default), 1 = enable
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ port - port # for SB DSP 4.x chip (0x220,0x240,0x260)
+ mpu_port - port # for MPU-401 UART (0x300,0x330), -1 = disable
+ awe_port - base port # for EMU8000 synthesizer (0x620,0x640,0x660)
+ (snd-sbawe module only)
+ irq - IRQ # for SB DSP 4.x chip (5,7,9,10)
+ dma8 - 8-bit DMA # for SB DSP 4.x chip (0,1,3)
+ dma16 - 16-bit DMA # for SB DSP 4.x chip (5,6,7)
+
+ This module supports multiple cards, autoprobe and ISA PnP.
+
+ Note: To use Vibra16X cards in 16-bit half duplex mode, you must
+ disable 16bit DMA with dma16 = -1 module parameter.
+ Also, all Sound Blaster 16 type cards can operate in 16-bit
+ half duplex mode through 8-bit DMA channel by disabling their
+ 16-bit DMA channel.
+
+ The power-management is supported.
+
+ Module snd-sc6000
+ -----------------
+
+ Module for Gallant SC-6000 soundcard and later models: SC-6600
+ and SC-7000.
+
+ port - Port # (0x220 or 0x240)
+ mss_port - MSS Port # (0x530 or 0xe80)
+ irq - IRQ # (5,7,9,10,11)
+ mpu_irq - MPU-401 IRQ # (5,7,9,10) ,0 - no MPU-401 irq
+ dma - DMA # (1,3,0)
+ joystick - Enable gameport - 0 = disable (default), 1 = enable
+
+ This module supports multiple cards.
+
+ This card is also known as Audio Excel DSP 16 or Zoltrix AV302.
+
+ Module snd-sscape
+ -----------------
+
+ Module for ENSONIQ SoundScape cards.
+
+ port - Port # (PnP setup)
+ wss_port - WSS Port # (PnP setup)
+ irq - IRQ # (PnP setup)
+ mpu_irq - MPU-401 IRQ # (PnP setup)
+ dma - DMA # (PnP setup)
+ dma2 - 2nd DMA # (PnP setup, -1 to disable)
+ joystick - Enable gameport - 0 = disable (default), 1 = enable
+
+ This module supports multiple cards.
+
+ The driver requires the firmware loader support on kernel.
+
+ Module snd-sun-amd7930 (on sparc only)
+ --------------------------------------
+
+ Module for AMD7930 sound chips found on Sparcs.
+
+ This module supports multiple cards.
+
+ Module snd-sun-cs4231 (on sparc only)
+ -------------------------------------
+
+ Module for CS4231 sound chips found on Sparcs.
+
+ This module supports multiple cards.
+
+ Module snd-sun-dbri (on sparc only)
+ -----------------------------------
+
+ Module for DBRI sound chips found on Sparcs.
+
+ This module supports multiple cards.
+
+ Module snd-wavefront
+ --------------------
+
+ Module for Turtle Beach Maui, Tropez and Tropez+ sound cards.
+
+ use_cs4232_midi - Use CS4232 MPU-401 interface
+ (inaccessibly located inside your computer)
+ isapnp - ISA PnP detection - 0 = disable, 1 = enable (default)
+
+ with isapnp=0, the following options are available:
+
+ cs4232_pcm_port - Port # for CS4232 PCM interface.
+ cs4232_pcm_irq - IRQ # for CS4232 PCM interface (5,7,9,11,12,15).
+ cs4232_mpu_port - Port # for CS4232 MPU-401 interface.
+ cs4232_mpu_irq - IRQ # for CS4232 MPU-401 interface (9,11,12,15).
+ ics2115_port - Port # for ICS2115
+ ics2115_irq - IRQ # for ICS2115
+ fm_port - FM OPL-3 Port #
+ dma1 - DMA1 # for CS4232 PCM interface.
+ dma2 - DMA2 # for CS4232 PCM interface.
+
+ The below are options for wavefront_synth features:
+ wf_raw - Assume that we need to boot the OS (default:no)
+ If yes, then during driver loading, the state of the board is
+ ignored, and we reset the board and load the firmware anyway.
+ fx_raw - Assume that the FX process needs help (default:yes)
+ If false, we'll leave the FX processor in whatever state it is
+ when the driver is loaded. The default is to download the
+ microprogram and associated coefficients to set it up for
+ "default" operation, whatever that means.
+ debug_default - Debug parameters for card initialization
+ wait_usecs - How long to wait without sleeping, usecs
+ (default:150)
+ This magic number seems to give pretty optimal throughput
+ based on my limited experimentation.
+ If you want to play around with it and find a better value, be
+ my guest. Remember, the idea is to get a number that causes us
+ to just busy wait for as many WaveFront commands as possible,
+ without coming up with a number so large that we hog the whole
+ CPU.
+ Specifically, with this number, out of about 134,000 status
+ waits, only about 250 result in a sleep.
+ sleep_interval - How long to sleep when waiting for reply
+ (default: 100)
+ sleep_tries - How many times to try sleeping during a wait
+ (default: 50)
+ /*(DEBLOBBED)*/
+ reset_time - How long to wait for a reset to take effect
+ (default:2)
+ ramcheck_time - How many seconds to wait for the RAM test
+ (default:20)
+ osrun_time - How many seconds to wait for the ICS2115 OS
+ (default:10)
+
+ This module supports multiple cards and ISA PnP.
+
+ /*(DEBLOBBED)*/.
+
+ Module snd-sonicvibes
+ ---------------------
+
+ Module for S3 SonicVibes PCI sound cards.
+ * PINE Schubert 32 PCI
+
+ reverb - Reverb Enable - 1 = enable, 0 = disable (default)
+ - SoundCard must have onboard SRAM for this.
+ mge - Mic Gain Enable - 1 = enable, 0 = disable (default)
+
+ This module supports multiple cards and autoprobe.
+
+ Module snd-serial-u16550
+ ------------------------
+
+ Module for UART16550A serial MIDI ports.
+
+ port - port # for UART16550A chip
+ irq - IRQ # for UART16550A chip, -1 = poll mode
+ speed - speed in bauds (9600,19200,38400,57600,115200)
+ 38400 = default
+ base - base for divisor in bauds (57600,115200,230400,460800)
+ 115200 = default
+ outs - number of MIDI ports in a serial port (1-4)
+ 1 = default
+ adaptor - Type of adaptor.
+ 0 = Soundcanvas, 1 = MS-124T, 2 = MS-124W S/A,
+ 3 = MS-124W M/B, 4 = Generic
+
+ This module supports multiple cards. This module does not support autoprobe
+ thus the main port must be specified!!! Other options are optional.
+
+ Module snd-trident
+ ------------------
+
+ Module for Trident 4DWave DX/NX sound cards.
+ * Best Union Miss Melody 4DWave PCI
+ * HIS 4DWave PCI
+ * Warpspeed ONSpeed 4DWave PCI
+ * AzTech PCI 64-Q3D
+ * Addonics SV 750
+ * CHIC True Sound 4Dwave
+ * Shark Predator4D-PCI
+ * Jaton SonicWave 4D
+ * SiS SI7018 PCI Audio
+ * Hoontech SoundTrack Digital 4DWave NX
+
+ pcm_channels - max channels (voices) reserved for PCM
+ wavetable_size - max wavetable size in kB (4-?kb)
+
+ This module supports multiple cards and autoprobe.
+
+ The power-management is supported.
+
+ Module snd-ua101
+ ----------------
+
+ Module for the Edirol UA-101/UA-1000 audio/MIDI interfaces.
+
+ This module supports multiple devices, autoprobe and hotplugging.
+
+ Module snd-usb-audio
+ --------------------
+
+ Module for USB audio and USB MIDI devices.
+
+ vid - Vendor ID for the device (optional)
+ pid - Product ID for the device (optional)
+ nrpacks - Max. number of packets per URB (default: 8)
+ device_setup - Device specific magic number (optional)
+ - Influence depends on the device
+ - Default: 0x0000
+ ignore_ctl_error - Ignore any USB-controller regarding mixer
+ interface (default: no)
+
+ This module supports multiple devices, autoprobe and hotplugging.
+
+ NB: nrpacks parameter can be modified dynamically via sysfs.
+ Don't put the value over 20. Changing via sysfs has no sanity
+ check.
+ NB: ignore_ctl_error=1 may help when you get an error at accessing
+ the mixer element such as URB error -22. This happens on some
+ buggy USB device or the controller.
+
+ Module snd-usb-caiaq
+ --------------------
+
+ Module for caiaq UB audio interfaces,
+ * Native Instruments RigKontrol2
+ * Native Instruments Kore Controller
+ * Native Instruments Audio Kontrol 1
+ * Native Instruments Audio 8 DJ
+
+ This module supports multiple devices, autoprobe and hotplugging.
+
+ Module snd-usb-usx2y
+ --------------------
+
+ Module for Tascam USB US-122, US-224 and US-428 devices.
+
+ This module supports multiple devices, autoprobe and hotplugging.
+
+ Note: you need to load the firmware via usx2yloader utility included
+ in alsa-tools and alsa-firmware packages.
+
+ Module snd-via82xx
+ ------------------
+
+ Module for AC'97 motherboards based on VIA 82C686A/686B, 8233,
+ 8233A, 8233C, 8235, 8237 (south) bridge.
+
+ mpu_port - 0x300,0x310,0x320,0x330, otherwise obtain BIOS setup
+ [VIA686A/686B only]
+ joystick - Enable joystick (default off) [VIA686A/686B only]
+ ac97_clock - AC'97 codec clock base (default 48000Hz)
+ dxs_support - support DXS channels,
+ 0 = auto (default), 1 = enable, 2 = disable,
+ 3 = 48k only, 4 = no VRA, 5 = enable any sample
+ rate and different sample rates on different
+ channels
+ [VIA8233/C, 8235, 8237 only]
+ ac97_quirk - AC'97 workaround for strange hardware
+ See "AC97 Quirk Option" section below.
+
+ This module supports one chip and autoprobe.
+
+ Note: on some SMP motherboards like MSI 694D the interrupts might
+ not be generated properly. In such a case, please try to
+ set the SMP (or MPS) version on BIOS to 1.1 instead of
+ default value 1.4. Then the interrupt number will be
+ assigned under 15. You might also upgrade your BIOS.
+
+ Note: VIA8233/5/7 (not VIA8233A) can support DXS (direct sound)
+ channels as the first PCM. On these channels, up to 4
+ streams can be played at the same time, and the controller
+ can perform sample rate conversion with separate rates for
+ each channel.
+ As default (dxs_support = 0), 48k fixed rate is chosen
+ except for the known devices since the output is often
+ noisy except for 48k on some mother boards due to the
+ bug of BIOS.
+ Please try once dxs_support=5 and if it works on other
+ sample rates (e.g. 44.1kHz of mp3 playback), please let us
+ know the PCI subsystem vendor/device id's (output of
+ "lspci -nv").
+ If dxs_support=5 does not work, try dxs_support=4; if it
+ doesn't work too, try dxs_support=1. (dxs_support=1 is
+ usually for old motherboards. The correct implemented
+ board should work with 4 or 5.) If it still doesn't
+ work and the default setting is ok, dxs_support=3 is the
+ right choice. If the default setting doesn't work at all,
+ try dxs_support=2 to disable the DXS channels.
+ In any cases, please let us know the result and the
+ subsystem vendor/device ids. See "Links and Addresses"
+ below.
+
+ Note: for the MPU401 on VIA823x, use snd-mpu401 driver
+ additionally. The mpu_port option is for VIA686 chips only.
+
+ The power-management is supported.
+
+ Module snd-via82xx-modem
+ ------------------------
+
+ Module for VIA82xx AC97 modem
+
+ ac97_clock - AC'97 codec clock base (default 48000Hz)
+
+ This module supports one card and autoprobe.
+
+ Note: The default index value of this module is -2, i.e. the first
+ slot is excluded.
+
+ The power-management is supported.
+
+ Module snd-virmidi
+ ------------------
+
+ Module for virtual rawmidi devices.
+ This module creates virtual rawmidi devices which communicate
+ to the corresponding ALSA sequencer ports.
+
+ midi_devs - MIDI devices # (1-4, default=4)
+
+ This module supports multiple cards.
+
+ Module snd-virtuoso
+ -------------------
+
+ Module for sound cards based on the Asus AV66/AV100/AV200 chips,
+ i.e., Xonar D1, DX, D2, D2X, DS, DSX, Essence ST (Deluxe),
+ Essence STX (II), HDAV1.3 (Deluxe), and HDAV1.3 Slim.
+
+ This module supports autoprobe and multiple cards.
+
+ Module snd-vx222
+ ----------------
+
+ Module for Digigram VX-Pocket VX222, V222 v2 and Mic cards.
+
+ mic - Enable Microphone on V222 Mic (NYI)
+ ibl - Capture IBL size. (default = 0, minimum size)
+
+ This module supports multiple cards.
+
+ When the driver is compiled as a module and the hotplug firmware
+ is supported, the firmware data is loaded via hotplug automatically.
+ Install the necessary firmware files in alsa-firmware package.
+ When no hotplug fw loader is available, you need to load the
+ firmware via vxloader utility in alsa-tools package. To invoke
+ vxloader automatically, add the following to /etc/modprobe.d/alsa.conf
+
+ install snd-vx222 /sbin/modprobe --first-time -i snd-vx222 && /usr/bin/vxloader
+
+ (for 2.2/2.4 kernels, add "post-install /usr/bin/vxloader" to
+ /etc/modules.conf, instead.)
+ IBL size defines the interrupts period for PCM. The smaller size
+ gives smaller latency but leads to more CPU consumption, too.
+ The size is usually aligned to 126. As default (=0), the smallest
+ size is chosen. The possible IBL values can be found in
+ /proc/asound/cardX/vx-status proc file.
+
+ The power-management is supported.
+
+ Module snd-vxpocket
+ -------------------
+
+ Module for Digigram VX-Pocket VX2 and 440 PCMCIA cards.
+
+ ibl - Capture IBL size. (default = 0, minimum size)
+
+ This module supports multiple cards. The module is compiled only when
+ PCMCIA is supported on kernel.
+
+ With the older 2.6.x kernel, to activate the driver via the card
+ manager, you'll need to set up /etc/pcmcia/vxpocket.conf. See the
+ sound/pcmcia/vx/vxpocket.c. 2.6.13 or later kernel requires no
+ longer require a config file.
+
+ When the driver is compiled as a module and the hotplug firmware
+ is supported, the firmware data is loaded via hotplug automatically.
+ Install the necessary firmware files in alsa-firmware package.
+ When no hotplug fw loader is available, you need to load the
+ firmware via vxloader utility in alsa-tools package.
+
+ About capture IBL, see the description of snd-vx222 module.
+
+ Note: snd-vxp440 driver is merged to snd-vxpocket driver since
+ ALSA 1.0.10.
+
+ The power-management is supported.
+
+ Module snd-ymfpci
+ -----------------
+
+ Module for Yamaha PCI chips (YMF72x, YMF74x & YMF75x).
+
+ mpu_port - 0x300,0x330,0x332,0x334, 0 (disable) by default,
+ 1 (auto-detect for YMF744/754 only)
+ fm_port - 0x388,0x398,0x3a0,0x3a8, 0 (disable) by default
+ 1 (auto-detect for YMF744/754 only)
+ joystick_port - 0x201,0x202,0x204,0x205, 0 (disable) by default,
+ 1 (auto-detect)
+ rear_switch - enable shared rear/line-in switch (bool)
+
+ This module supports autoprobe and multiple chips.
+
+ The power-management is supported.
+
+ Module snd-pdaudiocf
+ --------------------
+
+ Module for Sound Core PDAudioCF sound card.
+
+ The power-management is supported.
+
+
+AC97 Quirk Option
+=================
+
+The ac97_quirk option is used to enable/override the workaround for
+specific devices on drivers for on-board AC'97 controllers like
+snd-intel8x0. Some hardware have swapped output pins between Master
+and Headphone, or Surround (thanks to confusion of AC'97
+specifications from version to version :-)
+
+The driver provides the auto-detection of known problematic devices,
+but some might be unknown or wrongly detected. In such a case, pass
+the proper value with this option.
+
+The following strings are accepted:
+ - default Don't override the default setting
+ - none Disable the quirk
+ - hp_only Bind Master and Headphone controls as a single control
+ - swap_hp Swap headphone and master controls
+ - swap_surround Swap master and surround controls
+ - ad_sharing For AD1985, turn on OMS bit and use headphone
+ - alc_jack For ALC65x, turn on the jack sense mode
+ - inv_eapd Inverted EAPD implementation
+ - mute_led Bind EAPD bit for turning on/off mute LED
+
+For backward compatibility, the corresponding integer value -1, 0,
+... are accepted, too.
+
+For example, if "Master" volume control has no effect on your device
+but only "Headphone" does, pass ac97_quirk=hp_only module option.
+
+
+Configuring Non-ISAPNP Cards
+============================
+
+When the kernel is configured with ISA-PnP support, the modules
+supporting the isapnp cards will have module options "isapnp".
+If this option is set, *only* the ISA-PnP devices will be probed.
+For probing the non ISA-PnP cards, you have to pass "isapnp=0" option
+together with the proper i/o and irq configuration.
+
+When the kernel is configured without ISA-PnP support, isapnp option
+will be not built in.
+
+
+Module Autoloading Support
+==========================
+
+The ALSA drivers can be loaded automatically on demand by defining
+module aliases. The string 'snd-card-%1' is requested for ALSA native
+devices where %i is sound card number from zero to seven.
+
+To auto-load an ALSA driver for OSS services, define the string
+'sound-slot-%i' where %i means the slot number for OSS, which
+corresponds to the card index of ALSA. Usually, define this
+as the same card module.
+
+An example configuration for a single emu10k1 card is like below:
+----- /etc/modprobe.d/alsa.conf
+alias snd-card-0 snd-emu10k1
+alias sound-slot-0 snd-emu10k1
+----- /etc/modprobe.d/alsa.conf
+
+The available number of auto-loaded sound cards depends on the module
+option "cards_limit" of snd module. As default it's set to 1.
+To enable the auto-loading of multiple cards, specify the number of
+sound cards in that option.
+
+When multiple cards are available, it'd better to specify the index
+number for each card via module option, too, so that the order of
+cards is kept consistent.
+
+An example configuration for two sound cards is like below:
+
+----- /etc/modprobe.d/alsa.conf
+# ALSA portion
+options snd cards_limit=2
+alias snd-card-0 snd-interwave
+alias snd-card-1 snd-ens1371
+options snd-interwave index=0
+options snd-ens1371 index=1
+# OSS/Free portion
+alias sound-slot-0 snd-interwave
+alias sound-slot-1 snd-ens1371
+----- /etc/modprobe.d/alsa.conf
+
+In this example, the interwave card is always loaded as the first card
+(index 0) and ens1371 as the second (index 1).
+
+Alternative (and new) way to fixate the slot assignment is to use
+"slots" option of snd module. In the case above, specify like the
+following:
+
+options snd slots=snd-interwave,snd-ens1371
+
+Then, the first slot (#0) is reserved for snd-interwave driver, and
+the second (#1) for snd-ens1371. You can omit index option in each
+driver if slots option is used (although you can still have them at
+the same time as long as they don't conflict).
+
+The slots option is especially useful for avoiding the possible
+hot-plugging and the resultant slot conflict. For example, in the
+case above again, the first two slots are already reserved. If any
+other driver (e.g. snd-usb-audio) is loaded before snd-interwave or
+snd-ens1371, it will be assigned to the third or later slot.
+
+When a module name is given with '!', the slot will be given for any
+modules but that name. For example, "slots=!snd-pcsp" will reserve
+the first slot for any modules but snd-pcsp.
+
+
+ALSA PCM devices to OSS devices mapping
+=======================================
+
+/dev/snd/pcmC0D0[c|p] -> /dev/audio0 (/dev/audio) -> minor 4
+/dev/snd/pcmC0D0[c|p] -> /dev/dsp0 (/dev/dsp) -> minor 3
+/dev/snd/pcmC0D1[c|p] -> /dev/adsp0 (/dev/adsp) -> minor 12
+/dev/snd/pcmC1D0[c|p] -> /dev/audio1 -> minor 4+16 = 20
+/dev/snd/pcmC1D0[c|p] -> /dev/dsp1 -> minor 3+16 = 19
+/dev/snd/pcmC1D1[c|p] -> /dev/adsp1 -> minor 12+16 = 28
+/dev/snd/pcmC2D0[c|p] -> /dev/audio2 -> minor 4+32 = 36
+/dev/snd/pcmC2D0[c|p] -> /dev/dsp2 -> minor 3+32 = 39
+/dev/snd/pcmC2D1[c|p] -> /dev/adsp2 -> minor 12+32 = 44
+
+The first number from /dev/snd/pcmC{X}D{Y}[c|p] expression means
+sound card number and second means device number. The ALSA devices
+have either 'c' or 'p' suffix indicating the direction, capture and
+playback, respectively.
+
+Please note that the device mapping above may be varied via the module
+options of snd-pcm-oss module.
+
+
+Proc interfaces (/proc/asound)
+==============================
+
+/proc/asound/card#/pcm#[cp]/oss
+-------------------------------
+ String "erase" - erase all additional information about OSS applications
+ String "<app_name> <fragments> <fragment_size> [<options>]"
+
+ <app_name> - name of application with (higher priority) or without path
+ <fragments> - number of fragments or zero if auto
+ <fragment_size> - size of fragment in bytes or zero if auto
+ <options> - optional parameters
+ - disable the application tries to open a pcm device for
+ this channel but does not want to use it.
+ (Cause a bug or mmap needs)
+ It's good for Quake etc...
+ - direct don't use plugins
+ - block force block mode (rvplayer)
+ - non-block force non-block mode
+ - whole-frag write only whole fragments (optimization affecting
+ playback only)
+ - no-silence do not fill silence ahead to avoid clicks
+ - buggy-ptr Returns the whitespace blocks in GETOPTR ioctl
+ instead of filled blocks
+
+ Example: echo "x11amp 128 16384" > /proc/asound/card0/pcm0p/oss
+ echo "squake 0 0 disable" > /proc/asound/card0/pcm0c/oss
+ echo "rvplayer 0 0 block" > /proc/asound/card0/pcm0p/oss
+
+
+Early Buffer Allocation
+=======================
+
+Some drivers (e.g. hdsp) require the large contiguous buffers, and
+sometimes it's too late to find such spaces when the driver module is
+actually loaded due to memory fragmentation. You can pre-allocate the
+PCM buffers by loading snd-page-alloc module and write commands to its
+proc file in prior, for example, in the early boot stage like
+/etc/init.d/*.local scripts.
+
+Reading the proc file /proc/drivers/snd-page-alloc shows the current
+usage of page allocation. In writing, you can send the following
+commands to the snd-page-alloc driver:
+
+ - add VENDOR DEVICE MASK SIZE BUFFERS
+
+ VENDOR and DEVICE are PCI vendor and device IDs. They take
+ integer numbers (0x prefix is needed for the hex).
+ MASK is the PCI DMA mask. Pass 0 if not restricted.
+ SIZE is the size of each buffer to allocate. You can pass
+ k and m suffix for KB and MB. The max number is 16MB.
+ BUFFERS is the number of buffers to allocate. It must be greater
+ than 0. The max number is 4.
+
+ - erase
+
+ This will erase the all pre-allocated buffers which are not in
+ use.
+
+
+Links and Addresses
+===================
+
+ ALSA project homepage
+ http://www.alsa-project.org
+
+ Kernel Bugzilla
+ http://bugzilla.kernel.org/
+
+ ALSA Developers ML
+ mailto:alsa-devel@alsa-project.org
+
+ alsa-info.sh script
+ http://www.alsa-project.org/alsa-info.sh
diff --git a/Documentation/sound/alsa/Audigy-mixer.txt b/Documentation/sound/alsa/Audigy-mixer.txt
new file mode 100644
index 000000000..7f10dc6ff
--- /dev/null
+++ b/Documentation/sound/alsa/Audigy-mixer.txt
@@ -0,0 +1,345 @@
+
+ Sound Blaster Audigy mixer / default DSP code
+ ===========================================
+
+This is based on SB-Live-mixer.txt.
+
+The EMU10K2 chips have a DSP part which can be programmed to support
+various ways of sample processing, which is described here.
+(This article does not deal with the overall functionality of the
+EMU10K2 chips. See the manuals section for further details.)
+
+The ALSA driver programs this portion of chip by default code
+(can be altered later) which offers the following functionality:
+
+
+1) Digital mixer controls
+-------------------------
+
+These controls are built using the DSP instructions. They offer extended
+functionality. Only the default build-in code in the ALSA driver is described
+here. Note that the controls work as attenuators: the maximum value is the
+neutral position leaving the signal unchanged. Note that if the same destination
+is mentioned in multiple controls, the signal is accumulated and can be wrapped
+(set to maximal or minimal value without checking of overflow).
+
+
+Explanation of used abbreviations:
+
+DAC - digital to analog converter
+ADC - analog to digital converter
+I2S - one-way three wire serial bus for digital sound by Philips Semiconductors
+ (this standard is used for connecting standalone DAC and ADC converters)
+LFE - low frequency effects (subwoofer signal)
+AC97 - a chip containing an analog mixer, DAC and ADC converters
+IEC958 - S/PDIF
+FX-bus - the EMU10K2 chip has an effect bus containing 64 accumulators.
+ Each of the synthesizer voices can feed its output to these accumulators
+ and the DSP microcontroller can operate with the resulting sum.
+
+name='PCM Front Playback Volume',index=0
+
+This control is used to attenuate samples for left and right front PCM FX-bus
+accumulators. ALSA uses accumulators 8 and 9 for left and right front PCM
+samples for 5.1 playback. The result samples are forwarded to the front DAC PCM
+slots of the Philips DAC.
+
+name='PCM Surround Playback Volume',index=0
+
+This control is used to attenuate samples for left and right surround PCM FX-bus
+accumulators. ALSA uses accumulators 2 and 3 for left and right surround PCM
+samples for 5.1 playback. The result samples are forwarded to the surround DAC PCM
+slots of the Philips DAC.
+
+name='PCM Center Playback Volume',index=0
+
+This control is used to attenuate samples for center PCM FX-bus accumulator.
+ALSA uses accumulator 6 for center PCM sample for 5.1 playback. The result sample
+is forwarded to the center DAC PCM slot of the Philips DAC.
+
+name='PCM LFE Playback Volume',index=0
+
+This control is used to attenuate sample for LFE PCM FX-bus accumulator.
+ALSA uses accumulator 7 for LFE PCM sample for 5.1 playback. The result sample
+is forwarded to the LFE DAC PCM slot of the Philips DAC.
+
+name='PCM Playback Volume',index=0
+
+This control is used to attenuate samples for left and right PCM FX-bus
+accumulators. ALSA uses accumulators 0 and 1 for left and right PCM samples for
+stereo playback. The result samples are forwarded to the front DAC PCM slots
+of the Philips DAC.
+
+name='PCM Capture Volume',index=0
+
+This control is used to attenuate samples for left and right PCM FX-bus
+accumulator. ALSA uses accumulators 0 and 1 for left and right PCM.
+The result is forwarded to the ADC capture FIFO (thus to the standard capture
+PCM device).
+
+name='Music Playback Volume',index=0
+
+This control is used to attenuate samples for left and right MIDI FX-bus
+accumulators. ALSA uses accumulators 4 and 5 for left and right MIDI samples.
+The result samples are forwarded to the front DAC PCM slots of the AC97 codec.
+
+name='Music Capture Volume',index=0
+
+These controls are used to attenuate samples for left and right MIDI FX-bus
+accumulator. ALSA uses accumulators 4 and 5 for left and right PCM.
+The result is forwarded to the ADC capture FIFO (thus to the standard capture
+PCM device).
+
+name='Mic Playback Volume',index=0
+
+This control is used to attenuate samples for left and right Mic input.
+For Mic input is used AC97 codec. The result samples are forwarded to
+the front DAC PCM slots of the Philips DAC. Samples are forwarded to Mic
+capture FIFO (device 1 - 16bit/8KHz mono) too without volume control.
+
+name='Mic Capture Volume',index=0
+
+This control is used to attenuate samples for left and right Mic input.
+The result is forwarded to the ADC capture FIFO (thus to the standard capture
+PCM device).
+
+name='Audigy CD Playback Volume',index=0
+
+This control is used to attenuate samples from left and right IEC958 TTL
+digital inputs (usually used by a CDROM drive). The result samples are
+forwarded to the front DAC PCM slots of the Philips DAC.
+
+name='Audigy CD Capture Volume',index=0
+
+This control is used to attenuate samples from left and right IEC958 TTL
+digital inputs (usually used by a CDROM drive). The result samples are
+forwarded to the ADC capture FIFO (thus to the standard capture PCM device).
+
+name='IEC958 Optical Playback Volume',index=0
+
+This control is used to attenuate samples from left and right IEC958 optical
+digital input. The result samples are forwarded to the front DAC PCM slots
+of the Philips DAC.
+
+name='IEC958 Optical Capture Volume',index=0
+
+This control is used to attenuate samples from left and right IEC958 optical
+digital inputs. The result samples are forwarded to the ADC capture FIFO
+(thus to the standard capture PCM device).
+
+name='Line2 Playback Volume',index=0
+
+This control is used to attenuate samples from left and right I2S ADC
+inputs (on the AudigyDrive). The result samples are forwarded to the front
+DAC PCM slots of the Philips DAC.
+
+name='Line2 Capture Volume',index=1
+
+This control is used to attenuate samples from left and right I2S ADC
+inputs (on the AudigyDrive). The result samples are forwarded to the ADC
+capture FIFO (thus to the standard capture PCM device).
+
+name='Analog Mix Playback Volume',index=0
+
+This control is used to attenuate samples from left and right I2S ADC
+inputs from Philips ADC. The result samples are forwarded to the front
+DAC PCM slots of the Philips DAC. This contains mix from analog sources
+like CD, Line In, Aux, ....
+
+name='Analog Mix Capture Volume',index=1
+
+This control is used to attenuate samples from left and right I2S ADC
+inputs Philips ADC. The result samples are forwarded to the ADC
+capture FIFO (thus to the standard capture PCM device).
+
+name='Aux2 Playback Volume',index=0
+
+This control is used to attenuate samples from left and right I2S ADC
+inputs (on the AudigyDrive). The result samples are forwarded to the front
+DAC PCM slots of the Philips DAC.
+
+name='Aux2 Capture Volume',index=1
+
+This control is used to attenuate samples from left and right I2S ADC
+inputs (on the AudigyDrive). The result samples are forwarded to the ADC
+capture FIFO (thus to the standard capture PCM device).
+
+name='Front Playback Volume',index=0
+
+All stereo signals are mixed together and mirrored to surround, center and LFE.
+This control is used to attenuate samples for left and right front speakers of
+this mix.
+
+name='Surround Playback Volume',index=0
+
+All stereo signals are mixed together and mirrored to surround, center and LFE.
+This control is used to attenuate samples for left and right surround speakers of
+this mix.
+
+name='Center Playback Volume',index=0
+
+All stereo signals are mixed together and mirrored to surround, center and LFE.
+This control is used to attenuate sample for center speaker of this mix.
+
+name='LFE Playback Volume',index=0
+
+All stereo signals are mixed together and mirrored to surround, center and LFE.
+This control is used to attenuate sample for LFE speaker of this mix.
+
+name='Tone Control - Switch',index=0
+
+This control turns the tone control on or off. The samples for front, rear
+and center / LFE outputs are affected.
+
+name='Tone Control - Bass',index=0
+
+This control sets the bass intensity. There is no neutral value!!
+When the tone control code is activated, the samples are always modified.
+The closest value to pure signal is 20.
+
+name='Tone Control - Treble',index=0
+
+This control sets the treble intensity. There is no neutral value!!
+When the tone control code is activated, the samples are always modified.
+The closest value to pure signal is 20.
+
+name='Master Playback Volume',index=0
+
+This control is used to attenuate samples for front, surround, center and
+LFE outputs.
+
+name='IEC958 Optical Raw Playback Switch',index=0
+
+If this switch is on, then the samples for the IEC958 (S/PDIF) digital
+output are taken only from the raw FX8010 PCM, otherwise standard front
+PCM samples are taken.
+
+
+2) PCM stream related controls
+------------------------------
+
+name='EMU10K1 PCM Volume',index 0-31
+
+Channel volume attenuation in range 0-0xffff. The maximum value (no
+attenuation) is default. The channel mapping for three values is
+as follows:
+
+ 0 - mono, default 0xffff (no attenuation)
+ 1 - left, default 0xffff (no attenuation)
+ 2 - right, default 0xffff (no attenuation)
+
+name='EMU10K1 PCM Send Routing',index 0-31
+
+This control specifies the destination - FX-bus accumulators. There 24
+values with this mapping:
+
+ 0 - mono, A destination (FX-bus 0-63), default 0
+ 1 - mono, B destination (FX-bus 0-63), default 1
+ 2 - mono, C destination (FX-bus 0-63), default 2
+ 3 - mono, D destination (FX-bus 0-63), default 3
+ 4 - mono, E destination (FX-bus 0-63), default 0
+ 5 - mono, F destination (FX-bus 0-63), default 0
+ 6 - mono, G destination (FX-bus 0-63), default 0
+ 7 - mono, H destination (FX-bus 0-63), default 0
+ 8 - left, A destination (FX-bus 0-63), default 0
+ 9 - left, B destination (FX-bus 0-63), default 1
+ 10 - left, C destination (FX-bus 0-63), default 2
+ 11 - left, D destination (FX-bus 0-63), default 3
+ 12 - left, E destination (FX-bus 0-63), default 0
+ 13 - left, F destination (FX-bus 0-63), default 0
+ 14 - left, G destination (FX-bus 0-63), default 0
+ 15 - left, H destination (FX-bus 0-63), default 0
+ 16 - right, A destination (FX-bus 0-63), default 0
+ 17 - right, B destination (FX-bus 0-63), default 1
+ 18 - right, C destination (FX-bus 0-63), default 2
+ 19 - right, D destination (FX-bus 0-63), default 3
+ 20 - right, E destination (FX-bus 0-63), default 0
+ 21 - right, F destination (FX-bus 0-63), default 0
+ 22 - right, G destination (FX-bus 0-63), default 0
+ 23 - right, H destination (FX-bus 0-63), default 0
+
+Don't forget that it's illegal to assign a channel to the same FX-bus accumulator
+more than once (it means 0=0 && 1=0 is an invalid combination).
+
+name='EMU10K1 PCM Send Volume',index 0-31
+
+It specifies the attenuation (amount) for given destination in range 0-255.
+The channel mapping is following:
+
+ 0 - mono, A destination attn, default 255 (no attenuation)
+ 1 - mono, B destination attn, default 255 (no attenuation)
+ 2 - mono, C destination attn, default 0 (mute)
+ 3 - mono, D destination attn, default 0 (mute)
+ 4 - mono, E destination attn, default 0 (mute)
+ 5 - mono, F destination attn, default 0 (mute)
+ 6 - mono, G destination attn, default 0 (mute)
+ 7 - mono, H destination attn, default 0 (mute)
+ 8 - left, A destination attn, default 255 (no attenuation)
+ 9 - left, B destination attn, default 0 (mute)
+ 10 - left, C destination attn, default 0 (mute)
+ 11 - left, D destination attn, default 0 (mute)
+ 12 - left, E destination attn, default 0 (mute)
+ 13 - left, F destination attn, default 0 (mute)
+ 14 - left, G destination attn, default 0 (mute)
+ 15 - left, H destination attn, default 0 (mute)
+ 16 - right, A destination attn, default 0 (mute)
+ 17 - right, B destination attn, default 255 (no attenuation)
+ 18 - right, C destination attn, default 0 (mute)
+ 19 - right, D destination attn, default 0 (mute)
+ 20 - right, E destination attn, default 0 (mute)
+ 21 - right, F destination attn, default 0 (mute)
+ 22 - right, G destination attn, default 0 (mute)
+ 23 - right, H destination attn, default 0 (mute)
+
+
+
+4) MANUALS/PATENTS:
+-------------------
+
+ftp://opensource.creative.com/pub/doc
+-------------------------------------
+
+ Files:
+ LM4545.pdf AC97 Codec
+
+ m2049.pdf The EMU10K1 Digital Audio Processor
+
+ hog63.ps FX8010 - A DSP Chip Architecture for Audio Effects
+
+
+WIPO Patents
+------------
+ Patent numbers:
+ WO 9901813 (A1) Audio Effects Processor with multiple asynchronous (Jan. 14, 1999)
+ streams
+
+ WO 9901814 (A1) Processor with Instruction Set for Audio Effects (Jan. 14, 1999)
+
+ WO 9901953 (A1) Audio Effects Processor having Decoupled Instruction
+ Execution and Audio Data Sequencing (Jan. 14, 1999)
+
+
+US Patents (http://www.uspto.gov/)
+----------------------------------
+
+ US 5925841 Digital Sampling Instrument employing cache memory (Jul. 20, 1999)
+
+ US 5928342 Audio Effects Processor integrated on a single chip (Jul. 27, 1999)
+ with a multiport memory onto which multiple asynchronous
+ digital sound samples can be concurrently loaded
+
+ US 5930158 Processor with Instruction Set for Audio Effects (Jul. 27, 1999)
+
+ US 6032235 Memory initialization circuit (Tram) (Feb. 29, 2000)
+
+ US 6138207 Interpolation looping of audio samples in cache connected to (Oct. 24, 2000)
+ system bus with prioritization and modification of bus transfers
+ in accordance with loop ends and minimum block sizes
+
+ US 6151670 Method for conserving memory storage using a (Nov. 21, 2000)
+ pool of short term memory registers
+
+ US 6195715 Interrupt control for multiple programs communicating with (Feb. 27, 2001)
+ a common interrupt by associating programs to GP registers,
+ defining interrupt register, polling GP registers, and invoking
+ callback routine associated with defined interrupt register
diff --git a/Documentation/sound/alsa/Audiophile-Usb.txt b/Documentation/sound/alsa/Audiophile-Usb.txt
new file mode 100644
index 000000000..e7a5ed4dc
--- /dev/null
+++ b/Documentation/sound/alsa/Audiophile-Usb.txt
@@ -0,0 +1,442 @@
+ Guide to using M-Audio Audiophile USB with ALSA and Jack v1.5
+ ========================================================
+
+ Thibault Le Meur <Thibault.LeMeur@supelec.fr>
+
+This document is a guide to using the M-Audio Audiophile USB (tm) device with
+ALSA and JACK.
+
+History
+=======
+* v1.4 - Thibault Le Meur (2007-07-11)
+ - Added Low Endianness nature of 16bits-modes
+ found by Hakan Lennestal <Hakan.Lennestal@brfsodrahamn.se>
+ - Modifying document structure
+* v1.5 - Thibault Le Meur (2007-07-12)
+ - Added AC3/DTS passthru info
+
+
+1 - Audiophile USB Specs and correct usage
+==========================================
+
+This part is a reminder of important facts about the functions and limitations
+of the device.
+
+The device has 4 audio interfaces, and 2 MIDI ports:
+ * Analog Stereo Input (Ai)
+ - This port supports 2 pairs of line-level audio inputs (1/4" TS and RCA)
+ - When the 1/4" TS (jack) connectors are connected, the RCA connectors
+ are disabled
+ * Analog Stereo Output (Ao)
+ * Digital Stereo Input (Di)
+ * Digital Stereo Output (Do)
+ * Midi In (Mi)
+ * Midi Out (Mo)
+
+The internal DAC/ADC has the following characteristics:
+* sample depth of 16 or 24 bits
+* sample rate from 8kHz to 96kHz
+* Two interfaces can't use different sample depths at the same time.
+Moreover, the Audiophile USB documentation gives the following Warning:
+"Please exit any audio application running before switching between bit depths"
+
+Due to the USB 1.1 bandwidth limitation, a limited number of interfaces can be
+activated at the same time depending on the audio mode selected:
+ * 16-bit/48kHz ==> 4 channels in + 4 channels out
+ - Ai+Ao+Di+Do
+ * 24-bit/48kHz ==> 4 channels in + 2 channels out,
+ or 2 channels in + 4 channels out
+ - Ai+Ao+Do or Ai+Di+Ao or Ai+Di+Do or Di+Ao+Do
+ * 24-bit/96kHz ==> 2 channels in _or_ 2 channels out (half duplex only)
+ - Ai or Ao or Di or Do
+
+Important facts about the Digital interface:
+--------------------------------------------
+ * The Do port additionally supports surround-encoded AC-3 and DTS passthrough,
+though I haven't tested it under Linux
+ - Note that in this setup only the Do interface can be enabled
+ * Apart from recording an audio digital stream, enabling the Di port is a way
+to synchronize the device to an external sample clock
+ - As a consequence, the Di port must be enable only if an active Digital
+source is connected
+ - Enabling Di when no digital source is connected can result in a
+synchronization error (for instance sound played at an odd sample rate)
+
+
+2 - Audiophile USB MIDI support in ALSA
+=======================================
+
+The Audiophile USB MIDI ports will be automatically supported once the
+following modules have been loaded:
+ * snd-usb-audio
+ * snd-seq-midi
+
+No additional setting is required.
+
+
+3 - Audiophile USB Audio support in ALSA
+========================================
+
+Audio functions of the Audiophile USB device are handled by the snd-usb-audio
+module. This module can work in a default mode (without any device-specific
+parameter), or in an "advanced" mode with the device-specific parameter called
+"device_setup".
+
+3.1 - Default Alsa driver mode
+------------------------------
+
+The default behavior of the snd-usb-audio driver is to list the device
+capabilities at startup and activate the required mode when required
+by the applications: for instance if the user is recording in a
+24bit-depth-mode and immediately after wants to switch to a 16bit-depth mode,
+the snd-usb-audio module will reconfigure the device on the fly.
+
+This approach has the advantage to let the driver automatically switch from sample
+rates/depths automatically according to the user's needs. However, those who
+are using the device under windows know that this is not how the device is meant to
+work: under windows applications must be closed before using the m-audio control
+panel to switch the device working mode. Thus as we'll see in next section, this
+Default Alsa driver mode can lead to device misconfigurations.
+
+Let's get back to the Default Alsa driver mode for now. In this case the
+Audiophile interfaces are mapped to alsa pcm devices in the following
+way (I suppose the device's index is 1):
+ * hw:1,0 is Ao in playback and Di in capture
+ * hw:1,1 is Do in playback and Ai in capture
+ * hw:1,2 is Do in AC3/DTS passthrough mode
+
+In this mode, the device uses Big Endian byte-encoding so that
+supported audio format are S16_BE for 16-bit depth modes and S24_3BE for
+24-bits depth mode.
+
+One exception is the hw:1,2 port which was reported to be Little Endian
+compliant (supposedly supporting S16_LE) but processes in fact only S16_BE streams.
+This has been fixed in kernel 2.6.23 and above and now the hw:1,2 interface
+is reported to be big endian in this default driver mode.
+
+Examples:
+ * playing a S24_3BE encoded raw file to the Ao port
+ % aplay -D hw:1,0 -c2 -t raw -r48000 -fS24_3BE test.raw
+ * recording a S24_3BE encoded raw file from the Ai port
+ % arecord -D hw:1,1 -c2 -t raw -r48000 -fS24_3BE test.raw
+ * playing a S16_BE encoded raw file to the Do port
+ % aplay -D hw:1,1 -c2 -t raw -r48000 -fS16_BE test.raw
+ * playing an ac3 sample file to the Do port
+ % aplay -D hw:1,2 --channels=6 ac3_S16_BE_encoded_file.raw
+
+If you're happy with the default Alsa driver mode and don't experience any
+issue with this mode, then you can skip the following chapter.
+
+3.2 - Advanced module setup
+---------------------------
+
+Due to the hardware constraints described above, the device initialization made
+by the Alsa driver in default mode may result in a corrupted state of the
+device. For instance, a particularly annoying issue is that the sound captured
+from the Ai interface sounds distorted (as if boosted with an excessive high
+volume gain).
+
+For people having this problem, the snd-usb-audio module has a new module
+parameter called "device_setup" (this parameter was introduced in kernel
+release 2.6.17)
+
+3.2.1 - Initializing the working mode of the Audiophile USB
+
+As far as the Audiophile USB device is concerned, this value let the user
+specify:
+ * the sample depth
+ * the sample rate
+ * whether the Di port is used or not
+
+When initialized with "device_setup=0x00", the snd-usb-audio module has
+the same behaviour as when the parameter is omitted (see paragraph "Default
+Alsa driver mode" above)
+
+Others modes are described in the following subsections.
+
+3.2.1.1 - 16-bit modes
+
+The two supported modes are:
+
+ * device_setup=0x01
+ - 16bits 48kHz mode with Di disabled
+ - Ai,Ao,Do can be used at the same time
+ - hw:1,0 is not available in capture mode
+ - hw:1,2 is not available
+
+ * device_setup=0x11
+ - 16bits 48kHz mode with Di enabled
+ - Ai,Ao,Di,Do can be used at the same time
+ - hw:1,0 is available in capture mode
+ - hw:1,2 is not available
+
+In this modes the device operates only at 16bits-modes. Before kernel 2.6.23,
+the devices where reported to be Big-Endian when in fact they were Little-Endian
+so that playing a file was a matter of using:
+ % aplay -D hw:1,1 -c2 -t raw -r48000 -fS16_BE test_S16_LE.raw
+where "test_S16_LE.raw" was in fact a little-endian sample file.
+
+Thanks to Hakan Lennestal (who discovered the Little-Endiannes of the device in
+these modes) a fix has been committed (expected in kernel 2.6.23) and
+Alsa now reports Little-Endian interfaces. Thus playing a file now is as simple as
+using:
+ % aplay -D hw:1,1 -c2 -t raw -r48000 -fS16_LE test_S16_LE.raw
+
+3.2.1.2 - 24-bit modes
+
+The three supported modes are:
+
+ * device_setup=0x09
+ - 24bits 48kHz mode with Di disabled
+ - Ai,Ao,Do can be used at the same time
+ - hw:1,0 is not available in capture mode
+ - hw:1,2 is not available
+
+ * device_setup=0x19
+ - 24bits 48kHz mode with Di enabled
+ - 3 ports from {Ai,Ao,Di,Do} can be used at the same time
+ - hw:1,0 is available in capture mode and an active digital source must be
+ connected to Di
+ - hw:1,2 is not available
+
+ * device_setup=0x0D or 0x10
+ - 24bits 96kHz mode
+ - Di is enabled by default for this mode but does not need to be connected
+ to an active source
+ - Only 1 port from {Ai,Ao,Di,Do} can be used at the same time
+ - hw:1,0 is available in captured mode
+ - hw:1,2 is not available
+
+In these modes the device is only Big-Endian compliant (see "Default Alsa driver
+mode" above for an aplay command example)
+
+3.2.1.3 - AC3 w/ DTS passthru mode
+
+Thanks to Hakan Lennestal, I now have a report saying that this mode works.
+
+ * device_setup=0x03
+ - 16bits 48kHz mode with only the Do port enabled
+ - AC3 with DTS passthru
+ - Caution with this setup the Do port is mapped to the pcm device hw:1,0
+
+The command line used to playback the AC3/DTS encoded .wav-files in this mode:
+ % aplay -D hw:1,0 --channels=6 ac3_S16_LE_encoded_file.raw
+
+3.2.2 - How to use the device_setup parameter
+----------------------------------------------
+
+The parameter can be given:
+
+ * By manually probing the device (as root):
+ # modprobe -r snd-usb-audio
+ # modprobe snd-usb-audio index=1 device_setup=0x09
+
+ * Or while configuring the modules options in your modules configuration file
+ (typically a .conf file in /etc/modprobe.d/ directory:
+ alias snd-card-1 snd-usb-audio
+ options snd-usb-audio index=1 device_setup=0x09
+
+CAUTION when initializing the device
+-------------------------------------
+
+ * Correct initialization on the device requires that device_setup is given to
+ the module BEFORE the device is turned on. So, if you use the "manual probing"
+ method described above, take care to power-on the device AFTER this initialization.
+
+ * Failing to respect this will lead to a misconfiguration of the device. In this case
+ turn off the device, unprobe the snd-usb-audio module, then probe it again with
+ correct device_setup parameter and then (and only then) turn on the device again.
+
+ * If you've correctly initialized the device in a valid mode and then want to switch
+ to another mode (possibly with another sample-depth), please use also the following
+ procedure:
+ - first turn off the device
+ - de-register the snd-usb-audio module (modprobe -r)
+ - change the device_setup parameter by changing the device_setup
+ option in /etc/modprobe.d/*.conf
+ - turn on the device
+ * A workaround for this last issue has been applied to kernel 2.6.23, but it may not
+ be enough to ensure the 'stability' of the device initialization.
+
+3.2.3 - Technical details for hackers
+-------------------------------------
+This section is for hackers, wanting to understand details about the device
+internals and how Alsa supports it.
+
+3.2.3.1 - Audiophile USB's device_setup structure
+
+If you want to understand the device_setup magic numbers for the Audiophile
+USB, you need some very basic understanding of binary computation. However,
+this is not required to use the parameter and you may skip this section.
+
+The device_setup is one byte long and its structure is the following:
+
+ +---+---+---+---+---+---+---+---+
+ | b7| b6| b5| b4| b3| b2| b1| b0|
+ +---+---+---+---+---+---+---+---+
+ | 0 | 0 | 0 | Di|24B|96K|DTS|SET|
+ +---+---+---+---+---+---+---+---+
+
+Where:
+ * b0 is the "SET" bit
+ - it MUST be set if device_setup is initialized
+ * b1 is the "DTS" bit
+ - it is set only for Digital output with DTS/AC3
+ - this setup is not tested
+ * b2 is the Rate selection flag
+ - When set to "1" the rate range is 48.1-96kHz
+ - Otherwise the sample rate range is 8-48kHz
+ * b3 is the bit depth selection flag
+ - When set to "1" samples are 24bits long
+ - Otherwise they are 16bits long
+ - Note that b2 implies b3 as the 96kHz mode is only supported for 24 bits
+ samples
+ * b4 is the Digital input flag
+ - When set to "1" the device assumes that an active digital source is
+ connected
+ - You shouldn't enable Di if no source is seen on the port (this leads to
+ synchronization issues)
+ - b4 is implied by b2 (since only one port is enabled at a time no synch
+ error can occur)
+ * b5 to b7 are reserved for future uses, and must be set to "0"
+ - might become Ao, Do, Ai, for b7, b6, b4 respectively
+
+Caution:
+ * there is no check on the value you will give to device_setup
+ - for instance choosing 0x05 (16bits 96kHz) will fail back to 0x09 since
+ b2 implies b3. But _there_will_be_no_warning_ in /var/log/messages
+ * Hardware constraints due to the USB bus limitation aren't checked
+ - choosing b2 will prepare all interfaces for 24bits/96kHz but you'll
+ only be able to use one at the same time
+
+3.2.3.2 - USB implementation details for this device
+
+You may safely skip this section if you're not interested in driver
+hacking.
+
+This section describes some internal aspects of the device and summarizes the
+data I got by usb-snooping the windows and Linux drivers.
+
+The M-Audio Audiophile USB has 7 USB Interfaces:
+a "USB interface":
+ * USB Interface nb.0
+ * USB Interface nb.1
+ - Audio Control function
+ * USB Interface nb.2
+ - Analog Output
+ * USB Interface nb.3
+ - Digital Output
+ * USB Interface nb.4
+ - Analog Input
+ * USB Interface nb.5
+ - Digital Input
+ * USB Interface nb.6
+ - MIDI interface compliant with the MIDIMAN quirk
+
+Each interface has 5 altsettings (AltSet 1,2,3,4,5) except:
+ * Interface 3 (Digital Out) has an extra Alset nb.6
+ * Interface 5 (Digital In) does not have Alset nb.3 and 5
+
+Here is a short description of the AltSettings capabilities:
+ * AltSettings 1 corresponds to
+ - 24-bit depth, 48.1-96kHz sample mode
+ - Adaptive playback (Ao and Do), Synch capture (Ai), or Asynch capture (Di)
+ * AltSettings 2 corresponds to
+ - 24-bit depth, 8-48kHz sample mode
+ - Asynch capture and playback (Ao,Ai,Do,Di)
+ * AltSettings 3 corresponds to
+ - 24-bit depth, 8-48kHz sample mode
+ - Synch capture (Ai) and Adaptive playback (Ao,Do)
+ * AltSettings 4 corresponds to
+ - 16-bit depth, 8-48kHz sample mode
+ - Asynch capture and playback (Ao,Ai,Do,Di)
+ * AltSettings 5 corresponds to
+ - 16-bit depth, 8-48kHz sample mode
+ - Synch capture (Ai) and Adaptive playback (Ao,Do)
+ * AltSettings 6 corresponds to
+ - 16-bit depth, 8-48kHz sample mode
+ - Synch playback (Do), audio format type III IEC1937_AC-3
+
+In order to ensure a correct initialization of the device, the driver
+_must_know_ how the device will be used:
+ * if DTS is chosen, only Interface 2 with AltSet nb.6 must be
+ registered
+ * if 96KHz only AltSets nb.1 of each interface must be selected
+ * if samples are using 24bits/48KHz then AltSet 2 must me used if
+ Digital input is connected, and only AltSet nb.3 if Digital input
+ is not connected
+ * if samples are using 16bits/48KHz then AltSet 4 must me used if
+ Digital input is connected, and only AltSet nb.5 if Digital input
+ is not connected
+
+When device_setup is given as a parameter to the snd-usb-audio module, the
+parse_audio_endpoints function uses a quirk called
+"audiophile_skip_setting_quirk" in order to prevent AltSettings not
+corresponding to device_setup from being registered in the driver.
+
+4 - Audiophile USB and Jack support
+===================================
+
+This section deals with support of the Audiophile USB device in Jack.
+
+There are 2 main potential issues when using Jackd with the device:
+* support for Big-Endian devices in 24-bit modes
+* support for 4-in / 4-out channels
+
+4.1 - Direct support in Jackd
+-----------------------------
+
+Jack supports big endian devices only in recent versions (thanks to
+Andreas Steinmetz for his first big-endian patch). I can't remember
+exactly when this support was released into jackd, let's just say that
+with jackd version 0.103.0 it's almost ok (just a small bug is affecting
+16bits Big-Endian devices, but since you've read carefully the above
+paragraphs, you're now using kernel >= 2.6.23 and your 16bits devices
+are now Little Endians ;-) ).
+
+You can run jackd with the following command for playback with Ao and
+record with Ai:
+ % jackd -R -dalsa -Phw:1,0 -r48000 -p128 -n2 -D -Chw:1,1
+
+4.2 - Using Alsa plughw
+-----------------------
+If you don't have a recent Jackd installed, you can downgrade to using
+the Alsa "plug" converter.
+
+For instance here is one way to run Jack with 2 playback channels on Ao and 2
+capture channels from Ai:
+ % jackd -R -dalsa -dplughw:1 -r48000 -p256 -n2 -D -Cplughw:1,1
+
+However you may see the following warning message:
+"You appear to be using the ALSA software "plug" layer, probably a result of
+using the "default" ALSA device. This is less efficient than it could be.
+Consider using a hardware device instead rather than using the plug layer."
+
+4.3 - Getting 2 input and/or output interfaces in Jack
+------------------------------------------------------
+
+As you can see, starting the Jack server this way will only enable 1 stereo
+input (Di or Ai) and 1 stereo output (Ao or Do).
+
+This is due to the following restrictions:
+* Jack can only open one capture device and one playback device at a time
+* The Audiophile USB is seen as 2 (or three) Alsa devices: hw:1,0, hw:1,1
+ (and optionally hw:1,2)
+
+If you want to get Ai+Di and/or Ao+Do support with Jack, you would need to
+combine the Alsa devices into one logical "complex" device.
+
+If you want to give it a try, I recommend reading the information from
+this page: http://www.sound-man.co.uk/linuxaudio/ice1712multi.html
+It is related to another device (ice1712) but can be adapted to suit
+the Audiophile USB.
+
+Enabling multiple Audiophile USB interfaces for Jackd will certainly require:
+* Making sure your Jackd version has the MMAP_COMPLEX patch (see the ice1712 page)
+* (maybe) patching the alsa-lib/src/pcm/pcm_multi.c file (see the ice1712 page)
+* define a multi device (combination of hw:1,0 and hw:1,1) in your .asoundrc
+ file
+* start jackd with this device
+
+I had no success in testing this for now, if you have any success with this kind
+of setup, please drop me an email.
diff --git a/Documentation/sound/alsa/Bt87x.txt b/Documentation/sound/alsa/Bt87x.txt
new file mode 100644
index 000000000..f158cde8b
--- /dev/null
+++ b/Documentation/sound/alsa/Bt87x.txt
@@ -0,0 +1,78 @@
+Intro
+=====
+
+You might have noticed that the bt878 grabber cards have actually
+_two_ PCI functions:
+
+$ lspci
+[ ... ]
+00:0a.0 Multimedia video controller: Brooktree Corporation Bt878 (rev 02)
+00:0a.1 Multimedia controller: Brooktree Corporation Bt878 (rev 02)
+[ ... ]
+
+The first does video, it is backward compatible to the bt848. The second
+does audio. snd-bt87x is a driver for the second function. It's a sound
+driver which can be used for recording sound (and _only_ recording, no
+playback). As most TV cards come with a short cable which can be plugged
+into your sound card's line-in you probably don't need this driver if all
+you want to do is just watching TV...
+
+Some cards do not bother to connect anything to the audio input pins of
+the chip, and some other cards use the audio function to transport MPEG
+video data, so it's quite possible that audio recording may not work
+with your card.
+
+
+Driver Status
+=============
+
+The driver is now stable. However, it doesn't know about many TV cards,
+and it refuses to load for cards it doesn't know.
+
+If the driver complains ("Unknown TV card found, the audio driver will
+not load"), you can specify the load_all=1 option to force the driver to
+try to use the audio capture function of your card. If the frequency of
+recorded data is not right, try to specify the digital_rate option with
+other values than the default 32000 (often it's 44100 or 64000).
+
+If you have an unknown card, please mail the ID and board name to
+<alsa-devel@alsa-project.org>, regardless of whether audio capture works
+or not, so that future versions of this driver know about your card.
+
+
+Audio modes
+===========
+
+The chip knows two different modes (digital/analog). snd-bt87x
+registers two PCM devices, one for each mode. They cannot be used at
+the same time.
+
+
+Digital audio mode
+==================
+
+The first device (hw:X,0) gives you 16 bit stereo sound. The sample
+rate depends on the external source which feeds the Bt87x with digital
+sound via I2S interface.
+
+
+Analog audio mode (A/D)
+=======================
+
+The second device (hw:X,1) gives you 8 or 16 bit mono sound. Supported
+sample rates are between 119466 and 448000 Hz (yes, these numbers are
+that high). If you've set the CONFIG_SND_BT87X_OVERCLOCK option, the
+maximum sample rate is 1792000 Hz, but audio data becomes unusable
+beyond 896000 Hz on my card.
+
+The chip has three analog inputs. Consequently you'll get a mixer
+device to control these.
+
+
+Have fun,
+
+ Clemens
+
+
+Written by Clemens Ladisch <clemens@ladisch.de>
+big parts copied from btaudio.txt by Gerd Knorr <kraxel@bytesex.org>
diff --git a/Documentation/sound/alsa/CMIPCI.txt b/Documentation/sound/alsa/CMIPCI.txt
new file mode 100644
index 000000000..4e36e6e80
--- /dev/null
+++ b/Documentation/sound/alsa/CMIPCI.txt
@@ -0,0 +1,254 @@
+ Brief Notes on C-Media 8338/8738/8768/8770 Driver
+ =================================================
+
+ Takashi Iwai <tiwai@suse.de>
+
+
+Front/Rear Multi-channel Playback
+---------------------------------
+
+CM8x38 chip can use ADC as the second DAC so that two different stereo
+channels can be used for front/rear playbacks. Since there are two
+DACs, both streams are handled independently unlike the 4/6ch multi-
+channel playbacks in the section below.
+
+As default, ALSA driver assigns the first PCM device (i.e. hw:0,0 for
+card#0) for front and 4/6ch playbacks, while the second PCM device
+(hw:0,1) is assigned to the second DAC for rear playback.
+
+There are slight differences between the two DACs:
+
+- The first DAC supports U8 and S16LE formats, while the second DAC
+ supports only S16LE.
+- The second DAC supports only two channel stereo.
+
+Please note that the CM8x38 DAC doesn't support continuous playback
+rate but only fixed rates: 5512, 8000, 11025, 16000, 22050, 32000,
+44100 and 48000 Hz.
+
+The rear output can be heard only when "Four Channel Mode" switch is
+disabled. Otherwise no signal will be routed to the rear speakers.
+As default it's turned on.
+
+*** WARNING ***
+When "Four Channel Mode" switch is off, the output from rear speakers
+will be FULL VOLUME regardless of Master and PCM volumes.
+This might damage your audio equipment. Please disconnect speakers
+before your turn off this switch.
+*** WARNING ***
+
+[ Well.. I once got the output with correct volume (i.e. same with the
+ front one) and was so excited. It was even with "Four Channel" bit
+ on and "double DAC" mode. Actually I could hear separate 4 channels
+ from front and rear speakers! But.. after reboot, all was gone.
+ It's a very pity that I didn't save the register dump at that
+ time.. Maybe there is an unknown register to achieve this... ]
+
+If your card has an extra output jack for the rear output, the rear
+playback should be routed there as default. If not, there is a
+control switch in the driver "Line-In As Rear", which you can change
+via alsamixer or somewhat else. When this switch is on, line-in jack
+is used as rear output.
+
+There are two more controls regarding to the rear output.
+The "Exchange DAC" switch is used to exchange front and rear playback
+routes, i.e. the 2nd DAC is output from front output.
+
+
+4/6 Multi-Channel Playback
+--------------------------
+
+The recent CM8738 chips support for the 4/6 multi-channel playback
+function. This is useful especially for AC3 decoding.
+
+When the multi-channel is supported, the driver name has a suffix
+"-MC" such like "CMI8738-MC6". You can check this name from
+/proc/asound/cards.
+
+When the 4/6-ch output is enabled, the second DAC accepts up to 6 (or
+4) channels. While the dual DAC supports two different rates or
+formats, the 4/6-ch playback supports only the same condition for all
+channels. Since the multi-channel playback mode uses both DACs, you
+cannot operate with full-duplex.
+
+The 4.0 and 5.1 modes are defined as the pcm "surround40" and "surround51"
+in alsa-lib. For example, you can play a WAV file with 6 channels like
+
+ % aplay -Dsurround51 sixchannels.wav
+
+For programming the 4/6 channel playback, you need to specify the PCM
+channels as you like and set the format S16LE. For example, for playback
+with 4 channels,
+
+ snd_pcm_hw_params_set_access(pcm, hw, SND_PCM_ACCESS_RW_INTERLEAVED);
+ // or mmap if you like
+ snd_pcm_hw_params_set_format(pcm, hw, SND_PCM_FORMAT_S16_LE);
+ snd_pcm_hw_params_set_channels(pcm, hw, 4);
+
+and use the interleaved 4 channel data.
+
+There are some control switches affecting to the speaker connections:
+
+"Line-In Mode" - an enum control to change the behavior of line-in
+ jack. Either "Line-In", "Rear Output" or "Bass Output" can
+ be selected. The last item is available only with model 039
+ or newer.
+ When "Rear Output" is chosen, the surround channels 3 and 4
+ are output to line-in jack.
+"Mic-In Mode" - an enum control to change the behavior of mic-in
+ jack. Either "Mic-In" or "Center/LFE Output" can be
+ selected.
+ When "Center/LFE Output" is chosen, the center and bass
+ channels (channels 5 and 6) are output to mic-in jack.
+
+Digital I/O
+-----------
+
+The CM8x38 provides the excellent SPDIF capability with very cheap
+price (yes, that's the reason I bought the card :)
+
+The SPDIF playback and capture are done via the third PCM device
+(hw:0,2). Usually this is assigned to the PCM device "spdif".
+The available rates are 44100 and 48000 Hz.
+For playback with aplay, you can run like below:
+
+ % aplay -Dhw:0,2 foo.wav
+
+or
+
+ % aplay -Dspdif foo.wav
+
+24bit format is also supported experimentally.
+
+The playback and capture over SPDIF use normal DAC and ADC,
+respectively, so you cannot playback both analog and digital streams
+simultaneously.
+
+To enable SPDIF output, you need to turn on "IEC958 Output Switch"
+control via mixer or alsactl ("IEC958" is the official name of
+so-called S/PDIF). Then you'll see the red light on from the card so
+you know that's working obviously :)
+The SPDIF input is always enabled, so you can hear SPDIF input data
+from line-out with "IEC958 In Monitor" switch at any time (see
+below).
+
+You can play via SPDIF even with the first device (hw:0,0),
+but SPDIF is enabled only when the proper format (S16LE), sample rate
+(441100 or 48000) and channels (2) are used. Otherwise it's turned
+off. (Also don't forget to turn on "IEC958 Output Switch", too.)
+
+
+Additionally there are relevant control switches:
+
+"IEC958 Mix Analog" - Mix analog PCM playback and FM-OPL/3 streams and
+ output through SPDIF. This switch appears only on old chip
+ models (CM8738 033 and 037).
+ Note: without this control you can output PCM to SPDIF.
+ This is "mixing" of streams, so e.g. it's not for AC3 output
+ (see the next section).
+
+"IEC958 In Select" - Select SPDIF input, the internal CD-in (false)
+ and the external input (true).
+
+"IEC958 Loop" - SPDIF input data is loop back into SPDIF
+ output (aka bypass)
+
+"IEC958 Copyright" - Set the copyright bit.
+
+"IEC958 5V" - Select 0.5V (coax) or 5V (optical) interface.
+ On some cards this doesn't work and you need to change the
+ configuration with hardware dip-switch.
+
+"IEC958 In Monitor" - SPDIF input is routed to DAC.
+
+"IEC958 In Phase Inverse" - Set SPDIF input format as inverse.
+ [FIXME: this doesn't work on all chips..]
+
+"IEC958 In Valid" - Set input validity flag detection.
+
+Note: When "PCM Playback Switch" is on, you'll hear the digital output
+stream through analog line-out.
+
+
+The AC3 (RAW DIGITAL) OUTPUT
+----------------------------
+
+The driver supports raw digital (typically AC3) i/o over SPDIF. This
+can be toggled via IEC958 playback control, but usually you need to
+access it via alsa-lib. See alsa-lib documents for more details.
+
+On the raw digital mode, the "PCM Playback Switch" is automatically
+turned off so that non-audio data is heard from the analog line-out.
+Similarly the following switches are off: "IEC958 Mix Analog" and
+"IEC958 Loop". The switches are resumed after closing the SPDIF PCM
+device automatically to the previous state.
+
+On the model 033, AC3 is implemented by the software conversion in
+the alsa-lib. If you need to bypass the software conversion of IEC958
+subframes, pass the "soft_ac3=0" module option. This doesn't matter
+on the newer models.
+
+
+ANALOG MIXER INTERFACE
+----------------------
+
+The mixer interface on CM8x38 is similar to SB16.
+There are Master, PCM, Synth, CD, Line, Mic and PC Speaker playback
+volumes. Synth, CD, Line and Mic have playback and capture switches,
+too, as well as SB16.
+
+In addition to the standard SB mixer, CM8x38 provides more functions.
+- PCM playback switch
+- PCM capture switch (to capture the data sent to DAC)
+- Mic Boost switch
+- Mic capture volume
+- Aux playback volume/switch and capture switch
+- 3D control switch
+
+
+MIDI CONTROLLER
+---------------
+
+With CMI8338 chips, the MPU401-UART interface is disabled as default.
+You need to set the module option "mpu_port" to a valid I/O port address
+to enable MIDI support. Valid I/O ports are 0x300, 0x310, 0x320 and
+0x330. Choose a value that doesn't conflict with other cards.
+
+With CMI8738 and newer chips, the MIDI interface is enabled by default
+and the driver automatically chooses a port address.
+
+There is _no_ hardware wavetable function on this chip (except for
+OPL3 synth below).
+What's said as MIDI synth on Windows is a software synthesizer
+emulation. On Linux use TiMidity or other softsynth program for
+playing MIDI music.
+
+
+FM OPL/3 Synth
+--------------
+
+The FM OPL/3 is also enabled as default only for the first card.
+Set "fm_port" module option for more cards.
+
+The output quality of FM OPL/3 is, however, very weird.
+I don't know why..
+
+CMI8768 and newer chips do not have the FM synth.
+
+
+Joystick and Modem
+------------------
+
+The legacy joystick is supported. To enable the joystick support, pass
+joystick_port=1 module option. The value 1 means the auto-detection.
+If the auto-detection fails, try to pass the exact I/O address.
+
+The modem is enabled dynamically via a card control switch "Modem".
+
+
+Debugging Information
+---------------------
+
+The registers are shown in /proc/asound/cardX/cmipci. If you have any
+problem (especially unexpected behavior of mixer), please attach the
+output of this proc file together with the bug report.
diff --git a/Documentation/sound/alsa/Channel-Mapping-API.txt b/Documentation/sound/alsa/Channel-Mapping-API.txt
new file mode 100644
index 000000000..3c43d1a4c
--- /dev/null
+++ b/Documentation/sound/alsa/Channel-Mapping-API.txt
@@ -0,0 +1,153 @@
+ALSA PCM channel-mapping API
+============================
+ Takashi Iwai <tiwai@suse.de>
+
+GENERAL
+-------
+
+The channel mapping API allows user to query the possible channel maps
+and the current channel map, also optionally to modify the channel map
+of the current stream.
+
+A channel map is an array of position for each PCM channel.
+Typically, a stereo PCM stream has a channel map of
+ { front_left, front_right }
+while a 4.0 surround PCM stream has a channel map of
+ { front left, front right, rear left, rear right }.
+
+The problem, so far, was that we had no standard channel map
+explicitly, and applications had no way to know which channel
+corresponds to which (speaker) position. Thus, applications applied
+wrong channels for 5.1 outputs, and you hear suddenly strange sound
+from rear. Or, some devices secretly assume that center/LFE is the
+third/fourth channels while others that C/LFE as 5th/6th channels.
+
+Also, some devices such as HDMI are configurable for different speaker
+positions even with the same number of total channels. However, there
+was no way to specify this because of lack of channel map
+specification. These are the main motivations for the new channel
+mapping API.
+
+
+DESIGN
+------
+
+Actually, "the channel mapping API" doesn't introduce anything new in
+the kernel/user-space ABI perspective. It uses only the existing
+control element features.
+
+As a ground design, each PCM substream may contain a control element
+providing the channel mapping information and configuration. This
+element is specified by:
+ iface = SNDRV_CTL_ELEM_IFACE_PCM
+ name = "Playback Channel Map" or "Capture Channel Map"
+ device = the same device number for the assigned PCM substream
+ index = the same index number for the assigned PCM substream
+
+Note the name is different depending on the PCM substream direction.
+
+Each control element provides at least the TLV read operation and the
+read operation. Optionally, the write operation can be provided to
+allow user to change the channel map dynamically.
+
+* TLV
+
+The TLV operation gives the list of available channel
+maps. A list item of a channel map is usually a TLV of
+ type data-bytes ch0 ch1 ch2...
+where type is the TLV type value, the second argument is the total
+bytes (not the numbers) of channel values, and the rest are the
+position value for each channel.
+
+As a TLV type, either SNDRV_CTL_TLVT_CHMAP_FIXED,
+SNDRV_CTL_TLV_CHMAP_VAR or SNDRV_CTL_TLVT_CHMAP_PAIRED can be used.
+The _FIXED type is for a channel map with the fixed channel position
+while the latter two are for flexible channel positions. _VAR type is
+for a channel map where all channels are freely swappable and _PAIRED
+type is where pair-wise channels are swappable. For example, when you
+have {FL/FR/RL/RR} channel map, _PAIRED type would allow you to swap
+only {RL/RR/FL/FR} while _VAR type would allow even swapping FL and
+RR.
+
+These new TLV types are defined in sound/tlv.h.
+
+The available channel position values are defined in sound/asound.h,
+here is a cut:
+
+/* channel positions */
+enum {
+ SNDRV_CHMAP_UNKNOWN = 0,
+ SNDRV_CHMAP_NA, /* N/A, silent */
+ SNDRV_CHMAP_MONO, /* mono stream */
+ /* this follows the alsa-lib mixer channel value + 3 */
+ SNDRV_CHMAP_FL, /* front left */
+ SNDRV_CHMAP_FR, /* front right */
+ SNDRV_CHMAP_RL, /* rear left */
+ SNDRV_CHMAP_RR, /* rear right */
+ SNDRV_CHMAP_FC, /* front center */
+ SNDRV_CHMAP_LFE, /* LFE */
+ SNDRV_CHMAP_SL, /* side left */
+ SNDRV_CHMAP_SR, /* side right */
+ SNDRV_CHMAP_RC, /* rear center */
+ /* new definitions */
+ SNDRV_CHMAP_FLC, /* front left center */
+ SNDRV_CHMAP_FRC, /* front right center */
+ SNDRV_CHMAP_RLC, /* rear left center */
+ SNDRV_CHMAP_RRC, /* rear right center */
+ SNDRV_CHMAP_FLW, /* front left wide */
+ SNDRV_CHMAP_FRW, /* front right wide */
+ SNDRV_CHMAP_FLH, /* front left high */
+ SNDRV_CHMAP_FCH, /* front center high */
+ SNDRV_CHMAP_FRH, /* front right high */
+ SNDRV_CHMAP_TC, /* top center */
+ SNDRV_CHMAP_TFL, /* top front left */
+ SNDRV_CHMAP_TFR, /* top front right */
+ SNDRV_CHMAP_TFC, /* top front center */
+ SNDRV_CHMAP_TRL, /* top rear left */
+ SNDRV_CHMAP_TRR, /* top rear right */
+ SNDRV_CHMAP_TRC, /* top rear center */
+ SNDRV_CHMAP_LAST = SNDRV_CHMAP_TRC,
+};
+
+When a PCM stream can provide more than one channel map, you can
+provide multiple channel maps in a TLV container type. The TLV data
+to be returned will contain such as:
+ SNDRV_CTL_TLVT_CONTAINER 96
+ SNDRV_CTL_TLVT_CHMAP_FIXED 4 SNDRV_CHMAP_FC
+ SNDRV_CTL_TLVT_CHMAP_FIXED 8 SNDRV_CHMAP_FL SNDRV_CHMAP_FR
+ SNDRV_CTL_TLVT_CHMAP_FIXED 16 NDRV_CHMAP_FL SNDRV_CHMAP_FR \
+ SNDRV_CHMAP_RL SNDRV_CHMAP_RR
+
+The channel position is provided in LSB 16bits. The upper bits are
+used for bit flags.
+
+#define SNDRV_CHMAP_POSITION_MASK 0xffff
+#define SNDRV_CHMAP_PHASE_INVERSE (0x01 << 16)
+#define SNDRV_CHMAP_DRIVER_SPEC (0x02 << 16)
+
+SNDRV_CHMAP_PHASE_INVERSE indicates the channel is phase inverted,
+(thus summing left and right channels would result in almost silence).
+Some digital mic devices have this.
+
+When SNDRV_CHMAP_DRIVER_SPEC is set, all the channel position values
+don't follow the standard definition above but driver-specific.
+
+* READ OPERATION
+
+The control read operation is for providing the current channel map of
+the given stream. The control element returns an integer array
+containing the position of each channel.
+
+When this is performed before the number of the channel is specified
+(i.e. hw_params is set), it should return all channels set to
+UNKNOWN.
+
+* WRITE OPERATION
+
+The control write operation is optional, and only for devices that can
+change the channel configuration on the fly, such as HDMI. User needs
+to pass an integer value containing the valid channel positions for
+all channels of the assigned PCM substream.
+
+This operation is allowed only at PCM PREPARED state. When called in
+other states, it shall return an error.
diff --git a/Documentation/sound/alsa/ControlNames.txt b/Documentation/sound/alsa/ControlNames.txt
new file mode 100644
index 000000000..3fc1cf50d
--- /dev/null
+++ b/Documentation/sound/alsa/ControlNames.txt
@@ -0,0 +1,107 @@
+This document describes standard names of mixer controls.
+
+Syntax: [LOCATION] SOURCE [CHANNEL] [DIRECTION] FUNCTION
+
+DIRECTION:
+ <nothing> (both directions)
+ Playback
+ Capture
+ Bypass Playback
+ Bypass Capture
+
+FUNCTION:
+ Switch (on/off switch)
+ Volume
+ Route (route control, hardware specific)
+
+CHANNEL:
+ <nothing> (channel independent, or applies to all channels)
+ Front
+ Surround (rear left/right in 4.0/5.1 surround)
+ CLFE
+ Center
+ LFE
+ Side (side left/right for 7.1 surround)
+
+LOCATION: (physical location of source)
+ Front
+ Rear
+ Dock (docking station)
+ Internal
+
+SOURCE:
+ Master
+ Master Mono
+ Hardware Master
+ Speaker (internal speaker)
+ Bass Speaker (internal LFE speaker)
+ Headphone
+ Line Out
+ Beep (beep generator)
+ Phone
+ Phone Input
+ Phone Output
+ Synth
+ FM
+ Mic
+ Headset Mic (mic part of combined headset jack - 4-pin headphone + mic)
+ Headphone Mic (mic part of either/or - 3-pin headphone or mic)
+ Line (input only, use "Line Out" for output)
+ CD
+ Video
+ Zoom Video
+ Aux
+ PCM
+ PCM Pan
+ Loopback
+ Analog Loopback (D/A -> A/D loopback)
+ Digital Loopback (playback -> capture loopback - without analog path)
+ Mono
+ Mono Output
+ Multi
+ ADC
+ Wave
+ Music
+ I2S
+ IEC958
+ HDMI
+ SPDIF (output only)
+ SPDIF In
+ Digital In
+ HDMI/DP (either HDMI or DisplayPort)
+
+Exceptions (deprecated):
+ [Analogue|Digital] Capture Source
+ [Analogue|Digital] Capture Switch (aka input gain switch)
+ [Analogue|Digital] Capture Volume (aka input gain volume)
+ [Analogue|Digital] Playback Switch (aka output gain switch)
+ [Analogue|Digital] Playback Volume (aka output gain volume)
+ Tone Control - Switch
+ Tone Control - Bass
+ Tone Control - Treble
+ 3D Control - Switch
+ 3D Control - Center
+ 3D Control - Depth
+ 3D Control - Wide
+ 3D Control - Space
+ 3D Control - Level
+ Mic Boost [(?dB)]
+
+PCM interface:
+
+ Sample Clock Source { "Word", "Internal", "AutoSync" }
+ Clock Sync Status { "Lock", "Sync", "No Lock" }
+ External Rate /* external capture rate */
+ Capture Rate /* capture rate taken from external source */
+
+IEC958 (S/PDIF) interface:
+
+ IEC958 [...] [Playback|Capture] Switch /* turn on/off the IEC958 interface */
+ IEC958 [...] [Playback|Capture] Volume /* digital volume control */
+ IEC958 [...] [Playback|Capture] Default /* default or global value - read/write */
+ IEC958 [...] [Playback|Capture] Mask /* consumer and professional mask */
+ IEC958 [...] [Playback|Capture] Con Mask /* consumer mask */
+ IEC958 [...] [Playback|Capture] Pro Mask /* professional mask */
+ IEC958 [...] [Playback|Capture] PCM Stream /* the settings assigned to a PCM stream */
+ IEC958 Q-subcode [Playback|Capture] Default /* Q-subcode bits */
+ IEC958 Preamble [Playback|Capture] Default /* burst preamble words (4*16bits) */
diff --git a/Documentation/sound/alsa/HD-Audio-Controls.txt b/Documentation/sound/alsa/HD-Audio-Controls.txt
new file mode 100644
index 000000000..e9621e349
--- /dev/null
+++ b/Documentation/sound/alsa/HD-Audio-Controls.txt
@@ -0,0 +1,116 @@
+This file explains the codec-specific mixer controls.
+
+Realtek codecs
+--------------
+
+* Channel Mode
+ This is an enum control to change the surround-channel setup,
+ appears only when the surround channels are available.
+ It gives the number of channels to be used, "2ch", "4ch", "6ch",
+ and "8ch". According to the configuration, this also controls the
+ jack-retasking of multi-I/O jacks.
+
+* Auto-Mute Mode
+ This is an enum control to change the auto-mute behavior of the
+ headphone and line-out jacks. If built-in speakers and headphone
+ and/or line-out jacks are available on a machine, this controls
+ appears.
+ When there are only either headphones or line-out jacks, it gives
+ "Disabled" and "Enabled" state. When enabled, the speaker is muted
+ automatically when a jack is plugged.
+
+ When both headphone and line-out jacks are present, it gives
+ "Disabled", "Speaker Only" and "Line-Out+Speaker". When
+ speaker-only is chosen, plugging into a headphone or a line-out jack
+ mutes the speakers, but not line-outs. When line-out+speaker is
+ selected, plugging to a headphone jack mutes both speakers and
+ line-outs.
+
+
+IDT/Sigmatel codecs
+-------------------
+
+* Analog Loopback
+ This control enables/disables the analog-loopback circuit. This
+ appears only when "loopback" is set to true in a codec hint
+ (see HD-Audio.txt). Note that on some codecs the analog-loopback
+ and the normal PCM playback are exclusive, i.e. when this is on, you
+ won't hear any PCM stream.
+
+* Swap Center/LFE
+ Swaps the center and LFE channel order. Normally, the left
+ corresponds to the center and the right to the LFE. When this is
+ ON, the left to the LFE and the right to the center.
+
+* Headphone as Line Out
+ When this control is ON, treat the headphone jacks as line-out
+ jacks. That is, the headphone won't auto-mute the other line-outs,
+ and no HP-amp is set to the pins.
+
+* Mic Jack Mode, Line Jack Mode, etc
+ These enum controls the direction and the bias of the input jack
+ pins. Depending on the jack type, it can set as "Mic In" and "Line
+ In", for determining the input bias, or it can be set to "Line Out"
+ when the pin is a multi-I/O jack for surround channels.
+
+
+VIA codecs
+----------
+
+* Smart 5.1
+ An enum control to re-task the multi-I/O jacks for surround outputs.
+ When it's ON, the corresponding input jacks (usually a line-in and a
+ mic-in) are switched as the surround and the CLFE output jacks.
+
+* Independent HP
+ When this enum control is enabled, the headphone output is routed
+ from an individual stream (the third PCM such as hw:0,2) instead of
+ the primary stream. In the case the headphone DAC is shared with a
+ side or a CLFE-channel DAC, the DAC is switched to the headphone
+ automatically.
+
+* Loopback Mixing
+ An enum control to determine whether the analog-loopback route is
+ enabled or not. When it's enabled, the analog-loopback is mixed to
+ the front-channel. Also, the same route is used for the headphone
+ and speaker outputs. As a side-effect, when this mode is set, the
+ individual volume controls will be no longer available for
+ headphones and speakers because there is only one DAC connected to a
+ mixer widget.
+
+* Dynamic Power-Control
+ This control determines whether the dynamic power-control per jack
+ detection is enabled or not. When enabled, the widgets power state
+ (D0/D3) are changed dynamically depending on the jack plugging
+ state for saving power consumptions. However, if your system
+ doesn't provide a proper jack-detection, this won't work; in such a
+ case, turn this control OFF.
+
+* Jack Detect
+ This control is provided only for VT1708 codec which gives no proper
+ unsolicited event per jack plug. When this is on, the driver polls
+ the jack detection so that the headphone auto-mute can work, while
+ turning this off would reduce the power consumption.
+
+
+Conexant codecs
+---------------
+
+* Auto-Mute Mode
+ See Reatek codecs.
+
+
+Analog codecs
+--------------
+
+* Channel Mode
+ This is an enum control to change the surround-channel setup,
+ appears only when the surround channels are available.
+ It gives the number of channels to be used, "2ch", "4ch" and "6ch".
+ According to the configuration, this also controls the
+ jack-retasking of multi-I/O jacks.
+
+* Independent HP
+ When this enum control is enabled, the headphone output is routed
+ from an individual stream (the third PCM such as hw:0,2) instead of
+ the primary stream.
diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt
new file mode 100644
index 000000000..5a3163cac
--- /dev/null
+++ b/Documentation/sound/alsa/HD-Audio-Models.txt
@@ -0,0 +1,314 @@
+ Model name Description
+ ---------- -----------
+ALC880
+======
+ 3stack 3-jack in back and a headphone out
+ 3stack-digout 3-jack in back, a HP out and a SPDIF out
+ 5stack 5-jack in back, 2-jack in front
+ 5stack-digout 5-jack in back, 2-jack in front, a SPDIF out
+ 6stack 6-jack in back, 2-jack in front
+ 6stack-digout 6-jack with a SPDIF out
+
+ALC260
+======
+ N/A
+
+ALC262
+======
+ inv-dmic Inverted internal mic workaround
+
+ALC267/268
+==========
+ inv-dmic Inverted internal mic workaround
+
+ALC269/270/275/276/28x/29x
+======
+ laptop-amic Laptops with analog-mic input
+ laptop-dmic Laptops with digital-mic input
+ alc269-dmic Enable ALC269(VA) digital mic workaround
+ alc271-dmic Enable ALC271X digital mic workaround
+ inv-dmic Inverted internal mic workaround
+ headset-mic Indicates a combined headset (headphone+mic) jack
+ lenovo-dock Enables docking station I/O for some Lenovos
+ dell-headset-multi Headset jack, which can also be used as mic-in
+ dell-headset-dock Headset jack (without mic-in), and also dock I/O
+
+ALC66x/67x/892
+==============
+ mario Chromebook mario model fixup
+ asus-mode1 ASUS
+ asus-mode2 ASUS
+ asus-mode3 ASUS
+ asus-mode4 ASUS
+ asus-mode5 ASUS
+ asus-mode6 ASUS
+ asus-mode7 ASUS
+ asus-mode8 ASUS
+ inv-dmic Inverted internal mic workaround
+ dell-headset-multi Headset jack, which can also be used as mic-in
+
+ALC680
+======
+ N/A
+
+ALC88x/898/1150
+======================
+ acer-aspire-4930g Acer Aspire 4930G/5930G/6530G/6930G/7730G
+ acer-aspire-8930g Acer Aspire 8330G/6935G
+ acer-aspire Acer Aspire others
+ inv-dmic Inverted internal mic workaround
+ no-primary-hp VAIO Z/VGC-LN51JGB workaround (for fixed speaker DAC)
+
+ALC861/660
+==========
+ N/A
+
+ALC861VD/660VD
+==============
+ N/A
+
+CMI9880
+=======
+ minimal 3-jack in back
+ min_fp 3-jack in back, 2-jack in front
+ full 6-jack in back, 2-jack in front
+ full_dig 6-jack in back, 2-jack in front, SPDIF I/O
+ allout 5-jack in back, 2-jack in front, SPDIF out
+ auto auto-config reading BIOS (default)
+
+AD1882 / AD1882A
+================
+ 3stack 3-stack mode
+ 3stack-automute 3-stack with automute front HP (default)
+ 6stack 6-stack mode
+
+AD1884A / AD1883 / AD1984A / AD1984B
+====================================
+ desktop 3-stack desktop (default)
+ laptop laptop with HP jack sensing
+ mobile mobile devices with HP jack sensing
+ thinkpad Lenovo Thinkpad X300
+ touchsmart HP Touchsmart
+
+AD1884
+======
+ N/A
+
+AD1981
+======
+ basic 3-jack (default)
+ hp HP nx6320
+ thinkpad Lenovo Thinkpad T60/X60/Z60
+ toshiba Toshiba U205
+
+AD1983
+======
+ N/A
+
+AD1984
+======
+ basic default configuration
+ thinkpad Lenovo Thinkpad T61/X61
+ dell_desktop Dell T3400
+
+AD1986A
+=======
+ 3stack 3-stack, shared surrounds
+ laptop 2-channel only (FSC V2060, Samsung M50)
+ laptop-imic 2-channel with built-in mic
+ eapd Turn on EAPD constantly
+
+AD1988/AD1988B/AD1989A/AD1989B
+==============================
+ 6stack 6-jack
+ 6stack-dig ditto with SPDIF
+ 3stack 3-jack
+ 3stack-dig ditto with SPDIF
+ laptop 3-jack with hp-jack automute
+ laptop-dig ditto with SPDIF
+ auto auto-config reading BIOS (default)
+
+Conexant 5045
+=============
+ laptop-hpsense Laptop with HP sense (old model laptop)
+ laptop-micsense Laptop with Mic sense (old model fujitsu)
+ laptop-hpmicsense Laptop with HP and Mic senses
+ benq Benq R55E
+ laptop-hp530 HP 530 laptop
+ test for testing/debugging purpose, almost all controls
+ can be adjusted. Appearing only when compiled with
+ $CONFIG_SND_DEBUG=y
+
+Conexant 5047
+=============
+ laptop Basic Laptop config
+ laptop-hp Laptop config for some HP models (subdevice 30A5)
+ laptop-eapd Laptop config with EAPD support
+ test for testing/debugging purpose, almost all controls
+ can be adjusted. Appearing only when compiled with
+ $CONFIG_SND_DEBUG=y
+
+Conexant 5051
+=============
+ laptop Basic Laptop config (default)
+ hp HP Spartan laptop
+ hp-dv6736 HP dv6736
+ hp-f700 HP Compaq Presario F700
+ ideapad Lenovo IdeaPad laptop
+ toshiba Toshiba Satellite M300
+
+Conexant 5066
+=============
+ laptop Basic Laptop config (default)
+ hp-laptop HP laptops, e g G60
+ asus Asus K52JU, Lenovo G560
+ dell-laptop Dell laptops
+ dell-vostro Dell Vostro
+ olpc-xo-1_5 OLPC XO 1.5
+ ideapad Lenovo IdeaPad U150
+ thinkpad Lenovo Thinkpad
+
+STAC9200
+========
+ ref Reference board
+ oqo OQO Model 2
+ dell-d21 Dell (unknown)
+ dell-d22 Dell (unknown)
+ dell-d23 Dell (unknown)
+ dell-m21 Dell Inspiron 630m, Dell Inspiron 640m
+ dell-m22 Dell Latitude D620, Dell Latitude D820
+ dell-m23 Dell XPS M1710, Dell Precision M90
+ dell-m24 Dell Latitude 120L
+ dell-m25 Dell Inspiron E1505n
+ dell-m26 Dell Inspiron 1501
+ dell-m27 Dell Inspiron E1705/9400
+ gateway-m4 Gateway laptops with EAPD control
+ gateway-m4-2 Gateway laptops with EAPD control
+ panasonic Panasonic CF-74
+ auto BIOS setup (default)
+
+STAC9205/9254
+=============
+ ref Reference board
+ dell-m42 Dell (unknown)
+ dell-m43 Dell Precision
+ dell-m44 Dell Inspiron
+ eapd Keep EAPD on (e.g. Gateway T1616)
+ auto BIOS setup (default)
+
+STAC9220/9221
+=============
+ ref Reference board
+ 3stack D945 3stack
+ 5stack D945 5stack + SPDIF
+ intel-mac-v1 Intel Mac Type 1
+ intel-mac-v2 Intel Mac Type 2
+ intel-mac-v3 Intel Mac Type 3
+ intel-mac-v4 Intel Mac Type 4
+ intel-mac-v5 Intel Mac Type 5
+ intel-mac-auto Intel Mac (detect type according to subsystem id)
+ macmini Intel Mac Mini (equivalent with type 3)
+ macbook Intel Mac Book (eq. type 5)
+ macbook-pro-v1 Intel Mac Book Pro 1st generation (eq. type 3)
+ macbook-pro Intel Mac Book Pro 2nd generation (eq. type 3)
+ imac-intel Intel iMac (eq. type 2)
+ imac-intel-20 Intel iMac (newer version) (eq. type 3)
+ ecs202 ECS/PC chips
+ dell-d81 Dell (unknown)
+ dell-d82 Dell (unknown)
+ dell-m81 Dell (unknown)
+ dell-m82 Dell XPS M1210
+ auto BIOS setup (default)
+
+STAC9202/9250/9251
+==================
+ ref Reference board, base config
+ m1 Some Gateway MX series laptops (NX560XL)
+ m1-2 Some Gateway MX series laptops (MX6453)
+ m2 Some Gateway MX series laptops (M255)
+ m2-2 Some Gateway MX series laptops
+ m3 Some Gateway MX series laptops
+ m5 Some Gateway MX series laptops (MP6954)
+ m6 Some Gateway NX series laptops
+ auto BIOS setup (default)
+
+STAC9227/9228/9229/927x
+=======================
+ ref Reference board
+ ref-no-jd Reference board without HP/Mic jack detection
+ 3stack D965 3stack
+ 5stack D965 5stack + SPDIF
+ 5stack-no-fp D965 5stack without front panel
+ dell-3stack Dell Dimension E520
+ dell-bios Fixes with Dell BIOS setup
+ dell-bios-amic Fixes with Dell BIOS setup including analog mic
+ volknob Fixes with volume-knob widget 0x24
+ auto BIOS setup (default)
+
+STAC92HD71B*
+============
+ ref Reference board
+ dell-m4-1 Dell desktops
+ dell-m4-2 Dell desktops
+ dell-m4-3 Dell desktops
+ hp-m4 HP mini 1000
+ hp-dv5 HP dv series
+ hp-hdx HP HDX series
+ hp-dv4-1222nr HP dv4-1222nr (with LED support)
+ auto BIOS setup (default)
+
+STAC92HD73*
+===========
+ ref Reference board
+ no-jd BIOS setup but without jack-detection
+ intel Intel DG45* mobos
+ dell-m6-amic Dell desktops/laptops with analog mics
+ dell-m6-dmic Dell desktops/laptops with digital mics
+ dell-m6 Dell desktops/laptops with both type of mics
+ dell-eq Dell desktops/laptops
+ alienware Alienware M17x
+ auto BIOS setup (default)
+
+STAC92HD83*
+===========
+ ref Reference board
+ mic-ref Reference board with power management for ports
+ dell-s14 Dell laptop
+ dell-vostro-3500 Dell Vostro 3500 laptop
+ hp-dv7-4000 HP dv-7 4000
+ hp_cNB11_intquad HP CNB models with 4 speakers
+ hp-zephyr HP Zephyr
+ hp-led HP with broken BIOS for mute LED
+ hp-inv-led HP with broken BIOS for inverted mute LED
+ hp-mic-led HP with mic-mute LED
+ headset-jack Dell Latitude with a 4-pin headset jack
+ hp-envy-bass Pin fixup for HP Envy bass speaker (NID 0x0f)
+ hp-envy-ts-bass Pin fixup for HP Envy TS bass speaker (NID 0x10)
+ hp-bnb13-eq Hardware equalizer setup for HP laptops
+ auto BIOS setup (default)
+
+STAC92HD95
+==========
+ hp-led LED support for HP laptops
+ hp-bass Bass HPF setup for HP Spectre 13
+
+STAC9872
+========
+ vaio VAIO laptop without SPDIF
+ auto BIOS setup (default)
+
+Cirrus Logic CS4206/4207
+========================
+ mbp55 MacBook Pro 5,5
+ imac27 IMac 27 Inch
+ auto BIOS setup (default)
+
+Cirrus Logic CS4208
+===================
+ mba6 MacBook Air 6,1 and 6,2
+ gpio0 Enable GPIO 0 amp
+ auto BIOS setup (default)
+
+VIA VT17xx/VT18xx/VT20xx
+========================
+ auto BIOS setup (default)
diff --git a/Documentation/sound/alsa/HD-Audio.txt b/Documentation/sound/alsa/HD-Audio.txt
new file mode 100644
index 000000000..e7193aac6
--- /dev/null
+++ b/Documentation/sound/alsa/HD-Audio.txt
@@ -0,0 +1,863 @@
+MORE NOTES ON HD-AUDIO DRIVER
+=============================
+ Takashi Iwai <tiwai@suse.de>
+
+
+GENERAL
+-------
+
+HD-audio is the new standard on-board audio component on modern PCs
+after AC97. Although Linux has been supporting HD-audio since long
+time ago, there are often problems with new machines. A part of the
+problem is broken BIOS, and the rest is the driver implementation.
+This document explains the brief trouble-shooting and debugging
+methods for the HD-audio hardware.
+
+The HD-audio component consists of two parts: the controller chip and
+the codec chips on the HD-audio bus. Linux provides a single driver
+for all controllers, snd-hda-intel. Although the driver name contains
+a word of a well-known hardware vendor, it's not specific to it but for
+all controller chips by other companies. Since the HD-audio
+controllers are supposed to be compatible, the single snd-hda-driver
+should work in most cases. But, not surprisingly, there are known
+bugs and issues specific to each controller type. The snd-hda-intel
+driver has a bunch of workarounds for these as described below.
+
+A controller may have multiple codecs. Usually you have one audio
+codec and optionally one modem codec. In theory, there might be
+multiple audio codecs, e.g. for analog and digital outputs, and the
+driver might not work properly because of conflict of mixer elements.
+This should be fixed in future if such hardware really exists.
+
+The snd-hda-intel driver has several different codec parsers depending
+on the codec. It has a generic parser as a fallback, but this
+functionality is fairly limited until now. Instead of the generic
+parser, usually the codec-specific parser (coded in patch_*.c) is used
+for the codec-specific implementations. The details about the
+codec-specific problems are explained in the later sections.
+
+If you are interested in the deep debugging of HD-audio, read the
+HD-audio specification at first. The specification is found on
+Intel's web page, for example:
+
+- http://www.intel.com/standards/hdaudio/
+
+
+HD-AUDIO CONTROLLER
+-------------------
+
+DMA-Position Problem
+~~~~~~~~~~~~~~~~~~~~
+The most common problem of the controller is the inaccurate DMA
+pointer reporting. The DMA pointer for playback and capture can be
+read in two ways, either via a LPIB register or via a position-buffer
+map. As default the driver tries to read from the io-mapped
+position-buffer, and falls back to LPIB if the position-buffer appears
+dead. However, this detection isn't perfect on some devices. In such
+a case, you can change the default method via `position_fix` option.
+
+`position_fix=1` means to use LPIB method explicitly.
+`position_fix=2` means to use the position-buffer.
+`position_fix=3` means to use a combination of both methods, needed
+for some VIA controllers. The capture stream position is corrected
+by comparing both LPIB and position-buffer values.
+`position_fix=4` is another combination available for all controllers,
+and uses LPIB for the playback and the position-buffer for the capture
+streams.
+0 is the default value for all other
+controllers, the automatic check and fallback to LPIB as described in
+the above. If you get a problem of repeated sounds, this option might
+help.
+
+In addition to that, every controller is known to be broken regarding
+the wake-up timing. It wakes up a few samples before actually
+processing the data on the buffer. This caused a lot of problems, for
+example, with ALSA dmix or JACK. Since 2.6.27 kernel, the driver puts
+an artificial delay to the wake up timing. This delay is controlled
+via `bdl_pos_adj` option.
+
+When `bdl_pos_adj` is a negative value (as default), it's assigned to
+an appropriate value depending on the controller chip. For Intel
+chips, it'd be 1 while it'd be 32 for others. Usually this works.
+Only in case it doesn't work and you get warning messages, you should
+change this parameter to other values.
+
+
+Codec-Probing Problem
+~~~~~~~~~~~~~~~~~~~~~
+A less often but a more severe problem is the codec probing. When
+BIOS reports the available codec slots wrongly, the driver gets
+confused and tries to access the non-existing codec slot. This often
+results in the total screw-up, and destructs the further communication
+with the codec chips. The symptom appears usually as error messages
+like:
+------------------------------------------------------------------------
+ hda_intel: azx_get_response timeout, switching to polling mode:
+ last cmd=0x12345678
+ hda_intel: azx_get_response timeout, switching to single_cmd mode:
+ last cmd=0x12345678
+------------------------------------------------------------------------
+
+The first line is a warning, and this is usually relatively harmless.
+It means that the codec response isn't notified via an IRQ. The
+driver uses explicit polling method to read the response. It gives
+very slight CPU overhead, but you'd unlikely notice it.
+
+The second line is, however, a fatal error. If this happens, usually
+it means that something is really wrong. Most likely you are
+accessing a non-existing codec slot.
+
+Thus, if the second error message appears, try to narrow the probed
+codec slots via `probe_mask` option. It's a bitmask, and each bit
+corresponds to the codec slot. For example, to probe only the first
+slot, pass `probe_mask=1`. For the first and the third slots, pass
+`probe_mask=5` (where 5 = 1 | 4), and so on.
+
+Since 2.6.29 kernel, the driver has a more robust probing method, so
+this error might happen rarely, though.
+
+On a machine with a broken BIOS, sometimes you need to force the
+driver to probe the codec slots the hardware doesn't report for use.
+In such a case, turn the bit 8 (0x100) of `probe_mask` option on.
+Then the rest 8 bits are passed as the codec slots to probe
+unconditionally. For example, `probe_mask=0x103` will force to probe
+the codec slots 0 and 1 no matter what the hardware reports.
+
+
+Interrupt Handling
+~~~~~~~~~~~~~~~~~~
+HD-audio driver uses MSI as default (if available) since 2.6.33
+kernel as MSI works better on some machines, and in general, it's
+better for performance. However, Nvidia controllers showed bad
+regressions with MSI (especially in a combination with AMD chipset),
+thus we disabled MSI for them.
+
+There seem also still other devices that don't work with MSI. If you
+see a regression wrt the sound quality (stuttering, etc) or a lock-up
+in the recent kernel, try to pass `enable_msi=0` option to disable
+MSI. If it works, you can add the known bad device to the blacklist
+defined in hda_intel.c. In such a case, please report and give the
+patch back to the upstream developer.
+
+
+HD-AUDIO CODEC
+--------------
+
+Model Option
+~~~~~~~~~~~~
+The most common problem regarding the HD-audio driver is the
+unsupported codec features or the mismatched device configuration.
+Most of codec-specific code has several preset models, either to
+override the BIOS setup or to provide more comprehensive features.
+
+The driver checks PCI SSID and looks through the static configuration
+table until any matching entry is found. If you have a new machine,
+you may see a message like below:
+------------------------------------------------------------------------
+ hda_codec: ALC880: BIOS auto-probing.
+------------------------------------------------------------------------
+Meanwhile, in the earlier versions, you would see a message like:
+------------------------------------------------------------------------
+ hda_codec: Unknown model for ALC880, trying auto-probe from BIOS...
+------------------------------------------------------------------------
+Even if you see such a message, DON'T PANIC. Take a deep breath and
+keep your towel. First of all, it's an informational message, no
+warning, no error. This means that the PCI SSID of your device isn't
+listed in the known preset model (white-)list. But, this doesn't mean
+that the driver is broken. Many codec-drivers provide the automatic
+configuration mechanism based on the BIOS setup.
+
+The HD-audio codec has usually "pin" widgets, and BIOS sets the default
+configuration of each pin, which indicates the location, the
+connection type, the jack color, etc. The HD-audio driver can guess
+the right connection judging from these default configuration values.
+However -- some codec-support codes, such as patch_analog.c, don't
+support the automatic probing (yet as of 2.6.28). And, BIOS is often,
+yes, pretty often broken. It sets up wrong values and screws up the
+driver.
+
+The preset model (or recently called as "fix-up") is provided
+basically to overcome such a situation. When the matching preset
+model is found in the white-list, the driver assumes the static
+configuration of that preset with the correct pin setup, etc.
+Thus, if you have a newer machine with a slightly different PCI SSID
+(or codec SSID) from the existing one, you may have a good chance to
+re-use the same model. You can pass the `model` option to specify the
+preset model instead of PCI (and codec-) SSID look-up.
+
+What `model` option values are available depends on the codec chip.
+Check your codec chip from the codec proc file (see "Codec Proc-File"
+section below). It will show the vendor/product name of your codec
+chip. Then, see Documentation/sound/alsa/HD-Audio-Models.txt file,
+the section of HD-audio driver. You can find a list of codecs
+and `model` options belonging to each codec. For example, for Realtek
+ALC262 codec chip, pass `model=ultra` for devices that are compatible
+with Samsung Q1 Ultra.
+
+Thus, the first thing you can do for any brand-new, unsupported and
+non-working HD-audio hardware is to check HD-audio codec and several
+different `model` option values. If you have any luck, some of them
+might suit with your device well.
+
+There are a few special model option values:
+- when 'nofixup' is passed, the device-specific fixups in the codec
+ parser are skipped.
+- when `generic` is passed, the codec-specific parser is skipped and
+ only the generic parser is used.
+
+
+Speaker and Headphone Output
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+One of the most frequent (and obvious) bugs with HD-audio is the
+silent output from either or both of a built-in speaker and a
+headphone jack. In general, you should try a headphone output at
+first. A speaker output often requires more additional controls like
+the external amplifier bits. Thus a headphone output has a slightly
+better chance.
+
+Before making a bug report, double-check whether the mixer is set up
+correctly. The recent version of snd-hda-intel driver provides mostly
+"Master" volume control as well as "Front" volume (where Front
+indicates the front-channels). In addition, there can be individual
+"Headphone" and "Speaker" controls.
+
+Ditto for the speaker output. There can be "External Amplifier"
+switch on some codecs. Turn on this if present.
+
+Another related problem is the automatic mute of speaker output by
+headphone plugging. This feature is implemented in most cases, but
+not on every preset model or codec-support code.
+
+In anyway, try a different model option if you have such a problem.
+Some other models may match better and give you more matching
+functionality. If none of the available models works, send a bug
+report. See the bug report section for details.
+
+If you are masochistic enough to debug the driver problem, note the
+following:
+
+- The speaker (and the headphone, too) output often requires the
+ external amplifier. This can be set usually via EAPD verb or a
+ certain GPIO. If the codec pin supports EAPD, you have a better
+ chance via SET_EAPD_BTL verb (0x70c). On others, GPIO pin (mostly
+ it's either GPIO0 or GPIO1) may turn on/off EAPD.
+- Some Realtek codecs require special vendor-specific coefficients to
+ turn on the amplifier. See patch_realtek.c.
+- IDT codecs may have extra power-enable/disable controls on each
+ analog pin. See patch_sigmatel.c.
+- Very rare but some devices don't accept the pin-detection verb until
+ triggered. Issuing GET_PIN_SENSE verb (0xf09) may result in the
+ codec-communication stall. Some examples are found in
+ patch_realtek.c.
+
+
+Capture Problems
+~~~~~~~~~~~~~~~~
+The capture problems are often because of missing setups of mixers.
+Thus, before submitting a bug report, make sure that you set up the
+mixer correctly. For example, both "Capture Volume" and "Capture
+Switch" have to be set properly in addition to the right "Capture
+Source" or "Input Source" selection. Some devices have "Mic Boost"
+volume or switch.
+
+When the PCM device is opened via "default" PCM (without pulse-audio
+plugin), you'll likely have "Digital Capture Volume" control as well.
+This is provided for the extra gain/attenuation of the signal in
+software, especially for the inputs without the hardware volume
+control such as digital microphones. Unless really needed, this
+should be set to exactly 50%, corresponding to 0dB -- neither extra
+gain nor attenuation. When you use "hw" PCM, i.e., a raw access PCM,
+this control will have no influence, though.
+
+It's known that some codecs / devices have fairly bad analog circuits,
+and the recorded sound contains a certain DC-offset. This is no bug
+of the driver.
+
+Most of modern laptops have no analog CD-input connection. Thus, the
+recording from CD input won't work in many cases although the driver
+provides it as the capture source. Use CDDA instead.
+
+The automatic switching of the built-in and external mic per plugging
+is implemented on some codec models but not on every model. Partly
+because of my laziness but mostly lack of testers. Feel free to
+submit the improvement patch to the author.
+
+
+Direct Debugging
+~~~~~~~~~~~~~~~~
+If no model option gives you a better result, and you are a tough guy
+to fight against evil, try debugging via hitting the raw HD-audio
+codec verbs to the device. Some tools are available: hda-emu and
+hda-analyzer. The detailed description is found in the sections
+below. You'd need to enable hwdep for using these tools. See "Kernel
+Configuration" section.
+
+
+OTHER ISSUES
+------------
+
+Kernel Configuration
+~~~~~~~~~~~~~~~~~~~~
+In general, I recommend you to enable the sound debug option,
+`CONFIG_SND_DEBUG=y`, no matter whether you are debugging or not.
+This enables snd_printd() macro and others, and you'll get additional
+kernel messages at probing.
+
+In addition, you can enable `CONFIG_SND_DEBUG_VERBOSE=y`. But this
+will give you far more messages. Thus turn this on only when you are
+sure to want it.
+
+Don't forget to turn on the appropriate `CONFIG_SND_HDA_CODEC_*`
+options. Note that each of them corresponds to the codec chip, not
+the controller chip. Thus, even if lspci shows the Nvidia controller,
+you may need to choose the option for other vendors. If you are
+unsure, just select all yes.
+
+`CONFIG_SND_HDA_HWDEP` is a useful option for debugging the driver.
+When this is enabled, the driver creates hardware-dependent devices
+(one per each codec), and you have a raw access to the device via
+these device files. For example, `hwC0D2` will be created for the
+codec slot #2 of the first card (#0). For debug-tools such as
+hda-verb and hda-analyzer, the hwdep device has to be enabled.
+Thus, it'd be better to turn this on always.
+
+`CONFIG_SND_HDA_RECONFIG` is a new option, and this depends on the
+hwdep option above. When enabled, you'll have some sysfs files under
+the corresponding hwdep directory. See "HD-audio reconfiguration"
+section below.
+
+`CONFIG_SND_HDA_POWER_SAVE` option enables the power-saving feature.
+See "Power-saving" section below.
+
+
+Codec Proc-File
+~~~~~~~~~~~~~~~
+The codec proc-file is a treasure-chest for debugging HD-audio.
+It shows most of useful information of each codec widget.
+
+The proc file is located in /proc/asound/card*/codec#*, one file per
+each codec slot. You can know the codec vendor, product id and
+names, the type of each widget, capabilities and so on.
+This file, however, doesn't show the jack sensing state, so far. This
+is because the jack-sensing might be depending on the trigger state.
+
+This file will be picked up by the debug tools, and also it can be fed
+to the emulator as the primary codec information. See the debug tools
+section below.
+
+This proc file can be also used to check whether the generic parser is
+used. When the generic parser is used, the vendor/product ID name
+will appear as "Realtek ID 0262", instead of "Realtek ALC262".
+
+
+HD-Audio Reconfiguration
+~~~~~~~~~~~~~~~~~~~~~~~~
+This is an experimental feature to allow you re-configure the HD-audio
+codec dynamically without reloading the driver. The following sysfs
+files are available under each codec-hwdep device directory (e.g.
+/sys/class/sound/hwC0D0):
+
+vendor_id::
+ Shows the 32bit codec vendor-id hex number. You can change the
+ vendor-id value by writing to this file.
+subsystem_id::
+ Shows the 32bit codec subsystem-id hex number. You can change the
+ subsystem-id value by writing to this file.
+revision_id::
+ Shows the 32bit codec revision-id hex number. You can change the
+ revision-id value by writing to this file.
+afg::
+ Shows the AFG ID. This is read-only.
+mfg::
+ Shows the MFG ID. This is read-only.
+name::
+ Shows the codec name string. Can be changed by writing to this
+ file.
+modelname::
+ Shows the currently set `model` option. Can be changed by writing
+ to this file.
+init_verbs::
+ The extra verbs to execute at initialization. You can add a verb by
+ writing to this file. Pass three numbers: nid, verb and parameter
+ (separated with a space).
+hints::
+ Shows / stores hint strings for codec parsers for any use.
+ Its format is `key = value`. For example, passing `jack_detect = no`
+ will disable the jack detection of the machine completely.
+init_pin_configs::
+ Shows the initial pin default config values set by BIOS.
+driver_pin_configs::
+ Shows the pin default values set by the codec parser explicitly.
+ This doesn't show all pin values but only the changed values by
+ the parser. That is, if the parser doesn't change the pin default
+ config values by itself, this will contain nothing.
+user_pin_configs::
+ Shows the pin default config values to override the BIOS setup.
+ Writing this (with two numbers, NID and value) appends the new
+ value. The given will be used instead of the initial BIOS value at
+ the next reconfiguration time. Note that this config will override
+ even the driver pin configs, too.
+reconfig::
+ Triggers the codec re-configuration. When any value is written to
+ this file, the driver re-initialize and parses the codec tree
+ again. All the changes done by the sysfs entries above are taken
+ into account.
+clear::
+ Resets the codec, removes the mixer elements and PCM stuff of the
+ specified codec, and clear all init verbs and hints.
+
+For example, when you want to change the pin default configuration
+value of the pin widget 0x14 to 0x9993013f, and let the driver
+re-configure based on that state, run like below:
+------------------------------------------------------------------------
+ # echo 0x14 0x9993013f > /sys/class/sound/hwC0D0/user_pin_configs
+ # echo 1 > /sys/class/sound/hwC0D0/reconfig
+------------------------------------------------------------------------
+
+
+Hint Strings
+~~~~~~~~~~~~
+The codec parser have several switches and adjustment knobs for
+matching better with the actual codec or device behavior. Many of
+them can be adjusted dynamically via "hints" strings as mentioned in
+the section above. For example, by passing `jack_detect = no` string
+via sysfs or a patch file, you can disable the jack detection, thus
+the codec parser will skip the features like auto-mute or mic
+auto-switch. As a boolean value, either `yes`, `no`, `true`, `false`,
+`1` or `0` can be passed.
+
+The generic parser supports the following hints:
+
+- jack_detect (bool): specify whether the jack detection is available
+ at all on this machine; default true
+- inv_jack_detect (bool): indicates that the jack detection logic is
+ inverted
+- trigger_sense (bool): indicates that the jack detection needs the
+ explicit call of AC_VERB_SET_PIN_SENSE verb
+- inv_eapd (bool): indicates that the EAPD is implemented in the
+ inverted logic
+- pcm_format_first (bool): sets the PCM format before the stream tag
+ and channel ID
+- sticky_stream (bool): keep the PCM format, stream tag and ID as long
+ as possible; default true
+- spdif_status_reset (bool): reset the SPDIF status bits at each time
+ the SPDIF stream is set up
+- pin_amp_workaround (bool): the output pin may have multiple amp
+ values
+- single_adc_amp (bool): ADCs can have only single input amps
+- auto_mute (bool): enable/disable the headphone auto-mute feature;
+ default true
+- auto_mic (bool): enable/disable the mic auto-switch feature; default
+ true
+- line_in_auto_switch (bool): enable/disable the line-in auto-switch
+ feature; default false
+- need_dac_fix (bool): limits the DACs depending on the channel count
+- primary_hp (bool): probe headphone jacks as the primary outputs;
+ default true
+- multi_io (bool): try probing multi-I/O config (e.g. shared
+ line-in/surround, mic/clfe jacks)
+- multi_cap_vol (bool): provide multiple capture volumes
+- inv_dmic_split (bool): provide split internal mic volume/switch for
+ phase-inverted digital mics
+- indep_hp (bool): provide the independent headphone PCM stream and
+ the corresponding mixer control, if available
+- add_stereo_mix_input (bool): add the stereo mix (analog-loopback
+ mix) to the input mux if available
+- add_jack_modes (bool): add "xxx Jack Mode" enum controls to each
+ I/O jack for allowing to change the headphone amp and mic bias VREF
+ capabilities
+- power_save_node (bool): advanced power management for each widget,
+ controlling the power sate (D0/D3) of each widget node depending on
+ the actual pin and stream states
+- power_down_unused (bool): power down the unused widgets, a subset of
+ power_save_node, and will be dropped in future
+- add_hp_mic (bool): add the headphone to capture source if possible
+- hp_mic_detect (bool): enable/disable the hp/mic shared input for a
+ single built-in mic case; default true
+- mixer_nid (int): specifies the widget NID of the analog-loopback
+ mixer
+
+
+Early Patching
+~~~~~~~~~~~~~~
+When CONFIG_SND_HDA_PATCH_LOADER=y is set, you can pass a "patch" as a
+firmware file for modifying the HD-audio setup before initializing the
+codec. This can work basically like the reconfiguration via sysfs in
+the above, but it does it before the first codec configuration.
+
+A patch file is a plain text file which looks like below:
+
+------------------------------------------------------------------------
+ [codec]
+ 0x12345678 0xabcd1234 2
+
+ [model]
+ auto
+
+ [pincfg]
+ 0x12 0x411111f0
+
+ [verb]
+ 0x20 0x500 0x03
+ 0x20 0x400 0xff
+
+ [hint]
+ jack_detect = no
+------------------------------------------------------------------------
+
+The file needs to have a line `[codec]`. The next line should contain
+three numbers indicating the codec vendor-id (0x12345678 in the
+example), the codec subsystem-id (0xabcd1234) and the address (2) of
+the codec. The rest patch entries are applied to this specified codec
+until another codec entry is given. Passing 0 or a negative number to
+the first or the second value will make the check of the corresponding
+field be skipped. It'll be useful for really broken devices that don't
+initialize SSID properly.
+
+The `[model]` line allows to change the model name of the each codec.
+In the example above, it will be changed to model=auto.
+Note that this overrides the module option.
+
+After the `[pincfg]` line, the contents are parsed as the initial
+default pin-configurations just like `user_pin_configs` sysfs above.
+The values can be shown in user_pin_configs sysfs file, too.
+
+Similarly, the lines after `[verb]` are parsed as `init_verbs`
+sysfs entries, and the lines after `[hint]` are parsed as `hints`
+sysfs entries, respectively.
+
+Another example to override the codec vendor id from 0x12345678 to
+0xdeadbeef is like below:
+------------------------------------------------------------------------
+ [codec]
+ 0x12345678 0xabcd1234 2
+
+ [vendor_id]
+ 0xdeadbeef
+------------------------------------------------------------------------
+
+In the similar way, you can override the codec subsystem_id via
+`[subsystem_id]`, the revision id via `[revision_id]` line.
+Also, the codec chip name can be rewritten via `[chip_name]` line.
+------------------------------------------------------------------------
+ [codec]
+ 0x12345678 0xabcd1234 2
+
+ [subsystem_id]
+ 0xffff1111
+
+ [revision_id]
+ 0x10
+
+ [chip_name]
+ My-own NEWS-0002
+------------------------------------------------------------------------
+
+The hd-audio driver reads the file via request_firmware(). Thus,
+a patch file has to be located on the appropriate firmware path,
+typically, /lib/firmware. For example, when you pass the option
+`patch=hda-init.fw`, the file /lib/firmware/hda-init.fw must be
+present.
+
+The patch module option is specific to each card instance, and you
+need to give one file name for each instance, separated by commas.
+For example, if you have two cards, one for an on-board analog and one
+for an HDMI video board, you may pass patch option like below:
+------------------------------------------------------------------------
+ options snd-hda-intel patch=on-board-patch,hdmi-patch
+------------------------------------------------------------------------
+
+
+Power-Saving
+~~~~~~~~~~~~
+The power-saving is a kind of auto-suspend of the device. When the
+device is inactive for a certain time, the device is automatically
+turned off to save the power. The time to go down is specified via
+`power_save` module option, and this option can be changed dynamically
+via sysfs.
+
+The power-saving won't work when the analog loopback is enabled on
+some codecs. Make sure that you mute all unneeded signal routes when
+you want the power-saving.
+
+The power-saving feature might cause audible click noises at each
+power-down/up depending on the device. Some of them might be
+solvable, but some are hard, I'm afraid. Some distros such as
+openSUSE enables the power-saving feature automatically when the power
+cable is unplugged. Thus, if you hear noises, suspect first the
+power-saving. See /sys/module/snd_hda_intel/parameters/power_save to
+check the current value. If it's non-zero, the feature is turned on.
+
+The recent kernel supports the runtime PM for the HD-audio controller
+chip, too. It means that the HD-audio controller is also powered up /
+down dynamically. The feature is enabled only for certain controller
+chips like Intel LynxPoint. You can enable/disable this feature
+forcibly by setting `power_save_controller` option, which is also
+available at /sys/module/snd_hda_intel/parameters directory.
+
+
+Tracepoints
+~~~~~~~~~~~
+The hd-audio driver gives a few basic tracepoints.
+`hda:hda_send_cmd` traces each CORB write while `hda:hda_get_response`
+traces the response from RIRB (only when read from the codec driver).
+`hda:hda_bus_reset` traces the bus-reset due to fatal error, etc,
+`hda:hda_unsol_event` traces the unsolicited events, and
+`hda:hda_power_down` and `hda:hda_power_up` trace the power down/up
+via power-saving behavior.
+
+Enabling all tracepoints can be done like
+------------------------------------------------------------------------
+ # echo 1 > /sys/kernel/debug/tracing/events/hda/enable
+------------------------------------------------------------------------
+then after some commands, you can traces from
+/sys/kernel/debug/tracing/trace file. For example, when you want to
+trace what codec command is sent, enable the tracepoint like:
+------------------------------------------------------------------------
+ # cat /sys/kernel/debug/tracing/trace
+ # tracer: nop
+ #
+ # TASK-PID CPU# TIMESTAMP FUNCTION
+ # | | | | |
+ <...>-7807 [002] 105147.774889: hda_send_cmd: [0:0] val=e3a019
+ <...>-7807 [002] 105147.774893: hda_send_cmd: [0:0] val=e39019
+ <...>-7807 [002] 105147.999542: hda_send_cmd: [0:0] val=e3a01a
+ <...>-7807 [002] 105147.999543: hda_send_cmd: [0:0] val=e3901a
+ <...>-26764 [001] 349222.837143: hda_send_cmd: [0:0] val=e3a019
+ <...>-26764 [001] 349222.837148: hda_send_cmd: [0:0] val=e39019
+ <...>-26764 [001] 349223.058539: hda_send_cmd: [0:0] val=e3a01a
+ <...>-26764 [001] 349223.058541: hda_send_cmd: [0:0] val=e3901a
+------------------------------------------------------------------------
+Here `[0:0]` indicates the card number and the codec address, and
+`val` shows the value sent to the codec, respectively. The value is
+a packed value, and you can decode it via hda-decode-verb program
+included in hda-emu package below. For example, the value e3a019 is
+to set the left output-amp value to 25.
+------------------------------------------------------------------------
+ % hda-decode-verb 0xe3a019
+ raw value = 0x00e3a019
+ cid = 0, nid = 0x0e, verb = 0x3a0, parm = 0x19
+ raw value: verb = 0x3a0, parm = 0x19
+ verbname = set_amp_gain_mute
+ amp raw val = 0xa019
+ output, left, idx=0, mute=0, val=25
+------------------------------------------------------------------------
+
+
+Development Tree
+~~~~~~~~~~~~~~~~
+The latest development codes for HD-audio are found on sound git tree:
+
+- git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/sound.git
+
+The master branch or for-next branches can be used as the main
+development branches in general while the development for the current
+and next kernels are found in for-linus and for-next branches,
+respectively.
+
+If you are using the latest Linus tree, it'd be better to pull the
+above GIT tree onto it. If you are using the older kernels, an easy
+way to try the latest ALSA code is to build from the snapshot
+tarball. There are daily tarballs and the latest snapshot tarball.
+All can be built just like normal alsa-driver release packages, that
+is, installed via the usual spells: configure, make and make
+install(-modules). See INSTALL in the package. The snapshot tarballs
+are found at:
+
+- ftp://ftp.suse.com/pub/people/tiwai/snapshot/
+
+
+Sending a Bug Report
+~~~~~~~~~~~~~~~~~~~~
+If any model or module options don't work for your device, it's time
+to send a bug report to the developers. Give the following in your
+bug report:
+
+- Hardware vendor, product and model names
+- Kernel version (and ALSA-driver version if you built externally)
+- `alsa-info.sh` output; run with `--no-upload` option. See the
+ section below about alsa-info
+
+If it's a regression, at best, send alsa-info outputs of both working
+and non-working kernels. This is really helpful because we can
+compare the codec registers directly.
+
+Send a bug report either the followings:
+
+kernel-bugzilla::
+ https://bugzilla.kernel.org/
+alsa-devel ML::
+ alsa-devel@alsa-project.org
+
+
+DEBUG TOOLS
+-----------
+
+This section describes some tools available for debugging HD-audio
+problems.
+
+alsa-info
+~~~~~~~~~
+The script `alsa-info.sh` is a very useful tool to gather the audio
+device information. You can fetch the latest version from:
+
+- http://www.alsa-project.org/alsa-info.sh
+
+Run this script as root, and it will gather the important information
+such as the module lists, module parameters, proc file contents
+including the codec proc files, mixer outputs and the control
+elements. As default, it will store the information onto a web server
+on alsa-project.org. But, if you send a bug report, it'd be better to
+run with `--no-upload` option, and attach the generated file.
+
+There are some other useful options. See `--help` option output for
+details.
+
+When a probe error occurs or when the driver obviously assigns a
+mismatched model, it'd be helpful to load the driver with
+`probe_only=1` option (at best after the cold reboot) and run
+alsa-info at this state. With this option, the driver won't configure
+the mixer and PCM but just tries to probe the codec slot. After
+probing, the proc file is available, so you can get the raw codec
+information before modified by the driver. Of course, the driver
+isn't usable with `probe_only=1`. But you can continue the
+configuration via hwdep sysfs file if hda-reconfig option is enabled.
+Using `probe_only` mask 2 skips the reset of HDA codecs (use
+`probe_only=3` as module option). The hwdep interface can be used
+to determine the BIOS codec initialization.
+
+
+hda-verb
+~~~~~~~~
+hda-verb is a tiny program that allows you to access the HD-audio
+codec directly. You can execute a raw HD-audio codec verb with this.
+This program accesses the hwdep device, thus you need to enable the
+kernel config `CONFIG_SND_HDA_HWDEP=y` beforehand.
+
+The hda-verb program takes four arguments: the hwdep device file, the
+widget NID, the verb and the parameter. When you access to the codec
+on the slot 2 of the card 0, pass /dev/snd/hwC0D2 to the first
+argument, typically. (However, the real path name depends on the
+system.)
+
+The second parameter is the widget number-id to access. The third
+parameter can be either a hex/digit number or a string corresponding
+to a verb. Similarly, the last parameter is the value to write, or
+can be a string for the parameter type.
+
+------------------------------------------------------------------------
+ % hda-verb /dev/snd/hwC0D0 0x12 0x701 2
+ nid = 0x12, verb = 0x701, param = 0x2
+ value = 0x0
+
+ % hda-verb /dev/snd/hwC0D0 0x0 PARAMETERS VENDOR_ID
+ nid = 0x0, verb = 0xf00, param = 0x0
+ value = 0x10ec0262
+
+ % hda-verb /dev/snd/hwC0D0 2 set_a 0xb080
+ nid = 0x2, verb = 0x300, param = 0xb080
+ value = 0x0
+------------------------------------------------------------------------
+
+Although you can issue any verbs with this program, the driver state
+won't be always updated. For example, the volume values are usually
+cached in the driver, and thus changing the widget amp value directly
+via hda-verb won't change the mixer value.
+
+The hda-verb program is included now in alsa-tools:
+
+- git://git.alsa-project.org/alsa-tools.git
+
+Also, the old stand-alone package is found in the ftp directory:
+
+- ftp://ftp.suse.com/pub/people/tiwai/misc/
+
+Also a git repository is available:
+
+- git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/hda-verb.git
+
+See README file in the tarball for more details about hda-verb
+program.
+
+
+hda-analyzer
+~~~~~~~~~~~~
+hda-analyzer provides a graphical interface to access the raw HD-audio
+control, based on pyGTK2 binding. It's a more powerful version of
+hda-verb. The program gives you an easy-to-use GUI stuff for showing
+the widget information and adjusting the amp values, as well as the
+proc-compatible output.
+
+The hda-analyzer:
+
+- http://git.alsa-project.org/?p=alsa.git;a=tree;f=hda-analyzer
+
+is a part of alsa.git repository in alsa-project.org:
+
+- git://git.alsa-project.org/alsa.git
+
+Codecgraph
+~~~~~~~~~~
+Codecgraph is a utility program to generate a graph and visualizes the
+codec-node connection of a codec chip. It's especially useful when
+you analyze or debug a codec without a proper datasheet. The program
+parses the given codec proc file and converts to SVG via graphiz
+program.
+
+The tarball and GIT trees are found in the web page at:
+
+- http://helllabs.org/codecgraph/
+
+
+hda-emu
+~~~~~~~
+hda-emu is an HD-audio emulator. The main purpose of this program is
+to debug an HD-audio codec without the real hardware. Thus, it
+doesn't emulate the behavior with the real audio I/O, but it just
+dumps the codec register changes and the ALSA-driver internal changes
+at probing and operating the HD-audio driver.
+
+The program requires a codec proc-file to simulate. Get a proc file
+for the target codec beforehand, or pick up an example codec from the
+codec proc collections in the tarball. Then, run the program with the
+proc file, and the hda-emu program will start parsing the codec file
+and simulates the HD-audio driver:
+
+------------------------------------------------------------------------
+ % hda-emu codecs/stac9200-dell-d820-laptop
+ # Parsing..
+ hda_codec: Unknown model for STAC9200, using BIOS defaults
+ hda_codec: pin nid 08 bios pin config 40c003fa
+ ....
+------------------------------------------------------------------------
+
+The program gives you only a very dumb command-line interface. You
+can get a proc-file dump at the current state, get a list of control
+(mixer) elements, set/get the control element value, simulate the PCM
+operation, the jack plugging simulation, etc.
+
+The package is found in:
+
+- ftp://ftp.suse.com/pub/people/tiwai/misc/
+
+A git repository is available:
+
+- git://git.kernel.org/pub/scm/linux/kernel/git/tiwai/hda-emu.git
+
+See README file in the tarball for more details about hda-emu
+program.
+
+
+hda-jack-retask
+~~~~~~~~~~~~~~~
+hda-jack-retask is a user-friendly GUI program to manipulate the
+HD-audio pin control for jack retasking. If you have a problem about
+the jack assignment, try this program and check whether you can get
+useful results. Once when you figure out the proper pin assignment,
+it can be fixed either in the driver code statically or via passing a
+firmware patch file (see "Early Patching" section).
+
+The program is included in alsa-tools now:
+
+- git://git.alsa-project.org/alsa-tools.git
+
diff --git a/Documentation/sound/alsa/Joystick.txt b/Documentation/sound/alsa/Joystick.txt
new file mode 100644
index 000000000..ccda41b10
--- /dev/null
+++ b/Documentation/sound/alsa/Joystick.txt
@@ -0,0 +1,86 @@
+Analog Joystick Support on ALSA Drivers
+=======================================
+ Oct. 14, 2003
+ Takashi Iwai <tiwai@suse.de>
+
+General
+-------
+
+First of all, you need to enable GAMEPORT support on Linux kernel for
+using a joystick with the ALSA driver. For the details of gameport
+support, refer to Documentation/input/joystick.txt.
+
+The joystick support of ALSA drivers is different between ISA and PCI
+cards. In the case of ISA (PnP) cards, it's usually handled by the
+independent module (ns558). Meanwhile, the ALSA PCI drivers have the
+built-in gameport support. Hence, when the ALSA PCI driver is built
+in the kernel, CONFIG_GAMEPORT must be 'y', too. Otherwise, the
+gameport support on that card will be (silently) disabled.
+
+Some adapter modules probe the physical connection of the device at
+the load time. It'd be safer to plug in the joystick device before
+loading the module.
+
+
+PCI Cards
+---------
+
+For PCI cards, the joystick is enabled when the appropriate module
+option is specified. Some drivers don't need options, and the
+joystick support is always enabled. In the former ALSA version, there
+was a dynamic control API for the joystick activation. It was
+changed, however, to the static module options because of the system
+stability and the resource management.
+
+The following PCI drivers support the joystick natively.
+
+ Driver Module Option Available Values
+ ---------------------------------------------------------------------------
+ als4000 joystick_port 0 = disable (default), 1 = auto-detect,
+ manual: any address (e.g. 0x200)
+ au88x0 N/A N/A
+ azf3328 joystick 0 = disable, 1 = enable, -1 = auto (default)
+ ens1370 joystick 0 = disable (default), 1 = enable
+ ens1371 joystick_port 0 = disable (default), 1 = auto-detect,
+ manual: 0x200, 0x208, 0x210, 0x218
+ cmipci joystick_port 0 = disable (default), 1 = auto-detect,
+ manual: any address (e.g. 0x200)
+ cs4281 N/A N/A
+ cs46xx N/A N/A
+ es1938 N/A N/A
+ es1968 joystick 0 = disable (default), 1 = enable
+ sonicvibes N/A N/A
+ trident N/A N/A
+ via82xx(*1) joystick 0 = disable (default), 1 = enable
+ ymfpci joystick_port 0 = disable (default), 1 = auto-detect,
+ manual: 0x201, 0x202, 0x204, 0x205(*2)
+ ---------------------------------------------------------------------------
+
+ *1) VIA686A/B only
+ *2) With YMF744/754 chips, the port address can be chosen arbitrarily
+
+The following drivers don't support gameport natively, but there are
+additional modules. Load the corresponding module to add the gameport
+support.
+
+ Driver Additional Module
+ -----------------------------
+ emu10k1 emu10k1-gp
+ fm801 fm801-gp
+ -----------------------------
+
+Note: the "pcigame" and "cs461x" modules are for the OSS drivers only.
+ These ALSA drivers (cs46xx, trident and au88x0) have the
+ built-in gameport support.
+
+As mentioned above, ALSA PCI drivers have the built-in gameport
+support, so you don't have to load ns558 module. Just load "joydev"
+and the appropriate adapter module (e.g. "analog").
+
+
+ISA Cards
+---------
+
+ALSA ISA drivers don't have the built-in gameport support.
+Instead, you need to load "ns558" module in addition to "joydev" and
+the adapter module (e.g. "analog").
diff --git a/Documentation/sound/alsa/MIXART.txt b/Documentation/sound/alsa/MIXART.txt
new file mode 100644
index 000000000..4ee35b4fb
--- /dev/null
+++ b/Documentation/sound/alsa/MIXART.txt
@@ -0,0 +1,100 @@
+ Alsa driver for Digigram miXart8 and miXart8AES/EBU soundcards
+ Digigram <alsa@digigram.com>
+
+
+GENERAL
+=======
+
+The miXart8 is a multichannel audio processing and mixing soundcard
+that has 4 stereo audio inputs and 4 stereo audio outputs.
+The miXart8AES/EBU is the same with a add-on card that offers further
+4 digital stereo audio inputs and outputs.
+Furthermore the add-on card offers external clock synchronisation
+(AES/EBU, Word Clock, Time Code and Video Synchro)
+
+The mainboard has a PowerPC that offers onboard mpeg encoding and
+decoding, samplerate conversions and various effects.
+
+The driver don't work properly at all until the certain firmwares
+are loaded, i.e. no PCM nor mixer devices will appear.
+Use the mixartloader that can be found in the alsa-tools package.
+
+
+VERSION 0.1.0
+=============
+
+One miXart8 board will be represented as 4 alsa cards, each with 1
+stereo analog capture 'pcm0c' and 1 stereo analog playback 'pcm0p' device.
+With a miXart8AES/EBU there is in addition 1 stereo digital input
+'pcm1c' and 1 stereo digital output 'pcm1p' per card.
+
+Formats
+-------
+U8, S16_LE, S16_BE, S24_3LE, S24_3BE, FLOAT_LE, FLOAT_BE
+Sample rates : 8000 - 48000 Hz continuously
+
+Playback
+--------
+For instance the playback devices are configured to have max. 4
+substreams performing hardware mixing. This could be changed to a
+maximum of 24 substreams if wished.
+Mono files will be played on the left and right channel. Each channel
+can be muted for each stream to use 8 analog/digital outputs separately.
+
+Capture
+-------
+There is one substream per capture device. For instance only stereo
+formats are supported.
+
+Mixer
+-----
+<Master> and <Master Capture> : analog volume control of playback and capture PCM.
+<PCM 0-3> and <PCM Capture> : digital volume control of each analog substream.
+<AES 0-3> and <AES Capture> : digital volume control of each AES/EBU substream.
+<Monitoring> : Loopback from 'pcm0c' to 'pcm0p' with digital volume
+and mute control.
+
+Rem : for best audio quality try to keep a 0 attenuation on the PCM
+and AES volume controls which is set by 219 in the range from 0 to 255
+(about 86% with alsamixer)
+
+
+NOT YET IMPLEMENTED
+===================
+
+- external clock support (AES/EBU, Word Clock, Time Code, Video Sync)
+- MPEG audio formats
+- mono record
+- on-board effects and samplerate conversions
+- linked streams
+
+
+FIRMWARE
+========
+
+[As of 2.6.11, the firmware can be loaded automatically with hotplug
+ when CONFIG_FW_LOADER is set. The mixartloader is necessary only
+ for older versions or when you build the driver into kernel.]
+
+For loading the firmware automatically after the module is loaded, use a
+install command. For example, add the following entry to
+/etc/modprobe.d/mixart.conf for miXart driver:
+
+ install snd-mixart /sbin/modprobe --first-time -i snd-mixart && \
+ /usr/bin/mixartloader
+(for 2.2/2.4 kernels, add "post-install snd-mixart /usr/bin/vxloader" to
+ /etc/modules.conf, instead.)
+
+The firmware binaries are installed on /usr/share/alsa/firmware
+(or /usr/local/share/alsa/firmware, depending to the prefix option of
+configure). There will be a miXart.conf file, which define the dsp image
+files.
+
+The firmware files are copyright by Digigram SA
+
+
+COPYRIGHT
+=========
+
+Copyright (c) 2003 Digigram SA <alsa@digigram.com>
+Distributable under GPL.
diff --git a/Documentation/sound/alsa/OSS-Emulation.txt b/Documentation/sound/alsa/OSS-Emulation.txt
new file mode 100644
index 000000000..152ca2a3f
--- /dev/null
+++ b/Documentation/sound/alsa/OSS-Emulation.txt
@@ -0,0 +1,305 @@
+ NOTES ON KERNEL OSS-EMULATION
+ =============================
+
+ Jan. 22, 2004 Takashi Iwai <tiwai@suse.de>
+
+
+Modules
+=======
+
+ALSA provides a powerful OSS emulation on the kernel.
+The OSS emulation for PCM, mixer and sequencer devices is implemented
+as add-on kernel modules, snd-pcm-oss, snd-mixer-oss and snd-seq-oss.
+When you need to access the OSS PCM, mixer or sequencer devices, the
+corresponding module has to be loaded.
+
+These modules are loaded automatically when the corresponding service
+is called. The alias is defined sound-service-x-y, where x and y are
+the card number and the minor unit number. Usually you don't have to
+define these aliases by yourself.
+
+Only necessary step for auto-loading of OSS modules is to define the
+card alias in /etc/modprobe.d/alsa.conf, such as
+
+ alias sound-slot-0 snd-emu10k1
+
+As the second card, define sound-slot-1 as well.
+Note that you can't use the aliased name as the target name (i.e.
+"alias sound-slot-0 snd-card-0" doesn't work any more like the old
+modutils).
+
+The currently available OSS configuration is shown in
+/proc/asound/oss/sndstat. This shows in the same syntax of
+/dev/sndstat, which is available on the commercial OSS driver.
+On ALSA, you can symlink /dev/sndstat to this proc file.
+
+Please note that the devices listed in this proc file appear only
+after the corresponding OSS-emulation module is loaded. Don't worry
+even if "NOT ENABLED IN CONFIG" is shown in it.
+
+
+Device Mapping
+==============
+
+ALSA supports the following OSS device files:
+
+ PCM:
+ /dev/dspX
+ /dev/adspX
+
+ Mixer:
+ /dev/mixerX
+
+ MIDI:
+ /dev/midi0X
+ /dev/amidi0X
+
+ Sequencer:
+ /dev/sequencer
+ /dev/sequencer2 (aka /dev/music)
+
+where X is the card number from 0 to 7.
+
+(NOTE: Some distributions have the device files like /dev/midi0 and
+ /dev/midi1. They are NOT for OSS but for tclmidi, which is
+ a totally different thing.)
+
+Unlike the real OSS, ALSA cannot use the device files more than the
+assigned ones. For example, the first card cannot use /dev/dsp1 or
+/dev/dsp2, but only /dev/dsp0 and /dev/adsp0.
+
+As seen above, PCM and MIDI may have two devices. Usually, the first
+PCM device (hw:0,0 in ALSA) is mapped to /dev/dsp and the secondary
+device (hw:0,1) to /dev/adsp (if available). For MIDI, /dev/midi and
+/dev/amidi, respectively.
+
+You can change this device mapping via the module options of
+snd-pcm-oss and snd-rawmidi. In the case of PCM, the following
+options are available for snd-pcm-oss:
+
+ dsp_map PCM device number assigned to /dev/dspX
+ (default = 0)
+ adsp_map PCM device number assigned to /dev/adspX
+ (default = 1)
+
+For example, to map the third PCM device (hw:0,2) to /dev/adsp0,
+define like this:
+
+ options snd-pcm-oss adsp_map=2
+
+The options take arrays. For configuring the second card, specify
+two entries separated by comma. For example, to map the third PCM
+device on the second card to /dev/adsp1, define like below:
+
+ options snd-pcm-oss adsp_map=0,2
+
+To change the mapping of MIDI devices, the following options are
+available for snd-rawmidi:
+
+ midi_map MIDI device number assigned to /dev/midi0X
+ (default = 0)
+ amidi_map MIDI device number assigned to /dev/amidi0X
+ (default = 1)
+
+For example, to assign the third MIDI device on the first card to
+/dev/midi00, define as follows:
+
+ options snd-rawmidi midi_map=2
+
+
+PCM Mode
+========
+
+As default, ALSA emulates the OSS PCM with so-called plugin layer,
+i.e. tries to convert the sample format, rate or channels
+automatically when the card doesn't support it natively.
+This will lead to some problems for some applications like quake or
+wine, especially if they use the card only in the MMAP mode.
+
+In such a case, you can change the behavior of PCM per application by
+writing a command to the proc file. There is a proc file for each PCM
+stream, /proc/asound/cardX/pcmY[cp]/oss, where X is the card number
+(zero-based), Y the PCM device number (zero-based), and 'p' is for
+playback and 'c' for capture, respectively. Note that this proc file
+exists only after snd-pcm-oss module is loaded.
+
+The command sequence has the following syntax:
+
+ app_name fragments fragment_size [options]
+
+app_name is the name of application with (higher priority) or without
+path.
+fragments specifies the number of fragments or zero if no specific
+number is given.
+fragment_size is the size of fragment in bytes or zero if not given.
+options is the optional parameters. The following options are
+available:
+
+ disable the application tries to open a pcm device for
+ this channel but does not want to use it.
+ direct don't use plugins
+ block force block open mode
+ non-block force non-block open mode
+ partial-frag write also partial fragments (affects playback only)
+ no-silence do not fill silence ahead to avoid clicks
+
+The disable option is useful when one stream direction (playback or
+capture) is not handled correctly by the application although the
+hardware itself does support both directions.
+The direct option is used, as mentioned above, to bypass the automatic
+conversion and useful for MMAP-applications.
+For example, to playback the first PCM device without plugins for
+quake, send a command via echo like the following:
+
+ % echo "quake 0 0 direct" > /proc/asound/card0/pcm0p/oss
+
+While quake wants only playback, you may append the second command
+to notify driver that only this direction is about to be allocated:
+
+ % echo "quake 0 0 disable" > /proc/asound/card0/pcm0c/oss
+
+The permission of proc files depend on the module options of snd.
+As default it's set as root, so you'll likely need to be superuser for
+sending the command above.
+
+The block and non-block options are used to change the behavior of
+opening the device file.
+
+As default, ALSA behaves as original OSS drivers, i.e. does not block
+the file when it's busy. The -EBUSY error is returned in this case.
+
+This blocking behavior can be changed globally via nonblock_open
+module option of snd-pcm-oss. For using the blocking mode as default
+for OSS devices, define like the following:
+
+ options snd-pcm-oss nonblock_open=0
+
+The partial-frag and no-silence commands have been added recently.
+Both commands are for optimization use only. The former command
+specifies to invoke the write transfer only when the whole fragment is
+filled. The latter stops writing the silence data ahead
+automatically. Both are disabled as default.
+
+You can check the currently defined configuration by reading the proc
+file. The read image can be sent to the proc file again, hence you
+can save the current configuration
+
+ % cat /proc/asound/card0/pcm0p/oss > /somewhere/oss-cfg
+
+and restore it like
+
+ % cat /somewhere/oss-cfg > /proc/asound/card0/pcm0p/oss
+
+Also, for clearing all the current configuration, send "erase" command
+as below:
+
+ % echo "erase" > /proc/asound/card0/pcm0p/oss
+
+
+Mixer Elements
+==============
+
+Since ALSA has completely different mixer interface, the emulation of
+OSS mixer is relatively complicated. ALSA builds up a mixer element
+from several different ALSA (mixer) controls based on the name
+string. For example, the volume element SOUND_MIXER_PCM is composed
+from "PCM Playback Volume" and "PCM Playback Switch" controls for the
+playback direction and from "PCM Capture Volume" and "PCM Capture
+Switch" for the capture directory (if exists). When the PCM volume of
+OSS is changed, all the volume and switch controls above are adjusted
+automatically.
+
+As default, ALSA uses the following control for OSS volumes:
+
+ OSS volume ALSA control Index
+ -----------------------------------------------------
+ SOUND_MIXER_VOLUME Master 0
+ SOUND_MIXER_BASS Tone Control - Bass 0
+ SOUND_MIXER_TREBLE Tone Control - Treble 0
+ SOUND_MIXER_SYNTH Synth 0
+ SOUND_MIXER_PCM PCM 0
+ SOUND_MIXER_SPEAKER PC Speaker 0
+ SOUND_MIXER_LINE Line 0
+ SOUND_MIXER_MIC Mic 0
+ SOUND_MIXER_CD CD 0
+ SOUND_MIXER_IMIX Monitor Mix 0
+ SOUND_MIXER_ALTPCM PCM 1
+ SOUND_MIXER_RECLEV (not assigned)
+ SOUND_MIXER_IGAIN Capture 0
+ SOUND_MIXER_OGAIN Playback 0
+ SOUND_MIXER_LINE1 Aux 0
+ SOUND_MIXER_LINE2 Aux 1
+ SOUND_MIXER_LINE3 Aux 2
+ SOUND_MIXER_DIGITAL1 Digital 0
+ SOUND_MIXER_DIGITAL2 Digital 1
+ SOUND_MIXER_DIGITAL3 Digital 2
+ SOUND_MIXER_PHONEIN Phone 0
+ SOUND_MIXER_PHONEOUT Phone 1
+ SOUND_MIXER_VIDEO Video 0
+ SOUND_MIXER_RADIO Radio 0
+ SOUND_MIXER_MONITOR Monitor 0
+
+The second column is the base-string of the corresponding ALSA
+control. In fact, the controls with "XXX [Playback|Capture]
+[Volume|Switch]" will be checked in addition.
+
+The current assignment of these mixer elements is listed in the proc
+file, /proc/asound/cardX/oss_mixer, which will be like the following
+
+ VOLUME "Master" 0
+ BASS "" 0
+ TREBLE "" 0
+ SYNTH "" 0
+ PCM "PCM" 0
+ ...
+
+where the first column is the OSS volume element, the second column
+the base-string of the corresponding ALSA control, and the third the
+control index. When the string is empty, it means that the
+corresponding OSS control is not available.
+
+For changing the assignment, you can write the configuration to this
+proc file. For example, to map "Wave Playback" to the PCM volume,
+send the command like the following:
+
+ % echo 'VOLUME "Wave Playback" 0' > /proc/asound/card0/oss_mixer
+
+The command is exactly as same as listed in the proc file. You can
+change one or more elements, one volume per line. In the last
+example, both "Wave Playback Volume" and "Wave Playback Switch" will
+be affected when PCM volume is changed.
+
+Like the case of PCM proc file, the permission of proc files depend on
+the module options of snd. you'll likely need to be superuser for
+sending the command above.
+
+As well as in the case of PCM proc file, you can save and restore the
+current mixer configuration by reading and writing the whole file
+image.
+
+
+Duplex Streams
+==============
+
+Note that when attempting to use a single device file for playback and
+capture, the OSS API provides no way to set the format, sample rate or
+number of channels different in each direction. Thus
+ io_handle = open("device", O_RDWR)
+will only function correctly if the values are the same in each direction.
+
+To use different values in the two directions, use both
+ input_handle = open("device", O_RDONLY)
+ output_handle = open("device", O_WRONLY)
+and set the values for the corresponding handle.
+
+
+Unsupported Features
+====================
+
+MMAP on ICE1712 driver
+----------------------
+ICE1712 supports only the unconventional format, interleaved
+10-channels 24bit (packed in 32bit) format. Therefore you cannot mmap
+the buffer as the conventional (mono or 2-channels, 8 or 16bit) format
+on OSS.
+
diff --git a/Documentation/sound/alsa/Procfile.txt b/Documentation/sound/alsa/Procfile.txt
new file mode 100644
index 000000000..7f8a0d325
--- /dev/null
+++ b/Documentation/sound/alsa/Procfile.txt
@@ -0,0 +1,234 @@
+ Proc Files of ALSA Drivers
+ ==========================
+ Takashi Iwai <tiwai@suse.de>
+
+General
+-------
+
+ALSA has its own proc tree, /proc/asound. Many useful information are
+found in this tree. When you encounter a problem and need debugging,
+check the files listed in the following sections.
+
+Each card has its subtree cardX, where X is from 0 to 7. The
+card-specific files are stored in the card* subdirectories.
+
+
+Global Information
+------------------
+
+cards
+ Shows the list of currently configured ALSA drivers,
+ index, the id string, short and long descriptions.
+
+version
+ Shows the version string and compile date.
+
+modules
+ Lists the module of each card
+
+devices
+ Lists the ALSA native device mappings.
+
+meminfo
+ Shows the status of allocated pages via ALSA drivers.
+ Appears only when CONFIG_SND_DEBUG=y.
+
+hwdep
+ Lists the currently available hwdep devices in format of
+ <card>-<device>: <name>
+
+pcm
+ Lists the currently available PCM devices in format of
+ <card>-<device>: <id>: <name> : <sub-streams>
+
+timer
+ Lists the currently available timer devices
+
+
+oss/devices
+ Lists the OSS device mappings.
+
+oss/sndstat
+ Provides the output compatible with /dev/sndstat.
+ You can symlink this to /dev/sndstat.
+
+
+Card Specific Files
+-------------------
+
+The card-specific files are found in /proc/asound/card* directories.
+Some drivers (e.g. cmipci) have their own proc entries for the
+register dump, etc (e.g. /proc/asound/card*/cmipci shows the register
+dump). These files would be really helpful for debugging.
+
+When PCM devices are available on this card, you can see directories
+like pcm0p or pcm1c. They hold the PCM information for each PCM
+stream. The number after 'pcm' is the PCM device number from 0, and
+the last 'p' or 'c' means playback or capture direction. The files in
+this subtree is described later.
+
+The status of MIDI I/O is found in midi* files. It shows the device
+name and the received/transmitted bytes through the MIDI device.
+
+When the card is equipped with AC97 codecs, there are codec97#*
+subdirectories (described later).
+
+When the OSS mixer emulation is enabled (and the module is loaded),
+oss_mixer file appears here, too. This shows the current mapping of
+OSS mixer elements to the ALSA control elements. You can change the
+mapping by writing to this device. Read OSS-Emulation.txt for
+details.
+
+
+PCM Proc Files
+--------------
+
+card*/pcm*/info
+ The general information of this PCM device: card #, device #,
+ substreams, etc.
+
+card*/pcm*/xrun_debug
+ This file appears when CONFIG_SND_DEBUG=y and
+ CONFIG_PCM_XRUN_DEBUG=y.
+ This shows the status of xrun (= buffer overrun/xrun) and
+ invalid PCM position debug/check of ALSA PCM middle layer.
+ It takes an integer value, can be changed by writing to this
+ file, such as
+
+ # echo 5 > /proc/asound/card0/pcm0p/xrun_debug
+
+ The value consists of the following bit flags:
+ bit 0 = Enable XRUN/jiffies debug messages
+ bit 1 = Show stack trace at XRUN / jiffies check
+ bit 2 = Enable additional jiffies check
+
+ When the bit 0 is set, the driver will show the messages to
+ kernel log when an xrun is detected. The debug message is
+ shown also when the invalid H/W pointer is detected at the
+ update of periods (usually called from the interrupt
+ handler).
+
+ When the bit 1 is set, the driver will show the stack trace
+ additionally. This may help the debugging.
+
+ Since 2.6.30, this option can enable the hwptr check using
+ jiffies. This detects spontaneous invalid pointer callback
+ values, but can be lead to too much corrections for a (mostly
+ buggy) hardware that doesn't give smooth pointer updates.
+ This feature is enabled via the bit 2.
+
+card*/pcm*/sub*/info
+ The general information of this PCM sub-stream.
+
+card*/pcm*/sub*/status
+ The current status of this PCM sub-stream, elapsed time,
+ H/W position, etc.
+
+card*/pcm*/sub*/hw_params
+ The hardware parameters set for this sub-stream.
+
+card*/pcm*/sub*/sw_params
+ The soft parameters set for this sub-stream.
+
+card*/pcm*/sub*/prealloc
+ The buffer pre-allocation information.
+
+card*/pcm*/sub*/xrun_injection
+ Triggers an XRUN to the running stream when any value is
+ written to this proc file. Used for fault injection.
+ This entry is write-only.
+
+AC97 Codec Information
+----------------------
+
+card*/codec97#*/ac97#?-?
+ Shows the general information of this AC97 codec chip, such as
+ name, capabilities, set up.
+
+card*/codec97#0/ac97#?-?+regs
+ Shows the AC97 register dump. Useful for debugging.
+
+ When CONFIG_SND_DEBUG is enabled, you can write to this file for
+ changing an AC97 register directly. Pass two hex numbers.
+ For example,
+
+ # echo 02 9f1f > /proc/asound/card0/codec97#0/ac97#0-0+regs
+
+
+USB Audio Streams
+-----------------
+
+card*/stream*
+ Shows the assignment and the current status of each audio stream
+ of the given card. This information is very useful for debugging.
+
+
+HD-Audio Codecs
+---------------
+
+card*/codec#*
+ Shows the general codec information and the attribute of each
+ widget node.
+
+card*/eld#*
+ Available for HDMI or DisplayPort interfaces.
+ Shows ELD(EDID Like Data) info retrieved from the attached HDMI sink,
+ and describes its audio capabilities and configurations.
+
+ Some ELD fields may be modified by doing `echo name hex_value > eld#*`.
+ Only do this if you are sure the HDMI sink provided value is wrong.
+ And if that makes your HDMI audio work, please report to us so that we
+ can fix it in future kernel releases.
+
+
+Sequencer Information
+---------------------
+
+seq/drivers
+ Lists the currently available ALSA sequencer drivers.
+
+seq/clients
+ Shows the list of currently available sequencer clients and
+ ports. The connection status and the running status are shown
+ in this file, too.
+
+seq/queues
+ Lists the currently allocated/running sequencer queues.
+
+seq/timer
+ Lists the currently allocated/running sequencer timers.
+
+seq/oss
+ Lists the OSS-compatible sequencer stuffs.
+
+
+Help For Debugging?
+-------------------
+
+When the problem is related with PCM, first try to turn on xrun_debug
+mode. This will give you the kernel messages when and where xrun
+happened.
+
+If it's really a bug, report it with the following information:
+
+ - the name of the driver/card, show in /proc/asound/cards
+ - the register dump, if available (e.g. card*/cmipci)
+
+when it's a PCM problem,
+
+ - set-up of PCM, shown in hw_parms, sw_params, and status in the PCM
+ sub-stream directory
+
+when it's a mixer problem,
+
+ - AC97 proc files, codec97#*/* files
+
+for USB audio/midi,
+
+ - output of lsusb -v
+ - stream* files in card directory
+
+
+The ALSA bug-tracking system is found at:
+
+ https://bugtrack.alsa-project.org/alsa-bug/
diff --git a/Documentation/sound/alsa/README.maya44 b/Documentation/sound/alsa/README.maya44
new file mode 100644
index 000000000..67b2ea1cc
--- /dev/null
+++ b/Documentation/sound/alsa/README.maya44
@@ -0,0 +1,163 @@
+NOTE: The following is the original document of Rainer's patch that the
+current maya44 code based on. Some contents might be obsoleted, but I
+keep here as reference -- tiwai
+
+----------------------------------------------------------------
+
+STATE OF DEVELOPMENT:
+
+This driver is being developed on the initiative of Piotr Makowski (oponek@gmail.com) and financed by Lars Bergmann.
+Development is carried out by Rainer Zimmermann (mail@lightshed.de).
+
+ESI provided a sample Maya44 card for the development work.
+
+However, unfortunately it has turned out difficult to get detailed programming information, so I (Rainer Zimmermann) had to find out some card-specific information by experiment and conjecture. Some information (in particular, several GPIO bits) is still missing.
+
+This is the first testing version of the Maya44 driver released to the alsa-devel mailing list (Feb 5, 2008).
+
+
+The following functions work, as tested by Rainer Zimmermann and Piotr Makowski:
+
+- playback and capture at all sampling rates
+- input/output level
+- crossmixing
+- line/mic switch
+- phantom power switch
+- analogue monitor a.k.a bypass
+
+
+The following functions *should* work, but are not fully tested:
+
+- Channel 3+4 analogue - S/PDIF input switching
+- S/PDIF output
+- all inputs/outputs on the M/IO/DIO extension card
+- internal/external clock selection
+
+
+*In particular, we would appreciate testing of these functions by anyone who has access to an M/IO/DIO extension card.*
+
+
+Things that do not seem to work:
+
+- The level meters ("multi track") in 'alsamixer' do not seem to react to signals in (if this is a bug, it would probably be in the existing ICE1724 code).
+
+- Ardour 2.1 seems to work only via JACK, not using ALSA directly or via OSS. This still needs to be tracked down.
+
+
+DRIVER DETAILS:
+
+the following files were added:
+
+pci/ice1724/maya44.c - Maya44 specific code
+pci/ice1724/maya44.h
+pci/ice1724/ice1724.patch
+pci/ice1724/ice1724.h.patch - PROPOSED patch to ice1724.h (see SAMPLING RATES)
+i2c/other/wm8776.c - low-level access routines for Wolfson WM8776 codecs
+include/wm8776.h
+
+
+Note that the wm8776.c code is meant to be card-independent and does not actually register the codec with the ALSA infrastructure.
+This is done in maya44.c, mainly because some of the WM8776 controls are used in Maya44-specific ways, and should be named appropriately.
+
+
+the following files were created in pci/ice1724, simply #including the corresponding file from the alsa-kernel tree:
+
+wtm.h
+vt1720_mobo.h
+revo.h
+prodigy192.h
+pontis.h
+phase.h
+maya44.h
+juli.h
+aureon.h
+amp.h
+envy24ht.h
+se.h
+prodigy_hifi.h
+
+
+*I hope this is the correct way to do things.*
+
+
+SAMPLING RATES:
+
+The Maya44 card (or more exactly, the Wolfson WM8776 codecs) allow a maximum sampling rate of 192 kHz for playback and 92 kHz for capture.
+
+As the ICE1724 chip only allows one global sampling rate, this is handled as follows:
+
+* setting the sampling rate on any open PCM device on the maya44 card will always set the *global* sampling rate for all playback and capture channels.
+
+* In the current state of the driver, setting rates of up to 192 kHz is permitted even for capture devices.
+
+*AVOID CAPTURING AT RATES ABOVE 96kHz*, even though it may appear to work. The codec cannot actually capture at such rates, meaning poor quality.
+
+
+I propose some additional code for limiting the sampling rate when setting on a capture pcm device. However because of the global sampling rate, this logic would be somewhat problematic.
+
+The proposed code (currently deactivated) is in ice1712.h.patch, ice1724.c and maya44.c (in pci/ice1712).
+
+
+SOUND DEVICES:
+
+PCM devices correspond to inputs/outputs as follows (assuming Maya44 is card #0):
+
+hw:0,0 input - stereo, analog input 1+2
+hw:0,0 output - stereo, analog output 1+2
+hw:0,1 input - stereo, analog input 3+4 OR S/PDIF input
+hw:0,1 output - stereo, analog output 3+4 (and SPDIF out)
+
+
+NAMING OF MIXER CONTROLS:
+
+(for more information about the signal flow, please refer to the block diagram on p.24 of the ESI Maya44 manual, or in the ESI windows software).
+
+
+PCM: (digital) output level for channel 1+2
+PCM 1: same for channel 3+4
+
+Mic Phantom+48V: switch for +48V phantom power for electrostatic microphones on input 1/2.
+ Make sure this is not turned on while any other source is connected to input 1/2.
+ It might damage the source and/or the maya44 card.
+
+Mic/Line input: if switch is on, input jack 1/2 is microphone input (mono), otherwise line input (stereo).
+
+Bypass: analogue bypass from ADC input to output for channel 1+2. Same as "Monitor" in the windows driver.
+Bypass 1: same for channel 3+4.
+
+Crossmix: cross-mixer from channels 1+2 to channels 3+4
+Crossmix 1: cross-mixer from channels 3+4 to channels 1+2
+
+IEC958 Output: switch for S/PDIF output.
+ This is not supported by the ESI windows driver.
+ S/PDIF should output the same signal as channel 3+4. [untested!]
+
+
+Digitial output selectors:
+
+ These switches allow a direct digital routing from the ADCs to the DACs.
+ Each switch determines where the digital input data to one of the DACs comes from.
+ They are not supported by the ESI windows driver.
+ For normal operation, they should all be set to "PCM out".
+
+H/W: Output source channel 1
+H/W 1: Output source channel 2
+H/W 2: Output source channel 3
+H/W 3: Output source channel 4
+
+H/W 4 ... H/W 9: unknown function, left in to enable testing.
+ Possibly some of these control S/PDIF output(s).
+ If these turn out to be unused, they will go away in later driver versions.
+
+Selectable values for each of the digital output selectors are:
+ "PCM out" -> DAC output of the corresponding channel (default setting)
+ "Input 1"...
+ "Input 4" -> direct routing from ADC output of the selected input channel
+
+
+--------
+
+Feb 14, 2008
+Rainer Zimmermann
+mail@lightshed.de
+
diff --git a/Documentation/sound/alsa/SB-Live-mixer.txt b/Documentation/sound/alsa/SB-Live-mixer.txt
new file mode 100644
index 000000000..f4b5988f4
--- /dev/null
+++ b/Documentation/sound/alsa/SB-Live-mixer.txt
@@ -0,0 +1,356 @@
+
+ Sound Blaster Live mixer / default DSP code
+ ===========================================
+
+
+The EMU10K1 chips have a DSP part which can be programmed to support
+various ways of sample processing, which is described here.
+(This article does not deal with the overall functionality of the
+EMU10K1 chips. See the manuals section for further details.)
+
+The ALSA driver programs this portion of chip by default code
+(can be altered later) which offers the following functionality:
+
+
+1) IEC958 (S/PDIF) raw PCM
+--------------------------
+
+This PCM device (it's the 4th PCM device (index 3!) and first subdevice
+(index 0) for a given card) allows to forward 48kHz, stereo, 16-bit
+little endian streams without any modifications to the digital output
+(coaxial or optical). The universal interface allows the creation of up
+to 8 raw PCM devices operating at 48kHz, 16-bit little endian. It would
+be easy to add support for multichannel devices to the current code,
+but the conversion routines exist only for stereo (2-channel streams)
+at the time.
+
+Look to tram_poke routines in lowlevel/emu10k1/emufx.c for more details.
+
+
+2) Digital mixer controls
+-------------------------
+
+These controls are built using the DSP instructions. They offer extended
+functionality. Only the default build-in code in the ALSA driver is described
+here. Note that the controls work as attenuators: the maximum value is the
+neutral position leaving the signal unchanged. Note that if the same destination
+is mentioned in multiple controls, the signal is accumulated and can be wrapped
+(set to maximal or minimal value without checking of overflow).
+
+
+Explanation of used abbreviations:
+
+DAC - digital to analog converter
+ADC - analog to digital converter
+I2S - one-way three wire serial bus for digital sound by Philips Semiconductors
+ (this standard is used for connecting standalone DAC and ADC converters)
+LFE - low frequency effects (subwoofer signal)
+AC97 - a chip containing an analog mixer, DAC and ADC converters
+IEC958 - S/PDIF
+FX-bus - the EMU10K1 chip has an effect bus containing 16 accumulators.
+ Each of the synthesizer voices can feed its output to these accumulators
+ and the DSP microcontroller can operate with the resulting sum.
+
+
+name='Wave Playback Volume',index=0
+
+This control is used to attenuate samples for left and right PCM FX-bus
+accumulators. ALSA uses accumulators 0 and 1 for left and right PCM samples.
+The result samples are forwarded to the front DAC PCM slots of the AC97 codec.
+
+name='Wave Surround Playback Volume',index=0
+
+This control is used to attenuate samples for left and right PCM FX-bus
+accumulators. ALSA uses accumulators 0 and 1 for left and right PCM samples.
+The result samples are forwarded to the rear I2S DACs. These DACs operates
+separately (they are not inside the AC97 codec).
+
+name='Wave Center Playback Volume',index=0
+
+This control is used to attenuate samples for left and right PCM FX-bus
+accumulators. ALSA uses accumulators 0 and 1 for left and right PCM samples.
+The result is mixed to mono signal (single channel) and forwarded to
+the ??rear?? right DAC PCM slot of the AC97 codec.
+
+name='Wave LFE Playback Volume',index=0
+
+This control is used to attenuate samples for left and right PCM FX-bus
+accumulators. ALSA uses accumulators 0 and 1 for left and right PCM.
+The result is mixed to mono signal (single channel) and forwarded to
+the ??rear?? left DAC PCM slot of the AC97 codec.
+
+name='Wave Capture Volume',index=0
+name='Wave Capture Switch',index=0
+
+These controls are used to attenuate samples for left and right PCM FX-bus
+accumulator. ALSA uses accumulators 0 and 1 for left and right PCM.
+The result is forwarded to the ADC capture FIFO (thus to the standard capture
+PCM device).
+
+name='Synth Playback Volume',index=0
+
+This control is used to attenuate samples for left and right MIDI FX-bus
+accumulators. ALSA uses accumulators 4 and 5 for left and right MIDI samples.
+The result samples are forwarded to the front DAC PCM slots of the AC97 codec.
+
+name='Synth Capture Volume',index=0
+name='Synth Capture Switch',index=0
+
+These controls are used to attenuate samples for left and right MIDI FX-bus
+accumulator. ALSA uses accumulators 4 and 5 for left and right PCM.
+The result is forwarded to the ADC capture FIFO (thus to the standard capture
+PCM device).
+
+name='Surround Playback Volume',index=0
+
+This control is used to attenuate samples for left and right rear PCM FX-bus
+accumulators. ALSA uses accumulators 2 and 3 for left and right rear PCM samples.
+The result samples are forwarded to the rear I2S DACs. These DACs operate
+separately (they are not inside the AC97 codec).
+
+name='Surround Capture Volume',index=0
+name='Surround Capture Switch',index=0
+
+These controls are used to attenuate samples for left and right rear PCM FX-bus
+accumulators. ALSA uses accumulators 2 and 3 for left and right rear PCM samples.
+The result is forwarded to the ADC capture FIFO (thus to the standard capture
+PCM device).
+
+name='Center Playback Volume',index=0
+
+This control is used to attenuate sample for center PCM FX-bus accumulator.
+ALSA uses accumulator 6 for center PCM sample. The result sample is forwarded
+to the ??rear?? right DAC PCM slot of the AC97 codec.
+
+name='LFE Playback Volume',index=0
+
+This control is used to attenuate sample for center PCM FX-bus accumulator.
+ALSA uses accumulator 6 for center PCM sample. The result sample is forwarded
+to the ??rear?? left DAC PCM slot of the AC97 codec.
+
+name='AC97 Playback Volume',index=0
+
+This control is used to attenuate samples for left and right front ADC PCM slots
+of the AC97 codec. The result samples are forwarded to the front DAC PCM
+slots of the AC97 codec.
+********************************************************************************
+*** Note: This control should be zero for the standard operations, otherwise ***
+*** a digital loopback is activated. ***
+********************************************************************************
+
+name='AC97 Capture Volume',index=0
+
+This control is used to attenuate samples for left and right front ADC PCM slots
+of the AC97 codec. The result is forwarded to the ADC capture FIFO (thus to
+the standard capture PCM device).
+********************************************************************************
+*** Note: This control should be 100 (maximal value), otherwise no analog ***
+*** inputs of the AC97 codec can be captured (recorded). ***
+********************************************************************************
+
+name='IEC958 TTL Playback Volume',index=0
+
+This control is used to attenuate samples from left and right IEC958 TTL
+digital inputs (usually used by a CDROM drive). The result samples are
+forwarded to the front DAC PCM slots of the AC97 codec.
+
+name='IEC958 TTL Capture Volume',index=0
+
+This control is used to attenuate samples from left and right IEC958 TTL
+digital inputs (usually used by a CDROM drive). The result samples are
+forwarded to the ADC capture FIFO (thus to the standard capture PCM device).
+
+name='Zoom Video Playback Volume',index=0
+
+This control is used to attenuate samples from left and right zoom video
+digital inputs (usually used by a CDROM drive). The result samples are
+forwarded to the front DAC PCM slots of the AC97 codec.
+
+name='Zoom Video Capture Volume',index=0
+
+This control is used to attenuate samples from left and right zoom video
+digital inputs (usually used by a CDROM drive). The result samples are
+forwarded to the ADC capture FIFO (thus to the standard capture PCM device).
+
+name='IEC958 LiveDrive Playback Volume',index=0
+
+This control is used to attenuate samples from left and right IEC958 optical
+digital input. The result samples are forwarded to the front DAC PCM slots
+of the AC97 codec.
+
+name='IEC958 LiveDrive Capture Volume',index=0
+
+This control is used to attenuate samples from left and right IEC958 optical
+digital inputs. The result samples are forwarded to the ADC capture FIFO
+(thus to the standard capture PCM device).
+
+name='IEC958 Coaxial Playback Volume',index=0
+
+This control is used to attenuate samples from left and right IEC958 coaxial
+digital inputs. The result samples are forwarded to the front DAC PCM slots
+of the AC97 codec.
+
+name='IEC958 Coaxial Capture Volume',index=0
+
+This control is used to attenuate samples from left and right IEC958 coaxial
+digital inputs. The result samples are forwarded to the ADC capture FIFO
+(thus to the standard capture PCM device).
+
+name='Line LiveDrive Playback Volume',index=0
+name='Line LiveDrive Playback Volume',index=1
+
+This control is used to attenuate samples from left and right I2S ADC
+inputs (on the LiveDrive). The result samples are forwarded to the front
+DAC PCM slots of the AC97 codec.
+
+name='Line LiveDrive Capture Volume',index=1
+name='Line LiveDrive Capture Volume',index=1
+
+This control is used to attenuate samples from left and right I2S ADC
+inputs (on the LiveDrive). The result samples are forwarded to the ADC
+capture FIFO (thus to the standard capture PCM device).
+
+name='Tone Control - Switch',index=0
+
+This control turns the tone control on or off. The samples for front, rear
+and center / LFE outputs are affected.
+
+name='Tone Control - Bass',index=0
+
+This control sets the bass intensity. There is no neutral value!!
+When the tone control code is activated, the samples are always modified.
+The closest value to pure signal is 20.
+
+name='Tone Control - Treble',index=0
+
+This control sets the treble intensity. There is no neutral value!!
+When the tone control code is activated, the samples are always modified.
+The closest value to pure signal is 20.
+
+name='IEC958 Optical Raw Playback Switch',index=0
+
+If this switch is on, then the samples for the IEC958 (S/PDIF) digital
+output are taken only from the raw FX8010 PCM, otherwise standard front
+PCM samples are taken.
+
+name='Headphone Playback Volume',index=1
+
+This control attenuates the samples for the headphone output.
+
+name='Headphone Center Playback Switch',index=1
+
+If this switch is on, then the sample for the center PCM is put to the
+left headphone output (useful for SB Live cards without separate center/LFE
+output).
+
+name='Headphone LFE Playback Switch',index=1
+
+If this switch is on, then the sample for the center PCM is put to the
+right headphone output (useful for SB Live cards without separate center/LFE
+output).
+
+
+3) PCM stream related controls
+------------------------------
+
+name='EMU10K1 PCM Volume',index 0-31
+
+Channel volume attenuation in range 0-0xffff. The maximum value (no
+attenuation) is default. The channel mapping for three values is
+as follows:
+
+ 0 - mono, default 0xffff (no attenuation)
+ 1 - left, default 0xffff (no attenuation)
+ 2 - right, default 0xffff (no attenuation)
+
+name='EMU10K1 PCM Send Routing',index 0-31
+
+This control specifies the destination - FX-bus accumulators. There are
+twelve values with this mapping:
+
+ 0 - mono, A destination (FX-bus 0-15), default 0
+ 1 - mono, B destination (FX-bus 0-15), default 1
+ 2 - mono, C destination (FX-bus 0-15), default 2
+ 3 - mono, D destination (FX-bus 0-15), default 3
+ 4 - left, A destination (FX-bus 0-15), default 0
+ 5 - left, B destination (FX-bus 0-15), default 1
+ 6 - left, C destination (FX-bus 0-15), default 2
+ 7 - left, D destination (FX-bus 0-15), default 3
+ 8 - right, A destination (FX-bus 0-15), default 0
+ 9 - right, B destination (FX-bus 0-15), default 1
+ 10 - right, C destination (FX-bus 0-15), default 2
+ 11 - right, D destination (FX-bus 0-15), default 3
+
+Don't forget that it's illegal to assign a channel to the same FX-bus accumulator
+more than once (it means 0=0 && 1=0 is an invalid combination).
+
+name='EMU10K1 PCM Send Volume',index 0-31
+
+It specifies the attenuation (amount) for given destination in range 0-255.
+The channel mapping is following:
+
+ 0 - mono, A destination attn, default 255 (no attenuation)
+ 1 - mono, B destination attn, default 255 (no attenuation)
+ 2 - mono, C destination attn, default 0 (mute)
+ 3 - mono, D destination attn, default 0 (mute)
+ 4 - left, A destination attn, default 255 (no attenuation)
+ 5 - left, B destination attn, default 0 (mute)
+ 6 - left, C destination attn, default 0 (mute)
+ 7 - left, D destination attn, default 0 (mute)
+ 8 - right, A destination attn, default 0 (mute)
+ 9 - right, B destination attn, default 255 (no attenuation)
+ 10 - right, C destination attn, default 0 (mute)
+ 11 - right, D destination attn, default 0 (mute)
+
+
+
+4) MANUALS/PATENTS:
+-------------------
+
+ftp://opensource.creative.com/pub/doc
+-------------------------------------
+
+ Files:
+ LM4545.pdf AC97 Codec
+
+ m2049.pdf The EMU10K1 Digital Audio Processor
+
+ hog63.ps FX8010 - A DSP Chip Architecture for Audio Effects
+
+
+WIPO Patents
+------------
+ Patent numbers:
+ WO 9901813 (A1) Audio Effects Processor with multiple asynchronous (Jan. 14, 1999)
+ streams
+
+ WO 9901814 (A1) Processor with Instruction Set for Audio Effects (Jan. 14, 1999)
+
+ WO 9901953 (A1) Audio Effects Processor having Decoupled Instruction
+ Execution and Audio Data Sequencing (Jan. 14, 1999)
+
+
+US Patents (http://www.uspto.gov/)
+----------------------------------
+
+ US 5925841 Digital Sampling Instrument employing cache memory (Jul. 20, 1999)
+
+ US 5928342 Audio Effects Processor integrated on a single chip (Jul. 27, 1999)
+ with a multiport memory onto which multiple asynchronous
+ digital sound samples can be concurrently loaded
+
+ US 5930158 Processor with Instruction Set for Audio Effects (Jul. 27, 1999)
+
+ US 6032235 Memory initialization circuit (Tram) (Feb. 29, 2000)
+
+ US 6138207 Interpolation looping of audio samples in cache connected to (Oct. 24, 2000)
+ system bus with prioritization and modification of bus transfers
+ in accordance with loop ends and minimum block sizes
+
+ US 6151670 Method for conserving memory storage using a (Nov. 21, 2000)
+ pool of short term memory registers
+
+ US 6195715 Interrupt control for multiple programs communicating with (Feb. 27, 2001)
+ a common interrupt by associating programs to GP registers,
+ defining interrupt register, polling GP registers, and invoking
+ callback routine associated with defined interrupt register
diff --git a/Documentation/sound/alsa/VIA82xx-mixer.txt b/Documentation/sound/alsa/VIA82xx-mixer.txt
new file mode 100644
index 000000000..1b0ac06ba
--- /dev/null
+++ b/Documentation/sound/alsa/VIA82xx-mixer.txt
@@ -0,0 +1,8 @@
+
+ VIA82xx mixer
+ =============
+
+On many VIA82xx boards, the 'Input Source Select' mixer control does not work.
+Setting it to 'Input2' on such boards will cause recording to hang, or fail
+with EIO (input/output error) via OSS emulation. This control should be left
+at 'Input1' for such cards.
diff --git a/Documentation/sound/alsa/alsa-parameters.txt b/Documentation/sound/alsa/alsa-parameters.txt
new file mode 100644
index 000000000..0fa40679b
--- /dev/null
+++ b/Documentation/sound/alsa/alsa-parameters.txt
@@ -0,0 +1,135 @@
+ ALSA Kernel Parameters
+ ~~~~~~~~~~~~~~~~~~~~~~
+
+See Documentation/kernel-parameters.txt for general information on
+specifying module parameters.
+
+This document may not be entirely up to date and comprehensive. The command
+"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
+module. Loadable modules, after being loaded into the running kernel, also
+reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
+parameters may be changed at runtime by the command
+"echo -n ${value} > /sys/module/${modulename}/parameters/${parm}".
+
+
+ snd-ad1816a= [HW,ALSA]
+
+ snd-ad1848= [HW,ALSA]
+
+ snd-ali5451= [HW,ALSA]
+
+ snd-als100= [HW,ALSA]
+
+ snd-als4000= [HW,ALSA]
+
+ snd-azt2320= [HW,ALSA]
+
+ snd-cmi8330= [HW,ALSA]
+
+ snd-cmipci= [HW,ALSA]
+
+ snd-cs4231= [HW,ALSA]
+
+ snd-cs4232= [HW,ALSA]
+
+ snd-cs4236= [HW,ALSA]
+
+ snd-cs4281= [HW,ALSA]
+
+ snd-cs46xx= [HW,ALSA]
+
+ snd-dt019x= [HW,ALSA]
+
+ snd-dummy= [HW,ALSA]
+
+ snd-emu10k1= [HW,ALSA]
+
+ snd-ens1370= [HW,ALSA]
+
+ snd-ens1371= [HW,ALSA]
+
+ snd-es968= [HW,ALSA]
+
+ snd-es1688= [HW,ALSA]
+
+ snd-es18xx= [HW,ALSA]
+
+ snd-es1938= [HW,ALSA]
+
+ snd-es1968= [HW,ALSA]
+
+ snd-fm801= [HW,ALSA]
+
+ snd-gusclassic= [HW,ALSA]
+
+ snd-gusextreme= [HW,ALSA]
+
+ snd-gusmax= [HW,ALSA]
+
+ snd-hdsp= [HW,ALSA]
+
+ snd-ice1712= [HW,ALSA]
+
+ snd-intel8x0= [HW,ALSA]
+
+ snd-interwave= [HW,ALSA]
+
+ snd-interwave-stb=
+ [HW,ALSA]
+
+ snd-korg1212= [HW,ALSA]
+
+ snd-maestro3= [HW,ALSA]
+
+ snd-mpu401= [HW,ALSA]
+
+ snd-mtpav= [HW,ALSA]
+
+ snd-nm256= [HW,ALSA]
+
+ snd-opl3sa2= [HW,ALSA]
+
+ snd-opti92x-ad1848=
+ [HW,ALSA]
+
+ snd-opti92x-cs4231=
+ [HW,ALSA]
+
+ snd-opti93x= [HW,ALSA]
+
+ snd-pmac= [HW,ALSA]
+
+ snd-rme32= [HW,ALSA]
+
+ snd-rme96= [HW,ALSA]
+
+ snd-rme9652= [HW,ALSA]
+
+ snd-sb8= [HW,ALSA]
+
+ snd-sb16= [HW,ALSA]
+
+ snd-sbawe= [HW,ALSA]
+
+ snd-serial= [HW,ALSA]
+
+ snd-sgalaxy= [HW,ALSA]
+
+ snd-sonicvibes= [HW,ALSA]
+
+ snd-sun-amd7930=
+ [HW,ALSA]
+
+ snd-sun-cs4231= [HW,ALSA]
+
+ snd-trident= [HW,ALSA]
+
+ snd-usb-audio= [HW,ALSA,USB]
+
+ snd-via82xx= [HW,ALSA]
+
+ snd-virmidi= [HW,ALSA]
+
+ snd-wavefront= [HW,ALSA]
+
+ snd-ymfpci= [HW,ALSA]
diff --git a/Documentation/sound/alsa/compress_offload.txt b/Documentation/sound/alsa/compress_offload.txt
new file mode 100644
index 000000000..630c492c3
--- /dev/null
+++ b/Documentation/sound/alsa/compress_offload.txt
@@ -0,0 +1,234 @@
+ compress_offload.txt
+ =====================
+ Pierre-Louis.Bossart <pierre-louis.bossart@linux.intel.com>
+ Vinod Koul <vinod.koul@linux.intel.com>
+
+Overview
+
+Since its early days, the ALSA API was defined with PCM support or
+constant bitrates payloads such as IEC61937 in mind. Arguments and
+returned values in frames are the norm, making it a challenge to
+extend the existing API to compressed data streams.
+
+In recent years, audio digital signal processors (DSP) were integrated
+in system-on-chip designs, and DSPs are also integrated in audio
+codecs. Processing compressed data on such DSPs results in a dramatic
+reduction of power consumption compared to host-based
+processing. Support for such hardware has not been very good in Linux,
+mostly because of a lack of a generic API available in the mainline
+kernel.
+
+Rather than requiring a compatibility break with an API change of the
+ALSA PCM interface, a new 'Compressed Data' API is introduced to
+provide a control and data-streaming interface for audio DSPs.
+
+The design of this API was inspired by the 2-year experience with the
+Intel Moorestown SOC, with many corrections required to upstream the
+API in the mainline kernel instead of the staging tree and make it
+usable by others.
+
+Requirements
+
+The main requirements are:
+
+- separation between byte counts and time. Compressed formats may have
+ a header per file, per frame, or no header at all. The payload size
+ may vary from frame-to-frame. As a result, it is not possible to
+ estimate reliably the duration of audio buffers when handling
+ compressed data. Dedicated mechanisms are required to allow for
+ reliable audio-video synchronization, which requires precise
+ reporting of the number of samples rendered at any given time.
+
+- Handling of multiple formats. PCM data only requires a specification
+ of the sampling rate, number of channels and bits per sample. In
+ contrast, compressed data comes in a variety of formats. Audio DSPs
+ may also provide support for a limited number of audio encoders and
+ decoders embedded in firmware, or may support more choices through
+ dynamic download of libraries.
+
+- Focus on main formats. This API provides support for the most
+ popular formats used for audio and video capture and playback. It is
+ likely that as audio compression technology advances, new formats
+ will be added.
+
+- Handling of multiple configurations. Even for a given format like
+ AAC, some implementations may support AAC multichannel but HE-AAC
+ stereo. Likewise WMA10 level M3 may require too much memory and cpu
+ cycles. The new API needs to provide a generic way of listing these
+ formats.
+
+- Rendering/Grabbing only. This API does not provide any means of
+ hardware acceleration, where PCM samples are provided back to
+ user-space for additional processing. This API focuses instead on
+ streaming compressed data to a DSP, with the assumption that the
+ decoded samples are routed to a physical output or logical back-end.
+
+ - Complexity hiding. Existing user-space multimedia frameworks all
+ have existing enums/structures for each compressed format. This new
+ API assumes the existence of a platform-specific compatibility layer
+ to expose, translate and make use of the capabilities of the audio
+ DSP, eg. Android HAL or PulseAudio sinks. By construction, regular
+ applications are not supposed to make use of this API.
+
+
+Design
+
+The new API shares a number of concepts with the PCM API for flow
+control. Start, pause, resume, drain and stop commands have the same
+semantics no matter what the content is.
+
+The concept of memory ring buffer divided in a set of fragments is
+borrowed from the ALSA PCM API. However, only sizes in bytes can be
+specified.
+
+Seeks/trick modes are assumed to be handled by the host.
+
+The notion of rewinds/forwards is not supported. Data committed to the
+ring buffer cannot be invalidated, except when dropping all buffers.
+
+The Compressed Data API does not make any assumptions on how the data
+is transmitted to the audio DSP. DMA transfers from main memory to an
+embedded audio cluster or to a SPI interface for external DSPs are
+possible. As in the ALSA PCM case, a core set of routines is exposed;
+each driver implementer will have to write support for a set of
+mandatory routines and possibly make use of optional ones.
+
+The main additions are
+
+- get_caps
+This routine returns the list of audio formats supported. Querying the
+codecs on a capture stream will return encoders, decoders will be
+listed for playback streams.
+
+- get_codec_caps For each codec, this routine returns a list of
+capabilities. The intent is to make sure all the capabilities
+correspond to valid settings, and to minimize the risks of
+configuration failures. For example, for a complex codec such as AAC,
+the number of channels supported may depend on a specific profile. If
+the capabilities were exposed with a single descriptor, it may happen
+that a specific combination of profiles/channels/formats may not be
+supported. Likewise, embedded DSPs have limited memory and cpu cycles,
+it is likely that some implementations make the list of capabilities
+dynamic and dependent on existing workloads. In addition to codec
+settings, this routine returns the minimum buffer size handled by the
+implementation. This information can be a function of the DMA buffer
+sizes, the number of bytes required to synchronize, etc, and can be
+used by userspace to define how much needs to be written in the ring
+buffer before playback can start.
+
+- set_params
+This routine sets the configuration chosen for a specific codec. The
+most important field in the parameters is the codec type; in most
+cases decoders will ignore other fields, while encoders will strictly
+comply to the settings
+
+- get_params
+This routines returns the actual settings used by the DSP. Changes to
+the settings should remain the exception.
+
+- get_timestamp
+The timestamp becomes a multiple field structure. It lists the number
+of bytes transferred, the number of samples processed and the number
+of samples rendered/grabbed. All these values can be used to determine
+the average bitrate, figure out if the ring buffer needs to be
+refilled or the delay due to decoding/encoding/io on the DSP.
+
+Note that the list of codecs/profiles/modes was derived from the
+OpenMAX AL specification instead of reinventing the wheel.
+Modifications include:
+- Addition of FLAC and IEC formats
+- Merge of encoder/decoder capabilities
+- Profiles/modes listed as bitmasks to make descriptors more compact
+- Addition of set_params for decoders (missing in OpenMAX AL)
+- Addition of AMR/AMR-WB encoding modes (missing in OpenMAX AL)
+- Addition of format information for WMA
+- Addition of encoding options when required (derived from OpenMAX IL)
+- Addition of rateControlSupported (missing in OpenMAX AL)
+
+Gapless Playback
+================
+When playing thru an album, the decoders have the ability to skip the encoder
+delay and padding and directly move from one track content to another. The end
+user can perceive this as gapless playback as we dont have silence while
+switching from one track to another
+
+Also, there might be low-intensity noises due to encoding. Perfect gapless is
+difficult to reach with all types of compressed data, but works fine with most
+music content. The decoder needs to know the encoder delay and encoder padding.
+So we need to pass this to DSP. This metadata is extracted from ID3/MP4 headers
+and are not present by default in the bitstream, hence the need for a new
+interface to pass this information to the DSP. Also DSP and userspace needs to
+switch from one track to another and start using data for second track.
+
+The main additions are:
+
+- set_metadata
+This routine sets the encoder delay and encoder padding. This can be used by
+decoder to strip the silence. This needs to be set before the data in the track
+is written.
+
+- set_next_track
+This routine tells DSP that metadata and write operation sent after this would
+correspond to subsequent track
+
+- partial drain
+This is called when end of file is reached. The userspace can inform DSP that
+EOF is reached and now DSP can start skipping padding delay. Also next write
+data would belong to next track
+
+Sequence flow for gapless would be:
+- Open
+- Get caps / codec caps
+- Set params
+- Set metadata of the first track
+- Fill data of the first track
+- Trigger start
+- User-space finished sending all,
+- Indicaite next track data by sending set_next_track
+- Set metadata of the next track
+- then call partial_drain to flush most of buffer in DSP
+- Fill data of the next track
+- DSP switches to second track
+(note: order for partial_drain and write for next track can be reversed as well)
+
+Not supported:
+
+- Support for VoIP/circuit-switched calls is not the target of this
+ API. Support for dynamic bit-rate changes would require a tight
+ coupling between the DSP and the host stack, limiting power savings.
+
+- Packet-loss concealment is not supported. This would require an
+ additional interface to let the decoder synthesize data when frames
+ are lost during transmission. This may be added in the future.
+
+- Volume control/routing is not handled by this API. Devices exposing a
+ compressed data interface will be considered as regular ALSA devices;
+ volume changes and routing information will be provided with regular
+ ALSA kcontrols.
+
+- Embedded audio effects. Such effects should be enabled in the same
+ manner, no matter if the input was PCM or compressed.
+
+- multichannel IEC encoding. Unclear if this is required.
+
+- Encoding/decoding acceleration is not supported as mentioned
+ above. It is possible to route the output of a decoder to a capture
+ stream, or even implement transcoding capabilities. This routing
+ would be enabled with ALSA kcontrols.
+
+- Audio policy/resource management. This API does not provide any
+ hooks to query the utilization of the audio DSP, nor any preemption
+ mechanisms.
+
+- No notion of underrun/overrun. Since the bytes written are compressed
+ in nature and data written/read doesn't translate directly to
+ rendered output in time, this does not deal with underrun/overrun and
+ maybe dealt in user-library
+
+Credits:
+- Mark Brown and Liam Girdwood for discussions on the need for this API
+- Harsha Priya for her work on intel_sst compressed API
+- Rakesh Ughreja for valuable feedback
+- Sing Nallasellan, Sikkandar Madar and Prasanna Samaga for
+ demonstrating and quantifying the benefits of audio offload on a
+ real platform.
diff --git a/Documentation/sound/alsa/emu10k1-jack.txt b/Documentation/sound/alsa/emu10k1-jack.txt
new file mode 100644
index 000000000..751d45036
--- /dev/null
+++ b/Documentation/sound/alsa/emu10k1-jack.txt
@@ -0,0 +1,74 @@
+This document is a guide to using the emu10k1 based devices with JACK for low
+latency, multichannel recording functionality. All of my recent work to allow
+Linux users to use the full capabilities of their hardware has been inspired
+by the kX Project. Without their work I never would have discovered the true
+power of this hardware.
+
+ http://www.kxproject.com
+ - Lee Revell, 2005.03.30
+
+Low latency, multichannel audio with JACK and the emu10k1/emu10k2
+-----------------------------------------------------------------
+
+Until recently, emu10k1 users on Linux did not have access to the same low
+latency, multichannel features offered by the "kX ASIO" feature of their
+Windows driver. As of ALSA 1.0.9 this is no more!
+
+For those unfamiliar with kX ASIO, this consists of 16 capture and 16 playback
+channels. With a post 2.6.9 Linux kernel, latencies down to 64 (1.33 ms) or
+even 32 (0.66ms) frames should work well.
+
+The configuration is slightly more involved than on Windows, as you have to
+select the correct device for JACK to use. Actually, for qjackctl users it's
+fairly self explanatory - select Duplex, then for capture and playback select
+the multichannel devices, set the in and out channels to 16, and the sample
+rate to 48000Hz. The command line looks like this:
+
+/usr/local/bin/jackd -R -dalsa -r48000 -p64 -n2 -D -Chw:0,2 -Phw:0,3 -S
+
+This will give you 16 input ports and 16 output ports.
+
+The 16 output ports map onto the 16 FX buses (or the first 16 of 64, for the
+Audigy). The mapping from FX bus to physical output is described in
+SB-Live-mixer.txt (or Audigy-mixer.txt).
+
+The 16 input ports are connected to the 16 physical inputs. Contrary to
+popular belief, all emu10k1 cards are multichannel cards. Which of these
+input channels have physical inputs connected to them depends on the card
+model. Trial and error is highly recommended; the pinout diagrams
+for the card have been reverse engineered by some enterprising kX users and are
+available on the internet. Meterbridge is helpful here, and the kX forums are
+packed with useful information.
+
+Each input port will either correspond to a digital (SPDIF) input, an analog
+input, or nothing. The one exception is the SBLive! 5.1. On these devices,
+the second and third input ports are wired to the center/LFE output. You will
+still see 16 capture channels, but only 14 are available for recording inputs.
+
+This chart, borrowed from kxfxlib/da_asio51.cpp, describes the mapping of JACK
+ports to FXBUS2 (multitrack recording input) and EXTOUT (physical output)
+channels.
+
+/*JACK (& ASIO) mappings on 10k1 5.1 SBLive cards:
+--------------------------------------------
+JACK Epilog FXBUS2(nr)
+--------------------------------------------
+capture_1 asio14 FXBUS2(0xe)
+capture_2 asio15 FXBUS2(0xf)
+capture_3 asio0 FXBUS2(0x0)
+~capture_4 Center EXTOUT(0x11) // mapped to by Center
+~capture_5 LFE EXTOUT(0x12) // mapped to by LFE
+capture_6 asio3 FXBUS2(0x3)
+capture_7 asio4 FXBUS2(0x4)
+capture_8 asio5 FXBUS2(0x5)
+capture_9 asio6 FXBUS2(0x6)
+capture_10 asio7 FXBUS2(0x7)
+capture_11 asio8 FXBUS2(0x8)
+capture_12 asio9 FXBUS2(0x9)
+capture_13 asio10 FXBUS2(0xa)
+capture_14 asio11 FXBUS2(0xb)
+capture_15 asio12 FXBUS2(0xc)
+capture_16 asio13 FXBUS2(0xd)
+*/
+
+TODO: describe use of ld10k1/qlo10k1 in conjunction with JACK
diff --git a/Documentation/sound/alsa/hda_codec.txt b/Documentation/sound/alsa/hda_codec.txt
new file mode 100644
index 000000000..de8efbc7e
--- /dev/null
+++ b/Documentation/sound/alsa/hda_codec.txt
@@ -0,0 +1,322 @@
+Notes on Universal Interface for Intel High Definition Audio Codec
+------------------------------------------------------------------
+
+Takashi Iwai <tiwai@suse.de>
+
+
+[Still a draft version]
+
+
+General
+=======
+
+The snd-hda-codec module supports the generic access function for the
+High Definition (HD) audio codecs. It's designed to be independent
+from the controller code like ac97 codec module. The real accessors
+from/to the controller must be implemented in the lowlevel driver.
+
+The structure of this module is similar with ac97_codec module.
+Each codec chip belongs to a bus class which communicates with the
+controller.
+
+
+Initialization of Bus Instance
+==============================
+
+The card driver has to create struct hda_bus at first. The template
+struct should be filled and passed to the constructor:
+
+struct hda_bus_template {
+ void *private_data;
+ struct pci_dev *pci;
+ const char *modelname;
+ struct hda_bus_ops ops;
+};
+
+The card driver can set and use the private_data field to retrieve its
+own data in callback functions. The pci field is used when the patch
+needs to check the PCI subsystem IDs, so on. For non-PCI system, it
+doesn't have to be set, of course.
+The modelname field specifies the board's specific configuration. The
+string is passed to the codec parser, and it depends on the parser how
+the string is used.
+These fields, private_data, pci and modelname are all optional.
+
+The ops field contains the callback functions as the following:
+
+struct hda_bus_ops {
+ int (*command)(struct hda_codec *codec, hda_nid_t nid, int direct,
+ unsigned int verb, unsigned int parm);
+ unsigned int (*get_response)(struct hda_codec *codec);
+ void (*private_free)(struct hda_bus *);
+#ifdef CONFIG_SND_HDA_POWER_SAVE
+ void (*pm_notify)(struct hda_codec *codec);
+#endif
+};
+
+The command callback is called when the codec module needs to send a
+VERB to the controller. It's always a single command.
+The get_response callback is called when the codec requires the answer
+for the last command. These two callbacks are mandatory and have to
+be given.
+The third, private_free callback, is optional. It's called in the
+destructor to release any necessary data in the lowlevel driver.
+
+The pm_notify callback is available only with
+CONFIG_SND_HDA_POWER_SAVE kconfig. It's called when the codec needs
+to power up or may power down. The controller should check the all
+belonging codecs on the bus whether they are actually powered off
+(check codec->power_on), and optionally the driver may power down the
+controller side, too.
+
+The bus instance is created via snd_hda_bus_new(). You need to pass
+the card instance, the template, and the pointer to store the
+resultant bus instance.
+
+int snd_hda_bus_new(struct snd_card *card, const struct hda_bus_template *temp,
+ struct hda_bus **busp);
+
+It returns zero if successful. A negative return value means any
+error during creation.
+
+
+Creation of Codec Instance
+==========================
+
+Each codec chip on the board is then created on the BUS instance.
+To create a codec instance, call snd_hda_codec_new().
+
+int snd_hda_codec_new(struct hda_bus *bus, unsigned int codec_addr,
+ struct hda_codec **codecp);
+
+The first argument is the BUS instance, the second argument is the
+address of the codec, and the last one is the pointer to store the
+resultant codec instance (can be NULL if not needed).
+
+The codec is stored in a linked list of bus instance. You can follow
+the codec list like:
+
+ struct hda_codec *codec;
+ list_for_each_entry(codec, &bus->codec_list, list) {
+ ...
+ }
+
+The codec isn't initialized at this stage properly. The
+initialization sequence is called when the controls are built later.
+
+
+Codec Access
+============
+
+To access codec, use snd_hda_codec_read() and snd_hda_codec_write().
+snd_hda_param_read() is for reading parameters.
+For writing a sequence of verbs, use snd_hda_sequence_write().
+
+There are variants of cached read/write, snd_hda_codec_write_cache(),
+snd_hda_sequence_write_cache(). These are used for recording the
+register states for the power-management resume. When no PM is needed,
+these are equivalent with non-cached version.
+
+To retrieve the number of sub nodes connected to the given node, use
+snd_hda_get_sub_nodes(). The connection list can be obtained via
+snd_hda_get_connections() call.
+
+When an unsolicited event happens, pass the event via
+snd_hda_queue_unsol_event() so that the codec routines will process it
+later.
+
+
+(Mixer) Controls
+================
+
+To create mixer controls of all codecs, call
+snd_hda_build_controls(). It then builds the mixers and does
+initialization stuff on each codec.
+
+
+PCM Stuff
+=========
+
+snd_hda_build_pcms() gives the necessary information to create PCM
+streams. When it's called, each codec belonging to the bus stores
+codec->num_pcms and codec->pcm_info fields. The num_pcms indicates
+the number of elements in pcm_info array. The card driver is supposed
+to traverse the codec linked list, read the pcm information in
+pcm_info array, and build pcm instances according to them.
+
+The pcm_info array contains the following record:
+
+/* PCM information for each substream */
+struct hda_pcm_stream {
+ unsigned int substreams; /* number of substreams, 0 = not exist */
+ unsigned int channels_min; /* min. number of channels */
+ unsigned int channels_max; /* max. number of channels */
+ hda_nid_t nid; /* default NID to query rates/formats/bps, or set up */
+ u32 rates; /* supported rates */
+ u64 formats; /* supported formats (SNDRV_PCM_FMTBIT_) */
+ unsigned int maxbps; /* supported max. bit per sample */
+ struct hda_pcm_ops ops;
+};
+
+/* for PCM creation */
+struct hda_pcm {
+ char *name;
+ struct hda_pcm_stream stream[2];
+};
+
+The name can be passed to snd_pcm_new(). The stream field contains
+the information for playback (SNDRV_PCM_STREAM_PLAYBACK = 0) and
+capture (SNDRV_PCM_STREAM_CAPTURE = 1) directions. The card driver
+should pass substreams to snd_pcm_new() for the number of substreams
+to create.
+
+The channels_min, channels_max, rates and formats should be copied to
+runtime->hw record. They and maxbps fields are used also to compute
+the format value for the HDA codec and controller. Call
+snd_hda_calc_stream_format() to get the format value.
+
+The ops field contains the following callback functions:
+
+struct hda_pcm_ops {
+ int (*open)(struct hda_pcm_stream *info, struct hda_codec *codec,
+ struct snd_pcm_substream *substream);
+ int (*close)(struct hda_pcm_stream *info, struct hda_codec *codec,
+ struct snd_pcm_substream *substream);
+ int (*prepare)(struct hda_pcm_stream *info, struct hda_codec *codec,
+ unsigned int stream_tag, unsigned int format,
+ struct snd_pcm_substream *substream);
+ int (*cleanup)(struct hda_pcm_stream *info, struct hda_codec *codec,
+ struct snd_pcm_substream *substream);
+};
+
+All are non-NULL, so you can call them safely without NULL check.
+
+The open callback should be called in PCM open after runtime->hw is
+set up. It may override some setting and constraints additionally.
+Similarly, the close callback should be called in the PCM close.
+
+The prepare callback should be called in PCM prepare. This will set
+up the codec chip properly for the operation. The cleanup should be
+called in hw_free to clean up the configuration.
+
+The caller should check the return value, at least for open and
+prepare callbacks. When a negative value is returned, some error
+occurred.
+
+
+Proc Files
+==========
+
+Each codec dumps the widget node information in
+/proc/asound/card*/codec#* file. This information would be really
+helpful for debugging. Please provide its contents together with the
+bug report.
+
+
+Power Management
+================
+
+It's simple:
+Call snd_hda_suspend() in the PM suspend callback.
+Call snd_hda_resume() in the PM resume callback.
+
+
+Codec Preset (Patch)
+====================
+
+To set up and handle the codec functionality fully, each codec may
+have a codec preset (patch). It's defined in struct hda_codec_preset:
+
+ struct hda_codec_preset {
+ unsigned int id;
+ unsigned int mask;
+ unsigned int subs;
+ unsigned int subs_mask;
+ unsigned int rev;
+ const char *name;
+ int (*patch)(struct hda_codec *codec);
+ };
+
+When the codec id and codec subsystem id match with the given id and
+subs fields bitwise (with bitmask mask and subs_mask), the callback
+patch is called. The patch callback should initialize the codec and
+set the codec->patch_ops field. This is defined as below:
+
+ struct hda_codec_ops {
+ int (*build_controls)(struct hda_codec *codec);
+ int (*build_pcms)(struct hda_codec *codec);
+ int (*init)(struct hda_codec *codec);
+ void (*free)(struct hda_codec *codec);
+ void (*unsol_event)(struct hda_codec *codec, unsigned int res);
+ #ifdef CONFIG_PM
+ int (*suspend)(struct hda_codec *codec, pm_message_t state);
+ int (*resume)(struct hda_codec *codec);
+ #endif
+ #ifdef CONFIG_SND_HDA_POWER_SAVE
+ int (*check_power_status)(struct hda_codec *codec,
+ hda_nid_t nid);
+ #endif
+ };
+
+The build_controls callback is called from snd_hda_build_controls().
+Similarly, the build_pcms callback is called from
+snd_hda_build_pcms(). The init callback is called after
+build_controls to initialize the hardware.
+The free callback is called as a destructor.
+
+The unsol_event callback is called when an unsolicited event is
+received.
+
+The suspend and resume callbacks are for power management.
+They can be NULL if no special sequence is required. When the resume
+callback is NULL, the driver calls the init callback and resumes the
+registers from the cache. If other handling is needed, you'd need to
+write your own resume callback. There, the amp values can be resumed
+via
+ void snd_hda_codec_resume_amp(struct hda_codec *codec);
+and the other codec registers via
+ void snd_hda_codec_resume_cache(struct hda_codec *codec);
+
+The check_power_status callback is called when the amp value of the
+given widget NID is changed. The codec code can turn on/off the power
+appropriately from this information.
+
+Each entry can be NULL if not necessary to be called.
+
+
+Generic Parser
+==============
+
+When the device doesn't match with any given presets, the widgets are
+parsed via th generic parser (hda_generic.c). Its support is
+limited: no multi-channel support, for example.
+
+
+Digital I/O
+===========
+
+Call snd_hda_create_spdif_out_ctls() from the patch to create controls
+related with SPDIF out.
+
+
+Helper Functions
+================
+
+snd_hda_get_codec_name() stores the codec name on the given string.
+
+snd_hda_check_board_config() can be used to obtain the configuration
+information matching with the device. Define the model string table
+and the table with struct snd_pci_quirk entries (zero-terminated),
+and pass it to the function. The function checks the modelname given
+as a module parameter, and PCI subsystem IDs. If the matching entry
+is found, it returns the config field value.
+
+snd_hda_add_new_ctls() can be used to create and add control entries.
+Pass the zero-terminated array of struct snd_kcontrol_new
+
+Macros HDA_CODEC_VOLUME(), HDA_CODEC_MUTE() and their variables can be
+used for the entry of struct snd_kcontrol_new.
+
+The input MUX helper callbacks for such a control are provided, too:
+snd_hda_input_mux_info() and snd_hda_input_mux_put(). See
+patch_realtek.c for example.
diff --git a/Documentation/sound/alsa/hdspm.txt b/Documentation/sound/alsa/hdspm.txt
new file mode 100644
index 000000000..7ba31948d
--- /dev/null
+++ b/Documentation/sound/alsa/hdspm.txt
@@ -0,0 +1,362 @@
+Software Interface ALSA-DSP MADI Driver
+
+(translated from German, so no good English ;-),
+2004 - winfried ritsch
+
+
+
+ Full functionality has been added to the driver. Since some of
+ the Controls and startup-options are ALSA-Standard and only the
+ special Controls are described and discussed below.
+
+
+ hardware functionality:
+
+
+ Audio transmission:
+
+ number of channels -- depends on transmission mode
+
+ The number of channels chosen is from 1..Nmax. The reason to
+ use for a lower number of channels is only resource allocation,
+ since unused DMA channels are disabled and less memory is
+ allocated. So also the throughput of the PCI system can be
+ scaled. (Only important for low performance boards).
+
+ Single Speed -- 1..64 channels
+
+ (Note: Choosing the 56channel mode for transmission or as
+ receiver, only 56 are transmitted/received over the MADI, but
+ all 64 channels are available for the mixer, so channel count
+ for the driver)
+
+ Double Speed -- 1..32 channels
+
+ Note: Choosing the 56-channel mode for
+ transmission/receive-mode , only 28 are transmitted/received
+ over the MADI, but all 32 channels are available for the mixer,
+ so channel count for the driver
+
+
+ Quad Speed -- 1..16 channels
+
+ Note: Choosing the 56-channel mode for
+ transmission/receive-mode , only 14 are transmitted/received
+ over the MADI, but all 16 channels are available for the mixer,
+ so channel count for the driver
+
+ Format -- signed 32 Bit Little Endian (SNDRV_PCM_FMTBIT_S32_LE)
+
+ Sample Rates --
+
+ Single Speed -- 32000, 44100, 48000
+
+ Double Speed -- 64000, 88200, 96000 (untested)
+
+ Quad Speed -- 128000, 176400, 192000 (untested)
+
+ access-mode -- MMAP (memory mapped), Not interleaved
+ (PCM_NON-INTERLEAVED)
+
+ buffer-sizes -- 64,128,256,512,1024,2048,8192 Samples
+
+ fragments -- 2
+
+ Hardware-pointer -- 2 Modi
+
+
+ The Card supports the readout of the actual Buffer-pointer,
+ where DMA reads/writes. Since of the bulk mode of PCI it is only
+ 64 Byte accurate. SO it is not really usable for the
+ ALSA-mid-level functions (here the buffer-ID gives a better
+ result), but if MMAP is used by the application. Therefore it
+ can be configured at load-time with the parameter
+ precise-pointer.
+
+
+ (Hint: Experimenting I found that the pointer is maximum 64 to
+ large never to small. So if you subtract 64 you always have a
+ safe pointer for writing, which is used on this mode inside
+ ALSA. In theory now you can get now a latency as low as 16
+ Samples, which is a quarter of the interrupt possibilities.)
+
+ Precise Pointer -- off
+ interrupt used for pointer-calculation
+
+ Precise Pointer -- on
+ hardware pointer used.
+
+ Controller:
+
+
+ Since DSP-MADI-Mixer has 8152 Fader, it does not make sense to
+ use the standard mixer-controls, since this would break most of
+ (especially graphic) ALSA-Mixer GUIs. So Mixer control has be
+ provided by a 2-dimensional controller using the
+ hwdep-interface.
+
+ Also all 128+256 Peak and RMS-Meter can be accessed via the
+ hwdep-interface. Since it could be a performance problem always
+ copying and converting Peak and RMS-Levels even if you just need
+ one, I decided to export the hardware structure, so that of
+ needed some driver-guru can implement a memory-mapping of mixer
+ or peak-meters over ioctl, or also to do only copying and no
+ conversion. A test-application shows the usage of the controller.
+
+ Latency Controls --- not implemented !!!
+
+
+ Note: Within the windows-driver the latency is accessible of a
+ control-panel, but buffer-sizes are controlled with ALSA from
+ hwparams-calls and should not be changed in run-state, I did not
+ implement it here.
+
+
+ System Clock -- suspended !!!!
+
+ Name -- "System Clock Mode"
+
+ Access -- Read Write
+
+ Values -- "Master" "Slave"
+
+
+ !!!! This is a hardware-function but is in conflict with the
+ Clock-source controller, which is a kind of ALSA-standard. I
+ makes sense to set the card to a special mode (master at some
+ frequency or slave), since even not using an Audio-application
+ a studio should have working synchronisations setup. So use
+ Clock-source-controller instead !!!!
+
+ Clock Source
+
+ Name -- "Sample Clock Source"
+
+ Access -- Read Write
+
+ Values -- "AutoSync", "Internal 32.0 kHz", "Internal 44.1 kHz",
+ "Internal 48.0 kHz", "Internal 64.0 kHz", "Internal 88.2 kHz",
+ "Internal 96.0 kHz"
+
+ Choose between Master at a specific Frequency and so also the
+ Speed-mode or Slave (Autosync). Also see "Preferred Sync Ref"
+
+
+ !!!! This is no pure hardware function but was implemented by
+ ALSA by some ALSA-drivers before, so I use it also. !!!
+
+
+ Preferred Sync Ref
+
+ Name -- "Preferred Sync Reference"
+
+ Access -- Read Write
+
+ Values -- "Word" "MADI"
+
+
+ Within the Auto-sync-Mode the preferred Sync Source can be
+ chosen. If it is not available another is used if possible.
+
+ Note: Since MADI has a much higher bit-rate than word-clock, the
+ card should synchronise better in MADI Mode. But since the
+ RME-PLL is very good, there are almost no problems with
+ word-clock too. I never found a difference.
+
+
+ TX 64 channel ---
+
+ Name -- "TX 64 channels mode"
+
+ Access -- Read Write
+
+ Values -- 0 1
+
+ Using 64-channel-modus (1) or 56-channel-modus for
+ MADI-transmission (0).
+
+
+ Note: This control is for output only. Input-mode is detected
+ automatically from hardware sending MADI.
+
+
+ Clear TMS ---
+
+ Name -- "Clear Track Marker"
+
+ Access -- Read Write
+
+ Values -- 0 1
+
+
+ Don't use to lower 5 Audio-bits on AES as additional Bits.
+
+
+ Safe Mode oder Auto Input ---
+
+ Name -- "Safe Mode"
+
+ Access -- Read Write
+
+ Values -- 0 1
+
+ (default on)
+
+ If on (1), then if either the optical or coaxial connection
+ has a failure, there is a takeover to the working one, with no
+ sample failure. Its only useful if you use the second as a
+ backup connection.
+
+ Input ---
+
+ Name -- "Input Select"
+
+ Access -- Read Write
+
+ Values -- optical coaxial
+
+
+ Choosing the Input, optical or coaxial. If Safe-mode is active,
+ this is the preferred Input.
+
+-------------- Mixer ----------------------
+
+ Mixer
+
+ Name -- "Mixer"
+
+ Access -- Read Write
+
+ Values - <channel-number 0-127> <Value 0-65535>
+
+
+ Here as a first value the channel-index is taken to get/set the
+ corresponding mixer channel, where 0-63 are the input to output
+ fader and 64-127 the playback to outputs fader. Value 0
+ is channel muted 0 and 32768 an amplification of 1.
+
+ Chn 1-64
+
+ fast mixer for the ALSA-mixer utils. The diagonal of the
+ mixer-matrix is implemented from playback to output.
+
+
+ Line Out
+
+ Name -- "Line Out"
+
+ Access -- Read Write
+
+ Values -- 0 1
+
+ Switching on and off the analog out, which has nothing to do
+ with mixing or routing. the analog outs reflects channel 63,64.
+
+
+--- information (only read access):
+
+ Sample Rate
+
+ Name -- "System Sample Rate"
+
+ Access -- Read-only
+
+ getting the sample rate.
+
+
+ External Rate measured
+
+ Name -- "External Rate"
+
+ Access -- Read only
+
+
+ Should be "Autosync Rate", but Name used is
+ ALSA-Scheme. External Sample frequency liked used on Autosync is
+ reported.
+
+
+ MADI Sync Status
+
+ Name -- "MADI Sync Lock Status"
+
+ Access -- Read
+
+ Values -- 0,1,2
+
+ MADI-Input is 0=Unlocked, 1=Locked, or 2=Synced.
+
+
+ Word Clock Sync Status
+
+ Name -- "Word Clock Lock Status"
+
+ Access -- Read
+
+ Values -- 0,1,2
+
+ Word Clock Input is 0=Unlocked, 1=Locked, or 2=Synced.
+
+ AutoSync
+
+ Name -- "AutoSync Reference"
+
+ Access -- Read
+
+ Values -- "WordClock", "MADI", "None"
+
+ Sync-Reference is either "WordClock", "MADI" or none.
+
+ RX 64ch --- noch nicht implementiert
+
+ MADI-Receiver is in 64 channel mode oder 56 channel mode.
+
+
+ AB_inp --- not tested
+
+ Used input for Auto-Input.
+
+
+ actual Buffer Position --- not implemented
+
+ !!! this is a ALSA internal function, so no control is used !!!
+
+
+
+Calling Parameter:
+
+ index int array (min = 1, max = 8),
+ "Index value for RME HDSPM interface." card-index within ALSA
+
+ note: ALSA-standard
+
+ id string array (min = 1, max = 8),
+ "ID string for RME HDSPM interface."
+
+ note: ALSA-standard
+
+ enable int array (min = 1, max = 8),
+ "Enable/disable specific HDSPM sound-cards."
+
+ note: ALSA-standard
+
+ precise_ptr int array (min = 1, max = 8),
+ "Enable precise pointer, or disable."
+
+ note: Use only when the application supports this (which is a special case).
+
+ line_outs_monitor int array (min = 1, max = 8),
+ "Send playback streams to analog outs by default."
+
+
+ note: each playback channel is mixed to the same numbered output
+ channel (routed). This is against the ALSA-convention, where all
+ channels have to be muted on after loading the driver, but was
+ used before on other cards, so i historically use it again)
+
+
+
+ enable_monitor int array (min = 1, max = 8),
+ "Enable Analog Out on Channel 63/64 by default."
+
+ note: here the analog output is enabled (but not routed).
diff --git a/Documentation/sound/alsa/powersave.txt b/Documentation/sound/alsa/powersave.txt
new file mode 100644
index 000000000..9657e8099
--- /dev/null
+++ b/Documentation/sound/alsa/powersave.txt
@@ -0,0 +1,41 @@
+Notes on Power-Saving Mode
+==========================
+
+AC97 and HD-audio drivers have the automatic power-saving mode.
+This feature is enabled via Kconfig CONFIG_SND_AC97_POWER_SAVE
+and CONFIG_SND_HDA_POWER_SAVE options, respectively.
+
+With the automatic power-saving, the driver turns off the codec power
+appropriately when no operation is required. When no applications use
+the device and/or no analog loopback is set, the power disablement is
+done fully or partially. It'll save a certain power consumption, thus
+good for laptops (even for desktops).
+
+The time-out for automatic power-off can be specified via power_save
+module option of snd-ac97-codec and snd-hda-intel modules. Specify
+the time-out value in seconds. 0 means to disable the automatic
+power-saving. The default value of timeout is given via
+CONFIG_SND_AC97_POWER_SAVE_DEFAULT and
+CONFIG_SND_HDA_POWER_SAVE_DEFAULT Kconfig options. Setting this to 1
+(the minimum value) isn't recommended because many applications try to
+reopen the device frequently. 10 would be a good choice for normal
+operations.
+
+The power_save option is exported as writable. This means you can
+adjust the value via sysfs on the fly. For example, to turn on the
+automatic power-save mode with 10 seconds, write to
+/sys/modules/snd_ac97_codec/parameters/power_save (usually as root):
+
+ # echo 10 > /sys/modules/snd_ac97_codec/parameters/power_save
+
+
+Note that you might hear click noise/pop when changing the power
+state. Also, it often takes certain time to wake up from the
+power-down to the active state. These are often hardly to fix, so
+don't report extra bug reports unless you have a fix patch ;-)
+
+For HD-audio interface, there is another module option,
+power_save_controller. This enables/disables the power-save mode of
+the controller side. Setting this on may reduce a bit more power
+consumption, but might result in longer wake-up time and click noise.
+Try to turn it off when you experience such a thing too often.
diff --git a/Documentation/sound/alsa/seq_oss.html b/Documentation/sound/alsa/seq_oss.html
new file mode 100644
index 000000000..9663b45f6
--- /dev/null
+++ b/Documentation/sound/alsa/seq_oss.html
@@ -0,0 +1,409 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
+<HTML>
+<HEAD>
+ <TITLE>OSS Sequencer Emulation on ALSA</TITLE>
+</HEAD>
+<BODY>
+
+<CENTER>
+<H1>
+
+<HR WIDTH="100%"></H1></CENTER>
+
+<CENTER>
+<H1>
+OSS Sequencer Emulation on ALSA</H1></CENTER>
+
+<HR WIDTH="100%">
+<P>Copyright (c) 1998,1999 by Takashi Iwai
+<TT><A HREF="mailto:iwai@ww.uni-erlangen.de">&lt;iwai@ww.uni-erlangen.de></A></TT>
+<P>ver.0.1.8; Nov. 16, 1999
+<H2>
+
+<HR WIDTH="100%"></H2>
+
+<H2>
+1. Description</H2>
+This directory contains the OSS sequencer emulation driver on ALSA. Note
+that this program is still in the development state.
+<P>What this does - it provides the emulation of the OSS sequencer, access
+via
+<TT>/dev/sequencer</TT> and <TT>/dev/music</TT> devices.
+The most of applications using OSS can run if the appropriate ALSA
+sequencer is prepared.
+<P>The following features are emulated by this driver:
+<UL>
+<LI>
+Normal sequencer and MIDI events:</LI>
+
+<BR>They are converted to the ALSA sequencer events, and sent to the corresponding
+port.
+<LI>
+Timer events:</LI>
+
+<BR>The timer is not selectable by ioctl. The control rate is fixed to
+100 regardless of HZ. That is, even on Alpha system, a tick is always
+1/100 second. The base rate and tempo can be changed in <TT>/dev/music</TT>.
+
+<LI>
+Patch loading:</LI>
+
+<BR>It purely depends on the synth drivers whether it's supported since
+the patch loading is realized by callback to the synth driver.
+<LI>
+I/O controls:</LI>
+
+<BR>Most of controls are accepted. Some controls
+are dependent on the synth driver, as well as even on original OSS.</UL>
+Furthermore, you can find the following advanced features:
+<UL>
+<LI>
+Better queue mechanism:</LI>
+
+<BR>The events are queued before processing them.
+<LI>
+Multiple applications:</LI>
+
+<BR>You can run two or more applications simultaneously (even for OSS sequencer)!
+However, each MIDI device is exclusive - that is, if a MIDI device is opened
+once by some application, other applications can't use it. No such a restriction
+in synth devices.
+<LI>
+Real-time event processing:</LI>
+
+<BR>The events can be processed in real time without using out of bound
+ioctl. To switch to real-time mode, send ABSTIME 0 event. The followed
+events will be processed in real-time without queued. To switch off the
+real-time mode, send RELTIME 0 event.
+<LI>
+<TT>/proc</TT> interface:</LI>
+
+<BR>The status of applications and devices can be shown via <TT>/proc/asound/seq/oss</TT>
+at any time. In the later version, configuration will be changed via <TT>/proc</TT>
+interface, too.</UL>
+
+<H2>
+2. Installation</H2>
+Run configure script with both sequencer support (<TT>--with-sequencer=yes</TT>)
+and OSS emulation (<TT>--with-oss=yes</TT>) options. A module <TT>snd-seq-oss.o</TT>
+will be created. If the synth module of your sound card supports for OSS
+emulation (so far, only Emu8000 driver), this module will be loaded automatically.
+Otherwise, you need to load this module manually.
+<P>At beginning, this module probes all the MIDI ports which have been
+already connected to the sequencer. Once after that, the creation and deletion
+of ports are watched by announcement mechanism of ALSA sequencer.
+<P>The available synth and MIDI devices can be found in proc interface.
+Run "<TT>cat /proc/asound/seq/oss</TT>", and check the devices. For example,
+if you use an AWE64 card, you'll see like the following:
+<PRE>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; OSS sequencer emulation version 0.1.8
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; ALSA client number 63
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; ALSA receiver port 0
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Number of applications: 0
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Number of synth devices: 1
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; synth 0: [EMU8000]
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; type 0x1 : subtype 0x20 : voices 32
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; capabilties : ioctl enabled / load_patch enabled
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; Number of MIDI devices: 3
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; midi 0: [Emu8000 Port-0] ALSA port 65:0
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; capability write / opened none
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; midi 1: [Emu8000 Port-1] ALSA port 65:1
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; capability write / opened none
+
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; midi 2: [0: MPU-401 (UART)] ALSA port 64:0
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; capability read/write / opened none</PRE>
+Note that the device number may be different from the information of
+<TT>/proc/asound/oss-devices</TT>
+or ones of the original OSS driver. Use the device number listed in <TT>/proc/asound/seq/oss</TT>
+to play via OSS sequencer emulation.
+<H2>
+3. Using Synthesizer Devices</H2>
+Run your favorite program. I've tested playmidi-2.4, awemidi-0.4.3, gmod-3.1
+and xmp-1.1.5. You can load samples via <TT>/dev/sequencer</TT> like sfxload,
+too.
+<P>If the lowlevel driver supports multiple access to synth devices (like
+Emu8000 driver), two or more applications are allowed to run at the same
+time.
+<H2>
+4. Using MIDI Devices</H2>
+So far, only MIDI output was tested. MIDI input was not checked at all,
+but hopefully it will work. Use the device number listed in <TT>/proc/asound/seq/oss</TT>.
+Be aware that these numbers are mostly different from the list in
+<TT>/proc/asound/oss-devices</TT>.
+<H2>
+5. Module Options</H2>
+The following module options are available:
+<UL>
+<LI>
+<TT>maxqlen</TT></LI>
+
+<BR>specifies the maximum read/write queue length. This queue is private
+for OSS sequencer, so that it is independent from the queue length of ALSA
+sequencer. Default value is 1024.
+<LI>
+<TT>seq_oss_debug</TT></LI>
+
+<BR>specifies the debug level and accepts zero (= no debug message) or
+positive integer. Default value is 0.</UL>
+
+<H2>
+6. Queue Mechanism</H2>
+OSS sequencer emulation uses an ALSA priority queue. The
+events from <TT>/dev/sequencer</TT> are processed and put onto the queue
+specified by module option.
+<P>All the events from <TT>/dev/sequencer</TT> are parsed at beginning.
+The timing events are also parsed at this moment, so that the events may
+be processed in real-time. Sending an event ABSTIME 0 switches the operation
+mode to real-time mode, and sending an event RELTIME 0 switches it off.
+In the real-time mode, all events are dispatched immediately.
+<P>The queued events are dispatched to the corresponding ALSA sequencer
+ports after scheduled time by ALSA sequencer dispatcher.
+<P>If the write-queue is full, the application sleeps until a certain amount
+(as default one half) becomes empty in blocking mode. The synchronization
+to write timing was implemented, too.
+<P>The input from MIDI devices or echo-back events are stored on read FIFO
+queue. If application reads <TT>/dev/sequencer</TT> in blocking mode, the
+process will be awaked.
+
+<H2>
+7. Interface to Synthesizer Device</H2>
+
+<H3>
+7.1. Registration</H3>
+To register an OSS synthesizer device, use <TT>snd_seq_oss_synth_register</TT>
+function.
+<PRE>int snd_seq_oss_synth_register(char *name, int type, int subtype, int nvoices,
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; snd_seq_oss_callback_t *oper, void *private_data)</PRE>
+The arguments <TT>name</TT>, <TT>type</TT>, <TT>subtype</TT> and
+<TT>nvoices</TT>
+are used for making the appropriate synth_info structure for ioctl. The
+return value is an index number of this device. This index must be remembered
+for unregister. If registration is failed, -errno will be returned.
+<P>To release this device, call <TT>snd_seq_oss_synth_unregister function</TT>:
+<PRE>int snd_seq_oss_synth_unregister(int index),</PRE>
+where the <TT>index</TT> is the index number returned by register function.
+<H3>
+7.2. Callbacks</H3>
+OSS synthesizer devices have capability for sample downloading and ioctls
+like sample reset. In OSS emulation, these special features are realized
+by using callbacks. The registration argument oper is used to specify these
+callbacks. The following callback functions must be defined:
+<PRE>snd_seq_oss_callback_t:
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; int (*open)(snd_seq_oss_arg_t *p, void *closure);
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; int (*close)(snd_seq_oss_arg_t *p);
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; int (*ioctl)(snd_seq_oss_arg_t *p, unsigned int cmd, unsigned long arg);
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; int (*load_patch)(snd_seq_oss_arg_t *p, int format, const char *buf, int offs, int count);
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; int (*reset)(snd_seq_oss_arg_t *p);
+Except for <TT>open</TT> and <TT>close</TT> callbacks, they are allowed
+to be NULL.
+<P>Each callback function takes the argument type snd_seq_oss_arg_t as the
+first argument.
+<PRE>struct snd_seq_oss_arg_t {
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; int app_index;
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; int file_mode;
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; int seq_mode;
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; snd_seq_addr_t addr;
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; void *private_data;
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; int event_passing;
+};</PRE>
+The first three fields, <TT>app_index</TT>, <TT>file_mode</TT> and
+<TT>seq_mode</TT>
+are initialized by OSS sequencer. The <TT>app_index</TT> is the application
+index which is unique to each application opening OSS sequencer. The
+<TT>file_mode</TT>
+is bit-flags indicating the file operation mode. See
+<TT>seq_oss.h</TT>
+for its meaning. The <TT>seq_mode</TT> is sequencer operation mode. In
+the current version, only <TT>SND_OSSSEQ_MODE_SYNTH</TT> is used.
+<P>The next two fields, <TT>addr</TT> and <TT>private_data</TT>, must be
+filled by the synth driver at open callback. The <TT>addr</TT> contains
+the address of ALSA sequencer port which is assigned to this device. If
+the driver allocates memory for <TT>private_data</TT>, it must be released
+in close callback by itself.
+<P>The last field, <TT>event_passing</TT>, indicates how to translate note-on
+/ off events. In <TT>PROCESS_EVENTS</TT> mode, the note 255 is regarded
+as velocity change, and key pressure event is passed to the port. In <TT>PASS_EVENTS</TT>
+mode, all note on/off events are passed to the port without modified. <TT>PROCESS_KEYPRESS</TT>
+mode checks the note above 128 and regards it as key pressure event (mainly
+for Emu8000 driver).
+<H4>
+7.2.1. Open Callback</H4>
+The <TT>open</TT> is called at each time this device is opened by an application
+using OSS sequencer. This must not be NULL. Typically, the open callback
+does the following procedure:
+<OL>
+<LI>
+Allocate private data record.</LI>
+
+<LI>
+Create an ALSA sequencer port.</LI>
+
+<LI>
+Set the new port address on arg->addr.</LI>
+
+<LI>
+Set the private data record pointer on arg->private_data.</LI>
+</OL>
+Note that the type bit-flags in port_info of this synth port must NOT contain
+<TT>TYPE_MIDI_GENERIC</TT>
+bit. Instead, <TT>TYPE_SPECIFIC</TT> should be used. Also, <TT>CAP_SUBSCRIPTION</TT>
+bit should NOT be included, too. This is necessary to tell it from other
+normal MIDI devices. If the open procedure succeeded, return zero. Otherwise,
+return -errno.
+<H4>
+7.2.2 Ioctl Callback</H4>
+The <TT>ioctl</TT> callback is called when the sequencer receives device-specific
+ioctls. The following two ioctls should be processed by this callback:
+<UL>
+<LI>
+<TT>IOCTL_SEQ_RESET_SAMPLES</TT></LI>
+
+<BR>reset all samples on memory -- return 0
+<LI>
+<TT>IOCTL_SYNTH_MEMAVL</TT></LI>
+
+<BR>return the available memory size
+<LI>
+<TT>FM_4OP_ENABLE</TT></LI>
+
+<BR>can be ignored usually</UL>
+The other ioctls are processed inside the sequencer without passing to
+the lowlevel driver.
+<H4>
+7.2.3 Load_Patch Callback</H4>
+The <TT>load_patch</TT> callback is used for sample-downloading. This callback
+must read the data on user-space and transfer to each device. Return 0
+if succeeded, and -errno if failed. The format argument is the patch key
+in patch_info record. The buf is user-space pointer where patch_info record
+is stored. The offs can be ignored. The count is total data size of this
+sample data.
+<H4>
+7.2.4 Close Callback</H4>
+The <TT>close</TT> callback is called when this device is closed by the
+application. If any private data was allocated in open callback, it must
+be released in the close callback. The deletion of ALSA port should be
+done here, too. This callback must not be NULL.
+<H4>
+7.2.5 Reset Callback</H4>
+The <TT>reset</TT> callback is called when sequencer device is reset or
+closed by applications. The callback should turn off the sounds on the
+relevant port immediately, and initialize the status of the port. If this
+callback is undefined, OSS seq sends a <TT>HEARTBEAT</TT> event to the
+port.
+<H3>
+7.3 Events</H3>
+Most of the events are processed by sequencer and translated to the adequate
+ALSA sequencer events, so that each synth device can receive by input_event
+callback of ALSA sequencer port. The following ALSA events should be implemented
+by the driver:
+<BR>&nbsp;
+<TABLE BORDER WIDTH="75%" NOSAVE >
+<TR NOSAVE>
+<TD NOSAVE><B>ALSA event</B></TD>
+
+<TD><B>Original OSS events</B></TD>
+</TR>
+
+<TR>
+<TD>NOTEON</TD>
+
+<TD>SEQ_NOTEON
+<BR>MIDI_NOTEON</TD>
+</TR>
+
+<TR>
+<TD>NOTE</TD>
+
+<TD>SEQ_NOTEOFF
+<BR>MIDI_NOTEOFF</TD>
+</TR>
+
+<TR NOSAVE>
+<TD NOSAVE>KEYPRESS</TD>
+
+<TD>MIDI_KEY_PRESSURE</TD>
+</TR>
+
+<TR NOSAVE>
+<TD>CHANPRESS</TD>
+
+<TD NOSAVE>SEQ_AFTERTOUCH
+<BR>MIDI_CHN_PRESSURE</TD>
+</TR>
+
+<TR NOSAVE>
+<TD NOSAVE>PGMCHANGE</TD>
+
+<TD NOSAVE>SEQ_PGMCHANGE
+<BR>MIDI_PGM_CHANGE</TD>
+</TR>
+
+<TR>
+<TD>PITCHBEND</TD>
+
+<TD>SEQ_CONTROLLER(CTRL_PITCH_BENDER)
+<BR>MIDI_PITCH_BEND</TD>
+</TR>
+
+<TR>
+<TD>CONTROLLER</TD>
+
+<TD>MIDI_CTL_CHANGE
+<BR>SEQ_BALANCE (with CTL_PAN)</TD>
+</TR>
+
+<TR>
+<TD>CONTROL14</TD>
+
+<TD>SEQ_CONTROLLER</TD>
+</TR>
+
+<TR>
+<TD>REGPARAM</TD>
+
+<TD>SEQ_CONTROLLER(CTRL_PITCH_BENDER_RANGE)</TD>
+</TR>
+
+<TR>
+<TD>SYSEX</TD>
+
+<TD>SEQ_SYSEX</TD>
+</TR>
+</TABLE>
+
+<P>The most of these behavior can be realized by MIDI emulation driver
+included in the Emu8000 lowlevel driver. In the future release, this module
+will be independent.
+<P>Some OSS events (<TT>SEQ_PRIVATE</TT> and <TT>SEQ_VOLUME</TT> events) are passed as event
+type SND_SEQ_OSS_PRIVATE. The OSS sequencer passes these event 8 byte
+packets without any modification. The lowlevel driver should process these
+events appropriately.
+<H2>
+8. Interface to MIDI Device</H2>
+Since the OSS emulation probes the creation and deletion of ALSA MIDI sequencer
+ports automatically by receiving announcement from ALSA sequencer, the
+MIDI devices don't need to be registered explicitly like synth devices.
+However, the MIDI port_info registered to ALSA sequencer must include a group
+name <TT>SND_SEQ_GROUP_DEVICE</TT> and a capability-bit <TT>CAP_READ</TT> or
+<TT>CAP_WRITE</TT>. Also, subscription capabilities, <TT>CAP_SUBS_READ</TT> or <TT>CAP_SUBS_WRITE</TT>,
+must be defined, too. If these conditions are not satisfied, the port is not
+registered as OSS sequencer MIDI device.
+<P>The events via MIDI devices are parsed in OSS sequencer and converted
+to the corresponding ALSA sequencer events. The input from MIDI sequencer
+is also converted to MIDI byte events by OSS sequencer. This works just
+a reverse way of seq_midi module.
+<H2>
+9. Known Problems / TODO's</H2>
+
+<UL>
+<LI>
+Patch loading via ALSA instrument layer is not implemented yet.</LI>
+</UL>
+
+</BODY>
+</HTML>
diff --git a/Documentation/sound/alsa/serial-u16550.txt b/Documentation/sound/alsa/serial-u16550.txt
new file mode 100644
index 000000000..c1919559d
--- /dev/null
+++ b/Documentation/sound/alsa/serial-u16550.txt
@@ -0,0 +1,88 @@
+
+ Serial UART 16450/16550 MIDI driver
+ ===================================
+
+The adaptor module parameter allows you to select either:
+
+ 0 - Roland Soundcanvas support (default)
+ 1 - Midiator MS-124T support (1)
+ 2 - Midiator MS-124W S/A mode (2)
+ 3 - MS-124W M/B mode support (3)
+ 4 - Generic device with multiple input support (4)
+
+For the Midiator MS-124W, you must set the physical M-S and A-B
+switches on the Midiator to match the driver mode you select.
+
+In Roland Soundcanvas mode, multiple ALSA raw MIDI substreams are supported
+(midiCnD0-midiCnD15). Whenever you write to a different substream, the driver
+sends the nonstandard MIDI command sequence F5 NN, where NN is the substream
+number plus 1. Roland modules use this command to switch between different
+"parts", so this feature lets you treat each part as a distinct raw MIDI
+substream. The driver provides no way to send F5 00 (no selection) or to not
+send the F5 NN command sequence at all; perhaps it ought to.
+
+Usage example for simple serial converter:
+
+ /sbin/setserial /dev/ttyS0 uart none
+ /sbin/modprobe snd-serial-u16550 port=0x3f8 irq=4 speed=115200
+
+Usage example for Roland SoundCanvas with 4 MIDI ports:
+
+ /sbin/setserial /dev/ttyS0 uart none
+ /sbin/modprobe snd-serial-u16550 port=0x3f8 irq=4 outs=4
+
+In MS-124T mode, one raw MIDI substream is supported (midiCnD0); the outs
+module parameter is automatically set to 1. The driver sends the same data to
+all four MIDI Out connectors. Set the A-B switch and the speed module
+parameter to match (A=19200, B=9600).
+
+Usage example for MS-124T, with A-B switch in A position:
+
+ /sbin/setserial /dev/ttyS0 uart none
+ /sbin/modprobe snd-serial-u16550 port=0x3f8 irq=4 adaptor=1 \
+ speed=19200
+
+In MS-124W S/A mode, one raw MIDI substream is supported (midiCnD0);
+the outs module parameter is automatically set to 1. The driver sends
+the same data to all four MIDI Out connectors at full MIDI speed.
+
+Usage example for S/A mode:
+
+ /sbin/setserial /dev/ttyS0 uart none
+ /sbin/modprobe snd-serial-u16550 port=0x3f8 irq=4 adaptor=2
+
+In MS-124W M/B mode, the driver supports 16 ALSA raw MIDI substreams;
+the outs module parameter is automatically set to 16. The substream
+number gives a bitmask of which MIDI Out connectors the data should be
+sent to, with midiCnD1 sending to Out 1, midiCnD2 to Out 2, midiCnD4 to
+Out 3, and midiCnD8 to Out 4. Thus midiCnD15 sends the data to all 4 ports.
+As a special case, midiCnD0 also sends to all ports, since it is not useful
+to send the data to no ports. M/B mode has extra overhead to select the MIDI
+Out for each byte, so the aggregate data rate across all four MIDI Outs is
+at most one byte every 520 us, as compared with the full MIDI data rate of
+one byte every 320 us per port.
+
+Usage example for M/B mode:
+
+ /sbin/setserial /dev/ttyS0 uart none
+ /sbin/modprobe snd-serial-u16550 port=0x3f8 irq=4 adaptor=3
+
+The MS-124W hardware's M/A mode is currently not supported. This mode allows
+the MIDI Outs to act independently at double the aggregate throughput of M/B,
+but does not allow sending the same byte simultaneously to multiple MIDI Outs.
+The M/A protocol requires the driver to twiddle the modem control lines under
+timing constraints, so it would be a bit more complicated to implement than
+the other modes.
+
+Midiator models other than MS-124W and MS-124T are currently not supported.
+Note that the suffix letter is significant; the MS-124 and MS-124B are not
+compatible, nor are the other known models MS-101, MS-101B, MS-103, and MS-114.
+I do have documentation (tim.mann@compaq.com) that partially covers these models,
+but no units to experiment with. The MS-124W support is tested with a real unit.
+The MS-124T support is untested, but should work.
+
+The Generic driver supports multiple input and output substreams over a single
+serial port. Similar to Roland Soundcanvas mode, F5 NN is used to select the
+appropriate input or output stream (depending on the data direction).
+Additionally, the CTS signal is used to regulate the data flow. The number of
+inputs is specified by the ins parameter.
diff --git a/Documentation/sound/alsa/soc/DAI.txt b/Documentation/sound/alsa/soc/DAI.txt
new file mode 100644
index 000000000..c9679264c
--- /dev/null
+++ b/Documentation/sound/alsa/soc/DAI.txt
@@ -0,0 +1,56 @@
+ASoC currently supports the three main Digital Audio Interfaces (DAI) found on
+SoC controllers and portable audio CODECs today, namely AC97, I2S and PCM.
+
+
+AC97
+====
+
+ AC97 is a five wire interface commonly found on many PC sound cards. It is
+now also popular in many portable devices. This DAI has a reset line and time
+multiplexes its data on its SDATA_OUT (playback) and SDATA_IN (capture) lines.
+The bit clock (BCLK) is always driven by the CODEC (usually 12.288MHz) and the
+frame (FRAME) (usually 48kHz) is always driven by the controller. Each AC97
+frame is 21uS long and is divided into 13 time slots.
+
+The AC97 specification can be found at :-
+http://www.intel.com/p/en_US/business/design
+
+
+I2S
+===
+
+ I2S is a common 4 wire DAI used in HiFi, STB and portable devices. The Tx and
+Rx lines are used for audio transmission, whilst the bit clock (BCLK) and
+left/right clock (LRC) synchronise the link. I2S is flexible in that either the
+controller or CODEC can drive (master) the BCLK and LRC clock lines. Bit clock
+usually varies depending on the sample rate and the master system clock
+(SYSCLK). LRCLK is the same as the sample rate. A few devices support separate
+ADC and DAC LRCLKs, this allows for simultaneous capture and playback at
+different sample rates.
+
+I2S has several different operating modes:-
+
+ o I2S - MSB is transmitted on the falling edge of the first BCLK after LRC
+ transition.
+
+ o Left Justified - MSB is transmitted on transition of LRC.
+
+ o Right Justified - MSB is transmitted sample size BCLKs before LRC
+ transition.
+
+PCM
+===
+
+PCM is another 4 wire interface, very similar to I2S, which can support a more
+flexible protocol. It has bit clock (BCLK) and sync (SYNC) lines that are used
+to synchronise the link whilst the Tx and Rx lines are used to transmit and
+receive the audio data. Bit clock usually varies depending on sample rate
+whilst sync runs at the sample rate. PCM also supports Time Division
+Multiplexing (TDM) in that several devices can use the bus simultaneously (this
+is sometimes referred to as network mode).
+
+Common PCM operating modes:-
+
+ o Mode A - MSB is transmitted on falling edge of first BCLK after FRAME/SYNC.
+
+ o Mode B - MSB is transmitted on rising edge of FRAME/SYNC.
diff --git a/Documentation/sound/alsa/soc/DPCM.txt b/Documentation/sound/alsa/soc/DPCM.txt
new file mode 100644
index 000000000..0110180b7
--- /dev/null
+++ b/Documentation/sound/alsa/soc/DPCM.txt
@@ -0,0 +1,380 @@
+Dynamic PCM
+===========
+
+1. Description
+==============
+
+Dynamic PCM allows an ALSA PCM device to digitally route its PCM audio to
+various digital endpoints during the PCM stream runtime. e.g. PCM0 can route
+digital audio to I2S DAI0, I2S DAI1 or PDM DAI2. This is useful for on SoC DSP
+drivers that expose several ALSA PCMs and can route to multiple DAIs.
+
+The DPCM runtime routing is determined by the ALSA mixer settings in the same
+way as the analog signal is routed in an ASoC codec driver. DPCM uses a DAPM
+graph representing the DSP internal audio paths and uses the mixer settings to
+determine the patch used by each ALSA PCM.
+
+DPCM re-uses all the existing component codec, platform and DAI drivers without
+any modifications.
+
+
+Phone Audio System with SoC based DSP
+-------------------------------------
+
+Consider the following phone audio subsystem. This will be used in this
+document for all examples :-
+
+| Front End PCMs | SoC DSP | Back End DAIs | Audio devices |
+
+ *************
+PCM0 <------------> * * <----DAI0-----> Codec Headset
+ * *
+PCM1 <------------> * * <----DAI1-----> Codec Speakers
+ * DSP *
+PCM2 <------------> * * <----DAI2-----> MODEM
+ * *
+PCM3 <------------> * * <----DAI3-----> BT
+ * *
+ * * <----DAI4-----> DMIC
+ * *
+ * * <----DAI5-----> FM
+ *************
+
+This diagram shows a simple smart phone audio subsystem. It supports Bluetooth,
+FM digital radio, Speakers, Headset Jack, digital microphones and cellular
+modem. This sound card exposes 4 DSP front end (FE) ALSA PCM devices and
+supports 6 back end (BE) DAIs. Each FE PCM can digitally route audio data to any
+of the BE DAIs. The FE PCM devices can also route audio to more than 1 BE DAI.
+
+
+
+Example - DPCM Switching playback from DAI0 to DAI1
+---------------------------------------------------
+
+Audio is being played to the Headset. After a while the user removes the headset
+and audio continues playing on the speakers.
+
+Playback on PCM0 to Headset would look like :-
+
+ *************
+PCM0 <============> * * <====DAI0=====> Codec Headset
+ * *
+PCM1 <------------> * * <----DAI1-----> Codec Speakers
+ * DSP *
+PCM2 <------------> * * <----DAI2-----> MODEM
+ * *
+PCM3 <------------> * * <----DAI3-----> BT
+ * *
+ * * <----DAI4-----> DMIC
+ * *
+ * * <----DAI5-----> FM
+ *************
+
+The headset is removed from the jack by user so the speakers must now be used :-
+
+ *************
+PCM0 <============> * * <----DAI0-----> Codec Headset
+ * *
+PCM1 <------------> * * <====DAI1=====> Codec Speakers
+ * DSP *
+PCM2 <------------> * * <----DAI2-----> MODEM
+ * *
+PCM3 <------------> * * <----DAI3-----> BT
+ * *
+ * * <----DAI4-----> DMIC
+ * *
+ * * <----DAI5-----> FM
+ *************
+
+The audio driver processes this as follows :-
+
+ 1) Machine driver receives Jack removal event.
+
+ 2) Machine driver OR audio HAL disables the Headset path.
+
+ 3) DPCM runs the PCM trigger(stop), hw_free(), shutdown() operations on DAI0
+ for headset since the path is now disabled.
+
+ 4) Machine driver or audio HAL enables the speaker path.
+
+ 5) DPCM runs the PCM ops for startup(), hw_params(), prepapre() and
+ trigger(start) for DAI1 Speakers since the path is enabled.
+
+In this example, the machine driver or userspace audio HAL can alter the routing
+and then DPCM will take care of managing the DAI PCM operations to either bring
+the link up or down. Audio playback does not stop during this transition.
+
+
+
+DPCM machine driver
+===================
+
+The DPCM enabled ASoC machine driver is similar to normal machine drivers
+except that we also have to :-
+
+ 1) Define the FE and BE DAI links.
+
+ 2) Define any FE/BE PCM operations.
+
+ 3) Define widget graph connections.
+
+
+1 FE and BE DAI links
+---------------------
+
+| Front End PCMs | SoC DSP | Back End DAIs | Audio devices |
+
+ *************
+PCM0 <------------> * * <----DAI0-----> Codec Headset
+ * *
+PCM1 <------------> * * <----DAI1-----> Codec Speakers
+ * DSP *
+PCM2 <------------> * * <----DAI2-----> MODEM
+ * *
+PCM3 <------------> * * <----DAI3-----> BT
+ * *
+ * * <----DAI4-----> DMIC
+ * *
+ * * <----DAI5-----> FM
+ *************
+
+For the example above we have to define 4 FE DAI links and 6 BE DAI links. The
+FE DAI links are defined as follows :-
+
+static struct snd_soc_dai_link machine_dais[] = {
+ {
+ .name = "PCM0 System",
+ .stream_name = "System Playback",
+ .cpu_dai_name = "System Pin",
+ .platform_name = "dsp-audio",
+ .codec_name = "snd-soc-dummy",
+ .codec_dai_name = "snd-soc-dummy-dai",
+ .dynamic = 1,
+ .trigger = {SND_SOC_DPCM_TRIGGER_POST, SND_SOC_DPCM_TRIGGER_POST},
+ .dpcm_playback = 1,
+ },
+ .....< other FE and BE DAI links here >
+};
+
+This FE DAI link is pretty similar to a regular DAI link except that we also
+set the DAI link to a DPCM FE with the "dynamic = 1". The supported FE stream
+directions should also be set with the "dpcm_playback" and "dpcm_capture"
+flags. There is also an option to specify the ordering of the trigger call for
+each FE. This allows the ASoC core to trigger the DSP before or after the other
+components (as some DSPs have strong requirements for the ordering DAI/DSP
+start and stop sequences).
+
+The FE DAI above sets the codec and code DAIs to dummy devices since the BE is
+dynamic and will change depending on runtime config.
+
+The BE DAIs are configured as follows :-
+
+static struct snd_soc_dai_link machine_dais[] = {
+ .....< FE DAI links here >
+ {
+ .name = "Codec Headset",
+ .cpu_dai_name = "ssp-dai.0",
+ .platform_name = "snd-soc-dummy",
+ .no_pcm = 1,
+ .codec_name = "rt5640.0-001c",
+ .codec_dai_name = "rt5640-aif1",
+ .ignore_suspend = 1,
+ .ignore_pmdown_time = 1,
+ .be_hw_params_fixup = hswult_ssp0_fixup,
+ .ops = &haswell_ops,
+ .dpcm_playback = 1,
+ .dpcm_capture = 1,
+ },
+ .....< other BE DAI links here >
+};
+
+This BE DAI link connects DAI0 to the codec (in this case RT5460 AIF1). It sets
+the "no_pcm" flag to mark it has a BE and sets flags for supported stream
+directions using "dpcm_playback" and "dpcm_capture" above.
+
+The BE has also flags set for ignoring suspend and PM down time. This allows
+the BE to work in a hostless mode where the host CPU is not transferring data
+like a BT phone call :-
+
+ *************
+PCM0 <------------> * * <----DAI0-----> Codec Headset
+ * *
+PCM1 <------------> * * <----DAI1-----> Codec Speakers
+ * DSP *
+PCM2 <------------> * * <====DAI2=====> MODEM
+ * *
+PCM3 <------------> * * <====DAI3=====> BT
+ * *
+ * * <----DAI4-----> DMIC
+ * *
+ * * <----DAI5-----> FM
+ *************
+
+This allows the host CPU to sleep whilst the DSP, MODEM DAI and the BT DAI are
+still in operation.
+
+A BE DAI link can also set the codec to a dummy device if the code is a device
+that is managed externally.
+
+Likewise a BE DAI can also set a dummy cpu DAI if the CPU DAI is managed by the
+DSP firmware.
+
+
+2 FE/BE PCM operations
+----------------------
+
+The BE above also exports some PCM operations and a "fixup" callback. The fixup
+callback is used by the machine driver to (re)configure the DAI based upon the
+FE hw params. i.e. the DSP may perform SRC or ASRC from the FE to BE.
+
+e.g. DSP converts all FE hw params to run at fixed rate of 48k, 16bit, stereo for
+DAI0. This means all FE hw_params have to be fixed in the machine driver for
+DAI0 so that the DAI is running at desired configuration regardless of the FE
+configuration.
+
+static int dai0_fixup(struct snd_soc_pcm_runtime *rtd,
+ struct snd_pcm_hw_params *params)
+{
+ struct snd_interval *rate = hw_param_interval(params,
+ SNDRV_PCM_HW_PARAM_RATE);
+ struct snd_interval *channels = hw_param_interval(params,
+ SNDRV_PCM_HW_PARAM_CHANNELS);
+
+ /* The DSP will covert the FE rate to 48k, stereo */
+ rate->min = rate->max = 48000;
+ channels->min = channels->max = 2;
+
+ /* set DAI0 to 16 bit */
+ snd_mask_set(&params->masks[SNDRV_PCM_HW_PARAM_FORMAT -
+ SNDRV_PCM_HW_PARAM_FIRST_MASK],
+ SNDRV_PCM_FORMAT_S16_LE);
+ return 0;
+}
+
+The other PCM operation are the same as for regular DAI links. Use as necessary.
+
+
+3 Widget graph connections
+--------------------------
+
+The BE DAI links will normally be connected to the graph at initialisation time
+by the ASoC DAPM core. However, if the BE codec or BE DAI is a dummy then this
+has to be set explicitly in the driver :-
+
+/* BE for codec Headset - DAI0 is dummy and managed by DSP FW */
+{"DAI0 CODEC IN", NULL, "AIF1 Capture"},
+{"AIF1 Playback", NULL, "DAI0 CODEC OUT"},
+
+
+Writing a DPCM DSP driver
+=========================
+
+The DPCM DSP driver looks much like a standard platform class ASoC driver
+combined with elements from a codec class driver. A DSP platform driver must
+implement :-
+
+ 1) Front End PCM DAIs - i.e. struct snd_soc_dai_driver.
+
+ 2) DAPM graph showing DSP audio routing from FE DAIs to BEs.
+
+ 3) DAPM widgets from DSP graph.
+
+ 4) Mixers for gains, routing, etc.
+
+ 5) DMA configuration.
+
+ 6) BE AIF widgets.
+
+Items 6 is important for routing the audio outside of the DSP. AIF need to be
+defined for each BE and each stream direction. e.g for BE DAI0 above we would
+have :-
+
+SND_SOC_DAPM_AIF_IN("DAI0 RX", NULL, 0, SND_SOC_NOPM, 0, 0),
+SND_SOC_DAPM_AIF_OUT("DAI0 TX", NULL, 0, SND_SOC_NOPM, 0, 0),
+
+The BE AIF are used to connect the DSP graph to the graphs for the other
+component drivers (e.g. codec graph).
+
+
+Hostless PCM streams
+====================
+
+A hostless PCM stream is a stream that is not routed through the host CPU. An
+example of this would be a phone call from handset to modem.
+
+
+ *************
+PCM0 <------------> * * <----DAI0-----> Codec Headset
+ * *
+PCM1 <------------> * * <====DAI1=====> Codec Speakers/Mic
+ * DSP *
+PCM2 <------------> * * <====DAI2=====> MODEM
+ * *
+PCM3 <------------> * * <----DAI3-----> BT
+ * *
+ * * <----DAI4-----> DMIC
+ * *
+ * * <----DAI5-----> FM
+ *************
+
+In this case the PCM data is routed via the DSP. The host CPU in this use case
+is only used for control and can sleep during the runtime of the stream.
+
+The host can control the hostless link either by :-
+
+ 1) Configuring the link as a CODEC <-> CODEC style link. In this case the link
+ is enabled or disabled by the state of the DAPM graph. This usually means
+ there is a mixer control that can be used to connect or disconnect the path
+ between both DAIs.
+
+ 2) Hostless FE. This FE has a virtual connection to the BE DAI links on the DAPM
+ graph. Control is then carried out by the FE as regular PCM operations.
+ This method gives more control over the DAI links, but requires much more
+ userspace code to control the link. Its recommended to use CODEC<->CODEC
+ unless your HW needs more fine grained sequencing of the PCM ops.
+
+
+CODEC <-> CODEC link
+--------------------
+
+This DAI link is enabled when DAPM detects a valid path within the DAPM graph.
+The machine driver sets some additional parameters to the DAI link i.e.
+
+static const struct snd_soc_pcm_stream dai_params = {
+ .formats = SNDRV_PCM_FMTBIT_S32_LE,
+ .rate_min = 8000,
+ .rate_max = 8000,
+ .channels_min = 2,
+ .channels_max = 2,
+};
+
+static struct snd_soc_dai_link dais[] = {
+ < ... more DAI links above ... >
+ {
+ .name = "MODEM",
+ .stream_name = "MODEM",
+ .cpu_dai_name = "dai2",
+ .codec_dai_name = "modem-aif1",
+ .codec_name = "modem",
+ .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF
+ | SND_SOC_DAIFMT_CBM_CFM,
+ .params = &dai_params,
+ }
+ < ... more DAI links here ... >
+
+These parameters are used to configure the DAI hw_params() when DAPM detects a
+valid path and then calls the PCM operations to start the link. DAPM will also
+call the appropriate PCM operations to disable the DAI when the path is no
+longer valid.
+
+
+Hostless FE
+-----------
+
+The DAI link(s) are enabled by a FE that does not read or write any PCM data.
+This means creating a new FE that is connected with a virtual path to both
+DAI links. The DAI links will be started when the FE PCM is started and stopped
+when the FE PCM is stopped. Note that the FE PCM cannot read or write data in
+this configuration.
+
+
diff --git a/Documentation/sound/alsa/soc/clocking.txt b/Documentation/sound/alsa/soc/clocking.txt
new file mode 100644
index 000000000..b1300162e
--- /dev/null
+++ b/Documentation/sound/alsa/soc/clocking.txt
@@ -0,0 +1,51 @@
+Audio Clocking
+==============
+
+This text describes the audio clocking terms in ASoC and digital audio in
+general. Note: Audio clocking can be complex!
+
+
+Master Clock
+------------
+
+Every audio subsystem is driven by a master clock (sometimes referred to as MCLK
+or SYSCLK). This audio master clock can be derived from a number of sources
+(e.g. crystal, PLL, CPU clock) and is responsible for producing the correct
+audio playback and capture sample rates.
+
+Some master clocks (e.g. PLLs and CPU based clocks) are configurable in that
+their speed can be altered by software (depending on the system use and to save
+power). Other master clocks are fixed at a set frequency (i.e. crystals).
+
+
+DAI Clocks
+----------
+The Digital Audio Interface is usually driven by a Bit Clock (often referred to
+as BCLK). This clock is used to drive the digital audio data across the link
+between the codec and CPU.
+
+The DAI also has a frame clock to signal the start of each audio frame. This
+clock is sometimes referred to as LRC (left right clock) or FRAME. This clock
+runs at exactly the sample rate (LRC = Rate).
+
+Bit Clock can be generated as follows:-
+
+BCLK = MCLK / x
+
+ or
+
+BCLK = LRC * x
+
+ or
+
+BCLK = LRC * Channels * Word Size
+
+This relationship depends on the codec or SoC CPU in particular. In general
+it is best to configure BCLK to the lowest possible speed (depending on your
+rate, number of channels and word size) to save on power.
+
+It is also desirable to use the codec (if possible) to drive (or master) the
+audio clocks as it usually gives more accurate sample rates than the CPU.
+
+
+
diff --git a/Documentation/sound/alsa/soc/codec.txt b/Documentation/sound/alsa/soc/codec.txt
new file mode 100644
index 000000000..db5f9c9ae
--- /dev/null
+++ b/Documentation/sound/alsa/soc/codec.txt
@@ -0,0 +1,179 @@
+ASoC Codec Class Driver
+=======================
+
+The codec class driver is generic and hardware independent code that configures
+the codec, FM, MODEM, BT or external DSP to provide audio capture and playback.
+It should contain no code that is specific to the target platform or machine.
+All platform and machine specific code should be added to the platform and
+machine drivers respectively.
+
+Each codec class driver *must* provide the following features:-
+
+ 1) Codec DAI and PCM configuration
+ 2) Codec control IO - using RegMap API
+ 3) Mixers and audio controls
+ 4) Codec audio operations
+ 5) DAPM description.
+ 6) DAPM event handler.
+
+Optionally, codec drivers can also provide:-
+
+ 7) DAC Digital mute control.
+
+Its probably best to use this guide in conjunction with the existing codec
+driver code in sound/soc/codecs/
+
+ASoC Codec driver breakdown
+===========================
+
+1 - Codec DAI and PCM configuration
+-----------------------------------
+Each codec driver must have a struct snd_soc_dai_driver to define its DAI and
+PCM capabilities and operations. This struct is exported so that it can be
+registered with the core by your machine driver.
+
+e.g.
+
+static struct snd_soc_dai_ops wm8731_dai_ops = {
+ .prepare = wm8731_pcm_prepare,
+ .hw_params = wm8731_hw_params,
+ .shutdown = wm8731_shutdown,
+ .digital_mute = wm8731_mute,
+ .set_sysclk = wm8731_set_dai_sysclk,
+ .set_fmt = wm8731_set_dai_fmt,
+};
+
+struct snd_soc_dai_driver wm8731_dai = {
+ .name = "wm8731-hifi",
+ .playback = {
+ .stream_name = "Playback",
+ .channels_min = 1,
+ .channels_max = 2,
+ .rates = WM8731_RATES,
+ .formats = WM8731_FORMATS,},
+ .capture = {
+ .stream_name = "Capture",
+ .channels_min = 1,
+ .channels_max = 2,
+ .rates = WM8731_RATES,
+ .formats = WM8731_FORMATS,},
+ .ops = &wm8731_dai_ops,
+ .symmetric_rates = 1,
+};
+
+
+2 - Codec control IO
+--------------------
+The codec can usually be controlled via an I2C or SPI style interface
+(AC97 combines control with data in the DAI). The codec driver should use the
+Regmap API for all codec IO. Please see include/linux/regmap.h and existing
+codec drivers for example regmap usage.
+
+
+3 - Mixers and audio controls
+-----------------------------
+All the codec mixers and audio controls can be defined using the convenience
+macros defined in soc.h.
+
+ #define SOC_SINGLE(xname, reg, shift, mask, invert)
+
+Defines a single control as follows:-
+
+ xname = Control name e.g. "Playback Volume"
+ reg = codec register
+ shift = control bit(s) offset in register
+ mask = control bit size(s) e.g. mask of 7 = 3 bits
+ invert = the control is inverted
+
+Other macros include:-
+
+ #define SOC_DOUBLE(xname, reg, shift_left, shift_right, mask, invert)
+
+A stereo control
+
+ #define SOC_DOUBLE_R(xname, reg_left, reg_right, shift, mask, invert)
+
+A stereo control spanning 2 registers
+
+ #define SOC_ENUM_SINGLE(xreg, xshift, xmask, xtexts)
+
+Defines an single enumerated control as follows:-
+
+ xreg = register
+ xshift = control bit(s) offset in register
+ xmask = control bit(s) size
+ xtexts = pointer to array of strings that describe each setting
+
+ #define SOC_ENUM_DOUBLE(xreg, xshift_l, xshift_r, xmask, xtexts)
+
+Defines a stereo enumerated control
+
+
+4 - Codec Audio Operations
+--------------------------
+The codec driver also supports the following ALSA PCM operations:-
+
+/* SoC audio ops */
+struct snd_soc_ops {
+ int (*startup)(struct snd_pcm_substream *);
+ void (*shutdown)(struct snd_pcm_substream *);
+ int (*hw_params)(struct snd_pcm_substream *, struct snd_pcm_hw_params *);
+ int (*hw_free)(struct snd_pcm_substream *);
+ int (*prepare)(struct snd_pcm_substream *);
+};
+
+Please refer to the ALSA driver PCM documentation for details.
+http://www.alsa-project.org/~iwai/writing-an-alsa-driver/
+
+
+5 - DAPM description.
+---------------------
+The Dynamic Audio Power Management description describes the codec power
+components and their relationships and registers to the ASoC core.
+Please read dapm.txt for details of building the description.
+
+Please also see the examples in other codec drivers.
+
+
+6 - DAPM event handler
+----------------------
+This function is a callback that handles codec domain PM calls and system
+domain PM calls (e.g. suspend and resume). It is used to put the codec
+to sleep when not in use.
+
+Power states:-
+
+ SNDRV_CTL_POWER_D0: /* full On */
+ /* vref/mid, clk and osc on, active */
+
+ SNDRV_CTL_POWER_D1: /* partial On */
+ SNDRV_CTL_POWER_D2: /* partial On */
+
+ SNDRV_CTL_POWER_D3hot: /* Off, with power */
+ /* everything off except vref/vmid, inactive */
+
+ SNDRV_CTL_POWER_D3cold: /* Everything Off, without power */
+
+
+7 - Codec DAC digital mute control
+----------------------------------
+Most codecs have a digital mute before the DACs that can be used to
+minimise any system noise. The mute stops any digital data from
+entering the DAC.
+
+A callback can be created that is called by the core for each codec DAI
+when the mute is applied or freed.
+
+i.e.
+
+static int wm8974_mute(struct snd_soc_dai *dai, int mute)
+{
+ struct snd_soc_codec *codec = dai->codec;
+ u16 mute_reg = snd_soc_read(codec, WM8974_DAC) & 0xffbf;
+
+ if (mute)
+ snd_soc_write(codec, WM8974_DAC, mute_reg | 0x40);
+ else
+ snd_soc_write(codec, WM8974_DAC, mute_reg);
+ return 0;
+}
diff --git a/Documentation/sound/alsa/soc/dapm.txt b/Documentation/sound/alsa/soc/dapm.txt
new file mode 100644
index 000000000..6faab4880
--- /dev/null
+++ b/Documentation/sound/alsa/soc/dapm.txt
@@ -0,0 +1,305 @@
+Dynamic Audio Power Management for Portable Devices
+===================================================
+
+1. Description
+==============
+
+Dynamic Audio Power Management (DAPM) is designed to allow portable
+Linux devices to use the minimum amount of power within the audio
+subsystem at all times. It is independent of other kernel PM and as
+such, can easily co-exist with the other PM systems.
+
+DAPM is also completely transparent to all user space applications as
+all power switching is done within the ASoC core. No code changes or
+recompiling are required for user space applications. DAPM makes power
+switching decisions based upon any audio stream (capture/playback)
+activity and audio mixer settings within the device.
+
+DAPM spans the whole machine. It covers power control within the entire
+audio subsystem, this includes internal codec power blocks and machine
+level power systems.
+
+There are 4 power domains within DAPM
+
+ 1. Codec bias domain - VREF, VMID (core codec and audio power)
+ Usually controlled at codec probe/remove and suspend/resume, although
+ can be set at stream time if power is not needed for sidetone, etc.
+
+ 2. Platform/Machine domain - physically connected inputs and outputs
+ Is platform/machine and user action specific, is configured by the
+ machine driver and responds to asynchronous events e.g when HP
+ are inserted
+
+ 3. Path domain - audio subsystem signal paths
+ Automatically set when mixer and mux settings are changed by the user.
+ e.g. alsamixer, amixer.
+
+ 4. Stream domain - DACs and ADCs.
+ Enabled and disabled when stream playback/capture is started and
+ stopped respectively. e.g. aplay, arecord.
+
+All DAPM power switching decisions are made automatically by consulting an audio
+routing map of the whole machine. This map is specific to each machine and
+consists of the interconnections between every audio component (including
+internal codec components). All audio components that effect power are called
+widgets hereafter.
+
+
+2. DAPM Widgets
+===============
+
+Audio DAPM widgets fall into a number of types:-
+
+ o Mixer - Mixes several analog signals into a single analog signal.
+ o Mux - An analog switch that outputs only one of many inputs.
+ o PGA - A programmable gain amplifier or attenuation widget.
+ o ADC - Analog to Digital Converter
+ o DAC - Digital to Analog Converter
+ o Switch - An analog switch
+ o Input - A codec input pin
+ o Output - A codec output pin
+ o Headphone - Headphone (and optional Jack)
+ o Mic - Mic (and optional Jack)
+ o Line - Line Input/Output (and optional Jack)
+ o Speaker - Speaker
+ o Supply - Power or clock supply widget used by other widgets.
+ o Regulator - External regulator that supplies power to audio components.
+ o Clock - External clock that supplies clock to audio components.
+ o AIF IN - Audio Interface Input (with TDM slot mask).
+ o AIF OUT - Audio Interface Output (with TDM slot mask).
+ o Siggen - Signal Generator.
+ o DAI IN - Digital Audio Interface Input.
+ o DAI OUT - Digital Audio Interface Output.
+ o DAI Link - DAI Link between two DAI structures */
+ o Pre - Special PRE widget (exec before all others)
+ o Post - Special POST widget (exec after all others)
+
+(Widgets are defined in include/sound/soc-dapm.h)
+
+Widgets can be added to the sound card by any of the component driver types.
+There are convenience macros defined in soc-dapm.h that can be used to quickly
+build a list of widgets of the codecs and machines DAPM widgets.
+
+Most widgets have a name, register, shift and invert. Some widgets have extra
+parameters for stream name and kcontrols.
+
+
+2.1 Stream Domain Widgets
+-------------------------
+
+Stream Widgets relate to the stream power domain and only consist of ADCs
+(analog to digital converters), DACs (digital to analog converters),
+AIF IN and AIF OUT.
+
+Stream widgets have the following format:-
+
+SND_SOC_DAPM_DAC(name, stream name, reg, shift, invert),
+SND_SOC_DAPM_AIF_IN(name, stream, slot, reg, shift, invert)
+
+NOTE: the stream name must match the corresponding stream name in your codec
+snd_soc_codec_dai.
+
+e.g. stream widgets for HiFi playback and capture
+
+SND_SOC_DAPM_DAC("HiFi DAC", "HiFi Playback", REG, 3, 1),
+SND_SOC_DAPM_ADC("HiFi ADC", "HiFi Capture", REG, 2, 1),
+
+e.g. stream widgets for AIF
+
+SND_SOC_DAPM_AIF_IN("AIF1RX", "AIF1 Playback", 0, SND_SOC_NOPM, 0, 0),
+SND_SOC_DAPM_AIF_OUT("AIF1TX", "AIF1 Capture", 0, SND_SOC_NOPM, 0, 0),
+
+
+2.2 Path Domain Widgets
+-----------------------
+
+Path domain widgets have a ability to control or affect the audio signal or
+audio paths within the audio subsystem. They have the following form:-
+
+SND_SOC_DAPM_PGA(name, reg, shift, invert, controls, num_controls)
+
+Any widget kcontrols can be set using the controls and num_controls members.
+
+e.g. Mixer widget (the kcontrols are declared first)
+
+/* Output Mixer */
+static const snd_kcontrol_new_t wm8731_output_mixer_controls[] = {
+SOC_DAPM_SINGLE("Line Bypass Switch", WM8731_APANA, 3, 1, 0),
+SOC_DAPM_SINGLE("Mic Sidetone Switch", WM8731_APANA, 5, 1, 0),
+SOC_DAPM_SINGLE("HiFi Playback Switch", WM8731_APANA, 4, 1, 0),
+};
+
+SND_SOC_DAPM_MIXER("Output Mixer", WM8731_PWR, 4, 1, wm8731_output_mixer_controls,
+ ARRAY_SIZE(wm8731_output_mixer_controls)),
+
+If you dont want the mixer elements prefixed with the name of the mixer widget,
+you can use SND_SOC_DAPM_MIXER_NAMED_CTL instead. the parameters are the same
+as for SND_SOC_DAPM_MIXER.
+
+
+2.3 Machine domain Widgets
+--------------------------
+
+Machine widgets are different from codec widgets in that they don't have a
+codec register bit associated with them. A machine widget is assigned to each
+machine audio component (non codec or DSP) that can be independently
+powered. e.g.
+
+ o Speaker Amp
+ o Microphone Bias
+ o Jack connectors
+
+A machine widget can have an optional call back.
+
+e.g. Jack connector widget for an external Mic that enables Mic Bias
+when the Mic is inserted:-
+
+static int spitz_mic_bias(struct snd_soc_dapm_widget* w, int event)
+{
+ gpio_set_value(SPITZ_GPIO_MIC_BIAS, SND_SOC_DAPM_EVENT_ON(event));
+ return 0;
+}
+
+SND_SOC_DAPM_MIC("Mic Jack", spitz_mic_bias),
+
+
+2.4 Codec (BIAS) Domain
+-----------------------
+
+The codec bias power domain has no widgets and is handled by the codecs DAPM
+event handler. This handler is called when the codec powerstate is changed wrt
+to any stream event or by kernel PM events.
+
+
+2.5 Virtual Widgets
+-------------------
+
+Sometimes widgets exist in the codec or machine audio map that don't have any
+corresponding soft power control. In this case it is necessary to create
+a virtual widget - a widget with no control bits e.g.
+
+SND_SOC_DAPM_MIXER("AC97 Mixer", SND_SOC_DAPM_NOPM, 0, 0, NULL, 0),
+
+This can be used to merge to signal paths together in software.
+
+After all the widgets have been defined, they can then be added to the DAPM
+subsystem individually with a call to snd_soc_dapm_new_control().
+
+
+3. Codec/DSP Widget Interconnections
+====================================
+
+Widgets are connected to each other within the codec, platform and machine by
+audio paths (called interconnections). Each interconnection must be defined in
+order to create a map of all audio paths between widgets.
+
+This is easiest with a diagram of the codec or DSP (and schematic of the machine
+audio system), as it requires joining widgets together via their audio signal
+paths.
+
+e.g., from the WM8731 output mixer (wm8731.c)
+
+The WM8731 output mixer has 3 inputs (sources)
+
+ 1. Line Bypass Input
+ 2. DAC (HiFi playback)
+ 3. Mic Sidetone Input
+
+Each input in this example has a kcontrol associated with it (defined in example
+above) and is connected to the output mixer via its kcontrol name. We can now
+connect the destination widget (wrt audio signal) with its source widgets.
+
+ /* output mixer */
+ {"Output Mixer", "Line Bypass Switch", "Line Input"},
+ {"Output Mixer", "HiFi Playback Switch", "DAC"},
+ {"Output Mixer", "Mic Sidetone Switch", "Mic Bias"},
+
+So we have :-
+
+ Destination Widget <=== Path Name <=== Source Widget
+
+Or:-
+
+ Sink, Path, Source
+
+Or :-
+
+ "Output Mixer" is connected to the "DAC" via the "HiFi Playback Switch".
+
+When there is no path name connecting widgets (e.g. a direct connection) we
+pass NULL for the path name.
+
+Interconnections are created with a call to:-
+
+snd_soc_dapm_connect_input(codec, sink, path, source);
+
+Finally, snd_soc_dapm_new_widgets(codec) must be called after all widgets and
+interconnections have been registered with the core. This causes the core to
+scan the codec and machine so that the internal DAPM state matches the
+physical state of the machine.
+
+
+3.1 Machine Widget Interconnections
+-----------------------------------
+Machine widget interconnections are created in the same way as codec ones and
+directly connect the codec pins to machine level widgets.
+
+e.g. connects the speaker out codec pins to the internal speaker.
+
+ /* ext speaker connected to codec pins LOUT2, ROUT2 */
+ {"Ext Spk", NULL , "ROUT2"},
+ {"Ext Spk", NULL , "LOUT2"},
+
+This allows the DAPM to power on and off pins that are connected (and in use)
+and pins that are NC respectively.
+
+
+4 Endpoint Widgets
+===================
+An endpoint is a start or end point (widget) of an audio signal within the
+machine and includes the codec. e.g.
+
+ o Headphone Jack
+ o Internal Speaker
+ o Internal Mic
+ o Mic Jack
+ o Codec Pins
+
+Endpoints are added to the DAPM graph so that their usage can be determined in
+order to save power. e.g. NC codecs pins will be switched OFF, unconnected
+jacks can also be switched OFF.
+
+
+5 DAPM Widget Events
+====================
+
+Some widgets can register their interest with the DAPM core in PM events.
+e.g. A Speaker with an amplifier registers a widget so the amplifier can be
+powered only when the spk is in use.
+
+/* turn speaker amplifier on/off depending on use */
+static int corgi_amp_event(struct snd_soc_dapm_widget *w, int event)
+{
+ gpio_set_value(CORGI_GPIO_APM_ON, SND_SOC_DAPM_EVENT_ON(event));
+ return 0;
+}
+
+/* corgi machine dapm widgets */
+static const struct snd_soc_dapm_widget wm8731_dapm_widgets =
+ SND_SOC_DAPM_SPK("Ext Spk", corgi_amp_event);
+
+Please see soc-dapm.h for all other widgets that support events.
+
+
+5.1 Event types
+---------------
+
+The following event types are supported by event widgets.
+
+/* dapm event types */
+#define SND_SOC_DAPM_PRE_PMU 0x1 /* before widget power up */
+#define SND_SOC_DAPM_POST_PMU 0x2 /* after widget power up */
+#define SND_SOC_DAPM_PRE_PMD 0x4 /* before widget power down */
+#define SND_SOC_DAPM_POST_PMD 0x8 /* after widget power down */
+#define SND_SOC_DAPM_PRE_REG 0x10 /* before audio path setup */
+#define SND_SOC_DAPM_POST_REG 0x20 /* after audio path setup */
diff --git a/Documentation/sound/alsa/soc/jack.txt b/Documentation/sound/alsa/soc/jack.txt
new file mode 100644
index 000000000..fcf82a417
--- /dev/null
+++ b/Documentation/sound/alsa/soc/jack.txt
@@ -0,0 +1,71 @@
+ASoC jack detection
+===================
+
+ALSA has a standard API for representing physical jacks to user space,
+the kernel side of which can be seen in include/sound/jack.h. ASoC
+provides a version of this API adding two additional features:
+
+ - It allows more than one jack detection method to work together on one
+ user visible jack. In embedded systems it is common for multiple
+ to be present on a single jack but handled by separate bits of
+ hardware.
+
+ - Integration with DAPM, allowing DAPM endpoints to be updated
+ automatically based on the detected jack status (eg, turning off the
+ headphone outputs if no headphones are present).
+
+This is done by splitting the jacks up into three things working
+together: the jack itself represented by a struct snd_soc_jack, sets of
+snd_soc_jack_pins representing DAPM endpoints to update and blocks of
+code providing jack reporting mechanisms.
+
+For example, a system may have a stereo headset jack with two reporting
+mechanisms, one for the headphone and one for the microphone. Some
+systems won't be able to use their speaker output while a headphone is
+connected and so will want to make sure to update both speaker and
+headphone when the headphone jack status changes.
+
+The jack - struct snd_soc_jack
+==============================
+
+This represents a physical jack on the system and is what is visible to
+user space. The jack itself is completely passive, it is set up by the
+machine driver and updated by jack detection methods.
+
+Jacks are created by the machine driver calling snd_soc_jack_new().
+
+snd_soc_jack_pin
+================
+
+These represent a DAPM pin to update depending on some of the status
+bits supported by the jack. Each snd_soc_jack has zero or more of these
+which are updated automatically. They are created by the machine driver
+and associated with the jack using snd_soc_jack_add_pins(). The status
+of the endpoint may configured to be the opposite of the jack status if
+required (eg, enabling a built in microphone if a microphone is not
+connected via a jack).
+
+Jack detection methods
+======================
+
+Actual jack detection is done by code which is able to monitor some
+input to the system and update a jack by calling snd_soc_jack_report(),
+specifying a subset of bits to update. The jack detection code should
+be set up by the machine driver, taking configuration for the jack to
+update and the set of things to report when the jack is connected.
+
+Often this is done based on the status of a GPIO - a handler for this is
+provided by the snd_soc_jack_add_gpio() function. Other methods are
+also available, for example integrated into CODECs. One example of
+CODEC integrated jack detection can be see in the WM8350 driver.
+
+Each jack may have multiple reporting mechanisms, though it will need at
+least one to be useful.
+
+Machine drivers
+===============
+
+These are all hooked together by the machine driver depending on the
+system hardware. The machine driver will set up the snd_soc_jack and
+the list of pins to update then set up one or more jack detection
+mechanisms to update that jack based on their current status.
diff --git a/Documentation/sound/alsa/soc/machine.txt b/Documentation/sound/alsa/soc/machine.txt
new file mode 100644
index 000000000..74056dba5
--- /dev/null
+++ b/Documentation/sound/alsa/soc/machine.txt
@@ -0,0 +1,93 @@
+ASoC Machine Driver
+===================
+
+The ASoC machine (or board) driver is the code that glues together all the
+component drivers (e.g. codecs, platforms and DAIs). It also describes the
+relationships between each componnent which include audio paths, GPIOs,
+interrupts, clocking, jacks and voltage regulators.
+
+The machine driver can contain codec and platform specific code. It registers
+the audio subsystem with the kernel as a platform device and is represented by
+the following struct:-
+
+/* SoC machine */
+struct snd_soc_card {
+ char *name;
+
+ ...
+
+ int (*probe)(struct platform_device *pdev);
+ int (*remove)(struct platform_device *pdev);
+
+ /* the pre and post PM functions are used to do any PM work before and
+ * after the codec and DAIs do any PM work. */
+ int (*suspend_pre)(struct platform_device *pdev, pm_message_t state);
+ int (*suspend_post)(struct platform_device *pdev, pm_message_t state);
+ int (*resume_pre)(struct platform_device *pdev);
+ int (*resume_post)(struct platform_device *pdev);
+
+ ...
+
+ /* CPU <--> Codec DAI links */
+ struct snd_soc_dai_link *dai_link;
+ int num_links;
+
+ ...
+};
+
+probe()/remove()
+----------------
+probe/remove are optional. Do any machine specific probe here.
+
+
+suspend()/resume()
+------------------
+The machine driver has pre and post versions of suspend and resume to take care
+of any machine audio tasks that have to be done before or after the codec, DAIs
+and DMA is suspended and resumed. Optional.
+
+
+Machine DAI Configuration
+-------------------------
+The machine DAI configuration glues all the codec and CPU DAIs together. It can
+also be used to set up the DAI system clock and for any machine related DAI
+initialisation e.g. the machine audio map can be connected to the codec audio
+map, unconnected codec pins can be set as such.
+
+struct snd_soc_dai_link is used to set up each DAI in your machine. e.g.
+
+/* corgi digital audio interface glue - connects codec <--> CPU */
+static struct snd_soc_dai_link corgi_dai = {
+ .name = "WM8731",
+ .stream_name = "WM8731",
+ .cpu_dai_name = "pxa-is2-dai",
+ .codec_dai_name = "wm8731-hifi",
+ .platform_name = "pxa-pcm-audio",
+ .codec_name = "wm8713-codec.0-001a",
+ .init = corgi_wm8731_init,
+ .ops = &corgi_ops,
+};
+
+struct snd_soc_card then sets up the machine with its DAIs. e.g.
+
+/* corgi audio machine driver */
+static struct snd_soc_card snd_soc_corgi = {
+ .name = "Corgi",
+ .dai_link = &corgi_dai,
+ .num_links = 1,
+};
+
+
+Machine Power Map
+-----------------
+
+The machine driver can optionally extend the codec power map and to become an
+audio power map of the audio subsystem. This allows for automatic power up/down
+of speaker/HP amplifiers, etc. Codec pins can be connected to the machines jack
+sockets in the machine init function.
+
+
+Machine Controls
+----------------
+
+Machine specific audio mixer controls can be added in the DAI init function.
diff --git a/Documentation/sound/alsa/soc/overview.txt b/Documentation/sound/alsa/soc/overview.txt
new file mode 100644
index 000000000..ff88f52ee
--- /dev/null
+++ b/Documentation/sound/alsa/soc/overview.txt
@@ -0,0 +1,95 @@
+ALSA SoC Layer
+==============
+
+The overall project goal of the ALSA System on Chip (ASoC) layer is to
+provide better ALSA support for embedded system-on-chip processors (e.g.
+pxa2xx, au1x00, iMX, etc) and portable audio codecs. Prior to the ASoC
+subsystem there was some support in the kernel for SoC audio, however it
+had some limitations:-
+
+ * Codec drivers were often tightly coupled to the underlying SoC
+ CPU. This is not ideal and leads to code duplication - for example,
+ Linux had different wm8731 drivers for 4 different SoC platforms.
+
+ * There was no standard method to signal user initiated audio events (e.g.
+ Headphone/Mic insertion, Headphone/Mic detection after an insertion
+ event). These are quite common events on portable devices and often require
+ machine specific code to re-route audio, enable amps, etc., after such an
+ event.
+
+ * Drivers tended to power up the entire codec when playing (or
+ recording) audio. This is fine for a PC, but tends to waste a lot of
+ power on portable devices. There was also no support for saving
+ power via changing codec oversampling rates, bias currents, etc.
+
+
+ASoC Design
+===========
+
+The ASoC layer is designed to address these issues and provide the following
+features :-
+
+ * Codec independence. Allows reuse of codec drivers on other platforms
+ and machines.
+
+ * Easy I2S/PCM audio interface setup between codec and SoC. Each SoC
+ interface and codec registers its audio interface capabilities with the
+ core and are subsequently matched and configured when the application
+ hardware parameters are known.
+
+ * Dynamic Audio Power Management (DAPM). DAPM automatically sets the codec to
+ its minimum power state at all times. This includes powering up/down
+ internal power blocks depending on the internal codec audio routing and any
+ active streams.
+
+ * Pop and click reduction. Pops and clicks can be reduced by powering the
+ codec up/down in the correct sequence (including using digital mute). ASoC
+ signals the codec when to change power states.
+
+ * Machine specific controls: Allow machines to add controls to the sound card
+ (e.g. volume control for speaker amplifier).
+
+To achieve all this, ASoC basically splits an embedded audio system into
+multiple re-usable component drivers :-
+
+ * Codec class drivers: The codec class driver is platform independent and
+ contains audio controls, audio interface capabilities, codec DAPM
+ definition and codec IO functions. This class extends to BT, FM and MODEM
+ ICs if required. Codec class drivers should be generic code that can run
+ on any architecture and machine.
+
+ * Platform class drivers: The platform class driver includes the audio DMA
+ engine driver, digital audio interface (DAI) drivers (e.g. I2S, AC97, PCM)
+ and any audio DSP drivers for that platform.
+
+ * Machine class driver: The machine driver class acts as the glue that
+ decribes and binds the other component drivers together to form an ALSA
+ "sound card device". It handles any machine specific controls and
+ machine level audio events (e.g. turning on an amp at start of playback).
+
+
+Documentation
+=============
+
+The documentation is spilt into the following sections:-
+
+overview.txt: This file.
+
+codec.txt: Codec driver internals.
+
+DAI.txt: Description of Digital Audio Interface standards and how to configure
+a DAI within your codec and CPU DAI drivers.
+
+dapm.txt: Dynamic Audio Power Management
+
+platform.txt: Platform audio DMA and DAI.
+
+machine.txt: Machine driver internals.
+
+pop_clicks.txt: How to minimise audio artifacts.
+
+clocking.txt: ASoC clocking for best power performance.
+
+jack.txt: ASoC jack detection.
+
+DPCM.txt: Dynamic PCM - Describes DPCM with DSP examples.
diff --git a/Documentation/sound/alsa/soc/platform.txt b/Documentation/sound/alsa/soc/platform.txt
new file mode 100644
index 000000000..3a08a2c91
--- /dev/null
+++ b/Documentation/sound/alsa/soc/platform.txt
@@ -0,0 +1,79 @@
+ASoC Platform Driver
+====================
+
+An ASoC platform driver class can be divided into audio DMA drivers, SoC DAI
+drivers and DSP drivers. The platform drivers only target the SoC CPU and must
+have no board specific code.
+
+Audio DMA
+=========
+
+The platform DMA driver optionally supports the following ALSA operations:-
+
+/* SoC audio ops */
+struct snd_soc_ops {
+ int (*startup)(struct snd_pcm_substream *);
+ void (*shutdown)(struct snd_pcm_substream *);
+ int (*hw_params)(struct snd_pcm_substream *, struct snd_pcm_hw_params *);
+ int (*hw_free)(struct snd_pcm_substream *);
+ int (*prepare)(struct snd_pcm_substream *);
+ int (*trigger)(struct snd_pcm_substream *, int);
+};
+
+The platform driver exports its DMA functionality via struct
+snd_soc_platform_driver:-
+
+struct snd_soc_platform_driver {
+ char *name;
+
+ int (*probe)(struct platform_device *pdev);
+ int (*remove)(struct platform_device *pdev);
+ int (*suspend)(struct platform_device *pdev, struct snd_soc_cpu_dai *cpu_dai);
+ int (*resume)(struct platform_device *pdev, struct snd_soc_cpu_dai *cpu_dai);
+
+ /* pcm creation and destruction */
+ int (*pcm_new)(struct snd_card *, struct snd_soc_codec_dai *, struct snd_pcm *);
+ void (*pcm_free)(struct snd_pcm *);
+
+ /*
+ * For platform caused delay reporting.
+ * Optional.
+ */
+ snd_pcm_sframes_t (*delay)(struct snd_pcm_substream *,
+ struct snd_soc_dai *);
+
+ /* platform stream ops */
+ struct snd_pcm_ops *pcm_ops;
+};
+
+Please refer to the ALSA driver documentation for details of audio DMA.
+http://www.alsa-project.org/~iwai/writing-an-alsa-driver/
+
+An example DMA driver is soc/pxa/pxa2xx-pcm.c
+
+
+SoC DAI Drivers
+===============
+
+Each SoC DAI driver must provide the following features:-
+
+ 1) Digital audio interface (DAI) description
+ 2) Digital audio interface configuration
+ 3) PCM's description
+ 4) SYSCLK configuration
+ 5) Suspend and resume (optional)
+
+Please see codec.txt for a description of items 1 - 4.
+
+
+SoC DSP Drivers
+===============
+
+Each SoC DSP driver usually supplies the following features :-
+
+ 1) DAPM graph
+ 2) Mixer controls
+ 3) DMA IO to/from DSP buffers (if applicable)
+ 4) Definition of DSP front end (FE) PCM devices.
+
+Please see DPCM.txt for a description of item 4.
diff --git a/Documentation/sound/alsa/soc/pops_clicks.txt b/Documentation/sound/alsa/soc/pops_clicks.txt
new file mode 100644
index 000000000..e1e74daa4
--- /dev/null
+++ b/Documentation/sound/alsa/soc/pops_clicks.txt
@@ -0,0 +1,52 @@
+Audio Pops and Clicks
+=====================
+
+Pops and clicks are unwanted audio artifacts caused by the powering up and down
+of components within the audio subsystem. This is noticeable on PCs when an
+audio module is either loaded or unloaded (at module load time the sound card is
+powered up and causes a popping noise on the speakers).
+
+Pops and clicks can be more frequent on portable systems with DAPM. This is
+because the components within the subsystem are being dynamically powered
+depending on the audio usage and this can subsequently cause a small pop or
+click every time a component power state is changed.
+
+
+Minimising Playback Pops and Clicks
+===================================
+
+Playback pops in portable audio subsystems cannot be completely eliminated
+currently, however future audio codec hardware will have better pop and click
+suppression. Pops can be reduced within playback by powering the audio
+components in a specific order. This order is different for startup and
+shutdown and follows some basic rules:-
+
+ Startup Order :- DAC --> Mixers --> Output PGA --> Digital Unmute
+
+ Shutdown Order :- Digital Mute --> Output PGA --> Mixers --> DAC
+
+This assumes that the codec PCM output path from the DAC is via a mixer and then
+a PGA (programmable gain amplifier) before being output to the speakers.
+
+
+Minimising Capture Pops and Clicks
+==================================
+
+Capture artifacts are somewhat easier to get rid as we can delay activating the
+ADC until all the pops have occurred. This follows similar power rules to
+playback in that components are powered in a sequence depending upon stream
+startup or shutdown.
+
+ Startup Order - Input PGA --> Mixers --> ADC
+
+ Shutdown Order - ADC --> Mixers --> Input PGA
+
+
+Zipper Noise
+============
+An unwanted zipper noise can occur within the audio playback or capture stream
+when a volume control is changed near its maximum gain value. The zipper noise
+is heard when the gain increase or decrease changes the mean audio signal
+amplitude too quickly. It can be minimised by enabling the zero cross setting
+for each volume control. The ZC forces the gain change to occur when the signal
+crosses the zero amplitude line.
diff --git a/Documentation/sound/alsa/timestamping.txt b/Documentation/sound/alsa/timestamping.txt
new file mode 100644
index 000000000..0b191a23f
--- /dev/null
+++ b/Documentation/sound/alsa/timestamping.txt
@@ -0,0 +1,200 @@
+The ALSA API can provide two different system timestamps:
+
+- Trigger_tstamp is the system time snapshot taken when the .trigger
+callback is invoked. This snapshot is taken by the ALSA core in the
+general case, but specific hardware may have synchronization
+capabilities or conversely may only be able to provide a correct
+estimate with a delay. In the latter two cases, the low-level driver
+is responsible for updating the trigger_tstamp at the most appropriate
+and precise moment. Applications should not rely solely on the first
+trigger_tstamp but update their internal calculations if the driver
+provides a refined estimate with a delay.
+
+- tstamp is the current system timestamp updated during the last
+event or application query.
+The difference (tstamp - trigger_tstamp) defines the elapsed time.
+
+The ALSA API provides reports two basic pieces of information, avail
+and delay, which combined with the trigger and current system
+timestamps allow for applications to keep track of the 'fullness' of
+the ring buffer and the amount of queued samples.
+
+The use of these different pointers and time information depends on
+the application needs:
+
+- 'avail' reports how much can be written in the ring buffer
+- 'delay' reports the time it will take to hear a new sample after all
+queued samples have been played out.
+
+When timestamps are enabled, the avail/delay information is reported
+along with a snapshot of system time. Applications can select from
+CLOCK_REALTIME (NTP corrections including going backwards),
+CLOCK_MONOTONIC (NTP corrections but never going backwards),
+CLOCK_MONOTIC_RAW (without NTP corrections) and change the mode
+dynamically with sw_params
+
+
+The ALSA API also provide an audio_tstamp which reflects the passage
+of time as measured by different components of audio hardware. In
+ascii-art, this could be represented as follows (for the playback
+case):
+
+
+--------------------------------------------------------------> time
+ ^ ^ ^ ^ ^
+ | | | | |
+ analog link dma app FullBuffer
+ time time time time time
+ | | | | |
+ |< codec delay >|<--hw delay-->|<queued samples>|<---avail->|
+ |<----------------- delay---------------------->| |
+ |<----ring buffer length---->|
+
+The analog time is taken at the last stage of the playback, as close
+as possible to the actual transducer
+
+The link time is taken at the output of the SOC/chipset as the samples
+are pushed on a link. The link time can be directly measured if
+supported in hardware by sample counters or wallclocks (e.g. with
+HDAudio 24MHz or PTP clock for networked solutions) or indirectly
+estimated (e.g. with the frame counter in USB).
+
+The DMA time is measured using counters - typically the least reliable
+of all measurements due to the bursty natured of DMA transfers.
+
+The app time corresponds to the time tracked by an application after
+writing in the ring buffer.
+
+The application can query what the hardware supports, define which
+audio time it wants reported by selecting the relevant settings in
+audio_tstamp_config fields, get an estimate of the timestamp
+accuracy. It can also request the delay-to-analog be included in the
+measurement. Direct access to the link time is very interesting on
+platforms that provide an embedded DSP; measuring directly the link
+time with dedicated hardware, possibly synchronized with system time,
+removes the need to keep track of internal DSP processing times and
+latency.
+
+In case the application requests an audio tstamp that is not supported
+in hardware/low-level driver, the type is overridden as DEFAULT and the
+timestamp will report the DMA time based on the hw_pointer value.
+
+For backwards compatibility with previous implementations that did not
+provide timestamp selection, with a zero-valued COMPAT timestamp type
+the results will default to the HDAudio wall clock for playback
+streams and to the DMA time (hw_ptr) in all other cases.
+
+The audio timestamp accuracy can be returned to user-space, so that
+appropriate decisions are made:
+
+- for dma time (default), the granularity of the transfers can be
+ inferred from the steps between updates and in turn provide
+ information on how much the application pointer can be rewound
+ safely.
+
+- the link time can be used to track long-term drifts between audio
+ and system time using the (tstamp-trigger_tstamp)/audio_tstamp
+ ratio, the precision helps define how much smoothing/low-pass
+ filtering is required. The link time can be either reset on startup
+ or reported as is (the latter being useful to compare progress of
+ different streams - but may require the wallclock to be always
+ running and not wrap-around during idle periods). If supported in
+ hardware, the absolute link time could also be used to define a
+ precise start time (patches WIP)
+
+- including the delay in the audio timestamp may
+ counter-intuitively not increase the precision of timestamps, e.g. if a
+ codec includes variable-latency DSP processing or a chain of
+ hardware components the delay is typically not known with precision.
+
+The accuracy is reported in nanosecond units (using an unsigned 32-bit
+word), which gives a max precision of 4.29s, more than enough for
+audio applications...
+
+Due to the varied nature of timestamping needs, even for a single
+application, the audio_tstamp_config can be changed dynamically. In
+the STATUS ioctl, the parameters are read-only and do not allow for
+any application selection. To work around this limitation without
+impacting legacy applications, a new STATUS_EXT ioctl is introduced
+with read/write parameters. ALSA-lib will be modified to make use of
+STATUS_EXT and effectively deprecate STATUS.
+
+The ALSA API only allows for a single audio timestamp to be reported
+at a time. This is a conscious design decision, reading the audio
+timestamps from hardware registers or from IPC takes time, the more
+timestamps are read the more imprecise the combined measurements
+are. To avoid any interpretation issues, a single (system, audio)
+timestamp is reported. Applications that need different timestamps
+will be required to issue multiple queries and perform an
+interpolation of the results
+
+In some hardware-specific configuration, the system timestamp is
+latched by a low-level audio subsytem, and the information provided
+back to the driver. Due to potential delays in the communication with
+the hardware, there is a risk of misalignment with the avail and delay
+information. To make sure applications are not confused, a
+driver_timestamp field is added in the snd_pcm_status structure; this
+timestamp shows when the information is put together by the driver
+before returning from the STATUS and STATUS_EXT ioctl. in most cases
+this driver_timestamp will be identical to the regular system tstamp.
+
+Examples of typestamping with HDaudio:
+
+1. DMA timestamp, no compensation for DMA+analog delay
+$ ./audio_time -p --ts_type=1
+playback: systime: 341121338 nsec, audio time 342000000 nsec, systime delta -878662
+playback: systime: 426236663 nsec, audio time 427187500 nsec, systime delta -950837
+playback: systime: 597080580 nsec, audio time 598000000 nsec, systime delta -919420
+playback: systime: 682059782 nsec, audio time 683020833 nsec, systime delta -961051
+playback: systime: 852896415 nsec, audio time 853854166 nsec, systime delta -957751
+playback: systime: 937903344 nsec, audio time 938854166 nsec, systime delta -950822
+
+2. DMA timestamp, compensation for DMA+analog delay
+$ ./audio_time -p --ts_type=1 -d
+playback: systime: 341053347 nsec, audio time 341062500 nsec, systime delta -9153
+playback: systime: 426072447 nsec, audio time 426062500 nsec, systime delta 9947
+playback: systime: 596899518 nsec, audio time 596895833 nsec, systime delta 3685
+playback: systime: 681915317 nsec, audio time 681916666 nsec, systime delta -1349
+playback: systime: 852741306 nsec, audio time 852750000 nsec, systime delta -8694
+
+3. link timestamp, compensation for DMA+analog delay
+$ ./audio_time -p --ts_type=2 -d
+playback: systime: 341060004 nsec, audio time 341062791 nsec, systime delta -2787
+playback: systime: 426242074 nsec, audio time 426244875 nsec, systime delta -2801
+playback: systime: 597080992 nsec, audio time 597084583 nsec, systime delta -3591
+playback: systime: 682084512 nsec, audio time 682088291 nsec, systime delta -3779
+playback: systime: 852936229 nsec, audio time 852940916 nsec, systime delta -4687
+playback: systime: 938107562 nsec, audio time 938112708 nsec, systime delta -5146
+
+Example 1 shows that the timestamp at the DMA level is close to 1ms
+ahead of the actual playback time (as a side time this sort of
+measurement can help define rewind safeguards). Compensating for the
+DMA-link delay in example 2 helps remove the hardware buffering abut
+the information is still very jittery, with up to one sample of
+error. In example 3 where the timestamps are measured with the link
+wallclock, the timestamps show a monotonic behavior and a lower
+dispersion.
+
+Example 3 and 4 are with USB audio class. Example 3 shows a high
+offset between audio time and system time due to buffering. Example 4
+shows how compensating for the delay exposes a 1ms accuracy (due to
+the use of the frame counter by the driver)
+
+Example 3: DMA timestamp, no compensation for delay, delta of ~5ms
+$ ./audio_time -p -Dhw:1 -t1
+playback: systime: 120174019 nsec, audio time 125000000 nsec, systime delta -4825981
+playback: systime: 245041136 nsec, audio time 250000000 nsec, systime delta -4958864
+playback: systime: 370106088 nsec, audio time 375000000 nsec, systime delta -4893912
+playback: systime: 495040065 nsec, audio time 500000000 nsec, systime delta -4959935
+playback: systime: 620038179 nsec, audio time 625000000 nsec, systime delta -4961821
+playback: systime: 745087741 nsec, audio time 750000000 nsec, systime delta -4912259
+playback: systime: 870037336 nsec, audio time 875000000 nsec, systime delta -4962664
+
+Example 4: DMA timestamp, compensation for delay, delay of ~1ms
+$ ./audio_time -p -Dhw:1 -t1 -d
+playback: systime: 120190520 nsec, audio time 120000000 nsec, systime delta 190520
+playback: systime: 245036740 nsec, audio time 244000000 nsec, systime delta 1036740
+playback: systime: 370034081 nsec, audio time 369000000 nsec, systime delta 1034081
+playback: systime: 495159907 nsec, audio time 494000000 nsec, systime delta 1159907
+playback: systime: 620098824 nsec, audio time 619000000 nsec, systime delta 1098824
+playback: systime: 745031847 nsec, audio time 744000000 nsec, systime delta 1031847
diff --git a/Documentation/sound/oss/ALS b/Documentation/sound/oss/ALS
new file mode 100644
index 000000000..bf10bed45
--- /dev/null
+++ b/Documentation/sound/oss/ALS
@@ -0,0 +1,66 @@
+ALS-007/ALS-100/ALS-200 based sound cards
+=========================================
+
+Support for sound cards based around the Avance Logic
+ALS-007/ALS-100/ALS-200 chip is included. These chips are a single
+chip PnP sound solution which is mostly hardware compatible with the
+Sound Blaster 16 card, with most differences occurring in the use of
+the mixer registers. For this reason the ALS code is integrated
+as part of the Sound Blaster 16 driver (adding only 800 bytes to the
+SB16 driver).
+
+To use an ALS sound card under Linux, enable the following options as
+modules in the sound configuration section of the kernel config:
+ - 100% Sound Blaster compatibles (SB16/32/64, ESS, Jazz16) support
+ - FM synthesizer (YM3812/OPL-3) support
+ - standalone MPU401 support may be required for some cards; for the
+ ALS-007, when using isapnptools, it is required
+Since the ALS-007/100/200 are PnP cards, ISAPnP support should probably be
+compiled in. If kernel level PnP support is not included, isapnptools will
+be required to configure the card before the sound modules are loaded.
+
+When using kernel level ISAPnP, the kernel should correctly identify and
+configure all resources required by the card when the "sb" module is
+inserted. Note that the ALS-007 does not have a 16 bit DMA channel and that
+the MPU401 interface on this card uses a different interrupt to the audio
+section. This should all be correctly configured by the kernel; if problems
+with the MPU401 interface surface, try using the standalone MPU401 module,
+passing "0" as the "sb" module's "mpu_io" module parameter to prevent the
+soundblaster driver attempting to register the MPU401 itself. The onboard
+synth device can be accessed using the "opl3" module.
+
+If isapnptools is used to wake up the sound card (as in 2.2.x), the settings
+of the card's resources should be passed to the kernel modules ("sb", "opl3"
+and "mpu401") using the module parameters. When configuring an ALS-007, be
+sure to specify different IRQs for the audio and MPU401 sections - this card
+requires they be different. For "sb", "io", "irq" and "dma" should be set
+to the same values used to configure the audio section of the card with
+isapnp. "dma16" should be explicitly set to "-1" for an ALS-007 since this
+card does not have a 16 bit dma channel; if not specified the kernel will
+default to using channel 5 anyway which will cause audio not to work.
+"mpu_io" should be set to 0. The "io" parameter of the "opl3" module should
+also agree with the setting used by isapnp. To get the MPU401 interface
+working on an ALS-007 card, the "mpu401" module will be required since this
+card uses separate IRQs for the audio and MPU401 sections and there is no
+parameter available to pass a different IRQ to the "sb" driver (whose
+inbuilt MPU401 driver would otherwise be fine). Insert the mpu401 module
+passing appropriate values using the "io" and "irq" parameters.
+
+The resulting sound driver will provide the following capabilities:
+ - 8 and 16 bit audio playback
+ - 8 and 16 bit audio recording
+ - Software selection of record source (line in, CD, FM, mic, master)
+ - Record and playback of midi data via the external MPU-401
+ - Playback of midi data using inbuilt FM synthesizer
+ - Control of the ALS-007 mixer via any OSS-compatible mixer programs.
+ Controls available are Master (L&R), Line in (L&R), CD (L&R),
+ DSP/PCM/audio out (L&R), FM (L&R) and Mic in (mono).
+
+Jonathan Woithe
+jwoithe@just42.net
+30 March 1998
+
+Modified 2000-02-26 by Dave Forrest, drf5n@virginia.edu to add ALS100/ALS200
+Modified 2000-04-10 by Paul Laufer, pelaufer@csupomona.edu to add ISAPnP info.
+Modified 2000-11-19 by Jonathan Woithe, jwoithe@just42.net
+ - updated information for kernel 2.4.x.
diff --git a/Documentation/sound/oss/AudioExcelDSP16 b/Documentation/sound/oss/AudioExcelDSP16
new file mode 100644
index 000000000..ea8549fae
--- /dev/null
+++ b/Documentation/sound/oss/AudioExcelDSP16
@@ -0,0 +1,101 @@
+Driver
+------
+
+Information about Audio Excel DSP 16 driver can be found in the source
+file aedsp16.c
+Please, read the head of the source before using it. It contain useful
+information.
+
+Configuration
+-------------
+
+The Audio Excel configuration, is now done with the standard Linux setup.
+You have to configure the sound card (Sound Blaster or Microsoft Sound System)
+and, if you want it, the Roland MPU-401 (do not use the Sound Blaster MPU-401,
+SB-MPU401) in the main driver menu. Activate the lowlevel drivers then select
+the Audio Excel hardware that you want to initialize. Check the IRQ/DMA/MIRQ
+of the Audio Excel initialization: it must be the same as the SBPRO (or MSS)
+setup. If the parameters are different, correct it.
+I you own a Gallant's audio card based on SC-6600, activate the SC-6600 support.
+If you want to change the configuration of the sound board, be sure to
+check off all the configuration items before re-configure it.
+
+Module parameters
+-----------------
+To use this driver as a module, you must configure some module parameters, to
+set up I/O addresses, IRQ lines and DMA channels. Some parameters are
+mandatory while some others are optional. Here a list of parameters you can
+use with this module:
+
+Name Description
+==== ===========
+MANDATORY
+io I/O base address (0x220 or 0x240)
+irq irq line (5, 7, 9, 10 or 11)
+dma dma channel (0, 1 or 3)
+
+OPTIONAL
+mss_base I/O base address for activate MSS mode (default SBPRO)
+ (0x530 or 0xE80)
+mpu_base I/O base address for activate MPU-401 mode
+ (0x300, 0x310, 0x320 or 0x330)
+mpu_irq MPU-401 irq line (5, 7, 9, 10 or 0)
+
+A configuration file in /etc/modprobe.d/ directory will have lines like this:
+
+options opl3 io=0x388
+options ad1848 io=0x530 irq=11 dma=3
+options aedsp16 io=0x220 irq=11 dma=3 mss_base=0x530
+
+Where the aedsp16 options are the options for this driver while opl3 and
+ad1848 are the corresponding options for the MSS and OPL3 modules.
+
+Loading MSS and OPL3 needs to pre load the aedsp16 module to set up correctly
+the sound card. Installation dependencies must be written in configuration
+files under /etc/modprobe.d/ directory:
+
+softdep ad1848 pre: aedsp16
+softdep opl3 pre: aedsp16
+
+Then you must load the sound modules stack in this order:
+sound -> aedsp16 -> [ ad1848, opl3 ]
+
+With the above configuration, loading ad1848 or opl3 modules, will
+automatically load all the sound stack.
+
+Sound cards supported
+---------------------
+This driver supports the SC-6000 and SC-6600 based Gallant's sound card.
+It don't support the Audio Excel DSP 16 III (try the SC-6600 code).
+I'm working on the III version of the card: if someone have useful
+information about it, please let me know.
+For all the non-supported audio cards, you have to boot MS-DOS (or WIN95)
+activating the audio card with the MS-DOS device driver, then you have to
+<ctrl>-<alt>-<del> and boot Linux.
+Follow these steps:
+
+1) Compile Linux kernel with standard sound driver, using the emulation
+ you want, with the parameters of your audio card,
+ e.g. Microsoft Sound System irq10 dma3
+2) Install your new kernel as the default boot kernel.
+3) Boot MS-DOS and configure the audio card with the boot time device
+ driver, for MSS irq10 dma3 in our example.
+4) <ctrl>-<alt>-<del> and boot Linux. This will maintain the DOS configuration
+ and will boot the new kernel with sound driver. The sound driver will find
+ the audio card and will recognize and attach it.
+
+Reports on User successes
+-------------------------
+
+> Date: Mon, 29 Jul 1996 08:35:40 +0100
+> From: Mr S J Greenaway <sjg95@unixfe.rl.ac.uk>
+> To: riccardo@cdc8g5.cdc.polimi.it (Riccardo Facchetti)
+> Subject: Re: Audio Excel DSP 16 initialization code
+>
+> Just to let you know got my Audio Excel (emulating a MSS) working
+> with my original SB16, thanks for the driver!
+
+
+Last revised: 20 August 1998
+Riccardo Facchetti
+fizban@tin.it
diff --git a/Documentation/sound/oss/CMI8330 b/Documentation/sound/oss/CMI8330
new file mode 100644
index 000000000..8a5fd1611
--- /dev/null
+++ b/Documentation/sound/oss/CMI8330
@@ -0,0 +1,152 @@
+Documentation for CMI 8330 (SoundPRO)
+-------------------------------------
+Alessandro Zummo <azummo@ita.flashnet.it>
+
+( Be sure to read Documentation/sound/oss/SoundPro too )
+
+
+This adapter is now directly supported by the sb driver.
+
+ The only thing you have to do is to compile the kernel sound
+support as a module and to enable kernel ISAPnP support,
+as shown below.
+
+
+CONFIG_SOUND=m
+CONFIG_SOUND_SB=m
+
+CONFIG_PNP=y
+CONFIG_ISAPNP=y
+
+
+and optionally:
+
+
+CONFIG_SOUND_MPU401=m
+
+ for MPU401 support.
+
+
+(I suggest you to use "make menuconfig" or "make xconfig"
+ for a more comfortable configuration editing)
+
+
+
+Then you can do
+
+ modprobe sb
+
+and everything will be (hopefully) configured.
+
+You should get something similar in syslog:
+
+sb: CMI8330 detected.
+sb: CMI8330 sb base located at 0x220
+sb: CMI8330 mpu base located at 0x330
+sb: CMI8330 mail reports to Alessandro Zummo <azummo@ita.flashnet.it>
+sb: ISAPnP reports CMI 8330 SoundPRO at i/o 0x220, irq 7, dma 1,5
+
+
+
+
+The old documentation file follows for reference
+purposes.
+
+
+How to enable CMI 8330 (SOUNDPRO) soundchip on Linux
+------------------------------------------
+Stefan Laudat <Stefan.Laudat@asit.ro>
+
+[Note: The CMI 8338 is unrelated and is supported by cmpci.o]
+
+
+ In order to use CMI8330 under Linux you just have to use a proper isapnp.conf, a good isapnp and a little bit of patience. I use isapnp 1.17, but
+you may get a better one I guess at http://www.roestock.demon.co.uk/isapnptools/.
+
+ Of course you will have to compile kernel sound support as module, as shown below:
+
+CONFIG_SOUND=m
+CONFIG_SOUND_OSS=m
+CONFIG_SOUND_SB=m
+CONFIG_SOUND_ADLIB=m
+CONFIG_SOUND_MPU401=m
+# Mikro$chaft sound system (kinda useful here ;))
+CONFIG_SOUND_MSS=m
+
+ The /etc/isapnp.conf file will be:
+
+<snip below>
+
+
+(READPORT 0x0203)
+(ISOLATE PRESERVE)
+(IDENTIFY *)
+(VERBOSITY 2)
+(CONFLICT (IO FATAL)(IRQ FATAL)(DMA FATAL)(MEM FATAL)) # or WARNING
+(VERIFYLD N)
+
+
+# WSS
+
+(CONFIGURE CMI0001/16777472 (LD 0
+(IO 0 (SIZE 8) (BASE 0x0530))
+(IO 1 (SIZE 8) (BASE 0x0388))
+(INT 0 (IRQ 7 (MODE +E)))
+(DMA 0 (CHANNEL 0))
+(NAME "CMI0001/16777472[0]{CMI8330/C3D Audio Adapter}")
+(ACT Y)
+))
+
+# MPU
+
+(CONFIGURE CMI0001/16777472 (LD 1
+(IO 0 (SIZE 2) (BASE 0x0330))
+(INT 0 (IRQ 11 (MODE +E)))
+(NAME "CMI0001/16777472[1]{CMI8330/C3D Audio Adapter}")
+(ACT Y)
+))
+
+# Joystick
+
+(CONFIGURE CMI0001/16777472 (LD 2
+(IO 0 (SIZE 8) (BASE 0x0200))
+(NAME "CMI0001/16777472[2]{CMI8330/C3D Audio Adapter}")
+(ACT Y)
+))
+
+# SoundBlaster
+
+(CONFIGURE CMI0001/16777472 (LD 3
+(IO 0 (SIZE 16) (BASE 0x0220))
+(INT 0 (IRQ 5 (MODE +E)))
+(DMA 0 (CHANNEL 1))
+(DMA 1 (CHANNEL 5))
+(NAME "CMI0001/16777472[3]{CMI8330/C3D Audio Adapter}")
+(ACT Y)
+))
+
+
+(WAITFORKEY)
+
+<end of snip>
+
+ The module sequence is trivial:
+
+/sbin/insmod soundcore
+/sbin/insmod sound
+/sbin/insmod uart401
+# insert this first
+/sbin/insmod ad1848 io=0x530 irq=7 dma=0 soundpro=1
+# The sb module is an alternative to the ad1848 (Microsoft Sound System)
+# Anyhow, this is full duplex and has MIDI
+/sbin/insmod sb io=0x220 dma=1 dma16=5 irq=5 mpu_io=0x330
+
+
+
+Alma Chao <elysian@ethereal.torsion.org> suggests the following in
+a /etc/modprobe.d/*conf file:
+
+alias sound ad1848
+alias synth0 opl3
+options ad1848 io=0x530 irq=7 dma=0 soundpro=1
+options opl3 io=0x388
diff --git a/Documentation/sound/oss/ESS b/Documentation/sound/oss/ESS
new file mode 100644
index 000000000..bba93b4d2
--- /dev/null
+++ b/Documentation/sound/oss/ESS
@@ -0,0 +1,34 @@
+Documentation for the ESS AudioDrive chips
+
+In 2.4 kernels the SoundBlaster driver not only tries to detect an ESS chip, it
+tries to detect the type of ESS chip too. The correct detection of the chip
+doesn't always succeed however, so unless you use the kernel isapnp facilities
+(and you chip is pnp capable) the default behaviour is 2.0 behaviour which
+means: only detect ES688 and ES1688.
+
+All ESS chips now have a recording level setting. This is a need-to-have for
+people who want to use their ESS for recording sound.
+
+Every chip that's detected as a later-than-es1688 chip has a 6 bits logarithmic
+master volume control.
+
+Every chip that's detected as a ES1887 now has Full Duplex support. Made a
+little testprogram that shows that is works, haven't seen a real program that
+needs this however.
+
+For ESS chips an additional parameter "esstype" can be specified. This controls
+the (auto) detection of the ESS chips. It can have 3 kinds of values:
+
+-1 Act like 2.0 kernels: only detect ES688 or ES1688.
+0 Try to auto-detect the chip (may fail for ES1688)
+688 The chip will be treated as ES688
+1688 ,, ,, ,, ,, ,, ,, ES1688
+1868 ,, ,, ,, ,, ,, ,, ES1868
+1869 ,, ,, ,, ,, ,, ,, ES1869
+1788 ,, ,, ,, ,, ,, ,, ES1788
+1887 ,, ,, ,, ,, ,, ,, ES1887
+1888 ,, ,, ,, ,, ,, ,, ES1888
+
+Because Full Duplex is supported for ES1887 you can specify a second DMA
+channel by specifying module parameter dma16. It can be one of: 0, 1, 3 or 5.
+
diff --git a/Documentation/sound/oss/ESS1868 b/Documentation/sound/oss/ESS1868
new file mode 100644
index 000000000..55e922f21
--- /dev/null
+++ b/Documentation/sound/oss/ESS1868
@@ -0,0 +1,55 @@
+Documentation for the ESS1868F AudioDrive PnP sound card
+
+The ESS1868 sound card is a PnP ESS1688-compatible 16-bit sound card.
+
+It should be automatically detected by the Linux Kernel isapnp support when you
+load the sb.o module. Otherwise you should take care of:
+
+ * The ESS1868 does not allow use of a 16-bit DMA, thus DMA 0, 1, 2, and 3
+ may only be used.
+
+ * isapnptools version 1.14 does work with ESS1868. Earlier versions might
+ not.
+
+ * Sound support MUST be compiled as MODULES, not statically linked
+ into the kernel.
+
+
+NOTE: this is only needed when not using the kernel isapnp support!
+
+For configuring the sound card's I/O addresses, IRQ and DMA, here is a
+sample copy of the isapnp.conf directives regarding the ESS1868:
+
+(CONFIGURE ESS1868/-1 (LD 1
+(IO 0 (BASE 0x0220))
+(IO 1 (BASE 0x0388))
+(IO 2 (BASE 0x0330))
+(DMA 0 (CHANNEL 1))
+(INT 0 (IRQ 5 (MODE +E)))
+(ACT Y)
+))
+
+(for a full working isapnp.conf file, remember the
+(ISOLATE)
+(IDENTIFY *)
+at the beginning and the
+(WAITFORKEY)
+at the end.)
+
+In this setup, the main card I/O is 0x0220, FM synthesizer is 0x0388, and
+the MPU-401 MIDI port is located at 0x0330. IRQ is IRQ 5, DMA is channel 1.
+
+After configuring the sound card via isapnp, to use the card you must load
+the sound modules with the proper I/O information. Here is my setup:
+
+# ESS1868F AudioDrive initialization
+
+/sbin/modprobe sound
+/sbin/insmod uart401
+/sbin/insmod sb io=0x220 irq=5 dma=1 dma16=-1
+/sbin/insmod mpu401 io=0x330
+/sbin/insmod opl3 io=0x388
+/sbin/insmod v_midi
+
+opl3 is the FM synthesizer
+/sbin/insmod opl3 io=0x388
diff --git a/Documentation/sound/oss/Introduction b/Documentation/sound/oss/Introduction
new file mode 100644
index 000000000..42da2d8fa
--- /dev/null
+++ b/Documentation/sound/oss/Introduction
@@ -0,0 +1,459 @@
+Introduction Notes on Modular Sound Drivers and Soundcore
+Wade Hampton
+2/14/2001
+
+Purpose:
+========
+This document provides some general notes on the modular
+sound drivers and their configuration, along with the
+support modules sound.o and soundcore.o.
+
+Note, some of this probably should be added to the Sound-HOWTO!
+
+Note, soundlow.o was present with 2.2 kernels but is not
+required for 2.4.x kernels. References have been removed
+to this.
+
+
+Copying:
+========
+none
+
+
+History:
+========
+0.1.0 11/20/1998 First version, draft
+1.0.0 11/1998 Alan Cox changes, incorporation in 2.2.0
+ as Documentation/sound/oss/Introduction
+1.1.0 6/30/1999 Second version, added notes on making the drivers,
+ added info on multiple sound cards of similar types,]
+ added more diagnostics info, added info about esd.
+ added info on OSS and ALSA.
+1.1.1 19991031 Added notes on sound-slot- and sound-service.
+ (Alan Cox)
+1.1.2 20000920 Modified for Kernel 2.4 (Christoph Hellwig)
+1.1.3 20010214 Minor notes and corrections (Wade Hampton)
+ Added examples of sound-slot-0, etc.
+
+
+Modular Sound Drivers:
+======================
+
+Thanks to the GREAT work by Alan Cox (alan@lxorguk.ukuu.org.uk),
+
+[And Oleg Drokin, Thomas Sailer, Andrew Veliath and more than a few
+ others - not to mention Hannu's original code being designed well
+ enough to cope with that kind of chopping up](Alan)
+
+the standard Linux kernels support a modular sound driver. From
+Alan's comments in linux/drivers/sound/README.FIRST:
+
+ The modular sound driver patches were funded by Red Hat Software
+ (www.redhat.com). The sound driver here is thus a modified version of
+ Hannu's code. Please bear that in mind when considering the appropriate
+ forums for bug reporting.
+
+The modular sound drivers may be loaded via insmod or modprobe.
+To support all the various sound modules, there are two general
+support modules that must be loaded first:
+
+ soundcore.o: Top level handler for the sound system, provides
+ a set of functions for registration of devices
+ by type.
+
+ sound.o: Common sound functions required by all modules.
+
+For the specific sound modules (e.g., sb.o for the Soundblaster),
+read the documentation on that module to determine what options
+are available, for example IRQ, address, DMA.
+
+Warning, the options for different cards sometime use different names
+for the same or a similar feature (dma1= versus dma16=). As a last
+resort, inspect the code (search for module_param).
+
+Notes:
+
+1. There is a new OpenSource sound driver called ALSA which is
+ currently under development: http://www.alsa-project.org/
+ The ALSA drivers support some newer hardware that may not
+ be supported by this sound driver and also provide some
+ additional features.
+
+2. The commercial OSS driver may be obtained from the site:
+ http://www.opensound.com. This may be used for cards that
+ are unsupported by the kernel driver, or may be used
+ by other operating systems.
+
+3. The enlightenment sound daemon may be used for playing
+ multiple sounds at the same time via a single card, eliminating
+ some of the requirements for multiple sound card systems. For
+ more information, see: http://www.tux.org/~ricdude/EsounD.html
+ The "esd" program may be used with the real-player and mpeg
+ players like mpg123 and x11amp. The newer real-player
+ and some games even include built-in support for ESD!
+
+
+Building the Modules:
+=====================
+
+This document does not provide full details on building the
+kernel, etc. The notes below apply only to making the kernel
+sound modules. If this conflicts with the kernel's README,
+the README takes precedence.
+
+1. To make the kernel sound modules, cd to your /usr/src/linux
+ directory (typically) and type make config, make menuconfig,
+ or make xconfig (to start the command line, dialog, or x-based
+ configuration tool).
+
+2. Select the Sound option and a dialog will be displayed.
+
+3. Select M (module) for "Sound card support".
+
+4. Select your sound driver(s) as a module. For ProAudio, Sound
+ Blaster, etc., select M (module) for OSS sound modules.
+ [thanks to Marvin Stodolsky <stodolsk@erols.com>]A
+
+5. Make the kernel (e.g., make bzImage), and install the kernel.
+
+6. Make the modules and install them (make modules; make modules_install).
+
+Note, for 2.5.x kernels, make sure you have the newer module-init-tools
+installed or modules will not be loaded properly. 2.5.x requires an
+updated module-init-tools.
+
+
+Plug and Play (PnP:
+===================
+
+If the sound card is an ISA PnP card, isapnp may be used
+to configure the card. See the file isapnp.txt in the
+directory one level up (e.g., /usr/src/linux/Documentation).
+
+Also the 2.4.x kernels provide PnP capabilities, see the
+file NEWS in this directory.
+
+PCI sound cards are highly recommended, as they are far
+easier to configure and from what I have read, they use
+less resources and are more CPU efficient.
+
+
+INSMOD:
+=======
+
+If loading via insmod, the common modules must be loaded in the
+order below BEFORE loading the other sound modules. The card-specific
+modules may then be loaded (most require parameters). For example,
+I use the following via a shell script to load my SoundBlaster:
+
+SB_BASE=0x240
+SB_IRQ=9
+SB_DMA=3
+SB_DMA2=5
+SB_MPU=0x300
+#
+echo Starting sound
+/sbin/insmod soundcore
+/sbin/insmod sound
+#
+echo Starting sound blaster....
+/sbin/insmod uart401
+/sbin/insmod sb io=$SB_BASE irq=$SB_IRQ dma=$SB_DMA dma16=$SB_DMA2 mpu_io=$SB_MP
+
+When using sound as a module, I typically put these commands
+in a file such as /root/soundon.sh.
+
+
+MODPROBE:
+=========
+
+If loading via modprobe, these common files are automatically loaded when
+requested by modprobe. For example, my /etc/modprobe.d/oss.conf contains:
+
+alias sound sb
+options sb io=0x240 irq=9 dma=3 dma16=5 mpu_io=0x300
+
+All you need to do to load the module is:
+
+ /sbin/modprobe sb
+
+
+Sound Status:
+=============
+
+The status of sound may be read/checked by:
+ cat (anyfile).au >/dev/audio
+
+[WWH: This may not work properly for SoundBlaster PCI 128 cards
+such as the es1370/1 (see the es1370/1 files in this directory)
+as they do not automatically support uLaw on /dev/audio.]
+
+The status of the modules and which modules depend on
+which other modules may be checked by:
+ /sbin/lsmod
+
+/sbin/lsmod should show something like the following:
+ sb 26280 0
+ uart401 5640 0 [sb]
+ sound 57112 0 [sb uart401]
+ soundcore 1968 8 [sb sound]
+
+
+Removing Sound:
+===============
+
+Sound may be removed by using /sbin/rmmod in the reverse order
+in which you load the modules. Note, if a program has a sound device
+open (e.g., xmixer), that module (and the modules on which it
+depends) may not be unloaded.
+
+For example, I use the following to remove my Soundblaster (rmmod
+in the reverse order in which I loaded the modules):
+
+/sbin/rmmod sb
+/sbin/rmmod uart401
+/sbin/rmmod sound
+/sbin/rmmod soundcore
+
+When using sound as a module, I typically put these commands
+in a script such as /root/soundoff.sh.
+
+
+Removing Sound for use with OSS:
+================================
+
+If you get really stuck or have a card that the kernel modules
+will not support, you can get a commercial sound driver from
+http://www.opensound.com. Before loading the commercial sound
+driver, you should do the following:
+
+1. remove sound modules (detailed above)
+2. remove the sound modules from /etc/modprobe.d/*.conf
+3. move the sound modules from /lib/modules/<kernel>/misc
+ (for example, I make a /lib/modules/<kernel>/misc/tmp
+ directory and copy the sound module files to that
+ directory).
+
+
+Multiple Sound Cards:
+=====================
+
+The sound drivers will support multiple sound cards and there
+are some great applications like multitrack that support them.
+Typically, you need two sound cards of different types. Note, this
+uses more precious interrupts and DMA channels and sometimes
+can be a configuration nightmare. I have heard reports of 3-4
+sound cards (typically I only use 2). You can sometimes use
+multiple PCI sound cards of the same type.
+
+On my machine I have two sound cards (cs4232 and Soundblaster Vibra
+16). By loading sound as modules, I can control which is the first
+sound device (/dev/dsp, /dev/audio, /dev/mixer) and which is
+the second. Normally, the cs4232 (Dell sound on the motherboard)
+would be the first sound device, but I prefer the Soundblaster.
+All you have to do is to load the one you want as /dev/dsp
+first (in my case "sb") and then load the other one
+(in my case "cs4232").
+
+If you have two cards of the same type that are jumpered
+cards or different PnP revisions, you may load the same
+module twice. For example, I have a SoundBlaster vibra 16
+and an older SoundBlaster 16 (jumpers). To load the module
+twice, you need to do the following:
+
+1. Copy the sound modules to a new name. For example
+ sb.o could be copied (or symlinked) to sb1.o for the
+ second SoundBlaster.
+
+2. Make a second entry in /etc/modprobe.d/*conf, for example,
+ sound1 or sb1. This second entry should refer to the
+ new module names for example sb1, and should include
+ the I/O, etc. for the second sound card.
+
+3. Update your soundon.sh script, etc.
+
+Warning: I have never been able to get two PnP sound cards of the
+same type to load at the same time. I have tried this several times
+with the Soundblaster Vibra 16 cards. OSS has indicated that this
+is a PnP problem.... If anyone has any luck doing this, please
+send me an E-MAIL. PCI sound cards should not have this problem.a
+Since this was originally release, I have received a couple of
+mails from people who have accomplished this!
+
+NOTE: In Linux 2.4 the Sound Blaster driver (and only this one yet)
+supports multiple cards with one module by default.
+Read the file 'Soundblaster' in this directory for details.
+
+
+Sound Problems:
+===============
+
+First RTFM (including the troubleshooting section
+in the Sound-HOWTO).
+
+1) If you are having problems loading the modules (for
+ example, if you get device conflict errors) try the
+ following:
+
+ A) If you have Win95 or NT on the same computer,
+ write down what addresses, IRQ, and DMA channels
+ those were using for the same hardware. You probably
+ can use these addresses, IRQs, and DMA channels.
+ You should really do this BEFORE attempting to get
+ sound working!
+
+ B) Check (cat) /proc/interrupts, /proc/ioports,
+ and /proc/dma. Are you trying to use an address,
+ IRQ or DMA port that another device is using?
+
+ C) Check (cat) /proc/isapnp
+
+ D) Inspect your /var/log/messages file. Often that will
+ indicate what IRQ or IO port could not be obtained.
+
+ E) Try another port or IRQ. Note this may involve
+ using the PnP tools to move the sound card to
+ another location. Sometimes this is the only way
+ and it is more or less trial and error.
+
+2) If you get motor-boating (the same sound or part of a
+ sound clip repeated), you probably have either an IRQ
+ or DMA conflict. Move the card to another IRQ or DMA
+ port. This has happened to me when playing long files
+ when I had an IRQ conflict.
+
+3. If you get dropouts or pauses when playing high sample
+ rate files such as using mpg123 or x11amp/xmms, you may
+ have too slow of a CPU and may have to use the options to
+ play the files at 1/2 speed. For example, you may use
+ the -2 or -4 option on mpg123. You may also get this
+ when trying to play mpeg files stored on a CD-ROM
+ (my Toshiba T8000 PII/366 sometimes has this problem).
+
+4. If you get "cannot access device" errors, your /dev/dsp
+ files, etc. may be set to owner root, mode 600. You
+ may have to use the command:
+ chmod 666 /dev/dsp /dev/mixer /dev/audio
+
+5. If you get "device busy" errors, another program has the
+ sound device open. For example, if using the Enlightenment
+ sound daemon "esd", the "esd" program has the sound device.
+ If using "esd", please RTFM the docs on ESD. For example,
+ esddsp <program> may be used to play files via a non-esd
+ aware program.
+
+6) Ask for help on the sound list or send E-MAIL to the
+ sound driver author/maintainer.
+
+7) Turn on debug in drivers/sound/sound_config.h (DEB, DDB, MDB).
+
+8) If the system reports insufficient DMA memory then you may want to
+ load sound with the "dmabufs=1" option. Or in /etc/conf.modules add
+
+ preinstall sound dmabufs=1
+
+ This makes the sound system allocate its buffers and hang onto them.
+
+ You may also set persistent DMA when building a 2.4.x kernel.
+
+
+Configuring Sound:
+==================
+
+There are several ways of configuring your sound:
+
+1) On the kernel command line (when using the sound driver(s)
+ compiled in the kernel). Check the driver source and
+ documentation for details.
+
+2) On the command line when using insmod or in a bash script
+ using command line calls to load sound.
+
+3) In /etc/modprobe.d/*conf when using modprobe.
+
+4) Via Red Hat's GPL'd /usr/sbin/sndconfig program (text based).
+
+5) Via the OSS soundconf program (with the commercial version
+ of the OSS driver.
+
+6) By just loading the module and let isapnp do everything relevant
+ for you. This works only with a few drivers yet and - of course -
+ only with isapnp hardware.
+
+And I am sure, several other ways.
+
+Anyone want to write a linuxconf module for configuring sound?
+
+
+Module Loading:
+===============
+
+When a sound card is first referenced and sound is modular, the sound system
+will ask for the sound devices to be loaded. Initially it requests that
+the driver for the sound system is loaded. It then will ask for
+sound-slot-0, where 0 is the first sound card. (sound-slot-1 the second and
+so on). Thus you can do
+
+alias sound-slot-0 sb
+
+To load a soundblaster at this point. If the slot loading does not provide
+the desired device - for example a soundblaster does not directly provide
+a midi synth in all cases then it will request "sound-service-0-n" where n
+is
+
+ 0 Mixer
+
+ 2 MIDI
+
+ 3, 4 DSP audio
+
+
+For example, I use the following to load my Soundblaster PCI 128
+(ES 1371) card first, followed by my SoundBlaster Vibra 16 card,
+then by my TV card:
+
+# Load the Soundblaster PCI 128 as /dev/dsp, /dev/dsp1, /dev/mixer
+alias sound-slot-0 es1371
+
+# Load the Soundblaster Vibra 16 as /dev/dsp2, /dev/mixer1
+alias sound-slot-1 sb
+options sb io=0x240 irq=5 dma=1 dma16=5 mpu_io=0x330
+
+# Load the BTTV (TV card) as /dev/mixer2
+alias sound-slot-2 bttv
+alias sound-service-2-0 tvmixer
+
+pre-install bttv modprobe tuner ; modprobe tvmixer
+pre-install tvmixer modprobe msp3400; modprobe tvaudio
+options tuner debug=0 type=8
+options bttv card=0 radio=0 pll=0
+
+
+For More Information (RTFM):
+============================
+1) Information on kernel modules: manual pages for insmod and modprobe.
+
+2) Information on PnP, RTFM manual pages for isapnp.
+
+3) Sound-HOWTO and Sound-Playing-HOWTO.
+
+4) OSS's WWW site at http://www.opensound.com.
+
+5) All the files in Documentation/sound.
+
+6) The comments and code in linux/drivers/sound.
+
+7) The sndconfig and rhsound documentation from Red Hat.
+
+8) The Linux-sound mailing list: sound-list@redhat.com.
+
+9) Enlightenment documentation (for info on esd)
+ http://www.tux.org/~ricdude/EsounD.html.
+
+10) ALSA home page: http://www.alsa-project.org/
+
+
+Contact Information:
+====================
+Wade Hampton: (whampton@staffnet.com)
+
diff --git a/Documentation/sound/oss/MultiSound b/Documentation/sound/oss/MultiSound
new file mode 100644
index 000000000..c64daadd4
--- /dev/null
+++ b/Documentation/sound/oss/MultiSound
@@ -0,0 +1,1065 @@
+#! /bin/sh
+#
+# Turtle Beach MultiSound Driver Notes
+# -- Andrew Veliath <andrewtv@usa.net>
+#
+# Last update: September 10, 1998
+# Corresponding msnd driver: 0.8.3
+#
+# ** This file is a README (top part) and shell archive (bottom part).
+# The corresponding archived utility sources can be unpacked by
+# running `sh MultiSound' (the utilities are only needed for the
+# Pinnacle and Fiji cards). **
+#
+#
+# -=-=- Getting Firmware -=-=-
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# See the section `Obtaining and Creating Firmware Files' in this
+# document for instructions on obtaining the necessary firmware
+# files.
+#
+#
+# Supported Features
+# ~~~~~~~~~~~~~~~~~~
+#
+# Currently, full-duplex digital audio (/dev/dsp only, /dev/audio is
+# not currently available) and mixer functionality (/dev/mixer) are
+# supported (memory mapped digital audio is not yet supported).
+# Digital transfers and monitoring can be done as well if you have
+# the digital daughterboard (see the section on using the S/PDIF port
+# for more information).
+#
+# Support for the Turtle Beach MultiSound Hurricane architecture is
+# composed of the following modules (these can also operate compiled
+# into the kernel):
+#
+# msnd - MultiSound base (requires soundcore)
+#
+# msnd_classic - Base audio/mixer support for Classic, Monetery and
+# Tahiti cards
+#
+# msnd_pinnacle - Base audio/mixer support for Pinnacle and Fiji cards
+#
+#
+# /*(DEBLOBBED)*/
+#
+#
+# Configuring Card Resources
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# ** This section is very important, as your card may not work at all
+# or your machine may crash if you do not do this correctly. **
+#
+# * Classic/Monterey/Tahiti
+#
+# These cards are configured through the driver msnd_classic. You must
+# know the io port, then the driver will select the irq and memory resources
+# on the card. It is up to you to know if these are free locations or now,
+# a conflict can lock the machine up.
+#
+# * Pinnacle/Fiji
+#
+# The Pinnacle and Fiji cards have an extra config port, either
+# 0x250, 0x260 or 0x270. This port can be disabled to have the card
+# configured strictly through PnP, however you lose the ability to
+# access the IDE controller and joystick devices on this card when
+# using PnP. The included pinnaclecfg program in this shell archive
+# can be used to configure the card in non-PnP mode, and in PnP mode
+# you can use isapnptools. These are described briefly here.
+#
+# pinnaclecfg is not required; you can use the msnd_pinnacle module
+# to fully configure the card as well. However, pinnaclecfg can be
+# used to change the resource values of a particular device after the
+# msnd_pinnacle module has been loaded. If you are compiling the
+# driver into the kernel, you must set these values during compile
+# time, however other peripheral resource values can be changed with
+# the pinnaclecfg program after the kernel is loaded.
+#
+#
+# *** PnP mode
+#
+# Use pnpdump to obtain a sample configuration if you can; I was able
+# to obtain one with the command `pnpdump 1 0x203' -- this may vary
+# for you (running pnpdump by itself did not work for me). Then,
+# edit this file and use isapnp to uncomment and set the card values.
+# Use these values when inserting the msnd_pinnacle module. Using
+# this method, you can set the resources for the DSP and the Kurzweil
+# synth (Pinnacle). Since Linux does not directly support PnP
+# devices, you may have difficulty when using the card in PnP mode
+# when it the driver is compiled into the kernel. Using non-PnP mode
+# is preferable in this case.
+#
+# Here is an example mypinnacle.conf for isapnp that sets the card to
+# io base 0x210, irq 5 and mem 0xd8000, and also sets the Kurzweil
+# synth to 0x330 and irq 9 (may need editing for your system):
+#
+# (READPORT 0x0203)
+# (CSN 2)
+# (IDENTIFY *)
+#
+# # DSP
+# (CONFIGURE BVJ0440/-1 (LD 0
+# (INT 0 (IRQ 5 (MODE +E))) (IO 0 (BASE 0x0210)) (MEM 0 (BASE 0x0d8000))
+# (ACT Y)))
+#
+# # Kurzweil Synth (Pinnacle Only)
+# (CONFIGURE BVJ0440/-1 (LD 1
+# (IO 0 (BASE 0x0330)) (INT 0 (IRQ 9 (MODE +E)))
+# (ACT Y)))
+#
+# (WAITFORKEY)
+#
+#
+# *** Non-PnP mode
+#
+# The second way is by running the card in non-PnP mode. This
+# actually has some advantages in that you can access some other
+# devices on the card, such as the joystick and IDE controller. To
+# configure the card, unpack this shell archive and build the
+# pinnaclecfg program. Using this program, you can assign the
+# resource values to the card's devices, or disable the devices. As
+# an alternative to using pinnaclecfg, you can specify many of the
+# configuration values when loading the msnd_pinnacle module (or
+# during kernel configuration when compiling the driver into the
+# kernel).
+#
+# If you specify cfg=0x250 for the msnd_pinnacle module, it
+# automatically configure the card to the given io, irq and memory
+# values using that config port (the config port is jumper selectable
+# on the card to 0x250, 0x260 or 0x270).
+#
+# See the `msnd_pinnacle Additional Options' section below for more
+# information on these parameters (also, if you compile the driver
+# directly into the kernel, these extra parameters can be useful
+# here).
+#
+#
+# ** It is very easy to cause problems in your machine if you choose a
+# resource value which is incorrect. **
+#
+#
+# Examples
+# ~~~~~~~~
+#
+# * MultiSound Classic/Monterey/Tahiti:
+#
+# modprobe soundcore
+# insmod msnd
+# insmod msnd_classic io=0x290 irq=7 mem=0xd0000
+#
+# * MultiSound Pinnacle in PnP mode:
+#
+# modprobe soundcore
+# insmod msnd
+# isapnp mypinnacle.conf
+# insmod msnd_pinnacle io=0x210 irq=5 mem=0xd8000 <-- match mypinnacle.conf values
+#
+# * MultiSound Pinnacle in non-PnP mode (replace 0x250 with your configuration port,
+# one of 0x250, 0x260 or 0x270):
+#
+# insmod soundcore
+# insmod msnd
+# insmod msnd_pinnacle cfg=0x250 io=0x290 irq=5 mem=0xd0000
+#
+# * To use the MPU-compatible Kurzweil synth on the Pinnacle in PnP
+# mode, add the following (assumes you did `isapnp mypinnacle.conf'):
+#
+# insmod sound
+# insmod mpu401 io=0x330 irq=9 <-- match mypinnacle.conf values
+#
+# * To use the MPU-compatible Kurzweil synth on the Pinnacle in non-PnP
+# mode, add the following. Note how we first configure the peripheral's
+# resources, _then_ install a Linux driver for it:
+#
+# insmod sound
+# pinnaclecfg 0x250 mpu 0x330 9
+# insmod mpu401 io=0x330 irq=9
+#
+# -- OR you can use the following sequence without pinnaclecfg in non-PnP mode:
+#
+# insmod soundcore
+# insmod msnd
+# insmod msnd_pinnacle cfg=0x250 io=0x290 irq=5 mem=0xd0000 mpu_io=0x330 mpu_irq=9
+# insmod sound
+# insmod mpu401 io=0x330 irq=9
+#
+# * To setup the joystick port on the Pinnacle in non-PnP mode (though
+# you have to find the actual Linux joystick driver elsewhere), you
+# can use pinnaclecfg:
+#
+# pinnaclecfg 0x250 joystick 0x200
+#
+# -- OR you can configure this using msnd_pinnacle with the following:
+#
+# insmod soundcore
+# insmod msnd
+# insmod msnd_pinnacle cfg=0x250 io=0x290 irq=5 mem=0xd0000 joystick_io=0x200
+#
+#
+# msnd_classic, msnd_pinnacle Required Options
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# If the following options are not given, the module will not load.
+# Examine the kernel message log for informative error messages.
+# WARNING--probing isn't supported so try to make sure you have the
+# correct shared memory area, otherwise you may experience problems.
+#
+# io I/O base of DSP, e.g. io=0x210
+# irq IRQ number, e.g. irq=5
+# mem Shared memory area, e.g. mem=0xd8000
+#
+#
+# msnd_classic, msnd_pinnacle Additional Options
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# fifosize The digital audio FIFOs, in kilobytes. If not
+# specified, the default will be used. Increasing
+# this value will reduce the chance of a FIFO
+# underflow at the expense of increasing overall
+# latency. For example, fifosize=512 will
+# allocate 512kB read and write FIFOs (1MB total).
+# While this may reduce dropouts, a heavy machine
+# load will undoubtedly starve the FIFO of data
+# and you will eventually get dropouts. One
+# option is to alter the scheduling priority of
+# the playback process, using `nice' or some form
+# of POSIX soft real-time scheduling.
+#
+# calibrate_signal Setting this to one calibrates the ADCs to the
+# signal, zero calibrates to the card (defaults
+# to zero).
+#
+#
+# msnd_pinnacle Additional Options
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# digital Specify digital=1 to enable the S/PDIF input
+# if you have the digital daughterboard
+# adapter. This will enable access to the
+# DIGITAL1 input for the soundcard in the mixer.
+# Some mixer programs might have trouble setting
+# the DIGITAL1 source as an input. If you have
+# trouble, you can try the setdigital.c program
+# at the bottom of this document.
+#
+# cfg Non-PnP configuration port for the Pinnacle
+# and Fiji (typically 0x250, 0x260 or 0x270,
+# depending on the jumper configuration). If
+# this option is omitted, then it is assumed
+# that the card is in PnP mode, and that the
+# specified DSP resource values are already
+# configured with PnP (i.e. it won't attempt to
+# do any sort of configuration).
+#
+# When the Pinnacle is in non-PnP mode, you can use the following
+# options to configure particular devices. If a full specification
+# for a device is not given, then the device is not configured. Note
+# that you still must use a Linux driver for any of these devices
+# once their resources are setup (such as the Linux joystick driver,
+# or the MPU401 driver from OSS for the Kurzweil synth).
+#
+# mpu_io I/O port of MPU (on-board Kurzweil synth)
+# mpu_irq IRQ of MPU (on-board Kurzweil synth)
+# ide_io0 First I/O port of IDE controller
+# ide_io1 Second I/O port of IDE controller
+# ide_irq IRQ IDE controller
+# joystick_io I/O port of joystick
+#
+#
+# /*(DEBLOBBED)*/
+#
+#
+# Using Digital I/O with the S/PDIF Port
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# If you have a Pinnacle or Fiji with the digital daughterboard and
+# want to set it as the input source, you can use this program if you
+# have trouble trying to do it with a mixer program (be sure to
+# insert the module with the digital=1 option, or say Y to the option
+# during compiled-in kernel operation). Upon selection of the S/PDIF
+# port, you should be able monitor and record from it.
+#
+# There is something to note about using the S/PDIF port. Digital
+# timing is taken from the digital signal, so if a signal is not
+# connected to the port and it is selected as recording input, you
+# will find PCM playback to be distorted in playback rate. Also,
+# attempting to record at a sampling rate other than the DAT rate may
+# be problematic (i.e. trying to record at 8000Hz when the DAT signal
+# is 44100Hz). If you have a problem with this, set the recording
+# input to analog if you need to record at a rate other than that of
+# the DAT rate.
+#
+#
+# -- Shell archive attached below, just run `sh MultiSound' to extract.
+# Contains Pinnacle/Fiji utilities to convert firmware, configure
+# in non-PnP mode, and select the DIGITAL1 input for the mixer.
+#
+#
+#!/bin/sh
+# This is a shell archive (produced by GNU sharutils 4.2).
+# To extract the files from this archive, save it to some FILE, remove
+# everything before the `!/bin/sh' line above, then type `sh FILE'.
+#
+# Made on 1998-12-04 10:07 EST by <andrewtv@ztransform.velsoft.com>.
+# Source directory was `/home/andrewtv/programming/pinnacle/pinnacle'.
+#
+# Existing files will *not* be overwritten unless `-c' is specified.
+#
+# This shar contains:
+# length mode name
+# ------ ---------- ------------------------------------------
+# 2046 -rw-rw-r-- MultiSound.d/setdigital.c
+# 10235 -rw-rw-r-- MultiSound.d/pinnaclecfg.c
+# 106 -rw-rw-r-- MultiSound.d/Makefile
+# 141 -rw-rw-r-- MultiSound.d/conv.l
+# 1472 -rw-rw-r-- MultiSound.d/msndreset.c
+#
+save_IFS="${IFS}"
+IFS="${IFS}:"
+gettext_dir=FAILED
+locale_dir=FAILED
+first_param="$1"
+for dir in $PATH
+do
+ if test "$gettext_dir" = FAILED && test -f $dir/gettext \
+ && ($dir/gettext --version >/dev/null 2>&1)
+ then
+ set `$dir/gettext --version 2>&1`
+ if test "$3" = GNU
+ then
+ gettext_dir=$dir
+ fi
+ fi
+ if test "$locale_dir" = FAILED && test -f $dir/shar \
+ && ($dir/shar --print-text-domain-dir >/dev/null 2>&1)
+ then
+ locale_dir=`$dir/shar --print-text-domain-dir`
+ fi
+done
+IFS="$save_IFS"
+if test "$locale_dir" = FAILED || test "$gettext_dir" = FAILED
+then
+ echo=echo
+else
+ TEXTDOMAINDIR=$locale_dir
+ export TEXTDOMAINDIR
+ TEXTDOMAIN=sharutils
+ export TEXTDOMAIN
+ echo="$gettext_dir/gettext -s"
+fi
+touch -am 1231235999 $$.touch >/dev/null 2>&1
+if test ! -f 1231235999 && test -f $$.touch; then
+ shar_touch=touch
+else
+ shar_touch=:
+ echo
+ $echo 'WARNING: not restoring timestamps. Consider getting and'
+ $echo "installing GNU \`touch', distributed in GNU File Utilities..."
+ echo
+fi
+rm -f 1231235999 $$.touch
+#
+if mkdir _sh01426; then
+ $echo 'x -' 'creating lock directory'
+else
+ $echo 'failed to create lock directory'
+ exit 1
+fi
+# ============= MultiSound.d/setdigital.c ==============
+if test ! -d 'MultiSound.d'; then
+ $echo 'x -' 'creating directory' 'MultiSound.d'
+ mkdir 'MultiSound.d'
+fi
+if test -f 'MultiSound.d/setdigital.c' && test "$first_param" != -c; then
+ $echo 'x -' SKIPPING 'MultiSound.d/setdigital.c' '(file already exists)'
+else
+ $echo 'x -' extracting 'MultiSound.d/setdigital.c' '(text)'
+ sed 's/^X//' << 'SHAR_EOF' > 'MultiSound.d/setdigital.c' &&
+/*********************************************************************
+X *
+X * setdigital.c - sets the DIGITAL1 input for a mixer
+X *
+X * Copyright (C) 1998 Andrew Veliath
+X *
+X * This program is free software; you can redistribute it and/or modify
+X * it under the terms of the GNU General Public License as published by
+X * the Free Software Foundation; either version 2 of the License, or
+X * (at your option) any later version.
+X *
+X * This program is distributed in the hope that it will be useful,
+X * but WITHOUT ANY WARRANTY; without even the implied warranty of
+X * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+X * GNU General Public License for more details.
+X *
+X * You should have received a copy of the GNU General Public License
+X * along with this program; if not, write to the Free Software
+X * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+X *
+X ********************************************************************/
+X
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/soundcard.h>
+X
+int main(int argc, char *argv[])
+{
+X int fd;
+X unsigned long recmask, recsrc;
+X
+X if (argc != 2) {
+X fprintf(stderr, "usage: setdigital <mixer device>\n");
+X exit(1);
+X }
+X
+X if ((fd = open(argv[1], O_RDWR)) < 0) {
+X perror(argv[1]);
+X exit(1);
+X }
+X
+X if (ioctl(fd, SOUND_MIXER_READ_RECMASK, &recmask) < 0) {
+X fprintf(stderr, "error: ioctl read recording mask failed\n");
+X perror("ioctl");
+X close(fd);
+X exit(1);
+X }
+X
+X if (!(recmask & SOUND_MASK_DIGITAL1)) {
+X fprintf(stderr, "error: cannot find DIGITAL1 device in mixer\n");
+X close(fd);
+X exit(1);
+X }
+X
+X if (ioctl(fd, SOUND_MIXER_READ_RECSRC, &recsrc) < 0) {
+X fprintf(stderr, "error: ioctl read recording source failed\n");
+X perror("ioctl");
+X close(fd);
+X exit(1);
+X }
+X
+X recsrc |= SOUND_MASK_DIGITAL1;
+X
+X if (ioctl(fd, SOUND_MIXER_WRITE_RECSRC, &recsrc) < 0) {
+X fprintf(stderr, "error: ioctl write recording source failed\n");
+X perror("ioctl");
+X close(fd);
+X exit(1);
+X }
+X
+X close(fd);
+X
+X return 0;
+}
+SHAR_EOF
+ $shar_touch -am 1204092598 'MultiSound.d/setdigital.c' &&
+ chmod 0664 'MultiSound.d/setdigital.c' ||
+ $echo 'restore of' 'MultiSound.d/setdigital.c' 'failed'
+ if ( md5sum --help 2>&1 | grep 'sage: md5sum \[' ) >/dev/null 2>&1 \
+ && ( md5sum --version 2>&1 | grep -v 'textutils 1.12' ) >/dev/null; then
+ md5sum -c << SHAR_EOF >/dev/null 2>&1 \
+ || $echo 'MultiSound.d/setdigital.c:' 'MD5 check failed'
+e87217fc3e71288102ba41fd81f71ec4 MultiSound.d/setdigital.c
+SHAR_EOF
+ else
+ shar_count="`LC_ALL= LC_CTYPE= LANG= wc -c < 'MultiSound.d/setdigital.c'`"
+ test 2046 -eq "$shar_count" ||
+ $echo 'MultiSound.d/setdigital.c:' 'original size' '2046,' 'current size' "$shar_count!"
+ fi
+fi
+# ============= MultiSound.d/pinnaclecfg.c ==============
+if test -f 'MultiSound.d/pinnaclecfg.c' && test "$first_param" != -c; then
+ $echo 'x -' SKIPPING 'MultiSound.d/pinnaclecfg.c' '(file already exists)'
+else
+ $echo 'x -' extracting 'MultiSound.d/pinnaclecfg.c' '(text)'
+ sed 's/^X//' << 'SHAR_EOF' > 'MultiSound.d/pinnaclecfg.c' &&
+/*********************************************************************
+X *
+X * pinnaclecfg.c - Pinnacle/Fiji Device Configuration Program
+X *
+X * This is for NON-PnP mode only. For PnP mode, use isapnptools.
+X *
+X * This is Linux-specific, and must be run with root permissions.
+X *
+X * Part of the Turtle Beach MultiSound Sound Card Driver for Linux
+X *
+X * Copyright (C) 1998 Andrew Veliath
+X *
+X * This program is free software; you can redistribute it and/or modify
+X * it under the terms of the GNU General Public License as published by
+X * the Free Software Foundation; either version 2 of the License, or
+X * (at your option) any later version.
+X *
+X * This program is distributed in the hope that it will be useful,
+X * but WITHOUT ANY WARRANTY; without even the implied warranty of
+X * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+X * GNU General Public License for more details.
+X *
+X * You should have received a copy of the GNU General Public License
+X * along with this program; if not, write to the Free Software
+X * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+X *
+X ********************************************************************/
+X
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <asm/io.h>
+#include <asm/types.h>
+X
+#define IREG_LOGDEVICE 0x07
+#define IREG_ACTIVATE 0x30
+#define LD_ACTIVATE 0x01
+#define LD_DISACTIVATE 0x00
+#define IREG_EECONTROL 0x3F
+#define IREG_MEMBASEHI 0x40
+#define IREG_MEMBASELO 0x41
+#define IREG_MEMCONTROL 0x42
+#define IREG_MEMRANGEHI 0x43
+#define IREG_MEMRANGELO 0x44
+#define MEMTYPE_8BIT 0x00
+#define MEMTYPE_16BIT 0x02
+#define MEMTYPE_RANGE 0x00
+#define MEMTYPE_HIADDR 0x01
+#define IREG_IO0_BASEHI 0x60
+#define IREG_IO0_BASELO 0x61
+#define IREG_IO1_BASEHI 0x62
+#define IREG_IO1_BASELO 0x63
+#define IREG_IRQ_NUMBER 0x70
+#define IREG_IRQ_TYPE 0x71
+#define IRQTYPE_HIGH 0x02
+#define IRQTYPE_LOW 0x00
+#define IRQTYPE_LEVEL 0x01
+#define IRQTYPE_EDGE 0x00
+X
+#define HIBYTE(w) ((BYTE)(((WORD)(w) >> 8) & 0xFF))
+#define LOBYTE(w) ((BYTE)(w))
+#define MAKEWORD(low,hi) ((WORD)(((BYTE)(low))|(((WORD)((BYTE)(hi)))<<8)))
+X
+typedef __u8 BYTE;
+typedef __u16 USHORT;
+typedef __u16 WORD;
+X
+static int config_port = -1;
+X
+static int msnd_write_cfg(int cfg, int reg, int value)
+{
+X outb(reg, cfg);
+X outb(value, cfg + 1);
+X if (value != inb(cfg + 1)) {
+X fprintf(stderr, "error: msnd_write_cfg: I/O error\n");
+X return -EIO;
+X }
+X return 0;
+}
+X
+static int msnd_read_cfg(int cfg, int reg)
+{
+X outb(reg, cfg);
+X return inb(cfg + 1);
+}
+X
+static int msnd_write_cfg_io0(int cfg, int num, WORD io)
+{
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X if (msnd_write_cfg(cfg, IREG_IO0_BASEHI, HIBYTE(io)))
+X return -EIO;
+X if (msnd_write_cfg(cfg, IREG_IO0_BASELO, LOBYTE(io)))
+X return -EIO;
+X return 0;
+}
+X
+static int msnd_read_cfg_io0(int cfg, int num, WORD *io)
+{
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X
+X *io = MAKEWORD(msnd_read_cfg(cfg, IREG_IO0_BASELO),
+X msnd_read_cfg(cfg, IREG_IO0_BASEHI));
+X
+X return 0;
+}
+X
+static int msnd_write_cfg_io1(int cfg, int num, WORD io)
+{
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X if (msnd_write_cfg(cfg, IREG_IO1_BASEHI, HIBYTE(io)))
+X return -EIO;
+X if (msnd_write_cfg(cfg, IREG_IO1_BASELO, LOBYTE(io)))
+X return -EIO;
+X return 0;
+}
+X
+static int msnd_read_cfg_io1(int cfg, int num, WORD *io)
+{
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X
+X *io = MAKEWORD(msnd_read_cfg(cfg, IREG_IO1_BASELO),
+X msnd_read_cfg(cfg, IREG_IO1_BASEHI));
+X
+X return 0;
+}
+X
+static int msnd_write_cfg_irq(int cfg, int num, WORD irq)
+{
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X if (msnd_write_cfg(cfg, IREG_IRQ_NUMBER, LOBYTE(irq)))
+X return -EIO;
+X if (msnd_write_cfg(cfg, IREG_IRQ_TYPE, IRQTYPE_EDGE))
+X return -EIO;
+X return 0;
+}
+X
+static int msnd_read_cfg_irq(int cfg, int num, WORD *irq)
+{
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X
+X *irq = msnd_read_cfg(cfg, IREG_IRQ_NUMBER);
+X
+X return 0;
+}
+X
+static int msnd_write_cfg_mem(int cfg, int num, int mem)
+{
+X WORD wmem;
+X
+X mem >>= 8;
+X mem &= 0xfff;
+X wmem = (WORD)mem;
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X if (msnd_write_cfg(cfg, IREG_MEMBASEHI, HIBYTE(wmem)))
+X return -EIO;
+X if (msnd_write_cfg(cfg, IREG_MEMBASELO, LOBYTE(wmem)))
+X return -EIO;
+X if (wmem && msnd_write_cfg(cfg, IREG_MEMCONTROL, (MEMTYPE_HIADDR | MEMTYPE_16BIT)))
+X return -EIO;
+X return 0;
+}
+X
+static int msnd_read_cfg_mem(int cfg, int num, int *mem)
+{
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X
+X *mem = MAKEWORD(msnd_read_cfg(cfg, IREG_MEMBASELO),
+X msnd_read_cfg(cfg, IREG_MEMBASEHI));
+X *mem <<= 8;
+X
+X return 0;
+}
+X
+static int msnd_activate_logical(int cfg, int num)
+{
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X if (msnd_write_cfg(cfg, IREG_ACTIVATE, LD_ACTIVATE))
+X return -EIO;
+X return 0;
+}
+X
+static int msnd_write_cfg_logical(int cfg, int num, WORD io0, WORD io1, WORD irq, int mem)
+{
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X if (msnd_write_cfg_io0(cfg, num, io0))
+X return -EIO;
+X if (msnd_write_cfg_io1(cfg, num, io1))
+X return -EIO;
+X if (msnd_write_cfg_irq(cfg, num, irq))
+X return -EIO;
+X if (msnd_write_cfg_mem(cfg, num, mem))
+X return -EIO;
+X if (msnd_activate_logical(cfg, num))
+X return -EIO;
+X return 0;
+}
+X
+static int msnd_read_cfg_logical(int cfg, int num, WORD *io0, WORD *io1, WORD *irq, int *mem)
+{
+X if (msnd_write_cfg(cfg, IREG_LOGDEVICE, num))
+X return -EIO;
+X if (msnd_read_cfg_io0(cfg, num, io0))
+X return -EIO;
+X if (msnd_read_cfg_io1(cfg, num, io1))
+X return -EIO;
+X if (msnd_read_cfg_irq(cfg, num, irq))
+X return -EIO;
+X if (msnd_read_cfg_mem(cfg, num, mem))
+X return -EIO;
+X return 0;
+}
+X
+static void usage(void)
+{
+X fprintf(stderr,
+X "\n"
+X "pinnaclecfg 1.0\n"
+X "\n"
+X "usage: pinnaclecfg <config port> [device config]\n"
+X "\n"
+X "This is for use with the card in NON-PnP mode only.\n"
+X "\n"
+X "Available devices (not all available for Fiji):\n"
+X "\n"
+X " Device Description\n"
+X " -------------------------------------------------------------------\n"
+X " reset Reset all devices (i.e. disable)\n"
+X " show Display current device configurations\n"
+X "\n"
+X " dsp <io> <irq> <mem> Audio device\n"
+X " mpu <io> <irq> Internal Kurzweil synth\n"
+X " ide <io0> <io1> <irq> On-board IDE controller\n"
+X " joystick <io> Joystick port\n"
+X "\n");
+X exit(1);
+}
+X
+static int cfg_reset(void)
+{
+X int i;
+X
+X for (i = 0; i < 4; ++i)
+X msnd_write_cfg_logical(config_port, i, 0, 0, 0, 0);
+X
+X return 0;
+}
+X
+static int cfg_show(void)
+{
+X int i;
+X int count = 0;
+X
+X for (i = 0; i < 4; ++i) {
+X WORD io0, io1, irq;
+X int mem;
+X msnd_read_cfg_logical(config_port, i, &io0, &io1, &irq, &mem);
+X switch (i) {
+X case 0:
+X if (io0 || irq || mem) {
+X printf("dsp 0x%x %d 0x%x\n", io0, irq, mem);
+X ++count;
+X }
+X break;
+X case 1:
+X if (io0 || irq) {
+X printf("mpu 0x%x %d\n", io0, irq);
+X ++count;
+X }
+X break;
+X case 2:
+X if (io0 || io1 || irq) {
+X printf("ide 0x%x 0x%x %d\n", io0, io1, irq);
+X ++count;
+X }
+X break;
+X case 3:
+X if (io0) {
+X printf("joystick 0x%x\n", io0);
+X ++count;
+X }
+X break;
+X }
+X }
+X
+X if (count == 0)
+X fprintf(stderr, "no devices configured\n");
+X
+X return 0;
+}
+X
+static int cfg_dsp(int argc, char *argv[])
+{
+X int io, irq, mem;
+X
+X if (argc < 3 ||
+X sscanf(argv[0], "0x%x", &io) != 1 ||
+X sscanf(argv[1], "%d", &irq) != 1 ||
+X sscanf(argv[2], "0x%x", &mem) != 1)
+X usage();
+X
+X if (!(io == 0x290 ||
+X io == 0x260 ||
+X io == 0x250 ||
+X io == 0x240 ||
+X io == 0x230 ||
+X io == 0x220 ||
+X io == 0x210 ||
+X io == 0x3e0)) {
+X fprintf(stderr, "error: io must be one of "
+X "210, 220, 230, 240, 250, 260, 290, or 3E0\n");
+X usage();
+X }
+X
+X if (!(irq == 5 ||
+X irq == 7 ||
+X irq == 9 ||
+X irq == 10 ||
+X irq == 11 ||
+X irq == 12)) {
+X fprintf(stderr, "error: irq must be one of "
+X "5, 7, 9, 10, 11 or 12\n");
+X usage();
+X }
+X
+X if (!(mem == 0xb0000 ||
+X mem == 0xc8000 ||
+X mem == 0xd0000 ||
+X mem == 0xd8000 ||
+X mem == 0xe0000 ||
+X mem == 0xe8000)) {
+X fprintf(stderr, "error: mem must be one of "
+X "0xb0000, 0xc8000, 0xd0000, 0xd8000, 0xe0000 or 0xe8000\n");
+X usage();
+X }
+X
+X return msnd_write_cfg_logical(config_port, 0, io, 0, irq, mem);
+}
+X
+static int cfg_mpu(int argc, char *argv[])
+{
+X int io, irq;
+X
+X if (argc < 2 ||
+X sscanf(argv[0], "0x%x", &io) != 1 ||
+X sscanf(argv[1], "%d", &irq) != 1)
+X usage();
+X
+X return msnd_write_cfg_logical(config_port, 1, io, 0, irq, 0);
+}
+X
+static int cfg_ide(int argc, char *argv[])
+{
+X int io0, io1, irq;
+X
+X if (argc < 3 ||
+X sscanf(argv[0], "0x%x", &io0) != 1 ||
+X sscanf(argv[0], "0x%x", &io1) != 1 ||
+X sscanf(argv[1], "%d", &irq) != 1)
+X usage();
+X
+X return msnd_write_cfg_logical(config_port, 2, io0, io1, irq, 0);
+}
+X
+static int cfg_joystick(int argc, char *argv[])
+{
+X int io;
+X
+X if (argc < 1 ||
+X sscanf(argv[0], "0x%x", &io) != 1)
+X usage();
+X
+X return msnd_write_cfg_logical(config_port, 3, io, 0, 0, 0);
+}
+X
+int main(int argc, char *argv[])
+{
+X char *device;
+X int rv = 0;
+X
+X --argc; ++argv;
+X
+X if (argc < 2)
+X usage();
+X
+X sscanf(argv[0], "0x%x", &config_port);
+X if (config_port != 0x250 && config_port != 0x260 && config_port != 0x270) {
+X fprintf(stderr, "error: <config port> must be 0x250, 0x260 or 0x270\n");
+X exit(1);
+X }
+X if (ioperm(config_port, 2, 1)) {
+X perror("ioperm");
+X fprintf(stderr, "note: pinnaclecfg must be run as root\n");
+X exit(1);
+X }
+X device = argv[1];
+X
+X argc -= 2; argv += 2;
+X
+X if (strcmp(device, "reset") == 0)
+X rv = cfg_reset();
+X else if (strcmp(device, "show") == 0)
+X rv = cfg_show();
+X else if (strcmp(device, "dsp") == 0)
+X rv = cfg_dsp(argc, argv);
+X else if (strcmp(device, "mpu") == 0)
+X rv = cfg_mpu(argc, argv);
+X else if (strcmp(device, "ide") == 0)
+X rv = cfg_ide(argc, argv);
+X else if (strcmp(device, "joystick") == 0)
+X rv = cfg_joystick(argc, argv);
+X else {
+X fprintf(stderr, "error: unknown device %s\n", device);
+X usage();
+X }
+X
+X if (rv)
+X fprintf(stderr, "error: device configuration failed\n");
+X
+X return 0;
+}
+SHAR_EOF
+ $shar_touch -am 1204092598 'MultiSound.d/pinnaclecfg.c' &&
+ chmod 0664 'MultiSound.d/pinnaclecfg.c' ||
+ $echo 'restore of' 'MultiSound.d/pinnaclecfg.c' 'failed'
+ if ( md5sum --help 2>&1 | grep 'sage: md5sum \[' ) >/dev/null 2>&1 \
+ && ( md5sum --version 2>&1 | grep -v 'textutils 1.12' ) >/dev/null; then
+ md5sum -c << SHAR_EOF >/dev/null 2>&1 \
+ || $echo 'MultiSound.d/pinnaclecfg.c:' 'MD5 check failed'
+366bdf27f0db767a3c7921d0a6db20fe MultiSound.d/pinnaclecfg.c
+SHAR_EOF
+ else
+ shar_count="`LC_ALL= LC_CTYPE= LANG= wc -c < 'MultiSound.d/pinnaclecfg.c'`"
+ test 10235 -eq "$shar_count" ||
+ $echo 'MultiSound.d/pinnaclecfg.c:' 'original size' '10235,' 'current size' "$shar_count!"
+ fi
+fi
+# ============= MultiSound.d/Makefile ==============
+if test -f 'MultiSound.d/Makefile' && test "$first_param" != -c; then
+ $echo 'x -' SKIPPING 'MultiSound.d/Makefile' '(file already exists)'
+else
+ $echo 'x -' extracting 'MultiSound.d/Makefile' '(text)'
+ sed 's/^X//' << 'SHAR_EOF' > 'MultiSound.d/Makefile' &&
+CC = gcc
+CFLAGS = -O
+PROGS = setdigital msndreset pinnaclecfg conv
+X
+all: $(PROGS)
+X
+clean:
+X rm -f $(PROGS)
+SHAR_EOF
+ $shar_touch -am 1204092398 'MultiSound.d/Makefile' &&
+ chmod 0664 'MultiSound.d/Makefile' ||
+ $echo 'restore of' 'MultiSound.d/Makefile' 'failed'
+ if ( md5sum --help 2>&1 | grep 'sage: md5sum \[' ) >/dev/null 2>&1 \
+ && ( md5sum --version 2>&1 | grep -v 'textutils 1.12' ) >/dev/null; then
+ md5sum -c << SHAR_EOF >/dev/null 2>&1 \
+ || $echo 'MultiSound.d/Makefile:' 'MD5 check failed'
+76ca8bb44e3882edcf79c97df6c81845 MultiSound.d/Makefile
+SHAR_EOF
+ else
+ shar_count="`LC_ALL= LC_CTYPE= LANG= wc -c < 'MultiSound.d/Makefile'`"
+ test 106 -eq "$shar_count" ||
+ $echo 'MultiSound.d/Makefile:' 'original size' '106,' 'current size' "$shar_count!"
+ fi
+fi
+# ============= MultiSound.d/conv.l ==============
+if test -f 'MultiSound.d/conv.l' && test "$first_param" != -c; then
+ $echo 'x -' SKIPPING 'MultiSound.d/conv.l' '(file already exists)'
+else
+ $echo 'x -' extracting 'MultiSound.d/conv.l' '(text)'
+ sed 's/^X//' << 'SHAR_EOF' > 'MultiSound.d/conv.l' &&
+%%
+[ \n\t,\r]
+\;.*
+DB
+[0-9A-Fa-f]+H { int n; sscanf(yytext, "%xH", &n); printf("%c", n); }
+%%
+int yywrap() { return 1; }
+main() { yylex(); }
+SHAR_EOF
+ $shar_touch -am 0828231798 'MultiSound.d/conv.l' &&
+ chmod 0664 'MultiSound.d/conv.l' ||
+ $echo 'restore of' 'MultiSound.d/conv.l' 'failed'
+ if ( md5sum --help 2>&1 | grep 'sage: md5sum \[' ) >/dev/null 2>&1 \
+ && ( md5sum --version 2>&1 | grep -v 'textutils 1.12' ) >/dev/null; then
+ md5sum -c << SHAR_EOF >/dev/null 2>&1 \
+ || $echo 'MultiSound.d/conv.l:' 'MD5 check failed'
+d2411fc32cd71a00dcdc1f009e858dd2 MultiSound.d/conv.l
+SHAR_EOF
+ else
+ shar_count="`LC_ALL= LC_CTYPE= LANG= wc -c < 'MultiSound.d/conv.l'`"
+ test 141 -eq "$shar_count" ||
+ $echo 'MultiSound.d/conv.l:' 'original size' '141,' 'current size' "$shar_count!"
+ fi
+fi
+# ============= MultiSound.d/msndreset.c ==============
+if test -f 'MultiSound.d/msndreset.c' && test "$first_param" != -c; then
+ $echo 'x -' SKIPPING 'MultiSound.d/msndreset.c' '(file already exists)'
+else
+ $echo 'x -' extracting 'MultiSound.d/msndreset.c' '(text)'
+ sed 's/^X//' << 'SHAR_EOF' > 'MultiSound.d/msndreset.c' &&
+/*********************************************************************
+X *
+X * msndreset.c - resets the MultiSound card
+X *
+X * Copyright (C) 1998 Andrew Veliath
+X *
+X * This program is free software; you can redistribute it and/or modify
+X * it under the terms of the GNU General Public License as published by
+X * the Free Software Foundation; either version 2 of the License, or
+X * (at your option) any later version.
+X *
+X * This program is distributed in the hope that it will be useful,
+X * but WITHOUT ANY WARRANTY; without even the implied warranty of
+X * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+X * GNU General Public License for more details.
+X *
+X * You should have received a copy of the GNU General Public License
+X * along with this program; if not, write to the Free Software
+X * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+X *
+X ********************************************************************/
+X
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/soundcard.h>
+X
+int main(int argc, char *argv[])
+{
+X int fd;
+X
+X if (argc != 2) {
+X fprintf(stderr, "usage: msndreset <mixer device>\n");
+X exit(1);
+X }
+X
+X if ((fd = open(argv[1], O_RDWR)) < 0) {
+X perror(argv[1]);
+X exit(1);
+X }
+X
+X if (ioctl(fd, SOUND_MIXER_PRIVATE1, 0) < 0) {
+X fprintf(stderr, "error: msnd ioctl reset failed\n");
+X perror("ioctl");
+X close(fd);
+X exit(1);
+X }
+X
+X close(fd);
+X
+X return 0;
+}
+SHAR_EOF
+ $shar_touch -am 1204100698 'MultiSound.d/msndreset.c' &&
+ chmod 0664 'MultiSound.d/msndreset.c' ||
+ $echo 'restore of' 'MultiSound.d/msndreset.c' 'failed'
+ if ( md5sum --help 2>&1 | grep 'sage: md5sum \[' ) >/dev/null 2>&1 \
+ && ( md5sum --version 2>&1 | grep -v 'textutils 1.12' ) >/dev/null; then
+ md5sum -c << SHAR_EOF >/dev/null 2>&1 \
+ || $echo 'MultiSound.d/msndreset.c:' 'MD5 check failed'
+c52f876521084e8eb25e12e01dcccb8a MultiSound.d/msndreset.c
+SHAR_EOF
+ else
+ shar_count="`LC_ALL= LC_CTYPE= LANG= wc -c < 'MultiSound.d/msndreset.c'`"
+ test 1472 -eq "$shar_count" ||
+ $echo 'MultiSound.d/msndreset.c:' 'original size' '1472,' 'current size' "$shar_count!"
+ fi
+fi
+rm -fr _sh01426
+exit 0
diff --git a/Documentation/sound/oss/OPL3 b/Documentation/sound/oss/OPL3
new file mode 100644
index 000000000..2468ff827
--- /dev/null
+++ b/Documentation/sound/oss/OPL3
@@ -0,0 +1,6 @@
+A pure OPL3 card is nice and easy to configure. Simply do
+
+insmod opl3 io=0x388
+
+Change the I/O address in the very unlikely case this card is differently
+configured
diff --git a/Documentation/sound/oss/Opti b/Documentation/sound/oss/Opti
new file mode 100644
index 000000000..4cd5d9ab3
--- /dev/null
+++ b/Documentation/sound/oss/Opti
@@ -0,0 +1,218 @@
+Support for the OPTi 82C931 chip
+--------------------------------
+Note: parts of this README file apply also to other
+cards that use the mad16 driver.
+
+Some items in this README file are based on features
+added to the sound driver after Linux-2.1.91 was out.
+By the time of writing this I do not know which official
+kernel release will include these features.
+Please do not report inconsistencies on older Linux
+kernels.
+
+The OPTi 82C931 is supported in its non-PnP mode.
+Usually you do not need to set jumpers, etc. The sound driver
+will check the card status and if it is required it will
+force the card into a mode in which it can be programmed.
+
+If you have another OS installed on your computer it is recommended
+that Linux and the other OS use the same resources.
+
+Also, it is recommended that resources specified in /etc/modprobe.d/*.conf
+and resources specified in /etc/isapnp.conf agree.
+
+Compiling the sound driver
+--------------------------
+I highly recommend that you build a modularized sound driver.
+This document does not cover a sound-driver which is built in
+the kernel.
+
+Sound card support should be enabled as a module (chose m).
+Answer 'm' for these items:
+ Generic OPL2/OPL3 FM synthesizer support (CONFIG_SOUND_ADLIB)
+ Microsoft Sound System support (CONFIG_SOUND_MSS)
+ Support for OPTi MAD16 and/or Mozart based cards (CONFIG_SOUND_MAD16)
+ FM synthesizer (YM3812/OPL-3) support (CONFIG_SOUND_YM3812)
+
+The configuration menu may ask for addresses, IRQ lines or DMA
+channels. If the card is used as a module the module loading
+options will override these values.
+
+For the OPTi 931 you can answer 'n' to:
+ Support MIDI in older MAD16 based cards (requires SB) (CONFIG_SOUND_MAD16_OLDCARD)
+If you do need MIDI support in a Mozart or C928 based card you
+need to answer 'm' to the above question. In that case you will
+also need to answer 'm' to:
+ '100% Sound Blaster compatibles (SB16/32/64, ESS, Jazz16) support' (CONFIG_SOUND_SB)
+
+Go on and compile your kernel and modules. Install the modules. Run depmod -a.
+
+Using isapnptools
+-----------------
+In most systems with a PnP BIOS you do not need to use isapnp. The
+initialization provided by the BIOS is sufficient for the driver
+to pick up the card and continue initialization.
+
+If that fails, or if you have other PnP cards, you need to use isapnp
+to initialize the card.
+This was tested with isapnptools-1.11 but I recommend that you use
+isapnptools-1.13 (or newer). Run pnpdump to dump the information
+about your PnP cards. Then edit the resulting file and select
+the options of your choice. This file is normally installed as
+/etc/isapnp.conf.
+
+The driver has one limitation with respect to I/O port resources:
+IO3 base must be 0x0E0C. Although isapnp allows other ports, this
+address is hard-coded into the driver.
+
+Using kmod and autoloading the sound driver
+-------------------------------------------
+Config files in '/etc/modprobe.d/' are used as below:
+
+alias mixer0 mad16
+alias audio0 mad16
+alias midi0 mad16
+alias synth0 opl3
+options sb mad16=1
+options mad16 irq=10 dma=0 dma16=1 io=0x530 joystick=1 cdtype=0
+options opl3 io=0x388
+install mad16 /sbin/modprobe -i mad16 && /sbin/ad1848_mixer_reroute 14 8 15 3 16 6
+
+If you have an MPU daughtercard or onboard MPU you will want to add to the
+"options mad16" line - eg
+
+options mad16 irq=5 dma=0 dma16=3 io=0x530 mpu_io=0x330 mpu_irq=9
+
+To set the I/O and IRQ of the MPU.
+
+
+Explain:
+
+alias mixer0 mad16
+alias audio0 mad16
+alias midi0 mad16
+alias synth0 opl3
+
+When any sound device is opened the kernel requests auto-loading
+of char-major-14. There is a built-in alias that translates this
+request to loading the main sound module.
+
+The sound module in its turn will request loading of a sub-driver
+for mixer, audio, midi or synthesizer device. The first 3 are
+supported by the mad16 driver. The synth device is supported
+by the opl3 driver.
+
+There is currently no way to autoload the sound device driver
+if more than one card is installed.
+
+options sb mad16=1
+
+This is left for historical reasons. If you enable the
+config option 'Support MIDI in older MAD16 based cards (requires SB)'
+or if you use an older mad16 driver it will force loading of the
+SoundBlaster driver. This option tells the SB driver not to look
+for a SB card but to wait for the mad16 driver.
+
+options mad16 irq=10 dma=0 dma16=1 io=0x530 joystick=1 cdtype=0
+options opl3 io=0x388
+
+post-install mad16 /sbin/ad1848_mixer_reroute 14 8 15 3 16 6
+
+This sets resources and options for the mad16 and opl3 drivers.
+I use two DMA channels (only one is required) to enable full duplex.
+joystick=1 enables the joystick port. cdtype=0 disables the cd port.
+You can also set mpu_io and mpu_irq in the mad16 options for the
+uart401 driver.
+
+This tells modprobe to run /sbin/ad1848_mixer_reroute after
+mad16 is successfully loaded and initialized. The source
+for ad1848_mixer_reroute is appended to the end of this readme
+file. It is impossible for the sound driver to know the actual
+connections to the mixer. The 3 inputs intended for cd, synth
+and line-in are mapped to the generic inputs line1, line2 and
+line3. This program reroutes these mixer channels to their
+right names (note the right mapping depends on the actual sound
+card that you use).
+The numeric parameters mean:
+ 14=line1 8=cd - reroute line1 to the CD input.
+ 15=line2 3=synth - reroute line2 to the synthesizer input.
+ 16=line3 6=line - reroute line3 to the line input.
+For reference on other input names look at the file
+/usr/include/linux/soundcard.h.
+
+Using a joystick
+-----------------
+You must enable a joystick in the mad16 options. (also
+in /etc/isapnp.conf if you use it).
+Tested with regular analog joysticks.
+
+A CDROM drive connected to the sound card
+-----------------------------------------
+The 82C931 chip has support only for secondary ATAPI cdrom.
+(cdtype=8). Loading the mad16 driver resets the C931 chip
+and if a cdrom was already mounted it may cause a complete
+system hang. Do not use the sound card if you have an alternative.
+If you do use the sound card it is important that you load
+the mad16 driver (use "modprobe mad16" to prevent auto-unloading)
+before the cdrom is accessed the first time.
+
+Using the sound driver built-in to the kernel may help here, but...
+Most new systems have a PnP BIOS and also two IDE controllers.
+The IDE controller on the sound card may be needed only on older
+systems (which have only one IDE controller) but these systems
+also do not have a PnP BIOS - requiring isapnptools and a modularized
+driver.
+
+Known problems
+--------------
+1. See the section on "A CDROM drive connected to the sound card".
+
+2. On my system the codec cannot capture companded sound samples.
+ (eg., recording from /dev/audio). When any companded capture is
+ requested I get stereo-16 bit samples instead. Playback of
+ companded samples works well. Apparently this problem is not common
+ to all C931 based cards. I do not know how to identify cards that
+ have this problem.
+
+Source for ad1848_mixer_reroute.c
+---------------------------------
+#include <stdio.h>
+#include <fcntl.h>
+#include <linux/soundcard.h>
+
+static char *mixer_names[SOUND_MIXER_NRDEVICES] =
+ SOUND_DEVICE_LABELS;
+
+int
+main(int argc, char **argv) {
+ int val, from, to;
+ int i, fd;
+
+ fd = open("/dev/mixer", O_RDWR);
+ if(fd < 0) {
+ perror("/dev/mixer");
+ return 1;
+ }
+
+ for(i = 2; i < argc; i += 2) {
+ from = atoi(argv[i-1]);
+ to = atoi(argv[i]);
+
+ if(to == SOUND_MIXER_NONE)
+ fprintf(stderr, "%s: turning off mixer %s\n",
+ argv[0], mixer_names[to]);
+ else
+ fprintf(stderr, "%s: rerouting mixer %s to %s\n",
+ argv[0], mixer_names[from], mixer_names[to]);
+
+ val = from << 8 | to;
+
+ if(ioctl(fd, SOUND_MIXER_PRIVATE2, &val)) {
+ perror("AD1848 mixer reroute");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
diff --git a/Documentation/sound/oss/PAS16 b/Documentation/sound/oss/PAS16
new file mode 100644
index 000000000..5c27229ee
--- /dev/null
+++ b/Documentation/sound/oss/PAS16
@@ -0,0 +1,162 @@
+Pro Audio Spectrum 16 for 2.3.99 and later
+=========================================
+by Thomas Molina (tmolina@home.com)
+last modified 3 Mar 2001
+Acknowledgement to Axel Boldt (boldt@math.ucsb.edu) for stuff taken
+from Configure.help, Riccardo Facchetti for stuff from README.OSS,
+and others whose names I could not find.
+
+This documentation is relevant for the PAS16 driver (pas2_card.c and
+friends) under kernel version 2.3.99 and later. If you are
+unfamiliar with configuring sound under Linux, please read the
+Sound-HOWTO, Documentation/sound/oss/Introduction and other
+relevant docs first.
+
+The following information is relevant information from README.OSS
+and legacy docs for the Pro Audio Spectrum 16 (PAS16):
+==================================================================
+
+The pas2_card.c driver supports the following cards --
+Pro Audio Spectrum 16 (PAS16) and compatibles:
+ Pro Audio Spectrum 16
+ Pro Audio Studio 16
+ Logitech Sound Man 16
+ NOTE! The original Pro Audio Spectrum as well as the PAS+ are not
+ and will not be supported by the driver.
+
+The sound driver configuration dialog
+-------------------------------------
+
+Sound configuration starts by making some yes/no questions. Be careful
+when answering to these questions since answering y to a question may
+prevent some later ones from being asked. For example don't answer y to
+the question about (PAS16) if you don't really have a PAS16. Sound
+configuration may also be made modular by answering m to configuration
+options presented.
+
+Note also that all questions may not be asked. The configuration program
+may disable some questions depending on the earlier choices. It may also
+select some options automatically as well.
+
+ "ProAudioSpectrum 16 support",
+ - Answer 'y'_ONLY_ if you have a Pro Audio Spectrum _16_,
+ Pro Audio Studio 16 or Logitech SoundMan 16 (be sure that
+ you read the above list correctly). Don't answer 'y' if you
+ have some other card made by Media Vision or Logitech since they
+ are not PAS16 compatible.
+ NOTE! Since 3.5-beta10 you need to enable SB support (next question)
+ if you want to use the SB emulation of PAS16. It's also possible to
+ the emulation if you want to use a true SB card together with PAS16
+ (there is another question about this that is asked later).
+
+ "Generic OPL2/OPL3 FM synthesizer support",
+ - Answer 'y' if your card has a FM chip made by Yamaha (OPL2/OPL3/OPL4).
+ The PAS16 has an OPL3-compatible FM chip.
+
+With PAS16 you can use two audio device files at the same time. /dev/dsp (and
+/dev/audio) is connected to the 8/16 bit native codec and the /dev/dsp1 (and
+/dev/audio1) is connected to the SB emulation (8 bit mono only).
+
+
+The new stuff for 2.3.99 and later
+============================================================================
+The following configuration options are relevant to configuring the PAS16:
+
+Sound card support
+CONFIG_SOUND
+ If you have a sound card in your computer, i.e. if it can say more
+ than an occasional beep, say Y. Be sure to have all the information
+ about your sound card and its configuration down (I/O port,
+ interrupt and DMA channel), because you will be asked for it.
+
+ You want to read the Sound-HOWTO, available from
+ http://www.tldp.org/docs.html#howto . General information
+ about the modular sound system is contained in the files
+ Documentation/sound/oss/Introduction. The file
+ Documentation/sound/oss/README.OSS contains some slightly outdated but
+ still useful information as well.
+
+OSS sound modules
+CONFIG_SOUND_OSS
+ OSS is the Open Sound System suite of sound card drivers. They make
+ sound programming easier since they provide a common API. Say Y or M
+ here (the module will be called sound.o) if you haven't found a
+ driver for your sound card above, then pick your driver from the
+ list below.
+
+Persistent DMA buffers
+CONFIG_SOUND_DMAP
+ Linux can often have problems allocating DMA buffers for ISA sound
+ cards on machines with more than 16MB of RAM. This is because ISA
+ DMA buffers must exist below the 16MB boundary and it is quite
+ possible that a large enough free block in this region cannot be
+ found after the machine has been running for a while. If you say Y
+ here the DMA buffers (64Kb) will be allocated at boot time and kept
+ until the shutdown. This option is only useful if you said Y to
+ "OSS sound modules", above. If you said M to "OSS sound modules"
+ then you can get the persistent DMA buffer functionality by passing
+ the command-line argument "dmabuf=1" to the sound.o module.
+
+ Say y here for PAS16.
+
+ProAudioSpectrum 16 support
+CONFIG_SOUND_PAS
+ Answer Y only if you have a Pro Audio Spectrum 16, ProAudio Studio
+ 16 or Logitech SoundMan 16 sound card. Don't answer Y if you have
+ some other card made by Media Vision or Logitech since they are not
+ PAS16 compatible. It is not necessary to enable the separate
+ Sound Blaster support; it is included in the PAS driver.
+
+ If you compile the driver into the kernel, you have to add
+ "pas2=<io>,<irq>,<dma>,<dma2>,<sbio>,<sbirq>,<sbdma>,<sbdma2>
+ to the kernel command line.
+
+FM Synthesizer (YM3812/OPL-3) support
+CONFIG_SOUND_YM3812
+ Answer Y if your card has a FM chip made by Yamaha (OPL2/OPL3/OPL4).
+ Answering Y is usually a safe and recommended choice, however some
+ cards may have software (TSR) FM emulation. Enabling FM support with
+ these cards may cause trouble (I don't currently know of any such
+ cards, however).
+ Please read the file Documentation/sound/oss/OPL3 if your card has an
+ OPL3 chip.
+ If you compile the driver into the kernel, you have to add
+ "opl3=<io>" to the kernel command line.
+
+ If you compile your drivers into the kernel, you MUST configure
+ OPL3 support as a module for PAS16 support to work properly.
+ You can then get OPL3 functionality by issuing the command:
+ insmod opl3
+ In addition, you must either add the following line to
+ /etc/modprobe.d/*.conf:
+ options opl3 io=0x388
+ or else add the following line to /etc/lilo.conf:
+ opl3=0x388
+
+
+EXAMPLES
+===================================================================
+To use the PAS16 in my computer I have enabled the following sound
+configuration options:
+
+CONFIG_SOUND=y
+CONFIG_SOUND_OSS=y
+CONFIG_SOUND_TRACEINIT=y
+CONFIG_SOUND_DMAP=y
+CONFIG_SOUND_PAS=y
+CONFIG_SOUND_SB=n
+CONFIG_SOUND_YM3812=m
+
+I have also included the following append line in /etc/lilo.conf:
+append="pas2=0x388,10,3,-1,0x220,5,1,-1 sb=0x220,5,1,-1 opl3=0x388"
+
+The io address of 0x388 is default configuration on the PAS16. The
+irq of 10 and dma of 3 may not match your installation. The above
+configuration enables PAS16, 8-bit Soundblaster and OPL3
+functionality. If Soundblaster functionality is not desired, the
+following line would be appropriate:
+append="pas2=0x388,10,3,-1,0,-1,-1,-1 opl3=0x388"
+
+If sound is built totally modular, the above options may be
+specified in /etc/modprobe.d/*.conf for pas2, sb and opl3
+respectively.
diff --git a/Documentation/sound/oss/PSS b/Documentation/sound/oss/PSS
new file mode 100644
index 000000000..0892cd7d8
--- /dev/null
+++ b/Documentation/sound/oss/PSS
@@ -0,0 +1,34 @@
+The PSS cards and other ECHO based cards provide an onboard DSP with
+downloadable programs and also has an AD1848 "Microsoft Sound System"
+device. The PSS driver enables MSS and MPU401 modes of the card. SB
+is not enabled since it doesn't work concurrently with MSS.
+
+If you build this driver as a module then the driver takes the following
+parameters
+
+pss_io. The I/O base the PSS card is configured at (normally 0x220
+ or 0x240)
+
+mss_io The base address of the Microsoft Sound System interface.
+ This is normally 0x530, but may be 0x604 or other addresses.
+
+mss_irq The interrupt assigned to the Microsoft Sound System
+ emulation. IRQ's 3,5,7,9,10,11 and 12 are available. If you
+ get IRQ errors be sure to check the interrupt is set to
+ "ISA/Legacy" in the BIOS on modern machines.
+
+mss_dma The DMA channel used by the Microsoft Sound System.
+ This can be 0, 1, or 3. DMA 0 is not available on older
+ machines and will cause a crash on them.
+
+mpu_io The MPU emulation base address. This sets the base of the
+ synthesizer. It is typically 0x330 but can be altered.
+
+mpu_irq The interrupt to use for the synthesizer. It must differ
+ from the IRQ used by the Microsoft Sound System port.
+
+
+The mpu_io/mpu_irq fields are optional. If they are not specified the
+synthesizer parts are not configured.
+
+/*(DEBLOBBED)*/ \ No newline at end of file
diff --git a/Documentation/sound/oss/PSS-updates b/Documentation/sound/oss/PSS-updates
new file mode 100644
index 000000000..a11a41fe4
--- /dev/null
+++ b/Documentation/sound/oss/PSS-updates
@@ -0,0 +1,82 @@
+ This file contains notes for users of PSS sound cards who wish to use the
+newly added features of the newest version of this driver.
+
+ The major enhancements present in this new revision of this driver is the
+addition of two new module parameters that allow you to take full advantage of
+all the features present on your PSS sound card. These features include the
+ability to enable both the builtin CDROM and joystick ports.
+
+pss_enable_joystick
+
+ This parameter is basically a flag. A 0 will leave the joystick port
+disabled, while a non-zero value would enable the joystick port. The default
+setting is pss_enable_joystick=0 as this keeps this driver fully compatible
+with systems that were using previous versions of this driver. If you wish to
+enable the joystick port you will have to add pss_enable_joystick=1 as an
+argument to the driver. To actually use the joystick port you will then have
+to load the joystick driver itself. Just remember to load the joystick driver
+AFTER the pss sound driver.
+
+pss_cdrom_port
+
+ This parameter takes a port address as its parameter. Any available port
+address can be specified to enable the CDROM port, except for 0x0 and -1 as
+these values would leave the port disabled. Like the joystick port, the cdrom
+port will require that an appropriate CDROM driver be loaded before you can make
+use of the newly enabled CDROM port. Like the joystick port option above,
+remember to load the CDROM driver AFTER the pss sound driver. While it may
+differ on some PSS sound cards, all the PSS sound cards that I have seen have a
+builtin Wearnes CDROM port. If this is the case with your PSS sound card you
+should load aztcd with the appropriate port option that matches the port you
+assigned to the CDROM port when you loaded your pss sound driver. (ex.
+modprobe pss pss_cdrom_port=0x340 && modprobe aztcd aztcd=0x340) The default
+setting of this parameter leaves the CDROM port disabled to maintain full
+compatibility with systems using previous versions of this driver.
+
+ Other options have also been added for the added convenience and utility
+of the user. These options are only available if this driver is loaded as a
+module.
+
+pss_no_sound
+
+ This module parameter is a flag that can be used to tell the driver to
+just configure non-sound components. 0 configures all components, a non-0
+value will only attept to configure the CDROM and joystick ports. This
+parameter can be used by a user who only wished to use the builtin joystick
+and/or CDROM port(s) of his PSS sound card. If this driver is loaded with this
+parameter and with the parameter below set to true then a user can safely unload
+this driver with the following command "rmmod pss && rmmod ad1848 && rmmod
+mpu401 && rmmod sound && rmmod soundcore" and retain the full functionality of
+his CDROM and/or joystick port(s) while gaining back the memory previously used
+by the sound drivers. This default setting of this parameter is 0 to retain
+full behavioral compatibility with previous versions of this driver.
+
+pss_keep_settings
+
+ This parameter can be used to specify whether you want the driver to reset
+all emulations whenever its unloaded. This can be useful for those who are
+sharing resources (io ports, IRQ's, DMA's) between different ISA cards. This
+flag can also be useful in that future versions of this driver may reset all
+emulations by default on the driver's unloading (as it probably should), so
+specifying it now will ensure that all future versions of this driver will
+continue to work as expected. The default value of this parameter is 1 to
+retain full behavioral compatibility with previous versions of this driver.
+
+/*(DEBLOBBED)*/
+
+Examples:
+
+# Normal PSS sound card system, loading of drivers.
+# Should be specified in an rc file (ex. Slackware uses /etc/rc.d/rc.modules).
+
+/sbin/modprobe pss pss_io=0x220 mpu_io=0x338 mpu_irq=9 mss_io=0x530 mss_irq=10 mss_dma=1 pss_cdrom_port=0x340 pss_enable_joystick=1
+/sbin/modprobe aztcd aztcd=0x340
+/sbin/modprobe joystick
+
+# System using the PSS sound card just for its CDROM and joystick ports.
+# Should be specified in an rc file (ex. Slackware uses /etc/rc.d/rc.modules).
+
+/sbin/modprobe pss pss_io=0x220 pss_cdrom_port=0x340 pss_enable_joystick=1 pss_no_sound=1
+/sbin/rmmod pss && /sbin/rmmod ad1848 && /sbin/rmmod mpu401 && /sbin/rmmod sound && /sbin/rmmod soundcore # This line not needed, but saves memory.
+/sbin/modprobe aztcd aztcd=0x340
+/sbin/modprobe joystick
diff --git a/Documentation/sound/oss/README.OSS b/Documentation/sound/oss/README.OSS
new file mode 100644
index 000000000..3463b8272
--- /dev/null
+++ b/Documentation/sound/oss/README.OSS
@@ -0,0 +1,1425 @@
+Introduction
+------------
+
+This file is a collection of all the old Readme files distributed with
+OSS/Lite by Hannu Savolainen. Since the new Linux sound driver is founded
+on it I think these information may still be interesting for users that
+have to configure their sound system.
+
+Be warned: Alan Cox is the current maintainer of the Linux sound driver so if
+you have problems with it, please contact him or the current device-specific
+driver maintainer (e.g. for aedsp16 specific problems contact me). If you have
+patches, contributions or suggestions send them to Alan: I'm sure they are
+welcome.
+
+In this document you will find a lot of references about OSS/Lite or ossfree:
+they are gone forever. Keeping this in mind and with a grain of salt this
+document can be still interesting and very helpful.
+
+[ File edited 17.01.1999 - Riccardo Facchetti ]
+[ Edited miroSOUND section 19.04.2001 - Robert Siemer ]
+
+OSS/Free version 3.8 release notes
+----------------------------------
+
+Please read the SOUND-HOWTO (available from sunsite.unc.edu and other Linux FTP
+sites). It gives instructions about using sound with Linux. It's bit out of
+date but still very useful. Information about bug fixes and such things
+is available from the web page (see above).
+
+Please check http://www.opensound.com/pguide for more info about programming
+with OSS API.
+
+ ====================================================
+- THIS VERSION ____REQUIRES____ Linux 2.1.57 OR LATER.
+ ====================================================
+
+Packages "snd-util-3.8.tar.gz" and "snd-data-0.1.tar.Z"
+contain useful utilities to be used with this driver.
+See http://www.opensound.com/ossfree/ for
+download instructions.
+
+If you are looking for the installation instructions, please
+look forward into this document.
+
+Supported sound cards
+---------------------
+
+See below.
+
+Contributors
+------------
+
+This driver contains code by several contributors. In addition several other
+persons have given useful suggestions. The following is a list of major
+contributors. (I could have forgotten some names.)
+
+ Craig Metz 1/2 of the PAS16 Mixer and PCM support
+ Rob Hooft Volume computation algorithm for the FM synth.
+ Mika Liljeberg uLaw encoding and decoding routines
+ Jeff Tranter Linux SOUND HOWTO document
+ Greg Lee Volume computation algorithm for the GUS and
+ lots of valuable suggestions.
+ Andy Warner ISC port
+ Jim Lowe,
+ Amancio Hasty Jr FreeBSD/NetBSD port
+ Anders Baekgaard Bug hunting and valuable suggestions.
+ Joerg Schubert SB16 DSP support (initial version).
+ Andrew Robinson Improvements to the GUS driver
+ Megens SA MIDI recording for SB and SB Pro (initial version).
+ Mikael Nordqvist Linear volume support for GUS and
+ nonblocking /dev/sequencer.
+ Ian Hartas SVR4.2 port
+ Markus Aroharju and
+ Risto Kankkunen Major contributions to the mixer support
+ of GUS v3.7.
+ Hunyue Yau Mixer support for SG NX Pro.
+ Marc Hoffman PSS support (initial version).
+ Rainer Vranken Initialization for Jazz16 (initial version).
+ Peter Trattler Initial version of loadable module support for Linux.
+ JRA Gibson 16 bit mode for Jazz16 (initial version)
+ Davor Jadrijevic MAD16 support (initial version)
+ Gregor Hoffleit Mozart support (initial version)
+ Riccardo Facchetti Audio Excel DSP 16 (aedsp16) support
+ James Hightower Spotting a tiny but important bug in CS423x support.
+ Denis Sablic OPTi 82C924 specific enhancements (non PnP mode)
+ Tim MacKenzie Full duplex support for OPTi 82C930.
+
+ Please look at lowlevel/README for more contributors.
+
+There are probably many other names missing. If you have sent me some
+patches and your name is not in the above list, please inform me.
+
+Sending your contributions or patches
+-------------------------------------
+
+First of all it's highly recommended to contact me before sending anything
+or before even starting to do any work. Tell me what you suggest to be
+changed or what you have planned to do. Also ensure you are using the
+very latest (development) version of OSS/Free since the change may already be
+implemented there. In general it's a major waste of time to try to improve a
+several months old version. Information about the latest version can be found
+from http://www.opensound.com/ossfree. In general there is no point in
+sending me patches relative to production kernels.
+
+Sponsors etc.
+-------------
+
+The following companies have greatly helped development of this driver
+in form of a free copy of their product:
+
+Novell, Inc. UnixWare personal edition + SDK
+The Santa Cruz Operation, Inc. A SCO OpenServer + SDK
+Ensoniq Corp, a SoundScape card and extensive amount of assistance
+MediaTrix Peripherals Inc, a AudioTrix Pro card + SDK
+Acer, Inc. a pair of AcerMagic S23 cards.
+
+In addition the following companies have provided me sufficient amount
+of technical information at least some of their products (free or $$$):
+
+Advanced Gravis Computer Technology Ltd.
+Media Vision Inc.
+Analog Devices Inc.
+Logitech Inc.
+Aztech Labs Inc.
+Crystal Semiconductor Corporation,
+Integrated Circuit Systems Inc.
+OAK Technology
+OPTi
+Turtle Beach
+miro
+Ad Lib Inc. ($$)
+Music Quest Inc. ($$)
+Creative Labs ($$$)
+
+If you have some problems
+=========================
+
+Read the sound HOWTO (sunsite.unc.edu:/pub/Linux/docs/...?).
+Also look at the home page (http://www.opensound.com/ossfree). It may
+contain info about some recent bug fixes.
+
+It's likely that you have some problems when trying to use the sound driver
+first time. Sound cards don't have standard configuration so there are no
+good default configuration to use. Please try to use same I/O, DMA and IRQ
+values for the sound card than with DOS.
+
+If you get an error message when trying to use the driver, please look
+at /var/adm/messages for more verbose error message.
+
+
+The following errors are likely with /dev/dsp and /dev/audio.
+
+ - "No such device or address".
+ This error indicates that there are no suitable hardware for the
+ device file or the sound driver has been compiled without support for
+ this particular device. For example /dev/audio and /dev/dsp will not
+ work if "digitized voice support" was not enabled during "make config".
+
+ - "Device or resource busy". Probably the IRQ (or DMA) channel
+ required by the sound card is in use by some other device/driver.
+
+ - "I/O error". Almost certainly (99%) it's an IRQ or DMA conflict.
+ Look at the kernel messages in /var/adm/notice for more info.
+
+ - "Invalid argument". The application is calling ioctl()
+ with impossible parameters. Check that the application is
+ for sound driver version 2.X or later.
+
+Linux installation
+==================
+
+IMPORTANT! Read this if you are installing a separately
+ distributed version of this driver.
+
+ Check that your kernel version works with this
+ release of the driver (see Readme). Also verify
+ that your current kernel version doesn't have more
+ recent sound driver version than this one. IT'S HIGHLY
+ RECOMMENDED THAT YOU USE THE SOUND DRIVER VERSION THAT
+ IS DISTRIBUTED WITH KERNEL SOURCES.
+
+- When installing separately distributed sound driver you should first
+ read the above notice. Then try to find proper directory where and how
+ to install the driver sources. You should not try to install a separately
+ distributed driver version if you are not able to find the proper way
+ yourself (in this case use the version that is distributed with kernel
+ sources). Remove old version of linux/drivers/sound directory before
+ installing new files.
+
+- To build the device files you need to run the enclosed shell script
+ (see below). You need to do this only when installing sound driver
+ first time or when upgrading to much recent version than the earlier
+ one.
+
+- Configure and compile Linux as normally (remember to include the
+ sound support during "make config"). Please refer to kernel documentation
+ for instructions about configuring and compiling kernel. File Readme.cards
+ contains card specific instructions for configuring this driver for
+ use with various sound cards.
+
+Boot time configuration (using lilo and insmod)
+-----------------------------------------------
+
+This information has been removed. Too many users didn't believe
+that it's really not necessary to use this method. Please look at
+Readme of sound driver version 3.0.1 if you still want to use this method.
+
+Problems
+--------
+
+Common error messages:
+
+- /dev/???????: No such file or directory.
+Run the script at the end of this file.
+
+- /dev/???????: No such device.
+You are not running kernel which contains the sound driver. When using
+modularized sound driver this error means that the sound driver is not
+loaded.
+
+- /dev/????: No such device or address.
+Sound driver didn't detect suitable card when initializing. Please look at
+Readme.cards for info about configuring the driver with your card. Also
+check for possible boot (insmod) time error messages in /var/adm/messages.
+
+- Other messages or problems
+Please check http://www.opensound.com/ossfree for more info.
+
+Configuring version 3.8 (for Linux) with some common sound cards
+================================================================
+
+This document describes configuring sound cards with the freeware version of
+Open Sound Systems (OSS/Free). Information about the commercial version
+(OSS/Linux) and its configuration is available from
+http://www.opensound.com/linux.html. Information presented here is
+not valid for OSS/Linux.
+
+If you are unsure about how to configure OSS/Free
+you can download the free evaluation version of OSS/Linux from the above
+address. There is a chance that it can autodetect your sound card. In this case
+you can use the information included in soundon.log when configuring OSS/Free.
+
+
+IMPORTANT! This document covers only cards that were "known" when
+ this driver version was released. Please look at
+ http://www.opensound.com/ossfree for info about
+ cards introduced recently.
+
+ When configuring the sound driver, you should carefully
+ check each sound configuration option (particularly
+ "Support for /dev/dsp and /dev/audio"). The default values
+ offered by these programs are not necessarily valid.
+
+
+THE BIGGEST MISTAKES YOU CAN MAKE
+=================================
+
+1. Assuming that the card is Sound Blaster compatible when it's not.
+--------------------------------------------------------------------
+
+The number one mistake is to assume that your card is compatible with
+Sound Blaster. Only the cards made by Creative Technology or which have
+one or more chips labeled by Creative are SB compatible. In addition there
+are few sound chipsets which are SB compatible in Linux such as ESS1688 or
+Jazz16. Note that SB compatibility in DOS/Windows does _NOT_ mean anything
+in Linux.
+
+IF YOU REALLY ARE 150% SURE YOU HAVE A SOUND BLASTER YOU CAN SKIP THE REST OF
+THIS CHAPTER.
+
+For most other "supposed to be SB compatible" cards you have to use other
+than SB drivers (see below). It is possible to get most sound cards to work
+in SB mode but in general it's a complete waste of time. There are several
+problems which you will encounter by using SB mode with cards that are not
+truly SB compatible:
+
+- The SB emulation is at most SB Pro (DSP version 3.x) which means that
+you get only 8 bit audio (there is always an another ("native") mode which
+gives the 16 bit capability). The 8 bit only operation is the reason why
+many users claim that sound quality in Linux is much worse than in DOS.
+In addition some applications require 16 bit mode and they produce just
+noise with a 8 bit only device.
+- The card may work only in some cases but refuse to work most of the
+time. The SB compatible mode always requires special initialization which is
+done by the DOS/Windows drivers. This kind of cards work in Linux after
+you have warm booted it after DOS but they don't work after cold boot
+(power on or reset).
+- You get the famous "DMA timed out" messages. Usually all SB clones have
+software selectable IRQ and DMA settings. If the (power on default) values
+currently used by the card don't match configuration of the driver you will
+get the above error message whenever you try to record or play. There are
+few other reasons to the DMA timeout message but using the SB mode seems
+to be the most common cause.
+
+2. Trying to use a PnP (Plug & Play) card just like an ordinary sound card
+--------------------------------------------------------------------------
+
+Plug & Play is a protocol defined by Intel and Microsoft. It lets operating
+systems to easily identify and reconfigure I/O ports, IRQs and DMAs of ISA
+cards. The problem with PnP cards is that the standard Linux doesn't currently
+(versions 2.1.x and earlier) don't support PnP. This means that you will have
+to use some special tricks (see later) to get a PnP card alive. Many PnP cards
+work after they have been initialized but this is not always the case.
+
+There are sometimes both PnP and non-PnP versions of the same sound card.
+The non-PnP version is the original model which usually has been discontinued
+more than an year ago. The PnP version has the same name but with "PnP"
+appended to it (sometimes not). This causes major confusion since the non-PnP
+model works with Linux but the PnP one doesn't.
+
+You should carefully check if "Plug & Play" or "PnP" is mentioned in the name
+of the card or in the documentation or package that came with the card.
+Everything described in the rest of this document is not necessarily valid for
+PnP models of sound cards even you have managed to wake up the card properly.
+Many PnP cards are simply too different from their non-PnP ancestors which are
+covered by this document.
+
+
+Cards that are not (fully) supported by this driver
+===================================================
+
+See http://www.opensound.com/ossfree for information about sound cards
+to be supported in future.
+
+
+How to use sound without recompiling kernel and/or sound driver
+===============================================================
+
+There is a commercial sound driver which comes in precompiled form and doesn't
+require recompiling of the kernel. See http://www.4Front-tech.com/oss.html for
+more info.
+
+
+Configuring PnP cards
+=====================
+
+New versions of most sound cards use the so-called ISA PnP protocol for
+soft configuring their I/O, IRQ, DMA and shared memory resources.
+Currently at least cards made by Creative Technology (SB32 and SB32AWE
+PnP), Gravis (GUS PnP and GUS PnP Pro), Ensoniq (Soundscape PnP) and
+Aztech (some Sound Galaxy models) use PnP technology. The CS4232/4236 audio
+chip by Crystal Semiconductor (Intel Atlantis, HP Pavilion and many other
+motherboards) is also based on PnP technology but there is a "native" driver
+available for it (see information about CS4232 later in this document).
+
+PnP sound cards (as well as most other PnP ISA cards) are not supported
+by this version of the driver . Proper
+support for them should be released during 97 once the kernel level
+PnP support is available.
+
+There is a method to get most of the PnP cards to work. The basic method
+is the following:
+
+1) Boot DOS so the card's DOS drivers have a chance to initialize it.
+2) _Cold_ boot to Linux by using "loadlin.exe". Hitting ctrl-alt-del
+works with older machines but causes a hard reset of all cards on recent
+(Pentium) machines.
+3) If you have the sound driver in Linux configured properly, the card should
+work now. "Proper" means that I/O, IRQ and DMA settings are the same as in
+DOS. The hard part is to find which settings were used. See the documentation of
+your card for more info.
+
+Windows 95 could work as well as DOS but running loadlin may be difficult.
+Probably you should "shut down" your machine to MS-DOS mode before running it.
+
+Some machines have a BIOS utility for setting PnP resources. This is a good
+way to configure some cards. In this case you don't need to boot DOS/Win95
+before starting Linux.
+
+Another way to initialize PnP cards without DOS/Win95 is a Linux based
+PnP isolation tool. When writing this there is a pre alpha test version
+of such a tool available from ftp://ftp.demon.co.uk/pub/unix/linux/utils. The
+file is called isapnptools-*. Please note that this tool is just a temporary
+solution which may be incompatible with future kernel versions having proper
+support for PnP cards. There are bugs in setting DMA channels in earlier
+versions of isapnptools so at least version 1.6 is required with sound cards.
+
+Yet another way to use PnP cards is to use (commercial) OSS/Linux drivers. See
+http://www.opensound.com/linux.html for more info. This is probably the way you
+should do it if you don't want to spend time recompiling the kernel and
+required tools.
+
+
+Read this before trying to configure the driver
+===============================================
+
+There are currently many cards that work with this driver. Some of the cards
+have native support while others work since they emulate some other
+card (usually SB, MSS/WSS and/or MPU401). The following cards have native
+support in the driver. Detailed instructions for configuring these cards
+will be given later in this document.
+
+Pro Audio Spectrum 16 (PAS16) and compatibles:
+ Pro Audio Spectrum 16
+ Pro Audio Studio 16
+ Logitech Sound Man 16
+ NOTE! The original Pro Audio Spectrum as well as the PAS+ are not
+ and will not be supported by the driver.
+
+Media Vision Jazz16 based cards
+ Pro Sonic 16
+ Logitech SoundMan Wave
+ (Other Jazz based cards should work but I don't have any reports
+ about them).
+
+Sound Blasters
+ SB 1.0 to 2.0
+ SB Pro
+ SB 16
+ SB32/64/AWE
+ Configure SB32/64/AWE just like SB16. See lowlevel/README.awe
+ for information about using the wave table synth.
+ NOTE! AWE63/Gold and 16/32/AWE "PnP" cards need to be activated
+ using isapnptools before they work with OSS/Free.
+ SB16 compatible cards by other manufacturers than Creative.
+ You have been fooled since there are _no_ SB16 compatible
+ cards on the market (as of May 1997). It's likely that your card
+ is compatible just with SB Pro but there is also a non-SB-
+ compatible 16 bit mode. Usually it's MSS/WSS but it could also
+ be a proprietary one like MV Jazz16 or ESS ES688. OPTi
+ MAD16 chips are very common in so called "SB 16 bit cards"
+ (try with the MAD16 driver).
+
+ ======================================================================
+ "Supposed to be SB compatible" cards.
+ Forget the SB compatibility and check for other alternatives
+ first. The only cards that work with the SB driver in
+ Linux have been made by Creative Technology (there is at least
+ one chip on the card with "CREATIVE" printed on it). The
+ only other SB compatible chips are ESS and Jazz16 chips
+ (maybe ALSxxx chips too but they probably don't work).
+ Most other "16 bit SB compatible" cards such as "OPTi/MAD16" or
+ "Crystal" are _NOT_ SB compatible in Linux.
+
+ Practically all sound cards have some kind of SB emulation mode
+ in addition to their native (16 bit) mode. In most cases this
+ (8 bit only) SB compatible mode doesn't work with Linux. If
+ you get it working it may cause problems with games and
+ applications which require 16 bit audio. Some 16 bit only
+ applications don't check if the card actually supports 16 bits.
+ They just dump 16 bit data to a 8 bit card which produces just
+ noise.
+
+ In most cases the 16 bit native mode is supported by Linux.
+ Use the SB mode with "clones" only if you don't find anything
+ better from the rest of this doc.
+ ======================================================================
+
+Gravis Ultrasound (GUS)
+ GUS
+ GUS + the 16 bit option
+ GUS MAX
+ GUS ACE (No MIDI port and audio recording)
+ GUS PnP (with RAM)
+
+MPU-401 and compatibles
+ The driver works both with the full (intelligent mode) MPU-401
+ cards (such as MPU IPC-T and MQX-32M) and with the UART only
+ dumb MIDI ports. MPU-401 is currently the most common MIDI
+ interface. Most sound cards are compatible with it. However,
+ don't enable MPU401 mode blindly. Many cards with native support
+ in the driver have their own MPU401 driver. Enabling the standard one
+ will cause a conflict with these cards. So check if your card is
+ in the list of supported cards before enabling MPU401.
+
+Windows Sound System (MSS/WSS)
+ Even when Microsoft has discontinued their own Sound System card
+ they managed to make it a standard. MSS compatible cards are based on
+ a codec chip which is easily available from at least two manufacturers
+ (AD1848 by Analog Devices and CS4231/CS4248 by Crystal Semiconductor).
+ Currently most sound cards are based on one of the MSS compatible codec
+ chips. The CS4231 is used in the high quality cards such as GUS MAX,
+ MediaTrix AudioTrix Pro and TB Tropez (GUS MAX is not MSS compatible).
+
+ Having a AD1848, CS4248 or CS4231 codec chip on the card is a good
+ sign. Even if the card is not MSS compatible, it could be easy to write
+ support for it. Note also that most MSS compatible cards
+ require special boot time initialization which may not be present
+ in the driver. Also, some MSS compatible cards have native support.
+ Enabling the MSS support with these cards is likely to
+ cause a conflict. So check if your card is listed in this file before
+ enabling the MSS support.
+
+Yamaha FM synthesizers (OPL2, OPL3 (not OPL3-SA) and OPL4)
+ Most sound cards have a FM synthesizer chip. The OPL2 is a 2
+ operator chip used in the original AdLib card. Currently it's used
+ only in the cheapest (8 bit mono) cards. The OPL3 is a 4 operator
+ FM chip which provides better sound quality and/or more available
+ voices than the OPL2. The OPL4 is a new chip that has an OPL3 and
+ a wave table synthesizer packed onto the same chip. The driver supports
+ just the OPL3 mode directly. Most cards with an OPL4 (like
+ SM Wave and AudioTrix Pro) support the OPL4 mode using MPU401
+ emulation. Writing a native OPL4 support is difficult
+ since Yamaha doesn't give information about their sample ROM chip.
+
+ Enable the generic OPL2/OPL3 FM synthesizer support if your
+ card has a FM chip made by Yamaha. Don't enable it if your card
+ has a software (TRS) based FM emulator.
+
+ ----------------------------------------------------------------
+ NOTE! OPL3-SA is different chip than the ordinary OPL3. In addition
+ to the FM synth this chip has also digital audio (WSS) and
+ MIDI (MPU401) capabilities. Support for OPL3-SA is described below.
+ ----------------------------------------------------------------
+
+Yamaha OPL3-SA1
+
+ Yamaha OPL3-SA1 (YMF701) is an audio controller chip used on some
+ (Intel) motherboards and on cheap sound cards. It should not be
+ confused with the original OPL3 chip (YMF278) which is entirely
+ different chip. OPL3-SA1 has support for MSS, MPU401 and SB Pro
+ (not used in OSS/Free) in addition to the OPL3 FM synth.
+
+ There are also chips called OPL3-SA2, OPL3-SA3, ..., OPL3SA-N. They
+ are PnP chips and will not work with the OPL3-SA1 driver. You should
+ use the standard MSS, MPU401 and OPL3 options with these chips and to
+ activate the card using isapnptools.
+
+4Front Technologies SoftOSS
+
+ SoftOSS is a software based wave table emulation which works with
+ any 16 bit stereo sound card. Due to its nature a fast CPU is
+ required (P133 is minimum). Although SoftOSS does _not_ use MMX
+ instructions it has proven out that recent processors (which appear
+ to have MMX) perform significantly better with SoftOSS than earlier
+ ones. For example a P166MMX beats a PPro200. SoftOSS should not be used
+ on 486 or 386 machines.
+
+ The amount of CPU load caused by SoftOSS can be controlled by
+ selecting the CONFIG_SOFTOSS_RATE and CONFIG_SOFTOSS_VOICES
+ parameters properly (they will be prompted by make config). It's
+ recommended to set CONFIG_SOFTOSS_VOICES to 32. If you have a
+ P166MMX or faster (PPro200 is not faster) you can set
+ CONFIG_SOFTOSS_RATE to 44100 (kHz). However with slower systems it
+ recommended to use sampling rates around 22050 or even 16000 kHz.
+ Selecting too high values for these parameters may hang your
+ system when playing MIDI files with hight degree of polyphony
+ (number of concurrently playing notes). It's also possible to
+ decrease CONFIG_SOFTOSS_VOICES. This makes it possible to use
+ higher sampling rates. However using fewer voices decreases
+ playback quality more than decreasing the sampling rate.
+
+ SoftOSS keeps the samples loaded on the system's RAM so much RAM is
+ required. SoftOSS should never be used on machines with less than 16 MB
+ of RAM since this is potentially dangerous (you may accidentally run out
+ of memory which probably crashes the machine).
+
+ SoftOSS implements the wave table API originally designed for GUS. For
+ this reason all applications designed for GUS should work (at least
+ after minor modifications). For example gmod/xgmod and playmidi -g are
+ known to work.
+
+ To work SoftOSS will require GUS compatible
+ patch files to be installed on the system (in /dos/ultrasnd/midi). You
+ can use the public domain MIDIA patchset available from several ftp
+ sites.
+
+ *********************************************************************
+ IMPORTANT NOTICE! The original patch set distributed with the Gravis
+ Ultrasound card is not in public domain (even though it's available from
+ some FTP sites). You should contact Voice Crystal (www.voicecrystal.com)
+ if you like to use these patches with SoftOSS included in OSS/Free.
+ *********************************************************************
+
+PSS based cards (AD1848 + ADSP-2115 + Echo ESC614 ASIC)
+ Analog Devices and Echo Speech have together defined a sound card
+ architecture based on the above chips. The DSP chip is used
+ for emulation of SB Pro, FM and General MIDI/MT32.
+
+ There are several cards based on this architecture. The most known
+ ones are Orchid SW32 and Cardinal DSP16.
+
+ The driver supports downloading DSP algorithms to these cards.
+
+ NOTE! You will have to use the "old" config script when configuring
+ PSS cards.
+
+MediaTrix AudioTrix Pro
+ The ATP card is built around a CS4231 codec and an OPL4 synthesizer
+ chips. The OPL4 mode is supported by a microcontroller running a
+ General MIDI emulator. There is also a SB 1.5 compatible playback mode.
+
+Ensoniq SoundScape and compatibles
+ Ensoniq has designed a sound card architecture based on the
+ OTTO synthesizer chip used in their professional MIDI synthesizers.
+ Several companies (including Ensoniq, Reveal and Spea) are selling
+ cards based on this architecture.
+
+ NOTE! The SoundScape PnP is not supported by OSS/Free. Ensoniq VIVO and
+ VIVO90 cards are not compatible with Soundscapes so the Soundscape
+ driver will not work with them. You may want to use OSS/Linux with these
+ cards.
+
+OPTi MAD16 and Mozart based cards
+ The Mozart (OAK OTI-601), MAD16 (OPTi 82C928), MAD16 Pro (OPTi 82C929),
+ OPTi 82C924/82C925 (in _non_ PnP mode) and OPTi 82C930 interface
+ chips are used in many different sound cards, including some
+ cards by Reveal miro and Turtle Beach (Tropez). The purpose of these
+ chips is to connect other audio components to the PC bus. The
+ interface chip performs address decoding for the other chips.
+ NOTE! Tropez Plus is not MAD16 but CS4232 based.
+ NOTE! MAD16 PnP cards (82C924, 82C925, 82C931) are not MAD16 compatible
+ in the PnP mode. You will have to use them in MSS mode after having
+ initialized them using isapnptools or DOS. 82C931 probably requires
+ initialization using DOS/Windows (running isapnptools is not enough).
+ It's possible to use 82C931 with OSS/Free by jumpering it to non-PnP
+ mode (provided that the card has a jumper for this). In non-PnP mode
+ 82C931 is compatible with 82C930 and should work with the MAD16 driver
+ (without need to use isapnptools or DOS to initialize it). All OPTi
+ chips are supported by OSS/Linux (both in PnP and non-PnP modes).
+
+Audio Excel DSP16
+ Support for this card was written by Riccardo Faccetti
+ (riccardo@cdc8g5.cdc.polimi.it). The AEDSP16 driver included in
+ the lowlevel/ directory. To use it you should enable the
+ "Additional low level drivers" option.
+
+Crystal CS4232 and CS4236 based cards such as AcerMagic S23, TB Tropez _Plus_ and
+ many PC motherboards (Compaq, HP, Intel, ...)
+ CS4232 is a PnP multimedia chip which contains a CS3231A codec,
+ SB and MPU401 emulations. There is support for OPL3 too.
+ Unfortunately the MPU401 mode doesn't work (I don't know how to
+ initialize it). CS4236 is an enhanced (compatible) version of CS4232.
+ NOTE! Don't ever try to use isapnptools with CS4232 since this will just
+ freeze your machine (due to chip bugs). If you have problems in getting
+ CS4232 working you could try initializing it with DOS (CS4232C.EXE) and
+ then booting Linux using loadlin. CS4232C.EXE loads a secret firmware
+ patch which is not documented by Crystal.
+
+Turtle Beach Maui and Tropez "classic"
+ This driver version supports sample, patch and program loading commands
+ described in the Maui/Tropez User's manual.
+ There is now full initialization support too. The audio side of
+ the Tropez is based on the MAD16 chip (see above).
+ NOTE! Tropez Plus is different card than Tropez "classic" and will not
+ work fully in Linux. You can get audio features working by configuring
+ the card as a CS4232 based card (above).
+
+
+Jumpers and software configuration
+==================================
+
+Some of the earliest sound cards were jumper configurable. You have to
+configure the driver use I/O, IRQ and DMA settings
+that match the jumpers. Just few 8 bit cards are fully jumper
+configurable (SB 1.x/2.x, SB Pro and clones).
+Some cards made by Aztech have an EEPROM which contains the
+config info. These cards behave much like hardware jumpered cards.
+
+Most cards have jumper for the base I/O address but other parameters
+are software configurable. Sometimes there are few other jumpers too.
+
+Latest cards are fully software configurable or they are PnP ISA
+compatible. There are no jumpers on the board.
+
+The driver handles software configurable cards automatically. Just configure
+the driver to use I/O, IRQ and DMA settings which are known to work.
+You could usually use the same values than with DOS and/or Windows.
+Using different settings is possible but not recommended since it may cause
+some trouble (for example when warm booting from an OS to another or
+when installing new hardware to the machine).
+
+Sound driver sets the soft configurable parameters of the card automatically
+during boot. Usually you don't need to run any extra initialization
+programs when booting Linux but there are some exceptions. See the
+card-specific instructions below for more info.
+
+The drawback of software configuration is that the driver needs to know
+how the card must be initialized. It cannot initialize unknown cards
+even if they are otherwise compatible with some other cards (like SB,
+MPU401 or Windows Sound System).
+
+
+What if your card was not listed above?
+=======================================
+
+The first thing to do is to look at the major IC chips on the card.
+Many of the latest sound cards are based on some standard chips. If you
+are lucky, all of them could be supported by the driver. The most common ones
+are the OPTi MAD16, Mozart, SoundScape (Ensoniq) and the PSS architectures
+listed above. Also look at the end of this file for list of unsupported
+cards and the ones which could be supported later.
+
+The last resort is to send _exact_ name and model information of the card
+to me together with a list of the major IC chips (manufactured, model) to
+me. I could then try to check if your card looks like something familiar.
+
+There are many more cards in the world than listed above. The first thing to
+do with these cards is to check if they emulate some other card or interface
+such as SB, MSS and/or MPU401. In this case there is a chance to get the
+card to work by booting DOS before starting Linux (boot DOS, hit ctrl-alt-del
+and boot Linux without hard resetting the machine). In this method the
+DOS based driver initializes the hardware to use known I/O, IRQ and DMA
+settings. If sound driver is configured to use the same settings, everything
+should work OK.
+
+
+Configuring sound driver (with Linux)
+=====================================
+
+The sound driver is currently distributed as part of the Linux kernel. The
+files are in /usr/src/linux/drivers/sound/.
+
+****************************************************************************
+* ALWAYS USE THE SOUND DRIVER VERSION WHICH IS DISTRIBUTED WITH *
+* THE KERNEL SOURCE PACKAGE YOU ARE USING. SOME ALPHA AND BETA TEST *
+* VERSIONS CAN BE INSTALLED FROM A SEPARATELY DISTRIBUTED PACKAGE *
+* BUT CHECK THAT THE PACKAGE IS NOT MUCH OLDER (OR NEWER) THAN THE *
+* KERNEL YOU ARE USING. IT'S POSSIBLE THAT THE KERNEL/DRIVER *
+* INTERFACE CHANGES BETWEEN KERNEL RELEASES WHICH MAY CAUSE SOME *
+* INCOMPATIBILITY PROBLEMS. *
+* *
+* IN CASE YOU INSTALL A SEPARATELY DISTRIBUTED SOUND DRIVER VERSION, *
+* BE SURE TO REMOVE OR RENAME THE OLD SOUND DRIVER DIRECTORY BEFORE *
+* INSTALLING THE NEW ONE. LEAVING OLD FILES TO THE SOUND DRIVER *
+* DIRECTORY _WILL_ CAUSE PROBLEMS WHEN THE DRIVER IS USED OR *
+* COMPILED. *
+****************************************************************************
+
+To configure the driver, run "make config" in the kernel source directory
+(/usr/src/linux). Answer "y" or "m" to the question about Sound card support
+(after the questions about mouse, CD-ROM, ftape, etc. support). Questions
+about options for sound will then be asked.
+
+After configuring the kernel and sound driver and compile the kernel
+following instructions in the kernel README.
+
+The sound driver configuration dialog
+-------------------------------------
+
+Sound configuration starts by making some yes/no questions. Be careful
+when answering to these questions since answering y to a question may
+prevent some later ones from being asked. For example don't answer y to
+the first question (PAS16) if you don't really have a PAS16. Don't enable
+more cards than you really need since they just consume memory. Also
+some drivers (like MPU401) may conflict with your SCSI controller and
+prevent kernel from booting. If you card was in the list of supported
+cards (above), please look at the card specific config instructions
+(later in this file) before starting to configure. Some cards must be
+configured in way which is not obvious.
+
+So here is the beginning of the config dialog. Answer 'y' or 'n' to these
+questions. The default answer is shown so that (y/n) means 'y' by default and
+(n/y) means 'n'. To use the default value, just hit ENTER. But be careful
+since using the default _doesn't_ guarantee anything.
+
+Note also that all questions may not be asked. The configuration program
+may disable some questions depending on the earlier choices. It may also
+select some options automatically as well.
+
+ "ProAudioSpectrum 16 support",
+ - Answer 'y'_ONLY_ if you have a Pro Audio Spectrum _16_,
+ Pro Audio Studio 16 or Logitech SoundMan 16 (be sure that
+ you read the above list correctly). Don't answer 'y' if you
+ have some other card made by Media Vision or Logitech since they
+ are not PAS16 compatible.
+ NOTE! Since 3.5-beta10 you need to enable SB support (next question)
+ if you want to use the SB emulation of PAS16. It's also possible to
+ the emulation if you want to use a true SB card together with PAS16
+ (there is another question about this that is asked later).
+ "Sound Blaster support",
+ - Answer 'y' if you have an original SB card made by Creative Labs
+ or a full 100% hardware compatible clone (like Thunderboard or
+ SM Games). If your card was in the list of supported cards (above),
+ please look at the card specific instructions later in this file
+ before answering this question. For an unknown card you may answer
+ 'y' if the card claims to be SB compatible.
+ Enable this option also with PAS16 (changed since v3.5-beta9).
+
+ Don't enable SB if you have a MAD16 or Mozart compatible card.
+
+ "Generic OPL2/OPL3 FM synthesizer support",
+ - Answer 'y' if your card has a FM chip made by Yamaha (OPL2/OPL3/OPL4).
+ Answering 'y' is usually a safe and recommended choice. However some
+ cards may have software (TSR) FM emulation. Enabling FM support
+ with these cards may cause trouble. However I don't currently know
+ such cards.
+ "Gravis Ultrasound support",
+ - Answer 'y' if you have GUS or GUS MAX. Answer 'n' if you don't
+ have GUS since the GUS driver consumes much memory.
+ Currently I don't have experiences with the GUS ACE so I don't
+ know what to answer with it.
+ "MPU-401 support (NOT for SB16)",
+ - Be careful with this question. The MPU401 interface is supported
+ by almost any sound card today. However some natively supported cards
+ have their own driver for MPU401. Enabling the MPU401 option with
+ these cards will cause a conflict. Also enabling MPU401 on a system
+ that doesn't really have a MPU401 could cause some trouble. If your
+ card was in the list of supported cards (above), please look at
+ the card specific instructions later in this file.
+
+ In MOST cases this MPU401 driver should only be used with "true"
+ MIDI-only MPU401 professional cards. In most other cases there
+ is another way to get the MPU401 compatible interface of a
+ sound card to work.
+ Support for the MPU401 compatible MIDI port of SB16, ESS1688
+ and MV Jazz16 cards is included in the SB driver. Use it instead
+ of this separate MPU401 driver with these cards. As well
+ Soundscape, PSS and Maui drivers include their own MPU401
+ options.
+
+ It's safe to answer 'y' if you have a true MPU401 MIDI interface
+ card.
+ "6850 UART Midi support",
+ - It's safe to answer 'n' to this question in all cases. The 6850
+ UART interface is so rarely used.
+ "PSS (ECHO-ADI2111) support",
+ - Answer 'y' only if you have Orchid SW32, Cardinal DSP16 or some
+ other card based on the PSS chipset (AD1848 codec + ADSP-2115
+ DSP chip + Echo ESC614 ASIC CHIP).
+ "16 bit sampling option of GUS (_NOT_ GUS MAX)",
+ - Answer 'y' if you have installed the 16 bit sampling daughtercard
+ to your GUS. Answer 'n' if you have GUS MAX. Enabling this option
+ disables GUS MAX support.
+ "GUS MAX support",
+ - Answer 'y' only if you have a GUS MAX.
+ "Microsoft Sound System support",
+ - Again think carefully before answering 'y' to this question. It's
+ safe to answer 'y' in case you have the original Windows Sound
+ System card made by Microsoft or Aztech SG 16 Pro (or NX16 Pro).
+ Also you may answer 'y' in case your card was not listed earlier
+ in this file. For cards having native support in the driver, consult
+ the card specific instructions later in this file. Some drivers
+ have their own MSS support and enabling this option will cause a
+ conflict.
+ Note! The MSS driver permits configuring two DMA channels. This is a
+ "nonstandard" feature and works only with very few cards (if any).
+ In most cases the second DMA channel should be disabled or set to
+ the same channel than the first one. Trying to configure two separate
+ channels with cards that don't support this feature will prevent
+ audio (at least recording) from working.
+ "Ensoniq Soundscape support",
+ - Answer 'y' if you have a sound card based on the Ensoniq SoundScape
+ chipset. Such cards are being manufactured at least by Ensoniq,
+ Spea and Reveal (note that Reveal makes other cards also). The oldest
+ cards made by Spea don't work properly with Linux.
+ Soundscape PnP as well as Ensoniq VIVO work only with the commercial
+ OSS/Linux version.
+ "MediaTrix AudioTrix Pro support",
+ - Answer 'y' if you have the AudioTrix Pro.
+ "Support for MAD16 and/or Mozart based cards",
+ - Answer y if your card has a Mozart (OAK OTI-601) or MAD16
+ (OPTi 82C928, 82C929, 82C924/82C925 or 82C930) audio interface chip.
+ These chips are
+ currently quite common so it's possible that many no-name cards
+ have one of them. In addition the MAD16 chip is used in some
+ cards made by known manufacturers such as Turtle Beach (Tropez),
+ Reveal (some models) and Diamond (some recent models).
+ Note OPTi 82C924 and 82C925 are MAD16 compatible only in non PnP
+ mode (jumper selectable on many cards).
+ "Support for TB Maui"
+ - This enables TB Maui specific initialization. Works with TB Maui
+ and TB Tropez (may not work with Tropez Plus).
+
+
+Then the configuration program asks some y/n questions about the higher
+level services. It's recommended to answer 'y' to each of these questions.
+Answer 'n' only if you know you will not need the option.
+
+ "MIDI interface support",
+ - Answering 'n' disables /dev/midi## devices and access to any
+ MIDI ports using /dev/sequencer and /dev/music. This option
+ also affects any MPU401 and/or General MIDI compatible devices.
+ "FM synthesizer (YM3812/OPL-3) support",
+ - Answer 'y' here.
+ "/dev/sequencer support",
+ - Answering 'n' disables /dev/sequencer and /dev/music.
+
+Entering the I/O, IRQ and DMA config parameters
+-----------------------------------------------
+
+After the above questions the configuration program prompts for the
+card specific configuration information. Usually just a set of
+I/O address, IRQ and DMA numbers are asked. With some cards the program
+asks for some files to be used during initialization of the card. For example
+many cards have a DSP chip or microprocessor which must be initialized by
+downloading a program (microcode) file to the card.
+
+Instructions for answering these questions are given in the next section.
+
+
+Card specific information
+=========================
+
+This section gives additional instructions about configuring some cards.
+Please refer manual of your card for valid I/O, IRQ and DMA numbers. Using
+the same settings with DOS/Windows and Linux is recommended. Using
+different values could cause some problems when switching between
+different operating systems.
+
+Sound Blasters (the original ones by Creative)
+---------------------------------------------
+
+NOTE! Check if you have a PnP Sound Blaster (cards sold after summer 1995
+ are almost certainly PnP ones). With PnP cards you should use isapnptools
+ to activate them (see above).
+
+It's possible to configure these cards to use different I/O, IRQ and
+DMA settings. Since the possible/default settings have changed between various
+models, you have to consult manual of your card for the proper ones. It's
+a good idea to use the same values than with DOS/Windows. With SB and SB Pro
+it's the only choice. SB16 has software selectable IRQ and DMA channels but
+using different values with DOS and Linux is likely to cause troubles. The
+DOS driver is not able to reset the card properly after warm boot from Linux
+if Linux has used different IRQ or DMA values.
+
+The original (steam) Sound Blaster (versions 1.x and 2.x) use always
+DMA1. There is no way to change it.
+
+The SB16 needs two DMA channels. A 8 bit one (1 or 3) is required for
+8 bit operation and a 16 bit one (5, 6 or 7) for the 16 bit mode. In theory
+it's possible to use just one (8 bit) DMA channel by answering the 8 bit
+one when the configuration program asks for the 16 bit one. This may work
+in some systems but is likely to cause terrible noise on some other systems.
+
+It's possible to use two SB16/32/64 at the same time. To do this you should
+first configure OSS/Free for one card. Then edit local.h manually and define
+SB2_BASE, SB2_IRQ, SB2_DMA and SB2_DMA2 for the second one. You can't get
+the OPL3, MIDI and EMU8000 devices of the second card to work. If you are
+going to use two PnP Sound Blasters, ensure that they are of different model
+and have different PnP IDs. There is no way to get two cards with the same
+card ID and serial number to work. The easiest way to check this is trying
+if isapnptools can see both cards or just one.
+
+NOTE! Don't enable the SM Games option (asked by the configuration program)
+ if you are not 101% sure that your card is a Logitech Soundman Games
+ (not a SM Wave or SM16).
+
+SB Clones
+---------
+
+First of all: There are no SB16 clones. There are SB Pro clones with a
+16 bit mode which is not SB16 compatible. The most likely alternative is that
+the 16 bit mode means MSS/WSS.
+
+There are just a few fully 100% hardware SB or SB Pro compatible cards.
+I know just Thunderboard and SM Games. Other cards require some kind of
+hardware initialization before they become SB compatible. Check if your card
+was listed in the beginning of this file. In this case you should follow
+instructions for your card later in this file.
+
+For other not fully SB clones you may try initialization using DOS in
+the following way:
+
+ - Boot DOS so that the card specific driver gets run.
+ - Hit ctrl-alt-del (or use loadlin) to boot Linux. Don't
+ switch off power or press the reset button.
+ - If you use the same I/O, IRQ and DMA settings in Linux, the
+ card should work.
+
+If your card is both SB and MSS compatible, I recommend using the MSS mode.
+Most cards of this kind are not able to work in the SB and the MSS mode
+simultaneously. Using the MSS mode provides 16 bit recording and playback.
+
+ProAudioSpectrum 16 and compatibles
+-----------------------------------
+
+PAS16 has a SB emulation chip which can be used together with the native
+(16 bit) mode of the card. To enable this emulation you should configure
+the driver to have SB support too (this has been changed since version
+3.5-beta9 of this driver).
+
+With current driver versions it's also possible to use PAS16 together with
+another SB compatible card. In this case you should configure SB support
+for the other card and to disable the SB emulation of PAS16 (there is a
+separate questions about this).
+
+With PAS16 you can use two audio device files at the same time. /dev/dsp (and
+/dev/audio) is connected to the 8/16 bit native codec and the /dev/dsp1 (and
+/dev/audio1) is connected to the SB emulation (8 bit mono only).
+
+Gravis Ultrasound
+-----------------
+
+There are many different revisions of the Ultrasound card (GUS). The
+earliest ones (pre 3.7) don't have a hardware mixer. With these cards
+the driver uses a software emulation for synth and pcm playbacks. It's
+also possible to switch some of the inputs (line in, mic) off by setting
+mixer volume of the channel level below 10%. For recording you have
+to select the channel as a recording source and to use volume above 10%.
+
+GUS 3.7 has a hardware mixer.
+
+GUS MAX and the 16 bit sampling daughtercard have a CS4231 codec chip which
+also contains a mixer.
+
+Configuring GUS is simple. Just enable the GUS support and GUS MAX or
+the 16 bit daughtercard if you have them. Note that enabling the daughter
+card disables GUS MAX driver.
+
+NOTE for owners of the 16 bit daughtercard: By default the daughtercard
+uses /dev/dsp (and /dev/audio). Command "ln -sf /dev/dsp1 /dev/dsp"
+selects the daughter card as the default device.
+
+With just the standard GUS enabled the configuration program prompts
+for the I/O, IRQ and DMA numbers for the card. Use the same values than
+with DOS.
+
+With the daughter card option enabled you will be prompted for the I/O,
+IRQ and DMA numbers for the daughter card. You have to use different I/O
+and DMA values than for the standard GUS. The daughter card permits
+simultaneous recording and playback. Use /dev/dsp (the daughtercard) for
+recording and /dev/dsp1 (GUS GF1) for playback.
+
+GUS MAX uses the same I/O address and IRQ settings than the original GUS
+(GUS MAX = GUS + a CS4231 codec). In addition an extra DMA channel may be used.
+Using two DMA channels permits simultaneous playback using two devices
+(dev/dsp0 and /dev/dsp1). The second DMA channel is required for
+full duplex audio.
+To enable the second DMA channels, give a valid DMA channel when the config
+program asks for the GUS MAX DMA (entering -1 disables the second DMA).
+Using 16 bit DMA channels (5,6 or 7) is recommended.
+
+If you have problems in recording with GUS MAX, you could try to use
+just one 8 bit DMA channel. Recording will not work with one DMA
+channel if it's a 16 bit one.
+
+Microphone input of GUS MAX is connected to mixer in little bit nonstandard
+way. There is actually two microphone volume controls. Normal "mic" controls
+only recording level. Mixer control "speaker" is used to control volume of
+microphone signal connected directly to line/speaker out. So just decrease
+volume of "speaker" if you have problems with microphone feedback.
+
+GUS ACE works too but any attempt to record or to use the MIDI port
+will fail.
+
+GUS PnP (with RAM) is partially supported but it needs to be initialized using
+DOS or isapnptools before starting the driver.
+
+MPU401 and Windows Sound System
+-------------------------------
+
+Again. Don't enable these options in case your card is listed
+somewhere else in this file.
+
+Configuring these cards is obvious (or it should be). With MSS
+you should probably enable the OPL3 synth also since
+most MSS compatible cards have it. However check that this is true
+before enabling OPL3.
+
+Sound driver supports more than one MPU401 compatible cards at the same time
+but the config program asks config info for just the first of them.
+Adding the second or third MPU interfaces must be done manually by
+editing sound/local.h (after running the config program). Add defines for
+MPU2_BASE & MPU2_IRQ (and MPU3_BASE & MPU3_IRQ) to the file.
+
+CAUTION!
+
+The default I/O base of Adaptec AHA-1542 SCSI controller is 0x330 which
+is also the default of the MPU401 driver. Don't configure the sound driver to
+use 0x330 as the MPU401 base if you have a AHA1542. The kernel will not boot
+if you make this mistake.
+
+PSS
+---
+
+Even the PSS cards are compatible with SB, MSS and MPU401, you must not
+enable these options when configuring the driver. The configuration
+program handles these options itself. (You may use the SB, MPU and MSS options
+together with PSS if you have another card on the system).
+
+The PSS driver enables MSS and MPU401 modes of the card. SB is not enabled
+since it doesn't work concurrently with MSS. The driver loads also a
+DSP algorithm which is used to for the general MIDI emulation. The
+algorithm file (.ld) is read by the config program and written to a
+file included when the pss.c is compiled. For this reason the config
+program asks if you want to download the file. Use the genmidi.ld file
+distributed with the DOS/Windows drivers of the card (don't use the mt32.ld).
+With some cards the file is called 'synth.ld'. You must have access to
+the file when configuring the driver. The easiest way is to mount the DOS
+partition containing the file with Linux.
+
+It's possible to load your own DSP algorithms and run them with the card.
+Look at the directory pss_test of snd-util-3.0.tar.gz for more info.
+
+AudioTrix Pro
+-------------
+
+You have to enable the OPL3 and SB (not SB Pro or SB16) drivers in addition
+to the native AudioTrix driver. Don't enable MSS or MPU drivers.
+
+Configuring ATP is little bit tricky since it uses so many I/O, IRQ and
+DMA numbers. Using the same values than with DOS/Win is a good idea. Don't
+attempt to use the same IRQ or DMA channels twice.
+
+The SB mode of ATP is implemented so the ATP driver just enables SB
+in the proper address. The SB driver handles the rest. You have to configure
+both the SB driver and the SB mode of ATP to use the same IRQ, DMA and I/O
+settings.
+
+Also the ATP has a microcontroller for the General MIDI emulation (OPL4).
+For this reason the driver asks for the name of a file containing the
+microcode (TRXPRO.HEX). This file is usually located in the directory
+where the DOS drivers were installed. You must have access to this file
+when configuring the driver.
+
+If you have the effects daughtercard, it must be initialized by running
+the setfx program of snd-util-3.0.tar.gz package. This step is not required
+when using the (future) binary distribution version of the driver.
+
+Ensoniq SoundScape
+------------------
+
+NOTE! The new PnP SoundScape is not supported yet. Soundscape compatible
+ cards made by Reveal don't work with Linux. They use older revision
+ of the Soundscape chipset which is not fully compatible with
+ newer cards made by Ensoniq.
+
+The SoundScape driver handles initialization of MSS and MPU supports
+itself so you don't need to enable other drivers than SoundScape
+(enable also the /dev/dsp, /dev/sequencer and MIDI supports).
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!! !!!!
+!!!!! NOTE! Before version 3.5-beta6 there WERE two sets of audio !!!!
+!!!!! device files (/dev/dsp0 and /dev/dsp1). The first one WAS !!!!
+!!!!! used only for card initialization and the second for audio !!!!
+!!!!! purposes. It WAS required to change /dev/dsp (a symlink) to !!!!
+!!!!! point to /dev/dsp1. !!!!
+!!!!! !!!!
+!!!!! This is not required with OSS versions 3.5-beta6 and later !!!!
+!!!!! since there is now just one audio device file. Please !!!!
+!!!!! change /dev/dsp to point back to /dev/dsp0 if you are !!!!
+!!!!! upgrading from an earlier driver version using !!!!
+!!!!! (cd /dev;rm dsp;ln -s dsp0 dsp). !!!!
+!!!!! !!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+The configuration program asks one DMA channel and two interrupts. One IRQ
+and one DMA is used by the MSS codec. The second IRQ is required for the
+MPU401 mode (you have to use different IRQs for both purposes).
+There were earlier two DMA channels for SoundScape but the current driver
+version requires just one.
+
+The SoundScape card has a Motorola microcontroller which must initialized
+_after_ boot (the driver doesn't initialize it during boot).
+The initialization is done by running the 'ssinit' program which is
+distributed in the snd-util-3.0.tar.gz package. You have to edit two
+defines in the ssinit.c and then compile the program. You may run ssinit
+manually (after each boot) or add it to /etc/rc.d/rc.local.
+
+/*(DEBLOBBED)*/
+
+MAD16 (Pro) and Mozart
+----------------------
+
+You need to enable just the MAD16 /Mozart support when configuring
+the driver. _Don't_ enable SB, MPU401 or MSS. However you will need the
+/dev/audio, /dev/sequencer and MIDI supports.
+
+Mozart and OPTi 82C928 (the original MAD16) chips don't support
+MPU401 mode so enter just 0 when the configuration program asks the
+MPU/MIDI I/O base. The MAD16 Pro (OPTi 82C929) and 82C930 chips have MPU401
+mode.
+
+TB Tropez is based on the 82C929 chip. It has two MIDI ports.
+The one connected to the MAD16 chip is the second one (there is a second
+MIDI connector/pins somewhere??). If you have not connected the second MIDI
+port, just disable the MIDI port of MAD16. The 'Maui' compatible synth of
+Tropez is jumper configurable and not connected to the MAD16 chip (the
+Maui driver can be used with it).
+
+Some MAD16 based cards may cause feedback, whistle or terrible noise if the
+line3 mixer channel is turned too high. This happens at least with Shuttle
+Sound System. Current driver versions set volume of line3 low enough so
+this should not be a problem.
+
+If you have a MAD16 card which have an OPL4 (FM + Wave table) synthesizer
+chip (_not_ an OPL3), you have to append a line containing #define MAD16_OPL4
+to the file linux/drivers/sound/local.h (after running make config).
+
+MAD16 cards having a CS4231 codec support full duplex mode. This mode
+can be enabled by configuring the card to use two DMA channels. Possible
+DMA channel pairs are: 0&1, 1&0 and 3&0.
+
+NOTE! Cards having an OPTi 82C924/82C925 chip work with OSS/Free only in
+non-PnP mode (usually jumper selectable). The PnP mode is supported only
+by OSS/Linux.
+
+MV Jazz (ProSonic)
+------------------
+
+The Jazz16 driver is just a hack made to the SB Pro driver. However it works
+fairly well. You have to enable SB, SB Pro (_not_ SB16) and MPU401 supports
+when configuring the driver. The configuration program asks later if you
+want support for MV Jazz16 based cards (after asking SB base address). Answer
+'y' here and the driver asks the second (16 bit) DMA channel.
+
+The Jazz16 driver uses the MPU401 driver in a way which will cause
+problems if you have another MPU401 compatible card. In this case you must
+give address of the Jazz16 based MPU401 interface when the config
+program prompts for the MPU401 information. Then look at the MPU401
+specific section for instructions about configuring more than one MPU401 cards.
+
+Logitech Soundman Wave
+----------------------
+
+Read the above MV Jazz specific instructions first.
+
+The Logitech SoundMan Wave (don't confuse this with the SM16 or SM Games) is
+a MV Jazz based card which has an additional OPL4 based wave table
+synthesizer. The OPL4 chip is handled by an on board microcontroller
+which must be initialized during boot. The config program asks if
+you have a SM Wave immediately after asking the second DMA channel of jazz16.
+If you answer 'y', the config program will ask name of the file containing
+code to be loaded to the microcontroller. The file is usually called
+MIDI0001.BIN and it's located in the DOS/Windows driver directory. The file
+may also be called as TSUNAMI.BIN or something else (older cards?).
+
+The OPL4 synth will be inaccessible without loading the microcontroller code.
+
+Also remember to enable SB MPU401 support if you want to use the OPL4 mode.
+(Don't enable the 'normal' MPU401 device as with some earlier driver
+versions (pre 3.5-alpha8)).
+
+NOTE! Don't answer 'y' when the driver asks about SM Games support
+ (the next question after the MIDI0001.BIN name). However
+ answering 'y' doesn't cause damage your computer so don't panic.
+
+Sound Galaxies
+--------------
+
+There are many different Sound Galaxy cards made by Aztech. The 8 bit
+ones are fully SB or SB Pro compatible and there should be no problems
+with them.
+
+The older 16 bit cards (SG Pro16, SG NX Pro16, Nova and Lyra) have
+an EEPROM chip for storing the configuration data. There is a microcontroller
+which initializes the card to match the EEPROM settings when the machine
+is powered on. These cards actually behave just like they have jumpers
+for all of the settings. Configure driver for MSS, MPU, SB/SB Pro and OPL3
+supports with these cards.
+
+There are some new Sound Galaxies in the market. I have no experience with
+them so read the card's manual carefully.
+
+ESS ES1688 and ES688 'AudioDrive' based cards
+---------------------------------------------
+
+Support for these two ESS chips is embedded in the SB driver.
+Configure these cards just like SB. Enable the 'SB MPU401 MIDI port'
+if you want to use MIDI features of ES1688. ES688 doesn't have MPU mode
+so you don't need to enable it (the driver uses normal SB MIDI automatically
+with ES688).
+
+NOTE! ESS cards are not compatible with MSS/WSS so don't worry if MSS support
+of OSS doesn't work with it.
+
+There are some ES1688/688 based sound cards and (particularly) motherboards
+which use software configurable I/O port relocation feature of the chip.
+This ESS proprietary feature is supported only by OSS/Linux.
+
+There are ES1688 based cards which use different interrupt pin assignment than
+recommended by ESS (5, 7, 9/2 and 10). In this case all IRQs don't work.
+At least a card called (Pearl?) Hypersound 16 supports IRQ 15 but it doesn't
+work.
+
+ES1868 is a PnP chip which is (supposed to be) compatible with ESS1688
+probably works with OSS/Free after initialization using isapnptools.
+
+Reveal cards
+------------
+
+There are several different cards made/marketed by Reveal. Some of them
+are compatible with SoundScape and some use the MAD16 chip. You may have
+to look at the card and try to identify its origin.
+
+Diamond
+-------
+
+The oldest (Sierra Aria based) sound cards made by Diamond are not supported
+(they may work if the card is initialized using DOS). The recent (LX?)
+models are based on the MAD16 chip which is supported by the driver.
+
+Audio Excel DSP16
+-----------------
+
+Support for this card is currently not functional. A new driver for it
+should be available later this year.
+
+PCMCIA cards
+------------
+
+Sorry, can't help. Some cards may work and some don't.
+
+TI TM4000M notebooks
+--------------------
+
+These computers have a built in sound support based on the Jazz chipset.
+Look at the instructions for MV Jazz (above). It's also important to note
+that there is something wrong with the mouse port and sound at least on
+some TM models. Don't enable the "C&T 82C710 mouse port support" when
+configuring Linux. Having it enabled is likely to cause mysterious problems
+and kernel failures when sound is used.
+
+miroSOUND
+---------
+
+The miroSOUND PCM1-pro, PCM12 and PCM20 radio has been used
+successfully. These cards are based on the MAD16, OPL4, and CS4231A chips
+and everything said in the section about MAD16 cards applies here,
+too. The only major difference between the PCMxx and other MAD16 cards
+is that instead of the mixer in the CS4231 codec a separate mixer
+controlled by an on-board 80C32 microcontroller is used. Control of
+the mixer takes place via the ACI (miro's audio control interface)
+protocol that is implemented in a separate lowlevel driver. Make sure
+you compile this ACI driver together with the normal MAD16 support
+when you use a miroSOUND PCMxx card. The ACI mixer is controlled by
+/dev/mixer and the CS4231 mixer by /dev/mixer1 (depends on load
+time). Only in special cases you want to change something regularly on
+the CS4231 mixer.
+
+The miroSOUND PCM12 and PCM20 radio is capable of full duplex
+operation (simultaneous PCM replay and recording), which allows you to
+implement nice real-time signal processing audio effect software and
+network telephones. The ACI mixer has to be switched into the "solo"
+mode for duplex operation in order to avoid feedback caused by the
+mixer (input hears output signal). You can de-/activate this mode
+through toggleing the record button for the wave controller with an
+OSS-mixer.
+
+The PCM20 contains a radio tuner, which is also controlled by
+ACI. This radio tuner is supported by the ACI driver together with the
+miropcm20.o module. Also the 7-band equalizer is integrated
+(limited by the OSS-design). Development has started and maybe
+finished for the RDS decoder on this card, too. You will be able to
+read RadioText, the Programme Service name, Programme TYpe and
+others. Even the v4l radio module benefits from it with a refined
+strength value. See aci.[ch] and miropcm20*.[ch] for more details.
+
+The following configuration parameters have worked fine for the PCM12
+in Markus Kuhn's system, many other configurations might work, too:
+CONFIG_MAD16_BASE=0x530, CONFIG_MAD16_IRQ=11, CONFIG_MAD16_DMA=3,
+CONFIG_MAD16_DMA2=0, CONFIG_MAD16_MPU_BASE=0x330, CONFIG_MAD16_MPU_IRQ=10,
+DSP_BUFFSIZE=65536, SELECTED_SOUND_OPTIONS=0x00281000.
+
+Bas van der Linden is using his PCM1-pro with a configuration that
+differs in: CONFIG_MAD16_IRQ=7, CONFIG_MAD16_DMA=1, CONFIG_MAD16_MPU_IRQ=9
+
+Compaq Deskpro XL
+-----------------
+
+The builtin sound hardware of Compaq Deskpro XL is now supported.
+You need to configure the driver with MSS and OPL3 supports enabled.
+In addition you need to manually edit linux/drivers/sound/local.h and
+to add a line containing "#define DESKPROXL" if you used
+make menuconfig/xconfig.
+
+Others?
+-------
+
+Since there are so many different sound cards, it's likely that I have
+forgotten to mention many of them. Please inform me if you know yet another
+card which works with Linux, please inform me (or is anybody else
+willing to maintain a database of supported cards (just like in XF86)?).
+
+Cards not supported yet
+=======================
+
+Please check the version of sound driver you are using before
+complaining that your card is not supported. It's possible you are
+using a driver version which was released months before your card was
+introduced.
+
+First of all, there is an easy way to make most sound cards work with Linux.
+Just use the DOS based driver to initialize the card to a known state, then use
+loadlin.exe to boot Linux. If Linux is configured to use the same I/O, IRQ and
+DMA numbers as DOS, the card could work.
+(ctrl-alt-del can be used in place of loadlin.exe but it doesn't work with
+new motherboards). This method works also with all/most PnP sound cards.
+
+Don't get fooled with SB compatibility. Most cards are compatible with
+SB but that may require a TSR which is not possible with Linux. If
+the card is compatible with MSS, it's a better choice. Some cards
+don't work in the SB and MSS modes at the same time.
+
+Then there are cards which are no longer manufactured and/or which
+are relatively rarely used (such as the 8 bit ProAudioSpectrum
+models). It's extremely unlikely that such cards ever get supported.
+Adding support for a new card requires much work and increases time
+required in maintaining the driver (some changes need to be done
+to all low level drivers and be tested too, maybe with multiple
+operating systems). For this reason I have made a decision to not support
+obsolete cards. It's possible that someone else makes a separately
+distributed driver (diffs) for the card.
+
+Writing a driver for a new card is not possible if there are no
+programming information available about the card. If you don't
+find your new card from this file, look from the home page
+(http://www.opensound.com/ossfree). Then please contact
+manufacturer of the card and ask if they have (or are willing to)
+released technical details of the card. Do this before contacting me. I
+can only answer 'no' if there are no programming information available.
+
+I have made decision to not accept code based on reverse engineering
+to the driver. There are three main reasons: First I don't want to break
+relationships to sound card manufacturers. The second reason is that
+maintaining and supporting a driver without any specs will be a pain.
+The third reason is that companies have freedom to refuse selling their
+products to other than Windows users.
+
+Some companies don't give low level technical information about their
+products to public or at least their require signing a NDA. It's not
+possible to implement a freeware driver for them. However it's possible
+that support for such cards become available in the commercial version
+of this driver (see http://www.4Front-tech.com/oss.html for more info).
+
+There are some common audio chipsets that are not supported yet. For example
+Sierra Aria and IBM Mwave. It's possible that these architectures
+get some support in future but I can't make any promises. Just look
+at the home page (http://www.opensound.com/ossfree/)
+for latest info.
+
+Information about unsupported sound cards and chipsets is welcome as well
+as free copies of sound cards, SDKs and operating systems.
+
+If you have any corrections and/or comments, please contact me.
+
+Hannu Savolainen
+hannu@opensound.com
+
+home page of OSS/Free: http://www.opensound.com/ossfree
+
+home page of commercial OSS
+(Open Sound System) drivers: http://www.opensound.com/oss.html
diff --git a/Documentation/sound/oss/README.modules b/Documentation/sound/oss/README.modules
new file mode 100644
index 000000000..cdc039421
--- /dev/null
+++ b/Documentation/sound/oss/README.modules
@@ -0,0 +1,106 @@
+Building a modular sound driver
+================================
+
+ The following information is current as of linux-2.1.85. Check the other
+readme files, especially README.OSS, for information not specific to
+making sound modular.
+
+ First, configure your kernel. This is an idea of what you should be
+setting in the sound section:
+
+<M> Sound card support
+
+<M> 100% Sound Blaster compatibles (SB16/32/64, ESS, Jazz16) support
+
+ I have SoundBlaster. Select your card from the list.
+
+<M> Generic OPL2/OPL3 FM synthesizer support
+<M> FM synthesizer (YM3812/OPL-3) support
+
+ If you don't set these, you will probably find you can play .wav files
+but not .midi. As the help for them says, set them unless you know your
+card does not use one of these chips for FM support.
+
+ Once you are configured, make zlilo, modules, modules_install; reboot.
+Note that it is no longer necessary or possible to configure sound in the
+drivers/sound dir. Now one simply configures and makes one's kernel and
+modules in the usual way.
+
+ Then, add to your /etc/modprobe.d/oss.conf something like:
+
+alias char-major-14-* sb
+install sb /sbin/modprobe -i sb && /sbin/modprobe adlib_card
+options sb io=0x220 irq=7 dma=1 dma16=5 mpu_io=0x330
+options adlib_card io=0x388 # FM synthesizer
+
+ Alternatively, if you have compiled in kernel level ISAPnP support:
+
+alias char-major-14 sb
+softdep sb post: adlib_card
+options adlib_card io=0x388
+
+ The effect of this is that the sound driver and all necessary bits and
+pieces autoload on demand, assuming you use kerneld (a sound choice) and
+autoclean when not in use. Also, options for the device drivers are
+set. They will not work without them. Change as appropriate for your card.
+If you are not yet using the very cool kerneld, you will have to "modprobe
+-k sb" yourself to get things going. Eventually things may be fixed so
+that this kludgery is not necessary; for the time being, it seems to work
+well.
+
+ Replace 'sb' with the driver for your card, and give it the right
+options. To find the filename of the driver, look in
+/lib/modules/<kernel-version>/misc. Mine looks like:
+
+adlib_card.o # This is the generic OPLx driver
+opl3.o # The OPL3 driver
+sb.o # <<The SoundBlaster driver. Yours may differ.>>
+sound.o # The sound driver
+uart401.o # Used by sb, maybe other cards
+
+ Whichever card you have, try feeding it the options that would be the
+default if you were making the driver wired, not as modules. You can
+look at function referred to by module_init() for the card to see what
+args are expected.
+
+ Note that at present there is no way to configure the io, irq and other
+parameters for the modular drivers as one does for the wired drivers.. One
+needs to pass the modules the necessary parameters as arguments, either
+with /etc/modprobe.d/*.conf or with command-line args to modprobe, e.g.
+
+modprobe sb io=0x220 irq=7 dma=1 dma16=5 mpu_io=0x330
+modprobe adlib_card io=0x388
+
+ recommend using /etc/modprobe.d/*.conf.
+
+Persistent DMA Buffers:
+
+The sound modules normally allocate DMA buffers during open() and
+deallocate them during close(). Linux can often have problems allocating
+DMA buffers for ISA cards on machines with more than 16MB RAM. This is
+because ISA DMA buffers must exist below the 16MB boundary and it is quite
+possible that we can't find a large enough free block in this region after
+the machine has been running for any amount of time. The way to avoid this
+problem is to allocate the DMA buffers during module load and deallocate
+them when the module is unloaded. For this to be effective we need to load
+the sound modules right after the kernel boots, either manually or by an
+init script, and keep them around until we shut down. This is a little
+wasteful of RAM, but it guarantees that sound always works.
+
+To make the sound driver use persistent DMA buffers we need to pass the
+sound.o module a "dmabuf=1" command-line argument. This is normally done
+in /etc/modprobe.d/*.conf files like so:
+
+options sound dmabuf=1
+
+If you have 16MB or less RAM or a PCI sound card, this is wasteful and
+unnecessary. It is possible that machine with 16MB or less RAM will find
+this option useful, but if your machine is so memory-starved that it
+cannot find a 64K block free, you will be wasting even more RAM by keeping
+the sound modules loaded and the DMA buffers allocated when they are not
+needed. The proper solution is to upgrade your RAM. But you do also have
+this improper solution as well. Use it wisely.
+
+ I'm afraid I know nothing about anything but my setup, being more of a
+text-mode guy anyway. If you have options for other cards or other helpful
+hints, send them to me, Jim Bray, jb@as220.org, http://as220.org/jb.
diff --git a/Documentation/sound/oss/README.ymfsb b/Documentation/sound/oss/README.ymfsb
new file mode 100644
index 000000000..b6b77906b
--- /dev/null
+++ b/Documentation/sound/oss/README.ymfsb
@@ -0,0 +1,107 @@
+Legacy audio driver for YMF7xx PCI cards.
+
+
+FIRST OF ALL
+============
+
+ This code references YAMAHA's sample codes and data sheets.
+ I respect and thank for all people they made open the information
+ about YMF7xx cards.
+
+ And this codes heavily based on Jeff Garzik <jgarzik@pobox.com>'s
+ old VIA 82Cxxx driver (via82cxxx.c). I also respect him.
+
+
+DISCLIMER
+=========
+
+ This driver is currently at early ALPHA stage. It may cause serious
+ damage to your computer when used.
+ PLEASE USE IT AT YOUR OWN RISK.
+
+
+ABOUT THIS DRIVER
+=================
+
+ This code enables you to use your YMF724[A-F], YMF740[A-C], YMF744, YMF754
+ cards. When enabled, your card acts as "SoundBlaster Pro" compatible card.
+ It can only play 22.05kHz / 8bit / Stereo samples, control external MIDI
+ port.
+ If you want to use your card as recent "16-bit" card, you should use
+ Alsa or OSS/Linux driver. Of course you can write native PCI driver for
+ your cards :)
+
+
+USAGE
+=====
+
+ # modprobe ymfsb (options)
+
+
+OPTIONS FOR MODULE
+==================
+
+ io : SB base address (0x220, 0x240, 0x260, 0x280)
+ synth_io : OPL3 base address (0x388, 0x398, 0x3a0, 0x3a8)
+ dma : DMA number (0,1,3)
+ master_volume: AC'97 PCM out Vol (0-100)
+ spdif_out : SPDIF-out flag (0:disable 1:enable)
+
+ These options will change in future...
+
+
+FREQUENCY
+=========
+
+ When playing sounds via this driver, you will hear its pitch is slightly
+ lower than original sounds. Since this driver recognizes your card acts
+ with 21.739kHz sample rates rather than 22.050kHz (I think it must be
+ hardware restriction). So many players become tone deafness.
+ To prevent this, you should express some options to your sound player
+ that specify correct sample frequency. For example, to play your MP3 file
+ correctly with mpg123, specify the frequency like following:
+
+ % mpg123 -r 21739 foo.mp3
+
+
+SPDIF OUT
+=========
+
+ With installing modules with option 'spdif_out=1', you can enjoy your
+ sounds from SPDIF-out of your card (if it had).
+ Its Fs is fixed to 48kHz (It never means the sample frequency become
+ up to 48kHz. All sounds via SPDIF-out also 22kHz samples). So your
+ digital-in capable components has to be able to handle 48kHz Fs.
+
+
+COPYING
+=======
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+TODO
+====
+ * support for multiple cards
+ (set the different SB_IO,MPU_IO,OPL_IO for each cards)
+
+ * support for OPL (dmfm) : There will be no requirements... :-<
+
+
+AUTHOR
+======
+
+ Daisuke Nagano <breeze.nagano@nifty.ne.jp>
+
diff --git a/Documentation/sound/oss/SoundPro b/Documentation/sound/oss/SoundPro
new file mode 100644
index 000000000..9d4db1f29
--- /dev/null
+++ b/Documentation/sound/oss/SoundPro
@@ -0,0 +1,105 @@
+Documentation for the SoundPro CMI8330 extensions in the WSS driver (ad1848.o)
+------------------------------------------------------------------------------
+
+( Be sure to read Documentation/sound/oss/CMI8330 too )
+
+Ion Badulescu, ionut@cs.columbia.edu
+February 24, 1999
+
+(derived from the OPL3-SA2 documentation by Scott Murray)
+
+The SoundPro CMI8330 (ISA) is a chip usually found on some Taiwanese
+motherboards. The official name in the documentation is CMI8330, SoundPro
+is the nickname and the big inscription on the chip itself.
+
+The chip emulates a WSS as well as a SB16, but it has certain differences
+in the mixer section which require separate support. It also emulates an
+MPU401 and an OPL3 synthesizer, so you probably want to enable support
+for these, too.
+
+The chip identifies itself as an AD1848, but its mixer is significantly
+more advanced than the original AD1848 one. If your system works with
+either WSS or SB16 and you are having problems with some mixer controls
+(no CD audio, no line-in, etc), you might want to give this driver a try.
+Detection should work, but it hasn't been widely tested, so it might still
+mis-identify the chip. You can still force soundpro=1 in the modprobe
+parameters for ad1848. Please let me know if it happens to you, so I can
+adjust the detection routine.
+
+The chip is capable of doing full-duplex, but since the driver sees it as an
+AD1848, it cannot take advantage of this. Moreover, the full-duplex mode is
+not achievable through the WSS interface, b/c it needs a dma16 line which is
+assigned only to the SB16 subdevice (with isapnp). Windows documentation
+says the user must use WSS Playback and SB16 Recording for full-duplex, so
+it might be possible to do the same thing under Linux. You can try loading
+up both ad1848 and sb then use one for playback and the other for
+recording. I don't know if this works, b/c I haven't tested it. Anyway, if
+you try it, be very careful: the SB16 mixer *mostly* works, but certain
+settings can have unexpected effects. Use the WSS mixer for best results.
+
+There is also a PCI SoundPro chip. I have not seen this chip, so I have
+no idea if the driver will work with it. I suspect it won't.
+
+As with PnP cards, some configuration is required. There are two ways
+of doing this. The most common is to use the isapnptools package to
+initialize the card, and use the kernel module form of the sound
+subsystem and sound drivers. Alternatively, some BIOS's allow manual
+configuration of installed PnP devices in a BIOS menu, which should
+allow using the non-modular sound drivers, i.e. built into the kernel.
+Since in this latter case you cannot use module parameters, you will
+have to enable support for the SoundPro at compile time.
+
+The IRQ and DMA values can be any that are considered acceptable for a
+WSS. Assuming you've got isapnp all happy, then you should be able to
+do something like the following (which *must* match the isapnp/BIOS
+configuration):
+
+modprobe ad1848 io=0x530 irq=11 dma=0 soundpro=1
+-and maybe-
+modprobe sb io=0x220 irq=5 dma=1 dma16=5
+
+-then-
+modprobe mpu401 io=0x330 irq=9
+modprobe opl3 io=0x388
+
+If all goes well and you see no error messages, you should be able to
+start using the sound capabilities of your system. If you get an
+error message while trying to insert the module(s), then make
+sure that the values of the various arguments match what you specified
+in your isapnp configuration file, and that there is no conflict with
+another device for an I/O port or interrupt. Checking the contents of
+/proc/ioports and /proc/interrupts can be useful to see if you're
+butting heads with another device.
+
+If you do not see the chipset version message, and none of the other
+messages present in the system log are helpful, try adding 'debug=1'
+to the ad1848 parameters, email me the syslog results and I'll do
+my best to help.
+
+Lastly, if you're using modules and want to set up automatic module
+loading with kmod, the kernel module loader, here is the section I
+currently use in my conf.modules file:
+
+# Sound
+post-install sound modprobe -k ad1848; modprobe -k mpu401; modprobe -k opl3
+options ad1848 io=0x530 irq=11 dma=0
+options sb io=0x220 irq=5 dma=1 dma16=5
+options mpu401 io=0x330 irq=9
+options opl3 io=0x388
+
+The above ensures that ad1848 will be loaded whenever the sound system
+is being used.
+
+Good luck.
+
+Ion
+
+NOT REALLY TESTED:
+- recording
+- recording device selection
+- full-duplex
+
+TODO:
+- implement mixer support for surround, loud, digital CD switches.
+- come up with a scheme which allows recording volumes for each subdevice.
+This is a major OSS API change.
diff --git a/Documentation/sound/oss/Soundblaster b/Documentation/sound/oss/Soundblaster
new file mode 100644
index 000000000..b288d464b
--- /dev/null
+++ b/Documentation/sound/oss/Soundblaster
@@ -0,0 +1,53 @@
+modprobe sound
+insmod uart401
+insmod sb ...
+
+This loads the driver for the Sound Blaster and assorted clones. Cards that
+are covered by other drivers should not be using this driver.
+
+The Sound Blaster module takes the following arguments
+
+io I/O address of the Sound Blaster chip (0x220,0x240,0x260,0x280)
+irq IRQ of the Sound Blaster chip (5,7,9,10)
+dma 8-bit DMA channel for the Sound Blaster (0,1,3)
+dma16 16-bit DMA channel for SB16 and equivalent cards (5,6,7)
+mpu_io I/O for MPU chip if present (0x300,0x330)
+
+sm_games=1 Set if you have a Logitech soundman games
+acer=1 Set this to detect cards in some ACER notebooks
+mwave_bug=1 Set if you are trying to use this driver with mwave (see on)
+type Use this to specify a specific card type
+
+The following arguments are taken if ISAPnP support is compiled in
+
+isapnp=0 Set this to disable ISAPnP detection (use io=0xXXX etc. above)
+multiple=0 Set to disable detection of multiple Soundblaster cards.
+ Consider it a bug if this option is needed, and send in a
+ report.
+pnplegacy=1 Set this to be able to use a PnP card(s) along with a single
+ non-PnP (legacy) card. Above options for io, irq, etc. are
+ needed, and will apply only to the legacy card.
+reverse=1 Reverses the order of the search in the PnP table.
+uart401=1 Set to enable detection of mpu devices on some clones.
+isapnpjump=n Jumps to slot n in the driver's PnP table. Use the source,
+ Luke.
+
+You may well want to load the opl3 driver for synth music on most SB and
+clone SB devices
+
+insmod opl3 io=0x388
+
+Using Mwave
+
+To make this driver work with Mwave you must set mwave_bug. You also need
+to warm boot from DOS/Windows with the required firmware loaded under this
+OS. IBM are being difficult about documenting how to load this firmware.
+
+Avance Logic ALS007
+
+This card is supported; see the separate file ALS007 for full details.
+
+Avance Logic ALS100
+
+This card is supported; setup should be as for a standard Sound Blaster 16.
+The driver will identify the audio device as a "Sound Blaster 16 (ALS-100)".
diff --git a/Documentation/sound/oss/Tropez+ b/Documentation/sound/oss/Tropez+
new file mode 100644
index 000000000..b93a6b734
--- /dev/null
+++ b/Documentation/sound/oss/Tropez+
@@ -0,0 +1,26 @@
+From: Paul Barton-Davis <pbd@op.net>
+
+Here is the configuration I use with a Tropez+ and my modular
+driver:
+
+ alias char-major-14 wavefront
+ alias synth0 wavefront
+ alias mixer0 cs4232
+ alias audio0 cs4232
+ pre-install wavefront modprobe "-k" "cs4232"
+ post-install wavefront modprobe "-k" "opl3"
+ options wavefront io=0x200 irq=9
+ options cs4232 synthirq=9 synthio=0x200 io=0x530 irq=5 dma=1 dma2=0
+ options opl3 io=0x388
+
+Things to note:
+
+ the wavefront options "io" and "irq" ***MUST*** match the "synthio"
+ and "synthirq" cs4232 options.
+
+ you can do without the opl3 module if you don't
+ want to use the OPL/[34] synth on the soundcard
+
+ the opl3 io parameter is conventionally not adjustable.
+
+Please see drivers/sound/README.wavefront for more details.
diff --git a/Documentation/sound/oss/VIBRA16 b/Documentation/sound/oss/VIBRA16
new file mode 100644
index 000000000..68a5a46be
--- /dev/null
+++ b/Documentation/sound/oss/VIBRA16
@@ -0,0 +1,80 @@
+Sound Blaster 16X Vibra addendum
+--------------------------------
+by Marius Ilioaea <mariusi@protv.ro>
+ Stefan Laudat <stefan@asit.ro>
+
+Sat Mar 6 23:55:27 EET 1999
+
+ Hello again,
+
+ Playing with a SB Vibra 16x soundcard we found it very difficult
+to setup because the kernel reported a lot of DMA errors and wouldn't
+simply play any sound.
+ A good starting point is that the vibra16x chip full-duplex facility
+is neither still exploited by the sb driver found in the linux kernel
+(tried it with a 2.2.2-ac7), nor in the commercial OSS package (it reports
+it as half-duplex soundcard). Oh, I almost forgot, the RedHat sndconfig
+failed detecting it ;)
+ So, the big problem still remains, because the sb module wants a
+8-bit and a 16-bit dma, which we could not allocate for vibra... it supports
+only two 8-bit dma channels, the second one will be passed to the module
+as a 16 bit channel, the kernel will yield about that but everything will
+be okay, trust us.
+ The only inconvenient you may find is that you will have
+some sound playing jitters if you have HDD dma support enabled - but this
+will happen with almost all soundcards...
+
+ A fully working isapnp.conf is just here:
+
+<snip here>
+
+(READPORT 0x0203)
+(ISOLATE PRESERVE)
+(IDENTIFY *)
+(VERBOSITY 2)
+(CONFLICT (IO FATAL)(IRQ FATAL)(DMA FATAL)(MEM FATAL)) # or WARNING
+# SB 16 and OPL3 devices
+(CONFIGURE CTL00f0/-1 (LD 0
+(INT 0 (IRQ 5 (MODE +E)))
+(DMA 0 (CHANNEL 1))
+(DMA 1 (CHANNEL 3))
+(IO 0 (SIZE 16) (BASE 0x0220))
+(IO 2 (SIZE 4) (BASE 0x0388))
+(NAME "CTL00f0/-1[0]{Audio }")
+(ACT Y)
+))
+
+# Joystick device - only if you need it :-/
+
+(CONFIGURE CTL00f0/-1 (LD 1
+(IO 0 (SIZE 1) (BASE 0x0200))
+(NAME "CTL00f0/-1[1]{Game }")
+(ACT Y)
+))
+(WAITFORKEY)
+
+<end of snipping>
+
+ So, after a good kernel modules compilation and a 'depmod -a kernel_ver'
+you may want to:
+
+modprobe sb io=0x220 irq=5 dma=1 dma16=3
+
+ Or, take the hard way:
+
+modprobe soundcore
+modprobe sound
+modprobe uart401
+modprobe sb io=0x220 irq=5 dma=1 dma16=3
+# do you need MIDI?
+modprobe opl3=0x388
+
+ Just in case, the kernel sound support should be:
+
+CONFIG_SOUND=m
+CONFIG_SOUND_OSS=m
+CONFIG_SOUND_SB=m
+
+ Enjoy your new noisy Linux box! ;)
+
+
diff --git a/Documentation/sound/oss/WaveArtist b/Documentation/sound/oss/WaveArtist
new file mode 100644
index 000000000..f4f3407cd
--- /dev/null
+++ b/Documentation/sound/oss/WaveArtist
@@ -0,0 +1,170 @@
+
+ (the following is from the armlinux CVS)
+
+ WaveArtist mixer and volume levels can be accessed via these commands:
+
+ nn30 read registers nn, where nn = 00 - 09 for mixer settings
+ 0a - 13 for channel volumes
+ mm31 write the volume setting in pairs, where mm = (nn - 10) / 2
+ rr32 write the mixer settings in pairs, where rr = nn/2
+ xx33 reset all settings to default
+ 0y34 select mono source, y=0 = left, y=1 = right
+
+ bits
+ nn 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 00 | 0 | 0 0 1 1 | left line mixer gain | left aux1 mixer gain |lmute|
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 01 | 0 | 0 1 0 1 | left aux2 mixer gain | right 2 left mic gain |mmute|
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 02 | 0 | 0 1 1 1 | left mic mixer gain | left mic | left mixer gain |dith |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 03 | 0 | 1 0 0 1 | left mixer input select |lrfg | left ADC gain |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 04 | 0 | 1 0 1 1 | right line mixer gain | right aux1 mixer gain |rmute|
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 05 | 0 | 1 1 0 1 | right aux2 mixer gain | left 2 right mic gain |test |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 06 | 0 | 1 1 1 1 | right mic mixer gain | right mic |right mixer gain |rbyps|
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 07 | 1 | 0 0 0 1 | right mixer select |rrfg | right ADC gain |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 08 | 1 | 0 0 1 1 | mono mixer gain |right ADC mux sel|left ADC mux sel |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 09 | 1 | 0 1 0 1 |loopb|left linout|loop|ADCch|TxFch|OffCD|test |loopb|loopb|osamp|
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 0a | 0 | left PCM channel volume |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 0b | 0 | right PCM channel volume |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 0c | 0 | left FM channel volume |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 0d | 0 | right FM channel volume |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 0e | 0 | left wavetable channel volume |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 0f | 0 | right wavetable channel volume |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 10 | 0 | left PCM expansion channel volume |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 11 | 0 | right PCM expansion channel volume |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 12 | 0 | left FM expansion channel volume |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+ 13 | 0 | right FM expansion channel volume |
+----+---+------------+-----+-----+-----+----+-----+-----+-----+-----+-----+-----+-----+
+
+ lmute: left mute
+ mmute: mono mute
+ dith: dithds
+ lrfg:
+ rmute: right mute
+ rbyps: right bypass
+ rrfg:
+ ADCch:
+ TxFch:
+ OffCD:
+ osamp:
+
+ And the following diagram is derived from the description in the CVS archive:
+
+ MIC L (mouthpiece)
+ +------+
+ -->PreAmp>-\
+ +--^---+ |
+ | |
+ r2b4-5 | +--------+
+ /----*-------------------------------->5 |
+ | | |
+ | /----------------------------------->4 |
+ | | | |
+ | | /--------------------------------->3 1of5 | +---+
+ | | | | mux >-->AMP>--> ADC L
+ | | | /------------------------------->2 | +-^-+
+ | | | | | | |
+ Line | | | | +----+ +------+ +---+ /---->1 | r3b3-0
+ ------------*->mute>--> Gain >--> | | | |
+ L | | | +----+ +------+ | | | *->0 |
+ | | | | | | +---^----+
+ Aux2 | | | +----+ +------+ | | | |
+ ----------*--->mute>--> Gain >--> M | | r8b0-2
+ L | | +----+ +------+ | | |
+ | | | | \------\
+ Aux1 | | +----+ +------+ | | |
+ --------*----->mute>--> Gain >--> I | |
+ L | +----+ +------+ | | |
+ | | | |
+ | +----+ +------+ | | +---+ |
+ *------->mute>--> Gain >--> X >-->AMP>--*
+ | +----+ +------+ | | +-^-+ |
+ | | | | |
+ | +----+ +------+ | | r2b1-3 |
+ | /----->mute>--> Gain >--> E | |
+ | | +----+ +------+ | | |
+ | | | | |
+ | | +----+ +------+ | | |
+ | | /--->mute>--> Gain >--> R | |
+ | | | +----+ +------+ | | |
+ | | | | | | r9b8-9
+ | | | +----+ +------+ | | | |
+ | | | /->mute>--> Gain >--> | | +---v---+
+ | | | | +----+ +------+ +---+ /-*->0 |
+ DAC | | | | | | |
+ ------------*----------------------------------->? | +----+
+ L | | | | | Mux >-->mute>--> L output
+ | | | | /->? | +--^-+
+ | | | | | | | |
+ | | | /--------->? | r0b0
+ | | | | | | +-------+
+ | | | | | |
+ Mono | | | | | | +-------+
+ ----------* | \---> | +----+
+ | | | | | | Mix >-->mute>--> Mono output
+ | | | | *-> | +--^-+
+ | | | | | +-------+ |
+ | | | | | r1b0
+ DAC | | | | | +-------+
+ ------------*-------------------------*--------->1 | +----+
+ R | | | | | | Mux >-->mute>--> R output
+ | | | | +----+ +------+ +---+ *->0 | +--^-+
+ | | | \->mute>--> Gain >--> | | +---^---+ |
+ | | | +----+ +------+ | | | | r5b0
+ | | | | | | r6b0
+ | | | +----+ +------+ | | |
+ | | \--->mute>--> Gain >--> M | |
+ | | +----+ +------+ | | |
+ | | | | |
+ | | +----+ +------+ | | |
+ | *----->mute>--> Gain >--> I | |
+ | | +----+ +------+ | | |
+ | | | | |
+ | | +----+ +------+ | | +---+ |
+ \------->mute>--> Gain >--> X >-->AMP>--*
+ | +----+ +------+ | | +-^-+ |
+ /--/ | | | |
+ Aux1 | +----+ +------+ | | r6b1-3 |
+ -------*------>mute>--> Gain >--> E | |
+ R | | +----+ +------+ | | |
+ | | | | |
+ Aux2 | | +----+ +------+ | | /------/
+ ---------*---->mute>--> Gain >--> R | |
+ R | | | +----+ +------+ | | |
+ | | | | | | +--------+
+ Line | | | +----+ +------+ | | | *->0 |
+ -----------*-->mute>--> Gain >--> | | | |
+ R | | | | +----+ +------+ +---+ \---->1 |
+ | | | | | |
+ | | | \-------------------------------->2 | +---+
+ | | | | Mux >-->AMP>--> ADC R
+ | | \---------------------------------->3 | +-^-+
+ | | | | |
+ | \------------------------------------>4 | r7b3-0
+ | | |
+ \-----*-------------------------------->5 |
+ | +---^----+
+ r6b4-5 | |
+ | | r8b3-5
+ +--v---+ |
+ -->PreAmp>-/
+ +------+
+ MIC R (electret mic)
diff --git a/Documentation/sound/oss/btaudio b/Documentation/sound/oss/btaudio
new file mode 100644
index 000000000..1a693e69d
--- /dev/null
+++ b/Documentation/sound/oss/btaudio
@@ -0,0 +1,92 @@
+
+Intro
+=====
+
+people start bugging me about this with questions, looks like I
+should write up some documentation for this beast. That way I
+don't have to answer that much mails I hope. Yes, I'm lazy...
+
+
+You might have noticed that the bt878 grabber cards have actually
+_two_ PCI functions:
+
+$ lspci
+[ ... ]
+00:0a.0 Multimedia video controller: Brooktree Corporation Bt878 (rev 02)
+00:0a.1 Multimedia controller: Brooktree Corporation Bt878 (rev 02)
+[ ... ]
+
+The first does video, it is backward compatible to the bt848. The second
+does audio. btaudio is a driver for the second function. It's a sound
+driver which can be used for recording sound (and _only_ recording, no
+playback). As most TV cards come with a short cable which can be plugged
+into your sound card's line-in you probably don't need this driver if all
+you want to do is just watching TV...
+
+
+Driver Status
+=============
+
+Still somewhat experimental. The driver should work stable, i.e. it
+should'nt crash your box. It might not work as expected, have bugs,
+not being fully OSS API compilant, ...
+
+Latest versions are available from http://bytesex.org/bttv/, the
+driver is in the bttv tarball. Kernel patches might be available too,
+have a look at http://bytesex.org/bttv/listing.html.
+
+The chip knows two different modes. btaudio registers two dsp
+devices, one for each mode. They can not be used at the same time.
+
+
+Digital audio mode
+==================
+
+The chip gives you 16 bit stereo sound. The sample rate depends on
+the external source which feeds the bt878 with digital sound via I2S
+interface. There is a insmod option (rate) to tell the driver which
+sample rate the hardware uses (32000 is the default).
+
+One possible source for digital sound is the msp34xx audio processor
+chip which provides digital sound via I2S with 32 kHz sample rate. My
+Hauppauge board works this way.
+
+The Osprey-200 reportly gives you digital sound with 44100 Hz sample
+rate. It is also possible that you get no sound at all.
+
+
+analog mode (A/D)
+=================
+
+You can tell the driver to use this mode with the insmod option "analog=1".
+The chip has three analog inputs. Consequently you'll get a mixer device
+to control these.
+
+The analog mode supports mono only. Both 8 + 16 bit. Both are _signed_
+int, which is uncommon for the 8 bit case. Sample rate range is 119 kHz
+to 448 kHz. Yes, the number of digits is correct. The driver supports
+downsampling by powers of two, so you can ask for more usual sample rates
+like 44 kHz too.
+
+With my Hauppauge I get noisy sound on the second input (mapped to line2
+by the mixer device). Others get a useable signal on line1.
+
+
+some examples
+=============
+
+* read audio data from btaudio (dsp2), send to es1730 (dsp,dsp1):
+ $ sox -w -r 32000 -t ossdsp /dev/dsp2 -t ossdsp /dev/dsp
+
+* read audio data from btaudio, send to esound daemon (which might be
+ running on another host):
+ $ sox -c 2 -w -r 32000 -t ossdsp /dev/dsp2 -t sw - | esdcat -r 32000
+ $ sox -c 1 -w -r 32000 -t ossdsp /dev/dsp2 -t sw - | esdcat -m -r 32000
+
+
+Have fun,
+
+ Gerd
+
+--
+Gerd Knorr <kraxel@bytesex.org>
diff --git a/Documentation/sound/oss/mwave b/Documentation/sound/oss/mwave
new file mode 100644
index 000000000..5fbcb1609
--- /dev/null
+++ b/Documentation/sound/oss/mwave
@@ -0,0 +1,185 @@
+ How to try to survive an IBM Mwave under Linux SB drivers
+
+
++ IBM have now released documentation of sorts and Torsten is busy
+ trying to make the Mwave work. This is not however a trivial task.
+
+----------------------------------------------------------------------------
+
+OK, first thing - the IRQ problem IS a problem, whether the test is bypassed or
+not. It is NOT a Linux problem, but an MWAVE problem that is fixed with the
+latest MWAVE patches. So, in other words, don't bypass the test for MWAVES!
+
+I have Windows 95 on /dev/hda1, swap on /dev/hda2, and Red Hat 5 on /dev/hda3.
+
+The steps, then:
+
+ Boot to Linux.
+ Mount Windows 95 file system (assume mount point = /dos95).
+ mkdir /dos95/linux
+ mkdir /dos95/linux/boot
+ mkdir /dos95/linux/boot/parms
+
+ Copy the kernel, any initrd image, and loadlin to /dos95/linux/boot/.
+
+ Reboot to Windows 95.
+
+ Edit C:/msdos.sys and add or change the following:
+
+ Logo=0
+ BootGUI=0
+
+ Note that msdos.sys is a text file but it needs to be made 'unhidden',
+ readable and writable before it can be edited. This can be done with
+ DOS' "attrib" command.
+
+ Edit config.sys to have multiple config menus. I have one for windows 95 and
+ five for Linux, like this:
+------------
+[menu]
+menuitem=W95, Windows 95
+menuitem=LINTP, Linux - ThinkPad
+menuitem=LINTP3, Linux - ThinkPad Console
+menuitem=LINDOC, Linux - Docked
+menuitem=LINDOC3, Linux - Docked Console
+menuitem=LIN1, Linux - Single User Mode
+REM menudefault=W95,10
+
+[W95]
+
+[LINTP]
+
+[LINDOC]
+
+[LINTP3]
+
+[LINDOC3]
+
+[LIN1]
+
+[COMMON]
+FILES=30
+REM Please read README.TXT in C:\MWW subdirectory before changing the DOS= statement.
+DOS=HIGH,UMB
+DEVICE=C:\MWW\MANAGER\MWD50430.EXE
+SHELL=c:\command.com /e:2048
+-------------------
+
+The important things are the SHELL and DEVICE statements.
+
+ Then change autoexec.bat. Basically everything in there originally should be
+ done ONLY when Windows 95 is booted. Then you add new things specifically
+ for Linux. Mine is as follows
+
+---------------
+@ECHO OFF
+if "%CONFIG%" == "W95" goto W95
+
+REM
+REM Linux stuff
+REM
+SET MWPATH=C:\MWW\DLL;C:\MWW\MWGAMES;C:\MWW\DSP
+SET BLASTER=A220 I5 D1
+SET MWROOT=C:\MWW
+SET LIBPATH=C:\MWW\DLL
+SET PATH=C:\WINDOWS;C:\MWW\DLL;
+CALL MWAVE START NOSHOW
+c:\linux\boot\loadlin.exe @c:\linux\boot\parms\%CONFIG%.par
+
+:W95
+REM
+REM Windows 95 stuff
+REM
+c:\toolkit\guard
+SET MSINPUT=C:\MSINPUT
+SET MWPATH=C:\MWW\DLL;C:\MWW\MWGAMES;C:\MWW\DSP
+REM The following is used by DOS games to recognize Sound Blaster hardware.
+REM If hardware settings are changed, please change this line as well.
+REM See the Mwave README file for instructions.
+SET BLASTER=A220 I5 D1
+SET MWROOT=C:\MWW
+SET LIBPATH=C:\MWW\DLL
+SET PATH=C:\WINDOWS;C:\WINDOWS\COMMAND;E:\ORAWIN95\BIN;f:\msdev\bin;e:\v30\bin.dbg;v:\devt\v30\bin;c:\JavaSDK\Bin;C:\MWW\DLL;
+SET INCLUDE=f:\MSDEV\INCLUDE;F:\MSDEV\MFC\INCLUDE
+SET LIB=F:\MSDEV\LIB;F:\MSDEV\MFC\LIB
+win
+
+------------------------
+
+Now build a file in c:\linux\boot\parms for each Linux config that you have.
+
+For example, my LINDOC3 config is for a docked Thinkpad at runlevel 3 with no
+initrd image, and has a parameter file named LINDOC3.PAR in c:\linux\boot\parms:
+
+-----------------------
+# LOADLIN @param_file image=other_image root=/dev/other
+#
+# Linux Console in docking station
+#
+c:\linux\boot\zImage.krn # First value must be filename of Linux kernel.
+root=/dev/hda3 # device which gets mounted as root FS
+ro # Other kernel arguments go here.
+apm=off
+doc=yes
+3
+-----------------------
+
+The doc=yes parameter is an environment variable used by my init scripts, not
+a kernel argument.
+
+However, the apm=off parameter IS a kernel argument! APM, at least in my setup,
+causes the kernel to crash when loaded via loadlin (but NOT when loaded via
+LILO). The APM stuff COULD be forced out of the kernel via the kernel compile
+options. Instead, I got an unofficial patch to the APM drivers that allows them
+to be dynamically deactivated via kernel arguments. Whatever you chose to
+document, APM, it seems, MUST be off for setups like mine.
+
+Now make sure C:\MWW\MWCONFIG.REF looks like this:
+
+----------------------
+[NativeDOS]
+Default=SB1.5
+SBInputSource=CD
+SYNTH=FM
+QSound=OFF
+Reverb=OFF
+Chorus=OFF
+ReverbDepth=5
+ChorusDepth=5
+SBInputVolume=5
+SBMainVolume=10
+SBWaveVolume=10
+SBSynthVolume=10
+WaveTableVolume=10
+AudioPowerDriver=ON
+
+[FastCFG]
+Show=No
+HideOption=Off
+-----------------------------
+
+OR the Default= line COULD be
+
+Default=SBPRO
+
+Reboot to Windows 95 and choose Linux. When booted, use sndconfig to configure
+the sound modules and voilà - ThinkPad sound with Linux.
+
+Now the gotchas - you can either have CD sound OR Mixers but not both. That's a
+problem with the SB1.5 (CD sound) or SBPRO (Mixers) settings. No one knows why
+this is!
+
+For some reason MPEG3 files, when played through mpg123, sound like they
+are playing at 1/8th speed - not very useful! If you have ANY insight
+on why this second thing might be happening, I would be grateful.
+
+===========================================================
+ _/ _/_/_/_/
+ _/_/ _/_/ _/
+ _/ _/_/ _/_/_/_/ Martin John Bartlett
+ _/ _/ _/ _/ (martin@nitram.demon.co.uk)
+_/ _/_/_/_/
+ _/
+_/ _/
+ _/_/
+===========================================================
diff --git a/Documentation/sound/oss/oss-parameters.txt b/Documentation/sound/oss/oss-parameters.txt
new file mode 100644
index 000000000..3ab391e7c
--- /dev/null
+++ b/Documentation/sound/oss/oss-parameters.txt
@@ -0,0 +1,51 @@
+ OSS Kernel Parameters
+ ~~~~~~~~~~~~~~~~~~~~~
+
+See Documentation/kernel-parameters.txt for general information on
+specifying module parameters.
+
+This document may not be entirely up to date and comprehensive. The command
+"modinfo -p ${modulename}" shows a current list of all parameters of a loadable
+module. Loadable modules, after being loaded into the running kernel, also
+reveal their parameters in /sys/module/${modulename}/parameters/. Some of these
+parameters may be changed at runtime by the command
+"echo -n ${value} > /sys/module/${modulename}/parameters/${parm}".
+
+
+ ad1848= [HW,OSS]
+ Format: <io>,<irq>,<dma>,<dma2>,<type>
+
+ aedsp16= [HW,OSS] Audio Excel DSP 16
+ Format: <io>,<irq>,<dma>,<mss_io>,<mpu_io>,<mpu_irq>
+ See also header of sound/oss/aedsp16.c.
+
+ dmasound= [HW,OSS] Sound subsystem buffers
+
+ mpu401= [HW,OSS]
+ Format: <io>,<irq>
+
+ opl3= [HW,OSS]
+ Format: <io>
+
+ pas2= [HW,OSS] Format:
+ <io>,<irq>,<dma>,<dma16>,<sb_io>,<sb_irq>,<sb_dma>,<sb_dma16>
+
+ pss= [HW,OSS] Personal Sound System (ECHO ESC614)
+ Format:
+ <io>,<mss_io>,<mss_irq>,<mss_dma>,<mpu_io>,<mpu_irq>
+
+ sscape= [HW,OSS]
+ Format: <io>,<irq>,<dma>,<mpu_io>,<mpu_irq>
+
+ trix= [HW,OSS] MediaTrix AudioTrix Pro
+ Format:
+ <io>,<irq>,<dma>,<dma2>,<sb_io>,<sb_irq>,<sb_dma>,<mpu_io>,<mpu_irq>
+
+ uart401= [HW,OSS]
+ Format: <io>,<irq>
+
+ uart6850= [HW,OSS]
+ Format: <io>,<irq>
+
+ waveartist= [HW,OSS]
+ Format: <io>,<irq>,<dma>,<dma2>
diff --git a/Documentation/sound/oss/ultrasound b/Documentation/sound/oss/ultrasound
new file mode 100644
index 000000000..eed331c73
--- /dev/null
+++ b/Documentation/sound/oss/ultrasound
@@ -0,0 +1,30 @@
+modprobe sound
+insmod ad1848
+insmod gus io=* irq=* dma=* ...
+
+This loads the driver for the Gravis Ultrasound family of sound cards.
+
+The gus module takes the following arguments
+
+io I/O address of the Ultrasound card (eg. io=0x220)
+irq IRQ of the Sound Blaster card
+dma DMA channel for the Sound Blaster
+dma16 2nd DMA channel, only needed for full duplex operation
+type 1 for PnP card
+gus16 1 for using 16 bit sampling daughter board
+no_wave_dma Set to disable DMA usage for wavetable (see note)
+db16 ???
+
+
+no_wave_dma option
+
+This option defaults to a value of 0, which allows the Ultrasound wavetable
+DSP to use DMA for playback and downloading samples. This is the same
+as the old behaviour. If set to 1, no DMA is needed for downloading samples,
+and allows owners of a GUS MAX to make use of simultaneous digital audio
+(/dev/dsp), MIDI, and wavetable playback.
+
+
+If you have problems in recording with GUS MAX, you could try to use
+just one 8 bit DMA channel. Recording will not work with one DMA
+channel if it's a 16 bit one.
diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt
new file mode 100644
index 000000000..eceab1308
--- /dev/null
+++ b/Documentation/sparse.txt
@@ -0,0 +1,108 @@
+Copyright 2004 Linus Torvalds
+Copyright 2004 Pavel Machek <pavel@ucw.cz>
+Copyright 2006 Bob Copeland <me@bobcopeland.com>
+
+Using sparse for typechecking
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+"__bitwise" is a type attribute, so you have to do something like this:
+
+ typedef int __bitwise pm_request_t;
+
+ enum pm_request {
+ PM_SUSPEND = (__force pm_request_t) 1,
+ PM_RESUME = (__force pm_request_t) 2
+ };
+
+which makes PM_SUSPEND and PM_RESUME "bitwise" integers (the "__force" is
+there because sparse will complain about casting to/from a bitwise type,
+but in this case we really _do_ want to force the conversion). And because
+the enum values are all the same type, now "enum pm_request" will be that
+type too.
+
+And with gcc, all the __bitwise/__force stuff goes away, and it all ends
+up looking just like integers to gcc.
+
+Quite frankly, you don't need the enum there. The above all really just
+boils down to one special "int __bitwise" type.
+
+So the simpler way is to just do
+
+ typedef int __bitwise pm_request_t;
+
+ #define PM_SUSPEND ((__force pm_request_t) 1)
+ #define PM_RESUME ((__force pm_request_t) 2)
+
+and you now have all the infrastructure needed for strict typechecking.
+
+One small note: the constant integer "0" is special. You can use a
+constant zero as a bitwise integer type without sparse ever complaining.
+This is because "bitwise" (as the name implies) was designed for making
+sure that bitwise types don't get mixed up (little-endian vs big-endian
+vs cpu-endian vs whatever), and there the constant "0" really _is_
+special.
+
+__bitwise__ - to be used for relatively compact stuff (gfp_t, etc.) that
+is mostly warning-free and is supposed to stay that way. Warnings will
+be generated without __CHECK_ENDIAN__.
+
+__bitwise - noisy stuff; in particular, __le*/__be* are that. We really
+don't want to drown in noise unless we'd explicitly asked for it.
+
+Using sparse for lock checking
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The following macros are undefined for gcc and defined during a sparse
+run to use the "context" tracking feature of sparse, applied to
+locking. These annotations tell sparse when a lock is held, with
+regard to the annotated function's entry and exit.
+
+__must_hold - The specified lock is held on function entry and exit.
+
+__acquires - The specified lock is held on function exit, but not entry.
+
+__releases - The specified lock is held on function entry, but not exit.
+
+If the function enters and exits without the lock held, acquiring and
+releasing the lock inside the function in a balanced way, no
+annotation is needed. The tree annotations above are for cases where
+sparse would otherwise report a context imbalance.
+
+Getting sparse
+~~~~~~~~~~~~~~
+
+You can get latest released versions from the Sparse homepage at
+https://sparse.wiki.kernel.org/index.php/Main_Page
+
+Alternatively, you can get snapshots of the latest development version
+of sparse using git to clone..
+
+ git://git.kernel.org/pub/scm/devel/sparse/sparse.git
+
+DaveJ has hourly generated tarballs of the git tree available at..
+
+ http://www.codemonkey.org.uk/projects/git-snapshots/sparse/
+
+
+Once you have it, just do
+
+ make
+ make install
+
+as a regular user, and it will install sparse in your ~/bin directory.
+
+Using sparse
+~~~~~~~~~~~~
+
+Do a kernel make with "make C=1" to run sparse on all the C files that get
+recompiled, or use "make C=2" to run sparse on the files whether they need to
+be recompiled or not. The latter is a fast way to check the whole tree if you
+have already built it.
+
+The optional make variable CF can be used to pass arguments to sparse. The
+build system passes -Wbitwise to sparse automatically. To perform endianness
+checks, you may define __CHECK_ENDIAN__:
+
+ make C=2 CF="-D__CHECK_ENDIAN__"
+
+These checks are disabled by default as they generate a host of warnings.
diff --git a/Documentation/spi/.gitignore b/Documentation/spi/.gitignore
new file mode 100644
index 000000000..428057639
--- /dev/null
+++ b/Documentation/spi/.gitignore
@@ -0,0 +1,2 @@
+spidev_fdx
+spidev_test
diff --git a/Documentation/spi/00-INDEX b/Documentation/spi/00-INDEX
new file mode 100644
index 000000000..a128fa835
--- /dev/null
+++ b/Documentation/spi/00-INDEX
@@ -0,0 +1,22 @@
+00-INDEX
+ - this file.
+Makefile
+ - Makefile for the example sourcefiles.
+butterfly
+ - AVR Butterfly SPI driver overview and pin configuration.
+ep93xx_spi
+ - Basic EP93xx SPI driver configuration.
+pxa2xx
+ - PXA2xx SPI master controller build by spi_message fifo wq
+spidev
+ - Intro to the userspace API for spi devices
+spidev_fdx.c
+ - spidev example file
+spi-lm70llp
+ - Connecting an LM70-LLP sensor to the kernel via the SPI subsys.
+spi-sc18is602
+ - NXP SC18IS602/603 I2C-bus to SPI bridge
+spi-summary
+ - (Linux) SPI overview. If unsure about SPI or SPI in Linux, start here.
+spidev_test.c
+ - SPI testing utility.
diff --git a/Documentation/spi/Makefile b/Documentation/spi/Makefile
new file mode 100644
index 000000000..efa255813
--- /dev/null
+++ b/Documentation/spi/Makefile
@@ -0,0 +1,8 @@
+# List of programs to build
+hostprogs-y := spidev_test spidev_fdx
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS_spidev_test.o += -I$(objtree)/usr/include
+HOSTCFLAGS_spidev_fdx.o += -I$(objtree)/usr/include
diff --git a/Documentation/spi/butterfly b/Documentation/spi/butterfly
new file mode 100644
index 000000000..9927af7a6
--- /dev/null
+++ b/Documentation/spi/butterfly
@@ -0,0 +1,68 @@
+spi_butterfly - parport-to-butterfly adapter driver
+===================================================
+
+This is a hardware and software project that includes building and using
+a parallel port adapter cable, together with an "AVR Butterfly" to run
+firmware for user interfacing and/or sensors. A Butterfly is a $US20
+battery powered card with an AVR microcontroller and lots of goodies:
+sensors, LCD, flash, toggle stick, and more. You can use AVR-GCC to
+develop firmware for this, and flash it using this adapter cable.
+
+You can make this adapter from an old printer cable and solder things
+directly to the Butterfly. Or (if you have the parts and skills) you
+can come up with something fancier, providing ciruit protection to the
+Butterfly and the printer port, or with a better power supply than two
+signal pins from the printer port. Or for that matter, you can use
+similar cables to talk to many AVR boards, even a breadboard.
+
+This is more powerful than "ISP programming" cables since it lets kernel
+SPI protocol drivers interact with the AVR, and could even let the AVR
+issue interrupts to them. Later, your protocol driver should work
+easily with a "real SPI controller", instead of this bitbanger.
+
+
+The first cable connections will hook Linux up to one SPI bus, with the
+AVR and a DataFlash chip; and to the AVR reset line. This is all you
+need to reflash the firmware, and the pins are the standard Atmel "ISP"
+connector pins (used also on non-Butterfly AVR boards). On the parport
+side this is like "sp12" programming cables.
+
+ Signal Butterfly Parport (DB-25)
+ ------ --------- ---------------
+ SCK = J403.PB1/SCK = pin 2/D0
+ RESET = J403.nRST = pin 3/D1
+ VCC = J403.VCC_EXT = pin 8/D6
+ MOSI = J403.PB2/MOSI = pin 9/D7
+ MISO = J403.PB3/MISO = pin 11/S7,nBUSY
+ GND = J403.GND = pin 23/GND
+
+Then to let Linux master that bus to talk to the DataFlash chip, you must
+(a) flash new firmware that disables SPI (set PRR.2, and disable pullups
+by clearing PORTB.[0-3]); (b) configure the mtd_dataflash driver; and
+(c) cable in the chipselect.
+
+ Signal Butterfly Parport (DB-25)
+ ------ --------- ---------------
+ VCC = J400.VCC_EXT = pin 7/D5
+ SELECT = J400.PB0/nSS = pin 17/C3,nSELECT
+ GND = J400.GND = pin 24/GND
+
+Or you could flash firmware making the AVR into an SPI slave (keeping the
+DataFlash in reset) and tweak the spi_butterfly driver to make it bind to
+the driver for your custom SPI-based protocol.
+
+The "USI" controller, using J405, can also be used for a second SPI bus.
+That would let you talk to the AVR using custom SPI-with-USI firmware,
+while letting either Linux or the AVR use the DataFlash. There are plenty
+of spare parport pins to wire this one up, such as:
+
+ Signal Butterfly Parport (DB-25)
+ ------ --------- ---------------
+ SCK = J403.PE4/USCK = pin 5/D3
+ MOSI = J403.PE5/DI = pin 6/D4
+ MISO = J403.PE6/DO = pin 12/S5,nPAPEROUT
+ GND = J403.GND = pin 22/GND
+
+ IRQ = J402.PF4 = pin 10/S6,ACK
+ GND = J402.GND(P2) = pin 25/GND
+
diff --git a/Documentation/spi/ep93xx_spi b/Documentation/spi/ep93xx_spi
new file mode 100644
index 000000000..832ddce6e
--- /dev/null
+++ b/Documentation/spi/ep93xx_spi
@@ -0,0 +1,105 @@
+Cirrus EP93xx SPI controller driver HOWTO
+=========================================
+
+ep93xx_spi driver brings SPI master support for EP93xx SPI controller. Chip
+selects are implemented with GPIO lines.
+
+NOTE: If possible, don't use SFRMOUT (SFRM1) signal as a chip select. It will
+not work correctly (it cannot be controlled by software). Use GPIO lines
+instead.
+
+Sample configuration
+====================
+
+Typically driver configuration is done in platform board files (the files under
+arch/arm/mach-ep93xx/*.c). In this example we configure MMC over SPI through
+this driver on TS-7260 board. You can adapt the code to suit your needs.
+
+This example uses EGPIO9 as SD/MMC card chip select (this is wired in DIO1
+header on the board).
+
+You need to select CONFIG_MMC_SPI to use mmc_spi driver.
+
+arch/arm/mach-ep93xx/ts72xx.c:
+
+...
+#include <linux/gpio.h>
+#include <linux/spi/spi.h>
+
+#include <linux/platform_data/spi-ep93xx.h>
+
+/* this is our GPIO line used for chip select */
+#define MMC_CHIP_SELECT_GPIO EP93XX_GPIO_LINE_EGPIO9
+
+static int ts72xx_mmc_spi_setup(struct spi_device *spi)
+{
+ int err;
+
+ err = gpio_request(MMC_CHIP_SELECT_GPIO, spi->modalias);
+ if (err)
+ return err;
+
+ gpio_direction_output(MMC_CHIP_SELECT_GPIO, 1);
+
+ return 0;
+}
+
+static void ts72xx_mmc_spi_cleanup(struct spi_device *spi)
+{
+ gpio_set_value(MMC_CHIP_SELECT_GPIO, 1);
+ gpio_direction_input(MMC_CHIP_SELECT_GPIO);
+ gpio_free(MMC_CHIP_SELECT_GPIO);
+}
+
+static void ts72xx_mmc_spi_cs_control(struct spi_device *spi, int value)
+{
+ gpio_set_value(MMC_CHIP_SELECT_GPIO, value);
+}
+
+static struct ep93xx_spi_chip_ops ts72xx_mmc_spi_ops = {
+ .setup = ts72xx_mmc_spi_setup,
+ .cleanup = ts72xx_mmc_spi_cleanup,
+ .cs_control = ts72xx_mmc_spi_cs_control,
+};
+
+static struct spi_board_info ts72xx_spi_devices[] __initdata = {
+ {
+ .modalias = "mmc_spi",
+ .controller_data = &ts72xx_mmc_spi_ops,
+ /*
+ * We use 10 MHz even though the maximum is 7.4 MHz. The driver
+ * will limit it automatically to max. frequency.
+ */
+ .max_speed_hz = 10 * 1000 * 1000,
+ .bus_num = 0,
+ .chip_select = 0,
+ .mode = SPI_MODE_0,
+ },
+};
+
+static struct ep93xx_spi_info ts72xx_spi_info = {
+ .num_chipselect = ARRAY_SIZE(ts72xx_spi_devices),
+};
+
+static void __init ts72xx_init_machine(void)
+{
+ ...
+ ep93xx_register_spi(&ts72xx_spi_info, ts72xx_spi_devices,
+ ARRAY_SIZE(ts72xx_spi_devices));
+}
+
+The driver can use DMA for the transfers also. In this case ts72xx_spi_info
+becomes:
+
+static struct ep93xx_spi_info ts72xx_spi_info = {
+ .num_chipselect = ARRAY_SIZE(ts72xx_spi_devices),
+ .use_dma = true;
+};
+
+Note that CONFIG_EP93XX_DMA should be enabled as well.
+
+Thanks to
+=========
+Martin Guy, H. Hartley Sweeten and others who helped me during development of
+the driver. Simplemachines.it donated me a Sim.One board which I used testing
+the driver on EP9307.
diff --git a/Documentation/spi/pxa2xx b/Documentation/spi/pxa2xx
new file mode 100644
index 000000000..3352f9743
--- /dev/null
+++ b/Documentation/spi/pxa2xx
@@ -0,0 +1,241 @@
+PXA2xx SPI on SSP driver HOWTO
+===================================================
+This a mini howto on the pxa2xx_spi driver. The driver turns a PXA2xx
+synchronous serial port into a SPI master controller
+(see Documentation/spi/spi-summary). The driver has the following features
+
+- Support for any PXA2xx SSP
+- SSP PIO and SSP DMA data transfers.
+- External and Internal (SSPFRM) chip selects.
+- Per slave device (chip) configuration.
+- Full suspend, freeze, resume support.
+
+The driver is built around a "spi_message" fifo serviced by workqueue and a
+tasklet. The workqueue, "pump_messages", drives message fifo and the tasklet
+(pump_transfer) is responsible for queuing SPI transactions and setting up and
+launching the dma/interrupt driven transfers.
+
+Declaring PXA2xx Master Controllers
+-----------------------------------
+Typically a SPI master is defined in the arch/.../mach-*/board-*.c as a
+"platform device". The master configuration is passed to the driver via a table
+found in include/linux/spi/pxa2xx_spi.h:
+
+struct pxa2xx_spi_master {
+ u32 clock_enable;
+ u16 num_chipselect;
+ u8 enable_dma;
+};
+
+The "pxa2xx_spi_master.clock_enable" field is used to enable/disable the
+corresponding SSP peripheral block in the "Clock Enable Register (CKEN"). See
+the "PXA2xx Developer Manual" section "Clocks and Power Management".
+
+The "pxa2xx_spi_master.num_chipselect" field is used to determine the number of
+slave device (chips) attached to this SPI master.
+
+The "pxa2xx_spi_master.enable_dma" field informs the driver that SSP DMA should
+be used. This caused the driver to acquire two DMA channels: rx_channel and
+tx_channel. The rx_channel has a higher DMA service priority the tx_channel.
+See the "PXA2xx Developer Manual" section "DMA Controller".
+
+NSSP MASTER SAMPLE
+------------------
+Below is a sample configuration using the PXA255 NSSP.
+
+static struct resource pxa_spi_nssp_resources[] = {
+ [0] = {
+ .start = __PREG(SSCR0_P(2)), /* Start address of NSSP */
+ .end = __PREG(SSCR0_P(2)) + 0x2c, /* Range of registers */
+ .flags = IORESOURCE_MEM,
+ },
+ [1] = {
+ .start = IRQ_NSSP, /* NSSP IRQ */
+ .end = IRQ_NSSP,
+ .flags = IORESOURCE_IRQ,
+ },
+};
+
+static struct pxa2xx_spi_master pxa_nssp_master_info = {
+ .clock_enable = CKEN_NSSP, /* NSSP Peripheral clock */
+ .num_chipselect = 1, /* Matches the number of chips attached to NSSP */
+ .enable_dma = 1, /* Enables NSSP DMA */
+};
+
+static struct platform_device pxa_spi_nssp = {
+ .name = "pxa2xx-spi", /* MUST BE THIS VALUE, so device match driver */
+ .id = 2, /* Bus number, MUST MATCH SSP number 1..n */
+ .resource = pxa_spi_nssp_resources,
+ .num_resources = ARRAY_SIZE(pxa_spi_nssp_resources),
+ .dev = {
+ .platform_data = &pxa_nssp_master_info, /* Passed to driver */
+ },
+};
+
+static struct platform_device *devices[] __initdata = {
+ &pxa_spi_nssp,
+};
+
+static void __init board_init(void)
+{
+ (void)platform_add_device(devices, ARRAY_SIZE(devices));
+}
+
+Declaring Slave Devices
+-----------------------
+Typically each SPI slave (chip) is defined in the arch/.../mach-*/board-*.c
+using the "spi_board_info" structure found in "linux/spi/spi.h". See
+"Documentation/spi/spi-summary" for additional information.
+
+Each slave device attached to the PXA must provide slave specific configuration
+information via the structure "pxa2xx_spi_chip" found in
+"include/linux/spi/pxa2xx_spi.h". The pxa2xx_spi master controller driver
+will uses the configuration whenever the driver communicates with the slave
+device. All fields are optional.
+
+struct pxa2xx_spi_chip {
+ u8 tx_threshold;
+ u8 rx_threshold;
+ u8 dma_burst_size;
+ u32 timeout;
+ u8 enable_loopback;
+ void (*cs_control)(u32 command);
+};
+
+The "pxa2xx_spi_chip.tx_threshold" and "pxa2xx_spi_chip.rx_threshold" fields are
+used to configure the SSP hardware fifo. These fields are critical to the
+performance of pxa2xx_spi driver and misconfiguration will result in rx
+fifo overruns (especially in PIO mode transfers). Good default values are
+
+ .tx_threshold = 8,
+ .rx_threshold = 8,
+
+The range is 1 to 16 where zero indicates "use default".
+
+The "pxa2xx_spi_chip.dma_burst_size" field is used to configure PXA2xx DMA
+engine and is related the "spi_device.bits_per_word" field. Read and understand
+the PXA2xx "Developer Manual" sections on the DMA controller and SSP Controllers
+to determine the correct value. An SSP configured for byte-wide transfers would
+use a value of 8. The driver will determine a reasonable default if
+dma_burst_size == 0.
+
+The "pxa2xx_spi_chip.timeout" fields is used to efficiently handle
+trailing bytes in the SSP receiver fifo. The correct value for this field is
+dependent on the SPI bus speed ("spi_board_info.max_speed_hz") and the specific
+slave device. Please note that the PXA2xx SSP 1 does not support trailing byte
+timeouts and must busy-wait any trailing bytes.
+
+The "pxa2xx_spi_chip.enable_loopback" field is used to place the SSP porting
+into internal loopback mode. In this mode the SSP controller internally
+connects the SSPTX pin to the SSPRX pin. This is useful for initial setup
+testing.
+
+The "pxa2xx_spi_chip.cs_control" field is used to point to a board specific
+function for asserting/deasserting a slave device chip select. If the field is
+NULL, the pxa2xx_spi master controller driver assumes that the SSP port is
+configured to use SSPFRM instead.
+
+NOTE: the SPI driver cannot control the chip select if SSPFRM is used, so the
+chipselect is dropped after each spi_transfer. Most devices need chip select
+asserted around the complete message. Use SSPFRM as a GPIO (through cs_control)
+to accommodate these chips.
+
+
+NSSP SLAVE SAMPLE
+-----------------
+The pxa2xx_spi_chip structure is passed to the pxa2xx_spi driver in the
+"spi_board_info.controller_data" field. Below is a sample configuration using
+the PXA255 NSSP.
+
+/* Chip Select control for the CS8415A SPI slave device */
+static void cs8415a_cs_control(u32 command)
+{
+ if (command & PXA2XX_CS_ASSERT)
+ GPCR(2) = GPIO_bit(2);
+ else
+ GPSR(2) = GPIO_bit(2);
+}
+
+/* Chip Select control for the CS8405A SPI slave device */
+static void cs8405a_cs_control(u32 command)
+{
+ if (command & PXA2XX_CS_ASSERT)
+ GPCR(3) = GPIO_bit(3);
+ else
+ GPSR(3) = GPIO_bit(3);
+}
+
+static struct pxa2xx_spi_chip cs8415a_chip_info = {
+ .tx_threshold = 8, /* SSP hardward FIFO threshold */
+ .rx_threshold = 8, /* SSP hardward FIFO threshold */
+ .dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
+ .timeout = 235, /* See Intel documentation */
+ .cs_control = cs8415a_cs_control, /* Use external chip select */
+};
+
+static struct pxa2xx_spi_chip cs8405a_chip_info = {
+ .tx_threshold = 8, /* SSP hardward FIFO threshold */
+ .rx_threshold = 8, /* SSP hardward FIFO threshold */
+ .dma_burst_size = 8, /* Byte wide transfers used so 8 byte bursts */
+ .timeout = 235, /* See Intel documentation */
+ .cs_control = cs8405a_cs_control, /* Use external chip select */
+};
+
+static struct spi_board_info streetracer_spi_board_info[] __initdata = {
+ {
+ .modalias = "cs8415a", /* Name of spi_driver for this device */
+ .max_speed_hz = 3686400, /* Run SSP as fast a possbile */
+ .bus_num = 2, /* Framework bus number */
+ .chip_select = 0, /* Framework chip select */
+ .platform_data = NULL; /* No spi_driver specific config */
+ .controller_data = &cs8415a_chip_info, /* Master chip config */
+ .irq = STREETRACER_APCI_IRQ, /* Slave device interrupt */
+ },
+ {
+ .modalias = "cs8405a", /* Name of spi_driver for this device */
+ .max_speed_hz = 3686400, /* Run SSP as fast a possbile */
+ .bus_num = 2, /* Framework bus number */
+ .chip_select = 1, /* Framework chip select */
+ .controller_data = &cs8405a_chip_info, /* Master chip config */
+ .irq = STREETRACER_APCI_IRQ, /* Slave device interrupt */
+ },
+};
+
+static void __init streetracer_init(void)
+{
+ spi_register_board_info(streetracer_spi_board_info,
+ ARRAY_SIZE(streetracer_spi_board_info));
+}
+
+
+DMA and PIO I/O Support
+-----------------------
+The pxa2xx_spi driver supports both DMA and interrupt driven PIO message
+transfers. The driver defaults to PIO mode and DMA transfers must be enabled
+by setting the "enable_dma" flag in the "pxa2xx_spi_master" structure. The DMA
+mode supports both coherent and stream based DMA mappings.
+
+The following logic is used to determine the type of I/O to be used on
+a per "spi_transfer" basis:
+
+if !enable_dma then
+ always use PIO transfers
+
+if spi_message.len > 8191 then
+ print "rate limited" warning
+ use PIO transfers
+
+if spi_message.is_dma_mapped and rx_dma_buf != 0 and tx_dma_buf != 0 then
+ use coherent DMA mode
+
+if rx_buf and tx_buf are aligned on 8 byte boundary then
+ use streaming DMA mode
+
+otherwise
+ use PIO transfer
+
+THANKS TO
+---------
+
+David Brownell and others for mentoring the development of this driver.
+
diff --git a/Documentation/spi/spi-lm70llp b/Documentation/spi/spi-lm70llp
new file mode 100644
index 000000000..463f6d01f
--- /dev/null
+++ b/Documentation/spi/spi-lm70llp
@@ -0,0 +1,79 @@
+spi_lm70llp : LM70-LLP parport-to-SPI adapter
+==============================================
+
+Supported board/chip:
+ * National Semiconductor LM70 LLP evaluation board
+ Datasheet: http://www.national.com/pf/LM/LM70.html
+
+Author:
+ Kaiwan N Billimoria <kaiwan@designergraphix.com>
+
+Description
+-----------
+This driver provides glue code connecting a National Semiconductor LM70 LLP
+temperature sensor evaluation board to the kernel's SPI core subsystem.
+
+This is a SPI master controller driver. It can be used in conjunction with
+(layered under) the LM70 logical driver (a "SPI protocol driver").
+In effect, this driver turns the parallel port interface on the eval board
+into a SPI bus with a single device, which will be driven by the generic
+LM70 driver (drivers/hwmon/lm70.c).
+
+
+Hardware Interfacing
+--------------------
+The schematic for this particular board (the LM70EVAL-LLP) is
+available (on page 4) here:
+
+ http://www.national.com/appinfo/tempsensors/files/LM70LLPEVALmanual.pdf
+
+The hardware interfacing on the LM70 LLP eval board is as follows:
+
+ Parallel LM70 LLP
+ Port Direction JP2 Header
+ ----------- --------- ----------------
+ D0 2 - -
+ D1 3 --> V+ 5
+ D2 4 --> V+ 5
+ D3 5 --> V+ 5
+ D4 6 --> V+ 5
+ D5 7 --> nCS 8
+ D6 8 --> SCLK 3
+ D7 9 --> SI/O 5
+ GND 25 - GND 7
+ Select 13 <-- SI/O 1
+ ----------- --------- ----------------
+
+Note that since the LM70 uses a "3-wire" variant of SPI, the SI/SO pin
+is connected to both pin D7 (as Master Out) and Select (as Master In)
+using an arrangement that lets either the parport or the LM70 pull the
+pin low. This can't be shared with true SPI devices, but other 3-wire
+devices might share the same SI/SO pin.
+
+The bitbanger routine in this driver (lm70_txrx) is called back from
+the bound "hwmon/lm70" protocol driver through its sysfs hook, using a
+spi_write_then_read() call. It performs Mode 0 (SPI/Microwire) bitbanging.
+The lm70 driver then inteprets the resulting digital temperature value
+and exports it through sysfs.
+
+A "gotcha": National Semiconductor's LM70 LLP eval board circuit schematic
+shows that the SI/O line from the LM70 chip is connected to the base of a
+transistor Q1 (and also a pullup, and a zener diode to D7); while the
+collector is tied to VCC.
+
+Interpreting this circuit, when the LM70 SI/O line is High (or tristate
+and not grounded by the host via D7), the transistor conducts and switches
+the collector to zero, which is reflected on pin 13 of the DB25 parport
+connector. When SI/O is Low (driven by the LM70 or the host) on the other
+hand, the transistor is cut off and the voltage tied to it's collector is
+reflected on pin 13 as a High level.
+
+So: the getmiso inline routine in this driver takes this fact into account,
+inverting the value read at pin 13.
+
+
+Thanks to
+---------
+o David Brownell for mentoring the SPI-side driver development.
+o Dr.Craig Hollabaugh for the (early) "manual" bitbanging driver version.
+o Nadir Billimoria for help interpreting the circuit schematic.
diff --git a/Documentation/spi/spi-sc18is602 b/Documentation/spi/spi-sc18is602
new file mode 100644
index 000000000..a45702865
--- /dev/null
+++ b/Documentation/spi/spi-sc18is602
@@ -0,0 +1,36 @@
+Kernel driver spi-sc18is602
+===========================
+
+Supported chips:
+ * NXP SI18IS602/602B/603
+ Datasheet: http://www.nxp.com/documents/data_sheet/SC18IS602_602B_603.pdf
+
+Author:
+ Guenter Roeck <linux@roeck-us.net>
+
+
+Description
+-----------
+
+This driver provides connects a NXP SC18IS602/603 I2C-bus to SPI bridge to the
+kernel's SPI core subsystem.
+
+The driver does not probe for supported chips, since the SI18IS602/603 does not
+support Chip ID registers. You will have to instantiate the devices explicitly.
+Please see Documentation/i2c/instantiating-devices for details.
+
+
+Usage Notes
+-----------
+
+This driver requires the I2C adapter driver to support raw I2C messages. I2C
+adapter drivers which can only handle the SMBus protocol are not supported.
+
+The maximum SPI message size supported by SC18IS602/603 is 200 bytes. Attempts
+to initiate longer transfers will fail with -EINVAL. EEPROM read operations and
+similar large accesses have to be split into multiple chunks of no more than
+200 bytes per SPI message (128 bytes of data per message is recommended). This
+means that programs such as "cp" or "od", which automatically use large block
+sizes to access a device, can not be used directly to read data from EEPROM.
+Programs such as dd, where the block size can be specified, should be used
+instead.
diff --git a/Documentation/spi/spi-summary b/Documentation/spi/spi-summary
new file mode 100644
index 000000000..d1824b399
--- /dev/null
+++ b/Documentation/spi/spi-summary
@@ -0,0 +1,612 @@
+Overview of Linux kernel SPI support
+====================================
+
+02-Feb-2012
+
+What is SPI?
+------------
+The "Serial Peripheral Interface" (SPI) is a synchronous four wire serial
+link used to connect microcontrollers to sensors, memory, and peripherals.
+It's a simple "de facto" standard, not complicated enough to acquire a
+standardization body. SPI uses a master/slave configuration.
+
+The three signal wires hold a clock (SCK, often on the order of 10 MHz),
+and parallel data lines with "Master Out, Slave In" (MOSI) or "Master In,
+Slave Out" (MISO) signals. (Other names are also used.) There are four
+clocking modes through which data is exchanged; mode-0 and mode-3 are most
+commonly used. Each clock cycle shifts data out and data in; the clock
+doesn't cycle except when there is a data bit to shift. Not all data bits
+are used though; not every protocol uses those full duplex capabilities.
+
+SPI masters use a fourth "chip select" line to activate a given SPI slave
+device, so those three signal wires may be connected to several chips
+in parallel. All SPI slaves support chipselects; they are usually active
+low signals, labeled nCSx for slave 'x' (e.g. nCS0). Some devices have
+other signals, often including an interrupt to the master.
+
+Unlike serial busses like USB or SMBus, even low level protocols for
+SPI slave functions are usually not interoperable between vendors
+(except for commodities like SPI memory chips).
+
+ - SPI may be used for request/response style device protocols, as with
+ touchscreen sensors and memory chips.
+
+ - It may also be used to stream data in either direction (half duplex),
+ or both of them at the same time (full duplex).
+
+ - Some devices may use eight bit words. Others may use different word
+ lengths, such as streams of 12-bit or 20-bit digital samples.
+
+ - Words are usually sent with their most significant bit (MSB) first,
+ but sometimes the least significant bit (LSB) goes first instead.
+
+ - Sometimes SPI is used to daisy-chain devices, like shift registers.
+
+In the same way, SPI slaves will only rarely support any kind of automatic
+discovery/enumeration protocol. The tree of slave devices accessible from
+a given SPI master will normally be set up manually, with configuration
+tables.
+
+SPI is only one of the names used by such four-wire protocols, and
+most controllers have no problem handling "MicroWire" (think of it as
+half-duplex SPI, for request/response protocols), SSP ("Synchronous
+Serial Protocol"), PSP ("Programmable Serial Protocol"), and other
+related protocols.
+
+Some chips eliminate a signal line by combining MOSI and MISO, and
+limiting themselves to half-duplex at the hardware level. In fact
+some SPI chips have this signal mode as a strapping option. These
+can be accessed using the same programming interface as SPI, but of
+course they won't handle full duplex transfers. You may find such
+chips described as using "three wire" signaling: SCK, data, nCSx.
+(That data line is sometimes called MOMI or SISO.)
+
+Microcontrollers often support both master and slave sides of the SPI
+protocol. This document (and Linux) currently only supports the master
+side of SPI interactions.
+
+
+Who uses it? On what kinds of systems?
+---------------------------------------
+Linux developers using SPI are probably writing device drivers for embedded
+systems boards. SPI is used to control external chips, and it is also a
+protocol supported by every MMC or SD memory card. (The older "DataFlash"
+cards, predating MMC cards but using the same connectors and card shape,
+support only SPI.) Some PC hardware uses SPI flash for BIOS code.
+
+SPI slave chips range from digital/analog converters used for analog
+sensors and codecs, to memory, to peripherals like USB controllers
+or Ethernet adapters; and more.
+
+Most systems using SPI will integrate a few devices on a mainboard.
+Some provide SPI links on expansion connectors; in cases where no
+dedicated SPI controller exists, GPIO pins can be used to create a
+low speed "bitbanging" adapter. Very few systems will "hotplug" an SPI
+controller; the reasons to use SPI focus on low cost and simple operation,
+and if dynamic reconfiguration is important, USB will often be a more
+appropriate low-pincount peripheral bus.
+
+Many microcontrollers that can run Linux integrate one or more I/O
+interfaces with SPI modes. Given SPI support, they could use MMC or SD
+cards without needing a special purpose MMC/SD/SDIO controller.
+
+
+I'm confused. What are these four SPI "clock modes"?
+-----------------------------------------------------
+It's easy to be confused here, and the vendor documentation you'll
+find isn't necessarily helpful. The four modes combine two mode bits:
+
+ - CPOL indicates the initial clock polarity. CPOL=0 means the
+ clock starts low, so the first (leading) edge is rising, and
+ the second (trailing) edge is falling. CPOL=1 means the clock
+ starts high, so the first (leading) edge is falling.
+
+ - CPHA indicates the clock phase used to sample data; CPHA=0 says
+ sample on the leading edge, CPHA=1 means the trailing edge.
+
+ Since the signal needs to stablize before it's sampled, CPHA=0
+ implies that its data is written half a clock before the first
+ clock edge. The chipselect may have made it become available.
+
+Chip specs won't always say "uses SPI mode X" in as many words,
+but their timing diagrams will make the CPOL and CPHA modes clear.
+
+In the SPI mode number, CPOL is the high order bit and CPHA is the
+low order bit. So when a chip's timing diagram shows the clock
+starting low (CPOL=0) and data stabilized for sampling during the
+trailing clock edge (CPHA=1), that's SPI mode 1.
+
+Note that the clock mode is relevant as soon as the chipselect goes
+active. So the master must set the clock to inactive before selecting
+a slave, and the slave can tell the chosen polarity by sampling the
+clock level when its select line goes active. That's why many devices
+support for example both modes 0 and 3: they don't care about polarity,
+and always clock data in/out on rising clock edges.
+
+
+How do these driver programming interfaces work?
+------------------------------------------------
+The <linux/spi/spi.h> header file includes kerneldoc, as does the
+main source code, and you should certainly read that chapter of the
+kernel API document. This is just an overview, so you get the big
+picture before those details.
+
+SPI requests always go into I/O queues. Requests for a given SPI device
+are always executed in FIFO order, and complete asynchronously through
+completion callbacks. There are also some simple synchronous wrappers
+for those calls, including ones for common transaction types like writing
+a command and then reading its response.
+
+There are two types of SPI driver, here called:
+
+ Controller drivers ... controllers may be built into System-On-Chip
+ processors, and often support both Master and Slave roles.
+ These drivers touch hardware registers and may use DMA.
+ Or they can be PIO bitbangers, needing just GPIO pins.
+
+ Protocol drivers ... these pass messages through the controller
+ driver to communicate with a Slave or Master device on the
+ other side of an SPI link.
+
+So for example one protocol driver might talk to the MTD layer to export
+data to filesystems stored on SPI flash like DataFlash; and others might
+control audio interfaces, present touchscreen sensors as input interfaces,
+or monitor temperature and voltage levels during industrial processing.
+And those might all be sharing the same controller driver.
+
+A "struct spi_device" encapsulates the master-side interface between
+those two types of driver. At this writing, Linux has no slave side
+programming interface.
+
+There is a minimal core of SPI programming interfaces, focussing on
+using the driver model to connect controller and protocol drivers using
+device tables provided by board specific initialization code. SPI
+shows up in sysfs in several locations:
+
+ /sys/devices/.../CTLR ... physical node for a given SPI controller
+
+ /sys/devices/.../CTLR/spiB.C ... spi_device on bus "B",
+ chipselect C, accessed through CTLR.
+
+ /sys/bus/spi/devices/spiB.C ... symlink to that physical
+ .../CTLR/spiB.C device
+
+ /sys/devices/.../CTLR/spiB.C/modalias ... identifies the driver
+ that should be used with this device (for hotplug/coldplug)
+
+ /sys/bus/spi/drivers/D ... driver for one or more spi*.* devices
+
+ /sys/class/spi_master/spiB ... symlink (or actual device node) to
+ a logical node which could hold class related state for the
+ controller managing bus "B". All spiB.* devices share one
+ physical SPI bus segment, with SCLK, MOSI, and MISO.
+
+Note that the actual location of the controller's class state depends
+on whether you enabled CONFIG_SYSFS_DEPRECATED or not. At this time,
+the only class-specific state is the bus number ("B" in "spiB"), so
+those /sys/class entries are only useful to quickly identify busses.
+
+
+How does board-specific init code declare SPI devices?
+------------------------------------------------------
+Linux needs several kinds of information to properly configure SPI devices.
+That information is normally provided by board-specific code, even for
+chips that do support some of automated discovery/enumeration.
+
+DECLARE CONTROLLERS
+
+The first kind of information is a list of what SPI controllers exist.
+For System-on-Chip (SOC) based boards, these will usually be platform
+devices, and the controller may need some platform_data in order to
+operate properly. The "struct platform_device" will include resources
+like the physical address of the controller's first register and its IRQ.
+
+Platforms will often abstract the "register SPI controller" operation,
+maybe coupling it with code to initialize pin configurations, so that
+the arch/.../mach-*/board-*.c files for several boards can all share the
+same basic controller setup code. This is because most SOCs have several
+SPI-capable controllers, and only the ones actually usable on a given
+board should normally be set up and registered.
+
+So for example arch/.../mach-*/board-*.c files might have code like:
+
+ #include <mach/spi.h> /* for mysoc_spi_data */
+
+ /* if your mach-* infrastructure doesn't support kernels that can
+ * run on multiple boards, pdata wouldn't benefit from "__init".
+ */
+ static struct mysoc_spi_data pdata __initdata = { ... };
+
+ static __init board_init(void)
+ {
+ ...
+ /* this board only uses SPI controller #2 */
+ mysoc_register_spi(2, &pdata);
+ ...
+ }
+
+And SOC-specific utility code might look something like:
+
+ #include <mach/spi.h>
+
+ static struct platform_device spi2 = { ... };
+
+ void mysoc_register_spi(unsigned n, struct mysoc_spi_data *pdata)
+ {
+ struct mysoc_spi_data *pdata2;
+
+ pdata2 = kmalloc(sizeof *pdata2, GFP_KERNEL);
+ *pdata2 = pdata;
+ ...
+ if (n == 2) {
+ spi2->dev.platform_data = pdata2;
+ register_platform_device(&spi2);
+
+ /* also: set up pin modes so the spi2 signals are
+ * visible on the relevant pins ... bootloaders on
+ * production boards may already have done this, but
+ * developer boards will often need Linux to do it.
+ */
+ }
+ ...
+ }
+
+Notice how the platform_data for boards may be different, even if the
+same SOC controller is used. For example, on one board SPI might use
+an external clock, where another derives the SPI clock from current
+settings of some master clock.
+
+
+DECLARE SLAVE DEVICES
+
+The second kind of information is a list of what SPI slave devices exist
+on the target board, often with some board-specific data needed for the
+driver to work correctly.
+
+Normally your arch/.../mach-*/board-*.c files would provide a small table
+listing the SPI devices on each board. (This would typically be only a
+small handful.) That might look like:
+
+ static struct ads7846_platform_data ads_info = {
+ .vref_delay_usecs = 100,
+ .x_plate_ohms = 580,
+ .y_plate_ohms = 410,
+ };
+
+ static struct spi_board_info spi_board_info[] __initdata = {
+ {
+ .modalias = "ads7846",
+ .platform_data = &ads_info,
+ .mode = SPI_MODE_0,
+ .irq = GPIO_IRQ(31),
+ .max_speed_hz = 120000 /* max sample rate at 3V */ * 16,
+ .bus_num = 1,
+ .chip_select = 0,
+ },
+ };
+
+Again, notice how board-specific information is provided; each chip may need
+several types. This example shows generic constraints like the fastest SPI
+clock to allow (a function of board voltage in this case) or how an IRQ pin
+is wired, plus chip-specific constraints like an important delay that's
+changed by the capacitance at one pin.
+
+(There's also "controller_data", information that may be useful to the
+controller driver. An example would be peripheral-specific DMA tuning
+data or chipselect callbacks. This is stored in spi_device later.)
+
+The board_info should provide enough information to let the system work
+without the chip's driver being loaded. The most troublesome aspect of
+that is likely the SPI_CS_HIGH bit in the spi_device.mode field, since
+sharing a bus with a device that interprets chipselect "backwards" is
+not possible until the infrastructure knows how to deselect it.
+
+Then your board initialization code would register that table with the SPI
+infrastructure, so that it's available later when the SPI master controller
+driver is registered:
+
+ spi_register_board_info(spi_board_info, ARRAY_SIZE(spi_board_info));
+
+Like with other static board-specific setup, you won't unregister those.
+
+The widely used "card" style computers bundle memory, cpu, and little else
+onto a card that's maybe just thirty square centimeters. On such systems,
+your arch/.../mach-.../board-*.c file would primarily provide information
+about the devices on the mainboard into which such a card is plugged. That
+certainly includes SPI devices hooked up through the card connectors!
+
+
+NON-STATIC CONFIGURATIONS
+
+Developer boards often play by different rules than product boards, and one
+example is the potential need to hotplug SPI devices and/or controllers.
+
+For those cases you might need to use spi_busnum_to_master() to look
+up the spi bus master, and will likely need spi_new_device() to provide the
+board info based on the board that was hotplugged. Of course, you'd later
+call at least spi_unregister_device() when that board is removed.
+
+When Linux includes support for MMC/SD/SDIO/DataFlash cards through SPI, those
+configurations will also be dynamic. Fortunately, such devices all support
+basic device identification probes, so they should hotplug normally.
+
+
+How do I write an "SPI Protocol Driver"?
+----------------------------------------
+Most SPI drivers are currently kernel drivers, but there's also support
+for userspace drivers. Here we talk only about kernel drivers.
+
+SPI protocol drivers somewhat resemble platform device drivers:
+
+ static struct spi_driver CHIP_driver = {
+ .driver = {
+ .name = "CHIP",
+ .owner = THIS_MODULE,
+ .pm = &CHIP_pm_ops,
+ },
+
+ .probe = CHIP_probe,
+ .remove = CHIP_remove,
+ };
+
+The driver core will automatically attempt to bind this driver to any SPI
+device whose board_info gave a modalias of "CHIP". Your probe() code
+might look like this unless you're creating a device which is managing
+a bus (appearing under /sys/class/spi_master).
+
+ static int CHIP_probe(struct spi_device *spi)
+ {
+ struct CHIP *chip;
+ struct CHIP_platform_data *pdata;
+
+ /* assuming the driver requires board-specific data: */
+ pdata = &spi->dev.platform_data;
+ if (!pdata)
+ return -ENODEV;
+
+ /* get memory for driver's per-chip state */
+ chip = kzalloc(sizeof *chip, GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+ spi_set_drvdata(spi, chip);
+
+ ... etc
+ return 0;
+ }
+
+As soon as it enters probe(), the driver may issue I/O requests to
+the SPI device using "struct spi_message". When remove() returns,
+or after probe() fails, the driver guarantees that it won't submit
+any more such messages.
+
+ - An spi_message is a sequence of protocol operations, executed
+ as one atomic sequence. SPI driver controls include:
+
+ + when bidirectional reads and writes start ... by how its
+ sequence of spi_transfer requests is arranged;
+
+ + which I/O buffers are used ... each spi_transfer wraps a
+ buffer for each transfer direction, supporting full duplex
+ (two pointers, maybe the same one in both cases) and half
+ duplex (one pointer is NULL) transfers;
+
+ + optionally defining short delays after transfers ... using
+ the spi_transfer.delay_usecs setting (this delay can be the
+ only protocol effect, if the buffer length is zero);
+
+ + whether the chipselect becomes inactive after a transfer and
+ any delay ... by using the spi_transfer.cs_change flag;
+
+ + hinting whether the next message is likely to go to this same
+ device ... using the spi_transfer.cs_change flag on the last
+ transfer in that atomic group, and potentially saving costs
+ for chip deselect and select operations.
+
+ - Follow standard kernel rules, and provide DMA-safe buffers in
+ your messages. That way controller drivers using DMA aren't forced
+ to make extra copies unless the hardware requires it (e.g. working
+ around hardware errata that force the use of bounce buffering).
+
+ If standard dma_map_single() handling of these buffers is inappropriate,
+ you can use spi_message.is_dma_mapped to tell the controller driver
+ that you've already provided the relevant DMA addresses.
+
+ - The basic I/O primitive is spi_async(). Async requests may be
+ issued in any context (irq handler, task, etc) and completion
+ is reported using a callback provided with the message.
+ After any detected error, the chip is deselected and processing
+ of that spi_message is aborted.
+
+ - There are also synchronous wrappers like spi_sync(), and wrappers
+ like spi_read(), spi_write(), and spi_write_then_read(). These
+ may be issued only in contexts that may sleep, and they're all
+ clean (and small, and "optional") layers over spi_async().
+
+ - The spi_write_then_read() call, and convenience wrappers around
+ it, should only be used with small amounts of data where the
+ cost of an extra copy may be ignored. It's designed to support
+ common RPC-style requests, such as writing an eight bit command
+ and reading a sixteen bit response -- spi_w8r16() being one its
+ wrappers, doing exactly that.
+
+Some drivers may need to modify spi_device characteristics like the
+transfer mode, wordsize, or clock rate. This is done with spi_setup(),
+which would normally be called from probe() before the first I/O is
+done to the device. However, that can also be called at any time
+that no message is pending for that device.
+
+While "spi_device" would be the bottom boundary of the driver, the
+upper boundaries might include sysfs (especially for sensor readings),
+the input layer, ALSA, networking, MTD, the character device framework,
+or other Linux subsystems.
+
+Note that there are two types of memory your driver must manage as part
+of interacting with SPI devices.
+
+ - I/O buffers use the usual Linux rules, and must be DMA-safe.
+ You'd normally allocate them from the heap or free page pool.
+ Don't use the stack, or anything that's declared "static".
+
+ - The spi_message and spi_transfer metadata used to glue those
+ I/O buffers into a group of protocol transactions. These can
+ be allocated anywhere it's convenient, including as part of
+ other allocate-once driver data structures. Zero-init these.
+
+If you like, spi_message_alloc() and spi_message_free() convenience
+routines are available to allocate and zero-initialize an spi_message
+with several transfers.
+
+
+How do I write an "SPI Master Controller Driver"?
+-------------------------------------------------
+An SPI controller will probably be registered on the platform_bus; write
+a driver to bind to the device, whichever bus is involved.
+
+The main task of this type of driver is to provide an "spi_master".
+Use spi_alloc_master() to allocate the master, and spi_master_get_devdata()
+to get the driver-private data allocated for that device.
+
+ struct spi_master *master;
+ struct CONTROLLER *c;
+
+ master = spi_alloc_master(dev, sizeof *c);
+ if (!master)
+ return -ENODEV;
+
+ c = spi_master_get_devdata(master);
+
+The driver will initialize the fields of that spi_master, including the
+bus number (maybe the same as the platform device ID) and three methods
+used to interact with the SPI core and SPI protocol drivers. It will
+also initialize its own internal state. (See below about bus numbering
+and those methods.)
+
+After you initialize the spi_master, then use spi_register_master() to
+publish it to the rest of the system. At that time, device nodes for the
+controller and any predeclared spi devices will be made available, and
+the driver model core will take care of binding them to drivers.
+
+If you need to remove your SPI controller driver, spi_unregister_master()
+will reverse the effect of spi_register_master().
+
+
+BUS NUMBERING
+
+Bus numbering is important, since that's how Linux identifies a given
+SPI bus (shared SCK, MOSI, MISO). Valid bus numbers start at zero. On
+SOC systems, the bus numbers should match the numbers defined by the chip
+manufacturer. For example, hardware controller SPI2 would be bus number 2,
+and spi_board_info for devices connected to it would use that number.
+
+If you don't have such hardware-assigned bus number, and for some reason
+you can't just assign them, then provide a negative bus number. That will
+then be replaced by a dynamically assigned number. You'd then need to treat
+this as a non-static configuration (see above).
+
+
+SPI MASTER METHODS
+
+ master->setup(struct spi_device *spi)
+ This sets up the device clock rate, SPI mode, and word sizes.
+ Drivers may change the defaults provided by board_info, and then
+ call spi_setup(spi) to invoke this routine. It may sleep.
+
+ Unless each SPI slave has its own configuration registers, don't
+ change them right away ... otherwise drivers could corrupt I/O
+ that's in progress for other SPI devices.
+
+ ** BUG ALERT: for some reason the first version of
+ ** many spi_master drivers seems to get this wrong.
+ ** When you code setup(), ASSUME that the controller
+ ** is actively processing transfers for another device.
+
+ master->cleanup(struct spi_device *spi)
+ Your controller driver may use spi_device.controller_state to hold
+ state it dynamically associates with that device. If you do that,
+ be sure to provide the cleanup() method to free that state.
+
+ master->prepare_transfer_hardware(struct spi_master *master)
+ This will be called by the queue mechanism to signal to the driver
+ that a message is coming in soon, so the subsystem requests the
+ driver to prepare the transfer hardware by issuing this call.
+ This may sleep.
+
+ master->unprepare_transfer_hardware(struct spi_master *master)
+ This will be called by the queue mechanism to signal to the driver
+ that there are no more messages pending in the queue and it may
+ relax the hardware (e.g. by power management calls). This may sleep.
+
+ master->transfer_one_message(struct spi_master *master,
+ struct spi_message *mesg)
+ The subsystem calls the driver to transfer a single message while
+ queuing transfers that arrive in the meantime. When the driver is
+ finished with this message, it must call
+ spi_finalize_current_message() so the subsystem can issue the next
+ message. This may sleep.
+
+ master->transfer_one(struct spi_master *master, struct spi_device *spi,
+ struct spi_transfer *transfer)
+ The subsystem calls the driver to transfer a single transfer while
+ queuing transfers that arrive in the meantime. When the driver is
+ finished with this transfer, it must call
+ spi_finalize_current_transfer() so the subsystem can issue the next
+ transfer. This may sleep. Note: transfer_one and transfer_one_message
+ are mutually exclusive; when both are set, the generic subsystem does
+ not call your transfer_one callback.
+
+ Return values:
+ negative errno: error
+ 0: transfer is finished
+ 1: transfer is still in progress
+
+ DEPRECATED METHODS
+
+ master->transfer(struct spi_device *spi, struct spi_message *message)
+ This must not sleep. Its responsibility is to arrange that the
+ transfer happens and its complete() callback is issued. The two
+ will normally happen later, after other transfers complete, and
+ if the controller is idle it will need to be kickstarted. This
+ method is not used on queued controllers and must be NULL if
+ transfer_one_message() and (un)prepare_transfer_hardware() are
+ implemented.
+
+
+SPI MESSAGE QUEUE
+
+If you are happy with the standard queueing mechanism provided by the
+SPI subsystem, just implement the queued methods specified above. Using
+the message queue has the upside of centralizing a lot of code and
+providing pure process-context execution of methods. The message queue
+can also be elevated to realtime priority on high-priority SPI traffic.
+
+Unless the queueing mechanism in the SPI subsystem is selected, the bulk
+of the driver will be managing the I/O queue fed by the now deprecated
+function transfer().
+
+That queue could be purely conceptual. For example, a driver used only
+for low-frequency sensor access might be fine using synchronous PIO.
+
+But the queue will probably be very real, using message->queue, PIO,
+often DMA (especially if the root filesystem is in SPI flash), and
+execution contexts like IRQ handlers, tasklets, or workqueues (such
+as keventd). Your driver can be as fancy, or as simple, as you need.
+Such a transfer() method would normally just add the message to a
+queue, and then start some asynchronous transfer engine (unless it's
+already running).
+
+
+THANKS TO
+---------
+Contributors to Linux-SPI discussions include (in alphabetical order,
+by last name):
+
+Mark Brown
+David Brownell
+Russell King
+Grant Likely
+Dmitry Pervushin
+Stephen Street
+Mark Underwood
+Andrew Victor
+Linus Walleij
+Vitaly Wool
diff --git a/Documentation/spi/spidev b/Documentation/spi/spidev
new file mode 100644
index 000000000..3d14035b1
--- /dev/null
+++ b/Documentation/spi/spidev
@@ -0,0 +1,149 @@
+SPI devices have a limited userspace API, supporting basic half-duplex
+read() and write() access to SPI slave devices. Using ioctl() requests,
+full duplex transfers and device I/O configuration are also available.
+
+ #include <fcntl.h>
+ #include <unistd.h>
+ #include <sys/ioctl.h>
+ #include <linux/types.h>
+ #include <linux/spi/spidev.h>
+
+Some reasons you might want to use this programming interface include:
+
+ * Prototyping in an environment that's not crash-prone; stray pointers
+ in userspace won't normally bring down any Linux system.
+
+ * Developing simple protocols used to talk to microcontrollers acting
+ as SPI slaves, which you may need to change quite often.
+
+Of course there are drivers that can never be written in userspace, because
+they need to access kernel interfaces (such as IRQ handlers or other layers
+of the driver stack) that are not accessible to userspace.
+
+
+DEVICE CREATION, DRIVER BINDING
+===============================
+The simplest way to arrange to use this driver is to just list it in the
+spi_board_info for a device as the driver it should use: the "modalias"
+entry is "spidev", matching the name of the driver exposing this API.
+Set up the other device characteristics (bits per word, SPI clocking,
+chipselect polarity, etc) as usual, so you won't always need to override
+them later.
+
+(Sysfs also supports userspace driven binding/unbinding of drivers to
+devices. That mechanism might be supported here in the future.)
+
+When you do that, the sysfs node for the SPI device will include a child
+device node with a "dev" attribute that will be understood by udev or mdev.
+(Larger systems will have "udev". Smaller ones may configure "mdev" into
+busybox; it's less featureful, but often enough.) For a SPI device with
+chipselect C on bus B, you should see:
+
+ /dev/spidevB.C ... character special device, major number 153 with
+ a dynamically chosen minor device number. This is the node
+ that userspace programs will open, created by "udev" or "mdev".
+
+ /sys/devices/.../spiB.C ... as usual, the SPI device node will
+ be a child of its SPI master controller.
+
+ /sys/class/spidev/spidevB.C ... created when the "spidev" driver
+ binds to that device. (Directory or symlink, based on whether
+ or not you enabled the "deprecated sysfs files" Kconfig option.)
+
+Do not try to manage the /dev character device special file nodes by hand.
+That's error prone, and you'd need to pay careful attention to system
+security issues; udev/mdev should already be configured securely.
+
+If you unbind the "spidev" driver from that device, those two "spidev" nodes
+(in sysfs and in /dev) should automatically be removed (respectively by the
+kernel and by udev/mdev). You can unbind by removing the "spidev" driver
+module, which will affect all devices using this driver. You can also unbind
+by having kernel code remove the SPI device, probably by removing the driver
+for its SPI controller (so its spi_master vanishes).
+
+Since this is a standard Linux device driver -- even though it just happens
+to expose a low level API to userspace -- it can be associated with any number
+of devices at a time. Just provide one spi_board_info record for each such
+SPI device, and you'll get a /dev device node for each device.
+
+
+BASIC CHARACTER DEVICE API
+==========================
+Normal open() and close() operations on /dev/spidevB.D files work as you
+would expect.
+
+Standard read() and write() operations are obviously only half-duplex, and
+the chipselect is deactivated between those operations. Full-duplex access,
+and composite operation without chipselect de-activation, is available using
+the SPI_IOC_MESSAGE(N) request.
+
+Several ioctl() requests let your driver read or override the device's current
+settings for data transfer parameters:
+
+ SPI_IOC_RD_MODE, SPI_IOC_WR_MODE ... pass a pointer to a byte which will
+ return (RD) or assign (WR) the SPI transfer mode. Use the constants
+ SPI_MODE_0..SPI_MODE_3; or if you prefer you can combine SPI_CPOL
+ (clock polarity, idle high iff this is set) or SPI_CPHA (clock phase,
+ sample on trailing edge iff this is set) flags.
+ Note that this request is limited to SPI mode flags that fit in a
+ single byte.
+
+ SPI_IOC_RD_MODE32, SPI_IOC_WR_MODE32 ... pass a pointer to a uin32_t
+ which will return (RD) or assign (WR) the full SPI transfer mode,
+ not limited to the bits that fit in one byte.
+
+ SPI_IOC_RD_LSB_FIRST, SPI_IOC_WR_LSB_FIRST ... pass a pointer to a byte
+ which will return (RD) or assign (WR) the bit justification used to
+ transfer SPI words. Zero indicates MSB-first; other values indicate
+ the less common LSB-first encoding. In both cases the specified value
+ is right-justified in each word, so that unused (TX) or undefined (RX)
+ bits are in the MSBs.
+
+ SPI_IOC_RD_BITS_PER_WORD, SPI_IOC_WR_BITS_PER_WORD ... pass a pointer to
+ a byte which will return (RD) or assign (WR) the number of bits in
+ each SPI transfer word. The value zero signifies eight bits.
+
+ SPI_IOC_RD_MAX_SPEED_HZ, SPI_IOC_WR_MAX_SPEED_HZ ... pass a pointer to a
+ u32 which will return (RD) or assign (WR) the maximum SPI transfer
+ speed, in Hz. The controller can't necessarily assign that specific
+ clock speed.
+
+NOTES:
+
+ - At this time there is no async I/O support; everything is purely
+ synchronous.
+
+ - There's currently no way to report the actual bit rate used to
+ shift data to/from a given device.
+
+ - From userspace, you can't currently change the chip select polarity;
+ that could corrupt transfers to other devices sharing the SPI bus.
+ Each SPI device is deselected when it's not in active use, allowing
+ other drivers to talk to other devices.
+
+ - There's a limit on the number of bytes each I/O request can transfer
+ to the SPI device. It defaults to one page, but that can be changed
+ using a module parameter.
+
+ - Because SPI has no low-level transfer acknowledgement, you usually
+ won't see any I/O errors when talking to a non-existent device.
+
+
+FULL DUPLEX CHARACTER DEVICE API
+================================
+
+See the spidev_fdx.c sample program for one example showing the use of the
+full duplex programming interface. (Although it doesn't perform a full duplex
+transfer.) The model is the same as that used in the kernel spi_sync()
+request; the individual transfers offer the same capabilities as are
+available to kernel drivers (except that it's not asynchronous).
+
+The example shows one half-duplex RPC-style request and response message.
+These requests commonly require that the chip not be deselected between
+the request and response. Several such requests could be chained into
+a single kernel request, even allowing the chip to be deselected after
+each response. (Other protocol options include changing the word size
+and bitrate for each transfer segment.)
+
+To make a full duplex request, provide both rx_buf and tx_buf for the
+same transfer. It's even OK if those are the same buffer.
diff --git a/Documentation/spi/spidev_fdx.c b/Documentation/spi/spidev_fdx.c
new file mode 100644
index 000000000..0ea3e5129
--- /dev/null
+++ b/Documentation/spi/spidev_fdx.c
@@ -0,0 +1,158 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/types.h>
+#include <linux/spi/spidev.h>
+
+
+static int verbose;
+
+static void do_read(int fd, int len)
+{
+ unsigned char buf[32], *bp;
+ int status;
+
+ /* read at least 2 bytes, no more than 32 */
+ if (len < 2)
+ len = 2;
+ else if (len > sizeof(buf))
+ len = sizeof(buf);
+ memset(buf, 0, sizeof buf);
+
+ status = read(fd, buf, len);
+ if (status < 0) {
+ perror("read");
+ return;
+ }
+ if (status != len) {
+ fprintf(stderr, "short read\n");
+ return;
+ }
+
+ printf("read(%2d, %2d): %02x %02x,", len, status,
+ buf[0], buf[1]);
+ status -= 2;
+ bp = buf + 2;
+ while (status-- > 0)
+ printf(" %02x", *bp++);
+ printf("\n");
+}
+
+static void do_msg(int fd, int len)
+{
+ struct spi_ioc_transfer xfer[2];
+ unsigned char buf[32], *bp;
+ int status;
+
+ memset(xfer, 0, sizeof xfer);
+ memset(buf, 0, sizeof buf);
+
+ if (len > sizeof buf)
+ len = sizeof buf;
+
+ buf[0] = 0xaa;
+ xfer[0].tx_buf = (unsigned long)buf;
+ xfer[0].len = 1;
+
+ xfer[1].rx_buf = (unsigned long) buf;
+ xfer[1].len = len;
+
+ status = ioctl(fd, SPI_IOC_MESSAGE(2), xfer);
+ if (status < 0) {
+ perror("SPI_IOC_MESSAGE");
+ return;
+ }
+
+ printf("response(%2d, %2d): ", len, status);
+ for (bp = buf; len; len--)
+ printf(" %02x", *bp++);
+ printf("\n");
+}
+
+static void dumpstat(const char *name, int fd)
+{
+ __u8 lsb, bits;
+ __u32 mode, speed;
+
+ if (ioctl(fd, SPI_IOC_RD_MODE32, &mode) < 0) {
+ perror("SPI rd_mode");
+ return;
+ }
+ if (ioctl(fd, SPI_IOC_RD_LSB_FIRST, &lsb) < 0) {
+ perror("SPI rd_lsb_fist");
+ return;
+ }
+ if (ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits) < 0) {
+ perror("SPI bits_per_word");
+ return;
+ }
+ if (ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed) < 0) {
+ perror("SPI max_speed_hz");
+ return;
+ }
+
+ printf("%s: spi mode 0x%x, %d bits %sper word, %d Hz max\n",
+ name, mode, bits, lsb ? "(lsb first) " : "", speed);
+}
+
+int main(int argc, char **argv)
+{
+ int c;
+ int readcount = 0;
+ int msglen = 0;
+ int fd;
+ const char *name;
+
+ while ((c = getopt(argc, argv, "hm:r:v")) != EOF) {
+ switch (c) {
+ case 'm':
+ msglen = atoi(optarg);
+ if (msglen < 0)
+ goto usage;
+ continue;
+ case 'r':
+ readcount = atoi(optarg);
+ if (readcount < 0)
+ goto usage;
+ continue;
+ case 'v':
+ verbose++;
+ continue;
+ case 'h':
+ case '?':
+usage:
+ fprintf(stderr,
+ "usage: %s [-h] [-m N] [-r N] /dev/spidevB.D\n",
+ argv[0]);
+ return 1;
+ }
+ }
+
+ if ((optind + 1) != argc)
+ goto usage;
+ name = argv[optind];
+
+ fd = open(name, O_RDWR);
+ if (fd < 0) {
+ perror("open");
+ return 1;
+ }
+
+ dumpstat(name, fd);
+
+ if (msglen)
+ do_msg(fd, msglen);
+
+ if (readcount)
+ do_read(fd, readcount);
+
+ close(fd);
+ return 0;
+}
diff --git a/Documentation/spi/spidev_test.c b/Documentation/spi/spidev_test.c
new file mode 100644
index 000000000..135b3f592
--- /dev/null
+++ b/Documentation/spi/spidev_test.c
@@ -0,0 +1,318 @@
+/*
+ * SPI testing utility (using spidev driver)
+ *
+ * Copyright (c) 2007 MontaVista Software, Inc.
+ * Copyright (c) 2007 Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * Cross-compile with cross-gcc -I/path/to/cross-kernel/include
+ */
+
+#include <stdint.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/spi/spidev.h>
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+static void pabort(const char *s)
+{
+ perror(s);
+ abort();
+}
+
+static const char *device = "/dev/spidev1.1";
+static uint32_t mode;
+static uint8_t bits = 8;
+static uint32_t speed = 500000;
+static uint16_t delay;
+static int verbose;
+
+uint8_t default_tx[] = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xF0, 0x0D,
+};
+
+uint8_t default_rx[ARRAY_SIZE(default_tx)] = {0, };
+char *input_tx;
+
+static void hex_dump(const void *src, size_t length, size_t line_size, char *prefix)
+{
+ int i = 0;
+ const unsigned char *address = src;
+ const unsigned char *line = address;
+ unsigned char c;
+
+ printf("%s | ", prefix);
+ while (length-- > 0) {
+ printf("%02X ", *address++);
+ if (!(++i % line_size) || (length == 0 && i % line_size)) {
+ if (length == 0) {
+ while (i++ % line_size)
+ printf("__ ");
+ }
+ printf(" | "); /* right close */
+ while (line < address) {
+ c = *line++;
+ printf("%c", (c < 33 || c == 255) ? 0x2E : c);
+ }
+ printf("\n");
+ if (length > 0)
+ printf("%s | ", prefix);
+ }
+ }
+}
+
+/*
+ * Unescape - process hexadecimal escape character
+ * converts shell input "\x23" -> 0x23
+ */
+static int unescape(char *_dst, char *_src, size_t len)
+{
+ int ret = 0;
+ char *src = _src;
+ char *dst = _dst;
+ unsigned int ch;
+
+ while (*src) {
+ if (*src == '\\' && *(src+1) == 'x') {
+ sscanf(src + 2, "%2x", &ch);
+ src += 4;
+ *dst++ = (unsigned char)ch;
+ } else {
+ *dst++ = *src++;
+ }
+ ret++;
+ }
+ return ret;
+}
+
+static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len)
+{
+ int ret;
+
+ struct spi_ioc_transfer tr = {
+ .tx_buf = (unsigned long)tx,
+ .rx_buf = (unsigned long)rx,
+ .len = len,
+ .delay_usecs = delay,
+ .speed_hz = speed,
+ .bits_per_word = bits,
+ };
+
+ if (mode & SPI_TX_QUAD)
+ tr.tx_nbits = 4;
+ else if (mode & SPI_TX_DUAL)
+ tr.tx_nbits = 2;
+ if (mode & SPI_RX_QUAD)
+ tr.rx_nbits = 4;
+ else if (mode & SPI_RX_DUAL)
+ tr.rx_nbits = 2;
+ if (!(mode & SPI_LOOP)) {
+ if (mode & (SPI_TX_QUAD | SPI_TX_DUAL))
+ tr.rx_buf = 0;
+ else if (mode & (SPI_RX_QUAD | SPI_RX_DUAL))
+ tr.tx_buf = 0;
+ }
+
+ ret = ioctl(fd, SPI_IOC_MESSAGE(1), &tr);
+ if (ret < 1)
+ pabort("can't send spi message");
+
+ if (verbose)
+ hex_dump(tx, len, 32, "TX");
+ hex_dump(rx, len, 32, "RX");
+}
+
+static void print_usage(const char *prog)
+{
+ printf("Usage: %s [-DsbdlHOLC3]\n", prog);
+ puts(" -D --device device to use (default /dev/spidev1.1)\n"
+ " -s --speed max speed (Hz)\n"
+ " -d --delay delay (usec)\n"
+ " -b --bpw bits per word \n"
+ " -l --loop loopback\n"
+ " -H --cpha clock phase\n"
+ " -O --cpol clock polarity\n"
+ " -L --lsb least significant bit first\n"
+ " -C --cs-high chip select active high\n"
+ " -3 --3wire SI/SO signals shared\n"
+ " -v --verbose Verbose (show tx buffer)\n"
+ " -p Send data (e.g. \"1234\\xde\\xad\")\n"
+ " -N --no-cs no chip select\n"
+ " -R --ready slave pulls low to pause\n"
+ " -2 --dual dual transfer\n"
+ " -4 --quad quad transfer\n");
+ exit(1);
+}
+
+static void parse_opts(int argc, char *argv[])
+{
+ while (1) {
+ static const struct option lopts[] = {
+ { "device", 1, 0, 'D' },
+ { "speed", 1, 0, 's' },
+ { "delay", 1, 0, 'd' },
+ { "bpw", 1, 0, 'b' },
+ { "loop", 0, 0, 'l' },
+ { "cpha", 0, 0, 'H' },
+ { "cpol", 0, 0, 'O' },
+ { "lsb", 0, 0, 'L' },
+ { "cs-high", 0, 0, 'C' },
+ { "3wire", 0, 0, '3' },
+ { "no-cs", 0, 0, 'N' },
+ { "ready", 0, 0, 'R' },
+ { "dual", 0, 0, '2' },
+ { "verbose", 0, 0, 'v' },
+ { "quad", 0, 0, '4' },
+ { NULL, 0, 0, 0 },
+ };
+ int c;
+
+ c = getopt_long(argc, argv, "D:s:d:b:lHOLC3NR24p:v", lopts, NULL);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'D':
+ device = optarg;
+ break;
+ case 's':
+ speed = atoi(optarg);
+ break;
+ case 'd':
+ delay = atoi(optarg);
+ break;
+ case 'b':
+ bits = atoi(optarg);
+ break;
+ case 'l':
+ mode |= SPI_LOOP;
+ break;
+ case 'H':
+ mode |= SPI_CPHA;
+ break;
+ case 'O':
+ mode |= SPI_CPOL;
+ break;
+ case 'L':
+ mode |= SPI_LSB_FIRST;
+ break;
+ case 'C':
+ mode |= SPI_CS_HIGH;
+ break;
+ case '3':
+ mode |= SPI_3WIRE;
+ break;
+ case 'N':
+ mode |= SPI_NO_CS;
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case 'R':
+ mode |= SPI_READY;
+ break;
+ case 'p':
+ input_tx = optarg;
+ break;
+ case '2':
+ mode |= SPI_TX_DUAL;
+ break;
+ case '4':
+ mode |= SPI_TX_QUAD;
+ break;
+ default:
+ print_usage(argv[0]);
+ break;
+ }
+ }
+ if (mode & SPI_LOOP) {
+ if (mode & SPI_TX_DUAL)
+ mode |= SPI_RX_DUAL;
+ if (mode & SPI_TX_QUAD)
+ mode |= SPI_RX_QUAD;
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = 0;
+ int fd;
+ uint8_t *tx;
+ uint8_t *rx;
+ int size;
+
+ parse_opts(argc, argv);
+
+ fd = open(device, O_RDWR);
+ if (fd < 0)
+ pabort("can't open device");
+
+ /*
+ * spi mode
+ */
+ ret = ioctl(fd, SPI_IOC_WR_MODE32, &mode);
+ if (ret == -1)
+ pabort("can't set spi mode");
+
+ ret = ioctl(fd, SPI_IOC_RD_MODE32, &mode);
+ if (ret == -1)
+ pabort("can't get spi mode");
+
+ /*
+ * bits per word
+ */
+ ret = ioctl(fd, SPI_IOC_WR_BITS_PER_WORD, &bits);
+ if (ret == -1)
+ pabort("can't set bits per word");
+
+ ret = ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits);
+ if (ret == -1)
+ pabort("can't get bits per word");
+
+ /*
+ * max speed hz
+ */
+ ret = ioctl(fd, SPI_IOC_WR_MAX_SPEED_HZ, &speed);
+ if (ret == -1)
+ pabort("can't set max speed hz");
+
+ ret = ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed);
+ if (ret == -1)
+ pabort("can't get max speed hz");
+
+ printf("spi mode: 0x%x\n", mode);
+ printf("bits per word: %d\n", bits);
+ printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000);
+
+ if (input_tx) {
+ size = strlen(input_tx+1);
+ tx = malloc(size);
+ rx = malloc(size);
+ size = unescape((char *)tx, input_tx, size);
+ transfer(fd, tx, rx, size);
+ free(rx);
+ free(tx);
+ } else {
+ transfer(fd, default_tx, default_rx, sizeof(default_tx));
+ }
+
+ close(fd);
+
+ return ret;
+}
diff --git a/Documentation/stable_api_nonsense.txt b/Documentation/stable_api_nonsense.txt
new file mode 100644
index 000000000..db3be892a
--- /dev/null
+++ b/Documentation/stable_api_nonsense.txt
@@ -0,0 +1,190 @@
+The Linux Kernel Driver Interface
+(all of your questions answered and then some)
+
+Greg Kroah-Hartman <greg@kroah.com>
+
+This is being written to try to explain why Linux does not have a binary
+kernel interface, nor does it have a stable kernel interface. Please
+realize that this article describes the _in kernel_ interfaces, not the
+kernel to userspace interfaces. The kernel to userspace interface is
+the one that application programs use, the syscall interface. That
+interface is _very_ stable over time, and will not break. I have old
+programs that were built on a pre 0.9something kernel that still work
+just fine on the latest 2.6 kernel release. That interface is the one
+that users and application programmers can count on being stable.
+
+
+Executive Summary
+-----------------
+You think you want a stable kernel interface, but you really do not, and
+you don't even know it. What you want is a stable running driver, and
+you get that only if your driver is in the main kernel tree. You also
+get lots of other good benefits if your driver is in the main kernel
+tree, all of which has made Linux into such a strong, stable, and mature
+operating system which is the reason you are using it in the first
+place.
+
+
+Intro
+-----
+
+It's only the odd person who wants to write a kernel driver that needs
+to worry about the in-kernel interfaces changing. For the majority of
+the world, they neither see this interface, nor do they care about it at
+all.
+
+First off, I'm not going to address _any_ legal issues about closed
+source, hidden source, binary blobs, source wrappers, or any other term
+that describes kernel drivers that do not have their source code
+released under the GPL. Please consult a lawyer if you have any legal
+questions, I'm a programmer and hence, I'm just going to be describing
+the technical issues here (not to make light of the legal issues, they
+are real, and you do need to be aware of them at all times.)
+
+So, there are two main topics here, binary kernel interfaces and stable
+kernel source interfaces. They both depend on each other, but we will
+discuss the binary stuff first to get it out of the way.
+
+
+Binary Kernel Interface
+-----------------------
+Assuming that we had a stable kernel source interface for the kernel, a
+binary interface would naturally happen too, right? Wrong. Please
+consider the following facts about the Linux kernel:
+ - Depending on the version of the C compiler you use, different kernel
+ data structures will contain different alignment of structures, and
+ possibly include different functions in different ways (putting
+ functions inline or not.) The individual function organization
+ isn't that important, but the different data structure padding is
+ very important.
+ - Depending on what kernel build options you select, a wide range of
+ different things can be assumed by the kernel:
+ - different structures can contain different fields
+ - Some functions may not be implemented at all, (i.e. some locks
+ compile away to nothing for non-SMP builds.)
+ - Memory within the kernel can be aligned in different ways,
+ depending on the build options.
+ - Linux runs on a wide range of different processor architectures.
+ There is no way that binary drivers from one architecture will run
+ on another architecture properly.
+
+Now a number of these issues can be addressed by simply compiling your
+module for the exact specific kernel configuration, using the same exact
+C compiler that the kernel was built with. This is sufficient if you
+want to provide a module for a specific release version of a specific
+Linux distribution. But multiply that single build by the number of
+different Linux distributions and the number of different supported
+releases of the Linux distribution and you quickly have a nightmare of
+different build options on different releases. Also realize that each
+Linux distribution release contains a number of different kernels, all
+tuned to different hardware types (different processor types and
+different options), so for even a single release you will need to create
+multiple versions of your module.
+
+Trust me, you will go insane over time if you try to support this kind
+of release, I learned this the hard way a long time ago...
+
+
+Stable Kernel Source Interfaces
+-------------------------------
+
+This is a much more "volatile" topic if you talk to people who try to
+keep a Linux kernel driver that is not in the main kernel tree up to
+date over time.
+
+Linux kernel development is continuous and at a rapid pace, never
+stopping to slow down. As such, the kernel developers find bugs in
+current interfaces, or figure out a better way to do things. If they do
+that, they then fix the current interfaces to work better. When they do
+so, function names may change, structures may grow or shrink, and
+function parameters may be reworked. If this happens, all of the
+instances of where this interface is used within the kernel are fixed up
+at the same time, ensuring that everything continues to work properly.
+
+As a specific examples of this, the in-kernel USB interfaces have
+undergone at least three different reworks over the lifetime of this
+subsystem. These reworks were done to address a number of different
+issues:
+ - A change from a synchronous model of data streams to an asynchronous
+ one. This reduced the complexity of a number of drivers and
+ increased the throughput of all USB drivers such that we are now
+ running almost all USB devices at their maximum speed possible.
+ - A change was made in the way data packets were allocated from the
+ USB core by USB drivers so that all drivers now needed to provide
+ more information to the USB core to fix a number of documented
+ deadlocks.
+
+This is in stark contrast to a number of closed source operating systems
+which have had to maintain their older USB interfaces over time. This
+provides the ability for new developers to accidentally use the old
+interfaces and do things in improper ways, causing the stability of the
+operating system to suffer.
+
+In both of these instances, all developers agreed that these were
+important changes that needed to be made, and they were made, with
+relatively little pain. If Linux had to ensure that it will preserve a
+stable source interface, a new interface would have been created, and
+the older, broken one would have had to be maintained over time, leading
+to extra work for the USB developers. Since all Linux USB developers do
+their work on their own time, asking programmers to do extra work for no
+gain, for free, is not a possibility.
+
+Security issues are also very important for Linux. When a
+security issue is found, it is fixed in a very short amount of time. A
+number of times this has caused internal kernel interfaces to be
+reworked to prevent the security problem from occurring. When this
+happens, all drivers that use the interfaces were also fixed at the
+same time, ensuring that the security problem was fixed and could not
+come back at some future time accidentally. If the internal interfaces
+were not allowed to change, fixing this kind of security problem and
+insuring that it could not happen again would not be possible.
+
+Kernel interfaces are cleaned up over time. If there is no one using a
+current interface, it is deleted. This ensures that the kernel remains
+as small as possible, and that all potential interfaces are tested as
+well as they can be (unused interfaces are pretty much impossible to
+test for validity.)
+
+
+What to do
+----------
+
+So, if you have a Linux kernel driver that is not in the main kernel
+tree, what are you, a developer, supposed to do? Releasing a binary
+driver for every different kernel version for every distribution is a
+nightmare, and trying to keep up with an ever changing kernel interface
+is also a rough job.
+
+Simple, get your kernel driver into the main kernel tree (remember we
+are talking about GPL released drivers here, if your code doesn't fall
+under this category, good luck, you are on your own here, you leech
+<insert link to leech comment from Andrew and Linus here>.) If your
+driver is in the tree, and a kernel interface changes, it will be fixed
+up by the person who did the kernel change in the first place. This
+ensures that your driver is always buildable, and works over time, with
+very little effort on your part.
+
+The very good side effects of having your driver in the main kernel tree
+are:
+ - The quality of the driver will rise as the maintenance costs (to the
+ original developer) will decrease.
+ - Other developers will add features to your driver.
+ - Other people will find and fix bugs in your driver.
+ - Other people will find tuning opportunities in your driver.
+ - Other people will update the driver for you when external interface
+ changes require it.
+ - The driver automatically gets shipped in all Linux distributions
+ without having to ask the distros to add it.
+
+As Linux supports a larger number of different devices "out of the box"
+than any other operating system, and it supports these devices on more
+different processor architectures than any other operating system, this
+proven type of development model must be doing something right :)
+
+
+
+------
+
+Thanks to Randy Dunlap, Andrew Morton, David Brownell, Hanna Linder,
+Robert Love, and Nishanth Aravamudan for their review and comments on
+early drafts of this paper.
diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt
new file mode 100644
index 000000000..58d0ac4df
--- /dev/null
+++ b/Documentation/stable_kernel_rules.txt
@@ -0,0 +1,132 @@
+Everything you ever wanted to know about Linux -stable releases.
+
+Rules on what kind of patches are accepted, and which ones are not, into the
+"-stable" tree:
+
+ - It must be obviously correct and tested.
+ - It cannot be bigger than 100 lines, with context.
+ - It must fix only one thing.
+ - It must fix a real bug that bothers people (not a, "This could be a
+ problem..." type thing).
+ - It must fix a problem that causes a build error (but not for things
+ marked CONFIG_BROKEN), an oops, a hang, data corruption, a real
+ security issue, or some "oh, that's not good" issue. In short, something
+ critical.
+ - Serious issues as reported by a user of a distribution kernel may also
+ be considered if they fix a notable performance or interactivity issue.
+ As these fixes are not as obvious and have a higher risk of a subtle
+ regression they should only be submitted by a distribution kernel
+ maintainer and include an addendum linking to a bugzilla entry if it
+ exists and additional information on the user-visible impact.
+ - New device IDs and quirks are also accepted.
+ - No "theoretical race condition" issues, unless an explanation of how the
+ race can be exploited is also provided.
+ - It cannot contain any "trivial" fixes in it (spelling changes,
+ whitespace cleanups, etc).
+ - It must follow the Documentation/SubmittingPatches rules.
+ - It or an equivalent fix must already exist in Linus' tree (upstream).
+
+
+Procedure for submitting patches to the -stable tree:
+
+ - If the patch covers files in net/ or drivers/net please follow netdev stable
+ submission guidelines as described in
+ Documentation/networking/netdev-FAQ.txt
+ - Security patches should not be handled (solely) by the -stable review
+ process but should follow the procedures in Documentation/SecurityBugs.
+
+For all other submissions, choose one of the following procedures:
+
+ --- Option 1 ---
+
+ To have the patch automatically included in the stable tree, add the tag
+ Cc: stable@vger.kernel.org
+ in the sign-off area. Once the patch is merged it will be applied to
+ the stable tree without anything else needing to be done by the author
+ or subsystem maintainer.
+
+ --- Option 2 ---
+
+ After the patch has been merged to Linus' tree, send an email to
+ stable@vger.kernel.org containing the subject of the patch, the commit ID,
+ why you think it should be applied, and what kernel version you wish it to
+ be applied to.
+
+ --- Option 3 ---
+
+ Send the patch, after verifying that it follows the above rules, to
+ stable@vger.kernel.org. You must note the upstream commit ID in the
+ changelog of your submission, as well as the kernel version you wish
+ it to be applied to.
+
+Option 1 is probably the easiest and most common. Options 2 and 3 are more
+useful if the patch isn't deemed worthy at the time it is applied to a public
+git tree (for instance, because it deserves more regression testing first).
+Option 3 is especially useful if the patch needs some special handling to apply
+to an older kernel (e.g., if API's have changed in the meantime).
+
+Additionally, some patches submitted via Option 1 may have additional patch
+prerequisites which can be cherry-picked. This can be specified in the following
+format in the sign-off area:
+
+ Cc: <stable@vger.kernel.org> # 3.3.x: a1f84a3: sched: Check for idle
+ Cc: <stable@vger.kernel.org> # 3.3.x: 1b9508f: sched: Rate-limit newidle
+ Cc: <stable@vger.kernel.org> # 3.3.x: fd21073: sched: Fix affinity logic
+ Cc: <stable@vger.kernel.org> # 3.3.x
+ Signed-off-by: Ingo Molnar <mingo@elte.hu>
+
+ The tag sequence has the meaning of:
+ git cherry-pick a1f84a3
+ git cherry-pick 1b9508f
+ git cherry-pick fd21073
+ git cherry-pick <this commit>
+
+Also, some patches may have kernel version prerequisites. This can be
+specified in the following format in the sign-off area:
+
+ Cc: <stable@vger.kernel.org> # 3.3.x-
+
+ The tag has the meaning of:
+ git cherry-pick <this commit>
+
+ For each "-stable" tree starting with the specified version.
+
+Following the submission:
+
+ - The sender will receive an ACK when the patch has been accepted into the
+ queue, or a NAK if the patch is rejected. This response might take a few
+ days, according to the developer's schedules.
+ - If accepted, the patch will be added to the -stable queue, for review by
+ other developers and by the relevant subsystem maintainer.
+
+
+Review cycle:
+
+ - When the -stable maintainers decide for a review cycle, the patches will be
+ sent to the review committee, and the maintainer of the affected area of
+ the patch (unless the submitter is the maintainer of the area) and CC: to
+ the linux-kernel mailing list.
+ - The review committee has 48 hours in which to ACK or NAK the patch.
+ - If the patch is rejected by a member of the committee, or linux-kernel
+ members object to the patch, bringing up issues that the maintainers and
+ members did not realize, the patch will be dropped from the queue.
+ - At the end of the review cycle, the ACKed patches will be added to the
+ latest -stable release, and a new -stable release will happen.
+ - Security patches will be accepted into the -stable tree directly from the
+ security kernel team, and not go through the normal review cycle.
+ Contact the kernel security team for more details on this procedure.
+
+Trees:
+
+ - The queues of patches, for both completed versions and in progress
+ versions can be found at:
+ http://git.kernel.org/?p=linux/kernel/git/stable/stable-queue.git
+ - The finalized and tagged releases of all stable kernels can be found
+ in separate branches per version at:
+ http://git.kernel.org/?p=linux/kernel/git/stable/linux-stable.git
+
+
+Review committee:
+
+ - This is made up of a number of kernel developers who have volunteered for
+ this task, and a few that haven't.
diff --git a/Documentation/static-keys.txt b/Documentation/static-keys.txt
new file mode 100644
index 000000000..c4407a41b
--- /dev/null
+++ b/Documentation/static-keys.txt
@@ -0,0 +1,286 @@
+ Static Keys
+ -----------
+
+By: Jason Baron <jbaron@redhat.com>
+
+0) Abstract
+
+Static keys allows the inclusion of seldom used features in
+performance-sensitive fast-path kernel code, via a GCC feature and a code
+patching technique. A quick example:
+
+ struct static_key key = STATIC_KEY_INIT_FALSE;
+
+ ...
+
+ if (static_key_false(&key))
+ do unlikely code
+ else
+ do likely code
+
+ ...
+ static_key_slow_inc();
+ ...
+ static_key_slow_inc();
+ ...
+
+The static_key_false() branch will be generated into the code with as little
+impact to the likely code path as possible.
+
+
+1) Motivation
+
+
+Currently, tracepoints are implemented using a conditional branch. The
+conditional check requires checking a global variable for each tracepoint.
+Although the overhead of this check is small, it increases when the memory
+cache comes under pressure (memory cache lines for these global variables may
+be shared with other memory accesses). As we increase the number of tracepoints
+in the kernel this overhead may become more of an issue. In addition,
+tracepoints are often dormant (disabled) and provide no direct kernel
+functionality. Thus, it is highly desirable to reduce their impact as much as
+possible. Although tracepoints are the original motivation for this work, other
+kernel code paths should be able to make use of the static keys facility.
+
+
+2) Solution
+
+
+gcc (v4.5) adds a new 'asm goto' statement that allows branching to a label:
+
+http://gcc.gnu.org/ml/gcc-patches/2009-07/msg01556.html
+
+Using the 'asm goto', we can create branches that are either taken or not taken
+by default, without the need to check memory. Then, at run-time, we can patch
+the branch site to change the branch direction.
+
+For example, if we have a simple branch that is disabled by default:
+
+ if (static_key_false(&key))
+ printk("I am the true branch\n");
+
+Thus, by default the 'printk' will not be emitted. And the code generated will
+consist of a single atomic 'no-op' instruction (5 bytes on x86), in the
+straight-line code path. When the branch is 'flipped', we will patch the
+'no-op' in the straight-line codepath with a 'jump' instruction to the
+out-of-line true branch. Thus, changing branch direction is expensive but
+branch selection is basically 'free'. That is the basic tradeoff of this
+optimization.
+
+This lowlevel patching mechanism is called 'jump label patching', and it gives
+the basis for the static keys facility.
+
+3) Static key label API, usage and examples:
+
+
+In order to make use of this optimization you must first define a key:
+
+ struct static_key key;
+
+Which is initialized as:
+
+ struct static_key key = STATIC_KEY_INIT_TRUE;
+
+or:
+
+ struct static_key key = STATIC_KEY_INIT_FALSE;
+
+If the key is not initialized, it is default false. The 'struct static_key',
+must be a 'global'. That is, it can't be allocated on the stack or dynamically
+allocated at run-time.
+
+The key is then used in code as:
+
+ if (static_key_false(&key))
+ do unlikely code
+ else
+ do likely code
+
+Or:
+
+ if (static_key_true(&key))
+ do likely code
+ else
+ do unlikely code
+
+A key that is initialized via 'STATIC_KEY_INIT_FALSE', must be used in a
+'static_key_false()' construct. Likewise, a key initialized via
+'STATIC_KEY_INIT_TRUE' must be used in a 'static_key_true()' construct. A
+single key can be used in many branches, but all the branches must match the
+way that the key has been initialized.
+
+The branch(es) can then be switched via:
+
+ static_key_slow_inc(&key);
+ ...
+ static_key_slow_dec(&key);
+
+Thus, 'static_key_slow_inc()' means 'make the branch true', and
+'static_key_slow_dec()' means 'make the branch false' with appropriate
+reference counting. For example, if the key is initialized true, a
+static_key_slow_dec(), will switch the branch to false. And a subsequent
+static_key_slow_inc(), will change the branch back to true. Likewise, if the
+key is initialized false, a 'static_key_slow_inc()', will change the branch to
+true. And then a 'static_key_slow_dec()', will again make the branch false.
+
+An example usage in the kernel is the implementation of tracepoints:
+
+ static inline void trace_##name(proto) \
+ { \
+ if (static_key_false(&__tracepoint_##name.key)) \
+ __DO_TRACE(&__tracepoint_##name, \
+ TP_PROTO(data_proto), \
+ TP_ARGS(data_args), \
+ TP_CONDITION(cond)); \
+ }
+
+Tracepoints are disabled by default, and can be placed in performance critical
+pieces of the kernel. Thus, by using a static key, the tracepoints can have
+absolutely minimal impact when not in use.
+
+
+4) Architecture level code patching interface, 'jump labels'
+
+
+There are a few functions and macros that architectures must implement in order
+to take advantage of this optimization. If there is no architecture support, we
+simply fall back to a traditional, load, test, and jump sequence.
+
+* select HAVE_ARCH_JUMP_LABEL, see: arch/x86/Kconfig
+
+* #define JUMP_LABEL_NOP_SIZE, see: arch/x86/include/asm/jump_label.h
+
+* __always_inline bool arch_static_branch(struct static_key *key), see:
+ arch/x86/include/asm/jump_label.h
+
+* void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type),
+ see: arch/x86/kernel/jump_label.c
+
+* __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry, enum jump_label_type type),
+ see: arch/x86/kernel/jump_label.c
+
+
+* struct jump_entry, see: arch/x86/include/asm/jump_label.h
+
+
+5) Static keys / jump label analysis, results (x86_64):
+
+
+As an example, let's add the following branch to 'getppid()', such that the
+system call now looks like:
+
+SYSCALL_DEFINE0(getppid)
+{
+ int pid;
+
++ if (static_key_false(&key))
++ printk("I am the true branch\n");
+
+ rcu_read_lock();
+ pid = task_tgid_vnr(rcu_dereference(current->real_parent));
+ rcu_read_unlock();
+
+ return pid;
+}
+
+The resulting instructions with jump labels generated by GCC is:
+
+ffffffff81044290 <sys_getppid>:
+ffffffff81044290: 55 push %rbp
+ffffffff81044291: 48 89 e5 mov %rsp,%rbp
+ffffffff81044294: e9 00 00 00 00 jmpq ffffffff81044299 <sys_getppid+0x9>
+ffffffff81044299: 65 48 8b 04 25 c0 b6 mov %gs:0xb6c0,%rax
+ffffffff810442a0: 00 00
+ffffffff810442a2: 48 8b 80 80 02 00 00 mov 0x280(%rax),%rax
+ffffffff810442a9: 48 8b 80 b0 02 00 00 mov 0x2b0(%rax),%rax
+ffffffff810442b0: 48 8b b8 e8 02 00 00 mov 0x2e8(%rax),%rdi
+ffffffff810442b7: e8 f4 d9 00 00 callq ffffffff81051cb0 <pid_vnr>
+ffffffff810442bc: 5d pop %rbp
+ffffffff810442bd: 48 98 cltq
+ffffffff810442bf: c3 retq
+ffffffff810442c0: 48 c7 c7 e3 54 98 81 mov $0xffffffff819854e3,%rdi
+ffffffff810442c7: 31 c0 xor %eax,%eax
+ffffffff810442c9: e8 71 13 6d 00 callq ffffffff8171563f <printk>
+ffffffff810442ce: eb c9 jmp ffffffff81044299 <sys_getppid+0x9>
+
+Without the jump label optimization it looks like:
+
+ffffffff810441f0 <sys_getppid>:
+ffffffff810441f0: 8b 05 8a 52 d8 00 mov 0xd8528a(%rip),%eax # ffffffff81dc9480 <key>
+ffffffff810441f6: 55 push %rbp
+ffffffff810441f7: 48 89 e5 mov %rsp,%rbp
+ffffffff810441fa: 85 c0 test %eax,%eax
+ffffffff810441fc: 75 27 jne ffffffff81044225 <sys_getppid+0x35>
+ffffffff810441fe: 65 48 8b 04 25 c0 b6 mov %gs:0xb6c0,%rax
+ffffffff81044205: 00 00
+ffffffff81044207: 48 8b 80 80 02 00 00 mov 0x280(%rax),%rax
+ffffffff8104420e: 48 8b 80 b0 02 00 00 mov 0x2b0(%rax),%rax
+ffffffff81044215: 48 8b b8 e8 02 00 00 mov 0x2e8(%rax),%rdi
+ffffffff8104421c: e8 2f da 00 00 callq ffffffff81051c50 <pid_vnr>
+ffffffff81044221: 5d pop %rbp
+ffffffff81044222: 48 98 cltq
+ffffffff81044224: c3 retq
+ffffffff81044225: 48 c7 c7 13 53 98 81 mov $0xffffffff81985313,%rdi
+ffffffff8104422c: 31 c0 xor %eax,%eax
+ffffffff8104422e: e8 60 0f 6d 00 callq ffffffff81715193 <printk>
+ffffffff81044233: eb c9 jmp ffffffff810441fe <sys_getppid+0xe>
+ffffffff81044235: 66 66 2e 0f 1f 84 00 data32 nopw %cs:0x0(%rax,%rax,1)
+ffffffff8104423c: 00 00 00 00
+
+Thus, the disable jump label case adds a 'mov', 'test' and 'jne' instruction
+vs. the jump label case just has a 'no-op' or 'jmp 0'. (The jmp 0, is patched
+to a 5 byte atomic no-op instruction at boot-time.) Thus, the disabled jump
+label case adds:
+
+6 (mov) + 2 (test) + 2 (jne) = 10 - 5 (5 byte jump 0) = 5 addition bytes.
+
+If we then include the padding bytes, the jump label code saves, 16 total bytes
+of instruction memory for this small function. In this case the non-jump label
+function is 80 bytes long. Thus, we have saved 20% of the instruction
+footprint. We can in fact improve this even further, since the 5-byte no-op
+really can be a 2-byte no-op since we can reach the branch with a 2-byte jmp.
+However, we have not yet implemented optimal no-op sizes (they are currently
+hard-coded).
+
+Since there are a number of static key API uses in the scheduler paths,
+'pipe-test' (also known as 'perf bench sched pipe') can be used to show the
+performance improvement. Testing done on 3.3.0-rc2:
+
+jump label disabled:
+
+ Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
+
+ 855.700314 task-clock # 0.534 CPUs utilized ( +- 0.11% )
+ 200,003 context-switches # 0.234 M/sec ( +- 0.00% )
+ 0 CPU-migrations # 0.000 M/sec ( +- 39.58% )
+ 487 page-faults # 0.001 M/sec ( +- 0.02% )
+ 1,474,374,262 cycles # 1.723 GHz ( +- 0.17% )
+ <not supported> stalled-cycles-frontend
+ <not supported> stalled-cycles-backend
+ 1,178,049,567 instructions # 0.80 insns per cycle ( +- 0.06% )
+ 208,368,926 branches # 243.507 M/sec ( +- 0.06% )
+ 5,569,188 branch-misses # 2.67% of all branches ( +- 0.54% )
+
+ 1.601607384 seconds time elapsed ( +- 0.07% )
+
+jump label enabled:
+
+ Performance counter stats for 'bash -c /tmp/pipe-test' (50 runs):
+
+ 841.043185 task-clock # 0.533 CPUs utilized ( +- 0.12% )
+ 200,004 context-switches # 0.238 M/sec ( +- 0.00% )
+ 0 CPU-migrations # 0.000 M/sec ( +- 40.87% )
+ 487 page-faults # 0.001 M/sec ( +- 0.05% )
+ 1,432,559,428 cycles # 1.703 GHz ( +- 0.18% )
+ <not supported> stalled-cycles-frontend
+ <not supported> stalled-cycles-backend
+ 1,175,363,994 instructions # 0.82 insns per cycle ( +- 0.04% )
+ 206,859,359 branches # 245.956 M/sec ( +- 0.04% )
+ 4,884,119 branch-misses # 2.36% of all branches ( +- 0.85% )
+
+ 1.579384366 seconds time elapsed
+
+The percentage of saved branches is .7%, and we've saved 12% on
+'branch-misses'. This is where we would expect to get the most savings, since
+this optimization is about reducing the number of branches. In addition, we've
+saved .2% on instructions, and 2.8% on cycles and 1.4% on elapsed time.
diff --git a/Documentation/svga.txt b/Documentation/svga.txt
new file mode 100644
index 000000000..cd66ec836
--- /dev/null
+++ b/Documentation/svga.txt
@@ -0,0 +1,276 @@
+ Video Mode Selection Support 2.13
+ (c) 1995--1999 Martin Mares, <mj@ucw.cz>
+--------------------------------------------------------------------------------
+
+1. Intro
+~~~~~~~~
+ This small document describes the "Video Mode Selection" feature which
+allows the use of various special video modes supported by the video BIOS. Due
+to usage of the BIOS, the selection is limited to boot time (before the
+kernel decompression starts) and works only on 80X86 machines.
+
+ ** Short intro for the impatient: Just use vga=ask for the first time,
+ ** enter `scan' on the video mode prompt, pick the mode you want to use,
+ ** remember its mode ID (the four-digit hexadecimal number) and then
+ ** set the vga parameter to this number (converted to decimal first).
+
+ The video mode to be used is selected by a kernel parameter which can be
+specified in the kernel Makefile (the SVGA_MODE=... line) or by the "vga=..."
+option of LILO (or some other boot loader you use) or by the "vidmode" utility
+(present in standard Linux utility packages). You can use the following values
+of this parameter:
+
+ NORMAL_VGA - Standard 80x25 mode available on all display adapters.
+
+ EXTENDED_VGA - Standard 8-pixel font mode: 80x43 on EGA, 80x50 on VGA.
+
+ ASK_VGA - Display a video mode menu upon startup (see below).
+
+ 0..35 - Menu item number (when you have used the menu to view the list of
+ modes available on your adapter, you can specify the menu item you want
+ to use). 0..9 correspond to "0".."9", 10..35 to "a".."z". Warning: the
+ mode list displayed may vary as the kernel version changes, because the
+ modes are listed in a "first detected -- first displayed" manner. It's
+ better to use absolute mode numbers instead.
+
+ 0x.... - Hexadecimal video mode ID (also displayed on the menu, see below
+ for exact meaning of the ID). Warning: rdev and LILO don't support
+ hexadecimal numbers -- you have to convert it to decimal manually.
+
+2. Menu
+~~~~~~~
+ The ASK_VGA mode causes the kernel to offer a video mode menu upon
+bootup. It displays a "Press <RETURN> to see video modes available, <SPACE>
+to continue or wait 30 secs" message. If you press <RETURN>, you enter the
+menu, if you press <SPACE> or wait 30 seconds, the kernel will boot up in
+the standard 80x25 mode.
+
+ The menu looks like:
+
+Video adapter: <name-of-detected-video-adapter>
+Mode: COLSxROWS:
+0 0F00 80x25
+1 0F01 80x50
+2 0F02 80x43
+3 0F03 80x26
+....
+Enter mode number or `scan': <flashing-cursor-here>
+
+ <name-of-detected-video-adapter> tells what video adapter did Linux detect
+-- it's either a generic adapter name (MDA, CGA, HGC, EGA, VGA, VESA VGA [a VGA
+with VESA-compliant BIOS]) or a chipset name (e.g., Trident). Direct detection
+of chipsets is turned off by default (see CONFIG_VIDEO_SVGA in chapter 4 to see
+how to enable it if you really want) as it's inherently unreliable due to
+absolutely insane PC design.
+
+ "0 0F00 80x25" means that the first menu item (the menu items are numbered
+from "0" to "9" and from "a" to "z") is a 80x25 mode with ID=0x0f00 (see the
+next section for a description of mode IDs).
+
+ <flashing-cursor-here> encourages you to enter the item number or mode ID
+you wish to set and press <RETURN>. If the computer complains something about
+"Unknown mode ID", it is trying to tell you that it isn't possible to set such
+a mode. It's also possible to press only <RETURN> which leaves the current mode.
+
+ The mode list usually contains a few basic modes and some VESA modes. In
+case your chipset has been detected, some chipset-specific modes are shown as
+well (some of these might be missing or unusable on your machine as different
+BIOSes are often shipped with the same card and the mode numbers depend purely
+on the VGA BIOS).
+
+ The modes displayed on the menu are partially sorted: The list starts with
+the standard modes (80x25 and 80x50) followed by "special" modes (80x28 and
+80x43), local modes (if the local modes feature is enabled), VESA modes and
+finally SVGA modes for the auto-detected adapter.
+
+ If you are not happy with the mode list offered (e.g., if you think your card
+is able to do more), you can enter "scan" instead of item number / mode ID. The
+program will try to ask the BIOS for all possible video mode numbers and test
+what happens then. The screen will be probably flashing wildly for some time and
+strange noises will be heard from inside the monitor and so on and then, really
+all consistent video modes supported by your BIOS will appear (plus maybe some
+`ghost modes'). If you are afraid this could damage your monitor, don't use this
+function.
+
+ After scanning, the mode ordering is a bit different: the auto-detected SVGA
+modes are not listed at all and the modes revealed by `scan' are shown before
+all VESA modes.
+
+3. Mode IDs
+~~~~~~~~~~~
+ Because of the complexity of all the video stuff, the video mode IDs
+used here are also a bit complex. A video mode ID is a 16-bit number usually
+expressed in a hexadecimal notation (starting with "0x"). You can set a mode
+by entering its mode directly if you know it even if it isn't shown on the menu.
+
+The ID numbers can be divided to three regions:
+
+ 0x0000 to 0x00ff - menu item references. 0x0000 is the first item. Don't use
+ outside the menu as this can change from boot to boot (especially if you
+ have used the `scan' feature).
+
+ 0x0100 to 0x017f - standard BIOS modes. The ID is a BIOS video mode number
+ (as presented to INT 10, function 00) increased by 0x0100.
+
+ 0x0200 to 0x08ff - VESA BIOS modes. The ID is a VESA mode ID increased by
+ 0x0100. All VESA modes should be autodetected and shown on the menu.
+
+ 0x0900 to 0x09ff - Video7 special modes. Set by calling INT 0x10, AX=0x6f05.
+ (Usually 940=80x43, 941=132x25, 942=132x44, 943=80x60, 944=100x60,
+ 945=132x28 for the standard Video7 BIOS)
+
+ 0x0f00 to 0x0fff - special modes (they are set by various tricks -- usually
+ by modifying one of the standard modes). Currently available:
+ 0x0f00 standard 80x25, don't reset mode if already set (=FFFF)
+ 0x0f01 standard with 8-point font: 80x43 on EGA, 80x50 on VGA
+ 0x0f02 VGA 80x43 (VGA switched to 350 scanlines with a 8-point font)
+ 0x0f03 VGA 80x28 (standard VGA scans, but 14-point font)
+ 0x0f04 leave current video mode
+ 0x0f05 VGA 80x30 (480 scans, 16-point font)
+ 0x0f06 VGA 80x34 (480 scans, 14-point font)
+ 0x0f07 VGA 80x60 (480 scans, 8-point font)
+ 0x0f08 Graphics hack (see the CONFIG_VIDEO_HACK paragraph below)
+
+ 0x1000 to 0x7fff - modes specified by resolution. The code has a "0xRRCC"
+ form where RR is a number of rows and CC is a number of columns.
+ E.g., 0x1950 corresponds to a 80x25 mode, 0x2b84 to 132x43 etc.
+ This is the only fully portable way to refer to a non-standard mode,
+ but it relies on the mode being found and displayed on the menu
+ (remember that mode scanning is not done automatically).
+
+ 0xff00 to 0xffff - aliases for backward compatibility:
+ 0xffff equivalent to 0x0f00 (standard 80x25)
+ 0xfffe equivalent to 0x0f01 (EGA 80x43 or VGA 80x50)
+
+ If you add 0x8000 to the mode ID, the program will try to recalculate
+vertical display timing according to mode parameters, which can be used to
+eliminate some annoying bugs of certain VGA BIOSes (usually those used for
+cards with S3 chipsets and old Cirrus Logic BIOSes) -- mainly extra lines at the
+end of the display.
+
+4. Options
+~~~~~~~~~~
+ Some options can be set in the source text (in arch/i386/boot/video.S).
+All of them are simple #define's -- change them to #undef's when you want to
+switch them off. Currently supported:
+
+ CONFIG_VIDEO_SVGA - enables autodetection of SVGA cards. This is switched
+off by default as it's a bit unreliable due to terribly bad PC design. If you
+really want to have the adapter autodetected (maybe in case the `scan' feature
+doesn't work on your machine), switch this on and don't cry if the results
+are not completely sane. In case you really need this feature, please drop me
+a mail as I think of removing it some day.
+
+ CONFIG_VIDEO_VESA - enables autodetection of VESA modes. If it doesn't work
+on your machine (or displays a "Error: Scanning of VESA modes failed" message),
+you can switch it off and report as a bug.
+
+ CONFIG_VIDEO_COMPACT - enables compacting of the video mode list. If there
+are more modes with the same screen size, only the first one is kept (see above
+for more info on mode ordering). However, in very strange cases it's possible
+that the first "version" of the mode doesn't work although some of the others
+do -- in this case turn this switch off to see the rest.
+
+ CONFIG_VIDEO_RETAIN - enables retaining of screen contents when switching
+video modes. Works only with some boot loaders which leave enough room for the
+buffer. (If you have old LILO, you can adjust heap_end_ptr and loadflags
+in setup.S, but it's better to upgrade the boot loader...)
+
+ CONFIG_VIDEO_LOCAL - enables inclusion of "local modes" in the list. The
+local modes are added automatically to the beginning of the list not depending
+on hardware configuration. The local modes are listed in the source text after
+the "local_mode_table:" line. The comment before this line describes the format
+of the table (which also includes a video card name to be displayed on the
+top of the menu).
+
+ CONFIG_VIDEO_400_HACK - force setting of 400 scan lines for standard VGA
+modes. This option is intended to be used on certain buggy BIOSes which draw
+some useless logo using font download and then fail to reset the correct mode.
+Don't use unless needed as it forces resetting the video card.
+
+ CONFIG_VIDEO_GFX_HACK - includes special hack for setting of graphics modes
+to be used later by special drivers (e.g., 800x600 on IBM ThinkPad -- see
+ftp://ftp.phys.keio.ac.jp/pub/XFree86/800x600/XF86Configs/XF86Config.IBM_TP560).
+Allows to set _any_ BIOS mode including graphic ones and forcing specific
+text screen resolution instead of peeking it from BIOS variables. Don't use
+unless you think you know what you're doing. To activate this setup, use
+mode number 0x0f08 (see section 3).
+
+5. Still doesn't work?
+~~~~~~~~~~~~~~~~~~~~~~
+ When the mode detection doesn't work (e.g., the mode list is incorrect or
+the machine hangs instead of displaying the menu), try to switch off some of
+the configuration options listed in section 4. If it fails, you can still use
+your kernel with the video mode set directly via the kernel parameter.
+
+ In either case, please send me a bug report containing what _exactly_
+happens and how do the configuration switches affect the behaviour of the bug.
+
+ If you start Linux from M$-DOS, you might also use some DOS tools for
+video mode setting. In this case, you must specify the 0x0f04 mode ("leave
+current settings") to Linux, because if you don't and you use any non-standard
+mode, Linux will switch to 80x25 automatically.
+
+ If you set some extended mode and there's one or more extra lines on the
+bottom of the display containing already scrolled-out text, your VGA BIOS
+contains the most common video BIOS bug called "incorrect vertical display
+end setting". Adding 0x8000 to the mode ID might fix the problem. Unfortunately,
+this must be done manually -- no autodetection mechanisms are available.
+
+ If you have a VGA card and your display still looks as on EGA, your BIOS
+is probably broken and you need to set the CONFIG_VIDEO_400_HACK switch to
+force setting of the correct mode.
+
+6. History
+~~~~~~~~~~
+1.0 (??-Nov-95) First version supporting all adapters supported by the old
+ setup.S + Cirrus Logic 54XX. Present in some 1.3.4? kernels
+ and then removed due to instability on some machines.
+2.0 (28-Jan-96) Rewritten from scratch. Cirrus Logic 64XX support added, almost
+ everything is configurable, the VESA support should be much more
+ stable, explicit mode numbering allowed, "scan" implemented etc.
+2.1 (30-Jan-96) VESA modes moved to 0x200-0x3ff. Mode selection by resolution
+ supported. Few bugs fixed. VESA modes are listed prior to
+ modes supplied by SVGA autodetection as they are more reliable.
+ CLGD autodetect works better. Doesn't depend on 80x25 being
+ active when started. Scanning fixed. 80x43 (any VGA) added.
+ Code cleaned up.
+2.2 (01-Feb-96) EGA 80x43 fixed. VESA extended to 0x200-0x4ff (non-standard 02XX
+ VESA modes work now). Display end bug workaround supported.
+ Special modes renumbered to allow adding of the "recalculate"
+ flag, 0xffff and 0xfffe became aliases instead of real IDs.
+ Screen contents retained during mode changes.
+2.3 (15-Mar-96) Changed to work with 1.3.74 kernel.
+2.4 (18-Mar-96) Added patches by Hans Lermen fixing a memory overwrite problem
+ with some boot loaders. Memory management rewritten to reflect
+ these changes. Unfortunately, screen contents retaining works
+ only with some loaders now.
+ Added a Tseng 132x60 mode.
+2.5 (19-Mar-96) Fixed a VESA mode scanning bug introduced in 2.4.
+2.6 (25-Mar-96) Some VESA BIOS errors not reported -- it fixes error reports on
+ several cards with broken VESA code (e.g., ATI VGA).
+2.7 (09-Apr-96) - Accepted all VESA modes in range 0x100 to 0x7ff, because some
+ cards use very strange mode numbers.
+ - Added Realtek VGA modes (thanks to Gonzalo Tornaria).
+ - Hardware testing order slightly changed, tests based on ROM
+ contents done as first.
+ - Added support for special Video7 mode switching functions
+ (thanks to Tom Vander Aa).
+ - Added 480-scanline modes (especially useful for notebooks,
+ original version written by hhanemaa@cs.ruu.nl, patched by
+ Jeff Chua, rewritten by me).
+ - Screen store/restore fixed.
+2.8 (14-Apr-96) - Previous release was not compilable without CONFIG_VIDEO_SVGA.
+ - Better recognition of text modes during mode scan.
+2.9 (12-May-96) - Ignored VESA modes 0x80 - 0xff (more VESA BIOS bugs!)
+2.10 (11-Nov-96)- The whole thing made optional.
+ - Added the CONFIG_VIDEO_400_HACK switch.
+ - Added the CONFIG_VIDEO_GFX_HACK switch.
+ - Code cleanup.
+2.11 (03-May-97)- Yet another cleanup, now including also the documentation.
+ - Direct testing of SVGA adapters turned off by default, `scan'
+ offered explicitly on the prompt line.
+ - Removed the doc section describing adding of new probing
+ functions as I try to get rid of _all_ hardware probing here.
+2.12 (25-May-98)- Added support for VESA frame buffer graphics.
+2.13 (14-May-99)- Minor documentation fixes.
diff --git a/Documentation/sysctl/00-INDEX b/Documentation/sysctl/00-INDEX
new file mode 100644
index 000000000..8cf5d493f
--- /dev/null
+++ b/Documentation/sysctl/00-INDEX
@@ -0,0 +1,16 @@
+00-INDEX
+ - this file.
+README
+ - general information about /proc/sys/ sysctl files.
+abi.txt
+ - documentation for /proc/sys/abi/*.
+fs.txt
+ - documentation for /proc/sys/fs/*.
+kernel.txt
+ - documentation for /proc/sys/kernel/*.
+net.txt
+ - documentation for /proc/sys/net/*.
+sunrpc.txt
+ - documentation for /proc/sys/sunrpc/*.
+vm.txt
+ - documentation for /proc/sys/vm/*.
diff --git a/Documentation/sysctl/README b/Documentation/sysctl/README
new file mode 100644
index 000000000..8c3306e01
--- /dev/null
+++ b/Documentation/sysctl/README
@@ -0,0 +1,75 @@
+Documentation for /proc/sys/ kernel version 2.2.10
+ (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+'Why', I hear you ask, 'would anyone even _want_ documentation
+for them sysctl files? If anybody really needs it, it's all in
+the source...'
+
+Well, this documentation is written because some people either
+don't know they need to tweak something, or because they don't
+have the time or knowledge to read the source code.
+
+Furthermore, the programmers who built sysctl have built it to
+be actually used, not just for the fun of programming it :-)
+
+==============================================================
+
+Legal blurb:
+
+As usual, there are two main things to consider:
+1. you get what you pay for
+2. it's free
+
+The consequences are that I won't guarantee the correctness of
+this document, and if you come to me complaining about how you
+screwed up your system because of wrong documentation, I won't
+feel sorry for you. I might even laugh at you...
+
+But of course, if you _do_ manage to screw up your system using
+only the sysctl options used in this file, I'd like to hear of
+it. Not only to have a great laugh, but also to make sure that
+you're the last RTFMing person to screw up.
+
+In short, e-mail your suggestions, corrections and / or horror
+stories to: <riel@nl.linux.org>
+
+Rik van Riel.
+
+==============================================================
+
+Introduction:
+
+Sysctl is a means of configuring certain aspects of the kernel
+at run-time, and the /proc/sys/ directory is there so that you
+don't even need special tools to do it!
+In fact, there are only four things needed to use these config
+facilities:
+- a running Linux system
+- root access
+- common sense (this is especially hard to come by these days)
+- knowledge of what all those values mean
+
+As a quick 'ls /proc/sys' will show, the directory consists of
+several (arch-dependent?) subdirs. Each subdir is mainly about
+one part of the kernel, so you can do configuration on a piece
+by piece basis, or just some 'thematic frobbing'.
+
+The subdirs are about:
+abi/ execution domains & personalities
+debug/ <empty>
+dev/ device specific information (eg dev/cdrom/info)
+fs/ specific filesystems
+ filehandle, inode, dentry and quota tuning
+ binfmt_misc <Documentation/binfmt_misc.txt>
+kernel/ global kernel info / tuning
+ miscellaneous stuff
+net/ networking stuff, for documentation look in:
+ <Documentation/networking/>
+proc/ <empty>
+sunrpc/ SUN Remote Procedure Call (NFS)
+vm/ memory management tuning
+ buffer and cache management
+
+These are the subdirs I have on my system. There might be more
+or other subdirs in another setup. If you see another dir, I'd
+really like to hear about it :-)
diff --git a/Documentation/sysctl/abi.txt b/Documentation/sysctl/abi.txt
new file mode 100644
index 000000000..63f4ebcf6
--- /dev/null
+++ b/Documentation/sysctl/abi.txt
@@ -0,0 +1,54 @@
+Documentation for /proc/sys/abi/* kernel version 2.6.0.test2
+ (c) 2003, Fabian Frederick <ffrederick@users.sourceforge.net>
+
+For general info : README.
+
+==============================================================
+
+This path is binary emulation relevant aka personality types aka abi.
+When a process is executed, it's linked to an exec_domain whose
+personality is defined using values available from /proc/sys/abi.
+You can find further details about abi in include/linux/personality.h.
+
+Here are the files featuring in 2.6 kernel :
+
+- defhandler_coff
+- defhandler_elf
+- defhandler_lcall7
+- defhandler_libcso
+- fake_utsname
+- trace
+
+===========================================================
+defhandler_coff:
+defined value :
+PER_SCOSVR3
+0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS | SHORT_INODE
+
+===========================================================
+defhandler_elf:
+defined value :
+PER_LINUX
+0
+
+===========================================================
+defhandler_lcall7:
+defined value :
+PER_SVR4
+0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
+
+===========================================================
+defhandler_libsco:
+defined value:
+PER_SVR4
+0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
+
+===========================================================
+fake_utsname:
+Unused
+
+===========================================================
+trace:
+Unused
+
+===========================================================
diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
new file mode 100644
index 000000000..88152f214
--- /dev/null
+++ b/Documentation/sysctl/fs.txt
@@ -0,0 +1,300 @@
+Documentation for /proc/sys/fs/* kernel version 2.2.10
+ (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+ (c) 2009, Shen Feng<shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in README.
+
+==============================================================
+
+This file contains documentation for the sysctl files in
+/proc/sys/fs/ and is valid for Linux kernel version 2.2.
+
+The files in this directory can be used to tune and monitor
+miscellaneous and general things in the operation of the Linux
+kernel. Since some of the files _can_ be used to screw up your
+system, it is advisable to read both documentation and source
+before actually making adjustments.
+
+1. /proc/sys/fs
+----------------------------------------------------------
+
+Currently, these files are in /proc/sys/fs:
+- aio-max-nr
+- aio-nr
+- dentry-state
+- dquot-max
+- dquot-nr
+- file-max
+- file-nr
+- inode-max
+- inode-nr
+- inode-state
+- nr_open
+- overflowuid
+- overflowgid
+- protected_hardlinks
+- protected_symlinks
+- suid_dumpable
+- super-max
+- super-nr
+
+==============================================================
+
+aio-nr & aio-max-nr:
+
+aio-nr is the running total of the number of events specified on the
+io_setup system call for all currently active aio contexts. If aio-nr
+reaches aio-max-nr then io_setup will fail with EAGAIN. Note that
+raising aio-max-nr does not result in the pre-allocation or re-sizing
+of any kernel data structures.
+
+==============================================================
+
+dentry-state:
+
+From linux/fs/dentry.c:
+--------------------------------------------------------------
+struct {
+ int nr_dentry;
+ int nr_unused;
+ int age_limit; /* age in seconds */
+ int want_pages; /* pages requested by system */
+ int dummy[2];
+} dentry_stat = {0, 0, 45, 0,};
+--------------------------------------------------------------
+
+Dentries are dynamically allocated and deallocated, and
+nr_dentry seems to be 0 all the time. Hence it's safe to
+assume that only nr_unused, age_limit and want_pages are
+used. Nr_unused seems to be exactly what its name says.
+Age_limit is the age in seconds after which dcache entries
+can be reclaimed when memory is short and want_pages is
+nonzero when shrink_dcache_pages() has been called and the
+dcache isn't pruned yet.
+
+==============================================================
+
+dquot-max & dquot-nr:
+
+The file dquot-max shows the maximum number of cached disk
+quota entries.
+
+The file dquot-nr shows the number of allocated disk quota
+entries and the number of free disk quota entries.
+
+If the number of free cached disk quotas is very low and
+you have some awesome number of simultaneous system users,
+you might want to raise the limit.
+
+==============================================================
+
+file-max & file-nr:
+
+The value in file-max denotes the maximum number of file-
+handles that the Linux kernel will allocate. When you get lots
+of error messages about running out of file handles, you might
+want to increase this limit.
+
+Historically,the kernel was able to allocate file handles
+dynamically, but not to free them again. The three values in
+file-nr denote the number of allocated file handles, the number
+of allocated but unused file handles, and the maximum number of
+file handles. Linux 2.6 always reports 0 as the number of free
+file handles -- this is not an error, it just means that the
+number of allocated file handles exactly matches the number of
+used file handles.
+
+Attempts to allocate more file descriptors than file-max are
+reported with printk, look for "VFS: file-max limit <number>
+reached".
+==============================================================
+
+nr_open:
+
+This denotes the maximum number of file-handles a process can
+allocate. Default value is 1024*1024 (1048576) which should be
+enough for most machines. Actual limit depends on RLIMIT_NOFILE
+resource limit.
+
+==============================================================
+
+inode-max, inode-nr & inode-state:
+
+As with file handles, the kernel allocates the inode structures
+dynamically, but can't free them yet.
+
+The value in inode-max denotes the maximum number of inode
+handlers. This value should be 3-4 times larger than the value
+in file-max, since stdin, stdout and network sockets also
+need an inode struct to handle them. When you regularly run
+out of inodes, you need to increase this value.
+
+The file inode-nr contains the first two items from
+inode-state, so we'll skip to that file...
+
+Inode-state contains three actual numbers and four dummies.
+The actual numbers are, in order of appearance, nr_inodes,
+nr_free_inodes and preshrink.
+
+Nr_inodes stands for the number of inodes the system has
+allocated, this can be slightly more than inode-max because
+Linux allocates them one pageful at a time.
+
+Nr_free_inodes represents the number of free inodes (?) and
+preshrink is nonzero when the nr_inodes > inode-max and the
+system needs to prune the inode list instead of allocating
+more.
+
+==============================================================
+
+overflowgid & overflowuid:
+
+Some filesystems only support 16-bit UIDs and GIDs, although in Linux
+UIDs and GIDs are 32 bits. When one of these filesystems is mounted
+with writes enabled, any UID or GID that would exceed 65535 is translated
+to a fixed value before being written to disk.
+
+These sysctls allow you to change the value of the fixed UID and GID.
+The default is 65534.
+
+==============================================================
+
+protected_hardlinks:
+
+A long-standing class of security issues is the hardlink-based
+time-of-check-time-of-use race, most commonly seen in world-writable
+directories like /tmp. The common method of exploitation of this flaw
+is to cross privilege boundaries when following a given hardlink (i.e. a
+root process follows a hardlink created by another user). Additionally,
+on systems without separated partitions, this stops unauthorized users
+from "pinning" vulnerable setuid/setgid files against being upgraded by
+the administrator, or linking to special files.
+
+When set to "0", hardlink creation behavior is unrestricted.
+
+When set to "1" hardlinks cannot be created by users if they do not
+already own the source file, or do not have read/write access to it.
+
+This protection is based on the restrictions in Openwall and grsecurity.
+
+==============================================================
+
+protected_symlinks:
+
+A long-standing class of security issues is the symlink-based
+time-of-check-time-of-use race, most commonly seen in world-writable
+directories like /tmp. The common method of exploitation of this flaw
+is to cross privilege boundaries when following a given symlink (i.e. a
+root process follows a symlink belonging to another user). For a likely
+incomplete list of hundreds of examples across the years, please see:
+http://cve.mitre.org/cgi-bin/cvekey.cgi?keyword=/tmp
+
+When set to "0", symlink following behavior is unrestricted.
+
+When set to "1" symlinks are permitted to be followed only when outside
+a sticky world-writable directory, or when the uid of the symlink and
+follower match, or when the directory owner matches the symlink's owner.
+
+This protection is based on the restrictions in Openwall and grsecurity.
+
+==============================================================
+
+suid_dumpable:
+
+This value can be used to query and set the core dump mode for setuid
+or otherwise protected/tainted binaries. The modes are
+
+0 - (default) - traditional behaviour. Any process which has changed
+ privilege levels or is execute only will not be dumped.
+1 - (debug) - all processes dump core when possible. The core dump is
+ owned by the current user and no security is applied. This is
+ intended for system debugging situations only. Ptrace is unchecked.
+ This is insecure as it allows regular users to examine the memory
+ contents of privileged processes.
+2 - (suidsafe) - any binary which normally would not be dumped is dumped
+ anyway, but only if the "core_pattern" kernel sysctl is set to
+ either a pipe handler or a fully qualified path. (For more details
+ on this limitation, see CVE-2006-2451.) This mode is appropriate
+ when administrators are attempting to debug problems in a normal
+ environment, and either have a core dump pipe handler that knows
+ to treat privileged core dumps with care, or specific directory
+ defined for catching core dumps. If a core dump happens without
+ a pipe handler or fully qualifid path, a message will be emitted
+ to syslog warning about the lack of a correct setting.
+
+==============================================================
+
+super-max & super-nr:
+
+These numbers control the maximum number of superblocks, and
+thus the maximum number of mounted filesystems the kernel
+can have. You only need to increase super-max if you need to
+mount more filesystems than the current value in super-max
+allows you to.
+
+==============================================================
+
+aio-nr & aio-max-nr:
+
+aio-nr shows the current system-wide number of asynchronous io
+requests. aio-max-nr allows you to change the maximum value
+aio-nr can grow to.
+
+==============================================================
+
+
+2. /proc/sys/fs/binfmt_misc
+----------------------------------------------------------
+
+Documentation for the files in /proc/sys/fs/binfmt_misc is
+in Documentation/binfmt_misc.txt.
+
+
+3. /proc/sys/fs/mqueue - POSIX message queues filesystem
+----------------------------------------------------------
+
+The "mqueue" filesystem provides the necessary kernel features to enable the
+creation of a user space library that implements the POSIX message queues
+API (as noted by the MSG tag in the POSIX 1003.1-2001 version of the System
+Interfaces specification.)
+
+The "mqueue" filesystem contains values for determining/setting the amount of
+resources used by the file system.
+
+/proc/sys/fs/mqueue/queues_max is a read/write file for setting/getting the
+maximum number of message queues allowed on the system.
+
+/proc/sys/fs/mqueue/msg_max is a read/write file for setting/getting the
+maximum number of messages in a queue value. In fact it is the limiting value
+for another (user) limit which is set in mq_open invocation. This attribute of
+a queue must be less or equal then msg_max.
+
+/proc/sys/fs/mqueue/msgsize_max is a read/write file for setting/getting the
+maximum message size value (it is every message queue's attribute set during
+its creation).
+
+/proc/sys/fs/mqueue/msg_default is a read/write file for setting/getting the
+default number of messages in a queue value if attr parameter of mq_open(2) is
+NULL. If it exceed msg_max, the default value is initialized msg_max.
+
+/proc/sys/fs/mqueue/msgsize_default is a read/write file for setting/getting
+the default message size value if attr parameter of mq_open(2) is NULL. If it
+exceed msgsize_max, the default value is initialized msgsize_max.
+
+4. /proc/sys/fs/epoll - Configuration options for the epoll interface
+--------------------------------------------------------
+
+This directory contains configuration options for the epoll(7) interface.
+
+max_user_watches
+----------------
+
+Every epoll file descriptor can store a number of files to be monitored
+for event readiness. Each one of these monitored files constitutes a "watch".
+This configuration option sets the maximum number of "watches" that are
+allowed for each user.
+Each "watch" costs roughly 90 bytes on a 32bit kernel, and roughly 160 bytes
+on a 64bit one.
+The current default value for max_user_watches is the 1/32 of the available
+low memory, divided for the "watch" cost in bytes.
+
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
new file mode 100644
index 000000000..45542afe7
--- /dev/null
+++ b/Documentation/sysctl/kernel.txt
@@ -0,0 +1,961 @@
+Documentation for /proc/sys/kernel/* kernel version 2.2.10
+ (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+ (c) 2009, Shen Feng<shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in README.
+
+==============================================================
+
+This file contains documentation for the sysctl files in
+/proc/sys/kernel/ and is valid for Linux kernel version 2.2.
+
+The files in this directory can be used to tune and monitor
+miscellaneous and general things in the operation of the Linux
+kernel. Since some of the files _can_ be used to screw up your
+system, it is advisable to read both documentation and source
+before actually making adjustments.
+
+Currently, these files might (depending on your configuration)
+show up in /proc/sys/kernel:
+
+- acct
+- acpi_video_flags
+- auto_msgmni
+- bootloader_type [ X86 only ]
+- bootloader_version [ X86 only ]
+- callhome [ S390 only ]
+- cap_last_cap
+- core_pattern
+- core_pipe_limit
+- core_uses_pid
+- ctrl-alt-del
+- dmesg_restrict
+- domainname
+- hostname
+- hotplug
+- hung_task_panic
+- hung_task_check_count
+- hung_task_timeout_secs
+- hung_task_warnings
+- kexec_load_disabled
+- iso_cpu
+- kptr_restrict
+- kstack_depth_to_print [ X86 only ]
+- l2cr [ PPC only ]
+- modprobe ==> Documentation/debugging-modules.txt
+- modules_disabled
+- msg_next_id [ sysv ipc ]
+- msgmax
+- msgmnb
+- msgmni
+- nmi_watchdog
+- osrelease
+- ostype
+- overflowgid
+- overflowuid
+- panic
+- panic_on_oops
+- panic_on_stackoverflow
+- panic_on_unrecovered_nmi
+- panic_on_warn
+- pid_max
+- powersave-nap [ PPC only ]
+- printk
+- printk_delay
+- printk_ratelimit
+- printk_ratelimit_burst
+- randomize_va_space
+- real-root-dev ==> Documentation/initrd.txt
+- reboot-cmd [ SPARC only ]
+- rr_interval
+- rtsig-max
+- rtsig-nr
+- sem
+- sem_next_id [ sysv ipc ]
+- sg-big-buff [ generic SCSI device (sg) ]
+- shm_next_id [ sysv ipc ]
+- shm_rmid_forced
+- shmall
+- shmmax [ sysv ipc ]
+- shmmni
+- softlockup_all_cpu_backtrace
+- soft_watchdog
+- stop-a [ SPARC only ]
+- sysrq ==> Documentation/sysrq.txt
+- sysctl_writes_strict
+- tainted
+- threads-max
+- unknown_nmi_panic
+- watchdog
+- watchdog_thresh
+- version
+
+==============================================================
+
+acct:
+
+highwater lowwater frequency
+
+If BSD-style process accounting is enabled these values control
+its behaviour. If free space on filesystem where the log lives
+goes below <lowwater>% accounting suspends. If free space gets
+above <highwater>% accounting resumes. <Frequency> determines
+how often do we check the amount of free space (value is in
+seconds). Default:
+4 2 30
+That is, suspend accounting if there left <= 2% free; resume it
+if we got >=4%; consider information about amount of free space
+valid for 30 seconds.
+
+==============================================================
+
+acpi_video_flags:
+
+flags
+
+See Doc*/kernel/power/video.txt, it allows mode of video boot to be
+set during run time.
+
+==============================================================
+
+auto_msgmni:
+
+This variable has no effect and may be removed in future kernel
+releases. Reading it always returns 0.
+Up to Linux 3.17, it enabled/disabled automatic recomputing of msgmni
+upon memory add/remove or upon ipc namespace creation/removal.
+Echoing "1" into this file enabled msgmni automatic recomputing.
+Echoing "0" turned it off. auto_msgmni default value was 1.
+
+
+==============================================================
+
+bootloader_type:
+
+x86 bootloader identification
+
+This gives the bootloader type number as indicated by the bootloader,
+shifted left by 4, and OR'd with the low four bits of the bootloader
+version. The reason for this encoding is that this used to match the
+type_of_loader field in the kernel header; the encoding is kept for
+backwards compatibility. That is, if the full bootloader type number
+is 0x15 and the full version number is 0x234, this file will contain
+the value 340 = 0x154.
+
+See the type_of_loader and ext_loader_type fields in
+Documentation/x86/boot.txt for additional information.
+
+==============================================================
+
+bootloader_version:
+
+x86 bootloader version
+
+The complete bootloader version number. In the example above, this
+file will contain the value 564 = 0x234.
+
+See the type_of_loader and ext_loader_ver fields in
+Documentation/x86/boot.txt for additional information.
+
+==============================================================
+
+callhome:
+
+Controls the kernel's callhome behavior in case of a kernel panic.
+
+The s390 hardware allows an operating system to send a notification
+to a service organization (callhome) in case of an operating system panic.
+
+When the value in this file is 0 (which is the default behavior)
+nothing happens in case of a kernel panic. If this value is set to "1"
+the complete kernel oops message is send to the IBM customer service
+organization in case the mainframe the Linux operating system is running
+on has a service contract with IBM.
+
+==============================================================
+
+cap_last_cap
+
+Highest valid capability of the running kernel. Exports
+CAP_LAST_CAP from the kernel.
+
+==============================================================
+
+core_pattern:
+
+core_pattern is used to specify a core dumpfile pattern name.
+. max length 128 characters; default value is "core"
+. core_pattern is used as a pattern template for the output filename;
+ certain string patterns (beginning with '%') are substituted with
+ their actual values.
+. backward compatibility with core_uses_pid:
+ If core_pattern does not include "%p" (default does not)
+ and core_uses_pid is set, then .PID will be appended to
+ the filename.
+. corename format specifiers:
+ %<NUL> '%' is dropped
+ %% output one '%'
+ %p pid
+ %P global pid (init PID namespace)
+ %i tid
+ %I global tid (init PID namespace)
+ %u uid
+ %g gid
+ %d dump mode, matches PR_SET_DUMPABLE and
+ /proc/sys/fs/suid_dumpable
+ %s signal number
+ %t UNIX time of dump
+ %h hostname
+ %e executable filename (may be shortened)
+ %E executable path
+ %<OTHER> both are dropped
+. If the first character of the pattern is a '|', the kernel will treat
+ the rest of the pattern as a command to run. The core dump will be
+ written to the standard input of that program instead of to a file.
+
+==============================================================
+
+core_pipe_limit:
+
+This sysctl is only applicable when core_pattern is configured to pipe
+core files to a user space helper (when the first character of
+core_pattern is a '|', see above). When collecting cores via a pipe
+to an application, it is occasionally useful for the collecting
+application to gather data about the crashing process from its
+/proc/pid directory. In order to do this safely, the kernel must wait
+for the collecting process to exit, so as not to remove the crashing
+processes proc files prematurely. This in turn creates the
+possibility that a misbehaving userspace collecting process can block
+the reaping of a crashed process simply by never exiting. This sysctl
+defends against that. It defines how many concurrent crashing
+processes may be piped to user space applications in parallel. If
+this value is exceeded, then those crashing processes above that value
+are noted via the kernel log and their cores are skipped. 0 is a
+special value, indicating that unlimited processes may be captured in
+parallel, but that no waiting will take place (i.e. the collecting
+process is not guaranteed access to /proc/<crashing pid>/). This
+value defaults to 0.
+
+==============================================================
+
+core_uses_pid:
+
+The default coredump filename is "core". By setting
+core_uses_pid to 1, the coredump filename becomes core.PID.
+If core_pattern does not include "%p" (default does not)
+and core_uses_pid is set, then .PID will be appended to
+the filename.
+
+==============================================================
+
+ctrl-alt-del:
+
+When the value in this file is 0, ctrl-alt-del is trapped and
+sent to the init(1) program to handle a graceful restart.
+When, however, the value is > 0, Linux's reaction to a Vulcan
+Nerve Pinch (tm) will be an immediate reboot, without even
+syncing its dirty buffers.
+
+Note: when a program (like dosemu) has the keyboard in 'raw'
+mode, the ctrl-alt-del is intercepted by the program before it
+ever reaches the kernel tty layer, and it's up to the program
+to decide what to do with it.
+
+==============================================================
+
+dmesg_restrict:
+
+This toggle indicates whether unprivileged users are prevented
+from using dmesg(8) to view messages from the kernel's log buffer.
+When dmesg_restrict is set to (0) there are no restrictions. When
+dmesg_restrict is set set to (1), users must have CAP_SYSLOG to use
+dmesg(8).
+
+The kernel config option CONFIG_SECURITY_DMESG_RESTRICT sets the
+default value of dmesg_restrict.
+
+==============================================================
+
+domainname & hostname:
+
+These files can be used to set the NIS/YP domainname and the
+hostname of your box in exactly the same way as the commands
+domainname and hostname, i.e.:
+# echo "darkstar" > /proc/sys/kernel/hostname
+# echo "mydomain" > /proc/sys/kernel/domainname
+has the same effect as
+# hostname "darkstar"
+# domainname "mydomain"
+
+Note, however, that the classic darkstar.frop.org has the
+hostname "darkstar" and DNS (Internet Domain Name Server)
+domainname "frop.org", not to be confused with the NIS (Network
+Information Service) or YP (Yellow Pages) domainname. These two
+domain names are in general different. For a detailed discussion
+see the hostname(1) man page.
+
+==============================================================
+
+hotplug:
+
+Path for the hotplug policy agent.
+Default value is "/sbin/hotplug".
+
+==============================================================
+
+hung_task_panic:
+
+Controls the kernel's behavior when a hung task is detected.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0: continue operation. This is the default behavior.
+
+1: panic immediately.
+
+==============================================================
+
+hung_task_check_count:
+
+The upper bound on the number of tasks that are checked.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+==============================================================
+
+hung_task_timeout_secs:
+
+Check interval. When a task in D state did not get scheduled
+for more than this value report a warning.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+0: means infinite timeout - no checking done.
+Possible values to set are in range {0..LONG_MAX/HZ}.
+
+==============================================================
+
+hung_task_warnings:
+
+The maximum number of warnings to report. During a check interval
+if a hung task is detected, this value is decreased by 1.
+When this value reaches 0, no more warnings will be reported.
+This file shows up if CONFIG_DETECT_HUNG_TASK is enabled.
+
+-1: report an infinite number of warnings.
+
+==============================================================
+
+kexec_load_disabled:
+
+A toggle indicating if the kexec_load syscall has been disabled. This
+value defaults to 0 (false: kexec_load enabled), but can be set to 1
+(true: kexec_load disabled). Once true, kexec can no longer be used, and
+the toggle cannot be set back to false. This allows a kexec image to be
+loaded before disabling the syscall, allowing a system to set up (and
+later use) an image without it being altered. Generally used together
+with the "modules_disabled" sysctl.
+
+==============================================================
+
+kptr_restrict:
+
+This toggle indicates whether restrictions are placed on
+exposing kernel addresses via /proc and other interfaces.
+
+When kptr_restrict is set to (0), the default, there are no restrictions.
+
+When kptr_restrict is set to (1), kernel pointers printed using the %pK
+format specifier will be replaced with 0's unless the user has CAP_SYSLOG
+and effective user and group ids are equal to the real ids. This is
+because %pK checks are done at read() time rather than open() time, so
+if permissions are elevated between the open() and the read() (e.g via
+a setuid binary) then %pK will not leak kernel pointers to unprivileged
+users. Note, this is a temporary solution only. The correct long-term
+solution is to do the permission checks at open() time. Consider removing
+world read permissions from files that use %pK, and using dmesg_restrict
+to protect against uses of %pK in dmesg(8) if leaking kernel pointer
+values to unprivileged users is a concern.
+
+When kptr_restrict is set to (2), kernel pointers printed using
+%pK will be replaced with 0's regardless of privileges.
+
+==============================================================
+
+kstack_depth_to_print: (X86 only)
+
+Controls the number of words to print when dumping the raw
+kernel stack.
+
+==============================================================
+
+iso_cpu: (BFS CPU scheduler only).
+
+This sets the percentage cpu that the unprivileged SCHED_ISO tasks can
+run effectively at realtime priority, averaged over a rolling five
+seconds over the -whole- system, meaning all cpus.
+
+Set to 70 (percent) by default.
+
+==============================================================
+
+l2cr: (PPC only)
+
+This flag controls the L2 cache of G3 processor boards. If
+0, the cache is disabled. Enabled if nonzero.
+
+==============================================================
+
+modules_disabled:
+
+A toggle value indicating if modules are allowed to be loaded
+in an otherwise modular kernel. This toggle defaults to off
+(0), but can be set true (1). Once true, modules can be
+neither loaded nor unloaded, and the toggle cannot be set back
+to false. Generally used with the "kexec_load_disabled" toggle.
+
+==============================================================
+
+msg_next_id, sem_next_id, and shm_next_id:
+
+These three toggles allows to specify desired id for next allocated IPC
+object: message, semaphore or shared memory respectively.
+
+By default they are equal to -1, which means generic allocation logic.
+Possible values to set are in range {0..INT_MAX}.
+
+Notes:
+1) kernel doesn't guarantee, that new object will have desired id. So,
+it's up to userspace, how to handle an object with "wrong" id.
+2) Toggle with non-default value will be set back to -1 by kernel after
+successful IPC object allocation.
+
+==============================================================
+
+nmi_watchdog:
+
+This parameter can be used to control the NMI watchdog
+(i.e. the hard lockup detector) on x86 systems.
+
+ 0 - disable the hard lockup detector
+ 1 - enable the hard lockup detector
+
+The hard lockup detector monitors each CPU for its ability to respond to
+timer interrupts. The mechanism utilizes CPU performance counter registers
+that are programmed to generate Non-Maskable Interrupts (NMIs) periodically
+while a CPU is busy. Hence, the alternative name 'NMI watchdog'.
+
+The NMI watchdog is disabled by default if the kernel is running as a guest
+in a KVM virtual machine. This default can be overridden by adding
+
+ nmi_watchdog=1
+
+to the guest kernel command line (see Documentation/kernel-parameters.txt).
+
+==============================================================
+
+numa_balancing
+
+Enables/disables automatic page fault based NUMA memory
+balancing. Memory is moved automatically to nodes
+that access it often.
+
+Enables/disables automatic NUMA memory balancing. On NUMA machines, there
+is a performance penalty if remote memory is accessed by a CPU. When this
+feature is enabled the kernel samples what task thread is accessing memory
+by periodically unmapping pages and later trapping a page fault. At the
+time of the page fault, it is determined if the data being accessed should
+be migrated to a local memory node.
+
+The unmapping of pages and trapping faults incur additional overhead that
+ideally is offset by improved memory locality but there is no universal
+guarantee. If the target workload is already bound to NUMA nodes then this
+feature should be disabled. Otherwise, if the system overhead from the
+feature is too high then the rate the kernel samples for NUMA hinting
+faults may be controlled by the numa_balancing_scan_period_min_ms,
+numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
+numa_balancing_scan_size_mb, and numa_balancing_settle_count sysctls.
+
+==============================================================
+
+numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms,
+numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
+
+Automatic NUMA balancing scans tasks address space and unmaps pages to
+detect if pages are properly placed or if the data should be migrated to a
+memory node local to where the task is running. Every "scan delay" the task
+scans the next "scan size" number of pages in its address space. When the
+end of the address space is reached the scanner restarts from the beginning.
+
+In combination, the "scan delay" and "scan size" determine the scan rate.
+When "scan delay" decreases, the scan rate increases. The scan delay and
+hence the scan rate of every task is adaptive and depends on historical
+behaviour. If pages are properly placed then the scan delay increases,
+otherwise the scan delay decreases. The "scan size" is not adaptive but
+the higher the "scan size", the higher the scan rate.
+
+Higher scan rates incur higher system overhead as page faults must be
+trapped and potentially data must be migrated. However, the higher the scan
+rate, the more quickly a tasks memory is migrated to a local node if the
+workload pattern changes and minimises performance impact due to remote
+memory accesses. These sysctls control the thresholds for scan delays and
+the number of pages scanned.
+
+numa_balancing_scan_period_min_ms is the minimum time in milliseconds to
+scan a tasks virtual memory. It effectively controls the maximum scanning
+rate for each task.
+
+numa_balancing_scan_delay_ms is the starting "scan delay" used for a task
+when it initially forks.
+
+numa_balancing_scan_period_max_ms is the maximum time in milliseconds to
+scan a tasks virtual memory. It effectively controls the minimum scanning
+rate for each task.
+
+numa_balancing_scan_size_mb is how many megabytes worth of pages are
+scanned for a given scan.
+
+==============================================================
+
+osrelease, ostype & version:
+
+# cat osrelease
+2.1.88
+# cat ostype
+Linux
+# cat version
+#5 Wed Feb 25 21:49:24 MET 1998
+
+The files osrelease and ostype should be clear enough. Version
+needs a little more clarification however. The '#5' means that
+this is the fifth kernel built from this source base and the
+date behind it indicates the time the kernel was built.
+The only way to tune these values is to rebuild the kernel :-)
+
+==============================================================
+
+overflowgid & overflowuid:
+
+if your architecture did not always support 32-bit UIDs (i.e. arm,
+i386, m68k, sh, and sparc32), a fixed UID and GID will be returned to
+applications that use the old 16-bit UID/GID system calls, if the
+actual UID or GID would exceed 65535.
+
+These sysctls allow you to change the value of the fixed UID and GID.
+The default is 65534.
+
+==============================================================
+
+panic:
+
+The value in this file represents the number of seconds the kernel
+waits before rebooting on a panic. When you use the software watchdog,
+the recommended setting is 60.
+
+==============================================================
+
+panic_on_oops:
+
+Controls the kernel's behaviour when an oops or BUG is encountered.
+
+0: try to continue operation
+
+1: panic immediately. If the `panic' sysctl is also non-zero then the
+ machine will be rebooted.
+
+==============================================================
+
+panic_on_stackoverflow:
+
+Controls the kernel's behavior when detecting the overflows of
+kernel, IRQ and exception stacks except a user stack.
+This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
+
+0: try to continue operation.
+
+1: panic immediately.
+
+==============================================================
+
+panic_on_unrecovered_nmi:
+
+The default Linux behaviour on an NMI of either memory or unknown is
+to continue operation. For many environments such as scientific
+computing it is preferable that the box is taken out and the error
+dealt with than an uncorrected parity/ECC error get propagated.
+
+A small number of systems do generate NMI's for bizarre random reasons
+such as power management so the default is off. That sysctl works like
+the existing panic controls already in that directory.
+
+==============================================================
+
+panic_on_warn:
+
+Calls panic() in the WARN() path when set to 1. This is useful to avoid
+a kernel rebuild when attempting to kdump at the location of a WARN().
+
+0: only WARN(), default behaviour.
+
+1: call panic() after printing out WARN() location.
+
+==============================================================
+
+perf_cpu_time_max_percent:
+
+Hints to the kernel how much CPU time it should be allowed to
+use to handle perf sampling events. If the perf subsystem
+is informed that its samples are exceeding this limit, it
+will drop its sampling frequency to attempt to reduce its CPU
+usage.
+
+Some perf sampling happens in NMIs. If these samples
+unexpectedly take too long to execute, the NMIs can become
+stacked up next to each other so much that nothing else is
+allowed to execute.
+
+0: disable the mechanism. Do not monitor or correct perf's
+ sampling rate no matter how CPU time it takes.
+
+1-100: attempt to throttle perf's sample rate to this
+ percentage of CPU. Note: the kernel calculates an
+ "expected" length of each sample event. 100 here means
+ 100% of that expected length. Even if this is set to
+ 100, you may still see sample throttling if this
+ length is exceeded. Set to 0 if you truly do not care
+ how much CPU is consumed.
+
+==============================================================
+
+
+pid_max:
+
+PID allocation wrap value. When the kernel's next PID value
+reaches this value, it wraps back to a minimum PID value.
+PIDs of value pid_max or larger are not allocated.
+
+==============================================================
+
+ns_last_pid:
+
+The last pid allocated in the current (the one task using this sysctl
+lives in) pid namespace. When selecting a pid for a next task on fork
+kernel tries to allocate a number starting from this one.
+
+==============================================================
+
+powersave-nap: (PPC only)
+
+If set, Linux-PPC will use the 'nap' mode of powersaving,
+otherwise the 'doze' mode will be used.
+
+==============================================================
+
+printk:
+
+The four values in printk denote: console_loglevel,
+default_message_loglevel, minimum_console_loglevel and
+default_console_loglevel respectively.
+
+These values influence printk() behavior when printing or
+logging error messages. See 'man 2 syslog' for more info on
+the different loglevels.
+
+- console_loglevel: messages with a higher priority than
+ this will be printed to the console
+- default_message_loglevel: messages without an explicit priority
+ will be printed with this priority
+- minimum_console_loglevel: minimum (highest) value to which
+ console_loglevel can be set
+- default_console_loglevel: default value for console_loglevel
+
+==============================================================
+
+printk_delay:
+
+Delay each printk message in printk_delay milliseconds
+
+Value from 0 - 10000 is allowed.
+
+==============================================================
+
+printk_ratelimit:
+
+Some warning messages are rate limited. printk_ratelimit specifies
+the minimum length of time between these messages (in jiffies), by
+default we allow one every 5 seconds.
+
+A value of 0 will disable rate limiting.
+
+==============================================================
+
+printk_ratelimit_burst:
+
+While long term we enforce one message per printk_ratelimit
+seconds, we do allow a burst of messages to pass through.
+printk_ratelimit_burst specifies the number of messages we can
+send before ratelimiting kicks in.
+
+==============================================================
+
+randomize_va_space:
+
+This option can be used to select the type of process address
+space randomization that is used in the system, for architectures
+that support this feature.
+
+0 - Turn the process address space randomization off. This is the
+ default for architectures that do not support this feature anyways,
+ and kernels that are booted with the "norandmaps" parameter.
+
+1 - Make the addresses of mmap base, stack and VDSO page randomized.
+ This, among other things, implies that shared libraries will be
+ loaded to random addresses. Also for PIE-linked binaries, the
+ location of code start is randomized. This is the default if the
+ CONFIG_COMPAT_BRK option is enabled.
+
+2 - Additionally enable heap randomization. This is the default if
+ CONFIG_COMPAT_BRK is disabled.
+
+ There are a few legacy applications out there (such as some ancient
+ versions of libc.so.5 from 1996) that assume that brk area starts
+ just after the end of the code+bss. These applications break when
+ start of the brk area is randomized. There are however no known
+ non-legacy applications that would be broken this way, so for most
+ systems it is safe to choose full randomization.
+
+ Systems with ancient and/or broken binaries should be configured
+ with CONFIG_COMPAT_BRK enabled, which excludes the heap from process
+ address space randomization.
+
+==============================================================
+
+reboot-cmd: (Sparc only)
+
+??? This seems to be a way to give an argument to the Sparc
+ROM/Flash boot loader. Maybe to tell it what to do after
+rebooting. ???
+
+==============================================================
+
+rr_interval: (BFS CPU scheduler only)
+
+This is the smallest duration that any cpu process scheduling unit
+will run for. Increasing this value can increase throughput of cpu
+bound tasks substantially but at the expense of increased latencies
+overall. Conversely decreasing it will decrease average and maximum
+latencies but at the expense of throughput. This value is in
+milliseconds and the default value chosen depends on the number of
+cpus available at scheduler initialisation with a minimum of 6.
+
+Valid values are from 1-1000.
+
+==============================================================
+
+rtsig-max & rtsig-nr:
+
+The file rtsig-max can be used to tune the maximum number
+of POSIX realtime (queued) signals that can be outstanding
+in the system.
+
+rtsig-nr shows the number of RT signals currently queued.
+
+==============================================================
+
+sg-big-buff:
+
+This file shows the size of the generic SCSI (sg) buffer.
+You can't tune it just yet, but you could change it on
+compile time by editing include/scsi/sg.h and changing
+the value of SG_BIG_BUFF.
+
+There shouldn't be any reason to change this value. If
+you can come up with one, you probably know what you
+are doing anyway :)
+
+==============================================================
+
+shmall:
+
+This parameter sets the total amount of shared memory pages that
+can be used system wide. Hence, SHMALL should always be at least
+ceil(shmmax/PAGE_SIZE).
+
+If you are not sure what the default PAGE_SIZE is on your Linux
+system, you can run the following command:
+
+# getconf PAGE_SIZE
+
+==============================================================
+
+shmmax:
+
+This value can be used to query and set the run time limit
+on the maximum shared memory segment size that can be created.
+Shared memory segments up to 1Gb are now supported in the
+kernel. This value defaults to SHMMAX.
+
+==============================================================
+
+shm_rmid_forced:
+
+Linux lets you set resource limits, including how much memory one
+process can consume, via setrlimit(2). Unfortunately, shared memory
+segments are allowed to exist without association with any process, and
+thus might not be counted against any resource limits. If enabled,
+shared memory segments are automatically destroyed when their attach
+count becomes zero after a detach or a process termination. It will
+also destroy segments that were created, but never attached to, on exit
+from the process. The only use left for IPC_RMID is to immediately
+destroy an unattached segment. Of course, this breaks the way things are
+defined, so some applications might stop working. Note that this
+feature will do you no good unless you also configure your resource
+limits (in particular, RLIMIT_AS and RLIMIT_NPROC). Most systems don't
+need this.
+
+Note that if you change this from 0 to 1, already created segments
+without users and with a dead originative process will be destroyed.
+
+==============================================================
+
+sysctl_writes_strict:
+
+Control how file position affects the behavior of updating sysctl values
+via the /proc/sys interface:
+
+ -1 - Legacy per-write sysctl value handling, with no printk warnings.
+ Each write syscall must fully contain the sysctl value to be
+ written, and multiple writes on the same sysctl file descriptor
+ will rewrite the sysctl value, regardless of file position.
+ 0 - (default) Same behavior as above, but warn about processes that
+ perform writes to a sysctl file descriptor when the file position
+ is not 0.
+ 1 - Respect file position when writing sysctl strings. Multiple writes
+ will append to the sysctl value buffer. Anything past the max length
+ of the sysctl value buffer will be ignored. Writes to numeric sysctl
+ entries must always be at file position 0 and the value must be
+ fully contained in the buffer sent in the write syscall.
+
+==============================================================
+
+softlockup_all_cpu_backtrace:
+
+This value controls the soft lockup detector thread's behavior
+when a soft lockup condition is detected as to whether or not
+to gather further debug information. If enabled, each cpu will
+be issued an NMI and instructed to capture stack trace.
+
+This feature is only applicable for architectures which support
+NMI.
+
+0: do nothing. This is the default behavior.
+
+1: on detection capture more debug information.
+
+==============================================================
+
+soft_watchdog
+
+This parameter can be used to control the soft lockup detector.
+
+ 0 - disable the soft lockup detector
+ 1 - enable the soft lockup detector
+
+The soft lockup detector monitors CPUs for threads that are hogging the CPUs
+without rescheduling voluntarily, and thus prevent the 'watchdog/N' threads
+from running. The mechanism depends on the CPUs ability to respond to timer
+interrupts which are needed for the 'watchdog/N' threads to be woken up by
+the watchdog timer function, otherwise the NMI watchdog - if enabled - can
+detect a hard lockup condition.
+
+==============================================================
+
+tainted:
+
+Non-zero if the kernel has been tainted. Numeric values, which
+can be ORed together:
+
+ 1 - A module with a non-GPL license has been loaded, this
+ includes modules with no license.
+ Set by modutils >= 2.4.9 and module-init-tools.
+ 2 - A module was force loaded by insmod -f.
+ Set by modutils >= 2.4.9 and module-init-tools.
+ 4 - Unsafe SMP processors: SMP with CPUs not designed for SMP.
+ 8 - A module was forcibly unloaded from the system by rmmod -f.
+ 16 - A hardware machine check error occurred on the system.
+ 32 - A bad page was discovered on the system.
+ 64 - The user has asked that the system be marked "tainted". This
+ could be because they are running software that directly modifies
+ the hardware, or for other reasons.
+ 128 - The system has died.
+ 256 - The ACPI DSDT has been overridden with one supplied by the user
+ instead of using the one provided by the hardware.
+ 512 - A kernel warning has occurred.
+1024 - A module from drivers/staging was loaded.
+2048 - The system is working around a severe firmware bug.
+4096 - An out-of-tree module has been loaded.
+8192 - An unsigned module has been loaded in a kernel supporting module
+ signature.
+16384 - A soft lockup has previously occurred on the system.
+32768 - The kernel has been live patched.
+
+==============================================================
+
+threads-max
+
+This value controls the maximum number of threads that can be created
+using fork().
+
+During initialization the kernel sets this value such that even if the
+maximum number of threads is created, the thread structures occupy only
+a part (1/8th) of the available RAM pages.
+
+The minimum value that can be written to threads-max is 20.
+The maximum value that can be written to threads-max is given by the
+constant FUTEX_TID_MASK (0x3fffffff).
+If a value outside of this range is written to threads-max an error
+EINVAL occurs.
+
+The value written is checked against the available RAM pages. If the
+thread structures would occupy too much (more than 1/8th) of the
+available RAM pages threads-max is reduced accordingly.
+
+==============================================================
+
+unknown_nmi_panic:
+
+The value in this file affects behavior of handling NMI. When the
+value is non-zero, unknown NMI is trapped and then panic occurs. At
+that time, kernel debugging information is displayed on console.
+
+NMI switch that most IA32 servers have fires unknown NMI up, for
+example. If a system hangs up, try pressing the NMI switch.
+
+==============================================================
+
+watchdog:
+
+This parameter can be used to disable or enable the soft lockup detector
+_and_ the NMI watchdog (i.e. the hard lockup detector) at the same time.
+
+ 0 - disable both lockup detectors
+ 1 - enable both lockup detectors
+
+The soft lockup detector and the NMI watchdog can also be disabled or
+enabled individually, using the soft_watchdog and nmi_watchdog parameters.
+If the watchdog parameter is read, for example by executing
+
+ cat /proc/sys/kernel/watchdog
+
+the output of this command (0 or 1) shows the logical OR of soft_watchdog
+and nmi_watchdog.
+
+==============================================================
+
+watchdog_thresh:
+
+This value can be used to control the frequency of hrtimer and NMI
+events and the soft and hard lockup thresholds. The default threshold
+is 10 seconds.
+
+The softlockup threshold is (2 * watchdog_thresh). Setting this
+tunable to zero will disable lockup detection altogether.
+
+==============================================================
diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
new file mode 100644
index 000000000..6294b5186
--- /dev/null
+++ b/Documentation/sysctl/net.txt
@@ -0,0 +1,304 @@
+Documentation for /proc/sys/net/*
+ (c) 1999 Terrehon Bowden <terrehon@pacbell.net>
+ Bodo Bauer <bb@ricochet.net>
+ (c) 2000 Jorge Nerin <comandante@zaralinux.com>
+ (c) 2009 Shen Feng <shen@cn.fujitsu.com>
+
+For general info and legal blurb, please look in README.
+
+==============================================================
+
+This file contains the documentation for the sysctl files in
+/proc/sys/net
+
+The interface to the networking parts of the kernel is located in
+/proc/sys/net. The following table shows all possible subdirectories. You may
+see only some of them, depending on your kernel's configuration.
+
+
+Table : Subdirectories in /proc/sys/net
+..............................................................................
+ Directory Content Directory Content
+ core General parameter appletalk Appletalk protocol
+ unix Unix domain sockets netrom NET/ROM
+ 802 E802 protocol ax25 AX25
+ ethernet Ethernet protocol rose X.25 PLP layer
+ ipv4 IP version 4 x25 X.25 protocol
+ ipx IPX token-ring IBM token ring
+ bridge Bridging decnet DEC net
+ ipv6 IP version 6 tipc TIPC
+..............................................................................
+
+1. /proc/sys/net/core - Network core options
+-------------------------------------------------------
+
+bpf_jit_enable
+--------------
+
+This enables Berkeley Packet Filter Just in Time compiler.
+Currently supported on x86_64 architecture, bpf_jit provides a framework
+to speed packet filtering, the one used by tcpdump/libpcap for example.
+Values :
+ 0 - disable the JIT (default value)
+ 1 - enable the JIT
+ 2 - enable the JIT and ask the compiler to emit traces on kernel log.
+
+dev_weight
+--------------
+
+The maximum number of packets that kernel can handle on a NAPI interrupt,
+it's a Per-CPU variable.
+Default: 64
+
+default_qdisc
+--------------
+
+The default queuing discipline to use for network devices. This allows
+overriding the default queue discipline of pfifo_fast with an
+alternative. Since the default queuing discipline is created with the
+no additional parameters so is best suited to queuing disciplines that
+work well without configuration like stochastic fair queue (sfq),
+CoDel (codel) or fair queue CoDel (fq_codel). Don't use queuing disciplines
+like Hierarchical Token Bucket or Deficit Round Robin which require setting
+up classes and bandwidths.
+Default: pfifo_fast
+
+busy_read
+----------------
+Low latency busy poll timeout for socket reads. (needs CONFIG_NET_RX_BUSY_POLL)
+Approximate time in us to busy loop waiting for packets on the device queue.
+This sets the default value of the SO_BUSY_POLL socket option.
+Can be set or overridden per socket by setting socket option SO_BUSY_POLL,
+which is the preferred method of enabling. If you need to enable the feature
+globally via sysctl, a value of 50 is recommended.
+Will increase power usage.
+Default: 0 (off)
+
+busy_poll
+----------------
+Low latency busy poll timeout for poll and select. (needs CONFIG_NET_RX_BUSY_POLL)
+Approximate time in us to busy loop waiting for events.
+Recommended value depends on the number of sockets you poll on.
+For several sockets 50, for several hundreds 100.
+For more than that you probably want to use epoll.
+Note that only sockets with SO_BUSY_POLL set will be busy polled,
+so you want to either selectively set SO_BUSY_POLL on those sockets or set
+sysctl.net.busy_read globally.
+Will increase power usage.
+Default: 0 (off)
+
+rmem_default
+------------
+
+The default setting of the socket receive buffer in bytes.
+
+rmem_max
+--------
+
+The maximum receive socket buffer size in bytes.
+
+tstamp_allow_data
+-----------------
+Allow processes to receive tx timestamps looped together with the original
+packet contents. If disabled, transmit timestamp requests from unprivileged
+processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set.
+Default: 1 (on)
+
+
+wmem_default
+------------
+
+The default setting (in bytes) of the socket send buffer.
+
+wmem_max
+--------
+
+The maximum send socket buffer size in bytes.
+
+message_burst and message_cost
+------------------------------
+
+These parameters are used to limit the warning messages written to the kernel
+log from the networking code. They enforce a rate limit to make a
+denial-of-service attack impossible. A higher message_cost factor, results in
+fewer messages that will be written. Message_burst controls when messages will
+be dropped. The default settings limit warning messages to one every five
+seconds.
+
+warnings
+--------
+
+This sysctl is now unused.
+
+This was used to control console messages from the networking stack that
+occur because of problems on the network like duplicate address or bad
+checksums.
+
+These messages are now emitted at KERN_DEBUG and can generally be enabled
+and controlled by the dynamic_debug facility.
+
+netdev_budget
+-------------
+
+Maximum number of packets taken from all interfaces in one polling cycle (NAPI
+poll). In one polling cycle interfaces which are registered to polling are
+probed in a round-robin manner.
+
+netdev_max_backlog
+------------------
+
+Maximum number of packets, queued on the INPUT side, when the interface
+receives packets faster than kernel can process them.
+
+netdev_rss_key
+--------------
+
+RSS (Receive Side Scaling) enabled drivers use a 40 bytes host key that is
+randomly generated.
+Some user space might need to gather its content even if drivers do not
+provide ethtool -x support yet.
+
+myhost:~# cat /proc/sys/net/core/netdev_rss_key
+84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8: ... (52 bytes total)
+
+File contains nul bytes if no driver ever called netdev_rss_key_fill() function.
+Note:
+/proc/sys/net/core/netdev_rss_key contains 52 bytes of key,
+but most drivers only use 40 bytes of it.
+
+myhost:~# ethtool -x eth0
+RX flow hash indirection table for eth0 with 8 RX ring(s):
+ 0: 0 1 2 3 4 5 6 7
+RSS hash key:
+84:50:f4:00:a8:15:d1:a7:e9:7f:1d:60:35:c7:47:25:42:97:74:ca:56:bb:b6:a1:d8:43:e3:c9:0c:fd:17:55:c2:3a:4d:69:ed:f1:42:89
+
+netdev_tstamp_prequeue
+----------------------
+
+If set to 0, RX packet timestamps can be sampled after RPS processing, when
+the target CPU processes packets. It might give some delay on timestamps, but
+permit to distribute the load on several cpus.
+
+If set to 1 (default), timestamps are sampled as soon as possible, before
+queueing.
+
+optmem_max
+----------
+
+Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence
+of struct cmsghdr structures with appended data.
+
+2. /proc/sys/net/unix - Parameters for Unix domain sockets
+-------------------------------------------------------
+
+There is only one file in this directory.
+unix_dgram_qlen limits the max number of datagrams queued in Unix domain
+socket's buffer. It will not take effect unless PF_UNIX flag is specified.
+
+
+3. /proc/sys/net/ipv4 - IPV4 settings
+-------------------------------------------------------
+Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for
+descriptions of these entries.
+
+
+4. Appletalk
+-------------------------------------------------------
+
+The /proc/sys/net/appletalk directory holds the Appletalk configuration data
+when Appletalk is loaded. The configurable parameters are:
+
+aarp-expiry-time
+----------------
+
+The amount of time we keep an ARP entry before expiring it. Used to age out
+old hosts.
+
+aarp-resolve-time
+-----------------
+
+The amount of time we will spend trying to resolve an Appletalk address.
+
+aarp-retransmit-limit
+---------------------
+
+The number of times we will retransmit a query before giving up.
+
+aarp-tick-time
+--------------
+
+Controls the rate at which expires are checked.
+
+The directory /proc/net/appletalk holds the list of active Appletalk sockets
+on a machine.
+
+The fields indicate the DDP type, the local address (in network:node format)
+the remote address, the size of the transmit pending queue, the size of the
+received queue (bytes waiting for applications to read) the state and the uid
+owning the socket.
+
+/proc/net/atalk_iface lists all the interfaces configured for appletalk.It
+shows the name of the interface, its Appletalk address, the network range on
+that address (or network number for phase 1 networks), and the status of the
+interface.
+
+/proc/net/atalk_route lists each known network route. It lists the target
+(network) that the route leads to, the router (may be directly connected), the
+route flags, and the device the route is using.
+
+
+5. IPX
+-------------------------------------------------------
+
+The IPX protocol has no tunable values in proc/sys/net.
+
+The IPX protocol does, however, provide proc/net/ipx. This lists each IPX
+socket giving the local and remote addresses in Novell format (that is
+network:node:port). In accordance with the strange Novell tradition,
+everything but the port is in hex. Not_Connected is displayed for sockets that
+are not tied to a specific remote address. The Tx and Rx queue sizes indicate
+the number of bytes pending for transmission and reception. The state
+indicates the state the socket is in and the uid is the owning uid of the
+socket.
+
+The /proc/net/ipx_interface file lists all IPX interfaces. For each interface
+it gives the network number, the node number, and indicates if the network is
+the primary network. It also indicates which device it is bound to (or
+Internal for internal networks) and the Frame Type if appropriate. Linux
+supports 802.3, 802.2, 802.2 SNAP and DIX (Blue Book) ethernet framing for
+IPX.
+
+The /proc/net/ipx_route table holds a list of IPX routes. For each route it
+gives the destination network, the router node (or Directly) and the network
+address of the router (or Connected) for internal networks.
+
+6. TIPC
+-------------------------------------------------------
+
+tipc_rmem
+----------
+
+The TIPC protocol now has a tunable for the receive memory, similar to the
+tcp_rmem - i.e. a vector of 3 INTEGERs: (min, default, max)
+
+ # cat /proc/sys/net/tipc/tipc_rmem
+ 4252725 34021800 68043600
+ #
+
+The max value is set to CONN_OVERLOAD_LIMIT, and the default and min values
+are scaled (shifted) versions of that same value. Note that the min value
+is not at this point in time used in any meaningful way, but the triplet is
+preserved in order to be consistent with things like tcp_rmem.
+
+named_timeout
+--------------
+
+TIPC name table updates are distributed asynchronously in a cluster, without
+any form of transaction handling. This means that different race scenarios are
+possible. One such is that a name withdrawal sent out by one node and received
+by another node may arrive after a second, overlapping name publication already
+has been accepted from a third node, although the conflicting updates
+originally may have been issued in the correct sequential order.
+If named_timeout is nonzero, failed topology updates will be placed on a defer
+queue until another event arrives that clears the error, or until the timeout
+expires. Value is in milliseconds.
diff --git a/Documentation/sysctl/sunrpc.txt b/Documentation/sysctl/sunrpc.txt
new file mode 100644
index 000000000..ae1ecac6f
--- /dev/null
+++ b/Documentation/sysctl/sunrpc.txt
@@ -0,0 +1,20 @@
+Documentation for /proc/sys/sunrpc/* kernel version 2.2.10
+ (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+
+For general info and legal blurb, please look in README.
+
+==============================================================
+
+This file contains the documentation for the sysctl files in
+/proc/sys/sunrpc and is valid for Linux kernel version 2.2.
+
+The files in this directory can be used to (re)set the debug
+flags of the SUN Remote Procedure Call (RPC) subsystem in
+the Linux kernel. This stuff is used for NFS, KNFSD and
+maybe a few other things as well.
+
+The files in there are used to control the debugging flags:
+rpc_debug, nfs_debug, nfsd_debug and nlm_debug.
+
+These flags are for kernel hackers only. You should read the
+source code in net/sunrpc/ for more information.
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
new file mode 100644
index 000000000..9832ec52f
--- /dev/null
+++ b/Documentation/sysctl/vm.txt
@@ -0,0 +1,815 @@
+Documentation for /proc/sys/vm/* kernel version 2.6.29
+ (c) 1998, 1999, Rik van Riel <riel@nl.linux.org>
+ (c) 2008 Peter W. Morreale <pmorreale@novell.com>
+
+For general info and legal blurb, please look in README.
+
+==============================================================
+
+This file contains the documentation for the sysctl files in
+/proc/sys/vm and is valid for Linux kernel version 2.6.29.
+
+The files in this directory can be used to tune the operation
+of the virtual memory (VM) subsystem of the Linux kernel and
+the writeout of dirty data to disk.
+
+Default values and initialization routines for most of these
+files can be found in mm/swap.c.
+
+Currently, these files are in /proc/sys/vm:
+
+- admin_reserve_kbytes
+- block_dump
+- compact_memory
+- compact_unevictable_allowed
+- dirty_background_bytes
+- dirty_background_ratio
+- dirty_bytes
+- dirty_expire_centisecs
+- dirty_ratio
+- dirty_writeback_centisecs
+- drop_caches
+- extfrag_threshold
+- hugepages_treat_as_movable
+- hugetlb_shm_group
+- laptop_mode
+- legacy_va_layout
+- lowmem_reserve_ratio
+- max_map_count
+- memory_failure_early_kill
+- memory_failure_recovery
+- min_free_kbytes
+- min_slab_ratio
+- min_unmapped_ratio
+- mmap_min_addr
+- nr_hugepages
+- nr_overcommit_hugepages
+- nr_trim_pages (only if CONFIG_MMU=n)
+- numa_zonelist_order
+- oom_dump_tasks
+- oom_kill_allocating_task
+- overcommit_kbytes
+- overcommit_memory
+- overcommit_ratio
+- page-cluster
+- panic_on_oom
+- percpu_pagelist_fraction
+- stat_interval
+- swappiness
+- user_reserve_kbytes
+- vfs_cache_pressure
+- zone_reclaim_mode
+
+==============================================================
+
+admin_reserve_kbytes
+
+The amount of free memory in the system that should be reserved for users
+with the capability cap_sys_admin.
+
+admin_reserve_kbytes defaults to min(3% of free pages, 8MB)
+
+That should provide enough for the admin to log in and kill a process,
+if necessary, under the default overcommit 'guess' mode.
+
+Systems running under overcommit 'never' should increase this to account
+for the full Virtual Memory Size of programs used to recover. Otherwise,
+root may not be able to log in to recover the system.
+
+How do you calculate a minimum useful reserve?
+
+sshd or login + bash (or some other shell) + top (or ps, kill, etc.)
+
+For overcommit 'guess', we can sum resident set sizes (RSS).
+On x86_64 this is about 8MB.
+
+For overcommit 'never', we can take the max of their virtual sizes (VSZ)
+and add the sum of their RSS.
+On x86_64 this is about 128MB.
+
+Changing this takes effect whenever an application requests memory.
+
+==============================================================
+
+block_dump
+
+block_dump enables block I/O debugging when set to a nonzero value. More
+information on block I/O debugging is in Documentation/laptops/laptop-mode.txt.
+
+==============================================================
+
+compact_memory
+
+Available only when CONFIG_COMPACTION is set. When 1 is written to the file,
+all zones are compacted such that free memory is available in contiguous
+blocks where possible. This can be important for example in the allocation of
+huge pages although processes will also directly compact memory as required.
+
+==============================================================
+
+compact_unevictable_allowed
+
+Available only when CONFIG_COMPACTION is set. When set to 1, compaction is
+allowed to examine the unevictable lru (mlocked pages) for pages to compact.
+This should be used on systems where stalls for minor page faults are an
+acceptable trade for large contiguous free memory. Set to 0 to prevent
+compaction from moving pages that are unevictable. Default value is 1.
+
+==============================================================
+
+dirty_background_bytes
+
+Contains the amount of dirty memory at which the background kernel
+flusher threads will start writeback.
+
+Note: dirty_background_bytes is the counterpart of dirty_background_ratio. Only
+one of them may be specified at a time. When one sysctl is written it is
+immediately taken into account to evaluate the dirty memory limits and the
+other appears as 0 when read.
+
+==============================================================
+
+dirty_background_ratio
+
+Contains, as a percentage of total available memory that contains free pages
+and reclaimable pages, the number of pages at which the background kernel
+flusher threads will start writing out dirty data.
+
+The total avaiable memory is not equal to total system memory.
+
+==============================================================
+
+dirty_bytes
+
+Contains the amount of dirty memory at which a process generating disk writes
+will itself start writeback.
+
+Note: dirty_bytes is the counterpart of dirty_ratio. Only one of them may be
+specified at a time. When one sysctl is written it is immediately taken into
+account to evaluate the dirty memory limits and the other appears as 0 when
+read.
+
+Note: the minimum value allowed for dirty_bytes is two pages (in bytes); any
+value lower than this limit will be ignored and the old configuration will be
+retained.
+
+==============================================================
+
+dirty_expire_centisecs
+
+This tunable is used to define when dirty data is old enough to be eligible
+for writeout by the kernel flusher threads. It is expressed in 100'ths
+of a second. Data which has been dirty in-memory for longer than this
+interval will be written out next time a flusher thread wakes up.
+
+==============================================================
+
+dirty_ratio
+
+Contains, as a percentage of total available memory that contains free pages
+and reclaimable pages, the number of pages at which a process which is
+generating disk writes will itself start writing out dirty data.
+
+The total avaiable memory is not equal to total system memory.
+
+==============================================================
+
+dirty_writeback_centisecs
+
+The kernel flusher threads will periodically wake up and write `old' data
+out to disk. This tunable expresses the interval between those wakeups, in
+100'ths of a second.
+
+Setting this to zero disables periodic writeback altogether.
+
+==============================================================
+
+drop_caches
+
+Writing to this will cause the kernel to drop clean caches, as well as
+reclaimable slab objects like dentries and inodes. Once dropped, their
+memory becomes free.
+
+To free pagecache:
+ echo 1 > /proc/sys/vm/drop_caches
+To free reclaimable slab objects (includes dentries and inodes):
+ echo 2 > /proc/sys/vm/drop_caches
+To free slab objects and pagecache:
+ echo 3 > /proc/sys/vm/drop_caches
+
+This is a non-destructive operation and will not free any dirty objects.
+To increase the number of objects freed by this operation, the user may run
+`sync' prior to writing to /proc/sys/vm/drop_caches. This will minimize the
+number of dirty objects on the system and create more candidates to be
+dropped.
+
+This file is not a means to control the growth of the various kernel caches
+(inodes, dentries, pagecache, etc...) These objects are automatically
+reclaimed by the kernel when memory is needed elsewhere on the system.
+
+Use of this file can cause performance problems. Since it discards cached
+objects, it may cost a significant amount of I/O and CPU to recreate the
+dropped objects, especially if they were under heavy use. Because of this,
+use outside of a testing or debugging environment is not recommended.
+
+You may see informational messages in your kernel log when this file is
+used:
+
+ cat (1234): drop_caches: 3
+
+These are informational only. They do not mean that anything is wrong
+with your system. To disable them, echo 4 (bit 3) into drop_caches.
+
+==============================================================
+
+extfrag_threshold
+
+This parameter affects whether the kernel will compact memory or direct
+reclaim to satisfy a high-order allocation. /proc/extfrag_index shows what
+the fragmentation index for each order is in each zone in the system. Values
+tending towards 0 imply allocations would fail due to lack of memory,
+values towards 1000 imply failures are due to fragmentation and -1 implies
+that the allocation will succeed as long as watermarks are met.
+
+The kernel will not compact memory in a zone if the
+fragmentation index is <= extfrag_threshold. The default value is 500.
+
+==============================================================
+
+hugepages_treat_as_movable
+
+This parameter controls whether we can allocate hugepages from ZONE_MOVABLE
+or not. If set to non-zero, hugepages can be allocated from ZONE_MOVABLE.
+ZONE_MOVABLE is created when kernel boot parameter kernelcore= is specified,
+so this parameter has no effect if used without kernelcore=.
+
+Hugepage migration is now available in some situations which depend on the
+architecture and/or the hugepage size. If a hugepage supports migration,
+allocation from ZONE_MOVABLE is always enabled for the hugepage regardless
+of the value of this parameter.
+IOW, this parameter affects only non-migratable hugepages.
+
+Assuming that hugepages are not migratable in your system, one usecase of
+this parameter is that users can make hugepage pool more extensible by
+enabling the allocation from ZONE_MOVABLE. This is because on ZONE_MOVABLE
+page reclaim/migration/compaction work more and you can get contiguous
+memory more likely. Note that using ZONE_MOVABLE for non-migratable
+hugepages can do harm to other features like memory hotremove (because
+memory hotremove expects that memory blocks on ZONE_MOVABLE are always
+removable,) so it's a trade-off responsible for the users.
+
+==============================================================
+
+hugetlb_shm_group
+
+hugetlb_shm_group contains group id that is allowed to create SysV
+shared memory segment using hugetlb page.
+
+==============================================================
+
+laptop_mode
+
+laptop_mode is a knob that controls "laptop mode". All the things that are
+controlled by this knob are discussed in Documentation/laptops/laptop-mode.txt.
+
+==============================================================
+
+legacy_va_layout
+
+If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel
+will use the legacy (2.4) layout for all processes.
+
+==============================================================
+
+lowmem_reserve_ratio
+
+For some specialised workloads on highmem machines it is dangerous for
+the kernel to allow process memory to be allocated from the "lowmem"
+zone. This is because that memory could then be pinned via the mlock()
+system call, or by unavailability of swapspace.
+
+And on large highmem machines this lack of reclaimable lowmem memory
+can be fatal.
+
+So the Linux page allocator has a mechanism which prevents allocations
+which _could_ use highmem from using too much lowmem. This means that
+a certain amount of lowmem is defended from the possibility of being
+captured into pinned user memory.
+
+(The same argument applies to the old 16 megabyte ISA DMA region. This
+mechanism will also defend that region from allocations which could use
+highmem or lowmem).
+
+The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is
+in defending these lower zones.
+
+If you have a machine which uses highmem or ISA DMA and your
+applications are using mlock(), or if you are running with no swap then
+you probably should change the lowmem_reserve_ratio setting.
+
+The lowmem_reserve_ratio is an array. You can see them by reading this file.
+-
+% cat /proc/sys/vm/lowmem_reserve_ratio
+256 256 32
+-
+Note: # of this elements is one fewer than number of zones. Because the highest
+ zone's value is not necessary for following calculation.
+
+But, these values are not used directly. The kernel calculates # of protection
+pages for each zones from them. These are shown as array of protection pages
+in /proc/zoneinfo like followings. (This is an example of x86-64 box).
+Each zone has an array of protection pages like this.
+
+-
+Node 0, zone DMA
+ pages free 1355
+ min 3
+ low 3
+ high 4
+ :
+ :
+ numa_other 0
+ protection: (0, 2004, 2004, 2004)
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+ pagesets
+ cpu: 0 pcp: 0
+ :
+-
+These protections are added to score to judge whether this zone should be used
+for page allocation or should be reclaimed.
+
+In this example, if normal pages (index=2) are required to this DMA zone and
+watermark[WMARK_HIGH] is used for watermark, the kernel judges this zone should
+not be used because pages_free(1355) is smaller than watermark + protection[2]
+(4 + 2004 = 2008). If this protection value is 0, this zone would be used for
+normal page requirement. If requirement is DMA zone(index=0), protection[0]
+(=0) is used.
+
+zone[i]'s protection[j] is calculated by following expression.
+
+(i < j):
+ zone[i]->protection[j]
+ = (total sums of present_pages from zone[i+1] to zone[j] on the node)
+ / lowmem_reserve_ratio[i];
+(i = j):
+ (should not be protected. = 0;
+(i > j):
+ (not necessary, but looks 0)
+
+The default values of lowmem_reserve_ratio[i] are
+ 256 (if zone[i] means DMA or DMA32 zone)
+ 32 (others).
+As above expression, they are reciprocal number of ratio.
+256 means 1/256. # of protection pages becomes about "0.39%" of total present
+pages of higher zones on the node.
+
+If you would like to protect more pages, smaller values are effective.
+The minimum value is 1 (1/1 -> 100%).
+
+==============================================================
+
+max_map_count:
+
+This file contains the maximum number of memory map areas a process
+may have. Memory map areas are used as a side-effect of calling
+malloc, directly by mmap and mprotect, and also when loading shared
+libraries.
+
+While most applications need less than a thousand maps, certain
+programs, particularly malloc debuggers, may consume lots of them,
+e.g., up to one or two maps per allocation.
+
+The default value is 65536.
+
+=============================================================
+
+memory_failure_early_kill:
+
+Control how to kill processes when uncorrected memory error (typically
+a 2bit error in a memory module) is detected in the background by hardware
+that cannot be handled by the kernel. In some cases (like the page
+still having a valid copy on disk) the kernel will handle the failure
+transparently without affecting any applications. But if there is
+no other uptodate copy of the data it will kill to prevent any data
+corruptions from propagating.
+
+1: Kill all processes that have the corrupted and not reloadable page mapped
+as soon as the corruption is detected. Note this is not supported
+for a few types of pages, like kernel internally allocated data or
+the swap cache, but works for the majority of user pages.
+
+0: Only unmap the corrupted page from all processes and only kill a process
+who tries to access it.
+
+The kill is done using a catchable SIGBUS with BUS_MCEERR_AO, so processes can
+handle this if they want to.
+
+This is only active on architectures/platforms with advanced machine
+check handling and depends on the hardware capabilities.
+
+Applications can override this setting individually with the PR_MCE_KILL prctl
+
+==============================================================
+
+memory_failure_recovery
+
+Enable memory failure recovery (when supported by the platform)
+
+1: Attempt recovery.
+
+0: Always panic on a memory failure.
+
+==============================================================
+
+min_free_kbytes:
+
+This is used to force the Linux VM to keep a minimum number
+of kilobytes free. The VM uses this number to compute a
+watermark[WMARK_MIN] value for each lowmem zone in the system.
+Each lowmem zone gets a number of reserved free pages based
+proportionally on its size.
+
+Some minimal amount of memory is needed to satisfy PF_MEMALLOC
+allocations; if you set this to lower than 1024KB, your system will
+become subtly broken, and prone to deadlock under high loads.
+
+Setting this too high will OOM your machine instantly.
+
+=============================================================
+
+min_slab_ratio:
+
+This is available only on NUMA kernels.
+
+A percentage of the total pages in each zone. On Zone reclaim
+(fallback from the local zone occurs) slabs will be reclaimed if more
+than this percentage of pages in a zone are reclaimable slab pages.
+This insures that the slab growth stays under control even in NUMA
+systems that rarely perform global reclaim.
+
+The default is 5 percent.
+
+Note that slab reclaim is triggered in a per zone / node fashion.
+The process of reclaiming slab memory is currently not node specific
+and may not be fast.
+
+=============================================================
+
+min_unmapped_ratio:
+
+This is available only on NUMA kernels.
+
+This is a percentage of the total pages in each zone. Zone reclaim will
+only occur if more than this percentage of pages are in a state that
+zone_reclaim_mode allows to be reclaimed.
+
+If zone_reclaim_mode has the value 4 OR'd, then the percentage is compared
+against all file-backed unmapped pages including swapcache pages and tmpfs
+files. Otherwise, only unmapped pages backed by normal files but not tmpfs
+files and similar are considered.
+
+The default is 1 percent.
+
+==============================================================
+
+mmap_min_addr
+
+This file indicates the amount of address space which a user process will
+be restricted from mmapping. Since kernel null dereference bugs could
+accidentally operate based on the information in the first couple of pages
+of memory userspace processes should not be allowed to write to them. By
+default this value is set to 0 and no protections will be enforced by the
+security module. Setting this value to something like 64k will allow the
+vast majority of applications to work correctly and provide defense in depth
+against future potential kernel bugs.
+
+==============================================================
+
+nr_hugepages
+
+Change the minimum size of the hugepage pool.
+
+See Documentation/vm/hugetlbpage.txt
+
+==============================================================
+
+nr_overcommit_hugepages
+
+Change the maximum size of the hugepage pool. The maximum is
+nr_hugepages + nr_overcommit_hugepages.
+
+See Documentation/vm/hugetlbpage.txt
+
+==============================================================
+
+nr_trim_pages
+
+This is available only on NOMMU kernels.
+
+This value adjusts the excess page trimming behaviour of power-of-2 aligned
+NOMMU mmap allocations.
+
+A value of 0 disables trimming of allocations entirely, while a value of 1
+trims excess pages aggressively. Any value >= 1 acts as the watermark where
+trimming of allocations is initiated.
+
+The default value is 1.
+
+See Documentation/nommu-mmap.txt for more information.
+
+==============================================================
+
+numa_zonelist_order
+
+This sysctl is only for NUMA.
+'where the memory is allocated from' is controlled by zonelists.
+(This documentation ignores ZONE_HIGHMEM/ZONE_DMA32 for simple explanation.
+ you may be able to read ZONE_DMA as ZONE_DMA32...)
+
+In non-NUMA case, a zonelist for GFP_KERNEL is ordered as following.
+ZONE_NORMAL -> ZONE_DMA
+This means that a memory allocation request for GFP_KERNEL will
+get memory from ZONE_DMA only when ZONE_NORMAL is not available.
+
+In NUMA case, you can think of following 2 types of order.
+Assume 2 node NUMA and below is zonelist of Node(0)'s GFP_KERNEL
+
+(A) Node(0) ZONE_NORMAL -> Node(0) ZONE_DMA -> Node(1) ZONE_NORMAL
+(B) Node(0) ZONE_NORMAL -> Node(1) ZONE_NORMAL -> Node(0) ZONE_DMA.
+
+Type(A) offers the best locality for processes on Node(0), but ZONE_DMA
+will be used before ZONE_NORMAL exhaustion. This increases possibility of
+out-of-memory(OOM) of ZONE_DMA because ZONE_DMA is tend to be small.
+
+Type(B) cannot offer the best locality but is more robust against OOM of
+the DMA zone.
+
+Type(A) is called as "Node" order. Type (B) is "Zone" order.
+
+"Node order" orders the zonelists by node, then by zone within each node.
+Specify "[Nn]ode" for node order
+
+"Zone Order" orders the zonelists by zone type, then by node within each
+zone. Specify "[Zz]one" for zone order.
+
+Specify "[Dd]efault" to request automatic configuration. Autoconfiguration
+will select "node" order in following case.
+(1) if the DMA zone does not exist or
+(2) if the DMA zone comprises greater than 50% of the available memory or
+(3) if any node's DMA zone comprises greater than 70% of its local memory and
+ the amount of local memory is big enough.
+
+Otherwise, "zone" order will be selected. Default order is recommended unless
+this is causing problems for your system/application.
+
+==============================================================
+
+oom_dump_tasks
+
+Enables a system-wide task dump (excluding kernel threads) to be produced
+when the kernel performs an OOM-killing and includes such information as
+pid, uid, tgid, vm size, rss, nr_ptes, nr_pmds, swapents, oom_score_adj
+score, and name. This is helpful to determine why the OOM killer was
+invoked, to identify the rogue task that caused it, and to determine why
+the OOM killer chose the task it did to kill.
+
+If this is set to zero, this information is suppressed. On very
+large systems with thousands of tasks it may not be feasible to dump
+the memory state information for each one. Such systems should not
+be forced to incur a performance penalty in OOM conditions when the
+information may not be desired.
+
+If this is set to non-zero, this information is shown whenever the
+OOM killer actually kills a memory-hogging task.
+
+The default value is 1 (enabled).
+
+==============================================================
+
+oom_kill_allocating_task
+
+This enables or disables killing the OOM-triggering task in
+out-of-memory situations.
+
+If this is set to zero, the OOM killer will scan through the entire
+tasklist and select a task based on heuristics to kill. This normally
+selects a rogue memory-hogging task that frees up a large amount of
+memory when killed.
+
+If this is set to non-zero, the OOM killer simply kills the task that
+triggered the out-of-memory condition. This avoids the expensive
+tasklist scan.
+
+If panic_on_oom is selected, it takes precedence over whatever value
+is used in oom_kill_allocating_task.
+
+The default value is 0.
+
+==============================================================
+
+overcommit_kbytes:
+
+When overcommit_memory is set to 2, the committed address space is not
+permitted to exceed swap plus this amount of physical RAM. See below.
+
+Note: overcommit_kbytes is the counterpart of overcommit_ratio. Only one
+of them may be specified at a time. Setting one disables the other (which
+then appears as 0 when read).
+
+==============================================================
+
+overcommit_memory:
+
+This value contains a flag that enables memory overcommitment.
+
+When this flag is 0, the kernel attempts to estimate the amount
+of free memory left when userspace requests more memory.
+
+When this flag is 1, the kernel pretends there is always enough
+memory until it actually runs out.
+
+When this flag is 2, the kernel uses a "never overcommit"
+policy that attempts to prevent any overcommit of memory.
+Note that user_reserve_kbytes affects this policy.
+
+This feature can be very useful because there are a lot of
+programs that malloc() huge amounts of memory "just-in-case"
+and don't use much of it.
+
+The default value is 0.
+
+See Documentation/vm/overcommit-accounting and
+security/commoncap.c::cap_vm_enough_memory() for more information.
+
+==============================================================
+
+overcommit_ratio:
+
+When overcommit_memory is set to 2, the committed address
+space is not permitted to exceed swap plus this percentage
+of physical RAM. See above.
+
+==============================================================
+
+page-cluster
+
+page-cluster controls the number of pages up to which consecutive pages
+are read in from swap in a single attempt. This is the swap counterpart
+to page cache readahead.
+The mentioned consecutivity is not in terms of virtual/physical addresses,
+but consecutive on swap space - that means they were swapped out together.
+
+It is a logarithmic value - setting it to zero means "1 page", setting
+it to 1 means "2 pages", setting it to 2 means "4 pages", etc.
+Zero disables swap readahead completely.
+
+The default value is three (eight pages at a time). There may be some
+small benefits in tuning this to a different value if your workload is
+swap-intensive.
+
+Lower values mean lower latencies for initial faults, but at the same time
+extra faults and I/O delays for following faults if they would have been part of
+that consecutive pages readahead would have brought in.
+
+=============================================================
+
+panic_on_oom
+
+This enables or disables panic on out-of-memory feature.
+
+If this is set to 0, the kernel will kill some rogue process,
+called oom_killer. Usually, oom_killer can kill rogue processes and
+system will survive.
+
+If this is set to 1, the kernel panics when out-of-memory happens.
+However, if a process limits using nodes by mempolicy/cpusets,
+and those nodes become memory exhaustion status, one process
+may be killed by oom-killer. No panic occurs in this case.
+Because other nodes' memory may be free. This means system total status
+may be not fatal yet.
+
+If this is set to 2, the kernel panics compulsorily even on the
+above-mentioned. Even oom happens under memory cgroup, the whole
+system panics.
+
+The default value is 0.
+1 and 2 are for failover of clustering. Please select either
+according to your policy of failover.
+panic_on_oom=2+kdump gives you very strong tool to investigate
+why oom happens. You can get snapshot.
+
+=============================================================
+
+percpu_pagelist_fraction
+
+This is the fraction of pages at most (high mark pcp->high) in each zone that
+are allocated for each per cpu page list. The min value for this is 8. It
+means that we don't allow more than 1/8th of pages in each zone to be
+allocated in any single per_cpu_pagelist. This entry only changes the value
+of hot per cpu pagelists. User can specify a number like 100 to allocate
+1/100th of each zone to each per cpu page list.
+
+The batch value of each per cpu pagelist is also updated as a result. It is
+set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8)
+
+The initial value is zero. Kernel does not use this value at boot time to set
+the high water marks for each per cpu page list. If the user writes '0' to this
+sysctl, it will revert to this default behavior.
+
+==============================================================
+
+stat_interval
+
+The time interval between which vm statistics are updated. The default
+is 1 second.
+
+==============================================================
+
+swappiness
+
+This control is used to define how aggressive the kernel will swap
+memory pages. Higher values will increase agressiveness, lower values
+decrease the amount of swap. A value of 0 instructs the kernel not to
+initiate swap until the amount of free and file-backed pages is less
+than the high water mark in a zone.
+
+The default value is 60.
+
+==============================================================
+
+- user_reserve_kbytes
+
+When overcommit_memory is set to 2, "never overcommit" mode, reserve
+min(3% of current process size, user_reserve_kbytes) of free memory.
+This is intended to prevent a user from starting a single memory hogging
+process, such that they cannot recover (kill the hog).
+
+user_reserve_kbytes defaults to min(3% of the current process size, 128MB).
+
+If this is reduced to zero, then the user will be allowed to allocate
+all free memory with a single process, minus admin_reserve_kbytes.
+Any subsequent attempts to execute a command will result in
+"fork: Cannot allocate memory".
+
+Changing this takes effect whenever an application requests memory.
+
+==============================================================
+
+vfs_cache_pressure
+------------------
+
+This percentage value controls the tendency of the kernel to reclaim
+the memory which is used for caching of directory and inode objects.
+
+At the default value of vfs_cache_pressure=100 the kernel will attempt to
+reclaim dentries and inodes at a "fair" rate with respect to pagecache and
+swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer
+to retain dentry and inode caches. When vfs_cache_pressure=0, the kernel will
+never reclaim dentries and inodes due to memory pressure and this can easily
+lead to out-of-memory conditions. Increasing vfs_cache_pressure beyond 100
+causes the kernel to prefer to reclaim dentries and inodes.
+
+Increasing vfs_cache_pressure significantly beyond 100 may have negative
+performance impact. Reclaim code needs to take various locks to find freeable
+directory and inode objects. With vfs_cache_pressure=1000, it will look for
+ten times more freeable objects than there are.
+
+==============================================================
+
+zone_reclaim_mode:
+
+Zone_reclaim_mode allows someone to set more or less aggressive approaches to
+reclaim memory when a zone runs out of memory. If it is set to zero then no
+zone reclaim occurs. Allocations will be satisfied from other zones / nodes
+in the system.
+
+This is value ORed together of
+
+1 = Zone reclaim on
+2 = Zone reclaim writes dirty pages out
+4 = Zone reclaim swaps pages
+
+zone_reclaim_mode is disabled by default. For file servers or workloads
+that benefit from having their data cached, zone_reclaim_mode should be
+left disabled as the caching effect is likely to be more important than
+data locality.
+
+zone_reclaim may be enabled if it's known that the workload is partitioned
+such that each partition fits within a NUMA node and that accessing remote
+memory would cause a measurable performance reduction. The page allocator
+will then reclaim easily reusable pages (those page cache pages that are
+currently not used) before allocating off node pages.
+
+Allowing zone reclaim to write out pages stops processes that are
+writing large amounts of data from dirtying pages on other nodes. Zone
+reclaim will write out dirty pages if a zone fills up and so effectively
+throttle the process. This may decrease the performance of a single process
+since it cannot use all of system memory to buffer the outgoing writes
+anymore but it preserve the memory on other nodes so that the performance
+of other processes running on other nodes will not be affected.
+
+Allowing regular swap effectively restricts allocations to the local
+node unless explicitly overridden by memory policies or cpuset
+configurations.
+
+============ End of Document =================================
diff --git a/Documentation/sysfs-rules.txt b/Documentation/sysfs-rules.txt
new file mode 100644
index 000000000..ce60ffa94
--- /dev/null
+++ b/Documentation/sysfs-rules.txt
@@ -0,0 +1,184 @@
+Rules on how to access information in the Linux kernel sysfs
+
+The kernel-exported sysfs exports internal kernel implementation details
+and depends on internal kernel structures and layout. It is agreed upon
+by the kernel developers that the Linux kernel does not provide a stable
+internal API. Therefore, there are aspects of the sysfs interface that
+may not be stable across kernel releases.
+
+To minimize the risk of breaking users of sysfs, which are in most cases
+low-level userspace applications, with a new kernel release, the users
+of sysfs must follow some rules to use an as-abstract-as-possible way to
+access this filesystem. The current udev and HAL programs already
+implement this and users are encouraged to plug, if possible, into the
+abstractions these programs provide instead of accessing sysfs directly.
+
+But if you really do want or need to access sysfs directly, please follow
+the following rules and then your programs should work with future
+versions of the sysfs interface.
+
+- Do not use libsysfs
+ It makes assumptions about sysfs which are not true. Its API does not
+ offer any abstraction, it exposes all the kernel driver-core
+ implementation details in its own API. Therefore it is not better than
+ reading directories and opening the files yourself.
+ Also, it is not actively maintained, in the sense of reflecting the
+ current kernel development. The goal of providing a stable interface
+ to sysfs has failed; it causes more problems than it solves. It
+ violates many of the rules in this document.
+
+- sysfs is always at /sys
+ Parsing /proc/mounts is a waste of time. Other mount points are a
+ system configuration bug you should not try to solve. For test cases,
+ possibly support a SYSFS_PATH environment variable to overwrite the
+ application's behavior, but never try to search for sysfs. Never try
+ to mount it, if you are not an early boot script.
+
+- devices are only "devices"
+ There is no such thing like class-, bus-, physical devices,
+ interfaces, and such that you can rely on in userspace. Everything is
+ just simply a "device". Class-, bus-, physical, ... types are just
+ kernel implementation details which should not be expected by
+ applications that look for devices in sysfs.
+
+ The properties of a device are:
+ o devpath (/devices/pci0000:00/0000:00:1d.1/usb2/2-2/2-2:1.0)
+ - identical to the DEVPATH value in the event sent from the kernel
+ at device creation and removal
+ - the unique key to the device at that point in time
+ - the kernel's path to the device directory without the leading
+ /sys, and always starting with a slash
+ - all elements of a devpath must be real directories. Symlinks
+ pointing to /sys/devices must always be resolved to their real
+ target and the target path must be used to access the device.
+ That way the devpath to the device matches the devpath of the
+ kernel used at event time.
+ - using or exposing symlink values as elements in a devpath string
+ is a bug in the application
+
+ o kernel name (sda, tty, 0000:00:1f.2, ...)
+ - a directory name, identical to the last element of the devpath
+ - applications need to handle spaces and characters like '!' in
+ the name
+
+ o subsystem (block, tty, pci, ...)
+ - simple string, never a path or a link
+ - retrieved by reading the "subsystem"-link and using only the
+ last element of the target path
+
+ o driver (tg3, ata_piix, uhci_hcd)
+ - a simple string, which may contain spaces, never a path or a
+ link
+ - it is retrieved by reading the "driver"-link and using only the
+ last element of the target path
+ - devices which do not have "driver"-link just do not have a
+ driver; copying the driver value in a child device context is a
+ bug in the application
+
+ o attributes
+ - the files in the device directory or files below subdirectories
+ of the same device directory
+ - accessing attributes reached by a symlink pointing to another device,
+ like the "device"-link, is a bug in the application
+
+ Everything else is just a kernel driver-core implementation detail
+ that should not be assumed to be stable across kernel releases.
+
+- Properties of parent devices never belong into a child device.
+ Always look at the parent devices themselves for determining device
+ context properties. If the device 'eth0' or 'sda' does not have a
+ "driver"-link, then this device does not have a driver. Its value is empty.
+ Never copy any property of the parent-device into a child-device. Parent
+ device properties may change dynamically without any notice to the
+ child device.
+
+- Hierarchy in a single device tree
+ There is only one valid place in sysfs where hierarchy can be examined
+ and this is below: /sys/devices.
+ It is planned that all device directories will end up in the tree
+ below this directory.
+
+- Classification by subsystem
+ There are currently three places for classification of devices:
+ /sys/block, /sys/class and /sys/bus. It is planned that these will
+ not contain any device directories themselves, but only flat lists of
+ symlinks pointing to the unified /sys/devices tree.
+ All three places have completely different rules on how to access
+ device information. It is planned to merge all three
+ classification directories into one place at /sys/subsystem,
+ following the layout of the bus directories. All buses and
+ classes, including the converted block subsystem, will show up
+ there.
+ The devices belonging to a subsystem will create a symlink in the
+ "devices" directory at /sys/subsystem/<name>/devices.
+
+ If /sys/subsystem exists, /sys/bus, /sys/class and /sys/block can be
+ ignored. If it does not exist, you always have to scan all three
+ places, as the kernel is free to move a subsystem from one place to
+ the other, as long as the devices are still reachable by the same
+ subsystem name.
+
+ Assuming /sys/class/<subsystem> and /sys/bus/<subsystem>, or
+ /sys/block and /sys/class/block are not interchangeable is a bug in
+ the application.
+
+- Block
+ The converted block subsystem at /sys/class/block or
+ /sys/subsystem/block will contain the links for disks and partitions
+ at the same level, never in a hierarchy. Assuming the block subsystem to
+ contain only disks and not partition devices in the same flat list is
+ a bug in the application.
+
+- "device"-link and <subsystem>:<kernel name>-links
+ Never depend on the "device"-link. The "device"-link is a workaround
+ for the old layout, where class devices are not created in
+ /sys/devices/ like the bus devices. If the link-resolving of a
+ device directory does not end in /sys/devices/, you can use the
+ "device"-link to find the parent devices in /sys/devices/. That is the
+ single valid use of the "device"-link; it must never appear in any
+ path as an element. Assuming the existence of the "device"-link for
+ a device in /sys/devices/ is a bug in the application.
+ Accessing /sys/class/net/eth0/device is a bug in the application.
+
+ Never depend on the class-specific links back to the /sys/class
+ directory. These links are also a workaround for the design mistake
+ that class devices are not created in /sys/devices. If a device
+ directory does not contain directories for child devices, these links
+ may be used to find the child devices in /sys/class. That is the single
+ valid use of these links; they must never appear in any path as an
+ element. Assuming the existence of these links for devices which are
+ real child device directories in the /sys/devices tree is a bug in
+ the application.
+
+ It is planned to remove all these links when all class device
+ directories live in /sys/devices.
+
+- Position of devices along device chain can change.
+ Never depend on a specific parent device position in the devpath,
+ or the chain of parent devices. The kernel is free to insert devices into
+ the chain. You must always request the parent device you are looking for
+ by its subsystem value. You need to walk up the chain until you find
+ the device that matches the expected subsystem. Depending on a specific
+ position of a parent device or exposing relative paths using "../" to
+ access the chain of parents is a bug in the application.
+
+- When reading and writing sysfs device attribute files, avoid dependency
+ on specific error codes wherever possible. This minimizes coupling to
+ the error handling implementation within the kernel.
+
+ In general, failures to read or write sysfs device attributes shall
+ propagate errors wherever possible. Common errors include, but are not
+ limited to:
+
+ -EIO: The read or store operation is not supported, typically returned by
+ the sysfs system itself if the read or store pointer is NULL.
+
+ -ENXIO: The read or store operation failed
+
+ Error codes will not be changed without good reason, and should a change
+ to error codes result in user-space breakage, it will be fixed, or the
+ the offending change will be reverted.
+
+ Userspace applications can, however, expect the format and contents of
+ the attribute files to remain consistent in the absence of a version
+ attribute change in the context of a given attribute.
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
new file mode 100644
index 000000000..0e307c948
--- /dev/null
+++ b/Documentation/sysrq.txt
@@ -0,0 +1,255 @@
+Linux Magic System Request Key Hacks
+Documentation for sysrq.c
+
+* What is the magic SysRq key?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It is a 'magical' key combo you can hit which the kernel will respond to
+regardless of whatever else it is doing, unless it is completely locked up.
+
+* How do I enable the magic SysRq key?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+You need to say "yes" to 'Magic SysRq key (CONFIG_MAGIC_SYSRQ)' when
+configuring the kernel. When running a kernel with SysRq compiled in,
+/proc/sys/kernel/sysrq controls the functions allowed to be invoked via
+the SysRq key. The default value in this file is set by the
+CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE config symbol, which itself defaults
+to 1. Here is the list of possible values in /proc/sys/kernel/sysrq:
+ 0 - disable sysrq completely
+ 1 - enable all functions of sysrq
+ >1 - bitmask of allowed sysrq functions (see below for detailed function
+ description):
+ 2 = 0x2 - enable control of console logging level
+ 4 = 0x4 - enable control of keyboard (SAK, unraw)
+ 8 = 0x8 - enable debugging dumps of processes etc.
+ 16 = 0x10 - enable sync command
+ 32 = 0x20 - enable remount read-only
+ 64 = 0x40 - enable signalling of processes (term, kill, oom-kill)
+ 128 = 0x80 - allow reboot/poweroff
+ 256 = 0x100 - allow nicing of all RT tasks
+
+You can set the value in the file by the following command:
+ echo "number" >/proc/sys/kernel/sysrq
+
+The number may be written here either as decimal or as hexadecimal
+with the 0x prefix. CONFIG_MAGIC_SYSRQ_DEFAULT_ENABLE must always be
+written in hexadecimal.
+
+Note that the value of /proc/sys/kernel/sysrq influences only the invocation
+via a keyboard. Invocation of any operation via /proc/sysrq-trigger is always
+allowed (by a user with admin privileges).
+
+* How do I use the magic SysRq key?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+On x86 - You press the key combo 'ALT-SysRq-<command key>'. Note - Some
+ keyboards may not have a key labeled 'SysRq'. The 'SysRq' key is
+ also known as the 'Print Screen' key. Also some keyboards cannot
+ handle so many keys being pressed at the same time, so you might
+ have better luck with "press Alt", "press SysRq", "release SysRq",
+ "press <command key>", release everything.
+
+On SPARC - You press 'ALT-STOP-<command key>', I believe.
+
+On the serial console (PC style standard serial ports only) -
+ You send a BREAK, then within 5 seconds a command key. Sending
+ BREAK twice is interpreted as a normal BREAK.
+
+On PowerPC - Press 'ALT - Print Screen (or F13) - <command key>,
+ Print Screen (or F13) - <command key> may suffice.
+
+On other - If you know of the key combos for other architectures, please
+ let me know so I can add them to this section.
+
+On all - write a character to /proc/sysrq-trigger. e.g.:
+
+ echo t > /proc/sysrq-trigger
+
+* What are the 'command' keys?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+'b' - Will immediately reboot the system without syncing or unmounting
+ your disks.
+
+'c' - Will perform a system crash by a NULL pointer dereference.
+ A crashdump will be taken if configured.
+
+'d' - Shows all locks that are held.
+
+'e' - Send a SIGTERM to all processes, except for init.
+
+'f' - Will call oom_kill to kill a memory hog process.
+
+'g' - Used by kgdb (kernel debugger)
+
+'h' - Will display help (actually any other key than those listed
+ here will display help. but 'h' is easy to remember :-)
+
+'i' - Send a SIGKILL to all processes, except for init.
+
+'j' - Forcibly "Just thaw it" - filesystems frozen by the FIFREEZE ioctl.
+
+'k' - Secure Access Key (SAK) Kills all programs on the current virtual
+ console. NOTE: See important comments below in SAK section.
+
+'l' - Shows a stack backtrace for all active CPUs.
+
+'m' - Will dump current memory info to your console.
+
+'n' - Used to make RT tasks nice-able
+
+'o' - Will shut your system off (if configured and supported).
+
+'p' - Will dump the current registers and flags to your console.
+
+'q' - Will dump per CPU lists of all armed hrtimers (but NOT regular
+ timer_list timers) and detailed information about all
+ clockevent devices.
+
+'r' - Turns off keyboard raw mode and sets it to XLATE.
+
+'s' - Will attempt to sync all mounted filesystems.
+
+'t' - Will dump a list of current tasks and their information to your
+ console.
+
+'u' - Will attempt to remount all mounted filesystems read-only.
+
+'v' - Forcefully restores framebuffer console
+'v' - Causes ETM buffer dump [ARM-specific]
+
+'w' - Dumps tasks that are in uninterruptable (blocked) state.
+
+'x' - Used by xmon interface on ppc/powerpc platforms.
+ Show global PMU Registers on sparc64.
+
+'y' - Show global CPU Registers [SPARC-64 specific]
+
+'z' - Dump the ftrace buffer
+
+'0'-'9' - Sets the console log level, controlling which kernel messages
+ will be printed to your console. ('0', for example would make
+ it so that only emergency messages like PANICs or OOPSes would
+ make it to your console.)
+
+* Okay, so what can I use them for?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Well, unraw(r) is very handy when your X server or a svgalib program crashes.
+
+sak(k) (Secure Access Key) is useful when you want to be sure there is no
+trojan program running at console which could grab your password
+when you would try to login. It will kill all programs on given console,
+thus letting you make sure that the login prompt you see is actually
+the one from init, not some trojan program.
+IMPORTANT: In its true form it is not a true SAK like the one in a :IMPORTANT
+IMPORTANT: c2 compliant system, and it should not be mistaken as :IMPORTANT
+IMPORTANT: such. :IMPORTANT
+ It seems others find it useful as (System Attention Key) which is
+useful when you want to exit a program that will not let you switch consoles.
+(For example, X or a svgalib program.)
+
+reboot(b) is good when you're unable to shut down. But you should also
+sync(s) and umount(u) first.
+
+crash(c) can be used to manually trigger a crashdump when the system is hung.
+Note that this just triggers a crash if there is no dump mechanism available.
+
+sync(s) is great when your system is locked up, it allows you to sync your
+disks and will certainly lessen the chance of data loss and fscking. Note
+that the sync hasn't taken place until you see the "OK" and "Done" appear
+on the screen. (If the kernel is really in strife, you may not ever get the
+OK or Done message...)
+
+umount(u) is basically useful in the same ways as sync(s). I generally sync(s),
+umount(u), then reboot(b) when my system locks. It's saved me many a fsck.
+Again, the unmount (remount read-only) hasn't taken place until you see the
+"OK" and "Done" message appear on the screen.
+
+The loglevels '0'-'9' are useful when your console is being flooded with
+kernel messages you do not want to see. Selecting '0' will prevent all but
+the most urgent kernel messages from reaching your console. (They will
+still be logged if syslogd/klogd are alive, though.)
+
+term(e) and kill(i) are useful if you have some sort of runaway process you
+are unable to kill any other way, especially if it's spawning other
+processes.
+
+"just thaw it(j)" is useful if your system becomes unresponsive due to a frozen
+(probably root) filesystem via the FIFREEZE ioctl.
+
+* Sometimes SysRq seems to get 'stuck' after using it, what can I do?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+That happens to me, also. I've found that tapping shift, alt, and control
+on both sides of the keyboard, and hitting an invalid sysrq sequence again
+will fix the problem. (i.e., something like alt-sysrq-z). Switching to another
+virtual console (ALT+Fn) and then back again should also help.
+
+* I hit SysRq, but nothing seems to happen, what's wrong?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+There are some keyboards that produce a different keycode for SysRq than the
+pre-defined value of 99 (see KEY_SYSRQ in include/linux/input.h), or which
+don't have a SysRq key at all. In these cases, run 'showkey -s' to find an
+appropriate scancode sequence, and use 'setkeycodes <sequence> 99' to map
+this sequence to the usual SysRq code (e.g., 'setkeycodes e05b 99'). It's
+probably best to put this command in a boot script. Oh, and by the way, you
+exit 'showkey' by not typing anything for ten seconds.
+
+* I want to add SysRQ key events to a module, how does it work?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+In order to register a basic function with the table, you must first include
+the header 'include/linux/sysrq.h', this will define everything else you need.
+Next, you must create a sysrq_key_op struct, and populate it with A) the key
+handler function you will use, B) a help_msg string, that will print when SysRQ
+prints help, and C) an action_msg string, that will print right before your
+handler is called. Your handler must conform to the prototype in 'sysrq.h'.
+
+After the sysrq_key_op is created, you can call the kernel function
+register_sysrq_key(int key, struct sysrq_key_op *op_p); this will
+register the operation pointed to by 'op_p' at table key 'key',
+if that slot in the table is blank. At module unload time, you must call
+the function unregister_sysrq_key(int key, struct sysrq_key_op *op_p), which
+will remove the key op pointed to by 'op_p' from the key 'key', if and only if
+it is currently registered in that slot. This is in case the slot has been
+overwritten since you registered it.
+
+The Magic SysRQ system works by registering key operations against a key op
+lookup table, which is defined in 'drivers/char/sysrq.c'. This key table has
+a number of operations registered into it at compile time, but is mutable,
+and 2 functions are exported for interface to it:
+ register_sysrq_key and unregister_sysrq_key.
+Of course, never ever leave an invalid pointer in the table. I.e., when
+your module that called register_sysrq_key() exits, it must call
+unregister_sysrq_key() to clean up the sysrq key table entry that it used.
+Null pointers in the table are always safe. :)
+
+If for some reason you feel the need to call the handle_sysrq function from
+within a function called by handle_sysrq, you must be aware that you are in
+a lock (you are also in an interrupt handler, which means don't sleep!), so
+you must call __handle_sysrq_nolock instead.
+
+* When I hit a SysRq key combination only the header appears on the console?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Sysrq output is subject to the same console loglevel control as all
+other console output. This means that if the kernel was booted 'quiet'
+as is common on distro kernels the output may not appear on the actual
+console, even though it will appear in the dmesg buffer, and be accessible
+via the dmesg command and to the consumers of /proc/kmsg. As a specific
+exception the header line from the sysrq command is passed to all console
+consumers as if the current loglevel was maximum. If only the header
+is emitted it is almost certain that the kernel loglevel is too low.
+Should you require the output on the console channel then you will need
+to temporarily up the console loglevel using alt-sysrq-8 or:
+
+ echo 8 > /proc/sysrq-trigger
+
+Remember to return the loglevel to normal after triggering the sysrq
+command you are interested in.
+
+* I have more questions, who can I ask?
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Just ask them on the linux-kernel mailing list:
+ linux-kernel@vger.kernel.org
+
+* Credits
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Written by Mydraal <vulpyne@vulpyne.net>
+Updated by Adam Sulmicki <adam@cfar.umd.edu>
+Updated by Jeremy M. Dolan <jmd@turbogeek.org> 2001/01/28 10:15:59
+Added to by Crutcher Dunnavant <crutcher+kernel@datastacks.com>
diff --git a/Documentation/target/tcm_mod_builder.py b/Documentation/target/tcm_mod_builder.py
new file mode 100755
index 000000000..2ba71cea0
--- /dev/null
+++ b/Documentation/target/tcm_mod_builder.py
@@ -0,0 +1,968 @@
+#!/usr/bin/python
+# The TCM v4 multi-protocol fabric module generation script for drivers/target/$NEW_MOD
+#
+# Copyright (c) 2010 Rising Tide Systems
+# Copyright (c) 2010 Linux-iSCSI.org
+#
+# Author: nab@kernel.org
+#
+import os, sys
+import subprocess as sub
+import string
+import re
+import optparse
+
+tcm_dir = ""
+
+fabric_ops = []
+fabric_mod_dir = ""
+fabric_mod_port = ""
+fabric_mod_init_port = ""
+
+def tcm_mod_err(msg):
+ print msg
+ sys.exit(1)
+
+def tcm_mod_create_module_subdir(fabric_mod_dir_var):
+
+ if os.path.isdir(fabric_mod_dir_var) == True:
+ return 1
+
+ print "Creating fabric_mod_dir: " + fabric_mod_dir_var
+ ret = os.mkdir(fabric_mod_dir_var)
+ if ret:
+ tcm_mod_err("Unable to mkdir " + fabric_mod_dir_var)
+
+ return
+
+def tcm_mod_build_FC_include(fabric_mod_dir_var, fabric_mod_name):
+ global fabric_mod_port
+ global fabric_mod_init_port
+ buf = ""
+
+ f = fabric_mod_dir_var + "/" + fabric_mod_name + "_base.h"
+ print "Writing file: " + f
+
+ p = open(f, 'w');
+ if not p:
+ tcm_mod_err("Unable to open file: " + f)
+
+ buf = "#define " + fabric_mod_name.upper() + "_VERSION \"v0.1\"\n"
+ buf += "#define " + fabric_mod_name.upper() + "_NAMELEN 32\n"
+ buf += "\n"
+ buf += "struct " + fabric_mod_name + "_nacl {\n"
+ buf += " /* Binary World Wide unique Port Name for FC Initiator Nport */\n"
+ buf += " u64 nport_wwpn;\n"
+ buf += " /* ASCII formatted WWPN for FC Initiator Nport */\n"
+ buf += " char nport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n"
+ buf += " /* Returned by " + fabric_mod_name + "_make_nodeacl() */\n"
+ buf += " struct se_node_acl se_node_acl;\n"
+ buf += "};\n"
+ buf += "\n"
+ buf += "struct " + fabric_mod_name + "_tpg {\n"
+ buf += " /* FC lport target portal group tag for TCM */\n"
+ buf += " u16 lport_tpgt;\n"
+ buf += " /* Pointer back to " + fabric_mod_name + "_lport */\n"
+ buf += " struct " + fabric_mod_name + "_lport *lport;\n"
+ buf += " /* Returned by " + fabric_mod_name + "_make_tpg() */\n"
+ buf += " struct se_portal_group se_tpg;\n"
+ buf += "};\n"
+ buf += "\n"
+ buf += "struct " + fabric_mod_name + "_lport {\n"
+ buf += " /* SCSI protocol the lport is providing */\n"
+ buf += " u8 lport_proto_id;\n"
+ buf += " /* Binary World Wide unique Port Name for FC Target Lport */\n"
+ buf += " u64 lport_wwpn;\n"
+ buf += " /* ASCII formatted WWPN for FC Target Lport */\n"
+ buf += " char lport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n"
+ buf += " /* Returned by " + fabric_mod_name + "_make_lport() */\n"
+ buf += " struct se_wwn lport_wwn;\n"
+ buf += "};\n"
+
+ ret = p.write(buf)
+ if ret:
+ tcm_mod_err("Unable to write f: " + f)
+
+ p.close()
+
+ fabric_mod_port = "lport"
+ fabric_mod_init_port = "nport"
+
+ return
+
+def tcm_mod_build_SAS_include(fabric_mod_dir_var, fabric_mod_name):
+ global fabric_mod_port
+ global fabric_mod_init_port
+ buf = ""
+
+ f = fabric_mod_dir_var + "/" + fabric_mod_name + "_base.h"
+ print "Writing file: " + f
+
+ p = open(f, 'w');
+ if not p:
+ tcm_mod_err("Unable to open file: " + f)
+
+ buf = "#define " + fabric_mod_name.upper() + "_VERSION \"v0.1\"\n"
+ buf += "#define " + fabric_mod_name.upper() + "_NAMELEN 32\n"
+ buf += "\n"
+ buf += "struct " + fabric_mod_name + "_nacl {\n"
+ buf += " /* Binary World Wide unique Port Name for SAS Initiator port */\n"
+ buf += " u64 iport_wwpn;\n"
+ buf += " /* ASCII formatted WWPN for Sas Initiator port */\n"
+ buf += " char iport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n"
+ buf += " /* Returned by " + fabric_mod_name + "_make_nodeacl() */\n"
+ buf += " struct se_node_acl se_node_acl;\n"
+ buf += "};\n\n"
+ buf += "struct " + fabric_mod_name + "_tpg {\n"
+ buf += " /* SAS port target portal group tag for TCM */\n"
+ buf += " u16 tport_tpgt;\n"
+ buf += " /* Pointer back to " + fabric_mod_name + "_tport */\n"
+ buf += " struct " + fabric_mod_name + "_tport *tport;\n"
+ buf += " /* Returned by " + fabric_mod_name + "_make_tpg() */\n"
+ buf += " struct se_portal_group se_tpg;\n"
+ buf += "};\n\n"
+ buf += "struct " + fabric_mod_name + "_tport {\n"
+ buf += " /* SCSI protocol the tport is providing */\n"
+ buf += " u8 tport_proto_id;\n"
+ buf += " /* Binary World Wide unique Port Name for SAS Target port */\n"
+ buf += " u64 tport_wwpn;\n"
+ buf += " /* ASCII formatted WWPN for SAS Target port */\n"
+ buf += " char tport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n"
+ buf += " /* Returned by " + fabric_mod_name + "_make_tport() */\n"
+ buf += " struct se_wwn tport_wwn;\n"
+ buf += "};\n"
+
+ ret = p.write(buf)
+ if ret:
+ tcm_mod_err("Unable to write f: " + f)
+
+ p.close()
+
+ fabric_mod_port = "tport"
+ fabric_mod_init_port = "iport"
+
+ return
+
+def tcm_mod_build_iSCSI_include(fabric_mod_dir_var, fabric_mod_name):
+ global fabric_mod_port
+ global fabric_mod_init_port
+ buf = ""
+
+ f = fabric_mod_dir_var + "/" + fabric_mod_name + "_base.h"
+ print "Writing file: " + f
+
+ p = open(f, 'w');
+ if not p:
+ tcm_mod_err("Unable to open file: " + f)
+
+ buf = "#define " + fabric_mod_name.upper() + "_VERSION \"v0.1\"\n"
+ buf += "#define " + fabric_mod_name.upper() + "_NAMELEN 32\n"
+ buf += "\n"
+ buf += "struct " + fabric_mod_name + "_nacl {\n"
+ buf += " /* ASCII formatted InitiatorName */\n"
+ buf += " char iport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n"
+ buf += " /* Returned by " + fabric_mod_name + "_make_nodeacl() */\n"
+ buf += " struct se_node_acl se_node_acl;\n"
+ buf += "};\n\n"
+ buf += "struct " + fabric_mod_name + "_tpg {\n"
+ buf += " /* iSCSI target portal group tag for TCM */\n"
+ buf += " u16 tport_tpgt;\n"
+ buf += " /* Pointer back to " + fabric_mod_name + "_tport */\n"
+ buf += " struct " + fabric_mod_name + "_tport *tport;\n"
+ buf += " /* Returned by " + fabric_mod_name + "_make_tpg() */\n"
+ buf += " struct se_portal_group se_tpg;\n"
+ buf += "};\n\n"
+ buf += "struct " + fabric_mod_name + "_tport {\n"
+ buf += " /* SCSI protocol the tport is providing */\n"
+ buf += " u8 tport_proto_id;\n"
+ buf += " /* ASCII formatted TargetName for IQN */\n"
+ buf += " char tport_name[" + fabric_mod_name.upper() + "_NAMELEN];\n"
+ buf += " /* Returned by " + fabric_mod_name + "_make_tport() */\n"
+ buf += " struct se_wwn tport_wwn;\n"
+ buf += "};\n"
+
+ ret = p.write(buf)
+ if ret:
+ tcm_mod_err("Unable to write f: " + f)
+
+ p.close()
+
+ fabric_mod_port = "tport"
+ fabric_mod_init_port = "iport"
+
+ return
+
+def tcm_mod_build_base_includes(proto_ident, fabric_mod_dir_val, fabric_mod_name):
+
+ if proto_ident == "FC":
+ tcm_mod_build_FC_include(fabric_mod_dir_val, fabric_mod_name)
+ elif proto_ident == "SAS":
+ tcm_mod_build_SAS_include(fabric_mod_dir_val, fabric_mod_name)
+ elif proto_ident == "iSCSI":
+ tcm_mod_build_iSCSI_include(fabric_mod_dir_val, fabric_mod_name)
+ else:
+ print "Unsupported proto_ident: " + proto_ident
+ sys.exit(1)
+
+ return
+
+def tcm_mod_build_configfs(proto_ident, fabric_mod_dir_var, fabric_mod_name):
+ buf = ""
+
+ f = fabric_mod_dir_var + "/" + fabric_mod_name + "_configfs.c"
+ print "Writing file: " + f
+
+ p = open(f, 'w');
+ if not p:
+ tcm_mod_err("Unable to open file: " + f)
+
+ buf = "#include <linux/module.h>\n"
+ buf += "#include <linux/moduleparam.h>\n"
+ buf += "#include <linux/version.h>\n"
+ buf += "#include <generated/utsrelease.h>\n"
+ buf += "#include <linux/utsname.h>\n"
+ buf += "#include <linux/init.h>\n"
+ buf += "#include <linux/slab.h>\n"
+ buf += "#include <linux/kthread.h>\n"
+ buf += "#include <linux/types.h>\n"
+ buf += "#include <linux/string.h>\n"
+ buf += "#include <linux/configfs.h>\n"
+ buf += "#include <linux/ctype.h>\n"
+ buf += "#include <asm/unaligned.h>\n\n"
+ buf += "#include <target/target_core_base.h>\n"
+ buf += "#include <target/target_core_fabric.h>\n"
+ buf += "#include <target/target_core_fabric_configfs.h>\n"
+ buf += "#include <target/target_core_configfs.h>\n"
+ buf += "#include <target/configfs_macros.h>\n\n"
+ buf += "#include \"" + fabric_mod_name + "_base.h\"\n"
+ buf += "#include \"" + fabric_mod_name + "_fabric.h\"\n\n"
+
+ buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops;\n\n"
+
+ buf += "static struct se_node_acl *" + fabric_mod_name + "_make_nodeacl(\n"
+ buf += " struct se_portal_group *se_tpg,\n"
+ buf += " struct config_group *group,\n"
+ buf += " const char *name)\n"
+ buf += "{\n"
+ buf += " struct se_node_acl *se_nacl, *se_nacl_new;\n"
+ buf += " struct " + fabric_mod_name + "_nacl *nacl;\n"
+
+ if proto_ident == "FC" or proto_ident == "SAS":
+ buf += " u64 wwpn = 0;\n"
+
+ buf += " u32 nexus_depth;\n\n"
+ buf += " /* " + fabric_mod_name + "_parse_wwn(name, &wwpn, 1) < 0)\n"
+ buf += " return ERR_PTR(-EINVAL); */\n"
+ buf += " se_nacl_new = " + fabric_mod_name + "_alloc_fabric_acl(se_tpg);\n"
+ buf += " if (!se_nacl_new)\n"
+ buf += " return ERR_PTR(-ENOMEM);\n"
+ buf += "//#warning FIXME: Hardcoded nexus depth in " + fabric_mod_name + "_make_nodeacl()\n"
+ buf += " nexus_depth = 1;\n"
+ buf += " /*\n"
+ buf += " * se_nacl_new may be released by core_tpg_add_initiator_node_acl()\n"
+ buf += " * when converting a NodeACL from demo mode -> explict\n"
+ buf += " */\n"
+ buf += " se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new,\n"
+ buf += " name, nexus_depth);\n"
+ buf += " if (IS_ERR(se_nacl)) {\n"
+ buf += " " + fabric_mod_name + "_release_fabric_acl(se_tpg, se_nacl_new);\n"
+ buf += " return se_nacl;\n"
+ buf += " }\n"
+ buf += " /*\n"
+ buf += " * Locate our struct " + fabric_mod_name + "_nacl and set the FC Nport WWPN\n"
+ buf += " */\n"
+ buf += " nacl = container_of(se_nacl, struct " + fabric_mod_name + "_nacl, se_node_acl);\n"
+
+ if proto_ident == "FC" or proto_ident == "SAS":
+ buf += " nacl->" + fabric_mod_init_port + "_wwpn = wwpn;\n"
+
+ buf += " /* " + fabric_mod_name + "_format_wwn(&nacl->" + fabric_mod_init_port + "_name[0], " + fabric_mod_name.upper() + "_NAMELEN, wwpn); */\n\n"
+ buf += " return se_nacl;\n"
+ buf += "}\n\n"
+ buf += "static void " + fabric_mod_name + "_drop_nodeacl(struct se_node_acl *se_acl)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_nacl *nacl = container_of(se_acl,\n"
+ buf += " struct " + fabric_mod_name + "_nacl, se_node_acl);\n"
+ buf += " core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1);\n"
+ buf += " kfree(nacl);\n"
+ buf += "}\n\n"
+
+ buf += "static struct se_portal_group *" + fabric_mod_name + "_make_tpg(\n"
+ buf += " struct se_wwn *wwn,\n"
+ buf += " struct config_group *group,\n"
+ buf += " const char *name)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + "*" + fabric_mod_port + " = container_of(wwn,\n"
+ buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + ", " + fabric_mod_port + "_wwn);\n\n"
+ buf += " struct " + fabric_mod_name + "_tpg *tpg;\n"
+ buf += " unsigned long tpgt;\n"
+ buf += " int ret;\n\n"
+ buf += " if (strstr(name, \"tpgt_\") != name)\n"
+ buf += " return ERR_PTR(-EINVAL);\n"
+ buf += " if (kstrtoul(name + 5, 10, &tpgt) || tpgt > UINT_MAX)\n"
+ buf += " return ERR_PTR(-EINVAL);\n\n"
+ buf += " tpg = kzalloc(sizeof(struct " + fabric_mod_name + "_tpg), GFP_KERNEL);\n"
+ buf += " if (!tpg) {\n"
+ buf += " printk(KERN_ERR \"Unable to allocate struct " + fabric_mod_name + "_tpg\");\n"
+ buf += " return ERR_PTR(-ENOMEM);\n"
+ buf += " }\n"
+ buf += " tpg->" + fabric_mod_port + " = " + fabric_mod_port + ";\n"
+ buf += " tpg->" + fabric_mod_port + "_tpgt = tpgt;\n\n"
+ buf += " ret = core_tpg_register(&" + fabric_mod_name + "_ops, wwn,\n"
+ buf += " &tpg->se_tpg, tpg,\n"
+ buf += " TRANSPORT_TPG_TYPE_NORMAL);\n"
+ buf += " if (ret < 0) {\n"
+ buf += " kfree(tpg);\n"
+ buf += " return NULL;\n"
+ buf += " }\n"
+ buf += " return &tpg->se_tpg;\n"
+ buf += "}\n\n"
+ buf += "static void " + fabric_mod_name + "_drop_tpg(struct se_portal_group *se_tpg)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n"
+ buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n\n"
+ buf += " core_tpg_deregister(se_tpg);\n"
+ buf += " kfree(tpg);\n"
+ buf += "}\n\n"
+
+ buf += "static struct se_wwn *" + fabric_mod_name + "_make_" + fabric_mod_port + "(\n"
+ buf += " struct target_fabric_configfs *tf,\n"
+ buf += " struct config_group *group,\n"
+ buf += " const char *name)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + ";\n"
+
+ if proto_ident == "FC" or proto_ident == "SAS":
+ buf += " u64 wwpn = 0;\n\n"
+
+ buf += " /* if (" + fabric_mod_name + "_parse_wwn(name, &wwpn, 1) < 0)\n"
+ buf += " return ERR_PTR(-EINVAL); */\n\n"
+ buf += " " + fabric_mod_port + " = kzalloc(sizeof(struct " + fabric_mod_name + "_" + fabric_mod_port + "), GFP_KERNEL);\n"
+ buf += " if (!" + fabric_mod_port + ") {\n"
+ buf += " printk(KERN_ERR \"Unable to allocate struct " + fabric_mod_name + "_" + fabric_mod_port + "\");\n"
+ buf += " return ERR_PTR(-ENOMEM);\n"
+ buf += " }\n"
+
+ if proto_ident == "FC" or proto_ident == "SAS":
+ buf += " " + fabric_mod_port + "->" + fabric_mod_port + "_wwpn = wwpn;\n"
+
+ buf += " /* " + fabric_mod_name + "_format_wwn(&" + fabric_mod_port + "->" + fabric_mod_port + "_name[0], " + fabric_mod_name.upper() + "_NAMELEN, wwpn); */\n\n"
+ buf += " return &" + fabric_mod_port + "->" + fabric_mod_port + "_wwn;\n"
+ buf += "}\n\n"
+ buf += "static void " + fabric_mod_name + "_drop_" + fabric_mod_port + "(struct se_wwn *wwn)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = container_of(wwn,\n"
+ buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + ", " + fabric_mod_port + "_wwn);\n"
+ buf += " kfree(" + fabric_mod_port + ");\n"
+ buf += "}\n\n"
+ buf += "static ssize_t " + fabric_mod_name + "_wwn_show_attr_version(\n"
+ buf += " struct target_fabric_configfs *tf,\n"
+ buf += " char *page)\n"
+ buf += "{\n"
+ buf += " return sprintf(page, \"" + fabric_mod_name.upper() + " fabric module %s on %s/%s\"\n"
+ buf += " \"on \"UTS_RELEASE\"\\n\", " + fabric_mod_name.upper() + "_VERSION, utsname()->sysname,\n"
+ buf += " utsname()->machine);\n"
+ buf += "}\n\n"
+ buf += "TF_WWN_ATTR_RO(" + fabric_mod_name + ", version);\n\n"
+ buf += "static struct configfs_attribute *" + fabric_mod_name + "_wwn_attrs[] = {\n"
+ buf += " &" + fabric_mod_name + "_wwn_version.attr,\n"
+ buf += " NULL,\n"
+ buf += "};\n\n"
+
+ buf += "static const struct target_core_fabric_ops " + fabric_mod_name + "_ops = {\n"
+ buf += " .module = THIS_MODULE,\n"
+ buf += " .name = " + fabric_mod_name + ",\n"
+ buf += " .get_fabric_proto_ident = " + fabric_mod_name + "_get_fabric_proto_ident,\n"
+ buf += " .get_fabric_name = " + fabric_mod_name + "_get_fabric_name,\n"
+ buf += " .get_fabric_proto_ident = " + fabric_mod_name + "_get_fabric_proto_ident,\n"
+ buf += " .tpg_get_wwn = " + fabric_mod_name + "_get_fabric_wwn,\n"
+ buf += " .tpg_get_tag = " + fabric_mod_name + "_get_tag,\n"
+ buf += " .tpg_get_default_depth = " + fabric_mod_name + "_get_default_depth,\n"
+ buf += " .tpg_get_pr_transport_id = " + fabric_mod_name + "_get_pr_transport_id,\n"
+ buf += " .tpg_get_pr_transport_id_len = " + fabric_mod_name + "_get_pr_transport_id_len,\n"
+ buf += " .tpg_parse_pr_out_transport_id = " + fabric_mod_name + "_parse_pr_out_transport_id,\n"
+ buf += " .tpg_check_demo_mode = " + fabric_mod_name + "_check_false,\n"
+ buf += " .tpg_check_demo_mode_cache = " + fabric_mod_name + "_check_true,\n"
+ buf += " .tpg_check_demo_mode_write_protect = " + fabric_mod_name + "_check_true,\n"
+ buf += " .tpg_check_prod_mode_write_protect = " + fabric_mod_name + "_check_false,\n"
+ buf += " .tpg_alloc_fabric_acl = " + fabric_mod_name + "_alloc_fabric_acl,\n"
+ buf += " .tpg_release_fabric_acl = " + fabric_mod_name + "_release_fabric_acl,\n"
+ buf += " .tpg_get_inst_index = " + fabric_mod_name + "_tpg_get_inst_index,\n"
+ buf += " .release_cmd = " + fabric_mod_name + "_release_cmd,\n"
+ buf += " .shutdown_session = " + fabric_mod_name + "_shutdown_session,\n"
+ buf += " .close_session = " + fabric_mod_name + "_close_session,\n"
+ buf += " .sess_get_index = " + fabric_mod_name + "_sess_get_index,\n"
+ buf += " .sess_get_initiator_sid = NULL,\n"
+ buf += " .write_pending = " + fabric_mod_name + "_write_pending,\n"
+ buf += " .write_pending_status = " + fabric_mod_name + "_write_pending_status,\n"
+ buf += " .set_default_node_attributes = " + fabric_mod_name + "_set_default_node_attrs,\n"
+ buf += " .get_task_tag = " + fabric_mod_name + "_get_task_tag,\n"
+ buf += " .get_cmd_state = " + fabric_mod_name + "_get_cmd_state,\n"
+ buf += " .queue_data_in = " + fabric_mod_name + "_queue_data_in,\n"
+ buf += " .queue_status = " + fabric_mod_name + "_queue_status,\n"
+ buf += " .queue_tm_rsp = " + fabric_mod_name + "_queue_tm_rsp,\n"
+ buf += " .aborted_task = " + fabric_mod_name + "_aborted_task,\n"
+ buf += " /*\n"
+ buf += " * Setup function pointers for generic logic in target_core_fabric_configfs.c\n"
+ buf += " */\n"
+ buf += " .fabric_make_wwn = " + fabric_mod_name + "_make_" + fabric_mod_port + ",\n"
+ buf += " .fabric_drop_wwn = " + fabric_mod_name + "_drop_" + fabric_mod_port + ",\n"
+ buf += " .fabric_make_tpg = " + fabric_mod_name + "_make_tpg,\n"
+ buf += " .fabric_drop_tpg = " + fabric_mod_name + "_drop_tpg,\n"
+ buf += " .fabric_post_link = NULL,\n"
+ buf += " .fabric_pre_unlink = NULL,\n"
+ buf += " .fabric_make_np = NULL,\n"
+ buf += " .fabric_drop_np = NULL,\n"
+ buf += " .fabric_make_nodeacl = " + fabric_mod_name + "_make_nodeacl,\n"
+ buf += " .fabric_drop_nodeacl = " + fabric_mod_name + "_drop_nodeacl,\n"
+ buf += "\n"
+ buf += " .tfc_wwn_attrs = " + fabric_mod_name + "_wwn_attrs;\n"
+ buf += "};\n\n"
+
+ buf += "static int __init " + fabric_mod_name + "_init(void)\n"
+ buf += "{\n"
+ buf += " return target_register_template(" + fabric_mod_name + "_ops);\n"
+ buf += "};\n\n"
+
+ buf += "static void __exit " + fabric_mod_name + "_exit(void)\n"
+ buf += "{\n"
+ buf += " target_unregister_template(" + fabric_mod_name + "_ops);\n"
+ buf += "};\n\n"
+
+ buf += "MODULE_DESCRIPTION(\"" + fabric_mod_name.upper() + " series fabric driver\");\n"
+ buf += "MODULE_LICENSE(\"GPL\");\n"
+ buf += "module_init(" + fabric_mod_name + "_init);\n"
+ buf += "module_exit(" + fabric_mod_name + "_exit);\n"
+
+ ret = p.write(buf)
+ if ret:
+ tcm_mod_err("Unable to write f: " + f)
+
+ p.close()
+
+ return
+
+def tcm_mod_scan_fabric_ops(tcm_dir):
+
+ fabric_ops_api = tcm_dir + "include/target/target_core_fabric.h"
+
+ print "Using tcm_mod_scan_fabric_ops: " + fabric_ops_api
+ process_fo = 0;
+
+ p = open(fabric_ops_api, 'r')
+
+ line = p.readline()
+ while line:
+ if process_fo == 0 and re.search('struct target_core_fabric_ops {', line):
+ line = p.readline()
+ continue
+
+ if process_fo == 0:
+ process_fo = 1;
+ line = p.readline()
+ # Search for function pointer
+ if not re.search('\(\*', line):
+ continue
+
+ fabric_ops.append(line.rstrip())
+ continue
+
+ line = p.readline()
+ # Search for function pointer
+ if not re.search('\(\*', line):
+ continue
+
+ fabric_ops.append(line.rstrip())
+
+ p.close()
+ return
+
+def tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir_var, fabric_mod_name):
+ buf = ""
+ bufi = ""
+
+ f = fabric_mod_dir_var + "/" + fabric_mod_name + "_fabric.c"
+ print "Writing file: " + f
+
+ p = open(f, 'w')
+ if not p:
+ tcm_mod_err("Unable to open file: " + f)
+
+ fi = fabric_mod_dir_var + "/" + fabric_mod_name + "_fabric.h"
+ print "Writing file: " + fi
+
+ pi = open(fi, 'w')
+ if not pi:
+ tcm_mod_err("Unable to open file: " + fi)
+
+ buf = "#include <linux/slab.h>\n"
+ buf += "#include <linux/kthread.h>\n"
+ buf += "#include <linux/types.h>\n"
+ buf += "#include <linux/list.h>\n"
+ buf += "#include <linux/types.h>\n"
+ buf += "#include <linux/string.h>\n"
+ buf += "#include <linux/ctype.h>\n"
+ buf += "#include <asm/unaligned.h>\n"
+ buf += "#include <scsi/scsi.h>\n"
+ buf += "#include <scsi/scsi_host.h>\n"
+ buf += "#include <scsi/scsi_device.h>\n"
+ buf += "#include <scsi/scsi_cmnd.h>\n"
+ buf += "#include <scsi/libfc.h>\n\n"
+ buf += "#include <target/target_core_base.h>\n"
+ buf += "#include <target/target_core_fabric.h>\n"
+ buf += "#include <target/target_core_configfs.h>\n\n"
+ buf += "#include \"" + fabric_mod_name + "_base.h\"\n"
+ buf += "#include \"" + fabric_mod_name + "_fabric.h\"\n\n"
+
+ buf += "int " + fabric_mod_name + "_check_true(struct se_portal_group *se_tpg)\n"
+ buf += "{\n"
+ buf += " return 1;\n"
+ buf += "}\n\n"
+ bufi += "int " + fabric_mod_name + "_check_true(struct se_portal_group *);\n"
+
+ buf += "int " + fabric_mod_name + "_check_false(struct se_portal_group *se_tpg)\n"
+ buf += "{\n"
+ buf += " return 0;\n"
+ buf += "}\n\n"
+ bufi += "int " + fabric_mod_name + "_check_false(struct se_portal_group *);\n"
+
+ total_fabric_ops = len(fabric_ops)
+ i = 0
+
+ while i < total_fabric_ops:
+ fo = fabric_ops[i]
+ i += 1
+# print "fabric_ops: " + fo
+
+ if re.search('get_fabric_name', fo):
+ buf += "char *" + fabric_mod_name + "_get_fabric_name(void)\n"
+ buf += "{\n"
+ buf += " return \"" + fabric_mod_name + "\";\n"
+ buf += "}\n\n"
+ bufi += "char *" + fabric_mod_name + "_get_fabric_name(void);\n"
+ continue
+
+ if re.search('get_fabric_proto_ident', fo):
+ buf += "u8 " + fabric_mod_name + "_get_fabric_proto_ident(struct se_portal_group *se_tpg)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n"
+ buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n"
+ buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n"
+ buf += " u8 proto_id;\n\n"
+ buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n"
+ if proto_ident == "FC":
+ buf += " case SCSI_PROTOCOL_FCP:\n"
+ buf += " default:\n"
+ buf += " proto_id = fc_get_fabric_proto_ident(se_tpg);\n"
+ buf += " break;\n"
+ elif proto_ident == "SAS":
+ buf += " case SCSI_PROTOCOL_SAS:\n"
+ buf += " default:\n"
+ buf += " proto_id = sas_get_fabric_proto_ident(se_tpg);\n"
+ buf += " break;\n"
+ elif proto_ident == "iSCSI":
+ buf += " case SCSI_PROTOCOL_ISCSI:\n"
+ buf += " default:\n"
+ buf += " proto_id = iscsi_get_fabric_proto_ident(se_tpg);\n"
+ buf += " break;\n"
+
+ buf += " }\n\n"
+ buf += " return proto_id;\n"
+ buf += "}\n\n"
+ bufi += "u8 " + fabric_mod_name + "_get_fabric_proto_ident(struct se_portal_group *);\n"
+
+ if re.search('get_wwn', fo):
+ buf += "char *" + fabric_mod_name + "_get_fabric_wwn(struct se_portal_group *se_tpg)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n"
+ buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n"
+ buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n\n"
+ buf += " return &" + fabric_mod_port + "->" + fabric_mod_port + "_name[0];\n"
+ buf += "}\n\n"
+ bufi += "char *" + fabric_mod_name + "_get_fabric_wwn(struct se_portal_group *);\n"
+
+ if re.search('get_tag', fo):
+ buf += "u16 " + fabric_mod_name + "_get_tag(struct se_portal_group *se_tpg)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n"
+ buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n"
+ buf += " return tpg->" + fabric_mod_port + "_tpgt;\n"
+ buf += "}\n\n"
+ bufi += "u16 " + fabric_mod_name + "_get_tag(struct se_portal_group *);\n"
+
+ if re.search('get_default_depth', fo):
+ buf += "u32 " + fabric_mod_name + "_get_default_depth(struct se_portal_group *se_tpg)\n"
+ buf += "{\n"
+ buf += " return 1;\n"
+ buf += "}\n\n"
+ bufi += "u32 " + fabric_mod_name + "_get_default_depth(struct se_portal_group *);\n"
+
+ if re.search('get_pr_transport_id\)\(', fo):
+ buf += "u32 " + fabric_mod_name + "_get_pr_transport_id(\n"
+ buf += " struct se_portal_group *se_tpg,\n"
+ buf += " struct se_node_acl *se_nacl,\n"
+ buf += " struct t10_pr_registration *pr_reg,\n"
+ buf += " int *format_code,\n"
+ buf += " unsigned char *buf)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n"
+ buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n"
+ buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n"
+ buf += " int ret = 0;\n\n"
+ buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n"
+ if proto_ident == "FC":
+ buf += " case SCSI_PROTOCOL_FCP:\n"
+ buf += " default:\n"
+ buf += " ret = fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg,\n"
+ buf += " format_code, buf);\n"
+ buf += " break;\n"
+ elif proto_ident == "SAS":
+ buf += " case SCSI_PROTOCOL_SAS:\n"
+ buf += " default:\n"
+ buf += " ret = sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,\n"
+ buf += " format_code, buf);\n"
+ buf += " break;\n"
+ elif proto_ident == "iSCSI":
+ buf += " case SCSI_PROTOCOL_ISCSI:\n"
+ buf += " default:\n"
+ buf += " ret = iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg,\n"
+ buf += " format_code, buf);\n"
+ buf += " break;\n"
+
+ buf += " }\n\n"
+ buf += " return ret;\n"
+ buf += "}\n\n"
+ bufi += "u32 " + fabric_mod_name + "_get_pr_transport_id(struct se_portal_group *,\n"
+ bufi += " struct se_node_acl *, struct t10_pr_registration *,\n"
+ bufi += " int *, unsigned char *);\n"
+
+ if re.search('get_pr_transport_id_len\)\(', fo):
+ buf += "u32 " + fabric_mod_name + "_get_pr_transport_id_len(\n"
+ buf += " struct se_portal_group *se_tpg,\n"
+ buf += " struct se_node_acl *se_nacl,\n"
+ buf += " struct t10_pr_registration *pr_reg,\n"
+ buf += " int *format_code)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n"
+ buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n"
+ buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n"
+ buf += " int ret = 0;\n\n"
+ buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n"
+ if proto_ident == "FC":
+ buf += " case SCSI_PROTOCOL_FCP:\n"
+ buf += " default:\n"
+ buf += " ret = fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,\n"
+ buf += " format_code);\n"
+ buf += " break;\n"
+ elif proto_ident == "SAS":
+ buf += " case SCSI_PROTOCOL_SAS:\n"
+ buf += " default:\n"
+ buf += " ret = sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,\n"
+ buf += " format_code);\n"
+ buf += " break;\n"
+ elif proto_ident == "iSCSI":
+ buf += " case SCSI_PROTOCOL_ISCSI:\n"
+ buf += " default:\n"
+ buf += " ret = iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,\n"
+ buf += " format_code);\n"
+ buf += " break;\n"
+
+
+ buf += " }\n\n"
+ buf += " return ret;\n"
+ buf += "}\n\n"
+ bufi += "u32 " + fabric_mod_name + "_get_pr_transport_id_len(struct se_portal_group *,\n"
+ bufi += " struct se_node_acl *, struct t10_pr_registration *,\n"
+ bufi += " int *);\n"
+
+ if re.search('parse_pr_out_transport_id\)\(', fo):
+ buf += "char *" + fabric_mod_name + "_parse_pr_out_transport_id(\n"
+ buf += " struct se_portal_group *se_tpg,\n"
+ buf += " const char *buf,\n"
+ buf += " u32 *out_tid_len,\n"
+ buf += " char **port_nexus_ptr)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_tpg *tpg = container_of(se_tpg,\n"
+ buf += " struct " + fabric_mod_name + "_tpg, se_tpg);\n"
+ buf += " struct " + fabric_mod_name + "_" + fabric_mod_port + " *" + fabric_mod_port + " = tpg->" + fabric_mod_port + ";\n"
+ buf += " char *tid = NULL;\n\n"
+ buf += " switch (" + fabric_mod_port + "->" + fabric_mod_port + "_proto_id) {\n"
+ if proto_ident == "FC":
+ buf += " case SCSI_PROTOCOL_FCP:\n"
+ buf += " default:\n"
+ buf += " tid = fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,\n"
+ buf += " port_nexus_ptr);\n"
+ elif proto_ident == "SAS":
+ buf += " case SCSI_PROTOCOL_SAS:\n"
+ buf += " default:\n"
+ buf += " tid = sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,\n"
+ buf += " port_nexus_ptr);\n"
+ elif proto_ident == "iSCSI":
+ buf += " case SCSI_PROTOCOL_ISCSI:\n"
+ buf += " default:\n"
+ buf += " tid = iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,\n"
+ buf += " port_nexus_ptr);\n"
+
+ buf += " }\n\n"
+ buf += " return tid;\n"
+ buf += "}\n\n"
+ bufi += "char *" + fabric_mod_name + "_parse_pr_out_transport_id(struct se_portal_group *,\n"
+ bufi += " const char *, u32 *, char **);\n"
+
+ if re.search('alloc_fabric_acl\)\(', fo):
+ buf += "struct se_node_acl *" + fabric_mod_name + "_alloc_fabric_acl(struct se_portal_group *se_tpg)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_nacl *nacl;\n\n"
+ buf += " nacl = kzalloc(sizeof(struct " + fabric_mod_name + "_nacl), GFP_KERNEL);\n"
+ buf += " if (!nacl) {\n"
+ buf += " printk(KERN_ERR \"Unable to allocate struct " + fabric_mod_name + "_nacl\\n\");\n"
+ buf += " return NULL;\n"
+ buf += " }\n\n"
+ buf += " return &nacl->se_node_acl;\n"
+ buf += "}\n\n"
+ bufi += "struct se_node_acl *" + fabric_mod_name + "_alloc_fabric_acl(struct se_portal_group *);\n"
+
+ if re.search('release_fabric_acl\)\(', fo):
+ buf += "void " + fabric_mod_name + "_release_fabric_acl(\n"
+ buf += " struct se_portal_group *se_tpg,\n"
+ buf += " struct se_node_acl *se_nacl)\n"
+ buf += "{\n"
+ buf += " struct " + fabric_mod_name + "_nacl *nacl = container_of(se_nacl,\n"
+ buf += " struct " + fabric_mod_name + "_nacl, se_node_acl);\n"
+ buf += " kfree(nacl);\n"
+ buf += "}\n\n"
+ bufi += "void " + fabric_mod_name + "_release_fabric_acl(struct se_portal_group *,\n"
+ bufi += " struct se_node_acl *);\n"
+
+ if re.search('tpg_get_inst_index\)\(', fo):
+ buf += "u32 " + fabric_mod_name + "_tpg_get_inst_index(struct se_portal_group *se_tpg)\n"
+ buf += "{\n"
+ buf += " return 1;\n"
+ buf += "}\n\n"
+ bufi += "u32 " + fabric_mod_name + "_tpg_get_inst_index(struct se_portal_group *);\n"
+
+ if re.search('\*release_cmd\)\(', fo):
+ buf += "void " + fabric_mod_name + "_release_cmd(struct se_cmd *se_cmd)\n"
+ buf += "{\n"
+ buf += " return;\n"
+ buf += "}\n\n"
+ bufi += "void " + fabric_mod_name + "_release_cmd(struct se_cmd *);\n"
+
+ if re.search('shutdown_session\)\(', fo):
+ buf += "int " + fabric_mod_name + "_shutdown_session(struct se_session *se_sess)\n"
+ buf += "{\n"
+ buf += " return 0;\n"
+ buf += "}\n\n"
+ bufi += "int " + fabric_mod_name + "_shutdown_session(struct se_session *);\n"
+
+ if re.search('close_session\)\(', fo):
+ buf += "void " + fabric_mod_name + "_close_session(struct se_session *se_sess)\n"
+ buf += "{\n"
+ buf += " return;\n"
+ buf += "}\n\n"
+ bufi += "void " + fabric_mod_name + "_close_session(struct se_session *);\n"
+
+ if re.search('sess_get_index\)\(', fo):
+ buf += "u32 " + fabric_mod_name + "_sess_get_index(struct se_session *se_sess)\n"
+ buf += "{\n"
+ buf += " return 0;\n"
+ buf += "}\n\n"
+ bufi += "u32 " + fabric_mod_name + "_sess_get_index(struct se_session *);\n"
+
+ if re.search('write_pending\)\(', fo):
+ buf += "int " + fabric_mod_name + "_write_pending(struct se_cmd *se_cmd)\n"
+ buf += "{\n"
+ buf += " return 0;\n"
+ buf += "}\n\n"
+ bufi += "int " + fabric_mod_name + "_write_pending(struct se_cmd *);\n"
+
+ if re.search('write_pending_status\)\(', fo):
+ buf += "int " + fabric_mod_name + "_write_pending_status(struct se_cmd *se_cmd)\n"
+ buf += "{\n"
+ buf += " return 0;\n"
+ buf += "}\n\n"
+ bufi += "int " + fabric_mod_name + "_write_pending_status(struct se_cmd *);\n"
+
+ if re.search('set_default_node_attributes\)\(', fo):
+ buf += "void " + fabric_mod_name + "_set_default_node_attrs(struct se_node_acl *nacl)\n"
+ buf += "{\n"
+ buf += " return;\n"
+ buf += "}\n\n"
+ bufi += "void " + fabric_mod_name + "_set_default_node_attrs(struct se_node_acl *);\n"
+
+ if re.search('get_task_tag\)\(', fo):
+ buf += "u32 " + fabric_mod_name + "_get_task_tag(struct se_cmd *se_cmd)\n"
+ buf += "{\n"
+ buf += " return 0;\n"
+ buf += "}\n\n"
+ bufi += "u32 " + fabric_mod_name + "_get_task_tag(struct se_cmd *);\n"
+
+ if re.search('get_cmd_state\)\(', fo):
+ buf += "int " + fabric_mod_name + "_get_cmd_state(struct se_cmd *se_cmd)\n"
+ buf += "{\n"
+ buf += " return 0;\n"
+ buf += "}\n\n"
+ bufi += "int " + fabric_mod_name + "_get_cmd_state(struct se_cmd *);\n"
+
+ if re.search('queue_data_in\)\(', fo):
+ buf += "int " + fabric_mod_name + "_queue_data_in(struct se_cmd *se_cmd)\n"
+ buf += "{\n"
+ buf += " return 0;\n"
+ buf += "}\n\n"
+ bufi += "int " + fabric_mod_name + "_queue_data_in(struct se_cmd *);\n"
+
+ if re.search('queue_status\)\(', fo):
+ buf += "int " + fabric_mod_name + "_queue_status(struct se_cmd *se_cmd)\n"
+ buf += "{\n"
+ buf += " return 0;\n"
+ buf += "}\n\n"
+ bufi += "int " + fabric_mod_name + "_queue_status(struct se_cmd *);\n"
+
+ if re.search('queue_tm_rsp\)\(', fo):
+ buf += "void " + fabric_mod_name + "_queue_tm_rsp(struct se_cmd *se_cmd)\n"
+ buf += "{\n"
+ buf += " return;\n"
+ buf += "}\n\n"
+ bufi += "void " + fabric_mod_name + "_queue_tm_rsp(struct se_cmd *);\n"
+
+ if re.search('aborted_task\)\(', fo):
+ buf += "void " + fabric_mod_name + "_aborted_task(struct se_cmd *se_cmd)\n"
+ buf += "{\n"
+ buf += " return;\n"
+ buf += "}\n\n"
+ bufi += "void " + fabric_mod_name + "_aborted_task(struct se_cmd *);\n"
+
+ ret = p.write(buf)
+ if ret:
+ tcm_mod_err("Unable to write f: " + f)
+
+ p.close()
+
+ ret = pi.write(bufi)
+ if ret:
+ tcm_mod_err("Unable to write fi: " + fi)
+
+ pi.close()
+ return
+
+def tcm_mod_build_kbuild(fabric_mod_dir_var, fabric_mod_name):
+
+ buf = ""
+ f = fabric_mod_dir_var + "/Makefile"
+ print "Writing file: " + f
+
+ p = open(f, 'w')
+ if not p:
+ tcm_mod_err("Unable to open file: " + f)
+
+ buf += fabric_mod_name + "-objs := " + fabric_mod_name + "_fabric.o \\\n"
+ buf += " " + fabric_mod_name + "_configfs.o\n"
+ buf += "obj-$(CONFIG_" + fabric_mod_name.upper() + ") += " + fabric_mod_name + ".o\n"
+
+ ret = p.write(buf)
+ if ret:
+ tcm_mod_err("Unable to write f: " + f)
+
+ p.close()
+ return
+
+def tcm_mod_build_kconfig(fabric_mod_dir_var, fabric_mod_name):
+
+ buf = ""
+ f = fabric_mod_dir_var + "/Kconfig"
+ print "Writing file: " + f
+
+ p = open(f, 'w')
+ if not p:
+ tcm_mod_err("Unable to open file: " + f)
+
+ buf = "config " + fabric_mod_name.upper() + "\n"
+ buf += " tristate \"" + fabric_mod_name.upper() + " fabric module\"\n"
+ buf += " depends on TARGET_CORE && CONFIGFS_FS\n"
+ buf += " default n\n"
+ buf += " ---help---\n"
+ buf += " Say Y here to enable the " + fabric_mod_name.upper() + " fabric module\n"
+
+ ret = p.write(buf)
+ if ret:
+ tcm_mod_err("Unable to write f: " + f)
+
+ p.close()
+ return
+
+def tcm_mod_add_kbuild(tcm_dir, fabric_mod_name):
+ buf = "obj-$(CONFIG_" + fabric_mod_name.upper() + ") += " + fabric_mod_name.lower() + "/\n"
+ kbuild = tcm_dir + "/drivers/target/Makefile"
+
+ f = open(kbuild, 'a')
+ f.write(buf)
+ f.close()
+ return
+
+def tcm_mod_add_kconfig(tcm_dir, fabric_mod_name):
+ buf = "source \"drivers/target/" + fabric_mod_name.lower() + "/Kconfig\"\n"
+ kconfig = tcm_dir + "/drivers/target/Kconfig"
+
+ f = open(kconfig, 'a')
+ f.write(buf)
+ f.close()
+ return
+
+def main(modname, proto_ident):
+# proto_ident = "FC"
+# proto_ident = "SAS"
+# proto_ident = "iSCSI"
+
+ tcm_dir = os.getcwd();
+ tcm_dir += "/../../"
+ print "tcm_dir: " + tcm_dir
+ fabric_mod_name = modname
+ fabric_mod_dir = tcm_dir + "drivers/target/" + fabric_mod_name
+ print "Set fabric_mod_name: " + fabric_mod_name
+ print "Set fabric_mod_dir: " + fabric_mod_dir
+ print "Using proto_ident: " + proto_ident
+
+ if proto_ident != "FC" and proto_ident != "SAS" and proto_ident != "iSCSI":
+ print "Unsupported proto_ident: " + proto_ident
+ sys.exit(1)
+
+ ret = tcm_mod_create_module_subdir(fabric_mod_dir)
+ if ret:
+ print "tcm_mod_create_module_subdir() failed because module already exists!"
+ sys.exit(1)
+
+ tcm_mod_build_base_includes(proto_ident, fabric_mod_dir, fabric_mod_name)
+ tcm_mod_scan_fabric_ops(tcm_dir)
+ tcm_mod_dump_fabric_ops(proto_ident, fabric_mod_dir, fabric_mod_name)
+ tcm_mod_build_configfs(proto_ident, fabric_mod_dir, fabric_mod_name)
+ tcm_mod_build_kbuild(fabric_mod_dir, fabric_mod_name)
+ tcm_mod_build_kconfig(fabric_mod_dir, fabric_mod_name)
+
+ input = raw_input("Would you like to add " + fabric_mod_name + " to drivers/target/Makefile..? [yes,no]: ")
+ if input == "yes" or input == "y":
+ tcm_mod_add_kbuild(tcm_dir, fabric_mod_name)
+
+ input = raw_input("Would you like to add " + fabric_mod_name + " to drivers/target/Kconfig..? [yes,no]: ")
+ if input == "yes" or input == "y":
+ tcm_mod_add_kconfig(tcm_dir, fabric_mod_name)
+
+ return
+
+parser = optparse.OptionParser()
+parser.add_option('-m', '--modulename', help='Module name', dest='modname',
+ action='store', nargs=1, type='string')
+parser.add_option('-p', '--protoident', help='Protocol Ident', dest='protoident',
+ action='store', nargs=1, type='string')
+
+(opts, args) = parser.parse_args()
+
+mandatories = ['modname', 'protoident']
+for m in mandatories:
+ if not opts.__dict__[m]:
+ print "mandatory option is missing\n"
+ parser.print_help()
+ exit(-1)
+
+if __name__ == "__main__":
+
+ main(str(opts.modname), opts.protoident)
diff --git a/Documentation/target/tcm_mod_builder.txt b/Documentation/target/tcm_mod_builder.txt
new file mode 100644
index 000000000..84533d8e7
--- /dev/null
+++ b/Documentation/target/tcm_mod_builder.txt
@@ -0,0 +1,145 @@
+>>>>>>>>>> The TCM v4 fabric module script generator <<<<<<<<<<
+
+Greetings all,
+
+This document is intended to be a mini-HOWTO for using the tcm_mod_builder.py
+script to generate a brand new functional TCM v4 fabric .ko module of your very own,
+that once built can be immediately be loaded to start access the new TCM/ConfigFS
+fabric skeleton, by simply using:
+
+ modprobe $TCM_NEW_MOD
+ mkdir -p /sys/kernel/config/target/$TCM_NEW_MOD
+
+This script will create a new drivers/target/$TCM_NEW_MOD/, and will do the following
+
+ *) Generate new API callers for drivers/target/target_core_fabric_configs.c logic
+ ->make_nodeacl(), ->drop_nodeacl(), ->make_tpg(), ->drop_tpg()
+ ->make_wwn(), ->drop_wwn(). These are created into $TCM_NEW_MOD/$TCM_NEW_MOD_configfs.c
+ *) Generate basic infrastructure for loading/unloading LKMs and TCM/ConfigFS fabric module
+ using a skeleton struct target_core_fabric_ops API template.
+ *) Based on user defined T10 Proto_Ident for the new fabric module being built,
+ the TransportID / Initiator and Target WWPN related handlers for
+ SPC-3 persistent reservation are automatically generated in $TCM_NEW_MOD/$TCM_NEW_MOD_fabric.c
+ using drivers/target/target_core_fabric_lib.c logic.
+ *) NOP API calls for all other Data I/O path and fabric dependent attribute logic
+ in $TCM_NEW_MOD/$TCM_NEW_MOD_fabric.c
+
+tcm_mod_builder.py depends upon the mandatory '-p $PROTO_IDENT' and '-m
+$FABRIC_MOD_name' parameters, and actually running the script looks like:
+
+target:/mnt/sdb/lio-core-2.6.git/Documentation/target# python tcm_mod_builder.py -p iSCSI -m tcm_nab5000
+tcm_dir: /mnt/sdb/lio-core-2.6.git/Documentation/target/../../
+Set fabric_mod_name: tcm_nab5000
+Set fabric_mod_dir:
+/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000
+Using proto_ident: iSCSI
+Creating fabric_mod_dir:
+/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000
+Writing file:
+/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_base.h
+Using tcm_mod_scan_fabric_ops:
+/mnt/sdb/lio-core-2.6.git/Documentation/target/../../include/target/target_core_fabric_ops.h
+Writing file:
+/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_fabric.c
+Writing file:
+/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_fabric.h
+Writing file:
+/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/tcm_nab5000_configfs.c
+Writing file:
+/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/Kbuild
+Writing file:
+/mnt/sdb/lio-core-2.6.git/Documentation/target/../../drivers/target/tcm_nab5000/Kconfig
+Would you like to add tcm_nab5000to drivers/target/Kbuild..? [yes,no]: yes
+Would you like to add tcm_nab5000to drivers/target/Kconfig..? [yes,no]: yes
+
+At the end of tcm_mod_builder.py. the script will ask to add the following
+line to drivers/target/Kbuild:
+
+ obj-$(CONFIG_TCM_NAB5000) += tcm_nab5000/
+
+and the same for drivers/target/Kconfig:
+
+ source "drivers/target/tcm_nab5000/Kconfig"
+
+*) Run 'make menuconfig' and select the new CONFIG_TCM_NAB5000 item:
+
+ <M> TCM_NAB5000 fabric module
+
+*) Build using 'make modules', once completed you will have:
+
+target:/mnt/sdb/lio-core-2.6.git# ls -la drivers/target/tcm_nab5000/
+total 1348
+drwxr-xr-x 2 root root 4096 2010-10-05 03:23 .
+drwxr-xr-x 9 root root 4096 2010-10-05 03:22 ..
+-rw-r--r-- 1 root root 282 2010-10-05 03:22 Kbuild
+-rw-r--r-- 1 root root 171 2010-10-05 03:22 Kconfig
+-rw-r--r-- 1 root root 49 2010-10-05 03:23 modules.order
+-rw-r--r-- 1 root root 738 2010-10-05 03:22 tcm_nab5000_base.h
+-rw-r--r-- 1 root root 9096 2010-10-05 03:22 tcm_nab5000_configfs.c
+-rw-r--r-- 1 root root 191200 2010-10-05 03:23 tcm_nab5000_configfs.o
+-rw-r--r-- 1 root root 40504 2010-10-05 03:23 .tcm_nab5000_configfs.o.cmd
+-rw-r--r-- 1 root root 5414 2010-10-05 03:22 tcm_nab5000_fabric.c
+-rw-r--r-- 1 root root 2016 2010-10-05 03:22 tcm_nab5000_fabric.h
+-rw-r--r-- 1 root root 190932 2010-10-05 03:23 tcm_nab5000_fabric.o
+-rw-r--r-- 1 root root 40713 2010-10-05 03:23 .tcm_nab5000_fabric.o.cmd
+-rw-r--r-- 1 root root 401861 2010-10-05 03:23 tcm_nab5000.ko
+-rw-r--r-- 1 root root 265 2010-10-05 03:23 .tcm_nab5000.ko.cmd
+-rw-r--r-- 1 root root 459 2010-10-05 03:23 tcm_nab5000.mod.c
+-rw-r--r-- 1 root root 23896 2010-10-05 03:23 tcm_nab5000.mod.o
+-rw-r--r-- 1 root root 22655 2010-10-05 03:23 .tcm_nab5000.mod.o.cmd
+-rw-r--r-- 1 root root 379022 2010-10-05 03:23 tcm_nab5000.o
+-rw-r--r-- 1 root root 211 2010-10-05 03:23 .tcm_nab5000.o.cmd
+
+*) Load the new module, create a lun_0 configfs group, and add new TCM Core
+ IBLOCK backstore symlink to port:
+
+target:/mnt/sdb/lio-core-2.6.git# insmod drivers/target/tcm_nab5000.ko
+target:/mnt/sdb/lio-core-2.6.git# mkdir -p /sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0
+target:/mnt/sdb/lio-core-2.6.git# cd /sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0/
+target:/sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0# ln -s /sys/kernel/config/target/core/iblock_0/lvm_test0 nab5000_port
+
+target:/sys/kernel/config/target/nab5000/iqn.foo/tpgt_1/lun/lun_0# cd -
+target:/mnt/sdb/lio-core-2.6.git# tree /sys/kernel/config/target/nab5000/
+/sys/kernel/config/target/nab5000/
+|-- discovery_auth
+|-- iqn.foo
+| `-- tpgt_1
+| |-- acls
+| |-- attrib
+| |-- lun
+| | `-- lun_0
+| | |-- alua_tg_pt_gp
+| | |-- alua_tg_pt_offline
+| | |-- alua_tg_pt_status
+| | |-- alua_tg_pt_write_md
+| | `-- nab5000_port -> ../../../../../../target/core/iblock_0/lvm_test0
+| |-- np
+| `-- param
+`-- version
+
+target:/mnt/sdb/lio-core-2.6.git# lsmod
+Module Size Used by
+tcm_nab5000 3935 4
+iscsi_target_mod 193211 0
+target_core_stgt 8090 0
+target_core_pscsi 11122 1
+target_core_file 9172 2
+target_core_iblock 9280 1
+target_core_mod 228575 31
+tcm_nab5000,iscsi_target_mod,target_core_stgt,target_core_pscsi,target_core_file,target_core_iblock
+libfc 73681 0
+scsi_debug 56265 0
+scsi_tgt 8666 1 target_core_stgt
+configfs 20644 2 target_core_mod
+
+----------------------------------------------------------------------
+
+Future TODO items:
+
+ *) Add more T10 proto_idents
+ *) Make tcm_mod_dump_fabric_ops() smarter and generate function pointer
+ defs directly from include/target/target_core_fabric_ops.h:struct target_core_fabric_ops
+ structure members.
+
+October 5th, 2010
+Nicholas A. Bellinger <nab@linux-iscsi.org>
diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt
new file mode 100644
index 000000000..263b90751
--- /dev/null
+++ b/Documentation/target/tcmu-design.txt
@@ -0,0 +1,372 @@
+Contents:
+
+1) TCM Userspace Design
+ a) Background
+ b) Benefits
+ c) Design constraints
+ d) Implementation overview
+ i. Mailbox
+ ii. Command ring
+ iii. Data Area
+ e) Device discovery
+ f) Device events
+ g) Other contingencies
+2) Writing a user pass-through handler
+ a) Discovering and configuring TCMU uio devices
+ b) Waiting for events on the device(s)
+ c) Managing the command ring
+3) A final note
+
+
+TCM Userspace Design
+--------------------
+
+TCM is another name for LIO, an in-kernel iSCSI target (server).
+Existing TCM targets run in the kernel. TCMU (TCM in Userspace)
+allows userspace programs to be written which act as iSCSI targets.
+This document describes the design.
+
+The existing kernel provides modules for different SCSI transport
+protocols. TCM also modularizes the data storage. There are existing
+modules for file, block device, RAM or using another SCSI device as
+storage. These are called "backstores" or "storage engines". These
+built-in modules are implemented entirely as kernel code.
+
+Background:
+
+In addition to modularizing the transport protocol used for carrying
+SCSI commands ("fabrics"), the Linux kernel target, LIO, also modularizes
+the actual data storage as well. These are referred to as "backstores"
+or "storage engines". The target comes with backstores that allow a
+file, a block device, RAM, or another SCSI device to be used for the
+local storage needed for the exported SCSI LUN. Like the rest of LIO,
+these are implemented entirely as kernel code.
+
+These backstores cover the most common use cases, but not all. One new
+use case that other non-kernel target solutions, such as tgt, are able
+to support is using Gluster's GLFS or Ceph's RBD as a backstore. The
+target then serves as a translator, allowing initiators to store data
+in these non-traditional networked storage systems, while still only
+using standard protocols themselves.
+
+If the target is a userspace process, supporting these is easy. tgt,
+for example, needs only a small adapter module for each, because the
+modules just use the available userspace libraries for RBD and GLFS.
+
+Adding support for these backstores in LIO is considerably more
+difficult, because LIO is entirely kernel code. Instead of undertaking
+the significant work to port the GLFS or RBD APIs and protocols to the
+kernel, another approach is to create a userspace pass-through
+backstore for LIO, "TCMU".
+
+
+Benefits:
+
+In addition to allowing relatively easy support for RBD and GLFS, TCMU
+will also allow easier development of new backstores. TCMU combines
+with the LIO loopback fabric to become something similar to FUSE
+(Filesystem in Userspace), but at the SCSI layer instead of the
+filesystem layer. A SUSE, if you will.
+
+The disadvantage is there are more distinct components to configure, and
+potentially to malfunction. This is unavoidable, but hopefully not
+fatal if we're careful to keep things as simple as possible.
+
+Design constraints:
+
+- Good performance: high throughput, low latency
+- Cleanly handle if userspace:
+ 1) never attaches
+ 2) hangs
+ 3) dies
+ 4) misbehaves
+- Allow future flexibility in user & kernel implementations
+- Be reasonably memory-efficient
+- Simple to configure & run
+- Simple to write a userspace backend
+
+
+Implementation overview:
+
+The core of the TCMU interface is a memory region that is shared
+between kernel and userspace. Within this region is: a control area
+(mailbox); a lockless producer/consumer circular buffer for commands
+to be passed up, and status returned; and an in/out data buffer area.
+
+TCMU uses the pre-existing UIO subsystem. UIO allows device driver
+development in userspace, and this is conceptually very close to the
+TCMU use case, except instead of a physical device, TCMU implements a
+memory-mapped layout designed for SCSI commands. Using UIO also
+benefits TCMU by handling device introspection (e.g. a way for
+userspace to determine how large the shared region is) and signaling
+mechanisms in both directions.
+
+There are no embedded pointers in the memory region. Everything is
+expressed as an offset from the region's starting address. This allows
+the ring to still work if the user process dies and is restarted with
+the region mapped at a different virtual address.
+
+See target_core_user.h for the struct definitions.
+
+The Mailbox:
+
+The mailbox is always at the start of the shared memory region, and
+contains a version, details about the starting offset and size of the
+command ring, and head and tail pointers to be used by the kernel and
+userspace (respectively) to put commands on the ring, and indicate
+when the commands are completed.
+
+version - 1 (userspace should abort if otherwise)
+flags - none yet defined.
+cmdr_off - The offset of the start of the command ring from the start
+of the memory region, to account for the mailbox size.
+cmdr_size - The size of the command ring. This does *not* need to be a
+power of two.
+cmd_head - Modified by the kernel to indicate when a command has been
+placed on the ring.
+cmd_tail - Modified by userspace to indicate when it has completed
+processing of a command.
+
+The Command Ring:
+
+Commands are placed on the ring by the kernel incrementing
+mailbox.cmd_head by the size of the command, modulo cmdr_size, and
+then signaling userspace via uio_event_notify(). Once the command is
+completed, userspace updates mailbox.cmd_tail in the same way and
+signals the kernel via a 4-byte write(). When cmd_head equals
+cmd_tail, the ring is empty -- no commands are currently waiting to be
+processed by userspace.
+
+TCMU commands are 8-byte aligned. They start with a common header
+containing "len_op", a 32-bit value that stores the length, as well as
+the opcode in the lowest unused bits. It also contains cmd_id and
+flags fields for setting by the kernel (kflags) and userspace
+(uflags).
+
+Currently only two opcodes are defined, TCMU_OP_CMD and TCMU_OP_PAD.
+
+When the opcode is CMD, the entry in the command ring is a struct
+tcmu_cmd_entry. Userspace finds the SCSI CDB (Command Data Block) via
+tcmu_cmd_entry.req.cdb_off. This is an offset from the start of the
+overall shared memory region, not the entry. The data in/out buffers
+are accessible via tht req.iov[] array. iov_cnt contains the number of
+entries in iov[] needed to describe either the Data-In or Data-Out
+buffers. For bidirectional commands, iov_cnt specifies how many iovec
+entries cover the Data-Out area, and iov_bidi_count specifies how many
+iovec entries immediately after that in iov[] cover the Data-In
+area. Just like other fields, iov.iov_base is an offset from the start
+of the region.
+
+When completing a command, userspace sets rsp.scsi_status, and
+rsp.sense_buffer if necessary. Userspace then increments
+mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the
+kernel via the UIO method, a 4-byte write to the file descriptor.
+
+When the opcode is PAD, userspace only updates cmd_tail as above --
+it's a no-op. (The kernel inserts PAD entries to ensure each CMD entry
+is contiguous within the command ring.)
+
+More opcodes may be added in the future. If userspace encounters an
+opcode it does not handle, it must set UNKNOWN_OP bit (bit 0) in
+hdr.uflags, update cmd_tail, and proceed with processing additional
+commands, if any.
+
+The Data Area:
+
+This is shared-memory space after the command ring. The organization
+of this area is not defined in the TCMU interface, and userspace
+should access only the parts referenced by pending iovs.
+
+
+Device Discovery:
+
+Other devices may be using UIO besides TCMU. Unrelated user processes
+may also be handling different sets of TCMU devices. TCMU userspace
+processes must find their devices by scanning sysfs
+class/uio/uio*/name. For TCMU devices, these names will be of the
+format:
+
+tcm-user/<hba_num>/<device_name>/<subtype>/<path>
+
+where "tcm-user" is common for all TCMU-backed UIO devices. <hba_num>
+and <device_name> allow userspace to find the device's path in the
+kernel target's configfs tree. Assuming the usual mount point, it is
+found at:
+
+/sys/kernel/config/target/core/user_<hba_num>/<device_name>
+
+This location contains attributes such as "hw_block_size", that
+userspace needs to know for correct operation.
+
+<subtype> will be a userspace-process-unique string to identify the
+TCMU device as expecting to be backed by a certain handler, and <path>
+will be an additional handler-specific string for the user process to
+configure the device, if needed. The name cannot contain ':', due to
+LIO limitations.
+
+For all devices so discovered, the user handler opens /dev/uioX and
+calls mmap():
+
+mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0)
+
+where size must be equal to the value read from
+/sys/class/uio/uioX/maps/map0/size.
+
+
+Device Events:
+
+If a new device is added or removed, a notification will be broadcast
+over netlink, using a generic netlink family name of "TCM-USER" and a
+multicast group named "config". This will include the UIO name as
+described in the previous section, as well as the UIO minor
+number. This should allow userspace to identify both the UIO device and
+the LIO device, so that after determining the device is supported
+(based on subtype) it can take the appropriate action.
+
+
+Other contingencies:
+
+Userspace handler process never attaches:
+
+- TCMU will post commands, and then abort them after a timeout period
+ (30 seconds.)
+
+Userspace handler process is killed:
+
+- It is still possible to restart and re-connect to TCMU
+ devices. Command ring is preserved. However, after the timeout period,
+ the kernel will abort pending tasks.
+
+Userspace handler process hangs:
+
+- The kernel will abort pending tasks after a timeout period.
+
+Userspace handler process is malicious:
+
+- The process can trivially break the handling of devices it controls,
+ but should not be able to access kernel memory outside its shared
+ memory areas.
+
+
+Writing a user pass-through handler (with example code)
+-------------------------------------------------------
+
+A user process handing a TCMU device must support the following:
+
+a) Discovering and configuring TCMU uio devices
+b) Waiting for events on the device(s)
+c) Managing the command ring: Parsing operations and commands,
+ performing work as needed, setting response fields (scsi_status and
+ possibly sense_buffer), updating cmd_tail, and notifying the kernel
+ that work has been finished
+
+First, consider instead writing a plugin for tcmu-runner. tcmu-runner
+implements all of this, and provides a higher-level API for plugin
+authors.
+
+TCMU is designed so that multiple unrelated processes can manage TCMU
+devices separately. All handlers should make sure to only open their
+devices, based opon a known subtype string.
+
+a) Discovering and configuring TCMU UIO devices:
+
+(error checking omitted for brevity)
+
+int fd, dev_fd;
+char buf[256];
+unsigned long long map_len;
+void *map;
+
+fd = open("/sys/class/uio/uio0/name", O_RDONLY);
+ret = read(fd, buf, sizeof(buf));
+close(fd);
+buf[ret-1] = '\0'; /* null-terminate and chop off the \n */
+
+/* we only want uio devices whose name is a format we expect */
+if (strncmp(buf, "tcm-user", 8))
+ exit(-1);
+
+/* Further checking for subtype also needed here */
+
+fd = open(/sys/class/uio/%s/maps/map0/size, O_RDONLY);
+ret = read(fd, buf, sizeof(buf));
+close(fd);
+str_buf[ret-1] = '\0'; /* null-terminate and chop off the \n */
+
+map_len = strtoull(buf, NULL, 0);
+
+dev_fd = open("/dev/uio0", O_RDWR);
+map = mmap(NULL, map_len, PROT_READ|PROT_WRITE, MAP_SHARED, dev_fd, 0);
+
+
+b) Waiting for events on the device(s)
+
+while (1) {
+ char buf[4];
+
+ int ret = read(dev_fd, buf, 4); /* will block */
+
+ handle_device_events(dev_fd, map);
+}
+
+
+c) Managing the command ring
+
+#include <linux/target_core_user.h>
+
+int handle_device_events(int fd, void *map)
+{
+ struct tcmu_mailbox *mb = map;
+ struct tcmu_cmd_entry *ent = (void *) mb + mb->cmdr_off + mb->cmd_tail;
+ int did_some_work = 0;
+
+ /* Process events from cmd ring until we catch up with cmd_head */
+ while (ent != (void *)mb + mb->cmdr_off + mb->cmd_head) {
+
+ if (tcmu_hdr_get_op(ent->hdr.len_op) == TCMU_OP_CMD) {
+ uint8_t *cdb = (void *)mb + ent->req.cdb_off;
+ bool success = true;
+
+ /* Handle command here. */
+ printf("SCSI opcode: 0x%x\n", cdb[0]);
+
+ /* Set response fields */
+ if (success)
+ ent->rsp.scsi_status = SCSI_NO_SENSE;
+ else {
+ /* Also fill in rsp->sense_buffer here */
+ ent->rsp.scsi_status = SCSI_CHECK_CONDITION;
+ }
+ }
+ else if (tcmu_hdr_get_op(ent->hdr.len_op) != TCMU_OP_PAD) {
+ /* Tell the kernel we didn't handle unknown opcodes */
+ ent->hdr.uflags |= TCMU_UFLAG_UNKNOWN_OP;
+ }
+ else {
+ /* Do nothing for PAD entries except update cmd_tail */
+ }
+
+ /* update cmd_tail */
+ mb->cmd_tail = (mb->cmd_tail + tcmu_hdr_get_len(&ent->hdr)) % mb->cmdr_size;
+ ent = (void *) mb + mb->cmdr_off + mb->cmd_tail;
+ did_some_work = 1;
+ }
+
+ /* Notify the kernel that work has been finished */
+ if (did_some_work) {
+ uint32_t buf = 0;
+
+ write(fd, &buf, 4);
+ }
+
+ return 0;
+}
+
+
+A final note
+------------
+
+Please be careful to return codes as defined by the SCSI
+specifications. These are different than some values defined in the
+scsi/scsi.h include file. For example, CHECK CONDITION's status code
+is 2, not 1.
diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt
new file mode 100644
index 000000000..753e47cc2
--- /dev/null
+++ b/Documentation/thermal/cpu-cooling-api.txt
@@ -0,0 +1,43 @@
+CPU cooling APIs How To
+===================================
+
+Written by Amit Daniel Kachhap <amit.kachhap@linaro.org>
+
+Updated: 6 Jan 2015
+
+Copyright (c) 2012 Samsung Electronics Co., Ltd(http://www.samsung.com)
+
+0. Introduction
+
+The generic cpu cooling(freq clipping) provides registration/unregistration APIs
+to the caller. The binding of the cooling devices to the trip point is left for
+the user. The registration APIs returns the cooling device pointer.
+
+1. cpu cooling APIs
+
+1.1 cpufreq registration/unregistration APIs
+1.1.1 struct thermal_cooling_device *cpufreq_cooling_register(
+ struct cpumask *clip_cpus)
+
+ This interface function registers the cpufreq cooling device with the name
+ "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
+ cooling devices.
+
+ clip_cpus: cpumask of cpus where the frequency constraints will happen.
+
+1.1.2 struct thermal_cooling_device *of_cpufreq_cooling_register(
+ struct device_node *np, const struct cpumask *clip_cpus)
+
+ This interface function registers the cpufreq cooling device with
+ the name "thermal-cpufreq-%x" linking it with a device tree node, in
+ order to bind it via the thermal DT code. This api can support multiple
+ instances of cpufreq cooling devices.
+
+ np: pointer to the cooling device device tree node
+ clip_cpus: cpumask of cpus where the frequency constraints will happen.
+
+1.1.3 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
+
+ This interface function unregisters the "thermal-cpufreq-%x" cooling device.
+
+ cdev: Cooling device pointer which has to be unregistered.
diff --git a/Documentation/thermal/exynos_thermal b/Documentation/thermal/exynos_thermal
new file mode 100644
index 000000000..9010c4416
--- /dev/null
+++ b/Documentation/thermal/exynos_thermal
@@ -0,0 +1,77 @@
+Kernel driver exynos_tmu
+=================
+
+Supported chips:
+* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC
+ Datasheet: Not publicly available
+
+Authors: Donggeun Kim <dg77.kim@samsung.com>
+Authors: Amit Daniel <amit.daniel@samsung.com>
+
+TMU controller Description:
+---------------------------
+
+This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC.
+
+The chip only exposes the measured 8-bit temperature code value
+through a register.
+Temperature can be taken from the temperature code.
+There are three equations converting from temperature to temperature code.
+
+The three equations are:
+ 1. Two point trimming
+ Tc = (T - 25) * (TI2 - TI1) / (85 - 25) + TI1
+
+ 2. One point trimming
+ Tc = T + TI1 - 25
+
+ 3. No trimming
+ Tc = T + 50
+
+ Tc: Temperature code, T: Temperature,
+ TI1: Trimming info for 25 degree Celsius (stored at TRIMINFO register)
+ Temperature code measured at 25 degree Celsius which is unchanged
+ TI2: Trimming info for 85 degree Celsius (stored at TRIMINFO register)
+ Temperature code measured at 85 degree Celsius which is unchanged
+
+TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt
+when temperature exceeds pre-defined levels.
+The maximum number of configurable threshold is five.
+The threshold levels are defined as follows:
+ Level_0: current temperature > trigger_level_0 + threshold
+ Level_1: current temperature > trigger_level_1 + threshold
+ Level_2: current temperature > trigger_level_2 + threshold
+ Level_3: current temperature > trigger_level_3 + threshold
+
+ The threshold and each trigger_level are set
+ through the corresponding registers.
+
+When an interrupt occurs, this driver notify kernel thermal framework
+with the function exynos_report_trigger.
+Although an interrupt condition for level_0 can be set,
+it can be used to synchronize the cooling action.
+
+TMU driver description:
+-----------------------
+
+The exynos thermal driver is structured as,
+
+ Kernel Core thermal framework
+ (thermal_core.c, step_wise.c, cpu_cooling.c)
+ ^
+ |
+ |
+TMU configuration data -------> TMU Driver <------> Exynos Core thermal wrapper
+(exynos_tmu_data.c) (exynos_tmu.c) (exynos_thermal_common.c)
+(exynos_tmu_data.h) (exynos_tmu.h) (exynos_thermal_common.h)
+
+a) TMU configuration data: This consist of TMU register offsets/bitfields
+ described through structure exynos_tmu_registers. Also several
+ other platform data (struct exynos_tmu_platform_data) members
+ are used to configure the TMU.
+b) TMU driver: This component initialises the TMU controller and sets different
+ thresholds. It invokes core thermal implementation with the call
+ exynos_report_trigger.
+c) Exynos Core thermal wrapper: This provides 3 wrapper function to use the
+ Kernel core thermal framework. They are exynos_unregister_thermal,
+ exynos_register_thermal and exynos_report_trigger.
diff --git a/Documentation/thermal/exynos_thermal_emulation b/Documentation/thermal/exynos_thermal_emulation
new file mode 100644
index 000000000..b15efec6c
--- /dev/null
+++ b/Documentation/thermal/exynos_thermal_emulation
@@ -0,0 +1,53 @@
+EXYNOS EMULATION MODE
+========================
+
+Copyright (C) 2012 Samsung Electronics
+
+Written by Jonghwa Lee <jonghwa3.lee@samsung.com>
+
+Description
+-----------
+
+Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal management unit.
+Thermal emulation mode supports software debug for TMU's operation. User can set temperature
+manually with software code and TMU will read current temperature from user value not from
+sensor's value.
+
+Enabling CONFIG_THERMAL_EMULATION option will make this support available.
+When it's enabled, sysfs node will be created as
+/sys/devices/virtual/thermal/thermal_zone'zone id'/emul_temp.
+
+The sysfs node, 'emul_node', will contain value 0 for the initial state. When you input any
+temperature you want to update to sysfs node, it automatically enable emulation mode and
+current temperature will be changed into it.
+(Exynos also supports user changeable delay time which would be used to delay of
+ changing temperature. However, this node only uses same delay of real sensing time, 938us.)
+
+Exynos emulation mode requires synchronous of value changing and enabling. It means when you
+want to update the any value of delay or next temperature, then you have to enable emulation
+mode at the same time. (Or you have to keep the mode enabling.) If you don't, it fails to
+change the value to updated one and just use last succeessful value repeatedly. That's why
+this node gives users the right to change termerpature only. Just one interface makes it more
+simply to use.
+
+Disabling emulation mode only requires writing value 0 to sysfs node.
+
+
+TEMP 120 |
+ |
+ 100 |
+ |
+ 80 |
+ | +-----------
+ 60 | | |
+ | +-------------| |
+ 40 | | | |
+ | | | |
+ 20 | | | +----------
+ | | | | |
+ 0 |______________|_____________|__________|__________|_________
+ A A A A TIME
+ |<----->| |<----->| |<----->| |
+ | 938us | | | | | |
+emulation : 0 50 | 70 | 20 | 0
+current temp : sensor 50 70 20 sensor
diff --git a/Documentation/thermal/intel_powerclamp.txt b/Documentation/thermal/intel_powerclamp.txt
new file mode 100644
index 000000000..332de4a39
--- /dev/null
+++ b/Documentation/thermal/intel_powerclamp.txt
@@ -0,0 +1,307 @@
+ =======================
+ INTEL POWERCLAMP DRIVER
+ =======================
+By: Arjan van de Ven <arjan@linux.intel.com>
+ Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+Contents:
+ (*) Introduction
+ - Goals and Objectives
+
+ (*) Theory of Operation
+ - Idle Injection
+ - Calibration
+
+ (*) Performance Analysis
+ - Effectiveness and Limitations
+ - Power vs Performance
+ - Scalability
+ - Calibration
+ - Comparison with Alternative Techniques
+
+ (*) Usage and Interfaces
+ - Generic Thermal Layer (sysfs)
+ - Kernel APIs (TBD)
+
+============
+INTRODUCTION
+============
+
+Consider the situation where a system’s power consumption must be
+reduced at runtime, due to power budget, thermal constraint, or noise
+level, and where active cooling is not preferred. Software managed
+passive power reduction must be performed to prevent the hardware
+actions that are designed for catastrophic scenarios.
+
+Currently, P-states, T-states (clock modulation), and CPU offlining
+are used for CPU throttling.
+
+On Intel CPUs, C-states provide effective power reduction, but so far
+they’re only used opportunistically, based on workload. With the
+development of intel_powerclamp driver, the method of synchronizing
+idle injection across all online CPU threads was introduced. The goal
+is to achieve forced and controllable C-state residency.
+
+Test/Analysis has been made in the areas of power, performance,
+scalability, and user experience. In many cases, clear advantage is
+shown over taking the CPU offline or modulating the CPU clock.
+
+
+===================
+THEORY OF OPERATION
+===================
+
+Idle Injection
+--------------
+
+On modern Intel processors (Nehalem or later), package level C-state
+residency is available in MSRs, thus also available to the kernel.
+
+These MSRs are:
+ #define MSR_PKG_C2_RESIDENCY 0x60D
+ #define MSR_PKG_C3_RESIDENCY 0x3F8
+ #define MSR_PKG_C6_RESIDENCY 0x3F9
+ #define MSR_PKG_C7_RESIDENCY 0x3FA
+
+If the kernel can also inject idle time to the system, then a
+closed-loop control system can be established that manages package
+level C-state. The intel_powerclamp driver is conceived as such a
+control system, where the target set point is a user-selected idle
+ratio (based on power reduction), and the error is the difference
+between the actual package level C-state residency ratio and the target idle
+ratio.
+
+Injection is controlled by high priority kernel threads, spawned for
+each online CPU.
+
+These kernel threads, with SCHED_FIFO class, are created to perform
+clamping actions of controlled duty ratio and duration. Each per-CPU
+thread synchronizes its idle time and duration, based on the rounding
+of jiffies, so accumulated errors can be prevented to avoid a jittery
+effect. Threads are also bound to the CPU such that they cannot be
+migrated, unless the CPU is taken offline. In this case, threads
+belong to the offlined CPUs will be terminated immediately.
+
+Running as SCHED_FIFO and relatively high priority, also allows such
+scheme to work for both preemptable and non-preemptable kernels.
+Alignment of idle time around jiffies ensures scalability for HZ
+values. This effect can be better visualized using a Perf timechart.
+The following diagram shows the behavior of kernel thread
+kidle_inject/cpu. During idle injection, it runs monitor/mwait idle
+for a given "duration", then relinquishes the CPU to other tasks,
+until the next time interval.
+
+The NOHZ schedule tick is disabled during idle time, but interrupts
+are not masked. Tests show that the extra wakeups from scheduler tick
+have a dramatic impact on the effectiveness of the powerclamp driver
+on large scale systems (Westmere system with 80 processors).
+
+CPU0
+ ____________ ____________
+kidle_inject/0 | sleep | mwait | sleep |
+ _________| |________| |_______
+ duration
+CPU1
+ ____________ ____________
+kidle_inject/1 | sleep | mwait | sleep |
+ _________| |________| |_______
+ ^
+ |
+ |
+ roundup(jiffies, interval)
+
+Only one CPU is allowed to collect statistics and update global
+control parameters. This CPU is referred to as the controlling CPU in
+this document. The controlling CPU is elected at runtime, with a
+policy that favors BSP, taking into account the possibility of a CPU
+hot-plug.
+
+In terms of dynamics of the idle control system, package level idle
+time is considered largely as a non-causal system where its behavior
+cannot be based on the past or current input. Therefore, the
+intel_powerclamp driver attempts to enforce the desired idle time
+instantly as given input (target idle ratio). After injection,
+powerclamp moniors the actual idle for a given time window and adjust
+the next injection accordingly to avoid over/under correction.
+
+When used in a causal control system, such as a temperature control,
+it is up to the user of this driver to implement algorithms where
+past samples and outputs are included in the feedback. For example, a
+PID-based thermal controller can use the powerclamp driver to
+maintain a desired target temperature, based on integral and
+derivative gains of the past samples.
+
+
+
+Calibration
+-----------
+During scalability testing, it is observed that synchronized actions
+among CPUs become challenging as the number of cores grows. This is
+also true for the ability of a system to enter package level C-states.
+
+To make sure the intel_powerclamp driver scales well, online
+calibration is implemented. The goals for doing such a calibration
+are:
+
+a) determine the effective range of idle injection ratio
+b) determine the amount of compensation needed at each target ratio
+
+Compensation to each target ratio consists of two parts:
+
+ a) steady state error compensation
+ This is to offset the error occurring when the system can
+ enter idle without extra wakeups (such as external interrupts).
+
+ b) dynamic error compensation
+ When an excessive amount of wakeups occurs during idle, an
+ additional idle ratio can be added to quiet interrupts, by
+ slowing down CPU activities.
+
+A debugfs file is provided for the user to examine compensation
+progress and results, such as on a Westmere system.
+[jacob@nex01 ~]$ cat
+/sys/kernel/debug/intel_powerclamp/powerclamp_calib
+controlling cpu: 0
+pct confidence steady dynamic (compensation)
+0 0 0 0
+1 1 0 0
+2 1 1 0
+3 3 1 0
+4 3 1 0
+5 3 1 0
+6 3 1 0
+7 3 1 0
+8 3 1 0
+...
+30 3 2 0
+31 3 2 0
+32 3 1 0
+33 3 2 0
+34 3 1 0
+35 3 2 0
+36 3 1 0
+37 3 2 0
+38 3 1 0
+39 3 2 0
+40 3 3 0
+41 3 1 0
+42 3 2 0
+43 3 1 0
+44 3 1 0
+45 3 2 0
+46 3 3 0
+47 3 0 0
+48 3 2 0
+49 3 3 0
+
+Calibration occurs during runtime. No offline method is available.
+Steady state compensation is used only when confidence levels of all
+adjacent ratios have reached satisfactory level. A confidence level
+is accumulated based on clean data collected at runtime. Data
+collected during a period without extra interrupts is considered
+clean.
+
+To compensate for excessive amounts of wakeup during idle, additional
+idle time is injected when such a condition is detected. Currently,
+we have a simple algorithm to double the injection ratio. A possible
+enhancement might be to throttle the offending IRQ, such as delaying
+EOI for level triggered interrupts. But it is a challenge to be
+non-intrusive to the scheduler or the IRQ core code.
+
+
+CPU Online/Offline
+------------------
+Per-CPU kernel threads are started/stopped upon receiving
+notifications of CPU hotplug activities. The intel_powerclamp driver
+keeps track of clamping kernel threads, even after they are migrated
+to other CPUs, after a CPU offline event.
+
+
+=====================
+Performance Analysis
+=====================
+This section describes the general performance data collected on
+multiple systems, including Westmere (80P) and Ivy Bridge (4P, 8P).
+
+Effectiveness and Limitations
+-----------------------------
+The maximum range that idle injection is allowed is capped at 50
+percent. As mentioned earlier, since interrupts are allowed during
+forced idle time, excessive interrupts could result in less
+effectiveness. The extreme case would be doing a ping -f to generated
+flooded network interrupts without much CPU acknowledgement. In this
+case, little can be done from the idle injection threads. In most
+normal cases, such as scp a large file, applications can be throttled
+by the powerclamp driver, since slowing down the CPU also slows down
+network protocol processing, which in turn reduces interrupts.
+
+When control parameters change at runtime by the controlling CPU, it
+may take an additional period for the rest of the CPUs to catch up
+with the changes. During this time, idle injection is out of sync,
+thus not able to enter package C- states at the expected ratio. But
+this effect is minor, in that in most cases change to the target
+ratio is updated much less frequently than the idle injection
+frequency.
+
+Scalability
+-----------
+Tests also show a minor, but measurable, difference between the 4P/8P
+Ivy Bridge system and the 80P Westmere server under 50% idle ratio.
+More compensation is needed on Westmere for the same amount of
+target idle ratio. The compensation also increases as the idle ratio
+gets larger. The above reason constitutes the need for the
+calibration code.
+
+On the IVB 8P system, compared to an offline CPU, powerclamp can
+achieve up to 40% better performance per watt. (measured by a spin
+counter summed over per CPU counting threads spawned for all running
+CPUs).
+
+====================
+Usage and Interfaces
+====================
+The powerclamp driver is registered to the generic thermal layer as a
+cooling device. Currently, it’s not bound to any thermal zones.
+
+jacob@chromoly:/sys/class/thermal/cooling_device14$ grep . *
+cur_state:0
+max_state:50
+type:intel_powerclamp
+
+Example usage:
+- To inject 25% idle time
+$ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state
+"
+
+If the system is not busy and has more than 25% idle time already,
+then the powerclamp driver will not start idle injection. Using Top
+will not show idle injection kernel threads.
+
+If the system is busy (spin test below) and has less than 25% natural
+idle time, powerclamp kernel threads will do idle injection, which
+appear running to the scheduler. But the overall system idle is still
+reflected. In this example, 24.1% idle is shown. This helps the
+system admin or user determine the cause of slowdown, when a
+powerclamp driver is in action.
+
+
+Tasks: 197 total, 1 running, 196 sleeping, 0 stopped, 0 zombie
+Cpu(s): 71.2%us, 4.7%sy, 0.0%ni, 24.1%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st
+Mem: 3943228k total, 1689632k used, 2253596k free, 74960k buffers
+Swap: 4087804k total, 0k used, 4087804k free, 945336k cached
+
+ PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
+ 3352 jacob 20 0 262m 644 428 S 286 0.0 0:17.16 spin
+ 3341 root -51 0 0 0 0 D 25 0.0 0:01.62 kidle_inject/0
+ 3344 root -51 0 0 0 0 D 25 0.0 0:01.60 kidle_inject/3
+ 3342 root -51 0 0 0 0 D 25 0.0 0:01.61 kidle_inject/1
+ 3343 root -51 0 0 0 0 D 25 0.0 0:01.60 kidle_inject/2
+ 2935 jacob 20 0 696m 125m 35m S 5 3.3 0:31.11 firefox
+ 1546 root 20 0 158m 20m 6640 S 3 0.5 0:26.97 Xorg
+ 2100 jacob 20 0 1223m 88m 30m S 3 2.3 0:23.68 compiz
+
+Tests have shown that by using the powerclamp driver as a cooling
+device, a PID based userspace thermal controller can manage to
+control CPU temperature effectively, when no other thermal influence
+is added. For example, a UltraBook user can compile the kernel under
+certain temperature (below most active trip points).
diff --git a/Documentation/thermal/nouveau_thermal b/Documentation/thermal/nouveau_thermal
new file mode 100644
index 000000000..60bc29357
--- /dev/null
+++ b/Documentation/thermal/nouveau_thermal
@@ -0,0 +1,82 @@
+Kernel driver nouveau
+===================
+
+Supported chips:
+* NV43+
+
+Authors: Martin Peres (mupuf) <martin.peres@free.fr>
+
+Description
+---------
+
+This driver allows to read the GPU core temperature, drive the GPU fan and
+set temperature alarms.
+
+Currently, due to the absence of in-kernel API to access HWMON drivers, Nouveau
+cannot access any of the i2c external monitoring chips it may find. If you
+have one of those, temperature and/or fan management through Nouveau's HWMON
+interface is likely not to work. This document may then not cover your situation
+entirely.
+
+Temperature management
+--------------------
+
+Temperature is exposed under as a read-only HWMON attribute temp1_input.
+
+In order to protect the GPU from overheating, Nouveau supports 4 configurable
+temperature thresholds:
+
+ * Fan_boost: Fan speed is set to 100% when reaching this temperature;
+ * Downclock: The GPU will be downclocked to reduce its power dissipation;
+ * Critical: The GPU is put on hold to further lower power dissipation;
+ * Shutdown: Shut the computer down to protect your GPU.
+
+WARNING: Some of these thresholds may not be used by Nouveau depending
+on your chipset.
+
+The default value for these thresholds comes from the GPU's vbios. These
+thresholds can be configured thanks to the following HWMON attributes:
+
+ * Fan_boost: temp1_auto_point1_temp and temp1_auto_point1_temp_hyst;
+ * Downclock: temp1_max and temp1_max_hyst;
+ * Critical: temp1_crit and temp1_crit_hyst;
+ * Shutdown: temp1_emergency and temp1_emergency_hyst.
+
+NOTE: Remember that the values are stored as milli degrees Celcius. Don't forget
+to multiply!
+
+Fan management
+------------
+
+Not all cards have a drivable fan. If you do, then the following HWMON
+attributes should be available:
+
+ * pwm1_enable: Current fan management mode (NONE, MANUAL or AUTO);
+ * pwm1: Current PWM value (power percentage);
+ * pwm1_min: The minimum PWM speed allowed;
+ * pwm1_max: The maximum PWM speed allowed (bypassed when hitting Fan_boost);
+
+You may also have the following attribute:
+
+ * fan1_input: Speed in RPM of your fan.
+
+Your fan can be driven in different modes:
+
+ * 0: The fan is left untouched;
+ * 1: The fan can be driven in manual (use pwm1 to change the speed);
+ * 2; The fan is driven automatically depending on the temperature.
+
+NOTE: Be sure to use the manual mode if you want to drive the fan speed manually
+
+NOTE2: When operating in manual mode outside the vbios-defined
+[PWM_min, PWM_max] range, the reported fan speed (RPM) may not be accurate
+depending on your hardware.
+
+Bug reports
+---------
+
+Thermal management on Nouveau is new and may not work on all cards. If you have
+inquiries, please ping mupuf on IRC (#nouveau, freenode).
+
+Bug reports should be filled on Freedesktop's bug tracker. Please follow
+http://nouveau.freedesktop.org/wiki/Bugs
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
new file mode 100644
index 000000000..87519cb37
--- /dev/null
+++ b/Documentation/thermal/sysfs-api.txt
@@ -0,0 +1,401 @@
+Generic Thermal Sysfs driver How To
+===================================
+
+Written by Sujith Thomas <sujith.thomas@intel.com>, Zhang Rui <rui.zhang@intel.com>
+
+Updated: 2 January 2008
+
+Copyright (c) 2008 Intel Corporation
+
+
+0. Introduction
+
+The generic thermal sysfs provides a set of interfaces for thermal zone
+devices (sensors) and thermal cooling devices (fan, processor...) to register
+with the thermal management solution and to be a part of it.
+
+This how-to focuses on enabling new thermal zone and cooling devices to
+participate in thermal management.
+This solution is platform independent and any type of thermal zone devices
+and cooling devices should be able to make use of the infrastructure.
+
+The main task of the thermal sysfs driver is to expose thermal zone attributes
+as well as cooling device attributes to the user space.
+An intelligent thermal management application can make decisions based on
+inputs from thermal zone attributes (the current temperature and trip point
+temperature) and throttle appropriate devices.
+
+[0-*] denotes any positive number starting from 0
+[1-*] denotes any positive number starting from 1
+
+1. thermal sysfs driver interface functions
+
+1.1 thermal zone device interface
+1.1.1 struct thermal_zone_device *thermal_zone_device_register(char *type,
+ int trips, int mask, void *devdata,
+ struct thermal_zone_device_ops *ops,
+ const struct thermal_zone_params *tzp,
+ int passive_delay, int polling_delay))
+
+ This interface function adds a new thermal zone device (sensor) to
+ /sys/class/thermal folder as thermal_zone[0-*]. It tries to bind all the
+ thermal cooling devices registered at the same time.
+
+ type: the thermal zone type.
+ trips: the total number of trip points this thermal zone supports.
+ mask: Bit string: If 'n'th bit is set, then trip point 'n' is writeable.
+ devdata: device private data
+ ops: thermal zone device call-backs.
+ .bind: bind the thermal zone device with a thermal cooling device.
+ .unbind: unbind the thermal zone device with a thermal cooling device.
+ .get_temp: get the current temperature of the thermal zone.
+ .get_mode: get the current mode (enabled/disabled) of the thermal zone.
+ - "enabled" means the kernel thermal management is enabled.
+ - "disabled" will prevent kernel thermal driver action upon trip points
+ so that user applications can take charge of thermal management.
+ .set_mode: set the mode (enabled/disabled) of the thermal zone.
+ .get_trip_type: get the type of certain trip point.
+ .get_trip_temp: get the temperature above which the certain trip point
+ will be fired.
+ .set_emul_temp: set the emulation temperature which helps in debugging
+ different threshold temperature points.
+ tzp: thermal zone platform parameters.
+ passive_delay: number of milliseconds to wait between polls when
+ performing passive cooling.
+ polling_delay: number of milliseconds to wait between polls when checking
+ whether trip points have been crossed (0 for interrupt driven systems).
+
+
+1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz)
+
+ This interface function removes the thermal zone device.
+ It deletes the corresponding entry form /sys/class/thermal folder and
+ unbind all the thermal cooling devices it uses.
+
+1.2 thermal cooling device interface
+1.2.1 struct thermal_cooling_device *thermal_cooling_device_register(char *name,
+ void *devdata, struct thermal_cooling_device_ops *)
+
+ This interface function adds a new thermal cooling device (fan/processor/...)
+ to /sys/class/thermal/ folder as cooling_device[0-*]. It tries to bind itself
+ to all the thermal zone devices register at the same time.
+ name: the cooling device name.
+ devdata: device private data.
+ ops: thermal cooling devices call-backs.
+ .get_max_state: get the Maximum throttle state of the cooling device.
+ .get_cur_state: get the Current throttle state of the cooling device.
+ .set_cur_state: set the Current throttle state of the cooling device.
+
+1.2.2 void thermal_cooling_device_unregister(struct thermal_cooling_device *cdev)
+
+ This interface function remove the thermal cooling device.
+ It deletes the corresponding entry form /sys/class/thermal folder and
+ unbind itself from all the thermal zone devices using it.
+
+1.3 interface for binding a thermal zone device with a thermal cooling device
+1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz,
+ int trip, struct thermal_cooling_device *cdev,
+ unsigned long upper, unsigned long lower);
+
+ This interface function bind a thermal cooling device to the certain trip
+ point of a thermal zone device.
+ This function is usually called in the thermal zone device .bind callback.
+ tz: the thermal zone device
+ cdev: thermal cooling device
+ trip: indicates which trip point the cooling devices is associated with
+ in this thermal zone.
+ upper:the Maximum cooling state for this trip point.
+ THERMAL_NO_LIMIT means no upper limit,
+ and the cooling device can be in max_state.
+ lower:the Minimum cooling state can be used for this trip point.
+ THERMAL_NO_LIMIT means no lower limit,
+ and the cooling device can be in cooling state 0.
+
+1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz,
+ int trip, struct thermal_cooling_device *cdev);
+
+ This interface function unbind a thermal cooling device from the certain
+ trip point of a thermal zone device. This function is usually called in
+ the thermal zone device .unbind callback.
+ tz: the thermal zone device
+ cdev: thermal cooling device
+ trip: indicates which trip point the cooling devices is associated with
+ in this thermal zone.
+
+1.4 Thermal Zone Parameters
+1.4.1 struct thermal_bind_params
+ This structure defines the following parameters that are used to bind
+ a zone with a cooling device for a particular trip point.
+ .cdev: The cooling device pointer
+ .weight: The 'influence' of a particular cooling device on this zone.
+ This is on a percentage scale. The sum of all these weights
+ (for a particular zone) cannot exceed 100.
+ .trip_mask:This is a bit mask that gives the binding relation between
+ this thermal zone and cdev, for a particular trip point.
+ If nth bit is set, then the cdev and thermal zone are bound
+ for trip point n.
+ .limits: This is an array of cooling state limits. Must have exactly
+ 2 * thermal_zone.number_of_trip_points. It is an array consisting
+ of tuples <lower-state upper-state> of state limits. Each trip
+ will be associated with one state limit tuple when binding.
+ A NULL pointer means <THERMAL_NO_LIMITS THERMAL_NO_LIMITS>
+ on all trips. These limits are used when binding a cdev to a
+ trip point.
+ .match: This call back returns success(0) if the 'tz and cdev' need to
+ be bound, as per platform data.
+1.4.2 struct thermal_zone_params
+ This structure defines the platform level parameters for a thermal zone.
+ This data, for each thermal zone should come from the platform layer.
+ This is an optional feature where some platforms can choose not to
+ provide this data.
+ .governor_name: Name of the thermal governor used for this zone
+ .no_hwmon: a boolean to indicate if the thermal to hwmon sysfs interface
+ is required. when no_hwmon == false, a hwmon sysfs interface
+ will be created. when no_hwmon == true, nothing will be done.
+ In case the thermal_zone_params is NULL, the hwmon interface
+ will be created (for backward compatibility).
+ .num_tbps: Number of thermal_bind_params entries for this zone
+ .tbp: thermal_bind_params entries
+
+2. sysfs attributes structure
+
+RO read only value
+RW read/write value
+
+Thermal sysfs attributes will be represented under /sys/class/thermal.
+Hwmon sysfs I/F extension is also available under /sys/class/hwmon
+if hwmon is compiled in or built as a module.
+
+Thermal zone device sys I/F, created once it's registered:
+/sys/class/thermal/thermal_zone[0-*]:
+ |---type: Type of the thermal zone
+ |---temp: Current temperature
+ |---mode: Working mode of the thermal zone
+ |---policy: Thermal governor used for this zone
+ |---trip_point_[0-*]_temp: Trip point temperature
+ |---trip_point_[0-*]_type: Trip point type
+ |---trip_point_[0-*]_hyst: Hysteresis value for this trip point
+ |---emul_temp: Emulated temperature set node
+
+Thermal cooling device sys I/F, created once it's registered:
+/sys/class/thermal/cooling_device[0-*]:
+ |---type: Type of the cooling device(processor/fan/...)
+ |---max_state: Maximum cooling state of the cooling device
+ |---cur_state: Current cooling state of the cooling device
+
+
+Then next two dynamic attributes are created/removed in pairs. They represent
+the relationship between a thermal zone and its associated cooling device.
+They are created/removed for each successful execution of
+thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device.
+
+/sys/class/thermal/thermal_zone[0-*]:
+ |---cdev[0-*]: [0-*]th cooling device in current thermal zone
+ |---cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with
+
+Besides the thermal zone device sysfs I/F and cooling device sysfs I/F,
+the generic thermal driver also creates a hwmon sysfs I/F for each _type_
+of thermal zone device. E.g. the generic thermal driver registers one hwmon
+class device and build the associated hwmon sysfs I/F for all the registered
+ACPI thermal zones.
+
+/sys/class/hwmon/hwmon[0-*]:
+ |---name: The type of the thermal zone devices
+ |---temp[1-*]_input: The current temperature of thermal zone [1-*]
+ |---temp[1-*]_critical: The critical trip point of thermal zone [1-*]
+
+Please read Documentation/hwmon/sysfs-interface for additional information.
+
+***************************
+* Thermal zone attributes *
+***************************
+
+type
+ Strings which represent the thermal zone type.
+ This is given by thermal zone driver as part of registration.
+ E.g: "acpitz" indicates it's an ACPI thermal device.
+ In order to keep it consistent with hwmon sys attribute; this should
+ be a short, lowercase string, not containing spaces nor dashes.
+ RO, Required
+
+temp
+ Current temperature as reported by thermal zone (sensor).
+ Unit: millidegree Celsius
+ RO, Required
+
+mode
+ One of the predefined values in [enabled, disabled].
+ This file gives information about the algorithm that is currently
+ managing the thermal zone. It can be either default kernel based
+ algorithm or user space application.
+ enabled = enable Kernel Thermal management.
+ disabled = Preventing kernel thermal zone driver actions upon
+ trip points so that user application can take full
+ charge of the thermal management.
+ RW, Optional
+
+policy
+ One of the various thermal governors used for a particular zone.
+ RW, Required
+
+trip_point_[0-*]_temp
+ The temperature above which trip point will be fired.
+ Unit: millidegree Celsius
+ RO, Optional
+
+trip_point_[0-*]_type
+ Strings which indicate the type of the trip point.
+ E.g. it can be one of critical, hot, passive, active[0-*] for ACPI
+ thermal zone.
+ RO, Optional
+
+trip_point_[0-*]_hyst
+ The hysteresis value for a trip point, represented as an integer
+ Unit: Celsius
+ RW, Optional
+
+cdev[0-*]
+ Sysfs link to the thermal cooling device node where the sys I/F
+ for cooling device throttling control represents.
+ RO, Optional
+
+cdev[0-*]_trip_point
+ The trip point with which cdev[0-*] is associated in this thermal
+ zone; -1 means the cooling device is not associated with any trip
+ point.
+ RO, Optional
+
+passive
+ Attribute is only present for zones in which the passive cooling
+ policy is not supported by native thermal driver. Default is zero
+ and can be set to a temperature (in millidegrees) to enable a
+ passive trip point for the zone. Activation is done by polling with
+ an interval of 1 second.
+ Unit: millidegrees Celsius
+ Valid values: 0 (disabled) or greater than 1000
+ RW, Optional
+
+emul_temp
+ Interface to set the emulated temperature method in thermal zone
+ (sensor). After setting this temperature, the thermal zone may pass
+ this temperature to platform emulation function if registered or
+ cache it locally. This is useful in debugging different temperature
+ threshold and its associated cooling action. This is write only node
+ and writing 0 on this node should disable emulation.
+ Unit: millidegree Celsius
+ WO, Optional
+
+ WARNING: Be careful while enabling this option on production systems,
+ because userland can easily disable the thermal policy by simply
+ flooding this sysfs node with low temperature values.
+
+*****************************
+* Cooling device attributes *
+*****************************
+
+type
+ String which represents the type of device, e.g:
+ - for generic ACPI: should be "Fan", "Processor" or "LCD"
+ - for memory controller device on intel_menlow platform:
+ should be "Memory controller".
+ RO, Required
+
+max_state
+ The maximum permissible cooling state of this cooling device.
+ RO, Required
+
+cur_state
+ The current cooling state of this cooling device.
+ The value can any integer numbers between 0 and max_state:
+ - cur_state == 0 means no cooling
+ - cur_state == max_state means the maximum cooling.
+ RW, Required
+
+3. A simple implementation
+
+ACPI thermal zone may support multiple trip points like critical, hot,
+passive, active. If an ACPI thermal zone supports critical, passive,
+active[0] and active[1] at the same time, it may register itself as a
+thermal_zone_device (thermal_zone1) with 4 trip points in all.
+It has one processor and one fan, which are both registered as
+thermal_cooling_device.
+
+If the processor is listed in _PSL method, and the fan is listed in _AL0
+method, the sys I/F structure will be built like this:
+
+/sys/class/thermal:
+
+|thermal_zone1:
+ |---type: acpitz
+ |---temp: 37000
+ |---mode: enabled
+ |---policy: step_wise
+ |---trip_point_0_temp: 100000
+ |---trip_point_0_type: critical
+ |---trip_point_1_temp: 80000
+ |---trip_point_1_type: passive
+ |---trip_point_2_temp: 70000
+ |---trip_point_2_type: active0
+ |---trip_point_3_temp: 60000
+ |---trip_point_3_type: active1
+ |---cdev0: --->/sys/class/thermal/cooling_device0
+ |---cdev0_trip_point: 1 /* cdev0 can be used for passive */
+ |---cdev1: --->/sys/class/thermal/cooling_device3
+ |---cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/
+
+|cooling_device0:
+ |---type: Processor
+ |---max_state: 8
+ |---cur_state: 0
+
+|cooling_device3:
+ |---type: Fan
+ |---max_state: 2
+ |---cur_state: 0
+
+/sys/class/hwmon:
+
+|hwmon0:
+ |---name: acpitz
+ |---temp1_input: 37000
+ |---temp1_crit: 100000
+
+4. Event Notification
+
+The framework includes a simple notification mechanism, in the form of a
+netlink event. Netlink socket initialization is done during the _init_
+of the framework. Drivers which intend to use the notification mechanism
+just need to call thermal_generate_netlink_event() with two arguments viz
+(originator, event). The originator is a pointer to struct thermal_zone_device
+from where the event has been originated. An integer which represents the
+thermal zone device will be used in the message to identify the zone. The
+event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL,
+THERMAL_DEV_FAULT}. Notification can be sent when the current temperature
+crosses any of the configured thresholds.
+
+5. Export Symbol APIs:
+
+5.1: get_tz_trend:
+This function returns the trend of a thermal zone, i.e the rate of change
+of temperature of the thermal zone. Ideally, the thermal sensor drivers
+are supposed to implement the callback. If they don't, the thermal
+framework calculated the trend by comparing the previous and the current
+temperature values.
+
+5.2:get_thermal_instance:
+This function returns the thermal_instance corresponding to a given
+{thermal_zone, cooling_device, trip_point} combination. Returns NULL
+if such an instance does not exist.
+
+5.3:thermal_notify_framework:
+This function handles the trip events from sensor drivers. It starts
+throttling the cooling devices according to the policy configured.
+For CRITICAL and HOT trip points, this notifies the respective drivers,
+and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
+The throttling policy is based on the configured platform data; if no
+platform data is provided, this uses the step_wise throttling policy.
+
+5.4:thermal_cdev_update:
+This function serves as an arbitrator to set the state of a cooling
+device. It sets the cooling device to the deepest cooling state if
+possible.
diff --git a/Documentation/thermal/x86_pkg_temperature_thermal b/Documentation/thermal/x86_pkg_temperature_thermal
new file mode 100644
index 000000000..17a3a4c0a
--- /dev/null
+++ b/Documentation/thermal/x86_pkg_temperature_thermal
@@ -0,0 +1,47 @@
+Kernel driver: x86_pkg_temp_thermal
+===================
+
+Supported chips:
+* x86: with package level thermal management
+(Verify using: CPUID.06H:EAX[bit 6] =1)
+
+Authors: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
+
+Reference
+---
+Intel® 64 and IA-32 Architectures Software Developer’s Manual (Jan, 2013):
+Chapter 14.6: PACKAGE LEVEL THERMAL MANAGEMENT
+
+Description
+---------
+
+This driver register CPU digital temperature package level sensor as a thermal
+zone with maximum two user mode configurable trip points. Number of trip points
+depends on the capability of the package. Once the trip point is violated,
+user mode can receive notification via thermal notification mechanism and can
+take any action to control temperature.
+
+
+Threshold management
+--------------------
+Each package will register as a thermal zone under /sys/class/thermal.
+Example:
+/sys/class/thermal/thermal_zone1
+
+This contains two trip points:
+- trip_point_0_temp
+- trip_point_1_temp
+
+User can set any temperature between 0 to TJ-Max temperature. Temperature units
+are in milli-degree Celsius. Refer to "Documentation/thermal/sysfs-api.txt" for
+thermal sys-fs details.
+
+Any value other than 0 in these trip points, can trigger thermal notifications.
+Setting 0, stops sending thermal notifications.
+
+Thermal notifications: To get kobject-uevent notifications, set the thermal zone
+policy to "user_space". For example: echo -n "user_space" > policy
+
+
+
+
diff --git a/Documentation/this_cpu_ops.txt b/Documentation/this_cpu_ops.txt
new file mode 100644
index 000000000..2cbf71975
--- /dev/null
+++ b/Documentation/this_cpu_ops.txt
@@ -0,0 +1,332 @@
+this_cpu operations
+-------------------
+
+this_cpu operations are a way of optimizing access to per cpu
+variables associated with the *currently* executing processor. This is
+done through the use of segment registers (or a dedicated register where
+the cpu permanently stored the beginning of the per cpu area for a
+specific processor).
+
+this_cpu operations add a per cpu variable offset to the processor
+specific per cpu base and encode that operation in the instruction
+operating on the per cpu variable.
+
+This means that there are no atomicity issues between the calculation of
+the offset and the operation on the data. Therefore it is not
+necessary to disable preemption or interrupts to ensure that the
+processor is not changed between the calculation of the address and
+the operation on the data.
+
+Read-modify-write operations are of particular interest. Frequently
+processors have special lower latency instructions that can operate
+without the typical synchronization overhead, but still provide some
+sort of relaxed atomicity guarantees. The x86, for example, can execute
+RMW (Read Modify Write) instructions like inc/dec/cmpxchg without the
+lock prefix and the associated latency penalty.
+
+Access to the variable without the lock prefix is not synchronized but
+synchronization is not necessary since we are dealing with per cpu
+data specific to the currently executing processor. Only the current
+processor should be accessing that variable and therefore there are no
+concurrency issues with other processors in the system.
+
+Please note that accesses by remote processors to a per cpu area are
+exceptional situations and may impact performance and/or correctness
+(remote write operations) of local RMW operations via this_cpu_*.
+
+The main use of the this_cpu operations has been to optimize counter
+operations.
+
+The following this_cpu() operations with implied preemption protection
+are defined. These operations can be used without worrying about
+preemption and interrupts.
+
+ this_cpu_read(pcp)
+ this_cpu_write(pcp, val)
+ this_cpu_add(pcp, val)
+ this_cpu_and(pcp, val)
+ this_cpu_or(pcp, val)
+ this_cpu_add_return(pcp, val)
+ this_cpu_xchg(pcp, nval)
+ this_cpu_cmpxchg(pcp, oval, nval)
+ this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+ this_cpu_sub(pcp, val)
+ this_cpu_inc(pcp)
+ this_cpu_dec(pcp)
+ this_cpu_sub_return(pcp, val)
+ this_cpu_inc_return(pcp)
+ this_cpu_dec_return(pcp)
+
+
+Inner working of this_cpu operations
+------------------------------------
+
+On x86 the fs: or the gs: segment registers contain the base of the
+per cpu area. It is then possible to simply use the segment override
+to relocate a per cpu relative address to the proper per cpu area for
+the processor. So the relocation to the per cpu base is encoded in the
+instruction via a segment register prefix.
+
+For example:
+
+ DEFINE_PER_CPU(int, x);
+ int z;
+
+ z = this_cpu_read(x);
+
+results in a single instruction
+
+ mov ax, gs:[x]
+
+instead of a sequence of calculation of the address and then a fetch
+from that address which occurs with the per cpu operations. Before
+this_cpu_ops such sequence also required preempt disable/enable to
+prevent the kernel from moving the thread to a different processor
+while the calculation is performed.
+
+Consider the following this_cpu operation:
+
+ this_cpu_inc(x)
+
+The above results in the following single instruction (no lock prefix!)
+
+ inc gs:[x]
+
+instead of the following operations required if there is no segment
+register:
+
+ int *y;
+ int cpu;
+
+ cpu = get_cpu();
+ y = per_cpu_ptr(&x, cpu);
+ (*y)++;
+ put_cpu();
+
+Note that these operations can only be used on per cpu data that is
+reserved for a specific processor. Without disabling preemption in the
+surrounding code this_cpu_inc() will only guarantee that one of the
+per cpu counters is correctly incremented. However, there is no
+guarantee that the OS will not move the process directly before or
+after the this_cpu instruction is executed. In general this means that
+the value of the individual counters for each processor are
+meaningless. The sum of all the per cpu counters is the only value
+that is of interest.
+
+Per cpu variables are used for performance reasons. Bouncing cache
+lines can be avoided if multiple processors concurrently go through
+the same code paths. Since each processor has its own per cpu
+variables no concurrent cache line updates take place. The price that
+has to be paid for this optimization is the need to add up the per cpu
+counters when the value of a counter is needed.
+
+
+Special operations:
+-------------------
+
+ y = this_cpu_ptr(&x)
+
+Takes the offset of a per cpu variable (&x !) and returns the address
+of the per cpu variable that belongs to the currently executing
+processor. this_cpu_ptr avoids multiple steps that the common
+get_cpu/put_cpu sequence requires. No processor number is
+available. Instead, the offset of the local per cpu area is simply
+added to the per cpu offset.
+
+Note that this operation is usually used in a code segment when
+preemption has been disabled. The pointer is then used to
+access local per cpu data in a critical section. When preemption
+is re-enabled this pointer is usually no longer useful since it may
+no longer point to per cpu data of the current processor.
+
+
+Per cpu variables and offsets
+-----------------------------
+
+Per cpu variables have *offsets* to the beginning of the per cpu
+area. They do not have addresses although they look like that in the
+code. Offsets cannot be directly dereferenced. The offset must be
+added to a base pointer of a per cpu area of a processor in order to
+form a valid address.
+
+Therefore the use of x or &x outside of the context of per cpu
+operations is invalid and will generally be treated like a NULL
+pointer dereference.
+
+ DEFINE_PER_CPU(int, x);
+
+In the context of per cpu operations the above implies that x is a per
+cpu variable. Most this_cpu operations take a cpu variable.
+
+ int __percpu *p = &x;
+
+&x and hence p is the *offset* of a per cpu variable. this_cpu_ptr()
+takes the offset of a per cpu variable which makes this look a bit
+strange.
+
+
+Operations on a field of a per cpu structure
+--------------------------------------------
+
+Let's say we have a percpu structure
+
+ struct s {
+ int n,m;
+ };
+
+ DEFINE_PER_CPU(struct s, p);
+
+
+Operations on these fields are straightforward
+
+ this_cpu_inc(p.m)
+
+ z = this_cpu_cmpxchg(p.m, 0, 1);
+
+
+If we have an offset to struct s:
+
+ struct s __percpu *ps = &p;
+
+ this_cpu_dec(ps->m);
+
+ z = this_cpu_inc_return(ps->n);
+
+
+The calculation of the pointer may require the use of this_cpu_ptr()
+if we do not make use of this_cpu ops later to manipulate fields:
+
+ struct s *pp;
+
+ pp = this_cpu_ptr(&p);
+
+ pp->m--;
+
+ z = pp->n++;
+
+
+Variants of this_cpu ops
+-------------------------
+
+this_cpu ops are interrupt safe. Some architectures do not support
+these per cpu local operations. In that case the operation must be
+replaced by code that disables interrupts, then does the operations
+that are guaranteed to be atomic and then re-enable interrupts. Doing
+so is expensive. If there are other reasons why the scheduler cannot
+change the processor we are executing on then there is no reason to
+disable interrupts. For that purpose the following __this_cpu operations
+are provided.
+
+These operations have no guarantee against concurrent interrupts or
+preemption. If a per cpu variable is not used in an interrupt context
+and the scheduler cannot preempt, then they are safe. If any interrupts
+still occur while an operation is in progress and if the interrupt too
+modifies the variable, then RMW actions can not be guaranteed to be
+safe.
+
+ __this_cpu_read(pcp)
+ __this_cpu_write(pcp, val)
+ __this_cpu_add(pcp, val)
+ __this_cpu_and(pcp, val)
+ __this_cpu_or(pcp, val)
+ __this_cpu_add_return(pcp, val)
+ __this_cpu_xchg(pcp, nval)
+ __this_cpu_cmpxchg(pcp, oval, nval)
+ __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2)
+ __this_cpu_sub(pcp, val)
+ __this_cpu_inc(pcp)
+ __this_cpu_dec(pcp)
+ __this_cpu_sub_return(pcp, val)
+ __this_cpu_inc_return(pcp)
+ __this_cpu_dec_return(pcp)
+
+
+Will increment x and will not fall-back to code that disables
+interrupts on platforms that cannot accomplish atomicity through
+address relocation and a Read-Modify-Write operation in the same
+instruction.
+
+
+&this_cpu_ptr(pp)->n vs this_cpu_ptr(&pp->n)
+--------------------------------------------
+
+The first operation takes the offset and forms an address and then
+adds the offset of the n field. This may result in two add
+instructions emitted by the compiler.
+
+The second one first adds the two offsets and then does the
+relocation. IMHO the second form looks cleaner and has an easier time
+with (). The second form also is consistent with the way
+this_cpu_read() and friends are used.
+
+
+Remote access to per cpu data
+------------------------------
+
+Per cpu data structures are designed to be used by one cpu exclusively.
+If you use the variables as intended, this_cpu_ops() are guaranteed to
+be "atomic" as no other CPU has access to these data structures.
+
+There are special cases where you might need to access per cpu data
+structures remotely. It is usually safe to do a remote read access
+and that is frequently done to summarize counters. Remote write access
+something which could be problematic because this_cpu ops do not
+have lock semantics. A remote write may interfere with a this_cpu
+RMW operation.
+
+Remote write accesses to percpu data structures are highly discouraged
+unless absolutely necessary. Please consider using an IPI to wake up
+the remote CPU and perform the update to its per cpu area.
+
+To access per-cpu data structure remotely, typically the per_cpu_ptr()
+function is used:
+
+
+ DEFINE_PER_CPU(struct data, datap);
+
+ struct data *p = per_cpu_ptr(&datap, cpu);
+
+This makes it explicit that we are getting ready to access a percpu
+area remotely.
+
+You can also do the following to convert the datap offset to an address
+
+ struct data *p = this_cpu_ptr(&datap);
+
+but, passing of pointers calculated via this_cpu_ptr to other cpus is
+unusual and should be avoided.
+
+Remote access are typically only for reading the status of another cpus
+per cpu data. Write accesses can cause unique problems due to the
+relaxed synchronization requirements for this_cpu operations.
+
+One example that illustrates some concerns with write operations is
+the following scenario that occurs because two per cpu variables
+share a cache-line but the relaxed synchronization is applied to
+only one process updating the cache-line.
+
+Consider the following example
+
+
+ struct test {
+ atomic_t a;
+ int b;
+ };
+
+ DEFINE_PER_CPU(struct test, onecacheline);
+
+There is some concern about what would happen if the field 'a' is updated
+remotely from one processor and the local processor would use this_cpu ops
+to update field b. Care should be taken that such simultaneous accesses to
+data within the same cache line are avoided. Also costly synchronization
+may be necessary. IPIs are generally recommended in such scenarios instead
+of a remote write to the per cpu area of another processor.
+
+Even in cases where the remote writes are rare, please bear in
+mind that a remote write will evict the cache line from the processor
+that most likely will access it. If the processor wakes up and finds a
+missing local cache line of a per cpu area, its performance and hence
+the wake up times will be affected.
+
+Christoph Lameter, August 4th, 2014
+Pranith Kumar, Aug 2nd, 2014
diff --git a/Documentation/timers/.gitignore b/Documentation/timers/.gitignore
new file mode 100644
index 000000000..c5c45d7ec
--- /dev/null
+++ b/Documentation/timers/.gitignore
@@ -0,0 +1 @@
+hpet_example
diff --git a/Documentation/timers/00-INDEX b/Documentation/timers/00-INDEX
new file mode 100644
index 000000000..ee212a277
--- /dev/null
+++ b/Documentation/timers/00-INDEX
@@ -0,0 +1,20 @@
+00-INDEX
+ - this file
+highres.txt
+ - High resolution timers and dynamic ticks design notes
+hpet.txt
+ - High Precision Event Timer Driver for Linux
+hpet_example.c
+ - sample hpet timer test program
+hrtimers.txt
+ - subsystem for high-resolution kernel timers
+Makefile
+ - Build and link hpet_example
+NO_HZ.txt
+ - Summary of the different methods for the scheduler clock-interrupts management.
+timekeeping.txt
+ - Clock sources, clock events, sched_clock() and delay timer notes
+timers-howto.txt
+ - how to insert delays in the kernel the right (tm) way.
+timer_stats.txt
+ - timer usage statistics
diff --git a/Documentation/timers/Makefile b/Documentation/timers/Makefile
new file mode 100644
index 000000000..6c09ee6ca
--- /dev/null
+++ b/Documentation/timers/Makefile
@@ -0,0 +1,5 @@
+# List of programs to build
+hostprogs-$(CONFIG_X86) := hpet_example
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt
new file mode 100644
index 000000000..6eaf57629
--- /dev/null
+++ b/Documentation/timers/NO_HZ.txt
@@ -0,0 +1,348 @@
+ NO_HZ: Reducing Scheduling-Clock Ticks
+
+
+This document describes Kconfig options and boot parameters that can
+reduce the number of scheduling-clock interrupts, thereby improving energy
+efficiency and reducing OS jitter. Reducing OS jitter is important for
+some types of computationally intensive high-performance computing (HPC)
+applications and for real-time applications.
+
+There are three main ways of managing scheduling-clock interrupts
+(also known as "scheduling-clock ticks" or simply "ticks"):
+
+1. Never omit scheduling-clock ticks (CONFIG_HZ_PERIODIC=y or
+ CONFIG_NO_HZ=n for older kernels). You normally will -not-
+ want to choose this option.
+
+2. Omit scheduling-clock ticks on idle CPUs (CONFIG_NO_HZ_IDLE=y or
+ CONFIG_NO_HZ=y for older kernels). This is the most common
+ approach, and should be the default.
+
+3. Omit scheduling-clock ticks on CPUs that are either idle or that
+ have only one runnable task (CONFIG_NO_HZ_FULL=y). Unless you
+ are running realtime applications or certain types of HPC
+ workloads, you will normally -not- want this option.
+
+These three cases are described in the following three sections, followed
+by a third section on RCU-specific considerations, a fourth section
+discussing testing, and a fifth and final section listing known issues.
+
+
+NEVER OMIT SCHEDULING-CLOCK TICKS
+
+Very old versions of Linux from the 1990s and the very early 2000s
+are incapable of omitting scheduling-clock ticks. It turns out that
+there are some situations where this old-school approach is still the
+right approach, for example, in heavy workloads with lots of tasks
+that use short bursts of CPU, where there are very frequent idle
+periods, but where these idle periods are also quite short (tens or
+hundreds of microseconds). For these types of workloads, scheduling
+clock interrupts will normally be delivered any way because there
+will frequently be multiple runnable tasks per CPU. In these cases,
+attempting to turn off the scheduling clock interrupt will have no effect
+other than increasing the overhead of switching to and from idle and
+transitioning between user and kernel execution.
+
+This mode of operation can be selected using CONFIG_HZ_PERIODIC=y (or
+CONFIG_NO_HZ=n for older kernels).
+
+However, if you are instead running a light workload with long idle
+periods, failing to omit scheduling-clock interrupts will result in
+excessive power consumption. This is especially bad on battery-powered
+devices, where it results in extremely short battery lifetimes. If you
+are running light workloads, you should therefore read the following
+section.
+
+In addition, if you are running either a real-time workload or an HPC
+workload with short iterations, the scheduling-clock interrupts can
+degrade your applications performance. If this describes your workload,
+you should read the following two sections.
+
+
+OMIT SCHEDULING-CLOCK TICKS FOR IDLE CPUs
+
+If a CPU is idle, there is little point in sending it a scheduling-clock
+interrupt. After all, the primary purpose of a scheduling-clock interrupt
+is to force a busy CPU to shift its attention among multiple duties,
+and an idle CPU has no duties to shift its attention among.
+
+The CONFIG_NO_HZ_IDLE=y Kconfig option causes the kernel to avoid sending
+scheduling-clock interrupts to idle CPUs, which is critically important
+both to battery-powered devices and to highly virtualized mainframes.
+A battery-powered device running a CONFIG_HZ_PERIODIC=y kernel would
+drain its battery very quickly, easily 2-3 times as fast as would the
+same device running a CONFIG_NO_HZ_IDLE=y kernel. A mainframe running
+1,500 OS instances might find that half of its CPU time was consumed by
+unnecessary scheduling-clock interrupts. In these situations, there
+is strong motivation to avoid sending scheduling-clock interrupts to
+idle CPUs. That said, dyntick-idle mode is not free:
+
+1. It increases the number of instructions executed on the path
+ to and from the idle loop.
+
+2. On many architectures, dyntick-idle mode also increases the
+ number of expensive clock-reprogramming operations.
+
+Therefore, systems with aggressive real-time response constraints often
+run CONFIG_HZ_PERIODIC=y kernels (or CONFIG_NO_HZ=n for older kernels)
+in order to avoid degrading from-idle transition latencies.
+
+An idle CPU that is not receiving scheduling-clock interrupts is said to
+be "dyntick-idle", "in dyntick-idle mode", "in nohz mode", or "running
+tickless". The remainder of this document will use "dyntick-idle mode".
+
+There is also a boot parameter "nohz=" that can be used to disable
+dyntick-idle mode in CONFIG_NO_HZ_IDLE=y kernels by specifying "nohz=off".
+By default, CONFIG_NO_HZ_IDLE=y kernels boot with "nohz=on", enabling
+dyntick-idle mode.
+
+
+OMIT SCHEDULING-CLOCK TICKS FOR CPUs WITH ONLY ONE RUNNABLE TASK
+
+If a CPU has only one runnable task, there is little point in sending it
+a scheduling-clock interrupt because there is no other task to switch to.
+Note that omitting scheduling-clock ticks for CPUs with only one runnable
+task implies also omitting them for idle CPUs.
+
+The CONFIG_NO_HZ_FULL=y Kconfig option causes the kernel to avoid
+sending scheduling-clock interrupts to CPUs with a single runnable task,
+and such CPUs are said to be "adaptive-ticks CPUs". This is important
+for applications with aggressive real-time response constraints because
+it allows them to improve their worst-case response times by the maximum
+duration of a scheduling-clock interrupt. It is also important for
+computationally intensive short-iteration workloads: If any CPU is
+delayed during a given iteration, all the other CPUs will be forced to
+wait idle while the delayed CPU finishes. Thus, the delay is multiplied
+by one less than the number of CPUs. In these situations, there is
+again strong motivation to avoid sending scheduling-clock interrupts.
+
+By default, no CPU will be an adaptive-ticks CPU. The "nohz_full="
+boot parameter specifies the adaptive-ticks CPUs. For example,
+"nohz_full=1,6-8" says that CPUs 1, 6, 7, and 8 are to be adaptive-ticks
+CPUs. Note that you are prohibited from marking all of the CPUs as
+adaptive-tick CPUs: At least one non-adaptive-tick CPU must remain
+online to handle timekeeping tasks in order to ensure that system
+calls like gettimeofday() returns accurate values on adaptive-tick CPUs.
+(This is not an issue for CONFIG_NO_HZ_IDLE=y because there are no running
+user processes to observe slight drifts in clock rate.) Therefore, the
+boot CPU is prohibited from entering adaptive-ticks mode. Specifying a
+"nohz_full=" mask that includes the boot CPU will result in a boot-time
+error message, and the boot CPU will be removed from the mask. Note that
+this means that your system must have at least two CPUs in order for
+CONFIG_NO_HZ_FULL=y to do anything for you.
+
+Alternatively, the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter specifies
+that all CPUs other than the boot CPU are adaptive-ticks CPUs. This
+Kconfig parameter will be overridden by the "nohz_full=" boot parameter,
+so that if both the CONFIG_NO_HZ_FULL_ALL=y Kconfig parameter and
+the "nohz_full=1" boot parameter is specified, the boot parameter will
+prevail so that only CPU 1 will be an adaptive-ticks CPU.
+
+Finally, adaptive-ticks CPUs must have their RCU callbacks offloaded.
+This is covered in the "RCU IMPLICATIONS" section below.
+
+Normally, a CPU remains in adaptive-ticks mode as long as possible.
+In particular, transitioning to kernel mode does not automatically change
+the mode. Instead, the CPU will exit adaptive-ticks mode only if needed,
+for example, if that CPU enqueues an RCU callback.
+
+Just as with dyntick-idle mode, the benefits of adaptive-tick mode do
+not come for free:
+
+1. CONFIG_NO_HZ_FULL selects CONFIG_NO_HZ_COMMON, so you cannot run
+ adaptive ticks without also running dyntick idle. This dependency
+ extends down into the implementation, so that all of the costs
+ of CONFIG_NO_HZ_IDLE are also incurred by CONFIG_NO_HZ_FULL.
+
+2. The user/kernel transitions are slightly more expensive due
+ to the need to inform kernel subsystems (such as RCU) about
+ the change in mode.
+
+3. POSIX CPU timers prevent CPUs from entering adaptive-tick mode.
+ Real-time applications needing to take actions based on CPU time
+ consumption need to use other means of doing so.
+
+4. If there are more perf events pending than the hardware can
+ accommodate, they are normally round-robined so as to collect
+ all of them over time. Adaptive-tick mode may prevent this
+ round-robining from happening. This will likely be fixed by
+ preventing CPUs with large numbers of perf events pending from
+ entering adaptive-tick mode.
+
+5. Scheduler statistics for adaptive-tick CPUs may be computed
+ slightly differently than those for non-adaptive-tick CPUs.
+ This might in turn perturb load-balancing of real-time tasks.
+
+6. The LB_BIAS scheduler feature is disabled by adaptive ticks.
+
+Although improvements are expected over time, adaptive ticks is quite
+useful for many types of real-time and compute-intensive applications.
+However, the drawbacks listed above mean that adaptive ticks should not
+(yet) be enabled by default.
+
+
+RCU IMPLICATIONS
+
+There are situations in which idle CPUs cannot be permitted to
+enter either dyntick-idle mode or adaptive-tick mode, the most
+common being when that CPU has RCU callbacks pending.
+
+The CONFIG_RCU_FAST_NO_HZ=y Kconfig option may be used to cause such CPUs
+to enter dyntick-idle mode or adaptive-tick mode anyway. In this case,
+a timer will awaken these CPUs every four jiffies in order to ensure
+that the RCU callbacks are processed in a timely fashion.
+
+Another approach is to offload RCU callback processing to "rcuo" kthreads
+using the CONFIG_RCU_NOCB_CPU=y Kconfig option. The specific CPUs to
+offload may be selected via several methods:
+
+1. One of three mutually exclusive Kconfig options specify a
+ build-time default for the CPUs to offload:
+
+ a. The CONFIG_RCU_NOCB_CPU_NONE=y Kconfig option results in
+ no CPUs being offloaded.
+
+ b. The CONFIG_RCU_NOCB_CPU_ZERO=y Kconfig option causes
+ CPU 0 to be offloaded.
+
+ c. The CONFIG_RCU_NOCB_CPU_ALL=y Kconfig option causes all
+ CPUs to be offloaded. Note that the callbacks will be
+ offloaded to "rcuo" kthreads, and that those kthreads
+ will in fact run on some CPU. However, this approach
+ gives fine-grained control on exactly which CPUs the
+ callbacks run on, along with their scheduling priority
+ (including the default of SCHED_OTHER), and it further
+ allows this control to be varied dynamically at runtime.
+
+2. The "rcu_nocbs=" kernel boot parameter, which takes a comma-separated
+ list of CPUs and CPU ranges, for example, "1,3-5" selects CPUs 1,
+ 3, 4, and 5. The specified CPUs will be offloaded in addition to
+ any CPUs specified as offloaded by CONFIG_RCU_NOCB_CPU_ZERO=y or
+ CONFIG_RCU_NOCB_CPU_ALL=y. This means that the "rcu_nocbs=" boot
+ parameter has no effect for kernels built with RCU_NOCB_CPU_ALL=y.
+
+The offloaded CPUs will never queue RCU callbacks, and therefore RCU
+never prevents offloaded CPUs from entering either dyntick-idle mode
+or adaptive-tick mode. That said, note that it is up to userspace to
+pin the "rcuo" kthreads to specific CPUs if desired. Otherwise, the
+scheduler will decide where to run them, which might or might not be
+where you want them to run.
+
+
+TESTING
+
+So you enable all the OS-jitter features described in this document,
+but do not see any change in your workload's behavior. Is this because
+your workload isn't affected that much by OS jitter, or is it because
+something else is in the way? This section helps answer this question
+by providing a simple OS-jitter test suite, which is available on branch
+master of the following git archive:
+
+git://git.kernel.org/pub/scm/linux/kernel/git/frederic/dynticks-testing.git
+
+Clone this archive and follow the instructions in the README file.
+This test procedure will produce a trace that will allow you to evaluate
+whether or not you have succeeded in removing OS jitter from your system.
+If this trace shows that you have removed OS jitter as much as is
+possible, then you can conclude that your workload is not all that
+sensitive to OS jitter.
+
+Note: this test requires that your system have at least two CPUs.
+We do not currently have a good way to remove OS jitter from single-CPU
+systems.
+
+
+KNOWN ISSUES
+
+o Dyntick-idle slows transitions to and from idle slightly.
+ In practice, this has not been a problem except for the most
+ aggressive real-time workloads, which have the option of disabling
+ dyntick-idle mode, an option that most of them take. However,
+ some workloads will no doubt want to use adaptive ticks to
+ eliminate scheduling-clock interrupt latencies. Here are some
+ options for these workloads:
+
+ a. Use PMQOS from userspace to inform the kernel of your
+ latency requirements (preferred).
+
+ b. On x86 systems, use the "idle=mwait" boot parameter.
+
+ c. On x86 systems, use the "intel_idle.max_cstate=" to limit
+ ` the maximum C-state depth.
+
+ d. On x86 systems, use the "idle=poll" boot parameter.
+ However, please note that use of this parameter can cause
+ your CPU to overheat, which may cause thermal throttling
+ to degrade your latencies -- and that this degradation can
+ be even worse than that of dyntick-idle. Furthermore,
+ this parameter effectively disables Turbo Mode on Intel
+ CPUs, which can significantly reduce maximum performance.
+
+o Adaptive-ticks slows user/kernel transitions slightly.
+ This is not expected to be a problem for computationally intensive
+ workloads, which have few such transitions. Careful benchmarking
+ will be required to determine whether or not other workloads
+ are significantly affected by this effect.
+
+o Adaptive-ticks does not do anything unless there is only one
+ runnable task for a given CPU, even though there are a number
+ of other situations where the scheduling-clock tick is not
+ needed. To give but one example, consider a CPU that has one
+ runnable high-priority SCHED_FIFO task and an arbitrary number
+ of low-priority SCHED_OTHER tasks. In this case, the CPU is
+ required to run the SCHED_FIFO task until it either blocks or
+ some other higher-priority task awakens on (or is assigned to)
+ this CPU, so there is no point in sending a scheduling-clock
+ interrupt to this CPU. However, the current implementation
+ nevertheless sends scheduling-clock interrupts to CPUs having a
+ single runnable SCHED_FIFO task and multiple runnable SCHED_OTHER
+ tasks, even though these interrupts are unnecessary.
+
+ And even when there are multiple runnable tasks on a given CPU,
+ there is little point in interrupting that CPU until the current
+ running task's timeslice expires, which is almost always way
+ longer than the time of the next scheduling-clock interrupt.
+
+ Better handling of these sorts of situations is future work.
+
+o A reboot is required to reconfigure both adaptive idle and RCU
+ callback offloading. Runtime reconfiguration could be provided
+ if needed, however, due to the complexity of reconfiguring RCU at
+ runtime, there would need to be an earthshakingly good reason.
+ Especially given that you have the straightforward option of
+ simply offloading RCU callbacks from all CPUs and pinning them
+ where you want them whenever you want them pinned.
+
+o Additional configuration is required to deal with other sources
+ of OS jitter, including interrupts and system-utility tasks
+ and processes. This configuration normally involves binding
+ interrupts and tasks to particular CPUs.
+
+o Some sources of OS jitter can currently be eliminated only by
+ constraining the workload. For example, the only way to eliminate
+ OS jitter due to global TLB shootdowns is to avoid the unmapping
+ operations (such as kernel module unload operations) that
+ result in these shootdowns. For another example, page faults
+ and TLB misses can be reduced (and in some cases eliminated) by
+ using huge pages and by constraining the amount of memory used
+ by the application. Pre-faulting the working set can also be
+ helpful, especially when combined with the mlock() and mlockall()
+ system calls.
+
+o Unless all CPUs are idle, at least one CPU must keep the
+ scheduling-clock interrupt going in order to support accurate
+ timekeeping.
+
+o If there might potentially be some adaptive-ticks CPUs, there
+ will be at least one CPU keeping the scheduling-clock interrupt
+ going, even if all CPUs are otherwise idle.
+
+ Better handling of this situation is ongoing work.
+
+o Some process-handling operations still require the occasional
+ scheduling-clock tick. These operations include calculating CPU
+ load, maintaining sched average, computing CFS entity vruntime,
+ computing avenrun, and carrying out load balancing. They are
+ currently accommodated by scheduling-clock tick every second
+ or so. On-going work will eliminate the need even for these
+ infrequent scheduling-clock ticks.
diff --git a/Documentation/timers/highres.txt b/Documentation/timers/highres.txt
new file mode 100644
index 000000000..e8789976e
--- /dev/null
+++ b/Documentation/timers/highres.txt
@@ -0,0 +1,249 @@
+High resolution timers and dynamic ticks design notes
+-----------------------------------------------------
+
+Further information can be found in the paper of the OLS 2006 talk "hrtimers
+and beyond". The paper is part of the OLS 2006 Proceedings Volume 1, which can
+be found on the OLS website:
+http://www.linuxsymposium.org/2006/linuxsymposium_procv1.pdf
+
+The slides to this talk are available from:
+http://tglx.de/projects/hrtimers/ols2006-hrtimers.pdf
+
+The slides contain five figures (pages 2, 15, 18, 20, 22), which illustrate the
+changes in the time(r) related Linux subsystems. Figure #1 (p. 2) shows the
+design of the Linux time(r) system before hrtimers and other building blocks
+got merged into mainline.
+
+Note: the paper and the slides are talking about "clock event source", while we
+switched to the name "clock event devices" in meantime.
+
+The design contains the following basic building blocks:
+
+- hrtimer base infrastructure
+- timeofday and clock source management
+- clock event management
+- high resolution timer functionality
+- dynamic ticks
+
+
+hrtimer base infrastructure
+---------------------------
+
+The hrtimer base infrastructure was merged into the 2.6.16 kernel. Details of
+the base implementation are covered in Documentation/timers/hrtimers.txt. See
+also figure #2 (OLS slides p. 15)
+
+The main differences to the timer wheel, which holds the armed timer_list type
+timers are:
+ - time ordered enqueueing into a rb-tree
+ - independent of ticks (the processing is based on nanoseconds)
+
+
+timeofday and clock source management
+-------------------------------------
+
+John Stultz's Generic Time Of Day (GTOD) framework moves a large portion of
+code out of the architecture-specific areas into a generic management
+framework, as illustrated in figure #3 (OLS slides p. 18). The architecture
+specific portion is reduced to the low level hardware details of the clock
+sources, which are registered in the framework and selected on a quality based
+decision. The low level code provides hardware setup and readout routines and
+initializes data structures, which are used by the generic time keeping code to
+convert the clock ticks to nanosecond based time values. All other time keeping
+related functionality is moved into the generic code. The GTOD base patch got
+merged into the 2.6.18 kernel.
+
+Further information about the Generic Time Of Day framework is available in the
+OLS 2005 Proceedings Volume 1:
+http://www.linuxsymposium.org/2005/linuxsymposium_procv1.pdf
+
+The paper "We Are Not Getting Any Younger: A New Approach to Time and
+Timers" was written by J. Stultz, D.V. Hart, & N. Aravamudan.
+
+Figure #3 (OLS slides p.18) illustrates the transformation.
+
+
+clock event management
+----------------------
+
+While clock sources provide read access to the monotonically increasing time
+value, clock event devices are used to schedule the next event
+interrupt(s). The next event is currently defined to be periodic, with its
+period defined at compile time. The setup and selection of the event device
+for various event driven functionalities is hardwired into the architecture
+dependent code. This results in duplicated code across all architectures and
+makes it extremely difficult to change the configuration of the system to use
+event interrupt devices other than those already built into the
+architecture. Another implication of the current design is that it is necessary
+to touch all the architecture-specific implementations in order to provide new
+functionality like high resolution timers or dynamic ticks.
+
+The clock events subsystem tries to address this problem by providing a generic
+solution to manage clock event devices and their usage for the various clock
+event driven kernel functionalities. The goal of the clock event subsystem is
+to minimize the clock event related architecture dependent code to the pure
+hardware related handling and to allow easy addition and utilization of new
+clock event devices. It also minimizes the duplicated code across the
+architectures as it provides generic functionality down to the interrupt
+service handler, which is almost inherently hardware dependent.
+
+Clock event devices are registered either by the architecture dependent boot
+code or at module insertion time. Each clock event device fills a data
+structure with clock-specific property parameters and callback functions. The
+clock event management decides, by using the specified property parameters, the
+set of system functions a clock event device will be used to support. This
+includes the distinction of per-CPU and per-system global event devices.
+
+System-level global event devices are used for the Linux periodic tick. Per-CPU
+event devices are used to provide local CPU functionality such as process
+accounting, profiling, and high resolution timers.
+
+The management layer assigns one or more of the following functions to a clock
+event device:
+ - system global periodic tick (jiffies update)
+ - cpu local update_process_times
+ - cpu local profiling
+ - cpu local next event interrupt (non periodic mode)
+
+The clock event device delegates the selection of those timer interrupt related
+functions completely to the management layer. The clock management layer stores
+a function pointer in the device description structure, which has to be called
+from the hardware level handler. This removes a lot of duplicated code from the
+architecture specific timer interrupt handlers and hands the control over the
+clock event devices and the assignment of timer interrupt related functionality
+to the core code.
+
+The clock event layer API is rather small. Aside from the clock event device
+registration interface it provides functions to schedule the next event
+interrupt, clock event device notification service and support for suspend and
+resume.
+
+The framework adds about 700 lines of code which results in a 2KB increase of
+the kernel binary size. The conversion of i386 removes about 100 lines of
+code. The binary size decrease is in the range of 400 byte. We believe that the
+increase of flexibility and the avoidance of duplicated code across
+architectures justifies the slight increase of the binary size.
+
+The conversion of an architecture has no functional impact, but allows to
+utilize the high resolution and dynamic tick functionalities without any change
+to the clock event device and timer interrupt code. After the conversion the
+enabling of high resolution timers and dynamic ticks is simply provided by
+adding the kernel/time/Kconfig file to the architecture specific Kconfig and
+adding the dynamic tick specific calls to the idle routine (a total of 3 lines
+added to the idle function and the Kconfig file)
+
+Figure #4 (OLS slides p.20) illustrates the transformation.
+
+
+high resolution timer functionality
+-----------------------------------
+
+During system boot it is not possible to use the high resolution timer
+functionality, while making it possible would be difficult and would serve no
+useful function. The initialization of the clock event device framework, the
+clock source framework (GTOD) and hrtimers itself has to be done and
+appropriate clock sources and clock event devices have to be registered before
+the high resolution functionality can work. Up to the point where hrtimers are
+initialized, the system works in the usual low resolution periodic mode. The
+clock source and the clock event device layers provide notification functions
+which inform hrtimers about availability of new hardware. hrtimers validates
+the usability of the registered clock sources and clock event devices before
+switching to high resolution mode. This ensures also that a kernel which is
+configured for high resolution timers can run on a system which lacks the
+necessary hardware support.
+
+The high resolution timer code does not support SMP machines which have only
+global clock event devices. The support of such hardware would involve IPI
+calls when an interrupt happens. The overhead would be much larger than the
+benefit. This is the reason why we currently disable high resolution and
+dynamic ticks on i386 SMP systems which stop the local APIC in C3 power
+state. A workaround is available as an idea, but the problem has not been
+tackled yet.
+
+The time ordered insertion of timers provides all the infrastructure to decide
+whether the event device has to be reprogrammed when a timer is added. The
+decision is made per timer base and synchronized across per-cpu timer bases in
+a support function. The design allows the system to utilize separate per-CPU
+clock event devices for the per-CPU timer bases, but currently only one
+reprogrammable clock event device per-CPU is utilized.
+
+When the timer interrupt happens, the next event interrupt handler is called
+from the clock event distribution code and moves expired timers from the
+red-black tree to a separate double linked list and invokes the softirq
+handler. An additional mode field in the hrtimer structure allows the system to
+execute callback functions directly from the next event interrupt handler. This
+is restricted to code which can safely be executed in the hard interrupt
+context. This applies, for example, to the common case of a wakeup function as
+used by nanosleep. The advantage of executing the handler in the interrupt
+context is the avoidance of up to two context switches - from the interrupted
+context to the softirq and to the task which is woken up by the expired
+timer.
+
+Once a system has switched to high resolution mode, the periodic tick is
+switched off. This disables the per system global periodic clock event device -
+e.g. the PIT on i386 SMP systems.
+
+The periodic tick functionality is provided by an per-cpu hrtimer. The callback
+function is executed in the next event interrupt context and updates jiffies
+and calls update_process_times and profiling. The implementation of the hrtimer
+based periodic tick is designed to be extended with dynamic tick functionality.
+This allows to use a single clock event device to schedule high resolution
+timer and periodic events (jiffies tick, profiling, process accounting) on UP
+systems. This has been proved to work with the PIT on i386 and the Incrementer
+on PPC.
+
+The softirq for running the hrtimer queues and executing the callbacks has been
+separated from the tick bound timer softirq to allow accurate delivery of high
+resolution timer signals which are used by itimer and POSIX interval
+timers. The execution of this softirq can still be delayed by other softirqs,
+but the overall latencies have been significantly improved by this separation.
+
+Figure #5 (OLS slides p.22) illustrates the transformation.
+
+
+dynamic ticks
+-------------
+
+Dynamic ticks are the logical consequence of the hrtimer based periodic tick
+replacement (sched_tick). The functionality of the sched_tick hrtimer is
+extended by three functions:
+
+- hrtimer_stop_sched_tick
+- hrtimer_restart_sched_tick
+- hrtimer_update_jiffies
+
+hrtimer_stop_sched_tick() is called when a CPU goes into idle state. The code
+evaluates the next scheduled timer event (from both hrtimers and the timer
+wheel) and in case that the next event is further away than the next tick it
+reprograms the sched_tick to this future event, to allow longer idle sleeps
+without worthless interruption by the periodic tick. The function is also
+called when an interrupt happens during the idle period, which does not cause a
+reschedule. The call is necessary as the interrupt handler might have armed a
+new timer whose expiry time is before the time which was identified as the
+nearest event in the previous call to hrtimer_stop_sched_tick.
+
+hrtimer_restart_sched_tick() is called when the CPU leaves the idle state before
+it calls schedule(). hrtimer_restart_sched_tick() resumes the periodic tick,
+which is kept active until the next call to hrtimer_stop_sched_tick().
+
+hrtimer_update_jiffies() is called from irq_enter() when an interrupt happens
+in the idle period to make sure that jiffies are up to date and the interrupt
+handler has not to deal with an eventually stale jiffy value.
+
+The dynamic tick feature provides statistical values which are exported to
+userspace via /proc/stats and can be made available for enhanced power
+management control.
+
+The implementation leaves room for further development like full tickless
+systems, where the time slice is controlled by the scheduler, variable
+frequency profiling, and a complete removal of jiffies in the future.
+
+
+Aside the current initial submission of i386 support, the patchset has been
+extended to x86_64 and ARM already. Initial (work in progress) support is also
+available for MIPS and PowerPC.
+
+ Thomas, Ingo
+
+
+
diff --git a/Documentation/timers/hpet.txt b/Documentation/timers/hpet.txt
new file mode 100644
index 000000000..767392ffd
--- /dev/null
+++ b/Documentation/timers/hpet.txt
@@ -0,0 +1,30 @@
+ High Precision Event Timer Driver for Linux
+
+The High Precision Event Timer (HPET) hardware follows a specification
+by Intel and Microsoft which can be found at
+
+ http://www.intel.com/hardwaredesign/hpetspec_1.pdf
+
+Each HPET has one fixed-rate counter (at 10+ MHz, hence "High Precision")
+and up to 32 comparators. Normally three or more comparators are provided,
+each of which can generate oneshot interrupts and at least one of which has
+additional hardware to support periodic interrupts. The comparators are
+also called "timers", which can be misleading since usually timers are
+independent of each other ... these share a counter, complicating resets.
+
+HPET devices can support two interrupt routing modes. In one mode, the
+comparators are additional interrupt sources with no particular system
+role. Many x86 BIOS writers don't route HPET interrupts at all, which
+prevents use of that mode. They support the other "legacy replacement"
+mode where the first two comparators block interrupts from 8254 timers
+and from the RTC.
+
+The driver supports detection of HPET driver allocation and initialization
+of the HPET before the driver module_init routine is called. This enables
+platform code which uses timer 0 or 1 as the main timer to intercept HPET
+initialization. An example of this initialization can be found in
+arch/x86/kernel/hpet.c.
+
+The driver provides a userspace API which resembles the API found in the
+RTC driver framework. An example user space program is provided in
+file:Documentation/timers/hpet_example.c
diff --git a/Documentation/timers/hpet_example.c b/Documentation/timers/hpet_example.c
new file mode 100644
index 000000000..9a3e7012c
--- /dev/null
+++ b/Documentation/timers/hpet_example.c
@@ -0,0 +1,294 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <memory.h>
+#include <malloc.h>
+#include <time.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <linux/hpet.h>
+
+
+extern void hpet_open_close(int, const char **);
+extern void hpet_info(int, const char **);
+extern void hpet_poll(int, const char **);
+extern void hpet_fasync(int, const char **);
+extern void hpet_read(int, const char **);
+
+#include <sys/poll.h>
+#include <sys/ioctl.h>
+
+struct hpet_command {
+ char *command;
+ void (*func)(int argc, const char ** argv);
+} hpet_command[] = {
+ {
+ "open-close",
+ hpet_open_close
+ },
+ {
+ "info",
+ hpet_info
+ },
+ {
+ "poll",
+ hpet_poll
+ },
+ {
+ "fasync",
+ hpet_fasync
+ },
+};
+
+int
+main(int argc, const char ** argv)
+{
+ int i;
+
+ argc--;
+ argv++;
+
+ if (!argc) {
+ fprintf(stderr, "-hpet: requires command\n");
+ return -1;
+ }
+
+
+ for (i = 0; i < (sizeof (hpet_command) / sizeof (hpet_command[0])); i++)
+ if (!strcmp(argv[0], hpet_command[i].command)) {
+ argc--;
+ argv++;
+ fprintf(stderr, "-hpet: executing %s\n",
+ hpet_command[i].command);
+ hpet_command[i].func(argc, argv);
+ return 0;
+ }
+
+ fprintf(stderr, "do_hpet: command %s not implemented\n", argv[0]);
+
+ return -1;
+}
+
+void
+hpet_open_close(int argc, const char **argv)
+{
+ int fd;
+
+ if (argc != 1) {
+ fprintf(stderr, "hpet_open_close: device-name\n");
+ return;
+ }
+
+ fd = open(argv[0], O_RDONLY);
+ if (fd < 0)
+ fprintf(stderr, "hpet_open_close: open failed\n");
+ else
+ close(fd);
+
+ return;
+}
+
+void
+hpet_info(int argc, const char **argv)
+{
+ struct hpet_info info;
+ int fd;
+
+ if (argc != 1) {
+ fprintf(stderr, "hpet_info: device-name\n");
+ return;
+ }
+
+ fd = open(argv[0], O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "hpet_info: open of %s failed\n", argv[0]);
+ return;
+ }
+
+ if (ioctl(fd, HPET_INFO, &info) < 0) {
+ fprintf(stderr, "hpet_info: failed to get info\n");
+ goto out;
+ }
+
+ fprintf(stderr, "hpet_info: hi_irqfreq 0x%lx hi_flags 0x%lx ",
+ info.hi_ireqfreq, info.hi_flags);
+ fprintf(stderr, "hi_hpet %d hi_timer %d\n",
+ info.hi_hpet, info.hi_timer);
+
+out:
+ close(fd);
+ return;
+}
+
+void
+hpet_poll(int argc, const char **argv)
+{
+ unsigned long freq;
+ int iterations, i, fd;
+ struct pollfd pfd;
+ struct hpet_info info;
+ struct timeval stv, etv;
+ struct timezone tz;
+ long usec;
+
+ if (argc != 3) {
+ fprintf(stderr, "hpet_poll: device-name freq iterations\n");
+ return;
+ }
+
+ freq = atoi(argv[1]);
+ iterations = atoi(argv[2]);
+
+ fd = open(argv[0], O_RDONLY);
+
+ if (fd < 0) {
+ fprintf(stderr, "hpet_poll: open of %s failed\n", argv[0]);
+ return;
+ }
+
+ if (ioctl(fd, HPET_IRQFREQ, freq) < 0) {
+ fprintf(stderr, "hpet_poll: HPET_IRQFREQ failed\n");
+ goto out;
+ }
+
+ if (ioctl(fd, HPET_INFO, &info) < 0) {
+ fprintf(stderr, "hpet_poll: failed to get info\n");
+ goto out;
+ }
+
+ fprintf(stderr, "hpet_poll: info.hi_flags 0x%lx\n", info.hi_flags);
+
+ if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) {
+ fprintf(stderr, "hpet_poll: HPET_EPI failed\n");
+ goto out;
+ }
+
+ if (ioctl(fd, HPET_IE_ON, 0) < 0) {
+ fprintf(stderr, "hpet_poll, HPET_IE_ON failed\n");
+ goto out;
+ }
+
+ pfd.fd = fd;
+ pfd.events = POLLIN;
+
+ for (i = 0; i < iterations; i++) {
+ pfd.revents = 0;
+ gettimeofday(&stv, &tz);
+ if (poll(&pfd, 1, -1) < 0)
+ fprintf(stderr, "hpet_poll: poll failed\n");
+ else {
+ long data;
+
+ gettimeofday(&etv, &tz);
+ usec = stv.tv_sec * 1000000 + stv.tv_usec;
+ usec = (etv.tv_sec * 1000000 + etv.tv_usec) - usec;
+
+ fprintf(stderr,
+ "hpet_poll: expired time = 0x%lx\n", usec);
+
+ fprintf(stderr, "hpet_poll: revents = 0x%x\n",
+ pfd.revents);
+
+ if (read(fd, &data, sizeof(data)) != sizeof(data)) {
+ fprintf(stderr, "hpet_poll: read failed\n");
+ }
+ else
+ fprintf(stderr, "hpet_poll: data 0x%lx\n",
+ data);
+ }
+ }
+
+out:
+ close(fd);
+ return;
+}
+
+static int hpet_sigio_count;
+
+static void
+hpet_sigio(int val)
+{
+ fprintf(stderr, "hpet_sigio: called\n");
+ hpet_sigio_count++;
+}
+
+void
+hpet_fasync(int argc, const char **argv)
+{
+ unsigned long freq;
+ int iterations, i, fd, value;
+ sig_t oldsig;
+ struct hpet_info info;
+
+ hpet_sigio_count = 0;
+ fd = -1;
+
+ if ((oldsig = signal(SIGIO, hpet_sigio)) == SIG_ERR) {
+ fprintf(stderr, "hpet_fasync: failed to set signal handler\n");
+ return;
+ }
+
+ if (argc != 3) {
+ fprintf(stderr, "hpet_fasync: device-name freq iterations\n");
+ goto out;
+ }
+
+ fd = open(argv[0], O_RDONLY);
+
+ if (fd < 0) {
+ fprintf(stderr, "hpet_fasync: failed to open %s\n", argv[0]);
+ return;
+ }
+
+
+ if ((fcntl(fd, F_SETOWN, getpid()) == 1) ||
+ ((value = fcntl(fd, F_GETFL)) == 1) ||
+ (fcntl(fd, F_SETFL, value | O_ASYNC) == 1)) {
+ fprintf(stderr, "hpet_fasync: fcntl failed\n");
+ goto out;
+ }
+
+ freq = atoi(argv[1]);
+ iterations = atoi(argv[2]);
+
+ if (ioctl(fd, HPET_IRQFREQ, freq) < 0) {
+ fprintf(stderr, "hpet_fasync: HPET_IRQFREQ failed\n");
+ goto out;
+ }
+
+ if (ioctl(fd, HPET_INFO, &info) < 0) {
+ fprintf(stderr, "hpet_fasync: failed to get info\n");
+ goto out;
+ }
+
+ fprintf(stderr, "hpet_fasync: info.hi_flags 0x%lx\n", info.hi_flags);
+
+ if (info.hi_flags && (ioctl(fd, HPET_EPI, 0) < 0)) {
+ fprintf(stderr, "hpet_fasync: HPET_EPI failed\n");
+ goto out;
+ }
+
+ if (ioctl(fd, HPET_IE_ON, 0) < 0) {
+ fprintf(stderr, "hpet_fasync, HPET_IE_ON failed\n");
+ goto out;
+ }
+
+ for (i = 0; i < iterations; i++) {
+ (void) pause();
+ fprintf(stderr, "hpet_fasync: count = %d\n", hpet_sigio_count);
+ }
+
+out:
+ signal(SIGIO, oldsig);
+
+ if (fd >= 0)
+ close(fd);
+
+ return;
+}
diff --git a/Documentation/timers/hrtimers.txt b/Documentation/timers/hrtimers.txt
new file mode 100644
index 000000000..ce31f65e1
--- /dev/null
+++ b/Documentation/timers/hrtimers.txt
@@ -0,0 +1,178 @@
+
+hrtimers - subsystem for high-resolution kernel timers
+----------------------------------------------------
+
+This patch introduces a new subsystem for high-resolution kernel timers.
+
+One might ask the question: we already have a timer subsystem
+(kernel/timers.c), why do we need two timer subsystems? After a lot of
+back and forth trying to integrate high-resolution and high-precision
+features into the existing timer framework, and after testing various
+such high-resolution timer implementations in practice, we came to the
+conclusion that the timer wheel code is fundamentally not suitable for
+such an approach. We initially didn't believe this ('there must be a way
+to solve this'), and spent a considerable effort trying to integrate
+things into the timer wheel, but we failed. In hindsight, there are
+several reasons why such integration is hard/impossible:
+
+- the forced handling of low-resolution and high-resolution timers in
+ the same way leads to a lot of compromises, macro magic and #ifdef
+ mess. The timers.c code is very "tightly coded" around jiffies and
+ 32-bitness assumptions, and has been honed and micro-optimized for a
+ relatively narrow use case (jiffies in a relatively narrow HZ range)
+ for many years - and thus even small extensions to it easily break
+ the wheel concept, leading to even worse compromises. The timer wheel
+ code is very good and tight code, there's zero problems with it in its
+ current usage - but it is simply not suitable to be extended for
+ high-res timers.
+
+- the unpredictable [O(N)] overhead of cascading leads to delays which
+ necessitate a more complex handling of high resolution timers, which
+ in turn decreases robustness. Such a design still led to rather large
+ timing inaccuracies. Cascading is a fundamental property of the timer
+ wheel concept, it cannot be 'designed out' without unevitably
+ degrading other portions of the timers.c code in an unacceptable way.
+
+- the implementation of the current posix-timer subsystem on top of
+ the timer wheel has already introduced a quite complex handling of
+ the required readjusting of absolute CLOCK_REALTIME timers at
+ settimeofday or NTP time - further underlying our experience by
+ example: that the timer wheel data structure is too rigid for high-res
+ timers.
+
+- the timer wheel code is most optimal for use cases which can be
+ identified as "timeouts". Such timeouts are usually set up to cover
+ error conditions in various I/O paths, such as networking and block
+ I/O. The vast majority of those timers never expire and are rarely
+ recascaded because the expected correct event arrives in time so they
+ can be removed from the timer wheel before any further processing of
+ them becomes necessary. Thus the users of these timeouts can accept
+ the granularity and precision tradeoffs of the timer wheel, and
+ largely expect the timer subsystem to have near-zero overhead.
+ Accurate timing for them is not a core purpose - in fact most of the
+ timeout values used are ad-hoc. For them it is at most a necessary
+ evil to guarantee the processing of actual timeout completions
+ (because most of the timeouts are deleted before completion), which
+ should thus be as cheap and unintrusive as possible.
+
+The primary users of precision timers are user-space applications that
+utilize nanosleep, posix-timers and itimer interfaces. Also, in-kernel
+users like drivers and subsystems which require precise timed events
+(e.g. multimedia) can benefit from the availability of a separate
+high-resolution timer subsystem as well.
+
+While this subsystem does not offer high-resolution clock sources just
+yet, the hrtimer subsystem can be easily extended with high-resolution
+clock capabilities, and patches for that exist and are maturing quickly.
+The increasing demand for realtime and multimedia applications along
+with other potential users for precise timers gives another reason to
+separate the "timeout" and "precise timer" subsystems.
+
+Another potential benefit is that such a separation allows even more
+special-purpose optimization of the existing timer wheel for the low
+resolution and low precision use cases - once the precision-sensitive
+APIs are separated from the timer wheel and are migrated over to
+hrtimers. E.g. we could decrease the frequency of the timeout subsystem
+from 250 Hz to 100 HZ (or even smaller).
+
+hrtimer subsystem implementation details
+----------------------------------------
+
+the basic design considerations were:
+
+- simplicity
+
+- data structure not bound to jiffies or any other granularity. All the
+ kernel logic works at 64-bit nanoseconds resolution - no compromises.
+
+- simplification of existing, timing related kernel code
+
+another basic requirement was the immediate enqueueing and ordering of
+timers at activation time. After looking at several possible solutions
+such as radix trees and hashes, we chose the red black tree as the basic
+data structure. Rbtrees are available as a library in the kernel and are
+used in various performance-critical areas of e.g. memory management and
+file systems. The rbtree is solely used for time sorted ordering, while
+a separate list is used to give the expiry code fast access to the
+queued timers, without having to walk the rbtree.
+
+(This separate list is also useful for later when we'll introduce
+high-resolution clocks, where we need separate pending and expired
+queues while keeping the time-order intact.)
+
+Time-ordered enqueueing is not purely for the purposes of
+high-resolution clocks though, it also simplifies the handling of
+absolute timers based on a low-resolution CLOCK_REALTIME. The existing
+implementation needed to keep an extra list of all armed absolute
+CLOCK_REALTIME timers along with complex locking. In case of
+settimeofday and NTP, all the timers (!) had to be dequeued, the
+time-changing code had to fix them up one by one, and all of them had to
+be enqueued again. The time-ordered enqueueing and the storage of the
+expiry time in absolute time units removes all this complex and poorly
+scaling code from the posix-timer implementation - the clock can simply
+be set without having to touch the rbtree. This also makes the handling
+of posix-timers simpler in general.
+
+The locking and per-CPU behavior of hrtimers was mostly taken from the
+existing timer wheel code, as it is mature and well suited. Sharing code
+was not really a win, due to the different data structures. Also, the
+hrtimer functions now have clearer behavior and clearer names - such as
+hrtimer_try_to_cancel() and hrtimer_cancel() [which are roughly
+equivalent to del_timer() and del_timer_sync()] - so there's no direct
+1:1 mapping between them on the algorithmical level, and thus no real
+potential for code sharing either.
+
+Basic data types: every time value, absolute or relative, is in a
+special nanosecond-resolution type: ktime_t. The kernel-internal
+representation of ktime_t values and operations is implemented via
+macros and inline functions, and can be switched between a "hybrid
+union" type and a plain "scalar" 64bit nanoseconds representation (at
+compile time). The hybrid union type optimizes time conversions on 32bit
+CPUs. This build-time-selectable ktime_t storage format was implemented
+to avoid the performance impact of 64-bit multiplications and divisions
+on 32bit CPUs. Such operations are frequently necessary to convert
+between the storage formats provided by kernel and userspace interfaces
+and the internal time format. (See include/linux/ktime.h for further
+details.)
+
+hrtimers - rounding of timer values
+-----------------------------------
+
+the hrtimer code will round timer events to lower-resolution clocks
+because it has to. Otherwise it will do no artificial rounding at all.
+
+one question is, what resolution value should be returned to the user by
+the clock_getres() interface. This will return whatever real resolution
+a given clock has - be it low-res, high-res, or artificially-low-res.
+
+hrtimers - testing and verification
+----------------------------------
+
+We used the high-resolution clock subsystem ontop of hrtimers to verify
+the hrtimer implementation details in praxis, and we also ran the posix
+timer tests in order to ensure specification compliance. We also ran
+tests on low-resolution clocks.
+
+The hrtimer patch converts the following kernel functionality to use
+hrtimers:
+
+ - nanosleep
+ - itimers
+ - posix-timers
+
+The conversion of nanosleep and posix-timers enabled the unification of
+nanosleep and clock_nanosleep.
+
+The code was successfully compiled for the following platforms:
+
+ i386, x86_64, ARM, PPC, PPC64, IA64
+
+The code was run-tested on the following platforms:
+
+ i386(UP/SMP), x86_64(UP/SMP), ARM, PPC
+
+hrtimers were also integrated into the -rt tree, along with a
+hrtimers-based high-resolution clock implementation, so the hrtimers
+code got a healthy amount of testing and use in practice.
+
+ Thomas Gleixner, Ingo Molnar
diff --git a/Documentation/timers/timekeeping.txt b/Documentation/timers/timekeeping.txt
new file mode 100644
index 000000000..f3a8cf28f
--- /dev/null
+++ b/Documentation/timers/timekeeping.txt
@@ -0,0 +1,179 @@
+Clock sources, Clock events, sched_clock() and delay timers
+-----------------------------------------------------------
+
+This document tries to briefly explain some basic kernel timekeeping
+abstractions. It partly pertains to the drivers usually found in
+drivers/clocksource in the kernel tree, but the code may be spread out
+across the kernel.
+
+If you grep through the kernel source you will find a number of architecture-
+specific implementations of clock sources, clockevents and several likewise
+architecture-specific overrides of the sched_clock() function and some
+delay timers.
+
+To provide timekeeping for your platform, the clock source provides
+the basic timeline, whereas clock events shoot interrupts on certain points
+on this timeline, providing facilities such as high-resolution timers.
+sched_clock() is used for scheduling and timestamping, and delay timers
+provide an accurate delay source using hardware counters.
+
+
+Clock sources
+-------------
+
+The purpose of the clock source is to provide a timeline for the system that
+tells you where you are in time. For example issuing the command 'date' on
+a Linux system will eventually read the clock source to determine exactly
+what time it is.
+
+Typically the clock source is a monotonic, atomic counter which will provide
+n bits which count from 0 to 2^(n-1) and then wraps around to 0 and start over.
+It will ideally NEVER stop ticking as long as the system is running. It
+may stop during system suspend.
+
+The clock source shall have as high resolution as possible, and the frequency
+shall be as stable and correct as possible as compared to a real-world wall
+clock. It should not move unpredictably back and forth in time or miss a few
+cycles here and there.
+
+It must be immune to the kind of effects that occur in hardware where e.g.
+the counter register is read in two phases on the bus lowest 16 bits first
+and the higher 16 bits in a second bus cycle with the counter bits
+potentially being updated in between leading to the risk of very strange
+values from the counter.
+
+When the wall-clock accuracy of the clock source isn't satisfactory, there
+are various quirks and layers in the timekeeping code for e.g. synchronizing
+the user-visible time to RTC clocks in the system or against networked time
+servers using NTP, but all they do basically is update an offset against
+the clock source, which provides the fundamental timeline for the system.
+These measures does not affect the clock source per se, they only adapt the
+system to the shortcomings of it.
+
+The clock source struct shall provide means to translate the provided counter
+into a nanosecond value as an unsigned long long (unsigned 64 bit) number.
+Since this operation may be invoked very often, doing this in a strict
+mathematical sense is not desirable: instead the number is taken as close as
+possible to a nanosecond value using only the arithmetic operations
+multiply and shift, so in clocksource_cyc2ns() you find:
+
+ ns ~= (clocksource * mult) >> shift
+
+You will find a number of helper functions in the clock source code intended
+to aid in providing these mult and shift values, such as
+clocksource_khz2mult(), clocksource_hz2mult() that help determine the
+mult factor from a fixed shift, and clocksource_register_hz() and
+clocksource_register_khz() which will help out assigning both shift and mult
+factors using the frequency of the clock source as the only input.
+
+For real simple clock sources accessed from a single I/O memory location
+there is nowadays even clocksource_mmio_init() which will take a memory
+location, bit width, a parameter telling whether the counter in the
+register counts up or down, and the timer clock rate, and then conjure all
+necessary parameters.
+
+Since a 32-bit counter at say 100 MHz will wrap around to zero after some 43
+seconds, the code handling the clock source will have to compensate for this.
+That is the reason why the clock source struct also contains a 'mask'
+member telling how many bits of the source are valid. This way the timekeeping
+code knows when the counter will wrap around and can insert the necessary
+compensation code on both sides of the wrap point so that the system timeline
+remains monotonic.
+
+
+Clock events
+------------
+
+Clock events are the conceptual reverse of clock sources: they take a
+desired time specification value and calculate the values to poke into
+hardware timer registers.
+
+Clock events are orthogonal to clock sources. The same hardware
+and register range may be used for the clock event, but it is essentially
+a different thing. The hardware driving clock events has to be able to
+fire interrupts, so as to trigger events on the system timeline. On an SMP
+system, it is ideal (and customary) to have one such event driving timer per
+CPU core, so that each core can trigger events independently of any other
+core.
+
+You will notice that the clock event device code is based on the same basic
+idea about translating counters to nanoseconds using mult and shift
+arithmetic, and you find the same family of helper functions again for
+assigning these values. The clock event driver does not need a 'mask'
+attribute however: the system will not try to plan events beyond the time
+horizon of the clock event.
+
+
+sched_clock()
+-------------
+
+In addition to the clock sources and clock events there is a special weak
+function in the kernel called sched_clock(). This function shall return the
+number of nanoseconds since the system was started. An architecture may or
+may not provide an implementation of sched_clock() on its own. If a local
+implementation is not provided, the system jiffy counter will be used as
+sched_clock().
+
+As the name suggests, sched_clock() is used for scheduling the system,
+determining the absolute timeslice for a certain process in the CFS scheduler
+for example. It is also used for printk timestamps when you have selected to
+include time information in printk for things like bootcharts.
+
+Compared to clock sources, sched_clock() has to be very fast: it is called
+much more often, especially by the scheduler. If you have to do trade-offs
+between accuracy compared to the clock source, you may sacrifice accuracy
+for speed in sched_clock(). It however requires some of the same basic
+characteristics as the clock source, i.e. it should be monotonic.
+
+The sched_clock() function may wrap only on unsigned long long boundaries,
+i.e. after 64 bits. Since this is a nanosecond value this will mean it wraps
+after circa 585 years. (For most practical systems this means "never".)
+
+If an architecture does not provide its own implementation of this function,
+it will fall back to using jiffies, making its maximum resolution 1/HZ of the
+jiffy frequency for the architecture. This will affect scheduling accuracy
+and will likely show up in system benchmarks.
+
+The clock driving sched_clock() may stop or reset to zero during system
+suspend/sleep. This does not matter to the function it serves of scheduling
+events on the system. However it may result in interesting timestamps in
+printk().
+
+The sched_clock() function should be callable in any context, IRQ- and
+NMI-safe and return a sane value in any context.
+
+Some architectures may have a limited set of time sources and lack a nice
+counter to derive a 64-bit nanosecond value, so for example on the ARM
+architecture, special helper functions have been created to provide a
+sched_clock() nanosecond base from a 16- or 32-bit counter. Sometimes the
+same counter that is also used as clock source is used for this purpose.
+
+On SMP systems, it is crucial for performance that sched_clock() can be called
+independently on each CPU without any synchronization performance hits.
+Some hardware (such as the x86 TSC) will cause the sched_clock() function to
+drift between the CPUs on the system. The kernel can work around this by
+enabling the CONFIG_HAVE_UNSTABLE_SCHED_CLOCK option. This is another aspect
+that makes sched_clock() different from the ordinary clock source.
+
+
+Delay timers (some architectures only)
+--------------------------------------
+
+On systems with variable CPU frequency, the various kernel delay() functions
+will sometimes behave strangely. Basically these delays usually use a hard
+loop to delay a certain number of jiffy fractions using a "lpj" (loops per
+jiffy) value, calibrated on boot.
+
+Let's hope that your system is running on maximum frequency when this value
+is calibrated: as an effect when the frequency is geared down to half the
+full frequency, any delay() will be twice as long. Usually this does not
+hurt, as you're commonly requesting that amount of delay *or more*. But
+basically the semantics are quite unpredictable on such systems.
+
+Enter timer-based delays. Using these, a timer read may be used instead of
+a hard-coded loop for providing the desired delay.
+
+This is done by declaring a struct delay_timer and assigning the appropriate
+function pointers and rate settings for this delay timer.
+
+This is available on some architectures like OpenRISC or ARM.
diff --git a/Documentation/timers/timer_stats.txt b/Documentation/timers/timer_stats.txt
new file mode 100644
index 000000000..de835ee97
--- /dev/null
+++ b/Documentation/timers/timer_stats.txt
@@ -0,0 +1,73 @@
+timer_stats - timer usage statistics
+------------------------------------
+
+timer_stats is a debugging facility to make the timer (ab)usage in a Linux
+system visible to kernel and userspace developers. If enabled in the config
+but not used it has almost zero runtime overhead, and a relatively small
+data structure overhead. Even if collection is enabled runtime all the
+locking is per-CPU and lookup is hashed.
+
+timer_stats should be used by kernel and userspace developers to verify that
+their code does not make unduly use of timers. This helps to avoid unnecessary
+wakeups, which should be avoided to optimize power consumption.
+
+It can be enabled by CONFIG_TIMER_STATS in the "Kernel hacking" configuration
+section.
+
+timer_stats collects information about the timer events which are fired in a
+Linux system over a sample period:
+
+- the pid of the task(process) which initialized the timer
+- the name of the process which initialized the timer
+- the function where the timer was initialized
+- the callback function which is associated to the timer
+- the number of events (callbacks)
+
+timer_stats adds an entry to /proc: /proc/timer_stats
+
+This entry is used to control the statistics functionality and to read out the
+sampled information.
+
+The timer_stats functionality is inactive on bootup.
+
+To activate a sample period issue:
+# echo 1 >/proc/timer_stats
+
+To stop a sample period issue:
+# echo 0 >/proc/timer_stats
+
+The statistics can be retrieved by:
+# cat /proc/timer_stats
+
+While sampling is enabled, each readout from /proc/timer_stats will see
+newly updated statistics. Once sampling is disabled, the sampled information
+is kept until a new sample period is started. This allows multiple readouts.
+
+Sample output of /proc/timer_stats:
+
+Timerstats sample period: 3.888770 s
+ 12, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick)
+ 15, 1 swapper hcd_submit_urb (rh_timer_func)
+ 4, 959 kedac schedule_timeout (process_timeout)
+ 1, 0 swapper page_writeback_init (wb_timer_fn)
+ 28, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick)
+ 22, 2948 IRQ 4 tty_flip_buffer_push (delayed_work_timer_fn)
+ 3, 3100 bash schedule_timeout (process_timeout)
+ 1, 1 swapper queue_delayed_work_on (delayed_work_timer_fn)
+ 1, 1 swapper queue_delayed_work_on (delayed_work_timer_fn)
+ 1, 1 swapper neigh_table_init_no_netlink (neigh_periodic_timer)
+ 1, 2292 ip __netdev_watchdog_up (dev_watchdog)
+ 1, 23 events/1 do_cache_clean (delayed_work_timer_fn)
+90 total events, 30.0 events/sec
+
+The first column is the number of events, the second column the pid, the third
+column is the name of the process. The forth column shows the function which
+initialized the timer and in parenthesis the callback function which was
+executed on expiry.
+
+ Thomas, Ingo
+
+Added flag to indicate 'deferrable timer' in /proc/timer_stats. A deferrable
+timer will appear as follows
+ 10D, 1 swapper queue_delayed_work_on (delayed_work_timer_fn)
+
diff --git a/Documentation/timers/timers-howto.txt b/Documentation/timers/timers-howto.txt
new file mode 100644
index 000000000..038f8c77a
--- /dev/null
+++ b/Documentation/timers/timers-howto.txt
@@ -0,0 +1,105 @@
+delays - Information on the various kernel delay / sleep mechanisms
+-------------------------------------------------------------------
+
+This document seeks to answer the common question: "What is the
+RightWay (TM) to insert a delay?"
+
+This question is most often faced by driver writers who have to
+deal with hardware delays and who may not be the most intimately
+familiar with the inner workings of the Linux Kernel.
+
+
+Inserting Delays
+----------------
+
+The first, and most important, question you need to ask is "Is my
+code in an atomic context?" This should be followed closely by "Does
+it really need to delay in atomic context?" If so...
+
+ATOMIC CONTEXT:
+ You must use the *delay family of functions. These
+ functions use the jiffie estimation of clock speed
+ and will busy wait for enough loop cycles to achieve
+ the desired delay:
+
+ ndelay(unsigned long nsecs)
+ udelay(unsigned long usecs)
+ mdelay(unsigned long msecs)
+
+ udelay is the generally preferred API; ndelay-level
+ precision may not actually exist on many non-PC devices.
+
+ mdelay is macro wrapper around udelay, to account for
+ possible overflow when passing large arguments to udelay.
+ In general, use of mdelay is discouraged and code should
+ be refactored to allow for the use of msleep.
+
+NON-ATOMIC CONTEXT:
+ You should use the *sleep[_range] family of functions.
+ There are a few more options here, while any of them may
+ work correctly, using the "right" sleep function will
+ help the scheduler, power management, and just make your
+ driver better :)
+
+ -- Backed by busy-wait loop:
+ udelay(unsigned long usecs)
+ -- Backed by hrtimers:
+ usleep_range(unsigned long min, unsigned long max)
+ -- Backed by jiffies / legacy_timers
+ msleep(unsigned long msecs)
+ msleep_interruptible(unsigned long msecs)
+
+ Unlike the *delay family, the underlying mechanism
+ driving each of these calls varies, thus there are
+ quirks you should be aware of.
+
+
+ SLEEPING FOR "A FEW" USECS ( < ~10us? ):
+ * Use udelay
+
+ - Why not usleep?
+ On slower systems, (embedded, OR perhaps a speed-
+ stepped PC!) the overhead of setting up the hrtimers
+ for usleep *may* not be worth it. Such an evaluation
+ will obviously depend on your specific situation, but
+ it is something to be aware of.
+
+ SLEEPING FOR ~USECS OR SMALL MSECS ( 10us - 20ms):
+ * Use usleep_range
+
+ - Why not msleep for (1ms - 20ms)?
+ Explained originally here:
+ http://lkml.org/lkml/2007/8/3/250
+ msleep(1~20) may not do what the caller intends, and
+ will often sleep longer (~20 ms actual sleep for any
+ value given in the 1~20ms range). In many cases this
+ is not the desired behavior.
+
+ - Why is there no "usleep" / What is a good range?
+ Since usleep_range is built on top of hrtimers, the
+ wakeup will be very precise (ish), thus a simple
+ usleep function would likely introduce a large number
+ of undesired interrupts.
+
+ With the introduction of a range, the scheduler is
+ free to coalesce your wakeup with any other wakeup
+ that may have happened for other reasons, or at the
+ worst case, fire an interrupt for your upper bound.
+
+ The larger a range you supply, the greater a chance
+ that you will not trigger an interrupt; this should
+ be balanced with what is an acceptable upper bound on
+ delay / performance for your specific code path. Exact
+ tolerances here are very situation specific, thus it
+ is left to the caller to determine a reasonable range.
+
+ SLEEPING FOR LARGER MSECS ( 10ms+ )
+ * Use msleep or possibly msleep_interruptible
+
+ - What's the difference?
+ msleep sets the current task to TASK_UNINTERRUPTIBLE
+ whereas msleep_interruptible sets the current task to
+ TASK_INTERRUPTIBLE before scheduling the sleep. In
+ short, the difference is whether the sleep can be ended
+ early by a signal. In general, just use msleep unless
+ you know you have a need for the interruptible variant.
diff --git a/Documentation/tp_smapi.txt b/Documentation/tp_smapi.txt
new file mode 100644
index 000000000..d03730109
--- /dev/null
+++ b/Documentation/tp_smapi.txt
@@ -0,0 +1,267 @@
+tp_smapi version 0.40
+IBM ThinkPad hardware functions driver
+
+Author: Shem Multinymous <multinymous@gmail.com>
+Project: http://sourceforge.net/projects/tpctl
+Wiki: http://thinkwiki.org/wiki/tp_smapi
+List: linux-thinkpad@linux-thinkpad.org
+ (http://mailman.linux-thinkpad.org/mailman/listinfo/linux-thinkpad)
+
+Description
+-----------
+
+ThinkPad laptops include a proprietary interface called SMAPI BIOS
+(System Management Application Program Interface) which provides some
+hardware control functionality that is not accessible by other means.
+
+This driver exposes some features of the SMAPI BIOS through a sysfs
+interface. It is suitable for newer models, on which SMAPI is invoked
+through IO port writes. Older models use a different SMAPI interface;
+for those, try the "thinkpad" module from the "tpctl" package.
+
+WARNING:
+This driver uses undocumented features and direct hardware access.
+It thus cannot be guaranteed to work, and may cause arbitrary damage
+(especially on models it wasn't tested on).
+
+
+Module parameters
+-----------------
+
+thinkpad_ec module:
+ force_io=1 lets thinkpad_ec load on some recent ThinkPad models
+ (e.g., T400 and T500) whose BIOS's ACPI DSDT reserves the ports we need.
+tp_smapi module:
+ debug=1 enables verbose dmesg output.
+
+
+Usage
+-----
+
+Control of battery charging thresholds (in percents of current full charge
+capacity):
+
+# echo 40 > /sys/devices/platform/smapi/BAT0/start_charge_thresh
+# echo 70 > /sys/devices/platform/smapi/BAT0/stop_charge_thresh
+# cat /sys/devices/platform/smapi/BAT0/*_charge_thresh
+
+ (This is useful since Li-Ion batteries wear out much faster at very
+ high or low charge levels. The driver will also keeps the thresholds
+ across suspend-to-disk with AC disconnected; this isn't done
+ automatically by the hardware.)
+
+Inhibiting battery charging for 17 minutes (overrides thresholds):
+
+# echo 17 > /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes
+# echo 0 > /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes # stop
+# cat /sys/devices/platform/smapi/BAT0/inhibit_charge_minutes
+
+ (This can be used to control which battery is charged when using an
+ Ultrabay battery.)
+
+Forcing battery discharging even if AC power available:
+
+# echo 1 > /sys/devices/platform/smapi/BAT0/force_discharge # start discharge
+# echo 0 > /sys/devices/platform/smapi/BAT0/force_discharge # stop discharge
+# cat /sys/devices/platform/smapi/BAT0/force_discharge
+
+ (When AC is connected, forced discharging will automatically stop
+ when battery is fully depleted -- this is useful for calibration.
+ Also, this attribute can be used to control which battery is discharged
+ when both a system battery and an Ultrabay battery are connected.)
+
+Misc read-only battery status attributes (see note about HDAPS below):
+
+/sys/devices/platform/smapi/BAT0/installed # 0 or 1
+/sys/devices/platform/smapi/BAT0/state # idle/charging/discharging
+/sys/devices/platform/smapi/BAT0/cycle_count # integer counter
+/sys/devices/platform/smapi/BAT0/current_now # instantaneous current
+/sys/devices/platform/smapi/BAT0/current_avg # last minute average
+/sys/devices/platform/smapi/BAT0/power_now # instantaneous power
+/sys/devices/platform/smapi/BAT0/power_avg # last minute average
+/sys/devices/platform/smapi/BAT0/last_full_capacity # in mWh
+/sys/devices/platform/smapi/BAT0/remaining_percent # remaining percent of energy (set by calibration)
+/sys/devices/platform/smapi/BAT0/remaining_percent_error # error range of remaing_percent (not reset by calibration)
+/sys/devices/platform/smapi/BAT0/remaining_running_time # in minutes, by last minute average power
+/sys/devices/platform/smapi/BAT0/remaining_running_time_now # in minutes, by instantenous power
+/sys/devices/platform/smapi/BAT0/remaining_charging_time # in minutes
+/sys/devices/platform/smapi/BAT0/remaining_capacity # in mWh
+/sys/devices/platform/smapi/BAT0/design_capacity # in mWh
+/sys/devices/platform/smapi/BAT0/voltage # in mV
+/sys/devices/platform/smapi/BAT0/design_voltage # in mV
+/sys/devices/platform/smapi/BAT0/charging_max_current # max charging current
+/sys/devices/platform/smapi/BAT0/charging_max_voltage # max charging voltage
+/sys/devices/platform/smapi/BAT0/group{0,1,2,3}_voltage # see below
+/sys/devices/platform/smapi/BAT0/manufacturer # string
+/sys/devices/platform/smapi/BAT0/model # string
+/sys/devices/platform/smapi/BAT0/barcoding # string
+/sys/devices/platform/smapi/BAT0/chemistry # string
+/sys/devices/platform/smapi/BAT0/serial # integer
+/sys/devices/platform/smapi/BAT0/manufacture_date # YYYY-MM-DD
+/sys/devices/platform/smapi/BAT0/first_use_date # YYYY-MM-DD
+/sys/devices/platform/smapi/BAT0/temperature # in milli-Celsius
+/sys/devices/platform/smapi/BAT0/dump # see below
+/sys/devices/platform/smapi/ac_connected # 0 or 1
+
+The BAT0/group{0,1,2,3}_voltage attribute refers to the separate cell groups
+in each battery. For example, on the ThinkPad 600, X3x, T4x and R5x models,
+the battery contains 3 cell groups in series, where each group consisting of 2
+or 3 cells connected in parallel. The voltage of each group is given by these
+attributes, and their sum (roughly) equals the "voltage" attribute.
+(The effective performance of the battery is determined by the weakest group,
+i.e., the one those voltage changes most rapidly during dis/charging.)
+
+The "BAT0/dump" attribute gives a a hex dump of the raw status data, which
+contains additional data now in the above (if you can figure it out). Some
+unused values are autodetected and replaced by "--":
+
+In all of the above, replace BAT0 with BAT1 to address the 2nd battery (e.g.
+in the UltraBay).
+
+
+Raw SMAPI calls:
+
+/sys/devices/platform/smapi/smapi_request
+This performs raw SMAPI calls. It uses a bad interface that cannot handle
+multiple simultaneous access. Don't touch it, it's for development only.
+If you did touch it, you would so something like
+# echo '211a 100 0 0' > /sys/devices/platform/smapi/smapi_request
+# cat /sys/devices/platform/smapi/smapi_request
+and notice that in the output "211a 34b b2 0 0 0 'OK'", the "4b" in the 2nd
+value, converted to decimal is 75: the current charge stop threshold.
+
+
+Model-specific status
+---------------------
+
+Works (at least partially) on the following ThinkPad model:
+* A30
+* G41
+* R40, R50p, R51, R52
+* T23, T40, T40p, T41, T41p, T42, T42p, T43, T43p, T60
+* X24, X31, X32, X40, X41, X60
+* Z60t, Z61m
+
+Not all functions are available on all models; for detailed status, see:
+ http://thinkwiki.org/wiki/tp_smapi
+
+Please report success/failure by e-mail or on the Wiki.
+If you get a "not implemented" or "not supported" message, your laptop
+probably just can't do that (at least not via the SMAPI BIOS).
+For negative reports, follow the bug reporting guidelines below.
+If you send me the necessary technical data (i.e., SMAPI function
+interfaces), I will support additional models.
+
+
+Additional HDAPS features
+-------------------------
+
+The modified hdaps driver has several improvements on the one in mainline
+(beyond resolving the conflict with thinkpad_ec and tp_smapi):
+
+- Fixes reliability and improves support for recent ThinkPad models
+ (especially *60 and newer). Unlike the mainline driver, the modified hdaps
+ correctly follows the Embedded Controller communication protocol.
+
+- Extends the "invert" parameter to cover all possible axis orientations.
+ The possible values are as follows.
+ Let X,Y denote the hardware readouts.
+ Let R denote the laptop's roll (tilt left/right).
+ Let P denote the laptop's pitch (tilt forward/backward).
+ invert=0: R= X P= Y (same as mainline)
+ invert=1: R=-X P=-Y (same as mainline)
+ invert=2: R=-X P= Y (new)
+ invert=3: R= X P=-Y (new)
+ invert=4: R= Y P= X (new)
+ invert=5: R=-Y P=-X (new)
+ invert=6: R=-Y P= X (new)
+ invert=7: R= Y P=-X (new)
+ It's probably easiest to just try all 8 possibilities and see which yields
+ correct results (e.g., in the hdaps-gl visualisation).
+
+- Adds a whitelist which automatically sets the correct axis orientation for
+ some models. If the value for your model is wrong or missing, you can override
+ it using the "invert" parameter. Please also update the tables at
+ http://www.thinkwiki.org/wiki/tp_smapi and
+ http://www.thinkwiki.org/wiki/List_of_DMI_IDs
+ and submit a patch for the whitelist in hdaps.c.
+
+- Provides new attributes:
+ /sys/devices/platform/hdaps/sampling_rate:
+ This determines the frequency at which the host queries the embedded
+ controller for accelerometer data (and informs the hdaps input devices).
+ Default=50.
+ /sys/devices/platform/hdaps/oversampling_ratio:
+ When set to X, the embedded controller is told to do physical accelerometer
+ measurements at a rate that is X times higher than the rate at which
+ the driver reads those measurements (i.e., X*sampling_rate). This
+ makes the readouts from the embedded controller more fresh, and is also
+ useful for the running average filter (see next). Default=5
+ /sys/devices/platform/hdaps/running_avg_filter_order:
+ When set to X, reported readouts will be the average of the last X physical
+ accelerometer measurements. Current firmware allows 1<=X<=8. Setting to a
+ high value decreases readout fluctuations. The averaging is handled by the
+ embedded controller, so no CPU resources are used. Higher values make the
+ readouts smoother, since it averages out both sensor noise (good) and abrupt
+ changes (bad). Default=2.
+
+- Provides a second input device, which publishes the raw accelerometer
+ measurements (without the fuzzing needed for joystick emulation). This input
+ device can be matched by a udev rule such as the following (all on one line):
+ KERNEL=="event[0-9]*", ATTRS{phys}=="hdaps/input1",
+ ATTRS{modalias}=="input:b0019v1014p5054e4801-*",
+ SYMLINK+="input/hdaps/accelerometer-event
+
+A new version of the hdapsd userspace daemon, which uses the input device
+interface instead of polling sysfs, is available seprately. Using this reduces
+the total interrupts per second generated by hdaps+hdapsd (on tickless kernels)
+to 50, down from a value that fluctuates between 50 and 100. Set the
+sampling_rate sysfs attribute to a lower value to further reduce interrupts,
+at the expense of response latency.
+
+Licensing note: all my changes to the HDAPS driver are licensed under the
+GPL version 2 or, at your option and to the extent allowed by derivation from
+prior works, any later version. My version of hdaps is derived work from the
+mainline version, which at the time of writing is available only under
+GPL version 2.
+
+Bug reporting
+-------------
+
+Mail <multinymous@gmail.com>. Please include:
+* Details about your model,
+* Relevant "dmesg" output. Make sure thinkpad_ec and tp_smapi are loaded with
+ the "debug=1" parameter (e.g., use "make load HDAPS=1 DEBUG=1").
+* Output of "dmidecode | grep -C5 Product"
+* Does the failed functionality works under Windows?
+
+
+More about SMAPI
+----------------
+
+For hints about what may be possible via the SMAPI BIOS and how, see:
+
+* IBM Technical Reference Manual for the ThinkPad 770
+ (http://www-307.ibm.com/pc/support/site.wss/document.do?lndocid=PFAN-3TUQQD)
+* Exported symbols in PWRMGRIF.DLL or TPPWRW32.DLL (e.g., use "objdump -x").
+* drivers/char/mwave/smapi.c in the Linux kernel tree.*
+* The "thinkpad" SMAPI module (http://tpctl.sourceforge.net).
+* The SMAPI_* constants in tp_smapi.c.
+
+Note that in the above Technical Reference and in the "thinkpad" module,
+SMAPI is invoked through a function call to some physical address. However,
+the interface used by tp_smapi and the above mwave drive, and apparently
+required by newer ThinkPad, is different: you set the parameters up in the
+CPU's registers and write to ports 0xB2 (the APM control port) and 0x4F; this
+triggers an SMI (System Management Interrupt), causing the CPU to enter
+SMM (System Management Mode) and run the BIOS firmware; the results are
+returned in the CPU's registers. It is not clear what is the relation between
+the two variants of SMAPI, though the assignment of error codes seems to be
+similar.
+
+In addition, the embedded controller on ThinkPad laptops has a non-standard
+interface at IO ports 0x1600-0x161F (mapped to LCP channel 3 of the H8S chip).
+The interface provides various system management services (currently known:
+battery information and accelerometer readouts). For more information see the
+thinkpad_ec module and the H8S hardware documentation:
+http://documentation.renesas.com/eng/products/mpumcu/rej09b0300_2140bhm.pdf
diff --git a/Documentation/tpm/xen-tpmfront.txt b/Documentation/tpm/xen-tpmfront.txt
new file mode 100644
index 000000000..69346de87
--- /dev/null
+++ b/Documentation/tpm/xen-tpmfront.txt
@@ -0,0 +1,113 @@
+Virtual TPM interface for Xen
+
+Authors: Matthew Fioravante (JHUAPL), Daniel De Graaf (NSA)
+
+This document describes the virtual Trusted Platform Module (vTPM) subsystem for
+Xen. The reader is assumed to have familiarity with building and installing Xen,
+Linux, and a basic understanding of the TPM and vTPM concepts.
+
+INTRODUCTION
+
+The goal of this work is to provide a TPM functionality to a virtual guest
+operating system (in Xen terms, a DomU). This allows programs to interact with
+a TPM in a virtual system the same way they interact with a TPM on the physical
+system. Each guest gets its own unique, emulated, software TPM. However, each
+of the vTPM's secrets (Keys, NVRAM, etc) are managed by a vTPM Manager domain,
+which seals the secrets to the Physical TPM. If the process of creating each of
+these domains (manager, vTPM, and guest) is trusted, the vTPM subsystem extends
+the chain of trust rooted in the hardware TPM to virtual machines in Xen. Each
+major component of vTPM is implemented as a separate domain, providing secure
+separation guaranteed by the hypervisor. The vTPM domains are implemented in
+mini-os to reduce memory and processor overhead.
+
+This mini-os vTPM subsystem was built on top of the previous vTPM work done by
+IBM and Intel corporation.
+
+
+DESIGN OVERVIEW
+---------------
+
+The architecture of vTPM is described below:
+
++------------------+
+| Linux DomU | ...
+| | ^ |
+| v | |
+| xen-tpmfront |
++------------------+
+ | ^
+ v |
++------------------+
+| mini-os/tpmback |
+| | ^ |
+| v | |
+| vtpm-stubdom | ...
+| | ^ |
+| v | |
+| mini-os/tpmfront |
++------------------+
+ | ^
+ v |
++------------------+
+| mini-os/tpmback |
+| | ^ |
+| v | |
+| vtpmmgr-stubdom |
+| | ^ |
+| v | |
+| mini-os/tpm_tis |
++------------------+
+ | ^
+ v |
++------------------+
+| Hardware TPM |
++------------------+
+
+ * Linux DomU: The Linux based guest that wants to use a vTPM. There may be
+ more than one of these.
+
+ * xen-tpmfront.ko: Linux kernel virtual TPM frontend driver. This driver
+ provides vTPM access to a Linux-based DomU.
+
+ * mini-os/tpmback: Mini-os TPM backend driver. The Linux frontend driver
+ connects to this backend driver to facilitate communications
+ between the Linux DomU and its vTPM. This driver is also
+ used by vtpmmgr-stubdom to communicate with vtpm-stubdom.
+
+ * vtpm-stubdom: A mini-os stub domain that implements a vTPM. There is a
+ one to one mapping between running vtpm-stubdom instances and
+ logical vtpms on the system. The vTPM Platform Configuration
+ Registers (PCRs) are normally all initialized to zero.
+
+ * mini-os/tpmfront: Mini-os TPM frontend driver. The vTPM mini-os domain
+ vtpm-stubdom uses this driver to communicate with
+ vtpmmgr-stubdom. This driver is also used in mini-os
+ domains such as pv-grub that talk to the vTPM domain.
+
+ * vtpmmgr-stubdom: A mini-os domain that implements the vTPM manager. There is
+ only one vTPM manager and it should be running during the
+ entire lifetime of the machine. This domain regulates
+ access to the physical TPM on the system and secures the
+ persistent state of each vTPM.
+
+ * mini-os/tpm_tis: Mini-os TPM version 1.2 TPM Interface Specification (TIS)
+ driver. This driver used by vtpmmgr-stubdom to talk directly to
+ the hardware TPM. Communication is facilitated by mapping
+ hardware memory pages into vtpmmgr-stubdom.
+
+ * Hardware TPM: The physical TPM that is soldered onto the motherboard.
+
+
+INTEGRATION WITH XEN
+--------------------
+
+Support for the vTPM driver was added in Xen using the libxl toolstack in Xen
+4.3. See the Xen documentation (docs/misc/vtpm.txt) for details on setting up
+the vTPM and vTPM Manager stub domains. Once the stub domains are running, a
+vTPM device is set up in the same manner as a disk or network device in the
+domain's configuration file.
+
+In order to use features such as IMA that require a TPM to be loaded prior to
+the initrd, the xen-tpmfront driver must be compiled in to the kernel. If not
+using such features, the driver can be compiled as a module and will be loaded
+as usual.
diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt
new file mode 100644
index 000000000..77d14d51a
--- /dev/null
+++ b/Documentation/trace/coresight.txt
@@ -0,0 +1,299 @@
+ Coresight - HW Assisted Tracing on ARM
+ ======================================
+
+ Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ Date: September 11th, 2014
+
+Introduction
+------------
+
+Coresight is an umbrella of technologies allowing for the debugging of ARM
+based SoC. It includes solutions for JTAG and HW assisted tracing. This
+document is concerned with the latter.
+
+HW assisted tracing is becoming increasingly useful when dealing with systems
+that have many SoCs and other components like GPU and DMA engines. ARM has
+developed a HW assisted tracing solution by means of different components, each
+being added to a design at synthesis time to cater to specific tracing needs.
+Compoments are generally categorised as source, link and sinks and are
+(usually) discovered using the AMBA bus.
+
+"Sources" generate a compressed stream representing the processor instruction
+path based on tracing scenarios as configured by users. From there the stream
+flows through the coresight system (via ATB bus) using links that are connecting
+the emanating source to a sink(s). Sinks serve as endpoints to the coresight
+implementation, either storing the compressed stream in a memory buffer or
+creating an interface to the outside world where data can be transferred to a
+host without fear of filling up the onboard coresight memory buffer.
+
+At typical coresight system would look like this:
+
+ *****************************************************************
+ **************************** AMBA AXI ****************************===||
+ ***************************************************************** ||
+ ^ ^ | ||
+ | | * **
+ 0000000 ::::: 0000000 ::::: ::::: @@@@@@@ ||||||||||||
+ 0 CPU 0<-->: C : 0 CPU 0<-->: C : : C : @ STM @ || System ||
+ |->0000000 : T : |->0000000 : T : : T :<--->@@@@@ || Memory ||
+ | #######<-->: I : | #######<-->: I : : I : @@@<-| ||||||||||||
+ | # ETM # ::::: | # PTM # ::::: ::::: @ |
+ | ##### ^ ^ | ##### ^ ! ^ ! . | |||||||||
+ | |->### | ! | |->### | ! | ! . | || DAP ||
+ | | # | ! | | # | ! | ! . | |||||||||
+ | | . | ! | | . | ! | ! . | | |
+ | | . | ! | | . | ! | ! . | | *
+ | | . | ! | | . | ! | ! . | | SWD/
+ | | . | ! | | . | ! | ! . | | JTAG
+ *****************************************************************<-|
+ *************************** AMBA Debug APB ************************
+ *****************************************************************
+ | . ! . ! ! . |
+ | . * . * * . |
+ *****************************************************************
+ ******************** Cross Trigger Matrix (CTM) *******************
+ *****************************************************************
+ | . ^ . . |
+ | * ! * * |
+ *****************************************************************
+ ****************** AMBA Advanced Trace Bus (ATB) ******************
+ *****************************************************************
+ | ! =============== |
+ | * ===== F =====<---------|
+ | ::::::::: ==== U ====
+ |-->:: CTI ::<!! === N ===
+ | ::::::::: ! == N ==
+ | ^ * == E ==
+ | ! &&&&&&&&& IIIIIII == L ==
+ |------>&& ETB &&<......II I =======
+ | ! &&&&&&&&& II I .
+ | ! I I .
+ | ! I REP I<..........
+ | ! I I
+ | !!>&&&&&&&&& II I *Source: ARM ltd.
+ |------>& TPIU &<......II I DAP = Debug Access Port
+ &&&&&&&&& IIIIIII ETM = Embedded Trace Macrocell
+ ; PTM = Program Trace Macrocell
+ ; CTI = Cross Trigger Interface
+ * ETB = Embedded Trace Buffer
+ To trace port TPIU= Trace Port Interface Unit
+ SWD = Serial Wire Debug
+
+While on target configuration of the components is done via the APB bus,
+all trace data are carried out-of-band on the ATB bus. The CTM provides
+a way to aggregate and distribute signals between CoreSight components.
+
+The coresight framework provides a central point to represent, configure and
+manage coresight devices on a platform. This first implementation centers on
+the basic tracing functionality, enabling components such ETM/PTM, funnel,
+replicator, TMC, TPIU and ETB. Future work will enable more
+intricate IP blocks such as STM and CTI.
+
+
+Acronyms and Classification
+---------------------------
+
+Acronyms:
+
+PTM: Program Trace Macrocell
+ETM: Embedded Trace Macrocell
+STM: System trace Macrocell
+ETB: Embedded Trace Buffer
+ITM: Instrumentation Trace Macrocell
+TPIU: Trace Port Interface Unit
+TMC-ETR: Trace Memory Controller, configured as Embedded Trace Router
+TMC-ETF: Trace Memory Controller, configured as Embedded Trace FIFO
+CTI: Cross Trigger Interface
+
+Classification:
+
+Source:
+ ETMv3.x ETMv4, PTMv1.0, PTMv1.1, STM, STM500, ITM
+Link:
+ Funnel, replicator (intelligent or not), TMC-ETR
+Sinks:
+ ETBv1.0, ETB1.1, TPIU, TMC-ETF
+Misc:
+ CTI
+
+
+Device Tree Bindings
+----------------------
+
+See Documentation/devicetree/bindings/arm/coresight.txt for details.
+
+As of this writing drivers for ITM, STMs and CTIs are not provided but are
+expected to be added as the solution matures.
+
+
+Framework and implementation
+----------------------------
+
+The coresight framework provides a central point to represent, configure and
+manage coresight devices on a platform. Any coresight compliant device can
+register with the framework for as long as they use the right APIs:
+
+struct coresight_device *coresight_register(struct coresight_desc *desc);
+void coresight_unregister(struct coresight_device *csdev);
+
+The registering function is taking a "struct coresight_device *csdev" and
+register the device with the core framework. The unregister function takes
+a reference to a "strut coresight_device", obtained at registration time.
+
+If everything goes well during the registration process the new devices will
+show up under /sys/bus/coresight/devices, as showns here for a TC2 platform:
+
+root:~# ls /sys/bus/coresight/devices/
+replicator 20030000.tpiu 2201c000.ptm 2203c000.etm 2203e000.etm
+20010000.etb 20040000.funnel 2201d000.ptm 2203d000.etm
+root:~#
+
+The functions take a "struct coresight_device", which looks like this:
+
+struct coresight_desc {
+ enum coresight_dev_type type;
+ struct coresight_dev_subtype subtype;
+ const struct coresight_ops *ops;
+ struct coresight_platform_data *pdata;
+ struct device *dev;
+ const struct attribute_group **groups;
+};
+
+
+The "coresight_dev_type" identifies what the device is, i.e, source link or
+sink while the "coresight_dev_subtype" will characterise that type further.
+
+The "struct coresight_ops" is mandatory and will tell the framework how to
+perform base operations related to the components, each component having
+a different set of requirement. For that "struct coresight_ops_sink",
+"struct coresight_ops_link" and "struct coresight_ops_source" have been
+provided.
+
+The next field, "struct coresight_platform_data *pdata" is acquired by calling
+"of_get_coresight_platform_data()", as part of the driver's _probe routine and
+"struct device *dev" gets the device reference embedded in the "amba_device":
+
+static int etm_probe(struct amba_device *adev, const struct amba_id *id)
+{
+ ...
+ ...
+ drvdata->dev = &adev->dev;
+ ...
+}
+
+Specific class of device (source, link, or sink) have generic operations
+that can be performed on them (see "struct coresight_ops"). The
+"**groups" is a list of sysfs entries pertaining to operations
+specific to that component only. "Implementation defined" customisations are
+expected to be accessed and controlled using those entries.
+
+Last but not least, "struct module *owner" is expected to be set to reflect
+the information carried in "THIS_MODULE".
+
+How to use
+----------
+
+Before trace collection can start, a coresight sink needs to be identify.
+There is no limit on the amount of sinks (nor sources) that can be enabled at
+any given moment. As a generic operation, all device pertaining to the sink
+class will have an "active" entry in sysfs:
+
+root:/sys/bus/coresight/devices# ls
+replicator 20030000.tpiu 2201c000.ptm 2203c000.etm 2203e000.etm
+20010000.etb 20040000.funnel 2201d000.ptm 2203d000.etm
+root:/sys/bus/coresight/devices# ls 20010000.etb
+enable_sink status trigger_cntr
+root:/sys/bus/coresight/devices# echo 1 > 20010000.etb/enable_sink
+root:/sys/bus/coresight/devices# cat 20010000.etb/enable_sink
+1
+root:/sys/bus/coresight/devices#
+
+At boot time the current etm3x driver will configure the first address
+comparator with "_stext" and "_etext", essentially tracing any instruction
+that falls within that range. As such "enabling" a source will immediately
+trigger a trace capture:
+
+root:/sys/bus/coresight/devices# echo 1 > 2201c000.ptm/enable_source
+root:/sys/bus/coresight/devices# cat 2201c000.ptm/enable_source
+1
+root:/sys/bus/coresight/devices# cat 20010000.etb/status
+Depth: 0x2000
+Status: 0x1
+RAM read ptr: 0x0
+RAM wrt ptr: 0x19d3 <----- The write pointer is moving
+Trigger cnt: 0x0
+Control: 0x1
+Flush status: 0x0
+Flush ctrl: 0x2001
+root:/sys/bus/coresight/devices#
+
+Trace collection is stopped the same way:
+
+root:/sys/bus/coresight/devices# echo 0 > 2201c000.ptm/enable_source
+root:/sys/bus/coresight/devices#
+
+The content of the ETB buffer can be harvested directly from /dev:
+
+root:/sys/bus/coresight/devices# dd if=/dev/20010000.etb \
+of=~/cstrace.bin
+
+64+0 records in
+64+0 records out
+32768 bytes (33 kB) copied, 0.00125258 s, 26.2 MB/s
+root:/sys/bus/coresight/devices#
+
+The file cstrace.bin can be decompressed using "ptm2human", DS-5 or Trace32.
+
+Following is a DS-5 output of an experimental loop that increments a variable up
+to a certain value. The example is simple and yet provides a glimpse of the
+wealth of possibilities that coresight provides.
+
+Info Tracing enabled
+Instruction 106378866 0x8026B53C E52DE004 false PUSH {lr}
+Instruction 0 0x8026B540 E24DD00C false SUB sp,sp,#0xc
+Instruction 0 0x8026B544 E3A03000 false MOV r3,#0
+Instruction 0 0x8026B548 E58D3004 false STR r3,[sp,#4]
+Instruction 0 0x8026B54C E59D3004 false LDR r3,[sp,#4]
+Instruction 0 0x8026B550 E3530004 false CMP r3,#4
+Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1
+Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4]
+Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c
+Timestamp Timestamp: 17106715833
+Instruction 319 0x8026B54C E59D3004 false LDR r3,[sp,#4]
+Instruction 0 0x8026B550 E3530004 false CMP r3,#4
+Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1
+Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4]
+Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c
+Instruction 9 0x8026B54C E59D3004 false LDR r3,[sp,#4]
+Instruction 0 0x8026B550 E3530004 false CMP r3,#4
+Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1
+Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4]
+Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c
+Instruction 7 0x8026B54C E59D3004 false LDR r3,[sp,#4]
+Instruction 0 0x8026B550 E3530004 false CMP r3,#4
+Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1
+Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4]
+Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c
+Instruction 7 0x8026B54C E59D3004 false LDR r3,[sp,#4]
+Instruction 0 0x8026B550 E3530004 false CMP r3,#4
+Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1
+Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4]
+Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c
+Instruction 10 0x8026B54C E59D3004 false LDR r3,[sp,#4]
+Instruction 0 0x8026B550 E3530004 false CMP r3,#4
+Instruction 0 0x8026B554 E2833001 false ADD r3,r3,#1
+Instruction 0 0x8026B558 E58D3004 false STR r3,[sp,#4]
+Instruction 0 0x8026B55C DAFFFFFA true BLE {pc}-0x10 ; 0x8026b54c
+Instruction 6 0x8026B560 EE1D3F30 false MRC p15,#0x0,r3,c13,c0,#1
+Instruction 0 0x8026B564 E1A0100D false MOV r1,sp
+Instruction 0 0x8026B568 E3C12D7F false BIC r2,r1,#0x1fc0
+Instruction 0 0x8026B56C E3C2203F false BIC r2,r2,#0x3f
+Instruction 0 0x8026B570 E59D1004 false LDR r1,[sp,#4]
+Instruction 0 0x8026B574 E59F0010 false LDR r0,[pc,#16] ; [0x8026B58C] = 0x80550368
+Instruction 0 0x8026B578 E592200C false LDR r2,[r2,#0xc]
+Instruction 0 0x8026B57C E59221D0 false LDR r2,[r2,#0x1d0]
+Instruction 0 0x8026B580 EB07A4CF true BL {pc}+0x1e9344 ; 0x804548c4
+Info Tracing enabled
+Instruction 13570831 0x8026B584 E28DD00C false ADD sp,sp,#0xc
+Instruction 0 0x8026B588 E8BD8000 true LDM sp!,{pc}
+Timestamp Timestamp: 17107041535
diff --git a/Documentation/trace/events-kmem.txt b/Documentation/trace/events-kmem.txt
new file mode 100644
index 000000000..194800410
--- /dev/null
+++ b/Documentation/trace/events-kmem.txt
@@ -0,0 +1,107 @@
+ Subsystem Trace Points: kmem
+
+The kmem tracing system captures events related to object and page allocation
+within the kernel. Broadly speaking there are five major subheadings.
+
+ o Slab allocation of small objects of unknown type (kmalloc)
+ o Slab allocation of small objects of known type
+ o Page allocation
+ o Per-CPU Allocator Activity
+ o External Fragmentation
+
+This document describes what each of the tracepoints is and why they
+might be useful.
+
+1. Slab allocation of small objects of unknown type
+===================================================
+kmalloc call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s
+kmalloc_node call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d
+kfree call_site=%lx ptr=%p
+
+Heavy activity for these events may indicate that a specific cache is
+justified, particularly if kmalloc slab pages are getting significantly
+internal fragmented as a result of the allocation pattern. By correlating
+kmalloc with kfree, it may be possible to identify memory leaks and where
+the allocation sites were.
+
+
+2. Slab allocation of small objects of known type
+=================================================
+kmem_cache_alloc call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s
+kmem_cache_alloc_node call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d
+kmem_cache_free call_site=%lx ptr=%p
+
+These events are similar in usage to the kmalloc-related events except that
+it is likely easier to pin the event down to a specific cache. At the time
+of writing, no information is available on what slab is being allocated from,
+but the call_site can usually be used to extrapolate that information.
+
+3. Page allocation
+==================
+mm_page_alloc page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s
+mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d
+mm_page_free page=%p pfn=%lu order=%d
+mm_page_free_batched page=%p pfn=%lu order=%d cold=%d
+
+These four events deal with page allocation and freeing. mm_page_alloc is
+a simple indicator of page allocator activity. Pages may be allocated from
+the per-CPU allocator (high performance) or the buddy allocator.
+
+If pages are allocated directly from the buddy allocator, the
+mm_page_alloc_zone_locked event is triggered. This event is important as high
+amounts of activity imply high activity on the zone->lock. Taking this lock
+impairs performance by disabling interrupts, dirtying cache lines between
+CPUs and serialising many CPUs.
+
+When a page is freed directly by the caller, the only mm_page_free event
+is triggered. Significant amounts of activity here could indicate that the
+callers should be batching their activities.
+
+When pages are freed in batch, the also mm_page_free_batched is triggered.
+Broadly speaking, pages are taken off the LRU lock in bulk and
+freed in batch with a page list. Significant amounts of activity here could
+indicate that the system is under memory pressure and can also indicate
+contention on the zone->lru_lock.
+
+4. Per-CPU Allocator Activity
+=============================
+mm_page_alloc_zone_locked page=%p pfn=%lu order=%u migratetype=%d cpu=%d percpu_refill=%d
+mm_page_pcpu_drain page=%p pfn=%lu order=%d cpu=%d migratetype=%d
+
+In front of the page allocator is a per-cpu page allocator. It exists only
+for order-0 pages, reduces contention on the zone->lock and reduces the
+amount of writing on struct page.
+
+When a per-CPU list is empty or pages of the wrong type are allocated,
+the zone->lock will be taken once and the per-CPU list refilled. The event
+triggered is mm_page_alloc_zone_locked for each page allocated with the
+event indicating whether it is for a percpu_refill or not.
+
+When the per-CPU list is too full, a number of pages are freed, each one
+which triggers a mm_page_pcpu_drain event.
+
+The individual nature of the events is so that pages can be tracked
+between allocation and freeing. A number of drain or refill pages that occur
+consecutively imply the zone->lock being taken once. Large amounts of per-CPU
+refills and drains could imply an imbalance between CPUs where too much work
+is being concentrated in one place. It could also indicate that the per-CPU
+lists should be a larger size. Finally, large amounts of refills on one CPU
+and drains on another could be a factor in causing large amounts of cache
+line bounces due to writes between CPUs and worth investigating if pages
+can be allocated and freed on the same CPU through some algorithm change.
+
+5. External Fragmentation
+=========================
+mm_page_alloc_extfrag page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d
+
+External fragmentation affects whether a high-order allocation will be
+successful or not. For some types of hardware, this is important although
+it is avoided where possible. If the system is using huge pages and needs
+to be able to resize the pool over the lifetime of the system, this value
+is important.
+
+Large numbers of this event implies that memory is fragmenting and
+high-order allocations will start failing at some time in the future. One
+means of reducing the occurrence of this event is to increase the size of
+min_free_kbytes in increments of 3*pageblock_size*nr_online_nodes where
+pageblock_size is usually the size of the default hugepage size.
diff --git a/Documentation/trace/events-nmi.txt b/Documentation/trace/events-nmi.txt
new file mode 100644
index 000000000..c03c8c89f
--- /dev/null
+++ b/Documentation/trace/events-nmi.txt
@@ -0,0 +1,43 @@
+NMI Trace Events
+
+These events normally show up here:
+
+ /sys/kernel/debug/tracing/events/nmi
+
+--
+
+nmi_handler:
+
+You might want to use this tracepoint if you suspect that your
+NMI handlers are hogging large amounts of CPU time. The kernel
+will warn if it sees long-running handlers:
+
+ INFO: NMI handler took too long to run: 9.207 msecs
+
+and this tracepoint will allow you to drill down and get some
+more details.
+
+Let's say you suspect that perf_event_nmi_handler() is causing
+you some problems and you only want to trace that handler
+specifically. You need to find its address:
+
+ $ grep perf_event_nmi_handler /proc/kallsyms
+ ffffffff81625600 t perf_event_nmi_handler
+
+Let's also say you are only interested in when that function is
+really hogging a lot of CPU time, like a millisecond at a time.
+Note that the kernel's output is in milliseconds, but the input
+to the filter is in nanoseconds! You can filter on 'delta_ns':
+
+cd /sys/kernel/debug/tracing/events/nmi/nmi_handler
+echo 'handler==0xffffffff81625600 && delta_ns>1000000' > filter
+echo 1 > enable
+
+Your output would then look like:
+
+$ cat /sys/kernel/debug/tracing/trace_pipe
+<idle>-0 [000] d.h3 505.397558: nmi_handler: perf_event_nmi_handler() delta_ns: 3236765 handled: 1
+<idle>-0 [000] d.h3 505.805893: nmi_handler: perf_event_nmi_handler() delta_ns: 3174234 handled: 1
+<idle>-0 [000] d.h3 506.158206: nmi_handler: perf_event_nmi_handler() delta_ns: 3084642 handled: 1
+<idle>-0 [000] d.h3 506.334346: nmi_handler: perf_event_nmi_handler() delta_ns: 3080351 handled: 1
+
diff --git a/Documentation/trace/events-power.txt b/Documentation/trace/events-power.txt
new file mode 100644
index 000000000..21d514ced
--- /dev/null
+++ b/Documentation/trace/events-power.txt
@@ -0,0 +1,96 @@
+
+ Subsystem Trace Points: power
+
+The power tracing system captures events related to power transitions
+within the kernel. Broadly speaking there are three major subheadings:
+
+ o Power state switch which reports events related to suspend (S-states),
+ cpuidle (C-states) and cpufreq (P-states)
+ o System clock related changes
+ o Power domains related changes and transitions
+
+This document describes what each of the tracepoints is and why they
+might be useful.
+
+Cf. include/trace/events/power.h for the events definitions.
+
+1. Power state switch events
+============================
+
+1.1 Trace API
+-----------------
+
+A 'cpu' event class gathers the CPU-related events: cpuidle and
+cpufreq.
+
+cpu_idle "state=%lu cpu_id=%lu"
+cpu_frequency "state=%lu cpu_id=%lu"
+
+A suspend event is used to indicate the system going in and out of the
+suspend mode:
+
+machine_suspend "state=%lu"
+
+
+Note: the value of '-1' or '4294967295' for state means an exit from the current state,
+i.e. trace_cpu_idle(4, smp_processor_id()) means that the system
+enters the idle state 4, while trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id())
+means that the system exits the previous idle state.
+
+The event which has 'state=4294967295' in the trace is very important to the user
+space tools which are using it to detect the end of the current state, and so to
+correctly draw the states diagrams and to calculate accurate statistics etc.
+
+2. Clocks events
+================
+The clock events are used for clock enable/disable and for
+clock rate change.
+
+clock_enable "%s state=%lu cpu_id=%lu"
+clock_disable "%s state=%lu cpu_id=%lu"
+clock_set_rate "%s state=%lu cpu_id=%lu"
+
+The first parameter gives the clock name (e.g. "gpio1_iclk").
+The second parameter is '1' for enable, '0' for disable, the target
+clock rate for set_rate.
+
+3. Power domains events
+=======================
+The power domain events are used for power domains transitions
+
+power_domain_target "%s state=%lu cpu_id=%lu"
+
+The first parameter gives the power domain name (e.g. "mpu_pwrdm").
+The second parameter is the power domain target state.
+
+4. PM QoS events
+================
+The PM QoS events are used for QoS add/update/remove request and for
+target/flags update.
+
+pm_qos_add_request "pm_qos_class=%s value=%d"
+pm_qos_update_request "pm_qos_class=%s value=%d"
+pm_qos_remove_request "pm_qos_class=%s value=%d"
+pm_qos_update_request_timeout "pm_qos_class=%s value=%d, timeout_us=%ld"
+
+The first parameter gives the QoS class name (e.g. "CPU_DMA_LATENCY").
+The second parameter is value to be added/updated/removed.
+The third parameter is timeout value in usec.
+
+pm_qos_update_target "action=%s prev_value=%d curr_value=%d"
+pm_qos_update_flags "action=%s prev_value=0x%x curr_value=0x%x"
+
+The first parameter gives the QoS action name (e.g. "ADD_REQ").
+The second parameter is the previous QoS value.
+The third parameter is the current QoS value to update.
+
+And, there are also events used for device PM QoS add/update/remove request.
+
+dev_pm_qos_add_request "device=%s type=%s new_value=%d"
+dev_pm_qos_update_request "device=%s type=%s new_value=%d"
+dev_pm_qos_remove_request "device=%s type=%s new_value=%d"
+
+The first parameter gives the device name which tries to add/update/remove
+QoS requests.
+The second parameter gives the request type (e.g. "DEV_PM_QOS_RESUME_LATENCY").
+The third parameter is value to be added/updated/removed.
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
new file mode 100644
index 000000000..75d25a1d6
--- /dev/null
+++ b/Documentation/trace/events.txt
@@ -0,0 +1,496 @@
+ Event Tracing
+
+ Documentation written by Theodore Ts'o
+ Updated by Li Zefan and Tom Zanussi
+
+1. Introduction
+===============
+
+Tracepoints (see Documentation/trace/tracepoints.txt) can be used
+without creating custom kernel modules to register probe functions
+using the event tracing infrastructure.
+
+Not all tracepoints can be traced using the event tracing system;
+the kernel developer must provide code snippets which define how the
+tracing information is saved into the tracing buffer, and how the
+tracing information should be printed.
+
+2. Using Event Tracing
+======================
+
+2.1 Via the 'set_event' interface
+---------------------------------
+
+The events which are available for tracing can be found in the file
+/sys/kernel/debug/tracing/available_events.
+
+To enable a particular event, such as 'sched_wakeup', simply echo it
+to /sys/kernel/debug/tracing/set_event. For example:
+
+ # echo sched_wakeup >> /sys/kernel/debug/tracing/set_event
+
+[ Note: '>>' is necessary, otherwise it will firstly disable
+ all the events. ]
+
+To disable an event, echo the event name to the set_event file prefixed
+with an exclamation point:
+
+ # echo '!sched_wakeup' >> /sys/kernel/debug/tracing/set_event
+
+To disable all events, echo an empty line to the set_event file:
+
+ # echo > /sys/kernel/debug/tracing/set_event
+
+To enable all events, echo '*:*' or '*:' to the set_event file:
+
+ # echo *:* > /sys/kernel/debug/tracing/set_event
+
+The events are organized into subsystems, such as ext4, irq, sched,
+etc., and a full event name looks like this: <subsystem>:<event>. The
+subsystem name is optional, but it is displayed in the available_events
+file. All of the events in a subsystem can be specified via the syntax
+"<subsystem>:*"; for example, to enable all irq events, you can use the
+command:
+
+ # echo 'irq:*' > /sys/kernel/debug/tracing/set_event
+
+2.2 Via the 'enable' toggle
+---------------------------
+
+The events available are also listed in /sys/kernel/debug/tracing/events/ hierarchy
+of directories.
+
+To enable event 'sched_wakeup':
+
+ # echo 1 > /sys/kernel/debug/tracing/events/sched/sched_wakeup/enable
+
+To disable it:
+
+ # echo 0 > /sys/kernel/debug/tracing/events/sched/sched_wakeup/enable
+
+To enable all events in sched subsystem:
+
+ # echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+
+To enable all events:
+
+ # echo 1 > /sys/kernel/debug/tracing/events/enable
+
+When reading one of these enable files, there are four results:
+
+ 0 - all events this file affects are disabled
+ 1 - all events this file affects are enabled
+ X - there is a mixture of events enabled and disabled
+ ? - this file does not affect any event
+
+2.3 Boot option
+---------------
+
+In order to facilitate early boot debugging, use boot option:
+
+ trace_event=[event-list]
+
+event-list is a comma separated list of events. See section 2.1 for event
+format.
+
+3. Defining an event-enabled tracepoint
+=======================================
+
+See The example provided in samples/trace_events
+
+4. Event formats
+================
+
+Each trace event has a 'format' file associated with it that contains
+a description of each field in a logged event. This information can
+be used to parse the binary trace stream, and is also the place to
+find the field names that can be used in event filters (see section 5).
+
+It also displays the format string that will be used to print the
+event in text mode, along with the event name and ID used for
+profiling.
+
+Every event has a set of 'common' fields associated with it; these are
+the fields prefixed with 'common_'. The other fields vary between
+events and correspond to the fields defined in the TRACE_EVENT
+definition for that event.
+
+Each field in the format has the form:
+
+ field:field-type field-name; offset:N; size:N;
+
+where offset is the offset of the field in the trace record and size
+is the size of the data item, in bytes.
+
+For example, here's the information displayed for the 'sched_wakeup'
+event:
+
+# cat /sys/kernel/debug/tracing/events/sched/sched_wakeup/format
+
+name: sched_wakeup
+ID: 60
+format:
+ field:unsigned short common_type; offset:0; size:2;
+ field:unsigned char common_flags; offset:2; size:1;
+ field:unsigned char common_preempt_count; offset:3; size:1;
+ field:int common_pid; offset:4; size:4;
+ field:int common_tgid; offset:8; size:4;
+
+ field:char comm[TASK_COMM_LEN]; offset:12; size:16;
+ field:pid_t pid; offset:28; size:4;
+ field:int prio; offset:32; size:4;
+ field:int success; offset:36; size:4;
+ field:int cpu; offset:40; size:4;
+
+print fmt: "task %s:%d [%d] success=%d [%03d]", REC->comm, REC->pid,
+ REC->prio, REC->success, REC->cpu
+
+This event contains 10 fields, the first 5 common and the remaining 5
+event-specific. All the fields for this event are numeric, except for
+'comm' which is a string, a distinction important for event filtering.
+
+5. Event filtering
+==================
+
+Trace events can be filtered in the kernel by associating boolean
+'filter expressions' with them. As soon as an event is logged into
+the trace buffer, its fields are checked against the filter expression
+associated with that event type. An event with field values that
+'match' the filter will appear in the trace output, and an event whose
+values don't match will be discarded. An event with no filter
+associated with it matches everything, and is the default when no
+filter has been set for an event.
+
+5.1 Expression syntax
+---------------------
+
+A filter expression consists of one or more 'predicates' that can be
+combined using the logical operators '&&' and '||'. A predicate is
+simply a clause that compares the value of a field contained within a
+logged event with a constant value and returns either 0 or 1 depending
+on whether the field value matched (1) or didn't match (0):
+
+ field-name relational-operator value
+
+Parentheses can be used to provide arbitrary logical groupings and
+double-quotes can be used to prevent the shell from interpreting
+operators as shell metacharacters.
+
+The field-names available for use in filters can be found in the
+'format' files for trace events (see section 4).
+
+The relational-operators depend on the type of the field being tested:
+
+The operators available for numeric fields are:
+
+==, !=, <, <=, >, >=, &
+
+And for string fields they are:
+
+==, !=, ~
+
+The glob (~) only accepts a wild card character (*) at the start and or
+end of the string. For example:
+
+ prev_comm ~ "*sh"
+ prev_comm ~ "sh*"
+ prev_comm ~ "*sh*"
+
+But does not allow for it to be within the string:
+
+ prev_comm ~ "ba*sh" <-- is invalid
+
+5.2 Setting filters
+-------------------
+
+A filter for an individual event is set by writing a filter expression
+to the 'filter' file for the given event.
+
+For example:
+
+# cd /sys/kernel/debug/tracing/events/sched/sched_wakeup
+# echo "common_preempt_count > 4" > filter
+
+A slightly more involved example:
+
+# cd /sys/kernel/debug/tracing/events/signal/signal_generate
+# echo "((sig >= 10 && sig < 15) || sig == 17) && comm != bash" > filter
+
+If there is an error in the expression, you'll get an 'Invalid
+argument' error when setting it, and the erroneous string along with
+an error message can be seen by looking at the filter e.g.:
+
+# cd /sys/kernel/debug/tracing/events/signal/signal_generate
+# echo "((sig >= 10 && sig < 15) || dsig == 17) && comm != bash" > filter
+-bash: echo: write error: Invalid argument
+# cat filter
+((sig >= 10 && sig < 15) || dsig == 17) && comm != bash
+^
+parse_error: Field not found
+
+Currently the caret ('^') for an error always appears at the beginning of
+the filter string; the error message should still be useful though
+even without more accurate position info.
+
+5.3 Clearing filters
+--------------------
+
+To clear the filter for an event, write a '0' to the event's filter
+file.
+
+To clear the filters for all events in a subsystem, write a '0' to the
+subsystem's filter file.
+
+5.3 Subsystem filters
+---------------------
+
+For convenience, filters for every event in a subsystem can be set or
+cleared as a group by writing a filter expression into the filter file
+at the root of the subsystem. Note however, that if a filter for any
+event within the subsystem lacks a field specified in the subsystem
+filter, or if the filter can't be applied for any other reason, the
+filter for that event will retain its previous setting. This can
+result in an unintended mixture of filters which could lead to
+confusing (to the user who might think different filters are in
+effect) trace output. Only filters that reference just the common
+fields can be guaranteed to propagate successfully to all events.
+
+Here are a few subsystem filter examples that also illustrate the
+above points:
+
+Clear the filters on all events in the sched subsystem:
+
+# cd /sys/kernel/debug/tracing/events/sched
+# echo 0 > filter
+# cat sched_switch/filter
+none
+# cat sched_wakeup/filter
+none
+
+Set a filter using only common fields for all events in the sched
+subsystem (all events end up with the same filter):
+
+# cd /sys/kernel/debug/tracing/events/sched
+# echo common_pid == 0 > filter
+# cat sched_switch/filter
+common_pid == 0
+# cat sched_wakeup/filter
+common_pid == 0
+
+Attempt to set a filter using a non-common field for all events in the
+sched subsystem (all events but those that have a prev_pid field retain
+their old filters):
+
+# cd /sys/kernel/debug/tracing/events/sched
+# echo prev_pid == 0 > filter
+# cat sched_switch/filter
+prev_pid == 0
+# cat sched_wakeup/filter
+common_pid == 0
+
+6. Event triggers
+=================
+
+Trace events can be made to conditionally invoke trigger 'commands'
+which can take various forms and are described in detail below;
+examples would be enabling or disabling other trace events or invoking
+a stack trace whenever the trace event is hit. Whenever a trace event
+with attached triggers is invoked, the set of trigger commands
+associated with that event is invoked. Any given trigger can
+additionally have an event filter of the same form as described in
+section 5 (Event filtering) associated with it - the command will only
+be invoked if the event being invoked passes the associated filter.
+If no filter is associated with the trigger, it always passes.
+
+Triggers are added to and removed from a particular event by writing
+trigger expressions to the 'trigger' file for the given event.
+
+A given event can have any number of triggers associated with it,
+subject to any restrictions that individual commands may have in that
+regard.
+
+Event triggers are implemented on top of "soft" mode, which means that
+whenever a trace event has one or more triggers associated with it,
+the event is activated even if it isn't actually enabled, but is
+disabled in a "soft" mode. That is, the tracepoint will be called,
+but just will not be traced, unless of course it's actually enabled.
+This scheme allows triggers to be invoked even for events that aren't
+enabled, and also allows the current event filter implementation to be
+used for conditionally invoking triggers.
+
+The syntax for event triggers is roughly based on the syntax for
+set_ftrace_filter 'ftrace filter commands' (see the 'Filter commands'
+section of Documentation/trace/ftrace.txt), but there are major
+differences and the implementation isn't currently tied to it in any
+way, so beware about making generalizations between the two.
+
+6.1 Expression syntax
+---------------------
+
+Triggers are added by echoing the command to the 'trigger' file:
+
+ # echo 'command[:count] [if filter]' > trigger
+
+Triggers are removed by echoing the same command but starting with '!'
+to the 'trigger' file:
+
+ # echo '!command[:count] [if filter]' > trigger
+
+The [if filter] part isn't used in matching commands when removing, so
+leaving that off in a '!' command will accomplish the same thing as
+having it in.
+
+The filter syntax is the same as that described in the 'Event
+filtering' section above.
+
+For ease of use, writing to the trigger file using '>' currently just
+adds or removes a single trigger and there's no explicit '>>' support
+('>' actually behaves like '>>') or truncation support to remove all
+triggers (you have to use '!' for each one added.)
+
+6.2 Supported trigger commands
+------------------------------
+
+The following commands are supported:
+
+- enable_event/disable_event
+
+ These commands can enable or disable another trace event whenever
+ the triggering event is hit. When these commands are registered,
+ the other trace event is activated, but disabled in a "soft" mode.
+ That is, the tracepoint will be called, but just will not be traced.
+ The event tracepoint stays in this mode as long as there's a trigger
+ in effect that can trigger it.
+
+ For example, the following trigger causes kmalloc events to be
+ traced when a read system call is entered, and the :1 at the end
+ specifies that this enablement happens only once:
+
+ # echo 'enable_event:kmem:kmalloc:1' > \
+ /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
+
+ The following trigger causes kmalloc events to stop being traced
+ when a read system call exits. This disablement happens on every
+ read system call exit:
+
+ # echo 'disable_event:kmem:kmalloc' > \
+ /sys/kernel/debug/tracing/events/syscalls/sys_exit_read/trigger
+
+ The format is:
+
+ enable_event:<system>:<event>[:count]
+ disable_event:<system>:<event>[:count]
+
+ To remove the above commands:
+
+ # echo '!enable_event:kmem:kmalloc:1' > \
+ /sys/kernel/debug/tracing/events/syscalls/sys_enter_read/trigger
+
+ # echo '!disable_event:kmem:kmalloc' > \
+ /sys/kernel/debug/tracing/events/syscalls/sys_exit_read/trigger
+
+ Note that there can be any number of enable/disable_event triggers
+ per triggering event, but there can only be one trigger per
+ triggered event. e.g. sys_enter_read can have triggers enabling both
+ kmem:kmalloc and sched:sched_switch, but can't have two kmem:kmalloc
+ versions such as kmem:kmalloc and kmem:kmalloc:1 or 'kmem:kmalloc if
+ bytes_req == 256' and 'kmem:kmalloc if bytes_alloc == 256' (they
+ could be combined into a single filter on kmem:kmalloc though).
+
+- stacktrace
+
+ This command dumps a stacktrace in the trace buffer whenever the
+ triggering event occurs.
+
+ For example, the following trigger dumps a stacktrace every time the
+ kmalloc tracepoint is hit:
+
+ # echo 'stacktrace' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ The following trigger dumps a stacktrace the first 5 times a kmalloc
+ request happens with a size >= 64K
+
+ # echo 'stacktrace:5 if bytes_req >= 65536' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ The format is:
+
+ stacktrace[:count]
+
+ To remove the above commands:
+
+ # echo '!stacktrace' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ # echo '!stacktrace:5 if bytes_req >= 65536' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ The latter can also be removed more simply by the following (without
+ the filter):
+
+ # echo '!stacktrace:5' > \
+ /sys/kernel/debug/tracing/events/kmem/kmalloc/trigger
+
+ Note that there can be only one stacktrace trigger per triggering
+ event.
+
+- snapshot
+
+ This command causes a snapshot to be triggered whenever the
+ triggering event occurs.
+
+ The following command creates a snapshot every time a block request
+ queue is unplugged with a depth > 1. If you were tracing a set of
+ events or functions at the time, the snapshot trace buffer would
+ capture those events when the trigger event occurred:
+
+ # echo 'snapshot if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ To only snapshot once:
+
+ # echo 'snapshot:1 if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ To remove the above commands:
+
+ # echo '!snapshot if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ # echo '!snapshot:1 if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ Note that there can be only one snapshot trigger per triggering
+ event.
+
+- traceon/traceoff
+
+ These commands turn tracing on and off when the specified events are
+ hit. The parameter determines how many times the tracing system is
+ turned on and off. If unspecified, there is no limit.
+
+ The following command turns tracing off the first time a block
+ request queue is unplugged with a depth > 1. If you were tracing a
+ set of events or functions at the time, you could then examine the
+ trace buffer to see the sequence of events that led up to the
+ trigger event:
+
+ # echo 'traceoff:1 if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ To always disable tracing when nr_rq > 1 :
+
+ # echo 'traceoff if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ To remove the above commands:
+
+ # echo '!traceoff:1 if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ # echo '!traceoff if nr_rq > 1' > \
+ /sys/kernel/debug/tracing/events/block/block_unplug/trigger
+
+ Note that there can be only one traceon or traceoff trigger per
+ triggering event.
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
new file mode 100644
index 000000000..dd5f916b3
--- /dev/null
+++ b/Documentation/trace/ftrace-design.txt
@@ -0,0 +1,382 @@
+ function tracer guts
+ ====================
+ By Mike Frysinger
+
+Introduction
+------------
+
+Here we will cover the architecture pieces that the common function tracing
+code relies on for proper functioning. Things are broken down into increasing
+complexity so that you can start simple and at least get basic functionality.
+
+Note that this focuses on architecture implementation details only. If you
+want more explanation of a feature in terms of common code, review the common
+ftrace.txt file.
+
+Ideally, everyone who wishes to retain performance while supporting tracing in
+their kernel should make it all the way to dynamic ftrace support.
+
+
+Prerequisites
+-------------
+
+Ftrace relies on these features being implemented:
+ STACKTRACE_SUPPORT - implement save_stack_trace()
+ TRACE_IRQFLAGS_SUPPORT - implement include/asm/irqflags.h
+
+
+HAVE_FUNCTION_TRACER
+--------------------
+
+You will need to implement the mcount and the ftrace_stub functions.
+
+The exact mcount symbol name will depend on your toolchain. Some call it
+"mcount", "_mcount", or even "__mcount". You can probably figure it out by
+running something like:
+ $ echo 'main(){}' | gcc -x c -S -o - - -pg | grep mcount
+ call mcount
+We'll make the assumption below that the symbol is "mcount" just to keep things
+nice and simple in the examples.
+
+Keep in mind that the ABI that is in effect inside of the mcount function is
+*highly* architecture/toolchain specific. We cannot help you in this regard,
+sorry. Dig up some old documentation and/or find someone more familiar than
+you to bang ideas off of. Typically, register usage (argument/scratch/etc...)
+is a major issue at this point, especially in relation to the location of the
+mcount call (before/after function prologue). You might also want to look at
+how glibc has implemented the mcount function for your architecture. It might
+be (semi-)relevant.
+
+The mcount function should check the function pointer ftrace_trace_function
+to see if it is set to ftrace_stub. If it is, there is nothing for you to do,
+so return immediately. If it isn't, then call that function in the same way
+the mcount function normally calls __mcount_internal -- the first argument is
+the "frompc" while the second argument is the "selfpc" (adjusted to remove the
+size of the mcount call that is embedded in the function).
+
+For example, if the function foo() calls bar(), when the bar() function calls
+mcount(), the arguments mcount() will pass to the tracer are:
+ "frompc" - the address bar() will use to return to foo()
+ "selfpc" - the address bar() (with mcount() size adjustment)
+
+Also keep in mind that this mcount function will be called *a lot*, so
+optimizing for the default case of no tracer will help the smooth running of
+your system when tracing is disabled. So the start of the mcount function is
+typically the bare minimum with checking things before returning. That also
+means the code flow should usually be kept linear (i.e. no branching in the nop
+case). This is of course an optimization and not a hard requirement.
+
+Here is some pseudo code that should help (these functions should actually be
+implemented in assembly):
+
+void ftrace_stub(void)
+{
+ return;
+}
+
+void mcount(void)
+{
+ /* save any bare state needed in order to do initial checking */
+
+ extern void (*ftrace_trace_function)(unsigned long, unsigned long);
+ if (ftrace_trace_function != ftrace_stub)
+ goto do_trace;
+
+ /* restore any bare state */
+
+ return;
+
+do_trace:
+
+ /* save all state needed by the ABI (see paragraph above) */
+
+ unsigned long frompc = ...;
+ unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
+ ftrace_trace_function(frompc, selfpc);
+
+ /* restore all state needed by the ABI */
+}
+
+Don't forget to export mcount for modules !
+extern void mcount(void);
+EXPORT_SYMBOL(mcount);
+
+
+HAVE_FUNCTION_GRAPH_TRACER
+--------------------------
+
+Deep breath ... time to do some real work. Here you will need to update the
+mcount function to check ftrace graph function pointers, as well as implement
+some functions to save (hijack) and restore the return address.
+
+The mcount function should check the function pointers ftrace_graph_return
+(compare to ftrace_stub) and ftrace_graph_entry (compare to
+ftrace_graph_entry_stub). If either of those is not set to the relevant stub
+function, call the arch-specific function ftrace_graph_caller which in turn
+calls the arch-specific function prepare_ftrace_return. Neither of these
+function names is strictly required, but you should use them anyway to stay
+consistent across the architecture ports -- easier to compare & contrast
+things.
+
+The arguments to prepare_ftrace_return are slightly different than what are
+passed to ftrace_trace_function. The second argument "selfpc" is the same,
+but the first argument should be a pointer to the "frompc". Typically this is
+located on the stack. This allows the function to hijack the return address
+temporarily to have it point to the arch-specific function return_to_handler.
+That function will simply call the common ftrace_return_to_handler function and
+that will return the original return address with which you can return to the
+original call site.
+
+Here is the updated mcount pseudo code:
+void mcount(void)
+{
+...
+ if (ftrace_trace_function != ftrace_stub)
+ goto do_trace;
+
++#ifdef CONFIG_FUNCTION_GRAPH_TRACER
++ extern void (*ftrace_graph_return)(...);
++ extern void (*ftrace_graph_entry)(...);
++ if (ftrace_graph_return != ftrace_stub ||
++ ftrace_graph_entry != ftrace_graph_entry_stub)
++ ftrace_graph_caller();
++#endif
+
+ /* restore any bare state */
+...
+
+Here is the pseudo code for the new ftrace_graph_caller assembly function:
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void ftrace_graph_caller(void)
+{
+ /* save all state needed by the ABI */
+
+ unsigned long *frompc = &...;
+ unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
+ /* passing frame pointer up is optional -- see below */
+ prepare_ftrace_return(frompc, selfpc, frame_pointer);
+
+ /* restore all state needed by the ABI */
+}
+#endif
+
+For information on how to implement prepare_ftrace_return(), simply look at the
+x86 version (the frame pointer passing is optional; see the next section for
+more information). The only architecture-specific piece in it is the setup of
+the fault recovery table (the asm(...) code). The rest should be the same
+across architectures.
+
+Here is the pseudo code for the new return_to_handler assembly function. Note
+that the ABI that applies here is different from what applies to the mcount
+code. Since you are returning from a function (after the epilogue), you might
+be able to skimp on things saved/restored (usually just registers used to pass
+return values).
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void return_to_handler(void)
+{
+ /* save all state needed by the ABI (see paragraph above) */
+
+ void (*original_return_point)(void) = ftrace_return_to_handler();
+
+ /* restore all state needed by the ABI */
+
+ /* this is usually either a return or a jump */
+ original_return_point();
+}
+#endif
+
+
+HAVE_FUNCTION_GRAPH_FP_TEST
+---------------------------
+
+An arch may pass in a unique value (frame pointer) to both the entering and
+exiting of a function. On exit, the value is compared and if it does not
+match, then it will panic the kernel. This is largely a sanity check for bad
+code generation with gcc. If gcc for your port sanely updates the frame
+pointer under different optimization levels, then ignore this option.
+
+However, adding support for it isn't terribly difficult. In your assembly code
+that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument.
+Then in the C version of that function, do what the x86 port does and pass it
+along to ftrace_push_return_trace() instead of a stub value of 0.
+
+Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer.
+
+
+HAVE_FTRACE_NMI_ENTER
+---------------------
+
+If you can't trace NMI functions, then skip this option.
+
+<details to be filled>
+
+
+HAVE_SYSCALL_TRACEPOINTS
+------------------------
+
+You need very few things to get the syscalls tracing in an arch.
+
+- Support HAVE_ARCH_TRACEHOOK (see arch/Kconfig).
+- Have a NR_syscalls variable in <asm/unistd.h> that provides the number
+ of syscalls supported by the arch.
+- Support the TIF_SYSCALL_TRACEPOINT thread flags.
+- Put the trace_sys_enter() and trace_sys_exit() tracepoints calls from ptrace
+ in the ptrace syscalls tracing path.
+- If the system call table on this arch is more complicated than a simple array
+ of addresses of the system calls, implement an arch_syscall_addr to return
+ the address of a given system call.
+- If the symbol names of the system calls do not match the function names on
+ this arch, define ARCH_HAS_SYSCALL_MATCH_SYM_NAME in asm/ftrace.h and
+ implement arch_syscall_match_sym_name with the appropriate logic to return
+ true if the function name corresponds with the symbol name.
+- Tag this arch as HAVE_SYSCALL_TRACEPOINTS.
+
+
+HAVE_FTRACE_MCOUNT_RECORD
+-------------------------
+
+See scripts/recordmcount.pl for more info. Just fill in the arch-specific
+details for how to locate the addresses of mcount call sites via objdump.
+This option doesn't make much sense without also implementing dynamic ftrace.
+
+
+HAVE_DYNAMIC_FTRACE
+-------------------
+
+You will first need HAVE_FTRACE_MCOUNT_RECORD and HAVE_FUNCTION_TRACER, so
+scroll your reader back up if you got over eager.
+
+Once those are out of the way, you will need to implement:
+ - asm/ftrace.h:
+ - MCOUNT_ADDR
+ - ftrace_call_adjust()
+ - struct dyn_arch_ftrace{}
+ - asm code:
+ - mcount() (new stub)
+ - ftrace_caller()
+ - ftrace_call()
+ - ftrace_stub()
+ - C code:
+ - ftrace_dyn_arch_init()
+ - ftrace_make_nop()
+ - ftrace_make_call()
+ - ftrace_update_ftrace_func()
+
+First you will need to fill out some arch details in your asm/ftrace.h.
+
+Define MCOUNT_ADDR as the address of your mcount symbol similar to:
+ #define MCOUNT_ADDR ((unsigned long)mcount)
+Since no one else will have a decl for that function, you will need to:
+ extern void mcount(void);
+
+You will also need the helper function ftrace_call_adjust(). Most people
+will be able to stub it out like so:
+ static inline unsigned long ftrace_call_adjust(unsigned long addr)
+ {
+ return addr;
+ }
+<details to be filled>
+
+Lastly you will need the custom dyn_arch_ftrace structure. If you need
+some extra state when runtime patching arbitrary call sites, this is the
+place. For now though, create an empty struct:
+ struct dyn_arch_ftrace {
+ /* No extra data needed */
+ };
+
+With the header out of the way, we can fill out the assembly code. While we
+did already create a mcount() function earlier, dynamic ftrace only wants a
+stub function. This is because the mcount() will only be used during boot
+and then all references to it will be patched out never to return. Instead,
+the guts of the old mcount() will be used to create a new ftrace_caller()
+function. Because the two are hard to merge, it will most likely be a lot
+easier to have two separate definitions split up by #ifdefs. Same goes for
+the ftrace_stub() as that will now be inlined in ftrace_caller().
+
+Before we get confused anymore, let's check out some pseudo code so you can
+implement your own stuff in assembly:
+
+void mcount(void)
+{
+ return;
+}
+
+void ftrace_caller(void)
+{
+ /* save all state needed by the ABI (see paragraph above) */
+
+ unsigned long frompc = ...;
+ unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
+
+ftrace_call:
+ ftrace_stub(frompc, selfpc);
+
+ /* restore all state needed by the ABI */
+
+ftrace_stub:
+ return;
+}
+
+This might look a little odd at first, but keep in mind that we will be runtime
+patching multiple things. First, only functions that we actually want to trace
+will be patched to call ftrace_caller(). Second, since we only have one tracer
+active at a time, we will patch the ftrace_caller() function itself to call the
+specific tracer in question. That is the point of the ftrace_call label.
+
+With that in mind, let's move on to the C code that will actually be doing the
+runtime patching. You'll need a little knowledge of your arch's opcodes in
+order to make it through the next section.
+
+Every arch has an init callback function. If you need to do something early on
+to initialize some state, this is the time to do that. Otherwise, this simple
+function below should be sufficient for most people:
+
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+
+There are two functions that are used to do runtime patching of arbitrary
+functions. The first is used to turn the mcount call site into a nop (which
+is what helps us retain runtime performance when not tracing). The second is
+used to turn the mcount call site into a call to an arbitrary location (but
+typically that is ftracer_caller()). See the general function definition in
+linux/ftrace.h for the functions:
+ ftrace_make_nop()
+ ftrace_make_call()
+The rec->ip value is the address of the mcount call site that was collected
+by the scripts/recordmcount.pl during build time.
+
+The last function is used to do runtime patching of the active tracer. This
+will be modifying the assembly code at the location of the ftrace_call symbol
+inside of the ftrace_caller() function. So you should have sufficient padding
+at that location to support the new function calls you'll be inserting. Some
+people will be using a "call" type instruction while others will be using a
+"branch" type instruction. Specifically, the function is:
+ ftrace_update_ftrace_func()
+
+
+HAVE_DYNAMIC_FTRACE + HAVE_FUNCTION_GRAPH_TRACER
+------------------------------------------------
+
+The function grapher needs a few tweaks in order to work with dynamic ftrace.
+Basically, you will need to:
+ - update:
+ - ftrace_caller()
+ - ftrace_graph_call()
+ - ftrace_graph_caller()
+ - implement:
+ - ftrace_enable_ftrace_graph_caller()
+ - ftrace_disable_ftrace_graph_caller()
+
+<details to be filled>
+Quick notes:
+ - add a nop stub after the ftrace_call location named ftrace_graph_call;
+ stub needs to be large enough to support a call to ftrace_graph_caller()
+ - update ftrace_graph_caller() to work with being called by the new
+ ftrace_caller() since some semantics may have changed
+ - ftrace_enable_ftrace_graph_caller() will runtime patch the
+ ftrace_graph_call location with a call to ftrace_graph_caller()
+ - ftrace_disable_ftrace_graph_caller() will runtime patch the
+ ftrace_graph_call location with nops
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
new file mode 100644
index 000000000..572ca9236
--- /dev/null
+++ b/Documentation/trace/ftrace.txt
@@ -0,0 +1,2846 @@
+ ftrace - Function Tracer
+ ========================
+
+Copyright 2008 Red Hat Inc.
+ Author: Steven Rostedt <srostedt@redhat.com>
+ License: The GNU Free Documentation License, Version 1.2
+ (dual licensed under the GPL v2)
+Reviewers: Elias Oltmanns, Randy Dunlap, Andrew Morton,
+ John Kacur, and David Teigland.
+Written for: 2.6.28-rc2
+Updated for: 3.10
+
+Introduction
+------------
+
+Ftrace is an internal tracer designed to help out developers and
+designers of systems to find what is going on inside the kernel.
+It can be used for debugging or analyzing latencies and
+performance issues that take place outside of user-space.
+
+Although ftrace is typically considered the function tracer, it
+is really a frame work of several assorted tracing utilities.
+There's latency tracing to examine what occurs between interrupts
+disabled and enabled, as well as for preemption and from a time
+a task is woken to the task is actually scheduled in.
+
+One of the most common uses of ftrace is the event tracing.
+Through out the kernel is hundreds of static event points that
+can be enabled via the debugfs file system to see what is
+going on in certain parts of the kernel.
+
+
+Implementation Details
+----------------------
+
+See ftrace-design.txt for details for arch porters and such.
+
+
+The File System
+---------------
+
+Ftrace uses the debugfs file system to hold the control files as
+well as the files to display output.
+
+When debugfs is configured into the kernel (which selecting any ftrace
+option will do) the directory /sys/kernel/debug will be created. To mount
+this directory, you can add to your /etc/fstab file:
+
+ debugfs /sys/kernel/debug debugfs defaults 0 0
+
+Or you can mount it at run time with:
+
+ mount -t debugfs nodev /sys/kernel/debug
+
+For quicker access to that directory you may want to make a soft link to
+it:
+
+ ln -s /sys/kernel/debug /debug
+
+Any selected ftrace option will also create a directory called tracing
+within the debugfs. The rest of the document will assume that you are in
+the ftrace directory (cd /sys/kernel/debug/tracing) and will only concentrate
+on the files within that directory and not distract from the content with
+the extended "/sys/kernel/debug/tracing" path name.
+
+That's it! (assuming that you have ftrace configured into your kernel)
+
+After mounting debugfs, you can see a directory called
+"tracing". This directory contains the control and output files
+of ftrace. Here is a list of some of the key files:
+
+
+ Note: all time values are in microseconds.
+
+ current_tracer:
+
+ This is used to set or display the current tracer
+ that is configured.
+
+ available_tracers:
+
+ This holds the different types of tracers that
+ have been compiled into the kernel. The
+ tracers listed here can be configured by
+ echoing their name into current_tracer.
+
+ tracing_on:
+
+ This sets or displays whether writing to the trace
+ ring buffer is enabled. Echo 0 into this file to disable
+ the tracer or 1 to enable it. Note, this only disables
+ writing to the ring buffer, the tracing overhead may
+ still be occurring.
+
+ trace:
+
+ This file holds the output of the trace in a human
+ readable format (described below).
+
+ trace_pipe:
+
+ The output is the same as the "trace" file but this
+ file is meant to be streamed with live tracing.
+ Reads from this file will block until new data is
+ retrieved. Unlike the "trace" file, this file is a
+ consumer. This means reading from this file causes
+ sequential reads to display more current data. Once
+ data is read from this file, it is consumed, and
+ will not be read again with a sequential read. The
+ "trace" file is static, and if the tracer is not
+ adding more data,they will display the same
+ information every time they are read.
+
+ trace_options:
+
+ This file lets the user control the amount of data
+ that is displayed in one of the above output
+ files. Options also exist to modify how a tracer
+ or events work (stack traces, timestamps, etc).
+
+ options:
+
+ This is a directory that has a file for every available
+ trace option (also in trace_options). Options may also be set
+ or cleared by writing a "1" or "0" respectively into the
+ corresponding file with the option name.
+
+ tracing_max_latency:
+
+ Some of the tracers record the max latency.
+ For example, the time interrupts are disabled.
+ This time is saved in this file. The max trace
+ will also be stored, and displayed by "trace".
+ A new max trace will only be recorded if the
+ latency is greater than the value in this
+ file. (in microseconds)
+
+ tracing_thresh:
+
+ Some latency tracers will record a trace whenever the
+ latency is greater than the number in this file.
+ Only active when the file contains a number greater than 0.
+ (in microseconds)
+
+ buffer_size_kb:
+
+ This sets or displays the number of kilobytes each CPU
+ buffer holds. By default, the trace buffers are the same size
+ for each CPU. The displayed number is the size of the
+ CPU buffer and not total size of all buffers. The
+ trace buffers are allocated in pages (blocks of memory
+ that the kernel uses for allocation, usually 4 KB in size).
+ If the last page allocated has room for more bytes
+ than requested, the rest of the page will be used,
+ making the actual allocation bigger than requested.
+ ( Note, the size may not be a multiple of the page size
+ due to buffer management meta-data. )
+
+ buffer_total_size_kb:
+
+ This displays the total combined size of all the trace buffers.
+
+ free_buffer:
+
+ If a process is performing the tracing, and the ring buffer
+ should be shrunk "freed" when the process is finished, even
+ if it were to be killed by a signal, this file can be used
+ for that purpose. On close of this file, the ring buffer will
+ be resized to its minimum size. Having a process that is tracing
+ also open this file, when the process exits its file descriptor
+ for this file will be closed, and in doing so, the ring buffer
+ will be "freed".
+
+ It may also stop tracing if disable_on_free option is set.
+
+ tracing_cpumask:
+
+ This is a mask that lets the user only trace
+ on specified CPUs. The format is a hex string
+ representing the CPUs.
+
+ set_ftrace_filter:
+
+ When dynamic ftrace is configured in (see the
+ section below "dynamic ftrace"), the code is dynamically
+ modified (code text rewrite) to disable calling of the
+ function profiler (mcount). This lets tracing be configured
+ in with practically no overhead in performance. This also
+ has a side effect of enabling or disabling specific functions
+ to be traced. Echoing names of functions into this file
+ will limit the trace to only those functions.
+
+ This interface also allows for commands to be used. See the
+ "Filter commands" section for more details.
+
+ set_ftrace_notrace:
+
+ This has an effect opposite to that of
+ set_ftrace_filter. Any function that is added here will not
+ be traced. If a function exists in both set_ftrace_filter
+ and set_ftrace_notrace, the function will _not_ be traced.
+
+ set_ftrace_pid:
+
+ Have the function tracer only trace a single thread.
+
+ set_graph_function:
+
+ Set a "trigger" function where tracing should start
+ with the function graph tracer (See the section
+ "dynamic ftrace" for more details).
+
+ available_filter_functions:
+
+ This lists the functions that ftrace
+ has processed and can trace. These are the function
+ names that you can pass to "set_ftrace_filter" or
+ "set_ftrace_notrace". (See the section "dynamic ftrace"
+ below for more details.)
+
+ enabled_functions:
+
+ This file is more for debugging ftrace, but can also be useful
+ in seeing if any function has a callback attached to it.
+ Not only does the trace infrastructure use ftrace function
+ trace utility, but other subsystems might too. This file
+ displays all functions that have a callback attached to them
+ as well as the number of callbacks that have been attached.
+ Note, a callback may also call multiple functions which will
+ not be listed in this count.
+
+ If the callback registered to be traced by a function with
+ the "save regs" attribute (thus even more overhead), a 'R'
+ will be displayed on the same line as the function that
+ is returning registers.
+
+ If the callback registered to be traced by a function with
+ the "ip modify" attribute (thus the regs->ip can be changed),
+ an 'I' will be displayed on the same line as the function that
+ can be overridden.
+
+ function_profile_enabled:
+
+ When set it will enable all functions with either the function
+ tracer, or if enabled, the function graph tracer. It will
+ keep a histogram of the number of functions that were called
+ and if run with the function graph tracer, it will also keep
+ track of the time spent in those functions. The histogram
+ content can be displayed in the files:
+
+ trace_stats/function<cpu> ( function0, function1, etc).
+
+ trace_stats:
+
+ A directory that holds different tracing stats.
+
+ kprobe_events:
+
+ Enable dynamic trace points. See kprobetrace.txt.
+
+ kprobe_profile:
+
+ Dynamic trace points stats. See kprobetrace.txt.
+
+ max_graph_depth:
+
+ Used with the function graph tracer. This is the max depth
+ it will trace into a function. Setting this to a value of
+ one will show only the first kernel function that is called
+ from user space.
+
+ printk_formats:
+
+ This is for tools that read the raw format files. If an event in
+ the ring buffer references a string (currently only trace_printk()
+ does this), only a pointer to the string is recorded into the buffer
+ and not the string itself. This prevents tools from knowing what
+ that string was. This file displays the string and address for
+ the string allowing tools to map the pointers to what the
+ strings were.
+
+ saved_cmdlines:
+
+ Only the pid of the task is recorded in a trace event unless
+ the event specifically saves the task comm as well. Ftrace
+ makes a cache of pid mappings to comms to try to display
+ comms for events. If a pid for a comm is not listed, then
+ "<...>" is displayed in the output.
+
+ snapshot:
+
+ This displays the "snapshot" buffer and also lets the user
+ take a snapshot of the current running trace.
+ See the "Snapshot" section below for more details.
+
+ stack_max_size:
+
+ When the stack tracer is activated, this will display the
+ maximum stack size it has encountered.
+ See the "Stack Trace" section below.
+
+ stack_trace:
+
+ This displays the stack back trace of the largest stack
+ that was encountered when the stack tracer is activated.
+ See the "Stack Trace" section below.
+
+ stack_trace_filter:
+
+ This is similar to "set_ftrace_filter" but it limits what
+ functions the stack tracer will check.
+
+ trace_clock:
+
+ Whenever an event is recorded into the ring buffer, a
+ "timestamp" is added. This stamp comes from a specified
+ clock. By default, ftrace uses the "local" clock. This
+ clock is very fast and strictly per cpu, but on some
+ systems it may not be monotonic with respect to other
+ CPUs. In other words, the local clocks may not be in sync
+ with local clocks on other CPUs.
+
+ Usual clocks for tracing:
+
+ # cat trace_clock
+ [local] global counter x86-tsc
+
+ local: Default clock, but may not be in sync across CPUs
+
+ global: This clock is in sync with all CPUs but may
+ be a bit slower than the local clock.
+
+ counter: This is not a clock at all, but literally an atomic
+ counter. It counts up one by one, but is in sync
+ with all CPUs. This is useful when you need to
+ know exactly the order events occurred with respect to
+ each other on different CPUs.
+
+ uptime: This uses the jiffies counter and the time stamp
+ is relative to the time since boot up.
+
+ perf: This makes ftrace use the same clock that perf uses.
+ Eventually perf will be able to read ftrace buffers
+ and this will help out in interleaving the data.
+
+ x86-tsc: Architectures may define their own clocks. For
+ example, x86 uses its own TSC cycle clock here.
+
+ To set a clock, simply echo the clock name into this file.
+
+ echo global > trace_clock
+
+ trace_marker:
+
+ This is a very useful file for synchronizing user space
+ with events happening in the kernel. Writing strings into
+ this file will be written into the ftrace buffer.
+
+ It is useful in applications to open this file at the start
+ of the application and just reference the file descriptor
+ for the file.
+
+ void trace_write(const char *fmt, ...)
+ {
+ va_list ap;
+ char buf[256];
+ int n;
+
+ if (trace_fd < 0)
+ return;
+
+ va_start(ap, fmt);
+ n = vsnprintf(buf, 256, fmt, ap);
+ va_end(ap);
+
+ write(trace_fd, buf, n);
+ }
+
+ start:
+
+ trace_fd = open("trace_marker", WR_ONLY);
+
+ uprobe_events:
+
+ Add dynamic tracepoints in programs.
+ See uprobetracer.txt
+
+ uprobe_profile:
+
+ Uprobe statistics. See uprobetrace.txt
+
+ instances:
+
+ This is a way to make multiple trace buffers where different
+ events can be recorded in different buffers.
+ See "Instances" section below.
+
+ events:
+
+ This is the trace event directory. It holds event tracepoints
+ (also known as static tracepoints) that have been compiled
+ into the kernel. It shows what event tracepoints exist
+ and how they are grouped by system. There are "enable"
+ files at various levels that can enable the tracepoints
+ when a "1" is written to them.
+
+ See events.txt for more information.
+
+ per_cpu:
+
+ This is a directory that contains the trace per_cpu information.
+
+ per_cpu/cpu0/buffer_size_kb:
+
+ The ftrace buffer is defined per_cpu. That is, there's a separate
+ buffer for each CPU to allow writes to be done atomically,
+ and free from cache bouncing. These buffers may have different
+ size buffers. This file is similar to the buffer_size_kb
+ file, but it only displays or sets the buffer size for the
+ specific CPU. (here cpu0).
+
+ per_cpu/cpu0/trace:
+
+ This is similar to the "trace" file, but it will only display
+ the data specific for the CPU. If written to, it only clears
+ the specific CPU buffer.
+
+ per_cpu/cpu0/trace_pipe
+
+ This is similar to the "trace_pipe" file, and is a consuming
+ read, but it will only display (and consume) the data specific
+ for the CPU.
+
+ per_cpu/cpu0/trace_pipe_raw
+
+ For tools that can parse the ftrace ring buffer binary format,
+ the trace_pipe_raw file can be used to extract the data
+ from the ring buffer directly. With the use of the splice()
+ system call, the buffer data can be quickly transferred to
+ a file or to the network where a server is collecting the
+ data.
+
+ Like trace_pipe, this is a consuming reader, where multiple
+ reads will always produce different data.
+
+ per_cpu/cpu0/snapshot:
+
+ This is similar to the main "snapshot" file, but will only
+ snapshot the current CPU (if supported). It only displays
+ the content of the snapshot for a given CPU, and if
+ written to, only clears this CPU buffer.
+
+ per_cpu/cpu0/snapshot_raw:
+
+ Similar to the trace_pipe_raw, but will read the binary format
+ from the snapshot buffer for the given CPU.
+
+ per_cpu/cpu0/stats:
+
+ This displays certain stats about the ring buffer:
+
+ entries: The number of events that are still in the buffer.
+
+ overrun: The number of lost events due to overwriting when
+ the buffer was full.
+
+ commit overrun: Should always be zero.
+ This gets set if so many events happened within a nested
+ event (ring buffer is re-entrant), that it fills the
+ buffer and starts dropping events.
+
+ bytes: Bytes actually read (not overwritten).
+
+ oldest event ts: The oldest timestamp in the buffer
+
+ now ts: The current timestamp
+
+ dropped events: Events lost due to overwrite option being off.
+
+ read events: The number of events read.
+
+The Tracers
+-----------
+
+Here is the list of current tracers that may be configured.
+
+ "function"
+
+ Function call tracer to trace all kernel functions.
+
+ "function_graph"
+
+ Similar to the function tracer except that the
+ function tracer probes the functions on their entry
+ whereas the function graph tracer traces on both entry
+ and exit of the functions. It then provides the ability
+ to draw a graph of function calls similar to C code
+ source.
+
+ "irqsoff"
+
+ Traces the areas that disable interrupts and saves
+ the trace with the longest max latency.
+ See tracing_max_latency. When a new max is recorded,
+ it replaces the old trace. It is best to view this
+ trace with the latency-format option enabled.
+
+ "preemptoff"
+
+ Similar to irqsoff but traces and records the amount of
+ time for which preemption is disabled.
+
+ "preemptirqsoff"
+
+ Similar to irqsoff and preemptoff, but traces and
+ records the largest time for which irqs and/or preemption
+ is disabled.
+
+ "wakeup"
+
+ Traces and records the max latency that it takes for
+ the highest priority task to get scheduled after
+ it has been woken up.
+ Traces all tasks as an average developer would expect.
+
+ "wakeup_rt"
+
+ Traces and records the max latency that it takes for just
+ RT tasks (as the current "wakeup" does). This is useful
+ for those interested in wake up timings of RT tasks.
+
+ "nop"
+
+ This is the "trace nothing" tracer. To remove all
+ tracers from tracing simply echo "nop" into
+ current_tracer.
+
+
+Examples of using the tracer
+----------------------------
+
+Here are typical examples of using the tracers when controlling
+them only with the debugfs interface (without using any
+user-land utilities).
+
+Output format:
+--------------
+
+Here is an example of the output format of the file "trace"
+
+ --------
+# tracer: function
+#
+# entries-in-buffer/entries-written: 140080/250280 #P:4
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ bash-1977 [000] .... 17284.993652: sys_close <-system_call_fastpath
+ bash-1977 [000] .... 17284.993653: __close_fd <-sys_close
+ bash-1977 [000] .... 17284.993653: _raw_spin_lock <-__close_fd
+ sshd-1974 [003] .... 17284.993653: __srcu_read_unlock <-fsnotify
+ bash-1977 [000] .... 17284.993654: add_preempt_count <-_raw_spin_lock
+ bash-1977 [000] ...1 17284.993655: _raw_spin_unlock <-__close_fd
+ bash-1977 [000] ...1 17284.993656: sub_preempt_count <-_raw_spin_unlock
+ bash-1977 [000] .... 17284.993657: filp_close <-__close_fd
+ bash-1977 [000] .... 17284.993657: dnotify_flush <-filp_close
+ sshd-1974 [003] .... 17284.993658: sys_select <-system_call_fastpath
+ --------
+
+A header is printed with the tracer name that is represented by
+the trace. In this case the tracer is "function". Then it shows the
+number of events in the buffer as well as the total number of entries
+that were written. The difference is the number of entries that were
+lost due to the buffer filling up (250280 - 140080 = 110200 events
+lost).
+
+The header explains the content of the events. Task name "bash", the task
+PID "1977", the CPU that it was running on "000", the latency format
+(explained below), the timestamp in <secs>.<usecs> format, the
+function name that was traced "sys_close" and the parent function that
+called this function "system_call_fastpath". The timestamp is the time
+at which the function was entered.
+
+Latency trace format
+--------------------
+
+When the latency-format option is enabled or when one of the latency
+tracers is set, the trace file gives somewhat more information to see
+why a latency happened. Here is a typical trace.
+
+# tracer: irqsoff
+#
+# irqsoff latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 259 us, #4/4, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: ps-6143 (uid:0 nice:0 policy:0 rt_prio:0)
+# -----------------
+# => started at: __lock_task_sighand
+# => ended at: _raw_spin_unlock_irqrestore
+#
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ ps-6143 2d... 0us!: trace_hardirqs_off <-__lock_task_sighand
+ ps-6143 2d..1 259us+: trace_hardirqs_on <-_raw_spin_unlock_irqrestore
+ ps-6143 2d..1 263us+: time_hardirqs_on <-_raw_spin_unlock_irqrestore
+ ps-6143 2d..1 306us : <stack trace>
+ => trace_hardirqs_on_caller
+ => trace_hardirqs_on
+ => _raw_spin_unlock_irqrestore
+ => do_task_stat
+ => proc_tgid_stat
+ => proc_single_show
+ => seq_read
+ => vfs_read
+ => sys_read
+ => system_call_fastpath
+
+
+This shows that the current tracer is "irqsoff" tracing the time
+for which interrupts were disabled. It gives the trace version (which
+never changes) and the version of the kernel upon which this was executed on
+(3.10). Then it displays the max latency in microseconds (259 us). The number
+of trace entries displayed and the total number (both are four: #4/4).
+VP, KP, SP, and HP are always zero and are reserved for later use.
+#P is the number of online CPUs (#P:4).
+
+The task is the process that was running when the latency
+occurred. (ps pid: 6143).
+
+The start and stop (the functions in which the interrupts were
+disabled and enabled respectively) that caused the latencies:
+
+ __lock_task_sighand is where the interrupts were disabled.
+ _raw_spin_unlock_irqrestore is where they were enabled again.
+
+The next lines after the header are the trace itself. The header
+explains which is which.
+
+ cmd: The name of the process in the trace.
+
+ pid: The PID of that process.
+
+ CPU#: The CPU which the process was running on.
+
+ irqs-off: 'd' interrupts are disabled. '.' otherwise.
+ Note: If the architecture does not support a way to
+ read the irq flags variable, an 'X' will always
+ be printed here.
+
+ need-resched:
+ 'N' both TIF_NEED_RESCHED and PREEMPT_NEED_RESCHED is set,
+ 'n' only TIF_NEED_RESCHED is set,
+ 'p' only PREEMPT_NEED_RESCHED is set,
+ '.' otherwise.
+
+ hardirq/softirq:
+ 'H' - hard irq occurred inside a softirq.
+ 'h' - hard irq is running
+ 's' - soft irq is running
+ '.' - normal context.
+
+ preempt-depth: The level of preempt_disabled
+
+The above is mostly meaningful for kernel developers.
+
+ time: When the latency-format option is enabled, the trace file
+ output includes a timestamp relative to the start of the
+ trace. This differs from the output when latency-format
+ is disabled, which includes an absolute timestamp.
+
+ delay: This is just to help catch your eye a bit better. And
+ needs to be fixed to be only relative to the same CPU.
+ The marks are determined by the difference between this
+ current trace and the next trace.
+ '$' - greater than 1 second
+ '#' - greater than 1000 microsecond
+ '!' - greater than 100 microsecond
+ '+' - greater than 10 microsecond
+ ' ' - less than or equal to 10 microsecond.
+
+ The rest is the same as the 'trace' file.
+
+ Note, the latency tracers will usually end with a back trace
+ to easily find where the latency occurred.
+
+trace_options
+-------------
+
+The trace_options file (or the options directory) is used to control
+what gets printed in the trace output, or manipulate the tracers.
+To see what is available, simply cat the file:
+
+ cat trace_options
+print-parent
+nosym-offset
+nosym-addr
+noverbose
+noraw
+nohex
+nobin
+noblock
+nostacktrace
+trace_printk
+noftrace_preempt
+nobranch
+annotate
+nouserstacktrace
+nosym-userobj
+noprintk-msg-only
+context-info
+latency-format
+sleep-time
+graph-time
+record-cmd
+overwrite
+nodisable_on_free
+irq-info
+markers
+function-trace
+
+To disable one of the options, echo in the option prepended with
+"no".
+
+ echo noprint-parent > trace_options
+
+To enable an option, leave off the "no".
+
+ echo sym-offset > trace_options
+
+Here are the available options:
+
+ print-parent - On function traces, display the calling (parent)
+ function as well as the function being traced.
+
+ print-parent:
+ bash-4000 [01] 1477.606694: simple_strtoul <-kstrtoul
+
+ noprint-parent:
+ bash-4000 [01] 1477.606694: simple_strtoul
+
+
+ sym-offset - Display not only the function name, but also the
+ offset in the function. For example, instead of
+ seeing just "ktime_get", you will see
+ "ktime_get+0xb/0x20".
+
+ sym-offset:
+ bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0
+
+ sym-addr - this will also display the function address as well
+ as the function name.
+
+ sym-addr:
+ bash-4000 [01] 1477.606694: simple_strtoul <c0339346>
+
+ verbose - This deals with the trace file when the
+ latency-format option is enabled.
+
+ bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \
+ (+0.000ms): simple_strtoul (kstrtoul)
+
+ raw - This will display raw numbers. This option is best for
+ use with user applications that can translate the raw
+ numbers better than having it done in the kernel.
+
+ hex - Similar to raw, but the numbers will be in a hexadecimal
+ format.
+
+ bin - This will print out the formats in raw binary.
+
+ block - When set, reading trace_pipe will not block when polled.
+
+ stacktrace - This is one of the options that changes the trace
+ itself. When a trace is recorded, so is the stack
+ of functions. This allows for back traces of
+ trace sites.
+
+ trace_printk - Can disable trace_printk() from writing into the buffer.
+
+ branch - Enable branch tracing with the tracer.
+
+ annotate - It is sometimes confusing when the CPU buffers are full
+ and one CPU buffer had a lot of events recently, thus
+ a shorter time frame, were another CPU may have only had
+ a few events, which lets it have older events. When
+ the trace is reported, it shows the oldest events first,
+ and it may look like only one CPU ran (the one with the
+ oldest events). When the annotate option is set, it will
+ display when a new CPU buffer started:
+
+ <idle>-0 [001] dNs4 21169.031481: wake_up_idle_cpu <-add_timer_on
+ <idle>-0 [001] dNs4 21169.031482: _raw_spin_unlock_irqrestore <-add_timer_on
+ <idle>-0 [001] .Ns4 21169.031484: sub_preempt_count <-_raw_spin_unlock_irqrestore
+##### CPU 2 buffer started ####
+ <idle>-0 [002] .N.1 21169.031484: rcu_idle_exit <-cpu_idle
+ <idle>-0 [001] .Ns3 21169.031484: _raw_spin_unlock <-clocksource_watchdog
+ <idle>-0 [001] .Ns3 21169.031485: sub_preempt_count <-_raw_spin_unlock
+
+ userstacktrace - This option changes the trace. It records a
+ stacktrace of the current userspace thread.
+
+ sym-userobj - when user stacktrace are enabled, look up which
+ object the address belongs to, and print a
+ relative address. This is especially useful when
+ ASLR is on, otherwise you don't get a chance to
+ resolve the address to object/file/line after
+ the app is no longer running
+
+ The lookup is performed when you read
+ trace,trace_pipe. Example:
+
+ a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0
+x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6]
+
+
+ printk-msg-only - When set, trace_printk()s will only show the format
+ and not their parameters (if trace_bprintk() or
+ trace_bputs() was used to save the trace_printk()).
+
+ context-info - Show only the event data. Hides the comm, PID,
+ timestamp, CPU, and other useful data.
+
+ latency-format - This option changes the trace. When
+ it is enabled, the trace displays
+ additional information about the
+ latencies, as described in "Latency
+ trace format".
+
+ sleep-time - When running function graph tracer, to include
+ the time a task schedules out in its function.
+ When enabled, it will account time the task has been
+ scheduled out as part of the function call.
+
+ graph-time - When running function graph tracer, to include the
+ time to call nested functions. When this is not set,
+ the time reported for the function will only include
+ the time the function itself executed for, not the time
+ for functions that it called.
+
+ record-cmd - When any event or tracer is enabled, a hook is enabled
+ in the sched_switch trace point to fill comm cache
+ with mapped pids and comms. But this may cause some
+ overhead, and if you only care about pids, and not the
+ name of the task, disabling this option can lower the
+ impact of tracing.
+
+ overwrite - This controls what happens when the trace buffer is
+ full. If "1" (default), the oldest events are
+ discarded and overwritten. If "0", then the newest
+ events are discarded.
+ (see per_cpu/cpu0/stats for overrun and dropped)
+
+ disable_on_free - When the free_buffer is closed, tracing will
+ stop (tracing_on set to 0).
+
+ irq-info - Shows the interrupt, preempt count, need resched data.
+ When disabled, the trace looks like:
+
+# tracer: function
+#
+# entries-in-buffer/entries-written: 144405/9452052 #P:4
+#
+# TASK-PID CPU# TIMESTAMP FUNCTION
+# | | | | |
+ <idle>-0 [002] 23636.756054: ttwu_do_activate.constprop.89 <-try_to_wake_up
+ <idle>-0 [002] 23636.756054: activate_task <-ttwu_do_activate.constprop.89
+ <idle>-0 [002] 23636.756055: enqueue_task <-activate_task
+
+
+ markers - When set, the trace_marker is writable (only by root).
+ When disabled, the trace_marker will error with EINVAL
+ on write.
+
+
+ function-trace - The latency tracers will enable function tracing
+ if this option is enabled (default it is). When
+ it is disabled, the latency tracers do not trace
+ functions. This keeps the overhead of the tracer down
+ when performing latency tests.
+
+ Note: Some tracers have their own options. They only appear
+ when the tracer is active.
+
+
+
+irqsoff
+-------
+
+When interrupts are disabled, the CPU can not react to any other
+external event (besides NMIs and SMIs). This prevents the timer
+interrupt from triggering or the mouse interrupt from letting
+the kernel know of a new mouse event. The result is a latency
+with the reaction time.
+
+The irqsoff tracer tracks the time for which interrupts are
+disabled. When a new maximum latency is hit, the tracer saves
+the trace leading up to that latency point so that every time a
+new maximum is reached, the old saved trace is discarded and the
+new trace is saved.
+
+To reset the maximum, echo 0 into tracing_max_latency. Here is
+an example:
+
+ # echo 0 > options/function-trace
+ # echo irqsoff > current_tracer
+ # echo 1 > tracing_on
+ # echo 0 > tracing_max_latency
+ # ls -ltr
+ [...]
+ # echo 0 > tracing_on
+ # cat trace
+# tracer: irqsoff
+#
+# irqsoff latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 16 us, #4/4, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: swapper/0-0 (uid:0 nice:0 policy:0 rt_prio:0)
+# -----------------
+# => started at: run_timer_softirq
+# => ended at: run_timer_softirq
+#
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ <idle>-0 0d.s2 0us+: _raw_spin_lock_irq <-run_timer_softirq
+ <idle>-0 0dNs3 17us : _raw_spin_unlock_irq <-run_timer_softirq
+ <idle>-0 0dNs3 17us+: trace_hardirqs_on <-run_timer_softirq
+ <idle>-0 0dNs3 25us : <stack trace>
+ => _raw_spin_unlock_irq
+ => run_timer_softirq
+ => __do_softirq
+ => call_softirq
+ => do_softirq
+ => irq_exit
+ => smp_apic_timer_interrupt
+ => apic_timer_interrupt
+ => rcu_idle_exit
+ => cpu_idle
+ => rest_init
+ => start_kernel
+ => x86_64_start_reservations
+ => x86_64_start_kernel
+
+Here we see that that we had a latency of 16 microseconds (which is
+very good). The _raw_spin_lock_irq in run_timer_softirq disabled
+interrupts. The difference between the 16 and the displayed
+timestamp 25us occurred because the clock was incremented
+between the time of recording the max latency and the time of
+recording the function that had that latency.
+
+Note the above example had function-trace not set. If we set
+function-trace, we get a much larger output:
+
+ with echo 1 > options/function-trace
+
+# tracer: irqsoff
+#
+# irqsoff latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 71 us, #168/168, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: bash-2042 (uid:0 nice:0 policy:0 rt_prio:0)
+# -----------------
+# => started at: ata_scsi_queuecmd
+# => ended at: ata_scsi_queuecmd
+#
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ bash-2042 3d... 0us : _raw_spin_lock_irqsave <-ata_scsi_queuecmd
+ bash-2042 3d... 0us : add_preempt_count <-_raw_spin_lock_irqsave
+ bash-2042 3d..1 1us : ata_scsi_find_dev <-ata_scsi_queuecmd
+ bash-2042 3d..1 1us : __ata_scsi_find_dev <-ata_scsi_find_dev
+ bash-2042 3d..1 2us : ata_find_dev.part.14 <-__ata_scsi_find_dev
+ bash-2042 3d..1 2us : ata_qc_new_init <-__ata_scsi_queuecmd
+ bash-2042 3d..1 3us : ata_sg_init <-__ata_scsi_queuecmd
+ bash-2042 3d..1 4us : ata_scsi_rw_xlat <-__ata_scsi_queuecmd
+ bash-2042 3d..1 4us : ata_build_rw_tf <-ata_scsi_rw_xlat
+[...]
+ bash-2042 3d..1 67us : delay_tsc <-__delay
+ bash-2042 3d..1 67us : add_preempt_count <-delay_tsc
+ bash-2042 3d..2 67us : sub_preempt_count <-delay_tsc
+ bash-2042 3d..1 67us : add_preempt_count <-delay_tsc
+ bash-2042 3d..2 68us : sub_preempt_count <-delay_tsc
+ bash-2042 3d..1 68us+: ata_bmdma_start <-ata_bmdma_qc_issue
+ bash-2042 3d..1 71us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd
+ bash-2042 3d..1 71us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd
+ bash-2042 3d..1 72us+: trace_hardirqs_on <-ata_scsi_queuecmd
+ bash-2042 3d..1 120us : <stack trace>
+ => _raw_spin_unlock_irqrestore
+ => ata_scsi_queuecmd
+ => scsi_dispatch_cmd
+ => scsi_request_fn
+ => __blk_run_queue_uncond
+ => __blk_run_queue
+ => blk_queue_bio
+ => generic_make_request
+ => submit_bio
+ => submit_bh
+ => __ext3_get_inode_loc
+ => ext3_iget
+ => ext3_lookup
+ => lookup_real
+ => __lookup_hash
+ => walk_component
+ => lookup_last
+ => path_lookupat
+ => filename_lookup
+ => user_path_at_empty
+ => user_path_at
+ => vfs_fstatat
+ => vfs_stat
+ => sys_newstat
+ => system_call_fastpath
+
+
+Here we traced a 71 microsecond latency. But we also see all the
+functions that were called during that time. Note that by
+enabling function tracing, we incur an added overhead. This
+overhead may extend the latency times. But nevertheless, this
+trace has provided some very helpful debugging information.
+
+
+preemptoff
+----------
+
+When preemption is disabled, we may be able to receive
+interrupts but the task cannot be preempted and a higher
+priority task must wait for preemption to be enabled again
+before it can preempt a lower priority task.
+
+The preemptoff tracer traces the places that disable preemption.
+Like the irqsoff tracer, it records the maximum latency for
+which preemption was disabled. The control of preemptoff tracer
+is much like the irqsoff tracer.
+
+ # echo 0 > options/function-trace
+ # echo preemptoff > current_tracer
+ # echo 1 > tracing_on
+ # echo 0 > tracing_max_latency
+ # ls -ltr
+ [...]
+ # echo 0 > tracing_on
+ # cat trace
+# tracer: preemptoff
+#
+# preemptoff latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 46 us, #4/4, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: sshd-1991 (uid:0 nice:0 policy:0 rt_prio:0)
+# -----------------
+# => started at: do_IRQ
+# => ended at: do_IRQ
+#
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ sshd-1991 1d.h. 0us+: irq_enter <-do_IRQ
+ sshd-1991 1d..1 46us : irq_exit <-do_IRQ
+ sshd-1991 1d..1 47us+: trace_preempt_on <-do_IRQ
+ sshd-1991 1d..1 52us : <stack trace>
+ => sub_preempt_count
+ => irq_exit
+ => do_IRQ
+ => ret_from_intr
+
+
+This has some more changes. Preemption was disabled when an
+interrupt came in (notice the 'h'), and was enabled on exit.
+But we also see that interrupts have been disabled when entering
+the preempt off section and leaving it (the 'd'). We do not know if
+interrupts were enabled in the mean time or shortly after this
+was over.
+
+# tracer: preemptoff
+#
+# preemptoff latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 83 us, #241/241, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: bash-1994 (uid:0 nice:0 policy:0 rt_prio:0)
+# -----------------
+# => started at: wake_up_new_task
+# => ended at: task_rq_unlock
+#
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ bash-1994 1d..1 0us : _raw_spin_lock_irqsave <-wake_up_new_task
+ bash-1994 1d..1 0us : select_task_rq_fair <-select_task_rq
+ bash-1994 1d..1 1us : __rcu_read_lock <-select_task_rq_fair
+ bash-1994 1d..1 1us : source_load <-select_task_rq_fair
+ bash-1994 1d..1 1us : source_load <-select_task_rq_fair
+[...]
+ bash-1994 1d..1 12us : irq_enter <-smp_apic_timer_interrupt
+ bash-1994 1d..1 12us : rcu_irq_enter <-irq_enter
+ bash-1994 1d..1 13us : add_preempt_count <-irq_enter
+ bash-1994 1d.h1 13us : exit_idle <-smp_apic_timer_interrupt
+ bash-1994 1d.h1 13us : hrtimer_interrupt <-smp_apic_timer_interrupt
+ bash-1994 1d.h1 13us : _raw_spin_lock <-hrtimer_interrupt
+ bash-1994 1d.h1 14us : add_preempt_count <-_raw_spin_lock
+ bash-1994 1d.h2 14us : ktime_get_update_offsets <-hrtimer_interrupt
+[...]
+ bash-1994 1d.h1 35us : lapic_next_event <-clockevents_program_event
+ bash-1994 1d.h1 35us : irq_exit <-smp_apic_timer_interrupt
+ bash-1994 1d.h1 36us : sub_preempt_count <-irq_exit
+ bash-1994 1d..2 36us : do_softirq <-irq_exit
+ bash-1994 1d..2 36us : __do_softirq <-call_softirq
+ bash-1994 1d..2 36us : __local_bh_disable <-__do_softirq
+ bash-1994 1d.s2 37us : add_preempt_count <-_raw_spin_lock_irq
+ bash-1994 1d.s3 38us : _raw_spin_unlock <-run_timer_softirq
+ bash-1994 1d.s3 39us : sub_preempt_count <-_raw_spin_unlock
+ bash-1994 1d.s2 39us : call_timer_fn <-run_timer_softirq
+[...]
+ bash-1994 1dNs2 81us : cpu_needs_another_gp <-rcu_process_callbacks
+ bash-1994 1dNs2 82us : __local_bh_enable <-__do_softirq
+ bash-1994 1dNs2 82us : sub_preempt_count <-__local_bh_enable
+ bash-1994 1dN.2 82us : idle_cpu <-irq_exit
+ bash-1994 1dN.2 83us : rcu_irq_exit <-irq_exit
+ bash-1994 1dN.2 83us : sub_preempt_count <-irq_exit
+ bash-1994 1.N.1 84us : _raw_spin_unlock_irqrestore <-task_rq_unlock
+ bash-1994 1.N.1 84us+: trace_preempt_on <-task_rq_unlock
+ bash-1994 1.N.1 104us : <stack trace>
+ => sub_preempt_count
+ => _raw_spin_unlock_irqrestore
+ => task_rq_unlock
+ => wake_up_new_task
+ => do_fork
+ => sys_clone
+ => stub_clone
+
+
+The above is an example of the preemptoff trace with
+function-trace set. Here we see that interrupts were not disabled
+the entire time. The irq_enter code lets us know that we entered
+an interrupt 'h'. Before that, the functions being traced still
+show that it is not in an interrupt, but we can see from the
+functions themselves that this is not the case.
+
+preemptirqsoff
+--------------
+
+Knowing the locations that have interrupts disabled or
+preemption disabled for the longest times is helpful. But
+sometimes we would like to know when either preemption and/or
+interrupts are disabled.
+
+Consider the following code:
+
+ local_irq_disable();
+ call_function_with_irqs_off();
+ preempt_disable();
+ call_function_with_irqs_and_preemption_off();
+ local_irq_enable();
+ call_function_with_preemption_off();
+ preempt_enable();
+
+The irqsoff tracer will record the total length of
+call_function_with_irqs_off() and
+call_function_with_irqs_and_preemption_off().
+
+The preemptoff tracer will record the total length of
+call_function_with_irqs_and_preemption_off() and
+call_function_with_preemption_off().
+
+But neither will trace the time that interrupts and/or
+preemption is disabled. This total time is the time that we can
+not schedule. To record this time, use the preemptirqsoff
+tracer.
+
+Again, using this trace is much like the irqsoff and preemptoff
+tracers.
+
+ # echo 0 > options/function-trace
+ # echo preemptirqsoff > current_tracer
+ # echo 1 > tracing_on
+ # echo 0 > tracing_max_latency
+ # ls -ltr
+ [...]
+ # echo 0 > tracing_on
+ # cat trace
+# tracer: preemptirqsoff
+#
+# preemptirqsoff latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 100 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: ls-2230 (uid:0 nice:0 policy:0 rt_prio:0)
+# -----------------
+# => started at: ata_scsi_queuecmd
+# => ended at: ata_scsi_queuecmd
+#
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ ls-2230 3d... 0us+: _raw_spin_lock_irqsave <-ata_scsi_queuecmd
+ ls-2230 3...1 100us : _raw_spin_unlock_irqrestore <-ata_scsi_queuecmd
+ ls-2230 3...1 101us+: trace_preempt_on <-ata_scsi_queuecmd
+ ls-2230 3...1 111us : <stack trace>
+ => sub_preempt_count
+ => _raw_spin_unlock_irqrestore
+ => ata_scsi_queuecmd
+ => scsi_dispatch_cmd
+ => scsi_request_fn
+ => __blk_run_queue_uncond
+ => __blk_run_queue
+ => blk_queue_bio
+ => generic_make_request
+ => submit_bio
+ => submit_bh
+ => ext3_bread
+ => ext3_dir_bread
+ => htree_dirblock_to_tree
+ => ext3_htree_fill_tree
+ => ext3_readdir
+ => vfs_readdir
+ => sys_getdents
+ => system_call_fastpath
+
+
+The trace_hardirqs_off_thunk is called from assembly on x86 when
+interrupts are disabled in the assembly code. Without the
+function tracing, we do not know if interrupts were enabled
+within the preemption points. We do see that it started with
+preemption enabled.
+
+Here is a trace with function-trace set:
+
+# tracer: preemptirqsoff
+#
+# preemptirqsoff latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 161 us, #339/339, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: ls-2269 (uid:0 nice:0 policy:0 rt_prio:0)
+# -----------------
+# => started at: schedule
+# => ended at: mutex_unlock
+#
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+kworker/-59 3...1 0us : __schedule <-schedule
+kworker/-59 3d..1 0us : rcu_preempt_qs <-rcu_note_context_switch
+kworker/-59 3d..1 1us : add_preempt_count <-_raw_spin_lock_irq
+kworker/-59 3d..2 1us : deactivate_task <-__schedule
+kworker/-59 3d..2 1us : dequeue_task <-deactivate_task
+kworker/-59 3d..2 2us : update_rq_clock <-dequeue_task
+kworker/-59 3d..2 2us : dequeue_task_fair <-dequeue_task
+kworker/-59 3d..2 2us : update_curr <-dequeue_task_fair
+kworker/-59 3d..2 2us : update_min_vruntime <-update_curr
+kworker/-59 3d..2 3us : cpuacct_charge <-update_curr
+kworker/-59 3d..2 3us : __rcu_read_lock <-cpuacct_charge
+kworker/-59 3d..2 3us : __rcu_read_unlock <-cpuacct_charge
+kworker/-59 3d..2 3us : update_cfs_rq_blocked_load <-dequeue_task_fair
+kworker/-59 3d..2 4us : clear_buddies <-dequeue_task_fair
+kworker/-59 3d..2 4us : account_entity_dequeue <-dequeue_task_fair
+kworker/-59 3d..2 4us : update_min_vruntime <-dequeue_task_fair
+kworker/-59 3d..2 4us : update_cfs_shares <-dequeue_task_fair
+kworker/-59 3d..2 5us : hrtick_update <-dequeue_task_fair
+kworker/-59 3d..2 5us : wq_worker_sleeping <-__schedule
+kworker/-59 3d..2 5us : kthread_data <-wq_worker_sleeping
+kworker/-59 3d..2 5us : put_prev_task_fair <-__schedule
+kworker/-59 3d..2 6us : pick_next_task_fair <-pick_next_task
+kworker/-59 3d..2 6us : clear_buddies <-pick_next_task_fair
+kworker/-59 3d..2 6us : set_next_entity <-pick_next_task_fair
+kworker/-59 3d..2 6us : update_stats_wait_end <-set_next_entity
+ ls-2269 3d..2 7us : finish_task_switch <-__schedule
+ ls-2269 3d..2 7us : _raw_spin_unlock_irq <-finish_task_switch
+ ls-2269 3d..2 8us : do_IRQ <-ret_from_intr
+ ls-2269 3d..2 8us : irq_enter <-do_IRQ
+ ls-2269 3d..2 8us : rcu_irq_enter <-irq_enter
+ ls-2269 3d..2 9us : add_preempt_count <-irq_enter
+ ls-2269 3d.h2 9us : exit_idle <-do_IRQ
+[...]
+ ls-2269 3d.h3 20us : sub_preempt_count <-_raw_spin_unlock
+ ls-2269 3d.h2 20us : irq_exit <-do_IRQ
+ ls-2269 3d.h2 21us : sub_preempt_count <-irq_exit
+ ls-2269 3d..3 21us : do_softirq <-irq_exit
+ ls-2269 3d..3 21us : __do_softirq <-call_softirq
+ ls-2269 3d..3 21us+: __local_bh_disable <-__do_softirq
+ ls-2269 3d.s4 29us : sub_preempt_count <-_local_bh_enable_ip
+ ls-2269 3d.s5 29us : sub_preempt_count <-_local_bh_enable_ip
+ ls-2269 3d.s5 31us : do_IRQ <-ret_from_intr
+ ls-2269 3d.s5 31us : irq_enter <-do_IRQ
+ ls-2269 3d.s5 31us : rcu_irq_enter <-irq_enter
+[...]
+ ls-2269 3d.s5 31us : rcu_irq_enter <-irq_enter
+ ls-2269 3d.s5 32us : add_preempt_count <-irq_enter
+ ls-2269 3d.H5 32us : exit_idle <-do_IRQ
+ ls-2269 3d.H5 32us : handle_irq <-do_IRQ
+ ls-2269 3d.H5 32us : irq_to_desc <-handle_irq
+ ls-2269 3d.H5 33us : handle_fasteoi_irq <-handle_irq
+[...]
+ ls-2269 3d.s5 158us : _raw_spin_unlock_irqrestore <-rtl8139_poll
+ ls-2269 3d.s3 158us : net_rps_action_and_irq_enable.isra.65 <-net_rx_action
+ ls-2269 3d.s3 159us : __local_bh_enable <-__do_softirq
+ ls-2269 3d.s3 159us : sub_preempt_count <-__local_bh_enable
+ ls-2269 3d..3 159us : idle_cpu <-irq_exit
+ ls-2269 3d..3 159us : rcu_irq_exit <-irq_exit
+ ls-2269 3d..3 160us : sub_preempt_count <-irq_exit
+ ls-2269 3d... 161us : __mutex_unlock_slowpath <-mutex_unlock
+ ls-2269 3d... 162us+: trace_hardirqs_on <-mutex_unlock
+ ls-2269 3d... 186us : <stack trace>
+ => __mutex_unlock_slowpath
+ => mutex_unlock
+ => process_output
+ => n_tty_write
+ => tty_write
+ => vfs_write
+ => sys_write
+ => system_call_fastpath
+
+This is an interesting trace. It started with kworker running and
+scheduling out and ls taking over. But as soon as ls released the
+rq lock and enabled interrupts (but not preemption) an interrupt
+triggered. When the interrupt finished, it started running softirqs.
+But while the softirq was running, another interrupt triggered.
+When an interrupt is running inside a softirq, the annotation is 'H'.
+
+
+wakeup
+------
+
+One common case that people are interested in tracing is the
+time it takes for a task that is woken to actually wake up.
+Now for non Real-Time tasks, this can be arbitrary. But tracing
+it none the less can be interesting.
+
+Without function tracing:
+
+ # echo 0 > options/function-trace
+ # echo wakeup > current_tracer
+ # echo 1 > tracing_on
+ # echo 0 > tracing_max_latency
+ # chrt -f 5 sleep 1
+ # echo 0 > tracing_on
+ # cat trace
+# tracer: wakeup
+#
+# wakeup latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 15 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: kworker/3:1H-312 (uid:0 nice:-20 policy:0 rt_prio:0)
+# -----------------
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ <idle>-0 3dNs7 0us : 0:120:R + [003] 312:100:R kworker/3:1H
+ <idle>-0 3dNs7 1us+: ttwu_do_activate.constprop.87 <-try_to_wake_up
+ <idle>-0 3d..3 15us : __schedule <-schedule
+ <idle>-0 3d..3 15us : 0:120:R ==> [003] 312:100:R kworker/3:1H
+
+The tracer only traces the highest priority task in the system
+to avoid tracing the normal circumstances. Here we see that
+the kworker with a nice priority of -20 (not very nice), took
+just 15 microseconds from the time it woke up, to the time it
+ran.
+
+Non Real-Time tasks are not that interesting. A more interesting
+trace is to concentrate only on Real-Time tasks.
+
+wakeup_rt
+---------
+
+In a Real-Time environment it is very important to know the
+wakeup time it takes for the highest priority task that is woken
+up to the time that it executes. This is also known as "schedule
+latency". I stress the point that this is about RT tasks. It is
+also important to know the scheduling latency of non-RT tasks,
+but the average schedule latency is better for non-RT tasks.
+Tools like LatencyTop are more appropriate for such
+measurements.
+
+Real-Time environments are interested in the worst case latency.
+That is the longest latency it takes for something to happen,
+and not the average. We can have a very fast scheduler that may
+only have a large latency once in a while, but that would not
+work well with Real-Time tasks. The wakeup_rt tracer was designed
+to record the worst case wakeups of RT tasks. Non-RT tasks are
+not recorded because the tracer only records one worst case and
+tracing non-RT tasks that are unpredictable will overwrite the
+worst case latency of RT tasks (just run the normal wakeup
+tracer for a while to see that effect).
+
+Since this tracer only deals with RT tasks, we will run this
+slightly differently than we did with the previous tracers.
+Instead of performing an 'ls', we will run 'sleep 1' under
+'chrt' which changes the priority of the task.
+
+ # echo 0 > options/function-trace
+ # echo wakeup_rt > current_tracer
+ # echo 1 > tracing_on
+ # echo 0 > tracing_max_latency
+ # chrt -f 5 sleep 1
+ # echo 0 > tracing_on
+ # cat trace
+# tracer: wakeup
+#
+# tracer: wakeup_rt
+#
+# wakeup_rt latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 5 us, #4/4, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: sleep-2389 (uid:0 nice:0 policy:1 rt_prio:5)
+# -----------------
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ <idle>-0 3d.h4 0us : 0:120:R + [003] 2389: 94:R sleep
+ <idle>-0 3d.h4 1us+: ttwu_do_activate.constprop.87 <-try_to_wake_up
+ <idle>-0 3d..3 5us : __schedule <-schedule
+ <idle>-0 3d..3 5us : 0:120:R ==> [003] 2389: 94:R sleep
+
+
+Running this on an idle system, we see that it only took 5 microseconds
+to perform the task switch. Note, since the trace point in the schedule
+is before the actual "switch", we stop the tracing when the recorded task
+is about to schedule in. This may change if we add a new marker at the
+end of the scheduler.
+
+Notice that the recorded task is 'sleep' with the PID of 2389
+and it has an rt_prio of 5. This priority is user-space priority
+and not the internal kernel priority. The policy is 1 for
+SCHED_FIFO and 2 for SCHED_RR.
+
+Note, that the trace data shows the internal priority (99 - rtprio).
+
+ <idle>-0 3d..3 5us : 0:120:R ==> [003] 2389: 94:R sleep
+
+The 0:120:R means idle was running with a nice priority of 0 (120 - 20)
+and in the running state 'R'. The sleep task was scheduled in with
+2389: 94:R. That is the priority is the kernel rtprio (99 - 5 = 94)
+and it too is in the running state.
+
+Doing the same with chrt -r 5 and function-trace set.
+
+ echo 1 > options/function-trace
+
+# tracer: wakeup_rt
+#
+# wakeup_rt latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 29 us, #85/85, CPU#3 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: sleep-2448 (uid:0 nice:0 policy:1 rt_prio:5)
+# -----------------
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ <idle>-0 3d.h4 1us+: 0:120:R + [003] 2448: 94:R sleep
+ <idle>-0 3d.h4 2us : ttwu_do_activate.constprop.87 <-try_to_wake_up
+ <idle>-0 3d.h3 3us : check_preempt_curr <-ttwu_do_wakeup
+ <idle>-0 3d.h3 3us : resched_curr <-check_preempt_curr
+ <idle>-0 3dNh3 4us : task_woken_rt <-ttwu_do_wakeup
+ <idle>-0 3dNh3 4us : _raw_spin_unlock <-try_to_wake_up
+ <idle>-0 3dNh3 4us : sub_preempt_count <-_raw_spin_unlock
+ <idle>-0 3dNh2 5us : ttwu_stat <-try_to_wake_up
+ <idle>-0 3dNh2 5us : _raw_spin_unlock_irqrestore <-try_to_wake_up
+ <idle>-0 3dNh2 6us : sub_preempt_count <-_raw_spin_unlock_irqrestore
+ <idle>-0 3dNh1 6us : _raw_spin_lock <-__run_hrtimer
+ <idle>-0 3dNh1 6us : add_preempt_count <-_raw_spin_lock
+ <idle>-0 3dNh2 7us : _raw_spin_unlock <-hrtimer_interrupt
+ <idle>-0 3dNh2 7us : sub_preempt_count <-_raw_spin_unlock
+ <idle>-0 3dNh1 7us : tick_program_event <-hrtimer_interrupt
+ <idle>-0 3dNh1 7us : clockevents_program_event <-tick_program_event
+ <idle>-0 3dNh1 8us : ktime_get <-clockevents_program_event
+ <idle>-0 3dNh1 8us : lapic_next_event <-clockevents_program_event
+ <idle>-0 3dNh1 8us : irq_exit <-smp_apic_timer_interrupt
+ <idle>-0 3dNh1 9us : sub_preempt_count <-irq_exit
+ <idle>-0 3dN.2 9us : idle_cpu <-irq_exit
+ <idle>-0 3dN.2 9us : rcu_irq_exit <-irq_exit
+ <idle>-0 3dN.2 10us : rcu_eqs_enter_common.isra.45 <-rcu_irq_exit
+ <idle>-0 3dN.2 10us : sub_preempt_count <-irq_exit
+ <idle>-0 3.N.1 11us : rcu_idle_exit <-cpu_idle
+ <idle>-0 3dN.1 11us : rcu_eqs_exit_common.isra.43 <-rcu_idle_exit
+ <idle>-0 3.N.1 11us : tick_nohz_idle_exit <-cpu_idle
+ <idle>-0 3dN.1 12us : menu_hrtimer_cancel <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 12us : ktime_get <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 12us : tick_do_update_jiffies64 <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 13us : update_cpu_load_nohz <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 13us : _raw_spin_lock <-update_cpu_load_nohz
+ <idle>-0 3dN.1 13us : add_preempt_count <-_raw_spin_lock
+ <idle>-0 3dN.2 13us : __update_cpu_load <-update_cpu_load_nohz
+ <idle>-0 3dN.2 14us : sched_avg_update <-__update_cpu_load
+ <idle>-0 3dN.2 14us : _raw_spin_unlock <-update_cpu_load_nohz
+ <idle>-0 3dN.2 14us : sub_preempt_count <-_raw_spin_unlock
+ <idle>-0 3dN.1 15us : calc_load_exit_idle <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 15us : touch_softlockup_watchdog <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 15us : hrtimer_cancel <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 15us : hrtimer_try_to_cancel <-hrtimer_cancel
+ <idle>-0 3dN.1 16us : lock_hrtimer_base.isra.18 <-hrtimer_try_to_cancel
+ <idle>-0 3dN.1 16us : _raw_spin_lock_irqsave <-lock_hrtimer_base.isra.18
+ <idle>-0 3dN.1 16us : add_preempt_count <-_raw_spin_lock_irqsave
+ <idle>-0 3dN.2 17us : __remove_hrtimer <-remove_hrtimer.part.16
+ <idle>-0 3dN.2 17us : hrtimer_force_reprogram <-__remove_hrtimer
+ <idle>-0 3dN.2 17us : tick_program_event <-hrtimer_force_reprogram
+ <idle>-0 3dN.2 18us : clockevents_program_event <-tick_program_event
+ <idle>-0 3dN.2 18us : ktime_get <-clockevents_program_event
+ <idle>-0 3dN.2 18us : lapic_next_event <-clockevents_program_event
+ <idle>-0 3dN.2 19us : _raw_spin_unlock_irqrestore <-hrtimer_try_to_cancel
+ <idle>-0 3dN.2 19us : sub_preempt_count <-_raw_spin_unlock_irqrestore
+ <idle>-0 3dN.1 19us : hrtimer_forward <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 20us : ktime_add_safe <-hrtimer_forward
+ <idle>-0 3dN.1 20us : ktime_add_safe <-hrtimer_forward
+ <idle>-0 3dN.1 20us : hrtimer_start_range_ns <-hrtimer_start_expires.constprop.11
+ <idle>-0 3dN.1 20us : __hrtimer_start_range_ns <-hrtimer_start_range_ns
+ <idle>-0 3dN.1 21us : lock_hrtimer_base.isra.18 <-__hrtimer_start_range_ns
+ <idle>-0 3dN.1 21us : _raw_spin_lock_irqsave <-lock_hrtimer_base.isra.18
+ <idle>-0 3dN.1 21us : add_preempt_count <-_raw_spin_lock_irqsave
+ <idle>-0 3dN.2 22us : ktime_add_safe <-__hrtimer_start_range_ns
+ <idle>-0 3dN.2 22us : enqueue_hrtimer <-__hrtimer_start_range_ns
+ <idle>-0 3dN.2 22us : tick_program_event <-__hrtimer_start_range_ns
+ <idle>-0 3dN.2 23us : clockevents_program_event <-tick_program_event
+ <idle>-0 3dN.2 23us : ktime_get <-clockevents_program_event
+ <idle>-0 3dN.2 23us : lapic_next_event <-clockevents_program_event
+ <idle>-0 3dN.2 24us : _raw_spin_unlock_irqrestore <-__hrtimer_start_range_ns
+ <idle>-0 3dN.2 24us : sub_preempt_count <-_raw_spin_unlock_irqrestore
+ <idle>-0 3dN.1 24us : account_idle_ticks <-tick_nohz_idle_exit
+ <idle>-0 3dN.1 24us : account_idle_time <-account_idle_ticks
+ <idle>-0 3.N.1 25us : sub_preempt_count <-cpu_idle
+ <idle>-0 3.N.. 25us : schedule <-cpu_idle
+ <idle>-0 3.N.. 25us : __schedule <-preempt_schedule
+ <idle>-0 3.N.. 26us : add_preempt_count <-__schedule
+ <idle>-0 3.N.1 26us : rcu_note_context_switch <-__schedule
+ <idle>-0 3.N.1 26us : rcu_sched_qs <-rcu_note_context_switch
+ <idle>-0 3dN.1 27us : rcu_preempt_qs <-rcu_note_context_switch
+ <idle>-0 3.N.1 27us : _raw_spin_lock_irq <-__schedule
+ <idle>-0 3dN.1 27us : add_preempt_count <-_raw_spin_lock_irq
+ <idle>-0 3dN.2 28us : put_prev_task_idle <-__schedule
+ <idle>-0 3dN.2 28us : pick_next_task_stop <-pick_next_task
+ <idle>-0 3dN.2 28us : pick_next_task_rt <-pick_next_task
+ <idle>-0 3dN.2 29us : dequeue_pushable_task <-pick_next_task_rt
+ <idle>-0 3d..3 29us : __schedule <-preempt_schedule
+ <idle>-0 3d..3 30us : 0:120:R ==> [003] 2448: 94:R sleep
+
+This isn't that big of a trace, even with function tracing enabled,
+so I included the entire trace.
+
+The interrupt went off while when the system was idle. Somewhere
+before task_woken_rt() was called, the NEED_RESCHED flag was set,
+this is indicated by the first occurrence of the 'N' flag.
+
+Latency tracing and events
+--------------------------
+As function tracing can induce a much larger latency, but without
+seeing what happens within the latency it is hard to know what
+caused it. There is a middle ground, and that is with enabling
+events.
+
+ # echo 0 > options/function-trace
+ # echo wakeup_rt > current_tracer
+ # echo 1 > events/enable
+ # echo 1 > tracing_on
+ # echo 0 > tracing_max_latency
+ # chrt -f 5 sleep 1
+ # echo 0 > tracing_on
+ # cat trace
+# tracer: wakeup_rt
+#
+# wakeup_rt latency trace v1.1.5 on 3.8.0-test+
+# --------------------------------------------------------------------
+# latency: 6 us, #12/12, CPU#2 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:4)
+# -----------------
+# | task: sleep-5882 (uid:0 nice:0 policy:1 rt_prio:5)
+# -----------------
+#
+# _------=> CPU#
+# / _-----=> irqs-off
+# | / _----=> need-resched
+# || / _---=> hardirq/softirq
+# ||| / _--=> preempt-depth
+# |||| / delay
+# cmd pid ||||| time | caller
+# \ / ||||| \ | /
+ <idle>-0 2d.h4 0us : 0:120:R + [002] 5882: 94:R sleep
+ <idle>-0 2d.h4 0us : ttwu_do_activate.constprop.87 <-try_to_wake_up
+ <idle>-0 2d.h4 1us : sched_wakeup: comm=sleep pid=5882 prio=94 success=1 target_cpu=002
+ <idle>-0 2dNh2 1us : hrtimer_expire_exit: hrtimer=ffff88007796feb8
+ <idle>-0 2.N.2 2us : power_end: cpu_id=2
+ <idle>-0 2.N.2 3us : cpu_idle: state=4294967295 cpu_id=2
+ <idle>-0 2dN.3 4us : hrtimer_cancel: hrtimer=ffff88007d50d5e0
+ <idle>-0 2dN.3 4us : hrtimer_start: hrtimer=ffff88007d50d5e0 function=tick_sched_timer expires=34311211000000 softexpires=34311211000000
+ <idle>-0 2.N.2 5us : rcu_utilization: Start context switch
+ <idle>-0 2.N.2 5us : rcu_utilization: End context switch
+ <idle>-0 2d..3 6us : __schedule <-schedule
+ <idle>-0 2d..3 6us : 0:120:R ==> [002] 5882: 94:R sleep
+
+
+function
+--------
+
+This tracer is the function tracer. Enabling the function tracer
+can be done from the debug file system. Make sure the
+ftrace_enabled is set; otherwise this tracer is a nop.
+See the "ftrace_enabled" section below.
+
+ # sysctl kernel.ftrace_enabled=1
+ # echo function > current_tracer
+ # echo 1 > tracing_on
+ # usleep 1
+ # echo 0 > tracing_on
+ # cat trace
+# tracer: function
+#
+# entries-in-buffer/entries-written: 24799/24799 #P:4
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ bash-1994 [002] .... 3082.063030: mutex_unlock <-rb_simple_write
+ bash-1994 [002] .... 3082.063031: __mutex_unlock_slowpath <-mutex_unlock
+ bash-1994 [002] .... 3082.063031: __fsnotify_parent <-fsnotify_modify
+ bash-1994 [002] .... 3082.063032: fsnotify <-fsnotify_modify
+ bash-1994 [002] .... 3082.063032: __srcu_read_lock <-fsnotify
+ bash-1994 [002] .... 3082.063032: add_preempt_count <-__srcu_read_lock
+ bash-1994 [002] ...1 3082.063032: sub_preempt_count <-__srcu_read_lock
+ bash-1994 [002] .... 3082.063033: __srcu_read_unlock <-fsnotify
+[...]
+
+
+Note: function tracer uses ring buffers to store the above
+entries. The newest data may overwrite the oldest data.
+Sometimes using echo to stop the trace is not sufficient because
+the tracing could have overwritten the data that you wanted to
+record. For this reason, it is sometimes better to disable
+tracing directly from a program. This allows you to stop the
+tracing at the point that you hit the part that you are
+interested in. To disable the tracing directly from a C program,
+something like following code snippet can be used:
+
+int trace_fd;
+[...]
+int main(int argc, char *argv[]) {
+ [...]
+ trace_fd = open(tracing_file("tracing_on"), O_WRONLY);
+ [...]
+ if (condition_hit()) {
+ write(trace_fd, "0", 1);
+ }
+ [...]
+}
+
+
+Single thread tracing
+---------------------
+
+By writing into set_ftrace_pid you can trace a
+single thread. For example:
+
+# cat set_ftrace_pid
+no pid
+# echo 3111 > set_ftrace_pid
+# cat set_ftrace_pid
+3111
+# echo function > current_tracer
+# cat trace | head
+ # tracer: function
+ #
+ # TASK-PID CPU# TIMESTAMP FUNCTION
+ # | | | | |
+ yum-updatesd-3111 [003] 1637.254676: finish_task_switch <-thread_return
+ yum-updatesd-3111 [003] 1637.254681: hrtimer_cancel <-schedule_hrtimeout_range
+ yum-updatesd-3111 [003] 1637.254682: hrtimer_try_to_cancel <-hrtimer_cancel
+ yum-updatesd-3111 [003] 1637.254683: lock_hrtimer_base <-hrtimer_try_to_cancel
+ yum-updatesd-3111 [003] 1637.254685: fget_light <-do_sys_poll
+ yum-updatesd-3111 [003] 1637.254686: pipe_poll <-do_sys_poll
+# echo > set_ftrace_pid
+# cat trace |head
+ # tracer: function
+ #
+ # TASK-PID CPU# TIMESTAMP FUNCTION
+ # | | | | |
+ ##### CPU 3 buffer started ####
+ yum-updatesd-3111 [003] 1701.957688: free_poll_entry <-poll_freewait
+ yum-updatesd-3111 [003] 1701.957689: remove_wait_queue <-free_poll_entry
+ yum-updatesd-3111 [003] 1701.957691: fput <-free_poll_entry
+ yum-updatesd-3111 [003] 1701.957692: audit_syscall_exit <-sysret_audit
+ yum-updatesd-3111 [003] 1701.957693: path_put <-audit_syscall_exit
+
+If you want to trace a function when executing, you could use
+something like this simple program:
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+#define _STR(x) #x
+#define STR(x) _STR(x)
+#define MAX_PATH 256
+
+const char *find_debugfs(void)
+{
+ static char debugfs[MAX_PATH+1];
+ static int debugfs_found;
+ char type[100];
+ FILE *fp;
+
+ if (debugfs_found)
+ return debugfs;
+
+ if ((fp = fopen("/proc/mounts","r")) == NULL) {
+ perror("/proc/mounts");
+ return NULL;
+ }
+
+ while (fscanf(fp, "%*s %"
+ STR(MAX_PATH)
+ "s %99s %*s %*d %*d\n",
+ debugfs, type) == 2) {
+ if (strcmp(type, "debugfs") == 0)
+ break;
+ }
+ fclose(fp);
+
+ if (strcmp(type, "debugfs") != 0) {
+ fprintf(stderr, "debugfs not mounted");
+ return NULL;
+ }
+
+ strcat(debugfs, "/tracing/");
+ debugfs_found = 1;
+
+ return debugfs;
+}
+
+const char *tracing_file(const char *file_name)
+{
+ static char trace_file[MAX_PATH+1];
+ snprintf(trace_file, MAX_PATH, "%s/%s", find_debugfs(), file_name);
+ return trace_file;
+}
+
+int main (int argc, char **argv)
+{
+ if (argc < 1)
+ exit(-1);
+
+ if (fork() > 0) {
+ int fd, ffd;
+ char line[64];
+ int s;
+
+ ffd = open(tracing_file("current_tracer"), O_WRONLY);
+ if (ffd < 0)
+ exit(-1);
+ write(ffd, "nop", 3);
+
+ fd = open(tracing_file("set_ftrace_pid"), O_WRONLY);
+ s = sprintf(line, "%d\n", getpid());
+ write(fd, line, s);
+
+ write(ffd, "function", 8);
+
+ close(fd);
+ close(ffd);
+
+ execvp(argv[1], argv+1);
+ }
+
+ return 0;
+}
+
+Or this simple script!
+
+------
+#!/bin/bash
+
+debugfs=`sed -ne 's/^debugfs \(.*\) debugfs.*/\1/p' /proc/mounts`
+echo nop > $debugfs/tracing/current_tracer
+echo 0 > $debugfs/tracing/tracing_on
+echo $$ > $debugfs/tracing/set_ftrace_pid
+echo function > $debugfs/tracing/current_tracer
+echo 1 > $debugfs/tracing/tracing_on
+exec "$@"
+------
+
+
+function graph tracer
+---------------------------
+
+This tracer is similar to the function tracer except that it
+probes a function on its entry and its exit. This is done by
+using a dynamically allocated stack of return addresses in each
+task_struct. On function entry the tracer overwrites the return
+address of each function traced to set a custom probe. Thus the
+original return address is stored on the stack of return address
+in the task_struct.
+
+Probing on both ends of a function leads to special features
+such as:
+
+- measure of a function's time execution
+- having a reliable call stack to draw function calls graph
+
+This tracer is useful in several situations:
+
+- you want to find the reason of a strange kernel behavior and
+ need to see what happens in detail on any areas (or specific
+ ones).
+
+- you are experiencing weird latencies but it's difficult to
+ find its origin.
+
+- you want to find quickly which path is taken by a specific
+ function
+
+- you just want to peek inside a working kernel and want to see
+ what happens there.
+
+# tracer: function_graph
+#
+# CPU DURATION FUNCTION CALLS
+# | | | | | | |
+
+ 0) | sys_open() {
+ 0) | do_sys_open() {
+ 0) | getname() {
+ 0) | kmem_cache_alloc() {
+ 0) 1.382 us | __might_sleep();
+ 0) 2.478 us | }
+ 0) | strncpy_from_user() {
+ 0) | might_fault() {
+ 0) 1.389 us | __might_sleep();
+ 0) 2.553 us | }
+ 0) 3.807 us | }
+ 0) 7.876 us | }
+ 0) | alloc_fd() {
+ 0) 0.668 us | _spin_lock();
+ 0) 0.570 us | expand_files();
+ 0) 0.586 us | _spin_unlock();
+
+
+There are several columns that can be dynamically
+enabled/disabled. You can use every combination of options you
+want, depending on your needs.
+
+- The cpu number on which the function executed is default
+ enabled. It is sometimes better to only trace one cpu (see
+ tracing_cpu_mask file) or you might sometimes see unordered
+ function calls while cpu tracing switch.
+
+ hide: echo nofuncgraph-cpu > trace_options
+ show: echo funcgraph-cpu > trace_options
+
+- The duration (function's time of execution) is displayed on
+ the closing bracket line of a function or on the same line
+ than the current function in case of a leaf one. It is default
+ enabled.
+
+ hide: echo nofuncgraph-duration > trace_options
+ show: echo funcgraph-duration > trace_options
+
+- The overhead field precedes the duration field in case of
+ reached duration thresholds.
+
+ hide: echo nofuncgraph-overhead > trace_options
+ show: echo funcgraph-overhead > trace_options
+ depends on: funcgraph-duration
+
+ ie:
+
+ 0) | up_write() {
+ 0) 0.646 us | _spin_lock_irqsave();
+ 0) 0.684 us | _spin_unlock_irqrestore();
+ 0) 3.123 us | }
+ 0) 0.548 us | fput();
+ 0) + 58.628 us | }
+
+ [...]
+
+ 0) | putname() {
+ 0) | kmem_cache_free() {
+ 0) 0.518 us | __phys_addr();
+ 0) 1.757 us | }
+ 0) 2.861 us | }
+ 0) ! 115.305 us | }
+ 0) ! 116.402 us | }
+
+ + means that the function exceeded 10 usecs.
+ ! means that the function exceeded 100 usecs.
+ # means that the function exceeded 1000 usecs.
+ $ means that the function exceeded 1 sec.
+
+
+- The task/pid field displays the thread cmdline and pid which
+ executed the function. It is default disabled.
+
+ hide: echo nofuncgraph-proc > trace_options
+ show: echo funcgraph-proc > trace_options
+
+ ie:
+
+ # tracer: function_graph
+ #
+ # CPU TASK/PID DURATION FUNCTION CALLS
+ # | | | | | | | | |
+ 0) sh-4802 | | d_free() {
+ 0) sh-4802 | | call_rcu() {
+ 0) sh-4802 | | __call_rcu() {
+ 0) sh-4802 | 0.616 us | rcu_process_gp_end();
+ 0) sh-4802 | 0.586 us | check_for_new_grace_period();
+ 0) sh-4802 | 2.899 us | }
+ 0) sh-4802 | 4.040 us | }
+ 0) sh-4802 | 5.151 us | }
+ 0) sh-4802 | + 49.370 us | }
+
+
+- The absolute time field is an absolute timestamp given by the
+ system clock since it started. A snapshot of this time is
+ given on each entry/exit of functions
+
+ hide: echo nofuncgraph-abstime > trace_options
+ show: echo funcgraph-abstime > trace_options
+
+ ie:
+
+ #
+ # TIME CPU DURATION FUNCTION CALLS
+ # | | | | | | | |
+ 360.774522 | 1) 0.541 us | }
+ 360.774522 | 1) 4.663 us | }
+ 360.774523 | 1) 0.541 us | __wake_up_bit();
+ 360.774524 | 1) 6.796 us | }
+ 360.774524 | 1) 7.952 us | }
+ 360.774525 | 1) 9.063 us | }
+ 360.774525 | 1) 0.615 us | journal_mark_dirty();
+ 360.774527 | 1) 0.578 us | __brelse();
+ 360.774528 | 1) | reiserfs_prepare_for_journal() {
+ 360.774528 | 1) | unlock_buffer() {
+ 360.774529 | 1) | wake_up_bit() {
+ 360.774529 | 1) | bit_waitqueue() {
+ 360.774530 | 1) 0.594 us | __phys_addr();
+
+
+The function name is always displayed after the closing bracket
+for a function if the start of that function is not in the
+trace buffer.
+
+Display of the function name after the closing bracket may be
+enabled for functions whose start is in the trace buffer,
+allowing easier searching with grep for function durations.
+It is default disabled.
+
+ hide: echo nofuncgraph-tail > trace_options
+ show: echo funcgraph-tail > trace_options
+
+ Example with nofuncgraph-tail (default):
+ 0) | putname() {
+ 0) | kmem_cache_free() {
+ 0) 0.518 us | __phys_addr();
+ 0) 1.757 us | }
+ 0) 2.861 us | }
+
+ Example with funcgraph-tail:
+ 0) | putname() {
+ 0) | kmem_cache_free() {
+ 0) 0.518 us | __phys_addr();
+ 0) 1.757 us | } /* kmem_cache_free() */
+ 0) 2.861 us | } /* putname() */
+
+You can put some comments on specific functions by using
+trace_printk() For example, if you want to put a comment inside
+the __might_sleep() function, you just have to include
+<linux/ftrace.h> and call trace_printk() inside __might_sleep()
+
+trace_printk("I'm a comment!\n")
+
+will produce:
+
+ 1) | __might_sleep() {
+ 1) | /* I'm a comment! */
+ 1) 1.449 us | }
+
+
+You might find other useful features for this tracer in the
+following "dynamic ftrace" section such as tracing only specific
+functions or tasks.
+
+dynamic ftrace
+--------------
+
+If CONFIG_DYNAMIC_FTRACE is set, the system will run with
+virtually no overhead when function tracing is disabled. The way
+this works is the mcount function call (placed at the start of
+every kernel function, produced by the -pg switch in gcc),
+starts of pointing to a simple return. (Enabling FTRACE will
+include the -pg switch in the compiling of the kernel.)
+
+At compile time every C file object is run through the
+recordmcount program (located in the scripts directory). This
+program will parse the ELF headers in the C object to find all
+the locations in the .text section that call mcount. (Note, only
+white listed .text sections are processed, since processing other
+sections like .init.text may cause races due to those sections
+being freed unexpectedly).
+
+A new section called "__mcount_loc" is created that holds
+references to all the mcount call sites in the .text section.
+The recordmcount program re-links this section back into the
+original object. The final linking stage of the kernel will add all these
+references into a single table.
+
+On boot up, before SMP is initialized, the dynamic ftrace code
+scans this table and updates all the locations into nops. It
+also records the locations, which are added to the
+available_filter_functions list. Modules are processed as they
+are loaded and before they are executed. When a module is
+unloaded, it also removes its functions from the ftrace function
+list. This is automatic in the module unload code, and the
+module author does not need to worry about it.
+
+When tracing is enabled, the process of modifying the function
+tracepoints is dependent on architecture. The old method is to use
+kstop_machine to prevent races with the CPUs executing code being
+modified (which can cause the CPU to do undesirable things, especially
+if the modified code crosses cache (or page) boundaries), and the nops are
+patched back to calls. But this time, they do not call mcount
+(which is just a function stub). They now call into the ftrace
+infrastructure.
+
+The new method of modifying the function tracepoints is to place
+a breakpoint at the location to be modified, sync all CPUs, modify
+the rest of the instruction not covered by the breakpoint. Sync
+all CPUs again, and then remove the breakpoint with the finished
+version to the ftrace call site.
+
+Some archs do not even need to monkey around with the synchronization,
+and can just slap the new code on top of the old without any
+problems with other CPUs executing it at the same time.
+
+One special side-effect to the recording of the functions being
+traced is that we can now selectively choose which functions we
+wish to trace and which ones we want the mcount calls to remain
+as nops.
+
+Two files are used, one for enabling and one for disabling the
+tracing of specified functions. They are:
+
+ set_ftrace_filter
+
+and
+
+ set_ftrace_notrace
+
+A list of available functions that you can add to these files is
+listed in:
+
+ available_filter_functions
+
+ # cat available_filter_functions
+put_prev_task_idle
+kmem_cache_create
+pick_next_task_rt
+get_online_cpus
+pick_next_task_fair
+mutex_lock
+[...]
+
+If I am only interested in sys_nanosleep and hrtimer_interrupt:
+
+ # echo sys_nanosleep hrtimer_interrupt > set_ftrace_filter
+ # echo function > current_tracer
+ # echo 1 > tracing_on
+ # usleep 1
+ # echo 0 > tracing_on
+ # cat trace
+# tracer: function
+#
+# entries-in-buffer/entries-written: 5/5 #P:4
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ usleep-2665 [001] .... 4186.475355: sys_nanosleep <-system_call_fastpath
+ <idle>-0 [001] d.h1 4186.475409: hrtimer_interrupt <-smp_apic_timer_interrupt
+ usleep-2665 [001] d.h1 4186.475426: hrtimer_interrupt <-smp_apic_timer_interrupt
+ <idle>-0 [003] d.h1 4186.475426: hrtimer_interrupt <-smp_apic_timer_interrupt
+ <idle>-0 [002] d.h1 4186.475427: hrtimer_interrupt <-smp_apic_timer_interrupt
+
+To see which functions are being traced, you can cat the file:
+
+ # cat set_ftrace_filter
+hrtimer_interrupt
+sys_nanosleep
+
+
+Perhaps this is not enough. The filters also allow simple wild
+cards. Only the following are currently available
+
+ <match>* - will match functions that begin with <match>
+ *<match> - will match functions that end with <match>
+ *<match>* - will match functions that have <match> in it
+
+These are the only wild cards which are supported.
+
+ <match>*<match> will not work.
+
+Note: It is better to use quotes to enclose the wild cards,
+ otherwise the shell may expand the parameters into names
+ of files in the local directory.
+
+ # echo 'hrtimer_*' > set_ftrace_filter
+
+Produces:
+
+# tracer: function
+#
+# entries-in-buffer/entries-written: 897/897 #P:4
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ <idle>-0 [003] dN.1 4228.547803: hrtimer_cancel <-tick_nohz_idle_exit
+ <idle>-0 [003] dN.1 4228.547804: hrtimer_try_to_cancel <-hrtimer_cancel
+ <idle>-0 [003] dN.2 4228.547805: hrtimer_force_reprogram <-__remove_hrtimer
+ <idle>-0 [003] dN.1 4228.547805: hrtimer_forward <-tick_nohz_idle_exit
+ <idle>-0 [003] dN.1 4228.547805: hrtimer_start_range_ns <-hrtimer_start_expires.constprop.11
+ <idle>-0 [003] d..1 4228.547858: hrtimer_get_next_event <-get_next_timer_interrupt
+ <idle>-0 [003] d..1 4228.547859: hrtimer_start <-__tick_nohz_idle_enter
+ <idle>-0 [003] d..2 4228.547860: hrtimer_force_reprogram <-__rem
+
+Notice that we lost the sys_nanosleep.
+
+ # cat set_ftrace_filter
+hrtimer_run_queues
+hrtimer_run_pending
+hrtimer_init
+hrtimer_cancel
+hrtimer_try_to_cancel
+hrtimer_forward
+hrtimer_start
+hrtimer_reprogram
+hrtimer_force_reprogram
+hrtimer_get_next_event
+hrtimer_interrupt
+hrtimer_nanosleep
+hrtimer_wakeup
+hrtimer_get_remaining
+hrtimer_get_res
+hrtimer_init_sleeper
+
+
+This is because the '>' and '>>' act just like they do in bash.
+To rewrite the filters, use '>'
+To append to the filters, use '>>'
+
+To clear out a filter so that all functions will be recorded
+again:
+
+ # echo > set_ftrace_filter
+ # cat set_ftrace_filter
+ #
+
+Again, now we want to append.
+
+ # echo sys_nanosleep > set_ftrace_filter
+ # cat set_ftrace_filter
+sys_nanosleep
+ # echo 'hrtimer_*' >> set_ftrace_filter
+ # cat set_ftrace_filter
+hrtimer_run_queues
+hrtimer_run_pending
+hrtimer_init
+hrtimer_cancel
+hrtimer_try_to_cancel
+hrtimer_forward
+hrtimer_start
+hrtimer_reprogram
+hrtimer_force_reprogram
+hrtimer_get_next_event
+hrtimer_interrupt
+sys_nanosleep
+hrtimer_nanosleep
+hrtimer_wakeup
+hrtimer_get_remaining
+hrtimer_get_res
+hrtimer_init_sleeper
+
+
+The set_ftrace_notrace prevents those functions from being
+traced.
+
+ # echo '*preempt*' '*lock*' > set_ftrace_notrace
+
+Produces:
+
+# tracer: function
+#
+# entries-in-buffer/entries-written: 39608/39608 #P:4
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ bash-1994 [000] .... 4342.324896: file_ra_state_init <-do_dentry_open
+ bash-1994 [000] .... 4342.324897: open_check_o_direct <-do_last
+ bash-1994 [000] .... 4342.324897: ima_file_check <-do_last
+ bash-1994 [000] .... 4342.324898: process_measurement <-ima_file_check
+ bash-1994 [000] .... 4342.324898: ima_get_action <-process_measurement
+ bash-1994 [000] .... 4342.324898: ima_match_policy <-ima_get_action
+ bash-1994 [000] .... 4342.324899: do_truncate <-do_last
+ bash-1994 [000] .... 4342.324899: should_remove_suid <-do_truncate
+ bash-1994 [000] .... 4342.324899: notify_change <-do_truncate
+ bash-1994 [000] .... 4342.324900: current_fs_time <-notify_change
+ bash-1994 [000] .... 4342.324900: current_kernel_time <-current_fs_time
+ bash-1994 [000] .... 4342.324900: timespec_trunc <-current_fs_time
+
+We can see that there's no more lock or preempt tracing.
+
+
+Dynamic ftrace with the function graph tracer
+---------------------------------------------
+
+Although what has been explained above concerns both the
+function tracer and the function-graph-tracer, there are some
+special features only available in the function-graph tracer.
+
+If you want to trace only one function and all of its children,
+you just have to echo its name into set_graph_function:
+
+ echo __do_fault > set_graph_function
+
+will produce the following "expanded" trace of the __do_fault()
+function:
+
+ 0) | __do_fault() {
+ 0) | filemap_fault() {
+ 0) | find_lock_page() {
+ 0) 0.804 us | find_get_page();
+ 0) | __might_sleep() {
+ 0) 1.329 us | }
+ 0) 3.904 us | }
+ 0) 4.979 us | }
+ 0) 0.653 us | _spin_lock();
+ 0) 0.578 us | page_add_file_rmap();
+ 0) 0.525 us | native_set_pte_at();
+ 0) 0.585 us | _spin_unlock();
+ 0) | unlock_page() {
+ 0) 0.541 us | page_waitqueue();
+ 0) 0.639 us | __wake_up_bit();
+ 0) 2.786 us | }
+ 0) + 14.237 us | }
+ 0) | __do_fault() {
+ 0) | filemap_fault() {
+ 0) | find_lock_page() {
+ 0) 0.698 us | find_get_page();
+ 0) | __might_sleep() {
+ 0) 1.412 us | }
+ 0) 3.950 us | }
+ 0) 5.098 us | }
+ 0) 0.631 us | _spin_lock();
+ 0) 0.571 us | page_add_file_rmap();
+ 0) 0.526 us | native_set_pte_at();
+ 0) 0.586 us | _spin_unlock();
+ 0) | unlock_page() {
+ 0) 0.533 us | page_waitqueue();
+ 0) 0.638 us | __wake_up_bit();
+ 0) 2.793 us | }
+ 0) + 14.012 us | }
+
+You can also expand several functions at once:
+
+ echo sys_open > set_graph_function
+ echo sys_close >> set_graph_function
+
+Now if you want to go back to trace all functions you can clear
+this special filter via:
+
+ echo > set_graph_function
+
+
+ftrace_enabled
+--------------
+
+Note, the proc sysctl ftrace_enable is a big on/off switch for the
+function tracer. By default it is enabled (when function tracing is
+enabled in the kernel). If it is disabled, all function tracing is
+disabled. This includes not only the function tracers for ftrace, but
+also for any other uses (perf, kprobes, stack tracing, profiling, etc).
+
+Please disable this with care.
+
+This can be disable (and enabled) with:
+
+ sysctl kernel.ftrace_enabled=0
+ sysctl kernel.ftrace_enabled=1
+
+ or
+
+ echo 0 > /proc/sys/kernel/ftrace_enabled
+ echo 1 > /proc/sys/kernel/ftrace_enabled
+
+
+Filter commands
+---------------
+
+A few commands are supported by the set_ftrace_filter interface.
+Trace commands have the following format:
+
+<function>:<command>:<parameter>
+
+The following commands are supported:
+
+- mod
+ This command enables function filtering per module. The
+ parameter defines the module. For example, if only the write*
+ functions in the ext3 module are desired, run:
+
+ echo 'write*:mod:ext3' > set_ftrace_filter
+
+ This command interacts with the filter in the same way as
+ filtering based on function names. Thus, adding more functions
+ in a different module is accomplished by appending (>>) to the
+ filter file. Remove specific module functions by prepending
+ '!':
+
+ echo '!writeback*:mod:ext3' >> set_ftrace_filter
+
+- traceon/traceoff
+ These commands turn tracing on and off when the specified
+ functions are hit. The parameter determines how many times the
+ tracing system is turned on and off. If unspecified, there is
+ no limit. For example, to disable tracing when a schedule bug
+ is hit the first 5 times, run:
+
+ echo '__schedule_bug:traceoff:5' > set_ftrace_filter
+
+ To always disable tracing when __schedule_bug is hit:
+
+ echo '__schedule_bug:traceoff' > set_ftrace_filter
+
+ These commands are cumulative whether or not they are appended
+ to set_ftrace_filter. To remove a command, prepend it by '!'
+ and drop the parameter:
+
+ echo '!__schedule_bug:traceoff:0' > set_ftrace_filter
+
+ The above removes the traceoff command for __schedule_bug
+ that have a counter. To remove commands without counters:
+
+ echo '!__schedule_bug:traceoff' > set_ftrace_filter
+
+- snapshot
+ Will cause a snapshot to be triggered when the function is hit.
+
+ echo 'native_flush_tlb_others:snapshot' > set_ftrace_filter
+
+ To only snapshot once:
+
+ echo 'native_flush_tlb_others:snapshot:1' > set_ftrace_filter
+
+ To remove the above commands:
+
+ echo '!native_flush_tlb_others:snapshot' > set_ftrace_filter
+ echo '!native_flush_tlb_others:snapshot:0' > set_ftrace_filter
+
+- enable_event/disable_event
+ These commands can enable or disable a trace event. Note, because
+ function tracing callbacks are very sensitive, when these commands
+ are registered, the trace point is activated, but disabled in
+ a "soft" mode. That is, the tracepoint will be called, but
+ just will not be traced. The event tracepoint stays in this mode
+ as long as there's a command that triggers it.
+
+ echo 'try_to_wake_up:enable_event:sched:sched_switch:2' > \
+ set_ftrace_filter
+
+ The format is:
+
+ <function>:enable_event:<system>:<event>[:count]
+ <function>:disable_event:<system>:<event>[:count]
+
+ To remove the events commands:
+
+
+ echo '!try_to_wake_up:enable_event:sched:sched_switch:0' > \
+ set_ftrace_filter
+ echo '!schedule:disable_event:sched:sched_switch' > \
+ set_ftrace_filter
+
+- dump
+ When the function is hit, it will dump the contents of the ftrace
+ ring buffer to the console. This is useful if you need to debug
+ something, and want to dump the trace when a certain function
+ is hit. Perhaps its a function that is called before a tripple
+ fault happens and does not allow you to get a regular dump.
+
+- cpudump
+ When the function is hit, it will dump the contents of the ftrace
+ ring buffer for the current CPU to the console. Unlike the "dump"
+ command, it only prints out the contents of the ring buffer for the
+ CPU that executed the function that triggered the dump.
+
+trace_pipe
+----------
+
+The trace_pipe outputs the same content as the trace file, but
+the effect on the tracing is different. Every read from
+trace_pipe is consumed. This means that subsequent reads will be
+different. The trace is live.
+
+ # echo function > current_tracer
+ # cat trace_pipe > /tmp/trace.out &
+[1] 4153
+ # echo 1 > tracing_on
+ # usleep 1
+ # echo 0 > tracing_on
+ # cat trace
+# tracer: function
+#
+# entries-in-buffer/entries-written: 0/0 #P:4
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+
+ #
+ # cat /tmp/trace.out
+ bash-1994 [000] .... 5281.568961: mutex_unlock <-rb_simple_write
+ bash-1994 [000] .... 5281.568963: __mutex_unlock_slowpath <-mutex_unlock
+ bash-1994 [000] .... 5281.568963: __fsnotify_parent <-fsnotify_modify
+ bash-1994 [000] .... 5281.568964: fsnotify <-fsnotify_modify
+ bash-1994 [000] .... 5281.568964: __srcu_read_lock <-fsnotify
+ bash-1994 [000] .... 5281.568964: add_preempt_count <-__srcu_read_lock
+ bash-1994 [000] ...1 5281.568965: sub_preempt_count <-__srcu_read_lock
+ bash-1994 [000] .... 5281.568965: __srcu_read_unlock <-fsnotify
+ bash-1994 [000] .... 5281.568967: sys_dup2 <-system_call_fastpath
+
+
+Note, reading the trace_pipe file will block until more input is
+added.
+
+trace entries
+-------------
+
+Having too much or not enough data can be troublesome in
+diagnosing an issue in the kernel. The file buffer_size_kb is
+used to modify the size of the internal trace buffers. The
+number listed is the number of entries that can be recorded per
+CPU. To know the full size, multiply the number of possible CPUs
+with the number of entries.
+
+ # cat buffer_size_kb
+1408 (units kilobytes)
+
+Or simply read buffer_total_size_kb
+
+ # cat buffer_total_size_kb
+5632
+
+To modify the buffer, simple echo in a number (in 1024 byte segments).
+
+ # echo 10000 > buffer_size_kb
+ # cat buffer_size_kb
+10000 (units kilobytes)
+
+It will try to allocate as much as possible. If you allocate too
+much, it can cause Out-Of-Memory to trigger.
+
+ # echo 1000000000000 > buffer_size_kb
+-bash: echo: write error: Cannot allocate memory
+ # cat buffer_size_kb
+85
+
+The per_cpu buffers can be changed individually as well:
+
+ # echo 10000 > per_cpu/cpu0/buffer_size_kb
+ # echo 100 > per_cpu/cpu1/buffer_size_kb
+
+When the per_cpu buffers are not the same, the buffer_size_kb
+at the top level will just show an X
+
+ # cat buffer_size_kb
+X
+
+This is where the buffer_total_size_kb is useful:
+
+ # cat buffer_total_size_kb
+12916
+
+Writing to the top level buffer_size_kb will reset all the buffers
+to be the same again.
+
+Snapshot
+--------
+CONFIG_TRACER_SNAPSHOT makes a generic snapshot feature
+available to all non latency tracers. (Latency tracers which
+record max latency, such as "irqsoff" or "wakeup", can't use
+this feature, since those are already using the snapshot
+mechanism internally.)
+
+Snapshot preserves a current trace buffer at a particular point
+in time without stopping tracing. Ftrace swaps the current
+buffer with a spare buffer, and tracing continues in the new
+current (=previous spare) buffer.
+
+The following debugfs files in "tracing" are related to this
+feature:
+
+ snapshot:
+
+ This is used to take a snapshot and to read the output
+ of the snapshot. Echo 1 into this file to allocate a
+ spare buffer and to take a snapshot (swap), then read
+ the snapshot from this file in the same format as
+ "trace" (described above in the section "The File
+ System"). Both reads snapshot and tracing are executable
+ in parallel. When the spare buffer is allocated, echoing
+ 0 frees it, and echoing else (positive) values clear the
+ snapshot contents.
+ More details are shown in the table below.
+
+ status\input | 0 | 1 | else |
+ --------------+------------+------------+------------+
+ not allocated |(do nothing)| alloc+swap |(do nothing)|
+ --------------+------------+------------+------------+
+ allocated | free | swap | clear |
+ --------------+------------+------------+------------+
+
+Here is an example of using the snapshot feature.
+
+ # echo 1 > events/sched/enable
+ # echo 1 > snapshot
+ # cat snapshot
+# tracer: nop
+#
+# entries-in-buffer/entries-written: 71/71 #P:8
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ <idle>-0 [005] d... 2440.603828: sched_switch: prev_comm=swapper/5 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2242 next_prio=120
+ sleep-2242 [005] d... 2440.603846: sched_switch: prev_comm=snapshot-test-2 prev_pid=2242 prev_prio=120 prev_state=R ==> next_comm=kworker/5:1 next_pid=60 next_prio=120
+[...]
+ <idle>-0 [002] d... 2440.707230: sched_switch: prev_comm=swapper/2 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2229 next_prio=120
+
+ # cat trace
+# tracer: nop
+#
+# entries-in-buffer/entries-written: 77/77 #P:8
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ <idle>-0 [007] d... 2440.707395: sched_switch: prev_comm=swapper/7 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=snapshot-test-2 next_pid=2243 next_prio=120
+ snapshot-test-2-2229 [002] d... 2440.707438: sched_switch: prev_comm=snapshot-test-2 prev_pid=2229 prev_prio=120 prev_state=S ==> next_comm=swapper/2 next_pid=0 next_prio=120
+[...]
+
+
+If you try to use this snapshot feature when current tracer is
+one of the latency tracers, you will get the following results.
+
+ # echo wakeup > current_tracer
+ # echo 1 > snapshot
+bash: echo: write error: Device or resource busy
+ # cat snapshot
+cat: snapshot: Device or resource busy
+
+
+Instances
+---------
+In the debugfs tracing directory is a directory called "instances".
+This directory can have new directories created inside of it using
+mkdir, and removing directories with rmdir. The directory created
+with mkdir in this directory will already contain files and other
+directories after it is created.
+
+ # mkdir instances/foo
+ # ls instances/foo
+buffer_size_kb buffer_total_size_kb events free_buffer per_cpu
+set_event snapshot trace trace_clock trace_marker trace_options
+trace_pipe tracing_on
+
+As you can see, the new directory looks similar to the tracing directory
+itself. In fact, it is very similar, except that the buffer and
+events are agnostic from the main director, or from any other
+instances that are created.
+
+The files in the new directory work just like the files with the
+same name in the tracing directory except the buffer that is used
+is a separate and new buffer. The files affect that buffer but do not
+affect the main buffer with the exception of trace_options. Currently,
+the trace_options affect all instances and the top level buffer
+the same, but this may change in future releases. That is, options
+may become specific to the instance they reside in.
+
+Notice that none of the function tracer files are there, nor is
+current_tracer and available_tracers. This is because the buffers
+can currently only have events enabled for them.
+
+ # mkdir instances/foo
+ # mkdir instances/bar
+ # mkdir instances/zoot
+ # echo 100000 > buffer_size_kb
+ # echo 1000 > instances/foo/buffer_size_kb
+ # echo 5000 > instances/bar/per_cpu/cpu1/buffer_size_kb
+ # echo function > current_trace
+ # echo 1 > instances/foo/events/sched/sched_wakeup/enable
+ # echo 1 > instances/foo/events/sched/sched_wakeup_new/enable
+ # echo 1 > instances/foo/events/sched/sched_switch/enable
+ # echo 1 > instances/bar/events/irq/enable
+ # echo 1 > instances/zoot/events/syscalls/enable
+ # cat trace_pipe
+CPU:2 [LOST 11745 EVENTS]
+ bash-2044 [002] .... 10594.481032: _raw_spin_lock_irqsave <-get_page_from_freelist
+ bash-2044 [002] d... 10594.481032: add_preempt_count <-_raw_spin_lock_irqsave
+ bash-2044 [002] d..1 10594.481032: __rmqueue <-get_page_from_freelist
+ bash-2044 [002] d..1 10594.481033: _raw_spin_unlock <-get_page_from_freelist
+ bash-2044 [002] d..1 10594.481033: sub_preempt_count <-_raw_spin_unlock
+ bash-2044 [002] d... 10594.481033: get_pageblock_flags_group <-get_pageblock_migratetype
+ bash-2044 [002] d... 10594.481034: __mod_zone_page_state <-get_page_from_freelist
+ bash-2044 [002] d... 10594.481034: zone_statistics <-get_page_from_freelist
+ bash-2044 [002] d... 10594.481034: __inc_zone_state <-zone_statistics
+ bash-2044 [002] d... 10594.481034: __inc_zone_state <-zone_statistics
+ bash-2044 [002] .... 10594.481035: arch_dup_task_struct <-copy_process
+[...]
+
+ # cat instances/foo/trace_pipe
+ bash-1998 [000] d..4 136.676759: sched_wakeup: comm=kworker/0:1 pid=59 prio=120 success=1 target_cpu=000
+ bash-1998 [000] dN.4 136.676760: sched_wakeup: comm=bash pid=1998 prio=120 success=1 target_cpu=000
+ <idle>-0 [003] d.h3 136.676906: sched_wakeup: comm=rcu_preempt pid=9 prio=120 success=1 target_cpu=003
+ <idle>-0 [003] d..3 136.676909: sched_switch: prev_comm=swapper/3 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=rcu_preempt next_pid=9 next_prio=120
+ rcu_preempt-9 [003] d..3 136.676916: sched_switch: prev_comm=rcu_preempt prev_pid=9 prev_prio=120 prev_state=S ==> next_comm=swapper/3 next_pid=0 next_prio=120
+ bash-1998 [000] d..4 136.677014: sched_wakeup: comm=kworker/0:1 pid=59 prio=120 success=1 target_cpu=000
+ bash-1998 [000] dN.4 136.677016: sched_wakeup: comm=bash pid=1998 prio=120 success=1 target_cpu=000
+ bash-1998 [000] d..3 136.677018: sched_switch: prev_comm=bash prev_pid=1998 prev_prio=120 prev_state=R+ ==> next_comm=kworker/0:1 next_pid=59 next_prio=120
+ kworker/0:1-59 [000] d..4 136.677022: sched_wakeup: comm=sshd pid=1995 prio=120 success=1 target_cpu=001
+ kworker/0:1-59 [000] d..3 136.677025: sched_switch: prev_comm=kworker/0:1 prev_pid=59 prev_prio=120 prev_state=S ==> next_comm=bash next_pid=1998 next_prio=120
+[...]
+
+ # cat instances/bar/trace_pipe
+ migration/1-14 [001] d.h3 138.732674: softirq_raise: vec=3 [action=NET_RX]
+ <idle>-0 [001] dNh3 138.732725: softirq_raise: vec=3 [action=NET_RX]
+ bash-1998 [000] d.h1 138.733101: softirq_raise: vec=1 [action=TIMER]
+ bash-1998 [000] d.h1 138.733102: softirq_raise: vec=9 [action=RCU]
+ bash-1998 [000] ..s2 138.733105: softirq_entry: vec=1 [action=TIMER]
+ bash-1998 [000] ..s2 138.733106: softirq_exit: vec=1 [action=TIMER]
+ bash-1998 [000] ..s2 138.733106: softirq_entry: vec=9 [action=RCU]
+ bash-1998 [000] ..s2 138.733109: softirq_exit: vec=9 [action=RCU]
+ sshd-1995 [001] d.h1 138.733278: irq_handler_entry: irq=21 name=uhci_hcd:usb4
+ sshd-1995 [001] d.h1 138.733280: irq_handler_exit: irq=21 ret=unhandled
+ sshd-1995 [001] d.h1 138.733281: irq_handler_entry: irq=21 name=eth0
+ sshd-1995 [001] d.h1 138.733283: irq_handler_exit: irq=21 ret=handled
+[...]
+
+ # cat instances/zoot/trace
+# tracer: nop
+#
+# entries-in-buffer/entries-written: 18996/18996 #P:4
+#
+# _-----=> irqs-off
+# / _----=> need-resched
+# | / _---=> hardirq/softirq
+# || / _--=> preempt-depth
+# ||| / delay
+# TASK-PID CPU# |||| TIMESTAMP FUNCTION
+# | | | |||| | |
+ bash-1998 [000] d... 140.733501: sys_write -> 0x2
+ bash-1998 [000] d... 140.733504: sys_dup2(oldfd: a, newfd: 1)
+ bash-1998 [000] d... 140.733506: sys_dup2 -> 0x1
+ bash-1998 [000] d... 140.733508: sys_fcntl(fd: a, cmd: 1, arg: 0)
+ bash-1998 [000] d... 140.733509: sys_fcntl -> 0x1
+ bash-1998 [000] d... 140.733510: sys_close(fd: a)
+ bash-1998 [000] d... 140.733510: sys_close -> 0x0
+ bash-1998 [000] d... 140.733514: sys_rt_sigprocmask(how: 0, nset: 0, oset: 6e2768, sigsetsize: 8)
+ bash-1998 [000] d... 140.733515: sys_rt_sigprocmask -> 0x0
+ bash-1998 [000] d... 140.733516: sys_rt_sigaction(sig: 2, act: 7fff718846f0, oact: 7fff71884650, sigsetsize: 8)
+ bash-1998 [000] d... 140.733516: sys_rt_sigaction -> 0x0
+
+You can see that the trace of the top most trace buffer shows only
+the function tracing. The foo instance displays wakeups and task
+switches.
+
+To remove the instances, simply delete their directories:
+
+ # rmdir instances/foo
+ # rmdir instances/bar
+ # rmdir instances/zoot
+
+Note, if a process has a trace file open in one of the instance
+directories, the rmdir will fail with EBUSY.
+
+
+Stack trace
+-----------
+Since the kernel has a fixed sized stack, it is important not to
+waste it in functions. A kernel developer must be conscience of
+what they allocate on the stack. If they add too much, the system
+can be in danger of a stack overflow, and corruption will occur,
+usually leading to a system panic.
+
+There are some tools that check this, usually with interrupts
+periodically checking usage. But if you can perform a check
+at every function call that will become very useful. As ftrace provides
+a function tracer, it makes it convenient to check the stack size
+at every function call. This is enabled via the stack tracer.
+
+CONFIG_STACK_TRACER enables the ftrace stack tracing functionality.
+To enable it, write a '1' into /proc/sys/kernel/stack_tracer_enabled.
+
+ # echo 1 > /proc/sys/kernel/stack_tracer_enabled
+
+You can also enable it from the kernel command line to trace
+the stack size of the kernel during boot up, by adding "stacktrace"
+to the kernel command line parameter.
+
+After running it for a few minutes, the output looks like:
+
+ # cat stack_max_size
+2928
+
+ # cat stack_trace
+ Depth Size Location (18 entries)
+ ----- ---- --------
+ 0) 2928 224 update_sd_lb_stats+0xbc/0x4ac
+ 1) 2704 160 find_busiest_group+0x31/0x1f1
+ 2) 2544 256 load_balance+0xd9/0x662
+ 3) 2288 80 idle_balance+0xbb/0x130
+ 4) 2208 128 __schedule+0x26e/0x5b9
+ 5) 2080 16 schedule+0x64/0x66
+ 6) 2064 128 schedule_timeout+0x34/0xe0
+ 7) 1936 112 wait_for_common+0x97/0xf1
+ 8) 1824 16 wait_for_completion+0x1d/0x1f
+ 9) 1808 128 flush_work+0xfe/0x119
+ 10) 1680 16 tty_flush_to_ldisc+0x1e/0x20
+ 11) 1664 48 input_available_p+0x1d/0x5c
+ 12) 1616 48 n_tty_poll+0x6d/0x134
+ 13) 1568 64 tty_poll+0x64/0x7f
+ 14) 1504 880 do_select+0x31e/0x511
+ 15) 624 400 core_sys_select+0x177/0x216
+ 16) 224 96 sys_select+0x91/0xb9
+ 17) 128 128 system_call_fastpath+0x16/0x1b
+
+Note, if -mfentry is being used by gcc, functions get traced before
+they set up the stack frame. This means that leaf level functions
+are not tested by the stack tracer when -mfentry is used.
+
+Currently, -mfentry is used by gcc 4.6.0 and above on x86 only.
+
+---------
+
+More details can be found in the source code, in the
+kernel/trace/*.c files.
diff --git a/Documentation/trace/function-graph-fold.vim b/Documentation/trace/function-graph-fold.vim
new file mode 100644
index 000000000..0544b504c
--- /dev/null
+++ b/Documentation/trace/function-graph-fold.vim
@@ -0,0 +1,42 @@
+" Enable folding for ftrace function_graph traces.
+"
+" To use, :source this file while viewing a function_graph trace, or use vim's
+" -S option to load from the command-line together with a trace. You can then
+" use the usual vim fold commands, such as "za", to open and close nested
+" functions. While closed, a fold will show the total time taken for a call,
+" as would normally appear on the line with the closing brace. Folded
+" functions will not include finish_task_switch(), so folding should remain
+" relatively sane even through a context switch.
+"
+" Note that this will almost certainly only work well with a
+" single-CPU trace (e.g. trace-cmd report --cpu 1).
+
+function! FunctionGraphFoldExpr(lnum)
+ let line = getline(a:lnum)
+ if line[-1:] == '{'
+ if line =~ 'finish_task_switch() {$'
+ return '>1'
+ endif
+ return 'a1'
+ elseif line[-1:] == '}'
+ return 's1'
+ else
+ return '='
+ endif
+endfunction
+
+function! FunctionGraphFoldText()
+ let s = split(getline(v:foldstart), '|', 1)
+ if getline(v:foldend+1) =~ 'finish_task_switch() {$'
+ let s[2] = ' task switch '
+ else
+ let e = split(getline(v:foldend), '|', 1)
+ let s[2] = e[2]
+ endif
+ return join(s, '|')
+endfunction
+
+setlocal foldexpr=FunctionGraphFoldExpr(v:lnum)
+setlocal foldtext=FunctionGraphFoldText()
+setlocal foldcolumn=12
+setlocal foldmethod=expr
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
new file mode 100644
index 000000000..d68ea5fc8
--- /dev/null
+++ b/Documentation/trace/kprobetrace.txt
@@ -0,0 +1,172 @@
+ Kprobe-based Event Tracing
+ ==========================
+
+ Documentation is written by Masami Hiramatsu
+
+
+Overview
+--------
+These events are similar to tracepoint based events. Instead of Tracepoint,
+this is based on kprobes (kprobe and kretprobe). So it can probe wherever
+kprobes can probe (this means, all functions body except for __kprobes
+functions). Unlike the Tracepoint based event, this can be added and removed
+dynamically, on the fly.
+
+To enable this feature, build your kernel with CONFIG_KPROBE_EVENT=y.
+
+Similar to the events tracer, this doesn't need to be activated via
+current_tracer. Instead of that, add probe points via
+/sys/kernel/debug/tracing/kprobe_events, and enable it via
+/sys/kernel/debug/tracing/events/kprobes/<EVENT>/enabled.
+
+
+Synopsis of kprobe_events
+-------------------------
+ p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
+ r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
+ -:[GRP/]EVENT : Clear a probe
+
+ GRP : Group name. If omitted, use "kprobes" for it.
+ EVENT : Event name. If omitted, the event name is generated
+ based on SYM+offs or MEMADDR.
+ MOD : Module name which has given SYM.
+ SYM[+offs] : Symbol+offset where the probe is inserted.
+ MEMADDR : Address where the probe is inserted.
+
+ FETCHARGS : Arguments. Each probe can have up to 128 args.
+ %REG : Fetch register REG
+ @ADDR : Fetch memory at ADDR (ADDR should be in kernel)
+ @SYM[+|-offs] : Fetch memory at SYM +|- offs (SYM should be a data symbol)
+ $stackN : Fetch Nth entry of stack (N >= 0)
+ $stack : Fetch stack address.
+ $retval : Fetch return value.(*)
+ +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
+ NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
+ FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
+ (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
+ are supported.
+
+ (*) only for return probe.
+ (**) this is useful for fetching a field of data structures.
+
+Types
+-----
+Several types are supported for fetch-args. Kprobe tracer will access memory
+by given type. Prefix 's' and 'u' means those types are signed and unsigned
+respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
+String type is a special type, which fetches a "null-terminated" string from
+kernel space. This means it will fail and store NULL if the string container
+has been paged out.
+Bitfield is another special type, which takes 3 parameters, bit-width, bit-
+offset, and container-size (usually 32). The syntax is;
+
+ b<bit-width>@<bit-offset>/<container-size>
+
+
+Per-Probe Event Filtering
+-------------------------
+ Per-probe event filtering feature allows you to set different filter on each
+probe and gives you what arguments will be shown in trace buffer. If an event
+name is specified right after 'p:' or 'r:' in kprobe_events, it adds an event
+under tracing/events/kprobes/<EVENT>, at the directory you can see 'id',
+'enabled', 'format' and 'filter'.
+
+enabled:
+ You can enable/disable the probe by writing 1 or 0 on it.
+
+format:
+ This shows the format of this probe event.
+
+filter:
+ You can write filtering rules of this event.
+
+id:
+ This shows the id of this probe event.
+
+
+Event Profiling
+---------------
+ You can check the total number of probe hits and probe miss-hits via
+/sys/kernel/debug/tracing/kprobe_profile.
+ The first column is event name, the second is the number of probe hits,
+the third is the number of probe miss-hits.
+
+
+Usage examples
+--------------
+To add a probe as a new event, write a new definition to kprobe_events
+as below.
+
+ echo 'p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)' > /sys/kernel/debug/tracing/kprobe_events
+
+ This sets a kprobe on the top of do_sys_open() function with recording
+1st to 4th arguments as "myprobe" event. Note, which register/stack entry is
+assigned to each function argument depends on arch-specific ABI. If you unsure
+the ABI, please try to use probe subcommand of perf-tools (you can find it
+under tools/perf/).
+As this example shows, users can choose more familiar names for each arguments.
+
+ echo 'r:myretprobe do_sys_open $retval' >> /sys/kernel/debug/tracing/kprobe_events
+
+ This sets a kretprobe on the return point of do_sys_open() function with
+recording return value as "myretprobe" event.
+ You can see the format of these events via
+/sys/kernel/debug/tracing/events/kprobes/<EVENT>/format.
+
+ cat /sys/kernel/debug/tracing/events/kprobes/myprobe/format
+name: myprobe
+ID: 780
+format:
+ field:unsigned short common_type; offset:0; size:2; signed:0;
+ field:unsigned char common_flags; offset:2; size:1; signed:0;
+ field:unsigned char common_preempt_count; offset:3; size:1;signed:0;
+ field:int common_pid; offset:4; size:4; signed:1;
+
+ field:unsigned long __probe_ip; offset:12; size:4; signed:0;
+ field:int __probe_nargs; offset:16; size:4; signed:1;
+ field:unsigned long dfd; offset:20; size:4; signed:0;
+ field:unsigned long filename; offset:24; size:4; signed:0;
+ field:unsigned long flags; offset:28; size:4; signed:0;
+ field:unsigned long mode; offset:32; size:4; signed:0;
+
+
+print fmt: "(%lx) dfd=%lx filename=%lx flags=%lx mode=%lx", REC->__probe_ip,
+REC->dfd, REC->filename, REC->flags, REC->mode
+
+ You can see that the event has 4 arguments as in the expressions you specified.
+
+ echo > /sys/kernel/debug/tracing/kprobe_events
+
+ This clears all probe points.
+
+ Or,
+
+ echo -:myprobe >> kprobe_events
+
+ This clears probe points selectively.
+
+ Right after definition, each event is disabled by default. For tracing these
+events, you need to enable it.
+
+ echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable
+ echo 1 > /sys/kernel/debug/tracing/events/kprobes/myretprobe/enable
+
+ And you can see the traced information via /sys/kernel/debug/tracing/trace.
+
+ cat /sys/kernel/debug/tracing/trace
+# tracer: nop
+#
+# TASK-PID CPU# TIMESTAMP FUNCTION
+# | | | | |
+ <...>-1447 [001] 1038282.286875: myprobe: (do_sys_open+0x0/0xd6) dfd=3 filename=7fffd1ec4440 flags=8000 mode=0
+ <...>-1447 [001] 1038282.286878: myretprobe: (sys_openat+0xc/0xe <- do_sys_open) $retval=fffffffffffffffe
+ <...>-1447 [001] 1038282.286885: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=40413c flags=8000 mode=1b6
+ <...>-1447 [001] 1038282.286915: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3
+ <...>-1447 [001] 1038282.286969: myprobe: (do_sys_open+0x0/0xd6) dfd=ffffff9c filename=4041c6 flags=98800 mode=10
+ <...>-1447 [001] 1038282.286976: myretprobe: (sys_open+0x1b/0x1d <- do_sys_open) $retval=3
+
+
+ Each line shows when the kernel hits an event, and <- SYMBOL means kernel
+returns from SYMBOL(e.g. "sys_open+0x1b/0x1d <- do_sys_open" means kernel
+returns from do_sys_open to sys_open+0x1b).
+
diff --git a/Documentation/trace/mmiotrace.txt b/Documentation/trace/mmiotrace.txt
new file mode 100644
index 000000000..664e7386d
--- /dev/null
+++ b/Documentation/trace/mmiotrace.txt
@@ -0,0 +1,164 @@
+ In-kernel memory-mapped I/O tracing
+
+
+Home page and links to optional user space tools:
+
+ http://nouveau.freedesktop.org/wiki/MmioTrace
+
+MMIO tracing was originally developed by Intel around 2003 for their Fault
+Injection Test Harness. In Dec 2006 - Jan 2007, using the code from Intel,
+Jeff Muizelaar created a tool for tracing MMIO accesses with the Nouveau
+project in mind. Since then many people have contributed.
+
+Mmiotrace was built for reverse engineering any memory-mapped IO device with
+the Nouveau project as the first real user. Only x86 and x86_64 architectures
+are supported.
+
+Out-of-tree mmiotrace was originally modified for mainline inclusion and
+ftrace framework by Pekka Paalanen <pq@iki.fi>.
+
+
+Preparation
+-----------
+
+Mmiotrace feature is compiled in by the CONFIG_MMIOTRACE option. Tracing is
+disabled by default, so it is safe to have this set to yes. SMP systems are
+supported, but tracing is unreliable and may miss events if more than one CPU
+is on-line, therefore mmiotrace takes all but one CPU off-line during run-time
+activation. You can re-enable CPUs by hand, but you have been warned, there
+is no way to automatically detect if you are losing events due to CPUs racing.
+
+
+Usage Quick Reference
+---------------------
+
+$ mount -t debugfs debugfs /sys/kernel/debug
+$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer
+$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt &
+Start X or whatever.
+$ echo "X is up" > /sys/kernel/debug/tracing/trace_marker
+$ echo nop > /sys/kernel/debug/tracing/current_tracer
+Check for lost events.
+
+
+Usage
+-----
+
+Make sure debugfs is mounted to /sys/kernel/debug.
+If not (requires root privileges):
+$ mount -t debugfs debugfs /sys/kernel/debug
+
+Check that the driver you are about to trace is not loaded.
+
+Activate mmiotrace (requires root privileges):
+$ echo mmiotrace > /sys/kernel/debug/tracing/current_tracer
+
+Start storing the trace:
+$ cat /sys/kernel/debug/tracing/trace_pipe > mydump.txt &
+The 'cat' process should stay running (sleeping) in the background.
+
+Load the driver you want to trace and use it. Mmiotrace will only catch MMIO
+accesses to areas that are ioremapped while mmiotrace is active.
+
+During tracing you can place comments (markers) into the trace by
+$ echo "X is up" > /sys/kernel/debug/tracing/trace_marker
+This makes it easier to see which part of the (huge) trace corresponds to
+which action. It is recommended to place descriptive markers about what you
+do.
+
+Shut down mmiotrace (requires root privileges):
+$ echo nop > /sys/kernel/debug/tracing/current_tracer
+The 'cat' process exits. If it does not, kill it by issuing 'fg' command and
+pressing ctrl+c.
+
+Check that mmiotrace did not lose events due to a buffer filling up. Either
+$ grep -i lost mydump.txt
+which tells you exactly how many events were lost, or use
+$ dmesg
+to view your kernel log and look for "mmiotrace has lost events" warning. If
+events were lost, the trace is incomplete. You should enlarge the buffers and
+try again. Buffers are enlarged by first seeing how large the current buffers
+are:
+$ cat /sys/kernel/debug/tracing/buffer_size_kb
+gives you a number. Approximately double this number and write it back, for
+instance:
+$ echo 128000 > /sys/kernel/debug/tracing/buffer_size_kb
+Then start again from the top.
+
+If you are doing a trace for a driver project, e.g. Nouveau, you should also
+do the following before sending your results:
+$ lspci -vvv > lspci.txt
+$ dmesg > dmesg.txt
+$ tar zcf pciid-nick-mmiotrace.tar.gz mydump.txt lspci.txt dmesg.txt
+and then send the .tar.gz file. The trace compresses considerably. Replace
+"pciid" and "nick" with the PCI ID or model name of your piece of hardware
+under investigation and your nickname.
+
+
+How Mmiotrace Works
+-------------------
+
+Access to hardware IO-memory is gained by mapping addresses from PCI bus by
+calling one of the ioremap_*() functions. Mmiotrace is hooked into the
+__ioremap() function and gets called whenever a mapping is created. Mapping is
+an event that is recorded into the trace log. Note that ISA range mappings
+are not caught, since the mapping always exists and is returned directly.
+
+MMIO accesses are recorded via page faults. Just before __ioremap() returns,
+the mapped pages are marked as not present. Any access to the pages causes a
+fault. The page fault handler calls mmiotrace to handle the fault. Mmiotrace
+marks the page present, sets TF flag to achieve single stepping and exits the
+fault handler. The instruction that faulted is executed and debug trap is
+entered. Here mmiotrace again marks the page as not present. The instruction
+is decoded to get the type of operation (read/write), data width and the value
+read or written. These are stored to the trace log.
+
+Setting the page present in the page fault handler has a race condition on SMP
+machines. During the single stepping other CPUs may run freely on that page
+and events can be missed without a notice. Re-enabling other CPUs during
+tracing is discouraged.
+
+
+Trace Log Format
+----------------
+
+The raw log is text and easily filtered with e.g. grep and awk. One record is
+one line in the log. A record starts with a keyword, followed by keyword-
+dependent arguments. Arguments are separated by a space, or continue until the
+end of line. The format for version 20070824 is as follows:
+
+Explanation Keyword Space-separated arguments
+---------------------------------------------------------------------------
+
+read event R width, timestamp, map id, physical, value, PC, PID
+write event W width, timestamp, map id, physical, value, PC, PID
+ioremap event MAP timestamp, map id, physical, virtual, length, PC, PID
+iounmap event UNMAP timestamp, map id, PC, PID
+marker MARK timestamp, text
+version VERSION the string "20070824"
+info for reader LSPCI one line from lspci -v
+PCI address map PCIDEV space-separated /proc/bus/pci/devices data
+unk. opcode UNKNOWN timestamp, map id, physical, data, PC, PID
+
+Timestamp is in seconds with decimals. Physical is a PCI bus address, virtual
+is a kernel virtual address. Width is the data width in bytes and value is the
+data value. Map id is an arbitrary id number identifying the mapping that was
+used in an operation. PC is the program counter and PID is process id. PC is
+zero if it is not recorded. PID is always zero as tracing MMIO accesses
+originating in user space memory is not yet supported.
+
+For instance, the following awk filter will pass all 32-bit writes that target
+physical addresses in the range [0xfb73ce40, 0xfb800000[
+
+$ awk '/W 4 / { adr=strtonum($5); if (adr >= 0xfb73ce40 &&
+adr < 0xfb800000) print; }'
+
+
+Tools for Developers
+--------------------
+
+The user space tools include utilities for:
+- replacing numeric addresses and values with hardware register names
+- replaying MMIO logs, i.e., re-executing the recorded writes
+
+
diff --git a/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
new file mode 100644
index 000000000..0a120aae3
--- /dev/null
+++ b/Documentation/trace/postprocess/trace-pagealloc-postprocess.pl
@@ -0,0 +1,418 @@
+#!/usr/bin/perl
+# This is a POC (proof of concept or piece of crap, take your pick) for reading the
+# text representation of trace output related to page allocation. It makes an attempt
+# to extract some high-level information on what is going on. The accuracy of the parser
+# may vary considerably
+#
+# Example usage: trace-pagealloc-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
+# other options
+# --prepend-parent Report on the parent proc and PID
+# --read-procstat If the trace lacks process info, get it from /proc
+# --ignore-pid Aggregate processes of the same name together
+#
+# Copyright (c) IBM Corporation 2009
+# Author: Mel Gorman <mel@csn.ul.ie>
+use strict;
+use Getopt::Long;
+
+# Tracepoint events
+use constant MM_PAGE_ALLOC => 1;
+use constant MM_PAGE_FREE => 2;
+use constant MM_PAGE_FREE_BATCHED => 3;
+use constant MM_PAGE_PCPU_DRAIN => 4;
+use constant MM_PAGE_ALLOC_ZONE_LOCKED => 5;
+use constant MM_PAGE_ALLOC_EXTFRAG => 6;
+use constant EVENT_UNKNOWN => 7;
+
+# Constants used to track state
+use constant STATE_PCPU_PAGES_DRAINED => 8;
+use constant STATE_PCPU_PAGES_REFILLED => 9;
+
+# High-level events extrapolated from tracepoints
+use constant HIGH_PCPU_DRAINS => 10;
+use constant HIGH_PCPU_REFILLS => 11;
+use constant HIGH_EXT_FRAGMENT => 12;
+use constant HIGH_EXT_FRAGMENT_SEVERE => 13;
+use constant HIGH_EXT_FRAGMENT_MODERATE => 14;
+use constant HIGH_EXT_FRAGMENT_CHANGED => 15;
+
+my %perprocesspid;
+my %perprocess;
+my $opt_ignorepid;
+my $opt_read_procstat;
+my $opt_prepend_parent;
+
+# Catch sigint and exit on request
+my $sigint_report = 0;
+my $sigint_exit = 0;
+my $sigint_pending = 0;
+my $sigint_received = 0;
+sub sigint_handler {
+ my $current_time = time;
+ if ($current_time - 2 > $sigint_received) {
+ print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
+ $sigint_report = 1;
+ } else {
+ if (!$sigint_exit) {
+ print "Second SIGINT received quickly, exiting\n";
+ }
+ $sigint_exit++;
+ }
+
+ if ($sigint_exit > 3) {
+ print "Many SIGINTs received, exiting now without report\n";
+ exit;
+ }
+
+ $sigint_received = $current_time;
+ $sigint_pending = 1;
+}
+$SIG{INT} = "sigint_handler";
+
+# Parse command line options
+GetOptions(
+ 'ignore-pid' => \$opt_ignorepid,
+ 'read-procstat' => \$opt_read_procstat,
+ 'prepend-parent' => \$opt_prepend_parent,
+);
+
+# Defaults for dynamically discovered regex's
+my $regex_fragdetails_default = 'page=([0-9a-f]*) pfn=([0-9]*) alloc_order=([-0-9]*) fallback_order=([-0-9]*) pageblock_order=([-0-9]*) alloc_migratetype=([-0-9]*) fallback_migratetype=([-0-9]*) fragmenting=([-0-9]) change_ownership=([-0-9])';
+
+# Dyanically discovered regex
+my $regex_fragdetails;
+
+# Static regex used. Specified like this for readability and for use with /o
+# (process_pid) (cpus ) ( time ) (tpoint ) (details)
+my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
+my $regex_statname = '[-0-9]*\s\((.*)\).*';
+my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
+
+sub generate_traceevent_regex {
+ my $event = shift;
+ my $default = shift;
+ my $regex;
+
+ # Read the event format or use the default
+ if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) {
+ $regex = $default;
+ } else {
+ my $line;
+ while (!eof(FORMAT)) {
+ $line = <FORMAT>;
+ if ($line =~ /^print fmt:\s"(.*)",.*/) {
+ $regex = $1;
+ $regex =~ s/%p/\([0-9a-f]*\)/g;
+ $regex =~ s/%d/\([-0-9]*\)/g;
+ $regex =~ s/%lu/\([0-9]*\)/g;
+ }
+ }
+ }
+
+ # Verify fields are in the right order
+ my $tuple;
+ foreach $tuple (split /\s/, $regex) {
+ my ($key, $value) = split(/=/, $tuple);
+ my $expected = shift;
+ if ($key ne $expected) {
+ print("WARNING: Format not as expected '$key' != '$expected'");
+ $regex =~ s/$key=\((.*)\)/$key=$1/;
+ }
+ }
+
+ if (defined shift) {
+ die("Fewer fields than expected in format");
+ }
+
+ return $regex;
+}
+$regex_fragdetails = generate_traceevent_regex("kmem/mm_page_alloc_extfrag",
+ $regex_fragdetails_default,
+ "page", "pfn",
+ "alloc_order", "fallback_order", "pageblock_order",
+ "alloc_migratetype", "fallback_migratetype",
+ "fragmenting", "change_ownership");
+
+sub read_statline($) {
+ my $pid = $_[0];
+ my $statline;
+
+ if (open(STAT, "/proc/$pid/stat")) {
+ $statline = <STAT>;
+ close(STAT);
+ }
+
+ if ($statline eq '') {
+ $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
+ }
+
+ return $statline;
+}
+
+sub guess_process_pid($$) {
+ my $pid = $_[0];
+ my $statline = $_[1];
+
+ if ($pid == 0) {
+ return "swapper-0";
+ }
+
+ if ($statline !~ /$regex_statname/o) {
+ die("Failed to math stat line for process name :: $statline");
+ }
+ return "$1-$pid";
+}
+
+sub parent_info($$) {
+ my $pid = $_[0];
+ my $statline = $_[1];
+ my $ppid;
+
+ if ($pid == 0) {
+ return "NOPARENT-0";
+ }
+
+ if ($statline !~ /$regex_statppid/o) {
+ die("Failed to match stat line process ppid:: $statline");
+ }
+
+ # Read the ppid stat line
+ $ppid = $1;
+ return guess_process_pid($ppid, read_statline($ppid));
+}
+
+sub process_events {
+ my $traceevent;
+ my $process_pid;
+ my $cpus;
+ my $timestamp;
+ my $tracepoint;
+ my $details;
+ my $statline;
+
+ # Read each line of the event log
+EVENT_PROCESS:
+ while ($traceevent = <STDIN>) {
+ if ($traceevent =~ /$regex_traceevent/o) {
+ $process_pid = $1;
+ $tracepoint = $4;
+
+ if ($opt_read_procstat || $opt_prepend_parent) {
+ $process_pid =~ /(.*)-([0-9]*)$/;
+ my $process = $1;
+ my $pid = $2;
+
+ $statline = read_statline($pid);
+
+ if ($opt_read_procstat && $process eq '') {
+ $process_pid = guess_process_pid($pid, $statline);
+ }
+
+ if ($opt_prepend_parent) {
+ $process_pid = parent_info($pid, $statline) . " :: $process_pid";
+ }
+ }
+
+ # Unnecessary in this script. Uncomment if required
+ # $cpus = $2;
+ # $timestamp = $3;
+ } else {
+ next;
+ }
+
+ # Perl Switch() sucks majorly
+ if ($tracepoint eq "mm_page_alloc") {
+ $perprocesspid{$process_pid}->{MM_PAGE_ALLOC}++;
+ } elsif ($tracepoint eq "mm_page_free") {
+ $perprocesspid{$process_pid}->{MM_PAGE_FREE}++
+ } elsif ($tracepoint eq "mm_page_free_batched") {
+ $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED}++;
+ } elsif ($tracepoint eq "mm_page_pcpu_drain") {
+ $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN}++;
+ $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED}++;
+ } elsif ($tracepoint eq "mm_page_alloc_zone_locked") {
+ $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED}++;
+ $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED}++;
+ } elsif ($tracepoint eq "mm_page_alloc_extfrag") {
+
+ # Extract the details of the event now
+ $details = $5;
+
+ my ($page, $pfn);
+ my ($alloc_order, $fallback_order, $pageblock_order);
+ my ($alloc_migratetype, $fallback_migratetype);
+ my ($fragmenting, $change_ownership);
+
+ if ($details !~ /$regex_fragdetails/o) {
+ print "WARNING: Failed to parse mm_page_alloc_extfrag as expected\n";
+ next;
+ }
+
+ $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG}++;
+ $page = $1;
+ $pfn = $2;
+ $alloc_order = $3;
+ $fallback_order = $4;
+ $pageblock_order = $5;
+ $alloc_migratetype = $6;
+ $fallback_migratetype = $7;
+ $fragmenting = $8;
+ $change_ownership = $9;
+
+ if ($fragmenting) {
+ $perprocesspid{$process_pid}->{HIGH_EXT_FRAG}++;
+ if ($fallback_order <= 3) {
+ $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE}++;
+ } else {
+ $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE}++;
+ }
+ }
+ if ($change_ownership) {
+ $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED}++;
+ }
+ } else {
+ $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++;
+ }
+
+ # Catch a full pcpu drain event
+ if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} &&
+ $tracepoint ne "mm_page_pcpu_drain") {
+
+ $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS}++;
+ $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
+ }
+
+ # Catch a full pcpu refill event
+ if ($perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} &&
+ $tracepoint ne "mm_page_alloc_zone_locked") {
+ $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS}++;
+ $perprocesspid{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
+ }
+
+ if ($sigint_pending) {
+ last EVENT_PROCESS;
+ }
+ }
+}
+
+sub dump_stats {
+ my $hashref = shift;
+ my %stats = %$hashref;
+
+ # Dump per-process stats
+ my $process_pid;
+ my $max_strlen = 0;
+
+ # Get the maximum process name
+ foreach $process_pid (keys %perprocesspid) {
+ my $len = length($process_pid);
+ if ($len > $max_strlen) {
+ $max_strlen = $len;
+ }
+ }
+ $max_strlen += 2;
+
+ printf("\n");
+ printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
+ "Process", "Pages", "Pages", "Pages", "Pages", "PCPU", "PCPU", "PCPU", "Fragment", "Fragment", "MigType", "Fragment", "Fragment", "Unknown");
+ printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
+ "details", "allocd", "allocd", "freed", "freed", "pages", "drains", "refills", "Fallback", "Causing", "Changed", "Severe", "Moderate", "");
+
+ printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
+ "", "", "under lock", "direct", "pagevec", "drain", "", "", "", "", "", "", "", "");
+
+ foreach $process_pid (keys %stats) {
+ # Dump final aggregates
+ if ($stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED}) {
+ $stats{$process_pid}->{HIGH_PCPU_DRAINS}++;
+ $stats{$process_pid}->{STATE_PCPU_PAGES_DRAINED} = 0;
+ }
+ if ($stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED}) {
+ $stats{$process_pid}->{HIGH_PCPU_REFILLS}++;
+ $stats{$process_pid}->{STATE_PCPU_PAGES_REFILLED} = 0;
+ }
+
+ printf("%-" . $max_strlen . "s %8d %10d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d %8d\n",
+ $process_pid,
+ $stats{$process_pid}->{MM_PAGE_ALLOC},
+ $stats{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED},
+ $stats{$process_pid}->{MM_PAGE_FREE},
+ $stats{$process_pid}->{MM_PAGE_FREE_BATCHED},
+ $stats{$process_pid}->{MM_PAGE_PCPU_DRAIN},
+ $stats{$process_pid}->{HIGH_PCPU_DRAINS},
+ $stats{$process_pid}->{HIGH_PCPU_REFILLS},
+ $stats{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG},
+ $stats{$process_pid}->{HIGH_EXT_FRAG},
+ $stats{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED},
+ $stats{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE},
+ $stats{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE},
+ $stats{$process_pid}->{EVENT_UNKNOWN});
+ }
+}
+
+sub aggregate_perprocesspid() {
+ my $process_pid;
+ my $process;
+ undef %perprocess;
+
+ foreach $process_pid (keys %perprocesspid) {
+ $process = $process_pid;
+ $process =~ s/-([0-9])*$//;
+ if ($process eq '') {
+ $process = "NO_PROCESS_NAME";
+ }
+
+ $perprocess{$process}->{MM_PAGE_ALLOC} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC};
+ $perprocess{$process}->{MM_PAGE_ALLOC_ZONE_LOCKED} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_ZONE_LOCKED};
+ $perprocess{$process}->{MM_PAGE_FREE} += $perprocesspid{$process_pid}->{MM_PAGE_FREE};
+ $perprocess{$process}->{MM_PAGE_FREE_BATCHED} += $perprocesspid{$process_pid}->{MM_PAGE_FREE_BATCHED};
+ $perprocess{$process}->{MM_PAGE_PCPU_DRAIN} += $perprocesspid{$process_pid}->{MM_PAGE_PCPU_DRAIN};
+ $perprocess{$process}->{HIGH_PCPU_DRAINS} += $perprocesspid{$process_pid}->{HIGH_PCPU_DRAINS};
+ $perprocess{$process}->{HIGH_PCPU_REFILLS} += $perprocesspid{$process_pid}->{HIGH_PCPU_REFILLS};
+ $perprocess{$process}->{MM_PAGE_ALLOC_EXTFRAG} += $perprocesspid{$process_pid}->{MM_PAGE_ALLOC_EXTFRAG};
+ $perprocess{$process}->{HIGH_EXT_FRAG} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAG};
+ $perprocess{$process}->{HIGH_EXT_FRAGMENT_CHANGED} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_CHANGED};
+ $perprocess{$process}->{HIGH_EXT_FRAGMENT_SEVERE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_SEVERE};
+ $perprocess{$process}->{HIGH_EXT_FRAGMENT_MODERATE} += $perprocesspid{$process_pid}->{HIGH_EXT_FRAGMENT_MODERATE};
+ $perprocess{$process}->{EVENT_UNKNOWN} += $perprocesspid{$process_pid}->{EVENT_UNKNOWN};
+ }
+}
+
+sub report() {
+ if (!$opt_ignorepid) {
+ dump_stats(\%perprocesspid);
+ } else {
+ aggregate_perprocesspid();
+ dump_stats(\%perprocess);
+ }
+}
+
+# Process events or signals until neither is available
+sub signal_loop() {
+ my $sigint_processed;
+ do {
+ $sigint_processed = 0;
+ process_events();
+
+ # Handle pending signals if any
+ if ($sigint_pending) {
+ my $current_time = time;
+
+ if ($sigint_exit) {
+ print "Received exit signal\n";
+ $sigint_pending = 0;
+ }
+ if ($sigint_report) {
+ if ($current_time >= $sigint_received + 2) {
+ report();
+ $sigint_report = 0;
+ $sigint_pending = 0;
+ $sigint_processed = 1;
+ }
+ }
+ }
+ } while ($sigint_pending || $sigint_processed);
+}
+
+signal_loop();
+report();
diff --git a/Documentation/trace/postprocess/trace-vmscan-postprocess.pl b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
new file mode 100644
index 000000000..8f961ef2b
--- /dev/null
+++ b/Documentation/trace/postprocess/trace-vmscan-postprocess.pl
@@ -0,0 +1,757 @@
+#!/usr/bin/perl
+# This is a POC for reading the text representation of trace output related to
+# page reclaim. It makes an attempt to extract some high-level information on
+# what is going on. The accuracy of the parser may vary
+#
+# Example usage: trace-vmscan-postprocess.pl < /sys/kernel/debug/tracing/trace_pipe
+# other options
+# --read-procstat If the trace lacks process info, get it from /proc
+# --ignore-pid Aggregate processes of the same name together
+#
+# Copyright (c) IBM Corporation 2009
+# Author: Mel Gorman <mel@csn.ul.ie>
+use strict;
+use Getopt::Long;
+
+# Tracepoint events
+use constant MM_VMSCAN_DIRECT_RECLAIM_BEGIN => 1;
+use constant MM_VMSCAN_DIRECT_RECLAIM_END => 2;
+use constant MM_VMSCAN_KSWAPD_WAKE => 3;
+use constant MM_VMSCAN_KSWAPD_SLEEP => 4;
+use constant MM_VMSCAN_LRU_SHRINK_ACTIVE => 5;
+use constant MM_VMSCAN_LRU_SHRINK_INACTIVE => 6;
+use constant MM_VMSCAN_LRU_ISOLATE => 7;
+use constant MM_VMSCAN_WRITEPAGE_FILE_SYNC => 8;
+use constant MM_VMSCAN_WRITEPAGE_ANON_SYNC => 9;
+use constant MM_VMSCAN_WRITEPAGE_FILE_ASYNC => 10;
+use constant MM_VMSCAN_WRITEPAGE_ANON_ASYNC => 11;
+use constant MM_VMSCAN_WRITEPAGE_ASYNC => 12;
+use constant EVENT_UNKNOWN => 13;
+
+# Per-order events
+use constant MM_VMSCAN_DIRECT_RECLAIM_BEGIN_PERORDER => 11;
+use constant MM_VMSCAN_WAKEUP_KSWAPD_PERORDER => 12;
+use constant MM_VMSCAN_KSWAPD_WAKE_PERORDER => 13;
+use constant HIGH_KSWAPD_REWAKEUP_PERORDER => 14;
+
+# Constants used to track state
+use constant STATE_DIRECT_BEGIN => 15;
+use constant STATE_DIRECT_ORDER => 16;
+use constant STATE_KSWAPD_BEGIN => 17;
+use constant STATE_KSWAPD_ORDER => 18;
+
+# High-level events extrapolated from tracepoints
+use constant HIGH_DIRECT_RECLAIM_LATENCY => 19;
+use constant HIGH_KSWAPD_LATENCY => 20;
+use constant HIGH_KSWAPD_REWAKEUP => 21;
+use constant HIGH_NR_SCANNED => 22;
+use constant HIGH_NR_TAKEN => 23;
+use constant HIGH_NR_RECLAIMED => 24;
+use constant HIGH_NR_FILE_SCANNED => 25;
+use constant HIGH_NR_ANON_SCANNED => 26;
+use constant HIGH_NR_FILE_RECLAIMED => 27;
+use constant HIGH_NR_ANON_RECLAIMED => 28;
+
+my %perprocesspid;
+my %perprocess;
+my %last_procmap;
+my $opt_ignorepid;
+my $opt_read_procstat;
+
+my $total_wakeup_kswapd;
+my ($total_direct_reclaim, $total_direct_nr_scanned);
+my ($total_direct_nr_file_scanned, $total_direct_nr_anon_scanned);
+my ($total_direct_latency, $total_kswapd_latency);
+my ($total_direct_nr_reclaimed);
+my ($total_direct_nr_file_reclaimed, $total_direct_nr_anon_reclaimed);
+my ($total_direct_writepage_file_sync, $total_direct_writepage_file_async);
+my ($total_direct_writepage_anon_sync, $total_direct_writepage_anon_async);
+my ($total_kswapd_nr_scanned, $total_kswapd_wake);
+my ($total_kswapd_nr_file_scanned, $total_kswapd_nr_anon_scanned);
+my ($total_kswapd_writepage_file_sync, $total_kswapd_writepage_file_async);
+my ($total_kswapd_writepage_anon_sync, $total_kswapd_writepage_anon_async);
+my ($total_kswapd_nr_reclaimed);
+my ($total_kswapd_nr_file_reclaimed, $total_kswapd_nr_anon_reclaimed);
+
+# Catch sigint and exit on request
+my $sigint_report = 0;
+my $sigint_exit = 0;
+my $sigint_pending = 0;
+my $sigint_received = 0;
+sub sigint_handler {
+ my $current_time = time;
+ if ($current_time - 2 > $sigint_received) {
+ print "SIGINT received, report pending. Hit ctrl-c again to exit\n";
+ $sigint_report = 1;
+ } else {
+ if (!$sigint_exit) {
+ print "Second SIGINT received quickly, exiting\n";
+ }
+ $sigint_exit++;
+ }
+
+ if ($sigint_exit > 3) {
+ print "Many SIGINTs received, exiting now without report\n";
+ exit;
+ }
+
+ $sigint_received = $current_time;
+ $sigint_pending = 1;
+}
+$SIG{INT} = "sigint_handler";
+
+# Parse command line options
+GetOptions(
+ 'ignore-pid' => \$opt_ignorepid,
+ 'read-procstat' => \$opt_read_procstat,
+);
+
+# Defaults for dynamically discovered regex's
+my $regex_direct_begin_default = 'order=([0-9]*) may_writepage=([0-9]*) gfp_flags=([A-Z_|]*)';
+my $regex_direct_end_default = 'nr_reclaimed=([0-9]*)';
+my $regex_kswapd_wake_default = 'nid=([0-9]*) order=([0-9]*)';
+my $regex_kswapd_sleep_default = 'nid=([0-9]*)';
+my $regex_wakeup_kswapd_default = 'nid=([0-9]*) zid=([0-9]*) order=([0-9]*)';
+my $regex_lru_isolate_default = 'isolate_mode=([0-9]*) order=([0-9]*) nr_requested=([0-9]*) nr_scanned=([0-9]*) nr_taken=([0-9]*) file=([0-9]*)';
+my $regex_lru_shrink_inactive_default = 'nid=([0-9]*) zid=([0-9]*) nr_scanned=([0-9]*) nr_reclaimed=([0-9]*) priority=([0-9]*) flags=([A-Z_|]*)';
+my $regex_lru_shrink_active_default = 'lru=([A-Z_]*) nr_scanned=([0-9]*) nr_rotated=([0-9]*) priority=([0-9]*)';
+my $regex_writepage_default = 'page=([0-9a-f]*) pfn=([0-9]*) flags=([A-Z_|]*)';
+
+# Dyanically discovered regex
+my $regex_direct_begin;
+my $regex_direct_end;
+my $regex_kswapd_wake;
+my $regex_kswapd_sleep;
+my $regex_wakeup_kswapd;
+my $regex_lru_isolate;
+my $regex_lru_shrink_inactive;
+my $regex_lru_shrink_active;
+my $regex_writepage;
+
+# Static regex used. Specified like this for readability and for use with /o
+# (process_pid) (cpus ) ( time ) (tpoint ) (details)
+my $regex_traceevent = '\s*([a-zA-Z0-9-]*)\s*(\[[0-9]*\])(\s*[dX.][Nnp.][Hhs.][0-9a-fA-F.]*|)\s*([0-9.]*):\s*([a-zA-Z_]*):\s*(.*)';
+my $regex_statname = '[-0-9]*\s\((.*)\).*';
+my $regex_statppid = '[-0-9]*\s\(.*\)\s[A-Za-z]\s([0-9]*).*';
+
+sub generate_traceevent_regex {
+ my $event = shift;
+ my $default = shift;
+ my $regex;
+
+ # Read the event format or use the default
+ if (!open (FORMAT, "/sys/kernel/debug/tracing/events/$event/format")) {
+ print("WARNING: Event $event format string not found\n");
+ return $default;
+ } else {
+ my $line;
+ while (!eof(FORMAT)) {
+ $line = <FORMAT>;
+ $line =~ s/, REC->.*//;
+ if ($line =~ /^print fmt:\s"(.*)".*/) {
+ $regex = $1;
+ $regex =~ s/%s/\([0-9a-zA-Z|_]*\)/g;
+ $regex =~ s/%p/\([0-9a-f]*\)/g;
+ $regex =~ s/%d/\([-0-9]*\)/g;
+ $regex =~ s/%ld/\([-0-9]*\)/g;
+ $regex =~ s/%lu/\([0-9]*\)/g;
+ }
+ }
+ }
+
+ # Can't handle the print_flags stuff but in the context of this
+ # script, it really doesn't matter
+ $regex =~ s/\(REC.*\) \? __print_flags.*//;
+
+ # Verify fields are in the right order
+ my $tuple;
+ foreach $tuple (split /\s/, $regex) {
+ my ($key, $value) = split(/=/, $tuple);
+ my $expected = shift;
+ if ($key ne $expected) {
+ print("WARNING: Format not as expected for event $event '$key' != '$expected'\n");
+ $regex =~ s/$key=\((.*)\)/$key=$1/;
+ }
+ }
+
+ if (defined shift) {
+ die("Fewer fields than expected in format");
+ }
+
+ return $regex;
+}
+
+$regex_direct_begin = generate_traceevent_regex(
+ "vmscan/mm_vmscan_direct_reclaim_begin",
+ $regex_direct_begin_default,
+ "order", "may_writepage",
+ "gfp_flags");
+$regex_direct_end = generate_traceevent_regex(
+ "vmscan/mm_vmscan_direct_reclaim_end",
+ $regex_direct_end_default,
+ "nr_reclaimed");
+$regex_kswapd_wake = generate_traceevent_regex(
+ "vmscan/mm_vmscan_kswapd_wake",
+ $regex_kswapd_wake_default,
+ "nid", "order");
+$regex_kswapd_sleep = generate_traceevent_regex(
+ "vmscan/mm_vmscan_kswapd_sleep",
+ $regex_kswapd_sleep_default,
+ "nid");
+$regex_wakeup_kswapd = generate_traceevent_regex(
+ "vmscan/mm_vmscan_wakeup_kswapd",
+ $regex_wakeup_kswapd_default,
+ "nid", "zid", "order");
+$regex_lru_isolate = generate_traceevent_regex(
+ "vmscan/mm_vmscan_lru_isolate",
+ $regex_lru_isolate_default,
+ "isolate_mode", "order",
+ "nr_requested", "nr_scanned", "nr_taken",
+ "file");
+$regex_lru_shrink_inactive = generate_traceevent_regex(
+ "vmscan/mm_vmscan_lru_shrink_inactive",
+ $regex_lru_shrink_inactive_default,
+ "nid", "zid",
+ "nr_scanned", "nr_reclaimed", "priority",
+ "flags");
+$regex_lru_shrink_active = generate_traceevent_regex(
+ "vmscan/mm_vmscan_lru_shrink_active",
+ $regex_lru_shrink_active_default,
+ "nid", "zid",
+ "lru",
+ "nr_scanned", "nr_rotated", "priority");
+$regex_writepage = generate_traceevent_regex(
+ "vmscan/mm_vmscan_writepage",
+ $regex_writepage_default,
+ "page", "pfn", "flags");
+
+sub read_statline($) {
+ my $pid = $_[0];
+ my $statline;
+
+ if (open(STAT, "/proc/$pid/stat")) {
+ $statline = <STAT>;
+ close(STAT);
+ }
+
+ if ($statline eq '') {
+ $statline = "-1 (UNKNOWN_PROCESS_NAME) R 0";
+ }
+
+ return $statline;
+}
+
+sub guess_process_pid($$) {
+ my $pid = $_[0];
+ my $statline = $_[1];
+
+ if ($pid == 0) {
+ return "swapper-0";
+ }
+
+ if ($statline !~ /$regex_statname/o) {
+ die("Failed to math stat line for process name :: $statline");
+ }
+ return "$1-$pid";
+}
+
+# Convert sec.usec timestamp format
+sub timestamp_to_ms($) {
+ my $timestamp = $_[0];
+
+ my ($sec, $usec) = split (/\./, $timestamp);
+ return ($sec * 1000) + ($usec / 1000);
+}
+
+sub process_events {
+ my $traceevent;
+ my $process_pid;
+ my $cpus;
+ my $timestamp;
+ my $tracepoint;
+ my $details;
+ my $statline;
+
+ # Read each line of the event log
+EVENT_PROCESS:
+ while ($traceevent = <STDIN>) {
+ if ($traceevent =~ /$regex_traceevent/o) {
+ $process_pid = $1;
+ $timestamp = $4;
+ $tracepoint = $5;
+
+ $process_pid =~ /(.*)-([0-9]*)$/;
+ my $process = $1;
+ my $pid = $2;
+
+ if ($process eq "") {
+ $process = $last_procmap{$pid};
+ $process_pid = "$process-$pid";
+ }
+ $last_procmap{$pid} = $process;
+
+ if ($opt_read_procstat) {
+ $statline = read_statline($pid);
+ if ($opt_read_procstat && $process eq '') {
+ $process_pid = guess_process_pid($pid, $statline);
+ }
+ }
+ } else {
+ next;
+ }
+
+ # Perl Switch() sucks majorly
+ if ($tracepoint eq "mm_vmscan_direct_reclaim_begin") {
+ $timestamp = timestamp_to_ms($timestamp);
+ $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}++;
+ $perprocesspid{$process_pid}->{STATE_DIRECT_BEGIN} = $timestamp;
+
+ $details = $6;
+ if ($details !~ /$regex_direct_begin/o) {
+ print "WARNING: Failed to parse mm_vmscan_direct_reclaim_begin as expected\n";
+ print " $details\n";
+ print " $regex_direct_begin\n";
+ next;
+ }
+ my $order = $1;
+ $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN_PERORDER}[$order]++;
+ $perprocesspid{$process_pid}->{STATE_DIRECT_ORDER} = $order;
+ } elsif ($tracepoint eq "mm_vmscan_direct_reclaim_end") {
+ # Count the event itself
+ my $index = $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_END};
+ $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_END}++;
+
+ # Record how long direct reclaim took this time
+ if (defined $perprocesspid{$process_pid}->{STATE_DIRECT_BEGIN}) {
+ $timestamp = timestamp_to_ms($timestamp);
+ my $order = $perprocesspid{$process_pid}->{STATE_DIRECT_ORDER};
+ my $latency = ($timestamp - $perprocesspid{$process_pid}->{STATE_DIRECT_BEGIN});
+ $perprocesspid{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index] = "$order-$latency";
+ }
+ } elsif ($tracepoint eq "mm_vmscan_kswapd_wake") {
+ $details = $6;
+ if ($details !~ /$regex_kswapd_wake/o) {
+ print "WARNING: Failed to parse mm_vmscan_kswapd_wake as expected\n";
+ print " $details\n";
+ print " $regex_kswapd_wake\n";
+ next;
+ }
+
+ my $order = $2;
+ $perprocesspid{$process_pid}->{STATE_KSWAPD_ORDER} = $order;
+ if (!$perprocesspid{$process_pid}->{STATE_KSWAPD_BEGIN}) {
+ $timestamp = timestamp_to_ms($timestamp);
+ $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}++;
+ $perprocesspid{$process_pid}->{STATE_KSWAPD_BEGIN} = $timestamp;
+ $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE_PERORDER}[$order]++;
+ } else {
+ $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP}++;
+ $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP_PERORDER}[$order]++;
+ }
+ } elsif ($tracepoint eq "mm_vmscan_kswapd_sleep") {
+
+ # Count the event itself
+ my $index = $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_SLEEP};
+ $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_SLEEP}++;
+
+ # Record how long kswapd was awake
+ $timestamp = timestamp_to_ms($timestamp);
+ my $order = $perprocesspid{$process_pid}->{STATE_KSWAPD_ORDER};
+ my $latency = ($timestamp - $perprocesspid{$process_pid}->{STATE_KSWAPD_BEGIN});
+ $perprocesspid{$process_pid}->{HIGH_KSWAPD_LATENCY}[$index] = "$order-$latency";
+ $perprocesspid{$process_pid}->{STATE_KSWAPD_BEGIN} = 0;
+ } elsif ($tracepoint eq "mm_vmscan_wakeup_kswapd") {
+ $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}++;
+
+ $details = $6;
+ if ($details !~ /$regex_wakeup_kswapd/o) {
+ print "WARNING: Failed to parse mm_vmscan_wakeup_kswapd as expected\n";
+ print " $details\n";
+ print " $regex_wakeup_kswapd\n";
+ next;
+ }
+ my $order = $3;
+ $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order]++;
+ } elsif ($tracepoint eq "mm_vmscan_lru_isolate") {
+ $details = $6;
+ if ($details !~ /$regex_lru_isolate/o) {
+ print "WARNING: Failed to parse mm_vmscan_lru_isolate as expected\n";
+ print " $details\n";
+ print " $regex_lru_isolate/o\n";
+ next;
+ }
+ my $isolate_mode = $1;
+ my $nr_scanned = $4;
+ my $file = $6;
+
+ # To closer match vmstat scanning statistics, only count isolate_both
+ # and isolate_inactive as scanning. isolate_active is rotation
+ # isolate_inactive == 1
+ # isolate_active == 2
+ # isolate_both == 3
+ if ($isolate_mode != 2) {
+ $perprocesspid{$process_pid}->{HIGH_NR_SCANNED} += $nr_scanned;
+ if ($file == 1) {
+ $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED} += $nr_scanned;
+ } else {
+ $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED} += $nr_scanned;
+ }
+ }
+ } elsif ($tracepoint eq "mm_vmscan_lru_shrink_inactive") {
+ $details = $6;
+ if ($details !~ /$regex_lru_shrink_inactive/o) {
+ print "WARNING: Failed to parse mm_vmscan_lru_shrink_inactive as expected\n";
+ print " $details\n";
+ print " $regex_lru_shrink_inactive/o\n";
+ next;
+ }
+
+ my $nr_reclaimed = $4;
+ my $flags = $6;
+ my $file = 0;
+ if ($flags =~ /RECLAIM_WB_FILE/) {
+ $file = 1;
+ }
+ $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED} += $nr_reclaimed;
+ if ($file) {
+ $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED} += $nr_reclaimed;
+ } else {
+ $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED} += $nr_reclaimed;
+ }
+ } elsif ($tracepoint eq "mm_vmscan_writepage") {
+ $details = $6;
+ if ($details !~ /$regex_writepage/o) {
+ print "WARNING: Failed to parse mm_vmscan_writepage as expected\n";
+ print " $details\n";
+ print " $regex_writepage\n";
+ next;
+ }
+
+ my $flags = $3;
+ my $file = 0;
+ my $sync_io = 0;
+ if ($flags =~ /RECLAIM_WB_FILE/) {
+ $file = 1;
+ }
+ if ($flags =~ /RECLAIM_WB_SYNC/) {
+ $sync_io = 1;
+ }
+ if ($sync_io) {
+ if ($file) {
+ $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC}++;
+ } else {
+ $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC}++;
+ }
+ } else {
+ if ($file) {
+ $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC}++;
+ } else {
+ $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC}++;
+ }
+ }
+ } else {
+ $perprocesspid{$process_pid}->{EVENT_UNKNOWN}++;
+ }
+
+ if ($sigint_pending) {
+ last EVENT_PROCESS;
+ }
+ }
+}
+
+sub dump_stats {
+ my $hashref = shift;
+ my %stats = %$hashref;
+
+ # Dump per-process stats
+ my $process_pid;
+ my $max_strlen = 0;
+
+ # Get the maximum process name
+ foreach $process_pid (keys %perprocesspid) {
+ my $len = length($process_pid);
+ if ($len > $max_strlen) {
+ $max_strlen = $len;
+ }
+ }
+ $max_strlen += 2;
+
+ # Work out latencies
+ printf("\n") if !$opt_ignorepid;
+ printf("Reclaim latencies expressed as order-latency_in_ms\n") if !$opt_ignorepid;
+ foreach $process_pid (keys %stats) {
+
+ if (!$stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[0] &&
+ !$stats{$process_pid}->{HIGH_KSWAPD_LATENCY}[0]) {
+ next;
+ }
+
+ printf "%-" . $max_strlen . "s ", $process_pid if !$opt_ignorepid;
+ my $index = 0;
+ while (defined $stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index] ||
+ defined $stats{$process_pid}->{HIGH_KSWAPD_LATENCY}[$index]) {
+
+ if ($stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index]) {
+ printf("%s ", $stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index]) if !$opt_ignorepid;
+ my ($dummy, $latency) = split(/-/, $stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index]);
+ $total_direct_latency += $latency;
+ } else {
+ printf("%s ", $stats{$process_pid}->{HIGH_KSWAPD_LATENCY}[$index]) if !$opt_ignorepid;
+ my ($dummy, $latency) = split(/-/, $stats{$process_pid}->{HIGH_KSWAPD_LATENCY}[$index]);
+ $total_kswapd_latency += $latency;
+ }
+ $index++;
+ }
+ print "\n" if !$opt_ignorepid;
+ }
+
+ # Print out process activity
+ printf("\n");
+ printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s\n", "Process", "Direct", "Wokeup", "Pages", "Pages", "Pages", "Pages", "Time");
+ printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s %8s %8s\n", "details", "Rclms", "Kswapd", "Scanned", "Rclmed", "Sync-IO", "ASync-IO", "Stalled");
+ foreach $process_pid (keys %stats) {
+
+ if (!$stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}) {
+ next;
+ }
+
+ $total_direct_reclaim += $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN};
+ $total_wakeup_kswapd += $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
+ $total_direct_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
+ $total_direct_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
+ $total_direct_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
+ $total_direct_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
+ $total_direct_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+ $total_direct_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
+ $total_direct_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
+ $total_direct_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
+ $total_direct_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
+
+ $total_direct_writepage_anon_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC};
+
+ my $index = 0;
+ my $this_reclaim_delay = 0;
+ while (defined $stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index]) {
+ my ($dummy, $latency) = split(/-/, $stats{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$index]);
+ $this_reclaim_delay += $latency;
+ $index++;
+ }
+
+ printf("%-" . $max_strlen . "s %8d %10d %8u %8u %8u %8u %8.3f",
+ $process_pid,
+ $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN},
+ $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD},
+ $stats{$process_pid}->{HIGH_NR_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
+ $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
+ $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC},
+ $this_reclaim_delay / 1000);
+
+ if ($stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN}) {
+ print " ";
+ for (my $order = 0; $order < 20; $order++) {
+ my $count = $stats{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN_PERORDER}[$order];
+ if ($count != 0) {
+ print "direct-$order=$count ";
+ }
+ }
+ }
+ if ($stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD}) {
+ print " ";
+ for (my $order = 0; $order < 20; $order++) {
+ my $count = $stats{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order];
+ if ($count != 0) {
+ print "wakeup-$order=$count ";
+ }
+ }
+ }
+
+ print "\n";
+ }
+
+ # Print out kswapd activity
+ printf("\n");
+ printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s\n", "Kswapd", "Kswapd", "Order", "Pages", "Pages", "Pages", "Pages");
+ printf("%-" . $max_strlen . "s %8s %10s %8s %8s %8s %8s\n", "Instance", "Wakeups", "Re-wakeup", "Scanned", "Rclmed", "Sync-IO", "ASync-IO");
+ foreach $process_pid (keys %stats) {
+
+ if (!$stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}) {
+ next;
+ }
+
+ $total_kswapd_wake += $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE};
+ $total_kswapd_nr_scanned += $stats{$process_pid}->{HIGH_NR_SCANNED};
+ $total_kswapd_nr_file_scanned += $stats{$process_pid}->{HIGH_NR_FILE_SCANNED};
+ $total_kswapd_nr_anon_scanned += $stats{$process_pid}->{HIGH_NR_ANON_SCANNED};
+ $total_kswapd_nr_reclaimed += $stats{$process_pid}->{HIGH_NR_RECLAIMED};
+ $total_kswapd_nr_file_reclaimed += $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+ $total_kswapd_nr_anon_reclaimed += $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
+ $total_kswapd_writepage_file_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
+ $total_kswapd_writepage_anon_sync += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
+ $total_kswapd_writepage_file_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
+ $total_kswapd_writepage_anon_async += $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC};
+
+ printf("%-" . $max_strlen . "s %8d %10d %8u %8u %8i %8u",
+ $process_pid,
+ $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE},
+ $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP},
+ $stats{$process_pid}->{HIGH_NR_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_FILE_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_ANON_SCANNED},
+ $stats{$process_pid}->{HIGH_NR_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_FILE_RECLAIMED},
+ $stats{$process_pid}->{HIGH_NR_ANON_RECLAIMED},
+ $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC},
+ $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} + $stats{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC});
+
+ if ($stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE}) {
+ print " ";
+ for (my $order = 0; $order < 20; $order++) {
+ my $count = $stats{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE_PERORDER}[$order];
+ if ($count != 0) {
+ print "wake-$order=$count ";
+ }
+ }
+ }
+ if ($stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP}) {
+ print " ";
+ for (my $order = 0; $order < 20; $order++) {
+ my $count = $stats{$process_pid}->{HIGH_KSWAPD_REWAKEUP_PERORDER}[$order];
+ if ($count != 0) {
+ print "rewake-$order=$count ";
+ }
+ }
+ }
+ printf("\n");
+ }
+
+ # Print out summaries
+ $total_direct_latency /= 1000;
+ $total_kswapd_latency /= 1000;
+ print "\nSummary\n";
+ print "Direct reclaims: $total_direct_reclaim\n";
+ print "Direct reclaim pages scanned: $total_direct_nr_scanned\n";
+ print "Direct reclaim file pages scanned: $total_direct_nr_file_scanned\n";
+ print "Direct reclaim anon pages scanned: $total_direct_nr_anon_scanned\n";
+ print "Direct reclaim pages reclaimed: $total_direct_nr_reclaimed\n";
+ print "Direct reclaim file pages reclaimed: $total_direct_nr_file_reclaimed\n";
+ print "Direct reclaim anon pages reclaimed: $total_direct_nr_anon_reclaimed\n";
+ print "Direct reclaim write file sync I/O: $total_direct_writepage_file_sync\n";
+ print "Direct reclaim write anon sync I/O: $total_direct_writepage_anon_sync\n";
+ print "Direct reclaim write file async I/O: $total_direct_writepage_file_async\n";
+ print "Direct reclaim write anon async I/O: $total_direct_writepage_anon_async\n";
+ print "Wake kswapd requests: $total_wakeup_kswapd\n";
+ printf "Time stalled direct reclaim: %-1.2f seconds\n", $total_direct_latency;
+ print "\n";
+ print "Kswapd wakeups: $total_kswapd_wake\n";
+ print "Kswapd pages scanned: $total_kswapd_nr_scanned\n";
+ print "Kswapd file pages scanned: $total_kswapd_nr_file_scanned\n";
+ print "Kswapd anon pages scanned: $total_kswapd_nr_anon_scanned\n";
+ print "Kswapd pages reclaimed: $total_kswapd_nr_reclaimed\n";
+ print "Kswapd file pages reclaimed: $total_kswapd_nr_file_reclaimed\n";
+ print "Kswapd anon pages reclaimed: $total_kswapd_nr_anon_reclaimed\n";
+ print "Kswapd reclaim write file sync I/O: $total_kswapd_writepage_file_sync\n";
+ print "Kswapd reclaim write anon sync I/O: $total_kswapd_writepage_anon_sync\n";
+ print "Kswapd reclaim write file async I/O: $total_kswapd_writepage_file_async\n";
+ print "Kswapd reclaim write anon async I/O: $total_kswapd_writepage_anon_async\n";
+ printf "Time kswapd awake: %-1.2f seconds\n", $total_kswapd_latency;
+}
+
+sub aggregate_perprocesspid() {
+ my $process_pid;
+ my $process;
+ undef %perprocess;
+
+ foreach $process_pid (keys %perprocesspid) {
+ $process = $process_pid;
+ $process =~ s/-([0-9])*$//;
+ if ($process eq '') {
+ $process = "NO_PROCESS_NAME";
+ }
+
+ $perprocess{$process}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN} += $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN};
+ $perprocess{$process}->{MM_VMSCAN_KSWAPD_WAKE} += $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE};
+ $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD} += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD};
+ $perprocess{$process}->{HIGH_KSWAPD_REWAKEUP} += $perprocesspid{$process_pid}->{HIGH_KSWAPD_REWAKEUP};
+ $perprocess{$process}->{HIGH_NR_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_SCANNED};
+ $perprocess{$process}->{HIGH_NR_FILE_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_SCANNED};
+ $perprocess{$process}->{HIGH_NR_ANON_SCANNED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_SCANNED};
+ $perprocess{$process}->{HIGH_NR_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_RECLAIMED};
+ $perprocess{$process}->{HIGH_NR_FILE_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_FILE_RECLAIMED};
+ $perprocess{$process}->{HIGH_NR_ANON_RECLAIMED} += $perprocesspid{$process_pid}->{HIGH_NR_ANON_RECLAIMED};
+ $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_SYNC};
+ $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_SYNC};
+ $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_FILE_ASYNC};
+ $perprocess{$process}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC} += $perprocesspid{$process_pid}->{MM_VMSCAN_WRITEPAGE_ANON_ASYNC};
+
+ for (my $order = 0; $order < 20; $order++) {
+ $perprocess{$process}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN_PERORDER}[$order] += $perprocesspid{$process_pid}->{MM_VMSCAN_DIRECT_RECLAIM_BEGIN_PERORDER}[$order];
+ $perprocess{$process}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order] += $perprocesspid{$process_pid}->{MM_VMSCAN_WAKEUP_KSWAPD_PERORDER}[$order];
+ $perprocess{$process}->{MM_VMSCAN_KSWAPD_WAKE_PERORDER}[$order] += $perprocesspid{$process_pid}->{MM_VMSCAN_KSWAPD_WAKE_PERORDER}[$order];
+
+ }
+
+ # Aggregate direct reclaim latencies
+ my $wr_index = $perprocess{$process}->{MM_VMSCAN_DIRECT_RECLAIM_END};
+ my $rd_index = 0;
+ while (defined $perprocesspid{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$rd_index]) {
+ $perprocess{$process}->{HIGH_DIRECT_RECLAIM_LATENCY}[$wr_index] = $perprocesspid{$process_pid}->{HIGH_DIRECT_RECLAIM_LATENCY}[$rd_index];
+ $rd_index++;
+ $wr_index++;
+ }
+ $perprocess{$process}->{MM_VMSCAN_DIRECT_RECLAIM_END} = $wr_index;
+
+ # Aggregate kswapd latencies
+ my $wr_index = $perprocess{$process}->{MM_VMSCAN_KSWAPD_SLEEP};
+ my $rd_index = 0;
+ while (defined $perprocesspid{$process_pid}->{HIGH_KSWAPD_LATENCY}[$rd_index]) {
+ $perprocess{$process}->{HIGH_KSWAPD_LATENCY}[$wr_index] = $perprocesspid{$process_pid}->{HIGH_KSWAPD_LATENCY}[$rd_index];
+ $rd_index++;
+ $wr_index++;
+ }
+ $perprocess{$process}->{MM_VMSCAN_DIRECT_RECLAIM_END} = $wr_index;
+ }
+}
+
+sub report() {
+ if (!$opt_ignorepid) {
+ dump_stats(\%perprocesspid);
+ } else {
+ aggregate_perprocesspid();
+ dump_stats(\%perprocess);
+ }
+}
+
+# Process events or signals until neither is available
+sub signal_loop() {
+ my $sigint_processed;
+ do {
+ $sigint_processed = 0;
+ process_events();
+
+ # Handle pending signals if any
+ if ($sigint_pending) {
+ my $current_time = time;
+
+ if ($sigint_exit) {
+ print "Received exit signal\n";
+ $sigint_pending = 0;
+ }
+ if ($sigint_report) {
+ if ($current_time >= $sigint_received + 2) {
+ report();
+ $sigint_report = 0;
+ $sigint_pending = 0;
+ $sigint_processed = 1;
+ }
+ }
+ }
+ } while ($sigint_pending || $sigint_processed);
+}
+
+signal_loop();
+report();
diff --git a/Documentation/trace/ring-buffer-design.txt b/Documentation/trace/ring-buffer-design.txt
new file mode 100644
index 000000000..ff747b6fa
--- /dev/null
+++ b/Documentation/trace/ring-buffer-design.txt
@@ -0,0 +1,955 @@
+ Lockless Ring Buffer Design
+ ===========================
+
+Copyright 2009 Red Hat Inc.
+ Author: Steven Rostedt <srostedt@redhat.com>
+ License: The GNU Free Documentation License, Version 1.2
+ (dual licensed under the GPL v2)
+Reviewers: Mathieu Desnoyers, Huang Ying, Hidetoshi Seto,
+ and Frederic Weisbecker.
+
+
+Written for: 2.6.31
+
+Terminology used in this Document
+---------------------------------
+
+tail - where new writes happen in the ring buffer.
+
+head - where new reads happen in the ring buffer.
+
+producer - the task that writes into the ring buffer (same as writer)
+
+writer - same as producer
+
+consumer - the task that reads from the buffer (same as reader)
+
+reader - same as consumer.
+
+reader_page - A page outside the ring buffer used solely (for the most part)
+ by the reader.
+
+head_page - a pointer to the page that the reader will use next
+
+tail_page - a pointer to the page that will be written to next
+
+commit_page - a pointer to the page with the last finished non-nested write.
+
+cmpxchg - hardware-assisted atomic transaction that performs the following:
+
+ A = B iff previous A == C
+
+ R = cmpxchg(A, C, B) is saying that we replace A with B if and only if
+ current A is equal to C, and we put the old (current) A into R
+
+ R gets the previous A regardless if A is updated with B or not.
+
+ To see if the update was successful a compare of R == C may be used.
+
+The Generic Ring Buffer
+-----------------------
+
+The ring buffer can be used in either an overwrite mode or in
+producer/consumer mode.
+
+Producer/consumer mode is where if the producer were to fill up the
+buffer before the consumer could free up anything, the producer
+will stop writing to the buffer. This will lose most recent events.
+
+Overwrite mode is where if the producer were to fill up the buffer
+before the consumer could free up anything, the producer will
+overwrite the older data. This will lose the oldest events.
+
+No two writers can write at the same time (on the same per-cpu buffer),
+but a writer may interrupt another writer, but it must finish writing
+before the previous writer may continue. This is very important to the
+algorithm. The writers act like a "stack". The way interrupts works
+enforces this behavior.
+
+
+ writer1 start
+ <preempted> writer2 start
+ <preempted> writer3 start
+ writer3 finishes
+ writer2 finishes
+ writer1 finishes
+
+This is very much like a writer being preempted by an interrupt and
+the interrupt doing a write as well.
+
+Readers can happen at any time. But no two readers may run at the
+same time, nor can a reader preempt/interrupt another reader. A reader
+cannot preempt/interrupt a writer, but it may read/consume from the
+buffer at the same time as a writer is writing, but the reader must be
+on another processor to do so. A reader may read on its own processor
+and can be preempted by a writer.
+
+A writer can preempt a reader, but a reader cannot preempt a writer.
+But a reader can read the buffer at the same time (on another processor)
+as a writer.
+
+The ring buffer is made up of a list of pages held together by a linked list.
+
+At initialization a reader page is allocated for the reader that is not
+part of the ring buffer.
+
+The head_page, tail_page and commit_page are all initialized to point
+to the same page.
+
+The reader page is initialized to have its next pointer pointing to
+the head page, and its previous pointer pointing to a page before
+the head page.
+
+The reader has its own page to use. At start up time, this page is
+allocated but is not attached to the list. When the reader wants
+to read from the buffer, if its page is empty (like it is on start-up),
+it will swap its page with the head_page. The old reader page will
+become part of the ring buffer and the head_page will be removed.
+The page after the inserted page (old reader_page) will become the
+new head page.
+
+Once the new page is given to the reader, the reader could do what
+it wants with it, as long as a writer has left that page.
+
+A sample of how the reader page is swapped: Note this does not
+show the head page in the buffer, it is for demonstrating a swap
+only.
+
+ +------+
+ |reader| RING BUFFER
+ |page |
+ +------+
+ +---+ +---+ +---+
+ | |-->| |-->| |
+ | |<--| |<--| |
+ +---+ +---+ +---+
+ ^ | ^ |
+ | +-------------+ |
+ +-----------------+
+
+
+ +------+
+ |reader| RING BUFFER
+ |page |-------------------+
+ +------+ v
+ | +---+ +---+ +---+
+ | | |-->| |-->| |
+ | | |<--| |<--| |<-+
+ | +---+ +---+ +---+ |
+ | ^ | ^ | |
+ | | +-------------+ | |
+ | +-----------------+ |
+ +------------------------------------+
+
+ +------+
+ |reader| RING BUFFER
+ |page |-------------------+
+ +------+ <---------------+ v
+ | ^ +---+ +---+ +---+
+ | | | |-->| |-->| |
+ | | | | | |<--| |<-+
+ | | +---+ +---+ +---+ |
+ | | | ^ | |
+ | | +-------------+ | |
+ | +-----------------------------+ |
+ +------------------------------------+
+
+ +------+
+ |buffer| RING BUFFER
+ |page |-------------------+
+ +------+ <---------------+ v
+ | ^ +---+ +---+ +---+
+ | | | | | |-->| |
+ | | New | | | |<--| |<-+
+ | | Reader +---+ +---+ +---+ |
+ | | page ----^ | |
+ | | | |
+ | +-----------------------------+ |
+ +------------------------------------+
+
+
+
+It is possible that the page swapped is the commit page and the tail page,
+if what is in the ring buffer is less than what is held in a buffer page.
+
+
+ reader page commit page tail page
+ | | |
+ v | |
+ +---+ | |
+ | |<----------+ |
+ | |<------------------------+
+ | |------+
+ +---+ |
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+This case is still valid for this algorithm.
+When the writer leaves the page, it simply goes into the ring buffer
+since the reader page still points to the next location in the ring
+buffer.
+
+
+The main pointers:
+
+ reader page - The page used solely by the reader and is not part
+ of the ring buffer (may be swapped in)
+
+ head page - the next page in the ring buffer that will be swapped
+ with the reader page.
+
+ tail page - the page where the next write will take place.
+
+ commit page - the page that last finished a write.
+
+The commit page only is updated by the outermost writer in the
+writer stack. A writer that preempts another writer will not move the
+commit page.
+
+When data is written into the ring buffer, a position is reserved
+in the ring buffer and passed back to the writer. When the writer
+is finished writing data into that position, it commits the write.
+
+Another write (or a read) may take place at anytime during this
+transaction. If another write happens it must finish before continuing
+with the previous write.
+
+
+ Write reserve:
+
+ Buffer page
+ +---------+
+ |written |
+ +---------+ <--- given back to writer (current commit)
+ |reserved |
+ +---------+ <--- tail pointer
+ | empty |
+ +---------+
+
+ Write commit:
+
+ Buffer page
+ +---------+
+ |written |
+ +---------+
+ |written |
+ +---------+ <--- next position for write (current commit)
+ | empty |
+ +---------+
+
+
+ If a write happens after the first reserve:
+
+ Buffer page
+ +---------+
+ |written |
+ +---------+ <-- current commit
+ |reserved |
+ +---------+ <--- given back to second writer
+ |reserved |
+ +---------+ <--- tail pointer
+
+ After second writer commits:
+
+
+ Buffer page
+ +---------+
+ |written |
+ +---------+ <--(last full commit)
+ |reserved |
+ +---------+
+ |pending |
+ |commit |
+ +---------+ <--- tail pointer
+
+ When the first writer commits:
+
+ Buffer page
+ +---------+
+ |written |
+ +---------+
+ |written |
+ +---------+
+ |written |
+ +---------+ <--(last full commit and tail pointer)
+
+
+The commit pointer points to the last write location that was
+committed without preempting another write. When a write that
+preempted another write is committed, it only becomes a pending commit
+and will not be a full commit until all writes have been committed.
+
+The commit page points to the page that has the last full commit.
+The tail page points to the page with the last write (before
+committing).
+
+The tail page is always equal to or after the commit page. It may
+be several pages ahead. If the tail page catches up to the commit
+page then no more writes may take place (regardless of the mode
+of the ring buffer: overwrite and produce/consumer).
+
+The order of pages is:
+
+ head page
+ commit page
+ tail page
+
+Possible scenario:
+ tail page
+ head page commit page |
+ | | |
+ v v v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+There is a special case that the head page is after either the commit page
+and possibly the tail page. That is when the commit (and tail) page has been
+swapped with the reader page. This is because the head page is always
+part of the ring buffer, but the reader page is not. Whenever there
+has been less than a full page that has been committed inside the ring buffer,
+and a reader swaps out a page, it will be swapping out the commit page.
+
+
+ reader page commit page tail page
+ | | |
+ v | |
+ +---+ | |
+ | |<----------+ |
+ | |<------------------------+
+ | |------+
+ +---+ |
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+ ^
+ |
+ head page
+
+
+In this case, the head page will not move when the tail and commit
+move back into the ring buffer.
+
+The reader cannot swap a page into the ring buffer if the commit page
+is still on that page. If the read meets the last commit (real commit
+not pending or reserved), then there is nothing more to read.
+The buffer is considered empty until another full commit finishes.
+
+When the tail meets the head page, if the buffer is in overwrite mode,
+the head page will be pushed ahead one. If the buffer is in producer/consumer
+mode, the write will fail.
+
+Overwrite mode:
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+ ^
+ |
+ head page
+
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+ ^
+ |
+ head page
+
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+ ^
+ |
+ head page
+
+Note, the reader page will still point to the previous head page.
+But when a swap takes place, it will use the most recent head page.
+
+
+Making the Ring Buffer Lockless:
+--------------------------------
+
+The main idea behind the lockless algorithm is to combine the moving
+of the head_page pointer with the swapping of pages with the reader.
+State flags are placed inside the pointer to the page. To do this,
+each page must be aligned in memory by 4 bytes. This will allow the 2
+least significant bits of the address to be used as flags, since
+they will always be zero for the address. To get the address,
+simply mask out the flags.
+
+ MASK = ~3
+
+ address & MASK
+
+Two flags will be kept by these two bits:
+
+ HEADER - the page being pointed to is a head page
+
+ UPDATE - the page being pointed to is being updated by a writer
+ and was or is about to be a head page.
+
+
+ reader page
+ |
+ v
+ +---+
+ | |------+
+ +---+ |
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-H->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+
+The above pointer "-H->" would have the HEADER flag set. That is
+the next page is the next page to be swapped out by the reader.
+This pointer means the next page is the head page.
+
+When the tail page meets the head pointer, it will use cmpxchg to
+change the pointer to the UPDATE state:
+
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-H->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+"-U->" represents a pointer in the UPDATE state.
+
+Any access to the reader will need to take some sort of lock to serialize
+the readers. But the writers will never take a lock to write to the
+ring buffer. This means we only need to worry about a single reader,
+and writes only preempt in "stack" formation.
+
+When the reader tries to swap the page with the ring buffer, it
+will also use cmpxchg. If the flag bit in the pointer to the
+head page does not have the HEADER flag set, the compare will fail
+and the reader will need to look for the new head page and try again.
+Note, the flags UPDATE and HEADER are never set at the same time.
+
+The reader swaps the reader page as follows:
+
+ +------+
+ |reader| RING BUFFER
+ |page |
+ +------+
+ +---+ +---+ +---+
+ | |--->| |--->| |
+ | |<---| |<---| |
+ +---+ +---+ +---+
+ ^ | ^ |
+ | +---------------+ |
+ +-----H-------------+
+
+The reader sets the reader page next pointer as HEADER to the page after
+the head page.
+
+
+ +------+
+ |reader| RING BUFFER
+ |page |-------H-----------+
+ +------+ v
+ | +---+ +---+ +---+
+ | | |--->| |--->| |
+ | | |<---| |<---| |<-+
+ | +---+ +---+ +---+ |
+ | ^ | ^ | |
+ | | +---------------+ | |
+ | +-----H-------------+ |
+ +--------------------------------------+
+
+It does a cmpxchg with the pointer to the previous head page to make it
+point to the reader page. Note that the new pointer does not have the HEADER
+flag set. This action atomically moves the head page forward.
+
+ +------+
+ |reader| RING BUFFER
+ |page |-------H-----------+
+ +------+ v
+ | ^ +---+ +---+ +---+
+ | | | |-->| |-->| |
+ | | | |<--| |<--| |<-+
+ | | +---+ +---+ +---+ |
+ | | | ^ | |
+ | | +-------------+ | |
+ | +-----------------------------+ |
+ +------------------------------------+
+
+After the new head page is set, the previous pointer of the head page is
+updated to the reader page.
+
+ +------+
+ |reader| RING BUFFER
+ |page |-------H-----------+
+ +------+ <---------------+ v
+ | ^ +---+ +---+ +---+
+ | | | |-->| |-->| |
+ | | | | | |<--| |<-+
+ | | +---+ +---+ +---+ |
+ | | | ^ | |
+ | | +-------------+ | |
+ | +-----------------------------+ |
+ +------------------------------------+
+
+ +------+
+ |buffer| RING BUFFER
+ |page |-------H-----------+ <--- New head page
+ +------+ <---------------+ v
+ | ^ +---+ +---+ +---+
+ | | | | | |-->| |
+ | | New | | | |<--| |<-+
+ | | Reader +---+ +---+ +---+ |
+ | | page ----^ | |
+ | | | |
+ | +-----------------------------+ |
+ +------------------------------------+
+
+Another important point: The page that the reader page points back to
+by its previous pointer (the one that now points to the new head page)
+never points back to the reader page. That is because the reader page is
+not part of the ring buffer. Traversing the ring buffer via the next pointers
+will always stay in the ring buffer. Traversing the ring buffer via the
+prev pointers may not.
+
+Note, the way to determine a reader page is simply by examining the previous
+pointer of the page. If the next pointer of the previous page does not
+point back to the original page, then the original page is a reader page:
+
+
+ +--------+
+ | reader | next +----+
+ | page |-------->| |<====== (buffer page)
+ +--------+ +----+
+ | | ^
+ | v | next
+ prev | +----+
+ +------------->| |
+ +----+
+
+The way the head page moves forward:
+
+When the tail page meets the head page and the buffer is in overwrite mode
+and more writes take place, the head page must be moved forward before the
+writer may move the tail page. The way this is done is that the writer
+performs a cmpxchg to convert the pointer to the head page from the HEADER
+flag to have the UPDATE flag set. Once this is done, the reader will
+not be able to swap the head page from the buffer, nor will it be able to
+move the head page, until the writer is finished with the move.
+
+This eliminates any races that the reader can have on the writer. The reader
+must spin, and this is why the reader cannot preempt the writer.
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-H->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+The following page will be made into the new head page.
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |-H->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+After the new head page has been set, we can set the old head page
+pointer back to NORMAL.
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |-H->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+After the head page has been moved, the tail page may now move forward.
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |-H->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+
+The above are the trivial updates. Now for the more complex scenarios.
+
+
+As stated before, if enough writes preempt the first write, the
+tail page may make it all the way around the buffer and meet the commit
+page. At this time, we must start dropping writes (usually with some kind
+of warning to the user). But what happens if the commit was still on the
+reader page? The commit page is not part of the ring buffer. The tail page
+must account for this.
+
+
+ reader page commit page
+ | |
+ v |
+ +---+ |
+ | |<----------+
+ | |
+ | |------+
+ +---+ |
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-H->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+ ^
+ |
+ tail page
+
+If the tail page were to simply push the head page forward, the commit when
+leaving the reader page would not be pointing to the correct page.
+
+The solution to this is to test if the commit page is on the reader page
+before pushing the head page. If it is, then it can be assumed that the
+tail page wrapped the buffer, and we must drop new writes.
+
+This is not a race condition, because the commit page can only be moved
+by the outermost writer (the writer that was preempted).
+This means that the commit will not move while a writer is moving the
+tail page. The reader cannot swap the reader page if it is also being
+used as the commit page. The reader can simply check that the commit
+is off the reader page. Once the commit page leaves the reader page
+it will never go back on it unless a reader does another swap with the
+buffer page that is also the commit page.
+
+
+Nested writes
+-------------
+
+In the pushing forward of the tail page we must first push forward
+the head page if the head page is the next page. If the head page
+is not the next page, the tail page is simply updated with a cmpxchg.
+
+Only writers move the tail page. This must be done atomically to protect
+against nested writers.
+
+ temp_page = tail_page
+ next_page = temp_page->next
+ cmpxchg(tail_page, temp_page, next_page)
+
+The above will update the tail page if it is still pointing to the expected
+page. If this fails, a nested write pushed it forward, the current write
+does not need to push it.
+
+
+ temp page
+ |
+ v
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+Nested write comes in and moves the tail page forward:
+
+ tail page (moved by nested writer)
+ temp page |
+ | |
+ v v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+The above would fail the cmpxchg, but since the tail page has already
+been moved forward, the writer will just try again to reserve storage
+on the new tail page.
+
+But the moving of the head page is a bit more complex.
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-H->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+The write converts the head page pointer to UPDATE.
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+But if a nested writer preempts here, it will see that the next
+page is a head page, but it is also nested. It will detect that
+it is nested and will save that information. The detection is the
+fact that it sees the UPDATE flag instead of a HEADER or NORMAL
+pointer.
+
+The nested writer will set the new head page pointer.
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |-H->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+But it will not reset the update back to normal. Only the writer
+that converted a pointer from HEAD to UPDATE will convert it back
+to NORMAL.
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |-H->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+After the nested writer finishes, the outermost writer will convert
+the UPDATE pointer to NORMAL.
+
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |-H->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+
+It can be even more complex if several nested writes came in and moved
+the tail page ahead several pages:
+
+
+(first writer)
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-H->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+The write converts the head page pointer to UPDATE.
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |--->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+Next writer comes in, and sees the update and sets up the new
+head page.
+
+(second writer)
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |-H->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+The nested writer moves the tail page forward. But does not set the old
+update page to NORMAL because it is not the outermost writer.
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |-H->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+Another writer preempts and sees the page after the tail page is a head page.
+It changes it from HEAD to UPDATE.
+
+(third writer)
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |-U->| |--->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+The writer will move the head page forward:
+
+
+(third writer)
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |-U->| |-H->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+But now that the third writer did change the HEAD flag to UPDATE it
+will convert it to normal:
+
+
+(third writer)
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |--->| |-H->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+
+Then it will move the tail page, and return back to the second writer.
+
+
+(second writer)
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |--->| |-H->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+
+The second writer will fail to move the tail page because it was already
+moved, so it will try again and add its data to the new tail page.
+It will return to the first writer.
+
+
+(first writer)
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |--->| |-H->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+The first writer cannot know atomically if the tail page moved
+while it updates the HEAD page. It will then update the head page to
+what it thinks is the new head page.
+
+
+(first writer)
+
+ tail page
+ |
+ v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |-H->| |-H->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+Since the cmpxchg returns the old value of the pointer the first writer
+will see it succeeded in updating the pointer from NORMAL to HEAD.
+But as we can see, this is not good enough. It must also check to see
+if the tail page is either where it use to be or on the next page:
+
+
+(first writer)
+
+ A B tail page
+ | | |
+ v v v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |-H->| |-H->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+If tail page != A and tail page != B, then it must reset the pointer
+back to NORMAL. The fact that it only needs to worry about nested
+writers means that it only needs to check this after setting the HEAD page.
+
+
+(first writer)
+
+ A B tail page
+ | | |
+ v v v
+ +---+ +---+ +---+ +---+
+<---| |--->| |-U->| |--->| |-H->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
+Now the writer can update the head page. This is also why the head page must
+remain in UPDATE and only reset by the outermost writer. This prevents
+the reader from seeing the incorrect head page.
+
+
+(first writer)
+
+ A B tail page
+ | | |
+ v v v
+ +---+ +---+ +---+ +---+
+<---| |--->| |--->| |--->| |-H->
+--->| |<---| |<---| |<---| |<---
+ +---+ +---+ +---+ +---+
+
diff --git a/Documentation/trace/tracepoint-analysis.txt b/Documentation/trace/tracepoint-analysis.txt
new file mode 100644
index 000000000..058cc6c9d
--- /dev/null
+++ b/Documentation/trace/tracepoint-analysis.txt
@@ -0,0 +1,327 @@
+ Notes on Analysing Behaviour Using Events and Tracepoints
+
+ Documentation written by Mel Gorman
+ PCL information heavily based on email from Ingo Molnar
+
+1. Introduction
+===============
+
+Tracepoints (see Documentation/trace/tracepoints.txt) can be used without
+creating custom kernel modules to register probe functions using the event
+tracing infrastructure.
+
+Simplistically, tracepoints represent important events that can be
+taken in conjunction with other tracepoints to build a "Big Picture" of
+what is going on within the system. There are a large number of methods for
+gathering and interpreting these events. Lacking any current Best Practises,
+this document describes some of the methods that can be used.
+
+This document assumes that debugfs is mounted on /sys/kernel/debug and that
+the appropriate tracing options have been configured into the kernel. It is
+assumed that the PCL tool tools/perf has been installed and is in your path.
+
+2. Listing Available Events
+===========================
+
+2.1 Standard Utilities
+----------------------
+
+All possible events are visible from /sys/kernel/debug/tracing/events. Simply
+calling
+
+ $ find /sys/kernel/debug/tracing/events -type d
+
+will give a fair indication of the number of events available.
+
+2.2 PCL (Performance Counters for Linux)
+-------
+
+Discovery and enumeration of all counters and events, including tracepoints,
+are available with the perf tool. Getting a list of available events is a
+simple case of:
+
+ $ perf list 2>&1 | grep Tracepoint
+ ext4:ext4_free_inode [Tracepoint event]
+ ext4:ext4_request_inode [Tracepoint event]
+ ext4:ext4_allocate_inode [Tracepoint event]
+ ext4:ext4_write_begin [Tracepoint event]
+ ext4:ext4_ordered_write_end [Tracepoint event]
+ [ .... remaining output snipped .... ]
+
+
+3. Enabling Events
+==================
+
+3.1 System-Wide Event Enabling
+------------------------------
+
+See Documentation/trace/events.txt for a proper description on how events
+can be enabled system-wide. A short example of enabling all events related
+to page allocation would look something like:
+
+ $ for i in `find /sys/kernel/debug/tracing/events -name "enable" | grep mm_`; do echo 1 > $i; done
+
+3.2 System-Wide Event Enabling with SystemTap
+---------------------------------------------
+
+In SystemTap, tracepoints are accessible using the kernel.trace() function
+call. The following is an example that reports every 5 seconds what processes
+were allocating the pages.
+
+ global page_allocs
+
+ probe kernel.trace("mm_page_alloc") {
+ page_allocs[execname()]++
+ }
+
+ function print_count() {
+ printf ("%-25s %-s\n", "#Pages Allocated", "Process Name")
+ foreach (proc in page_allocs-)
+ printf("%-25d %s\n", page_allocs[proc], proc)
+ printf ("\n")
+ delete page_allocs
+ }
+
+ probe timer.s(5) {
+ print_count()
+ }
+
+3.3 System-Wide Event Enabling with PCL
+---------------------------------------
+
+By specifying the -a switch and analysing sleep, the system-wide events
+for a duration of time can be examined.
+
+ $ perf stat -a \
+ -e kmem:mm_page_alloc -e kmem:mm_page_free \
+ -e kmem:mm_page_free_batched \
+ sleep 10
+ Performance counter stats for 'sleep 10':
+
+ 9630 kmem:mm_page_alloc
+ 2143 kmem:mm_page_free
+ 7424 kmem:mm_page_free_batched
+
+ 10.002577764 seconds time elapsed
+
+Similarly, one could execute a shell and exit it as desired to get a report
+at that point.
+
+3.4 Local Event Enabling
+------------------------
+
+Documentation/trace/ftrace.txt describes how to enable events on a per-thread
+basis using set_ftrace_pid.
+
+3.5 Local Event Enablement with PCL
+-----------------------------------
+
+Events can be activated and tracked for the duration of a process on a local
+basis using PCL such as follows.
+
+ $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free \
+ -e kmem:mm_page_free_batched ./hackbench 10
+ Time: 0.909
+
+ Performance counter stats for './hackbench 10':
+
+ 17803 kmem:mm_page_alloc
+ 12398 kmem:mm_page_free
+ 4827 kmem:mm_page_free_batched
+
+ 0.973913387 seconds time elapsed
+
+4. Event Filtering
+==================
+
+Documentation/trace/ftrace.txt covers in-depth how to filter events in
+ftrace. Obviously using grep and awk of trace_pipe is an option as well
+as any script reading trace_pipe.
+
+5. Analysing Event Variances with PCL
+=====================================
+
+Any workload can exhibit variances between runs and it can be important
+to know what the standard deviation is. By and large, this is left to the
+performance analyst to do it by hand. In the event that the discrete event
+occurrences are useful to the performance analyst, then perf can be used.
+
+ $ perf stat --repeat 5 -e kmem:mm_page_alloc -e kmem:mm_page_free
+ -e kmem:mm_page_free_batched ./hackbench 10
+ Time: 0.890
+ Time: 0.895
+ Time: 0.915
+ Time: 1.001
+ Time: 0.899
+
+ Performance counter stats for './hackbench 10' (5 runs):
+
+ 16630 kmem:mm_page_alloc ( +- 3.542% )
+ 11486 kmem:mm_page_free ( +- 4.771% )
+ 4730 kmem:mm_page_free_batched ( +- 2.325% )
+
+ 0.982653002 seconds time elapsed ( +- 1.448% )
+
+In the event that some higher-level event is required that depends on some
+aggregation of discrete events, then a script would need to be developed.
+
+Using --repeat, it is also possible to view how events are fluctuating over
+time on a system-wide basis using -a and sleep.
+
+ $ perf stat -e kmem:mm_page_alloc -e kmem:mm_page_free \
+ -e kmem:mm_page_free_batched \
+ -a --repeat 10 \
+ sleep 1
+ Performance counter stats for 'sleep 1' (10 runs):
+
+ 1066 kmem:mm_page_alloc ( +- 26.148% )
+ 182 kmem:mm_page_free ( +- 5.464% )
+ 890 kmem:mm_page_free_batched ( +- 30.079% )
+
+ 1.002251757 seconds time elapsed ( +- 0.005% )
+
+6. Higher-Level Analysis with Helper Scripts
+============================================
+
+When events are enabled the events that are triggering can be read from
+/sys/kernel/debug/tracing/trace_pipe in human-readable format although binary
+options exist as well. By post-processing the output, further information can
+be gathered on-line as appropriate. Examples of post-processing might include
+
+ o Reading information from /proc for the PID that triggered the event
+ o Deriving a higher-level event from a series of lower-level events.
+ o Calculating latencies between two events
+
+Documentation/trace/postprocess/trace-pagealloc-postprocess.pl is an example
+script that can read trace_pipe from STDIN or a copy of a trace. When used
+on-line, it can be interrupted once to generate a report without exiting
+and twice to exit.
+
+Simplistically, the script just reads STDIN and counts up events but it
+also can do more such as
+
+ o Derive high-level events from many low-level events. If a number of pages
+ are freed to the main allocator from the per-CPU lists, it recognises
+ that as one per-CPU drain even though there is no specific tracepoint
+ for that event
+ o It can aggregate based on PID or individual process number
+ o In the event memory is getting externally fragmented, it reports
+ on whether the fragmentation event was severe or moderate.
+ o When receiving an event about a PID, it can record who the parent was so
+ that if large numbers of events are coming from very short-lived
+ processes, the parent process responsible for creating all the helpers
+ can be identified
+
+7. Lower-Level Analysis with PCL
+================================
+
+There may also be a requirement to identify what functions within a program
+were generating events within the kernel. To begin this sort of analysis, the
+data must be recorded. At the time of writing, this required root:
+
+ $ perf record -c 1 \
+ -e kmem:mm_page_alloc -e kmem:mm_page_free \
+ -e kmem:mm_page_free_batched \
+ ./hackbench 10
+ Time: 0.894
+ [ perf record: Captured and wrote 0.733 MB perf.data (~32010 samples) ]
+
+Note the use of '-c 1' to set the event period to sample. The default sample
+period is quite high to minimise overhead but the information collected can be
+very coarse as a result.
+
+This record outputted a file called perf.data which can be analysed using
+perf report.
+
+ $ perf report
+ # Samples: 30922
+ #
+ # Overhead Command Shared Object
+ # ........ ......... ................................
+ #
+ 87.27% hackbench [vdso]
+ 6.85% hackbench /lib/i686/cmov/libc-2.9.so
+ 2.62% hackbench /lib/ld-2.9.so
+ 1.52% perf [vdso]
+ 1.22% hackbench ./hackbench
+ 0.48% hackbench [kernel]
+ 0.02% perf /lib/i686/cmov/libc-2.9.so
+ 0.01% perf /usr/bin/perf
+ 0.01% perf /lib/ld-2.9.so
+ 0.00% hackbench /lib/i686/cmov/libpthread-2.9.so
+ #
+ # (For more details, try: perf report --sort comm,dso,symbol)
+ #
+
+According to this, the vast majority of events triggered on events
+within the VDSO. With simple binaries, this will often be the case so let's
+take a slightly different example. In the course of writing this, it was
+noticed that X was generating an insane amount of page allocations so let's look
+at it:
+
+ $ perf record -c 1 -f \
+ -e kmem:mm_page_alloc -e kmem:mm_page_free \
+ -e kmem:mm_page_free_batched \
+ -p `pidof X`
+
+This was interrupted after a few seconds and
+
+ $ perf report
+ # Samples: 27666
+ #
+ # Overhead Command Shared Object
+ # ........ ....... .......................................
+ #
+ 51.95% Xorg [vdso]
+ 47.95% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1
+ 0.09% Xorg /lib/i686/cmov/libc-2.9.so
+ 0.01% Xorg [kernel]
+ #
+ # (For more details, try: perf report --sort comm,dso,symbol)
+ #
+
+So, almost half of the events are occurring in a library. To get an idea which
+symbol:
+
+ $ perf report --sort comm,dso,symbol
+ # Samples: 27666
+ #
+ # Overhead Command Shared Object Symbol
+ # ........ ....... ....................................... ......
+ #
+ 51.95% Xorg [vdso] [.] 0x000000ffffe424
+ 47.93% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] pixmanFillsse2
+ 0.09% Xorg /lib/i686/cmov/libc-2.9.so [.] _int_malloc
+ 0.01% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] pixman_region32_copy_f
+ 0.01% Xorg [kernel] [k] read_hpet
+ 0.01% Xorg /opt/gfx-test/lib/libpixman-1.so.0.13.1 [.] get_fast_path
+ 0.00% Xorg [kernel] [k] ftrace_trace_userstack
+
+To see where within the function pixmanFillsse2 things are going wrong:
+
+ $ perf annotate pixmanFillsse2
+ [ ... ]
+ 0.00 : 34eeb: 0f 18 08 prefetcht0 (%eax)
+ : }
+ :
+ : extern __inline void __attribute__((__gnu_inline__, __always_inline__, _
+ : _mm_store_si128 (__m128i *__P, __m128i __B) : {
+ : *__P = __B;
+ 12.40 : 34eee: 66 0f 7f 80 40 ff ff movdqa %xmm0,-0xc0(%eax)
+ 0.00 : 34ef5: ff
+ 12.40 : 34ef6: 66 0f 7f 80 50 ff ff movdqa %xmm0,-0xb0(%eax)
+ 0.00 : 34efd: ff
+ 12.39 : 34efe: 66 0f 7f 80 60 ff ff movdqa %xmm0,-0xa0(%eax)
+ 0.00 : 34f05: ff
+ 12.67 : 34f06: 66 0f 7f 80 70 ff ff movdqa %xmm0,-0x90(%eax)
+ 0.00 : 34f0d: ff
+ 12.58 : 34f0e: 66 0f 7f 40 80 movdqa %xmm0,-0x80(%eax)
+ 12.31 : 34f13: 66 0f 7f 40 90 movdqa %xmm0,-0x70(%eax)
+ 12.40 : 34f18: 66 0f 7f 40 a0 movdqa %xmm0,-0x60(%eax)
+ 12.31 : 34f1d: 66 0f 7f 40 b0 movdqa %xmm0,-0x50(%eax)
+
+At a glance, it looks like the time is being spent copying pixmaps to
+the card. Further investigation would be needed to determine why pixmaps
+are being copied around so much but a starting point would be to take an
+ancient build of libpixmap out of the library path where it was totally
+forgotten about from months ago!
diff --git a/Documentation/trace/tracepoints.txt b/Documentation/trace/tracepoints.txt
new file mode 100644
index 000000000..a3efac621
--- /dev/null
+++ b/Documentation/trace/tracepoints.txt
@@ -0,0 +1,145 @@
+ Using the Linux Kernel Tracepoints
+
+ Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Tracepoints and their use. It
+provides examples of how to insert tracepoints in the kernel and
+connect probe functions to them and provides some examples of probe
+functions.
+
+
+* Purpose of tracepoints
+
+A tracepoint placed in code provides a hook to call a function (probe)
+that you can provide at runtime. A tracepoint can be "on" (a probe is
+connected to it) or "off" (no probe is attached). When a tracepoint is
+"off" it has no effect, except for adding a tiny time penalty
+(checking a condition for a branch) and space penalty (adding a few
+bytes for the function call at the end of the instrumented function
+and adds a data structure in a separate section). When a tracepoint
+is "on", the function you provide is called each time the tracepoint
+is executed, in the execution context of the caller. When the function
+provided ends its execution, it returns to the caller (continuing from
+the tracepoint site).
+
+You can put tracepoints at important locations in the code. They are
+lightweight hooks that can pass an arbitrary number of parameters,
+which prototypes are described in a tracepoint declaration placed in a
+header file.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+Two elements are required for tracepoints :
+
+- A tracepoint definition, placed in a header file.
+- The tracepoint statement, in C code.
+
+In order to use tracepoints, you should include linux/tracepoint.h.
+
+In include/trace/events/subsys.h :
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM subsys
+
+#if !defined(_TRACE_SUBSYS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SUBSYS_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(subsys_eventname,
+ TP_PROTO(int firstarg, struct task_struct *p),
+ TP_ARGS(firstarg, p));
+
+#endif /* _TRACE_SUBSYS_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
+In subsys/file.c (where the tracing statement must be added) :
+
+#include <trace/events/subsys.h>
+
+#define CREATE_TRACE_POINTS
+DEFINE_TRACE(subsys_eventname);
+
+void somefct(void)
+{
+ ...
+ trace_subsys_eventname(arg, task);
+ ...
+}
+
+Where :
+- subsys_eventname is an identifier unique to your event
+ - subsys is the name of your subsystem.
+ - eventname is the name of the event to trace.
+
+- TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the
+ function called by this tracepoint.
+
+- TP_ARGS(firstarg, p) are the parameters names, same as found in the
+ prototype.
+
+- if you use the header in multiple source files, #define CREATE_TRACE_POINTS
+ should appear only in one source file.
+
+Connecting a function (probe) to a tracepoint is done by providing a
+probe (function to call) for the specific tracepoint through
+register_trace_subsys_eventname(). Removing a probe is done through
+unregister_trace_subsys_eventname(); it will remove the probe.
+
+tracepoint_synchronize_unregister() must be called before the end of
+the module exit function to make sure there is no caller left using
+the probe. This, and the fact that preemption is disabled around the
+probe call, make sure that probe removal and module unload are safe.
+
+The tracepoint mechanism supports inserting multiple instances of the
+same tracepoint, but a single definition must be made of a given
+tracepoint name over all the kernel to make sure no type conflict will
+occur. Name mangling of the tracepoints is done using the prototypes
+to make sure typing is correct. Verification of probe type correctness
+is done at the registration site by the compiler. Tracepoints can be
+put in inline functions, inlined static functions, and unrolled loops
+as well as regular functions.
+
+The naming scheme "subsys_event" is suggested here as a convention
+intended to limit collisions. Tracepoint names are global to the
+kernel: they are considered as being the same whether they are in the
+core kernel image or in modules.
+
+If the tracepoint has to be used in kernel modules, an
+EXPORT_TRACEPOINT_SYMBOL_GPL() or EXPORT_TRACEPOINT_SYMBOL() can be
+used to export the defined tracepoints.
+
+If you need to do a bit of work for a tracepoint parameter, and
+that work is only used for the tracepoint, that work can be encapsulated
+within an if statement with the following:
+
+ if (trace_foo_bar_enabled()) {
+ int i;
+ int tot = 0;
+
+ for (i = 0; i < count; i++)
+ tot += calculate_nuggets();
+
+ trace_foo_bar(tot);
+ }
+
+All trace_<tracepoint>() calls have a matching trace_<tracepoint>_enabled()
+function defined that returns true if the tracepoint is enabled and
+false otherwise. The trace_<tracepoint>() should always be within the
+block of the if (trace_<tracepoint>_enabled()) to prevent races between
+the tracepoint being enabled and the check being seen.
+
+The advantage of using the trace_<tracepoint>_enabled() is that it uses
+the static_key of the tracepoint to allow the if statement to be implemented
+with jump labels and avoid conditional branches.
+
+Note: The convenience macro TRACE_EVENT provides an alternative way to
+ define tracepoints. Check http://lwn.net/Articles/379903,
+ http://lwn.net/Articles/381064 and http://lwn.net/Articles/383362
+ for a series of articles with more details.
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
new file mode 100644
index 000000000..f1cf9a34a
--- /dev/null
+++ b/Documentation/trace/uprobetracer.txt
@@ -0,0 +1,159 @@
+ Uprobe-tracer: Uprobe-based Event Tracing
+ =========================================
+
+ Documentation written by Srikar Dronamraju
+
+
+Overview
+--------
+Uprobe based trace events are similar to kprobe based trace events.
+To enable this feature, build your kernel with CONFIG_UPROBE_EVENT=y.
+
+Similar to the kprobe-event tracer, this doesn't need to be activated via
+current_tracer. Instead of that, add probe points via
+/sys/kernel/debug/tracing/uprobe_events, and enable it via
+/sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled.
+
+However unlike kprobe-event tracer, the uprobe event interface expects the
+user to calculate the offset of the probepoint in the object.
+
+Synopsis of uprobe_tracer
+-------------------------
+ p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a uprobe
+ r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe)
+ -:[GRP/]EVENT : Clear uprobe or uretprobe event
+
+ GRP : Group name. If omitted, "uprobes" is the default value.
+ EVENT : Event name. If omitted, the event name is generated based
+ on PATH+OFFSET.
+ PATH : Path to an executable or a library.
+ OFFSET : Offset where the probe is inserted.
+
+ FETCHARGS : Arguments. Each probe can have up to 128 args.
+ %REG : Fetch register REG
+ @ADDR : Fetch memory at ADDR (ADDR should be in userspace)
+ @+OFFSET : Fetch memory at OFFSET (OFFSET from same file as PATH)
+ $stackN : Fetch Nth entry of stack (N >= 0)
+ $stack : Fetch stack address.
+ $retval : Fetch return value.(*)
+ +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
+ NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
+ FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
+ (u8/u16/u32/u64/s8/s16/s32/s64), "string" and bitfield
+ are supported.
+
+ (*) only for return probe.
+ (**) this is useful for fetching a field of data structures.
+
+Types
+-----
+Several types are supported for fetch-args. Uprobe tracer will access memory
+by given type. Prefix 's' and 'u' means those types are signed and unsigned
+respectively. Traced arguments are shown in decimal (signed) or hex (unsigned).
+String type is a special type, which fetches a "null-terminated" string from
+user space.
+Bitfield is another special type, which takes 3 parameters, bit-width, bit-
+offset, and container-size (usually 32). The syntax is;
+
+ b<bit-width>@<bit-offset>/<container-size>
+
+
+Event Profiling
+---------------
+You can check the total number of probe hits and probe miss-hits via
+/sys/kernel/debug/tracing/uprobe_profile.
+The first column is event name, the second is the number of probe hits,
+the third is the number of probe miss-hits.
+
+Usage examples
+--------------
+ * Add a probe as a new uprobe event, write a new definition to uprobe_events
+as below: (sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash)
+
+ echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
+
+ * Add a probe as a new uretprobe event:
+
+ echo 'r: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events
+
+ * Unset registered event:
+
+ echo '-:bash_0x4245c0' >> /sys/kernel/debug/tracing/uprobe_events
+
+ * Print out the events that are registered:
+
+ cat /sys/kernel/debug/tracing/uprobe_events
+
+ * Clear all events:
+
+ echo > /sys/kernel/debug/tracing/uprobe_events
+
+Following example shows how to dump the instruction pointer and %ax register
+at the probed text address. Probe zfree function in /bin/zsh:
+
+ # cd /sys/kernel/debug/tracing/
+ # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp
+ 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
+ # objdump -T /bin/zsh | grep -w zfree
+ 0000000000446420 g DF .text 0000000000000012 Base zfree
+
+ 0x46420 is the offset of zfree in object /bin/zsh that is loaded at
+ 0x00400000. Hence the command to uprobe would be:
+
+ # echo 'p:zfree_entry /bin/zsh:0x46420 %ip %ax' > uprobe_events
+
+ And the same for the uretprobe would be:
+
+ # echo 'r:zfree_exit /bin/zsh:0x46420 %ip %ax' >> uprobe_events
+
+Please note: User has to explicitly calculate the offset of the probe-point
+in the object. We can see the events that are registered by looking at the
+uprobe_events file.
+
+ # cat uprobe_events
+ p:uprobes/zfree_entry /bin/zsh:0x00046420 arg1=%ip arg2=%ax
+ r:uprobes/zfree_exit /bin/zsh:0x00046420 arg1=%ip arg2=%ax
+
+Format of events can be seen by viewing the file events/uprobes/zfree_entry/format
+
+ # cat events/uprobes/zfree_entry/format
+ name: zfree_entry
+ ID: 922
+ format:
+ field:unsigned short common_type; offset:0; size:2; signed:0;
+ field:unsigned char common_flags; offset:2; size:1; signed:0;
+ field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
+ field:int common_pid; offset:4; size:4; signed:1;
+ field:int common_padding; offset:8; size:4; signed:1;
+
+ field:unsigned long __probe_ip; offset:12; size:4; signed:0;
+ field:u32 arg1; offset:16; size:4; signed:0;
+ field:u32 arg2; offset:20; size:4; signed:0;
+
+ print fmt: "(%lx) arg1=%lx arg2=%lx", REC->__probe_ip, REC->arg1, REC->arg2
+
+Right after definition, each event is disabled by default. For tracing these
+events, you need to enable it by:
+
+ # echo 1 > events/uprobes/enable
+
+Lets disable the event after sleeping for some time.
+
+ # sleep 20
+ # echo 0 > events/uprobes/enable
+
+And you can see the traced information via /sys/kernel/debug/tracing/trace.
+
+ # cat trace
+ # tracer: nop
+ #
+ # TASK-PID CPU# TIMESTAMP FUNCTION
+ # | | | | |
+ zsh-24842 [006] 258544.995456: zfree_entry: (0x446420) arg1=446420 arg2=79
+ zsh-24842 [007] 258545.000270: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0
+ zsh-24842 [002] 258545.043929: zfree_entry: (0x446420) arg1=446420 arg2=79
+ zsh-24842 [004] 258547.046129: zfree_exit: (0x446540 <- 0x446420) arg1=446540 arg2=0
+
+Output shows us uprobe was triggered for a pid 24842 with ip being 0x446420
+and contents of ax register being 79. And uretprobe was triggered with ip at
+0x446540 with counterpart function entry at 0x446420.
diff --git a/Documentation/unaligned-memory-access.txt b/Documentation/unaligned-memory-access.txt
new file mode 100644
index 000000000..a445da098
--- /dev/null
+++ b/Documentation/unaligned-memory-access.txt
@@ -0,0 +1,262 @@
+UNALIGNED MEMORY ACCESSES
+=========================
+
+Linux runs on a wide variety of architectures which have varying behaviour
+when it comes to memory access. This document presents some details about
+unaligned accesses, why you need to write code that doesn't cause them,
+and how to write such code!
+
+
+The definition of an unaligned access
+=====================================
+
+Unaligned memory accesses occur when you try to read N bytes of data starting
+from an address that is not evenly divisible by N (i.e. addr % N != 0).
+For example, reading 4 bytes of data from address 0x10004 is fine, but
+reading 4 bytes of data from address 0x10005 would be an unaligned memory
+access.
+
+The above may seem a little vague, as memory access can happen in different
+ways. The context here is at the machine code level: certain instructions read
+or write a number of bytes to or from memory (e.g. movb, movw, movl in x86
+assembly). As will become clear, it is relatively easy to spot C statements
+which will compile to multiple-byte memory access instructions, namely when
+dealing with types such as u16, u32 and u64.
+
+
+Natural alignment
+=================
+
+The rule mentioned above forms what we refer to as natural alignment:
+When accessing N bytes of memory, the base memory address must be evenly
+divisible by N, i.e. addr % N == 0.
+
+When writing code, assume the target architecture has natural alignment
+requirements.
+
+In reality, only a few architectures require natural alignment on all sizes
+of memory access. However, we must consider ALL supported architectures;
+writing code that satisfies natural alignment requirements is the easiest way
+to achieve full portability.
+
+
+Why unaligned access is bad
+===========================
+
+The effects of performing an unaligned memory access vary from architecture
+to architecture. It would be easy to write a whole document on the differences
+here; a summary of the common scenarios is presented below:
+
+ - Some architectures are able to perform unaligned memory accesses
+ transparently, but there is usually a significant performance cost.
+ - Some architectures raise processor exceptions when unaligned accesses
+ happen. The exception handler is able to correct the unaligned access,
+ at significant cost to performance.
+ - Some architectures raise processor exceptions when unaligned accesses
+ happen, but the exceptions do not contain enough information for the
+ unaligned access to be corrected.
+ - Some architectures are not capable of unaligned memory access, but will
+ silently perform a different memory access to the one that was requested,
+ resulting in a subtle code bug that is hard to detect!
+
+It should be obvious from the above that if your code causes unaligned
+memory accesses to happen, your code will not work correctly on certain
+platforms and will cause performance problems on others.
+
+
+Code that does not cause unaligned access
+=========================================
+
+At first, the concepts above may seem a little hard to relate to actual
+coding practice. After all, you don't have a great deal of control over
+memory addresses of certain variables, etc.
+
+Fortunately things are not too complex, as in most cases, the compiler
+ensures that things will work for you. For example, take the following
+structure:
+
+ struct foo {
+ u16 field1;
+ u32 field2;
+ u8 field3;
+ };
+
+Let us assume that an instance of the above structure resides in memory
+starting at address 0x10000. With a basic level of understanding, it would
+not be unreasonable to expect that accessing field2 would cause an unaligned
+access. You'd be expecting field2 to be located at offset 2 bytes into the
+structure, i.e. address 0x10002, but that address is not evenly divisible
+by 4 (remember, we're reading a 4 byte value here).
+
+Fortunately, the compiler understands the alignment constraints, so in the
+above case it would insert 2 bytes of padding in between field1 and field2.
+Therefore, for standard structure types you can always rely on the compiler
+to pad structures so that accesses to fields are suitably aligned (assuming
+you do not cast the field to a type of different length).
+
+Similarly, you can also rely on the compiler to align variables and function
+parameters to a naturally aligned scheme, based on the size of the type of
+the variable.
+
+At this point, it should be clear that accessing a single byte (u8 or char)
+will never cause an unaligned access, because all memory addresses are evenly
+divisible by one.
+
+On a related topic, with the above considerations in mind you may observe
+that you could reorder the fields in the structure in order to place fields
+where padding would otherwise be inserted, and hence reduce the overall
+resident memory size of structure instances. The optimal layout of the
+above example is:
+
+ struct foo {
+ u32 field2;
+ u16 field1;
+ u8 field3;
+ };
+
+For a natural alignment scheme, the compiler would only have to add a single
+byte of padding at the end of the structure. This padding is added in order
+to satisfy alignment constraints for arrays of these structures.
+
+Another point worth mentioning is the use of __attribute__((packed)) on a
+structure type. This GCC-specific attribute tells the compiler never to
+insert any padding within structures, useful when you want to use a C struct
+to represent some data that comes in a fixed arrangement 'off the wire'.
+
+You might be inclined to believe that usage of this attribute can easily
+lead to unaligned accesses when accessing fields that do not satisfy
+architectural alignment requirements. However, again, the compiler is aware
+of the alignment constraints and will generate extra instructions to perform
+the memory access in a way that does not cause unaligned access. Of course,
+the extra instructions obviously cause a loss in performance compared to the
+non-packed case, so the packed attribute should only be used when avoiding
+structure padding is of importance.
+
+
+Code that causes unaligned access
+=================================
+
+With the above in mind, let's move onto a real life example of a function
+that can cause an unaligned memory access. The following function taken
+from include/linux/etherdevice.h is an optimized routine to compare two
+ethernet MAC addresses for equality.
+
+bool ether_addr_equal(const u8 *addr1, const u8 *addr2)
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ u32 fold = ((*(const u32 *)addr1) ^ (*(const u32 *)addr2)) |
+ ((*(const u16 *)(addr1 + 4)) ^ (*(const u16 *)(addr2 + 4)));
+
+ return fold == 0;
+#else
+ const u16 *a = (const u16 *)addr1;
+ const u16 *b = (const u16 *)addr2;
+ return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0;
+#endif
+}
+
+In the above function, when the hardware has efficient unaligned access
+capability, there is no issue with this code. But when the hardware isn't
+able to access memory on arbitrary boundaries, the reference to a[0] causes
+2 bytes (16 bits) to be read from memory starting at address addr1.
+
+Think about what would happen if addr1 was an odd address such as 0x10003.
+(Hint: it'd be an unaligned access.)
+
+Despite the potential unaligned access problems with the above function, it
+is included in the kernel anyway but is understood to only work normally on
+16-bit-aligned addresses. It is up to the caller to ensure this alignment or
+not use this function at all. This alignment-unsafe function is still useful
+as it is a decent optimization for the cases when you can ensure alignment,
+which is true almost all of the time in ethernet networking context.
+
+
+Here is another example of some code that could cause unaligned accesses:
+ void myfunc(u8 *data, u32 value)
+ {
+ [...]
+ *((u32 *) data) = cpu_to_le32(value);
+ [...]
+ }
+
+This code will cause unaligned accesses every time the data parameter points
+to an address that is not evenly divisible by 4.
+
+In summary, the 2 main scenarios where you may run into unaligned access
+problems involve:
+ 1. Casting variables to types of different lengths
+ 2. Pointer arithmetic followed by access to at least 2 bytes of data
+
+
+Avoiding unaligned accesses
+===========================
+
+The easiest way to avoid unaligned access is to use the get_unaligned() and
+put_unaligned() macros provided by the <asm/unaligned.h> header file.
+
+Going back to an earlier example of code that potentially causes unaligned
+access:
+
+ void myfunc(u8 *data, u32 value)
+ {
+ [...]
+ *((u32 *) data) = cpu_to_le32(value);
+ [...]
+ }
+
+To avoid the unaligned memory access, you would rewrite it as follows:
+
+ void myfunc(u8 *data, u32 value)
+ {
+ [...]
+ value = cpu_to_le32(value);
+ put_unaligned(value, (u32 *) data);
+ [...]
+ }
+
+The get_unaligned() macro works similarly. Assuming 'data' is a pointer to
+memory and you wish to avoid unaligned access, its usage is as follows:
+
+ u32 value = get_unaligned((u32 *) data);
+
+These macros work for memory accesses of any length (not just 32 bits as
+in the examples above). Be aware that when compared to standard access of
+aligned memory, using these macros to access unaligned memory can be costly in
+terms of performance.
+
+If use of such macros is not convenient, another option is to use memcpy(),
+where the source or destination (or both) are of type u8* or unsigned char*.
+Due to the byte-wise nature of this operation, unaligned accesses are avoided.
+
+
+Alignment vs. Networking
+========================
+
+On architectures that require aligned loads, networking requires that the IP
+header is aligned on a four-byte boundary to optimise the IP stack. For
+regular ethernet hardware, the constant NET_IP_ALIGN is used. On most
+architectures this constant has the value 2 because the normal ethernet
+header is 14 bytes long, so in order to get proper alignment one needs to
+DMA to an address which can be expressed as 4*n + 2. One notable exception
+here is powerpc which defines NET_IP_ALIGN to 0 because DMA to unaligned
+addresses can be very expensive and dwarf the cost of unaligned loads.
+
+For some ethernet hardware that cannot DMA to unaligned addresses like
+4*n+2 or non-ethernet hardware, this can be a problem, and it is then
+required to copy the incoming frame into an aligned buffer. Because this is
+unnecessary on architectures that can do unaligned accesses, the code can be
+made dependent on CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS like so:
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ skb = original skb
+#else
+ skb = copy skb
+#endif
+
+--
+Authors: Daniel Drake <dsd@gentoo.org>,
+ Johannes Berg <johannes@sipsolutions.net>
+With help from: Alan Cox, Avuton Olrich, Heikki Orsila, Jan Engelhardt,
+Kyle McMartin, Kyle Moffett, Randy Dunlap, Robert Hancock, Uli Kunitz,
+Vadim Lobanov
+
diff --git a/Documentation/unicode.txt b/Documentation/unicode.txt
new file mode 100644
index 000000000..4a33f81ca
--- /dev/null
+++ b/Documentation/unicode.txt
@@ -0,0 +1,175 @@
+ Last update: 2005-01-17, version 1.4
+
+This file is maintained by H. Peter Anvin <unicode@lanana.org> as part
+of the Linux Assigned Names And Numbers Authority (LANANA) project.
+The current version can be found at:
+
+ http://www.lanana.org/docs/unicode/unicode.txt
+
+ ------------------------
+
+The Linux kernel code has been rewritten to use Unicode to map
+characters to fonts. By downloading a single Unicode-to-font table,
+both the eight-bit character sets and UTF-8 mode are changed to use
+the font as indicated.
+
+This changes the semantics of the eight-bit character tables subtly.
+The four character tables are now:
+
+Map symbol Map name Escape code (G0)
+
+LAT1_MAP Latin-1 (ISO 8859-1) ESC ( B
+GRAF_MAP DEC VT100 pseudographics ESC ( 0
+IBMPC_MAP IBM code page 437 ESC ( U
+USER_MAP User defined ESC ( K
+
+In particular, ESC ( U is no longer "straight to font", since the font
+might be completely different than the IBM character set. This
+permits for example the use of block graphics even with a Latin-1 font
+loaded.
+
+Note that although these codes are similar to ISO 2022, neither the
+codes nor their uses match ISO 2022; Linux has two 8-bit codes (G0 and
+G1), whereas ISO 2022 has four 7-bit codes (G0-G3).
+
+In accordance with the Unicode standard/ISO 10646 the range U+F000 to
+U+F8FF has been reserved for OS-wide allocation (the Unicode Standard
+refers to this as a "Corporate Zone", since this is inaccurate for
+Linux we call it the "Linux Zone"). U+F000 was picked as the starting
+point since it lets the direct-mapping area start on a large power of
+two (in case 1024- or 2048-character fonts ever become necessary).
+This leaves U+E000 to U+EFFF as End User Zone.
+
+[v1.2]: The Unicodes range from U+F000 and up to U+F7FF have been
+hard-coded to map directly to the loaded font, bypassing the
+translation table. The user-defined map now defaults to U+F000 to
+U+F0FF, emulating the previous behaviour. In practice, this range
+might be shorter; for example, vgacon can only handle 256-character
+(U+F000..U+F0FF) or 512-character (U+F000..U+F1FF) fonts.
+
+
+Actual characters assigned in the Linux Zone
+--------------------------------------------
+
+In addition, the following characters not present in Unicode 1.1.4
+have been defined; these are used by the DEC VT graphics map. [v1.2]
+THIS USE IS OBSOLETE AND SHOULD NO LONGER BE USED; PLEASE SEE BELOW.
+
+U+F800 DEC VT GRAPHICS HORIZONTAL LINE SCAN 1
+U+F801 DEC VT GRAPHICS HORIZONTAL LINE SCAN 3
+U+F803 DEC VT GRAPHICS HORIZONTAL LINE SCAN 7
+U+F804 DEC VT GRAPHICS HORIZONTAL LINE SCAN 9
+
+The DEC VT220 uses a 6x10 character matrix, and these characters form
+a smooth progression in the DEC VT graphics character set. I have
+omitted the scan 5 line, since it is also used as a block-graphics
+character, and hence has been coded as U+2500 FORMS LIGHT HORIZONTAL.
+
+[v1.3]: These characters have been officially added to Unicode 3.2.0;
+they are added at U+23BA, U+23BB, U+23BC, U+23BD. Linux now uses the
+new values.
+
+[v1.2]: The following characters have been added to represent common
+keyboard symbols that are unlikely to ever be added to Unicode proper
+since they are horribly vendor-specific. This, of course, is an
+excellent example of horrible design.
+
+U+F810 KEYBOARD SYMBOL FLYING FLAG
+U+F811 KEYBOARD SYMBOL PULLDOWN MENU
+U+F812 KEYBOARD SYMBOL OPEN APPLE
+U+F813 KEYBOARD SYMBOL SOLID APPLE
+
+Klingon language support
+------------------------
+
+In 1996, Linux was the first operating system in the world to add
+support for the artificial language Klingon, created by Marc Okrand
+for the "Star Trek" television series. This encoding was later
+adopted by the ConScript Unicode Registry and proposed (but ultimately
+rejected) for inclusion in Unicode Plane 1. Thus, it remains as a
+Linux/CSUR private assignment in the Linux Zone.
+
+This encoding has been endorsed by the Klingon Language Institute.
+For more information, contact them at:
+
+ http://www.kli.org/
+
+Since the characters in the beginning of the Linux CZ have been more
+of the dingbats/symbols/forms type and this is a language, I have
+located it at the end, on a 16-cell boundary in keeping with standard
+Unicode practice.
+
+NOTE: This range is now officially managed by the ConScript Unicode
+Registry. The normative reference is at:
+
+ http://www.evertype.com/standards/csur/klingon.html
+
+Klingon has an alphabet of 26 characters, a positional numeric writing
+system with 10 digits, and is written left-to-right, top-to-bottom.
+
+Several glyph forms for the Klingon alphabet have been proposed.
+However, since the set of symbols appear to be consistent throughout,
+with only the actual shapes being different, in keeping with standard
+Unicode practice these differences are considered font variants.
+
+U+F8D0 KLINGON LETTER A
+U+F8D1 KLINGON LETTER B
+U+F8D2 KLINGON LETTER CH
+U+F8D3 KLINGON LETTER D
+U+F8D4 KLINGON LETTER E
+U+F8D5 KLINGON LETTER GH
+U+F8D6 KLINGON LETTER H
+U+F8D7 KLINGON LETTER I
+U+F8D8 KLINGON LETTER J
+U+F8D9 KLINGON LETTER L
+U+F8DA KLINGON LETTER M
+U+F8DB KLINGON LETTER N
+U+F8DC KLINGON LETTER NG
+U+F8DD KLINGON LETTER O
+U+F8DE KLINGON LETTER P
+U+F8DF KLINGON LETTER Q
+ - Written <q> in standard Okrand Latin transliteration
+U+F8E0 KLINGON LETTER QH
+ - Written <Q> in standard Okrand Latin transliteration
+U+F8E1 KLINGON LETTER R
+U+F8E2 KLINGON LETTER S
+U+F8E3 KLINGON LETTER T
+U+F8E4 KLINGON LETTER TLH
+U+F8E5 KLINGON LETTER U
+U+F8E6 KLINGON LETTER V
+U+F8E7 KLINGON LETTER W
+U+F8E8 KLINGON LETTER Y
+U+F8E9 KLINGON LETTER GLOTTAL STOP
+
+U+F8F0 KLINGON DIGIT ZERO
+U+F8F1 KLINGON DIGIT ONE
+U+F8F2 KLINGON DIGIT TWO
+U+F8F3 KLINGON DIGIT THREE
+U+F8F4 KLINGON DIGIT FOUR
+U+F8F5 KLINGON DIGIT FIVE
+U+F8F6 KLINGON DIGIT SIX
+U+F8F7 KLINGON DIGIT SEVEN
+U+F8F8 KLINGON DIGIT EIGHT
+U+F8F9 KLINGON DIGIT NINE
+
+U+F8FD KLINGON COMMA
+U+F8FE KLINGON FULL STOP
+U+F8FF KLINGON SYMBOL FOR EMPIRE
+
+Other Fictional and Artificial Scripts
+--------------------------------------
+
+Since the assignment of the Klingon Linux Unicode block, a registry of
+fictional and artificial scripts has been established by John Cowan
+<jcowan@reutershealth.com> and Michael Everson <everson@evertype.com>.
+The ConScript Unicode Registry is accessible at:
+
+ http://www.evertype.com/standards/csur/
+
+The ranges used fall at the low end of the End User Zone and can hence
+not be normatively assigned, but it is recommended that people who
+wish to encode fictional scripts use these codes, in the interest of
+interoperability. For Klingon, CSUR has adopted the Linux encoding.
+The CSUR people are driving adding Tengwar and Cirth into Unicode
+Plane 1; the addition of Klingon to Unicode Plane 1 has been rejected
+and so the above encoding remains official.
diff --git a/Documentation/unshare.txt b/Documentation/unshare.txt
new file mode 100644
index 000000000..a8643513a
--- /dev/null
+++ b/Documentation/unshare.txt
@@ -0,0 +1,295 @@
+
+unshare system call:
+--------------------
+This document describes the new system call, unshare. The document
+provides an overview of the feature, why it is needed, how it can
+be used, its interface specification, design, implementation and
+how it can be tested.
+
+Change Log:
+-----------
+version 0.1 Initial document, Janak Desai (janak@us.ibm.com), Jan 11, 2006
+
+Contents:
+---------
+ 1) Overview
+ 2) Benefits
+ 3) Cost
+ 4) Requirements
+ 5) Functional Specification
+ 6) High Level Design
+ 7) Low Level Design
+ 8) Test Specification
+ 9) Future Work
+
+1) Overview
+-----------
+Most legacy operating system kernels support an abstraction of threads
+as multiple execution contexts within a process. These kernels provide
+special resources and mechanisms to maintain these "threads". The Linux
+kernel, in a clever and simple manner, does not make distinction
+between processes and "threads". The kernel allows processes to share
+resources and thus they can achieve legacy "threads" behavior without
+requiring additional data structures and mechanisms in the kernel. The
+power of implementing threads in this manner comes not only from
+its simplicity but also from allowing application programmers to work
+outside the confinement of all-or-nothing shared resources of legacy
+threads. On Linux, at the time of thread creation using the clone system
+call, applications can selectively choose which resources to share
+between threads.
+
+unshare system call adds a primitive to the Linux thread model that
+allows threads to selectively 'unshare' any resources that were being
+shared at the time of their creation. unshare was conceptualized by
+Al Viro in the August of 2000, on the Linux-Kernel mailing list, as part
+of the discussion on POSIX threads on Linux. unshare augments the
+usefulness of Linux threads for applications that would like to control
+shared resources without creating a new process. unshare is a natural
+addition to the set of available primitives on Linux that implement
+the concept of process/thread as a virtual machine.
+
+2) Benefits
+-----------
+unshare would be useful to large application frameworks such as PAM
+where creating a new process to control sharing/unsharing of process
+resources is not possible. Since namespaces are shared by default
+when creating a new process using fork or clone, unshare can benefit
+even non-threaded applications if they have a need to disassociate
+from default shared namespace. The following lists two use-cases
+where unshare can be used.
+
+2.1 Per-security context namespaces
+-----------------------------------
+unshare can be used to implement polyinstantiated directories using
+the kernel's per-process namespace mechanism. Polyinstantiated directories,
+such as per-user and/or per-security context instance of /tmp, /var/tmp or
+per-security context instance of a user's home directory, isolate user
+processes when working with these directories. Using unshare, a PAM
+module can easily setup a private namespace for a user at login.
+Polyinstantiated directories are required for Common Criteria certification
+with Labeled System Protection Profile, however, with the availability
+of shared-tree feature in the Linux kernel, even regular Linux systems
+can benefit from setting up private namespaces at login and
+polyinstantiating /tmp, /var/tmp and other directories deemed
+appropriate by system administrators.
+
+2.2 unsharing of virtual memory and/or open files
+-------------------------------------------------
+Consider a client/server application where the server is processing
+client requests by creating processes that share resources such as
+virtual memory and open files. Without unshare, the server has to
+decide what needs to be shared at the time of creating the process
+which services the request. unshare allows the server an ability to
+disassociate parts of the context during the servicing of the
+request. For large and complex middleware application frameworks, this
+ability to unshare after the process was created can be very
+useful.
+
+3) Cost
+-------
+In order to not duplicate code and to handle the fact that unshare
+works on an active task (as opposed to clone/fork working on a newly
+allocated inactive task) unshare had to make minor reorganizational
+changes to copy_* functions utilized by clone/fork system call.
+There is a cost associated with altering existing, well tested and
+stable code to implement a new feature that may not get exercised
+extensively in the beginning. However, with proper design and code
+review of the changes and creation of an unshare test for the LTP
+the benefits of this new feature can exceed its cost.
+
+4) Requirements
+---------------
+unshare reverses sharing that was done using clone(2) system call,
+so unshare should have a similar interface as clone(2). That is,
+since flags in clone(int flags, void *stack) specifies what should
+be shared, similar flags in unshare(int flags) should specify
+what should be unshared. Unfortunately, this may appear to invert
+the meaning of the flags from the way they are used in clone(2).
+However, there was no easy solution that was less confusing and that
+allowed incremental context unsharing in future without an ABI change.
+
+unshare interface should accommodate possible future addition of
+new context flags without requiring a rebuild of old applications.
+If and when new context flags are added, unshare design should allow
+incremental unsharing of those resources on an as needed basis.
+
+5) Functional Specification
+---------------------------
+NAME
+ unshare - disassociate parts of the process execution context
+
+SYNOPSIS
+ #include <sched.h>
+
+ int unshare(int flags);
+
+DESCRIPTION
+ unshare allows a process to disassociate parts of its execution
+ context that are currently being shared with other processes. Part
+ of execution context, such as the namespace, is shared by default
+ when a new process is created using fork(2), while other parts,
+ such as the virtual memory, open file descriptors, etc, may be
+ shared by explicit request to share them when creating a process
+ using clone(2).
+
+ The main use of unshare is to allow a process to control its
+ shared execution context without creating a new process.
+
+ The flags argument specifies one or bitwise-or'ed of several of
+ the following constants.
+
+ CLONE_FS
+ If CLONE_FS is set, file system information of the caller
+ is disassociated from the shared file system information.
+
+ CLONE_FILES
+ If CLONE_FILES is set, the file descriptor table of the
+ caller is disassociated from the shared file descriptor
+ table.
+
+ CLONE_NEWNS
+ If CLONE_NEWNS is set, the namespace of the caller is
+ disassociated from the shared namespace.
+
+ CLONE_VM
+ If CLONE_VM is set, the virtual memory of the caller is
+ disassociated from the shared virtual memory.
+
+RETURN VALUE
+ On success, zero returned. On failure, -1 is returned and errno is
+
+ERRORS
+ EPERM CLONE_NEWNS was specified by a non-root process (process
+ without CAP_SYS_ADMIN).
+
+ ENOMEM Cannot allocate sufficient memory to copy parts of caller's
+ context that need to be unshared.
+
+ EINVAL Invalid flag was specified as an argument.
+
+CONFORMING TO
+ The unshare() call is Linux-specific and should not be used
+ in programs intended to be portable.
+
+SEE ALSO
+ clone(2), fork(2)
+
+6) High Level Design
+--------------------
+Depending on the flags argument, the unshare system call allocates
+appropriate process context structures, populates it with values from
+the current shared version, associates newly duplicated structures
+with the current task structure and releases corresponding shared
+versions. Helper functions of clone (copy_*) could not be used
+directly by unshare because of the following two reasons.
+ 1) clone operates on a newly allocated not-yet-active task
+ structure, where as unshare operates on the current active
+ task. Therefore unshare has to take appropriate task_lock()
+ before associating newly duplicated context structures
+ 2) unshare has to allocate and duplicate all context structures
+ that are being unshared, before associating them with the
+ current task and releasing older shared structures. Failure
+ do so will create race conditions and/or oops when trying
+ to backout due to an error. Consider the case of unsharing
+ both virtual memory and namespace. After successfully unsharing
+ vm, if the system call encounters an error while allocating
+ new namespace structure, the error return code will have to
+ reverse the unsharing of vm. As part of the reversal the
+ system call will have to go back to older, shared, vm
+ structure, which may not exist anymore.
+
+Therefore code from copy_* functions that allocated and duplicated
+current context structure was moved into new dup_* functions. Now,
+copy_* functions call dup_* functions to allocate and duplicate
+appropriate context structures and then associate them with the
+task structure that is being constructed. unshare system call on
+the other hand performs the following:
+ 1) Check flags to force missing, but implied, flags
+ 2) For each context structure, call the corresponding unshare
+ helper function to allocate and duplicate a new context
+ structure, if the appropriate bit is set in the flags argument.
+ 3) If there is no error in allocation and duplication and there
+ are new context structures then lock the current task structure,
+ associate new context structures with the current task structure,
+ and release the lock on the current task structure.
+ 4) Appropriately release older, shared, context structures.
+
+7) Low Level Design
+-------------------
+Implementation of unshare can be grouped in the following 4 different
+items:
+ a) Reorganization of existing copy_* functions
+ b) unshare system call service function
+ c) unshare helper functions for each different process context
+ d) Registration of system call number for different architectures
+
+ 7.1) Reorganization of copy_* functions
+ Each copy function such as copy_mm, copy_namespace, copy_files,
+ etc, had roughly two components. The first component allocated
+ and duplicated the appropriate structure and the second component
+ linked it to the task structure passed in as an argument to the copy
+ function. The first component was split into its own function.
+ These dup_* functions allocated and duplicated the appropriate
+ context structure. The reorganized copy_* functions invoked
+ their corresponding dup_* functions and then linked the newly
+ duplicated structures to the task structure with which the
+ copy function was called.
+
+ 7.2) unshare system call service function
+ * Check flags
+ Force implied flags. If CLONE_THREAD is set force CLONE_VM.
+ If CLONE_VM is set, force CLONE_SIGHAND. If CLONE_SIGHAND is
+ set and signals are also being shared, force CLONE_THREAD. If
+ CLONE_NEWNS is set, force CLONE_FS.
+ * For each context flag, invoke the corresponding unshare_*
+ helper routine with flags passed into the system call and a
+ reference to pointer pointing the new unshared structure
+ * If any new structures are created by unshare_* helper
+ functions, take the task_lock() on the current task,
+ modify appropriate context pointers, and release the
+ task lock.
+ * For all newly unshared structures, release the corresponding
+ older, shared, structures.
+
+ 7.3) unshare_* helper functions
+ For unshare_* helpers corresponding to CLONE_SYSVSEM, CLONE_SIGHAND,
+ and CLONE_THREAD, return -EINVAL since they are not implemented yet.
+ For others, check the flag value to see if the unsharing is
+ required for that structure. If it is, invoke the corresponding
+ dup_* function to allocate and duplicate the structure and return
+ a pointer to it.
+
+ 7.4) Appropriately modify architecture specific code to register the
+ new system call.
+
+8) Test Specification
+---------------------
+The test for unshare should test the following:
+ 1) Valid flags: Test to check that clone flags for signal and
+ signal handlers, for which unsharing is not implemented
+ yet, return -EINVAL.
+ 2) Missing/implied flags: Test to make sure that if unsharing
+ namespace without specifying unsharing of filesystem, correctly
+ unshares both namespace and filesystem information.
+ 3) For each of the four (namespace, filesystem, files and vm)
+ supported unsharing, verify that the system call correctly
+ unshares the appropriate structure. Verify that unsharing
+ them individually as well as in combination with each
+ other works as expected.
+ 4) Concurrent execution: Use shared memory segments and futex on
+ an address in the shm segment to synchronize execution of
+ about 10 threads. Have a couple of threads execute execve,
+ a couple _exit and the rest unshare with different combination
+ of flags. Verify that unsharing is performed as expected and
+ that there are no oops or hangs.
+
+9) Future Work
+--------------
+The current implementation of unshare does not allow unsharing of
+signals and signal handlers. Signals are complex to begin with and
+to unshare signals and/or signal handlers of a currently running
+process is even more complex. If in the future there is a specific
+need to allow unsharing of signals and/or signal handlers, it can
+be incrementally added to unshare without affecting legacy
+applications using unshare.
+
diff --git a/Documentation/usb/CREDITS b/Documentation/usb/CREDITS
new file mode 100644
index 000000000..67c59cdc9
--- /dev/null
+++ b/Documentation/usb/CREDITS
@@ -0,0 +1,175 @@
+Credits for the Simple Linux USB Driver:
+
+The following people have contributed to this code (in alphabetical
+order by last name). I'm sure this list should be longer, its
+difficult to maintain, add yourself with a patch if desired.
+
+ Georg Acher <acher@informatik.tu-muenchen.de>
+ David Brownell <dbrownell@users.sourceforge.net>
+ Alan Cox <alan@lxorguk.ukuu.org.uk>
+ Randy Dunlap <randy.dunlap@intel.com>
+ Johannes Erdfelt <johannes@erdfelt.com>
+ Deti Fliegl <deti@fliegl.de>
+ ham <ham@unsuave.com>
+ Bradley M Keryan <keryan@andrew.cmu.edu>
+ Greg Kroah-Hartman <greg@kroah.com>
+ Pavel Machek <pavel@suse.cz>
+ Paul Mackerras <paulus@cs.anu.edu.au>
+ Petko Manlolov <petkan@dce.bg>
+ David E. Nelson <dnelson@jump.net>
+ Vojtech Pavlik <vojtech@suse.cz>
+ Bill Ryder <bryder@sgi.com>
+ Thomas Sailer <sailer@ife.ee.ethz.ch>
+ Gregory P. Smith <greg@electricrain.com>
+ Linus Torvalds <torvalds@linux-foundation.org>
+ Roman Weissgaerber <weissg@vienna.at>
+ <Kazuki.Yasumatsu@fujixerox.co.jp>
+
+Special thanks to:
+
+ Inaky Perez Gonzalez <inaky@peloncho.fis.ucm.es> for starting the
+ Linux USB driver effort and writing much of the larger uusbd driver.
+ Much has been learned from that effort.
+
+ The NetBSD & FreeBSD USB developers. For being on the Linux USB list
+ and offering suggestions and sharing implementation experiences.
+
+Additional thanks to the following companies and people for donations
+of hardware, support, time and development (this is from the original
+THANKS file in Inaky's driver):
+
+ The following corporations have helped us in the development
+ of Linux USB / UUSBD:
+
+ - 3Com GmbH for donating a ISDN Pro TA and supporting me
+ in technical questions and with test equipment. I'd never
+ expect such a great help.
+
+ - USAR Systems provided us with one of their excellent USB
+ Evaluation Kits. It allows us to test the Linux-USB driver
+ for compliance with the latest USB specification. USAR
+ Systems recognized the importance of an up-to-date open
+ Operating System and supports this project with
+ Hardware. Thanks!.
+
+ - Thanks to Intel Corporation for their precious help.
+
+ - We teamed up with Cherry to make Linux the first OS with
+ built-in USB support. Cherry is one of the biggest keyboard
+ makers in the world.
+
+ - CMD Technology, Inc. sponsored us kindly donating a CSA-6700
+ PCI-to-USB Controller Board to test the OHCI implementation.
+
+ - Due to their support to us, Keytronic can be sure that they
+ will sell keyboards to some of the 3 million (at least)
+ Linux users.
+
+ - Many thanks to ing büro h doran [http://www.ibhdoran.com]!
+ It was almost impossible to get a PC backplate USB connector
+ for the motherboard here at Europe (mine, home-made, was
+ quite lousy :). Now I know where to acquire nice USB stuff!
+
+ - Genius Germany donated a USB mouse to test the mouse boot
+ protocol. They've also donated a F-23 digital joystick and a
+ NetMouse Pro. Thanks!
+
+ - AVM GmbH Berlin is supporting the development of the Linux
+ USB driver for the AVM ISDN Controller B1 USB. AVM is a
+ leading manufacturer for active and passive ISDN Controllers
+ and CAPI 2.0-based software. The active design of the AVM B1
+ is open for all OS platforms, including Linux.
+
+ - Thanks to Y-E Data, Inc. for donating their FlashBuster-U
+ USB Floppy Disk Drive, so we could test the bulk transfer
+ code.
+
+ - Many thanks to Logitech for contributing a three axis USB
+ mouse.
+
+ Logitech designs, manufactures and markets
+ Human Interface Devices, having a long history and
+ experience in making devices such as keyboards, mice,
+ trackballs, cameras, loudspeakers and control devices for
+ gaming and professional use.
+
+ Being a recognized vendor and seller for all these devices,
+ they have donated USB mice, a joystick and a scanner, as a
+ way to acknowledge the importance of Linux and to allow
+ Logitech customers to enjoy support in their favorite
+ operating systems and all Linux users to use Logitech and
+ other USB hardware.
+
+ Logitech is official sponsor of the Linux Conference on
+ Feb. 11th 1999 in Vienna, where we'll will present the
+ current state of the Linux USB effort.
+
+ - CATC has provided means to uncover dark corners of the UHCI
+ inner workings with a USB Inspector.
+
+ - Thanks to Entrega for providing PCI to USB cards, hubs and
+ converter products for development.
+
+ - Thanks to ConnectTech for providing a WhiteHEAT usb to
+ serial converter, and the documentation for the device to
+ allow a driver to be written.
+
+ - Thanks to ADMtek for providing Pegasus and Pegasus II
+ evaluation boards, specs and valuable advices during
+ the driver development.
+
+ And thanks go to (hey! in no particular order :)
+
+ - Oren Tirosh <orenti@hishome.net>, for standing so patiently
+ all my doubts'bout USB and giving lots of cool ideas.
+
+ - Jochen Karrer <karrer@wpfd25.physik.uni-wuerzburg.de>, for
+ pointing out mortal bugs and giving advice.
+
+ - Edmund Humemberger <ed@atnet.at>, for it's great work on
+ public relationships and general management stuff for the
+ Linux-USB effort.
+
+ - Alberto Menegazzi <flash@flash.iol.it> is starting the
+ documentation for the UUSBD. Go for it!
+
+ - Ric Klaren <ia_ric@cs.utwente.nl> for doing nice
+ introductory documents (competing with Alberto's :).
+
+ - Christian Groessler <cpg@aladdin.de>, for it's help on those
+ itchy bits ... :)
+
+ - Paul MacKerras for polishing OHCI and pushing me harder for
+ the iMac support, giving improvements and enhancements.
+
+ - Fernando Herrera <fherrera@eurielec.etsit.upm.es> has taken
+ charge of composing, maintaining and feeding the
+ long-awaited, unique and marvelous UUSBD FAQ! Tadaaaa!!!
+
+ - Rasca Gmelch <thron@gmx.de> has revived the raw driver and
+ pointed bugs, as well as started the uusbd-utils package.
+
+ - Peter Dettori <dettori@ozy.dec.com> is uncovering bugs like
+ crazy, as well as making cool suggestions, great :)
+
+ - All the Free Software and Linux community, the FSF & the GNU
+ project, the MIT X consortium, the TeX people ... everyone!
+ You know who you are!
+
+ - Big thanks to Richard Stallman for creating Emacs!
+
+ - The people at the linux-usb mailing list, for reading so
+ many messages :) Ok, no more kidding; for all your advises!
+
+ - All the people at the USB Implementors Forum for their
+ help and assistance.
+
+ - Nathan Myers <ncm@cantrip.org>, for his advice! (hope you
+ liked Cibeles' party).
+
+ - Linus Torvalds, for starting, developing and managing Linux.
+
+ - Mike Smith, Craig Keithley, Thierry Giron and Janet Schank
+ for convincing me USB Standard hubs are not that standard
+ and that's good to allow for vendor specific quirks on the
+ standard hub driver.
diff --git a/Documentation/usb/URB.txt b/Documentation/usb/URB.txt
new file mode 100644
index 000000000..50da0d455
--- /dev/null
+++ b/Documentation/usb/URB.txt
@@ -0,0 +1,261 @@
+Revised: 2000-Dec-05.
+Again: 2002-Jul-06
+Again: 2005-Sep-19
+
+ NOTE:
+
+ The USB subsystem now has a substantial section in "The Linux Kernel API"
+ guide (in Documentation/DocBook), generated from the current source
+ code. This particular documentation file isn't particularly current or
+ complete; don't rely on it except for a quick overview.
+
+
+1.1. Basic concept or 'What is an URB?'
+
+The basic idea of the new driver is message passing, the message itself is
+called USB Request Block, or URB for short.
+
+- An URB consists of all relevant information to execute any USB transaction
+ and deliver the data and status back.
+
+- Execution of an URB is inherently an asynchronous operation, i.e. the
+ usb_submit_urb(urb) call returns immediately after it has successfully
+ queued the requested action.
+
+- Transfers for one URB can be canceled with usb_unlink_urb(urb) at any time.
+
+- Each URB has a completion handler, which is called after the action
+ has been successfully completed or canceled. The URB also contains a
+ context-pointer for passing information to the completion handler.
+
+- Each endpoint for a device logically supports a queue of requests.
+ You can fill that queue, so that the USB hardware can still transfer
+ data to an endpoint while your driver handles completion of another.
+ This maximizes use of USB bandwidth, and supports seamless streaming
+ of data to (or from) devices when using periodic transfer modes.
+
+
+1.2. The URB structure
+
+Some of the fields in an URB are:
+
+struct urb
+{
+// (IN) device and pipe specify the endpoint queue
+ struct usb_device *dev; // pointer to associated USB device
+ unsigned int pipe; // endpoint information
+
+ unsigned int transfer_flags; // ISO_ASAP, SHORT_NOT_OK, etc.
+
+// (IN) all urbs need completion routines
+ void *context; // context for completion routine
+ void (*complete)(struct urb *); // pointer to completion routine
+
+// (OUT) status after each completion
+ int status; // returned status
+
+// (IN) buffer used for data transfers
+ void *transfer_buffer; // associated data buffer
+ int transfer_buffer_length; // data buffer length
+ int number_of_packets; // size of iso_frame_desc
+
+// (OUT) sometimes only part of CTRL/BULK/INTR transfer_buffer is used
+ int actual_length; // actual data buffer length
+
+// (IN) setup stage for CTRL (pass a struct usb_ctrlrequest)
+ unsigned char* setup_packet; // setup packet (control only)
+
+// Only for PERIODIC transfers (ISO, INTERRUPT)
+ // (IN/OUT) start_frame is set unless ISO_ASAP isn't set
+ int start_frame; // start frame
+ int interval; // polling interval
+
+ // ISO only: packets are only "best effort"; each can have errors
+ int error_count; // number of errors
+ struct usb_iso_packet_descriptor iso_frame_desc[0];
+};
+
+Your driver must create the "pipe" value using values from the appropriate
+endpoint descriptor in an interface that it's claimed.
+
+
+1.3. How to get an URB?
+
+URBs are allocated with the following call
+
+ struct urb *usb_alloc_urb(int isoframes, int mem_flags)
+
+Return value is a pointer to the allocated URB, 0 if allocation failed.
+The parameter isoframes specifies the number of isochronous transfer frames
+you want to schedule. For CTRL/BULK/INT, use 0. The mem_flags parameter
+holds standard memory allocation flags, letting you control (among other
+things) whether the underlying code may block or not.
+
+To free an URB, use
+
+ void usb_free_urb(struct urb *urb)
+
+You may free an urb that you've submitted, but which hasn't yet been
+returned to you in a completion callback. It will automatically be
+deallocated when it is no longer in use.
+
+
+1.4. What has to be filled in?
+
+Depending on the type of transaction, there are some inline functions
+defined in <linux/usb.h> to simplify the initialization, such as
+fill_control_urb() and fill_bulk_urb(). In general, they need the usb
+device pointer, the pipe (usual format from usb.h), the transfer buffer,
+the desired transfer length, the completion handler, and its context.
+Take a look at the some existing drivers to see how they're used.
+
+Flags:
+For ISO there are two startup behaviors: Specified start_frame or ASAP.
+For ASAP set URB_ISO_ASAP in transfer_flags.
+
+If short packets should NOT be tolerated, set URB_SHORT_NOT_OK in
+transfer_flags.
+
+
+1.5. How to submit an URB?
+
+Just call
+
+ int usb_submit_urb(struct urb *urb, int mem_flags)
+
+The mem_flags parameter, such as SLAB_ATOMIC, controls memory allocation,
+such as whether the lower levels may block when memory is tight.
+
+It immediately returns, either with status 0 (request queued) or some
+error code, usually caused by the following:
+
+- Out of memory (-ENOMEM)
+- Unplugged device (-ENODEV)
+- Stalled endpoint (-EPIPE)
+- Too many queued ISO transfers (-EAGAIN)
+- Too many requested ISO frames (-EFBIG)
+- Invalid INT interval (-EINVAL)
+- More than one packet for INT (-EINVAL)
+
+After submission, urb->status is -EINPROGRESS; however, you should never
+look at that value except in your completion callback.
+
+For isochronous endpoints, your completion handlers should (re)submit
+URBs to the same endpoint with the ISO_ASAP flag, using multi-buffering,
+to get seamless ISO streaming.
+
+
+1.6. How to cancel an already running URB?
+
+There are two ways to cancel an URB you've submitted but which hasn't
+been returned to your driver yet. For an asynchronous cancel, call
+
+ int usb_unlink_urb(struct urb *urb)
+
+It removes the urb from the internal list and frees all allocated
+HW descriptors. The status is changed to reflect unlinking. Note
+that the URB will not normally have finished when usb_unlink_urb()
+returns; you must still wait for the completion handler to be called.
+
+To cancel an URB synchronously, call
+
+ void usb_kill_urb(struct urb *urb)
+
+It does everything usb_unlink_urb does, and in addition it waits
+until after the URB has been returned and the completion handler
+has finished. It also marks the URB as temporarily unusable, so
+that if the completion handler or anyone else tries to resubmit it
+they will get a -EPERM error. Thus you can be sure that when
+usb_kill_urb() returns, the URB is totally idle.
+
+There is a lifetime issue to consider. An URB may complete at any
+time, and the completion handler may free the URB. If this happens
+while usb_unlink_urb or usb_kill_urb is running, it will cause a
+memory-access violation. The driver is responsible for avoiding this,
+which often means some sort of lock will be needed to prevent the URB
+from being deallocated while it is still in use.
+
+On the other hand, since usb_unlink_urb may end up calling the
+completion handler, the handler must not take any lock that is held
+when usb_unlink_urb is invoked. The general solution to this problem
+is to increment the URB's reference count while holding the lock, then
+drop the lock and call usb_unlink_urb or usb_kill_urb, and then
+decrement the URB's reference count. You increment the reference
+count by calling
+
+ struct urb *usb_get_urb(struct urb *urb)
+
+(ignore the return value; it is the same as the argument) and
+decrement the reference count by calling usb_free_urb. Of course,
+none of this is necessary if there's no danger of the URB being freed
+by the completion handler.
+
+
+1.7. What about the completion handler?
+
+The handler is of the following type:
+
+ typedef void (*usb_complete_t)(struct urb *)
+
+I.e., it gets the URB that caused the completion call. In the completion
+handler, you should have a look at urb->status to detect any USB errors.
+Since the context parameter is included in the URB, you can pass
+information to the completion handler.
+
+Note that even when an error (or unlink) is reported, data may have been
+transferred. That's because USB transfers are packetized; it might take
+sixteen packets to transfer your 1KByte buffer, and ten of them might
+have transferred successfully before the completion was called.
+
+
+NOTE: ***** WARNING *****
+NEVER SLEEP IN A COMPLETION HANDLER. These are often called in atomic
+context.
+
+In the current kernel, completion handlers run with local interrupts
+disabled, but in the future this will be changed, so don't assume that
+local IRQs are always disabled inside completion handlers.
+
+1.8. How to do isochronous (ISO) transfers?
+
+For ISO transfers you have to fill a usb_iso_packet_descriptor structure,
+allocated at the end of the URB by usb_alloc_urb(n,mem_flags), for each
+packet you want to schedule. You also have to set urb->interval to say
+how often to make transfers; it's often one per frame (which is once
+every microframe for highspeed devices). The actual interval used will
+be a power of two that's no bigger than what you specify.
+
+The usb_submit_urb() call modifies urb->interval to the implemented interval
+value that is less than or equal to the requested interval value. If
+ISO_ASAP scheduling is used, urb->start_frame is also updated.
+
+For each entry you have to specify the data offset for this frame (base is
+transfer_buffer), and the length you want to write/expect to read.
+After completion, actual_length contains the actual transferred length and
+status contains the resulting status for the ISO transfer for this frame.
+It is allowed to specify a varying length from frame to frame (e.g. for
+audio synchronisation/adaptive transfer rates). You can also use the length
+0 to omit one or more frames (striping).
+
+For scheduling you can choose your own start frame or ISO_ASAP. As explained
+earlier, if you always keep at least one URB queued and your completion
+keeps (re)submitting a later URB, you'll get smooth ISO streaming (if usb
+bandwidth utilization allows).
+
+If you specify your own start frame, make sure it's several frames in advance
+of the current frame. You might want this model if you're synchronizing
+ISO data with some other event stream.
+
+
+1.9. How to start interrupt (INT) transfers?
+
+Interrupt transfers, like isochronous transfers, are periodic, and happen
+in intervals that are powers of two (1, 2, 4 etc) units. Units are frames
+for full and low speed devices, and microframes for high speed ones.
+The usb_submit_urb() call modifies urb->interval to the implemented interval
+value that is less than or equal to the requested interval value.
+
+In Linux 2.6, unlike earlier versions, interrupt URBs are not automagically
+restarted when they complete. They end when the completion handler is
+called, just like other URBs. If you want an interrupt URB to be restarted,
+your completion handler must resubmit it.
diff --git a/Documentation/usb/WUSB-Design-overview.txt b/Documentation/usb/WUSB-Design-overview.txt
new file mode 100644
index 000000000..fdb476377
--- /dev/null
+++ b/Documentation/usb/WUSB-Design-overview.txt
@@ -0,0 +1,439 @@
+
+Linux UWB + Wireless USB + WiNET
+
+ (C) 2005-2006 Intel Corporation
+ Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License version
+ 2 as published by the Free Software Foundation.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+
+Please visit http://bughost.org/thewiki/Design-overview.txt-1.8 for
+updated content.
+
+ * Design-overview.txt-1.8
+
+This code implements a Ultra Wide Band stack for Linux, as well as
+drivers for the USB based UWB radio controllers defined in the
+Wireless USB 1.0 specification (including Wireless USB host controller
+and an Intel WiNET controller).
+
+ 1. Introduction
+ 1. HWA: Host Wire adapters, your Wireless USB dongle
+
+ 2. DWA: Device Wired Adaptor, a Wireless USB hub for wired
+ devices
+ 3. WHCI: Wireless Host Controller Interface, the PCI WUSB host
+ adapter
+ 2. The UWB stack
+ 1. Devices and hosts: the basic structure
+
+ 2. Host Controller life cycle
+
+ 3. On the air: beacons and enumerating the radio neighborhood
+
+ 4. Device lists
+ 5. Bandwidth allocation
+
+ 3. Wireless USB Host Controller drivers
+
+ 4. Glossary
+
+
+ Introduction
+
+UWB is a wide-band communication protocol that is to serve also as the
+low-level protocol for others (much like TCP sits on IP). Currently
+these others are Wireless USB and TCP/IP, but seems Bluetooth and
+Firewire/1394 are coming along.
+
+UWB uses a band from roughly 3 to 10 GHz, transmitting at a max of
+~-41dB (or 0.074 uW/MHz--geography specific data is still being
+negotiated w/ regulators, so watch for changes). That band is divided in
+a bunch of ~1.5 GHz wide channels (or band groups) composed of three
+subbands/subchannels (528 MHz each). Each channel is independent of each
+other, so you could consider them different "busses". Initially this
+driver considers them all a single one.
+
+Radio time is divided in 65536 us long /superframes/, each one divided
+in 256 256us long /MASs/ (Media Allocation Slots), which are the basic
+time/media allocation units for transferring data. At the beginning of
+each superframe there is a Beacon Period (BP), where every device
+transmit its beacon on a single MAS. The length of the BP depends on how
+many devices are present and the length of their beacons.
+
+Devices have a MAC (fixed, 48 bit address) and a device (changeable, 16
+bit address) and send periodic beacons to advertise themselves and pass
+info on what they are and do. They advertise their capabilities and a
+bunch of other stuff.
+
+The different logical parts of this driver are:
+
+ *
+
+ *UWB*: the Ultra-Wide-Band stack -- manages the radio and
+ associated spectrum to allow for devices sharing it. Allows to
+ control bandwidth assignment, beaconing, scanning, etc
+
+ *
+
+ *WUSB*: the layer that sits on top of UWB to provide Wireless USB.
+ The Wireless USB spec defines means to control a UWB radio and to
+ do the actual WUSB.
+
+
+ HWA: Host Wire adapters, your Wireless USB dongle
+
+WUSB also defines a device called a Host Wire Adaptor (HWA), which in
+mere terms is a USB dongle that enables your PC to have UWB and Wireless
+USB. The Wireless USB Host Controller in a HWA looks to the host like a
+[Wireless] USB controller connected via USB (!)
+
+The HWA itself is broken in two or three main interfaces:
+
+ *
+
+ *RC*: Radio control -- this implements an interface to the
+ Ultra-Wide-Band radio controller. The driver for this implements a
+ USB-based UWB Radio Controller to the UWB stack.
+
+ *
+
+ *HC*: the wireless USB host controller. It looks like a USB host
+ whose root port is the radio and the WUSB devices connect to it.
+ To the system it looks like a separate USB host. The driver (will)
+ implement a USB host controller (similar to UHCI, OHCI or EHCI)
+ for which the root hub is the radio...To reiterate: it is a USB
+ controller that is connected via USB instead of PCI.
+
+ *
+
+ *WINET*: some HW provide a WiNET interface (IP over UWB). This
+ package provides a driver for it (it looks like a network
+ interface, winetX). The driver detects when there is a link up for
+ their type and kick into gear.
+
+
+ DWA: Device Wired Adaptor, a Wireless USB hub for wired devices
+
+These are the complement to HWAs. They are a USB host for connecting
+wired devices, but it is connected to your PC connected via Wireless
+USB. To the system it looks like yet another USB host. To the untrained
+eye, it looks like a hub that connects upstream wirelessly.
+
+We still offer no support for this; however, it should share a lot of
+code with the HWA-RC driver; there is a bunch of factorization work that
+has been done to support that in upcoming releases.
+
+
+ WHCI: Wireless Host Controller Interface, the PCI WUSB host adapter
+
+This is your usual PCI device that implements WHCI. Similar in concept
+to EHCI, it allows your wireless USB devices (including DWAs) to connect
+to your host via a PCI interface. As in the case of the HWA, it has a
+Radio Control interface and the WUSB Host Controller interface per se.
+
+There is still no driver support for this, but will be in upcoming
+releases.
+
+
+ The UWB stack
+
+The main mission of the UWB stack is to keep a tally of which devices
+are in radio proximity to allow drivers to connect to them. As well, it
+provides an API for controlling the local radio controllers (RCs from
+now on), such as to start/stop beaconing, scan, allocate bandwidth, etc.
+
+
+ Devices and hosts: the basic structure
+
+The main building block here is the UWB device (struct uwb_dev). For
+each device that pops up in radio presence (ie: the UWB host receives a
+beacon from it) you get a struct uwb_dev that will show up in
+/sys/bus/uwb/devices.
+
+For each RC that is detected, a new struct uwb_rc and struct uwb_dev are
+created. An entry is also created in /sys/class/uwb_rc for each RC.
+
+Each RC driver is implemented by a separate driver that plugs into the
+interface that the UWB stack provides through a struct uwb_rc_ops. The
+spec creators have been nice enough to make the message format the same
+for HWA and WHCI RCs, so the driver is really a very thin transport that
+moves the requests from the UWB API to the device [/uwb_rc_ops->cmd()/]
+and sends the replies and notifications back to the API
+[/uwb_rc_neh_grok()/]. Notifications are handled to the UWB daemon, that
+is chartered, among other things, to keep the tab of how the UWB radio
+neighborhood looks, creating and destroying devices as they show up or
+disappear.
+
+Command execution is very simple: a command block is sent and a event
+block or reply is expected back. For sending/receiving command/events, a
+handle called /neh/ (Notification/Event Handle) is opened with
+/uwb_rc_neh_open()/.
+
+The HWA-RC (USB dongle) driver (drivers/uwb/hwa-rc.c) does this job for
+the USB connected HWA. Eventually, drivers/whci-rc.c will do the same
+for the PCI connected WHCI controller.
+
+
+ Host Controller life cycle
+
+So let's say we connect a dongle to the system: it is detected and
+firmware uploaded if needed [for Intel's i1480
+/drivers/uwb/ptc/usb.c:ptc_usb_probe()/] and then it is reenumerated.
+Now we have a real HWA device connected and
+/drivers/uwb/hwa-rc.c:hwarc_probe()/ picks it up, that will set up the
+Wire-Adaptor environment and then suck it into the UWB stack's vision of
+the world [/drivers/uwb/lc-rc.c:uwb_rc_add()/].
+
+ *
+
+ [*] The stack should put a new RC to scan for devices
+ [/uwb_rc_scan()/] so it finds what's available around and tries to
+ connect to them, but this is policy stuff and should be driven
+ from user space. As of now, the operator is expected to do it
+ manually; see the release notes for documentation on the procedure.
+
+When a dongle is disconnected, /drivers/uwb/hwa-rc.c:hwarc_disconnect()/
+takes time of tearing everything down safely (or not...).
+
+
+ On the air: beacons and enumerating the radio neighborhood
+
+So assuming we have devices and we have agreed for a channel to connect
+on (let's say 9), we put the new RC to beacon:
+
+ *
+
+ $ echo 9 0 > /sys/class/uwb_rc/uwb0/beacon
+
+Now it is visible. If there were other devices in the same radio channel
+and beacon group (that's what the zero is for), the dongle's radio
+control interface will send beacon notifications on its
+notification/event endpoint (NEEP). The beacon notifications are part of
+the event stream that is funneled into the API with
+/drivers/uwb/neh.c:uwb_rc_neh_grok()/ and delivered to the UWBD, the UWB
+daemon through a notification list.
+
+UWBD wakes up and scans the event list; finds a beacon and adds it to
+the BEACON CACHE (/uwb_beca/). If he receives a number of beacons from
+the same device, he considers it to be 'onair' and creates a new device
+[/drivers/uwb/lc-dev.c:uwbd_dev_onair()/]. Similarly, when no beacons
+are received in some time, the device is considered gone and wiped out
+[uwbd calls periodically /uwb/beacon.c:uwb_beca_purge()/ that will purge
+the beacon cache of dead devices].
+
+
+ Device lists
+
+All UWB devices are kept in the list of the struct bus_type uwb_bus_type.
+
+
+ Bandwidth allocation
+
+The UWB stack maintains a local copy of DRP availability through
+processing of incoming *DRP Availability Change* notifications. This
+local copy is currently used to present the current bandwidth
+availability to the user through the sysfs file
+/sys/class/uwb_rc/uwbx/bw_avail. In the future the bandwidth
+availability information will be used by the bandwidth reservation
+routines.
+
+The bandwidth reservation routines are in progress and are thus not
+present in the current release. When completed they will enable a user
+to initiate DRP reservation requests through interaction with sysfs. DRP
+reservation requests from remote UWB devices will also be handled. The
+bandwidth management done by the UWB stack will include callbacks to the
+higher layers will enable the higher layers to use the reservations upon
+completion. [Note: The bandwidth reservation work is in progress and
+subject to change.]
+
+
+ Wireless USB Host Controller drivers
+
+*WARNING* This section needs a lot of work!
+
+As explained above, there are three different types of HCs in the WUSB
+world: HWA-HC, DWA-HC and WHCI-HC.
+
+HWA-HC and DWA-HC share that they are Wire-Adapters (USB or WUSB
+connected controllers), and their transfer management system is almost
+identical. So is their notification delivery system.
+
+HWA-HC and WHCI-HC share that they are both WUSB host controllers, so
+they have to deal with WUSB device life cycle and maintenance, wireless
+root-hub
+
+HWA exposes a Host Controller interface (HWA-HC 0xe0/02/02). This has
+three endpoints (Notifications, Data Transfer In and Data Transfer
+Out--known as NEP, DTI and DTO in the code).
+
+We reserve UWB bandwidth for our Wireless USB Cluster, create a Cluster
+ID and tell the HC to use all that. Then we start it. This means the HC
+starts sending MMCs.
+
+ *
+
+ The MMCs are blocks of data defined somewhere in the WUSB1.0 spec
+ that define a stream in the UWB channel time allocated for sending
+ WUSB IEs (host to device commands/notifications) and Device
+ Notifications (device initiated to host). Each host defines a
+ unique Wireless USB cluster through MMCs. Devices can connect to a
+ single cluster at the time. The IEs are Information Elements, and
+ among them are the bandwidth allocations that tell each device
+ when can they transmit or receive.
+
+Now it all depends on external stimuli.
+
+*New device connection*
+
+A new device pops up, it scans the radio looking for MMCs that give out
+the existence of Wireless USB channels. Once one (or more) are found,
+selects which one to connect to. Sends a /DN_Connect/ (device
+notification connect) during the DNTS (Device Notification Time
+Slot--announced in the MMCs
+
+HC picks the /DN_Connect/ out (nep module sends to notif.c for delivery
+into /devconnect/). This process starts the authentication process for
+the device. First we allocate a /fake port/ and assign an
+unauthenticated address (128 to 255--what we really do is
+0x80 | fake_port_idx). We fiddle with the fake port status and /hub_wq/
+sees a new connection, so he moves on to enable the fake port with a reset.
+
+So now we are in the reset path -- we know we have a non-yet enumerated
+device with an unauthorized address; we ask user space to authenticate
+(FIXME: not yet done, similar to bluetooth pairing), then we do the key
+exchange (FIXME: not yet done) and issue a /set address 0/ to bring the
+device to the default state. Device is authenticated.
+
+From here, the USB stack takes control through the usb_hcd ops. hub_wq
+has seen the port status changes, as we have been toggling them. It will
+start enumerating and doing transfers through usb_hcd->urb_enqueue() to
+read descriptors and move our data.
+
+*Device life cycle and keep alives*
+
+Every time there is a successful transfer to/from a device, we update a
+per-device activity timestamp. If not, every now and then we check and
+if the activity timestamp gets old, we ping the device by sending it a
+Keep Alive IE; it responds with a /DN_Alive/ pong during the DNTS (this
+arrives to us as a notification through
+devconnect.c:wusb_handle_dn_alive(). If a device times out, we
+disconnect it from the system (cleaning up internal information and
+toggling the bits in the fake hub port, which kicks hub_wq into removing
+the rest of the stuff).
+
+This is done through devconnect:__wusb_check_devs(), which will scan the
+device list looking for whom needs refreshing.
+
+If the device wants to disconnect, it will either die (ugly) or send a
+/DN_Disconnect/ that will prompt a disconnection from the system.
+
+*Sending and receiving data*
+
+Data is sent and received through /Remote Pipes/ (rpipes). An rpipe is
+/aimed/ at an endpoint in a WUSB device. This is the same for HWAs and
+DWAs.
+
+Each HC has a number of rpipes and buffers that can be assigned to them;
+when doing a data transfer (xfer), first the rpipe has to be aimed and
+prepared (buffers assigned), then we can start queueing requests for
+data in or out.
+
+Data buffers have to be segmented out before sending--so we send first a
+header (segment request) and then if there is any data, a data buffer
+immediately after to the DTI interface (yep, even the request). If our
+buffer is bigger than the max segment size, then we just do multiple
+requests.
+
+[This sucks, because doing USB scatter gatter in Linux is resource
+intensive, if any...not that the current approach is not. It just has to
+be cleaned up a lot :)].
+
+If reading, we don't send data buffers, just the segment headers saying
+we want to read segments.
+
+When the xfer is executed, we receive a notification that says data is
+ready in the DTI endpoint (handled through
+xfer.c:wa_handle_notif_xfer()). In there we read from the DTI endpoint a
+descriptor that gives us the status of the transfer, its identification
+(given when we issued it) and the segment number. If it was a data read,
+we issue another URB to read into the destination buffer the chunk of
+data coming out of the remote endpoint. Done, wait for the next guy. The
+callbacks for the URBs issued from here are the ones that will declare
+the xfer complete at some point and call its callback.
+
+Seems simple, but the implementation is not trivial.
+
+ *
+
+ *WARNING* Old!!
+
+The main xfer descriptor, wa_xfer (equivalent to a URB) contains an
+array of segments, tallys on segments and buffers and callback
+information. Buried in there is a lot of URBs for executing the segments
+and buffer transfers.
+
+For OUT xfers, there is an array of segments, one URB for each, another
+one of buffer URB. When submitting, we submit URBs for segment request
+1, buffer 1, segment 2, buffer 2...etc. Then we wait on the DTI for xfer
+result data; when all the segments are complete, we call the callback to
+finalize the transfer.
+
+For IN xfers, we only issue URBs for the segments we want to read and
+then wait for the xfer result data.
+
+*URB mapping into xfers*
+
+This is done by hwahc_op_urb_[en|de]queue(). In enqueue() we aim an
+rpipe to the endpoint where we have to transmit, create a transfer
+context (wa_xfer) and submit it. When the xfer is done, our callback is
+called and we assign the status bits and release the xfer resources.
+
+In dequeue() we are basically cancelling/aborting the transfer. We issue
+a xfer abort request to the HC, cancel all the URBs we had submitted
+and not yet done and when all that is done, the xfer callback will be
+called--this will call the URB callback.
+
+
+ Glossary
+
+*DWA* -- Device Wire Adapter
+
+USB host, wired for downstream devices, upstream connects wirelessly
+with Wireless USB.
+
+*EVENT* -- Response to a command on the NEEP
+
+*HWA* -- Host Wire Adapter / USB dongle for UWB and Wireless USB
+
+*NEH* -- Notification/Event Handle
+
+Handle/file descriptor for receiving notifications or events. The WA
+code requires you to get one of this to listen for notifications or
+events on the NEEP.
+
+*NEEP* -- Notification/Event EndPoint
+
+Stuff related to the management of the first endpoint of a HWA USB
+dongle that is used to deliver an stream of events and notifications to
+the host.
+
+*NOTIFICATION* -- Message coming in the NEEP as response to something.
+
+*RC* -- Radio Control
+
+Design-overview.txt-1.8 (last edited 2006-11-04 12:22:24 by
+InakyPerezGonzalez)
+
diff --git a/Documentation/usb/acm.txt b/Documentation/usb/acm.txt
new file mode 100644
index 000000000..17f5c2e1a
--- /dev/null
+++ b/Documentation/usb/acm.txt
@@ -0,0 +1,128 @@
+ Linux ACM driver v0.16
+ (c) 1999 Vojtech Pavlik <vojtech@suse.cz>
+ Sponsored by SuSE
+----------------------------------------------------------------------------
+
+0. Disclaimer
+~~~~~~~~~~~~~
+ This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2 of the License, or (at your option)
+any later version.
+
+ This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+more details.
+
+ You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc., 59
+Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ Should you need to contact me, the author, you can do so either by e-mail
+- mail your message to <vojtech@suse.cz>, or by paper mail: Vojtech Pavlik,
+Ucitelska 1576, Prague 8, 182 00 Czech Republic
+
+ For your convenience, the GNU General Public License version 2 is included
+in the package: See the file COPYING.
+
+1. Usage
+~~~~~~~~
+ The drivers/usb/class/cdc-acm.c drivers works with USB modems and USB ISDN terminal
+adapters that conform to the Universal Serial Bus Communication Device Class
+Abstract Control Model (USB CDC ACM) specification.
+
+ Many modems do, here is a list of those I know of:
+
+ 3Com OfficeConnect 56k
+ 3Com Voice FaxModem Pro
+ 3Com Sportster
+ MultiTech MultiModem 56k
+ Zoom 2986L FaxModem
+ Compaq 56k FaxModem
+ ELSA Microlink 56k
+
+ I know of one ISDN TA that does work with the acm driver:
+
+ 3Com USR ISDN Pro TA
+
+ Some cell phones also connect via USB. I know the following phones work:
+
+ SonyEricsson K800i
+
+ Unfortunately many modems and most ISDN TAs use proprietary interfaces and
+thus won't work with this drivers. Check for ACM compliance before buying.
+
+ To use the modems you need these modules loaded:
+
+ usbcore.ko
+ uhci-hcd.ko ohci-hcd.ko or ehci-hcd.ko
+ cdc-acm.ko
+
+ After that, the modem[s] should be accessible. You should be able to use
+minicom, ppp and mgetty with them.
+
+2. Verifying that it works
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+ The first step would be to check /proc/bus/usb/devices, it should look
+like this:
+
+T: Bus=01 Lev=00 Prnt=00 Port=00 Cnt=00 Dev#= 1 Spd=12 MxCh= 2
+B: Alloc= 0/900 us ( 0%), #Int= 0, #Iso= 0
+D: Ver= 1.00 Cls=09(hub ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1
+P: Vendor=0000 ProdID=0000 Rev= 0.00
+S: Product=USB UHCI Root Hub
+S: SerialNumber=6800
+C:* #Ifs= 1 Cfg#= 1 Atr=40 MxPwr= 0mA
+I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub
+E: Ad=81(I) Atr=03(Int.) MxPS= 8 Ivl=255ms
+T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=01 Dev#= 2 Spd=12 MxCh= 0
+D: Ver= 1.00 Cls=02(comm.) Sub=00 Prot=00 MxPS= 8 #Cfgs= 2
+P: Vendor=04c1 ProdID=008f Rev= 2.07
+S: Manufacturer=3Com Inc.
+S: Product=3Com U.S. Robotics Pro ISDN TA
+S: SerialNumber=UFT53A49BVT7
+C: #Ifs= 1 Cfg#= 1 Atr=60 MxPwr= 0mA
+I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=acm
+E: Ad=85(I) Atr=02(Bulk) MxPS= 64 Ivl= 0ms
+E: Ad=04(O) Atr=02(Bulk) MxPS= 64 Ivl= 0ms
+E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=128ms
+C:* #Ifs= 2 Cfg#= 2 Atr=60 MxPwr= 0mA
+I: If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=01 Driver=acm
+E: Ad=81(I) Atr=03(Int.) MxPS= 16 Ivl=128ms
+I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=acm
+E: Ad=85(I) Atr=02(Bulk) MxPS= 64 Ivl= 0ms
+E: Ad=04(O) Atr=02(Bulk) MxPS= 64 Ivl= 0ms
+
+The presence of these three lines (and the Cls= 'comm' and 'data' classes)
+is important, it means it's an ACM device. The Driver=acm means the acm
+driver is used for the device. If you see only Cls=ff(vend.) then you're out
+of luck, you have a device with vendor specific-interface.
+
+D: Ver= 1.00 Cls=02(comm.) Sub=00 Prot=00 MxPS= 8 #Cfgs= 2
+I: If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=01 Driver=acm
+I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=acm
+
+In the system log you should see:
+
+usb.c: USB new device connect, assigned device number 2
+usb.c: kmalloc IF c7691fa0, numif 1
+usb.c: kmalloc IF c7b5f3e0, numif 2
+usb.c: skipped 4 class/vendor specific interface descriptors
+usb.c: new device strings: Mfr=1, Product=2, SerialNumber=3
+usb.c: USB device number 2 default language ID 0x409
+Manufacturer: 3Com Inc.
+Product: 3Com U.S. Robotics Pro ISDN TA
+SerialNumber: UFT53A49BVT7
+acm.c: probing config 1
+acm.c: probing config 2
+ttyACM0: USB ACM device
+acm.c: acm_control_msg: rq: 0x22 val: 0x0 len: 0x0 result: 0
+acm.c: acm_control_msg: rq: 0x20 val: 0x0 len: 0x7 result: 7
+usb.c: acm driver claimed interface c7b5f3e0
+usb.c: acm driver claimed interface c7b5f3f8
+usb.c: acm driver claimed interface c7691fa0
+
+If all this seems to be OK, fire up minicom and set it to talk to the ttyACM
+device and try typing 'at'. If it responds with 'OK', then everything is
+working.
diff --git a/Documentation/usb/anchors.txt b/Documentation/usb/anchors.txt
new file mode 100644
index 000000000..fe6a99a32
--- /dev/null
+++ b/Documentation/usb/anchors.txt
@@ -0,0 +1,79 @@
+What is anchor?
+===============
+
+A USB driver needs to support some callbacks requiring
+a driver to cease all IO to an interface. To do so, a
+driver has to keep track of the URBs it has submitted
+to know they've all completed or to call usb_kill_urb
+for them. The anchor is a data structure takes care of
+keeping track of URBs and provides methods to deal with
+multiple URBs.
+
+Allocation and Initialisation
+=============================
+
+There's no API to allocate an anchor. It is simply declared
+as struct usb_anchor. init_usb_anchor() must be called to
+initialise the data structure.
+
+Deallocation
+============
+
+Once it has no more URBs associated with it, the anchor can be
+freed with normal memory management operations.
+
+Association and disassociation of URBs with anchors
+===================================================
+
+An association of URBs to an anchor is made by an explicit
+call to usb_anchor_urb(). The association is maintained until
+an URB is finished by (successful) completion. Thus disassociation
+is automatic. A function is provided to forcibly finish (kill)
+all URBs associated with an anchor.
+Furthermore, disassociation can be made with usb_unanchor_urb()
+
+Operations on multitudes of URBs
+================================
+
+usb_kill_anchored_urbs()
+------------------------
+
+This function kills all URBs associated with an anchor. The URBs
+are called in the reverse temporal order they were submitted.
+This way no data can be reordered.
+
+usb_unlink_anchored_urbs()
+--------------------------
+
+This function unlinks all URBs associated with an anchor. The URBs
+are processed in the reverse temporal order they were submitted.
+This is similar to usb_kill_anchored_urbs(), but it will not sleep.
+Therefore no guarantee is made that the URBs have been unlinked when
+the call returns. They may be unlinked later but will be unlinked in
+finite time.
+
+usb_scuttle_anchored_urbs()
+---------------------------
+
+All URBs of an anchor are unanchored en masse.
+
+usb_wait_anchor_empty_timeout()
+-------------------------------
+
+This function waits for all URBs associated with an anchor to finish
+or a timeout, whichever comes first. Its return value will tell you
+whether the timeout was reached.
+
+usb_anchor_empty()
+------------------
+
+Returns true if no URBs are associated with an anchor. Locking
+is the caller's responsibility.
+
+usb_get_from_anchor()
+---------------------
+
+Returns the oldest anchored URB of an anchor. The URB is unanchored
+and returned with a reference. As you may mix URBs to several
+destinations in one anchor you have no guarantee the chronologically
+first submitted URB is returned.
diff --git a/Documentation/usb/authorization.txt b/Documentation/usb/authorization.txt
new file mode 100644
index 000000000..c069b6884
--- /dev/null
+++ b/Documentation/usb/authorization.txt
@@ -0,0 +1,92 @@
+
+Authorizing (or not) your USB devices to connect to the system
+
+(C) 2007 Inaky Perez-Gonzalez <inaky@linux.intel.com> Intel Corporation
+
+This feature allows you to control if a USB device can be used (or
+not) in a system. This feature will allow you to implement a lock-down
+of USB devices, fully controlled by user space.
+
+As of now, when a USB device is connected it is configured and
+its interfaces are immediately made available to the users. With this
+modification, only if root authorizes the device to be configured will
+then it be possible to use it.
+
+Usage:
+
+Authorize a device to connect:
+
+$ echo 1 > /sys/bus/usb/devices/DEVICE/authorized
+
+Deauthorize a device:
+
+$ echo 0 > /sys/bus/usb/devices/DEVICE/authorized
+
+Set new devices connected to hostX to be deauthorized by default (ie:
+lock down):
+
+$ echo 0 > /sys/bus/usb/devices/usbX/authorized_default
+
+Remove the lock down:
+
+$ echo 1 > /sys/bus/usb/devices/usbX/authorized_default
+
+By default, Wired USB devices are authorized by default to
+connect. Wireless USB hosts deauthorize by default all new connected
+devices (this is so because we need to do an authentication phase
+before authorizing).
+
+
+Example system lockdown (lame)
+-----------------------
+
+Imagine you want to implement a lockdown so only devices of type XYZ
+can be connected (for example, it is a kiosk machine with a visible
+USB port):
+
+boot up
+rc.local ->
+
+ for host in /sys/bus/usb/devices/usb*
+ do
+ echo 0 > $host/authorized_default
+ done
+
+Hookup an script to udev, for new USB devices
+
+ if device_is_my_type $DEV
+ then
+ echo 1 > $device_path/authorized
+ done
+
+
+Now, device_is_my_type() is where the juice for a lockdown is. Just
+checking if the class, type and protocol match something is the worse
+security verification you can make (or the best, for someone willing
+to break it). If you need something secure, use crypto and Certificate
+Authentication or stuff like that. Something simple for an storage key
+could be:
+
+function device_is_my_type()
+{
+ echo 1 > authorized # temporarily authorize it
+ # FIXME: make sure none can mount it
+ mount DEVICENODE /mntpoint
+ sum=$(md5sum /mntpoint/.signature)
+ if [ $sum = $(cat /etc/lockdown/keysum) ]
+ then
+ echo "We are good, connected"
+ umount /mntpoint
+ # Other stuff so others can use it
+ else
+ echo 0 > authorized
+ fi
+}
+
+
+Of course, this is lame, you'd want to do a real certificate
+verification stuff with PKI, so you don't depend on a shared secret,
+etc, but you get the idea. Anybody with access to a device gadget kit
+can fake descriptors and device info. Don't trust that. You are
+welcome.
+
diff --git a/Documentation/usb/bulk-streams.txt b/Documentation/usb/bulk-streams.txt
new file mode 100644
index 000000000..ffc020218
--- /dev/null
+++ b/Documentation/usb/bulk-streams.txt
@@ -0,0 +1,78 @@
+Background
+==========
+
+Bulk endpoint streams were added in the USB 3.0 specification. Streams allow a
+device driver to overload a bulk endpoint so that multiple transfers can be
+queued at once.
+
+Streams are defined in sections 4.4.6.4 and 8.12.1.4 of the Universal Serial Bus
+3.0 specification at http://www.usb.org/developers/docs/ The USB Attached SCSI
+Protocol, which uses streams to queue multiple SCSI commands, can be found on
+the T10 website (http://t10.org/).
+
+
+Device-side implications
+========================
+
+Once a buffer has been queued to a stream ring, the device is notified (through
+an out-of-band mechanism on another endpoint) that data is ready for that stream
+ID. The device then tells the host which "stream" it wants to start. The host
+can also initiate a transfer on a stream without the device asking, but the
+device can refuse that transfer. Devices can switch between streams at any
+time.
+
+
+Driver implications
+===================
+
+int usb_alloc_streams(struct usb_interface *interface,
+ struct usb_host_endpoint **eps, unsigned int num_eps,
+ unsigned int num_streams, gfp_t mem_flags);
+
+Device drivers will call this API to request that the host controller driver
+allocate memory so the driver can use up to num_streams stream IDs. They must
+pass an array of usb_host_endpoints that need to be setup with similar stream
+IDs. This is to ensure that a UASP driver will be able to use the same stream
+ID for the bulk IN and OUT endpoints used in a Bi-directional command sequence.
+
+The return value is an error condition (if one of the endpoints doesn't support
+streams, or the xHCI driver ran out of memory), or the number of streams the
+host controller allocated for this endpoint. The xHCI host controller hardware
+declares how many stream IDs it can support, and each bulk endpoint on a
+SuperSpeed device will say how many stream IDs it can handle. Therefore,
+drivers should be able to deal with being allocated less stream IDs than they
+requested.
+
+Do NOT call this function if you have URBs enqueued for any of the endpoints
+passed in as arguments. Do not call this function to request less than two
+streams.
+
+Drivers will only be allowed to call this API once for the same endpoint
+without calling usb_free_streams(). This is a simplification for the xHCI host
+controller driver, and may change in the future.
+
+
+Picking new Stream IDs to use
+============================
+
+Stream ID 0 is reserved, and should not be used to communicate with devices. If
+usb_alloc_streams() returns with a value of N, you may use streams 1 though N.
+To queue an URB for a specific stream, set the urb->stream_id value. If the
+endpoint does not support streams, an error will be returned.
+
+Note that new API to choose the next stream ID will have to be added if the xHCI
+driver supports secondary stream IDs.
+
+
+Clean up
+========
+
+If a driver wishes to stop using streams to communicate with the device, it
+should call
+
+void usb_free_streams(struct usb_interface *interface,
+ struct usb_host_endpoint **eps, unsigned int num_eps,
+ gfp_t mem_flags);
+
+All stream IDs will be deallocated when the driver releases the interface, to
+ensure that drivers that don't support streams will be able to use the endpoint.
diff --git a/Documentation/usb/callbacks.txt b/Documentation/usb/callbacks.txt
new file mode 100644
index 000000000..9e85846bd
--- /dev/null
+++ b/Documentation/usb/callbacks.txt
@@ -0,0 +1,134 @@
+What callbacks will usbcore do?
+===============================
+
+Usbcore will call into a driver through callbacks defined in the driver
+structure and through the completion handler of URBs a driver submits.
+Only the former are in the scope of this document. These two kinds of
+callbacks are completely independent of each other. Information on the
+completion callback can be found in Documentation/usb/URB.txt.
+
+The callbacks defined in the driver structure are:
+
+1. Hotplugging callbacks:
+
+ * @probe: Called to see if the driver is willing to manage a particular
+ * interface on a device.
+ * @disconnect: Called when the interface is no longer accessible, usually
+ * because its device has been (or is being) disconnected or the
+ * driver module is being unloaded.
+
+2. Odd backdoor through usbfs:
+
+ * @ioctl: Used for drivers that want to talk to userspace through
+ * the "usbfs" filesystem. This lets devices provide ways to
+ * expose information to user space regardless of where they
+ * do (or don't) show up otherwise in the filesystem.
+
+3. Power management (PM) callbacks:
+
+ * @suspend: Called when the device is going to be suspended.
+ * @resume: Called when the device is being resumed.
+ * @reset_resume: Called when the suspended device has been reset instead
+ * of being resumed.
+
+4. Device level operations:
+
+ * @pre_reset: Called when the device is about to be reset.
+ * @post_reset: Called after the device has been reset
+
+The ioctl interface (2) should be used only if you have a very good
+reason. Sysfs is preferred these days. The PM callbacks are covered
+separately in Documentation/usb/power-management.txt.
+
+Calling conventions
+===================
+
+All callbacks are mutually exclusive. There's no need for locking
+against other USB callbacks. All callbacks are called from a task
+context. You may sleep. However, it is important that all sleeps have a
+small fixed upper limit in time. In particular you must not call out to
+user space and await results.
+
+Hotplugging callbacks
+=====================
+
+These callbacks are intended to associate and disassociate a driver with
+an interface. A driver's bond to an interface is exclusive.
+
+The probe() callback
+--------------------
+
+int (*probe) (struct usb_interface *intf,
+ const struct usb_device_id *id);
+
+Accept or decline an interface. If you accept the device return 0,
+otherwise -ENODEV or -ENXIO. Other error codes should be used only if a
+genuine error occurred during initialisation which prevented a driver
+from accepting a device that would else have been accepted.
+You are strongly encouraged to use usbcore's facility,
+usb_set_intfdata(), to associate a data structure with an interface, so
+that you know which internal state and identity you associate with a
+particular interface. The device will not be suspended and you may do IO
+to the interface you are called for and endpoint 0 of the device. Device
+initialisation that doesn't take too long is a good idea here.
+
+The disconnect() callback
+-------------------------
+
+void (*disconnect) (struct usb_interface *intf);
+
+This callback is a signal to break any connection with an interface.
+You are not allowed any IO to a device after returning from this
+callback. You also may not do any other operation that may interfere
+with another driver bound the interface, eg. a power management
+operation.
+If you are called due to a physical disconnection, all your URBs will be
+killed by usbcore. Note that in this case disconnect will be called some
+time after the physical disconnection. Thus your driver must be prepared
+to deal with failing IO even prior to the callback.
+
+Device level callbacks
+======================
+
+pre_reset
+---------
+
+int (*pre_reset)(struct usb_interface *intf);
+
+A driver or user space is triggering a reset on the device which
+contains the interface passed as an argument. Cease IO, wait for all
+outstanding URBs to complete, and save any device state you need to
+restore. No more URBs may be submitted until the post_reset method
+is called.
+
+If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you
+are in atomic context.
+
+post_reset
+----------
+
+int (*post_reset)(struct usb_interface *intf);
+
+The reset has completed. Restore any saved device state and begin
+using the device again.
+
+If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you
+are in atomic context.
+
+Call sequences
+==============
+
+No callbacks other than probe will be invoked for an interface
+that isn't bound to your driver.
+
+Probe will never be called for an interface bound to a driver.
+Hence following a successful probe, disconnect will be called
+before there is another probe for the same interface.
+
+Once your driver is bound to an interface, disconnect can be
+called at any time except in between pre_reset and post_reset.
+pre_reset is always followed by post_reset, even if the reset
+failed or the device has been unplugged.
+
+suspend is always followed by one of: resume, reset_resume, or
+disconnect.
diff --git a/Documentation/usb/chipidea.txt b/Documentation/usb/chipidea.txt
new file mode 100644
index 000000000..3f848c1f2
--- /dev/null
+++ b/Documentation/usb/chipidea.txt
@@ -0,0 +1,92 @@
+1. How to test OTG FSM(HNP and SRP)
+-----------------------------------
+To show how to demo OTG HNP and SRP functions via sys input files
+with 2 Freescale i.MX6Q sabre SD boards.
+
+1.1 How to enable OTG FSM in menuconfig
+---------------------------------------
+Select CONFIG_USB_OTG_FSM, rebuild kernel Image and modules.
+If you want to check some internal variables for otg fsm,
+select CONFIG_USB_CHIPIDEA_DEBUG, there are 2 files which
+can show otg fsm variables and some controller registers value:
+cat /sys/kernel/debug/ci_hdrc.0/otg
+cat /sys/kernel/debug/ci_hdrc.0/registers
+
+1.2 Test operations
+-------------------
+1) Power up 2 Freescale i.MX6Q sabre SD boards with gadget class driver loaded
+ (e.g. g_mass_storage).
+
+2) Connect 2 boards with usb cable with one end is micro A plug, the other end
+ is micro B plug.
+
+ The A-device(with micro A plug inserted) should enumrate B-device.
+
+3) Role switch
+ On B-device:
+ echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
+
+ if HNP polling is not supported, also need:
+ On A-device:
+ echo 0 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_req
+
+ B-device should take host role and enumrate A-device.
+
+4) A-device switch back to host.
+ On B-device:
+ echo 0 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
+
+ A-device should switch back to host and enumrate B-device.
+
+5) Remove B-device(unplug micro B plug) and insert again in 10 seconds,
+ A-device should enumrate B-device again.
+
+6) Remove B-device(unplug micro B plug) and insert again after 10 seconds,
+ A-device should NOT enumrate B-device.
+
+ if A-device wants to use bus:
+ On A-device:
+ echo 0 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_drop
+ echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_req
+
+ if B-device wants to use bus:
+ On B-device:
+ echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
+
+7) A-device power down the bus.
+ On A-device:
+ echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/a_bus_drop
+
+ A-device should disconnect with B-device and power down the bus.
+
+8) B-device does data pulse for SRP.
+ On B-device:
+ echo 1 > /sys/bus/platform/devices/ci_hdrc.0/inputs/b_bus_req
+
+ A-device should resume usb bus and enumrate B-device.
+
+1.3 Reference document
+----------------------
+"On-The-Go and Embedded Host Supplement to the USB Revision 2.0 Specification
+July 27, 2012 Revision 2.0 version 1.1a"
+
+2. How to enable USB as system wakeup source
+-----------------------------------
+Below is the example for how to enable USB as system wakeup source
+at imx6 platform.
+
+2.1 Enable core's wakeup
+echo enabled > /sys/bus/platform/devices/ci_hdrc.0/power/wakeup
+2.2 Enable glue layer's wakeup
+echo enabled > /sys/bus/platform/devices/2184000.usb/power/wakeup
+2.3 Enable PHY's wakeup (optional)
+echo enabled > /sys/bus/platform/devices/20c9000.usbphy/power/wakeup
+2.4 Enable roothub's wakeup
+echo enabled > /sys/bus/usb/devices/usb1/power/wakeup
+2.5 Enable related device's wakeup
+echo enabled > /sys/bus/usb/devices/1-1/power/wakeup
+
+If the system has only one usb port, and you want usb wakeup at this port, you
+can use below script to enable usb wakeup.
+for i in $(find /sys -name wakeup | grep usb);do echo enabled > $i;done;
+
diff --git a/Documentation/usb/dma.txt b/Documentation/usb/dma.txt
new file mode 100644
index 000000000..444651e70
--- /dev/null
+++ b/Documentation/usb/dma.txt
@@ -0,0 +1,133 @@
+In Linux 2.5 kernels (and later), USB device drivers have additional control
+over how DMA may be used to perform I/O operations. The APIs are detailed
+in the kernel usb programming guide (kerneldoc, from the source code).
+
+
+API OVERVIEW
+
+The big picture is that USB drivers can continue to ignore most DMA issues,
+though they still must provide DMA-ready buffers (see
+Documentation/DMA-API-HOWTO.txt). That's how they've worked through
+the 2.4 (and earlier) kernels.
+
+OR: they can now be DMA-aware.
+
+- New calls enable DMA-aware drivers, letting them allocate dma buffers and
+ manage dma mappings for existing dma-ready buffers (see below).
+
+- URBs have an additional "transfer_dma" field, as well as a transfer_flags
+ bit saying if it's valid. (Control requests also have "setup_dma", but
+ drivers must not use it.)
+
+- "usbcore" will map this DMA address, if a DMA-aware driver didn't do
+ it first and set URB_NO_TRANSFER_DMA_MAP. HCDs
+ don't manage dma mappings for URBs.
+
+- There's a new "generic DMA API", parts of which are usable by USB device
+ drivers. Never use dma_set_mask() on any USB interface or device; that
+ would potentially break all devices sharing that bus.
+
+
+ELIMINATING COPIES
+
+It's good to avoid making CPUs copy data needlessly. The costs can add up,
+and effects like cache-trashing can impose subtle penalties.
+
+- If you're doing lots of small data transfers from the same buffer all
+ the time, that can really burn up resources on systems which use an
+ IOMMU to manage the DMA mappings. It can cost MUCH more to set up and
+ tear down the IOMMU mappings with each request than perform the I/O!
+
+ For those specific cases, USB has primitives to allocate less expensive
+ memory. They work like kmalloc and kfree versions that give you the right
+ kind of addresses to store in urb->transfer_buffer and urb->transfer_dma.
+ You'd also set URB_NO_TRANSFER_DMA_MAP in urb->transfer_flags:
+
+ void *usb_alloc_coherent (struct usb_device *dev, size_t size,
+ int mem_flags, dma_addr_t *dma);
+
+ void usb_free_coherent (struct usb_device *dev, size_t size,
+ void *addr, dma_addr_t dma);
+
+ Most drivers should *NOT* be using these primitives; they don't need
+ to use this type of memory ("dma-coherent"), and memory returned from
+ kmalloc() will work just fine.
+
+ The memory buffer returned is "dma-coherent"; sometimes you might need to
+ force a consistent memory access ordering by using memory barriers. It's
+ not using a streaming DMA mapping, so it's good for small transfers on
+ systems where the I/O would otherwise thrash an IOMMU mapping. (See
+ Documentation/DMA-API-HOWTO.txt for definitions of "coherent" and
+ "streaming" DMA mappings.)
+
+ Asking for 1/Nth of a page (as well as asking for N pages) is reasonably
+ space-efficient.
+
+ On most systems the memory returned will be uncached, because the
+ semantics of dma-coherent memory require either bypassing CPU caches
+ or using cache hardware with bus-snooping support. While x86 hardware
+ has such bus-snooping, many other systems use software to flush cache
+ lines to prevent DMA conflicts.
+
+- Devices on some EHCI controllers could handle DMA to/from high memory.
+
+ Unfortunately, the current Linux DMA infrastructure doesn't have a sane
+ way to expose these capabilities ... and in any case, HIGHMEM is mostly a
+ design wart specific to x86_32. So your best bet is to ensure you never
+ pass a highmem buffer into a USB driver. That's easy; it's the default
+ behavior. Just don't override it; e.g. with NETIF_F_HIGHDMA.
+
+ This may force your callers to do some bounce buffering, copying from
+ high memory to "normal" DMA memory. If you can come up with a good way
+ to fix this issue (for x86_32 machines with over 1 GByte of memory),
+ feel free to submit patches.
+
+
+WORKING WITH EXISTING BUFFERS
+
+Existing buffers aren't usable for DMA without first being mapped into the
+DMA address space of the device. However, most buffers passed to your
+driver can safely be used with such DMA mapping. (See the first section
+of Documentation/DMA-API-HOWTO.txt, titled "What memory is DMA-able?")
+
+- When you're using scatterlists, you can map everything at once. On some
+ systems, this kicks in an IOMMU and turns the scatterlists into single
+ DMA transactions:
+
+ int usb_buffer_map_sg (struct usb_device *dev, unsigned pipe,
+ struct scatterlist *sg, int nents);
+
+ void usb_buffer_dmasync_sg (struct usb_device *dev, unsigned pipe,
+ struct scatterlist *sg, int n_hw_ents);
+
+ void usb_buffer_unmap_sg (struct usb_device *dev, unsigned pipe,
+ struct scatterlist *sg, int n_hw_ents);
+
+ It's probably easier to use the new usb_sg_*() calls, which do the DMA
+ mapping and apply other tweaks to make scatterlist i/o be fast.
+
+- Some drivers may prefer to work with the model that they're mapping large
+ buffers, synchronizing their safe re-use. (If there's no re-use, then let
+ usbcore do the map/unmap.) Large periodic transfers make good examples
+ here, since it's cheaper to just synchronize the buffer than to unmap it
+ each time an urb completes and then re-map it on during resubmission.
+
+ These calls all work with initialized urbs: urb->dev, urb->pipe,
+ urb->transfer_buffer, and urb->transfer_buffer_length must all be
+ valid when these calls are used (urb->setup_packet must be valid too
+ if urb is a control request):
+
+ struct urb *usb_buffer_map (struct urb *urb);
+
+ void usb_buffer_dmasync (struct urb *urb);
+
+ void usb_buffer_unmap (struct urb *urb);
+
+ The calls manage urb->transfer_dma for you, and set URB_NO_TRANSFER_DMA_MAP
+ so that usbcore won't map or unmap the buffer. They cannot be used for
+ setup_packet buffers in control requests.
+
+Note that several of those interfaces are currently commented out, since
+they don't have current users. See the source code. Other than the dmasync
+calls (where the underlying DMA primitives have changed), most of them can
+easily be commented back in if you want to use them.
diff --git a/Documentation/usb/dwc3.txt b/Documentation/usb/dwc3.txt
new file mode 100644
index 000000000..1d02c01d1
--- /dev/null
+++ b/Documentation/usb/dwc3.txt
@@ -0,0 +1,45 @@
+
+ TODO
+~~~~~~
+Please pick something while reading :)
+
+- Convert interrupt handler to per-ep-thread-irq
+
+ As it turns out some DWC3-commands ~1ms to complete. Currently we spin
+ until the command completes which is bad.
+
+ Implementation idea:
+ - dwc core implements a demultiplexing irq chip for interrupts per
+ endpoint. The interrupt numbers are allocated during probe and belong
+ to the device. If MSI provides per-endpoint interrupt this dummy
+ interrupt chip can be replaced with "real" interrupts.
+ - interrupts are requested / allocated on usb_ep_enable() and removed on
+ usb_ep_disable(). Worst case are 32 interrupts, the lower limit is two
+ for ep0/1.
+ - dwc3_send_gadget_ep_cmd() will sleep in wait_for_completion_timeout()
+ until the command completes.
+ - the interrupt handler is split into the following pieces:
+ - primary handler of the device
+ goes through every event and calls generic_handle_irq() for event
+ it. On return from generic_handle_irq() in acknowledges the event
+ counter so interrupt goes away (eventually).
+
+ - threaded handler of the device
+ none
+
+ - primary handler of the EP-interrupt
+ reads the event and tries to process it. Everything that requires
+ sleeping is handed over to the Thread. The event is saved in an
+ per-endpoint data-structure.
+ We probably have to pay attention not to process events once we
+ handed something to thread so we don't process event X prio Y
+ where X > Y.
+
+ - threaded handler of the EP-interrupt
+ handles the remaining EP work which might sleep such as waiting
+ for command completion.
+
+ Latency:
+ There should be no increase in latency since the interrupt-thread has a
+ high priority and will be run before an average task in user land
+ (except the user changed priorities).
diff --git a/Documentation/usb/ehci.txt b/Documentation/usb/ehci.txt
new file mode 100644
index 000000000..160bd6c3a
--- /dev/null
+++ b/Documentation/usb/ehci.txt
@@ -0,0 +1,214 @@
+27-Dec-2002
+
+The EHCI driver is used to talk to high speed USB 2.0 devices using
+USB 2.0-capable host controller hardware. The USB 2.0 standard is
+compatible with the USB 1.1 standard. It defines three transfer speeds:
+
+ - "High Speed" 480 Mbit/sec (60 MByte/sec)
+ - "Full Speed" 12 Mbit/sec (1.5 MByte/sec)
+ - "Low Speed" 1.5 Mbit/sec
+
+USB 1.1 only addressed full speed and low speed. High speed devices
+can be used on USB 1.1 systems, but they slow down to USB 1.1 speeds.
+
+USB 1.1 devices may also be used on USB 2.0 systems. When plugged
+into an EHCI controller, they are given to a USB 1.1 "companion"
+controller, which is a OHCI or UHCI controller as normally used with
+such devices. When USB 1.1 devices plug into USB 2.0 hubs, they
+interact with the EHCI controller through a "Transaction Translator"
+(TT) in the hub, which turns low or full speed transactions into
+high speed "split transactions" that don't waste transfer bandwidth.
+
+At this writing, this driver has been seen to work with implementations
+of EHCI from (in alphabetical order): Intel, NEC, Philips, and VIA.
+Other EHCI implementations are becoming available from other vendors;
+you should expect this driver to work with them too.
+
+While usb-storage devices have been available since mid-2001 (working
+quite speedily on the 2.4 version of this driver), hubs have only
+been available since late 2001, and other kinds of high speed devices
+appear to be on hold until more systems come with USB 2.0 built-in.
+Such new systems have been available since early 2002, and became much
+more typical in the second half of 2002.
+
+Note that USB 2.0 support involves more than just EHCI. It requires
+other changes to the Linux-USB core APIs, including the hub driver,
+but those changes haven't needed to really change the basic "usbcore"
+APIs exposed to USB device drivers.
+
+- David Brownell
+ <dbrownell@users.sourceforge.net>
+
+
+FUNCTIONALITY
+
+This driver is regularly tested on x86 hardware, and has also been
+used on PPC hardware so big/little endianness issues should be gone.
+It's believed to do all the right PCI magic so that I/O works even on
+systems with interesting DMA mapping issues.
+
+Transfer Types
+
+At this writing the driver should comfortably handle all control, bulk,
+and interrupt transfers, including requests to USB 1.1 devices through
+transaction translators (TTs) in USB 2.0 hubs. But you may find bugs.
+
+High Speed Isochronous (ISO) transfer support is also functional, but
+at this writing no Linux drivers have been using that support.
+
+Full Speed Isochronous transfer support, through transaction translators,
+is not yet available. Note that split transaction support for ISO
+transfers can't share much code with the code for high speed ISO transfers,
+since EHCI represents these with a different data structure. So for now,
+most USB audio and video devices can't be connected to high speed buses.
+
+Driver Behavior
+
+Transfers of all types can be queued. This means that control transfers
+from a driver on one interface (or through usbfs) won't interfere with
+ones from another driver, and that interrupt transfers can use periods
+of one frame without risking data loss due to interrupt processing costs.
+
+The EHCI root hub code hands off USB 1.1 devices to its companion
+controller. This driver doesn't need to know anything about those
+drivers; a OHCI or UHCI driver that works already doesn't need to change
+just because the EHCI driver is also present.
+
+There are some issues with power management; suspend/resume doesn't
+behave quite right at the moment.
+
+Also, some shortcuts have been taken with the scheduling periodic
+transactions (interrupt and isochronous transfers). These place some
+limits on the number of periodic transactions that can be scheduled,
+and prevent use of polling intervals of less than one frame.
+
+
+USE BY
+
+Assuming you have an EHCI controller (on a PCI card or motherboard)
+and have compiled this driver as a module, load this like:
+
+ # modprobe ehci-hcd
+
+and remove it by:
+
+ # rmmod ehci-hcd
+
+You should also have a driver for a "companion controller", such as
+"ohci-hcd" or "uhci-hcd". In case of any trouble with the EHCI driver,
+remove its module and then the driver for that companion controller will
+take over (at lower speed) all the devices that were previously handled
+by the EHCI driver.
+
+Module parameters (pass to "modprobe") include:
+
+ log2_irq_thresh (default 0):
+ Log2 of default interrupt delay, in microframes. The default
+ value is 0, indicating 1 microframe (125 usec). Maximum value
+ is 6, indicating 2^6 = 64 microframes. This controls how often
+ the EHCI controller can issue interrupts.
+
+If you're using this driver on a 2.5 kernel, and you've enabled USB
+debugging support, you'll see three files in the "sysfs" directory for
+any EHCI controller:
+
+ "async" dumps the asynchronous schedule, used for control
+ and bulk transfers. Shows each active qh and the qtds
+ pending, usually one qtd per urb. (Look at it with
+ usb-storage doing disk I/O; watch the request queues!)
+ "periodic" dumps the periodic schedule, used for interrupt
+ and isochronous transfers. Doesn't show qtds.
+ "registers" show controller register state, and
+
+The contents of those files can help identify driver problems.
+
+
+Device drivers shouldn't care whether they're running over EHCI or not,
+but they may want to check for "usb_device->speed == USB_SPEED_HIGH".
+High speed devices can do things that full speed (or low speed) ones
+can't, such as "high bandwidth" periodic (interrupt or ISO) transfers.
+Also, some values in device descriptors (such as polling intervals for
+periodic transfers) use different encodings when operating at high speed.
+
+However, do make a point of testing device drivers through USB 2.0 hubs.
+Those hubs report some failures, such as disconnections, differently when
+transaction translators are in use; some drivers have been seen to behave
+badly when they see different faults than OHCI or UHCI report.
+
+
+PERFORMANCE
+
+USB 2.0 throughput is gated by two main factors: how fast the host
+controller can process requests, and how fast devices can respond to
+them. The 480 Mbit/sec "raw transfer rate" is obeyed by all devices,
+but aggregate throughput is also affected by issues like delays between
+individual high speed packets, driver intelligence, and of course the
+overall system load. Latency is also a performance concern.
+
+Bulk transfers are most often used where throughput is an issue. It's
+good to keep in mind that bulk transfers are always in 512 byte packets,
+and at most 13 of those fit into one USB 2.0 microframe. Eight USB 2.0
+microframes fit in a USB 1.1 frame; a microframe is 1 msec/8 = 125 usec.
+
+So more than 50 MByte/sec is available for bulk transfers, when both
+hardware and device driver software allow it. Periodic transfer modes
+(isochronous and interrupt) allow the larger packet sizes which let you
+approach the quoted 480 MBit/sec transfer rate.
+
+Hardware Performance
+
+At this writing, individual USB 2.0 devices tend to max out at around
+20 MByte/sec transfer rates. This is of course subject to change;
+and some devices now go faster, while others go slower.
+
+The first NEC implementation of EHCI seems to have a hardware bottleneck
+at around 28 MByte/sec aggregate transfer rate. While this is clearly
+enough for a single device at 20 MByte/sec, putting three such devices
+onto one bus does not get you 60 MByte/sec. The issue appears to be
+that the controller hardware won't do concurrent USB and PCI access,
+so that it's only trying six (or maybe seven) USB transactions each
+microframe rather than thirteen. (Seems like a reasonable trade off
+for a product that beat all the others to market by over a year!)
+
+It's expected that newer implementations will better this, throwing
+more silicon real estate at the problem so that new motherboard chip
+sets will get closer to that 60 MByte/sec target. That includes an
+updated implementation from NEC, as well as other vendors' silicon.
+
+There's a minimum latency of one microframe (125 usec) for the host
+to receive interrupts from the EHCI controller indicating completion
+of requests. That latency is tunable; there's a module option. By
+default ehci-hcd driver uses the minimum latency, which means that if
+you issue a control or bulk request you can often expect to learn that
+it completed in less than 250 usec (depending on transfer size).
+
+Software Performance
+
+To get even 20 MByte/sec transfer rates, Linux-USB device drivers will
+need to keep the EHCI queue full. That means issuing large requests,
+or using bulk queuing if a series of small requests needs to be issued.
+When drivers don't do that, their performance results will show it.
+
+In typical situations, a usb_bulk_msg() loop writing out 4 KB chunks is
+going to waste more than half the USB 2.0 bandwidth. Delays between the
+I/O completion and the driver issuing the next request will take longer
+than the I/O. If that same loop used 16 KB chunks, it'd be better; a
+sequence of 128 KB chunks would waste a lot less.
+
+But rather than depending on such large I/O buffers to make synchronous
+I/O be efficient, it's better to just queue up several (bulk) requests
+to the HC, and wait for them all to complete (or be canceled on error).
+Such URB queuing should work with all the USB 1.1 HC drivers too.
+
+In the Linux 2.5 kernels, new usb_sg_*() api calls have been defined; they
+queue all the buffers from a scatterlist. They also use scatterlist DMA
+mapping (which might apply an IOMMU) and IRQ reduction, all of which will
+help make high speed transfers run as fast as they can.
+
+
+TBD: Interrupt and ISO transfer performance issues. Those periodic
+transfers are fully scheduled, so the main issue is likely to be how
+to trigger "high bandwidth" modes.
+
+TBD: More than standard 80% periodic bandwidth allocation is possible
+through sysfs uframe_periodic_max parameter. Describe that.
diff --git a/Documentation/usb/error-codes.txt b/Documentation/usb/error-codes.txt
new file mode 100644
index 000000000..9c3eb845e
--- /dev/null
+++ b/Documentation/usb/error-codes.txt
@@ -0,0 +1,175 @@
+Revised: 2004-Oct-21
+
+This is the documentation of (hopefully) all possible error codes (and
+their interpretation) that can be returned from usbcore.
+
+Some of them are returned by the Host Controller Drivers (HCDs), which
+device drivers only see through usbcore. As a rule, all the HCDs should
+behave the same except for transfer speed dependent behaviors and the
+way certain faults are reported.
+
+
+**************************************************************************
+* Error codes returned by usb_submit_urb *
+**************************************************************************
+
+Non-USB-specific:
+
+0 URB submission went fine
+
+-ENOMEM no memory for allocation of internal structures
+
+USB-specific:
+
+-EBUSY The URB is already active.
+
+-ENODEV specified USB-device or bus doesn't exist
+
+-ENOENT specified interface or endpoint does not exist or
+ is not enabled
+
+-ENXIO host controller driver does not support queuing of this type
+ of urb. (treat as a host controller bug.)
+
+-EINVAL a) Invalid transfer type specified (or not supported)
+ b) Invalid or unsupported periodic transfer interval
+ c) ISO: attempted to change transfer interval
+ d) ISO: number_of_packets is < 0
+ e) various other cases
+
+-EXDEV ISO: URB_ISO_ASAP wasn't specified and all the frames
+ the URB would be scheduled in have already expired.
+
+-EFBIG Host controller driver can't schedule that many ISO frames.
+
+-EPIPE The pipe type specified in the URB doesn't match the
+ endpoint's actual type.
+
+-EMSGSIZE (a) endpoint maxpacket size is zero; it is not usable
+ in the current interface altsetting.
+ (b) ISO packet is larger than the endpoint maxpacket.
+ (c) requested data transfer length is invalid: negative
+ or too large for the host controller.
+
+-ENOSPC This request would overcommit the usb bandwidth reserved
+ for periodic transfers (interrupt, isochronous).
+
+-ESHUTDOWN The device or host controller has been disabled due to some
+ problem that could not be worked around.
+
+-EPERM Submission failed because urb->reject was set.
+
+-EHOSTUNREACH URB was rejected because the device is suspended.
+
+-ENOEXEC A control URB doesn't contain a Setup packet.
+
+
+**************************************************************************
+* Error codes returned by in urb->status *
+* or in iso_frame_desc[n].status (for ISO) *
+**************************************************************************
+
+USB device drivers may only test urb status values in completion handlers.
+This is because otherwise there would be a race between HCDs updating
+these values on one CPU, and device drivers testing them on another CPU.
+
+A transfer's actual_length may be positive even when an error has been
+reported. That's because transfers often involve several packets, so that
+one or more packets could finish before an error stops further endpoint I/O.
+
+For isochronous URBs, the urb status value is non-zero only if the URB is
+unlinked, the device is removed, the host controller is disabled, or the total
+transferred length is less than the requested length and the URB_SHORT_NOT_OK
+flag is set. Completion handlers for isochronous URBs should only see
+urb->status set to zero, -ENOENT, -ECONNRESET, -ESHUTDOWN, or -EREMOTEIO.
+Individual frame descriptor status fields may report more status codes.
+
+
+0 Transfer completed successfully
+
+-ENOENT URB was synchronously unlinked by usb_unlink_urb
+
+-EINPROGRESS URB still pending, no results yet
+ (That is, if drivers see this it's a bug.)
+
+-EPROTO (*, **) a) bitstuff error
+ b) no response packet received within the
+ prescribed bus turn-around time
+ c) unknown USB error
+
+-EILSEQ (*, **) a) CRC mismatch
+ b) no response packet received within the
+ prescribed bus turn-around time
+ c) unknown USB error
+
+ Note that often the controller hardware does not
+ distinguish among cases a), b), and c), so a
+ driver cannot tell whether there was a protocol
+ error, a failure to respond (often caused by
+ device disconnect), or some other fault.
+
+-ETIME (**) No response packet received within the prescribed
+ bus turn-around time. This error may instead be
+ reported as -EPROTO or -EILSEQ.
+
+-ETIMEDOUT Synchronous USB message functions use this code
+ to indicate timeout expired before the transfer
+ completed, and no other error was reported by HC.
+
+-EPIPE (**) Endpoint stalled. For non-control endpoints,
+ reset this status with usb_clear_halt().
+
+-ECOMM During an IN transfer, the host controller
+ received data from an endpoint faster than it
+ could be written to system memory
+
+-ENOSR During an OUT transfer, the host controller
+ could not retrieve data from system memory fast
+ enough to keep up with the USB data rate
+
+-EOVERFLOW (*) The amount of data returned by the endpoint was
+ greater than either the max packet size of the
+ endpoint or the remaining buffer size. "Babble".
+
+-EREMOTEIO The data read from the endpoint did not fill the
+ specified buffer, and URB_SHORT_NOT_OK was set in
+ urb->transfer_flags.
+
+-ENODEV Device was removed. Often preceded by a burst of
+ other errors, since the hub driver doesn't detect
+ device removal events immediately.
+
+-EXDEV ISO transfer only partially completed
+ (only set in iso_frame_desc[n].status, not urb->status)
+
+-EINVAL ISO madness, if this happens: Log off and go home
+
+-ECONNRESET URB was asynchronously unlinked by usb_unlink_urb
+
+-ESHUTDOWN The device or host controller has been disabled due
+ to some problem that could not be worked around,
+ such as a physical disconnect.
+
+
+(*) Error codes like -EPROTO, -EILSEQ and -EOVERFLOW normally indicate
+hardware problems such as bad devices (including firmware) or cables.
+
+(**) This is also one of several codes that different kinds of host
+controller use to indicate a transfer has failed because of device
+disconnect. In the interval before the hub driver starts disconnect
+processing, devices may receive such fault reports for every request.
+
+
+
+**************************************************************************
+* Error codes returned by usbcore-functions *
+* (expect also other submit and transfer status codes) *
+**************************************************************************
+
+usb_register():
+-EINVAL error during registering new driver
+
+usb_get_*/usb_set_*():
+usb_control_msg():
+usb_bulk_msg():
+-ETIMEDOUT Timeout expired before the transfer completed.
diff --git a/Documentation/usb/functionfs.txt b/Documentation/usb/functionfs.txt
new file mode 100644
index 000000000..eaaaea019
--- /dev/null
+++ b/Documentation/usb/functionfs.txt
@@ -0,0 +1,67 @@
+*How FunctionFS works*
+
+From kernel point of view it is just a composite function with some
+unique behaviour. It may be added to an USB configuration only after
+the user space driver has registered by writing descriptors and
+strings (the user space program has to provide the same information
+that kernel level composite functions provide when they are added to
+the configuration).
+
+This in particular means that the composite initialisation functions
+may not be in init section (ie. may not use the __init tag).
+
+From user space point of view it is a file system which when
+mounted provides an "ep0" file. User space driver need to
+write descriptors and strings to that file. It does not need
+to worry about endpoints, interfaces or strings numbers but
+simply provide descriptors such as if the function was the
+only one (endpoints and strings numbers starting from one and
+interface numbers starting from zero). The FunctionFS changes
+them as needed also handling situation when numbers differ in
+different configurations.
+
+When descriptors and strings are written "ep#" files appear
+(one for each declared endpoint) which handle communication on
+a single endpoint. Again, FunctionFS takes care of the real
+numbers and changing of the configuration (which means that
+"ep1" file may be really mapped to (say) endpoint 3 (and when
+configuration changes to (say) endpoint 2)). "ep0" is used
+for receiving events and handling setup requests.
+
+When all files are closed the function disables itself.
+
+What I also want to mention is that the FunctionFS is designed in such
+a way that it is possible to mount it several times so in the end
+a gadget could use several FunctionFS functions. The idea is that
+each FunctionFS instance is identified by the device name used
+when mounting.
+
+One can imagine a gadget that has an Ethernet, MTP and HID interfaces
+where the last two are implemented via FunctionFS. On user space
+level it would look like this:
+
+$ insmod g_ffs.ko idVendor=<ID> iSerialNumber=<string> functions=mtp,hid
+$ mkdir /dev/ffs-mtp && mount -t functionfs mtp /dev/ffs-mtp
+$ ( cd /dev/ffs-mtp && mtp-daemon ) &
+$ mkdir /dev/ffs-hid && mount -t functionfs hid /dev/ffs-hid
+$ ( cd /dev/ffs-hid && hid-daemon ) &
+
+On kernel level the gadget checks ffs_data->dev_name to identify
+whether it's FunctionFS designed for MTP ("mtp") or HID ("hid").
+
+If no "functions" module parameters is supplied, the driver accepts
+just one function with any name.
+
+When "functions" module parameter is supplied, only functions
+with listed names are accepted. In particular, if the "functions"
+parameter's value is just a one-element list, then the behaviour
+is similar to when there is no "functions" at all; however,
+only a function with the specified name is accepted.
+
+The gadget is registered only after all the declared function
+filesystems have been mounted and USB descriptors of all functions
+have been written to their ep0's.
+
+Conversely, the gadget is unregistered after the first USB function
+closes its endpoints.
+
diff --git a/Documentation/usb/gadget-testing.txt b/Documentation/usb/gadget-testing.txt
new file mode 100644
index 000000000..f45b2bf4b
--- /dev/null
+++ b/Documentation/usb/gadget-testing.txt
@@ -0,0 +1,775 @@
+This file summarizes information on basic testing of USB functions
+provided by gadgets.
+
+1. ACM function
+2. ECM function
+3. ECM subset function
+4. EEM function
+5. FFS function
+6. HID function
+7. LOOPBACK function
+8. MASS STORAGE function
+9. MIDI function
+10. NCM function
+11. OBEX function
+12. PHONET function
+13. RNDIS function
+14. SERIAL function
+15. SOURCESINK function
+16. UAC1 function
+17. UAC2 function
+18. UVC function
+19. PRINTER function
+
+
+1. ACM function
+===============
+
+The function is provided by usb_f_acm.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "acm".
+The ACM function provides just one attribute in its function directory:
+
+ port_num
+
+The attribute is read-only.
+
+There can be at most 4 ACM/generic serial/OBEX ports in the system.
+
+
+Testing the ACM function
+------------------------
+
+On the host: cat > /dev/ttyACM<X>
+On the device : cat /dev/ttyGS<Y>
+
+then the other way round
+
+On the device: cat > /dev/ttyGS<Y>
+On the host: cat /dev/ttyACM<X>
+
+2. ECM function
+===============
+
+The function is provided by usb_f_ecm.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "ecm".
+The ECM function provides these attributes in its function directory:
+
+ ifname - network device interface name associated with this
+ function instance
+ qmult - queue length multiplier for high and super speed
+ host_addr - MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr - MAC address of device's end of this
+ Ethernet over USB link
+
+and after creating the functions/ecm.<instance name> they contain default
+values: qmult is 5, dev_addr and host_addr are randomly selected.
+Except for ifname they can be written to until the function is linked to a
+configuration. The ifname is read-only and contains the name of the interface
+which was assigned by the net core, e. g. usb0.
+
+Testing the ECM function
+------------------------
+
+Configure IP addresses of the device and the host. Then:
+
+On the device: ping <host's IP>
+On the host: ping <device's IP>
+
+3. ECM subset function
+======================
+
+The function is provided by usb_f_ecm_subset.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "geth".
+The ECM subset function provides these attributes in its function directory:
+
+ ifname - network device interface name associated with this
+ function instance
+ qmult - queue length multiplier for high and super speed
+ host_addr - MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr - MAC address of device's end of this
+ Ethernet over USB link
+
+and after creating the functions/ecm.<instance name> they contain default
+values: qmult is 5, dev_addr and host_addr are randomly selected.
+Except for ifname they can be written to until the function is linked to a
+configuration. The ifname is read-only and contains the name of the interface
+which was assigned by the net core, e. g. usb0.
+
+Testing the ECM subset function
+-------------------------------
+
+Configure IP addresses of the device and the host. Then:
+
+On the device: ping <host's IP>
+On the host: ping <device's IP>
+
+4. EEM function
+===============
+
+The function is provided by usb_f_eem.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "eem".
+The EEM function provides these attributes in its function directory:
+
+ ifname - network device interface name associated with this
+ function instance
+ qmult - queue length multiplier for high and super speed
+ host_addr - MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr - MAC address of device's end of this
+ Ethernet over USB link
+
+and after creating the functions/eem.<instance name> they contain default
+values: qmult is 5, dev_addr and host_addr are randomly selected.
+Except for ifname they can be written to until the function is linked to a
+configuration. The ifname is read-only and contains the name of the interface
+which was assigned by the net core, e. g. usb0.
+
+Testing the EEM function
+------------------------
+
+Configure IP addresses of the device and the host. Then:
+
+On the device: ping <host's IP>
+On the host: ping <device's IP>
+
+5. FFS function
+===============
+
+The function is provided by usb_f_fs.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "ffs".
+The function directory is intentionally empty and not modifiable.
+
+After creating the directory there is a new instance (a "device") of FunctionFS
+available in the system. Once a "device" is available, the user should follow
+the standard procedure for using FunctionFS (mount it, run the userspace
+process which implements the function proper). The gadget should be enabled
+by writing a suitable string to usb_gadget/<gadget>/UDC.
+
+Testing the FFS function
+------------------------
+
+On the device: start the function's userspace daemon, enable the gadget
+On the host: use the USB function provided by the device
+
+6. HID function
+===============
+
+The function is provided by usb_f_hid.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "hid".
+The HID function provides these attributes in its function directory:
+
+ protocol - HID protocol to use
+ report_desc - data to be used in HID reports, except data
+ passed with /dev/hidg<X>
+ report_length - HID report length
+ subclass - HID subclass to use
+
+For a keyboard the protocol and the subclass are 1, the report_length is 8,
+while the report_desc is:
+
+$ hd my_report_desc
+00000000 05 01 09 06 a1 01 05 07 19 e0 29 e7 15 00 25 01 |..........)...%.|
+00000010 75 01 95 08 81 02 95 01 75 08 81 03 95 05 75 01 |u.......u.....u.|
+00000020 05 08 19 01 29 05 91 02 95 01 75 03 91 03 95 06 |....).....u.....|
+00000030 75 08 15 00 25 65 05 07 19 00 29 65 81 00 c0 |u...%e....)e...|
+0000003f
+
+Such a sequence of bytes can be stored to the attribute with echo:
+
+$ echo -ne \\x05\\x01\\x09\\x06\\xa1.....
+
+Testing the HID function
+------------------------
+
+Device:
+- create the gadget
+- connect the gadget to a host, preferably not the one used
+to control the gadget
+- run a program which writes to /dev/hidg<N>, e.g.
+a userspace program found in Documentation/usb/gadget_hid.txt:
+
+$ ./hid_gadget_test /dev/hidg0 keyboard
+
+Host:
+- observe the keystrokes from the gadget
+
+7. LOOPBACK function
+====================
+
+The function is provided by usb_f_ss_lb.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "Loopback".
+The LOOPBACK function provides these attributes in its function directory:
+
+ qlen - depth of loopback queue
+ bulk_buflen - buffer length
+
+Testing the LOOPBACK function
+-----------------------------
+
+device: run the gadget
+host: test-usb
+
+http://www.linux-usb.org/usbtest/testusb.c
+
+8. MASS STORAGE function
+========================
+
+The function is provided by usb_f_mass_storage.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "mass_storage".
+The MASS STORAGE function provides these attributes in its directory:
+files:
+
+ stall - Set to permit function to halt bulk endpoints.
+ Disabled on some USB devices known not to work
+ correctly. You should set it to true.
+ num_buffers - Number of pipeline buffers. Valid numbers
+ are 2..4. Available only if
+ CONFIG_USB_GADGET_DEBUG_FILES is set.
+
+and a default lun.0 directory corresponding to SCSI LUN #0.
+
+A new lun can be added with mkdir:
+
+$ mkdir functions/mass_storage.0/partition.5
+
+Lun numbering does not have to be continuous, except for lun #0 which is
+created by default. A maximum of 8 luns can be specified and they all must be
+named following the <name>.<number> scheme. The numbers can be 0..8.
+Probably a good convention is to name the luns "lun.<number>",
+although it is not mandatory.
+
+In each lun directory there are the following attribute files:
+
+ file - The path to the backing file for the LUN.
+ Required if LUN is not marked as removable.
+ ro - Flag specifying access to the LUN shall be
+ read-only. This is implied if CD-ROM emulation
+ is enabled as well as when it was impossible
+ to open "filename" in R/W mode.
+ removable - Flag specifying that LUN shall be indicated as
+ being removable.
+ cdrom - Flag specifying that LUN shall be reported as
+ being a CD-ROM.
+ nofua - Flag specifying that FUA flag
+ in SCSI WRITE(10,12)
+
+Testing the MASS STORAGE function
+---------------------------------
+
+device: connect the gadget, enable it
+host: dmesg, see the USB drives appear (if system configured to automatically
+mount)
+
+9. MIDI function
+================
+
+The function is provided by usb_f_midi.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "midi".
+The MIDI function provides these attributes in its function directory:
+
+ buflen - MIDI buffer length
+ id - ID string for the USB MIDI adapter
+ in_ports - number of MIDI input ports
+ index - index value for the USB MIDI adapter
+ out_ports - number of MIDI output ports
+ qlen - USB read request queue length
+
+Testing the MIDI function
+-------------------------
+
+There are two cases: playing a mid from the gadget to
+the host and playing a mid from the host to the gadget.
+
+1) Playing a mid from the gadget to the host
+host)
+
+$ arecordmidi -l
+ Port Client name Port name
+ 14:0 Midi Through Midi Through Port-0
+ 24:0 MIDI Gadget MIDI Gadget MIDI 1
+$ arecordmidi -p 24:0 from_gadget.mid
+
+gadget)
+
+$ aplaymidi -l
+ Port Client name Port name
+ 20:0 f_midi f_midi
+
+$ aplaymidi -p 20:0 to_host.mid
+
+2) Playing a mid from the host to the gadget
+gadget)
+
+$ arecordmidi -l
+ Port Client name Port name
+ 20:0 f_midi f_midi
+
+$ arecordmidi -p 20:0 from_host.mid
+
+host)
+
+$ aplaymidi -l
+ Port Client name Port name
+ 14:0 Midi Through Midi Through Port-0
+ 24:0 MIDI Gadget MIDI Gadget MIDI 1
+
+$ aplaymidi -p24:0 to_gadget.mid
+
+The from_gadget.mid should sound identical to the to_host.mid.
+The from_host.id should sound identical to the to_gadget.mid.
+
+MIDI files can be played to speakers/headphones with e.g. timidity installed
+
+$ aplaymidi -l
+ Port Client name Port name
+ 14:0 Midi Through Midi Through Port-0
+ 24:0 MIDI Gadget MIDI Gadget MIDI 1
+128:0 TiMidity TiMidity port 0
+128:1 TiMidity TiMidity port 1
+128:2 TiMidity TiMidity port 2
+128:3 TiMidity TiMidity port 3
+
+$ aplaymidi -p 128:0 file.mid
+
+MIDI ports can be logically connected using the aconnect utility, e.g.:
+
+$ aconnect 24:0 128:0 # try it on the host
+
+After the gadget's MIDI port is connected to timidity's MIDI port,
+whatever is played at the gadget side with aplaymidi -l is audible
+in host's speakers/headphones.
+
+10. NCM function
+================
+
+The function is provided by usb_f_ncm.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "ncm".
+The NCM function provides these attributes in its function directory:
+
+ ifname - network device interface name associated with this
+ function instance
+ qmult - queue length multiplier for high and super speed
+ host_addr - MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr - MAC address of device's end of this
+ Ethernet over USB link
+
+and after creating the functions/ncm.<instance name> they contain default
+values: qmult is 5, dev_addr and host_addr are randomly selected.
+Except for ifname they can be written to until the function is linked to a
+configuration. The ifname is read-only and contains the name of the interface
+which was assigned by the net core, e. g. usb0.
+
+Testing the NCM function
+------------------------
+
+Configure IP addresses of the device and the host. Then:
+
+On the device: ping <host's IP>
+On the host: ping <device's IP>
+
+11. OBEX function
+=================
+
+The function is provided by usb_f_obex.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "obex".
+The OBEX function provides just one attribute in its function directory:
+
+ port_num
+
+The attribute is read-only.
+
+There can be at most 4 ACM/generic serial/OBEX ports in the system.
+
+Testing the OBEX function
+-------------------------
+
+On device: seriald -f /dev/ttyGS<Y> -s 1024
+On host: serialc -v <vendorID> -p <productID> -i<interface#> -a1 -s1024 \
+ -t<out endpoint addr> -r<in endpoint addr>
+
+where seriald and serialc are Felipe's utilities found here:
+
+https://git.gitorious.org/usb/usb-tools.git master
+
+12. PHONET function
+===================
+
+The function is provided by usb_f_phonet.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "phonet".
+The PHONET function provides just one attribute in its function directory:
+
+ ifname - network device interface name associated with this
+ function instance
+
+Testing the PHONET function
+---------------------------
+
+It is not possible to test the SOCK_STREAM protocol without a specific piece
+of hardware, so only SOCK_DGRAM has been tested. For the latter to work,
+in the past I had to apply the patch mentioned here:
+
+http://www.spinics.net/lists/linux-usb/msg85689.html
+
+These tools are required:
+
+git://git.gitorious.org/meego-cellular/phonet-utils.git
+
+On the host:
+
+$ ./phonet -a 0x10 -i usbpn0
+$ ./pnroute add 0x6c usbpn0
+$./pnroute add 0x10 usbpn0
+$ ifconfig usbpn0 up
+
+On the device:
+
+$ ./phonet -a 0x6c -i upnlink0
+$ ./pnroute add 0x10 upnlink0
+$ ifconfig upnlink0 up
+
+Then a test program can be used:
+
+http://www.spinics.net/lists/linux-usb/msg85690.html
+
+On the device:
+
+$ ./pnxmit -a 0x6c -r
+
+On the host:
+
+$ ./pnxmit -a 0x10 -s 0x6c
+
+As a result some data should be sent from host to device.
+Then the other way round:
+
+On the host:
+
+$ ./pnxmit -a 0x10 -r
+
+On the device:
+
+$ ./pnxmit -a 0x6c -s 0x10
+
+13. RNDIS function
+==================
+
+The function is provided by usb_f_rndis.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "rndis".
+The RNDIS function provides these attributes in its function directory:
+
+ ifname - network device interface name associated with this
+ function instance
+ qmult - queue length multiplier for high and super speed
+ host_addr - MAC address of host's end of this
+ Ethernet over USB link
+ dev_addr - MAC address of device's end of this
+ Ethernet over USB link
+
+and after creating the functions/rndis.<instance name> they contain default
+values: qmult is 5, dev_addr and host_addr are randomly selected.
+Except for ifname they can be written to until the function is linked to a
+configuration. The ifname is read-only and contains the name of the interface
+which was assigned by the net core, e. g. usb0.
+
+By default there can be only 1 RNDIS interface in the system.
+
+Testing the RNDIS function
+--------------------------
+
+Configure IP addresses of the device and the host. Then:
+
+On the device: ping <host's IP>
+On the host: ping <device's IP>
+
+14. SERIAL function
+===================
+
+The function is provided by usb_f_gser.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "gser".
+The SERIAL function provides just one attribute in its function directory:
+
+ port_num
+
+The attribute is read-only.
+
+There can be at most 4 ACM/generic serial/OBEX ports in the system.
+
+Testing the SERIAL function
+---------------------------
+
+On host: insmod usbserial
+ echo VID PID >/sys/bus/usb-serial/drivers/generic/new_id
+On host: cat > /dev/ttyUSB<X>
+On target: cat /dev/ttyGS<Y>
+
+then the other way round
+
+On target: cat > /dev/ttyGS<Y>
+On host: cat /dev/ttyUSB<X>
+
+15. SOURCESINK function
+=======================
+
+The function is provided by usb_f_ss_lb.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "SourceSink".
+The SOURCESINK function provides these attributes in its function directory:
+
+ pattern - 0 (all zeros), 1 (mod63), 2 (none)
+ isoc_interval - 1..16
+ isoc_maxpacket - 0 - 1023 (fs), 0 - 1024 (hs/ss)
+ isoc_mult - 0..2 (hs/ss only)
+ isoc_maxburst - 0..15 (ss only)
+ bulk_buflen - buffer length
+
+Testing the SOURCESINK function
+-------------------------------
+
+device: run the gadget
+host: test-usb
+
+http://www.linux-usb.org/usbtest/testusb.c
+
+16. UAC1 function
+=================
+
+The function is provided by usb_f_uac1.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "uac1".
+The uac1 function provides these attributes in its function directory:
+
+ audio_buf_size - audio buffer size
+ fn_cap - capture pcm device file name
+ fn_cntl - control device file name
+ fn_play - playback pcm device file name
+ req_buf_size - ISO OUT endpoint request buffer size
+ req_count - ISO OUT endpoint request count
+
+The attributes have sane default values.
+
+Testing the UAC1 function
+-------------------------
+
+device: run the gadget
+host: aplay -l # should list our USB Audio Gadget
+
+17. UAC2 function
+=================
+
+The function is provided by usb_f_uac2.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "uac2".
+The uac2 function provides these attributes in its function directory:
+
+ chmask - capture channel mask
+ c_srate - capture sampling rate
+ c_ssize - capture sample size (bytes)
+ p_chmask - playback channel mask
+ p_srate - playback sampling rate
+ p_ssize - playback sample size (bytes)
+
+The attributes have sane default values.
+
+Testing the UAC2 function
+-------------------------
+
+device: run the gadget
+host: aplay -l # should list our USB Audio Gadget
+
+This function does not require real hardware support, it just
+sends a stream of audio data to/from the host. In order to
+actually hear something at the device side, a command similar
+to this must be used at the device side:
+
+$ arecord -f dat -t wav -D hw:2,0 | aplay -D hw:0,0 &
+
+e.g.:
+
+$ arecord -f dat -t wav -D hw:CARD=UAC2Gadget,DEV=0 | \
+aplay -D default:CARD=OdroidU3
+
+18. UVC function
+================
+
+The function is provided by usb_f_uvc.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "uvc".
+The uvc function provides these attributes in its function directory:
+
+ streaming_interval - interval for polling endpoint for data transfers
+ streaming_maxburst - bMaxBurst for super speed companion descriptor
+ streaming_maxpacket - maximum packet size this endpoint is capable of
+ sending or receiving when this configuration is
+ selected
+
+There are also "control" and "streaming" subdirectories, each of which contain
+a number of their subdirectories. There are some sane defaults provided, but
+the user must provide the following:
+
+ control header - create in control/header, link from control/class/fs
+ and/or control/class/ss
+ streaming header - create in streaming/header, link from
+ streaming/class/fs and/or streaming/class/hs and/or
+ streaming/class/ss
+ format description - create in streaming/mjpeg and/or
+ streaming/uncompressed
+ frame description - create in streaming/mjpeg/<format> and/or in
+ streaming/uncompressed/<format>
+
+Each frame description contains frame interval specification, and each
+such specification consists of a number of lines with an inverval value
+in each line. The rules stated above are best illustrated with an example:
+
+# mkdir functions/uvc.usb0/control/header/h
+# cd functions/uvc.usb0/control/header/h
+# ln -s header/h class/fs
+# ln -s header/h class/ss
+# mkdir -p functions/uvc.usb0/streaming/uncompressed/u/360p
+# cat <<EOF > functions/uvc.usb0/streaming/uncompressed/u/360p/dwFrameInterval
+666666
+1000000
+5000000
+EOF
+# cd $GADGET_CONFIGFS_ROOT
+# mkdir functions/uvc.usb0/streaming/header/h
+# cd functions/uvc.usb0/streaming/header/h
+# ln -s ../../uncompressed/u
+# cd ../../class/fs
+# ln -s ../../header/h
+# cd ../../class/hs
+# ln -s ../../header/h
+# cd ../../class/ss
+# ln -s ../../header/h
+
+
+Testing the UVC function
+------------------------
+
+device: run the gadget, modprobe vivid
+
+# uvc-gadget -u /dev/video<uvc video node #> -v /dev/video<vivid video node #>
+
+where uvc-gadget is this program:
+http://git.ideasonboard.org/uvc-gadget.git
+
+with these patches:
+http://www.spinics.net/lists/linux-usb/msg99220.html
+
+host: luvcview -f yuv
+
+19. PRINTER function
+====================
+
+The function is provided by usb_f_printer.ko module.
+
+Function-specific configfs interface
+------------------------------------
+
+The function name to use when creating the function directory is "printer".
+The printer function provides these attributes in its function directory:
+
+ pnp_string - Data to be passed to the host in pnp string
+ q_len - Number of requests per endpoint
+
+Testing the PRINTER function
+----------------------------
+
+The most basic testing:
+
+device: run the gadget
+# ls -l /devices/virtual/usb_printer_gadget/
+
+should show g_printer<number>.
+
+If udev is active, then /dev/g_printer<number> should appear automatically.
+
+host:
+
+If udev is active, then e.g. /dev/usb/lp0 should appear.
+
+host->device transmission:
+
+device:
+# cat /dev/g_printer<number>
+host:
+# cat > /dev/usb/lp0
+
+device->host transmission:
+
+# cat > /dev/g_printer<number>
+host:
+# cat /dev/usb/lp0
+
+More advanced testing can be done with the prn_example
+described in Documentation/usb/gadget-printer.txt.
diff --git a/Documentation/usb/gadget_configfs.txt b/Documentation/usb/gadget_configfs.txt
new file mode 100644
index 000000000..635e57493
--- /dev/null
+++ b/Documentation/usb/gadget_configfs.txt
@@ -0,0 +1,384 @@
+
+
+
+
+ Linux USB gadget configured through configfs
+
+
+ 25th April 2013
+
+
+
+
+Overview
+========
+
+A USB Linux Gadget is a device which has a UDC (USB Device Controller) and can
+be connected to a USB Host to extend it with additional functions like a serial
+port or a mass storage capability.
+
+A gadget is seen by its host as a set of configurations, each of which contains
+a number of interfaces which, from the gadget's perspective, are known as
+functions, each function representing e.g. a serial connection or a SCSI disk.
+
+Linux provides a number of functions for gadgets to use.
+
+Creating a gadget means deciding what configurations there will be
+and which functions each configuration will provide.
+
+Configfs (please see Documentation/filesystems/configfs/*) lends itself nicely
+for the purpose of telling the kernel about the above mentioned decision.
+This document is about how to do it.
+
+It also describes how configfs integration into gadget is designed.
+
+
+
+
+Requirements
+============
+
+In order for this to work configfs must be available, so CONFIGFS_FS must be
+'y' or 'm' in .config. As of this writing USB_LIBCOMPOSITE selects CONFIGFS_FS.
+
+
+
+
+Usage
+=====
+
+(The original post describing the first function
+made available through configfs can be seen here:
+http://www.spinics.net/lists/linux-usb/msg76388.html)
+
+$ modprobe libcomposite
+$ mount none $CONFIGFS_HOME -t configfs
+
+where CONFIGFS_HOME is the mount point for configfs
+
+1. Creating the gadgets
+-----------------------
+
+For each gadget to be created its corresponding directory must be created:
+
+$ mkdir $CONFIGFS_HOME/usb_gadget/<gadget name>
+
+e.g.:
+
+$ mkdir $CONFIGFS_HOME/usb_gadget/g1
+
+...
+...
+...
+
+$ cd $CONFIGFS_HOME/usb_gadget/g1
+
+Each gadget needs to have its vendor id <VID> and product id <PID> specified:
+
+$ echo <VID> > idVendor
+$ echo <PID> > idProduct
+
+A gadget also needs its serial number, manufacturer and product strings.
+In order to have a place to store them, a strings subdirectory must be created
+for each language, e.g.:
+
+$ mkdir strings/0x409
+
+Then the strings can be specified:
+
+$ echo <serial number> > strings/0x409/serialnumber
+$ echo <manufacturer> > strings/0x409/manufacturer
+$ echo <product> > strings/0x409/product
+
+2. Creating the configurations
+------------------------------
+
+Each gadget will consist of a number of configurations, their corresponding
+directories must be created:
+
+$ mkdir configs/<name>.<number>
+
+where <name> can be any string which is legal in a filesystem and the
+<number> is the configuration's number, e.g.:
+
+$ mkdir configs/c.1
+
+...
+...
+...
+
+Each configuration also needs its strings, so a subdirectory must be created
+for each language, e.g.:
+
+$ mkdir configs/c.1/strings/0x409
+
+Then the configuration string can be specified:
+
+$ echo <configuration> > configs/c.1/strings/0x409/configuration
+
+Some attributes can also be set for a configuration, e.g.:
+
+$ echo 120 > configs/c.1/MaxPower
+
+3. Creating the functions
+-------------------------
+
+The gadget will provide some functions, for each function its corresponding
+directory must be created:
+
+$ mkdir functions/<name>.<instance name>
+
+where <name> corresponds to one of allowed function names and instance name
+is an arbitrary string allowed in a filesystem, e.g.:
+
+$ mkdir functions/ncm.usb0 # usb_f_ncm.ko gets loaded with request_module()
+
+...
+...
+...
+
+Each function provides its specific set of attributes, with either read-only
+or read-write access. Where applicable they need to be written to as
+appropriate.
+Please refer to Documentation/ABI/*/configfs-usb-gadget* for more information.
+
+4. Associating the functions with their configurations
+------------------------------------------------------
+
+At this moment a number of gadgets is created, each of which has a number of
+configurations specified and a number of functions available. What remains
+is specifying which function is available in which configuration (the same
+function can be used in multiple configurations). This is achieved with
+creating symbolic links:
+
+$ ln -s functions/<name>.<instance name> configs/<name>.<number>
+
+e.g.:
+
+$ ln -s functions/ncm.usb0 configs/c.1
+
+...
+...
+...
+
+5. Enabling the gadget
+----------------------
+
+All the above steps serve the purpose of composing the gadget of
+configurations and functions.
+
+An example directory structure might look like this:
+
+.
+./strings
+./strings/0x409
+./strings/0x409/serialnumber
+./strings/0x409/product
+./strings/0x409/manufacturer
+./configs
+./configs/c.1
+./configs/c.1/ncm.usb0 -> ../../../../usb_gadget/g1/functions/ncm.usb0
+./configs/c.1/strings
+./configs/c.1/strings/0x409
+./configs/c.1/strings/0x409/configuration
+./configs/c.1/bmAttributes
+./configs/c.1/MaxPower
+./functions
+./functions/ncm.usb0
+./functions/ncm.usb0/ifname
+./functions/ncm.usb0/qmult
+./functions/ncm.usb0/host_addr
+./functions/ncm.usb0/dev_addr
+./UDC
+./bcdUSB
+./bcdDevice
+./idProduct
+./idVendor
+./bMaxPacketSize0
+./bDeviceProtocol
+./bDeviceSubClass
+./bDeviceClass
+
+
+Such a gadget must be finally enabled so that the USB host can enumerate it.
+In order to enable the gadget it must be bound to a UDC (USB Device Controller).
+
+$ echo <udc name> > UDC
+
+where <udc name> is one of those found in /sys/class/udc/*
+e.g.:
+
+$ echo s3c-hsotg > UDC
+
+
+6. Disabling the gadget
+-----------------------
+
+$ echo "" > UDC
+
+7. Cleaning up
+--------------
+
+Remove functions from configurations:
+
+$ rm configs/<config name>.<number>/<function>
+
+where <config name>.<number> specify the configuration and <function> is
+a symlink to a function being removed from the configuration, e.g.:
+
+$ rm configfs/c.1/ncm.usb0
+
+...
+...
+...
+
+Remove strings directories in configurations
+
+$ rmdir configs/<config name>.<number>/strings/<lang>
+
+e.g.:
+
+$ rmdir configs/c.1/strings/0x409
+
+...
+...
+...
+
+and remove the configurations
+
+$ rmdir configs/<config name>.<number>
+
+e.g.:
+
+rmdir configs/c.1
+
+...
+...
+...
+
+Remove functions (function modules are not unloaded, though)
+
+$ rmdir functions/<name>.<instance name>
+
+e.g.:
+
+$ rmdir functions/ncm.usb0
+
+...
+...
+...
+
+Remove strings directories in the gadget
+
+$ rmdir strings/<lang>
+
+e.g.:
+
+$ rmdir strings/0x409
+
+and finally remove the gadget:
+
+$ cd ..
+$ rmdir <gadget name>
+
+e.g.:
+
+$ rmdir g1
+
+
+
+
+Implementation design
+=====================
+
+Below the idea of how configfs works is presented.
+In configfs there are items and groups, both represented as directories.
+The difference between an item and a group is that a group can contain
+other groups. In the picture below only an item is shown.
+Both items and groups can have attributes, which are represented as files.
+The user can create and remove directories, but cannot remove files,
+which can be read-only or read-write, depending on what they represent.
+
+The filesystem part of configfs operates on config_items/groups and
+configfs_attributes which are generic and of the same type for all
+configured elements. However, they are embedded in usage-specific
+larger structures. In the picture below there is a "cs" which contains
+a config_item and an "sa" which contains a configfs_attribute.
+
+The filesystem view would be like this:
+
+./
+./cs (directory)
+ |
+ +--sa (file)
+ |
+ .
+ .
+ .
+
+Whenever a user reads/writes the "sa" file, a function is called
+which accepts a struct config_item and a struct configfs_attribute.
+In the said function the "cs" and "sa" are retrieved using the well
+known container_of technique and an appropriate sa's function (show or
+store) is called and passed the "cs" and a character buffer. The "show"
+is for displaying the file's contents (copy data from the cs to the
+buffer), while the "store" is for modifying the file's contents (copy data
+from the buffer to the cs), but it is up to the implementer of the
+two functions to decide what they actually do.
+
+typedef struct configured_structure cs;
+typedef struct specific_attribute sa;
+
+ sa
+ +----------------------------------+
+ cs | (*show)(cs *, buffer); |
++-----------------+ | (*store)(cs *, buffer, length); |
+| | | |
+| +-------------+ | | +------------------+ |
+| | struct |-|----|------>|struct | |
+| | config_item | | | |configfs_attribute| |
+| +-------------+ | | +------------------+ |
+| | +----------------------------------+
+| data to be set | .
+| | .
++-----------------+ .
+
+The file names are decided by the config item/group designer, while
+the directories in general can be named at will. A group can have
+a number of its default sub-groups created automatically.
+
+For more information on configfs please see
+Documentation/filesystems/configfs/*.
+
+The concepts described above translate to USB gadgets like this:
+
+1. A gadget has its config group, which has some attributes (idVendor,
+idProduct etc) and default sub-groups (configs, functions, strings).
+Writing to the attributes causes the information to be stored in
+appropriate locations. In the configs, functions and strings sub-groups
+a user can create their sub-groups to represent configurations, functions,
+and groups of strings in a given language.
+
+2. The user creates configurations and functions, in the configurations
+creates symbolic links to functions. This information is used when the
+gadget's UDC attribute is written to, which means binding the gadget
+to the UDC. The code in drivers/usb/gadget/configfs.c iterates over
+all configurations, and in each configuration it iterates over all
+functions and binds them. This way the whole gadget is bound.
+
+3. The file drivers/usb/gadget/configfs.c contains code for
+
+ - gadget's config_group
+ - gadget's default groups (configs, functions, strings)
+ - associating functions with configurations (symlinks)
+
+4. Each USB function naturally has its own view of what it wants
+configured, so config_groups for particular functions are defined
+in the functions implementation files drivers/usb/gadget/f_*.c.
+
+5. Function's code is written in such a way that it uses
+
+usb_get_function_instance(), which, in turn, calls request_module.
+So, provided that modprobe works, modules for particular functions
+are loaded automatically. Please note that the converse is not true:
+after a gadget is disabled and torn down, the modules remain loaded.
diff --git a/Documentation/usb/gadget_hid.txt b/Documentation/usb/gadget_hid.txt
new file mode 100644
index 000000000..7a0fb8e16
--- /dev/null
+++ b/Documentation/usb/gadget_hid.txt
@@ -0,0 +1,452 @@
+
+ Linux USB HID gadget driver
+
+Introduction
+
+ The HID Gadget driver provides emulation of USB Human Interface
+ Devices (HID). The basic HID handling is done in the kernel,
+ and HID reports can be sent/received through I/O on the
+ /dev/hidgX character devices.
+
+ For more details about HID, see the developer page on
+ http://www.usb.org/developers/hidpage/
+
+Configuration
+
+ g_hid is a platform driver, so to use it you need to add
+ struct platform_device(s) to your platform code defining the
+ HID function descriptors you want to use - E.G. something
+ like:
+
+#include <linux/platform_device.h>
+#include <linux/usb/g_hid.h>
+
+/* hid descriptor for a keyboard */
+static struct hidg_func_descriptor my_hid_data = {
+ .subclass = 0, /* No subclass */
+ .protocol = 1, /* Keyboard */
+ .report_length = 8,
+ .report_desc_length = 63,
+ .report_desc = {
+ 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */
+ 0x09, 0x06, /* USAGE (Keyboard) */
+ 0xa1, 0x01, /* COLLECTION (Application) */
+ 0x05, 0x07, /* USAGE_PAGE (Keyboard) */
+ 0x19, 0xe0, /* USAGE_MINIMUM (Keyboard LeftControl) */
+ 0x29, 0xe7, /* USAGE_MAXIMUM (Keyboard Right GUI) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0x95, 0x08, /* REPORT_COUNT (8) */
+ 0x81, 0x02, /* INPUT (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x08, /* REPORT_SIZE (8) */
+ 0x81, 0x03, /* INPUT (Cnst,Var,Abs) */
+ 0x95, 0x05, /* REPORT_COUNT (5) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0x05, 0x08, /* USAGE_PAGE (LEDs) */
+ 0x19, 0x01, /* USAGE_MINIMUM (Num Lock) */
+ 0x29, 0x05, /* USAGE_MAXIMUM (Kana) */
+ 0x91, 0x02, /* OUTPUT (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x03, /* REPORT_SIZE (3) */
+ 0x91, 0x03, /* OUTPUT (Cnst,Var,Abs) */
+ 0x95, 0x06, /* REPORT_COUNT (6) */
+ 0x75, 0x08, /* REPORT_SIZE (8) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x65, /* LOGICAL_MAXIMUM (101) */
+ 0x05, 0x07, /* USAGE_PAGE (Keyboard) */
+ 0x19, 0x00, /* USAGE_MINIMUM (Reserved) */
+ 0x29, 0x65, /* USAGE_MAXIMUM (Keyboard Application) */
+ 0x81, 0x00, /* INPUT (Data,Ary,Abs) */
+ 0xc0 /* END_COLLECTION */
+ }
+};
+
+static struct platform_device my_hid = {
+ .name = "hidg",
+ .id = 0,
+ .num_resources = 0,
+ .resource = 0,
+ .dev.platform_data = &my_hid_data,
+};
+
+ You can add as many HID functions as you want, only limited by
+ the amount of interrupt endpoints your gadget driver supports.
+
+Configuration with configfs
+
+ Instead of adding fake platform devices and drivers in order to pass
+ some data to the kernel, if HID is a part of a gadget composed with
+ configfs the hidg_func_descriptor.report_desc is passed to the kernel
+ by writing the appropriate stream of bytes to a configfs attribute.
+
+Send and receive HID reports
+
+ HID reports can be sent/received using read/write on the
+ /dev/hidgX character devices. See below for an example program
+ to do this.
+
+ hid_gadget_test is a small interactive program to test the HID
+ gadget driver. To use, point it at a hidg device and set the
+ device type (keyboard / mouse / joystick) - E.G.:
+
+ # hid_gadget_test /dev/hidg0 keyboard
+
+ You are now in the prompt of hid_gadget_test. You can type any
+ combination of options and values. Available options and
+ values are listed at program start. In keyboard mode you can
+ send up to six values.
+
+ For example type: g i s t r --left-shift
+
+ Hit return and the corresponding report will be sent by the
+ HID gadget.
+
+ Another interesting example is the caps lock test. Type
+ --caps-lock and hit return. A report is then sent by the
+ gadget and you should receive the host answer, corresponding
+ to the caps lock LED status.
+
+ --caps-lock
+ recv report:2
+
+ With this command:
+
+ # hid_gadget_test /dev/hidg1 mouse
+
+ You can test the mouse emulation. Values are two signed numbers.
+
+
+Sample code
+
+/* hid_gadget_test */
+
+#include <pthread.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#define BUF_LEN 512
+
+struct options {
+ const char *opt;
+ unsigned char val;
+};
+
+static struct options kmod[] = {
+ {.opt = "--left-ctrl", .val = 0x01},
+ {.opt = "--right-ctrl", .val = 0x10},
+ {.opt = "--left-shift", .val = 0x02},
+ {.opt = "--right-shift", .val = 0x20},
+ {.opt = "--left-alt", .val = 0x04},
+ {.opt = "--right-alt", .val = 0x40},
+ {.opt = "--left-meta", .val = 0x08},
+ {.opt = "--right-meta", .val = 0x80},
+ {.opt = NULL}
+};
+
+static struct options kval[] = {
+ {.opt = "--return", .val = 0x28},
+ {.opt = "--esc", .val = 0x29},
+ {.opt = "--bckspc", .val = 0x2a},
+ {.opt = "--tab", .val = 0x2b},
+ {.opt = "--spacebar", .val = 0x2c},
+ {.opt = "--caps-lock", .val = 0x39},
+ {.opt = "--f1", .val = 0x3a},
+ {.opt = "--f2", .val = 0x3b},
+ {.opt = "--f3", .val = 0x3c},
+ {.opt = "--f4", .val = 0x3d},
+ {.opt = "--f5", .val = 0x3e},
+ {.opt = "--f6", .val = 0x3f},
+ {.opt = "--f7", .val = 0x40},
+ {.opt = "--f8", .val = 0x41},
+ {.opt = "--f9", .val = 0x42},
+ {.opt = "--f10", .val = 0x43},
+ {.opt = "--f11", .val = 0x44},
+ {.opt = "--f12", .val = 0x45},
+ {.opt = "--insert", .val = 0x49},
+ {.opt = "--home", .val = 0x4a},
+ {.opt = "--pageup", .val = 0x4b},
+ {.opt = "--del", .val = 0x4c},
+ {.opt = "--end", .val = 0x4d},
+ {.opt = "--pagedown", .val = 0x4e},
+ {.opt = "--right", .val = 0x4f},
+ {.opt = "--left", .val = 0x50},
+ {.opt = "--down", .val = 0x51},
+ {.opt = "--kp-enter", .val = 0x58},
+ {.opt = "--up", .val = 0x52},
+ {.opt = "--num-lock", .val = 0x53},
+ {.opt = NULL}
+};
+
+int keyboard_fill_report(char report[8], char buf[BUF_LEN], int *hold)
+{
+ char *tok = strtok(buf, " ");
+ int key = 0;
+ int i = 0;
+
+ for (; tok != NULL; tok = strtok(NULL, " ")) {
+
+ if (strcmp(tok, "--quit") == 0)
+ return -1;
+
+ if (strcmp(tok, "--hold") == 0) {
+ *hold = 1;
+ continue;
+ }
+
+ if (key < 6) {
+ for (i = 0; kval[i].opt != NULL; i++)
+ if (strcmp(tok, kval[i].opt) == 0) {
+ report[2 + key++] = kval[i].val;
+ break;
+ }
+ if (kval[i].opt != NULL)
+ continue;
+ }
+
+ if (key < 6)
+ if (islower(tok[0])) {
+ report[2 + key++] = (tok[0] - ('a' - 0x04));
+ continue;
+ }
+
+ for (i = 0; kmod[i].opt != NULL; i++)
+ if (strcmp(tok, kmod[i].opt) == 0) {
+ report[0] = report[0] | kmod[i].val;
+ break;
+ }
+ if (kmod[i].opt != NULL)
+ continue;
+
+ if (key < 6)
+ fprintf(stderr, "unknown option: %s\n", tok);
+ }
+ return 8;
+}
+
+static struct options mmod[] = {
+ {.opt = "--b1", .val = 0x01},
+ {.opt = "--b2", .val = 0x02},
+ {.opt = "--b3", .val = 0x04},
+ {.opt = NULL}
+};
+
+int mouse_fill_report(char report[8], char buf[BUF_LEN], int *hold)
+{
+ char *tok = strtok(buf, " ");
+ int mvt = 0;
+ int i = 0;
+ for (; tok != NULL; tok = strtok(NULL, " ")) {
+
+ if (strcmp(tok, "--quit") == 0)
+ return -1;
+
+ if (strcmp(tok, "--hold") == 0) {
+ *hold = 1;
+ continue;
+ }
+
+ for (i = 0; mmod[i].opt != NULL; i++)
+ if (strcmp(tok, mmod[i].opt) == 0) {
+ report[0] = report[0] | mmod[i].val;
+ break;
+ }
+ if (mmod[i].opt != NULL)
+ continue;
+
+ if (!(tok[0] == '-' && tok[1] == '-') && mvt < 2) {
+ errno = 0;
+ report[1 + mvt++] = (char)strtol(tok, NULL, 0);
+ if (errno != 0) {
+ fprintf(stderr, "Bad value:'%s'\n", tok);
+ report[1 + mvt--] = 0;
+ }
+ continue;
+ }
+
+ fprintf(stderr, "unknown option: %s\n", tok);
+ }
+ return 3;
+}
+
+static struct options jmod[] = {
+ {.opt = "--b1", .val = 0x10},
+ {.opt = "--b2", .val = 0x20},
+ {.opt = "--b3", .val = 0x40},
+ {.opt = "--b4", .val = 0x80},
+ {.opt = "--hat1", .val = 0x00},
+ {.opt = "--hat2", .val = 0x01},
+ {.opt = "--hat3", .val = 0x02},
+ {.opt = "--hat4", .val = 0x03},
+ {.opt = "--hatneutral", .val = 0x04},
+ {.opt = NULL}
+};
+
+int joystick_fill_report(char report[8], char buf[BUF_LEN], int *hold)
+{
+ char *tok = strtok(buf, " ");
+ int mvt = 0;
+ int i = 0;
+
+ *hold = 1;
+
+ /* set default hat position: neutral */
+ report[3] = 0x04;
+
+ for (; tok != NULL; tok = strtok(NULL, " ")) {
+
+ if (strcmp(tok, "--quit") == 0)
+ return -1;
+
+ for (i = 0; jmod[i].opt != NULL; i++)
+ if (strcmp(tok, jmod[i].opt) == 0) {
+ report[3] = (report[3] & 0xF0) | jmod[i].val;
+ break;
+ }
+ if (jmod[i].opt != NULL)
+ continue;
+
+ if (!(tok[0] == '-' && tok[1] == '-') && mvt < 3) {
+ errno = 0;
+ report[mvt++] = (char)strtol(tok, NULL, 0);
+ if (errno != 0) {
+ fprintf(stderr, "Bad value:'%s'\n", tok);
+ report[mvt--] = 0;
+ }
+ continue;
+ }
+
+ fprintf(stderr, "unknown option: %s\n", tok);
+ }
+ return 4;
+}
+
+void print_options(char c)
+{
+ int i = 0;
+
+ if (c == 'k') {
+ printf(" keyboard options:\n"
+ " --hold\n");
+ for (i = 0; kmod[i].opt != NULL; i++)
+ printf("\t\t%s\n", kmod[i].opt);
+ printf("\n keyboard values:\n"
+ " [a-z] or\n");
+ for (i = 0; kval[i].opt != NULL; i++)
+ printf("\t\t%-8s%s", kval[i].opt, i % 2 ? "\n" : "");
+ printf("\n");
+ } else if (c == 'm') {
+ printf(" mouse options:\n"
+ " --hold\n");
+ for (i = 0; mmod[i].opt != NULL; i++)
+ printf("\t\t%s\n", mmod[i].opt);
+ printf("\n mouse values:\n"
+ " Two signed numbers\n"
+ "--quit to close\n");
+ } else {
+ printf(" joystick options:\n");
+ for (i = 0; jmod[i].opt != NULL; i++)
+ printf("\t\t%s\n", jmod[i].opt);
+ printf("\n joystick values:\n"
+ " three signed numbers\n"
+ "--quit to close\n");
+ }
+}
+
+int main(int argc, const char *argv[])
+{
+ const char *filename = NULL;
+ int fd = 0;
+ char buf[BUF_LEN];
+ int cmd_len;
+ char report[8];
+ int to_send = 8;
+ int hold = 0;
+ fd_set rfds;
+ int retval, i;
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s devname mouse|keyboard|joystick\n",
+ argv[0]);
+ return 1;
+ }
+
+ if (argv[2][0] != 'k' && argv[2][0] != 'm' && argv[2][0] != 'j')
+ return 2;
+
+ filename = argv[1];
+
+ if ((fd = open(filename, O_RDWR, 0666)) == -1) {
+ perror(filename);
+ return 3;
+ }
+
+ print_options(argv[2][0]);
+
+ while (42) {
+
+ FD_ZERO(&rfds);
+ FD_SET(STDIN_FILENO, &rfds);
+ FD_SET(fd, &rfds);
+
+ retval = select(fd + 1, &rfds, NULL, NULL, NULL);
+ if (retval == -1 && errno == EINTR)
+ continue;
+ if (retval < 0) {
+ perror("select()");
+ return 4;
+ }
+
+ if (FD_ISSET(fd, &rfds)) {
+ cmd_len = read(fd, buf, BUF_LEN - 1);
+ printf("recv report:");
+ for (i = 0; i < cmd_len; i++)
+ printf(" %02x", buf[i]);
+ printf("\n");
+ }
+
+ if (FD_ISSET(STDIN_FILENO, &rfds)) {
+ memset(report, 0x0, sizeof(report));
+ cmd_len = read(STDIN_FILENO, buf, BUF_LEN - 1);
+
+ if (cmd_len == 0)
+ break;
+
+ buf[cmd_len - 1] = '\0';
+ hold = 0;
+
+ memset(report, 0x0, sizeof(report));
+ if (argv[2][0] == 'k')
+ to_send = keyboard_fill_report(report, buf, &hold);
+ else if (argv[2][0] == 'm')
+ to_send = mouse_fill_report(report, buf, &hold);
+ else
+ to_send = joystick_fill_report(report, buf, &hold);
+
+ if (to_send == -1)
+ break;
+
+ if (write(fd, report, to_send) != to_send) {
+ perror(filename);
+ return 5;
+ }
+ if (!hold) {
+ memset(report, 0x0, sizeof(report));
+ if (write(fd, report, to_send) != to_send) {
+ perror(filename);
+ return 6;
+ }
+ }
+ }
+ }
+
+ close(fd);
+ return 0;
+}
diff --git a/Documentation/usb/gadget_multi.txt b/Documentation/usb/gadget_multi.txt
new file mode 100644
index 000000000..7d66a8636
--- /dev/null
+++ b/Documentation/usb/gadget_multi.txt
@@ -0,0 +1,150 @@
+ -*- org -*-
+
+* Overview
+
+The Multifunction Composite Gadget (or g_multi) is a composite gadget
+that makes extensive use of the composite framework to provide
+a... multifunction gadget.
+
+In it's standard configuration it provides a single USB configuration
+with RNDIS[1] (that is Ethernet), USB CDC[2] ACM (that is serial) and
+USB Mass Storage functions.
+
+A CDC ECM (Ethernet) function may be turned on via a Kconfig option
+and RNDIS can be turned off. If they are both enabled the gadget will
+have two configurations -- one with RNDIS and another with CDC ECM[3].
+
+Please note that if you use non-standard configuration (that is enable
+CDC ECM) you may need to change vendor and/or product ID.
+
+* Host drivers
+
+To make use of the gadget one needs to make it work on host side --
+without that there's no hope of achieving anything with the gadget.
+As one might expect, things one need to do very from system to system.
+
+** Linux host drivers
+
+Since the gadget uses standard composite framework and appears as such
+to Linux host it does not need any additional drivers on Linux host
+side. All the functions are handled by respective drivers developed
+for them.
+
+This is also true for two configuration set-up with RNDIS
+configuration being the first one. Linux host will use the second
+configuration with CDC ECM which should work better under Linux.
+
+** Windows host drivers
+
+For the gadget two work under Windows two conditions have to be met:
+
+*** Detecting as composite gadget
+
+First of all, Windows need to detect the gadget as an USB composite
+gadget which on its own have some conditions[4]. If they are met,
+Windows lets USB Generic Parent Driver[5] handle the device which then
+tries to much drivers for each individual interface (sort of, don't
+get into too many details).
+
+The good news is: you do not have to worry about most of the
+conditions!
+
+The only thing to worry is that the gadget has to have a single
+configuration so a dual RNDIS and CDC ECM gadget won't work unless you
+create a proper INF -- and of course, if you do submit it!
+
+*** Installing drivers for each function
+
+The other, trickier thing is making Windows install drivers for each
+individual function.
+
+For mass storage it is trivial since Windows detect it's an interface
+implementing USB Mass Storage class and selects appropriate driver.
+
+Things are harder with RDNIS and CDC ACM.
+
+**** RNDIS
+
+To make Windows select RNDIS drivers for the first function in the
+gadget, one needs to use the [[file:linux.inf]] file provided with this
+document. It "attaches" Window's RNDIS driver to the first interface
+of the gadget.
+
+Please note, that while testing we encountered some issues[6] when
+RNDIS was not the first interface. You do not need to worry abut it
+unless you are trying to develop your own gadget in which case watch
+out for this bug.
+
+**** CDC ACM
+
+Similarly, [[file:linux-cdc-acm.inf]] is provided for CDC ACM.
+
+**** Customising the gadget
+
+If you intend to hack the g_multi gadget be advised that rearranging
+functions will obviously change interface numbers for each of the
+functionality. As an effect provided INFs won't work since they have
+interface numbers hard-coded in them (it's not hard to change those
+though[7]).
+
+This also means, that after experimenting with g_multi and changing
+provided functions one should change gadget's vendor and/or product ID
+so there will be no collision with other customised gadgets or the
+original gadget.
+
+Failing to comply may cause brain damage after wondering for hours why
+things don't work as intended before realising Windows have cached
+some drivers information (changing USB port may sometimes help plus
+you might try using USBDeview[8] to remove the phantom device).
+
+**** INF testing
+
+Provided INF files have been tested on Windows XP SP3, Windows Vista
+and Windows 7, all 32-bit versions. It should work on 64-bit versions
+as well. It most likely won't work on Windows prior to Windows XP
+SP2.
+
+** Other systems
+
+At this moment, drivers for any other systems have not been tested.
+Knowing how MacOS is based on BSD and BSD is an Open Source it is
+believed that it should (read: "I have no idea whether it will") work
+out-of-the-box.
+
+For more exotic systems I have even less to say...
+
+Any testing and drivers *are* *welcome*!
+
+* Authors
+
+This document has been written by Michal Nazarewicz
+([[mailto:mina86@mina86.com]]). INF files have been hacked with
+support of Marek Szyprowski ([[mailto:m.szyprowski@samsung.com]]) and
+Xiaofan Chen ([[mailto:xiaofanc@gmail.com]]) basing on the MS RNDIS
+template[9], Microchip's CDC ACM INF file and David Brownell's
+([[mailto:dbrownell@users.sourceforge.net]]) original INF files.
+
+* Footnotes
+
+[1] Remote Network Driver Interface Specification,
+[[http://msdn.microsoft.com/en-us/library/ee484414.aspx]].
+
+[2] Communications Device Class Abstract Control Model, spec for this
+and other USB classes can be found at
+[[http://www.usb.org/developers/devclass_docs/]].
+
+[3] CDC Ethernet Control Model.
+
+[4] [[http://msdn.microsoft.com/en-us/library/ff537109(v=VS.85).aspx]]
+
+[5] [[http://msdn.microsoft.com/en-us/library/ff539234(v=VS.85).aspx]]
+
+[6] To put it in some other nice words, Windows failed to respond to
+any user input.
+
+[7] You may find [[http://www.cygnal.org/ubb/Forum9/HTML/001050.html]]
+useful.
+
+[8] http://www.nirsoft.net/utils/usb_devices_view.html
+
+[9] [[http://msdn.microsoft.com/en-us/library/ff570620.aspx]]
diff --git a/Documentation/usb/gadget_printer.txt b/Documentation/usb/gadget_printer.txt
new file mode 100644
index 000000000..ad995bf0d
--- /dev/null
+++ b/Documentation/usb/gadget_printer.txt
@@ -0,0 +1,510 @@
+
+ Linux USB Printer Gadget Driver
+ 06/04/2007
+
+ Copyright (C) 2007 Craig W. Nadler <craig@nadler.us>
+
+
+
+GENERAL
+=======
+
+This driver may be used if you are writing printer firmware using Linux as
+the embedded OS. This driver has nothing to do with using a printer with
+your Linux host system.
+
+You will need a USB device controller and a Linux driver for it that accepts
+a gadget / "device class" driver using the Linux USB Gadget API. After the
+USB device controller driver is loaded then load the printer gadget driver.
+This will present a printer interface to the USB Host that your USB Device
+port is connected to.
+
+This driver is structured for printer firmware that runs in user mode. The
+user mode printer firmware will read and write data from the kernel mode
+printer gadget driver using a device file. The printer returns a printer status
+byte when the USB HOST sends a device request to get the printer status. The
+user space firmware can read or write this status byte using a device file
+/dev/g_printer . Both blocking and non-blocking read/write calls are supported.
+
+
+
+
+HOWTO USE THIS DRIVER
+=====================
+
+To load the USB device controller driver and the printer gadget driver. The
+following example uses the Netchip 2280 USB device controller driver:
+
+modprobe net2280
+modprobe g_printer
+
+
+The follow command line parameter can be used when loading the printer gadget
+(ex: modprobe g_printer idVendor=0x0525 idProduct=0xa4a8 ):
+
+idVendor - This is the Vendor ID used in the device descriptor. The default is
+ the Netchip vendor id 0x0525. YOU MUST CHANGE TO YOUR OWN VENDOR ID
+ BEFORE RELEASING A PRODUCT. If you plan to release a product and don't
+ already have a Vendor ID please see www.usb.org for details on how to
+ get one.
+
+idProduct - This is the Product ID used in the device descriptor. The default
+ is 0xa4a8, you should change this to an ID that's not used by any of
+ your other USB products if you have any. It would be a good idea to
+ start numbering your products starting with say 0x0001.
+
+bcdDevice - This is the version number of your product. It would be a good idea
+ to put your firmware version here.
+
+iManufacturer - A string containing the name of the Vendor.
+
+iProduct - A string containing the Product Name.
+
+iSerialNum - A string containing the Serial Number. This should be changed for
+ each unit of your product.
+
+iPNPstring - The PNP ID string used for this printer. You will want to set
+ either on the command line or hard code the PNP ID string used for
+ your printer product.
+
+qlen - The number of 8k buffers to use per endpoint. The default is 10, you
+ should tune this for your product. You may also want to tune the
+ size of each buffer for your product.
+
+
+
+
+USING THE EXAMPLE CODE
+======================
+
+This example code talks to stdout, instead of a print engine.
+
+To compile the test code below:
+
+1) save it to a file called prn_example.c
+2) compile the code with the follow command:
+ gcc prn_example.c -o prn_example
+
+
+
+To read printer data from the host to stdout:
+
+ # prn_example -read_data
+
+
+To write printer data from a file (data_file) to the host:
+
+ # cat data_file | prn_example -write_data
+
+
+To get the current printer status for the gadget driver:
+
+ # prn_example -get_status
+
+ Printer status is:
+ Printer is NOT Selected
+ Paper is Out
+ Printer OK
+
+
+To set printer to Selected/On-line:
+
+ # prn_example -selected
+
+
+To set printer to Not Selected/Off-line:
+
+ # prn_example -not_selected
+
+
+To set paper status to paper out:
+
+ # prn_example -paper_out
+
+
+To set paper status to paper loaded:
+
+ # prn_example -paper_loaded
+
+
+To set error status to printer OK:
+
+ # prn_example -no_error
+
+
+To set error status to ERROR:
+
+ # prn_example -error
+
+
+
+
+EXAMPLE CODE
+============
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <linux/poll.h>
+#include <sys/ioctl.h>
+#include <linux/usb/g_printer.h>
+
+#define PRINTER_FILE "/dev/g_printer"
+#define BUF_SIZE 512
+
+
+/*
+ * 'usage()' - Show program usage.
+ */
+
+static void
+usage(const char *option) /* I - Option string or NULL */
+{
+ if (option) {
+ fprintf(stderr,"prn_example: Unknown option \"%s\"!\n",
+ option);
+ }
+
+ fputs("\n", stderr);
+ fputs("Usage: prn_example -[options]\n", stderr);
+ fputs("Options:\n", stderr);
+ fputs("\n", stderr);
+ fputs("-get_status Get the current printer status.\n", stderr);
+ fputs("-selected Set the selected status to selected.\n", stderr);
+ fputs("-not_selected Set the selected status to NOT selected.\n",
+ stderr);
+ fputs("-error Set the error status to error.\n", stderr);
+ fputs("-no_error Set the error status to NO error.\n", stderr);
+ fputs("-paper_out Set the paper status to paper out.\n", stderr);
+ fputs("-paper_loaded Set the paper status to paper loaded.\n",
+ stderr);
+ fputs("-read_data Read printer data from driver.\n", stderr);
+ fputs("-write_data Write printer sata to driver.\n", stderr);
+ fputs("-NB_read_data (Non-Blocking) Read printer data from driver.\n",
+ stderr);
+ fputs("\n\n", stderr);
+
+ exit(1);
+}
+
+
+static int
+read_printer_data()
+{
+ struct pollfd fd[1];
+
+ /* Open device file for printer gadget. */
+ fd[0].fd = open(PRINTER_FILE, O_RDWR);
+ if (fd[0].fd < 0) {
+ printf("Error %d opening %s\n", fd[0].fd, PRINTER_FILE);
+ close(fd[0].fd);
+ return(-1);
+ }
+
+ fd[0].events = POLLIN | POLLRDNORM;
+
+ while (1) {
+ static char buf[BUF_SIZE];
+ int bytes_read;
+ int retval;
+
+ /* Wait for up to 1 second for data. */
+ retval = poll(fd, 1, 1000);
+
+ if (retval && (fd[0].revents & POLLRDNORM)) {
+
+ /* Read data from printer gadget driver. */
+ bytes_read = read(fd[0].fd, buf, BUF_SIZE);
+
+ if (bytes_read < 0) {
+ printf("Error %d reading from %s\n",
+ fd[0].fd, PRINTER_FILE);
+ close(fd[0].fd);
+ return(-1);
+ } else if (bytes_read > 0) {
+ /* Write data to standard OUTPUT (stdout). */
+ fwrite(buf, 1, bytes_read, stdout);
+ fflush(stdout);
+ }
+
+ }
+
+ }
+
+ /* Close the device file. */
+ close(fd[0].fd);
+
+ return 0;
+}
+
+
+static int
+write_printer_data()
+{
+ struct pollfd fd[1];
+
+ /* Open device file for printer gadget. */
+ fd[0].fd = open (PRINTER_FILE, O_RDWR);
+ if (fd[0].fd < 0) {
+ printf("Error %d opening %s\n", fd[0].fd, PRINTER_FILE);
+ close(fd[0].fd);
+ return(-1);
+ }
+
+ fd[0].events = POLLOUT | POLLWRNORM;
+
+ while (1) {
+ int retval;
+ static char buf[BUF_SIZE];
+ /* Read data from standard INPUT (stdin). */
+ int bytes_read = fread(buf, 1, BUF_SIZE, stdin);
+
+ if (!bytes_read) {
+ break;
+ }
+
+ while (bytes_read) {
+
+ /* Wait for up to 1 second to sent data. */
+ retval = poll(fd, 1, 1000);
+
+ /* Write data to printer gadget driver. */
+ if (retval && (fd[0].revents & POLLWRNORM)) {
+ retval = write(fd[0].fd, buf, bytes_read);
+ if (retval < 0) {
+ printf("Error %d writing to %s\n",
+ fd[0].fd,
+ PRINTER_FILE);
+ close(fd[0].fd);
+ return(-1);
+ } else {
+ bytes_read -= retval;
+ }
+
+ }
+
+ }
+
+ }
+
+ /* Wait until the data has been sent. */
+ fsync(fd[0].fd);
+
+ /* Close the device file. */
+ close(fd[0].fd);
+
+ return 0;
+}
+
+
+static int
+read_NB_printer_data()
+{
+ int fd;
+ static char buf[BUF_SIZE];
+ int bytes_read;
+
+ /* Open device file for printer gadget. */
+ fd = open(PRINTER_FILE, O_RDWR|O_NONBLOCK);
+ if (fd < 0) {
+ printf("Error %d opening %s\n", fd, PRINTER_FILE);
+ close(fd);
+ return(-1);
+ }
+
+ while (1) {
+ /* Read data from printer gadget driver. */
+ bytes_read = read(fd, buf, BUF_SIZE);
+ if (bytes_read <= 0) {
+ break;
+ }
+
+ /* Write data to standard OUTPUT (stdout). */
+ fwrite(buf, 1, bytes_read, stdout);
+ fflush(stdout);
+ }
+
+ /* Close the device file. */
+ close(fd);
+
+ return 0;
+}
+
+
+static int
+get_printer_status()
+{
+ int retval;
+ int fd;
+
+ /* Open device file for printer gadget. */
+ fd = open(PRINTER_FILE, O_RDWR);
+ if (fd < 0) {
+ printf("Error %d opening %s\n", fd, PRINTER_FILE);
+ close(fd);
+ return(-1);
+ }
+
+ /* Make the IOCTL call. */
+ retval = ioctl(fd, GADGET_GET_PRINTER_STATUS);
+ if (retval < 0) {
+ fprintf(stderr, "ERROR: Failed to set printer status\n");
+ return(-1);
+ }
+
+ /* Close the device file. */
+ close(fd);
+
+ return(retval);
+}
+
+
+static int
+set_printer_status(unsigned char buf, int clear_printer_status_bit)
+{
+ int retval;
+ int fd;
+
+ retval = get_printer_status();
+ if (retval < 0) {
+ fprintf(stderr, "ERROR: Failed to get printer status\n");
+ return(-1);
+ }
+
+ /* Open device file for printer gadget. */
+ fd = open(PRINTER_FILE, O_RDWR);
+
+ if (fd < 0) {
+ printf("Error %d opening %s\n", fd, PRINTER_FILE);
+ close(fd);
+ return(-1);
+ }
+
+ if (clear_printer_status_bit) {
+ retval &= ~buf;
+ } else {
+ retval |= buf;
+ }
+
+ /* Make the IOCTL call. */
+ if (ioctl(fd, GADGET_SET_PRINTER_STATUS, (unsigned char)retval)) {
+ fprintf(stderr, "ERROR: Failed to set printer status\n");
+ return(-1);
+ }
+
+ /* Close the device file. */
+ close(fd);
+
+ return 0;
+}
+
+
+static int
+display_printer_status()
+{
+ char printer_status;
+
+ printer_status = get_printer_status();
+ if (printer_status < 0) {
+ fprintf(stderr, "ERROR: Failed to get printer status\n");
+ return(-1);
+ }
+
+ printf("Printer status is:\n");
+ if (printer_status & PRINTER_SELECTED) {
+ printf(" Printer is Selected\n");
+ } else {
+ printf(" Printer is NOT Selected\n");
+ }
+ if (printer_status & PRINTER_PAPER_EMPTY) {
+ printf(" Paper is Out\n");
+ } else {
+ printf(" Paper is Loaded\n");
+ }
+ if (printer_status & PRINTER_NOT_ERROR) {
+ printf(" Printer OK\n");
+ } else {
+ printf(" Printer ERROR\n");
+ }
+
+ return(0);
+}
+
+
+int
+main(int argc, char *argv[])
+{
+ int i; /* Looping var */
+ int retval = 0;
+
+ /* No Args */
+ if (argc == 1) {
+ usage(0);
+ exit(0);
+ }
+
+ for (i = 1; i < argc && !retval; i ++) {
+
+ if (argv[i][0] != '-') {
+ continue;
+ }
+
+ if (!strcmp(argv[i], "-get_status")) {
+ if (display_printer_status()) {
+ retval = 1;
+ }
+
+ } else if (!strcmp(argv[i], "-paper_loaded")) {
+ if (set_printer_status(PRINTER_PAPER_EMPTY, 1)) {
+ retval = 1;
+ }
+
+ } else if (!strcmp(argv[i], "-paper_out")) {
+ if (set_printer_status(PRINTER_PAPER_EMPTY, 0)) {
+ retval = 1;
+ }
+
+ } else if (!strcmp(argv[i], "-selected")) {
+ if (set_printer_status(PRINTER_SELECTED, 0)) {
+ retval = 1;
+ }
+
+ } else if (!strcmp(argv[i], "-not_selected")) {
+ if (set_printer_status(PRINTER_SELECTED, 1)) {
+ retval = 1;
+ }
+
+ } else if (!strcmp(argv[i], "-error")) {
+ if (set_printer_status(PRINTER_NOT_ERROR, 1)) {
+ retval = 1;
+ }
+
+ } else if (!strcmp(argv[i], "-no_error")) {
+ if (set_printer_status(PRINTER_NOT_ERROR, 0)) {
+ retval = 1;
+ }
+
+ } else if (!strcmp(argv[i], "-read_data")) {
+ if (read_printer_data()) {
+ retval = 1;
+ }
+
+ } else if (!strcmp(argv[i], "-write_data")) {
+ if (write_printer_data()) {
+ retval = 1;
+ }
+
+ } else if (!strcmp(argv[i], "-NB_read_data")) {
+ if (read_NB_printer_data()) {
+ retval = 1;
+ }
+
+ } else {
+ usage(argv[i]);
+ retval = 1;
+ }
+ }
+
+ exit(retval);
+}
diff --git a/Documentation/usb/gadget_serial.txt b/Documentation/usb/gadget_serial.txt
new file mode 100644
index 000000000..6b4a88a8c
--- /dev/null
+++ b/Documentation/usb/gadget_serial.txt
@@ -0,0 +1,286 @@
+
+ Linux Gadget Serial Driver v2.0
+ 11/20/2004
+ (updated 8-May-2008 for v2.3)
+
+
+License and Disclaimer
+----------------------
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of
+the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public
+License along with this program; if not, write to the Free
+Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+MA 02111-1307 USA.
+
+This document and the gadget serial driver itself are
+Copyright (C) 2004 by Al Borchers (alborchers@steinerpoint.com).
+
+If you have questions, problems, or suggestions for this driver
+please contact Al Borchers at alborchers@steinerpoint.com.
+
+
+Prerequisites
+-------------
+Versions of the gadget serial driver are available for the
+2.4 Linux kernels, but this document assumes you are using
+version 2.3 or later of the gadget serial driver in a 2.6
+Linux kernel.
+
+This document assumes that you are familiar with Linux and
+Windows and know how to configure and build Linux kernels, run
+standard utilities, use minicom and HyperTerminal, and work with
+USB and serial devices. It also assumes you configure the Linux
+gadget and usb drivers as modules.
+
+With version 2.3 of the driver, major and minor device nodes are
+no longer statically defined. Your Linux based system should mount
+sysfs in /sys, and use "mdev" (in Busybox) or "udev" to make the
+/dev nodes matching the sysfs /sys/class/tty files.
+
+
+
+Overview
+--------
+The gadget serial driver is a Linux USB gadget driver, a USB device
+side driver. It runs on a Linux system that has USB device side
+hardware; for example, a PDA, an embedded Linux system, or a PC
+with a USB development card.
+
+The gadget serial driver talks over USB to either a CDC ACM driver
+or a generic USB serial driver running on a host PC.
+
+ Host
+ --------------------------------------
+ | Host-Side CDC ACM USB Host |
+ | Operating | or | Controller | USB
+ | System | Generic USB | Driver |--------
+ | (Linux or | Serial | and | |
+ | Windows) Driver USB Stack | |
+ -------------------------------------- |
+ |
+ |
+ |
+ Gadget |
+ -------------------------------------- |
+ | Gadget USB Periph. | |
+ | Device-Side | Gadget | Controller | |
+ | Linux | Serial | Driver |--------
+ | Operating | Driver | and |
+ | System USB Stack |
+ --------------------------------------
+
+On the device-side Linux system, the gadget serial driver looks
+like a serial device.
+
+On the host-side system, the gadget serial device looks like a
+CDC ACM compliant class device or a simple vendor specific device
+with bulk in and bulk out endpoints, and it is treated similarly
+to other serial devices.
+
+The host side driver can potentially be any ACM compliant driver
+or any driver that can talk to a device with a simple bulk in/out
+interface. Gadget serial has been tested with the Linux ACM driver,
+the Windows usbser.sys ACM driver, and the Linux USB generic serial
+driver.
+
+With the gadget serial driver and the host side ACM or generic
+serial driver running, you should be able to communicate between
+the host and the gadget side systems as if they were connected by a
+serial cable.
+
+The gadget serial driver only provides simple unreliable data
+communication. It does not yet handle flow control or many other
+features of normal serial devices.
+
+
+Installing the Gadget Serial Driver
+-----------------------------------
+To use the gadget serial driver you must configure the Linux gadget
+side kernel for "Support for USB Gadgets", for a "USB Peripheral
+Controller" (for example, net2280), and for the "Serial Gadget"
+driver. All this are listed under "USB Gadget Support" when
+configuring the kernel. Then rebuild and install the kernel or
+modules.
+
+Then you must load the gadget serial driver. To load it as an
+ACM device (recommended for interoperability), do this:
+
+ modprobe g_serial
+
+To load it as a vendor specific bulk in/out device, do this:
+
+ modprobe g_serial use_acm=0
+
+This will also automatically load the underlying gadget peripheral
+controller driver. This must be done each time you reboot the gadget
+side Linux system. You can add this to the start up scripts, if
+desired.
+
+Your system should use mdev (from busybox) or udev to make the
+device nodes. After this gadget driver has been set up you should
+then see a /dev/ttyGS0 node:
+
+ # ls -l /dev/ttyGS0 | cat
+ crw-rw---- 1 root root 253, 0 May 8 14:10 /dev/ttyGS0
+ #
+
+Note that the major number (253, above) is system-specific. If
+you need to create /dev nodes by hand, the right numbers to use
+will be in the /sys/class/tty/ttyGS0/dev file.
+
+When you link this gadget driver early, perhaps even statically,
+you may want to set up an /etc/inittab entry to run "getty" on it.
+The /dev/ttyGS0 line should work like most any other serial port.
+
+
+If gadget serial is loaded as an ACM device you will want to use
+either the Windows or Linux ACM driver on the host side. If gadget
+serial is loaded as a bulk in/out device, you will want to use the
+Linux generic serial driver on the host side. Follow the appropriate
+instructions below to install the host side driver.
+
+
+Installing the Windows Host ACM Driver
+--------------------------------------
+To use the Windows ACM driver you must have the "linux-cdc-acm.inf"
+file (provided along this document) which supports all recent versions
+of Windows.
+
+When the gadget serial driver is loaded and the USB device connected
+to the Windows host with a USB cable, Windows should recognize the
+gadget serial device and ask for a driver. Tell Windows to find the
+driver in the folder that contains the "linux-cdc-acm.inf" file.
+
+For example, on Windows XP, when the gadget serial device is first
+plugged in, the "Found New Hardware Wizard" starts up. Select
+"Install from a list or specific location (Advanced)", then on the
+next screen select "Include this location in the search" and enter the
+path or browse to the folder containing the "linux-cdc-acm.inf" file.
+Windows will complain that the Gadget Serial driver has not passed
+Windows Logo testing, but select "Continue anyway" and finish the
+driver installation.
+
+On Windows XP, in the "Device Manager" (under "Control Panel",
+"System", "Hardware") expand the "Ports (COM & LPT)" entry and you
+should see "Gadget Serial" listed as the driver for one of the COM
+ports.
+
+To uninstall the Windows XP driver for "Gadget Serial", right click
+on the "Gadget Serial" entry in the "Device Manager" and select
+"Uninstall".
+
+
+Installing the Linux Host ACM Driver
+------------------------------------
+To use the Linux ACM driver you must configure the Linux host side
+kernel for "Support for Host-side USB" and for "USB Modem (CDC ACM)
+support".
+
+Once the gadget serial driver is loaded and the USB device connected
+to the Linux host with a USB cable, the host system should recognize
+the gadget serial device. For example, the command
+
+ cat /proc/bus/usb/devices
+
+should show something like this:
+
+T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#= 5 Spd=480 MxCh= 0
+D: Ver= 2.00 Cls=02(comm.) Sub=00 Prot=00 MxPS=64 #Cfgs= 1
+P: Vendor=0525 ProdID=a4a7 Rev= 2.01
+S: Manufacturer=Linux 2.6.8.1 with net2280
+S: Product=Gadget Serial
+S: SerialNumber=0
+C:* #Ifs= 2 Cfg#= 2 Atr=c0 MxPwr= 2mA
+I: If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=02 Prot=01 Driver=acm
+E: Ad=83(I) Atr=03(Int.) MxPS= 8 Ivl=32ms
+I: If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=acm
+E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
+E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
+
+If the host side Linux system is configured properly, the ACM driver
+should be loaded automatically. The command "lsmod" should show the
+"acm" module is loaded.
+
+
+Installing the Linux Host Generic USB Serial Driver
+---------------------------------------------------
+To use the Linux generic USB serial driver you must configure the
+Linux host side kernel for "Support for Host-side USB", for "USB
+Serial Converter support", and for the "USB Generic Serial Driver".
+
+Once the gadget serial driver is loaded and the USB device connected
+to the Linux host with a USB cable, the host system should recognize
+the gadget serial device. For example, the command
+
+ cat /proc/bus/usb/devices
+
+should show something like this:
+
+T: Bus=01 Lev=01 Prnt=01 Port=01 Cnt=02 Dev#= 6 Spd=480 MxCh= 0
+D: Ver= 2.00 Cls=ff(vend.) Sub=00 Prot=00 MxPS=64 #Cfgs= 1
+P: Vendor=0525 ProdID=a4a6 Rev= 2.01
+S: Manufacturer=Linux 2.6.8.1 with net2280
+S: Product=Gadget Serial
+S: SerialNumber=0
+C:* #Ifs= 1 Cfg#= 1 Atr=c0 MxPwr= 2mA
+I: If#= 0 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=00 Prot=00 Driver=serial
+E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
+E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
+
+You must load the usbserial driver and explicitly set its parameters
+to configure it to recognize the gadget serial device, like this:
+
+ echo 0x0525 0xA4A6 >/sys/bus/usb-serial/drivers/generic/new_id
+
+The legacy way is to use module parameters:
+
+ modprobe usbserial vendor=0x0525 product=0xA4A6
+
+If everything is working, usbserial will print a message in the
+system log saying something like "Gadget Serial converter now
+attached to ttyUSB0".
+
+
+Testing with Minicom or HyperTerminal
+-------------------------------------
+Once the gadget serial driver and the host driver are both installed,
+and a USB cable connects the gadget device to the host, you should
+be able to communicate over USB between the gadget and host systems.
+You can use minicom or HyperTerminal to try this out.
+
+On the gadget side run "minicom -s" to configure a new minicom
+session. Under "Serial port setup" set "/dev/ttygserial" as the
+"Serial Device". Set baud rate, data bits, parity, and stop bits,
+to 9600, 8, none, and 1--these settings mostly do not matter.
+Under "Modem and dialing" erase all the modem and dialing strings.
+
+On a Linux host running the ACM driver, configure minicom similarly
+but use "/dev/ttyACM0" as the "Serial Device". (If you have other
+ACM devices connected, change the device name appropriately.)
+
+On a Linux host running the USB generic serial driver, configure
+minicom similarly, but use "/dev/ttyUSB0" as the "Serial Device".
+(If you have other USB serial devices connected, change the device
+name appropriately.)
+
+On a Windows host configure a new HyperTerminal session to use the
+COM port assigned to Gadget Serial. The "Port Settings" will be
+set automatically when HyperTerminal connects to the gadget serial
+device, so you can leave them set to the default values--these
+settings mostly do not matter.
+
+With minicom configured and running on the gadget side and with
+minicom or HyperTerminal configured and running on the host side,
+you should be able to send data back and forth between the gadget
+side and host side systems. Anything you type on the terminal
+window on the gadget side should appear in the terminal window on
+the host side and vice versa.
diff --git a/Documentation/usb/hotplug.txt b/Documentation/usb/hotplug.txt
new file mode 100644
index 000000000..5b243f315
--- /dev/null
+++ b/Documentation/usb/hotplug.txt
@@ -0,0 +1,148 @@
+LINUX HOTPLUGGING
+
+In hotpluggable busses like USB (and Cardbus PCI), end-users plug devices
+into the bus with power on. In most cases, users expect the devices to become
+immediately usable. That means the system must do many things, including:
+
+ - Find a driver that can handle the device. That may involve
+ loading a kernel module; newer drivers can use module-init-tools
+ to publish their device (and class) support to user utilities.
+
+ - Bind a driver to that device. Bus frameworks do that using a
+ device driver's probe() routine.
+
+ - Tell other subsystems to configure the new device. Print
+ queues may need to be enabled, networks brought up, disk
+ partitions mounted, and so on. In some cases these will
+ be driver-specific actions.
+
+This involves a mix of kernel mode and user mode actions. Making devices
+be immediately usable means that any user mode actions can't wait for an
+administrator to do them: the kernel must trigger them, either passively
+(triggering some monitoring daemon to invoke a helper program) or
+actively (calling such a user mode helper program directly).
+
+Those triggered actions must support a system's administrative policies;
+such programs are called "policy agents" here. Typically they involve
+shell scripts that dispatch to more familiar administration tools.
+
+Because some of those actions rely on information about drivers (metadata)
+that is currently available only when the drivers are dynamically linked,
+you get the best hotplugging when you configure a highly modular system.
+
+
+KERNEL HOTPLUG HELPER (/sbin/hotplug)
+
+There is a kernel parameter: /proc/sys/kernel/hotplug, which normally
+holds the pathname "/sbin/hotplug". That parameter names a program
+which the kernel may invoke at various times.
+
+The /sbin/hotplug program can be invoked by any subsystem as part of its
+reaction to a configuration change, from a thread in that subsystem.
+Only one parameter is required: the name of a subsystem being notified of
+some kernel event. That name is used as the first key for further event
+dispatch; any other argument and environment parameters are specified by
+the subsystem making that invocation.
+
+Hotplug software and other resources is available at:
+
+ http://linux-hotplug.sourceforge.net
+
+Mailing list information is also available at that site.
+
+
+--------------------------------------------------------------------------
+
+
+USB POLICY AGENT
+
+The USB subsystem currently invokes /sbin/hotplug when USB devices
+are added or removed from system. The invocation is done by the kernel
+hub workqueue [hub_wq], or else as part of root hub initialization
+(done by init, modprobe, kapmd, etc). Its single command line parameter
+is the string "usb", and it passes these environment variables:
+
+ ACTION ... "add", "remove"
+ PRODUCT ... USB vendor, product, and version codes (hex)
+ TYPE ... device class codes (decimal)
+ INTERFACE ... interface 0 class codes (decimal)
+
+If "usbdevfs" is configured, DEVICE and DEVFS are also passed. DEVICE is
+the pathname of the device, and is useful for devices with multiple and/or
+alternate interfaces that complicate driver selection. By design, USB
+hotplugging is independent of "usbdevfs": you can do most essential parts
+of USB device setup without using that filesystem, and without running a
+user mode daemon to detect changes in system configuration.
+
+Currently available policy agent implementations can load drivers for
+modules, and can invoke driver-specific setup scripts. The newest ones
+leverage USB module-init-tools support. Later agents might unload drivers.
+
+
+USB MODUTILS SUPPORT
+
+Current versions of module-init-tools will create a "modules.usbmap" file
+which contains the entries from each driver's MODULE_DEVICE_TABLE. Such
+files can be used by various user mode policy agents to make sure all the
+right driver modules get loaded, either at boot time or later.
+
+See <linux/usb.h> for full information about such table entries; or look
+at existing drivers. Each table entry describes one or more criteria to
+be used when matching a driver to a device or class of devices. The
+specific criteria are identified by bits set in "match_flags", paired
+with field values. You can construct the criteria directly, or with
+macros such as these, and use driver_info to store more information.
+
+ USB_DEVICE (vendorId, productId)
+ ... matching devices with specified vendor and product ids
+ USB_DEVICE_VER (vendorId, productId, lo, hi)
+ ... like USB_DEVICE with lo <= productversion <= hi
+ USB_INTERFACE_INFO (class, subclass, protocol)
+ ... matching specified interface class info
+ USB_DEVICE_INFO (class, subclass, protocol)
+ ... matching specified device class info
+
+A short example, for a driver that supports several specific USB devices
+and their quirks, might have a MODULE_DEVICE_TABLE like this:
+
+ static const struct usb_device_id mydriver_id_table[] = {
+ { USB_DEVICE (0x9999, 0xaaaa), driver_info: QUIRK_X },
+ { USB_DEVICE (0xbbbb, 0x8888), driver_info: QUIRK_Y|QUIRK_Z },
+ ...
+ { } /* end with an all-zeroes entry */
+ };
+ MODULE_DEVICE_TABLE(usb, mydriver_id_table);
+
+Most USB device drivers should pass these tables to the USB subsystem as
+well as to the module management subsystem. Not all, though: some driver
+frameworks connect using interfaces layered over USB, and so they won't
+need such a "struct usb_driver".
+
+Drivers that connect directly to the USB subsystem should be declared
+something like this:
+
+ static struct usb_driver mydriver = {
+ .name = "mydriver",
+ .id_table = mydriver_id_table,
+ .probe = my_probe,
+ .disconnect = my_disconnect,
+
+ /*
+ if using the usb chardev framework:
+ .minor = MY_USB_MINOR_START,
+ .fops = my_file_ops,
+ if exposing any operations through usbdevfs:
+ .ioctl = my_ioctl,
+ */
+ };
+
+When the USB subsystem knows about a driver's device ID table, it's used when
+choosing drivers to probe(). The thread doing new device processing checks
+drivers' device ID entries from the MODULE_DEVICE_TABLE against interface and
+device descriptors for the device. It will only call probe() if there is a
+match, and the third argument to probe() will be the entry that matched.
+
+If you don't provide an id_table for your driver, then your driver may get
+probed for each new device; the third parameter to probe() will be null.
+
+
diff --git a/Documentation/usb/iuu_phoenix.txt b/Documentation/usb/iuu_phoenix.txt
new file mode 100644
index 000000000..e5f048067
--- /dev/null
+++ b/Documentation/usb/iuu_phoenix.txt
@@ -0,0 +1,84 @@
+Infinity Usb Unlimited Readme
+-----------------------------
+
+Hi all,
+
+
+This module provide a serial interface to use your
+IUU unit in phoenix mode. Loading this module will
+bring a ttyUSB[0-x] interface. This driver must be
+used by your favorite application to pilot the IUU
+
+This driver is still in beta stage, so bugs can
+occur and your system may freeze. As far I now,
+I never had any problem with it, but I'm not a real
+guru, so don't blame me if your system is unstable
+
+You can plug more than one IUU. Every unit will
+have his own device file(/dev/ttyUSB0,/dev/ttyUSB1,...)
+
+
+
+How to tune the reader speed ?
+
+ A few parameters can be used at load time
+ To use parameters, just unload the module if it is
+ already loaded and use modprobe iuu_phoenix param=value.
+ In case of prebuilt module, use the command
+ insmod iuu_phoenix param=value.
+
+ Example:
+
+ modprobe iuu_phoenix clockmode=3
+
+ The parameters are:
+
+ parm: clockmode:1=3Mhz579,2=3Mhz680,3=6Mhz (int)
+ parm: boost:overclock boost percent 100 to 500 (int)
+ parm: cdmode:Card detect mode 0=none, 1=CD, 2=!CD, 3=DSR, 4=!DSR, 5=CTS, 6=!CTS, 7=RING, 8=!RING (int)
+ parm: xmas:xmas color enabled or not (bool)
+ parm: debug:Debug enabled or not (bool)
+
+- clockmode will provide 3 different base settings commonly adopted by
+ different software:
+ 1. 3Mhz579
+ 2. 3Mhz680
+ 3. 6Mhz
+
+- boost provide a way to overclock the reader ( my favorite :-) )
+ For example to have best performance than a simple clockmode=3, try this:
+
+ modprobe boost=195
+
+ This will put the reader in a base of 3Mhz579 but boosted a 195 % !
+ the real clock will be now : 6979050 Hz ( 6Mhz979 ) and will increase
+ the speed to a score 10 to 20% better than the simple clockmode=3 !!!
+
+
+- cdmode permit to setup the signal used to inform the userland ( ioctl answer )
+ if the card is present or not. Eight signals are possible.
+
+- xmas is completely useless except for your eyes. This is one of my friend who was
+ so sad to have a nice device like the iuu without seeing all color range available.
+ So I have added this option to permit him to see a lot of color ( each activity change the color
+ and the frequency randomly )
+
+- debug will produce a lot of debugging messages...
+
+
+ Last notes:
+
+ Don't worry about the serial settings, the serial emulation
+ is an abstraction, so use any speed or parity setting will
+ work. ( This will not change anything ).Later I will perhaps
+ use this settings to deduce de boost but is that feature
+ really necessary ?
+ The autodetect feature used is the serial CD. If that doesn't
+ work for your software, disable detection mechanism in it.
+
+
+ Have fun !
+
+ Alain Degreffe
+
+ eczema(at)ecze.com
diff --git a/Documentation/usb/linux-cdc-acm.inf b/Documentation/usb/linux-cdc-acm.inf
new file mode 100644
index 000000000..f0ffc27d4
--- /dev/null
+++ b/Documentation/usb/linux-cdc-acm.inf
@@ -0,0 +1,107 @@
+; Windows USB CDC ACM Setup File
+
+; Based on INF template which was:
+; Copyright (c) 2000 Microsoft Corporation
+; Copyright (c) 2007 Microchip Technology Inc.
+; likely to be covered by the MLPL as found at:
+; <http://msdn.microsoft.com/en-us/cc300389.aspx#MLPL>.
+; For use only on Windows operating systems.
+
+[Version]
+Signature="$Windows NT$"
+Class=Ports
+ClassGuid={4D36E978-E325-11CE-BFC1-08002BE10318}
+Provider=%Linux%
+DriverVer=11/15/2007,5.1.2600.0
+
+[Manufacturer]
+%Linux%=DeviceList, NTamd64
+
+[DestinationDirs]
+DefaultDestDir=12
+
+
+;------------------------------------------------------------------------------
+; Windows 2000/XP/Vista-32bit Sections
+;------------------------------------------------------------------------------
+
+[DriverInstall.nt]
+include=mdmcpq.inf
+CopyFiles=DriverCopyFiles.nt
+AddReg=DriverInstall.nt.AddReg
+
+[DriverCopyFiles.nt]
+usbser.sys,,,0x20
+
+[DriverInstall.nt.AddReg]
+HKR,,DevLoader,,*ntkern
+HKR,,NTMPDriver,,USBSER.sys
+HKR,,EnumPropPages32,,"MsPorts.dll,SerialPortPropPageProvider"
+
+[DriverInstall.nt.Services]
+AddService=usbser, 0x00000002, DriverService.nt
+
+[DriverService.nt]
+DisplayName=%SERVICE%
+ServiceType=1
+StartType=3
+ErrorControl=1
+ServiceBinary=%12%\USBSER.sys
+
+;------------------------------------------------------------------------------
+; Vista-64bit Sections
+;------------------------------------------------------------------------------
+
+[DriverInstall.NTamd64]
+include=mdmcpq.inf
+CopyFiles=DriverCopyFiles.NTamd64
+AddReg=DriverInstall.NTamd64.AddReg
+
+[DriverCopyFiles.NTamd64]
+USBSER.sys,,,0x20
+
+[DriverInstall.NTamd64.AddReg]
+HKR,,DevLoader,,*ntkern
+HKR,,NTMPDriver,,USBSER.sys
+HKR,,EnumPropPages32,,"MsPorts.dll,SerialPortPropPageProvider"
+
+[DriverInstall.NTamd64.Services]
+AddService=usbser, 0x00000002, DriverService.NTamd64
+
+[DriverService.NTamd64]
+DisplayName=%SERVICE%
+ServiceType=1
+StartType=3
+ErrorControl=1
+ServiceBinary=%12%\USBSER.sys
+
+
+;------------------------------------------------------------------------------
+; Vendor and Product ID Definitions
+;------------------------------------------------------------------------------
+; When developing your USB device, the VID and PID used in the PC side
+; application program and the firmware on the microcontroller must match.
+; Modify the below line to use your VID and PID. Use the format as shown
+; below.
+; Note: One INF file can be used for multiple devices with different
+; VID and PIDs. For each supported device, append
+; ",USB\VID_xxxx&PID_yyyy" to the end of the line.
+;------------------------------------------------------------------------------
+[SourceDisksFiles]
+[SourceDisksNames]
+[DeviceList]
+%DESCRIPTION%=DriverInstall, USB\VID_0525&PID_A4A7, USB\VID_1D6B&PID_0104&MI_02, USB\VID_1D6B&PID_0106&MI_00
+
+[DeviceList.NTamd64]
+%DESCRIPTION%=DriverInstall, USB\VID_0525&PID_A4A7, USB\VID_1D6B&PID_0104&MI_02, USB\VID_1D6B&PID_0106&MI_00
+
+
+;------------------------------------------------------------------------------
+; String Definitions
+;------------------------------------------------------------------------------
+;Modify these strings to customize your device
+;------------------------------------------------------------------------------
+[Strings]
+Linux = "Linux Developer Community"
+DESCRIPTION = "Gadget Serial"
+SERVICE = "USB RS-232 Emulation Driver"
diff --git a/Documentation/usb/linux.inf b/Documentation/usb/linux.inf
new file mode 100644
index 000000000..4ffa715b0
--- /dev/null
+++ b/Documentation/usb/linux.inf
@@ -0,0 +1,66 @@
+; Based on template INF file found at
+; <http://msdn.microsoft.com/en-us/library/ff570620.aspx>
+; which was:
+; Copyright (c) Microsoft Corporation
+; and released under the MLPL as found at:
+; <http://msdn.microsoft.com/en-us/cc300389.aspx#MLPL>.
+; For use only on Windows operating systems.
+
+[Version]
+Signature = "$Windows NT$"
+Class = Net
+ClassGUID = {4d36e972-e325-11ce-bfc1-08002be10318}
+Provider = %Linux%
+DriverVer = 06/21/2006,6.0.6000.16384
+
+[Manufacturer]
+%Linux% = LinuxDevices,NTx86,NTamd64,NTia64
+
+; Decoration for x86 architecture
+[LinuxDevices.NTx86]
+%LinuxDevice% = RNDIS.NT.5.1, USB\VID_0525&PID_a4a2, USB\VID_1d6b&PID_0104&MI_00
+
+; Decoration for x64 architecture
+[LinuxDevices.NTamd64]
+%LinuxDevice% = RNDIS.NT.5.1, USB\VID_0525&PID_a4a2, USB\VID_1d6b&PID_0104&MI_00
+
+; Decoration for ia64 architecture
+[LinuxDevices.NTia64]
+%LinuxDevice% = RNDIS.NT.5.1, USB\VID_0525&PID_a4a2, USB\VID_1d6b&PID_0104&MI_00
+
+;@@@ This is the common setting for setup
+[ControlFlags]
+ExcludeFromSelect=*
+
+; DDInstall section
+; References the in-build Netrndis.inf
+[RNDIS.NT.5.1]
+Characteristics = 0x84 ; NCF_PHYSICAL + NCF_HAS_UI
+BusType = 15
+; NEVER REMOVE THE FOLLOWING REFERENCE FOR NETRNDIS.INF
+include = netrndis.inf
+needs = Usb_Rndis.ndi
+AddReg = Rndis_AddReg_Vista
+
+; DDInstal.Services section
+[RNDIS.NT.5.1.Services]
+include = netrndis.inf
+needs = Usb_Rndis.ndi.Services
+
+; Optional registry settings. You can modify as needed.
+[RNDIS_AddReg_Vista]
+HKR, NDI\params\VistaProperty, ParamDesc, 0, %Vista_Property%
+HKR, NDI\params\VistaProperty, type, 0, "edit"
+HKR, NDI\params\VistaProperty, LimitText, 0, "12"
+HKR, NDI\params\VistaProperty, UpperCase, 0, "1"
+HKR, NDI\params\VistaProperty, default, 0, " "
+HKR, NDI\params\VistaProperty, optional, 0, "1"
+
+; No sys copyfiles - the sys files are already in-build
+; (part of the operating system).
+; We do not support XP SP1-, 2003 SP1-, ME, 9x.
+
+[Strings]
+Linux = "Linux Developer Community"
+LinuxDevice = "Linux USB Ethernet/RNDIS Gadget"
+Vista_Property = "Optional Vista Property"
diff --git a/Documentation/usb/mass-storage.txt b/Documentation/usb/mass-storage.txt
new file mode 100644
index 000000000..e89803a5a
--- /dev/null
+++ b/Documentation/usb/mass-storage.txt
@@ -0,0 +1,225 @@
+* Overview
+
+ Mass Storage Gadget (or MSG) acts as a USB Mass Storage device,
+ appearing to the host as a disk or a CD-ROM drive. It supports
+ multiple logical units (LUNs). Backing storage for each LUN is
+ provided by a regular file or a block device, access can be limited
+ to read-only, and gadget can indicate that it is removable and/or
+ CD-ROM (the latter implies read-only access).
+
+ Its requirements are modest; only a bulk-in and a bulk-out endpoint
+ are needed. The memory requirement amounts to two 16K buffers.
+ Support is included for full-speed, high-speed and SuperSpeed
+ operation.
+
+ Note that the driver is slightly non-portable in that it assumes
+ a single memory/DMA buffer will be usable for bulk-in and bulk-out
+ endpoints. With most device controllers this is not an issue, but
+ there may be some with hardware restrictions that prevent a buffer
+ from being used by more than one endpoint.
+
+ This document describes how to use the gadget from user space, its
+ relation to mass storage function (or MSF) and different gadgets
+ using it, and how it differs from File Storage Gadget (or FSG)
+ (which is no longer included in Linux). It will talk only briefly
+ about how to use MSF within composite gadgets.
+
+* Module parameters
+
+ The mass storage gadget accepts the following mass storage specific
+ module parameters:
+
+ - file=filename[,filename...]
+
+ This parameter lists paths to files or block devices used for
+ backing storage for each logical unit. There may be at most
+ FSG_MAX_LUNS (8) LUNs set. If more files are specified, they will
+ be silently ignored. See also “luns” parameter.
+
+ *BEWARE* that if a file is used as a backing storage, it may not
+ be modified by any other process. This is because the host
+ assumes the data does not change without its knowledge. It may be
+ read, but (if the logical unit is writable) due to buffering on
+ the host side, the contents are not well defined.
+
+ The size of the logical unit will be rounded down to a full
+ logical block. The logical block size is 2048 bytes for LUNs
+ simulating CD-ROM, block size of the device if the backing file is
+ a block device, or 512 bytes otherwise.
+
+ - removable=b[,b...]
+
+ This parameter specifies whether each logical unit should be
+ removable. “b” here is either “y”, “Y” or “1” for true or “n”,
+ “N” or “0” for false.
+
+ If this option is set for a logical unit, gadget will accept an
+ “eject” SCSI request (Start/Stop Unit). When it is sent, the
+ backing file will be closed to simulate ejection and the logical
+ unit will not be mountable by the host until a new backing file is
+ specified by userspace on the device (see “sysfs entries”
+ section).
+
+ If a logical unit is not removable (the default), a backing file
+ must be specified for it with the “file” parameter as the module
+ is loaded. The same applies if the module is built in, no
+ exceptions.
+
+ The default value of the flag is false, *HOWEVER* it used to be
+ true. This has been changed to better match File Storage Gadget
+ and because it seems like a saner default after all. Thus to
+ maintain compatibility with older kernels, it's best to specify
+ the default values. Also, if one relied on old default, explicit
+ “n” needs to be specified now.
+
+ Note that “removable” means the logical unit's media can be
+ ejected or removed (as is true for a CD-ROM drive or a card
+ reader). It does *not* mean that the entire gadget can be
+ unplugged from the host; the proper term for that is
+ “hot-unpluggable”.
+
+ - cdrom=b[,b...]
+
+ This parameter specifies whether each logical unit should simulate
+ CD-ROM. The default is false.
+
+ - ro=b[,b...]
+
+ This parameter specifies whether each logical unit should be
+ reported as read only. This will prevent host from modifying the
+ backing files.
+
+ Note that if this flag for given logical unit is false but the
+ backing file could not be opened in read/write mode, the gadget
+ will fall back to read only mode anyway.
+
+ The default value for non-CD-ROM logical units is false; for
+ logical units simulating CD-ROM it is forced to true.
+
+ - nofua=b[,b...]
+
+ This parameter specifies whether FUA flag should be ignored in SCSI
+ Write10 and Write12 commands sent to given logical units.
+
+ MS Windows mounts removable storage in “Removal optimised mode” by
+ default. All the writes to the media are synchronous, which is
+ achieved by setting the FUA (Force Unit Access) bit in SCSI
+ Write(10,12) commands. This forces each write to wait until the
+ data has actually been written out and prevents I/O requests
+ aggregation in block layer dramatically decreasing performance.
+
+ Note that this may mean that if the device is powered from USB and
+ the user unplugs the device without unmounting it first (which at
+ least some Windows users do), the data may be lost.
+
+ The default value is false.
+
+ - luns=N
+
+ This parameter specifies number of logical units the gadget will
+ have. It is limited by FSG_MAX_LUNS (8) and higher value will be
+ capped.
+
+ If this parameter is provided, and the number of files specified
+ in “file” argument is greater then the value of “luns”, all excess
+ files will be ignored.
+
+ If this parameter is not present, the number of logical units will
+ be deduced from the number of files specified in the “file”
+ parameter. If the file parameter is missing as well, one is
+ assumed.
+
+ - stall=b
+
+ Specifies whether the gadget is allowed to halt bulk endpoints.
+ The default is determined according to the type of USB device
+ controller, but usually true.
+
+ In addition to the above, the gadget also accepts the following
+ parameters defined by the composite framework (they are common to
+ all composite gadgets so just a quick listing):
+
+ - idVendor -- USB Vendor ID (16 bit integer)
+ - idProduct -- USB Product ID (16 bit integer)
+ - bcdDevice -- USB Device version (BCD) (16 bit integer)
+ - iManufacturer -- USB Manufacturer string (string)
+ - iProduct -- USB Product string (string)
+ - iSerialNumber -- SerialNumber string (sting)
+
+* sysfs entries
+
+ For each logical unit, the gadget creates a directory in the sysfs
+ hierarchy. Inside of it the following three files are created:
+
+ - file
+
+ When read it returns the path to the backing file for the given
+ logical unit. If there is no backing file (possible only if the
+ logical unit is removable), the content is empty.
+
+ When written into, it changes the backing file for given logical
+ unit. This change can be performed even if given logical unit is
+ not specified as removable (but that may look strange to the
+ host). It may fail, however, if host disallowed medium removal
+ with the Prevent-Allow Medium Removal SCSI command.
+
+ - ro
+
+ Reflects the state of ro flag for the given logical unit. It can
+ be read any time, and written to when there is no backing file
+ open for given logical unit.
+
+ - nofua
+
+ Reflects the state of nofua flag for given logical unit. It can
+ be read and written.
+
+ Other then those, as usual, the values of module parameters can be
+ read from /sys/module/g_mass_storage/parameters/* files.
+
+* Other gadgets using mass storage function
+
+ The Mass Storage Gadget uses the Mass Storage Function to handle
+ mass storage protocol. As a composite function, MSF may be used by
+ other gadgets as well (eg. g_multi and acm_ms).
+
+ All of the information in previous sections are valid for other
+ gadgets using MSF, except that support for mass storage related
+ module parameters may be missing, or the parameters may have
+ a prefix. To figure out whether any of this is true one needs to
+ consult the gadget's documentation or its source code.
+
+ For examples of how to include mass storage function in gadgets, one
+ may take a look at mass_storage.c, acm_ms.c and multi.c (sorted by
+ complexity).
+
+* Relation to file storage gadget
+
+ The Mass Storage Function and thus the Mass Storage Gadget has been
+ based on the File Storage Gadget. The difference between the two is
+ that MSG is a composite gadget (ie. uses the composite framework)
+ while file storage gadget was a traditional gadget. From userspace
+ point of view this distinction does not really matter, but from
+ kernel hacker's point of view, this means that (i) MSG does not
+ duplicate code needed for handling basic USB protocol commands and
+ (ii) MSF can be used in any other composite gadget.
+
+ Because of that, File Storage Gadget has been removed in Linux 3.8.
+ All users need to transition to the Mass Storage Gadget. The two
+ gadgets behave mostly the same from the outside except:
+
+ 1. In FSG the “removable” and “cdrom” module parameters set the flag
+ for all logical units whereas in MSG they accept a list of y/n
+ values for each logical unit. If one uses only a single logical
+ unit this does not matter, but if there are more, the y/n value
+ needs to be repeated for each logical unit.
+
+ 2. FSG's “serial”, “vendor”, “product” and “release” module
+ parameters are handled in MSG by the composite layer's parameters
+ named respectively: “iSerialnumber”, “idVendor”, “idProduct” and
+ “bcdDevice”.
+
+ 3. MSG does not support FSG's test mode, thus “transport”,
+ “protocol” and “buflen” FSG's module parameters are not
+ supported. MSG always uses SCSI protocol with bulk only
+ transport mode and 16 KiB buffers.
diff --git a/Documentation/usb/misc_usbsevseg.txt b/Documentation/usb/misc_usbsevseg.txt
new file mode 100644
index 000000000..0f6be4f99
--- /dev/null
+++ b/Documentation/usb/misc_usbsevseg.txt
@@ -0,0 +1,46 @@
+USB 7-Segment Numeric Display
+Manufactured by Delcom Engineering
+
+Device Information
+------------------
+USB VENDOR_ID 0x0fc5
+USB PRODUCT_ID 0x1227
+Both the 6 character and 8 character displays have PRODUCT_ID,
+and according to Delcom Engineering no queryable information
+can be obtained from the device to tell them apart.
+
+Device Modes
+------------
+By default, the driver assumes the display is only 6 characters
+The mode for 6 characters is:
+ MSB 0x06; LSB 0x3f
+For the 8 character display:
+ MSB 0x08; LSB 0xff
+The device can accept "text" either in raw, hex, or ascii textmode.
+raw controls each segment manually,
+hex expects a value between 0-15 per character,
+ascii expects a value between '0'-'9' and 'A'-'F'.
+The default is ascii.
+
+Device Operation
+----------------
+1. Turn on the device:
+ echo 1 > /sys/bus/usb/.../powered
+2. Set the device's mode:
+ echo $mode_msb > /sys/bus/usb/.../mode_msb
+ echo $mode_lsb > /sys/bus/usb/.../mode_lsb
+3. Set the textmode:
+ echo $textmode > /sys/bus/usb/.../textmode
+4. set the text (for example):
+ echo "123ABC" > /sys/bus/usb/.../text (ascii)
+ echo "A1B2" > /sys/bus/usb/.../text (ascii)
+ echo -ne "\x01\x02\x03" > /sys/bus/usb/.../text (hex)
+5. Set the decimal places.
+ The device has either 6 or 8 decimal points.
+ to set the nth decimal place calculate 10 ** n
+ and echo it in to /sys/bus/usb/.../decimals
+ To set multiple decimals points sum up each power.
+ For example, to set the 0th and 3rd decimal place
+ echo 1001 > /sys/bus/usb/.../decimals
+
+
diff --git a/Documentation/usb/mtouchusb.txt b/Documentation/usb/mtouchusb.txt
new file mode 100644
index 000000000..a91adb26e
--- /dev/null
+++ b/Documentation/usb/mtouchusb.txt
@@ -0,0 +1,72 @@
+CHANGES
+
+- 0.3 - Created based off of scanner & INSTALL from the original touchscreen
+ driver on freecode (http://freecode.com/projects/3mtouchscreendriver)
+- Amended for linux-2.4.18, then 2.4.19
+
+- 0.5 - Complete rewrite using Linux Input in 2.6.3
+ Unfortunately no calibration support at this time
+
+- 1.4 - Multiple changes to support the EXII 5000UC and house cleaning
+ Changed reset from standard USB dev reset to vendor reset
+ Changed data sent to host from compensated to raw coordinates
+ Eliminated vendor/product module params
+ Performed multiple successful tests with an EXII-5010UC
+
+SUPPORTED HARDWARE:
+
+ All controllers have the Vendor: 0x0596 & Product: 0x0001
+
+
+ Controller Description Part Number
+ ------------------------------------------------------
+
+ USB Capacitive - Pearl Case 14-205 (Discontinued)
+ USB Capacitive - Black Case 14-124 (Discontinued)
+ USB Capacitive - No Case 14-206 (Discontinued)
+
+ USB Capacitive - Pearl Case EXII-5010UC
+ USB Capacitive - Black Case EXII-5030UC
+ USB Capacitive - No Case EXII-5050UC
+
+DRIVER NOTES:
+
+Installation is simple, you only need to add Linux Input, Linux USB, and the
+driver to the kernel. The driver can also be optionally built as a module.
+
+This driver appears to be one of possible 2 Linux USB Input Touchscreen
+drivers. Although 3M produces a binary only driver available for
+download, I persist in updating this driver since I would like to use the
+touchscreen for embedded apps using QTEmbedded, DirectFB, etc. So I feel the
+logical choice is to use Linux Input.
+
+Currently there is no way to calibrate the device via this driver. Even if
+the device could be calibrated, the driver pulls to raw coordinate data from
+the controller. This means calibration must be performed within the
+userspace.
+
+The controller screen resolution is now 0 to 16384 for both X and Y reporting
+the raw touch data. This is the same for the old and new capacitive USB
+controllers.
+
+Perhaps at some point an abstract function will be placed into evdev so
+generic functions like calibrations, resets, and vendor information can be
+requested from the userspace (And the drivers would handle the vendor specific
+tasks).
+
+TODO:
+
+Implement a control urb again to handle requests to and from the device
+such as calibration, etc once/if it becomes available.
+
+DISCLAIMER:
+
+I am not a MicroTouch/3M employee, nor have I ever been. 3M does not support
+this driver! If you want touch drivers only supported within X, please go to:
+
+http://www.3m.com/3MTouchSystems/
+
+THANKS:
+
+A huge thank you to 3M Touch Systems for the EXII-5010UC controllers for
+testing!
diff --git a/Documentation/usb/ohci.txt b/Documentation/usb/ohci.txt
new file mode 100644
index 000000000..99320d9fa
--- /dev/null
+++ b/Documentation/usb/ohci.txt
@@ -0,0 +1,32 @@
+23-Aug-2002
+
+The "ohci-hcd" driver is a USB Host Controller Driver (HCD) that is derived
+from the "usb-ohci" driver from the 2.4 kernel series. The "usb-ohci" code
+was written primarily by Roman Weissgaerber <weissg@vienna.at> but with
+contributions from many others (read its copyright/licencing header).
+
+It supports the "Open Host Controller Interface" (OHCI), which standardizes
+hardware register protocols used to talk to USB 1.1 host controllers. As
+compared to the earlier "Universal Host Controller Interface" (UHCI) from
+Intel, it pushes more intelligence into the hardware. USB 1.1 controllers
+from vendors other than Intel and VIA generally use OHCI.
+
+Changes since the 2.4 kernel include
+
+ - improved robustness; bugfixes; and less overhead
+ - supports the updated and simplified usbcore APIs
+ - interrupt transfers can be larger, and can be queued
+ - less code, by using the upper level "hcd" framework
+ - supports some non-PCI implementations of OHCI
+ - ... more
+
+The "ohci-hcd" driver handles all USB 1.1 transfer types. Transfers of all
+types can be queued. That was also true in "usb-ohci", except for interrupt
+transfers. Previously, using periods of one frame would risk data loss due
+to overhead in IRQ processing. When interrupt transfers are queued, those
+risks can be minimized by making sure the hardware always has transfers to
+work on while the OS is getting around to the relevant IRQ processing.
+
+- David Brownell
+ <dbrownell@users.sourceforge.net>
+
diff --git a/Documentation/usb/persist.txt b/Documentation/usb/persist.txt
new file mode 100644
index 000000000..35d70eda9
--- /dev/null
+++ b/Documentation/usb/persist.txt
@@ -0,0 +1,165 @@
+ USB device persistence during system suspend
+
+ Alan Stern <stern@rowland.harvard.edu>
+
+ September 2, 2006 (Updated February 25, 2008)
+
+
+ What is the problem?
+
+According to the USB specification, when a USB bus is suspended the
+bus must continue to supply suspend current (around 1-5 mA). This
+is so that devices can maintain their internal state and hubs can
+detect connect-change events (devices being plugged in or unplugged).
+The technical term is "power session".
+
+If a USB device's power session is interrupted then the system is
+required to behave as though the device has been unplugged. It's a
+conservative approach; in the absence of suspend current the computer
+has no way to know what has actually happened. Perhaps the same
+device is still attached or perhaps it was removed and a different
+device plugged into the port. The system must assume the worst.
+
+By default, Linux behaves according to the spec. If a USB host
+controller loses power during a system suspend, then when the system
+wakes up all the devices attached to that controller are treated as
+though they had disconnected. This is always safe and it is the
+"officially correct" thing to do.
+
+For many sorts of devices this behavior doesn't matter in the least.
+If the kernel wants to believe that your USB keyboard was unplugged
+while the system was asleep and a new keyboard was plugged in when the
+system woke up, who cares? It'll still work the same when you type on
+it.
+
+Unfortunately problems _can_ arise, particularly with mass-storage
+devices. The effect is exactly the same as if the device really had
+been unplugged while the system was suspended. If you had a mounted
+filesystem on the device, you're out of luck -- everything in that
+filesystem is now inaccessible. This is especially annoying if your
+root filesystem was located on the device, since your system will
+instantly crash.
+
+Loss of power isn't the only mechanism to worry about. Anything that
+interrupts a power session will have the same effect. For example,
+even though suspend current may have been maintained while the system
+was asleep, on many systems during the initial stages of wakeup the
+firmware (i.e., the BIOS) resets the motherboard's USB host
+controllers. Result: all the power sessions are destroyed and again
+it's as though you had unplugged all the USB devices. Yes, it's
+entirely the BIOS's fault, but that doesn't do _you_ any good unless
+you can convince the BIOS supplier to fix the problem (lots of luck!).
+
+On many systems the USB host controllers will get reset after a
+suspend-to-RAM. On almost all systems, no suspend current is
+available during hibernation (also known as swsusp or suspend-to-disk).
+You can check the kernel log after resuming to see if either of these
+has happened; look for lines saying "root hub lost power or was reset".
+
+In practice, people are forced to unmount any filesystems on a USB
+device before suspending. If the root filesystem is on a USB device,
+the system can't be suspended at all. (All right, it _can_ be
+suspended -- but it will crash as soon as it wakes up, which isn't
+much better.)
+
+
+ What is the solution?
+
+The kernel includes a feature called USB-persist. It tries to work
+around these issues by allowing the core USB device data structures to
+persist across a power-session disruption.
+
+It works like this. If the kernel sees that a USB host controller is
+not in the expected state during resume (i.e., if the controller was
+reset or otherwise had lost power) then it applies a persistence check
+to each of the USB devices below that controller for which the
+"persist" attribute is set. It doesn't try to resume the device; that
+can't work once the power session is gone. Instead it issues a USB
+port reset and then re-enumerates the device. (This is exactly the
+same thing that happens whenever a USB device is reset.) If the
+re-enumeration shows that the device now attached to that port has the
+same descriptors as before, including the Vendor and Product IDs, then
+the kernel continues to use the same device structure. In effect, the
+kernel treats the device as though it had merely been reset instead of
+unplugged.
+
+The same thing happens if the host controller is in the expected state
+but a USB device was unplugged and then replugged, or if a USB device
+fails to carry out a normal resume.
+
+If no device is now attached to the port, or if the descriptors are
+different from what the kernel remembers, then the treatment is what
+you would expect. The kernel destroys the old device structure and
+behaves as though the old device had been unplugged and a new device
+plugged in.
+
+The end result is that the USB device remains available and usable.
+Filesystem mounts and memory mappings are unaffected, and the world is
+now a good and happy place.
+
+Note that the "USB-persist" feature will be applied only to those
+devices for which it is enabled. You can enable the feature by doing
+(as root):
+
+ echo 1 >/sys/bus/usb/devices/.../power/persist
+
+where the "..." should be filled in the with the device's ID. Disable
+the feature by writing 0 instead of 1. For hubs the feature is
+automatically and permanently enabled and the power/persist file
+doesn't even exist, so you only have to worry about setting it for
+devices where it really matters.
+
+
+ Is this the best solution?
+
+Perhaps not. Arguably, keeping track of mounted filesystems and
+memory mappings across device disconnects should be handled by a
+centralized Logical Volume Manager. Such a solution would allow you
+to plug in a USB flash device, create a persistent volume associated
+with it, unplug the flash device, plug it back in later, and still
+have the same persistent volume associated with the device. As such
+it would be more far-reaching than USB-persist.
+
+On the other hand, writing a persistent volume manager would be a big
+job and using it would require significant input from the user. This
+solution is much quicker and easier -- and it exists now, a giant
+point in its favor!
+
+Furthermore, the USB-persist feature applies to _all_ USB devices, not
+just mass-storage devices. It might turn out to be equally useful for
+other device types, such as network interfaces.
+
+
+ WARNING: USB-persist can be dangerous!!
+
+When recovering an interrupted power session the kernel does its best
+to make sure the USB device hasn't been changed; that is, the same
+device is still plugged into the port as before. But the checks
+aren't guaranteed to be 100% accurate.
+
+If you replace one USB device with another of the same type (same
+manufacturer, same IDs, and so on) there's an excellent chance the
+kernel won't detect the change. The serial number string and other
+descriptors are compared with the kernel's stored values, but this
+might not help since manufacturers frequently omit serial numbers
+entirely in their devices.
+
+Furthermore it's quite possible to leave a USB device exactly the same
+while changing its media. If you replace the flash memory card in a
+USB card reader while the system is asleep, the kernel will have no
+way to know you did it. The kernel will assume that nothing has
+happened and will continue to use the partition tables, inodes, and
+memory mappings for the old card.
+
+If the kernel gets fooled in this way, it's almost certain to cause
+data corruption and to crash your system. You'll have no one to blame
+but yourself.
+
+For those devices with avoid_reset_quirk attribute being set, persist
+maybe fail because they may morph after reset.
+
+YOU HAVE BEEN WARNED! USE AT YOUR OWN RISK!
+
+That having been said, most of the time there shouldn't be any trouble
+at all. The USB-persist feature can be extremely useful. Make the
+most of it.
diff --git a/Documentation/usb/power-management.txt b/Documentation/usb/power-management.txt
new file mode 100644
index 000000000..b5f839117
--- /dev/null
+++ b/Documentation/usb/power-management.txt
@@ -0,0 +1,760 @@
+ Power Management for USB
+
+ Alan Stern <stern@rowland.harvard.edu>
+
+ Last-updated: February 2014
+
+
+ Contents:
+ ---------
+ * What is Power Management?
+ * What is Remote Wakeup?
+ * When is a USB device idle?
+ * Forms of dynamic PM
+ * The user interface for dynamic PM
+ * Changing the default idle-delay time
+ * Warnings
+ * The driver interface for Power Management
+ * The driver interface for autosuspend and autoresume
+ * Other parts of the driver interface
+ * Mutual exclusion
+ * Interaction between dynamic PM and system PM
+ * xHCI hardware link PM
+ * USB Port Power Control
+ * User Interface for Port Power Control
+ * Suggested Userspace Port Power Policy
+
+
+ What is Power Management?
+ -------------------------
+
+Power Management (PM) is the practice of saving energy by suspending
+parts of a computer system when they aren't being used. While a
+component is "suspended" it is in a nonfunctional low-power state; it
+might even be turned off completely. A suspended component can be
+"resumed" (returned to a functional full-power state) when the kernel
+needs to use it. (There also are forms of PM in which components are
+placed in a less functional but still usable state instead of being
+suspended; an example would be reducing the CPU's clock rate. This
+document will not discuss those other forms.)
+
+When the parts being suspended include the CPU and most of the rest of
+the system, we speak of it as a "system suspend". When a particular
+device is turned off while the system as a whole remains running, we
+call it a "dynamic suspend" (also known as a "runtime suspend" or
+"selective suspend"). This document concentrates mostly on how
+dynamic PM is implemented in the USB subsystem, although system PM is
+covered to some extent (see Documentation/power/*.txt for more
+information about system PM).
+
+System PM support is present only if the kernel was built with CONFIG_SUSPEND
+or CONFIG_HIBERNATION enabled. Dynamic PM support for USB is present whenever
+the kernel was built with CONFIG_PM enabled.
+
+[Historically, dynamic PM support for USB was present only if the
+kernel had been built with CONFIG_USB_SUSPEND enabled (which depended on
+CONFIG_PM_RUNTIME). Starting with the 3.10 kernel release, dynamic PM support
+for USB was present whenever the kernel was built with CONFIG_PM_RUNTIME
+enabled. The CONFIG_USB_SUSPEND option had been eliminated.]
+
+
+ What is Remote Wakeup?
+ ----------------------
+
+When a device has been suspended, it generally doesn't resume until
+the computer tells it to. Likewise, if the entire computer has been
+suspended, it generally doesn't resume until the user tells it to, say
+by pressing a power button or opening the cover.
+
+However some devices have the capability of resuming by themselves, or
+asking the kernel to resume them, or even telling the entire computer
+to resume. This capability goes by several names such as "Wake On
+LAN"; we will refer to it generically as "remote wakeup". When a
+device is enabled for remote wakeup and it is suspended, it may resume
+itself (or send a request to be resumed) in response to some external
+event. Examples include a suspended keyboard resuming when a key is
+pressed, or a suspended USB hub resuming when a device is plugged in.
+
+
+ When is a USB device idle?
+ --------------------------
+
+A device is idle whenever the kernel thinks it's not busy doing
+anything important and thus is a candidate for being suspended. The
+exact definition depends on the device's driver; drivers are allowed
+to declare that a device isn't idle even when there's no actual
+communication taking place. (For example, a hub isn't considered idle
+unless all the devices plugged into that hub are already suspended.)
+In addition, a device isn't considered idle so long as a program keeps
+its usbfs file open, whether or not any I/O is going on.
+
+If a USB device has no driver, its usbfs file isn't open, and it isn't
+being accessed through sysfs, then it definitely is idle.
+
+
+ Forms of dynamic PM
+ -------------------
+
+Dynamic suspends occur when the kernel decides to suspend an idle
+device. This is called "autosuspend" for short. In general, a device
+won't be autosuspended unless it has been idle for some minimum period
+of time, the so-called idle-delay time.
+
+Of course, nothing the kernel does on its own initiative should
+prevent the computer or its devices from working properly. If a
+device has been autosuspended and a program tries to use it, the
+kernel will automatically resume the device (autoresume). For the
+same reason, an autosuspended device will usually have remote wakeup
+enabled, if the device supports remote wakeup.
+
+It is worth mentioning that many USB drivers don't support
+autosuspend. In fact, at the time of this writing (Linux 2.6.23) the
+only drivers which do support it are the hub driver, kaweth, asix,
+usblp, usblcd, and usb-skeleton (which doesn't count). If a
+non-supporting driver is bound to a device, the device won't be
+autosuspended. In effect, the kernel pretends the device is never
+idle.
+
+We can categorize power management events in two broad classes:
+external and internal. External events are those triggered by some
+agent outside the USB stack: system suspend/resume (triggered by
+userspace), manual dynamic resume (also triggered by userspace), and
+remote wakeup (triggered by the device). Internal events are those
+triggered within the USB stack: autosuspend and autoresume. Note that
+all dynamic suspend events are internal; external agents are not
+allowed to issue dynamic suspends.
+
+
+ The user interface for dynamic PM
+ ---------------------------------
+
+The user interface for controlling dynamic PM is located in the power/
+subdirectory of each USB device's sysfs directory, that is, in
+/sys/bus/usb/devices/.../power/ where "..." is the device's ID. The
+relevant attribute files are: wakeup, control, and
+autosuspend_delay_ms. (There may also be a file named "level"; this
+file was deprecated as of the 2.6.35 kernel and replaced by the
+"control" file. In 2.6.38 the "autosuspend" file will be deprecated
+and replaced by the "autosuspend_delay_ms" file. The only difference
+is that the newer file expresses the delay in milliseconds whereas the
+older file uses seconds. Confusingly, both files are present in 2.6.37
+but only "autosuspend" works.)
+
+ power/wakeup
+
+ This file is empty if the device does not support
+ remote wakeup. Otherwise the file contains either the
+ word "enabled" or the word "disabled", and you can
+ write those words to the file. The setting determines
+ whether or not remote wakeup will be enabled when the
+ device is next suspended. (If the setting is changed
+ while the device is suspended, the change won't take
+ effect until the following suspend.)
+
+ power/control
+
+ This file contains one of two words: "on" or "auto".
+ You can write those words to the file to change the
+ device's setting.
+
+ "on" means that the device should be resumed and
+ autosuspend is not allowed. (Of course, system
+ suspends are still allowed.)
+
+ "auto" is the normal state in which the kernel is
+ allowed to autosuspend and autoresume the device.
+
+ (In kernels up to 2.6.32, you could also specify
+ "suspend", meaning that the device should remain
+ suspended and autoresume was not allowed. This
+ setting is no longer supported.)
+
+ power/autosuspend_delay_ms
+
+ This file contains an integer value, which is the
+ number of milliseconds the device should remain idle
+ before the kernel will autosuspend it (the idle-delay
+ time). The default is 2000. 0 means to autosuspend
+ as soon as the device becomes idle, and negative
+ values mean never to autosuspend. You can write a
+ number to the file to change the autosuspend
+ idle-delay time.
+
+Writing "-1" to power/autosuspend_delay_ms and writing "on" to
+power/control do essentially the same thing -- they both prevent the
+device from being autosuspended. Yes, this is a redundancy in the
+API.
+
+(In 2.6.21 writing "0" to power/autosuspend would prevent the device
+from being autosuspended; the behavior was changed in 2.6.22. The
+power/autosuspend attribute did not exist prior to 2.6.21, and the
+power/level attribute did not exist prior to 2.6.22. power/control
+was added in 2.6.34, and power/autosuspend_delay_ms was added in
+2.6.37 but did not become functional until 2.6.38.)
+
+
+ Changing the default idle-delay time
+ ------------------------------------
+
+The default autosuspend idle-delay time (in seconds) is controlled by
+a module parameter in usbcore. You can specify the value when usbcore
+is loaded. For example, to set it to 5 seconds instead of 2 you would
+do:
+
+ modprobe usbcore autosuspend=5
+
+Equivalently, you could add to a configuration file in /etc/modprobe.d
+a line saying:
+
+ options usbcore autosuspend=5
+
+Some distributions load the usbcore module very early during the boot
+process, by means of a program or script running from an initramfs
+image. To alter the parameter value you would have to rebuild that
+image.
+
+If usbcore is compiled into the kernel rather than built as a loadable
+module, you can add
+
+ usbcore.autosuspend=5
+
+to the kernel's boot command line.
+
+Finally, the parameter value can be changed while the system is
+running. If you do:
+
+ echo 5 >/sys/module/usbcore/parameters/autosuspend
+
+then each new USB device will have its autosuspend idle-delay
+initialized to 5. (The idle-delay values for already existing devices
+will not be affected.)
+
+Setting the initial default idle-delay to -1 will prevent any
+autosuspend of any USB device. This has the benefit of allowing you
+then to enable autosuspend for selected devices.
+
+
+ Warnings
+ --------
+
+The USB specification states that all USB devices must support power
+management. Nevertheless, the sad fact is that many devices do not
+support it very well. You can suspend them all right, but when you
+try to resume them they disconnect themselves from the USB bus or
+they stop working entirely. This seems to be especially prevalent
+among printers and scanners, but plenty of other types of device have
+the same deficiency.
+
+For this reason, by default the kernel disables autosuspend (the
+power/control attribute is initialized to "on") for all devices other
+than hubs. Hubs, at least, appear to be reasonably well-behaved in
+this regard.
+
+(In 2.6.21 and 2.6.22 this wasn't the case. Autosuspend was enabled
+by default for almost all USB devices. A number of people experienced
+problems as a result.)
+
+This means that non-hub devices won't be autosuspended unless the user
+or a program explicitly enables it. As of this writing there aren't
+any widespread programs which will do this; we hope that in the near
+future device managers such as HAL will take on this added
+responsibility. In the meantime you can always carry out the
+necessary operations by hand or add them to a udev script. You can
+also change the idle-delay time; 2 seconds is not the best choice for
+every device.
+
+If a driver knows that its device has proper suspend/resume support,
+it can enable autosuspend all by itself. For example, the video
+driver for a laptop's webcam might do this (in recent kernels they
+do), since these devices are rarely used and so should normally be
+autosuspended.
+
+Sometimes it turns out that even when a device does work okay with
+autosuspend there are still problems. For example, the usbhid driver,
+which manages keyboards and mice, has autosuspend support. Tests with
+a number of keyboards show that typing on a suspended keyboard, while
+causing the keyboard to do a remote wakeup all right, will nonetheless
+frequently result in lost keystrokes. Tests with mice show that some
+of them will issue a remote-wakeup request in response to button
+presses but not to motion, and some in response to neither.
+
+The kernel will not prevent you from enabling autosuspend on devices
+that can't handle it. It is even possible in theory to damage a
+device by suspending it at the wrong time. (Highly unlikely, but
+possible.) Take care.
+
+
+ The driver interface for Power Management
+ -----------------------------------------
+
+The requirements for a USB driver to support external power management
+are pretty modest; the driver need only define
+
+ .suspend
+ .resume
+ .reset_resume
+
+methods in its usb_driver structure, and the reset_resume method is
+optional. The methods' jobs are quite simple:
+
+ The suspend method is called to warn the driver that the
+ device is going to be suspended. If the driver returns a
+ negative error code, the suspend will be aborted. Normally
+ the driver will return 0, in which case it must cancel all
+ outstanding URBs (usb_kill_urb()) and not submit any more.
+
+ The resume method is called to tell the driver that the
+ device has been resumed and the driver can return to normal
+ operation. URBs may once more be submitted.
+
+ The reset_resume method is called to tell the driver that
+ the device has been resumed and it also has been reset.
+ The driver should redo any necessary device initialization,
+ since the device has probably lost most or all of its state
+ (although the interfaces will be in the same altsettings as
+ before the suspend).
+
+If the device is disconnected or powered down while it is suspended,
+the disconnect method will be called instead of the resume or
+reset_resume method. This is also quite likely to happen when
+waking up from hibernation, as many systems do not maintain suspend
+current to the USB host controllers during hibernation. (It's
+possible to work around the hibernation-forces-disconnect problem by
+using the USB Persist facility.)
+
+The reset_resume method is used by the USB Persist facility (see
+Documentation/usb/persist.txt) and it can also be used under certain
+circumstances when CONFIG_USB_PERSIST is not enabled. Currently, if a
+device is reset during a resume and the driver does not have a
+reset_resume method, the driver won't receive any notification about
+the resume. Later kernels will call the driver's disconnect method;
+2.6.23 doesn't do this.
+
+USB drivers are bound to interfaces, so their suspend and resume
+methods get called when the interfaces are suspended or resumed. In
+principle one might want to suspend some interfaces on a device (i.e.,
+force the drivers for those interface to stop all activity) without
+suspending the other interfaces. The USB core doesn't allow this; all
+interfaces are suspended when the device itself is suspended and all
+interfaces are resumed when the device is resumed. It isn't possible
+to suspend or resume some but not all of a device's interfaces. The
+closest you can come is to unbind the interfaces' drivers.
+
+
+ The driver interface for autosuspend and autoresume
+ ---------------------------------------------------
+
+To support autosuspend and autoresume, a driver should implement all
+three of the methods listed above. In addition, a driver indicates
+that it supports autosuspend by setting the .supports_autosuspend flag
+in its usb_driver structure. It is then responsible for informing the
+USB core whenever one of its interfaces becomes busy or idle. The
+driver does so by calling these six functions:
+
+ int usb_autopm_get_interface(struct usb_interface *intf);
+ void usb_autopm_put_interface(struct usb_interface *intf);
+ int usb_autopm_get_interface_async(struct usb_interface *intf);
+ void usb_autopm_put_interface_async(struct usb_interface *intf);
+ void usb_autopm_get_interface_no_resume(struct usb_interface *intf);
+ void usb_autopm_put_interface_no_suspend(struct usb_interface *intf);
+
+The functions work by maintaining a usage counter in the
+usb_interface's embedded device structure. When the counter is > 0
+then the interface is deemed to be busy, and the kernel will not
+autosuspend the interface's device. When the usage counter is = 0
+then the interface is considered to be idle, and the kernel may
+autosuspend the device.
+
+Drivers need not be concerned about balancing changes to the usage
+counter; the USB core will undo any remaining "get"s when a driver
+is unbound from its interface. As a corollary, drivers must not call
+any of the usb_autopm_* functions after their disconnect() routine has
+returned.
+
+Drivers using the async routines are responsible for their own
+synchronization and mutual exclusion.
+
+ usb_autopm_get_interface() increments the usage counter and
+ does an autoresume if the device is suspended. If the
+ autoresume fails, the counter is decremented back.
+
+ usb_autopm_put_interface() decrements the usage counter and
+ attempts an autosuspend if the new value is = 0.
+
+ usb_autopm_get_interface_async() and
+ usb_autopm_put_interface_async() do almost the same things as
+ their non-async counterparts. The big difference is that they
+ use a workqueue to do the resume or suspend part of their
+ jobs. As a result they can be called in an atomic context,
+ such as an URB's completion handler, but when they return the
+ device will generally not yet be in the desired state.
+
+ usb_autopm_get_interface_no_resume() and
+ usb_autopm_put_interface_no_suspend() merely increment or
+ decrement the usage counter; they do not attempt to carry out
+ an autoresume or an autosuspend. Hence they can be called in
+ an atomic context.
+
+The simplest usage pattern is that a driver calls
+usb_autopm_get_interface() in its open routine and
+usb_autopm_put_interface() in its close or release routine. But other
+patterns are possible.
+
+The autosuspend attempts mentioned above will often fail for one
+reason or another. For example, the power/control attribute might be
+set to "on", or another interface in the same device might not be
+idle. This is perfectly normal. If the reason for failure was that
+the device hasn't been idle for long enough, a timer is scheduled to
+carry out the operation automatically when the autosuspend idle-delay
+has expired.
+
+Autoresume attempts also can fail, although failure would mean that
+the device is no longer present or operating properly. Unlike
+autosuspend, there's no idle-delay for an autoresume.
+
+
+ Other parts of the driver interface
+ -----------------------------------
+
+Drivers can enable autosuspend for their devices by calling
+
+ usb_enable_autosuspend(struct usb_device *udev);
+
+in their probe() routine, if they know that the device is capable of
+suspending and resuming correctly. This is exactly equivalent to
+writing "auto" to the device's power/control attribute. Likewise,
+drivers can disable autosuspend by calling
+
+ usb_disable_autosuspend(struct usb_device *udev);
+
+This is exactly the same as writing "on" to the power/control attribute.
+
+Sometimes a driver needs to make sure that remote wakeup is enabled
+during autosuspend. For example, there's not much point
+autosuspending a keyboard if the user can't cause the keyboard to do a
+remote wakeup by typing on it. If the driver sets
+intf->needs_remote_wakeup to 1, the kernel won't autosuspend the
+device if remote wakeup isn't available. (If the device is already
+autosuspended, though, setting this flag won't cause the kernel to
+autoresume it. Normally a driver would set this flag in its probe
+method, at which time the device is guaranteed not to be
+autosuspended.)
+
+If a driver does its I/O asynchronously in interrupt context, it
+should call usb_autopm_get_interface_async() before starting output and
+usb_autopm_put_interface_async() when the output queue drains. When
+it receives an input event, it should call
+
+ usb_mark_last_busy(struct usb_device *udev);
+
+in the event handler. This tells the PM core that the device was just
+busy and therefore the next autosuspend idle-delay expiration should
+be pushed back. Many of the usb_autopm_* routines also make this call,
+so drivers need to worry only when interrupt-driven input arrives.
+
+Asynchronous operation is always subject to races. For example, a
+driver may call the usb_autopm_get_interface_async() routine at a time
+when the core has just finished deciding the device has been idle for
+long enough but not yet gotten around to calling the driver's suspend
+method. The suspend method must be responsible for synchronizing with
+the I/O request routine and the URB completion handler; it should
+cause autosuspends to fail with -EBUSY if the driver needs to use the
+device.
+
+External suspend calls should never be allowed to fail in this way,
+only autosuspend calls. The driver can tell them apart by applying
+the PMSG_IS_AUTO() macro to the message argument to the suspend
+method; it will return True for internal PM events (autosuspend) and
+False for external PM events.
+
+
+ Mutual exclusion
+ ----------------
+
+For external events -- but not necessarily for autosuspend or
+autoresume -- the device semaphore (udev->dev.sem) will be held when a
+suspend or resume method is called. This implies that external
+suspend/resume events are mutually exclusive with calls to probe,
+disconnect, pre_reset, and post_reset; the USB core guarantees that
+this is true of autosuspend/autoresume events as well.
+
+If a driver wants to block all suspend/resume calls during some
+critical section, the best way is to lock the device and call
+usb_autopm_get_interface() (and do the reverse at the end of the
+critical section). Holding the device semaphore will block all
+external PM calls, and the usb_autopm_get_interface() will prevent any
+internal PM calls, even if it fails. (Exercise: Why?)
+
+
+ Interaction between dynamic PM and system PM
+ --------------------------------------------
+
+Dynamic power management and system power management can interact in
+a couple of ways.
+
+Firstly, a device may already be autosuspended when a system suspend
+occurs. Since system suspends are supposed to be as transparent as
+possible, the device should remain suspended following the system
+resume. But this theory may not work out well in practice; over time
+the kernel's behavior in this regard has changed. As of 2.6.37 the
+policy is to resume all devices during a system resume and let them
+handle their own runtime suspends afterward.
+
+Secondly, a dynamic power-management event may occur as a system
+suspend is underway. The window for this is short, since system
+suspends don't take long (a few seconds usually), but it can happen.
+For example, a suspended device may send a remote-wakeup signal while
+the system is suspending. The remote wakeup may succeed, which would
+cause the system suspend to abort. If the remote wakeup doesn't
+succeed, it may still remain active and thus cause the system to
+resume as soon as the system suspend is complete. Or the remote
+wakeup may fail and get lost. Which outcome occurs depends on timing
+and on the hardware and firmware design.
+
+
+ xHCI hardware link PM
+ ---------------------
+
+xHCI host controller provides hardware link power management to usb2.0
+(xHCI 1.0 feature) and usb3.0 devices which support link PM. By
+enabling hardware LPM, the host can automatically put the device into
+lower power state(L1 for usb2.0 devices, or U1/U2 for usb3.0 devices),
+which state device can enter and resume very quickly.
+
+The user interface for controlling USB2 hardware LPM is located in the
+power/ subdirectory of each USB device's sysfs directory, that is, in
+/sys/bus/usb/devices/.../power/ where "..." is the device's ID. The
+relevant attribute files is usb2_hardware_lpm.
+
+ power/usb2_hardware_lpm
+
+ When a USB2 device which support LPM is plugged to a
+ xHCI host root hub which support software LPM, the
+ host will run a software LPM test for it; if the device
+ enters L1 state and resume successfully and the host
+ supports USB2 hardware LPM, this file will show up and
+ driver will enable hardware LPM for the device. You
+ can write y/Y/1 or n/N/0 to the file to enable/disable
+ USB2 hardware LPM manually. This is for test purpose mainly.
+
+
+ USB Port Power Control
+ ----------------------
+
+In addition to suspending endpoint devices and enabling hardware
+controlled link power management, the USB subsystem also has the
+capability to disable power to ports under some conditions. Power is
+controlled through Set/ClearPortFeature(PORT_POWER) requests to a hub.
+In the case of a root or platform-internal hub the host controller
+driver translates PORT_POWER requests into platform firmware (ACPI)
+method calls to set the port power state. For more background see the
+Linux Plumbers Conference 2012 slides [1] and video [2]:
+
+Upon receiving a ClearPortFeature(PORT_POWER) request a USB port is
+logically off, and may trigger the actual loss of VBUS to the port [3].
+VBUS may be maintained in the case where a hub gangs multiple ports into
+a shared power well causing power to remain until all ports in the gang
+are turned off. VBUS may also be maintained by hub ports configured for
+a charging application. In any event a logically off port will lose
+connection with its device, not respond to hotplug events, and not
+respond to remote wakeup events*.
+
+WARNING: turning off a port may result in the inability to hot add a device.
+Please see "User Interface for Port Power Control" for details.
+
+As far as the effect on the device itself it is similar to what a device
+goes through during system suspend, i.e. the power session is lost. Any
+USB device or driver that misbehaves with system suspend will be
+similarly affected by a port power cycle event. For this reason the
+implementation shares the same device recovery path (and honors the same
+quirks) as the system resume path for the hub.
+
+[1]: http://dl.dropbox.com/u/96820575/sarah-sharp-lpt-port-power-off2-mini.pdf
+[2]: http://linuxplumbers.ubicast.tv/videos/usb-port-power-off-kerneluserspace-api/
+[3]: USB 3.1 Section 10.12
+* wakeup note: if a device is configured to send wakeup events the port
+ power control implementation will block poweroff attempts on that
+ port.
+
+
+ User Interface for Port Power Control
+ -------------------------------------
+
+The port power control mechanism uses the PM runtime system. Poweroff is
+requested by clearing the power/pm_qos_no_power_off flag of the port device
+(defaults to 1). If the port is disconnected it will immediately receive a
+ClearPortFeature(PORT_POWER) request. Otherwise, it will honor the pm runtime
+rules and require the attached child device and all descendants to be suspended.
+This mechanism is dependent on the hub advertising port power switching in its
+hub descriptor (wHubCharacteristics logical power switching mode field).
+
+Note, some interface devices/drivers do not support autosuspend. Userspace may
+need to unbind the interface drivers before the usb_device will suspend. An
+unbound interface device is suspended by default. When unbinding, be careful
+to unbind interface drivers, not the driver of the parent usb device. Also,
+leave hub interface drivers bound. If the driver for the usb device (not
+interface) is unbound the kernel is no longer able to resume the device. If a
+hub interface driver is unbound, control of its child ports is lost and all
+attached child-devices will disconnect. A good rule of thumb is that if the
+'driver/module' link for a device points to /sys/module/usbcore then unbinding
+it will interfere with port power control.
+
+Example of the relevant files for port power control. Note, in this example
+these files are relative to a usb hub device (prefix).
+
+ prefix=/sys/devices/pci0000:00/0000:00:14.0/usb3/3-1
+
+ attached child device +
+ hub port device + |
+ hub interface device + | |
+ v v v
+ $prefix/3-1:1.0/3-1-port1/device
+
+ $prefix/3-1:1.0/3-1-port1/power/pm_qos_no_power_off
+ $prefix/3-1:1.0/3-1-port1/device/power/control
+ $prefix/3-1:1.0/3-1-port1/device/3-1.1:<intf0>/driver/unbind
+ $prefix/3-1:1.0/3-1-port1/device/3-1.1:<intf1>/driver/unbind
+ ...
+ $prefix/3-1:1.0/3-1-port1/device/3-1.1:<intfN>/driver/unbind
+
+In addition to these files some ports may have a 'peer' link to a port on
+another hub. The expectation is that all superspeed ports have a
+hi-speed peer.
+
+$prefix/3-1:1.0/3-1-port1/peer -> ../../../../usb2/2-1/2-1:1.0/2-1-port1
+../../../../usb2/2-1/2-1:1.0/2-1-port1/peer -> ../../../../usb3/3-1/3-1:1.0/3-1-port1
+
+Distinct from 'companion ports', or 'ehci/xhci shared switchover ports'
+peer ports are simply the hi-speed and superspeed interface pins that
+are combined into a single usb3 connector. Peer ports share the same
+ancestor XHCI device.
+
+While a superspeed port is powered off a device may downgrade its
+connection and attempt to connect to the hi-speed pins. The
+implementation takes steps to prevent this:
+
+1/ Port suspend is sequenced to guarantee that hi-speed ports are powered-off
+ before their superspeed peer is permitted to power-off. The implication is
+ that the setting pm_qos_no_power_off to zero on a superspeed port may not cause
+ the port to power-off until its highspeed peer has gone to its runtime suspend
+ state. Userspace must take care to order the suspensions if it wants to
+ guarantee that a superspeed port will power-off.
+
+2/ Port resume is sequenced to force a superspeed port to power-on prior to its
+ highspeed peer.
+
+3/ Port resume always triggers an attached child device to resume. After a
+ power session is lost the device may have been removed, or need reset.
+ Resuming the child device when the parent port regains power resolves those
+ states and clamps the maximum port power cycle frequency at the rate the child
+ device can suspend (autosuspend-delay) and resume (reset-resume latency).
+
+Sysfs files relevant for port power control:
+ <hubdev-portX>/power/pm_qos_no_power_off:
+ This writable flag controls the state of an idle port.
+ Once all children and descendants have suspended the
+ port may suspend/poweroff provided that
+ pm_qos_no_power_off is '0'. If pm_qos_no_power_off is
+ '1' the port will remain active/powered regardless of
+ the stats of descendants. Defaults to 1.
+
+ <hubdev-portX>/power/runtime_status:
+ This file reflects whether the port is 'active' (power is on)
+ or 'suspended' (logically off). There is no indication to
+ userspace whether VBUS is still supplied.
+
+ <hubdev-portX>/connect_type:
+ An advisory read-only flag to userspace indicating the
+ location and connection type of the port. It returns
+ one of four values 'hotplug', 'hardwired', 'not used',
+ and 'unknown'. All values, besides unknown, are set by
+ platform firmware.
+
+ "hotplug" indicates an externally connectable/visible
+ port on the platform. Typically userspace would choose
+ to keep such a port powered to handle new device
+ connection events.
+
+ "hardwired" refers to a port that is not visible but
+ connectable. Examples are internal ports for USB
+ bluetooth that can be disconnected via an external
+ switch or a port with a hardwired USB camera. It is
+ expected to be safe to allow these ports to suspend
+ provided pm_qos_no_power_off is coordinated with any
+ switch that gates connections. Userspace must arrange
+ for the device to be connected prior to the port
+ powering off, or to activate the port prior to enabling
+ connection via a switch.
+
+ "not used" refers to an internal port that is expected
+ to never have a device connected to it. These may be
+ empty internal ports, or ports that are not physically
+ exposed on a platform. Considered safe to be
+ powered-off at all times.
+
+ "unknown" means platform firmware does not provide
+ information for this port. Most commonly refers to
+ external hub ports which should be considered 'hotplug'
+ for policy decisions.
+
+ NOTE1: since we are relying on the BIOS to get this ACPI
+ information correct, the USB port descriptions may be
+ missing or wrong.
+
+ NOTE2: Take care in clearing pm_qos_no_power_off. Once
+ power is off this port will
+ not respond to new connect events.
+
+ Once a child device is attached additional constraints are
+ applied before the port is allowed to poweroff.
+
+ <child>/power/control:
+ Must be 'auto', and the port will not
+ power down until <child>/power/runtime_status
+ reflects the 'suspended' state. Default
+ value is controlled by child device driver.
+
+ <child>/power/persist:
+ This defaults to '1' for most devices and indicates if
+ kernel can persist the device's configuration across a
+ power session loss (suspend / port-power event). When
+ this value is '0' (quirky devices), port poweroff is
+ disabled.
+
+ <child>/driver/unbind:
+ Wakeup capable devices will block port poweroff. At
+ this time the only mechanism to clear the usb-internal
+ wakeup-capability for an interface device is to unbind
+ its driver.
+
+Summary of poweroff pre-requisite settings relative to a port device:
+
+ echo 0 > power/pm_qos_no_power_off
+ echo 0 > peer/power/pm_qos_no_power_off # if it exists
+ echo auto > power/control # this is the default value
+ echo auto > <child>/power/control
+ echo 1 > <child>/power/persist # this is the default value
+
+ Suggested Userspace Port Power Policy
+ -------------------------------------
+
+As noted above userspace needs to be careful and deliberate about what
+ports are enabled for poweroff.
+
+The default configuration is that all ports start with
+power/pm_qos_no_power_off set to '1' causing ports to always remain
+active.
+
+Given confidence in the platform firmware's description of the ports
+(ACPI _PLD record for a port populates 'connect_type') userspace can
+clear pm_qos_no_power_off for all 'not used' ports. The same can be
+done for 'hardwired' ports provided poweroff is coordinated with any
+connection switch for the port.
+
+A more aggressive userspace policy is to enable USB port power off for
+all ports (set <hubdev-portX>/power/pm_qos_no_power_off to '0') when
+some external factor indicates the user has stopped interacting with the
+system. For example, a distro may want to enable power off all USB
+ports when the screen blanks, and re-power them when the screen becomes
+active. Smart phones and tablets may want to power off USB ports when
+the user pushes the power button.
diff --git a/Documentation/usb/proc_usb_info.txt b/Documentation/usb/proc_usb_info.txt
new file mode 100644
index 000000000..98be91982
--- /dev/null
+++ b/Documentation/usb/proc_usb_info.txt
@@ -0,0 +1,390 @@
+/proc/bus/usb filesystem output
+===============================
+(version 2010.09.13)
+
+
+The usbfs filesystem for USB devices is traditionally mounted at
+/proc/bus/usb. It provides the /proc/bus/usb/devices file, as well as
+the /proc/bus/usb/BBB/DDD files.
+
+In many modern systems the usbfs filesystem isn't used at all. Instead
+USB device nodes are created under /dev/usb/ or someplace similar. The
+"devices" file is available in debugfs, typically as
+/sys/kernel/debug/usb/devices.
+
+
+**NOTE**: If /proc/bus/usb appears empty, and a host controller
+ driver has been linked, then you need to mount the
+ filesystem. Issue the command (as root):
+
+ mount -t usbfs none /proc/bus/usb
+
+ An alternative and more permanent method would be to add
+
+ none /proc/bus/usb usbfs defaults 0 0
+
+ to /etc/fstab. This will mount usbfs at each reboot.
+ You can then issue `cat /proc/bus/usb/devices` to extract
+ USB device information, and user mode drivers can use usbfs
+ to interact with USB devices.
+
+ There are a number of mount options supported by usbfs.
+ Consult the source code (linux/drivers/usb/core/inode.c) for
+ information about those options.
+
+**NOTE**: The filesystem has been renamed from "usbdevfs" to
+ "usbfs", to reduce confusion with "devfs". You may
+ still see references to the older "usbdevfs" name.
+
+For more information on mounting the usbfs file system, see the
+"USB Device Filesystem" section of the USB Guide. The latest copy
+of the USB Guide can be found at http://www.linux-usb.org/
+
+
+THE /proc/bus/usb/BBB/DDD FILES:
+--------------------------------
+Each connected USB device has one file. The BBB indicates the bus
+number. The DDD indicates the device address on that bus. Both
+of these numbers are assigned sequentially, and can be reused, so
+you can't rely on them for stable access to devices. For example,
+it's relatively common for devices to re-enumerate while they are
+still connected (perhaps someone jostled their power supply, hub,
+or USB cable), so a device might be 002/027 when you first connect
+it and 002/048 sometime later.
+
+These files can be read as binary data. The binary data consists
+of first the device descriptor, then the descriptors for each
+configuration of the device. Multi-byte fields in the device descriptor
+are converted to host endianness by the kernel. The configuration
+descriptors are in bus endian format! The configuration descriptor
+are wTotalLength bytes apart. If a device returns less configuration
+descriptor data than indicated by wTotalLength there will be a hole in
+the file for the missing bytes. This information is also shown
+in text form by the /proc/bus/usb/devices file, described later.
+
+These files may also be used to write user-level drivers for the USB
+devices. You would open the /proc/bus/usb/BBB/DDD file read/write,
+read its descriptors to make sure it's the device you expect, and then
+bind to an interface (or perhaps several) using an ioctl call. You
+would issue more ioctls to the device to communicate to it using
+control, bulk, or other kinds of USB transfers. The IOCTLs are
+listed in the <linux/usbdevice_fs.h> file, and at this writing the
+source code (linux/drivers/usb/core/devio.c) is the primary reference
+for how to access devices through those files.
+
+Note that since by default these BBB/DDD files are writable only by
+root, only root can write such user mode drivers. You can selectively
+grant read/write permissions to other users by using "chmod". Also,
+usbfs mount options such as "devmode=0666" may be helpful.
+
+
+
+THE /proc/bus/usb/devices FILE:
+-------------------------------
+In /proc/bus/usb/devices, each device's output has multiple
+lines of ASCII output.
+I made it ASCII instead of binary on purpose, so that someone
+can obtain some useful data from it without the use of an
+auxiliary program. However, with an auxiliary program, the numbers
+in the first 4 columns of each "T:" line (topology info:
+Lev, Prnt, Port, Cnt) can be used to build a USB topology diagram.
+
+Each line is tagged with a one-character ID for that line:
+
+T = Topology (etc.)
+B = Bandwidth (applies only to USB host controllers, which are
+ virtualized as root hubs)
+D = Device descriptor info.
+P = Product ID info. (from Device descriptor, but they won't fit
+ together on one line)
+S = String descriptors.
+C = Configuration descriptor info. (* = active configuration)
+I = Interface descriptor info.
+E = Endpoint descriptor info.
+
+=======================================================================
+
+/proc/bus/usb/devices output format:
+
+Legend:
+ d = decimal number (may have leading spaces or 0's)
+ x = hexadecimal number (may have leading spaces or 0's)
+ s = string
+
+
+Topology info:
+
+T: Bus=dd Lev=dd Prnt=dd Port=dd Cnt=dd Dev#=ddd Spd=dddd MxCh=dd
+| | | | | | | | |__MaxChildren
+| | | | | | | |__Device Speed in Mbps
+| | | | | | |__DeviceNumber
+| | | | | |__Count of devices at this level
+| | | | |__Connector/Port on Parent for this device
+| | | |__Parent DeviceNumber
+| | |__Level in topology for this bus
+| |__Bus number
+|__Topology info tag
+
+ Speed may be:
+ 1.5 Mbit/s for low speed USB
+ 12 Mbit/s for full speed USB
+ 480 Mbit/s for high speed USB (added for USB 2.0);
+ also used for Wireless USB, which has no fixed speed
+ 5000 Mbit/s for SuperSpeed USB (added for USB 3.0)
+
+ For reasons lost in the mists of time, the Port number is always
+ too low by 1. For example, a device plugged into port 4 will
+ show up with "Port=03".
+
+Bandwidth info:
+B: Alloc=ddd/ddd us (xx%), #Int=ddd, #Iso=ddd
+| | | |__Number of isochronous requests
+| | |__Number of interrupt requests
+| |__Total Bandwidth allocated to this bus
+|__Bandwidth info tag
+
+ Bandwidth allocation is an approximation of how much of one frame
+ (millisecond) is in use. It reflects only periodic transfers, which
+ are the only transfers that reserve bandwidth. Control and bulk
+ transfers use all other bandwidth, including reserved bandwidth that
+ is not used for transfers (such as for short packets).
+
+ The percentage is how much of the "reserved" bandwidth is scheduled by
+ those transfers. For a low or full speed bus (loosely, "USB 1.1"),
+ 90% of the bus bandwidth is reserved. For a high speed bus (loosely,
+ "USB 2.0") 80% is reserved.
+
+
+Device descriptor info & Product ID info:
+
+D: Ver=x.xx Cls=xx(s) Sub=xx Prot=xx MxPS=dd #Cfgs=dd
+P: Vendor=xxxx ProdID=xxxx Rev=xx.xx
+
+where
+D: Ver=x.xx Cls=xx(sssss) Sub=xx Prot=xx MxPS=dd #Cfgs=dd
+| | | | | | |__NumberConfigurations
+| | | | | |__MaxPacketSize of Default Endpoint
+| | | | |__DeviceProtocol
+| | | |__DeviceSubClass
+| | |__DeviceClass
+| |__Device USB version
+|__Device info tag #1
+
+where
+P: Vendor=xxxx ProdID=xxxx Rev=xx.xx
+| | | |__Product revision number
+| | |__Product ID code
+| |__Vendor ID code
+|__Device info tag #2
+
+
+String descriptor info:
+
+S: Manufacturer=ssss
+| |__Manufacturer of this device as read from the device.
+| For USB host controller drivers (virtual root hubs) this may
+| be omitted, or (for newer drivers) will identify the kernel
+| version and the driver which provides this hub emulation.
+|__String info tag
+
+S: Product=ssss
+| |__Product description of this device as read from the device.
+| For older USB host controller drivers (virtual root hubs) this
+| indicates the driver; for newer ones, it's a product (and vendor)
+| description that often comes from the kernel's PCI ID database.
+|__String info tag
+
+S: SerialNumber=ssss
+| |__Serial Number of this device as read from the device.
+| For USB host controller drivers (virtual root hubs) this is
+| some unique ID, normally a bus ID (address or slot name) that
+| can't be shared with any other device.
+|__String info tag
+
+
+
+Configuration descriptor info:
+
+C:* #Ifs=dd Cfg#=dd Atr=xx MPwr=dddmA
+| | | | | |__MaxPower in mA
+| | | | |__Attributes
+| | | |__ConfiguratioNumber
+| | |__NumberOfInterfaces
+| |__ "*" indicates the active configuration (others are " ")
+|__Config info tag
+
+ USB devices may have multiple configurations, each of which act
+ rather differently. For example, a bus-powered configuration
+ might be much less capable than one that is self-powered. Only
+ one device configuration can be active at a time; most devices
+ have only one configuration.
+
+ Each configuration consists of one or more interfaces. Each
+ interface serves a distinct "function", which is typically bound
+ to a different USB device driver. One common example is a USB
+ speaker with an audio interface for playback, and a HID interface
+ for use with software volume control.
+
+
+Interface descriptor info (can be multiple per Config):
+
+I:* If#=dd Alt=dd #EPs=dd Cls=xx(sssss) Sub=xx Prot=xx Driver=ssss
+| | | | | | | | |__Driver name
+| | | | | | | | or "(none)"
+| | | | | | | |__InterfaceProtocol
+| | | | | | |__InterfaceSubClass
+| | | | | |__InterfaceClass
+| | | | |__NumberOfEndpoints
+| | | |__AlternateSettingNumber
+| | |__InterfaceNumber
+| |__ "*" indicates the active altsetting (others are " ")
+|__Interface info tag
+
+ A given interface may have one or more "alternate" settings.
+ For example, default settings may not use more than a small
+ amount of periodic bandwidth. To use significant fractions
+ of bus bandwidth, drivers must select a non-default altsetting.
+
+ Only one setting for an interface may be active at a time, and
+ only one driver may bind to an interface at a time. Most devices
+ have only one alternate setting per interface.
+
+
+Endpoint descriptor info (can be multiple per Interface):
+
+E: Ad=xx(s) Atr=xx(ssss) MxPS=dddd Ivl=dddss
+| | | | |__Interval (max) between transfers
+| | | |__EndpointMaxPacketSize
+| | |__Attributes(EndpointType)
+| |__EndpointAddress(I=In,O=Out)
+|__Endpoint info tag
+
+ The interval is nonzero for all periodic (interrupt or isochronous)
+ endpoints. For high speed endpoints the transfer interval may be
+ measured in microseconds rather than milliseconds.
+
+ For high speed periodic endpoints, the "MaxPacketSize" reflects
+ the per-microframe data transfer size. For "high bandwidth"
+ endpoints, that can reflect two or three packets (for up to
+ 3KBytes every 125 usec) per endpoint.
+
+ With the Linux-USB stack, periodic bandwidth reservations use the
+ transfer intervals and sizes provided by URBs, which can be less
+ than those found in endpoint descriptor.
+
+
+=======================================================================
+
+
+If a user or script is interested only in Topology info, for
+example, use something like "grep ^T: /proc/bus/usb/devices"
+for only the Topology lines. A command like
+"grep -i ^[tdp]: /proc/bus/usb/devices" can be used to list
+only the lines that begin with the characters in square brackets,
+where the valid characters are TDPCIE. With a slightly more able
+script, it can display any selected lines (for example, only T, D,
+and P lines) and change their output format. (The "procusb"
+Perl script is the beginning of this idea. It will list only
+selected lines [selected from TBDPSCIE] or "All" lines from
+/proc/bus/usb/devices.)
+
+The Topology lines can be used to generate a graphic/pictorial
+of the USB devices on a system's root hub. (See more below
+on how to do this.)
+
+The Interface lines can be used to determine what driver is
+being used for each device, and which altsetting it activated.
+
+The Configuration lines could be used to list maximum power
+(in milliamps) that a system's USB devices are using.
+For example, "grep ^C: /proc/bus/usb/devices".
+
+
+Here's an example, from a system which has a UHCI root hub,
+an external hub connected to the root hub, and a mouse and
+a serial converter connected to the external hub.
+
+T: Bus=00 Lev=00 Prnt=00 Port=00 Cnt=00 Dev#= 1 Spd=12 MxCh= 2
+B: Alloc= 28/900 us ( 3%), #Int= 2, #Iso= 0
+D: Ver= 1.00 Cls=09(hub ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1
+P: Vendor=0000 ProdID=0000 Rev= 0.00
+S: Product=USB UHCI Root Hub
+S: SerialNumber=dce0
+C:* #Ifs= 1 Cfg#= 1 Atr=40 MxPwr= 0mA
+I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub
+E: Ad=81(I) Atr=03(Int.) MxPS= 8 Ivl=255ms
+
+T: Bus=00 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 4
+D: Ver= 1.00 Cls=09(hub ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1
+P: Vendor=0451 ProdID=1446 Rev= 1.00
+C:* #Ifs= 1 Cfg#= 1 Atr=e0 MxPwr=100mA
+I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub
+E: Ad=81(I) Atr=03(Int.) MxPS= 1 Ivl=255ms
+
+T: Bus=00 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=1.5 MxCh= 0
+D: Ver= 1.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1
+P: Vendor=04b4 ProdID=0001 Rev= 0.00
+C:* #Ifs= 1 Cfg#= 1 Atr=80 MxPwr=100mA
+I: If#= 0 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=01 Prot=02 Driver=mouse
+E: Ad=81(I) Atr=03(Int.) MxPS= 3 Ivl= 10ms
+
+T: Bus=00 Lev=02 Prnt=02 Port=02 Cnt=02 Dev#= 4 Spd=12 MxCh= 0
+D: Ver= 1.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1
+P: Vendor=0565 ProdID=0001 Rev= 1.08
+S: Manufacturer=Peracom Networks, Inc.
+S: Product=Peracom USB to Serial Converter
+C:* #Ifs= 1 Cfg#= 1 Atr=a0 MxPwr=100mA
+I: If#= 0 Alt= 0 #EPs= 3 Cls=00(>ifc ) Sub=00 Prot=00 Driver=serial
+E: Ad=81(I) Atr=02(Bulk) MxPS= 64 Ivl= 16ms
+E: Ad=01(O) Atr=02(Bulk) MxPS= 16 Ivl= 16ms
+E: Ad=82(I) Atr=03(Int.) MxPS= 8 Ivl= 8ms
+
+
+Selecting only the "T:" and "I:" lines from this (for example, by using
+"procusb ti"), we have:
+
+T: Bus=00 Lev=00 Prnt=00 Port=00 Cnt=00 Dev#= 1 Spd=12 MxCh= 2
+T: Bus=00 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 4
+I: If#= 0 Alt= 0 #EPs= 1 Cls=09(hub ) Sub=00 Prot=00 Driver=hub
+T: Bus=00 Lev=02 Prnt=02 Port=00 Cnt=01 Dev#= 3 Spd=1.5 MxCh= 0
+I: If#= 0 Alt= 0 #EPs= 1 Cls=03(HID ) Sub=01 Prot=02 Driver=mouse
+T: Bus=00 Lev=02 Prnt=02 Port=02 Cnt=02 Dev#= 4 Spd=12 MxCh= 0
+I: If#= 0 Alt= 0 #EPs= 3 Cls=00(>ifc ) Sub=00 Prot=00 Driver=serial
+
+
+Physically this looks like (or could be converted to):
+
+ +------------------+
+ | PC/root_hub (12)| Dev# = 1
+ +------------------+ (nn) is Mbps.
+ Level 0 | CN.0 | CN.1 | [CN = connector/port #]
+ +------------------+
+ /
+ /
+ +-----------------------+
+ Level 1 | Dev#2: 4-port hub (12)|
+ +-----------------------+
+ |CN.0 |CN.1 |CN.2 |CN.3 |
+ +-----------------------+
+ \ \____________________
+ \_____ \
+ \ \
+ +--------------------+ +--------------------+
+ Level 2 | Dev# 3: mouse (1.5)| | Dev# 4: serial (12)|
+ +--------------------+ +--------------------+
+
+
+
+Or, in a more tree-like structure (ports [Connectors] without
+connections could be omitted):
+
+PC: Dev# 1, root hub, 2 ports, 12 Mbps
+|_ CN.0: Dev# 2, hub, 4 ports, 12 Mbps
+ |_ CN.0: Dev #3, mouse, 1.5 Mbps
+ |_ CN.1:
+ |_ CN.2: Dev #4, serial, 12 Mbps
+ |_ CN.3:
+|_ CN.1:
+
+
+ ### END ###
diff --git a/Documentation/usb/rio.txt b/Documentation/usb/rio.txt
new file mode 100644
index 000000000..aee715af7
--- /dev/null
+++ b/Documentation/usb/rio.txt
@@ -0,0 +1,138 @@
+Copyright (C) 1999, 2000 Bruce Tenison
+Portions Copyright (C) 1999, 2000 David Nelson
+Thanks to David Nelson for guidance and the usage of the scanner.txt
+and scanner.c files to model our driver and this informative file.
+
+Mar. 2, 2000
+
+CHANGES
+
+- Initial Revision
+
+
+OVERVIEW
+
+This README will address issues regarding how to configure the kernel
+to access a RIO 500 mp3 player.
+Before I explain how to use this to access the Rio500 please be warned:
+
+W A R N I N G:
+--------------
+
+Please note that this software is still under development. The authors
+are in no way responsible for any damage that may occur, no matter how
+inconsequential.
+
+It seems that the Rio has a problem when sending .mp3 with low batteries.
+I suggest when the batteries are low and you want to transfer stuff that you
+replace it with a fresh one. In my case, what happened is I lost two 16kb
+blocks (they are no longer usable to store information to it). But I don't
+know if that's normal or not; it could simply be a problem with the flash
+memory.
+
+In an extreme case, I left my Rio playing overnight and the batteries wore
+down to nothing and appear to have corrupted the flash memory. My RIO
+needed to be replaced as a result. Diamond tech support is aware of the
+problem. Do NOT allow your batteries to wear down to nothing before
+changing them. It appears RIO 500 firmware does not handle low battery
+power well at all.
+
+On systems with OHCI controllers, the kernel OHCI code appears to have
+power on problems with some chipsets. If you are having problems
+connecting to your RIO 500, try turning it on first and then plugging it
+into the USB cable.
+
+Contact information:
+--------------------
+
+ The main page for the project is hosted at sourceforge.net in the following
+ URL: <http://rio500.sourceforge.net>. You can also go to the project's
+ sourceforge home page at: <http://sourceforge.net/projects/rio500/>.
+ There is also a mailing list: rio500-users@lists.sourceforge.net
+
+Authors:
+-------
+
+Most of the code was written by Cesar Miquel <miquel@df.uba.ar>. Keith
+Clayton <kclayton@jps.net> is incharge of the PPC port and making sure
+things work there. Bruce Tenison <btenison@dibbs.net> is adding support
+for .fon files and also does testing. The program will mostly sure be
+re-written and Pete Ikusz along with the rest will re-design it. I would
+also like to thank Tri Nguyen <tmn_3022000@hotmail.com> who provided use
+with some important information regarding the communication with the Rio.
+
+ADDITIONAL INFORMATION and Userspace tools
+
+http://rio500.sourceforge.net/
+
+
+REQUIREMENTS
+
+A host with a USB port. Ideally, either a UHCI (Intel) or OHCI
+(Compaq and others) hardware port should work.
+
+A Linux development kernel (2.3.x) with USB support enabled or a
+backported version to linux-2.2.x. See http://www.linux-usb.org for
+more information on accomplishing this.
+
+A Linux kernel with RIO 500 support enabled.
+
+'lspci' which is only needed to determine the type of USB hardware
+available in your machine.
+
+CONFIGURATION
+
+Using `lspci -v`, determine the type of USB hardware available.
+
+ If you see something like:
+
+ USB Controller: ......
+ Flags: .....
+ I/O ports at ....
+
+ Then you have a UHCI based controller.
+
+ If you see something like:
+
+ USB Controller: .....
+ Flags: ....
+ Memory at .....
+
+ Then you have a OHCI based controller.
+
+Using `make menuconfig` or your preferred method for configuring the
+kernel, select 'Support for USB', 'OHCI/UHCI' depending on your
+hardware (determined from the steps above), 'USB Diamond Rio500 support', and
+'Preliminary USB device filesystem'. Compile and install the modules
+(you may need to execute `depmod -a` to update the module
+dependencies).
+
+Add a device for the USB rio500:
+ `mknod /dev/usb/rio500 c 180 64`
+
+Set appropriate permissions for /dev/usb/rio500 (don't forget about
+group and world permissions). Both read and write permissions are
+required for proper operation.
+
+Load the appropriate modules (if compiled as modules):
+
+ OHCI:
+ modprobe usbcore
+ modprobe usb-ohci
+ modprobe rio500
+
+ UHCI:
+ modprobe usbcore
+ modprobe usb-uhci (or uhci)
+ modprobe rio500
+
+That's it. The Rio500 Utils at: http://rio500.sourceforge.net should
+be able to access the rio500.
+
+BUGS
+
+If you encounter any problems feel free to drop me an email.
+
+Bruce Tenison
+btenison@dibbs.net
+
diff --git a/Documentation/usb/usb-help.txt b/Documentation/usb/usb-help.txt
new file mode 100644
index 000000000..4273ca2b8
--- /dev/null
+++ b/Documentation/usb/usb-help.txt
@@ -0,0 +1,16 @@
+usb-help.txt
+2008-Mar-7
+
+For USB help other than the readme files that are located in
+Documentation/usb/*, see the following:
+
+Linux-USB project: http://www.linux-usb.org
+ mirrors at http://usb.in.tum.de/linux-usb/
+ and http://it.linux-usb.org
+Linux USB Guide: http://linux-usb.sourceforge.net
+Linux-USB device overview (working devices and drivers):
+ http://www.qbik.ch/usb/devices/
+
+The Linux-USB mailing list is at linux-usb@vger.kernel.org
+
+###
diff --git a/Documentation/usb/usb-serial.txt b/Documentation/usb/usb-serial.txt
new file mode 100644
index 000000000..947fa62bc
--- /dev/null
+++ b/Documentation/usb/usb-serial.txt
@@ -0,0 +1,493 @@
+INTRODUCTION
+
+ The USB serial driver currently supports a number of different USB to
+ serial converter products, as well as some devices that use a serial
+ interface from userspace to talk to the device.
+
+ See the individual product section below for specific information about
+ the different devices.
+
+
+CONFIGURATION
+
+ Currently the driver can handle up to 256 different serial interfaces at
+ one time.
+
+ The major number that the driver uses is 188 so to use the driver,
+ create the following nodes:
+ mknod /dev/ttyUSB0 c 188 0
+ mknod /dev/ttyUSB1 c 188 1
+ mknod /dev/ttyUSB2 c 188 2
+ mknod /dev/ttyUSB3 c 188 3
+ .
+ .
+ .
+ mknod /dev/ttyUSB254 c 188 254
+ mknod /dev/ttyUSB255 c 188 255
+
+ When the device is connected and recognized by the driver, the driver
+ will print to the system log, which node(s) the device has been bound
+ to.
+
+
+SPECIFIC DEVICES SUPPORTED
+
+
+ConnectTech WhiteHEAT 4 port converter
+
+ ConnectTech has been very forthcoming with information about their
+ device, including providing a unit to test with.
+
+ The driver is officially supported by Connect Tech Inc.
+ http://www.connecttech.com
+
+ For any questions or problems with this driver, please contact
+ Connect Tech's Support Department at support@connecttech.com
+
+
+HandSpring Visor, Palm USB, and Clié USB driver
+
+ This driver works with all HandSpring USB, Palm USB, and Sony Clié USB
+ devices.
+
+ Only when the device tries to connect to the host, will the device show
+ up to the host as a valid USB device. When this happens, the device is
+ properly enumerated, assigned a port, and then communication _should_ be
+ possible. The driver cleans up properly when the device is removed, or
+ the connection is canceled on the device.
+
+ NOTE:
+ This means that in order to talk to the device, the sync button must be
+ pressed BEFORE trying to get any program to communicate to the device.
+ This goes against the current documentation for pilot-xfer and other
+ packages, but is the only way that it will work due to the hardware
+ in the device.
+
+ When the device is connected, try talking to it on the second port
+ (this is usually /dev/ttyUSB1 if you do not have any other usb-serial
+ devices in the system.) The system log should tell you which port is
+ the port to use for the HotSync transfer. The "Generic" port can be used
+ for other device communication, such as a PPP link.
+
+ For some Sony Clié devices, /dev/ttyUSB0 must be used to talk to the
+ device. This is true for all OS version 3.5 devices, and most devices
+ that have had a flash upgrade to a newer version of the OS. See the
+ kernel system log for information on which is the correct port to use.
+
+ If after pressing the sync button, nothing shows up in the system log,
+ try resetting the device, first a hot reset, and then a cold reset if
+ necessary. Some devices need this before they can talk to the USB port
+ properly.
+
+ Devices that are not compiled into the kernel can be specified with module
+ parameters. e.g. modprobe visor vendor=0x54c product=0x66
+
+ There is a webpage and mailing lists for this portion of the driver at:
+ http://sourceforge.net/projects/usbvisor/
+
+ For any questions or problems with this driver, please contact Greg
+ Kroah-Hartman at greg@kroah.com
+
+
+PocketPC PDA Driver
+
+ This driver can be used to connect to Compaq iPAQ, HP Jornada, Casio EM500
+ and other PDAs running Windows CE 3.0 or PocketPC 2002 using a USB
+ cable/cradle.
+ Most devices supported by ActiveSync are supported out of the box.
+ For others, please use module parameters to specify the product and vendor
+ id. e.g. modprobe ipaq vendor=0x3f0 product=0x1125
+
+ The driver presents a serial interface (usually on /dev/ttyUSB0) over
+ which one may run ppp and establish a TCP/IP link to the PDA. Once this
+ is done, you can transfer files, backup, download email etc. The most
+ significant advantage of using USB is speed - I can get 73 to 113
+ kbytes/sec for download/upload to my iPAQ.
+
+ This driver is only one of a set of components required to utilize
+ the USB connection. Please visit http://synce.sourceforge.net which
+ contains the necessary packages and a simple step-by-step howto.
+
+ Once connected, you can use Win CE programs like ftpView, Pocket Outlook
+ from the PDA and xcerdisp, synce utilities from the Linux side.
+
+ To use Pocket IE, follow the instructions given at
+ http://www.tekguru.co.uk/EM500/usbtonet.htm to achieve the same thing
+ on Win98. Omit the proxy server part; Linux is quite capable of forwarding
+ packets unlike Win98. Another modification is required at least for the
+ iPAQ - disable autosync by going to the Start/Settings/Connections menu
+ and unchecking the "Automatically synchronize ..." box. Go to
+ Start/Programs/Connections, connect the cable and select "usbdial" (or
+ whatever you named your new USB connection). You should finally wind
+ up with a "Connected to usbdial" window with status shown as connected.
+ Now start up PIE and browse away.
+
+ If it doesn't work for some reason, load both the usbserial and ipaq module
+ with the module parameter "debug" set to 1 and examine the system log.
+ You can also try soft-resetting your PDA before attempting a connection.
+
+ Other functionality may be possible depending on your PDA. According to
+ Wes Cilldhaire <billybobjoehenrybob@hotmail.com>, with the Toshiba E570,
+ ...if you boot into the bootloader (hold down the power when hitting the
+ reset button, continuing to hold onto the power until the bootloader screen
+ is displayed), then put it in the cradle with the ipaq driver loaded, open
+ a terminal on /dev/ttyUSB0, it gives you a "USB Reflash" terminal, which can
+ be used to flash the ROM, as well as the microP code.. so much for needing
+ Toshiba's $350 serial cable for flashing!! :D
+ NOTE: This has NOT been tested. Use at your own risk.
+
+ For any questions or problems with the driver, please contact Ganesh
+ Varadarajan <ganesh@veritas.com>
+
+
+Keyspan PDA Serial Adapter
+
+ Single port DB-9 serial adapter, pushed as a PDA adapter for iMacs (mostly
+ sold in Macintosh catalogs, comes in a translucent white/green dongle).
+ Fairly simple device. Firmware is homebrew.
+ This driver also works for the Xircom/Entrega single port serial adapter.
+
+ Current status:
+ Things that work:
+ basic input/output (tested with 'cu')
+ blocking write when serial line can't keep up
+ changing baud rates (up to 115200)
+ getting/setting modem control pins (TIOCM{GET,SET,BIS,BIC})
+ sending break (although duration looks suspect)
+ Things that don't:
+ device strings (as logged by kernel) have trailing binary garbage
+ device ID isn't right, might collide with other Keyspan products
+ changing baud rates ought to flush tx/rx to avoid mangled half characters
+ Big Things on the todo list:
+ parity, 7 vs 8 bits per char, 1 or 2 stop bits
+ HW flow control
+ not all of the standard USB descriptors are handled: Get_Status, Set_Feature
+ O_NONBLOCK, select()
+
+ For any questions or problems with this driver, please contact Brian
+ Warner at warner@lothar.com
+
+
+Keyspan USA-series Serial Adapters
+
+ Single, Dual and Quad port adapters - driver uses Keyspan supplied
+ firmware and is being developed with their support.
+
+ Current status:
+ The USA-18X, USA-28X, USA-19, USA-19W and USA-49W are supported and
+ have been pretty thoroughly tested at various baud rates with 8-N-1
+ character settings. Other character lengths and parity setups are
+ presently untested.
+
+ The USA-28 isn't yet supported though doing so should be pretty
+ straightforward. Contact the maintainer if you require this
+ functionality.
+
+ More information is available at:
+ http://www.carnationsoftware.com/carnation/Keyspan.html
+
+ For any questions or problems with this driver, please contact Hugh
+ Blemings at hugh@misc.nu
+
+
+FTDI Single Port Serial Driver
+
+ This is a single port DB-25 serial adapter.
+
+ Devices supported include:
+ -TripNav TN-200 USB GPS
+ -Navis Engineering Bureau CH-4711 USB GPS
+
+ For any questions or problems with this driver, please contact Bill Ryder.
+
+
+ZyXEL omni.net lcd plus ISDN TA
+
+ This is an ISDN TA. Please report both successes and troubles to
+ azummo@towertech.it
+
+
+Cypress M8 CY4601 Family Serial Driver
+
+ This driver was in most part developed by Neil "koyama" Whelchel. It
+ has been improved since that previous form to support dynamic serial
+ line settings and improved line handling. The driver is for the most
+ part stable and has been tested on an smp machine. (dual p2)
+
+ Chipsets supported under CY4601 family:
+
+ CY7C63723, CY7C63742, CY7C63743, CY7C64013
+
+ Devices supported:
+
+ -DeLorme's USB Earthmate GPS (SiRF Star II lp arch)
+ -Cypress HID->COM RS232 adapter
+
+ Note: Cypress Semiconductor claims no affiliation with the
+ hid->com device.
+
+ Most devices using chipsets under the CY4601 family should
+ work with the driver. As long as they stay true to the CY4601
+ usbserial specification.
+
+ Technical notes:
+
+ The Earthmate starts out at 4800 8N1 by default... the driver will
+ upon start init to this setting. usbserial core provides the rest
+ of the termios settings, along with some custom termios so that the
+ output is in proper format and parsable.
+
+ The device can be put into sirf mode by issuing NMEA command:
+ $PSRF100,<protocol>,<baud>,<databits>,<stopbits>,<parity>*CHECKSUM
+ $PSRF100,0,9600,8,1,0*0C
+
+ It should then be sufficient to change the port termios to match this
+ to begin communicating.
+
+ As far as I can tell it supports pretty much every sirf command as
+ documented online available with firmware 2.31, with some unknown
+ message ids.
+
+ The hid->com adapter can run at a maximum baud of 115200bps. Please note
+ that the device has trouble or is incapable of raising line voltage properly.
+ It will be fine with null modem links, as long as you do not try to link two
+ together without hacking the adapter to set the line high.
+
+ The driver is smp safe. Performance with the driver is rather low when using
+ it for transferring files. This is being worked on, but I would be willing to
+ accept patches. An urb queue or packet buffer would likely fit the bill here.
+
+ If you have any questions, problems, patches, feature requests, etc. you can
+ contact me here via email:
+ dignome@gmail.com
+ (your problems/patches can alternately be submitted to usb-devel)
+
+
+Digi AccelePort Driver
+
+ This driver supports the Digi AccelePort USB 2 and 4 devices, 2 port
+ (plus a parallel port) and 4 port USB serial converters. The driver
+ does NOT yet support the Digi AccelePort USB 8.
+
+ This driver works under SMP with the usb-uhci driver. It does not
+ work under SMP with the uhci driver.
+
+ The driver is generally working, though we still have a few more ioctls
+ to implement and final testing and debugging to do. The parallel port
+ on the USB 2 is supported as a serial to parallel converter; in other
+ words, it appears as another USB serial port on Linux, even though
+ physically it is really a parallel port. The Digi Acceleport USB 8
+ is not yet supported.
+
+ Please contact Peter Berger (pberger@brimson.com) or Al Borchers
+ (alborchers@steinerpoint.com) for questions or problems with this
+ driver.
+
+
+Belkin USB Serial Adapter F5U103
+
+ Single port DB-9/PS-2 serial adapter from Belkin with firmware by eTEK Labs.
+ The Peracom single port serial adapter also works with this driver, as
+ well as the GoHubs adapter.
+
+ Current status:
+ The following have been tested and work:
+ Baud rate 300-230400
+ Data bits 5-8
+ Stop bits 1-2
+ Parity N,E,O,M,S
+ Handshake None, Software (XON/XOFF), Hardware (CTSRTS,CTSDTR)*
+ Break Set and clear
+ Line control Input/Output query and control **
+
+ * Hardware input flow control is only enabled for firmware
+ levels above 2.06. Read source code comments describing Belkin
+ firmware errata. Hardware output flow control is working for all
+ firmware versions.
+ ** Queries of inputs (CTS,DSR,CD,RI) show the last
+ reported state. Queries of outputs (DTR,RTS) show the last
+ requested state and may not reflect current state as set by
+ automatic hardware flow control.
+
+ TO DO List:
+ -- Add true modem control line query capability. Currently tracks the
+ states reported by the interrupt and the states requested.
+ -- Add error reporting back to application for UART error conditions.
+ -- Add support for flush ioctls.
+ -- Add everything else that is missing :)
+
+ For any questions or problems with this driver, please contact William
+ Greathouse at wgreathouse@smva.com
+
+
+Empeg empeg-car Mark I/II Driver
+
+ This is an experimental driver to provide connectivity support for the
+ client synchronization tools for an Empeg empeg-car mp3 player.
+
+ Tips:
+ * Don't forget to create the device nodes for ttyUSB{0,1,2,...}
+ * modprobe empeg (modprobe is your friend)
+ * emptool --usb /dev/ttyUSB0 (or whatever you named your device node)
+
+ For any questions or problems with this driver, please contact Gary
+ Brubaker at xavyer@ix.netcom.com
+
+
+MCT USB Single Port Serial Adapter U232
+
+ This driver is for the MCT USB-RS232 Converter (25 pin, Model No.
+ U232-P25) from Magic Control Technology Corp. (there is also a 9 pin
+ Model No. U232-P9). More information about this device can be found at
+ the manufacturer's web-site: http://www.mct.com.tw.
+
+ The driver is generally working, though it still needs some more testing.
+ It is derived from the Belkin USB Serial Adapter F5U103 driver and its
+ TODO list is valid for this driver as well.
+
+ This driver has also been found to work for other products, which have
+ the same Vendor ID but different Product IDs. Sitecom's U232-P25 serial
+ converter uses Product ID 0x230 and Vendor ID 0x711 and works with this
+ driver. Also, D-Link's DU-H3SP USB BAY also works with this driver.
+
+ For any questions or problems with this driver, please contact Wolfgang
+ Grandegger at wolfgang@ces.ch
+
+
+Inside Out Networks Edgeport Driver
+
+ This driver supports all devices made by Inside Out Networks, specifically
+ the following models:
+ Edgeport/4
+ Rapidport/4
+ Edgeport/4t
+ Edgeport/2
+ Edgeport/4i
+ Edgeport/2i
+ Edgeport/421
+ Edgeport/21
+ Edgeport/8
+ Edgeport/8 Dual
+ Edgeport/2D8
+ Edgeport/4D8
+ Edgeport/8i
+ Edgeport/2 DIN
+ Edgeport/4 DIN
+ Edgeport/16 Dual
+
+ For any questions or problems with this driver, please contact Greg
+ Kroah-Hartman at greg@kroah.com
+
+
+REINER SCT cyberJack pinpad/e-com USB chipcard reader
+
+ Interface to ISO 7816 compatible contactbased chipcards, e.g. GSM SIMs.
+
+ Current status:
+ This is the kernel part of the driver for this USB card reader.
+ There is also a user part for a CT-API driver available. A site
+ for downloading is TBA. For now, you can request it from the
+ maintainer (linux-usb@sii.li).
+
+ For any questions or problems with this driver, please contact
+ linux-usb@sii.li
+
+
+Prolific PL2303 Driver
+
+ This driver supports any device that has the PL2303 chip from Prolific
+ in it. This includes a number of single port USB to serial converters,
+ more than 70% of USB GPS devices (in 2010), and some USB UPSes. Devices
+ from Aten (the UC-232) and IO-Data work with this driver, as does
+ the DCU-11 mobile-phone cable.
+
+ For any questions or problems with this driver, please contact Greg
+ Kroah-Hartman at greg@kroah.com
+
+
+KL5KUSB105 chipset / PalmConnect USB single-port adapter
+
+Current status:
+ The driver was put together by looking at the usb bus transactions
+ done by Palm's driver under Windows, so a lot of functionality is
+ still missing. Notably, serial ioctls are sometimes faked or not yet
+ implemented. Support for finding out about DSR and CTS line status is
+ however implemented (though not nicely), so your favorite autopilot(1)
+ and pilot-manager -daemon calls will work. Baud rates up to 115200
+ are supported, but handshaking (software or hardware) is not, which is
+ why it is wise to cut down on the rate used is wise for large
+ transfers until this is settled.
+
+Options supported:
+ If this driver is compiled as a module you can pass the following
+ options to it:
+ debug - extra verbose debugging info
+ (default: 0; nonzero enables)
+ use_lowlatency - use low_latency flag to speed up tty layer
+ when reading from the device.
+ (default: 0; nonzero enables)
+
+ See http://www.uuhaus.de/linux/palmconnect.html for up-to-date
+ information on this driver.
+
+Winchiphead CH341 Driver
+
+ This driver is for the Winchiphead CH341 USB-RS232 Converter. This chip
+ also implements an IEEE 1284 parallel port, I2C and SPI, but that is not
+ supported by the driver. The protocol was analyzed from the behaviour
+ of the Windows driver, no datasheet is available at present.
+ The manufacturer's website: http://www.winchiphead.com/.
+ For any questions or problems with this driver, please contact
+ frank@kingswood-consulting.co.uk.
+
+Moschip MCS7720, MCS7715 driver
+
+ These chips are present in devices sold by various manufacturers, such as Syba
+ and Cables Unlimited. There may be others. The 7720 provides two serial
+ ports, and the 7715 provides one serial and one standard PC parallel port.
+ Support for the 7715's parallel port is enabled by a separate option, which
+ will not appear unless parallel port support is first enabled at the top-level
+ of the Device Drivers config menu. Currently only compatibility mode is
+ supported on the parallel port (no ECP/EPP).
+
+ TODO:
+ - Implement ECP/EPP modes for the parallel port.
+ - Baud rates higher than 115200 are currently broken.
+ - Devices with a single serial port based on the Moschip MCS7703 may work
+ with this driver with a simple addition to the usb_device_id table. I
+ don't have one of these devices, so I can't say for sure.
+
+Generic Serial driver
+
+ If your device is not one of the above listed devices, compatible with
+ the above models, you can try out the "generic" interface. This
+ interface does not provide any type of control messages sent to the
+ device, and does not support any kind of device flow control. All that
+ is required of your device is that it has at least one bulk in endpoint,
+ or one bulk out endpoint.
+
+ To enable the generic driver to recognize your device, build the driver
+ as a module and load it by the following invocation:
+ insmod usbserial vendor=0x#### product=0x####
+ where the #### is replaced with the hex representation of your device's
+ vendor id and product id.
+
+ This driver has been successfully used to connect to the NetChip USB
+ development board, providing a way to develop USB firmware without
+ having to write a custom driver.
+
+ For any questions or problems with this driver, please contact Greg
+ Kroah-Hartman at greg@kroah.com
+
+
+CONTACT:
+
+ If anyone has any problems using these drivers, with any of the above
+ specified products, please contact the specific driver's author listed
+ above, or join the Linux-USB mailing list (information on joining the
+ mailing list, as well as a link to its searchable archive is at
+ http://www.linux-usb.org/ )
+
+
+Greg Kroah-Hartman
+greg@kroah.com
diff --git a/Documentation/usb/usbmon.txt b/Documentation/usb/usbmon.txt
new file mode 100644
index 000000000..28425f736
--- /dev/null
+++ b/Documentation/usb/usbmon.txt
@@ -0,0 +1,355 @@
+* Introduction
+
+The name "usbmon" in lowercase refers to a facility in kernel which is
+used to collect traces of I/O on the USB bus. This function is analogous
+to a packet socket used by network monitoring tools such as tcpdump(1)
+or Ethereal. Similarly, it is expected that a tool such as usbdump or
+USBMon (with uppercase letters) is used to examine raw traces produced
+by usbmon.
+
+The usbmon reports requests made by peripheral-specific drivers to Host
+Controller Drivers (HCD). So, if HCD is buggy, the traces reported by
+usbmon may not correspond to bus transactions precisely. This is the same
+situation as with tcpdump.
+
+Two APIs are currently implemented: "text" and "binary". The binary API
+is available through a character device in /dev namespace and is an ABI.
+The text API is deprecated since 2.6.35, but available for convenience.
+
+* How to use usbmon to collect raw text traces
+
+Unlike the packet socket, usbmon has an interface which provides traces
+in a text format. This is used for two purposes. First, it serves as a
+common trace exchange format for tools while more sophisticated formats
+are finalized. Second, humans can read it in case tools are not available.
+
+To collect a raw text trace, execute following steps.
+
+1. Prepare
+
+Mount debugfs (it has to be enabled in your kernel configuration), and
+load the usbmon module (if built as module). The second step is skipped
+if usbmon is built into the kernel.
+
+# mount -t debugfs none_debugs /sys/kernel/debug
+# modprobe usbmon
+#
+
+Verify that bus sockets are present.
+
+# ls /sys/kernel/debug/usb/usbmon
+0s 0u 1s 1t 1u 2s 2t 2u 3s 3t 3u 4s 4t 4u
+#
+
+Now you can choose to either use the socket '0u' (to capture packets on all
+buses), and skip to step #3, or find the bus used by your device with step #2.
+This allows to filter away annoying devices that talk continuously.
+
+2. Find which bus connects to the desired device
+
+Run "cat /sys/kernel/debug/usb/devices", and find the T-line which corresponds
+to the device. Usually you do it by looking for the vendor string. If you have
+many similar devices, unplug one and compare the two
+/sys/kernel/debug/usb/devices outputs. The T-line will have a bus number.
+Example:
+
+T: Bus=03 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 2 Spd=12 MxCh= 0
+D: Ver= 1.10 Cls=00(>ifc ) Sub=00 Prot=00 MxPS= 8 #Cfgs= 1
+P: Vendor=0557 ProdID=2004 Rev= 1.00
+S: Manufacturer=ATEN
+S: Product=UC100KM V2.00
+
+"Bus=03" means it's bus 3. Alternatively, you can look at the output from
+"lsusb" and get the bus number from the appropriate line. Example:
+
+Bus 003 Device 002: ID 0557:2004 ATEN UC100KM V2.00
+
+3. Start 'cat'
+
+# cat /sys/kernel/debug/usb/usbmon/3u > /tmp/1.mon.out
+
+to listen on a single bus, otherwise, to listen on all buses, type:
+
+# cat /sys/kernel/debug/usb/usbmon/0u > /tmp/1.mon.out
+
+This process will read until it is killed. Naturally, the output can be
+redirected to a desirable location. This is preferred, because it is going
+to be quite long.
+
+4. Perform the desired operation on the USB bus
+
+This is where you do something that creates the traffic: plug in a flash key,
+copy files, control a webcam, etc.
+
+5. Kill cat
+
+Usually it's done with a keyboard interrupt (Control-C).
+
+At this point the output file (/tmp/1.mon.out in this example) can be saved,
+sent by e-mail, or inspected with a text editor. In the last case make sure
+that the file size is not excessive for your favourite editor.
+
+* Raw text data format
+
+Two formats are supported currently: the original, or '1t' format, and
+the '1u' format. The '1t' format is deprecated in kernel 2.6.21. The '1u'
+format adds a few fields, such as ISO frame descriptors, interval, etc.
+It produces slightly longer lines, but otherwise is a perfect superset
+of '1t' format.
+
+If it is desired to recognize one from the other in a program, look at the
+"address" word (see below), where '1u' format adds a bus number. If 2 colons
+are present, it's the '1t' format, otherwise '1u'.
+
+Any text format data consists of a stream of events, such as URB submission,
+URB callback, submission error. Every event is a text line, which consists
+of whitespace separated words. The number or position of words may depend
+on the event type, but there is a set of words, common for all types.
+
+Here is the list of words, from left to right:
+
+- URB Tag. This is used to identify URBs, and is normally an in-kernel address
+ of the URB structure in hexadecimal, but can be a sequence number or any
+ other unique string, within reason.
+
+- Timestamp in microseconds, a decimal number. The timestamp's resolution
+ depends on available clock, and so it can be much worse than a microsecond
+ (if the implementation uses jiffies, for example).
+
+- Event Type. This type refers to the format of the event, not URB type.
+ Available types are: S - submission, C - callback, E - submission error.
+
+- "Address" word (formerly a "pipe"). It consists of four fields, separated by
+ colons: URB type and direction, Bus number, Device address, Endpoint number.
+ Type and direction are encoded with two bytes in the following manner:
+ Ci Co Control input and output
+ Zi Zo Isochronous input and output
+ Ii Io Interrupt input and output
+ Bi Bo Bulk input and output
+ Bus number, Device address, and Endpoint are decimal numbers, but they may
+ have leading zeros, for the sake of human readers.
+
+- URB Status word. This is either a letter, or several numbers separated
+ by colons: URB status, interval, start frame, and error count. Unlike the
+ "address" word, all fields save the status are optional. Interval is printed
+ only for interrupt and isochronous URBs. Start frame is printed only for
+ isochronous URBs. Error count is printed only for isochronous callback
+ events.
+
+ The status field is a decimal number, sometimes negative, which represents
+ a "status" field of the URB. This field makes no sense for submissions, but
+ is present anyway to help scripts with parsing. When an error occurs, the
+ field contains the error code.
+
+ In case of a submission of a Control packet, this field contains a Setup Tag
+ instead of an group of numbers. It is easy to tell whether the Setup Tag is
+ present because it is never a number. Thus if scripts find a set of numbers
+ in this word, they proceed to read Data Length (except for isochronous URBs).
+ If they find something else, like a letter, they read the setup packet before
+ reading the Data Length or isochronous descriptors.
+
+- Setup packet, if present, consists of 5 words: one of each for bmRequestType,
+ bRequest, wValue, wIndex, wLength, as specified by the USB Specification 2.0.
+ These words are safe to decode if Setup Tag was 's'. Otherwise, the setup
+ packet was present, but not captured, and the fields contain filler.
+
+- Number of isochronous frame descriptors and descriptors themselves.
+ If an Isochronous transfer event has a set of descriptors, a total number
+ of them in an URB is printed first, then a word per descriptor, up to a
+ total of 5. The word consists of 3 colon-separated decimal numbers for
+ status, offset, and length respectively. For submissions, initial length
+ is reported. For callbacks, actual length is reported.
+
+- Data Length. For submissions, this is the requested length. For callbacks,
+ this is the actual length.
+
+- Data tag. The usbmon may not always capture data, even if length is nonzero.
+ The data words are present only if this tag is '='.
+
+- Data words follow, in big endian hexadecimal format. Notice that they are
+ not machine words, but really just a byte stream split into words to make
+ it easier to read. Thus, the last word may contain from one to four bytes.
+ The length of collected data is limited and can be less than the data length
+ reported in the Data Length word. In the case of an Isochronous input (Zi)
+ completion where the received data is sparse in the buffer, the length of
+ the collected data can be greater than the Data Length value (because Data
+ Length counts only the bytes that were received whereas the Data words
+ contain the entire transfer buffer).
+
+Examples:
+
+An input control transfer to get a port status.
+
+d5ea89a0 3575914555 S Ci:1:001:0 s a3 00 0000 0003 0004 4 <
+d5ea89a0 3575914560 C Ci:1:001:0 0 4 = 01050000
+
+An output bulk transfer to send a SCSI command 0x28 (READ_10) in a 31-byte
+Bulk wrapper to a storage device at address 5:
+
+dd65f0e8 4128379752 S Bo:1:005:2 -115 31 = 55534243 ad000000 00800000 80010a28 20000000 20000040 00000000 000000
+dd65f0e8 4128379808 C Bo:1:005:2 0 31 >
+
+* Raw binary format and API
+
+The overall architecture of the API is about the same as the one above,
+only the events are delivered in binary format. Each event is sent in
+the following structure (its name is made up, so that we can refer to it):
+
+struct usbmon_packet {
+ u64 id; /* 0: URB ID - from submission to callback */
+ unsigned char type; /* 8: Same as text; extensible. */
+ unsigned char xfer_type; /* ISO (0), Intr, Control, Bulk (3) */
+ unsigned char epnum; /* Endpoint number and transfer direction */
+ unsigned char devnum; /* Device address */
+ u16 busnum; /* 12: Bus number */
+ char flag_setup; /* 14: Same as text */
+ char flag_data; /* 15: Same as text; Binary zero is OK. */
+ s64 ts_sec; /* 16: gettimeofday */
+ s32 ts_usec; /* 24: gettimeofday */
+ int status; /* 28: */
+ unsigned int length; /* 32: Length of data (submitted or actual) */
+ unsigned int len_cap; /* 36: Delivered length */
+ union { /* 40: */
+ unsigned char setup[SETUP_LEN]; /* Only for Control S-type */
+ struct iso_rec { /* Only for ISO */
+ int error_count;
+ int numdesc;
+ } iso;
+ } s;
+ int interval; /* 48: Only for Interrupt and ISO */
+ int start_frame; /* 52: For ISO */
+ unsigned int xfer_flags; /* 56: copy of URB's transfer_flags */
+ unsigned int ndesc; /* 60: Actual number of ISO descriptors */
+}; /* 64 total length */
+
+These events can be received from a character device by reading with read(2),
+with an ioctl(2), or by accessing the buffer with mmap. However, read(2)
+only returns first 48 bytes for compatibility reasons.
+
+The character device is usually called /dev/usbmonN, where N is the USB bus
+number. Number zero (/dev/usbmon0) is special and means "all buses".
+Note that specific naming policy is set by your Linux distribution.
+
+If you create /dev/usbmon0 by hand, make sure that it is owned by root
+and has mode 0600. Otherwise, unprivileged users will be able to snoop
+keyboard traffic.
+
+The following ioctl calls are available, with MON_IOC_MAGIC 0x92:
+
+ MON_IOCQ_URB_LEN, defined as _IO(MON_IOC_MAGIC, 1)
+
+This call returns the length of data in the next event. Note that majority of
+events contain no data, so if this call returns zero, it does not mean that
+no events are available.
+
+ MON_IOCG_STATS, defined as _IOR(MON_IOC_MAGIC, 3, struct mon_bin_stats)
+
+The argument is a pointer to the following structure:
+
+struct mon_bin_stats {
+ u32 queued;
+ u32 dropped;
+};
+
+The member "queued" refers to the number of events currently queued in the
+buffer (and not to the number of events processed since the last reset).
+
+The member "dropped" is the number of events lost since the last call
+to MON_IOCG_STATS.
+
+ MON_IOCT_RING_SIZE, defined as _IO(MON_IOC_MAGIC, 4)
+
+This call sets the buffer size. The argument is the size in bytes.
+The size may be rounded down to the next chunk (or page). If the requested
+size is out of [unspecified] bounds for this kernel, the call fails with
+-EINVAL.
+
+ MON_IOCQ_RING_SIZE, defined as _IO(MON_IOC_MAGIC, 5)
+
+This call returns the current size of the buffer in bytes.
+
+ MON_IOCX_GET, defined as _IOW(MON_IOC_MAGIC, 6, struct mon_get_arg)
+ MON_IOCX_GETX, defined as _IOW(MON_IOC_MAGIC, 10, struct mon_get_arg)
+
+These calls wait for events to arrive if none were in the kernel buffer,
+then return the first event. The argument is a pointer to the following
+structure:
+
+struct mon_get_arg {
+ struct usbmon_packet *hdr;
+ void *data;
+ size_t alloc; /* Length of data (can be zero) */
+};
+
+Before the call, hdr, data, and alloc should be filled. Upon return, the area
+pointed by hdr contains the next event structure, and the data buffer contains
+the data, if any. The event is removed from the kernel buffer.
+
+The MON_IOCX_GET copies 48 bytes to hdr area, MON_IOCX_GETX copies 64 bytes.
+
+ MON_IOCX_MFETCH, defined as _IOWR(MON_IOC_MAGIC, 7, struct mon_mfetch_arg)
+
+This ioctl is primarily used when the application accesses the buffer
+with mmap(2). Its argument is a pointer to the following structure:
+
+struct mon_mfetch_arg {
+ uint32_t *offvec; /* Vector of events fetched */
+ uint32_t nfetch; /* Number of events to fetch (out: fetched) */
+ uint32_t nflush; /* Number of events to flush */
+};
+
+The ioctl operates in 3 stages.
+
+First, it removes and discards up to nflush events from the kernel buffer.
+The actual number of events discarded is returned in nflush.
+
+Second, it waits for an event to be present in the buffer, unless the pseudo-
+device is open with O_NONBLOCK.
+
+Third, it extracts up to nfetch offsets into the mmap buffer, and stores
+them into the offvec. The actual number of event offsets is stored into
+the nfetch.
+
+ MON_IOCH_MFLUSH, defined as _IO(MON_IOC_MAGIC, 8)
+
+This call removes a number of events from the kernel buffer. Its argument
+is the number of events to remove. If the buffer contains fewer events
+than requested, all events present are removed, and no error is reported.
+This works when no events are available too.
+
+ FIONBIO
+
+The ioctl FIONBIO may be implemented in the future, if there's a need.
+
+In addition to ioctl(2) and read(2), the special file of binary API can
+be polled with select(2) and poll(2). But lseek(2) does not work.
+
+* Memory-mapped access of the kernel buffer for the binary API
+
+The basic idea is simple:
+
+To prepare, map the buffer by getting the current size, then using mmap(2).
+Then, execute a loop similar to the one written in pseudo-code below:
+
+ struct mon_mfetch_arg fetch;
+ struct usbmon_packet *hdr;
+ int nflush = 0;
+ for (;;) {
+ fetch.offvec = vec; // Has N 32-bit words
+ fetch.nfetch = N; // Or less than N
+ fetch.nflush = nflush;
+ ioctl(fd, MON_IOCX_MFETCH, &fetch); // Process errors, too
+ nflush = fetch.nfetch; // This many packets to flush when done
+ for (i = 0; i < nflush; i++) {
+ hdr = (struct ubsmon_packet *) &mmap_area[vec[i]];
+ if (hdr->type == '@') // Filler packet
+ continue;
+ caddr_t data = &mmap_area[vec[i]] + 64;
+ process_packet(hdr, data);
+ }
+ }
+
+Thus, the main idea is to execute only one ioctl per N events.
+
+Although the buffer is circular, the returned headers and data do not cross
+the end of the buffer, so the above pseudo-code does not need any gathering.
diff --git a/Documentation/usb/wusb-cbaf b/Documentation/usb/wusb-cbaf
new file mode 100644
index 000000000..8b3d43efc
--- /dev/null
+++ b/Documentation/usb/wusb-cbaf
@@ -0,0 +1,130 @@
+#! /bin/bash
+#
+
+set -e
+
+progname=$(basename $0)
+function help
+{
+ cat <<EOF
+Usage: $progname COMMAND DEVICEs [ARGS]
+
+Command for manipulating the pairing/authentication credentials of a
+Wireless USB device that supports wired-mode Cable-Based-Association.
+
+Works in conjunction with the wusb-cba.ko driver from http://linuxuwb.org.
+
+
+DEVICE
+
+ sysfs path to the device to authenticate; for example, both this
+ guys are the same:
+
+ /sys/devices/pci0000:00/0000:00:1d.7/usb1/1-4/1-4.4/1-4.4:1.1
+ /sys/bus/usb/drivers/wusb-cbaf/1-4.4:1.1
+
+COMMAND/ARGS are
+
+ start
+
+ Start a WUSB host controller (by setting up a CHID)
+
+ set-chid DEVICE HOST-CHID HOST-BANDGROUP HOST-NAME
+
+ Sets host information in the device; after this you can call the
+ get-cdid to see how does this device report itself to us.
+
+ get-cdid DEVICE
+
+ Get the device ID associated to the HOST-CHID we sent with
+ 'set-chid'. We might not know about it.
+
+ set-cc DEVICE
+
+ If we allow the device to connect, set a random new CDID and CK
+ (connection key). Device saves them for the next time it wants to
+ connect wireless. We save them for that next time also so we can
+ authenticate the device (when we see the CDID he uses to id
+ itself) and the CK to crypto talk to it.
+
+CHID is always 16 hex bytes in 'XX YY ZZ...' form
+BANDGROUP is almost always 0001
+
+Examples:
+
+ You can default most arguments to '' to get a sane value:
+
+ $ $progname set-chid '' '' '' "My host name"
+
+ A full sequence:
+
+ $ $progname set-chid '' '' '' "My host name"
+ $ $progname get-cdid ''
+ $ $progname set-cc ''
+
+EOF
+}
+
+
+# Defaults
+# FIXME: CHID should come from a database :), band group from the host
+host_CHID="00 11 22 33 44 55 66 77 88 99 aa bb cc dd ee ff"
+host_band_group="0001"
+host_name=$(hostname)
+
+devs="$(echo /sys/bus/usb/drivers/wusb-cbaf/[0-9]*)"
+hdevs="$(for h in /sys/class/uwb_rc/*/wusbhc; do readlink -f $h; done)"
+
+result=0
+case $1 in
+ start)
+ for dev in ${2:-$hdevs}
+ do
+ echo $host_CHID > $dev/wusb_chid
+ echo I: started host $(basename $dev) >&2
+ done
+ ;;
+ stop)
+ for dev in ${2:-$hdevs}
+ do
+ echo 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 > $dev/wusb_chid
+ echo I: stopped host $(basename $dev) >&2
+ done
+ ;;
+ set-chid)
+ shift
+ for dev in ${2:-$devs}; do
+ echo "${4:-$host_name}" > $dev/wusb_host_name
+ echo "${3:-$host_band_group}" > $dev/wusb_host_band_groups
+ echo ${2:-$host_CHID} > $dev/wusb_chid
+ done
+ ;;
+ get-cdid)
+ for dev in ${2:-$devs}
+ do
+ cat $dev/wusb_cdid
+ done
+ ;;
+ set-cc)
+ for dev in ${2:-$devs}; do
+ shift
+ CDID="$(head --bytes=16 /dev/urandom | od -tx1 -An)"
+ CK="$(head --bytes=16 /dev/urandom | od -tx1 -An)"
+ echo "$CDID" > $dev/wusb_cdid
+ echo "$CK" > $dev/wusb_ck
+
+ echo I: CC set >&2
+ echo "CHID: $(cat $dev/wusb_chid)"
+ echo "CDID:$CDID"
+ echo "CK: $CK"
+ done
+ ;;
+ help|h|--help|-h)
+ help
+ ;;
+ *)
+ echo "E: Unknown usage" 1>&2
+ help 1>&2
+ result=1
+esac
+exit $result
diff --git a/Documentation/vDSO/.gitignore b/Documentation/vDSO/.gitignore
new file mode 100644
index 000000000..133bf9ee9
--- /dev/null
+++ b/Documentation/vDSO/.gitignore
@@ -0,0 +1,2 @@
+vdso_test
+vdso_standalone_test_x86
diff --git a/Documentation/vDSO/Makefile b/Documentation/vDSO/Makefile
new file mode 100644
index 000000000..ee075c3d2
--- /dev/null
+++ b/Documentation/vDSO/Makefile
@@ -0,0 +1,15 @@
+# vdso_test won't build for glibc < 2.16, so disable it
+# hostprogs-y := vdso_test
+hostprogs-$(CONFIG_X86) := vdso_standalone_test_x86
+vdso_standalone_test_x86-objs := vdso_standalone_test_x86.o parse_vdso.o
+vdso_test-objs := parse_vdso.o vdso_test.o
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
+
+HOSTCFLAGS := -I$(objtree)/usr/include -std=gnu99
+HOSTCFLAGS_vdso_standalone_test_x86.o := -fno-asynchronous-unwind-tables -fno-stack-protector
+HOSTLOADLIBES_vdso_standalone_test_x86 := -nostdlib
+ifeq ($(CONFIG_X86_32),y)
+HOSTLOADLIBES_vdso_standalone_test_x86 += -lgcc_s
+endif
diff --git a/Documentation/vDSO/parse_vdso.c b/Documentation/vDSO/parse_vdso.c
new file mode 100644
index 000000000..1dbb4b872
--- /dev/null
+++ b/Documentation/vDSO/parse_vdso.c
@@ -0,0 +1,269 @@
+/*
+ * parse_vdso.c: Linux reference vDSO parser
+ * Written by Andrew Lutomirski, 2011-2014.
+ *
+ * This code is meant to be linked in to various programs that run on Linux.
+ * As such, it is available with as few restrictions as possible. This file
+ * is licensed under the Creative Commons Zero License, version 1.0,
+ * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
+ *
+ * The vDSO is a regular ELF DSO that the kernel maps into user space when
+ * it starts a program. It works equally well in statically and dynamically
+ * linked binaries.
+ *
+ * This code is tested on x86. In principle it should work on any
+ * architecture that has a vDSO.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <elf.h>
+
+/*
+ * To use this vDSO parser, first call one of the vdso_init_* functions.
+ * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
+ * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
+ * Then call vdso_sym for each symbol you want. For example, to look up
+ * gettimeofday on x86_64, use:
+ *
+ * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
+ * or
+ * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+ *
+ * vdso_sym will return 0 if the symbol doesn't exist or if the init function
+ * failed or was not called. vdso_sym is a little slow, so its return value
+ * should be cached.
+ *
+ * vdso_sym is threadsafe; the init functions are not.
+ *
+ * These are the prototypes:
+ */
+extern void vdso_init_from_auxv(void *auxv);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void *vdso_sym(const char *version, const char *name);
+
+
+/* And here's the code. */
+#ifndef ELF_BITS
+# if ULONG_MAX > 0xffffffffUL
+# define ELF_BITS 64
+# else
+# define ELF_BITS 32
+# endif
+#endif
+
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
+static struct vdso_info
+{
+ bool valid;
+
+ /* Load information */
+ uintptr_t load_addr;
+ uintptr_t load_offset; /* load_addr - recorded vaddr */
+
+ /* Symbol table */
+ ELF(Sym) *symtab;
+ const char *symstrings;
+ ELF(Word) *bucket, *chain;
+ ELF(Word) nbucket, nchain;
+
+ /* Version table */
+ ELF(Versym) *versym;
+ ELF(Verdef) *verdef;
+} vdso_info;
+
+/* Straight from the ELF specification. */
+static unsigned long elf_hash(const unsigned char *name)
+{
+ unsigned long h = 0, g;
+ while (*name)
+ {
+ h = (h << 4) + *name++;
+ if (g = h & 0xf0000000)
+ h ^= g >> 24;
+ h &= ~g;
+ }
+ return h;
+}
+
+void vdso_init_from_sysinfo_ehdr(uintptr_t base)
+{
+ size_t i;
+ bool found_vaddr = false;
+
+ vdso_info.valid = false;
+
+ vdso_info.load_addr = base;
+
+ ELF(Ehdr) *hdr = (ELF(Ehdr)*)base;
+ if (hdr->e_ident[EI_CLASS] !=
+ (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) {
+ return; /* Wrong ELF class -- check ELF_BITS */
+ }
+
+ ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff);
+ ELF(Dyn) *dyn = 0;
+
+ /*
+ * We need two things from the segment table: the load offset
+ * and the dynamic table.
+ */
+ for (i = 0; i < hdr->e_phnum; i++)
+ {
+ if (pt[i].p_type == PT_LOAD && !found_vaddr) {
+ found_vaddr = true;
+ vdso_info.load_offset = base
+ + (uintptr_t)pt[i].p_offset
+ - (uintptr_t)pt[i].p_vaddr;
+ } else if (pt[i].p_type == PT_DYNAMIC) {
+ dyn = (ELF(Dyn)*)(base + pt[i].p_offset);
+ }
+ }
+
+ if (!found_vaddr || !dyn)
+ return; /* Failed */
+
+ /*
+ * Fish out the useful bits of the dynamic table.
+ */
+ ELF(Word) *hash = 0;
+ vdso_info.symstrings = 0;
+ vdso_info.symtab = 0;
+ vdso_info.versym = 0;
+ vdso_info.verdef = 0;
+ for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
+ switch (dyn[i].d_tag) {
+ case DT_STRTAB:
+ vdso_info.symstrings = (const char *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_SYMTAB:
+ vdso_info.symtab = (ELF(Sym) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_HASH:
+ hash = (ELF(Word) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_VERSYM:
+ vdso_info.versym = (ELF(Versym) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_VERDEF:
+ vdso_info.verdef = (ELF(Verdef) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ }
+ }
+ if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
+ return; /* Failed */
+
+ if (!vdso_info.verdef)
+ vdso_info.versym = 0;
+
+ /* Parse the hash table header. */
+ vdso_info.nbucket = hash[0];
+ vdso_info.nchain = hash[1];
+ vdso_info.bucket = &hash[2];
+ vdso_info.chain = &hash[vdso_info.nbucket + 2];
+
+ /* That's all we need. */
+ vdso_info.valid = true;
+}
+
+static bool vdso_match_version(ELF(Versym) ver,
+ const char *name, ELF(Word) hash)
+{
+ /*
+ * This is a helper function to check if the version indexed by
+ * ver matches name (which hashes to hash).
+ *
+ * The version definition table is a mess, and I don't know how
+ * to do this in better than linear time without allocating memory
+ * to build an index. I also don't know why the table has
+ * variable size entries in the first place.
+ *
+ * For added fun, I can't find a comprehensible specification of how
+ * to parse all the weird flags in the table.
+ *
+ * So I just parse the whole table every time.
+ */
+
+ /* First step: find the version definition */
+ ver &= 0x7fff; /* Apparently bit 15 means "hidden" */
+ ELF(Verdef) *def = vdso_info.verdef;
+ while(true) {
+ if ((def->vd_flags & VER_FLG_BASE) == 0
+ && (def->vd_ndx & 0x7fff) == ver)
+ break;
+
+ if (def->vd_next == 0)
+ return false; /* No definition. */
+
+ def = (ELF(Verdef) *)((char *)def + def->vd_next);
+ }
+
+ /* Now figure out whether it matches. */
+ ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux);
+ return def->vd_hash == hash
+ && !strcmp(name, vdso_info.symstrings + aux->vda_name);
+}
+
+void *vdso_sym(const char *version, const char *name)
+{
+ unsigned long ver_hash;
+ if (!vdso_info.valid)
+ return 0;
+
+ ver_hash = elf_hash(version);
+ ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
+
+ for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
+ ELF(Sym) *sym = &vdso_info.symtab[chain];
+
+ /* Check for a defined global or weak function w/ right name. */
+ if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+ continue;
+ if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+ ELF64_ST_BIND(sym->st_info) != STB_WEAK)
+ continue;
+ if (sym->st_shndx == SHN_UNDEF)
+ continue;
+ if (strcmp(name, vdso_info.symstrings + sym->st_name))
+ continue;
+
+ /* Check symbol version. */
+ if (vdso_info.versym
+ && !vdso_match_version(vdso_info.versym[chain],
+ version, ver_hash))
+ continue;
+
+ return (void *)(vdso_info.load_offset + sym->st_value);
+ }
+
+ return 0;
+}
+
+void vdso_init_from_auxv(void *auxv)
+{
+ ELF(auxv_t) *elf_auxv = auxv;
+ for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
+ {
+ if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
+ vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
+ return;
+ }
+ }
+
+ vdso_info.valid = false;
+}
diff --git a/Documentation/vDSO/vdso_standalone_test_x86.c b/Documentation/vDSO/vdso_standalone_test_x86.c
new file mode 100644
index 000000000..93b0ebf8c
--- /dev/null
+++ b/Documentation/vDSO/vdso_standalone_test_x86.c
@@ -0,0 +1,128 @@
+/*
+ * vdso_test.c: Sample code to test parse_vdso.c on x86
+ * Copyright (c) 2011-2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * You can amuse yourself by compiling with:
+ * gcc -std=gnu99 -nostdlib
+ * -Os -fno-asynchronous-unwind-tables -flto -lgcc_s
+ * vdso_standalone_test_x86.c parse_vdso.c
+ * to generate a small binary. On x86_64, you can omit -lgcc_s
+ * if you want the binary to be completely standalone.
+ */
+
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+/* We need a libc functions... */
+int strcmp(const char *a, const char *b)
+{
+ /* This implementation is buggy: it never returns -1. */
+ while (*a || *b) {
+ if (*a != *b)
+ return 1;
+ if (*a == 0 || *b == 0)
+ return 1;
+ a++;
+ b++;
+ }
+
+ return 0;
+}
+
+/* ...and two syscalls. This is x86-specific. */
+static inline long x86_syscall3(long nr, long a0, long a1, long a2)
+{
+ long ret;
+#ifdef __x86_64__
+ asm volatile ("syscall" : "=a" (ret) : "a" (nr),
+ "D" (a0), "S" (a1), "d" (a2) :
+ "cc", "memory", "rcx",
+ "r8", "r9", "r10", "r11" );
+#else
+ asm volatile ("int $0x80" : "=a" (ret) : "a" (nr),
+ "b" (a0), "c" (a1), "d" (a2) :
+ "cc", "memory" );
+#endif
+ return ret;
+}
+
+static inline long linux_write(int fd, const void *data, size_t len)
+{
+ return x86_syscall3(__NR_write, fd, (long)data, (long)len);
+}
+
+static inline void linux_exit(int code)
+{
+ x86_syscall3(__NR_exit, code, 0, 0);
+}
+
+void to_base10(char *lastdig, time_t n)
+{
+ while (n) {
+ *lastdig = (n % 10) + '0';
+ n /= 10;
+ lastdig--;
+ }
+}
+
+__attribute__((externally_visible)) void c_main(void **stack)
+{
+ /* Parse the stack */
+ long argc = (long)*stack;
+ stack += argc + 2;
+
+ /* Now we're pointing at the environment. Skip it. */
+ while(*stack)
+ stack++;
+ stack++;
+
+ /* Now we're pointing at auxv. Initialize the vDSO parser. */
+ vdso_init_from_auxv((void *)stack);
+
+ /* Find gettimeofday. */
+ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+ gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+
+ if (!gtod)
+ linux_exit(1);
+
+ struct timeval tv;
+ long ret = gtod(&tv, 0);
+
+ if (ret == 0) {
+ char buf[] = "The time is .000000\n";
+ to_base10(buf + 31, tv.tv_sec);
+ to_base10(buf + 38, tv.tv_usec);
+ linux_write(1, buf, sizeof(buf) - 1);
+ } else {
+ linux_exit(ret);
+ }
+
+ linux_exit(0);
+}
+
+/*
+ * This is the real entry point. It passes the initial stack into
+ * the C entry point.
+ */
+asm (
+ ".text\n"
+ ".global _start\n"
+ ".type _start,@function\n"
+ "_start:\n\t"
+#ifdef __x86_64__
+ "mov %rsp,%rdi\n\t"
+ "jmp c_main"
+#else
+ "push %esp\n\t"
+ "call c_main\n\t"
+ "int $3"
+#endif
+ );
diff --git a/Documentation/vDSO/vdso_test.c b/Documentation/vDSO/vdso_test.c
new file mode 100644
index 000000000..8daeb7d70
--- /dev/null
+++ b/Documentation/vDSO/vdso_test.c
@@ -0,0 +1,52 @@
+/*
+ * vdso_test.c: Sample code to test parse_vdso.c
+ * Copyright (c) 2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_test.c parse_vdso.c
+ *
+ * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too.
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+int main(int argc, char **argv)
+{
+ unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+ if (!sysinfo_ehdr) {
+ printf("AT_SYSINFO_EHDR is not present!\n");
+ return 0;
+ }
+
+ vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+ /* Find gettimeofday. */
+ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+ gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+
+ if (!gtod) {
+ printf("Could not find __vdso_gettimeofday\n");
+ return 1;
+ }
+
+ struct timeval tv;
+ long ret = gtod(&tv, 0);
+
+ if (ret == 0) {
+ printf("The time is %lld.%06lld\n",
+ (long long)tv.tv_sec, (long long)tv.tv_usec);
+ } else {
+ printf("__vdso_gettimeofday failed\n");
+ }
+
+ return 0;
+}
diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
new file mode 100644
index 000000000..96978eced
--- /dev/null
+++ b/Documentation/vfio.txt
@@ -0,0 +1,458 @@
+VFIO - "Virtual Function I/O"[1]
+-------------------------------------------------------------------------------
+Many modern system now provide DMA and interrupt remapping facilities
+to help ensure I/O devices behave within the boundaries they've been
+allotted. This includes x86 hardware with AMD-Vi and Intel VT-d,
+POWER systems with Partitionable Endpoints (PEs) and embedded PowerPC
+systems such as Freescale PAMU. The VFIO driver is an IOMMU/device
+agnostic framework for exposing direct device access to userspace, in
+a secure, IOMMU protected environment. In other words, this allows
+safe[2], non-privileged, userspace drivers.
+
+Why do we want that? Virtual machines often make use of direct device
+access ("device assignment") when configured for the highest possible
+I/O performance. From a device and host perspective, this simply
+turns the VM into a userspace driver, with the benefits of
+significantly reduced latency, higher bandwidth, and direct use of
+bare-metal device drivers[3].
+
+Some applications, particularly in the high performance computing
+field, also benefit from low-overhead, direct device access from
+userspace. Examples include network adapters (often non-TCP/IP based)
+and compute accelerators. Prior to VFIO, these drivers had to either
+go through the full development cycle to become proper upstream
+driver, be maintained out of tree, or make use of the UIO framework,
+which has no notion of IOMMU protection, limited interrupt support,
+and requires root privileges to access things like PCI configuration
+space.
+
+The VFIO driver framework intends to unify these, replacing both the
+KVM PCI specific device assignment code as well as provide a more
+secure, more featureful userspace driver environment than UIO.
+
+Groups, Devices, and IOMMUs
+-------------------------------------------------------------------------------
+
+Devices are the main target of any I/O driver. Devices typically
+create a programming interface made up of I/O access, interrupts,
+and DMA. Without going into the details of each of these, DMA is
+by far the most critical aspect for maintaining a secure environment
+as allowing a device read-write access to system memory imposes the
+greatest risk to the overall system integrity.
+
+To help mitigate this risk, many modern IOMMUs now incorporate
+isolation properties into what was, in many cases, an interface only
+meant for translation (ie. solving the addressing problems of devices
+with limited address spaces). With this, devices can now be isolated
+from each other and from arbitrary memory access, thus allowing
+things like secure direct assignment of devices into virtual machines.
+
+This isolation is not always at the granularity of a single device
+though. Even when an IOMMU is capable of this, properties of devices,
+interconnects, and IOMMU topologies can each reduce this isolation.
+For instance, an individual device may be part of a larger multi-
+function enclosure. While the IOMMU may be able to distinguish
+between devices within the enclosure, the enclosure may not require
+transactions between devices to reach the IOMMU. Examples of this
+could be anything from a multi-function PCI device with backdoors
+between functions to a non-PCI-ACS (Access Control Services) capable
+bridge allowing redirection without reaching the IOMMU. Topology
+can also play a factor in terms of hiding devices. A PCIe-to-PCI
+bridge masks the devices behind it, making transaction appear as if
+from the bridge itself. Obviously IOMMU design plays a major factor
+as well.
+
+Therefore, while for the most part an IOMMU may have device level
+granularity, any system is susceptible to reduced granularity. The
+IOMMU API therefore supports a notion of IOMMU groups. A group is
+a set of devices which is isolatable from all other devices in the
+system. Groups are therefore the unit of ownership used by VFIO.
+
+While the group is the minimum granularity that must be used to
+ensure secure user access, it's not necessarily the preferred
+granularity. In IOMMUs which make use of page tables, it may be
+possible to share a set of page tables between different groups,
+reducing the overhead both to the platform (reduced TLB thrashing,
+reduced duplicate page tables), and to the user (programming only
+a single set of translations). For this reason, VFIO makes use of
+a container class, which may hold one or more groups. A container
+is created by simply opening the /dev/vfio/vfio character device.
+
+On its own, the container provides little functionality, with all
+but a couple version and extension query interfaces locked away.
+The user needs to add a group into the container for the next level
+of functionality. To do this, the user first needs to identify the
+group associated with the desired device. This can be done using
+the sysfs links described in the example below. By unbinding the
+device from the host driver and binding it to a VFIO driver, a new
+VFIO group will appear for the group as /dev/vfio/$GROUP, where
+$GROUP is the IOMMU group number of which the device is a member.
+If the IOMMU group contains multiple devices, each will need to
+be bound to a VFIO driver before operations on the VFIO group
+are allowed (it's also sufficient to only unbind the device from
+host drivers if a VFIO driver is unavailable; this will make the
+group available, but not that particular device). TBD - interface
+for disabling driver probing/locking a device.
+
+Once the group is ready, it may be added to the container by opening
+the VFIO group character device (/dev/vfio/$GROUP) and using the
+VFIO_GROUP_SET_CONTAINER ioctl, passing the file descriptor of the
+previously opened container file. If desired and if the IOMMU driver
+supports sharing the IOMMU context between groups, multiple groups may
+be set to the same container. If a group fails to set to a container
+with existing groups, a new empty container will need to be used
+instead.
+
+With a group (or groups) attached to a container, the remaining
+ioctls become available, enabling access to the VFIO IOMMU interfaces.
+Additionally, it now becomes possible to get file descriptors for each
+device within a group using an ioctl on the VFIO group file descriptor.
+
+The VFIO device API includes ioctls for describing the device, the I/O
+regions and their read/write/mmap offsets on the device descriptor, as
+well as mechanisms for describing and registering interrupt
+notifications.
+
+VFIO Usage Example
+-------------------------------------------------------------------------------
+
+Assume user wants to access PCI device 0000:06:0d.0
+
+$ readlink /sys/bus/pci/devices/0000:06:0d.0/iommu_group
+../../../../kernel/iommu_groups/26
+
+This device is therefore in IOMMU group 26. This device is on the
+pci bus, therefore the user will make use of vfio-pci to manage the
+group:
+
+# modprobe vfio-pci
+
+Binding this device to the vfio-pci driver creates the VFIO group
+character devices for this group:
+
+$ lspci -n -s 0000:06:0d.0
+06:0d.0 0401: 1102:0002 (rev 08)
+# echo 0000:06:0d.0 > /sys/bus/pci/devices/0000:06:0d.0/driver/unbind
+# echo 1102 0002 > /sys/bus/pci/drivers/vfio-pci/new_id
+
+Now we need to look at what other devices are in the group to free
+it for use by VFIO:
+
+$ ls -l /sys/bus/pci/devices/0000:06:0d.0/iommu_group/devices
+total 0
+lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:00:1e.0 ->
+ ../../../../devices/pci0000:00/0000:00:1e.0
+lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.0 ->
+ ../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.0
+lrwxrwxrwx. 1 root root 0 Apr 23 16:13 0000:06:0d.1 ->
+ ../../../../devices/pci0000:00/0000:00:1e.0/0000:06:0d.1
+
+This device is behind a PCIe-to-PCI bridge[4], therefore we also
+need to add device 0000:06:0d.1 to the group following the same
+procedure as above. Device 0000:00:1e.0 is a bridge that does
+not currently have a host driver, therefore it's not required to
+bind this device to the vfio-pci driver (vfio-pci does not currently
+support PCI bridges).
+
+The final step is to provide the user with access to the group if
+unprivileged operation is desired (note that /dev/vfio/vfio provides
+no capabilities on its own and is therefore expected to be set to
+mode 0666 by the system).
+
+# chown user:user /dev/vfio/26
+
+The user now has full access to all the devices and the iommu for this
+group and can access them as follows:
+
+ int container, group, device, i;
+ struct vfio_group_status group_status =
+ { .argsz = sizeof(group_status) };
+ struct vfio_iommu_type1_info iommu_info = { .argsz = sizeof(iommu_info) };
+ struct vfio_iommu_type1_dma_map dma_map = { .argsz = sizeof(dma_map) };
+ struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
+
+ /* Create a new container */
+ container = open("/dev/vfio/vfio", O_RDWR);
+
+ if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
+ /* Unknown API version */
+
+ if (!ioctl(container, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU))
+ /* Doesn't support the IOMMU driver we want. */
+
+ /* Open the group */
+ group = open("/dev/vfio/26", O_RDWR);
+
+ /* Test the group is viable and available */
+ ioctl(group, VFIO_GROUP_GET_STATUS, &group_status);
+
+ if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE))
+ /* Group is not viable (ie, not all devices bound for vfio) */
+
+ /* Add the group to the container */
+ ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
+
+ /* Enable the IOMMU model we want */
+ ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
+
+ /* Get addition IOMMU info */
+ ioctl(container, VFIO_IOMMU_GET_INFO, &iommu_info);
+
+ /* Allocate some space and setup a DMA mapping */
+ dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ dma_map.size = 1024 * 1024;
+ dma_map.iova = 0; /* 1MB starting at 0x0 from device view */
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+ ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
+
+ /* Get a file descriptor for the device */
+ device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
+
+ /* Test and setup the device */
+ ioctl(device, VFIO_DEVICE_GET_INFO, &device_info);
+
+ for (i = 0; i < device_info.num_regions; i++) {
+ struct vfio_region_info reg = { .argsz = sizeof(reg) };
+
+ reg.index = i;
+
+ ioctl(device, VFIO_DEVICE_GET_REGION_INFO, &reg);
+
+ /* Setup mappings... read/write offsets, mmaps
+ * For PCI devices, config space is a region */
+ }
+
+ for (i = 0; i < device_info.num_irqs; i++) {
+ struct vfio_irq_info irq = { .argsz = sizeof(irq) };
+
+ irq.index = i;
+
+ ioctl(device, VFIO_DEVICE_GET_IRQ_INFO, &irq);
+
+ /* Setup IRQs... eventfds, VFIO_DEVICE_SET_IRQS */
+ }
+
+ /* Gratuitous device reset and go... */
+ ioctl(device, VFIO_DEVICE_RESET);
+
+VFIO User API
+-------------------------------------------------------------------------------
+
+Please see include/linux/vfio.h for complete API documentation.
+
+VFIO bus driver API
+-------------------------------------------------------------------------------
+
+VFIO bus drivers, such as vfio-pci make use of only a few interfaces
+into VFIO core. When devices are bound and unbound to the driver,
+the driver should call vfio_add_group_dev() and vfio_del_group_dev()
+respectively:
+
+extern int vfio_add_group_dev(struct iommu_group *iommu_group,
+ struct device *dev,
+ const struct vfio_device_ops *ops,
+ void *device_data);
+
+extern void *vfio_del_group_dev(struct device *dev);
+
+vfio_add_group_dev() indicates to the core to begin tracking the
+specified iommu_group and register the specified dev as owned by
+a VFIO bus driver. The driver provides an ops structure for callbacks
+similar to a file operations structure:
+
+struct vfio_device_ops {
+ int (*open)(void *device_data);
+ void (*release)(void *device_data);
+ ssize_t (*read)(void *device_data, char __user *buf,
+ size_t count, loff_t *ppos);
+ ssize_t (*write)(void *device_data, const char __user *buf,
+ size_t size, loff_t *ppos);
+ long (*ioctl)(void *device_data, unsigned int cmd,
+ unsigned long arg);
+ int (*mmap)(void *device_data, struct vm_area_struct *vma);
+};
+
+Each function is passed the device_data that was originally registered
+in the vfio_add_group_dev() call above. This allows the bus driver
+an easy place to store its opaque, private data. The open/release
+callbacks are issued when a new file descriptor is created for a
+device (via VFIO_GROUP_GET_DEVICE_FD). The ioctl interface provides
+a direct pass through for VFIO_DEVICE_* ioctls. The read/write/mmap
+interfaces implement the device region access defined by the device's
+own VFIO_DEVICE_GET_REGION_INFO ioctl.
+
+
+PPC64 sPAPR implementation note
+-------------------------------------------------------------------------------
+
+This implementation has some specifics:
+
+1) Only one IOMMU group per container is supported as an IOMMU group
+represents the minimal entity which isolation can be guaranteed for and
+groups are allocated statically, one per a Partitionable Endpoint (PE)
+(PE is often a PCI domain but not always).
+
+2) The hardware supports so called DMA windows - the PCI address range
+within which DMA transfer is allowed, any attempt to access address space
+out of the window leads to the whole PE isolation.
+
+3) PPC64 guests are paravirtualized but not fully emulated. There is an API
+to map/unmap pages for DMA, and it normally maps 1..32 pages per call and
+currently there is no way to reduce the number of calls. In order to make things
+faster, the map/unmap handling has been implemented in real mode which provides
+an excellent performance which has limitations such as inability to do
+locked pages accounting in real time.
+
+4) According to sPAPR specification, A Partitionable Endpoint (PE) is an I/O
+subtree that can be treated as a unit for the purposes of partitioning and
+error recovery. A PE may be a single or multi-function IOA (IO Adapter), a
+function of a multi-function IOA, or multiple IOAs (possibly including switch
+and bridge structures above the multiple IOAs). PPC64 guests detect PCI errors
+and recover from them via EEH RTAS services, which works on the basis of
+additional ioctl commands.
+
+So 4 additional ioctls have been added:
+
+ VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start
+ of the DMA window on the PCI bus.
+
+ VFIO_IOMMU_ENABLE - enables the container. The locked pages accounting
+ is done at this point. This lets user first to know what
+ the DMA window is and adjust rlimit before doing any real job.
+
+ VFIO_IOMMU_DISABLE - disables the container.
+
+ VFIO_EEH_PE_OP - provides an API for EEH setup, error detection and recovery.
+
+The code flow from the example above should be slightly changed:
+
+ struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
+
+ .....
+ /* Add the group to the container */
+ ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
+
+ /* Enable the IOMMU model we want */
+ ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU)
+
+ /* Get addition sPAPR IOMMU info */
+ vfio_iommu_spapr_tce_info spapr_iommu_info;
+ ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &spapr_iommu_info);
+
+ if (ioctl(container, VFIO_IOMMU_ENABLE))
+ /* Cannot enable container, may be low rlimit */
+
+ /* Allocate some space and setup a DMA mapping */
+ dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+
+ dma_map.size = 1024 * 1024;
+ dma_map.iova = 0; /* 1MB starting at 0x0 from device view */
+ dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+
+ /* Check here is .iova/.size are within DMA window from spapr_iommu_info */
+ ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map);
+
+ /* Get a file descriptor for the device */
+ device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:06:0d.0");
+
+ ....
+
+ /* Gratuitous device reset and go... */
+ ioctl(device, VFIO_DEVICE_RESET);
+
+ /* Make sure EEH is supported */
+ ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
+
+ /* Enable the EEH functionality on the device */
+ pe_op.op = VFIO_EEH_PE_ENABLE;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /* You're suggested to create additional data struct to represent
+ * PE, and put child devices belonging to same IOMMU group to the
+ * PE instance for later reference.
+ */
+
+ /* Check the PE's state and make sure it's in functional state */
+ pe_op.op = VFIO_EEH_PE_GET_STATE;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /* Save device state using pci_save_state().
+ * EEH should be enabled on the specified device.
+ */
+
+ ....
+
+ /* When 0xFF's returned from reading PCI config space or IO BARs
+ * of the PCI device. Check the PE's state to see if that has been
+ * frozen.
+ */
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /* Waiting for pending PCI transactions to be completed and don't
+ * produce any more PCI traffic from/to the affected PE until
+ * recovery is finished.
+ */
+
+ /* Enable IO for the affected PE and collect logs. Usually, the
+ * standard part of PCI config space, AER registers are dumped
+ * as logs for further analysis.
+ */
+ pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /*
+ * Issue PE reset: hot or fundamental reset. Usually, hot reset
+ * is enough. However, the firmware of some PCI adapters would
+ * require fundamental reset.
+ */
+ pe_op.op = VFIO_EEH_PE_RESET_HOT;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+ pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /* Configure the PCI bridges for the affected PE */
+ pe_op.op = VFIO_EEH_PE_CONFIGURE;
+ ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+ /* Restored state we saved at initialization time. pci_restore_state()
+ * is good enough as an example.
+ */
+
+ /* Hopefully, error is recovered successfully. Now, you can resume to
+ * start PCI traffic to/from the affected PE.
+ */
+
+ ....
+
+-------------------------------------------------------------------------------
+
+[1] VFIO was originally an acronym for "Virtual Function I/O" in its
+initial implementation by Tom Lyon while as Cisco. We've since
+outgrown the acronym, but it's catchy.
+
+[2] "safe" also depends upon a device being "well behaved". It's
+possible for multi-function devices to have backdoors between
+functions and even for single function devices to have alternative
+access to things like PCI config space through MMIO registers. To
+guard against the former we can include additional precautions in the
+IOMMU driver to group multi-function PCI devices together
+(iommu=group_mf). The latter we can't prevent, but the IOMMU should
+still provide isolation. For PCI, SR-IOV Virtual Functions are the
+best indicator of "well behaved", as these are designed for
+virtualization usage models.
+
+[3] As always there are trade-offs to virtual machine device
+assignment that are beyond the scope of VFIO. It's expected that
+future IOMMU technologies will reduce some, but maybe not all, of
+these trade-offs.
+
+[4] In this case the device is below a PCI bridge, so transactions
+from either function of the device are indistinguishable to the iommu:
+
+-[0000:00]-+-1e.0-[06]--+-0d.0
+ \-0d.1
+
+00:1e.0 PCI bridge: Intel Corporation 82801 PCI Bridge (rev 90)
diff --git a/Documentation/vgaarbiter.txt b/Documentation/vgaarbiter.txt
new file mode 100644
index 000000000..014423e28
--- /dev/null
+++ b/Documentation/vgaarbiter.txt
@@ -0,0 +1,192 @@
+
+VGA Arbiter
+===========
+
+Graphic devices are accessed through ranges in I/O or memory space. While most
+modern devices allow relocation of such ranges, some "Legacy" VGA devices
+implemented on PCI will typically have the same "hard-decoded" addresses as
+they did on ISA. For more details see "PCI Bus Binding to IEEE Std 1275-1994
+Standard for Boot (Initialization Configuration) Firmware Revision 2.1"
+Section 7, Legacy Devices.
+
+The Resource Access Control (RAC) module inside the X server [0] existed for
+the legacy VGA arbitration task (besides other bus management tasks) when more
+than one legacy device co-exists on the same machine. But the problem happens
+when these devices are trying to be accessed by different userspace clients
+(e.g. two server in parallel). Their address assignments conflict. Moreover,
+ideally, being a userspace application, it is not the role of the X server to
+control bus resources. Therefore an arbitration scheme outside of the X server
+is needed to control the sharing of these resources. This document introduces
+the operation of the VGA arbiter implemented for the Linux kernel.
+
+----------------------------------------------------------------------------
+
+I. Details and Theory of Operation
+ I.1 vgaarb
+ I.2 libpciaccess
+ I.3 xf86VGAArbiter (X server implementation)
+II. Credits
+III.References
+
+
+I. Details and Theory of Operation
+==================================
+
+I.1 vgaarb
+----------
+
+The vgaarb is a module of the Linux Kernel. When it is initially loaded, it
+scans all PCI devices and adds the VGA ones inside the arbitration. The
+arbiter then enables/disables the decoding on different devices of the VGA
+legacy instructions. Devices which do not want/need to use the arbiter may
+explicitly tell it by calling vga_set_legacy_decoding().
+
+The kernel exports a char device interface (/dev/vga_arbiter) to the clients,
+which has the following semantics:
+
+ open : open user instance of the arbiter. By default, it's attached to
+ the default VGA device of the system.
+
+ close : close user instance. Release locks made by the user
+
+ read : return a string indicating the status of the target like:
+
+ "<card_ID>,decodes=<io_state>,owns=<io_state>,locks=<io_state> (ic,mc)"
+
+ An IO state string is of the form {io,mem,io+mem,none}, mc and
+ ic are respectively mem and io lock counts (for debugging/
+ diagnostic only). "decodes" indicate what the card currently
+ decodes, "owns" indicates what is currently enabled on it, and
+ "locks" indicates what is locked by this card. If the card is
+ unplugged, we get "invalid" then for card_ID and an -ENODEV
+ error is returned for any command until a new card is targeted.
+
+
+ write : write a command to the arbiter. List of commands:
+
+ target <card_ID> : switch target to card <card_ID> (see below)
+ lock <io_state> : acquires locks on target ("none" is an invalid io_state)
+ trylock <io_state> : non-blocking acquire locks on target (returns EBUSY if
+ unsuccessful)
+ unlock <io_state> : release locks on target
+ unlock all : release all locks on target held by this user (not
+ implemented yet)
+ decodes <io_state> : set the legacy decoding attributes for the card
+
+ poll : event if something changes on any card (not just the
+ target)
+
+ card_ID is of the form "PCI:domain:bus:dev.fn". It can be set to "default"
+ to go back to the system default card (TODO: not implemented yet). Currently,
+ only PCI is supported as a prefix, but the userland API may support other bus
+ types in the future, even if the current kernel implementation doesn't.
+
+Note about locks:
+
+The driver keeps track of which user has which locks on which card. It
+supports stacking, like the kernel one. This complexifies the implementation
+a bit, but makes the arbiter more tolerant to user space problems and able
+to properly cleanup in all cases when a process dies.
+Currently, a max of 16 cards can have locks simultaneously issued from
+user space for a given user (file descriptor instance) of the arbiter.
+
+In the case of devices hot-{un,}plugged, there is a hook - pci_notify() - to
+notify them being added/removed in the system and automatically added/removed
+in the arbiter.
+
+There is also an in-kernel API of the arbiter in case DRM, vgacon, or other
+drivers want to use it.
+
+
+I.2 libpciaccess
+----------------
+
+To use the vga arbiter char device it was implemented an API inside the
+libpciaccess library. One field was added to struct pci_device (each device
+on the system):
+
+ /* the type of resource decoded by the device */
+ int vgaarb_rsrc;
+
+Besides it, in pci_system were added:
+
+ int vgaarb_fd;
+ int vga_count;
+ struct pci_device *vga_target;
+ struct pci_device *vga_default_dev;
+
+
+The vga_count is used to track how many cards are being arbitrated, so for
+instance, if there is only one card, then it can completely escape arbitration.
+
+
+These functions below acquire VGA resources for the given card and mark those
+resources as locked. If the resources requested are "normal" (and not legacy)
+resources, the arbiter will first check whether the card is doing legacy
+decoding for that type of resource. If yes, the lock is "converted" into a
+legacy resource lock. The arbiter will first look for all VGA cards that
+might conflict and disable their IOs and/or Memory access, including VGA
+forwarding on P2P bridges if necessary, so that the requested resources can
+be used. Then, the card is marked as locking these resources and the IO and/or
+Memory access is enabled on the card (including VGA forwarding on parent
+P2P bridges if any). In the case of vga_arb_lock(), the function will block
+if some conflicting card is already locking one of the required resources (or
+any resource on a different bus segment, since P2P bridges don't differentiate
+VGA memory and IO afaik). If the card already owns the resources, the function
+succeeds. vga_arb_trylock() will return (-EBUSY) instead of blocking. Nested
+calls are supported (a per-resource counter is maintained).
+
+
+Set the target device of this client.
+ int pci_device_vgaarb_set_target (struct pci_device *dev);
+
+
+For instance, in x86 if two devices on the same bus want to lock different
+resources, both will succeed (lock). If devices are in different buses and
+trying to lock different resources, only the first who tried succeeds.
+ int pci_device_vgaarb_lock (void);
+ int pci_device_vgaarb_trylock (void);
+
+Unlock resources of device.
+ int pci_device_vgaarb_unlock (void);
+
+Indicates to the arbiter if the card decodes legacy VGA IOs, legacy VGA
+Memory, both, or none. All cards default to both, the card driver (fbdev for
+example) should tell the arbiter if it has disabled legacy decoding, so the
+card can be left out of the arbitration process (and can be safe to take
+interrupts at any time.
+ int pci_device_vgaarb_decodes (int new_vgaarb_rsrc);
+
+Connects to the arbiter device, allocates the struct
+ int pci_device_vgaarb_init (void);
+
+Close the connection
+ void pci_device_vgaarb_fini (void);
+
+
+I.3 xf86VGAArbiter (X server implementation)
+--------------------------------------------
+
+(TODO)
+
+X server basically wraps all the functions that touch VGA registers somehow.
+
+
+II. Credits
+===========
+
+Benjamin Herrenschmidt (IBM?) started this work when he discussed such design
+with the Xorg community in 2005 [1, 2]. In the end of 2007, Paulo Zanoni and
+Tiago Vignatti (both of C3SL/Federal University of Paraná) proceeded his work
+enhancing the kernel code to adapt as a kernel module and also did the
+implementation of the user space side [3]. Now (2009) Tiago Vignatti and Dave
+Airlie finally put this work in shape and queued to Jesse Barnes' PCI tree.
+
+
+III. References
+==============
+
+[0] http://cgit.freedesktop.org/xorg/xserver/commit/?id=4b42448a2388d40f257774fbffdccaea87bd0347
+[1] http://lists.freedesktop.org/archives/xorg/2005-March/006663.html
+[2] http://lists.freedesktop.org/archives/xorg/2005-March/006745.html
+[3] http://lists.freedesktop.org/archives/xorg/2007-October/029507.html
diff --git a/Documentation/video-output.txt b/Documentation/video-output.txt
new file mode 100644
index 000000000..e517011be
--- /dev/null
+++ b/Documentation/video-output.txt
@@ -0,0 +1,34 @@
+
+ Video Output Switcher Control
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ 2006 luming.yu@intel.com
+
+The output sysfs class driver provides an abstract video output layer that
+can be used to hook platform specific methods to enable/disable video output
+device through common sysfs interface. For example, on my IBM ThinkPad T42
+laptop, The ACPI video driver registered its output devices and read/write
+method for 'state' with output sysfs class. The user interface under sysfs is:
+
+linux:/sys/class/video_output # tree .
+.
+|-- CRT0
+| |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+| |-- state
+| |-- subsystem -> ../../../class/video_output
+| `-- uevent
+|-- DVI0
+| |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+| |-- state
+| |-- subsystem -> ../../../class/video_output
+| `-- uevent
+|-- LCD0
+| |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+| |-- state
+| |-- subsystem -> ../../../class/video_output
+| `-- uevent
+`-- TV0
+ |-- device -> ../../../devices/pci0000:00/0000:00:01.0
+ |-- state
+ |-- subsystem -> ../../../class/video_output
+ `-- uevent
+
diff --git a/Documentation/video4linux/4CCs.txt b/Documentation/video4linux/4CCs.txt
new file mode 100644
index 000000000..41241af1e
--- /dev/null
+++ b/Documentation/video4linux/4CCs.txt
@@ -0,0 +1,32 @@
+Guidelines for Linux4Linux pixel format 4CCs
+============================================
+
+Guidelines for Video4Linux 4CC codes defined using v4l2_fourcc() are
+specified in this document. First of the characters defines the nature of
+the pixel format, compression and colour space. The interpretation of the
+other three characters depends on the first one.
+
+Existing 4CCs may not obey these guidelines.
+
+Formats
+=======
+
+Raw bayer
+---------
+
+The following first characters are used by raw bayer formats:
+
+ B: raw bayer, uncompressed
+ b: raw bayer, DPCM compressed
+ a: A-law compressed
+ u: u-law compressed
+
+2nd character: pixel order
+ B: BGGR
+ G: GBRG
+ g: GRBG
+ R: RGGB
+
+3rd character: uncompressed bits-per-pixel 0--9, A--
+
+4th character: compressed bits-per-pixel 0--9, A--
diff --git a/Documentation/video4linux/API.html b/Documentation/video4linux/API.html
new file mode 100644
index 000000000..256f8efa9
--- /dev/null
+++ b/Documentation/video4linux/API.html
@@ -0,0 +1,27 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
+ <head>
+ <meta content="text/html;charset=ISO-8859-2" http-equiv="Content-Type" />
+ <title>V4L API</title>
+ </head>
+ <body>
+ <h1>Video For Linux APIs</h1>
+ <table border="0">
+ <tr>
+ <td>
+ <a href="http://linuxtv.org/downloads/legacy/video4linux/API/V4L1_API.html">V4L original API</a>
+ </td>
+ <td>
+ Obsoleted by V4L2 API
+ </td>
+ </tr>
+ <tr>
+ <td>
+ <a href="http://v4l2spec.bytesex.org/spec-single/v4l2.html">V4L2 API</a>
+ </td>
+ <td>Should be used for new projects
+ </td>
+ </tr>
+ </table>
+ </body>
+</html>
diff --git a/Documentation/video4linux/CARDLIST.au0828 b/Documentation/video4linux/CARDLIST.au0828
new file mode 100644
index 000000000..55a21deab
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.au0828
@@ -0,0 +1,6 @@
+ 0 -> Unknown board (au0828)
+ 1 -> Hauppauge HVR950Q (au0828) [2040:7200,2040:7210,2040:7217,2040:721b,2040:721e,2040:721f,2040:7280,0fd9:0008,2040:7260,2040:7213,2040:7270]
+ 2 -> Hauppauge HVR850 (au0828) [2040:7240]
+ 3 -> DViCO FusionHDTV USB (au0828) [0fe9:d620]
+ 4 -> Hauppauge HVR950Q rev xxF8 (au0828) [2040:7201,2040:7211,2040:7281]
+ 5 -> Hauppauge Woodbury (au0828) [05e1:0480,2040:8200]
diff --git a/Documentation/video4linux/CARDLIST.bttv b/Documentation/video4linux/CARDLIST.bttv
new file mode 100644
index 000000000..b092c0a14
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.bttv
@@ -0,0 +1,167 @@
+ 0 -> *** UNKNOWN/GENERIC ***
+ 1 -> MIRO PCTV
+ 2 -> Hauppauge (bt848)
+ 3 -> STB, Gateway P/N 6000699 (bt848)
+ 4 -> Intel Create and Share PCI/ Smart Video Recorder III
+ 5 -> Diamond DTV2000
+ 6 -> AVerMedia TVPhone
+ 7 -> MATRIX-Vision MV-Delta
+ 8 -> Lifeview FlyVideo II (Bt848) LR26 / MAXI TV Video PCI2 LR26
+ 9 -> IMS/IXmicro TurboTV
+ 10 -> Hauppauge (bt878) [0070:13eb,0070:3900,2636:10b4]
+ 11 -> MIRO PCTV pro
+ 12 -> ADS Technologies Channel Surfer TV (bt848)
+ 13 -> AVerMedia TVCapture 98 [1461:0002,1461:0004,1461:0300]
+ 14 -> Aimslab Video Highway Xtreme (VHX)
+ 15 -> Zoltrix TV-Max [a1a0:a0fc]
+ 16 -> Prolink Pixelview PlayTV (bt878)
+ 17 -> Leadtek WinView 601
+ 18 -> AVEC Intercapture
+ 19 -> Lifeview FlyVideo II EZ /FlyKit LR38 Bt848 (capture only)
+ 20 -> CEI Raffles Card
+ 21 -> Lifeview FlyVideo 98/ Lucky Star Image World ConferenceTV LR50
+ 22 -> Askey CPH050/ Phoebe Tv Master + FM [14ff:3002]
+ 23 -> Modular Technology MM201/MM202/MM205/MM210/MM215 PCTV, bt878 [14c7:0101]
+ 24 -> Askey CPH05X/06X (bt878) [many vendors] [144f:3002,144f:3005,144f:5000,14ff:3000]
+ 25 -> Terratec TerraTV+ Version 1.0 (Bt848)/ Terra TValue Version 1.0/ Vobis TV-Boostar
+ 26 -> Hauppauge WinCam newer (bt878)
+ 27 -> Lifeview FlyVideo 98/ MAXI TV Video PCI2 LR50
+ 28 -> Terratec TerraTV+ Version 1.1 (bt878) [153b:1127,1852:1852]
+ 29 -> Imagenation PXC200 [1295:200a]
+ 30 -> Lifeview FlyVideo 98 LR50 [1f7f:1850]
+ 31 -> Formac iProTV, Formac ProTV I (bt848)
+ 32 -> Intel Create and Share PCI/ Smart Video Recorder III
+ 33 -> Terratec TerraTValue Version Bt878 [153b:1117,153b:1118,153b:1119,153b:111a,153b:1134,153b:5018]
+ 34 -> Leadtek WinFast 2000/ WinFast 2000 XP [107d:6606,107d:6609,6606:217d,f6ff:fff6]
+ 35 -> Lifeview FlyVideo 98 LR50 / Chronos Video Shuttle II [1851:1850,1851:a050]
+ 36 -> Lifeview FlyVideo 98FM LR50 / Typhoon TView TV/FM Tuner [1852:1852]
+ 37 -> Prolink PixelView PlayTV pro
+ 38 -> Askey CPH06X TView99 [144f:3000,144f:a005,a04f:a0fc]
+ 39 -> Pinnacle PCTV Studio/Rave [11bd:0012,bd11:1200,bd11:ff00,11bd:ff12]
+ 40 -> STB TV PCI FM, Gateway P/N 6000704 (bt878), 3Dfx VoodooTV 100 [10b4:2636,10b4:2645,121a:3060]
+ 41 -> AVerMedia TVPhone 98 [1461:0001,1461:0003]
+ 42 -> ProVideo PV951 [aa0c:146c]
+ 43 -> Little OnAir TV
+ 44 -> Sigma TVII-FM
+ 45 -> MATRIX-Vision MV-Delta 2
+ 46 -> Zoltrix Genie TV/FM [15b0:4000,15b0:400a,15b0:400d,15b0:4010,15b0:4016]
+ 47 -> Terratec TV/Radio+ [153b:1123]
+ 48 -> Askey CPH03x/ Dynalink Magic TView
+ 49 -> IODATA GV-BCTV3/PCI [10fc:4020]
+ 50 -> Prolink PV-BT878P+4E / PixelView PlayTV PAK / Lenco MXTV-9578 CP
+ 51 -> Eagle Wireless Capricorn2 (bt878A)
+ 52 -> Pinnacle PCTV Studio Pro
+ 53 -> Typhoon TView RDS + FM Stereo / KNC1 TV Station RDS
+ 54 -> Lifeview FlyVideo 2000 /FlyVideo A2/ Lifetec LT 9415 TV [LR90]
+ 55 -> Askey CPH031/ BESTBUY Easy TV
+ 56 -> Lifeview FlyVideo 98FM LR50 [a051:41a0]
+ 57 -> GrandTec 'Grand Video Capture' (Bt848) [4344:4142]
+ 58 -> Askey CPH060/ Phoebe TV Master Only (No FM)
+ 59 -> Askey CPH03x TV Capturer
+ 60 -> Modular Technology MM100PCTV
+ 61 -> AG Electronics GMV1 [15cb:0101]
+ 62 -> Askey CPH061/ BESTBUY Easy TV (bt878)
+ 63 -> ATI TV-Wonder [1002:0001]
+ 64 -> ATI TV-Wonder VE [1002:0003]
+ 65 -> Lifeview FlyVideo 2000S LR90
+ 66 -> Terratec TValueRadio [153b:1135,153b:ff3b]
+ 67 -> IODATA GV-BCTV4/PCI [10fc:4050]
+ 68 -> 3Dfx VoodooTV FM (Euro) [10b4:2637]
+ 69 -> Active Imaging AIMMS
+ 70 -> Prolink Pixelview PV-BT878P+ (Rev.4C,8E)
+ 71 -> Lifeview FlyVideo 98EZ (capture only) LR51 [1851:1851]
+ 72 -> Prolink Pixelview PV-BT878P+9B (PlayTV Pro rev.9B FM+NICAM) [1554:4011]
+ 73 -> Sensoray 311/611 [6000:0311,6000:0611]
+ 74 -> RemoteVision MX (RV605)
+ 75 -> Powercolor MTV878/ MTV878R/ MTV878F
+ 76 -> Canopus WinDVR PCI (COMPAQ Presario 3524JP, 5112JP) [0e11:0079]
+ 77 -> GrandTec Multi Capture Card (Bt878)
+ 78 -> Jetway TV/Capture JW-TV878-FBK, Kworld KW-TV878RF [0a01:17de]
+ 79 -> DSP Design TCVIDEO
+ 80 -> Hauppauge WinTV PVR [0070:4500]
+ 81 -> IODATA GV-BCTV5/PCI [10fc:4070,10fc:d018]
+ 82 -> Osprey 100/150 (878) [0070:ff00]
+ 83 -> Osprey 100/150 (848)
+ 84 -> Osprey 101 (848)
+ 85 -> Osprey 101/151
+ 86 -> Osprey 101/151 w/ svid
+ 87 -> Osprey 200/201/250/251
+ 88 -> Osprey 200/250 [0070:ff01]
+ 89 -> Osprey 210/220/230
+ 90 -> Osprey 500 [0070:ff02]
+ 91 -> Osprey 540 [0070:ff04]
+ 92 -> Osprey 2000 [0070:ff03]
+ 93 -> IDS Eagle
+ 94 -> Pinnacle PCTV Sat [11bd:001c]
+ 95 -> Formac ProTV II (bt878)
+ 96 -> MachTV
+ 97 -> Euresys Picolo
+ 98 -> ProVideo PV150 [aa00:1460,aa01:1461,aa02:1462,aa03:1463,aa04:1464,aa05:1465,aa06:1466,aa07:1467]
+ 99 -> AD-TVK503
+100 -> Hercules Smart TV Stereo
+101 -> Pace TV & Radio Card
+102 -> IVC-200 [0000:a155,0001:a155,0002:a155,0003:a155,0100:a155,0101:a155,0102:a155,0103:a155,0800:a155,0801:a155,0802:a155,0803:a155]
+103 -> Grand X-Guard / Trust 814PCI [0304:0102]
+104 -> Nebula Electronics DigiTV [0071:0101]
+105 -> ProVideo PV143 [aa00:1430,aa00:1431,aa00:1432,aa00:1433,aa03:1433]
+106 -> PHYTEC VD-009-X1 VD-011 MiniDIN (bt878)
+107 -> PHYTEC VD-009-X1 VD-011 Combi (bt878)
+108 -> PHYTEC VD-009 MiniDIN (bt878)
+109 -> PHYTEC VD-009 Combi (bt878)
+110 -> IVC-100 [ff00:a132]
+111 -> IVC-120G [ff00:a182,ff01:a182,ff02:a182,ff03:a182,ff04:a182,ff05:a182,ff06:a182,ff07:a182,ff08:a182,ff09:a182,ff0a:a182,ff0b:a182,ff0c:a182,ff0d:a182,ff0e:a182,ff0f:a182]
+112 -> pcHDTV HD-2000 TV [7063:2000]
+113 -> Twinhan DST + clones [11bd:0026,1822:0001,270f:fc00,1822:0026]
+114 -> Winfast VC100 [107d:6607]
+115 -> Teppro TEV-560/InterVision IV-560
+116 -> SIMUS GVC1100 [aa6a:82b2]
+117 -> NGS NGSTV+
+118 -> LMLBT4
+119 -> Tekram M205 PRO
+120 -> Conceptronic CONTVFMi
+121 -> Euresys Picolo Tetra [1805:0105,1805:0106,1805:0107,1805:0108]
+122 -> Spirit TV Tuner
+123 -> AVerMedia AVerTV DVB-T 771 [1461:0771]
+124 -> AverMedia AverTV DVB-T 761 [1461:0761]
+125 -> MATRIX Vision Sigma-SQ
+126 -> MATRIX Vision Sigma-SLC
+127 -> APAC Viewcomp 878(AMAX)
+128 -> DViCO FusionHDTV DVB-T Lite [18ac:db10,18ac:db11]
+129 -> V-Gear MyVCD
+130 -> Super TV Tuner
+131 -> Tibet Systems 'Progress DVR' CS16
+132 -> Kodicom 4400R (master)
+133 -> Kodicom 4400R (slave)
+134 -> Adlink RTV24
+135 -> DViCO FusionHDTV 5 Lite [18ac:d500]
+136 -> Acorp Y878F [9511:1540]
+137 -> Conceptronic CTVFMi v2 [036e:109e]
+138 -> Prolink Pixelview PV-BT878P+ (Rev.2E)
+139 -> Prolink PixelView PlayTV MPEG2 PV-M4900
+140 -> Osprey 440 [0070:ff07]
+141 -> Asound Skyeye PCTV
+142 -> Sabrent TV-FM (bttv version)
+143 -> Hauppauge ImpactVCB (bt878) [0070:13eb]
+144 -> MagicTV
+145 -> SSAI Security Video Interface [4149:5353]
+146 -> SSAI Ultrasound Video Interface [414a:5353]
+147 -> VoodooTV 200 (USA) [121a:3000]
+148 -> DViCO FusionHDTV 2 [dbc0:d200]
+149 -> Typhoon TV-Tuner PCI (50684)
+150 -> Geovision GV-600 [008a:763c]
+151 -> Kozumi KTV-01C
+152 -> Encore ENL TV-FM-2 [1000:1801]
+153 -> PHYTEC VD-012 (bt878)
+154 -> PHYTEC VD-012-X1 (bt878)
+155 -> PHYTEC VD-012-X2 (bt878)
+156 -> IVCE-8784 [0000:f050,0001:f050,0002:f050,0003:f050]
+157 -> Geovision GV-800(S) (master) [800a:763d]
+158 -> Geovision GV-800(S) (slave) [800b:763d,800c:763d,800d:763d]
+159 -> ProVideo PV183 [1830:1540,1831:1540,1832:1540,1833:1540,1834:1540,1835:1540,1836:1540,1837:1540]
+160 -> Tongwei Video Technology TD-3116 [f200:3116]
+161 -> Aposonic W-DVR [0279:0228]
+162 -> Adlink MPG24
+163 -> Bt848 Capture 14MHz
+164 -> CyberVision CV06 (SV)
+165 -> Kworld V-Stream Xpert TV PVR878
+166 -> PCI-8604PW
diff --git a/Documentation/video4linux/CARDLIST.cx23885 b/Documentation/video4linux/CARDLIST.cx23885
new file mode 100644
index 000000000..4c84ec853
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.cx23885
@@ -0,0 +1,47 @@
+ 0 -> UNKNOWN/GENERIC [0070:3400]
+ 1 -> Hauppauge WinTV-HVR1800lp [0070:7600]
+ 2 -> Hauppauge WinTV-HVR1800 [0070:7800,0070:7801,0070:7809]
+ 3 -> Hauppauge WinTV-HVR1250 [0070:7911]
+ 4 -> DViCO FusionHDTV5 Express [18ac:d500]
+ 5 -> Hauppauge WinTV-HVR1500Q [0070:7790,0070:7797]
+ 6 -> Hauppauge WinTV-HVR1500 [0070:7710,0070:7717]
+ 7 -> Hauppauge WinTV-HVR1200 [0070:71d1,0070:71d3]
+ 8 -> Hauppauge WinTV-HVR1700 [0070:8101]
+ 9 -> Hauppauge WinTV-HVR1400 [0070:8010]
+ 10 -> DViCO FusionHDTV7 Dual Express [18ac:d618]
+ 11 -> DViCO FusionHDTV DVB-T Dual Express [18ac:db78]
+ 12 -> Leadtek Winfast PxDVR3200 H [107d:6681]
+ 13 -> Compro VideoMate E650F [185b:e800]
+ 14 -> TurboSight TBS 6920 [6920:8888]
+ 15 -> TeVii S470 [d470:9022]
+ 16 -> DVBWorld DVB-S2 2005 [0001:2005]
+ 17 -> NetUP Dual DVB-S2 CI [1b55:2a2c]
+ 18 -> Hauppauge WinTV-HVR1270 [0070:2211]
+ 19 -> Hauppauge WinTV-HVR1275 [0070:2215,0070:221d,0070:22f2]
+ 20 -> Hauppauge WinTV-HVR1255 [0070:2251,0070:22f1]
+ 21 -> Hauppauge WinTV-HVR1210 [0070:2291,0070:2295,0070:2299,0070:229d,0070:22f0,0070:22f3,0070:22f4,0070:22f5]
+ 22 -> Mygica X8506 DMB-TH [14f1:8651]
+ 23 -> Magic-Pro ProHDTV Extreme 2 [14f1:8657]
+ 24 -> Hauppauge WinTV-HVR1850 [0070:8541]
+ 25 -> Compro VideoMate E800 [1858:e800]
+ 26 -> Hauppauge WinTV-HVR1290 [0070:8551]
+ 27 -> Mygica X8558 PRO DMB-TH [14f1:8578]
+ 28 -> LEADTEK WinFast PxTV1200 [107d:6f22]
+ 29 -> GoTView X5 3D Hybrid [5654:2390]
+ 30 -> NetUP Dual DVB-T/C-CI RF [1b55:e2e4]
+ 31 -> Leadtek Winfast PxDVR3200 H XC4000 [107d:6f39]
+ 32 -> MPX-885
+ 33 -> Mygica X8502/X8507 ISDB-T [14f1:8502]
+ 34 -> TerraTec Cinergy T PCIe Dual [153b:117e]
+ 35 -> TeVii S471 [d471:9022]
+ 36 -> Hauppauge WinTV-HVR1255 [0070:2259]
+ 37 -> Prof Revolution DVB-S2 8000 [8000:3034]
+ 38 -> Hauppauge WinTV-HVR4400 [0070:c108,0070:c138,0070:c12a,0070:c1f8]
+ 39 -> AVerTV Hybrid Express Slim HC81R [1461:d939]
+ 40 -> TurboSight TBS 6981 [6981:8888]
+ 41 -> TurboSight TBS 6980 [6980:8888]
+ 42 -> Leadtek Winfast PxPVR2200 [107d:6f21]
+ 43 -> Hauppauge ImpactVCB-e [0070:7133]
+ 44 -> DViCO FusionHDTV DVB-T Dual Express2 [18ac:db98]
+ 45 -> DVBSky T9580 [4254:9580]
+ 46 -> DVBSky T980C [4254:980c]
diff --git a/Documentation/video4linux/CARDLIST.cx88 b/Documentation/video4linux/CARDLIST.cx88
new file mode 100644
index 000000000..fa4b3f947
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.cx88
@@ -0,0 +1,91 @@
+ 0 -> UNKNOWN/GENERIC
+ 1 -> Hauppauge WinTV 34xxx models [0070:3400,0070:3401]
+ 2 -> GDI Black Gold [14c7:0106,14c7:0107]
+ 3 -> PixelView [1554:4811]
+ 4 -> ATI TV Wonder Pro [1002:00f8,1002:00f9]
+ 5 -> Leadtek Winfast 2000XP Expert [107d:6611,107d:6613]
+ 6 -> AverTV Studio 303 (M126) [1461:000b]
+ 7 -> MSI TV-@nywhere Master [1462:8606]
+ 8 -> Leadtek Winfast DV2000 [107d:6620,107d:6621]
+ 9 -> Leadtek PVR 2000 [107d:663b,107d:663c,107d:6632,107d:6630,107d:6638,107d:6631,107d:6637,107d:663d]
+ 10 -> IODATA GV-VCP3/PCI [10fc:d003]
+ 11 -> Prolink PlayTV PVR
+ 12 -> ASUS PVR-416 [1043:4823,1461:c111]
+ 13 -> MSI TV-@nywhere
+ 14 -> KWorld/VStream XPert DVB-T [17de:08a6]
+ 15 -> DViCO FusionHDTV DVB-T1 [18ac:db00]
+ 16 -> KWorld LTV883RF
+ 17 -> DViCO FusionHDTV 3 Gold-Q [18ac:d810,18ac:d800]
+ 18 -> Hauppauge Nova-T DVB-T [0070:9002,0070:9001,0070:9000]
+ 19 -> Conexant DVB-T reference design [14f1:0187]
+ 20 -> Provideo PV259 [1540:2580]
+ 21 -> DViCO FusionHDTV DVB-T Plus [18ac:db10,18ac:db11]
+ 22 -> pcHDTV HD3000 HDTV [7063:3000]
+ 23 -> digitalnow DNTV Live! DVB-T [17de:a8a6]
+ 24 -> Hauppauge WinTV 28xxx (Roslyn) models [0070:2801]
+ 25 -> Digital-Logic MICROSPACE Entertainment Center (MEC) [14f1:0342]
+ 26 -> IODATA GV/BCTV7E [10fc:d035]
+ 27 -> PixelView PlayTV Ultra Pro (Stereo)
+ 28 -> DViCO FusionHDTV 3 Gold-T [18ac:d820]
+ 29 -> ADS Tech Instant TV DVB-T PCI [1421:0334]
+ 30 -> TerraTec Cinergy 1400 DVB-T [153b:1166]
+ 31 -> DViCO FusionHDTV 5 Gold [18ac:d500]
+ 32 -> AverMedia UltraTV Media Center PCI 550 [1461:8011]
+ 33 -> Kworld V-Stream Xpert DVD
+ 34 -> ATI HDTV Wonder [1002:a101]
+ 35 -> WinFast DTV1000-T [107d:665f]
+ 36 -> AVerTV 303 (M126) [1461:000a]
+ 37 -> Hauppauge Nova-S-Plus DVB-S [0070:9201,0070:9202]
+ 38 -> Hauppauge Nova-SE2 DVB-S [0070:9200]
+ 39 -> KWorld DVB-S 100 [17de:08b2,1421:0341]
+ 40 -> Hauppauge WinTV-HVR1100 DVB-T/Hybrid [0070:9400,0070:9402]
+ 41 -> Hauppauge WinTV-HVR1100 DVB-T/Hybrid (Low Profile) [0070:9800,0070:9802]
+ 42 -> digitalnow DNTV Live! DVB-T Pro [1822:0025,1822:0019]
+ 43 -> KWorld/VStream XPert DVB-T with cx22702 [17de:08a1,12ab:2300]
+ 44 -> DViCO FusionHDTV DVB-T Dual Digital [18ac:db50,18ac:db54]
+ 45 -> KWorld HardwareMpegTV XPert [17de:0840,1421:0305]
+ 46 -> DViCO FusionHDTV DVB-T Hybrid [18ac:db40,18ac:db44]
+ 47 -> pcHDTV HD5500 HDTV [7063:5500]
+ 48 -> Kworld MCE 200 Deluxe [17de:0841]
+ 49 -> PixelView PlayTV P7000 [1554:4813]
+ 50 -> NPG Tech Real TV FM Top 10 [14f1:0842]
+ 51 -> WinFast DTV2000 H [107d:665e]
+ 52 -> Geniatech DVB-S [14f1:0084]
+ 53 -> Hauppauge WinTV-HVR3000 TriMode Analog/DVB-S/DVB-T [0070:1404,0070:1400,0070:1401,0070:1402]
+ 54 -> Norwood Micro TV Tuner
+ 55 -> Shenzhen Tungsten Ages Tech TE-DTV-250 / Swann OEM [c180:c980]
+ 56 -> Hauppauge WinTV-HVR1300 DVB-T/Hybrid MPEG Encoder [0070:9600,0070:9601,0070:9602]
+ 57 -> ADS Tech Instant Video PCI [1421:0390]
+ 58 -> Pinnacle PCTV HD 800i [11bd:0051]
+ 59 -> DViCO FusionHDTV 5 PCI nano [18ac:d530]
+ 60 -> Pinnacle Hybrid PCTV [12ab:1788]
+ 61 -> Leadtek TV2000 XP Global [107d:6f18,107d:6618,107d:6619]
+ 62 -> PowerColor RA330 [14f1:ea3d]
+ 63 -> Geniatech X8000-MT DVBT [14f1:8852]
+ 64 -> DViCO FusionHDTV DVB-T PRO [18ac:db30]
+ 65 -> DViCO FusionHDTV 7 Gold [18ac:d610]
+ 66 -> Prolink Pixelview MPEG 8000GT [1554:4935]
+ 67 -> Kworld PlusTV HD PCI 120 (ATSC 120) [17de:08c1]
+ 68 -> Hauppauge WinTV-HVR4000 DVB-S/S2/T/Hybrid [0070:6900,0070:6904,0070:6902]
+ 69 -> Hauppauge WinTV-HVR4000(Lite) DVB-S/S2 [0070:6905,0070:6906]
+ 70 -> TeVii S460 DVB-S/S2 [d460:9022]
+ 71 -> Omicom SS4 DVB-S/S2 PCI [A044:2011]
+ 72 -> TBS 8920 DVB-S/S2 [8920:8888]
+ 73 -> TeVii S420 DVB-S [d420:9022]
+ 74 -> Prolink Pixelview Global Extreme [1554:4976]
+ 75 -> PROF 7300 DVB-S/S2 [B033:3033]
+ 76 -> SATTRADE ST4200 DVB-S/S2 [b200:4200]
+ 77 -> TBS 8910 DVB-S [8910:8888]
+ 78 -> Prof 6200 DVB-S [b022:3022]
+ 79 -> Terratec Cinergy HT PCI MKII [153b:1177]
+ 80 -> Hauppauge WinTV-IR Only [0070:9290]
+ 81 -> Leadtek WinFast DTV1800 Hybrid [107d:6654]
+ 82 -> WinFast DTV2000 H rev. J [107d:6f2b]
+ 83 -> Prof 7301 DVB-S/S2 [b034:3034]
+ 84 -> Samsung SMT 7020 DVB-S [18ac:dc00,18ac:dccd]
+ 85 -> Twinhan VP-1027 DVB-S [1822:0023]
+ 86 -> TeVii S464 DVB-S/S2 [d464:9022]
+ 87 -> Leadtek WinFast DTV2000 H PLUS [107d:6f42]
+ 88 -> Leadtek WinFast DTV1800 H (XC4000) [107d:6f38]
+ 89 -> Leadtek TV2000 XP Global (SC4100) [107d:6f36]
+ 90 -> Leadtek TV2000 XP Global (XC4100) [107d:6f43]
diff --git a/Documentation/video4linux/CARDLIST.em28xx b/Documentation/video4linux/CARDLIST.em28xx
new file mode 100644
index 000000000..3700edb81
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.em28xx
@@ -0,0 +1,96 @@
+ 0 -> Unknown EM2800 video grabber (em2800) [eb1a:2800]
+ 1 -> Unknown EM2750/28xx video grabber (em2820/em2840) [eb1a:2710,eb1a:2820,eb1a:2821,eb1a:2860,eb1a:2861,eb1a:2862,eb1a:2863,eb1a:2870,eb1a:2881,eb1a:2883,eb1a:2868,eb1a:2875]
+ 2 -> Terratec Cinergy 250 USB (em2820/em2840) [0ccd:0036]
+ 3 -> Pinnacle PCTV USB 2 (em2820/em2840) [2304:0208]
+ 4 -> Hauppauge WinTV USB 2 (em2820/em2840) [2040:4200,2040:4201]
+ 5 -> MSI VOX USB 2.0 (em2820/em2840)
+ 6 -> Terratec Cinergy 200 USB (em2800)
+ 7 -> Leadtek Winfast USB II (em2800) [0413:6023]
+ 8 -> Kworld USB2800 (em2800)
+ 9 -> Pinnacle Dazzle DVC 90/100/101/107 / Kaiser Baas Video to DVD maker (em2820/em2840) [1b80:e302,1b80:e304,2304:0207,2304:021a,093b:a003]
+ 10 -> Hauppauge WinTV HVR 900 (em2880) [2040:6500]
+ 11 -> Terratec Hybrid XS (em2880)
+ 12 -> Kworld PVR TV 2800 RF (em2820/em2840)
+ 13 -> Terratec Prodigy XS (em2880)
+ 14 -> SIIG AVTuner-PVR / Pixelview Prolink PlayTV USB 2.0 (em2820/em2840)
+ 15 -> V-Gear PocketTV (em2800)
+ 16 -> Hauppauge WinTV HVR 950 (em2883) [2040:6513,2040:6517,2040:651b]
+ 17 -> Pinnacle PCTV HD Pro Stick (em2880) [2304:0227]
+ 18 -> Hauppauge WinTV HVR 900 (R2) (em2880) [2040:6502]
+ 19 -> EM2860/SAA711X Reference Design (em2860)
+ 20 -> AMD ATI TV Wonder HD 600 (em2880) [0438:b002]
+ 21 -> eMPIA Technology, Inc. GrabBeeX+ Video Encoder (em2800) [eb1a:2801]
+ 22 -> EM2710/EM2750/EM2751 webcam grabber (em2750) [eb1a:2750,eb1a:2751]
+ 23 -> Huaqi DLCW-130 (em2750)
+ 24 -> D-Link DUB-T210 TV Tuner (em2820/em2840) [2001:f112]
+ 25 -> Gadmei UTV310 (em2820/em2840)
+ 26 -> Hercules Smart TV USB 2.0 (em2820/em2840)
+ 27 -> Pinnacle PCTV USB 2 (Philips FM1216ME) (em2820/em2840)
+ 28 -> Leadtek Winfast USB II Deluxe (em2820/em2840)
+ 29 -> EM2860/TVP5150 Reference Design (em2860)
+ 30 -> Videology 20K14XUSB USB2.0 (em2820/em2840)
+ 31 -> Usbgear VD204v9 (em2821)
+ 32 -> Supercomp USB 2.0 TV (em2821)
+ 33 -> Elgato Video Capture (em2860) [0fd9:0033]
+ 34 -> Terratec Cinergy A Hybrid XS (em2860) [0ccd:004f]
+ 35 -> Typhoon DVD Maker (em2860)
+ 36 -> NetGMBH Cam (em2860)
+ 37 -> Gadmei UTV330 (em2860) [eb1a:50a6]
+ 38 -> Yakumo MovieMixer (em2861)
+ 39 -> KWorld PVRTV 300U (em2861) [eb1a:e300]
+ 40 -> Plextor ConvertX PX-TV100U (em2861) [093b:a005]
+ 41 -> Kworld 350 U DVB-T (em2870) [eb1a:e350]
+ 42 -> Kworld 355 U DVB-T (em2870) [eb1a:e355,eb1a:e357,eb1a:e359]
+ 43 -> Terratec Cinergy T XS (em2870) [0ccd:0043]
+ 44 -> Terratec Cinergy T XS (MT2060) (em2870)
+ 45 -> Pinnacle PCTV DVB-T (em2870)
+ 46 -> Compro, VideoMate U3 (em2870) [185b:2870]
+ 47 -> KWorld DVB-T 305U (em2880) [eb1a:e305]
+ 48 -> KWorld DVB-T 310U (em2880)
+ 49 -> MSI DigiVox A/D (em2880) [eb1a:e310]
+ 50 -> MSI DigiVox A/D II (em2880) [eb1a:e320]
+ 51 -> Terratec Hybrid XS Secam (em2880) [0ccd:004c]
+ 52 -> DNT DA2 Hybrid (em2881)
+ 53 -> Pinnacle Hybrid Pro (em2881)
+ 54 -> Kworld VS-DVB-T 323UR (em2882) [eb1a:e323]
+ 55 -> Terratec Cinnergy Hybrid T USB XS (em2882) (em2882) [0ccd:005e,0ccd:0042]
+ 56 -> Pinnacle Hybrid Pro (330e) (em2882) [2304:0226]
+ 57 -> Kworld PlusTV HD Hybrid 330 (em2883) [eb1a:a316]
+ 58 -> Compro VideoMate ForYou/Stereo (em2820/em2840) [185b:2041]
+ 59 -> Pinnacle PCTV HD Mini (em2874) [2304:023f]
+ 60 -> Hauppauge WinTV HVR 850 (em2883) [2040:651f]
+ 61 -> Pixelview PlayTV Box 4 USB 2.0 (em2820/em2840)
+ 62 -> Gadmei TVR200 (em2820/em2840)
+ 63 -> Kaiomy TVnPC U2 (em2860) [eb1a:e303]
+ 64 -> Easy Cap Capture DC-60 (em2860) [1b80:e309]
+ 65 -> IO-DATA GV-MVP/SZ (em2820/em2840) [04bb:0515]
+ 66 -> Empire dual TV (em2880)
+ 67 -> Terratec Grabby (em2860) [0ccd:0096,0ccd:10AF]
+ 68 -> Terratec AV350 (em2860) [0ccd:0084]
+ 69 -> KWorld ATSC 315U HDTV TV Box (em2882) [eb1a:a313]
+ 70 -> Evga inDtube (em2882)
+ 71 -> Silvercrest Webcam 1.3mpix (em2820/em2840)
+ 72 -> Gadmei UTV330+ (em2861)
+ 73 -> Reddo DVB-C USB TV Box (em2870)
+ 74 -> Actionmaster/LinXcel/Digitus VC211A (em2800)
+ 75 -> Dikom DK300 (em2882)
+ 76 -> KWorld PlusTV 340U or UB435-Q (ATSC) (em2870) [1b80:a340]
+ 77 -> EM2874 Leadership ISDBT (em2874)
+ 78 -> PCTV nanoStick T2 290e (em28174)
+ 79 -> Terratec Cinergy H5 (em2884) [eb1a:2885,0ccd:10a2,0ccd:10ad,0ccd:10b6]
+ 80 -> PCTV DVB-S2 Stick (460e) (em28174)
+ 81 -> Hauppauge WinTV HVR 930C (em2884) [2040:1605]
+ 82 -> Terratec Cinergy HTC Stick (em2884) [0ccd:00b2]
+ 83 -> Honestech Vidbox NW03 (em2860) [eb1a:5006]
+ 84 -> MaxMedia UB425-TC (em2874) [1b80:e425]
+ 85 -> PCTV QuatroStick (510e) (em2884) [2304:0242]
+ 86 -> PCTV QuatroStick nano (520e) (em2884) [2013:0251]
+ 87 -> Terratec Cinergy HTC USB XS (em2884) [0ccd:008e,0ccd:00ac]
+ 88 -> C3 Tech Digital Duo HDTV/SDTV USB (em2884) [1b80:e755]
+ 89 -> Delock 61959 (em2874) [1b80:e1cc]
+ 90 -> KWorld USB ATSC TV Stick UB435-Q V2 (em2874) [1b80:e346]
+ 91 -> SpeedLink Vicious And Devine Laplace webcam (em2765) [1ae7:9003,1ae7:9004]
+ 92 -> PCTV DVB-S2 Stick (461e) (em28178)
+ 93 -> KWorld USB ATSC TV Stick UB435-Q V3 (em2874) [1b80:e34c]
+ 94 -> PCTV tripleStick (292e) (em28178)
+ 95 -> Leadtek VC100 (em2861) [0413:6f07]
diff --git a/Documentation/video4linux/CARDLIST.ivtv b/Documentation/video4linux/CARDLIST.ivtv
new file mode 100644
index 000000000..a019e27e4
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.ivtv
@@ -0,0 +1,24 @@
+ 1 -> Hauppauge WinTV PVR-250
+ 2 -> Hauppauge WinTV PVR-350
+ 3 -> Hauppauge WinTV PVR-150 or PVR-500
+ 4 -> AVerMedia M179 [1461:a3ce,1461:a3cf]
+ 5 -> Yuan MPG600/Kuroutoshikou iTVC16-STVLP [12ab:fff3,12ab:ffff]
+ 6 -> Yuan MPG160/Kuroutoshikou iTVC15-STVLP [12ab:0000,10fc:40a0]
+ 7 -> Yuan PG600/DiamondMM PVR-550 [ff92:0070,ffab:0600]
+ 8 -> Adaptec AVC-2410 [9005:0093]
+ 9 -> Adaptec AVC-2010 [9005:0092]
+10 -> NAGASE TRANSGEAR 5000TV [1461:bfff]
+11 -> AOpen VA2000MAX-STN6 [0000:ff5f]
+12 -> YUAN MPG600GR/Kuroutoshikou CX23416GYC-STVLP [12ab:0600,fbab:0600,1154:0523]
+13 -> I/O Data GV-MVP/RX [10fc:d01e,10fc:d038,10fc:d039]
+14 -> I/O Data GV-MVP/RX2E [10fc:d025]
+15 -> GOTVIEW PCI DVD (partial support only) [12ab:0600]
+16 -> GOTVIEW PCI DVD2 Deluxe [ffac:0600]
+17 -> Yuan MPC622 [ff01:d998]
+18 -> Digital Cowboy DCT-MTVP1 [1461:bfff]
+19 -> Yuan PG600V2/GotView PCI DVD Lite [ffab:0600,ffad:0600]
+20 -> Club3D ZAP-TV1x01 [ffab:0600]
+21 -> AverTV MCE 116 Plus [1461:c439]
+22 -> ASUS Falcon2 [1043:4b66,1043:462e,1043:4b2e]
+23 -> AverMedia PVR-150 Plus [1461:c035]
+24 -> AverMedia EZMaker PCI Deluxe [1461:c03f]
diff --git a/Documentation/video4linux/CARDLIST.saa7134 b/Documentation/video4linux/CARDLIST.saa7134
new file mode 100644
index 000000000..a93d86455
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.saa7134
@@ -0,0 +1,194 @@
+ 0 -> UNKNOWN/GENERIC
+ 1 -> Proteus Pro [philips reference design] [1131:2001,1131:2001]
+ 2 -> LifeView FlyVIDEO3000 [5168:0138,4e42:0138]
+ 3 -> LifeView/Typhoon FlyVIDEO2000 [5168:0138,4e42:0138]
+ 4 -> EMPRESS [1131:6752]
+ 5 -> SKNet Monster TV [1131:4e85]
+ 6 -> Tevion MD 9717
+ 7 -> KNC One TV-Station RDS / Typhoon TV Tuner RDS [1131:fe01,1894:fe01]
+ 8 -> Terratec Cinergy 400 TV [153b:1142]
+ 9 -> Medion 5044
+ 10 -> Kworld/KuroutoShikou SAA7130-TVPCI
+ 11 -> Terratec Cinergy 600 TV [153b:1143]
+ 12 -> Medion 7134 [16be:0003,16be:5000]
+ 13 -> Typhoon TV+Radio 90031
+ 14 -> ELSA EX-VISION 300TV [1048:226b]
+ 15 -> ELSA EX-VISION 500TV [1048:226a]
+ 16 -> ASUS TV-FM 7134 [1043:4842,1043:4830,1043:4840]
+ 17 -> AOPEN VA1000 POWER [1131:7133]
+ 18 -> BMK MPEX No Tuner
+ 19 -> Compro VideoMate TV [185b:c100]
+ 20 -> Matrox CronosPlus [102B:48d0]
+ 21 -> 10MOONS PCI TV CAPTURE CARD [1131:2001]
+ 22 -> AverMedia M156 / Medion 2819 [1461:a70b]
+ 23 -> BMK MPEX Tuner
+ 24 -> KNC One TV-Station DVR [1894:a006]
+ 25 -> ASUS TV-FM 7133 [1043:4843]
+ 26 -> Pinnacle PCTV Stereo (saa7134) [11bd:002b]
+ 27 -> Manli MuchTV M-TV002
+ 28 -> Manli MuchTV M-TV001
+ 29 -> Nagase Sangyo TransGear 3000TV [1461:050c]
+ 30 -> Elitegroup ECS TVP3XP FM1216 Tuner Card(PAL-BG,FM) [1019:4cb4]
+ 31 -> Elitegroup ECS TVP3XP FM1236 Tuner Card (NTSC,FM) [1019:4cb5]
+ 32 -> AVACS SmartTV
+ 33 -> AVerMedia DVD EZMaker [1461:10ff]
+ 34 -> Noval Prime TV 7133
+ 35 -> AverMedia AverTV Studio 305 [1461:2115]
+ 36 -> UPMOST PURPLE TV [12ab:0800]
+ 37 -> Items MuchTV Plus / IT-005
+ 38 -> Terratec Cinergy 200 TV [153b:1152]
+ 39 -> LifeView FlyTV Platinum Mini [5168:0212,4e42:0212,5169:1502]
+ 40 -> Compro VideoMate TV PVR/FM [185b:c100]
+ 41 -> Compro VideoMate TV Gold+ [185b:c100]
+ 42 -> Sabrent SBT-TVFM (saa7130)
+ 43 -> :Zolid Xpert TV7134
+ 44 -> Empire PCI TV-Radio LE
+ 45 -> Avermedia AVerTV Studio 307 [1461:9715]
+ 46 -> AVerMedia Cardbus TV/Radio (E500) [1461:d6ee]
+ 47 -> Terratec Cinergy 400 mobile [153b:1162]
+ 48 -> Terratec Cinergy 600 TV MK3 [153b:1158]
+ 49 -> Compro VideoMate Gold+ Pal [185b:c200]
+ 50 -> Pinnacle PCTV 300i DVB-T + PAL [11bd:002d]
+ 51 -> ProVideo PV952 [1540:9524]
+ 52 -> AverMedia AverTV/305 [1461:2108]
+ 53 -> ASUS TV-FM 7135 [1043:4845]
+ 54 -> LifeView FlyTV Platinum FM / Gold [5168:0214,5168:5214,1489:0214,5168:0304]
+ 55 -> LifeView FlyDVB-T DUO / MSI TV@nywhere Duo [5168:0306,4E42:0306]
+ 56 -> Avermedia AVerTV 307 [1461:a70a]
+ 57 -> Avermedia AVerTV GO 007 FM [1461:f31f]
+ 58 -> ADS Tech Instant TV (saa7135) [1421:0350,1421:0351,1421:0370,1421:1370]
+ 59 -> Kworld/Tevion V-Stream Xpert TV PVR7134
+ 60 -> LifeView/Typhoon/Genius FlyDVB-T Duo Cardbus [5168:0502,4e42:0502,1489:0502]
+ 61 -> Philips TOUGH DVB-T reference design [1131:2004]
+ 62 -> Compro VideoMate TV Gold+II
+ 63 -> Kworld Xpert TV PVR7134
+ 64 -> FlyTV mini Asus Digimatrix [1043:0210]
+ 65 -> V-Stream Studio TV Terminator
+ 66 -> Yuan TUN-900 (saa7135)
+ 67 -> Beholder BeholdTV 409 FM [0000:4091]
+ 68 -> GoTView 7135 PCI [5456:7135]
+ 69 -> Philips EUROPA V3 reference design [1131:2004]
+ 70 -> Compro Videomate DVB-T300 [185b:c900]
+ 71 -> Compro Videomate DVB-T200 [185b:c901]
+ 72 -> RTD Embedded Technologies VFG7350 [1435:7350]
+ 73 -> RTD Embedded Technologies VFG7330 [1435:7330]
+ 74 -> LifeView FlyTV Platinum Mini2 [14c0:1212]
+ 75 -> AVerMedia AVerTVHD MCE A180 [1461:1044]
+ 76 -> SKNet MonsterTV Mobile [1131:4ee9]
+ 77 -> Pinnacle PCTV 40i/50i/110i (saa7133) [11bd:002e]
+ 78 -> ASUSTeK P7131 Dual [1043:4862]
+ 79 -> Sedna/MuchTV PC TV Cardbus TV/Radio (ITO25 Rev:2B)
+ 80 -> ASUS Digimatrix TV [1043:0210]
+ 81 -> Philips Tiger reference design [1131:2018]
+ 82 -> MSI TV@Anywhere plus [1462:6231,1462:8624]
+ 83 -> Terratec Cinergy 250 PCI TV [153b:1160]
+ 84 -> LifeView FlyDVB Trio [5168:0319]
+ 85 -> AverTV DVB-T 777 [1461:2c05,1461:2c05]
+ 86 -> LifeView FlyDVB-T / Genius VideoWonder DVB-T [5168:0301,1489:0301]
+ 87 -> ADS Instant TV Duo Cardbus PTV331 [0331:1421]
+ 88 -> Tevion/KWorld DVB-T 220RF [17de:7201]
+ 89 -> ELSA EX-VISION 700TV [1048:226c]
+ 90 -> Kworld ATSC110/115 [17de:7350,17de:7352]
+ 91 -> AVerMedia A169 B [1461:7360]
+ 92 -> AVerMedia A169 B1 [1461:6360]
+ 93 -> Medion 7134 Bridge #2 [16be:0005]
+ 94 -> LifeView FlyDVB-T Hybrid Cardbus/MSI TV @nywhere A/D NB [5168:3306,5168:3502,5168:3307,4e42:3502]
+ 95 -> LifeView FlyVIDEO3000 (NTSC) [5169:0138]
+ 96 -> Medion Md8800 Quadro [16be:0007,16be:0008,16be:000d]
+ 97 -> LifeView FlyDVB-S /Acorp TV134DS [5168:0300,4e42:0300]
+ 98 -> Proteus Pro 2309 [0919:2003]
+ 99 -> AVerMedia TV Hybrid A16AR [1461:2c00]
+100 -> Asus Europa2 OEM [1043:4860]
+101 -> Pinnacle PCTV 310i [11bd:002f]
+102 -> Avermedia AVerTV Studio 507 [1461:9715]
+103 -> Compro Videomate DVB-T200A
+104 -> Hauppauge WinTV-HVR1110 DVB-T/Hybrid [0070:6700,0070:6701,0070:6702,0070:6703,0070:6704,0070:6705]
+105 -> Terratec Cinergy HT PCMCIA [153b:1172]
+106 -> Encore ENLTV [1131:2342,1131:2341,3016:2344]
+107 -> Encore ENLTV-FM [1131:230f]
+108 -> Terratec Cinergy HT PCI [153b:1175]
+109 -> Philips Tiger - S Reference design
+110 -> Avermedia M102 [1461:f31e]
+111 -> ASUS P7131 4871 [1043:4871]
+112 -> ASUSTeK P7131 Hybrid [1043:4876]
+113 -> Elitegroup ECS TVP3XP FM1246 Tuner Card (PAL,FM) [1019:4cb6]
+114 -> KWorld DVB-T 210 [17de:7250]
+115 -> Sabrent PCMCIA TV-PCB05 [0919:2003]
+116 -> 10MOONS TM300 TV Card [1131:2304]
+117 -> Avermedia Super 007 [1461:f01d]
+118 -> Beholder BeholdTV 401 [0000:4016]
+119 -> Beholder BeholdTV 403 [0000:4036]
+120 -> Beholder BeholdTV 403 FM [0000:4037]
+121 -> Beholder BeholdTV 405 [0000:4050]
+122 -> Beholder BeholdTV 405 FM [0000:4051]
+123 -> Beholder BeholdTV 407 [0000:4070]
+124 -> Beholder BeholdTV 407 FM [0000:4071]
+125 -> Beholder BeholdTV 409 [0000:4090]
+126 -> Beholder BeholdTV 505 FM [5ace:5050]
+127 -> Beholder BeholdTV 507 FM / BeholdTV 509 FM [5ace:5070,5ace:5090]
+128 -> Beholder BeholdTV Columbus TV/FM [0000:5201]
+129 -> Beholder BeholdTV 607 FM [5ace:6070]
+130 -> Beholder BeholdTV M6 [5ace:6190]
+131 -> Twinhan Hybrid DTV-DVB 3056 PCI [1822:0022]
+132 -> Genius TVGO AM11MCE
+133 -> NXP Snake DVB-S reference design
+134 -> Medion/Creatix CTX953 Hybrid [16be:0010]
+135 -> MSI TV@nywhere A/D v1.1 [1462:8625]
+136 -> AVerMedia Cardbus TV/Radio (E506R) [1461:f436]
+137 -> AVerMedia Hybrid TV/Radio (A16D) [1461:f936]
+138 -> Avermedia M115 [1461:a836]
+139 -> Compro VideoMate T750 [185b:c900]
+140 -> Avermedia DVB-S Pro A700 [1461:a7a1]
+141 -> Avermedia DVB-S Hybrid+FM A700 [1461:a7a2]
+142 -> Beholder BeholdTV H6 [5ace:6290]
+143 -> Beholder BeholdTV M63 [5ace:6191]
+144 -> Beholder BeholdTV M6 Extra [5ace:6193]
+145 -> AVerMedia MiniPCI DVB-T Hybrid M103 [1461:f636,1461:f736]
+146 -> ASUSTeK P7131 Analog
+147 -> Asus Tiger 3in1 [1043:4878]
+148 -> Encore ENLTV-FM v5.3 [1a7f:2008]
+149 -> Avermedia PCI pure analog (M135A) [1461:f11d]
+150 -> Zogis Real Angel 220
+151 -> ADS Tech Instant HDTV [1421:0380]
+152 -> Asus Tiger Rev:1.00 [1043:4857]
+153 -> Kworld Plus TV Analog Lite PCI [17de:7128]
+154 -> Avermedia AVerTV GO 007 FM Plus [1461:f31d]
+155 -> Hauppauge WinTV-HVR1150 ATSC/QAM-Hybrid [0070:6706,0070:6708]
+156 -> Hauppauge WinTV-HVR1120 DVB-T/Hybrid [0070:6707,0070:6709,0070:670a]
+157 -> Avermedia AVerTV Studio 507UA [1461:a11b]
+158 -> AVerMedia Cardbus TV/Radio (E501R) [1461:b7e9]
+159 -> Beholder BeholdTV 505 RDS [0000:505B]
+160 -> Beholder BeholdTV 507 RDS [0000:5071]
+161 -> Beholder BeholdTV 507 RDS [0000:507B]
+162 -> Beholder BeholdTV 607 FM [5ace:6071]
+163 -> Beholder BeholdTV 609 FM [5ace:6090]
+164 -> Beholder BeholdTV 609 FM [5ace:6091]
+165 -> Beholder BeholdTV 607 RDS [5ace:6072]
+166 -> Beholder BeholdTV 607 RDS [5ace:6073]
+167 -> Beholder BeholdTV 609 RDS [5ace:6092]
+168 -> Beholder BeholdTV 609 RDS [5ace:6093]
+169 -> Compro VideoMate S350/S300 [185b:c900]
+170 -> AverMedia AverTV Studio 505 [1461:a115]
+171 -> Beholder BeholdTV X7 [5ace:7595]
+172 -> RoverMedia TV Link Pro FM [19d1:0138]
+173 -> Zolid Hybrid TV Tuner PCI [1131:2004]
+174 -> Asus Europa Hybrid OEM [1043:4847]
+175 -> Leadtek Winfast DTV1000S [107d:6655]
+176 -> Beholder BeholdTV 505 RDS [0000:5051]
+177 -> Hawell HW-404M7
+178 -> Beholder BeholdTV H7 [5ace:7190]
+179 -> Beholder BeholdTV A7 [5ace:7090]
+180 -> Avermedia PCI M733A [1461:4155,1461:4255]
+181 -> TechoTrend TT-budget T-3000 [13c2:2804]
+182 -> Kworld PCI SBTVD/ISDB-T Full-Seg Hybrid [17de:b136]
+183 -> Compro VideoMate Vista M1F [185b:c900]
+184 -> Encore ENLTV-FM 3 [1a7f:2108]
+185 -> MagicPro ProHDTV Pro2 DMB-TH/Hybrid [17de:d136]
+186 -> Beholder BeholdTV 501 [5ace:5010]
+187 -> Beholder BeholdTV 503 FM [5ace:5030]
+188 -> Sensoray 811/911 [6000:0811,6000:0911]
+189 -> Kworld PC150-U [17de:a134]
+190 -> Asus My Cinema PS3-100 [1043:48cd]
+191 -> Hawell HW-9004V1
+192 -> AverMedia AverTV Satellite Hybrid+FM A706 [1461:2055]
+193 -> WIS Voyager or compatible [1905:7007]
diff --git a/Documentation/video4linux/CARDLIST.saa7164 b/Documentation/video4linux/CARDLIST.saa7164
new file mode 100644
index 000000000..2205e8d55
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.saa7164
@@ -0,0 +1,11 @@
+ 0 -> Unknown
+ 1 -> Generic Rev2
+ 2 -> Generic Rev3
+ 3 -> Hauppauge WinTV-HVR2250 [0070:8880,0070:8810]
+ 4 -> Hauppauge WinTV-HVR2200 [0070:8980]
+ 5 -> Hauppauge WinTV-HVR2200 [0070:8900]
+ 6 -> Hauppauge WinTV-HVR2200 [0070:8901]
+ 7 -> Hauppauge WinTV-HVR2250 [0070:8891,0070:8851]
+ 8 -> Hauppauge WinTV-HVR2250 [0070:88A1]
+ 9 -> Hauppauge WinTV-HVR2200 [0070:8940]
+ 10 -> Hauppauge WinTV-HVR2200 [0070:8953]
diff --git a/Documentation/video4linux/CARDLIST.tm6000 b/Documentation/video4linux/CARDLIST.tm6000
new file mode 100644
index 000000000..b5edce487
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.tm6000
@@ -0,0 +1,16 @@
+ 1 -> Generic tm5600 board (tm5600) [6000:0001]
+ 2 -> Generic tm6000 board (tm6000) [6000:0001]
+ 3 -> Generic tm6010 board (tm6010) [6000:0002]
+ 4 -> 10Moons UT821 (tm5600) [6000:0001]
+ 5 -> 10Moons UT330 (tm5600)
+ 6 -> ADSTech Dual TV (tm6000) [06e1:f332]
+ 7 -> FreeCom and similar (tm6000) [14aa:0620]
+ 8 -> ADSTech Mini Dual TV (tm6000) [06e1:b339]
+ 9 -> Hauppauge WinTV HVR-900H/USB2 Stick (tm6010) [2040:6600,2040:6601,2040:6610,2040:6611]
+ 10 -> Beholder Wander (tm6010) [6000:dec0]
+ 11 -> Beholder Voyager (tm6010) [6000:dec1]
+ 12 -> TerraTec Cinergy Hybrid XE/Cinergy Hybrid Stick (tm6010) [0ccd:0086,0ccd:00a5]
+ 13 -> TwinHan TU501 (tm6010) [13d3:3240,13d3:3241,13d3:3243,13d3:3264]
+ 14 -> Beholder Wander Lite (tm6010) [6000:dec2]
+ 15 -> Beholder Voyager Lite (tm6010) [6000:dec3]
+
diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner
new file mode 100644
index 000000000..ac8862184
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.tuner
@@ -0,0 +1,91 @@
+tuner=0 - Temic PAL (4002 FH5)
+tuner=1 - Philips PAL_I (FI1246 and compatibles)
+tuner=2 - Philips NTSC (FI1236,FM1236 and compatibles)
+tuner=3 - Philips (SECAM+PAL_BG) (FI1216MF, FM1216MF, FR1216MF)
+tuner=4 - NoTuner
+tuner=5 - Philips PAL_BG (FI1216 and compatibles)
+tuner=6 - Temic NTSC (4032 FY5)
+tuner=7 - Temic PAL_I (4062 FY5)
+tuner=8 - Temic NTSC (4036 FY5)
+tuner=9 - Alps HSBH1
+tuner=10 - Alps TSBE1
+tuner=11 - Alps TSBB5
+tuner=12 - Alps TSBE5
+tuner=13 - Alps TSBC5
+tuner=14 - Temic PAL_BG (4006FH5)
+tuner=15 - Alps TSCH6
+tuner=16 - Temic PAL_DK (4016 FY5)
+tuner=17 - Philips NTSC_M (MK2)
+tuner=18 - Temic PAL_I (4066 FY5)
+tuner=19 - Temic PAL* auto (4006 FN5)
+tuner=20 - Temic PAL_BG (4009 FR5) or PAL_I (4069 FR5)
+tuner=21 - Temic NTSC (4039 FR5)
+tuner=22 - Temic PAL/SECAM multi (4046 FM5)
+tuner=23 - Philips PAL_DK (FI1256 and compatibles)
+tuner=24 - Philips PAL/SECAM multi (FQ1216ME)
+tuner=25 - LG PAL_I+FM (TAPC-I001D)
+tuner=26 - LG PAL_I (TAPC-I701D)
+tuner=27 - LG NTSC+FM (TPI8NSR01F)
+tuner=28 - LG PAL_BG+FM (TPI8PSB01D)
+tuner=29 - LG PAL_BG (TPI8PSB11D)
+tuner=30 - Temic PAL* auto + FM (4009 FN5)
+tuner=31 - SHARP NTSC_JP (2U5JF5540)
+tuner=32 - Samsung PAL TCPM9091PD27
+tuner=33 - MT20xx universal
+tuner=34 - Temic PAL_BG (4106 FH5)
+tuner=35 - Temic PAL_DK/SECAM_L (4012 FY5)
+tuner=36 - Temic NTSC (4136 FY5)
+tuner=37 - LG PAL (newer TAPC series)
+tuner=38 - Philips PAL/SECAM multi (FM1216ME MK3)
+tuner=39 - LG NTSC (newer TAPC series)
+tuner=40 - HITACHI V7-J180AT
+tuner=41 - Philips PAL_MK (FI1216 MK)
+tuner=42 - Philips FCV1236D ATSC/NTSC dual in
+tuner=43 - Philips NTSC MK3 (FM1236MK3 or FM1236/F)
+tuner=44 - Philips 4 in 1 (ATI TV Wonder Pro/Conexant)
+tuner=45 - Microtune 4049 FM5
+tuner=46 - Panasonic VP27s/ENGE4324D
+tuner=47 - LG NTSC (TAPE series)
+tuner=48 - Tenna TNF 8831 BGFF)
+tuner=49 - Microtune 4042 FI5 ATSC/NTSC dual in
+tuner=50 - TCL 2002N
+tuner=51 - Philips PAL/SECAM_D (FM 1256 I-H3)
+tuner=52 - Thomson DTT 7610 (ATSC/NTSC)
+tuner=53 - Philips FQ1286
+tuner=54 - Philips/NXP TDA 8290/8295 + 8275/8275A/18271
+tuner=55 - TCL 2002MB
+tuner=56 - Philips PAL/SECAM multi (FQ1216AME MK4)
+tuner=57 - Philips FQ1236A MK4
+tuner=58 - Ymec TVision TVF-8531MF/8831MF/8731MF
+tuner=59 - Ymec TVision TVF-5533MF
+tuner=60 - Thomson DTT 761X (ATSC/NTSC)
+tuner=61 - Tena TNF9533-D/IF/TNF9533-B/DF
+tuner=62 - Philips TEA5767HN FM Radio
+tuner=63 - Philips FMD1216ME MK3 Hybrid Tuner
+tuner=64 - LG TDVS-H06xF
+tuner=65 - Ymec TVF66T5-B/DFF
+tuner=66 - LG TALN series
+tuner=67 - Philips TD1316 Hybrid Tuner
+tuner=68 - Philips TUV1236D ATSC/NTSC dual in
+tuner=69 - Tena TNF 5335 and similar models
+tuner=70 - Samsung TCPN 2121P30A
+tuner=71 - Xceive xc2028/xc3028 tuner
+tuner=72 - Thomson FE6600
+tuner=73 - Samsung TCPG 6121P30A
+tuner=75 - Philips TEA5761 FM Radio
+tuner=76 - Xceive 5000 tuner
+tuner=77 - TCL tuner MF02GIP-5N-E
+tuner=78 - Philips FMD1216MEX MK3 Hybrid Tuner
+tuner=79 - Philips PAL/SECAM multi (FM1216 MK5)
+tuner=80 - Philips FQ1216LME MK3 PAL/SECAM w/active loopthrough
+tuner=81 - Partsnic (Daewoo) PTI-5NF05
+tuner=82 - Philips CU1216L
+tuner=83 - NXP TDA18271
+tuner=84 - Sony BTF-Pxn01Z
+tuner=85 - Philips FQ1236 MK5
+tuner=86 - Tena TNF5337 MFD
+tuner=87 - Xceive 4000 tuner
+tuner=88 - Xceive 5000C tuner
+tuner=89 - Sony BTF-PG472Z PAL/SECAM
+tuner=90 - Sony BTF-PK467Z NTSC-M-JP
+tuner=91 - Sony BTF-PB463Z NTSC-M
diff --git a/Documentation/video4linux/CARDLIST.usbvision b/Documentation/video4linux/CARDLIST.usbvision
new file mode 100644
index 000000000..6fd1af365
--- /dev/null
+++ b/Documentation/video4linux/CARDLIST.usbvision
@@ -0,0 +1,67 @@
+ 0 -> Xanboo [0a6f:0400]
+ 1 -> Belkin USB VideoBus II Adapter [050d:0106]
+ 2 -> Belkin Components USB VideoBus [050d:0207]
+ 3 -> Belkin USB VideoBus II [050d:0208]
+ 4 -> echoFX InterView Lite [0571:0002]
+ 5 -> USBGear USBG-V1 resp. HAMA USB [0573:0003]
+ 6 -> D-Link V100 [0573:0400]
+ 7 -> X10 USB Camera [0573:2000]
+ 8 -> Hauppauge WinTV USB Live (PAL B/G) [0573:2d00]
+ 9 -> Hauppauge WinTV USB Live Pro (NTSC M/N) [0573:2d01]
+ 10 -> Zoran Co. PMD (Nogatech) AV-grabber Manhattan [0573:2101]
+ 11 -> Nogatech USB-TV (NTSC) FM [0573:4100]
+ 12 -> PNY USB-TV (NTSC) FM [0573:4110]
+ 13 -> PixelView PlayTv-USB PRO (PAL) FM [0573:4450]
+ 14 -> ZTV ZT-721 2.4GHz USB A/V Receiver [0573:4550]
+ 15 -> Hauppauge WinTV USB (NTSC M/N) [0573:4d00]
+ 16 -> Hauppauge WinTV USB (PAL B/G) [0573:4d01]
+ 17 -> Hauppauge WinTV USB (PAL I) [0573:4d02]
+ 18 -> Hauppauge WinTV USB (PAL/SECAM L) [0573:4d03]
+ 19 -> Hauppauge WinTV USB (PAL D/K) [0573:4d04]
+ 20 -> Hauppauge WinTV USB (NTSC FM) [0573:4d10]
+ 21 -> Hauppauge WinTV USB (PAL B/G FM) [0573:4d11]
+ 22 -> Hauppauge WinTV USB (PAL I FM) [0573:4d12]
+ 23 -> Hauppauge WinTV USB (PAL D/K FM) [0573:4d14]
+ 24 -> Hauppauge WinTV USB Pro (NTSC M/N) [0573:4d2a]
+ 25 -> Hauppauge WinTV USB Pro (NTSC M/N) V2 [0573:4d2b]
+ 26 -> Hauppauge WinTV USB Pro (PAL/SECAM B/G/I/D/K/L) [0573:4d2c]
+ 27 -> Hauppauge WinTV USB Pro (NTSC M/N) V3 [0573:4d20]
+ 28 -> Hauppauge WinTV USB Pro (PAL B/G) [0573:4d21]
+ 29 -> Hauppauge WinTV USB Pro (PAL I) [0573:4d22]
+ 30 -> Hauppauge WinTV USB Pro (PAL/SECAM L) [0573:4d23]
+ 31 -> Hauppauge WinTV USB Pro (PAL D/K) [0573:4d24]
+ 32 -> Hauppauge WinTV USB Pro (PAL/SECAM BGDK/I/L) [0573:4d25]
+ 33 -> Hauppauge WinTV USB Pro (PAL/SECAM BGDK/I/L) V2 [0573:4d26]
+ 34 -> Hauppauge WinTV USB Pro (PAL B/G) V2 [0573:4d27]
+ 35 -> Hauppauge WinTV USB Pro (PAL B/G,D/K) [0573:4d28]
+ 36 -> Hauppauge WinTV USB Pro (PAL I,D/K) [0573:4d29]
+ 37 -> Hauppauge WinTV USB Pro (NTSC M/N FM) [0573:4d30]
+ 38 -> Hauppauge WinTV USB Pro (PAL B/G FM) [0573:4d31]
+ 39 -> Hauppauge WinTV USB Pro (PAL I FM) [0573:4d32]
+ 40 -> Hauppauge WinTV USB Pro (PAL D/K FM) [0573:4d34]
+ 41 -> Hauppauge WinTV USB Pro (Temic PAL/SECAM B/G/I/D/K/L FM) [0573:4d35]
+ 42 -> Hauppauge WinTV USB Pro (Temic PAL B/G FM) [0573:4d36]
+ 43 -> Hauppauge WinTV USB Pro (PAL/SECAM B/G/I/D/K/L FM) [0573:4d37]
+ 44 -> Hauppauge WinTV USB Pro (NTSC M/N FM) V2 [0573:4d38]
+ 45 -> Camtel Technology USB TV Genie Pro FM Model TVB330 [0768:0006]
+ 46 -> Digital Video Creator I [07d0:0001]
+ 47 -> Global Village GV-007 (NTSC) [07d0:0002]
+ 48 -> Dazzle Fusion Model DVC-50 Rev 1 (NTSC) [07d0:0003]
+ 49 -> Dazzle Fusion Model DVC-80 Rev 1 (PAL) [07d0:0004]
+ 50 -> Dazzle Fusion Model DVC-90 Rev 1 (SECAM) [07d0:0005]
+ 51 -> Eskape Labs MyTV2Go [07f8:9104]
+ 52 -> Pinnacle Studio PCTV USB (PAL) [2304:010d]
+ 53 -> Pinnacle Studio PCTV USB (SECAM) [2304:0109]
+ 54 -> Pinnacle Studio PCTV USB (PAL) FM [2304:0110]
+ 55 -> Miro PCTV USB [2304:0111]
+ 56 -> Pinnacle Studio PCTV USB (NTSC) FM [2304:0112]
+ 57 -> Pinnacle Studio PCTV USB (PAL) FM V2 [2304:0210]
+ 58 -> Pinnacle Studio PCTV USB (NTSC) FM V2 [2304:0212]
+ 59 -> Pinnacle Studio PCTV USB (PAL) FM V3 [2304:0214]
+ 60 -> Pinnacle Studio Linx Video input cable (NTSC) [2304:0300]
+ 61 -> Pinnacle Studio Linx Video input cable (PAL) [2304:0301]
+ 62 -> Pinnacle PCTV Bungee USB (PAL) FM [2304:0419]
+ 63 -> Hauppauge WinTv-USB [2400:4200]
+ 64 -> Pinnacle Studio PCTV USB (NTSC) FM V3 [2304:0113]
+ 65 -> Nogatech USB MicroCam NTSC (NV3000N) [0573:3000]
+ 66 -> Nogatech USB MicroCam PAL (NV3001P) [0573:3001]
diff --git a/Documentation/video4linux/Makefile b/Documentation/video4linux/Makefile
new file mode 100644
index 000000000..65a351d75
--- /dev/null
+++ b/Documentation/video4linux/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_VIDEO_PCI_SKELETON) := v4l2-pci-skeleton.o
diff --git a/Documentation/video4linux/README.cpia2 b/Documentation/video4linux/README.cpia2
new file mode 100644
index 000000000..38e742fd0
--- /dev/null
+++ b/Documentation/video4linux/README.cpia2
@@ -0,0 +1,130 @@
+$Id: README,v 1.7 2005/08/29 23:39:57 sbertin Exp $
+
+1. Introduction
+
+ This is a driver for STMicroelectronics's CPiA2 (second generation
+Colour Processor Interface ASIC) based cameras. This camera outputs an MJPEG
+stream at up to vga size. It implements the Video4Linux interface as much as
+possible. Since the V4L interface does not support compressed formats, only
+an mjpeg enabled application can be used with the camera. We have modified the
+gqcam application to view this stream.
+
+ The driver is implemented as two kernel modules. The cpia2 module
+contains the camera functions and the V4L interface. The cpia2_usb module
+contains usb specific functions. The main reason for this was the size of the
+module was getting out of hand, so I separated them. It is not likely that
+there will be a parallel port version.
+
+FEATURES:
+ - Supports cameras with the Vision stv6410 (CIF) and stv6500 (VGA) cmos
+ sensors. I only have the vga sensor, so can't test the other.
+ - Image formats: VGA, QVGA, CIF, QCIF, and a number of sizes in between.
+ VGA and QVGA are the native image sizes for the VGA camera. CIF is done
+ in the coprocessor by scaling QVGA. All other sizes are done by clipping.
+ - Palette: YCrCb, compressed with MJPEG.
+ - Some compression parameters are settable.
+ - Sensor framerate is adjustable (up to 30 fps CIF, 15 fps VGA).
+ - Adjust brightness, color, contrast while streaming.
+ - Flicker control settable for 50 or 60 Hz mains frequency.
+
+2. Making and installing the stv672 driver modules:
+
+ Requirements:
+ -------------
+ This should work with 2.4 (2.4.23 and later) and 2.6 kernels, but has
+only been tested on 2.6. Video4Linux must be either compiled into the kernel or
+available as a module. Video4Linux2 is automatically detected and made
+available at compile time.
+
+ Compiling:
+ ----------
+ As root, do a make install. This will compile and install the modules
+into the media/video directory in the module tree. For 2.4 kernels, use
+Makefile_2.4 (aka do make -f Makefile_2.4 install).
+
+ Setup:
+ ------
+ Use 'modprobe cpia2' to load and 'modprobe -r cpia2' to unload. This
+may be done automatically by your distribution.
+
+3. Driver options
+
+ Option Description
+ ------ -----------
+ video_nr video device to register (0=/dev/video0, etc)
+ range -1 to 64. default is -1 (first available)
+ If you have more than 1 camera, this MUST be -1.
+ buffer_size Size for each frame buffer in bytes (default 68k)
+ num_buffers Number of frame buffers (1-32, default 3)
+ alternate USB Alternate (2-7, default 7)
+ flicker_freq Frequency for flicker reduction(50 or 60, default 60)
+ flicker_mode 0 to disable, or 1 to enable flicker reduction.
+ (default 0). This is only effective if the camera
+ uses a stv0672 coprocessor.
+
+ Setting the options:
+ --------------------
+ If you are using modules, edit /etc/modules.conf and add an options
+line like this:
+ options cpia2 num_buffers=3 buffer_size=65535
+
+ If the driver is compiled into the kernel, at boot time specify them
+like this:
+ cpia2.num_buffers=3 cpia2.buffer_size=65535
+
+ What buffer size should I use?
+ ------------------------------
+ The maximum image size depends on the alternate you choose, and the
+frame rate achieved by the camera. If the compression engine is able to
+keep up with the frame rate, the maximum image size is given by the table
+below.
+ The compression engine starts out at maximum compression, and will
+increase image quality until it is close to the size in the table. As long
+as the compression engine can keep up with the frame rate, after a short time
+the images will all be about the size in the table, regardless of resolution.
+ At low alternate settings, the compression engine may not be able to
+compress the image enough and will reduce the frame rate by producing larger
+images.
+ The default of 68k should be good for most users. This will handle
+any alternate at frame rates down to 15fps. For lower frame rates, it may
+be necessary to increase the buffer size to avoid having frames dropped due
+to insufficient space.
+
+ Image size(bytes)
+ Alternate bytes/ms 15fps 30fps
+ 2 128 8533 4267
+ 3 384 25600 12800
+ 4 640 42667 21333
+ 5 768 51200 25600
+ 6 896 59733 29867
+ 7 1023 68200 34100
+
+ How many buffers should I use?
+ ------------------------------
+ For normal streaming, 3 should give the best results. With only 2,
+it is possible for the camera to finish sending one image just after a
+program has started reading the other. If this happens, the driver must drop
+a frame. The exception to this is if you have a heavily loaded machine. In
+this case use 2 buffers. You are probably not reading at the full frame rate.
+If the camera can send multiple images before a read finishes, it could
+overwrite the third buffer before the read finishes, leading to a corrupt
+image. Single and double buffering have extra checks to avoid overwriting.
+
+4. Using the camera
+
+ We are providing a modified gqcam application to view the output. In
+order to avoid confusion, here it is called mview. There is also the qx5view
+program which can also control the lights on the qx5 microscope. MJPEG Tools
+(http://mjpeg.sourceforge.net) can also be used to record from the camera.
+
+5. Notes to developers:
+
+ - This is a driver version stripped of the 2.4 back compatibility
+ and old MJPEG ioctl API. See cpia2.sf.net for 2.4 support.
+
+6. Thanks:
+
+ - Peter Pregler <Peter_Pregler@email.com>,
+ Scott J. Bertin <scottbertin@yahoo.com>, and
+ Jarl Totland <Jarl.Totland@bdc.no> for the original cpia driver, which
+ this one was modelled from.
diff --git a/Documentation/video4linux/README.cx88 b/Documentation/video4linux/README.cx88
new file mode 100644
index 000000000..35fae23f8
--- /dev/null
+++ b/Documentation/video4linux/README.cx88
@@ -0,0 +1,67 @@
+cx8800 release notes
+====================
+
+This is a v4l2 device driver for the cx2388x chip.
+
+
+current status
+==============
+
+video
+ - Basically works.
+ - For now, only capture and read(). Overlay isn't supported.
+
+audio
+ - The chip specs for the on-chip TV sound decoder are next
+ to useless :-/
+ - Neverless the builtin TV sound decoder starts working now,
+ at least for some standards.
+ FOR ANY REPORTS ON THIS PLEASE MENTION THE TV NORM YOU ARE
+ USING.
+ - Most tuner chips do provide mono sound, which may or may not
+ be useable depending on the board design. With the Hauppauge
+ cards it works, so there is mono sound available as fallback.
+ - audio data dma (i.e. recording without loopback cable to the
+ sound card) is supported via cx88-alsa.
+
+vbi
+ - Code present. Works for NTSC closed caption. PAL and other
+ TV norms may or may not work.
+
+
+how to add support for new cards
+================================
+
+The driver needs some config info for the TV cards. This stuff is in
+cx88-cards.c. If the driver doesn't work well you likely need a new
+entry for your card in that file. Check the kernel log (using dmesg)
+to see whenever the driver knows your card or not. There is a line
+like this one:
+
+ cx8800[0]: subsystem: 0070:3400, board: Hauppauge WinTV \
+ 34xxx models [card=1,autodetected]
+
+If your card is listed as "board: UNKNOWN/GENERIC" it is unknown to
+the driver. What to do then?
+
+ (1) Try upgrading to the latest snapshot, maybe it has been added
+ meanwhile.
+ (2) You can try to create a new entry yourself, have a look at
+ cx88-cards.c. If that worked, mail me your changes as unified
+ diff ("diff -u").
+ (3) Or you can mail me the config information. I need at least the
+ following informations to add the card:
+
+ * the PCI Subsystem ID ("0070:3400" from the line above,
+ "lspci -v" output is fine too).
+ * the tuner type used by the card. You can try to find one by
+ trial-and-error using the tuner=<n> insmod option. If you
+ know which one the card has you can also have a look at the
+ list in CARDLIST.tuner
+
+Have fun,
+
+ Gerd
+
+--
+Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]
diff --git a/Documentation/video4linux/README.davinci-vpbe b/Documentation/video4linux/README.davinci-vpbe
new file mode 100644
index 000000000..dc9a297f4
--- /dev/null
+++ b/Documentation/video4linux/README.davinci-vpbe
@@ -0,0 +1,93 @@
+
+ VPBE V4L2 driver design
+ ======================================================================
+
+ File partitioning
+ -----------------
+ V4L2 display device driver
+ drivers/media/platform/davinci/vpbe_display.c
+ drivers/media/platform/davinci/vpbe_display.h
+
+ VPBE display controller
+ drivers/media/platform/davinci/vpbe.c
+ drivers/media/platform/davinci/vpbe.h
+
+ VPBE venc sub device driver
+ drivers/media/platform/davinci/vpbe_venc.c
+ drivers/media/platform/davinci/vpbe_venc.h
+ drivers/media/platform/davinci/vpbe_venc_regs.h
+
+ VPBE osd driver
+ drivers/media/platform/davinci/vpbe_osd.c
+ drivers/media/platform/davinci/vpbe_osd.h
+ drivers/media/platform/davinci/vpbe_osd_regs.h
+
+ Functional partitioning
+ -----------------------
+
+ Consists of the following (in the same order as the list under file
+ partitioning):-
+
+ 1. V4L2 display driver
+ Implements creation of video2 and video3 device nodes and
+ provides v4l2 device interface to manage VID0 and VID1 layers.
+
+ 2. Display controller
+ Loads up VENC, OSD and external encoders such as ths8200. It provides
+ a set of API calls to V4L2 drivers to set the output/standards
+ in the VENC or external sub devices. It also provides
+ a device object to access the services from OSD subdevice
+ using sub device ops. The connection of external encoders to VENC LCD
+ controller port is done at init time based on default output and standard
+ selection or at run time when application change the output through
+ V4L2 IOCTLs.
+
+ When connected to an external encoder, vpbe controller is also responsible
+ for setting up the interface between VENC and external encoders based on
+ board specific settings (specified in board-xxx-evm.c). This allows
+ interfacing external encoders such as ths8200. The setup_if_config()
+ is implemented for this as well as configure_venc() (part of the next patch)
+ API to set timings in VENC for a specific display resolution. As of this
+ patch series, the interconnection and enabling and setting of the external
+ encoders is not present, and would be a part of the next patch series.
+
+ 3. VENC subdevice module
+ Responsible for setting outputs provided through internal DACs and also
+ setting timings at LCD controller port when external encoders are connected
+ at the port or LCD panel timings required. When external encoder/LCD panel
+ is connected, the timings for a specific standard/preset is retrieved from
+ the board specific table and the values are used to set the timings in
+ venc using non-standard timing mode.
+
+ Support LCD Panel displays using the VENC. For example to support a Logic
+ PD display, it requires setting up the LCD controller port with a set of
+ timings for the resolution supported and setting the dot clock. So we could
+ add the available outputs as a board specific entry (i.e add the "LogicPD"
+ output name to board-xxx-evm.c). A table of timings for various LCDs
+ supported can be maintained in the board specific setup file to support
+ various LCD displays.As of this patch a basic driver is present, and this
+ support for external encoders and displays forms a part of the next
+ patch series.
+
+ 4. OSD module
+ OSD module implements all OSD layer management and hardware specific
+ features. The VPBE module interacts with the OSD for enabling and
+ disabling appropriate features of the OSD.
+
+ Current status:-
+
+ A fully functional working version of the V4L2 driver is available. This
+ driver has been tested with NTSC and PAL standards and buffer streaming.
+
+ Following are TBDs.
+
+ vpbe display controller
+ - Add support for external encoders.
+ - add support for selecting external encoder as default at probe time.
+
+ vpbe venc sub device
+ - add timings for supporting ths8200
+ - add support for LogicPD LCD.
+
+ FB drivers
+ - Add support for fbdev drivers.- Ready and part of subsequent patches.
diff --git a/Documentation/video4linux/README.ir b/Documentation/video4linux/README.ir
new file mode 100644
index 000000000..0da47a847
--- /dev/null
+++ b/Documentation/video4linux/README.ir
@@ -0,0 +1,72 @@
+
+infrared remote control support in video4linux drivers
+======================================================
+
+
+basics
+------
+
+Current versions use the linux input layer to support infrared
+remote controls. I suggest to download my input layer tools
+from http://bytesex.org/snapshot/input-<date>.tar.gz
+
+Modules you have to load:
+
+ saa7134 statically built in, i.e. just the driver :)
+ bttv ir-kbd-gpio or ir-kbd-i2c depending on your
+ card.
+
+ir-kbd-gpio and ir-kbd-i2c don't support all cards lirc supports
+(yet), mainly for the reason that the code of lirc_i2c and lirc_gpio
+was very confusing and I decided to basically start over from scratch.
+Feel free to contact me in case of trouble. Note that the ir-kbd-*
+modules work on 2.6.x kernels only through ...
+
+
+how it works
+------------
+
+The modules register the remote as keyboard within the linux input
+layer, i.e. you'll see the keys of the remote as normal key strokes
+(if CONFIG_INPUT_KEYBOARD is enabled).
+
+Using the event devices (CONFIG_INPUT_EVDEV) it is possible for
+applications to access the remote via /dev/input/event<n> devices.
+You might have to create the special files using "/sbin/MAKEDEV
+input". The input layer tools mentioned above use the event device.
+
+The input layer tools are nice for trouble shooting, i.e. to check
+whenever the input device is really present, which of the devices it
+is, check whenever pressing keys on the remote actually generates
+events and the like. You can also use the kbd utility to change the
+keymaps (2.6.x kernels only through).
+
+
+using with lircd
+================
+
+The cvs version of the lircd daemon supports reading events from the
+linux input layer (via event device). The input layer tools tarball
+comes with a lircd config file.
+
+
+using without lircd
+===================
+
+XFree86 likely can be configured to recognise the remote keys. Once I
+simply tried to configure one of the multimedia keyboards as input
+device, which had the effect that XFree86 recognised some of the keys
+of my remote control and passed volume up/down key presses as
+XF86AudioRaiseVolume and XF86AudioLowerVolume key events to the X11
+clients.
+
+It likely is possible to make that fly with a nice xkb config file,
+I know next to nothing about that through.
+
+
+Have fun,
+
+ Gerd
+
+--
+Gerd Knorr <kraxel@bytesex.org>
diff --git a/Documentation/video4linux/README.ivtv b/Documentation/video4linux/README.ivtv
new file mode 100644
index 000000000..2579b5b70
--- /dev/null
+++ b/Documentation/video4linux/README.ivtv
@@ -0,0 +1,186 @@
+
+ivtv release notes
+==================
+
+This is a v4l2 device driver for the Conexant cx23415/6 MPEG encoder/decoder.
+The cx23415 can do both encoding and decoding, the cx23416 can only do MPEG
+encoding. Currently the only card featuring full decoding support is the
+Hauppauge PVR-350.
+
+NOTE: this driver requires the latest encoder firmware (version 2.06.039, size
+376836 bytes). Get the firmware from here:
+
+http://dl.ivtvdriver.org/ivtv/firmware/
+
+NOTE: 'normal' TV applications do not work with this driver, you need
+an application that can handle MPEG input such as mplayer, xine, MythTV,
+etc.
+
+The primary goal of the IVTV project is to provide a "clean room" Linux
+Open Source driver implementation for video capture cards based on the
+iCompression iTVC15 or Conexant CX23415/CX23416 MPEG Codec.
+
+Features:
+ * Hardware mpeg2 capture of broadcast video (and sound) via the tuner or
+ S-Video/Composite and audio line-in.
+ * Hardware mpeg2 capture of FM radio where hardware support exists
+ * Supports NTSC, PAL, SECAM with stereo sound
+ * Supports SAP and bilingual transmissions.
+ * Supports raw VBI (closed captions and teletext).
+ * Supports sliced VBI (closed captions and teletext) and is able to insert
+ this into the captured MPEG stream.
+ * Supports raw YUV and PCM input.
+
+Additional features for the PVR-350 (CX23415 based):
+ * Provides hardware mpeg2 playback
+ * Provides comprehensive OSD (On Screen Display: ie. graphics overlaying the
+ video signal)
+ * Provides a framebuffer (allowing X applications to appear on the video
+ device)
+ * Supports raw YUV output.
+
+IMPORTANT: In case of problems first read this page:
+ http://www.ivtvdriver.org/index.php/Troubleshooting
+
+See also:
+
+Homepage + Wiki
+http://www.ivtvdriver.org
+
+IRC
+irc://irc.freenode.net/ivtv-dev
+
+----------------------------------------------------------
+
+Devices
+=======
+
+A maximum of 12 ivtv boards are allowed at the moment.
+
+Cards that don't have a video output capability (i.e. non PVR350 cards)
+lack the vbi8, vbi16, video16 and video48 devices. They also do not
+support the framebuffer device /dev/fbx for OSD.
+
+The radio0 device may or may not be present, depending on whether the
+card has a radio tuner or not.
+
+Here is a list of the base v4l devices:
+crw-rw---- 1 root video 81, 0 Jun 19 22:22 /dev/video0
+crw-rw---- 1 root video 81, 16 Jun 19 22:22 /dev/video16
+crw-rw---- 1 root video 81, 24 Jun 19 22:22 /dev/video24
+crw-rw---- 1 root video 81, 32 Jun 19 22:22 /dev/video32
+crw-rw---- 1 root video 81, 48 Jun 19 22:22 /dev/video48
+crw-rw---- 1 root video 81, 64 Jun 19 22:22 /dev/radio0
+crw-rw---- 1 root video 81, 224 Jun 19 22:22 /dev/vbi0
+crw-rw---- 1 root video 81, 228 Jun 19 22:22 /dev/vbi8
+crw-rw---- 1 root video 81, 232 Jun 19 22:22 /dev/vbi16
+
+Base devices
+============
+
+For every extra card you have the numbers increased by one. For example,
+/dev/video0 is listed as the 'base' encoding capture device so we have:
+
+ /dev/video0 is the encoding capture device for the first card (card 0)
+ /dev/video1 is the encoding capture device for the second card (card 1)
+ /dev/video2 is the encoding capture device for the third card (card 2)
+
+Note that if the first card doesn't have a feature (eg no decoder, so no
+video16, the second card will still use video17. The simple rule is 'add
+the card number to the base device number'. If you have other capture
+cards (e.g. WinTV PCI) that are detected first, then you have to tell
+the ivtv module about it so that it will start counting at 1 (or 2, or
+whatever). Otherwise the device numbers can get confusing. The ivtv
+'ivtv_first_minor' module option can be used for that.
+
+
+/dev/video0
+The encoding capture device(s).
+Read-only.
+
+Reading from this device gets you the MPEG1/2 program stream.
+Example:
+
+cat /dev/video0 > my.mpg (you need to hit ctrl-c to exit)
+
+
+/dev/video16
+The decoder output device(s)
+Write-only. Only present if the MPEG decoder (i.e. CX23415) exists.
+
+An mpeg2 stream sent to this device will appear on the selected video
+display, audio will appear on the line-out/audio out. It is only
+available for cards that support video out. Example:
+
+cat my.mpg >/dev/video16
+
+
+/dev/video24
+The raw audio capture device(s).
+Read-only
+
+The raw audio PCM stereo stream from the currently selected
+tuner or audio line-in. Reading from this device results in a raw
+(signed 16 bit Little Endian, 48000 Hz, stereo pcm) capture.
+This device only captures audio. This should be replaced by an ALSA
+device in the future.
+Note that there is no corresponding raw audio output device, this is
+not supported in the decoder firmware.
+
+
+/dev/video32
+The raw video capture device(s)
+Read-only
+
+The raw YUV video output from the current video input. The YUV format
+is non-standard (V4L2_PIX_FMT_HM12).
+
+Note that the YUV and PCM streams are not synchronized, so they are of
+limited use.
+
+
+/dev/video48
+The raw video display device(s)
+Write-only. Only present if the MPEG decoder (i.e. CX23415) exists.
+
+Writes a YUV stream to the decoder of the card.
+
+
+/dev/radio0
+The radio tuner device(s)
+Cannot be read or written.
+
+Used to enable the radio tuner and tune to a frequency. You cannot
+read or write audio streams with this device. Once you use this
+device to tune the radio, use /dev/video24 to read the raw pcm stream
+or /dev/video0 to get an mpeg2 stream with black video.
+
+
+/dev/vbi0
+The 'vertical blank interval' (Teletext, CC, WSS etc) capture device(s)
+Read-only
+
+Captures the raw (or sliced) video data sent during the Vertical Blank
+Interval. This data is used to encode teletext, closed captions, VPS,
+widescreen signalling, electronic program guide information, and other
+services.
+
+
+/dev/vbi8
+Processed vbi feedback device(s)
+Read-only. Only present if the MPEG decoder (i.e. CX23415) exists.
+
+The sliced VBI data embedded in an MPEG stream is reproduced on this
+device. So while playing back a recording on /dev/video16, you can
+read the embedded VBI data from /dev/vbi8.
+
+
+/dev/vbi16
+The vbi 'display' device(s)
+Write-only. Only present if the MPEG decoder (i.e. CX23415) exists.
+
+Can be used to send sliced VBI data to the video-out connector.
+
+---------------------------------
+
+Hans Verkuil <hverkuil@xs4all.nl>
diff --git a/Documentation/video4linux/README.pvrusb2 b/Documentation/video4linux/README.pvrusb2
new file mode 100644
index 000000000..2137b5892
--- /dev/null
+++ b/Documentation/video4linux/README.pvrusb2
@@ -0,0 +1,212 @@
+
+$Id$
+Mike Isely <isely@pobox.com>
+
+ pvrusb2 driver
+
+Background:
+
+ This driver is intended for the "Hauppauge WinTV PVR USB 2.0", which
+ is a USB 2.0 hosted TV Tuner. This driver is a work in progress.
+ Its history started with the reverse-engineering effort by Björn
+ Danielsson <pvrusb2@dax.nu> whose web page can be found here:
+
+ http://pvrusb2.dax.nu/
+
+ From there Aurelien Alleaume <slts@free.fr> began an effort to
+ create a video4linux compatible driver. I began with Aurelien's
+ last known snapshot and evolved the driver to the state it is in
+ here.
+
+ More information on this driver can be found at:
+
+ http://www.isely.net/pvrusb2.html
+
+
+ This driver has a strong separation of layers. They are very
+ roughly:
+
+ 1a. Low level wire-protocol implementation with the device.
+
+ 1b. I2C adaptor implementation and corresponding I2C client drivers
+ implemented elsewhere in V4L.
+
+ 1c. High level hardware driver implementation which coordinates all
+ activities that ensure correct operation of the device.
+
+ 2. A "context" layer which manages instancing of driver, setup,
+ tear-down, arbitration, and interaction with high level
+ interfaces appropriately as devices are hotplugged in the
+ system.
+
+ 3. High level interfaces which glue the driver to various published
+ Linux APIs (V4L, sysfs, maybe DVB in the future).
+
+ The most important shearing layer is between the top 2 layers. A
+ lot of work went into the driver to ensure that any kind of
+ conceivable API can be laid on top of the core driver. (Yes, the
+ driver internally leverages V4L to do its work but that really has
+ nothing to do with the API published by the driver to the outside
+ world.) The architecture allows for different APIs to
+ simultaneously access the driver. I have a strong sense of fairness
+ about APIs and also feel that it is a good design principle to keep
+ implementation and interface isolated from each other. Thus while
+ right now the V4L high level interface is the most complete, the
+ sysfs high level interface will work equally well for similar
+ functions, and there's no reason I see right now why it shouldn't be
+ possible to produce a DVB high level interface that can sit right
+ alongside V4L.
+
+ NOTE: Complete documentation on the pvrusb2 driver is contained in
+ the html files within the doc directory; these are exactly the same
+ as what is on the web site at the time. Browse those files
+ (especially the FAQ) before asking questions.
+
+
+Building
+
+ To build these modules essentially amounts to just running "Make",
+ but you need the kernel source tree nearby and you will likely also
+ want to set a few controlling environment variables first in order
+ to link things up with that source tree. Please see the Makefile
+ here for comments that explain how to do that.
+
+
+Source file list / functional overview:
+
+ (Note: The term "module" used below generally refers to loosely
+ defined functional units within the pvrusb2 driver and bears no
+ relation to the Linux kernel's concept of a loadable module.)
+
+ pvrusb2-audio.[ch] - This is glue logic that resides between this
+ driver and the msp3400.ko I2C client driver (which is found
+ elsewhere in V4L).
+
+ pvrusb2-context.[ch] - This module implements the context for an
+ instance of the driver. Everything else eventually ties back to
+ or is otherwise instanced within the data structures implemented
+ here. Hotplugging is ultimately coordinated here. All high level
+ interfaces tie into the driver through this module. This module
+ helps arbitrate each interface's access to the actual driver core,
+ and is designed to allow concurrent access through multiple
+ instances of multiple interfaces (thus you can for example change
+ the tuner's frequency through sysfs while simultaneously streaming
+ video through V4L out to an instance of mplayer).
+
+ pvrusb2-debug.h - This header defines a printk() wrapper and a mask
+ of debugging bit definitions for the various kinds of debug
+ messages that can be enabled within the driver.
+
+ pvrusb2-debugifc.[ch] - This module implements a crude command line
+ oriented debug interface into the driver. Aside from being part
+ of the process for implementing manual firmware extraction (see
+ the pvrusb2 web site mentioned earlier), probably I'm the only one
+ who has ever used this. It is mainly a debugging aid.
+
+ pvrusb2-eeprom.[ch] - This is glue logic that resides between this
+ driver the tveeprom.ko module, which is itself implemented
+ elsewhere in V4L.
+
+ pvrusb2-encoder.[ch] - This module implements all protocol needed to
+ interact with the Conexant mpeg2 encoder chip within the pvrusb2
+ device. It is a crude echo of corresponding logic in ivtv,
+ however the design goals (strict isolation) and physical layer
+ (proxy through USB instead of PCI) are enough different that this
+ implementation had to be completely different.
+
+ pvrusb2-hdw-internal.h - This header defines the core data structure
+ in the driver used to track ALL internal state related to control
+ of the hardware. Nobody outside of the core hardware-handling
+ modules should have any business using this header. All external
+ access to the driver should be through one of the high level
+ interfaces (e.g. V4L, sysfs, etc), and in fact even those high
+ level interfaces are restricted to the API defined in
+ pvrusb2-hdw.h and NOT this header.
+
+ pvrusb2-hdw.h - This header defines the full internal API for
+ controlling the hardware. High level interfaces (e.g. V4L, sysfs)
+ will work through here.
+
+ pvrusb2-hdw.c - This module implements all the various bits of logic
+ that handle overall control of a specific pvrusb2 device.
+ (Policy, instantiation, and arbitration of pvrusb2 devices fall
+ within the jurisdiction of pvrusb-context not here).
+
+ pvrusb2-i2c-chips-*.c - These modules implement the glue logic to
+ tie together and configure various I2C modules as they attach to
+ the I2C bus. There are two versions of this file. The "v4l2"
+ version is intended to be used in-tree alongside V4L, where we
+ implement just the logic that makes sense for a pure V4L
+ environment. The "all" version is intended for use outside of
+ V4L, where we might encounter other possibly "challenging" modules
+ from ivtv or older kernel snapshots (or even the support modules
+ in the standalone snapshot).
+
+ pvrusb2-i2c-cmd-v4l1.[ch] - This module implements generic V4L1
+ compatible commands to the I2C modules. It is here where state
+ changes inside the pvrusb2 driver are translated into V4L1
+ commands that are in turn send to the various I2C modules.
+
+ pvrusb2-i2c-cmd-v4l2.[ch] - This module implements generic V4L2
+ compatible commands to the I2C modules. It is here where state
+ changes inside the pvrusb2 driver are translated into V4L2
+ commands that are in turn send to the various I2C modules.
+
+ pvrusb2-i2c-core.[ch] - This module provides an implementation of a
+ kernel-friendly I2C adaptor driver, through which other external
+ I2C client drivers (e.g. msp3400, tuner, lirc) may connect and
+ operate corresponding chips within the pvrusb2 device. It is
+ through here that other V4L modules can reach into this driver to
+ operate specific pieces (and those modules are in turn driven by
+ glue logic which is coordinated by pvrusb2-hdw, doled out by
+ pvrusb2-context, and then ultimately made available to users
+ through one of the high level interfaces).
+
+ pvrusb2-io.[ch] - This module implements a very low level ring of
+ transfer buffers, required in order to stream data from the
+ device. This module is *very* low level. It only operates the
+ buffers and makes no attempt to define any policy or mechanism for
+ how such buffers might be used.
+
+ pvrusb2-ioread.[ch] - This module layers on top of pvrusb2-io.[ch]
+ to provide a streaming API usable by a read() system call style of
+ I/O. Right now this is the only layer on top of pvrusb2-io.[ch],
+ however the underlying architecture here was intended to allow for
+ other styles of I/O to be implemented with additional modules, like
+ mmap()'ed buffers or something even more exotic.
+
+ pvrusb2-main.c - This is the top level of the driver. Module level
+ and USB core entry points are here. This is our "main".
+
+ pvrusb2-sysfs.[ch] - This is the high level interface which ties the
+ pvrusb2 driver into sysfs. Through this interface you can do
+ everything with the driver except actually stream data.
+
+ pvrusb2-tuner.[ch] - This is glue logic that resides between this
+ driver and the tuner.ko I2C client driver (which is found
+ elsewhere in V4L).
+
+ pvrusb2-util.h - This header defines some common macros used
+ throughout the driver. These macros are not really specific to
+ the driver, but they had to go somewhere.
+
+ pvrusb2-v4l2.[ch] - This is the high level interface which ties the
+ pvrusb2 driver into video4linux. It is through here that V4L
+ applications can open and operate the driver in the usual V4L
+ ways. Note that **ALL** V4L functionality is published only
+ through here and nowhere else.
+
+ pvrusb2-video-*.[ch] - This is glue logic that resides between this
+ driver and the saa711x.ko I2C client driver (which is found
+ elsewhere in V4L). Note that saa711x.ko used to be known as
+ saa7115.ko in ivtv. There are two versions of this; one is
+ selected depending on the particular saa711[5x].ko that is found.
+
+ pvrusb2.h - This header contains compile time tunable parameters
+ (and at the moment the driver has very little that needs to be
+ tuned).
+
+
+ -Mike Isely
+ isely@pobox.com
+
diff --git a/Documentation/video4linux/README.saa7134 b/Documentation/video4linux/README.saa7134
new file mode 100644
index 000000000..b911f0871
--- /dev/null
+++ b/Documentation/video4linux/README.saa7134
@@ -0,0 +1,82 @@
+
+
+What is it?
+===========
+
+This is a v4l2/oss device driver for saa7130/33/34/35 based capture / TV
+boards. See http://www.semiconductors.philips.com/pip/saa7134hl for a
+description.
+
+
+Status
+======
+
+Almost everything is working. video, sound, tuner, radio, mpeg ts, ...
+
+As with bttv, card-specific tweaks are needed. Check CARDLIST for a
+list of known TV cards and saa7134-cards.c for the drivers card
+configuration info.
+
+
+Build
+=====
+
+Pick up videodev + v4l2 patches from http://bytesex.org/patches/.
+Configure, build, install + boot the new kernel. You'll need at least
+these config options:
+
+ CONFIG_I2C=m
+ CONFIG_VIDEO_DEV=m
+
+Type "make" to build the driver now. "make install" installs the
+driver. "modprobe saa7134" should load it. Depending on the card you
+might have to pass card=<nr> as insmod option, check CARDLIST for
+valid choices.
+
+
+Changes / Fixes
+===============
+
+Please mail me unified diffs ("diff -u") with your changes, and don't
+forget to tell me what it changes / which problem it fixes / whatever
+it is good for ...
+
+
+Known Problems
+==============
+
+* The tuner for the flyvideos isn't detected automatically and the
+ default might not work for you depending on which version you have.
+ There is a tuner= insmod option to override the driver's default.
+
+Card Variations:
+================
+
+Cards can use either of these two crystals (xtal):
+ - 32.11 MHz -> .audio_clock=0x187de7
+ - 24.576MHz -> .audio_clock=0x200000
+(xtal * .audio_clock = 51539600)
+
+Some details about 30/34/35:
+
+ - saa7130 - low-price chip, doesn't have mute, that is why all those
+ cards should have .mute field defined in their tuner structure.
+
+ - saa7134 - usual chip
+
+ - saa7133/35 - saa7135 is probably a marketing decision, since all those
+ chips identifies itself as 33 on pci.
+
+Credits
+=======
+
+andrew.stevens@philips.com + werner.leeb@philips.com for providing
+saa7134 hardware specs and sample board.
+
+
+Have fun,
+
+ Gerd
+
+--
+Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]
diff --git a/Documentation/video4linux/Zoran b/Documentation/video4linux/Zoran
new file mode 100644
index 000000000..b5a911fd0
--- /dev/null
+++ b/Documentation/video4linux/Zoran
@@ -0,0 +1,510 @@
+Frequently Asked Questions:
+===========================
+subject: unified zoran driver (zr360x7, zoran, buz, dc10(+), dc30(+), lml33)
+website: http://mjpeg.sourceforge.net/driver-zoran/
+
+1. What cards are supported
+1.1 What the TV decoder can do an what not
+1.2 What the TV encoder can do an what not
+2. How do I get this damn thing to work
+3. What mainboard should I use (or why doesn't my card work)
+4. Programming interface
+5. Applications
+6. Concerning buffer sizes, quality, output size etc.
+7. It hangs/crashes/fails/whatevers! Help!
+8. Maintainers/Contacting
+9. License
+
+===========================
+
+1. What cards are supported
+
+Iomega Buz, Linux Media Labs LML33/LML33R10, Pinnacle/Miro
+DC10/DC10+/DC30/DC30+ and related boards (available under various names).
+
+Iomega Buz:
+* Zoran zr36067 PCI controller
+* Zoran zr36060 MJPEG codec
+* Philips saa7111 TV decoder
+* Philips saa7185 TV encoder
+Drivers to use: videodev, i2c-core, i2c-algo-bit,
+ videocodec, saa7111, saa7185, zr36060, zr36067
+Inputs/outputs: Composite and S-video
+Norms: PAL, SECAM (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps)
+Card number: 7
+
+AverMedia 6 Eyes AVS6EYES:
+* Zoran zr36067 PCI controller
+* Zoran zr36060 MJPEG codec
+* Samsung ks0127 TV decoder
+* Conexant bt866 TV encoder
+Drivers to use: videodev, i2c-core, i2c-algo-bit,
+ videocodec, ks0127, bt866, zr36060, zr36067
+Inputs/outputs: Six physical inputs. 1-6 are composite,
+ 1-2, 3-4, 5-6 doubles as S-video,
+ 1-3 triples as component.
+ One composite output.
+Norms: PAL, SECAM (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps)
+Card number: 8
+Not autodetected, card=8 is necessary.
+
+Linux Media Labs LML33:
+* Zoran zr36067 PCI controller
+* Zoran zr36060 MJPEG codec
+* Brooktree bt819 TV decoder
+* Brooktree bt856 TV encoder
+Drivers to use: videodev, i2c-core, i2c-algo-bit,
+ videocodec, bt819, bt856, zr36060, zr36067
+Inputs/outputs: Composite and S-video
+Norms: PAL (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps)
+Card number: 5
+
+Linux Media Labs LML33R10:
+* Zoran zr36067 PCI controller
+* Zoran zr36060 MJPEG codec
+* Philips saa7114 TV decoder
+* Analog Devices adv7170 TV encoder
+Drivers to use: videodev, i2c-core, i2c-algo-bit,
+ videocodec, saa7114, adv7170, zr36060, zr36067
+Inputs/outputs: Composite and S-video
+Norms: PAL (720x576 @ 25 fps), NTSC (720x480 @ 29.97 fps)
+Card number: 6
+
+Pinnacle/Miro DC10(new):
+* Zoran zr36057 PCI controller
+* Zoran zr36060 MJPEG codec
+* Philips saa7110a TV decoder
+* Analog Devices adv7176 TV encoder
+Drivers to use: videodev, i2c-core, i2c-algo-bit,
+ videocodec, saa7110, adv7175, zr36060, zr36067
+Inputs/outputs: Composite, S-video and Internal
+Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps)
+Card number: 1
+
+Pinnacle/Miro DC10+:
+* Zoran zr36067 PCI controller
+* Zoran zr36060 MJPEG codec
+* Philips saa7110a TV decoder
+* Analog Devices adv7176 TV encoder
+Drivers to use: videodev, i2c-core, i2c-algo-bit,
+ videocodec, sa7110, adv7175, zr36060, zr36067
+Inputs/outputs: Composite, S-video and Internal
+Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps)
+Card number: 2
+
+Pinnacle/Miro DC10(old): *
+* Zoran zr36057 PCI controller
+* Zoran zr36050 MJPEG codec
+* Zoran zr36016 Video Front End or Fuji md0211 Video Front End (clone?)
+* Micronas vpx3220a TV decoder
+* mse3000 TV encoder or Analog Devices adv7176 TV encoder *
+Drivers to use: videodev, i2c-core, i2c-algo-bit,
+ videocodec, vpx3220, mse3000/adv7175, zr36050, zr36016, zr36067
+Inputs/outputs: Composite, S-video and Internal
+Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps)
+Card number: 0
+
+Pinnacle/Miro DC30: *
+* Zoran zr36057 PCI controller
+* Zoran zr36050 MJPEG codec
+* Zoran zr36016 Video Front End
+* Micronas vpx3225d/vpx3220a/vpx3216b TV decoder
+* Analog Devices adv7176 TV encoder
+Drivers to use: videodev, i2c-core, i2c-algo-bit,
+ videocodec, vpx3220/vpx3224, adv7175, zr36050, zr36016, zr36067
+Inputs/outputs: Composite, S-video and Internal
+Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps)
+Card number: 3
+
+Pinnacle/Miro DC30+: *
+* Zoran zr36067 PCI controller
+* Zoran zr36050 MJPEG codec
+* Zoran zr36016 Video Front End
+* Micronas vpx3225d/vpx3220a/vpx3216b TV decoder
+* Analog Devices adv7176 TV encoder
+Drivers to use: videodev, i2c-core, i2c-algo-bit,
+ videocodec, vpx3220/vpx3224, adv7175, zr36050, zr36015, zr36067
+Inputs/outputs: Composite, S-video and Internal
+Norms: PAL, SECAM (768x576 @ 25 fps), NTSC (640x480 @ 29.97 fps)
+Card number: 4
+
+Note: No module for the mse3000 is available yet
+Note: No module for the vpx3224 is available yet
+
+===========================
+
+1.1 What the TV decoder can do an what not
+
+The best know TV standards are NTSC/PAL/SECAM. but for decoding a frame that
+information is not enough. There are several formats of the TV standards.
+And not every TV decoder is able to handle every format. Also the every
+combination is supported by the driver. There are currently 11 different
+tv broadcast formats all aver the world.
+
+The CCIR defines parameters needed for broadcasting the signal.
+The CCIR has defined different standards: A,B,D,E,F,G,D,H,I,K,K1,L,M,N,...
+The CCIR says not much about the colorsystem used !!!
+And talking about a colorsystem says not to much about how it is broadcast.
+
+The CCIR standards A,E,F are not used any more.
+
+When you speak about NTSC, you usually mean the standard: CCIR - M using
+the NTSC colorsystem which is used in the USA, Japan, Mexico, Canada
+and a few others.
+
+When you talk about PAL, you usually mean: CCIR - B/G using the PAL
+colorsystem which is used in many Countries.
+
+When you talk about SECAM, you mean: CCIR - L using the SECAM Colorsystem
+which is used in France, and a few others.
+
+There the other version of SECAM, CCIR - D/K is used in Bulgaria, China,
+Slovakai, Hungary, Korea (Rep.), Poland, Rumania and a others.
+
+The CCIR - H uses the PAL colorsystem (sometimes SECAM) and is used in
+Egypt, Libya, Sri Lanka, Syrain Arab. Rep.
+
+The CCIR - I uses the PAL colorsystem, and is used in Great Britain, Hong Kong,
+Ireland, Nigeria, South Africa.
+
+The CCIR - N uses the PAL colorsystem and PAL frame size but the NTSC framerate,
+and is used in Argentinia, Uruguay, an a few others
+
+We do not talk about how the audio is broadcast !
+
+A rather good sites about the TV standards are:
+http://www.sony.jp/support/
+http://info.electronicwerkstatt.de/bereiche/fernsehtechnik/frequenzen_und_normen/Fernsehnormen/
+and http://www.cabl.com/restaurant/channel.html
+
+Other weird things around: NTSC 4.43 is a modificated NTSC, which is mainly
+used in PAL VCR's that are able to play back NTSC. PAL 60 seems to be the same
+as NTSC 4.43 . The Datasheets also talk about NTSC 44, It seems as if it would
+be the same as NTSC 4.43.
+NTSC Combs seems to be a decoder mode where the decoder uses a comb filter
+to split coma and luma instead of a Delay line.
+
+But I did not defiantly find out what NTSC Comb is.
+
+Philips saa7111 TV decoder
+was introduced in 1997, is used in the BUZ and
+can handle: PAL B/G/H/I, PAL N, PAL M, NTSC M, NTSC N, NTSC 4.43 and SECAM
+
+Philips saa7110a TV decoder
+was introduced in 1995, is used in the Pinnacle/Miro DC10(new), DC10+ and
+can handle: PAL B/G, NTSC M and SECAM
+
+Philips saa7114 TV decoder
+was introduced in 2000, is used in the LML33R10 and
+can handle: PAL B/G/D/H/I/N, PAL N, PAL M, NTSC M, NTSC 4.43 and SECAM
+
+Brooktree bt819 TV decoder
+was introduced in 1996, and is used in the LML33 and
+can handle: PAL B/D/G/H/I, NTSC M
+
+Micronas vpx3220a TV decoder
+was introduced in 1996, is used in the DC30 and DC30+ and
+can handle: PAL B/G/H/I, PAL N, PAL M, NTSC M, NTSC 44, PAL 60, SECAM,NTSC Comb
+
+Samsung ks0127 TV decoder
+is used in the AVS6EYES card and
+can handle: NTSC-M/N/44, PAL-M/N/B/G/H/I/D/K/L and SECAM
+
+===========================
+
+1.2 What the TV encoder can do an what not
+
+The TV encoder are doing the "same" as the decoder, but in the oder direction.
+You feed them digital data and the generate a Composite or SVHS signal.
+For information about the colorsystems and TV norm take a look in the
+TV decoder section.
+
+Philips saa7185 TV Encoder
+was introduced in 1996, is used in the BUZ
+can generate: PAL B/G, NTSC M
+
+Brooktree bt856 TV Encoder
+was introduced in 1994, is used in the LML33
+can generate: PAL B/D/G/H/I/N, PAL M, NTSC M, PAL-N (Argentina)
+
+Analog Devices adv7170 TV Encoder
+was introduced in 2000, is used in the LML300R10
+can generate: PAL B/D/G/H/I/N, PAL M, NTSC M, PAL 60
+
+Analog Devices adv7175 TV Encoder
+was introduced in 1996, is used in the DC10, DC10+, DC10 old, DC30, DC30+
+can generate: PAL B/D/G/H/I/N, PAL M, NTSC M
+
+ITT mse3000 TV encoder
+was introduced in 1991, is used in the DC10 old
+can generate: PAL , NTSC , SECAM
+
+Conexant bt866 TV encoder
+is used in AVS6EYES, and
+can generate: NTSC/PAL, PAL­M, PAL­N
+
+The adv717x, should be able to produce PAL N. But you find nothing PAL N
+specific in the registers. Seem that you have to reuse a other standard
+to generate PAL N, maybe it would work if you use the PAL M settings.
+
+==========================
+
+2. How do I get this damn thing to work
+
+Load zr36067.o. If it can't autodetect your card, use the card=X insmod
+option with X being the card number as given in the previous section.
+To have more than one card, use card=X1[,X2[,X3,[X4[..]]]]
+
+To automate this, add the following to your /etc/modprobe.d/zoran.conf:
+
+options zr36067 card=X1[,X2[,X3[,X4[..]]]]
+alias char-major-81-0 zr36067
+
+One thing to keep in mind is that this doesn't load zr36067.o itself yet. It
+just automates loading. If you start using xawtv, the device won't load on
+some systems, since you're trying to load modules as a user, which is not
+allowed ("permission denied"). A quick workaround is to add 'Load "v4l"' to
+XF86Config-4 when you use X by default, or to run 'v4l-conf -c <device>' in
+one of your startup scripts (normally rc.local) if you don't use X. Both
+make sure that the modules are loaded on startup, under the root account.
+
+===========================
+
+3. What mainboard should I use (or why doesn't my card work)
+
+<insert lousy disclaimer here>. In short: good=SiS/Intel, bad=VIA.
+
+Experience tells us that people with a Buz, on average, have more problems
+than users with a DC10+/LML33. Also, it tells us that people owning a VIA-
+based mainboard (ktXXX, MVP3) have more problems than users with a mainboard
+based on a different chipset. Here's some notes from Andrew Stevens:
+--
+Here's my experience of using LML33 and Buz on various motherboards:
+
+VIA MVP3
+ Forget it. Pointless. Doesn't work.
+Intel 430FX (Pentium 200)
+ LML33 perfect, Buz tolerable (3 or 4 frames dropped per movie)
+Intel 440BX (early stepping)
+ LML33 tolerable. Buz starting to get annoying (6-10 frames/hour)
+Intel 440BX (late stepping)
+ Buz tolerable, LML3 almost perfect (occasional single frame drops)
+SiS735
+ LML33 perfect, Buz tolerable.
+VIA KT133(*)
+ LML33 starting to get annoying, Buz poor enough that I have up.
+
+Both 440BX boards were dual CPU versions.
+--
+Bernhard Praschinger later added:
+--
+AMD 751
+ Buz perfect-tolerable
+AMD 760
+ Buz perfect-tolerable
+--
+In general, people on the user mailinglist won't give you much of a chance
+if you have a VIA-based motherboard. They may be cheap, but sometimes, you'd
+rather want to spend some more money on better boards. In general, VIA
+mainboard's IDE/PCI performance will also suck badly compared to others.
+You'll noticed the DC10+/DC30+ aren't mentioned anywhere in the overview.
+Basically, you can assume that if the Buz works, the LML33 will work too. If
+the LML33 works, the DC10+/DC30+ will work too. They're most tolerant to
+different mainboard chipsets from all of the supported cards.
+
+If you experience timeouts during capture, buy a better mainboard or lower
+the quality/buffersize during capture (see 'Concerning buffer sizes, quality,
+output size etc.'). If it hangs, there's little we can do as of now. Check
+your IRQs and make sure the card has its own interrupts.
+
+===========================
+
+4. Programming interface
+
+This driver conforms to video4linux2. Support for V4L1 and for the custom
+zoran ioctls has been removed in kernel 2.6.38.
+
+For programming example, please, look at lavrec.c and lavplay.c code in
+the MJPEG-tools (http://mjpeg.sf.net/).
+
+Additional notes for software developers:
+
+ The driver returns maxwidth and maxheight parameters according to
+ the current TV standard (norm). Therefore, the software which
+ communicates with the driver and "asks" for these parameters should
+ first set the correct norm. Well, it seems logically correct: TV
+ standard is "more constant" for current country than geometry
+ settings of a variety of TV capture cards which may work in ITU or
+ square pixel format.
+
+===========================
+
+5. Applications
+
+Applications known to work with this driver:
+
+TV viewing:
+* xawtv
+* kwintv
+* probably any TV application that supports video4linux or video4linux2.
+
+MJPEG capture/playback:
+* mjpegtools/lavtools (or Linux Video Studio)
+* gstreamer
+* mplayer
+
+General raw capture:
+* xawtv
+* gstreamer
+* probably any application that supports video4linux or video4linux2
+
+Video editing:
+* Cinelerra
+* MainActor
+* mjpegtools (or Linux Video Studio)
+
+===========================
+
+6. Concerning buffer sizes, quality, output size etc.
+
+The zr36060 can do 1:2 JPEG compression. This is really the theoretical
+maximum that the chipset can reach. The driver can, however, limit compression
+to a maximum (size) of 1:4. The reason for this is that some cards (e.g. Buz)
+can't handle 1:2 compression without stopping capture after only a few minutes.
+With 1:4, it'll mostly work. If you have a Buz, use 'low_bitrate=1' to go into
+1:4 max. compression mode.
+
+100% JPEG quality is thus 1:2 compression in practice. So for a full PAL frame
+(size 720x576). The JPEG fields are stored in YUY2 format, so the size of the
+fields are 720x288x16/2 bits/field (2 fields/frame) = 207360 bytes/field x 2 =
+414720 bytes/frame (add some more bytes for headers and DHT (huffman)/DQT
+(quantization) tables, and you'll get to something like 512kB per frame for
+1:2 compression. For 1:4 compression, you'd have frames of half this size.
+
+Some additional explanation by Martin Samuelsson, which also explains the
+importance of buffer sizes:
+--
+> Hmm, I do not think it is really that way. With the current (downloaded
+> at 18:00 Monday) driver I get that output sizes for 10 sec:
+> -q 50 -b 128 : 24.283.332 Bytes
+> -q 50 -b 256 : 48.442.368
+> -q 25 -b 128 : 24.655.992
+> -q 25 -b 256 : 25.859.820
+
+I woke up, and can't go to sleep again. I'll kill some time explaining why
+this doesn't look strange to me.
+
+Let's do some math using a width of 704 pixels. I'm not sure whether the Buz
+actually use that number or not, but that's not too important right now.
+
+704x288 pixels, one field, is 202752 pixels. Divided by 64 pixels per block;
+3168 blocks per field. Each pixel consist of two bytes; 128 bytes per block;
+1024 bits per block. 100% in the new driver mean 1:2 compression; the maximum
+output becomes 512 bits per block. Actually 510, but 512 is simpler to use
+for calculations.
+
+Let's say that we specify d1q50. We thus want 256 bits per block; times 3168
+becomes 811008 bits; 101376 bytes per field. We're talking raw bits and bytes
+here, so we don't need to do any fancy corrections for bits-per-pixel or such
+things. 101376 bytes per field.
+
+d1 video contains two fields per frame. Those sum up to 202752 bytes per
+frame, and one of those frames goes into each buffer.
+
+But wait a second! -b128 gives 128kB buffers! It's not possible to cram
+202752 bytes of JPEG data into 128kB!
+
+This is what the driver notice and automatically compensate for in your
+examples. Let's do some math using this information:
+
+128kB is 131072 bytes. In this buffer, we want to store two fields, which
+leaves 65536 bytes for each field. Using 3168 blocks per field, we get
+20.68686868... available bytes per block; 165 bits. We can't allow the
+request for 256 bits per block when there's only 165 bits available! The -q50
+option is silently overridden, and the -b128 option takes precedence, leaving
+us with the equivalence of -q32.
+
+This gives us a data rate of 165 bits per block, which, times 3168, sums up
+to 65340 bytes per field, out of the allowed 65536. The current driver has
+another level of rate limiting; it won't accept -q values that fill more than
+6/8 of the specified buffers. (I'm not sure why. "Playing it safe" seem to be
+a safe bet. Personally, I think I would have lowered requested-bits-per-block
+by one, or something like that.) We can't use 165 bits per block, but have to
+lower it again, to 6/8 of the available buffer space: We end up with 124 bits
+per block, the equivalence of -q24. With 128kB buffers, you can't use greater
+than -q24 at -d1. (And PAL, and 704 pixels width...)
+
+The third example is limited to -q24 through the same process. The second
+example, using very similar calculations, is limited to -q48. The only
+example that actually grab at the specified -q value is the last one, which
+is clearly visible, looking at the file size.
+--
+
+Conclusion: the quality of the resulting movie depends on buffer size, quality,
+whether or not you use 'low_bitrate=1' as insmod option for the zr36060.c
+module to do 1:4 instead of 1:2 compression, etc.
+
+If you experience timeouts, lowering the quality/buffersize or using
+'low_bitrate=1 as insmod option for zr36060.o might actually help, as is
+proven by the Buz.
+
+===========================
+
+7. It hangs/crashes/fails/whatevers! Help!
+
+Make sure that the card has its own interrupts (see /proc/interrupts), check
+the output of dmesg at high verbosity (load zr36067.o with debug=2,
+load all other modules with debug=1). Check that your mainboard is favorable
+(see question 2) and if not, test the card in another computer. Also see the
+notes given in question 3 and try lowering quality/buffersize/capturesize
+if recording fails after a period of time.
+
+If all this doesn't help, give a clear description of the problem including
+detailed hardware information (memory+brand, mainboard+chipset+brand, which
+MJPEG card, processor, other PCI cards that might be of interest), give the
+system PnP information (/proc/interrupts, /proc/dma, /proc/devices), and give
+the kernel version, driver version, glibc version, gcc version and any other
+information that might possibly be of interest. Also provide the dmesg output
+at high verbosity. See 'Contacting' on how to contact the developers.
+
+===========================
+
+8. Maintainers/Contacting
+
+The driver is currently maintained by Laurent Pinchart and Ronald Bultje
+(<laurent.pinchart@skynet.be> and <rbultje@ronald.bitfreak.net>). For bug
+reports or questions, please contact the mailinglist instead of the developers
+individually. For user questions (i.e. bug reports or how-to questions), send
+an email to <mjpeg-users@lists.sf.net>, for developers (i.e. if you want to
+help programming), send an email to <mjpeg-developer@lists.sf.net>. See
+http://www.sf.net/projects/mjpeg/ for subscription information.
+
+For bug reports, be sure to include all the information as described in
+the section 'It hangs/crashes/fails/whatevers! Help!'. Please make sure
+you're using the latest version (http://mjpeg.sf.net/driver-zoran/).
+
+Previous maintainers/developers of this driver include Serguei Miridonov
+<mirsev@cicese.mx>, Wolfgang Scherr <scherr@net4you.net>, Dave Perks
+<dperks@ibm.net> and Rainer Johanni <Rainer@Johanni.de>.
+
+===========================
+
+9. License
+
+This driver is distributed under the terms of the General Public License.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+See http://www.gnu.org/ for more information.
diff --git a/Documentation/video4linux/bttv/CONTRIBUTORS b/Documentation/video4linux/bttv/CONTRIBUTORS
new file mode 100644
index 000000000..eb41b2650
--- /dev/null
+++ b/Documentation/video4linux/bttv/CONTRIBUTORS
@@ -0,0 +1,25 @@
+Contributors to bttv:
+
+Michael Chu <mmchu@pobox.com>
+ AverMedia fix and more flexible card recognition
+
+Alan Cox <alan@lxorguk.ukuu.org.uk>
+ Video4Linux interface and 2.1.x kernel adaptation
+
+Chris Kleitsch
+ Hardware I2C
+
+Gerd Knorr <kraxel@cs.tu-berlin.de>
+ Radio card (ITT sound processor)
+
+bigfoot <bigfoot@net-way.net>
+Ragnar Hojland Espinosa <ragnar@macula.net>
+ ConferenceTV card
+
+
++ many more (please mail me if you are missing in this list and would
+ like to be mentioned)
+
+
+
+
diff --git a/Documentation/video4linux/bttv/Cards b/Documentation/video4linux/bttv/Cards
new file mode 100644
index 000000000..a8fb6e2d3
--- /dev/null
+++ b/Documentation/video4linux/bttv/Cards
@@ -0,0 +1,960 @@
+
+Gunther Mayer's bttv card gallery (graphical version of this text file :-)
+is available at: http://www.bttv-gallery.de/
+
+
+Supported cards:
+Bt848/Bt848a/Bt849/Bt878/Bt879 cards
+------------------------------------
+
+All cards with Bt848/Bt848a/Bt849/Bt878/Bt879 and normal
+Composite/S-VHS inputs are supported. Teletext and Intercast support
+(PAL only) for ALL cards via VBI sample decoding in software.
+
+Some cards with additional multiplexing of inputs or other additional
+fancy chips are only partially supported (unless specifications by the
+card manufacturer are given). When a card is listed here it isn't
+necessarily fully supported.
+
+All other cards only differ by additional components as tuners, sound
+decoders, EEPROMs, teletext decoders ...
+
+
+Unsupported Cards:
+------------------
+
+Cards with Zoran (ZR) or Philips (SAA) or ISA are not supported by
+this driver.
+
+
+MATRIX Vision
+-------------
+
+MV-Delta
+- Bt848A
+- 4 Composite inputs, 1 S-VHS input (shared with 4th composite)
+- EEPROM
+
+http://www.matrix-vision.de/
+
+This card has no tuner but supports all 4 composite (1 shared with an
+S-VHS input) of the Bt848A.
+Very nice card if you only have satellite TV but several tuners connected
+to the card via composite.
+
+Many thanks to Matrix-Vision for giving us 2 cards for free which made
+Bt848a/Bt849 single crystal operation support possible!!!
+
+
+
+Miro/Pinnacle PCTV
+------------------
+
+- Bt848
+ some (all??) come with 2 crystals for PAL/SECAM and NTSC
+- PAL, SECAM or NTSC TV tuner (Philips or TEMIC)
+- MSP34xx sound decoder on add on board
+ decoder is supported but AFAIK does not yet work
+ (other sound MUX setting in GPIO port needed??? somebody who fixed this???)
+- 1 tuner, 1 composite and 1 S-VHS input
+- tuner type is autodetected
+
+http://www.miro.de/
+http://www.miro.com/
+
+
+Many thanks for the free card which made first NTSC support possible back
+in 1997!
+
+
+Hauppauge Win/TV pci
+--------------------
+
+There are many different versions of the Hauppauge cards with different
+tuners (TV+Radio ...), teletext decoders.
+Note that even cards with same model numbers have (depending on the revision)
+different chips on it.
+
+- Bt848 (and others but always in 2 crystal operation???)
+ newer cards have a Bt878
+- PAL, SECAM, NTSC or tuner with or without Radio support
+
+e.g.:
+ PAL:
+ TDA5737: VHF, hyperband and UHF mixer/oscillator for TV and VCR 3-band tuners
+ TSA5522: 1.4 GHz I2C-bus controlled synthesizer, I2C 0xc2-0xc3
+
+ NTSC:
+ TDA5731: VHF, hyperband and UHF mixer/oscillator for TV and VCR 3-band tuners
+ TSA5518: no datasheet available on Philips site
+- Philips SAA5246 or SAA5284 ( or no) Teletext decoder chip
+ with buffer RAM (e.g. Winbond W24257AS-35: 32Kx8 CMOS static RAM)
+ SAA5246 (I2C 0x22) is supported
+- 256 bytes EEPROM: Microchip 24LC02B or Philips 8582E2Y
+ with configuration information
+ I2C address 0xa0 (24LC02B also responds to 0xa2-0xaf)
+- 1 tuner, 1 composite and (depending on model) 1 S-VHS input
+- 14052B: mux for selection of sound source
+- sound decoder: TDA9800, MSP34xx (stereo cards)
+
+
+Askey CPH-Series
+----------------
+Developed by TelSignal(?), OEMed by many vendors (Typhoon, Anubis, Dynalink)
+
+ Card series:
+ CPH01x: BT848 capture only
+ CPH03x: BT848
+ CPH05x: BT878 with FM
+ CPH06x: BT878 (w/o FM)
+ CPH07x: BT878 capture only
+
+ TV standards:
+ CPH0x0: NTSC-M/M
+ CPH0x1: PAL-B/G
+ CPH0x2: PAL-I/I
+ CPH0x3: PAL-D/K
+ CPH0x4: SECAM-L/L
+ CPH0x5: SECAM-B/G
+ CPH0x6: SECAM-D/K
+ CPH0x7: PAL-N/N
+ CPH0x8: PAL-B/H
+ CPH0x9: PAL-M/M
+
+ CPH03x was often sold as "TV capturer".
+
+ Identifying:
+ 1) 878 cards can be identified by PCI Subsystem-ID:
+ 144f:3000 = CPH06x
+ 144F:3002 = CPH05x w/ FM
+ 144F:3005 = CPH06x_LC (w/o remote control)
+ 1) The cards have a sticker with "CPH"-model on the back.
+ 2) These cards have a number printed on the PCB just above the tuner metal box:
+ "80-CP2000300-x" = CPH03X
+ "80-CP2000500-x" = CPH05X
+ "80-CP2000600-x" = CPH06X / CPH06x_LC
+
+ Askey sells these cards as "Magic TView series", Brand "MagicXpress".
+ Other OEM often call these "Tview", "TView99" or else.
+
+Lifeview Flyvideo Series:
+-------------------------
+ The naming of these series differs in time and space.
+
+ Identifying:
+ 1) Some models can be identified by PCI subsystem ID:
+ 1852:1852 = Flyvideo 98 FM
+ 1851:1850 = Flyvideo 98
+ 1851:1851 = Flyvideo 98 EZ (capture only)
+ 2) There is a print on the PCB:
+ LR25 = Flyvideo (Zoran ZR36120, SAA7110A)
+ LR26 Rev.N = Flyvideo II (Bt848)
+ Rev.O = Flyvideo II (Bt878)
+ LR37 Rev.C = Flyvideo EZ (Capture only, ZR36120 + SAA7110)
+ LR38 Rev.A1= Flyvideo II EZ (Bt848 capture only)
+ LR50 Rev.Q = Flyvideo 98 (w/eeprom and PCI subsystem ID)
+ Rev.W = Flyvideo 98 (no eeprom)
+ LR51 Rev.E = Flyvideo 98 EZ (capture only)
+ LR90 = Flyvideo 2000 (Bt878)
+ Flyvideo 2000S (Bt878) w/Stereo TV (Package incl. LR91 daughterboard)
+ LR91 = Stereo daughter card for LR90
+ LR97 = Flyvideo DVBS
+ LR99 Rev.E = Low profile card for OEM integration (only internal audio!) bt878
+ LR136 = Flyvideo 2100/3100 (Low profile, SAA7130/SAA7134)
+ LR137 = Flyvideo DV2000/DV3000 (SAA7130/SAA7134 + IEEE1394)
+ LR138 Rev.C= Flyvideo 2000 (SAA7130)
+ or Flyvideo 3000 (SAA7134) w/Stereo TV
+ These exist in variations w/FM and w/Remote sometimes denoted
+ by suffixes "FM" and "R".
+ 3) You have a laptop (miniPCI card):
+ Product = FlyTV Platinum Mini
+ Model/Chip = LR212/saa7135
+
+ Lifeview.com.tw states (Feb. 2002):
+ "The FlyVideo2000 and FlyVideo2000s product name have renamed to FlyVideo98."
+ Their Bt8x8 cards are listed as discontinued.
+ Flyvideo 2000S was probably sold as Flyvideo 3000 in some contries(Europe?).
+ The new Flyvideo 2000/3000 are SAA7130/SAA7134 based.
+
+ "Flyvideo II" had been the name for the 848 cards, nowadays (in Germany)
+ this name is re-used for LR50 Rev.W.
+ The Lifeview website mentioned Flyvideo III at some time, but such a card
+ has not yet been seen (perhaps it was the german name for LR90 [stereo]).
+ These cards are sold by many OEMs too.
+
+ FlyVideo A2 (Elta 8680)= LR90 Rev.F (w/Remote, w/o FM, stereo TV by tda9821) {Germany}
+ Lifeview 3000 (Elta 8681) as sold by Plus(April 2002), Germany = LR138 w/ saa7134
+
+
+Typhoon TV card series:
+-----------------------
+ These can be CPH, Flyvideo, Pixelview or KNC1 series.
+ Typhoon is the brand of Anubis.
+ Model 50680 got re-used, some model no. had different contents over time.
+
+ Models:
+ 50680 "TV Tuner PCI Pal BG"(old,red package)=can be CPH03x(bt848) or CPH06x(bt878)
+ 50680 "TV Tuner Pal BG" (blue package)= Pixelview PV-BT878P+ (Rev 9B)
+ 50681 "TV Tuner PCI Pal I" (variant of 50680)
+ 50682 "TView TV/FM Tuner Pal BG" = Flyvideo 98FM (LR50 Rev.Q)
+ Note: The package has a picture of CPH05x (which would be a real TView)
+ 50683 "TV Tuner PCI SECAM" (variant of 50680)
+ 50684 "TV Tuner Pal BG" = Pixelview 878TV(Rev.3D)
+ 50686 "TV Tuner" = KNC1 TV Station
+ 50687 "TV Tuner stereo" = KNC1 TV Station pro
+ 50688 "TV Tuner RDS" (black package) = KNC1 TV Station RDS
+ 50689 TV SAT DVB-S CARD CI PCI (SAA7146AH, SU1278?) = "KNC1 TV Station DVB-S"
+ 50692 "TV/FM Tuner" (small PCB)
+ 50694 TV TUNER CARD RDS (PHILIPS CHIPSET SAA7134HL)
+ 50696 TV TUNER STEREO (PHILIPS CHIPSET SAA7134HL, MK3ME Tuner)
+ 50804 PC-SAT TV/Audio Karte = Techni-PC-Sat (ZORAN 36120PQC, Tuner:Alps)
+ 50866 TVIEW SAT RECEIVER+ADR
+ 50868 "TV/FM Tuner Pal I" (variant of 50682)
+ 50999 "TV/FM Tuner Secam" (variant of 50682)
+
+
+Guillemot
+---------
+ Maxi-TV PCI (ZR36120)
+ Maxi TV Video 2 = LR50 Rev.Q (FI1216MF, PAL BG+SECAM)
+ Maxi TV Video 3 = CPH064 (PAL BG + SECAM)
+
+Mentor
+------
+ Mentor TV card ("55-878TV-U1") = Pixelview 878TV(Rev.3F) (w/FM w/Remote)
+
+Prolink
+-------
+ TV cards:
+ PixelView Play TV pro - (Model: PV-BT878P+ REV 8E)
+ PixelView Play TV pro - (Model: PV-BT878P+ REV 9D)
+ PixelView Play TV pro - (Model: PV-BT878P+ REV 4C / 8D / 10A )
+ PixelView Play TV - (Model: PV-BT848P+)
+ 878TV - (Model: PV-BT878TV)
+
+ Multimedia TV packages (card + software pack):
+ PixelView Play TV Theater - (Model: PV-M4200) = PixelView Play TV pro + Software
+ PixelView Play TV PAK - (Model: PV-BT878P+ REV 4E)
+ PixelView Play TV/VCR - (Model: PV-M3200 REV 4C / 8D / 10A )
+ PixelView Studio PAK - (Model: M2200 REV 4C / 8D / 10A )
+ PixelView PowerStudio PAK - (Model: PV-M3600 REV 4E)
+ PixelView DigitalVCR PAK - (Model: PV-M2400 REV 4C / 8D / 10A )
+
+ PixelView PlayTV PAK II (TV/FM card + usb camera) PV-M3800
+ PixelView PlayTV XP PV-M4700,PV-M4700(w/FM)
+ PixelView PlayTV DVR PV-M4600 package contents:PixelView PlayTV pro, windvr & videoMail s/w
+
+ Further Cards:
+ PV-BT878P+rev.9B (Play TV Pro, opt. w/FM w/NICAM)
+ PV-BT878P+rev.2F
+ PV-BT878P Rev.1D (bt878, capture only)
+
+ XCapture PV-CX881P (cx23881)
+ PlayTV HD PV-CX881PL+, PV-CX881PL+(w/FM) (cx23881)
+
+ DTV3000 PV-DTV3000P+ DVB-S CI = Twinhan VP-1030
+ DTV2000 DVB-S = Twinhan VP-1020
+
+ Video Conferencing:
+ PixelView Meeting PAK - (Model: PV-BT878P)
+ PixelView Meeting PAK Lite - (Model: PV-BT878P)
+ PixelView Meeting PAK plus - (Model: PV-BT878P+rev 4C/8D/10A)
+ PixelView Capture - (Model: PV-BT848P)
+
+ PixelView PlayTV USB pro
+ Model No. PV-NT1004+, PV-NT1004+ (w/FM) = NT1004 USB decoder chip + SAA7113 video decoder chip
+
+Dynalink
+--------
+ These are CPH series.
+
+Phoebemicro
+-----------
+ TV Master = CPH030 or CPH060
+ TV Master FM = CPH050
+
+Genius/Kye
+----------
+ Video Wonder/Genius Internet Video Kit = LR37 Rev.C
+ Video Wonder Pro II (848 or 878) = LR26
+
+Tekram
+------
+ VideoCap C205 (Bt848)
+ VideoCap C210 (zr36120 +Philips)
+ CaptureTV M200 (ISA)
+ CaptureTV M205 (Bt848)
+
+Lucky Star
+----------
+ Image World Conference TV = LR50 Rev. Q
+
+Leadtek
+-------
+ WinView 601 (Bt848)
+ WinView 610 (Zoran)
+ WinFast2000
+ WinFast2000 XP
+
+KNC One
+-------
+ TV-Station
+ TV-Station SE (+Software Bundle)
+ TV-Station pro (+TV stereo)
+ TV-Station FM (+Radio)
+ TV-Station RDS (+RDS)
+ TV Station SAT (analog satellite)
+ TV-Station DVB-S
+
+ newer Cards have saa7134, but model name stayed the same?
+
+Provideo
+--------
+ PV951 or PV-951 (also are sold as:
+ Boeder TV-FM Video Capture Card
+ Titanmedia Supervision TV-2400
+ Provideo PV951 TF
+ 3DeMon PV951
+ MediaForte TV-Vision PV951
+ Yoko PV951
+ Vivanco Tuner Card PCI Art.-Nr.: 68404
+ ) now named PV-951T
+
+ Surveillance Series
+ PV-141
+ PV-143
+ PV-147
+ PV-148 (capture only)
+ PV-150
+ PV-151
+
+ TV-FM Tuner Series
+ PV-951TDV (tv tuner + 1394)
+ PV-951T/TF
+ PV-951PT/TF
+ PV-956T/TF Low Profile
+ PV-911
+
+Highscreen
+----------
+ TV Karte = LR50 Rev.S
+ TV-Boostar = Terratec Terra TV+ Version 1.0 (Bt848, tda9821) "ceb105.pcb"
+
+Zoltrix
+-------
+ Face to Face Capture (Bt848 capture only) (PCB "VP-2848")
+ Face To Face TV MAX (Bt848) (PCB "VP-8482 Rev1.3")
+ Genie TV (Bt878) (PCB "VP-8790 Rev 2.1")
+ Genie Wonder Pro
+
+AVerMedia
+---------
+ AVer FunTV Lite (ISA, AV3001 chipset) "M101.C"
+ AVerTV
+ AVerTV Stereo
+ AVerTV Studio (w/FM)
+ AVerMedia TV98 with Remote
+ AVerMedia TV/FM98 Stereo
+ AVerMedia TVCAM98
+ TVCapture (Bt848)
+ TVPhone (Bt848)
+ TVCapture98 (="AVerMedia TV98" in USA) (Bt878)
+ TVPhone98 (Bt878, w/FM)
+
+ PCB PCI-ID Model-Name Eeprom Tuner Sound Country
+ --------------------------------------------------------------------
+ M101.C ISA !
+ M108-B Bt848 -- FR1236 US (2),(3)
+ M1A8-A Bt848 AVer TV-Phone FM1216 --
+ M168-T 1461:0003 AVerTV Studio 48:17 FM1216 TDA9840T D (1) w/FM w/Remote
+ M168-U 1461:0004 TVCapture98 40:11 FI1216 -- D w/Remote
+ M168II-B 1461:0003 Medion MD9592 48:16 FM1216 TDA9873H D w/FM
+
+ (1) Daughterboard MB68-A with TDA9820T and TDA9840T
+ (2) Sony NE41S soldered (stereo sound?)
+ (3) Daughterboard M118-A w/ pic 16c54 and 4 MHz quartz
+
+ US site has different drivers for (as of 09/2002):
+ EZ Capture/InterCam PCI (BT-848 chip)
+ EZ Capture/InterCam PCI (BT-878 chip)
+ TV-Phone (BT-848 chip)
+ TV98 (BT-848 chip)
+ TV98 With Remote (BT-848 chip)
+ TV98 (BT-878 chip)
+ TV98 With Remote (BT-878)
+ TV/FM98 (BT-878 chip)
+ AVerTV
+ AverTV Stereo
+ AVerTV Studio
+
+ DE hat diverse Treiber fuer diese Modelle (Stand 09/2002):
+ TVPhone (848) mit Philips tuner FR12X6 (w/ FM radio)
+ TVPhone (848) mit Philips tuner FM12X6 (w/ FM radio)
+ TVCapture (848) w/Philips tuner FI12X6
+ TVCapture (848) non-Philips tuner
+ TVCapture98 (Bt878)
+ TVPhone98 (Bt878)
+ AVerTV und TVCapture98 w/VCR (Bt 878)
+ AVerTVStudio und TVPhone98 w/VCR (Bt878)
+ AVerTV GO Serie (Kein SVideo Input)
+ AVerTV98 (BT-878 chip)
+ AVerTV98 mit Fernbedienung (BT-878 chip)
+ AVerTV/FM98 (BT-878 chip)
+
+ VDOmate (www.averm.com.cn) = M168U ?
+
+Aimslab
+-------
+ Video Highway or "Video Highway TR200" (ISA)
+ Video Highway Xtreme (aka "VHX") (Bt848, FM w/ TEA5757)
+
+IXMicro (former: IMS=Integrated Micro Solutions)
+-------
+ IXTV BT848 (=TurboTV)
+ IXTV BT878
+ IMS TurboTV (Bt848)
+
+Lifetec/Medion/Tevion/Aldi
+--------------------------
+ LT9306/MD9306 = CPH061
+ LT9415/MD9415 = LR90 Rev.F or Rev.G
+ MD9592 = Avermedia TVphone98 (PCI_ID=1461:0003), PCB-Rev=M168II-B (w/TDA9873H)
+ MD9717 = KNC One (Rev D4, saa7134, FM1216 MK2 tuner)
+ MD5044 = KNC One (Rev D4, saa7134, FM1216ME MK3 tuner)
+
+Modular Technologies (www.modulartech.com) UK
+---------------------------------------------
+ MM100 PCTV (Bt848)
+ MM201 PCTV (Bt878, Bt832) w/ Quartzsight camera
+ MM202 PCTV (Bt878, Bt832, tda9874)
+ MM205 PCTV (Bt878)
+ MM210 PCTV (Bt878) (Galaxy TV, Galaxymedia ?)
+
+Terratec
+--------
+ Terra TV+ Version 1.0 (Bt848), "ceb105.PCB" printed on the PCB, TDA9821
+ Terra TV+ Version 1.1 (Bt878), "LR74 Rev.E" printed on the PCB, TDA9821
+ Terra TValueRadio, "LR102 Rev.C" printed on the PCB
+ Terra TV/Radio+ Version 1.0, "80-CP2830100-0" TTTV3 printed on the PCB,
+ "CPH010-E83" on the back, SAA6588T, TDA9873H
+ Terra TValue Version BT878, "80-CP2830110-0 TTTV4" printed on the PCB,
+ "CPH011-D83" on back
+ Terra TValue Version 1.0 "ceb105.PCB" (really identical to Terra TV+ Version 1.0)
+ Terra TValue New Revision "LR102 Rec.C"
+ Terra Active Radio Upgrade (tea5757h, saa6588t)
+
+ LR74 is a newer PCB revision of ceb105 (both incl. connector for Active Radio Upgrade)
+
+ Cinergy 400 (saa7134), "E877 11(S)", "PM820092D" printed on PCB
+ Cinergy 600 (saa7134)
+
+Technisat
+---------
+ Discos ADR PC-Karte ISA (no TV!)
+ Discos ADR PC-Karte PCI (probably no TV?)
+ Techni-PC-Sat (Sat. analog)
+ Rev 1.2 (zr36120, vpx3220, stv0030, saa5246, BSJE3-494A)
+ Mediafocus I (zr36120/zr36125, drp3510, Sat. analog + ADR Radio)
+ Mediafocus II (saa7146, Sat. analog)
+ SatADR Rev 2.1 (saa7146a, saa7113h, stv0056a, msp3400c, drp3510a, BSKE3-307A)
+ SkyStar 1 DVB (AV7110) = Technotrend Premium
+ SkyStar 2 DVB (B2C2) (=Sky2PC)
+
+Siemens
+-------
+ Multimedia eXtension Board (MXB) (SAA7146, SAA7111)
+
+Powercolor
+----------
+ MTV878
+ Package comes with different contents:
+ a) pcb "MTV878" (CARD=75)
+ b) Pixelview Rev. 4_
+ MTV878R w/Remote Control
+ MTV878F w/Remote Control w/FM radio
+
+Pinnacle
+--------
+ Mirovideo PCTV (Bt848)
+ Mirovideo PCTV SE (Bt848)
+ Mirovideo PCTV Pro (Bt848 + Daughterboard for TV Stereo and FM)
+ Studio PCTV Rave (Bt848 Version = Mirovideo PCTV)
+ Studio PCTV Rave (Bt878 package w/o infrared)
+ Studio PCTV (Bt878)
+ Studio PCTV Pro (Bt878 stereo w/ FM)
+ Pinnacle PCTV (Bt878, MT2032)
+ Pinnacle PCTV Pro (Bt878, MT2032)
+ Pinncale PCTV Sat (bt878a, HM1821/1221) ["Conexant CX24110 with CX24108 tuner, aka HM1221/HM1811"]
+ Pinnacle PCTV Sat XE
+
+ M(J)PEG capture and playback:
+ DC1+ (ISA)
+ DC10 (zr36057, zr36060, saa7110, adv7176)
+ DC10+ (zr36067, zr36060, saa7110, adv7176)
+ DC20 (ql16x24b,zr36050, zr36016, saa7110, saa7187 ...)
+ DC30 (zr36057, zr36050, zr36016, vpx3220, adv7176, ad1843, tea6415, miro FST97A1)
+ DC30+ (zr36067, zr36050, zr36016, vpx3220, adv7176)
+ DC50 (zr36067, zr36050, zr36016, saa7112, adv7176 (2 pcs.?), ad1843, miro FST97A1, Lattice ???)
+
+Lenco
+-----
+ MXR-9565 (=Technisat Mediafocus?)
+ MXR-9571 (Bt848) (=CPH031?)
+ MXR-9575
+ MXR-9577 (Bt878) (=Prolink 878TV Rev.3x)
+ MXTV-9578CP (Bt878) (= Prolink PV-BT878P+4E)
+
+Iomega
+------
+ Buz (zr36067, zr36060, saa7111, saa7185)
+
+LML
+---
+ LML33 (zr36067, zr36060, bt819, bt856)
+
+Grandtec
+--------
+ Grand Video Capture (Bt848)
+ Multi Capture Card (Bt878)
+
+Koutech
+-------
+ KW-606 (Bt848)
+ KW-607 (Bt848 capture only)
+ KW-606RSF
+ KW-607A (capture only)
+ KW-608 (Zoran capture only)
+
+IODATA (jp)
+------
+ GV-BCTV/PCI
+ GV-BCTV2/PCI
+ GV-BCTV3/PCI
+ GV-BCTV4/PCI
+ GV-VCP/PCI (capture only)
+ GV-VCP2/PCI (capture only)
+
+Canopus (jp)
+-------
+ WinDVR = Kworld "KW-TVL878RF"
+
+www.sigmacom.co.kr
+------------------
+ Sigma Cyber TV II
+
+www.sasem.co.kr
+---------------
+ Litte OnAir TV
+
+hama
+----
+ TV/Radio-Tuner Card, PCI (Model 44677) = CPH051
+
+Sigma Designs
+-------------
+ Hollywood plus (em8300, em9010, adv7175), (PCB "M340-10") MPEG DVD decoder
+
+Formac
+------
+ iProTV (Card for iMac Mezzanine slot, Bt848+SCSI)
+ ProTV (Bt848)
+ ProTV II = ProTV Stereo (Bt878) ["stereo" means FM stereo, tv is still mono]
+
+ATI
+---
+ TV-Wonder
+ TV-Wonder VE
+
+Diamond Multimedia
+------------------
+ DTV2000 (Bt848, tda9875)
+
+Aopen
+-----
+ VA1000 Plus (w/ Stereo)
+ VA1000 Lite
+ VA1000 (=LR90)
+
+Intel
+-----
+ Smart Video Recorder (ISA full-length)
+ Smart Video Recorder pro (ISA half-length)
+ Smart Video Recorder III (Bt848)
+
+STB
+---
+ STB Gateway 6000704 (bt878)
+ STB Gateway 6000699 (bt848)
+ STB Gateway 6000402 (bt848)
+ STB TV130 PCI
+
+Videologic
+----------
+ Captivator Pro/TV (ISA?)
+ Captivator PCI/VC (Bt848 bundled with camera) (capture only)
+
+Technotrend
+------------
+ TT-SAT PCI (PCB "Sat-PCI Rev.:1.3.1"; zr36125, vpx3225d, stc0056a, Tuner:BSKE6-155A
+ TT-DVB-Sat
+ revisions 1.1, 1.3, 1.5, 1.6 and 2.1
+ This card is sold as OEM from:
+ Siemens DVB-s Card
+ Hauppauge WinTV DVB-S
+ Technisat SkyStar 1 DVB
+ Galaxis DVB Sat
+ Now this card is called TT-PCline Premium Family
+ TT-Budget (saa7146, bsru6-701a)
+ This card is sold as OEM from:
+ Hauppauge WinTV Nova
+ Satelco Standard PCI (DVB-S)
+ TT-DVB-C PCI
+
+Teles
+-----
+ DVB-s (Rev. 2.2, BSRV2-301A, data only?)
+
+Remote Vision
+-------------
+ MX RV605 (Bt848 capture only)
+
+Boeder
+------
+ PC ChatCam (Model 68252) (Bt848 capture only)
+ Tv/Fm Capture Card (Model 68404) = PV951
+
+Media-Surfer (esc-kathrein.de)
+-------------------------------
+ Sat-Surfer (ISA)
+ Sat-Surfer PCI = Techni-PC-Sat
+ Cable-Surfer 1
+ Cable-Surfer 2
+ Cable-Surfer PCI (zr36120)
+ Audio-Surfer (ISA Radio card)
+
+Jetway (www.jetway.com.tw)
+--------------------------
+ JW-TV 878M
+ JW-TV 878 = KWorld KW-TV878RF
+
+Galaxis
+-------
+ Galaxis DVB Card S CI
+ Galaxis DVB Card C CI
+ Galaxis DVB Card S
+ Galaxis DVB Card C
+ Galaxis plug.in S [neuer Name: Galaxis DVB Card S CI
+
+Hauppauge
+---------
+ many many WinTV models ...
+ WinTV DVBs = Technotrend Premium 1.3
+ WinTV NOVA = Technotrend Budget 1.1 "S-DVB DATA"
+ WinTV NOVA-CI "SDVBACI"
+ WinTV Nova USB (=Technotrend USB 1.0)
+ WinTV-Nexus-s (=Technotrend Premium 2.1 or 2.2)
+ WinTV PVR
+ WinTV PVR 250
+ WinTV PVR 450
+
+ US models
+ 990 WinTV-PVR-350 (249USD) (iTVC15 chipset + radio)
+ 980 WinTV-PVR-250 (149USD) (iTVC15 chipset)
+ 880 WinTV-PVR-PCI (199USD) (KFIR chipset + bt878)
+ 881 WinTV-PVR-USB
+ 190 WinTV-GO
+ 191 WinTV-GO-FM
+ 404 WinTV
+ 401 WinTV-radio
+ 495 WinTV-Theater
+ 602 WinTV-USB
+ 621 WinTV-USB-FM
+ 600 USB-Live
+ 698 WinTV-HD
+ 697 WinTV-D
+ 564 WinTV-Nexus-S
+
+ Deutsche Modelle
+ 603 WinTV GO
+ 719 WinTV Primio-FM
+ 718 WinTV PCI-FM
+ 497 WinTV Theater
+ 569 WinTV USB
+ 568 WinTV USB-FM
+ 882 WinTV PVR
+ 981 WinTV PVR 250
+ 891 WinTV-PVR-USB
+ 541 WinTV Nova
+ 488 WinTV Nova-Ci
+ 564 WinTV-Nexus-s
+ 727 WinTV-DVB-c
+ 545 Common Interface
+ 898 WinTV-Nova-USB
+
+ UK models
+ 607 WinTV Go
+ 693,793 WinTV Primio FM
+ 647,747 WinTV PCI FM
+ 498 WinTV Theater
+ 883 WinTV PVR
+ 893 WinTV PVR USB (Duplicate entry)
+ 566 WinTV USB (UK)
+ 573 WinTV USB FM
+ 429 Impact VCB (bt848)
+ 600 USB Live (Video-In 1x Comp, 1xSVHS)
+ 542 WinTV Nova
+ 717 WinTV DVB-S
+ 909 Nova-t PCI
+ 893 Nova-t USB (Duplicate entry)
+ 802 MyTV
+ 804 MyView
+ 809 MyVideo
+ 872 MyTV2Go FM
+
+
+ 546 WinTV Nova-S CI
+ 543 WinTV Nova
+ 907 Nova-S USB
+ 908 Nova-T USB
+ 717 WinTV Nexus-S
+ 157 DEC3000-s Standalone + USB
+
+ Spain
+ 685 WinTV-Go
+ 690 WinTV-PrimioFM
+ 416 WinTV-PCI Nicam Estereo
+ 677 WinTV-PCI-FM
+ 699 WinTV-Theater
+ 683 WinTV-USB
+ 678 WinTV-USB-FM
+ 983 WinTV-PVR-250
+ 883 WinTV-PVR-PCI
+ 993 WinTV-PVR-350
+ 893 WinTV-PVR-USB
+ 728 WinTV-DVB-C PCI
+ 832 MyTV2Go
+ 869 MyTV2Go-FM
+ 805 MyVideo (USB)
+
+
+Matrix-Vision
+-------------
+ MATRIX-Vision MV-Delta
+ MATRIX-Vision MV-Delta 2
+ MVsigma-SLC (Bt848)
+
+Conceptronic (.net)
+------------
+ TVCON FM, TV card w/ FM = CPH05x
+ TVCON = CPH06x
+
+BestData
+--------
+ HCC100 = VCC100rev1 + camera
+ VCC100 rev1 (bt848)
+ VCC100 rev2 (bt878)
+
+Gallant (www.gallantcom.com) www.minton.com.tw
+-----------------------------------------------
+ Intervision IV-510 (capture only bt8x8)
+ Intervision IV-550 (bt8x8)
+ Intervision IV-100 (zoran)
+ Intervision IV-1000 (bt8x8)
+
+Asonic (www.asonic.com.cn) (website down)
+-----------------------------------------
+ SkyEye tv 878
+
+Hoontech
+--------
+ 878TV/FM
+
+Teppro (www.itcteppro.com.tw)
+-----------------------------
+ ITC PCITV (Card Ver 1.0) "Teppro TV1/TVFM1 Card"
+ ITC PCITV (Card Ver 2.0)
+ ITC PCITV (Card Ver 3.0) = "PV-BT878P+ (REV.9D)"
+ ITC PCITV (Card Ver 4.0)
+ TEPPRO IV-550 (For BT848 Main Chip)
+ ITC DSTTV (bt878, satellite)
+ ITC VideoMaker (saa7146, StreamMachine sm2110, tvtuner) "PV-SM2210P+ (REV:1C)"
+
+Kworld (www.kworld.com.tw)
+--------------------------
+ PC TV Station
+ KWORLD KW-TV878R TV (no radio)
+ KWORLD KW-TV878RF TV (w/ radio)
+
+ KWORLD KW-TVL878RF (low profile)
+
+ KWORLD KW-TV713XRF (saa7134)
+
+
+ MPEG TV Station (same cards as above plus WinDVR Software MPEG en/decoder)
+ KWORLD KW-TV878R -Pro TV (no Radio)
+ KWORLD KW-TV878RF-Pro TV (w/ Radio)
+ KWORLD KW-TV878R -Ultra TV (no Radio)
+ KWORLD KW-TV878RF-Ultra TV (w/ Radio)
+
+
+
+JTT/ Justy Corp.(http://www.jtt.ne.jp/)
+---------------------------------------------------------------------
+ JTT-02 (JTT TV) "TV watchmate pro" (bt848)
+
+ADS www.adstech.com
+-------------------
+ Channel Surfer TV ( CHX-950 )
+ Channel Surfer TV+FM ( CHX-960FM )
+
+AVEC www.prochips.com
+---------------------
+ AVEC Intercapture (bt848, tea6320)
+
+NoBrand
+-------
+ TV Excel = Australian Name for "PV-BT878P+ 8E" or "878TV Rev.3_"
+
+Mach www.machspeed.com
+----
+ Mach TV 878
+
+Eline www.eline-net.com/
+-----
+ Eline Vision TVMaster / TVMaster FM (ELV-TVM/ ELV-TVM-FM) = LR26 (bt878)
+ Eline Vision TVMaster-2000 (ELV-TVM-2000, ELV-TVM-2000-FM)= LR138 (saa713x)
+
+Spirit
+------
+ Spirit TV Tuner/Video Capture Card (bt848)
+
+Boser www.boser.com.tw
+-----
+ HS-878 Mini PCI Capture Add-on Card
+ HS-879 Mini PCI 3D Audio and Capture Add-on Card (w/ ES1938 Solo-1)
+
+Satelco www.citycom-gmbh.de, www.satelco.de
+-------
+ TV-FM =KNC1 saa7134
+ Standard PCI (DVB-S) = Technotrend Budget
+ Standard PCI (DVB-S) w/ CI
+ Satelco Highend PCI (DVB-S) = Technotrend Premium
+
+
+Sensoray www.sensoray.com
+--------
+ Sensoray 311 (PC/104 bus)
+ Sensoray 611 (PCI)
+
+CEI (Chartered Electronics Industries Pte Ltd [CEI] [FCC ID HBY])
+---
+ TV Tuner - HBY-33A-RAFFLES Brooktree Bt848KPF + Philips
+ TV Tuner MG9910 - HBY33A-TVO CEI + Philips SAA7110 + OKI M548262 + ST STV8438CV
+ Primetime TV (ISA)
+ acquired by Singapore Technologies
+ now operating as Chartered Semiconductor Manufacturing
+ Manufacturer of video cards is listed as:
+ Cogent Electronics Industries [CEI]
+
+AITech
+------
+ Wavewatcher TV (ISA)
+ AITech WaveWatcher TV-PCI = can be LR26 (Bt848) or LR50 (BT878)
+ WaveWatcher TVR-202 TV/FM Radio Card (ISA)
+
+MAXRON
+------
+ Maxron MaxTV/FM Radio (KW-TV878-FNT) = Kworld or JW-TV878-FBK
+
+www.ids-imaging.de
+------------------
+ Falcon Series (capture only)
+ In USA: http://www.theimagingsource.com/
+ DFG/LC1
+
+www.sknet-web.co.jp
+-------------------
+ SKnet Monster TV (saa7134)
+
+A-Max www.amaxhk.com (Colormax, Amax, Napa)
+-------------------
+ APAC Viewcomp 878
+
+Cybertainment
+-------------
+ CyberMail AV Video Email Kit w/ PCI Capture Card (capture only)
+ CyberMail Xtreme
+ These are Flyvideo
+
+VCR (http://www.vcrinc.com/)
+---
+ Video Catcher 16
+
+Twinhan
+-------
+ DST Card/DST-IP (bt878, twinhan asic) VP-1020
+ Sold as:
+ KWorld DVBS Satellite TV-Card
+ Powercolor DSTV Satellite Tuner Card
+ Prolink Pixelview DTV2000
+ Provideo PV-911 Digital Satellite TV Tuner Card With Common Interface ?
+ DST-CI Card (DVB Satellite) VP-1030
+ DCT Card (DVB cable)
+
+MSI
+---
+ MSI TV@nywhere Tuner Card (MS-8876) (CX23881/883) Not Bt878 compatible.
+ MS-8401 DVB-S
+
+Focus www.focusinfo.com
+-----
+ InVideo PCI (bt878)
+
+Sdisilk www.sdisilk.com/
+-------
+ SDI Silk 100
+ SDI Silk 200 SDI Input Card
+
+www.euresys.com
+ PICOLO series
+
+PMC/Pace
+www.pacecom.co.uk website closed
+
+Mercury www.kobian.com (UK and FR)
+ LR50
+ LR138RBG-Rx == LR138
+
+TEC sound (package and manuals don't have any other manufacturer info) TecSound
+ Though educated googling found: www.techmakers.com
+ TV-Mate = Zoltrix VP-8482
+
+Lorenzen www.lorenzen.de
+--------
+ SL DVB-S PCI = Technotrend Budget PCI (su1278 or bsru version)
+
+Origo (.uk) www.origo2000.com
+ PC TV Card = LR50
+
+I/O Magic www.iomagic.com
+---------
+ PC PVR - Desktop TV Personal Video Recorder DR-PCTV100 = Pinnacle ROB2D-51009464 4.0 + Cyberlink PowerVCR II
+
+Arowana
+-------
+ TV-Karte / Poso Power TV (?) = Zoltrix VP-8482 (?)
+
+iTVC15 boards:
+-------------
+kuroutoshikou.com ITVC15
+yuan.com MPG160 PCI TV (Internal PCI MPEG2 encoder card plus TV-tuner)
+
+Asus www.asuscom.com
+ Asus TV Tuner Card 880 NTSC (low profile, cx23880)
+ Asus TV (saa7134)
+
+Hoontech
+--------
+http://www.hoontech.de/
+ HART Vision 848 (H-ART Vision 848)
+ HART Vision 878 (H-Art Vision 878)
diff --git a/Documentation/video4linux/bttv/ICs b/Documentation/video4linux/bttv/ICs
new file mode 100644
index 000000000..611315f87
--- /dev/null
+++ b/Documentation/video4linux/bttv/ICs
@@ -0,0 +1,37 @@
+all boards:
+
+Brooktree Bt848/848A/849/878/879: video capture chip
+
+
+
+Miro PCTV:
+
+Philips or Temic Tuner
+
+
+
+Hauppauge Win/TV pci (version 405):
+
+Microchip 24LC02B or
+Philips 8582E2Y: 256 Byte EEPROM with configuration information
+ I2C 0xa0-0xa1, (24LC02B also responds to 0xa2-0xaf)
+Philips SAA5246AGP/E: Videotext decoder chip, I2C 0x22-0x23
+TDA9800: sound decoder
+Winbond W24257AS-35: 32Kx8 CMOS static RAM (Videotext buffer mem)
+14052B: analog switch for selection of sound source
+
+PAL:
+TDA5737: VHF, hyperband and UHF mixer/oscillator for TV and VCR 3-band tuners
+TSA5522: 1.4 GHz I2C-bus controlled synthesizer, I2C 0xc2-0xc3
+
+NTSC:
+TDA5731: VHF, hyperband and UHF mixer/oscillator for TV and VCR 3-band tuners
+TSA5518: no datasheet available on Philips site
+
+
+
+STB TV pci:
+
+???
+if you want better support for STB cards send me info!
+Look at the board! What chips are on it?
diff --git a/Documentation/video4linux/bttv/Insmod-options b/Documentation/video4linux/bttv/Insmod-options
new file mode 100644
index 000000000..14c065fa2
--- /dev/null
+++ b/Documentation/video4linux/bttv/Insmod-options
@@ -0,0 +1,172 @@
+
+Note: "modinfo <module>" prints various information about a kernel
+module, among them a complete and up-to-date list of insmod options.
+This list tends to be outdated because it is updated manually ...
+
+==========================================================================
+
+bttv.o
+ the bt848/878 (grabber chip) driver
+
+ insmod args:
+ card=n card type, see CARDLIST for a list.
+ tuner=n tuner type, see CARDLIST for a list.
+ radio=0/1 card supports radio
+ pll=0/1/2 pll settings
+ 0: don't use PLL
+ 1: 28 MHz crystal installed
+ 2: 35 MHz crystal installed
+
+ triton1=0/1 for Triton1 (+others) compatibility
+ vsfx=0/1 yet another chipset bug compatibility bit
+ see README.quirks for details on these two.
+
+ bigendian=n Set the endianness of the gfx framebuffer.
+ Default is native endian.
+ fieldnr=0/1 Count fields. Some TV descrambling software
+ needs this, for others it only generates
+ 50 useless IRQs/sec. default is 0 (off).
+ autoload=0/1 autoload helper modules (tuner, audio).
+ default is 1 (on).
+ bttv_verbose=0/1/2 verbose level (at insmod time, while
+ looking at the hardware). default is 1.
+ bttv_debug=0/1 debug messages (for capture).
+ default is 0 (off).
+ irq_debug=0/1 irq handler debug messages.
+ default is 0 (off).
+ gbuffers=2-32 number of capture buffers for mmap'ed capture.
+ default is 4.
+ gbufsize= size of capture buffers. default and
+ maximum value is 0x208000 (~2MB)
+ no_overlay=0 Enable overlay on broken hardware. There
+ are some chipsets (SIS for example) which
+ are known to have problems with the PCI DMA
+ push used by bttv. bttv will disable overlay
+ by default on this hardware to avoid crashes.
+ With this insmod option you can override this.
+ no_overlay=1 Disable overlay. It should be used by broken
+ hardware that doesn't support PCI2PCI direct
+ transfers.
+ automute=0/1 Automatically mutes the sound if there is
+ no TV signal, on by default. You might try
+ to disable this if you have bad input signal
+ quality which leading to unwanted sound
+ dropouts.
+ chroma_agc=0/1 AGC of chroma signal, off by default.
+ adc_crush=0/1 Luminance ADC crush, on by default.
+ i2c_udelay= Allow reduce I2C speed. Default is 5 usecs
+ (meaning 66,67 Kbps). The default is the
+ maximum supported speed by kernel bitbang
+ algorithm. You may use lower numbers, if I2C
+ messages are lost (16 is known to work on
+ all supported cards).
+
+ bttv_gpio=0/1
+ gpiomask=
+ audioall=
+ audiomux=
+ See Sound-FAQ for a detailed description.
+
+ remap, card, radio and pll accept up to four comma-separated arguments
+ (for multiple boards).
+
+tuner.o
+ The tuner driver. You need this unless you want to use only
+ with a camera or external tuner ...
+
+ insmod args:
+ debug=1 print some debug info to the syslog
+ type=n type of the tuner chip. n as follows:
+ see CARDLIST for a complete list.
+ pal=[bdgil] select PAL variant (used for some tuners
+ only, important for the audio carrier).
+
+tvaudio.o
+ new, experimental module which is supported to provide a single
+ driver for all simple i2c audio control chips (tda/tea*).
+
+ insmod args:
+ tda8425 = 1 enable/disable the support for the
+ tda9840 = 1 various chips.
+ tda9850 = 1 The tea6300 can't be autodetected and is
+ tda9855 = 1 therefore off by default, if you have
+ tda9873 = 1 this one on your card (STB uses these)
+ tda9874a = 1 you have to enable it explicitly.
+ tea6300 = 0 The two tda985x chips use the same i2c
+ tea6420 = 1 address and can't be disturgished from
+ pic16c54 = 1 each other, you might have to disable
+ the wrong one.
+ debug = 1 print debug messages
+
+ insmod args for tda9874a:
+ tda9874a_SIF=1/2 select sound IF input pin (1 or 2)
+ (default is pin 1)
+ tda9874a_AMSEL=0/1 auto-mute select for NICAM (default=0)
+ Please read note 3 below!
+ tda9874a_STD=n select TV sound standard (0..8):
+ 0 - A2, B/G
+ 1 - A2, M (Korea)
+ 2 - A2, D/K (1)
+ 3 - A2, D/K (2)
+ 4 - A2, D/K (3)
+ 5 - NICAM, I
+ 6 - NICAM, B/G
+ 7 - NICAM, D/K (default)
+ 8 - NICAM, L
+
+ Note 1: tda9874a supports both tda9874h (old) and tda9874a (new) chips.
+ Note 2: tda9874h/a and tda9875 (which is supported separately by
+ tda9875.o) use the same i2c address so both modules should not be
+ used at the same time.
+ Note 3: Using tda9874a_AMSEL option depends on your TV card design!
+ AMSEL=0: auto-mute will switch between NICAM sound
+ and the sound on 1st carrier (i.e. FM mono or AM).
+ AMSEL=1: auto-mute will switch between NICAM sound
+ and the analog mono input (MONOIN pin).
+ If tda9874a decoder on your card has MONOIN pin not connected, then
+ use only tda9874_AMSEL=0 or don't specify this option at all.
+ For example:
+ card=65 (FlyVideo 2000S) - set AMSEL=1 or AMSEL=0
+ card=72 (Prolink PV-BT878P rev.9B) - set AMSEL=0 only
+
+msp3400.o
+ The driver for the msp34xx sound processor chips. If you have a
+ stereo card, you probably want to insmod this one.
+
+ insmod args:
+ debug=1/2 print some debug info to the syslog,
+ 2 is more verbose.
+ simple=1 Use the "short programming" method. Newer
+ msp34xx versions support this. You need this
+ for dbx stereo. Default is on if supported by
+ the chip.
+ once=1 Don't check the TV-stations Audio mode
+ every few seconds, but only once after
+ channel switches.
+ amsound=1 Audio carrier is AM/NICAM at 6.5 Mhz. This
+ should improve things for french people, the
+ carrier autoscan seems to work with FM only...
+
+tea6300.o - OBSOLETE (use tvaudio instead)
+ The driver for the tea6300 fader chip. If you have a stereo
+ card and the msp3400.o doesn't work, you might want to try this
+ one. This chip is seen on most STB TV/FM cards (usually from
+ Gateway OEM sold surplus on auction sites).
+
+ insmod args:
+ debug=1 print some debug info to the syslog.
+
+tda8425.o - OBSOLETE (use tvaudio instead)
+ The driver for the tda8425 fader chip. This driver used to be
+ part of bttv.c, so if your sound used to work but does not
+ anymore, try loading this module.
+
+ insmod args:
+ debug=1 print some debug info to the syslog.
+
+tda985x.o - OBSOLETE (use tvaudio instead)
+ The driver for the tda9850/55 audio chips.
+
+ insmod args:
+ debug=1 print some debug info to the syslog.
+ chip=9850/9855 set the chip type.
diff --git a/Documentation/video4linux/bttv/MAKEDEV b/Documentation/video4linux/bttv/MAKEDEV
new file mode 100644
index 000000000..093c0cd18
--- /dev/null
+++ b/Documentation/video4linux/bttv/MAKEDEV
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+function makedev () {
+
+ for dev in 0 1 2 3; do
+ echo "/dev/$1$dev: char 81 $[ $2 + $dev ]"
+ rm -f /dev/$1$dev
+ mknod /dev/$1$dev c 81 $[ $2 + $dev ]
+ chmod 666 /dev/$1$dev
+ done
+
+ # symlink for default device
+ rm -f /dev/$1
+ ln -s /dev/${1}0 /dev/$1
+}
+
+# see http://linux.bytesex.org/v4l2/API.html
+
+echo "*** new device names ***"
+makedev video 0
+makedev radio 64
+makedev vbi 224
+
+#echo "*** old device names (for compatibility only) ***"
+#makedev bttv 0
+#makedev bttv-fm 64
+#makedev bttv-vbi 224
diff --git a/Documentation/video4linux/bttv/Modprobe.conf b/Documentation/video4linux/bttv/Modprobe.conf
new file mode 100644
index 000000000..55f14650d
--- /dev/null
+++ b/Documentation/video4linux/bttv/Modprobe.conf
@@ -0,0 +1,11 @@
+# i2c
+alias char-major-89 i2c-dev
+options i2c-core i2c_debug=1
+options i2c-algo-bit bit_test=1
+
+# bttv
+alias char-major-81 videodev
+alias char-major-81-0 bttv
+options bttv card=2 radio=1
+options tuner debug=1
+
diff --git a/Documentation/video4linux/bttv/Modules.conf b/Documentation/video4linux/bttv/Modules.conf
new file mode 100644
index 000000000..8f258faf1
--- /dev/null
+++ b/Documentation/video4linux/bttv/Modules.conf
@@ -0,0 +1,14 @@
+# For modern kernels (2.6 or above), this belongs in /etc/modprobe.d/*.conf
+# For for 2.4 kernels or earlier, this belongs in /etc/modules.conf.
+
+# i2c
+alias char-major-89 i2c-dev
+options i2c-core i2c_debug=1
+options i2c-algo-bit bit_test=1
+
+# bttv
+alias char-major-81 videodev
+alias char-major-81-0 bttv
+options bttv card=2 radio=1
+options tuner debug=1
+
diff --git a/Documentation/video4linux/bttv/PROBLEMS b/Documentation/video4linux/bttv/PROBLEMS
new file mode 100644
index 000000000..2b8b0079f
--- /dev/null
+++ b/Documentation/video4linux/bttv/PROBLEMS
@@ -0,0 +1,62 @@
+- Start capturing by pressing "c" or by selecting it via a menu!
+
+- Start capturing by pressing "c" or by selecting it via a menu!!!
+
+- The memory of some S3 cards is not recognized right:
+
+ First of all, if you are not using XFree-3.2 or newer, upgrade AT LEAST to
+ XFree-3.2A! This solved the problem for most people.
+
+ Start up X11 like this: "XF86_S3 -probeonly" and write down where the
+ linear frame buffer is.
+ If it is different to the address found by bttv install bttv like this:
+ "insmod bttv vidmem=0xfb0"
+ if the linear frame buffer is at 0xfb000000 (i.e. omit the last 5 zeros!)
+
+ Some S3 cards even take up 64MB of memory but only report 32MB to the BIOS.
+ If this 64MB area overlaps the IO memory of the Bt848 you also have to
+ remap this. E.g.: insmod bttv vidmem=0xfb0 remap=0xfa0
+
+ If the video memory is found at the right place and there are no address
+ conflicts but still no picture (or the computer even crashes),
+ try disabling features of your PCI chipset in the BIOS setup.
+
+ Frank Kapahnke <frank@kapahnke.prima.ruhr.de> also reported that problems
+ with his S3 868 went away when he upgraded to XFree 3.2.
+
+
+- I still only get a black picture with my S3 card!
+
+ Even with XFree-3.2A some people have problems with their S3 cards
+ (mostly with Trio 64 but also with some others)
+ Get the free demo version of Accelerated X from www.xinside.com and try
+ bttv with it. bttv seems to work with most S3 cards with Accelerated X.
+
+ Since I do not know much (better make that almost nothing) about VGA card
+ programming I do not know the reason for this.
+ Looks like XFree does something different when setting up the video memory?
+ Maybe somebody can enlighten me?
+ Would be nice if somebody could get this to work with XFree since
+ Accelerated X costs more than some of the grabber cards ...
+
+ Better linear frame buffer support for S3 cards will probably be in
+ XFree 4.0.
+
+- Grabbing is not switched off when changing consoles with XFree.
+ That's because XFree and some AcceleratedX versions do not send unmap
+ events.
+
+- Some popup windows (e.g. of the window manager) are not refreshed.
+
+ Disable backing store by starting X with the option "-bs"
+
+- When using 32 bpp in XFree or 24+8bpp mode in AccelX 3.1 the system
+ can sometimes lock up if you use more than 1 bt848 card at the same time.
+ You will always get pixel errors when e.g. using more than 1 card in full
+ screen mode. Maybe we need something faster than the PCI bus ...
+
+
+- Some S3 cards and the Matrox Mystique will produce pixel errors with
+ full resolution in 32-bit mode.
+
+- Some video cards have problems with Accelerated X 4.1
diff --git a/Documentation/video4linux/bttv/README b/Documentation/video4linux/bttv/README
new file mode 100644
index 000000000..7e9cc0a45
--- /dev/null
+++ b/Documentation/video4linux/bttv/README
@@ -0,0 +1,86 @@
+
+Release notes for bttv
+======================
+
+You'll need at least these config options for bttv:
+ CONFIG_I2C=m
+ CONFIG_I2C_ALGOBIT=m
+ CONFIG_VIDEO_DEV=m
+
+The latest bttv version is available from http://bytesex.org/bttv/
+
+
+Make bttv work with your card
+-----------------------------
+
+Just try "modprobe bttv" and see if that works.
+
+If it doesn't bttv likely could not autodetect your card and needs some
+insmod options. The most important insmod option for bttv is "card=n"
+to select the correct card type. If you get video but no sound you've
+very likely specified the wrong (or no) card type. A list of supported
+cards is in CARDLIST.bttv
+
+If bttv takes very long to load (happens sometimes with the cheap
+cards which have no tuner), try adding this to your modules.conf:
+ options i2c-algo-bit bit_test=1
+
+/*(DEBLOBBED)*/
+
+If your card isn't listed in CARDLIST.bttv or if you have trouble making
+audio work, you should read the Sound-FAQ.
+
+
+Autodetecting cards
+-------------------
+
+bttv uses the PCI Subsystem ID to autodetect the card type. lspci lists
+the Subsystem ID in the second line, looks like this:
+
+00:0a.0 Multimedia video controller: Brooktree Corporation Bt878 (rev 02)
+ Subsystem: Hauppauge computer works Inc. WinTV/GO
+ Flags: bus master, medium devsel, latency 32, IRQ 5
+ Memory at e2000000 (32-bit, prefetchable) [size=4K]
+
+only bt878-based cards can have a subsystem ID (which does not mean
+that every card really has one). bt848 cards can't have a Subsystem
+ID and therefore can't be autodetected. There is a list with the ID's
+in bttv-cards.c (in case you are intrested or want to mail patches
+with updates).
+
+
+Still doesn't work?
+-------------------
+
+I do NOT have a lab with 30+ different grabber boards and a
+PAL/NTSC/SECAM test signal generator at home, so I often can't
+reproduce your problems. This makes debugging very difficult for me.
+If you have some knowledge and spare time, please try to fix this
+yourself (patches very welcome of course...) You know: The linux
+slogan is "Do it yourself".
+
+There is a mailing list: linux-media@vger.kernel.org
+http://vger.kernel.org/vger-lists.html#linux-media
+
+If you have trouble with some specific TV card, try to ask there
+instead of mailing me directly. The chance that someone with the
+same card listens there is much higher...
+
+For problems with sound: There are a lot of different systems used
+for TV sound all over the world. And there are also different chips
+which decode the audio signal. Reports about sound problems ("stereo
+does'nt work") are pretty useless unless you include some details
+about your hardware and the TV sound scheme used in your country (or
+at least the country you are living in).
+
+
+Finally: If you mail some patches for bttv around the world (to
+linux-kernel/Alan/Linus/...), please Cc: me.
+
+
+Have fun with bttv,
+
+ Gerd
+
+--
+Gerd Knorr <kraxel@bytesex.org>
diff --git a/Documentation/video4linux/bttv/README.WINVIEW b/Documentation/video4linux/bttv/README.WINVIEW
new file mode 100644
index 000000000..c61cf2864
--- /dev/null
+++ b/Documentation/video4linux/bttv/README.WINVIEW
@@ -0,0 +1,33 @@
+
+Support for the Leadtek WinView 601 TV/FM by Jon Tombs <jon@gte.esi.us.es>
+
+This card is basically the same as all the rest (Bt484A, Philips tuner),
+the main difference is that they have attached a programmable attenuator to 3
+GPIO lines in order to give some volume control. They have also stuck an
+infra-red remote control decoded on the board, I will add support for this
+when I get time (it simple generates an interrupt for each key press, with
+the key code is placed in the GPIO port).
+
+I don't yet have any application to test the radio support. The tuner
+frequency setting should work but it is possible that the audio multiplexer
+is wrong. If it doesn't work, send me email.
+
+
+- No Thanks to Leadtek they refused to answer any questions about their
+hardware. The driver was written by visual inspection of the card. If you
+use this driver, send an email insult to them, and tell them you won't
+continue buying their hardware unless they support Linux.
+
+- Little thanks to Princeton Technology Corp (http://www.princeton.com.tw)
+who make the audio attenuator. Their publicly available data-sheet available
+on their web site doesn't include the chip programming information! Hidden
+on their server are the full data-sheets, but don't ask how I found it.
+
+To use the driver I use the following options, the tuner and pll settings might
+be different in your country
+
+insmod videodev
+insmod i2c scan=1 i2c_debug=0 verbose=0
+insmod tuner type=1 debug=0
+insmod bttv pll=1 radio=1 card=17
+
diff --git a/Documentation/video4linux/bttv/README.freeze b/Documentation/video4linux/bttv/README.freeze
new file mode 100644
index 000000000..5eddfa076
--- /dev/null
+++ b/Documentation/video4linux/bttv/README.freeze
@@ -0,0 +1,74 @@
+
+If the box freezes hard with bttv ...
+=====================================
+
+It might be a bttv driver bug. It also might be bad hardware. It also
+might be something else ...
+
+Just mailing me "bttv freezes" isn't going to help much. This README
+has a few hints how you can help to pin down the problem.
+
+
+bttv bugs
+---------
+
+If some version works and another doesn't it is likely to be a driver
+bug. It is very helpful if you can tell where exactly it broke
+(i.e. the last working and the first broken version).
+
+With a hard freeze you probably doesn't find anything in the logfiles.
+The only way to capture any kernel messages is to hook up a serial
+console and let some terminal application log the messages. /me uses
+screen. See Documentation/serial-console.txt for details on setting
+up a serial console.
+
+Read Documentation/oops-tracing.txt to learn how to get any useful
+information out of a register+stack dump printed by the kernel on
+protection faults (so-called "kernel oops").
+
+If you run into some kind of deadlock, you can try to dump a call trace
+for each process using sysrq-t (see Documentation/sysrq.txt).
+This way it is possible to figure where *exactly* some process in "D"
+state is stuck.
+
+I've seen reports that bttv 0.7.x crashes whereas 0.8.x works rock solid
+for some people. Thus probably a small buglet left somewhere in bttv
+0.7.x. I have no idea where exactly, it works stable for me and a lot of
+other people. But in case you have problems with the 0.7.x versions you
+can give 0.8.x a try ...
+
+
+hardware bugs
+-------------
+
+Some hardware can't deal with PCI-PCI transfers (i.e. grabber => vga).
+Sometimes problems show up with bttv just because of the high load on
+the PCI bus. The bt848/878 chips have a few workarounds for known
+incompatibilities, see README.quirks.
+
+Some folks report that increasing the pci latency helps too,
+althrought I'm not sure whenever this really fixes the problems or
+only makes it less likely to happen. Both bttv and btaudio have a
+insmod option to set the PCI latency of the device.
+
+Some mainboard have problems to deal correctly with multiple devices
+doing DMA at the same time. bttv + ide seems to cause this sometimes,
+if this is the case you likely see freezes only with video and hard disk
+access at the same time. Updating the IDE driver to get the latest and
+greatest workarounds for hardware bugs might fix these problems.
+
+
+other
+-----
+
+If you use some binary-only yunk (like nvidia module) try to reproduce
+the problem without.
+
+IRQ sharing is known to cause problems in some cases. It works just
+fine in theory and many configurations. Neverless it might be worth a
+try to shuffle around the PCI cards to give bttv another IRQ or make
+it share the IRQ with some other piece of hardware. IRQ sharing with
+VGA cards seems to cause trouble sometimes. I've also seen funny
+effects with bttv sharing the IRQ with the ACPI bridge (and
+apci-enabled kernel).
+
diff --git a/Documentation/video4linux/bttv/README.quirks b/Documentation/video4linux/bttv/README.quirks
new file mode 100644
index 000000000..92e03929a
--- /dev/null
+++ b/Documentation/video4linux/bttv/README.quirks
@@ -0,0 +1,83 @@
+
+Below is what the bt878 data book says about the PCI bug compatibility
+modes of the bt878 chip.
+
+The triton1 insmod option sets the EN_TBFX bit in the control register.
+The vsfx insmod option does the same for EN_VSFX bit. If you have
+stability problems you can try if one of these options makes your box
+work solid.
+
+drivers/pci/quirks.c knows about these issues, this way these bits are
+enabled automagically for known-buggy chipsets (look at the kernel
+messages, bttv tells you).
+
+HTH,
+
+ Gerd
+
+---------------------------- cut here --------------------------
+
+Normal PCI Mode
+---------------
+
+The PCI REQ signal is the logical-or of the incoming function requests.
+The inter-nal GNT[0:1] signals are gated asynchronously with GNT and
+demultiplexed by the audio request signal. Thus the arbiter defaults to
+the video function at power-up and parks there during no requests for
+bus access. This is desirable since the video will request the bus more
+often. However, the audio will have highest bus access priority. Thus
+the audio will have first access to the bus even when issuing a request
+after the video request but before the PCI external arbiter has granted
+access to the Bt879. Neither function can preempt the other once on the
+bus. The duration to empty the entire video PCI FIFO onto the PCI bus is
+very short compared to the bus access latency the audio PCI FIFO can
+tolerate.
+
+
+430FX Compatibility Mode
+------------------------
+
+When using the 430FX PCI, the following rules will ensure
+compatibility:
+
+ (1) Deassert REQ at the same time as asserting FRAME.
+ (2) Do not reassert REQ to request another bus transaction until after
+ finish-ing the previous transaction.
+
+Since the individual bus masters do not have direct control of REQ, a
+simple logical-or of video and audio requests would violate the rules.
+Thus, both the arbiter and the initiator contain 430FX compatibility
+mode logic. To enable 430FX mode, set the EN_TBFX bit as indicated in
+Device Control Register on page 104.
+
+When EN_TBFX is enabled, the arbiter ensures that the two compatibility
+rules are satisfied. Before GNT is asserted by the PCI arbiter, this
+internal arbiter may still logical-or the two requests. However, once
+the GNT is issued, this arbiter must lock in its decision and now route
+only the granted request to the REQ pin. The arbiter decision lock
+happens regardless of the state of FRAME because it does not know when
+FRAME will be asserted (typically - each initiator will assert FRAME on
+the cycle following GNT). When FRAME is asserted, it is the initiator s
+responsibility to remove its request at the same time. It is the
+arbiters responsibility to allow this request to flow through to REQ and
+not allow the other request to hold REQ asserted. The decision lock may
+be removed at the end of the transaction: for example, when the bus is
+idle (FRAME and IRDY). The arbiter decision may then continue
+asynchronously until GNT is again asserted.
+
+
+Interfacing with Non-PCI 2.1 Compliant Core Logic
+-------------------------------------------------
+
+A small percentage of core logic devices may start a bus transaction
+during the same cycle that GNT is de-asserted. This is non PCI 2.1
+compliant. To ensure compatibility when using PCs with these PCI
+controllers, the EN_VSFX bit must be enabled (refer to Device Control
+Register on page 104). When in this mode, the arbiter does not pass GNT
+to the internal functions unless REQ is asserted. This prevents a bus
+transaction from starting the same cycle as GNT is de-asserted. This
+also has the side effect of not being able to take advantage of bus
+parking, thus lowering arbitration performance. The Bt879 drivers must
+query for these non-compliant devices, and set the EN_VSFX bit only if
+required.
+
diff --git a/Documentation/video4linux/bttv/Sound-FAQ b/Documentation/video4linux/bttv/Sound-FAQ
new file mode 100644
index 000000000..d3f1d7783
--- /dev/null
+++ b/Documentation/video4linux/bttv/Sound-FAQ
@@ -0,0 +1,148 @@
+
+bttv and sound mini howto
+=========================
+
+There are a lot of different bt848/849/878/879 based boards available.
+Making video work often is not a big deal, because this is handled
+completely by the bt8xx chip, which is common on all boards. But
+sound is handled in slightly different ways on each board.
+
+To handle the grabber boards correctly, there is a array tvcards[] in
+bttv-cards.c, which holds the information required for each board.
+Sound will work only, if the correct entry is used (for video it often
+makes no difference). The bttv driver prints a line to the kernel
+log, telling which card type is used. Like this one:
+
+ bttv0: model: BT848(Hauppauge old) [autodetected]
+
+You should verify this is correct. If it isn't, you have to pass the
+correct board type as insmod argument, "insmod bttv card=2" for
+example. The file CARDLIST has a list of valid arguments for card.
+If your card isn't listed there, you might check the source code for
+new entries which are not listed yet. If there isn't one for your
+card, you can check if one of the existing entries does work for you
+(just trial and error...).
+
+Some boards have an extra processor for sound to do stereo decoding
+and other nice features. The msp34xx chips are used by Hauppauge for
+example. If your board has one, you might have to load a helper
+module like msp3400.o to make sound work. If there isn't one for the
+chip used on your board: Bad luck. Start writing a new one. Well,
+you might want to check the video4linux mailing list archive first...
+
+Of course you need a correctly installed soundcard unless you have the
+speakers connected directly to the grabber board. Hint: check the
+mixer settings too. ALSA for example has everything muted by default.
+
+
+How sound works in detail
+=========================
+
+Still doesn't work? Looks like some driver hacking is required.
+Below is a do-it-yourself description for you.
+
+The bt8xx chips have 32 general purpose pins, and registers to control
+these pins. One register is the output enable register
+(BT848_GPIO_OUT_EN), it says which pins are actively driven by the
+bt848 chip. Another one is the data register (BT848_GPIO_DATA), where
+you can get/set the status if these pins. They can be used for input
+and output.
+
+Most grabber board vendors use these pins to control an external chip
+which does the sound routing. But every board is a little different.
+These pins are also used by some companies to drive remote control
+receiver chips. Some boards use the i2c bus instead of the gpio pins
+to connect the mux chip.
+
+As mentioned above, there is a array which holds the required
+informations for each known board. You basically have to create a new
+line for your board. The important fields are these two:
+
+struct tvcard
+{
+ [ ... ]
+ u32 gpiomask;
+ u32 audiomux[6]; /* Tuner, Radio, external, internal, mute, stereo */
+};
+
+gpiomask specifies which pins are used to control the audio mux chip.
+The corresponding bits in the output enable register
+(BT848_GPIO_OUT_EN) will be set as these pins must be driven by the
+bt848 chip.
+
+The audiomux[] array holds the data values for the different inputs
+(i.e. which pins must be high/low for tuner/mute/...). This will be
+written to the data register (BT848_GPIO_DATA) to switch the audio
+mux.
+
+
+What you have to do is figure out the correct values for gpiomask and
+the audiomux array. If you have Windows and the drivers four your
+card installed, you might to check out if you can read these registers
+values used by the windows driver. A tool to do this is available
+from ftp://telepresence.dmem.strath.ac.uk/pub/bt848/winutil, but it
+does'nt work with bt878 boards according to some reports I received.
+Another one with bt878 support is available from
+http://btwincap.sourceforge.net/Files/btspy2.00.zip
+
+You might also dig around in the *.ini files of the Windows applications.
+You can have a look at the board to see which of the gpio pins are
+connected at all and then start trial-and-error ...
+
+
+Starting with release 0.7.41 bttv has a number of insmod options to
+make the gpio debugging easier:
+
+bttv_gpio=0/1 enable/disable gpio debug messages
+gpiomask=n set the gpiomask value
+audiomux=i,j,... set the values of the audiomux array
+audioall=a set the values of the audiomux array (one
+ value for all array elements, useful to check
+ out which effect the particular value has).
+
+The messages printed with bttv_gpio=1 look like this:
+
+ bttv0: gpio: en=00000027, out=00000024 in=00ffffd8 [audio: off]
+
+en = output _en_able register (BT848_GPIO_OUT_EN)
+out = _out_put bits of the data register (BT848_GPIO_DATA),
+ i.e. BT848_GPIO_DATA & BT848_GPIO_OUT_EN
+in = _in_put bits of the data register,
+ i.e. BT848_GPIO_DATA & ~BT848_GPIO_OUT_EN
+
+
+
+Other elements of the tvcards array
+===================================
+
+If you are trying to make a new card work you might find it useful to
+know what the other elements in the tvcards array are good for:
+
+video_inputs - # of video inputs the card has
+audio_inputs - historical cruft, not used any more.
+tuner - which input is the tuner
+svhs - which input is svhs (all others are labeled composite)
+muxsel - video mux, input->registervalue mapping
+pll - same as pll= insmod option
+tuner_type - same as tuner= insmod option
+*_modulename - hint whenever some card needs this or that audio
+ module loaded to work properly.
+has_radio - whenever this TV card has a radio tuner.
+no_msp34xx - "1" disables loading of msp3400.o module
+no_tda9875 - "1" disables loading of tda9875.o module
+needs_tvaudio - set to "1" to load tvaudio.o module
+
+If some config item is specified both from the tvcards array and as
+insmod option, the insmod option takes precedence.
+
+
+
+Good luck,
+
+ Gerd
+
+
+PS: If you have a new working entry, mail it to me.
+
+--
+Gerd Knorr <kraxel@bytesex.org>
diff --git a/Documentation/video4linux/bttv/Specs b/Documentation/video4linux/bttv/Specs
new file mode 100644
index 000000000..f32466cda
--- /dev/null
+++ b/Documentation/video4linux/bttv/Specs
@@ -0,0 +1,3 @@
+Philips http://www.Semiconductors.COM/pip/
+Conexant http://www.conexant.com/
+Micronas http://www.micronas.com/en/home/index.html
diff --git a/Documentation/video4linux/bttv/THANKS b/Documentation/video4linux/bttv/THANKS
new file mode 100644
index 000000000..950aa781c
--- /dev/null
+++ b/Documentation/video4linux/bttv/THANKS
@@ -0,0 +1,24 @@
+Many thanks to:
+
+- Markus Schroeder <schroedm@uni-duesseldorf.de> for information on the Bt848
+ and tuner programming and his control program xtvc.
+
+- Martin Buck <martin-2.buck@student.uni-ulm.de> for his great Videotext
+ package.
+
+- Gerd Knorr <kraxel@cs.tu-berlin.de> for the MSP3400 support and the modular
+ I2C, tuner, ... support.
+
+
+- MATRIX Vision for giving us 2 cards for free, which made support of
+ single crystal operation possible.
+
+- MIRO for providing a free PCTV card and detailed information about the
+ components on their cards. (E.g. how the tuner type is detected)
+ Without their card I could not have debugged the NTSC mode.
+
+- Hauppauge for telling how the sound input is selected and what components
+ they do and will use on their radio cards.
+ Also many thanks for faxing me the FM1216 data sheet.
+
+
diff --git a/Documentation/video4linux/bttv/Tuners b/Documentation/video4linux/bttv/Tuners
new file mode 100644
index 000000000..0a371d349
--- /dev/null
+++ b/Documentation/video4linux/bttv/Tuners
@@ -0,0 +1,115 @@
+1) Tuner Programming
+====================
+There are some flavors of Tuner programming APIs.
+These differ mainly by the bandswitch byte.
+
+ L= LG_API (VHF_LO=0x01, VHF_HI=0x02, UHF=0x08, radio=0x04)
+ P= PHILIPS_API (VHF_LO=0xA0, VHF_HI=0x90, UHF=0x30, radio=0x04)
+ T= TEMIC_API (VHF_LO=0x02, VHF_HI=0x04, UHF=0x01)
+ A= ALPS_API (VHF_LO=0x14, VHF_HI=0x12, UHF=0x11)
+ M= PHILIPS_MK3 (VHF_LO=0x01, VHF_HI=0x02, UHF=0x04, radio=0x19)
+
+2) Tuner Manufacturers
+======================
+
+SAMSUNG Tuner identification: (e.g. TCPM9091PD27)
+ TCP [ABCJLMNQ] 90[89][125] [DP] [ACD] 27 [ABCD]
+ [ABCJLMNQ]:
+ A= BG+DK
+ B= BG
+ C= I+DK
+ J= NTSC-Japan
+ L= Secam LL
+ M= BG+I+DK
+ N= NTSC
+ Q= BG+I+DK+LL
+ [89]: ?
+ [125]:
+ 2: No FM
+ 5: With FM
+ [DP]:
+ D= NTSC
+ P= PAL
+ [ACD]:
+ A= F-connector
+ C= Phono connector
+ D= Din Jack
+ [ABCD]:
+ 3-wire/I2C tuning, 2-band/3-band
+
+ These Tuners are PHILIPS_API compatible.
+
+Philips Tuner identification: (e.g. FM1216MF)
+ F[IRMQ]12[1345]6{MF|ME|MP}
+ F[IRMQ]:
+ FI12x6: Tuner Series
+ FR12x6: Tuner + Radio IF
+ FM12x6: Tuner + FM
+ FQ12x6: special
+ FMR12x6: special
+ TD15xx: Digital Tuner ATSC
+ 12[1345]6:
+ 1216: PAL BG
+ 1236: NTSC
+ 1246: PAL I
+ 1256: Pal DK
+ {MF|ME|MP}
+ MF: BG LL w/ Secam (Multi France)
+ ME: BG DK I LL (Multi Europe)
+ MP: BG DK I (Multi PAL)
+ MR: BG DK M (?)
+ MG: BG DKI M (?)
+ MK2 series PHILIPS_API, most tuners are compatible to this one !
+ MK3 series introduced in 2002 w/ PHILIPS_MK3_API
+
+Temic Tuner identification: (.e.g 4006FH5)
+ 4[01][0136][269]F[HYNR]5
+ 40x2: Tuner (5V/33V), TEMIC_API.
+ 40x6: Tuner 5V
+ 41xx: Tuner compact
+ 40x9: Tuner+FM compact
+ [0136]
+ xx0x: PAL BG
+ xx1x: Pal DK, Secam LL
+ xx3x: NTSC
+ xx6x: PAL I
+ F[HYNR]5
+ FH5: Pal BG
+ FY5: others
+ FN5: multistandard
+ FR5: w/ FM radio
+ 3X xxxx: order number with specific connector
+ Note: Only 40x2 series has TEMIC_API, all newer tuners have PHILIPS_API.
+
+LG Innotek Tuner:
+ TPI8NSR11 : NTSC J/M (TPI8NSR01 w/FM) (P,210/497)
+ TPI8PSB11 : PAL B/G (TPI8PSB01 w/FM) (P,170/450)
+ TAPC-I701 : PAL I (TAPC-I001 w/FM) (P,170/450)
+ TPI8PSB12 : PAL D/K+B/G (TPI8PSB02 w/FM) (P,170/450)
+ TAPC-H701P: NTSC_JP (TAPC-H001P w/FM) (L,170/450)
+ TAPC-G701P: PAL B/G (TAPC-G001P w/FM) (L,170/450)
+ TAPC-W701P: PAL I (TAPC-W001P w/FM) (L,170/450)
+ TAPC-Q703P: PAL D/K (TAPC-Q001P w/FM) (L,170/450)
+ TAPC-Q704P: PAL D/K+I (L,170/450)
+ TAPC-G702P: PAL D/K+B/G (L,170/450)
+
+ TADC-H002F: NTSC (L,175/410?; 2-B, C-W+11, W+12-69)
+ TADC-M201D: PAL D/K+B/G+I (L,143/425) (sound control at I2C address 0xc8)
+ TADC-T003F: NTSC Taiwan (L,175/410?; 2-B, C-W+11, W+12-69)
+ Suffix:
+ P= Standard phono female socket
+ D= IEC female socket
+ F= F-connector
+
+Other Tuners:
+TCL2002MB-1 : PAL BG + DK =TUNER_LG_PAL_NEW_TAPC
+TCL2002MB-1F: PAL BG + DK w/FM =PHILIPS_PAL
+TCL2002MI-2 : PAL I = ??
+
+ALPS Tuners:
+ Most are LG_API compatible
+ TSCH6 has ALPS_API (TSCH5 ?)
+ TSBE1 has extra API 05,02,08 Control_byte=0xCB Source:(1)
+
+Lit.
+(1) conexant100029b-PCI-Decoder-ApplicationNote.pdf
diff --git a/Documentation/video4linux/cafe_ccic b/Documentation/video4linux/cafe_ccic
new file mode 100644
index 000000000..88821022a
--- /dev/null
+++ b/Documentation/video4linux/cafe_ccic
@@ -0,0 +1,54 @@
+"cafe_ccic" is a driver for the Marvell 88ALP01 "cafe" CMOS camera
+controller. This is the controller found in first-generation OLPC systems,
+and this driver was written with support from the OLPC project.
+
+Current status: the core driver works. It can generate data in YUV422,
+RGB565, and RGB444 formats. (Anybody looking at the code will see RGB32 as
+well, but that is a debugging aid which will be removed shortly). VGA and
+QVGA modes work; CIF is there but the colors remain funky. Only the OV7670
+sensor is known to work with this controller at this time.
+
+To try it out: either of these commands will work:
+
+ mplayer tv:// -tv driver=v4l2:width=640:height=480 -nosound
+ mplayer tv:// -tv driver=v4l2:width=640:height=480:outfmt=bgr16 -nosound
+
+The "xawtv" utility also works; gqcam does not, for unknown reasons.
+
+There are a few load-time options, most of which can be changed after
+loading via sysfs as well:
+
+ - alloc_bufs_at_load: Normally, the driver will not allocate any DMA
+ buffers until the time comes to transfer data. If this option is set,
+ then worst-case-sized buffers will be allocated at module load time.
+ This option nails down the memory for the life of the module, but
+ perhaps decreases the chances of an allocation failure later on.
+
+ - dma_buf_size: The size of DMA buffers to allocate. Note that this
+ option is only consulted for load-time allocation; when buffers are
+ allocated at run time, they will be sized appropriately for the current
+ camera settings.
+
+ - n_dma_bufs: The controller can cycle through either two or three DMA
+ buffers. Normally, the driver tries to use three buffers; on faster
+ systems, however, it will work well with only two.
+
+ - min_buffers: The minimum number of streaming I/O buffers that the driver
+ will consent to work with. Default is one, but, on slower systems,
+ better behavior with mplayer can be achieved by setting to a higher
+ value (like six).
+
+ - max_buffers: The maximum number of streaming I/O buffers; default is
+ ten. That number was carefully picked out of a hat and should not be
+ assumed to actually mean much of anything.
+
+ - flip: If this boolean parameter is set, the sensor will be instructed to
+ invert the video image. Whether it makes sense is determined by how
+ your particular camera is mounted.
+
+Work is ongoing with this driver, stay tuned.
+
+jon
+
+Jonathan Corbet
+corbet@lwn.net
diff --git a/Documentation/video4linux/cpia2_overview.txt b/Documentation/video4linux/cpia2_overview.txt
new file mode 100644
index 000000000..ad6adbedf
--- /dev/null
+++ b/Documentation/video4linux/cpia2_overview.txt
@@ -0,0 +1,38 @@
+ Programmer's View of Cpia2
+
+Cpia2 is the second generation video coprocessor from VLSI Vision Ltd (now a
+division of ST Microelectronics). There are two versions. The first is the
+STV0672, which is capable of up to 30 frames per second (fps) in frame sizes
+up to CIF, and 15 fps for VGA frames. The STV0676 is an improved version,
+which can handle up to 30 fps VGA. Both coprocessors can be attached to two
+CMOS sensors - the vvl6410 CIF sensor and the vvl6500 VGA sensor. These will
+be referred to as the 410 and the 500 sensors, or the CIF and VGA sensors.
+
+The two chipsets operate almost identically. The core is an 8051 processor,
+running two different versions of firmware. The 672 runs the VP4 video
+processor code, the 676 runs VP5. There are a few differences in register
+mappings for the two chips. In these cases, the symbols defined in the
+header files are marked with VP4 or VP5 as part of the symbol name.
+
+The cameras appear externally as three sets of registers. Setting register
+values is the only way to control the camera. Some settings are
+interdependant, such as the sequence required to power up the camera. I will
+try to make note of all of these cases.
+
+The register sets are called blocks. Block 0 is the system block. This
+section is always powered on when the camera is plugged in. It contains
+registers that control housekeeping functions such as powering up the video
+processor. The video processor is the VP block. These registers control
+how the video from the sensor is processed. Examples are timing registers,
+user mode (vga, qvga), scaling, cropping, framerates, and so on. The last
+block is the video compressor (VC). The video stream sent from the camera is
+compressed as Motion JPEG (JPEGA). The VC controls all of the compression
+parameters. Looking at the file cpia2_registers.h, you can get a full view
+of these registers and the possible values for most of them.
+
+One or more registers can be set or read by sending a usb control message to
+the camera. There are three modes for this. Block mode requests a number
+of contiguous registers. Random mode reads or writes random registers with
+a tuple structure containing address/value pairs. The repeat mode is only
+used by VP4 to load a firmware patch. It contains a starting address and
+a sequence of bytes to be written into a gpio port.
diff --git a/Documentation/video4linux/cx18.txt b/Documentation/video4linux/cx18.txt
new file mode 100644
index 000000000..4652c0f5d
--- /dev/null
+++ b/Documentation/video4linux/cx18.txt
@@ -0,0 +1,30 @@
+Some notes regarding the cx18 driver for the Conexant CX23418 MPEG
+encoder chip:
+
+1) Currently supported are:
+
+ - Hauppauge HVR-1600
+ - Compro VideoMate H900
+ - Yuan MPC718
+ - Conexant Raptor PAL/SECAM devkit
+
+2) Some people have problems getting the i2c bus to work.
+ The symptom is that the eeprom cannot be read and the card is
+ unusable. This is probably fixed, but if you have problems
+ then post to the video4linux or ivtv-users mailing list.
+
+3) VBI (raw or sliced) has not yet been implemented.
+
+4) MPEG indexing is not yet implemented.
+
+5) The driver is still a bit rough around the edges, this should
+ improve over time.
+
+
+Firmware:
+
+You can obtain the firmware files here:
+
+http://dl.ivtvdriver.org/ivtv/firmware/cx18-firmware.tar.gz
+
+Untar and copy the .fw files to your firmware directory.
diff --git a/Documentation/video4linux/cx2341x/README.hm12 b/Documentation/video4linux/cx2341x/README.hm12
new file mode 100644
index 000000000..b36148ea0
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/README.hm12
@@ -0,0 +1,120 @@
+The cx23416 can produce (and the cx23415 can also read) raw YUV output. The
+format of a YUV frame is specific to this chip and is called HM12. 'HM' stands
+for 'Hauppauge Macroblock', which is a misnomer as 'Conexant Macroblock' would
+be more accurate.
+
+The format is YUV 4:2:0 which uses 1 Y byte per pixel and 1 U and V byte per
+four pixels.
+
+The data is encoded as two macroblock planes, the first containing the Y
+values, the second containing UV macroblocks.
+
+The Y plane is divided into blocks of 16x16 pixels from left to right
+and from top to bottom. Each block is transmitted in turn, line-by-line.
+
+So the first 16 bytes are the first line of the top-left block, the
+second 16 bytes are the second line of the top-left block, etc. After
+transmitting this block the first line of the block on the right to the
+first block is transmitted, etc.
+
+The UV plane is divided into blocks of 16x8 UV values going from left
+to right, top to bottom. Each block is transmitted in turn, line-by-line.
+
+So the first 16 bytes are the first line of the top-left block and
+contain 8 UV value pairs (16 bytes in total). The second 16 bytes are the
+second line of 8 UV pairs of the top-left block, etc. After transmitting
+this block the first line of the block on the right to the first block is
+transmitted, etc.
+
+The code below is given as an example on how to convert HM12 to separate
+Y, U and V planes. This code assumes frames of 720x576 (PAL) pixels.
+
+The width of a frame is always 720 pixels, regardless of the actual specified
+width.
+
+If the height is not a multiple of 32 lines, then the captured video is
+missing macroblocks at the end and is unusable. So the height must be a
+multiple of 32.
+
+--------------------------------------------------------------------------
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static unsigned char frame[576*720*3/2];
+static unsigned char framey[576*720];
+static unsigned char frameu[576*720 / 4];
+static unsigned char framev[576*720 / 4];
+
+static void de_macro_y(unsigned char* dst, unsigned char *src, int dstride, int w, int h)
+{
+ unsigned int y, x, i;
+
+ // descramble Y plane
+ // dstride = 720 = w
+ // The Y plane is divided into blocks of 16x16 pixels
+ // Each block in transmitted in turn, line-by-line.
+ for (y = 0; y < h; y += 16) {
+ for (x = 0; x < w; x += 16) {
+ for (i = 0; i < 16; i++) {
+ memcpy(dst + x + (y + i) * dstride, src, 16);
+ src += 16;
+ }
+ }
+ }
+}
+
+static void de_macro_uv(unsigned char *dstu, unsigned char *dstv, unsigned char *src, int dstride, int w, int h)
+{
+ unsigned int y, x, i;
+
+ // descramble U/V plane
+ // dstride = 720 / 2 = w
+ // The U/V values are interlaced (UVUV...).
+ // Again, the UV plane is divided into blocks of 16x16 UV values.
+ // Each block in transmitted in turn, line-by-line.
+ for (y = 0; y < h; y += 16) {
+ for (x = 0; x < w; x += 8) {
+ for (i = 0; i < 16; i++) {
+ int idx = x + (y + i) * dstride;
+
+ dstu[idx+0] = src[0]; dstv[idx+0] = src[1];
+ dstu[idx+1] = src[2]; dstv[idx+1] = src[3];
+ dstu[idx+2] = src[4]; dstv[idx+2] = src[5];
+ dstu[idx+3] = src[6]; dstv[idx+3] = src[7];
+ dstu[idx+4] = src[8]; dstv[idx+4] = src[9];
+ dstu[idx+5] = src[10]; dstv[idx+5] = src[11];
+ dstu[idx+6] = src[12]; dstv[idx+6] = src[13];
+ dstu[idx+7] = src[14]; dstv[idx+7] = src[15];
+ src += 16;
+ }
+ }
+ }
+}
+
+/*************************************************************************/
+int main(int argc, char **argv)
+{
+ FILE *fin;
+ int i;
+
+ if (argc == 1) fin = stdin;
+ else fin = fopen(argv[1], "r");
+
+ if (fin == NULL) {
+ fprintf(stderr, "cannot open input\n");
+ exit(-1);
+ }
+ while (fread(frame, sizeof(frame), 1, fin) == 1) {
+ de_macro_y(framey, frame, 720, 720, 576);
+ de_macro_uv(frameu, framev, frame + 720 * 576, 720 / 2, 720 / 2, 576 / 2);
+ fwrite(framey, sizeof(framey), 1, stdout);
+ fwrite(framev, sizeof(framev), 1, stdout);
+ fwrite(frameu, sizeof(frameu), 1, stdout);
+ }
+ fclose(fin);
+ return 0;
+}
+
+--------------------------------------------------------------------------
diff --git a/Documentation/video4linux/cx2341x/README.vbi b/Documentation/video4linux/cx2341x/README.vbi
new file mode 100644
index 000000000..5807cf156
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/README.vbi
@@ -0,0 +1,45 @@
+
+Format of embedded V4L2_MPEG_STREAM_VBI_FMT_IVTV VBI data
+=========================================================
+
+This document describes the V4L2_MPEG_STREAM_VBI_FMT_IVTV format of the VBI data
+embedded in an MPEG-2 program stream. This format is in part dictated by some
+hardware limitations of the ivtv driver (the driver for the Conexant cx23415/6
+chips), in particular a maximum size for the VBI data. Anything longer is cut
+off when the MPEG stream is played back through the cx23415.
+
+The advantage of this format is it is very compact and that all VBI data for
+all lines can be stored while still fitting within the maximum allowed size.
+
+The stream ID of the VBI data is 0xBD. The maximum size of the embedded data is
+4 + 43 * 36, which is 4 bytes for a header and 2 * 18 VBI lines with a 1 byte
+header and a 42 bytes payload each. Anything beyond this limit is cut off by
+the cx23415/6 firmware. Besides the data for the VBI lines we also need 36 bits
+for a bitmask determining which lines are captured and 4 bytes for a magic cookie,
+signifying that this data package contains V4L2_MPEG_STREAM_VBI_FMT_IVTV VBI data.
+If all lines are used, then there is no longer room for the bitmask. To solve this
+two different magic numbers were introduced:
+
+'itv0': After this magic number two unsigned longs follow. Bits 0-17 of the first
+unsigned long denote which lines of the first field are captured. Bits 18-31 of
+the first unsigned long and bits 0-3 of the second unsigned long are used for the
+second field.
+
+'ITV0': This magic number assumes all VBI lines are captured, i.e. it implicitly
+implies that the bitmasks are 0xffffffff and 0xf.
+
+After these magic cookies (and the 8 byte bitmask in case of cookie 'itv0') the
+captured VBI lines start:
+
+For each line the least significant 4 bits of the first byte contain the data type.
+Possible values are shown in the table below. The payload is in the following 42
+bytes.
+
+Here is the list of possible data types:
+
+#define IVTV_SLICED_TYPE_TELETEXT 0x1 // Teletext (uses lines 6-22 for PAL)
+#define IVTV_SLICED_TYPE_CC 0x4 // Closed Captions (line 21 NTSC)
+#define IVTV_SLICED_TYPE_WSS 0x5 // Wide Screen Signal (line 23 PAL)
+#define IVTV_SLICED_TYPE_VPS 0x7 // Video Programming System (PAL) (line 16)
+
+Hans Verkuil <hverkuil@xs4all.nl>
diff --git a/Documentation/video4linux/cx2341x/fw-calling.txt b/Documentation/video4linux/cx2341x/fw-calling.txt
new file mode 100644
index 000000000..8d21181de
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-calling.txt
@@ -0,0 +1,69 @@
+This page describes how to make calls to the firmware api.
+
+How to call
+===========
+
+The preferred calling convention is known as the firmware mailbox. The
+mailboxes are basically a fixed length array that serves as the call-stack.
+
+Firmware mailboxes can be located by searching the encoder and decoder memory
+for a 16 byte signature. That signature will be located on a 256-byte boundary.
+
+Signature:
+0x78, 0x56, 0x34, 0x12, 0x12, 0x78, 0x56, 0x34,
+0x34, 0x12, 0x78, 0x56, 0x56, 0x34, 0x12, 0x78
+
+The firmware implements 20 mailboxes of 20 32-bit words. The first 10 are
+reserved for API calls. The second 10 are used by the firmware for event
+notification.
+
+ Index Name
+ ----- ----
+ 0 Flags
+ 1 Command
+ 2 Return value
+ 3 Timeout
+ 4-19 Parameter/Result
+
+
+The flags are defined in the following table. The direction is from the
+perspective of the firmware.
+
+ Bit Direction Purpose
+ --- --------- -------
+ 2 O Firmware has processed the command.
+ 1 I Driver has finished setting the parameters.
+ 0 I Driver is using this mailbox.
+
+
+The command is a 32-bit enumerator. The API specifics may be found in the
+fw-*-api.txt documents.
+
+The return value is a 32-bit enumerator. Only two values are currently defined:
+0=success and -1=command undefined.
+
+There are 16 parameters/results 32-bit fields. The driver populates these fields
+with values for all the parameters required by the call. The driver overwrites
+these fields with result values returned by the call. The API specifics may be
+found in the fw-*-api.txt documents.
+
+The timeout value protects the card from a hung driver thread. If the driver
+doesn't handle the completed call within the timeout specified, the firmware
+will reset that mailbox.
+
+To make an API call, the driver iterates over each mailbox looking for the
+first one available (bit 0 has been cleared). The driver sets that bit, fills
+in the command enumerator, the timeout value and any required parameters. The
+driver then sets the parameter ready bit (bit 1). The firmware scans the
+mailboxes for pending commands, processes them, sets the result code, populates
+the result value array with that call's return values and sets the call
+complete bit (bit 2). Once bit 2 is set, the driver should retrieve the results
+and clear all the flags. If the driver does not perform this task within the
+time set in the timeout register, the firmware will reset that mailbox.
+
+Event notifications are sent from the firmware to the host. The host tells the
+firmware which events it is interested in via an API call. That call tells the
+firmware which notification mailbox to use. The firmware signals the host via
+an interrupt. Only the 16 Results fields are used, the Flags, Command, Return
+value and Timeout words are not used.
+
diff --git a/Documentation/video4linux/cx2341x/fw-decoder-api.txt b/Documentation/video4linux/cx2341x/fw-decoder-api.txt
new file mode 100644
index 000000000..8c317b7a4
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-decoder-api.txt
@@ -0,0 +1,297 @@
+Decoder firmware API description
+================================
+
+Note: this API is part of the decoder firmware, so it's cx23415 only.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_PING_FW
+Enum 0/0x00
+Description
+ This API call does nothing. It may be used to check if the firmware
+ is responding.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_START_PLAYBACK
+Enum 1/0x01
+Description
+ Begin or resume playback.
+Param[0]
+ 0 based frame number in GOP to begin playback from.
+Param[1]
+ Specifies the number of muted audio frames to play before normal
+ audio resumes. (This is not implemented in the firmware, leave at 0)
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_STOP_PLAYBACK
+Enum 2/0x02
+Description
+ Ends playback and clears all decoder buffers. If PTS is not zero,
+ playback stops at specified PTS.
+Param[0]
+ Display 0=last frame, 1=black
+ Note: this takes effect immediately, so if you want to wait for a PTS,
+ then use '0', otherwise the screen goes to black at once.
+ You can call this later (even if there is no playback) with a 1 value
+ to set the screen to black.
+Param[1]
+ PTS low
+Param[2]
+ PTS high
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_SET_PLAYBACK_SPEED
+Enum 3/0x03
+Description
+ Playback stream at speed other than normal. There are two modes of
+ operation:
+ Smooth: host transfers entire stream and firmware drops unused
+ frames.
+ Coarse: host drops frames based on indexing as required to achieve
+ desired speed.
+Param[0]
+ Bitmap:
+ 0:7 0 normal
+ 1 fast only "1.5 times"
+ n nX fast, 1/nX slow
+ 30 Framedrop:
+ '0' during 1.5 times play, every other B frame is dropped
+ '1' during 1.5 times play, stream is unchanged (bitrate
+ must not exceed 8mbps)
+ 31 Speed:
+ '0' slow
+ '1' fast
+ Note: n is limited to 2. Anything higher does not result in
+ faster playback. Instead the host should start dropping frames.
+Param[1]
+ Direction: 0=forward, 1=reverse
+ Note: to make reverse playback work you have to write full GOPs in
+ reverse order.
+Param[2]
+ Picture mask:
+ 1=I frames
+ 3=I, P frames
+ 7=I, P, B frames
+Param[3]
+ B frames per GOP (for reverse play only)
+ Note: for reverse playback the Picture Mask should be set to I or I, P.
+ Adding B frames to the mask will result in corrupt video. This field
+ has to be set to the correct value in order to keep the timing correct.
+Param[4]
+ Mute audio: 0=disable, 1=enable
+Param[5]
+ Display 0=frame, 1=field
+Param[6]
+ Specifies the number of muted audio frames to play before normal audio
+ resumes. (Not implemented in the firmware, leave at 0)
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_STEP_VIDEO
+Enum 5/0x05
+Description
+ Each call to this API steps the playback to the next unit defined below
+ in the current playback direction.
+Param[0]
+ 0=frame, 1=top field, 2=bottom field
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_SET_DMA_BLOCK_SIZE
+Enum 8/0x08
+Description
+ Set DMA transfer block size. Counterpart to API 0xC9
+Param[0]
+ DMA transfer block size in bytes. A different size may be specified
+ when issuing the DMA transfer command.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_GET_XFER_INFO
+Enum 9/0x09
+Description
+ This API call may be used to detect an end of stream condition.
+Result[0]
+ Stream type
+Result[1]
+ Address offset
+Result[2]
+ Maximum bytes to transfer
+Result[3]
+ Buffer fullness
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_GET_DMA_STATUS
+Enum 10/0x0A
+Description
+ Status of the last DMA transfer
+Result[0]
+ Bit 1 set means transfer complete
+ Bit 2 set means DMA error
+ Bit 3 set means linked list error
+Result[1]
+ DMA type: 0=MPEG, 1=OSD, 2=YUV
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_SCHED_DMA_FROM_HOST
+Enum 11/0x0B
+Description
+ Setup DMA from host operation. Counterpart to API 0xCC
+Param[0]
+ Memory address of link list
+Param[1]
+ Total # of bytes to transfer
+Param[2]
+ DMA type (0=MPEG, 1=OSD, 2=YUV)
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_PAUSE_PLAYBACK
+Enum 13/0x0D
+Description
+ Freeze playback immediately. In this mode, when internal buffers are
+ full, no more data will be accepted and data request IRQs will be
+ masked.
+Param[0]
+ Display: 0=last frame, 1=black
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_HALT_FW
+Enum 14/0x0E
+Description
+ The firmware is halted and no further API calls are serviced until
+ the firmware is uploaded again.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_SET_STANDARD
+Enum 16/0x10
+Description
+ Selects display standard
+Param[0]
+ 0=NTSC, 1=PAL
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_GET_VERSION
+Enum 17/0x11
+Description
+ Returns decoder firmware version information
+Result[0]
+ Version bitmask:
+ Bits 0:15 build
+ Bits 16:23 minor
+ Bits 24:31 major
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_SET_STREAM_INPUT
+Enum 20/0x14
+Description
+ Select decoder stream input port
+Param[0]
+ 0=memory (default), 1=streaming
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_GET_TIMING_INFO
+Enum 21/0x15
+Description
+ Returns timing information from start of playback
+Result[0]
+ Frame count by decode order
+Result[1]
+ Video PTS bits 0:31 by display order
+Result[2]
+ Video PTS bit 32 by display order
+Result[3]
+ SCR bits 0:31 by display order
+Result[4]
+ SCR bit 32 by display order
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_SET_AUDIO_MODE
+Enum 22/0x16
+Description
+ Select audio mode
+Param[0]
+ Dual mono mode action
+ 0=Stereo, 1=Left, 2=Right, 3=Mono, 4=Swap, -1=Unchanged
+Param[1]
+ Stereo mode action:
+ 0=Stereo, 1=Left, 2=Right, 3=Mono, 4=Swap, -1=Unchanged
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_SET_EVENT_NOTIFICATION
+Enum 23/0x17
+Description
+ Setup firmware to notify the host about a particular event.
+ Counterpart to API 0xD5
+Param[0]
+ Event: 0=Audio mode change between mono, (joint) stereo and dual channel.
+ Event: 3=Decoder started
+ Event: 4=Unknown: goes off 10-15 times per second while decoding.
+ Event: 5=Some sync event: goes off once per frame.
+Param[1]
+ Notification 0=disabled, 1=enabled
+Param[2]
+ Interrupt bit
+Param[3]
+ Mailbox slot, -1 if no mailbox required.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_SET_DISPLAY_BUFFERS
+Enum 24/0x18
+Description
+ Number of display buffers. To decode all frames in reverse playback you
+ must use nine buffers.
+Param[0]
+ 0=six buffers, 1=nine buffers
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_EXTRACT_VBI
+Enum 25/0x19
+Description
+ Extracts VBI data
+Param[0]
+ 0=extract from extension & user data, 1=extract from private packets
+Result[0]
+ VBI table location
+Result[1]
+ VBI table size
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_SET_DECODER_SOURCE
+Enum 26/0x1A
+Description
+ Selects decoder source. Ensure that the parameters passed to this
+ API match the encoder settings.
+Param[0]
+ Mode: 0=MPEG from host, 1=YUV from encoder, 2=YUV from host
+Param[1]
+ YUV picture width
+Param[2]
+ YUV picture height
+Param[3]
+ Bitmap: see Param[0] of API 0xBD
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_DEC_SET_PREBUFFERING
+Enum 30/0x1E
+Description
+ Decoder prebuffering, when enabled up to 128KB are buffered for
+ streams <8mpbs or 640KB for streams >8mbps
+Param[0]
+ 0=off, 1=on
diff --git a/Documentation/video4linux/cx2341x/fw-decoder-regs.txt b/Documentation/video4linux/cx2341x/fw-decoder-regs.txt
new file mode 100644
index 000000000..cf52c8f20
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-decoder-regs.txt
@@ -0,0 +1,817 @@
+PVR350 Video decoder registers 0x02002800 -> 0x02002B00
+=======================================================
+
+This list has been worked out through trial and error. There will be mistakes
+and omissions. Some registers have no obvious effect so it's hard to say what
+they do, while others interact with each other, or require a certain load
+sequence. Horizontal filter setup is one example, with six registers working
+in unison and requiring a certain load sequence to correctly configure. The
+indexed colour palette is much easier to set at just two registers, but again
+it requires a certain load sequence.
+
+Some registers are fussy about what they are set to. Load in a bad value & the
+decoder will fail. A firmware reload will often recover, but sometimes a reset
+is required. For registers containing size information, setting them to 0 is
+generally a bad idea. For other control registers i.e. 2878, you'll only find
+out what values are bad when it hangs.
+
+--------------------------------------------------------------------------------
+2800
+ bit 0
+ Decoder enable
+ 0 = disable
+ 1 = enable
+--------------------------------------------------------------------------------
+2804
+ bits 0:31
+ Decoder horizontal Y alias register 1
+---------------
+2808
+ bits 0:31
+ Decoder horizontal Y alias register 2
+---------------
+280C
+ bits 0:31
+ Decoder horizontal Y alias register 3
+---------------
+2810
+ bits 0:31
+ Decoder horizontal Y alias register 4
+---------------
+2814
+ bits 0:31
+ Decoder horizontal Y alias register 5
+---------------
+2818
+ bits 0:31
+ Decoder horizontal Y alias trigger
+
+ These six registers control the horizontal aliasing filter for the Y plane.
+ The first five registers must all be loaded before accessing the trigger
+ (2818), as this register actually clocks the data through for the first
+ five.
+
+ To correctly program set the filter, this whole procedure must be done 16
+ times. The actual register contents are copied from a lookup-table in the
+ firmware which contains 4 different filter settings.
+
+--------------------------------------------------------------------------------
+281C
+ bits 0:31
+ Decoder horizontal UV alias register 1
+---------------
+2820
+ bits 0:31
+ Decoder horizontal UV alias register 2
+---------------
+2824
+ bits 0:31
+ Decoder horizontal UV alias register 3
+---------------
+2828
+ bits 0:31
+ Decoder horizontal UV alias register 4
+---------------
+282C
+ bits 0:31
+ Decoder horizontal UV alias register 5
+---------------
+2830
+ bits 0:31
+ Decoder horizontal UV alias trigger
+
+ These six registers control the horizontal aliasing for the UV plane.
+ Operation is the same as the Y filter, with 2830 being the trigger
+ register.
+
+--------------------------------------------------------------------------------
+2834
+ bits 0:15
+ Decoder Y source width in pixels
+
+ bits 16:31
+ Decoder Y destination width in pixels
+---------------
+2838
+ bits 0:15
+ Decoder UV source width in pixels
+
+ bits 16:31
+ Decoder UV destination width in pixels
+
+ NOTE: For both registers, the resulting image must be fully visible on
+ screen. If the image exceeds the right edge both the source and destination
+ size must be adjusted to reflect the visible portion. For the source width,
+ you must take into account the scaling when calculating the new value.
+--------------------------------------------------------------------------------
+
+283C
+ bits 0:31
+ Decoder Y horizontal scaling
+ Normally = Reg 2854 >> 2
+---------------
+2840
+ bits 0:31
+ Decoder ?? unknown - horizontal scaling
+ Usually 0x00080514
+---------------
+2844
+ bits 0:31
+ Decoder UV horizontal scaling
+ Normally = Reg 2854 >> 2
+---------------
+2848
+ bits 0:31
+ Decoder ?? unknown - horizontal scaling
+ Usually 0x00100514
+---------------
+284C
+ bits 0:31
+ Decoder ?? unknown - Y plane
+ Usually 0x00200020
+---------------
+2850
+ bits 0:31
+ Decoder ?? unknown - UV plane
+ Usually 0x00200020
+---------------
+2854
+ bits 0:31
+ Decoder 'master' value for horizontal scaling
+---------------
+2858
+ bits 0:31
+ Decoder ?? unknown
+ Usually 0
+---------------
+285C
+ bits 0:31
+ Decoder ?? unknown
+ Normally = Reg 2854 >> 1
+---------------
+2860
+ bits 0:31
+ Decoder ?? unknown
+ Usually 0
+---------------
+2864
+ bits 0:31
+ Decoder ?? unknown
+ Normally = Reg 2854 >> 1
+---------------
+2868
+ bits 0:31
+ Decoder ?? unknown
+ Usually 0
+
+ Most of these registers either control horizontal scaling, or appear linked
+ to it in some way. Register 2854 contains the 'master' value & the other
+ registers can be calculated from that one. You must also remember to
+ correctly set the divider in Reg 2874.
+
+ To enlarge:
+ Reg 2854 = (source_width * 0x00200000) / destination_width
+ Reg 2874 = No divide
+
+ To reduce from full size down to half size:
+ Reg 2854 = (source_width/2 * 0x00200000) / destination width
+ Reg 2874 = Divide by 2
+
+ To reduce from half size down to quarter size:
+ Reg 2854 = (source_width/4 * 0x00200000) / destination width
+ Reg 2874 = Divide by 4
+
+ The result is always rounded up.
+
+--------------------------------------------------------------------------------
+286C
+ bits 0:15
+ Decoder horizontal Y buffer offset
+
+ bits 15:31
+ Decoder horizontal UV buffer offset
+
+ Offset into the video image buffer. If the offset is gradually incremented,
+ the on screen image will move left & wrap around higher up on the right.
+
+--------------------------------------------------------------------------------
+2870
+ bits 0:15
+ Decoder horizontal Y output offset
+
+ bits 16:31
+ Decoder horizontal UV output offset
+
+ Offsets the actual video output. Controls output alignment of the Y & UV
+ planes. The higher the value, the greater the shift to the left. Use
+ reg 2890 to move the image right.
+
+--------------------------------------------------------------------------------
+2874
+ bits 0:1
+ Decoder horizontal Y output size divider
+ 00 = No divide
+ 01 = Divide by 2
+ 10 = Divide by 3
+
+ bits 4:5
+ Decoder horizontal UV output size divider
+ 00 = No divide
+ 01 = Divide by 2
+ 10 = Divide by 3
+
+ bit 8
+ Decoder ?? unknown
+ 0 = Normal
+ 1 = Affects video output levels
+
+ bit 16
+ Decoder ?? unknown
+ 0 = Normal
+ 1 = Disable horizontal filter
+
+--------------------------------------------------------------------------------
+2878
+ bit 0
+ ?? unknown
+
+ bit 1
+ osd on/off
+ 0 = osd off
+ 1 = osd on
+
+ bit 2
+ Decoder + osd video timing
+ 0 = NTSC
+ 1 = PAL
+
+ bits 3:4
+ ?? unknown
+
+ bit 5
+ Decoder + osd
+ Swaps upper & lower fields
+
+--------------------------------------------------------------------------------
+287C
+ bits 0:10
+ Decoder & osd ?? unknown
+ Moves entire screen horizontally. Starts at 0x005 with the screen
+ shifted heavily to the right. Incrementing in steps of 0x004 will
+ gradually shift the screen to the left.
+
+ bits 11:31
+ ?? unknown
+
+ Normally contents are 0x00101111 (NTSC) or 0x1010111d (PAL)
+
+--------------------------------------------------------------------------------
+2880 -------- ?? unknown
+2884 -------- ?? unknown
+--------------------------------------------------------------------------------
+2888
+ bit 0
+ Decoder + osd ?? unknown
+ 0 = Normal
+ 1 = Misaligned fields (Correctable through 289C & 28A4)
+
+ bit 4
+ ?? unknown
+
+ bit 8
+ ?? unknown
+
+ Warning: Bad values will require a firmware reload to recover.
+ Known to be bad are 0x000,0x011,0x100,0x111
+--------------------------------------------------------------------------------
+288C
+ bits 0:15
+ osd ?? unknown
+ Appears to affect the osd position stability. The higher the value the
+ more unstable it becomes. Decoder output remains stable.
+
+ bits 16:31
+ osd ?? unknown
+ Same as bits 0:15
+
+--------------------------------------------------------------------------------
+2890
+ bits 0:11
+ Decoder output horizontal offset.
+
+ Horizontal offset moves the video image right. A small left shift is
+ possible, but it's better to use reg 2870 for that due to its greater
+ range.
+
+ NOTE: Video corruption will occur if video window is shifted off the right
+ edge. To avoid this read the notes for 2834 & 2838.
+--------------------------------------------------------------------------------
+2894
+ bits 0:23
+ Decoder output video surround colour.
+
+ Contains the colour (in yuv) used to fill the screen when the video is
+ running in a window.
+--------------------------------------------------------------------------------
+2898
+ bits 0:23
+ Decoder video window colour
+ Contains the colour (in yuv) used to fill the video window when the
+ video is turned off.
+
+ bit 24
+ Decoder video output
+ 0 = Video on
+ 1 = Video off
+
+ bit 28
+ Decoder plane order
+ 0 = Y,UV
+ 1 = UV,Y
+
+ bit 29
+ Decoder second plane byte order
+ 0 = Normal (UV)
+ 1 = Swapped (VU)
+
+ In normal usage, the first plane is Y & the second plane is UV. Though the
+ order of the planes can be swapped, only the byte order of the second plane
+ can be swapped. This isn't much use for the Y plane, but can be useful for
+ the UV plane.
+
+--------------------------------------------------------------------------------
+289C
+ bits 0:15
+ Decoder vertical field offset 1
+
+ bits 16:31
+ Decoder vertical field offset 2
+
+ Controls field output vertical alignment. The higher the number, the lower
+ the image on screen. Known starting values are 0x011E0017 (NTSC) &
+ 0x01500017 (PAL)
+--------------------------------------------------------------------------------
+28A0
+ bits 0:15
+ Decoder & osd width in pixels
+
+ bits 16:31
+ Decoder & osd height in pixels
+
+ All output from the decoder & osd are disabled beyond this area. Decoder
+ output will simply go black outside of this region. If the osd tries to
+ exceed this area it will become corrupt.
+--------------------------------------------------------------------------------
+28A4
+ bits 0:11
+ osd left shift.
+
+ Has a range of 0x770->0x7FF. With the exception of 0, any value outside of
+ this range corrupts the osd.
+--------------------------------------------------------------------------------
+28A8
+ bits 0:15
+ osd vertical field offset 1
+
+ bits 16:31
+ osd vertical field offset 2
+
+ Controls field output vertical alignment. The higher the number, the lower
+ the image on screen. Known starting values are 0x011E0017 (NTSC) &
+ 0x01500017 (PAL)
+--------------------------------------------------------------------------------
+28AC -------- ?? unknown
+ |
+ V
+28BC -------- ?? unknown
+--------------------------------------------------------------------------------
+28C0
+ bit 0
+ Current output field
+ 0 = first field
+ 1 = second field
+
+ bits 16:31
+ Current scanline
+ The scanline counts from the top line of the first field
+ through to the last line of the second field.
+--------------------------------------------------------------------------------
+28C4 -------- ?? unknown
+ |
+ V
+28F8 -------- ?? unknown
+--------------------------------------------------------------------------------
+28FC
+ bit 0
+ ?? unknown
+ 0 = Normal
+ 1 = Breaks decoder & osd output
+--------------------------------------------------------------------------------
+2900
+ bits 0:31
+ Decoder vertical Y alias register 1
+---------------
+2904
+ bits 0:31
+ Decoder vertical Y alias register 2
+---------------
+2908
+ bits 0:31
+ Decoder vertical Y alias trigger
+
+ These three registers control the vertical aliasing filter for the Y plane.
+ Operation is similar to the horizontal Y filter (2804). The only real
+ difference is that there are only two registers to set before accessing
+ the trigger register (2908). As for the horizontal filter, the values are
+ taken from a lookup table in the firmware, and the procedure must be
+ repeated 16 times to fully program the filter.
+--------------------------------------------------------------------------------
+290C
+ bits 0:31
+ Decoder vertical UV alias register 1
+---------------
+2910
+ bits 0:31
+ Decoder vertical UV alias register 2
+---------------
+2914
+ bits 0:31
+ Decoder vertical UV alias trigger
+
+ These three registers control the vertical aliasing filter for the UV
+ plane. Operation is the same as the Y filter, with 2914 being the trigger.
+--------------------------------------------------------------------------------
+2918
+ bits 0:15
+ Decoder Y source height in pixels
+
+ bits 16:31
+ Decoder Y destination height in pixels
+---------------
+291C
+ bits 0:15
+ Decoder UV source height in pixels divided by 2
+
+ bits 16:31
+ Decoder UV destination height in pixels
+
+ NOTE: For both registers, the resulting image must be fully visible on
+ screen. If the image exceeds the bottom edge both the source and
+ destination size must be adjusted to reflect the visible portion. For the
+ source height, you must take into account the scaling when calculating the
+ new value.
+--------------------------------------------------------------------------------
+2920
+ bits 0:31
+ Decoder Y vertical scaling
+ Normally = Reg 2930 >> 2
+---------------
+2924
+ bits 0:31
+ Decoder Y vertical scaling
+ Normally = Reg 2920 + 0x514
+---------------
+2928
+ bits 0:31
+ Decoder UV vertical scaling
+ When enlarging = Reg 2930 >> 2
+ When reducing = Reg 2930 >> 3
+---------------
+292C
+ bits 0:31
+ Decoder UV vertical scaling
+ Normally = Reg 2928 + 0x514
+---------------
+2930
+ bits 0:31
+ Decoder 'master' value for vertical scaling
+---------------
+2934
+ bits 0:31
+ Decoder ?? unknown - Y vertical scaling
+---------------
+2938
+ bits 0:31
+ Decoder Y vertical scaling
+ Normally = Reg 2930
+---------------
+293C
+ bits 0:31
+ Decoder ?? unknown - Y vertical scaling
+---------------
+2940
+ bits 0:31
+ Decoder UV vertical scaling
+ When enlarging = Reg 2930 >> 1
+ When reducing = Reg 2930
+---------------
+2944
+ bits 0:31
+ Decoder ?? unknown - UV vertical scaling
+---------------
+2948
+ bits 0:31
+ Decoder UV vertical scaling
+ Normally = Reg 2940
+---------------
+294C
+ bits 0:31
+ Decoder ?? unknown - UV vertical scaling
+
+ Most of these registers either control vertical scaling, or appear linked
+ to it in some way. Register 2930 contains the 'master' value & all other
+ registers can be calculated from that one. You must also remember to
+ correctly set the divider in Reg 296C
+
+ To enlarge:
+ Reg 2930 = (source_height * 0x00200000) / destination_height
+ Reg 296C = No divide
+
+ To reduce from full size down to half size:
+ Reg 2930 = (source_height/2 * 0x00200000) / destination height
+ Reg 296C = Divide by 2
+
+ To reduce from half down to quarter.
+ Reg 2930 = (source_height/4 * 0x00200000) / destination height
+ Reg 296C = Divide by 4
+
+--------------------------------------------------------------------------------
+2950
+ bits 0:15
+ Decoder Y line index into display buffer, first field
+
+ bits 16:31
+ Decoder Y vertical line skip, first field
+--------------------------------------------------------------------------------
+2954
+ bits 0:15
+ Decoder Y line index into display buffer, second field
+
+ bits 16:31
+ Decoder Y vertical line skip, second field
+--------------------------------------------------------------------------------
+2958
+ bits 0:15
+ Decoder UV line index into display buffer, first field
+
+ bits 16:31
+ Decoder UV vertical line skip, first field
+--------------------------------------------------------------------------------
+295C
+ bits 0:15
+ Decoder UV line index into display buffer, second field
+
+ bits 16:31
+ Decoder UV vertical line skip, second field
+--------------------------------------------------------------------------------
+2960
+ bits 0:15
+ Decoder destination height minus 1
+
+ bits 16:31
+ Decoder destination height divided by 2
+--------------------------------------------------------------------------------
+2964
+ bits 0:15
+ Decoder Y vertical offset, second field
+
+ bits 16:31
+ Decoder Y vertical offset, first field
+
+ These two registers shift the Y plane up. The higher the number, the
+ greater the shift.
+--------------------------------------------------------------------------------
+2968
+ bits 0:15
+ Decoder UV vertical offset, second field
+
+ bits 16:31
+ Decoder UV vertical offset, first field
+
+ These two registers shift the UV plane up. The higher the number, the
+ greater the shift.
+--------------------------------------------------------------------------------
+296C
+ bits 0:1
+ Decoder vertical Y output size divider
+ 00 = No divide
+ 01 = Divide by 2
+ 10 = Divide by 4
+
+ bits 8:9
+ Decoder vertical UV output size divider
+ 00 = No divide
+ 01 = Divide by 2
+ 10 = Divide by 4
+--------------------------------------------------------------------------------
+2970
+ bit 0
+ Decoder ?? unknown
+ 0 = Normal
+ 1 = Affect video output levels
+
+ bit 16
+ Decoder ?? unknown
+ 0 = Normal
+ 1 = Disable vertical filter
+
+--------------------------------------------------------------------------------
+2974 -------- ?? unknown
+ |
+ V
+29EF -------- ?? unknown
+--------------------------------------------------------------------------------
+2A00
+ bits 0:2
+ osd colour mode
+ 000 = 8 bit indexed
+ 001 = 16 bit (565)
+ 010 = 15 bit (555)
+ 011 = 12 bit (444)
+ 100 = 32 bit (8888)
+
+ bits 4:5
+ osd display bpp
+ 01 = 8 bit
+ 10 = 16 bit
+ 11 = 32 bit
+
+ bit 8
+ osd global alpha
+ 0 = Off
+ 1 = On
+
+ bit 9
+ osd local alpha
+ 0 = Off
+ 1 = On
+
+ bit 10
+ osd colour key
+ 0 = Off
+ 1 = On
+
+ bit 11
+ osd ?? unknown
+ Must be 1
+
+ bit 13
+ osd colour space
+ 0 = ARGB
+ 1 = AYVU
+
+ bits 16:31
+ osd ?? unknown
+ Must be 0x001B (some kind of buffer pointer ?)
+
+ When the bits-per-pixel is set to 8, the colour mode is ignored and
+ assumed to be 8 bit indexed. For 16 & 32 bits-per-pixel the colour depth
+ is honoured, and when using a colour depth that requires fewer bytes than
+ allocated the extra bytes are used as padding. So for a 32 bpp with 8 bit
+ index colour, there are 3 padding bytes per pixel. It's also possible to
+ select 16bpp with a 32 bit colour mode. This results in the pixel width
+ being doubled, but the color key will not work as expected in this mode.
+
+ Colour key is as it suggests. You designate a colour which will become
+ completely transparent. When using 565, 555 or 444 colour modes, the
+ colour key is always 16 bits wide. The colour to key on is set in Reg 2A18.
+
+ Local alpha works differently depending on the colour mode. For 32bpp & 8
+ bit indexed, local alpha is a per-pixel 256 step transparency, with 0 being
+ transparent and 255 being solid. For the 16bpp modes 555 & 444, the unused
+ bit(s) act as a simple transparency switch, with 0 being solid & 1 being
+ fully transparent. There is no local alpha support for 16bit 565.
+
+ Global alpha is a 256 step transparency that applies to the entire osd,
+ with 0 being transparent & 255 being solid.
+
+ It's possible to combine colour key, local alpha & global alpha.
+--------------------------------------------------------------------------------
+2A04
+ bits 0:15
+ osd x coord for left edge
+
+ bits 16:31
+ osd y coord for top edge
+---------------
+2A08
+ bits 0:15
+ osd x coord for right edge
+
+ bits 16:31
+ osd y coord for bottom edge
+
+ For both registers, (0,0) = top left corner of the display area. These
+ registers do not control the osd size, only where it's positioned & how
+ much is visible. The visible osd area cannot exceed the right edge of the
+ display, otherwise the osd will become corrupt. See reg 2A10 for
+ setting osd width.
+--------------------------------------------------------------------------------
+2A0C
+ bits 0:31
+ osd buffer index
+
+ An index into the osd buffer. Slowly incrementing this moves the osd left,
+ wrapping around onto the right edge
+--------------------------------------------------------------------------------
+2A10
+ bits 0:11
+ osd buffer 32 bit word width
+
+ Contains the width of the osd measured in 32 bit words. This means that all
+ colour modes are restricted to a byte width which is divisible by 4.
+--------------------------------------------------------------------------------
+2A14
+ bits 0:15
+ osd height in pixels
+
+ bits 16:32
+ osd line index into buffer
+ osd will start displaying from this line.
+--------------------------------------------------------------------------------
+2A18
+ bits 0:31
+ osd colour key
+
+ Contains the colour value which will be transparent.
+--------------------------------------------------------------------------------
+2A1C
+ bits 0:7
+ osd global alpha
+
+ Contains the global alpha value (equiv ivtvfbctl --alpha XX)
+--------------------------------------------------------------------------------
+2A20 -------- ?? unknown
+ |
+ V
+2A2C -------- ?? unknown
+--------------------------------------------------------------------------------
+2A30
+ bits 0:7
+ osd colour to change in indexed palette
+---------------
+2A34
+ bits 0:31
+ osd colour for indexed palette
+
+ To set the new palette, first load the index of the colour to change into
+ 2A30, then load the new colour into 2A34. The full palette is 256 colours,
+ so the index range is 0x00-0xFF
+--------------------------------------------------------------------------------
+2A38 -------- ?? unknown
+2A3C -------- ?? unknown
+--------------------------------------------------------------------------------
+2A40
+ bits 0:31
+ osd ?? unknown
+
+ Affects overall brightness, wrapping around to black
+--------------------------------------------------------------------------------
+2A44
+ bits 0:31
+ osd ?? unknown
+
+ Green tint
+--------------------------------------------------------------------------------
+2A48
+ bits 0:31
+ osd ?? unknown
+
+ Red tint
+--------------------------------------------------------------------------------
+2A4C
+ bits 0:31
+ osd ?? unknown
+
+ Affects overall brightness, wrapping around to black
+--------------------------------------------------------------------------------
+2A50
+ bits 0:31
+ osd ?? unknown
+
+ Colour shift
+--------------------------------------------------------------------------------
+2A54
+ bits 0:31
+ osd ?? unknown
+
+ Colour shift
+--------------------------------------------------------------------------------
+2A58 -------- ?? unknown
+ |
+ V
+2AFC -------- ?? unknown
+--------------------------------------------------------------------------------
+2B00
+ bit 0
+ osd filter control
+ 0 = filter off
+ 1 = filter on
+
+ bits 1:4
+ osd ?? unknown
+
+--------------------------------------------------------------------------------
+
+v0.4 - 12 March 2007 - Ian Armstrong (ian@iarmst.demon.co.uk)
+
diff --git a/Documentation/video4linux/cx2341x/fw-dma.txt b/Documentation/video4linux/cx2341x/fw-dma.txt
new file mode 100644
index 000000000..be52b6fd1
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-dma.txt
@@ -0,0 +1,96 @@
+This page describes the structures and procedures used by the cx2341x DMA
+engine.
+
+Introduction
+============
+
+The cx2341x PCI interface is busmaster capable. This means it has a DMA
+engine to efficiently transfer large volumes of data between the card and main
+memory without requiring help from a CPU. Like most hardware, it must operate
+on contiguous physical memory. This is difficult to come by in large quantities
+on virtual memory machines.
+
+Therefore, it also supports a technique called "scatter-gather". The card can
+transfer multiple buffers in one operation. Instead of allocating one large
+contiguous buffer, the driver can allocate several smaller buffers.
+
+In practice, I've seen the average transfer to be roughly 80K, but transfers
+above 128K were not uncommon, particularly at startup. The 128K figure is
+important, because that is the largest block that the kernel can normally
+allocate. Even still, 128K blocks are hard to come by, so the driver writer is
+urged to choose a smaller block size and learn the scatter-gather technique.
+
+Mailbox #10 is reserved for DMA transfer information.
+
+Note: the hardware expects little-endian data ('intel format').
+
+Flow
+====
+
+This section describes, in general, the order of events when handling DMA
+transfers. Detailed information follows this section.
+
+- The card raises the Encoder interrupt.
+- The driver reads the transfer type, offset and size from Mailbox #10.
+- The driver constructs the scatter-gather array from enough free dma buffers
+ to cover the size.
+- The driver schedules the DMA transfer via the ScheduleDMAtoHost API call.
+- The card raises the DMA Complete interrupt.
+- The driver checks the DMA status register for any errors.
+- The driver post-processes the newly transferred buffers.
+
+NOTE! It is possible that the Encoder and DMA Complete interrupts get raised
+simultaneously. (End of the last, start of the next, etc.)
+
+Mailbox #10
+===========
+
+The Flags, Command, Return Value and Timeout fields are ignored.
+
+Name: Mailbox #10
+Results[0]: Type: 0: MPEG.
+Results[1]: Offset: The position relative to the card's memory space.
+Results[2]: Size: The exact number of bytes to transfer.
+
+My speculation is that since the StartCapture API has a capture type of "RAW"
+available, that the type field will have other values that correspond to YUV
+and PCM data.
+
+Scatter-Gather Array
+====================
+
+The scatter-gather array is a contiguously allocated block of memory that
+tells the card the source and destination of each data-block to transfer.
+Card "addresses" are derived from the offset supplied by Mailbox #10. Host
+addresses are the physical memory location of the target DMA buffer.
+
+Each S-G array element is a struct of three 32-bit words. The first word is
+the source address, the second is the destination address. Both take up the
+entire 32 bits. The lowest 18 bits of the third word is the transfer byte
+count. The high-bit of the third word is the "last" flag. The last-flag tells
+the card to raise the DMA_DONE interrupt. From hard personal experience, if
+you forget to set this bit, the card will still "work" but the stream will
+most likely get corrupted.
+
+The transfer count must be a multiple of 256. Therefore, the driver will need
+to track how much data in the target buffer is valid and deal with it
+accordingly.
+
+Array Element:
+
+- 32-bit Source Address
+- 32-bit Destination Address
+- 14-bit reserved (high bit is the last flag)
+- 18-bit byte count
+
+DMA Transfer Status
+===================
+
+Register 0x0004 holds the DMA Transfer Status:
+
+Bit
+0 read completed
+1 write completed
+2 DMA read error
+3 DMA write error
+4 Scatter-Gather array error
diff --git a/Documentation/video4linux/cx2341x/fw-encoder-api.txt b/Documentation/video4linux/cx2341x/fw-encoder-api.txt
new file mode 100644
index 000000000..5a27af2ee
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-encoder-api.txt
@@ -0,0 +1,709 @@
+Encoder firmware API description
+================================
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_PING_FW
+Enum 128/0x80
+Description
+ Does nothing. Can be used to check if the firmware is responding.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_START_CAPTURE
+Enum 129/0x81
+Description
+ Commences the capture of video, audio and/or VBI data. All encoding
+ parameters must be initialized prior to this API call. Captures frames
+ continuously or until a predefined number of frames have been captured.
+Param[0]
+ Capture stream type:
+ 0=MPEG
+ 1=Raw
+ 2=Raw passthrough
+ 3=VBI
+
+Param[1]
+ Bitmask:
+ Bit 0 when set, captures YUV
+ Bit 1 when set, captures PCM audio
+ Bit 2 when set, captures VBI (same as param[0]=3)
+ Bit 3 when set, the capture destination is the decoder
+ (same as param[0]=2)
+ Bit 4 when set, the capture destination is the host
+ Note: this parameter is only meaningful for RAW capture type.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_STOP_CAPTURE
+Enum 130/0x82
+Description
+ Ends a capture in progress
+Param[0]
+ 0=stop at end of GOP (generates IRQ)
+ 1=stop immediate (no IRQ)
+Param[1]
+ Stream type to stop, see param[0] of API 0x81
+Param[2]
+ Subtype, see param[1] of API 0x81
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_AUDIO_ID
+Enum 137/0x89
+Description
+ Assigns the transport stream ID of the encoded audio stream
+Param[0]
+ Audio Stream ID
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_VIDEO_ID
+Enum 139/0x8B
+Description
+ Set video transport stream ID
+Param[0]
+ Video stream ID
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_PCR_ID
+Enum 141/0x8D
+Description
+ Assigns the transport stream ID for PCR packets
+Param[0]
+ PCR Stream ID
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_FRAME_RATE
+Enum 143/0x8F
+Description
+ Set video frames per second. Change occurs at start of new GOP.
+Param[0]
+ 0=30fps
+ 1=25fps
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_FRAME_SIZE
+Enum 145/0x91
+Description
+ Select video stream encoding resolution.
+Param[0]
+ Height in lines. Default 480
+Param[1]
+ Width in pixels. Default 720
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_BIT_RATE
+Enum 149/0x95
+Description
+ Assign average video stream bitrate. Note on the last three params:
+ Param[3] and [4] seem to be always 0, param [5] doesn't seem to be used.
+Param[0]
+ 0=variable bitrate, 1=constant bitrate
+Param[1]
+ bitrate in bits per second
+Param[2]
+ peak bitrate in bits per second, divided by 400
+Param[3]
+ Mux bitrate in bits per second, divided by 400. May be 0 (default).
+Param[4]
+ Rate Control VBR Padding
+Param[5]
+ VBV Buffer used by encoder
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_GOP_PROPERTIES
+Enum 151/0x97
+Description
+ Setup the GOP structure
+Param[0]
+ GOP size (maximum is 34)
+Param[1]
+ Number of B frames between the I and P frame, plus 1.
+ For example: IBBPBBPBBPBB --> GOP size: 12, number of B frames: 2+1 = 3
+ Note that GOP size must be a multiple of (B-frames + 1).
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_ASPECT_RATIO
+Enum 153/0x99
+Description
+ Sets the encoding aspect ratio. Changes in the aspect ratio take effect
+ at the start of the next GOP.
+Param[0]
+ '0000' forbidden
+ '0001' 1:1 square
+ '0010' 4:3
+ '0011' 16:9
+ '0100' 2.21:1
+ '0101' reserved
+ ....
+ '1111' reserved
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_DNR_FILTER_MODE
+Enum 155/0x9B
+Description
+ Assign Dynamic Noise Reduction operating mode
+Param[0]
+ Bit0: Spatial filter, set=auto, clear=manual
+ Bit1: Temporal filter, set=auto, clear=manual
+Param[1]
+ Median filter:
+ 0=Disabled
+ 1=Horizontal
+ 2=Vertical
+ 3=Horiz/Vert
+ 4=Diagonal
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_DNR_FILTER_PROPS
+Enum 157/0x9D
+Description
+ These Dynamic Noise Reduction filter values are only meaningful when
+ the respective filter is set to "manual" (See API 0x9B)
+Param[0]
+ Spatial filter: default 0, range 0:15
+Param[1]
+ Temporal filter: default 0, range 0:31
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_CORING_LEVELS
+Enum 159/0x9F
+Description
+ Assign Dynamic Noise Reduction median filter properties.
+Param[0]
+ Threshold above which the luminance median filter is enabled.
+ Default: 0, range 0:255
+Param[1]
+ Threshold below which the luminance median filter is enabled.
+ Default: 255, range 0:255
+Param[2]
+ Threshold above which the chrominance median filter is enabled.
+ Default: 0, range 0:255
+Param[3]
+ Threshold below which the chrominance median filter is enabled.
+ Default: 255, range 0:255
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_SPATIAL_FILTER_TYPE
+Enum 161/0xA1
+Description
+ Assign spatial prefilter parameters
+Param[0]
+ Luminance filter
+ 0=Off
+ 1=1D Horizontal
+ 2=1D Vertical
+ 3=2D H/V Separable (default)
+ 4=2D Symmetric non-separable
+Param[1]
+ Chrominance filter
+ 0=Off
+ 1=1D Horizontal (default)
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_VBI_LINE
+Enum 183/0xB7
+Description
+ Selects VBI line number.
+Param[0]
+ Bits 0:4 line number
+ Bit 31 0=top_field, 1=bottom_field
+ Bits 0:31 all set specifies "all lines"
+Param[1]
+ VBI line information features: 0=disabled, 1=enabled
+Param[2]
+ Slicing: 0=None, 1=Closed Caption
+ Almost certainly not implemented. Set to 0.
+Param[3]
+ Luminance samples in this line.
+ Almost certainly not implemented. Set to 0.
+Param[4]
+ Chrominance samples in this line
+ Almost certainly not implemented. Set to 0.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_STREAM_TYPE
+Enum 185/0xB9
+Description
+ Assign stream type
+ Note: Transport stream is not working in recent firmwares.
+ And in older firmwares the timestamps in the TS seem to be
+ unreliable.
+Param[0]
+ 0=Program stream
+ 1=Transport stream
+ 2=MPEG1 stream
+ 3=PES A/V stream
+ 5=PES Video stream
+ 7=PES Audio stream
+ 10=DVD stream
+ 11=VCD stream
+ 12=SVCD stream
+ 13=DVD_S1 stream
+ 14=DVD_S2 stream
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_OUTPUT_PORT
+Enum 187/0xBB
+Description
+ Assign stream output port. Normally 0 when the data is copied through
+ the PCI bus (DMA), and 1 when the data is streamed to another chip
+ (pvrusb and cx88-blackbird).
+Param[0]
+ 0=Memory (default)
+ 1=Streaming
+ 2=Serial
+Param[1]
+ Unknown, but leaving this to 0 seems to work best. Indications are that
+ this might have to do with USB support, although passing anything but 0
+ only breaks things.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_AUDIO_PROPERTIES
+Enum 189/0xBD
+Description
+ Set audio stream properties, may be called while encoding is in progress.
+ Note: all bitfields are consistent with ISO11172 documentation except
+ bits 2:3 which ISO docs define as:
+ '11' Layer I
+ '10' Layer II
+ '01' Layer III
+ '00' Undefined
+ This discrepancy may indicate a possible error in the documentation.
+ Testing indicated that only Layer II is actually working, and that
+ the minimum bitrate should be 192 kbps.
+Param[0]
+ Bitmask:
+ 0:1 '00' 44.1Khz
+ '01' 48Khz
+ '10' 32Khz
+ '11' reserved
+
+ 2:3 '01'=Layer I
+ '10'=Layer II
+
+ 4:7 Bitrate:
+ Index | Layer I | Layer II
+ ------+-------------+------------
+ '0000' | free format | free format
+ '0001' | 32 kbit/s | 32 kbit/s
+ '0010' | 64 kbit/s | 48 kbit/s
+ '0011' | 96 kbit/s | 56 kbit/s
+ '0100' | 128 kbit/s | 64 kbit/s
+ '0101' | 160 kbit/s | 80 kbit/s
+ '0110' | 192 kbit/s | 96 kbit/s
+ '0111' | 224 kbit/s | 112 kbit/s
+ '1000' | 256 kbit/s | 128 kbit/s
+ '1001' | 288 kbit/s | 160 kbit/s
+ '1010' | 320 kbit/s | 192 kbit/s
+ '1011' | 352 kbit/s | 224 kbit/s
+ '1100' | 384 kbit/s | 256 kbit/s
+ '1101' | 416 kbit/s | 320 kbit/s
+ '1110' | 448 kbit/s | 384 kbit/s
+ Note: For Layer II, not all combinations of total bitrate
+ and mode are allowed. See ISO11172-3 3-Annex B, Table 3-B.2
+
+ 8:9 '00'=Stereo
+ '01'=JointStereo
+ '10'=Dual
+ '11'=Mono
+ Note: the cx23415 cannot decode Joint Stereo properly.
+
+ 10:11 Mode Extension used in joint_stereo mode.
+ In Layer I and II they indicate which subbands are in
+ intensity_stereo. All other subbands are coded in stereo.
+ '00' subbands 4-31 in intensity_stereo, bound==4
+ '01' subbands 8-31 in intensity_stereo, bound==8
+ '10' subbands 12-31 in intensity_stereo, bound==12
+ '11' subbands 16-31 in intensity_stereo, bound==16
+
+ 12:13 Emphasis:
+ '00' None
+ '01' 50/15uS
+ '10' reserved
+ '11' CCITT J.17
+
+ 14 CRC:
+ '0' off
+ '1' on
+
+ 15 Copyright:
+ '0' off
+ '1' on
+
+ 16 Generation:
+ '0' copy
+ '1' original
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_HALT_FW
+Enum 195/0xC3
+Description
+ The firmware is halted and no further API calls are serviced until the
+ firmware is uploaded again.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_GET_VERSION
+Enum 196/0xC4
+Description
+ Returns the version of the encoder firmware.
+Result[0]
+ Version bitmask:
+ Bits 0:15 build
+ Bits 16:23 minor
+ Bits 24:31 major
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_GOP_CLOSURE
+Enum 197/0xC5
+Description
+ Assigns the GOP open/close property.
+Param[0]
+ 0=Open
+ 1=Closed
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_GET_SEQ_END
+Enum 198/0xC6
+Description
+ Obtains the sequence end code of the encoder's buffer. When a capture
+ is started a number of interrupts are still generated, the last of
+ which will have Result[0] set to 1 and Result[1] will contain the size
+ of the buffer.
+Result[0]
+ State of the transfer (1 if last buffer)
+Result[1]
+ If Result[0] is 1, this contains the size of the last buffer, undefined
+ otherwise.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_PGM_INDEX_INFO
+Enum 199/0xC7
+Description
+ Sets the Program Index Information.
+ The information is stored as follows:
+
+ struct info {
+ u32 length; // Length of this frame
+ u32 offset_low; // Offset in the file of the
+ u32 offset_high; // start of this frame
+ u32 mask1; // Bits 0-2 are the type mask:
+ // 1=I, 2=P, 4=B
+ // 0=End of Program Index, other fields
+ // are invalid.
+ u32 pts; // The PTS of the frame
+ u32 mask2; // Bit 0 is bit 32 of the pts.
+ };
+ u32 table_ptr;
+ struct info index[400];
+
+ The table_ptr is the encoder memory address in the table were
+ *new* entries will be written. Note that this is a ringbuffer,
+ so the table_ptr will wraparound.
+Param[0]
+ Picture Mask:
+ 0=No index capture
+ 1=I frames
+ 3=I,P frames
+ 7=I,P,B frames
+ (Seems to be ignored, it always indexes I, P and B frames)
+Param[1]
+ Elements requested (up to 400)
+Result[0]
+ Offset in the encoder memory of the start of the table.
+Result[1]
+ Number of allocated elements up to a maximum of Param[1]
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_VBI_CONFIG
+Enum 200/0xC8
+Description
+ Configure VBI settings
+Param[0]
+ Bitmap:
+ 0 Mode '0' Sliced, '1' Raw
+ 1:3 Insertion:
+ '000' insert in extension & user data
+ '001' insert in private packets
+ '010' separate stream and user data
+ '111' separate stream and private data
+ 8:15 Stream ID (normally 0xBD)
+Param[1]
+ Frames per interrupt (max 8). Only valid in raw mode.
+Param[2]
+ Total raw VBI frames. Only valid in raw mode.
+Param[3]
+ Start codes
+Param[4]
+ Stop codes
+Param[5]
+ Lines per frame
+Param[6]
+ Byte per line
+Result[0]
+ Observed frames per interrupt in raw mode only. Rage 1 to Param[1]
+Result[1]
+ Observed number of frames in raw mode. Range 1 to Param[2]
+Result[2]
+ Memory offset to start or raw VBI data
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_DMA_BLOCK_SIZE
+Enum 201/0xC9
+Description
+ Set DMA transfer block size
+Param[0]
+ DMA transfer block size in bytes or frames. When unit is bytes,
+ supported block sizes are 2^7, 2^8 and 2^9 bytes.
+Param[1]
+ Unit: 0=bytes, 1=frames
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_GET_PREV_DMA_INFO_MB_10
+Enum 202/0xCA
+Description
+ Returns information on the previous DMA transfer in conjunction with
+ bit 27 of the interrupt mask. Uses mailbox 10.
+Result[0]
+ Type of stream
+Result[1]
+ Address Offset
+Result[2]
+ Maximum size of transfer
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_GET_PREV_DMA_INFO_MB_9
+Enum 203/0xCB
+Description
+ Returns information on the previous DMA transfer in conjunction with
+ bit 27 or 18 of the interrupt mask. Uses mailbox 9.
+Result[0]
+ Status bits:
+ 0 read completed
+ 1 write completed
+ 2 DMA read error
+ 3 DMA write error
+ 4 Scatter-Gather array error
+Result[1]
+ DMA type
+Result[2]
+ Presentation Time Stamp bits 0..31
+Result[3]
+ Presentation Time Stamp bit 32
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SCHED_DMA_TO_HOST
+Enum 204/0xCC
+Description
+ Setup DMA to host operation
+Param[0]
+ Memory address of link list
+Param[1]
+ Length of link list (wtf: what units ???)
+Param[2]
+ DMA type (0=MPEG)
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_INITIALIZE_INPUT
+Enum 205/0xCD
+Description
+ Initializes the video input
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_FRAME_DROP_RATE
+Enum 208/0xD0
+Description
+ For each frame captured, skip specified number of frames.
+Param[0]
+ Number of frames to skip
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_PAUSE_ENCODER
+Enum 210/0xD2
+Description
+ During a pause condition, all frames are dropped instead of being encoded.
+Param[0]
+ 0=Pause encoding
+ 1=Continue encoding
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_REFRESH_INPUT
+Enum 211/0xD3
+Description
+ Refreshes the video input
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_COPYRIGHT
+Enum 212/0xD4
+Description
+ Sets stream copyright property
+Param[0]
+ 0=Stream is not copyrighted
+ 1=Stream is copyrighted
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_EVENT_NOTIFICATION
+Enum 213/0xD5
+Description
+ Setup firmware to notify the host about a particular event. Host must
+ unmask the interrupt bit.
+Param[0]
+ Event (0=refresh encoder input)
+Param[1]
+ Notification 0=disabled 1=enabled
+Param[2]
+ Interrupt bit
+Param[3]
+ Mailbox slot, -1 if no mailbox required.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_NUM_VSYNC_LINES
+Enum 214/0xD6
+Description
+ Depending on the analog video decoder used, this assigns the number
+ of lines for field 1 and 2.
+Param[0]
+ Field 1 number of lines:
+ 0x00EF for SAA7114
+ 0x00F0 for SAA7115
+ 0x0105 for Micronas
+Param[1]
+ Field 2 number of lines:
+ 0x00EF for SAA7114
+ 0x00F0 for SAA7115
+ 0x0106 for Micronas
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_PLACEHOLDER
+Enum 215/0xD7
+Description
+ Provides a mechanism of inserting custom user data in the MPEG stream.
+Param[0]
+ 0=extension & user data
+ 1=private packet with stream ID 0xBD
+Param[1]
+ Rate at which to insert data, in units of frames (for private packet)
+ or GOPs (for ext. & user data)
+Param[2]
+ Number of data DWORDs (below) to insert
+Param[3]
+ Custom data 0
+Param[4]
+ Custom data 1
+Param[5]
+ Custom data 2
+Param[6]
+ Custom data 3
+Param[7]
+ Custom data 4
+Param[8]
+ Custom data 5
+Param[9]
+ Custom data 6
+Param[10]
+ Custom data 7
+Param[11]
+ Custom data 8
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_MUTE_VIDEO
+Enum 217/0xD9
+Description
+ Video muting
+Param[0]
+ Bit usage:
+ 0 '0'=video not muted
+ '1'=video muted, creates frames with the YUV color defined below
+ 1:7 Unused
+ 8:15 V chrominance information
+ 16:23 U chrominance information
+ 24:31 Y luminance information
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_MUTE_AUDIO
+Enum 218/0xDA
+Description
+ Audio muting
+Param[0]
+ 0=audio not muted
+ 1=audio muted (produces silent mpeg audio stream)
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_SET_VERT_CROP_LINE
+Enum 219/0xDB
+Description
+ Something to do with 'Vertical Crop Line'
+Param[0]
+ If saa7114 and raw VBI capture and 60 Hz, then set to 10001.
+ Else 0.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_ENC_MISC
+Enum 220/0xDC
+Description
+ Miscellaneous actions. Not known for 100% what it does. It's really a
+ sort of ioctl call. The first parameter is a command number, the second
+ the value.
+Param[0]
+ Command number:
+ 1=set initial SCR value when starting encoding (works).
+ 2=set quality mode (apparently some test setting).
+ 3=setup advanced VIM protection handling.
+ Always 1 for the cx23416 and 0 for cx23415.
+ 4=generate DVD compatible PTS timestamps
+ 5=USB flush mode
+ 6=something to do with the quantization matrix
+ 7=set navigation pack insertion for DVD: adds 0xbf (private stream 2)
+ packets to the MPEG. The size of these packets is 2048 bytes (including
+ the header of 6 bytes: 0x000001bf + length). The payload is zeroed and
+ it is up to the application to fill them in. These packets are apparently
+ inserted every four frames.
+ 8=enable scene change detection (seems to be a failure)
+ 9=set history parameters of the video input module
+ 10=set input field order of VIM
+ 11=set quantization matrix
+ 12=reset audio interface after channel change or input switch (has no argument).
+ Needed for the cx2584x, not needed for the mspx4xx, but it doesn't seem to
+ do any harm calling it regardless.
+ 13=set audio volume delay
+ 14=set audio delay
+
+Param[1]
+ Command value.
diff --git a/Documentation/video4linux/cx2341x/fw-memory.txt b/Documentation/video4linux/cx2341x/fw-memory.txt
new file mode 100644
index 000000000..9d736fe8d
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-memory.txt
@@ -0,0 +1,139 @@
+This document describes the cx2341x memory map and documents some of the register
+space.
+
+Note: the memory long words are little-endian ('intel format').
+
+Warning! This information was figured out from searching through the memory and
+registers, this information may not be correct and is certainly not complete, and
+was not derived from anything more than searching through the memory space with
+commands like:
+
+ ivtvctl -O min=0x02000000,max=0x020000ff
+
+So take this as is, I'm always searching for more stuff, it's a large
+register space :-).
+
+Memory Map
+==========
+
+The cx2341x exposes its entire 64M memory space to the PCI host via the PCI BAR0
+(Base Address Register 0). The addresses here are offsets relative to the
+address held in BAR0.
+
+0x00000000-0x00ffffff Encoder memory space
+0x00000000-0x0003ffff Encode.rom
+ ???-??? MPEG buffer(s)
+ ???-??? Raw video capture buffer(s)
+ ???-??? Raw audio capture buffer(s)
+ ???-??? Display buffers (6 or 9)
+
+0x01000000-0x01ffffff Decoder memory space
+0x01000000-0x0103ffff Decode.rom
+ ???-??? MPEG buffers(s)
+0x0114b000-0x0115afff Audio.rom (deprecated?)
+
+0x02000000-0x0200ffff Register Space
+
+Registers
+=========
+
+The registers occupy the 64k space starting at the 0x02000000 offset from BAR0.
+All of these registers are 32 bits wide.
+
+DMA Registers 0x000-0xff:
+
+ 0x00 - Control:
+ 0=reset/cancel, 1=read, 2=write, 4=stop
+ 0x04 - DMA status:
+ 1=read busy, 2=write busy, 4=read error, 8=write error, 16=link list error
+ 0x08 - pci DMA pointer for read link list
+ 0x0c - pci DMA pointer for write link list
+ 0x10 - read/write DMA enable:
+ 1=read enable, 2=write enable
+ 0x14 - always 0xffffffff, if set any lower instability occurs, 0x00 crashes
+ 0x18 - ??
+ 0x1c - always 0x20 or 32, smaller values slow down DMA transactions
+ 0x20 - always value of 0x780a010a
+ 0x24-0x3c - usually just random values???
+ 0x40 - Interrupt status
+ 0x44 - Write a bit here and shows up in Interrupt status 0x40
+ 0x48 - Interrupt Mask
+ 0x4C - always value of 0xfffdffff,
+ if changed to 0xffffffff DMA write interrupts break.
+ 0x50 - always 0xffffffff
+ 0x54 - always 0xffffffff (0x4c, 0x50, 0x54 seem like interrupt masks, are
+ 3 processors on chip, Java ones, VPU, SPU, APU, maybe these are the
+ interrupt masks???).
+ 0x60-0x7C - random values
+ 0x80 - first write linked list reg, for Encoder Memory addr
+ 0x84 - first write linked list reg, for pci memory addr
+ 0x88 - first write linked list reg, for length of buffer in memory addr
+ (|0x80000000 or this for last link)
+ 0x8c-0xdc - rest of write linked list reg, 8 sets of 3 total, DMA goes here
+ from linked list addr in reg 0x0c, firmware must push through or
+ something.
+ 0xe0 - first (and only) read linked list reg, for pci memory addr
+ 0xe4 - first (and only) read linked list reg, for Decoder memory addr
+ 0xe8 - first (and only) read linked list reg, for length of buffer
+ 0xec-0xff - Nothing seems to be in these registers, 0xec-f4 are 0x00000000.
+
+Memory locations for Encoder Buffers 0x700-0x7ff:
+
+These registers show offsets of memory locations pertaining to each
+buffer area used for encoding, have to shift them by <<1 first.
+
+0x07F8: Encoder SDRAM refresh
+0x07FC: Encoder SDRAM pre-charge
+
+Memory locations for Decoder Buffers 0x800-0x8ff:
+
+These registers show offsets of memory locations pertaining to each
+buffer area used for decoding, have to shift them by <<1 first.
+
+0x08F8: Decoder SDRAM refresh
+0x08FC: Decoder SDRAM pre-charge
+
+Other memory locations:
+
+0x2800: Video Display Module control
+0x2D00: AO (audio output?) control
+0x2D24: Bytes Flushed
+0x7000: LSB I2C write clock bit (inverted)
+0x7004: LSB I2C write data bit (inverted)
+0x7008: LSB I2C read clock bit
+0x700c: LSB I2C read data bit
+0x9008: GPIO get input state
+0x900c: GPIO set output state
+0x9020: GPIO direction (Bit7 (GPIO 0..7) - 0:input, 1:output)
+0x9050: SPU control
+0x9054: Reset HW blocks
+0x9058: VPU control
+0xA018: Bit6: interrupt pending?
+0xA064: APU command
+
+
+Interrupt Status Register
+=========================
+
+The definition of the bits in the interrupt status register 0x0040, and the
+interrupt mask 0x0048. If a bit is cleared in the mask, then we want our ISR to
+execute.
+
+Bit
+31 Encoder Start Capture
+30 Encoder EOS
+29 Encoder VBI capture
+28 Encoder Video Input Module reset event
+27 Encoder DMA complete
+24 Decoder audio mode change detection event (through event notification)
+22 Decoder data request
+20 Decoder DMA complete
+19 Decoder VBI re-insertion
+18 Decoder DMA err (linked-list bad)
+
+Missing
+Encoder API call completed
+Decoder API call completed
+Encoder API post(?)
+Decoder API post(?)
+Decoder VTRACE event
diff --git a/Documentation/video4linux/cx2341x/fw-osd-api.txt b/Documentation/video4linux/cx2341x/fw-osd-api.txt
new file mode 100644
index 000000000..89c460104
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-osd-api.txt
@@ -0,0 +1,350 @@
+OSD firmware API description
+============================
+
+Note: this API is part of the decoder firmware, so it's cx23415 only.
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_GET_FRAMEBUFFER
+Enum 65/0x41
+Description
+ Return base and length of contiguous OSD memory.
+Result[0]
+ OSD base address
+Result[1]
+ OSD length
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_GET_PIXEL_FORMAT
+Enum 66/0x42
+Description
+ Query OSD format
+Result[0]
+ 0=8bit index
+ 1=16bit RGB 5:6:5
+ 2=16bit ARGB 1:5:5:5
+ 3=16bit ARGB 1:4:4:4
+ 4=32bit ARGB 8:8:8:8
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_SET_PIXEL_FORMAT
+Enum 67/0x43
+Description
+ Assign pixel format
+Param[0]
+ 0=8bit index
+ 1=16bit RGB 5:6:5
+ 2=16bit ARGB 1:5:5:5
+ 3=16bit ARGB 1:4:4:4
+ 4=32bit ARGB 8:8:8:8
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_GET_STATE
+Enum 68/0x44
+Description
+ Query OSD state
+Result[0]
+ Bit 0 0=off, 1=on
+ Bits 1:2 alpha control
+ Bits 3:5 pixel format
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_SET_STATE
+Enum 69/0x45
+Description
+ OSD switch
+Param[0]
+ 0=off, 1=on
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_GET_OSD_COORDS
+Enum 70/0x46
+Description
+ Retrieve coordinates of OSD area blended with video
+Result[0]
+ OSD buffer address
+Result[1]
+ Stride in pixels
+Result[2]
+ Lines in OSD buffer
+Result[3]
+ Horizontal offset in buffer
+Result[4]
+ Vertical offset in buffer
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_SET_OSD_COORDS
+Enum 71/0x47
+Description
+ Assign the coordinates of the OSD area to blend with video
+Param[0]
+ buffer address
+Param[1]
+ buffer stride in pixels
+Param[2]
+ lines in buffer
+Param[3]
+ horizontal offset
+Param[4]
+ vertical offset
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_GET_SCREEN_COORDS
+Enum 72/0x48
+Description
+ Retrieve OSD screen area coordinates
+Result[0]
+ top left horizontal offset
+Result[1]
+ top left vertical offset
+Result[2]
+ bottom right horizontal offset
+Result[3]
+ bottom right vertical offset
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_SET_SCREEN_COORDS
+Enum 73/0x49
+Description
+ Assign the coordinates of the screen area to blend with video
+Param[0]
+ top left horizontal offset
+Param[1]
+ top left vertical offset
+Param[2]
+ bottom left horizontal offset
+Param[3]
+ bottom left vertical offset
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_GET_GLOBAL_ALPHA
+Enum 74/0x4A
+Description
+ Retrieve OSD global alpha
+Result[0]
+ global alpha: 0=off, 1=on
+Result[1]
+ bits 0:7 global alpha
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_SET_GLOBAL_ALPHA
+Enum 75/0x4B
+Description
+ Update global alpha
+Param[0]
+ global alpha: 0=off, 1=on
+Param[1]
+ global alpha (8 bits)
+Param[2]
+ local alpha: 0=on, 1=off
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_SET_BLEND_COORDS
+Enum 78/0x4C
+Description
+ Move start of blending area within display buffer
+Param[0]
+ horizontal offset in buffer
+Param[1]
+ vertical offset in buffer
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_GET_FLICKER_STATE
+Enum 79/0x4F
+Description
+ Retrieve flicker reduction module state
+Result[0]
+ flicker state: 0=off, 1=on
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_SET_FLICKER_STATE
+Enum 80/0x50
+Description
+ Set flicker reduction module state
+Param[0]
+ State: 0=off, 1=on
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_BLT_COPY
+Enum 82/0x52
+Description
+ BLT copy
+Param[0]
+'0000' zero
+'0001' ~destination AND ~source
+'0010' ~destination AND source
+'0011' ~destination
+'0100' destination AND ~source
+'0101' ~source
+'0110' destination XOR source
+'0111' ~destination OR ~source
+'1000' ~destination AND ~source
+'1001' destination XNOR source
+'1010' source
+'1011' ~destination OR source
+'1100' destination
+'1101' destination OR ~source
+'1110' destination OR source
+'1111' one
+
+Param[1]
+ Resulting alpha blending
+ '01' source_alpha
+ '10' destination_alpha
+ '11' source_alpha*destination_alpha+1
+ (zero if both source and destination alpha are zero)
+Param[2]
+ '00' output_pixel = source_pixel
+
+ '01' if source_alpha=0:
+ output_pixel = destination_pixel
+ if 256 > source_alpha > 1:
+ output_pixel = ((source_alpha + 1)*source_pixel +
+ (255 - source_alpha)*destination_pixel)/256
+
+ '10' if destination_alpha=0:
+ output_pixel = source_pixel
+ if 255 > destination_alpha > 0:
+ output_pixel = ((255 - destination_alpha)*source_pixel +
+ (destination_alpha + 1)*destination_pixel)/256
+
+ '11' if source_alpha=0:
+ source_temp = 0
+ if source_alpha=255:
+ source_temp = source_pixel*256
+ if 255 > source_alpha > 0:
+ source_temp = source_pixel*(source_alpha + 1)
+ if destination_alpha=0:
+ destination_temp = 0
+ if destination_alpha=255:
+ destination_temp = destination_pixel*256
+ if 255 > destination_alpha > 0:
+ destination_temp = destination_pixel*(destination_alpha + 1)
+ output_pixel = (source_temp + destination_temp)/256
+Param[3]
+ width
+Param[4]
+ height
+Param[5]
+ destination pixel mask
+Param[6]
+ destination rectangle start address
+Param[7]
+ destination stride in dwords
+Param[8]
+ source stride in dwords
+Param[9]
+ source rectangle start address
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_BLT_FILL
+Enum 83/0x53
+Description
+ BLT fill color
+Param[0]
+ Same as Param[0] on API 0x52
+Param[1]
+ Same as Param[1] on API 0x52
+Param[2]
+ Same as Param[2] on API 0x52
+Param[3]
+ width
+Param[4]
+ height
+Param[5]
+ destination pixel mask
+Param[6]
+ destination rectangle start address
+Param[7]
+ destination stride in dwords
+Param[8]
+ color fill value
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_BLT_TEXT
+Enum 84/0x54
+Description
+ BLT for 8 bit alpha text source
+Param[0]
+ Same as Param[0] on API 0x52
+Param[1]
+ Same as Param[1] on API 0x52
+Param[2]
+ Same as Param[2] on API 0x52
+Param[3]
+ width
+Param[4]
+ height
+Param[5]
+ destination pixel mask
+Param[6]
+ destination rectangle start address
+Param[7]
+ destination stride in dwords
+Param[8]
+ source stride in dwords
+Param[9]
+ source rectangle start address
+Param[10]
+ color fill value
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_SET_FRAMEBUFFER_WINDOW
+Enum 86/0x56
+Description
+ Positions the main output window on the screen. The coordinates must be
+ such that the entire window fits on the screen.
+Param[0]
+ window width
+Param[1]
+ window height
+Param[2]
+ top left window corner horizontal offset
+Param[3]
+ top left window corner vertical offset
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_SET_CHROMA_KEY
+Enum 96/0x60
+Description
+ Chroma key switch and color
+Param[0]
+ state: 0=off, 1=on
+Param[1]
+ color
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_GET_ALPHA_CONTENT_INDEX
+Enum 97/0x61
+Description
+ Retrieve alpha content index
+Result[0]
+ alpha content index, Range 0:15
+
+-------------------------------------------------------------------------------
+
+Name CX2341X_OSD_SET_ALPHA_CONTENT_INDEX
+Enum 98/0x62
+Description
+ Assign alpha content index
+Param[0]
+ alpha content index, range 0:15
diff --git a/Documentation/video4linux/cx2341x/fw-upload.txt b/Documentation/video4linux/cx2341x/fw-upload.txt
new file mode 100644
index 000000000..60c502ce3
--- /dev/null
+++ b/Documentation/video4linux/cx2341x/fw-upload.txt
@@ -0,0 +1,49 @@
+This document describes how to upload the cx2341x firmware to the card.
+
+How to find
+===========
+
+See the web pages of the various projects that uses this chip for information
+on how to obtain the firmware.
+
+The firmware stored in a Windows driver can be detected as follows:
+
+- Each firmware image is 256k bytes.
+- The 1st 32-bit word of the Encoder image is 0x0000da7
+- The 1st 32-bit word of the Decoder image is 0x00003a7
+- The 2nd 32-bit word of both images is 0xaa55bb66
+
+How to load
+===========
+
+- Issue the FWapi command to stop the encoder if it is running. Wait for the
+ command to complete.
+- Issue the FWapi command to stop the decoder if it is running. Wait for the
+ command to complete.
+- Issue the I2C command to the digitizer to stop emitting VSYNC events.
+- Issue the FWapi command to halt the encoder's firmware.
+- Sleep for 10ms.
+- Issue the FWapi command to halt the decoder's firmware.
+- Sleep for 10ms.
+- Write 0x00000000 to register 0x2800 to stop the Video Display Module.
+- Write 0x00000005 to register 0x2D00 to stop the AO (audio output?).
+- Write 0x00000000 to register 0xA064 to ping? the APU.
+- Write 0xFFFFFFFE to register 0x9058 to stop the VPU.
+- Write 0xFFFFFFFF to register 0x9054 to reset the HW blocks.
+- Write 0x00000001 to register 0x9050 to stop the SPU.
+- Sleep for 10ms.
+- Write 0x0000001A to register 0x07FC to init the Encoder SDRAM's pre-charge.
+- Write 0x80000640 to register 0x07F8 to init the Encoder SDRAM's refresh to 1us.
+- Write 0x0000001A to register 0x08FC to init the Decoder SDRAM's pre-charge.
+- Write 0x80000640 to register 0x08F8 to init the Decoder SDRAM's refresh to 1us.
+- Sleep for 512ms. (600ms is recommended)
+- Transfer the encoder's firmware image to offset 0 in Encoder memory space.
+- Transfer the decoder's firmware image to offset 0 in Decoder memory space.
+- Use a read-modify-write operation to Clear bit 0 of register 0x9050 to
+ re-enable the SPU.
+- Sleep for 1 second.
+- Use a read-modify-write operation to Clear bits 3 and 0 of register 0x9058
+ to re-enable the VPU.
+- Sleep for 1 second.
+- Issue status API commands to both firmware images to verify.
+
diff --git a/Documentation/video4linux/cx88/hauppauge-wintv-cx88-ir.txt b/Documentation/video4linux/cx88/hauppauge-wintv-cx88-ir.txt
new file mode 100644
index 000000000..f4329a388
--- /dev/null
+++ b/Documentation/video4linux/cx88/hauppauge-wintv-cx88-ir.txt
@@ -0,0 +1,54 @@
+The controls for the mux are GPIO [0,1] for source, and GPIO 2 for muting.
+
+GPIO0 GPIO1
+ 0 0 TV Audio
+ 1 0 FM radio
+ 0 1 Line-In
+ 1 1 Mono tuner bypass or CD passthru (tuner specific)
+
+GPIO 16(i believe) is tied to the IR port (if present).
+
+------------------------------------------------------------------------------------
+
+>From the data sheet:
+ Register 24'h20004 PCI Interrupt Status
+ bit [18] IR_SMP_INT Set when 32 input samples have been collected over
+ gpio[16] pin into GP_SAMPLE register.
+
+What's missing from the data sheet:
+
+Setup 4KHz sampling rate (roughly 2x oversampled; good enough for our RC5
+compat remote)
+set register 0x35C050 to 0xa80a80
+
+enable sampling
+set register 0x35C054 to 0x5
+
+Of course, enable the IRQ bit 18 in the interrupt mask register .(and
+provide for a handler)
+
+GP_SAMPLE register is at 0x35C058
+
+Bits are then right shifted into the GP_SAMPLE register at the specified
+rate; you get an interrupt when a full DWORD is received.
+You need to recover the actual RC5 bits out of the (oversampled) IR sensor
+bits. (Hint: look for the 0/1and 1/0 crossings of the RC5 bi-phase data) An
+actual raw RC5 code will span 2-3 DWORDS, depending on the actual alignment.
+
+I'm pretty sure when no IR signal is present the receiver is always in a
+marking state(1); but stray light, etc can cause intermittent noise values
+as well. Remember, this is a free running sample of the IR receiver state
+over time, so don't assume any sample starts at any particular place.
+
+http://www.atmel.com/dyn/resources/prod_documents/doc2817.pdf
+This data sheet (google search) seems to have a lovely description of the
+RC5 basics
+
+http://www.nenya.be/beor/electronics/rc5.htm and more data
+
+http://www.ee.washington.edu/circuit_archive/text/ir_decode.txt
+and even a reference to how to decode a bi-phase data stream.
+
+http://www.xs4all.nl/~sbp/knowledge/ir/rc5.htm
+still more info
+
diff --git a/Documentation/video4linux/fimc.txt b/Documentation/video4linux/fimc.txt
new file mode 100644
index 000000000..e0c6b8bc4
--- /dev/null
+++ b/Documentation/video4linux/fimc.txt
@@ -0,0 +1,148 @@
+Samsung S5P/EXYNOS4 FIMC driver
+
+Copyright (C) 2012 - 2013 Samsung Electronics Co., Ltd.
+---------------------------------------------------------------------------
+
+The FIMC (Fully Interactive Mobile Camera) device available in Samsung
+SoC Application Processors is an integrated camera host interface, color
+space converter, image resizer and rotator. It's also capable of capturing
+data from LCD controller (FIMD) through the SoC internal writeback data
+path. There are multiple FIMC instances in the SoCs (up to 4), having
+slightly different capabilities, like pixel alignment constraints, rotator
+availability, LCD writeback support, etc. The driver is located at
+drivers/media/platform/exynos4-is directory.
+
+1. Supported SoCs
+=================
+
+S5PC100 (mem-to-mem only), S5PV210, EXYNOS4210
+
+2. Supported features
+=====================
+
+ - camera parallel interface capture (ITU-R.BT601/565);
+ - camera serial interface capture (MIPI-CSI2);
+ - memory-to-memory processing (color space conversion, scaling, mirror
+ and rotation);
+ - dynamic pipeline re-configuration at runtime (re-attachment of any FIMC
+ instance to any parallel video input or any MIPI-CSI front-end);
+ - runtime PM and system wide suspend/resume
+
+Not currently supported:
+ - LCD writeback input
+ - per frame clock gating (mem-to-mem)
+
+3. Files partitioning
+=====================
+
+- media device driver
+ drivers/media/platform/exynos4-is/media-dev.[ch]
+
+ - camera capture video device driver
+ drivers/media/platform/exynos4-is/fimc-capture.c
+
+ - MIPI-CSI2 receiver subdev
+ drivers/media/platform/exynos4-is/mipi-csis.[ch]
+
+ - video post-processor (mem-to-mem)
+ drivers/media/platform/exynos4-is/fimc-core.c
+
+ - common files
+ drivers/media/platform/exynos4-is/fimc-core.h
+ drivers/media/platform/exynos4-is/fimc-reg.h
+ drivers/media/platform/exynos4-is/regs-fimc.h
+
+4. User space interfaces
+========================
+
+4.1. Media device interface
+
+The driver supports Media Controller API as defined at
+http://linuxtv.org/downloads/v4l-dvb-apis/media_common.html
+The media device driver name is "SAMSUNG S5P FIMC".
+
+The purpose of this interface is to allow changing assignment of FIMC instances
+to the SoC peripheral camera input at runtime and optionally to control internal
+connections of the MIPI-CSIS device(s) to the FIMC entities.
+
+The media device interface allows to configure the SoC for capturing image
+data from the sensor through more than one FIMC instance (e.g. for simultaneous
+viewfinder and still capture setup).
+Reconfiguration is done by enabling/disabling media links created by the driver
+during initialization. The internal device topology can be easily discovered
+through media entity and links enumeration.
+
+4.2. Memory-to-memory video node
+
+V4L2 memory-to-memory interface at /dev/video? device node. This is standalone
+video device, it has no media pads. However please note the mem-to-mem and
+capture video node operation on same FIMC instance is not allowed. The driver
+detects such cases but the applications should prevent them to avoid an
+undefined behaviour.
+
+4.3. Capture video node
+
+The driver supports V4L2 Video Capture Interface as defined at:
+http://linuxtv.org/downloads/v4l-dvb-apis/devices.html
+
+At the capture and mem-to-mem video nodes only the multi-planar API is
+supported. For more details see:
+http://linuxtv.org/downloads/v4l-dvb-apis/planar-apis.html
+
+4.4. Camera capture subdevs
+
+Each FIMC instance exports a sub-device node (/dev/v4l-subdev?), a sub-device
+node is also created per each available and enabled at the platform level
+MIPI-CSI receiver device (currently up to two).
+
+4.5. sysfs
+
+In order to enable more precise camera pipeline control through the sub-device
+API the driver creates a sysfs entry associated with "s5p-fimc-md" platform
+device. The entry path is: /sys/platform/devices/s5p-fimc-md/subdev_conf_mode.
+
+In typical use case there could be a following capture pipeline configuration:
+sensor subdev -> mipi-csi subdev -> fimc subdev -> video node
+
+When we configure these devices through sub-device API at user space, the
+configuration flow must be from left to right, and the video node is
+configured as last one.
+When we don't use sub-device user space API the whole configuration of all
+devices belonging to the pipeline is done at the video node driver.
+The sysfs entry allows to instruct the capture node driver not to configure
+the sub-devices (format, crop), to avoid resetting the subdevs' configuration
+when the last configuration steps at the video node is performed.
+
+For full sub-device control support (subdevs configured at user space before
+starting streaming):
+# echo "sub-dev" > /sys/platform/devices/s5p-fimc-md/subdev_conf_mode
+
+For V4L2 video node control only (subdevs configured internally by the host
+driver):
+# echo "vid-dev" > /sys/platform/devices/s5p-fimc-md/subdev_conf_mode
+This is a default option.
+
+5. Device mapping to video and subdev device nodes
+==================================================
+
+There are associated two video device nodes with each device instance in
+hardware - video capture and mem-to-mem and additionally a subdev node for
+more precise FIMC capture subsystem control. In addition a separate v4l2
+sub-device node is created per each MIPI-CSIS device.
+
+How to find out which /dev/video? or /dev/v4l-subdev? is assigned to which
+device?
+
+You can either grep through the kernel log to find relevant information, i.e.
+# dmesg | grep -i fimc
+(note that udev, if present, might still have rearranged the video nodes),
+
+or retrieve the information from /dev/media? with help of the media-ctl tool:
+# media-ctl -p
+
+7. Build
+========
+
+If the driver is built as a loadable kernel module (CONFIG_VIDEO_SAMSUNG_S5P_FIMC=m)
+two modules are created (in addition to the core v4l2 modules): s5p-fimc.ko and
+optional s5p-csis.ko (MIPI-CSI receiver subdev).
diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt
new file mode 100644
index 000000000..d2ba80bb7
--- /dev/null
+++ b/Documentation/video4linux/gspca.txt
@@ -0,0 +1,408 @@
+List of the webcams known by gspca.
+
+The modules are:
+ gspca_main main driver
+ gspca_xxxx subdriver module with xxxx as follows
+
+xxxx vend:prod
+----
+spca501 0000:0000 MystFromOri Unknown Camera
+spca508 0130:0130 Clone Digital Webcam 11043
+zc3xx 03f0:1b07 HP Premium Starter Cam
+m5602 0402:5602 ALi Video Camera Controller
+spca501 040a:0002 Kodak DVC-325
+spca500 040a:0300 Kodak EZ200
+zc3xx 041e:041e Creative WebCam Live!
+ov519 041e:4003 Video Blaster WebCam Go Plus
+spca500 041e:400a Creative PC-CAM 300
+sunplus 041e:400b Creative PC-CAM 600
+sunplus 041e:4012 PC-Cam350
+sunplus 041e:4013 Creative Pccam750
+zc3xx 041e:4017 Creative Webcam Mobile PD1090
+spca508 041e:4018 Creative Webcam Vista (PD1100)
+spca561 041e:401a Creative Webcam Vista (PD1100)
+zc3xx 041e:401c Creative NX
+spca505 041e:401d Creative Webcam NX ULTRA
+zc3xx 041e:401e Creative Nx Pro
+zc3xx 041e:401f Creative Webcam Notebook PD1171
+pac207 041e:4028 Creative Webcam Vista Plus
+zc3xx 041e:4029 Creative WebCam Vista Pro
+zc3xx 041e:4034 Creative Instant P0620
+zc3xx 041e:4035 Creative Instant P0620D
+zc3xx 041e:4036 Creative Live !
+sq930x 041e:4038 Creative Joy-IT
+zc3xx 041e:403a Creative Nx Pro 2
+spca561 041e:403b Creative Webcam Vista (VF0010)
+sq930x 041e:403c Creative Live! Ultra
+sq930x 041e:403d Creative Live! Ultra for Notebooks
+sq930x 041e:4041 Creative Live! Motion
+zc3xx 041e:4051 Creative Live!Cam Notebook Pro (VF0250)
+ov519 041e:4052 Creative Live! VISTA IM
+zc3xx 041e:4053 Creative Live!Cam Video IM
+vc032x 041e:405b Creative Live! Cam Notebook Ultra (VC0130)
+ov519 041e:405f Creative Live! VISTA VF0330
+ov519 041e:4060 Creative Live! VISTA VF0350
+ov519 041e:4061 Creative Live! VISTA VF0400
+ov519 041e:4064 Creative Live! VISTA VF0420
+ov519 041e:4067 Creative Live! Cam Video IM (VF0350)
+ov519 041e:4068 Creative Live! VISTA VF0470
+spca561 0458:7004 Genius VideoCAM Express V2
+sn9c2028 0458:7005 Genius Smart 300, version 2
+sunplus 0458:7006 Genius Dsc 1.3 Smart
+zc3xx 0458:7007 Genius VideoCam V2
+zc3xx 0458:700c Genius VideoCam V3
+zc3xx 0458:700f Genius VideoCam Web V2
+sonixj 0458:7025 Genius Eye 311Q
+sn9c20x 0458:7029 Genius Look 320s
+sonixj 0458:702e Genius Slim 310 NB
+sn9c20x 0458:7045 Genius Look 1320 V2
+sn9c20x 0458:704a Genius Slim 1320
+sn9c20x 0458:704c Genius i-Look 1321
+sn9c20x 045e:00f4 LifeCam VX-6000 (SN9C20x + OV9650)
+sonixj 045e:00f5 MicroSoft VX3000
+sonixj 045e:00f7 MicroSoft VX1000
+ov519 045e:028c Micro$oft xbox cam
+spca508 0461:0815 Micro Innovation IC200
+sunplus 0461:0821 Fujifilm MV-1
+zc3xx 0461:0a00 MicroInnovation WebCam320
+stv06xx 046d:0840 QuickCam Express
+stv06xx 046d:0850 LEGO cam / QuickCam Web
+stv06xx 046d:0870 Dexxa WebCam USB
+spca500 046d:0890 Logitech QuickCam traveler
+vc032x 046d:0892 Logitech Orbicam
+vc032x 046d:0896 Logitech Orbicam
+vc032x 046d:0897 Logitech QuickCam for Dell notebooks
+zc3xx 046d:089d Logitech QuickCam E2500
+zc3xx 046d:08a0 Logitech QC IM
+zc3xx 046d:08a1 Logitech QC IM 0x08A1 +sound
+zc3xx 046d:08a2 Labtec Webcam Pro
+zc3xx 046d:08a3 Logitech QC Chat
+zc3xx 046d:08a6 Logitech QCim
+zc3xx 046d:08a7 Logitech QuickCam Image
+zc3xx 046d:08a9 Logitech Notebook Deluxe
+zc3xx 046d:08aa Labtec Webcam Notebook
+zc3xx 046d:08ac Logitech QuickCam Cool
+zc3xx 046d:08ad Logitech QCCommunicate STX
+zc3xx 046d:08ae Logitech QuickCam for Notebooks
+zc3xx 046d:08af Logitech QuickCam Cool
+zc3xx 046d:08b9 Logitech QuickCam Express
+zc3xx 046d:08d7 Logitech QCam STX
+zc3xx 046d:08d9 Logitech QuickCam IM/Connect
+zc3xx 046d:08d8 Logitech Notebook Deluxe
+zc3xx 046d:08da Logitech QuickCam Messenger
+zc3xx 046d:08dd Logitech QuickCam for Notebooks
+spca500 046d:0900 Logitech Inc. ClickSmart 310
+spca500 046d:0901 Logitech Inc. ClickSmart 510
+sunplus 046d:0905 Logitech ClickSmart 820
+tv8532 046d:0920 Logitech QuickCam Express
+tv8532 046d:0921 Labtec Webcam
+spca561 046d:0928 Logitech QC Express Etch2
+spca561 046d:0929 Labtec Webcam Elch2
+spca561 046d:092a Logitech QC for Notebook
+spca561 046d:092b Labtec Webcam Plus
+spca561 046d:092c Logitech QC chat Elch2
+spca561 046d:092d Logitech QC Elch2
+spca561 046d:092e Logitech QC Elch2
+spca561 046d:092f Logitech QuickCam Express Plus
+sunplus 046d:0960 Logitech ClickSmart 420
+nw80x 046d:d001 Logitech QuickCam Pro (dark focus ring)
+sunplus 0471:0322 Philips DMVC1300K
+zc3xx 0471:0325 Philips SPC 200 NC
+zc3xx 0471:0326 Philips SPC 300 NC
+sonixj 0471:0327 Philips SPC 600 NC
+sonixj 0471:0328 Philips SPC 700 NC
+zc3xx 0471:032d Philips SPC 210 NC
+zc3xx 0471:032e Philips SPC 315 NC
+sonixj 0471:0330 Philips SPC 710 NC
+spca501 0497:c001 Smile International
+sunplus 04a5:3003 Benq DC 1300
+sunplus 04a5:3008 Benq DC 1500
+sunplus 04a5:300a Benq DC 3410
+spca500 04a5:300c Benq DC 1016
+benq 04a5:3035 Benq DC E300
+finepix 04cb:0104 Fujifilm FinePix 4800
+finepix 04cb:0109 Fujifilm FinePix A202
+finepix 04cb:010b Fujifilm FinePix A203
+finepix 04cb:010f Fujifilm FinePix A204
+finepix 04cb:0111 Fujifilm FinePix A205
+finepix 04cb:0113 Fujifilm FinePix A210
+finepix 04cb:0115 Fujifilm FinePix A303
+finepix 04cb:0117 Fujifilm FinePix A310
+finepix 04cb:0119 Fujifilm FinePix F401
+finepix 04cb:011b Fujifilm FinePix F402
+finepix 04cb:011d Fujifilm FinePix F410
+finepix 04cb:0121 Fujifilm FinePix F601
+finepix 04cb:0123 Fujifilm FinePix F700
+finepix 04cb:0125 Fujifilm FinePix M603
+finepix 04cb:0127 Fujifilm FinePix S300
+finepix 04cb:0129 Fujifilm FinePix S304
+finepix 04cb:012b Fujifilm FinePix S500
+finepix 04cb:012d Fujifilm FinePix S602
+finepix 04cb:012f Fujifilm FinePix S700
+finepix 04cb:0131 Fujifilm FinePix unknown model
+finepix 04cb:013b Fujifilm FinePix unknown model
+finepix 04cb:013d Fujifilm FinePix unknown model
+finepix 04cb:013f Fujifilm FinePix F420
+sunplus 04f1:1001 JVC GC A50
+spca561 04fc:0561 Flexcam 100
+spca1528 04fc:1528 Sunplus MD80 clone
+sunplus 04fc:500c Sunplus CA500C
+sunplus 04fc:504a Aiptek Mini PenCam 1.3
+sunplus 04fc:504b Maxell MaxPocket LE 1.3
+sunplus 04fc:5330 Digitrex 2110
+sunplus 04fc:5360 Sunplus Generic
+spca500 04fc:7333 PalmPixDC85
+sunplus 04fc:ffff Pure DigitalDakota
+nw80x 0502:d001 DVC V6
+spca501 0506:00df 3Com HomeConnect Lite
+sunplus 052b:1507 Megapixel 5 Pretec DC-1007
+sunplus 052b:1513 Megapix V4
+sunplus 052b:1803 MegaImage VI
+nw80x 052b:d001 EZCam Pro p35u
+tv8532 0545:808b Veo Stingray
+tv8532 0545:8333 Veo Stingray
+sunplus 0546:3155 Polaroid PDC3070
+sunplus 0546:3191 Polaroid Ion 80
+sunplus 0546:3273 Polaroid PDC2030
+ov519 054c:0154 Sonny toy4
+ov519 054c:0155 Sonny toy5
+cpia1 0553:0002 CPIA CPiA (version1) based cameras
+zc3xx 055f:c005 Mustek Wcam300A
+spca500 055f:c200 Mustek Gsmart 300
+sunplus 055f:c211 Kowa Bs888e Microcamera
+spca500 055f:c220 Gsmart Mini
+sunplus 055f:c230 Mustek Digicam 330K
+sunplus 055f:c232 Mustek MDC3500
+sunplus 055f:c360 Mustek DV4000 Mpeg4
+sunplus 055f:c420 Mustek gSmart Mini 2
+sunplus 055f:c430 Mustek Gsmart LCD 2
+sunplus 055f:c440 Mustek DV 3000
+sunplus 055f:c520 Mustek gSmart Mini 3
+sunplus 055f:c530 Mustek Gsmart LCD 3
+sunplus 055f:c540 Gsmart D30
+sunplus 055f:c630 Mustek MDC4000
+sunplus 055f:c650 Mustek MDC5500Z
+nw80x 055f:d001 Mustek Wcam 300 mini
+zc3xx 055f:d003 Mustek WCam300A
+zc3xx 055f:d004 Mustek WCam300 AN
+conex 0572:0041 Creative Notebook cx11646
+ov519 05a9:0511 Video Blaster WebCam 3/WebCam Plus, D-Link USB Digital Video Camera
+ov519 05a9:0518 Creative WebCam
+ov519 05a9:0519 OV519 Microphone
+ov519 05a9:0530 OmniVision
+ov534_9 05a9:1550 OmniVision VEHO Filmscanner
+ov519 05a9:2800 OmniVision SuperCAM
+ov519 05a9:4519 Webcam Classic
+ov534_9 05a9:8065 OmniVision test kit ov538+ov9712
+ov519 05a9:8519 OmniVision
+ov519 05a9:a511 D-Link USB Digital Video Camera
+ov519 05a9:a518 D-Link DSB-C310 Webcam
+sunplus 05da:1018 Digital Dream Enigma 1.3
+stk014 05e1:0893 Syntek DV4000
+gl860 05e3:0503 Genesys Logic PC Camera
+gl860 05e3:f191 Genesys Logic PC Camera
+spca561 060b:a001 Maxell Compact Pc PM3
+zc3xx 0698:2003 CTX M730V built in
+topro 06a2:0003 TP6800 PC Camera, CmoX CX0342 webcam
+topro 06a2:6810 Creative Qmax
+nw80x 06a5:0000 Typhoon Webcam 100 USB
+nw80x 06a5:d001 Divio based webcams
+nw80x 06a5:d800 Divio Chicony TwinkleCam, Trust SpaceCam
+spca500 06bd:0404 Agfa CL20
+spca500 06be:0800 Optimedia
+nw80x 06be:d001 EZCam Pro p35u
+sunplus 06d6:0031 Trust 610 LCD PowerC@m Zoom
+spca506 06e1:a190 ADS Instant VCD
+ov534 06f8:3002 Hercules Blog Webcam
+ov534_9 06f8:3003 Hercules Dualpix HD Weblog
+sonixj 06f8:3004 Hercules Classic Silver
+sonixj 06f8:3008 Hercules Deluxe Optical Glass
+pac7302 06f8:3009 Hercules Classic Link
+pac7302 06f8:301b Hercules Link
+nw80x 0728:d001 AVerMedia Camguard
+spca508 0733:0110 ViewQuest VQ110
+spca501 0733:0401 Intel Create and Share
+spca501 0733:0402 ViewQuest M318B
+spca505 0733:0430 Intel PC Camera Pro
+sunplus 0733:1311 Digital Dream Epsilon 1.3
+sunplus 0733:1314 Mercury 2.1MEG Deluxe Classic Cam
+sunplus 0733:2211 Jenoptik jdc 21 LCD
+sunplus 0733:2221 Mercury Digital Pro 3.1p
+sunplus 0733:3261 Concord 3045 spca536a
+sunplus 0733:3281 Cyberpix S550V
+spca506 0734:043b 3DeMon USB Capture aka
+cpia1 0813:0001 QX3 camera
+ov519 0813:0002 Dual Mode USB Camera Plus
+spca500 084d:0003 D-Link DSC-350
+spca500 08ca:0103 Aiptek PocketDV
+sunplus 08ca:0104 Aiptek PocketDVII 1.3
+sunplus 08ca:0106 Aiptek Pocket DV3100+
+mr97310a 08ca:0110 Trust Spyc@m 100
+mr97310a 08ca:0111 Aiptek PenCam VGA+
+sunplus 08ca:2008 Aiptek Mini PenCam 2 M
+sunplus 08ca:2010 Aiptek PocketCam 3M
+sunplus 08ca:2016 Aiptek PocketCam 2 Mega
+sunplus 08ca:2018 Aiptek Pencam SD 2M
+sunplus 08ca:2020 Aiptek Slim 3000F
+sunplus 08ca:2022 Aiptek Slim 3200
+sunplus 08ca:2024 Aiptek DV3500 Mpeg4
+sunplus 08ca:2028 Aiptek PocketCam4M
+sunplus 08ca:2040 Aiptek PocketDV4100M
+sunplus 08ca:2042 Aiptek PocketDV5100
+sunplus 08ca:2050 Medion MD 41437
+sunplus 08ca:2060 Aiptek PocketDV5300
+tv8532 0923:010f ICM532 cams
+mars 093a:050f Mars-Semi Pc-Camera
+mr97310a 093a:010e All known CIF cams with this ID
+mr97310a 093a:010f All known VGA cams with this ID
+pac207 093a:2460 Qtec Webcam 100
+pac207 093a:2461 HP Webcam
+pac207 093a:2463 Philips SPC 220 NC
+pac207 093a:2464 Labtec Webcam 1200
+pac207 093a:2468 Webcam WB-1400T
+pac207 093a:2470 Genius GF112
+pac207 093a:2471 Genius VideoCam ge111
+pac207 093a:2472 Genius VideoCam ge110
+pac207 093a:2474 Genius iLook 111
+pac207 093a:2476 Genius e-Messenger 112
+pac7311 093a:2600 PAC7311 Typhoon
+pac7311 093a:2601 Philips SPC 610 NC
+pac7311 093a:2603 Philips SPC 500 NC
+pac7311 093a:2608 Trust WB-3300p
+pac7311 093a:260e Gigaware VGA PC Camera, Trust WB-3350p, SIGMA cam 2350
+pac7311 093a:260f SnakeCam
+pac7302 093a:2620 Apollo AC-905
+pac7302 093a:2621 PAC731x
+pac7302 093a:2622 Genius Eye 312
+pac7302 093a:2624 PAC7302
+pac7302 093a:2625 Genius iSlim 310
+pac7302 093a:2626 Labtec 2200
+pac7302 093a:2627 Genius FaceCam 300
+pac7302 093a:2628 Genius iLook 300
+pac7302 093a:2629 Genious iSlim 300
+pac7302 093a:262a Webcam 300k
+pac7302 093a:262c Philips SPC 230 NC
+jl2005bcd 0979:0227 Various brands, 19 known cameras supported
+jeilinj 0979:0280 Sakar 57379
+jeilinj 0979:0280 Sportscam DV15
+zc3xx 0ac8:0302 Z-star Vimicro zc0302
+vc032x 0ac8:0321 Vimicro generic vc0321
+vc032x 0ac8:0323 Vimicro Vc0323
+vc032x 0ac8:0328 A4Tech PK-130MG
+zc3xx 0ac8:301b Z-Star zc301b
+zc3xx 0ac8:303b Vimicro 0x303b
+zc3xx 0ac8:305b Z-star Vimicro zc0305b
+zc3xx 0ac8:307b PC Camera (ZS0211)
+vc032x 0ac8:c001 Sony embedded vimicro
+vc032x 0ac8:c002 Sony embedded vimicro
+vc032x 0ac8:c301 Samsung Q1 Ultra Premium
+spca508 0af9:0010 Hama USB Sightcam 100
+spca508 0af9:0011 Hama USB Sightcam 100
+ov519 0b62:0059 iBOT2 Webcam
+sonixb 0c45:6001 Genius VideoCAM NB
+sonixb 0c45:6005 Microdia Sweex Mini Webcam
+sonixb 0c45:6007 Sonix sn9c101 + Tas5110D
+sonixb 0c45:6009 spcaCam@120
+sonixb 0c45:600d spcaCam@120
+sonixb 0c45:6011 Microdia PC Camera (SN9C102)
+sonixb 0c45:6019 Generic Sonix OV7630
+sonixb 0c45:6024 Generic Sonix Tas5130c
+sonixb 0c45:6025 Xcam Shanga
+sonixb 0c45:6028 Sonix Btc Pc380
+sonixb 0c45:6029 spcaCam@150
+sonixb 0c45:602c Generic Sonix OV7630
+sonixb 0c45:602d LIC-200 LG
+sonixb 0c45:602e Genius VideoCam Messenger
+sonixj 0c45:6040 Speed NVC 350K
+sonixj 0c45:607c Sonix sn9c102p Hv7131R
+sonixj 0c45:60c0 Sangha Sn535
+sonixj 0c45:60ce USB-PC-Camera-168 (TALK-5067)
+sonixj 0c45:60ec SN9C105+MO4000
+sonixj 0c45:60fb Surfer NoName
+sonixj 0c45:60fc LG-LIC300
+sonixj 0c45:60fe Microdia Audio
+sonixj 0c45:6100 PC Camera (SN9C128)
+sonixj 0c45:6102 PC Camera (SN9C128)
+sonixj 0c45:610a PC Camera (SN9C128)
+sonixj 0c45:610b PC Camera (SN9C128)
+sonixj 0c45:610c PC Camera (SN9C128)
+sonixj 0c45:610e PC Camera (SN9C128)
+sonixj 0c45:6128 Microdia/Sonix SNP325
+sonixj 0c45:612a Avant Camera
+sonixj 0c45:612b Speed-Link REFLECT2
+sonixj 0c45:612c Typhoon Rasy Cam 1.3MPix
+sonixj 0c45:6130 Sonix Pccam
+sonixj 0c45:6138 Sn9c120 Mo4000
+sonixj 0c45:613a Microdia Sonix PC Camera
+sonixj 0c45:613b Surfer SN-206
+sonixj 0c45:613c Sonix Pccam168
+sonixj 0c45:6142 Hama PC-Webcam AC-150
+sonixj 0c45:6143 Sonix Pccam168
+sonixj 0c45:6148 Digitus DA-70811/ZSMC USB PC Camera ZS211/Microdia
+sonixj 0c45:614a Frontech E-Ccam (JIL-2225)
+sn9c20x 0c45:6240 PC Camera (SN9C201 + MT9M001)
+sn9c20x 0c45:6242 PC Camera (SN9C201 + MT9M111)
+sn9c20x 0c45:6248 PC Camera (SN9C201 + OV9655)
+sn9c20x 0c45:624c PC Camera (SN9C201 + MT9M112)
+sn9c20x 0c45:624e PC Camera (SN9C201 + SOI968)
+sn9c20x 0c45:624f PC Camera (SN9C201 + OV9650)
+sn9c20x 0c45:6251 PC Camera (SN9C201 + OV9650)
+sn9c20x 0c45:6253 PC Camera (SN9C201 + OV9650)
+sn9c20x 0c45:6260 PC Camera (SN9C201 + OV7670)
+sn9c20x 0c45:6270 PC Camera (SN9C201 + MT9V011/MT9V111/MT9V112)
+sn9c20x 0c45:627b PC Camera (SN9C201 + OV7660)
+sn9c20x 0c45:627c PC Camera (SN9C201 + HV7131R)
+sn9c20x 0c45:627f PC Camera (SN9C201 + OV9650)
+sn9c20x 0c45:6280 PC Camera (SN9C202 + MT9M001)
+sn9c20x 0c45:6282 PC Camera (SN9C202 + MT9M111)
+sn9c20x 0c45:6288 PC Camera (SN9C202 + OV9655)
+sn9c20x 0c45:628c PC Camera (SN9C201 + MT9M112)
+sn9c20x 0c45:628e PC Camera (SN9C202 + SOI968)
+sn9c20x 0c45:628f PC Camera (SN9C202 + OV9650)
+sn9c20x 0c45:62a0 PC Camera (SN9C202 + OV7670)
+sn9c20x 0c45:62b0 PC Camera (SN9C202 + MT9V011/MT9V111/MT9V112)
+sn9c20x 0c45:62b3 PC Camera (SN9C202 + OV9655)
+sn9c20x 0c45:62bb PC Camera (SN9C202 + OV7660)
+sn9c20x 0c45:62bc PC Camera (SN9C202 + HV7131R)
+sn9c2028 0c45:8001 Wild Planet Digital Spy Camera
+sn9c2028 0c45:8003 Sakar #11199, #6637x, #67480 keychain cams
+sn9c2028 0c45:8008 Mini-Shotz ms-350
+sn9c2028 0c45:800a Vivitar Vivicam 3350B
+sunplus 0d64:0303 Sunplus FashionCam DXG
+ov519 0e96:c001 TRUST 380 USB2 SPACEC@M
+etoms 102c:6151 Qcam Sangha CIF
+etoms 102c:6251 Qcam xxxxxx VGA
+ov519 1046:9967 W9967CF/W9968CF WebCam IC, Video Blaster WebCam Go
+zc3xx 10fd:0128 Typhoon Webshot II USB 300k 0x0128
+spca561 10fd:7e50 FlyCam Usb 100
+zc3xx 10fd:8050 Typhoon Webshot II USB 300k
+ov534 1415:2000 Sony HD Eye for PS3 (SLEH 00201)
+pac207 145f:013a Trust WB-1300N
+sn9c20x 145f:013d Trust WB-3600R
+vc032x 15b8:6001 HP 2.0 Megapixel
+vc032x 15b8:6002 HP 2.0 Megapixel rz406aa
+spca501 1776:501c Arowana 300K CMOS Camera
+t613 17a1:0128 TASCORP JPEG Webcam, NGS Cyclops
+vc032x 17ef:4802 Lenovo Vc0323+MI1310_SOC
+pac207 2001:f115 D-Link DSB-C120
+sq905c 2770:9050 Disney pix micro (CIF)
+sq905c 2770:9051 Lego Bionicle
+sq905c 2770:9052 Disney pix micro 2 (VGA)
+sq905c 2770:905c All 11 known cameras with this ID
+sq905 2770:9120 All 24 known cameras with this ID
+sq905c 2770:913d All 4 known cameras with this ID
+sq930x 2770:930b Sweex Motion Tracking / I-Tec iCam Tracer
+sq930x 2770:930c Trust WB-3500T / NSG Robbie 2.0
+spca500 2899:012c Toptro Industrial
+ov519 8020:ef04 ov519
+spca508 8086:0110 Intel Easy PC Camera
+spca500 8086:0630 Intel Pocket PC Camera
+spca506 99fa:8988 Grandtec V.cap
+sn9c20x a168:0610 Dino-Lite Digital Microscope (SN9C201 + HV7131R)
+sn9c20x a168:0611 Dino-Lite Digital Microscope (SN9C201 + HV7131R)
+sn9c20x a168:0613 Dino-Lite Digital Microscope (SN9C201 + HV7131R)
+sn9c20x a168:0618 Dino-Lite Digital Microscope (SN9C201 + HV7131R)
+sn9c20x a168:0614 Dino-Lite Digital Microscope (SN9C201 + MT9M111)
+sn9c20x a168:0615 Dino-Lite Digital Microscope (SN9C201 + MT9M111)
+sn9c20x a168:0617 Dino-Lite Digital Microscope (SN9C201 + MT9M111)
+spca561 abcd:cdee Petcam
diff --git a/Documentation/video4linux/hauppauge-wintv-cx88-ir.txt b/Documentation/video4linux/hauppauge-wintv-cx88-ir.txt
new file mode 100644
index 000000000..a2fd363c4
--- /dev/null
+++ b/Documentation/video4linux/hauppauge-wintv-cx88-ir.txt
@@ -0,0 +1,54 @@
+The controls for the mux are GPIO [0,1] for source, and GPIO 2 for muting.
+
+GPIO0 GPIO1
+ 0 0 TV Audio
+ 1 0 FM radio
+ 0 1 Line-In
+ 1 1 Mono tuner bypass or CD passthru (tuner specific)
+
+GPIO 16(i believe) is tied to the IR port (if present).
+
+------------------------------------------------------------------------------------
+
+>From the data sheet:
+ Register 24'h20004 PCI Interrupt Status
+ bit [18] IR_SMP_INT Set when 32 input samples have been collected over
+ gpio[16] pin into GP_SAMPLE register.
+
+What's missing from the data sheet:
+
+Setup 4KHz sampling rate (roughly 2x oversampled; good enough for our RC5
+compat remote)
+set register 0x35C050 to 0xa80a80
+
+enable sampling
+set register 0x35C054 to 0x5
+
+Of course, enable the IRQ bit 18 in the interrupt mask register .(and
+provide for a handler)
+
+GP_SAMPLE register is at 0x35C058
+
+Bits are then right shifted into the GP_SAMPLE register at the specified
+rate; you get an interrupt when a full DWORD is received.
+You need to recover the actual RC5 bits out of the (oversampled) IR sensor
+bits. (Hint: look for the 0/1and 1/0 crossings of the RC5 bi-phase data) An
+actual raw RC5 code will span 2-3 DWORDS, depending on the actual alignment.
+
+I'm pretty sure when no IR signal is present the receiver is always in a
+marking state(1); but stray light, etc can cause intermittent noise values
+as well. Remember, this is a free running sample of the IR receiver state
+over time, so don't assume any sample starts at any particular place.
+
+http://www.atmel.com/dyn/resources/prod_documents/doc2817.pdf
+This data sheet (google search) seems to have a lovely description of the
+RC5 basics
+
+http://www.nenya.be/beor/electronics/rc5.htm and more data
+
+http://www.ee.washington.edu/circuit_archive/text/ir_decode.txt
+and even a reference to how to decode a bi-phase data stream.
+
+http://www.xs4all.nl/~sbp/knowledge/ir/rc5.htm
+still more info
+
diff --git a/Documentation/video4linux/lifeview.txt b/Documentation/video4linux/lifeview.txt
new file mode 100644
index 000000000..05f9eb57a
--- /dev/null
+++ b/Documentation/video4linux/lifeview.txt
@@ -0,0 +1,42 @@
+collecting data about the lifeview models and the config coding on
+gpio pins 0-9 ...
+==================================================================
+
+bt878:
+ LR50 rev. Q ("PARTS: 7031505116), Tuner wurde als Nr. 5 erkannt, Eingänge
+ SVideo, TV, Composite, Audio, Remote. CP9..1=100001001 (1: 0-Ohm-Widerstand
+ gegen GND unbestückt; 0: bestückt)
+
+------------------------------------------------------------------------------
+
+saa7134:
+ /* LifeView FlyTV Platinum FM (LR214WF) */
+ /* "Peter Missel <peter.missel@onlinehome.de> */
+ .name = "LifeView FlyTV Platinum FM",
+ /* GP27 MDT2005 PB4 pin 10 */
+ /* GP26 MDT2005 PB3 pin 9 */
+ /* GP25 MDT2005 PB2 pin 8 */
+ /* GP23 MDT2005 PB1 pin 7 */
+ /* GP22 MDT2005 PB0 pin 6 */
+ /* GP21 MDT2005 PB5 pin 11 */
+ /* GP20 MDT2005 PB6 pin 12 */
+ /* GP19 MDT2005 PB7 pin 13 */
+ /* nc MDT2005 PA3 pin 2 */
+ /* Remote MDT2005 PA2 pin 1 */
+ /* GP18 MDT2005 PA1 pin 18 */
+ /* nc MDT2005 PA0 pin 17 strap low */
+
+ /* GP17 Strap "GP7"=High */
+ /* GP16 Strap "GP6"=High
+ 0=Radio 1=TV
+ Drives SA630D ENCH1 and HEF4052 A1 pins
+ to do FM radio through SIF input */
+ /* GP15 nc */
+ /* GP14 nc */
+ /* GP13 nc */
+ /* GP12 Strap "GP5" = High */
+ /* GP11 Strap "GP4" = High */
+ /* GP10 Strap "GP3" = High */
+ /* GP09 Strap "GP2" = Low */
+ /* GP08 Strap "GP1" = Low */
+ /* GP07.00 nc */
diff --git a/Documentation/video4linux/meye.txt b/Documentation/video4linux/meye.txt
new file mode 100644
index 000000000..a051152ea
--- /dev/null
+++ b/Documentation/video4linux/meye.txt
@@ -0,0 +1,123 @@
+Vaio Picturebook Motion Eye Camera Driver Readme
+------------------------------------------------
+ Copyright (C) 2001-2004 Stelian Pop <stelian@popies.net>
+ Copyright (C) 2001-2002 Alcôve <www.alcove.com>
+ Copyright (C) 2000 Andrew Tridgell <tridge@samba.org>
+
+This driver enable the use of video4linux compatible applications with the
+Motion Eye camera. This driver requires the "Sony Laptop Extras" driver (which
+can be found in the "Misc devices" section of the kernel configuration utility)
+to be compiled and installed (using its "camera=1" parameter).
+
+It can do at maximum 30 fps @ 320x240 or 15 fps @ 640x480.
+
+Grabbing is supported in packed YUV colorspace only.
+
+MJPEG hardware grabbing is supported via a private API (see below).
+
+Hardware supported:
+-------------------
+
+This driver supports the 'second' version of the MotionEye camera :)
+
+The first version was connected directly on the video bus of the Neomagic
+video card and is unsupported.
+
+The second one, made by Kawasaki Steel is fully supported by this
+driver (PCI vendor/device is 0x136b/0xff01)
+
+The third one, present in recent (more or less last year) Picturebooks
+(C1M* models), is not supported. The manufacturer has given the specs
+to the developers under a NDA (which allows the development of a GPL
+driver however), but things are not moving very fast (see
+http://r-engine.sourceforge.net/) (PCI vendor/device is 0x10cf/0x2011).
+
+There is a forth model connected on the USB bus in TR1* Vaio laptops.
+This camera is not supported at all by the current driver, in fact
+little information if any is available for this camera
+(USB vendor/device is 0x054c/0x0107).
+
+Driver options:
+---------------
+
+Several options can be passed to the meye driver using the standard
+module argument syntax (<param>=<value> when passing the option to the
+module or meye.<param>=<value> on the kernel boot line when meye is
+statically linked into the kernel). Those options are:
+
+ gbuffers: number of capture buffers, default is 2 (32 max)
+
+ gbufsize: size of each capture buffer, default is 614400
+
+ video_nr: video device to register (0 = /dev/video0, etc)
+
+Module use:
+-----------
+
+In order to automatically load the meye module on use, you can put those lines
+in your /etc/modprobe.d/meye.conf file:
+
+ alias char-major-81 videodev
+ alias char-major-81-0 meye
+ options meye gbuffers=32
+
+Usage:
+------
+
+ xawtv >= 3.49 (<http://bytesex.org/xawtv/>)
+ for display and uncompressed video capture:
+
+ xawtv -c /dev/video0 -geometry 640x480
+ or
+ xawtv -c /dev/video0 -geometry 320x240
+
+ motioneye (<http://popies.net/meye/>)
+ for getting ppm or jpg snapshots, mjpeg video
+
+Private API:
+------------
+
+ The driver supports frame grabbing with the video4linux API,
+ so all video4linux tools (like xawtv) should work with this driver.
+
+ Besides the video4linux interface, the driver has a private interface
+ for accessing the Motion Eye extended parameters (camera sharpness,
+ agc, video framerate), the shapshot and the MJPEG capture facilities.
+
+ This interface consists of several ioctls (prototypes and structures
+ can be found in include/linux/meye.h):
+
+ MEYEIOC_G_PARAMS
+ MEYEIOC_S_PARAMS
+ Get and set the extended parameters of the motion eye camera.
+ The user should always query the current parameters with
+ MEYEIOC_G_PARAMS, change what he likes and then issue the
+ MEYEIOC_S_PARAMS call (checking for -EINVAL). The extended
+ parameters are described by the meye_params structure.
+
+
+ MEYEIOC_QBUF_CAPT
+ Queue a buffer for capture (the buffers must have been
+ obtained with a VIDIOCGMBUF call and mmap'ed by the
+ application). The argument to MEYEIOC_QBUF_CAPT is the
+ buffer number to queue (or -1 to end capture). The first
+ call to MEYEIOC_QBUF_CAPT starts the streaming capture.
+
+ MEYEIOC_SYNC
+ Takes as an argument the buffer number you want to sync.
+ This ioctl blocks until the buffer is filled and ready
+ for the application to use. It returns the buffer size.
+
+ MEYEIOC_STILLCAPT
+ MEYEIOC_STILLJCAPT
+ Takes a snapshot in an uncompressed or compressed jpeg format.
+ This ioctl blocks until the snapshot is done and returns (for
+ jpeg snapshot) the size of the image. The image data is
+ available from the first mmap'ed buffer.
+
+ Look at the 'motioneye' application code for an actual example.
+
+Bugs / Todo:
+------------
+
+ - 'motioneye' still uses the meye private v4l1 API extensions.
diff --git a/Documentation/video4linux/not-in-cx2388x-datasheet.txt b/Documentation/video4linux/not-in-cx2388x-datasheet.txt
new file mode 100644
index 000000000..edbfe744d
--- /dev/null
+++ b/Documentation/video4linux/not-in-cx2388x-datasheet.txt
@@ -0,0 +1,41 @@
+=================================================================================
+MO_OUTPUT_FORMAT (0x310164)
+
+ Previous default from DScaler: 0x1c1f0008
+ Digit 8: 31-28
+ 28: PREVREMOD = 1
+
+ Digit 7: 27-24 (0xc = 12 = b1100 )
+ 27: COMBALT = 1
+ 26: PAL_INV_PHASE
+ (DScaler apparently set this to 1, resulted in sucky picture)
+
+ Digits 6,5: 23-16
+ 25-16: COMB_RANGE = 0x1f [default] (9 bits -> max 512)
+
+ Digit 4: 15-12
+ 15: DISIFX = 0
+ 14: INVCBF = 0
+ 13: DISADAPT = 0
+ 12: NARROWADAPT = 0
+
+ Digit 3: 11-8
+ 11: FORCE2H
+ 10: FORCEREMD
+ 9: NCHROMAEN
+ 8: NREMODEN
+
+ Digit 2: 7-4
+ 7-6: YCORE
+ 5-4: CCORE
+
+ Digit 1: 3-0
+ 3: RANGE = 1
+ 2: HACTEXT
+ 1: HSFMT
+
+0x47 is the sync byte for MPEG-2 transport stream packets.
+Datasheet incorrectly states to use 47 decimal. 188 is the length.
+All DVB compliant frontends output packets with this start code.
+
+=================================================================================
diff --git a/Documentation/video4linux/omap3isp.txt b/Documentation/video4linux/omap3isp.txt
new file mode 100644
index 000000000..b9a9f83b1
--- /dev/null
+++ b/Documentation/video4linux/omap3isp.txt
@@ -0,0 +1,279 @@
+OMAP 3 Image Signal Processor (ISP) driver
+
+Copyright (C) 2010 Nokia Corporation
+Copyright (C) 2009 Texas Instruments, Inc.
+
+Contacts: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ Sakari Ailus <sakari.ailus@iki.fi>
+ David Cohen <dacohen@gmail.com>
+
+
+Introduction
+============
+
+This file documents the Texas Instruments OMAP 3 Image Signal Processor (ISP)
+driver located under drivers/media/platform/omap3isp. The original driver was
+written by Texas Instruments but since that it has been rewritten (twice) at
+Nokia.
+
+The driver has been successfully used on the following versions of OMAP 3:
+
+ 3430
+ 3530
+ 3630
+
+The driver implements V4L2, Media controller and v4l2_subdev interfaces.
+Sensor, lens and flash drivers using the v4l2_subdev interface in the kernel
+are supported.
+
+
+Split to subdevs
+================
+
+The OMAP 3 ISP is split into V4L2 subdevs, each of the blocks inside the ISP
+having one subdev to represent it. Each of the subdevs provide a V4L2 subdev
+interface to userspace.
+
+ OMAP3 ISP CCP2
+ OMAP3 ISP CSI2a
+ OMAP3 ISP CCDC
+ OMAP3 ISP preview
+ OMAP3 ISP resizer
+ OMAP3 ISP AEWB
+ OMAP3 ISP AF
+ OMAP3 ISP histogram
+
+Each possible link in the ISP is modelled by a link in the Media controller
+interface. For an example program see [2].
+
+
+Controlling the OMAP 3 ISP
+==========================
+
+In general, the settings given to the OMAP 3 ISP take effect at the beginning
+of the following frame. This is done when the module becomes idle during the
+vertical blanking period on the sensor. In memory-to-memory operation the pipe
+is run one frame at a time. Applying the settings is done between the frames.
+
+All the blocks in the ISP, excluding the CSI-2 and possibly the CCP2 receiver,
+insist on receiving complete frames. Sensors must thus never send the ISP
+partial frames.
+
+Autoidle does have issues with some ISP blocks on the 3430, at least.
+Autoidle is only enabled on 3630 when the omap3isp module parameter autoidle
+is non-zero.
+
+
+Events
+======
+
+The OMAP 3 ISP driver does support the V4L2 event interface on CCDC and
+statistics (AEWB, AF and histogram) subdevs.
+
+The CCDC subdev produces V4L2_EVENT_FRAME_SYNC type event on HS_VS
+interrupt which is used to signal frame start. Earlier version of this
+driver used V4L2_EVENT_OMAP3ISP_HS_VS for this purpose. The event is
+triggered exactly when the reception of the first line of the frame starts
+in the CCDC module. The event can be subscribed on the CCDC subdev.
+
+(When using parallel interface one must pay account to correct configuration
+of the VS signal polarity. This is automatically correct when using the serial
+receivers.)
+
+Each of the statistics subdevs is able to produce events. An event is
+generated whenever a statistics buffer can be dequeued by a user space
+application using the VIDIOC_OMAP3ISP_STAT_REQ IOCTL. The events available
+are:
+
+ V4L2_EVENT_OMAP3ISP_AEWB
+ V4L2_EVENT_OMAP3ISP_AF
+ V4L2_EVENT_OMAP3ISP_HIST
+
+The type of the event data is struct omap3isp_stat_event_status for these
+ioctls. If there is an error calculating the statistics, there will be an
+event as usual, but no related statistics buffer. In this case
+omap3isp_stat_event_status.buf_err is set to non-zero.
+
+
+Private IOCTLs
+==============
+
+The OMAP 3 ISP driver supports standard V4L2 IOCTLs and controls where
+possible and practical. Much of the functions provided by the ISP, however,
+does not fall under the standard IOCTLs --- gamma tables and configuration of
+statistics collection are examples of such.
+
+In general, there is a private ioctl for configuring each of the blocks
+containing hardware-dependent functions.
+
+The following private IOCTLs are supported:
+
+ VIDIOC_OMAP3ISP_CCDC_CFG
+ VIDIOC_OMAP3ISP_PRV_CFG
+ VIDIOC_OMAP3ISP_AEWB_CFG
+ VIDIOC_OMAP3ISP_HIST_CFG
+ VIDIOC_OMAP3ISP_AF_CFG
+ VIDIOC_OMAP3ISP_STAT_REQ
+ VIDIOC_OMAP3ISP_STAT_EN
+
+The parameter structures used by these ioctls are described in
+include/linux/omap3isp.h. The detailed functions of the ISP itself related to
+a given ISP block is described in the Technical Reference Manuals (TRMs) ---
+see the end of the document for those.
+
+While it is possible to use the ISP driver without any use of these private
+IOCTLs it is not possible to obtain optimal image quality this way. The AEWB,
+AF and histogram modules cannot be used without configuring them using the
+appropriate private IOCTLs.
+
+
+CCDC and preview block IOCTLs
+=============================
+
+The VIDIOC_OMAP3ISP_CCDC_CFG and VIDIOC_OMAP3ISP_PRV_CFG IOCTLs are used to
+configure, enable and disable functions in the CCDC and preview blocks,
+respectively. Both IOCTLs control several functions in the blocks they
+control. VIDIOC_OMAP3ISP_CCDC_CFG IOCTL accepts a pointer to struct
+omap3isp_ccdc_update_config as its argument. Similarly VIDIOC_OMAP3ISP_PRV_CFG
+accepts a pointer to struct omap3isp_prev_update_config. The definition of
+both structures is available in [1].
+
+The update field in the structures tells whether to update the configuration
+for the specific function and the flag tells whether to enable or disable the
+function.
+
+The update and flag bit masks accept the following values. Each separate
+functions in the CCDC and preview blocks is associated with a flag (either
+disable or enable; part of the flag field in the structure) and a pointer to
+configuration data for the function.
+
+Valid values for the update and flag fields are listed here for
+VIDIOC_OMAP3ISP_CCDC_CFG. Values may be or'ed to configure more than one
+function in the same IOCTL call.
+
+ OMAP3ISP_CCDC_ALAW
+ OMAP3ISP_CCDC_LPF
+ OMAP3ISP_CCDC_BLCLAMP
+ OMAP3ISP_CCDC_BCOMP
+ OMAP3ISP_CCDC_FPC
+ OMAP3ISP_CCDC_CULL
+ OMAP3ISP_CCDC_CONFIG_LSC
+ OMAP3ISP_CCDC_TBL_LSC
+
+The corresponding values for the VIDIOC_OMAP3ISP_PRV_CFG are here:
+
+ OMAP3ISP_PREV_LUMAENH
+ OMAP3ISP_PREV_INVALAW
+ OMAP3ISP_PREV_HRZ_MED
+ OMAP3ISP_PREV_CFA
+ OMAP3ISP_PREV_CHROMA_SUPP
+ OMAP3ISP_PREV_WB
+ OMAP3ISP_PREV_BLKADJ
+ OMAP3ISP_PREV_RGB2RGB
+ OMAP3ISP_PREV_COLOR_CONV
+ OMAP3ISP_PREV_YC_LIMIT
+ OMAP3ISP_PREV_DEFECT_COR
+ OMAP3ISP_PREV_GAMMABYPASS
+ OMAP3ISP_PREV_DRK_FRM_CAPTURE
+ OMAP3ISP_PREV_DRK_FRM_SUBTRACT
+ OMAP3ISP_PREV_LENS_SHADING
+ OMAP3ISP_PREV_NF
+ OMAP3ISP_PREV_GAMMA
+
+The associated configuration pointer for the function may not be NULL when
+enabling the function. When disabling a function the configuration pointer is
+ignored.
+
+
+Statistic blocks IOCTLs
+=======================
+
+The statistics subdevs do offer more dynamic configuration options than the
+other subdevs. They can be enabled, disable and reconfigured when the pipeline
+is in streaming state.
+
+The statistics blocks always get the input image data from the CCDC (as the
+histogram memory read isn't implemented). The statistics are dequeueable by
+the user from the statistics subdev nodes using private IOCTLs.
+
+The private IOCTLs offered by the AEWB, AF and histogram subdevs are heavily
+reflected by the register level interface offered by the ISP hardware. There
+are aspects that are purely related to the driver implementation and these are
+discussed next.
+
+VIDIOC_OMAP3ISP_STAT_EN
+-----------------------
+
+This private IOCTL enables/disables a statistic module. If this request is
+done before streaming, it will take effect as soon as the pipeline starts to
+stream. If the pipeline is already streaming, it will take effect as soon as
+the CCDC becomes idle.
+
+VIDIOC_OMAP3ISP_AEWB_CFG, VIDIOC_OMAP3ISP_HIST_CFG and VIDIOC_OMAP3ISP_AF_CFG
+-----------------------------------------------------------------------------
+
+Those IOCTLs are used to configure the modules. They require user applications
+to have an in-depth knowledge of the hardware. Most of the fields explanation
+can be found on OMAP's TRMs. The two following fields common to all the above
+configure private IOCTLs require explanation for better understanding as they
+are not part of the TRM.
+
+omap3isp_[h3a_af/h3a_aewb/hist]_config.buf_size:
+
+The modules handle their buffers internally. The necessary buffer size for the
+module's data output depends on the requested configuration. Although the
+driver supports reconfiguration while streaming, it does not support a
+reconfiguration which requires bigger buffer size than what is already
+internally allocated if the module is enabled. It will return -EBUSY on this
+case. In order to avoid such condition, either disable/reconfigure/enable the
+module or request the necessary buffer size during the first configuration
+while the module is disabled.
+
+The internal buffer size allocation considers the requested configuration's
+minimum buffer size and the value set on buf_size field. If buf_size field is
+out of [minimum, maximum] buffer size range, it's clamped to fit in there.
+The driver then selects the biggest value. The corrected buf_size value is
+written back to user application.
+
+omap3isp_[h3a_af/h3a_aewb/hist]_config.config_counter:
+
+As the configuration doesn't take effect synchronously to the request, the
+driver must provide a way to track this information to provide more accurate
+data. After a configuration is requested, the config_counter returned to user
+space application will be an unique value associated to that request. When
+user application receives an event for buffer availability or when a new
+buffer is requested, this config_counter is used to match a buffer data and a
+configuration.
+
+VIDIOC_OMAP3ISP_STAT_REQ
+------------------------
+
+Send to user space the oldest data available in the internal buffer queue and
+discards such buffer afterwards. The field omap3isp_stat_data.frame_number
+matches with the video buffer's field_count.
+
+
+Technical reference manuals (TRMs) and other documentation
+==========================================================
+
+OMAP 3430 TRM:
+<URL:http://focus.ti.com/pdfs/wtbu/OMAP34xx_ES3.1.x_PUBLIC_TRM_vZM.zip>
+Referenced 2011-03-05.
+
+OMAP 35xx TRM:
+<URL:http://www.ti.com/litv/pdf/spruf98o> Referenced 2011-03-05.
+
+OMAP 3630 TRM:
+<URL:http://focus.ti.com/pdfs/wtbu/OMAP36xx_ES1.x_PUBLIC_TRM_vQ.zip>
+Referenced 2011-03-05.
+
+DM 3730 TRM:
+<URL:http://www.ti.com/litv/pdf/sprugn4h> Referenced 2011-03-06.
+
+
+References
+==========
+
+[1] include/linux/omap3isp.h
+
+[2] http://git.ideasonboard.org/?p=media-ctl.git;a=summary
diff --git a/Documentation/video4linux/omap4_camera.txt b/Documentation/video4linux/omap4_camera.txt
new file mode 100644
index 000000000..25d9b40a4
--- /dev/null
+++ b/Documentation/video4linux/omap4_camera.txt
@@ -0,0 +1,60 @@
+ OMAP4 ISS Driver
+ ================
+
+Introduction
+------------
+
+The OMAP44XX family of chips contains the Imaging SubSystem (a.k.a. ISS),
+Which contains several components that can be categorized in 3 big groups:
+
+- Interfaces (2 Interfaces: CSI2-A & CSI2-B/CCP2)
+- ISP (Image Signal Processor)
+- SIMCOP (Still Image Coprocessor)
+
+For more information, please look in [1] for latest version of:
+ "OMAP4430 Multimedia Device Silicon Revision 2.x"
+
+As of Revision AB, the ISS is described in detail in section 8.
+
+This driver is supporting _only_ the CSI2-A/B interfaces for now.
+
+It makes use of the Media Controller framework [2], and inherited most of the
+code from OMAP3 ISP driver (found under drivers/media/platform/omap3isp/*),
+except that it doesn't need an IOMMU now for ISS buffers memory mapping.
+
+Supports usage of MMAP buffers only (for now).
+
+Tested platforms
+----------------
+
+- OMAP4430SDP, w/ ES2.1 GP & SEVM4430-CAM-V1-0 (Contains IMX060 & OV5640, in
+ which only the last one is supported, outputting YUV422 frames).
+
+- TI Blaze MDP, w/ OMAP4430 ES2.2 EMU (Contains 1 IMX060 & 2 OV5650 sensors, in
+ which only the OV5650 are supported, outputting RAW10 frames).
+
+- PandaBoard, Rev. A2, w/ OMAP4430 ES2.1 GP & OV adapter board, tested with
+ following sensors:
+ * OV5640
+ * OV5650
+
+- Tested on mainline kernel:
+
+ http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=summary
+
+ Tag: v3.3 (commit c16fa4f2ad19908a47c63d8fa436a1178438c7e7)
+
+File list
+---------
+drivers/staging/media/omap4iss/
+include/media/omap4iss.h
+
+References
+----------
+
+[1] http://focus.ti.com/general/docs/wtbu/wtbudocumentcenter.tsp?navigationId=12037&templateId=6123#62
+[2] http://lwn.net/Articles/420485/
+[3] http://www.spinics.net/lists/linux-media/msg44370.html
+--
+Author: Sergio Aguirre <sergio.a.aguirre@gmail.com>
+Copyright (C) 2012, Texas Instruments
diff --git a/Documentation/video4linux/pxa_camera.txt b/Documentation/video4linux/pxa_camera.txt
new file mode 100644
index 000000000..51ed1578b
--- /dev/null
+++ b/Documentation/video4linux/pxa_camera.txt
@@ -0,0 +1,174 @@
+ PXA-Camera Host Driver
+ ======================
+
+Constraints
+-----------
+ a) Image size for YUV422P format
+ All YUV422P images are enforced to have width x height % 16 = 0.
+ This is due to DMA constraints, which transfers only planes of 8 byte
+ multiples.
+
+
+Global video workflow
+---------------------
+ a) QCI stopped
+ Initialy, the QCI interface is stopped.
+ When a buffer is queued (pxa_videobuf_ops->buf_queue), the QCI starts.
+
+ b) QCI started
+ More buffers can be queued while the QCI is started without halting the
+ capture. The new buffers are "appended" at the tail of the DMA chain, and
+ smoothly captured one frame after the other.
+
+ Once a buffer is filled in the QCI interface, it is marked as "DONE" and
+ removed from the active buffers list. It can be then requeud or dequeued by
+ userland application.
+
+ Once the last buffer is filled in, the QCI interface stops.
+
+ c) Capture global finite state machine schema
+
+ +----+ +---+ +----+
+ | DQ | | Q | | DQ |
+ | v | v | v
+ +-----------+ +------------------------+
+ | STOP | | Wait for capture start |
+ +-----------+ Q +------------------------+
++-> | QCI: stop | ------------------> | QCI: run | <------------+
+| | DMA: stop | | DMA: stop | |
+| +-----------+ +-----> +------------------------+ |
+| / | |
+| / +---+ +----+ | |
+|capture list empty / | Q | | DQ | | QCI Irq EOF |
+| / | v | v v |
+| +--------------------+ +----------------------+ |
+| | DMA hotlink missed | | Capture running | |
+| +--------------------+ +----------------------+ |
+| | QCI: run | +-----> | QCI: run | <-+ |
+| | DMA: stop | / | DMA: run | | |
+| +--------------------+ / +----------------------+ | Other |
+| ^ /DMA still | | channels |
+| | capture list / running | DMA Irq End | not |
+| | not empty / | | finished |
+| | / v | yet |
+| +----------------------+ +----------------------+ | |
+| | Videobuf released | | Channel completed | | |
+| +----------------------+ +----------------------+ | |
++-- | QCI: run | | QCI: run | --+ |
+ | DMA: run | | DMA: run | |
+ +----------------------+ +----------------------+ |
+ ^ / | |
+ | no overrun / | overrun |
+ | / v |
+ +--------------------+ / +----------------------+ |
+ | Frame completed | / | Frame overran | |
+ +--------------------+ <-----+ +----------------------+ restart frame |
+ | QCI: run | | QCI: stop | --------------+
+ | DMA: run | | DMA: stop |
+ +--------------------+ +----------------------+
+
+ Legend: - each box is a FSM state
+ - each arrow is the condition to transition to another state
+ - an arrow with a comment is a mandatory transition (no condition)
+ - arrow "Q" means : a buffer was enqueued
+ - arrow "DQ" means : a buffer was dequeued
+ - "QCI: stop" means the QCI interface is not enabled
+ - "DMA: stop" means all 3 DMA channels are stopped
+ - "DMA: run" means at least 1 DMA channel is still running
+
+DMA usage
+---------
+ a) DMA flow
+ - first buffer queued for capture
+ Once a first buffer is queued for capture, the QCI is started, but data
+ transfer is not started. On "End Of Frame" interrupt, the irq handler
+ starts the DMA chain.
+ - capture of one videobuffer
+ The DMA chain starts transferring data into videobuffer RAM pages.
+ When all pages are transferred, the DMA irq is raised on "ENDINTR" status
+ - finishing one videobuffer
+ The DMA irq handler marks the videobuffer as "done", and removes it from
+ the active running queue
+ Meanwhile, the next videobuffer (if there is one), is transferred by DMA
+ - finishing the last videobuffer
+ On the DMA irq of the last videobuffer, the QCI is stopped.
+
+ b) DMA prepared buffer will have this structure
+
+ +------------+-----+---------------+-----------------+
+ | desc-sg[0] | ... | desc-sg[last] | finisher/linker |
+ +------------+-----+---------------+-----------------+
+
+ This structure is pointed by dma->sg_cpu.
+ The descriptors are used as follows :
+ - desc-sg[i]: i-th descriptor, transferring the i-th sg
+ element to the video buffer scatter gather
+ - finisher: has ddadr=DADDR_STOP, dcmd=ENDIRQEN
+ - linker: has ddadr= desc-sg[0] of next video buffer, dcmd=0
+
+ For the next schema, let's assume d0=desc-sg[0] .. dN=desc-sg[N],
+ "f" stands for finisher and "l" for linker.
+ A typical running chain is :
+
+ Videobuffer 1 Videobuffer 2
+ +---------+----+---+ +----+----+----+---+
+ | d0 | .. | dN | l | | d0 | .. | dN | f |
+ +---------+----+-|-+ ^----+----+----+---+
+ | |
+ +----+
+
+ After the chaining is finished, the chain looks like :
+
+ Videobuffer 1 Videobuffer 2 Videobuffer 3
+ +---------+----+---+ +----+----+----+---+ +----+----+----+---+
+ | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f |
+ +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+
+ | | | |
+ +----+ +----+
+ new_link
+
+ c) DMA hot chaining timeslice issue
+
+ As DMA chaining is done while DMA _is_ running, the linking may be done
+ while the DMA jumps from one Videobuffer to another. On the schema, that
+ would be a problem if the following sequence is encountered :
+
+ - DMA chain is Videobuffer1 + Videobuffer2
+ - pxa_videobuf_queue() is called to queue Videobuffer3
+ - DMA controller finishes Videobuffer2, and DMA stops
+ =>
+ Videobuffer 1 Videobuffer 2
+ +---------+----+---+ +----+----+----+---+
+ | d0 | .. | dN | l | | d0 | .. | dN | f |
+ +---------+----+-|-+ ^----+----+----+-^-+
+ | | |
+ +----+ +-- DMA DDADR loads DDADR_STOP
+
+ - pxa_dma_add_tail_buf() is called, the Videobuffer2 "finisher" is
+ replaced by a "linker" to Videobuffer3 (creation of new_link)
+ - pxa_videobuf_queue() finishes
+ - the DMA irq handler is called, which terminates Videobuffer2
+ - Videobuffer3 capture is not scheduled on DMA chain (as it stopped !!!)
+
+ Videobuffer 1 Videobuffer 2 Videobuffer 3
+ +---------+----+---+ +----+----+----+---+ +----+----+----+---+
+ | d0 | .. | dN | l | | d0 | .. | dN | l | | d0 | .. | dN | f |
+ +---------+----+-|-+ ^----+----+----+-|-+ ^----+----+----+---+
+ | | | |
+ +----+ +----+
+ new_link
+ DMA DDADR still is DDADR_STOP
+
+ - pxa_camera_check_link_miss() is called
+ This checks if the DMA is finished and a buffer is still on the
+ pcdev->capture list. If that's the case, the capture will be restarted,
+ and Videobuffer3 is scheduled on DMA chain.
+ - the DMA irq handler finishes
+
+ Note: if DMA stops just after pxa_camera_check_link_miss() reads DDADR()
+ value, we have the guarantee that the DMA irq handler will be called back
+ when the DMA will finish the buffer, and pxa_camera_check_link_miss() will
+ be called again, to reschedule Videobuffer3.
+
+--
+Author: Robert Jarzmik <robert.jarzmik@free.fr>
diff --git a/Documentation/video4linux/radiotrack.txt b/Documentation/video4linux/radiotrack.txt
new file mode 100644
index 000000000..d1f3ed199
--- /dev/null
+++ b/Documentation/video4linux/radiotrack.txt
@@ -0,0 +1,147 @@
+NOTES ON RADIOTRACK CARD CONTROL
+by Stephen M. Benoit (benoits@servicepro.com) Dec 14, 1996
+----------------------------------------------------------------------------
+
+Document version 1.0
+
+ACKNOWLEDGMENTS
+----------------
+This document was made based on 'C' code for Linux from Gideon le Grange
+(legrang@active.co.za or legrang@cs.sun.ac.za) in 1994, and elaborations from
+Frans Brinkman (brinkman@esd.nl) in 1996. The results reported here are from
+experiments that the author performed on his own setup, so your mileage may
+vary... I make no guarantees, claims or warranties to the suitability or
+validity of this information. No other documentation on the AIMS
+Lab (http://www.aimslab.com/) RadioTrack card was made available to the
+author. This document is offered in the hopes that it might help users who
+want to use the RadioTrack card in an environment other than MS Windows.
+
+WHY THIS DOCUMENT?
+------------------
+I have a RadioTrack card from back when I ran an MS-Windows platform. After
+converting to Linux, I found Gideon le Grange's command-line software for
+running the card, and found that it was good! Frans Brinkman made a
+comfortable X-windows interface, and added a scanning feature. For hack
+value, I wanted to see if the tuner could be tuned beyond the usual FM radio
+broadcast band, so I could pick up the audio carriers from North American
+broadcast TV channels, situated just below and above the 87.0-109.0 MHz range.
+I did not get much success, but I learned about programming ioports under
+Linux and gained some insights about the hardware design used for the card.
+
+So, without further delay, here are the details.
+
+
+PHYSICAL DESCRIPTION
+--------------------
+The RadioTrack card is an ISA 8-bit FM radio card. The radio frequency (RF)
+input is simply an antenna lead, and the output is a power audio signal
+available through a miniature phone plug. Its RF frequencies of operation are
+more or less limited from 87.0 to 109.0 MHz (the commercial FM broadcast
+band). Although the registers can be programmed to request frequencies beyond
+these limits, experiments did not give promising results. The variable
+frequency oscillator (VFO) that demodulates the intermediate frequency (IF)
+signal probably has a small range of useful frequencies, and wraps around or
+gets clipped beyond the limits mentioned above.
+
+
+CONTROLLING THE CARD WITH IOPORT
+--------------------------------
+The RadioTrack (base) ioport is configurable for 0x30c or 0x20c. Only one
+ioport seems to be involved. The ioport decoding circuitry must be pretty
+simple, as individual ioport bits are directly matched to specific functions
+(or blocks) of the radio card. This way, many functions can be changed in
+parallel with one write to the ioport. The only feedback available through
+the ioports appears to be the "Stereo Detect" bit.
+
+The bits of the ioport are arranged as follows:
+
+ MSb LSb
++------+------+------+--------+--------+-------+---------+--------+
+| VolA | VolB | ???? | Stereo | Radio | TuneA | TuneB | Tune |
+| (+) | (-) | | Detect | Audio | (bit) | (latch) | Update |
+| | | | Enable | Enable | | | Enable |
++------+------+------+--------+--------+-------+---------+--------+
+
+
+VolA . VolB [AB......]
+-----------
+0 0 : audio mute
+0 1 : volume + (some delay required)
+1 0 : volume - (some delay required)
+1 1 : stay at present volume
+
+Stereo Detect Enable [...S....]
+--------------------
+0 : No Detect
+1 : Detect
+
+ Results available by reading ioport >60 msec after last port write.
+ 0xff ==> no stereo detected, 0xfd ==> stereo detected.
+
+Radio to Audio (path) Enable [....R...]
+----------------------------
+0 : Disable path (silence)
+1 : Enable path (audio produced)
+
+TuneA . TuneB [.....AB.]
+-------------
+0 0 : "zero" bit phase 1
+0 1 : "zero" bit phase 2
+
+1 0 : "one" bit phase 1
+1 1 : "one" bit phase 2
+
+ 24-bit code, where bits = (freq*40) + 10486188.
+ The Most Significant 11 bits must be 1010 xxxx 0x0 to be valid.
+ The bits are shifted in LSb first.
+
+Tune Update Enable [.......T]
+------------------
+0 : Tuner held constant
+1 : Tuner updating in progress
+
+
+PROGRAMMING EXAMPLES
+--------------------
+Default: BASE <-- 0xc8 (current volume, no stereo detect,
+ radio enable, tuner adjust disable)
+
+Card Off: BASE <-- 0x00 (audio mute, no stereo detect,
+ radio disable, tuner adjust disable)
+
+Card On: BASE <-- 0x00 (see "Card Off", clears any unfinished business)
+ BASE <-- 0xc8 (see "Default")
+
+Volume Down: BASE <-- 0x48 (volume down, no stereo detect,
+ radio enable, tuner adjust disable)
+ * wait 10 msec *
+ BASE <-- 0xc8 (see "Default")
+
+Volume Up: BASE <-- 0x88 (volume up, no stereo detect,
+ radio enable, tuner adjust disable)
+ * wait 10 msec *
+ BASE <-- 0xc8 (see "Default")
+
+Check Stereo: BASE <-- 0xd8 (current volume, stereo detect,
+ radio enable, tuner adjust disable)
+ * wait 100 msec *
+ x <-- BASE (read ioport)
+ BASE <-- 0xc8 (see "Default")
+
+ x=0xff ==> "not stereo", x=0xfd ==> "stereo detected"
+
+Set Frequency: code = (freq*40) + 10486188
+ foreach of the 24 bits in code,
+ (from Least to Most Significant):
+ to write a "zero" bit,
+ BASE <-- 0x01 (audio mute, no stereo detect, radio
+ disable, "zero" bit phase 1, tuner adjust)
+ BASE <-- 0x03 (audio mute, no stereo detect, radio
+ disable, "zero" bit phase 2, tuner adjust)
+ to write a "one" bit,
+ BASE <-- 0x05 (audio mute, no stereo detect, radio
+ disable, "one" bit phase 1, tuner adjust)
+ BASE <-- 0x07 (audio mute, no stereo detect, radio
+ disable, "one" bit phase 2, tuner adjust)
+
+----------------------------------------------------------------------------
diff --git a/Documentation/video4linux/sh_mobile_ceu_camera.txt b/Documentation/video4linux/sh_mobile_ceu_camera.txt
new file mode 100644
index 000000000..1e96ce6e2
--- /dev/null
+++ b/Documentation/video4linux/sh_mobile_ceu_camera.txt
@@ -0,0 +1,139 @@
+ Cropping and Scaling algorithm, used in the sh_mobile_ceu_camera driver
+ =======================================================================
+
+Terminology
+-----------
+
+sensor scales: horizontal and vertical scales, configured by the sensor driver
+host scales: -"- host driver
+combined scales: sensor_scale * host_scale
+
+
+Generic scaling / cropping scheme
+---------------------------------
+
+-1--
+|
+-2-- -\
+| --\
+| --\
++-5-- . -- -3-- -\
+| `... -\
+| `... -4-- . - -7..
+| `.
+| `. .6--
+|
+| . .6'-
+| .´
+| ... -4'- .´
+| ...´ - -7'.
++-5'- .´ -/
+| -- -3'- -/
+| --/
+| --/
+-2'- -/
+|
+|
+-1'-
+
+In the above chart minuses and slashes represent "real" data amounts, points and
+accents represent "useful" data, basically, CEU scaled and cropped output,
+mapped back onto the client's source plane.
+
+Such a configuration can be produced by user requests:
+
+S_CROP(left / top = (5) - (1), width / height = (5') - (5))
+S_FMT(width / height = (6') - (6))
+
+Here:
+
+(1) to (1') - whole max width or height
+(1) to (2) - sensor cropped left or top
+(2) to (2') - sensor cropped width or height
+(3) to (3') - sensor scale
+(3) to (4) - CEU cropped left or top
+(4) to (4') - CEU cropped width or height
+(5) to (5') - reverse sensor scale applied to CEU cropped width or height
+(2) to (5) - reverse sensor scale applied to CEU cropped left or top
+(6) to (6') - CEU scale - user window
+
+
+S_FMT
+-----
+
+Do not touch input rectangle - it is already optimal.
+
+1. Calculate current sensor scales:
+
+ scale_s = ((2') - (2)) / ((3') - (3))
+
+2. Calculate "effective" input crop (sensor subwindow) - CEU crop scaled back at
+current sensor scales onto input window - this is user S_CROP:
+
+ width_u = (5') - (5) = ((4') - (4)) * scale_s
+
+3. Calculate new combined scales from "effective" input window to requested user
+window:
+
+ scale_comb = width_u / ((6') - (6))
+
+4. Calculate sensor output window by applying combined scales to real input
+window:
+
+ width_s_out = ((7') - (7)) = ((2') - (2)) / scale_comb
+
+5. Apply iterative sensor S_FMT for sensor output window.
+
+ subdev->video_ops->s_fmt(.width = width_s_out)
+
+6. Retrieve sensor output window (g_fmt)
+
+7. Calculate new sensor scales:
+
+ scale_s_new = ((3')_new - (3)_new) / ((2') - (2))
+
+8. Calculate new CEU crop - apply sensor scales to previously calculated
+"effective" crop:
+
+ width_ceu = (4')_new - (4)_new = width_u / scale_s_new
+ left_ceu = (4)_new - (3)_new = ((5) - (2)) / scale_s_new
+
+9. Use CEU cropping to crop to the new window:
+
+ ceu_crop(.width = width_ceu, .left = left_ceu)
+
+10. Use CEU scaling to scale to the requested user window:
+
+ scale_ceu = width_ceu / width
+
+
+S_CROP
+------
+
+The API at http://v4l2spec.bytesex.org/spec/x1904.htm says:
+
+"...specification does not define an origin or units. However by convention
+drivers should horizontally count unscaled samples relative to 0H."
+
+We choose to follow the advise and interpret cropping units as client input
+pixels.
+
+Cropping is performed in the following 6 steps:
+
+1. Request exactly user rectangle from the sensor.
+
+2. If smaller - iterate until a larger one is obtained. Result: sensor cropped
+ to 2 : 2', target crop 5 : 5', current output format 6' - 6.
+
+3. In the previous step the sensor has tried to preserve its output frame as
+ good as possible, but it could have changed. Retrieve it again.
+
+4. Sensor scaled to 3 : 3'. Sensor's scale is (2' - 2) / (3' - 3). Calculate
+ intermediate window: 4' - 4 = (5' - 5) * (3' - 3) / (2' - 2)
+
+5. Calculate and apply host scale = (6' - 6) / (4' - 4)
+
+6. Calculate and apply host crop: 6 - 7 = (5 - 2) * (6' - 6) / (5' - 5)
+
+--
+Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
diff --git a/Documentation/video4linux/si470x.txt b/Documentation/video4linux/si470x.txt
new file mode 100644
index 000000000..98c32925e
--- /dev/null
+++ b/Documentation/video4linux/si470x.txt
@@ -0,0 +1,129 @@
+Driver for USB radios for the Silicon Labs Si470x FM Radio Receivers
+
+Copyright (c) 2009 Tobias Lorenz <tobias.lorenz@gmx.net>
+
+
+Information from Silicon Labs
+=============================
+Silicon Laboratories is the manufacturer of the radio ICs, that nowadays are the
+most often used radio receivers in cell phones. Usually they are connected with
+I2C. But SiLabs also provides a reference design, which integrates this IC,
+together with a small microcontroller C8051F321, to form a USB radio.
+Part of this reference design is also a radio application in binary and source
+code. The software also contains an automatic firmware upgrade to the most
+current version. Information on these can be downloaded here:
+http://www.silabs.com/usbradio
+
+
+Supported ICs
+=============
+The following ICs have a very similar register set, so that they are or will be
+supported somewhen by the driver:
+- Si4700: FM radio receiver
+- Si4701: FM radio receiver, RDS Support
+- Si4702: FM radio receiver
+- Si4703: FM radio receiver, RDS Support
+- Si4704: FM radio receiver, no external antenna required
+- Si4705: FM radio receiver, no external antenna required, RDS support, Dig I/O
+- Si4706: Enhanced FM RDS/TMC radio receiver, no external antenna required, RDS
+ Support
+- Si4707: Dedicated weather band radio receiver with SAME decoder, RDS Support
+- Si4708: Smallest FM receivers
+- Si4709: Smallest FM receivers, RDS Support
+More information on these can be downloaded here:
+http://www.silabs.com/products/mcu/Pages/USBFMRadioRD.aspx
+
+
+Supported USB devices
+=====================
+Currently the following USB radios (vendor:product) with the Silicon Labs si470x
+chips are known to work:
+- 10c4:818a: Silicon Labs USB FM Radio Reference Design
+- 06e1:a155: ADS/Tech FM Radio Receiver (formerly Instant FM Music) (RDX-155-EF)
+- 1b80:d700: KWorld USB FM Radio SnapMusic Mobile 700 (FM700)
+- 10c5:819a: Sanei Electric, Inc. FM USB Radio (sold as DealExtreme.com PCear)
+
+
+Software
+========
+Testing is usually done with most application under Debian/testing:
+- fmtools - Utility for managing FM tuner cards
+- gnomeradio - FM-radio tuner for the GNOME desktop
+- gradio - GTK FM radio tuner
+- kradio - Comfortable Radio Application for KDE
+- radio - ncurses-based radio application
+- mplayer - The Ultimate Movie Player For Linux
+- v4l2-ctl - Collection of command line video4linux utilities
+For example, you can use:
+v4l2-ctl -d /dev/radio0 --set-ctrl=volume=10,mute=0 --set-freq=95.21 --all
+
+There is also a library libv4l, which can be used. It's going to have a function
+for frequency seeking, either by using hardware functionality as in radio-si470x
+or by implementing a function as we currently have in every of the mentioned
+programs. Somewhen the radio programs should make use of libv4l.
+
+For processing RDS information, there is a project ongoing at:
+http://rdsd.berlios.de/
+
+There is currently no project for making TMC sentences human readable.
+
+
+Audio Listing
+=============
+USB Audio is provided by the ALSA snd_usb_audio module. It is recommended to
+also select SND_USB_AUDIO, as this is required to get sound from the radio. For
+listing you have to redirect the sound, for example using one of the following
+commands. Please adjust the audio devices to your needs (/dev/dsp* and hw:x,x).
+
+If you just want to test audio (very poor quality):
+cat /dev/dsp1 > /dev/dsp
+
+If you use sox + OSS try:
+sox -2 --endian little -r 96000 -t oss /dev/dsp1 -t oss /dev/dsp
+or using sox + alsa:
+sox --endian little -c 2 -S -r 96000 -t alsa hw:1 -t alsa -r 96000 hw:0
+
+If you use arts try:
+arecord -D hw:1,0 -r96000 -c2 -f S16_LE | artsdsp aplay -B -
+
+If you use mplayer try:
+mplayer -radio adevice=hw=1.0:arate=96000 \
+ -rawaudio rate=96000 \
+ radio://<frequency>/capture
+
+Module Parameters
+=================
+After loading the module, you still have access to some of them in the sysfs
+mount under /sys/module/radio_si470x/parameters. The contents of read-only files
+(0444) are not updated, even if space, band and de are changed using private
+video controls. The others are runtime changeable.
+
+
+Errors
+======
+Increase tune_timeout, if you often get -EIO errors.
+
+When timed out or band limit is reached, hw_freq_seek returns -EAGAIN.
+
+If you get any errors from snd_usb_audio, please report them to the ALSA people.
+
+
+Open Issues
+===========
+V4L minor device allocation and parameter setting is not perfect. A solution is
+currently under discussion.
+
+There is an USB interface for downloading/uploading new firmware images. Support
+for it can be implemented using the request_firmware interface.
+
+There is a RDS interrupt mode. The driver is already using the same interface
+for polling RDS information, but is currently not using the interrupt mode.
+
+There is a LED interface, which can be used to override the LED control
+programmed in the firmware. This can be made available using the LED support
+functions in the kernel.
+
+
+Other useful information and links
+==================================
+http://www.silabs.com/usbradio
diff --git a/Documentation/video4linux/si4713.txt b/Documentation/video4linux/si4713.txt
new file mode 100644
index 000000000..2e7392a4f
--- /dev/null
+++ b/Documentation/video4linux/si4713.txt
@@ -0,0 +1,176 @@
+Driver for I2C radios for the Silicon Labs Si4713 FM Radio Transmitters
+
+Copyright (c) 2009 Nokia Corporation
+Contact: Eduardo Valentin <eduardo.valentin@nokia.com>
+
+
+Information about the Device
+============================
+This chip is a Silicon Labs product. It is a I2C device, currently on 0x63 address.
+Basically, it has transmission and signal noise level measurement features.
+
+The Si4713 integrates transmit functions for FM broadcast stereo transmission.
+The chip also allows integrated receive power scanning to identify low signal
+power FM channels.
+
+The chip is programmed using commands and responses. There are also several
+properties which can change the behavior of this chip.
+
+Users must comply with local regulations on radio frequency (RF) transmission.
+
+Device driver description
+=========================
+There are two modules to handle this device. One is a I2C device driver
+and the other is a platform driver.
+
+The I2C device driver exports a v4l2-subdev interface to the kernel.
+All properties can also be accessed by v4l2 extended controls interface, by
+using the v4l2-subdev calls (g_ext_ctrls, s_ext_ctrls).
+
+The platform device driver exports a v4l2 radio device interface to user land.
+So, it uses the I2C device driver as a sub device in order to send the user
+commands to the actual device. Basically it is a wrapper to the I2C device driver.
+
+Applications can use v4l2 radio API to specify frequency of operation, mute state,
+etc. But mostly of its properties will be present in the extended controls.
+
+When the v4l2 mute property is set to 1 (true), the driver will turn the chip off.
+
+Properties description
+======================
+
+The properties can be accessed using v4l2 extended controls.
+Here is an output from v4l2-ctl util:
+/ # v4l2-ctl -d /dev/radio0 --all -L
+Driver Info:
+ Driver name : radio-si4713
+ Card type : Silicon Labs Si4713 Modulator
+ Bus info :
+ Driver version: 0
+ Capabilities : 0x00080800
+ RDS Output
+ Modulator
+Audio output: 0 (FM Modulator Audio Out)
+Frequency: 1408000 (88.000000 MHz)
+Video Standard = 0x00000000
+Modulator:
+ Name : FM Modulator
+ Capabilities : 62.5 Hz stereo rds
+ Frequency range : 76.0 MHz - 108.0 MHz
+ Subchannel modulation: stereo+rds
+
+User Controls
+
+ mute (bool) : default=1 value=0
+
+FM Radio Modulator Controls
+
+ rds_signal_deviation (int) : min=0 max=90000 step=10 default=200 value=200 flags=slider
+ rds_program_id (int) : min=0 max=65535 step=1 default=0 value=0
+ rds_program_type (int) : min=0 max=31 step=1 default=0 value=0
+ rds_ps_name (str) : min=0 max=96 step=8 value='si4713 '
+ rds_radio_text (str) : min=0 max=384 step=32 value=''
+ audio_limiter_feature_enabled (bool) : default=1 value=1
+ audio_limiter_release_time (int) : min=250 max=102390 step=50 default=5010 value=5010 flags=slider
+ audio_limiter_deviation (int) : min=0 max=90000 step=10 default=66250 value=66250 flags=slider
+audio_compression_feature_enabl (bool) : default=1 value=1
+ audio_compression_gain (int) : min=0 max=20 step=1 default=15 value=15 flags=slider
+ audio_compression_threshold (int) : min=-40 max=0 step=1 default=-40 value=-40 flags=slider
+ audio_compression_attack_time (int) : min=0 max=5000 step=500 default=0 value=0 flags=slider
+ audio_compression_release_time (int) : min=100000 max=1000000 step=100000 default=1000000 value=1000000 flags=slider
+ pilot_tone_feature_enabled (bool) : default=1 value=1
+ pilot_tone_deviation (int) : min=0 max=90000 step=10 default=6750 value=6750 flags=slider
+ pilot_tone_frequency (int) : min=0 max=19000 step=1 default=19000 value=19000 flags=slider
+ pre_emphasis_settings (menu) : min=0 max=2 default=1 value=1
+ tune_power_level (int) : min=0 max=120 step=1 default=88 value=88 flags=slider
+ tune_antenna_capacitor (int) : min=0 max=191 step=1 default=0 value=110 flags=slider
+/ #
+
+Here is a summary of them:
+
+* Pilot is an audible tone sent by the device.
+
+pilot_frequency - Configures the frequency of the stereo pilot tone.
+pilot_deviation - Configures pilot tone frequency deviation level.
+pilot_enabled - Enables or disables the pilot tone feature.
+
+* The si4713 device is capable of applying audio compression to the transmitted signal.
+
+acomp_enabled - Enables or disables the audio dynamic range control feature.
+acomp_gain - Sets the gain for audio dynamic range control.
+acomp_threshold - Sets the threshold level for audio dynamic range control.
+acomp_attack_time - Sets the attack time for audio dynamic range control.
+acomp_release_time - Sets the release time for audio dynamic range control.
+
+* Limiter setups audio deviation limiter feature. Once a over deviation occurs,
+it is possible to adjust the front-end gain of the audio input and always
+prevent over deviation.
+
+limiter_enabled - Enables or disables the limiter feature.
+limiter_deviation - Configures audio frequency deviation level.
+limiter_release_time - Sets the limiter release time.
+
+* Tuning power
+
+power_level - Sets the output power level for signal transmission.
+antenna_capacitor - This selects the value of antenna tuning capacitor manually
+or automatically if set to zero.
+
+* RDS related
+
+rds_ps_name - Sets the RDS ps name field for transmission.
+rds_radio_text - Sets the RDS radio text for transmission.
+rds_pi - Sets the RDS PI field for transmission.
+rds_pty - Sets the RDS PTY field for transmission.
+
+* Region related
+
+preemphasis - sets the preemphasis to be applied for transmission.
+
+RNL
+===
+
+This device also has an interface to measure received noise level. To do that, you should
+ioctl the device node. Here is an code of example:
+
+int main (int argc, char *argv[])
+{
+ struct si4713_rnl rnl;
+ int fd = open("/dev/radio0", O_RDWR);
+ int rval;
+
+ if (argc < 2)
+ return -EINVAL;
+
+ if (fd < 0)
+ return fd;
+
+ sscanf(argv[1], "%d", &rnl.frequency);
+
+ rval = ioctl(fd, SI4713_IOC_MEASURE_RNL, &rnl);
+ if (rval < 0)
+ return rval;
+
+ printf("received noise level: %d\n", rnl.rnl);
+
+ close(fd);
+}
+
+The struct si4713_rnl and SI4713_IOC_MEASURE_RNL are defined under
+include/media/si4713.h.
+
+Stereo/Mono and RDS subchannels
+===============================
+
+The device can also be configured using the available sub channels for
+transmission. To do that use S/G_MODULATOR ioctl and configure txsubchans properly.
+Refer to the V4L2 API specification for proper use of this ioctl.
+
+Testing
+=======
+Testing is usually done with v4l2-ctl utility for managing FM tuner cards.
+The tool can be found in v4l-dvb repository under v4l2-apps/util directory.
+
+Example for setting rds ps name:
+# v4l2-ctl -d /dev/radio0 --set-ctrl=rds_ps_name="Dummy"
+
diff --git a/Documentation/video4linux/si476x.txt b/Documentation/video4linux/si476x.txt
new file mode 100644
index 000000000..616607955
--- /dev/null
+++ b/Documentation/video4linux/si476x.txt
@@ -0,0 +1,187 @@
+SI476x Driver Readme
+------------------------------------------------
+ Copyright (C) 2013 Andrey Smirnov <andrew.smirnov@gmail.com>
+
+TODO for the driver
+------------------------------
+
+- According to the SiLabs' datasheet it is possible to update the
+ firmware of the radio chip in the run-time, thus bringing it to the
+ most recent version. Unfortunately I couldn't find any mentioning of
+ the said firmware update for the old chips that I tested the driver
+ against, so for chips like that the driver only exposes the old
+ functionality.
+
+
+Parameters exposed over debugfs
+-------------------------------
+SI476x allow user to get multiple characteristics that can be very
+useful for EoL testing/RF performance estimation, parameters that have
+very little to do with V4L2 subsystem. Such parameters are exposed via
+debugfs and can be accessed via regular file I/O operations.
+
+The drivers exposes following files:
+
+* /sys/kernel/debug/<device-name>/acf
+ This file contains ACF(Automatically Controlled Features) status
+ information. The contents of the file is binary data of the
+ following layout:
+
+ Offset | Name | Description
+ ====================================================================
+ 0x00 | blend_int | Flag, set when stereo separation has
+ | | crossed below the blend threshold
+ --------------------------------------------------------------------
+ 0x01 | hblend_int | Flag, set when HiBlend cutoff
+ | | frequency is lower than threshold
+ --------------------------------------------------------------------
+ 0x02 | hicut_int | Flag, set when HiCut cutoff
+ | | frequency is lower than threshold
+ --------------------------------------------------------------------
+ 0x03 | chbw_int | Flag, set when channel filter
+ | | bandwidth is less than threshold
+ --------------------------------------------------------------------
+ 0x04 | softmute_int | Flag indicating that softmute
+ | | attenuation has increased above
+ | | softmute threshold
+ --------------------------------------------------------------------
+ 0x05 | smute | 0 - Audio is not soft muted
+ | | 1 - Audio is soft muted
+ --------------------------------------------------------------------
+ 0x06 | smattn | Soft mute attenuation level in dB
+ --------------------------------------------------------------------
+ 0x07 | chbw | Channel filter bandwidth in kHz
+ --------------------------------------------------------------------
+ 0x08 | hicut | HiCut cutoff frequency in units of
+ | | 100Hz
+ --------------------------------------------------------------------
+ 0x09 | hiblend | HiBlend cutoff frequency in units
+ | | of 100 Hz
+ --------------------------------------------------------------------
+ 0x10 | pilot | 0 - Stereo pilot is not present
+ | | 1 - Stereo pilot is present
+ --------------------------------------------------------------------
+ 0x11 | stblend | Stereo blend in %
+ --------------------------------------------------------------------
+
+
+* /sys/kernel/debug/<device-name>/rds_blckcnt
+ This file contains statistics about RDS receptions. It's binary data
+ has the following layout:
+
+ Offset | Name | Description
+ ====================================================================
+ 0x00 | expected | Number of expected RDS blocks
+ --------------------------------------------------------------------
+ 0x02 | received | Number of received RDS blocks
+ --------------------------------------------------------------------
+ 0x04 | uncorrectable | Number of uncorrectable RDS blocks
+ --------------------------------------------------------------------
+
+* /sys/kernel/debug/<device-name>/agc
+ This file contains information about parameters pertaining to
+ AGC(Automatic Gain Control)
+
+ The layout is:
+ Offset | Name | Description
+ ====================================================================
+ 0x00 | mxhi | 0 - FM Mixer PD high threshold is
+ | | not tripped
+ | | 1 - FM Mixer PD high threshold is
+ | | tripped
+ --------------------------------------------------------------------
+ 0x01 | mxlo | ditto for FM Mixer PD low
+ --------------------------------------------------------------------
+ 0x02 | lnahi | ditto for FM LNA PD high
+ --------------------------------------------------------------------
+ 0x03 | lnalo | ditto for FM LNA PD low
+ --------------------------------------------------------------------
+ 0x04 | fmagc1 | FMAGC1 attenuator resistance
+ | | (see datasheet for more detail)
+ --------------------------------------------------------------------
+ 0x05 | fmagc2 | ditto for FMAGC2
+ --------------------------------------------------------------------
+ 0x06 | pgagain | PGA gain in dB
+ --------------------------------------------------------------------
+ 0x07 | fmwblang | FM/WB LNA Gain in dB
+ --------------------------------------------------------------------
+
+* /sys/kernel/debug/<device-name>/rsq
+ This file contains information about parameters pertaining to
+ RSQ(Received Signal Quality)
+
+ The layout is:
+ Offset | Name | Description
+ ====================================================================
+ 0x00 | multhint | 0 - multipath value has not crossed
+ | | the Multipath high threshold
+ | | 1 - multipath value has crossed
+ | | the Multipath high threshold
+ --------------------------------------------------------------------
+ 0x01 | multlint | ditto for Multipath low threshold
+ --------------------------------------------------------------------
+ 0x02 | snrhint | 0 - received signal's SNR has not
+ | | crossed high threshold
+ | | 1 - received signal's SNR has
+ | | crossed high threshold
+ --------------------------------------------------------------------
+ 0x03 | snrlint | ditto for low threshold
+ --------------------------------------------------------------------
+ 0x04 | rssihint | ditto for RSSI high threshold
+ --------------------------------------------------------------------
+ 0x05 | rssilint | ditto for RSSI low threshold
+ --------------------------------------------------------------------
+ 0x06 | bltf | Flag indicating if seek command
+ | | reached/wrapped seek band limit
+ --------------------------------------------------------------------
+ 0x07 | snr_ready | Indicates that SNR metrics is ready
+ --------------------------------------------------------------------
+ 0x08 | rssiready | ditto for RSSI metrics
+ --------------------------------------------------------------------
+ 0x09 | injside | 0 - Low-side injection is being used
+ | | 1 - High-side injection is used
+ --------------------------------------------------------------------
+ 0x10 | afcrl | Flag indicating if AFC rails
+ --------------------------------------------------------------------
+ 0x11 | valid | Flag indicating if channel is valid
+ --------------------------------------------------------------------
+ 0x12 | readfreq | Current tuned frequency
+ --------------------------------------------------------------------
+ 0x14 | freqoff | Signed frequency offset in units of
+ | | 2ppm
+ --------------------------------------------------------------------
+ 0x15 | rssi | Signed value of RSSI in dBuV
+ --------------------------------------------------------------------
+ 0x16 | snr | Signed RF SNR in dB
+ --------------------------------------------------------------------
+ 0x17 | issi | Signed Image Strength Signal
+ | | indicator
+ --------------------------------------------------------------------
+ 0x18 | lassi | Signed Low side adjacent Channel
+ | | Strength indicator
+ --------------------------------------------------------------------
+ 0x19 | hassi | ditto fpr High side
+ --------------------------------------------------------------------
+ 0x20 | mult | Multipath indicator
+ --------------------------------------------------------------------
+ 0x21 | dev | Frequency deviation
+ --------------------------------------------------------------------
+ 0x24 | assi | Adjacent channel SSI
+ --------------------------------------------------------------------
+ 0x25 | usn | Ultrasonic noise indicator
+ --------------------------------------------------------------------
+ 0x26 | pilotdev | Pilot deviation in units of 100 Hz
+ --------------------------------------------------------------------
+ 0x27 | rdsdev | ditto for RDS
+ --------------------------------------------------------------------
+ 0x28 | assidev | ditto for ASSI
+ --------------------------------------------------------------------
+ 0x29 | strongdev | Frequency deviation
+ --------------------------------------------------------------------
+ 0x30 | rdspi | RDS PI code
+ --------------------------------------------------------------------
+
+* /sys/kernel/debug/<device-name>/rsq_primary
+ This file contains information about parameters pertaining to
+ RSQ(Received Signal Quality) for primary tuner only. Layout is as
+ the one above.
diff --git a/Documentation/video4linux/soc-camera.txt b/Documentation/video4linux/soc-camera.txt
new file mode 100644
index 000000000..84f41cf1f
--- /dev/null
+++ b/Documentation/video4linux/soc-camera.txt
@@ -0,0 +1,164 @@
+ Soc-Camera Subsystem
+ ====================
+
+Terminology
+-----------
+
+The following terms are used in this document:
+ - camera / camera device / camera sensor - a video-camera sensor chip, capable
+ of connecting to a variety of systems and interfaces, typically uses i2c for
+ control and configuration, and a parallel or a serial bus for data.
+ - camera host - an interface, to which a camera is connected. Typically a
+ specialised interface, present on many SoCs, e.g. PXA27x and PXA3xx, SuperH,
+ AVR32, i.MX27, i.MX31.
+ - camera host bus - a connection between a camera host and a camera. Can be
+ parallel or serial, consists of data and control lines, e.g. clock, vertical
+ and horizontal synchronization signals.
+
+Purpose of the soc-camera subsystem
+-----------------------------------
+
+The soc-camera subsystem initially provided a unified API between camera host
+drivers and camera sensor drivers. Later the soc-camera sensor API has been
+replaced with the V4L2 standard subdev API. This also made camera driver re-use
+with non-soc-camera hosts possible. The camera host API to the soc-camera core
+has been preserved.
+
+Soc-camera implements a V4L2 interface to the user, currently only the "mmap"
+method is supported by host drivers. However, the soc-camera core also provides
+support for the "read" method.
+
+The subsystem has been designed to support multiple camera host interfaces and
+multiple cameras per interface, although most applications have only one camera
+sensor.
+
+Existing drivers
+----------------
+
+As of 3.7 there are seven host drivers in the mainline: atmel-isi.c,
+mx1_camera.c (broken, scheduled for removal), mx2_camera.c, mx3_camera.c,
+omap1_camera.c, pxa_camera.c, sh_mobile_ceu_camera.c, and multiple sensor
+drivers under drivers/media/i2c/soc_camera/.
+
+Camera host API
+---------------
+
+A host camera driver is registered using the
+
+soc_camera_host_register(struct soc_camera_host *);
+
+function. The host object can be initialized as follows:
+
+ struct soc_camera_host *ici;
+ ici->drv_name = DRV_NAME;
+ ici->ops = &camera_host_ops;
+ ici->priv = pcdev;
+ ici->v4l2_dev.dev = &pdev->dev;
+ ici->nr = pdev->id;
+
+All camera host methods are passed in a struct soc_camera_host_ops:
+
+static struct soc_camera_host_ops camera_host_ops = {
+ .owner = THIS_MODULE,
+ .add = camera_add_device,
+ .remove = camera_remove_device,
+ .set_fmt = camera_set_fmt_cap,
+ .try_fmt = camera_try_fmt_cap,
+ .init_videobuf2 = camera_init_videobuf2,
+ .poll = camera_poll,
+ .querycap = camera_querycap,
+ .set_bus_param = camera_set_bus_param,
+ /* The rest of host operations are optional */
+};
+
+.add and .remove methods are called when a sensor is attached to or detached
+from the host. .set_bus_param is used to configure physical connection
+parameters between the host and the sensor. .init_videobuf2 is called by
+soc-camera core when a video-device is opened, the host driver would typically
+call vb2_queue_init() in this method. Further video-buffer management is
+implemented completely by the specific camera host driver. If the host driver
+supports non-standard pixel format conversion, it should implement a
+.get_formats and, possibly, a .put_formats operations. See below for more
+details about format conversion. The rest of the methods are called from
+respective V4L2 operations.
+
+Camera API
+----------
+
+Sensor drivers can use struct soc_camera_link, typically provided by the
+platform, and used to specify to which camera host bus the sensor is connected,
+and optionally provide platform .power and .reset methods for the camera. This
+struct is provided to the camera driver via the I2C client device platform data
+and can be obtained, using the soc_camera_i2c_to_link() macro. Care should be
+taken, when using soc_camera_vdev_to_subdev() and when accessing struct
+soc_camera_device, using v4l2_get_subdev_hostdata(): both only work, when
+running on an soc-camera host. The actual camera driver operation is implemented
+using the V4L2 subdev API. Additionally soc-camera camera drivers can use
+auxiliary soc-camera helper functions like soc_camera_power_on() and
+soc_camera_power_off(), which switch regulators, provided by the platform and call
+board-specific power switching methods. soc_camera_apply_board_flags() takes
+camera bus configuration capability flags and applies any board transformations,
+e.g. signal polarity inversion. soc_mbus_get_fmtdesc() can be used to obtain a
+pixel format descriptor, corresponding to a certain media-bus pixel format code.
+soc_camera_limit_side() can be used to restrict beginning and length of a frame
+side, based on camera capabilities.
+
+VIDIOC_S_CROP and VIDIOC_S_FMT behaviour
+----------------------------------------
+
+Above user ioctls modify image geometry as follows:
+
+VIDIOC_S_CROP: sets location and sizes of the sensor window. Unit is one sensor
+pixel. Changing sensor window sizes preserves any scaling factors, therefore
+user window sizes change as well.
+
+VIDIOC_S_FMT: sets user window. Should preserve previously set sensor window as
+much as possible by modifying scaling factors. If the sensor window cannot be
+preserved precisely, it may be changed too.
+
+In soc-camera there are two locations, where scaling and cropping can take
+place: in the camera driver and in the host driver. User ioctls are first passed
+to the host driver, which then generally passes them down to the camera driver.
+It is more efficient to perform scaling and cropping in the camera driver to
+save camera bus bandwidth and maximise the framerate. However, if the camera
+driver failed to set the required parameters with sufficient precision, the host
+driver may decide to also use its own scaling and cropping to fulfill the user's
+request.
+
+Camera drivers are interfaced to the soc-camera core and to host drivers over
+the v4l2-subdev API, which is completely functional, it doesn't pass any data.
+Therefore all camera drivers shall reply to .g_fmt() requests with their current
+output geometry. This is necessary to correctly configure the camera bus.
+.s_fmt() and .try_fmt() have to be implemented too. Sensor window and scaling
+factors have to be maintained by camera drivers internally. According to the
+V4L2 API all capture drivers must support the VIDIOC_CROPCAP ioctl, hence we
+rely on camera drivers implementing .cropcap(). If the camera driver does not
+support cropping, it may choose to not implement .s_crop(), but to enable
+cropping support by the camera host driver at least the .g_crop method must be
+implemented.
+
+User window geometry is kept in .user_width and .user_height fields in struct
+soc_camera_device and used by the soc-camera core and host drivers. The core
+updates these fields upon successful completion of a .s_fmt() call, but if these
+fields change elsewhere, e.g. during .s_crop() processing, the host driver is
+responsible for updating them.
+
+Format conversion
+-----------------
+
+V4L2 distinguishes between pixel formats, as they are stored in memory, and as
+they are transferred over a media bus. Soc-camera provides support to
+conveniently manage these formats. A table of standard transformations is
+maintained by soc-camera core, which describes, what FOURCC pixel format will
+be obtained, if a media-bus pixel format is stored in memory according to
+certain rules. E.g. if MEDIA_BUS_FMT_YUYV8_2X8 data is sampled with 8 bits per
+sample and stored in memory in the little-endian order with no gaps between
+bytes, data in memory will represent the V4L2_PIX_FMT_YUYV FOURCC format. These
+standard transformations will be used by soc-camera or by camera host drivers to
+configure camera drivers to produce the FOURCC format, requested by the user,
+using the VIDIOC_S_FMT ioctl(). Apart from those standard format conversions,
+host drivers can also provide their own conversion rules by implementing a
+.get_formats and, if required, a .put_formats methods.
+
+--
+Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
diff --git a/Documentation/video4linux/uvcvideo.txt b/Documentation/video4linux/uvcvideo.txt
new file mode 100644
index 000000000..35ce19cdd
--- /dev/null
+++ b/Documentation/video4linux/uvcvideo.txt
@@ -0,0 +1,239 @@
+Linux USB Video Class (UVC) driver
+==================================
+
+This file documents some driver-specific aspects of the UVC driver, such as
+driver-specific ioctls and implementation notes.
+
+Questions and remarks can be sent to the Linux UVC development mailing list at
+linux-uvc-devel@lists.berlios.de.
+
+
+Extension Unit (XU) support
+---------------------------
+
+1. Introduction
+
+The UVC specification allows for vendor-specific extensions through extension
+units (XUs). The Linux UVC driver supports extension unit controls (XU controls)
+through two separate mechanisms:
+
+ - through mappings of XU controls to V4L2 controls
+ - through a driver-specific ioctl interface
+
+The first one allows generic V4L2 applications to use XU controls by mapping
+certain XU controls onto V4L2 controls, which then show up during ordinary
+control enumeration.
+
+The second mechanism requires uvcvideo-specific knowledge for the application to
+access XU controls but exposes the entire UVC XU concept to user space for
+maximum flexibility.
+
+Both mechanisms complement each other and are described in more detail below.
+
+
+2. Control mappings
+
+The UVC driver provides an API for user space applications to define so-called
+control mappings at runtime. These allow for individual XU controls or byte
+ranges thereof to be mapped to new V4L2 controls. Such controls appear and
+function exactly like normal V4L2 controls (i.e. the stock controls, such as
+brightness, contrast, etc.). However, reading or writing of such a V4L2 controls
+triggers a read or write of the associated XU control.
+
+The ioctl used to create these control mappings is called UVCIOC_CTRL_MAP.
+Previous driver versions (before 0.2.0) required another ioctl to be used
+beforehand (UVCIOC_CTRL_ADD) to pass XU control information to the UVC driver.
+This is no longer necessary as newer uvcvideo versions query the information
+directly from the device.
+
+For details on the UVCIOC_CTRL_MAP ioctl please refer to the section titled
+"IOCTL reference" below.
+
+
+3. Driver specific XU control interface
+
+For applications that need to access XU controls directly, e.g. for testing
+purposes, firmware upload, or accessing binary controls, a second mechanism to
+access XU controls is provided in the form of a driver-specific ioctl, namely
+UVCIOC_CTRL_QUERY.
+
+A call to this ioctl allows applications to send queries to the UVC driver that
+directly map to the low-level UVC control requests.
+
+In order to make such a request the UVC unit ID of the control's extension unit
+and the control selector need to be known. This information either needs to be
+hardcoded in the application or queried using other ways such as by parsing the
+UVC descriptor or, if available, using the media controller API to enumerate a
+device's entities.
+
+Unless the control size is already known it is necessary to first make a
+UVC_GET_LEN requests in order to be able to allocate a sufficiently large buffer
+and set the buffer size to the correct value. Similarly, to find out whether
+UVC_GET_CUR or UVC_SET_CUR are valid requests for a given control, a
+UVC_GET_INFO request should be made. The bits 0 (GET supported) and 1 (SET
+supported) of the resulting byte indicate which requests are valid.
+
+With the addition of the UVCIOC_CTRL_QUERY ioctl the UVCIOC_CTRL_GET and
+UVCIOC_CTRL_SET ioctls have become obsolete since their functionality is a
+subset of the former ioctl. For the time being they are still supported but
+application developers are encouraged to use UVCIOC_CTRL_QUERY instead.
+
+For details on the UVCIOC_CTRL_QUERY ioctl please refer to the section titled
+"IOCTL reference" below.
+
+
+4. Security
+
+The API doesn't currently provide a fine-grained access control facility. The
+UVCIOC_CTRL_ADD and UVCIOC_CTRL_MAP ioctls require super user permissions.
+
+Suggestions on how to improve this are welcome.
+
+
+5. Debugging
+
+In order to debug problems related to XU controls or controls in general it is
+recommended to enable the UVC_TRACE_CONTROL bit in the module parameter 'trace'.
+This causes extra output to be written into the system log.
+
+
+6. IOCTL reference
+
+---- UVCIOC_CTRL_MAP - Map a UVC control to a V4L2 control ----
+
+Argument: struct uvc_xu_control_mapping
+
+Description:
+ This ioctl creates a mapping between a UVC control or part of a UVC
+ control and a V4L2 control. Once mappings are defined, userspace
+ applications can access vendor-defined UVC control through the V4L2
+ control API.
+
+ To create a mapping, applications fill the uvc_xu_control_mapping
+ structure with information about an existing UVC control defined with
+ UVCIOC_CTRL_ADD and a new V4L2 control.
+
+ A UVC control can be mapped to several V4L2 controls. For instance,
+ a UVC pan/tilt control could be mapped to separate pan and tilt V4L2
+ controls. The UVC control is divided into non overlapping fields using
+ the 'size' and 'offset' fields and are then independently mapped to
+ V4L2 control.
+
+ For signed integer V4L2 controls the data_type field should be set to
+ UVC_CTRL_DATA_TYPE_SIGNED. Other values are currently ignored.
+
+Return value:
+ On success 0 is returned. On error -1 is returned and errno is set
+ appropriately.
+
+ ENOMEM
+ Not enough memory to perform the operation.
+ EPERM
+ Insufficient privileges (super user privileges are required).
+ EINVAL
+ No such UVC control.
+ EOVERFLOW
+ The requested offset and size would overflow the UVC control.
+ EEXIST
+ Mapping already exists.
+
+Data types:
+ * struct uvc_xu_control_mapping
+
+ __u32 id V4L2 control identifier
+ __u8 name[32] V4L2 control name
+ __u8 entity[16] UVC extension unit GUID
+ __u8 selector UVC control selector
+ __u8 size V4L2 control size (in bits)
+ __u8 offset V4L2 control offset (in bits)
+ enum v4l2_ctrl_type
+ v4l2_type V4L2 control type
+ enum uvc_control_data_type
+ data_type UVC control data type
+ struct uvc_menu_info
+ *menu_info Array of menu entries (for menu controls only)
+ __u32 menu_count Number of menu entries (for menu controls only)
+
+ * struct uvc_menu_info
+
+ __u32 value Menu entry value used by the device
+ __u8 name[32] Menu entry name
+
+
+ * enum uvc_control_data_type
+
+ UVC_CTRL_DATA_TYPE_RAW Raw control (byte array)
+ UVC_CTRL_DATA_TYPE_SIGNED Signed integer
+ UVC_CTRL_DATA_TYPE_UNSIGNED Unsigned integer
+ UVC_CTRL_DATA_TYPE_BOOLEAN Boolean
+ UVC_CTRL_DATA_TYPE_ENUM Enumeration
+ UVC_CTRL_DATA_TYPE_BITMASK Bitmask
+
+
+---- UVCIOC_CTRL_QUERY - Query a UVC XU control ----
+
+Argument: struct uvc_xu_control_query
+
+Description:
+ This ioctl queries a UVC XU control identified by its extension unit ID
+ and control selector.
+
+ There are a number of different queries available that closely
+ correspond to the low-level control requests described in the UVC
+ specification. These requests are:
+
+ UVC_GET_CUR
+ Obtain the current value of the control.
+ UVC_GET_MIN
+ Obtain the minimum value of the control.
+ UVC_GET_MAX
+ Obtain the maximum value of the control.
+ UVC_GET_DEF
+ Obtain the default value of the control.
+ UVC_GET_RES
+ Query the resolution of the control, i.e. the step size of the
+ allowed control values.
+ UVC_GET_LEN
+ Query the size of the control in bytes.
+ UVC_GET_INFO
+ Query the control information bitmap, which indicates whether
+ get/set requests are supported.
+ UVC_SET_CUR
+ Update the value of the control.
+
+ Applications must set the 'size' field to the correct length for the
+ control. Exceptions are the UVC_GET_LEN and UVC_GET_INFO queries, for
+ which the size must be set to 2 and 1, respectively. The 'data' field
+ must point to a valid writable buffer big enough to hold the indicated
+ number of data bytes.
+
+ Data is copied directly from the device without any driver-side
+ processing. Applications are responsible for data buffer formatting,
+ including little-endian/big-endian conversion. This is particularly
+ important for the result of the UVC_GET_LEN requests, which is always
+ returned as a little-endian 16-bit integer by the device.
+
+Return value:
+ On success 0 is returned. On error -1 is returned and errno is set
+ appropriately.
+
+ ENOENT
+ The device does not support the given control or the specified
+ extension unit could not be found.
+ ENOBUFS
+ The specified buffer size is incorrect (too big or too small).
+ EINVAL
+ An invalid request code was passed.
+ EBADRQC
+ The given request is not supported by the given control.
+ EFAULT
+ The data pointer references an inaccessible memory area.
+
+Data types:
+ * struct uvc_xu_control_query
+
+ __u8 unit Extension unit ID
+ __u8 selector Control selector
+ __u8 query Request code to send to the device
+ __u16 size Control data size (in bytes)
+ __u8 *data Control value
diff --git a/Documentation/video4linux/v4l2-controls.txt b/Documentation/video4linux/v4l2-controls.txt
new file mode 100644
index 000000000..5517db602
--- /dev/null
+++ b/Documentation/video4linux/v4l2-controls.txt
@@ -0,0 +1,752 @@
+Introduction
+============
+
+The V4L2 control API seems simple enough, but quickly becomes very hard to
+implement correctly in drivers. But much of the code needed to handle controls
+is actually not driver specific and can be moved to the V4L core framework.
+
+After all, the only part that a driver developer is interested in is:
+
+1) How do I add a control?
+2) How do I set the control's value? (i.e. s_ctrl)
+
+And occasionally:
+
+3) How do I get the control's value? (i.e. g_volatile_ctrl)
+4) How do I validate the user's proposed control value? (i.e. try_ctrl)
+
+All the rest is something that can be done centrally.
+
+The control framework was created in order to implement all the rules of the
+V4L2 specification with respect to controls in a central place. And to make
+life as easy as possible for the driver developer.
+
+Note that the control framework relies on the presence of a struct v4l2_device
+for V4L2 drivers and struct v4l2_subdev for sub-device drivers.
+
+
+Objects in the framework
+========================
+
+There are two main objects:
+
+The v4l2_ctrl object describes the control properties and keeps track of the
+control's value (both the current value and the proposed new value).
+
+v4l2_ctrl_handler is the object that keeps track of controls. It maintains a
+list of v4l2_ctrl objects that it owns and another list of references to
+controls, possibly to controls owned by other handlers.
+
+
+Basic usage for V4L2 and sub-device drivers
+===========================================
+
+1) Prepare the driver:
+
+1.1) Add the handler to your driver's top-level struct:
+
+ struct foo_dev {
+ ...
+ struct v4l2_ctrl_handler ctrl_handler;
+ ...
+ };
+
+ struct foo_dev *foo;
+
+1.2) Initialize the handler:
+
+ v4l2_ctrl_handler_init(&foo->ctrl_handler, nr_of_controls);
+
+ The second argument is a hint telling the function how many controls this
+ handler is expected to handle. It will allocate a hashtable based on this
+ information. It is a hint only.
+
+1.3) Hook the control handler into the driver:
+
+1.3.1) For V4L2 drivers do this:
+
+ struct foo_dev {
+ ...
+ struct v4l2_device v4l2_dev;
+ ...
+ struct v4l2_ctrl_handler ctrl_handler;
+ ...
+ };
+
+ foo->v4l2_dev.ctrl_handler = &foo->ctrl_handler;
+
+ Where foo->v4l2_dev is of type struct v4l2_device.
+
+ Finally, remove all control functions from your v4l2_ioctl_ops (if any):
+ vidioc_queryctrl, vidioc_query_ext_ctrl, vidioc_querymenu, vidioc_g_ctrl,
+ vidioc_s_ctrl, vidioc_g_ext_ctrls, vidioc_try_ext_ctrls and vidioc_s_ext_ctrls.
+ Those are now no longer needed.
+
+1.3.2) For sub-device drivers do this:
+
+ struct foo_dev {
+ ...
+ struct v4l2_subdev sd;
+ ...
+ struct v4l2_ctrl_handler ctrl_handler;
+ ...
+ };
+
+ foo->sd.ctrl_handler = &foo->ctrl_handler;
+
+ Where foo->sd is of type struct v4l2_subdev.
+
+ And set all core control ops in your struct v4l2_subdev_core_ops to these
+ helpers:
+
+ .queryctrl = v4l2_subdev_queryctrl,
+ .querymenu = v4l2_subdev_querymenu,
+ .g_ctrl = v4l2_subdev_g_ctrl,
+ .s_ctrl = v4l2_subdev_s_ctrl,
+ .g_ext_ctrls = v4l2_subdev_g_ext_ctrls,
+ .try_ext_ctrls = v4l2_subdev_try_ext_ctrls,
+ .s_ext_ctrls = v4l2_subdev_s_ext_ctrls,
+
+ Note: this is a temporary solution only. Once all V4L2 drivers that depend
+ on subdev drivers are converted to the control framework these helpers will
+ no longer be needed.
+
+1.4) Clean up the handler at the end:
+
+ v4l2_ctrl_handler_free(&foo->ctrl_handler);
+
+
+2) Add controls:
+
+You add non-menu controls by calling v4l2_ctrl_new_std:
+
+ struct v4l2_ctrl *v4l2_ctrl_new_std(struct v4l2_ctrl_handler *hdl,
+ const struct v4l2_ctrl_ops *ops,
+ u32 id, s32 min, s32 max, u32 step, s32 def);
+
+Menu and integer menu controls are added by calling v4l2_ctrl_new_std_menu:
+
+ struct v4l2_ctrl *v4l2_ctrl_new_std_menu(struct v4l2_ctrl_handler *hdl,
+ const struct v4l2_ctrl_ops *ops,
+ u32 id, s32 max, s32 skip_mask, s32 def);
+
+Menu controls with a driver specific menu are added by calling
+v4l2_ctrl_new_std_menu_items:
+
+ struct v4l2_ctrl *v4l2_ctrl_new_std_menu_items(
+ struct v4l2_ctrl_handler *hdl,
+ const struct v4l2_ctrl_ops *ops, u32 id, s32 max,
+ s32 skip_mask, s32 def, const char * const *qmenu);
+
+Integer menu controls with a driver specific menu can be added by calling
+v4l2_ctrl_new_int_menu:
+
+ struct v4l2_ctrl *v4l2_ctrl_new_int_menu(struct v4l2_ctrl_handler *hdl,
+ const struct v4l2_ctrl_ops *ops,
+ u32 id, s32 max, s32 def, const s64 *qmenu_int);
+
+These functions are typically called right after the v4l2_ctrl_handler_init:
+
+ static const s64 exp_bias_qmenu[] = {
+ -2, -1, 0, 1, 2
+ };
+ static const char * const test_pattern[] = {
+ "Disabled",
+ "Vertical Bars",
+ "Solid Black",
+ "Solid White",
+ };
+
+ v4l2_ctrl_handler_init(&foo->ctrl_handler, nr_of_controls);
+ v4l2_ctrl_new_std(&foo->ctrl_handler, &foo_ctrl_ops,
+ V4L2_CID_BRIGHTNESS, 0, 255, 1, 128);
+ v4l2_ctrl_new_std(&foo->ctrl_handler, &foo_ctrl_ops,
+ V4L2_CID_CONTRAST, 0, 255, 1, 128);
+ v4l2_ctrl_new_std_menu(&foo->ctrl_handler, &foo_ctrl_ops,
+ V4L2_CID_POWER_LINE_FREQUENCY,
+ V4L2_CID_POWER_LINE_FREQUENCY_60HZ, 0,
+ V4L2_CID_POWER_LINE_FREQUENCY_DISABLED);
+ v4l2_ctrl_new_int_menu(&foo->ctrl_handler, &foo_ctrl_ops,
+ V4L2_CID_EXPOSURE_BIAS,
+ ARRAY_SIZE(exp_bias_qmenu) - 1,
+ ARRAY_SIZE(exp_bias_qmenu) / 2 - 1,
+ exp_bias_qmenu);
+ v4l2_ctrl_new_std_menu_items(&foo->ctrl_handler, &foo_ctrl_ops,
+ V4L2_CID_TEST_PATTERN, ARRAY_SIZE(test_pattern) - 1, 0,
+ 0, test_pattern);
+ ...
+ if (foo->ctrl_handler.error) {
+ int err = foo->ctrl_handler.error;
+
+ v4l2_ctrl_handler_free(&foo->ctrl_handler);
+ return err;
+ }
+
+The v4l2_ctrl_new_std function returns the v4l2_ctrl pointer to the new
+control, but if you do not need to access the pointer outside the control ops,
+then there is no need to store it.
+
+The v4l2_ctrl_new_std function will fill in most fields based on the control
+ID except for the min, max, step and default values. These are passed in the
+last four arguments. These values are driver specific while control attributes
+like type, name, flags are all global. The control's current value will be set
+to the default value.
+
+The v4l2_ctrl_new_std_menu function is very similar but it is used for menu
+controls. There is no min argument since that is always 0 for menu controls,
+and instead of a step there is a skip_mask argument: if bit X is 1, then menu
+item X is skipped.
+
+The v4l2_ctrl_new_int_menu function creates a new standard integer menu
+control with driver-specific items in the menu. It differs from
+v4l2_ctrl_new_std_menu in that it doesn't have the mask argument and takes
+as the last argument an array of signed 64-bit integers that form an exact
+menu item list.
+
+The v4l2_ctrl_new_std_menu_items function is very similar to
+v4l2_ctrl_new_std_menu but takes an extra parameter qmenu, which is the driver
+specific menu for an otherwise standard menu control. A good example for this
+control is the test pattern control for capture/display/sensors devices that
+have the capability to generate test patterns. These test patterns are hardware
+specific, so the contents of the menu will vary from device to device.
+
+Note that if something fails, the function will return NULL or an error and
+set ctrl_handler->error to the error code. If ctrl_handler->error was already
+set, then it will just return and do nothing. This is also true for
+v4l2_ctrl_handler_init if it cannot allocate the internal data structure.
+
+This makes it easy to init the handler and just add all controls and only check
+the error code at the end. Saves a lot of repetitive error checking.
+
+It is recommended to add controls in ascending control ID order: it will be
+a bit faster that way.
+
+3) Optionally force initial control setup:
+
+ v4l2_ctrl_handler_setup(&foo->ctrl_handler);
+
+This will call s_ctrl for all controls unconditionally. Effectively this
+initializes the hardware to the default control values. It is recommended
+that you do this as this ensures that both the internal data structures and
+the hardware are in sync.
+
+4) Finally: implement the v4l2_ctrl_ops
+
+ static const struct v4l2_ctrl_ops foo_ctrl_ops = {
+ .s_ctrl = foo_s_ctrl,
+ };
+
+Usually all you need is s_ctrl:
+
+ static int foo_s_ctrl(struct v4l2_ctrl *ctrl)
+ {
+ struct foo *state = container_of(ctrl->handler, struct foo, ctrl_handler);
+
+ switch (ctrl->id) {
+ case V4L2_CID_BRIGHTNESS:
+ write_reg(0x123, ctrl->val);
+ break;
+ case V4L2_CID_CONTRAST:
+ write_reg(0x456, ctrl->val);
+ break;
+ }
+ return 0;
+ }
+
+The control ops are called with the v4l2_ctrl pointer as argument.
+The new control value has already been validated, so all you need to do is
+to actually update the hardware registers.
+
+You're done! And this is sufficient for most of the drivers we have. No need
+to do any validation of control values, or implement QUERYCTRL, QUERY_EXT_CTRL
+and QUERYMENU. And G/S_CTRL as well as G/TRY/S_EXT_CTRLS are automatically supported.
+
+
+==============================================================================
+
+The remainder of this document deals with more advanced topics and scenarios.
+In practice the basic usage as described above is sufficient for most drivers.
+
+===============================================================================
+
+
+Inheriting Controls
+===================
+
+When a sub-device is registered with a V4L2 driver by calling
+v4l2_device_register_subdev() and the ctrl_handler fields of both v4l2_subdev
+and v4l2_device are set, then the controls of the subdev will become
+automatically available in the V4L2 driver as well. If the subdev driver
+contains controls that already exist in the V4L2 driver, then those will be
+skipped (so a V4L2 driver can always override a subdev control).
+
+What happens here is that v4l2_device_register_subdev() calls
+v4l2_ctrl_add_handler() adding the controls of the subdev to the controls
+of v4l2_device.
+
+
+Accessing Control Values
+========================
+
+The following union is used inside the control framework to access control
+values:
+
+union v4l2_ctrl_ptr {
+ s32 *p_s32;
+ s64 *p_s64;
+ char *p_char;
+ void *p;
+};
+
+The v4l2_ctrl struct contains these fields that can be used to access both
+current and new values:
+
+ s32 val;
+ struct {
+ s32 val;
+ } cur;
+
+
+ union v4l2_ctrl_ptr p_new;
+ union v4l2_ctrl_ptr p_cur;
+
+If the control has a simple s32 type type, then:
+
+ &ctrl->val == ctrl->p_new.p_s32
+ &ctrl->cur.val == ctrl->p_cur.p_s32
+
+For all other types use ctrl->p_cur.p<something>. Basically the val
+and cur.val fields can be considered an alias since these are used so often.
+
+Within the control ops you can freely use these. The val and cur.val speak for
+themselves. The p_char pointers point to character buffers of length
+ctrl->maximum + 1, and are always 0-terminated.
+
+Unless the control is marked volatile the p_cur field points to the the
+current cached control value. When you create a new control this value is made
+identical to the default value. After calling v4l2_ctrl_handler_setup() this
+value is passed to the hardware. It is generally a good idea to call this
+function.
+
+Whenever a new value is set that new value is automatically cached. This means
+that most drivers do not need to implement the g_volatile_ctrl() op. The
+exception is for controls that return a volatile register such as a signal
+strength read-out that changes continuously. In that case you will need to
+implement g_volatile_ctrl like this:
+
+ static int foo_g_volatile_ctrl(struct v4l2_ctrl *ctrl)
+ {
+ switch (ctrl->id) {
+ case V4L2_CID_BRIGHTNESS:
+ ctrl->val = read_reg(0x123);
+ break;
+ }
+ }
+
+Note that you use the 'new value' union as well in g_volatile_ctrl. In general
+controls that need to implement g_volatile_ctrl are read-only controls. If they
+are not, a V4L2_EVENT_CTRL_CH_VALUE will not be generated when the control
+changes.
+
+To mark a control as volatile you have to set V4L2_CTRL_FLAG_VOLATILE:
+
+ ctrl = v4l2_ctrl_new_std(&sd->ctrl_handler, ...);
+ if (ctrl)
+ ctrl->flags |= V4L2_CTRL_FLAG_VOLATILE;
+
+For try/s_ctrl the new values (i.e. as passed by the user) are filled in and
+you can modify them in try_ctrl or set them in s_ctrl. The 'cur' union
+contains the current value, which you can use (but not change!) as well.
+
+If s_ctrl returns 0 (OK), then the control framework will copy the new final
+values to the 'cur' union.
+
+While in g_volatile/s/try_ctrl you can access the value of all controls owned
+by the same handler since the handler's lock is held. If you need to access
+the value of controls owned by other handlers, then you have to be very careful
+not to introduce deadlocks.
+
+Outside of the control ops you have to go through to helper functions to get
+or set a single control value safely in your driver:
+
+ s32 v4l2_ctrl_g_ctrl(struct v4l2_ctrl *ctrl);
+ int v4l2_ctrl_s_ctrl(struct v4l2_ctrl *ctrl, s32 val);
+
+These functions go through the control framework just as VIDIOC_G/S_CTRL ioctls
+do. Don't use these inside the control ops g_volatile/s/try_ctrl, though, that
+will result in a deadlock since these helpers lock the handler as well.
+
+You can also take the handler lock yourself:
+
+ mutex_lock(&state->ctrl_handler.lock);
+ pr_info("String value is '%s'\n", ctrl1->p_cur.p_char);
+ pr_info("Integer value is '%s'\n", ctrl2->cur.val);
+ mutex_unlock(&state->ctrl_handler.lock);
+
+
+Menu Controls
+=============
+
+The v4l2_ctrl struct contains this union:
+
+ union {
+ u32 step;
+ u32 menu_skip_mask;
+ };
+
+For menu controls menu_skip_mask is used. What it does is that it allows you
+to easily exclude certain menu items. This is used in the VIDIOC_QUERYMENU
+implementation where you can return -EINVAL if a certain menu item is not
+present. Note that VIDIOC_QUERYCTRL always returns a step value of 1 for
+menu controls.
+
+A good example is the MPEG Audio Layer II Bitrate menu control where the
+menu is a list of standardized possible bitrates. But in practice hardware
+implementations will only support a subset of those. By setting the skip
+mask you can tell the framework which menu items should be skipped. Setting
+it to 0 means that all menu items are supported.
+
+You set this mask either through the v4l2_ctrl_config struct for a custom
+control, or by calling v4l2_ctrl_new_std_menu().
+
+
+Custom Controls
+===============
+
+Driver specific controls can be created using v4l2_ctrl_new_custom():
+
+ static const struct v4l2_ctrl_config ctrl_filter = {
+ .ops = &ctrl_custom_ops,
+ .id = V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER,
+ .name = "Spatial Filter",
+ .type = V4L2_CTRL_TYPE_INTEGER,
+ .flags = V4L2_CTRL_FLAG_SLIDER,
+ .max = 15,
+ .step = 1,
+ };
+
+ ctrl = v4l2_ctrl_new_custom(&foo->ctrl_handler, &ctrl_filter, NULL);
+
+The last argument is the priv pointer which can be set to driver-specific
+private data.
+
+The v4l2_ctrl_config struct also has a field to set the is_private flag.
+
+If the name field is not set, then the framework will assume this is a standard
+control and will fill in the name, type and flags fields accordingly.
+
+
+Active and Grabbed Controls
+===========================
+
+If you get more complex relationships between controls, then you may have to
+activate and deactivate controls. For example, if the Chroma AGC control is
+on, then the Chroma Gain control is inactive. That is, you may set it, but
+the value will not be used by the hardware as long as the automatic gain
+control is on. Typically user interfaces can disable such input fields.
+
+You can set the 'active' status using v4l2_ctrl_activate(). By default all
+controls are active. Note that the framework does not check for this flag.
+It is meant purely for GUIs. The function is typically called from within
+s_ctrl.
+
+The other flag is the 'grabbed' flag. A grabbed control means that you cannot
+change it because it is in use by some resource. Typical examples are MPEG
+bitrate controls that cannot be changed while capturing is in progress.
+
+If a control is set to 'grabbed' using v4l2_ctrl_grab(), then the framework
+will return -EBUSY if an attempt is made to set this control. The
+v4l2_ctrl_grab() function is typically called from the driver when it
+starts or stops streaming.
+
+
+Control Clusters
+================
+
+By default all controls are independent from the others. But in more
+complex scenarios you can get dependencies from one control to another.
+In that case you need to 'cluster' them:
+
+ struct foo {
+ struct v4l2_ctrl_handler ctrl_handler;
+#define AUDIO_CL_VOLUME (0)
+#define AUDIO_CL_MUTE (1)
+ struct v4l2_ctrl *audio_cluster[2];
+ ...
+ };
+
+ state->audio_cluster[AUDIO_CL_VOLUME] =
+ v4l2_ctrl_new_std(&state->ctrl_handler, ...);
+ state->audio_cluster[AUDIO_CL_MUTE] =
+ v4l2_ctrl_new_std(&state->ctrl_handler, ...);
+ v4l2_ctrl_cluster(ARRAY_SIZE(state->audio_cluster), state->audio_cluster);
+
+From now on whenever one or more of the controls belonging to the same
+cluster is set (or 'gotten', or 'tried'), only the control ops of the first
+control ('volume' in this example) is called. You effectively create a new
+composite control. Similar to how a 'struct' works in C.
+
+So when s_ctrl is called with V4L2_CID_AUDIO_VOLUME as argument, you should set
+all two controls belonging to the audio_cluster:
+
+ static int foo_s_ctrl(struct v4l2_ctrl *ctrl)
+ {
+ struct foo *state = container_of(ctrl->handler, struct foo, ctrl_handler);
+
+ switch (ctrl->id) {
+ case V4L2_CID_AUDIO_VOLUME: {
+ struct v4l2_ctrl *mute = ctrl->cluster[AUDIO_CL_MUTE];
+
+ write_reg(0x123, mute->val ? 0 : ctrl->val);
+ break;
+ }
+ case V4L2_CID_CONTRAST:
+ write_reg(0x456, ctrl->val);
+ break;
+ }
+ return 0;
+ }
+
+In the example above the following are equivalent for the VOLUME case:
+
+ ctrl == ctrl->cluster[AUDIO_CL_VOLUME] == state->audio_cluster[AUDIO_CL_VOLUME]
+ ctrl->cluster[AUDIO_CL_MUTE] == state->audio_cluster[AUDIO_CL_MUTE]
+
+In practice using cluster arrays like this becomes very tiresome. So instead
+the following equivalent method is used:
+
+ struct {
+ /* audio cluster */
+ struct v4l2_ctrl *volume;
+ struct v4l2_ctrl *mute;
+ };
+
+The anonymous struct is used to clearly 'cluster' these two control pointers,
+but it serves no other purpose. The effect is the same as creating an
+array with two control pointers. So you can just do:
+
+ state->volume = v4l2_ctrl_new_std(&state->ctrl_handler, ...);
+ state->mute = v4l2_ctrl_new_std(&state->ctrl_handler, ...);
+ v4l2_ctrl_cluster(2, &state->volume);
+
+And in foo_s_ctrl you can use these pointers directly: state->mute->val.
+
+Note that controls in a cluster may be NULL. For example, if for some
+reason mute was never added (because the hardware doesn't support that
+particular feature), then mute will be NULL. So in that case we have a
+cluster of 2 controls, of which only 1 is actually instantiated. The
+only restriction is that the first control of the cluster must always be
+present, since that is the 'master' control of the cluster. The master
+control is the one that identifies the cluster and that provides the
+pointer to the v4l2_ctrl_ops struct that is used for that cluster.
+
+Obviously, all controls in the cluster array must be initialized to either
+a valid control or to NULL.
+
+In rare cases you might want to know which controls of a cluster actually
+were set explicitly by the user. For this you can check the 'is_new' flag of
+each control. For example, in the case of a volume/mute cluster the 'is_new'
+flag of the mute control would be set if the user called VIDIOC_S_CTRL for
+mute only. If the user would call VIDIOC_S_EXT_CTRLS for both mute and volume
+controls, then the 'is_new' flag would be 1 for both controls.
+
+The 'is_new' flag is always 1 when called from v4l2_ctrl_handler_setup().
+
+
+Handling autogain/gain-type Controls with Auto Clusters
+=======================================================
+
+A common type of control cluster is one that handles 'auto-foo/foo'-type
+controls. Typical examples are autogain/gain, autoexposure/exposure,
+autowhitebalance/red balance/blue balance. In all cases you have one control
+that determines whether another control is handled automatically by the hardware,
+or whether it is under manual control from the user.
+
+If the cluster is in automatic mode, then the manual controls should be
+marked inactive and volatile. When the volatile controls are read the
+g_volatile_ctrl operation should return the value that the hardware's automatic
+mode set up automatically.
+
+If the cluster is put in manual mode, then the manual controls should become
+active again and the volatile flag is cleared (so g_volatile_ctrl is no longer
+called while in manual mode). In addition just before switching to manual mode
+the current values as determined by the auto mode are copied as the new manual
+values.
+
+Finally the V4L2_CTRL_FLAG_UPDATE should be set for the auto control since
+changing that control affects the control flags of the manual controls.
+
+In order to simplify this a special variation of v4l2_ctrl_cluster was
+introduced:
+
+void v4l2_ctrl_auto_cluster(unsigned ncontrols, struct v4l2_ctrl **controls,
+ u8 manual_val, bool set_volatile);
+
+The first two arguments are identical to v4l2_ctrl_cluster. The third argument
+tells the framework which value switches the cluster into manual mode. The
+last argument will optionally set V4L2_CTRL_FLAG_VOLATILE for the non-auto controls.
+If it is false, then the manual controls are never volatile. You would typically
+use that if the hardware does not give you the option to read back to values as
+determined by the auto mode (e.g. if autogain is on, the hardware doesn't allow
+you to obtain the current gain value).
+
+The first control of the cluster is assumed to be the 'auto' control.
+
+Using this function will ensure that you don't need to handle all the complex
+flag and volatile handling.
+
+
+VIDIOC_LOG_STATUS Support
+=========================
+
+This ioctl allow you to dump the current status of a driver to the kernel log.
+The v4l2_ctrl_handler_log_status(ctrl_handler, prefix) can be used to dump the
+value of the controls owned by the given handler to the log. You can supply a
+prefix as well. If the prefix didn't end with a space, then ': ' will be added
+for you.
+
+
+Different Handlers for Different Video Nodes
+============================================
+
+Usually the V4L2 driver has just one control handler that is global for
+all video nodes. But you can also specify different control handlers for
+different video nodes. You can do that by manually setting the ctrl_handler
+field of struct video_device.
+
+That is no problem if there are no subdevs involved but if there are, then
+you need to block the automatic merging of subdev controls to the global
+control handler. You do that by simply setting the ctrl_handler field in
+struct v4l2_device to NULL. Now v4l2_device_register_subdev() will no longer
+merge subdev controls.
+
+After each subdev was added, you will then have to call v4l2_ctrl_add_handler
+manually to add the subdev's control handler (sd->ctrl_handler) to the desired
+control handler. This control handler may be specific to the video_device or
+for a subset of video_device's. For example: the radio device nodes only have
+audio controls, while the video and vbi device nodes share the same control
+handler for the audio and video controls.
+
+If you want to have one handler (e.g. for a radio device node) have a subset
+of another handler (e.g. for a video device node), then you should first add
+the controls to the first handler, add the other controls to the second
+handler and finally add the first handler to the second. For example:
+
+ v4l2_ctrl_new_std(&radio_ctrl_handler, &radio_ops, V4L2_CID_AUDIO_VOLUME, ...);
+ v4l2_ctrl_new_std(&radio_ctrl_handler, &radio_ops, V4L2_CID_AUDIO_MUTE, ...);
+ v4l2_ctrl_new_std(&video_ctrl_handler, &video_ops, V4L2_CID_BRIGHTNESS, ...);
+ v4l2_ctrl_new_std(&video_ctrl_handler, &video_ops, V4L2_CID_CONTRAST, ...);
+ v4l2_ctrl_add_handler(&video_ctrl_handler, &radio_ctrl_handler, NULL);
+
+The last argument to v4l2_ctrl_add_handler() is a filter function that allows
+you to filter which controls will be added. Set it to NULL if you want to add
+all controls.
+
+Or you can add specific controls to a handler:
+
+ volume = v4l2_ctrl_new_std(&video_ctrl_handler, &ops, V4L2_CID_AUDIO_VOLUME, ...);
+ v4l2_ctrl_new_std(&video_ctrl_handler, &ops, V4L2_CID_BRIGHTNESS, ...);
+ v4l2_ctrl_new_std(&video_ctrl_handler, &ops, V4L2_CID_CONTRAST, ...);
+ v4l2_ctrl_add_ctrl(&radio_ctrl_handler, volume);
+
+What you should not do is make two identical controls for two handlers.
+For example:
+
+ v4l2_ctrl_new_std(&radio_ctrl_handler, &radio_ops, V4L2_CID_AUDIO_MUTE, ...);
+ v4l2_ctrl_new_std(&video_ctrl_handler, &video_ops, V4L2_CID_AUDIO_MUTE, ...);
+
+This would be bad since muting the radio would not change the video mute
+control. The rule is to have one control for each hardware 'knob' that you
+can twiddle.
+
+
+Finding Controls
+================
+
+Normally you have created the controls yourself and you can store the struct
+v4l2_ctrl pointer into your own struct.
+
+But sometimes you need to find a control from another handler that you do
+not own. For example, if you have to find a volume control from a subdev.
+
+You can do that by calling v4l2_ctrl_find:
+
+ struct v4l2_ctrl *volume;
+
+ volume = v4l2_ctrl_find(sd->ctrl_handler, V4L2_CID_AUDIO_VOLUME);
+
+Since v4l2_ctrl_find will lock the handler you have to be careful where you
+use it. For example, this is not a good idea:
+
+ struct v4l2_ctrl_handler ctrl_handler;
+
+ v4l2_ctrl_new_std(&ctrl_handler, &video_ops, V4L2_CID_BRIGHTNESS, ...);
+ v4l2_ctrl_new_std(&ctrl_handler, &video_ops, V4L2_CID_CONTRAST, ...);
+
+...and in video_ops.s_ctrl:
+
+ case V4L2_CID_BRIGHTNESS:
+ contrast = v4l2_find_ctrl(&ctrl_handler, V4L2_CID_CONTRAST);
+ ...
+
+When s_ctrl is called by the framework the ctrl_handler.lock is already taken, so
+attempting to find another control from the same handler will deadlock.
+
+It is recommended not to use this function from inside the control ops.
+
+
+Inheriting Controls
+===================
+
+When one control handler is added to another using v4l2_ctrl_add_handler, then
+by default all controls from one are merged to the other. But a subdev might
+have low-level controls that make sense for some advanced embedded system, but
+not when it is used in consumer-level hardware. In that case you want to keep
+those low-level controls local to the subdev. You can do this by simply
+setting the 'is_private' flag of the control to 1:
+
+ static const struct v4l2_ctrl_config ctrl_private = {
+ .ops = &ctrl_custom_ops,
+ .id = V4L2_CID_...,
+ .name = "Some Private Control",
+ .type = V4L2_CTRL_TYPE_INTEGER,
+ .max = 15,
+ .step = 1,
+ .is_private = 1,
+ };
+
+ ctrl = v4l2_ctrl_new_custom(&foo->ctrl_handler, &ctrl_private, NULL);
+
+These controls will now be skipped when v4l2_ctrl_add_handler is called.
+
+
+V4L2_CTRL_TYPE_CTRL_CLASS Controls
+==================================
+
+Controls of this type can be used by GUIs to get the name of the control class.
+A fully featured GUI can make a dialog with multiple tabs with each tab
+containing the controls belonging to a particular control class. The name of
+each tab can be found by querying a special control with ID <control class | 1>.
+
+Drivers do not have to care about this. The framework will automatically add
+a control of this type whenever the first control belonging to a new control
+class is added.
+
+
+Adding Notify Callbacks
+=======================
+
+Sometimes the platform or bridge driver needs to be notified when a control
+from a sub-device driver changes. You can set a notify callback by calling
+this function:
+
+void v4l2_ctrl_notify(struct v4l2_ctrl *ctrl,
+ void (*notify)(struct v4l2_ctrl *ctrl, void *priv), void *priv);
+
+Whenever the give control changes value the notify callback will be called
+with a pointer to the control and the priv pointer that was passed with
+v4l2_ctrl_notify. Note that the control's handler lock is held when the
+notify function is called.
+
+There can be only one notify function per control handler. Any attempt
+to set another notify function will cause a WARN_ON.
diff --git a/Documentation/video4linux/v4l2-framework.txt b/Documentation/video4linux/v4l2-framework.txt
new file mode 100644
index 000000000..59e619f9b
--- /dev/null
+++ b/Documentation/video4linux/v4l2-framework.txt
@@ -0,0 +1,1156 @@
+Overview of the V4L2 driver framework
+=====================================
+
+This text documents the various structures provided by the V4L2 framework and
+their relationships.
+
+
+Introduction
+------------
+
+The V4L2 drivers tend to be very complex due to the complexity of the
+hardware: most devices have multiple ICs, export multiple device nodes in
+/dev, and create also non-V4L2 devices such as DVB, ALSA, FB, I2C and input
+(IR) devices.
+
+Especially the fact that V4L2 drivers have to setup supporting ICs to
+do audio/video muxing/encoding/decoding makes it more complex than most.
+Usually these ICs are connected to the main bridge driver through one or
+more I2C busses, but other busses can also be used. Such devices are
+called 'sub-devices'.
+
+For a long time the framework was limited to the video_device struct for
+creating V4L device nodes and video_buf for handling the video buffers
+(note that this document does not discuss the video_buf framework).
+
+This meant that all drivers had to do the setup of device instances and
+connecting to sub-devices themselves. Some of this is quite complicated
+to do right and many drivers never did do it correctly.
+
+There is also a lot of common code that could never be refactored due to
+the lack of a framework.
+
+So this framework sets up the basic building blocks that all drivers
+need and this same framework should make it much easier to refactor
+common code into utility functions shared by all drivers.
+
+A good example to look at as a reference is the v4l2-pci-skeleton.c
+source that is available in this directory. It is a skeleton driver for
+a PCI capture card, and demonstrates how to use the V4L2 driver
+framework. It can be used as a template for real PCI video capture driver.
+
+Structure of a driver
+---------------------
+
+All drivers have the following structure:
+
+1) A struct for each device instance containing the device state.
+
+2) A way of initializing and commanding sub-devices (if any).
+
+3) Creating V4L2 device nodes (/dev/videoX, /dev/vbiX and /dev/radioX)
+ and keeping track of device-node specific data.
+
+4) Filehandle-specific structs containing per-filehandle data;
+
+5) video buffer handling.
+
+This is a rough schematic of how it all relates:
+
+ device instances
+ |
+ +-sub-device instances
+ |
+ \-V4L2 device nodes
+ |
+ \-filehandle instances
+
+
+Structure of the framework
+--------------------------
+
+The framework closely resembles the driver structure: it has a v4l2_device
+struct for the device instance data, a v4l2_subdev struct to refer to
+sub-device instances, the video_device struct stores V4L2 device node data
+and the v4l2_fh struct keeps track of filehandle instances.
+
+The V4L2 framework also optionally integrates with the media framework. If a
+driver sets the struct v4l2_device mdev field, sub-devices and video nodes
+will automatically appear in the media framework as entities.
+
+
+struct v4l2_device
+------------------
+
+Each device instance is represented by a struct v4l2_device (v4l2-device.h).
+Very simple devices can just allocate this struct, but most of the time you
+would embed this struct inside a larger struct.
+
+You must register the device instance:
+
+ v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev);
+
+Registration will initialize the v4l2_device struct. If the dev->driver_data
+field is NULL, it will be linked to v4l2_dev.
+
+Drivers that want integration with the media device framework need to set
+dev->driver_data manually to point to the driver-specific device structure
+that embed the struct v4l2_device instance. This is achieved by a
+dev_set_drvdata() call before registering the V4L2 device instance. They must
+also set the struct v4l2_device mdev field to point to a properly initialized
+and registered media_device instance.
+
+If v4l2_dev->name is empty then it will be set to a value derived from dev
+(driver name followed by the bus_id, to be precise). If you set it up before
+calling v4l2_device_register then it will be untouched. If dev is NULL, then
+you *must* setup v4l2_dev->name before calling v4l2_device_register.
+
+You can use v4l2_device_set_name() to set the name based on a driver name and
+a driver-global atomic_t instance. This will generate names like ivtv0, ivtv1,
+etc. If the name ends with a digit, then it will insert a dash: cx18-0,
+cx18-1, etc. This function returns the instance number.
+
+The first 'dev' argument is normally the struct device pointer of a pci_dev,
+usb_interface or platform_device. It is rare for dev to be NULL, but it happens
+with ISA devices or when one device creates multiple PCI devices, thus making
+it impossible to associate v4l2_dev with a particular parent.
+
+You can also supply a notify() callback that can be called by sub-devices to
+notify you of events. Whether you need to set this depends on the sub-device.
+Any notifications a sub-device supports must be defined in a header in
+include/media/<subdevice>.h.
+
+You unregister with:
+
+ v4l2_device_unregister(struct v4l2_device *v4l2_dev);
+
+If the dev->driver_data field points to v4l2_dev, it will be reset to NULL.
+Unregistering will also automatically unregister all subdevs from the device.
+
+If you have a hotpluggable device (e.g. a USB device), then when a disconnect
+happens the parent device becomes invalid. Since v4l2_device has a pointer to
+that parent device it has to be cleared as well to mark that the parent is
+gone. To do this call:
+
+ v4l2_device_disconnect(struct v4l2_device *v4l2_dev);
+
+This does *not* unregister the subdevs, so you still need to call the
+v4l2_device_unregister() function for that. If your driver is not hotpluggable,
+then there is no need to call v4l2_device_disconnect().
+
+Sometimes you need to iterate over all devices registered by a specific
+driver. This is usually the case if multiple device drivers use the same
+hardware. E.g. the ivtvfb driver is a framebuffer driver that uses the ivtv
+hardware. The same is true for alsa drivers for example.
+
+You can iterate over all registered devices as follows:
+
+static int callback(struct device *dev, void *p)
+{
+ struct v4l2_device *v4l2_dev = dev_get_drvdata(dev);
+
+ /* test if this device was inited */
+ if (v4l2_dev == NULL)
+ return 0;
+ ...
+ return 0;
+}
+
+int iterate(void *p)
+{
+ struct device_driver *drv;
+ int err;
+
+ /* Find driver 'ivtv' on the PCI bus.
+ pci_bus_type is a global. For USB busses use usb_bus_type. */
+ drv = driver_find("ivtv", &pci_bus_type);
+ /* iterate over all ivtv device instances */
+ err = driver_for_each_device(drv, NULL, p, callback);
+ put_driver(drv);
+ return err;
+}
+
+Sometimes you need to keep a running counter of the device instance. This is
+commonly used to map a device instance to an index of a module option array.
+
+The recommended approach is as follows:
+
+static atomic_t drv_instance = ATOMIC_INIT(0);
+
+static int drv_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
+{
+ ...
+ state->instance = atomic_inc_return(&drv_instance) - 1;
+}
+
+If you have multiple device nodes then it can be difficult to know when it is
+safe to unregister v4l2_device for hotpluggable devices. For this purpose
+v4l2_device has refcounting support. The refcount is increased whenever
+video_register_device is called and it is decreased whenever that device node
+is released. When the refcount reaches zero, then the v4l2_device release()
+callback is called. You can do your final cleanup there.
+
+If other device nodes (e.g. ALSA) are created, then you can increase and
+decrease the refcount manually as well by calling:
+
+void v4l2_device_get(struct v4l2_device *v4l2_dev);
+
+or:
+
+int v4l2_device_put(struct v4l2_device *v4l2_dev);
+
+Since the initial refcount is 1 you also need to call v4l2_device_put in the
+disconnect() callback (for USB devices) or in the remove() callback (for e.g.
+PCI devices), otherwise the refcount will never reach 0.
+
+struct v4l2_subdev
+------------------
+
+Many drivers need to communicate with sub-devices. These devices can do all
+sort of tasks, but most commonly they handle audio and/or video muxing,
+encoding or decoding. For webcams common sub-devices are sensors and camera
+controllers.
+
+Usually these are I2C devices, but not necessarily. In order to provide the
+driver with a consistent interface to these sub-devices the v4l2_subdev struct
+(v4l2-subdev.h) was created.
+
+Each sub-device driver must have a v4l2_subdev struct. This struct can be
+stand-alone for simple sub-devices or it might be embedded in a larger struct
+if more state information needs to be stored. Usually there is a low-level
+device struct (e.g. i2c_client) that contains the device data as setup
+by the kernel. It is recommended to store that pointer in the private
+data of v4l2_subdev using v4l2_set_subdevdata(). That makes it easy to go
+from a v4l2_subdev to the actual low-level bus-specific device data.
+
+You also need a way to go from the low-level struct to v4l2_subdev. For the
+common i2c_client struct the i2c_set_clientdata() call is used to store a
+v4l2_subdev pointer, for other busses you may have to use other methods.
+
+Bridges might also need to store per-subdev private data, such as a pointer to
+bridge-specific per-subdev private data. The v4l2_subdev structure provides
+host private data for that purpose that can be accessed with
+v4l2_get_subdev_hostdata() and v4l2_set_subdev_hostdata().
+
+From the bridge driver perspective you load the sub-device module and somehow
+obtain the v4l2_subdev pointer. For i2c devices this is easy: you call
+i2c_get_clientdata(). For other busses something similar needs to be done.
+Helper functions exists for sub-devices on an I2C bus that do most of this
+tricky work for you.
+
+Each v4l2_subdev contains function pointers that sub-device drivers can
+implement (or leave NULL if it is not applicable). Since sub-devices can do
+so many different things and you do not want to end up with a huge ops struct
+of which only a handful of ops are commonly implemented, the function pointers
+are sorted according to category and each category has its own ops struct.
+
+The top-level ops struct contains pointers to the category ops structs, which
+may be NULL if the subdev driver does not support anything from that category.
+
+It looks like this:
+
+struct v4l2_subdev_core_ops {
+ int (*log_status)(struct v4l2_subdev *sd);
+ int (*init)(struct v4l2_subdev *sd, u32 val);
+ ...
+};
+
+struct v4l2_subdev_tuner_ops {
+ ...
+};
+
+struct v4l2_subdev_audio_ops {
+ ...
+};
+
+struct v4l2_subdev_video_ops {
+ ...
+};
+
+struct v4l2_subdev_pad_ops {
+ ...
+};
+
+struct v4l2_subdev_ops {
+ const struct v4l2_subdev_core_ops *core;
+ const struct v4l2_subdev_tuner_ops *tuner;
+ const struct v4l2_subdev_audio_ops *audio;
+ const struct v4l2_subdev_video_ops *video;
+ const struct v4l2_subdev_pad_ops *video;
+};
+
+The core ops are common to all subdevs, the other categories are implemented
+depending on the sub-device. E.g. a video device is unlikely to support the
+audio ops and vice versa.
+
+This setup limits the number of function pointers while still making it easy
+to add new ops and categories.
+
+A sub-device driver initializes the v4l2_subdev struct using:
+
+ v4l2_subdev_init(sd, &ops);
+
+Afterwards you need to initialize subdev->name with a unique name and set the
+module owner. This is done for you if you use the i2c helper functions.
+
+If integration with the media framework is needed, you must initialize the
+media_entity struct embedded in the v4l2_subdev struct (entity field) by
+calling media_entity_init():
+
+ struct media_pad *pads = &my_sd->pads;
+ int err;
+
+ err = media_entity_init(&sd->entity, npads, pads, 0);
+
+The pads array must have been previously initialized. There is no need to
+manually set the struct media_entity type and name fields, but the revision
+field must be initialized if needed.
+
+A reference to the entity will be automatically acquired/released when the
+subdev device node (if any) is opened/closed.
+
+Don't forget to cleanup the media entity before the sub-device is destroyed:
+
+ media_entity_cleanup(&sd->entity);
+
+If the subdev driver intends to process video and integrate with the media
+framework, it must implement format related functionality using
+v4l2_subdev_pad_ops instead of v4l2_subdev_video_ops.
+
+In that case, the subdev driver may set the link_validate field to provide
+its own link validation function. The link validation function is called for
+every link in the pipeline where both of the ends of the links are V4L2
+sub-devices. The driver is still responsible for validating the correctness
+of the format configuration between sub-devices and video nodes.
+
+If link_validate op is not set, the default function
+v4l2_subdev_link_validate_default() is used instead. This function ensures
+that width, height and the media bus pixel code are equal on both source and
+sink of the link. Subdev drivers are also free to use this function to
+perform the checks mentioned above in addition to their own checks.
+
+There are currently two ways to register subdevices with the V4L2 core. The
+first (traditional) possibility is to have subdevices registered by bridge
+drivers. This can be done when the bridge driver has the complete information
+about subdevices connected to it and knows exactly when to register them. This
+is typically the case for internal subdevices, like video data processing units
+within SoCs or complex PCI(e) boards, camera sensors in USB cameras or connected
+to SoCs, which pass information about them to bridge drivers, usually in their
+platform data.
+
+There are however also situations where subdevices have to be registered
+asynchronously to bridge devices. An example of such a configuration is a Device
+Tree based system where information about subdevices is made available to the
+system independently from the bridge devices, e.g. when subdevices are defined
+in DT as I2C device nodes. The API used in this second case is described further
+below.
+
+Using one or the other registration method only affects the probing process, the
+run-time bridge-subdevice interaction is in both cases the same.
+
+In the synchronous case a device (bridge) driver needs to register the
+v4l2_subdev with the v4l2_device:
+
+ int err = v4l2_device_register_subdev(v4l2_dev, sd);
+
+This can fail if the subdev module disappeared before it could be registered.
+After this function was called successfully the subdev->dev field points to
+the v4l2_device.
+
+If the v4l2_device parent device has a non-NULL mdev field, the sub-device
+entity will be automatically registered with the media device.
+
+You can unregister a sub-device using:
+
+ v4l2_device_unregister_subdev(sd);
+
+Afterwards the subdev module can be unloaded and sd->dev == NULL.
+
+You can call an ops function either directly:
+
+ err = sd->ops->core->g_std(sd, &norm);
+
+but it is better and easier to use this macro:
+
+ err = v4l2_subdev_call(sd, core, g_std, &norm);
+
+The macro will to the right NULL pointer checks and returns -ENODEV if subdev
+is NULL, -ENOIOCTLCMD if either subdev->core or subdev->core->g_std is
+NULL, or the actual result of the subdev->ops->core->g_std ops.
+
+It is also possible to call all or a subset of the sub-devices:
+
+ v4l2_device_call_all(v4l2_dev, 0, core, g_std, &norm);
+
+Any subdev that does not support this ops is skipped and error results are
+ignored. If you want to check for errors use this:
+
+ err = v4l2_device_call_until_err(v4l2_dev, 0, core, g_std, &norm);
+
+Any error except -ENOIOCTLCMD will exit the loop with that error. If no
+errors (except -ENOIOCTLCMD) occurred, then 0 is returned.
+
+The second argument to both calls is a group ID. If 0, then all subdevs are
+called. If non-zero, then only those whose group ID match that value will
+be called. Before a bridge driver registers a subdev it can set sd->grp_id
+to whatever value it wants (it's 0 by default). This value is owned by the
+bridge driver and the sub-device driver will never modify or use it.
+
+The group ID gives the bridge driver more control how callbacks are called.
+For example, there may be multiple audio chips on a board, each capable of
+changing the volume. But usually only one will actually be used when the
+user want to change the volume. You can set the group ID for that subdev to
+e.g. AUDIO_CONTROLLER and specify that as the group ID value when calling
+v4l2_device_call_all(). That ensures that it will only go to the subdev
+that needs it.
+
+If the sub-device needs to notify its v4l2_device parent of an event, then
+it can call v4l2_subdev_notify(sd, notification, arg). This macro checks
+whether there is a notify() callback defined and returns -ENODEV if not.
+Otherwise the result of the notify() call is returned.
+
+The advantage of using v4l2_subdev is that it is a generic struct and does
+not contain any knowledge about the underlying hardware. So a driver might
+contain several subdevs that use an I2C bus, but also a subdev that is
+controlled through GPIO pins. This distinction is only relevant when setting
+up the device, but once the subdev is registered it is completely transparent.
+
+
+In the asynchronous case subdevice probing can be invoked independently of the
+bridge driver availability. The subdevice driver then has to verify whether all
+the requirements for a successful probing are satisfied. This can include a
+check for a master clock availability. If any of the conditions aren't satisfied
+the driver might decide to return -EPROBE_DEFER to request further reprobing
+attempts. Once all conditions are met the subdevice shall be registered using
+the v4l2_async_register_subdev() function. Unregistration is performed using
+the v4l2_async_unregister_subdev() call. Subdevices registered this way are
+stored in a global list of subdevices, ready to be picked up by bridge drivers.
+
+Bridge drivers in turn have to register a notifier object with an array of
+subdevice descriptors that the bridge device needs for its operation. This is
+performed using the v4l2_async_notifier_register() call. To unregister the
+notifier the driver has to call v4l2_async_notifier_unregister(). The former of
+the two functions takes two arguments: a pointer to struct v4l2_device and a
+pointer to struct v4l2_async_notifier. The latter contains a pointer to an array
+of pointers to subdevice descriptors of type struct v4l2_async_subdev type. The
+V4L2 core will then use these descriptors to match asynchronously registered
+subdevices to them. If a match is detected the .bound() notifier callback is
+called. After all subdevices have been located the .complete() callback is
+called. When a subdevice is removed from the system the .unbind() method is
+called. All three callbacks are optional.
+
+
+V4L2 sub-device userspace API
+-----------------------------
+
+Beside exposing a kernel API through the v4l2_subdev_ops structure, V4L2
+sub-devices can also be controlled directly by userspace applications.
+
+Device nodes named v4l-subdevX can be created in /dev to access sub-devices
+directly. If a sub-device supports direct userspace configuration it must set
+the V4L2_SUBDEV_FL_HAS_DEVNODE flag before being registered.
+
+After registering sub-devices, the v4l2_device driver can create device nodes
+for all registered sub-devices marked with V4L2_SUBDEV_FL_HAS_DEVNODE by calling
+v4l2_device_register_subdev_nodes(). Those device nodes will be automatically
+removed when sub-devices are unregistered.
+
+The device node handles a subset of the V4L2 API.
+
+VIDIOC_QUERYCTRL
+VIDIOC_QUERYMENU
+VIDIOC_G_CTRL
+VIDIOC_S_CTRL
+VIDIOC_G_EXT_CTRLS
+VIDIOC_S_EXT_CTRLS
+VIDIOC_TRY_EXT_CTRLS
+
+ The controls ioctls are identical to the ones defined in V4L2. They
+ behave identically, with the only exception that they deal only with
+ controls implemented in the sub-device. Depending on the driver, those
+ controls can be also be accessed through one (or several) V4L2 device
+ nodes.
+
+VIDIOC_DQEVENT
+VIDIOC_SUBSCRIBE_EVENT
+VIDIOC_UNSUBSCRIBE_EVENT
+
+ The events ioctls are identical to the ones defined in V4L2. They
+ behave identically, with the only exception that they deal only with
+ events generated by the sub-device. Depending on the driver, those
+ events can also be reported by one (or several) V4L2 device nodes.
+
+ Sub-device drivers that want to use events need to set the
+ V4L2_SUBDEV_USES_EVENTS v4l2_subdev::flags and initialize
+ v4l2_subdev::nevents to events queue depth before registering the
+ sub-device. After registration events can be queued as usual on the
+ v4l2_subdev::devnode device node.
+
+ To properly support events, the poll() file operation is also
+ implemented.
+
+Private ioctls
+
+ All ioctls not in the above list are passed directly to the sub-device
+ driver through the core::ioctl operation.
+
+
+I2C sub-device drivers
+----------------------
+
+Since these drivers are so common, special helper functions are available to
+ease the use of these drivers (v4l2-common.h).
+
+The recommended method of adding v4l2_subdev support to an I2C driver is to
+embed the v4l2_subdev struct into the state struct that is created for each
+I2C device instance. Very simple devices have no state struct and in that case
+you can just create a v4l2_subdev directly.
+
+A typical state struct would look like this (where 'chipname' is replaced by
+the name of the chip):
+
+struct chipname_state {
+ struct v4l2_subdev sd;
+ ... /* additional state fields */
+};
+
+Initialize the v4l2_subdev struct as follows:
+
+ v4l2_i2c_subdev_init(&state->sd, client, subdev_ops);
+
+This function will fill in all the fields of v4l2_subdev and ensure that the
+v4l2_subdev and i2c_client both point to one another.
+
+You should also add a helper inline function to go from a v4l2_subdev pointer
+to a chipname_state struct:
+
+static inline struct chipname_state *to_state(struct v4l2_subdev *sd)
+{
+ return container_of(sd, struct chipname_state, sd);
+}
+
+Use this to go from the v4l2_subdev struct to the i2c_client struct:
+
+ struct i2c_client *client = v4l2_get_subdevdata(sd);
+
+And this to go from an i2c_client to a v4l2_subdev struct:
+
+ struct v4l2_subdev *sd = i2c_get_clientdata(client);
+
+Make sure to call v4l2_device_unregister_subdev(sd) when the remove() callback
+is called. This will unregister the sub-device from the bridge driver. It is
+safe to call this even if the sub-device was never registered.
+
+You need to do this because when the bridge driver destroys the i2c adapter
+the remove() callbacks are called of the i2c devices on that adapter.
+After that the corresponding v4l2_subdev structures are invalid, so they
+have to be unregistered first. Calling v4l2_device_unregister_subdev(sd)
+from the remove() callback ensures that this is always done correctly.
+
+
+The bridge driver also has some helper functions it can use:
+
+struct v4l2_subdev *sd = v4l2_i2c_new_subdev(v4l2_dev, adapter,
+ "module_foo", "chipid", 0x36, NULL);
+
+This loads the given module (can be NULL if no module needs to be loaded) and
+calls i2c_new_device() with the given i2c_adapter and chip/address arguments.
+If all goes well, then it registers the subdev with the v4l2_device.
+
+You can also use the last argument of v4l2_i2c_new_subdev() to pass an array
+of possible I2C addresses that it should probe. These probe addresses are
+only used if the previous argument is 0. A non-zero argument means that you
+know the exact i2c address so in that case no probing will take place.
+
+Both functions return NULL if something went wrong.
+
+Note that the chipid you pass to v4l2_i2c_new_subdev() is usually
+the same as the module name. It allows you to specify a chip variant, e.g.
+"saa7114" or "saa7115". In general though the i2c driver autodetects this.
+The use of chipid is something that needs to be looked at more closely at a
+later date. It differs between i2c drivers and as such can be confusing.
+To see which chip variants are supported you can look in the i2c driver code
+for the i2c_device_id table. This lists all the possibilities.
+
+There are two more helper functions:
+
+v4l2_i2c_new_subdev_cfg: this function adds new irq and platform_data
+arguments and has both 'addr' and 'probed_addrs' arguments: if addr is not
+0 then that will be used (non-probing variant), otherwise the probed_addrs
+are probed.
+
+For example: this will probe for address 0x10:
+
+struct v4l2_subdev *sd = v4l2_i2c_new_subdev_cfg(v4l2_dev, adapter,
+ "module_foo", "chipid", 0, NULL, 0, I2C_ADDRS(0x10));
+
+v4l2_i2c_new_subdev_board uses an i2c_board_info struct which is passed
+to the i2c driver and replaces the irq, platform_data and addr arguments.
+
+If the subdev supports the s_config core ops, then that op is called with
+the irq and platform_data arguments after the subdev was setup. The older
+v4l2_i2c_new_(probed_)subdev functions will call s_config as well, but with
+irq set to 0 and platform_data set to NULL.
+
+struct video_device
+-------------------
+
+The actual device nodes in the /dev directory are created using the
+video_device struct (v4l2-dev.h). This struct can either be allocated
+dynamically or embedded in a larger struct.
+
+To allocate it dynamically use:
+
+ struct video_device *vdev = video_device_alloc();
+
+ if (vdev == NULL)
+ return -ENOMEM;
+
+ vdev->release = video_device_release;
+
+If you embed it in a larger struct, then you must set the release()
+callback to your own function:
+
+ struct video_device *vdev = &my_vdev->vdev;
+
+ vdev->release = my_vdev_release;
+
+The release callback must be set and it is called when the last user
+of the video device exits.
+
+The default video_device_release() callback just calls kfree to free the
+allocated memory.
+
+There is also a video_device_release_empty() function that does nothing
+(is empty) and can be used if the struct is embedded and there is nothing
+to do when it is released.
+
+You should also set these fields:
+
+- v4l2_dev: must be set to the v4l2_device parent device.
+
+- name: set to something descriptive and unique.
+
+- vfl_dir: set this to VFL_DIR_RX for capture devices (VFL_DIR_RX has value 0,
+ so this is normally already the default), set to VFL_DIR_TX for output
+ devices and VFL_DIR_M2M for mem2mem (codec) devices.
+
+- fops: set to the v4l2_file_operations struct.
+
+- ioctl_ops: if you use the v4l2_ioctl_ops to simplify ioctl maintenance
+ (highly recommended to use this and it might become compulsory in the
+ future!), then set this to your v4l2_ioctl_ops struct. The vfl_type and
+ vfl_dir fields are used to disable ops that do not match the type/dir
+ combination. E.g. VBI ops are disabled for non-VBI nodes, and output ops
+ are disabled for a capture device. This makes it possible to provide
+ just one v4l2_ioctl_ops struct for both vbi and video nodes.
+
+- lock: leave to NULL if you want to do all the locking in the driver.
+ Otherwise you give it a pointer to a struct mutex_lock and before the
+ unlocked_ioctl file operation is called this lock will be taken by the
+ core and released afterwards. See the next section for more details.
+
+- queue: a pointer to the struct vb2_queue associated with this device node.
+ If queue is non-NULL, and queue->lock is non-NULL, then queue->lock is
+ used for the queuing ioctls (VIDIOC_REQBUFS, CREATE_BUFS, QBUF, DQBUF,
+ QUERYBUF, PREPARE_BUF, STREAMON and STREAMOFF) instead of the lock above.
+ That way the vb2 queuing framework does not have to wait for other ioctls.
+ This queue pointer is also used by the vb2 helper functions to check for
+ queuing ownership (i.e. is the filehandle calling it allowed to do the
+ operation).
+
+- prio: keeps track of the priorities. Used to implement VIDIOC_G/S_PRIORITY.
+ If left to NULL, then it will use the struct v4l2_prio_state in v4l2_device.
+ If you want to have a separate priority state per (group of) device node(s),
+ then you can point it to your own struct v4l2_prio_state.
+
+- dev_parent: you only set this if v4l2_device was registered with NULL as
+ the parent device struct. This only happens in cases where one hardware
+ device has multiple PCI devices that all share the same v4l2_device core.
+
+ The cx88 driver is an example of this: one core v4l2_device struct, but
+ it is used by both a raw video PCI device (cx8800) and a MPEG PCI device
+ (cx8802). Since the v4l2_device cannot be associated with two PCI devices
+ at the same time it is setup without a parent device. But when the struct
+ video_device is initialized you *do* know which parent PCI device to use and
+ so you set dev_device to the correct PCI device.
+
+If you use v4l2_ioctl_ops, then you should set .unlocked_ioctl to video_ioctl2
+in your v4l2_file_operations struct.
+
+Do not use .ioctl! This is deprecated and will go away in the future.
+
+In some cases you want to tell the core that a function you had specified in
+your v4l2_ioctl_ops should be ignored. You can mark such ioctls by calling this
+function before video_device_register is called:
+
+void v4l2_disable_ioctl(struct video_device *vdev, unsigned int cmd);
+
+This tends to be needed if based on external factors (e.g. which card is
+being used) you want to turns off certain features in v4l2_ioctl_ops without
+having to make a new struct.
+
+The v4l2_file_operations struct is a subset of file_operations. The main
+difference is that the inode argument is omitted since it is never used.
+
+If integration with the media framework is needed, you must initialize the
+media_entity struct embedded in the video_device struct (entity field) by
+calling media_entity_init():
+
+ struct media_pad *pad = &my_vdev->pad;
+ int err;
+
+ err = media_entity_init(&vdev->entity, 1, pad, 0);
+
+The pads array must have been previously initialized. There is no need to
+manually set the struct media_entity type and name fields.
+
+A reference to the entity will be automatically acquired/released when the
+video device is opened/closed.
+
+ioctls and locking
+------------------
+
+The V4L core provides optional locking services. The main service is the
+lock field in struct video_device, which is a pointer to a mutex. If you set
+this pointer, then that will be used by unlocked_ioctl to serialize all ioctls.
+
+If you are using the videobuf2 framework, then there is a second lock that you
+can set: video_device->queue->lock. If set, then this lock will be used instead
+of video_device->lock to serialize all queuing ioctls (see the previous section
+for the full list of those ioctls).
+
+The advantage of using a different lock for the queuing ioctls is that for some
+drivers (particularly USB drivers) certain commands such as setting controls
+can take a long time, so you want to use a separate lock for the buffer queuing
+ioctls. That way your VIDIOC_DQBUF doesn't stall because the driver is busy
+changing the e.g. exposure of the webcam.
+
+Of course, you can always do all the locking yourself by leaving both lock
+pointers at NULL.
+
+If you use the old videobuf then you must pass the video_device lock to the
+videobuf queue initialize function: if videobuf has to wait for a frame to
+arrive, then it will temporarily unlock the lock and relock it afterwards. If
+your driver also waits in the code, then you should do the same to allow other
+processes to access the device node while the first process is waiting for
+something.
+
+In the case of videobuf2 you will need to implement the wait_prepare and
+wait_finish callbacks to unlock/lock if applicable. If you use the queue->lock
+pointer, then you can use the helper functions vb2_ops_wait_prepare/finish.
+
+The implementation of a hotplug disconnect should also take the lock from
+video_device before calling v4l2_device_disconnect. If you are also using
+video_device->queue->lock, then you have to first lock video_device->queue->lock
+followed by video_device->lock. That way you can be sure no ioctl is running
+when you call v4l2_device_disconnect.
+
+video_device registration
+-------------------------
+
+Next you register the video device: this will create the character device
+for you.
+
+ err = video_register_device(vdev, VFL_TYPE_GRABBER, -1);
+ if (err) {
+ video_device_release(vdev); /* or kfree(my_vdev); */
+ return err;
+ }
+
+If the v4l2_device parent device has a non-NULL mdev field, the video device
+entity will be automatically registered with the media device.
+
+Which device is registered depends on the type argument. The following
+types exist:
+
+VFL_TYPE_GRABBER: videoX for video input/output devices
+VFL_TYPE_VBI: vbiX for vertical blank data (i.e. closed captions, teletext)
+VFL_TYPE_RADIO: radioX for radio tuners
+VFL_TYPE_SDR: swradioX for Software Defined Radio tuners
+
+The last argument gives you a certain amount of control over the device
+device node number used (i.e. the X in videoX). Normally you will pass -1
+to let the v4l2 framework pick the first free number. But sometimes users
+want to select a specific node number. It is common that drivers allow
+the user to select a specific device node number through a driver module
+option. That number is then passed to this function and video_register_device
+will attempt to select that device node number. If that number was already
+in use, then the next free device node number will be selected and it
+will send a warning to the kernel log.
+
+Another use-case is if a driver creates many devices. In that case it can
+be useful to place different video devices in separate ranges. For example,
+video capture devices start at 0, video output devices start at 16.
+So you can use the last argument to specify a minimum device node number
+and the v4l2 framework will try to pick the first free number that is equal
+or higher to what you passed. If that fails, then it will just pick the
+first free number.
+
+Since in this case you do not care about a warning about not being able
+to select the specified device node number, you can call the function
+video_register_device_no_warn() instead.
+
+Whenever a device node is created some attributes are also created for you.
+If you look in /sys/class/video4linux you see the devices. Go into e.g.
+video0 and you will see 'name', 'dev_debug' and 'index' attributes. The 'name'
+attribute is the 'name' field of the video_device struct. The 'dev_debug' attribute
+can be used to enable core debugging. See the next section for more detailed
+information on this.
+
+The 'index' attribute is the index of the device node: for each call to
+video_register_device() the index is just increased by 1. The first video
+device node you register always starts with index 0.
+
+Users can setup udev rules that utilize the index attribute to make fancy
+device names (e.g. 'mpegX' for MPEG video capture device nodes).
+
+After the device was successfully registered, then you can use these fields:
+
+- vfl_type: the device type passed to video_register_device.
+- minor: the assigned device minor number.
+- num: the device node number (i.e. the X in videoX).
+- index: the device index number.
+
+If the registration failed, then you need to call video_device_release()
+to free the allocated video_device struct, or free your own struct if the
+video_device was embedded in it. The vdev->release() callback will never
+be called if the registration failed, nor should you ever attempt to
+unregister the device if the registration failed.
+
+video device debugging
+----------------------
+
+The 'dev_debug' attribute that is created for each video, vbi, radio or swradio
+device in /sys/class/video4linux/<devX>/ allows you to enable logging of
+file operations.
+
+It is a bitmask and the following bits can be set:
+
+0x01: Log the ioctl name and error code. VIDIOC_(D)QBUF ioctls are only logged
+ if bit 0x08 is also set.
+0x02: Log the ioctl name arguments and error code. VIDIOC_(D)QBUF ioctls are
+ only logged if bit 0x08 is also set.
+0x04: Log the file operations open, release, read, write, mmap and
+ get_unmapped_area. The read and write operations are only logged if
+ bit 0x08 is also set.
+0x08: Log the read and write file operations and the VIDIOC_QBUF and
+ VIDIOC_DQBUF ioctls.
+0x10: Log the poll file operation.
+
+video_device cleanup
+--------------------
+
+When the video device nodes have to be removed, either during the unload
+of the driver or because the USB device was disconnected, then you should
+unregister them:
+
+ video_unregister_device(vdev);
+
+This will remove the device nodes from sysfs (causing udev to remove them
+from /dev).
+
+After video_unregister_device() returns no new opens can be done. However,
+in the case of USB devices some application might still have one of these
+device nodes open. So after the unregister all file operations (except
+release, of course) will return an error as well.
+
+When the last user of the video device node exits, then the vdev->release()
+callback is called and you can do the final cleanup there.
+
+Don't forget to cleanup the media entity associated with the video device if
+it has been initialized:
+
+ media_entity_cleanup(&vdev->entity);
+
+This can be done from the release callback.
+
+
+video_device helper functions
+-----------------------------
+
+There are a few useful helper functions:
+
+- file/video_device private data
+
+You can set/get driver private data in the video_device struct using:
+
+void *video_get_drvdata(struct video_device *vdev);
+void video_set_drvdata(struct video_device *vdev, void *data);
+
+Note that you can safely call video_set_drvdata() before calling
+video_register_device().
+
+And this function:
+
+struct video_device *video_devdata(struct file *file);
+
+returns the video_device belonging to the file struct.
+
+The video_drvdata function combines video_get_drvdata with video_devdata:
+
+void *video_drvdata(struct file *file);
+
+You can go from a video_device struct to the v4l2_device struct using:
+
+struct v4l2_device *v4l2_dev = vdev->v4l2_dev;
+
+- Device node name
+
+The video_device node kernel name can be retrieved using
+
+const char *video_device_node_name(struct video_device *vdev);
+
+The name is used as a hint by userspace tools such as udev. The function
+should be used where possible instead of accessing the video_device::num and
+video_device::minor fields.
+
+
+video buffer helper functions
+-----------------------------
+
+The v4l2 core API provides a set of standard methods (called "videobuf")
+for dealing with video buffers. Those methods allow a driver to implement
+read(), mmap() and overlay() in a consistent way. There are currently
+methods for using video buffers on devices that supports DMA with
+scatter/gather method (videobuf-dma-sg), DMA with linear access
+(videobuf-dma-contig), and vmalloced buffers, mostly used on USB drivers
+(videobuf-vmalloc).
+
+Please see Documentation/video4linux/videobuf for more information on how
+to use the videobuf layer.
+
+struct v4l2_fh
+--------------
+
+struct v4l2_fh provides a way to easily keep file handle specific data
+that is used by the V4L2 framework. New drivers must use struct v4l2_fh
+since it is also used to implement priority handling (VIDIOC_G/S_PRIORITY).
+
+The users of v4l2_fh (in the V4L2 framework, not the driver) know
+whether a driver uses v4l2_fh as its file->private_data pointer by
+testing the V4L2_FL_USES_V4L2_FH bit in video_device->flags. This bit is
+set whenever v4l2_fh_init() is called.
+
+struct v4l2_fh is allocated as a part of the driver's own file handle
+structure and file->private_data is set to it in the driver's open
+function by the driver.
+
+In many cases the struct v4l2_fh will be embedded in a larger structure.
+In that case you should call v4l2_fh_init+v4l2_fh_add in open() and
+v4l2_fh_del+v4l2_fh_exit in release().
+
+Drivers can extract their own file handle structure by using the container_of
+macro. Example:
+
+struct my_fh {
+ int blah;
+ struct v4l2_fh fh;
+};
+
+...
+
+int my_open(struct file *file)
+{
+ struct my_fh *my_fh;
+ struct video_device *vfd;
+ int ret;
+
+ ...
+
+ my_fh = kzalloc(sizeof(*my_fh), GFP_KERNEL);
+
+ ...
+
+ v4l2_fh_init(&my_fh->fh, vfd);
+
+ ...
+
+ file->private_data = &my_fh->fh;
+ v4l2_fh_add(&my_fh->fh);
+ return 0;
+}
+
+int my_release(struct file *file)
+{
+ struct v4l2_fh *fh = file->private_data;
+ struct my_fh *my_fh = container_of(fh, struct my_fh, fh);
+
+ ...
+ v4l2_fh_del(&my_fh->fh);
+ v4l2_fh_exit(&my_fh->fh);
+ kfree(my_fh);
+ return 0;
+}
+
+Below is a short description of the v4l2_fh functions used:
+
+void v4l2_fh_init(struct v4l2_fh *fh, struct video_device *vdev)
+
+ Initialise the file handle. This *MUST* be performed in the driver's
+ v4l2_file_operations->open() handler.
+
+void v4l2_fh_add(struct v4l2_fh *fh)
+
+ Add a v4l2_fh to video_device file handle list. Must be called once the
+ file handle is completely initialized.
+
+void v4l2_fh_del(struct v4l2_fh *fh)
+
+ Unassociate the file handle from video_device(). The file handle
+ exit function may now be called.
+
+void v4l2_fh_exit(struct v4l2_fh *fh)
+
+ Uninitialise the file handle. After uninitialisation the v4l2_fh
+ memory can be freed.
+
+
+If struct v4l2_fh is not embedded, then you can use these helper functions:
+
+int v4l2_fh_open(struct file *filp)
+
+ This allocates a struct v4l2_fh, initializes it and adds it to the struct
+ video_device associated with the file struct.
+
+int v4l2_fh_release(struct file *filp)
+
+ This deletes it from the struct video_device associated with the file
+ struct, uninitialised the v4l2_fh and frees it.
+
+These two functions can be plugged into the v4l2_file_operation's open() and
+release() ops.
+
+
+Several drivers need to do something when the first file handle is opened and
+when the last file handle closes. Two helper functions were added to check
+whether the v4l2_fh struct is the only open filehandle of the associated
+device node:
+
+int v4l2_fh_is_singular(struct v4l2_fh *fh)
+
+ Returns 1 if the file handle is the only open file handle, else 0.
+
+int v4l2_fh_is_singular_file(struct file *filp)
+
+ Same, but it calls v4l2_fh_is_singular with filp->private_data.
+
+
+V4L2 events
+-----------
+
+The V4L2 events provide a generic way to pass events to user space.
+The driver must use v4l2_fh to be able to support V4L2 events.
+
+Events are defined by a type and an optional ID. The ID may refer to a V4L2
+object such as a control ID. If unused, then the ID is 0.
+
+When the user subscribes to an event the driver will allocate a number of
+kevent structs for that event. So every (type, ID) event tuple will have
+its own set of kevent structs. This guarantees that if a driver is generating
+lots of events of one type in a short time, then that will not overwrite
+events of another type.
+
+But if you get more events of one type than the number of kevents that were
+reserved, then the oldest event will be dropped and the new one added.
+
+Furthermore, the internal struct v4l2_subscribed_event has merge() and
+replace() callbacks which drivers can set. These callbacks are called when
+a new event is raised and there is no more room. The replace() callback
+allows you to replace the payload of the old event with that of the new event,
+merging any relevant data from the old payload into the new payload that
+replaces it. It is called when this event type has only one kevent struct
+allocated. The merge() callback allows you to merge the oldest event payload
+into that of the second-oldest event payload. It is called when there are two
+or more kevent structs allocated.
+
+This way no status information is lost, just the intermediate steps leading
+up to that state.
+
+A good example of these replace/merge callbacks is in v4l2-event.c:
+ctrls_replace() and ctrls_merge() callbacks for the control event.
+
+Note: these callbacks can be called from interrupt context, so they must be
+fast.
+
+Useful functions:
+
+void v4l2_event_queue(struct video_device *vdev, const struct v4l2_event *ev)
+
+ Queue events to video device. The driver's only responsibility is to fill
+ in the type and the data fields. The other fields will be filled in by
+ V4L2.
+
+int v4l2_event_subscribe(struct v4l2_fh *fh,
+ struct v4l2_event_subscription *sub, unsigned elems,
+ const struct v4l2_subscribed_event_ops *ops)
+
+ The video_device->ioctl_ops->vidioc_subscribe_event must check the driver
+ is able to produce events with specified event id. Then it calls
+ v4l2_event_subscribe() to subscribe the event.
+
+ The elems argument is the size of the event queue for this event. If it is 0,
+ then the framework will fill in a default value (this depends on the event
+ type).
+
+ The ops argument allows the driver to specify a number of callbacks:
+ * add: called when a new listener gets added (subscribing to the same
+ event twice will only cause this callback to get called once)
+ * del: called when a listener stops listening
+ * replace: replace event 'old' with event 'new'.
+ * merge: merge event 'old' into event 'new'.
+ All 4 callbacks are optional, if you don't want to specify any callbacks
+ the ops argument itself maybe NULL.
+
+int v4l2_event_unsubscribe(struct v4l2_fh *fh,
+ struct v4l2_event_subscription *sub)
+
+ vidioc_unsubscribe_event in struct v4l2_ioctl_ops. A driver may use
+ v4l2_event_unsubscribe() directly unless it wants to be involved in
+ unsubscription process.
+
+ The special type V4L2_EVENT_ALL may be used to unsubscribe all events. The
+ drivers may want to handle this in a special way.
+
+int v4l2_event_pending(struct v4l2_fh *fh)
+
+ Returns the number of pending events. Useful when implementing poll.
+
+Events are delivered to user space through the poll system call. The driver
+can use v4l2_fh->wait (a wait_queue_head_t) as the argument for poll_wait().
+
+There are standard and private events. New standard events must use the
+smallest available event type. The drivers must allocate their events from
+their own class starting from class base. Class base is
+V4L2_EVENT_PRIVATE_START + n * 1000 where n is the lowest available number.
+The first event type in the class is reserved for future use, so the first
+available event type is 'class base + 1'.
+
+An example on how the V4L2 events may be used can be found in the OMAP
+3 ISP driver (drivers/media/platform/omap3isp).
+
+
+V4L2 clocks
+-----------
+
+Many subdevices, like camera sensors, TV decoders and encoders, need a clock
+signal to be supplied by the system. Often this clock is supplied by the
+respective bridge device. The Linux kernel provides a Common Clock Framework for
+this purpose. However, it is not (yet) available on all architectures. Besides,
+the nature of the multi-functional (clock, data + synchronisation, I2C control)
+connection of subdevices to the system might impose special requirements on the
+clock API usage. E.g. V4L2 has to support clock provider driver unregistration
+while a subdevice driver is holding a reference to the clock. For these reasons
+a V4L2 clock helper API has been developed and is provided to bridge and
+subdevice drivers.
+
+The API consists of two parts: two functions to register and unregister a V4L2
+clock source: v4l2_clk_register() and v4l2_clk_unregister() and calls to control
+a clock object, similar to the respective generic clock API calls:
+v4l2_clk_get(), v4l2_clk_put(), v4l2_clk_enable(), v4l2_clk_disable(),
+v4l2_clk_get_rate(), and v4l2_clk_set_rate(). Clock suppliers have to provide
+clock operations that will be called when clock users invoke respective API
+methods.
+
+It is expected that once the CCF becomes available on all relevant
+architectures this API will be removed.
diff --git a/Documentation/video4linux/v4l2-pci-skeleton.c b/Documentation/video4linux/v4l2-pci-skeleton.c
new file mode 100644
index 000000000..7bd1b975b
--- /dev/null
+++ b/Documentation/video4linux/v4l2-pci-skeleton.c
@@ -0,0 +1,924 @@
+/*
+ * This is a V4L2 PCI Skeleton Driver. It gives an initial skeleton source
+ * for use with other PCI drivers.
+ *
+ * This skeleton PCI driver assumes that the card has an S-Video connector as
+ * input 0 and an HDMI connector as input 1.
+ *
+ * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kmod.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/videodev2.h>
+#include <linux/v4l2-dv-timings.h>
+#include <media/v4l2-device.h>
+#include <media/v4l2-dev.h>
+#include <media/v4l2-ioctl.h>
+#include <media/v4l2-dv-timings.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-event.h>
+#include <media/videobuf2-dma-contig.h>
+
+MODULE_DESCRIPTION("V4L2 PCI Skeleton Driver");
+MODULE_AUTHOR("Hans Verkuil");
+MODULE_LICENSE("GPL v2");
+
+/**
+ * struct skeleton - All internal data for one instance of device
+ * @pdev: PCI device
+ * @v4l2_dev: top-level v4l2 device struct
+ * @vdev: video node structure
+ * @ctrl_handler: control handler structure
+ * @lock: ioctl serialization mutex
+ * @std: current SDTV standard
+ * @timings: current HDTV timings
+ * @format: current pix format
+ * @input: current video input (0 = SDTV, 1 = HDTV)
+ * @queue: vb2 video capture queue
+ * @alloc_ctx: vb2 contiguous DMA context
+ * @qlock: spinlock controlling access to buf_list and sequence
+ * @buf_list: list of buffers queued for DMA
+ * @sequence: frame sequence counter
+ */
+struct skeleton {
+ struct pci_dev *pdev;
+ struct v4l2_device v4l2_dev;
+ struct video_device vdev;
+ struct v4l2_ctrl_handler ctrl_handler;
+ struct mutex lock;
+ v4l2_std_id std;
+ struct v4l2_dv_timings timings;
+ struct v4l2_pix_format format;
+ unsigned input;
+
+ struct vb2_queue queue;
+ struct vb2_alloc_ctx *alloc_ctx;
+
+ spinlock_t qlock;
+ struct list_head buf_list;
+ unsigned field;
+ unsigned sequence;
+};
+
+struct skel_buffer {
+ struct vb2_buffer vb;
+ struct list_head list;
+};
+
+static inline struct skel_buffer *to_skel_buffer(struct vb2_buffer *vb2)
+{
+ return container_of(vb2, struct skel_buffer, vb);
+}
+
+static const struct pci_device_id skeleton_pci_tbl[] = {
+ /* { PCI_DEVICE(PCI_VENDOR_ID_, PCI_DEVICE_ID_) }, */
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, skeleton_pci_tbl);
+
+/*
+ * HDTV: this structure has the capabilities of the HDTV receiver.
+ * It is used to constrain the huge list of possible formats based
+ * upon the hardware capabilities.
+ */
+static const struct v4l2_dv_timings_cap skel_timings_cap = {
+ .type = V4L2_DV_BT_656_1120,
+ /* keep this initialization for compatibility with GCC < 4.4.6 */
+ .reserved = { 0 },
+ V4L2_INIT_BT_TIMINGS(
+ 720, 1920, /* min/max width */
+ 480, 1080, /* min/max height */
+ 27000000, 74250000, /* min/max pixelclock*/
+ V4L2_DV_BT_STD_CEA861, /* Supported standards */
+ /* capabilities */
+ V4L2_DV_BT_CAP_INTERLACED | V4L2_DV_BT_CAP_PROGRESSIVE
+ )
+};
+
+/*
+ * Supported SDTV standards. This does the same job as skel_timings_cap, but
+ * for standard TV formats.
+ */
+#define SKEL_TVNORMS V4L2_STD_ALL
+
+/*
+ * Interrupt handler: typically interrupts happen after a new frame has been
+ * captured. It is the job of the handler to remove the new frame from the
+ * internal list and give it back to the vb2 framework, updating the sequence
+ * counter, field and timestamp at the same time.
+ */
+static irqreturn_t skeleton_irq(int irq, void *dev_id)
+{
+#ifdef TODO
+ struct skeleton *skel = dev_id;
+
+ /* handle interrupt */
+
+ /* Once a new frame has been captured, mark it as done like this: */
+ if (captured_new_frame) {
+ ...
+ spin_lock(&skel->qlock);
+ list_del(&new_buf->list);
+ spin_unlock(&skel->qlock);
+ v4l2_get_timestamp(&new_buf->vb.v4l2_buf.timestamp);
+ new_buf->vb.v4l2_buf.sequence = skel->sequence++;
+ new_buf->vb.v4l2_buf.field = skel->field;
+ if (skel->format.field == V4L2_FIELD_ALTERNATE) {
+ if (skel->field == V4L2_FIELD_BOTTOM)
+ skel->field = V4L2_FIELD_TOP;
+ else if (skel->field == V4L2_FIELD_TOP)
+ skel->field = V4L2_FIELD_BOTTOM;
+ }
+ vb2_buffer_done(&new_buf->vb, VB2_BUF_STATE_DONE);
+ }
+#endif
+ return IRQ_HANDLED;
+}
+
+/*
+ * Setup the constraints of the queue: besides setting the number of planes
+ * per buffer and the size and allocation context of each plane, it also
+ * checks if sufficient buffers have been allocated. Usually 3 is a good
+ * minimum number: many DMA engines need a minimum of 2 buffers in the
+ * queue and you need to have another available for userspace processing.
+ */
+static int queue_setup(struct vb2_queue *vq, const struct v4l2_format *fmt,
+ unsigned int *nbuffers, unsigned int *nplanes,
+ unsigned int sizes[], void *alloc_ctxs[])
+{
+ struct skeleton *skel = vb2_get_drv_priv(vq);
+
+ skel->field = skel->format.field;
+ if (skel->field == V4L2_FIELD_ALTERNATE) {
+ /*
+ * You cannot use read() with FIELD_ALTERNATE since the field
+ * information (TOP/BOTTOM) cannot be passed back to the user.
+ */
+ if (vb2_fileio_is_active(vq))
+ return -EINVAL;
+ skel->field = V4L2_FIELD_TOP;
+ }
+
+ if (vq->num_buffers + *nbuffers < 3)
+ *nbuffers = 3 - vq->num_buffers;
+
+ if (fmt && fmt->fmt.pix.sizeimage < skel->format.sizeimage)
+ return -EINVAL;
+ *nplanes = 1;
+ sizes[0] = fmt ? fmt->fmt.pix.sizeimage : skel->format.sizeimage;
+ alloc_ctxs[0] = skel->alloc_ctx;
+ return 0;
+}
+
+/*
+ * Prepare the buffer for queueing to the DMA engine: check and set the
+ * payload size.
+ */
+static int buffer_prepare(struct vb2_buffer *vb)
+{
+ struct skeleton *skel = vb2_get_drv_priv(vb->vb2_queue);
+ unsigned long size = skel->format.sizeimage;
+
+ if (vb2_plane_size(vb, 0) < size) {
+ dev_err(&skel->pdev->dev, "buffer too small (%lu < %lu)\n",
+ vb2_plane_size(vb, 0), size);
+ return -EINVAL;
+ }
+
+ vb2_set_plane_payload(vb, 0, size);
+ return 0;
+}
+
+/*
+ * Queue this buffer to the DMA engine.
+ */
+static void buffer_queue(struct vb2_buffer *vb)
+{
+ struct skeleton *skel = vb2_get_drv_priv(vb->vb2_queue);
+ struct skel_buffer *buf = to_skel_buffer(vb);
+ unsigned long flags;
+
+ spin_lock_irqsave(&skel->qlock, flags);
+ list_add_tail(&buf->list, &skel->buf_list);
+
+ /* TODO: Update any DMA pointers if necessary */
+
+ spin_unlock_irqrestore(&skel->qlock, flags);
+}
+
+static void return_all_buffers(struct skeleton *skel,
+ enum vb2_buffer_state state)
+{
+ struct skel_buffer *buf, *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&skel->qlock, flags);
+ list_for_each_entry_safe(buf, node, &skel->buf_list, list) {
+ vb2_buffer_done(&buf->vb, state);
+ list_del(&buf->list);
+ }
+ spin_unlock_irqrestore(&skel->qlock, flags);
+}
+
+/*
+ * Start streaming. First check if the minimum number of buffers have been
+ * queued. If not, then return -ENOBUFS and the vb2 framework will call
+ * this function again the next time a buffer has been queued until enough
+ * buffers are available to actually start the DMA engine.
+ */
+static int start_streaming(struct vb2_queue *vq, unsigned int count)
+{
+ struct skeleton *skel = vb2_get_drv_priv(vq);
+ int ret = 0;
+
+ skel->sequence = 0;
+
+ /* TODO: start DMA */
+
+ if (ret) {
+ /*
+ * In case of an error, return all active buffers to the
+ * QUEUED state
+ */
+ return_all_buffers(skel, VB2_BUF_STATE_QUEUED);
+ }
+ return ret;
+}
+
+/*
+ * Stop the DMA engine. Any remaining buffers in the DMA queue are dequeued
+ * and passed on to the vb2 framework marked as STATE_ERROR.
+ */
+static void stop_streaming(struct vb2_queue *vq)
+{
+ struct skeleton *skel = vb2_get_drv_priv(vq);
+
+ /* TODO: stop DMA */
+
+ /* Release all active buffers */
+ return_all_buffers(skel, VB2_BUF_STATE_ERROR);
+}
+
+/*
+ * The vb2 queue ops. Note that since q->lock is set we can use the standard
+ * vb2_ops_wait_prepare/finish helper functions. If q->lock would be NULL,
+ * then this driver would have to provide these ops.
+ */
+static struct vb2_ops skel_qops = {
+ .queue_setup = queue_setup,
+ .buf_prepare = buffer_prepare,
+ .buf_queue = buffer_queue,
+ .start_streaming = start_streaming,
+ .stop_streaming = stop_streaming,
+ .wait_prepare = vb2_ops_wait_prepare,
+ .wait_finish = vb2_ops_wait_finish,
+};
+
+/*
+ * Required ioctl querycap. Note that the version field is prefilled with
+ * the version of the kernel.
+ */
+static int skeleton_querycap(struct file *file, void *priv,
+ struct v4l2_capability *cap)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ strlcpy(cap->driver, KBUILD_MODNAME, sizeof(cap->driver));
+ strlcpy(cap->card, "V4L2 PCI Skeleton", sizeof(cap->card));
+ snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s",
+ pci_name(skel->pdev));
+ cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE |
+ V4L2_CAP_STREAMING;
+ cap->capabilities = cap->device_caps | V4L2_CAP_DEVICE_CAPS;
+ return 0;
+}
+
+/*
+ * Helper function to check and correct struct v4l2_pix_format. It's used
+ * not only in VIDIOC_TRY/S_FMT, but also elsewhere if changes to the SDTV
+ * standard, HDTV timings or the video input would require updating the
+ * current format.
+ */
+static void skeleton_fill_pix_format(struct skeleton *skel,
+ struct v4l2_pix_format *pix)
+{
+ pix->pixelformat = V4L2_PIX_FMT_YUYV;
+ if (skel->input == 0) {
+ /* S-Video input */
+ pix->width = 720;
+ pix->height = (skel->std & V4L2_STD_525_60) ? 480 : 576;
+ pix->field = V4L2_FIELD_INTERLACED;
+ pix->colorspace = V4L2_COLORSPACE_SMPTE170M;
+ } else {
+ /* HDMI input */
+ pix->width = skel->timings.bt.width;
+ pix->height = skel->timings.bt.height;
+ if (skel->timings.bt.interlaced) {
+ pix->field = V4L2_FIELD_ALTERNATE;
+ pix->height /= 2;
+ } else {
+ pix->field = V4L2_FIELD_NONE;
+ }
+ pix->colorspace = V4L2_COLORSPACE_REC709;
+ }
+
+ /*
+ * The YUYV format is four bytes for every two pixels, so bytesperline
+ * is width * 2.
+ */
+ pix->bytesperline = pix->width * 2;
+ pix->sizeimage = pix->bytesperline * pix->height;
+ pix->priv = 0;
+}
+
+static int skeleton_try_fmt_vid_cap(struct file *file, void *priv,
+ struct v4l2_format *f)
+{
+ struct skeleton *skel = video_drvdata(file);
+ struct v4l2_pix_format *pix = &f->fmt.pix;
+
+ /*
+ * Due to historical reasons providing try_fmt with an unsupported
+ * pixelformat will return -EINVAL for video receivers. Webcam drivers,
+ * however, will silently correct the pixelformat. Some video capture
+ * applications rely on this behavior...
+ */
+ if (pix->pixelformat != V4L2_PIX_FMT_YUYV)
+ return -EINVAL;
+ skeleton_fill_pix_format(skel, pix);
+ return 0;
+}
+
+static int skeleton_s_fmt_vid_cap(struct file *file, void *priv,
+ struct v4l2_format *f)
+{
+ struct skeleton *skel = video_drvdata(file);
+ int ret;
+
+ ret = skeleton_try_fmt_vid_cap(file, priv, f);
+ if (ret)
+ return ret;
+
+ /*
+ * It is not allowed to change the format while buffers for use with
+ * streaming have already been allocated.
+ */
+ if (vb2_is_busy(&skel->queue))
+ return -EBUSY;
+
+ /* TODO: change format */
+ skel->format = f->fmt.pix;
+ return 0;
+}
+
+static int skeleton_g_fmt_vid_cap(struct file *file, void *priv,
+ struct v4l2_format *f)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ f->fmt.pix = skel->format;
+ return 0;
+}
+
+static int skeleton_enum_fmt_vid_cap(struct file *file, void *priv,
+ struct v4l2_fmtdesc *f)
+{
+ if (f->index != 0)
+ return -EINVAL;
+
+ strlcpy(f->description, "4:2:2, packed, YUYV", sizeof(f->description));
+ f->pixelformat = V4L2_PIX_FMT_YUYV;
+ f->flags = 0;
+ return 0;
+}
+
+static int skeleton_s_std(struct file *file, void *priv, v4l2_std_id std)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ /* S_STD is not supported on the HDMI input */
+ if (skel->input)
+ return -ENODATA;
+
+ /*
+ * No change, so just return. Some applications call S_STD again after
+ * the buffers for streaming have been set up, so we have to allow for
+ * this behavior.
+ */
+ if (std == skel->std)
+ return 0;
+
+ /*
+ * Changing the standard implies a format change, which is not allowed
+ * while buffers for use with streaming have already been allocated.
+ */
+ if (vb2_is_busy(&skel->queue))
+ return -EBUSY;
+
+ /* TODO: handle changing std */
+
+ skel->std = std;
+
+ /* Update the internal format */
+ skeleton_fill_pix_format(skel, &skel->format);
+ return 0;
+}
+
+static int skeleton_g_std(struct file *file, void *priv, v4l2_std_id *std)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ /* G_STD is not supported on the HDMI input */
+ if (skel->input)
+ return -ENODATA;
+
+ *std = skel->std;
+ return 0;
+}
+
+/*
+ * Query the current standard as seen by the hardware. This function shall
+ * never actually change the standard, it just detects and reports.
+ * The framework will initially set *std to tvnorms (i.e. the set of
+ * supported standards by this input), and this function should just AND
+ * this value. If there is no signal, then *std should be set to 0.
+ */
+static int skeleton_querystd(struct file *file, void *priv, v4l2_std_id *std)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ /* QUERY_STD is not supported on the HDMI input */
+ if (skel->input)
+ return -ENODATA;
+
+#ifdef TODO
+ /*
+ * Query currently seen standard. Initial value of *std is
+ * V4L2_STD_ALL. This function should look something like this:
+ */
+ get_signal_info();
+ if (no_signal) {
+ *std = 0;
+ return 0;
+ }
+ /* Use signal information to reduce the number of possible standards */
+ if (signal_has_525_lines)
+ *std &= V4L2_STD_525_60;
+ else
+ *std &= V4L2_STD_625_50;
+#endif
+ return 0;
+}
+
+static int skeleton_s_dv_timings(struct file *file, void *_fh,
+ struct v4l2_dv_timings *timings)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ /* S_DV_TIMINGS is not supported on the S-Video input */
+ if (skel->input == 0)
+ return -ENODATA;
+
+ /* Quick sanity check */
+ if (!v4l2_valid_dv_timings(timings, &skel_timings_cap, NULL, NULL))
+ return -EINVAL;
+
+ /* Check if the timings are part of the CEA-861 timings. */
+ if (!v4l2_find_dv_timings_cap(timings, &skel_timings_cap,
+ 0, NULL, NULL))
+ return -EINVAL;
+
+ /* Return 0 if the new timings are the same as the current timings. */
+ if (v4l2_match_dv_timings(timings, &skel->timings, 0))
+ return 0;
+
+ /*
+ * Changing the timings implies a format change, which is not allowed
+ * while buffers for use with streaming have already been allocated.
+ */
+ if (vb2_is_busy(&skel->queue))
+ return -EBUSY;
+
+ /* TODO: Configure new timings */
+
+ /* Save timings */
+ skel->timings = *timings;
+
+ /* Update the internal format */
+ skeleton_fill_pix_format(skel, &skel->format);
+ return 0;
+}
+
+static int skeleton_g_dv_timings(struct file *file, void *_fh,
+ struct v4l2_dv_timings *timings)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ /* G_DV_TIMINGS is not supported on the S-Video input */
+ if (skel->input == 0)
+ return -ENODATA;
+
+ *timings = skel->timings;
+ return 0;
+}
+
+static int skeleton_enum_dv_timings(struct file *file, void *_fh,
+ struct v4l2_enum_dv_timings *timings)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ /* ENUM_DV_TIMINGS is not supported on the S-Video input */
+ if (skel->input == 0)
+ return -ENODATA;
+
+ return v4l2_enum_dv_timings_cap(timings, &skel_timings_cap,
+ NULL, NULL);
+}
+
+/*
+ * Query the current timings as seen by the hardware. This function shall
+ * never actually change the timings, it just detects and reports.
+ * If no signal is detected, then return -ENOLINK. If the hardware cannot
+ * lock to the signal, then return -ENOLCK. If the signal is out of range
+ * of the capabilities of the system (e.g., it is possible that the receiver
+ * can lock but that the DMA engine it is connected to cannot handle
+ * pixelclocks above a certain frequency), then -ERANGE is returned.
+ */
+static int skeleton_query_dv_timings(struct file *file, void *_fh,
+ struct v4l2_dv_timings *timings)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ /* QUERY_DV_TIMINGS is not supported on the S-Video input */
+ if (skel->input == 0)
+ return -ENODATA;
+
+#ifdef TODO
+ /*
+ * Query currently seen timings. This function should look
+ * something like this:
+ */
+ detect_timings();
+ if (no_signal)
+ return -ENOLINK;
+ if (cannot_lock_to_signal)
+ return -ENOLCK;
+ if (signal_out_of_range_of_capabilities)
+ return -ERANGE;
+
+ /* Useful for debugging */
+ v4l2_print_dv_timings(skel->v4l2_dev.name, "query_dv_timings:",
+ timings, true);
+#endif
+ return 0;
+}
+
+static int skeleton_dv_timings_cap(struct file *file, void *fh,
+ struct v4l2_dv_timings_cap *cap)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ /* DV_TIMINGS_CAP is not supported on the S-Video input */
+ if (skel->input == 0)
+ return -ENODATA;
+ *cap = skel_timings_cap;
+ return 0;
+}
+
+static int skeleton_enum_input(struct file *file, void *priv,
+ struct v4l2_input *i)
+{
+ if (i->index > 1)
+ return -EINVAL;
+
+ i->type = V4L2_INPUT_TYPE_CAMERA;
+ if (i->index == 0) {
+ i->std = SKEL_TVNORMS;
+ strlcpy(i->name, "S-Video", sizeof(i->name));
+ i->capabilities = V4L2_IN_CAP_STD;
+ } else {
+ i->std = 0;
+ strlcpy(i->name, "HDMI", sizeof(i->name));
+ i->capabilities = V4L2_IN_CAP_DV_TIMINGS;
+ }
+ return 0;
+}
+
+static int skeleton_s_input(struct file *file, void *priv, unsigned int i)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ if (i > 1)
+ return -EINVAL;
+
+ /*
+ * Changing the input implies a format change, which is not allowed
+ * while buffers for use with streaming have already been allocated.
+ */
+ if (vb2_is_busy(&skel->queue))
+ return -EBUSY;
+
+ skel->input = i;
+ /*
+ * Update tvnorms. The tvnorms value is used by the core to implement
+ * VIDIOC_ENUMSTD so it has to be correct. If tvnorms == 0, then
+ * ENUMSTD will return -ENODATA.
+ */
+ skel->vdev.tvnorms = i ? 0 : SKEL_TVNORMS;
+
+ /* Update the internal format */
+ skeleton_fill_pix_format(skel, &skel->format);
+ return 0;
+}
+
+static int skeleton_g_input(struct file *file, void *priv, unsigned int *i)
+{
+ struct skeleton *skel = video_drvdata(file);
+
+ *i = skel->input;
+ return 0;
+}
+
+/* The control handler. */
+static int skeleton_s_ctrl(struct v4l2_ctrl *ctrl)
+{
+ /*struct skeleton *skel =
+ container_of(ctrl->handler, struct skeleton, ctrl_handler);*/
+
+ switch (ctrl->id) {
+ case V4L2_CID_BRIGHTNESS:
+ /* TODO: set brightness to ctrl->val */
+ break;
+ case V4L2_CID_CONTRAST:
+ /* TODO: set contrast to ctrl->val */
+ break;
+ case V4L2_CID_SATURATION:
+ /* TODO: set saturation to ctrl->val */
+ break;
+ case V4L2_CID_HUE:
+ /* TODO: set hue to ctrl->val */
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* ------------------------------------------------------------------
+ File operations for the device
+ ------------------------------------------------------------------*/
+
+static const struct v4l2_ctrl_ops skel_ctrl_ops = {
+ .s_ctrl = skeleton_s_ctrl,
+};
+
+/*
+ * The set of all supported ioctls. Note that all the streaming ioctls
+ * use the vb2 helper functions that take care of all the locking and
+ * that also do ownership tracking (i.e. only the filehandle that requested
+ * the buffers can call the streaming ioctls, all other filehandles will
+ * receive -EBUSY if they attempt to call the same streaming ioctls).
+ *
+ * The last three ioctls also use standard helper functions: these implement
+ * standard behavior for drivers with controls.
+ */
+static const struct v4l2_ioctl_ops skel_ioctl_ops = {
+ .vidioc_querycap = skeleton_querycap,
+ .vidioc_try_fmt_vid_cap = skeleton_try_fmt_vid_cap,
+ .vidioc_s_fmt_vid_cap = skeleton_s_fmt_vid_cap,
+ .vidioc_g_fmt_vid_cap = skeleton_g_fmt_vid_cap,
+ .vidioc_enum_fmt_vid_cap = skeleton_enum_fmt_vid_cap,
+
+ .vidioc_g_std = skeleton_g_std,
+ .vidioc_s_std = skeleton_s_std,
+ .vidioc_querystd = skeleton_querystd,
+
+ .vidioc_s_dv_timings = skeleton_s_dv_timings,
+ .vidioc_g_dv_timings = skeleton_g_dv_timings,
+ .vidioc_enum_dv_timings = skeleton_enum_dv_timings,
+ .vidioc_query_dv_timings = skeleton_query_dv_timings,
+ .vidioc_dv_timings_cap = skeleton_dv_timings_cap,
+
+ .vidioc_enum_input = skeleton_enum_input,
+ .vidioc_g_input = skeleton_g_input,
+ .vidioc_s_input = skeleton_s_input,
+
+ .vidioc_reqbufs = vb2_ioctl_reqbufs,
+ .vidioc_create_bufs = vb2_ioctl_create_bufs,
+ .vidioc_querybuf = vb2_ioctl_querybuf,
+ .vidioc_qbuf = vb2_ioctl_qbuf,
+ .vidioc_dqbuf = vb2_ioctl_dqbuf,
+ .vidioc_expbuf = vb2_ioctl_expbuf,
+ .vidioc_streamon = vb2_ioctl_streamon,
+ .vidioc_streamoff = vb2_ioctl_streamoff,
+
+ .vidioc_log_status = v4l2_ctrl_log_status,
+ .vidioc_subscribe_event = v4l2_ctrl_subscribe_event,
+ .vidioc_unsubscribe_event = v4l2_event_unsubscribe,
+};
+
+/*
+ * The set of file operations. Note that all these ops are standard core
+ * helper functions.
+ */
+static const struct v4l2_file_operations skel_fops = {
+ .owner = THIS_MODULE,
+ .open = v4l2_fh_open,
+ .release = vb2_fop_release,
+ .unlocked_ioctl = video_ioctl2,
+ .read = vb2_fop_read,
+ .mmap = vb2_fop_mmap,
+ .poll = vb2_fop_poll,
+};
+
+/*
+ * The initial setup of this device instance. Note that the initial state of
+ * the driver should be complete. So the initial format, standard, timings
+ * and video input should all be initialized to some reasonable value.
+ */
+static int skeleton_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+ /* The initial timings are chosen to be 720p60. */
+ static const struct v4l2_dv_timings timings_def =
+ V4L2_DV_BT_CEA_1280X720P60;
+ struct skeleton *skel;
+ struct video_device *vdev;
+ struct v4l2_ctrl_handler *hdl;
+ struct vb2_queue *q;
+ int ret;
+
+ /* Enable PCI */
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
+ ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
+ if (ret) {
+ dev_err(&pdev->dev, "no suitable DMA available.\n");
+ goto disable_pci;
+ }
+
+ /* Allocate a new instance */
+ skel = devm_kzalloc(&pdev->dev, sizeof(struct skeleton), GFP_KERNEL);
+ if (!skel)
+ return -ENOMEM;
+
+ /* Allocate the interrupt */
+ ret = devm_request_irq(&pdev->dev, pdev->irq,
+ skeleton_irq, 0, KBUILD_MODNAME, skel);
+ if (ret) {
+ dev_err(&pdev->dev, "request_irq failed\n");
+ goto disable_pci;
+ }
+ skel->pdev = pdev;
+
+ /* Fill in the initial format-related settings */
+ skel->timings = timings_def;
+ skel->std = V4L2_STD_625_50;
+ skeleton_fill_pix_format(skel, &skel->format);
+
+ /* Initialize the top-level structure */
+ ret = v4l2_device_register(&pdev->dev, &skel->v4l2_dev);
+ if (ret)
+ goto disable_pci;
+
+ mutex_init(&skel->lock);
+
+ /* Add the controls */
+ hdl = &skel->ctrl_handler;
+ v4l2_ctrl_handler_init(hdl, 4);
+ v4l2_ctrl_new_std(hdl, &skel_ctrl_ops,
+ V4L2_CID_BRIGHTNESS, 0, 255, 1, 127);
+ v4l2_ctrl_new_std(hdl, &skel_ctrl_ops,
+ V4L2_CID_CONTRAST, 0, 255, 1, 16);
+ v4l2_ctrl_new_std(hdl, &skel_ctrl_ops,
+ V4L2_CID_SATURATION, 0, 255, 1, 127);
+ v4l2_ctrl_new_std(hdl, &skel_ctrl_ops,
+ V4L2_CID_HUE, -128, 127, 1, 0);
+ if (hdl->error) {
+ ret = hdl->error;
+ goto free_hdl;
+ }
+ skel->v4l2_dev.ctrl_handler = hdl;
+
+ /* Initialize the vb2 queue */
+ q = &skel->queue;
+ q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+ q->io_modes = VB2_MMAP | VB2_DMABUF | VB2_READ;
+ q->drv_priv = skel;
+ q->buf_struct_size = sizeof(struct skel_buffer);
+ q->ops = &skel_qops;
+ q->mem_ops = &vb2_dma_contig_memops;
+ q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
+ /*
+ * Assume that this DMA engine needs to have at least two buffers
+ * available before it can be started. The start_streaming() op
+ * won't be called until at least this many buffers are queued up.
+ */
+ q->min_buffers_needed = 2;
+ /*
+ * The serialization lock for the streaming ioctls. This is the same
+ * as the main serialization lock, but if some of the non-streaming
+ * ioctls could take a long time to execute, then you might want to
+ * have a different lock here to prevent VIDIOC_DQBUF from being
+ * blocked while waiting for another action to finish. This is
+ * generally not needed for PCI devices, but USB devices usually do
+ * want a separate lock here.
+ */
+ q->lock = &skel->lock;
+ /*
+ * Since this driver can only do 32-bit DMA we must make sure that
+ * the vb2 core will allocate the buffers in 32-bit DMA memory.
+ */
+ q->gfp_flags = GFP_DMA32;
+ ret = vb2_queue_init(q);
+ if (ret)
+ goto free_hdl;
+
+ skel->alloc_ctx = vb2_dma_contig_init_ctx(&pdev->dev);
+ if (IS_ERR(skel->alloc_ctx)) {
+ dev_err(&pdev->dev, "Can't allocate buffer context");
+ ret = PTR_ERR(skel->alloc_ctx);
+ goto free_hdl;
+ }
+ INIT_LIST_HEAD(&skel->buf_list);
+ spin_lock_init(&skel->qlock);
+
+ /* Initialize the video_device structure */
+ vdev = &skel->vdev;
+ strlcpy(vdev->name, KBUILD_MODNAME, sizeof(vdev->name));
+ /*
+ * There is nothing to clean up, so release is set to an empty release
+ * function. The release callback must be non-NULL.
+ */
+ vdev->release = video_device_release_empty;
+ vdev->fops = &skel_fops,
+ vdev->ioctl_ops = &skel_ioctl_ops,
+ /*
+ * The main serialization lock. All ioctls are serialized by this
+ * lock. Exception: if q->lock is set, then the streaming ioctls
+ * are serialized by that separate lock.
+ */
+ vdev->lock = &skel->lock;
+ vdev->queue = q;
+ vdev->v4l2_dev = &skel->v4l2_dev;
+ /* Supported SDTV standards, if any */
+ vdev->tvnorms = SKEL_TVNORMS;
+ video_set_drvdata(vdev, skel);
+
+ ret = video_register_device(vdev, VFL_TYPE_GRABBER, -1);
+ if (ret)
+ goto free_ctx;
+
+ dev_info(&pdev->dev, "V4L2 PCI Skeleton Driver loaded\n");
+ return 0;
+
+free_ctx:
+ vb2_dma_contig_cleanup_ctx(skel->alloc_ctx);
+free_hdl:
+ v4l2_ctrl_handler_free(&skel->ctrl_handler);
+ v4l2_device_unregister(&skel->v4l2_dev);
+disable_pci:
+ pci_disable_device(pdev);
+ return ret;
+}
+
+static void skeleton_remove(struct pci_dev *pdev)
+{
+ struct v4l2_device *v4l2_dev = pci_get_drvdata(pdev);
+ struct skeleton *skel = container_of(v4l2_dev, struct skeleton, v4l2_dev);
+
+ video_unregister_device(&skel->vdev);
+ v4l2_ctrl_handler_free(&skel->ctrl_handler);
+ vb2_dma_contig_cleanup_ctx(skel->alloc_ctx);
+ v4l2_device_unregister(&skel->v4l2_dev);
+ pci_disable_device(skel->pdev);
+}
+
+static struct pci_driver skeleton_driver = {
+ .name = KBUILD_MODNAME,
+ .probe = skeleton_probe,
+ .remove = skeleton_remove,
+ .id_table = skeleton_pci_tbl,
+};
+
+module_pci_driver(skeleton_driver);
diff --git a/Documentation/video4linux/videobuf b/Documentation/video4linux/videobuf
new file mode 100644
index 000000000..3ffe9e960
--- /dev/null
+++ b/Documentation/video4linux/videobuf
@@ -0,0 +1,355 @@
+An introduction to the videobuf layer
+Jonathan Corbet <corbet@lwn.net>
+Current as of 2.6.33
+
+The videobuf layer functions as a sort of glue layer between a V4L2 driver
+and user space. It handles the allocation and management of buffers for
+the storage of video frames. There is a set of functions which can be used
+to implement many of the standard POSIX I/O system calls, including read(),
+poll(), and, happily, mmap(). Another set of functions can be used to
+implement the bulk of the V4L2 ioctl() calls related to streaming I/O,
+including buffer allocation, queueing and dequeueing, and streaming
+control. Using videobuf imposes a few design decisions on the driver
+author, but the payback comes in the form of reduced code in the driver and
+a consistent implementation of the V4L2 user-space API.
+
+Buffer types
+
+Not all video devices use the same kind of buffers. In fact, there are (at
+least) three common variations:
+
+ - Buffers which are scattered in both the physical and (kernel) virtual
+ address spaces. (Almost) all user-space buffers are like this, but it
+ makes great sense to allocate kernel-space buffers this way as well when
+ it is possible. Unfortunately, it is not always possible; working with
+ this kind of buffer normally requires hardware which can do
+ scatter/gather DMA operations.
+
+ - Buffers which are physically scattered, but which are virtually
+ contiguous; buffers allocated with vmalloc(), in other words. These
+ buffers are just as hard to use for DMA operations, but they can be
+ useful in situations where DMA is not available but virtually-contiguous
+ buffers are convenient.
+
+ - Buffers which are physically contiguous. Allocation of this kind of
+ buffer can be unreliable on fragmented systems, but simpler DMA
+ controllers cannot deal with anything else.
+
+Videobuf can work with all three types of buffers, but the driver author
+must pick one at the outset and design the driver around that decision.
+
+[It's worth noting that there's a fourth kind of buffer: "overlay" buffers
+which are located within the system's video memory. The overlay
+functionality is considered to be deprecated for most use, but it still
+shows up occasionally in system-on-chip drivers where the performance
+benefits merit the use of this technique. Overlay buffers can be handled
+as a form of scattered buffer, but there are very few implementations in
+the kernel and a description of this technique is currently beyond the
+scope of this document.]
+
+Data structures, callbacks, and initialization
+
+Depending on which type of buffers are being used, the driver should
+include one of the following files:
+
+ <media/videobuf-dma-sg.h> /* Physically scattered */
+ <media/videobuf-vmalloc.h> /* vmalloc() buffers */
+ <media/videobuf-dma-contig.h> /* Physically contiguous */
+
+The driver's data structure describing a V4L2 device should include a
+struct videobuf_queue instance for the management of the buffer queue,
+along with a list_head for the queue of available buffers. There will also
+need to be an interrupt-safe spinlock which is used to protect (at least)
+the queue.
+
+The next step is to write four simple callbacks to help videobuf deal with
+the management of buffers:
+
+ struct videobuf_queue_ops {
+ int (*buf_setup)(struct videobuf_queue *q,
+ unsigned int *count, unsigned int *size);
+ int (*buf_prepare)(struct videobuf_queue *q,
+ struct videobuf_buffer *vb,
+ enum v4l2_field field);
+ void (*buf_queue)(struct videobuf_queue *q,
+ struct videobuf_buffer *vb);
+ void (*buf_release)(struct videobuf_queue *q,
+ struct videobuf_buffer *vb);
+ };
+
+buf_setup() is called early in the I/O process, when streaming is being
+initiated; its purpose is to tell videobuf about the I/O stream. The count
+parameter will be a suggested number of buffers to use; the driver should
+check it for rationality and adjust it if need be. As a practical rule, a
+minimum of two buffers are needed for proper streaming, and there is
+usually a maximum (which cannot exceed 32) which makes sense for each
+device. The size parameter should be set to the expected (maximum) size
+for each frame of data.
+
+Each buffer (in the form of a struct videobuf_buffer pointer) will be
+passed to buf_prepare(), which should set the buffer's size, width, height,
+and field fields properly. If the buffer's state field is
+VIDEOBUF_NEEDS_INIT, the driver should pass it to:
+
+ int videobuf_iolock(struct videobuf_queue* q, struct videobuf_buffer *vb,
+ struct v4l2_framebuffer *fbuf);
+
+Among other things, this call will usually allocate memory for the buffer.
+Finally, the buf_prepare() function should set the buffer's state to
+VIDEOBUF_PREPARED.
+
+When a buffer is queued for I/O, it is passed to buf_queue(), which should
+put it onto the driver's list of available buffers and set its state to
+VIDEOBUF_QUEUED. Note that this function is called with the queue spinlock
+held; if it tries to acquire it as well things will come to a screeching
+halt. Yes, this is the voice of experience. Note also that videobuf may
+wait on the first buffer in the queue; placing other buffers in front of it
+could again gum up the works. So use list_add_tail() to enqueue buffers.
+
+Finally, buf_release() is called when a buffer is no longer intended to be
+used. The driver should ensure that there is no I/O active on the buffer,
+then pass it to the appropriate free routine(s):
+
+ /* Scatter/gather drivers */
+ int videobuf_dma_unmap(struct videobuf_queue *q,
+ struct videobuf_dmabuf *dma);
+ int videobuf_dma_free(struct videobuf_dmabuf *dma);
+
+ /* vmalloc drivers */
+ void videobuf_vmalloc_free (struct videobuf_buffer *buf);
+
+ /* Contiguous drivers */
+ void videobuf_dma_contig_free(struct videobuf_queue *q,
+ struct videobuf_buffer *buf);
+
+One way to ensure that a buffer is no longer under I/O is to pass it to:
+
+ int videobuf_waiton(struct videobuf_buffer *vb, int non_blocking, int intr);
+
+Here, vb is the buffer, non_blocking indicates whether non-blocking I/O
+should be used (it should be zero in the buf_release() case), and intr
+controls whether an interruptible wait is used.
+
+File operations
+
+At this point, much of the work is done; much of the rest is slipping
+videobuf calls into the implementation of the other driver callbacks. The
+first step is in the open() function, which must initialize the
+videobuf queue. The function to use depends on the type of buffer used:
+
+ void videobuf_queue_sg_init(struct videobuf_queue *q,
+ struct videobuf_queue_ops *ops,
+ struct device *dev,
+ spinlock_t *irqlock,
+ enum v4l2_buf_type type,
+ enum v4l2_field field,
+ unsigned int msize,
+ void *priv);
+
+ void videobuf_queue_vmalloc_init(struct videobuf_queue *q,
+ struct videobuf_queue_ops *ops,
+ struct device *dev,
+ spinlock_t *irqlock,
+ enum v4l2_buf_type type,
+ enum v4l2_field field,
+ unsigned int msize,
+ void *priv);
+
+ void videobuf_queue_dma_contig_init(struct videobuf_queue *q,
+ struct videobuf_queue_ops *ops,
+ struct device *dev,
+ spinlock_t *irqlock,
+ enum v4l2_buf_type type,
+ enum v4l2_field field,
+ unsigned int msize,
+ void *priv);
+
+In each case, the parameters are the same: q is the queue structure for the
+device, ops is the set of callbacks as described above, dev is the device
+structure for this video device, irqlock is an interrupt-safe spinlock to
+protect access to the data structures, type is the buffer type used by the
+device (cameras will use V4L2_BUF_TYPE_VIDEO_CAPTURE, for example), field
+describes which field is being captured (often V4L2_FIELD_NONE for
+progressive devices), msize is the size of any containing structure used
+around struct videobuf_buffer, and priv is a private data pointer which
+shows up in the priv_data field of struct videobuf_queue. Note that these
+are void functions which, evidently, are immune to failure.
+
+V4L2 capture drivers can be written to support either of two APIs: the
+read() system call and the rather more complicated streaming mechanism. As
+a general rule, it is necessary to support both to ensure that all
+applications have a chance of working with the device. Videobuf makes it
+easy to do that with the same code. To implement read(), the driver need
+only make a call to one of:
+
+ ssize_t videobuf_read_one(struct videobuf_queue *q,
+ char __user *data, size_t count,
+ loff_t *ppos, int nonblocking);
+
+ ssize_t videobuf_read_stream(struct videobuf_queue *q,
+ char __user *data, size_t count,
+ loff_t *ppos, int vbihack, int nonblocking);
+
+Either one of these functions will read frame data into data, returning the
+amount actually read; the difference is that videobuf_read_one() will only
+read a single frame, while videobuf_read_stream() will read multiple frames
+if they are needed to satisfy the count requested by the application. A
+typical driver read() implementation will start the capture engine, call
+one of the above functions, then stop the engine before returning (though a
+smarter implementation might leave the engine running for a little while in
+anticipation of another read() call happening in the near future).
+
+The poll() function can usually be implemented with a direct call to:
+
+ unsigned int videobuf_poll_stream(struct file *file,
+ struct videobuf_queue *q,
+ poll_table *wait);
+
+Note that the actual wait queue eventually used will be the one associated
+with the first available buffer.
+
+When streaming I/O is done to kernel-space buffers, the driver must support
+the mmap() system call to enable user space to access the data. In many
+V4L2 drivers, the often-complex mmap() implementation simplifies to a
+single call to:
+
+ int videobuf_mmap_mapper(struct videobuf_queue *q,
+ struct vm_area_struct *vma);
+
+Everything else is handled by the videobuf code.
+
+The release() function requires two separate videobuf calls:
+
+ void videobuf_stop(struct videobuf_queue *q);
+ int videobuf_mmap_free(struct videobuf_queue *q);
+
+The call to videobuf_stop() terminates any I/O in progress - though it is
+still up to the driver to stop the capture engine. The call to
+videobuf_mmap_free() will ensure that all buffers have been unmapped; if
+so, they will all be passed to the buf_release() callback. If buffers
+remain mapped, videobuf_mmap_free() returns an error code instead. The
+purpose is clearly to cause the closing of the file descriptor to fail if
+buffers are still mapped, but every driver in the 2.6.32 kernel cheerfully
+ignores its return value.
+
+ioctl() operations
+
+The V4L2 API includes a very long list of driver callbacks to respond to
+the many ioctl() commands made available to user space. A number of these
+- those associated with streaming I/O - turn almost directly into videobuf
+calls. The relevant helper functions are:
+
+ int videobuf_reqbufs(struct videobuf_queue *q,
+ struct v4l2_requestbuffers *req);
+ int videobuf_querybuf(struct videobuf_queue *q, struct v4l2_buffer *b);
+ int videobuf_qbuf(struct videobuf_queue *q, struct v4l2_buffer *b);
+ int videobuf_dqbuf(struct videobuf_queue *q, struct v4l2_buffer *b,
+ int nonblocking);
+ int videobuf_streamon(struct videobuf_queue *q);
+ int videobuf_streamoff(struct videobuf_queue *q);
+
+So, for example, a VIDIOC_REQBUFS call turns into a call to the driver's
+vidioc_reqbufs() callback which, in turn, usually only needs to locate the
+proper struct videobuf_queue pointer and pass it to videobuf_reqbufs().
+These support functions can replace a great deal of buffer management
+boilerplate in a lot of V4L2 drivers.
+
+The vidioc_streamon() and vidioc_streamoff() functions will be a bit more
+complex, of course, since they will also need to deal with starting and
+stopping the capture engine.
+
+Buffer allocation
+
+Thus far, we have talked about buffers, but have not looked at how they are
+allocated. The scatter/gather case is the most complex on this front. For
+allocation, the driver can leave buffer allocation entirely up to the
+videobuf layer; in this case, buffers will be allocated as anonymous
+user-space pages and will be very scattered indeed. If the application is
+using user-space buffers, no allocation is needed; the videobuf layer will
+take care of calling get_user_pages() and filling in the scatterlist array.
+
+If the driver needs to do its own memory allocation, it should be done in
+the vidioc_reqbufs() function, *after* calling videobuf_reqbufs(). The
+first step is a call to:
+
+ struct videobuf_dmabuf *videobuf_to_dma(struct videobuf_buffer *buf);
+
+The returned videobuf_dmabuf structure (defined in
+<media/videobuf-dma-sg.h>) includes a couple of relevant fields:
+
+ struct scatterlist *sglist;
+ int sglen;
+
+The driver must allocate an appropriately-sized scatterlist array and
+populate it with pointers to the pieces of the allocated buffer; sglen
+should be set to the length of the array.
+
+Drivers using the vmalloc() method need not (and cannot) concern themselves
+with buffer allocation at all; videobuf will handle those details. The
+same is normally true of contiguous-DMA drivers as well; videobuf will
+allocate the buffers (with dma_alloc_coherent()) when it sees fit. That
+means that these drivers may be trying to do high-order allocations at any
+time, an operation which is not always guaranteed to work. Some drivers
+play tricks by allocating DMA space at system boot time; videobuf does not
+currently play well with those drivers.
+
+As of 2.6.31, contiguous-DMA drivers can work with a user-supplied buffer,
+as long as that buffer is physically contiguous. Normal user-space
+allocations will not meet that criterion, but buffers obtained from other
+kernel drivers, or those contained within huge pages, will work with these
+drivers.
+
+Filling the buffers
+
+The final part of a videobuf implementation has no direct callback - it's
+the portion of the code which actually puts frame data into the buffers,
+usually in response to interrupts from the device. For all types of
+drivers, this process works approximately as follows:
+
+ - Obtain the next available buffer and make sure that somebody is actually
+ waiting for it.
+
+ - Get a pointer to the memory and put video data there.
+
+ - Mark the buffer as done and wake up the process waiting for it.
+
+Step (1) above is done by looking at the driver-managed list_head structure
+- the one which is filled in the buf_queue() callback. Because starting
+the engine and enqueueing buffers are done in separate steps, it's possible
+for the engine to be running without any buffers available - in the
+vmalloc() case especially. So the driver should be prepared for the list
+to be empty. It is equally possible that nobody is yet interested in the
+buffer; the driver should not remove it from the list or fill it until a
+process is waiting on it. That test can be done by examining the buffer's
+done field (a wait_queue_head_t structure) with waitqueue_active().
+
+A buffer's state should be set to VIDEOBUF_ACTIVE before being mapped for
+DMA; that ensures that the videobuf layer will not try to do anything with
+it while the device is transferring data.
+
+For scatter/gather drivers, the needed memory pointers will be found in the
+scatterlist structure described above. Drivers using the vmalloc() method
+can get a memory pointer with:
+
+ void *videobuf_to_vmalloc(struct videobuf_buffer *buf);
+
+For contiguous DMA drivers, the function to use is:
+
+ dma_addr_t videobuf_to_dma_contig(struct videobuf_buffer *buf);
+
+The contiguous DMA API goes out of its way to hide the kernel-space address
+of the DMA buffer from drivers.
+
+The final step is to set the size field of the relevant videobuf_buffer
+structure to the actual size of the captured image, set state to
+VIDEOBUF_DONE, then call wake_up() on the done queue. At this point, the
+buffer is owned by the videobuf layer and the driver should not touch it
+again.
+
+Developers who are interested in more information can go into the relevant
+header files; there are a few low-level functions declared there which have
+not been talked about here. Also worthwhile is the vivi driver
+(drivers/media/platform/vivi.c), which is maintained as an example of how V4L2
+drivers should be written. Vivi only uses the vmalloc() API, but it's good
+enough to get started with. Note also that all of these calls are exported
+GPL-only, so they will not be available to non-GPL kernel modules.
diff --git a/Documentation/video4linux/vivid.txt b/Documentation/video4linux/vivid.txt
new file mode 100644
index 000000000..cd4b5a1ac
--- /dev/null
+++ b/Documentation/video4linux/vivid.txt
@@ -0,0 +1,1129 @@
+vivid: Virtual Video Test Driver
+================================
+
+This driver emulates video4linux hardware of various types: video capture, video
+output, vbi capture and output, radio receivers and transmitters and a software
+defined radio receiver. In addition a simple framebuffer device is available for
+testing capture and output overlays.
+
+Up to 64 vivid instances can be created, each with up to 16 inputs and 16 outputs.
+
+Each input can be a webcam, TV capture device, S-Video capture device or an HDMI
+capture device. Each output can be an S-Video output device or an HDMI output
+device.
+
+These inputs and outputs act exactly as a real hardware device would behave. This
+allows you to use this driver as a test input for application development, since
+you can test the various features without requiring special hardware.
+
+This document describes the features implemented by this driver:
+
+- Support for read()/write(), MMAP, USERPTR and DMABUF streaming I/O.
+- A large list of test patterns and variations thereof
+- Working brightness, contrast, saturation and hue controls
+- Support for the alpha color component
+- Full colorspace support, including limited/full RGB range
+- All possible control types are present
+- Support for various pixel aspect ratios and video aspect ratios
+- Error injection to test what happens if errors occur
+- Supports crop/compose/scale in any combination for both input and output
+- Can emulate up to 4K resolutions
+- All Field settings are supported for testing interlaced capturing
+- Supports all standard YUV and RGB formats, including two multiplanar YUV formats
+- Raw and Sliced VBI capture and output support
+- Radio receiver and transmitter support, including RDS support
+- Software defined radio (SDR) support
+- Capture and output overlay support
+
+These features will be described in more detail below.
+
+
+Table of Contents
+-----------------
+
+Section 1: Configuring the driver
+Section 2: Video Capture
+Section 2.1: Webcam Input
+Section 2.2: TV and S-Video Inputs
+Section 2.3: HDMI Input
+Section 3: Video Output
+Section 3.1: S-Video Output
+Section 3.2: HDMI Output
+Section 4: VBI Capture
+Section 5: VBI Output
+Section 6: Radio Receiver
+Section 7: Radio Transmitter
+Section 8: Software Defined Radio Receiver
+Section 9: Controls
+Section 9.1: User Controls - Test Controls
+Section 9.2: User Controls - Video Capture
+Section 9.3: User Controls - Audio
+Section 9.4: Vivid Controls
+Section 9.4.1: Test Pattern Controls
+Section 9.4.2: Capture Feature Selection Controls
+Section 9.4.3: Output Feature Selection Controls
+Section 9.4.4: Error Injection Controls
+Section 9.4.5: VBI Raw Capture Controls
+Section 9.5: Digital Video Controls
+Section 9.6: FM Radio Receiver Controls
+Section 9.7: FM Radio Modulator
+Section 10: Video, VBI and RDS Looping
+Section 10.1: Video and Sliced VBI looping
+Section 10.2: Radio & RDS Looping
+Section 11: Cropping, Composing, Scaling
+Section 12: Formats
+Section 13: Capture Overlay
+Section 14: Output Overlay
+Section 15: Some Future Improvements
+
+
+Section 1: Configuring the driver
+---------------------------------
+
+By default the driver will create a single instance that has a video capture
+device with webcam, TV, S-Video and HDMI inputs, a video output device with
+S-Video and HDMI outputs, one vbi capture device, one vbi output device, one
+radio receiver device, one radio transmitter device and one SDR device.
+
+The number of instances, devices, video inputs and outputs and their types are
+all configurable using the following module options:
+
+n_devs: number of driver instances to create. By default set to 1. Up to 64
+ instances can be created.
+
+node_types: which devices should each driver instance create. An array of
+ hexadecimal values, one for each instance. The default is 0x1d3d.
+ Each value is a bitmask with the following meaning:
+ bit 0: Video Capture node
+ bit 2-3: VBI Capture node: 0 = none, 1 = raw vbi, 2 = sliced vbi, 3 = both
+ bit 4: Radio Receiver node
+ bit 5: Software Defined Radio Receiver node
+ bit 8: Video Output node
+ bit 10-11: VBI Output node: 0 = none, 1 = raw vbi, 2 = sliced vbi, 3 = both
+ bit 12: Radio Transmitter node
+ bit 16: Framebuffer for testing overlays
+
+ So to create four instances, the first two with just one video capture
+ device, the second two with just one video output device you would pass
+ these module options to vivid:
+
+ n_devs=4 node_types=0x1,0x1,0x100,0x100
+
+num_inputs: the number of inputs, one for each instance. By default 4 inputs
+ are created for each video capture device. At most 16 inputs can be created,
+ and there must be at least one.
+
+input_types: the input types for each instance, the default is 0xe4. This defines
+ what the type of each input is when the inputs are created for each driver
+ instance. This is a hexadecimal value with up to 16 pairs of bits, each
+ pair gives the type and bits 0-1 map to input 0, bits 2-3 map to input 1,
+ 30-31 map to input 15. Each pair of bits has the following meaning:
+
+ 00: this is a webcam input
+ 01: this is a TV tuner input
+ 10: this is an S-Video input
+ 11: this is an HDMI input
+
+ So to create a video capture device with 8 inputs where input 0 is a TV
+ tuner, inputs 1-3 are S-Video inputs and inputs 4-7 are HDMI inputs you
+ would use the following module options:
+
+ num_inputs=8 input_types=0xffa9
+
+num_outputs: the number of outputs, one for each instance. By default 2 outputs
+ are created for each video output device. At most 16 outputs can be
+ created, and there must be at least one.
+
+output_types: the output types for each instance, the default is 0x02. This defines
+ what the type of each output is when the outputs are created for each
+ driver instance. This is a hexadecimal value with up to 16 bits, each bit
+ gives the type and bit 0 maps to output 0, bit 1 maps to output 1, bit
+ 15 maps to output 15. The meaning of each bit is as follows:
+
+ 0: this is an S-Video output
+ 1: this is an HDMI output
+
+ So to create a video output device with 8 outputs where outputs 0-3 are
+ S-Video outputs and outputs 4-7 are HDMI outputs you would use the
+ following module options:
+
+ num_outputs=8 output_types=0xf0
+
+vid_cap_nr: give the desired videoX start number for each video capture device.
+ The default is -1 which will just take the first free number. This allows
+ you to map capture video nodes to specific videoX device nodes. Example:
+
+ n_devs=4 vid_cap_nr=2,4,6,8
+
+ This will attempt to assign /dev/video2 for the video capture device of
+ the first vivid instance, video4 for the next up to video8 for the last
+ instance. If it can't succeed, then it will just take the next free
+ number.
+
+vid_out_nr: give the desired videoX start number for each video output device.
+ The default is -1 which will just take the first free number.
+
+vbi_cap_nr: give the desired vbiX start number for each vbi capture device.
+ The default is -1 which will just take the first free number.
+
+vbi_out_nr: give the desired vbiX start number for each vbi output device.
+ The default is -1 which will just take the first free number.
+
+radio_rx_nr: give the desired radioX start number for each radio receiver device.
+ The default is -1 which will just take the first free number.
+
+radio_tx_nr: give the desired radioX start number for each radio transmitter
+ device. The default is -1 which will just take the first free number.
+
+sdr_cap_nr: give the desired swradioX start number for each SDR capture device.
+ The default is -1 which will just take the first free number.
+
+ccs_cap_mode: specify the allowed video capture crop/compose/scaling combination
+ for each driver instance. Video capture devices can have any combination
+ of cropping, composing and scaling capabilities and this will tell the
+ vivid driver which of those is should emulate. By default the user can
+ select this through controls.
+
+ The value is either -1 (controlled by the user) or a set of three bits,
+ each enabling (1) or disabling (0) one of the features:
+
+ bit 0: Enable crop support. Cropping will take only part of the
+ incoming picture.
+ bit 1: Enable compose support. Composing will copy the incoming
+ picture into a larger buffer.
+ bit 2: Enable scaling support. Scaling can scale the incoming
+ picture. The scaler of the vivid driver can enlarge up
+ or down to four times the original size. The scaler is
+ very simple and low-quality. Simplicity and speed were
+ key, not quality.
+
+ Note that this value is ignored by webcam inputs: those enumerate
+ discrete framesizes and that is incompatible with cropping, composing
+ or scaling.
+
+ccs_out_mode: specify the allowed video output crop/compose/scaling combination
+ for each driver instance. Video output devices can have any combination
+ of cropping, composing and scaling capabilities and this will tell the
+ vivid driver which of those is should emulate. By default the user can
+ select this through controls.
+
+ The value is either -1 (controlled by the user) or a set of three bits,
+ each enabling (1) or disabling (0) one of the features:
+
+ bit 0: Enable crop support. Cropping will take only part of the
+ outgoing buffer.
+ bit 1: Enable compose support. Composing will copy the incoming
+ buffer into a larger picture frame.
+ bit 2: Enable scaling support. Scaling can scale the incoming
+ buffer. The scaler of the vivid driver can enlarge up
+ or down to four times the original size. The scaler is
+ very simple and low-quality. Simplicity and speed were
+ key, not quality.
+
+multiplanar: select whether each device instance supports multi-planar formats,
+ and thus the V4L2 multi-planar API. By default device instances are
+ single-planar.
+
+ This module option can override that for each instance. Values are:
+
+ 1: this is a single-planar instance.
+ 2: this is a multi-planar instance.
+
+vivid_debug: enable driver debugging info
+
+no_error_inj: if set disable the error injecting controls. This option is
+ needed in order to run a tool like v4l2-compliance. Tools like that
+ exercise all controls including a control like 'Disconnect' which
+ emulates a USB disconnect, making the device inaccessible and so
+ all tests that v4l2-compliance is doing will fail afterwards.
+
+ There may be other situations as well where you want to disable the
+ error injection support of vivid. When this option is set, then the
+ controls that select crop, compose and scale behavior are also
+ removed. Unless overridden by ccs_cap_mode and/or ccs_out_mode the
+ will default to enabling crop, compose and scaling.
+
+Taken together, all these module options allow you to precisely customize
+the driver behavior and test your application with all sorts of permutations.
+It is also very suitable to emulate hardware that is not yet available, e.g.
+when developing software for a new upcoming device.
+
+
+Section 2: Video Capture
+------------------------
+
+This is probably the most frequently used feature. The video capture device
+can be configured by using the module options num_inputs, input_types and
+ccs_cap_mode (see section 1 for more detailed information), but by default
+four inputs are configured: a webcam, a TV tuner, an S-Video and an HDMI
+input, one input for each input type. Those are described in more detail
+below.
+
+Special attention has been given to the rate at which new frames become
+available. The jitter will be around 1 jiffie (that depends on the HZ
+configuration of your kernel, so usually 1/100, 1/250 or 1/1000 of a second),
+but the long-term behavior is exactly following the framerate. So a
+framerate of 59.94 Hz is really different from 60 Hz. If the framerate
+exceeds your kernel's HZ value, then you will get dropped frames, but the
+frame/field sequence counting will keep track of that so the sequence
+count will skip whenever frames are dropped.
+
+
+Section 2.1: Webcam Input
+-------------------------
+
+The webcam input supports three framesizes: 320x180, 640x360 and 1280x720. It
+supports frames per second settings of 10, 15, 25, 30, 50 and 60 fps. Which ones
+are available depends on the chosen framesize: the larger the framesize, the
+lower the maximum frames per second.
+
+The initially selected colorspace when you switch to the webcam input will be
+sRGB.
+
+
+Section 2.2: TV and S-Video Inputs
+----------------------------------
+
+The only difference between the TV and S-Video input is that the TV has a
+tuner. Otherwise they behave identically.
+
+These inputs support audio inputs as well: one TV and one Line-In. They
+both support all TV standards. If the standard is queried, then the Vivid
+controls 'Standard Signal Mode' and 'Standard' determine what
+the result will be.
+
+These inputs support all combinations of the field setting. Special care has
+been taken to faithfully reproduce how fields are handled for the different
+TV standards. This is particularly noticable when generating a horizontally
+moving image so the temporal effect of using interlaced formats becomes clearly
+visible. For 50 Hz standards the top field is the oldest and the bottom field
+is the newest in time. For 60 Hz standards that is reversed: the bottom field
+is the oldest and the top field is the newest in time.
+
+When you start capturing in V4L2_FIELD_ALTERNATE mode the first buffer will
+contain the top field for 50 Hz standards and the bottom field for 60 Hz
+standards. This is what capture hardware does as well.
+
+Finally, for PAL/SECAM standards the first half of the top line contains noise.
+This simulates the Wide Screen Signal that is commonly placed there.
+
+The initially selected colorspace when you switch to the TV or S-Video input
+will be SMPTE-170M.
+
+The pixel aspect ratio will depend on the TV standard. The video aspect ratio
+can be selected through the 'Standard Aspect Ratio' Vivid control.
+Choices are '4x3', '16x9' which will give letterboxed widescreen video and
+'16x9 Anomorphic' which will give full screen squashed anamorphic widescreen
+video that will need to be scaled accordingly.
+
+The TV 'tuner' supports a frequency range of 44-958 MHz. Channels are available
+every 6 MHz, starting from 49.25 MHz. For each channel the generated image
+will be in color for the +/- 0.25 MHz around it, and in grayscale for
++/- 1 MHz around the channel. Beyond that it is just noise. The VIDIOC_G_TUNER
+ioctl will return 100% signal strength for +/- 0.25 MHz and 50% for +/- 1 MHz.
+It will also return correct afc values to show whether the frequency is too
+low or too high.
+
+The audio subchannels that are returned are MONO for the +/- 1 MHz range around
+a valid channel frequency. When the frequency is within +/- 0.25 MHz of the
+channel it will return either MONO, STEREO, either MONO | SAP (for NTSC) or
+LANG1 | LANG2 (for others), or STEREO | SAP.
+
+Which one is returned depends on the chosen channel, each next valid channel
+will cycle through the possible audio subchannel combinations. This allows
+you to test the various combinations by just switching channels..
+
+Finally, for these inputs the v4l2_timecode struct is filled in in the
+dequeued v4l2_buffer struct.
+
+
+Section 2.3: HDMI Input
+-----------------------
+
+The HDMI inputs supports all CEA-861 and DMT timings, both progressive and
+interlaced, for pixelclock frequencies between 25 and 600 MHz. The field
+mode for interlaced formats is always V4L2_FIELD_ALTERNATE. For HDMI the
+field order is always top field first, and when you start capturing an
+interlaced format you will receive the top field first.
+
+The initially selected colorspace when you switch to the HDMI input or
+select an HDMI timing is based on the format resolution: for resolutions
+less than or equal to 720x576 the colorspace is set to SMPTE-170M, for
+others it is set to REC-709 (CEA-861 timings) or sRGB (VESA DMT timings).
+
+The pixel aspect ratio will depend on the HDMI timing: for 720x480 is it
+set as for the NTSC TV standard, for 720x576 it is set as for the PAL TV
+standard, and for all others a 1:1 pixel aspect ratio is returned.
+
+The video aspect ratio can be selected through the 'DV Timings Aspect Ratio'
+Vivid control. Choices are 'Source Width x Height' (just use the
+same ratio as the chosen format), '4x3' or '16x9', either of which can
+result in pillarboxed or letterboxed video.
+
+For HDMI inputs it is possible to set the EDID. By default a simple EDID
+is provided. You can only set the EDID for HDMI inputs. Internally, however,
+the EDID is shared between all HDMI inputs.
+
+No interpretation is done of the EDID data.
+
+
+Section 3: Video Output
+-----------------------
+
+The video output device can be configured by using the module options
+num_outputs, output_types and ccs_out_mode (see section 1 for more detailed
+information), but by default two outputs are configured: an S-Video and an
+HDMI input, one output for each output type. Those are described in more detail
+below.
+
+Like with video capture the framerate is also exact in the long term.
+
+
+Section 3.1: S-Video Output
+---------------------------
+
+This output supports audio outputs as well: "Line-Out 1" and "Line-Out 2".
+The S-Video output supports all TV standards.
+
+This output supports all combinations of the field setting.
+
+The initially selected colorspace when you switch to the TV or S-Video input
+will be SMPTE-170M.
+
+
+Section 3.2: HDMI Output
+------------------------
+
+The HDMI output supports all CEA-861 and DMT timings, both progressive and
+interlaced, for pixelclock frequencies between 25 and 600 MHz. The field
+mode for interlaced formats is always V4L2_FIELD_ALTERNATE.
+
+The initially selected colorspace when you switch to the HDMI output or
+select an HDMI timing is based on the format resolution: for resolutions
+less than or equal to 720x576 the colorspace is set to SMPTE-170M, for
+others it is set to REC-709 (CEA-861 timings) or sRGB (VESA DMT timings).
+
+The pixel aspect ratio will depend on the HDMI timing: for 720x480 is it
+set as for the NTSC TV standard, for 720x576 it is set as for the PAL TV
+standard, and for all others a 1:1 pixel aspect ratio is returned.
+
+An HDMI output has a valid EDID which can be obtained through VIDIOC_G_EDID.
+
+
+Section 4: VBI Capture
+----------------------
+
+There are three types of VBI capture devices: those that only support raw
+(undecoded) VBI, those that only support sliced (decoded) VBI and those that
+support both. This is determined by the node_types module option. In all
+cases the driver will generate valid VBI data: for 60 Hz standards it will
+generate Closed Caption and XDS data. The closed caption stream will
+alternate between "Hello world!" and "Closed captions test" every second.
+The XDS stream will give the current time once a minute. For 50 Hz standards
+it will generate the Wide Screen Signal which is based on the actual Video
+Aspect Ratio control setting and teletext pages 100-159, one page per frame.
+
+The VBI device will only work for the S-Video and TV inputs, it will give
+back an error if the current input is a webcam or HDMI.
+
+
+Section 5: VBI Output
+---------------------
+
+There are three types of VBI output devices: those that only support raw
+(undecoded) VBI, those that only support sliced (decoded) VBI and those that
+support both. This is determined by the node_types module option.
+
+The sliced VBI output supports the Wide Screen Signal and the teletext signal
+for 50 Hz standards and Closed Captioning + XDS for 60 Hz standards.
+
+The VBI device will only work for the S-Video output, it will give
+back an error if the current output is HDMI.
+
+
+Section 6: Radio Receiver
+-------------------------
+
+The radio receiver emulates an FM/AM/SW receiver. The FM band also supports RDS.
+The frequency ranges are:
+
+ FM: 64 MHz - 108 MHz
+ AM: 520 kHz - 1710 kHz
+ SW: 2300 kHz - 26.1 MHz
+
+Valid channels are emulated every 1 MHz for FM and every 100 kHz for AM and SW.
+The signal strength decreases the further the frequency is from the valid
+frequency until it becomes 0% at +/- 50 kHz (FM) or 5 kHz (AM/SW) from the
+ideal frequency. The initial frequency when the driver is loaded is set to
+95 MHz.
+
+The FM receiver supports RDS as well, both using 'Block I/O' and 'Controls'
+modes. In the 'Controls' mode the RDS information is stored in read-only
+controls. These controls are updated every time the frequency is changed,
+or when the tuner status is requested. The Block I/O method uses the read()
+interface to pass the RDS blocks on to the application for decoding.
+
+The RDS signal is 'detected' for +/- 12.5 kHz around the channel frequency,
+and the further the frequency is away from the valid frequency the more RDS
+errors are randomly introduced into the block I/O stream, up to 50% of all
+blocks if you are +/- 12.5 kHz from the channel frequency. All four errors
+can occur in equal proportions: blocks marked 'CORRECTED', blocks marked
+'ERROR', blocks marked 'INVALID' and dropped blocks.
+
+The generated RDS stream contains all the standard fields contained in a
+0B group, and also radio text and the current time.
+
+The receiver supports HW frequency seek, either in Bounded mode, Wrap Around
+mode or both, which is configurable with the "Radio HW Seek Mode" control.
+
+
+Section 7: Radio Transmitter
+----------------------------
+
+The radio transmitter emulates an FM/AM/SW transmitter. The FM band also supports RDS.
+The frequency ranges are:
+
+ FM: 64 MHz - 108 MHz
+ AM: 520 kHz - 1710 kHz
+ SW: 2300 kHz - 26.1 MHz
+
+The initial frequency when the driver is loaded is 95.5 MHz.
+
+The FM transmitter supports RDS as well, both using 'Block I/O' and 'Controls'
+modes. In the 'Controls' mode the transmitted RDS information is configured
+using controls, and in 'Block I/O' mode the blocks are passed to the driver
+using write().
+
+
+Section 8: Software Defined Radio Receiver
+------------------------------------------
+
+The SDR receiver has three frequency bands for the ADC tuner:
+
+ - 300 kHz
+ - 900 kHz - 2800 kHz
+ - 3200 kHz
+
+The RF tuner supports 50 MHz - 2000 MHz.
+
+The generated data contains the In-phase and Quadrature components of a
+1 kHz tone that has an amplitude of sqrt(2).
+
+
+Section 9: Controls
+-------------------
+
+Different devices support different controls. The sections below will describe
+each control and which devices support them.
+
+
+Section 9.1: User Controls - Test Controls
+------------------------------------------
+
+The Button, Boolean, Integer 32 Bits, Integer 64 Bits, Menu, String, Bitmask and
+Integer Menu are controls that represent all possible control types. The Menu
+control and the Integer Menu control both have 'holes' in their menu list,
+meaning that one or more menu items return EINVAL when VIDIOC_QUERYMENU is called.
+Both menu controls also have a non-zero minimum control value. These features
+allow you to check if your application can handle such things correctly.
+These controls are supported for every device type.
+
+
+Section 9.2: User Controls - Video Capture
+------------------------------------------
+
+The following controls are specific to video capture.
+
+The Brightness, Contrast, Saturation and Hue controls actually work and are
+standard. There is one special feature with the Brightness control: each
+video input has its own brightness value, so changing input will restore
+the brightness for that input. In addition, each video input uses a different
+brightness range (minimum and maximum control values). Switching inputs will
+cause a control event to be sent with the V4L2_EVENT_CTRL_CH_RANGE flag set.
+This allows you to test controls that can change their range.
+
+The 'Gain, Automatic' and Gain controls can be used to test volatile controls:
+if 'Gain, Automatic' is set, then the Gain control is volatile and changes
+constantly. If 'Gain, Automatic' is cleared, then the Gain control is a normal
+control.
+
+The 'Horizontal Flip' and 'Vertical Flip' controls can be used to flip the
+image. These combine with the 'Sensor Flipped Horizontally/Vertically' Vivid
+controls.
+
+The 'Alpha Component' control can be used to set the alpha component for
+formats containing an alpha channel.
+
+
+Section 9.3: User Controls - Audio
+----------------------------------
+
+The following controls are specific to video capture and output and radio
+receivers and transmitters.
+
+The 'Volume' and 'Mute' audio controls are typical for such devices to
+control the volume and mute the audio. They don't actually do anything in
+the vivid driver.
+
+
+Section 9.4: Vivid Controls
+---------------------------
+
+These vivid custom controls control the image generation, error injection, etc.
+
+
+Section 9.4.1: Test Pattern Controls
+------------------------------------
+
+The Test Pattern Controls are all specific to video capture.
+
+Test Pattern: selects which test pattern to use. Use the CSC Colorbar for
+ testing colorspace conversions: the colors used in that test pattern
+ map to valid colors in all colorspaces. The colorspace conversion
+ is disabled for the other test patterns.
+
+OSD Text Mode: selects whether the text superimposed on the
+ test pattern should be shown, and if so, whether only counters should
+ be displayed or the full text.
+
+Horizontal Movement: selects whether the test pattern should
+ move to the left or right and at what speed.
+
+Vertical Movement: does the same for the vertical direction.
+
+Show Border: show a two-pixel wide border at the edge of the actual image,
+ excluding letter or pillarboxing.
+
+Show Square: show a square in the middle of the image. If the image is
+ displayed with the correct pixel and image aspect ratio corrections,
+ then the width and height of the square on the monitor should be
+ the same.
+
+Insert SAV Code in Image: adds a SAV (Start of Active Video) code to the image.
+ This can be used to check if such codes in the image are inadvertently
+ interpreted instead of being ignored.
+
+Insert EAV Code in Image: does the same for the EAV (End of Active Video) code.
+
+
+Section 9.4.2: Capture Feature Selection Controls
+-------------------------------------------------
+
+These controls are all specific to video capture.
+
+Sensor Flipped Horizontally: the image is flipped horizontally and the
+ V4L2_IN_ST_HFLIP input status flag is set. This emulates the case where
+ a sensor is for example mounted upside down.
+
+Sensor Flipped Vertically: the image is flipped vertically and the
+ V4L2_IN_ST_VFLIP input status flag is set. This emulates the case where
+ a sensor is for example mounted upside down.
+
+Standard Aspect Ratio: selects if the image aspect ratio as used for the TV or
+ S-Video input should be 4x3, 16x9 or anamorphic widescreen. This may
+ introduce letterboxing.
+
+DV Timings Aspect Ratio: selects if the image aspect ratio as used for the HDMI
+ input should be the same as the source width and height ratio, or if
+ it should be 4x3 or 16x9. This may introduce letter or pillarboxing.
+
+Timestamp Source: selects when the timestamp for each buffer is taken.
+
+Colorspace: selects which colorspace should be used when generating the image.
+ This only applies if the CSC Colorbar test pattern is selected,
+ otherwise the test pattern will go through unconverted (except for
+ the so-called 'Transfer Function' corrections and the R'G'B' to Y'CbCr
+ conversion). This behavior is also what you want, since a 75% Colorbar
+ should really have 75% signal intensity and should not be affected
+ by colorspace conversions.
+
+ Changing the colorspace will result in the V4L2_EVENT_SOURCE_CHANGE
+ to be sent since it emulates a detected colorspace change.
+
+Y'CbCr Encoding: selects which Y'CbCr encoding should be used when generating
+ a Y'CbCr image. This only applies if the CSC Colorbar test pattern is
+ selected, and if the format is set to a Y'CbCr format as opposed to an
+ RGB format.
+
+ Changing the Y'CbCr encoding will result in the V4L2_EVENT_SOURCE_CHANGE
+ to be sent since it emulates a detected colorspace change.
+
+Quantization: selects which quantization should be used for the RGB or Y'CbCr
+ encoding when generating the test pattern. This only applies if the CSC
+ Colorbar test pattern is selected.
+
+ Changing the quantization will result in the V4L2_EVENT_SOURCE_CHANGE
+ to be sent since it emulates a detected colorspace change.
+
+Limited RGB Range (16-235): selects if the RGB range of the HDMI source should
+ be limited or full range. This combines with the Digital Video 'Rx RGB
+ Quantization Range' control and can be used to test what happens if
+ a source provides you with the wrong quantization range information.
+ See the description of that control for more details.
+
+Apply Alpha To Red Only: apply the alpha channel as set by the 'Alpha Component'
+ user control to the red color of the test pattern only.
+
+Enable Capture Cropping: enables crop support. This control is only present if
+ the ccs_cap_mode module option is set to the default value of -1 and if
+ the no_error_inj module option is set to 0 (the default).
+
+Enable Capture Composing: enables composing support. This control is only
+ present if the ccs_cap_mode module option is set to the default value of
+ -1 and if the no_error_inj module option is set to 0 (the default).
+
+Enable Capture Scaler: enables support for a scaler (maximum 4 times upscaling
+ and downscaling). This control is only present if the ccs_cap_mode
+ module option is set to the default value of -1 and if the no_error_inj
+ module option is set to 0 (the default).
+
+Maximum EDID Blocks: determines how many EDID blocks the driver supports.
+ Note that the vivid driver does not actually interpret new EDID
+ data, it just stores it. It allows for up to 256 EDID blocks
+ which is the maximum supported by the standard.
+
+Fill Percentage of Frame: can be used to draw only the top X percent
+ of the image. Since each frame has to be drawn by the driver, this
+ demands a lot of the CPU. For large resolutions this becomes
+ problematic. By drawing only part of the image this CPU load can
+ be reduced.
+
+
+Section 9.4.3: Output Feature Selection Controls
+------------------------------------------------
+
+These controls are all specific to video output.
+
+Enable Output Cropping: enables crop support. This control is only present if
+ the ccs_out_mode module option is set to the default value of -1 and if
+ the no_error_inj module option is set to 0 (the default).
+
+Enable Output Composing: enables composing support. This control is only
+ present if the ccs_out_mode module option is set to the default value of
+ -1 and if the no_error_inj module option is set to 0 (the default).
+
+Enable Output Scaler: enables support for a scaler (maximum 4 times upscaling
+ and downscaling). This control is only present if the ccs_out_mode
+ module option is set to the default value of -1 and if the no_error_inj
+ module option is set to 0 (the default).
+
+
+Section 9.4.4: Error Injection Controls
+---------------------------------------
+
+The following two controls are only valid for video and vbi capture.
+
+Standard Signal Mode: selects the behavior of VIDIOC_QUERYSTD: what should
+ it return?
+
+ Changing this control will result in the V4L2_EVENT_SOURCE_CHANGE
+ to be sent since it emulates a changed input condition (e.g. a cable
+ was plugged in or out).
+
+Standard: selects the standard that VIDIOC_QUERYSTD should return if the
+ previous control is set to "Selected Standard".
+
+ Changing this control will result in the V4L2_EVENT_SOURCE_CHANGE
+ to be sent since it emulates a changed input standard.
+
+
+The following two controls are only valid for video capture.
+
+DV Timings Signal Mode: selects the behavior of VIDIOC_QUERY_DV_TIMINGS: what
+ should it return?
+
+ Changing this control will result in the V4L2_EVENT_SOURCE_CHANGE
+ to be sent since it emulates a changed input condition (e.g. a cable
+ was plugged in or out).
+
+DV Timings: selects the timings the VIDIOC_QUERY_DV_TIMINGS should return
+ if the previous control is set to "Selected DV Timings".
+
+ Changing this control will result in the V4L2_EVENT_SOURCE_CHANGE
+ to be sent since it emulates changed input timings.
+
+
+The following controls are only present if the no_error_inj module option
+is set to 0 (the default). These controls are valid for video and vbi
+capture and output streams and for the SDR capture device except for the
+Disconnect control which is valid for all devices.
+
+Wrap Sequence Number: test what happens when you wrap the sequence number in
+ struct v4l2_buffer around.
+
+Wrap Timestamp: test what happens when you wrap the timestamp in struct
+ v4l2_buffer around.
+
+Percentage of Dropped Buffers: sets the percentage of buffers that
+ are never returned by the driver (i.e., they are dropped).
+
+Disconnect: emulates a USB disconnect. The device will act as if it has
+ been disconnected. Only after all open filehandles to the device
+ node have been closed will the device become 'connected' again.
+
+Inject V4L2_BUF_FLAG_ERROR: when pressed, the next frame returned by
+ the driver will have the error flag set (i.e. the frame is marked
+ corrupt).
+
+Inject VIDIOC_REQBUFS Error: when pressed, the next REQBUFS or CREATE_BUFS
+ ioctl call will fail with an error. To be precise: the videobuf2
+ queue_setup() op will return -EINVAL.
+
+Inject VIDIOC_QBUF Error: when pressed, the next VIDIOC_QBUF or
+ VIDIOC_PREPARE_BUFFER ioctl call will fail with an error. To be
+ precise: the videobuf2 buf_prepare() op will return -EINVAL.
+
+Inject VIDIOC_STREAMON Error: when pressed, the next VIDIOC_STREAMON ioctl
+ call will fail with an error. To be precise: the videobuf2
+ start_streaming() op will return -EINVAL.
+
+Inject Fatal Streaming Error: when pressed, the streaming core will be
+ marked as having suffered a fatal error, the only way to recover
+ from that is to stop streaming. To be precise: the videobuf2
+ vb2_queue_error() function is called.
+
+
+Section 9.4.5: VBI Raw Capture Controls
+---------------------------------------
+
+Interlaced VBI Format: if set, then the raw VBI data will be interlaced instead
+ of providing it grouped by field.
+
+
+Section 9.5: Digital Video Controls
+-----------------------------------
+
+Rx RGB Quantization Range: sets the RGB quantization detection of the HDMI
+ input. This combines with the Vivid 'Limited RGB Range (16-235)'
+ control and can be used to test what happens if a source provides
+ you with the wrong quantization range information. This can be tested
+ by selecting an HDMI input, setting this control to Full or Limited
+ range and selecting the opposite in the 'Limited RGB Range (16-235)'
+ control. The effect is easy to see if the 'Gray Ramp' test pattern
+ is selected.
+
+Tx RGB Quantization Range: sets the RGB quantization detection of the HDMI
+ output. It is currently not used for anything in vivid, but most HDMI
+ transmitters would typically have this control.
+
+Transmit Mode: sets the transmit mode of the HDMI output to HDMI or DVI-D. This
+ affects the reported colorspace since DVI_D outputs will always use
+ sRGB.
+
+
+Section 9.6: FM Radio Receiver Controls
+---------------------------------------
+
+RDS Reception: set if the RDS receiver should be enabled.
+
+RDS Program Type:
+RDS PS Name:
+RDS Radio Text:
+RDS Traffic Announcement:
+RDS Traffic Program:
+RDS Music: these are all read-only controls. If RDS Rx I/O Mode is set to
+ "Block I/O", then they are inactive as well. If RDS Rx I/O Mode is set
+ to "Controls", then these controls report the received RDS data. Note
+ that the vivid implementation of this is pretty basic: they are only
+ updated when you set a new frequency or when you get the tuner status
+ (VIDIOC_G_TUNER).
+
+Radio HW Seek Mode: can be one of "Bounded", "Wrap Around" or "Both". This
+ determines if VIDIOC_S_HW_FREQ_SEEK will be bounded by the frequency
+ range or wrap-around or if it is selectable by the user.
+
+Radio Programmable HW Seek: if set, then the user can provide the lower and
+ upper bound of the HW Seek. Otherwise the frequency range boundaries
+ will be used.
+
+Generate RBDS Instead of RDS: if set, then generate RBDS (the US variant of
+ RDS) data instead of RDS (European-style RDS). This affects only the
+ PICODE and PTY codes.
+
+RDS Rx I/O Mode: this can be "Block I/O" where the RDS blocks have to be read()
+ by the application, or "Controls" where the RDS data is provided by
+ the RDS controls mentioned above.
+
+
+Section 9.7: FM Radio Modulator Controls
+----------------------------------------
+
+RDS Program ID:
+RDS Program Type:
+RDS PS Name:
+RDS Radio Text:
+RDS Stereo:
+RDS Artificial Head:
+RDS Compressed:
+RDS Dymanic PTY:
+RDS Traffic Announcement:
+RDS Traffic Program:
+RDS Music: these are all controls that set the RDS data that is transmitted by
+ the FM modulator.
+
+RDS Tx I/O Mode: this can be "Block I/O" where the application has to use write()
+ to pass the RDS blocks to the driver, or "Controls" where the RDS data is
+ provided by the RDS controls mentioned above.
+
+
+Section 10: Video, VBI and RDS Looping
+--------------------------------------
+
+The vivid driver supports looping of video output to video input, VBI output
+to VBI input and RDS output to RDS input. For video/VBI looping this emulates
+as if a cable was hooked up between the output and input connector. So video
+and VBI looping is only supported between S-Video and HDMI inputs and outputs.
+VBI is only valid for S-Video as it makes no sense for HDMI.
+
+Since radio is wireless this looping always happens if the radio receiver
+frequency is close to the radio transmitter frequency. In that case the radio
+transmitter will 'override' the emulated radio stations.
+
+Looping is currently supported only between devices created by the same
+vivid driver instance.
+
+
+Section 10.1: Video and Sliced VBI looping
+------------------------------------------
+
+The way to enable video/VBI looping is currently fairly crude. A 'Loop Video'
+control is available in the "Vivid" control class of the video
+output and VBI output devices. When checked the video looping will be enabled.
+Once enabled any video S-Video or HDMI input will show a static test pattern
+until the video output has started. At that time the video output will be
+looped to the video input provided that:
+
+- the input type matches the output type. So the HDMI input cannot receive
+ video from the S-Video output.
+
+- the video resolution of the video input must match that of the video output.
+ So it is not possible to loop a 50 Hz (720x576) S-Video output to a 60 Hz
+ (720x480) S-Video input, or a 720p60 HDMI output to a 1080p30 input.
+
+- the pixel formats must be identical on both sides. Otherwise the driver would
+ have to do pixel format conversion as well, and that's taking things too far.
+
+- the field settings must be identical on both sides. Same reason as above:
+ requiring the driver to convert from one field format to another complicated
+ matters too much. This also prohibits capturing with 'Field Top' or 'Field
+ Bottom' when the output video is set to 'Field Alternate'. This combination,
+ while legal, became too complicated to support. Both sides have to be 'Field
+ Alternate' for this to work. Also note that for this specific case the
+ sequence and field counting in struct v4l2_buffer on the capture side may not
+ be 100% accurate.
+
+- field settings V4L2_FIELD_SEQ_TB/BT are not supported. While it is possible to
+ implement this, it would mean a lot of work to get this right. Since these
+ field values are rarely used the decision was made not to implement this for
+ now.
+
+- on the input side the "Standard Signal Mode" for the S-Video input or the
+ "DV Timings Signal Mode" for the HDMI input should be configured so that a
+ valid signal is passed to the video input.
+
+The framerates do not have to match, although this might change in the future.
+
+By default you will see the OSD text superimposed on top of the looped video.
+This can be turned off by changing the "OSD Text Mode" control of the video
+capture device.
+
+For VBI looping to work all of the above must be valid and in addition the vbi
+output must be configured for sliced VBI. The VBI capture side can be configured
+for either raw or sliced VBI. Note that at the moment only CC/XDS (60 Hz formats)
+and WSS (50 Hz formats) VBI data is looped. Teletext VBI data is not looped.
+
+
+Section 10.2: Radio & RDS Looping
+---------------------------------
+
+As mentioned in section 6 the radio receiver emulates stations are regular
+frequency intervals. Depending on the frequency of the radio receiver a
+signal strength value is calculated (this is returned by VIDIOC_G_TUNER).
+However, it will also look at the frequency set by the radio transmitter and
+if that results in a higher signal strength than the settings of the radio
+transmitter will be used as if it was a valid station. This also includes
+the RDS data (if any) that the transmitter 'transmits'. This is received
+faithfully on the receiver side. Note that when the driver is loaded the
+frequencies of the radio receiver and transmitter are not identical, so
+initially no looping takes place.
+
+
+Section 11: Cropping, Composing, Scaling
+----------------------------------------
+
+This driver supports cropping, composing and scaling in any combination. Normally
+which features are supported can be selected through the Vivid controls,
+but it is also possible to hardcode it when the module is loaded through the
+ccs_cap_mode and ccs_out_mode module options. See section 1 on the details of
+these module options.
+
+This allows you to test your application for all these variations.
+
+Note that the webcam input never supports cropping, composing or scaling. That
+only applies to the TV/S-Video/HDMI inputs and outputs. The reason is that
+webcams, including this virtual implementation, normally use
+VIDIOC_ENUM_FRAMESIZES to list a set of discrete framesizes that it supports.
+And that does not combine with cropping, composing or scaling. This is
+primarily a limitation of the V4L2 API which is carefully reproduced here.
+
+The minimum and maximum resolutions that the scaler can achieve are 16x16 and
+(4096 * 4) x (2160 x 4), but it can only scale up or down by a factor of 4 or
+less. So for a source resolution of 1280x720 the minimum the scaler can do is
+320x180 and the maximum is 5120x2880. You can play around with this using the
+qv4l2 test tool and you will see these dependencies.
+
+This driver also supports larger 'bytesperline' settings, something that
+VIDIOC_S_FMT allows but that few drivers implement.
+
+The scaler is a simple scaler that uses the Coarse Bresenham algorithm. It's
+designed for speed and simplicity, not quality.
+
+If the combination of crop, compose and scaling allows it, then it is possible
+to change crop and compose rectangles on the fly.
+
+
+Section 12: Formats
+-------------------
+
+The driver supports all the regular packed YUYV formats, 16, 24 and 32 RGB
+packed formats and two multiplanar formats (one luma and one chroma plane).
+
+The alpha component can be set through the 'Alpha Component' User control
+for those formats that support it. If the 'Apply Alpha To Red Only' control
+is set, then the alpha component is only used for the color red and set to
+0 otherwise.
+
+The driver has to be configured to support the multiplanar formats. By default
+the driver instances are single-planar. This can be changed by setting the
+multiplanar module option, see section 1 for more details on that option.
+
+If the driver instance is using the multiplanar formats/API, then the first
+single planar format (YUYV) and the multiplanar NV16M and NV61M formats the
+will have a plane that has a non-zero data_offset of 128 bytes. It is rare for
+data_offset to be non-zero, so this is a useful feature for testing applications.
+
+Video output will also honor any data_offset that the application set.
+
+
+Section 13: Capture Overlay
+---------------------------
+
+Note: capture overlay support is implemented primarily to test the existing
+V4L2 capture overlay API. In practice few if any GPUs support such overlays
+anymore, and neither are they generally needed anymore since modern hardware
+is so much more capable. By setting flag 0x10000 in the node_types module
+option the vivid driver will create a simple framebuffer device that can be
+used for testing this API. Whether this API should be used for new drivers is
+questionable.
+
+This driver has support for a destructive capture overlay with bitmap clipping
+and list clipping (up to 16 rectangles) capabilities. Overlays are not
+supported for multiplanar formats. It also honors the struct v4l2_window field
+setting: if it is set to FIELD_TOP or FIELD_BOTTOM and the capture setting is
+FIELD_ALTERNATE, then only the top or bottom fields will be copied to the overlay.
+
+The overlay only works if you are also capturing at that same time. This is a
+vivid limitation since it copies from a buffer to the overlay instead of
+filling the overlay directly. And if you are not capturing, then no buffers
+are available to fill.
+
+In addition, the pixelformat of the capture format and that of the framebuffer
+must be the same for the overlay to work. Otherwise VIDIOC_OVERLAY will return
+an error.
+
+In order to really see what it going on you will need to create two vivid
+instances: the first with a framebuffer enabled. You configure the capture
+overlay of the second instance to use the framebuffer of the first, then
+you start capturing in the second instance. For the first instance you setup
+the output overlay for the video output, turn on video looping and capture
+to see the blended framebuffer overlay that's being written to by the second
+instance. This setup would require the following commands:
+
+ $ sudo modprobe vivid n_devs=2 node_types=0x10101,0x1
+ $ v4l2-ctl -d1 --find-fb
+ /dev/fb1 is the framebuffer associated with base address 0x12800000
+ $ sudo v4l2-ctl -d2 --set-fbuf fb=1
+ $ v4l2-ctl -d1 --set-fbuf fb=1
+ $ v4l2-ctl -d0 --set-fmt-video=pixelformat='AR15'
+ $ v4l2-ctl -d1 --set-fmt-video-out=pixelformat='AR15'
+ $ v4l2-ctl -d2 --set-fmt-video=pixelformat='AR15'
+ $ v4l2-ctl -d0 -i2
+ $ v4l2-ctl -d2 -i2
+ $ v4l2-ctl -d2 -c horizontal_movement=4
+ $ v4l2-ctl -d1 --overlay=1
+ $ v4l2-ctl -d1 -c loop_video=1
+ $ v4l2-ctl -d2 --stream-mmap --overlay=1
+
+And from another console:
+
+ $ v4l2-ctl -d1 --stream-out-mmap
+
+And yet another console:
+
+ $ qv4l2
+
+and start streaming.
+
+As you can see, this is not for the faint of heart...
+
+
+Section 14: Output Overlay
+--------------------------
+
+Note: output overlays are primarily implemented in order to test the existing
+V4L2 output overlay API. Whether this API should be used for new drivers is
+questionable.
+
+This driver has support for an output overlay and is capable of:
+
+ - bitmap clipping,
+ - list clipping (up to 16 rectangles)
+ - chromakey
+ - source chromakey
+ - global alpha
+ - local alpha
+ - local inverse alpha
+
+Output overlays are not supported for multiplanar formats. In addition, the
+pixelformat of the capture format and that of the framebuffer must be the
+same for the overlay to work. Otherwise VIDIOC_OVERLAY will return an error.
+
+Output overlays only work if the driver has been configured to create a
+framebuffer by setting flag 0x10000 in the node_types module option. The
+created framebuffer has a size of 720x576 and supports ARGB 1:5:5:5 and
+RGB 5:6:5.
+
+In order to see the effects of the various clipping, chromakeying or alpha
+processing capabilities you need to turn on video looping and see the results
+on the capture side. The use of the clipping, chromakeying or alpha processing
+capabilities will slow down the video loop considerably as a lot of checks have
+to be done per pixel.
+
+
+Section 15: Some Future Improvements
+------------------------------------
+
+Just as a reminder and in no particular order:
+
+- Add a virtual alsa driver to test audio
+- Add virtual sub-devices and media controller support
+- Some support for testing compressed video
+- Add support to loop raw VBI output to raw VBI input
+- Add support to loop teletext sliced VBI output to VBI input
+- Fix sequence/field numbering when looping of video with alternate fields
+- Add support for V4L2_CID_BG_COLOR for video outputs
+- Add ARGB888 overlay support: better testing of the alpha channel
+- Add custom DV timings support
+- Add support for V4L2_DV_FL_REDUCED_FPS
+- Improve pixel aspect support in the tpg code by passing a real v4l2_fract
+- Use per-queue locks and/or per-device locks to improve throughput
+- Add support to loop from a specific output to a specific input across
+ vivid instances
+- Add support for VIDIOC_EXPBUF once support for that has been added to vb2
+- The SDR radio should use the same 'frequencies' for stations as the normal
+ radio receiver, and give back noise if the frequency doesn't match up with
+ a station frequency
+- Improve the sine generation of the SDR radio.
+- Make a thread for the RDS generation, that would help in particular for the
+ "Controls" RDS Rx I/O Mode as the read-only RDS controls could be updated
+ in real-time.
diff --git a/Documentation/video4linux/zr364xx.txt b/Documentation/video4linux/zr364xx.txt
new file mode 100644
index 000000000..d98e4d302
--- /dev/null
+++ b/Documentation/video4linux/zr364xx.txt
@@ -0,0 +1,69 @@
+Zoran 364xx based USB webcam module version 0.72
+site: http://royale.zerezo.com/zr364xx/
+mail: royale@zerezo.com
+
+introduction:
+This brings support under Linux for the Aiptek PocketDV 3300 in webcam mode.
+If you just want to get on your PC the pictures and movies on the camera, you should use the usb-storage module instead.
+The driver works with several other cameras in webcam mode (see the list below).
+Maybe this code can work for other JPEG/USB cams based on the Coach chips from Zoran?
+Possible chipsets are : ZR36430 (ZR36430BGC) and maybe ZR36431, ZR36440, ZR36442...
+You can try the experience changing the vendor/product ID values (look at the source code).
+You can get these values by looking at /var/log/messages when you plug your camera, or by typing : cat /proc/bus/usb/devices.
+If you manage to use your cam with this code, you can send me a mail (royale@zerezo.com) with the name of your cam and a patch if needed.
+This is a beta release of the driver.
+Since version 0.70, this driver is only compatible with V4L2 API and 2.6.x kernels.
+If you need V4L1 or 2.4x kernels support, please use an older version, but the code is not maintained anymore.
+Good luck!
+
+install:
+In order to use this driver, you must compile it with your kernel.
+Location: Device Drivers -> Multimedia devices -> Video For Linux -> Video Capture Adapters -> V4L USB devices
+
+usage:
+modprobe zr364xx debug=X mode=Y
+ - debug : set to 1 to enable verbose debug messages
+ - mode : 0 = 320x240, 1 = 160x120, 2 = 640x480
+You can then use the camera with V4L2 compatible applications, for example Ekiga.
+To capture a single image, try this: dd if=/dev/video0 of=test.jpg bs=1M count=1
+
+links :
+http://mxhaard.free.fr/ (support for many others cams including some Aiptek PocketDV)
+http://www.harmwal.nl/pccam880/ (this project also supports cameras based on this chipset)
+
+supported devices:
+------ ------- ----------- -----
+Vendor Product Distributor Model
+------ ------- ----------- -----
+0x08ca 0x0109 Aiptek PocketDV 3300
+0x08ca 0x0109 Maxell Maxcam PRO DV3
+0x041e 0x4024 Creative PC-CAM 880
+0x0d64 0x0108 Aiptek Fidelity 3200
+0x0d64 0x0108 Praktica DCZ 1.3 S
+0x0d64 0x0108 Genius Digital Camera (?)
+0x0d64 0x0108 DXG Technology Fashion Cam
+0x0546 0x3187 Polaroid iON 230
+0x0d64 0x3108 Praktica Exakta DC 2200
+0x0d64 0x3108 Genius G-Shot D211
+0x0595 0x4343 Concord Eye-Q Duo 1300
+0x0595 0x4343 Concord Eye-Q Duo 2000
+0x0595 0x4343 Fujifilm EX-10
+0x0595 0x4343 Ricoh RDC-6000
+0x0595 0x4343 Digitrex DSC 1300
+0x0595 0x4343 Firstline FDC 2000
+0x0bb0 0x500d Concord EyeQ Go Wireless
+0x0feb 0x2004 CRS Electronic 3.3 Digital Camera
+0x0feb 0x2004 Packard Bell DSC-300
+0x055f 0xb500 Mustek MDC 3000
+0x08ca 0x2062 Aiptek PocketDV 5700
+0x052b 0x1a18 Chiphead Megapix V12
+0x04c8 0x0729 Konica Revio 2
+0x04f2 0xa208 Creative PC-CAM 850
+0x0784 0x0040 Traveler Slimline X5
+0x06d6 0x0034 Trust Powerc@m 750
+0x0a17 0x0062 Pentax Optio 50L
+0x06d6 0x003b Trust Powerc@m 970Z
+0x0a17 0x004e Pentax Optio 50
+0x041e 0x405d Creative DiVi CAM 516
+0x08ca 0x2102 Aiptek DV T300
+0x06d6 0x003d Trust Powerc@m 910Z
diff --git a/Documentation/virtual/00-INDEX b/Documentation/virtual/00-INDEX
new file mode 100644
index 000000000..af0d23968
--- /dev/null
+++ b/Documentation/virtual/00-INDEX
@@ -0,0 +1,11 @@
+Virtualization support in the Linux kernel.
+
+00-INDEX
+ - this file.
+
+paravirt_ops.txt
+ - Describes the Linux kernel pv_ops to support different hypervisors
+kvm/
+ - Kernel Virtual Machine. See also http://linux-kvm.org
+uml/
+ - User Mode Linux, builds/runs Linux kernel as a userspace program.
diff --git a/Documentation/virtual/kvm/00-INDEX b/Documentation/virtual/kvm/00-INDEX
new file mode 100644
index 000000000..fee9f2bf9
--- /dev/null
+++ b/Documentation/virtual/kvm/00-INDEX
@@ -0,0 +1,26 @@
+00-INDEX
+ - this file.
+api.txt
+ - KVM userspace API.
+cpuid.txt
+ - KVM-specific cpuid leaves (x86).
+devices/
+ - KVM_CAP_DEVICE_CTRL userspace API.
+hypercalls.txt
+ - KVM hypercalls.
+locking.txt
+ - notes on KVM locks.
+mmu.txt
+ - the x86 kvm shadow mmu.
+msr.txt
+ - KVM-specific MSRs (x86).
+nested-vmx.txt
+ - notes on nested virtualization for Intel x86 processors.
+ppc-pv.txt
+ - the paravirtualization interface on PowerPC.
+review-checklist.txt
+ - review checklist for KVM patches.
+s390-diag.txt
+ - Diagnose hypercall description (for IBM S/390)
+timekeeping.txt
+ - timekeeping virtualization for x86-based architectures.
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
new file mode 100644
index 000000000..9fa2bf8c3
--- /dev/null
+++ b/Documentation/virtual/kvm/api.txt
@@ -0,0 +1,3592 @@
+The Definitive KVM (Kernel-based Virtual Machine) API Documentation
+===================================================================
+
+1. General description
+----------------------
+
+The kvm API is a set of ioctls that are issued to control various aspects
+of a virtual machine. The ioctls belong to three classes
+
+ - System ioctls: These query and set global attributes which affect the
+ whole kvm subsystem. In addition a system ioctl is used to create
+ virtual machines
+
+ - VM ioctls: These query and set attributes that affect an entire virtual
+ machine, for example memory layout. In addition a VM ioctl is used to
+ create virtual cpus (vcpus).
+
+ Only run VM ioctls from the same process (address space) that was used
+ to create the VM.
+
+ - vcpu ioctls: These query and set attributes that control the operation
+ of a single virtual cpu.
+
+ Only run vcpu ioctls from the same thread that was used to create the
+ vcpu.
+
+
+2. File descriptors
+-------------------
+
+The kvm API is centered around file descriptors. An initial
+open("/dev/kvm") obtains a handle to the kvm subsystem; this handle
+can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this
+handle will create a VM file descriptor which can be used to issue VM
+ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu
+and return a file descriptor pointing to it. Finally, ioctls on a vcpu
+fd can be used to control the vcpu, including the important task of
+actually running guest code.
+
+In general file descriptors can be migrated among processes by means
+of fork() and the SCM_RIGHTS facility of unix domain socket. These
+kinds of tricks are explicitly not supported by kvm. While they will
+not cause harm to the host, their actual behavior is not guaranteed by
+the API. The only supported use is one virtual machine per process,
+and one vcpu per thread.
+
+
+3. Extensions
+-------------
+
+As of Linux 2.6.22, the KVM ABI has been stabilized: no backward
+incompatible change are allowed. However, there is an extension
+facility that allows backward-compatible extensions to the API to be
+queried and used.
+
+The extension mechanism is not based on the Linux version number.
+Instead, kvm defines extension identifiers and a facility to query
+whether a particular extension identifier is available. If it is, a
+set of ioctls is available for application use.
+
+
+4. API description
+------------------
+
+This section describes ioctls that can be used to control kvm guests.
+For each ioctl, the following information is provided along with a
+description:
+
+ Capability: which KVM extension provides this ioctl. Can be 'basic',
+ which means that is will be provided by any kernel that supports
+ API version 12 (see section 4.1), a KVM_CAP_xyz constant, which
+ means availability needs to be checked with KVM_CHECK_EXTENSION
+ (see section 4.4), or 'none' which means that while not all kernels
+ support this ioctl, there's no capability bit to check its
+ availability: for kernels that don't support the ioctl,
+ the ioctl returns -ENOTTY.
+
+ Architectures: which instruction set architectures provide this ioctl.
+ x86 includes both i386 and x86_64.
+
+ Type: system, vm, or vcpu.
+
+ Parameters: what parameters are accepted by the ioctl.
+
+ Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
+ are not detailed, but errors with specific meanings are.
+
+
+4.1 KVM_GET_API_VERSION
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: none
+Returns: the constant KVM_API_VERSION (=12)
+
+This identifies the API version as the stable kvm API. It is not
+expected that this number will change. However, Linux 2.6.20 and
+2.6.21 report earlier versions; these are not documented and not
+supported. Applications should refuse to run if KVM_GET_API_VERSION
+returns a value other than 12. If this check passes, all ioctls
+described as 'basic' will be available.
+
+
+4.2 KVM_CREATE_VM
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: machine type identifier (KVM_VM_*)
+Returns: a VM fd that can be used to control the new virtual machine.
+
+The new VM has no virtual cpus and no memory. An mmap() of a VM fd
+will access the virtual machine's physical address space; offset zero
+corresponds to guest physical address zero. Use of mmap() on a VM fd
+is discouraged if userspace memory allocation (KVM_CAP_USER_MEMORY) is
+available.
+You most certainly want to use 0 as machine type.
+
+In order to create user controlled virtual machines on S390, check
+KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL as
+privileged user (CAP_SYS_ADMIN).
+
+
+4.3 KVM_GET_MSR_INDEX_LIST
+
+Capability: basic
+Architectures: x86
+Type: system
+Parameters: struct kvm_msr_list (in/out)
+Returns: 0 on success; -1 on error
+Errors:
+ E2BIG: the msr index list is to be to fit in the array specified by
+ the user.
+
+struct kvm_msr_list {
+ __u32 nmsrs; /* number of msrs in entries */
+ __u32 indices[0];
+};
+
+This ioctl returns the guest msrs that are supported. The list varies
+by kvm version and host processor, but does not change otherwise. The
+user fills in the size of the indices array in nmsrs, and in return
+kvm adjusts nmsrs to reflect the actual number of msrs and fills in
+the indices array with their numbers.
+
+Note: if kvm indicates supports MCE (KVM_CAP_MCE), then the MCE bank MSRs are
+not returned in the MSR list, as different vcpus can have a different number
+of banks, as set via the KVM_X86_SETUP_MCE ioctl.
+
+
+4.4 KVM_CHECK_EXTENSION
+
+Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
+Architectures: all
+Type: system ioctl, vm ioctl
+Parameters: extension identifier (KVM_CAP_*)
+Returns: 0 if unsupported; 1 (or some other positive integer) if supported
+
+The API allows the application to query about extensions to the core
+kvm API. Userspace passes an extension identifier (an integer) and
+receives an integer that describes the extension availability.
+Generally 0 means no and 1 means yes, but some extensions may report
+additional information in the integer return value.
+
+Based on their initialization different VMs may have different capabilities.
+It is thus encouraged to use the vm ioctl to query for capabilities (available
+with KVM_CAP_CHECK_EXTENSION_VM on the vm fd)
+
+4.5 KVM_GET_VCPU_MMAP_SIZE
+
+Capability: basic
+Architectures: all
+Type: system ioctl
+Parameters: none
+Returns: size of vcpu mmap area, in bytes
+
+The KVM_RUN ioctl (cf.) communicates with userspace via a shared
+memory region. This ioctl returns the size of that region. See the
+KVM_RUN documentation for details.
+
+
+4.6 KVM_SET_MEMORY_REGION
+
+Capability: basic
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_memory_region (in)
+Returns: 0 on success, -1 on error
+
+This ioctl is obsolete and has been removed.
+
+
+4.7 KVM_CREATE_VCPU
+
+Capability: basic
+Architectures: all
+Type: vm ioctl
+Parameters: vcpu id (apic id on x86)
+Returns: vcpu fd on success, -1 on error
+
+This API adds a vcpu to a virtual machine. The vcpu id is a small integer
+in the range [0, max_vcpus).
+
+The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of
+the KVM_CHECK_EXTENSION ioctl() at run-time.
+The maximum possible value for max_vcpus can be retrieved using the
+KVM_CAP_MAX_VCPUS of the KVM_CHECK_EXTENSION ioctl() at run-time.
+
+If the KVM_CAP_NR_VCPUS does not exist, you should assume that max_vcpus is 4
+cpus max.
+If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is
+same as the value returned from KVM_CAP_NR_VCPUS.
+
+On powerpc using book3s_hv mode, the vcpus are mapped onto virtual
+threads in one or more virtual CPU cores. (This is because the
+hardware requires all the hardware threads in a CPU core to be in the
+same partition.) The KVM_CAP_PPC_SMT capability indicates the number
+of vcpus per virtual core (vcore). The vcore id is obtained by
+dividing the vcpu id by the number of vcpus per vcore. The vcpus in a
+given vcore will always be in the same physical core as each other
+(though that might be a different physical core from time to time).
+Userspace can control the threading (SMT) mode of the guest by its
+allocation of vcpu ids. For example, if userspace wants
+single-threaded guest vcpus, it should make all vcpu ids be a multiple
+of the number of vcpus per vcore.
+
+For virtual cpus that have been created with S390 user controlled virtual
+machines, the resulting vcpu fd can be memory mapped at page offset
+KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual
+cpu's hardware control block.
+
+
+4.8 KVM_GET_DIRTY_LOG (vm ioctl)
+
+Capability: basic
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_dirty_log (in/out)
+Returns: 0 on success, -1 on error
+
+/* for KVM_GET_DIRTY_LOG */
+struct kvm_dirty_log {
+ __u32 slot;
+ __u32 padding;
+ union {
+ void __user *dirty_bitmap; /* one bit per page */
+ __u64 padding;
+ };
+};
+
+Given a memory slot, return a bitmap containing any pages dirtied
+since the last call to this ioctl. Bit 0 is the first page in the
+memory slot. Ensure the entire structure is cleared to avoid padding
+issues.
+
+
+4.9 KVM_SET_MEMORY_ALIAS
+
+Capability: basic
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_memory_alias (in)
+Returns: 0 (success), -1 (error)
+
+This ioctl is obsolete and has been removed.
+
+
+4.10 KVM_RUN
+
+Capability: basic
+Architectures: all
+Type: vcpu ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+Errors:
+ EINTR: an unmasked signal is pending
+
+This ioctl is used to run a guest virtual cpu. While there are no
+explicit parameters, there is an implicit parameter block that can be
+obtained by mmap()ing the vcpu fd at offset 0, with the size given by
+KVM_GET_VCPU_MMAP_SIZE. The parameter block is formatted as a 'struct
+kvm_run' (see below).
+
+
+4.11 KVM_GET_REGS
+
+Capability: basic
+Architectures: all except ARM, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_regs (out)
+Returns: 0 on success, -1 on error
+
+Reads the general purpose registers from the vcpu.
+
+/* x86 */
+struct kvm_regs {
+ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+ __u64 rax, rbx, rcx, rdx;
+ __u64 rsi, rdi, rsp, rbp;
+ __u64 r8, r9, r10, r11;
+ __u64 r12, r13, r14, r15;
+ __u64 rip, rflags;
+};
+
+/* mips */
+struct kvm_regs {
+ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+ __u64 gpr[32];
+ __u64 hi;
+ __u64 lo;
+ __u64 pc;
+};
+
+
+4.12 KVM_SET_REGS
+
+Capability: basic
+Architectures: all except ARM, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_regs (in)
+Returns: 0 on success, -1 on error
+
+Writes the general purpose registers into the vcpu.
+
+See KVM_GET_REGS for the data structure.
+
+
+4.13 KVM_GET_SREGS
+
+Capability: basic
+Architectures: x86, ppc
+Type: vcpu ioctl
+Parameters: struct kvm_sregs (out)
+Returns: 0 on success, -1 on error
+
+Reads special registers from the vcpu.
+
+/* x86 */
+struct kvm_sregs {
+ struct kvm_segment cs, ds, es, fs, gs, ss;
+ struct kvm_segment tr, ldt;
+ struct kvm_dtable gdt, idt;
+ __u64 cr0, cr2, cr3, cr4, cr8;
+ __u64 efer;
+ __u64 apic_base;
+ __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
+};
+
+/* ppc -- see arch/powerpc/include/uapi/asm/kvm.h */
+
+interrupt_bitmap is a bitmap of pending external interrupts. At most
+one bit may be set. This interrupt has been acknowledged by the APIC
+but not yet injected into the cpu core.
+
+
+4.14 KVM_SET_SREGS
+
+Capability: basic
+Architectures: x86, ppc
+Type: vcpu ioctl
+Parameters: struct kvm_sregs (in)
+Returns: 0 on success, -1 on error
+
+Writes special registers into the vcpu. See KVM_GET_SREGS for the
+data structures.
+
+
+4.15 KVM_TRANSLATE
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_translation (in/out)
+Returns: 0 on success, -1 on error
+
+Translates a virtual address according to the vcpu's current address
+translation mode.
+
+struct kvm_translation {
+ /* in */
+ __u64 linear_address;
+
+ /* out */
+ __u64 physical_address;
+ __u8 valid;
+ __u8 writeable;
+ __u8 usermode;
+ __u8 pad[5];
+};
+
+
+4.16 KVM_INTERRUPT
+
+Capability: basic
+Architectures: x86, ppc, mips
+Type: vcpu ioctl
+Parameters: struct kvm_interrupt (in)
+Returns: 0 on success, -1 on error
+
+Queues a hardware interrupt vector to be injected. This is only
+useful if in-kernel local APIC or equivalent is not used.
+
+/* for KVM_INTERRUPT */
+struct kvm_interrupt {
+ /* in */
+ __u32 irq;
+};
+
+X86:
+
+Note 'irq' is an interrupt vector, not an interrupt pin or line.
+
+PPC:
+
+Queues an external interrupt to be injected. This ioctl is overleaded
+with 3 different irq values:
+
+a) KVM_INTERRUPT_SET
+
+ This injects an edge type external interrupt into the guest once it's ready
+ to receive interrupts. When injected, the interrupt is done.
+
+b) KVM_INTERRUPT_UNSET
+
+ This unsets any pending interrupt.
+
+ Only available with KVM_CAP_PPC_UNSET_IRQ.
+
+c) KVM_INTERRUPT_SET_LEVEL
+
+ This injects a level type external interrupt into the guest context. The
+ interrupt stays pending until a specific ioctl with KVM_INTERRUPT_UNSET
+ is triggered.
+
+ Only available with KVM_CAP_PPC_IRQ_LEVEL.
+
+Note that any value for 'irq' other than the ones stated above is invalid
+and incurs unexpected behavior.
+
+MIPS:
+
+Queues an external interrupt to be injected into the virtual CPU. A negative
+interrupt number dequeues the interrupt.
+
+
+4.17 KVM_DEBUG_GUEST
+
+Capability: basic
+Architectures: none
+Type: vcpu ioctl
+Parameters: none)
+Returns: -1 on error
+
+Support for this has been removed. Use KVM_SET_GUEST_DEBUG instead.
+
+
+4.18 KVM_GET_MSRS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_msrs (in/out)
+Returns: 0 on success, -1 on error
+
+Reads model-specific registers from the vcpu. Supported msr indices can
+be obtained using KVM_GET_MSR_INDEX_LIST.
+
+struct kvm_msrs {
+ __u32 nmsrs; /* number of msrs in entries */
+ __u32 pad;
+
+ struct kvm_msr_entry entries[0];
+};
+
+struct kvm_msr_entry {
+ __u32 index;
+ __u32 reserved;
+ __u64 data;
+};
+
+Application code should set the 'nmsrs' member (which indicates the
+size of the entries array) and the 'index' member of each array entry.
+kvm will fill in the 'data' member.
+
+
+4.19 KVM_SET_MSRS
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_msrs (in)
+Returns: 0 on success, -1 on error
+
+Writes model-specific registers to the vcpu. See KVM_GET_MSRS for the
+data structures.
+
+Application code should set the 'nmsrs' member (which indicates the
+size of the entries array), and the 'index' and 'data' members of each
+array entry.
+
+
+4.20 KVM_SET_CPUID
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_cpuid (in)
+Returns: 0 on success, -1 on error
+
+Defines the vcpu responses to the cpuid instruction. Applications
+should use the KVM_SET_CPUID2 ioctl if available.
+
+
+struct kvm_cpuid_entry {
+ __u32 function;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ __u32 padding;
+};
+
+/* for KVM_SET_CPUID */
+struct kvm_cpuid {
+ __u32 nent;
+ __u32 padding;
+ struct kvm_cpuid_entry entries[0];
+};
+
+
+4.21 KVM_SET_SIGNAL_MASK
+
+Capability: basic
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_signal_mask (in)
+Returns: 0 on success, -1 on error
+
+Defines which signals are blocked during execution of KVM_RUN. This
+signal mask temporarily overrides the threads signal mask. Any
+unblocked signal received (except SIGKILL and SIGSTOP, which retain
+their traditional behaviour) will cause KVM_RUN to return with -EINTR.
+
+Note the signal will only be delivered if not blocked by the original
+signal mask.
+
+/* for KVM_SET_SIGNAL_MASK */
+struct kvm_signal_mask {
+ __u32 len;
+ __u8 sigset[0];
+};
+
+
+4.22 KVM_GET_FPU
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_fpu (out)
+Returns: 0 on success, -1 on error
+
+Reads the floating point state from the vcpu.
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+ __u8 fpr[8][16];
+ __u16 fcw;
+ __u16 fsw;
+ __u8 ftwx; /* in fxsave format */
+ __u8 pad1;
+ __u16 last_opcode;
+ __u64 last_ip;
+ __u64 last_dp;
+ __u8 xmm[16][16];
+ __u32 mxcsr;
+ __u32 pad2;
+};
+
+
+4.23 KVM_SET_FPU
+
+Capability: basic
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_fpu (in)
+Returns: 0 on success, -1 on error
+
+Writes the floating point state to the vcpu.
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+ __u8 fpr[8][16];
+ __u16 fcw;
+ __u16 fsw;
+ __u8 ftwx; /* in fxsave format */
+ __u8 pad1;
+ __u16 last_opcode;
+ __u64 last_ip;
+ __u64 last_dp;
+ __u8 xmm[16][16];
+ __u32 mxcsr;
+ __u32 pad2;
+};
+
+
+4.24 KVM_CREATE_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
+Architectures: x86, ARM, arm64, s390
+Type: vm ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+
+Creates an interrupt controller model in the kernel.
+On x86, creates a virtual ioapic, a virtual PIC (two PICs, nested), and sets up
+future vcpus to have a local APIC. IRQ routing for GSIs 0-15 is set to both
+PIC and IOAPIC; GSI 16-23 only go to the IOAPIC.
+On ARM/arm64, a GICv2 is created. Any other GIC versions require the usage of
+KVM_CREATE_DEVICE, which also supports creating a GICv2. Using
+KVM_CREATE_DEVICE is preferred over KVM_CREATE_IRQCHIP for GICv2.
+On s390, a dummy irq routing table is created.
+
+Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
+before KVM_CREATE_IRQCHIP can be used.
+
+
+4.25 KVM_IRQ_LINE
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86, arm, arm64
+Type: vm ioctl
+Parameters: struct kvm_irq_level
+Returns: 0 on success, -1 on error
+
+Sets the level of a GSI input to the interrupt controller model in the kernel.
+On some architectures it is required that an interrupt controller model has
+been previously created with KVM_CREATE_IRQCHIP. Note that edge-triggered
+interrupts require the level to be set to 1 and then back to 0.
+
+On real hardware, interrupt pins can be active-low or active-high. This
+does not matter for the level field of struct kvm_irq_level: 1 always
+means active (asserted), 0 means inactive (deasserted).
+
+x86 allows the operating system to program the interrupt polarity
+(active-low/active-high) for level-triggered interrupts, and KVM used
+to consider the polarity. However, due to bitrot in the handling of
+active-low interrupts, the above convention is now valid on x86 too.
+This is signaled by KVM_CAP_X86_IOAPIC_POLARITY_IGNORED. Userspace
+should not present interrupts to the guest as active-low unless this
+capability is present (or unless it is not using the in-kernel irqchip,
+of course).
+
+
+ARM/arm64 can signal an interrupt either at the CPU level, or at the
+in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
+use PPIs designated for specific cpus. The irq field is interpreted
+like this:
+
+  bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 |
+ field: | irq_type | vcpu_index | irq_id |
+
+The irq_type field has the following values:
+- irq_type[0]: out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
+- irq_type[1]: in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
+ (the vcpu_index field is ignored)
+- irq_type[2]: in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
+
+(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs)
+
+In both cases, level is used to assert/deassert the line.
+
+struct kvm_irq_level {
+ union {
+ __u32 irq; /* GSI */
+ __s32 status; /* not used for KVM_IRQ_LEVEL */
+ };
+ __u32 level; /* 0 or 1 */
+};
+
+
+4.26 KVM_GET_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_irqchip (in/out)
+Returns: 0 on success, -1 on error
+
+Reads the state of a kernel interrupt controller created with
+KVM_CREATE_IRQCHIP into a buffer provided by the caller.
+
+struct kvm_irqchip {
+ __u32 chip_id; /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
+ __u32 pad;
+ union {
+ char dummy[512]; /* reserving space */
+ struct kvm_pic_state pic;
+ struct kvm_ioapic_state ioapic;
+ } chip;
+};
+
+
+4.27 KVM_SET_IRQCHIP
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_irqchip (in)
+Returns: 0 on success, -1 on error
+
+Sets the state of a kernel interrupt controller created with
+KVM_CREATE_IRQCHIP from a buffer provided by the caller.
+
+struct kvm_irqchip {
+ __u32 chip_id; /* 0 = PIC1, 1 = PIC2, 2 = IOAPIC */
+ __u32 pad;
+ union {
+ char dummy[512]; /* reserving space */
+ struct kvm_pic_state pic;
+ struct kvm_ioapic_state ioapic;
+ } chip;
+};
+
+
+4.28 KVM_XEN_HVM_CONFIG
+
+Capability: KVM_CAP_XEN_HVM
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_xen_hvm_config (in)
+Returns: 0 on success, -1 on error
+
+Sets the MSR that the Xen HVM guest uses to initialize its hypercall
+page, and provides the starting address and size of the hypercall
+blobs in userspace. When the guest writes the MSR, kvm copies one
+page of a blob (32- or 64-bit, depending on the vcpu mode) to guest
+memory.
+
+struct kvm_xen_hvm_config {
+ __u32 flags;
+ __u32 msr;
+ __u64 blob_addr_32;
+ __u64 blob_addr_64;
+ __u8 blob_size_32;
+ __u8 blob_size_64;
+ __u8 pad2[30];
+};
+
+
+4.29 KVM_GET_CLOCK
+
+Capability: KVM_CAP_ADJUST_CLOCK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_clock_data (out)
+Returns: 0 on success, -1 on error
+
+Gets the current timestamp of kvmclock as seen by the current guest. In
+conjunction with KVM_SET_CLOCK, it is used to ensure monotonicity on scenarios
+such as migration.
+
+struct kvm_clock_data {
+ __u64 clock; /* kvmclock current value */
+ __u32 flags;
+ __u32 pad[9];
+};
+
+
+4.30 KVM_SET_CLOCK
+
+Capability: KVM_CAP_ADJUST_CLOCK
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_clock_data (in)
+Returns: 0 on success, -1 on error
+
+Sets the current timestamp of kvmclock to the value specified in its parameter.
+In conjunction with KVM_GET_CLOCK, it is used to ensure monotonicity on scenarios
+such as migration.
+
+struct kvm_clock_data {
+ __u64 clock; /* kvmclock current value */
+ __u32 flags;
+ __u32 pad[9];
+};
+
+
+4.31 KVM_GET_VCPU_EVENTS
+
+Capability: KVM_CAP_VCPU_EVENTS
+Extended by: KVM_CAP_INTR_SHADOW
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_vcpu_event (out)
+Returns: 0 on success, -1 on error
+
+Gets currently pending exceptions, interrupts, and NMIs as well as related
+states of the vcpu.
+
+struct kvm_vcpu_events {
+ struct {
+ __u8 injected;
+ __u8 nr;
+ __u8 has_error_code;
+ __u8 pad;
+ __u32 error_code;
+ } exception;
+ struct {
+ __u8 injected;
+ __u8 nr;
+ __u8 soft;
+ __u8 shadow;
+ } interrupt;
+ struct {
+ __u8 injected;
+ __u8 pending;
+ __u8 masked;
+ __u8 pad;
+ } nmi;
+ __u32 sipi_vector;
+ __u32 flags;
+};
+
+KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
+interrupt.shadow contains a valid state. Otherwise, this field is undefined.
+
+
+4.32 KVM_SET_VCPU_EVENTS
+
+Capability: KVM_CAP_VCPU_EVENTS
+Extended by: KVM_CAP_INTR_SHADOW
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_vcpu_event (in)
+Returns: 0 on success, -1 on error
+
+Set pending exceptions, interrupts, and NMIs as well as related states of the
+vcpu.
+
+See KVM_GET_VCPU_EVENTS for the data structure.
+
+Fields that may be modified asynchronously by running VCPUs can be excluded
+from the update. These fields are nmi.pending and sipi_vector. Keep the
+corresponding bits in the flags field cleared to suppress overwriting the
+current in-kernel state. The bits are:
+
+KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
+KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
+
+If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
+the flags field to signal that interrupt.shadow contains a valid state and
+shall be written into the VCPU.
+
+
+4.33 KVM_GET_DEBUGREGS
+
+Capability: KVM_CAP_DEBUGREGS
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_debugregs (out)
+Returns: 0 on success, -1 on error
+
+Reads debug registers from the vcpu.
+
+struct kvm_debugregs {
+ __u64 db[4];
+ __u64 dr6;
+ __u64 dr7;
+ __u64 flags;
+ __u64 reserved[9];
+};
+
+
+4.34 KVM_SET_DEBUGREGS
+
+Capability: KVM_CAP_DEBUGREGS
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_debugregs (in)
+Returns: 0 on success, -1 on error
+
+Writes debug registers into the vcpu.
+
+See KVM_GET_DEBUGREGS for the data structure. The flags field is unused
+yet and must be cleared on entry.
+
+
+4.35 KVM_SET_USER_MEMORY_REGION
+
+Capability: KVM_CAP_USER_MEM
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_userspace_memory_region (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_userspace_memory_region {
+ __u32 slot;
+ __u32 flags;
+ __u64 guest_phys_addr;
+ __u64 memory_size; /* bytes */
+ __u64 userspace_addr; /* start of the userspace allocated memory */
+};
+
+/* for kvm_memory_region::flags */
+#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
+#define KVM_MEM_READONLY (1UL << 1)
+
+This ioctl allows the user to create or modify a guest physical memory
+slot. When changing an existing slot, it may be moved in the guest
+physical memory space, or its flags may be modified. It may not be
+resized. Slots may not overlap in guest physical address space.
+
+Memory for the region is taken starting at the address denoted by the
+field userspace_addr, which must point at user addressable memory for
+the entire memory slot size. Any object may back this memory, including
+anonymous memory, ordinary files, and hugetlbfs.
+
+It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
+be identical. This allows large pages in the guest to be backed by large
+pages in the host.
+
+The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
+KVM_MEM_READONLY. The former can be set to instruct KVM to keep track of
+writes to memory within the slot. See KVM_GET_DIRTY_LOG ioctl to know how to
+use it. The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
+to make a new slot read-only. In this case, writes to this memory will be
+posted to userspace as KVM_EXIT_MMIO exits.
+
+When the KVM_CAP_SYNC_MMU capability is available, changes in the backing of
+the memory region are automatically reflected into the guest. For example, an
+mmap() that affects the region will be made visible immediately. Another
+example is madvise(MADV_DROP).
+
+It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl.
+The KVM_SET_MEMORY_REGION does not allow fine grained control over memory
+allocation and is deprecated.
+
+
+4.36 KVM_SET_TSS_ADDR
+
+Capability: KVM_CAP_SET_TSS_ADDR
+Architectures: x86
+Type: vm ioctl
+Parameters: unsigned long tss_address (in)
+Returns: 0 on success, -1 on error
+
+This ioctl defines the physical address of a three-page region in the guest
+physical address space. The region must be within the first 4GB of the
+guest physical address space and must not conflict with any memory slot
+or any mmio address. The guest may malfunction if it accesses this memory
+region.
+
+This ioctl is required on Intel-based hosts. This is needed on Intel hardware
+because of a quirk in the virtualization implementation (see the internals
+documentation when it pops into existence).
+
+
+4.37 KVM_ENABLE_CAP
+
+Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
+Architectures: ppc, s390
+Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
+Parameters: struct kvm_enable_cap (in)
+Returns: 0 on success; -1 on error
+
++Not all extensions are enabled by default. Using this ioctl the application
+can enable an extension, making it available to the guest.
+
+On systems that do not support this ioctl, it always fails. On systems that
+do support it, it only works for extensions that are supported for enablement.
+
+To check if a capability can be enabled, the KVM_CHECK_EXTENSION ioctl should
+be used.
+
+struct kvm_enable_cap {
+ /* in */
+ __u32 cap;
+
+The capability that is supposed to get enabled.
+
+ __u32 flags;
+
+A bitfield indicating future enhancements. Has to be 0 for now.
+
+ __u64 args[4];
+
+Arguments for enabling a feature. If a feature needs initial values to
+function properly, this is the place to put them.
+
+ __u8 pad[64];
+};
+
+The vcpu ioctl should be used for vcpu-specific capabilities, the vm ioctl
+for vm-wide capabilities.
+
+4.38 KVM_GET_MP_STATE
+
+Capability: KVM_CAP_MP_STATE
+Architectures: x86, s390, arm, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_mp_state (out)
+Returns: 0 on success; -1 on error
+
+struct kvm_mp_state {
+ __u32 mp_state;
+};
+
+Returns the vcpu's current "multiprocessing state" (though also valid on
+uniprocessor guests).
+
+Possible values are:
+
+ - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86,arm/arm64]
+ - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP)
+ which has not yet received an INIT signal [x86]
+ - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is
+ now ready for a SIPI [x86]
+ - KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and
+ is waiting for an interrupt [x86]
+ - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector
+ accessible via KVM_GET_VCPU_EVENTS) [x86]
+ - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390,arm/arm64]
+ - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390]
+ - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted)
+ [s390]
+ - KVM_MP_STATE_LOAD: the vcpu is in a special load/startup state
+ [s390]
+
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
+
+For arm/arm64:
+
+The only states that are valid are KVM_MP_STATE_STOPPED and
+KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
+
+4.39 KVM_SET_MP_STATE
+
+Capability: KVM_CAP_MP_STATE
+Architectures: x86, s390, arm, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_mp_state (in)
+Returns: 0 on success; -1 on error
+
+Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
+arguments.
+
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
+
+For arm/arm64:
+
+The only states that are valid are KVM_MP_STATE_STOPPED and
+KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
+
+4.40 KVM_SET_IDENTITY_MAP_ADDR
+
+Capability: KVM_CAP_SET_IDENTITY_MAP_ADDR
+Architectures: x86
+Type: vm ioctl
+Parameters: unsigned long identity (in)
+Returns: 0 on success, -1 on error
+
+This ioctl defines the physical address of a one-page region in the guest
+physical address space. The region must be within the first 4GB of the
+guest physical address space and must not conflict with any memory slot
+or any mmio address. The guest may malfunction if it accesses this memory
+region.
+
+This ioctl is required on Intel-based hosts. This is needed on Intel hardware
+because of a quirk in the virtualization implementation (see the internals
+documentation when it pops into existence).
+
+
+4.41 KVM_SET_BOOT_CPU_ID
+
+Capability: KVM_CAP_SET_BOOT_CPU_ID
+Architectures: x86
+Type: vm ioctl
+Parameters: unsigned long vcpu_id
+Returns: 0 on success, -1 on error
+
+Define which vcpu is the Bootstrap Processor (BSP). Values are the same
+as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default
+is vcpu 0.
+
+
+4.42 KVM_GET_XSAVE
+
+Capability: KVM_CAP_XSAVE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xsave (out)
+Returns: 0 on success, -1 on error
+
+struct kvm_xsave {
+ __u32 region[1024];
+};
+
+This ioctl would copy current vcpu's xsave struct to the userspace.
+
+
+4.43 KVM_SET_XSAVE
+
+Capability: KVM_CAP_XSAVE
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xsave (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_xsave {
+ __u32 region[1024];
+};
+
+This ioctl would copy userspace's xsave struct to the kernel.
+
+
+4.44 KVM_GET_XCRS
+
+Capability: KVM_CAP_XCRS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xcrs (out)
+Returns: 0 on success, -1 on error
+
+struct kvm_xcr {
+ __u32 xcr;
+ __u32 reserved;
+ __u64 value;
+};
+
+struct kvm_xcrs {
+ __u32 nr_xcrs;
+ __u32 flags;
+ struct kvm_xcr xcrs[KVM_MAX_XCRS];
+ __u64 padding[16];
+};
+
+This ioctl would copy current vcpu's xcrs to the userspace.
+
+
+4.45 KVM_SET_XCRS
+
+Capability: KVM_CAP_XCRS
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_xcrs (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_xcr {
+ __u32 xcr;
+ __u32 reserved;
+ __u64 value;
+};
+
+struct kvm_xcrs {
+ __u32 nr_xcrs;
+ __u32 flags;
+ struct kvm_xcr xcrs[KVM_MAX_XCRS];
+ __u64 padding[16];
+};
+
+This ioctl would set vcpu's xcr to the value userspace specified.
+
+
+4.46 KVM_GET_SUPPORTED_CPUID
+
+Capability: KVM_CAP_EXT_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+ __u32 nent;
+ __u32 padding;
+ struct kvm_cpuid_entry2 entries[0];
+};
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+
+struct kvm_cpuid_entry2 {
+ __u32 function;
+ __u32 index;
+ __u32 flags;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ __u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are supported by both the hardware
+and kvm. Userspace can use the information returned by this ioctl to
+construct cpuid information (for KVM_SET_CPUID2) that is consistent with
+hardware, kernel, and userspace capabilities, and with user requirements (for
+example, the user may wish to constrain cpuid to emulate older hardware,
+or for feature consistency across a cluster).
+
+Userspace invokes KVM_GET_SUPPORTED_CPUID by passing a kvm_cpuid2 structure
+with the 'nent' field indicating the number of entries in the variable-size
+array 'entries'. If the number of entries is too low to describe the cpu
+capabilities, an error (E2BIG) is returned. If the number is too high,
+the 'nent' field is adjusted and an error (ENOMEM) is returned. If the
+number is just right, the 'nent' field is adjusted to the number of valid
+entries in the 'entries' array, which is then filled.
+
+The entries returned are the host cpuid as returned by the cpuid instruction,
+with unknown or unsupported features masked out. Some features (for example,
+x2apic), may not be present in the host cpu, but are exposed by kvm if it can
+emulate them efficiently. The fields in each entry are defined as follows:
+
+ function: the eax value used to obtain the entry
+ index: the ecx value used to obtain the entry (for entries that are
+ affected by ecx)
+ flags: an OR of zero or more of the following:
+ KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+ if the index field is valid
+ KVM_CPUID_FLAG_STATEFUL_FUNC:
+ if cpuid for this function returns different values for successive
+ invocations; there will be several entries with the same function,
+ all with this flag set
+ KVM_CPUID_FLAG_STATE_READ_NEXT:
+ for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+ the first entry to be read by a cpu
+ eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+ this function/index combination
+
+The TSC deadline timer feature (CPUID leaf 1, ecx[24]) is always returned
+as false, since the feature depends on KVM_CREATE_IRQCHIP for local APIC
+support. Instead it is reported via
+
+ ioctl(KVM_CHECK_EXTENSION, KVM_CAP_TSC_DEADLINE_TIMER)
+
+if that returns true and you use KVM_CREATE_IRQCHIP, or if you emulate the
+feature in userspace, then you can enable the feature for KVM_SET_CPUID2.
+
+
+4.47 KVM_PPC_GET_PVINFO
+
+Capability: KVM_CAP_PPC_GET_PVINFO
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_ppc_pvinfo (out)
+Returns: 0 on success, !0 on error
+
+struct kvm_ppc_pvinfo {
+ __u32 flags;
+ __u32 hcall[4];
+ __u8 pad[108];
+};
+
+This ioctl fetches PV specific information that need to be passed to the guest
+using the device tree or other means from vm context.
+
+The hcall array defines 4 instructions that make up a hypercall.
+
+If any additional field gets added to this structure later on, a bit for that
+additional piece of information will be set in the flags bitmap.
+
+The flags bitmap is defined as:
+
+ /* the host supports the ePAPR idle hcall
+ #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
+
+4.48 KVM_ASSIGN_PCI_DEVICE
+
+Capability: none
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_pci_dev (in)
+Returns: 0 on success, -1 on error
+
+Assigns a host PCI device to the VM.
+
+struct kvm_assigned_pci_dev {
+ __u32 assigned_dev_id;
+ __u32 busnr;
+ __u32 devfn;
+ __u32 flags;
+ __u32 segnr;
+ union {
+ __u32 reserved[11];
+ };
+};
+
+The PCI device is specified by the triple segnr, busnr, and devfn.
+Identification in succeeding service requests is done via assigned_dev_id. The
+following flags are specified:
+
+/* Depends on KVM_CAP_IOMMU */
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
+/* The following two depend on KVM_CAP_PCI_2_3 */
+#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
+#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
+
+If KVM_DEV_ASSIGN_PCI_2_3 is set, the kernel will manage legacy INTx interrupts
+via the PCI-2.3-compliant device-level mask, thus enable IRQ sharing with other
+assigned devices or host devices. KVM_DEV_ASSIGN_MASK_INTX specifies the
+guest's view on the INTx mask, see KVM_ASSIGN_SET_INTX_MASK for details.
+
+The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure
+isolation of the device. Usages not specifying this flag are deprecated.
+
+Only PCI header type 0 devices with PCI BAR resources are supported by
+device assignment. The user requesting this ioctl must have read/write
+access to the PCI sysfs resource files associated with the device.
+
+Errors:
+ ENOTTY: kernel does not support this ioctl
+
+ Other error conditions may be defined by individual device types or
+ have their standard meanings.
+
+
+4.49 KVM_DEASSIGN_PCI_DEVICE
+
+Capability: none
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_pci_dev (in)
+Returns: 0 on success, -1 on error
+
+Ends PCI device assignment, releasing all associated resources.
+
+See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is
+used in kvm_assigned_pci_dev to identify the device.
+
+Errors:
+ ENOTTY: kernel does not support this ioctl
+
+ Other error conditions may be defined by individual device types or
+ have their standard meanings.
+
+4.50 KVM_ASSIGN_DEV_IRQ
+
+Capability: KVM_CAP_ASSIGN_DEV_IRQ
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_irq (in)
+Returns: 0 on success, -1 on error
+
+Assigns an IRQ to a passed-through device.
+
+struct kvm_assigned_irq {
+ __u32 assigned_dev_id;
+ __u32 host_irq; /* ignored (legacy field) */
+ __u32 guest_irq;
+ __u32 flags;
+ union {
+ __u32 reserved[12];
+ };
+};
+
+The following flags are defined:
+
+#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
+#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
+#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
+
+#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
+#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
+#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
+
+It is not valid to specify multiple types per host or guest IRQ. However, the
+IRQ type of host and guest can differ or can even be null.
+
+Errors:
+ ENOTTY: kernel does not support this ioctl
+
+ Other error conditions may be defined by individual device types or
+ have their standard meanings.
+
+
+4.51 KVM_DEASSIGN_DEV_IRQ
+
+Capability: KVM_CAP_ASSIGN_DEV_IRQ
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_irq (in)
+Returns: 0 on success, -1 on error
+
+Ends an IRQ assignment to a passed-through device.
+
+See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
+by assigned_dev_id, flags must correspond to the IRQ type specified on
+KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
+
+
+4.52 KVM_SET_GSI_ROUTING
+
+Capability: KVM_CAP_IRQ_ROUTING
+Architectures: x86 s390
+Type: vm ioctl
+Parameters: struct kvm_irq_routing (in)
+Returns: 0 on success, -1 on error
+
+Sets the GSI routing table entries, overwriting any previously set entries.
+
+struct kvm_irq_routing {
+ __u32 nr;
+ __u32 flags;
+ struct kvm_irq_routing_entry entries[0];
+};
+
+No flags are specified so far, the corresponding field must be set to zero.
+
+struct kvm_irq_routing_entry {
+ __u32 gsi;
+ __u32 type;
+ __u32 flags;
+ __u32 pad;
+ union {
+ struct kvm_irq_routing_irqchip irqchip;
+ struct kvm_irq_routing_msi msi;
+ struct kvm_irq_routing_s390_adapter adapter;
+ __u32 pad[8];
+ } u;
+};
+
+/* gsi routing entry types */
+#define KVM_IRQ_ROUTING_IRQCHIP 1
+#define KVM_IRQ_ROUTING_MSI 2
+#define KVM_IRQ_ROUTING_S390_ADAPTER 3
+
+No flags are specified so far, the corresponding field must be set to zero.
+
+struct kvm_irq_routing_irqchip {
+ __u32 irqchip;
+ __u32 pin;
+};
+
+struct kvm_irq_routing_msi {
+ __u32 address_lo;
+ __u32 address_hi;
+ __u32 data;
+ __u32 pad;
+};
+
+struct kvm_irq_routing_s390_adapter {
+ __u64 ind_addr;
+ __u64 summary_addr;
+ __u64 ind_offset;
+ __u32 summary_offset;
+ __u32 adapter_id;
+};
+
+
+4.53 KVM_ASSIGN_SET_MSIX_NR
+
+Capability: none
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_msix_nr (in)
+Returns: 0 on success, -1 on error
+
+Set the number of MSI-X interrupts for an assigned device. The number is
+reset again by terminating the MSI-X assignment of the device via
+KVM_DEASSIGN_DEV_IRQ. Calling this service more than once at any earlier
+point will fail.
+
+struct kvm_assigned_msix_nr {
+ __u32 assigned_dev_id;
+ __u16 entry_nr;
+ __u16 padding;
+};
+
+#define KVM_MAX_MSIX_PER_DEV 256
+
+
+4.54 KVM_ASSIGN_SET_MSIX_ENTRY
+
+Capability: none
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_msix_entry (in)
+Returns: 0 on success, -1 on error
+
+Specifies the routing of an MSI-X assigned device interrupt to a GSI. Setting
+the GSI vector to zero means disabling the interrupt.
+
+struct kvm_assigned_msix_entry {
+ __u32 assigned_dev_id;
+ __u32 gsi;
+ __u16 entry; /* The index of entry in the MSI-X table */
+ __u16 padding[3];
+};
+
+Errors:
+ ENOTTY: kernel does not support this ioctl
+
+ Other error conditions may be defined by individual device types or
+ have their standard meanings.
+
+
+4.55 KVM_SET_TSC_KHZ
+
+Capability: KVM_CAP_TSC_CONTROL
+Architectures: x86
+Type: vcpu ioctl
+Parameters: virtual tsc_khz
+Returns: 0 on success, -1 on error
+
+Specifies the tsc frequency for the virtual machine. The unit of the
+frequency is KHz.
+
+
+4.56 KVM_GET_TSC_KHZ
+
+Capability: KVM_CAP_GET_TSC_KHZ
+Architectures: x86
+Type: vcpu ioctl
+Parameters: none
+Returns: virtual tsc-khz on success, negative value on error
+
+Returns the tsc frequency of the guest. The unit of the return value is
+KHz. If the host has unstable tsc this ioctl returns -EIO instead as an
+error.
+
+
+4.57 KVM_GET_LAPIC
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_lapic_state (out)
+Returns: 0 on success, -1 on error
+
+#define KVM_APIC_REG_SIZE 0x400
+struct kvm_lapic_state {
+ char regs[KVM_APIC_REG_SIZE];
+};
+
+Reads the Local APIC registers and copies them into the input argument. The
+data format and layout are the same as documented in the architecture manual.
+
+
+4.58 KVM_SET_LAPIC
+
+Capability: KVM_CAP_IRQCHIP
+Architectures: x86
+Type: vcpu ioctl
+Parameters: struct kvm_lapic_state (in)
+Returns: 0 on success, -1 on error
+
+#define KVM_APIC_REG_SIZE 0x400
+struct kvm_lapic_state {
+ char regs[KVM_APIC_REG_SIZE];
+};
+
+Copies the input argument into the Local APIC registers. The data format
+and layout are the same as documented in the architecture manual.
+
+
+4.59 KVM_IOEVENTFD
+
+Capability: KVM_CAP_IOEVENTFD
+Architectures: all
+Type: vm ioctl
+Parameters: struct kvm_ioeventfd (in)
+Returns: 0 on success, !0 on error
+
+This ioctl attaches or detaches an ioeventfd to a legal pio/mmio address
+within the guest. A guest write in the registered address will signal the
+provided event instead of triggering an exit.
+
+struct kvm_ioeventfd {
+ __u64 datamatch;
+ __u64 addr; /* legal pio/mmio address */
+ __u32 len; /* 1, 2, 4, or 8 bytes */
+ __s32 fd;
+ __u32 flags;
+ __u8 pad[36];
+};
+
+For the special case of virtio-ccw devices on s390, the ioevent is matched
+to a subchannel/virtqueue tuple instead.
+
+The following flags are defined:
+
+#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
+#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
+#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
+#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
+ (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
+
+If datamatch flag is set, the event will be signaled only if the written value
+to the registered address is equal to datamatch in struct kvm_ioeventfd.
+
+For virtio-ccw devices, addr contains the subchannel id and datamatch the
+virtqueue index.
+
+
+4.60 KVM_DIRTY_TLB
+
+Capability: KVM_CAP_SW_TLB
+Architectures: ppc
+Type: vcpu ioctl
+Parameters: struct kvm_dirty_tlb (in)
+Returns: 0 on success, -1 on error
+
+struct kvm_dirty_tlb {
+ __u64 bitmap;
+ __u32 num_dirty;
+};
+
+This must be called whenever userspace has changed an entry in the shared
+TLB, prior to calling KVM_RUN on the associated vcpu.
+
+The "bitmap" field is the userspace address of an array. This array
+consists of a number of bits, equal to the total number of TLB entries as
+determined by the last successful call to KVM_CONFIG_TLB, rounded up to the
+nearest multiple of 64.
+
+Each bit corresponds to one TLB entry, ordered the same as in the shared TLB
+array.
+
+The array is little-endian: the bit 0 is the least significant bit of the
+first byte, bit 8 is the least significant bit of the second byte, etc.
+This avoids any complications with differing word sizes.
+
+The "num_dirty" field is a performance hint for KVM to determine whether it
+should skip processing the bitmap and just invalidate everything. It must
+be set to the number of set bits in the bitmap.
+
+
+4.61 KVM_ASSIGN_SET_INTX_MASK
+
+Capability: KVM_CAP_PCI_2_3
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_assigned_pci_dev (in)
+Returns: 0 on success, -1 on error
+
+Allows userspace to mask PCI INTx interrupts from the assigned device. The
+kernel will not deliver INTx interrupts to the guest between setting and
+clearing of KVM_ASSIGN_SET_INTX_MASK via this interface. This enables use of
+and emulation of PCI 2.3 INTx disable command register behavior.
+
+This may be used for both PCI 2.3 devices supporting INTx disable natively and
+older devices lacking this support. Userspace is responsible for emulating the
+read value of the INTx disable bit in the guest visible PCI command register.
+When modifying the INTx disable state, userspace should precede updating the
+physical device command register by calling this ioctl to inform the kernel of
+the new intended INTx mask state.
+
+Note that the kernel uses the device INTx disable bit to internally manage the
+device interrupt state for PCI 2.3 devices. Reads of this register may
+therefore not match the expected value. Writes should always use the guest
+intended INTx disable value rather than attempting to read-copy-update the
+current physical device state. Races between user and kernel updates to the
+INTx disable bit are handled lazily in the kernel. It's possible the device
+may generate unintended interrupts, but they will not be injected into the
+guest.
+
+See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified
+by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is
+evaluated.
+
+
+4.62 KVM_CREATE_SPAPR_TCE
+
+Capability: KVM_CAP_SPAPR_TCE
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_create_spapr_tce (in)
+Returns: file descriptor for manipulating the created TCE table
+
+This creates a virtual TCE (translation control entry) table, which
+is an IOMMU for PAPR-style virtual I/O. It is used to translate
+logical addresses used in virtual I/O into guest physical addresses,
+and provides a scatter/gather capability for PAPR virtual I/O.
+
+/* for KVM_CAP_SPAPR_TCE */
+struct kvm_create_spapr_tce {
+ __u64 liobn;
+ __u32 window_size;
+};
+
+The liobn field gives the logical IO bus number for which to create a
+TCE table. The window_size field specifies the size of the DMA window
+which this TCE table will translate - the table will contain one 64
+bit TCE entry for every 4kiB of the DMA window.
+
+When the guest issues an H_PUT_TCE hcall on a liobn for which a TCE
+table has been created using this ioctl(), the kernel will handle it
+in real mode, updating the TCE table. H_PUT_TCE calls for other
+liobns will cause a vm exit and must be handled by userspace.
+
+The return value is a file descriptor which can be passed to mmap(2)
+to map the created TCE table into userspace. This lets userspace read
+the entries written by kernel-handled H_PUT_TCE calls, and also lets
+userspace update the TCE table directly which is useful in some
+circumstances.
+
+
+4.63 KVM_ALLOCATE_RMA
+
+Capability: KVM_CAP_PPC_RMA
+Architectures: powerpc
+Type: vm ioctl
+Parameters: struct kvm_allocate_rma (out)
+Returns: file descriptor for mapping the allocated RMA
+
+This allocates a Real Mode Area (RMA) from the pool allocated at boot
+time by the kernel. An RMA is a physically-contiguous, aligned region
+of memory used on older POWER processors to provide the memory which
+will be accessed by real-mode (MMU off) accesses in a KVM guest.
+POWER processors support a set of sizes for the RMA that usually
+includes 64MB, 128MB, 256MB and some larger powers of two.
+
+/* for KVM_ALLOCATE_RMA */
+struct kvm_allocate_rma {
+ __u64 rma_size;
+};
+
+The return value is a file descriptor which can be passed to mmap(2)
+to map the allocated RMA into userspace. The mapped area can then be
+passed to the KVM_SET_USER_MEMORY_REGION ioctl to establish it as the
+RMA for a virtual machine. The size of the RMA in bytes (which is
+fixed at host kernel boot time) is returned in the rma_size field of
+the argument structure.
+
+The KVM_CAP_PPC_RMA capability is 1 or 2 if the KVM_ALLOCATE_RMA ioctl
+is supported; 2 if the processor requires all virtual machines to have
+an RMA, or 1 if the processor can use an RMA but doesn't require it,
+because it supports the Virtual RMA (VRMA) facility.
+
+
+4.64 KVM_NMI
+
+Capability: KVM_CAP_USER_NMI
+Architectures: x86
+Type: vcpu ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+
+Queues an NMI on the thread's vcpu. Note this is well defined only
+when KVM_CREATE_IRQCHIP has not been called, since this is an interface
+between the virtual cpu core and virtual local APIC. After KVM_CREATE_IRQCHIP
+has been called, this interface is completely emulated within the kernel.
+
+To use this to emulate the LINT1 input with KVM_CREATE_IRQCHIP, use the
+following algorithm:
+
+ - pause the vpcu
+ - read the local APIC's state (KVM_GET_LAPIC)
+ - check whether changing LINT1 will queue an NMI (see the LVT entry for LINT1)
+ - if so, issue KVM_NMI
+ - resume the vcpu
+
+Some guests configure the LINT1 NMI input to cause a panic, aiding in
+debugging.
+
+
+4.65 KVM_S390_UCAS_MAP
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_ucas_mapping (in)
+Returns: 0 in case of success
+
+The parameter is defined like this:
+ struct kvm_s390_ucas_mapping {
+ __u64 user_addr;
+ __u64 vcpu_addr;
+ __u64 length;
+ };
+
+This ioctl maps the memory at "user_addr" with the length "length" to
+the vcpu's address space starting at "vcpu_addr". All parameters need to
+be aligned by 1 megabyte.
+
+
+4.66 KVM_S390_UCAS_UNMAP
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_ucas_mapping (in)
+Returns: 0 in case of success
+
+The parameter is defined like this:
+ struct kvm_s390_ucas_mapping {
+ __u64 user_addr;
+ __u64 vcpu_addr;
+ __u64 length;
+ };
+
+This ioctl unmaps the memory in the vcpu's address space starting at
+"vcpu_addr" with the length "length". The field "user_addr" is ignored.
+All parameters need to be aligned by 1 megabyte.
+
+
+4.67 KVM_S390_VCPU_FAULT
+
+Capability: KVM_CAP_S390_UCONTROL
+Architectures: s390
+Type: vcpu ioctl
+Parameters: vcpu absolute address (in)
+Returns: 0 in case of success
+
+This call creates a page table entry on the virtual cpu's address space
+(for user controlled virtual machines) or the virtual machine's address
+space (for regular virtual machines). This only works for minor faults,
+thus it's recommended to access subject memory page via the user page
+table upfront. This is useful to handle validity intercepts for user
+controlled virtual machines to fault in the virtual cpu's lowcore pages
+prior to calling the KVM_RUN ioctl.
+
+
+4.68 KVM_SET_ONE_REG
+
+Capability: KVM_CAP_ONE_REG
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_one_reg (in)
+Returns: 0 on success, negative value on failure
+
+struct kvm_one_reg {
+ __u64 id;
+ __u64 addr;
+};
+
+Using this ioctl, a single vcpu register can be set to a specific value
+defined by user space with the passed in struct kvm_one_reg, where id
+refers to the register identifier as described below and addr is a pointer
+to a variable with the respective size. There can be architecture agnostic
+and architecture specific registers. Each have their own range of operation
+and their own constants and width. To keep track of the implemented
+registers, find a list below:
+
+ Arch | Register | Width (bits)
+ | |
+ PPC | KVM_REG_PPC_HIOR | 64
+ PPC | KVM_REG_PPC_IAC1 | 64
+ PPC | KVM_REG_PPC_IAC2 | 64
+ PPC | KVM_REG_PPC_IAC3 | 64
+ PPC | KVM_REG_PPC_IAC4 | 64
+ PPC | KVM_REG_PPC_DAC1 | 64
+ PPC | KVM_REG_PPC_DAC2 | 64
+ PPC | KVM_REG_PPC_DABR | 64
+ PPC | KVM_REG_PPC_DSCR | 64
+ PPC | KVM_REG_PPC_PURR | 64
+ PPC | KVM_REG_PPC_SPURR | 64
+ PPC | KVM_REG_PPC_DAR | 64
+ PPC | KVM_REG_PPC_DSISR | 32
+ PPC | KVM_REG_PPC_AMR | 64
+ PPC | KVM_REG_PPC_UAMOR | 64
+ PPC | KVM_REG_PPC_MMCR0 | 64
+ PPC | KVM_REG_PPC_MMCR1 | 64
+ PPC | KVM_REG_PPC_MMCRA | 64
+ PPC | KVM_REG_PPC_MMCR2 | 64
+ PPC | KVM_REG_PPC_MMCRS | 64
+ PPC | KVM_REG_PPC_SIAR | 64
+ PPC | KVM_REG_PPC_SDAR | 64
+ PPC | KVM_REG_PPC_SIER | 64
+ PPC | KVM_REG_PPC_PMC1 | 32
+ PPC | KVM_REG_PPC_PMC2 | 32
+ PPC | KVM_REG_PPC_PMC3 | 32
+ PPC | KVM_REG_PPC_PMC4 | 32
+ PPC | KVM_REG_PPC_PMC5 | 32
+ PPC | KVM_REG_PPC_PMC6 | 32
+ PPC | KVM_REG_PPC_PMC7 | 32
+ PPC | KVM_REG_PPC_PMC8 | 32
+ PPC | KVM_REG_PPC_FPR0 | 64
+ ...
+ PPC | KVM_REG_PPC_FPR31 | 64
+ PPC | KVM_REG_PPC_VR0 | 128
+ ...
+ PPC | KVM_REG_PPC_VR31 | 128
+ PPC | KVM_REG_PPC_VSR0 | 128
+ ...
+ PPC | KVM_REG_PPC_VSR31 | 128
+ PPC | KVM_REG_PPC_FPSCR | 64
+ PPC | KVM_REG_PPC_VSCR | 32
+ PPC | KVM_REG_PPC_VPA_ADDR | 64
+ PPC | KVM_REG_PPC_VPA_SLB | 128
+ PPC | KVM_REG_PPC_VPA_DTL | 128
+ PPC | KVM_REG_PPC_EPCR | 32
+ PPC | KVM_REG_PPC_EPR | 32
+ PPC | KVM_REG_PPC_TCR | 32
+ PPC | KVM_REG_PPC_TSR | 32
+ PPC | KVM_REG_PPC_OR_TSR | 32
+ PPC | KVM_REG_PPC_CLEAR_TSR | 32
+ PPC | KVM_REG_PPC_MAS0 | 32
+ PPC | KVM_REG_PPC_MAS1 | 32
+ PPC | KVM_REG_PPC_MAS2 | 64
+ PPC | KVM_REG_PPC_MAS7_3 | 64
+ PPC | KVM_REG_PPC_MAS4 | 32
+ PPC | KVM_REG_PPC_MAS6 | 32
+ PPC | KVM_REG_PPC_MMUCFG | 32
+ PPC | KVM_REG_PPC_TLB0CFG | 32
+ PPC | KVM_REG_PPC_TLB1CFG | 32
+ PPC | KVM_REG_PPC_TLB2CFG | 32
+ PPC | KVM_REG_PPC_TLB3CFG | 32
+ PPC | KVM_REG_PPC_TLB0PS | 32
+ PPC | KVM_REG_PPC_TLB1PS | 32
+ PPC | KVM_REG_PPC_TLB2PS | 32
+ PPC | KVM_REG_PPC_TLB3PS | 32
+ PPC | KVM_REG_PPC_EPTCFG | 32
+ PPC | KVM_REG_PPC_ICP_STATE | 64
+ PPC | KVM_REG_PPC_TB_OFFSET | 64
+ PPC | KVM_REG_PPC_SPMC1 | 32
+ PPC | KVM_REG_PPC_SPMC2 | 32
+ PPC | KVM_REG_PPC_IAMR | 64
+ PPC | KVM_REG_PPC_TFHAR | 64
+ PPC | KVM_REG_PPC_TFIAR | 64
+ PPC | KVM_REG_PPC_TEXASR | 64
+ PPC | KVM_REG_PPC_FSCR | 64
+ PPC | KVM_REG_PPC_PSPB | 32
+ PPC | KVM_REG_PPC_EBBHR | 64
+ PPC | KVM_REG_PPC_EBBRR | 64
+ PPC | KVM_REG_PPC_BESCR | 64
+ PPC | KVM_REG_PPC_TAR | 64
+ PPC | KVM_REG_PPC_DPDES | 64
+ PPC | KVM_REG_PPC_DAWR | 64
+ PPC | KVM_REG_PPC_DAWRX | 64
+ PPC | KVM_REG_PPC_CIABR | 64
+ PPC | KVM_REG_PPC_IC | 64
+ PPC | KVM_REG_PPC_VTB | 64
+ PPC | KVM_REG_PPC_CSIGR | 64
+ PPC | KVM_REG_PPC_TACR | 64
+ PPC | KVM_REG_PPC_TCSCR | 64
+ PPC | KVM_REG_PPC_PID | 64
+ PPC | KVM_REG_PPC_ACOP | 64
+ PPC | KVM_REG_PPC_VRSAVE | 32
+ PPC | KVM_REG_PPC_LPCR | 32
+ PPC | KVM_REG_PPC_LPCR_64 | 64
+ PPC | KVM_REG_PPC_PPR | 64
+ PPC | KVM_REG_PPC_ARCH_COMPAT | 32
+ PPC | KVM_REG_PPC_DABRX | 32
+ PPC | KVM_REG_PPC_WORT | 64
+ PPC | KVM_REG_PPC_SPRG9 | 64
+ PPC | KVM_REG_PPC_DBSR | 32
+ PPC | KVM_REG_PPC_TM_GPR0 | 64
+ ...
+ PPC | KVM_REG_PPC_TM_GPR31 | 64
+ PPC | KVM_REG_PPC_TM_VSR0 | 128
+ ...
+ PPC | KVM_REG_PPC_TM_VSR63 | 128
+ PPC | KVM_REG_PPC_TM_CR | 64
+ PPC | KVM_REG_PPC_TM_LR | 64
+ PPC | KVM_REG_PPC_TM_CTR | 64
+ PPC | KVM_REG_PPC_TM_FPSCR | 64
+ PPC | KVM_REG_PPC_TM_AMR | 64
+ PPC | KVM_REG_PPC_TM_PPR | 64
+ PPC | KVM_REG_PPC_TM_VRSAVE | 64
+ PPC | KVM_REG_PPC_TM_VSCR | 32
+ PPC | KVM_REG_PPC_TM_DSCR | 64
+ PPC | KVM_REG_PPC_TM_TAR | 64
+ | |
+ MIPS | KVM_REG_MIPS_R0 | 64
+ ...
+ MIPS | KVM_REG_MIPS_R31 | 64
+ MIPS | KVM_REG_MIPS_HI | 64
+ MIPS | KVM_REG_MIPS_LO | 64
+ MIPS | KVM_REG_MIPS_PC | 64
+ MIPS | KVM_REG_MIPS_CP0_INDEX | 32
+ MIPS | KVM_REG_MIPS_CP0_CONTEXT | 64
+ MIPS | KVM_REG_MIPS_CP0_USERLOCAL | 64
+ MIPS | KVM_REG_MIPS_CP0_PAGEMASK | 32
+ MIPS | KVM_REG_MIPS_CP0_WIRED | 32
+ MIPS | KVM_REG_MIPS_CP0_HWRENA | 32
+ MIPS | KVM_REG_MIPS_CP0_BADVADDR | 64
+ MIPS | KVM_REG_MIPS_CP0_COUNT | 32
+ MIPS | KVM_REG_MIPS_CP0_ENTRYHI | 64
+ MIPS | KVM_REG_MIPS_CP0_COMPARE | 32
+ MIPS | KVM_REG_MIPS_CP0_STATUS | 32
+ MIPS | KVM_REG_MIPS_CP0_CAUSE | 32
+ MIPS | KVM_REG_MIPS_CP0_EPC | 64
+ MIPS | KVM_REG_MIPS_CP0_PRID | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG1 | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG2 | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG3 | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG4 | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG5 | 32
+ MIPS | KVM_REG_MIPS_CP0_CONFIG7 | 32
+ MIPS | KVM_REG_MIPS_CP0_ERROREPC | 64
+ MIPS | KVM_REG_MIPS_COUNT_CTL | 64
+ MIPS | KVM_REG_MIPS_COUNT_RESUME | 64
+ MIPS | KVM_REG_MIPS_COUNT_HZ | 64
+ MIPS | KVM_REG_MIPS_FPR_32(0..31) | 32
+ MIPS | KVM_REG_MIPS_FPR_64(0..31) | 64
+ MIPS | KVM_REG_MIPS_VEC_128(0..31) | 128
+ MIPS | KVM_REG_MIPS_FCR_IR | 32
+ MIPS | KVM_REG_MIPS_FCR_CSR | 32
+ MIPS | KVM_REG_MIPS_MSA_IR | 32
+ MIPS | KVM_REG_MIPS_MSA_CSR | 32
+
+ARM registers are mapped using the lower 32 bits. The upper 16 of that
+is the register group type, or coprocessor number:
+
+ARM core registers have the following id bit patterns:
+ 0x4020 0000 0010 <index into the kvm_regs struct:16>
+
+ARM 32-bit CP15 registers have the following id bit patterns:
+ 0x4020 0000 000F <zero:1> <crn:4> <crm:4> <opc1:4> <opc2:3>
+
+ARM 64-bit CP15 registers have the following id bit patterns:
+ 0x4030 0000 000F <zero:1> <zero:4> <crm:4> <opc1:4> <zero:3>
+
+ARM CCSIDR registers are demultiplexed by CSSELR value:
+ 0x4020 0000 0011 00 <csselr:8>
+
+ARM 32-bit VFP control registers have the following id bit patterns:
+ 0x4020 0000 0012 1 <regno:12>
+
+ARM 64-bit FP registers have the following id bit patterns:
+ 0x4030 0000 0012 0 <regno:12>
+
+
+arm64 registers are mapped using the lower 32 bits. The upper 16 of
+that is the register group type, or coprocessor number:
+
+arm64 core/FP-SIMD registers have the following id bit patterns. Note
+that the size of the access is variable, as the kvm_regs structure
+contains elements ranging from 32 to 128 bits. The index is a 32bit
+value in the kvm_regs structure seen as a 32bit array.
+ 0x60x0 0000 0010 <index into the kvm_regs struct:16>
+
+arm64 CCSIDR registers are demultiplexed by CSSELR value:
+ 0x6020 0000 0011 00 <csselr:8>
+
+arm64 system registers have the following id bit patterns:
+ 0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
+
+
+MIPS registers are mapped using the lower 32 bits. The upper 16 of that is
+the register group type:
+
+MIPS core registers (see above) have the following id bit patterns:
+ 0x7030 0000 0000 <reg:16>
+
+MIPS CP0 registers (see KVM_REG_MIPS_CP0_* above) have the following id bit
+patterns depending on whether they're 32-bit or 64-bit registers:
+ 0x7020 0000 0001 00 <reg:5> <sel:3> (32-bit)
+ 0x7030 0000 0001 00 <reg:5> <sel:3> (64-bit)
+
+MIPS KVM control registers (see above) have the following id bit patterns:
+ 0x7030 0000 0002 <reg:16>
+
+MIPS FPU registers (see KVM_REG_MIPS_FPR_{32,64}() above) have the following
+id bit patterns depending on the size of the register being accessed. They are
+always accessed according to the current guest FPU mode (Status.FR and
+Config5.FRE), i.e. as the guest would see them, and they become unpredictable
+if the guest FPU mode is changed. MIPS SIMD Architecture (MSA) vector
+registers (see KVM_REG_MIPS_VEC_128() above) have similar patterns as they
+overlap the FPU registers:
+ 0x7020 0000 0003 00 <0:3> <reg:5> (32-bit FPU registers)
+ 0x7030 0000 0003 00 <0:3> <reg:5> (64-bit FPU registers)
+ 0x7040 0000 0003 00 <0:3> <reg:5> (128-bit MSA vector registers)
+
+MIPS FPU control registers (see KVM_REG_MIPS_FCR_{IR,CSR} above) have the
+following id bit patterns:
+ 0x7020 0000 0003 01 <0:3> <reg:5>
+
+MIPS MSA control registers (see KVM_REG_MIPS_MSA_{IR,CSR} above) have the
+following id bit patterns:
+ 0x7020 0000 0003 02 <0:3> <reg:5>
+
+
+4.69 KVM_GET_ONE_REG
+
+Capability: KVM_CAP_ONE_REG
+Architectures: all
+Type: vcpu ioctl
+Parameters: struct kvm_one_reg (in and out)
+Returns: 0 on success, negative value on failure
+
+This ioctl allows to receive the value of a single register implemented
+in a vcpu. The register to read is indicated by the "id" field of the
+kvm_one_reg struct passed in. On success, the register value can be found
+at the memory location pointed to by "addr".
+
+The list of registers accessible using this interface is identical to the
+list in 4.68.
+
+
+4.70 KVM_KVMCLOCK_CTRL
+
+Capability: KVM_CAP_KVMCLOCK_CTRL
+Architectures: Any that implement pvclocks (currently x86 only)
+Type: vcpu ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This signals to the host kernel that the specified guest is being paused by
+userspace. The host will set a flag in the pvclock structure that is checked
+from the soft lockup watchdog. The flag is part of the pvclock structure that
+is shared between guest and host, specifically the second bit of the flags
+field of the pvclock_vcpu_time_info structure. It will be set exclusively by
+the host and read/cleared exclusively by the guest. The guest operation of
+checking and clearing the flag must an atomic operation so
+load-link/store-conditional, or equivalent must be used. There are two cases
+where the guest will clear the flag: when the soft lockup watchdog timer resets
+itself or when a soft lockup is detected. This ioctl can be called any time
+after pausing the vcpu, but before it is resumed.
+
+
+4.71 KVM_SIGNAL_MSI
+
+Capability: KVM_CAP_SIGNAL_MSI
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_msi (in)
+Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error
+
+Directly inject a MSI message. Only valid with in-kernel irqchip that handles
+MSI messages.
+
+struct kvm_msi {
+ __u32 address_lo;
+ __u32 address_hi;
+ __u32 data;
+ __u32 flags;
+ __u8 pad[16];
+};
+
+No flags are defined so far. The corresponding field must be 0.
+
+
+4.71 KVM_CREATE_PIT2
+
+Capability: KVM_CAP_PIT2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_config (in)
+Returns: 0 on success, -1 on error
+
+Creates an in-kernel device model for the i8254 PIT. This call is only valid
+after enabling in-kernel irqchip support via KVM_CREATE_IRQCHIP. The following
+parameters have to be passed:
+
+struct kvm_pit_config {
+ __u32 flags;
+ __u32 pad[15];
+};
+
+Valid flags are:
+
+#define KVM_PIT_SPEAKER_DUMMY 1 /* emulate speaker port stub */
+
+PIT timer interrupts may use a per-VM kernel thread for injection. If it
+exists, this thread will have a name of the following pattern:
+
+kvm-pit/<owner-process-pid>
+
+When running a guest with elevated priorities, the scheduling parameters of
+this thread may have to be adjusted accordingly.
+
+This IOCTL replaces the obsolete KVM_CREATE_PIT.
+
+
+4.72 KVM_GET_PIT2
+
+Capability: KVM_CAP_PIT_STATE2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_state2 (out)
+Returns: 0 on success, -1 on error
+
+Retrieves the state of the in-kernel PIT model. Only valid after
+KVM_CREATE_PIT2. The state is returned in the following structure:
+
+struct kvm_pit_state2 {
+ struct kvm_pit_channel_state channels[3];
+ __u32 flags;
+ __u32 reserved[9];
+};
+
+Valid flags are:
+
+/* disable PIT in HPET legacy mode */
+#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
+
+This IOCTL replaces the obsolete KVM_GET_PIT.
+
+
+4.73 KVM_SET_PIT2
+
+Capability: KVM_CAP_PIT_STATE2
+Architectures: x86
+Type: vm ioctl
+Parameters: struct kvm_pit_state2 (in)
+Returns: 0 on success, -1 on error
+
+Sets the state of the in-kernel PIT model. Only valid after KVM_CREATE_PIT2.
+See KVM_GET_PIT2 for details on struct kvm_pit_state2.
+
+This IOCTL replaces the obsolete KVM_SET_PIT.
+
+
+4.74 KVM_PPC_GET_SMMU_INFO
+
+Capability: KVM_CAP_PPC_GET_SMMU_INFO
+Architectures: powerpc
+Type: vm ioctl
+Parameters: None
+Returns: 0 on success, -1 on error
+
+This populates and returns a structure describing the features of
+the "Server" class MMU emulation supported by KVM.
+This can in turn be used by userspace to generate the appropriate
+device-tree properties for the guest operating system.
+
+The structure contains some global information, followed by an
+array of supported segment page sizes:
+
+ struct kvm_ppc_smmu_info {
+ __u64 flags;
+ __u32 slb_size;
+ __u32 pad;
+ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+ };
+
+The supported flags are:
+
+ - KVM_PPC_PAGE_SIZES_REAL:
+ When that flag is set, guest page sizes must "fit" the backing
+ store page sizes. When not set, any page size in the list can
+ be used regardless of how they are backed by userspace.
+
+ - KVM_PPC_1T_SEGMENTS
+ The emulated MMU supports 1T segments in addition to the
+ standard 256M ones.
+
+The "slb_size" field indicates how many SLB entries are supported
+
+The "sps" array contains 8 entries indicating the supported base
+page sizes for a segment in increasing order. Each entry is defined
+as follow:
+
+ struct kvm_ppc_one_seg_page_size {
+ __u32 page_shift; /* Base page shift of segment (or 0) */
+ __u32 slb_enc; /* SLB encoding for BookS */
+ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+ };
+
+An entry with a "page_shift" of 0 is unused. Because the array is
+organized in increasing order, a lookup can stop when encoutering
+such an entry.
+
+The "slb_enc" field provides the encoding to use in the SLB for the
+page size. The bits are in positions such as the value can directly
+be OR'ed into the "vsid" argument of the slbmte instruction.
+
+The "enc" array is a list which for each of those segment base page
+size provides the list of supported actual page sizes (which can be
+only larger or equal to the base page size), along with the
+corresponding encoding in the hash PTE. Similarly, the array is
+8 entries sorted by increasing sizes and an entry with a "0" shift
+is an empty entry and a terminator:
+
+ struct kvm_ppc_one_page_size {
+ __u32 page_shift; /* Page shift (or 0) */
+ __u32 pte_enc; /* Encoding in the HPTE (>>12) */
+ };
+
+The "pte_enc" field provides a value that can OR'ed into the hash
+PTE's RPN field (ie, it needs to be shifted left by 12 to OR it
+into the hash PTE second double word).
+
+4.75 KVM_IRQFD
+
+Capability: KVM_CAP_IRQFD
+Architectures: x86 s390 arm arm64
+Type: vm ioctl
+Parameters: struct kvm_irqfd (in)
+Returns: 0 on success, -1 on error
+
+Allows setting an eventfd to directly trigger a guest interrupt.
+kvm_irqfd.fd specifies the file descriptor to use as the eventfd and
+kvm_irqfd.gsi specifies the irqchip pin toggled by this event. When
+an event is triggered on the eventfd, an interrupt is injected into
+the guest using the specified gsi pin. The irqfd is removed using
+the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
+and kvm_irqfd.gsi.
+
+With KVM_CAP_IRQFD_RESAMPLE, KVM_IRQFD supports a de-assert and notify
+mechanism allowing emulation of level-triggered, irqfd-based
+interrupts. When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an
+additional eventfd in the kvm_irqfd.resamplefd field. When operating
+in resample mode, posting of an interrupt through kvm_irq.fd asserts
+the specified gsi in the irqchip. When the irqchip is resampled, such
+as from an EOI, the gsi is de-asserted and the user is notified via
+kvm_irqfd.resamplefd. It is the user's responsibility to re-queue
+the interrupt if the device making use of it still requires service.
+Note that closing the resamplefd is not sufficient to disable the
+irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
+and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
+
+On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared
+Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is
+given by gsi + 32.
+
+4.76 KVM_PPC_ALLOCATE_HTAB
+
+Capability: KVM_CAP_PPC_ALLOC_HTAB
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to u32 containing hash table order (in/out)
+Returns: 0 on success, -1 on error
+
+This requests the host kernel to allocate an MMU hash table for a
+guest using the PAPR paravirtualization interface. This only does
+anything if the kernel is configured to use the Book 3S HV style of
+virtualization. Otherwise the capability doesn't exist and the ioctl
+returns an ENOTTY error. The rest of this description assumes Book 3S
+HV.
+
+There must be no vcpus running when this ioctl is called; if there
+are, it will do nothing and return an EBUSY error.
+
+The parameter is a pointer to a 32-bit unsigned integer variable
+containing the order (log base 2) of the desired size of the hash
+table, which must be between 18 and 46. On successful return from the
+ioctl, it will have been updated with the order of the hash table that
+was allocated.
+
+If no hash table has been allocated when any vcpu is asked to run
+(with the KVM_RUN ioctl), the host kernel will allocate a
+default-sized hash table (16 MB).
+
+If this ioctl is called when a hash table has already been allocated,
+the kernel will clear out the existing hash table (zero all HPTEs) and
+return the hash table order in the parameter. (If the guest is using
+the virtualized real-mode area (VRMA) facility, the kernel will
+re-create the VMRA HPTEs on the next KVM_RUN of any vcpu.)
+
+4.77 KVM_S390_INTERRUPT
+
+Capability: basic
+Architectures: s390
+Type: vm ioctl, vcpu ioctl
+Parameters: struct kvm_s390_interrupt (in)
+Returns: 0 on success, -1 on error
+
+Allows to inject an interrupt to the guest. Interrupts can be floating
+(vm ioctl) or per cpu (vcpu ioctl), depending on the interrupt type.
+
+Interrupt parameters are passed via kvm_s390_interrupt:
+
+struct kvm_s390_interrupt {
+ __u32 type;
+ __u32 parm;
+ __u64 parm64;
+};
+
+type can be one of the following:
+
+KVM_S390_SIGP_STOP (vcpu) - sigp stop; optional flags in parm
+KVM_S390_PROGRAM_INT (vcpu) - program check; code in parm
+KVM_S390_SIGP_SET_PREFIX (vcpu) - sigp set prefix; prefix address in parm
+KVM_S390_RESTART (vcpu) - restart
+KVM_S390_INT_CLOCK_COMP (vcpu) - clock comparator interrupt
+KVM_S390_INT_CPU_TIMER (vcpu) - CPU timer interrupt
+KVM_S390_INT_VIRTIO (vm) - virtio external interrupt; external interrupt
+ parameters in parm and parm64
+KVM_S390_INT_SERVICE (vm) - sclp external interrupt; sclp parameter in parm
+KVM_S390_INT_EMERGENCY (vcpu) - sigp emergency; source cpu in parm
+KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm
+KVM_S390_INT_IO(ai,cssid,ssid,schid) (vm) - compound value to indicate an
+ I/O interrupt (ai - adapter interrupt; cssid,ssid,schid - subchannel);
+ I/O interruption parameters in parm (subchannel) and parm64 (intparm,
+ interruption subclass)
+KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm,
+ machine check interrupt code in parm64 (note that
+ machine checks needing further payload are not
+ supported by this ioctl)
+
+Note that the vcpu ioctl is asynchronous to vcpu execution.
+
+4.78 KVM_PPC_GET_HTAB_FD
+
+Capability: KVM_CAP_PPC_HTAB_FD
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to struct kvm_get_htab_fd (in)
+Returns: file descriptor number (>= 0) on success, -1 on error
+
+This returns a file descriptor that can be used either to read out the
+entries in the guest's hashed page table (HPT), or to write entries to
+initialize the HPT. The returned fd can only be written to if the
+KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and
+can only be read if that bit is clear. The argument struct looks like
+this:
+
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+ __u64 flags;
+ __u64 start_index;
+ __u64 reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
+#define KVM_GET_HTAB_WRITE ((__u64)0x2)
+
+The `start_index' field gives the index in the HPT of the entry at
+which to start reading. It is ignored when writing.
+
+Reads on the fd will initially supply information about all
+"interesting" HPT entries. Interesting entries are those with the
+bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise
+all entries. When the end of the HPT is reached, the read() will
+return. If read() is called again on the fd, it will start again from
+the beginning of the HPT, but will only return HPT entries that have
+changed since they were last read.
+
+Data read or written is structured as a header (8 bytes) followed by a
+series of valid HPT entries (16 bytes) each. The header indicates how
+many valid HPT entries there are and how many invalid entries follow
+the valid entries. The invalid entries are not represented explicitly
+in the stream. The header format is:
+
+struct kvm_get_htab_header {
+ __u32 index;
+ __u16 n_valid;
+ __u16 n_invalid;
+};
+
+Writes to the fd create HPT entries starting at the index given in the
+header; first `n_valid' valid entries with contents from the data
+written, then `n_invalid' invalid entries, invalidating any previously
+valid entries found.
+
+4.79 KVM_CREATE_DEVICE
+
+Capability: KVM_CAP_DEVICE_CTRL
+Type: vm ioctl
+Parameters: struct kvm_create_device (in/out)
+Returns: 0 on success, -1 on error
+Errors:
+ ENODEV: The device type is unknown or unsupported
+ EEXIST: Device already created, and this type of device may not
+ be instantiated multiple times
+
+ Other error conditions may be defined by individual device types or
+ have their standard meanings.
+
+Creates an emulated device in the kernel. The file descriptor returned
+in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR.
+
+If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the
+device type is supported (not necessarily whether it can be created
+in the current vm).
+
+Individual devices should not define flags. Attributes should be used
+for specifying any behavior that is not implied by the device type
+number.
+
+struct kvm_create_device {
+ __u32 type; /* in: KVM_DEV_TYPE_xxx */
+ __u32 fd; /* out: device handle */
+ __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */
+};
+
+4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
+Type: device ioctl, vm ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+ ENXIO: The group or attribute is unknown/unsupported for this device
+ EPERM: The attribute cannot (currently) be accessed this way
+ (e.g. read-only attribute, or attribute that only makes
+ sense when the device is in a different state)
+
+ Other error conditions may be defined by individual device types.
+
+Gets/sets a specified piece of device configuration and/or state. The
+semantics are device-specific. See individual device documentation in
+the "devices" directory. As with ONE_REG, the size of the data
+transferred is defined by the particular attribute.
+
+struct kvm_device_attr {
+ __u32 flags; /* no flags currently defined */
+ __u32 group; /* device-defined */
+ __u64 attr; /* group-defined */
+ __u64 addr; /* userspace address of attr data */
+};
+
+4.81 KVM_HAS_DEVICE_ATTR
+
+Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device
+Type: device ioctl, vm ioctl
+Parameters: struct kvm_device_attr
+Returns: 0 on success, -1 on error
+Errors:
+ ENXIO: The group or attribute is unknown/unsupported for this device
+
+Tests whether a device supports a particular attribute. A successful
+return indicates the attribute is implemented. It does not necessarily
+indicate that the attribute can be read or written in the device's
+current state. "addr" is ignored.
+
+4.82 KVM_ARM_VCPU_INIT
+
+Capability: basic
+Architectures: arm, arm64
+Type: vcpu ioctl
+Parameters: struct kvm_vcpu_init (in)
+Returns: 0 on success; -1 on error
+Errors:
+  EINVAL:    the target is unknown, or the combination of features is invalid.
+  ENOENT:    a features bit specified is unknown.
+
+This tells KVM what type of CPU to present to the guest, and what
+optional features it should have.  This will cause a reset of the cpu
+registers to their initial values.  If this is not called, KVM_RUN will
+return ENOEXEC for that vcpu.
+
+Note that because some registers reflect machine topology, all vcpus
+should be created before this ioctl is invoked.
+
+Userspace can call this function multiple times for a given vcpu, including
+after the vcpu has been run. This will reset the vcpu to its initial
+state. All calls to this function after the initial call must use the same
+target and same set of feature flags, otherwise EINVAL will be returned.
+
+Possible features:
+ - KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
+ Depends on KVM_CAP_ARM_PSCI. If not set, the CPU will be powered on
+ and execute guest code when KVM_RUN is called.
+ - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
+ Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
+ - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
+ Depends on KVM_CAP_ARM_PSCI_0_2.
+
+
+4.83 KVM_ARM_PREFERRED_TARGET
+
+Capability: basic
+Architectures: arm, arm64
+Type: vm ioctl
+Parameters: struct struct kvm_vcpu_init (out)
+Returns: 0 on success; -1 on error
+Errors:
+ ENODEV: no preferred target available for the host
+
+This queries KVM for preferred CPU target type which can be emulated
+by KVM on underlying host.
+
+The ioctl returns struct kvm_vcpu_init instance containing information
+about preferred CPU target type and recommended features for it. The
+kvm_vcpu_init->features bitmap returned will have feature bits set if
+the preferred target recommends setting these features, but this is
+not mandatory.
+
+The information returned by this ioctl can be used to prepare an instance
+of struct kvm_vcpu_init for KVM_ARM_VCPU_INIT ioctl which will result in
+in VCPU matching underlying host.
+
+
+4.84 KVM_GET_REG_LIST
+
+Capability: basic
+Architectures: arm, arm64, mips
+Type: vcpu ioctl
+Parameters: struct kvm_reg_list (in/out)
+Returns: 0 on success; -1 on error
+Errors:
+  E2BIG:     the reg index list is too big to fit in the array specified by
+             the user (the number required will be written into n).
+
+struct kvm_reg_list {
+ __u64 n; /* number of registers in reg[] */
+ __u64 reg[0];
+};
+
+This ioctl returns the guest registers that are supported for the
+KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
+
+
+4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
+
+Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
+Architectures: arm, arm64
+Type: vm ioctl
+Parameters: struct kvm_arm_device_address (in)
+Returns: 0 on success, -1 on error
+Errors:
+ ENODEV: The device id is unknown
+ ENXIO: Device not supported on current system
+ EEXIST: Address already set
+ E2BIG: Address outside guest physical address space
+ EBUSY: Address overlaps with other device range
+
+struct kvm_arm_device_addr {
+ __u64 id;
+ __u64 addr;
+};
+
+Specify a device address in the guest's physical address space where guests
+can access emulated or directly exposed devices, which the host kernel needs
+to know about. The id field is an architecture specific identifier for a
+specific device.
+
+ARM/arm64 divides the id field into two parts, a device id and an
+address type id specific to the individual device.
+
+  bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 |
+ field: | 0x00000000 | device id | addr type id |
+
+ARM/arm64 currently only require this when using the in-kernel GIC
+support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2
+as the device id. When setting the base address for the guest's
+mapping of the VGIC virtual CPU and distributor interface, the ioctl
+must be called after calling KVM_CREATE_IRQCHIP, but before calling
+KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the
+base addresses will return -EEXIST.
+
+Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API
+should be used instead.
+
+
+4.86 KVM_PPC_RTAS_DEFINE_TOKEN
+
+Capability: KVM_CAP_PPC_RTAS
+Architectures: ppc
+Type: vm ioctl
+Parameters: struct kvm_rtas_token_args
+Returns: 0 on success, -1 on error
+
+Defines a token value for a RTAS (Run Time Abstraction Services)
+service in order to allow it to be handled in the kernel. The
+argument struct gives the name of the service, which must be the name
+of a service that has a kernel-side implementation. If the token
+value is non-zero, it will be associated with that service, and
+subsequent RTAS calls by the guest specifying that token will be
+handled by the kernel. If the token value is 0, then any token
+associated with the service will be forgotten, and subsequent RTAS
+calls by the guest for that service will be passed to userspace to be
+handled.
+
+4.87 KVM_SET_GUEST_DEBUG
+
+Capability: KVM_CAP_SET_GUEST_DEBUG
+Architectures: x86, s390, ppc
+Type: vcpu ioctl
+Parameters: struct kvm_guest_debug (in)
+Returns: 0 on success; -1 on error
+
+struct kvm_guest_debug {
+ __u32 control;
+ __u32 pad;
+ struct kvm_guest_debug_arch arch;
+};
+
+Set up the processor specific debug registers and configure vcpu for
+handling guest debug events. There are two parts to the structure, the
+first a control bitfield indicates the type of debug events to handle
+when running. Common control bits are:
+
+ - KVM_GUESTDBG_ENABLE: guest debugging is enabled
+ - KVM_GUESTDBG_SINGLESTEP: the next run should single-step
+
+The top 16 bits of the control field are architecture specific control
+flags which can include the following:
+
+ - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86]
+ - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390]
+ - KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
+ - KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
+ - KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390]
+
+For example KVM_GUESTDBG_USE_SW_BP indicates that software breakpoints
+are enabled in memory so we need to ensure breakpoint exceptions are
+correctly trapped and the KVM run loop exits at the breakpoint and not
+running off into the normal guest vector. For KVM_GUESTDBG_USE_HW_BP
+we need to ensure the guest vCPUs architecture specific registers are
+updated to the correct (supplied) values.
+
+The second part of the structure is architecture specific and
+typically contains a set of debug registers.
+
+When debug events exit the main run loop with the reason
+KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
+structure containing architecture specific debug information.
+
+4.88 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+ __u32 nent;
+ __u32 flags;
+ struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+
+struct kvm_cpuid_entry2 {
+ __u32 function;
+ __u32 index;
+ __u32 flags;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ __u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm.Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently and thus not included here.
+
+The fields in each entry are defined as follows:
+
+ function: the eax value used to obtain the entry
+ index: the ecx value used to obtain the entry (for entries that are
+ affected by ecx)
+ flags: an OR of zero or more of the following:
+ KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+ if the index field is valid
+ KVM_CPUID_FLAG_STATEFUL_FUNC:
+ if cpuid for this function returns different values for successive
+ invocations; there will be several entries with the same function,
+ all with this flag set
+ KVM_CPUID_FLAG_STATE_READ_NEXT:
+ for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+ the first entry to be read by a cpu
+ eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+ this function/index combination
+
+4.89 KVM_S390_MEM_OP
+
+Capability: KVM_CAP_S390_MEM_OP
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_mem_op (in)
+Returns: = 0 on success,
+ < 0 on generic error (e.g. -EFAULT or -ENOMEM),
+ > 0 if an exception occurred while walking the page tables
+
+Read or write data from/to the logical (virtual) memory of a VPCU.
+
+Parameters are specified via the following structure:
+
+struct kvm_s390_mem_op {
+ __u64 gaddr; /* the guest address */
+ __u64 flags; /* flags */
+ __u32 size; /* amount of bytes */
+ __u32 op; /* type of operation */
+ __u64 buf; /* buffer in userspace */
+ __u8 ar; /* the access register number */
+ __u8 reserved[31]; /* should be set to 0 */
+};
+
+The type of operation is specified in the "op" field. It is either
+KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or
+KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The
+KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check
+whether the corresponding memory access would create an access exception
+(without touching the data in the memory at the destination). In case an
+access exception occurred while walking the MMU tables of the guest, the
+ioctl returns a positive error number to indicate the type of exception.
+This exception is also raised directly at the corresponding VCPU if the
+flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field.
+
+The start address of the memory region has to be specified in the "gaddr"
+field, and the length of the region in the "size" field. "buf" is the buffer
+supplied by the userspace application where the read data should be written
+to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written
+is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. "buf" is unused and can be NULL
+when KVM_S390_MEMOP_F_CHECK_ONLY is specified. "ar" designates the access
+register number to be used.
+
+The "reserved" field is meant for future extensions. It is not used by
+KVM with the currently defined set of flags.
+
+4.90 KVM_S390_GET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage
+ keys, negative value on error
+
+This ioctl is used to get guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+
+struct kvm_s390_skeys {
+ __u64 start_gfn;
+ __u64 count;
+ __u64 skeydata_addr;
+ __u32 flags;
+ __u32 reserved[9];
+};
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to get.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer large enough to hold count
+bytes. This buffer will be filled with storage key data by the ioctl.
+
+4.91 KVM_S390_SET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, negative value on error
+
+This ioctl is used to set guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+See section on KVM_S390_GET_SKEYS for struct definition.
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to set.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer containing count bytes of
+storage keys. Each byte in the buffer will be set as the storage key for a
+single frame starting at start_gfn for count frames.
+
+Note: If any architecturally invalid key value is found in the given data then
+the ioctl will return -EINVAL.
+
+4.92 KVM_S390_IRQ
+
+Capability: KVM_CAP_S390_INJECT_IRQ
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq (in)
+Returns: 0 on success, -1 on error
+Errors:
+ EINVAL: interrupt type is invalid
+ type is KVM_S390_SIGP_STOP and flag parameter is invalid value
+ type is KVM_S390_INT_EXTERNAL_CALL and code is bigger
+ than the maximum of VCPUs
+ EBUSY: type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped
+ type is KVM_S390_SIGP_STOP and a stop irq is already pending
+ type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt
+ is already pending
+
+Allows to inject an interrupt to the guest.
+
+Using struct kvm_s390_irq as a parameter allows
+to inject additional payload which is not
+possible via KVM_S390_INTERRUPT.
+
+Interrupt parameters are passed via kvm_s390_irq:
+
+struct kvm_s390_irq {
+ __u64 type;
+ union {
+ struct kvm_s390_io_info io;
+ struct kvm_s390_ext_info ext;
+ struct kvm_s390_pgm_info pgm;
+ struct kvm_s390_emerg_info emerg;
+ struct kvm_s390_extcall_info extcall;
+ struct kvm_s390_prefix_info prefix;
+ struct kvm_s390_stop_info stop;
+ struct kvm_s390_mchk_info mchk;
+ char reserved[64];
+ } u;
+};
+
+type can be one of the following:
+
+KVM_S390_SIGP_STOP - sigp stop; parameter in .stop
+KVM_S390_PROGRAM_INT - program check; parameters in .pgm
+KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix
+KVM_S390_RESTART - restart; no parameters
+KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters
+KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters
+KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
+KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
+KVM_S390_MCHK - machine check interrupt; parameters in .mchk
+
+
+Note that the vcpu ioctl is asynchronous to vcpu execution.
+
+4.94 KVM_S390_GET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (out)
+Returns: >= number of bytes copied into buffer,
+ -EINVAL if buffer size is 0,
+ -ENOBUFS if buffer size is too small to fit all pending interrupts,
+ -EFAULT if the buffer address was invalid
+
+This ioctl allows userspace to retrieve the complete state of all currently
+pending interrupts in a single buffer. Use cases include migration
+and introspection. The parameter structure contains the address of a
+userspace buffer and its length:
+
+struct kvm_s390_irq_state {
+ __u64 buf;
+ __u32 flags;
+ __u32 len;
+ __u32 reserved[4];
+};
+
+Userspace passes in the above struct and for each pending interrupt a
+struct kvm_s390_irq is copied to the provided buffer.
+
+If -ENOBUFS is returned the buffer provided was too small and userspace
+may retry with a bigger buffer.
+
+4.95 KVM_S390_SET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (in)
+Returns: 0 on success,
+ -EFAULT if the buffer address was invalid,
+ -EINVAL for an invalid buffer length (see below),
+ -EBUSY if there were already interrupts pending,
+ errors occurring when actually injecting the
+ interrupt. See KVM_S390_IRQ.
+
+This ioctl allows userspace to set the complete state of all cpu-local
+interrupts currently pending for the vcpu. It is intended for restoring
+interrupt state after a migration. The input parameter is a userspace buffer
+containing a struct kvm_s390_irq_state:
+
+struct kvm_s390_irq_state {
+ __u64 buf;
+ __u32 len;
+ __u32 pad;
+};
+
+The userspace memory referenced by buf contains a struct kvm_s390_irq
+for each interrupt to be injected into the guest.
+If one of the interrupts could not be injected for some reason the
+ioctl aborts.
+
+len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
+and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
+which is the maximum number of possibly pending cpu-local interrupts.
+
+5. The kvm_run structure
+------------------------
+
+Application code obtains a pointer to the kvm_run structure by
+mmap()ing a vcpu fd. From that point, application code can control
+execution by changing fields in kvm_run prior to calling the KVM_RUN
+ioctl, and obtain information about the reason KVM_RUN returned by
+looking up structure members.
+
+struct kvm_run {
+ /* in */
+ __u8 request_interrupt_window;
+
+Request that KVM_RUN return when it becomes possible to inject external
+interrupts into the guest. Useful in conjunction with KVM_INTERRUPT.
+
+ __u8 padding1[7];
+
+ /* out */
+ __u32 exit_reason;
+
+When KVM_RUN has returned successfully (return value 0), this informs
+application code why KVM_RUN has returned. Allowable values for this
+field are detailed below.
+
+ __u8 ready_for_interrupt_injection;
+
+If request_interrupt_window has been specified, this field indicates
+an interrupt can be injected now with KVM_INTERRUPT.
+
+ __u8 if_flag;
+
+The value of the current interrupt flag. Only valid if in-kernel
+local APIC is not used.
+
+ __u8 padding2[2];
+
+ /* in (pre_kvm_run), out (post_kvm_run) */
+ __u64 cr8;
+
+The value of the cr8 register. Only valid if in-kernel local APIC is
+not used. Both input and output.
+
+ __u64 apic_base;
+
+The value of the APIC BASE msr. Only valid if in-kernel local
+APIC is not used. Both input and output.
+
+ union {
+ /* KVM_EXIT_UNKNOWN */
+ struct {
+ __u64 hardware_exit_reason;
+ } hw;
+
+If exit_reason is KVM_EXIT_UNKNOWN, the vcpu has exited due to unknown
+reasons. Further architecture-specific information is available in
+hardware_exit_reason.
+
+ /* KVM_EXIT_FAIL_ENTRY */
+ struct {
+ __u64 hardware_entry_failure_reason;
+ } fail_entry;
+
+If exit_reason is KVM_EXIT_FAIL_ENTRY, the vcpu could not be run due
+to unknown reasons. Further architecture-specific information is
+available in hardware_entry_failure_reason.
+
+ /* KVM_EXIT_EXCEPTION */
+ struct {
+ __u32 exception;
+ __u32 error_code;
+ } ex;
+
+Unused.
+
+ /* KVM_EXIT_IO */
+ struct {
+#define KVM_EXIT_IO_IN 0
+#define KVM_EXIT_IO_OUT 1
+ __u8 direction;
+ __u8 size; /* bytes */
+ __u16 port;
+ __u32 count;
+ __u64 data_offset; /* relative to kvm_run start */
+ } io;
+
+If exit_reason is KVM_EXIT_IO, then the vcpu has
+executed a port I/O instruction which could not be satisfied by kvm.
+data_offset describes where the data is located (KVM_EXIT_IO_OUT) or
+where kvm expects application code to place the data for the next
+KVM_RUN invocation (KVM_EXIT_IO_IN). Data format is a packed array.
+
+ struct {
+ struct kvm_debug_exit_arch arch;
+ } debug;
+
+Unused.
+
+ /* KVM_EXIT_MMIO */
+ struct {
+ __u64 phys_addr;
+ __u8 data[8];
+ __u32 len;
+ __u8 is_write;
+ } mmio;
+
+If exit_reason is KVM_EXIT_MMIO, then the vcpu has
+executed a memory-mapped I/O instruction which could not be satisfied
+by kvm. The 'data' member contains the written data if 'is_write' is
+true, and should be filled by application code otherwise.
+
+The 'data' member contains, in its first 'len' bytes, the value as it would
+appear if the VCPU performed a load or store of the appropriate width directly
+to the byte array.
+
+NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and
+ KVM_EXIT_EPR the corresponding
+operations are complete (and guest state is consistent) only after userspace
+has re-entered the kernel with KVM_RUN. The kernel side will first finish
+incomplete operations and then check for pending signals. Userspace
+can re-enter the guest with an unmasked signal pending to complete
+pending operations.
+
+ /* KVM_EXIT_HYPERCALL */
+ struct {
+ __u64 nr;
+ __u64 args[6];
+ __u64 ret;
+ __u32 longmode;
+ __u32 pad;
+ } hypercall;
+
+Unused. This was once used for 'hypercall to userspace'. To implement
+such functionality, use KVM_EXIT_IO (x86) or KVM_EXIT_MMIO (all except s390).
+Note KVM_EXIT_IO is significantly faster than KVM_EXIT_MMIO.
+
+ /* KVM_EXIT_TPR_ACCESS */
+ struct {
+ __u64 rip;
+ __u32 is_write;
+ __u32 pad;
+ } tpr_access;
+
+To be documented (KVM_TPR_ACCESS_REPORTING).
+
+ /* KVM_EXIT_S390_SIEIC */
+ struct {
+ __u8 icptcode;
+ __u64 mask; /* psw upper half */
+ __u64 addr; /* psw lower half */
+ __u16 ipa;
+ __u32 ipb;
+ } s390_sieic;
+
+s390 specific.
+
+ /* KVM_EXIT_S390_RESET */
+#define KVM_S390_RESET_POR 1
+#define KVM_S390_RESET_CLEAR 2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT 8
+#define KVM_S390_RESET_IPL 16
+ __u64 s390_reset_flags;
+
+s390 specific.
+
+ /* KVM_EXIT_S390_UCONTROL */
+ struct {
+ __u64 trans_exc_code;
+ __u32 pgm_code;
+ } s390_ucontrol;
+
+s390 specific. A page fault has occurred for a user controlled virtual
+machine (KVM_VM_S390_UNCONTROL) on it's host page table that cannot be
+resolved by the kernel.
+The program code and the translation exception code that were placed
+in the cpu's lowcore are presented here as defined by the z Architecture
+Principles of Operation Book in the Chapter for Dynamic Address Translation
+(DAT)
+
+ /* KVM_EXIT_DCR */
+ struct {
+ __u32 dcrn;
+ __u32 data;
+ __u8 is_write;
+ } dcr;
+
+Deprecated - was used for 440 KVM.
+
+ /* KVM_EXIT_OSI */
+ struct {
+ __u64 gprs[32];
+ } osi;
+
+MOL uses a special hypercall interface it calls 'OSI'. To enable it, we catch
+hypercalls and exit with this exit struct that contains all the guest gprs.
+
+If exit_reason is KVM_EXIT_OSI, then the vcpu has triggered such a hypercall.
+Userspace can now handle the hypercall and when it's done modify the gprs as
+necessary. Upon guest entry all guest GPRs will then be replaced by the values
+in this struct.
+
+ /* KVM_EXIT_PAPR_HCALL */
+ struct {
+ __u64 nr;
+ __u64 ret;
+ __u64 args[9];
+ } papr_hcall;
+
+This is used on 64-bit PowerPC when emulating a pSeries partition,
+e.g. with the 'pseries' machine type in qemu. It occurs when the
+guest does a hypercall using the 'sc 1' instruction. The 'nr' field
+contains the hypercall number (from the guest R3), and 'args' contains
+the arguments (from the guest R4 - R12). Userspace should put the
+return code in 'ret' and any extra returned values in args[].
+The possible hypercalls are defined in the Power Architecture Platform
+Requirements (PAPR) document available from www.power.org (free
+developer registration required to access it).
+
+ /* KVM_EXIT_S390_TSCH */
+ struct {
+ __u16 subchannel_id;
+ __u16 subchannel_nr;
+ __u32 io_int_parm;
+ __u32 io_int_word;
+ __u32 ipb;
+ __u8 dequeued;
+ } s390_tsch;
+
+s390 specific. This exit occurs when KVM_CAP_S390_CSS_SUPPORT has been enabled
+and TEST SUBCHANNEL was intercepted. If dequeued is set, a pending I/O
+interrupt for the target subchannel has been dequeued and subchannel_id,
+subchannel_nr, io_int_parm and io_int_word contain the parameters for that
+interrupt. ipb is needed for instruction parameter decoding.
+
+ /* KVM_EXIT_EPR */
+ struct {
+ __u32 epr;
+ } epr;
+
+On FSL BookE PowerPC chips, the interrupt controller has a fast patch
+interrupt acknowledge path to the core. When the core successfully
+delivers an interrupt, it automatically populates the EPR register with
+the interrupt vector number and acknowledges the interrupt inside
+the interrupt controller.
+
+In case the interrupt controller lives in user space, we need to do
+the interrupt acknowledge cycle through it to fetch the next to be
+delivered interrupt vector using this exit.
+
+It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
+external interrupt has just been delivered into the guest. User space
+should put the acknowledged interrupt vector into the 'epr' field.
+
+ /* KVM_EXIT_SYSTEM_EVENT */
+ struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN 1
+#define KVM_SYSTEM_EVENT_RESET 2
+ __u32 type;
+ __u64 flags;
+ } system_event;
+
+If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
+a system-level event using some architecture specific mechanism (hypercall
+or some special instruction). In case of ARM/ARM64, this is triggered using
+HVC instruction based PSCI call from the vcpu. The 'type' field describes
+the system-level event type. The 'flags' field describes architecture
+specific flags for the system-level event.
+
+Valid values for 'type' are:
+ KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
+ VM. Userspace is not obliged to honour this, and if it does honour
+ this does not need to destroy the VM synchronously (ie it may call
+ KVM_RUN again before shutdown finally occurs).
+ KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
+ As with SHUTDOWN, userspace can choose to ignore the request, or
+ to schedule the reset to occur in the future and may call KVM_RUN again.
+
+ /* Fix the size of the union. */
+ char padding[256];
+ };
+
+ /*
+ * shared registers between kvm and userspace.
+ * kvm_valid_regs specifies the register classes set by the host
+ * kvm_dirty_regs specified the register classes dirtied by userspace
+ * struct kvm_sync_regs is architecture specific, as well as the
+ * bits for kvm_valid_regs and kvm_dirty_regs
+ */
+ __u64 kvm_valid_regs;
+ __u64 kvm_dirty_regs;
+ union {
+ struct kvm_sync_regs regs;
+ char padding[1024];
+ } s;
+
+If KVM_CAP_SYNC_REGS is defined, these fields allow userspace to access
+certain guest registers without having to call SET/GET_*REGS. Thus we can
+avoid some system call overhead if userspace has to handle the exit.
+Userspace can query the validity of the structure by checking
+kvm_valid_regs for specific bits. These bits are architecture specific
+and usually define the validity of a groups of registers. (e.g. one bit
+ for general purpose registers)
+
+Please note that the kernel is allowed to use the kvm_run structure as the
+primary storage for certain register types. Therefore, the kernel may use the
+values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set.
+
+};
+
+
+
+6. Capabilities that can be enabled on vCPUs
+--------------------------------------------
+
+There are certain capabilities that change the behavior of the virtual CPU or
+the virtual machine when enabled. To enable them, please see section 4.37.
+Below you can find a list of capabilities and what their effect on the vCPU or
+the virtual machine is when enabling them.
+
+The following information is provided along with the description:
+
+ Architectures: which instruction set architectures provide this ioctl.
+ x86 includes both i386 and x86_64.
+
+ Target: whether this is a per-vcpu or per-vm capability.
+
+ Parameters: what parameters are accepted by the capability.
+
+ Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
+ are not detailed, but errors with specific meanings are.
+
+
+6.1 KVM_CAP_PPC_OSI
+
+Architectures: ppc
+Target: vcpu
+Parameters: none
+Returns: 0 on success; -1 on error
+
+This capability enables interception of OSI hypercalls that otherwise would
+be treated as normal system calls to be injected into the guest. OSI hypercalls
+were invented by Mac-on-Linux to have a standardized communication mechanism
+between the guest and the host.
+
+When this capability is enabled, KVM_EXIT_OSI can occur.
+
+
+6.2 KVM_CAP_PPC_PAPR
+
+Architectures: ppc
+Target: vcpu
+Parameters: none
+Returns: 0 on success; -1 on error
+
+This capability enables interception of PAPR hypercalls. PAPR hypercalls are
+done using the hypercall instruction "sc 1".
+
+It also sets the guest privilege level to "supervisor" mode. Usually the guest
+runs in "hypervisor" privilege mode with a few missing features.
+
+In addition to the above, it changes the semantics of SDR1. In this mode, the
+HTAB address part of SDR1 contains an HVA instead of a GPA, as PAPR keeps the
+HTAB invisible to the guest.
+
+When this capability is enabled, KVM_EXIT_PAPR_HCALL can occur.
+
+
+6.3 KVM_CAP_SW_TLB
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] is the address of a struct kvm_config_tlb
+Returns: 0 on success; -1 on error
+
+struct kvm_config_tlb {
+ __u64 params;
+ __u64 array;
+ __u32 mmu_type;
+ __u32 array_len;
+};
+
+Configures the virtual CPU's TLB array, establishing a shared memory area
+between userspace and KVM. The "params" and "array" fields are userspace
+addresses of mmu-type-specific data structures. The "array_len" field is an
+safety mechanism, and should be set to the size in bytes of the memory that
+userspace has reserved for the array. It must be at least the size dictated
+by "mmu_type" and "params".
+
+While KVM_RUN is active, the shared region is under control of KVM. Its
+contents are undefined, and any modification by userspace results in
+boundedly undefined behavior.
+
+On return from KVM_RUN, the shared region will reflect the current state of
+the guest's TLB. If userspace makes any changes, it must call KVM_DIRTY_TLB
+to tell KVM which entries have been changed, prior to calling KVM_RUN again
+on this vcpu.
+
+For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+ - The "params" field is of type "struct kvm_book3e_206_tlb_params".
+ - The "array" field points to an array of type "struct
+ kvm_book3e_206_tlb_entry".
+ - The array consists of all entries in the first TLB, followed by all
+ entries in the second TLB.
+ - Within a TLB, entries are ordered first by increasing set number. Within a
+ set, entries are ordered by way (increasing ESEL).
+ - The hash for determining set number in TLB0 is: (MAS2 >> 12) & (num_sets - 1)
+ where "num_sets" is the tlb_sizes[] value divided by the tlb_ways[] value.
+ - The tsize field of mas1 shall be set to 4K on TLB0, even though the
+ hardware ignores this value for TLB0.
+
+6.4 KVM_CAP_S390_CSS_SUPPORT
+
+Architectures: s390
+Target: vcpu
+Parameters: none
+Returns: 0 on success; -1 on error
+
+This capability enables support for handling of channel I/O instructions.
+
+TEST PENDING INTERRUPTION and the interrupt portion of TEST SUBCHANNEL are
+handled in-kernel, while the other I/O instructions are passed to userspace.
+
+When this capability is enabled, KVM_EXIT_S390_TSCH will occur on TEST
+SUBCHANNEL intercepts.
+
+Note that even though this capability is enabled per-vcpu, the complete
+virtual machine is affected.
+
+6.5 KVM_CAP_PPC_EPR
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] defines whether the proxy facility is active
+Returns: 0 on success; -1 on error
+
+This capability enables or disables the delivery of interrupts through the
+external proxy facility.
+
+When enabled (args[0] != 0), every time the guest gets an external interrupt
+delivered, it automatically exits into user space with a KVM_EXIT_EPR exit
+to receive the topmost interrupt vector.
+
+When disabled (args[0] == 0), behavior is as if this facility is unsupported.
+
+When this capability is enabled, KVM_EXIT_EPR can occur.
+
+6.6 KVM_CAP_IRQ_MPIC
+
+Architectures: ppc
+Parameters: args[0] is the MPIC device fd
+ args[1] is the MPIC CPU number for this vcpu
+
+This capability connects the vcpu to an in-kernel MPIC device.
+
+6.7 KVM_CAP_IRQ_XICS
+
+Architectures: ppc
+Target: vcpu
+Parameters: args[0] is the XICS device fd
+ args[1] is the XICS CPU number (server ID) for this vcpu
+
+This capability connects the vcpu to an in-kernel XICS device.
+
+6.8 KVM_CAP_S390_IRQCHIP
+
+Architectures: s390
+Target: vm
+Parameters: none
+
+This capability enables the in-kernel irqchip for s390. Please refer to
+"4.24 KVM_CREATE_IRQCHIP" for details.
+
+6.9 KVM_CAP_MIPS_FPU
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the host Floating Point Unit by the guest. It
+allows the Config1.FP bit to be set to enable the FPU in the guest. Once this is
+done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed
+(depending on the current guest FPU register mode), and the Status.FR,
+Config5.FRE bits are accessible via the KVM API and also from the guest,
+depending on them being supported by the FPU.
+
+6.10 KVM_CAP_MIPS_MSA
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the MIPS SIMD Architecture (MSA) by the guest.
+It allows the Config3.MSAP bit to be set to enable the use of MSA by the guest.
+Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
+accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
+the guest.
+
+7. Capabilities that can be enabled on VMs
+------------------------------------------
+
+There are certain capabilities that change the behavior of the virtual
+machine when enabled. To enable them, please see section 4.37. Below
+you can find a list of capabilities and what their effect on the VM
+is when enabling them.
+
+The following information is provided along with the description:
+
+ Architectures: which instruction set architectures provide this ioctl.
+ x86 includes both i386 and x86_64.
+
+ Parameters: what parameters are accepted by the capability.
+
+ Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL)
+ are not detailed, but errors with specific meanings are.
+
+
+7.1 KVM_CAP_PPC_ENABLE_HCALL
+
+Architectures: ppc
+Parameters: args[0] is the sPAPR hcall number
+ args[1] is 0 to disable, 1 to enable in-kernel handling
+
+This capability controls whether individual sPAPR hypercalls (hcalls)
+get handled by the kernel or not. Enabling or disabling in-kernel
+handling of an hcall is effective across the VM. On creation, an
+initial set of hcalls are enabled for in-kernel handling, which
+consists of those hcalls for which in-kernel handlers were implemented
+before this capability was implemented. If disabled, the kernel will
+not to attempt to handle the hcall, but will always exit to userspace
+to handle it. Note that it may not make sense to enable some and
+disable others of a group of related hcalls, but KVM does not prevent
+userspace from doing that.
+
+If the hcall number specified is not one that has an in-kernel
+implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL
+error.
+
+7.2 KVM_CAP_S390_USER_SIGP
+
+Architectures: s390
+Parameters: none
+
+This capability controls which SIGP orders will be handled completely in user
+space. With this capability enabled, all fast orders will be handled completely
+in the kernel:
+- SENSE
+- SENSE RUNNING
+- EXTERNAL CALL
+- EMERGENCY SIGNAL
+- CONDITIONAL EMERGENCY SIGNAL
+
+All other orders will be handled completely in user space.
+
+Only privileged operation exceptions will be checked for in the kernel (or even
+in the hardware prior to interception). If this capability is not enabled, the
+old way of handling SIGP orders is used (partially in kernel and user space).
+
+7.3 KVM_CAP_S390_VECTOR_REGISTERS
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success, negative value on error
+
+Allows use of the vector registers introduced with z13 processor, and
+provides for the synchronization between host and user space. Will
+return -EINVAL if the machine does not support vectors.
+
+7.4 KVM_CAP_S390_USER_STSI
+
+Architectures: s390
+Parameters: none
+
+This capability allows post-handlers for the STSI instruction. After
+initial handling in the kernel, KVM exits to user space with
+KVM_EXIT_S390_STSI to allow user space to insert further data.
+
+Before exiting to userspace, kvm handlers should fill in s390_stsi field of
+vcpu->run:
+struct {
+ __u64 addr;
+ __u8 ar;
+ __u8 reserved;
+ __u8 fc;
+ __u8 sel1;
+ __u16 sel2;
+} s390_stsi;
+
+@addr - guest address of STSI SYSIB
+@fc - function code
+@sel1 - selector 1
+@sel2 - selector 2
+@ar - access register number
+
+KVM handlers should exit to userspace with rc = -EREMOTE.
+
+
+8. Other capabilities.
+----------------------
+
+This section lists capabilities that give information about other
+features of the KVM implementation.
+
+8.1 KVM_CAP_PPC_HWRNG
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel has an implementation of the
+H_RANDOM hypercall backed by a hardware random-number generator.
+If present, the kernel H_RANDOM handler can be enabled for guest use
+with the KVM_CAP_PPC_ENABLE_HCALL capability.
diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt
new file mode 100644
index 000000000..3c65feb83
--- /dev/null
+++ b/Documentation/virtual/kvm/cpuid.txt
@@ -0,0 +1,60 @@
+KVM CPUID bits
+Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
+=====================================================
+
+A guest running on a kvm host, can check some of its features using
+cpuid. This is not always guaranteed to work, since userspace can
+mask-out some, or even all KVM-related cpuid features before launching
+a guest.
+
+KVM cpuid functions are:
+
+function: KVM_CPUID_SIGNATURE (0x40000000)
+returns : eax = 0x40000001,
+ ebx = 0x4b4d564b,
+ ecx = 0x564b4d56,
+ edx = 0x4d.
+Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
+The value in eax corresponds to the maximum cpuid function present in this leaf,
+and will be updated if more functions are added in the future.
+Note also that old hosts set eax value to 0x0. This should
+be interpreted as if the value was 0x40000001.
+This function queries the presence of KVM cpuid leafs.
+
+
+function: define KVM_CPUID_FEATURES (0x40000001)
+returns : ebx, ecx, edx = 0
+ eax = and OR'ed group of (1 << flag), where each flags is:
+
+
+flag || value || meaning
+=============================================================================
+KVM_FEATURE_CLOCKSOURCE || 0 || kvmclock available at msrs
+ || || 0x11 and 0x12.
+------------------------------------------------------------------------------
+KVM_FEATURE_NOP_IO_DELAY || 1 || not necessary to perform delays
+ || || on PIO operations.
+------------------------------------------------------------------------------
+KVM_FEATURE_MMU_OP || 2 || deprecated.
+------------------------------------------------------------------------------
+KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs
+ || || 0x4b564d00 and 0x4b564d01
+------------------------------------------------------------------------------
+KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by
+ || || writing to msr 0x4b564d02
+------------------------------------------------------------------------------
+KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by
+ || || writing to msr 0x4b564d03.
+------------------------------------------------------------------------------
+KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt
+ || || handler can be enabled by writing
+ || || to msr 0x4b564d04.
+------------------------------------------------------------------------------
+KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit
+ || || before enabling paravirtualized
+ || || spinlock support.
+------------------------------------------------------------------------------
+KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
+ || || per-cpu warps are expected in
+ || || kvmclock.
+------------------------------------------------------------------------------
diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README
new file mode 100644
index 000000000..34a698341
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/README
@@ -0,0 +1 @@
+This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL.
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
new file mode 100644
index 000000000..3fb905429
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -0,0 +1,116 @@
+ARM Virtual Generic Interrupt Controller (VGIC)
+===============================================
+
+Device types supported:
+ KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
+ KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0
+
+Only one VGIC instance may be instantiated through either this API or the
+legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt
+controller, requiring emulated user-space devices to inject interrupts to the
+VGIC instead of directly to CPUs.
+
+Creating a guest GICv3 device requires a host GICv3 as well.
+GICv3 implementations with hardware compatibility support allow a guest GICv2
+as well.
+
+Groups:
+ KVM_DEV_ARM_VGIC_GRP_ADDR
+ Attributes:
+ KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
+ Base address in the guest physical address space of the GIC distributor
+ register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
+ This address needs to be 4K aligned and the region covers 4 KByte.
+
+ KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
+ Base address in the guest physical address space of the GIC virtual cpu
+ interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
+ This address needs to be 4K aligned and the region covers 4 KByte.
+
+ KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit)
+ Base address in the guest physical address space of the GICv3 distributor
+ register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
+ This address needs to be 64K aligned and the region covers 64 KByte.
+
+ KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit)
+ Base address in the guest physical address space of the GICv3
+ redistributor register mappings. There are two 64K pages for each
+ VCPU and all of the redistributor pages are contiguous.
+ Only valid for KVM_DEV_TYPE_ARM_VGIC_V3.
+ This address needs to be 64K aligned.
+
+
+ KVM_DEV_ARM_VGIC_GRP_DIST_REGS
+ Attributes:
+ The attr field of kvm_device_attr encodes two values:
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | cpu id | offset |
+
+ All distributor regs are (rw, 32-bit)
+
+ The offset is relative to the "Distributor base address" as defined in the
+ GICv2 specs. Getting or setting such a register has the same effect as
+ reading or writing the register on the actual hardware from the cpu
+ specified with cpu id field. Note that most distributor fields are not
+ banked, but return the same value regardless of the cpu id used to access
+ the register.
+ Limitations:
+ - Priorities are not implemented, and registers are RAZ/WI
+ - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
+ Errors:
+ -ENODEV: Getting or setting this register is not yet supported
+ -EBUSY: One or more VCPUs are running
+
+ KVM_DEV_ARM_VGIC_GRP_CPU_REGS
+ Attributes:
+ The attr field of kvm_device_attr encodes two values:
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | cpu id | offset |
+
+ All CPU interface regs are (rw, 32-bit)
+
+ The offset specifies the offset from the "CPU interface base address" as
+ defined in the GICv2 specs. Getting or setting such a register has the
+ same effect as reading or writing the register on the actual hardware.
+
+ The Active Priorities Registers APRn are implementation defined, so we set a
+ fixed format for our implementation that fits with the model of a "GICv2
+ implementation without the security extensions" which we present to the
+ guest. This interface always exposes four register APR[0-3] describing the
+ maximum possible 128 preemption levels. The semantics of the register
+ indicate if any interrupts in a given preemption level are in the active
+ state by setting the corresponding bit.
+
+ Thus, preemption level X has one or more active interrupts if and only if:
+
+ APRn[X mod 32] == 0b1, where n = X / 32
+
+ Bits for undefined preemption levels are RAZ/WI.
+
+ Limitations:
+ - Priorities are not implemented, and registers are RAZ/WI
+ - Currently only implemented for KVM_DEV_TYPE_ARM_VGIC_V2.
+ Errors:
+ -ENODEV: Getting or setting this register is not yet supported
+ -EBUSY: One or more VCPUs are running
+
+ KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+ Attributes:
+ A value describing the number of interrupts (SGI, PPI and SPI) for
+ this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+ Errors:
+ -EINVAL: Value set is out of the expected range
+ -EBUSY: Value has already be set, or GIC has already been initialized
+ with default values.
+
+ KVM_DEV_ARM_VGIC_GRP_CTRL
+ Attributes:
+ KVM_DEV_ARM_VGIC_CTRL_INIT
+ request the initialization of the VGIC, no additional parameter in
+ kvm_device_attr.addr.
+ Errors:
+ -ENXIO: VGIC not properly configured as required prior to calling
+ this attribute
+ -ENODEV: no online VCPU
+ -ENOMEM: memory shortage when allocating vgic internal data
diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt
new file mode 100644
index 000000000..8257397ad
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/mpic.txt
@@ -0,0 +1,53 @@
+MPIC interrupt controller
+=========================
+
+Device types supported:
+ KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0
+ KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2
+
+Only one MPIC instance, of any type, may be instantiated. The created
+MPIC will act as the system interrupt controller, connecting to each
+vcpu's interrupt inputs.
+
+Groups:
+ KVM_DEV_MPIC_GRP_MISC
+ Attributes:
+ KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit)
+ Base address of the 256 KiB MPIC register space. Must be
+ naturally aligned. A value of zero disables the mapping.
+ Reset value is zero.
+
+ KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit)
+ Access an MPIC register, as if the access were made from the guest.
+ "attr" is the byte offset into the MPIC register space. Accesses
+ must be 4-byte aligned.
+
+ MSIs may be signaled by using this attribute group to write
+ to the relevant MSIIR.
+
+ KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit)
+ IRQ input line for each standard openpic source. 0 is inactive and 1
+ is active, regardless of interrupt sense.
+
+ For edge-triggered interrupts: Writing 1 is considered an activating
+ edge, and writing 0 is ignored. Reading returns 1 if a previously
+ signaled edge has not been acknowledged, and 0 otherwise.
+
+ "attr" is the IRQ number. IRQ numbers for standard sources are the
+ byte offset of the relevant IVPR from EIVPR0, divided by 32.
+
+IRQ Routing:
+
+ The MPIC emulation supports IRQ routing. Only a single MPIC device can
+ be instantiated. Once that device has been created, it's available as
+ irqchip id 0.
+
+ This irqchip 0 has 256 interrupt pins, which expose the interrupts in
+ the main array of interrupt sources (a.k.a. "SRC" interrupts).
+
+ The numbering is the same as the MPIC device tree binding -- based on
+ the register offset from the beginning of the sources array, without
+ regard to any subdivisions in chip documentation such as "internal"
+ or "external" interrupts.
+
+ Access to non-SRC interrupts is not implemented through IRQ routing mechanisms.
diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt
new file mode 100644
index 000000000..d1ad9d5ca
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/s390_flic.txt
@@ -0,0 +1,94 @@
+FLIC (floating interrupt controller)
+====================================
+
+FLIC handles floating (non per-cpu) interrupts, i.e. I/O, service and some
+machine check interruptions. All interrupts are stored in a per-vm list of
+pending interrupts. FLIC performs operations on this list.
+
+Only one FLIC instance may be instantiated.
+
+FLIC provides support to
+- add interrupts (KVM_DEV_FLIC_ENQUEUE)
+- inspect currently pending interrupts (KVM_FLIC_GET_ALL_IRQS)
+- purge all pending floating interrupts (KVM_DEV_FLIC_CLEAR_IRQS)
+- enable/disable for the guest transparent async page faults
+- register and modify adapter interrupt sources (KVM_DEV_FLIC_ADAPTER_*)
+
+Groups:
+ KVM_DEV_FLIC_ENQUEUE
+ Passes a buffer and length into the kernel which are then injected into
+ the list of pending interrupts.
+ attr->addr contains the pointer to the buffer and attr->attr contains
+ the length of the buffer.
+ The format of the data structure kvm_s390_irq as it is copied from userspace
+ is defined in usr/include/linux/kvm.h.
+
+ KVM_DEV_FLIC_GET_ALL_IRQS
+ Copies all floating interrupts into a buffer provided by userspace.
+ When the buffer is too small it returns -ENOMEM, which is the indication
+ for userspace to try again with a bigger buffer.
+ -ENOBUFS is returned when the allocation of a kernelspace buffer has
+ failed.
+ -EFAULT is returned when copying data to userspace failed.
+ All interrupts remain pending, i.e. are not deleted from the list of
+ currently pending interrupts.
+ attr->addr contains the userspace address of the buffer into which all
+ interrupt data will be copied.
+ attr->attr contains the size of the buffer in bytes.
+
+ KVM_DEV_FLIC_CLEAR_IRQS
+ Simply deletes all elements from the list of currently pending floating
+ interrupts. No interrupts are injected into the guest.
+
+ KVM_DEV_FLIC_APF_ENABLE
+ Enables async page faults for the guest. So in case of a major page fault
+ the host is allowed to handle this async and continues the guest.
+
+ KVM_DEV_FLIC_APF_DISABLE_WAIT
+ Disables async page faults for the guest and waits until already pending
+ async page faults are done. This is necessary to trigger a completion interrupt
+ for every init interrupt before migrating the interrupt list.
+
+ KVM_DEV_FLIC_ADAPTER_REGISTER
+ Register an I/O adapter interrupt source. Takes a kvm_s390_io_adapter
+ describing the adapter to register:
+
+struct kvm_s390_io_adapter {
+ __u32 id;
+ __u8 isc;
+ __u8 maskable;
+ __u8 swap;
+ __u8 pad;
+};
+
+ id contains the unique id for the adapter, isc the I/O interruption subclass
+ to use, maskable whether this adapter may be masked (interrupts turned off)
+ and swap whether the indicators need to be byte swapped.
+
+
+ KVM_DEV_FLIC_ADAPTER_MODIFY
+ Modifies attributes of an existing I/O adapter interrupt source. Takes
+ a kvm_s390_io_adapter_req specifiying the adapter and the operation:
+
+struct kvm_s390_io_adapter_req {
+ __u32 id;
+ __u8 type;
+ __u8 mask;
+ __u16 pad0;
+ __u64 addr;
+};
+
+ id specifies the adapter and type the operation. The supported operations
+ are:
+
+ KVM_S390_IO_ADAPTER_MASK
+ mask or unmask the adapter, as specified in mask
+
+ KVM_S390_IO_ADAPTER_MAP
+ perform a gmap translation for the guest address provided in addr,
+ pin a userspace page for the translated address and add it to the
+ list of mappings
+
+ KVM_S390_IO_ADAPTER_UNMAP
+ release a userspace page for the translated address specified in addr
+ from the list of mappings
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
new file mode 100644
index 000000000..ef51740c6
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -0,0 +1,22 @@
+VFIO virtual device
+===================
+
+Device types supported:
+ KVM_DEV_TYPE_VFIO
+
+Only one VFIO instance may be created per VM. The created device
+tracks VFIO groups in use by the VM and features of those groups
+important to the correctness and acceleration of the VM. As groups
+are enabled and disabled for use by the VM, KVM should be updated
+about their presence. When registered with KVM, a reference to the
+VFIO-group is held by KVM.
+
+Groups:
+ KVM_DEV_VFIO_GROUP
+
+KVM_DEV_VFIO_GROUP attributes:
+ KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+ KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+
+For each, kvm_device_attr.addr points to an int32_t file descriptor
+for the VFIO group.
diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
new file mode 100644
index 000000000..5542c4641
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vm.txt
@@ -0,0 +1,85 @@
+Generic vm interface
+====================================
+
+The virtual machine "device" also accepts the ioctls KVM_SET_DEVICE_ATTR,
+KVM_GET_DEVICE_ATTR, and KVM_HAS_DEVICE_ATTR. The interface uses the same
+struct kvm_device_attr as other devices, but targets VM-wide settings
+and controls.
+
+The groups and attributes per virtual machine, if any, are architecture
+specific.
+
+1. GROUP: KVM_S390_VM_MEM_CTRL
+Architectures: s390
+
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
+Parameters: none
+Returns: -EBUSY if a vcpu is already defined, otherwise 0
+
+Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
+
+1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
+Parameters: none
+Returns: 0
+
+Clear the CMMA status for all guest pages, so any pages the guest marked
+as unused are again used any may not be reclaimed by the host.
+
+1.3. ATTRIBUTE KVM_S390_VM_MEM_LIMIT_SIZE
+Parameters: in attr->addr the address for the new limit of guest memory
+Returns: -EFAULT if the given address is not accessible
+ -EINVAL if the virtual machine is of type UCONTROL
+ -E2BIG if the given guest memory is to big for that machine
+ -EBUSY if a vcpu is already defined
+ -ENOMEM if not enough memory is available for a new shadow guest mapping
+ 0 otherwise
+
+Allows userspace to query the actual limit and set a new limit for
+the maximum guest memory size. The limit will be rounded up to
+2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by
+the number of page table levels.
+
+2. GROUP: KVM_S390_VM_CPU_MODEL
+Architectures: s390
+
+2.1. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE (r/o)
+
+Allows user space to retrieve machine and kvm specific cpu related information:
+
+struct kvm_s390_vm_cpu_machine {
+ __u64 cpuid; # CPUID of host
+ __u32 ibc; # IBC level range offered by host
+ __u8 pad[4];
+ __u64 fac_mask[256]; # set of cpu facilities enabled by KVM
+ __u64 fac_list[256]; # set of cpu facilities offered by host
+}
+
+Parameters: address of buffer to store the machine related cpu data
+ of type struct kvm_s390_vm_cpu_machine*
+Returns: -EFAULT if the given address is not accessible from kernel space
+ -ENOMEM if not enough memory is available to process the ioctl
+ 0 in case of success
+
+2.2. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR (r/w)
+
+Allows user space to retrieve or request to change cpu related information for a vcpu:
+
+struct kvm_s390_vm_cpu_processor {
+ __u64 cpuid; # CPUID currently (to be) used by this vcpu
+ __u16 ibc; # IBC level currently (to be) used by this vcpu
+ __u8 pad[6];
+ __u64 fac_list[256]; # set of cpu facilities currently (to be) used
+ # by this vcpu
+}
+
+KVM does not enforce or limit the cpu model data in any form. Take the information
+retrieved by means of KVM_S390_VM_CPU_MACHINE as hint for reasonable configuration
+setups. Instruction interceptions triggered by additionally set facilitiy bits that
+are not handled by KVM need to by imlemented in the VM driver code.
+
+Parameters: address of buffer to store/set the processor related cpu
+ data of type struct kvm_s390_vm_cpu_processor*.
+Returns: -EBUSY in case 1 or more vcpus are already activated (only in write case)
+ -EFAULT if the given address is not accessible from kernel space
+ -ENOMEM if not enough memory is available to process the ioctl
+ 0 in case of success
diff --git a/Documentation/virtual/kvm/devices/xics.txt b/Documentation/virtual/kvm/devices/xics.txt
new file mode 100644
index 000000000..42864935a
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/xics.txt
@@ -0,0 +1,66 @@
+XICS interrupt controller
+
+Device type supported: KVM_DEV_TYPE_XICS
+
+Groups:
+ KVM_DEV_XICS_SOURCES
+ Attributes: One per interrupt source, indexed by the source number.
+
+This device emulates the XICS (eXternal Interrupt Controller
+Specification) defined in PAPR. The XICS has a set of interrupt
+sources, each identified by a 20-bit source number, and a set of
+Interrupt Control Presentation (ICP) entities, also called "servers",
+each associated with a virtual CPU.
+
+The ICP entities are created by enabling the KVM_CAP_IRQ_ARCH
+capability for each vcpu, specifying KVM_CAP_IRQ_XICS in args[0] and
+the interrupt server number (i.e. the vcpu number from the XICS's
+point of view) in args[1] of the kvm_enable_cap struct. Each ICP has
+64 bits of state which can be read and written using the
+KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls on the vcpu. The 64 bit
+state word has the following bitfields, starting at the
+least-significant end of the word:
+
+* Unused, 16 bits
+
+* Pending interrupt priority, 8 bits
+ Zero is the highest priority, 255 means no interrupt is pending.
+
+* Pending IPI (inter-processor interrupt) priority, 8 bits
+ Zero is the highest priority, 255 means no IPI is pending.
+
+* Pending interrupt source number, 24 bits
+ Zero means no interrupt pending, 2 means an IPI is pending
+
+* Current processor priority, 8 bits
+ Zero is the highest priority, meaning no interrupts can be
+ delivered, and 255 is the lowest priority.
+
+Each source has 64 bits of state that can be read and written using
+the KVM_GET_DEVICE_ATTR and KVM_SET_DEVICE_ATTR ioctls, specifying the
+KVM_DEV_XICS_SOURCES attribute group, with the attribute number being
+the interrupt source number. The 64 bit state word has the following
+bitfields, starting from the least-significant end of the word:
+
+* Destination (server number), 32 bits
+ This specifies where the interrupt should be sent, and is the
+ interrupt server number specified for the destination vcpu.
+
+* Priority, 8 bits
+ This is the priority specified for this interrupt source, where 0 is
+ the highest priority and 255 is the lowest. An interrupt with a
+ priority of 255 will never be delivered.
+
+* Level sensitive flag, 1 bit
+ This bit is 1 for a level-sensitive interrupt source, or 0 for
+ edge-sensitive (or MSI).
+
+* Masked flag, 1 bit
+ This bit is set to 1 if the interrupt is masked (cannot be delivered
+ regardless of its priority), for example by the ibm,int-off RTAS
+ call, or 0 if it is not masked.
+
+* Pending flag, 1 bit
+ This bit is 1 if the source has a pending interrupt, otherwise 0.
+
+Only one XICS instance may be created per VM.
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
new file mode 100644
index 000000000..c8d040e27
--- /dev/null
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -0,0 +1,83 @@
+Linux KVM Hypercall:
+===================
+X86:
+ KVM Hypercalls have a three-byte sequence of either the vmcall or the vmmcall
+ instruction. The hypervisor can replace it with instructions that are
+ guaranteed to be supported.
+
+ Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
+ The hypercall number should be placed in rax and the return value will be
+ placed in rax. No other registers will be clobbered unless explicitly stated
+ by the particular hypercall.
+
+S390:
+ R2-R7 are used for parameters 1-6. In addition, R1 is used for hypercall
+ number. The return value is written to R2.
+
+ S390 uses diagnose instruction as hypercall (0x500) along with hypercall
+ number in R1.
+
+ For further information on the S390 diagnose call as supported by KVM,
+ refer to Documentation/virtual/kvm/s390-diag.txt.
+
+ PowerPC:
+ It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
+ Return value is placed in R3.
+
+ KVM hypercalls uses 4 byte opcode, that are patched with 'hypercall-instructions'
+ property inside the device tree's /hypervisor node.
+ For more information refer to Documentation/virtual/kvm/ppc-pv.txt
+
+KVM Hypercalls Documentation
+===========================
+The template for each hypercall is:
+1. Hypercall name.
+2. Architecture(s)
+3. Status (deprecated, obsolete, active)
+4. Purpose
+
+1. KVM_HC_VAPIC_POLL_IRQ
+------------------------
+Architecture: x86
+Status: active
+Purpose: Trigger guest exit so that the host can check for pending
+interrupts on reentry.
+
+2. KVM_HC_MMU_OP
+------------------------
+Architecture: x86
+Status: deprecated.
+Purpose: Support MMU operations such as writing to PTE,
+flushing TLB, release PT.
+
+3. KVM_HC_FEATURES
+------------------------
+Architecture: PPC
+Status: active
+Purpose: Expose hypercall availability to the guest. On x86 platforms, cpuid
+used to enumerate which hypercalls are available. On PPC, either device tree
+based lookup ( which is also what EPAPR dictates) OR KVM specific enumeration
+mechanism (which is this hypercall) can be used.
+
+4. KVM_HC_PPC_MAP_MAGIC_PAGE
+------------------------
+Architecture: PPC
+Status: active
+Purpose: To enable communication between the hypervisor and guest there is a
+shared page that contains parts of supervisor visible register state.
+The guest can map this shared page to access its supervisor register through
+memory using this hypercall.
+
+5. KVM_HC_KICK_CPU
+------------------------
+Architecture: x86
+Status: active
+Purpose: Hypercall used to wakeup a vcpu from HLT state
+Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest
+kernel mode for an event to occur (ex: a spinlock to become available) can
+execute HLT instruction once it has busy-waited for more than a threshold
+time-interval. Execution of HLT instruction would cause the hypervisor to put
+the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the
+same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall,
+specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0)
+is used in the hypercall for future use.
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
new file mode 100644
index 000000000..d68af4dc3
--- /dev/null
+++ b/Documentation/virtual/kvm/locking.txt
@@ -0,0 +1,168 @@
+KVM Lock Overview
+=================
+
+1. Acquisition Orders
+---------------------
+
+(to be written)
+
+2: Exception
+------------
+
+Fast page fault:
+
+Fast page fault is the fast path which fixes the guest page fault out of
+the mmu-lock on x86. Currently, the page fault can be fast only if the
+shadow page table is present and it is caused by write-protect, that means
+we just need change the W bit of the spte.
+
+What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
+SPTE_MMU_WRITEABLE bit on the spte:
+- SPTE_HOST_WRITEABLE means the gfn is writable on host.
+- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
+ the gfn is writable on guest mmu and it is not write-protected by shadow
+ page write-protection.
+
+On fast page fault path, we will use cmpxchg to atomically set the spte W
+bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, this
+is safe because whenever changing these bits can be detected by cmpxchg.
+
+But we need carefully check these cases:
+1): The mapping from gfn to pfn
+The mapping from gfn to pfn may be changed since we can only ensure the pfn
+is not changed during cmpxchg. This is a ABA problem, for example, below case
+will happen:
+
+At the beginning:
+gpte = gfn1
+gfn1 is mapped to pfn1 on host
+spte is the shadow page table entry corresponding with gpte and
+spte = pfn1
+
+ VCPU 0 VCPU0
+on fast page fault path:
+
+ old_spte = *spte;
+ pfn1 is swapped out:
+ spte = 0;
+
+ pfn1 is re-alloced for gfn2.
+
+ gpte is changed to point to
+ gfn2 by the guest:
+ spte = pfn1;
+
+ if (cmpxchg(spte, old_spte, old_spte+W)
+ mark_page_dirty(vcpu->kvm, gfn1)
+ OOPS!!!
+
+We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
+
+For direct sp, we can easily avoid it since the spte of direct sp is fixed
+to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
+to pin gfn to pfn, because after gfn_to_pfn_atomic():
+- We have held the refcount of pfn that means the pfn can not be freed and
+ be reused for another gfn.
+- The pfn is writable that means it can not be shared between different gfns
+ by KSM.
+
+Then, we can ensure the dirty bitmaps is correctly set for a gfn.
+
+Currently, to simplify the whole things, we disable fast page fault for
+indirect shadow page.
+
+2): Dirty bit tracking
+In the origin code, the spte can be fast updated (non-atomically) if the
+spte is read-only and the Accessed bit has already been set since the
+Accessed bit and Dirty bit can not be lost.
+
+But it is not true after fast page fault since the spte can be marked
+writable between reading spte and updating spte. Like below case:
+
+At the beginning:
+spte.W = 0
+spte.Accessed = 1
+
+ VCPU 0 VCPU0
+In mmu_spte_clear_track_bits():
+
+ old_spte = *spte;
+
+ /* 'if' condition is satisfied. */
+ if (old_spte.Accssed == 1 &&
+ old_spte.W == 0)
+ spte = 0ull;
+ on fast page fault path:
+ spte.W = 1
+ memory write on the spte:
+ spte.Dirty = 1
+
+
+ else
+ old_spte = xchg(spte, 0ull)
+
+
+ if (old_spte.Accssed == 1)
+ kvm_set_pfn_accessed(spte.pfn);
+ if (old_spte.Dirty == 1)
+ kvm_set_pfn_dirty(spte.pfn);
+ OOPS!!!
+
+The Dirty bit is lost in this case.
+
+In order to avoid this kind of issue, we always treat the spte as "volatile"
+if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
+the spte is always atomically updated in this case.
+
+3): flush tlbs due to spte updated
+If the spte is updated from writable to readonly, we should flush all TLBs,
+otherwise rmap_write_protect will find a read-only spte, even though the
+writable spte might be cached on a CPU's TLB.
+
+As mentioned before, the spte can be updated to writable out of mmu-lock on
+fast page fault path, in order to easily audit the path, we see if TLBs need
+be flushed caused by this reason in mmu_spte_update() since this is a common
+function to update spte (present -> present).
+
+Since the spte is "volatile" if it can be updated out of mmu-lock, we always
+atomically update the spte, the race caused by fast page fault can be avoided,
+See the comments in spte_has_volatile_bits() and mmu_spte_update().
+
+3. Reference
+------------
+
+Name: kvm_lock
+Type: spinlock_t
+Arch: any
+Protects: - vm_list
+
+Name: kvm_count_lock
+Type: raw_spinlock_t
+Arch: any
+Protects: - hardware virtualization enable/disable
+Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
+ migration.
+
+Name: kvm_arch::tsc_write_lock
+Type: raw_spinlock
+Arch: x86
+Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
+ - tsc offset in vmcb
+Comment: 'raw' because updating the tsc offsets must not be preempted.
+
+Name: kvm->mmu_lock
+Type: spinlock_t
+Arch: any
+Protects: -shadow page/shadow tlb entry
+Comment: it is a spinlock since it is used in mmu notifier.
+
+Name: kvm->srcu
+Type: srcu lock
+Arch: any
+Protects: - kvm->memslots
+ - kvm->buses
+Comment: The srcu read lock must be held while accessing memslots (e.g.
+ when using gfn_to_* functions) and while accessing in-kernel
+ MMIO/PIO address->device structure mapping (kvm->buses).
+ The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu
+ if it is needed by multiple functions.
diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt
new file mode 100644
index 000000000..c59bd9bc4
--- /dev/null
+++ b/Documentation/virtual/kvm/mmu.txt
@@ -0,0 +1,458 @@
+The x86 kvm shadow mmu
+======================
+
+The mmu (in arch/x86/kvm, files mmu.[ch] and paging_tmpl.h) is responsible
+for presenting a standard x86 mmu to the guest, while translating guest
+physical addresses to host physical addresses.
+
+The mmu code attempts to satisfy the following requirements:
+
+- correctness: the guest should not be able to determine that it is running
+ on an emulated mmu except for timing (we attempt to comply
+ with the specification, not emulate the characteristics of
+ a particular implementation such as tlb size)
+- security: the guest must not be able to touch host memory not assigned
+ to it
+- performance: minimize the performance penalty imposed by the mmu
+- scaling: need to scale to large memory and large vcpu guests
+- hardware: support the full range of x86 virtualization hardware
+- integration: Linux memory management code must be in control of guest memory
+ so that swapping, page migration, page merging, transparent
+ hugepages, and similar features work without change
+- dirty tracking: report writes to guest memory to enable live migration
+ and framebuffer-based displays
+- footprint: keep the amount of pinned kernel memory low (most memory
+ should be shrinkable)
+- reliability: avoid multipage or GFP_ATOMIC allocations
+
+Acronyms
+========
+
+pfn host page frame number
+hpa host physical address
+hva host virtual address
+gfn guest frame number
+gpa guest physical address
+gva guest virtual address
+ngpa nested guest physical address
+ngva nested guest virtual address
+pte page table entry (used also to refer generically to paging structure
+ entries)
+gpte guest pte (referring to gfns)
+spte shadow pte (referring to pfns)
+tdp two dimensional paging (vendor neutral term for NPT and EPT)
+
+Virtual and real hardware supported
+===================================
+
+The mmu supports first-generation mmu hardware, which allows an atomic switch
+of the current paging mode and cr3 during guest entry, as well as
+two-dimensional paging (AMD's NPT and Intel's EPT). The emulated hardware
+it exposes is the traditional 2/3/4 level x86 mmu, with support for global
+pages, pae, pse, pse36, cr0.wp, and 1GB pages. Work is in progress to support
+exposing NPT capable hardware on NPT capable hosts.
+
+Translation
+===========
+
+The primary job of the mmu is to program the processor's mmu to translate
+addresses for the guest. Different translations are required at different
+times:
+
+- when guest paging is disabled, we translate guest physical addresses to
+ host physical addresses (gpa->hpa)
+- when guest paging is enabled, we translate guest virtual addresses, to
+ guest physical addresses, to host physical addresses (gva->gpa->hpa)
+- when the guest launches a guest of its own, we translate nested guest
+ virtual addresses, to nested guest physical addresses, to guest physical
+ addresses, to host physical addresses (ngva->ngpa->gpa->hpa)
+
+The primary challenge is to encode between 1 and 3 translations into hardware
+that support only 1 (traditional) and 2 (tdp) translations. When the
+number of required translations matches the hardware, the mmu operates in
+direct mode; otherwise it operates in shadow mode (see below).
+
+Memory
+======
+
+Guest memory (gpa) is part of the user address space of the process that is
+using kvm. Userspace defines the translation between guest addresses and user
+addresses (gpa->hva); note that two gpas may alias to the same hva, but not
+vice versa.
+
+These hvas may be backed using any method available to the host: anonymous
+memory, file backed memory, and device memory. Memory might be paged by the
+host at any time.
+
+Events
+======
+
+The mmu is driven by events, some from the guest, some from the host.
+
+Guest generated events:
+- writes to control registers (especially cr3)
+- invlpg/invlpga instruction execution
+- access to missing or protected translations
+
+Host generated events:
+- changes in the gpa->hpa translation (either through gpa->hva changes or
+ through hva->hpa changes)
+- memory pressure (the shrinker)
+
+Shadow pages
+============
+
+The principal data structure is the shadow page, 'struct kvm_mmu_page'. A
+shadow page contains 512 sptes, which can be either leaf or nonleaf sptes. A
+shadow page may contain a mix of leaf and nonleaf sptes.
+
+A nonleaf spte allows the hardware mmu to reach the leaf pages and
+is not related to a translation directly. It points to other shadow pages.
+
+A leaf spte corresponds to either one or two translations encoded into
+one paging structure entry. These are always the lowest level of the
+translation stack, with optional higher level translations left to NPT/EPT.
+Leaf ptes point at guest pages.
+
+The following table shows translations encoded by leaf ptes, with higher-level
+translations in parentheses:
+
+ Non-nested guests:
+ nonpaging: gpa->hpa
+ paging: gva->gpa->hpa
+ paging, tdp: (gva->)gpa->hpa
+ Nested guests:
+ non-tdp: ngva->gpa->hpa (*)
+ tdp: (ngva->)ngpa->gpa->hpa
+
+(*) the guest hypervisor will encode the ngva->gpa translation into its page
+ tables if npt is not present
+
+Shadow pages contain the following information:
+ role.level:
+ The level in the shadow paging hierarchy that this shadow page belongs to.
+ 1=4k sptes, 2=2M sptes, 3=1G sptes, etc.
+ role.direct:
+ If set, leaf sptes reachable from this page are for a linear range.
+ Examples include real mode translation, large guest pages backed by small
+ host pages, and gpa->hpa translations when NPT or EPT is active.
+ The linear range starts at (gfn << PAGE_SHIFT) and its size is determined
+ by role.level (2MB for first level, 1GB for second level, 0.5TB for third
+ level, 256TB for fourth level)
+ If clear, this page corresponds to a guest page table denoted by the gfn
+ field.
+ role.quadrant:
+ When role.cr4_pae=0, the guest uses 32-bit gptes while the host uses 64-bit
+ sptes. That means a guest page table contains more ptes than the host,
+ so multiple shadow pages are needed to shadow one guest page.
+ For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the
+ first or second 512-gpte block in the guest page table. For second-level
+ page tables, each 32-bit gpte is converted to two 64-bit sptes
+ (since each first-level guest page is shadowed by two first-level
+ shadow pages) so role.quadrant takes values in the range 0..3. Each
+ quadrant maps 1GB virtual address space.
+ role.access:
+ Inherited guest access permissions in the form uwx. Note execute
+ permission is positive, not negative.
+ role.invalid:
+ The page is invalid and should not be used. It is a root page that is
+ currently pinned (by a cpu hardware register pointing to it); once it is
+ unpinned it will be destroyed.
+ role.cr4_pae:
+ Contains the value of cr4.pae for which the page is valid (e.g. whether
+ 32-bit or 64-bit gptes are in use).
+ role.nxe:
+ Contains the value of efer.nxe for which the page is valid.
+ role.cr0_wp:
+ Contains the value of cr0.wp for which the page is valid.
+ role.smep_andnot_wp:
+ Contains the value of cr4.smep && !cr0.wp for which the page is valid
+ (pages for which this is true are different from other pages; see the
+ treatment of cr0.wp=0 below).
+ role.smap_andnot_wp:
+ Contains the value of cr4.smap && !cr0.wp for which the page is valid
+ (pages for which this is true are different from other pages; see the
+ treatment of cr0.wp=0 below).
+ gfn:
+ Either the guest page table containing the translations shadowed by this
+ page, or the base page frame for linear translations. See role.direct.
+ spt:
+ A pageful of 64-bit sptes containing the translations for this page.
+ Accessed by both kvm and hardware.
+ The page pointed to by spt will have its page->private pointing back
+ at the shadow page structure.
+ sptes in spt point either at guest pages, or at lower-level shadow pages.
+ Specifically, if sp1 and sp2 are shadow pages, then sp1->spt[n] may point
+ at __pa(sp2->spt). sp2 will point back at sp1 through parent_pte.
+ The spt array forms a DAG structure with the shadow page as a node, and
+ guest pages as leaves.
+ gfns:
+ An array of 512 guest frame numbers, one for each present pte. Used to
+ perform a reverse map from a pte to a gfn. When role.direct is set, any
+ element of this array can be calculated from the gfn field when used, in
+ this case, the array of gfns is not allocated. See role.direct and gfn.
+ root_count:
+ A counter keeping track of how many hardware registers (guest cr3 or
+ pdptrs) are now pointing at the page. While this counter is nonzero, the
+ page cannot be destroyed. See role.invalid.
+ parent_ptes:
+ The reverse mapping for the pte/ptes pointing at this page's spt. If
+ parent_ptes bit 0 is zero, only one spte points at this pages and
+ parent_ptes points at this single spte, otherwise, there exists multiple
+ sptes pointing at this page and (parent_ptes & ~0x1) points at a data
+ structure with a list of parent_ptes.
+ unsync:
+ If true, then the translations in this page may not match the guest's
+ translation. This is equivalent to the state of the tlb when a pte is
+ changed but before the tlb entry is flushed. Accordingly, unsync ptes
+ are synchronized when the guest executes invlpg or flushes its tlb by
+ other means. Valid for leaf pages.
+ unsync_children:
+ How many sptes in the page point at pages that are unsync (or have
+ unsynchronized children).
+ unsync_child_bitmap:
+ A bitmap indicating which sptes in spt point (directly or indirectly) at
+ pages that may be unsynchronized. Used to quickly locate all unsychronized
+ pages reachable from a given page.
+ mmu_valid_gen:
+ Generation number of the page. It is compared with kvm->arch.mmu_valid_gen
+ during hash table lookup, and used to skip invalidated shadow pages (see
+ "Zapping all pages" below.)
+ clear_spte_count:
+ Only present on 32-bit hosts, where a 64-bit spte cannot be written
+ atomically. The reader uses this while running out of the MMU lock
+ to detect in-progress updates and retry them until the writer has
+ finished the write.
+ write_flooding_count:
+ A guest may write to a page table many times, causing a lot of
+ emulations if the page needs to be write-protected (see "Synchronized
+ and unsynchronized pages" below). Leaf pages can be unsynchronized
+ so that they do not trigger frequent emulation, but this is not
+ possible for non-leafs. This field counts the number of emulations
+ since the last time the page table was actually used; if emulation
+ is triggered too frequently on this page, KVM will unmap the page
+ to avoid emulation in the future.
+
+Reverse map
+===========
+
+The mmu maintains a reverse mapping whereby all ptes mapping a page can be
+reached given its gfn. This is used, for example, when swapping out a page.
+
+Synchronized and unsynchronized pages
+=====================================
+
+The guest uses two events to synchronize its tlb and page tables: tlb flushes
+and page invalidations (invlpg).
+
+A tlb flush means that we need to synchronize all sptes reachable from the
+guest's cr3. This is expensive, so we keep all guest page tables write
+protected, and synchronize sptes to gptes when a gpte is written.
+
+A special case is when a guest page table is reachable from the current
+guest cr3. In this case, the guest is obliged to issue an invlpg instruction
+before using the translation. We take advantage of that by removing write
+protection from the guest page, and allowing the guest to modify it freely.
+We synchronize modified gptes when the guest invokes invlpg. This reduces
+the amount of emulation we have to do when the guest modifies multiple gptes,
+or when the a guest page is no longer used as a page table and is used for
+random guest data.
+
+As a side effect we have to resynchronize all reachable unsynchronized shadow
+pages on a tlb flush.
+
+
+Reaction to events
+==================
+
+- guest page fault (or npt page fault, or ept violation)
+
+This is the most complicated event. The cause of a page fault can be:
+
+ - a true guest fault (the guest translation won't allow the access) (*)
+ - access to a missing translation
+ - access to a protected translation
+ - when logging dirty pages, memory is write protected
+ - synchronized shadow pages are write protected (*)
+ - access to untranslatable memory (mmio)
+
+ (*) not applicable in direct mode
+
+Handling a page fault is performed as follows:
+
+ - if the RSV bit of the error code is set, the page fault is caused by guest
+ accessing MMIO and cached MMIO information is available.
+ - walk shadow page table
+ - check for valid generation number in the spte (see "Fast invalidation of
+ MMIO sptes" below)
+ - cache the information to vcpu->arch.mmio_gva, vcpu->arch.access and
+ vcpu->arch.mmio_gfn, and call the emulator
+ - If both P bit and R/W bit of error code are set, this could possibly
+ be handled as a "fast page fault" (fixed without taking the MMU lock). See
+ the description in Documentation/virtual/kvm/locking.txt.
+ - if needed, walk the guest page tables to determine the guest translation
+ (gva->gpa or ngpa->gpa)
+ - if permissions are insufficient, reflect the fault back to the guest
+ - determine the host page
+ - if this is an mmio request, there is no host page; cache the info to
+ vcpu->arch.mmio_gva, vcpu->arch.access and vcpu->arch.mmio_gfn
+ - walk the shadow page table to find the spte for the translation,
+ instantiating missing intermediate page tables as necessary
+ - If this is an mmio request, cache the mmio info to the spte and set some
+ reserved bit on the spte (see callers of kvm_mmu_set_mmio_spte_mask)
+ - try to unsynchronize the page
+ - if successful, we can let the guest continue and modify the gpte
+ - emulate the instruction
+ - if failed, unshadow the page and let the guest continue
+ - update any translations that were modified by the instruction
+
+invlpg handling:
+
+ - walk the shadow page hierarchy and drop affected translations
+ - try to reinstantiate the indicated translation in the hope that the
+ guest will use it in the near future
+
+Guest control register updates:
+
+- mov to cr3
+ - look up new shadow roots
+ - synchronize newly reachable shadow pages
+
+- mov to cr0/cr4/efer
+ - set up mmu context for new paging mode
+ - look up new shadow roots
+ - synchronize newly reachable shadow pages
+
+Host translation updates:
+
+ - mmu notifier called with updated hva
+ - look up affected sptes through reverse map
+ - drop (or update) translations
+
+Emulating cr0.wp
+================
+
+If tdp is not enabled, the host must keep cr0.wp=1 so page write protection
+works for the guest kernel, not guest guest userspace. When the guest
+cr0.wp=1, this does not present a problem. However when the guest cr0.wp=0,
+we cannot map the permissions for gpte.u=1, gpte.w=0 to any spte (the
+semantics require allowing any guest kernel access plus user read access).
+
+We handle this by mapping the permissions to two possible sptes, depending
+on fault type:
+
+- kernel write fault: spte.u=0, spte.w=1 (allows full kernel access,
+ disallows user access)
+- read fault: spte.u=1, spte.w=0 (allows full read access, disallows kernel
+ write access)
+
+(user write faults generate a #PF)
+
+In the first case there are two additional complications:
+- if CR4.SMEP is enabled: since we've turned the page into a kernel page,
+ the kernel may now execute it. We handle this by also setting spte.nx.
+ If we get a user fetch or read fault, we'll change spte.u=1 and
+ spte.nx=gpte.nx back.
+- if CR4.SMAP is disabled: since the page has been changed to a kernel
+ page, it can not be reused when CR4.SMAP is enabled. We set
+ CR4.SMAP && !CR0.WP into shadow page's role to avoid this case. Note,
+ here we do not care the case that CR4.SMAP is enabled since KVM will
+ directly inject #PF to guest due to failed permission check.
+
+To prevent an spte that was converted into a kernel page with cr0.wp=0
+from being written by the kernel after cr0.wp has changed to 1, we make
+the value of cr0.wp part of the page role. This means that an spte created
+with one value of cr0.wp cannot be used when cr0.wp has a different value -
+it will simply be missed by the shadow page lookup code. A similar issue
+exists when an spte created with cr0.wp=0 and cr4.smep=0 is used after
+changing cr4.smep to 1. To avoid this, the value of !cr0.wp && cr4.smep
+is also made a part of the page role.
+
+Large pages
+===========
+
+The mmu supports all combinations of large and small guest and host pages.
+Supported page sizes include 4k, 2M, 4M, and 1G. 4M pages are treated as
+two separate 2M pages, on both guest and host, since the mmu always uses PAE
+paging.
+
+To instantiate a large spte, four constraints must be satisfied:
+
+- the spte must point to a large host page
+- the guest pte must be a large pte of at least equivalent size (if tdp is
+ enabled, there is no guest pte and this condition is satisfied)
+- if the spte will be writeable, the large page frame may not overlap any
+ write-protected pages
+- the guest page must be wholly contained by a single memory slot
+
+To check the last two conditions, the mmu maintains a ->write_count set of
+arrays for each memory slot and large page size. Every write protected page
+causes its write_count to be incremented, thus preventing instantiation of
+a large spte. The frames at the end of an unaligned memory slot have
+artificially inflated ->write_counts so they can never be instantiated.
+
+Zapping all pages (page generation count)
+=========================================
+
+For the large memory guests, walking and zapping all pages is really slow
+(because there are a lot of pages), and also blocks memory accesses of
+all VCPUs because it needs to hold the MMU lock.
+
+To make it be more scalable, kvm maintains a global generation number
+which is stored in kvm->arch.mmu_valid_gen. Every shadow page stores
+the current global generation-number into sp->mmu_valid_gen when it
+is created. Pages with a mismatching generation number are "obsolete".
+
+When KVM need zap all shadow pages sptes, it just simply increases the global
+generation-number then reload root shadow pages on all vcpus. As the VCPUs
+create new shadow page tables, the old pages are not used because of the
+mismatching generation number.
+
+KVM then walks through all pages and zaps obsolete pages. While the zap
+operation needs to take the MMU lock, the lock can be released periodically
+so that the VCPUs can make progress.
+
+Fast invalidation of MMIO sptes
+===============================
+
+As mentioned in "Reaction to events" above, kvm will cache MMIO
+information in leaf sptes. When a new memslot is added or an existing
+memslot is changed, this information may become stale and needs to be
+invalidated. This also needs to hold the MMU lock while walking all
+shadow pages, and is made more scalable with a similar technique.
+
+MMIO sptes have a few spare bits, which are used to store a
+generation number. The global generation number is stored in
+kvm_memslots(kvm)->generation, and increased whenever guest memory info
+changes. This generation number is distinct from the one described in
+the previous section.
+
+When KVM finds an MMIO spte, it checks the generation number of the spte.
+If the generation number of the spte does not equal the global generation
+number, it will ignore the cached MMIO information and handle the page
+fault through the slow path.
+
+Since only 19 bits are used to store generation-number on mmio spte, all
+pages are zapped when there is an overflow.
+
+Unfortunately, a single memory access might access kvm_memslots(kvm) multiple
+times, the last one happening when the generation number is retrieved and
+stored into the MMIO spte. Thus, the MMIO spte might be created based on
+out-of-date information, but with an up-to-date generation number.
+
+To avoid this, the generation number is incremented again after synchronize_srcu
+returns; thus, the low bit of kvm_memslots(kvm)->generation is only 1 during a
+memslot update, while some SRCU readers might be using the old copy. We do not
+want to use an MMIO sptes created with an odd generation number, and we can do
+this without losing a bit in the MMIO spte. The low bit of the generation
+is not stored in MMIO spte, and presumed zero when it is extracted out of the
+spte. If KVM is unlucky and creates an MMIO spte while the low bit is 1,
+the next access to the spte will always be a cache miss.
+
+
+Further reading
+===============
+
+- NPT presentation from KVM Forum 2008
+ http://www.linux-kvm.org/wiki/images/c/c8/KvmForum2008%24kdf2008_21.pdf
+
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
new file mode 100644
index 000000000..2a71c8f29
--- /dev/null
+++ b/Documentation/virtual/kvm/msr.txt
@@ -0,0 +1,266 @@
+KVM-specific MSRs.
+Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
+=====================================================
+
+KVM makes use of some custom MSRs to service some requests.
+
+Custom MSRs have a range reserved for them, that goes from
+0x4b564d00 to 0x4b564dff. There are MSRs outside this area,
+but they are deprecated and their use is discouraged.
+
+Custom MSR list
+--------
+
+The current supported Custom MSR list is:
+
+MSR_KVM_WALL_CLOCK_NEW: 0x4b564d00
+
+ data: 4-byte alignment physical address of a memory area which must be
+ in guest RAM. This memory is expected to hold a copy of the following
+ structure:
+
+ struct pvclock_wall_clock {
+ u32 version;
+ u32 sec;
+ u32 nsec;
+ } __attribute__((__packed__));
+
+ whose data will be filled in by the hypervisor. The hypervisor is only
+ guaranteed to update this data at the moment of MSR write.
+ Users that want to reliably query this information more than once have
+ to write more than once to this MSR. Fields have the following meanings:
+
+ version: guest has to check version before and after grabbing
+ time information and check that they are both equal and even.
+ An odd version indicates an in-progress update.
+
+ sec: number of seconds for wallclock at time of boot.
+
+ nsec: number of nanoseconds for wallclock at time of boot.
+
+ In order to get the current wallclock time, the system_time from
+ MSR_KVM_SYSTEM_TIME_NEW needs to be added.
+
+ Note that although MSRs are per-CPU entities, the effect of this
+ particular MSR is global.
+
+ Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
+ leaf prior to usage.
+
+MSR_KVM_SYSTEM_TIME_NEW: 0x4b564d01
+
+ data: 4-byte aligned physical address of a memory area which must be in
+ guest RAM, plus an enable bit in bit 0. This memory is expected to hold
+ a copy of the following structure:
+
+ struct pvclock_vcpu_time_info {
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 flags;
+ u8 pad[2];
+ } __attribute__((__packed__)); /* 32 bytes */
+
+ whose data will be filled in by the hypervisor periodically. Only one
+ write, or registration, is needed for each VCPU. The interval between
+ updates of this structure is arbitrary and implementation-dependent.
+ The hypervisor may update this structure at any time it sees fit until
+ anything with bit0 == 0 is written to it.
+
+ Fields have the following meanings:
+
+ version: guest has to check version before and after grabbing
+ time information and check that they are both equal and even.
+ An odd version indicates an in-progress update.
+
+ tsc_timestamp: the tsc value at the current VCPU at the time
+ of the update of this structure. Guests can subtract this value
+ from current tsc to derive a notion of elapsed time since the
+ structure update.
+
+ system_time: a host notion of monotonic time, including sleep
+ time at the time this structure was last updated. Unit is
+ nanoseconds.
+
+ tsc_to_system_mul: multiplier to be used when converting
+ tsc-related quantity to nanoseconds
+
+ tsc_shift: shift to be used when converting tsc-related
+ quantity to nanoseconds. This shift will ensure that
+ multiplication with tsc_to_system_mul does not overflow.
+ A positive value denotes a left shift, a negative value
+ a right shift.
+
+ The conversion from tsc to nanoseconds involves an additional
+ right shift by 32 bits. With this information, guests can
+ derive per-CPU time by doing:
+
+ time = (current_tsc - tsc_timestamp)
+ if (tsc_shift >= 0)
+ time <<= tsc_shift;
+ else
+ time >>= -tsc_shift;
+ time = (time * tsc_to_system_mul) >> 32
+ time = time + system_time
+
+ flags: bits in this field indicate extended capabilities
+ coordinated between the guest and the hypervisor. Availability
+ of specific flags has to be checked in 0x40000001 cpuid leaf.
+ Current flags are:
+
+ flag bit | cpuid bit | meaning
+ -------------------------------------------------------------
+ | | time measures taken across
+ 0 | 24 | multiple cpus are guaranteed to
+ | | be monotonic
+ -------------------------------------------------------------
+ | | guest vcpu has been paused by
+ 1 | N/A | the host
+ | | See 4.70 in api.txt
+ -------------------------------------------------------------
+
+ Availability of this MSR must be checked via bit 3 in 0x4000001 cpuid
+ leaf prior to usage.
+
+
+MSR_KVM_WALL_CLOCK: 0x11
+
+ data and functioning: same as MSR_KVM_WALL_CLOCK_NEW. Use that instead.
+
+ This MSR falls outside the reserved KVM range and may be removed in the
+ future. Its usage is deprecated.
+
+ Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid
+ leaf prior to usage.
+
+MSR_KVM_SYSTEM_TIME: 0x12
+
+ data and functioning: same as MSR_KVM_SYSTEM_TIME_NEW. Use that instead.
+
+ This MSR falls outside the reserved KVM range and may be removed in the
+ future. Its usage is deprecated.
+
+ Availability of this MSR must be checked via bit 0 in 0x4000001 cpuid
+ leaf prior to usage.
+
+ The suggested algorithm for detecting kvmclock presence is then:
+
+ if (!kvm_para_available()) /* refer to cpuid.txt */
+ return NON_PRESENT;
+
+ flags = cpuid_eax(0x40000001);
+ if (flags & 3) {
+ msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
+ msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
+ return PRESENT;
+ } else if (flags & 0) {
+ msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
+ msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
+ return PRESENT;
+ } else
+ return NON_PRESENT;
+
+MSR_KVM_ASYNC_PF_EN: 0x4b564d02
+ data: Bits 63-6 hold 64-byte aligned physical address of a
+ 64 byte memory area which must be in guest RAM and must be
+ zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
+ when asynchronous page faults are enabled on the vcpu 0 when
+ disabled. Bit 1 is 1 if asynchronous page faults can be injected
+ when vcpu is in cpl == 0.
+
+ First 4 byte of 64 byte memory location will be written to by
+ the hypervisor at the time of asynchronous page fault (APF)
+ injection to indicate type of asynchronous page fault. Value
+ of 1 means that the page referred to by the page fault is not
+ present. Value 2 means that the page is now available. Disabling
+ interrupt inhibits APFs. Guest must not enable interrupt
+ before the reason is read, or it may be overwritten by another
+ APF. Since APF uses the same exception vector as regular page
+ fault guest must reset the reason to 0 before it does
+ something that can generate normal page fault. If during page
+ fault APF reason is 0 it means that this is regular page
+ fault.
+
+ During delivery of type 1 APF cr2 contains a token that will
+ be used to notify a guest when missing page becomes
+ available. When page becomes available type 2 APF is sent with
+ cr2 set to the token associated with the page. There is special
+ kind of token 0xffffffff which tells vcpu that it should wake
+ up all processes waiting for APFs and no individual type 2 APFs
+ will be sent.
+
+ If APF is disabled while there are outstanding APFs, they will
+ not be delivered.
+
+ Currently type 2 APF will be always delivered on the same vcpu as
+ type 1 was, but guest should not rely on that.
+
+MSR_KVM_STEAL_TIME: 0x4b564d03
+
+ data: 64-byte alignment physical address of a memory area which must be
+ in guest RAM, plus an enable bit in bit 0. This memory is expected to
+ hold a copy of the following structure:
+
+ struct kvm_steal_time {
+ __u64 steal;
+ __u32 version;
+ __u32 flags;
+ __u32 pad[12];
+ }
+
+ whose data will be filled in by the hypervisor periodically. Only one
+ write, or registration, is needed for each VCPU. The interval between
+ updates of this structure is arbitrary and implementation-dependent.
+ The hypervisor may update this structure at any time it sees fit until
+ anything with bit0 == 0 is written to it. Guest is required to make sure
+ this structure is initialized to zero.
+
+ Fields have the following meanings:
+
+ version: a sequence counter. In other words, guest has to check
+ this field before and after grabbing time information and make
+ sure they are both equal and even. An odd version indicates an
+ in-progress update.
+
+ flags: At this point, always zero. May be used to indicate
+ changes in this structure in the future.
+
+ steal: the amount of time in which this vCPU did not run, in
+ nanoseconds. Time during which the vcpu is idle, will not be
+ reported as steal time.
+
+MSR_KVM_EOI_EN: 0x4b564d04
+ data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
+ when disabled. Bit 1 is reserved and must be zero. When PV end of
+ interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
+ physical address of a 4 byte memory area which must be in guest RAM and
+ must be zeroed.
+
+ The first, least significant bit of 4 byte memory location will be
+ written to by the hypervisor, typically at the time of interrupt
+ injection. Value of 1 means that guest can skip writing EOI to the apic
+ (using MSR or MMIO write); instead, it is sufficient to signal
+ EOI by clearing the bit in guest memory - this location will
+ later be polled by the hypervisor.
+ Value of 0 means that the EOI write is required.
+
+ It is always safe for the guest to ignore the optimization and perform
+ the APIC EOI write anyway.
+
+ Hypervisor is guaranteed to only modify this least
+ significant bit while in the current VCPU context, this means that
+ guest does not need to use either lock prefix or memory ordering
+ primitives to synchronise with the hypervisor.
+
+ However, hypervisor can set and clear this memory bit at any time:
+ therefore to make sure hypervisor does not interrupt the
+ guest and clear the least significant bit in the memory area
+ in the window between guest testing it to detect
+ whether it can skip EOI apic write and between guest
+ clearing it to signal EOI to the hypervisor,
+ guest must both read the least significant bit in the memory area and
+ clear it using a single CPU instruction, such as test and clear, or
+ compare and exchange.
diff --git a/Documentation/virtual/kvm/nested-vmx.txt b/Documentation/virtual/kvm/nested-vmx.txt
new file mode 100644
index 000000000..8ed937de1
--- /dev/null
+++ b/Documentation/virtual/kvm/nested-vmx.txt
@@ -0,0 +1,251 @@
+Nested VMX
+==========
+
+Overview
+---------
+
+On Intel processors, KVM uses Intel's VMX (Virtual-Machine eXtensions)
+to easily and efficiently run guest operating systems. Normally, these guests
+*cannot* themselves be hypervisors running their own guests, because in VMX,
+guests cannot use VMX instructions.
+
+The "Nested VMX" feature adds this missing capability - of running guest
+hypervisors (which use VMX) with their own nested guests. It does so by
+allowing a guest to use VMX instructions, and correctly and efficiently
+emulating them using the single level of VMX available in the hardware.
+
+We describe in much greater detail the theory behind the nested VMX feature,
+its implementation and its performance characteristics, in the OSDI 2010 paper
+"The Turtles Project: Design and Implementation of Nested Virtualization",
+available at:
+
+ http://www.usenix.org/events/osdi10/tech/full_papers/Ben-Yehuda.pdf
+
+
+Terminology
+-----------
+
+Single-level virtualization has two levels - the host (KVM) and the guests.
+In nested virtualization, we have three levels: The host (KVM), which we call
+L0, the guest hypervisor, which we call L1, and its nested guest, which we
+call L2.
+
+
+Known limitations
+-----------------
+
+The current code supports running Linux guests under KVM guests.
+Only 64-bit guest hypervisors are supported.
+
+Additional patches for running Windows under guest KVM, and Linux under
+guest VMware server, and support for nested EPT, are currently running in
+the lab, and will be sent as follow-on patchsets.
+
+
+Running nested VMX
+------------------
+
+The nested VMX feature is disabled by default. It can be enabled by giving
+the "nested=1" option to the kvm-intel module.
+
+No modifications are required to user space (qemu). However, qemu's default
+emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
+explicitly enabled, by giving qemu one of the following options:
+
+ -cpu host (emulated CPU has all features of the real CPU)
+
+ -cpu qemu64,+vmx (add just the vmx feature to a named CPU type)
+
+
+ABIs
+----
+
+Nested VMX aims to present a standard and (eventually) fully-functional VMX
+implementation for the a guest hypervisor to use. As such, the official
+specification of the ABI that it provides is Intel's VMX specification,
+namely volume 3B of their "Intel 64 and IA-32 Architectures Software
+Developer's Manual". Not all of VMX's features are currently fully supported,
+but the goal is to eventually support them all, starting with the VMX features
+which are used in practice by popular hypervisors (KVM and others).
+
+As a VMX implementation, nested VMX presents a VMCS structure to L1.
+As mandated by the spec, other than the two fields revision_id and abort,
+this structure is *opaque* to its user, who is not supposed to know or care
+about its internal structure. Rather, the structure is accessed through the
+VMREAD and VMWRITE instructions.
+Still, for debugging purposes, KVM developers might be interested to know the
+internals of this structure; This is struct vmcs12 from arch/x86/kvm/vmx.c.
+
+The name "vmcs12" refers to the VMCS that L1 builds for L2. In the code we
+also have "vmcs01", the VMCS that L0 built for L1, and "vmcs02" is the VMCS
+which L0 builds to actually run L2 - how this is done is explained in the
+aforementioned paper.
+
+For convenience, we repeat the content of struct vmcs12 here. If the internals
+of this structure changes, this can break live migration across KVM versions.
+VMCS12_REVISION (from vmx.c) should be changed if struct vmcs12 or its inner
+struct shadow_vmcs is ever changed.
+
+ typedef u64 natural_width;
+ struct __packed vmcs12 {
+ /* According to the Intel spec, a VMCS region must start with
+ * these two user-visible fields */
+ u32 revision_id;
+ u32 abort;
+
+ u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
+ u32 padding[7]; /* room for future expansion */
+
+ u64 io_bitmap_a;
+ u64 io_bitmap_b;
+ u64 msr_bitmap;
+ u64 vm_exit_msr_store_addr;
+ u64 vm_exit_msr_load_addr;
+ u64 vm_entry_msr_load_addr;
+ u64 tsc_offset;
+ u64 virtual_apic_page_addr;
+ u64 apic_access_addr;
+ u64 ept_pointer;
+ u64 guest_physical_address;
+ u64 vmcs_link_pointer;
+ u64 guest_ia32_debugctl;
+ u64 guest_ia32_pat;
+ u64 guest_ia32_efer;
+ u64 guest_pdptr0;
+ u64 guest_pdptr1;
+ u64 guest_pdptr2;
+ u64 guest_pdptr3;
+ u64 host_ia32_pat;
+ u64 host_ia32_efer;
+ u64 padding64[8]; /* room for future expansion */
+ natural_width cr0_guest_host_mask;
+ natural_width cr4_guest_host_mask;
+ natural_width cr0_read_shadow;
+ natural_width cr4_read_shadow;
+ natural_width cr3_target_value0;
+ natural_width cr3_target_value1;
+ natural_width cr3_target_value2;
+ natural_width cr3_target_value3;
+ natural_width exit_qualification;
+ natural_width guest_linear_address;
+ natural_width guest_cr0;
+ natural_width guest_cr3;
+ natural_width guest_cr4;
+ natural_width guest_es_base;
+ natural_width guest_cs_base;
+ natural_width guest_ss_base;
+ natural_width guest_ds_base;
+ natural_width guest_fs_base;
+ natural_width guest_gs_base;
+ natural_width guest_ldtr_base;
+ natural_width guest_tr_base;
+ natural_width guest_gdtr_base;
+ natural_width guest_idtr_base;
+ natural_width guest_dr7;
+ natural_width guest_rsp;
+ natural_width guest_rip;
+ natural_width guest_rflags;
+ natural_width guest_pending_dbg_exceptions;
+ natural_width guest_sysenter_esp;
+ natural_width guest_sysenter_eip;
+ natural_width host_cr0;
+ natural_width host_cr3;
+ natural_width host_cr4;
+ natural_width host_fs_base;
+ natural_width host_gs_base;
+ natural_width host_tr_base;
+ natural_width host_gdtr_base;
+ natural_width host_idtr_base;
+ natural_width host_ia32_sysenter_esp;
+ natural_width host_ia32_sysenter_eip;
+ natural_width host_rsp;
+ natural_width host_rip;
+ natural_width paddingl[8]; /* room for future expansion */
+ u32 pin_based_vm_exec_control;
+ u32 cpu_based_vm_exec_control;
+ u32 exception_bitmap;
+ u32 page_fault_error_code_mask;
+ u32 page_fault_error_code_match;
+ u32 cr3_target_count;
+ u32 vm_exit_controls;
+ u32 vm_exit_msr_store_count;
+ u32 vm_exit_msr_load_count;
+ u32 vm_entry_controls;
+ u32 vm_entry_msr_load_count;
+ u32 vm_entry_intr_info_field;
+ u32 vm_entry_exception_error_code;
+ u32 vm_entry_instruction_len;
+ u32 tpr_threshold;
+ u32 secondary_vm_exec_control;
+ u32 vm_instruction_error;
+ u32 vm_exit_reason;
+ u32 vm_exit_intr_info;
+ u32 vm_exit_intr_error_code;
+ u32 idt_vectoring_info_field;
+ u32 idt_vectoring_error_code;
+ u32 vm_exit_instruction_len;
+ u32 vmx_instruction_info;
+ u32 guest_es_limit;
+ u32 guest_cs_limit;
+ u32 guest_ss_limit;
+ u32 guest_ds_limit;
+ u32 guest_fs_limit;
+ u32 guest_gs_limit;
+ u32 guest_ldtr_limit;
+ u32 guest_tr_limit;
+ u32 guest_gdtr_limit;
+ u32 guest_idtr_limit;
+ u32 guest_es_ar_bytes;
+ u32 guest_cs_ar_bytes;
+ u32 guest_ss_ar_bytes;
+ u32 guest_ds_ar_bytes;
+ u32 guest_fs_ar_bytes;
+ u32 guest_gs_ar_bytes;
+ u32 guest_ldtr_ar_bytes;
+ u32 guest_tr_ar_bytes;
+ u32 guest_interruptibility_info;
+ u32 guest_activity_state;
+ u32 guest_sysenter_cs;
+ u32 host_ia32_sysenter_cs;
+ u32 padding32[8]; /* room for future expansion */
+ u16 virtual_processor_id;
+ u16 guest_es_selector;
+ u16 guest_cs_selector;
+ u16 guest_ss_selector;
+ u16 guest_ds_selector;
+ u16 guest_fs_selector;
+ u16 guest_gs_selector;
+ u16 guest_ldtr_selector;
+ u16 guest_tr_selector;
+ u16 host_es_selector;
+ u16 host_cs_selector;
+ u16 host_ss_selector;
+ u16 host_ds_selector;
+ u16 host_fs_selector;
+ u16 host_gs_selector;
+ u16 host_tr_selector;
+ };
+
+
+Authors
+-------
+
+These patches were written by:
+ Abel Gordon, abelg <at> il.ibm.com
+ Nadav Har'El, nyh <at> il.ibm.com
+ Orit Wasserman, oritw <at> il.ibm.com
+ Ben-Ami Yassor, benami <at> il.ibm.com
+ Muli Ben-Yehuda, muli <at> il.ibm.com
+
+With contributions by:
+ Anthony Liguori, aliguori <at> us.ibm.com
+ Mike Day, mdday <at> us.ibm.com
+ Michael Factor, factor <at> il.ibm.com
+ Zvi Dubitzky, dubi <at> il.ibm.com
+
+And valuable reviews by:
+ Avi Kivity, avi <at> redhat.com
+ Gleb Natapov, gleb <at> redhat.com
+ Marcelo Tosatti, mtosatti <at> redhat.com
+ Kevin Tian, kevin.tian <at> intel.com
+ and others.
diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt
new file mode 100644
index 000000000..319560646
--- /dev/null
+++ b/Documentation/virtual/kvm/ppc-pv.txt
@@ -0,0 +1,212 @@
+The PPC KVM paravirtual interface
+=================================
+
+The basic execution principle by which KVM on PowerPC works is to run all kernel
+space code in PR=1 which is user space. This way we trap all privileged
+instructions and can emulate them accordingly.
+
+Unfortunately that is also the downfall. There are quite some privileged
+instructions that needlessly return us to the hypervisor even though they
+could be handled differently.
+
+This is what the PPC PV interface helps with. It takes privileged instructions
+and transforms them into unprivileged ones with some help from the hypervisor.
+This cuts down virtualization costs by about 50% on some of my benchmarks.
+
+The code for that interface can be found in arch/powerpc/kernel/kvm*
+
+Querying for existence
+======================
+
+To find out if we're running on KVM or not, we leverage the device tree. When
+Linux is running on KVM, a node /hypervisor exists. That node contains a
+compatible property with the value "linux,kvm".
+
+Once you determined you're running under a PV capable KVM, you can now use
+hypercalls as described below.
+
+KVM hypercalls
+==============
+
+Inside the device tree's /hypervisor node there's a property called
+'hypercall-instructions'. This property contains at most 4 opcodes that make
+up the hypercall. To call a hypercall, just call these instructions.
+
+The parameters are as follows:
+
+ Register IN OUT
+
+ r0 - volatile
+ r3 1st parameter Return code
+ r4 2nd parameter 1st output value
+ r5 3rd parameter 2nd output value
+ r6 4th parameter 3rd output value
+ r7 5th parameter 4th output value
+ r8 6th parameter 5th output value
+ r9 7th parameter 6th output value
+ r10 8th parameter 7th output value
+ r11 hypercall number 8th output value
+ r12 - volatile
+
+Hypercall definitions are shared in generic code, so the same hypercall numbers
+apply for x86 and powerpc alike with the exception that each KVM hypercall
+also needs to be ORed with the KVM vendor code which is (42 << 16).
+
+Return codes can be as follows:
+
+ Code Meaning
+
+ 0 Success
+ 12 Hypercall not implemented
+ <0 Error
+
+The magic page
+==============
+
+To enable communication between the hypervisor and guest there is a new shared
+page that contains parts of supervisor visible register state. The guest can
+map this shared page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE.
+
+With this hypercall issued the guest always gets the magic page mapped at the
+desired location. The first parameter indicates the effective address when the
+MMU is enabled. The second parameter indicates the address in real mode, if
+applicable to the target. For now, we always map the page to -4096. This way we
+can access it using absolute load and store functions. The following
+instruction reads the first field of the magic page:
+
+ ld rX, -4096(0)
+
+The interface is designed to be extensible should there be need later to add
+additional registers to the magic page. If you add fields to the magic page,
+also define a new hypercall feature to indicate that the host can give you more
+registers. Only if the host supports the additional features, make use of them.
+
+The magic page layout is described by struct kvm_vcpu_arch_shared
+in arch/powerpc/include/asm/kvm_para.h.
+
+Magic page features
+===================
+
+When mapping the magic page using the KVM hypercall KVM_HC_PPC_MAP_MAGIC_PAGE,
+a second return value is passed to the guest. This second return value contains
+a bitmap of available features inside the magic page.
+
+The following enhancements to the magic page are currently available:
+
+ KVM_MAGIC_FEAT_SR Maps SR registers r/w in the magic page
+ KVM_MAGIC_FEAT_MAS0_TO_SPRG7 Maps MASn, ESR, PIR and high SPRGs
+
+For enhanced features in the magic page, please check for the existence of the
+feature before using them!
+
+Magic page flags
+================
+
+In addition to features that indicate whether a host is capable of a particular
+feature we also have a channel for a guest to tell the guest whether it's capable
+of something. This is what we call "flags".
+
+Flags are passed to the host in the low 12 bits of the Effective Address.
+
+The following flags are currently available for a guest to expose:
+
+ MAGIC_PAGE_FLAG_NOT_MAPPED_NX Guest handles NX bits correclty wrt magic page
+
+MSR bits
+========
+
+The MSR contains bits that require hypervisor intervention and bits that do
+not require direct hypervisor intervention because they only get interpreted
+when entering the guest or don't have any impact on the hypervisor's behavior.
+
+The following bits are safe to be set inside the guest:
+
+ MSR_EE
+ MSR_RI
+
+If any other bit changes in the MSR, please still use mtmsr(d).
+
+Patched instructions
+====================
+
+The "ld" and "std" instructions are transformed to "lwz" and "stw" instructions
+respectively on 32 bit systems with an added offset of 4 to accommodate for big
+endianness.
+
+The following is a list of mapping the Linux kernel performs when running as
+guest. Implementing any of those mappings is optional, as the instruction traps
+also act on the shared page. So calling privileged instructions still works as
+before.
+
+From To
+==== ==
+
+mfmsr rX ld rX, magic_page->msr
+mfsprg rX, 0 ld rX, magic_page->sprg0
+mfsprg rX, 1 ld rX, magic_page->sprg1
+mfsprg rX, 2 ld rX, magic_page->sprg2
+mfsprg rX, 3 ld rX, magic_page->sprg3
+mfsrr0 rX ld rX, magic_page->srr0
+mfsrr1 rX ld rX, magic_page->srr1
+mfdar rX ld rX, magic_page->dar
+mfdsisr rX lwz rX, magic_page->dsisr
+
+mtmsr rX std rX, magic_page->msr
+mtsprg 0, rX std rX, magic_page->sprg0
+mtsprg 1, rX std rX, magic_page->sprg1
+mtsprg 2, rX std rX, magic_page->sprg2
+mtsprg 3, rX std rX, magic_page->sprg3
+mtsrr0 rX std rX, magic_page->srr0
+mtsrr1 rX std rX, magic_page->srr1
+mtdar rX std rX, magic_page->dar
+mtdsisr rX stw rX, magic_page->dsisr
+
+tlbsync nop
+
+mtmsrd rX, 0 b <special mtmsr section>
+mtmsr rX b <special mtmsr section>
+
+mtmsrd rX, 1 b <special mtmsrd section>
+
+[Book3S only]
+mtsrin rX, rY b <special mtsrin section>
+
+[BookE only]
+wrteei [0|1] b <special wrteei section>
+
+
+Some instructions require more logic to determine what's going on than a load
+or store instruction can deliver. To enable patching of those, we keep some
+RAM around where we can live translate instructions to. What happens is the
+following:
+
+ 1) copy emulation code to memory
+ 2) patch that code to fit the emulated instruction
+ 3) patch that code to return to the original pc + 4
+ 4) patch the original instruction to branch to the new code
+
+That way we can inject an arbitrary amount of code as replacement for a single
+instruction. This allows us to check for pending interrupts when setting EE=1
+for example.
+
+Hypercall ABIs in KVM on PowerPC
+=================================
+1) KVM hypercalls (ePAPR)
+
+These are ePAPR compliant hypercall implementation (mentioned above). Even
+generic hypercalls are implemented here, like the ePAPR idle hcall. These are
+available on all targets.
+
+2) PAPR hypercalls
+
+PAPR hypercalls are needed to run server PowerPC PAPR guests (-M pseries in QEMU).
+These are the same hypercalls that pHyp, the POWER hypervisor implements. Some of
+them are handled in the kernel, some are handled in user space. This is only
+available on book3s_64.
+
+3) OSI hypercalls
+
+Mac-on-Linux is another user of KVM on PowerPC, which has its own hypercall (long
+before KVM). This is supported to maintain compatibility. All these hypercalls get
+forwarded to user space. This is only useful on book3s_32, but can be used with
+book3s_64 as well.
diff --git a/Documentation/virtual/kvm/review-checklist.txt b/Documentation/virtual/kvm/review-checklist.txt
new file mode 100644
index 000000000..a850986ed
--- /dev/null
+++ b/Documentation/virtual/kvm/review-checklist.txt
@@ -0,0 +1,38 @@
+Review checklist for kvm patches
+================================
+
+1. The patch must follow Documentation/CodingStyle and
+ Documentation/SubmittingPatches.
+
+2. Patches should be against kvm.git master branch.
+
+3. If the patch introduces or modifies a new userspace API:
+ - the API must be documented in Documentation/virtual/kvm/api.txt
+ - the API must be discoverable using KVM_CHECK_EXTENSION
+
+4. New state must include support for save/restore.
+
+5. New features must default to off (userspace should explicitly request them).
+ Performance improvements can and should default to on.
+
+6. New cpu features should be exposed via KVM_GET_SUPPORTED_CPUID2
+
+7. Emulator changes should be accompanied by unit tests for qemu-kvm.git
+ kvm/test directory.
+
+8. Changes should be vendor neutral when possible. Changes to common code
+ are better than duplicating changes to vendor code.
+
+9. Similarly, prefer changes to arch independent code than to arch dependent
+ code.
+
+10. User/kernel interfaces and guest/host interfaces must be 64-bit clean
+ (all variables and sizes naturally aligned on 64-bit; use specific types
+ only - u64 rather than ulong).
+
+11. New guest visible features must either be documented in a hardware manual
+ or be accompanied by documentation.
+
+12. Features must be robust against reset and kexec - for example, shared
+ host/guest memory must be unshared to prevent the host from writing to
+ guest memory that the guest has not reserved for this purpose.
diff --git a/Documentation/virtual/kvm/s390-diag.txt b/Documentation/virtual/kvm/s390-diag.txt
new file mode 100644
index 000000000..48c492179
--- /dev/null
+++ b/Documentation/virtual/kvm/s390-diag.txt
@@ -0,0 +1,82 @@
+The s390 DIAGNOSE call on KVM
+=============================
+
+KVM on s390 supports the DIAGNOSE call for making hypercalls, both for
+native hypercalls and for selected hypercalls found on other s390
+hypervisors.
+
+Note that bits are numbered as by the usual s390 convention (most significant
+bit on the left).
+
+
+General remarks
+---------------
+
+DIAGNOSE calls by the guest cause a mandatory intercept. This implies
+all supported DIAGNOSE calls need to be handled by either KVM or its
+userspace.
+
+All DIAGNOSE calls supported by KVM use the RS-a format:
+
+--------------------------------------
+| '83' | R1 | R3 | B2 | D2 |
+--------------------------------------
+0 8 12 16 20 31
+
+The second-operand address (obtained by the base/displacement calculation)
+is not used to address data. Instead, bits 48-63 of this address specify
+the function code, and bits 0-47 are ignored.
+
+The supported DIAGNOSE function codes vary by the userspace used. For
+DIAGNOSE function codes not specific to KVM, please refer to the
+documentation for the s390 hypervisors defining them.
+
+
+DIAGNOSE function code 'X'500' - KVM virtio functions
+-----------------------------------------------------
+
+If the function code specifies 0x500, various virtio-related functions
+are performed.
+
+General register 1 contains the virtio subfunction code. Supported
+virtio subfunctions depend on KVM's userspace. Generally, userspace
+provides either s390-virtio (subcodes 0-2) or virtio-ccw (subcode 3).
+
+Upon completion of the DIAGNOSE instruction, general register 2 contains
+the function's return code, which is either a return code or a subcode
+specific value.
+
+Subcode 0 - s390-virtio notification and early console printk
+ Handled by userspace.
+
+Subcode 1 - s390-virtio reset
+ Handled by userspace.
+
+Subcode 2 - s390-virtio set status
+ Handled by userspace.
+
+Subcode 3 - virtio-ccw notification
+ Handled by either userspace or KVM (ioeventfd case).
+
+ General register 2 contains a subchannel-identification word denoting
+ the subchannel of the virtio-ccw proxy device to be notified.
+
+ General register 3 contains the number of the virtqueue to be notified.
+
+ General register 4 contains a 64bit identifier for KVM usage (the
+ kvm_io_bus cookie). If general register 4 does not contain a valid
+ identifier, it is ignored.
+
+ After completion of the DIAGNOSE call, general register 2 may contain
+ a 64bit identifier (in the kvm_io_bus cookie case).
+
+ See also the virtio standard for a discussion of this hypercall.
+
+
+DIAGNOSE function code 'X'501 - KVM breakpoint
+----------------------------------------------
+
+If the function code specifies 0x501, breakpoint functions may be performed.
+This function code is handled by userspace.
+
+This diagnose function code has no subfunctions and uses no parameters.
diff --git a/Documentation/virtual/kvm/timekeeping.txt b/Documentation/virtual/kvm/timekeeping.txt
new file mode 100644
index 000000000..76808a17a
--- /dev/null
+++ b/Documentation/virtual/kvm/timekeeping.txt
@@ -0,0 +1,612 @@
+
+ Timekeeping Virtualization for X86-Based Architectures
+
+ Zachary Amsden <zamsden@redhat.com>
+ Copyright (c) 2010, Red Hat. All rights reserved.
+
+1) Overview
+2) Timing Devices
+3) TSC Hardware
+4) Virtualization Problems
+
+=========================================================================
+
+1) Overview
+
+One of the most complicated parts of the X86 platform, and specifically,
+the virtualization of this platform is the plethora of timing devices available
+and the complexity of emulating those devices. In addition, virtualization of
+time introduces a new set of challenges because it introduces a multiplexed
+division of time beyond the control of the guest CPU.
+
+First, we will describe the various timekeeping hardware available, then
+present some of the problems which arise and solutions available, giving
+specific recommendations for certain classes of KVM guests.
+
+The purpose of this document is to collect data and information relevant to
+timekeeping which may be difficult to find elsewhere, specifically,
+information relevant to KVM and hardware-based virtualization.
+
+=========================================================================
+
+2) Timing Devices
+
+First we discuss the basic hardware devices available. TSC and the related
+KVM clock are special enough to warrant a full exposition and are described in
+the following section.
+
+2.1) i8254 - PIT
+
+One of the first timer devices available is the programmable interrupt timer,
+or PIT. The PIT has a fixed frequency 1.193182 MHz base clock and three
+channels which can be programmed to deliver periodic or one-shot interrupts.
+These three channels can be configured in different modes and have individual
+counters. Channel 1 and 2 were not available for general use in the original
+IBM PC, and historically were connected to control RAM refresh and the PC
+speaker. Now the PIT is typically integrated as part of an emulated chipset
+and a separate physical PIT is not used.
+
+The PIT uses I/O ports 0x40 - 0x43. Access to the 16-bit counters is done
+using single or multiple byte access to the I/O ports. There are 6 modes
+available, but not all modes are available to all timers, as only timer 2
+has a connected gate input, required for modes 1 and 5. The gate line is
+controlled by port 61h, bit 0, as illustrated in the following diagram.
+
+ -------------- ----------------
+| | | |
+| 1.1932 MHz |---------->| CLOCK OUT | ---------> IRQ 0
+| Clock | | | |
+ -------------- | +->| GATE TIMER 0 |
+ | ----------------
+ |
+ | ----------------
+ | | |
+ |------>| CLOCK OUT | ---------> 66.3 KHZ DRAM
+ | | | (aka /dev/null)
+ | +->| GATE TIMER 1 |
+ | ----------------
+ |
+ | ----------------
+ | | |
+ |------>| CLOCK OUT | ---------> Port 61h, bit 5
+ | | |
+Port 61h, bit 0 ---------->| GATE TIMER 2 | \_.---- ____
+ ---------------- _| )--|LPF|---Speaker
+ / *---- \___/
+Port 61h, bit 1 -----------------------------------/
+
+The timer modes are now described.
+
+Mode 0: Single Timeout. This is a one-shot software timeout that counts down
+ when the gate is high (always true for timers 0 and 1). When the count
+ reaches zero, the output goes high.
+
+Mode 1: Triggered One-shot. The output is initially set high. When the gate
+ line is set high, a countdown is initiated (which does not stop if the gate is
+ lowered), during which the output is set low. When the count reaches zero,
+ the output goes high.
+
+Mode 2: Rate Generator. The output is initially set high. When the countdown
+ reaches 1, the output goes low for one count and then returns high. The value
+ is reloaded and the countdown automatically resumes. If the gate line goes
+ low, the count is halted. If the output is low when the gate is lowered, the
+ output automatically goes high (this only affects timer 2).
+
+Mode 3: Square Wave. This generates a high / low square wave. The count
+ determines the length of the pulse, which alternates between high and low
+ when zero is reached. The count only proceeds when gate is high and is
+ automatically reloaded on reaching zero. The count is decremented twice at
+ each clock to generate a full high / low cycle at the full periodic rate.
+ If the count is even, the clock remains high for N/2 counts and low for N/2
+ counts; if the clock is odd, the clock is high for (N+1)/2 counts and low
+ for (N-1)/2 counts. Only even values are latched by the counter, so odd
+ values are not observed when reading. This is the intended mode for timer 2,
+ which generates sine-like tones by low-pass filtering the square wave output.
+
+Mode 4: Software Strobe. After programming this mode and loading the counter,
+ the output remains high until the counter reaches zero. Then the output
+ goes low for 1 clock cycle and returns high. The counter is not reloaded.
+ Counting only occurs when gate is high.
+
+Mode 5: Hardware Strobe. After programming and loading the counter, the
+ output remains high. When the gate is raised, a countdown is initiated
+ (which does not stop if the gate is lowered). When the counter reaches zero,
+ the output goes low for 1 clock cycle and then returns high. The counter is
+ not reloaded.
+
+In addition to normal binary counting, the PIT supports BCD counting. The
+command port, 0x43 is used to set the counter and mode for each of the three
+timers.
+
+PIT commands, issued to port 0x43, using the following bit encoding:
+
+Bit 7-4: Command (See table below)
+Bit 3-1: Mode (000 = Mode 0, 101 = Mode 5, 11X = undefined)
+Bit 0 : Binary (0) / BCD (1)
+
+Command table:
+
+0000 - Latch Timer 0 count for port 0x40
+ sample and hold the count to be read in port 0x40;
+ additional commands ignored until counter is read;
+ mode bits ignored.
+
+0001 - Set Timer 0 LSB mode for port 0x40
+ set timer to read LSB only and force MSB to zero;
+ mode bits set timer mode
+
+0010 - Set Timer 0 MSB mode for port 0x40
+ set timer to read MSB only and force LSB to zero;
+ mode bits set timer mode
+
+0011 - Set Timer 0 16-bit mode for port 0x40
+ set timer to read / write LSB first, then MSB;
+ mode bits set timer mode
+
+0100 - Latch Timer 1 count for port 0x41 - as described above
+0101 - Set Timer 1 LSB mode for port 0x41 - as described above
+0110 - Set Timer 1 MSB mode for port 0x41 - as described above
+0111 - Set Timer 1 16-bit mode for port 0x41 - as described above
+
+1000 - Latch Timer 2 count for port 0x42 - as described above
+1001 - Set Timer 2 LSB mode for port 0x42 - as described above
+1010 - Set Timer 2 MSB mode for port 0x42 - as described above
+1011 - Set Timer 2 16-bit mode for port 0x42 as described above
+
+1101 - General counter latch
+ Latch combination of counters into corresponding ports
+ Bit 3 = Counter 2
+ Bit 2 = Counter 1
+ Bit 1 = Counter 0
+ Bit 0 = Unused
+
+1110 - Latch timer status
+ Latch combination of counter mode into corresponding ports
+ Bit 3 = Counter 2
+ Bit 2 = Counter 1
+ Bit 1 = Counter 0
+
+ The output of ports 0x40-0x42 following this command will be:
+
+ Bit 7 = Output pin
+ Bit 6 = Count loaded (0 if timer has expired)
+ Bit 5-4 = Read / Write mode
+ 01 = MSB only
+ 10 = LSB only
+ 11 = LSB / MSB (16-bit)
+ Bit 3-1 = Mode
+ Bit 0 = Binary (0) / BCD mode (1)
+
+2.2) RTC
+
+The second device which was available in the original PC was the MC146818 real
+time clock. The original device is now obsolete, and usually emulated by the
+system chipset, sometimes by an HPET and some frankenstein IRQ routing.
+
+The RTC is accessed through CMOS variables, which uses an index register to
+control which bytes are read. Since there is only one index register, read
+of the CMOS and read of the RTC require lock protection (in addition, it is
+dangerous to allow userspace utilities such as hwclock to have direct RTC
+access, as they could corrupt kernel reads and writes of CMOS memory).
+
+The RTC generates an interrupt which is usually routed to IRQ 8. The interrupt
+can function as a periodic timer, an additional once a day alarm, and can issue
+interrupts after an update of the CMOS registers by the MC146818 is complete.
+The type of interrupt is signalled in the RTC status registers.
+
+The RTC will update the current time fields by battery power even while the
+system is off. The current time fields should not be read while an update is
+in progress, as indicated in the status register.
+
+The clock uses a 32.768kHz crystal, so bits 6-4 of register A should be
+programmed to a 32kHz divider if the RTC is to count seconds.
+
+This is the RAM map originally used for the RTC/CMOS:
+
+Location Size Description
+------------------------------------------
+00h byte Current second (BCD)
+01h byte Seconds alarm (BCD)
+02h byte Current minute (BCD)
+03h byte Minutes alarm (BCD)
+04h byte Current hour (BCD)
+05h byte Hours alarm (BCD)
+06h byte Current day of week (BCD)
+07h byte Current day of month (BCD)
+08h byte Current month (BCD)
+09h byte Current year (BCD)
+0Ah byte Register A
+ bit 7 = Update in progress
+ bit 6-4 = Divider for clock
+ 000 = 4.194 MHz
+ 001 = 1.049 MHz
+ 010 = 32 kHz
+ 10X = test modes
+ 110 = reset / disable
+ 111 = reset / disable
+ bit 3-0 = Rate selection for periodic interrupt
+ 000 = periodic timer disabled
+ 001 = 3.90625 uS
+ 010 = 7.8125 uS
+ 011 = .122070 mS
+ 100 = .244141 mS
+ ...
+ 1101 = 125 mS
+ 1110 = 250 mS
+ 1111 = 500 mS
+0Bh byte Register B
+ bit 7 = Run (0) / Halt (1)
+ bit 6 = Periodic interrupt enable
+ bit 5 = Alarm interrupt enable
+ bit 4 = Update-ended interrupt enable
+ bit 3 = Square wave interrupt enable
+ bit 2 = BCD calendar (0) / Binary (1)
+ bit 1 = 12-hour mode (0) / 24-hour mode (1)
+ bit 0 = 0 (DST off) / 1 (DST enabled)
+OCh byte Register C (read only)
+ bit 7 = interrupt request flag (IRQF)
+ bit 6 = periodic interrupt flag (PF)
+ bit 5 = alarm interrupt flag (AF)
+ bit 4 = update interrupt flag (UF)
+ bit 3-0 = reserved
+ODh byte Register D (read only)
+ bit 7 = RTC has power
+ bit 6-0 = reserved
+32h byte Current century BCD (*)
+ (*) location vendor specific and now determined from ACPI global tables
+
+2.3) APIC
+
+On Pentium and later processors, an on-board timer is available to each CPU
+as part of the Advanced Programmable Interrupt Controller. The APIC is
+accessed through memory-mapped registers and provides interrupt service to each
+CPU, used for IPIs and local timer interrupts.
+
+Although in theory the APIC is a safe and stable source for local interrupts,
+in practice, many bugs and glitches have occurred due to the special nature of
+the APIC CPU-local memory-mapped hardware. Beware that CPU errata may affect
+the use of the APIC and that workarounds may be required. In addition, some of
+these workarounds pose unique constraints for virtualization - requiring either
+extra overhead incurred from extra reads of memory-mapped I/O or additional
+functionality that may be more computationally expensive to implement.
+
+Since the APIC is documented quite well in the Intel and AMD manuals, we will
+avoid repetition of the detail here. It should be pointed out that the APIC
+timer is programmed through the LVT (local vector timer) register, is capable
+of one-shot or periodic operation, and is based on the bus clock divided down
+by the programmable divider register.
+
+2.4) HPET
+
+HPET is quite complex, and was originally intended to replace the PIT / RTC
+support of the X86 PC. It remains to be seen whether that will be the case, as
+the de facto standard of PC hardware is to emulate these older devices. Some
+systems designated as legacy free may support only the HPET as a hardware timer
+device.
+
+The HPET spec is rather loose and vague, requiring at least 3 hardware timers,
+but allowing implementation freedom to support many more. It also imposes no
+fixed rate on the timer frequency, but does impose some extremal values on
+frequency, error and slew.
+
+In general, the HPET is recommended as a high precision (compared to PIT /RTC)
+time source which is independent of local variation (as there is only one HPET
+in any given system). The HPET is also memory-mapped, and its presence is
+indicated through ACPI tables by the BIOS.
+
+Detailed specification of the HPET is beyond the current scope of this
+document, as it is also very well documented elsewhere.
+
+2.5) Offboard Timers
+
+Several cards, both proprietary (watchdog boards) and commonplace (e1000) have
+timing chips built into the cards which may have registers which are accessible
+to kernel or user drivers. To the author's knowledge, using these to generate
+a clocksource for a Linux or other kernel has not yet been attempted and is in
+general frowned upon as not playing by the agreed rules of the game. Such a
+timer device would require additional support to be virtualized properly and is
+not considered important at this time as no known operating system does this.
+
+=========================================================================
+
+3) TSC Hardware
+
+The TSC or time stamp counter is relatively simple in theory; it counts
+instruction cycles issued by the processor, which can be used as a measure of
+time. In practice, due to a number of problems, it is the most complicated
+timekeeping device to use.
+
+The TSC is represented internally as a 64-bit MSR which can be read with the
+RDMSR, RDTSC, or RDTSCP (when available) instructions. In the past, hardware
+limitations made it possible to write the TSC, but generally on old hardware it
+was only possible to write the low 32-bits of the 64-bit counter, and the upper
+32-bits of the counter were cleared. Now, however, on Intel processors family
+0Fh, for models 3, 4 and 6, and family 06h, models e and f, this restriction
+has been lifted and all 64-bits are writable. On AMD systems, the ability to
+write the TSC MSR is not an architectural guarantee.
+
+The TSC is accessible from CPL-0 and conditionally, for CPL > 0 software by
+means of the CR4.TSD bit, which when enabled, disables CPL > 0 TSC access.
+
+Some vendors have implemented an additional instruction, RDTSCP, which returns
+atomically not just the TSC, but an indicator which corresponds to the
+processor number. This can be used to index into an array of TSC variables to
+determine offset information in SMP systems where TSCs are not synchronized.
+The presence of this instruction must be determined by consulting CPUID feature
+bits.
+
+Both VMX and SVM provide extension fields in the virtualization hardware which
+allows the guest visible TSC to be offset by a constant. Newer implementations
+promise to allow the TSC to additionally be scaled, but this hardware is not
+yet widely available.
+
+3.1) TSC synchronization
+
+The TSC is a CPU-local clock in most implementations. This means, on SMP
+platforms, the TSCs of different CPUs may start at different times depending
+on when the CPUs are powered on. Generally, CPUs on the same die will share
+the same clock, however, this is not always the case.
+
+The BIOS may attempt to resynchronize the TSCs during the poweron process and
+the operating system or other system software may attempt to do this as well.
+Several hardware limitations make the problem worse - if it is not possible to
+write the full 64-bits of the TSC, it may be impossible to match the TSC in
+newly arriving CPUs to that of the rest of the system, resulting in
+unsynchronized TSCs. This may be done by BIOS or system software, but in
+practice, getting a perfectly synchronized TSC will not be possible unless all
+values are read from the same clock, which generally only is possible on single
+socket systems or those with special hardware support.
+
+3.2) TSC and CPU hotplug
+
+As touched on already, CPUs which arrive later than the boot time of the system
+may not have a TSC value that is synchronized with the rest of the system.
+Either system software, BIOS, or SMM code may actually try to establish the TSC
+to a value matching the rest of the system, but a perfect match is usually not
+a guarantee. This can have the effect of bringing a system from a state where
+TSC is synchronized back to a state where TSC synchronization flaws, however
+small, may be exposed to the OS and any virtualization environment.
+
+3.3) TSC and multi-socket / NUMA
+
+Multi-socket systems, especially large multi-socket systems are likely to have
+individual clocksources rather than a single, universally distributed clock.
+Since these clocks are driven by different crystals, they will not have
+perfectly matched frequency, and temperature and electrical variations will
+cause the CPU clocks, and thus the TSCs to drift over time. Depending on the
+exact clock and bus design, the drift may or may not be fixed in absolute
+error, and may accumulate over time.
+
+In addition, very large systems may deliberately slew the clocks of individual
+cores. This technique, known as spread-spectrum clocking, reduces EMI at the
+clock frequency and harmonics of it, which may be required to pass FCC
+standards for telecommunications and computer equipment.
+
+It is recommended not to trust the TSCs to remain synchronized on NUMA or
+multiple socket systems for these reasons.
+
+3.4) TSC and C-states
+
+C-states, or idling states of the processor, especially C1E and deeper sleep
+states may be problematic for TSC as well. The TSC may stop advancing in such
+a state, resulting in a TSC which is behind that of other CPUs when execution
+is resumed. Such CPUs must be detected and flagged by the operating system
+based on CPU and chipset identifications.
+
+The TSC in such a case may be corrected by catching it up to a known external
+clocksource.
+
+3.5) TSC frequency change / P-states
+
+To make things slightly more interesting, some CPUs may change frequency. They
+may or may not run the TSC at the same rate, and because the frequency change
+may be staggered or slewed, at some points in time, the TSC rate may not be
+known other than falling within a range of values. In this case, the TSC will
+not be a stable time source, and must be calibrated against a known, stable,
+external clock to be a usable source of time.
+
+Whether the TSC runs at a constant rate or scales with the P-state is model
+dependent and must be determined by inspecting CPUID, chipset or vendor
+specific MSR fields.
+
+In addition, some vendors have known bugs where the P-state is actually
+compensated for properly during normal operation, but when the processor is
+inactive, the P-state may be raised temporarily to service cache misses from
+other processors. In such cases, the TSC on halted CPUs could advance faster
+than that of non-halted processors. AMD Turion processors are known to have
+this problem.
+
+3.6) TSC and STPCLK / T-states
+
+External signals given to the processor may also have the effect of stopping
+the TSC. This is typically done for thermal emergency power control to prevent
+an overheating condition, and typically, there is no way to detect that this
+condition has happened.
+
+3.7) TSC virtualization - VMX
+
+VMX provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
+instructions, which is enough for full virtualization of TSC in any manner. In
+addition, VMX allows passing through the host TSC plus an additional TSC_OFFSET
+field specified in the VMCS. Special instructions must be used to read and
+write the VMCS field.
+
+3.8) TSC virtualization - SVM
+
+SVM provides conditional trapping of RDTSC, RDMSR, WRMSR and RDTSCP
+instructions, which is enough for full virtualization of TSC in any manner. In
+addition, SVM allows passing through the host TSC plus an additional offset
+field specified in the SVM control block.
+
+3.9) TSC feature bits in Linux
+
+In summary, there is no way to guarantee the TSC remains in perfect
+synchronization unless it is explicitly guaranteed by the architecture. Even
+if so, the TSCs in multi-sockets or NUMA systems may still run independently
+despite being locally consistent.
+
+The following feature bits are used by Linux to signal various TSC attributes,
+but they can only be taken to be meaningful for UP or single node systems.
+
+X86_FEATURE_TSC : The TSC is available in hardware
+X86_FEATURE_RDTSCP : The RDTSCP instruction is available
+X86_FEATURE_CONSTANT_TSC : The TSC rate is unchanged with P-states
+X86_FEATURE_NONSTOP_TSC : The TSC does not stop in C-states
+X86_FEATURE_TSC_RELIABLE : TSC sync checks are skipped (VMware)
+
+4) Virtualization Problems
+
+Timekeeping is especially problematic for virtualization because a number of
+challenges arise. The most obvious problem is that time is now shared between
+the host and, potentially, a number of virtual machines. Thus the virtual
+operating system does not run with 100% usage of the CPU, despite the fact that
+it may very well make that assumption. It may expect it to remain true to very
+exacting bounds when interrupt sources are disabled, but in reality only its
+virtual interrupt sources are disabled, and the machine may still be preempted
+at any time. This causes problems as the passage of real time, the injection
+of machine interrupts and the associated clock sources are no longer completely
+synchronized with real time.
+
+This same problem can occur on native hardware to a degree, as SMM mode may
+steal cycles from the naturally on X86 systems when SMM mode is used by the
+BIOS, but not in such an extreme fashion. However, the fact that SMM mode may
+cause similar problems to virtualization makes it a good justification for
+solving many of these problems on bare metal.
+
+4.1) Interrupt clocking
+
+One of the most immediate problems that occurs with legacy operating systems
+is that the system timekeeping routines are often designed to keep track of
+time by counting periodic interrupts. These interrupts may come from the PIT
+or the RTC, but the problem is the same: the host virtualization engine may not
+be able to deliver the proper number of interrupts per second, and so guest
+time may fall behind. This is especially problematic if a high interrupt rate
+is selected, such as 1000 HZ, which is unfortunately the default for many Linux
+guests.
+
+There are three approaches to solving this problem; first, it may be possible
+to simply ignore it. Guests which have a separate time source for tracking
+'wall clock' or 'real time' may not need any adjustment of their interrupts to
+maintain proper time. If this is not sufficient, it may be necessary to inject
+additional interrupts into the guest in order to increase the effective
+interrupt rate. This approach leads to complications in extreme conditions,
+where host load or guest lag is too much to compensate for, and thus another
+solution to the problem has risen: the guest may need to become aware of lost
+ticks and compensate for them internally. Although promising in theory, the
+implementation of this policy in Linux has been extremely error prone, and a
+number of buggy variants of lost tick compensation are distributed across
+commonly used Linux systems.
+
+Windows uses periodic RTC clocking as a means of keeping time internally, and
+thus requires interrupt slewing to keep proper time. It does use a low enough
+rate (ed: is it 18.2 Hz?) however that it has not yet been a problem in
+practice.
+
+4.2) TSC sampling and serialization
+
+As the highest precision time source available, the cycle counter of the CPU
+has aroused much interest from developers. As explained above, this timer has
+many problems unique to its nature as a local, potentially unstable and
+potentially unsynchronized source. One issue which is not unique to the TSC,
+but is highlighted because of its very precise nature is sampling delay. By
+definition, the counter, once read is already old. However, it is also
+possible for the counter to be read ahead of the actual use of the result.
+This is a consequence of the superscalar execution of the instruction stream,
+which may execute instructions out of order. Such execution is called
+non-serialized. Forcing serialized execution is necessary for precise
+measurement with the TSC, and requires a serializing instruction, such as CPUID
+or an MSR read.
+
+Since CPUID may actually be virtualized by a trap and emulate mechanism, this
+serialization can pose a performance issue for hardware virtualization. An
+accurate time stamp counter reading may therefore not always be available, and
+it may be necessary for an implementation to guard against "backwards" reads of
+the TSC as seen from other CPUs, even in an otherwise perfectly synchronized
+system.
+
+4.3) Timespec aliasing
+
+Additionally, this lack of serialization from the TSC poses another challenge
+when using results of the TSC when measured against another time source. As
+the TSC is much higher precision, many possible values of the TSC may be read
+while another clock is still expressing the same value.
+
+That is, you may read (T,T+10) while external clock C maintains the same value.
+Due to non-serialized reads, you may actually end up with a range which
+fluctuates - from (T-1.. T+10). Thus, any time calculated from a TSC, but
+calibrated against an external value may have a range of valid values.
+Re-calibrating this computation may actually cause time, as computed after the
+calibration, to go backwards, compared with time computed before the
+calibration.
+
+This problem is particularly pronounced with an internal time source in Linux,
+the kernel time, which is expressed in the theoretically high resolution
+timespec - but which advances in much larger granularity intervals, sometimes
+at the rate of jiffies, and possibly in catchup modes, at a much larger step.
+
+This aliasing requires care in the computation and recalibration of kvmclock
+and any other values derived from TSC computation (such as TSC virtualization
+itself).
+
+4.4) Migration
+
+Migration of a virtual machine raises problems for timekeeping in two ways.
+First, the migration itself may take time, during which interrupts cannot be
+delivered, and after which, the guest time may need to be caught up. NTP may
+be able to help to some degree here, as the clock correction required is
+typically small enough to fall in the NTP-correctable window.
+
+An additional concern is that timers based off the TSC (or HPET, if the raw bus
+clock is exposed) may now be running at different rates, requiring compensation
+in some way in the hypervisor by virtualizing these timers. In addition,
+migrating to a faster machine may preclude the use of a passthrough TSC, as a
+faster clock cannot be made visible to a guest without the potential of time
+advancing faster than usual. A slower clock is less of a problem, as it can
+always be caught up to the original rate. KVM clock avoids these problems by
+simply storing multipliers and offsets against the TSC for the guest to convert
+back into nanosecond resolution values.
+
+4.5) Scheduling
+
+Since scheduling may be based on precise timing and firing of interrupts, the
+scheduling algorithms of an operating system may be adversely affected by
+virtualization. In theory, the effect is random and should be universally
+distributed, but in contrived as well as real scenarios (guest device access,
+causes of virtualization exits, possible context switch), this may not always
+be the case. The effect of this has not been well studied.
+
+In an attempt to work around this, several implementations have provided a
+paravirtualized scheduler clock, which reveals the true amount of CPU time for
+which a virtual machine has been running.
+
+4.6) Watchdogs
+
+Watchdog timers, such as the lock detector in Linux may fire accidentally when
+running under hardware virtualization due to timer interrupts being delayed or
+misinterpretation of the passage of real time. Usually, these warnings are
+spurious and can be ignored, but in some circumstances it may be necessary to
+disable such detection.
+
+4.7) Delays and precision timing
+
+Precise timing and delays may not be possible in a virtualized system. This
+can happen if the system is controlling physical hardware, or issues delays to
+compensate for slower I/O to and from devices. The first issue is not solvable
+in general for a virtualized system; hardware control software can't be
+adequately virtualized without a full real-time operating system, which would
+require an RT aware virtualization platform.
+
+The second issue may cause performance problems, but this is unlikely to be a
+significant issue. In many cases these delays may be eliminated through
+configuration or paravirtualization.
+
+4.8) Covert channels and leaks
+
+In addition to the above problems, time information will inevitably leak to the
+guest about the host in anything but a perfect implementation of virtualized
+time. This may allow the guest to infer the presence of a hypervisor (as in a
+red-pill type detection), and it may allow information to leak between guests
+by using CPU utilization itself as a signalling channel. Preventing such
+problems would require completely isolated virtual time which may not track
+real time any longer. This may be useful in certain security or QA contexts,
+but in general isn't recommended for real-world deployment scenarios.
diff --git a/Documentation/virtual/paravirt_ops.txt b/Documentation/virtual/paravirt_ops.txt
new file mode 100644
index 000000000..d4881c00e
--- /dev/null
+++ b/Documentation/virtual/paravirt_ops.txt
@@ -0,0 +1,32 @@
+Paravirt_ops
+============
+
+Linux provides support for different hypervisor virtualization technologies.
+Historically different binary kernels would be required in order to support
+different hypervisors, this restriction was removed with pv_ops.
+Linux pv_ops is a virtualization API which enables support for different
+hypervisors. It allows each hypervisor to override critical operations and
+allows a single kernel binary to run on all supported execution environments
+including native machine -- without any hypervisors.
+
+pv_ops provides a set of function pointers which represent operations
+corresponding to low level critical instructions and high level
+functionalities in various areas. pv-ops allows for optimizations at run
+time by enabling binary patching of the low-ops critical operations
+at boot time.
+
+pv_ops operations are classified into three categories:
+
+- simple indirect call
+ These operations correspond to high level functionality where it is
+ known that the overhead of indirect call isn't very important.
+
+- indirect call which allows optimization with binary patch
+ Usually these operations correspond to low level critical instructions. They
+ are called frequently and are performance critical. The overhead is
+ very important.
+
+- a set of macros for hand written assembly code
+ Hand written assembly codes (.S files) also need paravirtualization
+ because they include sensitive instructions or some of code paths in
+ them are very performance critical.
diff --git a/Documentation/virtual/uml/UserModeLinux-HOWTO.txt b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
new file mode 100644
index 000000000..f4099ca6b
--- /dev/null
+++ b/Documentation/virtual/uml/UserModeLinux-HOWTO.txt
@@ -0,0 +1,4589 @@
+ User Mode Linux HOWTO
+ User Mode Linux Core Team
+ Mon Nov 18 14:16:16 EST 2002
+
+ This document describes the use and abuse of Jeff Dike's User Mode
+ Linux: a port of the Linux kernel as a normal Intel Linux process.
+ ______________________________________________________________________
+
+ Table of Contents
+
+ 1. Introduction
+
+ 1.1 How is User Mode Linux Different?
+ 1.2 Why Would I Want User Mode Linux?
+
+ 2. Compiling the kernel and modules
+
+ 2.1 Compiling the kernel
+ 2.2 Compiling and installing kernel modules
+ 2.3 Compiling and installing uml_utilities
+
+ 3. Running UML and logging in
+
+ 3.1 Running UML
+ 3.2 Logging in
+ 3.3 Examples
+
+ 4. UML on 2G/2G hosts
+
+ 4.1 Introduction
+ 4.2 The problem
+ 4.3 The solution
+
+ 5. Setting up serial lines and consoles
+
+ 5.1 Specifying the device
+ 5.2 Specifying the channel
+ 5.3 Examples
+
+ 6. Setting up the network
+
+ 6.1 General setup
+ 6.2 Userspace daemons
+ 6.3 Specifying ethernet addresses
+ 6.4 UML interface setup
+ 6.5 Multicast
+ 6.6 TUN/TAP with the uml_net helper
+ 6.7 TUN/TAP with a preconfigured tap device
+ 6.8 Ethertap
+ 6.9 The switch daemon
+ 6.10 Slip
+ 6.11 Slirp
+ 6.12 pcap
+ 6.13 Setting up the host yourself
+
+ 7. Sharing Filesystems between Virtual Machines
+
+ 7.1 A warning
+ 7.2 Using layered block devices
+ 7.3 Note!
+ 7.4 Another warning
+ 7.5 uml_moo : Merging a COW file with its backing file
+
+ 8. Creating filesystems
+
+ 8.1 Create the filesystem file
+ 8.2 Assign the file to a UML device
+ 8.3 Creating and mounting the filesystem
+
+ 9. Host file access
+
+ 9.1 Using hostfs
+ 9.2 hostfs as the root filesystem
+ 9.3 Building hostfs
+
+ 10. The Management Console
+ 10.1 version
+ 10.2 halt and reboot
+ 10.3 config
+ 10.4 remove
+ 10.5 sysrq
+ 10.6 help
+ 10.7 cad
+ 10.8 stop
+ 10.9 go
+
+ 11. Kernel debugging
+
+ 11.1 Starting the kernel under gdb
+ 11.2 Examining sleeping processes
+ 11.3 Running ddd on UML
+ 11.4 Debugging modules
+ 11.5 Attaching gdb to the kernel
+ 11.6 Using alternate debuggers
+
+ 12. Kernel debugging examples
+
+ 12.1 The case of the hung fsck
+ 12.2 Episode 2: The case of the hung fsck
+
+ 13. What to do when UML doesn't work
+
+ 13.1 Strange compilation errors when you build from source
+ 13.2 (obsolete)
+ 13.3 A variety of panics and hangs with /tmp on a reiserfs filesystem
+ 13.4 The compile fails with errors about conflicting types for 'open', 'dup', and 'waitpid'
+ 13.5 UML doesn't work when /tmp is an NFS filesystem
+ 13.6 UML hangs on boot when compiled with gprof support
+ 13.7 syslogd dies with a SIGTERM on startup
+ 13.8 TUN/TAP networking doesn't work on a 2.4 host
+ 13.9 You can network to the host but not to other machines on the net
+ 13.10 I have no root and I want to scream
+ 13.11 UML build conflict between ptrace.h and ucontext.h
+ 13.12 The UML BogoMips is exactly half the host's BogoMips
+ 13.13 When you run UML, it immediately segfaults
+ 13.14 xterms appear, then immediately disappear
+ 13.15 Any other panic, hang, or strange behavior
+
+ 14. Diagnosing Problems
+
+ 14.1 Case 1 : Normal kernel panics
+ 14.2 Case 2 : Tracing thread panics
+ 14.3 Case 3 : Tracing thread panics caused by other threads
+ 14.4 Case 4 : Hangs
+
+ 15. Thanks
+
+ 15.1 Code and Documentation
+ 15.2 Flushing out bugs
+ 15.3 Buglets and clean-ups
+ 15.4 Case Studies
+ 15.5 Other contributions
+
+
+ ______________________________________________________________________
+
+ 1. Introduction
+
+ Welcome to User Mode Linux. It's going to be fun.
+
+
+
+ 1.1. How is User Mode Linux Different?
+
+ Normally, the Linux Kernel talks straight to your hardware (video
+ card, keyboard, hard drives, etc), and any programs which run ask the
+ kernel to operate the hardware, like so:
+
+
+
+ +-----------+-----------+----+
+ | Process 1 | Process 2 | ...|
+ +-----------+-----------+----+
+ | Linux Kernel |
+ +----------------------------+
+ | Hardware |
+ +----------------------------+
+
+
+
+
+ The User Mode Linux Kernel is different; instead of talking to the
+ hardware, it talks to a `real' Linux kernel (called the `host kernel'
+ from now on), like any other program. Programs can then run inside
+ User-Mode Linux as if they were running under a normal kernel, like
+ so:
+
+
+
+ +----------------+
+ | Process 2 | ...|
+ +-----------+----------------+
+ | Process 1 | User-Mode Linux|
+ +----------------------------+
+ | Linux Kernel |
+ +----------------------------+
+ | Hardware |
+ +----------------------------+
+
+
+
+
+
+ 1.2. Why Would I Want User Mode Linux?
+
+
+ 1. If User Mode Linux crashes, your host kernel is still fine.
+
+ 2. You can run a usermode kernel as a non-root user.
+
+ 3. You can debug the User Mode Linux like any normal process.
+
+ 4. You can run gprof (profiling) and gcov (coverage testing).
+
+ 5. You can play with your kernel without breaking things.
+
+ 6. You can use it as a sandbox for testing new apps.
+
+ 7. You can try new development kernels safely.
+
+ 8. You can run different distributions simultaneously.
+
+ 9. It's extremely fun.
+
+
+
+
+
+ 2. Compiling the kernel and modules
+
+
+
+
+ 2.1. Compiling the kernel
+
+
+ Compiling the user mode kernel is just like compiling any other
+ kernel. Let's go through the steps, using 2.4.0-prerelease (current
+ as of this writing) as an example:
+
+
+ 1. Download the latest UML patch from
+
+ the download page <http://user-mode-linux.sourceforge.net/
+
+ In this example, the file is uml-patch-2.4.0-prerelease.bz2.
+
+
+ 2. Download the matching kernel from your favourite kernel mirror,
+ such as:
+
+ ftp://ftp.ca.kernel.org/pub/kernel/v2.4/linux-2.4.0-prerelease.tar.bz2
+ <ftp://ftp.ca.kernel.org/pub/kernel/v2.4/linux-2.4.0-prerelease.tar.bz2>
+ .
+
+
+ 3. Make a directory and unpack the kernel into it.
+
+
+
+ host%
+ mkdir ~/uml
+
+
+
+
+
+
+ host%
+ cd ~/uml
+
+
+
+
+
+
+ host%
+ tar -xzvf linux-2.4.0-prerelease.tar.bz2
+
+
+
+
+
+
+ 4. Apply the patch using
+
+
+
+ host%
+ cd ~/uml/linux
+
+
+
+ host%
+ bzcat uml-patch-2.4.0-prerelease.bz2 | patch -p1
+
+
+
+
+
+
+ 5. Run your favorite config; `make xconfig ARCH=um' is the most
+ convenient. `make config ARCH=um' and 'make menuconfig ARCH=um'
+ will work as well. The defaults will give you a useful kernel. If
+ you want to change something, go ahead, it probably won't hurt
+ anything.
+
+
+ Note: If the host is configured with a 2G/2G address space split
+ rather than the usual 3G/1G split, then the packaged UML binaries
+ will not run. They will immediately segfault. See ``UML on 2G/2G
+ hosts'' for the scoop on running UML on your system.
+
+
+
+ 6. Finish with `make linux ARCH=um': the result is a file called
+ `linux' in the top directory of your source tree.
+
+ Make sure that you don't build this kernel in /usr/src/linux. On some
+ distributions, /usr/include/asm is a link into this pool. The user-
+ mode build changes the other end of that link, and things that include
+ <asm/anything.h> stop compiling.
+
+ The sources are also available from cvs at the project's cvs page,
+ which has directions on getting the sources. You can also browse the
+ CVS pool from there.
+
+ If you get the CVS sources, you will have to check them out into an
+ empty directory. You will then have to copy each file into the
+ corresponding directory in the appropriate kernel pool.
+
+ If you don't have the latest kernel pool, you can get the
+ corresponding user-mode sources with
+
+
+ host% cvs co -r v_2_3_x linux
+
+
+
+
+ where 'x' is the version in your pool. Note that you will not get the
+ bug fixes and enhancements that have gone into subsequent releases.
+
+
+ 2.2. Compiling and installing kernel modules
+
+ UML modules are built in the same way as the native kernel (with the
+ exception of the 'ARCH=um' that you always need for UML):
+
+
+ host% make modules ARCH=um
+
+
+
+
+ Any modules that you want to load into this kernel need to be built in
+ the user-mode pool. Modules from the native kernel won't work.
+
+ You can install them by using ftp or something to copy them into the
+ virtual machine and dropping them into /lib/modules/`uname -r`.
+
+ You can also get the kernel build process to install them as follows:
+
+ 1. with the kernel not booted, mount the root filesystem in the top
+ level of the kernel pool:
+
+
+ host% mount root_fs mnt -o loop
+
+
+
+
+
+
+ 2. run
+
+
+ host%
+ make modules_install INSTALL_MOD_PATH=`pwd`/mnt ARCH=um
+
+
+
+
+
+
+ 3. unmount the filesystem
+
+
+ host% umount mnt
+
+
+
+
+
+
+ 4. boot the kernel on it
+
+
+ When the system is booted, you can use insmod as usual to get the
+ modules into the kernel. A number of things have been loaded into UML
+ as modules, especially filesystems and network protocols and filters,
+ so most symbols which need to be exported probably already are.
+ However, if you do find symbols that need exporting, let us
+ <http://user-mode-linux.sourceforge.net/> know, and
+ they'll be "taken care of".
+
+
+
+ 2.3. Compiling and installing uml_utilities
+
+ Many features of the UML kernel require a user-space helper program,
+ so a uml_utilities package is distributed separately from the kernel
+ patch which provides these helpers. Included within this is:
+
+ o port-helper - Used by consoles which connect to xterms or ports
+
+ o tunctl - Configuration tool to create and delete tap devices
+
+ o uml_net - Setuid binary for automatic tap device configuration
+
+ o uml_switch - User-space virtual switch required for daemon
+ transport
+
+ The uml_utilities tree is compiled with:
+
+
+ host#
+ make && make install
+
+
+
+
+ Note that UML kernel patches may require a specific version of the
+ uml_utilities distribution. If you don't keep up with the mailing
+ lists, ensure that you have the latest release of uml_utilities if you
+ are experiencing problems with your UML kernel, particularly when
+ dealing with consoles or command-line switches to the helper programs
+
+
+
+
+
+
+
+
+ 3. Running UML and logging in
+
+
+
+ 3.1. Running UML
+
+ It runs on 2.2.15 or later, and all 2.4 kernels.
+
+
+ Booting UML is straightforward. Simply run 'linux': it will try to
+ mount the file `root_fs' in the current directory. You do not need to
+ run it as root. If your root filesystem is not named `root_fs', then
+ you need to put a `ubd0=root_fs_whatever' switch on the linux command
+ line.
+
+
+ You will need a filesystem to boot UML from. There are a number
+ available for download from here <http://user-mode-
+ linux.sourceforge.net/> . There are also several tools
+ <http://user-mode-linux.sourceforge.net/> which can be
+ used to generate UML-compatible filesystem images from media.
+ The kernel will boot up and present you with a login prompt.
+
+
+ Note: If the host is configured with a 2G/2G address space split
+ rather than the usual 3G/1G split, then the packaged UML binaries will
+ not run. They will immediately segfault. See ``UML on 2G/2G hosts''
+ for the scoop on running UML on your system.
+
+
+
+ 3.2. Logging in
+
+
+
+ The prepackaged filesystems have a root account with password 'root'
+ and a user account with password 'user'. The login banner will
+ generally tell you how to log in. So, you log in and you will find
+ yourself inside a little virtual machine. Our filesystems have a
+ variety of commands and utilities installed (and it is fairly easy to
+ add more), so you will have a lot of tools with which to poke around
+ the system.
+
+ There are a couple of other ways to log in:
+
+ o On a virtual console
+
+
+
+ Each virtual console that is configured (i.e. the device exists in
+ /dev and /etc/inittab runs a getty on it) will come up in its own
+ xterm. If you get tired of the xterms, read ``Setting up serial
+ lines and consoles'' to see how to attach the consoles to
+ something else, like host ptys.
+
+
+
+ o Over the serial line
+
+
+ In the boot output, find a line that looks like:
+
+
+
+ serial line 0 assigned pty /dev/ptyp1
+
+
+
+
+ Attach your favorite terminal program to the corresponding tty. I.e.
+ for minicom, the command would be
+
+
+ host% minicom -o -p /dev/ttyp1
+
+
+
+
+
+
+ o Over the net
+
+
+ If the network is running, then you can telnet to the virtual
+ machine and log in to it. See ``Setting up the network'' to learn
+ about setting up a virtual network.
+
+ When you're done using it, run halt, and the kernel will bring itself
+ down and the process will exit.
+
+
+ 3.3. Examples
+
+ Here are some examples of UML in action:
+
+ o A login session <http://user-mode-linux.sourceforge.net/login.html>
+
+ o A virtual network <http://user-mode-linux.sourceforge.net/net.html>
+
+
+
+
+
+
+
+ 4. UML on 2G/2G hosts
+
+
+
+
+ 4.1. Introduction
+
+
+ Most Linux machines are configured so that the kernel occupies the
+ upper 1G (0xc0000000 - 0xffffffff) of the 4G address space and
+ processes use the lower 3G (0x00000000 - 0xbfffffff). However, some
+ machine are configured with a 2G/2G split, with the kernel occupying
+ the upper 2G (0x80000000 - 0xffffffff) and processes using the lower
+ 2G (0x00000000 - 0x7fffffff).
+
+
+
+
+ 4.2. The problem
+
+
+ The prebuilt UML binaries on this site will not run on 2G/2G hosts
+ because UML occupies the upper .5G of the 3G process address space
+ (0xa0000000 - 0xbfffffff). Obviously, on 2G/2G hosts, this is right
+ in the middle of the kernel address space, so UML won't even load - it
+ will immediately segfault.
+
+
+
+
+ 4.3. The solution
+
+
+ The fix for this is to rebuild UML from source after enabling
+ CONFIG_HOST_2G_2G (under 'General Setup'). This will cause UML to
+ load itself in the top .5G of that smaller process address space,
+ where it will run fine. See ``Compiling the kernel and modules'' if
+ you need help building UML from source.
+
+
+
+
+
+
+
+
+
+
+ 5. Setting up serial lines and consoles
+
+
+ It is possible to attach UML serial lines and consoles to many types
+ of host I/O channels by specifying them on the command line.
+
+
+ You can attach them to host ptys, ttys, file descriptors, and ports.
+ This allows you to do things like
+
+ o have a UML console appear on an unused host console,
+
+ o hook two virtual machines together by having one attach to a pty
+ and having the other attach to the corresponding tty
+
+ o make a virtual machine accessible from the net by attaching a
+ console to a port on the host.
+
+
+ The general format of the command line option is device=channel.
+
+
+
+ 5.1. Specifying the device
+
+ Devices are specified with "con" or "ssl" (console or serial line,
+ respectively), optionally with a device number if you are talking
+ about a specific device.
+
+
+ Using just "con" or "ssl" describes all of the consoles or serial
+ lines. If you want to talk about console #3 or serial line #10, they
+ would be "con3" and "ssl10", respectively.
+
+
+ A specific device name will override a less general "con=" or "ssl=".
+ So, for example, you can assign a pty to each of the serial lines
+ except for the first two like this:
+
+
+ ssl=pty ssl0=tty:/dev/tty0 ssl1=tty:/dev/tty1
+
+
+
+
+ The specificity of the device name is all that matters; order on the
+ command line is irrelevant.
+
+
+
+ 5.2. Specifying the channel
+
+ There are a number of different types of channels to attach a UML
+ device to, each with a different way of specifying exactly what to
+ attach to.
+
+ o pseudo-terminals - device=pty pts terminals - device=pts
+
+
+ This will cause UML to allocate a free host pseudo-terminal for the
+ device. The terminal that it got will be announced in the boot
+ log. You access it by attaching a terminal program to the
+ corresponding tty:
+
+ o screen /dev/pts/n
+
+ o screen /dev/ttyxx
+
+ o minicom -o -p /dev/ttyxx - minicom seems not able to handle pts
+ devices
+
+ o kermit - start it up, 'open' the device, then 'connect'
+
+
+
+
+
+ o terminals - device=tty:tty device file
+
+
+ This will make UML attach the device to the specified tty (i.e
+
+
+ con1=tty:/dev/tty3
+
+
+
+
+ will attach UML's console 1 to the host's /dev/tty3). If the tty that
+ you specify is the slave end of a tty/pty pair, something else must
+ have already opened the corresponding pty in order for this to work.
+
+
+
+
+
+ o xterms - device=xterm
+
+
+ UML will run an xterm and the device will be attached to it.
+
+
+
+
+
+ o Port - device=port:port number
+
+
+ This will attach the UML devices to the specified host port.
+ Attaching console 1 to the host's port 9000 would be done like
+ this:
+
+
+ con1=port:9000
+
+
+
+
+ Attaching all the serial lines to that port would be done similarly:
+
+
+ ssl=port:9000
+
+
+
+
+ You access these devices by telnetting to that port. Each active tel-
+ net session gets a different device. If there are more telnets to a
+ port than UML devices attached to it, then the extra telnet sessions
+ will block until an existing telnet detaches, or until another device
+ becomes active (i.e. by being activated in /etc/inittab).
+
+ This channel has the advantage that you can both attach multiple UML
+ devices to it and know how to access them without reading the UML boot
+ log. It is also unique in allowing access to a UML from remote
+ machines without requiring that the UML be networked. This could be
+ useful in allowing public access to UMLs because they would be
+ accessible from the net, but wouldn't need any kind of network
+ filtering or access control because they would have no network access.
+
+
+ If you attach the main console to a portal, then the UML boot will
+ appear to hang. In reality, it's waiting for a telnet to connect, at
+ which point the boot will proceed.
+
+
+
+
+
+ o already-existing file descriptors - device=file descriptor
+
+
+ If you set up a file descriptor on the UML command line, you can
+ attach a UML device to it. This is most commonly used to put the
+ main console back on stdin and stdout after assigning all the other
+ consoles to something else:
+
+
+ con0=fd:0,fd:1 con=pts
+
+
+
+
+
+
+
+
+ o Nothing - device=null
+
+
+ This allows the device to be opened, in contrast to 'none', but
+ reads will block, and writes will succeed and the data will be
+ thrown out.
+
+
+
+
+
+ o None - device=none
+
+
+ This causes the device to disappear.
+
+
+
+ You can also specify different input and output channels for a device
+ by putting a comma between them:
+
+
+ ssl3=tty:/dev/tty2,xterm
+
+
+
+
+ will cause serial line 3 to accept input on the host's /dev/tty2 and
+ display output on an xterm. That's a silly example - the most common
+ use of this syntax is to reattach the main console to stdin and stdout
+ as shown above.
+
+
+ If you decide to move the main console away from stdin/stdout, the
+ initial boot output will appear in the terminal that you're running
+ UML in. However, once the console driver has been officially
+ initialized, then the boot output will start appearing wherever you
+ specified that console 0 should be. That device will receive all
+ subsequent output.
+
+
+
+ 5.3. Examples
+
+ There are a number of interesting things you can do with this
+ capability.
+
+
+ First, this is how you get rid of those bleeding console xterms by
+ attaching them to host ptys:
+
+
+ con=pty con0=fd:0,fd:1
+
+
+
+
+ This will make a UML console take over an unused host virtual console,
+ so that when you switch to it, you will see the UML login prompt
+ rather than the host login prompt:
+
+
+ con1=tty:/dev/tty6
+
+
+
+
+ You can attach two virtual machines together with what amounts to a
+ serial line as follows:
+
+ Run one UML with a serial line attached to a pty -
+
+
+ ssl1=pty
+
+
+
+
+ Look at the boot log to see what pty it got (this example will assume
+ that it got /dev/ptyp1).
+
+ Boot the other UML with a serial line attached to the corresponding
+ tty -
+
+
+ ssl1=tty:/dev/ttyp1
+
+
+
+
+ Log in, make sure that it has no getty on that serial line, attach a
+ terminal program like minicom to it, and you should see the login
+ prompt of the other virtual machine.
+
+
+ 6. Setting up the network
+
+
+
+ This page describes how to set up the various transports and to
+ provide a UML instance with network access to the host, other machines
+ on the local net, and the rest of the net.
+
+
+ As of 2.4.5, UML networking has been completely redone to make it much
+ easier to set up, fix bugs, and add new features.
+
+
+ There is a new helper, uml_net, which does the host setup that
+ requires root privileges.
+
+
+ There are currently five transport types available for a UML virtual
+ machine to exchange packets with other hosts:
+
+ o ethertap
+
+ o TUN/TAP
+
+ o Multicast
+
+ o a switch daemon
+
+ o slip
+
+ o slirp
+
+ o pcap
+
+ The TUN/TAP, ethertap, slip, and slirp transports allow a UML
+ instance to exchange packets with the host. They may be directed
+ to the host or the host may just act as a router to provide access
+ to other physical or virtual machines.
+
+
+ The pcap transport is a synthetic read-only interface, using the
+ libpcap binary to collect packets from interfaces on the host and
+ filter them. This is useful for building preconfigured traffic
+ monitors or sniffers.
+
+
+ The daemon and multicast transports provide a completely virtual
+ network to other virtual machines. This network is completely
+ disconnected from the physical network unless one of the virtual
+ machines on it is acting as a gateway.
+
+
+ With so many host transports, which one should you use? Here's when
+ you should use each one:
+
+ o ethertap - if you want access to the host networking and it is
+ running 2.2
+
+ o TUN/TAP - if you want access to the host networking and it is
+ running 2.4. Also, the TUN/TAP transport is able to use a
+ preconfigured device, allowing it to avoid using the setuid uml_net
+ helper, which is a security advantage.
+
+ o Multicast - if you want a purely virtual network and you don't want
+ to set up anything but the UML
+
+ o a switch daemon - if you want a purely virtual network and you
+ don't mind running the daemon in order to get somewhat better
+ performance
+
+ o slip - there is no particular reason to run the slip backend unless
+ ethertap and TUN/TAP are just not available for some reason
+
+ o slirp - if you don't have root access on the host to setup
+ networking, or if you don't want to allocate an IP to your UML
+
+ o pcap - not much use for actual network connectivity, but great for
+ monitoring traffic on the host
+
+ Ethertap is available on 2.4 and works fine. TUN/TAP is preferred
+ to it because it has better performance and ethertap is officially
+ considered obsolete in 2.4. Also, the root helper only needs to
+ run occasionally for TUN/TAP, rather than handling every packet, as
+ it does with ethertap. This is a slight security advantage since
+ it provides fewer opportunities for a nasty UML user to somehow
+ exploit the helper's root privileges.
+
+
+ 6.1. General setup
+
+ First, you must have the virtual network enabled in your UML. If are
+ running a prebuilt kernel from this site, everything is already
+ enabled. If you build the kernel yourself, under the "Network device
+ support" menu, enable "Network device support", and then the three
+ transports.
+
+
+ The next step is to provide a network device to the virtual machine.
+ This is done by describing it on the kernel command line.
+
+ The general format is
+
+
+ eth <n> = <transport> , <transport args>
+
+
+
+
+ For example, a virtual ethernet device may be attached to a host
+ ethertap device as follows:
+
+
+ eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254
+
+
+
+
+ This sets up eth0 inside the virtual machine to attach itself to the
+ host /dev/tap0, assigns it an ethernet address, and assigns the host
+ tap0 interface an IP address.
+
+
+
+ Note that the IP address you assign to the host end of the tap device
+ must be different than the IP you assign to the eth device inside UML.
+ If you are short on IPs and don't want to consume two per UML, then
+ you can reuse the host's eth IP address for the host ends of the tap
+ devices. Internally, the UMLs must still get unique IPs for their eth
+ devices. You can also give the UMLs non-routable IPs (192.168.x.x or
+ 10.x.x.x) and have the host masquerade them. This will let outgoing
+ connections work, but incoming connections won't without more work,
+ such as port forwarding from the host.
+ Also note that when you configure the host side of an interface, it is
+ only acting as a gateway. It will respond to pings sent to it
+ locally, but is not useful to do that since it's a host interface.
+ You are not talking to the UML when you ping that interface and get a
+ response.
+
+
+ You can also add devices to a UML and remove them at runtime. See the
+ ``The Management Console'' page for details.
+
+
+ The sections below describe this in more detail.
+
+
+ Once you've decided how you're going to set up the devices, you boot
+ UML, log in, configure the UML side of the devices, and set up routes
+ to the outside world. At that point, you will be able to talk to any
+ other machines, physical or virtual, on the net.
+
+
+ If ifconfig inside UML fails and the network refuses to come up, run
+ tell you what went wrong.
+
+
+
+ 6.2. Userspace daemons
+
+ You will likely need the setuid helper, or the switch daemon, or both.
+ They are both installed with the RPM and deb, so if you've installed
+ either, you can skip the rest of this section.
+
+
+ If not, then you need to check them out of CVS, build them, and
+ install them. The helper is uml_net, in CVS /tools/uml_net, and the
+ daemon is uml_switch, in CVS /tools/uml_router. They are both built
+ with a plain 'make'. Both need to be installed in a directory that's
+ in your path - /usr/bin is recommend. On top of that, uml_net needs
+ to be setuid root.
+
+
+
+ 6.3. Specifying ethernet addresses
+
+ Below, you will see that the TUN/TAP, ethertap, and daemon interfaces
+ allow you to specify hardware addresses for the virtual ethernet
+ devices. This is generally not necessary. If you don't have a
+ specific reason to do it, you probably shouldn't. If one is not
+ specified on the command line, the driver will assign one based on the
+ device IP address. It will provide the address fe:fd:nn:nn:nn:nn
+ where nn.nn.nn.nn is the device IP address. This is nearly always
+ sufficient to guarantee a unique hardware address for the device. A
+ couple of exceptions are:
+
+ o Another set of virtual ethernet devices are on the same network and
+ they are assigned hardware addresses using a different scheme which
+ may conflict with the UML IP address-based scheme
+
+ o You aren't going to use the device for IP networking, so you don't
+ assign the device an IP address
+
+ If you let the driver provide the hardware address, you should make
+ sure that the device IP address is known before the interface is
+ brought up. So, inside UML, this will guarantee that:
+
+
+
+ UML#
+ ifconfig eth0 192.168.0.250 up
+
+
+
+
+ If you decide to assign the hardware address yourself, make sure that
+ the first byte of the address is even. Addresses with an odd first
+ byte are broadcast addresses, which you don't want assigned to a
+ device.
+
+
+
+ 6.4. UML interface setup
+
+ Once the network devices have been described on the command line, you
+ should boot UML and log in.
+
+
+ The first thing to do is bring the interface up:
+
+
+ UML# ifconfig ethn ip-address up
+
+
+
+
+ You should be able to ping the host at this point.
+
+
+ To reach the rest of the world, you should set a default route to the
+ host:
+
+
+ UML# route add default gw host ip
+
+
+
+
+ Again, with host ip of 192.168.0.4:
+
+
+ UML# route add default gw 192.168.0.4
+
+
+
+
+ This page used to recommend setting a network route to your local net.
+ This is wrong, because it will cause UML to try to figure out hardware
+ addresses of the local machines by arping on the interface to the
+ host. Since that interface is basically a single strand of ethernet
+ with two nodes on it (UML and the host) and arp requests don't cross
+ networks, they will fail to elicit any responses. So, what you want
+ is for UML to just blindly throw all packets at the host and let it
+ figure out what to do with them, which is what leaving out the network
+ route and adding the default route does.
+
+
+ Note: If you can't communicate with other hosts on your physical
+ ethernet, it's probably because of a network route that's
+ automatically set up. If you run 'route -n' and see a route that
+ looks like this:
+
+
+
+
+ Destination Gateway Genmask Flags Metric Ref Use Iface
+ 192.168.0.0 0.0.0.0 255.255.255.0 U 0 0 0 eth0
+
+
+
+
+ with a mask that's not 255.255.255.255, then replace it with a route
+ to your host:
+
+
+ UML#
+ route del -net 192.168.0.0 dev eth0 netmask 255.255.255.0
+
+
+
+
+
+
+ UML#
+ route add -host 192.168.0.4 dev eth0
+
+
+
+
+ This, plus the default route to the host, will allow UML to exchange
+ packets with any machine on your ethernet.
+
+
+
+ 6.5. Multicast
+
+ The simplest way to set up a virtual network between multiple UMLs is
+ to use the mcast transport. This was written by Harald Welte and is
+ present in UML version 2.4.5-5um and later. Your system must have
+ multicast enabled in the kernel and there must be a multicast-capable
+ network device on the host. Normally, this is eth0, but if there is
+ no ethernet card on the host, then you will likely get strange error
+ messages when you bring the device up inside UML.
+
+
+ To use it, run two UMLs with
+
+
+ eth0=mcast
+
+
+
+
+ on their command lines. Log in, configure the ethernet device in each
+ machine with different IP addresses:
+
+
+ UML1# ifconfig eth0 192.168.0.254
+
+
+
+
+
+
+ UML2# ifconfig eth0 192.168.0.253
+
+
+
+
+ and they should be able to talk to each other.
+
+ The full set of command line options for this transport are
+
+
+
+ ethn=mcast,ethernet address,multicast
+ address,multicast port,ttl
+
+
+
+
+ Harald's original README is here <http://user-mode-linux.source-
+ forge.net/> and explains these in detail, as well as
+ some other issues.
+
+ There is also a related point-to-point only "ucast" transport.
+ This is useful when your network does not support multicast, and
+ all network connections are simple point to point links.
+
+ The full set of command line options for this transport are
+
+
+ ethn=ucast,ethernet address,remote address,listen port,remote port
+
+
+
+
+ 6.6. TUN/TAP with the uml_net helper
+
+ TUN/TAP is the preferred mechanism on 2.4 to exchange packets with the
+ host. The TUN/TAP backend has been in UML since 2.4.9-3um.
+
+
+ The easiest way to get up and running is to let the setuid uml_net
+ helper do the host setup for you. This involves insmod-ing the tun.o
+ module if necessary, configuring the device, and setting up IP
+ forwarding, routing, and proxy arp. If you are new to UML networking,
+ do this first. If you're concerned about the security implications of
+ the setuid helper, use it to get up and running, then read the next
+ section to see how to have UML use a preconfigured tap device, which
+ avoids the use of uml_net.
+
+
+ If you specify an IP address for the host side of the device, the
+ uml_net helper will do all necessary setup on the host - the only
+ requirement is that TUN/TAP be available, either built in to the host
+ kernel or as the tun.o module.
+
+ The format of the command line switch to attach a device to a TUN/TAP
+ device is
+
+
+ eth <n> =tuntap,,, <IP address>
+
+
+
+
+ For example, this argument will attach the UML's eth0 to the next
+ available tap device and assign an ethernet address to it based on its
+ IP address
+
+
+ eth0=tuntap,,,192.168.0.254
+
+
+
+
+
+
+ Note that the IP address that must be used for the eth device inside
+ UML is fixed by the routing and proxy arp that is set up on the
+ TUN/TAP device on the host. You can use a different one, but it won't
+ work because reply packets won't reach the UML. This is a feature.
+ It prevents a nasty UML user from doing things like setting the UML IP
+ to the same as the network's nameserver or mail server.
+
+
+ There are a couple potential problems with running the TUN/TAP
+ transport on a 2.4 host kernel
+
+ o TUN/TAP seems not to work on 2.4.3 and earlier. Upgrade the host
+ kernel or use the ethertap transport.
+
+ o With an upgraded kernel, TUN/TAP may fail with
+
+
+ File descriptor in bad state
+
+
+
+
+ This is due to a header mismatch between the upgraded kernel and the
+ kernel that was originally installed on the machine. The fix is to
+ make sure that /usr/src/linux points to the headers for the running
+ kernel.
+
+ These were pointed out by Tim Robinson <timro at trkr dot net> in
+ <http://www.geocrawler.com/> name="this uml-
+ user post"> .
+
+
+
+ 6.7. TUN/TAP with a preconfigured tap device
+
+ If you prefer not to have UML use uml_net (which is somewhat
+ insecure), with UML 2.4.17-11, you can set up a TUN/TAP device
+ beforehand. The setup needs to be done as root, but once that's done,
+ there is no need for root assistance. Setting up the device is done
+ as follows:
+
+ o Create the device with tunctl (available from the UML utilities
+ tarball)
+
+
+
+
+ host# tunctl -u uid
+
+
+
+
+ where uid is the user id or username that UML will be run as. This
+ will tell you what device was created.
+
+ o Configure the device IP (change IP addresses and device name to
+ suit)
+
+
+
+
+ host# ifconfig tap0 192.168.0.254 up
+
+
+
+
+
+ o Set up routing and arping if desired - this is my recipe, there are
+ other ways of doing the same thing
+
+
+ host#
+ bash -c 'echo 1 > /proc/sys/net/ipv4/ip_forward'
+
+ host#
+ route add -host 192.168.0.253 dev tap0
+
+
+
+
+
+
+ host#
+ bash -c 'echo 1 > /proc/sys/net/ipv4/conf/tap0/proxy_arp'
+
+
+
+
+
+
+ host#
+ arp -Ds 192.168.0.253 eth0 pub
+
+
+
+
+ Note that this must be done every time the host boots - this configu-
+ ration is not stored across host reboots. So, it's probably a good
+ idea to stick it in an rc file. An even better idea would be a little
+ utility which reads the information from a config file and sets up
+ devices at boot time.
+
+ o Rather than using up two IPs and ARPing for one of them, you can
+ also provide direct access to your LAN by the UML by using a
+ bridge.
+
+
+ host#
+ brctl addbr br0
+
+
+
+
+
+
+ host#
+ ifconfig eth0 0.0.0.0 promisc up
+
+
+
+
+
+
+ host#
+ ifconfig tap0 0.0.0.0 promisc up
+
+
+
+
+
+
+ host#
+ ifconfig br0 192.168.0.1 netmask 255.255.255.0 up
+
+
+
+
+
+
+
+ host#
+ brctl stp br0 off
+
+
+
+
+
+
+ host#
+ brctl setfd br0 1
+
+
+
+
+
+
+ host#
+ brctl sethello br0 1
+
+
+
+
+
+
+ host#
+ brctl addif br0 eth0
+
+
+
+
+
+
+ host#
+ brctl addif br0 tap0
+
+
+
+
+ Note that 'br0' should be setup using ifconfig with the existing IP
+ address of eth0, as eth0 no longer has its own IP.
+
+ o
+
+
+ Also, the /dev/net/tun device must be writable by the user running
+ UML in order for the UML to use the device that's been configured
+ for it. The simplest thing to do is
+
+
+ host# chmod 666 /dev/net/tun
+
+
+
+
+ Making it world-writable looks bad, but it seems not to be
+ exploitable as a security hole. However, it does allow anyone to cre-
+ ate useless tap devices (useless because they can't configure them),
+ which is a DOS attack. A somewhat more secure alternative would to be
+ to create a group containing all the users who have preconfigured tap
+ devices and chgrp /dev/net/tun to that group with mode 664 or 660.
+
+
+ o Once the device is set up, run UML with 'eth0=tuntap,device name'
+ (i.e. 'eth0=tuntap,tap0') on the command line (or do it with the
+ mconsole config command).
+
+ o Bring the eth device up in UML and you're in business.
+
+ If you don't want that tap device any more, you can make it non-
+ persistent with
+
+
+ host# tunctl -d tap device
+
+
+
+
+ Finally, tunctl has a -b (for brief mode) switch which causes it to
+ output only the name of the tap device it created. This makes it
+ suitable for capture by a script:
+
+
+ host# TAP=`tunctl -u 1000 -b`
+
+
+
+
+
+
+ 6.8. Ethertap
+
+ Ethertap is the general mechanism on 2.2 for userspace processes to
+ exchange packets with the kernel.
+
+
+
+ To use this transport, you need to describe the virtual network device
+ on the UML command line. The general format for this is
+
+
+ eth <n> =ethertap, <device> , <ethernet address> , <tap IP address>
+
+
+
+
+ So, the previous example
+
+
+ eth0=ethertap,tap0,fe:fd:0:0:0:1,192.168.0.254
+
+
+
+
+ attaches the UML eth0 device to the host /dev/tap0, assigns it the
+ ethernet address fe:fd:0:0:0:1, and assigns the IP address
+ 192.168.0.254 to the tap device.
+
+
+
+ The tap device is mandatory, but the others are optional. If the
+ ethernet address is omitted, one will be assigned to it.
+
+
+ The presence of the tap IP address will cause the helper to run and do
+ whatever host setup is needed to allow the virtual machine to
+ communicate with the outside world. If you're not sure you know what
+ you're doing, this is the way to go.
+
+
+ If it is absent, then you must configure the tap device and whatever
+ arping and routing you will need on the host. However, even in this
+ case, the uml_net helper still needs to be in your path and it must be
+ setuid root if you're not running UML as root. This is because the
+ tap device doesn't support SIGIO, which UML needs in order to use
+ something as a source of input. So, the helper is used as a
+ convenient asynchronous IO thread.
+
+ If you're using the uml_net helper, you can ignore the following host
+ setup - uml_net will do it for you. You just need to make sure you
+ have ethertap available, either built in to the host kernel or
+ available as a module.
+
+
+ If you want to set things up yourself, you need to make sure that the
+ appropriate /dev entry exists. If it doesn't, become root and create
+ it as follows:
+
+
+ mknod /dev/tap <minor> c 36 <minor> + 16
+
+
+
+
+ For example, this is how to create /dev/tap0:
+
+
+ mknod /dev/tap0 c 36 0 + 16
+
+
+
+
+ You also need to make sure that the host kernel has ethertap support.
+ If ethertap is enabled as a module, you apparently need to insmod
+ ethertap once for each ethertap device you want to enable. So,
+
+
+ host#
+ insmod ethertap
+
+
+
+
+ will give you the tap0 interface. To get the tap1 interface, you need
+ to run
+
+
+ host#
+ insmod ethertap unit=1 -o ethertap1
+
+
+
+
+
+
+
+ 6.9. The switch daemon
+
+ Note: This is the daemon formerly known as uml_router, but which was
+ renamed so the network weenies of the world would stop growling at me.
+
+
+ The switch daemon, uml_switch, provides a mechanism for creating a
+ totally virtual network. By default, it provides no connection to the
+ host network (but see -tap, below).
+
+
+ The first thing you need to do is run the daemon. Running it with no
+ arguments will make it listen on a default pair of unix domain
+ sockets.
+
+
+ If you want it to listen on a different pair of sockets, use
+
+
+ -unix control socket data socket
+
+
+
+
+
+ If you want it to act as a hub rather than a switch, use
+
+
+ -hub
+
+
+
+
+
+ If you want the switch to be connected to host networking (allowing
+ the umls to get access to the outside world through the host), use
+
+
+ -tap tap0
+
+
+
+
+
+ Note that the tap device must be preconfigured (see "TUN/TAP with a
+ preconfigured tap device", above). If you're using a different tap
+ device than tap0, specify that instead of tap0.
+
+
+ uml_switch can be backgrounded as follows
+
+
+ host%
+ uml_switch [ options ] < /dev/null > /dev/null
+
+
+
+
+ The reason it doesn't background by default is that it listens to
+ stdin for EOF. When it sees that, it exits.
+
+
+ The general format of the kernel command line switch is
+
+
+
+ ethn=daemon,ethernet address,socket
+ type,control socket,data socket
+
+
+
+
+ You can leave off everything except the 'daemon'. You only need to
+ specify the ethernet address if the one that will be assigned to it
+ isn't acceptable for some reason. The rest of the arguments describe
+ how to communicate with the daemon. You should only specify them if
+ you told the daemon to use different sockets than the default. So, if
+ you ran the daemon with no arguments, running the UML on the same
+ machine with
+ eth0=daemon
+
+
+
+
+ will cause the eth0 driver to attach itself to the daemon correctly.
+
+
+
+ 6.10. Slip
+
+ Slip is another, less general, mechanism for a process to communicate
+ with the host networking. In contrast to the ethertap interface,
+ which exchanges ethernet frames with the host and can be used to
+ transport any higher-level protocol, it can only be used to transport
+ IP.
+
+
+ The general format of the command line switch is
+
+
+
+ ethn=slip,slip IP
+
+
+
+
+ The slip IP argument is the IP address that will be assigned to the
+ host end of the slip device. If it is specified, the helper will run
+ and will set up the host so that the virtual machine can reach it and
+ the rest of the network.
+
+
+ There are some oddities with this interface that you should be aware
+ of. You should only specify one slip device on a given virtual
+ machine, and its name inside UML will be 'umn', not 'eth0' or whatever
+ you specified on the command line. These problems will be fixed at
+ some point.
+
+
+
+ 6.11. Slirp
+
+ slirp uses an external program, usually /usr/bin/slirp, to provide IP
+ only networking connectivity through the host. This is similar to IP
+ masquerading with a firewall, although the translation is performed in
+ user-space, rather than by the kernel. As slirp does not set up any
+ interfaces on the host, or changes routing, slirp does not require
+ root access or setuid binaries on the host.
+
+
+ The general format of the command line switch for slirp is:
+
+
+
+ ethn=slirp,ethernet address,slirp path
+
+
+
+
+ The ethernet address is optional, as UML will set up the interface
+ with an ethernet address based upon the initial IP address of the
+ interface. The slirp path is generally /usr/bin/slirp, although it
+ will depend on distribution.
+
+
+ The slirp program can have a number of options passed to the command
+ line and we can't add them to the UML command line, as they will be
+ parsed incorrectly. Instead, a wrapper shell script can be written or
+ the options inserted into the /.slirprc file. More information on
+ all of the slirp options can be found in its man pages.
+
+
+ The eth0 interface on UML should be set up with the IP 10.2.0.15,
+ although you can use anything as long as it is not used by a network
+ you will be connecting to. The default route on UML should be set to
+ use
+
+
+ UML#
+ route add default dev eth0
+
+
+
+
+ slirp provides a number of useful IP addresses which can be used by
+ UML, such as 10.0.2.3 which is an alias for the DNS server specified
+ in /etc/resolv.conf on the host or the IP given in the 'dns' option
+ for slirp.
+
+
+ Even with a baudrate setting higher than 115200, the slirp connection
+ is limited to 115200. If you need it to go faster, the slirp binary
+ needs to be compiled with FULL_BOLT defined in config.h.
+
+
+
+ 6.12. pcap
+
+ The pcap transport is attached to a UML ethernet device on the command
+ line or with uml_mconsole with the following syntax:
+
+
+
+ ethn=pcap,host interface,filter
+ expression,option1,option2
+
+
+
+
+ The expression and options are optional.
+
+
+ The interface is whatever network device on the host you want to
+ sniff. The expression is a pcap filter expression, which is also what
+ tcpdump uses, so if you know how to specify tcpdump filters, you will
+ use the same expressions here. The options are up to two of
+ 'promisc', control whether pcap puts the host interface into
+ promiscuous mode. 'optimize' and 'nooptimize' control whether the pcap
+ expression optimizer is used.
+
+
+ Example:
+
+
+
+ eth0=pcap,eth0,tcp
+
+ eth1=pcap,eth0,!tcp
+
+
+
+ will cause the UML eth0 to emit all tcp packets on the host eth0 and
+ the UML eth1 to emit all non-tcp packets on the host eth0.
+
+
+
+ 6.13. Setting up the host yourself
+
+ If you don't specify an address for the host side of the ethertap or
+ slip device, UML won't do any setup on the host. So this is what is
+ needed to get things working (the examples use a host-side IP of
+ 192.168.0.251 and a UML-side IP of 192.168.0.250 - adjust to suit your
+ own network):
+
+ o The device needs to be configured with its IP address. Tap devices
+ are also configured with an mtu of 1484. Slip devices are
+ configured with a point-to-point address pointing at the UML ip
+ address.
+
+
+ host# ifconfig tap0 arp mtu 1484 192.168.0.251 up
+
+
+
+
+
+
+ host#
+ ifconfig sl0 192.168.0.251 pointopoint 192.168.0.250 up
+
+
+
+
+
+ o If a tap device is being set up, a route is set to the UML IP.
+
+
+ UML# route add -host 192.168.0.250 gw 192.168.0.251
+
+
+
+
+
+ o To allow other hosts on your network to see the virtual machine,
+ proxy arp is set up for it.
+
+
+ host# arp -Ds 192.168.0.250 eth0 pub
+
+
+
+
+
+ o Finally, the host is set up to route packets.
+
+
+ host# echo 1 > /proc/sys/net/ipv4/ip_forward
+
+
+
+
+
+
+
+
+
+
+ 7. Sharing Filesystems between Virtual Machines
+
+
+
+
+ 7.1. A warning
+
+ Don't attempt to share filesystems simply by booting two UMLs from the
+ same file. That's the same thing as booting two physical machines
+ from a shared disk. It will result in filesystem corruption.
+
+
+
+ 7.2. Using layered block devices
+
+ The way to share a filesystem between two virtual machines is to use
+ the copy-on-write (COW) layering capability of the ubd block driver.
+ As of 2.4.6-2um, the driver supports layering a read-write private
+ device over a read-only shared device. A machine's writes are stored
+ in the private device, while reads come from either device - the
+ private one if the requested block is valid in it, the shared one if
+ not. Using this scheme, the majority of data which is unchanged is
+ shared between an arbitrary number of virtual machines, each of which
+ has a much smaller file containing the changes that it has made. With
+ a large number of UMLs booting from a large root filesystem, this
+ leads to a huge disk space saving. It will also help performance,
+ since the host will be able to cache the shared data using a much
+ smaller amount of memory, so UML disk requests will be served from the
+ host's memory rather than its disks.
+
+
+
+
+ To add a copy-on-write layer to an existing block device file, simply
+ add the name of the COW file to the appropriate ubd switch:
+
+
+ ubd0=root_fs_cow,root_fs_debian_22
+
+
+
+
+ where 'root_fs_cow' is the private COW file and 'root_fs_debian_22' is
+ the existing shared filesystem. The COW file need not exist. If it
+ doesn't, the driver will create and initialize it. Once the COW file
+ has been initialized, it can be used on its own on the command line:
+
+
+ ubd0=root_fs_cow
+
+
+
+
+ The name of the backing file is stored in the COW file header, so it
+ would be redundant to continue specifying it on the command line.
+
+
+
+ 7.3. Note!
+
+ When checking the size of the COW file in order to see the gobs of
+ space that you're saving, make sure you use 'ls -ls' to see the actual
+ disk consumption rather than the length of the file. The COW file is
+ sparse, so the length will be very different from the disk usage.
+ Here is a 'ls -l' of a COW file and backing file from one boot and
+ shutdown:
+ host% ls -l cow.debian debian2.2
+ -rw-r--r-- 1 jdike jdike 492504064 Aug 6 21:16 cow.debian
+ -rwxrw-rw- 1 jdike jdike 537919488 Aug 6 20:42 debian2.2
+
+
+
+
+ Doesn't look like much saved space, does it? Well, here's 'ls -ls':
+
+
+ host% ls -ls cow.debian debian2.2
+ 880 -rw-r--r-- 1 jdike jdike 492504064 Aug 6 21:16 cow.debian
+ 525832 -rwxrw-rw- 1 jdike jdike 537919488 Aug 6 20:42 debian2.2
+
+
+
+
+ Now, you can see that the COW file has less than a meg of disk, rather
+ than 492 meg.
+
+
+
+ 7.4. Another warning
+
+ Once a filesystem is being used as a readonly backing file for a COW
+ file, do not boot directly from it or modify it in any way. Doing so
+ will invalidate any COW files that are using it. The mtime and size
+ of the backing file are stored in the COW file header at its creation,
+ and they must continue to match. If they don't, the driver will
+ refuse to use the COW file.
+
+
+
+
+ If you attempt to evade this restriction by changing either the
+ backing file or the COW header by hand, you will get a corrupted
+ filesystem.
+
+
+
+
+ Among other things, this means that upgrading the distribution in a
+ backing file and expecting that all of the COW files using it will see
+ the upgrade will not work.
+
+
+
+
+ 7.5. uml_moo : Merging a COW file with its backing file
+
+ Depending on how you use UML and COW devices, it may be advisable to
+ merge the changes in the COW file into the backing file every once in
+ a while.
+
+
+
+
+ The utility that does this is uml_moo. Its usage is
+
+
+ host% uml_moo COW file new backing file
+
+
+
+
+ There's no need to specify the backing file since that information is
+ already in the COW file header. If you're paranoid, boot the new
+ merged file, and if you're happy with it, move it over the old backing
+ file.
+
+
+
+
+ uml_moo creates a new backing file by default as a safety measure. It
+ also has a destructive merge option which will merge the COW file
+ directly into its current backing file. This is really only usable
+ when the backing file only has one COW file associated with it. If
+ there are multiple COWs associated with a backing file, a -d merge of
+ one of them will invalidate all of the others. However, it is
+ convenient if you're short of disk space, and it should also be
+ noticeably faster than a non-destructive merge.
+
+
+
+
+ uml_moo is installed with the UML deb and RPM. If you didn't install
+ UML from one of those packages, you can also get it from the UML
+ utilities <http://user-mode-linux.sourceforge.net/
+ utilities> tar file in tools/moo.
+
+
+
+
+
+
+
+
+ 8. Creating filesystems
+
+
+ You may want to create and mount new UML filesystems, either because
+ your root filesystem isn't large enough or because you want to use a
+ filesystem other than ext2.
+
+
+ This was written on the occasion of reiserfs being included in the
+ 2.4.1 kernel pool, and therefore the 2.4.1 UML, so the examples will
+ talk about reiserfs. This information is generic, and the examples
+ should be easy to translate to the filesystem of your choice.
+
+
+ 8.1. Create the filesystem file
+
+ dd is your friend. All you need to do is tell dd to create an empty
+ file of the appropriate size. I usually make it sparse to save time
+ and to avoid allocating disk space until it's actually used. For
+ example, the following command will create a sparse 100 meg file full
+ of zeroes.
+
+
+ host%
+ dd if=/dev/zero of=new_filesystem seek=100 count=1 bs=1M
+
+
+
+
+
+
+ 8.2. Assign the file to a UML device
+
+ Add an argument like the following to the UML command line:
+
+ ubd4=new_filesystem
+
+
+
+
+ making sure that you use an unassigned ubd device number.
+
+
+
+ 8.3. Creating and mounting the filesystem
+
+ Make sure that the filesystem is available, either by being built into
+ the kernel, or available as a module, then boot up UML and log in. If
+ the root filesystem doesn't have the filesystem utilities (mkfs, fsck,
+ etc), then get them into UML by way of the net or hostfs.
+
+
+ Make the new filesystem on the device assigned to the new file:
+
+
+ host# mkreiserfs /dev/ubd/4
+
+
+ <----------- MKREISERFSv2 ----------->
+
+ ReiserFS version 3.6.25
+ Block size 4096 bytes
+ Block count 25856
+ Used blocks 8212
+ Journal - 8192 blocks (18-8209), journal header is in block 8210
+ Bitmaps: 17
+ Root block 8211
+ Hash function "r5"
+ ATTENTION: ALL DATA WILL BE LOST ON '/dev/ubd/4'! (y/n)y
+ journal size 8192 (from 18)
+ Initializing journal - 0%....20%....40%....60%....80%....100%
+ Syncing..done.
+
+
+
+
+ Now, mount it:
+
+
+ UML#
+ mount /dev/ubd/4 /mnt
+
+
+
+
+ and you're in business.
+
+
+
+
+
+
+
+
+
+ 9. Host file access
+
+
+ If you want to access files on the host machine from inside UML, you
+ can treat it as a separate machine and either nfs mount directories
+ from the host or copy files into the virtual machine with scp or rcp.
+ However, since UML is running on the host, it can access those
+ files just like any other process and make them available inside the
+ virtual machine without needing to use the network.
+
+
+ This is now possible with the hostfs virtual filesystem. With it, you
+ can mount a host directory into the UML filesystem and access the
+ files contained in it just as you would on the host.
+
+
+ 9.1. Using hostfs
+
+ To begin with, make sure that hostfs is available inside the virtual
+ machine with
+
+
+ UML# cat /proc/filesystems
+
+
+
+ . hostfs should be listed. If it's not, either rebuild the kernel
+ with hostfs configured into it or make sure that hostfs is built as a
+ module and available inside the virtual machine, and insmod it.
+
+
+ Now all you need to do is run mount:
+
+
+ UML# mount none /mnt/host -t hostfs
+
+
+
+
+ will mount the host's / on the virtual machine's /mnt/host.
+
+
+ If you don't want to mount the host root directory, then you can
+ specify a subdirectory to mount with the -o switch to mount:
+
+
+ UML# mount none /mnt/home -t hostfs -o /home
+
+
+
+
+ will mount the hosts's /home on the virtual machine's /mnt/home.
+
+
+
+ 9.2. hostfs as the root filesystem
+
+ It's possible to boot from a directory hierarchy on the host using
+ hostfs rather than using the standard filesystem in a file.
+
+ To start, you need that hierarchy. The easiest way is to loop mount
+ an existing root_fs file:
+
+
+ host# mount root_fs uml_root_dir -o loop
+
+
+
+
+ You need to change the filesystem type of / in etc/fstab to be
+ 'hostfs', so that line looks like this:
+
+ /dev/ubd/0 / hostfs defaults 1 1
+
+
+
+
+ Then you need to chown to yourself all the files in that directory
+ that are owned by root. This worked for me:
+
+
+ host# find . -uid 0 -exec chown jdike {} \;
+
+
+
+
+ Next, make sure that your UML kernel has hostfs compiled in, not as a
+ module. Then run UML with the boot device pointing at that directory:
+
+
+ ubd0=/path/to/uml/root/directory
+
+
+
+
+ UML should then boot as it does normally.
+
+
+ 9.3. Building hostfs
+
+ If you need to build hostfs because it's not in your kernel, you have
+ two choices:
+
+
+
+ o Compiling hostfs into the kernel:
+
+
+ Reconfigure the kernel and set the 'Host filesystem' option under
+
+
+ o Compiling hostfs as a module:
+
+
+ Reconfigure the kernel and set the 'Host filesystem' option under
+ be in arch/um/fs/hostfs/hostfs.o. Install that in
+ /lib/modules/`uname -r`/fs in the virtual machine, boot it up, and
+
+
+ UML# insmod hostfs
+
+
+
+
+
+
+
+
+
+
+
+
+ 10. The Management Console
+
+
+
+ The UML management console is a low-level interface to the kernel,
+ somewhat like the i386 SysRq interface. Since there is a full-blown
+ operating system under UML, there is much greater flexibility possible
+ than with the SysRq mechanism.
+
+
+ There are a number of things you can do with the mconsole interface:
+
+ o get the kernel version
+
+ o add and remove devices
+
+ o halt or reboot the machine
+
+ o Send SysRq commands
+
+ o Pause and resume the UML
+
+
+ You need the mconsole client (uml_mconsole) which is present in CVS
+ (/tools/mconsole) in 2.4.5-9um and later, and will be in the RPM in
+ 2.4.6.
+
+
+ You also need CONFIG_MCONSOLE (under 'General Setup') enabled in UML.
+ When you boot UML, you'll see a line like:
+
+
+ mconsole initialized on /home/jdike/.uml/umlNJ32yL/mconsole
+
+
+
+
+ If you specify a unique machine id one the UML command line, i.e.
+
+
+ umid=debian
+
+
+
+
+ you'll see this
+
+
+ mconsole initialized on /home/jdike/.uml/debian/mconsole
+
+
+
+
+ That file is the socket that uml_mconsole will use to communicate with
+ UML. Run it with either the umid or the full path as its argument:
+
+
+ host% uml_mconsole debian
+
+
+
+
+ or
+
+
+ host% uml_mconsole /home/jdike/.uml/debian/mconsole
+
+
+
+
+ You'll get a prompt, at which you can run one of these commands:
+
+ o version
+
+ o halt
+
+ o reboot
+
+ o config
+
+ o remove
+
+ o sysrq
+
+ o help
+
+ o cad
+
+ o stop
+
+ o go
+
+
+ 10.1. version
+
+ This takes no arguments. It prints the UML version.
+
+
+ (mconsole) version
+ OK Linux usermode 2.4.5-9um #1 Wed Jun 20 22:47:08 EDT 2001 i686
+
+
+
+
+ There are a couple actual uses for this. It's a simple no-op which
+ can be used to check that a UML is running. It's also a way of
+ sending an interrupt to the UML. This is sometimes useful on SMP
+ hosts, where there's a bug which causes signals to UML to be lost,
+ often causing it to appear to hang. Sending such a UML the mconsole
+ version command is a good way to 'wake it up' before networking has
+ been enabled, as it does not do anything to the function of the UML.
+
+
+
+ 10.2. halt and reboot
+
+ These take no arguments. They shut the machine down immediately, with
+ no syncing of disks and no clean shutdown of userspace. So, they are
+ pretty close to crashing the machine.
+
+
+ (mconsole) halt
+ OK
+
+
+
+
+
+
+ 10.3. config
+
+ "config" adds a new device to the virtual machine. Currently the ubd
+ and network drivers support this. It takes one argument, which is the
+ device to add, with the same syntax as the kernel command line.
+
+
+
+
+ (mconsole)
+ config ubd3=/home/jdike/incoming/roots/root_fs_debian22
+
+ OK
+ (mconsole) config eth1=mcast
+ OK
+
+
+
+
+
+
+ 10.4. remove
+
+ "remove" deletes a device from the system. Its argument is just the
+ name of the device to be removed. The device must be idle in whatever
+ sense the driver considers necessary. In the case of the ubd driver,
+ the removed block device must not be mounted, swapped on, or otherwise
+ open, and in the case of the network driver, the device must be down.
+
+
+ (mconsole) remove ubd3
+ OK
+ (mconsole) remove eth1
+ OK
+
+
+
+
+
+
+ 10.5. sysrq
+
+ This takes one argument, which is a single letter. It calls the
+ generic kernel's SysRq driver, which does whatever is called for by
+ that argument. See the SysRq documentation in Documentation/sysrq.txt
+ in your favorite kernel tree to see what letters are valid and what
+ they do.
+
+
+
+ 10.6. help
+
+ "help" returns a string listing the valid commands and what each one
+ does.
+
+
+
+ 10.7. cad
+
+ This invokes the Ctl-Alt-Del action on init. What exactly this ends
+ up doing is up to /etc/inittab. Normally, it reboots the machine.
+ With UML, this is usually not desired, so if a halt would be better,
+ then find the section of inittab that looks like this
+
+
+ # What to do when CTRL-ALT-DEL is pressed.
+ ca:12345:ctrlaltdel:/sbin/shutdown -t1 -a -r now
+
+
+
+
+ and change the command to halt.
+
+
+
+ 10.8. stop
+
+ This puts the UML in a loop reading mconsole requests until a 'go'
+ mconsole command is received. This is very useful for making backups
+ of UML filesystems, as the UML can be stopped, then synced via 'sysrq
+ s', so that everything is written to the filesystem. You can then copy
+ the filesystem and then send the UML 'go' via mconsole.
+
+
+ Note that a UML running with more than one CPU will have problems
+ after you send the 'stop' command, as only one CPU will be held in a
+ mconsole loop and all others will continue as normal. This is a bug,
+ and will be fixed.
+
+
+
+ 10.9. go
+
+ This resumes a UML after being paused by a 'stop' command. Note that
+ when the UML has resumed, TCP connections may have timed out and if
+ the UML is paused for a long period of time, crond might go a little
+ crazy, running all the jobs it didn't do earlier.
+
+
+
+
+
+
+
+
+ 11. Kernel debugging
+
+
+ Note: The interface that makes debugging, as described here, possible
+ is present in 2.4.0-test6 kernels and later.
+
+
+ Since the user-mode kernel runs as a normal Linux process, it is
+ possible to debug it with gdb almost like any other process. It is
+ slightly different because the kernel's threads are already being
+ ptraced for system call interception, so gdb can't ptrace them.
+ However, a mechanism has been added to work around that problem.
+
+
+ In order to debug the kernel, you need build it from source. See
+ ``Compiling the kernel and modules'' for information on doing that.
+ Make sure that you enable CONFIG_DEBUGSYM and CONFIG_PT_PROXY during
+ the config. These will compile the kernel with -g, and enable the
+ ptrace proxy so that gdb works with UML, respectively.
+
+
+
+
+ 11.1. Starting the kernel under gdb
+
+ You can have the kernel running under the control of gdb from the
+ beginning by putting 'debug' on the command line. You will get an
+ xterm with gdb running inside it. The kernel will send some commands
+ to gdb which will leave it stopped at the beginning of start_kernel.
+ At this point, you can get things going with 'next', 'step', or
+ 'cont'.
+
+
+ There is a transcript of a debugging session here <debug-
+ session.html> , with breakpoints being set in the scheduler and in an
+ interrupt handler.
+ 11.2. Examining sleeping processes
+
+ Not every bug is evident in the currently running process. Sometimes,
+ processes hang in the kernel when they shouldn't because they've
+ deadlocked on a semaphore or something similar. In this case, when
+ you ^C gdb and get a backtrace, you will see the idle thread, which
+ isn't very relevant.
+
+
+ What you want is the stack of whatever process is sleeping when it
+ shouldn't be. You need to figure out which process that is, which is
+ generally fairly easy. Then you need to get its host process id,
+ which you can do either by looking at ps on the host or at
+ task.thread.extern_pid in gdb.
+
+
+ Now what you do is this:
+
+ o detach from the current thread
+
+
+ (UML gdb) det
+
+
+
+
+
+ o attach to the thread you are interested in
+
+
+ (UML gdb) att <host pid>
+
+
+
+
+
+ o look at its stack and anything else of interest
+
+
+ (UML gdb) bt
+
+
+
+
+ Note that you can't do anything at this point that requires that a
+ process execute, e.g. calling a function
+
+ o when you're done looking at that process, reattach to the current
+ thread and continue it
+
+
+ (UML gdb)
+ att 1
+
+
+
+
+
+
+ (UML gdb)
+ c
+
+
+
+
+ Here, specifying any pid which is not the process id of a UML thread
+ will cause gdb to reattach to the current thread. I commonly use 1,
+ but any other invalid pid would work.
+
+
+
+ 11.3. Running ddd on UML
+
+ ddd works on UML, but requires a special kludge. The process goes
+ like this:
+
+ o Start ddd
+
+
+ host% ddd linux
+
+
+
+
+
+ o With ps, get the pid of the gdb that ddd started. You can ask the
+ gdb to tell you, but for some reason that confuses things and
+ causes a hang.
+
+ o run UML with 'debug=parent gdb-pid=<pid>' added to the command line
+ - it will just sit there after you hit return
+
+ o type 'att 1' to the ddd gdb and you will see something like
+
+
+ 0xa013dc51 in __kill ()
+
+
+ (gdb)
+
+
+
+
+
+ o At this point, type 'c', UML will boot up, and you can use ddd just
+ as you do on any other process.
+
+
+
+ 11.4. Debugging modules
+
+ gdb has support for debugging code which is dynamically loaded into
+ the process. This support is what is needed to debug kernel modules
+ under UML.
+
+
+ Using that support is somewhat complicated. You have to tell gdb what
+ object file you just loaded into UML and where in memory it is. Then,
+ it can read the symbol table, and figure out where all the symbols are
+ from the load address that you provided. It gets more interesting
+ when you load the module again (i.e. after an rmmod). You have to
+ tell gdb to forget about all its symbols, including the main UML ones
+ for some reason, then load then all back in again.
+
+
+ There's an easy way and a hard way to do this. The easy way is to use
+ the umlgdb expect script written by Chandan Kudige. It basically
+ automates the process for you.
+
+
+ First, you must tell it where your modules are. There is a list in
+ the script that looks like this:
+ set MODULE_PATHS {
+ "fat" "/usr/src/uml/linux-2.4.18/fs/fat/fat.o"
+ "isofs" "/usr/src/uml/linux-2.4.18/fs/isofs/isofs.o"
+ "minix" "/usr/src/uml/linux-2.4.18/fs/minix/minix.o"
+ }
+
+
+
+
+ You change that to list the names and paths of the modules that you
+ are going to debug. Then you run it from the toplevel directory of
+ your UML pool and it basically tells you what to do:
+
+
+
+
+ ******** GDB pid is 21903 ********
+ Start UML as: ./linux <kernel switches> debug gdb-pid=21903
+
+
+
+ GNU gdb 5.0rh-5 Red Hat Linux 7.1
+ Copyright 2001 Free Software Foundation, Inc.
+ GDB is free software, covered by the GNU General Public License, and you are
+ welcome to change it and/or distribute copies of it under certain conditions.
+ Type "show copying" to see the conditions.
+ There is absolutely no warranty for GDB. Type "show warranty" for details.
+ This GDB was configured as "i386-redhat-linux"...
+ (gdb) b sys_init_module
+ Breakpoint 1 at 0xa0011923: file module.c, line 349.
+ (gdb) att 1
+
+
+
+
+ After you run UML and it sits there doing nothing, you hit return at
+ the 'att 1' and continue it:
+
+
+ Attaching to program: /home/jdike/linux/2.4/um/./linux, process 1
+ 0xa00f4221 in __kill ()
+ (UML gdb) c
+ Continuing.
+
+
+
+
+ At this point, you debug normally. When you insmod something, the
+ expect magic will kick in and you'll see something like:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ *** Module hostfs loaded ***
+ Breakpoint 1, sys_init_module (name_user=0x805abb0 "hostfs",
+ mod_user=0x8070e00) at module.c:349
+ 349 char *name, *n_name, *name_tmp = NULL;
+ (UML gdb) finish
+ Run till exit from #0 sys_init_module (name_user=0x805abb0 "hostfs",
+ mod_user=0x8070e00) at module.c:349
+ 0xa00e2e23 in execute_syscall (r=0xa8140284) at syscall_kern.c:411
+ 411 else res = EXECUTE_SYSCALL(syscall, regs);
+ Value returned is $1 = 0
+ (UML gdb)
+ p/x (int)module_list + module_list->size_of_struct
+
+ $2 = 0xa9021054
+ (UML gdb) symbol-file ./linux
+ Load new symbol table from "./linux"? (y or n) y
+ Reading symbols from ./linux...
+ done.
+ (UML gdb)
+ add-symbol-file /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o 0xa9021054
+
+ add symbol table from file "/home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o" at
+ .text_addr = 0xa9021054
+ (y or n) y
+
+ Reading symbols from /home/jdike/linux/2.4/um/arch/um/fs/hostfs/hostfs.o...
+ done.
+ (UML gdb) p *module_list
+ $1 = {size_of_struct = 84, next = 0xa0178720, name = 0xa9022de0 "hostfs",
+ size = 9016, uc = {usecount = {counter = 0}, pad = 0}, flags = 1,
+ nsyms = 57, ndeps = 0, syms = 0xa9023170, deps = 0x0, refs = 0x0,
+ init = 0xa90221f0 <init_hostfs>, cleanup = 0xa902222c <exit_hostfs>,
+ ex_table_start = 0x0, ex_table_end = 0x0, persist_start = 0x0,
+ persist_end = 0x0, can_unload = 0, runsize = 0, kallsyms_start = 0x0,
+ kallsyms_end = 0x0,
+ archdata_start = 0x1b855 <Address 0x1b855 out of bounds>,
+ archdata_end = 0xe5890000 <Address 0xe5890000 out of bounds>,
+ kernel_data = 0xf689c35d <Address 0xf689c35d out of bounds>}
+ >> Finished loading symbols for hostfs ...
+
+
+
+
+ That's the easy way. It's highly recommended. The hard way is
+ described below in case you're interested in what's going on.
+
+
+ Boot the kernel under the debugger and load the module with insmod or
+ modprobe. With gdb, do:
+
+
+ (UML gdb) p module_list
+
+
+
+
+ This is a list of modules that have been loaded into the kernel, with
+ the most recently loaded module first. Normally, the module you want
+ is at module_list. If it's not, walk down the next links, looking at
+ the name fields until find the module you want to debug. Take the
+ address of that structure, and add module.size_of_struct (which in
+ 2.4.10 kernels is 96 (0x60)) to it. Gdb can make this hard addition
+ for you :-):
+
+
+
+ (UML gdb)
+ printf "%#x\n", (int)module_list module_list->size_of_struct
+
+
+
+
+ The offset from the module start occasionally changes (before 2.4.0,
+ it was module.size_of_struct + 4), so it's a good idea to check the
+ init and cleanup addresses once in a while, as describe below. Now
+ do:
+
+
+ (UML gdb)
+ add-symbol-file /path/to/module/on/host that_address
+
+
+
+
+ Tell gdb you really want to do it, and you're in business.
+
+
+ If there's any doubt that you got the offset right, like breakpoints
+ appear not to work, or they're appearing in the wrong place, you can
+ check it by looking at the module structure. The init and cleanup
+ fields should look like:
+
+
+ init = 0x588066b0 <init_hostfs>, cleanup = 0x588066c0 <exit_hostfs>
+
+
+
+
+ with no offsets on the symbol names. If the names are right, but they
+ are offset, then the offset tells you how much you need to add to the
+ address you gave to add-symbol-file.
+
+
+ When you want to load in a new version of the module, you need to get
+ gdb to forget about the old one. The only way I've found to do that
+ is to tell gdb to forget about all symbols that it knows about:
+
+
+ (UML gdb) symbol-file
+
+
+
+
+ Then reload the symbols from the kernel binary:
+
+
+ (UML gdb) symbol-file /path/to/kernel
+
+
+
+
+ and repeat the process above. You'll also need to re-enable break-
+ points. They were disabled when you dumped all the symbols because
+ gdb couldn't figure out where they should go.
+
+
+
+ 11.5. Attaching gdb to the kernel
+
+ If you don't have the kernel running under gdb, you can attach gdb to
+ it later by sending the tracing thread a SIGUSR1. The first line of
+ the console output identifies its pid:
+ tracing thread pid = 20093
+
+
+
+
+ When you send it the signal:
+
+
+ host% kill -USR1 20093
+
+
+
+
+ you will get an xterm with gdb running in it.
+
+
+ If you have the mconsole compiled into UML, then the mconsole client
+ can be used to start gdb:
+
+
+ (mconsole) (mconsole) config gdb=xterm
+
+
+
+
+ will fire up an xterm with gdb running in it.
+
+
+
+ 11.6. Using alternate debuggers
+
+ UML has support for attaching to an already running debugger rather
+ than starting gdb itself. This is present in CVS as of 17 Apr 2001.
+ I sent it to Alan for inclusion in the ac tree, and it will be in my
+ 2.4.4 release.
+
+
+ This is useful when gdb is a subprocess of some UI, such as emacs or
+ ddd. It can also be used to run debuggers other than gdb on UML.
+ Below is an example of using strace as an alternate debugger.
+
+
+ To do this, you need to get the pid of the debugger and pass it in
+ with the
+
+
+ If you are using gdb under some UI, then tell it to 'att 1', and
+ you'll find yourself attached to UML.
+
+
+ If you are using something other than gdb as your debugger, then
+ you'll need to get it to do the equivalent of 'att 1' if it doesn't do
+ it automatically.
+
+
+ An example of an alternate debugger is strace. You can strace the
+ actual kernel as follows:
+
+ o Run the following in a shell
+
+
+ host%
+ sh -c 'echo pid=$$; echo -n hit return; read x; exec strace -p 1 -o strace.out'
+
+
+
+ o Run UML with 'debug' and 'gdb-pid=<pid>' with the pid printed out
+ by the previous command
+
+ o Hit return in the shell, and UML will start running, and strace
+ output will start accumulating in the output file.
+
+ Note that this is different from running
+
+
+ host% strace ./linux
+
+
+
+
+ That will strace only the main UML thread, the tracing thread, which
+ doesn't do any of the actual kernel work. It just oversees the vir-
+ tual machine. In contrast, using strace as described above will show
+ you the low-level activity of the virtual machine.
+
+
+
+
+
+ 12. Kernel debugging examples
+
+ 12.1. The case of the hung fsck
+
+ When booting up the kernel, fsck failed, and dropped me into a shell
+ to fix things up. I ran fsck -y, which hung:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Setting hostname uml [ OK ]
+ Checking root filesystem
+ /dev/fhd0 was not cleanly unmounted, check forced.
+ Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.
+
+ /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.
+ (i.e., without -a or -p options)
+ [ FAILED ]
+
+ *** An error occurred during the file system check.
+ *** Dropping you to a shell; the system will reboot
+ *** when you leave the shell.
+ Give root password for maintenance
+ (or type Control-D for normal startup):
+
+ [root@uml /root]# fsck -y /dev/fhd0
+ fsck -y /dev/fhd0
+ Parallelizing fsck version 1.14 (9-Jan-1999)
+ e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09
+ /dev/fhd0 contains a file system with errors, check forced.
+ Pass 1: Checking inodes, blocks, and sizes
+ Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. Ignore error? yes
+
+ Inode 19780, i_blocks is 1548, should be 540. Fix? yes
+
+ Pass 2: Checking directory structure
+ Error reading block 49405 (Attempt to read block from filesystem resulted in short read). Ignore error? yes
+
+ Directory inode 11858, block 0, offset 0: directory corrupted
+ Salvage? yes
+
+ Missing '.' in directory inode 11858.
+ Fix? yes
+
+ Missing '..' in directory inode 11858.
+ Fix? yes
+
+
+
+
+
+ The standard drill in this sort of situation is to fire up gdb on the
+ signal thread, which, in this case, was pid 1935. In another window,
+ I run gdb and attach pid 1935.
+
+
+
+
+ ~/linux/2.3.26/um 1016: gdb linux
+ GNU gdb 4.17.0.11 with Linux support
+ Copyright 1998 Free Software Foundation, Inc.
+ GDB is free software, covered by the GNU General Public License, and you are
+ welcome to change it and/or distribute copies of it under certain conditions.
+ Type "show copying" to see the conditions.
+ There is absolutely no warranty for GDB. Type "show warranty" for details.
+ This GDB was configured as "i386-redhat-linux"...
+
+ (gdb) att 1935
+ Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 1935
+ 0x100756d9 in __wait4 ()
+
+
+
+
+
+
+ Let's see what's currently running:
+
+
+
+ (gdb) p current_task.pid
+ $1 = 0
+
+
+
+
+
+ It's the idle thread, which means that fsck went to sleep for some
+ reason and never woke up.
+
+
+ Let's guess that the last process in the process list is fsck:
+
+
+
+ (gdb) p current_task.prev_task.comm
+ $13 = "fsck.ext2\000\000\000\000\000\000"
+
+
+
+
+
+ It is, so let's see what it thinks it's up to:
+
+
+
+ (gdb) p current_task.prev_task.thread
+ $14 = {extern_pid = 1980, tracing = 0, want_tracing = 0, forking = 0,
+ kernel_stack_page = 0, signal_stack = 1342627840, syscall = {id = 4, args = {
+ 3, 134973440, 1024, 0, 1024}, have_result = 0, result = 50590720},
+ request = {op = 2, u = {exec = {ip = 1350467584, sp = 2952789424}, fork = {
+ regs = {1350467584, 2952789424, 0 <repeats 15 times>}, sigstack = 0,
+ pid = 0}, switch_to = 0x507e8000, thread = {proc = 0x507e8000,
+ arg = 0xaffffdb0, flags = 0, new_pid = 0}, input_request = {
+ op = 1350467584, fd = -1342177872, proc = 0, pid = 0}}}}
+
+
+
+
+
+ The interesting things here are the fact that its .thread.syscall.id
+ is __NR_write (see the big switch in arch/um/kernel/syscall_kern.c or
+ the defines in include/asm-um/arch/unistd.h), and that it never
+ returned. Also, its .request.op is OP_SWITCH (see
+ arch/um/include/user_util.h). These mean that it went into a write,
+ and, for some reason, called schedule().
+
+
+ The fact that it never returned from write means that its stack should
+ be fairly interesting. Its pid is 1980 (.thread.extern_pid). That
+ process is being ptraced by the signal thread, so it must be detached
+ before gdb can attach it:
+
+
+
+
+
+
+
+
+
+
+ (gdb) call detach(1980)
+
+ Program received signal SIGSEGV, Segmentation fault.
+ <function called from gdb>
+ The program being debugged stopped while in a function called from GDB.
+ When the function (detach) is done executing, GDB will silently
+ stop (instead of continuing to evaluate the expression containing
+ the function call).
+ (gdb) call detach(1980)
+ $15 = 0
+
+
+
+
+
+ The first detach segfaults for some reason, and the second one
+ succeeds.
+
+
+ Now I detach from the signal thread, attach to the fsck thread, and
+ look at its stack:
+
+
+ (gdb) det
+ Detaching from program: /home/dike/linux/2.3.26/um/linux Pid 1935
+ (gdb) att 1980
+ Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 1980
+ 0x10070451 in __kill ()
+ (gdb) bt
+ #0 0x10070451 in __kill ()
+ #1 0x10068ccd in usr1_pid (pid=1980) at process.c:30
+ #2 0x1006a03f in _switch_to (prev=0x50072000, next=0x507e8000)
+ at process_kern.c:156
+ #3 0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000)
+ at process_kern.c:161
+ #4 0x10001d12 in schedule () at core.c:777
+ #5 0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71
+ #6 0x1006aa10 in __down_failed () at semaphore.c:157
+ #7 0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174
+ #8 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182
+ #9 <signal handler called>
+ #10 0x10155404 in errno ()
+ #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50
+ #12 0x1006c5d8 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
+ #13 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182
+ #14 <signal handler called>
+ #15 0xc0fd in ?? ()
+ #16 0x10016647 in sys_write (fd=3,
+ buf=0x80b8800 <Address 0x80b8800 out of bounds>, count=1024)
+ at read_write.c:159
+ #17 0x1006d5b3 in execute_syscall (syscall=4, args=0x5006ef08)
+ at syscall_kern.c:254
+ #18 0x1006af87 in really_do_syscall (sig=12) at syscall_user.c:35
+ #19 <signal handler called>
+ #20 0x400dc8b0 in ?? ()
+
+
+
+
+
+ The interesting things here are :
+
+ o There are two segfaults on this stack (frames 9 and 14)
+
+ o The first faulting address (frame 11) is 0x50000800
+
+ (gdb) p (void *)1342179328
+ $16 = (void *) 0x50000800
+
+
+
+
+
+ The initial faulting address is interesting because it is on the idle
+ thread's stack. I had been seeing the idle thread segfault for no
+ apparent reason, and the cause looked like stack corruption. In hopes
+ of catching the culprit in the act, I had turned off all protections
+ to that stack while the idle thread wasn't running. This apparently
+ tripped that trap.
+
+
+ However, the more immediate problem is that second segfault and I'm
+ going to concentrate on that. First, I want to see where the fault
+ happened, so I have to go look at the sigcontent struct in frame 8:
+
+
+
+ (gdb) up
+ #1 0x10068ccd in usr1_pid (pid=1980) at process.c:30
+ 30 kill(pid, SIGUSR1);
+ (gdb)
+ #2 0x1006a03f in _switch_to (prev=0x50072000, next=0x507e8000)
+ at process_kern.c:156
+ 156 usr1_pid(getpid());
+ (gdb)
+ #3 0x1006a052 in switch_to (prev=0x50072000, next=0x507e8000, last=0x50072000)
+ at process_kern.c:161
+ 161 _switch_to(prev, next);
+ (gdb)
+ #4 0x10001d12 in schedule () at core.c:777
+ 777 switch_to(prev, next, prev);
+ (gdb)
+ #5 0x1006a744 in __down (sem=0x507d241c) at semaphore.c:71
+ 71 schedule();
+ (gdb)
+ #6 0x1006aa10 in __down_failed () at semaphore.c:157
+ 157 }
+ (gdb)
+ #7 0x1006c5d8 in segv_handler (sc=0x5006e940) at trap_user.c:174
+ 174 segv(sc->cr2, sc->err & 2);
+ (gdb)
+ #8 0x1006c5ec in kern_segv_handler (sig=11) at trap_user.c:182
+ 182 segv_handler(sc);
+ (gdb) p *sc
+ Cannot access memory at address 0x0.
+
+
+
+
+ That's not very useful, so I'll try a more manual method:
+
+
+ (gdb) p *((struct sigcontext *) (&sig + 1))
+ $19 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43,
+ __dsh = 0, edi = 1342179328, esi = 1350378548, ebp = 1342630440,
+ esp = 1342630420, ebx = 1348150624, edx = 1280, ecx = 0, eax = 0,
+ trapno = 14, err = 4, eip = 268480945, cs = 35, __csh = 0, eflags = 66118,
+ esp_at_signal = 1342630420, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0,
+ cr2 = 1280}
+
+
+
+ The ip is in handle_mm_fault:
+
+
+ (gdb) p (void *)268480945
+ $20 = (void *) 0x1000b1b1
+ (gdb) i sym $20
+ handle_mm_fault + 57 in section .text
+
+
+
+
+
+ Specifically, it's in pte_alloc:
+
+
+ (gdb) i line *$20
+ Line 124 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+ starts at address 0x1000b1b1 <handle_mm_fault+57>
+ and ends at 0x1000b1b7 <handle_mm_fault+63>.
+
+
+
+
+
+ To find where in handle_mm_fault this is, I'll jump forward in the
+ code until I see an address in that procedure:
+
+
+
+ (gdb) i line *0x1000b1c0
+ Line 126 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+ starts at address 0x1000b1b7 <handle_mm_fault+63>
+ and ends at 0x1000b1c3 <handle_mm_fault+75>.
+ (gdb) i line *0x1000b1d0
+ Line 131 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+ starts at address 0x1000b1d0 <handle_mm_fault+88>
+ and ends at 0x1000b1da <handle_mm_fault+98>.
+ (gdb) i line *0x1000b1e0
+ Line 61 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+ starts at address 0x1000b1da <handle_mm_fault+98>
+ and ends at 0x1000b1e1 <handle_mm_fault+105>.
+ (gdb) i line *0x1000b1f0
+ Line 134 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+ starts at address 0x1000b1f0 <handle_mm_fault+120>
+ and ends at 0x1000b200 <handle_mm_fault+136>.
+ (gdb) i line *0x1000b200
+ Line 135 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+ starts at address 0x1000b200 <handle_mm_fault+136>
+ and ends at 0x1000b208 <handle_mm_fault+144>.
+ (gdb) i line *0x1000b210
+ Line 139 of "/home/dike/linux/2.3.26/um/include/asm/pgalloc.h"
+ starts at address 0x1000b210 <handle_mm_fault+152>
+ and ends at 0x1000b219 <handle_mm_fault+161>.
+ (gdb) i line *0x1000b220
+ Line 1168 of "memory.c" starts at address 0x1000b21e <handle_mm_fault+166>
+ and ends at 0x1000b222 <handle_mm_fault+170>.
+
+
+
+
+
+ Something is apparently wrong with the page tables or vma_structs, so
+ lets go back to frame 11 and have a look at them:
+
+
+
+ #11 0x1006c0aa in segv (address=1342179328, is_write=2) at trap_kern.c:50
+ 50 handle_mm_fault(current, vma, address, is_write);
+ (gdb) call pgd_offset_proc(vma->vm_mm, address)
+ $22 = (pgd_t *) 0x80a548c
+
+
+
+
+
+ That's pretty bogus. Page tables aren't supposed to be in process
+ text or data areas. Let's see what's in the vma:
+
+
+ (gdb) p *vma
+ $23 = {vm_mm = 0x507d2434, vm_start = 0, vm_end = 134512640,
+ vm_next = 0x80a4f8c, vm_page_prot = {pgprot = 0}, vm_flags = 31200,
+ vm_avl_height = 2058, vm_avl_left = 0x80a8c94, vm_avl_right = 0x80d1000,
+ vm_next_share = 0xaffffdb0, vm_pprev_share = 0xaffffe63,
+ vm_ops = 0xaffffe7a, vm_pgoff = 2952789626, vm_file = 0xafffffec,
+ vm_private_data = 0x62}
+ (gdb) p *vma.vm_mm
+ $24 = {mmap = 0x507d2434, mmap_avl = 0x0, mmap_cache = 0x8048000,
+ pgd = 0x80a4f8c, mm_users = {counter = 0}, mm_count = {counter = 134904288},
+ map_count = 134909076, mmap_sem = {count = {counter = 135073792},
+ sleepers = -1342177872, wait = {lock = <optimized out or zero length>,
+ task_list = {next = 0xaffffe63, prev = 0xaffffe7a},
+ __magic = -1342177670, __creator = -1342177300}, __magic = 98},
+ page_table_lock = {}, context = 138, start_code = 0, end_code = 0,
+ start_data = 0, end_data = 0, start_brk = 0, brk = 0, start_stack = 0,
+ arg_start = 0, arg_end = 0, env_start = 0, env_end = 0, rss = 1350381536,
+ total_vm = 0, locked_vm = 0, def_flags = 0, cpu_vm_mask = 0, swap_cnt = 0,
+ swap_address = 0, segments = 0x0}
+
+
+
+
+
+ This also pretty bogus. With all of the 0x80xxxxx and 0xaffffxxx
+ addresses, this is looking like a stack was plonked down on top of
+ these structures. Maybe it's a stack overflow from the next page:
+
+
+
+ (gdb) p vma
+ $25 = (struct vm_area_struct *) 0x507d2434
+
+
+
+
+
+ That's towards the lower quarter of the page, so that would have to
+ have been pretty heavy stack overflow:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (gdb) x/100x $25
+ 0x507d2434: 0x507d2434 0x00000000 0x08048000 0x080a4f8c
+ 0x507d2444: 0x00000000 0x080a79e0 0x080a8c94 0x080d1000
+ 0x507d2454: 0xaffffdb0 0xaffffe63 0xaffffe7a 0xaffffe7a
+ 0x507d2464: 0xafffffec 0x00000062 0x0000008a 0x00000000
+ 0x507d2474: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2484: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2494: 0x00000000 0x00000000 0x507d2fe0 0x00000000
+ 0x507d24a4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d24b4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d24c4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d24d4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d24e4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d24f4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2504: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2514: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2524: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2534: 0x00000000 0x00000000 0x507d25dc 0x00000000
+ 0x507d2544: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2554: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2564: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2574: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2584: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d2594: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d25a4: 0x00000000 0x00000000 0x00000000 0x00000000
+ 0x507d25b4: 0x00000000 0x00000000 0x00000000 0x00000000
+
+
+
+
+
+ It's not stack overflow. The only "stack-like" piece of this data is
+ the vma_struct itself.
+
+
+ At this point, I don't see any avenues to pursue, so I just have to
+ admit that I have no idea what's going on. What I will do, though, is
+ stick a trap on the segfault handler which will stop if it sees any
+ writes to the idle thread's stack. That was the thing that happened
+ first, and it may be that if I can catch it immediately, what's going
+ on will be somewhat clearer.
+
+
+ 12.2. Episode 2: The case of the hung fsck
+
+ After setting a trap in the SEGV handler for accesses to the signal
+ thread's stack, I reran the kernel.
+
+
+ fsck hung again, this time by hitting the trap:
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Setting hostname uml [ OK ]
+ Checking root filesystem
+ /dev/fhd0 contains a file system with errors, check forced.
+ Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780.
+
+ /dev/fhd0: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.
+ (i.e., without -a or -p options)
+ [ FAILED ]
+
+ *** An error occurred during the file system check.
+ *** Dropping you to a shell; the system will reboot
+ *** when you leave the shell.
+ Give root password for maintenance
+ (or type Control-D for normal startup):
+
+ [root@uml /root]# fsck -y /dev/fhd0
+ fsck -y /dev/fhd0
+ Parallelizing fsck version 1.14 (9-Jan-1999)
+ e2fsck 1.14, 9-Jan-1999 for EXT2 FS 0.5b, 95/08/09
+ /dev/fhd0 contains a file system with errors, check forced.
+ Pass 1: Checking inodes, blocks, and sizes
+ Error reading block 86894 (Attempt to read block from filesystem resulted in short read) while reading indirect blocks of inode 19780. Ignore error? yes
+
+ Pass 2: Checking directory structure
+ Error reading block 49405 (Attempt to read block from filesystem resulted in short read). Ignore error? yes
+
+ Directory inode 11858, block 0, offset 0: directory corrupted
+ Salvage? yes
+
+ Missing '.' in directory inode 11858.
+ Fix? yes
+
+ Missing '..' in directory inode 11858.
+ Fix? yes
+
+ Untested (4127) [100fe44c]: trap_kern.c line 31
+
+
+
+
+
+ I need to get the signal thread to detach from pid 4127 so that I can
+ attach to it with gdb. This is done by sending it a SIGUSR1, which is
+ caught by the signal thread, which detaches the process:
+
+
+ kill -USR1 4127
+
+
+
+
+
+ Now I can run gdb on it:
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ~/linux/2.3.26/um 1034: gdb linux
+ GNU gdb 4.17.0.11 with Linux support
+ Copyright 1998 Free Software Foundation, Inc.
+ GDB is free software, covered by the GNU General Public License, and you are
+ welcome to change it and/or distribute copies of it under certain conditions.
+ Type "show copying" to see the conditions.
+ There is absolutely no warranty for GDB. Type "show warranty" for details.
+ This GDB was configured as "i386-redhat-linux"...
+ (gdb) att 4127
+ Attaching to program `/home/dike/linux/2.3.26/um/linux', Pid 4127
+ 0x10075891 in __libc_nanosleep ()
+
+
+
+
+
+ The backtrace shows that it was in a write and that the fault address
+ (address in frame 3) is 0x50000800, which is right in the middle of
+ the signal thread's stack page:
+
+
+ (gdb) bt
+ #0 0x10075891 in __libc_nanosleep ()
+ #1 0x1007584d in __sleep (seconds=1000000)
+ at ../sysdeps/unix/sysv/linux/sleep.c:78
+ #2 0x1006ce9a in stop () at user_util.c:191
+ #3 0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31
+ #4 0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
+ #5 0x1006c63c in kern_segv_handler (sig=11) at trap_user.c:182
+ #6 <signal handler called>
+ #7 0xc0fd in ?? ()
+ #8 0x10016647 in sys_write (fd=3, buf=0x80b8800 "R.", count=1024)
+ at read_write.c:159
+ #9 0x1006d603 in execute_syscall (syscall=4, args=0x5006ef08)
+ at syscall_kern.c:254
+ #10 0x1006af87 in really_do_syscall (sig=12) at syscall_user.c:35
+ #11 <signal handler called>
+ #12 0x400dc8b0 in ?? ()
+ #13 <signal handler called>
+ #14 0x400dc8b0 in ?? ()
+ #15 0x80545fd in ?? ()
+ #16 0x804daae in ?? ()
+ #17 0x8054334 in ?? ()
+ #18 0x804d23e in ?? ()
+ #19 0x8049632 in ?? ()
+ #20 0x80491d2 in ?? ()
+ #21 0x80596b5 in ?? ()
+ (gdb) p (void *)1342179328
+ $3 = (void *) 0x50000800
+
+
+
+
+
+ Going up the stack to the segv_handler frame and looking at where in
+ the code the access happened shows that it happened near line 110 of
+ block_dev.c:
+
+
+
+
+
+
+
+
+
+ (gdb) up
+ #1 0x1007584d in __sleep (seconds=1000000)
+ at ../sysdeps/unix/sysv/linux/sleep.c:78
+ ../sysdeps/unix/sysv/linux/sleep.c:78: No such file or directory.
+ (gdb)
+ #2 0x1006ce9a in stop () at user_util.c:191
+ 191 while(1) sleep(1000000);
+ (gdb)
+ #3 0x1006bf88 in segv (address=1342179328, is_write=2) at trap_kern.c:31
+ 31 KERN_UNTESTED();
+ (gdb)
+ #4 0x1006c628 in segv_handler (sc=0x5006eaf8) at trap_user.c:174
+ 174 segv(sc->cr2, sc->err & 2);
+ (gdb) p *sc
+ $1 = {gs = 0, __gsh = 0, fs = 0, __fsh = 0, es = 43, __esh = 0, ds = 43,
+ __dsh = 0, edi = 1342179328, esi = 134973440, ebp = 1342631484,
+ esp = 1342630864, ebx = 256, edx = 0, ecx = 256, eax = 1024, trapno = 14,
+ err = 6, eip = 268550834, cs = 35, __csh = 0, eflags = 66070,
+ esp_at_signal = 1342630864, ss = 43, __ssh = 0, fpstate = 0x0, oldmask = 0,
+ cr2 = 1342179328}
+ (gdb) p (void *)268550834
+ $2 = (void *) 0x1001c2b2
+ (gdb) i sym $2
+ block_write + 1090 in section .text
+ (gdb) i line *$2
+ Line 209 of "/home/dike/linux/2.3.26/um/include/asm/arch/string.h"
+ starts at address 0x1001c2a1 <block_write+1073>
+ and ends at 0x1001c2bf <block_write+1103>.
+ (gdb) i line *0x1001c2c0
+ Line 110 of "block_dev.c" starts at address 0x1001c2bf <block_write+1103>
+ and ends at 0x1001c2e3 <block_write+1139>.
+
+
+
+
+
+ Looking at the source shows that the fault happened during a call to
+ copy_from_user to copy the data into the kernel:
+
+
+ 107 count -= chars;
+ 108 copy_from_user(p,buf,chars);
+ 109 p += chars;
+ 110 buf += chars;
+
+
+
+
+
+ p is the pointer which must contain 0x50000800, since buf contains
+ 0x80b8800 (frame 8 above). It is defined as:
+
+
+ p = offset + bh->b_data;
+
+
+
+
+
+ I need to figure out what bh is, and it just so happens that bh is
+ passed as an argument to mark_buffer_uptodate and mark_buffer_dirty a
+ few lines later, so I do a little disassembly:
+
+
+
+
+ (gdb) disas 0x1001c2bf 0x1001c2e0
+ Dump of assembler code from 0x1001c2bf to 0x1001c2d0:
+ 0x1001c2bf <block_write+1103>: addl %eax,0xc(%ebp)
+ 0x1001c2c2 <block_write+1106>: movl 0xfffffdd4(%ebp),%edx
+ 0x1001c2c8 <block_write+1112>: btsl $0x0,0x18(%edx)
+ 0x1001c2cd <block_write+1117>: btsl $0x1,0x18(%edx)
+ 0x1001c2d2 <block_write+1122>: sbbl %ecx,%ecx
+ 0x1001c2d4 <block_write+1124>: testl %ecx,%ecx
+ 0x1001c2d6 <block_write+1126>: jne 0x1001c2e3 <block_write+1139>
+ 0x1001c2d8 <block_write+1128>: pushl $0x0
+ 0x1001c2da <block_write+1130>: pushl %edx
+ 0x1001c2db <block_write+1131>: call 0x1001819c <__mark_buffer_dirty>
+ End of assembler dump.
+
+
+
+
+
+ At that point, bh is in %edx (address 0x1001c2da), which is calculated
+ at 0x1001c2c2 as %ebp + 0xfffffdd4, so I figure exactly what that is,
+ taking %ebp from the sigcontext_struct above:
+
+
+ (gdb) p (void *)1342631484
+ $5 = (void *) 0x5006ee3c
+ (gdb) p 0x5006ee3c+0xfffffdd4
+ $6 = 1342630928
+ (gdb) p (void *)$6
+ $7 = (void *) 0x5006ec10
+ (gdb) p *((void **)$7)
+ $8 = (void *) 0x50100200
+
+
+
+
+
+ Now, I look at the structure to see what's in it, and particularly,
+ what its b_data field contains:
+
+
+ (gdb) p *((struct buffer_head *)0x50100200)
+ $13 = {b_next = 0x50289380, b_blocknr = 49405, b_size = 1024, b_list = 0,
+ b_dev = 15872, b_count = {counter = 1}, b_rdev = 15872, b_state = 24,
+ b_flushtime = 0, b_next_free = 0x501001a0, b_prev_free = 0x50100260,
+ b_this_page = 0x501001a0, b_reqnext = 0x0, b_pprev = 0x507fcf58,
+ b_data = 0x50000800 "", b_page = 0x50004000,
+ b_end_io = 0x10017f60 <end_buffer_io_sync>, b_dev_id = 0x0,
+ b_rsector = 98810, b_wait = {lock = <optimized out or zero length>,
+ task_list = {next = 0x50100248, prev = 0x50100248}, __magic = 1343226448,
+ __creator = 0}, b_kiobuf = 0x0}
+
+
+
+
+
+ The b_data field is indeed 0x50000800, so the question becomes how
+ that happened. The rest of the structure looks fine, so this probably
+ is not a case of data corruption. It happened on purpose somehow.
+
+
+ The b_page field is a pointer to the page_struct representing the
+ 0x50000000 page. Looking at it shows the kernel's idea of the state
+ of that page:
+
+
+
+ (gdb) p *$13.b_page
+ $17 = {list = {next = 0x50004a5c, prev = 0x100c5174}, mapping = 0x0,
+ index = 0, next_hash = 0x0, count = {counter = 1}, flags = 132, lru = {
+ next = 0x50008460, prev = 0x50019350}, wait = {
+ lock = <optimized out or zero length>, task_list = {next = 0x50004024,
+ prev = 0x50004024}, __magic = 1342193708, __creator = 0},
+ pprev_hash = 0x0, buffers = 0x501002c0, virtual = 1342177280,
+ zone = 0x100c5160}
+
+
+
+
+
+ Some sanity-checking: the virtual field shows the "virtual" address of
+ this page, which in this kernel is the same as its "physical" address,
+ and the page_struct itself should be mem_map[0], since it represents
+ the first page of memory:
+
+
+
+ (gdb) p (void *)1342177280
+ $18 = (void *) 0x50000000
+ (gdb) p mem_map
+ $19 = (mem_map_t *) 0x50004000
+
+
+
+
+
+ These check out fine.
+
+
+ Now to check out the page_struct itself. In particular, the flags
+ field shows whether the page is considered free or not:
+
+
+ (gdb) p (void *)132
+ $21 = (void *) 0x84
+
+
+
+
+
+ The "reserved" bit is the high bit, which is definitely not set, so
+ the kernel considers the signal stack page to be free and available to
+ be used.
+
+
+ At this point, I jump to conclusions and start looking at my early
+ boot code, because that's where that page is supposed to be reserved.
+
+
+ In my setup_arch procedure, I have the following code which looks just
+ fine:
+
+
+
+ bootmap_size = init_bootmem(start_pfn, end_pfn - start_pfn);
+ free_bootmem(__pa(low_physmem) + bootmap_size, high_physmem - low_physmem);
+
+
+
+
+
+ Two stack pages have already been allocated, and low_physmem points to
+ the third page, which is the beginning of free memory.
+ The init_bootmem call declares the entire memory to the boot memory
+ manager, which marks it all reserved. The free_bootmem call frees up
+ all of it, except for the first two pages. This looks correct to me.
+
+
+ So, I decide to see init_bootmem run and make sure that it is marking
+ those first two pages as reserved. I never get that far.
+
+
+ Stepping into init_bootmem, and looking at bootmem_map before looking
+ at what it contains shows the following:
+
+
+
+ (gdb) p bootmem_map
+ $3 = (void *) 0x50000000
+
+
+
+
+
+ Aha! The light dawns. That first page is doing double duty as a
+ stack and as the boot memory map. The last thing that the boot memory
+ manager does is to free the pages used by its memory map, so this page
+ is getting freed even its marked as reserved.
+
+
+ The fix was to initialize the boot memory manager before allocating
+ those two stack pages, and then allocate them through the boot memory
+ manager. After doing this, and fixing a couple of subsequent buglets,
+ the stack corruption problem disappeared.
+
+
+
+
+
+ 13. What to do when UML doesn't work
+
+
+
+
+ 13.1. Strange compilation errors when you build from source
+
+ As of test11, it is necessary to have "ARCH=um" in the environment or
+ on the make command line for all steps in building UML, including
+ clean, distclean, or mrproper, config, menuconfig, or xconfig, dep,
+ and linux. If you forget for any of them, the i386 build seems to
+ contaminate the UML build. If this happens, start from scratch with
+
+
+ host%
+ make mrproper ARCH=um
+
+
+
+
+ and repeat the build process with ARCH=um on all the steps.
+
+
+ See ``Compiling the kernel and modules'' for more details.
+
+
+ Another cause of strange compilation errors is building UML in
+ /usr/src/linux. If you do this, the first thing you need to do is
+ clean up the mess you made. The /usr/src/linux/asm link will now
+ point to /usr/src/linux/asm-um. Make it point back to
+ /usr/src/linux/asm-i386. Then, move your UML pool someplace else and
+ build it there. Also see below, where a more specific set of symptoms
+ is described.
+
+
+
+ 13.3. A variety of panics and hangs with /tmp on a reiserfs filesys-
+ tem
+
+ I saw this on reiserfs 3.5.21 and it seems to be fixed in 3.5.27.
+ Panics preceded by
+
+
+ Detaching pid nnnn
+
+
+
+ are diagnostic of this problem. This is a reiserfs bug which causes a
+ thread to occasionally read stale data from a mmapped page shared with
+ another thread. The fix is to upgrade the filesystem or to have /tmp
+ be an ext2 filesystem.
+
+
+
+ 13.4. The compile fails with errors about conflicting types for
+ 'open', 'dup', and 'waitpid'
+
+ This happens when you build in /usr/src/linux. The UML build makes
+ the include/asm link point to include/asm-um. /usr/include/asm points
+ to /usr/src/linux/include/asm, so when that link gets moved, files
+ which need to include the asm-i386 versions of headers get the
+ incompatible asm-um versions. The fix is to move the include/asm link
+ back to include/asm-i386 and to do UML builds someplace else.
+
+
+
+ 13.5. UML doesn't work when /tmp is an NFS filesystem
+
+ This seems to be a similar situation with the ReiserFS problem above.
+ Some versions of NFS seems not to handle mmap correctly, which UML
+ depends on. The workaround is have /tmp be a non-NFS directory.
+
+
+ 13.6. UML hangs on boot when compiled with gprof support
+
+ If you build UML with gprof support and, early in the boot, it does
+ this
+
+
+ kernel BUG at page_alloc.c:100!
+
+
+
+
+ you have a buggy gcc. You can work around the problem by removing
+ UM_FASTCALL from CFLAGS in arch/um/Makefile-i386. This will open up
+ another bug, but that one is fairly hard to reproduce.
+
+
+
+ 13.7. syslogd dies with a SIGTERM on startup
+
+ The exact boot error depends on the distribution that you're booting,
+ but Debian produces this:
+
+
+ /etc/rc2.d/S10sysklogd: line 49: 93 Terminated
+ start-stop-daemon --start --quiet --exec /sbin/syslogd -- $SYSLOGD
+
+
+
+
+ This is a syslogd bug. There's a race between a parent process
+ installing a signal handler and its child sending the signal. See
+ this uml-devel post <http://www.geocrawler.com/lists/3/Source-
+ Forge/709/0/6612801> for the details.
+
+
+
+ 13.8. TUN/TAP networking doesn't work on a 2.4 host
+
+ There are a couple of problems which were
+ <http://www.geocrawler.com/lists/3/SourceForge/597/0/> name="pointed
+ out"> by Tim Robinson <timro at trkr dot net>
+
+ o It doesn't work on hosts running 2.4.7 (or thereabouts) or earlier.
+ The fix is to upgrade to something more recent and then read the
+ next item.
+
+ o If you see
+
+
+ File descriptor in bad state
+
+
+
+ when you bring up the device inside UML, you have a header mismatch
+ between the original kernel and the upgraded one. Make /usr/src/linux
+ point at the new headers. This will only be a problem if you build
+ uml_net yourself.
+
+
+
+ 13.9. You can network to the host but not to other machines on the
+ net
+
+ If you can connect to the host, and the host can connect to UML, but
+ you cannot connect to any other machines, then you may need to enable
+ IP Masquerading on the host. Usually this is only experienced when
+ using private IP addresses (192.168.x.x or 10.x.x.x) for host/UML
+ networking, rather than the public address space that your host is
+ connected to. UML does not enable IP Masquerading, so you will need
+ to create a static rule to enable it:
+
+
+ host%
+ iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE
+
+
+
+
+ Replace eth0 with the interface that you use to talk to the rest of
+ the world.
+
+
+ Documentation on IP Masquerading, and SNAT, can be found at
+ www.netfilter.org <http://www.netfilter.org> .
+
+
+ If you can reach the local net, but not the outside Internet, then
+ that is usually a routing problem. The UML needs a default route:
+
+
+ UML#
+ route add default gw gateway IP
+
+
+
+
+ The gateway IP can be any machine on the local net that knows how to
+ reach the outside world. Usually, this is the host or the local net-
+ work's gateway.
+
+
+ Occasionally, we hear from someone who can reach some machines, but
+ not others on the same net, or who can reach some ports on other
+ machines, but not others. These are usually caused by strange
+ firewalling somewhere between the UML and the other box. You track
+ this down by running tcpdump on every interface the packets travel
+ over and see where they disappear. When you find a machine that takes
+ the packets in, but does not send them onward, that's the culprit.
+
+
+
+ 13.10. I have no root and I want to scream
+
+ Thanks to Birgit Wahlich for telling me about this strange one. It
+ turns out that there's a limit of six environment variables on the
+ kernel command line. When that limit is reached or exceeded, argument
+ processing stops, which means that the 'root=' argument that UML
+ usually adds is not seen. So, the filesystem has no idea what the
+ root device is, so it panics.
+
+
+ The fix is to put less stuff on the command line. Glomming all your
+ setup variables into one is probably the best way to go.
+
+
+
+ 13.11. UML build conflict between ptrace.h and ucontext.h
+
+ On some older systems, /usr/include/asm/ptrace.h and
+ /usr/include/sys/ucontext.h define the same names. So, when they're
+ included together, the defines from one completely mess up the parsing
+ of the other, producing errors like:
+ /usr/include/sys/ucontext.h:47: parse error before
+ `10'
+
+
+
+
+ plus a pile of warnings.
+
+
+ This is a libc botch, which has since been fixed, and I don't see any
+ way around it besides upgrading.
+
+
+
+ 13.12. The UML BogoMips is exactly half the host's BogoMips
+
+ On i386 kernels, there are two ways of running the loop that is used
+ to calculate the BogoMips rating, using the TSC if it's there or using
+ a one-instruction loop. The TSC produces twice the BogoMips as the
+ loop. UML uses the loop, since it has nothing resembling a TSC, and
+ will get almost exactly the same BogoMips as a host using the loop.
+ However, on a host with a TSC, its BogoMips will be double the loop
+ BogoMips, and therefore double the UML BogoMips.
+
+
+
+ 13.13. When you run UML, it immediately segfaults
+
+ If the host is configured with the 2G/2G address space split, that's
+ why. See ``UML on 2G/2G hosts'' for the details on getting UML to
+ run on your host.
+
+
+
+ 13.14. xterms appear, then immediately disappear
+
+ If you're running an up to date kernel with an old release of
+ uml_utilities, the port-helper program will not work properly, so
+ xterms will exit straight after they appear. The solution is to
+ upgrade to the latest release of uml_utilities. Usually this problem
+ occurs when you have installed a packaged release of UML then compiled
+ your own development kernel without upgrading the uml_utilities from
+ the source distribution.
+
+
+
+ 13.15. Any other panic, hang, or strange behavior
+
+ If you're seeing truly strange behavior, such as hangs or panics that
+ happen in random places, or you try running the debugger to see what's
+ happening and it acts strangely, then it could be a problem in the
+ host kernel. If you're not running a stock Linus or -ac kernel, then
+ try that. An early version of the preemption patch and a 2.4.10 SuSE
+ kernel have caused very strange problems in UML.
+
+
+ Otherwise, let me know about it. Send a message to one of the UML
+ mailing lists - either the developer list - user-mode-linux-devel at
+ lists dot sourceforge dot net (subscription info) or the user list -
+ user-mode-linux-user at lists dot sourceforge do net (subscription
+ info), whichever you prefer. Don't assume that everyone knows about
+ it and that a fix is imminent.
+
+
+ If you want to be super-helpful, read ``Diagnosing Problems'' and
+ follow the instructions contained therein.
+ 14. Diagnosing Problems
+
+
+ If you get UML to crash, hang, or otherwise misbehave, you should
+ report this on one of the project mailing lists, either the developer
+ list - user-mode-linux-devel at lists dot sourceforge dot net
+ (subscription info) or the user list - user-mode-linux-user at lists
+ dot sourceforge dot net (subscription info). When you do, it is
+ likely that I will want more information. So, it would be helpful to
+ read the stuff below, do whatever is applicable in your case, and
+ report the results to the list.
+
+
+ For any diagnosis, you're going to need to build a debugging kernel.
+ The binaries from this site aren't debuggable. If you haven't done
+ this before, read about ``Compiling the kernel and modules'' and
+ ``Kernel debugging'' UML first.
+
+
+ 14.1. Case 1 : Normal kernel panics
+
+ The most common case is for a normal thread to panic. To debug this,
+ you will need to run it under the debugger (add 'debug' to the command
+ line). An xterm will start up with gdb running inside it. Continue
+ it when it stops in start_kernel and make it crash. Now ^C gdb and
+
+
+ If the panic was a "Kernel mode fault", then there will be a segv
+ frame on the stack and I'm going to want some more information. The
+ stack might look something like this:
+
+
+ (UML gdb) backtrace
+ #0 0x1009bf76 in __sigprocmask (how=1, set=0x5f347940, oset=0x0)
+ at ../sysdeps/unix/sysv/linux/sigprocmask.c:49
+ #1 0x10091411 in change_sig (signal=10, on=1) at process.c:218
+ #2 0x10094785 in timer_handler (sig=26) at time_kern.c:32
+ #3 0x1009bf38 in __restore ()
+ at ../sysdeps/unix/sysv/linux/i386/sigaction.c:125
+ #4 0x1009534c in segv (address=8, ip=268849158, is_write=2, is_user=0)
+ at trap_kern.c:66
+ #5 0x10095c04 in segv_handler (sig=11) at trap_user.c:285
+ #6 0x1009bf38 in __restore ()
+
+
+
+
+ I'm going to want to see the symbol and line information for the value
+ of ip in the segv frame. In this case, you would do the following:
+
+
+ (UML gdb) i sym 268849158
+
+
+
+
+ and
+
+
+ (UML gdb) i line *268849158
+
+
+
+
+ The reason for this is the __restore frame right above the segv_han-
+ dler frame is hiding the frame that actually segfaulted. So, I have
+ to get that information from the faulting ip.
+
+
+ 14.2. Case 2 : Tracing thread panics
+
+ The less common and more painful case is when the tracing thread
+ panics. In this case, the kernel debugger will be useless because it
+ needs a healthy tracing thread in order to work. The first thing to
+ do is get a backtrace from the tracing thread. This is done by
+ figuring out what its pid is, firing up gdb, and attaching it to that
+ pid. You can figure out the tracing thread pid by looking at the
+ first line of the console output, which will look like this:
+
+
+ tracing thread pid = 15851
+
+
+
+
+ or by running ps on the host and finding the line that looks like
+ this:
+
+
+ jdike 15851 4.5 0.4 132568 1104 pts/0 S 21:34 0:05 ./linux [(tracing thread)]
+
+
+
+
+ If the panic was 'segfault in signals', then follow the instructions
+ above for collecting information about the location of the seg fault.
+
+
+ If the tracing thread flaked out all by itself, then send that
+ backtrace in and wait for our crack debugging team to fix the problem.
+
+
+ 14.3. Case 3 : Tracing thread panics caused by other threads
+
+ However, there are cases where the misbehavior of another thread
+ caused the problem. The most common panic of this type is:
+
+
+ wait_for_stop failed to wait for <pid> to stop with <signal number>
+
+
+
+
+ In this case, you'll need to get a backtrace from the process men-
+ tioned in the panic, which is complicated by the fact that the kernel
+ debugger is defunct and without some fancy footwork, another gdb can't
+ attach to it. So, this is how the fancy footwork goes:
+
+ In a shell:
+
+
+ host% kill -STOP pid
+
+
+
+
+ Run gdb on the tracing thread as described in case 2 and do:
+
+
+ (host gdb) call detach(pid)
+
+
+ If you get a segfault, do it again. It always works the second time.
+
+ Detach from the tracing thread and attach to that other thread:
+
+
+ (host gdb) detach
+
+
+
+
+
+
+ (host gdb) attach pid
+
+
+
+
+ If gdb hangs when attaching to that process, go back to a shell and
+ do:
+
+
+ host%
+ kill -CONT pid
+
+
+
+
+ And then get the backtrace:
+
+
+ (host gdb) backtrace
+
+
+
+
+
+ 14.4. Case 4 : Hangs
+
+ Hangs seem to be fairly rare, but they sometimes happen. When a hang
+ happens, we need a backtrace from the offending process. Run the
+ kernel debugger as described in case 1 and get a backtrace. If the
+ current process is not the idle thread, then send in the backtrace.
+ You can tell that it's the idle thread if the stack looks like this:
+
+
+ #0 0x100b1401 in __libc_nanosleep ()
+ #1 0x100a2885 in idle_sleep (secs=10) at time.c:122
+ #2 0x100a546f in do_idle () at process_kern.c:445
+ #3 0x100a5508 in cpu_idle () at process_kern.c:471
+ #4 0x100ec18f in start_kernel () at init/main.c:592
+ #5 0x100a3e10 in start_kernel_proc (unused=0x0) at um_arch.c:71
+ #6 0x100a383f in signal_tramp (arg=0x100a3dd8) at trap_user.c:50
+
+
+
+
+ If this is the case, then some other process is at fault, and went to
+ sleep when it shouldn't have. Run ps on the host and figure out which
+ process should not have gone to sleep and stayed asleep. Then attach
+ to it with gdb and get a backtrace as described in case 3.
+
+
+
+
+
+
+ 15. Thanks
+
+
+ A number of people have helped this project in various ways, and this
+ page gives recognition where recognition is due.
+
+
+ If you're listed here and you would prefer a real link on your name,
+ or no link at all, instead of the despammed email address pseudo-link,
+ let me know.
+
+
+ If you're not listed here and you think maybe you should be, please
+ let me know that as well. I try to get everyone, but sometimes my
+ bookkeeping lapses and I forget about contributions.
+
+
+ 15.1. Code and Documentation
+
+ Rusty Russell <rusty at linuxcare.com.au> -
+
+ o wrote the HOWTO <http://user-mode-
+ linux.sourceforge.net/UserModeLinux-HOWTO.html>
+
+ o prodded me into making this project official and putting it on
+ SourceForge
+
+ o came up with the way cool UML logo <http://user-mode-
+ linux.sourceforge.net/uml-small.png>
+
+ o redid the config process
+
+
+ Peter Moulder <reiter at netspace.net.au> - Fixed my config and build
+ processes, and added some useful code to the block driver
+
+
+ Bill Stearns <wstearns at pobox.com> -
+
+ o HOWTO updates
+
+ o lots of bug reports
+
+ o lots of testing
+
+ o dedicated a box (uml.ists.dartmouth.edu) to support UML development
+
+ o wrote the mkrootfs script, which allows bootable filesystems of
+ RPM-based distributions to be cranked out
+
+ o cranked out a large number of filesystems with said script
+
+
+ Jim Leu <jleu at mindspring.com> - Wrote the virtual ethernet driver
+ and associated usermode tools
+
+ Lars Brinkhoff <http://lars.nocrew.org/> - Contributed the ptrace
+ proxy from his own project <http://a386.nocrew.org/> to allow easier
+ kernel debugging
+
+
+ Andrea Arcangeli <andrea at suse.de> - Redid some of the early boot
+ code so that it would work on machines with Large File Support
+
+
+ Chris Emerson <http://www.chiark.greenend.org.uk/~cemerson/> - Did
+ the first UML port to Linux/ppc
+
+
+ Harald Welte <laforge at gnumonks.org> - Wrote the multicast
+ transport for the network driver
+
+
+ Jorgen Cederlof - Added special file support to hostfs
+
+
+ Greg Lonnon <glonnon at ridgerun dot com> - Changed the ubd driver
+ to allow it to layer a COW file on a shared read-only filesystem and
+ wrote the iomem emulation support
+
+
+ Henrik Nordstrom <http://hem.passagen.se/hno/> - Provided a variety
+ of patches, fixes, and clues
+
+
+ Lennert Buytenhek - Contributed various patches, a rewrite of the
+ network driver, the first implementation of the mconsole driver, and
+ did the bulk of the work needed to get SMP working again.
+
+
+ Yon Uriarte - Fixed the TUN/TAP network backend while I slept.
+
+
+ Adam Heath - Made a bunch of nice cleanups to the initialization code,
+ plus various other small patches.
+
+
+ Matt Zimmerman - Matt volunteered to be the UML Debian maintainer and
+ is doing a real nice job of it. He also noticed and fixed a number of
+ actually and potentially exploitable security holes in uml_net. Plus
+ the occasional patch. I like patches.
+
+
+ James McMechan - James seems to have taken over maintenance of the ubd
+ driver and is doing a nice job of it.
+
+
+ Chandan Kudige - wrote the umlgdb script which automates the reloading
+ of module symbols.
+
+
+ Steve Schmidtke - wrote the UML slirp transport and hostaudio drivers,
+ enabling UML processes to access audio devices on the host. He also
+ submitted patches for the slip transport and lots of other things.
+
+
+ David Coulson <http://davidcoulson.net> -
+
+ o Set up the usermodelinux.org <http://usermodelinux.org> site,
+ which is a great way of keeping the UML user community on top of
+ UML goings-on.
+
+ o Site documentation and updates
+
+ o Nifty little UML management daemon UMLd
+ <http://uml.openconsultancy.com/umld/>
+
+ o Lots of testing and bug reports
+
+
+
+
+ 15.2. Flushing out bugs
+
+
+
+ o Yuri Pudgorodsky
+
+ o Gerald Britton
+
+ o Ian Wehrman
+
+ o Gord Lamb
+
+ o Eugene Koontz
+
+ o John H. Hartman
+
+ o Anders Karlsson
+
+ o Daniel Phillips
+
+ o John Fremlin
+
+ o Rainer Burgstaller
+
+ o James Stevenson
+
+ o Matt Clay
+
+ o Cliff Jefferies
+
+ o Geoff Hoff
+
+ o Lennert Buytenhek
+
+ o Al Viro
+
+ o Frank Klingenhoefer
+
+ o Livio Baldini Soares
+
+ o Jon Burgess
+
+ o Petru Paler
+
+ o Paul
+
+ o Chris Reahard
+
+ o Sverker Nilsson
+
+ o Gong Su
+
+ o johan verrept
+
+ o Bjorn Eriksson
+
+ o Lorenzo Allegrucci
+
+ o Muli Ben-Yehuda
+
+ o David Mansfield
+
+ o Howard Goff
+
+ o Mike Anderson
+
+ o John Byrne
+
+ o Sapan J. Batia
+
+ o Iris Huang
+
+ o Jan Hudec
+
+ o Voluspa
+
+
+
+
+ 15.3. Buglets and clean-ups
+
+
+
+ o Dave Zarzycki
+
+ o Adam Lazur
+
+ o Boria Feigin
+
+ o Brian J. Murrell
+
+ o JS
+
+ o Roman Zippel
+
+ o Wil Cooley
+
+ o Ayelet Shemesh
+
+ o Will Dyson
+
+ o Sverker Nilsson
+
+ o dvorak
+
+ o v.naga srinivas
+
+ o Shlomi Fish
+
+ o Roger Binns
+
+ o johan verrept
+
+ o MrChuoi
+
+ o Peter Cleve
+
+ o Vincent Guffens
+
+ o Nathan Scott
+
+ o Patrick Caulfield
+
+ o jbearce
+
+ o Catalin Marinas
+
+ o Shane Spencer
+
+ o Zou Min
+
+
+ o Ryan Boder
+
+ o Lorenzo Colitti
+
+ o Gwendal Grignou
+
+ o Andre' Breiler
+
+ o Tsutomu Yasuda
+
+
+
+ 15.4. Case Studies
+
+
+ o Jon Wright
+
+ o William McEwan
+
+ o Michael Richardson
+
+
+
+ 15.5. Other contributions
+
+
+ Bill Carr <Bill.Carr at compaq.com> made the Red Hat mkrootfs script
+ work with RH 6.2.
+
+ Michael Jennings <mikejen at hevanet.com> sent in some material which
+ is now gracing the top of the index page <http://user-mode-
+ linux.sourceforge.net/> of this site.
+
+ SGI <http://www.sgi.com> (and more specifically Ralf Baechle <ralf at
+ uni-koblenz.de> ) gave me an account on oss.sgi.com
+ <http://www.oss.sgi.com> . The bandwidth there made it possible to
+ produce most of the filesystems available on the project download
+ page.
+
+ Laurent Bonnaud <Laurent.Bonnaud at inpg.fr> took the old grotty
+ Debian filesystem that I've been distributing and updated it to 2.2.
+ It is now available by itself here.
+
+ Rik van Riel gave me some ftp space on ftp.nl.linux.org so I can make
+ releases even when Sourceforge is broken.
+
+ Rodrigo de Castro looked at my broken pte code and told me what was
+ wrong with it, letting me fix a long-standing (several weeks) and
+ serious set of bugs.
+
+ Chris Reahard built a specialized root filesystem for running a DNS
+ server jailed inside UML. It's available from the download
+ <http://user-mode-linux.sourceforge.net/dl-sf.html> page in the Jail
+ Filesystems section.
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/Documentation/vm/.gitignore b/Documentation/vm/.gitignore
new file mode 100644
index 000000000..09b164a57
--- /dev/null
+++ b/Documentation/vm/.gitignore
@@ -0,0 +1,2 @@
+page-types
+slabinfo
diff --git a/Documentation/vm/00-INDEX b/Documentation/vm/00-INDEX
new file mode 100644
index 000000000..75bde3da7
--- /dev/null
+++ b/Documentation/vm/00-INDEX
@@ -0,0 +1,42 @@
+00-INDEX
+ - this file.
+active_mm.txt
+ - An explanation from Linus about tsk->active_mm vs tsk->mm.
+balance
+ - various information on memory balancing.
+cleancache.txt
+ - Intro to cleancache and page-granularity victim cache.
+frontswap.txt
+ - Outline frontswap, part of the transcendent memory frontend.
+highmem.txt
+ - Outline of highmem and common issues.
+hugetlbpage.txt
+ - a brief summary of hugetlbpage support in the Linux kernel.
+hwpoison.txt
+ - explains what hwpoison is
+ksm.txt
+ - how to use the Kernel Samepage Merging feature.
+uksm.txt
+ - Introduction to Ultra KSM
+numa
+ - information about NUMA specific code in the Linux vm.
+numa_memory_policy.txt
+ - documentation of concepts and APIs of the 2.6 memory policy support.
+overcommit-accounting
+ - description of the Linux kernels overcommit handling modes.
+page_migration
+ - description of page migration in NUMA systems.
+pagemap.txt
+ - pagemap, from the userspace perspective
+slub.txt
+ - a short users guide for SLUB.
+soft-dirty.txt
+ - short explanation for soft-dirty PTEs
+split_page_table_lock
+ - Separate per-table lock to improve scalability of the old page_table_lock.
+transhuge.txt
+ - Transparent Hugepage Support, alternative way of using hugepages.
+unevictable-lru.txt
+ - Unevictable LRU infrastructure
+zswap.txt
+ - Intro to compressed cache for swap pages
diff --git a/Documentation/vm/active_mm.txt b/Documentation/vm/active_mm.txt
new file mode 100644
index 000000000..dbf458174
--- /dev/null
+++ b/Documentation/vm/active_mm.txt
@@ -0,0 +1,83 @@
+List: linux-kernel
+Subject: Re: active_mm
+From: Linus Torvalds <torvalds () transmeta ! com>
+Date: 1999-07-30 21:36:24
+
+Cc'd to linux-kernel, because I don't write explanations all that often,
+and when I do I feel better about more people reading them.
+
+On Fri, 30 Jul 1999, David Mosberger wrote:
+>
+> Is there a brief description someplace on how "mm" vs. "active_mm" in
+> the task_struct are supposed to be used? (My apologies if this was
+> discussed on the mailing lists---I just returned from vacation and
+> wasn't able to follow linux-kernel for a while).
+
+Basically, the new setup is:
+
+ - we have "real address spaces" and "anonymous address spaces". The
+ difference is that an anonymous address space doesn't care about the
+ user-level page tables at all, so when we do a context switch into an
+ anonymous address space we just leave the previous address space
+ active.
+
+ The obvious use for a "anonymous address space" is any thread that
+ doesn't need any user mappings - all kernel threads basically fall into
+ this category, but even "real" threads can temporarily say that for
+ some amount of time they are not going to be interested in user space,
+ and that the scheduler might as well try to avoid wasting time on
+ switching the VM state around. Currently only the old-style bdflush
+ sync does that.
+
+ - "tsk->mm" points to the "real address space". For an anonymous process,
+ tsk->mm will be NULL, for the logical reason that an anonymous process
+ really doesn't _have_ a real address space at all.
+
+ - however, we obviously need to keep track of which address space we
+ "stole" for such an anonymous user. For that, we have "tsk->active_mm",
+ which shows what the currently active address space is.
+
+ The rule is that for a process with a real address space (ie tsk->mm is
+ non-NULL) the active_mm obviously always has to be the same as the real
+ one.
+
+ For a anonymous process, tsk->mm == NULL, and tsk->active_mm is the
+ "borrowed" mm while the anonymous process is running. When the
+ anonymous process gets scheduled away, the borrowed address space is
+ returned and cleared.
+
+To support all that, the "struct mm_struct" now has two counters: a
+"mm_users" counter that is how many "real address space users" there are,
+and a "mm_count" counter that is the number of "lazy" users (ie anonymous
+users) plus one if there are any real users.
+
+Usually there is at least one real user, but it could be that the real
+user exited on another CPU while a lazy user was still active, so you do
+actually get cases where you have a address space that is _only_ used by
+lazy users. That is often a short-lived state, because once that thread
+gets scheduled away in favour of a real thread, the "zombie" mm gets
+released because "mm_users" becomes zero.
+
+Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
+more. "init_mm" should be considered just a "lazy context when no other
+context is available", and in fact it is mainly used just at bootup when
+no real VM has yet been created. So code that used to check
+
+ if (current->mm == &init_mm)
+
+should generally just do
+
+ if (!current->mm)
+
+instead (which makes more sense anyway - the test is basically one of "do
+we have a user context", and is generally done by the page fault handler
+and things like that).
+
+Anyway, I put a pre-patch-2.3.13-1 on ftp.kernel.org just a moment ago,
+because it slightly changes the interfaces to accommodate the alpha (who
+would have thought it, but the alpha actually ends up having one of the
+ugliest context switch codes - unlike the other architectures where the MM
+and register state is separate, the alpha PALcode joins the two, and you
+need to switch both together).
+
+(From http://marc.info/?l=linux-kernel&m=93337278602211&w=2)
diff --git a/Documentation/vm/balance b/Documentation/vm/balance
new file mode 100644
index 000000000..c46e68cf9
--- /dev/null
+++ b/Documentation/vm/balance
@@ -0,0 +1,93 @@
+Started Jan 2000 by Kanoj Sarcar <kanoj@sgi.com>
+
+Memory balancing is needed for non __GFP_WAIT as well as for non
+__GFP_IO allocations.
+
+There are two reasons to be requesting non __GFP_WAIT allocations:
+the caller can not sleep (typically intr context), or does not want
+to incur cost overheads of page stealing and possible swap io for
+whatever reasons.
+
+__GFP_IO allocation requests are made to prevent file system deadlocks.
+
+In the absence of non sleepable allocation requests, it seems detrimental
+to be doing balancing. Page reclamation can be kicked off lazily, that
+is, only when needed (aka zone free memory is 0), instead of making it
+a proactive process.
+
+That being said, the kernel should try to fulfill requests for direct
+mapped pages from the direct mapped pool, instead of falling back on
+the dma pool, so as to keep the dma pool filled for dma requests (atomic
+or not). A similar argument applies to highmem and direct mapped pages.
+OTOH, if there is a lot of free dma pages, it is preferable to satisfy
+regular memory requests by allocating one from the dma pool, instead
+of incurring the overhead of regular zone balancing.
+
+In 2.2, memory balancing/page reclamation would kick off only when the
+_total_ number of free pages fell below 1/64 th of total memory. With the
+right ratio of dma and regular memory, it is quite possible that balancing
+would not be done even when the dma zone was completely empty. 2.2 has
+been running production machines of varying memory sizes, and seems to be
+doing fine even with the presence of this problem. In 2.3, due to
+HIGHMEM, this problem is aggravated.
+
+In 2.3, zone balancing can be done in one of two ways: depending on the
+zone size (and possibly of the size of lower class zones), we can decide
+at init time how many free pages we should aim for while balancing any
+zone. The good part is, while balancing, we do not need to look at sizes
+of lower class zones, the bad part is, we might do too frequent balancing
+due to ignoring possibly lower usage in the lower class zones. Also,
+with a slight change in the allocation routine, it is possible to reduce
+the memclass() macro to be a simple equality.
+
+Another possible solution is that we balance only when the free memory
+of a zone _and_ all its lower class zones falls below 1/64th of the
+total memory in the zone and its lower class zones. This fixes the 2.2
+balancing problem, and stays as close to 2.2 behavior as possible. Also,
+the balancing algorithm works the same way on the various architectures,
+which have different numbers and types of zones. If we wanted to get
+fancy, we could assign different weights to free pages in different
+zones in the future.
+
+Note that if the size of the regular zone is huge compared to dma zone,
+it becomes less significant to consider the free dma pages while
+deciding whether to balance the regular zone. The first solution
+becomes more attractive then.
+
+The appended patch implements the second solution. It also "fixes" two
+problems: first, kswapd is woken up as in 2.2 on low memory conditions
+for non-sleepable allocations. Second, the HIGHMEM zone is also balanced,
+so as to give a fighting chance for replace_with_highmem() to get a
+HIGHMEM page, as well as to ensure that HIGHMEM allocations do not
+fall back into regular zone. This also makes sure that HIGHMEM pages
+are not leaked (for example, in situations where a HIGHMEM page is in
+the swapcache but is not being used by anyone)
+
+kswapd also needs to know about the zones it should balance. kswapd is
+primarily needed in a situation where balancing can not be done,
+probably because all allocation requests are coming from intr context
+and all process contexts are sleeping. For 2.3, kswapd does not really
+need to balance the highmem zone, since intr context does not request
+highmem pages. kswapd looks at the zone_wake_kswapd field in the zone
+structure to decide whether a zone needs balancing.
+
+Page stealing from process memory and shm is done if stealing the page would
+alleviate memory pressure on any zone in the page's node that has fallen below
+its watermark.
+
+watemark[WMARK_MIN/WMARK_LOW/WMARK_HIGH]/low_on_memory/zone_wake_kswapd: These
+are per-zone fields, used to determine when a zone needs to be balanced. When
+the number of pages falls below watermark[WMARK_MIN], the hysteric field
+low_on_memory gets set. This stays set till the number of free pages becomes
+watermark[WMARK_HIGH]. When low_on_memory is set, page allocation requests will
+try to free some pages in the zone (providing GFP_WAIT is set in the request).
+Orthogonal to this, is the decision to poke kswapd to free some zone pages.
+That decision is not hysteresis based, and is done when the number of free
+pages is below watermark[WMARK_LOW]; in which case zone_wake_kswapd is also set.
+
+
+(Good) Ideas that I have heard:
+1. Dynamic experience should influence balancing: number of failed requests
+for a zone can be tracked and fed into the balancing scheme (jalvo@mbay.net)
+2. Implement a replace_with_highmem()-like replace_with_regular() to preserve
+dma pages. (lkd@tantalophile.demon.co.uk)
diff --git a/Documentation/vm/cleancache.txt b/Documentation/vm/cleancache.txt
new file mode 100644
index 000000000..e4b49df7a
--- /dev/null
+++ b/Documentation/vm/cleancache.txt
@@ -0,0 +1,277 @@
+MOTIVATION
+
+Cleancache is a new optional feature provided by the VFS layer that
+potentially dramatically increases page cache effectiveness for
+many workloads in many environments at a negligible cost.
+
+Cleancache can be thought of as a page-granularity victim cache for clean
+pages that the kernel's pageframe replacement algorithm (PFRA) would like
+to keep around, but can't since there isn't enough memory. So when the
+PFRA "evicts" a page, it first attempts to use cleancache code to
+put the data contained in that page into "transcendent memory", memory
+that is not directly accessible or addressable by the kernel and is
+of unknown and possibly time-varying size.
+
+Later, when a cleancache-enabled filesystem wishes to access a page
+in a file on disk, it first checks cleancache to see if it already
+contains it; if it does, the page of data is copied into the kernel
+and a disk access is avoided.
+
+Transcendent memory "drivers" for cleancache are currently implemented
+in Xen (using hypervisor memory) and zcache (using in-kernel compressed
+memory) and other implementations are in development.
+
+FAQs are included below.
+
+IMPLEMENTATION OVERVIEW
+
+A cleancache "backend" that provides transcendent memory registers itself
+to the kernel's cleancache "frontend" by calling cleancache_register_ops,
+passing a pointer to a cleancache_ops structure with funcs set appropriately.
+The functions provided must conform to certain semantics as follows:
+
+Most important, cleancache is "ephemeral". Pages which are copied into
+cleancache have an indefinite lifetime which is completely unknowable
+by the kernel and so may or may not still be in cleancache at any later time.
+Thus, as its name implies, cleancache is not suitable for dirty pages.
+Cleancache has complete discretion over what pages to preserve and what
+pages to discard and when.
+
+Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a
+pool id which, if positive, must be saved in the filesystem's superblock;
+a negative return value indicates failure. A "put_page" will copy a
+(presumably about-to-be-evicted) page into cleancache and associate it with
+the pool id, a file key, and a page index into the file. (The combination
+of a pool id, a file key, and an index is sometimes called a "handle".)
+A "get_page" will copy the page, if found, from cleancache into kernel memory.
+An "invalidate_page" will ensure the page no longer is present in cleancache;
+an "invalidate_inode" will invalidate all pages associated with the specified
+file; and, when a filesystem is unmounted, an "invalidate_fs" will invalidate
+all pages in all files specified by the given pool id and also surrender
+the pool id.
+
+An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache
+to treat the pool as shared using a 128-bit UUID as a key. On systems
+that may run multiple kernels (such as hard partitioned or virtualized
+systems) that may share a clustered filesystem, and where cleancache
+may be shared among those kernels, calls to init_shared_fs that specify the
+same UUID will receive the same pool id, thus allowing the pages to
+be shared. Note that any security requirements must be imposed outside
+of the kernel (e.g. by "tools" that control cleancache). Or a
+cleancache implementation can simply disable shared_init by always
+returning a negative value.
+
+If a get_page is successful on a non-shared pool, the page is invalidated
+(thus making cleancache an "exclusive" cache). On a shared pool, the page
+is NOT invalidated on a successful get_page so that it remains accessible to
+other sharers. The kernel is responsible for ensuring coherency between
+cleancache (shared or not), the page cache, and the filesystem, using
+cleancache invalidate operations as required.
+
+Note that cleancache must enforce put-put-get coherency and get-get
+coherency. For the former, if two puts are made to the same handle but
+with different data, say AAA by the first put and BBB by the second, a
+subsequent get can never return the stale data (AAA). For get-get coherency,
+if a get for a given handle fails, subsequent gets for that handle will
+never succeed unless preceded by a successful put with that handle.
+
+Last, cleancache provides no SMP serialization guarantees; if two
+different Linux threads are simultaneously putting and invalidating a page
+with the same handle, the results are indeterminate. Callers must
+lock the page to ensure serial behavior.
+
+CLEANCACHE PERFORMANCE METRICS
+
+If properly configured, monitoring of cleancache is done via debugfs in
+the /sys/kernel/debug/cleancache directory. The effectiveness of cleancache
+can be measured (across all filesystems) with:
+
+succ_gets - number of gets that were successful
+failed_gets - number of gets that failed
+puts - number of puts attempted (all "succeed")
+invalidates - number of invalidates attempted
+
+A backend implementation may provide additional metrics.
+
+FAQ
+
+1) Where's the value? (Andrew Morton)
+
+Cleancache provides a significant performance benefit to many workloads
+in many environments with negligible overhead by improving the
+effectiveness of the pagecache. Clean pagecache pages are
+saved in transcendent memory (RAM that is otherwise not directly
+addressable to the kernel); fetching those pages later avoids "refaults"
+and thus disk reads.
+
+Cleancache (and its sister code "frontswap") provide interfaces for
+this transcendent memory (aka "tmem"), which conceptually lies between
+fast kernel-directly-addressable RAM and slower DMA/asynchronous devices.
+Disallowing direct kernel or userland reads/writes to tmem
+is ideal when data is transformed to a different form and size (such
+as with compression) or secretly moved (as might be useful for write-
+balancing for some RAM-like devices). Evicted page-cache pages (and
+swap pages) are a great use for this kind of slower-than-RAM-but-much-
+faster-than-disk transcendent memory, and the cleancache (and frontswap)
+"page-object-oriented" specification provides a nice way to read and
+write -- and indirectly "name" -- the pages.
+
+In the virtual case, the whole point of virtualization is to statistically
+multiplex physical resources across the varying demands of multiple
+virtual machines. This is really hard to do with RAM and efforts to
+do it well with no kernel change have essentially failed (except in some
+well-publicized special-case workloads). Cleancache -- and frontswap --
+with a fairly small impact on the kernel, provide a huge amount
+of flexibility for more dynamic, flexible RAM multiplexing.
+Specifically, the Xen Transcendent Memory backend allows otherwise
+"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
+virtual machines, but the pages can be compressed and deduplicated to
+optimize RAM utilization. And when guest OS's are induced to surrender
+underutilized RAM (e.g. with "self-ballooning"), page cache pages
+are the first to go, and cleancache allows those pages to be
+saved and reclaimed if overall host system memory conditions allow.
+
+And the identical interface used for cleancache can be used in
+physical systems as well. The zcache driver acts as a memory-hungry
+device that stores pages of data in a compressed state. And
+the proposed "RAMster" driver shares RAM across multiple physical
+systems.
+
+2) Why does cleancache have its sticky fingers so deep inside the
+ filesystems and VFS? (Andrew Morton and Christoph Hellwig)
+
+The core hooks for cleancache in VFS are in most cases a single line
+and the minimum set are placed precisely where needed to maintain
+coherency (via cleancache_invalidate operations) between cleancache,
+the page cache, and disk. All hooks compile into nothingness if
+cleancache is config'ed off and turn into a function-pointer-
+compare-to-NULL if config'ed on but no backend claims the ops
+functions, or to a compare-struct-element-to-negative if a
+backend claims the ops functions but a filesystem doesn't enable
+cleancache.
+
+Some filesystems are built entirely on top of VFS and the hooks
+in VFS are sufficient, so don't require an "init_fs" hook; the
+initial implementation of cleancache didn't provide this hook.
+But for some filesystems (such as btrfs), the VFS hooks are
+incomplete and one or more hooks in fs-specific code are required.
+And for some other filesystems, such as tmpfs, cleancache may
+be counterproductive. So it seemed prudent to require a filesystem
+to "opt in" to use cleancache, which requires adding a hook in
+each filesystem. Not all filesystems are supported by cleancache
+only because they haven't been tested. The existing set should
+be sufficient to validate the concept, the opt-in approach means
+that untested filesystems are not affected, and the hooks in the
+existing filesystems should make it very easy to add more
+filesystems in the future.
+
+The total impact of the hooks to existing fs and mm files is only
+about 40 lines added (not counting comments and blank lines).
+
+3) Why not make cleancache asynchronous and batched so it can
+ more easily interface with real devices with DMA instead
+ of copying each individual page? (Minchan Kim)
+
+The one-page-at-a-time copy semantics simplifies the implementation
+on both the frontend and backend and also allows the backend to
+do fancy things on-the-fly like page compression and
+page deduplication. And since the data is "gone" (copied into/out
+of the pageframe) before the cleancache get/put call returns,
+a great deal of race conditions and potential coherency issues
+are avoided. While the interface seems odd for a "real device"
+or for real kernel-addressable RAM, it makes perfect sense for
+transcendent memory.
+
+4) Why is non-shared cleancache "exclusive"? And where is the
+ page "invalidated" after a "get"? (Minchan Kim)
+
+The main reason is to free up space in transcendent memory and
+to avoid unnecessary cleancache_invalidate calls. If you want inclusive,
+the page can be "put" immediately following the "get". If
+put-after-get for inclusive becomes common, the interface could
+be easily extended to add a "get_no_invalidate" call.
+
+The invalidate is done by the cleancache backend implementation.
+
+5) What's the performance impact?
+
+Performance analysis has been presented at OLS'09 and LCA'10.
+Briefly, performance gains can be significant on most workloads,
+especially when memory pressure is high (e.g. when RAM is
+overcommitted in a virtual workload); and because the hooks are
+invoked primarily in place of or in addition to a disk read/write,
+overhead is negligible even in worst case workloads. Basically
+cleancache replaces I/O with memory-copy-CPU-overhead; on older
+single-core systems with slow memory-copy speeds, cleancache
+has little value, but in newer multicore machines, especially
+consolidated/virtualized machines, it has great value.
+
+6) How do I add cleancache support for filesystem X? (Boaz Harrash)
+
+Filesystems that are well-behaved and conform to certain
+restrictions can utilize cleancache simply by making a call to
+cleancache_init_fs at mount time. Unusual, misbehaving, or
+poorly layered filesystems must either add additional hooks
+and/or undergo extensive additional testing... or should just
+not enable the optional cleancache.
+
+Some points for a filesystem to consider:
+
+- The FS should be block-device-based (e.g. a ram-based FS such
+ as tmpfs should not enable cleancache)
+- To ensure coherency/correctness, the FS must ensure that all
+ file removal or truncation operations either go through VFS or
+ add hooks to do the equivalent cleancache "invalidate" operations
+- To ensure coherency/correctness, either inode numbers must
+ be unique across the lifetime of the on-disk file OR the
+ FS must provide an "encode_fh" function.
+- The FS must call the VFS superblock alloc and deactivate routines
+ or add hooks to do the equivalent cleancache calls done there.
+- To maximize performance, all pages fetched from the FS should
+ go through the do_mpag_readpage routine or the FS should add
+ hooks to do the equivalent (cf. btrfs)
+- Currently, the FS blocksize must be the same as PAGESIZE. This
+ is not an architectural restriction, but no backends currently
+ support anything different.
+- A clustered FS should invoke the "shared_init_fs" cleancache
+ hook to get best performance for some backends.
+
+7) Why not use the KVA of the inode as the key? (Christoph Hellwig)
+
+If cleancache would use the inode virtual address instead of
+inode/filehandle, the pool id could be eliminated. But, this
+won't work because cleancache retains pagecache data pages
+persistently even when the inode has been pruned from the
+inode unused list, and only invalidates the data page if the file
+gets removed/truncated. So if cleancache used the inode kva,
+there would be potential coherency issues if/when the inode
+kva is reused for a different file. Alternately, if cleancache
+invalidated the pages when the inode kva was freed, much of the value
+of cleancache would be lost because the cache of pages in cleanache
+is potentially much larger than the kernel pagecache and is most
+useful if the pages survive inode cache removal.
+
+8) Why is a global variable required?
+
+The cleancache_enabled flag is checked in all of the frequently-used
+cleancache hooks. The alternative is a function call to check a static
+variable. Since cleancache is enabled dynamically at runtime, systems
+that don't enable cleancache would suffer thousands (possibly
+tens-of-thousands) of unnecessary function calls per second. So the
+global variable allows cleancache to be enabled by default at compile
+time, but have insignificant performance impact when cleancache remains
+disabled at runtime.
+
+9) Does cleanache work with KVM?
+
+The memory model of KVM is sufficiently different that a cleancache
+backend may have less value for KVM. This remains to be tested,
+especially in an overcommitted system.
+
+10) Does cleancache work in userspace? It sounds useful for
+ memory hungry caches like web browsers. (Jamie Lokier)
+
+No plans yet, though we agree it sounds useful, at least for
+apps that bypass the page cache (e.g. O_DIRECT).
+
+Last updated: Dan Magenheimer, April 13 2011
diff --git a/Documentation/vm/frontswap.txt b/Documentation/vm/frontswap.txt
new file mode 100644
index 000000000..c71a019be
--- /dev/null
+++ b/Documentation/vm/frontswap.txt
@@ -0,0 +1,278 @@
+Frontswap provides a "transcendent memory" interface for swap pages.
+In some environments, dramatic performance savings may be obtained because
+swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk.
+
+(Note, frontswap -- and cleancache (merged at 3.0) -- are the "frontends"
+and the only necessary changes to the core kernel for transcendent memory;
+all other supporting code -- the "backends" -- is implemented as drivers.
+See the LWN.net article "Transcendent memory in a nutshell" for a detailed
+overview of frontswap and related kernel parts:
+https://lwn.net/Articles/454795/ )
+
+Frontswap is so named because it can be thought of as the opposite of
+a "backing" store for a swap device. The storage is assumed to be
+a synchronous concurrency-safe page-oriented "pseudo-RAM device" conforming
+to the requirements of transcendent memory (such as Xen's "tmem", or
+in-kernel compressed memory, aka "zcache", or future RAM-like devices);
+this pseudo-RAM device is not directly accessible or addressable by the
+kernel and is of unknown and possibly time-varying size. The driver
+links itself to frontswap by calling frontswap_register_ops to set the
+frontswap_ops funcs appropriately and the functions it provides must
+conform to certain policies as follows:
+
+An "init" prepares the device to receive frontswap pages associated
+with the specified swap device number (aka "type"). A "store" will
+copy the page to transcendent memory and associate it with the type and
+offset associated with the page. A "load" will copy the page, if found,
+from transcendent memory into kernel memory, but will NOT remove the page
+from transcendent memory. An "invalidate_page" will remove the page
+from transcendent memory and an "invalidate_area" will remove ALL pages
+associated with the swap type (e.g., like swapoff) and notify the "device"
+to refuse further stores with that swap type.
+
+Once a page is successfully stored, a matching load on the page will normally
+succeed. So when the kernel finds itself in a situation where it needs
+to swap out a page, it first attempts to use frontswap. If the store returns
+success, the data has been successfully saved to transcendent memory and
+a disk write and, if the data is later read back, a disk read are avoided.
+If a store returns failure, transcendent memory has rejected the data, and the
+page can be written to swap as usual.
+
+If a backend chooses, frontswap can be configured as a "writethrough
+cache" by calling frontswap_writethrough(). In this mode, the reduction
+in swap device writes is lost (and also a non-trivial performance advantage)
+in order to allow the backend to arbitrarily "reclaim" space used to
+store frontswap pages to more completely manage its memory usage.
+
+Note that if a page is stored and the page already exists in transcendent memory
+(a "duplicate" store), either the store succeeds and the data is overwritten,
+or the store fails AND the page is invalidated. This ensures stale data may
+never be obtained from frontswap.
+
+If properly configured, monitoring of frontswap is done via debugfs in
+the /sys/kernel/debug/frontswap directory. The effectiveness of
+frontswap can be measured (across all swap devices) with:
+
+failed_stores - how many store attempts have failed
+loads - how many loads were attempted (all should succeed)
+succ_stores - how many store attempts have succeeded
+invalidates - how many invalidates were attempted
+
+A backend implementation may provide additional metrics.
+
+FAQ
+
+1) Where's the value?
+
+When a workload starts swapping, performance falls through the floor.
+Frontswap significantly increases performance in many such workloads by
+providing a clean, dynamic interface to read and write swap pages to
+"transcendent memory" that is otherwise not directly addressable to the kernel.
+This interface is ideal when data is transformed to a different form
+and size (such as with compression) or secretly moved (as might be
+useful for write-balancing for some RAM-like devices). Swap pages (and
+evicted page-cache pages) are a great use for this kind of slower-than-RAM-
+but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and
+cleancache) interface to transcendent memory provides a nice way to read
+and write -- and indirectly "name" -- the pages.
+
+Frontswap -- and cleancache -- with a fairly small impact on the kernel,
+provides a huge amount of flexibility for more dynamic, flexible RAM
+utilization in various system configurations:
+
+In the single kernel case, aka "zcache", pages are compressed and
+stored in local memory, thus increasing the total anonymous pages
+that can be safely kept in RAM. Zcache essentially trades off CPU
+cycles used in compression/decompression for better memory utilization.
+Benchmarks have shown little or no impact when memory pressure is
+low while providing a significant performance improvement (25%+)
+on some workloads under high memory pressure.
+
+"RAMster" builds on zcache by adding "peer-to-peer" transcendent memory
+support for clustered systems. Frontswap pages are locally compressed
+as in zcache, but then "remotified" to another system's RAM. This
+allows RAM to be dynamically load-balanced back-and-forth as needed,
+i.e. when system A is overcommitted, it can swap to system B, and
+vice versa. RAMster can also be configured as a memory server so
+many servers in a cluster can swap, dynamically as needed, to a single
+server configured with a large amount of RAM... without pre-configuring
+how much of the RAM is available for each of the clients!
+
+In the virtual case, the whole point of virtualization is to statistically
+multiplex physical resources across the varying demands of multiple
+virtual machines. This is really hard to do with RAM and efforts to do
+it well with no kernel changes have essentially failed (except in some
+well-publicized special-case workloads).
+Specifically, the Xen Transcendent Memory backend allows otherwise
+"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple
+virtual machines, but the pages can be compressed and deduplicated to
+optimize RAM utilization. And when guest OS's are induced to surrender
+underutilized RAM (e.g. with "selfballooning"), sudden unexpected
+memory pressure may result in swapping; frontswap allows those pages
+to be swapped to and from hypervisor RAM (if overall host system memory
+conditions allow), thus mitigating the potentially awful performance impact
+of unplanned swapping.
+
+A KVM implementation is underway and has been RFC'ed to lkml. And,
+using frontswap, investigation is also underway on the use of NVM as
+a memory extension technology.
+
+2) Sure there may be performance advantages in some situations, but
+ what's the space/time overhead of frontswap?
+
+If CONFIG_FRONTSWAP is disabled, every frontswap hook compiles into
+nothingness and the only overhead is a few extra bytes per swapon'ed
+swap device. If CONFIG_FRONTSWAP is enabled but no frontswap "backend"
+registers, there is one extra global variable compared to zero for
+every swap page read or written. If CONFIG_FRONTSWAP is enabled
+AND a frontswap backend registers AND the backend fails every "store"
+request (i.e. provides no memory despite claiming it might),
+CPU overhead is still negligible -- and since every frontswap fail
+precedes a swap page write-to-disk, the system is highly likely
+to be I/O bound and using a small fraction of a percent of a CPU
+will be irrelevant anyway.
+
+As for space, if CONFIG_FRONTSWAP is enabled AND a frontswap backend
+registers, one bit is allocated for every swap page for every swap
+device that is swapon'd. This is added to the EIGHT bits (which
+was sixteen until about 2.6.34) that the kernel already allocates
+for every swap page for every swap device that is swapon'd. (Hugh
+Dickins has observed that frontswap could probably steal one of
+the existing eight bits, but let's worry about that minor optimization
+later.) For very large swap disks (which are rare) on a standard
+4K pagesize, this is 1MB per 32GB swap.
+
+When swap pages are stored in transcendent memory instead of written
+out to disk, there is a side effect that this may create more memory
+pressure that can potentially outweigh the other advantages. A
+backend, such as zcache, must implement policies to carefully (but
+dynamically) manage memory limits to ensure this doesn't happen.
+
+3) OK, how about a quick overview of what this frontswap patch does
+ in terms that a kernel hacker can grok?
+
+Let's assume that a frontswap "backend" has registered during
+kernel initialization; this registration indicates that this
+frontswap backend has access to some "memory" that is not directly
+accessible by the kernel. Exactly how much memory it provides is
+entirely dynamic and random.
+
+Whenever a swap-device is swapon'd frontswap_init() is called,
+passing the swap device number (aka "type") as a parameter.
+This notifies frontswap to expect attempts to "store" swap pages
+associated with that number.
+
+Whenever the swap subsystem is readying a page to write to a swap
+device (c.f swap_writepage()), frontswap_store is called. Frontswap
+consults with the frontswap backend and if the backend says it does NOT
+have room, frontswap_store returns -1 and the kernel swaps the page
+to the swap device as normal. Note that the response from the frontswap
+backend is unpredictable to the kernel; it may choose to never accept a
+page, it could accept every ninth page, or it might accept every
+page. But if the backend does accept a page, the data from the page
+has already been copied and associated with the type and offset,
+and the backend guarantees the persistence of the data. In this case,
+frontswap sets a bit in the "frontswap_map" for the swap device
+corresponding to the page offset on the swap device to which it would
+otherwise have written the data.
+
+When the swap subsystem needs to swap-in a page (swap_readpage()),
+it first calls frontswap_load() which checks the frontswap_map to
+see if the page was earlier accepted by the frontswap backend. If
+it was, the page of data is filled from the frontswap backend and
+the swap-in is complete. If not, the normal swap-in code is
+executed to obtain the page of data from the real swap device.
+
+So every time the frontswap backend accepts a page, a swap device read
+and (potentially) a swap device write are replaced by a "frontswap backend
+store" and (possibly) a "frontswap backend loads", which are presumably much
+faster.
+
+4) Can't frontswap be configured as a "special" swap device that is
+ just higher priority than any real swap device (e.g. like zswap,
+ or maybe swap-over-nbd/NFS)?
+
+No. First, the existing swap subsystem doesn't allow for any kind of
+swap hierarchy. Perhaps it could be rewritten to accommodate a hierarchy,
+but this would require fairly drastic changes. Even if it were
+rewritten, the existing swap subsystem uses the block I/O layer which
+assumes a swap device is fixed size and any page in it is linearly
+addressable. Frontswap barely touches the existing swap subsystem,
+and works around the constraints of the block I/O subsystem to provide
+a great deal of flexibility and dynamicity.
+
+For example, the acceptance of any swap page by the frontswap backend is
+entirely unpredictable. This is critical to the definition of frontswap
+backends because it grants completely dynamic discretion to the
+backend. In zcache, one cannot know a priori how compressible a page is.
+"Poorly" compressible pages can be rejected, and "poorly" can itself be
+defined dynamically depending on current memory constraints.
+
+Further, frontswap is entirely synchronous whereas a real swap
+device is, by definition, asynchronous and uses block I/O. The
+block I/O layer is not only unnecessary, but may perform "optimizations"
+that are inappropriate for a RAM-oriented device including delaying
+the write of some pages for a significant amount of time. Synchrony is
+required to ensure the dynamicity of the backend and to avoid thorny race
+conditions that would unnecessarily and greatly complicate frontswap
+and/or the block I/O subsystem. That said, only the initial "store"
+and "load" operations need be synchronous. A separate asynchronous thread
+is free to manipulate the pages stored by frontswap. For example,
+the "remotification" thread in RAMster uses standard asynchronous
+kernel sockets to move compressed frontswap pages to a remote machine.
+Similarly, a KVM guest-side implementation could do in-guest compression
+and use "batched" hypercalls.
+
+In a virtualized environment, the dynamicity allows the hypervisor
+(or host OS) to do "intelligent overcommit". For example, it can
+choose to accept pages only until host-swapping might be imminent,
+then force guests to do their own swapping.
+
+There is a downside to the transcendent memory specifications for
+frontswap: Since any "store" might fail, there must always be a real
+slot on a real swap device to swap the page. Thus frontswap must be
+implemented as a "shadow" to every swapon'd device with the potential
+capability of holding every page that the swap device might have held
+and the possibility that it might hold no pages at all. This means
+that frontswap cannot contain more pages than the total of swapon'd
+swap devices. For example, if NO swap device is configured on some
+installation, frontswap is useless. Swapless portable devices
+can still use frontswap but a backend for such devices must configure
+some kind of "ghost" swap device and ensure that it is never used.
+
+5) Why this weird definition about "duplicate stores"? If a page
+ has been previously successfully stored, can't it always be
+ successfully overwritten?
+
+Nearly always it can, but no, sometimes it cannot. Consider an example
+where data is compressed and the original 4K page has been compressed
+to 1K. Now an attempt is made to overwrite the page with data that
+is non-compressible and so would take the entire 4K. But the backend
+has no more space. In this case, the store must be rejected. Whenever
+frontswap rejects a store that would overwrite, it also must invalidate
+the old data and ensure that it is no longer accessible. Since the
+swap subsystem then writes the new data to the read swap device,
+this is the correct course of action to ensure coherency.
+
+6) What is frontswap_shrink for?
+
+When the (non-frontswap) swap subsystem swaps out a page to a real
+swap device, that page is only taking up low-value pre-allocated disk
+space. But if frontswap has placed a page in transcendent memory, that
+page may be taking up valuable real estate. The frontswap_shrink
+routine allows code outside of the swap subsystem to force pages out
+of the memory managed by frontswap and back into kernel-addressable memory.
+For example, in RAMster, a "suction driver" thread will attempt
+to "repatriate" pages sent to a remote machine back to the local machine;
+this is driven using the frontswap_shrink mechanism when memory pressure
+subsides.
+
+7) Why does the frontswap patch create the new include file swapfile.h?
+
+The frontswap code depends on some swap-subsystem-internal data
+structures that have, over the years, moved back and forth between
+static and global. This seemed a reasonable compromise: Define
+them as global but declare them in a new include file that isn't
+included by the large number of source files that include swap.h.
+
+Dan Magenheimer, last updated April 9, 2012
diff --git a/Documentation/vm/highmem.txt b/Documentation/vm/highmem.txt
new file mode 100644
index 000000000..4324d24ff
--- /dev/null
+++ b/Documentation/vm/highmem.txt
@@ -0,0 +1,162 @@
+
+ ====================
+ HIGH MEMORY HANDLING
+ ====================
+
+By: Peter Zijlstra <a.p.zijlstra@chello.nl>
+
+Contents:
+
+ (*) What is high memory?
+
+ (*) Temporary virtual mappings.
+
+ (*) Using kmap_atomic.
+
+ (*) Cost of temporary mappings.
+
+ (*) i386 PAE.
+
+
+====================
+WHAT IS HIGH MEMORY?
+====================
+
+High memory (highmem) is used when the size of physical memory approaches or
+exceeds the maximum size of virtual memory. At that point it becomes
+impossible for the kernel to keep all of the available physical memory mapped
+at all times. This means the kernel needs to start using temporary mappings of
+the pieces of physical memory that it wants to access.
+
+The part of (physical) memory not covered by a permanent mapping is what we
+refer to as 'highmem'. There are various architecture dependent constraints on
+where exactly that border lies.
+
+In the i386 arch, for example, we choose to map the kernel into every process's
+VM space so that we don't have to pay the full TLB invalidation costs for
+kernel entry/exit. This means the available virtual memory space (4GiB on
+i386) has to be divided between user and kernel space.
+
+The traditional split for architectures using this approach is 3:1, 3GiB for
+userspace and the top 1GiB for kernel space:
+
+ +--------+ 0xffffffff
+ | Kernel |
+ +--------+ 0xc0000000
+ | |
+ | User |
+ | |
+ +--------+ 0x00000000
+
+This means that the kernel can at most map 1GiB of physical memory at any one
+time, but because we need virtual address space for other things - including
+temporary maps to access the rest of the physical memory - the actual direct
+map will typically be less (usually around ~896MiB).
+
+Other architectures that have mm context tagged TLBs can have separate kernel
+and user maps. Some hardware (like some ARMs), however, have limited virtual
+space when they use mm context tags.
+
+
+==========================
+TEMPORARY VIRTUAL MAPPINGS
+==========================
+
+The kernel contains several ways of creating temporary mappings:
+
+ (*) vmap(). This can be used to make a long duration mapping of multiple
+ physical pages into a contiguous virtual space. It needs global
+ synchronization to unmap.
+
+ (*) kmap(). This permits a short duration mapping of a single page. It needs
+ global synchronization, but is amortized somewhat. It is also prone to
+ deadlocks when using in a nested fashion, and so it is not recommended for
+ new code.
+
+ (*) kmap_atomic(). This permits a very short duration mapping of a single
+ page. Since the mapping is restricted to the CPU that issued it, it
+ performs well, but the issuing task is therefore required to stay on that
+ CPU until it has finished, lest some other task displace its mappings.
+
+ kmap_atomic() may also be used by interrupt contexts, since it is does not
+ sleep and the caller may not sleep until after kunmap_atomic() is called.
+
+ It may be assumed that k[un]map_atomic() won't fail.
+
+
+=================
+USING KMAP_ATOMIC
+=================
+
+When and where to use kmap_atomic() is straightforward. It is used when code
+wants to access the contents of a page that might be allocated from high memory
+(see __GFP_HIGHMEM), for example a page in the pagecache. The API has two
+functions, and they can be used in a manner similar to the following:
+
+ /* Find the page of interest. */
+ struct page *page = find_get_page(mapping, offset);
+
+ /* Gain access to the contents of that page. */
+ void *vaddr = kmap_atomic(page);
+
+ /* Do something to the contents of that page. */
+ memset(vaddr, 0, PAGE_SIZE);
+
+ /* Unmap that page. */
+ kunmap_atomic(vaddr);
+
+Note that the kunmap_atomic() call takes the result of the kmap_atomic() call
+not the argument.
+
+If you need to map two pages because you want to copy from one page to
+another you need to keep the kmap_atomic calls strictly nested, like:
+
+ vaddr1 = kmap_atomic(page1);
+ vaddr2 = kmap_atomic(page2);
+
+ memcpy(vaddr1, vaddr2, PAGE_SIZE);
+
+ kunmap_atomic(vaddr2);
+ kunmap_atomic(vaddr1);
+
+
+==========================
+COST OF TEMPORARY MAPPINGS
+==========================
+
+The cost of creating temporary mappings can be quite high. The arch has to
+manipulate the kernel's page tables, the data TLB and/or the MMU's registers.
+
+If CONFIG_HIGHMEM is not set, then the kernel will try and create a mapping
+simply with a bit of arithmetic that will convert the page struct address into
+a pointer to the page contents rather than juggling mappings about. In such a
+case, the unmap operation may be a null operation.
+
+If CONFIG_MMU is not set, then there can be no temporary mappings and no
+highmem. In such a case, the arithmetic approach will also be used.
+
+
+========
+i386 PAE
+========
+
+The i386 arch, under some circumstances, will permit you to stick up to 64GiB
+of RAM into your 32-bit machine. This has a number of consequences:
+
+ (*) Linux needs a page-frame structure for each page in the system and the
+ pageframes need to live in the permanent mapping, which means:
+
+ (*) you can have 896M/sizeof(struct page) page-frames at most; with struct
+ page being 32-bytes that would end up being something in the order of 112G
+ worth of pages; the kernel, however, needs to store more than just
+ page-frames in that memory...
+
+ (*) PAE makes your page tables larger - which slows the system down as more
+ data has to be accessed to traverse in TLB fills and the like. One
+ advantage is that PAE has more PTE bits and can provide advanced features
+ like NX and PAT.
+
+The general recommendation is that you don't use more than 8GiB on a 32-bit
+machine - although more might work for you and your workload, you're pretty
+much on your own - don't expect kernel developers to really care much if things
+come apart.
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
new file mode 100644
index 000000000..030977fb8
--- /dev/null
+++ b/Documentation/vm/hugetlbpage.txt
@@ -0,0 +1,335 @@
+
+The intent of this file is to give a brief summary of hugetlbpage support in
+the Linux kernel. This support is built on top of multiple page size support
+that is provided by most modern architectures. For example, x86 CPUs normally
+support 4K and 2M (1G if architecturally supported) page sizes, ia64
+architecture supports multiple page sizes 4K, 8K, 64K, 256K, 1M, 4M, 16M,
+256M and ppc64 supports 4K and 16M. A TLB is a cache of virtual-to-physical
+translations. Typically this is a very scarce resource on processor.
+Operating systems try to make best use of limited number of TLB resources.
+This optimization is more critical now as bigger and bigger physical memories
+(several GBs) are more readily available.
+
+Users can use the huge page support in Linux kernel by either using the mmap
+system call or standard SYSV shared memory system calls (shmget, shmat).
+
+First the Linux kernel needs to be built with the CONFIG_HUGETLBFS
+(present under "File systems") and CONFIG_HUGETLB_PAGE (selected
+automatically when CONFIG_HUGETLBFS is selected) configuration
+options.
+
+The /proc/meminfo file provides information about the total number of
+persistent hugetlb pages in the kernel's huge page pool. It also displays
+information about the number of free, reserved and surplus huge pages and the
+default huge page size. The huge page size is needed for generating the
+proper alignment and size of the arguments to system calls that map huge page
+regions.
+
+The output of "cat /proc/meminfo" will include lines like:
+
+.....
+HugePages_Total: vvv
+HugePages_Free: www
+HugePages_Rsvd: xxx
+HugePages_Surp: yyy
+Hugepagesize: zzz kB
+
+where:
+HugePages_Total is the size of the pool of huge pages.
+HugePages_Free is the number of huge pages in the pool that are not yet
+ allocated.
+HugePages_Rsvd is short for "reserved," and is the number of huge pages for
+ which a commitment to allocate from the pool has been made,
+ but no allocation has yet been made. Reserved huge pages
+ guarantee that an application will be able to allocate a
+ huge page from the pool of huge pages at fault time.
+HugePages_Surp is short for "surplus," and is the number of huge pages in
+ the pool above the value in /proc/sys/vm/nr_hugepages. The
+ maximum number of surplus huge pages is controlled by
+ /proc/sys/vm/nr_overcommit_hugepages.
+
+/proc/filesystems should also show a filesystem of type "hugetlbfs" configured
+in the kernel.
+
+/proc/sys/vm/nr_hugepages indicates the current number of "persistent" huge
+pages in the kernel's huge page pool. "Persistent" huge pages will be
+returned to the huge page pool when freed by a task. A user with root
+privileges can dynamically allocate more or free some persistent huge pages
+by increasing or decreasing the value of 'nr_hugepages'.
+
+Pages that are used as huge pages are reserved inside the kernel and cannot
+be used for other purposes. Huge pages cannot be swapped out under
+memory pressure.
+
+Once a number of huge pages have been pre-allocated to the kernel huge page
+pool, a user with appropriate privilege can use either the mmap system call
+or shared memory system calls to use the huge pages. See the discussion of
+Using Huge Pages, below.
+
+The administrator can allocate persistent huge pages on the kernel boot
+command line by specifying the "hugepages=N" parameter, where 'N' = the
+number of huge pages requested. This is the most reliable method of
+allocating huge pages as memory has not yet become fragmented.
+
+Some platforms support multiple huge page sizes. To allocate huge pages
+of a specific size, one must precede the huge pages boot command parameters
+with a huge page size selection parameter "hugepagesz=<size>". <size> must
+be specified in bytes with optional scale suffix [kKmMgG]. The default huge
+page size may be selected with the "default_hugepagesz=<size>" boot parameter.
+
+When multiple huge page sizes are supported, /proc/sys/vm/nr_hugepages
+indicates the current number of pre-allocated huge pages of the default size.
+Thus, one can use the following command to dynamically allocate/deallocate
+default sized persistent huge pages:
+
+ echo 20 > /proc/sys/vm/nr_hugepages
+
+This command will try to adjust the number of default sized huge pages in the
+huge page pool to 20, allocating or freeing huge pages, as required.
+
+On a NUMA platform, the kernel will attempt to distribute the huge page pool
+over all the set of allowed nodes specified by the NUMA memory policy of the
+task that modifies nr_hugepages. The default for the allowed nodes--when the
+task has default memory policy--is all on-line nodes with memory. Allowed
+nodes with insufficient available, contiguous memory for a huge page will be
+silently skipped when allocating persistent huge pages. See the discussion
+below of the interaction of task memory policy, cpusets and per node attributes
+with the allocation and freeing of persistent huge pages.
+
+The success or failure of huge page allocation depends on the amount of
+physically contiguous memory that is present in system at the time of the
+allocation attempt. If the kernel is unable to allocate huge pages from
+some nodes in a NUMA system, it will attempt to make up the difference by
+allocating extra pages on other nodes with sufficient available contiguous
+memory, if any.
+
+System administrators may want to put this command in one of the local rc
+init files. This will enable the kernel to allocate huge pages early in
+the boot process when the possibility of getting physical contiguous pages
+is still very high. Administrators can verify the number of huge pages
+actually allocated by checking the sysctl or meminfo. To check the per node
+distribution of huge pages in a NUMA system, use:
+
+ cat /sys/devices/system/node/node*/meminfo | fgrep Huge
+
+/proc/sys/vm/nr_overcommit_hugepages specifies how large the pool of
+huge pages can grow, if more huge pages than /proc/sys/vm/nr_hugepages are
+requested by applications. Writing any non-zero value into this file
+indicates that the hugetlb subsystem is allowed to try to obtain that
+number of "surplus" huge pages from the kernel's normal page pool, when the
+persistent huge page pool is exhausted. As these surplus huge pages become
+unused, they are freed back to the kernel's normal page pool.
+
+When increasing the huge page pool size via nr_hugepages, any existing surplus
+pages will first be promoted to persistent huge pages. Then, additional
+huge pages will be allocated, if necessary and if possible, to fulfill
+the new persistent huge page pool size.
+
+The administrator may shrink the pool of persistent huge pages for
+the default huge page size by setting the nr_hugepages sysctl to a
+smaller value. The kernel will attempt to balance the freeing of huge pages
+across all nodes in the memory policy of the task modifying nr_hugepages.
+Any free huge pages on the selected nodes will be freed back to the kernel's
+normal page pool.
+
+Caveat: Shrinking the persistent huge page pool via nr_hugepages such that
+it becomes less than the number of huge pages in use will convert the balance
+of the in-use huge pages to surplus huge pages. This will occur even if
+the number of surplus pages it would exceed the overcommit value. As long as
+this condition holds--that is, until nr_hugepages+nr_overcommit_hugepages is
+increased sufficiently, or the surplus huge pages go out of use and are freed--
+no more surplus huge pages will be allowed to be allocated.
+
+With support for multiple huge page pools at run-time available, much of
+the huge page userspace interface in /proc/sys/vm has been duplicated in sysfs.
+The /proc interfaces discussed above have been retained for backwards
+compatibility. The root huge page control directory in sysfs is:
+
+ /sys/kernel/mm/hugepages
+
+For each huge page size supported by the running kernel, a subdirectory
+will exist, of the form:
+
+ hugepages-${size}kB
+
+Inside each of these directories, the same set of files will exist:
+
+ nr_hugepages
+ nr_hugepages_mempolicy
+ nr_overcommit_hugepages
+ free_hugepages
+ resv_hugepages
+ surplus_hugepages
+
+which function as described above for the default huge page-sized case.
+
+
+Interaction of Task Memory Policy with Huge Page Allocation/Freeing
+===================================================================
+
+Whether huge pages are allocated and freed via the /proc interface or
+the /sysfs interface using the nr_hugepages_mempolicy attribute, the NUMA
+nodes from which huge pages are allocated or freed are controlled by the
+NUMA memory policy of the task that modifies the nr_hugepages_mempolicy
+sysctl or attribute. When the nr_hugepages attribute is used, mempolicy
+is ignored.
+
+The recommended method to allocate or free huge pages to/from the kernel
+huge page pool, using the nr_hugepages example above, is:
+
+ numactl --interleave <node-list> echo 20 \
+ >/proc/sys/vm/nr_hugepages_mempolicy
+
+or, more succinctly:
+
+ numactl -m <node-list> echo 20 >/proc/sys/vm/nr_hugepages_mempolicy
+
+This will allocate or free abs(20 - nr_hugepages) to or from the nodes
+specified in <node-list>, depending on whether number of persistent huge pages
+is initially less than or greater than 20, respectively. No huge pages will be
+allocated nor freed on any node not included in the specified <node-list>.
+
+When adjusting the persistent hugepage count via nr_hugepages_mempolicy, any
+memory policy mode--bind, preferred, local or interleave--may be used. The
+resulting effect on persistent huge page allocation is as follows:
+
+1) Regardless of mempolicy mode [see Documentation/vm/numa_memory_policy.txt],
+ persistent huge pages will be distributed across the node or nodes
+ specified in the mempolicy as if "interleave" had been specified.
+ However, if a node in the policy does not contain sufficient contiguous
+ memory for a huge page, the allocation will not "fallback" to the nearest
+ neighbor node with sufficient contiguous memory. To do this would cause
+ undesirable imbalance in the distribution of the huge page pool, or
+ possibly, allocation of persistent huge pages on nodes not allowed by
+ the task's memory policy.
+
+2) One or more nodes may be specified with the bind or interleave policy.
+ If more than one node is specified with the preferred policy, only the
+ lowest numeric id will be used. Local policy will select the node where
+ the task is running at the time the nodes_allowed mask is constructed.
+ For local policy to be deterministic, the task must be bound to a cpu or
+ cpus in a single node. Otherwise, the task could be migrated to some
+ other node at any time after launch and the resulting node will be
+ indeterminate. Thus, local policy is not very useful for this purpose.
+ Any of the other mempolicy modes may be used to specify a single node.
+
+3) The nodes allowed mask will be derived from any non-default task mempolicy,
+ whether this policy was set explicitly by the task itself or one of its
+ ancestors, such as numactl. This means that if the task is invoked from a
+ shell with non-default policy, that policy will be used. One can specify a
+ node list of "all" with numactl --interleave or --membind [-m] to achieve
+ interleaving over all nodes in the system or cpuset.
+
+4) Any task mempolicy specifed--e.g., using numactl--will be constrained by
+ the resource limits of any cpuset in which the task runs. Thus, there will
+ be no way for a task with non-default policy running in a cpuset with a
+ subset of the system nodes to allocate huge pages outside the cpuset
+ without first moving to a cpuset that contains all of the desired nodes.
+
+5) Boot-time huge page allocation attempts to distribute the requested number
+ of huge pages over all on-lines nodes with memory.
+
+Per Node Hugepages Attributes
+=============================
+
+A subset of the contents of the root huge page control directory in sysfs,
+described above, will be replicated under each the system device of each
+NUMA node with memory in:
+
+ /sys/devices/system/node/node[0-9]*/hugepages/
+
+Under this directory, the subdirectory for each supported huge page size
+contains the following attribute files:
+
+ nr_hugepages
+ free_hugepages
+ surplus_hugepages
+
+The free_' and surplus_' attribute files are read-only. They return the number
+of free and surplus [overcommitted] huge pages, respectively, on the parent
+node.
+
+The nr_hugepages attribute returns the total number of huge pages on the
+specified node. When this attribute is written, the number of persistent huge
+pages on the parent node will be adjusted to the specified value, if sufficient
+resources exist, regardless of the task's mempolicy or cpuset constraints.
+
+Note that the number of overcommit and reserve pages remain global quantities,
+as we don't know until fault time, when the faulting task's mempolicy is
+applied, from which node the huge page allocation will be attempted.
+
+
+Using Huge Pages
+================
+
+If the user applications are going to request huge pages using mmap system
+call, then it is required that system administrator mount a file system of
+type hugetlbfs:
+
+ mount -t hugetlbfs \
+ -o uid=<value>,gid=<value>,mode=<value>,pagesize=<value>,size=<value>,\
+ min_size=<value>,nr_inodes=<value> none /mnt/huge
+
+This command mounts a (pseudo) filesystem of type hugetlbfs on the directory
+/mnt/huge. Any files created on /mnt/huge uses huge pages. The uid and gid
+options sets the owner and group of the root of the file system. By default
+the uid and gid of the current process are taken. The mode option sets the
+mode of root of file system to value & 01777. This value is given in octal.
+By default the value 0755 is picked. If the paltform supports multiple huge
+page sizes, the pagesize option can be used to specify the huge page size and
+associated pool. pagesize is specified in bytes. If pagesize is not specified
+the paltform's default huge page size and associated pool will be used. The
+size option sets the maximum value of memory (huge pages) allowed for that
+filesystem (/mnt/huge). The size option can be specified in bytes, or as a
+percentage of the specified huge page pool (nr_hugepages). The size is
+rounded down to HPAGE_SIZE boundary. The min_size option sets the minimum
+value of memory (huge pages) allowed for the filesystem. min_size can be
+specified in the same way as size, either bytes or a percentage of the
+huge page pool. At mount time, the number of huge pages specified by
+min_size are reserved for use by the filesystem. If there are not enough
+free huge pages available, the mount will fail. As huge pages are allocated
+to the filesystem and freed, the reserve count is adjusted so that the sum
+of allocated and reserved huge pages is always at least min_size. The option
+nr_inodes sets the maximum number of inodes that /mnt/huge can use. If the
+size, min_size or nr_inodes option is not provided on command line then
+no limits are set. For pagesize, size, min_size and nr_inodes options, you
+can use [G|g]/[M|m]/[K|k] to represent giga/mega/kilo. For example, size=2K
+has the same meaning as size=2048.
+
+While read system calls are supported on files that reside on hugetlb
+file systems, write system calls are not.
+
+Regular chown, chgrp, and chmod commands (with right permissions) could be
+used to change the file attributes on hugetlbfs.
+
+Also, it is important to note that no such mount command is required if
+applications are going to use only shmat/shmget system calls or mmap with
+MAP_HUGETLB. For an example of how to use mmap with MAP_HUGETLB see map_hugetlb
+below.
+
+Users who wish to use hugetlb memory via shared memory segment should be a
+member of a supplementary group and system admin needs to configure that gid
+into /proc/sys/vm/hugetlb_shm_group. It is possible for same or different
+applications to use any combination of mmaps and shm* calls, though the mount of
+filesystem will be required for using mmap calls without MAP_HUGETLB.
+
+Syscalls that operate on memory backed by hugetlb pages only have their lengths
+aligned to the native page size of the processor; they will normally fail with
+errno set to EINVAL or exclude hugetlb pages that extend beyond the length if
+not hugepage aligned. For example, munmap(2) will fail if memory is backed by
+a hugetlb page and the length is smaller than the hugepage size.
+
+
+Examples
+========
+
+1) map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c
+
+2) hugepage-shm: see tools/testing/selftests/vm/hugepage-shm.c
+
+3) hugepage-mmap: see tools/testing/selftests/vm/hugepage-mmap.c
+
+4) The libhugetlbfs (http://libhugetlbfs.sourceforge.net) library provides a
+ wide range of userspace tools to help with huge page usability, environment
+ setup, and control. Furthermore it provides useful test cases that should be
+ used when modifying code to ensure no regressions are introduced.
diff --git a/Documentation/vm/hwpoison.txt b/Documentation/vm/hwpoison.txt
new file mode 100644
index 000000000..6ae89a9ed
--- /dev/null
+++ b/Documentation/vm/hwpoison.txt
@@ -0,0 +1,187 @@
+What is hwpoison?
+
+Upcoming Intel CPUs have support for recovering from some memory errors
+(``MCA recovery''). This requires the OS to declare a page "poisoned",
+kill the processes associated with it and avoid using it in the future.
+
+This patchkit implements the necessary infrastructure in the VM.
+
+To quote the overview comment:
+
+ * High level machine check handler. Handles pages reported by the
+ * hardware as being corrupted usually due to a 2bit ECC memory or cache
+ * failure.
+ *
+ * This focusses on pages detected as corrupted in the background.
+ * When the current CPU tries to consume corruption the currently
+ * running process can just be killed directly instead. This implies
+ * that if the error cannot be handled for some reason it's safe to
+ * just ignore it because no corruption has been consumed yet. Instead
+ * when that happens another machine check will happen.
+ *
+ * Handles page cache pages in various states. The tricky part
+ * here is that we can access any page asynchronous to other VM
+ * users, because memory failures could happen anytime and anywhere,
+ * possibly violating some of their assumptions. This is why this code
+ * has to be extremely careful. Generally it tries to use normal locking
+ * rules, as in get the standard locks, even if that means the
+ * error handling takes potentially a long time.
+ *
+ * Some of the operations here are somewhat inefficient and have non
+ * linear algorithmic complexity, because the data structures have not
+ * been optimized for this case. This is in particular the case
+ * for the mapping from a vma to a process. Since this case is expected
+ * to be rare we hope we can get away with this.
+
+The code consists of a the high level handler in mm/memory-failure.c,
+a new page poison bit and various checks in the VM to handle poisoned
+pages.
+
+The main target right now is KVM guests, but it works for all kinds
+of applications. KVM support requires a recent qemu-kvm release.
+
+For the KVM use there was need for a new signal type so that
+KVM can inject the machine check into the guest with the proper
+address. This in theory allows other applications to handle
+memory failures too. The expection is that near all applications
+won't do that, but some very specialized ones might.
+
+---
+
+There are two (actually three) modi memory failure recovery can be in:
+
+vm.memory_failure_recovery sysctl set to zero:
+ All memory failures cause a panic. Do not attempt recovery.
+ (on x86 this can be also affected by the tolerant level of the
+ MCE subsystem)
+
+early kill
+ (can be controlled globally and per process)
+ Send SIGBUS to the application as soon as the error is detected
+ This allows applications who can process memory errors in a gentle
+ way (e.g. drop affected object)
+ This is the mode used by KVM qemu.
+
+late kill
+ Send SIGBUS when the application runs into the corrupted page.
+ This is best for memory error unaware applications and default
+ Note some pages are always handled as late kill.
+
+---
+
+User control:
+
+vm.memory_failure_recovery
+ See sysctl.txt
+
+vm.memory_failure_early_kill
+ Enable early kill mode globally
+
+PR_MCE_KILL
+ Set early/late kill mode/revert to system default
+ arg1: PR_MCE_KILL_CLEAR: Revert to system default
+ arg1: PR_MCE_KILL_SET: arg2 defines thread specific mode
+ PR_MCE_KILL_EARLY: Early kill
+ PR_MCE_KILL_LATE: Late kill
+ PR_MCE_KILL_DEFAULT: Use system global default
+ Note that if you want to have a dedicated thread which handles
+ the SIGBUS(BUS_MCEERR_AO) on behalf of the process, you should
+ call prctl(PR_MCE_KILL_EARLY) on the designated thread. Otherwise,
+ the SIGBUS is sent to the main thread.
+
+PR_MCE_KILL_GET
+ return current mode
+
+
+---
+
+Testing:
+
+madvise(MADV_HWPOISON, ....)
+ (as root)
+ Poison a page in the process for testing
+
+
+hwpoison-inject module through debugfs
+
+/sys/debug/hwpoison/
+
+corrupt-pfn
+
+Inject hwpoison fault at PFN echoed into this file. This does
+some early filtering to avoid corrupted unintended pages in test suites.
+
+unpoison-pfn
+
+Software-unpoison page at PFN echoed into this file. This
+way a page can be reused again.
+This only works for Linux injected failures, not for real
+memory failures.
+
+Note these injection interfaces are not stable and might change between
+kernel versions
+
+corrupt-filter-dev-major
+corrupt-filter-dev-minor
+
+Only handle memory failures to pages associated with the file system defined
+by block device major/minor. -1U is the wildcard value.
+This should be only used for testing with artificial injection.
+
+corrupt-filter-memcg
+
+Limit injection to pages owned by memgroup. Specified by inode number
+of the memcg.
+
+Example:
+ mkdir /sys/fs/cgroup/mem/hwpoison
+
+ usemem -m 100 -s 1000 &
+ echo `jobs -p` > /sys/fs/cgroup/mem/hwpoison/tasks
+
+ memcg_ino=$(ls -id /sys/fs/cgroup/mem/hwpoison | cut -f1 -d' ')
+ echo $memcg_ino > /debug/hwpoison/corrupt-filter-memcg
+
+ page-types -p `pidof init` --hwpoison # shall do nothing
+ page-types -p `pidof usemem` --hwpoison # poison its pages
+
+corrupt-filter-flags-mask
+corrupt-filter-flags-value
+
+When specified, only poison pages if ((page_flags & mask) == value).
+This allows stress testing of many kinds of pages. The page_flags
+are the same as in /proc/kpageflags. The flag bits are defined in
+include/linux/kernel-page-flags.h and documented in
+Documentation/vm/pagemap.txt
+
+Architecture specific MCE injector
+
+x86 has mce-inject, mce-test
+
+Some portable hwpoison test programs in mce-test, see blow.
+
+---
+
+References:
+
+http://halobates.de/mce-lc09-2.pdf
+ Overview presentation from LinuxCon 09
+
+git://git.kernel.org/pub/scm/utils/cpu/mce/mce-test.git
+ Test suite (hwpoison specific portable tests in tsrc)
+
+git://git.kernel.org/pub/scm/utils/cpu/mce/mce-inject.git
+ x86 specific injector
+
+
+---
+
+Limitations:
+
+- Not all page types are supported and never will. Most kernel internal
+objects cannot be recovered, only LRU pages for now.
+- Right now hugepage support is missing.
+
+---
+Andi Kleen, Oct 2009
+
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
new file mode 100644
index 000000000..f34a8ee6f
--- /dev/null
+++ b/Documentation/vm/ksm.txt
@@ -0,0 +1,97 @@
+How to use the Kernel Samepage Merging feature
+----------------------------------------------
+
+KSM is a memory-saving de-duplication feature, enabled by CONFIG_KSM=y,
+added to the Linux kernel in 2.6.32. See mm/ksm.c for its implementation,
+and http://lwn.net/Articles/306704/ and http://lwn.net/Articles/330589/
+
+The KSM daemon ksmd periodically scans those areas of user memory which
+have been registered with it, looking for pages of identical content which
+can be replaced by a single write-protected page (which is automatically
+copied if a process later wants to update its content).
+
+KSM was originally developed for use with KVM (where it was known as
+Kernel Shared Memory), to fit more virtual machines into physical memory,
+by sharing the data common between them. But it can be useful to any
+application which generates many instances of the same data.
+
+KSM only merges anonymous (private) pages, never pagecache (file) pages.
+KSM's merged pages were originally locked into kernel memory, but can now
+be swapped out just like other user pages (but sharing is broken when they
+are swapped back in: ksmd must rediscover their identity and merge again).
+
+KSM only operates on those areas of address space which an application
+has advised to be likely candidates for merging, by using the madvise(2)
+system call: int madvise(addr, length, MADV_MERGEABLE).
+
+The app may call int madvise(addr, length, MADV_UNMERGEABLE) to cancel
+that advice and restore unshared pages: whereupon KSM unmerges whatever
+it merged in that range. Note: this unmerging call may suddenly require
+more memory than is available - possibly failing with EAGAIN, but more
+probably arousing the Out-Of-Memory killer.
+
+If KSM is not configured into the running kernel, madvise MADV_MERGEABLE
+and MADV_UNMERGEABLE simply fail with EINVAL. If the running kernel was
+built with CONFIG_KSM=y, those calls will normally succeed: even if the
+the KSM daemon is not currently running, MADV_MERGEABLE still registers
+the range for whenever the KSM daemon is started; even if the range
+cannot contain any pages which KSM could actually merge; even if
+MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
+
+Like other madvise calls, they are intended for use on mapped areas of
+the user address space: they will report ENOMEM if the specified range
+includes unmapped gaps (though working on the intervening mapped areas),
+and might fail with EAGAIN if not enough memory for internal structures.
+
+Applications should be considerate in their use of MADV_MERGEABLE,
+restricting its use to areas likely to benefit. KSM's scans may use a lot
+of processing power: some installations will disable KSM for that reason.
+
+The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/,
+readable by all but writable only by root:
+
+pages_to_scan - how many present pages to scan before ksmd goes to sleep
+ e.g. "echo 100 > /sys/kernel/mm/ksm/pages_to_scan"
+ Default: 100 (chosen for demonstration purposes)
+
+sleep_millisecs - how many milliseconds ksmd should sleep before next scan
+ e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
+ Default: 20 (chosen for demonstration purposes)
+
+merge_across_nodes - specifies if pages from different numa nodes can be merged.
+ When set to 0, ksm merges only pages which physically
+ reside in the memory area of same NUMA node. That brings
+ lower latency to access of shared pages. Systems with more
+ nodes, at significant NUMA distances, are likely to benefit
+ from the lower latency of setting 0. Smaller systems, which
+ need to minimize memory usage, are likely to benefit from
+ the greater sharing of setting 1 (default). You may wish to
+ compare how your system performs under each setting, before
+ deciding on which to use. merge_across_nodes setting can be
+ changed only when there are no ksm shared pages in system:
+ set run 2 to unmerge pages first, then to 1 after changing
+ merge_across_nodes, to remerge according to the new setting.
+ Default: 1 (merging across nodes as in earlier releases)
+
+run - set 0 to stop ksmd from running but keep merged pages,
+ set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
+ set 2 to stop ksmd and unmerge all pages currently merged,
+ but leave mergeable areas registered for next run
+ Default: 0 (must be changed to 1 to activate KSM,
+ except if CONFIG_SYSFS is disabled)
+
+The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
+
+pages_shared - how many shared pages are being used
+pages_sharing - how many more sites are sharing them i.e. how much saved
+pages_unshared - how many pages unique but repeatedly checked for merging
+pages_volatile - how many pages changing too fast to be placed in a tree
+full_scans - how many times all mergeable areas have been scanned
+
+A high ratio of pages_sharing to pages_shared indicates good sharing, but
+a high ratio of pages_unshared to pages_sharing indicates wasted effort.
+pages_volatile embraces several different kinds of activity, but a high
+proportion there would also indicate poor use of madvise MADV_MERGEABLE.
+
+Izik Eidus,
+Hugh Dickins, 17 Nov 2009
diff --git a/Documentation/vm/numa b/Documentation/vm/numa
new file mode 100644
index 000000000..ade012742
--- /dev/null
+++ b/Documentation/vm/numa
@@ -0,0 +1,149 @@
+Started Nov 1999 by Kanoj Sarcar <kanoj@sgi.com>
+
+What is NUMA?
+
+This question can be answered from a couple of perspectives: the
+hardware view and the Linux software view.
+
+From the hardware perspective, a NUMA system is a computer platform that
+comprises multiple components or assemblies each of which may contain 0
+or more CPUs, local memory, and/or IO buses. For brevity and to
+disambiguate the hardware view of these physical components/assemblies
+from the software abstraction thereof, we'll call the components/assemblies
+'cells' in this document.
+
+Each of the 'cells' may be viewed as an SMP [symmetric multi-processor] subset
+of the system--although some components necessary for a stand-alone SMP system
+may not be populated on any given cell. The cells of the NUMA system are
+connected together with some sort of system interconnect--e.g., a crossbar or
+point-to-point link are common types of NUMA system interconnects. Both of
+these types of interconnects can be aggregated to create NUMA platforms with
+cells at multiple distances from other cells.
+
+For Linux, the NUMA platforms of interest are primarily what is known as Cache
+Coherent NUMA or ccNUMA systems. With ccNUMA systems, all memory is visible
+to and accessible from any CPU attached to any cell and cache coherency
+is handled in hardware by the processor caches and/or the system interconnect.
+
+Memory access time and effective memory bandwidth varies depending on how far
+away the cell containing the CPU or IO bus making the memory access is from the
+cell containing the target memory. For example, access to memory by CPUs
+attached to the same cell will experience faster access times and higher
+bandwidths than accesses to memory on other, remote cells. NUMA platforms
+can have cells at multiple remote distances from any given cell.
+
+Platform vendors don't build NUMA systems just to make software developers'
+lives interesting. Rather, this architecture is a means to provide scalable
+memory bandwidth. However, to achieve scalable memory bandwidth, system and
+application software must arrange for a large majority of the memory references
+[cache misses] to be to "local" memory--memory on the same cell, if any--or
+to the closest cell with memory.
+
+This leads to the Linux software view of a NUMA system:
+
+Linux divides the system's hardware resources into multiple software
+abstractions called "nodes". Linux maps the nodes onto the physical cells
+of the hardware platform, abstracting away some of the details for some
+architectures. As with physical cells, software nodes may contain 0 or more
+CPUs, memory and/or IO buses. And, again, memory accesses to memory on
+"closer" nodes--nodes that map to closer cells--will generally experience
+faster access times and higher effective bandwidth than accesses to more
+remote cells.
+
+For some architectures, such as x86, Linux will "hide" any node representing a
+physical cell that has no memory attached, and reassign any CPUs attached to
+that cell to a node representing a cell that does have memory. Thus, on
+these architectures, one cannot assume that all CPUs that Linux associates with
+a given node will see the same local memory access times and bandwidth.
+
+In addition, for some architectures, again x86 is an example, Linux supports
+the emulation of additional nodes. For NUMA emulation, linux will carve up
+the existing nodes--or the system memory for non-NUMA platforms--into multiple
+nodes. Each emulated node will manage a fraction of the underlying cells'
+physical memory. NUMA emluation is useful for testing NUMA kernel and
+application features on non-NUMA platforms, and as a sort of memory resource
+management mechanism when used together with cpusets.
+[see Documentation/cgroups/cpusets.txt]
+
+For each node with memory, Linux constructs an independent memory management
+subsystem, complete with its own free page lists, in-use page lists, usage
+statistics and locks to mediate access. In addition, Linux constructs for
+each memory zone [one or more of DMA, DMA32, NORMAL, HIGH_MEMORY, MOVABLE],
+an ordered "zonelist". A zonelist specifies the zones/nodes to visit when a
+selected zone/node cannot satisfy the allocation request. This situation,
+when a zone has no available memory to satisfy a request, is called
+"overflow" or "fallback".
+
+Because some nodes contain multiple zones containing different types of
+memory, Linux must decide whether to order the zonelists such that allocations
+fall back to the same zone type on a different node, or to a different zone
+type on the same node. This is an important consideration because some zones,
+such as DMA or DMA32, represent relatively scarce resources. Linux chooses
+a default zonelist order based on the sizes of the various zone types relative
+to the total memory of the node and the total memory of the system. The
+default zonelist order may be overridden using the numa_zonelist_order kernel
+boot parameter or sysctl. [see Documentation/kernel-parameters.txt and
+Documentation/sysctl/vm.txt]
+
+By default, Linux will attempt to satisfy memory allocation requests from the
+node to which the CPU that executes the request is assigned. Specifically,
+Linux will attempt to allocate from the first node in the appropriate zonelist
+for the node where the request originates. This is called "local allocation."
+If the "local" node cannot satisfy the request, the kernel will examine other
+nodes' zones in the selected zonelist looking for the first zone in the list
+that can satisfy the request.
+
+Local allocation will tend to keep subsequent access to the allocated memory
+"local" to the underlying physical resources and off the system interconnect--
+as long as the task on whose behalf the kernel allocated some memory does not
+later migrate away from that memory. The Linux scheduler is aware of the
+NUMA topology of the platform--embodied in the "scheduling domains" data
+structures [see Documentation/scheduler/sched-domains.txt]--and the scheduler
+attempts to minimize task migration to distant scheduling domains. However,
+the scheduler does not take a task's NUMA footprint into account directly.
+Thus, under sufficient imbalance, tasks can migrate between nodes, remote
+from their initial node and kernel data structures.
+
+System administrators and application designers can restrict a task's migration
+to improve NUMA locality using various CPU affinity command line interfaces,
+such as taskset(1) and numactl(1), and program interfaces such as
+sched_setaffinity(2). Further, one can modify the kernel's default local
+allocation behavior using Linux NUMA memory policy.
+[see Documentation/vm/numa_memory_policy.txt.]
+
+System administrators can restrict the CPUs and nodes' memories that a non-
+privileged user can specify in the scheduling or NUMA commands and functions
+using control groups and CPUsets. [see Documentation/cgroups/cpusets.txt]
+
+On architectures that do not hide memoryless nodes, Linux will include only
+zones [nodes] with memory in the zonelists. This means that for a memoryless
+node the "local memory node"--the node of the first zone in CPU's node's
+zonelist--will not be the node itself. Rather, it will be the node that the
+kernel selected as the nearest node with memory when it built the zonelists.
+So, default, local allocations will succeed with the kernel supplying the
+closest available memory. This is a consequence of the same mechanism that
+allows such allocations to fallback to other nearby nodes when a node that
+does contain memory overflows.
+
+Some kernel allocations do not want or cannot tolerate this allocation fallback
+behavior. Rather they want to be sure they get memory from the specified node
+or get notified that the node has no free memory. This is usually the case when
+a subsystem allocates per CPU memory resources, for example.
+
+A typical model for making such an allocation is to obtain the node id of the
+node to which the "current CPU" is attached using one of the kernel's
+numa_node_id() or CPU_to_node() functions and then request memory from only
+the node id returned. When such an allocation fails, the requesting subsystem
+may revert to its own fallback path. The slab kernel memory allocator is an
+example of this. Or, the subsystem may choose to disable or not to enable
+itself on allocation failure. The kernel profiling subsystem is an example of
+this.
+
+If the architecture supports--does not hide--memoryless nodes, then CPUs
+attached to memoryless nodes would always incur the fallback path overhead
+or some subsystems would fail to initialize if they attempted to allocated
+memory exclusively from a node without memory. To support such
+architectures transparently, kernel subsystems can use the numa_mem_id()
+or cpu_to_mem() function to locate the "local memory node" for the calling or
+specified CPU. Again, this is the same node from which default, local page
+allocations will be attempted.
diff --git a/Documentation/vm/numa_memory_policy.txt b/Documentation/vm/numa_memory_policy.txt
new file mode 100644
index 000000000..badb05076
--- /dev/null
+++ b/Documentation/vm/numa_memory_policy.txt
@@ -0,0 +1,452 @@
+
+What is Linux Memory Policy?
+
+In the Linux kernel, "memory policy" determines from which node the kernel will
+allocate memory in a NUMA system or in an emulated NUMA system. Linux has
+supported platforms with Non-Uniform Memory Access architectures since 2.4.?.
+The current memory policy support was added to Linux 2.6 around May 2004. This
+document attempts to describe the concepts and APIs of the 2.6 memory policy
+support.
+
+Memory policies should not be confused with cpusets
+(Documentation/cgroups/cpusets.txt)
+which is an administrative mechanism for restricting the nodes from which
+memory may be allocated by a set of processes. Memory policies are a
+programming interface that a NUMA-aware application can take advantage of. When
+both cpusets and policies are applied to a task, the restrictions of the cpuset
+takes priority. See "MEMORY POLICIES AND CPUSETS" below for more details.
+
+MEMORY POLICY CONCEPTS
+
+Scope of Memory Policies
+
+The Linux kernel supports _scopes_ of memory policy, described here from
+most general to most specific:
+
+ System Default Policy: this policy is "hard coded" into the kernel. It
+ is the policy that governs all page allocations that aren't controlled
+ by one of the more specific policy scopes discussed below. When the
+ system is "up and running", the system default policy will use "local
+ allocation" described below. However, during boot up, the system
+ default policy will be set to interleave allocations across all nodes
+ with "sufficient" memory, so as not to overload the initial boot node
+ with boot-time allocations.
+
+ Task/Process Policy: this is an optional, per-task policy. When defined
+ for a specific task, this policy controls all page allocations made by or
+ on behalf of the task that aren't controlled by a more specific scope.
+ If a task does not define a task policy, then all page allocations that
+ would have been controlled by the task policy "fall back" to the System
+ Default Policy.
+
+ The task policy applies to the entire address space of a task. Thus,
+ it is inheritable, and indeed is inherited, across both fork()
+ [clone() w/o the CLONE_VM flag] and exec*(). This allows a parent task
+ to establish the task policy for a child task exec()'d from an
+ executable image that has no awareness of memory policy. See the
+ MEMORY POLICY APIS section, below, for an overview of the system call
+ that a task may use to set/change its task/process policy.
+
+ In a multi-threaded task, task policies apply only to the thread
+ [Linux kernel task] that installs the policy and any threads
+ subsequently created by that thread. Any sibling threads existing
+ at the time a new task policy is installed retain their current
+ policy.
+
+ A task policy applies only to pages allocated after the policy is
+ installed. Any pages already faulted in by the task when the task
+ changes its task policy remain where they were allocated based on
+ the policy at the time they were allocated.
+
+ VMA Policy: A "VMA" or "Virtual Memory Area" refers to a range of a task's
+ virtual address space. A task may define a specific policy for a range
+ of its virtual address space. See the MEMORY POLICIES APIS section,
+ below, for an overview of the mbind() system call used to set a VMA
+ policy.
+
+ A VMA policy will govern the allocation of pages that back this region of
+ the address space. Any regions of the task's address space that don't
+ have an explicit VMA policy will fall back to the task policy, which may
+ itself fall back to the System Default Policy.
+
+ VMA policies have a few complicating details:
+
+ VMA policy applies ONLY to anonymous pages. These include pages
+ allocated for anonymous segments, such as the task stack and heap, and
+ any regions of the address space mmap()ed with the MAP_ANONYMOUS flag.
+ If a VMA policy is applied to a file mapping, it will be ignored if
+ the mapping used the MAP_SHARED flag. If the file mapping used the
+ MAP_PRIVATE flag, the VMA policy will only be applied when an
+ anonymous page is allocated on an attempt to write to the mapping--
+ i.e., at Copy-On-Write.
+
+ VMA policies are shared between all tasks that share a virtual address
+ space--a.k.a. threads--independent of when the policy is installed; and
+ they are inherited across fork(). However, because VMA policies refer
+ to a specific region of a task's address space, and because the address
+ space is discarded and recreated on exec*(), VMA policies are NOT
+ inheritable across exec(). Thus, only NUMA-aware applications may
+ use VMA policies.
+
+ A task may install a new VMA policy on a sub-range of a previously
+ mmap()ed region. When this happens, Linux splits the existing virtual
+ memory area into 2 or 3 VMAs, each with it's own policy.
+
+ By default, VMA policy applies only to pages allocated after the policy
+ is installed. Any pages already faulted into the VMA range remain
+ where they were allocated based on the policy at the time they were
+ allocated. However, since 2.6.16, Linux supports page migration via
+ the mbind() system call, so that page contents can be moved to match
+ a newly installed policy.
+
+ Shared Policy: Conceptually, shared policies apply to "memory objects"
+ mapped shared into one or more tasks' distinct address spaces. An
+ application installs a shared policies the same way as VMA policies--using
+ the mbind() system call specifying a range of virtual addresses that map
+ the shared object. However, unlike VMA policies, which can be considered
+ to be an attribute of a range of a task's address space, shared policies
+ apply directly to the shared object. Thus, all tasks that attach to the
+ object share the policy, and all pages allocated for the shared object,
+ by any task, will obey the shared policy.
+
+ As of 2.6.22, only shared memory segments, created by shmget() or
+ mmap(MAP_ANONYMOUS|MAP_SHARED), support shared policy. When shared
+ policy support was added to Linux, the associated data structures were
+ added to hugetlbfs shmem segments. At the time, hugetlbfs did not
+ support allocation at fault time--a.k.a lazy allocation--so hugetlbfs
+ shmem segments were never "hooked up" to the shared policy support.
+ Although hugetlbfs segments now support lazy allocation, their support
+ for shared policy has not been completed.
+
+ As mentioned above [re: VMA policies], allocations of page cache
+ pages for regular files mmap()ed with MAP_SHARED ignore any VMA
+ policy installed on the virtual address range backed by the shared
+ file mapping. Rather, shared page cache pages, including pages backing
+ private mappings that have not yet been written by the task, follow
+ task policy, if any, else System Default Policy.
+
+ The shared policy infrastructure supports different policies on subset
+ ranges of the shared object. However, Linux still splits the VMA of
+ the task that installs the policy for each range of distinct policy.
+ Thus, different tasks that attach to a shared memory segment can have
+ different VMA configurations mapping that one shared object. This
+ can be seen by examining the /proc/<pid>/numa_maps of tasks sharing
+ a shared memory region, when one task has installed shared policy on
+ one or more ranges of the region.
+
+Components of Memory Policies
+
+ A Linux memory policy consists of a "mode", optional mode flags, and an
+ optional set of nodes. The mode determines the behavior of the policy,
+ the optional mode flags determine the behavior of the mode, and the
+ optional set of nodes can be viewed as the arguments to the policy
+ behavior.
+
+ Internally, memory policies are implemented by a reference counted
+ structure, struct mempolicy. Details of this structure will be discussed
+ in context, below, as required to explain the behavior.
+
+ Linux memory policy supports the following 4 behavioral modes:
+
+ Default Mode--MPOL_DEFAULT: This mode is only used in the memory
+ policy APIs. Internally, MPOL_DEFAULT is converted to the NULL
+ memory policy in all policy scopes. Any existing non-default policy
+ will simply be removed when MPOL_DEFAULT is specified. As a result,
+ MPOL_DEFAULT means "fall back to the next most specific policy scope."
+
+ For example, a NULL or default task policy will fall back to the
+ system default policy. A NULL or default vma policy will fall
+ back to the task policy.
+
+ When specified in one of the memory policy APIs, the Default mode
+ does not use the optional set of nodes.
+
+ It is an error for the set of nodes specified for this policy to
+ be non-empty.
+
+ MPOL_BIND: This mode specifies that memory must come from the
+ set of nodes specified by the policy. Memory will be allocated from
+ the node in the set with sufficient free memory that is closest to
+ the node where the allocation takes place.
+
+ MPOL_PREFERRED: This mode specifies that the allocation should be
+ attempted from the single node specified in the policy. If that
+ allocation fails, the kernel will search other nodes, in order of
+ increasing distance from the preferred node based on information
+ provided by the platform firmware.
+
+ Internally, the Preferred policy uses a single node--the
+ preferred_node member of struct mempolicy. When the internal
+ mode flag MPOL_F_LOCAL is set, the preferred_node is ignored and
+ the policy is interpreted as local allocation. "Local" allocation
+ policy can be viewed as a Preferred policy that starts at the node
+ containing the cpu where the allocation takes place.
+
+ It is possible for the user to specify that local allocation is
+ always preferred by passing an empty nodemask with this mode.
+ If an empty nodemask is passed, the policy cannot use the
+ MPOL_F_STATIC_NODES or MPOL_F_RELATIVE_NODES flags described
+ below.
+
+ MPOL_INTERLEAVED: This mode specifies that page allocations be
+ interleaved, on a page granularity, across the nodes specified in
+ the policy. This mode also behaves slightly differently, based on
+ the context where it is used:
+
+ For allocation of anonymous pages and shared memory pages,
+ Interleave mode indexes the set of nodes specified by the policy
+ using the page offset of the faulting address into the segment
+ [VMA] containing the address modulo the number of nodes specified
+ by the policy. It then attempts to allocate a page, starting at
+ the selected node, as if the node had been specified by a Preferred
+ policy or had been selected by a local allocation. That is,
+ allocation will follow the per node zonelist.
+
+ For allocation of page cache pages, Interleave mode indexes the set
+ of nodes specified by the policy using a node counter maintained
+ per task. This counter wraps around to the lowest specified node
+ after it reaches the highest specified node. This will tend to
+ spread the pages out over the nodes specified by the policy based
+ on the order in which they are allocated, rather than based on any
+ page offset into an address range or file. During system boot up,
+ the temporary interleaved system default policy works in this
+ mode.
+
+ Linux memory policy supports the following optional mode flags:
+
+ MPOL_F_STATIC_NODES: This flag specifies that the nodemask passed by
+ the user should not be remapped if the task or VMA's set of allowed
+ nodes changes after the memory policy has been defined.
+
+ Without this flag, anytime a mempolicy is rebound because of a
+ change in the set of allowed nodes, the node (Preferred) or
+ nodemask (Bind, Interleave) is remapped to the new set of
+ allowed nodes. This may result in nodes being used that were
+ previously undesired.
+
+ With this flag, if the user-specified nodes overlap with the
+ nodes allowed by the task's cpuset, then the memory policy is
+ applied to their intersection. If the two sets of nodes do not
+ overlap, the Default policy is used.
+
+ For example, consider a task that is attached to a cpuset with
+ mems 1-3 that sets an Interleave policy over the same set. If
+ the cpuset's mems change to 3-5, the Interleave will now occur
+ over nodes 3, 4, and 5. With this flag, however, since only node
+ 3 is allowed from the user's nodemask, the "interleave" only
+ occurs over that node. If no nodes from the user's nodemask are
+ now allowed, the Default behavior is used.
+
+ MPOL_F_STATIC_NODES cannot be combined with the
+ MPOL_F_RELATIVE_NODES flag. It also cannot be used for
+ MPOL_PREFERRED policies that were created with an empty nodemask
+ (local allocation).
+
+ MPOL_F_RELATIVE_NODES: This flag specifies that the nodemask passed
+ by the user will be mapped relative to the set of the task or VMA's
+ set of allowed nodes. The kernel stores the user-passed nodemask,
+ and if the allowed nodes changes, then that original nodemask will
+ be remapped relative to the new set of allowed nodes.
+
+ Without this flag (and without MPOL_F_STATIC_NODES), anytime a
+ mempolicy is rebound because of a change in the set of allowed
+ nodes, the node (Preferred) or nodemask (Bind, Interleave) is
+ remapped to the new set of allowed nodes. That remap may not
+ preserve the relative nature of the user's passed nodemask to its
+ set of allowed nodes upon successive rebinds: a nodemask of
+ 1,3,5 may be remapped to 7-9 and then to 1-3 if the set of
+ allowed nodes is restored to its original state.
+
+ With this flag, the remap is done so that the node numbers from
+ the user's passed nodemask are relative to the set of allowed
+ nodes. In other words, if nodes 0, 2, and 4 are set in the user's
+ nodemask, the policy will be effected over the first (and in the
+ Bind or Interleave case, the third and fifth) nodes in the set of
+ allowed nodes. The nodemask passed by the user represents nodes
+ relative to task or VMA's set of allowed nodes.
+
+ If the user's nodemask includes nodes that are outside the range
+ of the new set of allowed nodes (for example, node 5 is set in
+ the user's nodemask when the set of allowed nodes is only 0-3),
+ then the remap wraps around to the beginning of the nodemask and,
+ if not already set, sets the node in the mempolicy nodemask.
+
+ For example, consider a task that is attached to a cpuset with
+ mems 2-5 that sets an Interleave policy over the same set with
+ MPOL_F_RELATIVE_NODES. If the cpuset's mems change to 3-7, the
+ interleave now occurs over nodes 3,5-7. If the cpuset's mems
+ then change to 0,2-3,5, then the interleave occurs over nodes
+ 0,2-3,5.
+
+ Thanks to the consistent remapping, applications preparing
+ nodemasks to specify memory policies using this flag should
+ disregard their current, actual cpuset imposed memory placement
+ and prepare the nodemask as if they were always located on
+ memory nodes 0 to N-1, where N is the number of memory nodes the
+ policy is intended to manage. Let the kernel then remap to the
+ set of memory nodes allowed by the task's cpuset, as that may
+ change over time.
+
+ MPOL_F_RELATIVE_NODES cannot be combined with the
+ MPOL_F_STATIC_NODES flag. It also cannot be used for
+ MPOL_PREFERRED policies that were created with an empty nodemask
+ (local allocation).
+
+MEMORY POLICY REFERENCE COUNTING
+
+To resolve use/free races, struct mempolicy contains an atomic reference
+count field. Internal interfaces, mpol_get()/mpol_put() increment and
+decrement this reference count, respectively. mpol_put() will only free
+the structure back to the mempolicy kmem cache when the reference count
+goes to zero.
+
+When a new memory policy is allocated, its reference count is initialized
+to '1', representing the reference held by the task that is installing the
+new policy. When a pointer to a memory policy structure is stored in another
+structure, another reference is added, as the task's reference will be dropped
+on completion of the policy installation.
+
+During run-time "usage" of the policy, we attempt to minimize atomic operations
+on the reference count, as this can lead to cache lines bouncing between cpus
+and NUMA nodes. "Usage" here means one of the following:
+
+1) querying of the policy, either by the task itself [using the get_mempolicy()
+ API discussed below] or by another task using the /proc/<pid>/numa_maps
+ interface.
+
+2) examination of the policy to determine the policy mode and associated node
+ or node lists, if any, for page allocation. This is considered a "hot
+ path". Note that for MPOL_BIND, the "usage" extends across the entire
+ allocation process, which may sleep during page reclaimation, because the
+ BIND policy nodemask is used, by reference, to filter ineligible nodes.
+
+We can avoid taking an extra reference during the usages listed above as
+follows:
+
+1) we never need to get/free the system default policy as this is never
+ changed nor freed, once the system is up and running.
+
+2) for querying the policy, we do not need to take an extra reference on the
+ target task's task policy nor vma policies because we always acquire the
+ task's mm's mmap_sem for read during the query. The set_mempolicy() and
+ mbind() APIs [see below] always acquire the mmap_sem for write when
+ installing or replacing task or vma policies. Thus, there is no possibility
+ of a task or thread freeing a policy while another task or thread is
+ querying it.
+
+3) Page allocation usage of task or vma policy occurs in the fault path where
+ we hold them mmap_sem for read. Again, because replacing the task or vma
+ policy requires that the mmap_sem be held for write, the policy can't be
+ freed out from under us while we're using it for page allocation.
+
+4) Shared policies require special consideration. One task can replace a
+ shared memory policy while another task, with a distinct mmap_sem, is
+ querying or allocating a page based on the policy. To resolve this
+ potential race, the shared policy infrastructure adds an extra reference
+ to the shared policy during lookup while holding a spin lock on the shared
+ policy management structure. This requires that we drop this extra
+ reference when we're finished "using" the policy. We must drop the
+ extra reference on shared policies in the same query/allocation paths
+ used for non-shared policies. For this reason, shared policies are marked
+ as such, and the extra reference is dropped "conditionally"--i.e., only
+ for shared policies.
+
+ Because of this extra reference counting, and because we must lookup
+ shared policies in a tree structure under spinlock, shared policies are
+ more expensive to use in the page allocation path. This is especially
+ true for shared policies on shared memory regions shared by tasks running
+ on different NUMA nodes. This extra overhead can be avoided by always
+ falling back to task or system default policy for shared memory regions,
+ or by prefaulting the entire shared memory region into memory and locking
+ it down. However, this might not be appropriate for all applications.
+
+MEMORY POLICY APIs
+
+Linux supports 3 system calls for controlling memory policy. These APIS
+always affect only the calling task, the calling task's address space, or
+some shared object mapped into the calling task's address space.
+
+ Note: the headers that define these APIs and the parameter data types
+ for user space applications reside in a package that is not part of
+ the Linux kernel. The kernel system call interfaces, with the 'sys_'
+ prefix, are defined in <linux/syscalls.h>; the mode and flag
+ definitions are defined in <linux/mempolicy.h>.
+
+Set [Task] Memory Policy:
+
+ long set_mempolicy(int mode, const unsigned long *nmask,
+ unsigned long maxnode);
+
+ Set's the calling task's "task/process memory policy" to mode
+ specified by the 'mode' argument and the set of nodes defined
+ by 'nmask'. 'nmask' points to a bit mask of node ids containing
+ at least 'maxnode' ids. Optional mode flags may be passed by
+ combining the 'mode' argument with the flag (for example:
+ MPOL_INTERLEAVE | MPOL_F_STATIC_NODES).
+
+ See the set_mempolicy(2) man page for more details
+
+
+Get [Task] Memory Policy or Related Information
+
+ long get_mempolicy(int *mode,
+ const unsigned long *nmask, unsigned long maxnode,
+ void *addr, int flags);
+
+ Queries the "task/process memory policy" of the calling task, or
+ the policy or location of a specified virtual address, depending
+ on the 'flags' argument.
+
+ See the get_mempolicy(2) man page for more details
+
+
+Install VMA/Shared Policy for a Range of Task's Address Space
+
+ long mbind(void *start, unsigned long len, int mode,
+ const unsigned long *nmask, unsigned long maxnode,
+ unsigned flags);
+
+ mbind() installs the policy specified by (mode, nmask, maxnodes) as
+ a VMA policy for the range of the calling task's address space
+ specified by the 'start' and 'len' arguments. Additional actions
+ may be requested via the 'flags' argument.
+
+ See the mbind(2) man page for more details.
+
+MEMORY POLICY COMMAND LINE INTERFACE
+
+Although not strictly part of the Linux implementation of memory policy,
+a command line tool, numactl(8), exists that allows one to:
+
++ set the task policy for a specified program via set_mempolicy(2), fork(2) and
+ exec(2)
+
++ set the shared policy for a shared memory segment via mbind(2)
+
+The numactl(8) tool is packaged with the run-time version of the library
+containing the memory policy system call wrappers. Some distributions
+package the headers and compile-time libraries in a separate development
+package.
+
+
+MEMORY POLICIES AND CPUSETS
+
+Memory policies work within cpusets as described above. For memory policies
+that require a node or set of nodes, the nodes are restricted to the set of
+nodes whose memories are allowed by the cpuset constraints. If the nodemask
+specified for the policy contains nodes that are not allowed by the cpuset and
+MPOL_F_RELATIVE_NODES is not used, the intersection of the set of nodes
+specified for the policy and the set of nodes with memory is used. If the
+result is the empty set, the policy is considered invalid and cannot be
+installed. If MPOL_F_RELATIVE_NODES is used, the policy's nodes are mapped
+onto and folded into the task's set of allowed nodes as previously described.
+
+The interaction of memory policies and cpusets can be problematic when tasks
+in two cpusets share access to a memory region, such as shared memory segments
+created by shmget() of mmap() with the MAP_ANONYMOUS and MAP_SHARED flags, and
+any of the tasks install shared policy on the region, only nodes whose
+memories are allowed in both cpusets may be used in the policies. Obtaining
+this information requires "stepping outside" the memory policy APIs to use the
+cpuset information and requires that one know in what cpusets other task might
+be attaching to the shared region. Furthermore, if the cpusets' allowed
+memory sets are disjoint, "local" allocation is the only valid policy.
diff --git a/Documentation/vm/overcommit-accounting b/Documentation/vm/overcommit-accounting
new file mode 100644
index 000000000..cbfaaa674
--- /dev/null
+++ b/Documentation/vm/overcommit-accounting
@@ -0,0 +1,80 @@
+The Linux kernel supports the following overcommit handling modes
+
+0 - Heuristic overcommit handling. Obvious overcommits of
+ address space are refused. Used for a typical system. It
+ ensures a seriously wild allocation fails while allowing
+ overcommit to reduce swap usage. root is allowed to
+ allocate slightly more memory in this mode. This is the
+ default.
+
+1 - Always overcommit. Appropriate for some scientific
+ applications. Classic example is code using sparse arrays
+ and just relying on the virtual memory consisting almost
+ entirely of zero pages.
+
+2 - Don't overcommit. The total address space commit
+ for the system is not permitted to exceed swap + a
+ configurable amount (default is 50%) of physical RAM.
+ Depending on the amount you use, in most situations
+ this means a process will not be killed while accessing
+ pages but will receive errors on memory allocation as
+ appropriate.
+
+ Useful for applications that want to guarantee their
+ memory allocations will be available in the future
+ without having to initialize every page.
+
+The overcommit policy is set via the sysctl `vm.overcommit_memory'.
+
+The overcommit amount can be set via `vm.overcommit_ratio' (percentage)
+or `vm.overcommit_kbytes' (absolute value).
+
+The current overcommit limit and amount committed are viewable in
+/proc/meminfo as CommitLimit and Committed_AS respectively.
+
+Gotchas
+-------
+
+The C language stack growth does an implicit mremap. If you want absolute
+guarantees and run close to the edge you MUST mmap your stack for the
+largest size you think you will need. For typical stack usage this does
+not matter much but it's a corner case if you really really care
+
+In mode 2 the MAP_NORESERVE flag is ignored.
+
+
+How It Works
+------------
+
+The overcommit is based on the following rules
+
+For a file backed map
+ SHARED or READ-only - 0 cost (the file is the map not swap)
+ PRIVATE WRITABLE - size of mapping per instance
+
+For an anonymous or /dev/zero map
+ SHARED - size of mapping
+ PRIVATE READ-only - 0 cost (but of little use)
+ PRIVATE WRITABLE - size of mapping per instance
+
+Additional accounting
+ Pages made writable copies by mmap
+ shmfs memory drawn from the same pool
+
+Status
+------
+
+o We account mmap memory mappings
+o We account mprotect changes in commit
+o We account mremap changes in size
+o We account brk
+o We account munmap
+o We report the commit status in /proc
+o Account and check on fork
+o Review stack handling/building on exec
+o SHMfs accounting
+o Implement actual limit enforcement
+
+To Do
+-----
+o Account ptrace pages (this is hard)
diff --git a/Documentation/vm/page_migration b/Documentation/vm/page_migration
new file mode 100644
index 000000000..6513fe2d9
--- /dev/null
+++ b/Documentation/vm/page_migration
@@ -0,0 +1,149 @@
+Page migration
+--------------
+
+Page migration allows the moving of the physical location of pages between
+nodes in a numa system while the process is running. This means that the
+virtual addresses that the process sees do not change. However, the
+system rearranges the physical location of those pages.
+
+The main intend of page migration is to reduce the latency of memory access
+by moving pages near to the processor where the process accessing that memory
+is running.
+
+Page migration allows a process to manually relocate the node on which its
+pages are located through the MF_MOVE and MF_MOVE_ALL options while setting
+a new memory policy via mbind(). The pages of process can also be relocated
+from another process using the sys_migrate_pages() function call. The
+migrate_pages function call takes two sets of nodes and moves pages of a
+process that are located on the from nodes to the destination nodes.
+Page migration functions are provided by the numactl package by Andi Kleen
+(a version later than 0.9.3 is required. Get it from
+ftp://oss.sgi.com/www/projects/libnuma/download/). numactl provides libnuma
+which provides an interface similar to other numa functionality for page
+migration. cat /proc/<pid>/numa_maps allows an easy review of where the
+pages of a process are located. See also the numa_maps documentation in the
+proc(5) man page.
+
+Manual migration is useful if for example the scheduler has relocated
+a process to a processor on a distant node. A batch scheduler or an
+administrator may detect the situation and move the pages of the process
+nearer to the new processor. The kernel itself does only provide
+manual page migration support. Automatic page migration may be implemented
+through user space processes that move pages. A special function call
+"move_pages" allows the moving of individual pages within a process.
+A NUMA profiler may f.e. obtain a log showing frequent off node
+accesses and may use the result to move pages to more advantageous
+locations.
+
+Larger installations usually partition the system using cpusets into
+sections of nodes. Paul Jackson has equipped cpusets with the ability to
+move pages when a task is moved to another cpuset (See
+Documentation/cgroups/cpusets.txt).
+Cpusets allows the automation of process locality. If a task is moved to
+a new cpuset then also all its pages are moved with it so that the
+performance of the process does not sink dramatically. Also the pages
+of processes in a cpuset are moved if the allowed memory nodes of a
+cpuset are changed.
+
+Page migration allows the preservation of the relative location of pages
+within a group of nodes for all migration techniques which will preserve a
+particular memory allocation pattern generated even after migrating a
+process. This is necessary in order to preserve the memory latencies.
+Processes will run with similar performance after migration.
+
+Page migration occurs in several steps. First a high level
+description for those trying to use migrate_pages() from the kernel
+(for userspace usage see the Andi Kleen's numactl package mentioned above)
+and then a low level description of how the low level details work.
+
+A. In kernel use of migrate_pages()
+-----------------------------------
+
+1. Remove pages from the LRU.
+
+ Lists of pages to be migrated are generated by scanning over
+ pages and moving them into lists. This is done by
+ calling isolate_lru_page().
+ Calling isolate_lru_page increases the references to the page
+ so that it cannot vanish while the page migration occurs.
+ It also prevents the swapper or other scans to encounter
+ the page.
+
+2. We need to have a function of type new_page_t that can be
+ passed to migrate_pages(). This function should figure out
+ how to allocate the correct new page given the old page.
+
+3. The migrate_pages() function is called which attempts
+ to do the migration. It will call the function to allocate
+ the new page for each page that is considered for
+ moving.
+
+B. How migrate_pages() works
+----------------------------
+
+migrate_pages() does several passes over its list of pages. A page is moved
+if all references to a page are removable at the time. The page has
+already been removed from the LRU via isolate_lru_page() and the refcount
+is increased so that the page cannot be freed while page migration occurs.
+
+Steps:
+
+1. Lock the page to be migrated
+
+2. Insure that writeback is complete.
+
+3. Prep the new page that we want to move to. It is locked
+ and set to not being uptodate so that all accesses to the new
+ page immediately lock while the move is in progress.
+
+4. The new page is prepped with some settings from the old page so that
+ accesses to the new page will discover a page with the correct settings.
+
+5. All the page table references to the page are converted
+ to migration entries or dropped (nonlinear vmas).
+ This decrease the mapcount of a page. If the resulting
+ mapcount is not zero then we do not migrate the page.
+ All user space processes that attempt to access the page
+ will now wait on the page lock.
+
+6. The radix tree lock is taken. This will cause all processes trying
+ to access the page via the mapping to block on the radix tree spinlock.
+
+7. The refcount of the page is examined and we back out if references remain
+ otherwise we know that we are the only one referencing this page.
+
+8. The radix tree is checked and if it does not contain the pointer to this
+ page then we back out because someone else modified the radix tree.
+
+9. The radix tree is changed to point to the new page.
+
+10. The reference count of the old page is dropped because the radix tree
+ reference is gone. A reference to the new page is established because
+ the new page is referenced to by the radix tree.
+
+11. The radix tree lock is dropped. With that lookups in the mapping
+ become possible again. Processes will move from spinning on the tree_lock
+ to sleeping on the locked new page.
+
+12. The page contents are copied to the new page.
+
+13. The remaining page flags are copied to the new page.
+
+14. The old page flags are cleared to indicate that the page does
+ not provide any information anymore.
+
+15. Queued up writeback on the new page is triggered.
+
+16. If migration entries were page then replace them with real ptes. Doing
+ so will enable access for user space processes not already waiting for
+ the page lock.
+
+19. The page locks are dropped from the old and new page.
+ Processes waiting on the page lock will redo their page faults
+ and will reach the new page.
+
+20. The new page is moved to the LRU and can be scanned by the swapper
+ etc again.
+
+Christoph Lameter, May 8, 2006.
+
diff --git a/Documentation/vm/page_owner.txt b/Documentation/vm/page_owner.txt
new file mode 100644
index 000000000..8f3ce9b3a
--- /dev/null
+++ b/Documentation/vm/page_owner.txt
@@ -0,0 +1,81 @@
+page owner: Tracking about who allocated each page
+-----------------------------------------------------------
+
+* Introduction
+
+page owner is for the tracking about who allocated each page.
+It can be used to debug memory leak or to find a memory hogger.
+When allocation happens, information about allocation such as call stack
+and order of pages is stored into certain storage for each page.
+When we need to know about status of all pages, we can get and analyze
+this information.
+
+Although we already have tracepoint for tracing page allocation/free,
+using it for analyzing who allocate each page is rather complex. We need
+to enlarge the trace buffer for preventing overlapping until userspace
+program launched. And, launched program continually dump out the trace
+buffer for later analysis and it would change system behviour with more
+possibility rather than just keeping it in memory, so bad for debugging.
+
+page owner can also be used for various purposes. For example, accurate
+fragmentation statistics can be obtained through gfp flag information of
+each page. It is already implemented and activated if page owner is
+enabled. Other usages are more than welcome.
+
+page owner is disabled in default. So, if you'd like to use it, you need
+to add "page_owner=on" into your boot cmdline. If the kernel is built
+with page owner and page owner is disabled in runtime due to no enabling
+boot option, runtime overhead is marginal. If disabled in runtime, it
+doesn't require memory to store owner information, so there is no runtime
+memory overhead. And, page owner inserts just two unlikely branches into
+the page allocator hotpath and if it returns false then allocation is
+done like as the kernel without page owner. These two unlikely branches
+would not affect to allocation performance. Following is the kernel's
+code size change due to this facility.
+
+- Without page owner
+ text data bss dec hex filename
+ 40662 1493 644 42799 a72f mm/page_alloc.o
+
+- With page owner
+ text data bss dec hex filename
+ 40892 1493 644 43029 a815 mm/page_alloc.o
+ 1427 24 8 1459 5b3 mm/page_ext.o
+ 2722 50 0 2772 ad4 mm/page_owner.o
+
+Although, roughly, 4 KB code is added in total, page_alloc.o increase by
+230 bytes and only half of it is in hotpath. Building the kernel with
+page owner and turning it on if needed would be great option to debug
+kernel memory problem.
+
+There is one notice that is caused by implementation detail. page owner
+stores information into the memory from struct page extension. This memory
+is initialized some time later than that page allocator starts in sparse
+memory system, so, until initialization, many pages can be allocated and
+they would have no owner information. To fix it up, these early allocated
+pages are investigated and marked as allocated in initialization phase.
+Although it doesn't mean that they have the right owner information,
+at least, we can tell whether the page is allocated or not,
+more accurately. On 2GB memory x86-64 VM box, 13343 early allocated pages
+are catched and marked, although they are mostly allocated from struct
+page extension feature. Anyway, after that, no page is left in
+un-tracking state.
+
+* Usage
+
+1) Build user-space helper
+ cd tools/vm
+ make page_owner_sort
+
+2) Enable page owner
+ Add "page_owner=on" to boot cmdline.
+
+3) Do the job what you want to debug
+
+4) Analyze information from page owner
+ cat /sys/kernel/debug/page_owner > page_owner_full.txt
+ grep -v ^PFN page_owner_full.txt > page_owner.txt
+ ./page_owner_sort page_owner.txt sorted_page_owner.txt
+
+ See the result about who allocated each page
+ in the sorted_page_owner.txt.
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
new file mode 100644
index 000000000..6bfbc172c
--- /dev/null
+++ b/Documentation/vm/pagemap.txt
@@ -0,0 +1,161 @@
+pagemap, from the userspace perspective
+---------------------------------------
+
+pagemap is a new (as of 2.6.25) set of interfaces in the kernel that allow
+userspace programs to examine the page tables and related information by
+reading files in /proc.
+
+There are three components to pagemap:
+
+ * /proc/pid/pagemap. This file lets a userspace process find out which
+ physical frame each virtual page is mapped to. It contains one 64-bit
+ value for each virtual page, containing the following data (from
+ fs/proc/task_mmu.c, above pagemap_read):
+
+ * Bits 0-54 page frame number (PFN) if present
+ * Bits 0-4 swap type if swapped
+ * Bits 5-54 swap offset if swapped
+ * Bit 55 pte is soft-dirty (see Documentation/vm/soft-dirty.txt)
+ * Bits 56-60 zero
+ * Bit 61 page is file-page or shared-anon
+ * Bit 62 page swapped
+ * Bit 63 page present
+
+ If the page is not present but in swap, then the PFN contains an
+ encoding of the swap file number and the page's offset into the
+ swap. Unmapped pages return a null PFN. This allows determining
+ precisely which pages are mapped (or in swap) and comparing mapped
+ pages between processes.
+
+ Efficient users of this interface will use /proc/pid/maps to
+ determine which areas of memory are actually mapped and llseek to
+ skip over unmapped regions.
+
+ * /proc/kpagecount. This file contains a 64-bit count of the number of
+ times each page is mapped, indexed by PFN.
+
+ * /proc/kpageflags. This file contains a 64-bit set of flags for each
+ page, indexed by PFN.
+
+ The flags are (from fs/proc/page.c, above kpageflags_read):
+
+ 0. LOCKED
+ 1. ERROR
+ 2. REFERENCED
+ 3. UPTODATE
+ 4. DIRTY
+ 5. LRU
+ 6. ACTIVE
+ 7. SLAB
+ 8. WRITEBACK
+ 9. RECLAIM
+ 10. BUDDY
+ 11. MMAP
+ 12. ANON
+ 13. SWAPCACHE
+ 14. SWAPBACKED
+ 15. COMPOUND_HEAD
+ 16. COMPOUND_TAIL
+ 16. HUGE
+ 18. UNEVICTABLE
+ 19. HWPOISON
+ 20. NOPAGE
+ 21. KSM
+ 22. THP
+ 23. BALLOON
+ 24. ZERO_PAGE
+
+Short descriptions to the page flags:
+
+ 0. LOCKED
+ page is being locked for exclusive access, eg. by undergoing read/write IO
+
+ 7. SLAB
+ page is managed by the SLAB/SLOB/SLUB/SLQB kernel memory allocator
+ When compound page is used, SLUB/SLQB will only set this flag on the head
+ page; SLOB will not flag it at all.
+
+10. BUDDY
+ a free memory block managed by the buddy system allocator
+ The buddy system organizes free memory in blocks of various orders.
+ An order N block has 2^N physically contiguous pages, with the BUDDY flag
+ set for and _only_ for the first page.
+
+15. COMPOUND_HEAD
+16. COMPOUND_TAIL
+ A compound page with order N consists of 2^N physically contiguous pages.
+ A compound page with order 2 takes the form of "HTTT", where H donates its
+ head page and T donates its tail page(s). The major consumers of compound
+ pages are hugeTLB pages (Documentation/vm/hugetlbpage.txt), the SLUB etc.
+ memory allocators and various device drivers. However in this interface,
+ only huge/giga pages are made visible to end users.
+17. HUGE
+ this is an integral part of a HugeTLB page
+
+19. HWPOISON
+ hardware detected memory corruption on this page: don't touch the data!
+
+20. NOPAGE
+ no page frame exists at the requested address
+
+21. KSM
+ identical memory pages dynamically shared between one or more processes
+
+22. THP
+ contiguous pages which construct transparent hugepages
+
+23. BALLOON
+ balloon compaction page
+
+24. ZERO_PAGE
+ zero page for pfn_zero or huge_zero page
+
+ [IO related page flags]
+ 1. ERROR IO error occurred
+ 3. UPTODATE page has up-to-date data
+ ie. for file backed page: (in-memory data revision >= on-disk one)
+ 4. DIRTY page has been written to, hence contains new data
+ ie. for file backed page: (in-memory data revision > on-disk one)
+ 8. WRITEBACK page is being synced to disk
+
+ [LRU related page flags]
+ 5. LRU page is in one of the LRU lists
+ 6. ACTIVE page is in the active LRU list
+18. UNEVICTABLE page is in the unevictable (non-)LRU list
+ It is somehow pinned and not a candidate for LRU page reclaims,
+ eg. ramfs pages, shmctl(SHM_LOCK) and mlock() memory segments
+ 2. REFERENCED page has been referenced since last LRU list enqueue/requeue
+ 9. RECLAIM page will be reclaimed soon after its pageout IO completed
+11. MMAP a memory mapped page
+12. ANON a memory mapped page that is not part of a file
+13. SWAPCACHE page is mapped to swap space, ie. has an associated swap entry
+14. SWAPBACKED page is backed by swap/RAM
+
+The page-types tool in the tools/vm directory can be used to query the
+above flags.
+
+Using pagemap to do something useful:
+
+The general procedure for using pagemap to find out about a process' memory
+usage goes like this:
+
+ 1. Read /proc/pid/maps to determine which parts of the memory space are
+ mapped to what.
+ 2. Select the maps you are interested in -- all of them, or a particular
+ library, or the stack or the heap, etc.
+ 3. Open /proc/pid/pagemap and seek to the pages you would like to examine.
+ 4. Read a u64 for each page from pagemap.
+ 5. Open /proc/kpagecount and/or /proc/kpageflags. For each PFN you just
+ read, seek to that entry in the file, and read the data you want.
+
+For example, to find the "unique set size" (USS), which is the amount of
+memory that a process is using that is not shared with any other process,
+you can go through every map in the process, find the PFNs, look those up
+in kpagecount, and tally up the number of pages that are only referenced
+once.
+
+Other notes:
+
+Reading from any of the files will return -EINVAL if you are not starting
+the read on an 8-byte boundary (e.g., if you sought an odd number of bytes
+into the file), or if the size of the read is not a multiple of 8 bytes.
diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.txt
new file mode 100644
index 000000000..f609142f4
--- /dev/null
+++ b/Documentation/vm/remap_file_pages.txt
@@ -0,0 +1,27 @@
+The remap_file_pages() system call is used to create a nonlinear mapping,
+that is, a mapping in which the pages of the file are mapped into a
+nonsequential order in memory. The advantage of using remap_file_pages()
+over using repeated calls to mmap(2) is that the former approach does not
+require the kernel to create additional VMA (Virtual Memory Area) data
+structures.
+
+Supporting of nonlinear mapping requires significant amount of non-trivial
+code in kernel virtual memory subsystem including hot paths. Also to get
+nonlinear mapping work kernel need a way to distinguish normal page table
+entries from entries with file offset (pte_file). Kernel reserves flag in
+PTE for this purpose. PTE flags are scarce resource especially on some CPU
+architectures. It would be nice to free up the flag for other usage.
+
+Fortunately, there are not many users of remap_file_pages() in the wild.
+It's only known that one enterprise RDBMS implementation uses the syscall
+on 32-bit systems to map files bigger than can linearly fit into 32-bit
+virtual address space. This use-case is not critical anymore since 64-bit
+systems are widely available.
+
+The syscall is deprecated and replaced it with an emulation now. The
+emulation creates new VMAs instead of nonlinear mappings. It's going to
+work slower for rare users of remap_file_pages() but ABI is preserved.
+
+One side effect of emulation (apart from performance) is that user can hit
+vm.max_map_count limit more easily due to additional VMAs. See comment for
+DEFAULT_MAX_MAP_COUNT for more details on the limit.
diff --git a/Documentation/vm/slub.txt b/Documentation/vm/slub.txt
new file mode 100644
index 000000000..b0c6d1bbb
--- /dev/null
+++ b/Documentation/vm/slub.txt
@@ -0,0 +1,283 @@
+Short users guide for SLUB
+--------------------------
+
+The basic philosophy of SLUB is very different from SLAB. SLAB
+requires rebuilding the kernel to activate debug options for all
+slab caches. SLUB always includes full debugging but it is off by default.
+SLUB can enable debugging only for selected slabs in order to avoid
+an impact on overall system performance which may make a bug more
+difficult to find.
+
+In order to switch debugging on one can add a option "slub_debug"
+to the kernel command line. That will enable full debugging for
+all slabs.
+
+Typically one would then use the "slabinfo" command to get statistical
+data and perform operation on the slabs. By default slabinfo only lists
+slabs that have data in them. See "slabinfo -h" for more options when
+running the command. slabinfo can be compiled with
+
+gcc -o slabinfo tools/vm/slabinfo.c
+
+Some of the modes of operation of slabinfo require that slub debugging
+be enabled on the command line. F.e. no tracking information will be
+available without debugging on and validation can only partially
+be performed if debugging was not switched on.
+
+Some more sophisticated uses of slub_debug:
+-------------------------------------------
+
+Parameters may be given to slub_debug. If none is specified then full
+debugging is enabled. Format:
+
+slub_debug=<Debug-Options> Enable options for all slabs
+slub_debug=<Debug-Options>,<slab name>
+ Enable options only for select slabs
+
+Possible debug options are
+ F Sanity checks on (enables SLAB_DEBUG_FREE. Sorry
+ SLAB legacy issues)
+ Z Red zoning
+ P Poisoning (object and padding)
+ U User tracking (free and alloc)
+ T Trace (please only use on single slabs)
+ A Toggle failslab filter mark for the cache
+ O Switch debugging off for caches that would have
+ caused higher minimum slab orders
+ - Switch all debugging off (useful if the kernel is
+ configured with CONFIG_SLUB_DEBUG_ON)
+
+F.e. in order to boot just with sanity checks and red zoning one would specify:
+
+ slub_debug=FZ
+
+Trying to find an issue in the dentry cache? Try
+
+ slub_debug=,dentry
+
+to only enable debugging on the dentry cache.
+
+Red zoning and tracking may realign the slab. We can just apply sanity checks
+to the dentry cache with
+
+ slub_debug=F,dentry
+
+Debugging options may require the minimum possible slab order to increase as
+a result of storing the metadata (for example, caches with PAGE_SIZE object
+sizes). This has a higher liklihood of resulting in slab allocation errors
+in low memory situations or if there's high fragmentation of memory. To
+switch off debugging for such caches by default, use
+
+ slub_debug=O
+
+In case you forgot to enable debugging on the kernel command line: It is
+possible to enable debugging manually when the kernel is up. Look at the
+contents of:
+
+/sys/kernel/slab/<slab name>/
+
+Look at the writable files. Writing 1 to them will enable the
+corresponding debug option. All options can be set on a slab that does
+not contain objects. If the slab already contains objects then sanity checks
+and tracing may only be enabled. The other options may cause the realignment
+of objects.
+
+Careful with tracing: It may spew out lots of information and never stop if
+used on the wrong slab.
+
+Slab merging
+------------
+
+If no debug options are specified then SLUB may merge similar slabs together
+in order to reduce overhead and increase cache hotness of objects.
+slabinfo -a displays which slabs were merged together.
+
+Slab validation
+---------------
+
+SLUB can validate all object if the kernel was booted with slub_debug. In
+order to do so you must have the slabinfo tool. Then you can do
+
+slabinfo -v
+
+which will test all objects. Output will be generated to the syslog.
+
+This also works in a more limited way if boot was without slab debug.
+In that case slabinfo -v simply tests all reachable objects. Usually
+these are in the cpu slabs and the partial slabs. Full slabs are not
+tracked by SLUB in a non debug situation.
+
+Getting more performance
+------------------------
+
+To some degree SLUB's performance is limited by the need to take the
+list_lock once in a while to deal with partial slabs. That overhead is
+governed by the order of the allocation for each slab. The allocations
+can be influenced by kernel parameters:
+
+slub_min_objects=x (default 4)
+slub_min_order=x (default 0)
+slub_max_order=x (default 3 (PAGE_ALLOC_COSTLY_ORDER))
+
+slub_min_objects allows to specify how many objects must at least fit
+into one slab in order for the allocation order to be acceptable.
+In general slub will be able to perform this number of allocations
+on a slab without consulting centralized resources (list_lock) where
+contention may occur.
+
+slub_min_order specifies a minim order of slabs. A similar effect like
+slub_min_objects.
+
+slub_max_order specified the order at which slub_min_objects should no
+longer be checked. This is useful to avoid SLUB trying to generate
+super large order pages to fit slub_min_objects of a slab cache with
+large object sizes into one high order page. Setting command line
+parameter debug_guardpage_minorder=N (N > 0), forces setting
+slub_max_order to 0, what cause minimum possible order of slabs
+allocation.
+
+SLUB Debug output
+-----------------
+
+Here is a sample of slub debug output:
+
+====================================================================
+BUG kmalloc-8: Redzone overwritten
+--------------------------------------------------------------------
+
+INFO: 0xc90f6d28-0xc90f6d2b. First byte 0x00 instead of 0xcc
+INFO: Slab 0xc528c530 flags=0x400000c3 inuse=61 fp=0xc90f6d58
+INFO: Object 0xc90f6d20 @offset=3360 fp=0xc90f6d58
+INFO: Allocated in get_modalias+0x61/0xf5 age=53 cpu=1 pid=554
+
+Bytes b4 0xc90f6d10: 00 00 00 00 00 00 00 00 5a 5a 5a 5a 5a 5a 5a 5a ........ZZZZZZZZ
+ Object 0xc90f6d20: 31 30 31 39 2e 30 30 35 1019.005
+ Redzone 0xc90f6d28: 00 cc cc cc .
+ Padding 0xc90f6d50: 5a 5a 5a 5a 5a 5a 5a 5a ZZZZZZZZ
+
+ [<c010523d>] dump_trace+0x63/0x1eb
+ [<c01053df>] show_trace_log_lvl+0x1a/0x2f
+ [<c010601d>] show_trace+0x12/0x14
+ [<c0106035>] dump_stack+0x16/0x18
+ [<c017e0fa>] object_err+0x143/0x14b
+ [<c017e2cc>] check_object+0x66/0x234
+ [<c017eb43>] __slab_free+0x239/0x384
+ [<c017f446>] kfree+0xa6/0xc6
+ [<c02e2335>] get_modalias+0xb9/0xf5
+ [<c02e23b7>] dmi_dev_uevent+0x27/0x3c
+ [<c027866a>] dev_uevent+0x1ad/0x1da
+ [<c0205024>] kobject_uevent_env+0x20a/0x45b
+ [<c020527f>] kobject_uevent+0xa/0xf
+ [<c02779f1>] store_uevent+0x4f/0x58
+ [<c027758e>] dev_attr_store+0x29/0x2f
+ [<c01bec4f>] sysfs_write_file+0x16e/0x19c
+ [<c0183ba7>] vfs_write+0xd1/0x15a
+ [<c01841d7>] sys_write+0x3d/0x72
+ [<c0104112>] sysenter_past_esp+0x5f/0x99
+ [<b7f7b410>] 0xb7f7b410
+ =======================
+
+FIX kmalloc-8: Restoring Redzone 0xc90f6d28-0xc90f6d2b=0xcc
+
+If SLUB encounters a corrupted object (full detection requires the kernel
+to be booted with slub_debug) then the following output will be dumped
+into the syslog:
+
+1. Description of the problem encountered
+
+This will be a message in the system log starting with
+
+===============================================
+BUG <slab cache affected>: <What went wrong>
+-----------------------------------------------
+
+INFO: <corruption start>-<corruption_end> <more info>
+INFO: Slab <address> <slab information>
+INFO: Object <address> <object information>
+INFO: Allocated in <kernel function> age=<jiffies since alloc> cpu=<allocated by
+ cpu> pid=<pid of the process>
+INFO: Freed in <kernel function> age=<jiffies since free> cpu=<freed by cpu>
+ pid=<pid of the process>
+
+(Object allocation / free information is only available if SLAB_STORE_USER is
+set for the slab. slub_debug sets that option)
+
+2. The object contents if an object was involved.
+
+Various types of lines can follow the BUG SLUB line:
+
+Bytes b4 <address> : <bytes>
+ Shows a few bytes before the object where the problem was detected.
+ Can be useful if the corruption does not stop with the start of the
+ object.
+
+Object <address> : <bytes>
+ The bytes of the object. If the object is inactive then the bytes
+ typically contain poison values. Any non-poison value shows a
+ corruption by a write after free.
+
+Redzone <address> : <bytes>
+ The Redzone following the object. The Redzone is used to detect
+ writes after the object. All bytes should always have the same
+ value. If there is any deviation then it is due to a write after
+ the object boundary.
+
+ (Redzone information is only available if SLAB_RED_ZONE is set.
+ slub_debug sets that option)
+
+Padding <address> : <bytes>
+ Unused data to fill up the space in order to get the next object
+ properly aligned. In the debug case we make sure that there are
+ at least 4 bytes of padding. This allows the detection of writes
+ before the object.
+
+3. A stackdump
+
+The stackdump describes the location where the error was detected. The cause
+of the corruption is may be more likely found by looking at the function that
+allocated or freed the object.
+
+4. Report on how the problem was dealt with in order to ensure the continued
+operation of the system.
+
+These are messages in the system log beginning with
+
+FIX <slab cache affected>: <corrective action taken>
+
+In the above sample SLUB found that the Redzone of an active object has
+been overwritten. Here a string of 8 characters was written into a slab that
+has the length of 8 characters. However, a 8 character string needs a
+terminating 0. That zero has overwritten the first byte of the Redzone field.
+After reporting the details of the issue encountered the FIX SLUB message
+tells us that SLUB has restored the Redzone to its proper value and then
+system operations continue.
+
+Emergency operations:
+---------------------
+
+Minimal debugging (sanity checks alone) can be enabled by booting with
+
+ slub_debug=F
+
+This will be generally be enough to enable the resiliency features of slub
+which will keep the system running even if a bad kernel component will
+keep corrupting objects. This may be important for production systems.
+Performance will be impacted by the sanity checks and there will be a
+continual stream of error messages to the syslog but no additional memory
+will be used (unlike full debugging).
+
+No guarantees. The kernel component still needs to be fixed. Performance
+may be optimized further by locating the slab that experiences corruption
+and enabling debugging only for that cache
+
+I.e.
+
+ slub_debug=F,dentry
+
+If the corruption occurs by writing after the end of the object then it
+may be advisable to enable a Redzone to avoid corrupting the beginning
+of other objects.
+
+ slub_debug=FZ,dentry
+
+Christoph Lameter, May 30, 2007
diff --git a/Documentation/vm/soft-dirty.txt b/Documentation/vm/soft-dirty.txt
new file mode 100644
index 000000000..55684d11a
--- /dev/null
+++ b/Documentation/vm/soft-dirty.txt
@@ -0,0 +1,43 @@
+ SOFT-DIRTY PTEs
+
+ The soft-dirty is a bit on a PTE which helps to track which pages a task
+writes to. In order to do this tracking one should
+
+ 1. Clear soft-dirty bits from the task's PTEs.
+
+ This is done by writing "4" into the /proc/PID/clear_refs file of the
+ task in question.
+
+ 2. Wait some time.
+
+ 3. Read soft-dirty bits from the PTEs.
+
+ This is done by reading from the /proc/PID/pagemap. The bit 55 of the
+ 64-bit qword is the soft-dirty one. If set, the respective PTE was
+ written to since step 1.
+
+
+ Internally, to do this tracking, the writable bit is cleared from PTEs
+when the soft-dirty bit is cleared. So, after this, when the task tries to
+modify a page at some virtual address the #PF occurs and the kernel sets
+the soft-dirty bit on the respective PTE.
+
+ Note, that although all the task's address space is marked as r/o after the
+soft-dirty bits clear, the #PF-s that occur after that are processed fast.
+This is so, since the pages are still mapped to physical memory, and thus all
+the kernel does is finds this fact out and puts both writable and soft-dirty
+bits on the PTE.
+
+ While in most cases tracking memory changes by #PF-s is more than enough
+there is still a scenario when we can lose soft dirty bits -- a task
+unmaps a previously mapped memory region and then maps a new one at exactly
+the same place. When unmap is called, the kernel internally clears PTE values
+including soft dirty bits. To notify user space application about such
+memory region renewal the kernel always marks new memory regions (and
+expanded regions) as soft dirty.
+
+ This feature is actively used by the checkpoint-restore project. You
+can find more details about it on http://criu.org
+
+
+-- Pavel Emelyanov, Apr 9, 2013
diff --git a/Documentation/vm/split_page_table_lock b/Documentation/vm/split_page_table_lock
new file mode 100644
index 000000000..6dea4fd5c
--- /dev/null
+++ b/Documentation/vm/split_page_table_lock
@@ -0,0 +1,94 @@
+Split page table lock
+=====================
+
+Originally, mm->page_table_lock spinlock protected all page tables of the
+mm_struct. But this approach leads to poor page fault scalability of
+multi-threaded applications due high contention on the lock. To improve
+scalability, split page table lock was introduced.
+
+With split page table lock we have separate per-table lock to serialize
+access to the table. At the moment we use split lock for PTE and PMD
+tables. Access to higher level tables protected by mm->page_table_lock.
+
+There are helpers to lock/unlock a table and other accessor functions:
+ - pte_offset_map_lock()
+ maps pte and takes PTE table lock, returns pointer to the taken
+ lock;
+ - pte_unmap_unlock()
+ unlocks and unmaps PTE table;
+ - pte_alloc_map_lock()
+ allocates PTE table if needed and take the lock, returns pointer
+ to taken lock or NULL if allocation failed;
+ - pte_lockptr()
+ returns pointer to PTE table lock;
+ - pmd_lock()
+ takes PMD table lock, returns pointer to taken lock;
+ - pmd_lockptr()
+ returns pointer to PMD table lock;
+
+Split page table lock for PTE tables is enabled compile-time if
+CONFIG_SPLIT_PTLOCK_CPUS (usually 4) is less or equal to NR_CPUS.
+If split lock is disabled, all tables guaded by mm->page_table_lock.
+
+Split page table lock for PMD tables is enabled, if it's enabled for PTE
+tables and the architecture supports it (see below).
+
+Hugetlb and split page table lock
+---------------------------------
+
+Hugetlb can support several page sizes. We use split lock only for PMD
+level, but not for PUD.
+
+Hugetlb-specific helpers:
+ - huge_pte_lock()
+ takes pmd split lock for PMD_SIZE page, mm->page_table_lock
+ otherwise;
+ - huge_pte_lockptr()
+ returns pointer to table lock;
+
+Support of split page table lock by an architecture
+---------------------------------------------------
+
+There's no need in special enabling of PTE split page table lock:
+everything required is done by pgtable_page_ctor() and pgtable_page_dtor(),
+which must be called on PTE table allocation / freeing.
+
+Make sure the architecture doesn't use slab allocator for page table
+allocation: slab uses page->slab_cache and page->first_page for its pages.
+These fields share storage with page->ptl.
+
+PMD split lock only makes sense if you have more than two page table
+levels.
+
+PMD split lock enabling requires pgtable_pmd_page_ctor() call on PMD table
+allocation and pgtable_pmd_page_dtor() on freeing.
+
+Allocation usually happens in pmd_alloc_one(), freeing in pmd_free() and
+pmd_free_tlb(), but make sure you cover all PMD table allocation / freeing
+paths: i.e X86_PAE preallocate few PMDs on pgd_alloc().
+
+With everything in place you can set CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK.
+
+NOTE: pgtable_page_ctor() and pgtable_pmd_page_ctor() can fail -- it must
+be handled properly.
+
+page->ptl
+---------
+
+page->ptl is used to access split page table lock, where 'page' is struct
+page of page containing the table. It shares storage with page->private
+(and few other fields in union).
+
+To avoid increasing size of struct page and have best performance, we use a
+trick:
+ - if spinlock_t fits into long, we use page->ptr as spinlock, so we
+ can avoid indirect access and save a cache line.
+ - if size of spinlock_t is bigger then size of long, we use page->ptl as
+ pointer to spinlock_t and allocate it dynamically. This allows to use
+ split lock with enabled DEBUG_SPINLOCK or DEBUG_LOCK_ALLOC, but costs
+ one more cache line for indirect access;
+
+The spinlock_t allocated in pgtable_page_ctor() for PTE table and in
+pgtable_pmd_page_ctor() for PMD table.
+
+Please, never access page->ptl directly -- use appropriate helper.
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
new file mode 100644
index 000000000..8143b9e83
--- /dev/null
+++ b/Documentation/vm/transhuge.txt
@@ -0,0 +1,387 @@
+= Transparent Hugepage Support =
+
+== Objective ==
+
+Performance critical computing applications dealing with large memory
+working sets are already running on top of libhugetlbfs and in turn
+hugetlbfs. Transparent Hugepage Support is an alternative means of
+using huge pages for the backing of virtual memory with huge pages
+that supports the automatic promotion and demotion of page sizes and
+without the shortcomings of hugetlbfs.
+
+Currently it only works for anonymous memory mappings but in the
+future it can expand over the pagecache layer starting with tmpfs.
+
+The reason applications are running faster is because of two
+factors. The first factor is almost completely irrelevant and it's not
+of significant interest because it'll also have the downside of
+requiring larger clear-page copy-page in page faults which is a
+potentially negative effect. The first factor consists in taking a
+single page fault for each 2M virtual region touched by userland (so
+reducing the enter/exit kernel frequency by a 512 times factor). This
+only matters the first time the memory is accessed for the lifetime of
+a memory mapping. The second long lasting and much more important
+factor will affect all subsequent accesses to the memory for the whole
+runtime of the application. The second factor consist of two
+components: 1) the TLB miss will run faster (especially with
+virtualization using nested pagetables but almost always also on bare
+metal without virtualization) and 2) a single TLB entry will be
+mapping a much larger amount of virtual memory in turn reducing the
+number of TLB misses. With virtualization and nested pagetables the
+TLB can be mapped of larger size only if both KVM and the Linux guest
+are using hugepages but a significant speedup already happens if only
+one of the two is using hugepages just because of the fact the TLB
+miss is going to run faster.
+
+== Design ==
+
+- "graceful fallback": mm components which don't have transparent
+ hugepage knowledge fall back to breaking a transparent hugepage and
+ working on the regular pages and their respective regular pmd/pte
+ mappings
+
+- if a hugepage allocation fails because of memory fragmentation,
+ regular pages should be gracefully allocated instead and mixed in
+ the same vma without any failure or significant delay and without
+ userland noticing
+
+- if some task quits and more hugepages become available (either
+ immediately in the buddy or through the VM), guest physical memory
+ backed by regular pages should be relocated on hugepages
+ automatically (with khugepaged)
+
+- it doesn't require memory reservation and in turn it uses hugepages
+ whenever possible (the only possible reservation here is kernelcore=
+ to avoid unmovable pages to fragment all the memory but such a tweak
+ is not specific to transparent hugepage support and it's a generic
+ feature that applies to all dynamic high order allocations in the
+ kernel)
+
+- this initial support only offers the feature in the anonymous memory
+ regions but it'd be ideal to move it to tmpfs and the pagecache
+ later
+
+Transparent Hugepage Support maximizes the usefulness of free memory
+if compared to the reservation approach of hugetlbfs by allowing all
+unused memory to be used as cache or other movable (or even unmovable
+entities). It doesn't require reservation to prevent hugepage
+allocation failures to be noticeable from userland. It allows paging
+and all other advanced VM features to be available on the
+hugepages. It requires no modifications for applications to take
+advantage of it.
+
+Applications however can be further optimized to take advantage of
+this feature, like for example they've been optimized before to avoid
+a flood of mmap system calls for every malloc(4k). Optimizing userland
+is by far not mandatory and khugepaged already can take care of long
+lived page allocations even for hugepage unaware applications that
+deals with large amounts of memory.
+
+In certain cases when hugepages are enabled system wide, application
+may end up allocating more memory resources. An application may mmap a
+large region but only touch 1 byte of it, in that case a 2M page might
+be allocated instead of a 4k page for no good. This is why it's
+possible to disable hugepages system-wide and to only have them inside
+MADV_HUGEPAGE madvise regions.
+
+Embedded systems should enable hugepages only inside madvise regions
+to eliminate any risk of wasting any precious byte of memory and to
+only run faster.
+
+Applications that gets a lot of benefit from hugepages and that don't
+risk to lose memory by using hugepages, should use
+madvise(MADV_HUGEPAGE) on their critical mmapped regions.
+
+== sysfs ==
+
+Transparent Hugepage Support can be entirely disabled (mostly for
+debugging purposes) or only enabled inside MADV_HUGEPAGE regions (to
+avoid the risk of consuming more memory resources) or enabled system
+wide. This can be achieved with one of:
+
+echo always >/sys/kernel/mm/transparent_hugepage/enabled
+echo madvise >/sys/kernel/mm/transparent_hugepage/enabled
+echo never >/sys/kernel/mm/transparent_hugepage/enabled
+
+It's also possible to limit defrag efforts in the VM to generate
+hugepages in case they're not immediately free to madvise regions or
+to never try to defrag memory and simply fallback to regular pages
+unless hugepages are immediately available. Clearly if we spend CPU
+time to defrag memory, we would expect to gain even more by the fact
+we use hugepages later instead of regular pages. This isn't always
+guaranteed, but it may be more likely in case the allocation is for a
+MADV_HUGEPAGE region.
+
+echo always >/sys/kernel/mm/transparent_hugepage/defrag
+echo madvise >/sys/kernel/mm/transparent_hugepage/defrag
+echo never >/sys/kernel/mm/transparent_hugepage/defrag
+
+By default kernel tries to use huge zero page on read page fault.
+It's possible to disable huge zero page by writing 0 or enable it
+back by writing 1:
+
+echo 0 >/sys/kernel/mm/transparent_hugepage/use_zero_page
+echo 1 >/sys/kernel/mm/transparent_hugepage/use_zero_page
+
+khugepaged will be automatically started when
+transparent_hugepage/enabled is set to "always" or "madvise, and it'll
+be automatically shutdown if it's set to "never".
+
+khugepaged runs usually at low frequency so while one may not want to
+invoke defrag algorithms synchronously during the page faults, it
+should be worth invoking defrag at least in khugepaged. However it's
+also possible to disable defrag in khugepaged by writing 0 or enable
+defrag in khugepaged by writing 1:
+
+echo 0 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag
+echo 1 >/sys/kernel/mm/transparent_hugepage/khugepaged/defrag
+
+You can also control how many pages khugepaged should scan at each
+pass:
+
+/sys/kernel/mm/transparent_hugepage/khugepaged/pages_to_scan
+
+and how many milliseconds to wait in khugepaged between each pass (you
+can set this to 0 to run khugepaged at 100% utilization of one core):
+
+/sys/kernel/mm/transparent_hugepage/khugepaged/scan_sleep_millisecs
+
+and how many milliseconds to wait in khugepaged if there's an hugepage
+allocation failure to throttle the next allocation attempt.
+
+/sys/kernel/mm/transparent_hugepage/khugepaged/alloc_sleep_millisecs
+
+The khugepaged progress can be seen in the number of pages collapsed:
+
+/sys/kernel/mm/transparent_hugepage/khugepaged/pages_collapsed
+
+for each pass:
+
+/sys/kernel/mm/transparent_hugepage/khugepaged/full_scans
+
+max_ptes_none specifies how many extra small pages (that are
+not already mapped) can be allocated when collapsing a group
+of small pages into one large page.
+
+/sys/kernel/mm/transparent_hugepage/khugepaged/max_ptes_none
+
+A higher value leads to use additional memory for programs.
+A lower value leads to gain less thp performance. Value of
+max_ptes_none can waste cpu time very little, you can
+ignore it.
+
+== Boot parameter ==
+
+You can change the sysfs boot time defaults of Transparent Hugepage
+Support by passing the parameter "transparent_hugepage=always" or
+"transparent_hugepage=madvise" or "transparent_hugepage=never"
+(without "") to the kernel command line.
+
+== Need of application restart ==
+
+The transparent_hugepage/enabled values only affect future
+behavior. So to make them effective you need to restart any
+application that could have been using hugepages. This also applies to
+the regions registered in khugepaged.
+
+== Monitoring usage ==
+
+The number of transparent huge pages currently used by the system is
+available by reading the AnonHugePages field in /proc/meminfo. To
+identify what applications are using transparent huge pages, it is
+necessary to read /proc/PID/smaps and count the AnonHugePages fields
+for each mapping. Note that reading the smaps file is expensive and
+reading it frequently will incur overhead.
+
+There are a number of counters in /proc/vmstat that may be used to
+monitor how successfully the system is providing huge pages for use.
+
+thp_fault_alloc is incremented every time a huge page is successfully
+ allocated to handle a page fault. This applies to both the
+ first time a page is faulted and for COW faults.
+
+thp_collapse_alloc is incremented by khugepaged when it has found
+ a range of pages to collapse into one huge page and has
+ successfully allocated a new huge page to store the data.
+
+thp_fault_fallback is incremented if a page fault fails to allocate
+ a huge page and instead falls back to using small pages.
+
+thp_collapse_alloc_failed is incremented if khugepaged found a range
+ of pages that should be collapsed into one huge page but failed
+ the allocation.
+
+thp_split is incremented every time a huge page is split into base
+ pages. This can happen for a variety of reasons but a common
+ reason is that a huge page is old and is being reclaimed.
+
+thp_zero_page_alloc is incremented every time a huge zero page is
+ successfully allocated. It includes allocations which where
+ dropped due race with other allocation. Note, it doesn't count
+ every map of the huge zero page, only its allocation.
+
+thp_zero_page_alloc_failed is incremented if kernel fails to allocate
+ huge zero page and falls back to using small pages.
+
+As the system ages, allocating huge pages may be expensive as the
+system uses memory compaction to copy data around memory to free a
+huge page for use. There are some counters in /proc/vmstat to help
+monitor this overhead.
+
+compact_stall is incremented every time a process stalls to run
+ memory compaction so that a huge page is free for use.
+
+compact_success is incremented if the system compacted memory and
+ freed a huge page for use.
+
+compact_fail is incremented if the system tries to compact memory
+ but failed.
+
+compact_pages_moved is incremented each time a page is moved. If
+ this value is increasing rapidly, it implies that the system
+ is copying a lot of data to satisfy the huge page allocation.
+ It is possible that the cost of copying exceeds any savings
+ from reduced TLB misses.
+
+compact_pagemigrate_failed is incremented when the underlying mechanism
+ for moving a page failed.
+
+compact_blocks_moved is incremented each time memory compaction examines
+ a huge page aligned range of pages.
+
+It is possible to establish how long the stalls were using the function
+tracer to record how long was spent in __alloc_pages_nodemask and
+using the mm_page_alloc tracepoint to identify which allocations were
+for huge pages.
+
+== get_user_pages and follow_page ==
+
+get_user_pages and follow_page if run on a hugepage, will return the
+head or tail pages as usual (exactly as they would do on
+hugetlbfs). Most gup users will only care about the actual physical
+address of the page and its temporary pinning to release after the I/O
+is complete, so they won't ever notice the fact the page is huge. But
+if any driver is going to mangle over the page structure of the tail
+page (like for checking page->mapping or other bits that are relevant
+for the head page and not the tail page), it should be updated to jump
+to check head page instead (while serializing properly against
+split_huge_page() to avoid the head and tail pages to disappear from
+under it, see the futex code to see an example of that, hugetlbfs also
+needed special handling in futex code for similar reasons).
+
+NOTE: these aren't new constraints to the GUP API, and they match the
+same constrains that applies to hugetlbfs too, so any driver capable
+of handling GUP on hugetlbfs will also work fine on transparent
+hugepage backed mappings.
+
+In case you can't handle compound pages if they're returned by
+follow_page, the FOLL_SPLIT bit can be specified as parameter to
+follow_page, so that it will split the hugepages before returning
+them. Migration for example passes FOLL_SPLIT as parameter to
+follow_page because it's not hugepage aware and in fact it can't work
+at all on hugetlbfs (but it instead works fine on transparent
+hugepages thanks to FOLL_SPLIT). migration simply can't deal with
+hugepages being returned (as it's not only checking the pfn of the
+page and pinning it during the copy but it pretends to migrate the
+memory in regular page sizes and with regular pte/pmd mappings).
+
+== Optimizing the applications ==
+
+To be guaranteed that the kernel will map a 2M page immediately in any
+memory region, the mmap region has to be hugepage naturally
+aligned. posix_memalign() can provide that guarantee.
+
+== Hugetlbfs ==
+
+You can use hugetlbfs on a kernel that has transparent hugepage
+support enabled just fine as always. No difference can be noted in
+hugetlbfs other than there will be less overall fragmentation. All
+usual features belonging to hugetlbfs are preserved and
+unaffected. libhugetlbfs will also work fine as usual.
+
+== Graceful fallback ==
+
+Code walking pagetables but unware about huge pmds can simply call
+split_huge_page_pmd(vma, addr, pmd) where the pmd is the one returned by
+pmd_offset. It's trivial to make the code transparent hugepage aware
+by just grepping for "pmd_offset" and adding split_huge_page_pmd where
+missing after pmd_offset returns the pmd. Thanks to the graceful
+fallback design, with a one liner change, you can avoid to write
+hundred if not thousand of lines of complex code to make your code
+hugepage aware.
+
+If you're not walking pagetables but you run into a physical hugepage
+but you can't handle it natively in your code, you can split it by
+calling split_huge_page(page). This is what the Linux VM does before
+it tries to swapout the hugepage for example.
+
+Example to make mremap.c transparent hugepage aware with a one liner
+change:
+
+diff --git a/mm/mremap.c b/mm/mremap.c
+--- a/mm/mremap.c
++++ b/mm/mremap.c
+@@ -41,6 +41,7 @@ static pmd_t *get_old_pmd(struct mm_stru
+ return NULL;
+
+ pmd = pmd_offset(pud, addr);
++ split_huge_page_pmd(vma, addr, pmd);
+ if (pmd_none_or_clear_bad(pmd))
+ return NULL;
+
+== Locking in hugepage aware code ==
+
+We want as much code as possible hugepage aware, as calling
+split_huge_page() or split_huge_page_pmd() has a cost.
+
+To make pagetable walks huge pmd aware, all you need to do is to call
+pmd_trans_huge() on the pmd returned by pmd_offset. You must hold the
+mmap_sem in read (or write) mode to be sure an huge pmd cannot be
+created from under you by khugepaged (khugepaged collapse_huge_page
+takes the mmap_sem in write mode in addition to the anon_vma lock). If
+pmd_trans_huge returns false, you just fallback in the old code
+paths. If instead pmd_trans_huge returns true, you have to take the
+mm->page_table_lock and re-run pmd_trans_huge. Taking the
+page_table_lock will prevent the huge pmd to be converted into a
+regular pmd from under you (split_huge_page can run in parallel to the
+pagetable walk). If the second pmd_trans_huge returns false, you
+should just drop the page_table_lock and fallback to the old code as
+before. Otherwise you should run pmd_trans_splitting on the pmd. In
+case pmd_trans_splitting returns true, it means split_huge_page is
+already in the middle of splitting the page. So if pmd_trans_splitting
+returns true it's enough to drop the page_table_lock and call
+wait_split_huge_page and then fallback the old code paths. You are
+guaranteed by the time wait_split_huge_page returns, the pmd isn't
+huge anymore. If pmd_trans_splitting returns false, you can proceed to
+process the huge pmd and the hugepage natively. Once finished you can
+drop the page_table_lock.
+
+== compound_lock, get_user_pages and put_page ==
+
+split_huge_page internally has to distribute the refcounts in the head
+page to the tail pages before clearing all PG_head/tail bits from the
+page structures. It can do that easily for refcounts taken by huge pmd
+mappings. But the GUI API as created by hugetlbfs (that returns head
+and tail pages if running get_user_pages on an address backed by any
+hugepage), requires the refcount to be accounted on the tail pages and
+not only in the head pages, if we want to be able to run
+split_huge_page while there are gup pins established on any tail
+page. Failure to be able to run split_huge_page if there's any gup pin
+on any tail page, would mean having to split all hugepages upfront in
+get_user_pages which is unacceptable as too many gup users are
+performance critical and they must work natively on hugepages like
+they work natively on hugetlbfs already (hugetlbfs is simpler because
+hugetlbfs pages cannot be split so there wouldn't be requirement of
+accounting the pins on the tail pages for hugetlbfs). If we wouldn't
+account the gup refcounts on the tail pages during gup, we won't know
+anymore which tail page is pinned by gup and which is not while we run
+split_huge_page. But we still have to add the gup pin to the head page
+too, to know when we can free the compound page in case it's never
+split during its lifetime. That requires changing not just
+get_page, but put_page as well so that when put_page runs on a tail
+page (and only on a tail page) it will find its respective head page,
+and then it will decrease the head page refcount in addition to the
+tail page refcount. To obtain a head page reliably and to decrease its
+refcount without race conditions, put_page has to serialize against
+__split_huge_page_refcount using a special per-page lock called
+compound_lock.
diff --git a/Documentation/vm/uksm.txt b/Documentation/vm/uksm.txt
new file mode 100644
index 000000000..387d96ae2
--- /dev/null
+++ b/Documentation/vm/uksm.txt
@@ -0,0 +1,59 @@
+The Ultra Kernel Samepage Merging feature
+----------------------------------------------
+/*
+ * Ultra KSM. Copyright (C) 2011-2012 Nai Xia
+ *
+ * This is an improvement upon KSM. Some basic data structures and routines
+ * are borrowed from ksm.c .
+ *
+ * Its new features:
+ * 1. Full system scan:
+ * It automatically scans all user processes' anonymous VMAs. Kernel-user
+ * interaction to submit a memory area to KSM is no longer needed.
+ *
+ * 2. Rich area detection:
+ * It automatically detects rich areas containing abundant duplicated
+ * pages based. Rich areas are given a full scan speed. Poor areas are
+ * sampled at a reasonable speed with very low CPU consumption.
+ *
+ * 3. Ultra Per-page scan speed improvement:
+ * A new hash algorithm is proposed. As a result, on a machine with
+ * Core(TM)2 Quad Q9300 CPU in 32-bit mode and 800MHZ DDR2 main memory, it
+ * can scan memory areas that does not contain duplicated pages at speed of
+ * 627MB/sec ~ 2445MB/sec and can merge duplicated areas at speed of
+ * 477MB/sec ~ 923MB/sec.
+ *
+ * 4. Thrashing area avoidance:
+ * Thrashing area(an VMA that has frequent Ksm page break-out) can be
+ * filtered out. My benchmark shows it's more efficient than KSM's per-page
+ * hash value based volatile page detection.
+ *
+ *
+ * 5. Misc changes upon KSM:
+ * * It has a fully x86-opitmized memcmp dedicated for 4-byte-aligned page
+ * comparison. It's much faster than default C version on x86.
+ * * rmap_item now has an struct *page member to loosely cache a
+ * address-->page mapping, which reduces too much time-costly
+ * follow_page().
+ * * The VMA creation/exit procedures are hooked to let the Ultra KSM know.
+ * * try_to_merge_two_pages() now can revert a pte if it fails. No break_
+ * ksm is needed for this case.
+ *
+ * 6. Full Zero Page consideration(contributed by Figo Zhang)
+ * Now uksmd consider full zero pages as special pages and merge them to an
+ * special unswappable uksm zero page.
+ */
+
+ChangeLog:
+
+2012-05-05 The creation of this Doc
+2012-05-08 UKSM 0.1.1.1 libc crash bug fix, api clean up, doc clean up.
+2012-05-28 UKSM 0.1.1.2 bug fix release
+2012-06-26 UKSM 0.1.2-beta1 first beta release for 0.1.2
+2012-07-2 UKSM 0.1.2-beta2
+2012-07-10 UKSM 0.1.2-beta3
+2012-07-26 UKSM 0.1.2 Fine grained speed control, more scan optimization.
+2012-10-13 UKSM 0.1.2.1 Bug fixes.
+2012-12-31 UKSM 0.1.2.2 Minor bug fixes.
+2014-07-02 UKSM 0.1.2.3 Fix a " __this_cpu_read() in preemptible bug".
+2015-04-22 UKSM 0.1.2.4 Fix a race condition that can sometimes trigger anonying warnings.
diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
new file mode 100644
index 000000000..3be0bfc47
--- /dev/null
+++ b/Documentation/vm/unevictable-lru.txt
@@ -0,0 +1,688 @@
+ ==============================
+ UNEVICTABLE LRU INFRASTRUCTURE
+ ==============================
+
+========
+CONTENTS
+========
+
+ (*) The Unevictable LRU
+
+ - The unevictable page list.
+ - Memory control group interaction.
+ - Marking address spaces unevictable.
+ - Detecting Unevictable Pages.
+ - vmscan's handling of unevictable pages.
+
+ (*) mlock()'d pages.
+
+ - History.
+ - Basic management.
+ - mlock()/mlockall() system call handling.
+ - Filtering special vmas.
+ - munlock()/munlockall() system call handling.
+ - Migrating mlocked pages.
+ - Compacting mlocked pages.
+ - mmap(MAP_LOCKED) system call handling.
+ - munmap()/exit()/exec() system call handling.
+ - try_to_unmap().
+ - try_to_munlock() reverse map scan.
+ - Page reclaim in shrink_*_list().
+
+
+============
+INTRODUCTION
+============
+
+This document describes the Linux memory manager's "Unevictable LRU"
+infrastructure and the use of this to manage several types of "unevictable"
+pages.
+
+The document attempts to provide the overall rationale behind this mechanism
+and the rationale for some of the design decisions that drove the
+implementation. The latter design rationale is discussed in the context of an
+implementation description. Admittedly, one can obtain the implementation
+details - the "what does it do?" - by reading the code. One hopes that the
+descriptions below add value by provide the answer to "why does it do that?".
+
+
+===================
+THE UNEVICTABLE LRU
+===================
+
+The Unevictable LRU facility adds an additional LRU list to track unevictable
+pages and to hide these pages from vmscan. This mechanism is based on a patch
+by Larry Woodman of Red Hat to address several scalability problems with page
+reclaim in Linux. The problems have been observed at customer sites on large
+memory x86_64 systems.
+
+To illustrate this with an example, a non-NUMA x86_64 platform with 128GB of
+main memory will have over 32 million 4k pages in a single zone. When a large
+fraction of these pages are not evictable for any reason [see below], vmscan
+will spend a lot of time scanning the LRU lists looking for the small fraction
+of pages that are evictable. This can result in a situation where all CPUs are
+spending 100% of their time in vmscan for hours or days on end, with the system
+completely unresponsive.
+
+The unevictable list addresses the following classes of unevictable pages:
+
+ (*) Those owned by ramfs.
+
+ (*) Those mapped into SHM_LOCK'd shared memory regions.
+
+ (*) Those mapped into VM_LOCKED [mlock()ed] VMAs.
+
+The infrastructure may also be able to handle other conditions that make pages
+unevictable, either by definition or by circumstance, in the future.
+
+
+THE UNEVICTABLE PAGE LIST
+-------------------------
+
+The Unevictable LRU infrastructure consists of an additional, per-zone, LRU list
+called the "unevictable" list and an associated page flag, PG_unevictable, to
+indicate that the page is being managed on the unevictable list.
+
+The PG_unevictable flag is analogous to, and mutually exclusive with, the
+PG_active flag in that it indicates on which LRU list a page resides when
+PG_lru is set.
+
+The Unevictable LRU infrastructure maintains unevictable pages on an additional
+LRU list for a few reasons:
+
+ (1) We get to "treat unevictable pages just like we treat other pages in the
+ system - which means we get to use the same code to manipulate them, the
+ same code to isolate them (for migrate, etc.), the same code to keep track
+ of the statistics, etc..." [Rik van Riel]
+
+ (2) We want to be able to migrate unevictable pages between nodes for memory
+ defragmentation, workload management and memory hotplug. The linux kernel
+ can only migrate pages that it can successfully isolate from the LRU
+ lists. If we were to maintain pages elsewhere than on an LRU-like list,
+ where they can be found by isolate_lru_page(), we would prevent their
+ migration, unless we reworked migration code to find the unevictable pages
+ itself.
+
+
+The unevictable list does not differentiate between file-backed and anonymous,
+swap-backed pages. This differentiation is only important while the pages are,
+in fact, evictable.
+
+The unevictable list benefits from the "arrayification" of the per-zone LRU
+lists and statistics originally proposed and posted by Christoph Lameter.
+
+The unevictable list does not use the LRU pagevec mechanism. Rather,
+unevictable pages are placed directly on the page's zone's unevictable list
+under the zone lru_lock. This allows us to prevent the stranding of pages on
+the unevictable list when one task has the page isolated from the LRU and other
+tasks are changing the "evictability" state of the page.
+
+
+MEMORY CONTROL GROUP INTERACTION
+--------------------------------
+
+The unevictable LRU facility interacts with the memory control group [aka
+memory controller; see Documentation/cgroups/memory.txt] by extending the
+lru_list enum.
+
+The memory controller data structure automatically gets a per-zone unevictable
+list as a result of the "arrayification" of the per-zone LRU lists (one per
+lru_list enum element). The memory controller tracks the movement of pages to
+and from the unevictable list.
+
+When a memory control group comes under memory pressure, the controller will
+not attempt to reclaim pages on the unevictable list. This has a couple of
+effects:
+
+ (1) Because the pages are "hidden" from reclaim on the unevictable list, the
+ reclaim process can be more efficient, dealing only with pages that have a
+ chance of being reclaimed.
+
+ (2) On the other hand, if too many of the pages charged to the control group
+ are unevictable, the evictable portion of the working set of the tasks in
+ the control group may not fit into the available memory. This can cause
+ the control group to thrash or to OOM-kill tasks.
+
+
+MARKING ADDRESS SPACES UNEVICTABLE
+----------------------------------
+
+For facilities such as ramfs none of the pages attached to the address space
+may be evicted. To prevent eviction of any such pages, the AS_UNEVICTABLE
+address space flag is provided, and this can be manipulated by a filesystem
+using a number of wrapper functions:
+
+ (*) void mapping_set_unevictable(struct address_space *mapping);
+
+ Mark the address space as being completely unevictable.
+
+ (*) void mapping_clear_unevictable(struct address_space *mapping);
+
+ Mark the address space as being evictable.
+
+ (*) int mapping_unevictable(struct address_space *mapping);
+
+ Query the address space, and return true if it is completely
+ unevictable.
+
+These are currently used in two places in the kernel:
+
+ (1) By ramfs to mark the address spaces of its inodes when they are created,
+ and this mark remains for the life of the inode.
+
+ (2) By SYSV SHM to mark SHM_LOCK'd address spaces until SHM_UNLOCK is called.
+
+ Note that SHM_LOCK is not required to page in the locked pages if they're
+ swapped out; the application must touch the pages manually if it wants to
+ ensure they're in memory.
+
+
+DETECTING UNEVICTABLE PAGES
+---------------------------
+
+The function page_evictable() in vmscan.c determines whether a page is
+evictable or not using the query function outlined above [see section "Marking
+address spaces unevictable"] to check the AS_UNEVICTABLE flag.
+
+For address spaces that are so marked after being populated (as SHM regions
+might be), the lock action (eg: SHM_LOCK) can be lazy, and need not populate
+the page tables for the region as does, for example, mlock(), nor need it make
+any special effort to push any pages in the SHM_LOCK'd area to the unevictable
+list. Instead, vmscan will do this if and when it encounters the pages during
+a reclamation scan.
+
+On an unlock action (such as SHM_UNLOCK), the unlocker (eg: shmctl()) must scan
+the pages in the region and "rescue" them from the unevictable list if no other
+condition is keeping them unevictable. If an unevictable region is destroyed,
+the pages are also "rescued" from the unevictable list in the process of
+freeing them.
+
+page_evictable() also checks for mlocked pages by testing an additional page
+flag, PG_mlocked (as wrapped by PageMlocked()), which is set when a page is
+faulted into a VM_LOCKED vma, or found in a vma being VM_LOCKED.
+
+
+VMSCAN'S HANDLING OF UNEVICTABLE PAGES
+--------------------------------------
+
+If unevictable pages are culled in the fault path, or moved to the unevictable
+list at mlock() or mmap() time, vmscan will not encounter the pages until they
+have become evictable again (via munlock() for example) and have been "rescued"
+from the unevictable list. However, there may be situations where we decide,
+for the sake of expediency, to leave a unevictable page on one of the regular
+active/inactive LRU lists for vmscan to deal with. vmscan checks for such
+pages in all of the shrink_{active|inactive|page}_list() functions and will
+"cull" such pages that it encounters: that is, it diverts those pages to the
+unevictable list for the zone being scanned.
+
+There may be situations where a page is mapped into a VM_LOCKED VMA, but the
+page is not marked as PG_mlocked. Such pages will make it all the way to
+shrink_page_list() where they will be detected when vmscan walks the reverse
+map in try_to_unmap(). If try_to_unmap() returns SWAP_MLOCK,
+shrink_page_list() will cull the page at that point.
+
+To "cull" an unevictable page, vmscan simply puts the page back on the LRU list
+using putback_lru_page() - the inverse operation to isolate_lru_page() - after
+dropping the page lock. Because the condition which makes the page unevictable
+may change once the page is unlocked, putback_lru_page() will recheck the
+unevictable state of a page that it places on the unevictable list. If the
+page has become unevictable, putback_lru_page() removes it from the list and
+retries, including the page_unevictable() test. Because such a race is a rare
+event and movement of pages onto the unevictable list should be rare, these
+extra evictabilty checks should not occur in the majority of calls to
+putback_lru_page().
+
+
+=============
+MLOCKED PAGES
+=============
+
+The unevictable page list is also useful for mlock(), in addition to ramfs and
+SYSV SHM. Note that mlock() is only available in CONFIG_MMU=y situations; in
+NOMMU situations, all mappings are effectively mlocked.
+
+
+HISTORY
+-------
+
+The "Unevictable mlocked Pages" infrastructure is based on work originally
+posted by Nick Piggin in an RFC patch entitled "mm: mlocked pages off LRU".
+Nick posted his patch as an alternative to a patch posted by Christoph Lameter
+to achieve the same objective: hiding mlocked pages from vmscan.
+
+In Nick's patch, he used one of the struct page LRU list link fields as a count
+of VM_LOCKED VMAs that map the page. This use of the link field for a count
+prevented the management of the pages on an LRU list, and thus mlocked pages
+were not migratable as isolate_lru_page() could not find them, and the LRU list
+link field was not available to the migration subsystem.
+
+Nick resolved this by putting mlocked pages back on the lru list before
+attempting to isolate them, thus abandoning the count of VM_LOCKED VMAs. When
+Nick's patch was integrated with the Unevictable LRU work, the count was
+replaced by walking the reverse map to determine whether any VM_LOCKED VMAs
+mapped the page. More on this below.
+
+
+BASIC MANAGEMENT
+----------------
+
+mlocked pages - pages mapped into a VM_LOCKED VMA - are a class of unevictable
+pages. When such a page has been "noticed" by the memory management subsystem,
+the page is marked with the PG_mlocked flag. This can be manipulated using the
+PageMlocked() functions.
+
+A PG_mlocked page will be placed on the unevictable list when it is added to
+the LRU. Such pages can be "noticed" by memory management in several places:
+
+ (1) in the mlock()/mlockall() system call handlers;
+
+ (2) in the mmap() system call handler when mmapping a region with the
+ MAP_LOCKED flag;
+
+ (3) mmapping a region in a task that has called mlockall() with the MCL_FUTURE
+ flag
+
+ (4) in the fault path, if mlocked pages are "culled" in the fault path,
+ and when a VM_LOCKED stack segment is expanded; or
+
+ (5) as mentioned above, in vmscan:shrink_page_list() when attempting to
+ reclaim a page in a VM_LOCKED VMA via try_to_unmap()
+
+all of which result in the VM_LOCKED flag being set for the VMA if it doesn't
+already have it set.
+
+mlocked pages become unlocked and rescued from the unevictable list when:
+
+ (1) mapped in a range unlocked via the munlock()/munlockall() system calls;
+
+ (2) munmap()'d out of the last VM_LOCKED VMA that maps the page, including
+ unmapping at task exit;
+
+ (3) when the page is truncated from the last VM_LOCKED VMA of an mmapped file;
+ or
+
+ (4) before a page is COW'd in a VM_LOCKED VMA.
+
+
+mlock()/mlockall() SYSTEM CALL HANDLING
+---------------------------------------
+
+Both [do_]mlock() and [do_]mlockall() system call handlers call mlock_fixup()
+for each VMA in the range specified by the call. In the case of mlockall(),
+this is the entire active address space of the task. Note that mlock_fixup()
+is used for both mlocking and munlocking a range of memory. A call to mlock()
+an already VM_LOCKED VMA, or to munlock() a VMA that is not VM_LOCKED is
+treated as a no-op, and mlock_fixup() simply returns.
+
+If the VMA passes some filtering as described in "Filtering Special Vmas"
+below, mlock_fixup() will attempt to merge the VMA with its neighbors or split
+off a subset of the VMA if the range does not cover the entire VMA. Once the
+VMA has been merged or split or neither, mlock_fixup() will call
+populate_vma_page_range() to fault in the pages via get_user_pages() and to
+mark the pages as mlocked via mlock_vma_page().
+
+Note that the VMA being mlocked might be mapped with PROT_NONE. In this case,
+get_user_pages() will be unable to fault in the pages. That's okay. If pages
+do end up getting faulted into this VM_LOCKED VMA, we'll handle them in the
+fault path or in vmscan.
+
+Also note that a page returned by get_user_pages() could be truncated or
+migrated out from under us, while we're trying to mlock it. To detect this,
+populate_vma_page_range() checks page_mapping() after acquiring the page lock.
+If the page is still associated with its mapping, we'll go ahead and call
+mlock_vma_page(). If the mapping is gone, we just unlock the page and move on.
+In the worst case, this will result in a page mapped in a VM_LOCKED VMA
+remaining on a normal LRU list without being PageMlocked(). Again, vmscan will
+detect and cull such pages.
+
+mlock_vma_page() will call TestSetPageMlocked() for each page returned by
+get_user_pages(). We use TestSetPageMlocked() because the page might already
+be mlocked by another task/VMA and we don't want to do extra work. We
+especially do not want to count an mlocked page more than once in the
+statistics. If the page was already mlocked, mlock_vma_page() need do nothing
+more.
+
+If the page was NOT already mlocked, mlock_vma_page() attempts to isolate the
+page from the LRU, as it is likely on the appropriate active or inactive list
+at that time. If the isolate_lru_page() succeeds, mlock_vma_page() will put
+back the page - by calling putback_lru_page() - which will notice that the page
+is now mlocked and divert the page to the zone's unevictable list. If
+mlock_vma_page() is unable to isolate the page from the LRU, vmscan will handle
+it later if and when it attempts to reclaim the page.
+
+
+FILTERING SPECIAL VMAS
+----------------------
+
+mlock_fixup() filters several classes of "special" VMAs:
+
+1) VMAs with VM_IO or VM_PFNMAP set are skipped entirely. The pages behind
+ these mappings are inherently pinned, so we don't need to mark them as
+ mlocked. In any case, most of the pages have no struct page in which to so
+ mark the page. Because of this, get_user_pages() will fail for these VMAs,
+ so there is no sense in attempting to visit them.
+
+2) VMAs mapping hugetlbfs page are already effectively pinned into memory. We
+ neither need nor want to mlock() these pages. However, to preserve the
+ prior behavior of mlock() - before the unevictable/mlock changes -
+ mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to
+ allocate the huge pages and populate the ptes.
+
+3) VMAs with VM_DONTEXPAND are generally userspace mappings of kernel pages,
+ such as the VDSO page, relay channel pages, etc. These pages
+ are inherently unevictable and are not managed on the LRU lists.
+ mlock_fixup() treats these VMAs the same as hugetlbfs VMAs. It calls
+ make_pages_present() to populate the ptes.
+
+Note that for all of these special VMAs, mlock_fixup() does not set the
+VM_LOCKED flag. Therefore, we won't have to deal with them later during
+munlock(), munmap() or task exit. Neither does mlock_fixup() account these
+VMAs against the task's "locked_vm".
+
+
+munlock()/munlockall() SYSTEM CALL HANDLING
+-------------------------------------------
+
+The munlock() and munlockall() system calls are handled by the same functions -
+do_mlock[all]() - as the mlock() and mlockall() system calls with the unlock vs
+lock operation indicated by an argument. So, these system calls are also
+handled by mlock_fixup(). Again, if called for an already munlocked VMA,
+mlock_fixup() simply returns. Because of the VMA filtering discussed above,
+VM_LOCKED will not be set in any "special" VMAs. So, these VMAs will be
+ignored for munlock.
+
+If the VMA is VM_LOCKED, mlock_fixup() again attempts to merge or split off the
+specified range. The range is then munlocked via the function
+populate_vma_page_range() - the same function used to mlock a VMA range -
+passing a flag to indicate that munlock() is being performed.
+
+Because the VMA access protections could have been changed to PROT_NONE after
+faulting in and mlocking pages, get_user_pages() was unreliable for visiting
+these pages for munlocking. Because we don't want to leave pages mlocked,
+get_user_pages() was enhanced to accept a flag to ignore the permissions when
+fetching the pages - all of which should be resident as a result of previous
+mlocking.
+
+For munlock(), populate_vma_page_range() unlocks individual pages by calling
+munlock_vma_page(). munlock_vma_page() unconditionally clears the PG_mlocked
+flag using TestClearPageMlocked(). As with mlock_vma_page(),
+munlock_vma_page() use the Test*PageMlocked() function to handle the case where
+the page might have already been unlocked by another task. If the page was
+mlocked, munlock_vma_page() updates that zone statistics for the number of
+mlocked pages. Note, however, that at this point we haven't checked whether
+the page is mapped by other VM_LOCKED VMAs.
+
+We can't call try_to_munlock(), the function that walks the reverse map to
+check for other VM_LOCKED VMAs, without first isolating the page from the LRU.
+try_to_munlock() is a variant of try_to_unmap() and thus requires that the page
+not be on an LRU list [more on these below]. However, the call to
+isolate_lru_page() could fail, in which case we couldn't try_to_munlock(). So,
+we go ahead and clear PG_mlocked up front, as this might be the only chance we
+have. If we can successfully isolate the page, we go ahead and
+try_to_munlock(), which will restore the PG_mlocked flag and update the zone
+page statistics if it finds another VMA holding the page mlocked. If we fail
+to isolate the page, we'll have left a potentially mlocked page on the LRU.
+This is fine, because we'll catch it later if and if vmscan tries to reclaim
+the page. This should be relatively rare.
+
+
+MIGRATING MLOCKED PAGES
+-----------------------
+
+A page that is being migrated has been isolated from the LRU lists and is held
+locked across unmapping of the page, updating the page's address space entry
+and copying the contents and state, until the page table entry has been
+replaced with an entry that refers to the new page. Linux supports migration
+of mlocked pages and other unevictable pages. This involves simply moving the
+PG_mlocked and PG_unevictable states from the old page to the new page.
+
+Note that page migration can race with mlocking or munlocking of the same page.
+This has been discussed from the mlock/munlock perspective in the respective
+sections above. Both processes (migration and m[un]locking) hold the page
+locked. This provides the first level of synchronization. Page migration
+zeros out the page_mapping of the old page before unlocking it, so m[un]lock
+can skip these pages by testing the page mapping under page lock.
+
+To complete page migration, we place the new and old pages back onto the LRU
+after dropping the page lock. The "unneeded" page - old page on success, new
+page on failure - will be freed when the reference count held by the migration
+process is released. To ensure that we don't strand pages on the unevictable
+list because of a race between munlock and migration, page migration uses the
+putback_lru_page() function to add migrated pages back to the LRU.
+
+
+COMPACTING MLOCKED PAGES
+------------------------
+
+The unevictable LRU can be scanned for compactable regions and the default
+behavior is to do so. /proc/sys/vm/compact_unevictable_allowed controls
+this behavior (see Documentation/sysctl/vm.txt). Once scanning of the
+unevictable LRU is enabled, the work of compaction is mostly handled by
+the page migration code and the same work flow as described in MIGRATING
+MLOCKED PAGES will apply.
+
+
+mmap(MAP_LOCKED) SYSTEM CALL HANDLING
+-------------------------------------
+
+In addition the mlock()/mlockall() system calls, an application can request
+that a region of memory be mlocked supplying the MAP_LOCKED flag to the mmap()
+call. Furthermore, any mmap() call or brk() call that expands the heap by a
+task that has previously called mlockall() with the MCL_FUTURE flag will result
+in the newly mapped memory being mlocked. Before the unevictable/mlock
+changes, the kernel simply called make_pages_present() to allocate pages and
+populate the page table.
+
+To mlock a range of memory under the unevictable/mlock infrastructure, the
+mmap() handler and task address space expansion functions call
+populate_vma_page_range() specifying the vma and the address range to mlock.
+
+The callers of populate_vma_page_range() will have already added the memory range
+to be mlocked to the task's "locked_vm". To account for filtered VMAs,
+populate_vma_page_range() returns the number of pages NOT mlocked. All of the
+callers then subtract a non-negative return value from the task's locked_vm. A
+negative return value represent an error - for example, from get_user_pages()
+attempting to fault in a VMA with PROT_NONE access. In this case, we leave the
+memory range accounted as locked_vm, as the protections could be changed later
+and pages allocated into that region.
+
+
+munmap()/exit()/exec() SYSTEM CALL HANDLING
+-------------------------------------------
+
+When unmapping an mlocked region of memory, whether by an explicit call to
+munmap() or via an internal unmap from exit() or exec() processing, we must
+munlock the pages if we're removing the last VM_LOCKED VMA that maps the pages.
+Before the unevictable/mlock changes, mlocking did not mark the pages in any
+way, so unmapping them required no processing.
+
+To munlock a range of memory under the unevictable/mlock infrastructure, the
+munmap() handler and task address space call tear down function
+munlock_vma_pages_all(). The name reflects the observation that one always
+specifies the entire VMA range when munlock()ing during unmap of a region.
+Because of the VMA filtering when mlocking() regions, only "normal" VMAs that
+actually contain mlocked pages will be passed to munlock_vma_pages_all().
+
+munlock_vma_pages_all() clears the VM_LOCKED VMA flag and, like mlock_fixup()
+for the munlock case, calls __munlock_vma_pages_range() to walk the page table
+for the VMA's memory range and munlock_vma_page() each resident page mapped by
+the VMA. This effectively munlocks the page, only if this is the last
+VM_LOCKED VMA that maps the page.
+
+
+try_to_unmap()
+--------------
+
+Pages can, of course, be mapped into multiple VMAs. Some of these VMAs may
+have VM_LOCKED flag set. It is possible for a page mapped into one or more
+VM_LOCKED VMAs not to have the PG_mlocked flag set and therefore reside on one
+of the active or inactive LRU lists. This could happen if, for example, a task
+in the process of munlocking the page could not isolate the page from the LRU.
+As a result, vmscan/shrink_page_list() might encounter such a page as described
+in section "vmscan's handling of unevictable pages". To handle this situation,
+try_to_unmap() checks for VM_LOCKED VMAs while it is walking a page's reverse
+map.
+
+try_to_unmap() is always called, by either vmscan for reclaim or for page
+migration, with the argument page locked and isolated from the LRU. Separate
+functions handle anonymous and mapped file pages, as these types of pages have
+different reverse map mechanisms.
+
+ (*) try_to_unmap_anon()
+
+ To unmap anonymous pages, each VMA in the list anchored in the anon_vma
+ must be visited - at least until a VM_LOCKED VMA is encountered. If the
+ page is being unmapped for migration, VM_LOCKED VMAs do not stop the
+ process because mlocked pages are migratable. However, for reclaim, if
+ the page is mapped into a VM_LOCKED VMA, the scan stops.
+
+ try_to_unmap_anon() attempts to acquire in read mode the mmap semaphore of
+ the mm_struct to which the VMA belongs. If this is successful, it will
+ mlock the page via mlock_vma_page() - we wouldn't have gotten to
+ try_to_unmap_anon() if the page were already mlocked - and will return
+ SWAP_MLOCK, indicating that the page is unevictable.
+
+ If the mmap semaphore cannot be acquired, we are not sure whether the page
+ is really unevictable or not. In this case, try_to_unmap_anon() will
+ return SWAP_AGAIN.
+
+ (*) try_to_unmap_file() - linear mappings
+
+ Unmapping of a mapped file page works the same as for anonymous mappings,
+ except that the scan visits all VMAs that map the page's index/page offset
+ in the page's mapping's reverse map priority search tree. It also visits
+ each VMA in the page's mapping's non-linear list, if the list is
+ non-empty.
+
+ As for anonymous pages, on encountering a VM_LOCKED VMA for a mapped file
+ page, try_to_unmap_file() will attempt to acquire the associated
+ mm_struct's mmap semaphore to mlock the page, returning SWAP_MLOCK if this
+ is successful, and SWAP_AGAIN, if not.
+
+ (*) try_to_unmap_file() - non-linear mappings
+
+ If a page's mapping contains a non-empty non-linear mapping VMA list, then
+ try_to_un{map|lock}() must also visit each VMA in that list to determine
+ whether the page is mapped in a VM_LOCKED VMA. Again, the scan must visit
+ all VMAs in the non-linear list to ensure that the pages is not/should not
+ be mlocked.
+
+ If a VM_LOCKED VMA is found in the list, the scan could terminate.
+ However, there is no easy way to determine whether the page is actually
+ mapped in a given VMA - either for unmapping or testing whether the
+ VM_LOCKED VMA actually pins the page.
+
+ try_to_unmap_file() handles non-linear mappings by scanning a certain
+ number of pages - a "cluster" - in each non-linear VMA associated with the
+ page's mapping, for each file mapped page that vmscan tries to unmap. If
+ this happens to unmap the page we're trying to unmap, try_to_unmap() will
+ notice this on return (page_mapcount(page) will be 0) and return
+ SWAP_SUCCESS. Otherwise, it will return SWAP_AGAIN, causing vmscan to
+ recirculate this page. We take advantage of the cluster scan in
+ try_to_unmap_cluster() as follows:
+
+ For each non-linear VMA, try_to_unmap_cluster() attempts to acquire the
+ mmap semaphore of the associated mm_struct for read without blocking.
+
+ If this attempt is successful and the VMA is VM_LOCKED,
+ try_to_unmap_cluster() will retain the mmap semaphore for the scan;
+ otherwise it drops it here.
+
+ Then, for each page in the cluster, if we're holding the mmap semaphore
+ for a locked VMA, try_to_unmap_cluster() calls mlock_vma_page() to
+ mlock the page. This call is a no-op if the page is already locked,
+ but will mlock any pages in the non-linear mapping that happen to be
+ unlocked.
+
+ If one of the pages so mlocked is the page passed in to try_to_unmap(),
+ try_to_unmap_cluster() will return SWAP_MLOCK, rather than the default
+ SWAP_AGAIN. This will allow vmscan to cull the page, rather than
+ recirculating it on the inactive list.
+
+ Again, if try_to_unmap_cluster() cannot acquire the VMA's mmap sem, it
+ returns SWAP_AGAIN, indicating that the page is mapped by a VM_LOCKED
+ VMA, but couldn't be mlocked.
+
+
+try_to_munlock() REVERSE MAP SCAN
+---------------------------------
+
+ [!] TODO/FIXME: a better name might be page_mlocked() - analogous to the
+ page_referenced() reverse map walker.
+
+When munlock_vma_page() [see section "munlock()/munlockall() System Call
+Handling" above] tries to munlock a page, it needs to determine whether or not
+the page is mapped by any VM_LOCKED VMA without actually attempting to unmap
+all PTEs from the page. For this purpose, the unevictable/mlock infrastructure
+introduced a variant of try_to_unmap() called try_to_munlock().
+
+try_to_munlock() calls the same functions as try_to_unmap() for anonymous and
+mapped file pages with an additional argument specifying unlock versus unmap
+processing. Again, these functions walk the respective reverse maps looking
+for VM_LOCKED VMAs. When such a VMA is found for anonymous pages and file
+pages mapped in linear VMAs, as in the try_to_unmap() case, the functions
+attempt to acquire the associated mmap semaphore, mlock the page via
+mlock_vma_page() and return SWAP_MLOCK. This effectively undoes the
+pre-clearing of the page's PG_mlocked done by munlock_vma_page.
+
+If try_to_unmap() is unable to acquire a VM_LOCKED VMA's associated mmap
+semaphore, it will return SWAP_AGAIN. This will allow shrink_page_list() to
+recycle the page on the inactive list and hope that it has better luck with the
+page next time.
+
+For file pages mapped into non-linear VMAs, the try_to_munlock() logic works
+slightly differently. On encountering a VM_LOCKED non-linear VMA that might
+map the page, try_to_munlock() returns SWAP_AGAIN without actually mlocking the
+page. munlock_vma_page() will just leave the page unlocked and let vmscan deal
+with it - the usual fallback position.
+
+Note that try_to_munlock()'s reverse map walk must visit every VMA in a page's
+reverse map to determine that a page is NOT mapped into any VM_LOCKED VMA.
+However, the scan can terminate when it encounters a VM_LOCKED VMA and can
+successfully acquire the VMA's mmap semaphore for read and mlock the page.
+Although try_to_munlock() might be called a great many times when munlocking a
+large region or tearing down a large address space that has been mlocked via
+mlockall(), overall this is a fairly rare event.
+
+
+PAGE RECLAIM IN shrink_*_list()
+-------------------------------
+
+shrink_active_list() culls any obviously unevictable pages - i.e.
+!page_evictable(page) - diverting these to the unevictable list.
+However, shrink_active_list() only sees unevictable pages that made it onto the
+active/inactive lru lists. Note that these pages do not have PageUnevictable
+set - otherwise they would be on the unevictable list and shrink_active_list
+would never see them.
+
+Some examples of these unevictable pages on the LRU lists are:
+
+ (1) ramfs pages that have been placed on the LRU lists when first allocated.
+
+ (2) SHM_LOCK'd shared memory pages. shmctl(SHM_LOCK) does not attempt to
+ allocate or fault in the pages in the shared memory region. This happens
+ when an application accesses the page the first time after SHM_LOCK'ing
+ the segment.
+
+ (3) mlocked pages that could not be isolated from the LRU and moved to the
+ unevictable list in mlock_vma_page().
+
+ (4) Pages mapped into multiple VM_LOCKED VMAs, but try_to_munlock() couldn't
+ acquire the VMA's mmap semaphore to test the flags and set PageMlocked.
+ munlock_vma_page() was forced to let the page back on to the normal LRU
+ list for vmscan to handle.
+
+shrink_inactive_list() also diverts any unevictable pages that it finds on the
+inactive lists to the appropriate zone's unevictable list.
+
+shrink_inactive_list() should only see SHM_LOCK'd pages that became SHM_LOCK'd
+after shrink_active_list() had moved them to the inactive list, or pages mapped
+into VM_LOCKED VMAs that munlock_vma_page() couldn't isolate from the LRU to
+recheck via try_to_munlock(). shrink_inactive_list() won't notice the latter,
+but will pass on to shrink_page_list().
+
+shrink_page_list() again culls obviously unevictable pages that it could
+encounter for similar reason to shrink_inactive_list(). Pages mapped into
+VM_LOCKED VMAs but without PG_mlocked set will make it all the way to
+try_to_unmap(). shrink_page_list() will divert them to the unevictable list
+when try_to_unmap() returns SWAP_MLOCK, as discussed above.
diff --git a/Documentation/vm/zsmalloc.txt b/Documentation/vm/zsmalloc.txt
new file mode 100644
index 000000000..64ed63c4f
--- /dev/null
+++ b/Documentation/vm/zsmalloc.txt
@@ -0,0 +1,70 @@
+zsmalloc
+--------
+
+This allocator is designed for use with zram. Thus, the allocator is
+supposed to work well under low memory conditions. In particular, it
+never attempts higher order page allocation which is very likely to
+fail under memory pressure. On the other hand, if we just use single
+(0-order) pages, it would suffer from very high fragmentation --
+any object of size PAGE_SIZE/2 or larger would occupy an entire page.
+This was one of the major issues with its predecessor (xvmalloc).
+
+To overcome these issues, zsmalloc allocates a bunch of 0-order pages
+and links them together using various 'struct page' fields. These linked
+pages act as a single higher-order page i.e. an object can span 0-order
+page boundaries. The code refers to these linked pages as a single entity
+called zspage.
+
+For simplicity, zsmalloc can only allocate objects of size up to PAGE_SIZE
+since this satisfies the requirements of all its current users (in the
+worst case, page is incompressible and is thus stored "as-is" i.e. in
+uncompressed form). For allocation requests larger than this size, failure
+is returned (see zs_malloc).
+
+Additionally, zs_malloc() does not return a dereferenceable pointer.
+Instead, it returns an opaque handle (unsigned long) which encodes actual
+location of the allocated object. The reason for this indirection is that
+zsmalloc does not keep zspages permanently mapped since that would cause
+issues on 32-bit systems where the VA region for kernel space mappings
+is very small. So, before using the allocating memory, the object has to
+be mapped using zs_map_object() to get a usable pointer and subsequently
+unmapped using zs_unmap_object().
+
+stat
+----
+
+With CONFIG_ZSMALLOC_STAT, we could see zsmalloc internal information via
+/sys/kernel/debug/zsmalloc/<user name>. Here is a sample of stat output:
+
+# cat /sys/kernel/debug/zsmalloc/zram0/classes
+
+ class size almost_full almost_empty obj_allocated obj_used pages_used pages_per_zspage
+ ..
+ ..
+ 9 176 0 1 186 129 8 4
+ 10 192 1 0 2880 2872 135 3
+ 11 208 0 1 819 795 42 2
+ 12 224 0 1 219 159 12 4
+ ..
+ ..
+
+
+class: index
+size: object size zspage stores
+almost_empty: the number of ZS_ALMOST_EMPTY zspages(see below)
+almost_full: the number of ZS_ALMOST_FULL zspages(see below)
+obj_allocated: the number of objects allocated
+obj_used: the number of objects allocated to the user
+pages_used: the number of pages allocated for the class
+pages_per_zspage: the number of 0-order pages to make a zspage
+
+We assign a zspage to ZS_ALMOST_EMPTY fullness group when:
+ n <= N / f, where
+n = number of allocated objects
+N = total number of objects zspage can store
+f = fullness_threshold_frac(ie, 4 at the moment)
+
+Similarly, we assign zspage to:
+ ZS_ALMOST_FULL when n > N / f
+ ZS_EMPTY when n == 0
+ ZS_FULL when n == N
diff --git a/Documentation/vm/zswap.txt b/Documentation/vm/zswap.txt
new file mode 100644
index 000000000..00c3d31e7
--- /dev/null
+++ b/Documentation/vm/zswap.txt
@@ -0,0 +1,68 @@
+Overview:
+
+Zswap is a lightweight compressed cache for swap pages. It takes pages that are
+in the process of being swapped out and attempts to compress them into a
+dynamically allocated RAM-based memory pool. zswap basically trades CPU cycles
+for potentially reduced swap I/O.  This trade-off can also result in a
+significant performance improvement if reads from the compressed cache are
+faster than reads from a swap device.
+
+NOTE: Zswap is a new feature as of v3.11 and interacts heavily with memory
+reclaim. This interaction has not been fully explored on the large set of
+potential configurations and workloads that exist. For this reason, zswap
+is a work in progress and should be considered experimental.
+
+Some potential benefits:
+* Desktop/laptop users with limited RAM capacities can mitigate the
+    performance impact of swapping.
+* Overcommitted guests that share a common I/O resource can
+    dramatically reduce their swap I/O pressure, avoiding heavy handed I/O
+ throttling by the hypervisor. This allows more work to get done with less
+ impact to the guest workload and guests sharing the I/O subsystem
+* Users with SSDs as swap devices can extend the life of the device by
+    drastically reducing life-shortening writes.
+
+Zswap evicts pages from compressed cache on an LRU basis to the backing swap
+device when the compressed pool reaches its size limit. This requirement had
+been identified in prior community discussions.
+
+To enabled zswap, the "enabled" attribute must be set to 1 at boot time. e.g.
+zswap.enabled=1
+
+Design:
+
+Zswap receives pages for compression through the Frontswap API and is able to
+evict pages from its own compressed pool on an LRU basis and write them back to
+the backing swap device in the case that the compressed pool is full.
+
+Zswap makes use of zbud for the managing the compressed memory pool. Each
+allocation in zbud is not directly accessible by address. Rather, a handle is
+returned by the allocation routine and that handle must be mapped before being
+accessed. The compressed memory pool grows on demand and shrinks as compressed
+pages are freed. The pool is not preallocated.
+
+When a swap page is passed from frontswap to zswap, zswap maintains a mapping
+of the swap entry, a combination of the swap type and swap offset, to the zbud
+handle that references that compressed swap page. This mapping is achieved
+with a red-black tree per swap type. The swap offset is the search key for the
+tree nodes.
+
+During a page fault on a PTE that is a swap entry, frontswap calls the zswap
+load function to decompress the page into the page allocated by the page fault
+handler.
+
+Once there are no PTEs referencing a swap page stored in zswap (i.e. the count
+in the swap_map goes to 0) the swap code calls the zswap invalidate function,
+via frontswap, to free the compressed entry.
+
+Zswap seeks to be simple in its policies. Sysfs attributes allow for one user
+controlled policy:
+* max_pool_percent - The maximum percentage of memory that the compressed
+ pool can occupy.
+
+Zswap allows the compressor to be selected at kernel boot time by setting the
+“compressor” attribute. The default compressor is lzo. e.g.
+zswap.compressor=deflate
+
+A debugfs interface is provided for various statistic about pool size, number
+of pages stored, and various counters for the reasons pages are rejected.
diff --git a/Documentation/vme_api.txt b/Documentation/vme_api.txt
new file mode 100644
index 000000000..ffe6e22a2
--- /dev/null
+++ b/Documentation/vme_api.txt
@@ -0,0 +1,406 @@
+ VME Device Driver API
+ =====================
+
+Driver registration
+===================
+
+As with other subsystems within the Linux kernel, VME device drivers register
+with the VME subsystem, typically called from the devices init routine. This is
+achieved via a call to the following function:
+
+ int vme_register_driver (struct vme_driver *driver);
+
+If driver registration is successful this function returns zero, if an error
+occurred a negative error code will be returned.
+
+A pointer to a structure of type 'vme_driver' must be provided to the
+registration function. The structure is as follows:
+
+ struct vme_driver {
+ struct list_head node;
+ const char *name;
+ int (*match)(struct vme_dev *);
+ int (*probe)(struct vme_dev *);
+ int (*remove)(struct vme_dev *);
+ void (*shutdown)(void);
+ struct device_driver driver;
+ struct list_head devices;
+ unsigned int ndev;
+ };
+
+At the minimum, the '.name', '.match' and '.probe' elements of this structure
+should be correctly set. The '.name' element is a pointer to a string holding
+the device driver's name.
+
+The '.match' function allows controlling the number of devices that need to
+be registered. The match function should return 1 if a device should be
+probed and 0 otherwise. This example match function (from vme_user.c) limits
+the number of devices probed to one:
+
+ #define USER_BUS_MAX 1
+ ...
+ static int vme_user_match(struct vme_dev *vdev)
+ {
+ if (vdev->id.num >= USER_BUS_MAX)
+ return 0;
+ return 1;
+ }
+
+The '.probe' element should contain a pointer to the probe routine. The
+probe routine is passed a 'struct vme_dev' pointer as an argument. The
+'struct vme_dev' structure looks like the following:
+
+ struct vme_dev {
+ int num;
+ struct vme_bridge *bridge;
+ struct device dev;
+ struct list_head drv_list;
+ struct list_head bridge_list;
+ };
+
+Here, the 'num' field refers to the sequential device ID for this specific
+driver. The bridge number (or bus number) can be accessed using
+dev->bridge->num.
+
+A function is also provided to unregister the driver from the VME core and is
+usually called from the device driver's exit routine:
+
+ void vme_unregister_driver (struct vme_driver *driver);
+
+
+Resource management
+===================
+
+Once a driver has registered with the VME core the provided match routine will
+be called the number of times specified during the registration. If a match
+succeeds, a non-zero value should be returned. A zero return value indicates
+failure. For all successful matches, the probe routine of the corresponding
+driver is called. The probe routine is passed a pointer to the devices
+device structure. This pointer should be saved, it will be required for
+requesting VME resources.
+
+The driver can request ownership of one or more master windows, slave windows
+and/or dma channels. Rather than allowing the device driver to request a
+specific window or DMA channel (which may be used by a different driver) this
+driver allows a resource to be assigned based on the required attributes of the
+driver in question:
+
+ struct vme_resource * vme_master_request(struct vme_dev *dev,
+ u32 aspace, u32 cycle, u32 width);
+
+ struct vme_resource * vme_slave_request(struct vme_dev *dev, u32 aspace,
+ u32 cycle);
+
+ struct vme_resource *vme_dma_request(struct vme_dev *dev, u32 route);
+
+For slave windows these attributes are split into the VME address spaces that
+need to be accessed in 'aspace' and VME bus cycle types required in 'cycle'.
+Master windows add a further set of attributes in 'width' specifying the
+required data transfer widths. These attributes are defined as bitmasks and as
+such any combination of the attributes can be requested for a single window,
+the core will assign a window that meets the requirements, returning a pointer
+of type vme_resource that should be used to identify the allocated resource
+when it is used. For DMA controllers, the request function requires the
+potential direction of any transfers to be provided in the route attributes.
+This is typically VME-to-MEM and/or MEM-to-VME, though some hardware can
+support VME-to-VME and MEM-to-MEM transfers as well as test pattern generation.
+If an unallocated window fitting the requirements can not be found a NULL
+pointer will be returned.
+
+Functions are also provided to free window allocations once they are no longer
+required. These functions should be passed the pointer to the resource provided
+during resource allocation:
+
+ void vme_master_free(struct vme_resource *res);
+
+ void vme_slave_free(struct vme_resource *res);
+
+ void vme_dma_free(struct vme_resource *res);
+
+
+Master windows
+==============
+
+Master windows provide access from the local processor[s] out onto the VME bus.
+The number of windows available and the available access modes is dependent on
+the underlying chipset. A window must be configured before it can be used.
+
+
+Master window configuration
+---------------------------
+
+Once a master window has been assigned the following functions can be used to
+configure it and retrieve the current settings:
+
+ int vme_master_set (struct vme_resource *res, int enabled,
+ unsigned long long base, unsigned long long size, u32 aspace,
+ u32 cycle, u32 width);
+
+ int vme_master_get (struct vme_resource *res, int *enabled,
+ unsigned long long *base, unsigned long long *size, u32 *aspace,
+ u32 *cycle, u32 *width);
+
+The address spaces, transfer widths and cycle types are the same as described
+under resource management, however some of the options are mutually exclusive.
+For example, only one address space may be specified.
+
+These functions return 0 on success or an error code should the call fail.
+
+
+Master window access
+--------------------
+
+The following functions can be used to read from and write to configured master
+windows. These functions return the number of bytes copied:
+
+ ssize_t vme_master_read(struct vme_resource *res, void *buf,
+ size_t count, loff_t offset);
+
+ ssize_t vme_master_write(struct vme_resource *res, void *buf,
+ size_t count, loff_t offset);
+
+In addition to simple reads and writes, a function is provided to do a
+read-modify-write transaction. This function returns the original value of the
+VME bus location :
+
+ unsigned int vme_master_rmw (struct vme_resource *res,
+ unsigned int mask, unsigned int compare, unsigned int swap,
+ loff_t offset);
+
+This functions by reading the offset, applying the mask. If the bits selected in
+the mask match with the values of the corresponding bits in the compare field,
+the value of swap is written the specified offset.
+
+
+Slave windows
+=============
+
+Slave windows provide devices on the VME bus access into mapped portions of the
+local memory. The number of windows available and the access modes that can be
+used is dependent on the underlying chipset. A window must be configured before
+it can be used.
+
+
+Slave window configuration
+--------------------------
+
+Once a slave window has been assigned the following functions can be used to
+configure it and retrieve the current settings:
+
+ int vme_slave_set (struct vme_resource *res, int enabled,
+ unsigned long long base, unsigned long long size,
+ dma_addr_t mem, u32 aspace, u32 cycle);
+
+ int vme_slave_get (struct vme_resource *res, int *enabled,
+ unsigned long long *base, unsigned long long *size,
+ dma_addr_t *mem, u32 *aspace, u32 *cycle);
+
+The address spaces, transfer widths and cycle types are the same as described
+under resource management, however some of the options are mutually exclusive.
+For example, only one address space may be specified.
+
+These functions return 0 on success or an error code should the call fail.
+
+
+Slave window buffer allocation
+------------------------------
+
+Functions are provided to allow the user to allocate and free a contiguous
+buffers which will be accessible by the VME bridge. These functions do not have
+to be used, other methods can be used to allocate a buffer, though care must be
+taken to ensure that they are contiguous and accessible by the VME bridge:
+
+ void * vme_alloc_consistent(struct vme_resource *res, size_t size,
+ dma_addr_t *mem);
+
+ void vme_free_consistent(struct vme_resource *res, size_t size,
+ void *virt, dma_addr_t mem);
+
+
+Slave window access
+-------------------
+
+Slave windows map local memory onto the VME bus, the standard methods for
+accessing memory should be used.
+
+
+DMA channels
+============
+
+The VME DMA transfer provides the ability to run link-list DMA transfers. The
+API introduces the concept of DMA lists. Each DMA list is a link-list which can
+be passed to a DMA controller. Multiple lists can be created, extended,
+executed, reused and destroyed.
+
+
+List Management
+---------------
+
+The following functions are provided to create and destroy DMA lists. Execution
+of a list will not automatically destroy the list, thus enabling a list to be
+reused for repetitive tasks:
+
+ struct vme_dma_list *vme_new_dma_list(struct vme_resource *res);
+
+ int vme_dma_list_free(struct vme_dma_list *list);
+
+
+List Population
+---------------
+
+An item can be added to a list using the following function ( the source and
+destination attributes need to be created before calling this function, this is
+covered under "Transfer Attributes"):
+
+ int vme_dma_list_add(struct vme_dma_list *list,
+ struct vme_dma_attr *src, struct vme_dma_attr *dest,
+ size_t count);
+
+NOTE: The detailed attributes of the transfers source and destination
+ are not checked until an entry is added to a DMA list, the request
+ for a DMA channel purely checks the directions in which the
+ controller is expected to transfer data. As a result it is
+ possible for this call to return an error, for example if the
+ source or destination is in an unsupported VME address space.
+
+Transfer Attributes
+-------------------
+
+The attributes for the source and destination are handled separately from adding
+an item to a list. This is due to the diverse attributes required for each type
+of source and destination. There are functions to create attributes for PCI, VME
+and pattern sources and destinations (where appropriate):
+
+Pattern source:
+
+ struct vme_dma_attr *vme_dma_pattern_attribute(u32 pattern, u32 type);
+
+PCI source or destination:
+
+ struct vme_dma_attr *vme_dma_pci_attribute(dma_addr_t mem);
+
+VME source or destination:
+
+ struct vme_dma_attr *vme_dma_vme_attribute(unsigned long long base,
+ u32 aspace, u32 cycle, u32 width);
+
+The following function should be used to free an attribute:
+
+ void vme_dma_free_attribute(struct vme_dma_attr *attr);
+
+
+List Execution
+--------------
+
+The following function queues a list for execution. The function will return
+once the list has been executed:
+
+ int vme_dma_list_exec(struct vme_dma_list *list);
+
+
+Interrupts
+==========
+
+The VME API provides functions to attach and detach callbacks to specific VME
+level and status ID combinations and for the generation of VME interrupts with
+specific VME level and status IDs.
+
+
+Attaching Interrupt Handlers
+----------------------------
+
+The following functions can be used to attach and free a specific VME level and
+status ID combination. Any given combination can only be assigned a single
+callback function. A void pointer parameter is provided, the value of which is
+passed to the callback function, the use of this pointer is user undefined:
+
+ int vme_irq_request(struct vme_dev *dev, int level, int statid,
+ void (*callback)(int, int, void *), void *priv);
+
+ void vme_irq_free(struct vme_dev *dev, int level, int statid);
+
+The callback parameters are as follows. Care must be taken in writing a callback
+function, callback functions run in interrupt context:
+
+ void callback(int level, int statid, void *priv);
+
+
+Interrupt Generation
+--------------------
+
+The following function can be used to generate a VME interrupt at a given VME
+level and VME status ID:
+
+ int vme_irq_generate(struct vme_dev *dev, int level, int statid);
+
+
+Location monitors
+=================
+
+The VME API provides the following functionality to configure the location
+monitor.
+
+
+Location Monitor Management
+---------------------------
+
+The following functions are provided to request the use of a block of location
+monitors and to free them after they are no longer required:
+
+ struct vme_resource * vme_lm_request(struct vme_dev *dev);
+
+ void vme_lm_free(struct vme_resource * res);
+
+Each block may provide a number of location monitors, monitoring adjacent
+locations. The following function can be used to determine how many locations
+are provided:
+
+ int vme_lm_count(struct vme_resource * res);
+
+
+Location Monitor Configuration
+------------------------------
+
+Once a bank of location monitors has been allocated, the following functions
+are provided to configure the location and mode of the location monitor:
+
+ int vme_lm_set(struct vme_resource *res, unsigned long long base,
+ u32 aspace, u32 cycle);
+
+ int vme_lm_get(struct vme_resource *res, unsigned long long *base,
+ u32 *aspace, u32 *cycle);
+
+
+Location Monitor Use
+--------------------
+
+The following functions allow a callback to be attached and detached from each
+location monitor location. Each location monitor can monitor a number of
+adjacent locations:
+
+ int vme_lm_attach(struct vme_resource *res, int num,
+ void (*callback)(int));
+
+ int vme_lm_detach(struct vme_resource *res, int num);
+
+The callback function is declared as follows.
+
+ void callback(int num);
+
+
+Slot Detection
+==============
+
+This function returns the slot ID of the provided bridge.
+
+ int vme_slot_num(struct vme_dev *dev);
+
+
+Bus Detection
+=============
+
+This function returns the bus ID of the provided bridge.
+
+ int vme_bus_num(struct vme_dev *dev);
+
+
diff --git a/Documentation/volatile-considered-harmful.txt b/Documentation/volatile-considered-harmful.txt
new file mode 100644
index 000000000..db0cb228d
--- /dev/null
+++ b/Documentation/volatile-considered-harmful.txt
@@ -0,0 +1,119 @@
+Why the "volatile" type class should not be used
+------------------------------------------------
+
+C programmers have often taken volatile to mean that the variable could be
+changed outside of the current thread of execution; as a result, they are
+sometimes tempted to use it in kernel code when shared data structures are
+being used. In other words, they have been known to treat volatile types
+as a sort of easy atomic variable, which they are not. The use of volatile in
+kernel code is almost never correct; this document describes why.
+
+The key point to understand with regard to volatile is that its purpose is
+to suppress optimization, which is almost never what one really wants to
+do. In the kernel, one must protect shared data structures against
+unwanted concurrent access, which is very much a different task. The
+process of protecting against unwanted concurrency will also avoid almost
+all optimization-related problems in a more efficient way.
+
+Like volatile, the kernel primitives which make concurrent access to data
+safe (spinlocks, mutexes, memory barriers, etc.) are designed to prevent
+unwanted optimization. If they are being used properly, there will be no
+need to use volatile as well. If volatile is still necessary, there is
+almost certainly a bug in the code somewhere. In properly-written kernel
+code, volatile can only serve to slow things down.
+
+Consider a typical block of kernel code:
+
+ spin_lock(&the_lock);
+ do_something_on(&shared_data);
+ do_something_else_with(&shared_data);
+ spin_unlock(&the_lock);
+
+If all the code follows the locking rules, the value of shared_data cannot
+change unexpectedly while the_lock is held. Any other code which might
+want to play with that data will be waiting on the lock. The spinlock
+primitives act as memory barriers - they are explicitly written to do so -
+meaning that data accesses will not be optimized across them. So the
+compiler might think it knows what will be in shared_data, but the
+spin_lock() call, since it acts as a memory barrier, will force it to
+forget anything it knows. There will be no optimization problems with
+accesses to that data.
+
+If shared_data were declared volatile, the locking would still be
+necessary. But the compiler would also be prevented from optimizing access
+to shared_data _within_ the critical section, when we know that nobody else
+can be working with it. While the lock is held, shared_data is not
+volatile. When dealing with shared data, proper locking makes volatile
+unnecessary - and potentially harmful.
+
+The volatile storage class was originally meant for memory-mapped I/O
+registers. Within the kernel, register accesses, too, should be protected
+by locks, but one also does not want the compiler "optimizing" register
+accesses within a critical section. But, within the kernel, I/O memory
+accesses are always done through accessor functions; accessing I/O memory
+directly through pointers is frowned upon and does not work on all
+architectures. Those accessors are written to prevent unwanted
+optimization, so, once again, volatile is unnecessary.
+
+Another situation where one might be tempted to use volatile is
+when the processor is busy-waiting on the value of a variable. The right
+way to perform a busy wait is:
+
+ while (my_variable != what_i_want)
+ cpu_relax();
+
+The cpu_relax() call can lower CPU power consumption or yield to a
+hyperthreaded twin processor; it also happens to serve as a compiler
+barrier, so, once again, volatile is unnecessary. Of course, busy-
+waiting is generally an anti-social act to begin with.
+
+There are still a few rare situations where volatile makes sense in the
+kernel:
+
+ - The above-mentioned accessor functions might use volatile on
+ architectures where direct I/O memory access does work. Essentially,
+ each accessor call becomes a little critical section on its own and
+ ensures that the access happens as expected by the programmer.
+
+ - Inline assembly code which changes memory, but which has no other
+ visible side effects, risks being deleted by GCC. Adding the volatile
+ keyword to asm statements will prevent this removal.
+
+ - The jiffies variable is special in that it can have a different value
+ every time it is referenced, but it can be read without any special
+ locking. So jiffies can be volatile, but the addition of other
+ variables of this type is strongly frowned upon. Jiffies is considered
+ to be a "stupid legacy" issue (Linus's words) in this regard; fixing it
+ would be more trouble than it is worth.
+
+ - Pointers to data structures in coherent memory which might be modified
+ by I/O devices can, sometimes, legitimately be volatile. A ring buffer
+ used by a network adapter, where that adapter changes pointers to
+ indicate which descriptors have been processed, is an example of this
+ type of situation.
+
+For most code, none of the above justifications for volatile apply. As a
+result, the use of volatile is likely to be seen as a bug and will bring
+additional scrutiny to the code. Developers who are tempted to use
+volatile should take a step back and think about what they are truly trying
+to accomplish.
+
+Patches to remove volatile variables are generally welcome - as long as
+they come with a justification which shows that the concurrency issues have
+been properly thought through.
+
+
+NOTES
+-----
+
+[1] http://lwn.net/Articles/233481/
+[2] http://lwn.net/Articles/233482/
+
+CREDITS
+-------
+
+Original impetus and research by Randy Dunlap
+Written by Jonathan Corbet
+Improvements via comments from Satyam Sharma, Johannes Stezenbach, Jesper
+ Juhl, Heikki Orsila, H. Peter Anvin, Philipp Hahn, and Stefan
+ Richter.
diff --git a/Documentation/w1/00-INDEX b/Documentation/w1/00-INDEX
new file mode 100644
index 000000000..cb4980274
--- /dev/null
+++ b/Documentation/w1/00-INDEX
@@ -0,0 +1,10 @@
+00-INDEX
+ - This file
+slaves/
+ - Drivers that provide support for specific family codes.
+masters/
+ - Individual chips providing 1-wire busses.
+w1.generic
+ - The 1-wire (w1) bus
+w1.netlink
+ - Userspace communication protocol over connector [1].
diff --git a/Documentation/w1/masters/00-INDEX b/Documentation/w1/masters/00-INDEX
new file mode 100644
index 000000000..8330cf932
--- /dev/null
+++ b/Documentation/w1/masters/00-INDEX
@@ -0,0 +1,12 @@
+00-INDEX
+ - This file
+ds2482
+ - The Maxim/Dallas Semiconductor DS2482 provides 1-wire busses.
+ds2490
+ - The Maxim/Dallas Semiconductor DS2490 builds USB <-> W1 bridges.
+mxc-w1
+ - W1 master controller driver found on Freescale MX2/MX3 SoCs
+omap-hdq
+ - HDQ/1-wire module of TI OMAP 2430/3430.
+w1-gpio
+ - GPIO 1-wire bus master driver.
diff --git a/Documentation/w1/masters/ds2482 b/Documentation/w1/masters/ds2482
new file mode 100644
index 000000000..56f8edace
--- /dev/null
+++ b/Documentation/w1/masters/ds2482
@@ -0,0 +1,31 @@
+Kernel driver ds2482
+====================
+
+Supported chips:
+ * Maxim DS2482-100, Maxim DS2482-800
+ Prefix: 'ds2482'
+ Addresses scanned: None
+ Datasheets:
+ http://datasheets.maxim-ic.com/en/ds/DS2482-100.pdf
+ http://datasheets.maxim-ic.com/en/ds/DS2482-800.pdf
+
+Author: Ben Gardner <bgardner@wabtec.com>
+
+
+Description
+-----------
+
+The Maxim/Dallas Semiconductor DS2482 is a I2C device that provides
+one (DS2482-100) or eight (DS2482-800) 1-wire busses.
+
+
+General Remarks
+---------------
+
+Valid addresses are 0x18, 0x19, 0x1a, and 0x1b.
+However, the device cannot be detected without writing to the i2c bus, so no
+detection is done. You should instantiate the device explicitly.
+
+$ modprobe ds2482
+$ echo ds2482 0x18 > /sys/bus/i2c/devices/i2c-0/new_device
+
diff --git a/Documentation/w1/masters/ds2490 b/Documentation/w1/masters/ds2490
new file mode 100644
index 000000000..3e091151d
--- /dev/null
+++ b/Documentation/w1/masters/ds2490
@@ -0,0 +1,68 @@
+Kernel driver ds2490
+====================
+
+Supported chips:
+ * Maxim DS2490 based
+
+Author: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+
+
+Description
+-----------
+
+The Maxim/Dallas Semiconductor DS2490 is a chip
+which allows to build USB <-> W1 bridges.
+
+DS9490(R) is a USB <-> W1 bus master device
+which has 0x81 family ID integrated chip and DS2490
+low-level operational chip.
+
+Notes and limitations.
+- The weak pullup current is a minimum of 0.9mA and maximum of 6.0mA.
+- The 5V strong pullup is supported with a minimum of 5.9mA and a
+ maximum of 30.4 mA. (From DS2490.pdf)
+- The hardware will detect when devices are attached to the bus on the
+ next bus (reset?) operation, however only a message is printed as
+ the core w1 code doesn't make use of the information. Connecting
+ one device tends to give multiple new device notifications.
+- The number of USB bus transactions could be reduced if w1_reset_send
+ was added to the API. The name is just a suggestion. It would take
+ a write buffer and a read buffer (along with sizes) as arguments.
+ The ds2490 block I/O command supports reset, write buffer, read
+ buffer, and strong pullup all in one command, instead of the current
+ 1 reset bus, 2 write the match rom command and slave rom id, 3 block
+ write and read data. The write buffer needs to have the match rom
+ command and slave rom id prepended to the front of the requested
+ write buffer, both of which are known to the driver.
+- The hardware supports normal, flexible, and overdrive bus
+ communication speeds, but only the normal is supported.
+- The registered w1_bus_master functions don't define error
+ conditions. If a bus search is in progress and the ds2490 is
+ removed it can produce a good amount of error output before the bus
+ search finishes.
+- The hardware supports detecting some error conditions, such as
+ short, alarming presence on reset, and no presence on reset, but the
+ driver doesn't query those values.
+- The ds2490 specification doesn't cover short bulk in reads in
+ detail, but my observation is if fewer bytes are requested than are
+ available, the bulk read will return an error and the hardware will
+ clear the entire bulk in buffer. It would be possible to read the
+ maximum buffer size to not run into this error condition, only extra
+ bytes in the buffer is a logic error in the driver. The code should
+ should match reads and writes as well as data sizes. Reads and
+ writes are serialized and the status verifies that the chip is idle
+ (and data is available) before the read is executed, so it should
+ not happen.
+- Running x86_64 2.6.24 UHCI under qemu 0.9.0 under x86_64 2.6.22-rc6
+ with a OHCI controller, ds2490 running in the guest would operate
+ normally the first time the module was loaded after qemu attached
+ the ds2490 hardware, but if the module was unloaded, then reloaded
+ most of the time one of the bulk out or in, and usually the bulk in
+ would fail. qemu sets a 50ms timeout and the bulk in would timeout
+ even when the status shows data available. A bulk out write would
+ show a successful completion, but the ds2490 status register would
+ show 0 bytes written. Detaching qemu from the ds2490 hardware and
+ reattaching would clear the problem. usbmon output in the guest and
+ host did not explain the problem. My guess is a bug in either qemu
+ or the host OS and more likely the host OS.
+-- 03-06-2008 David Fries <David@Fries.net>
diff --git a/Documentation/w1/masters/mxc-w1 b/Documentation/w1/masters/mxc-w1
new file mode 100644
index 000000000..38be1ad65
--- /dev/null
+++ b/Documentation/w1/masters/mxc-w1
@@ -0,0 +1,12 @@
+Kernel driver mxc_w1
+====================
+
+Supported chips:
+ * Freescale MX27, MX31 and probably other i.MX SoCs
+ Datasheets:
+ http://www.freescale.com/files/32bit/doc/data_sheet/MCIMX31.pdf?fpsp=1
+ http://cache.freescale.com/files/dsp/doc/archive/MCIMX27.pdf?fsrch=1&WT_TYPE=
+ Data%20Sheets&WT_VENDOR=FREESCALE&WT_FILE_FORMAT=pdf&WT_ASSET=Documentation
+
+Author: Originally based on Freescale code, prepared for mainline by
+ Sascha Hauer <s.hauer@pengutronix.de>
diff --git a/Documentation/w1/masters/omap-hdq b/Documentation/w1/masters/omap-hdq
new file mode 100644
index 000000000..884dc284b
--- /dev/null
+++ b/Documentation/w1/masters/omap-hdq
@@ -0,0 +1,46 @@
+Kernel driver for omap HDQ/1-wire module.
+========================================
+
+Supported chips:
+================
+ HDQ/1-wire controller on the TI OMAP 2430/3430 platforms.
+
+A useful link about HDQ basics:
+===============================
+http://focus.ti.com/lit/an/slua408a/slua408a.pdf
+
+Description:
+============
+The HDQ/1-Wire module of TI OMAP2430/3430 platforms implement the hardware
+protocol of the master functions of the Benchmark HDQ and the Dallas
+Semiconductor 1-Wire protocols. These protocols use a single wire for
+communication between the master (HDQ/1-Wire controller) and the slave
+(HDQ/1-Wire external compliant device).
+
+A typical application of the HDQ/1-Wire module is the communication with battery
+monitor (gas gauge) integrated circuits.
+
+The controller supports operation in both HDQ and 1-wire mode. The essential
+difference between the HDQ and 1-wire mode is how the slave device responds to
+initialization pulse.In HDQ mode, the firmware does not require the host to
+create an initialization pulse to the slave.However, the slave can be reset by
+using an initialization pulse (also referred to as a break pulse).The slave
+does not respond with a presence pulse as it does in the 1-Wire protocol.
+
+Remarks:
+========
+The driver (drivers/w1/masters/omap_hdq.c) supports the HDQ mode of the
+controller. In this mode, as we can not read the ID which obeys the W1
+spec(family:id:crc), a module parameter can be passed to the driver which will
+be used to calculate the CRC and pass back an appropriate slave ID to the W1
+core.
+
+By default the master driver and the BQ slave i/f
+driver(drivers/w1/slaves/w1_bq27000.c) sets the ID to 1.
+Please note to load both the modules with a different ID if required, but note
+that the ID used should be same for both master and slave driver loading.
+
+e.g:
+insmod omap_hdq.ko W1_ID=2
+inamod w1_bq27000.ko F_ID=2
+
diff --git a/Documentation/w1/masters/w1-gpio b/Documentation/w1/masters/w1-gpio
new file mode 100644
index 000000000..af5d3b4aa
--- /dev/null
+++ b/Documentation/w1/masters/w1-gpio
@@ -0,0 +1,33 @@
+Kernel driver w1-gpio
+=====================
+
+Author: Ville Syrjala <syrjala@sci.fi>
+
+
+Description
+-----------
+
+GPIO 1-wire bus master driver. The driver uses the GPIO API to control the
+wire and the GPIO pin can be specified using platform data.
+
+
+Example (mach-at91)
+-------------------
+
+#include <linux/w1-gpio.h>
+
+static struct w1_gpio_platform_data foo_w1_gpio_pdata = {
+ .pin = AT91_PIN_PB20,
+ .is_open_drain = 1,
+};
+
+static struct platform_device foo_w1_device = {
+ .name = "w1-gpio",
+ .id = -1,
+ .dev.platform_data = &foo_w1_gpio_pdata,
+};
+
+...
+ at91_set_GPIO_periph(foo_w1_gpio_pdata.pin, 1);
+ at91_set_multi_drive(foo_w1_gpio_pdata.pin, 1);
+ platform_device_register(&foo_w1_device);
diff --git a/Documentation/w1/slaves/00-INDEX b/Documentation/w1/slaves/00-INDEX
new file mode 100644
index 000000000..6e18c70c3
--- /dev/null
+++ b/Documentation/w1/slaves/00-INDEX
@@ -0,0 +1,8 @@
+00-INDEX
+ - This file
+w1_therm
+ - The Maxim/Dallas Semiconductor ds18*20 temperature sensor.
+w1_ds2423
+ - The Maxim/Dallas Semiconductor ds2423 counter device.
+w1_ds28e04
+ - The Maxim/Dallas Semiconductor ds28e04 eeprom.
diff --git a/Documentation/w1/slaves/w1_ds2406 b/Documentation/w1/slaves/w1_ds2406
new file mode 100644
index 000000000..8137fe6f6
--- /dev/null
+++ b/Documentation/w1/slaves/w1_ds2406
@@ -0,0 +1,25 @@
+w1_ds2406 kernel driver
+=======================
+
+Supported chips:
+ * Maxim DS2406 (and other family 0x12) addressable switches
+
+Author: Scott Alfter <scott@alfter.us>
+
+Description
+-----------
+
+The w1_ds2406 driver allows connected devices to be switched on and off.
+These chips also provide 128 bytes of OTP EPROM, but reading/writing it is
+not supported. In TSOC-6 form, the DS2406 provides two switch outputs and
+can be provided with power on a dedicated input. In TO-92 form, it provides
+one output and uses parasitic power only.
+
+The driver provides two sysfs files. state is readable; it gives the
+current state of each switch, with PIO A in bit 0 and PIO B in bit 1. The
+driver ORs this state with 0x30, so shell scripts get an ASCII 0/1/2/3 to
+work with. output is writable; bits 0 and 1 control PIO A and B,
+respectively. Bits 2-7 are ignored, so it's safe to write ASCII data.
+
+CRCs are checked on read and write. Failed checks cause an I/O error to be
+returned. On a failed write, the switch status is not changed.
diff --git a/Documentation/w1/slaves/w1_ds2423 b/Documentation/w1/slaves/w1_ds2423
new file mode 100644
index 000000000..3f98b505a
--- /dev/null
+++ b/Documentation/w1/slaves/w1_ds2423
@@ -0,0 +1,47 @@
+Kernel driver w1_ds2423
+=======================
+
+Supported chips:
+ * Maxim DS2423 based counter devices.
+
+supported family codes:
+ W1_THERM_DS2423 0x1D
+
+Author: Mika Laitio <lamikr@pilppa.org>
+
+Description
+-----------
+
+Support is provided through the sysfs w1_slave file. Each opening and
+read sequence of w1_slave file initiates the read of counters and ram
+available in DS2423 pages 12 - 15.
+
+Result of each page is provided as an ASCII output where each counter
+value and associated ram buffer is outpputed to own line.
+
+Each lines will contain the values of 42 bytes read from the counter and
+memory page along the crc=YES or NO for indicating whether the read operation
+was successful and CRC matched.
+If the operation was successful, there is also in the end of each line
+a counter value expressed as an integer after c=
+
+Meaning of 42 bytes represented is following:
+ - 1 byte from ram page
+ - 4 bytes for the counter value
+ - 4 zero bytes
+ - 2 bytes for crc16 which was calculated from the data read since the previous crc bytes
+ - 31 remaining bytes from the ram page
+ - crc=YES/NO indicating whether read was ok and crc matched
+ - c=<int> current counter value
+
+example from the successful read:
+00 02 00 00 00 00 00 00 00 6d 38 00 ff ff 00 00 fe ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff crc=YES c=2
+00 02 00 00 00 00 00 00 00 e0 1f 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff crc=YES c=2
+00 29 c6 5d 18 00 00 00 00 04 37 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff crc=YES c=408798761
+00 05 00 00 00 00 00 00 00 8d 39 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff crc=YES c=5
+
+example from the read with crc errors:
+00 02 00 00 00 00 00 00 00 6d 38 00 ff ff 00 00 fe ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff crc=YES c=2
+00 02 00 00 22 00 00 00 00 e0 1f 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff crc=NO
+00 e1 61 5d 19 00 00 00 00 df 0b 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff 00 00 ff ff crc=NO
+00 05 00 00 20 00 00 00 00 8d 39 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff crc=NO
diff --git a/Documentation/w1/slaves/w1_ds28e04 b/Documentation/w1/slaves/w1_ds28e04
new file mode 100644
index 000000000..7819b65cf
--- /dev/null
+++ b/Documentation/w1/slaves/w1_ds28e04
@@ -0,0 +1,36 @@
+Kernel driver w1_ds28e04
+========================
+
+Supported chips:
+ * Maxim DS28E04-100 4096-Bit Addressable 1-Wire EEPROM with PIO
+
+supported family codes:
+ W1_FAMILY_DS28E04 0x1C
+
+Author: Markus Franke, <franke.m@sebakmt.com> <franm@hrz.tu-chemnitz.de>
+
+Description
+-----------
+
+Support is provided through the sysfs files "eeprom" and "pio". CRC checking
+during memory accesses can optionally be enabled/disabled via the device
+attribute "crccheck". The strong pull-up can optionally be enabled/disabled
+via the module parameter "w1_strong_pullup".
+
+Memory Access
+
+ A read operation on the "eeprom" file reads the given amount of bytes
+ from the EEPROM of the DS28E04.
+
+ A write operation on the "eeprom" file writes the given byte sequence
+ to the EEPROM of the DS28E04. If CRC checking mode is enabled only
+ fully aligned blocks of 32 bytes with valid CRC16 values (in bytes 30
+ and 31) are allowed to be written.
+
+PIO Access
+
+ The 2 PIOs of the DS28E04-100 are accessible via the "pio" sysfs file.
+
+ The current status of the PIO's is returned as an 8 bit value. Bit 0/1
+ represent the state of PIO_0/PIO_1. Bits 2..7 do not care. The PIO's are
+ driven low-active, i.e. the driver delivers/expects low-active values.
diff --git a/Documentation/w1/slaves/w1_therm b/Documentation/w1/slaves/w1_therm
new file mode 100644
index 000000000..cc62a95e4
--- /dev/null
+++ b/Documentation/w1/slaves/w1_therm
@@ -0,0 +1,50 @@
+Kernel driver w1_therm
+====================
+
+Supported chips:
+ * Maxim ds18*20 based temperature sensors.
+ * Maxim ds1825 based temperature sensors.
+
+Author: Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+
+
+Description
+-----------
+
+w1_therm provides basic temperature conversion for ds18*20 devices.
+supported family codes:
+W1_THERM_DS18S20 0x10
+W1_THERM_DS1822 0x22
+W1_THERM_DS18B20 0x28
+W1_THERM_DS1825 0x3B
+
+Support is provided through the sysfs w1_slave file. Each open and
+read sequence will initiate a temperature conversion then provide two
+lines of ASCII output. The first line contains the nine hex bytes
+read along with a calculated crc value and YES or NO if it matched.
+If the crc matched the returned values are retained. The second line
+displays the retained values along with a temperature in millidegrees
+Centigrade after t=.
+
+Parasite powered devices are limited to one slave performing a
+temperature conversion at a time. If none of the devices are parasite
+powered it would be possible to convert all the devices at the same
+time and then go back to read individual sensors. That isn't
+currently supported. The driver also doesn't support reduced
+precision (which would also reduce the conversion time).
+
+The module parameter strong_pullup can be set to 0 to disable the
+strong pullup, 1 to enable autodetection or 2 to force strong pullup.
+In case of autodetection, the driver will use the "READ POWER SUPPLY"
+command to check if there are pariste powered devices on the bus.
+If so, it will activate the master's strong pullup.
+In case the detection of parasite devices using this command fails
+(seems to be the case with some DS18S20) the strong pullup can
+be force-enabled.
+If the strong pullup is enabled, the master's strong pullup will be
+driven when the conversion is taking place, provided the master driver
+does support the strong pullup (or it falls back to a pullup
+resistor). The DS18b20 temperature sensor specification lists a
+maximum current draw of 1.5mA and that a 5k pullup resistor is not
+sufficient. The strong pullup is designed to provide the additional
+current required.
diff --git a/Documentation/w1/w1.generic b/Documentation/w1/w1.generic
new file mode 100644
index 000000000..b2033c64c
--- /dev/null
+++ b/Documentation/w1/w1.generic
@@ -0,0 +1,113 @@
+The 1-wire (w1) subsystem
+------------------------------------------------------------------
+The 1-wire bus is a simple master-slave bus that communicates via a single
+signal wire (plus ground, so two wires).
+
+Devices communicate on the bus by pulling the signal to ground via an open
+drain output and by sampling the logic level of the signal line.
+
+The w1 subsystem provides the framework for managing w1 masters and
+communication with slaves.
+
+All w1 slave devices must be connected to a w1 bus master device.
+
+Example w1 master devices:
+ DS9490 usb device
+ W1-over-GPIO
+ DS2482 (i2c to w1 bridge)
+ Emulated devices, such as a RS232 converter, parallel port adapter, etc
+
+
+What does the w1 subsystem do?
+------------------------------------------------------------------
+When a w1 master driver registers with the w1 subsystem, the following occurs:
+
+ - sysfs entries for that w1 master are created
+ - the w1 bus is periodically searched for new slave devices
+
+When a device is found on the bus, w1 core tries to load the driver for its family
+and check if it is loaded. If so, the family driver is attached to the slave.
+If there is no driver for the family, default one is assigned, which allows to perform
+almost any kind of operations. Each logical operation is a transaction
+in nature, which can contain several (two or one) low-level operations.
+Let's see how one can read EEPROM context:
+1. one must write control buffer, i.e. buffer containing command byte
+and two byte address. At this step bus is reset and appropriate device
+is selected using either W1_SKIP_ROM or W1_MATCH_ROM command.
+Then provided control buffer is being written to the wire.
+2. reading. This will issue reading eeprom response.
+
+It is possible that between 1. and 2. w1 master thread will reset bus for searching
+and slave device will be even removed, but in this case 0xff will
+be read, since no device was selected.
+
+
+W1 device families
+------------------------------------------------------------------
+Slave devices are handled by a driver written for a family of w1 devices.
+
+A family driver populates a struct w1_family_ops (see w1_family.h) and
+registers with the w1 subsystem.
+
+Current family drivers:
+w1_therm - (ds18?20 thermal sensor family driver)
+ provides temperature reading function which is bound to ->rbin() method
+ of the above w1_family_ops structure.
+
+w1_smem - driver for simple 64bit memory cell provides ID reading method.
+
+You can call above methods by reading appropriate sysfs files.
+
+
+What does a w1 master driver need to implement?
+------------------------------------------------------------------
+
+The driver for w1 bus master must provide at minimum two functions.
+
+Emulated devices must provide the ability to set the output signal level
+(write_bit) and sample the signal level (read_bit).
+
+Devices that support the 1-wire natively must provide the ability to write and
+sample a bit (touch_bit) and reset the bus (reset_bus).
+
+Most hardware provides higher-level functions that offload w1 handling.
+See struct w1_bus_master definition in w1.h for details.
+
+
+w1 master sysfs interface
+------------------------------------------------------------------
+<xx-xxxxxxxxxxxxx> - a directory for a found device. The format is family-serial
+bus - (standard) symlink to the w1 bus
+driver - (standard) symlink to the w1 driver
+w1_master_add - Manually register a slave device
+w1_master_attempts - the number of times a search was attempted
+w1_master_max_slave_count
+ - maximum number of slaves to search for at a time
+w1_master_name - the name of the device (w1_bus_masterX)
+w1_master_pullup - 5V strong pullup 0 enabled, 1 disabled
+w1_master_remove - Manually remove a slave device
+w1_master_search - the number of searches left to do, -1=continual (default)
+w1_master_slave_count
+ - the number of slaves found
+w1_master_slaves - the names of the slaves, one per line
+w1_master_timeout - the delay in seconds between searches
+
+If you have a w1 bus that never changes (you don't add or remove devices),
+you can set the module parameter search_count to a small positive number
+for an initially small number of bus searches. Alternatively it could be
+set to zero, then manually add the slave device serial numbers by
+w1_master_add device file. The w1_master_add and w1_master_remove files
+generally only make sense when searching is disabled, as a search will
+redetect manually removed devices that are present and timeout manually
+added devices that aren't on the bus.
+
+
+w1 slave sysfs interface
+------------------------------------------------------------------
+bus - (standard) symlink to the w1 bus
+driver - (standard) symlink to the w1 driver
+name - the device name, usually the same as the directory name
+w1_slave - (optional) a binary file whose meaning depends on the
+ family driver
+rw - (optional) created for slave devices which do not have
+ appropriate family driver. Allows to read/write binary data.
diff --git a/Documentation/w1/w1.netlink b/Documentation/w1/w1.netlink
new file mode 100644
index 000000000..ef2727192
--- /dev/null
+++ b/Documentation/w1/w1.netlink
@@ -0,0 +1,189 @@
+Userspace communication protocol over connector [1].
+
+
+Message types.
+=============
+
+There are three types of messages between w1 core and userspace:
+1. Events. They are generated each time a new master or slave device
+ is found either due to automatic or requested search.
+2. Userspace commands.
+3. Replies to userspace commands.
+
+
+Protocol.
+========
+
+[struct cn_msg] - connector header.
+ Its length field is equal to size of the attached data
+[struct w1_netlink_msg] - w1 netlink header.
+ __u8 type - message type.
+ W1_LIST_MASTERS
+ list current bus masters
+ W1_SLAVE_ADD/W1_SLAVE_REMOVE
+ slave add/remove events
+ W1_MASTER_ADD/W1_MASTER_REMOVE
+ master add/remove events
+ W1_MASTER_CMD
+ userspace command for bus master
+ device (search/alarm search)
+ W1_SLAVE_CMD
+ userspace command for slave device
+ (read/write/touch)
+ __u8 status - error indication from kernel
+ __u16 len - size of data attached to this header data
+ union {
+ __u8 id[8]; - slave unique device id
+ struct w1_mst {
+ __u32 id; - master's id
+ __u32 res; - reserved
+ } mst;
+ } id;
+
+[struct w1_netlink_cmd] - command for given master or slave device.
+ __u8 cmd - command opcode.
+ W1_CMD_READ - read command
+ W1_CMD_WRITE - write command
+ W1_CMD_SEARCH - search command
+ W1_CMD_ALARM_SEARCH - alarm search command
+ W1_CMD_TOUCH - touch command
+ (write and sample data back to userspace)
+ W1_CMD_RESET - send bus reset
+ W1_CMD_SLAVE_ADD - add slave to kernel list
+ W1_CMD_SLAVE_REMOVE - remove slave from kernel list
+ W1_CMD_LIST_SLAVES - get slaves list from kernel
+ __u8 res - reserved
+ __u16 len - length of data for this command
+ For read command data must be allocated like for write command
+ __u8 data[0] - data for this command
+
+
+Each connector message can include one or more w1_netlink_msg with
+zero or more attached w1_netlink_cmd messages.
+
+For event messages there are no w1_netlink_cmd embedded structures,
+only connector header and w1_netlink_msg strucutre with "len" field
+being zero and filled type (one of event types) and id:
+either 8 bytes of slave unique id in host order,
+or master's id, which is assigned to bus master device
+when it is added to w1 core.
+
+Currently replies to userspace commands are only generated for read
+command request. One reply is generated exactly for one w1_netlink_cmd
+read request. Replies are not combined when sent - i.e. typical reply
+messages looks like the following:
+
+[cn_msg][w1_netlink_msg][w1_netlink_cmd]
+cn_msg.len = sizeof(struct w1_netlink_msg) +
+ sizeof(struct w1_netlink_cmd) +
+ cmd->len;
+w1_netlink_msg.len = sizeof(struct w1_netlink_cmd) + cmd->len;
+w1_netlink_cmd.len = cmd->len;
+
+Replies to W1_LIST_MASTERS should send a message back to the userspace
+which will contain list of all registered master ids in the following
+format:
+
+ cn_msg (CN_W1_IDX.CN_W1_VAL as id, len is equal to sizeof(struct
+ w1_netlink_msg) plus number of masters multiplied by 4)
+ w1_netlink_msg (type: W1_LIST_MASTERS, len is equal to
+ number of masters multiplied by 4 (u32 size))
+ id0 ... idN
+
+ Each message is at most 4k in size, so if number of master devices
+ exceeds this, it will be split into several messages.
+
+W1 search and alarm search commands.
+request:
+[cn_msg]
+ [w1_netlink_msg type = W1_MASTER_CMD
+ id is equal to the bus master id to use for searching]
+ [w1_netlink_cmd cmd = W1_CMD_SEARCH or W1_CMD_ALARM_SEARCH]
+
+reply:
+ [cn_msg, ack = 1 and increasing, 0 means the last message,
+ seq is equal to the request seq]
+ [w1_netlink_msg type = W1_MASTER_CMD]
+ [w1_netlink_cmd cmd = W1_CMD_SEARCH or W1_CMD_ALARM_SEARCH
+ len is equal to number of IDs multiplied by 8]
+ [64bit-id0 ... 64bit-idN]
+Length in each header corresponds to the size of the data behind it, so
+w1_netlink_cmd->len = N * 8; where N is number of IDs in this message.
+ Can be zero.
+w1_netlink_msg->len = sizeof(struct w1_netlink_cmd) + N * 8;
+cn_msg->len = sizeof(struct w1_netlink_msg) +
+ sizeof(struct w1_netlink_cmd) +
+ N*8;
+
+W1 reset command.
+[cn_msg]
+ [w1_netlink_msg type = W1_MASTER_CMD
+ id is equal to the bus master id to use for searching]
+ [w1_netlink_cmd cmd = W1_CMD_RESET]
+
+
+Command status replies.
+======================
+
+Each command (either root, master or slave with or without w1_netlink_cmd
+structure) will be 'acked' by the w1 core. Format of the reply is the same
+as request message except that length parameters do not account for data
+requested by the user, i.e. read/write/touch IO requests will not contain
+data, so w1_netlink_cmd.len will be 0, w1_netlink_msg.len will be size
+of the w1_netlink_cmd structure and cn_msg.len will be equal to the sum
+of the sizeof(struct w1_netlink_msg) and sizeof(struct w1_netlink_cmd).
+If reply is generated for master or root command (which do not have
+w1_netlink_cmd attached), reply will contain only cn_msg and w1_netlink_msg
+structures.
+
+w1_netlink_msg.status field will carry positive error value
+(EINVAL for example) or zero in case of success.
+
+All other fields in every structure will mirror the same parameters in the
+request message (except lengths as described above).
+
+Status reply is generated for every w1_netlink_cmd embedded in the
+w1_netlink_msg, if there are no w1_netlink_cmd structures,
+reply will be generated for the w1_netlink_msg.
+
+All w1_netlink_cmd command structures are handled in every w1_netlink_msg,
+even if there were errors, only length mismatch interrupts message processing.
+
+
+Operation steps in w1 core when new command is received.
+=======================================================
+
+When new message (w1_netlink_msg) is received w1 core detects if it is
+master or slave request, according to w1_netlink_msg.type field.
+Then master or slave device is searched for.
+When found, master device (requested or those one on where slave device
+is found) is locked. If slave command is requested, then reset/select
+procedure is started to select given device.
+
+Then all requested in w1_netlink_msg operations are performed one by one.
+If command requires reply (like read command) it is sent on command completion.
+
+When all commands (w1_netlink_cmd) are processed master device is unlocked
+and next w1_netlink_msg header processing started.
+
+
+Connector [1] specific documentation.
+====================================
+
+Each connector message includes two u32 fields as "address".
+w1 uses CN_W1_IDX and CN_W1_VAL defined in include/linux/connector.h header.
+Each message also includes sequence and acknowledge numbers.
+Sequence number for event messages is appropriate bus master sequence number
+increased with each event message sent "through" this master.
+Sequence number for userspace requests is set by userspace application.
+Sequence number for reply is the same as was in request, and
+acknowledge number is set to seq+1.
+
+
+Additional documantion, source code examples.
+============================================
+
+1. Documentation/connector
+2. http://www.ioremap.net/archive/w1
+This archive includes userspace application w1d.c which uses
+read/write/search commands for all master/slave devices found on the bus.
diff --git a/Documentation/watchdog/Makefile b/Documentation/watchdog/Makefile
new file mode 100644
index 000000000..6018f45f2
--- /dev/null
+++ b/Documentation/watchdog/Makefile
@@ -0,0 +1 @@
+subdir-y := src
diff --git a/Documentation/watchdog/convert_drivers_to_kernel_api.txt b/Documentation/watchdog/convert_drivers_to_kernel_api.txt
new file mode 100644
index 000000000..271b8850d
--- /dev/null
+++ b/Documentation/watchdog/convert_drivers_to_kernel_api.txt
@@ -0,0 +1,218 @@
+Converting old watchdog drivers to the watchdog framework
+by Wolfram Sang <w.sang@pengutronix.de>
+=========================================================
+
+Before the watchdog framework came into the kernel, every driver had to
+implement the API on its own. Now, as the framework factored out the common
+components, those drivers can be lightened making it a user of the framework.
+This document shall guide you for this task. The necessary steps are described
+as well as things to look out for.
+
+
+Remove the file_operations struct
+---------------------------------
+
+Old drivers define their own file_operations for actions like open(), write(),
+etc... These are now handled by the framework and just call the driver when
+needed. So, in general, the 'file_operations' struct and assorted functions can
+go. Only very few driver-specific details have to be moved to other functions.
+Here is a overview of the functions and probably needed actions:
+
+- open: Everything dealing with resource management (file-open checks, magic
+ close preparations) can simply go. Device specific stuff needs to go to the
+ driver specific start-function. Note that for some drivers, the start-function
+ also serves as the ping-function. If that is the case and you need start/stop
+ to be balanced (clocks!), you are better off refactoring a separate start-function.
+
+- close: Same hints as for open apply.
+
+- write: Can simply go, all defined behaviour is taken care of by the framework,
+ i.e. ping on write and magic char ('V') handling.
+
+- ioctl: While the driver is allowed to have extensions to the IOCTL interface,
+ the most common ones are handled by the framework, supported by some assistance
+ from the driver:
+
+ WDIOC_GETSUPPORT:
+ Returns the mandatory watchdog_info struct from the driver
+
+ WDIOC_GETSTATUS:
+ Needs the status-callback defined, otherwise returns 0
+
+ WDIOC_GETBOOTSTATUS:
+ Needs the bootstatus member properly set. Make sure it is 0 if you
+ don't have further support!
+
+ WDIOC_SETOPTIONS:
+ No preparations needed
+
+ WDIOC_KEEPALIVE:
+ If wanted, options in watchdog_info need to have WDIOF_KEEPALIVEPING
+ set
+
+ WDIOC_SETTIMEOUT:
+ Options in watchdog_info need to have WDIOF_SETTIMEOUT set
+ and a set_timeout-callback has to be defined. The core will also
+ do limit-checking, if min_timeout and max_timeout in the watchdog
+ device are set. All is optional.
+
+ WDIOC_GETTIMEOUT:
+ No preparations needed
+
+ WDIOC_GETTIMELEFT:
+ It needs get_timeleft() callback to be defined. Otherwise it
+ will return EOPNOTSUPP
+
+ Other IOCTLs can be served using the ioctl-callback. Note that this is mainly
+ intended for porting old drivers; new drivers should not invent private IOCTLs.
+ Private IOCTLs are processed first. When the callback returns with
+ -ENOIOCTLCMD, the IOCTLs of the framework will be tried, too. Any other error
+ is directly given to the user.
+
+Example conversion:
+
+-static const struct file_operations s3c2410wdt_fops = {
+- .owner = THIS_MODULE,
+- .llseek = no_llseek,
+- .write = s3c2410wdt_write,
+- .unlocked_ioctl = s3c2410wdt_ioctl,
+- .open = s3c2410wdt_open,
+- .release = s3c2410wdt_release,
+-};
+
+Check the functions for device-specific stuff and keep it for later
+refactoring. The rest can go.
+
+
+Remove the miscdevice
+---------------------
+
+Since the file_operations are gone now, you can also remove the 'struct
+miscdevice'. The framework will create it on watchdog_dev_register() called by
+watchdog_register_device().
+
+-static struct miscdevice s3c2410wdt_miscdev = {
+- .minor = WATCHDOG_MINOR,
+- .name = "watchdog",
+- .fops = &s3c2410wdt_fops,
+-};
+
+
+Remove obsolete includes and defines
+------------------------------------
+
+Because of the simplifications, a few defines are probably unused now. Remove
+them. Includes can be removed, too. For example:
+
+- #include <linux/fs.h>
+- #include <linux/miscdevice.h> (if MODULE_ALIAS_MISCDEV is not used)
+- #include <linux/uaccess.h> (if no custom IOCTLs are used)
+
+
+Add the watchdog operations
+---------------------------
+
+All possible callbacks are defined in 'struct watchdog_ops'. You can find it
+explained in 'watchdog-kernel-api.txt' in this directory. start(), stop() and
+owner must be set, the rest are optional. You will easily find corresponding
+functions in the old driver. Note that you will now get a pointer to the
+watchdog_device as a parameter to these functions, so you probably have to
+change the function header. Other changes are most likely not needed, because
+here simply happens the direct hardware access. If you have device-specific
+code left from the above steps, it should be refactored into these callbacks.
+
+Here is a simple example:
+
++static struct watchdog_ops s3c2410wdt_ops = {
++ .owner = THIS_MODULE,
++ .start = s3c2410wdt_start,
++ .stop = s3c2410wdt_stop,
++ .ping = s3c2410wdt_keepalive,
++ .set_timeout = s3c2410wdt_set_heartbeat,
++};
+
+A typical function-header change looks like:
+
+-static void s3c2410wdt_keepalive(void)
++static int s3c2410wdt_keepalive(struct watchdog_device *wdd)
+ {
+...
++
++ return 0;
+ }
+
+...
+
+- s3c2410wdt_keepalive();
++ s3c2410wdt_keepalive(&s3c2410_wdd);
+
+
+Add the watchdog device
+-----------------------
+
+Now we need to create a 'struct watchdog_device' and populate it with the
+necessary information for the framework. The struct is also explained in detail
+in 'watchdog-kernel-api.txt' in this directory. We pass it the mandatory
+watchdog_info struct and the newly created watchdog_ops. Often, old drivers
+have their own record-keeping for things like bootstatus and timeout using
+static variables. Those have to be converted to use the members in
+watchdog_device. Note that the timeout values are unsigned int. Some drivers
+use signed int, so this has to be converted, too.
+
+Here is a simple example for a watchdog device:
+
++static struct watchdog_device s3c2410_wdd = {
++ .info = &s3c2410_wdt_ident,
++ .ops = &s3c2410wdt_ops,
++};
+
+
+Handle the 'nowayout' feature
+-----------------------------
+
+A few drivers use nowayout statically, i.e. there is no module parameter for it
+and only CONFIG_WATCHDOG_NOWAYOUT determines if the feature is going to be
+used. This needs to be converted by initializing the status variable of the
+watchdog_device like this:
+
+ .status = WATCHDOG_NOWAYOUT_INIT_STATUS,
+
+Most drivers, however, also allow runtime configuration of nowayout, usually
+by adding a module parameter. The conversion for this would be something like:
+
+ watchdog_set_nowayout(&s3c2410_wdd, nowayout);
+
+The module parameter itself needs to stay, everything else related to nowayout
+can go, though. This will likely be some code in open(), close() or write().
+
+
+Register the watchdog device
+----------------------------
+
+Replace misc_register(&miscdev) with watchdog_register_device(&watchdog_dev).
+Make sure the return value gets checked and the error message, if present,
+still fits. Also convert the unregister case.
+
+- ret = misc_register(&s3c2410wdt_miscdev);
++ ret = watchdog_register_device(&s3c2410_wdd);
+
+...
+
+- misc_deregister(&s3c2410wdt_miscdev);
++ watchdog_unregister_device(&s3c2410_wdd);
+
+
+Update the Kconfig-entry
+------------------------
+
+The entry for the driver now needs to select WATCHDOG_CORE:
+
++ select WATCHDOG_CORE
+
+
+Create a patch and send it to upstream
+--------------------------------------
+
+Make sure you understood Documentation/SubmittingPatches and send your patch to
+linux-watchdog@vger.kernel.org. We are looking forward to it :)
+
diff --git a/Documentation/watchdog/hpwdt.txt b/Documentation/watchdog/hpwdt.txt
new file mode 100644
index 000000000..948807890
--- /dev/null
+++ b/Documentation/watchdog/hpwdt.txt
@@ -0,0 +1,95 @@
+Last reviewed: 06/02/2009
+
+ HP iLO2 NMI Watchdog Driver
+ NMI sourcing for iLO2 based ProLiant Servers
+ Documentation and Driver by
+ Thomas Mingarelli <thomas.mingarelli@hp.com>
+
+ The HP iLO2 NMI Watchdog driver is a kernel module that provides basic
+ watchdog functionality and the added benefit of NMI sourcing. Both the
+ watchdog functionality and the NMI sourcing capability need to be enabled
+ by the user. Remember that the two modes are not dependent on one another.
+ A user can have the NMI sourcing without the watchdog timer and vice-versa.
+
+ Watchdog functionality is enabled like any other common watchdog driver. That
+ is, an application needs to be started that kicks off the watchdog timer. A
+ basic application exists in the Documentation/watchdog/src directory called
+ watchdog-test.c. Simply compile the C file and kick it off. If the system
+ gets into a bad state and hangs, the HP ProLiant iLO 2 timer register will
+ not be updated in a timely fashion and a hardware system reset (also known as
+ an Automatic Server Recovery (ASR)) event will occur.
+
+ The hpwdt driver also has four (4) module parameters. They are the following:
+
+ soft_margin - allows the user to set the watchdog timer value
+ allow_kdump - allows the user to save off a kernel dump image after an NMI
+ nowayout - basic watchdog parameter that does not allow the timer to
+ be restarted or an impending ASR to be escaped.
+ priority - determines whether or not the hpwdt driver is first on the
+ die_notify list to handle NMIs or last. The default value
+ for this module parameter is 0 or LAST. If the user wants to
+ enable NMI sourcing then reload the hpwdt driver with
+ priority=1 (and boot with nmi_watchdog=0).
+
+ NOTE: More information about watchdog drivers in general, including the ioctl
+ interface to /dev/watchdog can be found in
+ Documentation/watchdog/watchdog-api.txt and Documentation/IPMI.txt.
+
+ The priority parameter was introduced due to other kernel software that relied
+ on handling NMIs (like oprofile). Keeping hpwdt's priority at 0 (or LAST)
+ enables the users of NMIs for non critical events to be work as expected.
+
+ The NMI sourcing capability is disabled by default due to the inability to
+ distinguish between "NMI Watchdog Ticks" and "HW generated NMI events" in the
+ Linux kernel. What this means is that the hpwdt nmi handler code is called
+ each time the NMI signal fires off. This could amount to several thousands of
+ NMIs in a matter of seconds. If a user sees the Linux kernel's "dazed and
+ confused" message in the logs or if the system gets into a hung state, then
+ the hpwdt driver can be reloaded with the "priority" module parameter set
+ (priority=1).
+
+ 1. If the kernel has not been booted with nmi_watchdog turned off then
+ edit /boot/grub/menu.lst and place the nmi_watchdog=0 at the end of the
+ currently booting kernel line.
+ 2. reboot the sever
+ 3. Once the system comes up perform a rmmod hpwdt
+ 4. insmod /lib/modules/`uname -r`/kernel/drivers/char/watchdog/hpwdt.ko priority=1
+
+ Now, the hpwdt can successfully receive and source the NMI and provide a log
+ message that details the reason for the NMI (as determined by the HP BIOS).
+
+ Below is a list of NMIs the HP BIOS understands along with the associated
+ code (reason):
+
+ No source found 00h
+
+ Uncorrectable Memory Error 01h
+
+ ASR NMI 1Bh
+
+ PCI Parity Error 20h
+
+ NMI Button Press 27h
+
+ SB_BUS_NMI 28h
+
+ ILO Doorbell NMI 29h
+
+ ILO IOP NMI 2Ah
+
+ ILO Watchdog NMI 2Bh
+
+ Proc Throt NMI 2Ch
+
+ Front Side Bus NMI 2Dh
+
+ PCI Express Error 2Fh
+
+ DMA controller NMI 30h
+
+ Hypertransport/CSI Error 31h
+
+
+
+ -- Tom Mingarelli
+ (thomas.mingarelli@hp.com)
diff --git a/Documentation/watchdog/pcwd-watchdog.txt b/Documentation/watchdog/pcwd-watchdog.txt
new file mode 100644
index 000000000..4f6805239
--- /dev/null
+++ b/Documentation/watchdog/pcwd-watchdog.txt
@@ -0,0 +1,66 @@
+Last reviewed: 10/05/2007
+
+ Berkshire Products PC Watchdog Card
+ Support for ISA Cards Revision A and C
+ Documentation and Driver by Ken Hollis <kenji@bitgate.com>
+
+ The PC Watchdog is a card that offers the same type of functionality that
+ the WDT card does, only it doesn't require an IRQ to run. Furthermore,
+ the Revision C card allows you to monitor any IO Port to automatically
+ trigger the card into being reset. This way you can make the card
+ monitor hard drive status, or anything else you need.
+
+ The Watchdog Driver has one basic role: to talk to the card and send
+ signals to it so it doesn't reset your computer ... at least during
+ normal operation.
+
+ The Watchdog Driver will automatically find your watchdog card, and will
+ attach a running driver for use with that card. After the watchdog
+ drivers have initialized, you can then talk to the card using a PC
+ Watchdog program.
+
+ I suggest putting a "watchdog -d" before the beginning of an fsck, and
+ a "watchdog -e -t 1" immediately after the end of an fsck. (Remember
+ to run the program with an "&" to run it in the background!)
+
+ If you want to write a program to be compatible with the PC Watchdog
+ driver, simply use of modify the watchdog test program:
+ Documentation/watchdog/src/watchdog-test.c
+
+
+ Other IOCTL functions include:
+
+ WDIOC_GETSUPPORT
+ This returns the support of the card itself. This
+ returns in structure "PCWDS" which returns:
+ options = WDIOS_TEMPPANIC
+ (This card supports temperature)
+ firmware_version = xxxx
+ (Firmware version of the card)
+
+ WDIOC_GETSTATUS
+ This returns the status of the card, with the bits of
+ WDIOF_* bitwise-anded into the value. (The comments
+ are in linux/pcwd.h)
+
+ WDIOC_GETBOOTSTATUS
+ This returns the status of the card that was reported
+ at bootup.
+
+ WDIOC_GETTEMP
+ This returns the temperature of the card. (You can also
+ read /dev/watchdog, which gives a temperature update
+ every second.)
+
+ WDIOC_SETOPTIONS
+ This lets you set the options of the card. You can either
+ enable or disable the card this way.
+
+ WDIOC_KEEPALIVE
+ This pings the card to tell it not to reset your computer.
+
+ And that's all she wrote!
+
+ -- Ken Hollis
+ (kenji@bitgate.com)
+
diff --git a/Documentation/watchdog/src/.gitignore b/Documentation/watchdog/src/.gitignore
new file mode 100644
index 000000000..ac90997db
--- /dev/null
+++ b/Documentation/watchdog/src/.gitignore
@@ -0,0 +1,2 @@
+watchdog-simple
+watchdog-test
diff --git a/Documentation/watchdog/src/Makefile b/Documentation/watchdog/src/Makefile
new file mode 100644
index 000000000..4a892c304
--- /dev/null
+++ b/Documentation/watchdog/src/Makefile
@@ -0,0 +1,5 @@
+# List of programs to build
+hostprogs-y := watchdog-simple watchdog-test
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
diff --git a/Documentation/watchdog/src/watchdog-simple.c b/Documentation/watchdog/src/watchdog-simple.c
new file mode 100644
index 000000000..ba45803a2
--- /dev/null
+++ b/Documentation/watchdog/src/watchdog-simple.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+int main(void)
+{
+ int fd = open("/dev/watchdog", O_WRONLY);
+ int ret = 0;
+ if (fd == -1) {
+ perror("watchdog");
+ exit(EXIT_FAILURE);
+ }
+ while (1) {
+ ret = write(fd, "\0", 1);
+ if (ret != 1) {
+ ret = -1;
+ break;
+ }
+ sleep(10);
+ }
+ close(fd);
+ return ret;
+}
diff --git a/Documentation/watchdog/src/watchdog-test.c b/Documentation/watchdog/src/watchdog-test.c
new file mode 100644
index 000000000..3da822967
--- /dev/null
+++ b/Documentation/watchdog/src/watchdog-test.c
@@ -0,0 +1,86 @@
+/*
+ * Watchdog Driver Test Program
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+
+int fd;
+
+/*
+ * This function simply sends an IOCTL to the driver, which in turn ticks
+ * the PC Watchdog card to reset its internal timer so it doesn't trigger
+ * a computer reset.
+ */
+static void keep_alive(void)
+{
+ int dummy;
+
+ ioctl(fd, WDIOC_KEEPALIVE, &dummy);
+}
+
+/*
+ * The main program. Run the program with "-d" to disable the card,
+ * or "-e" to enable the card.
+ */
+
+static void term(int sig)
+{
+ close(fd);
+ fprintf(stderr, "Stopping watchdog ticks...\n");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ int flags;
+
+ fd = open("/dev/watchdog", O_WRONLY);
+
+ if (fd == -1) {
+ fprintf(stderr, "Watchdog device not enabled.\n");
+ fflush(stderr);
+ exit(-1);
+ }
+
+ if (argc > 1) {
+ if (!strncasecmp(argv[1], "-d", 2)) {
+ flags = WDIOS_DISABLECARD;
+ ioctl(fd, WDIOC_SETOPTIONS, &flags);
+ fprintf(stderr, "Watchdog card disabled.\n");
+ fflush(stderr);
+ goto end;
+ } else if (!strncasecmp(argv[1], "-e", 2)) {
+ flags = WDIOS_ENABLECARD;
+ ioctl(fd, WDIOC_SETOPTIONS, &flags);
+ fprintf(stderr, "Watchdog card enabled.\n");
+ fflush(stderr);
+ goto end;
+ } else {
+ fprintf(stderr, "-d to disable, -e to enable.\n");
+ fprintf(stderr, "run by itself to tick the card.\n");
+ fflush(stderr);
+ goto end;
+ }
+ } else {
+ fprintf(stderr, "Watchdog Ticking Away!\n");
+ fflush(stderr);
+ }
+
+ signal(SIGINT, term);
+
+ while(1) {
+ keep_alive();
+ sleep(1);
+ }
+end:
+ close(fd);
+ return 0;
+}
diff --git a/Documentation/watchdog/watchdog-api.txt b/Documentation/watchdog/watchdog-api.txt
new file mode 100644
index 000000000..b3a701f48
--- /dev/null
+++ b/Documentation/watchdog/watchdog-api.txt
@@ -0,0 +1,237 @@
+Last reviewed: 10/05/2007
+
+
+The Linux Watchdog driver API.
+
+Copyright 2002 Christer Weingel <wingel@nano-system.com>
+
+Some parts of this document are copied verbatim from the sbc60xxwdt
+driver which is (c) Copyright 2000 Jakob Oestergaard <jakob@ostenfeld.dk>
+
+This document describes the state of the Linux 2.4.18 kernel.
+
+Introduction:
+
+A Watchdog Timer (WDT) is a hardware circuit that can reset the
+computer system in case of a software fault. You probably knew that
+already.
+
+Usually a userspace daemon will notify the kernel watchdog driver via the
+/dev/watchdog special device file that userspace is still alive, at
+regular intervals. When such a notification occurs, the driver will
+usually tell the hardware watchdog that everything is in order, and
+that the watchdog should wait for yet another little while to reset
+the system. If userspace fails (RAM error, kernel bug, whatever), the
+notifications cease to occur, and the hardware watchdog will reset the
+system (causing a reboot) after the timeout occurs.
+
+The Linux watchdog API is a rather ad-hoc construction and different
+drivers implement different, and sometimes incompatible, parts of it.
+This file is an attempt to document the existing usage and allow
+future driver writers to use it as a reference.
+
+The simplest API:
+
+All drivers support the basic mode of operation, where the watchdog
+activates as soon as /dev/watchdog is opened and will reboot unless
+the watchdog is pinged within a certain time, this time is called the
+timeout or margin. The simplest way to ping the watchdog is to write
+some data to the device. So a very simple watchdog daemon would look
+like this source file: see Documentation/watchdog/src/watchdog-simple.c
+
+A more advanced driver could for example check that a HTTP server is
+still responding before doing the write call to ping the watchdog.
+
+When the device is closed, the watchdog is disabled, unless the "Magic
+Close" feature is supported (see below). This is not always such a
+good idea, since if there is a bug in the watchdog daemon and it
+crashes the system will not reboot. Because of this, some of the
+drivers support the configuration option "Disable watchdog shutdown on
+close", CONFIG_WATCHDOG_NOWAYOUT. If it is set to Y when compiling
+the kernel, there is no way of disabling the watchdog once it has been
+started. So, if the watchdog daemon crashes, the system will reboot
+after the timeout has passed. Watchdog devices also usually support
+the nowayout module parameter so that this option can be controlled at
+runtime.
+
+Magic Close feature:
+
+If a driver supports "Magic Close", the driver will not disable the
+watchdog unless a specific magic character 'V' has been sent to
+/dev/watchdog just before closing the file. If the userspace daemon
+closes the file without sending this special character, the driver
+will assume that the daemon (and userspace in general) died, and will
+stop pinging the watchdog without disabling it first. This will then
+cause a reboot if the watchdog is not re-opened in sufficient time.
+
+The ioctl API:
+
+All conforming drivers also support an ioctl API.
+
+Pinging the watchdog using an ioctl:
+
+All drivers that have an ioctl interface support at least one ioctl,
+KEEPALIVE. This ioctl does exactly the same thing as a write to the
+watchdog device, so the main loop in the above program could be
+replaced with:
+
+ while (1) {
+ ioctl(fd, WDIOC_KEEPALIVE, 0);
+ sleep(10);
+ }
+
+the argument to the ioctl is ignored.
+
+Setting and getting the timeout:
+
+For some drivers it is possible to modify the watchdog timeout on the
+fly with the SETTIMEOUT ioctl, those drivers have the WDIOF_SETTIMEOUT
+flag set in their option field. The argument is an integer
+representing the timeout in seconds. The driver returns the real
+timeout used in the same variable, and this timeout might differ from
+the requested one due to limitation of the hardware.
+
+ int timeout = 45;
+ ioctl(fd, WDIOC_SETTIMEOUT, &timeout);
+ printf("The timeout was set to %d seconds\n", timeout);
+
+This example might actually print "The timeout was set to 60 seconds"
+if the device has a granularity of minutes for its timeout.
+
+Starting with the Linux 2.4.18 kernel, it is possible to query the
+current timeout using the GETTIMEOUT ioctl.
+
+ ioctl(fd, WDIOC_GETTIMEOUT, &timeout);
+ printf("The timeout was is %d seconds\n", timeout);
+
+Pretimeouts:
+
+Some watchdog timers can be set to have a trigger go off before the
+actual time they will reset the system. This can be done with an NMI,
+interrupt, or other mechanism. This allows Linux to record useful
+information (like panic information and kernel coredumps) before it
+resets.
+
+ pretimeout = 10;
+ ioctl(fd, WDIOC_SETPRETIMEOUT, &pretimeout);
+
+Note that the pretimeout is the number of seconds before the time
+when the timeout will go off. It is not the number of seconds until
+the pretimeout. So, for instance, if you set the timeout to 60 seconds
+and the pretimeout to 10 seconds, the pretimeout will go off in 50
+seconds. Setting a pretimeout to zero disables it.
+
+There is also a get function for getting the pretimeout:
+
+ ioctl(fd, WDIOC_GETPRETIMEOUT, &timeout);
+ printf("The pretimeout was is %d seconds\n", timeout);
+
+Not all watchdog drivers will support a pretimeout.
+
+Get the number of seconds before reboot:
+
+Some watchdog drivers have the ability to report the remaining time
+before the system will reboot. The WDIOC_GETTIMELEFT is the ioctl
+that returns the number of seconds before reboot.
+
+ ioctl(fd, WDIOC_GETTIMELEFT, &timeleft);
+ printf("The timeout was is %d seconds\n", timeleft);
+
+Environmental monitoring:
+
+All watchdog drivers are required return more information about the system,
+some do temperature, fan and power level monitoring, some can tell you
+the reason for the last reboot of the system. The GETSUPPORT ioctl is
+available to ask what the device can do:
+
+ struct watchdog_info ident;
+ ioctl(fd, WDIOC_GETSUPPORT, &ident);
+
+the fields returned in the ident struct are:
+
+ identity a string identifying the watchdog driver
+ firmware_version the firmware version of the card if available
+ options a flags describing what the device supports
+
+the options field can have the following bits set, and describes what
+kind of information that the GET_STATUS and GET_BOOT_STATUS ioctls can
+return. [FIXME -- Is this correct?]
+
+ WDIOF_OVERHEAT Reset due to CPU overheat
+
+The machine was last rebooted by the watchdog because the thermal limit was
+exceeded
+
+ WDIOF_FANFAULT Fan failed
+
+A system fan monitored by the watchdog card has failed
+
+ WDIOF_EXTERN1 External relay 1
+
+External monitoring relay/source 1 was triggered. Controllers intended for
+real world applications include external monitoring pins that will trigger
+a reset.
+
+ WDIOF_EXTERN2 External relay 2
+
+External monitoring relay/source 2 was triggered
+
+ WDIOF_POWERUNDER Power bad/power fault
+
+The machine is showing an undervoltage status
+
+ WDIOF_CARDRESET Card previously reset the CPU
+
+The last reboot was caused by the watchdog card
+
+ WDIOF_POWEROVER Power over voltage
+
+The machine is showing an overvoltage status. Note that if one level is
+under and one over both bits will be set - this may seem odd but makes
+sense.
+
+ WDIOF_KEEPALIVEPING Keep alive ping reply
+
+The watchdog saw a keepalive ping since it was last queried.
+
+ WDIOF_SETTIMEOUT Can set/get the timeout
+
+The watchdog can do pretimeouts.
+
+ WDIOF_PRETIMEOUT Pretimeout (in seconds), get/set
+
+
+For those drivers that return any bits set in the option field, the
+GETSTATUS and GETBOOTSTATUS ioctls can be used to ask for the current
+status, and the status at the last reboot, respectively.
+
+ int flags;
+ ioctl(fd, WDIOC_GETSTATUS, &flags);
+
+ or
+
+ ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
+
+Note that not all devices support these two calls, and some only
+support the GETBOOTSTATUS call.
+
+Some drivers can measure the temperature using the GETTEMP ioctl. The
+returned value is the temperature in degrees fahrenheit.
+
+ int temperature;
+ ioctl(fd, WDIOC_GETTEMP, &temperature);
+
+Finally the SETOPTIONS ioctl can be used to control some aspects of
+the cards operation.
+
+ int options = 0;
+ ioctl(fd, WDIOC_SETOPTIONS, &options);
+
+The following options are available:
+
+ WDIOS_DISABLECARD Turn off the watchdog timer
+ WDIOS_ENABLECARD Turn on the watchdog timer
+ WDIOS_TEMPPANIC Kernel panic on temperature trip
+
+[FIXME -- better explanations]
+
diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt
new file mode 100644
index 000000000..a0438f395
--- /dev/null
+++ b/Documentation/watchdog/watchdog-kernel-api.txt
@@ -0,0 +1,226 @@
+The Linux WatchDog Timer Driver Core kernel API.
+===============================================
+Last reviewed: 12-Feb-2013
+
+Wim Van Sebroeck <wim@iguana.be>
+
+Introduction
+------------
+This document does not describe what a WatchDog Timer (WDT) Driver or Device is.
+It also does not describe the API which can be used by user space to communicate
+with a WatchDog Timer. If you want to know this then please read the following
+file: Documentation/watchdog/watchdog-api.txt .
+
+So what does this document describe? It describes the API that can be used by
+WatchDog Timer Drivers that want to use the WatchDog Timer Driver Core
+Framework. This framework provides all interfacing towards user space so that
+the same code does not have to be reproduced each time. This also means that
+a watchdog timer driver then only needs to provide the different routines
+(operations) that control the watchdog timer (WDT).
+
+The API
+-------
+Each watchdog timer driver that wants to use the WatchDog Timer Driver Core
+must #include <linux/watchdog.h> (you would have to do this anyway when
+writing a watchdog device driver). This include file contains following
+register/unregister routines:
+
+extern int watchdog_register_device(struct watchdog_device *);
+extern void watchdog_unregister_device(struct watchdog_device *);
+
+The watchdog_register_device routine registers a watchdog timer device.
+The parameter of this routine is a pointer to a watchdog_device structure.
+This routine returns zero on success and a negative errno code for failure.
+
+The watchdog_unregister_device routine deregisters a registered watchdog timer
+device. The parameter of this routine is the pointer to the registered
+watchdog_device structure.
+
+The watchdog device structure looks like this:
+
+struct watchdog_device {
+ int id;
+ struct cdev cdev;
+ struct device *dev;
+ struct device *parent;
+ const struct watchdog_info *info;
+ const struct watchdog_ops *ops;
+ unsigned int bootstatus;
+ unsigned int timeout;
+ unsigned int min_timeout;
+ unsigned int max_timeout;
+ void *driver_data;
+ struct mutex lock;
+ unsigned long status;
+};
+
+It contains following fields:
+* id: set by watchdog_register_device, id 0 is special. It has both a
+ /dev/watchdog0 cdev (dynamic major, minor 0) as well as the old
+ /dev/watchdog miscdev. The id is set automatically when calling
+ watchdog_register_device.
+* cdev: cdev for the dynamic /dev/watchdog<id> device nodes. This
+ field is also populated by watchdog_register_device.
+* dev: device under the watchdog class (created by watchdog_register_device).
+* parent: set this to the parent device (or NULL) before calling
+ watchdog_register_device.
+* info: a pointer to a watchdog_info structure. This structure gives some
+ additional information about the watchdog timer itself. (Like it's unique name)
+* ops: a pointer to the list of watchdog operations that the watchdog supports.
+* timeout: the watchdog timer's timeout value (in seconds).
+* min_timeout: the watchdog timer's minimum timeout value (in seconds).
+* max_timeout: the watchdog timer's maximum timeout value (in seconds).
+* bootstatus: status of the device after booting (reported with watchdog
+ WDIOF_* status bits).
+* driver_data: a pointer to the drivers private data of a watchdog device.
+ This data should only be accessed via the watchdog_set_drvdata and
+ watchdog_get_drvdata routines.
+* lock: Mutex for WatchDog Timer Driver Core internal use only.
+* status: this field contains a number of status bits that give extra
+ information about the status of the device (Like: is the watchdog timer
+ running/active, is the nowayout bit set, is the device opened via
+ the /dev/watchdog interface or not, ...).
+
+The list of watchdog operations is defined as:
+
+struct watchdog_ops {
+ struct module *owner;
+ /* mandatory operations */
+ int (*start)(struct watchdog_device *);
+ int (*stop)(struct watchdog_device *);
+ /* optional operations */
+ int (*ping)(struct watchdog_device *);
+ unsigned int (*status)(struct watchdog_device *);
+ int (*set_timeout)(struct watchdog_device *, unsigned int);
+ unsigned int (*get_timeleft)(struct watchdog_device *);
+ void (*ref)(struct watchdog_device *);
+ void (*unref)(struct watchdog_device *);
+ long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long);
+};
+
+It is important that you first define the module owner of the watchdog timer
+driver's operations. This module owner will be used to lock the module when
+the watchdog is active. (This to avoid a system crash when you unload the
+module and /dev/watchdog is still open).
+
+If the watchdog_device struct is dynamically allocated, just locking the module
+is not enough and a driver also needs to define the ref and unref operations to
+ensure the structure holding the watchdog_device does not go away.
+
+The simplest (and usually sufficient) implementation of this is to:
+1) Add a kref struct to the same structure which is holding the watchdog_device
+2) Define a release callback for the kref which frees the struct holding both
+3) Call kref_init on this kref *before* calling watchdog_register_device()
+4) Define a ref operation calling kref_get on this kref
+5) Define a unref operation calling kref_put on this kref
+6) When it is time to cleanup:
+ * Do not kfree() the struct holding both, the last kref_put will do this!
+ * *After* calling watchdog_unregister_device() call kref_put on the kref
+
+Some operations are mandatory and some are optional. The mandatory operations
+are:
+* start: this is a pointer to the routine that starts the watchdog timer
+ device.
+ The routine needs a pointer to the watchdog timer device structure as a
+ parameter. It returns zero on success or a negative errno code for failure.
+* stop: with this routine the watchdog timer device is being stopped.
+ The routine needs a pointer to the watchdog timer device structure as a
+ parameter. It returns zero on success or a negative errno code for failure.
+ Some watchdog timer hardware can only be started and not be stopped. The
+ driver supporting this hardware needs to make sure that a start and stop
+ routine is being provided. This can be done by using a timer in the driver
+ that regularly sends a keepalive ping to the watchdog timer hardware.
+
+Not all watchdog timer hardware supports the same functionality. That's why
+all other routines/operations are optional. They only need to be provided if
+they are supported. These optional routines/operations are:
+* ping: this is the routine that sends a keepalive ping to the watchdog timer
+ hardware.
+ The routine needs a pointer to the watchdog timer device structure as a
+ parameter. It returns zero on success or a negative errno code for failure.
+ Most hardware that does not support this as a separate function uses the
+ start function to restart the watchdog timer hardware. And that's also what
+ the watchdog timer driver core does: to send a keepalive ping to the watchdog
+ timer hardware it will either use the ping operation (when available) or the
+ start operation (when the ping operation is not available).
+ (Note: the WDIOC_KEEPALIVE ioctl call will only be active when the
+ WDIOF_KEEPALIVEPING bit has been set in the option field on the watchdog's
+ info structure).
+* status: this routine checks the status of the watchdog timer device. The
+ status of the device is reported with watchdog WDIOF_* status flags/bits.
+* set_timeout: this routine checks and changes the timeout of the watchdog
+ timer device. It returns 0 on success, -EINVAL for "parameter out of range"
+ and -EIO for "could not write value to the watchdog". On success this
+ routine should set the timeout value of the watchdog_device to the
+ achieved timeout value (which may be different from the requested one
+ because the watchdog does not necessarily has a 1 second resolution).
+ (Note: the WDIOF_SETTIMEOUT needs to be set in the options field of the
+ watchdog's info structure).
+* get_timeleft: this routines returns the time that's left before a reset.
+* ref: the operation that calls kref_get on the kref of a dynamically
+ allocated watchdog_device struct.
+* unref: the operation that calls kref_put on the kref of a dynamically
+ allocated watchdog_device struct.
+* ioctl: if this routine is present then it will be called first before we do
+ our own internal ioctl call handling. This routine should return -ENOIOCTLCMD
+ if a command is not supported. The parameters that are passed to the ioctl
+ call are: watchdog_device, cmd and arg.
+
+The status bits should (preferably) be set with the set_bit and clear_bit alike
+bit-operations. The status bits that are defined are:
+* WDOG_ACTIVE: this status bit indicates whether or not a watchdog timer device
+ is active or not. When the watchdog is active after booting, then you should
+ set this status bit (Note: when you register the watchdog timer device with
+ this bit set, then opening /dev/watchdog will skip the start operation)
+* WDOG_DEV_OPEN: this status bit shows whether or not the watchdog device
+ was opened via /dev/watchdog.
+ (This bit should only be used by the WatchDog Timer Driver Core).
+* WDOG_ALLOW_RELEASE: this bit stores whether or not the magic close character
+ has been sent (so that we can support the magic close feature).
+ (This bit should only be used by the WatchDog Timer Driver Core).
+* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog.
+ If this bit is set then the watchdog timer will not be able to stop.
+* WDOG_UNREGISTERED: this bit gets set by the WatchDog Timer Driver Core
+ after calling watchdog_unregister_device, and then checked before calling
+ any watchdog_ops, so that you can be sure that no operations (other then
+ unref) will get called after unregister, even if userspace still holds a
+ reference to /dev/watchdog
+
+ To set the WDOG_NO_WAY_OUT status bit (before registering your watchdog
+ timer device) you can either:
+ * set it statically in your watchdog_device struct with
+ .status = WATCHDOG_NOWAYOUT_INIT_STATUS,
+ (this will set the value the same as CONFIG_WATCHDOG_NOWAYOUT) or
+ * use the following helper function:
+ static inline void watchdog_set_nowayout(struct watchdog_device *wdd, int nowayout)
+
+Note: The WatchDog Timer Driver Core supports the magic close feature and
+the nowayout feature. To use the magic close feature you must set the
+WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure.
+The nowayout feature will overrule the magic close feature.
+
+To get or set driver specific data the following two helper functions should be
+used:
+
+static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data)
+static inline void *watchdog_get_drvdata(struct watchdog_device *wdd)
+
+The watchdog_set_drvdata function allows you to add driver specific data. The
+arguments of this function are the watchdog device where you want to add the
+driver specific data to and a pointer to the data itself.
+
+The watchdog_get_drvdata function allows you to retrieve driver specific data.
+The argument of this function is the watchdog device where you want to retrieve
+data from. The function returns the pointer to the driver specific data.
+
+To initialize the timeout field, the following function can be used:
+
+extern int watchdog_init_timeout(struct watchdog_device *wdd,
+ unsigned int timeout_parm, struct device *dev);
+
+The watchdog_init_timeout function allows you to initialize the timeout field
+using the module timeout parameter or by retrieving the timeout-sec property from
+the device tree (if the module timeout parameter is invalid). Best practice is
+to set the default timeout value as timeout value in the watchdog_device and
+then use this function to set the user "preferred" timeout value.
+This routine returns zero on success and a negative errno code for failure.
diff --git a/Documentation/watchdog/watchdog-parameters.txt b/Documentation/watchdog/watchdog-parameters.txt
new file mode 100644
index 000000000..692791cc6
--- /dev/null
+++ b/Documentation/watchdog/watchdog-parameters.txt
@@ -0,0 +1,399 @@
+This file provides information on the module parameters of many of
+the Linux watchdog drivers. Watchdog driver parameter specs should
+be listed here unless the driver has its own driver-specific information
+file.
+
+
+See Documentation/kernel-parameters.txt for information on
+providing kernel parameters for builtin drivers versus loadable
+modules.
+
+
+-------------------------------------------------
+acquirewdt:
+wdt_stop: Acquire WDT 'stop' io port (default 0x43)
+wdt_start: Acquire WDT 'start' io port (default 0x443)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+advantechwdt:
+wdt_stop: Advantech WDT 'stop' io port (default 0x443)
+wdt_start: Advantech WDT 'start' io port (default 0x443)
+timeout: Watchdog timeout in seconds. 1<= timeout <=63, default=60.
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+alim1535_wdt:
+timeout: Watchdog timeout in seconds. (0 < timeout < 18000, default=60
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+alim7101_wdt:
+timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=30
+use_gpio: Use the gpio watchdog (required by old cobalt boards).
+ default=0/off/no
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+ar7_wdt:
+margin: Watchdog margin in seconds (default=60)
+nowayout: Disable watchdog shutdown on close
+ (default=kernel config parameter)
+-------------------------------------------------
+at32ap700x_wdt:
+timeout: Timeout value. Limited to be 1 or 2 seconds. (default=2)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+at91rm9200_wdt:
+wdt_time: Watchdog time in seconds. (default=5)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+at91sam9_wdt:
+heartbeat: Watchdog heartbeats in seconds. (default = 15)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+bcm47xx_wdt:
+wdt_time: Watchdog time in seconds. (default=30)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+bfin_wdt:
+timeout: Watchdog timeout in seconds. (1<=timeout<=((2^32)/SCLK), default=20)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+coh901327_wdt:
+margin: Watchdog margin in seconds (default 60s)
+-------------------------------------------------
+cpu5wdt:
+port: base address of watchdog card, default is 0x91
+verbose: be verbose, default is 0 (no)
+ticks: count down ticks, default is 10000
+-------------------------------------------------
+cpwd:
+wd0_timeout: Default watchdog0 timeout in 1/10secs
+wd1_timeout: Default watchdog1 timeout in 1/10secs
+wd2_timeout: Default watchdog2 timeout in 1/10secs
+-------------------------------------------------
+da9052wdt:
+timeout: Watchdog timeout in seconds. 2<= timeout <=131, default=2.048s
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+davinci_wdt:
+heartbeat: Watchdog heartbeat period in seconds from 1 to 600, default 60
+-------------------------------------------------
+ep93xx_wdt:
+nowayout: Watchdog cannot be stopped once started
+timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=TBD)
+-------------------------------------------------
+eurotechwdt:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+io: Eurotech WDT io port (default=0x3f0)
+irq: Eurotech WDT irq (default=10)
+ev: Eurotech WDT event type (default is `int')
+-------------------------------------------------
+gef_wdt:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+geodewdt:
+timeout: Watchdog timeout in seconds. 1<= timeout <=131, default=60.
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+i6300esb:
+heartbeat: Watchdog heartbeat in seconds. (1<heartbeat<2046, default=30)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+iTCO_wdt:
+heartbeat: Watchdog heartbeat in seconds.
+ (2<heartbeat<39 (TCO v1) or 613 (TCO v2), default=30)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+iTCO_vendor_support:
+vendorsupport: iTCO vendor specific support mode, default=0 (none),
+ 1=SuperMicro Pent3, 2=SuperMicro Pent4+, 911=Broken SMI BIOS
+-------------------------------------------------
+ib700wdt:
+timeout: Watchdog timeout in seconds. 0<= timeout <=30, default=30.
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+ibmasr:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+imx2_wdt:
+timeout: Watchdog timeout in seconds (default 60 s)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+indydog:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+iop_wdt:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+it8712f_wdt:
+margin: Watchdog margin in seconds (default 60)
+nowayout: Disable watchdog shutdown on close
+ (default=kernel config parameter)
+-------------------------------------------------
+it87_wdt:
+nogameport: Forbid the activation of game port, default=0
+nocir: Forbid the use of CIR (workaround for some buggy setups); set to 1 if
+system resets despite watchdog daemon running, default=0
+exclusive: Watchdog exclusive device open, default=1
+timeout: Watchdog timeout in seconds, default=60
+testmode: Watchdog test mode (1 = no reboot), default=0
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+ixp2000_wdt:
+heartbeat: Watchdog heartbeat in seconds (default 60s)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+ixp4xx_wdt:
+heartbeat: Watchdog heartbeat in seconds (default 60s)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+ks8695_wdt:
+wdt_time: Watchdog time in seconds. (default=5)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+machzwd:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+action: after watchdog resets, generate:
+ 0 = RESET(*) 1 = SMI 2 = NMI 3 = SCI
+-------------------------------------------------
+max63xx_wdt:
+heartbeat: Watchdog heartbeat period in seconds from 1 to 60, default 60
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+nodelay: Force selection of a timeout setting without initial delay
+ (max6373/74 only, default=0)
+-------------------------------------------------
+mixcomwd:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+mpc8xxx_wdt:
+timeout: Watchdog timeout in ticks. (0<timeout<65536, default=65535)
+reset: Watchdog Interrupt/Reset Mode. 0 = interrupt, 1 = reset
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+mv64x60_wdt:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+nuc900_wdt:
+heartbeat: Watchdog heartbeats in seconds.
+ (default = 15)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+omap_wdt:
+timer_margin: initial watchdog timeout (in seconds)
+-------------------------------------------------
+orion_wdt:
+heartbeat: Initial watchdog heartbeat in seconds
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+pc87413_wdt:
+io: pc87413 WDT I/O port (default: io).
+timeout: Watchdog timeout in minutes (default=timeout).
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+pika_wdt:
+heartbeat: Watchdog heartbeats in seconds. (default = 15)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+pnx4008_wdt:
+heartbeat: Watchdog heartbeat period in seconds from 1 to 60, default 19
+nowayout: Set to 1 to keep watchdog running after device release
+-------------------------------------------------
+pnx833x_wdt:
+timeout: Watchdog timeout in Mhz. (68Mhz clock), default=2040000000 (30 seconds)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+start_enabled: Watchdog is started on module insertion (default=1)
+-------------------------------------------------
+rc32434_wdt:
+timeout: Watchdog timeout value, in seconds (default=20)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+riowd:
+riowd_timeout: Watchdog timeout in minutes (default=1)
+-------------------------------------------------
+s3c2410_wdt:
+tmr_margin: Watchdog tmr_margin in seconds. (default=15)
+tmr_atboot: Watchdog is started at boot time if set to 1, default=0
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+soft_noboot: Watchdog action, set to 1 to ignore reboots, 0 to reboot
+debug: Watchdog debug, set to >1 for debug, (default 0)
+-------------------------------------------------
+sa1100_wdt:
+margin: Watchdog margin in seconds (default 60s)
+-------------------------------------------------
+sb_wdog:
+timeout: Watchdog timeout in microseconds (max/default 8388607 or 8.3ish secs)
+-------------------------------------------------
+sbc60xxwdt:
+wdt_stop: SBC60xx WDT 'stop' io port (default 0x45)
+wdt_start: SBC60xx WDT 'start' io port (default 0x443)
+timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=30)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+sbc7240_wdt:
+timeout: Watchdog timeout in seconds. (1<=timeout<=255, default=30)
+nowayout: Disable watchdog when closing device file
+-------------------------------------------------
+sbc8360:
+timeout: Index into timeout table (0-63) (default=27 (60s))
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+sbc_epx_c3:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+sbc_fitpc2_wdt:
+margin: Watchdog margin in seconds (default 60s)
+nowayout: Watchdog cannot be stopped once started
+-------------------------------------------------
+sc1200wdt:
+isapnp: When set to 0 driver ISA PnP support will be disabled (default=1)
+io: io port
+timeout: range is 0-255 minutes, default is 1
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+sc520_wdt:
+timeout: Watchdog timeout in seconds. (1 <= timeout <= 3600, default=30)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+sch311x_wdt:
+force_id: Override the detected device ID
+therm_trip: Should a ThermTrip trigger the reset generator
+timeout: Watchdog timeout in seconds. 1<= timeout <=15300, default=60
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+scx200_wdt:
+margin: Watchdog margin in seconds
+nowayout: Disable watchdog shutdown on close
+-------------------------------------------------
+shwdt:
+clock_division_ratio: Clock division ratio. Valid ranges are from 0x5 (1.31ms)
+ to 0x7 (5.25ms). (default=7)
+heartbeat: Watchdog heartbeat in seconds. (1 <= heartbeat <= 3600, default=30
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+smsc37b787_wdt:
+timeout: range is 1-255 units, default is 60
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+softdog:
+soft_margin: Watchdog soft_margin in seconds.
+ (0 < soft_margin < 65536, default=60)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+soft_noboot: Softdog action, set to 1 to ignore reboots, 0 to reboot
+ (default=0)
+-------------------------------------------------
+stmp3xxx_wdt:
+heartbeat: Watchdog heartbeat period in seconds from 1 to 4194304, default 19
+-------------------------------------------------
+tegra_wdt:
+heartbeat: Watchdog heartbeats in seconds. (default = 120)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+ts72xx_wdt:
+timeout: Watchdog timeout in seconds. (1 <= timeout <= 8, default=8)
+nowayout: Disable watchdog shutdown on close
+-------------------------------------------------
+twl4030_wdt:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+txx9wdt:
+timeout: Watchdog timeout in seconds. (0<timeout<N, default=60)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+w83627hf_wdt:
+wdt_io: w83627hf/thf WDT io port (default 0x2E)
+timeout: Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+w83697hf_wdt:
+wdt_io: w83697hf/hg WDT io port (default 0x2e, 0 = autodetect)
+timeout: Watchdog timeout in seconds. 1<= timeout <=255 (default=60)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+early_disable: Watchdog gets disabled at boot time (default=1)
+-------------------------------------------------
+w83697ug_wdt:
+wdt_io: w83697ug/uf WDT io port (default 0x2e)
+timeout: Watchdog timeout in seconds. 1<= timeout <=255 (default=60)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+w83877f_wdt:
+timeout: Watchdog timeout in seconds. (1<=timeout<=3600, default=30)
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+w83977f_wdt:
+timeout: Watchdog timeout in seconds (15..7635), default=45)
+testmode: Watchdog testmode (1 = no reboot), default=0
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+wafer5823wdt:
+timeout: Watchdog timeout in seconds. 1 <= timeout <= 255, default=60.
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+wdt285:
+soft_margin: Watchdog timeout in seconds (default=60)
+-------------------------------------------------
+wdt977:
+timeout: Watchdog timeout in seconds (60..15300, default=60)
+testmode: Watchdog testmode (1 = no reboot), default=0
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+wm831x_wdt:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
+wm8350_wdt:
+nowayout: Watchdog cannot be stopped once started
+ (default=kernel config parameter)
+-------------------------------------------------
diff --git a/Documentation/watchdog/wdt.txt b/Documentation/watchdog/wdt.txt
new file mode 100644
index 000000000..061c2e353
--- /dev/null
+++ b/Documentation/watchdog/wdt.txt
@@ -0,0 +1,50 @@
+Last Reviewed: 10/05/2007
+
+ WDT Watchdog Timer Interfaces For The Linux Operating System
+ Alan Cox <alan@lxorguk.ukuu.org.uk>
+
+ ICS WDT501-P
+ ICS WDT501-P (no fan tachometer)
+ ICS WDT500-P
+
+All the interfaces provide /dev/watchdog, which when open must be written
+to within a timeout or the machine will reboot. Each write delays the reboot
+time another timeout. In the case of the software watchdog the ability to
+reboot will depend on the state of the machines and interrupts. The hardware
+boards physically pull the machine down off their own onboard timers and
+will reboot from almost anything.
+
+A second temperature monitoring interface is available on the WDT501P cards.
+This provides /dev/temperature. This is the machine internal temperature in
+degrees Fahrenheit. Each read returns a single byte giving the temperature.
+
+The third interface logs kernel messages on additional alert events.
+
+The ICS ISA-bus wdt card cannot be safely probed for. Instead you need to
+pass IO address and IRQ boot parameters. E.g.:
+ wdt.io=0x240 wdt.irq=11
+
+Other "wdt" driver parameters are:
+ heartbeat Watchdog heartbeat in seconds (default 60)
+ nowayout Watchdog cannot be stopped once started (kernel
+ build parameter)
+ tachometer WDT501-P Fan Tachometer support (0=disable, default=0)
+ type WDT501-P Card type (500 or 501, default=500)
+
+Features
+--------
+ WDT501P WDT500P
+Reboot Timer X X
+External Reboot X X
+I/O Port Monitor o o
+Temperature X o
+Fan Speed X o
+Power Under X o
+Power Over X o
+Overheat X o
+
+The external event interfaces on the WDT boards are not currently supported.
+Minor numbers are however allocated for it.
+
+
+Example Watchdog Driver: see Documentation/watchdog/src/watchdog-simple.c
diff --git a/Documentation/wimax/README.i2400m b/Documentation/wimax/README.i2400m
new file mode 100644
index 000000000..84ba22d35
--- /dev/null
+++ b/Documentation/wimax/README.i2400m
@@ -0,0 +1,237 @@
+
+ Driver for the Intel Wireless Wimax Connection 2400m
+
+ (C) 2008 Intel Corporation < linux-wimax@intel.com >
+
+ This provides a driver for the Intel Wireless WiMAX Connection 2400m
+ and a basic Linux kernel WiMAX stack.
+
+1. Requirements
+
+ * Linux installation with Linux kernel 2.6.22 or newer (if building
+ from a separate tree)
+ * Intel i2400m Echo Peak or Baxter Peak; this includes the Intel
+ Wireless WiMAX/WiFi Link 5x50 series.
+ * build tools:
+ + Linux kernel development package for the target kernel; to
+ build against your currently running kernel, you need to have
+ the kernel development package corresponding to the running
+ image installed (usually if your kernel is named
+ linux-VERSION, the development package is called
+ linux-dev-VERSION or linux-headers-VERSION).
+ + GNU C Compiler, make
+
+2. Compilation and installation
+
+2.1. Compilation of the drivers included in the kernel
+
+ Configure the kernel; to enable the WiMAX drivers select Drivers >
+ Networking Drivers > WiMAX device support. Enable all of them as
+ modules (easier).
+
+ If USB or SDIO are not enabled in the kernel configuration, the options
+ to build the i2400m USB or SDIO drivers will not show. Enable said
+ subsystems and go back to the WiMAX menu to enable the drivers.
+
+ Compile and install your kernel as usual.
+
+2.2. Compilation of the drivers distributed as an standalone module
+
+ To compile
+
+$ cd source/directory
+$ make
+
+ Once built you can load and unload using the provided load.sh script;
+ load.sh will load the modules, load.sh u will unload them.
+
+ To install in the default kernel directories (and enable auto loading
+ when the device is plugged):
+
+$ make install
+$ depmod -a
+
+ If your kernel development files are located in a non standard
+ directory or if you want to build for a kernel that is not the
+ currently running one, set KDIR to the right location:
+
+$ make KDIR=/path/to/kernel/dev/tree
+
+ For more information, please contact linux-wimax@intel.com.
+
+/*(DEBLOBBED)*/
+
+4. Design
+
+ This package contains two major parts: a WiMAX kernel stack and a
+ driver for the Intel i2400m.
+
+ The WiMAX stack is designed to provide for common WiMAX control
+ services to current and future WiMAX devices from any vendor; please
+ see README.wimax for details.
+
+ The i2400m kernel driver is broken up in two main parts: the bus
+ generic driver and the bus-specific drivers. The bus generic driver
+ forms the drivercore and contain no knowledge of the actual method we
+ use to connect to the device. The bus specific drivers are just the
+ glue to connect the bus-generic driver and the device. Currently only
+ USB and SDIO are supported. See drivers/net/wimax/i2400m/i2400m.h for
+ more information.
+
+ The bus generic driver is logically broken up in two parts: OS-glue and
+ hardware-glue. The OS-glue interfaces with Linux. The hardware-glue
+ interfaces with the device on using an interface provided by the
+ bus-specific driver. The reason for this breakup is to be able to
+ easily reuse the hardware-glue to write drivers for other OSes; note
+ the hardware glue part is written as a native Linux driver; no
+ abstraction layers are used, so to port to another OS, the Linux kernel
+ API calls should be replaced with the target OS's.
+
+5. Usage
+
+ To load the driver, follow the instructions in the install section;
+ once the driver is loaded, plug in the device (unless it is permanently
+ plugged in). The driver will enumerate the device, upload the firmware
+ and output messages in the kernel log (dmesg, /var/log/messages or
+ /var/log/kern.log) such as:
+
+...
+i2400m_usb 5-4:1.0: firmware interface version 8.0.0
+i2400m_usb 5-4:1.0: WiMAX interface wmx0 (00:1d:e1:01:94:2c) ready
+
+ At this point the device is ready to work.
+
+ Current versions require the Intel WiMAX Network Service in userspace
+ to make things work. See the network service's README for instructions
+ on how to scan, connect and disconnect.
+
+5.1. Module parameters
+
+ Module parameters can be set at kernel or module load time or by
+ echoing values:
+
+$ echo VALUE > /sys/module/MODULENAME/parameters/PARAMETERNAME
+
+ To make changes permanent, for example, for the i2400m module, you can
+ also create a file named /etc/modprobe.d/i2400m containing:
+
+options i2400m idle_mode_disabled=1
+
+ To find which parameters are supported by a module, run:
+
+$ modinfo path/to/module.ko
+
+ During kernel bootup (if the driver is linked in the kernel), specify
+ the following to the kernel command line:
+
+i2400m.PARAMETER=VALUE
+
+5.1.1. i2400m: idle_mode_disabled
+
+ The i2400m module supports a parameter to disable idle mode. This
+ parameter, once set, will take effect only when the device is
+ reinitialized by the driver (eg: following a reset or a reconnect).
+
+5.2. Debug operations: debugfs entries
+
+ The driver will register debugfs entries that allow the user to tweak
+ debug settings. There are three main container directories where
+ entries are placed, which correspond to the three blocks a i2400m WiMAX
+ driver has:
+ * /sys/kernel/debug/wimax:DEVNAME/ for the generic WiMAX stack
+ controls
+ * /sys/kernel/debug/wimax:DEVNAME/i2400m for the i2400m generic
+ driver controls
+ * /sys/kernel/debug/wimax:DEVNAME/i2400m-usb (or -sdio) for the
+ bus-specific i2400m-usb or i2400m-sdio controls).
+
+ Of course, if debugfs is mounted in a directory other than
+ /sys/kernel/debug, those paths will change.
+
+5.2.1. Increasing debug output
+
+ The files named *dl_* indicate knobs for controlling the debug output
+ of different submodules:
+ *
+# find /sys/kernel/debug/wimax\:wmx0 -name \*dl_\*
+/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_tx
+/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_rx
+/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_notif
+/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_fw
+/sys/kernel/debug/wimax:wmx0/i2400m-usb/dl_usb
+/sys/kernel/debug/wimax:wmx0/i2400m/dl_tx
+/sys/kernel/debug/wimax:wmx0/i2400m/dl_rx
+/sys/kernel/debug/wimax:wmx0/i2400m/dl_rfkill
+/sys/kernel/debug/wimax:wmx0/i2400m/dl_netdev
+/sys/kernel/debug/wimax:wmx0/i2400m/dl_fw
+/sys/kernel/debug/wimax:wmx0/i2400m/dl_debugfs
+/sys/kernel/debug/wimax:wmx0/i2400m/dl_driver
+/sys/kernel/debug/wimax:wmx0/i2400m/dl_control
+/sys/kernel/debug/wimax:wmx0/wimax_dl_stack
+/sys/kernel/debug/wimax:wmx0/wimax_dl_op_rfkill
+/sys/kernel/debug/wimax:wmx0/wimax_dl_op_reset
+/sys/kernel/debug/wimax:wmx0/wimax_dl_op_msg
+/sys/kernel/debug/wimax:wmx0/wimax_dl_id_table
+/sys/kernel/debug/wimax:wmx0/wimax_dl_debugfs
+
+ By reading the file you can obtain the current value of said debug
+ level; by writing to it, you can set it.
+
+ To increase the debug level of, for example, the i2400m's generic TX
+ engine, just write:
+
+$ echo 3 > /sys/kernel/debug/wimax:wmx0/i2400m/dl_tx
+
+ Increasing numbers yield increasing debug information; for details of
+ what is printed and the available levels, check the source. The code
+ uses 0 for disabled and increasing values until 8.
+
+5.2.2. RX and TX statistics
+
+ The i2400m/rx_stats and i2400m/tx_stats provide statistics about the
+ data reception/delivery from the device:
+
+$ cat /sys/kernel/debug/wimax:wmx0/i2400m/rx_stats
+45 1 3 34 3104 48 480
+
+ The numbers reported are
+ * packets/RX-buffer: total, min, max
+ * RX-buffers: total RX buffers received, accumulated RX buffer size
+ in bytes, min size received, max size received
+
+ Thus, to find the average buffer size received, divide accumulated
+ RX-buffer / total RX-buffers.
+
+ To clear the statistics back to 0, write anything to the rx_stats file:
+
+$ echo 1 > /sys/kernel/debug/wimax:wmx0/i2400m_rx_stats
+
+ Likewise for TX.
+
+ Note the packets this debug file refers to are not network packet, but
+ packets in the sense of the device-specific protocol for communication
+ to the host. See drivers/net/wimax/i2400m/tx.c.
+
+5.2.3. Tracing messages received from user space
+
+ To echo messages received from user space into the trace pipe that the
+ i2400m driver creates, set the debug file i2400m/trace_msg_from_user to
+ 1:
+ *
+$ echo 1 > /sys/kernel/debug/wimax:wmx0/i2400m/trace_msg_from_user
+
+5.2.4. Performing a device reset
+
+ By writing a 0, a 1 or a 2 to the file
+ /sys/kernel/debug/wimax:wmx0/reset, the driver performs a warm (without
+ disconnecting from the bus), cold (disconnecting from the bus) or bus
+ (bus specific) reset on the device.
+
+5.2.5. Asking the device to enter power saving mode
+
+ By writing any value to the /sys/kernel/debug/wimax:wmx0 file, the
+ device will attempt to enter power saving mode.
+
+6. Troubleshooting
+
+/*(DEBLOBBED)*/
diff --git a/Documentation/wimax/README.wimax b/Documentation/wimax/README.wimax
new file mode 100644
index 000000000..b78c43780
--- /dev/null
+++ b/Documentation/wimax/README.wimax
@@ -0,0 +1,81 @@
+
+ Linux kernel WiMAX stack
+
+ (C) 2008 Intel Corporation < linux-wimax@intel.com >
+
+ This provides a basic Linux kernel WiMAX stack to provide a common
+ control API for WiMAX devices, usable from kernel and user space.
+
+1. Design
+
+ The WiMAX stack is designed to provide for common WiMAX control
+ services to current and future WiMAX devices from any vendor.
+
+ Because currently there is only one and we don't know what would be the
+ common services, the APIs it currently provides are very minimal.
+ However, it is done in such a way that it is easily extensible to
+ accommodate future requirements.
+
+ The stack works by embedding a struct wimax_dev in your device's
+ control structures. This provides a set of callbacks that the WiMAX
+ stack will call in order to implement control operations requested by
+ the user. As well, the stack provides API functions that the driver
+ calls to notify about changes of state in the device.
+
+ The stack exports the API calls needed to control the device to user
+ space using generic netlink as a marshalling mechanism. You can access
+ them using your own code or use the wrappers provided for your
+ convenience in libwimax (in the wimax-tools package).
+
+ For detailed information on the stack, please see
+ include/linux/wimax.h.
+
+2. Usage
+
+ For usage in a driver (registration, API, etc) please refer to the
+ instructions in the header file include/linux/wimax.h.
+
+ When a device is registered with the WiMAX stack, a set of debugfs
+ files will appear in /sys/kernel/debug/wimax:wmxX can tweak for
+ control.
+
+2.1. Obtaining debug information: debugfs entries
+
+ The WiMAX stack is compiled, by default, with debug messages that can
+ be used to diagnose issues. By default, said messages are disabled.
+
+ The drivers will register debugfs entries that allow the user to tweak
+ debug settings.
+
+ Each driver, when registering with the stack, will cause a debugfs
+ directory named wimax:DEVICENAME to be created; optionally, it might
+ create more subentries below it.
+
+2.1.1. Increasing debug output
+
+ The files named *dl_* indicate knobs for controlling the debug output
+ of different submodules of the WiMAX stack:
+ *
+# find /sys/kernel/debug/wimax\:wmx0 -name \*dl_\*
+/sys/kernel/debug/wimax:wmx0/wimax_dl_stack
+/sys/kernel/debug/wimax:wmx0/wimax_dl_op_rfkill
+/sys/kernel/debug/wimax:wmx0/wimax_dl_op_reset
+/sys/kernel/debug/wimax:wmx0/wimax_dl_op_msg
+/sys/kernel/debug/wimax:wmx0/wimax_dl_id_table
+/sys/kernel/debug/wimax:wmx0/wimax_dl_debugfs
+/sys/kernel/debug/wimax:wmx0/.... # other driver specific files
+
+ NOTE: Of course, if debugfs is mounted in a directory other than
+ /sys/kernel/debug, those paths will change.
+
+ By reading the file you can obtain the current value of said debug
+ level; by writing to it, you can set it.
+
+ To increase the debug level of, for example, the id-table submodule,
+ just write:
+
+$ echo 3 > /sys/kernel/debug/wimax:wmx0/wimax_dl_id_table
+
+ Increasing numbers yield increasing debug information; for details of
+ what is printed and the available levels, check the source. The code
+ uses 0 for disabled and increasing values until 8.
diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt
new file mode 100644
index 000000000..f81a65b54
--- /dev/null
+++ b/Documentation/workqueue.txt
@@ -0,0 +1,388 @@
+
+Concurrency Managed Workqueue (cmwq)
+
+September, 2010 Tejun Heo <tj@kernel.org>
+ Florian Mickler <florian@mickler.org>
+
+CONTENTS
+
+1. Introduction
+2. Why cmwq?
+3. The Design
+4. Application Programming Interface (API)
+5. Example Execution Scenarios
+6. Guidelines
+7. Debugging
+
+
+1. Introduction
+
+There are many cases where an asynchronous process execution context
+is needed and the workqueue (wq) API is the most commonly used
+mechanism for such cases.
+
+When such an asynchronous execution context is needed, a work item
+describing which function to execute is put on a queue. An
+independent thread serves as the asynchronous execution context. The
+queue is called workqueue and the thread is called worker.
+
+While there are work items on the workqueue the worker executes the
+functions associated with the work items one after the other. When
+there is no work item left on the workqueue the worker becomes idle.
+When a new work item gets queued, the worker begins executing again.
+
+
+2. Why cmwq?
+
+In the original wq implementation, a multi threaded (MT) wq had one
+worker thread per CPU and a single threaded (ST) wq had one worker
+thread system-wide. A single MT wq needed to keep around the same
+number of workers as the number of CPUs. The kernel grew a lot of MT
+wq users over the years and with the number of CPU cores continuously
+rising, some systems saturated the default 32k PID space just booting
+up.
+
+Although MT wq wasted a lot of resource, the level of concurrency
+provided was unsatisfactory. The limitation was common to both ST and
+MT wq albeit less severe on MT. Each wq maintained its own separate
+worker pool. A MT wq could provide only one execution context per CPU
+while a ST wq one for the whole system. Work items had to compete for
+those very limited execution contexts leading to various problems
+including proneness to deadlocks around the single execution context.
+
+The tension between the provided level of concurrency and resource
+usage also forced its users to make unnecessary tradeoffs like libata
+choosing to use ST wq for polling PIOs and accepting an unnecessary
+limitation that no two polling PIOs can progress at the same time. As
+MT wq don't provide much better concurrency, users which require
+higher level of concurrency, like async or fscache, had to implement
+their own thread pool.
+
+Concurrency Managed Workqueue (cmwq) is a reimplementation of wq with
+focus on the following goals.
+
+* Maintain compatibility with the original workqueue API.
+
+* Use per-CPU unified worker pools shared by all wq to provide
+ flexible level of concurrency on demand without wasting a lot of
+ resource.
+
+* Automatically regulate worker pool and level of concurrency so that
+ the API users don't need to worry about such details.
+
+
+3. The Design
+
+In order to ease the asynchronous execution of functions a new
+abstraction, the work item, is introduced.
+
+A work item is a simple struct that holds a pointer to the function
+that is to be executed asynchronously. Whenever a driver or subsystem
+wants a function to be executed asynchronously it has to set up a work
+item pointing to that function and queue that work item on a
+workqueue.
+
+Special purpose threads, called worker threads, execute the functions
+off of the queue, one after the other. If no work is queued, the
+worker threads become idle. These worker threads are managed in so
+called worker-pools.
+
+The cmwq design differentiates between the user-facing workqueues that
+subsystems and drivers queue work items on and the backend mechanism
+which manages worker-pools and processes the queued work items.
+
+There are two worker-pools, one for normal work items and the other
+for high priority ones, for each possible CPU and some extra
+worker-pools to serve work items queued on unbound workqueues - the
+number of these backing pools is dynamic.
+
+Subsystems and drivers can create and queue work items through special
+workqueue API functions as they see fit. They can influence some
+aspects of the way the work items are executed by setting flags on the
+workqueue they are putting the work item on. These flags include
+things like CPU locality, concurrency limits, priority and more. To
+get a detailed overview refer to the API description of
+alloc_workqueue() below.
+
+When a work item is queued to a workqueue, the target worker-pool is
+determined according to the queue parameters and workqueue attributes
+and appended on the shared worklist of the worker-pool. For example,
+unless specifically overridden, a work item of a bound workqueue will
+be queued on the worklist of either normal or highpri worker-pool that
+is associated to the CPU the issuer is running on.
+
+For any worker pool implementation, managing the concurrency level
+(how many execution contexts are active) is an important issue. cmwq
+tries to keep the concurrency at a minimal but sufficient level.
+Minimal to save resources and sufficient in that the system is used at
+its full capacity.
+
+Each worker-pool bound to an actual CPU implements concurrency
+management by hooking into the scheduler. The worker-pool is notified
+whenever an active worker wakes up or sleeps and keeps track of the
+number of the currently runnable workers. Generally, work items are
+not expected to hog a CPU and consume many cycles. That means
+maintaining just enough concurrency to prevent work processing from
+stalling should be optimal. As long as there are one or more runnable
+workers on the CPU, the worker-pool doesn't start execution of a new
+work, but, when the last running worker goes to sleep, it immediately
+schedules a new worker so that the CPU doesn't sit idle while there
+are pending work items. This allows using a minimal number of workers
+without losing execution bandwidth.
+
+Keeping idle workers around doesn't cost other than the memory space
+for kthreads, so cmwq holds onto idle ones for a while before killing
+them.
+
+For unbound workqueues, the number of backing pools is dynamic.
+Unbound workqueue can be assigned custom attributes using
+apply_workqueue_attrs() and workqueue will automatically create
+backing worker pools matching the attributes. The responsibility of
+regulating concurrency level is on the users. There is also a flag to
+mark a bound wq to ignore the concurrency management. Please refer to
+the API section for details.
+
+Forward progress guarantee relies on that workers can be created when
+more execution contexts are necessary, which in turn is guaranteed
+through the use of rescue workers. All work items which might be used
+on code paths that handle memory reclaim are required to be queued on
+wq's that have a rescue-worker reserved for execution under memory
+pressure. Else it is possible that the worker-pool deadlocks waiting
+for execution contexts to free up.
+
+
+4. Application Programming Interface (API)
+
+alloc_workqueue() allocates a wq. The original create_*workqueue()
+functions are deprecated and scheduled for removal. alloc_workqueue()
+takes three arguments - @name, @flags and @max_active. @name is the
+name of the wq and also used as the name of the rescuer thread if
+there is one.
+
+A wq no longer manages execution resources but serves as a domain for
+forward progress guarantee, flush and work item attributes. @flags
+and @max_active control how work items are assigned execution
+resources, scheduled and executed.
+
+@flags:
+
+ WQ_UNBOUND
+
+ Work items queued to an unbound wq are served by the special
+ woker-pools which host workers which are not bound to any
+ specific CPU. This makes the wq behave as a simple execution
+ context provider without concurrency management. The unbound
+ worker-pools try to start execution of work items as soon as
+ possible. Unbound wq sacrifices locality but is useful for
+ the following cases.
+
+ * Wide fluctuation in the concurrency level requirement is
+ expected and using bound wq may end up creating large number
+ of mostly unused workers across different CPUs as the issuer
+ hops through different CPUs.
+
+ * Long running CPU intensive workloads which can be better
+ managed by the system scheduler.
+
+ WQ_FREEZABLE
+
+ A freezable wq participates in the freeze phase of the system
+ suspend operations. Work items on the wq are drained and no
+ new work item starts execution until thawed.
+
+ WQ_MEM_RECLAIM
+
+ All wq which might be used in the memory reclaim paths _MUST_
+ have this flag set. The wq is guaranteed to have at least one
+ execution context regardless of memory pressure.
+
+ WQ_HIGHPRI
+
+ Work items of a highpri wq are queued to the highpri
+ worker-pool of the target cpu. Highpri worker-pools are
+ served by worker threads with elevated nice level.
+
+ Note that normal and highpri worker-pools don't interact with
+ each other. Each maintain its separate pool of workers and
+ implements concurrency management among its workers.
+
+ WQ_CPU_INTENSIVE
+
+ Work items of a CPU intensive wq do not contribute to the
+ concurrency level. In other words, runnable CPU intensive
+ work items will not prevent other work items in the same
+ worker-pool from starting execution. This is useful for bound
+ work items which are expected to hog CPU cycles so that their
+ execution is regulated by the system scheduler.
+
+ Although CPU intensive work items don't contribute to the
+ concurrency level, start of their executions is still
+ regulated by the concurrency management and runnable
+ non-CPU-intensive work items can delay execution of CPU
+ intensive work items.
+
+ This flag is meaningless for unbound wq.
+
+Note that the flag WQ_NON_REENTRANT no longer exists as all workqueues
+are now non-reentrant - any work item is guaranteed to be executed by
+at most one worker system-wide at any given time.
+
+@max_active:
+
+@max_active determines the maximum number of execution contexts per
+CPU which can be assigned to the work items of a wq. For example,
+with @max_active of 16, at most 16 work items of the wq can be
+executing at the same time per CPU.
+
+Currently, for a bound wq, the maximum limit for @max_active is 512
+and the default value used when 0 is specified is 256. For an unbound
+wq, the limit is higher of 512 and 4 * num_possible_cpus(). These
+values are chosen sufficiently high such that they are not the
+limiting factor while providing protection in runaway cases.
+
+The number of active work items of a wq is usually regulated by the
+users of the wq, more specifically, by how many work items the users
+may queue at the same time. Unless there is a specific need for
+throttling the number of active work items, specifying '0' is
+recommended.
+
+Some users depend on the strict execution ordering of ST wq. The
+combination of @max_active of 1 and WQ_UNBOUND is used to achieve this
+behavior. Work items on such wq are always queued to the unbound
+worker-pools and only one work item can be active at any given time thus
+achieving the same ordering property as ST wq.
+
+
+5. Example Execution Scenarios
+
+The following example execution scenarios try to illustrate how cmwq
+behave under different configurations.
+
+ Work items w0, w1, w2 are queued to a bound wq q0 on the same CPU.
+ w0 burns CPU for 5ms then sleeps for 10ms then burns CPU for 5ms
+ again before finishing. w1 and w2 burn CPU for 5ms then sleep for
+ 10ms.
+
+Ignoring all other tasks, works and processing overhead, and assuming
+simple FIFO scheduling, the following is one highly simplified version
+of possible sequences of events with the original wq.
+
+ TIME IN MSECS EVENT
+ 0 w0 starts and burns CPU
+ 5 w0 sleeps
+ 15 w0 wakes up and burns CPU
+ 20 w0 finishes
+ 20 w1 starts and burns CPU
+ 25 w1 sleeps
+ 35 w1 wakes up and finishes
+ 35 w2 starts and burns CPU
+ 40 w2 sleeps
+ 50 w2 wakes up and finishes
+
+And with cmwq with @max_active >= 3,
+
+ TIME IN MSECS EVENT
+ 0 w0 starts and burns CPU
+ 5 w0 sleeps
+ 5 w1 starts and burns CPU
+ 10 w1 sleeps
+ 10 w2 starts and burns CPU
+ 15 w2 sleeps
+ 15 w0 wakes up and burns CPU
+ 20 w0 finishes
+ 20 w1 wakes up and finishes
+ 25 w2 wakes up and finishes
+
+If @max_active == 2,
+
+ TIME IN MSECS EVENT
+ 0 w0 starts and burns CPU
+ 5 w0 sleeps
+ 5 w1 starts and burns CPU
+ 10 w1 sleeps
+ 15 w0 wakes up and burns CPU
+ 20 w0 finishes
+ 20 w1 wakes up and finishes
+ 20 w2 starts and burns CPU
+ 25 w2 sleeps
+ 35 w2 wakes up and finishes
+
+Now, let's assume w1 and w2 are queued to a different wq q1 which has
+WQ_CPU_INTENSIVE set,
+
+ TIME IN MSECS EVENT
+ 0 w0 starts and burns CPU
+ 5 w0 sleeps
+ 5 w1 and w2 start and burn CPU
+ 10 w1 sleeps
+ 15 w2 sleeps
+ 15 w0 wakes up and burns CPU
+ 20 w0 finishes
+ 20 w1 wakes up and finishes
+ 25 w2 wakes up and finishes
+
+
+6. Guidelines
+
+* Do not forget to use WQ_MEM_RECLAIM if a wq may process work items
+ which are used during memory reclaim. Each wq with WQ_MEM_RECLAIM
+ set has an execution context reserved for it. If there is
+ dependency among multiple work items used during memory reclaim,
+ they should be queued to separate wq each with WQ_MEM_RECLAIM.
+
+* Unless strict ordering is required, there is no need to use ST wq.
+
+* Unless there is a specific need, using 0 for @max_active is
+ recommended. In most use cases, concurrency level usually stays
+ well under the default limit.
+
+* A wq serves as a domain for forward progress guarantee
+ (WQ_MEM_RECLAIM, flush and work item attributes. Work items which
+ are not involved in memory reclaim and don't need to be flushed as a
+ part of a group of work items, and don't require any special
+ attribute, can use one of the system wq. There is no difference in
+ execution characteristics between using a dedicated wq and a system
+ wq.
+
+* Unless work items are expected to consume a huge amount of CPU
+ cycles, using a bound wq is usually beneficial due to the increased
+ level of locality in wq operations and work item execution.
+
+
+7. Debugging
+
+Because the work functions are executed by generic worker threads
+there are a few tricks needed to shed some light on misbehaving
+workqueue users.
+
+Worker threads show up in the process list as:
+
+root 5671 0.0 0.0 0 0 ? S 12:07 0:00 [kworker/0:1]
+root 5672 0.0 0.0 0 0 ? S 12:07 0:00 [kworker/1:2]
+root 5673 0.0 0.0 0 0 ? S 12:12 0:00 [kworker/0:0]
+root 5674 0.0 0.0 0 0 ? S 12:13 0:00 [kworker/1:0]
+
+If kworkers are going crazy (using too much cpu), there are two types
+of possible problems:
+
+ 1. Something beeing scheduled in rapid succession
+ 2. A single work item that consumes lots of cpu cycles
+
+The first one can be tracked using tracing:
+
+ $ echo workqueue:workqueue_queue_work > /sys/kernel/debug/tracing/set_event
+ $ cat /sys/kernel/debug/tracing/trace_pipe > out.txt
+ (wait a few secs)
+ ^C
+
+If something is busy looping on work queueing, it would be dominating
+the output and the offender can be determined with the work item
+function.
+
+For the second type of problems it should be possible to just check
+the stack trace of the offending worker thread.
+
+ $ cat /proc/THE_OFFENDING_KWORKER/stack
+
+The work item's function should be trivially visible in the stack
+trace.
diff --git a/Documentation/x86/00-INDEX b/Documentation/x86/00-INDEX
new file mode 100644
index 000000000..692264456
--- /dev/null
+++ b/Documentation/x86/00-INDEX
@@ -0,0 +1,20 @@
+00-INDEX
+ - this file
+boot.txt
+ - List of boot protocol versions
+early-microcode.txt
+ - How to load microcode from an initrd-CPIO archive early to fix CPU issues.
+earlyprintk.txt
+ - Using earlyprintk with a USB2 debug port key.
+entry_64.txt
+ - Describe (some of the) kernel entry points for x86.
+exception-tables.txt
+ - why and how Linux kernel uses exception tables on x86
+mtrr.txt
+ - how to use x86 Memory Type Range Registers to increase performance
+pat.txt
+ - Page Attribute Table intro and API
+usb-legacy-support.txt
+ - how to fix/avoid quirks when using emulated PS/2 mouse/keyboard.
+zero-page.txt
+ - layout of the first page of memory.
diff --git a/Documentation/x86/boot.txt b/Documentation/x86/boot.txt
new file mode 100644
index 000000000..88b85899d
--- /dev/null
+++ b/Documentation/x86/boot.txt
@@ -0,0 +1,1131 @@
+ THE LINUX/x86 BOOT PROTOCOL
+ ---------------------------
+
+On the x86 platform, the Linux kernel uses a rather complicated boot
+convention. This has evolved partially due to historical aspects, as
+well as the desire in the early days to have the kernel itself be a
+bootable image, the complicated PC memory model and due to changed
+expectations in the PC industry caused by the effective demise of
+real-mode DOS as a mainstream operating system.
+
+Currently, the following versions of the Linux/x86 boot protocol exist.
+
+Old kernels: zImage/Image support only. Some very early kernels
+ may not even support a command line.
+
+Protocol 2.00: (Kernel 1.3.73) Added bzImage and initrd support, as
+ well as a formalized way to communicate between the
+ boot loader and the kernel. setup.S made relocatable,
+ although the traditional setup area still assumed
+ writable.
+
+Protocol 2.01: (Kernel 1.3.76) Added a heap overrun warning.
+
+Protocol 2.02: (Kernel 2.4.0-test3-pre3) New command line protocol.
+ Lower the conventional memory ceiling. No overwrite
+ of the traditional setup area, thus making booting
+ safe for systems which use the EBDA from SMM or 32-bit
+ BIOS entry points. zImage deprecated but still
+ supported.
+
+Protocol 2.03: (Kernel 2.4.18-pre1) Explicitly makes the highest possible
+ initrd address available to the bootloader.
+
+Protocol 2.04: (Kernel 2.6.14) Extend the syssize field to four bytes.
+
+Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable.
+ Introduce relocatable_kernel and kernel_alignment fields.
+
+Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of
+ the boot command line.
+
+Protocol 2.07: (Kernel 2.6.24) Added paravirtualised boot protocol.
+ Introduced hardware_subarch and hardware_subarch_data
+ and KEEP_SEGMENTS flag in load_flags.
+
+Protocol 2.08: (Kernel 2.6.26) Added crc32 checksum and ELF format
+ payload. Introduced payload_offset and payload_length
+ fields to aid in locating the payload.
+
+Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical
+ pointer to single linked list of struct setup_data.
+
+Protocol 2.10: (Kernel 2.6.31) Added a protocol for relaxed alignment
+ beyond the kernel_alignment added, new init_size and
+ pref_address fields. Added extended boot loader IDs.
+
+Protocol 2.11: (Kernel 3.6) Added a field for offset of EFI handover
+ protocol entry point.
+
+Protocol 2.12: (Kernel 3.8) Added the xloadflags field and extension fields
+ to struct boot_params for loading bzImage and ramdisk
+ above 4G in 64bit.
+
+**** MEMORY LAYOUT
+
+The traditional memory map for the kernel loader, used for Image or
+zImage kernels, typically looks like:
+
+ | |
+0A0000 +------------------------+
+ | Reserved for BIOS | Do not use. Reserved for BIOS EBDA.
+09A000 +------------------------+
+ | Command line |
+ | Stack/heap | For use by the kernel real-mode code.
+098000 +------------------------+
+ | Kernel setup | The kernel real-mode code.
+090200 +------------------------+
+ | Kernel boot sector | The kernel legacy boot sector.
+090000 +------------------------+
+ | Protected-mode kernel | The bulk of the kernel image.
+010000 +------------------------+
+ | Boot loader | <- Boot sector entry point 0000:7C00
+001000 +------------------------+
+ | Reserved for MBR/BIOS |
+000800 +------------------------+
+ | Typically used by MBR |
+000600 +------------------------+
+ | BIOS use only |
+000000 +------------------------+
+
+
+When using bzImage, the protected-mode kernel was relocated to
+0x100000 ("high memory"), and the kernel real-mode block (boot sector,
+setup, and stack/heap) was made relocatable to any address between
+0x10000 and end of low memory. Unfortunately, in protocols 2.00 and
+2.01 the 0x90000+ memory range is still used internally by the kernel;
+the 2.02 protocol resolves that problem.
+
+It is desirable to keep the "memory ceiling" -- the highest point in
+low memory touched by the boot loader -- as low as possible, since
+some newer BIOSes have begun to allocate some rather large amounts of
+memory, called the Extended BIOS Data Area, near the top of low
+memory. The boot loader should use the "INT 12h" BIOS call to verify
+how much low memory is available.
+
+Unfortunately, if INT 12h reports that the amount of memory is too
+low, there is usually nothing the boot loader can do but to report an
+error to the user. The boot loader should therefore be designed to
+take up as little space in low memory as it reasonably can. For
+zImage or old bzImage kernels, which need data written into the
+0x90000 segment, the boot loader should make sure not to use memory
+above the 0x9A000 point; too many BIOSes will break above that point.
+
+For a modern bzImage kernel with boot protocol version >= 2.02, a
+memory layout like the following is suggested:
+
+ ~ ~
+ | Protected-mode kernel |
+100000 +------------------------+
+ | I/O memory hole |
+0A0000 +------------------------+
+ | Reserved for BIOS | Leave as much as possible unused
+ ~ ~
+ | Command line | (Can also be below the X+10000 mark)
+X+10000 +------------------------+
+ | Stack/heap | For use by the kernel real-mode code.
+X+08000 +------------------------+
+ | Kernel setup | The kernel real-mode code.
+ | Kernel boot sector | The kernel legacy boot sector.
+X +------------------------+
+ | Boot loader | <- Boot sector entry point 0000:7C00
+001000 +------------------------+
+ | Reserved for MBR/BIOS |
+000800 +------------------------+
+ | Typically used by MBR |
+000600 +------------------------+
+ | BIOS use only |
+000000 +------------------------+
+
+... where the address X is as low as the design of the boot loader
+permits.
+
+
+**** THE REAL-MODE KERNEL HEADER
+
+In the following text, and anywhere in the kernel boot sequence, "a
+sector" refers to 512 bytes. It is independent of the actual sector
+size of the underlying medium.
+
+The first step in loading a Linux kernel should be to load the
+real-mode code (boot sector and setup code) and then examine the
+following header at offset 0x01f1. The real-mode code can total up to
+32K, although the boot loader may choose to load only the first two
+sectors (1K) and then examine the bootup sector size.
+
+The header looks like:
+
+Offset Proto Name Meaning
+/Size
+
+01F1/1 ALL(1 setup_sects The size of the setup in sectors
+01F2/2 ALL root_flags If set, the root is mounted readonly
+01F4/4 2.04+(2 syssize The size of the 32-bit code in 16-byte paras
+01F8/2 ALL ram_size DO NOT USE - for bootsect.S use only
+01FA/2 ALL vid_mode Video mode control
+01FC/2 ALL root_dev Default root device number
+01FE/2 ALL boot_flag 0xAA55 magic number
+0200/2 2.00+ jump Jump instruction
+0202/4 2.00+ header Magic signature "HdrS"
+0206/2 2.00+ version Boot protocol version supported
+0208/4 2.00+ realmode_swtch Boot loader hook (see below)
+020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete)
+020E/2 2.00+ kernel_version Pointer to kernel version string
+0210/1 2.00+ type_of_loader Boot loader identifier
+0211/1 2.00+ loadflags Boot protocol option flags
+0212/2 2.00+ setup_move_size Move to high memory size (used with hooks)
+0214/4 2.00+ code32_start Boot loader hook (see below)
+0218/4 2.00+ ramdisk_image initrd load address (set by boot loader)
+021C/4 2.00+ ramdisk_size initrd size (set by boot loader)
+0220/4 2.00+ bootsect_kludge DO NOT USE - for bootsect.S use only
+0224/2 2.01+ heap_end_ptr Free memory after setup end
+0226/1 2.02+(3 ext_loader_ver Extended boot loader version
+0227/1 2.02+(3 ext_loader_type Extended boot loader ID
+0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line
+022C/4 2.03+ initrd_addr_max Highest legal initrd address
+0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel
+0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not
+0235/1 2.10+ min_alignment Minimum alignment, as a power of two
+0236/2 2.12+ xloadflags Boot protocol option flags
+0238/4 2.06+ cmdline_size Maximum size of the kernel command line
+023C/4 2.07+ hardware_subarch Hardware subarchitecture
+0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data
+0248/4 2.08+ payload_offset Offset of kernel payload
+024C/4 2.08+ payload_length Length of kernel payload
+0250/8 2.09+ setup_data 64-bit physical pointer to linked list
+ of struct setup_data
+0258/8 2.10+ pref_address Preferred loading address
+0260/4 2.10+ init_size Linear memory required during initialization
+0264/4 2.11+ handover_offset Offset of handover entry point
+
+(1) For backwards compatibility, if the setup_sects field contains 0, the
+ real value is 4.
+
+(2) For boot protocol prior to 2.04, the upper two bytes of the syssize
+ field are unusable, which means the size of a bzImage kernel
+ cannot be determined.
+
+(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
+
+If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
+the boot protocol version is "old". Loading an old kernel, the
+following parameters should be assumed:
+
+ Image type = zImage
+ initrd not supported
+ Real-mode kernel must be located at 0x90000.
+
+Otherwise, the "version" field contains the protocol version,
+e.g. protocol version 2.01 will contain 0x0201 in this field. When
+setting fields in the header, you must make sure only to set fields
+supported by the protocol version in use.
+
+
+**** DETAILS OF HEADER FIELDS
+
+For each field, some are information from the kernel to the bootloader
+("read"), some are expected to be filled out by the bootloader
+("write"), and some are expected to be read and modified by the
+bootloader ("modify").
+
+All general purpose boot loaders should write the fields marked
+(obligatory). Boot loaders who want to load the kernel at a
+nonstandard address should fill in the fields marked (reloc); other
+boot loaders can ignore those fields.
+
+The byte order of all fields is littleendian (this is x86, after all.)
+
+Field name: setup_sects
+Type: read
+Offset/size: 0x1f1/1
+Protocol: ALL
+
+ The size of the setup code in 512-byte sectors. If this field is
+ 0, the real value is 4. The real-mode code consists of the boot
+ sector (always one 512-byte sector) plus the setup code.
+
+Field name: root_flags
+Type: modify (optional)
+Offset/size: 0x1f2/2
+Protocol: ALL
+
+ If this field is nonzero, the root defaults to readonly. The use of
+ this field is deprecated; use the "ro" or "rw" options on the
+ command line instead.
+
+Field name: syssize
+Type: read
+Offset/size: 0x1f4/4 (protocol 2.04+) 0x1f4/2 (protocol ALL)
+Protocol: 2.04+
+
+ The size of the protected-mode code in units of 16-byte paragraphs.
+ For protocol versions older than 2.04 this field is only two bytes
+ wide, and therefore cannot be trusted for the size of a kernel if
+ the LOAD_HIGH flag is set.
+
+Field name: ram_size
+Type: kernel internal
+Offset/size: 0x1f8/2
+Protocol: ALL
+
+ This field is obsolete.
+
+Field name: vid_mode
+Type: modify (obligatory)
+Offset/size: 0x1fa/2
+
+ Please see the section on SPECIAL COMMAND LINE OPTIONS.
+
+Field name: root_dev
+Type: modify (optional)
+Offset/size: 0x1fc/2
+Protocol: ALL
+
+ The default root device device number. The use of this field is
+ deprecated, use the "root=" option on the command line instead.
+
+Field name: boot_flag
+Type: read
+Offset/size: 0x1fe/2
+Protocol: ALL
+
+ Contains 0xAA55. This is the closest thing old Linux kernels have
+ to a magic number.
+
+Field name: jump
+Type: read
+Offset/size: 0x200/2
+Protocol: 2.00+
+
+ Contains an x86 jump instruction, 0xEB followed by a signed offset
+ relative to byte 0x202. This can be used to determine the size of
+ the header.
+
+Field name: header
+Type: read
+Offset/size: 0x202/4
+Protocol: 2.00+
+
+ Contains the magic number "HdrS" (0x53726448).
+
+Field name: version
+Type: read
+Offset/size: 0x206/2
+Protocol: 2.00+
+
+ Contains the boot protocol version, in (major << 8)+minor format,
+ e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version
+ 10.17.
+
+Field name: realmode_swtch
+Type: modify (optional)
+Offset/size: 0x208/4
+Protocol: 2.00+
+
+ Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.)
+
+Field name: start_sys_seg
+Type: read
+Offset/size: 0x20c/2
+Protocol: 2.00+
+
+ The load low segment (0x1000). Obsolete.
+
+Field name: kernel_version
+Type: read
+Offset/size: 0x20e/2
+Protocol: 2.00+
+
+ If set to a nonzero value, contains a pointer to a NUL-terminated
+ human-readable kernel version number string, less 0x200. This can
+ be used to display the kernel version to the user. This value
+ should be less than (0x200*setup_sects).
+
+ For example, if this value is set to 0x1c00, the kernel version
+ number string can be found at offset 0x1e00 in the kernel file.
+ This is a valid value if and only if the "setup_sects" field
+ contains the value 15 or higher, as:
+
+ 0x1c00 < 15*0x200 (= 0x1e00) but
+ 0x1c00 >= 14*0x200 (= 0x1c00)
+
+ 0x1c00 >> 9 = 14, so the minimum value for setup_secs is 15.
+
+Field name: type_of_loader
+Type: write (obligatory)
+Offset/size: 0x210/1
+Protocol: 2.00+
+
+ If your boot loader has an assigned id (see table below), enter
+ 0xTV here, where T is an identifier for the boot loader and V is
+ a version number. Otherwise, enter 0xFF here.
+
+ For boot loader IDs above T = 0xD, write T = 0xE to this field and
+ write the extended ID minus 0x10 to the ext_loader_type field.
+ Similarly, the ext_loader_ver field can be used to provide more than
+ four bits for the bootloader version.
+
+ For example, for T = 0x15, V = 0x234, write:
+
+ type_of_loader <- 0xE4
+ ext_loader_type <- 0x05
+ ext_loader_ver <- 0x23
+
+ Assigned boot loader ids (hexadecimal):
+
+ 0 LILO (0x00 reserved for pre-2.00 bootloader)
+ 1 Loadlin
+ 2 bootsect-loader (0x20, all other values reserved)
+ 3 Syslinux
+ 4 Etherboot/gPXE/iPXE
+ 5 ELILO
+ 7 GRUB
+ 8 U-Boot
+ 9 Xen
+ A Gujin
+ B Qemu
+ C Arcturus Networks uCbootloader
+ D kexec-tools
+ E Extended (see ext_loader_type)
+ F Special (0xFF = undefined)
+ 10 Reserved
+ 11 Minimal Linux Bootloader <http://sebastian-plotz.blogspot.de>
+ 12 OVMF UEFI virtualization stack
+
+ Please contact <hpa@zytor.com> if you need a bootloader ID
+ value assigned.
+
+Field name: loadflags
+Type: modify (obligatory)
+Offset/size: 0x211/1
+Protocol: 2.00+
+
+ This field is a bitmask.
+
+ Bit 0 (read): LOADED_HIGH
+ - If 0, the protected-mode code is loaded at 0x10000.
+ - If 1, the protected-mode code is loaded at 0x100000.
+
+ Bit 1 (kernel internal): ALSR_FLAG
+ - Used internally by the compressed kernel to communicate
+ KASLR status to kernel proper.
+ If 1, KASLR enabled.
+ If 0, KASLR disabled.
+
+ Bit 5 (write): QUIET_FLAG
+ - If 0, print early messages.
+ - If 1, suppress early messages.
+ This requests to the kernel (decompressor and early
+ kernel) to not write early messages that require
+ accessing the display hardware directly.
+
+ Bit 6 (write): KEEP_SEGMENTS
+ Protocol: 2.07+
+ - If 0, reload the segment registers in the 32bit entry point.
+ - If 1, do not reload the segment registers in the 32bit entry point.
+ Assume that %cs %ds %ss %es are all set to flat segments with
+ a base of 0 (or the equivalent for their environment).
+
+ Bit 7 (write): CAN_USE_HEAP
+ Set this bit to 1 to indicate that the value entered in the
+ heap_end_ptr is valid. If this field is clear, some setup code
+ functionality will be disabled.
+
+Field name: setup_move_size
+Type: modify (obligatory)
+Offset/size: 0x212/2
+Protocol: 2.00-2.01
+
+ When using protocol 2.00 or 2.01, if the real mode kernel is not
+ loaded at 0x90000, it gets moved there later in the loading
+ sequence. Fill in this field if you want additional data (such as
+ the kernel command line) moved in addition to the real-mode kernel
+ itself.
+
+ The unit is bytes starting with the beginning of the boot sector.
+
+ This field is can be ignored when the protocol is 2.02 or higher, or
+ if the real-mode code is loaded at 0x90000.
+
+Field name: code32_start
+Type: modify (optional, reloc)
+Offset/size: 0x214/4
+Protocol: 2.00+
+
+ The address to jump to in protected mode. This defaults to the load
+ address of the kernel, and can be used by the boot loader to
+ determine the proper load address.
+
+ This field can be modified for two purposes:
+
+ 1. as a boot loader hook (see ADVANCED BOOT LOADER HOOKS below.)
+
+ 2. if a bootloader which does not install a hook loads a
+ relocatable kernel at a nonstandard address it will have to modify
+ this field to point to the load address.
+
+Field name: ramdisk_image
+Type: write (obligatory)
+Offset/size: 0x218/4
+Protocol: 2.00+
+
+ The 32-bit linear address of the initial ramdisk or ramfs. Leave at
+ zero if there is no initial ramdisk/ramfs.
+
+Field name: ramdisk_size
+Type: write (obligatory)
+Offset/size: 0x21c/4
+Protocol: 2.00+
+
+ Size of the initial ramdisk or ramfs. Leave at zero if there is no
+ initial ramdisk/ramfs.
+
+Field name: bootsect_kludge
+Type: kernel internal
+Offset/size: 0x220/4
+Protocol: 2.00+
+
+ This field is obsolete.
+
+Field name: heap_end_ptr
+Type: write (obligatory)
+Offset/size: 0x224/2
+Protocol: 2.01+
+
+ Set this field to the offset (from the beginning of the real-mode
+ code) of the end of the setup stack/heap, minus 0x0200.
+
+Field name: ext_loader_ver
+Type: write (optional)
+Offset/size: 0x226/1
+Protocol: 2.02+
+
+ This field is used as an extension of the version number in the
+ type_of_loader field. The total version number is considered to be
+ (type_of_loader & 0x0f) + (ext_loader_ver << 4).
+
+ The use of this field is boot loader specific. If not written, it
+ is zero.
+
+ Kernels prior to 2.6.31 did not recognize this field, but it is safe
+ to write for protocol version 2.02 or higher.
+
+Field name: ext_loader_type
+Type: write (obligatory if (type_of_loader & 0xf0) == 0xe0)
+Offset/size: 0x227/1
+Protocol: 2.02+
+
+ This field is used as an extension of the type number in
+ type_of_loader field. If the type in type_of_loader is 0xE, then
+ the actual type is (ext_loader_type + 0x10).
+
+ This field is ignored if the type in type_of_loader is not 0xE.
+
+ Kernels prior to 2.6.31 did not recognize this field, but it is safe
+ to write for protocol version 2.02 or higher.
+
+Field name: cmd_line_ptr
+Type: write (obligatory)
+Offset/size: 0x228/4
+Protocol: 2.02+
+
+ Set this field to the linear address of the kernel command line.
+ The kernel command line can be located anywhere between the end of
+ the setup heap and 0xA0000; it does not have to be located in the
+ same 64K segment as the real-mode code itself.
+
+ Fill in this field even if your boot loader does not support a
+ command line, in which case you can point this to an empty string
+ (or better yet, to the string "auto".) If this field is left at
+ zero, the kernel will assume that your boot loader does not support
+ the 2.02+ protocol.
+
+Field name: initrd_addr_max
+Type: read
+Offset/size: 0x22c/4
+Protocol: 2.03+
+
+ The maximum address that may be occupied by the initial
+ ramdisk/ramfs contents. For boot protocols 2.02 or earlier, this
+ field is not present, and the maximum address is 0x37FFFFFF. (This
+ address is defined as the address of the highest safe byte, so if
+ your ramdisk is exactly 131072 bytes long and this field is
+ 0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
+
+Field name: kernel_alignment
+Type: read/modify (reloc)
+Offset/size: 0x230/4
+Protocol: 2.05+ (read), 2.10+ (modify)
+
+ Alignment unit required by the kernel (if relocatable_kernel is
+ true.) A relocatable kernel that is loaded at an alignment
+ incompatible with the value in this field will be realigned during
+ kernel initialization.
+
+ Starting with protocol version 2.10, this reflects the kernel
+ alignment preferred for optimal performance; it is possible for the
+ loader to modify this field to permit a lesser alignment. See the
+ min_alignment and pref_address field below.
+
+Field name: relocatable_kernel
+Type: read (reloc)
+Offset/size: 0x234/1
+Protocol: 2.05+
+
+ If this field is nonzero, the protected-mode part of the kernel can
+ be loaded at any address that satisfies the kernel_alignment field.
+ After loading, the boot loader must set the code32_start field to
+ point to the loaded code, or to a boot loader hook.
+
+Field name: min_alignment
+Type: read (reloc)
+Offset/size: 0x235/1
+Protocol: 2.10+
+
+ This field, if nonzero, indicates as a power of two the minimum
+ alignment required, as opposed to preferred, by the kernel to boot.
+ If a boot loader makes use of this field, it should update the
+ kernel_alignment field with the alignment unit desired; typically:
+
+ kernel_alignment = 1 << min_alignment
+
+ There may be a considerable performance cost with an excessively
+ misaligned kernel. Therefore, a loader should typically try each
+ power-of-two alignment from kernel_alignment down to this alignment.
+
+Field name: xloadflags
+Type: read
+Offset/size: 0x236/2
+Protocol: 2.12+
+
+ This field is a bitmask.
+
+ Bit 0 (read): XLF_KERNEL_64
+ - If 1, this kernel has the legacy 64-bit entry point at 0x200.
+
+ Bit 1 (read): XLF_CAN_BE_LOADED_ABOVE_4G
+ - If 1, kernel/boot_params/cmdline/ramdisk can be above 4G.
+
+ Bit 2 (read): XLF_EFI_HANDOVER_32
+ - If 1, the kernel supports the 32-bit EFI handoff entry point
+ given at handover_offset.
+
+ Bit 3 (read): XLF_EFI_HANDOVER_64
+ - If 1, the kernel supports the 64-bit EFI handoff entry point
+ given at handover_offset + 0x200.
+
+ Bit 4 (read): XLF_EFI_KEXEC
+ - If 1, the kernel supports kexec EFI boot with EFI runtime support.
+
+Field name: cmdline_size
+Type: read
+Offset/size: 0x238/4
+Protocol: 2.06+
+
+ The maximum size of the command line without the terminating
+ zero. This means that the command line can contain at most
+ cmdline_size characters. With protocol version 2.05 and earlier, the
+ maximum size was 255.
+
+Field name: hardware_subarch
+Type: write (optional, defaults to x86/PC)
+Offset/size: 0x23c/4
+Protocol: 2.07+
+
+ In a paravirtualized environment the hardware low level architectural
+ pieces such as interrupt handling, page table handling, and
+ accessing process control registers needs to be done differently.
+
+ This field allows the bootloader to inform the kernel we are in one
+ one of those environments.
+
+ 0x00000000 The default x86/PC environment
+ 0x00000001 lguest
+ 0x00000002 Xen
+ 0x00000003 Moorestown MID
+ 0x00000004 CE4100 TV Platform
+
+Field name: hardware_subarch_data
+Type: write (subarch-dependent)
+Offset/size: 0x240/8
+Protocol: 2.07+
+
+ A pointer to data that is specific to hardware subarch
+ This field is currently unused for the default x86/PC environment,
+ do not modify.
+
+Field name: payload_offset
+Type: read
+Offset/size: 0x248/4
+Protocol: 2.08+
+
+ If non-zero then this field contains the offset from the beginning
+ of the protected-mode code to the payload.
+
+ The payload may be compressed. The format of both the compressed and
+ uncompressed data should be determined using the standard magic
+ numbers. The currently supported compression formats are gzip
+ (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A), LZMA
+ (magic number 5D 00), XZ (magic number FD 37), and LZ4 (magic number
+ 02 21). The uncompressed payload is currently always ELF (magic
+ number 7F 45 4C 46).
+
+Field name: payload_length
+Type: read
+Offset/size: 0x24c/4
+Protocol: 2.08+
+
+ The length of the payload.
+
+Field name: setup_data
+Type: write (special)
+Offset/size: 0x250/8
+Protocol: 2.09+
+
+ The 64-bit physical pointer to NULL terminated single linked list of
+ struct setup_data. This is used to define a more extensible boot
+ parameters passing mechanism. The definition of struct setup_data is
+ as follow:
+
+ struct setup_data {
+ u64 next;
+ u32 type;
+ u32 len;
+ u8 data[0];
+ };
+
+ Where, the next is a 64-bit physical pointer to the next node of
+ linked list, the next field of the last node is 0; the type is used
+ to identify the contents of data; the len is the length of data
+ field; the data holds the real payload.
+
+ This list may be modified at a number of points during the bootup
+ process. Therefore, when modifying this list one should always make
+ sure to consider the case where the linked list already contains
+ entries.
+
+Field name: pref_address
+Type: read (reloc)
+Offset/size: 0x258/8
+Protocol: 2.10+
+
+ This field, if nonzero, represents a preferred load address for the
+ kernel. A relocating bootloader should attempt to load at this
+ address if possible.
+
+ A non-relocatable kernel will unconditionally move itself and to run
+ at this address.
+
+Field name: init_size
+Type: read
+Offset/size: 0x260/4
+
+ This field indicates the amount of linear contiguous memory starting
+ at the kernel runtime start address that the kernel needs before it
+ is capable of examining its memory map. This is not the same thing
+ as the total amount of memory the kernel needs to boot, but it can
+ be used by a relocating boot loader to help select a safe load
+ address for the kernel.
+
+ The kernel runtime start address is determined by the following algorithm:
+
+ if (relocatable_kernel)
+ runtime_start = align_up(load_address, kernel_alignment)
+ else
+ runtime_start = pref_address
+
+Field name: handover_offset
+Type: read
+Offset/size: 0x264/4
+
+ This field is the offset from the beginning of the kernel image to
+ the EFI handover protocol entry point. Boot loaders using the EFI
+ handover protocol to boot the kernel should jump to this offset.
+
+ See EFI HANDOVER PROTOCOL below for more details.
+
+
+**** THE IMAGE CHECKSUM
+
+From boot protocol version 2.08 onwards the CRC-32 is calculated over
+the entire file using the characteristic polynomial 0x04C11DB7 and an
+initial remainder of 0xffffffff. The checksum is appended to the
+file; therefore the CRC of the file up to the limit specified in the
+syssize field of the header is always 0.
+
+
+**** THE KERNEL COMMAND LINE
+
+The kernel command line has become an important way for the boot
+loader to communicate with the kernel. Some of its options are also
+relevant to the boot loader itself, see "special command line options"
+below.
+
+The kernel command line is a null-terminated string. The maximum
+length can be retrieved from the field cmdline_size. Before protocol
+version 2.06, the maximum was 255 characters. A string that is too
+long will be automatically truncated by the kernel.
+
+If the boot protocol version is 2.02 or later, the address of the
+kernel command line is given by the header field cmd_line_ptr (see
+above.) This address can be anywhere between the end of the setup
+heap and 0xA0000.
+
+If the protocol version is *not* 2.02 or higher, the kernel
+command line is entered using the following protocol:
+
+ At offset 0x0020 (word), "cmd_line_magic", enter the magic
+ number 0xA33F.
+
+ At offset 0x0022 (word), "cmd_line_offset", enter the offset
+ of the kernel command line (relative to the start of the
+ real-mode kernel).
+
+ The kernel command line *must* be within the memory region
+ covered by setup_move_size, so you may need to adjust this
+ field.
+
+
+**** MEMORY LAYOUT OF THE REAL-MODE CODE
+
+The real-mode code requires a stack/heap to be set up, as well as
+memory allocated for the kernel command line. This needs to be done
+in the real-mode accessible memory in bottom megabyte.
+
+It should be noted that modern machines often have a sizable Extended
+BIOS Data Area (EBDA). As a result, it is advisable to use as little
+of the low megabyte as possible.
+
+Unfortunately, under the following circumstances the 0x90000 memory
+segment has to be used:
+
+ - When loading a zImage kernel ((loadflags & 0x01) == 0).
+ - When loading a 2.01 or earlier boot protocol kernel.
+
+ -> For the 2.00 and 2.01 boot protocols, the real-mode code
+ can be loaded at another address, but it is internally
+ relocated to 0x90000. For the "old" protocol, the
+ real-mode code must be loaded at 0x90000.
+
+When loading at 0x90000, avoid using memory above 0x9a000.
+
+For boot protocol 2.02 or higher, the command line does not have to be
+located in the same 64K segment as the real-mode setup code; it is
+thus permitted to give the stack/heap the full 64K segment and locate
+the command line above it.
+
+The kernel command line should not be located below the real-mode
+code, nor should it be located in high memory.
+
+
+**** SAMPLE BOOT CONFIGURATION
+
+As a sample configuration, assume the following layout of the real
+mode segment:
+
+ When loading below 0x90000, use the entire segment:
+
+ 0x0000-0x7fff Real mode kernel
+ 0x8000-0xdfff Stack and heap
+ 0xe000-0xffff Kernel command line
+
+ When loading at 0x90000 OR the protocol version is 2.01 or earlier:
+
+ 0x0000-0x7fff Real mode kernel
+ 0x8000-0x97ff Stack and heap
+ 0x9800-0x9fff Kernel command line
+
+Such a boot loader should enter the following fields in the header:
+
+ unsigned long base_ptr; /* base address for real-mode segment */
+
+ if ( setup_sects == 0 ) {
+ setup_sects = 4;
+ }
+
+ if ( protocol >= 0x0200 ) {
+ type_of_loader = <type code>;
+ if ( loading_initrd ) {
+ ramdisk_image = <initrd_address>;
+ ramdisk_size = <initrd_size>;
+ }
+
+ if ( protocol >= 0x0202 && loadflags & 0x01 )
+ heap_end = 0xe000;
+ else
+ heap_end = 0x9800;
+
+ if ( protocol >= 0x0201 ) {
+ heap_end_ptr = heap_end - 0x200;
+ loadflags |= 0x80; /* CAN_USE_HEAP */
+ }
+
+ if ( protocol >= 0x0202 ) {
+ cmd_line_ptr = base_ptr + heap_end;
+ strcpy(cmd_line_ptr, cmdline);
+ } else {
+ cmd_line_magic = 0xA33F;
+ cmd_line_offset = heap_end;
+ setup_move_size = heap_end + strlen(cmdline)+1;
+ strcpy(base_ptr+cmd_line_offset, cmdline);
+ }
+ } else {
+ /* Very old kernel */
+
+ heap_end = 0x9800;
+
+ cmd_line_magic = 0xA33F;
+ cmd_line_offset = heap_end;
+
+ /* A very old kernel MUST have its real-mode code
+ loaded at 0x90000 */
+
+ if ( base_ptr != 0x90000 ) {
+ /* Copy the real-mode kernel */
+ memcpy(0x90000, base_ptr, (setup_sects+1)*512);
+ base_ptr = 0x90000; /* Relocated */
+ }
+
+ strcpy(0x90000+cmd_line_offset, cmdline);
+
+ /* It is recommended to clear memory up to the 32K mark */
+ memset(0x90000 + (setup_sects+1)*512, 0,
+ (64-(setup_sects+1))*512);
+ }
+
+
+**** LOADING THE REST OF THE KERNEL
+
+The 32-bit (non-real-mode) kernel starts at offset (setup_sects+1)*512
+in the kernel file (again, if setup_sects == 0 the real value is 4.)
+It should be loaded at address 0x10000 for Image/zImage kernels and
+0x100000 for bzImage kernels.
+
+The kernel is a bzImage kernel if the protocol >= 2.00 and the 0x01
+bit (LOAD_HIGH) in the loadflags field is set:
+
+ is_bzImage = (protocol >= 0x0200) && (loadflags & 0x01);
+ load_address = is_bzImage ? 0x100000 : 0x10000;
+
+Note that Image/zImage kernels can be up to 512K in size, and thus use
+the entire 0x10000-0x90000 range of memory. This means it is pretty
+much a requirement for these kernels to load the real-mode part at
+0x90000. bzImage kernels allow much more flexibility.
+
+
+**** SPECIAL COMMAND LINE OPTIONS
+
+If the command line provided by the boot loader is entered by the
+user, the user may expect the following command line options to work.
+They should normally not be deleted from the kernel command line even
+though not all of them are actually meaningful to the kernel. Boot
+loader authors who need additional command line options for the boot
+loader itself should get them registered in
+Documentation/kernel-parameters.txt to make sure they will not
+conflict with actual kernel options now or in the future.
+
+ vga=<mode>
+ <mode> here is either an integer (in C notation, either
+ decimal, octal, or hexadecimal) or one of the strings
+ "normal" (meaning 0xFFFF), "ext" (meaning 0xFFFE) or "ask"
+ (meaning 0xFFFD). This value should be entered into the
+ vid_mode field, as it is used by the kernel before the command
+ line is parsed.
+
+ mem=<size>
+ <size> is an integer in C notation optionally followed by
+ (case insensitive) K, M, G, T, P or E (meaning << 10, << 20,
+ << 30, << 40, << 50 or << 60). This specifies the end of
+ memory to the kernel. This affects the possible placement of
+ an initrd, since an initrd should be placed near end of
+ memory. Note that this is an option to *both* the kernel and
+ the bootloader!
+
+ initrd=<file>
+ An initrd should be loaded. The meaning of <file> is
+ obviously bootloader-dependent, and some boot loaders
+ (e.g. LILO) do not have such a command.
+
+In addition, some boot loaders add the following options to the
+user-specified command line:
+
+ BOOT_IMAGE=<file>
+ The boot image which was loaded. Again, the meaning of <file>
+ is obviously bootloader-dependent.
+
+ auto
+ The kernel was booted without explicit user intervention.
+
+If these options are added by the boot loader, it is highly
+recommended that they are located *first*, before the user-specified
+or configuration-specified command line. Otherwise, "init=/bin/sh"
+gets confused by the "auto" option.
+
+
+**** RUNNING THE KERNEL
+
+The kernel is started by jumping to the kernel entry point, which is
+located at *segment* offset 0x20 from the start of the real mode
+kernel. This means that if you loaded your real-mode kernel code at
+0x90000, the kernel entry point is 9020:0000.
+
+At entry, ds = es = ss should point to the start of the real-mode
+kernel code (0x9000 if the code is loaded at 0x90000), sp should be
+set up properly, normally pointing to the top of the heap, and
+interrupts should be disabled. Furthermore, to guard against bugs in
+the kernel, it is recommended that the boot loader sets fs = gs = ds =
+es = ss.
+
+In our example from above, we would do:
+
+ /* Note: in the case of the "old" kernel protocol, base_ptr must
+ be == 0x90000 at this point; see the previous sample code */
+
+ seg = base_ptr >> 4;
+
+ cli(); /* Enter with interrupts disabled! */
+
+ /* Set up the real-mode kernel stack */
+ _SS = seg;
+ _SP = heap_end;
+
+ _DS = _ES = _FS = _GS = seg;
+ jmp_far(seg+0x20, 0); /* Run the kernel */
+
+If your boot sector accesses a floppy drive, it is recommended to
+switch off the floppy motor before running the kernel, since the
+kernel boot leaves interrupts off and thus the motor will not be
+switched off, especially if the loaded kernel has the floppy driver as
+a demand-loaded module!
+
+
+**** ADVANCED BOOT LOADER HOOKS
+
+If the boot loader runs in a particularly hostile environment (such as
+LOADLIN, which runs under DOS) it may be impossible to follow the
+standard memory location requirements. Such a boot loader may use the
+following hooks that, if set, are invoked by the kernel at the
+appropriate time. The use of these hooks should probably be
+considered an absolutely last resort!
+
+IMPORTANT: All the hooks are required to preserve %esp, %ebp, %esi and
+%edi across invocation.
+
+ realmode_swtch:
+ A 16-bit real mode far subroutine invoked immediately before
+ entering protected mode. The default routine disables NMI, so
+ your routine should probably do so, too.
+
+ code32_start:
+ A 32-bit flat-mode routine *jumped* to immediately after the
+ transition to protected mode, but before the kernel is
+ uncompressed. No segments, except CS, are guaranteed to be
+ set up (current kernels do, but older ones do not); you should
+ set them up to BOOT_DS (0x18) yourself.
+
+ After completing your hook, you should jump to the address
+ that was in this field before your boot loader overwrote it
+ (relocated, if appropriate.)
+
+
+**** 32-bit BOOT PROTOCOL
+
+For machine with some new BIOS other than legacy BIOS, such as EFI,
+LinuxBIOS, etc, and kexec, the 16-bit real mode setup code in kernel
+based on legacy BIOS can not be used, so a 32-bit boot protocol needs
+to be defined.
+
+In 32-bit boot protocol, the first step in loading a Linux kernel
+should be to setup the boot parameters (struct boot_params,
+traditionally known as "zero page"). The memory for struct boot_params
+should be allocated and initialized to all zero. Then the setup header
+from offset 0x01f1 of kernel image on should be loaded into struct
+boot_params and examined. The end of setup header can be calculated as
+follow:
+
+ 0x0202 + byte value at offset 0x0201
+
+In addition to read/modify/write the setup header of the struct
+boot_params as that of 16-bit boot protocol, the boot loader should
+also fill the additional fields of the struct boot_params as that
+described in zero-page.txt.
+
+After setting up the struct boot_params, the boot loader can load the
+32/64-bit kernel in the same way as that of 16-bit boot protocol.
+
+In 32-bit boot protocol, the kernel is started by jumping to the
+32-bit kernel entry point, which is the start address of loaded
+32/64-bit kernel.
+
+At entry, the CPU must be in 32-bit protected mode with paging
+disabled; a GDT must be loaded with the descriptors for selectors
+__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat
+segment; __BOOT_CS must have execute/read permission, and __BOOT_DS
+must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
+must be __BOOT_DS; interrupt must be disabled; %esi must hold the base
+address of the struct boot_params; %ebp, %edi and %ebx must be zero.
+
+**** 64-bit BOOT PROTOCOL
+
+For machine with 64bit cpus and 64bit kernel, we could use 64bit bootloader
+and we need a 64-bit boot protocol.
+
+In 64-bit boot protocol, the first step in loading a Linux kernel
+should be to setup the boot parameters (struct boot_params,
+traditionally known as "zero page"). The memory for struct boot_params
+could be allocated anywhere (even above 4G) and initialized to all zero.
+Then, the setup header at offset 0x01f1 of kernel image on should be
+loaded into struct boot_params and examined. The end of setup header
+can be calculated as follows:
+
+ 0x0202 + byte value at offset 0x0201
+
+In addition to read/modify/write the setup header of the struct
+boot_params as that of 16-bit boot protocol, the boot loader should
+also fill the additional fields of the struct boot_params as described
+in zero-page.txt.
+
+After setting up the struct boot_params, the boot loader can load
+64-bit kernel in the same way as that of 16-bit boot protocol, but
+kernel could be loaded above 4G.
+
+In 64-bit boot protocol, the kernel is started by jumping to the
+64-bit kernel entry point, which is the start address of loaded
+64-bit kernel plus 0x200.
+
+At entry, the CPU must be in 64-bit mode with paging enabled.
+The range with setup_header.init_size from start address of loaded
+kernel and zero page and command line buffer get ident mapping;
+a GDT must be loaded with the descriptors for selectors
+__BOOT_CS(0x10) and __BOOT_DS(0x18); both descriptors must be 4G flat
+segment; __BOOT_CS must have execute/read permission, and __BOOT_DS
+must have read/write permission; CS must be __BOOT_CS and DS, ES, SS
+must be __BOOT_DS; interrupt must be disabled; %rsi must hold the base
+address of the struct boot_params.
+
+**** EFI HANDOVER PROTOCOL
+
+This protocol allows boot loaders to defer initialisation to the EFI
+boot stub. The boot loader is required to load the kernel/initrd(s)
+from the boot media and jump to the EFI handover protocol entry point
+which is hdr->handover_offset bytes from the beginning of
+startup_{32,64}.
+
+The function prototype for the handover entry point looks like this,
+
+ efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp)
+
+'handle' is the EFI image handle passed to the boot loader by the EFI
+firmware, 'table' is the EFI system table - these are the first two
+arguments of the "handoff state" as described in section 2.3 of the
+UEFI specification. 'bp' is the boot loader-allocated boot params.
+
+The boot loader *must* fill out the following fields in bp,
+
+ o hdr.code32_start
+ o hdr.cmd_line_ptr
+ o hdr.cmdline_size
+ o hdr.ramdisk_image (if applicable)
+ o hdr.ramdisk_size (if applicable)
+
+All other fields should be zero.
diff --git a/Documentation/x86/early-microcode.txt b/Documentation/x86/early-microcode.txt
new file mode 100644
index 000000000..41465f971
--- /dev/null
+++ b/Documentation/x86/early-microcode.txt
@@ -0,0 +1,42 @@
+Early load microcode
+====================
+By Fenghua Yu <fenghua.yu@intel.com>
+
+Kernel can update microcode in early phase of boot time. Loading microcode early
+can fix CPU issues before they are observed during kernel boot time.
+
+Microcode is stored in an initrd file. The microcode is read from the initrd
+file and loaded to CPUs during boot time.
+
+The format of the combined initrd image is microcode in cpio format followed by
+the initrd image (maybe compressed). Kernel parses the combined initrd image
+during boot time. The microcode file in cpio name space is:
+on Intel: /*(DEBLOBBED)*/
+on AMD : /*(DEBLOBBED)*/
+
+During BSP boot (before SMP starts), if the kernel finds the microcode file in
+the initrd file, it parses the microcode and saves matching microcode in memory.
+If matching microcode is found, it will be uploaded in BSP and later on in all
+APs.
+
+The cached microcode patch is applied when CPUs resume from a sleep state.
+
+There are two legacy user space interfaces to load microcode, either through
+/dev/cpu/microcode or through /sys/devices/system/cpu/microcode/reload file
+in sysfs.
+
+In addition to these two legacy methods, the early loading method described
+here is the third method with which microcode can be uploaded to a system's
+CPUs.
+
+The following example script shows how to generate a new combined initrd file in
+/boot/initrd-3.5.0.ucode.img with original microcode microcode.bin and
+original initrd image /boot/initrd-3.5.0.img.
+
+mkdir initrd
+cd initrd
+mkdir -p kernel/x86/microcode
+cp ../microcode.bin /*(DEBLOBBED)*/ (or /*(DEBLOBBED)*/)
+find . | cpio -o -H newc >../ucode.cpio
+cd ..
+cat ucode.cpio /boot/initrd-3.5.0.img >/boot/initrd-3.5.0.ucode.img
diff --git a/Documentation/x86/earlyprintk.txt b/Documentation/x86/earlyprintk.txt
new file mode 100644
index 000000000..688e3eeed
--- /dev/null
+++ b/Documentation/x86/earlyprintk.txt
@@ -0,0 +1,136 @@
+
+Mini-HOWTO for using the earlyprintk=dbgp boot option with a
+USB2 Debug port key and a debug cable, on x86 systems.
+
+You need two computers, the 'USB debug key' special gadget and
+and two USB cables, connected like this:
+
+ [host/target] <-------> [USB debug key] <-------> [client/console]
+
+1. There are a number of specific hardware requirements:
+
+ a.) Host/target system needs to have USB debug port capability.
+
+ You can check this capability by looking at a 'Debug port' bit in
+ the lspci -vvv output:
+
+ # lspci -vvv
+ ...
+ 00:1d.7 USB Controller: Intel Corporation 82801H (ICH8 Family) USB2 EHCI Controller #1 (rev 03) (prog-if 20 [EHCI])
+ Subsystem: Lenovo ThinkPad T61
+ Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx-
+ Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
+ Latency: 0
+ Interrupt: pin D routed to IRQ 19
+ Region 0: Memory at fe227000 (32-bit, non-prefetchable) [size=1K]
+ Capabilities: [50] Power Management version 2
+ Flags: PMEClk- DSI- D1- D2- AuxCurrent=375mA PME(D0+,D1-,D2-,D3hot+,D3cold+)
+ Status: D0 PME-Enable- DSel=0 DScale=0 PME+
+ Capabilities: [58] Debug port: BAR=1 offset=00a0
+ ^^^^^^^^^^^ <==================== [ HERE ]
+ Kernel driver in use: ehci_hcd
+ Kernel modules: ehci-hcd
+ ...
+
+( If your system does not list a debug port capability then you probably
+ won't be able to use the USB debug key. )
+
+ b.) You also need a Netchip USB debug cable/key:
+
+ http://www.plxtech.com/products/NET2000/NET20DC/default.asp
+
+ This is a small blue plastic connector with two USB connections,
+ it draws power from its USB connections.
+
+ c.) You need a second client/console system with a high speed USB 2.0
+ port.
+
+ d.) The Netchip device must be plugged directly into the physical
+ debug port on the "host/target" system. You cannot use a USB hub in
+ between the physical debug port and the "host/target" system.
+
+ The EHCI debug controller is bound to a specific physical USB
+ port and the Netchip device will only work as an early printk
+ device in this port. The EHCI host controllers are electrically
+ wired such that the EHCI debug controller is hooked up to the
+ first physical and there is no way to change this via software.
+ You can find the physical port through experimentation by trying
+ each physical port on the system and rebooting. Or you can try
+ and use lsusb or look at the kernel info messages emitted by the
+ usb stack when you plug a usb device into various ports on the
+ "host/target" system.
+
+ Some hardware vendors do not expose the usb debug port with a
+ physical connector and if you find such a device send a complaint
+ to the hardware vendor, because there is no reason not to wire
+ this port into one of the physically accessible ports.
+
+ e.) It is also important to note, that many versions of the Netchip
+ device require the "client/console" system to be plugged into the
+ right and side of the device (with the product logo facing up and
+ readable left to right). The reason being is that the 5 volt
+ power supply is taken from only one side of the device and it
+ must be the side that does not get rebooted.
+
+2. Software requirements:
+
+ a.) On the host/target system:
+
+ You need to enable the following kernel config option:
+
+ CONFIG_EARLY_PRINTK_DBGP=y
+
+ And you need to add the boot command line: "earlyprintk=dbgp".
+ (If you are using Grub, append it to the 'kernel' line in
+ /etc/grub.conf)
+
+ On systems with more than one EHCI debug controller you must
+ specify the correct EHCI debug controller number. The ordering
+ comes from the PCI bus enumeration of the EHCI controllers. The
+ default with no number argument is "0" the first EHCI debug
+ controller. To use the second EHCI debug controller, you would
+ use the command line: "earlyprintk=dbgp1"
+
+ NOTE: normally earlyprintk console gets turned off once the
+ regular console is alive - use "earlyprintk=dbgp,keep" to keep
+ this channel open beyond early bootup. This can be useful for
+ debugging crashes under Xorg, etc.
+
+ b.) On the client/console system:
+
+ You should enable the following kernel config option:
+
+ CONFIG_USB_SERIAL_DEBUG=y
+
+ On the next bootup with the modified kernel you should
+ get a /dev/ttyUSBx device(s).
+
+ Now this channel of kernel messages is ready to be used: start
+ your favorite terminal emulator (minicom, etc.) and set
+ it up to use /dev/ttyUSB0 - or use a raw 'cat /dev/ttyUSBx' to
+ see the raw output.
+
+ c.) On Nvidia Southbridge based systems: the kernel will try to probe
+ and find out which port has debug device connected.
+
+3. Testing that it works fine:
+
+ You can test the output by using earlyprintk=dbgp,keep and provoking
+ kernel messages on the host/target system. You can provoke a harmless
+ kernel message by for example doing:
+
+ echo h > /proc/sysrq-trigger
+
+ On the host/target system you should see this help line in "dmesg" output:
+
+ SysRq : HELP : loglevel(0-9) reBoot Crashdump terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) dump-ftrace-buffer(Z)
+
+ On the client/console system do:
+
+ cat /dev/ttyUSB0
+
+ And you should see the help line above displayed shortly after you've
+ provoked it on the host system.
+
+If it does not work then please ask about it on the linux-kernel@vger.kernel.org
+mailing list or contact the x86 maintainers.
diff --git a/Documentation/x86/entry_64.txt b/Documentation/x86/entry_64.txt
new file mode 100644
index 000000000..9132b8617
--- /dev/null
+++ b/Documentation/x86/entry_64.txt
@@ -0,0 +1,104 @@
+This file documents some of the kernel entries in
+arch/x86/kernel/entry_64.S. A lot of this explanation is adapted from
+an email from Ingo Molnar:
+
+http://lkml.kernel.org/r/<20110529191055.GC9835%40elte.hu>
+
+The x86 architecture has quite a few different ways to jump into
+kernel code. Most of these entry points are registered in
+arch/x86/kernel/traps.c and implemented in arch/x86/kernel/entry_64.S
+for 64-bit, arch/x86/kernel/entry_32.S for 32-bit and finally
+arch/x86/ia32/ia32entry.S which implements the 32-bit compatibility
+syscall entry points and thus provides for 32-bit processes the
+ability to execute syscalls when running on 64-bit kernels.
+
+The IDT vector assignments are listed in arch/x86/include/asm/irq_vectors.h.
+
+Some of these entries are:
+
+ - system_call: syscall instruction from 64-bit code.
+
+ - ia32_syscall: int 0x80 from 32-bit or 64-bit code; compat syscall
+ either way.
+
+ - ia32_syscall, ia32_sysenter: syscall and sysenter from 32-bit
+ code
+
+ - interrupt: An array of entries. Every IDT vector that doesn't
+ explicitly point somewhere else gets set to the corresponding
+ value in interrupts. These point to a whole array of
+ magically-generated functions that make their way to do_IRQ with
+ the interrupt number as a parameter.
+
+ - APIC interrupts: Various special-purpose interrupts for things
+ like TLB shootdown.
+
+ - Architecturally-defined exceptions like divide_error.
+
+There are a few complexities here. The different x86-64 entries
+have different calling conventions. The syscall and sysenter
+instructions have their own peculiar calling conventions. Some of
+the IDT entries push an error code onto the stack; others don't.
+IDT entries using the IST alternative stack mechanism need their own
+magic to get the stack frames right. (You can find some
+documentation in the AMD APM, Volume 2, Chapter 8 and the Intel SDM,
+Volume 3, Chapter 6.)
+
+Dealing with the swapgs instruction is especially tricky. Swapgs
+toggles whether gs is the kernel gs or the user gs. The swapgs
+instruction is rather fragile: it must nest perfectly and only in
+single depth, it should only be used if entering from user mode to
+kernel mode and then when returning to user-space, and precisely
+so. If we mess that up even slightly, we crash.
+
+So when we have a secondary entry, already in kernel mode, we *must
+not* use SWAPGS blindly - nor must we forget doing a SWAPGS when it's
+not switched/swapped yet.
+
+Now, there's a secondary complication: there's a cheap way to test
+which mode the CPU is in and an expensive way.
+
+The cheap way is to pick this info off the entry frame on the kernel
+stack, from the CS of the ptregs area of the kernel stack:
+
+ xorl %ebx,%ebx
+ testl $3,CS+8(%rsp)
+ je error_kernelspace
+ SWAPGS
+
+The expensive (paranoid) way is to read back the MSR_GS_BASE value
+(which is what SWAPGS modifies):
+
+ movl $1,%ebx
+ movl $MSR_GS_BASE,%ecx
+ rdmsr
+ testl %edx,%edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
+ xorl %ebx,%ebx
+1: ret
+
+If we are at an interrupt or user-trap/gate-alike boundary then we can
+use the faster check: the stack will be a reliable indicator of
+whether SWAPGS was already done: if we see that we are a secondary
+entry interrupting kernel mode execution, then we know that the GS
+base has already been switched. If it says that we interrupted
+user-space execution then we must do the SWAPGS.
+
+But if we are in an NMI/MCE/DEBUG/whatever super-atomic entry context,
+which might have triggered right after a normal entry wrote CS to the
+stack but before we executed SWAPGS, then the only safe way to check
+for GS is the slower method: the RDMSR.
+
+Therefore, super-atomic entries (except NMI, which is handled separately)
+must use idtentry with paranoid=1 to handle gsbase correctly. This
+triggers three main behavior changes:
+
+ - Interrupt entry will use the slower gsbase check.
+ - Interrupt entry from user mode will switch off the IST stack.
+ - Interrupt exit to kernel mode will not attempt to reschedule.
+
+We try to only use IST entries and the paranoid entry code for vectors
+that absolutely need the more expensive check for the GS base - and we
+generate all 'normal' entry points with the regular (faster) paranoid=0
+variant.
diff --git a/Documentation/x86/exception-tables.txt b/Documentation/x86/exception-tables.txt
new file mode 100644
index 000000000..32901aa36
--- /dev/null
+++ b/Documentation/x86/exception-tables.txt
@@ -0,0 +1,292 @@
+ Kernel level exception handling in Linux
+ Commentary by Joerg Pommnitz <joerg@raleigh.ibm.com>
+
+When a process runs in kernel mode, it often has to access user
+mode memory whose address has been passed by an untrusted program.
+To protect itself the kernel has to verify this address.
+
+In older versions of Linux this was done with the
+int verify_area(int type, const void * addr, unsigned long size)
+function (which has since been replaced by access_ok()).
+
+This function verified that the memory area starting at address
+'addr' and of size 'size' was accessible for the operation specified
+in type (read or write). To do this, verify_read had to look up the
+virtual memory area (vma) that contained the address addr. In the
+normal case (correctly working program), this test was successful.
+It only failed for a few buggy programs. In some kernel profiling
+tests, this normally unneeded verification used up a considerable
+amount of time.
+
+To overcome this situation, Linus decided to let the virtual memory
+hardware present in every Linux-capable CPU handle this test.
+
+How does this work?
+
+Whenever the kernel tries to access an address that is currently not
+accessible, the CPU generates a page fault exception and calls the
+page fault handler
+
+void do_page_fault(struct pt_regs *regs, unsigned long error_code)
+
+in arch/x86/mm/fault.c. The parameters on the stack are set up by
+the low level assembly glue in arch/x86/kernel/entry_32.S. The parameter
+regs is a pointer to the saved registers on the stack, error_code
+contains a reason code for the exception.
+
+do_page_fault first obtains the unaccessible address from the CPU
+control register CR2. If the address is within the virtual address
+space of the process, the fault probably occurred, because the page
+was not swapped in, write protected or something similar. However,
+we are interested in the other case: the address is not valid, there
+is no vma that contains this address. In this case, the kernel jumps
+to the bad_area label.
+
+There it uses the address of the instruction that caused the exception
+(i.e. regs->eip) to find an address where the execution can continue
+(fixup). If this search is successful, the fault handler modifies the
+return address (again regs->eip) and returns. The execution will
+continue at the address in fixup.
+
+Where does fixup point to?
+
+Since we jump to the contents of fixup, fixup obviously points
+to executable code. This code is hidden inside the user access macros.
+I have picked the get_user macro defined in arch/x86/include/asm/uaccess.h
+as an example. The definition is somewhat hard to follow, so let's peek at
+the code generated by the preprocessor and the compiler. I selected
+the get_user call in drivers/char/sysrq.c for a detailed examination.
+
+The original code in sysrq.c line 587:
+ get_user(c, buf);
+
+The preprocessor output (edited to become somewhat readable):
+
+(
+ {
+ long __gu_err = - 14 , __gu_val = 0;
+ const __typeof__(*( ( buf ) )) *__gu_addr = ((buf));
+ if (((((0 + current_set[0])->tss.segment) == 0x18 ) ||
+ (((sizeof(*(buf))) <= 0xC0000000UL) &&
+ ((unsigned long)(__gu_addr ) <= 0xC0000000UL - (sizeof(*(buf)))))))
+ do {
+ __gu_err = 0;
+ switch ((sizeof(*(buf)))) {
+ case 1:
+ __asm__ __volatile__(
+ "1: mov" "b" " %2,%" "b" "1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %3,%0\n"
+ " xor" "b" " %" "b" "1,%" "b" "1\n"
+ " jmp 2b\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b,3b\n"
+ ".text" : "=r"(__gu_err), "=q" (__gu_val): "m"((*(struct __large_struct *)
+ ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err )) ;
+ break;
+ case 2:
+ __asm__ __volatile__(
+ "1: mov" "w" " %2,%" "w" "1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %3,%0\n"
+ " xor" "w" " %" "w" "1,%" "w" "1\n"
+ " jmp 2b\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n"
+ " .long 1b,3b\n"
+ ".text" : "=r"(__gu_err), "=r" (__gu_val) : "m"((*(struct __large_struct *)
+ ( __gu_addr )) ), "i"(- 14 ), "0"( __gu_err ));
+ break;
+ case 4:
+ __asm__ __volatile__(
+ "1: mov" "l" " %2,%" "" "1\n"
+ "2:\n"
+ ".section .fixup,\"ax\"\n"
+ "3: movl %3,%0\n"
+ " xor" "l" " %" "" "1,%" "" "1\n"
+ " jmp 2b\n"
+ ".section __ex_table,\"a\"\n"
+ " .align 4\n" " .long 1b,3b\n"
+ ".text" : "=r"(__gu_err), "=r" (__gu_val) : "m"((*(struct __large_struct *)
+ ( __gu_addr )) ), "i"(- 14 ), "0"(__gu_err));
+ break;
+ default:
+ (__gu_val) = __get_user_bad();
+ }
+ } while (0) ;
+ ((c)) = (__typeof__(*((buf))))__gu_val;
+ __gu_err;
+ }
+);
+
+WOW! Black GCC/assembly magic. This is impossible to follow, so let's
+see what code gcc generates:
+
+ > xorl %edx,%edx
+ > movl current_set,%eax
+ > cmpl $24,788(%eax)
+ > je .L1424
+ > cmpl $-1073741825,64(%esp)
+ > ja .L1423
+ > .L1424:
+ > movl %edx,%eax
+ > movl 64(%esp),%ebx
+ > #APP
+ > 1: movb (%ebx),%dl /* this is the actual user access */
+ > 2:
+ > .section .fixup,"ax"
+ > 3: movl $-14,%eax
+ > xorb %dl,%dl
+ > jmp 2b
+ > .section __ex_table,"a"
+ > .align 4
+ > .long 1b,3b
+ > .text
+ > #NO_APP
+ > .L1423:
+ > movzbl %dl,%esi
+
+The optimizer does a good job and gives us something we can actually
+understand. Can we? The actual user access is quite obvious. Thanks
+to the unified address space we can just access the address in user
+memory. But what does the .section stuff do?????
+
+To understand this we have to look at the final kernel:
+
+ > objdump --section-headers vmlinux
+ >
+ > vmlinux: file format elf32-i386
+ >
+ > Sections:
+ > Idx Name Size VMA LMA File off Algn
+ > 0 .text 00098f40 c0100000 c0100000 00001000 2**4
+ > CONTENTS, ALLOC, LOAD, READONLY, CODE
+ > 1 .fixup 000016bc c0198f40 c0198f40 00099f40 2**0
+ > CONTENTS, ALLOC, LOAD, READONLY, CODE
+ > 2 .rodata 0000f127 c019a5fc c019a5fc 0009b5fc 2**2
+ > CONTENTS, ALLOC, LOAD, READONLY, DATA
+ > 3 __ex_table 000015c0 c01a9724 c01a9724 000aa724 2**2
+ > CONTENTS, ALLOC, LOAD, READONLY, DATA
+ > 4 .data 0000ea58 c01abcf0 c01abcf0 000abcf0 2**4
+ > CONTENTS, ALLOC, LOAD, DATA
+ > 5 .bss 00018e21 c01ba748 c01ba748 000ba748 2**2
+ > ALLOC
+ > 6 .comment 00000ec4 00000000 00000000 000ba748 2**0
+ > CONTENTS, READONLY
+ > 7 .note 00001068 00000ec4 00000ec4 000bb60c 2**0
+ > CONTENTS, READONLY
+
+There are obviously 2 non standard ELF sections in the generated object
+file. But first we want to find out what happened to our code in the
+final kernel executable:
+
+ > objdump --disassemble --section=.text vmlinux
+ >
+ > c017e785 <do_con_write+c1> xorl %edx,%edx
+ > c017e787 <do_con_write+c3> movl 0xc01c7bec,%eax
+ > c017e78c <do_con_write+c8> cmpl $0x18,0x314(%eax)
+ > c017e793 <do_con_write+cf> je c017e79f <do_con_write+db>
+ > c017e795 <do_con_write+d1> cmpl $0xbfffffff,0x40(%esp,1)
+ > c017e79d <do_con_write+d9> ja c017e7a7 <do_con_write+e3>
+ > c017e79f <do_con_write+db> movl %edx,%eax
+ > c017e7a1 <do_con_write+dd> movl 0x40(%esp,1),%ebx
+ > c017e7a5 <do_con_write+e1> movb (%ebx),%dl
+ > c017e7a7 <do_con_write+e3> movzbl %dl,%esi
+
+The whole user memory access is reduced to 10 x86 machine instructions.
+The instructions bracketed in the .section directives are no longer
+in the normal execution path. They are located in a different section
+of the executable file:
+
+ > objdump --disassemble --section=.fixup vmlinux
+ >
+ > c0199ff5 <.fixup+10b5> movl $0xfffffff2,%eax
+ > c0199ffa <.fixup+10ba> xorb %dl,%dl
+ > c0199ffc <.fixup+10bc> jmp c017e7a7 <do_con_write+e3>
+
+And finally:
+ > objdump --full-contents --section=__ex_table vmlinux
+ >
+ > c01aa7c4 93c017c0 e09f19c0 97c017c0 99c017c0 ................
+ > c01aa7d4 f6c217c0 e99f19c0 a5e717c0 f59f19c0 ................
+ > c01aa7e4 080a18c0 01a019c0 0a0a18c0 04a019c0 ................
+
+or in human readable byte order:
+
+ > c01aa7c4 c017c093 c0199fe0 c017c097 c017c099 ................
+ > c01aa7d4 c017c2f6 c0199fe9 c017e7a5 c0199ff5 ................
+ ^^^^^^^^^^^^^^^^^
+ this is the interesting part!
+ > c01aa7e4 c0180a08 c019a001 c0180a0a c019a004 ................
+
+What happened? The assembly directives
+
+.section .fixup,"ax"
+.section __ex_table,"a"
+
+told the assembler to move the following code to the specified
+sections in the ELF object file. So the instructions
+3: movl $-14,%eax
+ xorb %dl,%dl
+ jmp 2b
+ended up in the .fixup section of the object file and the addresses
+ .long 1b,3b
+ended up in the __ex_table section of the object file. 1b and 3b
+are local labels. The local label 1b (1b stands for next label 1
+backward) is the address of the instruction that might fault, i.e.
+in our case the address of the label 1 is c017e7a5:
+the original assembly code: > 1: movb (%ebx),%dl
+and linked in vmlinux : > c017e7a5 <do_con_write+e1> movb (%ebx),%dl
+
+The local label 3 (backwards again) is the address of the code to handle
+the fault, in our case the actual value is c0199ff5:
+the original assembly code: > 3: movl $-14,%eax
+and linked in vmlinux : > c0199ff5 <.fixup+10b5> movl $0xfffffff2,%eax
+
+The assembly code
+ > .section __ex_table,"a"
+ > .align 4
+ > .long 1b,3b
+
+becomes the value pair
+ > c01aa7d4 c017c2f6 c0199fe9 c017e7a5 c0199ff5 ................
+ ^this is ^this is
+ 1b 3b
+c017e7a5,c0199ff5 in the exception table of the kernel.
+
+So, what actually happens if a fault from kernel mode with no suitable
+vma occurs?
+
+1.) access to invalid address:
+ > c017e7a5 <do_con_write+e1> movb (%ebx),%dl
+2.) MMU generates exception
+3.) CPU calls do_page_fault
+4.) do page fault calls search_exception_table (regs->eip == c017e7a5);
+5.) search_exception_table looks up the address c017e7a5 in the
+ exception table (i.e. the contents of the ELF section __ex_table)
+ and returns the address of the associated fault handle code c0199ff5.
+6.) do_page_fault modifies its own return address to point to the fault
+ handle code and returns.
+7.) execution continues in the fault handling code.
+8.) 8a) EAX becomes -EFAULT (== -14)
+ 8b) DL becomes zero (the value we "read" from user space)
+ 8c) execution continues at local label 2 (address of the
+ instruction immediately after the faulting user access).
+
+The steps 8a to 8c in a certain way emulate the faulting instruction.
+
+That's it, mostly. If you look at our example, you might ask why
+we set EAX to -EFAULT in the exception handler code. Well, the
+get_user macro actually returns a value: 0, if the user access was
+successful, -EFAULT on failure. Our original code did not test this
+return value, however the inline assembly code in get_user tries to
+return -EFAULT. GCC selected EAX to return this value.
+
+NOTE:
+Due to the way that the exception table is built and needs to be ordered,
+only use exceptions for code in the .text section. Any other section
+will cause the exception table to not be sorted correctly, and the
+exceptions will fail.
diff --git a/Documentation/x86/i386/IO-APIC.txt b/Documentation/x86/i386/IO-APIC.txt
new file mode 100644
index 000000000..15f5baf7e
--- /dev/null
+++ b/Documentation/x86/i386/IO-APIC.txt
@@ -0,0 +1,119 @@
+Most (all) Intel-MP compliant SMP boards have the so-called 'IO-APIC',
+which is an enhanced interrupt controller. It enables us to route
+hardware interrupts to multiple CPUs, or to CPU groups. Without an
+IO-APIC, interrupts from hardware will be delivered only to the
+CPU which boots the operating system (usually CPU#0).
+
+Linux supports all variants of compliant SMP boards, including ones with
+multiple IO-APICs. Multiple IO-APICs are used in high-end servers to
+distribute IRQ load further.
+
+There are (a few) known breakages in certain older boards, such bugs are
+usually worked around by the kernel. If your MP-compliant SMP board does
+not boot Linux, then consult the linux-smp mailing list archives first.
+
+If your box boots fine with enabled IO-APIC IRQs, then your
+/proc/interrupts will look like this one:
+
+ ---------------------------->
+ hell:~> cat /proc/interrupts
+ CPU0
+ 0: 1360293 IO-APIC-edge timer
+ 1: 4 IO-APIC-edge keyboard
+ 2: 0 XT-PIC cascade
+ 13: 1 XT-PIC fpu
+ 14: 1448 IO-APIC-edge ide0
+ 16: 28232 IO-APIC-level Intel EtherExpress Pro 10/100 Ethernet
+ 17: 51304 IO-APIC-level eth0
+ NMI: 0
+ ERR: 0
+ hell:~>
+ <----------------------------
+
+Some interrupts are still listed as 'XT PIC', but this is not a problem;
+none of those IRQ sources is performance-critical.
+
+
+In the unlikely case that your board does not create a working mp-table,
+you can use the pirq= boot parameter to 'hand-construct' IRQ entries. This
+is non-trivial though and cannot be automated. One sample /etc/lilo.conf
+entry:
+
+ append="pirq=15,11,10"
+
+The actual numbers depend on your system, on your PCI cards and on their
+PCI slot position. Usually PCI slots are 'daisy chained' before they are
+connected to the PCI chipset IRQ routing facility (the incoming PIRQ1-4
+lines):
+
+ ,-. ,-. ,-. ,-. ,-.
+ PIRQ4 ----| |-. ,-| |-. ,-| |-. ,-| |--------| |
+ |S| \ / |S| \ / |S| \ / |S| |S|
+ PIRQ3 ----|l|-. `/---|l|-. `/---|l|-. `/---|l|--------|l|
+ |o| \/ |o| \/ |o| \/ |o| |o|
+ PIRQ2 ----|t|-./`----|t|-./`----|t|-./`----|t|--------|t|
+ |1| /\ |2| /\ |3| /\ |4| |5|
+ PIRQ1 ----| |- `----| |- `----| |- `----| |--------| |
+ `-' `-' `-' `-' `-'
+
+Every PCI card emits a PCI IRQ, which can be INTA, INTB, INTC or INTD:
+
+ ,-.
+ INTD--| |
+ |S|
+ INTC--|l|
+ |o|
+ INTB--|t|
+ |x|
+ INTA--| |
+ `-'
+
+These INTA-D PCI IRQs are always 'local to the card', their real meaning
+depends on which slot they are in. If you look at the daisy chaining diagram,
+a card in slot4, issuing INTA IRQ, it will end up as a signal on PIRQ4 of
+the PCI chipset. Most cards issue INTA, this creates optimal distribution
+between the PIRQ lines. (distributing IRQ sources properly is not a
+necessity, PCI IRQs can be shared at will, but it's a good for performance
+to have non shared interrupts). Slot5 should be used for videocards, they
+do not use interrupts normally, thus they are not daisy chained either.
+
+so if you have your SCSI card (IRQ11) in Slot1, Tulip card (IRQ9) in
+Slot2, then you'll have to specify this pirq= line:
+
+ append="pirq=11,9"
+
+the following script tries to figure out such a default pirq= line from
+your PCI configuration:
+
+ echo -n pirq=; echo `scanpci | grep T_L | cut -c56-` | sed 's/ /,/g'
+
+note that this script won't work if you have skipped a few slots or if your
+board does not do default daisy-chaining. (or the IO-APIC has the PIRQ pins
+connected in some strange way). E.g. if in the above case you have your SCSI
+card (IRQ11) in Slot3, and have Slot1 empty:
+
+ append="pirq=0,9,11"
+
+[value '0' is a generic 'placeholder', reserved for empty (or non-IRQ emitting)
+slots.]
+
+Generally, it's always possible to find out the correct pirq= settings, just
+permute all IRQ numbers properly ... it will take some time though. An
+'incorrect' pirq line will cause the booting process to hang, or a device
+won't function properly (e.g. if it's inserted as a module).
+
+If you have 2 PCI buses, then you can use up to 8 pirq values, although such
+boards tend to have a good configuration.
+
+Be prepared that it might happen that you need some strange pirq line:
+
+ append="pirq=0,0,0,0,0,0,9,11"
+
+Use smart trial-and-error techniques to find out the correct pirq line ...
+
+Good luck and mail to linux-smp@vger.kernel.org or
+linux-kernel@vger.kernel.org if you have any problems that are not covered
+by this document.
+
+-- mingo
+
diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt
new file mode 100644
index 000000000..818518a3f
--- /dev/null
+++ b/Documentation/x86/intel_mpx.txt
@@ -0,0 +1,244 @@
+1. Intel(R) MPX Overview
+========================
+
+Intel(R) Memory Protection Extensions (Intel(R) MPX) is a new capability
+introduced into Intel Architecture. Intel MPX provides hardware features
+that can be used in conjunction with compiler changes to check memory
+references, for those references whose compile-time normal intentions are
+usurped at runtime due to buffer overflow or underflow.
+
+You can tell if your CPU supports MPX by looking in /proc/cpuinfo:
+
+ cat /proc/cpuinfo | grep ' mpx '
+
+For more information, please refer to Intel(R) Architecture Instruction
+Set Extensions Programming Reference, Chapter 9: Intel(R) Memory Protection
+Extensions.
+
+Note: As of December 2014, no hardware with MPX is available but it is
+possible to use SDE (Intel(R) Software Development Emulator) instead, which
+can be downloaded from
+http://software.intel.com/en-us/articles/intel-software-development-emulator
+
+
+2. How to get the advantage of MPX
+==================================
+
+For MPX to work, changes are required in the kernel, binutils and compiler.
+No source changes are required for applications, just a recompile.
+
+There are a lot of moving parts of this to all work right. The following
+is how we expect the compiler, application and kernel to work together.
+
+1) Application developer compiles with -fmpx. The compiler will add the
+ instrumentation as well as some setup code called early after the app
+ starts. New instruction prefixes are noops for old CPUs.
+2) That setup code allocates (virtual) space for the "bounds directory",
+ points the "bndcfgu" register to the directory (must also set the valid
+ bit) and notifies the kernel (via the new prctl(PR_MPX_ENABLE_MANAGEMENT))
+ that the app will be using MPX. The app must be careful not to access
+ the bounds tables between the time when it populates "bndcfgu" and
+ when it calls the prctl(). This might be hard to guarantee if the app
+ is compiled with MPX. You can add "__attribute__((bnd_legacy))" to
+ the function to disable MPX instrumentation to help guarantee this.
+ Also be careful not to call out to any other code which might be
+ MPX-instrumented.
+3) The kernel detects that the CPU has MPX, allows the new prctl() to
+ succeed, and notes the location of the bounds directory. Userspace is
+ expected to keep the bounds directory at that locationWe note it
+ instead of reading it each time because the 'xsave' operation needed
+ to access the bounds directory register is an expensive operation.
+4) If the application needs to spill bounds out of the 4 registers, it
+ issues a bndstx instruction. Since the bounds directory is empty at
+ this point, a bounds fault (#BR) is raised, the kernel allocates a
+ bounds table (in the user address space) and makes the relevant entry
+ in the bounds directory point to the new table.
+5) If the application violates the bounds specified in the bounds registers,
+ a separate kind of #BR is raised which will deliver a signal with
+ information about the violation in the 'struct siginfo'.
+6) Whenever memory is freed, we know that it can no longer contain valid
+ pointers, and we attempt to free the associated space in the bounds
+ tables. If an entire table becomes unused, we will attempt to free
+ the table and remove the entry in the directory.
+
+To summarize, there are essentially three things interacting here:
+
+GCC with -fmpx:
+ * enables annotation of code with MPX instructions and prefixes
+ * inserts code early in the application to call in to the "gcc runtime"
+GCC MPX Runtime:
+ * Checks for hardware MPX support in cpuid leaf
+ * allocates virtual space for the bounds directory (malloc() essentially)
+ * points the hardware BNDCFGU register at the directory
+ * calls a new prctl(PR_MPX_ENABLE_MANAGEMENT) to notify the kernel to
+ start managing the bounds directories
+Kernel MPX Code:
+ * Checks for hardware MPX support in cpuid leaf
+ * Handles #BR exceptions and sends SIGSEGV to the app when it violates
+ bounds, like during a buffer overflow.
+ * When bounds are spilled in to an unallocated bounds table, the kernel
+ notices in the #BR exception, allocates the virtual space, then
+ updates the bounds directory to point to the new table. It keeps
+ special track of the memory with a VM_MPX flag.
+ * Frees unused bounds tables at the time that the memory they described
+ is unmapped.
+
+
+3. How does MPX kernel code work
+================================
+
+Handling #BR faults caused by MPX
+---------------------------------
+
+When MPX is enabled, there are 2 new situations that can generate
+#BR faults.
+ * new bounds tables (BT) need to be allocated to save bounds.
+ * bounds violation caused by MPX instructions.
+
+We hook #BR handler to handle these two new situations.
+
+On-demand kernel allocation of bounds tables
+--------------------------------------------
+
+MPX only has 4 hardware registers for storing bounds information. If
+MPX-enabled code needs more than these 4 registers, it needs to spill
+them somewhere. It has two special instructions for this which allow
+the bounds to be moved between the bounds registers and some new "bounds
+tables".
+
+#BR exceptions are a new class of exceptions just for MPX. They are
+similar conceptually to a page fault and will be raised by the MPX
+hardware during both bounds violations or when the tables are not
+present. The kernel handles those #BR exceptions for not-present tables
+by carving the space out of the normal processes address space and then
+pointing the bounds-directory over to it.
+
+The tables need to be accessed and controlled by userspace because
+the instructions for moving bounds in and out of them are extremely
+frequent. They potentially happen every time a register points to
+memory. Any direct kernel involvement (like a syscall) to access the
+tables would obviously destroy performance.
+
+Why not do this in userspace? MPX does not strictly require anything in
+the kernel. It can theoretically be done completely from userspace. Here
+are a few ways this could be done. We don't think any of them are practical
+in the real-world, but here they are.
+
+Q: Can virtual space simply be reserved for the bounds tables so that we
+ never have to allocate them?
+A: MPX-enabled application will possibly create a lot of bounds tables in
+ process address space to save bounds information. These tables can take
+ up huge swaths of memory (as much as 80% of the memory on the system)
+ even if we clean them up aggressively. In the worst-case scenario, the
+ tables can be 4x the size of the data structure being tracked. IOW, a
+ 1-page structure can require 4 bounds-table pages. An X-GB virtual
+ area needs 4*X GB of virtual space, plus 2GB for the bounds directory.
+ If we were to preallocate them for the 128TB of user virtual address
+ space, we would need to reserve 512TB+2GB, which is larger than the
+ entire virtual address space today. This means they can not be reserved
+ ahead of time. Also, a single process's pre-popualated bounds directory
+ consumes 2GB of virtual *AND* physical memory. IOW, it's completely
+ infeasible to prepopulate bounds directories.
+
+Q: Can we preallocate bounds table space at the same time memory is
+ allocated which might contain pointers that might eventually need
+ bounds tables?
+A: This would work if we could hook the site of each and every memory
+ allocation syscall. This can be done for small, constrained applications.
+ But, it isn't practical at a larger scale since a given app has no
+ way of controlling how all the parts of the app might allocate memory
+ (think libraries). The kernel is really the only place to intercept
+ these calls.
+
+Q: Could a bounds fault be handed to userspace and the tables allocated
+ there in a signal handler intead of in the kernel?
+A: mmap() is not on the list of safe async handler functions and even
+ if mmap() would work it still requires locking or nasty tricks to
+ keep track of the allocation state there.
+
+Having ruled out all of the userspace-only approaches for managing
+bounds tables that we could think of, we create them on demand in
+the kernel.
+
+Decoding MPX instructions
+-------------------------
+
+If a #BR is generated due to a bounds violation caused by MPX.
+We need to decode MPX instructions to get violation address and
+set this address into extended struct siginfo.
+
+The _sigfault feild of struct siginfo is extended as follow:
+
+87 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+88 struct {
+89 void __user *_addr; /* faulting insn/memory ref. */
+90 #ifdef __ARCH_SI_TRAPNO
+91 int _trapno; /* TRAP # which caused the signal */
+92 #endif
+93 short _addr_lsb; /* LSB of the reported address */
+94 struct {
+95 void __user *_lower;
+96 void __user *_upper;
+97 } _addr_bnd;
+98 } _sigfault;
+
+The '_addr' field refers to violation address, and new '_addr_and'
+field refers to the upper/lower bounds when a #BR is caused.
+
+Glibc will be also updated to support this new siginfo. So user
+can get violation address and bounds when bounds violations occur.
+
+Cleanup unused bounds tables
+----------------------------
+
+When a BNDSTX instruction attempts to save bounds to a bounds directory
+entry marked as invalid, a #BR is generated. This is an indication that
+no bounds table exists for this entry. In this case the fault handler
+will allocate a new bounds table on demand.
+
+Since the kernel allocated those tables on-demand without userspace
+knowledge, it is also responsible for freeing them when the associated
+mappings go away.
+
+Here, the solution for this issue is to hook do_munmap() to check
+whether one process is MPX enabled. If yes, those bounds tables covered
+in the virtual address region which is being unmapped will be freed also.
+
+Adding new prctl commands
+-------------------------
+
+Two new prctl commands are added to enable and disable MPX bounds tables
+management in kernel.
+
+155 #define PR_MPX_ENABLE_MANAGEMENT 43
+156 #define PR_MPX_DISABLE_MANAGEMENT 44
+
+Runtime library in userspace is responsible for allocation of bounds
+directory. So kernel have to use XSAVE instruction to get the base
+of bounds directory from BNDCFG register.
+
+But XSAVE is expected to be very expensive. In order to do performance
+optimization, we have to get the base of bounds directory and save it
+into struct mm_struct to be used in future during PR_MPX_ENABLE_MANAGEMENT
+command execution.
+
+
+4. Special rules
+================
+
+1) If userspace is requesting help from the kernel to do the management
+of bounds tables, it may not create or modify entries in the bounds directory.
+
+Certainly users can allocate bounds tables and forcibly point the bounds
+directory at them through XSAVE instruction, and then set valid bit
+of bounds entry to have this entry valid. But, the kernel will decline
+to assist in managing these tables.
+
+2) Userspace may not take multiple bounds directory entries and point
+them at the same bounds table.
+
+This is allowed architecturally. See more information "Intel(R) Architecture
+Instruction Set Extensions Programming Reference" (9.3.4).
+
+However, if users did this, the kernel might be fooled in to unmaping an
+in-use bounds table since it does not recognize sharing.
diff --git a/Documentation/x86/mtrr.txt b/Documentation/x86/mtrr.txt
new file mode 100644
index 000000000..cc071dc33
--- /dev/null
+++ b/Documentation/x86/mtrr.txt
@@ -0,0 +1,305 @@
+MTRR (Memory Type Range Register) control
+3 Jun 1999
+Richard Gooch
+<rgooch@atnf.csiro.au>
+
+ On Intel P6 family processors (Pentium Pro, Pentium II and later)
+ the Memory Type Range Registers (MTRRs) may be used to control
+ processor access to memory ranges. This is most useful when you have
+ a video (VGA) card on a PCI or AGP bus. Enabling write-combining
+ allows bus write transfers to be combined into a larger transfer
+ before bursting over the PCI/AGP bus. This can increase performance
+ of image write operations 2.5 times or more.
+
+ The Cyrix 6x86, 6x86MX and M II processors have Address Range
+ Registers (ARRs) which provide a similar functionality to MTRRs. For
+ these, the ARRs are used to emulate the MTRRs.
+
+ The AMD K6-2 (stepping 8 and above) and K6-3 processors have two
+ MTRRs. These are supported. The AMD Athlon family provide 8 Intel
+ style MTRRs.
+
+ The Centaur C6 (WinChip) has 8 MCRs, allowing write-combining. These
+ are supported.
+
+ The VIA Cyrix III and VIA C3 CPUs offer 8 Intel style MTRRs.
+
+ The CONFIG_MTRR option creates a /proc/mtrr file which may be used
+ to manipulate your MTRRs. Typically the X server should use
+ this. This should have a reasonably generic interface so that
+ similar control registers on other processors can be easily
+ supported.
+
+
+There are two interfaces to /proc/mtrr: one is an ASCII interface
+which allows you to read and write. The other is an ioctl()
+interface. The ASCII interface is meant for administration. The
+ioctl() interface is meant for C programs (i.e. the X server). The
+interfaces are described below, with sample commands and C code.
+
+===============================================================================
+Reading MTRRs from the shell:
+
+% cat /proc/mtrr
+reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1
+reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1
+===============================================================================
+Creating MTRRs from the C-shell:
+# echo "base=0xf8000000 size=0x400000 type=write-combining" >! /proc/mtrr
+or if you use bash:
+# echo "base=0xf8000000 size=0x400000 type=write-combining" >| /proc/mtrr
+
+And the result thereof:
+% cat /proc/mtrr
+reg00: base=0x00000000 ( 0MB), size= 128MB: write-back, count=1
+reg01: base=0x08000000 ( 128MB), size= 64MB: write-back, count=1
+reg02: base=0xf8000000 (3968MB), size= 4MB: write-combining, count=1
+
+This is for video RAM at base address 0xf8000000 and size 4 megabytes. To
+find out your base address, you need to look at the output of your X
+server, which tells you where the linear framebuffer address is. A
+typical line that you may get is:
+
+(--) S3: PCI: 968 rev 0, Linear FB @ 0xf8000000
+
+Note that you should only use the value from the X server, as it may
+move the framebuffer base address, so the only value you can trust is
+that reported by the X server.
+
+To find out the size of your framebuffer (what, you don't actually
+know?), the following line will tell you:
+
+(--) S3: videoram: 4096k
+
+That's 4 megabytes, which is 0x400000 bytes (in hexadecimal).
+A patch is being written for XFree86 which will make this automatic:
+in other words the X server will manipulate /proc/mtrr using the
+ioctl() interface, so users won't have to do anything. If you use a
+commercial X server, lobby your vendor to add support for MTRRs.
+===============================================================================
+Creating overlapping MTRRs:
+
+%echo "base=0xfb000000 size=0x1000000 type=write-combining" >/proc/mtrr
+%echo "base=0xfb000000 size=0x1000 type=uncachable" >/proc/mtrr
+
+And the results: cat /proc/mtrr
+reg00: base=0x00000000 ( 0MB), size= 64MB: write-back, count=1
+reg01: base=0xfb000000 (4016MB), size= 16MB: write-combining, count=1
+reg02: base=0xfb000000 (4016MB), size= 4kB: uncachable, count=1
+
+Some cards (especially Voodoo Graphics boards) need this 4 kB area
+excluded from the beginning of the region because it is used for
+registers.
+
+NOTE: You can only create type=uncachable region, if the first
+region that you created is type=write-combining.
+===============================================================================
+Removing MTRRs from the C-shell:
+% echo "disable=2" >! /proc/mtrr
+or using bash:
+% echo "disable=2" >| /proc/mtrr
+===============================================================================
+Reading MTRRs from a C program using ioctl()'s:
+
+/* mtrr-show.c
+
+ Source file for mtrr-show (example program to show MTRRs using ioctl()'s)
+
+ Copyright (C) 1997-1998 Richard Gooch
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@atnf.csiro.au
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+*/
+
+/*
+ This program will use an ioctl() on /proc/mtrr to show the current MTRR
+ settings. This is an alternative to reading /proc/mtrr.
+
+
+ Written by Richard Gooch 17-DEC-1997
+
+ Last updated by Richard Gooch 2-MAY-1998
+
+
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <asm/mtrr.h>
+
+#define TRUE 1
+#define FALSE 0
+#define ERRSTRING strerror (errno)
+
+static char *mtrr_strings[MTRR_NUM_TYPES] =
+{
+ "uncachable", /* 0 */
+ "write-combining", /* 1 */
+ "?", /* 2 */
+ "?", /* 3 */
+ "write-through", /* 4 */
+ "write-protect", /* 5 */
+ "write-back", /* 6 */
+};
+
+int main ()
+{
+ int fd;
+ struct mtrr_gentry gentry;
+
+ if ( ( fd = open ("/proc/mtrr", O_RDONLY, 0) ) == -1 )
+ {
+ if (errno == ENOENT)
+ {
+ fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n",
+ stderr);
+ exit (1);
+ }
+ fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING);
+ exit (2);
+ }
+ for (gentry.regnum = 0; ioctl (fd, MTRRIOC_GET_ENTRY, &gentry) == 0;
+ ++gentry.regnum)
+ {
+ if (gentry.size < 1)
+ {
+ fprintf (stderr, "Register: %u disabled\n", gentry.regnum);
+ continue;
+ }
+ fprintf (stderr, "Register: %u base: 0x%lx size: 0x%lx type: %s\n",
+ gentry.regnum, gentry.base, gentry.size,
+ mtrr_strings[gentry.type]);
+ }
+ if (errno == EINVAL) exit (0);
+ fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING);
+ exit (3);
+} /* End Function main */
+===============================================================================
+Creating MTRRs from a C programme using ioctl()'s:
+
+/* mtrr-add.c
+
+ Source file for mtrr-add (example programme to add an MTRRs using ioctl())
+
+ Copyright (C) 1997-1998 Richard Gooch
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Richard Gooch may be reached by email at rgooch@atnf.csiro.au
+ The postal address is:
+ Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
+*/
+
+/*
+ This programme will use an ioctl() on /proc/mtrr to add an entry. The first
+ available mtrr is used. This is an alternative to writing /proc/mtrr.
+
+
+ Written by Richard Gooch 17-DEC-1997
+
+ Last updated by Richard Gooch 2-MAY-1998
+
+
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <asm/mtrr.h>
+
+#define TRUE 1
+#define FALSE 0
+#define ERRSTRING strerror (errno)
+
+static char *mtrr_strings[MTRR_NUM_TYPES] =
+{
+ "uncachable", /* 0 */
+ "write-combining", /* 1 */
+ "?", /* 2 */
+ "?", /* 3 */
+ "write-through", /* 4 */
+ "write-protect", /* 5 */
+ "write-back", /* 6 */
+};
+
+int main (int argc, char **argv)
+{
+ int fd;
+ struct mtrr_sentry sentry;
+
+ if (argc != 4)
+ {
+ fprintf (stderr, "Usage:\tmtrr-add base size type\n");
+ exit (1);
+ }
+ sentry.base = strtoul (argv[1], NULL, 0);
+ sentry.size = strtoul (argv[2], NULL, 0);
+ for (sentry.type = 0; sentry.type < MTRR_NUM_TYPES; ++sentry.type)
+ {
+ if (strcmp (argv[3], mtrr_strings[sentry.type]) == 0) break;
+ }
+ if (sentry.type >= MTRR_NUM_TYPES)
+ {
+ fprintf (stderr, "Illegal type: \"%s\"\n", argv[3]);
+ exit (2);
+ }
+ if ( ( fd = open ("/proc/mtrr", O_WRONLY, 0) ) == -1 )
+ {
+ if (errno == ENOENT)
+ {
+ fputs ("/proc/mtrr not found: not supported or you don't have a PPro?\n",
+ stderr);
+ exit (3);
+ }
+ fprintf (stderr, "Error opening /proc/mtrr\t%s\n", ERRSTRING);
+ exit (4);
+ }
+ if (ioctl (fd, MTRRIOC_ADD_ENTRY, &sentry) == -1)
+ {
+ fprintf (stderr, "Error doing ioctl(2) on /dev/mtrr\t%s\n", ERRSTRING);
+ exit (5);
+ }
+ fprintf (stderr, "Sleeping for 5 seconds so you can see the new entry\n");
+ sleep (5);
+ close (fd);
+ fputs ("I've just closed /proc/mtrr so now the new entry should be gone\n",
+ stderr);
+} /* End Function main */
+===============================================================================
diff --git a/Documentation/x86/pat.txt b/Documentation/x86/pat.txt
new file mode 100644
index 000000000..cf08c9fff
--- /dev/null
+++ b/Documentation/x86/pat.txt
@@ -0,0 +1,160 @@
+
+PAT (Page Attribute Table)
+
+x86 Page Attribute Table (PAT) allows for setting the memory attribute at the
+page level granularity. PAT is complementary to the MTRR settings which allows
+for setting of memory types over physical address ranges. However, PAT is
+more flexible than MTRR due to its capability to set attributes at page level
+and also due to the fact that there are no hardware limitations on number of
+such attribute settings allowed. Added flexibility comes with guidelines for
+not having memory type aliasing for the same physical memory with multiple
+virtual addresses.
+
+PAT allows for different types of memory attributes. The most commonly used
+ones that will be supported at this time are Write-back, Uncached,
+Write-combined and Uncached Minus.
+
+
+PAT APIs
+--------
+
+There are many different APIs in the kernel that allows setting of memory
+attributes at the page level. In order to avoid aliasing, these interfaces
+should be used thoughtfully. Below is a table of interfaces available,
+their intended usage and their memory attribute relationships. Internally,
+these APIs use a reserve_memtype()/free_memtype() interface on the physical
+address range to avoid any aliasing.
+
+
+-------------------------------------------------------------------
+API | RAM | ACPI,... | Reserved/Holes |
+-----------------------|----------|------------|------------------|
+ | | | |
+ioremap | -- | UC- | UC- |
+ | | | |
+ioremap_cache | -- | WB | WB |
+ | | | |
+ioremap_nocache | -- | UC- | UC- |
+ | | | |
+ioremap_wc | -- | -- | WC |
+ | | | |
+set_memory_uc | UC- | -- | -- |
+ set_memory_wb | | | |
+ | | | |
+set_memory_wc | WC | -- | -- |
+ set_memory_wb | | | |
+ | | | |
+pci sysfs resource | -- | -- | UC- |
+ | | | |
+pci sysfs resource_wc | -- | -- | WC |
+ is IORESOURCE_PREFETCH| | | |
+ | | | |
+pci proc | -- | -- | UC- |
+ !PCIIOC_WRITE_COMBINE | | | |
+ | | | |
+pci proc | -- | -- | WC |
+ PCIIOC_WRITE_COMBINE | | | |
+ | | | |
+/dev/mem | -- | WB/WC/UC- | WB/WC/UC- |
+ read-write | | | |
+ | | | |
+/dev/mem | -- | UC- | UC- |
+ mmap SYNC flag | | | |
+ | | | |
+/dev/mem | -- | WB/WC/UC- | WB/WC/UC- |
+ mmap !SYNC flag | |(from exist-| (from exist- |
+ and | | ing alias)| ing alias) |
+ any alias to this area| | | |
+ | | | |
+/dev/mem | -- | WB | WB |
+ mmap !SYNC flag | | | |
+ no alias to this area | | | |
+ and | | | |
+ MTRR says WB | | | |
+ | | | |
+/dev/mem | -- | -- | UC- |
+ mmap !SYNC flag | | | |
+ no alias to this area | | | |
+ and | | | |
+ MTRR says !WB | | | |
+ | | | |
+-------------------------------------------------------------------
+
+Advanced APIs for drivers
+-------------------------
+A. Exporting pages to users with remap_pfn_range, io_remap_pfn_range,
+vm_insert_pfn
+
+Drivers wanting to export some pages to userspace do it by using mmap
+interface and a combination of
+1) pgprot_noncached()
+2) io_remap_pfn_range() or remap_pfn_range() or vm_insert_pfn()
+
+With PAT support, a new API pgprot_writecombine is being added. So, drivers can
+continue to use the above sequence, with either pgprot_noncached() or
+pgprot_writecombine() in step 1, followed by step 2.
+
+In addition, step 2 internally tracks the region as UC or WC in memtype
+list in order to ensure no conflicting mapping.
+
+Note that this set of APIs only works with IO (non RAM) regions. If driver
+wants to export a RAM region, it has to do set_memory_uc() or set_memory_wc()
+as step 0 above and also track the usage of those pages and use set_memory_wb()
+before the page is freed to free pool.
+
+
+
+Notes:
+
+-- in the above table mean "Not suggested usage for the API". Some of the --'s
+are strictly enforced by the kernel. Some others are not really enforced
+today, but may be enforced in future.
+
+For ioremap and pci access through /sys or /proc - The actual type returned
+can be more restrictive, in case of any existing aliasing for that address.
+For example: If there is an existing uncached mapping, a new ioremap_wc can
+return uncached mapping in place of write-combine requested.
+
+set_memory_[uc|wc] and set_memory_wb should be used in pairs, where driver will
+first make a region uc or wc and switch it back to wb after use.
+
+Over time writes to /proc/mtrr will be deprecated in favor of using PAT based
+interfaces. Users writing to /proc/mtrr are suggested to use above interfaces.
+
+Drivers should use ioremap_[uc|wc] to access PCI BARs with [uc|wc] access
+types.
+
+Drivers should use set_memory_[uc|wc] to set access type for RAM ranges.
+
+
+PAT debugging
+-------------
+
+With CONFIG_DEBUG_FS enabled, PAT memtype list can be examined by
+
+# mount -t debugfs debugfs /sys/kernel/debug
+# cat /sys/kernel/debug/x86/pat_memtype_list
+PAT memtype list:
+uncached-minus @ 0x7fadf000-0x7fae0000
+uncached-minus @ 0x7fb19000-0x7fb1a000
+uncached-minus @ 0x7fb1a000-0x7fb1b000
+uncached-minus @ 0x7fb1b000-0x7fb1c000
+uncached-minus @ 0x7fb1c000-0x7fb1d000
+uncached-minus @ 0x7fb1d000-0x7fb1e000
+uncached-minus @ 0x7fb1e000-0x7fb25000
+uncached-minus @ 0x7fb25000-0x7fb26000
+uncached-minus @ 0x7fb26000-0x7fb27000
+uncached-minus @ 0x7fb27000-0x7fb28000
+uncached-minus @ 0x7fb28000-0x7fb2e000
+uncached-minus @ 0x7fb2e000-0x7fb2f000
+uncached-minus @ 0x7fb2f000-0x7fb30000
+uncached-minus @ 0x7fb31000-0x7fb32000
+uncached-minus @ 0x80000000-0x90000000
+
+This list shows physical address ranges and various PAT settings used to
+access those physical address ranges.
+
+Another, more verbose way of getting PAT related debug messages is with
+"debugpat" boot parameter. With this parameter, various debug messages are
+printed to dmesg log.
+
diff --git a/Documentation/x86/tlb.txt b/Documentation/x86/tlb.txt
new file mode 100644
index 000000000..39d172326
--- /dev/null
+++ b/Documentation/x86/tlb.txt
@@ -0,0 +1,75 @@
+When the kernel unmaps or modified the attributes of a range of
+memory, it has two choices:
+ 1. Flush the entire TLB with a two-instruction sequence. This is
+ a quick operation, but it causes collateral damage: TLB entries
+ from areas other than the one we are trying to flush will be
+ destroyed and must be refilled later, at some cost.
+ 2. Use the invlpg instruction to invalidate a single page at a
+ time. This could potentialy cost many more instructions, but
+ it is a much more precise operation, causing no collateral
+ damage to other TLB entries.
+
+Which method to do depends on a few things:
+ 1. The size of the flush being performed. A flush of the entire
+ address space is obviously better performed by flushing the
+ entire TLB than doing 2^48/PAGE_SIZE individual flushes.
+ 2. The contents of the TLB. If the TLB is empty, then there will
+ be no collateral damage caused by doing the global flush, and
+ all of the individual flush will have ended up being wasted
+ work.
+ 3. The size of the TLB. The larger the TLB, the more collateral
+ damage we do with a full flush. So, the larger the TLB, the
+ more attrative an individual flush looks. Data and
+ instructions have separate TLBs, as do different page sizes.
+ 4. The microarchitecture. The TLB has become a multi-level
+ cache on modern CPUs, and the global flushes have become more
+ expensive relative to single-page flushes.
+
+There is obviously no way the kernel can know all these things,
+especially the contents of the TLB during a given flush. The
+sizes of the flush will vary greatly depending on the workload as
+well. There is essentially no "right" point to choose.
+
+You may be doing too many individual invalidations if you see the
+invlpg instruction (or instructions _near_ it) show up high in
+profiles. If you believe that individual invalidations being
+called too often, you can lower the tunable:
+
+ /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
+
+This will cause us to do the global flush for more cases.
+Lowering it to 0 will disable the use of the individual flushes.
+Setting it to 1 is a very conservative setting and it should
+never need to be 0 under normal circumstances.
+
+Despite the fact that a single individual flush on x86 is
+guaranteed to flush a full 2MB [1], hugetlbfs always uses the full
+flushes. THP is treated exactly the same as normal memory.
+
+You might see invlpg inside of flush_tlb_mm_range() show up in
+profiles, or you can use the trace_tlb_flush() tracepoints. to
+determine how long the flush operations are taking.
+
+Essentially, you are balancing the cycles you spend doing invlpg
+with the cycles that you spend refilling the TLB later.
+
+You can measure how expensive TLB refills are by using
+performance counters and 'perf stat', like this:
+
+perf stat -e
+ cpu/event=0x8,umask=0x84,name=dtlb_load_misses_walk_duration/,
+ cpu/event=0x8,umask=0x82,name=dtlb_load_misses_walk_completed/,
+ cpu/event=0x49,umask=0x4,name=dtlb_store_misses_walk_duration/,
+ cpu/event=0x49,umask=0x2,name=dtlb_store_misses_walk_completed/,
+ cpu/event=0x85,umask=0x4,name=itlb_misses_walk_duration/,
+ cpu/event=0x85,umask=0x2,name=itlb_misses_walk_completed/
+
+That works on an IvyBridge-era CPU (i5-3320M). Different CPUs
+may have differently-named counters, but they should at least
+be there in some form. You can use pmu-tools 'ocperf list'
+(https://github.com/andikleen/pmu-tools) to find the right
+counters for a given CPU.
+
+1. A footnote in Intel's SDM "4.10.4.2 Recommended Invalidation"
+ says: "One execution of INVLPG is sufficient even for a page
+ with size greater than 4 KBytes."
diff --git a/Documentation/x86/usb-legacy-support.txt b/Documentation/x86/usb-legacy-support.txt
new file mode 100644
index 000000000..1894cdfc6
--- /dev/null
+++ b/Documentation/x86/usb-legacy-support.txt
@@ -0,0 +1,44 @@
+USB Legacy support
+~~~~~~~~~~~~~~~~~~
+
+Vojtech Pavlik <vojtech@suse.cz>, January 2004
+
+
+Also known as "USB Keyboard" or "USB Mouse support" in the BIOS Setup is a
+feature that allows one to use the USB mouse and keyboard as if they were
+their classic PS/2 counterparts. This means one can use an USB keyboard to
+type in LILO for example.
+
+It has several drawbacks, though:
+
+1) On some machines, the emulated PS/2 mouse takes over even when no USB
+ mouse is present and a real PS/2 mouse is present. In that case the extra
+ features (wheel, extra buttons, touchpad mode) of the real PS/2 mouse may
+ not be available.
+
+2) If CONFIG_HIGHMEM64G is enabled, the PS/2 mouse emulation can cause
+ system crashes, because the SMM BIOS is not expecting to be in PAE mode.
+ The Intel E7505 is a typical machine where this happens.
+
+3) If AMD64 64-bit mode is enabled, again system crashes often happen,
+ because the SMM BIOS isn't expecting the CPU to be in 64-bit mode. The
+ BIOS manufacturers only test with Windows, and Windows doesn't do 64-bit
+ yet.
+
+Solutions:
+
+Problem 1) can be solved by loading the USB drivers prior to loading the
+PS/2 mouse driver. Since the PS/2 mouse driver is in 2.6 compiled into
+the kernel unconditionally, this means the USB drivers need to be
+compiled-in, too.
+
+Problem 2) can currently only be solved by either disabling HIGHMEM64G
+in the kernel config or USB Legacy support in the BIOS. A BIOS update
+could help, but so far no such update exists.
+
+Problem 3) is usually fixed by a BIOS update. Check the board
+manufacturers web site. If an update is not available, disable USB
+Legacy support in the BIOS. If this alone doesn't help, try also adding
+idle=poll on the kernel command line. The BIOS may be entering the SMM
+on the HLT instruction as well.
+
diff --git a/Documentation/x86/x86_64/00-INDEX b/Documentation/x86/x86_64/00-INDEX
new file mode 100644
index 000000000..92fc20ab5
--- /dev/null
+++ b/Documentation/x86/x86_64/00-INDEX
@@ -0,0 +1,16 @@
+00-INDEX
+ - This file
+boot-options.txt
+ - AMD64-specific boot options.
+cpu-hotplug-spec
+ - Firmware support for CPU hotplug under Linux/x86-64
+fake-numa-for-cpusets
+ - Using numa=fake and CPUSets for Resource Management
+kernel-stacks
+ - Context-specific per-processor interrupt stacks.
+machinecheck
+ - Configurable sysfs parameters for the x86-64 machine check code.
+mm.txt
+ - Memory layout of x86-64 (4 level page tables, 46 bits physical).
+uefi.txt
+ - Booting Linux via Unified Extensible Firmware Interface.
diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
new file mode 100644
index 000000000..522347929
--- /dev/null
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -0,0 +1,284 @@
+AMD64 specific boot options
+
+There are many others (usually documented in driver documentation), but
+only the AMD64 specific ones are listed here.
+
+Machine check
+
+ Please see Documentation/x86/x86_64/machinecheck for sysfs runtime tunables.
+
+ mce=off
+ Disable machine check
+ mce=no_cmci
+ Disable CMCI(Corrected Machine Check Interrupt) that
+ Intel processor supports. Usually this disablement is
+ not recommended, but it might be handy if your hardware
+ is misbehaving.
+ Note that you'll get more problems without CMCI than with
+ due to the shared banks, i.e. you might get duplicated
+ error logs.
+ mce=dont_log_ce
+ Don't make logs for corrected errors. All events reported
+ as corrected are silently cleared by OS.
+ This option will be useful if you have no interest in any
+ of corrected errors.
+ mce=ignore_ce
+ Disable features for corrected errors, e.g. polling timer
+ and CMCI. All events reported as corrected are not cleared
+ by OS and remained in its error banks.
+ Usually this disablement is not recommended, however if
+ there is an agent checking/clearing corrected errors
+ (e.g. BIOS or hardware monitoring applications), conflicting
+ with OS's error handling, and you cannot deactivate the agent,
+ then this option will be a help.
+ mce=bootlog
+ Enable logging of machine checks left over from booting.
+ Disabled by default on AMD because some BIOS leave bogus ones.
+ If your BIOS doesn't do that it's a good idea to enable though
+ to make sure you log even machine check events that result
+ in a reboot. On Intel systems it is enabled by default.
+ mce=nobootlog
+ Disable boot machine check logging.
+ mce=tolerancelevel[,monarchtimeout] (number,number)
+ tolerance levels:
+ 0: always panic on uncorrected errors, log corrected errors
+ 1: panic or SIGBUS on uncorrected errors, log corrected errors
+ 2: SIGBUS or log uncorrected errors, log corrected errors
+ 3: never panic or SIGBUS, log all errors (for testing only)
+ Default is 1
+ Can be also set using sysfs which is preferable.
+ monarchtimeout:
+ Sets the time in us to wait for other CPUs on machine checks. 0
+ to disable.
+ mce=bios_cmci_threshold
+ Don't overwrite the bios-set CMCI threshold. This boot option
+ prevents Linux from overwriting the CMCI threshold set by the
+ bios. Without this option, Linux always sets the CMCI
+ threshold to 1. Enabling this may make memory predictive failure
+ analysis less effective if the bios sets thresholds for memory
+ errors since we will not see details for all errors.
+
+ nomce (for compatibility with i386): same as mce=off
+
+ Everything else is in sysfs now.
+
+APICs
+
+ apic Use IO-APIC. Default
+
+ noapic Don't use the IO-APIC.
+
+ disableapic Don't use the local APIC
+
+ nolapic Don't use the local APIC (alias for i386 compatibility)
+
+ pirq=... See Documentation/x86/i386/IO-APIC.txt
+
+ noapictimer Don't set up the APIC timer
+
+ no_timer_check Don't check the IO-APIC timer. This can work around
+ problems with incorrect timer initialization on some boards.
+ apicpmtimer
+ Do APIC timer calibration using the pmtimer. Implies
+ apicmaintimer. Useful when your PIT timer is totally
+ broken.
+
+Timing
+
+ notsc
+ Don't use the CPU time stamp counter to read the wall time.
+ This can be used to work around timing problems on multiprocessor systems
+ with not properly synchronized CPUs.
+
+ nohpet
+ Don't use the HPET timer.
+
+Idle loop
+
+ idle=poll
+ Don't do power saving in the idle loop using HLT, but poll for rescheduling
+ event. This will make the CPUs eat a lot more power, but may be useful
+ to get slightly better performance in multiprocessor benchmarks. It also
+ makes some profiling using performance counters more accurate.
+ Please note that on systems with MONITOR/MWAIT support (like Intel EM64T
+ CPUs) this option has no performance advantage over the normal idle loop.
+ It may also interact badly with hyperthreading.
+
+Rebooting
+
+ reboot=b[ios] | t[riple] | k[bd] | a[cpi] | e[fi] [, [w]arm | [c]old]
+ bios Use the CPU reboot vector for warm reset
+ warm Don't set the cold reboot flag
+ cold Set the cold reboot flag
+ triple Force a triple fault (init)
+ kbd Use the keyboard controller. cold reset (default)
+ acpi Use the ACPI RESET_REG in the FADT. If ACPI is not configured or the
+ ACPI reset does not work, the reboot path attempts the reset using
+ the keyboard controller.
+ efi Use efi reset_system runtime service. If EFI is not configured or the
+ EFI reset does not work, the reboot path attempts the reset using
+ the keyboard controller.
+
+ Using warm reset will be much faster especially on big memory
+ systems because the BIOS will not go through the memory check.
+ Disadvantage is that not all hardware will be completely reinitialized
+ on reboot so there may be boot problems on some systems.
+
+ reboot=force
+
+ Don't stop other CPUs on reboot. This can make reboot more reliable
+ in some cases.
+
+Non Executable Mappings
+
+ noexec=on|off
+
+ on Enable(default)
+ off Disable
+
+NUMA
+
+ numa=off Only set up a single NUMA node spanning all memory.
+
+ numa=noacpi Don't parse the SRAT table for NUMA setup
+
+ numa=fake=<size>[MG]
+ If given as a memory unit, fills all system RAM with nodes of
+ size interleaved over physical nodes.
+
+ numa=fake=<N>
+ If given as an integer, fills all system RAM with N fake nodes
+ interleaved over physical nodes.
+
+ACPI
+
+ acpi=off Don't enable ACPI
+ acpi=ht Use ACPI boot table parsing, but don't enable ACPI
+ interpreter
+ acpi=force Force ACPI on (currently not needed)
+
+ acpi=strict Disable out of spec ACPI workarounds.
+
+ acpi_sci={edge,level,high,low} Set up ACPI SCI interrupt.
+
+ acpi=noirq Don't route interrupts
+
+ acpi=nocmcff Disable firmware first mode for corrected errors. This
+ disables parsing the HEST CMC error source to check if
+ firmware has set the FF flag. This may result in
+ duplicate corrected error reports.
+
+PCI
+
+ pci=off Don't use PCI
+ pci=conf1 Use conf1 access.
+ pci=conf2 Use conf2 access.
+ pci=rom Assign ROMs.
+ pci=assign-busses Assign busses
+ pci=irqmask=MASK Set PCI interrupt mask to MASK
+ pci=lastbus=NUMBER Scan up to NUMBER busses, no matter what the mptable says.
+ pci=noacpi Don't use ACPI to set up PCI interrupt routing.
+
+IOMMU (input/output memory management unit)
+
+ Currently four x86-64 PCI-DMA mapping implementations exist:
+
+ 1. <arch/x86_64/kernel/pci-nommu.c>: use no hardware/software IOMMU at all
+ (e.g. because you have < 3 GB memory).
+ Kernel boot message: "PCI-DMA: Disabling IOMMU"
+
+ 2. <arch/x86/kernel/amd_gart_64.c>: AMD GART based hardware IOMMU.
+ Kernel boot message: "PCI-DMA: using GART IOMMU"
+
+ 3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used
+ e.g. if there is no hardware IOMMU in the system and it is need because
+ you have >3GB memory or told the kernel to us it (iommu=soft))
+ Kernel boot message: "PCI-DMA: Using software bounce buffering
+ for IO (SWIOTLB)"
+
+ 4. <arch/x86_64/pci-calgary.c> : IBM Calgary hardware IOMMU. Used in IBM
+ pSeries and xSeries servers. This hardware IOMMU supports DMA address
+ mapping with memory protection, etc.
+ Kernel boot message: "PCI-DMA: Using Calgary IOMMU"
+
+ iommu=[<size>][,noagp][,off][,force][,noforce][,leak[=<nr_of_leak_pages>]
+ [,memaper[=<order>]][,merge][,forcesac][,fullflush][,nomerge]
+ [,noaperture][,calgary]
+
+ General iommu options:
+ off Don't initialize and use any kind of IOMMU.
+ noforce Don't force hardware IOMMU usage when it is not needed.
+ (default).
+ force Force the use of the hardware IOMMU even when it is
+ not actually needed (e.g. because < 3 GB memory).
+ soft Use software bounce buffering (SWIOTLB) (default for
+ Intel machines). This can be used to prevent the usage
+ of an available hardware IOMMU.
+
+ iommu options only relevant to the AMD GART hardware IOMMU:
+ <size> Set the size of the remapping area in bytes.
+ allowed Overwrite iommu off workarounds for specific chipsets.
+ fullflush Flush IOMMU on each allocation (default).
+ nofullflush Don't use IOMMU fullflush.
+ leak Turn on simple iommu leak tracing (only when
+ CONFIG_IOMMU_LEAK is on). Default number of leak pages
+ is 20.
+ memaper[=<order>] Allocate an own aperture over RAM with size 32MB<<order.
+ (default: order=1, i.e. 64MB)
+ merge Do scatter-gather (SG) merging. Implies "force"
+ (experimental).
+ nomerge Don't do scatter-gather (SG) merging.
+ noaperture Ask the IOMMU not to touch the aperture for AGP.
+ forcesac Force single-address cycle (SAC) mode for masks <40bits
+ (experimental).
+ noagp Don't initialize the AGP driver and use full aperture.
+ allowdac Allow double-address cycle (DAC) mode, i.e. DMA >4GB.
+ DAC is used with 32-bit PCI to push a 64-bit address in
+ two cycles. When off all DMA over >4GB is forced through
+ an IOMMU or software bounce buffering.
+ nodac Forbid DAC mode, i.e. DMA >4GB.
+ panic Always panic when IOMMU overflows.
+ calgary Use the Calgary IOMMU if it is available
+
+ iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU
+ implementation:
+ swiotlb=<pages>[,force]
+ <pages> Prereserve that many 128K pages for the software IO
+ bounce buffering.
+ force Force all IO through the software TLB.
+
+ Settings for the IBM Calgary hardware IOMMU currently found in IBM
+ pSeries and xSeries machines:
+
+ calgary=[64k,128k,256k,512k,1M,2M,4M,8M]
+ calgary=[translate_empty_slots]
+ calgary=[disable=<PCI bus number>]
+ panic Always panic when IOMMU overflows
+
+ 64k,...,8M - Set the size of each PCI slot's translation table
+ when using the Calgary IOMMU. This is the size of the translation
+ table itself in main memory. The smallest table, 64k, covers an IO
+ space of 32MB; the largest, 8MB table, can cover an IO space of
+ 4GB. Normally the kernel will make the right choice by itself.
+
+ translate_empty_slots - Enable translation even on slots that have
+ no devices attached to them, in case a device will be hotplugged
+ in the future.
+
+ disable=<PCI bus number> - Disable translation on a given PHB. For
+ example, the built-in graphics adapter resides on the first bridge
+ (PCI bus number 0); if translation (isolation) is enabled on this
+ bridge, X servers that access the hardware directly from user
+ space might stop working. Use this option if you have devices that
+ are accessed from userspace directly on some PCI host bridge.
+
+Debugging
+
+ kstack=N Print N words from the kernel stack in oops dumps.
+
+Miscellaneous
+
+ nogbpages
+ Do not use GB pages for kernel direct mappings.
+ gbpages
+ Use GB pages for kernel direct mappings.
diff --git a/Documentation/x86/x86_64/cpu-hotplug-spec b/Documentation/x86/x86_64/cpu-hotplug-spec
new file mode 100644
index 000000000..3c23e0587
--- /dev/null
+++ b/Documentation/x86/x86_64/cpu-hotplug-spec
@@ -0,0 +1,21 @@
+Firmware support for CPU hotplug under Linux/x86-64
+---------------------------------------------------
+
+Linux/x86-64 supports CPU hotplug now. For various reasons Linux wants to
+know in advance of boot time the maximum number of CPUs that could be plugged
+into the system. ACPI 3.0 currently has no official way to supply
+this information from the firmware to the operating system.
+
+In ACPI each CPU needs an LAPIC object in the MADT table (5.2.11.5 in the
+ACPI 3.0 specification). ACPI already has the concept of disabled LAPIC
+objects by setting the Enabled bit in the LAPIC object to zero.
+
+For CPU hotplug Linux/x86-64 expects now that any possible future hotpluggable
+CPU is already available in the MADT. If the CPU is not available yet
+it should have its LAPIC Enabled bit set to 0. Linux will use the number
+of disabled LAPICs to compute the maximum number of future CPUs.
+
+In the worst case the user can overwrite this choice using a command line
+option (additional_cpus=...), but it is recommended to supply the correct
+number (or a reasonable approximation of it, with erring towards more not less)
+in the MADT to avoid manual configuration.
diff --git a/Documentation/x86/x86_64/fake-numa-for-cpusets b/Documentation/x86/x86_64/fake-numa-for-cpusets
new file mode 100644
index 000000000..0f11d9bec
--- /dev/null
+++ b/Documentation/x86/x86_64/fake-numa-for-cpusets
@@ -0,0 +1,67 @@
+Using numa=fake and CPUSets for Resource Management
+Written by David Rientjes <rientjes@cs.washington.edu>
+
+This document describes how the numa=fake x86_64 command-line option can be used
+in conjunction with cpusets for coarse memory management. Using this feature,
+you can create fake NUMA nodes that represent contiguous chunks of memory and
+assign them to cpusets and their attached tasks. This is a way of limiting the
+amount of system memory that are available to a certain class of tasks.
+
+For more information on the features of cpusets, see
+Documentation/cgroups/cpusets.txt.
+There are a number of different configurations you can use for your needs. For
+more information on the numa=fake command line option and its various ways of
+configuring fake nodes, see Documentation/x86/x86_64/boot-options.txt.
+
+For the purposes of this introduction, we'll assume a very primitive NUMA
+emulation setup of "numa=fake=4*512,". This will split our system memory into
+four equal chunks of 512M each that we can now use to assign to cpusets. As
+you become more familiar with using this combination for resource control,
+you'll determine a better setup to minimize the number of nodes you have to deal
+with.
+
+A machine may be split as follows with "numa=fake=4*512," as reported by dmesg:
+
+ Faking node 0 at 0000000000000000-0000000020000000 (512MB)
+ Faking node 1 at 0000000020000000-0000000040000000 (512MB)
+ Faking node 2 at 0000000040000000-0000000060000000 (512MB)
+ Faking node 3 at 0000000060000000-0000000080000000 (512MB)
+ ...
+ On node 0 totalpages: 130975
+ On node 1 totalpages: 131072
+ On node 2 totalpages: 131072
+ On node 3 totalpages: 131072
+
+Now following the instructions for mounting the cpusets filesystem from
+Documentation/cgroups/cpusets.txt, you can assign fake nodes (i.e. contiguous memory
+address spaces) to individual cpusets:
+
+ [root@xroads /]# mkdir exampleset
+ [root@xroads /]# mount -t cpuset none exampleset
+ [root@xroads /]# mkdir exampleset/ddset
+ [root@xroads /]# cd exampleset/ddset
+ [root@xroads /exampleset/ddset]# echo 0-1 > cpus
+ [root@xroads /exampleset/ddset]# echo 0-1 > mems
+
+Now this cpuset, 'ddset', will only allowed access to fake nodes 0 and 1 for
+memory allocations (1G).
+
+You can now assign tasks to these cpusets to limit the memory resources
+available to them according to the fake nodes assigned as mems:
+
+ [root@xroads /exampleset/ddset]# echo $$ > tasks
+ [root@xroads /exampleset/ddset]# dd if=/dev/zero of=tmp bs=1024 count=1G
+ [1] 13425
+
+Notice the difference between the system memory usage as reported by
+/proc/meminfo between the restricted cpuset case above and the unrestricted
+case (i.e. running the same 'dd' command without assigning it to a fake NUMA
+cpuset):
+ Unrestricted Restricted
+ MemTotal: 3091900 kB 3091900 kB
+ MemFree: 42113 kB 1513236 kB
+
+This allows for coarse memory management for the tasks you assign to particular
+cpusets. Since cpusets can form a hierarchy, you can create some pretty
+interesting combinations of use-cases for various classes of tasks for your
+memory management needs.
diff --git a/Documentation/x86/x86_64/kernel-stacks b/Documentation/x86/x86_64/kernel-stacks
new file mode 100644
index 000000000..e3c8a49d1
--- /dev/null
+++ b/Documentation/x86/x86_64/kernel-stacks
@@ -0,0 +1,101 @@
+Most of the text from Keith Owens, hacked by AK
+
+x86_64 page size (PAGE_SIZE) is 4K.
+
+Like all other architectures, x86_64 has a kernel stack for every
+active thread. These thread stacks are THREAD_SIZE (2*PAGE_SIZE) big.
+These stacks contain useful data as long as a thread is alive or a
+zombie. While the thread is in user space the kernel stack is empty
+except for the thread_info structure at the bottom.
+
+In addition to the per thread stacks, there are specialized stacks
+associated with each CPU. These stacks are only used while the kernel
+is in control on that CPU; when a CPU returns to user space the
+specialized stacks contain no useful data. The main CPU stacks are:
+
+* Interrupt stack. IRQSTACKSIZE
+
+ Used for external hardware interrupts. If this is the first external
+ hardware interrupt (i.e. not a nested hardware interrupt) then the
+ kernel switches from the current task to the interrupt stack. Like
+ the split thread and interrupt stacks on i386, this gives more room
+ for kernel interrupt processing without having to increase the size
+ of every per thread stack.
+
+ The interrupt stack is also used when processing a softirq.
+
+Switching to the kernel interrupt stack is done by software based on a
+per CPU interrupt nest counter. This is needed because x86-64 "IST"
+hardware stacks cannot nest without races.
+
+x86_64 also has a feature which is not available on i386, the ability
+to automatically switch to a new stack for designated events such as
+double fault or NMI, which makes it easier to handle these unusual
+events on x86_64. This feature is called the Interrupt Stack Table
+(IST). There can be up to 7 IST entries per CPU. The IST code is an
+index into the Task State Segment (TSS). The IST entries in the TSS
+point to dedicated stacks; each stack can be a different size.
+
+An IST is selected by a non-zero value in the IST field of an
+interrupt-gate descriptor. When an interrupt occurs and the hardware
+loads such a descriptor, the hardware automatically sets the new stack
+pointer based on the IST value, then invokes the interrupt handler. If
+the interrupt came from user mode, then the interrupt handler prologue
+will switch back to the per-thread stack. If software wants to allow
+nested IST interrupts then the handler must adjust the IST values on
+entry to and exit from the interrupt handler. (This is occasionally
+done, e.g. for debug exceptions.)
+
+Events with different IST codes (i.e. with different stacks) can be
+nested. For example, a debug interrupt can safely be interrupted by an
+NMI. arch/x86_64/kernel/entry.S::paranoidentry adjusts the stack
+pointers on entry to and exit from all IST events, in theory allowing
+IST events with the same code to be nested. However in most cases, the
+stack size allocated to an IST assumes no nesting for the same code.
+If that assumption is ever broken then the stacks will become corrupt.
+
+The currently assigned IST stacks are :-
+
+* STACKFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for interrupt 12 - Stack Fault Exception (#SS).
+
+ This allows the CPU to recover from invalid stack segments. Rarely
+ happens.
+
+* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for interrupt 8 - Double Fault Exception (#DF).
+
+ Invoked when handling one exception causes another exception. Happens
+ when the kernel is very confused (e.g. kernel stack pointer corrupt).
+ Using a separate stack allows the kernel to recover from it well enough
+ in many cases to still output an oops.
+
+* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for non-maskable interrupts (NMI).
+
+ NMI can be delivered at any time, including when the kernel is in the
+ middle of switching stacks. Using IST for NMI events avoids making
+ assumptions about the previous state of the kernel stack.
+
+* DEBUG_STACK. DEBUG_STKSZ
+
+ Used for hardware debug interrupts (interrupt 1) and for software
+ debug interrupts (INT3).
+
+ When debugging a kernel, debug interrupts (both hardware and
+ software) can occur at any time. Using IST for these interrupts
+ avoids making assumptions about the previous state of the kernel
+ stack.
+
+* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
+
+ Used for interrupt 18 - Machine Check Exception (#MC).
+
+ MCE can be delivered at any time, including when the kernel is in the
+ middle of switching stacks. Using IST for MCE events avoids making
+ assumptions about the previous state of the kernel stack.
+
+For more details see the Intel IA32 or AMD AMD64 architecture manuals.
diff --git a/Documentation/x86/x86_64/machinecheck b/Documentation/x86/x86_64/machinecheck
new file mode 100644
index 000000000..b1fb30273
--- /dev/null
+++ b/Documentation/x86/x86_64/machinecheck
@@ -0,0 +1,83 @@
+
+Configurable sysfs parameters for the x86-64 machine check code.
+
+Machine checks report internal hardware error conditions detected
+by the CPU. Uncorrected errors typically cause a machine check
+(often with panic), corrected ones cause a machine check log entry.
+
+Machine checks are organized in banks (normally associated with
+a hardware subsystem) and subevents in a bank. The exact meaning
+of the banks and subevent is CPU specific.
+
+mcelog knows how to decode them.
+
+When you see the "Machine check errors logged" message in the system
+log then mcelog should run to collect and decode machine check entries
+from /dev/mcelog. Normally mcelog should be run regularly from a cronjob.
+
+Each CPU has a directory in /sys/devices/system/machinecheck/machinecheckN
+(N = CPU number)
+
+The directory contains some configurable entries:
+
+Entries:
+
+bankNctl
+(N bank number)
+ 64bit Hex bitmask enabling/disabling specific subevents for bank N
+ When a bit in the bitmask is zero then the respective
+ subevent will not be reported.
+ By default all events are enabled.
+ Note that BIOS maintain another mask to disable specific events
+ per bank. This is not visible here
+
+The following entries appear for each CPU, but they are truly shared
+between all CPUs.
+
+check_interval
+ How often to poll for corrected machine check errors, in seconds
+ (Note output is hexademical). Default 5 minutes. When the poller
+ finds MCEs it triggers an exponential speedup (poll more often) on
+ the polling interval. When the poller stops finding MCEs, it
+ triggers an exponential backoff (poll less often) on the polling
+ interval. The check_interval variable is both the initial and
+ maximum polling interval. 0 means no polling for corrected machine
+ check errors (but some corrected errors might be still reported
+ in other ways)
+
+tolerant
+ Tolerance level. When a machine check exception occurs for a non
+ corrected machine check the kernel can take different actions.
+ Since machine check exceptions can happen any time it is sometimes
+ risky for the kernel to kill a process because it defies
+ normal kernel locking rules. The tolerance level configures
+ how hard the kernel tries to recover even at some risk of
+ deadlock. Higher tolerant values trade potentially better uptime
+ with the risk of a crash or even corruption (for tolerant >= 3).
+
+ 0: always panic on uncorrected errors, log corrected errors
+ 1: panic or SIGBUS on uncorrected errors, log corrected errors
+ 2: SIGBUS or log uncorrected errors, log corrected errors
+ 3: never panic or SIGBUS, log all errors (for testing only)
+
+ Default: 1
+
+ Note this only makes a difference if the CPU allows recovery
+ from a machine check exception. Current x86 CPUs generally do not.
+
+trigger
+ Program to run when a machine check event is detected.
+ This is an alternative to running mcelog regularly from cron
+ and allows to detect events faster.
+monarch_timeout
+ How long to wait for the other CPUs to machine check too on a
+ exception. 0 to disable waiting for other CPUs.
+ Unit: us
+
+TBD document entries for AMD threshold interrupt configuration
+
+For more details about the x86 machine check architecture
+see the Intel and AMD architecture manuals from their developer websites.
+
+For more details about the architecture see
+see http://one.firstfloor.org/~andi/mce.pdf
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
new file mode 100644
index 000000000..05712ac83
--- /dev/null
+++ b/Documentation/x86/x86_64/mm.txt
@@ -0,0 +1,42 @@
+
+<previous description obsolete, deleted>
+
+Virtual memory map with 4 level page tables:
+
+0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
+hole caused by [48:63] sign extension
+ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor
+ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
+ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
+ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
+ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
+ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
+... unused hole ...
+ffffec0000000000 - fffffc0000000000 (=44 bits) kasan shadow memory (16TB)
+... unused hole ...
+ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+... unused hole ...
+ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0
+ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space
+ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
+ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+
+The direct mapping covers all memory in the system up to the highest
+memory address (this means in some cases it can also include PCI memory
+holes).
+
+vmalloc space is lazily synchronized into the different PML4 pages of
+the processes using the page fault handler, with init_level4_pgt as
+reference.
+
+Current X86-64 implementations only support 40 bits of address space,
+but we support up to 46 bits. This expands into MBZ space in the page tables.
+
+->trampoline_pgd:
+
+We map EFI runtime services in the aforementioned PGD in the virtual
+range of 64Gb (arbitrarily set, can be raised if needed)
+
+0xffffffef00000000 - 0xffffffff00000000
+
+-Andi Kleen, Jul 2004
diff --git a/Documentation/x86/x86_64/uefi.txt b/Documentation/x86/x86_64/uefi.txt
new file mode 100644
index 000000000..a5e2b4fdb
--- /dev/null
+++ b/Documentation/x86/x86_64/uefi.txt
@@ -0,0 +1,42 @@
+General note on [U]EFI x86_64 support
+-------------------------------------
+
+The nomenclature EFI and UEFI are used interchangeably in this document.
+
+Although the tools below are _not_ needed for building the kernel,
+the needed bootloader support and associated tools for x86_64 platforms
+with EFI firmware and specifications are listed below.
+
+1. UEFI specification: http://www.uefi.org
+
+2. Booting Linux kernel on UEFI x86_64 platform requires bootloader
+ support. Elilo with x86_64 support can be used.
+
+3. x86_64 platform with EFI/UEFI firmware.
+
+Mechanics:
+---------
+- Build the kernel with the following configuration.
+ CONFIG_FB_EFI=y
+ CONFIG_FRAMEBUFFER_CONSOLE=y
+ If EFI runtime services are expected, the following configuration should
+ be selected.
+ CONFIG_EFI=y
+ CONFIG_EFI_VARS=y or m # optional
+- Create a VFAT partition on the disk
+- Copy the following to the VFAT partition:
+ elilo bootloader with x86_64 support, elilo configuration file,
+ kernel image built in first step and corresponding
+ initrd. Instructions on building elilo and its dependencies
+ can be found in the elilo sourceforge project.
+- Boot to EFI shell and invoke elilo choosing the kernel image built
+ in first step.
+- If some or all EFI runtime services don't work, you can try following
+ kernel command line parameters to turn off some or all EFI runtime
+ services.
+ noefi turn off all EFI runtime services
+ reboot_type=k turn off EFI reboot runtime service
+- If the EFI memory map has additional entries not in the E820 map,
+ you can include those entries in the kernels memory map of available
+ physical RAM by using the following kernel command line parameter.
+ add_efi_memmap include EFI memory map of available physical RAM
diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt
new file mode 100644
index 000000000..82fbdbc1e
--- /dev/null
+++ b/Documentation/x86/zero-page.txt
@@ -0,0 +1,37 @@
+The additional fields in struct boot_params as a part of 32-bit boot
+protocol of kernel. These should be filled by bootloader or 16-bit
+real-mode setup code of the kernel. References/settings to it mainly
+are in:
+
+ arch/x86/include/uapi/asm/bootparam.h
+
+
+Offset Proto Name Meaning
+/Size
+
+000/040 ALL screen_info Text mode or frame buffer information
+ (struct screen_info)
+040/014 ALL apm_bios_info APM BIOS information (struct apm_bios_info)
+058/008 ALL tboot_addr Physical address of tboot shared page
+060/010 ALL ist_info Intel SpeedStep (IST) BIOS support information
+ (struct ist_info)
+080/010 ALL hd0_info hd0 disk parameter, OBSOLETE!!
+090/010 ALL hd1_info hd1 disk parameter, OBSOLETE!!
+0A0/010 ALL sys_desc_table System description table (struct sys_desc_table)
+0B0/010 ALL olpc_ofw_header OLPC's OpenFirmware CIF and friends
+0C0/004 ALL ext_ramdisk_image ramdisk_image high 32bits
+0C4/004 ALL ext_ramdisk_size ramdisk_size high 32bits
+0C8/004 ALL ext_cmd_line_ptr cmd_line_ptr high 32bits
+140/080 ALL edid_info Video mode setup (struct edid_info)
+1C0/020 ALL efi_info EFI 32 information (struct efi_info)
+1E0/004 ALL alk_mem_k Alternative mem check, in KB
+1E4/004 ALL scratch Scratch field for the kernel setup code
+1E8/001 ALL e820_entries Number of entries in e820_map (below)
+1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below)
+1EA/001 ALL edd_mbr_sig_buf_entries Number of entries in edd_mbr_sig_buffer
+ (below)
+1EF/001 ALL sentinel Used to detect broken bootloaders
+290/040 ALL edd_mbr_sig_buffer EDD MBR signatures
+2D0/A00 ALL e820_map E820 memory map table
+ (array of struct e820entry)
+D00/1EC ALL eddbuf EDD data (array of struct edd_info)
diff --git a/Documentation/xillybus.txt b/Documentation/xillybus.txt
new file mode 100644
index 000000000..81d111b4d
--- /dev/null
+++ b/Documentation/xillybus.txt
@@ -0,0 +1,380 @@
+
+ ==========================================
+ Xillybus driver for generic FPGA interface
+ ==========================================
+
+Author: Eli Billauer, Xillybus Ltd. (http://xillybus.com)
+Email: eli.billauer@gmail.com or as advertised on Xillybus' site.
+
+Contents:
+
+ - Introduction
+ -- Background
+ -- Xillybus Overview
+
+ - Usage
+ -- User interface
+ -- Synchronization
+ -- Seekable pipes
+
+- Internals
+ -- Source code organization
+ -- Pipe attributes
+ -- Host never reads from the FPGA
+ -- Channels, pipes, and the message channel
+ -- Data streaming
+ -- Data granularity
+ -- Probing
+ -- Buffer allocation
+ -- The "nonempty" message (supporting poll)
+
+
+INTRODUCTION
+============
+
+Background
+----------
+
+An FPGA (Field Programmable Gate Array) is a piece of logic hardware, which
+can be programmed to become virtually anything that is usually found as a
+dedicated chipset: For instance, a display adapter, network interface card,
+or even a processor with its peripherals. FPGAs are the LEGO of hardware:
+Based upon certain building blocks, you make your own toys the way you like
+them. It's usually pointless to reimplement something that is already
+available on the market as a chipset, so FPGAs are mostly used when some
+special functionality is needed, and the production volume is relatively low
+(hence not justifying the development of an ASIC).
+
+The challenge with FPGAs is that everything is implemented at a very low
+level, even lower than assembly language. In order to allow FPGA designers to
+focus on their specific project, and not reinvent the wheel over and over
+again, pre-designed building blocks, IP cores, are often used. These are the
+FPGA parallels of library functions. IP cores may implement certain
+mathematical functions, a functional unit (e.g. a USB interface), an entire
+processor (e.g. ARM) or anything that might come handy. Think of them as a
+building block, with electrical wires dangling on the sides for connection to
+other blocks.
+
+One of the daunting tasks in FPGA design is communicating with a fullblown
+operating system (actually, with the processor running it): Implementing the
+low-level bus protocol and the somewhat higher-level interface with the host
+(registers, interrupts, DMA etc.) is a project in itself. When the FPGA's
+function is a well-known one (e.g. a video adapter card, or a NIC), it can
+make sense to design the FPGA's interface logic specifically for the project.
+A special driver is then written to present the FPGA as a well-known interface
+to the kernel and/or user space. In that case, there is no reason to treat the
+FPGA differently than any device on the bus.
+
+It's however common that the desired data communication doesn't fit any well-
+known peripheral function. Also, the effort of designing an elegant
+abstraction for the data exchange is often considered too big. In those cases,
+a quicker and possibly less elegant solution is sought: The driver is
+effectively written as a user space program, leaving the kernel space part
+with just elementary data transport. This still requires designing some
+interface logic for the FPGA, and write a simple ad-hoc driver for the kernel.
+
+Xillybus Overview
+-----------------
+
+Xillybus is an IP core and a Linux driver. Together, they form a kit for
+elementary data transport between an FPGA and the host, providing pipe-like
+data streams with a straightforward user interface. It's intended as a low-
+effort solution for mixed FPGA-host projects, for which it makes sense to
+have the project-specific part of the driver running in a user-space program.
+
+Since the communication requirements may vary significantly from one FPGA
+project to another (the number of data pipes needed in each direction and
+their attributes), there isn't one specific chunk of logic being the Xillybus
+IP core. Rather, the IP core is configured and built based upon a
+specification given by its end user.
+
+Xillybus presents independent data streams, which resemble pipes or TCP/IP
+communication to the user. At the host side, a character device file is used
+just like any pipe file. On the FPGA side, hardware FIFOs are used to stream
+the data. This is contrary to a common method of communicating through fixed-
+sized buffers (even though such buffers are used by Xillybus under the hood).
+There may be more than a hundred of these streams on a single IP core, but
+also no more than one, depending on the configuration.
+
+In order to ease the deployment of the Xillybus IP core, it contains a simple
+data structure which completely defines the core's configuration. The Linux
+driver fetches this data structure during its initialization process, and sets
+up the DMA buffers and character devices accordingly. As a result, a single
+driver is used to work out of the box with any Xillybus IP core.
+
+The data structure just mentioned should not be confused with PCI's
+configuration space or the Flattened Device Tree.
+
+USAGE
+=====
+
+User interface
+--------------
+
+On the host, all interface with Xillybus is done through /dev/xillybus_*
+device files, which are generated automatically as the drivers loads. The
+names of these files depend on the IP core that is loaded in the FPGA (see
+Probing below). To communicate with the FPGA, open the device file that
+corresponds to the hardware FIFO you want to send data or receive data from,
+and use plain write() or read() calls, just like with a regular pipe. In
+particular, it makes perfect sense to go:
+
+$ cat mydata > /dev/xillybus_thisfifo
+
+$ cat /dev/xillybus_thatfifo > hisdata
+
+possibly pressing CTRL-C as some stage, even though the xillybus_* pipes have
+the capability to send an EOF (but may not use it).
+
+The driver and hardware are designed to behave sensibly as pipes, including:
+
+* Supporting non-blocking I/O (by setting O_NONBLOCK on open() ).
+
+* Supporting poll() and select().
+
+* Being bandwidth efficient under load (using DMA) but also handle small
+ pieces of data sent across (like TCP/IP) by autoflushing.
+
+A device file can be read only, write only or bidirectional. Bidirectional
+device files are treated like two independent pipes (except for sharing a
+"channel" structure in the implementation code).
+
+Synchronization
+---------------
+
+Xillybus pipes are configured (on the IP core) to be either synchronous or
+asynchronous. For a synchronous pipe, write() returns successfully only after
+some data has been submitted and acknowledged by the FPGA. This slows down
+bulk data transfers, and is nearly impossible for use with streams that
+require data at a constant rate: There is no data transmitted to the FPGA
+between write() calls, in particular when the process loses the CPU.
+
+When a pipe is configured asynchronous, write() returns if there was enough
+room in the buffers to store any of the data in the buffers.
+
+For FPGA to host pipes, asynchronous pipes allow data transfer from the FPGA
+as soon as the respective device file is opened, regardless of if the data
+has been requested by a read() call. On synchronous pipes, only the amount
+of data requested by a read() call is transmitted.
+
+In summary, for synchronous pipes, data between the host and FPGA is
+transmitted only to satisfy the read() or write() call currently handled
+by the driver, and those calls wait for the transmission to complete before
+returning.
+
+Note that the synchronization attribute has nothing to do with the possibility
+that read() or write() completes less bytes than requested. There is a
+separate configuration flag ("allowpartial") that determines whether such a
+partial completion is allowed.
+
+Seekable pipes
+--------------
+
+A synchronous pipe can be configured to have the stream's position exposed
+to the user logic at the FPGA. Such a pipe is also seekable on the host API.
+With this feature, a memory or register interface can be attached on the
+FPGA side to the seekable stream. Reading or writing to a certain address in
+the attached memory is done by seeking to the desired address, and calling
+read() or write() as required.
+
+
+INTERNALS
+=========
+
+Source code organization
+------------------------
+
+The Xillybus driver consists of a core module, xillybus_core.c, and modules
+that depend on the specific bus interface (xillybus_of.c and xillybus_pcie.c).
+
+The bus specific modules are those probed when a suitable device is found by
+the kernel. Since the DMA mapping and synchronization functions, which are bus
+dependent by their nature, are used by the core module, a
+xilly_endpoint_hardware structure is passed to the core module on
+initialization. This structure is populated with pointers to wrapper functions
+which execute the DMA-related operations on the bus.
+
+Pipe attributes
+---------------
+
+Each pipe has a number of attributes which are set when the FPGA component
+(IP core) is built. They are fetched from the IDT (the data structure which
+defines the core's configuration, see Probing below) by xilly_setupchannels()
+in xillybus_core.c as follows:
+
+* is_writebuf: The pipe's direction. A non-zero value means it's an FPGA to
+ host pipe (the FPGA "writes").
+
+* channelnum: The pipe's identification number in communication between the
+ host and FPGA.
+
+* format: The underlying data width. See Data Granularity below.
+
+* allowpartial: A non-zero value means that a read() or write() (whichever
+ applies) may return with less than the requested number of bytes. The common
+ choice is a non-zero value, to match standard UNIX behavior.
+
+* synchronous: A non-zero value means that the pipe is synchronous. See
+ Syncronization above.
+
+* bufsize: Each DMA buffer's size. Always a power of two.
+
+* bufnum: The number of buffers allocated for this pipe. Always a power of two.
+
+* exclusive_open: A non-zero value forces exclusive opening of the associated
+ device file. If the device file is bidirectional, and already opened only in
+ one direction, the opposite direction may be opened once.
+
+* seekable: A non-zero value indicates that the pipe is seekable. See
+ Seekable pipes above.
+
+* supports_nonempty: A non-zero value (which is typical) indicates that the
+ hardware will send the messages that are necessary to support select() and
+ poll() for this pipe.
+
+Host never reads from the FPGA
+------------------------------
+
+Even though PCI Express is hotpluggable in general, a typical motherboard
+doesn't expect a card to go away all of the sudden. But since the PCIe card
+is based upon reprogrammable logic, a sudden disappearance from the bus is
+quite likely as a result of an accidental reprogramming of the FPGA while the
+host is up. In practice, nothing happens immediately in such a situation. But
+if the host attempts to read from an address that is mapped to the PCI Express
+device, that leads to an immediate freeze of the system on some motherboards,
+even though the PCIe standard requires a graceful recovery.
+
+In order to avoid these freezes, the Xillybus driver refrains completely from
+reading from the device's register space. All communication from the FPGA to
+the host is done through DMA. In particular, the Interrupt Service Routine
+doesn't follow the common practice of checking a status register when it's
+invoked. Rather, the FPGA prepares a small buffer which contains short
+messages, which inform the host what the interrupt was about.
+
+This mechanism is used on non-PCIe buses as well for the sake of uniformity.
+
+
+Channels, pipes, and the message channel
+----------------------------------------
+
+Each of the (possibly bidirectional) pipes presented to the user is allocated
+a data channel between the FPGA and the host. The distinction between channels
+and pipes is necessary only because of channel 0, which is used for interrupt-
+related messages from the FPGA, and has no pipe attached to it.
+
+Data streaming
+--------------
+
+Even though a non-segmented data stream is presented to the user at both
+sides, the implementation relies on a set of DMA buffers which is allocated
+for each channel. For the sake of illustration, let's take the FPGA to host
+direction: As data streams into the respective channel's interface in the
+FPGA, the Xillybus IP core writes it to one of the DMA buffers. When the
+buffer is full, the FPGA informs the host about that (appending a
+XILLYMSG_OPCODE_RELEASEBUF message channel 0 and sending an interrupt if
+necessary). The host responds by making the data available for reading through
+the character device. When all data has been read, the host writes on the
+the FPGA's buffer control register, allowing the buffer's overwriting. Flow
+control mechanisms exist on both sides to prevent underflows and overflows.
+
+This is not good enough for creating a TCP/IP-like stream: If the data flow
+stops momentarily before a DMA buffer is filled, the intuitive expectation is
+that the partial data in buffer will arrive anyhow, despite the buffer not
+being completed. This is implemented by adding a field in the
+XILLYMSG_OPCODE_RELEASEBUF message, through which the FPGA informs not just
+which buffer is submitted, but how much data it contains.
+
+But the FPGA will submit a partially filled buffer only if directed to do so
+by the host. This situation occurs when the read() method has been blocking
+for XILLY_RX_TIMEOUT jiffies (currently 10 ms), after which the host commands
+the FPGA to submit a DMA buffer as soon as it can. This timeout mechanism
+balances between bus bandwidth efficiency (preventing a lot of partially
+filled buffers being sent) and a latency held fairly low for tails of data.
+
+A similar setting is used in the host to FPGA direction. The handling of
+partial DMA buffers is somewhat different, though. The user can tell the
+driver to submit all data it has in the buffers to the FPGA, by issuing a
+write() with the byte count set to zero. This is similar to a flush request,
+but it doesn't block. There is also an autoflushing mechanism, which triggers
+an equivalent flush roughly XILLY_RX_TIMEOUT jiffies after the last write().
+This allows the user to be oblivious about the underlying buffering mechanism
+and yet enjoy a stream-like interface.
+
+Note that the issue of partial buffer flushing is irrelevant for pipes having
+the "synchronous" attribute nonzero, since synchronous pipes don't allow data
+to lay around in the DMA buffers between read() and write() anyhow.
+
+Data granularity
+----------------
+
+The data arrives or is sent at the FPGA as 8, 16 or 32 bit wide words, as
+configured by the "format" attribute. Whenever possible, the driver attempts
+to hide this when the pipe is accessed differently from its natural alignment.
+For example, reading single bytes from a pipe with 32 bit granularity works
+with no issues. Writing single bytes to pipes with 16 or 32 bit granularity
+will also work, but the driver can't send partially completed words to the
+FPGA, so the transmission of up to one word may be held until it's fully
+occupied with user data.
+
+This somewhat complicates the handling of host to FPGA streams, because
+when a buffer is flushed, it may contain up to 3 bytes don't form a word in
+the FPGA, and hence can't be sent. To prevent loss of data, these leftover
+bytes need to be moved to the next buffer. The parts in xillybus_core.c
+that mention "leftovers" in some way are related to this complication.
+
+Probing
+-------
+
+As mentioned earlier, the number of pipes that are created when the driver
+loads and their attributes depend on the Xillybus IP core in the FPGA. During
+the driver's initialization, a blob containing configuration info, the
+Interface Description Table (IDT), is sent from the FPGA to the host. The
+bootstrap process is done in three phases:
+
+1. Acquire the length of the IDT, so a buffer can be allocated for it. This
+ is done by sending a quiesce command to the device, since the acknowledge
+ for this command contains the IDT's buffer length.
+
+2. Acquire the IDT itself.
+
+3. Create the interfaces according to the IDT.
+
+Buffer allocation
+-----------------
+
+In order to simplify the logic that prevents illegal boundary crossings of
+PCIe packets, the following rule applies: If a buffer is smaller than 4kB,
+it must not cross a 4kB boundary. Otherwise, it must be 4kB aligned. The
+xilly_setupchannels() functions allocates these buffers by requesting whole
+pages from the kernel, and diving them into DMA buffers as necessary. Since
+all buffers' sizes are powers of two, it's possible to pack any set of such
+buffers, with a maximal waste of one page of memory.
+
+All buffers are allocated when the driver is loaded. This is necessary,
+since large continuous physical memory segments are sometimes requested,
+which are more likely to be available when the system is freshly booted.
+
+The allocation of buffer memory takes place in the same order they appear in
+the IDT. The driver relies on a rule that the pipes are sorted with decreasing
+buffer size in the IDT. If a requested buffer is larger or equal to a page,
+the necessary number of pages is requested from the kernel, and these are
+used for this buffer. If the requested buffer is smaller than a page, one
+single page is requested from the kernel, and that page is partially used.
+Or, if there already is a partially used page at hand, the buffer is packed
+into that page. It can be shown that all pages requested from the kernel
+(except possibly for the last) are 100% utilized this way.
+
+The "nonempty" message (supporting poll)
+---------------------------------------
+
+In order to support the "poll" method (and hence select() ), there is a small
+catch regarding the FPGA to host direction: The FPGA may have filled a DMA
+buffer with some data, but not submitted that buffer. If the host waited for
+the buffer's submission by the FPGA, there would be a possibility that the
+FPGA side has sent data, but a select() call would still block, because the
+host has not received any notification about this. This is solved with
+XILLYMSG_OPCODE_NONEMPTY messages sent by the FPGA when a channel goes from
+completely empty to containing some data.
+
+These messages are used only to support poll() and select(). The IP core can
+be configured not to send them for a slight reduction of bandwidth.
diff --git a/Documentation/xtensa/atomctl.txt b/Documentation/xtensa/atomctl.txt
new file mode 100644
index 000000000..1da783ac2
--- /dev/null
+++ b/Documentation/xtensa/atomctl.txt
@@ -0,0 +1,44 @@
+We Have Atomic Operation Control (ATOMCTL) Register.
+This register determines the effect of using a S32C1I instruction
+with various combinations of:
+
+ 1. With and without an Coherent Cache Controller which
+ can do Atomic Transactions to the memory internally.
+
+ 2. With and without An Intelligent Memory Controller which
+ can do Atomic Transactions itself.
+
+The Core comes up with a default value of for the three types of cache ops:
+
+ 0x28: (WB: Internal, WT: Internal, BY:Exception)
+
+On the FPGA Cards we typically simulate an Intelligent Memory controller
+which can implement RCW transactions. For FPGA cards with an External
+Memory controller we let it to the atomic operations internally while
+doing a Cached (WB) transaction and use the Memory RCW for un-cached
+operations.
+
+For systems without an coherent cache controller, non-MX, we always
+use the memory controllers RCW, thought non-MX controlers likely
+support the Internal Operation.
+
+CUSTOMER-WARNING:
+ Virtually all customers buy their memory controllers from vendors that
+ don't support atomic RCW memory transactions and will likely want to
+ configure this register to not use RCW.
+
+Developers might find using RCW in Bypass mode convenient when testing
+with the cache being bypassed; for example studying cache alias problems.
+
+See Section 4.3.12.4 of ISA; Bits:
+
+ WB WT BY
+ 5 4 | 3 2 | 1 0
+ 2 Bit
+ Field
+ Values WB - Write Back WT - Write Thru BY - Bypass
+--------- --------------- ----------------- ----------------
+ 0 Exception Exception Exception
+ 1 RCW Transaction RCW Transaction RCW Transaction
+ 2 Internal Operation Internal Operation Reserved
+ 3 Reserved Reserved Reserved
diff --git a/Documentation/xtensa/mmu.txt b/Documentation/xtensa/mmu.txt
new file mode 100644
index 000000000..0312fe664
--- /dev/null
+++ b/Documentation/xtensa/mmu.txt
@@ -0,0 +1,64 @@
+MMUv3 initialization sequence.
+
+The code in the initialize_mmu macro sets up MMUv3 memory mapping
+identically to MMUv2 fixed memory mapping. Depending on
+CONFIG_INITIALIZE_XTENSA_MMU_INSIDE_VMLINUX symbol this code is
+located in one of the following address ranges:
+
+ 0xF0000000..0xFFFFFFFF (will keep same address in MMU v2 layout;
+ typically ROM)
+ 0x00000000..0x07FFFFFF (system RAM; this code is actually linked
+ at 0xD0000000..0xD7FFFFFF [cached]
+ or 0xD8000000..0xDFFFFFFF [uncached];
+ in any case, initially runs elsewhere
+ than linked, so have to be careful)
+
+The code has the following assumptions:
+ This code fragment is run only on an MMU v3.
+ TLBs are in their reset state.
+ ITLBCFG and DTLBCFG are zero (reset state).
+ RASID is 0x04030201 (reset state).
+ PS.RING is zero (reset state).
+ LITBASE is zero (reset state, PC-relative literals); required to be PIC.
+
+TLB setup proceeds along the following steps.
+
+ Legend:
+ VA = virtual address (two upper nibbles of it);
+ PA = physical address (two upper nibbles of it);
+ pc = physical range that contains this code;
+
+After step 2, we jump to virtual address in 0x40000000..0x5fffffff
+that corresponds to next instruction to execute in this code.
+After step 4, we jump to intended (linked) address of this code.
+
+ Step 0 Step1 Step 2 Step3 Step 4 Step5
+ ============ ===== ============ ===== ============ =====
+ VA PA PA VA PA PA VA PA PA
+ ------ -- -- ------ -- -- ------ -- --
+ E0..FF -> E0 -> E0 E0..FF -> E0 F0..FF -> F0 -> F0
+ C0..DF -> C0 -> C0 C0..DF -> C0 E0..EF -> F0 -> F0
+ A0..BF -> A0 -> A0 A0..BF -> A0 D8..DF -> 00 -> 00
+ 80..9F -> 80 -> 80 80..9F -> 80 D0..D7 -> 00 -> 00
+ 60..7F -> 60 -> 60 60..7F -> 60
+ 40..5F -> 40 40..5F -> pc -> pc 40..5F -> pc
+ 20..3F -> 20 -> 20 20..3F -> 20
+ 00..1F -> 00 -> 00 00..1F -> 00
+
+The default location of IO peripherals is above 0xf0000000. This may change
+using a "ranges" property in a device tree simple-bus node. See ePAPR 1.1, §6.5
+for details on the syntax and semantic of simple-bus nodes. The following
+limitations apply:
+
+1. Only top level simple-bus nodes are considered
+
+2. Only one (first) simple-bus node is considered
+
+3. Empty "ranges" properties are not supported
+
+4. Only the first triplet in the "ranges" property is considered
+
+5. The parent-bus-address value is rounded down to the nearest 256MB boundary
+
+6. The IO area covers the entire 256MB segment of parent-bus-address; the
+ "ranges" triplet length field is ignored
diff --git a/Documentation/xz.txt b/Documentation/xz.txt
new file mode 100644
index 000000000..2cf3e2608
--- /dev/null
+++ b/Documentation/xz.txt
@@ -0,0 +1,121 @@
+
+XZ data compression in Linux
+============================
+
+Introduction
+
+ XZ is a general purpose data compression format with high compression
+ ratio and relatively fast decompression. The primary compression
+ algorithm (filter) is LZMA2. Additional filters can be used to improve
+ compression ratio even further. E.g. Branch/Call/Jump (BCJ) filters
+ improve compression ratio of executable data.
+
+ The XZ decompressor in Linux is called XZ Embedded. It supports
+ the LZMA2 filter and optionally also BCJ filters. CRC32 is supported
+ for integrity checking. The home page of XZ Embedded is at
+ <http://tukaani.org/xz/embedded.html>, where you can find the
+ latest version and also information about using the code outside
+ the Linux kernel.
+
+ For userspace, XZ Utils provide a zlib-like compression library
+ and a gzip-like command line tool. XZ Utils can be downloaded from
+ <http://tukaani.org/xz/>.
+
+XZ related components in the kernel
+
+ The xz_dec module provides XZ decompressor with single-call (buffer
+ to buffer) and multi-call (stateful) APIs. The usage of the xz_dec
+ module is documented in include/linux/xz.h.
+
+ The xz_dec_test module is for testing xz_dec. xz_dec_test is not
+ useful unless you are hacking the XZ decompressor. xz_dec_test
+ allocates a char device major dynamically to which one can write
+ .xz files from userspace. The decompressed output is thrown away.
+ Keep an eye on dmesg to see diagnostics printed by xz_dec_test.
+ See the xz_dec_test source code for the details.
+
+ For decompressing the kernel image, initramfs, and initrd, there
+ is a wrapper function in lib/decompress_unxz.c. Its API is the
+ same as in other decompress_*.c files, which is defined in
+ include/linux/decompress/generic.h.
+
+ scripts/xz_wrap.sh is a wrapper for the xz command line tool found
+ from XZ Utils. The wrapper sets compression options to values suitable
+ for compressing the kernel image.
+
+ For kernel makefiles, two commands are provided for use with
+ $(call if_needed). The kernel image should be compressed with
+ $(call if_needed,xzkern) which will use a BCJ filter and a big LZMA2
+ dictionary. It will also append a four-byte trailer containing the
+ uncompressed size of the file, which is needed by the boot code.
+ Other things should be compressed with $(call if_needed,xzmisc)
+ which will use no BCJ filter and 1 MiB LZMA2 dictionary.
+
+Notes on compression options
+
+ Since the XZ Embedded supports only streams with no integrity check or
+ CRC32, make sure that you don't use some other integrity check type
+ when encoding files that are supposed to be decoded by the kernel. With
+ liblzma, you need to use either LZMA_CHECK_NONE or LZMA_CHECK_CRC32
+ when encoding. With the xz command line tool, use --check=none or
+ --check=crc32.
+
+ Using CRC32 is strongly recommended unless there is some other layer
+ which will verify the integrity of the uncompressed data anyway.
+ Double checking the integrity would probably be waste of CPU cycles.
+ Note that the headers will always have a CRC32 which will be validated
+ by the decoder; you can only change the integrity check type (or
+ disable it) for the actual uncompressed data.
+
+ In userspace, LZMA2 is typically used with dictionary sizes of several
+ megabytes. The decoder needs to have the dictionary in RAM, thus big
+ dictionaries cannot be used for files that are intended to be decoded
+ by the kernel. 1 MiB is probably the maximum reasonable dictionary
+ size for in-kernel use (maybe more is OK for initramfs). The presets
+ in XZ Utils may not be optimal when creating files for the kernel,
+ so don't hesitate to use custom settings. Example:
+
+ xz --check=crc32 --lzma2=dict=512KiB inputfile
+
+ An exception to above dictionary size limitation is when the decoder
+ is used in single-call mode. Decompressing the kernel itself is an
+ example of this situation. In single-call mode, the memory usage
+ doesn't depend on the dictionary size, and it is perfectly fine to
+ use a big dictionary: for maximum compression, the dictionary should
+ be at least as big as the uncompressed data itself.
+
+Future plans
+
+ Creating a limited XZ encoder may be considered if people think it is
+ useful. LZMA2 is slower to compress than e.g. Deflate or LZO even at
+ the fastest settings, so it isn't clear if LZMA2 encoder is wanted
+ into the kernel.
+
+ Support for limited random-access reading is planned for the
+ decompression code. I don't know if it could have any use in the
+ kernel, but I know that it would be useful in some embedded projects
+ outside the Linux kernel.
+
+Conformance to the .xz file format specification
+
+ There are a couple of corner cases where things have been simplified
+ at expense of detecting errors as early as possible. These should not
+ matter in practice all, since they don't cause security issues. But
+ it is good to know this if testing the code e.g. with the test files
+ from XZ Utils.
+
+Reporting bugs
+
+ Before reporting a bug, please check that it's not fixed already
+ at upstream. See <http://tukaani.org/xz/embedded.html> to get the
+ latest code.
+
+ Report bugs to <lasse.collin@tukaani.org> or visit #tukaani on
+ Freenode and talk to Larhzu. I don't actively read LKML or other
+ kernel-related mailing lists, so if there's something I should know,
+ you should email to me personally or use IRC.
+
+ Don't bother Igor Pavlov with questions about the XZ implementation
+ in the kernel or about XZ Utils. While these two implementations
+ include essential code that is directly based on Igor Pavlov's code,
+ these implementations aren't maintained nor supported by him.
diff --git a/Documentation/zh_CN/CodingStyle b/Documentation/zh_CN/CodingStyle
new file mode 100644
index 000000000..654afd72e
--- /dev/null
+++ b/Documentation/zh_CN/CodingStyle
@@ -0,0 +1,694 @@
+Chinese translated version of Documentation/CodingStyle
+
+If you have any comment or update to the content, please post to LKML directly.
+However, if you have problem communicating in English you can also ask the
+Chinese maintainer for help. Contact the Chinese maintainer, if this
+translation is outdated or there is problem with translation.
+
+Chinese maintainer: Zhang Le <r0bertz@gentoo.org>
+---------------------------------------------------------------------
+Documentation/CodingStyle的中文翻译
+
+如果想评论或更新本文的内容,请直接发信到LKML。如果你使用英文交流有困难的话,也可
+以向中文版维护者求助。如果本翻译更新不及时或者翻译存在问题,请联系中文版维护者。
+
+中文版维护者: 张乐 Zhang Le <r0bertz@gentoo.org>
+中文版翻译者: 张乐 Zhang Le <r0bertz@gentoo.org>
+中文版校译者: 王聪 Wang Cong <xiyou.wangcong@gmail.com>
+ wheelz <kernel.zeng@gmail.com>
+ 管旭东 Xudong Guan <xudong.guan@gmail.com>
+ Li Zefan <lizf@cn.fujitsu.com>
+ Wang Chen <wangchen@cn.fujitsu.com>
+以下为正文
+---------------------------------------------------------------------
+
+ Linux内核代码风格
+
+这是一个简短的文档,描述了linux内核的首选代码风格。代码风格是因人而异的,而且我
+不愿意把我的观点强加给任何人,不过这里所讲述的是我必须要维护的代码所遵守的风格,
+并且我也希望绝大多数其他代码也能遵守这个风格。请在写代码时至少考虑一下本文所述的
+风格。
+
+首先,我建议你打印一份GNU代码规范,然后不要读它。烧了它,这是一个具有重大象征性
+意义的动作。
+
+不管怎样,现在我们开始:
+
+
+ 第一章:缩进
+
+制表符是8个字符,所以缩进也是8个字符。有些异端运动试图将缩进变为4(乃至2)个字符
+深,这几乎相当于尝试将圆周率的值定义为3。
+
+理由:缩进的全部意义就在于清楚的定义一个控制块起止于何处。尤其是当你盯着你的屏幕
+连续看了20小时之后,你将会发现大一点的缩进会使你更容易分辨缩进。
+
+现在,有些人会抱怨8个字符的缩进会使代码向右边移动的太远,在80个字符的终端屏幕上
+就很难读这样的代码。这个问题的答案是,如果你需要3级以上的缩进,不管用何种方式你
+的代码已经有问题了,应该修正你的程序。
+
+简而言之,8个字符的缩进可以让代码更容易阅读,还有一个好处是当你的函数嵌套太深的
+时候可以给你警告。留心这个警告。
+
+在switch语句中消除多级缩进的首选的方式是让“switch”和从属于它的“case”标签对齐于同
+一列,而不要“两次缩进”“case”标签。比如:
+
+ switch (suffix) {
+ case 'G':
+ case 'g':
+ mem <<= 30;
+ break;
+ case 'M':
+ case 'm':
+ mem <<= 20;
+ break;
+ case 'K':
+ case 'k':
+ mem <<= 10;
+ /* fall through */
+ default:
+ break;
+ }
+
+
+不要把多个语句放在一行里,除非你有什么东西要隐藏:
+
+ if (condition) do_this;
+ do_something_everytime;
+
+也不要在一行里放多个赋值语句。内核代码风格超级简单。就是避免可能导致别人误读的表
+达式。
+
+除了注释、文档和Kconfig之外,不要使用空格来缩进,前面的例子是例外,是有意为之。
+
+选用一个好的编辑器,不要在行尾留空格。
+
+
+ 第二章:把长的行和字符串打散
+
+代码风格的意义就在于使用平常使用的工具来维持代码的可读性和可维护性。
+
+每一行的长度的限制是80列,我们强烈建议您遵守这个惯例。
+
+长于80列的语句要打散成有意义的片段。每个片段要明显短于原来的语句,而且放置的位置
+也明显的靠右。同样的规则也适用于有很长参数列表的函数头。长字符串也要打散成较短的
+字符串。唯一的例外是超过80列可以大幅度提高可读性并且不会隐藏信息的情况。
+
+void fun(int a, int b, int c)
+{
+ if (condition)
+ printk(KERN_WARNING "Warning this is a long printk with "
+ "3 parameters a: %u b: %u "
+ "c: %u \n", a, b, c);
+ else
+ next_statement;
+}
+
+ 第三章:大括号和空格的放置
+
+C语言风格中另外一个常见问题是大括号的放置。和缩进大小不同,选择或弃用某种放置策
+略并没有多少技术上的原因,不过首选的方式,就像Kernighan和Ritchie展示给我们的,是
+把起始大括号放在行尾,而把结束大括号放在行首,所以:
+
+ if (x is true) {
+ we do y
+ }
+
+这适用于所有的非函数语句块(if、switch、for、while、do)。比如:
+
+ switch (action) {
+ case KOBJ_ADD:
+ return "add";
+ case KOBJ_REMOVE:
+ return "remove";
+ case KOBJ_CHANGE:
+ return "change";
+ default:
+ return NULL;
+ }
+
+不过,有一个例外,那就是函数:函数的起始大括号放置于下一行的开头,所以:
+
+ int function(int x)
+ {
+ body of function
+ }
+
+全世界的异端可能会抱怨这个不一致性是……呃……不一致的,不过所有思维健全的人都知道(
+a)K&R是_正确的_,并且(b)K&R是正确的。此外,不管怎样函数都是特殊的(在C语言中
+,函数是不能嵌套的)。
+
+注意结束大括号独自占据一行,除非它后面跟着同一个语句的剩余部分,也就是do语句中的
+“while”或者if语句中的“else”,像这样:
+
+ do {
+ body of do-loop
+ } while (condition);
+
+和
+
+ if (x == y) {
+ ..
+ } else if (x > y) {
+ ...
+ } else {
+ ....
+ }
+
+理由:K&R。
+
+也请注意这种大括号的放置方式也能使空(或者差不多空的)行的数量最小化,同时不失可
+读性。因此,由于你的屏幕上的新行是不可再生资源(想想25行的终端屏幕),你将会有更
+多的空行来放置注释。
+
+当只有一个单独的语句的时候,不用加不必要的大括号。
+
+if (condition)
+ action();
+
+这点不适用于本身为某个条件语句的一个分支的单独语句。这时需要在两个分支里都使用大
+括号。
+
+if (condition) {
+ do_this();
+ do_that();
+} else {
+ otherwise();
+}
+
+ 3.1:空格
+
+Linux内核的空格使用方式(主要)取决于它是用于函数还是关键字。(大多数)关键字后
+要加一个空格。值得注意的例外是sizeof、typeof、alignof和__attribute__,这些关键字
+某些程度上看起来更像函数(它们在Linux里也常常伴随小括号而使用,尽管在C语言里这样
+的小括号不是必需的,就像“struct fileinfo info”声明过后的“sizeof info”)。
+
+所以在这些关键字之后放一个空格:
+ if, switch, case, for, do, while
+但是不要在sizeof、typeof、alignof或者__attribute__这些关键字之后放空格。例如,
+ s = sizeof(struct file);
+
+不要在小括号里的表达式两侧加空格。这是一个反例:
+
+ s = sizeof( struct file );
+
+当声明指针类型或者返回指针类型的函数时,“*”的首选使用方式是使之靠近变量名或者函
+数名,而不是靠近类型名。例子:
+
+ char *linux_banner;
+ unsigned long long memparse(char *ptr, char **retptr);
+ char *match_strdup(substring_t *s);
+
+在大多数二元和三元操作符两侧使用一个空格,例如下面所有这些操作符:
+
+ = + - < > * / % | & ^ <= >= == != ? :
+
+但是一元操作符后不要加空格:
+ & * + - ~ ! sizeof typeof alignof __attribute__ defined
+
+后缀自加和自减一元操作符前不加空格:
+ ++ --
+
+前缀自加和自减一元操作符后不加空格:
+ ++ --
+
+“.”和“->”结构体成员操作符前后不加空格。
+
+不要在行尾留空白。有些可以自动缩进的编辑器会在新行的行首加入适量的空白,然后你
+就可以直接在那一行输入代码。不过假如你最后没有在那一行输入代码,有些编辑器就不
+会移除已经加入的空白,就像你故意留下一个只有空白的行。包含行尾空白的行就这样产
+生了。
+
+当git发现补丁包含了行尾空白的时候会警告你,并且可以应你的要求去掉行尾空白;不过
+如果你是正在打一系列补丁,这样做会导致后面的补丁失败,因为你改变了补丁的上下文。
+
+
+ 第四章:命名
+
+C是一个简朴的语言,你的命名也应该这样。和Modula-2和Pascal程序员不同,C程序员不使
+用类似ThisVariableIsATemporaryCounter这样华丽的名字。C程序员会称那个变量为“tmp”
+,这样写起来会更容易,而且至少不会令其难于理解。
+
+不过,虽然混用大小写的名字是不提倡使用的,但是全局变量还是需要一个具描述性的名字
+。称一个全局函数为“foo”是一个难以饶恕的错误。
+
+全局变量(只有当你真正需要它们的时候再用它)需要有一个具描述性的名字,就像全局函
+数。如果你有一个可以计算活动用户数量的函数,你应该叫它“count_active_users()”或者
+类似的名字,你不应该叫它“cntuser()”。
+
+在函数名中包含函数类型(所谓的匈牙利命名法)是脑子出了问题——编译器知道那些类型而
+且能够检查那些类型,这样做只能把程序员弄糊涂了。难怪微软总是制造出有问题的程序。
+
+本地变量名应该简短,而且能够表达相关的含义。如果你有一些随机的整数型的循环计数器
+,它应该被称为“i”。叫它“loop_counter”并无益处,如果它没有被误解的可能的话。类似
+的,“tmp”可以用来称呼任意类型的临时变量。
+
+如果你怕混淆了你的本地变量名,你就遇到另一个问题了,叫做函数增长荷尔蒙失衡综合症
+。请看第六章(函数)。
+
+
+ 第五章:Typedef
+
+不要使用类似“vps_t”之类的东西。
+
+对结构体和指针使用typedef是一个错误。当你在代码里看到:
+
+ vps_t a;
+
+这代表什么意思呢?
+
+相反,如果是这样
+
+ struct virtual_container *a;
+
+你就知道“a”是什么了。
+
+很多人认为typedef“能提高可读性”。实际不是这样的。它们只在下列情况下有用:
+
+ (a) 完全不透明的对象(这种情况下要主动使用typedef来隐藏这个对象实际上是什么)。
+
+ 例如:“pte_t”等不透明对象,你只能用合适的访问函数来访问它们。
+
+ 注意!不透明性和“访问函数”本身是不好的。我们使用pte_t等类型的原因在于真的是
+ 完全没有任何共用的可访问信息。
+
+ (b) 清楚的整数类型,如此,这层抽象就可以帮助消除到底是“int”还是“long”的混淆。
+
+ u8/u16/u32是完全没有问题的typedef,不过它们更符合类别(d)而不是这里。
+
+ 再次注意!要这样做,必须事出有因。如果某个变量是“unsigned long“,那么没有必要
+
+ typedef unsigned long myflags_t;
+
+ 不过如果有一个明确的原因,比如它在某种情况下可能会是一个“unsigned int”而在
+ 其他情况下可能为“unsigned long”,那么就不要犹豫,请务必使用typedef。
+
+ (c) 当你使用sparse按字面的创建一个新类型来做类型检查的时候。
+
+ (d) 和标准C99类型相同的类型,在某些例外的情况下。
+
+ 虽然让眼睛和脑筋来适应新的标准类型比如“uint32_t”不需要花很多时间,可是有些
+ 人仍然拒绝使用它们。
+
+ 因此,Linux特有的等同于标准类型的“u8/u16/u32/u64”类型和它们的有符号类型是被
+ 允许的——尽管在你自己的新代码中,它们不是强制要求要使用的。
+
+ 当编辑已经使用了某个类型集的已有代码时,你应该遵循那些代码中已经做出的选择。
+
+ (e) 可以在用户空间安全使用的类型。
+
+ 在某些用户空间可见的结构体里,我们不能要求C99类型而且不能用上面提到的“u32”
+ 类型。因此,我们在与用户空间共享的所有结构体中使用__u32和类似的类型。
+
+可能还有其他的情况,不过基本的规则是永远不要使用typedef,除非你可以明确的应用上
+述某个规则中的一个。
+
+总的来说,如果一个指针或者一个结构体里的元素可以合理的被直接访问到,那么它们就不
+应该是一个typedef。
+
+
+ 第六章:函数
+
+函数应该简短而漂亮,并且只完成一件事情。函数应该可以一屏或者两屏显示完(我们都知
+道ISO/ANSI屏幕大小是80x24),只做一件事情,而且把它做好。
+
+一个函数的最大长度是和该函数的复杂度和缩进级数成反比的。所以,如果你有一个理论上
+很简单的只有一个很长(但是简单)的case语句的函数,而且你需要在每个case里做很多很
+小的事情,这样的函数尽管很长,但也是可以的。
+
+不过,如果你有一个复杂的函数,而且你怀疑一个天分不是很高的高中一年级学生可能甚至
+搞不清楚这个函数的目的,你应该严格的遵守前面提到的长度限制。使用辅助函数,并为之
+取个具描述性的名字(如果你觉得它们的性能很重要的话,可以让编译器内联它们,这样的
+效果往往会比你写一个复杂函数的效果要好。)
+
+函数的另外一个衡量标准是本地变量的数量。此数量不应超过5-10个,否则你的函数就有
+问题了。重新考虑一下你的函数,把它分拆成更小的函数。人的大脑一般可以轻松的同时跟
+踪7个不同的事物,如果再增多的话,就会糊涂了。即便你聪颖过人,你也可能会记不清你2
+个星期前做过的事情。
+
+在源文件里,使用空行隔开不同的函数。如果该函数需要被导出,它的EXPORT*宏应该紧贴
+在它的结束大括号之下。比如:
+
+int system_is_up(void)
+{
+ return system_state == SYSTEM_RUNNING;
+}
+EXPORT_SYMBOL(system_is_up);
+
+在函数原型中,包含函数名和它们的数据类型。虽然C语言里没有这样的要求,在Linux里这
+是提倡的做法,因为这样可以很简单的给读者提供更多的有价值的信息。
+
+
+ 第七章:集中的函数退出途径
+
+虽然被某些人声称已经过时,但是goto语句的等价物还是经常被编译器所使用,具体形式是
+无条件跳转指令。
+
+当一个函数从多个位置退出并且需要做一些通用的清理工作的时候,goto的好处就显现出来
+了。
+
+理由是:
+
+- 无条件语句容易理解和跟踪
+- 嵌套程度减小
+- 可以避免由于修改时忘记更新某个单独的退出点而导致的错误
+- 减轻了编译器的工作,无需删除冗余代码;)
+
+int fun(int a)
+{
+ int result = 0;
+ char *buffer = kmalloc(SIZE);
+
+ if (buffer == NULL)
+ return -ENOMEM;
+
+ if (condition1) {
+ while (loop1) {
+ ...
+ }
+ result = 1;
+ goto out;
+ }
+ ...
+out:
+ kfree(buffer);
+ return result;
+}
+
+ 第八章:注释
+
+注释是好的,不过有过度注释的危险。永远不要在注释里解释你的代码是如何运作的:更好
+的做法是让别人一看你的代码就可以明白,解释写的很差的代码是浪费时间。
+
+一般的,你想要你的注释告诉别人你的代码做了什么,而不是怎么做的。也请你不要把注释
+放在一个函数体内部:如果函数复杂到你需要独立的注释其中的一部分,你很可能需要回到
+第六章看一看。你可以做一些小注释来注明或警告某些很聪明(或者槽糕)的做法,但不要
+加太多。你应该做的,是把注释放在函数的头部,告诉人们它做了什么,也可以加上它做这
+些事情的原因。
+
+当注释内核API函数时,请使用kernel-doc格式。请看
+Documentation/kernel-doc-nano-HOWTO.txt和scripts/kernel-doc以获得详细信息。
+
+Linux的注释风格是C89“/* ... */”风格。不要使用C99风格“// ...”注释。
+
+长(多行)的首选注释风格是:
+
+ /*
+ * This is the preferred style for multi-line
+ * comments in the Linux kernel source code.
+ * Please use it consistently.
+ *
+ * Description: A column of asterisks on the left side,
+ * with beginning and ending almost-blank lines.
+ */
+
+注释数据也是很重要的,不管是基本类型还是衍生类型。为了方便实现这一点,每一行应只
+声明一个数据(不要使用逗号来一次声明多个数据)。这样你就有空间来为每个数据写一段
+小注释来解释它们的用途了。
+
+
+ 第九章:你已经把事情弄糟了
+
+这没什么,我们都是这样。可能你的使用了很长时间Unix的朋友已经告诉你“GNU emacs”能
+自动帮你格式化C源代码,而且你也注意到了,确实是这样,不过它所使用的默认值和我们
+想要的相去甚远(实际上,甚至比随机打的还要差——无数个猴子在GNU emacs里打字永远不
+会创造出一个好程序)(译注:请参考Infinite Monkey Theorem)
+
+所以你要么放弃GNU emacs,要么改变它让它使用更合理的设定。要采用后一个方案,你可
+以把下面这段粘贴到你的.emacs文件里。
+
+(defun linux-c-mode ()
+ "C mode with adjusted defaults for use with the Linux kernel."
+ (interactive)
+ (c-mode)
+ (c-set-style "K&R")
+ (setq tab-width 8)
+ (setq indent-tabs-mode t)
+ (setq c-basic-offset 8))
+
+这样就定义了M-x linux-c-mode命令。当你hack一个模块的时候,如果你把字符串
+-*- linux-c -*-放在头两行的某个位置,这个模式将会被自动调用。如果你希望在你修改
+/usr/src/linux里的文件时魔术般自动打开linux-c-mode的话,你也可能需要添加
+
+(setq auto-mode-alist (cons '("/usr/src/linux.*/.*\\.[ch]$" . linux-c-mode)
+ auto-mode-alist))
+
+到你的.emacs文件里。
+
+不过就算你尝试让emacs正确的格式化代码失败了,也并不意味着你失去了一切:还可以用“
+indent”。
+
+不过,GNU indent也有和GNU emacs一样有问题的设定,所以你需要给它一些命令选项。不
+过,这还不算太糟糕,因为就算是GNU indent的作者也认同K&R的权威性(GNU的人并不是坏
+人,他们只是在这个问题上被严重的误导了),所以你只要给indent指定选项“-kr -i8”
+(代表“K&R,8个字符缩进”),或者使用“scripts/Lindent”,这样就可以以最时髦的方式
+缩进源代码。
+
+“indent”有很多选项,特别是重新格式化注释的时候,你可能需要看一下它的手册页。不过
+记住:“indent”不能修正坏的编程习惯。
+
+
+ 第十章:Kconfig配置文件
+
+对于遍布源码树的所有Kconfig*配置文件来说,它们缩进方式与C代码相比有所不同。紧挨
+在“config”定义下面的行缩进一个制表符,帮助信息则再多缩进2个空格。比如:
+
+config AUDIT
+ bool "Auditing support"
+ depends on NET
+ help
+ Enable auditing infrastructure that can be used with another
+ kernel subsystem, such as SELinux (which requires this for
+ logging of avc messages output). Does not do system-call
+ auditing without CONFIG_AUDITSYSCALL.
+
+而那些危险的功能(比如某些文件系统的写支持)应该在它们的提示字符串里显著的声明这
+一点:
+
+config ADFS_FS_RW
+ bool "ADFS write support (DANGEROUS)"
+ depends on ADFS_FS
+ ...
+
+要查看配置文件的完整文档,请看Documentation/kbuild/kconfig-language.txt。
+
+
+ 第十一章:数据结构
+
+如果一个数据结构,在创建和销毁它的单线执行环境之外可见,那么它必须要有一个引用计
+数器。内核里没有垃圾收集(并且内核之外的垃圾收集慢且效率低下),这意味着你绝对需
+要记录你对这种数据结构的使用情况。
+
+引用计数意味着你能够避免上锁,并且允许多个用户并行访问这个数据结构——而不需要担心
+这个数据结构仅仅因为暂时不被使用就消失了,那些用户可能不过是沉睡了一阵或者做了一
+些其他事情而已。
+
+注意上锁不能取代引用计数。上锁是为了保持数据结构的一致性,而引用计数是一个内存管
+理技巧。通常二者都需要,不要把两个搞混了。
+
+很多数据结构实际上有2级引用计数,它们通常有不同“类”的用户。子类计数器统计子类用
+户的数量,每当子类计数器减至零时,全局计数器减一。
+
+这种“多级引用计数”的例子可以在内存管理(“struct mm_struct”:mm_users和mm_count)
+和文件系统(“struct super_block”:s_count和s_active)中找到。
+
+记住:如果另一个执行线索可以找到你的数据结构,但是这个数据结构没有引用计数器,这
+里几乎肯定是一个bug。
+
+
+ 第十二章:宏,枚举和RTL
+
+用于定义常量的宏的名字及枚举里的标签需要大写。
+
+#define CONSTANT 0x12345
+
+在定义几个相关的常量时,最好用枚举。
+
+宏的名字请用大写字母,不过形如函数的宏的名字可以用小写字母。
+
+一般的,如果能写成内联函数就不要写成像函数的宏。
+
+含有多个语句的宏应该被包含在一个do-while代码块里:
+
+#define macrofun(a, b, c) \
+ do { \
+ if (a == 5) \
+ do_this(b, c); \
+ } while (0)
+
+使用宏的时候应避免的事情:
+
+1) 影响控制流程的宏:
+
+#define FOO(x) \
+ do { \
+ if (blah(x) < 0) \
+ return -EBUGGERED; \
+ } while(0)
+
+非常不好。它看起来像一个函数,不过却能导致“调用”它的函数退出;不要打乱读者大脑里
+的语法分析器。
+
+2) 依赖于一个固定名字的本地变量的宏:
+
+#define FOO(val) bar(index, val)
+
+可能看起来像是个不错的东西,不过它非常容易把读代码的人搞糊涂,而且容易导致看起来
+不相关的改动带来错误。
+
+3) 作为左值的带参数的宏: FOO(x) = y;如果有人把FOO变成一个内联函数的话,这种用
+法就会出错了。
+
+4) 忘记了优先级:使用表达式定义常量的宏必须将表达式置于一对小括号之内。带参数的
+宏也要注意此类问题。
+
+#define CONSTANT 0x4000
+#define CONSTEXP (CONSTANT | 3)
+
+cpp手册对宏的讲解很详细。Gcc internals手册也详细讲解了RTL(译注:register
+transfer language),内核里的汇编语言经常用到它。
+
+
+ 第十三章:打印内核消息
+
+内核开发者应该是受过良好教育的。请一定注意内核信息的拼写,以给人以好的印象。不要
+用不规范的单词比如“dont”,而要用“do not”或者“don't”。保证这些信息简单、明了、无
+歧义。
+
+内核信息不必以句号(译注:英文句号,即点)结束。
+
+在小括号里打印数字(%d)没有任何价值,应该避免这样做。
+
+<linux/device.h>里有一些驱动模型诊断宏,你应该使用它们,以确保信息对应于正确的
+设备和驱动,并且被标记了正确的消息级别。这些宏有:dev_err(), dev_warn(),
+dev_info()等等。对于那些不和某个特定设备相关连的信息,<linux/kernel.h>定义了
+pr_debug()和pr_info()。
+
+写出好的调试信息可以是一个很大的挑战;当你写出来之后,这些信息在远程除错的时候
+就会成为极大的帮助。当DEBUG符号没有被定义的时候,这些信息不应该被编译进内核里
+(也就是说,默认地,它们不应该被包含在内)。如果你使用dev_dbg()或者pr_debug(),
+就能自动达到这个效果。很多子系统拥有Kconfig选项来启用-DDEBUG。还有一个相关的惯例
+是使用VERBOSE_DEBUG来添加dev_vdbg()消息到那些已经由DEBUG启用的消息之上。
+
+
+ 第十四章:分配内存
+
+内核提供了下面的一般用途的内存分配函数:kmalloc(),kzalloc(),kcalloc()和
+vmalloc()。请参考API文档以获取有关它们的详细信息。
+
+传递结构体大小的首选形式是这样的:
+
+ p = kmalloc(sizeof(*p), ...);
+
+另外一种传递方式中,sizeof的操作数是结构体的名字,这样会降低可读性,并且可能会引
+入bug。有可能指针变量类型被改变时,而对应的传递给内存分配函数的sizeof的结果不变。
+
+强制转换一个void指针返回值是多余的。C语言本身保证了从void指针到其他任何指针类型
+的转换是没有问题的。
+
+
+ 第十五章:内联弊病
+
+有一个常见的误解是内联函数是gcc提供的可以让代码运行更快的一个选项。虽然使用内联
+函数有时候是恰当的(比如作为一种替代宏的方式,请看第十二章),不过很多情况下不是
+这样。inline关键字的过度使用会使内核变大,从而使整个系统运行速度变慢。因为大内核
+会占用更多的指令高速缓存(译注:一级缓存通常是指令缓存和数据缓存分开的)而且会导
+致pagecache的可用内存减少。想象一下,一次pagecache未命中就会导致一次磁盘寻址,将
+耗时5毫秒。5毫秒的时间内CPU能执行很多很多指令。
+
+一个基本的原则是如果一个函数有3行以上,就不要把它变成内联函数。这个原则的一个例
+外是,如果你知道某个参数是一个编译时常量,而且因为这个常量你确定编译器在编译时能
+优化掉你的函数的大部分代码,那仍然可以给它加上inline关键字。kmalloc()内联函数就
+是一个很好的例子。
+
+人们经常主张给static的而且只用了一次的函数加上inline,如此不会有任何损失,因为没
+有什么好权衡的。虽然从技术上说这是正确的,但是实际上这种情况下即使不加inline gcc
+也可以自动使其内联。而且其他用户可能会要求移除inline,由此而来的争论会抵消inline
+自身的潜在价值,得不偿失。
+
+
+ 第十六章:函数返回值及命名
+
+函数可以返回很多种不同类型的值,最常见的一种是表明函数执行成功或者失败的值。这样
+的一个值可以表示为一个错误代码整数(-Exxx=失败,0=成功)或者一个“成功”布尔值(
+0=失败,非0=成功)。
+
+混合使用这两种表达方式是难于发现的bug的来源。如果C语言本身严格区分整形和布尔型变
+量,那么编译器就能够帮我们发现这些错误……不过C语言不区分。为了避免产生这种bug,请
+遵循下面的惯例:
+
+ 如果函数的名字是一个动作或者强制性的命令,那么这个函数应该返回错误代码整
+ 数。如果是一个判断,那么函数应该返回一个“成功”布尔值。
+
+比如,“add work”是一个命令,所以add_work()函数在成功时返回0,在失败时返回-EBUSY。
+类似的,因为“PCI device present”是一个判断,所以pci_dev_present()函数在成功找到
+一个匹配的设备时应该返回1,如果找不到时应该返回0。
+
+所有导出(译注:EXPORT)的函数都必须遵守这个惯例,所有的公共函数也都应该如此。私
+有(static)函数不需要如此,但是我们也推荐这样做。
+
+返回值是实际计算结果而不是计算是否成功的标志的函数不受此惯例的限制。一般的,他们
+通过返回一些正常值范围之外的结果来表示出错。典型的例子是返回指针的函数,他们使用
+NULL或者ERR_PTR机制来报告错误。
+
+
+ 第十七章:不要重新发明内核宏
+
+头文件include/linux/kernel.h包含了一些宏,你应该使用它们,而不要自己写一些它们的
+变种。比如,如果你需要计算一个数组的长度,使用这个宏
+
+ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+类似的,如果你要计算某结构体成员的大小,使用
+
+ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+
+还有可以做严格的类型检查的min()和max()宏,如果你需要可以使用它们。你可以自己看看
+那个头文件里还定义了什么你可以拿来用的东西,如果有定义的话,你就不应在你的代码里
+自己重新定义。
+
+
+ 第十八章:编辑器模式行和其他需要罗嗦的事情
+
+有一些编辑器可以解释嵌入在源文件里的由一些特殊标记标明的配置信息。比如,emacs
+能够解释被标记成这样的行:
+
+-*- mode: c -*-
+
+或者这样的:
+
+/*
+Local Variables:
+compile-command: "gcc -DMAGIC_DEBUG_FLAG foo.c"
+End:
+*/
+
+Vim能够解释这样的标记:
+
+/* vim:set sw=8 noet */
+
+不要在源代码中包含任何这样的内容。每个人都有他自己的编辑器配置,你的源文件不应
+该覆盖别人的配置。这包括有关缩进和模式配置的标记。人们可以使用他们自己定制的模
+式,或者使用其他可以产生正确的缩进的巧妙方法。
+
+
+
+ 附录 I:参考
+
+The C Programming Language, 第二版, 作者Brian W. Kernighan和Denni
+M. Ritchie. Prentice Hall, Inc., 1988. ISBN 0-13-110362-8 (软皮),
+0-13-110370-9 (硬皮). URL: http://cm.bell-labs.com/cm/cs/cbook/
+
+The Practice of Programming 作者Brian W. Kernighan和Rob Pike. Addison-Wesley,
+Inc., 1999. ISBN 0-201-61586-X. URL: http://cm.bell-labs.com/cm/cs/tpop/
+
+cpp,gcc,gcc internals和indent的GNU手册——和K&R及本文相符合的部分,全部可以在
+http://www.gnu.org/manual/找到
+
+WG14是C语言的国际标准化工作组,URL: http://www.open-std.org/JTC1/SC22/WG14/
+
+Kernel CodingStyle,作者greg@kroah.com发表于OLS 2002:
+http://www.kroah.com/linux/talks/ols_2002_kernel_codingstyle_talk/html/
+
+--
+最后更新于2007年7月13日。
diff --git a/Documentation/zh_CN/HOWTO b/Documentation/zh_CN/HOWTO
new file mode 100644
index 000000000..54ea24ff6
--- /dev/null
+++ b/Documentation/zh_CN/HOWTO
@@ -0,0 +1,538 @@
+Chinese translated version of Documentation/HOWTO
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Greg Kroah-Hartman <greg@kroah.com>
+Chinese maintainer: Li Yang <leoli@freescale.com>
+---------------------------------------------------------------------
+Documentation/HOWTO 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+英文版维护者: Greg Kroah-Hartman <greg@kroah.com>
+中文版维护者: 李阳 Li Yang <leoli@freescale.com>
+中文版翻译者: 李阳 Li Yang <leoli@freescale.com>
+中文版校译者: 钟宇 TripleX Chung <xxx.phy@gmail.com>
+ 陈琦 Maggie Chen <chenqi@beyondsoft.com>
+ 王聪 Wang Cong <xiyou.wangcong@gmail.com>
+
+以下为正文
+---------------------------------------------------------------------
+
+如何参与Linux内核开发
+---------------------
+
+这是一篇将如何参与Linux内核开发的相关问题一网打尽的终极秘笈。它将指导你
+成为一名Linux内核开发者,并且学会如何同Linux内核开发社区合作。它尽可能不
+包括任何关于内核编程的技术细节,但会给你指引一条获得这些知识的正确途径。
+
+如果这篇文章中的任何内容不再适用,请给文末列出的文件维护者发送补丁。
+
+
+入门
+----
+
+你想了解如何成为一名Linux内核开发者?或者老板吩咐你“给这个设备写个Linux
+驱动程序”?这篇文章的目的就是教会你达成这些目标的全部诀窍,它将描述你需
+要经过的流程以及给出如何同内核社区合作的一些提示。它还将试图解释内核社区
+为何这样运作。
+
+Linux内核大部分是由C语言写成的,一些体系结构相关的代码用到了汇编语言。要
+参与内核开发,你必须精通C语言。除非你想为某个架构开发底层代码,否则你并
+不需要了解(任何体系结构的)汇编语言。下面列举的书籍虽然不能替代扎实的C
+语言教育和多年的开发经验,但如果需要的话,做为参考还是不错的:
+ - "The C Programming Language" by Kernighan and Ritchie [Prentice Hall]
+ 《C程序设计语言(第2版·新版)》(徐宝文 李志 译)[机械工业出版社]
+ - "Practical C Programming" by Steve Oualline [O'Reilly]
+ 《实用C语言编程(第三版)》(郭大海 译)[中国电力出版社]
+ - "C: A Reference Manual" by Harbison and Steele [Prentice Hall]
+ 《C语言参考手册(原书第5版)》(邱仲潘 等译)[机械工业出版社]
+
+Linux内核使用GNU C和GNU工具链开发。虽然它遵循ISO C89标准,但也用到了一些
+标准中没有定义的扩展。内核是自给自足的C环境,不依赖于标准C库的支持,所以
+并不支持C标准中的部分定义。比如long long类型的大数除法和浮点运算就不允许
+使用。有时候确实很难弄清楚内核对工具链的要求和它所使用的扩展,不幸的是目
+前还没有明确的参考资料可以解释它们。请查阅gcc信息页(使用“info gcc”命令
+显示)获得一些这方面信息。
+
+请记住你是在学习怎么和已经存在的开发社区打交道。它由一群形形色色的人组成,
+他们对代码、风格和过程有着很高的标准。这些标准是在长期实践中总结出来的,
+适应于地理上分散的大型开发团队。它们已经被很好得整理成档,建议你在开发
+之前尽可能多的学习这些标准,而不要期望别人来适应你或者你公司的行为方式。
+
+
+法律问题
+--------
+
+Linux内核源代码都是在GPL(通用公共许可证)的保护下发布的。要了解这种许可
+的细节请查看源代码主目录下的COPYING文件。如果你对它还有更深入问题请联系
+律师,而不要在Linux内核邮件组上提问。因为邮件组里的人并不是律师,不要期
+望他们的话有法律效力。
+
+对于GPL的常见问题和解答,请访问以下链接:
+ http://www.gnu.org/licenses/gpl-faq.html
+
+
+文档
+----
+
+Linux内核代码中包含有大量的文档。这些文档对于学习如何与内核社区互动有着
+不可估量的价值。当一个新的功能被加入内核,最好把解释如何使用这个功能的文
+档也放进内核。当内核的改动导致面向用户空间的接口发生变化时,最好将相关信
+息或手册页(manpages)的补丁发到mtk.manpages@gmail.com,以向手册页(manpages)
+的维护者解释这些变化。
+
+以下是内核代码中需要阅读的文档:
+ README
+ 文件简要介绍了Linux内核的背景,并且描述了如何配置和编译内核。内核的
+ 新用户应该从这里开始。
+
+ Documentation/Changes
+ 文件给出了用来编译和使用内核所需要的最小软件包列表。
+
+ Documentation/CodingStyle
+ 描述Linux内核的代码风格和理由。所有新代码需要遵守这篇文档中定义的规
+ 范。大多数维护者只会接收符合规定的补丁,很多人也只会帮忙检查符合风格
+ 的代码。
+
+ Documentation/SubmittingPatches
+ Documentation/SubmittingDrivers
+ 这两份文档明确描述如何创建和发送补丁,其中包括(但不仅限于):
+ - 邮件内容
+ - 邮件格式
+ - 选择收件人
+ 遵守这些规定并不能保证提交成功(因为所有补丁需要通过严格的内容和风格
+ 审查),但是忽视他们几乎就意味着失败。
+
+ 其他关于如何正确地生成补丁的优秀文档包括:
+ "The Perfect Patch"
+ http://www.ozlabs.org/~akpm/stuff/tpp.txt
+ "Linux kernel patch submission format"
+ http://linux.yyz.us/patch-format.html
+
+ Documentation/stable_api_nonsense.txt
+ 论证内核为什么特意不包括稳定的内核内部API,也就是说不包括像这样的特
+ 性:
+ - 子系统中间层(为了兼容性?)
+ - 在不同操作系统间易于移植的驱动程序
+ - 减缓(甚至阻止)内核代码的快速变化
+ 这篇文档对于理解Linux的开发哲学至关重要。对于将开发平台从其他操作系
+ 统转移到Linux的人来说也很重要。
+
+ Documentation/SecurityBugs
+ 如果你认为自己发现了Linux内核的安全性问题,请根据这篇文档中的步骤来
+ 提醒其他内核开发者并帮助解决这个问题。
+
+ Documentation/ManagementStyle
+ 描述内核维护者的工作方法及其共有特点。这对于刚刚接触内核开发(或者对
+ 它感到好奇)的人来说很重要,因为它解释了很多对于内核维护者独特行为的
+ 普遍误解与迷惑。
+
+ Documentation/stable_kernel_rules.txt
+ 解释了稳定版内核发布的规则,以及如何将改动放入这些版本的步骤。
+
+ Documentation/kernel-docs.txt
+ 有助于内核开发的外部文档列表。如果你在内核自带的文档中没有找到你想找
+ 的内容,可以查看这些文档。
+
+ Documentation/applying-patches.txt
+ 关于补丁是什么以及如何将它打在不同内核开发分支上的好介绍
+
+内核还拥有大量从代码自动生成的文档。它包含内核内部API的全面介绍以及如何
+妥善处理加锁的规则。生成的文档会放在 Documentation/DocBook/目录下。在内
+核源码的主目录中使用以下不同命令将会分别生成PDF、Postscript、HTML和手册
+页等不同格式的文档:
+ make pdfdocs
+ make psdocs
+ make htmldocs
+ make mandocs
+
+
+如何成为内核开发者
+------------------
+如果你对Linux内核开发一无所知,你应该访问“Linux内核新手”计划:
+ http://kernelnewbies.org
+它拥有一个可以问各种最基本的内核开发问题的邮件列表(在提问之前一定要记得
+查找已往的邮件,确认是否有人已经回答过相同的问题)。它还拥有一个可以获得
+实时反馈的IRC聊天频道,以及大量对于学习Linux内核开发相当有帮助的文档。
+
+网站简要介绍了源代码组织结构、子系统划分以及目前正在进行的项目(包括内核
+中的和单独维护的)。它还提供了一些基本的帮助信息,比如如何编译内核和打补
+丁。
+
+如果你想加入内核开发社区并协助完成一些任务,却找不到从哪里开始,可以访问
+“Linux内核房管员”计划:
+ http://kernelnewbies.org/KernelJanitors
+这是极佳的起点。它提供一个相对简单的任务列表,列出内核代码中需要被重新
+整理或者改正的地方。通过和负责这个计划的开发者们一同工作,你会学到将补丁
+集成进内核的基本原理。如果还没有决定下一步要做什么的话,你还可能会得到方
+向性的指点。
+
+如果你已经有一些现成的代码想要放到内核中,但是需要一些帮助来使它们拥有正
+确的格式。请访问“内核导师”计划。这个计划就是用来帮助你完成这个目标的。它
+是一个邮件列表,地址如下:
+ http://selenic.com/mailman/listinfo/kernel-mentors
+
+在真正动手修改内核代码之前,理解要修改的代码如何运作是必需的。要达到这个
+目的,没什么办法比直接读代码更有效了(大多数花招都会有相应的注释),而且
+一些特制的工具还可以提供帮助。例如,“Linux代码交叉引用”项目就是一个值得
+特别推荐的帮助工具,它将源代码显示在有编目和索引的网页上。其中一个更新及
+时的内核源码库,可以通过以下地址访问:
+ http://sosdg.org/~coywolf/lxr/
+
+
+开发流程
+--------
+
+目前Linux内核开发流程包括几个“主内核分支”和很多子系统相关的内核分支。这
+些分支包括:
+ - 2.6.x主内核源码树
+ - 2.6.x.y -stable内核源码树
+ - 2.6.x -git内核补丁集
+ - 2.6.x -mm内核补丁集
+ - 子系统相关的内核源码树和补丁集
+
+
+2.6.x内核主源码树
+-----------------
+2.6.x内核是由Linus Torvalds(Linux的创造者)亲自维护的。你可以在
+kernel.org网站的pub/linux/kernel/v2.6/目录下找到它。它的开发遵循以下步
+骤:
+ - 每当一个新版本的内核被发布,为期两周的集成窗口将被打开。在这段时间里
+ 维护者可以向Linus提交大段的修改,通常这些修改已经被放到-mm内核中几个
+ 星期了。提交大量修改的首选方式是使用git工具(内核的代码版本管理工具
+ ,更多的信息可以在http://git.or.cz/获取),不过使用普通补丁也是可以
+ 的。
+ - 两个星期以后-rc1版本内核发布。之后只有不包含可能影响整个内核稳定性的
+ 新功能的补丁才可能被接受。请注意一个全新的驱动程序(或者文件系统)有
+ 可能在-rc1后被接受是因为这样的修改完全独立,不会影响其他的代码,所以
+ 没有造成内核退步的风险。在-rc1以后也可以用git向Linus提交补丁,不过所
+ 有的补丁需要同时被发送到相应的公众邮件列表以征询意见。
+ - 当Linus认为当前的git源码树已经达到一个合理健全的状态足以发布供人测试
+ 时,一个新的-rc版本就会被发布。计划是每周都发布新的-rc版本。
+ - 这个过程一直持续下去直到内核被认为达到足够稳定的状态,持续时间大概是
+ 6个星期。
+ - 以下地址跟踪了在每个-rc发布中发现的退步列表:
+ http://kernelnewbies.org/known_regressions
+
+关于内核发布,值得一提的是Andrew Morton在linux-kernel邮件列表中如是说:
+ “没有人知道新内核何时会被发布,因为发布是根据已知bug的情况来决定
+ 的,而不是根据一个事先制定好的时间表。”
+
+
+2.6.x.y -stable(稳定版)内核源码树
+-----------------------------------
+由4个数字组成的内核版本号说明此内核是-stable版本。它们包含基于2.6.x版本
+内核的相对较小且至关重要的修补,这些修补针对安全性问题或者严重的内核退步。
+
+这种版本的内核适用于那些期望获得最新的稳定版内核并且不想参与测试开发版或
+者实验版的用户。
+
+如果没有2.6.x.y版本内核存在,那么最新的2.6.x版本内核就相当于是当前的稳定
+版内核。
+
+2.6.x.y版本由“稳定版”小组(邮件地址<stable@vger.kernel.org>)维护,一般隔周发
+布新版本。
+
+内核源码中的Documentation/stable_kernel_rules.txt文件具体描述了可被稳定
+版内核接受的修改类型以及发布的流程。
+
+
+2.6.x -git补丁集
+----------------
+Linus的内核源码树的每日快照,这个源码树是由git工具管理的(由此得名)。这
+些补丁通常每天更新以反映Linus的源码树的最新状态。它们比-rc版本的内核源码
+树更具试验性质,因为这个补丁集是全自动生成的,没有任何人来确认其是否真正
+健全。
+
+
+2.6.x -mm补丁集
+---------------
+这是由Andrew Morton维护的试验性内核补丁集。Andrew将所有子系统的内核源码
+和补丁拼凑到一起,并且加入了大量从linux-kernel邮件列表中采集的补丁。这个
+源码树是新功能和补丁的试炼场。当补丁在-mm补丁集里证明了其价值以后Andrew
+或者相应子系统的维护者会将补丁发给Linus以便集成进主内核源码树。
+
+在将所有新补丁发给Linus以集成到主内核源码树之前,我们非常鼓励先把这些补
+丁放在-mm版内核源码树中进行测试。
+
+这些内核版本不适合在需要稳定运行的系统上运行,因为运行它们比运行任何其他
+内核分支都更具有风险。
+
+如果你想为内核开发进程提供帮助,请尝试并使用这些内核版本,并在
+linux-kernel邮件列表中提供反馈,告诉大家你遇到了问题还是一切正常。
+
+通常-mm版补丁集不光包括这些额外的试验性补丁,还包括发布时-git版主源码树
+中的改动。
+
+-mm版内核没有固定的发布周期,但是通常在每两个-rc版内核发布之间都会有若干
+个-mm版内核发布(一般是1至3个)。
+
+
+子系统相关内核源码树和补丁集
+----------------------------
+相当一部分内核子系统开发者会公开他们自己的开发源码树,以便其他人能了解内
+核的不同领域正在发生的事情。如上所述,这些源码树会被集成到-mm版本内核中。
+
+下面是目前可用的一些内核源码树的列表:
+ 通过git管理的源码树:
+ - Kbuild开发源码树, Sam Ravnborg <sam@ravnborg.org>
+ git.kernel.org:/pub/scm/linux/kernel/git/sam/kbuild.git
+
+ - ACPI开发源码树, Len Brown <len.brown@intel.com>
+ git.kernel.org:/pub/scm/linux/kernel/git/lenb/linux-acpi-2.6.git
+
+ - 块设备开发源码树, Jens Axboe <axboe@suse.de>
+ git.kernel.org:/pub/scm/linux/kernel/git/axboe/linux-2.6-block.git
+
+ - DRM开发源码树, Dave Airlie <airlied@linux.ie>
+ git.kernel.org:/pub/scm/linux/kernel/git/airlied/drm-2.6.git
+
+ - ia64开发源码树, Tony Luck <tony.luck@intel.com>
+ git.kernel.org:/pub/scm/linux/kernel/git/aegl/linux-2.6.git
+
+ - ieee1394开发源码树, Jody McIntyre <scjody@modernduck.com>
+ git.kernel.org:/pub/scm/linux/kernel/git/scjody/ieee1394.git
+
+ - infiniband开发源码树, Roland Dreier <rolandd@cisco.com>
+ git.kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
+
+ - libata开发源码树, Jeff Garzik <jgarzik@pobox.com>
+ git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/libata-dev.git
+
+ - 网络驱动程序开发源码树, Jeff Garzik <jgarzik@pobox.com>
+ git.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git
+
+ - pcmcia开发源码树, Dominik Brodowski <linux@dominikbrodowski.net>
+ git.kernel.org:/pub/scm/linux/kernel/git/brodo/pcmcia-2.6.git
+
+ - SCSI开发源码树, James Bottomley <James.Bottomley@SteelEye.com>
+ git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
+
+ 使用quilt管理的补丁集:
+ - USB, PCI, 驱动程序核心和I2C, Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+ kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/
+ - x86-64, 部分i386, Andi Kleen <ak@suse.de>
+ ftp.firstfloor.org:/pub/ak/x86_64/quilt/
+
+ 其他内核源码树可以在http://git.kernel.org的列表中和MAINTAINERS文件里
+ 找到。
+
+报告bug
+-------
+
+bugzilla.kernel.org是Linux内核开发者们用来跟踪内核Bug的网站。我们鼓励用
+户在这个工具中报告找到的所有bug。如何使用内核bugzilla的细节请访问:
+ http://test.kernel.org/bugzilla/faq.html
+
+内核源码主目录中的REPORTING-BUGS文件里有一个很好的模板。它指导用户如何报
+告可能的内核bug以及需要提供哪些信息来帮助内核开发者们找到问题的根源。
+
+
+利用bug报告
+-----------
+
+练习内核开发技能的最好办法就是修改其他人报告的bug。你不光可以帮助内核变
+得更加稳定,还可以学会如何解决实际问题从而提高自己的技能,并且让其他开发
+者感受到你的存在。修改bug是赢得其他开发者赞誉的最好办法,因为并不是很多
+人都喜欢浪费时间去修改别人报告的bug。
+
+要尝试修改已知的bug,请访问http://bugzilla.kernel.org网址。如果你想获得
+最新bug的通知,可以订阅bugme-new邮件列表(只有新的bug报告会被寄到这里)
+或者订阅bugme-janitor邮件列表(所有bugzilla的变动都会被寄到这里)。
+
+ https://lists.linux-foundation.org/mailman/listinfo/bugme-new
+ https://lists.linux-foundation.org/mailman/listinfo/bugme-janitors
+
+
+邮件列表
+--------
+
+正如上面的文档所描述,大多数的骨干内核开发者都加入了Linux Kernel邮件列
+表。如何订阅和退订列表的细节可以在这里找到:
+ http://vger.kernel.org/vger-lists.html#linux-kernel
+网上很多地方都有这个邮件列表的存档(archive)。可以使用搜索引擎来找到这些
+存档。比如:
+ http://dir.gmane.org/gmane.linux.kernel
+在发信之前,我们强烈建议你先在存档中搜索你想要讨论的问题。很多已经被详细
+讨论过的问题只在邮件列表的存档中可以找到。
+
+大多数内核子系统也有自己独立的邮件列表来协调各自的开发工作。从
+MAINTAINERS文件中可以找到不同话题对应的邮件列表。
+
+很多邮件列表架设在kernel.org服务器上。这些列表的信息可以在这里找到:
+ http://vger.kernel.org/vger-lists.html
+
+在使用这些邮件列表时,请记住保持良好的行为习惯。下面的链接提供了与这些列
+表(或任何其它邮件列表)交流的一些简单规则,虽然内容有点滥竽充数。
+ http://www.albion.com/netiquette/
+
+当有很多人回复你的邮件时,邮件的抄送列表会变得很长。请不要将任何人从抄送
+列表中删除,除非你有足够的理由这么做。也不要只回复到邮件列表。请习惯于同
+一封邮件接收两次(一封来自发送者一封来自邮件列表),而不要试图通过添加一
+些奇特的邮件头来解决这个问题,人们不会喜欢的。
+
+记住保留你所回复内容的上下文和源头。在你回复邮件的顶部保留“某某某说到……”
+这几行。将你的评论加在被引用的段落之间而不要放在邮件的顶部。
+
+如果你在邮件中附带补丁,请确认它们是可以直接阅读的纯文本(如
+Documentation/SubmittingPatches文档中所述)。内核开发者们不希望遇到附件
+或者被压缩了的补丁。只有这样才能保证他们可以直接评论你的每行代码。请确保
+你使用的邮件发送程序不会修改空格和制表符。一个防范性的测试方法是先将邮件
+发送给自己,然后自己尝试是否可以顺利地打上收到的补丁。如果测试不成功,请
+调整或者更换你的邮件发送程序直到它正确工作为止。
+
+总而言之,请尊重其他的邮件列表订阅者。
+
+
+同内核社区合作
+----------------
+
+内核社区的目标就是提供尽善尽美的内核。所以当你提交补丁期望被接受进内核的
+时候,它的技术价值以及其他方面都将被评审。那么你可能会得到什么呢?
+ - 批评
+ - 评论
+ - 要求修改
+ - 要求证明修改的必要性
+ - 沉默
+
+要记住,这些是把补丁放进内核的正常情况。你必须学会听取对补丁的批评和评论,
+从技术层面评估它们,然后要么重写你的补丁要么简明扼要地论证修改是不必要
+的。如果你发的邮件没有得到任何回应,请过几天后再试一次,因为有时信件会湮
+没在茫茫信海中。
+
+你不应该做的事情:
+ - 期望自己的补丁不受任何质疑就直接被接受
+ - 翻脸
+ - 忽略别人的评论
+ - 没有按照别人的要求做任何修改就重新提交
+
+在一个努力追寻最好技术方案的社区里,对于一个补丁有多少好处总会有不同的见
+解。你必须要抱着合作的态度,愿意改变自己的观点来适应内核的风格。或者至少
+愿意去证明你的想法是有价值的。记住,犯错误是允许的,只要你愿意朝着正确的
+方案去努力。
+
+如果你的第一个补丁换来的是一堆修改建议,这是很正常的。这并不代表你的补丁
+不会被接受,也不意味着有人和你作对。你只需要改正所有提出的问题然后重新发
+送你的补丁。
+
+内核社区和公司文化的差异
+------------------------
+
+内核社区的工作模式同大多数传统公司开发队伍的工作模式并不相同。下面这些例
+子,可以帮助你避免某些可能发生问题:
+ 用这些话介绍你的修改提案会有好处:
+ - 它同时解决了多个问题
+ - 它删除了2000行代码
+ - 这是补丁,它已经解释了我想要说明的
+ - 我在5种不同的体系结构上测试过它……
+ - 这是一系列小补丁用来……
+ - 这个修改提高了普通机器的性能……
+
+ 应该避免如下的说法:
+ - 我们在AIX/ptx/Solaris就是这么做的,所以这么做肯定是好的……
+ - 我做这行已经20年了,所以……
+ - 为了我们公司赚钱考虑必须这么做
+ - 这是我们的企业产品线所需要的
+ - 这里是描述我观点的1000页设计文档
+ - 这是一个5000行的补丁用来……
+ - 我重写了现在乱七八糟的代码,这就是……
+ - 我被规定了最后期限,所以这个补丁需要立刻被接受
+
+另外一个内核社区与大部分传统公司的软件开发队伍不同的地方是无法面对面地交
+流。使用电子邮件和IRC聊天工具做为主要沟通工具的一个好处是性别和种族歧视
+将会更少。Linux内核的工作环境更能接受妇女和少数族群,因为每个人在别人眼
+里只是一个邮件地址。国际化也帮助了公平的实现,因为你无法通过姓名来判断人
+的性别。男人有可能叫李丽,女人也有可能叫王刚。大多数在Linux内核上工作过
+并表达过看法的女性对在linux上工作的经历都给出了正面的评价。
+
+对于一些不习惯使用英语的人来说,语言可能是一个引起问题的障碍。在邮件列表
+中要正确地表达想法必需良好地掌握语言,所以建议你在发送邮件之前最好检查一
+下英文写得是否正确。
+
+
+拆分修改
+--------
+
+Linux内核社区并不喜欢一下接收大段的代码。修改需要被恰当地介绍、讨论并且
+拆分成独立的小段。这几乎完全和公司中的习惯背道而驰。你的想法应该在开发最
+开始的阶段就让大家知道,这样你就可以及时获得对你正在进行的开发的反馈。这
+样也会让社区觉得你是在和他们协作,而不是仅仅把他们当作倾销新功能的对象。
+无论如何,你不要一次性地向邮件列表发送50封信,你的补丁序列应该永远用不到
+这么多。
+
+将补丁拆开的原因如下:
+
+1) 小的补丁更有可能被接受,因为它们不需要太多的时间和精力去验证其正确性。
+ 一个5行的补丁,可能在维护者看了一眼以后就会被接受。而500行的补丁则
+ 需要数个小时来审查其正确性(所需时间随补丁大小增加大约呈指数级增长)。
+
+ 当出了问题的时候,小的补丁也会让调试变得非常容易。一个一个补丁地回溯
+ 将会比仔细剖析一个被打上的大补丁(这个补丁破坏了其他东西)容易得多。
+
+2)不光发送小的补丁很重要,在提交之前重新编排、化简(或者仅仅重新排列)
+ 补丁也是很重要的。
+
+这里有内核开发者Al Viro打的一个比方:
+ “想象一个老师正在给学生批改数学作业。老师并不希望看到学生为了得
+ 到正确解法所进行的尝试和产生的错误。他希望看到的是最干净最优雅的
+ 解答。好学生了解这点,绝不会把最终解决之前的中间方案提交上去。”
+
+ 内核开发也是这样。维护者和评审者不希望看到一个人在解决问题时的思
+ 考过程。他们只希望看到简单和优雅的解决方案。
+
+直接给出一流的解决方案,和社区一起协作讨论尚未完成的工作,这两者之间似乎
+很难找到一个平衡点。所以最好尽早开始收集有利于你进行改进的反馈;同时也要
+保证修改分成很多小块,这样在整个项目都准备好被包含进内核之前,其中的一部
+分可能会先被接收。
+
+必须了解这样做是不可接受的:试图将未完成的工作提交进内核,然后再找时间修
+复。
+
+
+证明修改的必要性
+----------------
+除了将补丁拆成小块,很重要的一点是让Linux社区了解他们为什么需要这样修改。
+你必须证明新功能是有人需要的并且是有用的。
+
+
+记录修改
+--------
+
+当你发送补丁的时候,需要特别留意邮件正文的内容。因为这里的信息将会做为补
+丁的修改记录(ChangeLog),会被一直保留以备大家查阅。它需要完全地描述补丁,
+包括:
+ - 为什么需要这个修改
+ - 补丁的总体设计
+ - 实现细节
+ - 测试结果
+
+想了解它具体应该看起来像什么,请查阅以下文档中的“ChangeLog”章节:
+ “The Perfect Patch”
+ http://www.ozlabs.org/~akpm/stuff/tpp.txt
+
+
+这些事情有时候做起来很难。要在任何方面都做到完美可能需要好几年时间。这是
+一个持续提高的过程,它需要大量的耐心和决心。只要不放弃,你一定可以做到。
+很多人已经做到了,而他们都曾经和现在的你站在同样的起点上。
+
+
+---------------
+感谢Paolo Ciarrocchi允许“开发流程”部分基于他所写的文章
+(http://www.kerneltravel.net/newbie/2.6-development_process),感谢Randy
+Dunlap和Gerrit Huizenga完善了应该说和不该说的列表。感谢Pat Mochel, Hanna
+Linder, Randy Dunlap, Kay Sievers, Vojtech Pavlik, Jan Kara, Josh Boyer,
+Kees Cook, Andrew Morton, Andi Kleen, Vadim Lobanov, Jesper Juhl, Adrian
+Bunk, Keri Harris, Frans Pop, David A. Wheeler, Junio Hamano, Michael
+Kerrisk和Alex Shepard的评审、建议和贡献。没有他们的帮助,这篇文档是不可
+能完成的。
+
+
+
+英文版维护者: Greg Kroah-Hartman <greg@kroah.com>
diff --git a/Documentation/zh_CN/IRQ.txt b/Documentation/zh_CN/IRQ.txt
new file mode 100644
index 000000000..956026d5c
--- /dev/null
+++ b/Documentation/zh_CN/IRQ.txt
@@ -0,0 +1,39 @@
+Chinese translated version of Documentation/IRQ.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Eric W. Biederman <ebiederman@xmission.com>
+Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
+---------------------------------------------------------------------
+Documentation/IRQ.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+英文版维护者: Eric W. Biederman <ebiederman@xmission.com>
+中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版校译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+
+
+以下为正文
+---------------------------------------------------------------------
+何为 IRQ?
+
+一个 IRQ 是来自某个设备的一个中断请求。目前,它们可以来自一个硬件引脚,
+或来自一个数据包。多个设备可能连接到同个硬件引脚,从而共享一个 IRQ。
+
+一个 IRQ 编号是用于告知硬件中断源的内核标识。通常情况下,这是一个
+全局 irq_desc 数组的索引,但是除了在 linux/interrupt.h 中的实现,
+具体的细节是体系结构特定的。
+
+一个 IRQ 编号是设备上某个可能的中断源的枚举。通常情况下,枚举的编号是
+该引脚在系统内中断控制器的所有输入引脚中的编号。对于 ISA 总线中的情况,
+枚举的是在两个 i8259 中断控制器中 16 个输入引脚。
+
+架构可以对 IRQ 编号指定额外的含义,在硬件涉及任何手工配置的情况下,
+是被提倡的。ISA 的 IRQ 是一个分配这类额外含义的典型例子。
diff --git a/Documentation/zh_CN/SecurityBugs b/Documentation/zh_CN/SecurityBugs
new file mode 100644
index 000000000..d21eb07fe
--- /dev/null
+++ b/Documentation/zh_CN/SecurityBugs
@@ -0,0 +1,50 @@
+Chinese translated version of Documentation/SecurityBugs
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Chinese maintainer: Harry Wei <harryxiyou@gmail.com>
+---------------------------------------------------------------------
+Documentation/SecurityBugs 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+中文版维护者: 贾威威 Harry Wei <harryxiyou@gmail.com>
+中文版翻译者: 贾威威 Harry Wei <harryxiyou@gmail.com>
+中文版校译者: 贾威威 Harry Wei <harryxiyou@gmail.com>
+
+
+以下为正文
+---------------------------------------------------------------------
+Linux内核开发者认为安全非常重要。因此,我们想要知道当一个有关于
+安全的漏洞被发现的时候,并且它可能会被尽快的修复或者公开。请把这个安全
+漏洞报告给Linux内核安全团队。
+
+1) 联系
+
+linux内核安全团队可以通过email<security@kernel.org>来联系。这是
+一组独立的安全工作人员,可以帮助改善漏洞报告并且公布和取消一个修复。安
+全团队有可能会从部分的维护者那里引进额外的帮助来了解并且修复安全漏洞。
+当遇到任何漏洞,所能提供的信息越多就越能诊断和修复。如果你不清楚什么
+是有帮助的信息,那就请重温一下REPORTING-BUGS文件中的概述过程。任
+何攻击性的代码都是非常有用的,未经报告者的同意不会被取消,除非它已经
+被公布于众。
+
+2) 公开
+
+Linux内核安全团队的宗旨就是和漏洞提交者一起处理漏洞的解决方案直
+到公开。我们喜欢尽快地完全公开漏洞。当一个漏洞或者修复还没有被完全地理
+解,解决方案没有通过测试或者供应商协调,可以合理地延迟公开。然而,我们
+期望这些延迟尽可能的短些,是可数的几天,而不是几个星期或者几个月。公开
+日期是通过安全团队和漏洞提供者以及供应商洽谈后的结果。公开时间表是从很
+短(特殊的,它已经被公众所知道)到几个星期。作为一个基本的默认政策,我
+们所期望通知公众的日期是7天的安排。
+
+3) 保密协议
+
+Linux内核安全团队不是一个正式的团体,因此不能加入任何的保密协议。
diff --git a/Documentation/zh_CN/SubmittingDrivers b/Documentation/zh_CN/SubmittingDrivers
new file mode 100644
index 000000000..d313f5d84
--- /dev/null
+++ b/Documentation/zh_CN/SubmittingDrivers
@@ -0,0 +1,164 @@
+Chinese translated version of Documentation/SubmittingDrivers
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Chinese maintainer: Li Yang <leo@zh-kernel.org>
+---------------------------------------------------------------------
+Documentation/SubmittingDrivers 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+中文版维护者: 李阳 Li Yang <leo@zh-kernel.org>
+中文版翻译者: 李阳 Li Yang <leo@zh-kernel.org>
+中文版校译者: 陈琦 Maggie Chen <chenqi@beyondsoft.com>
+ 王聪 Wang Cong <xiyou.wangcong@gmail.com>
+ 张巍 Zhang Wei <Wei.Zhang@freescale.com>
+
+以下为正文
+---------------------------------------------------------------------
+
+如何向 Linux 内核提交驱动程序
+-----------------------------
+
+这篇文档将会解释如何向不同的内核源码树提交设备驱动程序。请注意,如果你感
+兴趣的是显卡驱动程序,你也许应该访问 XFree86 项目(http://www.xfree86.org/)
+和/或 X.org 项目 (http://x.org)。
+
+另请参阅 Documentation/SubmittingPatches 文档。
+
+
+分配设备号
+----------
+
+块设备和字符设备的主设备号与从设备号是由 Linux 命名编号分配权威 LANANA(
+现在是 Torben Mathiasen)负责分配。申请的网址是 http://www.lanana.org/。
+即使不准备提交到主流内核的设备驱动也需要在这里分配设备号。有关详细信息,
+请参阅 Documentation/devices.txt。
+
+如果你使用的不是已经分配的设备号,那么当你提交设备驱动的时候,它将会被强
+制分配一个新的设备号,即便这个设备号和你之前发给客户的截然不同。
+
+设备驱动的提交对象
+------------------
+
+Linux 2.0:
+ 此内核源码树不接受新的驱动程序。
+
+Linux 2.2:
+ 此内核源码树不接受新的驱动程序。
+
+Linux 2.4:
+ 如果所属的代码领域在内核的 MAINTAINERS 文件中列有一个总维护者,
+ 那么请将驱动程序提交给他。如果此维护者没有回应或者你找不到恰当的
+ 维护者,那么请联系 Willy Tarreau <w@1wt.eu>。
+
+Linux 2.6:
+ 除了遵循和 2.4 版内核同样的规则外,你还需要在 linux-kernel 邮件
+ 列表上跟踪最新的 API 变化。向 Linux 2.6 内核提交驱动的顶级联系人
+ 是 Andrew Morton <akpm@linux-foundation.org>。
+
+决定设备驱动能否被接受的条件
+----------------------------
+
+许可: 代码必须使用 GNU 通用公开许可证 (GPL) 提交给 Linux,但是
+ 我们并不要求 GPL 是唯一的许可。你或许会希望同时使用多种
+ 许可证发布,如果希望驱动程序可以被其他开源社区(比如BSD)
+ 使用。请参考 include/linux/module.h 文件中所列出的可被
+ 接受共存的许可。
+
+版权: 版权所有者必须同意使用 GPL 许可。最好提交者和版权所有者
+ 是相同个人或实体。否则,必需列出授权使用 GPL 的版权所有
+ 人或实体,以备验证之需。
+
+接口: 如果你的驱动程序使用现成的接口并且和其他同类的驱动程序行
+ 为相似,而不是去发明无谓的新接口,那么它将会更容易被接受。
+ 如果你需要一个 Linux 和 NT 的通用驱动接口,那么请在用
+ 户空间实现它。
+
+代码: 请使用 Documentation/CodingStyle 中所描述的 Linux 代码风
+ 格。如果你的某些代码段(例如那些与 Windows 驱动程序包共
+ 享的代码段)需要使用其他格式,而你却只希望维护一份代码,
+ 那么请将它们很好地区分出来,并且注明原因。
+
+可移植性: 请注意,指针并不永远是 32 位的,不是所有的计算机都使用小
+ 尾模式 (little endian) 存储数据,不是所有的人都拥有浮点
+ 单元,不要随便在你的驱动程序里嵌入 x86 汇编指令。只能在
+ x86 上运行的驱动程序一般是不受欢迎的。虽然你可能只有 x86
+ 硬件,很难测试驱动程序在其他平台上是否可用,但是确保代码
+ 可以被轻松地移植却是很简单的。
+
+清晰度: 做到所有人都能修补这个驱动程序将会很有好处,因为这样你将
+ 会直接收到修复的补丁而不是 bug 报告。如果你提交一个试图
+ 隐藏硬件工作机理的驱动程序,那么它将会被扔进废纸篓。
+
+电源管理: 因为 Linux 正在被很多移动设备和桌面系统使用,所以你的驱
+ 动程序也很有可能被使用在这些设备上。它应该支持最基本的电
+ 源管理,即在需要的情况下实现系统级休眠和唤醒要用到的
+ .suspend 和 .resume 函数。你应该检查你的驱动程序是否能正
+ 确地处理休眠与唤醒,如果实在无法确认,请至少把 .suspend
+ 函数定义成返回 -ENOSYS(功能未实现)错误。你还应该尝试确
+ 保你的驱动在什么都不干的情况下将耗电降到最低。要获得驱动
+ 程序测试的指导,请参阅
+ Documentation/power/drivers-testing.txt。有关驱动程序电
+ 源管理问题相对全面的概述,请参阅
+ Documentation/power/devices.txt。
+
+管理: 如果一个驱动程序的作者还在进行有效的维护,那么通常除了那
+ 些明显正确且不需要任何检查的补丁以外,其他所有的补丁都会
+ 被转发给作者。如果你希望成为驱动程序的联系人和更新者,最
+ 好在代码注释中写明并且在 MAINTAINERS 文件中加入这个驱动
+ 程序的条目。
+
+不影响设备驱动能否被接受的条件
+------------------------------
+
+供应商: 由硬件供应商来维护驱动程序通常是一件好事。不过,如果源码
+ 树里已经有其他人提供了可稳定工作的驱动程序,那么请不要期
+ 望“我是供应商”会成为内核改用你的驱动程序的理由。理想的情
+ 况是:供应商与现有驱动程序的作者合作,构建一个统一完美的
+ 驱动程序。
+
+作者: 驱动程序是由大的 Linux 公司研发还是由你个人编写,并不影
+ 响其是否能被内核接受。没有人对内核源码树享有特权。只要你
+ 充分了解内核社区,你就会发现这一点。
+
+
+资源列表
+--------
+
+Linux 内核主源码树:
+ ftp.??.kernel.org:/pub/linux/kernel/...
+ ?? == 你的国家代码,例如 "cn"、"us"、"uk"、"fr" 等等
+
+Linux 内核邮件列表:
+ linux-kernel@vger.kernel.org
+ [可通过向majordomo@vger.kernel.org发邮件来订阅]
+
+Linux 设备驱动程序,第三版(探讨 2.6.10 版内核):
+ http://lwn.net/Kernel/LDD3/ (免费版)
+
+LWN.net:
+ 每周内核开发活动摘要 - http://lwn.net/
+ 2.6 版中 API 的变更:
+ http://lwn.net/Articles/2.6-kernel-api/
+ 将旧版内核的驱动程序移植到 2.6 版:
+ http://lwn.net/Articles/driver-porting/
+
+内核新手(KernelNewbies):
+ 为新的内核开发者提供文档和帮助
+ http://kernelnewbies.org/
+
+Linux USB项目:
+ http://www.linux-usb.org/
+
+写内核驱动的“不要”(Arjan van de Ven著):
+ http://www.fenrus.org/how-to-not-write-a-device-driver-paper.pdf
+
+内核清洁工 (Kernel Janitor):
+ http://kernelnewbies.org/KernelJanitors
diff --git a/Documentation/zh_CN/SubmittingPatches b/Documentation/zh_CN/SubmittingPatches
new file mode 100644
index 000000000..1d3a10f87
--- /dev/null
+++ b/Documentation/zh_CN/SubmittingPatches
@@ -0,0 +1,412 @@
+Chinese translated version of Documentation/SubmittingPatches
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Chinese maintainer: TripleX Chung <triplex@zh-kernel.org>
+---------------------------------------------------------------------
+Documentation/SubmittingPatches 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+中文版维护者: 钟宇 TripleX Chung <triplex@zh-kernel.org>
+中文版翻译者: 钟宇 TripleX Chung <triplex@zh-kernel.org>
+中文版校译者: 李阳 Li Yang <leo@zh-kernel.org>
+ 王聪 Wang Cong <xiyou.wangcong@gmail.com>
+
+以下为正文
+---------------------------------------------------------------------
+
+ 如何让你的改动进入内核
+ 或者
+ 获得亲爱的 Linus Torvalds 的关注和处理
+----------------------------------
+
+对于想要将改动提交到 Linux 内核的个人或者公司来说,如果不熟悉“规矩”,
+提交的流程会让人畏惧。本文档收集了一系列建议,这些建议可以大大的提高你
+的改动被接受的机会。
+阅读 Documentation/SubmitChecklist 来获得在提交代码前需要检查的项目的列
+表。如果你在提交一个驱动程序,那么同时阅读一下
+Documentation/SubmittingDrivers 。
+
+
+--------------------------
+第一节 - 创建并发送你的改动
+--------------------------
+
+1) "diff -up"
+-----------
+
+使用 "diff -up" 或者 "diff -uprN" 来创建补丁。
+
+所有内核的改动,都是以补丁的形式呈现的,补丁由 diff(1) 生成。创建补丁的
+时候,要确认它是以 "unified diff" 格式创建的,这种格式由 diff(1) 的 '-u'
+参数生成。而且,请使用 '-p' 参数,那样会显示每个改动所在的C函数,使得
+产生的补丁容易读得多。补丁应该基于内核源代码树的根目录,而不是里边的任
+何子目录。
+为一个单独的文件创建补丁,一般来说这样做就够了:
+
+ SRCTREE= linux-2.6
+ MYFILE= drivers/net/mydriver.c
+
+ cd $SRCTREE
+ cp $MYFILE $MYFILE.orig
+ vi $MYFILE # make your change
+ cd ..
+ diff -up $SRCTREE/$MYFILE{.orig,} > /tmp/patch
+
+为多个文件创建补丁,你可以解开一个没有修改过的内核源代码树,然后和你自
+己的代码树之间做 diff 。例如:
+
+ MYSRC= /devel/linux-2.6
+
+ tar xvfz linux-2.6.12.tar.gz
+ mv linux-2.6.12 linux-2.6.12-vanilla
+ diff -uprN -X linux-2.6.12-vanilla/Documentation/dontdiff \
+ linux-2.6.12-vanilla $MYSRC > /tmp/patch
+
+"dontdiff" 是内核在编译的时候产生的文件的列表,列表中的文件在 diff(1)
+产生的补丁里会被跳过。"dontdiff" 文件被包含在2.6.12和之后版本的内核源代
+码树中。对于更早的内核版本,你可以从
+<http://www.xenotime.net/linux/doc/dontdiff> 获取它。
+确定你的补丁里没有包含任何不属于这次补丁提交的额外文件。记得在用diff(1)
+生成补丁之后,审阅一次补丁,以确保准确。
+如果你的改动很散乱,你应该研究一下如何将补丁分割成独立的部分,将改动分
+割成一系列合乎逻辑的步骤。这样更容易让其他内核开发者审核,如果你想你的
+补丁被接受,这是很重要的。下面这些脚本能够帮助你做这件事情:
+Quilt:
+http://savannah.nongnu.org/projects/quilt
+
+2)描述你的改动。
+描述你的改动包含的技术细节。
+
+要多具体就写多具体。最糟糕的描述可能是像下面这些语句:“更新了某驱动程
+序”,“修正了某驱动程序的bug”,或者“这个补丁包含了某子系统的修改,请
+使用。”
+
+如果你的描述开始变长,这表示你也许需要拆分你的补丁了,请看第3小节,
+继续。
+
+3)拆分你的改动
+
+将改动拆分,逻辑类似的放到同一个补丁文件里。
+
+例如,如果你的改动里同时有bug修正和性能优化,那么把这些改动拆分到两个或
+者更多的补丁文件中。如果你的改动包含对API的修改,并且修改了驱动程序来适
+应这些新的API,那么把这些修改分成两个补丁。
+
+另一方面,如果你将一个单独的改动做成多个补丁文件,那么将它们合并成一个
+单独的补丁文件。这样一个逻辑上单独的改动只被包含在一个补丁文件里。
+
+如果有一个补丁依赖另外一个补丁来完成它的改动,那没问题。简单的在你的补
+丁描述里指出“这个补丁依赖某补丁”就好了。
+
+如果你不能将补丁浓缩成更少的文件,那么每次大约发送出15个,然后等待审查
+和整合。
+
+4)选择 e-mail 的收件人
+
+看一遍 MAINTAINERS 文件和源代码,看看你所的改动所在的内核子系统有没有指
+定的维护者。如果有,给他们发e-mail。
+
+如果没有找到维护者,或者维护者没有反馈,将你的补丁发送到内核开发者主邮
+件列表 linux-kernel@vger.kernel.org。大部分的内核开发者都跟踪这个邮件列
+表,可以评价你的改动。
+
+每次不要发送超过15个补丁到 vger 邮件列表!!!
+
+Linus Torvalds 是决定改动能否进入 Linux 内核的最终裁决者。他的 e-mail
+地址是 <torvalds@linux-foundation.org> 。他收到的 e-mail 很多,所以一般
+的说,最好别给他发 e-mail。
+
+那些修正bug,“显而易见”的修改或者是类似的只需要很少讨论的补丁可以直接
+发送或者CC给Linus。那些需要讨论或者没有很清楚的好处的补丁,一般先发送到
+linux-kernel邮件列表。只有当补丁被讨论得差不多了,才提交给Linus。
+
+5)选择CC( e-mail 抄送)列表
+
+除非你有理由不这样做,否则CC linux-kernel@vger.kernel.org。
+
+除了 Linus 之外,其他内核开发者也需要注意到你的改动,这样他们才能评论你
+的改动并提供代码审查和建议。linux-kernel 是 Linux 内核开发者主邮件列表
+。其它的邮件列表为特定的子系统提供服务,比如 USB,framebuffer 设备,虚
+拟文件系统,SCSI 子系统,等等。查看 MAINTAINERS 文件来获得和你的改动有
+关的邮件列表。
+
+Majordomo lists of VGER.KERNEL.ORG at:
+ <http://vger.kernel.org/vger-lists.html>
+
+如果改动影响了用户空间和内核之间的接口,请给 MAN-PAGES 的维护者(列在
+MAINTAINERS 文件里的)发送一个手册页(man-pages)补丁,或者至少通知一下改
+变,让一些信息有途径进入手册页。
+
+即使在第四步的时候,维护者没有作出回应,也要确认在修改他们的代码的时候
+,一直将维护者拷贝到CC列表中。
+
+对于小的补丁,你也许会CC到 Adrian Bunk 管理的搜集琐碎补丁的邮件列表
+(Trivial Patch Monkey)trivial@kernel.org,那里专门收集琐碎的补丁。下面这样
+的补丁会被看作“琐碎的”补丁:
+ 文档的拼写修正。
+ 修正会影响到 grep(1) 的拼写。
+ 警告信息修正(频繁的打印无用的警告是不好的。)
+ 编译错误修正(代码逻辑的确是对的,只是编译有问题。)
+ 运行时修正(只要真的修正了错误。)
+ 移除使用了被废弃的函数/宏的代码(例如 check_region。)
+ 联系方式和文档修正。
+ 用可移植的代码替换不可移植的代码(即使在体系结构相关的代码中,既然有
+ 人拷贝,只要它是琐碎的)
+ 任何文件的作者/维护者对该文件的改动(例如 patch monkey 在重传模式下)
+
+EMAIL: trivial@kernel.org
+
+(译注,关于“琐碎补丁”的一些说明:因为原文的这一部分写得比较简单,所以不得不
+违例写一下译注。"trivial"这个英文单词的本意是“琐碎的,不重要的。”但是在这里
+有稍微有一些变化,例如对一些明显的NULL指针的修正,属于运行时修正,会被归类
+到琐碎补丁里。虽然NULL指针的修正很重要,但是这样的修正往往很小而且很容易得到
+检验,所以也被归入琐碎补丁。琐碎补丁更精确的归类应该是
+“simple, localized & easy to verify”,也就是说简单的,局部的和易于检验的。
+trivial@kernel.org邮件列表的目的是针对这样的补丁,为提交者提供一个中心,来
+降低提交的门槛。)
+
+6)没有 MIME 编码,没有链接,没有压缩,没有附件,只有纯文本。
+
+Linus 和其他的内核开发者需要阅读和评论你提交的改动。对于内核开发者来说
+,可以“引用”你的改动很重要,使用一般的 e-mail 工具,他们就可以在你的
+代码的任何位置添加评论。
+
+因为这个原因,所有的提交的补丁都是 e-mail 中“内嵌”的。
+警告:如果你使用剪切-粘贴你的补丁,小心你的编辑器的自动换行功能破坏你的
+补丁。
+
+不要将补丁作为 MIME 编码的附件,不管是否压缩。很多流行的 e-mail 软件不
+是任何时候都将 MIME 编码的附件当作纯文本发送的,这会使得别人无法在你的
+代码中加评论。另外,MIME 编码的附件会让 Linus 多花一点时间来处理,这就
+降低了你的改动被接受的可能性。
+
+警告:一些邮件软件,比如 Mozilla 会将你的信息以如下格式发送:
+---- 邮件头 ----
+Content-Type: text/plain; charset=us-ascii; format=flowed
+---- 邮件头 ----
+问题在于 “format=flowed” 会让接收端的某些邮件软件将邮件中的制表符替换
+成空格以及做一些类似的替换。这样,你发送的时候看起来没问题的补丁就被破
+坏了。
+
+要修正这个问题,只需要将你的 mozilla 的 defaults/pref/mailnews.js 文件
+里的
+pref("mailnews.send_plaintext_flowed", false); // RFC 2646=======
+修改成
+pref("mailnews.display.disable_format_flowed_support", true);
+就可以了。
+
+7) e-mail 的大小
+
+给 Linus 发送补丁的时候,永远按照第6小节说的做。
+
+大的改动对邮件列表不合适,对某些维护者也不合适。如果你的补丁,在不压缩
+的情况下,超过了40kB,那么你最好将补丁放在一个能通过 internet 访问的服
+务器上,然后用指向你的补丁的 URL 替代。
+
+8) 指出你的内核版本
+
+在标题和在补丁的描述中,指出补丁对应的内核的版本,是很重要的。
+
+如果补丁不能干净的在最新版本的内核上打上,Linus 是不会接受它的。
+
+9) 不要气馁,继续提交。
+
+当你提交了改动以后,耐心地等待。如果 Linus 喜欢你的改动并且同意它,那么
+它将在下一个内核发布版本中出现。
+
+然而,如果你的改动没有出现在下一个版本的内核中,可能有若干原因。减少那
+些原因,修正错误,重新提交更新后的改动,是你自己的工作。
+
+Linus不给出任何评论就“丢弃”你的补丁是常见的事情。在系统中这样的事情很
+平常。如果他没有接受你的补丁,也许是由于以下原因:
+* 你的补丁不能在最新版本的内核上干净的打上。
+* 你的补丁在 linux-kernel 邮件列表中没有得到充分的讨论。
+* 风格问题(参照第2小节)
+* 邮件格式问题(重读本节)
+* 你的改动有技术问题。
+* 他收到了成吨的 e-mail,而你的在混乱中丢失了。
+* 你让人为难。
+
+有疑问的时候,在 linux-kernel 邮件列表上请求评论。
+
+10) 在标题上加上 PATCH 的字样
+
+Linus 和 linux-kernel 邮件列表的 e-mail 流量都很高,一个通常的约定是标
+题行以 [PATCH] 开头。这样可以让 Linus 和其他内核开发人员可以从 e-mail
+的讨论中很轻易的将补丁分辨出来。
+
+11)为你的工作签名
+
+为了加强对谁做了何事的追踪,尤其是对那些透过好几层的维护者的补丁,我们
+建议在发送出去的补丁上加一个 “sign-off” 的过程。
+
+"sign-off" 是在补丁的注释的最后的简单的一行文字,认证你编写了它或者其他
+人有权力将它作为开放源代码的补丁传递。规则很简单:如果你能认证如下信息
+:
+ 开发者来源证书 1.1
+ 对于本项目的贡献,我认证如下信息:
+ (a)这些贡献是完全或者部分的由我创建,我有权利以文件中指出
+ 的开放源代码许可证提交它;或者
+ (b)这些贡献基于以前的工作,据我所知,这些以前的工作受恰当的开放
+ 源代码许可证保护,而且,根据许可证,我有权提交修改后的贡献,
+ 无论是完全还是部分由我创造,这些贡献都使用同一个开放源代码许可证
+ (除非我被允许用其它的许可证),正如文件中指出的;或者
+ (c)这些贡献由认证(a),(b)或者(c)的人直接提供给我,而
+ 且我没有修改它。
+ (d)我理解并同意这个项目和贡献是公开的,贡献的记录(包括我
+ 一起提交的个人记录,包括 sign-off )被永久维护并且可以和这个项目
+ 或者开放源代码的许可证同步地再发行。
+ 那么加入这样一行:
+ Signed-off-by: Random J Developer <random@developer.example.org>
+
+使用你的真名(抱歉,不能使用假名或者匿名。)
+
+有人在最后加上标签。现在这些东西会被忽略,但是你可以这样做,来标记公司
+内部的过程,或者只是指出关于 sign-off 的一些特殊细节。
+
+12)标准补丁格式
+
+标准的补丁,标题行是:
+ Subject: [PATCH 001/123] 子系统:一句话概述
+
+标准补丁的信体存在如下部分:
+
+ - 一个 "from" 行指出补丁作者。
+
+ - 一个空行
+
+ - 说明的主体,这些说明文字会被拷贝到描述该补丁的永久改动记录里。
+
+ - 一个由"---"构成的标记行
+
+ - 不合适放到改动记录里的额外的注解。
+
+ - 补丁本身(diff 输出)
+
+标题行的格式,使得对标题行按字母序排序非常的容易 - 很多 e-mail 客户端都
+可以支持 - 因为序列号是用零填充的,所以按数字排序和按字母排序是一样的。
+
+e-mail 标题中的“子系统”标识哪个内核子系统将被打补丁。
+
+e-mail 标题中的“一句话概述”扼要的描述 e-mail 中的补丁。“一句话概述”
+不应该是一个文件名。对于一个补丁系列(“补丁系列”指一系列的多个相关补
+丁),不要对每个补丁都使用同样的“一句话概述”。
+
+记住 e-mail 的“一句话概述”会成为该补丁的全局唯一标识。它会蔓延到 git
+的改动记录里。然后“一句话概述”会被用在开发者的讨论里,用来指代这个补
+丁。用户将希望通过 google 来搜索"一句话概述"来找到那些讨论这个补丁的文
+章。
+
+一些标题的例子:
+
+ Subject: [patch 2/5] ext2: improve scalability of bitmap searching
+ Subject: [PATCHv2 001/207] x86: fix eflags tracking
+
+"from" 行是信体里的最上面一行,具有如下格式:
+ From: Original Author <author@example.com>
+
+"from" 行指明在永久改动日志里,谁会被确认为作者。如果没有 "from" 行,那
+么邮件头里的 "From: " 行会被用来决定改动日志中的作者。
+
+说明的主题将会被提交到永久的源代码改动日志里,因此对那些早已经不记得和
+这个补丁相关的讨论细节的有能力的读者来说,是有意义的。
+
+"---" 标记行对于补丁处理工具要找到哪里是改动日志信息的结束,是不可缺少
+的。
+
+对于 "---" 标记之后的额外注解,一个好的用途就是用来写 diffstat,用来显
+示修改了什么文件和每个文件都增加和删除了多少行。diffstat 对于比较大的补
+丁特别有用。其余那些只是和时刻或者开发者相关的注解,不合适放到永久的改
+动日志里的,也应该放这里。
+使用 diffstat的选项 "-p 1 -w 70" 这样文件名就会从内核源代码树的目录开始
+,不会占用太宽的空间(很容易适合80列的宽度,也许会有一些缩进。)
+
+在后面的参考资料中能看到适当的补丁格式的更多细节。
+
+-------------------------------
+第二节 提示,建议和诀窍
+-------------------------------
+
+本节包含很多和提交到内核的代码有关的通常的"规则"。事情永远有例外...但是
+你必须真的有好的理由这样做。你可以把本节叫做Linus的计算机科学入门课。
+
+1) 读 Document/CodingStyle
+
+Nuff 说过,如果你的代码和这个偏离太多,那么它有可能会被拒绝,没有更多的
+审查,没有更多的评价。
+
+2) #ifdef 是丑陋的
+混杂了 ifdef 的代码难以阅读和维护。别这样做。作为替代,将你的 ifdef 放
+在头文件里,有条件地定义 "static inline" 函数,或者宏,在代码里用这些东
+西。让编译器把那些"空操作"优化掉。
+
+一个简单的例子,不好的代码:
+
+ dev = alloc_etherdev (sizeof(struct funky_private));
+ if (!dev)
+ return -ENODEV;
+ #ifdef CONFIG_NET_FUNKINESS
+ init_funky_net(dev);
+ #endif
+
+清理后的例子:
+
+(头文件里)
+ #ifndef CONFIG_NET_FUNKINESS
+ static inline void init_funky_net (struct net_device *d) {}
+ #endif
+
+(代码文件里)
+ dev = alloc_etherdev (sizeof(struct funky_private));
+ if (!dev)
+ return -ENODEV;
+ init_funky_net(dev);
+
+3) 'static inline' 比宏好
+
+Static inline 函数相比宏来说,是好得多的选择。Static inline 函数提供了
+类型安全,没有长度限制,没有格式限制,在 gcc 下开销和宏一样小。
+
+宏只在 static inline 函数不是最优的时候[在 fast paths 里有很少的独立的
+案例],或者不可能用 static inline 函数的时候[例如字符串分配]。
+应该用 'static inline' 而不是 'static __inline__', 'extern inline' 和
+'extern __inline__' 。
+
+4) 不要过度设计
+
+不要试图预计模糊的未来事情,这些事情也许有用也许没有用:"让事情尽可能的
+简单,而不是更简单"。
+
+----------------
+第三节 参考文献
+----------------
+
+Andrew Morton, "The perfect patch" (tpp).
+ <http://www.ozlabs.org/~akpm/stuff/tpp.txt>
+
+Jeff Garzik, "Linux kernel patch submission format".
+ <http://linux.yyz.us/patch-format.html>
+
+Greg Kroah-Hartman, "How to piss off a kernel subsystem maintainer".
+ <http://www.kroah.com/log/2005/03/31/>
+ <http://www.kroah.com/log/2005/07/08/>
+ <http://www.kroah.com/log/2005/10/19/>
+ <http://www.kroah.com/log/2006/01/11/>
+
+NO!!!! No more huge patch bombs to linux-kernel@vger.kernel.org people!
+ <https://lkml.org/lkml/2005/7/11/336>
+
+Kernel Documentation/CodingStyle:
+ <http://sosdg.org/~coywolf/lxr/source/Documentation/CodingStyle>
+
+Linus Torvalds's mail on the canonical patch format:
+ <http://lkml.org/lkml/2005/4/7/183>
+--
diff --git a/Documentation/zh_CN/arm/Booting b/Documentation/zh_CN/arm/Booting
new file mode 100644
index 000000000..6158a64df
--- /dev/null
+++ b/Documentation/zh_CN/arm/Booting
@@ -0,0 +1,175 @@
+Chinese translated version of Documentation/arm/Booting
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Russell King <linux@arm.linux.org.uk>
+Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
+---------------------------------------------------------------------
+Documentation/arm/Booting 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+英文版维护者: Russell King <linux@arm.linux.org.uk>
+中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版校译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+
+以下为正文
+---------------------------------------------------------------------
+
+ 启动 ARM Linux
+ ==============
+
+作者:Russell King
+日期:2002年5月18日
+
+以下文档适用于 2.4.18-rmk6 及以上版本。
+
+为了启动 ARM Linux,你需要一个引导装载程序(boot loader),
+它是一个在主内核启动前运行的一个小程序。引导装载程序需要初始化各种
+设备,并最终调用 Linux 内核,将信息传递给内核。
+
+从本质上讲,引导装载程序应提供(至少)以下功能:
+
+1、设置和初始化 RAM。
+2、初始化一个串口。
+3、检测机器的类型(machine type)。
+4、设置内核标签列表(tagged list)。
+5、调用内核映像。
+
+
+1、设置和初始化 RAM
+-------------------
+
+现有的引导加载程序: 强制
+新开发的引导加载程序: 强制
+
+引导装载程序应该找到并初始化系统中所有内核用于保持系统变量数据的 RAM。
+这个操作的执行是设备依赖的。(它可能使用内部算法来自动定位和计算所有
+RAM,或可能使用对这个设备已知的 RAM 信息,还可能使用任何引导装载程序
+设计者想到的匹配方法。)
+
+
+2、初始化一个串口
+-----------------------------
+
+现有的引导加载程序: 可选、建议
+新开发的引导加载程序: 可选、建议
+
+引导加载程序应该初始化并使能一个目标板上的串口。这允许内核串口驱动
+自动检测哪个串口用于内核控制台。(一般用于调试或与目标板通信。)
+
+作为替代方案,引导加载程序也可以通过标签列表传递相关的'console='
+选项给内核以指定某个串口,而串口数据格式的选项在以下文档中描述:
+
+ Documentation/kernel-parameters.txt。
+
+
+3、检测机器类型
+--------------------------
+
+现有的引导加载程序: 可选
+新开发的引导加载程序: 强制
+
+引导加载程序应该通过某些方式检测自身所处的机器类型。这是一个硬件
+代码或通过查看所连接的硬件用某些算法得到,这些超出了本文档的范围。
+引导加载程序最终必须能提供一个 MACH_TYPE_xxx 值给内核。
+(详见 linux/arch/arm/tools/mach-types )。
+
+4、设置启动数据
+------------------
+
+现有的引导加载程序: 可选、强烈建议
+新开发的引导加载程序: 强制
+
+引导加载程序必须提供标签列表或者 dtb 映像以传递配置数据给内核。启动
+数据的物理地址通过寄存器 r2 传递给内核。
+
+4a、设置内核标签列表
+--------------------------------
+
+bootloader 必须创建和初始化内核标签列表。一个有效的标签列表以
+ATAG_CORE 标签开始,并以 ATAG_NONE 标签结束。ATAG_CORE 标签可以是
+空的,也可以是非空。一个空 ATAG_CORE 标签其 size 域设置为
+‘2’(0x00000002)。ATAG_NONE 标签的 size 域必须设置为零。
+
+在列表中可以保存任意数量的标签。对于一个重复的标签是追加到之前标签
+所携带的信息之后,还是会覆盖原来的信息,是未定义的。某些标签的行为
+是前者,其他是后者。
+
+bootloader 必须传递一个系统内存的位置和最小值,以及根文件系统位置。
+因此,最小的标签列表如下所示:
+
+ +-----------+
+基地址 -> | ATAG_CORE | |
+ +-----------+ |
+ | ATAG_MEM | | 地址增长方向
+ +-----------+ |
+ | ATAG_NONE | |
+ +-----------+ v
+
+标签列表应该保存在系统的 RAM 中。
+
+标签列表必须置于内核自解压和 initrd'bootp' 程序都不会覆盖的内存区。
+建议放在 RAM 的头 16KiB 中。
+
+4b、设置设备树
+-------------------------
+
+bootloader 必须以 64bit 地址对齐的形式加载一个设备树映像(dtb)到系统
+RAM 中,并用启动数据初始化它。dtb 格式在文档
+Documentation/devicetree/booting-without-of.txt 中。内核将会在
+dtb 物理地址处查找 dtb 魔数值(0xd00dfeed),以确定 dtb 是否已经代替
+标签列表被传递进来。
+
+bootloader 必须传递一个系统内存的位置和最小值,以及根文件系统位置。
+dtb 必须置于内核自解压不会覆盖的内存区。建议将其放置于 RAM 的头 16KiB
+中。但是不可将其放置于“0”物理地址处,因为内核认为:r2 中为 0,意味着
+没有标签列表和 dtb 传递过来。
+
+5、调用内核映像
+---------------------------
+
+现有的引导加载程序: 强制
+新开发的引导加载程序: 强制
+
+调用内核映像 zImage 有两个选择。如果 zImge 保存在 flash 中,且是为了
+在 flash 中直接运行而被正确链接的。这样引导加载程序就可以在 flash 中
+直接调用 zImage。
+
+zImage 也可以被放在系统 RAM(任意位置)中被调用。注意:内核使用映像
+基地址的前 16KB RAM 空间来保存页表。建议将映像置于 RAM 的 32KB 处。
+
+对于以上任意一种情况,都必须符合以下启动状态:
+
+- 停止所有 DMA 设备,这样内存数据就不会因为虚假网络包或磁盘数据而被破坏。
+ 这可能可以节省你许多的调试时间。
+
+- CPU 寄存器配置
+ r0 = 0,
+ r1 = (在上面 3 中获取的)机器类型码。
+ r2 = 标签列表在系统 RAM 中的物理地址,或
+ 设备树块(dtb)在系统 RAM 中的物理地址
+
+- CPU 模式
+ 所有形式的中断必须被禁止 (IRQs 和 FIQs)
+ CPU 必须处于 SVC 模式。(对于 Angel 调试有特例存在)
+
+- 缓存,MMUs
+ MMU 必须关闭。
+ 指令缓存开启或关闭都可以。
+ 数据缓存必须关闭。
+
+- 引导加载程序应该通过直接跳转到内核映像的第一条指令来调用内核映像。
+
+ 对于支持 ARM 指令集的 CPU,跳入内核入口时必须处在 ARM 状态,即使
+ 对于 Thumb-2 内核也是如此。
+
+ 对于仅支持 Thumb 指令集的 CPU,比如 Cortex-M 系列的 CPU,跳入
+ 内核入口时必须处于 Thumb 状态。
diff --git a/Documentation/zh_CN/arm/kernel_user_helpers.txt b/Documentation/zh_CN/arm/kernel_user_helpers.txt
new file mode 100644
index 000000000..cd7fc8f34
--- /dev/null
+++ b/Documentation/zh_CN/arm/kernel_user_helpers.txt
@@ -0,0 +1,284 @@
+Chinese translated version of Documentation/arm/kernel_user_helpers.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Nicolas Pitre <nicolas.pitre@linaro.org>
+ Dave Martin <dave.martin@linaro.org>
+Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
+---------------------------------------------------------------------
+Documentation/arm/kernel_user_helpers.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+英文版维护者: Nicolas Pitre <nicolas.pitre@linaro.org>
+ Dave Martin <dave.martin@linaro.org>
+中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版校译者: 宋冬生 Dongsheng Song <dongshneg.song@gmail.com>
+ 傅炜 Fu Wei <tekkamanninja@gmail.com>
+
+
+以下为正文
+---------------------------------------------------------------------
+内核提供的用户空间辅助代码
+=========================
+
+在内核内存空间的固定地址处,有一个由内核提供并可从用户空间访问的代码
+段。它用于向用户空间提供因在许多 ARM CPU 中未实现的特性和/或指令而需
+内核提供帮助的某些操作。这些代码直接在用户模式下执行的想法是为了获得
+最佳效率,但那些与内核计数器联系过于紧密的部分,则被留给了用户库实现。
+事实上,此代码甚至可能因不同的 CPU 而异,这取决于其可用的指令集或它
+是否为 SMP 系统。换句话说,内核保留在不作出警告的情况下根据需要更改
+这些代码的权利。只有本文档描述的入口及其结果是保证稳定的。
+
+这与完全成熟的 VDSO 实现不同(但两者并不冲突),尽管如此,VDSO 可阻止
+某些通过常量高效跳转到那些代码段的汇编技巧。且由于那些代码段在返回用户
+代码前仅使用少量的代码周期,则一个 VDSO 间接远程调用将会在这些简单的
+操作上增加一个可测量的开销。
+
+在对那些拥有原生支持的新型处理器进行代码优化时,仅在已为其他操作使用
+了类似的新增指令,而导致二进制结果已与早期 ARM 处理器不兼容的情况下,
+用户空间才应绕过这些辅助代码,并在内联函数中实现这些操作(无论是通过
+编译器在代码中直接放置,还是作为库函数调用实现的一部分)。也就是说,
+如果你编译的代码不会为了其他目的使用新指令,则不要仅为了避免使用这些
+内核辅助代码,导致二进制程序无法在早期处理器上运行。
+
+新的辅助代码可能随着时间的推移而增加,所以新内核中的某些辅助代码在旧
+内核中可能不存在。因此,程序必须在对任何辅助代码调用假设是安全之前,
+检测 __kuser_helper_version 的值(见下文)。理想情况下,这种检测应该
+只在进程启动时执行一次;如果内核版本不支持所需辅助代码,则该进程可尽早
+中止执行。
+
+kuser_helper_version
+--------------------
+
+位置: 0xffff0ffc
+
+参考声明:
+
+ extern int32_t __kuser_helper_version;
+
+定义:
+
+ 这个区域包含了当前运行内核实现的辅助代码版本号。用户空间可以通过读
+ 取此版本号以确定特定的辅助代码是否存在。
+
+使用范例:
+
+#define __kuser_helper_version (*(int32_t *)0xffff0ffc)
+
+void check_kuser_version(void)
+{
+ if (__kuser_helper_version < 2) {
+ fprintf(stderr, "can't do atomic operations, kernel too old\n");
+ abort();
+ }
+}
+
+注意:
+
+ 用户空间可以假设这个域的值不会在任何单个进程的生存期内改变。也就
+ 是说,这个域可以仅在库的初始化阶段或进程启动阶段读取一次。
+
+kuser_get_tls
+-------------
+
+位置: 0xffff0fe0
+
+参考原型:
+
+ void * __kuser_get_tls(void);
+
+输入:
+
+ lr = 返回地址
+
+输出:
+
+ r0 = TLS 值
+
+被篡改的寄存器:
+
+ 无
+
+定义:
+
+ 获取之前通过 __ARM_NR_set_tls 系统调用设置的 TLS 值。
+
+使用范例:
+
+typedef void * (__kuser_get_tls_t)(void);
+#define __kuser_get_tls (*(__kuser_get_tls_t *)0xffff0fe0)
+
+void foo()
+{
+ void *tls = __kuser_get_tls();
+ printf("TLS = %p\n", tls);
+}
+
+注意:
+
+ - 仅在 __kuser_helper_version >= 1 时,此辅助代码存在
+ (从内核版本 2.6.12 开始)。
+
+kuser_cmpxchg
+-------------
+
+位置: 0xffff0fc0
+
+参考原型:
+
+ int __kuser_cmpxchg(int32_t oldval, int32_t newval, volatile int32_t *ptr);
+
+输入:
+
+ r0 = oldval
+ r1 = newval
+ r2 = ptr
+ lr = 返回地址
+
+输出:
+
+ r0 = 成功代码 (零或非零)
+ C flag = 如果 r0 == 0 则置 1,如果 r0 != 0 则清零。
+
+被篡改的寄存器:
+
+ r3, ip, flags
+
+定义:
+
+ 仅在 *ptr 为 oldval 时原子保存 newval 于 *ptr 中。
+ 如果 *ptr 被改变,则返回值为零,否则为非零值。
+ 如果 *ptr 被改变,则 C flag 也会被置 1,以实现调用代码中的汇编
+ 优化。
+
+使用范例:
+
+typedef int (__kuser_cmpxchg_t)(int oldval, int newval, volatile int *ptr);
+#define __kuser_cmpxchg (*(__kuser_cmpxchg_t *)0xffff0fc0)
+
+int atomic_add(volatile int *ptr, int val)
+{
+ int old, new;
+
+ do {
+ old = *ptr;
+ new = old + val;
+ } while(__kuser_cmpxchg(old, new, ptr));
+
+ return new;
+}
+
+注意:
+
+ - 这个例程已根据需要包含了内存屏障。
+
+ - 仅在 __kuser_helper_version >= 2 时,此辅助代码存在
+ (从内核版本 2.6.12 开始)。
+
+kuser_memory_barrier
+--------------------
+
+位置: 0xffff0fa0
+
+参考原型:
+
+ void __kuser_memory_barrier(void);
+
+输入:
+
+ lr = 返回地址
+
+输出:
+
+ 无
+
+被篡改的寄存器:
+
+ 无
+
+定义:
+
+ 应用于任何需要内存屏障以防止手动数据修改带来的一致性问题,以及
+ __kuser_cmpxchg 中。
+
+使用范例:
+
+typedef void (__kuser_dmb_t)(void);
+#define __kuser_dmb (*(__kuser_dmb_t *)0xffff0fa0)
+
+注意:
+
+ - 仅在 __kuser_helper_version >= 3 时,此辅助代码存在
+ (从内核版本 2.6.15 开始)。
+
+kuser_cmpxchg64
+---------------
+
+位置: 0xffff0f60
+
+参考原型:
+
+ int __kuser_cmpxchg64(const int64_t *oldval,
+ const int64_t *newval,
+ volatile int64_t *ptr);
+
+输入:
+
+ r0 = 指向 oldval
+ r1 = 指向 newval
+ r2 = 指向目标值
+ lr = 返回地址
+
+输出:
+
+ r0 = 成功代码 (零或非零)
+ C flag = 如果 r0 == 0 则置 1,如果 r0 != 0 则清零。
+
+被篡改的寄存器:
+
+ r3, lr, flags
+
+定义:
+
+ 仅在 *ptr 等于 *oldval 指向的 64 位值时,原子保存 *newval
+ 指向的 64 位值于 *ptr 中。如果 *ptr 被改变,则返回值为零,
+ 否则为非零值。
+
+ 如果 *ptr 被改变,则 C flag 也会被置 1,以实现调用代码中的汇编
+ 优化。
+
+使用范例:
+
+typedef int (__kuser_cmpxchg64_t)(const int64_t *oldval,
+ const int64_t *newval,
+ volatile int64_t *ptr);
+#define __kuser_cmpxchg64 (*(__kuser_cmpxchg64_t *)0xffff0f60)
+
+int64_t atomic_add64(volatile int64_t *ptr, int64_t val)
+{
+ int64_t old, new;
+
+ do {
+ old = *ptr;
+ new = old + val;
+ } while(__kuser_cmpxchg64(&old, &new, ptr));
+
+ return new;
+}
+
+注意:
+
+ - 这个例程已根据需要包含了内存屏障。
+
+ - 由于这个过程的代码长度(此辅助代码跨越 2 个常规的 kuser “槽”),
+ 因此 0xffff0f80 不被作为有效的入口点。
+
+ - 仅在 __kuser_helper_version >= 5 时,此辅助代码存在
+ (从内核版本 3.1 开始)。
diff --git a/Documentation/zh_CN/arm64/booting.txt b/Documentation/zh_CN/arm64/booting.txt
new file mode 100644
index 000000000..7cd36af11
--- /dev/null
+++ b/Documentation/zh_CN/arm64/booting.txt
@@ -0,0 +1,219 @@
+Chinese translated version of Documentation/arm64/booting.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Will Deacon <will.deacon@arm.com>
+Chinese maintainer: Fu Wei <wefu@redhat.com>
+---------------------------------------------------------------------
+Documentation/arm64/booting.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+本文翻译提交时的 Git 检出点为: bc465aa9d045feb0e13b4a8f32cc33c1943f62d6
+
+英文版维护者: Will Deacon <will.deacon@arm.com>
+中文版维护者: 傅炜 Fu Wei <wefu@redhat.com>
+中文版翻译者: 傅炜 Fu Wei <wefu@redhat.com>
+中文版校译者: 傅炜 Fu Wei <wefu@redhat.com>
+
+以下为正文
+---------------------------------------------------------------------
+ 启动 AArch64 Linux
+ ==================
+
+作者: Will Deacon <will.deacon@arm.com>
+日期: 2012 年 09 月 07 日
+
+本文档基于 Russell King 的 ARM 启动文档,且适用于所有公开发布的
+AArch64 Linux 内核代码。
+
+AArch64 异常模型由多个异常级别(EL0 - EL3)组成,对于 EL0 和 EL1
+异常级有对应的安全和非安全模式。EL2 是系统管理级,且仅存在于
+非安全模式下。EL3 是最高特权级,且仅存在于安全模式下。
+
+基于本文档的目的,我们将简单地使用‘引导装载程序’(‘boot loader’)
+这个术语来定义在将控制权交给 Linux 内核前 CPU 上执行的所有软件。
+这可能包含安全监控和系统管理代码,或者它可能只是一些用于准备最小启动
+环境的指令。
+
+基本上,引导装载程序(至少)应实现以下操作:
+
+1、设置和初始化 RAM
+2、设置设备树数据
+3、解压内核映像
+4、调用内核映像
+
+
+1、设置和初始化 RAM
+-----------------
+
+必要性: 强制
+
+引导装载程序应该找到并初始化系统中所有内核用于保持系统变量数据的 RAM。
+这个操作的执行是设备依赖的。(它可能使用内部算法来自动定位和计算所有
+RAM,或可能使用对这个设备已知的 RAM 信息,还可能使用任何引导装载程序
+设计者想到的匹配方法。)
+
+
+2、设置设备树数据
+---------------
+
+必要性: 强制
+
+设备树数据块(dtb)必须 8 字节对齐,并位于从内核映像起始算起第一个 512MB
+内,且不得跨越 2MB 对齐边界。这使得内核可以通过初始页表中的单个节描述符来
+映射此数据块。
+
+
+3、解压内核映像
+-------------
+
+必要性: 可选
+
+AArch64 内核当前没有提供自解压代码,因此如果使用了压缩内核映像文件
+(比如 Image.gz),则需要通过引导装载程序(使用 gzip 等)来进行解压。
+若引导装载程序没有实现这个需求,就要使用非压缩内核映像文件。
+
+
+4、调用内核映像
+-------------
+
+必要性: 强制
+
+已解压的内核映像包含一个 64 字节的头,内容如下:
+
+ u32 code0; /* 可执行代码 */
+ u32 code1; /* 可执行代码 */
+ u64 text_offset; /* 映像装载偏移,小端模式 */
+ u64 image_size; /* 映像实际大小, 小端模式 */
+ u64 flags; /* 内核旗标, 小端模式 *
+ u64 res2 = 0; /* 保留 */
+ u64 res3 = 0; /* 保留 */
+ u64 res4 = 0; /* 保留 */
+ u32 magic = 0x644d5241; /* 魔数, 小端, "ARM\x64" */
+ u32 res5; /* 保留 (用于 PE COFF 偏移) */
+
+
+映像头注释:
+
+- 自 v3.17 起,除非另有说明,所有域都是小端模式。
+
+- code0/code1 负责跳转到 stext.
+
+- 当通过 EFI 启动时, 最初 code0/code1 被跳过。
+ res5 是到 PE 文件头的偏移,而 PE 文件头含有 EFI 的启动入口点 (efi_stub_entry)。
+ 当 stub 代码完成了它的使命,它会跳转到 code0 继续正常的启动流程。
+
+- v3.17 之前,未明确指定 text_offset 的字节序。此时,image_size 为零,
+ 且 text_offset 依照内核字节序为 0x80000。
+ 当 image_size 非零,text_offset 为小端模式且是有效值,应被引导加载程序使用。
+ 当 image_size 为零,text_offset 可假定为 0x80000。
+
+- flags 域 (v3.17 引入) 为 64 位小端模式,其编码如下:
+ 位 0: 内核字节序。 1 表示大端模式,0 表示小端模式。
+ 位 1-63: 保留。
+
+- 当 image_size 为零时,引导装载程序应该试图在内核映像末尾之后尽可能多地保留空闲内存
+ 供内核直接使用。对内存空间的需求量因所选定的内核特性而异, 且无实际限制。
+
+内核映像必须被放置在靠近可用系统内存起始的 2MB 对齐为基址的 text_offset 字节处,并从那里被调用。
+当前,对 Linux 来说在此基址以下的内存是无法使用的,因此强烈建议将系统内存的起始作为这个基址。
+从映像起始地址算起,最少必须为内核释放出 image_size 字节的空间。
+
+任何提供给内核的内存(甚至在 2MB 对齐的基地址之前),若未从内核中标记为保留
+(如在设备树(dtb)的 memreserve 区域),都将被认为对内核是可用。
+
+在跳转入内核前,必须符合以下状态:
+
+- 停止所有 DMA 设备,这样内存数据就不会因为虚假网络包或磁盘数据而
+ 被破坏。这可能可以节省你许多的调试时间。
+
+- 主 CPU 通用寄存器设置
+ x0 = 系统 RAM 中设备树数据块(dtb)的物理地址。
+ x1 = 0 (保留,将来可能使用)
+ x2 = 0 (保留,将来可能使用)
+ x3 = 0 (保留,将来可能使用)
+
+- CPU 模式
+ 所有形式的中断必须在 PSTATE.DAIF 中被屏蔽(Debug、SError、IRQ
+ 和 FIQ)。
+ CPU 必须处于 EL2(推荐,可访问虚拟化扩展)或非安全 EL1 模式下。
+
+- 高速缓存、MMU
+ MMU 必须关闭。
+ 指令缓存开启或关闭都可以。
+ 已载入的内核映像的相应内存区必须被清理,以达到缓存一致性点(PoC)。
+ 当存在系统缓存或其他使能缓存的一致性主控器时,通常需使用虚拟地址维护其缓存,而非 set/way 操作。
+ 遵从通过虚拟地址操作维护构架缓存的系统缓存必须被配置,并可以被使能。
+ 而不通过虚拟地址操作维护构架缓存的系统缓存(不推荐),必须被配置且禁用。
+
+ *译者注:对于 PoC 以及缓存相关内容,请参考 ARMv8 构架参考手册 ARM DDI 0487A
+
+- 架构计时器
+ CNTFRQ 必须设定为计时器的频率,且 CNTVOFF 必须设定为对所有 CPU
+ 都一致的值。如果在 EL1 模式下进入内核,则 CNTHCTL_EL2 中的
+ EL1PCTEN (bit 0) 必须置位。
+
+- 一致性
+ 通过内核启动的所有 CPU 在内核入口地址上必须处于相同的一致性域中。
+ 这可能要根据具体实现来定义初始化过程,以使能每个CPU上对维护操作的
+ 接收。
+
+- 系统寄存器
+ 在进入内核映像的异常级中,所有构架中可写的系统寄存器必须通过软件
+ 在一个更高的异常级别下初始化,以防止在 未知 状态下运行。
+
+ 对于拥有 GICv3 中断控制器的系统:
+ - 若当前在 EL3 :
+ ICC_SRE_EL3.Enable (位 3) 必须初始化为 0b1。
+ ICC_SRE_EL3.SRE (位 0) 必须初始化为 0b1。
+ - 若内核运行在 EL1:
+ ICC_SRE_EL2.Enable (位 3) 必须初始化为 0b1。
+ ICC_SRE_EL2.SRE (位 0) 必须初始化为 0b1。
+
+以上对于 CPU 模式、高速缓存、MMU、架构计时器、一致性、系统寄存器的
+必要条件描述适用于所有 CPU。所有 CPU 必须在同一异常级别跳入内核。
+
+引导装载程序必须在每个 CPU 处于以下状态时跳入内核入口:
+
+- 主 CPU 必须直接跳入内核映像的第一条指令。通过此 CPU 传递的设备树
+ 数据块必须在每个 CPU 节点中包含一个 ‘enable-method’ 属性,所
+ 支持的 enable-method 请见下文。
+
+ 引导装载程序必须生成这些设备树属性,并在跳入内核入口之前将其插入
+ 数据块。
+
+- enable-method 为 “spin-table” 的 CPU 必须在它们的 CPU
+ 节点中包含一个 ‘cpu-release-addr’ 属性。这个属性标识了一个
+ 64 位自然对齐且初始化为零的内存位置。
+
+ 这些 CPU 必须在内存保留区(通过设备树中的 /memreserve/ 域传递
+ 给内核)中自旋于内核之外,轮询它们的 cpu-release-addr 位置(必须
+ 包含在保留区中)。可通过插入 wfe 指令来降低忙循环开销,而主 CPU 将
+ 发出 sev 指令。当对 cpu-release-addr 所指位置的读取操作返回非零值
+ 时,CPU 必须跳入此值所指向的地址。此值为一个单独的 64 位小端值,
+ 因此 CPU 须在跳转前将所读取的值转换为其本身的端模式。
+
+- enable-method 为 “psci” 的 CPU 保持在内核外(比如,在
+ memory 节点中描述为内核空间的内存区外,或在通过设备树 /memreserve/
+ 域中描述为内核保留区的空间中)。内核将会发起在 ARM 文档(编号
+ ARM DEN 0022A:用于 ARM 上的电源状态协调接口系统软件)中描述的
+ CPU_ON 调用来将 CPU 带入内核。
+
+ *译者注: ARM DEN 0022A 已更新到 ARM DEN 0022C。
+
+ 设备树必须包含一个 ‘psci’ 节点,请参考以下文档:
+ Documentation/devicetree/bindings/arm/psci.txt
+
+
+- 辅助 CPU 通用寄存器设置
+ x0 = 0 (保留,将来可能使用)
+ x1 = 0 (保留,将来可能使用)
+ x2 = 0 (保留,将来可能使用)
+ x3 = 0 (保留,将来可能使用)
diff --git a/Documentation/zh_CN/arm64/legacy_instructions.txt b/Documentation/zh_CN/arm64/legacy_instructions.txt
new file mode 100644
index 000000000..68362a1ab
--- /dev/null
+++ b/Documentation/zh_CN/arm64/legacy_instructions.txt
@@ -0,0 +1,72 @@
+Chinese translated version of Documentation/arm64/legacy_instructions.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Punit Agrawal <punit.agrawal@arm.com>
+ Suzuki K. Poulose <suzuki.poulose@arm.com>
+Chinese maintainer: Fu Wei <wefu@redhat.com>
+---------------------------------------------------------------------
+Documentation/arm64/legacy_instructions.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+本文翻译提交时的 Git 检出点为: bc465aa9d045feb0e13b4a8f32cc33c1943f62d6
+
+英文版维护者: Punit Agrawal <punit.agrawal@arm.com>
+ Suzuki K. Poulose <suzuki.poulose@arm.com>
+中文版维护者: 傅炜 Fu Wei <wefu@redhat.com>
+中文版翻译者: 傅炜 Fu Wei <wefu@redhat.com>
+中文版校译者: 傅炜 Fu Wei <wefu@redhat.com>
+
+以下为正文
+---------------------------------------------------------------------
+Linux 内核在 arm64 上的移植提供了一个基础框架,以支持构架中正在被淘汰或已废弃指令的模拟执行。
+这个基础框架的代码使用未定义指令钩子(hooks)来支持模拟。如果指令存在,它也允许在硬件中启用该指令。
+
+模拟模式可通过写 sysctl 节点(/proc/sys/abi)来控制。
+不同的执行方式及 sysctl 节点的相应值,解释如下:
+
+* Undef(未定义)
+ 值: 0
+ 产生未定义指令终止异常。它是那些构架中已废弃的指令,如 SWP,的默认处理方式。
+
+* Emulate(模拟)
+ 值: 1
+ 使用软件模拟方式。为解决软件迁移问题,这种模拟指令模式的使用是被跟踪的,并会发出速率限制警告。
+ 它是那些构架中正在被淘汰的指令,如 CP15 barriers(隔离指令),的默认处理方式。
+
+* Hardware Execution(硬件执行)
+ 值: 2
+ 虽然标记为正在被淘汰,但一些实现可能提供硬件执行这些指令的使能/禁用操作。
+ 使用硬件执行一般会有更好的性能,但将无法收集运行时对正被淘汰指令的使用统计数据。
+
+默认执行模式依赖于指令在构架中状态。正在被淘汰的指令应该以模拟(Emulate)作为默认模式,
+而已废弃的指令必须默认使用未定义(Undef)模式
+
+注意:指令模拟可能无法应对所有情况。更多详情请参考单独的指令注释。
+
+受支持的遗留指令
+-------------
+* SWP{B}
+节点: /proc/sys/abi/swp
+状态: 已废弃
+默认执行方式: Undef (0)
+
+* CP15 Barriers
+节点: /proc/sys/abi/cp15_barrier
+状态: 正被淘汰,不推荐使用
+默认执行方式: Emulate (1)
+
+* SETEND
+节点: /proc/sys/abi/setend
+状态: 正被淘汰,不推荐使用
+默认执行方式: Emulate (1)*
+注:为了使能这个特性,系统中的所有 CPU 必须在 EL0 支持混合字节序。
+如果一个新的 CPU (不支持混合字节序) 在使能这个特性后被热插入系统,
+在应用中可能会出现不可预期的结果。
diff --git a/Documentation/zh_CN/arm64/memory.txt b/Documentation/zh_CN/arm64/memory.txt
new file mode 100644
index 000000000..19b3a52d5
--- /dev/null
+++ b/Documentation/zh_CN/arm64/memory.txt
@@ -0,0 +1,114 @@
+Chinese translated version of Documentation/arm64/memory.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Catalin Marinas <catalin.marinas@arm.com>
+Chinese maintainer: Fu Wei <wefu@redhat.com>
+---------------------------------------------------------------------
+Documentation/arm64/memory.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+本文翻译提交时的 Git 检出点为: bc465aa9d045feb0e13b4a8f32cc33c1943f62d6
+
+英文版维护者: Catalin Marinas <catalin.marinas@arm.com>
+中文版维护者: 傅炜 Fu Wei <wefu@redhat.com>
+中文版翻译者: 傅炜 Fu Wei <wefu@redhat.com>
+中文版校译者: 傅炜 Fu Wei <wefu@redhat.com>
+
+以下为正文
+---------------------------------------------------------------------
+ Linux 在 AArch64 中的内存布局
+ ===========================
+
+作者: Catalin Marinas <catalin.marinas@arm.com>
+
+本文档描述 AArch64 Linux 内核所使用的虚拟内存布局。此构架可以实现
+页大小为 4KB 的 4 级转换表和页大小为 64KB 的 3 级转换表。
+
+AArch64 Linux 使用 3 级或 4 级转换表,其页大小配置为 4KB,对于用户和内核
+分别都有 39-bit (512GB) 或 48-bit (256TB) 的虚拟地址空间。
+对于页大小为 64KB的配置,仅使用 2 级转换表,有 42-bit (4TB) 的虚拟地址空间,但内存布局相同。
+
+用户地址空间的 63:48 位为 0,而内核地址空间的相应位为 1。TTBRx 的
+选择由虚拟地址的 63 位给出。swapper_pg_dir 仅包含内核(全局)映射,
+而用户 pgd 仅包含用户(非全局)映射。swapper_pg_dir 地址被写入
+TTBR1 中,且从不写入 TTBR0。
+
+
+AArch64 Linux 在页大小为 4KB,并使用 3 级转换表时的内存布局:
+
+起始地址 结束地址 大小 用途
+-----------------------------------------------------------------------
+0000000000000000 0000007fffffffff 512GB 用户空间
+ffffff8000000000 ffffffffffffffff 512GB 内核空间
+
+
+AArch64 Linux 在页大小为 4KB,并使用 4 级转换表时的内存布局:
+
+起始地址 结束地址 大小 用途
+-----------------------------------------------------------------------
+0000000000000000 0000ffffffffffff 256TB 用户空间
+ffff000000000000 ffffffffffffffff 256TB 内核空间
+
+
+AArch64 Linux 在页大小为 64KB,并使用 2 级转换表时的内存布局:
+
+起始地址 结束地址 大小 用途
+-----------------------------------------------------------------------
+0000000000000000 000003ffffffffff 4TB 用户空间
+fffffc0000000000 ffffffffffffffff 4TB 内核空间
+
+
+AArch64 Linux 在页大小为 64KB,并使用 3 级转换表时的内存布局:
+
+起始地址 结束地址 大小 用途
+-----------------------------------------------------------------------
+0000000000000000 0000ffffffffffff 256TB 用户空间
+ffff000000000000 ffffffffffffffff 256TB 内核空间
+
+
+更详细的内核虚拟内存布局,请参阅内核启动信息。
+
+
+4KB 页大小的转换表查找:
+
++--------+--------+--------+--------+--------+--------+--------+--------+
+|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
++--------+--------+--------+--------+--------+--------+--------+--------+
+ | | | | | |
+ | | | | | v
+ | | | | | [11:0] 页内偏移
+ | | | | +-> [20:12] L3 索引
+ | | | +-----------> [29:21] L2 索引
+ | | +---------------------> [38:30] L1 索引
+ | +-------------------------------> [47:39] L0 索引
+ +-------------------------------------------------> [63] TTBR0/1
+
+
+64KB 页大小的转换表查找:
+
++--------+--------+--------+--------+--------+--------+--------+--------+
+|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0|
++--------+--------+--------+--------+--------+--------+--------+--------+
+ | | | | |
+ | | | | v
+ | | | | [15:0] 页内偏移
+ | | | +----------> [28:16] L3 索引
+ | | +--------------------------> [41:29] L2 索引
+ | +-------------------------------> [47:42] L1 索引
+ +-------------------------------------------------> [63] TTBR0/1
+
+
+当使用 KVM 时, 管理程序(hypervisor)在 EL2 中通过相对内核虚拟地址的
+一个固定偏移来映射内核页(内核虚拟地址的高 24 位设为零):
+
+起始地址 结束地址 大小 用途
+-----------------------------------------------------------------------
+0000004000000000 0000007fffffffff 256GB 在 HYP 中映射的内核对象
diff --git a/Documentation/zh_CN/arm64/tagged-pointers.txt b/Documentation/zh_CN/arm64/tagged-pointers.txt
new file mode 100644
index 000000000..2664d1bd5
--- /dev/null
+++ b/Documentation/zh_CN/arm64/tagged-pointers.txt
@@ -0,0 +1,52 @@
+Chinese translated version of Documentation/arm64/tagged-pointers.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Will Deacon <will.deacon@arm.com>
+Chinese maintainer: Fu Wei <wefu@redhat.com>
+---------------------------------------------------------------------
+Documentation/arm64/tagged-pointers.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+英文版维护者: Will Deacon <will.deacon@arm.com>
+中文版维护者: 傅炜 Fu Wei <wefu@redhat.com>
+中文版翻译者: 傅炜 Fu Wei <wefu@redhat.com>
+中文版校译者: 傅炜 Fu Wei <wefu@redhat.com>
+
+以下为正文
+---------------------------------------------------------------------
+ Linux 在 AArch64 中带标记的虚拟地址
+ =================================
+
+作者: Will Deacon <will.deacon@arm.com>
+日期: 2013 年 06 月 12 日
+
+本文档简述了在 AArch64 地址转换系统中提供的带标记的虚拟地址及其在
+AArch64 Linux 中的潜在用途。
+
+内核提供的地址转换表配置使通过 TTBR0 完成的虚拟地址转换(即用户空间
+映射),其虚拟地址的最高 8 位(63:56)会被转换硬件所忽略。这种机制
+让这些位可供应用程序自由使用,其注意事项如下:
+
+ (1) 内核要求所有传递到 EL1 的用户空间地址带有 0x00 标记。
+ 这意味着任何携带用户空间虚拟地址的系统调用(syscall)
+ 参数 *必须* 在陷入内核前使它们的最高字节被清零。
+
+ (2) 非零标记在传递信号时不被保存。这意味着在应用程序中利用了
+ 标记的信号处理函数无法依赖 siginfo_t 的用户空间虚拟
+ 地址所携带的包含其内部域信息的标记。此规则的一个例外是
+ 当信号是在调试观察点的异常处理程序中产生的,此时标记的
+ 信息将被保存。
+
+ (3) 当使用带标记的指针时需特别留心,因为仅对两个虚拟地址
+ 的高字节,C 编译器很可能无法判断它们是不同的。
+
+此构架会阻止对带标记的 PC 指针的利用,因此在异常返回时,其高字节
+将被设置成一个为 “55” 的扩展符。
diff --git a/Documentation/zh_CN/basic_profiling.txt b/Documentation/zh_CN/basic_profiling.txt
new file mode 100644
index 000000000..1e6bf0bdf
--- /dev/null
+++ b/Documentation/zh_CN/basic_profiling.txt
@@ -0,0 +1,71 @@
+Chinese translated version of Documentation/basic_profiling
+
+If you have any comment or update to the content, please post to LKML directly.
+However, if you have problem communicating in English you can also ask the
+Chinese maintainer for help. Contact the Chinese maintainer, if this
+translation is outdated or there is problem with translation.
+
+Chinese maintainer: Liang Xie <xieliang@xiaomi.com>
+---------------------------------------------------------------------
+Documentation/basic_profiling的中文翻译
+
+如果想评论或更新本文的内容,请直接发信到LKML。如果你使用英文交流有困难的话,也可
+以向中文版维护者求助。如果本翻译更新不及时或者翻译存在问题,请联系中文版维护者。
+
+中文版维护者: 谢良 Liang Xie <xieliang007@gmail.com>
+中文版翻译者: 谢良 Liang Xie <xieliang007@gmail.com>
+中文版校译者:
+以下为正文
+---------------------------------------------------------------------
+
+下面这些说明指令都是非常基础的,如果你想进一步了解请阅读相关专业文档:)
+请不要再在本文档增加新的内容,但可以修复文档中的错误:)(mbligh@aracnet.com)
+感谢John Levon,Dave Hansen等在撰写时的帮助
+
+<test> 用于表示要测量的目标
+请先确保您已经有正确的System.map / vmlinux配置!
+
+对于linux系统来说,配置vmlinuz最容易的方法可能就是使用“make install”,然后修改
+/sbin/installkernel将vmlinux拷贝到/boot目录,而System.map通常是默认安装好的
+
+Readprofile
+-----------
+2.6系列内核需要版本相对较新的readprofile,比如util-linux 2.12a中包含的,可以从:
+
+http://www.kernel.org/pub/linux/utils/util-linux/ 下载
+
+大部分linux发行版已经包含了.
+
+启用readprofile需要在kernel启动命令行增加”profile=2“
+
+clear readprofile -r
+ <test>
+dump output readprofile -m /boot/System.map > captured_profile
+
+Oprofile
+--------
+
+从http://oprofile.sourceforge.net/获取源代码(请参考Changes以获取匹配的版本)
+在kernel启动命令行增加“idle=poll”
+
+配置CONFIG_PROFILING=y和CONFIG_OPROFILE=y然后重启进入新kernel
+
+./configure --with-kernel-support
+make install
+
+想得到好的测量结果,请确保启用了本地APIC特性。如果opreport显示有0Hz CPU,
+说明APIC特性没有开启。另外注意idle=poll选项可能有损性能。
+
+One time setup:
+ opcontrol --setup --vmlinux=/boot/vmlinux
+
+clear opcontrol --reset
+start opcontrol --start
+ <test>
+stop opcontrol --stop
+dump output opreport > output_file
+
+如果只看kernel相关的报告结果,请运行命令 opreport -l /boot/vmlinux > output_file
+
+通过reset选项可以清理过期统计数据,相当于重启的效果。
+
diff --git a/Documentation/zh_CN/email-clients.txt b/Documentation/zh_CN/email-clients.txt
new file mode 100644
index 000000000..b9a1a3e6c
--- /dev/null
+++ b/Documentation/zh_CN/email-clients.txt
@@ -0,0 +1,210 @@
+Chinese translated version of Documentation/email-clients.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Chinese maintainer: Harry Wei <harryxiyou@gmail.com>
+---------------------------------------------------------------------
+Documentation/email-clients.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+中文版维护者: 贾威威 Harry Wei <harryxiyou@gmail.com>
+中文版翻译者: 贾威威 Harry Wei <harryxiyou@gmail.com>
+中文版校译者: Yinglin Luan <synmyth@gmail.com>
+ Xiaochen Wang <wangxiaochen0@gmail.com>
+ yaxinsn <yaxinsn@163.com>
+
+以下为正文
+---------------------------------------------------------------------
+
+Linux邮件客户端配置信息
+======================================================================
+
+普通配置
+----------------------------------------------------------------------
+Linux内核补丁是通过邮件被提交的,最好把补丁作为邮件体的内嵌文本。有些维护者
+接收附件,但是附件的内容格式应该是"text/plain"。然而,附件一般是不赞成的,
+因为这会使补丁的引用部分在评论过程中变的很困难。
+
+用来发送Linux内核补丁的邮件客户端在发送补丁时应该处于文本的原始状态。例如,
+他们不能改变或者删除制表符或者空格,甚至是在每一行的开头或者结尾。
+
+不要通过"format=flowed"模式发送补丁。这样会引起不可预期以及有害的断行。
+
+不要让你的邮件客户端进行自动换行。这样也会破坏你的补丁。
+
+邮件客户端不能改变文本的字符集编码方式。要发送的补丁只能是ASCII或者UTF-8编码方式,
+如果你使用UTF-8编码方式发送邮件,那么你将会避免一些可能发生的字符集问题。
+
+邮件客户端应该形成并且保持 References: 或者 In-Reply-To: 标题,那么
+邮件话题就不会中断。
+
+复制粘帖(或者剪贴粘帖)通常不能用于补丁,因为制表符会转换为空格。使用xclipboard, xclip
+或者xcutsel也许可以,但是最好测试一下或者避免使用复制粘帖。
+
+不要在使用PGP/GPG署名的邮件中包含补丁。这样会使得很多脚本不能读取和适用于你的补丁。
+(这个问题应该是可以修复的)
+
+在给内核邮件列表发送补丁之前,给自己发送一个补丁是个不错的主意,保存接收到的
+邮件,将补丁用'patch'命令打上,如果成功了,再给内核邮件列表发送。
+
+
+一些邮件客户端提示
+----------------------------------------------------------------------
+这里给出一些详细的MUA配置提示,可以用于给Linux内核发送补丁。这些并不意味是
+所有的软件包配置总结。
+
+说明:
+TUI = 以文本为基础的用户接口
+GUI = 图形界面用户接口
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Alpine (TUI)
+
+配置选项:
+在"Sending Preferences"部分:
+
+- "Do Not Send Flowed Text"必须开启
+- "Strip Whitespace Before Sending"必须关闭
+
+当写邮件时,光标应该放在补丁会出现的地方,然后按下CTRL-R组合键,使指定的
+补丁文件嵌入到邮件中。
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Evolution (GUI)
+
+一些开发者成功的使用它发送补丁
+
+当选择邮件选项:Preformat
+ 从Format->Heading->Preformatted (Ctrl-7)或者工具栏
+
+然后使用:
+ Insert->Text File... (Alt-n x)插入补丁文件。
+
+你还可以"diff -Nru old.c new.c | xclip",选择Preformat,然后使用中间键进行粘帖。
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Kmail (GUI)
+
+一些开发者成功的使用它发送补丁。
+
+默认设置不为HTML格式是合适的;不要启用它。
+
+当书写一封邮件的时候,在选项下面不要选择自动换行。唯一的缺点就是你在邮件中输入的任何文本
+都不会被自动换行,因此你必须在发送补丁之前手动换行。最简单的方法就是启用自动换行来书写邮件,
+然后把它保存为草稿。一旦你在草稿中再次打开它,它已经全部自动换行了,那么你的邮件虽然没有
+选择自动换行,但是还不会失去已有的自动换行。
+
+在邮件的底部,插入补丁之前,放上常用的补丁定界符:三个连字号(---)。
+
+然后在"Message"菜单条目,选择插入文件,接着选取你的补丁文件。还有一个额外的选项,你可以
+通过它配置你的邮件建立工具栏菜单,还可以带上"insert file"图标。
+
+你可以安全地通过GPG标记附件,但是内嵌补丁最好不要使用GPG标记它们。作为内嵌文本的签发补丁,
+当从GPG中提取7位编码时会使他们变的更加复杂。
+
+如果你非要以附件的形式发送补丁,那么就右键点击附件,然后选中属性,突出"Suggest automatic
+display",这样内嵌附件更容易让读者看到。
+
+当你要保存将要发送的内嵌文本补丁,你可以从消息列表窗格选择包含补丁的邮件,然后右击选择
+"save as"。你可以使用一个没有更改的包含补丁的邮件,如果它是以正确的形式组成。当你正真在它
+自己的窗口之下察看,那时没有选项可以保存邮件--已经有一个这样的bug被汇报到了kmail的bugzilla
+并且希望这将会被处理。邮件是以只针对某个用户可读写的权限被保存的,所以如果你想把邮件复制到其他地方,
+你不得不把他们的权限改为组或者整体可读。
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Lotus Notes (GUI)
+
+不要使用它。
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Mutt (TUI)
+
+很多Linux开发人员使用mutt客户端,所以证明它肯定工作的非常漂亮。
+
+Mutt不自带编辑器,所以不管你使用什么编辑器都不应该带有自动断行。大多数编辑器都带有
+一个"insert file"选项,它可以通过不改变文件内容的方式插入文件。
+
+'vim'作为mutt的编辑器:
+ set editor="vi"
+
+ 如果使用xclip,敲入以下命令
+ :set paste
+ 按中键之前或者shift-insert或者使用
+ :r filename
+
+如果想要把补丁作为内嵌文本。
+(a)ttach工作的很好,不带有"set paste"。
+
+配置选项:
+它应该以默认设置的形式工作。
+然而,把"send_charset"设置为"us-ascii::utf-8"也是一个不错的主意。
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Pine (TUI)
+
+Pine过去有一些空格删减问题,但是这些现在应该都被修复了。
+
+如果可以,请使用alpine(pine的继承者)
+
+配置选项:
+- 最近的版本需要消除流程文本
+- "no-strip-whitespace-before-send"选项也是需要的。
+
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Sylpheed (GUI)
+
+- 内嵌文本可以很好的工作(或者使用附件)。
+- 允许使用外部的编辑器。
+- 对于目录较多时非常慢。
+- 如果通过non-SSL连接,无法使用TLS SMTP授权。
+- 在组成窗口中有一个很有用的ruler bar。
+- 给地址本中添加地址就不会正确的了解显示名。
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Thunderbird (GUI)
+
+默认情况下,thunderbird很容易损坏文本,但是还有一些方法可以强制它变得更好。
+
+- 在用户帐号设置里,组成和寻址,不要选择"Compose messages in HTML format"。
+
+- 编辑你的Thunderbird配置设置来使它不要拆行使用:user_pref("mailnews.wraplength", 0);
+
+- 编辑你的Thunderbird配置设置,使它不要使用"format=flowed"格式:user_pref("mailnews.
+ send_plaintext_flowed", false);
+
+- 你需要使Thunderbird变为预先格式方式:
+ 如果默认情况下你书写的是HTML格式,那不是很难。仅仅从标题栏的下拉框中选择"Preformat"格式。
+ 如果默认情况下你书写的是文本格式,你不得把它改为HTML格式(仅仅作为一次性的)来书写新的消息,
+ 然后强制使它回到文本格式,否则它就会拆行。要实现它,在写信的图标上使用shift键来使它变为HTML
+ 格式,然后标题栏的下拉框中选择"Preformat"格式。
+
+- 允许使用外部的编辑器:
+ 针对Thunderbird打补丁最简单的方法就是使用一个"external editor"扩展,然后使用你最喜欢的
+ $EDITOR来读取或者合并补丁到文本中。要实现它,可以下载并且安装这个扩展,然后添加一个使用它的
+ 按键View->Toolbars->Customize...最后当你书写信息的时候仅仅点击它就可以了。
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+TkRat (GUI)
+
+可以使用它。使用"Insert file..."或者外部的编辑器。
+
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Gmail (Web GUI)
+
+不要使用它发送补丁。
+
+Gmail网页客户端自动地把制表符转换为空格。
+
+虽然制表符转换为空格问题可以被外部编辑器解决,同时它还会使用回车换行把每行拆分为78个字符。
+
+另一个问题是Gmail还会把任何不是ASCII的字符的信息改为base64编码。它把东西变的像欧洲人的名字。
+
+ ###
diff --git a/Documentation/zh_CN/filesystems/sysfs.txt b/Documentation/zh_CN/filesystems/sysfs.txt
new file mode 100644
index 000000000..e230eaa33
--- /dev/null
+++ b/Documentation/zh_CN/filesystems/sysfs.txt
@@ -0,0 +1,372 @@
+Chinese translated version of Documentation/filesystems/sysfs.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Patrick Mochel <mochel@osdl.org>
+ Mike Murphy <mamurph@cs.clemson.edu>
+Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
+---------------------------------------------------------------------
+Documentation/filesystems/sysfs.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+英文版维护者: Patrick Mochel <mochel@osdl.org>
+ Mike Murphy <mamurph@cs.clemson.edu>
+中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版校译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+
+
+以下为正文
+---------------------------------------------------------------------
+sysfs - 用于导出内核对象(kobject)的文件系统
+
+Patrick Mochel <mochel@osdl.org>
+Mike Murphy <mamurph@cs.clemson.edu>
+
+修订: 16 August 2011
+原始版本: 10 January 2003
+
+
+sysfs 简介:
+~~~~~~~~~~
+
+sysfs 是一个最初基于 ramfs 且位于内存的文件系统。它提供导出内核
+数据结构及其属性,以及它们之间的关联到用户空间的方法。
+
+sysfs 始终与 kobject 的底层结构紧密相关。请阅读
+Documentation/kobject.txt 文档以获得更多关于 kobject 接口的
+信息。
+
+
+使用 sysfs
+~~~~~~~~~~~
+
+只要内核配置中定义了 CONFIG_SYSFS ,sysfs 总是被编译进内核。你可
+通过以下命令挂载它:
+
+ mount -t sysfs sysfs /sys
+
+
+创建目录
+~~~~~~~~
+
+任何 kobject 在系统中注册,就会有一个目录在 sysfs 中被创建。这个
+目录是作为该 kobject 的父对象所在目录的子目录创建的,以准确地传递
+内核的对象层次到用户空间。sysfs 中的顶层目录代表着内核对象层次的
+共同祖先;例如:某些对象属于某个子系统。
+
+Sysfs 在与其目录关联的 sysfs_dirent 对象中内部保存一个指向实现
+目录的 kobject 的指针。以前,这个 kobject 指针被 sysfs 直接用于
+kobject 文件打开和关闭的引用计数。而现在的 sysfs 实现中,kobject
+引用计数只能通过 sysfs_schedule_callback() 函数直接修改。
+
+
+属性
+~~~~
+
+kobject 的属性可在文件系统中以普通文件的形式导出。Sysfs 为属性定义
+了面向文件 I/O 操作的方法,以提供对内核属性的读写。
+
+
+属性应为 ASCII 码文本文件。以一个文件只存储一个属性值为宜。但一个
+文件只包含一个属性值可能影响效率,所以一个包含相同数据类型的属性值
+数组也被广泛地接受。
+
+混合类型、表达多行数据以及一些怪异的数据格式会遭到强烈反对。这样做是
+很丢脸的,而且其代码会在未通知作者的情况下被重写。
+
+
+一个简单的属性结构定义如下:
+
+struct attribute {
+ char * name;
+ struct module *owner;
+ umode_t mode;
+};
+
+
+int sysfs_create_file(struct kobject * kobj, const struct attribute * attr);
+void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr);
+
+
+一个单独的属性结构并不包含读写其属性值的方法。子系统最好为增删特定
+对象类型的属性定义自己的属性结构体和封装函数。
+
+例如:驱动程序模型定义的 device_attribute 结构体如下:
+
+struct device_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+};
+
+int device_create_file(struct device *, const struct device_attribute *);
+void device_remove_file(struct device *, const struct device_attribute *);
+
+为了定义设备属性,同时定义了一下辅助宏:
+
+#define DEVICE_ATTR(_name, _mode, _show, _store) \
+struct device_attribute dev_attr_##_name = __ATTR(_name, _mode, _show, _store)
+
+例如:声明
+
+static DEVICE_ATTR(foo, S_IWUSR | S_IRUGO, show_foo, store_foo);
+
+等同于如下代码:
+
+static struct device_attribute dev_attr_foo = {
+ .attr = {
+ .name = "foo",
+ .mode = S_IWUSR | S_IRUGO,
+ .show = show_foo,
+ .store = store_foo,
+ },
+};
+
+
+子系统特有的回调函数
+~~~~~~~~~~~~~~~~~~~
+
+当一个子系统定义一个新的属性类型时,必须实现一系列的 sysfs 操作,
+以帮助读写调用实现属性所有者的显示和储存方法。
+
+struct sysfs_ops {
+ ssize_t (*show)(struct kobject *, struct attribute *, char *);
+ ssize_t (*store)(struct kobject *, struct attribute *, const char *, size_t);
+};
+
+[子系统应已经定义了一个 struct kobj_type 结构体作为这个类型的
+描述符,并在此保存 sysfs_ops 的指针。更多的信息参见 kobject 的
+文档]
+
+sysfs 会为这个类型调用适当的方法。当一个文件被读写时,这个方法会
+将一般的kobject 和 attribute 结构体指针转换为适当的指针类型后
+调用相关联的函数。
+
+
+示例:
+
+#define to_dev(obj) container_of(obj, struct device, kobj)
+#define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr)
+
+static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct device_attribute *dev_attr = to_dev_attr(attr);
+ struct device *dev = to_dev(kobj);
+ ssize_t ret = -EIO;
+
+ if (dev_attr->show)
+ ret = dev_attr->show(dev, dev_attr, buf);
+ if (ret >= (ssize_t)PAGE_SIZE) {
+ print_symbol("dev_attr_show: %s returned bad count\n",
+ (unsigned long)dev_attr->show);
+ }
+ return ret;
+}
+
+
+
+读写属性数据
+~~~~~~~~~~~~
+
+在声明属性时,必须指定 show() 或 store() 方法,以实现属性的
+读或写。这些方法的类型应该和以下的设备属性定义一样简单。
+
+ssize_t (*show)(struct device *dev, struct device_attribute *attr, char *buf);
+ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+
+也就是说,他们应只以一个处理对象、一个属性和一个缓冲指针作为参数。
+
+sysfs 会分配一个大小为 (PAGE_SIZE) 的缓冲区并传递给这个方法。
+Sysfs 将会为每次读写操作调用一次这个方法。这使得这些方法在执行时
+会出现以下的行为:
+
+- 在读方面(read(2)),show() 方法应该填充整个缓冲区。回想属性
+ 应只导出了一个属性值或是一个同类型属性值的数组,所以这个代价将
+ 不会不太高。
+
+ 这使得用户空间可以局部地读和任意的向前搜索整个文件。如果用户空间
+ 向后搜索到零或使用‘0’偏移执行一个pread(2)操作,show()方法将
+ 再次被调用,以重新填充缓存。
+
+- 在写方面(write(2)),sysfs 希望在第一次写操作时得到整个缓冲区。
+ 之后 Sysfs 传递整个缓冲区给 store() 方法。
+
+ 当要写 sysfs 文件时,用户空间进程应首先读取整个文件,修该想要
+ 改变的值,然后回写整个缓冲区。
+
+ 在读写属性值时,属性方法的执行应操作相同的缓冲区。
+
+注记:
+
+- 写操作导致的 show() 方法重载,会忽略当前文件位置。
+
+- 缓冲区应总是 PAGE_SIZE 大小。对于i386,这个值为4096。
+
+- show() 方法应该返回写入缓冲区的字节数,也就是 snprintf()的
+ 返回值。
+
+- show() 应始终使用 snprintf()。
+
+- store() 应返回缓冲区的已用字节数。如果整个缓存都已填满,只需返回
+ count 参数。
+
+- show() 或 store() 可以返回错误值。当得到一个非法值,必须返回一个
+ 错误值。
+
+- 一个传递给方法的对象将会通过 sysfs 调用对象内嵌的引用计数固定在
+ 内存中。尽管如此,对象代表的物理实体(如设备)可能已不存在。如有必要,
+ 应该实现一个检测机制。
+
+一个简单的(未经实验证实的)设备属性实现如下:
+
+static ssize_t show_name(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%s\n", dev->name);
+}
+
+static ssize_t store_name(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ snprintf(dev->name, sizeof(dev->name), "%.*s",
+ (int)min(count, sizeof(dev->name) - 1), buf);
+ return count;
+}
+
+static DEVICE_ATTR(name, S_IRUGO, show_name, store_name);
+
+
+(注意:真正的实现不允许用户空间设置设备名。)
+
+顶层目录布局
+~~~~~~~~~~~~
+
+sysfs 目录的安排显示了内核数据结构之间的关系。
+
+顶层 sysfs 目录如下:
+
+block/
+bus/
+class/
+dev/
+devices/
+firmware/
+net/
+fs/
+
+devices/ 包含了一个设备树的文件系统表示。他直接映射了内部的内核
+设备树,反映了设备的层次结构。
+
+bus/ 包含了内核中各种总线类型的平面目录布局。每个总线目录包含两个
+子目录:
+
+ devices/
+ drivers/
+
+devices/ 包含了系统中出现的每个设备的符号链接,他们指向 root/ 下的
+设备目录。
+
+drivers/ 包含了每个已为特定总线上的设备而挂载的驱动程序的目录(这里
+假定驱动没有跨越多个总线类型)。
+
+fs/ 包含了一个为文件系统设立的目录。现在每个想要导出属性的文件系统必须
+在 fs/ 下创建自己的层次结构(参见Documentation/filesystems/fuse.txt)。
+
+dev/ 包含两个子目录: char/ 和 block/。在这两个子目录中,有以
+<major>:<minor> 格式命名的符号链接。这些符号链接指向 sysfs 目录
+中相应的设备。/sys/dev 提供一个通过一个 stat(2) 操作结果,查找
+设备 sysfs 接口快捷的方法。
+
+更多有关 driver-model 的特性信息可以在 Documentation/driver-model/
+中找到。
+
+
+TODO: 完成这一节。
+
+
+当前接口
+~~~~~~~~
+
+以下的接口层普遍存在于当前的sysfs中:
+
+- 设备 (include/linux/device.h)
+----------------------------------
+结构体:
+
+struct device_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device *dev, struct device_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count);
+};
+
+声明:
+
+DEVICE_ATTR(_name, _mode, _show, _store);
+
+增/删属性:
+
+int device_create_file(struct device *dev, const struct device_attribute * attr);
+void device_remove_file(struct device *dev, const struct device_attribute * attr);
+
+
+- 总线驱动程序 (include/linux/device.h)
+--------------------------------------
+结构体:
+
+struct bus_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct bus_type *, char * buf);
+ ssize_t (*store)(struct bus_type *, const char * buf, size_t count);
+};
+
+声明:
+
+BUS_ATTR(_name, _mode, _show, _store)
+
+增/删属性:
+
+int bus_create_file(struct bus_type *, struct bus_attribute *);
+void bus_remove_file(struct bus_type *, struct bus_attribute *);
+
+
+- 设备驱动程序 (include/linux/device.h)
+-----------------------------------------
+
+结构体:
+
+struct driver_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct device_driver *, char * buf);
+ ssize_t (*store)(struct device_driver *, const char * buf,
+ size_t count);
+};
+
+声明:
+
+DRIVER_ATTR(_name, _mode, _show, _store)
+
+增/删属性:
+
+int driver_create_file(struct device_driver *, const struct driver_attribute *);
+void driver_remove_file(struct device_driver *, const struct driver_attribute *);
+
+
+文档
+~~~~
+
+sysfs 目录结构以及其中包含的属性定义了一个内核与用户空间之间的 ABI。
+对于任何 ABI,其自身的稳定和适当的文档是非常重要的。所有新的 sysfs
+属性必须在 Documentation/ABI 中有文档。详见 Documentation/ABI/README。
diff --git a/Documentation/zh_CN/gpio.txt b/Documentation/zh_CN/gpio.txt
new file mode 100644
index 000000000..d5b8f0183
--- /dev/null
+++ b/Documentation/zh_CN/gpio.txt
@@ -0,0 +1,658 @@
+Chinese translated version of Documentation/gpio.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Grant Likely <grant.likely@secretlab.ca>
+ Linus Walleij <linus.walleij@linaro.org>
+Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
+---------------------------------------------------------------------
+Documentation/gpio.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+英文版维护者: Grant Likely <grant.likely@secretlab.ca>
+ Linus Walleij <linus.walleij@linaro.org>
+中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版校译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+
+
+以下为正文
+---------------------------------------------------------------------
+GPIO 接口
+
+本文档提供了一个在Linux下访问GPIO的公约概述。
+
+这些函数以 gpio_* 作为前缀。其他的函数不允许使用这样的前缀或相关的
+__gpio_* 前缀。
+
+
+什么是GPIO?
+==========
+"通用输入/输出口"(GPIO)是一个灵活的由软件控制的数字信号。他们可
+由多种芯片提供,且对于从事嵌入式和定制硬件的 Linux 开发者来说是
+比较熟悉。每个GPIO 都代表一个连接到特定引脚或球栅阵列(BGA)封装中
+“球珠”的一个位。电路板原理图显示了 GPIO 与外部硬件的连接关系。
+驱动可以编写成通用代码,以使板级启动代码可传递引脚配置数据给驱动。
+
+片上系统 (SOC) 处理器对 GPIO 有很大的依赖。在某些情况下,每个
+非专用引脚都可配置为 GPIO,且大多数芯片都最少有一些 GPIO。
+可编程逻辑器件(类似 FPGA) 可以方便地提供 GPIO。像电源管理和
+音频编解码器这样的多功能芯片经常留有一些这样的引脚来帮助那些引脚
+匮乏的 SOC。同时还有通过 I2C 或 SPI 串行总线连接的“GPIO扩展器”
+芯片。大多数 PC 的南桥有一些拥有 GPIO 能力的引脚 (只有BIOS
+固件才知道如何使用他们)。
+
+GPIO 的实际功能因系统而异。通常用法有:
+
+ - 输出值可写 (高电平=1,低电平=0)。一些芯片也有如何驱动这些值的选项,
+ 例如只允许输出一个值、支持“线与”及其他取值类似的模式(值得注意的是
+ “开漏”信号)
+
+ - 输入值可读(1、0)。一些芯片支持引脚在配置为“输出”时回读,这对于类似
+ “线与”的情况(以支持双向信号)是非常有用的。GPIO 控制器可能有输入
+ 去毛刺/消抖逻辑,这有时需要软件控制。
+
+ - 输入通常可作为 IRQ 信号,一般是沿触发,但有时是电平触发。这样的 IRQ
+ 可能配置为系统唤醒事件,以将系统从低功耗状态下唤醒。
+
+ - 通常一个 GPIO 根据不同产品电路板的需求,可以配置为输入或输出,也有仅
+ 支持单向的。
+
+ - 大部分 GPIO 可以在持有自旋锁时访问,但是通常由串行总线扩展的 GPIO
+ 不允许持有自旋锁。但某些系统也支持这种类型。
+
+对于给定的电路板,每个 GPIO 都用于某个特定的目的,如监控 MMC/SD 卡的
+插入/移除、检测卡的写保护状态、驱动 LED、配置收发器、模拟串行总线、
+复位硬件看门狗、感知开关状态等等。
+
+
+GPIO 公约
+=========
+注意,这个叫做“公约”,因为这不是强制性的,不遵循这个公约是无伤大雅的,
+因为此时可移植性并不重要。GPIO 常用于板级特定的电路逻辑,甚至可能
+随着电路板的版本而改变,且不可能在不同走线的电路板上使用。仅有在少数
+功能上才具有可移植性,其他功能是平台特定。这也是由于“胶合”的逻辑造成的。
+
+此外,这不需要任何的执行框架,只是一个接口。某个平台可能通过一个简单地
+访问芯片寄存器的内联函数来实现它,其他平台可能通过委托一系列不同的GPIO
+控制器的抽象函数来实现它。(有一些可选的代码能支持这种策略的实现,本文档
+后面会介绍,但作为 GPIO 接口的客户端驱动程序必须与它的实现无关。)
+
+也就是说,如果在他们的平台上支持这个公约,驱动应尽可能的使用它。同时,平台
+必须在 Kconfig 中选择 ARCH_REQUIRE_GPIOLIB 或者 ARCH_WANT_OPTIONAL_GPIOLIB
+选项。那些调用标准 GPIO 函数的驱动应该在 Kconfig 入口中声明依赖GENERIC_GPIO。
+当驱动包含文件:
+
+ #include <linux/gpio.h>
+
+则 GPIO 函数是可用,无论是“真实代码”还是经优化过的语句。如果你遵守
+这个公约,当你的代码完成后,对其他的开发者来说会更容易看懂和维护。
+
+注意,这些操作包含所用平台的 I/O 屏障代码,驱动无须显式地调用他们。
+
+
+标识 GPIO
+---------
+GPIO 是通过无符号整型来标识的,范围是 0 到 MAX_INT。保留“负”数
+用于其他目的,例如标识信号“在这个板子上不可用”或指示错误。未接触底层
+硬件的代码会忽略这些整数。
+
+平台会定义这些整数的用法,且通常使用 #define 来定义 GPIO,这样
+板级特定的启动代码可以直接关联相应的原理图。相对来说,驱动应该仅使用
+启动代码传递过来的 GPIO 编号,使用 platform_data 保存板级特定
+引脚配置数据 (同时还有其他须要的板级特定数据),避免可能出现的问题。
+
+例如一个平台使用编号 32-159 来标识 GPIO,而在另一个平台使用编号0-63
+标识一组 GPIO 控制器,64-79标识另一类 GPIO 控制器,且在一个含有
+FPGA 的特定板子上使用 80-95。编号不一定要连续,那些平台中,也可以
+使用编号2000-2063来标识一个 I2C 接口的 GPIO 扩展器中的 GPIO。
+
+如果你要初始化一个带有无效 GPIO 编号的结构体,可以使用一些负编码
+(如"-EINVAL"),那将使其永远不会是有效。来测试这样一个结构体中的编号
+是否关联一个 GPIO,你可使用以下断言:
+
+ int gpio_is_valid(int number);
+
+如果编号不存在,则请求和释放 GPIO 的函数将拒绝执行相关操作(见下文)。
+其他编号也可能被拒绝,比如一个编号可能存在,但暂时在给定的电路上不可用。
+
+一个平台是否支持多个 GPIO 控制器为平台特定的实现问题,就像是否可以
+在 GPIO 编号空间中有“空洞”和是否可以在运行时添加新的控制器一样。
+这些问题会影响其他事情,包括相邻的 GPIO 编号是否存在等。
+
+使用 GPIO
+---------
+对于一个 GPIO,系统应该做的第一件事情就是通过 gpio_request()
+函数分配它,见下文。
+
+接下来是设置I/O方向,这通常是在板级启动代码中为所使用的 GPIO 设置
+platform_device 时完成。
+
+ /* 设置为输入或输出, 返回 0 或负的错误代码 */
+ int gpio_direction_input(unsigned gpio);
+ int gpio_direction_output(unsigned gpio, int value);
+
+返回值为零代表成功,否则返回一个负的错误代码。这个返回值需要检查,因为
+get/set(获取/设置)函数调用没法返回错误,且有可能是配置错误。通常,
+你应该在进程上下文中调用这些函数。然而,对于自旋锁安全的 GPIO,在板子
+启动的早期、进程启动前使用他们也是可以的。
+
+对于作为输出的 GPIO,为其提供初始输出值,对于避免在系统启动期间出现
+信号毛刺是很有帮助的。
+
+为了与传统的 GPIO 接口兼容, 在设置一个 GPIO 方向时,如果它还未被申请,
+则隐含了申请那个 GPIO 的操作(见下文)。这种兼容性正在从可选的 gpiolib
+框架中移除。
+
+如果这个 GPIO 编码不存在,或者特定的 GPIO 不能用于那种模式,则方向
+设置可能失败。依赖启动固件来正确地设置方向通常是一个坏主意,因为它可能
+除了启动Linux,并没有做更多的验证工作。(同理, 板子的启动代码可能需要
+将这个复用的引脚设置为 GPIO,并正确地配置上拉/下拉电阻。)
+
+
+访问自旋锁安全的 GPIO
+-------------------
+大多数 GPIO 控制器可以通过内存读/写指令来访问。这些指令不会休眠,可以
+安全地在硬(非线程)中断例程和类似的上下文中完成。
+
+对于那些用 gpio_cansleep()测试总是返回失败的 GPIO(见下文),使用
+以下的函数访问:
+
+ /* GPIO 输入:返回零或非零 */
+ int gpio_get_value(unsigned gpio);
+
+ /* GPIO 输出 */
+ void gpio_set_value(unsigned gpio, int value);
+
+GPIO值是布尔值,零表示低电平,非零表示高电平。当读取一个输出引脚的值时,
+返回值应该是引脚上的值。这个值不总是和输出值相符,因为存在开漏输出信号和
+输出延迟问题。
+
+以上的 get/set 函数无错误返回值,因为之前 gpio_direction_*()应已检查过
+其是否为“无效GPIO”。此外,还需要注意的是并不是所有平台都可以从输出引脚
+中读取数据,对于不能读取的引脚应总返回零。另外,对那些在原子上下文中无法
+安全访问的 GPIO (译者注:因为访问可能导致休眠)使用这些函数是不合适的
+(见下文)。
+
+在 GPIO 编号(还有输出、值)为常数的情况下,鼓励通过平台特定的实现来优化
+这两个函数来访问 GPIO 值。这种情况(读写一个硬件寄存器)下只需要几条指令
+是很正常的,且无须自旋锁。这种优化函数比起那些在子程序上花费许多指令的
+函数可以使得模拟接口(译者注:例如 GPIO 模拟 I2C、1-wire 或 SPI)的
+应用(在空间和时间上都)更具效率。
+
+
+访问可能休眠的 GPIO
+-----------------
+某些 GPIO 控制器必须通过基于总线(如 I2C 或 SPI)的消息访问。读或写这些
+GPIO 值的命令需要等待其信息排到队首才发送命令,再获得其反馈。期间需要
+休眠,这不能在 IRQ 例程(中断上下文)中执行。
+
+支持此类 GPIO 的平台通过以下函数返回非零值来区分出这种 GPIO。(此函数需要
+一个之前通过 gpio_request 分配到的有效 GPIO 编号):
+
+ int gpio_cansleep(unsigned gpio);
+
+为了访问这种 GPIO,内核定义了一套不同的函数:
+
+ /* GPIO 输入:返回零或非零 ,可能会休眠 */
+ int gpio_get_value_cansleep(unsigned gpio);
+
+ /* GPIO 输出,可能会休眠 */
+ void gpio_set_value_cansleep(unsigned gpio, int value);
+
+
+访问这样的 GPIO 需要一个允许休眠的上下文,例如线程 IRQ 处理例程,并用以上的
+访问函数替换那些没有 cansleep()后缀的自旋锁安全访问函数。
+
+除了这些访问函数可能休眠,且它们操作的 GPIO 不能在硬件 IRQ 处理例程中访问的
+事实,这些处理例程实际上和自旋锁安全的函数是一样的。
+
+** 除此之外 ** 调用设置和配置此类 GPIO 的函数也必须在允许休眠的上下文中,
+因为它们可能也需要访问 GPIO 控制器芯片: (这些设置函数通常在板级启动代码或者
+驱动探测/断开代码中,所以这是一个容易满足的约束条件。)
+
+ gpio_direction_input()
+ gpio_direction_output()
+ gpio_request()
+
+## gpio_request_one()
+## gpio_request_array()
+## gpio_free_array()
+
+ gpio_free()
+ gpio_set_debounce()
+
+
+
+声明和释放 GPIO
+----------------------------
+为了有助于捕获系统配置错误,定义了两个函数。
+
+ /* 申请 GPIO, 返回 0 或负的错误代码.
+ * 非空标签可能有助于诊断.
+ */
+ int gpio_request(unsigned gpio, const char *label);
+
+ /* 释放之前声明的 GPIO */
+ void gpio_free(unsigned gpio);
+
+将无效的 GPIO 编码传递给 gpio_request()会导致失败,申请一个已使用这个
+函数声明过的 GPIO 也会失败。gpio_request()的返回值必须检查。你应该在
+进程上下文中调用这些函数。然而,对于自旋锁安全的 GPIO,在板子启动的早期、
+进入进程之前是可以申请的。
+
+这个函数完成两个基本的目标。一是标识那些实际上已作为 GPIO 使用的信号线,
+这样便于更好地诊断;系统可能需要服务几百个可用的 GPIO,但是对于任何一个
+给定的电路板通常只有一些被使用。另一个目的是捕获冲突,查明错误:如两个或
+更多驱动错误地认为他们已经独占了某个信号线,或是错误地认为移除一个管理着
+某个已激活信号的驱动是安全的。也就是说,申请 GPIO 的作用类似一种锁机制。
+
+某些平台可能也使用 GPIO 作为电源管理激活信号(例如通过关闭未使用芯片区和
+简单地关闭未使用时钟)。
+
+对于 GPIO 使用 pinctrl 子系统已知的引脚,子系统应该被告知其使用情况;
+一个 gpiolib 驱动的 .request()操作应调用 pinctrl_request_gpio(),
+而 gpiolib 驱动的 .free()操作应调用 pinctrl_free_gpio()。pinctrl
+子系统允许 pinctrl_request_gpio()在某个引脚或引脚组以复用形式“属于”
+一个设备时都成功返回。
+
+任何须将 GPIO 信号导向适当引脚的引脚复用硬件的编程应该发生在 GPIO
+驱动的 .direction_input()或 .direction_output()函数中,以及
+任何输出 GPIO 值的设置之后。这样可使从引脚特殊功能到 GPIO 的转换
+不会在引脚产生毛刺波形。有时当用一个 GPIO 实现其信号驱动一个非 GPIO
+硬件模块的解决方案时,就需要这种机制。
+
+某些平台允许部分或所有 GPIO 信号使用不同的引脚。类似的,GPIO 或引脚的
+其他方面也需要配置,如上拉/下拉。平台软件应该在对这些 GPIO 调用
+gpio_request()前将这类细节配置好,例如使用 pinctrl 子系统的映射表,
+使得 GPIO 的用户无须关注这些细节。
+
+还有一个值得注意的是在释放 GPIO 前,你必须停止使用它。
+
+
+注意:申请一个 GPIO 并没有以任何方式配置它,只不过标识那个 GPIO 处于使用
+状态。必须有另外的代码来处理引脚配置(如控制 GPIO 使用的引脚、上拉/下拉)。
+考虑到大多数情况下声明 GPIO 之后就会立即配置它们,所以定义了以下三个辅助函数:
+
+ /* 申请一个 GPIO 信号, 同时通过特定的'flags'初始化配置,
+ * 其他和 gpio_request()的参数和返回值相同
+ *
+ */
+ int gpio_request_one(unsigned gpio, unsigned long flags, const char *label);
+
+ /* 在单个函数中申请多个 GPIO
+ */
+ int gpio_request_array(struct gpio *array, size_t num);
+
+ /* 在单个函数中释放多个 GPIO
+ */
+ void gpio_free_array(struct gpio *array, size_t num);
+
+这里 'flags' 当前定义可指定以下属性:
+
+ * GPIOF_DIR_IN - 配置方向为输入
+ * GPIOF_DIR_OUT - 配置方向为输出
+
+ * GPIOF_INIT_LOW - 在作为输出时,初始值为低电平
+ * GPIOF_INIT_HIGH - 在作为输出时,初始值为高电平
+ * GPIOF_OPEN_DRAIN - gpio引脚为开漏信号
+ * GPIOF_OPEN_SOURCE - gpio引脚为源极开路信号
+
+ * GPIOF_EXPORT_DIR_FIXED - 将 gpio 导出到 sysfs,并保持方向
+ * GPIOF_EXPORT_DIR_CHANGEABLE - 同样是导出, 但允许改变方向
+
+因为 GPIOF_INIT_* 仅有在配置为输出的时候才存在,所以有效的组合为:
+
+ * GPIOF_IN - 配置为输入
+ * GPIOF_OUT_INIT_LOW - 配置为输出,并初始化为低电平
+ * GPIOF_OUT_INIT_HIGH - 配置为输出,并初始化为高电平
+
+当设置 flag 为 GPIOF_OPEN_DRAIN 时,则假设引脚是开漏信号。这样的引脚
+将不会在输出模式下置1。这样的引脚需要连接上拉电阻。通过使能这个标志,gpio库
+将会在被要求输出模式下置1时将引脚变为输入状态来使引脚置高。引脚在输出模式下
+通过置0使其输出低电平。
+
+当设置 flag 为 GPIOF_OPEN_SOURCE 时,则假设引脚为源极开路信号。这样的引脚
+将不会在输出模式下置0。这样的引脚需要连接下拉电阻。通过使能这个标志,gpio库
+将会在被要求输出模式下置0时将引脚变为输入状态来使引脚置低。引脚在输出模式下
+通过置1使其输出高电平。
+
+将来这些标志可能扩展到支持更多的属性。
+
+更进一步,为了更简单地声明/释放多个 GPIO,'struct gpio'被引进来封装所有
+这三个领域:
+
+ struct gpio {
+ unsigned gpio;
+ unsigned long flags;
+ const char *label;
+ };
+
+一个典型的用例:
+
+ static struct gpio leds_gpios[] = {
+ { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* 默认开启 */
+ { 33, GPIOF_OUT_INIT_LOW, "Green LED" }, /* 默认关闭 */
+ { 34, GPIOF_OUT_INIT_LOW, "Red LED" }, /* 默认关闭 */
+ { 35, GPIOF_OUT_INIT_LOW, "Blue LED" }, /* 默认关闭 */
+ { ... },
+ };
+
+ err = gpio_request_one(31, GPIOF_IN, "Reset Button");
+ if (err)
+ ...
+
+ err = gpio_request_array(leds_gpios, ARRAY_SIZE(leds_gpios));
+ if (err)
+ ...
+
+ gpio_free_array(leds_gpios, ARRAY_SIZE(leds_gpios));
+
+
+GPIO 映射到 IRQ
+--------------------
+GPIO 编号是无符号整数;IRQ 编号也是。这些构成了两个逻辑上不同的命名空间
+(GPIO 0 不一定使用 IRQ 0)。你可以通过以下函数在它们之间实现映射:
+
+ /* 映射 GPIO 编号到 IRQ 编号 */
+ int gpio_to_irq(unsigned gpio);
+
+ /* 映射 IRQ 编号到 GPIO 编号 (尽量避免使用) */
+ int irq_to_gpio(unsigned irq);
+
+它们的返回值为对应命名空间的相关编号,或是负的错误代码(如果无法映射)。
+(例如,某些 GPIO 无法做为 IRQ 使用。)以下的编号错误是未经检测的:使用一个
+未通过 gpio_direction_input()配置为输入的 GPIO 编号,或者使用一个
+并非来源于gpio_to_irq()的 IRQ 编号。
+
+这两个映射函数可能会在信号编号的加减计算过程上花些时间。它们不可休眠。
+
+gpio_to_irq()返回的非错误值可以传递给 request_irq()或者 free_irq()。
+它们通常通过板级特定的初始化代码存放到平台设备的 IRQ 资源中。注意:IRQ
+触发选项是 IRQ 接口的一部分,如 IRQF_TRIGGER_FALLING,系统唤醒能力
+也是如此。
+
+irq_to_gpio()返回的非错误值大多数通常可以被 gpio_get_value()所使用,
+比如在 IRQ 是沿触发时初始化或更新驱动状态。注意某些平台不支持反映射,所以
+你应该尽量避免使用它。
+
+
+模拟开漏信号
+----------------------------
+有时在只有低电平信号作为实际驱动结果(译者注:多个输出连接于一点,逻辑电平
+结果为所有输出的逻辑与)的时候,共享的信号线需要使用“开漏”信号。(该术语
+适用于 CMOS 管;而 TTL 用“集电极开路”。)一个上拉电阻使信号为高电平。这
+有时被称为“线与”。实际上,从负逻辑(低电平为真)的角度来看,这是一个“线或”。
+
+一个开漏信号的常见例子是共享的低电平使能 IRQ 信号线。此外,有时双向数据总线
+信号也使用漏极开路信号。
+
+某些 GPIO 控制器直接支持开漏输出,还有许多不支持。当你需要开漏信号,但
+硬件又不直接支持的时候,一个常用的方法是用任何即可作输入也可作输出的 GPIO
+引脚来模拟:
+
+ LOW: gpio_direction_output(gpio, 0) ... 这代码驱动信号并覆盖
+ 上拉配置。
+
+ HIGH: gpio_direction_input(gpio) ... 这代码关闭输出,所以上拉电阻
+ (或其他的一些器件)控制了信号。
+
+如果你将信号线“驱动”为高电平,但是 gpio_get_value(gpio)报告了一个
+低电平(在适当的上升时间后),你就可以知道是其他的一些组件将共享信号线拉低了。
+这不一定是错误的。一个常见的例子就是 I2C 时钟的延长:一个需要较慢时钟的
+从设备延迟 SCK 的上升沿,而 I2C 主设备相应地调整其信号传输速率。
+
+
+这些公约忽略了什么?
+================
+这些公约忽略的最大一件事就是引脚复用,因为这属于高度芯片特定的属性且
+没有可移植性。某个平台可能不需要明确的复用信息;有的对于任意给定的引脚
+可能只有两个功能选项;有的可能每个引脚有八个功能选项;有的可能可以将
+几个引脚中的任何一个作为给定的 GPIO。(是的,这些例子都来自于当前运行
+Linux 的系统。)
+
+在某些系统中,与引脚复用相关的是配置和使能集成的上、下拉模式。并不是所有
+平台都支持这种模式,或者不会以相同的方式来支持这种模式;且任何给定的电路板
+可能使用外置的上拉(或下拉)电阻,这时芯片上的就不应该使用。(当一个电路需要
+5kOhm 的拉动电阻,芯片上的 100 kOhm 电阻就不能做到。)同样的,驱动能力
+(2 mA vs 20 mA)和电压(1.8V vs 3.3V)是平台特定问题,就像模型一样在
+可配置引脚和 GPIO 之间(没)有一一对应的关系。
+
+还有其他一些系统特定的机制没有在这里指出,例如上述的输入去毛刺和线与输出
+选项。硬件可能支持批量读或写 GPIO,但是那一般是配置相关的:对于处于同一
+块区(bank)的GPIO。(GPIO 通常以 16 或 32 个组成一个区块,一个给定的
+片上系统一般有几个这样的区块。)某些系统可以通过输出 GPIO 触发 IRQ,
+或者从并非以 GPIO 管理的引脚取值。这些机制的相关代码没有必要具有可移植性。
+
+当前,动态定义 GPIO 并不是标准的,例如作为配置一个带有某些 GPIO 扩展器的
+附加电路板的副作用。
+
+GPIO 实现者的框架 (可选)
+=====================
+前面提到了,有一个可选的实现框架,让平台使用相同的编程接口,更加简单地支持
+不同种类的 GPIO 控制器。这个框架称为"gpiolib"。
+
+作为一个辅助调试功能,如果 debugfs 可用,就会有一个 /sys/kernel/debug/gpio
+文件。通过这个框架,它可以列出所有注册的控制器,以及当前正在使用中的 GPIO
+的状态。
+
+
+控制器驱动: gpio_chip
+-------------------
+在框架中每个 GPIO 控制器都包装为一个 "struct gpio_chip",他包含了
+该类型的每个控制器的常用信息:
+
+ - 设置 GPIO 方向的方法
+ - 用于访问 GPIO 值的方法
+ - 告知调用其方法是否可能休眠的标志
+ - 可选的 debugfs 信息导出方法 (显示类似上拉配置一样的额外状态)
+ - 诊断标签
+
+也包含了来自 device.platform_data 的每个实例的数据:它第一个 GPIO 的
+编号和它可用的 GPIO 的数量。
+
+实现 gpio_chip 的代码应支持多控制器实例,这可能使用驱动模型。那些代码要
+配置每个 gpio_chip,并发起gpiochip_add()。卸载一个 GPIO 控制器很少见,
+但在必要的时候可以使用 gpiochip_remove()。
+
+大部分 gpio_chip 是一个实例特定结构体的一部分,而并不将 GPIO 接口单独
+暴露出来,比如编址、电源管理等。类似编解码器这样的芯片会有复杂的非 GPIO
+状态。
+
+任何一个 debugfs 信息导出方法通常应该忽略还未申请作为 GPIO 的信号线。
+他们可以使用 gpiochip_is_requested()测试,当这个 GPIO 已经申请过了
+就返回相关的标签,否则返回 NULL。
+
+
+平台支持
+-------
+为了支持这个框架,一个平台的 Kconfig 文件将会 "select"(选择)
+ARCH_REQUIRE_GPIOLIB 或 ARCH_WANT_OPTIONAL_GPIOLIB,并让它的
+<asm/gpio.h> 包含 <asm-generic/gpio.h>,同时定义三个方法:
+gpio_get_value()、gpio_set_value()和 gpio_cansleep()。
+
+它也应提供一个 ARCH_NR_GPIOS 的定义值,这样可以更好地反映该平台 GPIO
+的实际数量,节省静态表的空间。(这个定义值应该包含片上系统内建 GPIO 和
+GPIO 扩展器中的数据。)
+
+ARCH_REQUIRE_GPIOLIB 意味着 gpiolib 核心在这个构架中将总是编译进内核。
+
+ARCH_WANT_OPTIONAL_GPIOLIB 意味着 gpiolib 核心默认关闭,且用户可以
+使能它,并将其编译进内核(可选)。
+
+如果这些选项都没被选择,该平台就不通过 GPIO-lib 支持 GPIO,且代码不可以
+被用户使能。
+
+以下这些方法的实现可以直接使用框架代码,并总是通过 gpio_chip 调度:
+
+ #define gpio_get_value __gpio_get_value
+ #define gpio_set_value __gpio_set_value
+ #define gpio_cansleep __gpio_cansleep
+
+这些定义可以用更理想的实现方法替代,那就是使用经过逻辑优化的内联函数来访问
+基于特定片上系统的 GPIO。例如,若引用的 GPIO (寄存器位偏移)是常量“12”,
+读取或设置它可能只需少则两或三个指令,且不会休眠。当这样的优化无法实现时,
+那些函数必须使用框架提供的代码,那就至少要几十条指令才可以实现。对于用 GPIO
+模拟的 I/O 接口, 如此精简指令是很有意义的。
+
+对于片上系统,平台特定代码为片上 GPIO 每个区(bank)定义并注册 gpio_chip
+实例。那些 GPIO 应该根据芯片厂商的文档进行编码/标签,并直接和电路板原理图
+对应。他们应该开始于零并终止于平台特定的限制。这些 GPIO(代码)通常从
+arch_initcall()或者更早的地方集成进平台初始化代码,使这些 GPIO 总是可用,
+且他们通常可以作为 IRQ 使用。
+
+板级支持
+-------
+对于外部 GPIO 控制器(例如 I2C 或 SPI 扩展器、专用芯片、多功能器件、FPGA
+或 CPLD),大多数常用板级特定代码都可以注册控制器设备,并保证他们的驱动知道
+gpiochip_add()所使用的 GPIO 编号。他们的起始编号通常跟在平台特定的 GPIO
+编号之后。
+
+例如板级启动代码应该创建结构体指明芯片公开的 GPIO 范围,并使用 platform_data
+将其传递给每个 GPIO 扩展器芯片。然后芯片驱动中的 probe()例程可以将这个
+数据传递给 gpiochip_add()。
+
+初始化顺序很重要。例如,如果一个设备依赖基于 I2C 的(扩展)GPIO,那么它的
+probe()例程就应该在那个 GPIO 有效以后才可以被调用。这意味着设备应该在
+GPIO 可以工作之后才可被注册。解决这类依赖的的一种方法是让这种 gpio_chip
+控制器向板级特定代码提供 setup()和 teardown()回调函数。一旦所有必须的
+资源可用之后,这些板级特定的回调函数将会注册设备,并可以在这些 GPIO 控制器
+设备变成无效时移除它们。
+
+
+用户空间的 Sysfs 接口(可选)
+========================
+使用“gpiolib”实现框架的平台可以选择配置一个 GPIO 的 sysfs 用户接口。
+这不同于 debugfs 接口,因为它提供的是对 GPIO方向和值的控制,而不只显示
+一个GPIO 的状态摘要。此外,它可以出现在没有调试支持的产品级系统中。
+
+例如,通过适当的系统硬件文档,用户空间可以知道 GIOP #23 控制 Flash
+存储器的写保护(用于保护其中 Bootloader 分区)。产品的系统升级可能需要
+临时解除这个保护:首先导入一个 GPIO,改变其输出状态,然后在重新使能写保护
+前升级代码。通常情况下,GPIO #23 是不会被触及的,并且内核也不需要知道他。
+
+根据适当的硬件文档,某些系统的用户空间 GPIO 可以用于确定系统配置数据,
+这些数据是标准内核不知道的。在某些任务中,简单的用户空间 GPIO 驱动可能是
+系统真正需要的。
+
+注意:标准内核驱动中已经存在通用的“LED 和按键”GPIO 任务,分别是:
+"leds-gpio" 和 "gpio_keys"。请使用这些来替代直接访问 GPIO,因为集成在
+内核框架中的这类驱动比你在用户空间的代码更好。
+
+
+Sysfs 中的路径
+--------------
+在/sys/class/gpio 中有 3 类入口:
+
+ - 用于在用户空间控制 GPIO 的控制接口;
+
+ - GPIOs 本身;以及
+
+ - GPIO 控制器 ("gpio_chip" 实例)。
+
+除了这些标准的文件,还包含“device”符号链接。
+
+控制接口是只写的:
+
+ /sys/class/gpio/
+
+ "export" ... 用户空间可以通过写其编号到这个文件,要求内核导出
+ 一个 GPIO 的控制到用户空间。
+
+ 例如: 如果内核代码没有申请 GPIO #19,"echo 19 > export"
+ 将会为 GPIO #19 创建一个 "gpio19" 节点。
+
+ "unexport" ... 导出到用户空间的逆操作。
+
+ 例如: "echo 19 > unexport" 将会移除使用"export"文件导出的
+ "gpio19" 节点。
+
+GPIO 信号的路径类似 /sys/class/gpio/gpio42/ (对于 GPIO #42 来说),
+并有如下的读/写属性:
+
+ /sys/class/gpio/gpioN/
+
+ "direction" ... 读取得到 "in" 或 "out"。这个值通常运行写入。
+ 写入"out" 时,其引脚的默认输出为低电平。为了确保无故障运行,
+ "low" 或 "high" 的电平值应该写入 GPIO 的配置,作为初始输出值。
+
+ 注意:如果内核不支持改变 GPIO 的方向,或者在导出时内核代码没有
+ 明确允许用户空间可以重新配置 GPIO 方向,那么这个属性将不存在。
+
+ "value" ... 读取得到 0 (低电平) 或 1 (高电平)。如果 GPIO 配置为
+ 输出,这个值允许写操作。任何非零值都以高电平看待。
+
+ 如果引脚可以配置为中断信号,且如果已经配置了产生中断的模式
+ (见"edge"的描述),你可以对这个文件使用轮询操作(poll(2)),
+ 且轮询操作会在任何中断触发时返回。如果你使用轮询操作(poll(2)),
+ 请在 events 中设置 POLLPRI 和 POLLERR。如果你使用轮询操作
+ (select(2)),请在 exceptfds 设置你期望的文件描述符。在
+ 轮询操作(poll(2))返回之后,既可以通过 lseek(2)操作读取
+ sysfs 文件的开始部分,也可以关闭这个文件并重新打开它来读取数据。
+
+ "edge" ... 读取得到“none”、“rising”、“falling”或者“both”。
+ 将这些字符串写入这个文件可以选择沿触发模式,会使得轮询操作
+ (select(2))在"value"文件中返回。
+
+ 这个文件仅有在这个引脚可以配置为可产生中断输入引脚时,才存在。
+
+ "active_low" ... 读取得到 0 (假) 或 1 (真)。写入任何非零值可以
+ 翻转这个属性的(读写)值。已存在或之后通过"edge"属性设置了"rising"
+ 和 "falling" 沿触发模式的轮询操作(poll(2))将会遵循这个设置。
+
+GPIO 控制器的路径类似 /sys/class/gpio/gpiochip42/ (对于从#42 GPIO
+开始实现控制的控制器),并有着以下只读属性:
+
+ /sys/class/gpio/gpiochipN/
+
+ "base" ... 与以上的 N 相同,代表此芯片管理的第一个 GPIO 的编号
+
+ "label" ... 用于诊断 (并不总是只有唯一值)
+
+ "ngpio" ... 此控制器所管理的 GPIO 数量(而 GPIO 编号从 N 到
+ N + ngpio - 1)
+
+大多数情况下,电路板的文档应当标明每个 GPIO 的使用目的。但是那些编号并不总是
+固定的,例如在扩展卡上的 GPIO会根据所使用的主板或所在堆叠架构中其他的板子而
+有所不同。在这种情况下,你可能需要使用 gpiochip 节点(尽可能地结合电路图)来
+确定给定信号所用的 GPIO 编号。
+
+
+从内核代码中导出
+-------------
+内核代码可以明确地管理那些已通过 gpio_request()申请的 GPIO 的导出:
+
+ /* 导出 GPIO 到用户空间 */
+ int gpio_export(unsigned gpio, bool direction_may_change);
+
+ /* gpio_export()的逆操作 */
+ void gpio_unexport();
+
+ /* 创建一个 sysfs 连接到已导出的 GPIO 节点 */
+ int gpio_export_link(struct device *dev, const char *name,
+ unsigned gpio)
+
+ /* 改变 sysfs 中的一个 GPIO 节点的极性 */
+ int gpio_sysfs_set_active_low(unsigned gpio, int value);
+
+在一个内核驱动申请一个 GPIO 之后,它可以通过 gpio_export()使其在 sysfs
+接口中可见。该驱动可以控制信号方向是否可修改。这有助于防止用户空间代码无意间
+破坏重要的系统状态。
+
+这个明确的导出有助于(通过使某些实验更容易来)调试,也可以提供一个始终存在的接口,
+与文档配合作为板级支持包的一部分。
+
+在 GPIO 被导出之后,gpio_export_link()允许在 sysfs 文件系统的任何地方
+创建一个到这个 GPIO sysfs 节点的符号链接。这样驱动就可以通过一个描述性的
+名字,在 sysfs 中他们所拥有的设备下提供一个(到这个 GPIO sysfs 节点的)接口。
+
+驱动可以使用 gpio_sysfs_set_active_low() 来在用户空间隐藏电路板之间
+GPIO 线的极性差异。这个仅对 sysfs 接口起作用。极性的改变可以在 gpio_export()
+前后进行,且之前使能的轮询操作(poll(2))支持(上升或下降沿)将会被重新配置来遵循
+这个设置。
diff --git a/Documentation/zh_CN/io_ordering.txt b/Documentation/zh_CN/io_ordering.txt
new file mode 100644
index 000000000..e592daf4e
--- /dev/null
+++ b/Documentation/zh_CN/io_ordering.txt
@@ -0,0 +1,67 @@
+Chinese translated version of Documentation/io_orderings.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Chinese maintainer: Lin Yongting <linyongting@gmail.com>
+---------------------------------------------------------------------
+Documentation/io_ordering.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+中文版维护者: 林永听 Lin Yongting <linyongting@gmail.com>
+中文版翻译者: 林永听 Lin Yongting <linyongting@gmail.com>
+中文版校译者: 林永听 Lin Yongting <linyongting@gmail.com>
+
+
+以下为正文
+---------------------------------------------------------------------
+
+在某些平台上,所谓的内存映射I/O是弱顺序。在这些平台上,驱动开发者有责任
+保证I/O内存映射地址的写操作按程序图意的顺序达到设备。通常读取一个“安全”
+设备寄存器或桥寄存器,触发IO芯片清刷未处理的写操作到达设备后才处理读操作,
+而达到保证目的。驱动程序通常在spinlock保护的临界区退出之前使用这种技术。
+这也可以保证后面的写操作只在前面的写操作之后到达设备(这非常类似于内存
+屏障操作,mb(),不过仅适用于I/O)。
+
+假设一个设备驱动程的具体例子:
+
+ ...
+CPU A: spin_lock_irqsave(&dev_lock, flags)
+CPU A: val = readl(my_status);
+CPU A: ...
+CPU A: writel(newval, ring_ptr);
+CPU A: spin_unlock_irqrestore(&dev_lock, flags)
+ ...
+CPU B: spin_lock_irqsave(&dev_lock, flags)
+CPU B: val = readl(my_status);
+CPU B: ...
+CPU B: writel(newval2, ring_ptr);
+CPU B: spin_unlock_irqrestore(&dev_lock, flags)
+ ...
+
+上述例子中,设备可能会先接收到newval2的值,然后接收到newval的值,问题就
+发生了。不过很容易通过下面方法来修复:
+
+ ...
+CPU A: spin_lock_irqsave(&dev_lock, flags)
+CPU A: val = readl(my_status);
+CPU A: ...
+CPU A: writel(newval, ring_ptr);
+CPU A: (void)readl(safe_register); /* 配置寄存器?*/
+CPU A: spin_unlock_irqrestore(&dev_lock, flags)
+ ...
+CPU B: spin_lock_irqsave(&dev_lock, flags)
+CPU B: val = readl(my_status);
+CPU B: ...
+CPU B: writel(newval2, ring_ptr);
+CPU B: (void)readl(safe_register); /* 配置寄存器?*/
+CPU B: spin_unlock_irqrestore(&dev_lock, flags)
+
+在解决方案中,读取safe_register寄存器,触发IO芯片清刷未处理的写操作,
+再处理后面的读操作,防止引发数据不一致问题。
diff --git a/Documentation/zh_CN/magic-number.txt b/Documentation/zh_CN/magic-number.txt
new file mode 100644
index 000000000..dfb72a5c6
--- /dev/null
+++ b/Documentation/zh_CN/magic-number.txt
@@ -0,0 +1,155 @@
+Chinese translated version of Documentation/magic-number.txt
+
+If you have any comment or update to the content, please post to LKML directly.
+However, if you have problem communicating in English you can also ask the
+Chinese maintainer for help. Contact the Chinese maintainer, if this
+translation is outdated or there is problem with translation.
+
+Chinese maintainer: Jia Wei Wei <harryxiyou@gmail.com>
+---------------------------------------------------------------------
+Documentation/magic-number.txt的中文翻译
+
+如果想评论或更新本文的内容,请直接发信到LKML。如果你使用英文交流有困难的话,也可
+以向中文版维护者求助。如果本翻译更新不及时或者翻译存在问题,请联系中文版维护者。
+
+中文版维护者: 贾威威 Jia Wei Wei <harryxiyou@gmail.com>
+中文版翻译者: 贾威威 Jia Wei Wei <harryxiyou@gmail.com>
+中文版校译者: 贾威威 Jia Wei Wei <harryxiyou@gmail.com>
+
+以下为正文
+---------------------------------------------------------------------
+这个文件是有关当前使用的魔术值注册表。当你给一个结构添加了一个魔术值,你也应该把这个魔术值添加到这个文件,因为我们最好把用于各种结构的魔术值统一起来。
+
+使用魔术值来保护内核数据结构是一个非常好的主意。这就允许你在运行期检查(a)一个结构是否已经被攻击,或者(b)你已经给一个例行程序通过了一个错误的结构。后一种情况特别地有用---特别是当你通过一个空指针指向结构体的时候。tty源码,例如,经常通过特定驱动使用这种方法并且反复地排列特定方面的结构。
+
+使用魔术值的方法是在结构的开始处声明的,如下:
+
+struct tty_ldisc {
+ int magic;
+ ...
+};
+
+当你以后给内核添加增强功能的时候,请遵守这条规则!这样就会节省数不清的调试时间,特别是一些古怪的情况,例如,数组超出范围并且重新写了超出部分。遵守这个规则,‪这些情况可以被快速地,安全地避免。
+
+ Theodore Ts'o
+ 31 Mar 94
+
+给当前的Linux 2.1.55添加魔术表。
+
+ Michael Chastain
+ <mailto:mec@shout.net>
+ 22 Sep 1997
+
+现在应该最新的Linux 2.1.112.因为在特性冻结期间,不能在2.2.x前改变任何东西。这些条目被数域所排序。
+
+ Krzysztof G.Baranowski
+ <mailto: kgb@knm.org.pl>
+ 29 Jul 1998
+
+更新魔术表到Linux 2.5.45。刚好越过特性冻结,但是有可能还会有一些新的魔术值在2.6.x之前融入到内核中。
+
+ Petr Baudis
+ <pasky@ucw.cz>
+ 03 Nov 2002
+
+更新魔术表到Linux 2.5.74。
+
+ Fabian Frederick
+ <ffrederick@users.sourceforge.net>
+ 09 Jul 2003
+
+魔术名 地址 结构 所在文件
+===========================================================================
+PG_MAGIC 'P' pg_{read,write}_hdr include/linux/pg.h
+CMAGIC 0x0111 user include/linux/a.out.h
+MKISS_DRIVER_MAGIC 0x04bf mkiss_channel drivers/net/mkiss.h
+HDLC_MAGIC 0x239e n_hdlc drivers/char/n_hdlc.c
+APM_BIOS_MAGIC 0x4101 apm_user arch/x86/kernel/apm_32.c
+CYCLADES_MAGIC 0x4359 cyclades_port include/linux/cyclades.h
+DB_MAGIC 0x4442 fc_info drivers/net/iph5526_novram.c
+DL_MAGIC 0x444d fc_info drivers/net/iph5526_novram.c
+FASYNC_MAGIC 0x4601 fasync_struct include/linux/fs.h
+FF_MAGIC 0x4646 fc_info drivers/net/iph5526_novram.c
+ISICOM_MAGIC 0x4d54 isi_port include/linux/isicom.h
+PTY_MAGIC 0x5001 drivers/char/pty.c
+PPP_MAGIC 0x5002 ppp include/linux/if_pppvar.h
+SERIAL_MAGIC 0x5301 async_struct include/linux/serial.h
+SSTATE_MAGIC 0x5302 serial_state include/linux/serial.h
+SLIP_MAGIC 0x5302 slip drivers/net/slip.h
+STRIP_MAGIC 0x5303 strip drivers/net/strip.c
+X25_ASY_MAGIC 0x5303 x25_asy drivers/net/x25_asy.h
+SIXPACK_MAGIC 0x5304 sixpack drivers/net/hamradio/6pack.h
+AX25_MAGIC 0x5316 ax_disp drivers/net/mkiss.h
+TTY_MAGIC 0x5401 tty_struct include/linux/tty.h
+MGSL_MAGIC 0x5401 mgsl_info drivers/char/synclink.c
+TTY_DRIVER_MAGIC 0x5402 tty_driver include/linux/tty_driver.h
+MGSLPC_MAGIC 0x5402 mgslpc_info drivers/char/pcmcia/synclink_cs.c
+TTY_LDISC_MAGIC 0x5403 tty_ldisc include/linux/tty_ldisc.h
+USB_SERIAL_MAGIC 0x6702 usb_serial drivers/usb/serial/usb-serial.h
+FULL_DUPLEX_MAGIC 0x6969 drivers/net/ethernet/dec/tulip/de2104x.c
+USB_BLUETOOTH_MAGIC 0x6d02 usb_bluetooth drivers/usb/class/bluetty.c
+RFCOMM_TTY_MAGIC 0x6d02 net/bluetooth/rfcomm/tty.c
+USB_SERIAL_PORT_MAGIC 0x7301 usb_serial_port drivers/usb/serial/usb-serial.h
+CG_MAGIC 0x00090255 ufs_cylinder_group include/linux/ufs_fs.h
+RPORT_MAGIC 0x00525001 r_port drivers/char/rocket_int.h
+LSEMAGIC 0x05091998 lse drivers/fc4/fc.c
+GDTIOCTL_MAGIC 0x06030f07 gdth_iowr_str drivers/scsi/gdth_ioctl.h
+RIEBL_MAGIC 0x09051990 drivers/net/atarilance.c
+NBD_REQUEST_MAGIC 0x12560953 nbd_request include/linux/nbd.h
+RED_MAGIC2 0x170fc2a5 (any) mm/slab.c
+BAYCOM_MAGIC 0x19730510 baycom_state drivers/net/baycom_epp.c
+ISDN_X25IFACE_MAGIC 0x1e75a2b9 isdn_x25iface_proto_data
+ drivers/isdn/isdn_x25iface.h
+ECP_MAGIC 0x21504345 cdkecpsig include/linux/cdk.h
+LSOMAGIC 0x27091997 lso drivers/fc4/fc.c
+LSMAGIC 0x2a3b4d2a ls drivers/fc4/fc.c
+WANPIPE_MAGIC 0x414C4453 sdla_{dump,exec} include/linux/wanpipe.h
+CS_CARD_MAGIC 0x43525553 cs_card sound/oss/cs46xx.c
+LABELCL_MAGIC 0x4857434c labelcl_info_s include/asm/ia64/sn/labelcl.h
+ISDN_ASYNC_MAGIC 0x49344C01 modem_info include/linux/isdn.h
+CTC_ASYNC_MAGIC 0x49344C01 ctc_tty_info drivers/s390/net/ctctty.c
+ISDN_NET_MAGIC 0x49344C02 isdn_net_local_s drivers/isdn/i4l/isdn_net_lib.h
+SAVEKMSG_MAGIC2 0x4B4D5347 savekmsg arch/*/amiga/config.c
+CS_STATE_MAGIC 0x4c4f4749 cs_state sound/oss/cs46xx.c
+SLAB_C_MAGIC 0x4f17a36d kmem_cache mm/slab.c
+COW_MAGIC 0x4f4f4f4d cow_header_v1 arch/um/drivers/ubd_user.c
+I810_CARD_MAGIC 0x5072696E i810_card sound/oss/i810_audio.c
+TRIDENT_CARD_MAGIC 0x5072696E trident_card sound/oss/trident.c
+ROUTER_MAGIC 0x524d4157 wan_device [in wanrouter.h pre 3.9]
+SCC_MAGIC 0x52696368 gs_port drivers/char/scc.h
+SAVEKMSG_MAGIC1 0x53415645 savekmsg arch/*/amiga/config.c
+GDA_MAGIC 0x58464552 gda arch/mips/include/asm/sn/gda.h
+RED_MAGIC1 0x5a2cf071 (any) mm/slab.c
+EEPROM_MAGIC_VALUE 0x5ab478d2 lanai_dev drivers/atm/lanai.c
+HDLCDRV_MAGIC 0x5ac6e778 hdlcdrv_state include/linux/hdlcdrv.h
+PCXX_MAGIC 0x5c6df104 channel drivers/char/pcxx.h
+KV_MAGIC 0x5f4b565f kernel_vars_s arch/mips/include/asm/sn/klkernvars.h
+I810_STATE_MAGIC 0x63657373 i810_state sound/oss/i810_audio.c
+TRIDENT_STATE_MAGIC 0x63657373 trient_state sound/oss/trident.c
+M3_CARD_MAGIC 0x646e6f50 m3_card sound/oss/maestro3.c
+FW_HEADER_MAGIC 0x65726F66 fw_header drivers/atm/fore200e.h
+SLOT_MAGIC 0x67267321 slot drivers/hotplug/cpqphp.h
+SLOT_MAGIC 0x67267322 slot drivers/hotplug/acpiphp.h
+LO_MAGIC 0x68797548 nbd_device include/linux/nbd.h
+OPROFILE_MAGIC 0x6f70726f super_block drivers/oprofile/oprofilefs.h
+M3_STATE_MAGIC 0x734d724d m3_state sound/oss/maestro3.c
+VMALLOC_MAGIC 0x87654320 snd_alloc_track sound/core/memory.c
+KMALLOC_MAGIC 0x87654321 snd_alloc_track sound/core/memory.c
+PWC_MAGIC 0x89DC10AB pwc_device drivers/usb/media/pwc.h
+NBD_REPLY_MAGIC 0x96744668 nbd_reply include/linux/nbd.h
+ENI155_MAGIC 0xa54b872d midway_eprom drivers/atm/eni.h
+SCI_MAGIC 0xbabeface gs_port drivers/char/sh-sci.h
+CODA_MAGIC 0xC0DAC0DA coda_file_info include/linux/coda_fs_i.h
+DPMEM_MAGIC 0xc0ffee11 gdt_pci_sram drivers/scsi/gdth.h
+YAM_MAGIC 0xF10A7654 yam_port drivers/net/hamradio/yam.c
+CCB_MAGIC 0xf2691ad2 ccb drivers/scsi/ncr53c8xx.c
+QUEUE_MAGIC_FREE 0xf7e1c9a3 queue_entry drivers/scsi/arm/queue.c
+QUEUE_MAGIC_USED 0xf7e1cc33 queue_entry drivers/scsi/arm/queue.c
+HTB_CMAGIC 0xFEFAFEF1 htb_class net/sched/sch_htb.c
+NMI_MAGIC 0x48414d4d455201 nmi_s arch/mips/include/asm/sn/nmi.h
+
+请注意,在声音记忆管理中仍然有一些特殊的为每个驱动定义的魔术值。查看include/sound/sndmagic.h来获取他们完整的列表信息。很多OSS声音驱动拥有自己从声卡PCI ID构建的魔术值-他们也没有被列在这里。
+
+IrDA子系统也使用了大量的自己的魔术值,查看include/net/irda/irda.h来获取他们完整的信息。
+
+HFS是另外一个比较大的使用魔术值的文件系统-你可以在fs/hfs/hfs.h中找到他们。
diff --git a/Documentation/zh_CN/oops-tracing.txt b/Documentation/zh_CN/oops-tracing.txt
new file mode 100644
index 000000000..9312608ff
--- /dev/null
+++ b/Documentation/zh_CN/oops-tracing.txt
@@ -0,0 +1,212 @@
+Chinese translated version of Documentation/oops-tracing.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Chinese maintainer: Dave Young <hidave.darkstar@gmail.com>
+---------------------------------------------------------------------
+Documentation/oops-tracing.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+中文版维护者: 杨瑞 Dave Young <hidave.darkstar@gmail.com>
+中文版翻译者: 杨瑞 Dave Young <hidave.darkstar@gmail.com>
+中文版校译者: 李阳 Li Yang <leo@zh-kernel.org>
+ 王聪 Wang Cong <xiyou.wangcong@gmail.com>
+
+以下为正文
+---------------------------------------------------------------------
+
+注意: ksymoops 在2.6中是没有用的。 请以原有格式使用Oops(来自dmesg,等等)。
+忽略任何这样那样关于“解码Oops”或者“通过ksymoops运行”的文档。 如果你贴出运行过
+ksymoops的来自2.6的Oops,人们只会让你重贴一次。
+
+快速总结
+-------------
+
+发现Oops并发送给看似相关的内核领域的维护者。别太担心对不上号。如果你不确定就发给
+和你所做的事情相关的代码的负责人。 如果可重现试着描述怎样重构。 那甚至比oops更有
+价值。
+
+如果你对于发送给谁一无所知, 发给linux-kernel@vger.kernel.org。感谢你帮助Linux
+尽可能地稳定。
+
+Oops在哪里?
+----------------------
+
+通常Oops文本由klogd从内核缓冲区里读取并传给syslogd,由syslogd写到syslog文件中,
+典型地是/var/log/messages(依赖于/etc/syslog.conf)。有时klogd崩溃了,这种情况下你
+能够运行dmesg > file来从内核缓冲区中读取数据并保存下来。 否则你可以
+cat /proc/kmsg > file, 然而你必须介入中止传输, kmsg是一个“永不结束的文件”。如
+果机器崩溃坏到你不能输入命令或者磁盘不可用那么你有三种选择:-
+
+(1) 手抄屏幕上的文本待机器重启后再输入计算机。 麻烦但如果没有针对崩溃的准备,
+这是仅有的选择。 另外,你可以用数码相机把屏幕拍下来-不太好,但比没有强。 如果信
+息滚动到了终端的上面,你会发现以高分辩率启动(比如,vga=791)会让你读到更多的文
+本。(注意:这需要vesafb,所以对‘早期’的oops没有帮助)
+
+(2)用串口终端启动(请参看Documentation/serial-console.txt),运行一个null
+modem到另一台机器并用你喜欢的通讯工具获取输出。Minicom工作地很好。
+
+(3)使用Kdump(请参看Documentation/kdump/kdump.txt),
+使用在Documentation/kdump/gdbmacros.txt中定义的dmesg gdb宏,从旧的内存中提取内核
+环形缓冲区。
+
+完整信息
+----------------
+
+注意:以下来自于Linus的邮件适用于2.4内核。 我因为历史原因保留了它,并且因为其中
+一些信息仍然适用。 特别注意的是,请忽略任何ksymoops的引用。
+
+From: Linus Torvalds <torvalds@osdl.org>
+
+怎样跟踪Oops.. [原发到linux-kernel的一封邮件]
+
+主要的窍门是有五年和这些烦人的oops消息打交道的经验;-)
+
+实际上,你有办法使它更简单。我有两个不同的方法:
+
+ gdb /usr/src/linux/vmlinux
+ gdb> disassemble <offending_function>
+
+那是发现问题的简单办法,至少如果bug报告做的好的情况下(象这个一样-运行ksymoops
+得到oops发生的函数及函数内的偏移)。
+
+哦,如果报告发生的内核以相同的编译器和相似的配置编译它会有帮助的。
+
+另一件要做的事是反汇编bug报告的“Code”部分:ksymoops也会用正确的工具来做这件事,
+但如果没有那些工具你可以写一个傻程序:
+
+ char str[] = "\xXX\xXX\xXX...";
+ main(){}
+
+并用gcc -g编译它然后执行“disassemble str”(XX部分是由Oops报告的值-你可以仅剪切
+粘贴并用“\x”替换空格-我就是这么做的,因为我懒得写程序自动做这一切)。
+
+另外,你可以用scripts/decodecode这个shell脚本。它的使用方法是:
+decodecode < oops.txt
+
+“Code”之后的十六进制字节可能(在某些架构上)有一些当前指令之前的指令字节以及
+当前和之后的指令字节
+
+Code: f9 0f 8d f9 00 00 00 8d 42 0c e8 dd 26 11 c7 a1 60 ea 2b f9 8b 50 08 a1
+64 ea 2b f9 8d 34 82 8b 1e 85 db 74 6d 8b 15 60 ea 2b f9 <8b> 43 04 39 42 54
+7e 04 40 89 42 54 8b 43 04 3b 05 00 f6 52 c0
+
+最后,如果你想知道代码来自哪里,你可以:
+
+ cd /usr/src/linux
+ make fs/buffer.s # 或任何产生BUG的文件
+
+然后你会比gdb反汇编更清楚的知道发生了什么。
+
+现在,问题是把你所拥有的所有数据结合起来:C源码(关于它应该怎样的一般知识),
+汇编代码及其反汇编得到的代码(另外还有从“oops”消息得到的寄存器状态-对了解毁坏的
+指针有用,而且当你有了汇编代码你也能拿其它的寄存器和任何它们对应的C表达式做匹配
+)。
+
+实际上,你仅需看看哪里不匹配(这个例子是“Code”反汇编和编译器生成的代码不匹配)。
+然后你须要找出为什么不匹配。通常很简单-你看到代码使用了空指针然后你看代码想知道
+空指针是怎么出现的,还有检查它是否合法..
+
+现在,如果明白这是一项耗时的工作而且需要一丁点儿的专心,没错。这就是我为什么大多
+只是忽略那些没有符号表信息的崩溃报告的原因:简单的说太难查找了(我有一些
+程序用于在内核代码段中搜索特定的模式,而且有时我也已经能找出那些崩溃的地方,但是
+仅仅是找出正确的序列也确实需要相当扎实的内核知识)
+
+_有时_会发生这种情况,我仅看到崩溃中的反汇编代码序列, 然后我马上就明白问题出在
+哪里。这时我才意识到自己干这个工作已经太长时间了;-)
+
+ Linus
+
+
+---------------------------------------------------------------------------
+关于Oops跟踪的注解:
+
+为了帮助Linus和其它内核开发者,klogd纳入了大量的支持来处理保护错误。为了拥有对
+地址解析的完整支持至少应该使用1.3-pl3的sysklogd包。
+
+当保护错误发生时,klogd守护进程自动把内核日志信息中的重要地址翻译成它们相应的符
+号。
+
+klogd执行两种类型的地址解析。首先是静态翻译其次是动态翻译。静态翻译和ksymoops
+一样使用System.map文件。为了做静态翻译klogd守护进程必须在初始化时能找到system
+map文件。关于klogd怎样搜索map文件请参看klogd手册页。
+
+动态地址翻译在使用内核可装载模块时很重要。 因为内核模块的内存是从内核动态内存池
+里分配的,所以不管是模块开始位置还是模块中函数和符号的位置都不是固定的。
+
+内核支持允许程序决定装载哪些模块和它们在内存中位置的系统调用。使用这些系统调用
+klogd守护进程生成一张符号表用于调试发生在可装载模块中的保护错误。
+
+至少klogd会提供产生保护错误的模块名。还可有额外的符号信息供可装载模块开发者选择
+以从模块中输出符号信息。
+
+因为内核模块环境可能是动态的,所以必须有一种机制当模块环境发生改变时来通知klogd
+守护进程。 有一些可用的命令行选项允许klogd向当前执行中的守护进程发送信号,告知符
+号信息应该被刷新了。 更多信息请参看klogd手册页。
+
+sysklogd发布时包含一个补丁修改了modules-2.0.0包,无论何时一个模块装载或者卸载都
+会自动向klogd发送信号。打上这个补丁提供了必要的对调试发生于内核可装载模块的保护
+错误的无缝支持。
+
+以下是被klogd处理过的发生在可装载模块中的一个保护错误例子:
+---------------------------------------------------------------------------
+Aug 29 09:51:01 blizard kernel: Unable to handle kernel paging request at virtual address f15e97cc
+Aug 29 09:51:01 blizard kernel: current->tss.cr3 = 0062d000, %cr3 = 0062d000
+Aug 29 09:51:01 blizard kernel: *pde = 00000000
+Aug 29 09:51:01 blizard kernel: Oops: 0002
+Aug 29 09:51:01 blizard kernel: CPU: 0
+Aug 29 09:51:01 blizard kernel: EIP: 0010:[oops:_oops+16/3868]
+Aug 29 09:51:01 blizard kernel: EFLAGS: 00010212
+Aug 29 09:51:01 blizard kernel: eax: 315e97cc ebx: 003a6f80 ecx: 001be77b edx: 00237c0c
+Aug 29 09:51:01 blizard kernel: esi: 00000000 edi: bffffdb3 ebp: 00589f90 esp: 00589f8c
+Aug 29 09:51:01 blizard kernel: ds: 0018 es: 0018 fs: 002b gs: 002b ss: 0018
+Aug 29 09:51:01 blizard kernel: Process oops_test (pid: 3374, process nr: 21, stackpage=00589000)
+Aug 29 09:51:01 blizard kernel: Stack: 315e97cc 00589f98 0100b0b4 bffffed4 0012e38e 00240c64 003a6f80 00000001
+Aug 29 09:51:01 blizard kernel: 00000000 00237810 bfffff00 0010a7fa 00000003 00000001 00000000 bfffff00
+Aug 29 09:51:01 blizard kernel: bffffdb3 bffffed4 ffffffda 0000002b 0007002b 0000002b 0000002b 00000036
+Aug 29 09:51:01 blizard kernel: Call Trace: [oops:_oops_ioctl+48/80] [_sys_ioctl+254/272] [_system_call+82/128]
+Aug 29 09:51:01 blizard kernel: Code: c7 00 05 00 00 00 eb 08 90 90 90 90 90 90 90 90 89 ec 5d c3
+---------------------------------------------------------------------------
+
+Dr. G.W. Wettstein Oncology Research Div. Computing Facility
+Roger Maris Cancer Center INTERNET: greg@wind.rmcc.com
+820 4th St. N.
+Fargo, ND 58122
+Phone: 701-234-7556
+
+
+---------------------------------------------------------------------------
+受污染的内核
+
+一些oops报告在程序记数器之后包含字符串'Tainted: '。这表明内核已经被一些东西给污
+染了。 该字符串之后紧跟着一系列的位置敏感的字符,每个代表一个特定的污染值。
+
+ 1:'G'如果所有装载的模块都有GPL或相容的许可证,'P'如果装载了任何的专有模块。
+没有模块MODULE_LICENSE或者带有insmod认为是与GPL不相容的的MODULE_LICENSE的模块被
+认定是专有的。
+
+ 2:'F'如果有任何通过“insmod -f”被强制装载的模块,' '如果所有模块都被正常装载。
+
+ 3:'S'如果oops发生在SMP内核中,运行于没有证明安全运行多处理器的硬件。 当前这种
+情况仅限于几种不支持SMP的速龙处理器。
+
+ 4:'R'如果模块通过“insmod -f”被强制装载,' '如果所有模块都被正常装载。
+
+ 5:'M'如果任何处理器报告了机器检查异常,' '如果没有发生机器检查异常。
+
+ 6:'B'如果页释放函数发现了一个错误的页引用或者一些非预期的页标志。
+
+ 7:'U'如果用户或者用户应用程序特别请求设置污染标志,否则' '。
+
+ 8:'D'如果内核刚刚死掉,比如有OOPS或者BUG。
+
+使用'Tainted: '字符串的主要原因是要告诉内核调试者,这是否是一个干净的内核亦或发
+生了任何的不正常的事。污染是永久的:即使出错的模块已经被卸载了,污染值仍然存在,
+以表明内核不再值得信任。
diff --git a/Documentation/zh_CN/sparse.txt b/Documentation/zh_CN/sparse.txt
new file mode 100644
index 000000000..cc144e581
--- /dev/null
+++ b/Documentation/zh_CN/sparse.txt
@@ -0,0 +1,100 @@
+Chinese translated version of Documentation/sparse.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Chinese maintainer: Li Yang <leo@zh-kernel.org>
+---------------------------------------------------------------------
+Documentation/sparse.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+中文版维护者: 李阳 Li Yang <leo@zh-kernel.org>
+中文版翻译者: 李阳 Li Yang <leo@zh-kernel.org>
+
+
+以下为正文
+---------------------------------------------------------------------
+
+Copyright 2004 Linus Torvalds
+Copyright 2004 Pavel Machek <pavel@ucw.cz>
+Copyright 2006 Bob Copeland <me@bobcopeland.com>
+
+使用 sparse 工具做类型检查
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+"__bitwise" 是一种类型属性,所以你应该这样使用它:
+
+ typedef int __bitwise pm_request_t;
+
+ enum pm_request {
+ PM_SUSPEND = (__force pm_request_t) 1,
+ PM_RESUME = (__force pm_request_t) 2
+ };
+
+这样会使 PM_SUSPEND 和 PM_RESUME 成为位方式(bitwise)整数(使用"__force"
+是因为 sparse 会抱怨改变位方式的类型转换,但是这里我们确实需要强制进行转
+换)。而且因为所有枚举值都使用了相同的类型,这里的"enum pm_request"也将
+会使用那个类型做为底层实现。
+
+而且使用 gcc 编译的时候,所有的 __bitwise/__force 都会消失,最后在 gcc
+看来它们只不过是普通的整数。
+
+坦白来说,你并不需要使用枚举类型。上面那些实际都可以浓缩成一个特殊的"int
+__bitwise"类型。
+
+所以更简单的办法只要这样做:
+
+ typedef int __bitwise pm_request_t;
+
+ #define PM_SUSPEND ((__force pm_request_t) 1)
+ #define PM_RESUME ((__force pm_request_t) 2)
+
+现在你就有了严格的类型检查所需要的所有基础架构。
+
+一个小提醒:常数整数"0"是特殊的。你可以直接把常数零当作位方式整数使用而
+不用担心 sparse 会抱怨。这是因为"bitwise"(恰如其名)是用来确保不同位方
+式类型不会被弄混(小尾模式,大尾模式,cpu尾模式,或者其他),对他们来说
+常数"0"确实是特殊的。
+
+获取 sparse 工具
+~~~~~~~~~~~~~~~~
+
+你可以从 Sparse 的主页获取最新的发布版本:
+
+ http://www.kernel.org/pub/linux/kernel/people/josh/sparse/
+
+或者,你也可以使用 git 克隆最新的 sparse 开发版本:
+
+ git://git.kernel.org/pub/scm/linux/kernel/git/josh/sparse.git
+
+DaveJ 把每小时自动生成的 git 源码树 tar 包放在以下地址:
+
+ http://www.codemonkey.org.uk/projects/git-snapshots/sparse/
+
+一旦你下载了源码,只要以普通用户身份运行:
+
+ make
+ make install
+
+它将会被自动安装到你的 ~/bin 目录下。
+
+使用 sparse 工具
+~~~~~~~~~~~~~~~~
+
+用"make C=1"命令来编译内核,会对所有重新编译的 C 文件使用 sparse 工具。
+或者使用"make C=2"命令,无论文件是否被重新编译都会对其使用 sparse 工具。
+如果你已经编译了内核,用后一种方式可以很快地检查整个源码树。
+
+make 的可选变量 CHECKFLAGS 可以用来向 sparse 工具传递参数。编译系统会自
+动向 sparse 工具传递 -Wbitwise 参数。你可以定义 __CHECK_ENDIAN__ 来进行
+大小尾检查。
+
+ make C=2 CHECKFLAGS="-D__CHECK_ENDIAN__"
+
+这些检查默认都是被关闭的,因为他们通常会产生大量的警告。
diff --git a/Documentation/zh_CN/stable_api_nonsense.txt b/Documentation/zh_CN/stable_api_nonsense.txt
new file mode 100644
index 000000000..c26a27d1e
--- /dev/null
+++ b/Documentation/zh_CN/stable_api_nonsense.txt
@@ -0,0 +1,157 @@
+Chinese translated version of Documentation/stable_api_nonsense.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have problem
+communicating in English you can also ask the Chinese maintainer for help.
+Contact the Chinese maintainer, if this translation is outdated or there
+is problem with translation.
+
+Maintainer: Greg Kroah-Hartman <greg@kroah.com>
+Chinese maintainer: TripleX Chung <zhongyu@18mail.cn>
+---------------------------------------------------------------------
+Documentation/stable_api_nonsense.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+英文版维护者: Greg Kroah-Hartman <greg@kroah.com>
+中文版维护者: 钟宇 TripleX Chung <zhongyu@18mail.cn>
+中文版翻译者: 钟宇 TripleX Chung <zhongyu@18mail.cn>
+中文版校译者: 李阳 Li Yang <leoli@freescale.com>
+以下为正文
+---------------------------------------------------------------------
+
+写作本文档的目的,是为了解释为什么Linux既没有二进制内核接口,也没有稳定
+的内核接口。这里所说的内核接口,是指内核里的接口,而不是内核和用户空间
+的接口。内核到用户空间的接口,是提供给应用程序使用的系统调用,系统调用
+在历史上几乎没有过变化,将来也不会有变化。我有一些老应用程序是在0.9版本
+或者更早版本的内核上编译的,在使用2.6版本内核的Linux发布上依然用得很好
+。用户和应用程序作者可以将这个接口看成是稳定的。
+
+
+执行纲要
+--------
+
+你也许以为自己想要稳定的内核接口,但是你不清楚你要的实际上不是它。你需
+要的其实是稳定的驱动程序,而你只有将驱动程序放到公版内核的源代码树里,
+才有可能达到这个目的。而且这样做还有很多其它好处,正是因为这些好处使得
+Linux能成为强壮,稳定,成熟的操作系统,这也是你最开始选择Linux的原因。
+
+
+入门
+-----
+
+只有那些写驱动程序的“怪人”才会担心内核接口的改变,对广大用户来说,既
+看不到内核接口,也不需要去关心它。
+
+首先,我不打算讨论关于任何非GPL许可的内核驱动的法律问题,这些非GPL许可
+的驱动程序包括不公开源代码,隐藏源代码,二进制或者是用源代码包装,或者
+是其它任何形式的不能以GPL许可公开源代码的驱动程序。如果有法律问题,请咨
+询律师,我只是一个程序员,所以我只打算探讨技术问题(不是小看法律问题,
+法律问题很实际,并且需要一直关注)。
+
+既然只谈技术问题,我们就有了下面两个主题:二进制内核接口和稳定的内核源
+代码接口。这两个问题是互相关联的,让我们先解决掉二进制接口的问题。
+
+
+二进制内核接口
+--------------
+假如我们有一个稳定的内核源代码接口,那么自然而然的,我们就拥有了稳定的
+二进制接口,是这样的吗?错。让我们看看关于Linux内核的几点事实:
+ - 取决于所用的C编译器的版本,不同的内核数据结构里的结构体的对齐方
+式会有差别,代码中不同函数的表现形式也不一样(函数是不是被inline编译取
+决于编译器行为)。不同的函数的表现形式并不重要,但是数据结构内部的对齐
+方式很关键。
+ - 取决于内核的配置选项,不同的选项会让内核的很多东西发生改变:
+ - 同一个结构体可能包含不同的成员变量
+ - 有的函数可能根本不会被实现(比如编译的时候没有选择SMP支持
+,一些锁函数就会被定义成空函数)。
+ - 内核使用的内存会以不同的方式对齐,这取决于不同的内核配置选
+项。
+ - Linux可以在很多的不同体系结构的处理器上运行。在某个体系结构上编
+译好的二进制驱动程序,不可能在另外一个体系结构上正确的运行。
+
+对于一个特定的内核,满足这些条件并不难,使用同一个C编译器和同样的内核配
+置选项来编译驱动程序模块就可以了。这对于给一个特定Linux发布的特定版本提
+供驱动程序,是完全可以满足需求的。但是如果你要给不同发布的不同版本都发
+布一个驱动程序,就需要在每个发布上用不同的内核设置参数都编译一次内核,
+这简直跟噩梦一样。而且还要注意到,每个Linux发布还提供不同的Linux内核,
+这些内核都针对不同的硬件类型进行了优化(有很多种不同的处理器,还有不同
+的内核设置选项)。所以每发布一次驱动程序,都需要提供很多不同版本的内核
+模块。
+
+相信我,如果你真的要采取这种发布方式,一定会慢慢疯掉,我很久以前就有过
+深刻的教训...
+
+
+稳定的内核源代码接口
+--------------------
+
+如果有人不将他的内核驱动程序,放入公版内核的源代码树,而又想让驱动程序
+一直保持在最新的内核中可用,那么这个话题将会变得没完没了。
+ 内核开发是持续而且快节奏的,从来都不会慢下来。内核开发人员在当前接口中
+找到bug,或者找到更好的实现方式。一旦发现这些,他们就很快会去修改当前的
+接口。修改接口意味着,函数名可能会改变,结构体可能被扩充或者删减,函数
+的参数也可能发生改变。一旦接口被修改,内核中使用这些接口的地方需要同时
+修正,这样才能保证所有的东西继续工作。
+
+举一个例子,内核的USB驱动程序接口在USB子系统的整个生命周期中,至少经历
+了三次重写。这些重写解决以下问题:
+ - 把数据流从同步模式改成非同步模式,这个改动减少了一些驱动程序的
+复杂度,提高了所有USB驱动程序的吞吐率,这样几乎所有的USB设备都能以最大
+速率工作了。
+ - 修改了USB核心代码中为USB驱动分配数据包内存的方式,所有的驱动都
+需要提供更多的参数给USB核心,以修正了很多已经被记录在案的死锁。
+
+这和一些封闭源代码的操作系统形成鲜明的对比,在那些操作系统上,不得不额
+外的维护旧的USB接口。这导致了一个可能性,新的开发者依然会不小心使用旧的
+接口,以不恰当的方式编写代码,进而影响到操作系统的稳定性。
+ 在上面的例子中,所有的开发者都同意这些重要的改动,在这样的情况下修改代
+价很低。如果Linux保持一个稳定的内核源代码接口,那么就得创建一个新的接口
+;旧的,有问题的接口必须一直维护,给Linux USB开发者带来额外的工作。既然
+所有的Linux USB驱动的作者都是利用自己的时间工作,那么要求他们去做毫无意
+义的免费额外工作,是不可能的。
+ 安全问题对Linux来说十分重要。一个安全问题被发现,就会在短时间内得到修
+正。在很多情况下,这将导致Linux内核中的一些接口被重写,以从根本上避免安
+全问题。一旦接口被重写,所有使用这些接口的驱动程序,必须同时得到修正,
+以确定安全问题已经得到修复并且不可能在未来还有同样的安全问题。如果内核
+内部接口不允许改变,那么就不可能修复这样的安全问题,也不可能确认这样的
+安全问题以后不会发生。
+开发者一直在清理内核接口。如果一个接口没有人在使用了,它就会被删除。这
+样可以确保内核尽可能的小,而且所有潜在的接口都会得到尽可能完整的测试
+(没有人使用的接口是不可能得到良好的测试的)。
+
+
+要做什么
+-------
+
+如果你写了一个Linux内核驱动,但是它还不在Linux源代码树里,作为一个开发
+者,你应该怎么做?为每个发布的每个版本提供一个二进制驱动,那简直是一个
+噩梦,要跟上永远处于变化之中的内核接口,也是一件辛苦活。
+很简单,让你的驱动进入内核源代码树(要记得我们在谈论的是以GPL许可发行
+的驱动,如果你的代码不符合GPL,那么祝你好运,你只能自己解决这个问题了,
+你这个吸血鬼<把Andrew和Linus对吸血鬼的定义链接到这里>)。当你的代码加入
+公版内核源代码树之后,如果一个内核接口改变,你的驱动会直接被修改接口的
+那个人修改。保证你的驱动永远都可以编译通过,并且一直工作,你几乎不需要
+做什么事情。
+
+把驱动放到内核源代码树里会有很多的好处:
+ - 驱动的质量会提升,而维护成本(对原始作者来说)会下降。
+ - 其他人会给驱动添加新特性。
+ - 其他人会找到驱动中的bug并修复。
+ - 其他人会在驱动中找到性能优化的机会。
+ - 当外部的接口的改变需要修改驱动程序的时候,其他人会修改驱动程序
+。
+ - 不需要联系任何发行商,这个驱动会自动的随着所有的Linux发布一起发
+布。
+
+和别的操作系统相比,Linux为更多不同的设备提供现成的驱动,而且能在更多不
+同体系结构的处理器上支持这些设备。这个经过考验的开发模式,必然是错不了
+的 :)
+
+-------------
+感谢 Randy Dunlap, Andrew Morton, David Brownell, Hanna Linder,
+Robert Love, and Nishanth Aravamudan 对于本文档早期版本的评审和建议。
+
+英文版维护者: Greg Kroah-Hartman <greg@kroah.com>
diff --git a/Documentation/zh_CN/stable_kernel_rules.txt b/Documentation/zh_CN/stable_kernel_rules.txt
new file mode 100644
index 000000000..26ea5ed7c
--- /dev/null
+++ b/Documentation/zh_CN/stable_kernel_rules.txt
@@ -0,0 +1,66 @@
+Chinese translated version of Documentation/stable_kernel_rules.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Chinese maintainer: TripleX Chung <triplex@zh-kernel.org>
+---------------------------------------------------------------------
+Documentation/stable_kernel_rules.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+
+中文版维护者: 钟宇 TripleX Chung <triplex@zh-kernel.org>
+中文版翻译者: 钟宇 TripleX Chung <triplex@zh-kernel.org>
+中文版校译者: 李阳 Li Yang <leo@zh-kernel.org>
+ Kangkai Yin <e12051@motorola.com>
+
+以下为正文
+---------------------------------------------------------------------
+
+关于Linux 2.6稳定版发布,所有你想知道的事情。
+
+关于哪些类型的补丁可以被接收进入稳定版代码树,哪些不可以的规则:
+
+ - 必须是显而易见的正确,并且经过测试的。
+ - 连同上下文,不能大于100行。
+ - 必须只修正一件事情。
+ - 必须修正了一个给大家带来麻烦的真正的bug(不是“这也许是一个问题...”
+ 那样的东西)。
+ - 必须修正带来如下后果的问题:编译错误(对被标记为CONFIG_BROKEN的例外),
+ 内核崩溃,挂起,数据损坏,真正的安全问题,或者一些类似“哦,这不
+ 好”的问题。简短的说,就是一些致命的问题。
+ - 没有“理论上的竞争条件”,除非能给出竞争条件如何被利用的解释。
+ - 不能存在任何的“琐碎的”修正(拼写修正,去掉多余空格之类的)。
+ - 必须被相关子系统的维护者接受。
+ - 必须遵循Documentation/SubmittingPatches里的规则。
+
+向稳定版代码树提交补丁的过程:
+
+ - 在确认了补丁符合以上的规则后,将补丁发送到stable@vger.kernel.org。
+ - 如果补丁被接受到队列里,发送者会收到一个ACK回复,如果没有被接受,收
+ 到的是NAK回复。回复需要几天的时间,这取决于开发者的时间安排。
+ - 被接受的补丁会被加到稳定版本队列里,等待其他开发者的审查。
+ - 安全方面的补丁不要发到这个列表,应该发送到security@kernel.org。
+
+审查周期:
+
+ - 当稳定版的维护者决定开始一个审查周期,补丁将被发送到审查委员会,以
+ 及被补丁影响的领域的维护者(除非提交者就是该领域的维护者)并且抄送
+ 到linux-kernel邮件列表。
+ - 审查委员会有48小时的时间,用来决定给该补丁回复ACK还是NAK。
+ - 如果委员会中有成员拒绝这个补丁,或者linux-kernel列表上有人反对这个
+ 补丁,并提出维护者和审查委员会之前没有意识到的问题,补丁会从队列中
+ 丢弃。
+ - 在审查周期结束的时候,那些得到ACK回应的补丁将会被加入到最新的稳定版
+ 发布中,一个新的稳定版发布就此产生。
+ - 安全性补丁将从内核安全小组那里直接接收到稳定版代码树中,而不是通过
+ 通常的审查周期。请联系内核安全小组以获得关于这个过程的更多细节。
+
+审查委员会:
+ - 由一些自愿承担这项任务的内核开发者,和几个非志愿的组成。
diff --git a/Documentation/zh_CN/video4linux/omap3isp.txt b/Documentation/zh_CN/video4linux/omap3isp.txt
new file mode 100644
index 000000000..67ffbf352
--- /dev/null
+++ b/Documentation/zh_CN/video4linux/omap3isp.txt
@@ -0,0 +1,277 @@
+Chinese translated version of Documentation/video4linux/omap3isp.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ Sakari Ailus <sakari.ailus@iki.fi>
+ David Cohen <dacohen@gmail.com>
+Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
+---------------------------------------------------------------------
+Documentation/video4linux/omap3isp.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+英文版维护者: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ Sakari Ailus <sakari.ailus@iki.fi>
+ David Cohen <dacohen@gmail.com>
+中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版校译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+
+
+以下为正文
+---------------------------------------------------------------------
+OMAP 3 图像信号处理器 (ISP) 驱动
+
+Copyright (C) 2010 Nokia Corporation
+Copyright (C) 2009 Texas Instruments, Inc.
+
+联系人: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ Sakari Ailus <sakari.ailus@iki.fi>
+ David Cohen <dacohen@gmail.com>
+
+
+介绍
+===
+
+本文档介绍了由 drivers/media/video/omap3isp 加载的德州仪器
+(TI)OMAP 3 图像信号处理器 (ISP) 驱动。原始驱动由德州仪器(TI)
+编写,但此后由诺基亚重写了两次。
+
+驱动已在以下 OMAP 3 系列的芯片中成功使用:
+
+ 3430
+ 3530
+ 3630
+
+驱动实现了 V4L2、媒体控制器和 v4l2_subdev 接口。支持内核中使用
+v4l2_subdev 接口的传感器、镜头和闪光灯驱动。
+
+
+拆分为子设备
+==========
+
+OMAP 3 ISP 被拆分为 V4L2 子设备,ISP中的每个模块都由一个子设备
+来表示。每个子设备向用户空间提供一个 V4L2 子设备接口。
+
+ OMAP3 ISP CCP2
+ OMAP3 ISP CSI2a
+ OMAP3 ISP CCDC
+ OMAP3 ISP preview
+ OMAP3 ISP resizer
+ OMAP3 ISP AEWB
+ OMAP3 ISP AF
+ OMAP3 ISP histogram
+
+ISP 中每个可能的连接都通过一个链接嵌入到媒体控制器接口中。详见例程 [2]。
+
+
+控制 OMAP 3 ISP
+==============
+
+通常,对 OMAP 3 ISP 的配置会在下一帧起始时生效。在传感器垂直消隐期间,
+模块变为空闲时完成配置。在内存到内存的操作中,视频管道一次处理一帧。
+应用配置应在帧间完成。
+
+ISP 中的所有模块,除 CSI-2 和 (可能存在的)CCP2 接收器外,都必须
+接收完整的帧数据。因此,传感器必须保证从不发送部分帧数据给ISP。
+
+Autoidle(自动空闲)功能至少在 3430 的 ISP 模块中确实存在一些问题。
+当 omap3isp 模块参数 autoidle 非零时,autoidle(自动空闲)功能
+仅在 3630 中启用了。
+
+
+事件机制
+======
+
+OMAP 3 ISP 驱动在 CCDC 和统计(AEWB、AF 和 直方图)子设备中支持
+V4L2 事件机制接口。
+
+CCDC 子设备通过 HS_VS 中断,处理 V4L2_EVENT_FRAME_SYNC 类型
+事件,用于告知帧起始。早期版本的驱动则使用 V4L2_EVENT_OMAP3ISP_HS_VS。
+当在 CCDC 模块中接收到起始帧的第一行时,会准确地触发事件。这个事件
+可以在 CCDC 子设备中“订阅”。
+
+(当使用并行接口时,必须注意正确地配置 VS 信号极性。而当使用串行接收时
+这个会自动校正。)
+
+每个统计子设备都可以产生事件。每当一个统计缓冲区可由用户空间应用程序
+通过 VIDIOC_OMAP3ISP_STAT_REQ IOCTL 操作获取时,就会产生一个
+事件。当前存在以下事件:
+
+ V4L2_EVENT_OMAP3ISP_AEWB
+ V4L2_EVENT_OMAP3ISP_AF
+ V4L2_EVENT_OMAP3ISP_HIST
+
+这些 ioctl 的事件数据类型为 struct omap3isp_stat_event_status
+结构体。如果出现计算错误的统计,也同样会产生一个事件,但没有相关的统计
+数据缓冲区。这种情况下 omap3isp_stat_event_status.buf_err 会被
+设置为非零值。
+
+
+私有 IOCTL
+==========
+
+OMAP 3 ISP 驱动支持标准的 V4L2 IOCTL 以及可能存在且实用的控制。但
+ISP 提供的许多功能都不在标准 IOCTL 之列,例如 gamma(伽马)表和统计
+数据采集配置等。
+
+通常,会有一个私有 ioctl 用于配置每个包含硬件依赖功能的模块。
+
+支持以下私有 IOCTL:
+
+ VIDIOC_OMAP3ISP_CCDC_CFG
+ VIDIOC_OMAP3ISP_PRV_CFG
+ VIDIOC_OMAP3ISP_AEWB_CFG
+ VIDIOC_OMAP3ISP_HIST_CFG
+ VIDIOC_OMAP3ISP_AF_CFG
+ VIDIOC_OMAP3ISP_STAT_REQ
+ VIDIOC_OMAP3ISP_STAT_EN
+
+在 include/linux/omap3isp.h 中描述了这些 ioctl 使用的参数结构体。
+与特定 ISP 模块相关的 ISP 自身的详细功能在技术参考手册 (TRMs)中有
+描述,详见文档结尾。
+
+虽然在不使用任何私有 IOCTL 的情况下使用 ISP 驱动是可能的,但这样无法
+获得最佳的图像质量。AEWB、AF 和 直方图(译者注:一般用于自动曝光和增益
+控制,以及图像均衡等)模块无法在未使用适当的私有 IOCTL 配置的情况下使用。
+
+
+CCDC 和 preview(预览)模块 IOCTL
+===============================
+
+VIDIOC_OMAP3ISP_CCDC_CFG 和 VIDIOC_OMAP3ISP_PRV_CFG IOCTL
+被分别用于配置、启用和禁用 CCDC 和 preview(预览)模块的功能。在它们
+所控制的模块中,两个 IOCTL 控制多种功能。VIDIOC_OMAP3ISP_CCDC_CFG IOCTL
+接受一个指向 omap3isp_ccdc_update_config 结构体的指针作为它的参数。
+同样的,VIDIOC_OMAP3ISP_PRV_CFG 接受一个指向 omap3isp_prev_update_config
+结构体的指针。以上两个结构体定义位于 [1]。
+
+这些结构体中的 update 域标识是否针对指定的功能更新配置,而 flag 域
+则标识是启用还是禁用此功能。
+
+update 和 flag 位接受以下掩码值。CCDC 和 preview(预览)模块的
+每个单独功能都与一个 flag 关联(禁用或启用;在结构体中 flag 域的
+一部分)和一个指向功能配置数据的指针。
+
+对于 VIDIOC_OMAP3ISP_CCDC_CFG,下面列出了 update 和 flag 域
+中的有效值。 这些值可能会在同一个 IOCTL 调用中配置多个功能。
+
+ OMAP3ISP_CCDC_ALAW
+ OMAP3ISP_CCDC_LPF
+ OMAP3ISP_CCDC_BLCLAMP
+ OMAP3ISP_CCDC_BCOMP
+ OMAP3ISP_CCDC_FPC
+ OMAP3ISP_CCDC_CULL
+ OMAP3ISP_CCDC_CONFIG_LSC
+ OMAP3ISP_CCDC_TBL_LSC
+
+针对 VIDIOC_OMAP3ISP_PRV_CFG 的相应值如下:
+
+ OMAP3ISP_PREV_LUMAENH
+ OMAP3ISP_PREV_INVALAW
+ OMAP3ISP_PREV_HRZ_MED
+ OMAP3ISP_PREV_CFA
+ OMAP3ISP_PREV_CHROMA_SUPP
+ OMAP3ISP_PREV_WB
+ OMAP3ISP_PREV_BLKADJ
+ OMAP3ISP_PREV_RGB2RGB
+ OMAP3ISP_PREV_COLOR_CONV
+ OMAP3ISP_PREV_YC_LIMIT
+ OMAP3ISP_PREV_DEFECT_COR
+ OMAP3ISP_PREV_GAMMABYPASS
+ OMAP3ISP_PREV_DRK_FRM_CAPTURE
+ OMAP3ISP_PREV_DRK_FRM_SUBTRACT
+ OMAP3ISP_PREV_LENS_SHADING
+ OMAP3ISP_PREV_NF
+ OMAP3ISP_PREV_GAMMA
+
+在启用某个功能的时候,相关的配置数据指针不可为 NULL。在禁用某个功能时,
+配置数据指针会被忽略。
+
+
+统计模块 IOCTL
+=============
+
+统计子设备相较于其他子设备提供了更多动态配置选项。在图像处理流水线处于
+工作状态时,它们可以被启用、禁用和重配。
+
+统计模块总是从 CCDC 中获取输入的图像数据(由于直方图内存读取未实现)。
+统计数据可由用户通过统计子设备节点使用私有 IOCTL 获取。
+
+AEWB、AF 和 直方图子设备提供的私有 IOCTL 极大程度上反应了 ISP 硬件
+提供的寄存器级接口。有些方面纯粹和驱动程序的实现相关,这些将在下面讨论。
+
+VIDIOC_OMAP3ISP_STAT_EN
+-----------------------
+
+这个私有 IOCTL 启用/禁用 一个统计模块。如果这个申请在视频流启动前完成,
+它将在视频流水线开始工作时生效。如果视频流水线已经处于工作状态了,它将在
+CCDC 变为空闲时生效。
+
+VIDIOC_OMAP3ISP_AEWB_CFG, VIDIOC_OMAP3ISP_HIST_CFG and VIDIOC_OMAP3ISP_AF_CFG
+-----------------------------------------------------------------------------
+
+这些 IOCTL 用于配置模块。它们要求用户应用程序对硬件有深入的认识。对
+大多数域的解释可以在 OMAP 的 TRM 中找到。以下两个域对于以上所有的
+私有 IOCTL 配置都很常见,由于他们没有在 TRM 中提及,故需要对其有
+更好的认识。
+
+omap3isp_[h3a_af/h3a_aewb/hist]_config.buf_size:
+
+模块在内部处理自身缓冲。对模块数据输出所必需的缓存大小依赖于已申请的配置。
+虽然驱动支持在视频流工作时重新配置,但对于所需缓存量大于模块启用时内部
+所分配数量的情况,则不支持重新配置。在这种情况下将返回 -EBUSY。为了避免
+此类状况,无论是禁用/重配/启用模块,还是第一次配置时申请必须的缓存大小,
+都应在模块禁用的情况下进行。
+
+内部缓冲分配的大小需综合考虑所申请配置的最小缓存量以及 buf_size 域中
+所设的值。如果 buf_size 域在[minimum(最小值), maximum(最大值)]
+缓冲大小范围之外,则应该将其调整到其范围中。驱动则会选择最大值。正确的
+buf_size 值将回写到用户应用程序中。
+
+omap3isp_[h3a_af/h3a_aewb/hist]_config.config_counter:
+
+由于配置并未在申请之后同步生效,驱动必须提供一个跟踪这类信息的方法,
+以提供更准确的数据。在一个配置被申请之后,返回到用户空间应用程序的
+config_counter 是一个与其配置相关的唯一值。当用户应用程序接收到
+一个缓冲可用或一个新的缓冲申请事件时,这个 config_counter 用于
+一个缓冲数据和一个配置的匹配。
+
+VIDIOC_OMAP3ISP_STAT_REQ
+------------------------
+
+将内部缓冲队列中最早的数据发送到用户空间,然后丢弃此缓冲区。
+omap3isp_stat_data.frame_number 域与视频缓冲的 field_count
+域相匹配。
+
+
+技术参考手册 (TRMs) 和其他文档
+==========================
+
+OMAP 3430 TRM:
+<URL:http://focus.ti.com/pdfs/wtbu/OMAP34xx_ES3.1.x_PUBLIC_TRM_vZM.zip>
+参考于 2011-03-05.
+
+OMAP 35xx TRM:
+<URL:http://www.ti.com/litv/pdf/spruf98o> 参考于 2011-03-05.
+
+OMAP 3630 TRM:
+<URL:http://focus.ti.com/pdfs/wtbu/OMAP36xx_ES1.x_PUBLIC_TRM_vQ.zip>
+参考于 2011-03-05.
+
+DM 3730 TRM:
+<URL:http://www.ti.com/litv/pdf/sprugn4h> 参考于 2011-03-06.
+
+
+参考资料
+=======
+
+[1] include/linux/omap3isp.h
+
+[2] http://git.ideasonboard.org/?p=media-ctl.git;a=summary
diff --git a/Documentation/zh_CN/video4linux/v4l2-framework.txt b/Documentation/zh_CN/video4linux/v4l2-framework.txt
new file mode 100644
index 000000000..2b828e631
--- /dev/null
+++ b/Documentation/zh_CN/video4linux/v4l2-framework.txt
@@ -0,0 +1,976 @@
+Chinese translated version of Documentation/video4linux/v4l2-framework.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Mauro Carvalho Chehab <mchehab@infradead.org>
+Chinese maintainer: Fu Wei <tekkamanninja@gmail.com>
+---------------------------------------------------------------------
+Documentation/video4linux/v4l2-framework.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+英文版维护者: Mauro Carvalho Chehab <mchehab@infradead.org>
+中文版维护者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版翻译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+中文版校译者: 傅炜 Fu Wei <tekkamanninja@gmail.com>
+
+
+以下为正文
+---------------------------------------------------------------------
+V4L2 驱动框架概览
+==============
+
+本文档描述 V4L2 框架所提供的各种结构和它们之间的关系。
+
+
+介绍
+----
+
+大部分现代 V4L2 设备由多个 IC 组成,在 /dev 下导出多个设备节点,
+并同时创建非 V4L2 设备(如 DVB、ALSA、FB、I2C 和红外输入设备)。
+由于这种硬件的复杂性,V4L2 驱动也变得非常复杂。
+
+尤其是 V4L2 必须支持 IC 实现音视频的多路复用和编解码,这就更增加了其
+复杂性。通常这些 IC 通过一个或多个 I2C 总线连接到主桥驱动器,但也可
+使用其他总线。这些设备称为“子设备”。
+
+长期以来,这个框架仅限于通过 video_device 结构体创建 V4L 设备节点,
+并使用 video_buf 处理视频缓冲(注:本文不讨论 video_buf 框架)。
+
+这意味着所有驱动必须自己设置设备实例并连接到子设备。其中一部分要正确地
+完成是比较复杂的,使得许多驱动都没有正确地实现。
+
+由于框架的缺失,有很多通用代码都不可重复利用。
+
+因此,这个框架构建所有驱动都需要的基本结构块,而统一的框架将使通用代码
+创建成实用函数并在所有驱动中共享变得更加容易。
+
+
+驱动结构
+-------
+
+所有 V4L2 驱动都有如下结构:
+
+1) 每个设备实例的结构体--包含其设备状态。
+
+2) 初始化和控制子设备的方法(如果有)。
+
+3) 创建 V4L2 设备节点 (/dev/videoX、/dev/vbiX 和 /dev/radioX)
+ 并跟踪设备节点的特定数据。
+
+4) 特定文件句柄结构体--包含每个文件句柄的数据。
+
+5) 视频缓冲处理。
+
+以下是它们的初略关系图:
+
+ device instances(设备实例)
+ |
+ +-sub-device instances(子设备实例)
+ |
+ \-V4L2 device nodes(V4L2 设备节点)
+ |
+ \-filehandle instances(文件句柄实例)
+
+
+框架结构
+-------
+
+该框架非常类似驱动结构:它有一个 v4l2_device 结构用于保存设备
+实例的数据;一个 v4l2_subdev 结构体代表子设备实例;video_device
+结构体保存 V4L2 设备节点的数据;将来 v4l2_fh 结构体将跟踪文件句柄
+实例(暂未尚未实现)。
+
+V4L2 框架也可与媒体框架整合(可选的)。如果驱动设置了 v4l2_device
+结构体的 mdev 域,子设备和视频节点的入口将自动出现在媒体框架中。
+
+
+v4l2_device 结构体
+----------------
+
+每个设备实例都通过 v4l2_device (v4l2-device.h)结构体来表示。
+简单设备可以仅分配这个结构体,但在大多数情况下,都会将这个结构体
+嵌入到一个更大的结构体中。
+
+你必须注册这个设备实例:
+
+ v4l2_device_register(struct device *dev, struct v4l2_device *v4l2_dev);
+
+注册操作将会初始化 v4l2_device 结构体。如果 dev->driver_data 域
+为 NULL,就将其指向 v4l2_dev。
+
+需要与媒体框架整合的驱动必须手动设置 dev->driver_data,指向包含
+v4l2_device 结构体实例的驱动特定设备结构体。这可以在注册 V4L2 设备
+实例前通过 dev_set_drvdata() 函数完成。同时必须设置 v4l2_device
+结构体的 mdev 域,指向适当的初始化并注册过的 media_device 实例。
+
+如果 v4l2_dev->name 为空,则它将被设置为从 dev 中衍生出的值(为了
+更加精确,形式为驱动名后跟 bus_id)。如果你在调用 v4l2_device_register
+前已经设置好了,则不会被修改。如果 dev 为 NULL,则你*必须*在调用
+v4l2_device_register 前设置 v4l2_dev->name。
+
+你可以基于驱动名和驱动的全局 atomic_t 类型的实例编号,通过
+v4l2_device_set_name() 设置 name。这样会生成类似 ivtv0、ivtv1 等
+名字。若驱动名以数字结尾,则会在编号和驱动名间插入一个破折号,如:
+cx18-0、cx18-1 等。此函数返回实例编号。
+
+第一个 “dev” 参数通常是一个指向 pci_dev、usb_interface 或
+platform_device 的指针。很少使其为 NULL,除非是一个ISA设备或者
+当一个设备创建了多个 PCI 设备,使得 v4l2_dev 无法与一个特定的父设备
+关联。
+
+你也可以提供一个 notify() 回调,使子设备可以调用它实现事件通知。
+但这个设置与子设备相关。子设备支持的任何通知必须在
+include/media/<subdevice>.h 中定义一个消息头。
+
+注销 v4l2_device 使用如下函数:
+
+ v4l2_device_unregister(struct v4l2_device *v4l2_dev);
+
+如果 dev->driver_data 域指向 v4l2_dev,将会被重置为 NULL。注销同时
+会自动从设备中注销所有子设备。
+
+如果你有一个热插拔设备(如USB设备),则当断开发生时,父设备将无效。
+由于 v4l2_device 有一个指向父设备的指针必须被清除,同时标志父设备
+已消失,所以必须调用以下函数:
+
+ v4l2_device_disconnect(struct v4l2_device *v4l2_dev);
+
+这个函数并*不*注销子设备,因此你依然要调用 v4l2_device_unregister()
+函数。如果你的驱动器并非热插拔的,就没有必要调用 v4l2_device_disconnect()。
+
+有时你需要遍历所有被特定驱动注册的设备。这通常发生在多个设备驱动使用
+同一个硬件的情况下。如:ivtvfb 驱动是一个使用 ivtv 硬件的帧缓冲驱动,
+同时 alsa 驱动也使用此硬件。
+
+你可以使用如下例程遍历所有注册的设备:
+
+static int callback(struct device *dev, void *p)
+{
+ struct v4l2_device *v4l2_dev = dev_get_drvdata(dev);
+
+ /* 测试这个设备是否已经初始化 */
+ if (v4l2_dev == NULL)
+ return 0;
+ ...
+ return 0;
+}
+
+int iterate(void *p)
+{
+ struct device_driver *drv;
+ int err;
+
+ /* 在PCI 总线上查找ivtv驱动。
+ pci_bus_type是全局的. 对于USB总线使用usb_bus_type。 */
+ drv = driver_find("ivtv", &pci_bus_type);
+ /* 遍历所有的ivtv设备实例 */
+ err = driver_for_each_device(drv, NULL, p, callback);
+ put_driver(drv);
+ return err;
+}
+
+有时你需要一个设备实例的运行计数。这个通常用于映射一个设备实例到一个
+模块选择数组的索引。
+
+推荐方法如下:
+
+static atomic_t drv_instance = ATOMIC_INIT(0);
+
+static int drv_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
+{
+ ...
+ state->instance = atomic_inc_return(&drv_instance) - 1;
+}
+
+如果你有多个设备节点,对于热插拔设备,知道何时注销 v4l2_device 结构体
+就比较困难。为此 v4l2_device 有引用计数支持。当调用 video_register_device
+时增加引用计数,而设备节点释放时减小引用计数。当引用计数为零,则
+v4l2_device 的release() 回调将被执行。你就可以在此时做最后的清理工作。
+
+如果创建了其他设备节点(比如 ALSA),则你可以通过以下函数手动增减
+引用计数:
+
+void v4l2_device_get(struct v4l2_device *v4l2_dev);
+
+或:
+
+int v4l2_device_put(struct v4l2_device *v4l2_dev);
+
+由于引用技术初始化为 1 ,你也需要在 disconnect() 回调(对于 USB 设备)中
+调用 v4l2_device_put,或者 remove() 回调(例如对于 PCI 设备),否则
+引用计数将永远不会为 0 。
+
+v4l2_subdev结构体
+------------------
+
+许多驱动需要与子设备通信。这些设备可以完成各种任务,但通常他们负责
+音视频复用和编解码。如网络摄像头的子设备通常是传感器和摄像头控制器。
+
+这些一般为 I2C 接口设备,但并不一定都是。为了给驱动提供调用子设备的
+统一接口,v4l2_subdev 结构体(v4l2-subdev.h)产生了。
+
+每个子设备驱动都必须有一个 v4l2_subdev 结构体。这个结构体可以单独
+代表一个简单的子设备,也可以嵌入到一个更大的结构体中,与更多设备状态
+信息保存在一起。通常有一个下级设备结构体(比如:i2c_client)包含了
+内核创建的设备数据。建议使用 v4l2_set_subdevdata() 将这个结构体的
+指针保存在 v4l2_subdev 的私有数据域(dev_priv)中。这使得通过 v4l2_subdev
+找到实际的低层总线特定设备数据变得容易。
+
+你同时需要一个从低层结构体获取 v4l2_subdev 指针的方法。对于常用的
+i2c_client 结构体,i2c_set_clientdata() 函数可用于保存一个 v4l2_subdev
+指针;对于其他总线你可能需要使用其他相关函数。
+
+桥驱动中也应保存每个子设备的私有数据,比如一个指向特定桥的各设备私有
+数据的指针。为此 v4l2_subdev 结构体提供主机私有数据域(host_priv),
+并可通过 v4l2_get_subdev_hostdata() 和 v4l2_set_subdev_hostdata()
+访问。
+
+从总线桥驱动的视角,驱动加载子设备模块并以某种方式获得 v4l2_subdev
+结构体指针。对于 i2c 总线设备相对简单:调用 i2c_get_clientdata()。
+对于其他总线也需要做类似的操作。针对 I2C 总线上的子设备辅助函数帮你
+完成了大部分复杂的工作。
+
+每个 v4l2_subdev 都包含子设备驱动需要实现的函数指针(如果对此设备
+不适用,可为NULL)。由于子设备可完成许多不同的工作,而在一个庞大的
+函数指针结构体中通常仅有少数有用的函数实现其功能肯定不合适。所以,
+函数指针根据其实现的功能被分类,每一类都有自己的函数指针结构体。
+
+顶层函数指针结构体包含了指向各类函数指针结构体的指针,如果子设备驱动
+不支持该类函数中的任何一个功能,则指向该类结构体的指针为NULL。
+
+这些结构体定义如下:
+
+struct v4l2_subdev_core_ops {
+ int (*log_status)(struct v4l2_subdev *sd);
+ int (*init)(struct v4l2_subdev *sd, u32 val);
+ ...
+};
+
+struct v4l2_subdev_tuner_ops {
+ ...
+};
+
+struct v4l2_subdev_audio_ops {
+ ...
+};
+
+struct v4l2_subdev_video_ops {
+ ...
+};
+
+struct v4l2_subdev_pad_ops {
+ ...
+};
+
+struct v4l2_subdev_ops {
+ const struct v4l2_subdev_core_ops *core;
+ const struct v4l2_subdev_tuner_ops *tuner;
+ const struct v4l2_subdev_audio_ops *audio;
+ const struct v4l2_subdev_video_ops *video;
+ const struct v4l2_subdev_pad_ops *video;
+};
+
+其中 core(核心)函数集通常可用于所有子设备,其他类别的实现依赖于
+子设备。如视频设备可能不支持音频操作函数,反之亦然。
+
+这样的设置在限制了函数指针数量的同时,还使增加新的操作函数和分类
+变得较为容易。
+
+子设备驱动可使用如下函数初始化 v4l2_subdev 结构体:
+
+ v4l2_subdev_init(sd, &ops);
+
+然后,你必须用一个唯一的名字初始化 subdev->name,并初始化模块的
+owner 域。若使用 i2c 辅助函数,这些都会帮你处理好。
+
+若需同媒体框架整合,你必须调用 media_entity_init() 初始化 v4l2_subdev
+结构体中的 media_entity 结构体(entity 域):
+
+ struct media_pad *pads = &my_sd->pads;
+ int err;
+
+ err = media_entity_init(&sd->entity, npads, pads, 0);
+
+pads 数组必须预先初始化。无须手动设置 media_entity 的 type 和
+name 域,但如有必要,revision 域必须初始化。
+
+当(任何)子设备节点被打开/关闭,对 entity 的引用将被自动获取/释放。
+
+在子设备被注销之后,不要忘记清理 media_entity 结构体:
+
+ media_entity_cleanup(&sd->entity);
+
+如果子设备驱动趋向于处理视频并整合进了媒体框架,必须使用 v4l2_subdev_pad_ops
+替代 v4l2_subdev_video_ops 实现格式相关的功能。
+
+这种情况下,子设备驱动应该设置 link_validate 域,以提供它自身的链接
+验证函数。链接验证函数应对管道(两端链接的都是 V4L2 子设备)中的每个
+链接调用。驱动还要负责验证子设备和视频节点间格式配置的正确性。
+
+如果 link_validate 操作没有设置,默认的 v4l2_subdev_link_validate_default()
+函数将会被调用。这个函数保证宽、高和媒体总线像素格式在链接的收发两端
+都一致。子设备驱动除了它们自己的检测外,也可以自由使用这个函数以执行
+上面提到的检查。
+
+设备(桥)驱动程序必须向 v4l2_device 注册 v4l2_subdev:
+
+ int err = v4l2_device_register_subdev(v4l2_dev, sd);
+
+如果子设备模块在它注册前消失,这个操作可能失败。在这个函数成功返回后,
+subdev->dev 域就指向了 v4l2_device。
+
+如果 v4l2_device 父设备的 mdev 域为非 NULL 值,则子设备实体将被自动
+注册为媒体设备。
+
+注销子设备则可用如下函数:
+
+ v4l2_device_unregister_subdev(sd);
+
+此后,子设备模块就可卸载,且 sd->dev == NULL。
+
+注册之设备后,可通过以下方式直接调用其操作函数:
+
+ err = sd->ops->core->g_std(sd, &norm);
+
+但使用如下宏会比较容易且合适:
+
+ err = v4l2_subdev_call(sd, core, g_std, &norm);
+
+这个宏将会做 NULL 指针检查,如果 subdev 为 NULL,则返回-ENODEV;如果
+subdev->core 或 subdev->core->g_std 为 NULL,则返回 -ENOIOCTLCMD;
+否则将返回 subdev->ops->core->g_std ops 调用的实际结果。
+
+有时也可能同时调用所有或一系列子设备的某个操作函数:
+
+ v4l2_device_call_all(v4l2_dev, 0, core, g_std, &norm);
+
+任何不支持此操作的子设备都会被跳过,并忽略错误返回值。但如果你需要
+检查出错码,则可使用如下函数:
+
+ err = v4l2_device_call_until_err(v4l2_dev, 0, core, g_std, &norm);
+
+除 -ENOIOCTLCMD 外的任何错误都会跳出循环并返回错误值。如果(除 -ENOIOCTLCMD
+外)没有错误发生,则返回 0。
+
+对于以上两个函数的第二个参数为组 ID。如果为 0,则所有子设备都会执行
+这个操作。如果为非 0 值,则只有那些组 ID 匹配的子设备才会执行此操作。
+在桥驱动注册一个子设备前,可以设置 sd->grp_id 为任何期望值(默认值为
+0)。这个值属于桥驱动,且子设备驱动将不会修改和使用它。
+
+组 ID 赋予了桥驱动更多对于如何调用回调的控制。例如,电路板上有多个
+音频芯片,每个都有改变音量的能力。但当用户想要改变音量的时候,通常
+只有一个会被实际使用。你可以对这样的子设备设置组 ID 为(例如 AUDIO_CONTROLLER)
+并在调用 v4l2_device_call_all() 时指定它为组 ID 值。这就保证了只有
+需要的子设备才会执行这个回调。
+
+如果子设备需要通知它的 v4l2_device 父设备一个事件,可以调用
+v4l2_subdev_notify(sd, notification, arg)。这个宏检查是否有一个
+notify() 回调被注册,如果没有,返回 -ENODEV。否则返回 notify() 调用
+结果。
+
+使用 v4l2_subdev 的好处在于它是一个通用结构体,且不包含任何底层硬件
+信息。所有驱动可以包含多个 I2C 总线的子设备,但也有子设备是通过 GPIO
+控制。这个区别仅在配置设备时有关系,一旦子设备注册完成,对于 v4l2
+子系统来说就完全透明了。
+
+
+V4L2 子设备用户空间API
+--------------------
+
+除了通过 v4l2_subdev_ops 结构导出的内核 API,V4L2 子设备也可以直接
+通过用户空间应用程序来控制。
+
+可以在 /dev 中创建名为 v4l-subdevX 设备节点,以通过其直接访问子设备。
+如果子设备支持用户空间直接配置,必须在注册前设置 V4L2_SUBDEV_FL_HAS_DEVNODE
+标志。
+
+注册子设备之后, v4l2_device 驱动会通过调用 v4l2_device_register_subdev_nodes()
+函数为所有已注册并设置了 V4L2_SUBDEV_FL_HAS_DEVNODE 的子设备创建
+设备节点。这些设备节点会在子设备注销时自动删除。
+
+这些设备节点处理 V4L2 API 的一个子集。
+
+VIDIOC_QUERYCTRL
+VIDIOC_QUERYMENU
+VIDIOC_G_CTRL
+VIDIOC_S_CTRL
+VIDIOC_G_EXT_CTRLS
+VIDIOC_S_EXT_CTRLS
+VIDIOC_TRY_EXT_CTRLS
+
+ 这些 ioctls 控制与 V4L2 中定义的一致。他们行为相同,唯一的
+ 不同是他们只处理子设备的控制实现。根据驱动程序,这些控制也
+ 可以通过一个(或多个) V4L2 设备节点访问。
+
+VIDIOC_DQEVENT
+VIDIOC_SUBSCRIBE_EVENT
+VIDIOC_UNSUBSCRIBE_EVENT
+
+ 这些 ioctls 事件与 V4L2 中定义的一致。他们行为相同,唯一的
+ 不同是他们只处理子设备产生的事件。根据驱动程序,这些事件也
+ 可以通过一个(或多个) V4L2 设备节点上报。
+
+ 要使用事件通知的子设备驱动,在注册子设备前必须在 v4l2_subdev::flags
+ 中设置 V4L2_SUBDEV_USES_EVENTS 并在 v4l2_subdev::nevents
+ 中初始化事件队列深度。注册完成后,事件会在 v4l2_subdev::devnode
+ 设备节点中像通常一样被排队。
+
+ 为正确支持事件机制,poll() 文件操作也应被实现。
+
+私有 ioctls
+
+ 不在以上列表中的所有 ioctls 会通过 core::ioctl 操作直接传递
+ 给子设备驱动。
+
+
+I2C 子设备驱动
+-------------
+
+由于这些驱动很常见,所以内特提供了特定的辅助函数(v4l2-common.h)让这些
+设备的使用更加容易。
+
+添加 v4l2_subdev 支持的推荐方法是让 I2C 驱动将 v4l2_subdev 结构体
+嵌入到为每个 I2C 设备实例创建的状态结构体中。而最简单的设备没有状态
+结构体,此时可以直接创建一个 v4l2_subdev 结构体。
+
+一个典型的状态结构体如下所示(‘chipname’用芯片名代替):
+
+struct chipname_state {
+ struct v4l2_subdev sd;
+ ... /* 附加的状态域*/
+};
+
+初始化 v4l2_subdev 结构体的方法如下:
+
+ v4l2_i2c_subdev_init(&state->sd, client, subdev_ops);
+
+这个函数将填充 v4l2_subdev 结构体中的所有域,并保证 v4l2_subdev 和
+i2c_client 都指向彼此。
+
+同时,你也应该为从 v4l2_subdev 指针找到 chipname_state 结构体指针
+添加一个辅助内联函数。
+
+static inline struct chipname_state *to_state(struct v4l2_subdev *sd)
+{
+ return container_of(sd, struct chipname_state, sd);
+}
+
+使用以下函数可以通过 v4l2_subdev 结构体指针获得 i2c_client 结构体
+指针:
+
+ struct i2c_client *client = v4l2_get_subdevdata(sd);
+
+而以下函数则相反,通过 i2c_client 结构体指针获得 v4l2_subdev 结构体
+指针:
+
+ struct v4l2_subdev *sd = i2c_get_clientdata(client);
+
+当 remove()函数被调用前,必须保证先调用 v4l2_device_unregister_subdev(sd)。
+此操作将会从桥驱动中注销子设备。即使子设备没有注册,调用此函数也是
+安全的。
+
+必须这样做的原因是:当桥驱动注销 i2c 适配器时,remove()回调函数
+会被那个适配器上的 i2c 设备调用。此后,相应的 v4l2_subdev 结构体
+就不存在了,所有它们必须先被注销。在 remove()回调函数中调用
+v4l2_device_unregister_subdev(sd),可以保证执行总是正确的。
+
+
+桥驱动也有一些辅组函数可用:
+
+struct v4l2_subdev *sd = v4l2_i2c_new_subdev(v4l2_dev, adapter,
+ "module_foo", "chipid", 0x36, NULL);
+
+这个函数会加载给定的模块(如果没有模块需要加载,可以为 NULL),
+并用给定的 i2c 适配器结构体指针(i2c_adapter)和 器件地址(chip/address)
+作为参数调用 i2c_new_device()。如果一切顺利,则就在 v4l2_device
+中注册了子设备。
+
+你也可以利用 v4l2_i2c_new_subdev()的最后一个参数,传递一个可能的
+I2C 地址数组,让函数自动探测。这些探测地址只有在前一个参数为 0 的
+情况下使用。非零参数意味着你知道准确的 i2c 地址,所以此时无须进行
+探测。
+
+如果出错,两个函数都返回 NULL。
+
+注意:传递给 v4l2_i2c_new_subdev()的 chipid 通常与模块名一致。
+它允许你指定一个芯片的变体,比如“saa7114”或“saa7115”。一般通过
+i2c 驱动自动探测。chipid 的使用是在今后需要深入了解的事情。这个与
+i2c 驱动不同,较容易混淆。要知道支持哪些芯片变体,你可以查阅 i2c
+驱动代码的 i2c_device_id 表,上面列出了所有可能支持的芯片。
+
+还有两个辅助函数:
+
+v4l2_i2c_new_subdev_cfg:这个函数添加新的 irq 和 platform_data
+参数,并有‘addr’和‘probed_addrs’参数:如果 addr 非零,则被使用
+(不探测变体),否则 probed_addrs 中的地址将用于自动探测。
+
+例如:以下代码将会探测地址(0x10):
+
+struct v4l2_subdev *sd = v4l2_i2c_new_subdev_cfg(v4l2_dev, adapter,
+ "module_foo", "chipid", 0, NULL, 0, I2C_ADDRS(0x10));
+
+v4l2_i2c_new_subdev_board 使用一个 i2c_board_info 结构体,将其
+替代 irq、platform_data 和 add r参数传递给 i2c 驱动。
+
+如果子设备支持 s_config 核心操作,这个操作会在子设备配置好之后以 irq 和
+platform_data 为参数调用。早期的 v4l2_i2c_new_(probed_)subdev 函数
+同样也会调用 s_config,但仅在 irq 为 0 且 platform_data 为 NULL 时。
+
+video_device结构体
+-----------------
+
+在 /dev 目录下的实际设备节点根据 video_device 结构体(v4l2-dev.h)
+创建。此结构体既可以动态分配也可以嵌入到一个更大的结构体中。
+
+动态分配方法如下:
+
+ struct video_device *vdev = video_device_alloc();
+
+ if (vdev == NULL)
+ return -ENOMEM;
+
+ vdev->release = video_device_release;
+
+如果将其嵌入到一个大结构体中,则必须自己实现 release()回调。
+
+ struct video_device *vdev = &my_vdev->vdev;
+
+ vdev->release = my_vdev_release;
+
+release()回调必须被设置,且在最后一个 video_device 用户退出之后
+被调用。
+
+默认的 video_device_release()回调只是调用 kfree 来释放之前分配的
+内存。
+
+你应该设置这些域:
+
+- v4l2_dev: 设置为 v4l2_device 父设备。
+
+- name: 设置为唯一的描述性设备名。
+
+- fops: 设置为已有的 v4l2_file_operations 结构体。
+
+- ioctl_ops: 如果你使用v4l2_ioctl_ops 来简化 ioctl 的维护
+ (强烈建议使用,且将来可能变为强制性的!),然后设置你自己的
+ v4l2_ioctl_ops 结构体.
+
+- lock: 如果你要在驱动中实现所有的锁操作,则设为 NULL 。否则
+ 就要设置一个指向 struct mutex_lock 结构体的指针,这个锁将
+ 在 unlocked_ioctl 文件操作被调用前由内核获得,并在调用返回后
+ 释放。详见下一节。
+
+- prio: 保持对优先级的跟踪。用于实现 VIDIOC_G/S_PRIORITY。如果
+ 设置为 NULL,则会使用 v4l2_device 中的 v4l2_prio_state 结构体。
+ 如果要对每个设备节点(组)实现独立的优先级,可以将其指向自己
+ 实现的 v4l2_prio_state 结构体。
+
+- parent: 仅在使用 NULL 作为父设备结构体参数注册 v4l2_device 时
+ 设置此参数。只有在一个硬件设备包含多一个 PCI 设备,共享同一个
+ v4l2_device 核心时才会发生。
+
+ cx88 驱动就是一个例子:一个 v4l2_device 结构体核心,被一个裸的
+ 视频 PCI 设备(cx8800)和一个 MPEG PCI 设备(cx8802)共用。由于
+ v4l2_device 无法与特定的 PCI 设备关联,所有没有设置父设备。但当
+ video_device 配置后,就知道使用哪个父 PCI 设备了。
+
+如果你使用 v4l2_ioctl_ops,则应该在 v4l2_file_operations 结构体中
+设置 .unlocked_ioctl 指向 video_ioctl2。
+
+请勿使用 .ioctl!它已被废弃,今后将消失。
+
+某些情况下你要告诉核心:你在 v4l2_ioctl_ops 指定的某个函数应被忽略。
+你可以在 video_device_register 被调用前通过以下函数标记这个 ioctls。
+
+void v4l2_disable_ioctl(struct video_device *vdev, unsigned int cmd);
+
+基于外部因素(例如某个板卡已被使用),在不创建新结构体的情况下,你想
+要关闭 v4l2_ioctl_ops 中某个特性往往需要这个机制。
+
+v4l2_file_operations 结构体是 file_operations 的一个子集。其主要
+区别在于:因 inode 参数从未被使用,它将被忽略。
+
+如果需要与媒体框架整合,你必须通过调用 media_entity_init() 初始化
+嵌入在 video_device 结构体中的 media_entity(entity 域)结构体:
+
+ struct media_pad *pad = &my_vdev->pad;
+ int err;
+
+ err = media_entity_init(&vdev->entity, 1, pad, 0);
+
+pads 数组必须预先初始化。没有必要手动设置 media_entity 的 type 和
+name 域。
+
+当(任何)子设备节点被打开/关闭,对 entity 的引用将被自动获取/释放。
+
+v4l2_file_operations 与锁
+--------------------------
+
+你可以在 video_device 结构体中设置一个指向 mutex_lock 的指针。通常
+这既可是一个顶层互斥锁也可为设备节点自身的互斥锁。默认情况下,此锁
+用于 unlocked_ioctl,但为了使用 ioctls 你通过以下函数可禁用锁定:
+
+ void v4l2_disable_ioctl_locking(struct video_device *vdev, unsigned int cmd);
+
+例如: v4l2_disable_ioctl_locking(vdev, VIDIOC_DQBUF);
+
+你必须在注册 video_device 前调用这个函数。
+
+特别是对于 USB 驱动程序,某些命令(如设置控制)需要很长的时间,可能
+需要自行为缓冲区队列的 ioctls 实现锁定。
+
+如果你需要更细粒度的锁,你必须设置 mutex_lock 为 NULL,并完全自己实现
+锁机制。
+
+这完全由驱动开发者决定使用何种方法。然而,如果你的驱动存在长延时操作
+(例如,改变 USB 摄像头的曝光时间可能需要较长时间),而你又想让用户
+在等待长延时操作完成期间做其他的事,则你最好自己实现锁机制。
+
+如果指定一个锁,则所有 ioctl 操作将在这个锁的作用下串行执行。如果你
+使用 videobuf,则必须将同一个锁传递给 videobuf 队列初始化函数;如
+videobuf 必须等待一帧的到达,则可临时解锁并在这之后重新上锁。如果驱动
+也在代码执行期间等待,则可做同样的工作(临时解锁,再上锁)让其他进程
+可以在第一个进程阻塞时访问设备节点。
+
+在使用 videobuf2 的情况下,必须实现 wait_prepare 和 wait_finish 回调
+在适当的时候解锁/加锁。进一步来说,如果你在 video_device 结构体中使用
+锁,则必须在 wait_prepare 和 wait_finish 中对这个互斥锁进行解锁/加锁。
+
+热插拔的断开实现也必须在调用 v4l2_device_disconnect 前获得锁。
+
+video_device注册
+---------------
+
+接下来你需要注册视频设备:这会为你创建一个字符设备。
+
+ err = video_register_device(vdev, VFL_TYPE_GRABBER, -1);
+ if (err) {
+ video_device_release(vdev); /* or kfree(my_vdev); */
+ return err;
+ }
+
+如果 v4l2_device 父设备的 mdev 域为非 NULL 值,视频设备实体将自动
+注册为媒体设备。
+
+注册哪种设备是根据类型(type)参数。存在以下类型:
+
+VFL_TYPE_GRABBER: 用于视频输入/输出设备的 videoX
+VFL_TYPE_VBI: 用于垂直消隐数据的 vbiX (例如,隐藏式字幕,图文电视)
+VFL_TYPE_RADIO: 用于广播调谐器的 radioX
+
+最后一个参数让你确定一个所控制设备的设备节点号数量(例如 videoX 中的 X)。
+通常你可以传入-1,让 v4l2 框架自己选择第一个空闲的编号。但是有时用户
+需要选择一个特定的节点号。驱动允许用户通过驱动模块参数选择一个特定的
+设备节点号是很普遍的。这个编号将会传递给这个函数,且 video_register_device
+将会试图选择这个设备节点号。如果这个编号被占用,下一个空闲的设备节点
+编号将被选中,并向内核日志中发送一个警告信息。
+
+另一个使用场景是当驱动创建多个设备时。这种情况下,对不同的视频设备在
+编号上使用不同的范围是很有用的。例如,视频捕获设备从 0 开始,视频
+输出设备从 16 开始。所以你可以使用最后一个参数来指定设备节点号最小值,
+而 v4l2 框架会试图选择第一个的空闲编号(等于或大于你提供的编号)。
+如果失败,则它会就选择第一个空闲的编号。
+
+由于这种情况下,你会忽略无法选择特定设备节点号的警告,则可调用
+video_register_device_no_warn() 函数避免警告信息的产生。
+
+只要设备节点被创建,一些属性也会同时创建。在 /sys/class/video4linux
+目录中你会找到这些设备。例如进入其中的 video0 目录,你会看到‘name’和
+‘index’属性。‘name’属性值就是 video_device 结构体中的‘name’域。
+
+‘index’属性值就是设备节点的索引值:每次调用 video_register_device(),
+索引值都递增 1 。第一个视频设备节点总是从索引值 0 开始。
+
+用户可以设置 udev 规则,利用索引属性生成花哨的设备名(例如:用‘mpegX’
+代表 MPEG 视频捕获设备节点)。
+
+在设备成功注册后,就可以使用这些域:
+
+- vfl_type: 传递给 video_register_device 的设备类型。
+- minor: 已指派的次设备号。
+- num: 设备节点编号 (例如 videoX 中的 X)。
+- index: 设备索引号。
+
+如果注册失败,你必须调用 video_device_release() 来释放已分配的
+video_device 结构体;如果 video_device 是嵌入在自己创建的结构体中,
+你也必须释放它。vdev->release() 回调不会在注册失败之后被调用,
+你也不应试图在注册失败后注销设备。
+
+
+video_device 注销
+----------------
+
+当视频设备节点已被移除,不论是卸载驱动还是USB设备断开,你都应注销
+它们:
+
+ video_unregister_device(vdev);
+
+这个操作将从 sysfs 中移除设备节点(导致 udev 将其从 /dev 中移除)。
+
+video_unregister_device() 返回之后,就无法完成打开操作。尽管如此,
+USB 设备的情况则不同,某些应用程序可能依然打开着其中一个已注销设备
+节点。所以在注销之后,所有文件操作(当然除了 release )也应返回错误值。
+
+当最后一个视频设备节点的用户退出,则 vdev->release() 回调会被调用,
+并且你可以做最后的清理操作。
+
+不要忘记清理与视频设备相关的媒体入口(如果被初始化过):
+
+ media_entity_cleanup(&vdev->entity);
+
+这可以在 release 回调中完成。
+
+
+video_device 辅助函数
+---------------------
+
+一些有用的辅助函数如下:
+
+- file/video_device 私有数据
+
+你可以用以下函数在 video_device 结构体中设置/获取驱动私有数据:
+
+void *video_get_drvdata(struct video_device *vdev);
+void video_set_drvdata(struct video_device *vdev, void *data);
+
+注意:在调用 video_register_device() 前执行 video_set_drvdata()
+是安全的。
+
+而以下函数:
+
+struct video_device *video_devdata(struct file *file);
+
+返回 file 结构体中拥有的的 video_device 指针。
+
+video_drvdata 辅助函数结合了 video_get_drvdata 和 video_devdata
+的功能:
+
+void *video_drvdata(struct file *file);
+
+你可以使用如下代码从 video_device 结构体中获取 v4l2_device 结构体
+指针:
+
+struct v4l2_device *v4l2_dev = vdev->v4l2_dev;
+
+- 设备节点名
+
+video_device 设备节点在内核中的名称可以通过以下函数获得
+
+const char *video_device_node_name(struct video_device *vdev);
+
+这个名字被用户空间工具(例如 udev)作为提示信息使用。应尽可能使用
+此功能,而非访问 video_device::num 和 video_device::minor 域。
+
+
+视频缓冲辅助函数
+---------------
+
+v4l2 核心 API 提供了一个处理视频缓冲的标准方法(称为“videobuf”)。
+这些方法使驱动可以通过统一的方式实现 read()、mmap() 和 overlay()。
+目前在设备上支持视频缓冲的方法有分散/聚集 DMA(videobuf-dma-sg)、
+线性 DMA(videobuf-dma-contig)以及大多用于 USB 设备的用 vmalloc
+分配的缓冲(videobuf-vmalloc)。
+
+请参阅 Documentation/video4linux/videobuf,以获得更多关于 videobuf
+层的使用信息。
+
+v4l2_fh 结构体
+-------------
+
+v4l2_fh 结构体提供一个保存用于 V4L2 框架的文件句柄特定数据的简单方法。
+如果 video_device 标志,新驱动
+必须使用 v4l2_fh 结构体,因为它也用于实现优先级处理(VIDIOC_G/S_PRIORITY)。
+
+v4l2_fh 的用户(位于 V4l2 框架中,并非驱动)可通过测试
+video_device->flags 中的 V4L2_FL_USES_V4L2_FH 位得知驱动是否使用
+v4l2_fh 作为他的 file->private_data 指针。这个位会在调用 v4l2_fh_init()
+时被设置。
+
+v4l2_fh 结构体作为驱动自身文件句柄结构体的一部分被分配,且驱动在
+其打开函数中将 file->private_data 指向它。
+
+在许多情况下,v4l2_fh 结构体会嵌入到一个更大的结构体中。这钟情况下,
+应该在 open() 中调用 v4l2_fh_init+v4l2_fh_add,并在 release() 中
+调用 v4l2_fh_del+v4l2_fh_exit。
+
+驱动可以通过使用 container_of 宏提取他们自己的文件句柄结构体。例如:
+
+struct my_fh {
+ int blah;
+ struct v4l2_fh fh;
+};
+
+...
+
+int my_open(struct file *file)
+{
+ struct my_fh *my_fh;
+ struct video_device *vfd;
+ int ret;
+
+ ...
+
+ my_fh = kzalloc(sizeof(*my_fh), GFP_KERNEL);
+
+ ...
+
+ v4l2_fh_init(&my_fh->fh, vfd);
+
+ ...
+
+ file->private_data = &my_fh->fh;
+ v4l2_fh_add(&my_fh->fh);
+ return 0;
+}
+
+int my_release(struct file *file)
+{
+ struct v4l2_fh *fh = file->private_data;
+ struct my_fh *my_fh = container_of(fh, struct my_fh, fh);
+
+ ...
+ v4l2_fh_del(&my_fh->fh);
+ v4l2_fh_exit(&my_fh->fh);
+ kfree(my_fh);
+ return 0;
+}
+
+以下是 v4l2_fh 函数使用的简介:
+
+void v4l2_fh_init(struct v4l2_fh *fh, struct video_device *vdev)
+
+ 初始化文件句柄。这*必须*在驱动的 v4l2_file_operations->open()
+ 函数中执行。
+
+void v4l2_fh_add(struct v4l2_fh *fh)
+
+ 添加一个 v4l2_fh 到 video_device 文件句柄列表。一旦文件句柄
+ 初始化完成就必须调用。
+
+void v4l2_fh_del(struct v4l2_fh *fh)
+
+ 从 video_device() 中解除文件句柄的关联。文件句柄的退出函数也
+ 将被调用。
+
+void v4l2_fh_exit(struct v4l2_fh *fh)
+
+ 清理文件句柄。在清理完 v4l2_fh 后,相关内存会被释放。
+
+
+如果 v4l2_fh 不是嵌入在其他结构体中的,则可以用这些辅助函数:
+
+int v4l2_fh_open(struct file *filp)
+
+ 分配一个 v4l2_fh 结构体空间,初始化并将其添加到 file 结构体相关的
+ video_device 结构体中。
+
+int v4l2_fh_release(struct file *filp)
+
+ 从 file 结构体相关的 video_device 结构体中删除 v4l2_fh ,清理
+ v4l2_fh 并释放空间。
+
+这两个函数可以插入到 v4l2_file_operation 的 open() 和 release()
+操作中。
+
+
+某些驱动需要在第一个文件句柄打开和最后一个文件句柄关闭的时候做些
+工作。所以加入了两个辅助函数以检查 v4l2_fh 结构体是否是相关设备
+节点打开的唯一文件句柄。
+
+int v4l2_fh_is_singular(struct v4l2_fh *fh)
+
+ 如果此文件句柄是唯一打开的文件句柄,则返回 1 ,否则返回 0 。
+
+int v4l2_fh_is_singular_file(struct file *filp)
+
+ 功能相同,但通过 filp->private_data 调用 v4l2_fh_is_singular。
+
+
+V4L2 事件机制
+-----------
+
+V4L2 事件机制提供了一个通用的方法将事件传递到用户空间。驱动必须使用
+v4l2_fh 才能支持 V4L2 事件机制。
+
+
+事件通过一个类型和选择 ID 来定义。ID 对应一个 V4L2 对象,例如
+一个控制 ID。如果未使用,则 ID 为 0。
+
+当用户订阅一个事件,驱动会为此分配一些 kevent 结构体。所以每个
+事件组(类型、ID)都会有自己的一套 kevent 结构体。这保证了如果
+一个驱动短时间内产生了许多同类事件,不会覆盖其他类型的事件。
+
+但如果你收到的事件数量大于同类事件 kevent 的保存数量,则最早的
+事件将被丢弃,并加入新事件。
+
+此外,v4l2_subscribed_event 结构体内部有可供驱动设置的 merge() 和
+replace() 回调,这些回调会在新事件产生且没有多余空间的时候被调用。
+replace() 回调让你可以将早期事件的净荷替换为新事件的净荷,将早期
+净荷的相关数据合并到替换进来的新净荷中。当该类型的事件仅分配了一个
+kevent 结构体时,它将被调用。merge() 回调让你可以合并最早的事件净荷
+到在它之后的那个事件净荷中。当该类型的事件分配了两个或更多 kevent
+结构体时,它将被调用。
+
+这种方法不会有状态信息丢失,只会导致中间步骤信息丢失。
+
+
+关于 replace/merge 回调的一个不错的例子在 v4l2-event.c 中:用于
+控制事件的 ctrls_replace() 和 ctrls_merge() 回调。
+
+注意:这些回调可以在中断上下文中调用,所以它们必须尽快完成并退出。
+
+有用的函数:
+
+void v4l2_event_queue(struct video_device *vdev, const struct v4l2_event *ev)
+
+ 将事件加入视频设备的队列。驱动仅负责填充 type 和 data 域。
+ 其他域由 V4L2 填充。
+
+int v4l2_event_subscribe(struct v4l2_fh *fh,
+ struct v4l2_event_subscription *sub, unsigned elems,
+ const struct v4l2_subscribed_event_ops *ops)
+
+ video_device->ioctl_ops->vidioc_subscribe_event 必须检测驱动能
+ 产生特定 id 的事件。然后调用 v4l2_event_subscribe() 来订阅该事件。
+
+ elems 参数是该事件的队列大小。若为 0,V4L2 框架将会(根据事件类型)
+ 填充默认值。
+
+ ops 参数允许驱动指定一系列回调:
+ * add: 当添加一个新监听者时调用(重复订阅同一个事件,此回调
+ 仅被执行一次)。
+ * del: 当一个监听者停止监听时调用。
+ * replace: 用‘新’事件替换‘早期‘事件。
+ * merge: 将‘早期‘事件合并到‘新’事件中。
+ 这四个调用都是可选的,如果不想指定任何回调,则 ops 可为 NULL。
+
+int v4l2_event_unsubscribe(struct v4l2_fh *fh,
+ struct v4l2_event_subscription *sub)
+
+ v4l2_ioctl_ops 结构体中的 vidioc_unsubscribe_event 回调函数。
+ 驱动程序可以直接使用 v4l2_event_unsubscribe() 实现退订事件过程。
+
+ 特殊的 V4L2_EVENT_ALL 类型,可用于退订所有事件。驱动可能在特殊
+ 情况下需要做此操作。
+
+int v4l2_event_pending(struct v4l2_fh *fh)
+
+ 返回未决事件的数量。有助于实现轮询(poll)操作。
+
+事件通过 poll 系统调用传递到用户空间。驱动可用
+v4l2_fh->wait (wait_queue_head_t 类型)作为参数调用 poll_wait()。
+
+事件分为标准事件和私有事件。新的标准事件必须使用可用的最小事件类型
+编号。驱动必须从他们本类型的编号起始处分配事件。类型的编号起始为
+V4L2_EVENT_PRIVATE_START + n * 1000 ,其中 n 为可用最小编号。每个
+类型中的第一个事件类型编号是为以后的使用保留的,所以第一个可用事件
+类型编号是‘class base + 1’。
+
+V4L2 事件机制的使用实例可以在 OMAP3 ISP 的驱动
+(drivers/media/video/omap3isp)中找到。
diff --git a/Documentation/zh_CN/volatile-considered-harmful.txt b/Documentation/zh_CN/volatile-considered-harmful.txt
new file mode 100644
index 000000000..ba8149d22
--- /dev/null
+++ b/Documentation/zh_CN/volatile-considered-harmful.txt
@@ -0,0 +1,113 @@
+Chinese translated version of Documentation/volatile-considered-harmful.txt
+
+If you have any comment or update to the content, please contact the
+original document maintainer directly. However, if you have a problem
+communicating in English you can also ask the Chinese maintainer for
+help. Contact the Chinese maintainer if this translation is outdated
+or if there is a problem with the translation.
+
+Maintainer: Jonathan Corbet <corbet@lwn.net>
+Chinese maintainer: Bryan Wu <bryan.wu@analog.com>
+---------------------------------------------------------------------
+Documentation/volatile-considered-harmful.txt 的中文翻译
+
+如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
+交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
+译存在问题,请联系中文版维护者。
+
+英文版维护者: Jonathan Corbet <corbet@lwn.net>
+中文版维护者: 伍鹏 Bryan Wu <bryan.wu@analog.com>
+中文版翻译者: 伍鹏 Bryan Wu <bryan.wu@analog.com>
+中文版校译者: 张汉辉 Eugene Teo <eugeneteo@kernel.sg>
+ 杨瑞 Dave Young <hidave.darkstar@gmail.com>
+以下为正文
+---------------------------------------------------------------------
+
+为什么不应该使用“volatile”类型
+------------------------------
+
+C程序员通常认为volatile表示某个变量可以在当前执行的线程之外被改变;因此,在内核
+中用到共享数据结构时,常常会有C程序员喜欢使用volatile这类变量。换句话说,他们经
+常会把volatile类型看成某种简易的原子变量,当然它们不是。在内核中使用volatile几
+乎总是错误的;本文档将解释为什么这样。
+
+理解volatile的关键是知道它的目的是用来消除优化,实际上很少有人真正需要这样的应
+用。在内核中,程序员必须防止意外的并发访问破坏共享的数据结构,这其实是一个完全
+不同的任务。用来防止意外并发访问的保护措施,可以更加高效的避免大多数优化相关的
+问题。
+
+像volatile一样,内核提供了很多原语来保证并发访问时的数据安全(自旋锁, 互斥量,内
+存屏障等等),同样可以防止意外的优化。如果可以正确使用这些内核原语,那么就没有
+必要再使用volatile。如果仍然必须使用volatile,那么几乎可以肯定在代码的某处有一
+个bug。在正确设计的内核代码中,volatile能带来的仅仅是使事情变慢。
+
+思考一下这段典型的内核代码:
+
+ spin_lock(&the_lock);
+ do_something_on(&shared_data);
+ do_something_else_with(&shared_data);
+ spin_unlock(&the_lock);
+
+如果所有的代码都遵循加锁规则,当持有the_lock的时候,不可能意外的改变shared_data的
+值。任何可能访问该数据的其他代码都会在这个锁上等待。自旋锁原语跟内存屏障一样—— 它
+们显式的用来书写成这样 —— 意味着数据访问不会跨越它们而被优化。所以本来编译器认为
+它知道在shared_data里面将有什么,但是因为spin_lock()调用跟内存屏障一样,会强制编
+译器忘记它所知道的一切。那么在访问这些数据时不会有优化的问题。
+
+如果shared_data被声名为volatile,锁操作将仍然是必须的。就算我们知道没有其他人正在
+使用它,编译器也将被阻止优化对临界区内shared_data的访问。在锁有效的同时,
+shared_data不是volatile的。在处理共享数据的时候,适当的锁操作可以不再需要
+volatile —— 并且是有潜在危害的。
+
+volatile的存储类型最初是为那些内存映射的I/O寄存器而定义。在内核里,寄存器访问也应
+该被锁保护,但是人们也不希望编译器“优化”临界区内的寄存器访问。内核里I/O的内存访问
+是通过访问函数完成的;不赞成通过指针对I/O内存的直接访问,并且不是在所有体系架构上
+都能工作。那些访问函数正是为了防止意外优化而写的,因此,再说一次,volatile类型不
+是必需的。
+
+另一种引起用户可能使用volatile的情况是当处理器正忙着等待一个变量的值。正确执行一
+个忙等待的方法是:
+
+ while (my_variable != what_i_want)
+ cpu_relax();
+
+cpu_relax()调用会降低CPU的能量消耗或者让位于超线程双处理器;它也作为内存屏障一样出
+现,所以,再一次,volatile不是必需的。当然,忙等待一开始就是一种反常规的做法。
+
+在内核中,一些稀少的情况下volatile仍然是有意义的:
+
+ - 在一些体系架构的系统上,允许直接的I/0内存访问,那么前面提到的访问函数可以使用
+ volatile。基本上,每一个访问函数调用它自己都是一个小的临界区域并且保证了按照
+ 程序员期望的那样发生访问操作。
+
+ - 某些会改变内存的内联汇编代码虽然没有什么其他明显的附作用,但是有被GCC删除的可
+ 能性。在汇编声明中加上volatile关键字可以防止这种删除操作。
+
+ - Jiffies变量是一种特殊情况,虽然每次引用它的时候都可以有不同的值,但读jiffies
+ 变量时不需要任何特殊的加锁保护。所以jiffies变量可以使用volatile,但是不赞成
+ 其他跟jiffies相同类型变量使用volatile。Jiffies被认为是一种“愚蠢的遗留物"
+ (Linus的话)因为解决这个问题比保持现状要麻烦的多。
+
+ - 由于某些I/0设备可能会修改连续一致的内存,所以有时,指向连续一致内存的数据结构
+ 的指针需要正确的使用volatile。网络适配器使用的环状缓存区正是这类情形的一个例
+ 子,其中适配器用改变指针来表示哪些描述符已经处理过了。
+
+对于大多代码,上述几种可以使用volatile的情况都不适用。所以,使用volatile是一种
+bug并且需要对这样的代码额外仔细检查。那些试图使用volatile的开发人员需要退一步想想
+他们真正想实现的是什么。
+
+非常欢迎删除volatile变量的补丁 - 只要证明这些补丁完整的考虑了并发问题。
+
+注释
+----
+
+[1] http://lwn.net/Articles/233481/
+[2] http://lwn.net/Articles/233482/
+
+致谢
+----
+
+最初由Randy Dunlap推动并作初步研究
+由Jonathan Corbet撰写
+参考Satyam Sharma,Johannes Stezenbach,Jesper Juhl,Heikki Orsila,
+H. Peter Anvin,Philipp Hahn和Stefan Richter的意见改善了本档。
diff --git a/Documentation/zorro.txt b/Documentation/zorro.txt
new file mode 100644
index 000000000..90a64d52b
--- /dev/null
+++ b/Documentation/zorro.txt
@@ -0,0 +1,103 @@
+ Writing Device Drivers for Zorro Devices
+ ----------------------------------------
+
+Written by Geert Uytterhoeven <geert@linux-m68k.org>
+Last revised: September 5, 2003
+
+
+1. Introduction
+---------------
+
+The Zorro bus is the bus used in the Amiga family of computers. Thanks to
+AutoConfig(tm), it's 100% Plug-and-Play.
+
+There are two types of Zorro busses, Zorro II and Zorro III:
+
+ - The Zorro II address space is 24-bit and lies within the first 16 MB of the
+ Amiga's address map.
+
+ - Zorro III is a 32-bit extension of Zorro II, which is backwards compatible
+ with Zorro II. The Zorro III address space lies outside the first 16 MB.
+
+
+2. Probing for Zorro Devices
+----------------------------
+
+Zorro devices are found by calling `zorro_find_device()', which returns a
+pointer to the `next' Zorro device with the specified Zorro ID. A probe loop
+for the board with Zorro ID `ZORRO_PROD_xxx' looks like:
+
+ struct zorro_dev *z = NULL;
+
+ while ((z = zorro_find_device(ZORRO_PROD_xxx, z))) {
+ if (!zorro_request_region(z->resource.start+MY_START, MY_SIZE,
+ "My explanation"))
+ ...
+ }
+
+`ZORRO_WILDCARD' acts as a wildcard and finds any Zorro device. If your driver
+supports different types of boards, you can use a construct like:
+
+ struct zorro_dev *z = NULL;
+
+ while ((z = zorro_find_device(ZORRO_WILDCARD, z))) {
+ if (z->id != ZORRO_PROD_xxx1 && z->id != ZORRO_PROD_xxx2 && ...)
+ continue;
+ if (!zorro_request_region(z->resource.start+MY_START, MY_SIZE,
+ "My explanation"))
+ ...
+ }
+
+
+3. Zorro Resources
+------------------
+
+Before you can access a Zorro device's registers, you have to make sure it's
+not yet in use. This is done using the I/O memory space resource management
+functions:
+
+ request_mem_region()
+ release_mem_region()
+
+Shortcuts to claim the whole device's address space are provided as well:
+
+ zorro_request_device
+ zorro_release_device
+
+
+4. Accessing the Zorro Address Space
+------------------------------------
+
+The address regions in the Zorro device resources are Zorro bus address
+regions. Due to the identity bus-physical address mapping on the Zorro bus,
+they are CPU physical addresses as well.
+
+The treatment of these regions depends on the type of Zorro space:
+
+ - Zorro II address space is always mapped and does not have to be mapped
+ explicitly using z_ioremap().
+
+ Conversion from bus/physical Zorro II addresses to kernel virtual addresses
+ and vice versa is done using:
+
+ virt_addr = ZTWO_VADDR(bus_addr);
+ bus_addr = ZTWO_PADDR(virt_addr);
+
+ - Zorro III address space must be mapped explicitly using z_ioremap() first
+ before it can be accessed:
+
+ virt_addr = z_ioremap(bus_addr, size);
+ ...
+ z_iounmap(virt_addr);
+
+
+5. References
+-------------
+
+linux/include/linux/zorro.h
+linux/include/uapi/linux/zorro.h
+linux/include/uapi/linux/zorro_ids.h
+linux/arch/m68k/include/asm/zorro.h
+linux/drivers/zorro
+/proc/bus/zorro
+